summaryrefslogtreecommitdiff
path: root/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt
diff options
context:
space:
mode:
Diffstat (limited to 'FreeRTOS-Plus/Source/WolfSSL/wolfcrypt')
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.c6668
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.sln39
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.vcproj10
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/include.am6
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c7579
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_asm.S1338
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_asm.asm1021
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_gcm_asm.S8733
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/arc4.c129
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asm.c799
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asn.c16530
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/async.c0
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/blake2b.c43
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/blake2s.c446
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/camellia.c84
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha.c167
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha20_poly1305.c328
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha_asm.S1420
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cmac.c215
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/coding.c294
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/compress.c57
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cpuid.c110
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cryptocb.c648
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/curve25519.c467
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/curve448.c635
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/des3.c2343
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dh.c2457
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dsa.c808
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ecc.c10389
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ed25519.c678
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ed448.c917
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/error.c227
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/evp.c6595
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_448.c2458
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_low_mem.c91
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_operations.c285
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_x25519_128.i625
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_x25519_asm.S16542
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fips.c0
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fips_test.c0
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mont_small.i40
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_12.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_17.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_20.i33
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_24.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_28.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_3.i15
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_32.i37
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_4.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_48.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_6.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_64.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_7.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_8.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_9.i31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_small_set.i92
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_12.i30
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_17.i29
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_20.i33
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_24.i33
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_28.i33
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_3.i13
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_32.i33
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_4.i29
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_48.i33
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_6.i29
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_64.i33
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_7.i29
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_8.i33
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_9.i29
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_small_set.i66
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_448.c10780
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_low_mem.c531
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_operations.c8672
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hash.c1661
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hc128.c109
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hmac.c1619
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/idea.c303
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/include.am83
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/integer.c1464
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/logging.c773
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md2.c10
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md4.c31
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md5.c701
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/memory.c1015
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/misc.c264
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pkcs12.c2403
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pkcs7.c12701
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/poly1305.c1124
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/poly1305_asm.S1105
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/pic32/pic32mz-crypt.c804
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-aes.c147
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-ccm.c62
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-des3.c65
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-hash.c229
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pwdbased.c785
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rabbit.c56
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/random.c2487
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ripemd.c60
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rsa.c4333
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/selftest.c0
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha.c1036
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256.c2954
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256_asm.S22653
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha3.c1216
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha512.c2354
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha512_asm.S10741
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/signature.c559
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_arm32.c89057
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_arm64.c42082
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_armthumb.c27863
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_c32.c23857
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_c64.c23220
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_cortexm.c25687
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_dsp32.c4908
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_int.c2203
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_x86_64.c29555
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_x86_64_asm.S41830
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/srp.c756
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/tfm.c3629
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_dsp.c327
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_encrypt.c660
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_pkcs11.c2546
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_port.c2534
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfcrypt_first.c0
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfcrypt_last.c0
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfevent.c283
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfmath.c381
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/include.am9
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.c27246
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.h20
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.sln39
-rw-r--r--FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.vcproj10
133 files changed, 514198 insertions, 23618 deletions
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.c
index f284774f3..f79f7c86f 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.c
@@ -1,8 +1,8 @@
/* benchmark.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
/* wolfCrypt benchmark */
@@ -26,17 +27,104 @@
#include <config.h>
#endif
+#ifndef WOLFSSL_USER_SETTINGS
+ #include <wolfssl/options.h>
+#endif
#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/version.h>
+#include <wolfssl/wolfcrypt/wc_port.h>
+
+/* Macro to disable benchmark */
+#ifndef NO_CRYPT_BENCHMARK
-#include <string.h>
+/* only for stack size check */
+#ifdef HAVE_STACK_SIZE
+ #include <wolfssl/ssl.h>
+ #include <wolfssl/test.h>
+#endif
+
+#ifdef USE_FLAT_BENCHMARK_H
+ #include "benchmark.h"
+#else
+ #include "wolfcrypt/benchmark/benchmark.h"
+#endif
+/* printf mappings */
#ifdef FREESCALE_MQX
#include <mqx.h>
- #include <fio.h>
-#else
+ /* see wc_port.h for fio.h and nio.h includes */
+#elif defined(FREESCALE_KSDK_1_3)
+ #include "fsl_debug_console.h"
+ #include "fsl_os_abstraction.h"
+
+ #undef printf
+ #define printf PRINTF
+#elif defined(WOLFSSL_DEOS)
+ #include <deos.h>
+ #undef printf
+ #define printf printx
+#elif defined(MICRIUM)
+ #include <bsp_ser.h>
+ void BSP_Ser_Printf (CPU_CHAR* format, ...);
+ #undef printf
+ #define printf BSP_Ser_Printf
+#elif defined(WOLFSSL_ZEPHYR)
+ #include <stdio.h>
+ #define BENCH_EMBEDDED
+ #define printf printfk
+ static int printfk(const char *fmt, ...)
+ {
+ int ret;
+ char line[150];
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ ret = vsnprintf(line, sizeof(line), fmt, ap);
+ line[sizeof(line)-1] = '\0';
+ printk("%s", line);
+
+ va_end(ap);
+
+ return ret;
+ }
+
+#elif defined(WOLFSSL_TELIT_M2MB)
+ #include <stdarg.h>
#include <stdio.h>
+ #include <string.h>
+ #include "m2m_log.h" /* for M2M_LOG_INFO - not standard API */
+ /* remap printf */
+ #undef printf
+ #define printf M2M_LOG_INFO
+ /* OS requires occasional sleep() */
+ #ifndef TEST_SLEEP_MS
+ #define TEST_SLEEP_MS 50
+ #endif
+ #define TEST_SLEEP() m2mb_os_taskSleep(M2MB_OS_MS2TICKS(TEST_SLEEP_MS))
+ /* don't use file system for these tests, since ./certs dir isn't loaded */
+ #undef NO_FILESYSTEM
+ #define NO_FILESYSTEM
+
+#else
+ #if defined(XMALLOC_USER) || defined(FREESCALE_MQX)
+ /* MQX classic needs for EXIT_FAILURE */
+ #include <stdlib.h> /* we're using malloc / free direct here */
+ #endif
+
+ #ifndef STRING_USER
+ #include <string.h>
+ #include <stdio.h>
+ #endif
+
+ /* enable way for customer to override test/bench printf */
+ #ifdef XPRINTF
+ #undef printf
+ #define printf XPRINTF
+ #endif
#endif
+#include <wolfssl/wolfcrypt/memory.h>
#include <wolfssl/wolfcrypt/random.h>
#include <wolfssl/wolfcrypt/des3.h>
#include <wolfssl/wolfcrypt/arc4.h>
@@ -51,68 +139,554 @@
#include <wolfssl/wolfcrypt/sha.h>
#include <wolfssl/wolfcrypt/sha256.h>
#include <wolfssl/wolfcrypt/sha512.h>
+#include <wolfssl/wolfcrypt/sha3.h>
#include <wolfssl/wolfcrypt/rsa.h>
#include <wolfssl/wolfcrypt/asn.h>
#include <wolfssl/wolfcrypt/ripemd.h>
+#include <wolfssl/wolfcrypt/cmac.h>
+#ifndef NO_HMAC
+ #include <wolfssl/wolfcrypt/hmac.h>
+#endif
+#ifndef NO_PWDBASED
+ #include <wolfssl/wolfcrypt/pwdbased.h>
+#endif
#ifdef HAVE_ECC
#include <wolfssl/wolfcrypt/ecc.h>
#endif
+#ifdef HAVE_IDEA
+ #include <wolfssl/wolfcrypt/idea.h>
+#endif
#ifdef HAVE_CURVE25519
#include <wolfssl/wolfcrypt/curve25519.h>
#endif
#ifdef HAVE_ED25519
#include <wolfssl/wolfcrypt/ed25519.h>
#endif
+#ifdef HAVE_CURVE448
+ #include <wolfssl/wolfcrypt/curve448.h>
+#endif
+#ifdef HAVE_ED448
+ #include <wolfssl/wolfcrypt/ed448.h>
+#endif
#include <wolfssl/wolfcrypt/dh.h>
-#ifdef HAVE_CAVIUM
- #include "cavium_sysdep.h"
- #include "cavium_common.h"
- #include "cavium_ioctl.h"
+#ifdef HAVE_NTRU
+ #include "libntruencrypt/ntru_crypto.h"
+#endif
+#include <wolfssl/wolfcrypt/random.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/types.h>
+
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+ #ifdef HAVE_INTEL_QA_SYNC
+ #include <wolfssl/wolfcrypt/port/intel/quickassist_sync.h>
+ #endif
+ #ifdef HAVE_CAVIUM_OCTEON_SYNC
+ #include <wolfssl/wolfcrypt/port/cavium/cavium_octeon_sync.h>
+ #endif
+#endif
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ #include <wolfssl/wolfcrypt/async.h>
+#endif
+
+
+#ifdef WOLFSSL_STATIC_MEMORY
+ static WOLFSSL_HEAP_HINT* HEAP_HINT;
+#else
+ #define HEAP_HINT NULL
+#endif /* WOLFSSL_STATIC_MEMORY */
+
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+
+/* optional macro to add sleep between tests */
+#ifndef TEST_SLEEP
+ /* stub the sleep macro */
+ #define TEST_SLEEP()
+#endif
+
+
+/* Bit values for each algorithm that is able to be benchmarked.
+ * Common grouping of algorithms also.
+ * Each algorithm has a unique value for its type e.g. cipher.
+ */
+/* Cipher algorithms. */
+#define BENCH_AES_CBC 0x00000001
+#define BENCH_AES_GCM 0x00000002
+#define BENCH_AES_ECB 0x00000004
+#define BENCH_AES_XTS 0x00000008
+#define BENCH_AES_CTR 0x00000010
+#define BENCH_AES_CCM 0x00000020
+#define BENCH_CAMELLIA 0x00000100
+#define BENCH_ARC4 0x00000200
+#define BENCH_HC128 0x00000400
+#define BENCH_RABBIT 0x00000800
+#define BENCH_CHACHA20 0x00001000
+#define BENCH_CHACHA20_POLY1305 0x00002000
+#define BENCH_DES 0x00004000
+#define BENCH_IDEA 0x00008000
+#define BENCH_AES_CFB 0x00010000
+#define BENCH_AES_OFB 0x00020000
+/* Digest algorithms. */
+#define BENCH_MD5 0x00000001
+#define BENCH_POLY1305 0x00000002
+#define BENCH_SHA 0x00000004
+#define BENCH_SHA224 0x00000010
+#define BENCH_SHA256 0x00000020
+#define BENCH_SHA384 0x00000040
+#define BENCH_SHA512 0x00000080
+#define BENCH_SHA2 (BENCH_SHA224 | BENCH_SHA256 | \
+ BENCH_SHA384 | BENCH_SHA512)
+#define BENCH_SHA3_224 0x00000100
+#define BENCH_SHA3_256 0x00000200
+#define BENCH_SHA3_384 0x00000400
+#define BENCH_SHA3_512 0x00000800
+#define BENCH_SHA3 (BENCH_SHA3_224 | BENCH_SHA3_256 | \
+ BENCH_SHA3_384 | BENCH_SHA3_512)
+#define BENCH_RIPEMD 0x00001000
+#define BENCH_BLAKE2B 0x00002000
+#define BENCH_BLAKE2S 0x00004000
+
+/* MAC algorithms. */
+#define BENCH_CMAC 0x00000001
+#define BENCH_HMAC_MD5 0x00000002
+#define BENCH_HMAC_SHA 0x00000004
+#define BENCH_HMAC_SHA224 0x00000010
+#define BENCH_HMAC_SHA256 0x00000020
+#define BENCH_HMAC_SHA384 0x00000040
+#define BENCH_HMAC_SHA512 0x00000080
+#define BENCH_HMAC (BENCH_HMAC_MD5 | BENCH_HMAC_SHA | \
+ BENCH_HMAC_SHA224 | BENCH_HMAC_SHA256 | \
+ BENCH_HMAC_SHA384 | BENCH_HMAC_SHA512)
+#define BENCH_PBKDF2 0x00000100
+
+/* Asymmetric algorithms. */
+#define BENCH_RSA_KEYGEN 0x00000001
+#define BENCH_RSA 0x00000002
+#define BENCH_RSA_SZ 0x00000004
+#define BENCH_DH 0x00000010
+#define BENCH_NTRU 0x00000100
+#define BENCH_NTRU_KEYGEN 0x00000200
+#define BENCH_ECC_MAKEKEY 0x00001000
+#define BENCH_ECC 0x00002000
+#define BENCH_ECC_ENCRYPT 0x00004000
+#define BENCH_CURVE25519_KEYGEN 0x00010000
+#define BENCH_CURVE25519_KA 0x00020000
+#define BENCH_ED25519_KEYGEN 0x00040000
+#define BENCH_ED25519_SIGN 0x00080000
+#define BENCH_CURVE448_KEYGEN 0x00100000
+#define BENCH_CURVE448_KA 0x00200000
+#define BENCH_ED448_KEYGEN 0x00400000
+#define BENCH_ED448_SIGN 0x00800000
+/* Other */
+#define BENCH_RNG 0x00000001
+#define BENCH_SCRYPT 0x00000002
+
+
+/* Benchmark all compiled in algorithms.
+ * When 1, ignore other benchmark algorithm values.
+ * 0, only benchmark algorithm values set.
+ */
+static int bench_all = 1;
+/* Cipher algorithms to benchmark. */
+static int bench_cipher_algs = 0;
+/* Digest algorithms to benchmark. */
+static int bench_digest_algs = 0;
+/* MAC algorithms to benchmark. */
+static int bench_mac_algs = 0;
+/* Asymmetric algorithms to benchmark. */
+static int bench_asym_algs = 0;
+/* Other cryptographic algorithms to benchmark. */
+static int bench_other_algs = 0;
+
+#if !defined(WOLFSSL_BENCHMARK_ALL) && !defined(NO_MAIN_DRIVER)
+
+/* The mapping of command line option to bit values. */
+typedef struct bench_alg {
+ /* Command line option string. */
+ const char* str;
+ /* Bit values to set. */
+ int val;
+} bench_alg;
+
+#ifndef MAIN_NO_ARGS
+/* All recognized cipher algorithm choosing command line options. */
+static const bench_alg bench_cipher_opt[] = {
+ { "-cipher", -1 },
+#ifdef HAVE_AES_CBC
+ { "-aes-cbc", BENCH_AES_CBC },
+#endif
+#ifdef HAVE_AESGCM
+ { "-aes-gcm", BENCH_AES_GCM },
+#endif
+#ifdef WOLFSSL_AES_DIRECT
+ { "-aes-ecb", BENCH_AES_ECB },
+#endif
+#ifdef WOLFSSL_AES_XTS
+ { "-aes-xts", BENCH_AES_XTS },
+#endif
+#ifdef WOLFSSL_AES_CFB
+ { "-aes-cfb", BENCH_AES_CFB },
+#endif
+#ifdef WOLFSSL_AES_OFB
+ { "-aes-ofb", BENCH_AES_OFB },
+#endif
+#ifdef WOLFSSL_AES_COUNTER
+ { "-aes-ctr", BENCH_AES_CTR },
+#endif
+#ifdef HAVE_AESCCM
+ { "-aes-ccm", BENCH_AES_CCM },
+#endif
+#ifdef HAVE_CAMELLIA
+ { "-camellia", BENCH_CAMELLIA },
+#endif
+#ifndef NO_RC4
+ { "-arc4", BENCH_ARC4 },
+#endif
+#ifdef HAVE_HC128
+ { "-hc128", BENCH_HC128 },
+#endif
+#ifndef NO_RABBIT
+ { "-rabbit", BENCH_RABBIT },
+#endif
+#ifdef HAVE_CHACHA
+ { "-chacha20", BENCH_CHACHA20 },
+#endif
+#if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
+ { "-chacha20-poly1305", BENCH_CHACHA20_POLY1305 },
+#endif
+#ifndef NO_DES3
+ { "-des", BENCH_DES },
+#endif
+#ifdef HAVE_IDEA
+ { "-idea", BENCH_IDEA },
+#endif
+ { NULL, 0}
+};
+
+/* All recognized digest algorithm choosing command line options. */
+static const bench_alg bench_digest_opt[] = {
+ { "-digest", -1 },
+#ifndef NO_MD5
+ { "-md5", BENCH_MD5 },
+#endif
+#ifdef HAVE_POLY1305
+ { "-poly1305", BENCH_POLY1305 },
+#endif
+#ifndef NO_SHA
+ { "-sha", BENCH_SHA },
+#endif
+#if defined(WOLFSSL_SHA224) || !defined(NO_SHA256) || defined(WOLFSSL_SHA384) \
+ || defined(WOLFSSL_SHA512)
+ { "-sha2", BENCH_SHA2 },
+#endif
+#ifdef WOLFSSL_SHA224
+ { "-sha224", BENCH_SHA224 },
+#endif
+#ifndef NO_SHA256
+ { "-sha256", BENCH_SHA256 },
+#endif
+#ifdef WOLFSSL_SHA384
+ { "-sha384", BENCH_SHA384 },
+#endif
+#ifdef WOLFSSL_SHA512
+ { "-sha512", BENCH_SHA512 },
+#endif
+#ifdef WOLFSSL_SHA3
+ { "-sha3", BENCH_SHA3 },
+ #ifndef WOLFSSL_NOSHA3_224
+ { "-sha3-224", BENCH_SHA3_224 },
+ #endif
+ #ifndef WOLFSSL_NOSHA3_256
+ { "-sha3-256", BENCH_SHA3_256 },
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ { "-sha3-384", BENCH_SHA3_384 },
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ { "-sha3-512", BENCH_SHA3_512 },
+ #endif
+#endif
+#ifdef WOLFSSL_RIPEMD
+ { "-ripemd", BENCH_RIPEMD },
+#endif
+#ifdef HAVE_BLAKE2
+ { "-blake2b", BENCH_BLAKE2B },
+#endif
+#ifdef HAVE_BLAKE2S
+ { "-blake2s", BENCH_BLAKE2S },
+#endif
+ { NULL, 0}
+};
+
+/* All recognized MAC algorithm choosing command line options. */
+static const bench_alg bench_mac_opt[] = {
+ { "-mac", -1 },
+#ifdef WOLFSSL_CMAC
+ { "-cmac", BENCH_CMAC },
+#endif
+#ifndef NO_HMAC
+ { "-hmac", BENCH_HMAC },
+ #ifndef NO_MD5
+ { "-hmac-md5", BENCH_HMAC_MD5 },
+ #endif
+ #ifndef NO_SHA
+ { "-hmac-sha", BENCH_HMAC_SHA },
+ #endif
+ #ifdef WOLFSSL_SHA224
+ { "-hmac-sha224", BENCH_HMAC_SHA224 },
+ #endif
+ #ifndef NO_SHA256
+ { "-hmac-sha256", BENCH_HMAC_SHA256 },
+ #endif
+ #ifdef WOLFSSL_SHA384
+ { "-hmac-sha384", BENCH_HMAC_SHA384 },
+ #endif
+ #ifdef WOLFSSL_SHA512
+ { "-hmac-sha512", BENCH_HMAC_SHA512 },
+ #endif
+ #ifndef NO_PWDBASED
+ { "-pbkdf2", BENCH_PBKDF2 },
+ #endif
+#endif
+ { NULL, 0}
+};
+
+/* All recognized asymmetric algorithm choosing command line options. */
+static const bench_alg bench_asym_opt[] = {
+ { "-asym", -1 },
+#ifndef NO_RSA
+ #ifdef WOLFSSL_KEY_GEN
+ { "-rsa-kg", BENCH_RSA_KEYGEN },
+ #endif
+ { "-rsa", BENCH_RSA },
+ { "-rsa-sz", BENCH_RSA_SZ },
+#endif
+#ifndef NO_DH
+ { "-dh", BENCH_DH },
#endif
#ifdef HAVE_NTRU
- #include "ntru_crypto.h"
+ { "-ntru", BENCH_NTRU },
+ { "-ntru-kg", BENCH_NTRU_KEYGEN },
+#endif
+#ifdef HAVE_ECC
+ { "-ecc-kg", BENCH_ECC_MAKEKEY },
+ { "-ecc", BENCH_ECC },
+ #ifdef HAVE_ECC_ENCRYPT
+ { "-ecc-enc", BENCH_ECC_ENCRYPT },
+ #endif
+#endif
+#ifdef HAVE_CURVE25519
+ { "-curve25519-kg", BENCH_CURVE25519_KEYGEN },
+ #ifdef HAVE_CURVE25519_SHARED_SECRET
+ { "-x25519", BENCH_CURVE25519_KA },
+ #endif
+#endif
+#ifdef HAVE_ED25519
+ { "-ed25519-kg", BENCH_ED25519_KEYGEN },
+ { "-ed25519", BENCH_ED25519_SIGN },
+#endif
+#ifdef HAVE_CURVE448
+ { "-curve448-kg", BENCH_CURVE448_KEYGEN },
+ #ifdef HAVE_CURVE448_SHARED_SECRET
+ { "-x448", BENCH_CURVE448_KA },
+ #endif
+#endif
+#ifdef HAVE_ED448
+ { "-ed448-kg", BENCH_ED448_KEYGEN },
+ { "-ed448", BENCH_ED448_SIGN },
+#endif
+ { NULL, 0}
+};
+
+/* All recognized other cryptographic algorithm choosing command line options.
+ */
+static const bench_alg bench_other_opt[] = {
+ { "-other", -1 },
+#ifndef WC_NO_RNG
+ { "-rng", BENCH_RNG },
+#endif
+#ifdef HAVE_SCRYPT
+ { "-scrypt", BENCH_SCRYPT },
+#endif
+ { NULL, 0}
+};
+#endif /* MAIN_NO_ARGS */
+
+#endif /* !WOLFSSL_BENCHMARK_ALL && !NO_MAIN_DRIVER */
+
+
+#ifdef HAVE_WNR
+ const char* wnrConfigFile = "wnr-example.conf";
#endif
#if defined(WOLFSSL_MDK_ARM)
- extern FILE * wolfSSL_fopen(const char *fname, const char *mode) ;
+ extern XFILE wolfSSL_fopen(const char *fname, const char *mode);
#define fopen wolfSSL_fopen
#endif
-#if defined(__GNUC__) && defined(__x86_64__) && !defined(NO_ASM)
+static int lng_index = 0;
+
+#ifndef NO_MAIN_DRIVER
+#ifndef MAIN_NO_ARGS
+static const char* bench_Usage_msg1[][16] = {
+ /* 0 English */
+ { "-? <num> Help, print this usage\n 0: English, 1: Japanese\n",
+ "-csv Print terminal output in csv format\n",
+ "-base10 Display bytes as power of 10 (eg 1 kB = 1000 Bytes)\n",
+ "-no_aad No additional authentication data passed.\n",
+ "-dgst_full Full digest operation performed.\n",
+ "-rsa_sign Measure RSA sign/verify instead of encrypt/decrypt.\n",
+ "<keySz> -rsa-sz\n Measure RSA <key size> performance.\n",
+ "-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
+ "-ffhdhe3072 Measure DH using FFDHE 3072-bit parameters.\n",
+ "-p256 Measure ECC using P-256 curve.\n",
+ "-p384 Measure ECC using P-384 curve.\n",
+ "-<alg> Algorithm to benchmark. Available algorithms include:\n",
+ "-lng <num> Display benchmark result by specified language.\n 0: English, 1: Japanese\n",
+ "<num> Size of block in bytes\n",
+ "-threads <num> Number of threads to run\n",
+ "-print Show benchmark stats summary\n"
+ },
+#ifndef NO_MULTIBYTE_PRINT
+ /* 1 Japanese */
+ { "-? <num> ヘルプ, 使い方を表示します。\n 0: 英語、 1: 日本語\n",
+ "-csv csv 形式で端末に出力します。\n",
+ "-base10 バイトを10のべき乗で表示します。(例 1 kB = 1000 Bytes)\n",
+ "-no_aad 追加の認証データを使用しません.\n",
+ "-dgst_full フルの digest 暗号操作を実施します。\n",
+ "-rsa_sign 暗号/復号化の代わりに RSA の署名/検証を測定します。\n",
+ "<keySz> -rsa-sz\n RSA <key size> の性能を測定します。\n",
+ "-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
+ "-ffhdhe3072 Measure DH using FFDHE 3072-bit parameters.\n",
+ "-p256 Measure ECC using P-256 curve.\n",
+ "-p384 Measure ECC using P-384 curve.\n",
+ "-<alg> アルゴリズムのベンチマークを実施します。\n 利用可能なアルゴリズムは下記を含みます:\n",
+ "-lng <num> 指定された言語でベンチマーク結果を表示します。\n 0: 英語、 1: 日本語\n",
+ "<num> ブロックサイズをバイト単位で指定します。\n",
+ "-threads <num> 実行するスレッド数\n",
+ "-print ベンチマーク統計の要約を表示する\n"
+ },
+#endif
+};
+#endif /* MAIN_NO_ARGS */
+#endif
+
+static const char* bench_result_words1[][4] = {
+ { "took", "seconds" , "Cycles per byte", NULL }, /* 0 English */
+#ifndef NO_MULTIBYTE_PRINT
+ { "を" , "秒で処理", "1バイトあたりのサイクル数", NULL }, /* 1 Japanese */
+#endif
+};
+
+#if !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_NTRU) || \
+ defined(HAVE_ECC) || !defined(NO_DH) || defined(HAVE_ECC_ENCRYPT) || \
+ defined(HAVE_CURVE25519) || defined(HAVE_CURVE25519_SHARED_SECRET) || \
+ defined(HAVE_ED25519) || defined(HAVE_CURVE448) || \
+ defined(HAVE_CURVE448_SHARED_SECRET) || defined(HAVE_ED448)
+#if defined(HAVE_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY) || \
+ defined(WOLFSSL_PUBLIC_MP) || !defined(NO_DH)
+
+static const char* bench_desc_words[][9] = {
+ /* 0 1 2 3 4 5 6 7 8 */
+ {"public", "private", "key gen", "agree" , "sign", "verify", "encryption", "decryption", NULL}, /* 0 English */
+#ifndef NO_MULTIBYTE_PRINT
+ {"公開鍵", "秘密鍵" ,"鍵生成" , "鍵共有" , "署名", "検証" , "暗号化" , "復号化" , NULL}, /* 1 Japanese */
+#endif
+};
+
+#endif
+#endif
+
+#if defined(__GNUC__) && defined(__x86_64__) && !defined(NO_ASM) && !defined(WOLFSSL_SGX)
#define HAVE_GET_CYCLES
- static INLINE word64 get_intel_cycles(void);
- static word64 total_cycles;
+ static WC_INLINE word64 get_intel_cycles(void);
+ static THREAD_LS_T word64 total_cycles;
+ #define INIT_CYCLE_COUNTER
#define BEGIN_INTEL_CYCLES total_cycles = get_intel_cycles();
#define END_INTEL_CYCLES total_cycles = get_intel_cycles() - total_cycles;
- #define SHOW_INTEL_CYCLES printf(" Cycles per byte = %6.2f", \
- (float)total_cycles / (numBlocks*sizeof(plain)));
+ /* s == size in bytes that 1 count represents, normally BENCH_SIZE */
+ #define SHOW_INTEL_CYCLES(b, n, s) \
+ XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), " %s = %6.2f\n", \
+ bench_result_words1[lng_index][2], \
+ count == 0 ? 0 : (float)total_cycles / ((word64)count*s))
+ #define SHOW_INTEL_CYCLES_CSV(b, n, s) \
+ XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), "%.2f,\n", \
+ count == 0 ? 0 : (float)total_cycles / ((word64)count*s))
+#elif defined(LINUX_CYCLE_COUNT)
+ #include <linux/perf_event.h>
+ #include <sys/syscall.h>
+ #include <unistd.h>
+
+ static THREAD_LS_T word64 begin_cycles;
+ static THREAD_LS_T word64 total_cycles;
+ static THREAD_LS_T int cycles = -1;
+ static THREAD_LS_T struct perf_event_attr atr;
+
+ #define INIT_CYCLE_COUNTER do { \
+ atr.type = PERF_TYPE_HARDWARE; \
+ atr.config = PERF_COUNT_HW_CPU_CYCLES; \
+ cycles = (int)syscall(__NR_perf_event_open, &atr, 0, -1, -1, 0); \
+ } while (0);
+
+ #define BEGIN_INTEL_CYCLES read(cycles, &begin_cycles, sizeof(begin_cycles));
+ #define END_INTEL_CYCLES do { \
+ read(cycles, &total_cycles, sizeof(total_cycles)); \
+ total_cycles = total_cycles - begin_cycles; \
+ } while (0);
+
+ /* s == size in bytes that 1 count represents, normally BENCH_SIZE */
+ #define SHOW_INTEL_CYCLES(b, n, s) \
+ XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), " %s = %6.2f\n", \
+ bench_result_words1[lng_index][2], \
+ (float)total_cycles / (count*s))
+ #define SHOW_INTEL_CYCLES_CSV(b, n, s) \
+ XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), "%.2f,\n", \
+ (float)total_cycles / (count*s))
+
+#elif defined(SYNERGY_CYCLE_COUNT)
+ #include "hal_data.h"
+ static THREAD_LS_T word64 begin_cycles;
+ static THREAD_LS_T word64 total_cycles;
+
+ #define INIT_CYCLE_COUNTER
+ #define BEGIN_INTEL_CYCLES begin_cycles = DWT->CYCCNT = 0;
+ #define END_INTEL_CYCLES total_cycles = DWT->CYCCNT - begin_cycles;
+
+ /* s == size in bytes that 1 count represents, normally BENCH_SIZE */
+ #define SHOW_INTEL_CYCLES(b, n, s) \
+ XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), " %s = %6.2f\n", \
+ bench_result_words1[lng_index][2], \
+ (float)total_cycles / (count*s))
+ #define SHOW_INTEL_CYCLES_CSV(b, n, s) \
+ XSNPRINTF(b + XSTRLEN(b), n - XSTRLEN(b), "%.2f,\n", \
+ (float)total_cycles / (count*s))
+
#else
+ #define INIT_CYCLE_COUNTER
#define BEGIN_INTEL_CYCLES
#define END_INTEL_CYCLES
- #define SHOW_INTEL_CYCLES
+ #define SHOW_INTEL_CYCLES(b, n, s) b[XSTRLEN(b)] = '\n'
+ #define SHOW_INTEL_CYCLES_CSV(b, n, s) b[XSTRLEN(b)] = '\n'
#endif
-/* let's use buffers, we have them */
-#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
- #define USE_CERT_BUFFERS_2048
+/* determine benchmark buffer to use (if NO_FILESYSTEM) */
+#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) && \
+ !defined(USE_CERT_BUFFERS_3072)
+ #define USE_CERT_BUFFERS_2048 /* default to 2048 */
#endif
-#if defined(USE_CERT_BUFFERS_1024) || defined(USE_CERT_BUFFERS_2048) \
- || !defined(NO_DH)
+#if defined(USE_CERT_BUFFERS_1024) || defined(USE_CERT_BUFFERS_2048) || \
+ defined(USE_CERT_BUFFERS_3072) || !defined(NO_DH)
/* include test cert and key buffers for use with NO_FILESYSTEM */
- #if defined(WOLFSSL_MDK_ARM)
- #include "cert_data.h" /* use certs_test.c for initial data,
- so other commands can share the data. */
- #else
- #include <wolfssl/certs_test.h>
- #endif
+ #include <wolfssl/certs_test.h>
#endif
-
-#ifdef HAVE_BLAKE2
+#if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
#include <wolfssl/wolfcrypt/blake2.h>
- void bench_blake2(void);
#endif
#ifdef _MSC_VER
@@ -121,469 +695,2060 @@
#endif
-void bench_des(void);
-void bench_arc4(void);
-void bench_hc128(void);
-void bench_rabbit(void);
-void bench_chacha(void);
-void bench_chacha20_poly1305_aead(void);
-void bench_aes(int);
-void bench_aesgcm(void);
-void bench_aesccm(void);
-void bench_aesctr(void);
-void bench_poly1305(void);
-void bench_camellia(void);
-
-void bench_md5(void);
-void bench_sha(void);
-void bench_sha256(void);
-void bench_sha384(void);
-void bench_sha512(void);
-void bench_ripemd(void);
-
-void bench_rsa(void);
-void bench_rsaKeyGen(void);
-void bench_dh(void);
-#ifdef HAVE_ECC
-void bench_eccKeyGen(void);
-void bench_eccKeyAgree(void);
+#ifdef WOLFSSL_CURRTIME_REMAP
+ #define current_time WOLFSSL_CURRTIME_REMAP
+#elif !defined(HAVE_STACK_SIZE)
+ double current_time(int);
#endif
-#ifdef HAVE_CURVE25519
-void bench_curve25519KeyGen(void);
-void bench_curve25519KeyAgree(void);
+
+#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND) && \
+ !defined(HAVE_STACK_SIZE)
+#ifdef __cplusplus
+ extern "C" {
#endif
-#ifdef HAVE_ED25519
-void bench_ed25519KeyGen(void);
-void bench_ed25519KeySign(void);
+ WOLFSSL_API int wolfSSL_Debugging_ON(void);
+ WOLFSSL_API void wolfSSL_Debugging_OFF(void);
+#ifdef __cplusplus
+ } /* extern "C" */
#endif
-#ifdef HAVE_NTRU
-void bench_ntru(void);
-void bench_ntruKeyGen(void);
#endif
-double current_time(int);
+#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || !defined(NO_DH) \
+ || defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC) \
+ || defined(HAVE_CURVE25519) || defined(HAVE_ED25519) \
+ || defined(HAVE_CURVE448) || defined(HAVE_ED448)
+ #define HAVE_LOCAL_RNG
+ static THREAD_LS_T WC_RNG gRng;
+#endif
+#if defined(HAVE_ED25519) || defined(HAVE_CURVE25519) || \
+ defined(HAVE_CURVE448) || defined(HAVE_ED448) || \
+ defined(HAVE_ECC) || defined(HAVE_NTRU) || !defined(NO_DH) || \
+ !defined(NO_RSA) || defined(HAVE_SCRYPT)
+ #define BENCH_ASYM
+#endif
-#ifdef HAVE_CAVIUM
+#if defined(BENCH_ASYM)
+#if defined(HAVE_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY) || \
+ defined(WOLFSSL_PUBLIC_MP) || !defined(NO_DH)
+static const char* bench_result_words2[][5] = {
+ { "ops took", "sec" , "avg" , "ops/sec", NULL }, /* 0 English */
+#ifndef NO_MULTIBYTE_PRINT
+ { "回処理を", "秒で実施", "平均", "処理/秒", NULL }, /* 1 Japanese */
+#endif
+};
+#endif
+#endif
-static int OpenNitroxDevice(int dma_mode,int dev_id)
-{
- Csp1CoreAssignment core_assign;
- Uint32 device;
+/* Asynchronous helper macros */
+static THREAD_LS_T int devId = INVALID_DEVID;
- if (CspInitialize(CAVIUM_DIRECT,CAVIUM_DEV_ID))
- return -1;
- if (Csp1GetDevType(&device))
- return -1;
- if (device != NPX_DEVICE) {
- if (ioctl(gpkpdev_hdlr[CAVIUM_DEV_ID], IOCTL_CSP1_GET_CORE_ASSIGNMENT,
- (Uint32 *)&core_assign)!= 0)
- return -1;
- }
- CspShutdown(CAVIUM_DEV_ID);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ static WOLF_EVENT_QUEUE eventQueue;
- return CspInitialize(dma_mode, dev_id);
-}
+ #define BENCH_ASYNC_GET_DEV(obj) (&(obj)->asyncDev)
+ #define BENCH_ASYNC_GET_NAME(doAsync) (doAsync) ? "HW" : "SW"
+ #define BENCH_MAX_PENDING (WOLF_ASYNC_MAX_PENDING)
+#ifndef WC_NO_ASYNC_THREADING
+ typedef struct ThreadData {
+ pthread_t thread_id;
+ } ThreadData;
+ static ThreadData* g_threadData;
+ static int g_threadCount;
#endif
-#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
- WOLFSSL_API int wolfSSL_Debugging_ON();
-#endif
+ static int bench_async_check(int* ret, WC_ASYNC_DEV* asyncDev,
+ int callAgain, int* times, int limit, int* pending)
+ {
+ int allowNext = 0;
+
+ /* this state can be set from a different thread */
+ WOLF_EVENT_STATE state = asyncDev->event.state;
+
+ /* if algo doesn't require calling again then use this flow */
+ if (state == WOLF_EVENT_STATE_DONE) {
+ if (callAgain) {
+ /* needs called again, so allow it and handle completion in bench_async_handle */
+ allowNext = 1;
+ }
+ else {
+ *ret = asyncDev->event.ret;
+ asyncDev->event.state = WOLF_EVENT_STATE_READY;
+ (*times)++;
+ if (*pending > 0) /* to support case where async blocks */
+ (*pending)--;
+
+ if ((*times + *pending) < limit)
+ allowNext = 1;
+ }
+ }
-#if !defined(NO_RSA) || !defined(NO_DH) \
- || defined(WOLFSSL_KEYGEN) || defined(HAVE_ECC)
- #define HAVE_LOCAL_RNG
- static RNG rng;
-#endif
+ /* if slot is available and we haven't reached limit, start another */
+ else if (state == WOLF_EVENT_STATE_READY && (*times + *pending) < limit) {
+ allowNext = 1;
+ }
+
+ return allowNext;
+ }
+
+ static int bench_async_handle(int* ret, WC_ASYNC_DEV* asyncDev,
+ int callAgain, int* times, int* pending)
+ {
+ WOLF_EVENT_STATE state = asyncDev->event.state;
+
+ if (*ret == WC_PENDING_E) {
+ if (state == WOLF_EVENT_STATE_DONE) {
+ *ret = asyncDev->event.ret;
+ asyncDev->event.state = WOLF_EVENT_STATE_READY;
+ (*times)++;
+ (*pending)--;
+ }
+ else {
+ (*pending)++;
+ *ret = wc_AsyncHandle(asyncDev, &eventQueue,
+ callAgain ? WC_ASYNC_FLAG_CALL_AGAIN : WC_ASYNC_FLAG_NONE);
+ }
+ }
+ else if (*ret >= 0) {
+ *ret = asyncDev->event.ret;
+ asyncDev->event.state = WOLF_EVENT_STATE_READY;
+ (*times)++;
+ if (*pending > 0) /* to support case where async blocks */
+ (*pending)--;
+ }
+
+ return (*ret >= 0) ? 1 : 0;
+ }
+
+ static WC_INLINE int bench_async_poll(int* pending)
+ {
+ int ret, asyncDone = 0;
+
+ ret = wolfAsync_EventQueuePoll(&eventQueue, NULL, NULL, 0,
+ WOLF_POLL_FLAG_CHECK_HW, &asyncDone);
+ if (ret != 0) {
+ printf("Async poll failed %d\n", ret);
+ return ret;
+ }
+
+ if (asyncDone == 0) {
+ #ifndef WC_NO_ASYNC_THREADING
+ /* give time to other threads */
+ wc_AsyncThreadYield();
+ #endif
+ }
+
+ (void)pending;
+
+ return asyncDone;
+ }
-/* use kB instead of mB for embedded benchmarking */
-#ifdef BENCH_EMBEDDED
- static byte plain [1024];
#else
- static byte plain [1024*1024];
+ #define BENCH_MAX_PENDING (1)
+ #define BENCH_ASYNC_GET_NAME(doAsync) ""
+ #define BENCH_ASYNC_GET_DEV(obj) NULL
+
+ static WC_INLINE int bench_async_check(int* ret, void* asyncDev,
+ int callAgain, int* times, int limit, int* pending)
+ {
+ (void)ret;
+ (void)asyncDev;
+ (void)callAgain;
+ (void)times;
+ (void)limit;
+ (void)pending;
+
+ return 1;
+ }
+
+ static WC_INLINE int bench_async_handle(int* ret, void* asyncDev,
+ int callAgain, int* times, int* pending)
+ {
+ (void)asyncDev;
+ (void)callAgain;
+ (void)pending;
+
+ if (*ret >= 0) {
+ /* operation completed */
+ (*times)++;
+ return 1;
+ }
+ return 0;
+ }
+ #define bench_async_poll(p)
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+
+
+/* maximum runtime for each benchmark */
+#define BENCH_MIN_RUNTIME_SEC 1.0f
+
+
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+ #define AES_AUTH_ADD_SZ 13
+ #define AES_AUTH_TAG_SZ 16
+ #define BENCH_CIPHER_ADD AES_AUTH_TAG_SZ
+ static word32 aesAuthAddSz = AES_AUTH_ADD_SZ;
+#endif
+#ifndef BENCH_CIPHER_ADD
+ #define BENCH_CIPHER_ADD 0
#endif
/* use kB instead of mB for embedded benchmarking */
#ifdef BENCH_EMBEDDED
- static byte cipher[1024];
+ enum BenchmarkBounds {
+ scryptCnt = 1,
+ ntimes = 2,
+ genTimes = BENCH_MAX_PENDING,
+ agreeTimes = 2
+ };
+ static int numBlocks = 25; /* how many kB to test (en/de)cryption */
+ static word32 bench_size = (1024ul);
#else
- static byte cipher[1024*1024];
+ enum BenchmarkBounds {
+ scryptCnt = 10,
+ ntimes = 100,
+ genTimes = BENCH_MAX_PENDING, /* must be at least BENCH_MAX_PENDING */
+ agreeTimes = 100
+ };
+ static int numBlocks = 5; /* how many megs to test (en/de)cryption */
+ static word32 bench_size = (1024*1024ul);
+#endif
+static int base2 = 1;
+static int digest_stream = 1;
+#ifndef NO_RSA
+/* Don't measure RSA sign/verify by default */
+static int rsa_sign_verify = 0;
+#endif
+#ifndef NO_DH
+/* Use the FFDHE parameters */
+static int use_ffdhe = 0;
+#endif
+
+/* Don't print out in CSV format by default */
+static int csv_format = 0;
+#ifdef BENCH_ASYM
+static int csv_header_count = 0;
#endif
+/* for compatibility */
+#define BENCH_SIZE bench_size
-static const XGEN_ALIGN byte key[] =
+/* globals for cipher tests */
+static THREAD_LS_T byte* bench_plain = NULL;
+static THREAD_LS_T byte* bench_cipher = NULL;
+
+static const XGEN_ALIGN byte bench_key_buf[] =
{
0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
0xfe,0xde,0xba,0x98,0x76,0x54,0x32,0x10,
- 0x89,0xab,0xcd,0xef,0x01,0x23,0x45,0x67
+ 0x89,0xab,0xcd,0xef,0x01,0x23,0x45,0x67,
+ 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef
};
-static const XGEN_ALIGN byte iv[] =
+static const XGEN_ALIGN byte bench_iv_buf[] =
{
0x12,0x34,0x56,0x78,0x90,0xab,0xcd,0xef,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x11,0x21,0x31,0x41,0x51,0x61,0x71,0x81
};
+static THREAD_LS_T byte* bench_key = NULL;
+static THREAD_LS_T byte* bench_iv = NULL;
+#ifdef WOLFSSL_STATIC_MEMORY
+ #ifdef BENCH_EMBEDDED
+ static byte gBenchMemory[50000];
+ #else
+ static byte gBenchMemory[400000];
+ #endif
+#endif
-/* so embedded projects can pull in tests on their own */
-#if !defined(NO_MAIN_DRIVER)
-
-int main(int argc, char** argv)
+/* This code handles cases with systems where static (non cost) ram variables
+ aren't properly initialized with data */
+static int gBenchStaticInit = 0;
+static void benchmark_static_init(void)
{
- (void)argc;
- (void)argv;
+ if (gBenchStaticInit == 0) {
+ gBenchStaticInit = 1;
+
+ /* Init static variables */
+ bench_all = 1;
+ #ifdef BENCH_EMBEDDED
+ numBlocks = 25; /* how many kB to test (en/de)cryption */
+ bench_size = (1024ul);
+ #else
+ numBlocks = 5; /* how many megs to test (en/de)cryption */
+ bench_size = (1024*1024ul);
+ #endif
+ #if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+ aesAuthAddSz = AES_AUTH_ADD_SZ;
+ #endif
+ base2 = 1;
+ digest_stream = 1;
+ }
+}
+
+
+
+/******************************************************************************/
+/* Begin Stats Functions */
+/******************************************************************************/
+static int gPrintStats = 0;
+typedef enum bench_stat_type {
+ BENCH_STAT_ASYM,
+ BENCH_STAT_SYM,
+} bench_stat_type_t;
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+ typedef struct bench_stats {
+ struct bench_stats* next;
+ struct bench_stats* prev;
+ const char* algo;
+ const char* desc;
+ double perfsec;
+ int strength;
+ int doAsync;
+ int finishCount;
+ bench_stat_type_t type;
+ int lastRet;
+ const char* perftype;
+ } bench_stats_t;
+ static bench_stats_t* bench_stats_head;
+ static bench_stats_t* bench_stats_tail;
+ static pthread_mutex_t bench_lock = PTHREAD_MUTEX_INITIALIZER;
+
+ static bench_stats_t* bench_stats_add(bench_stat_type_t type,
+ const char* algo, int strength, const char* desc, int doAsync,
+ double perfsec, const char* perftype, int ret)
+ {
+ bench_stats_t* bstat;
+
+ /* protect bench_stats_head and bench_stats_tail access */
+ pthread_mutex_lock(&bench_lock);
+
+ /* locate existing in list */
+ for (bstat = bench_stats_head; bstat != NULL; bstat = bstat->next) {
+ /* match based on algo, strength and desc */
+ if (bstat->algo == algo && bstat->strength == strength && bstat->desc == desc && bstat->doAsync == doAsync) {
+ break;
+ }
+ }
+
+ if (bstat == NULL) {
+ /* allocate new and put on list */
+ bstat = (bench_stats_t*)XMALLOC(sizeof(bench_stats_t), NULL, DYNAMIC_TYPE_INFO);
+ if (bstat) {
+ XMEMSET(bstat, 0, sizeof(bench_stats_t));
+
+ /* add to list */
+ bstat->next = NULL;
+ if (bench_stats_tail == NULL) {
+ bench_stats_head = bstat;
+ }
+ else {
+ bench_stats_tail->next = bstat;
+ bstat->prev = bench_stats_tail;
+ }
+ bench_stats_tail = bstat; /* add to the end either way */
+ }
+ }
+
+ if (bstat) {
+ bstat->type = type;
+ bstat->algo = algo;
+ bstat->strength = strength;
+ bstat->desc = desc;
+ bstat->doAsync = doAsync;
+ bstat->perfsec += perfsec;
+ bstat->finishCount++;
+ bstat->perftype = perftype;
+ if (bstat->lastRet > ret)
+ bstat->lastRet = ret; /* track last error */
+
+ pthread_mutex_unlock(&bench_lock);
+
+ /* wait until remaining are complete */
+ while (bstat->finishCount < g_threadCount) {
+ wc_AsyncThreadYield();
+ }
+ }
+ else {
+ pthread_mutex_unlock(&bench_lock);
+ }
+
+ return bstat;
+ }
+
+ void bench_stats_print(void)
+ {
+ bench_stats_t* bstat;
+
+ /* protect bench_stats_head and bench_stats_tail access */
+ pthread_mutex_lock(&bench_lock);
+
+ for (bstat = bench_stats_head; bstat != NULL; ) {
+ if (bstat->type == BENCH_STAT_SYM) {
+ printf("%-16s%s %8.3f %s/s\n", bstat->desc,
+ BENCH_ASYNC_GET_NAME(bstat->doAsync), bstat->perfsec,
+ base2 ? "MB" : "mB");
+ }
+ else {
+ printf("%-5s %4d %-9s %s %.3f ops/sec\n",
+ bstat->algo, bstat->strength, bstat->desc,
+ BENCH_ASYNC_GET_NAME(bstat->doAsync), bstat->perfsec);
+ }
+
+ bstat = bstat->next;
+ }
+
+ pthread_mutex_unlock(&bench_lock);
+ }
+
#else
-int benchmark_test(void *args)
+
+ typedef struct bench_stats {
+ const char* algo;
+ const char* desc;
+ double perfsec;
+ const char* perftype;
+ int strength;
+ bench_stat_type_t type;
+ int ret;
+ } bench_stats_t;
+ #define MAX_BENCH_STATS 50
+ static bench_stats_t gStats[MAX_BENCH_STATS];
+ static int gStatsCount;
+
+ static bench_stats_t* bench_stats_add(bench_stat_type_t type,
+ const char* algo, int strength, const char* desc, int doAsync,
+ double perfsec, const char* perftype, int ret)
+ {
+ bench_stats_t* bstat = NULL;
+ if (gStatsCount >= MAX_BENCH_STATS)
+ return bstat;
+
+ bstat = &gStats[gStatsCount++];
+ bstat->algo = algo;
+ bstat->desc = desc;
+ bstat->perfsec = perfsec;
+ bstat->perftype = perftype;
+ bstat->strength = strength;
+ bstat->type = type;
+ bstat->ret = ret;
+
+ (void)doAsync;
+
+ return bstat;
+ }
+
+ void bench_stats_print(void)
+ {
+ int i;
+ bench_stats_t* bstat;
+ for (i=0; i<gStatsCount; i++) {
+ bstat = &gStats[i];
+ if (bstat->type == BENCH_STAT_SYM) {
+ printf("%-16s %8.3f %s/s\n", bstat->desc, bstat->perfsec,
+ base2 ? "MB" : "mB");
+ }
+ else {
+ printf("%-5s %4d %-9s %.3f ops/sec\n",
+ bstat->algo, bstat->strength, bstat->desc, bstat->perfsec);
+ }
+ }
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT && !WC_NO_ASYNC_THREADING */
+
+static WC_INLINE void bench_stats_init(void)
{
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+ bench_stats_head = NULL;
+ bench_stats_tail = NULL;
#endif
+ INIT_CYCLE_COUNTER
+}
- #if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
- wolfSSL_Debugging_ON();
- #endif
+static WC_INLINE void bench_stats_start(int* count, double* start)
+{
+ *count = 0;
+ *start = current_time(1);
+ BEGIN_INTEL_CYCLES
+}
- (void)plain;
- (void)cipher;
- (void)key;
- (void)iv;
+static WC_INLINE int bench_stats_sym_check(double start)
+{
+ return ((current_time(0) - start) < BENCH_MIN_RUNTIME_SEC);
+}
- #ifdef HAVE_CAVIUM
- int ret = OpenNitroxDevice(CAVIUM_DIRECT, CAVIUM_DEV_ID);
- if (ret != 0) {
- printf("Cavium OpenNitroxDevice failed\n");
- exit(-1);
+
+/* countSz is number of bytes that 1 count represents. Normally bench_size,
+ * except for AES direct that operates on AES_BLOCK_SIZE blocks */
+static void bench_stats_sym_finish(const char* desc, int doAsync, int count,
+ int countSz, double start, int ret)
+{
+ double total, persec = 0, blocks = count;
+ const char* blockType;
+ char msg[128] = {0};
+ const char** word = bench_result_words1[lng_index];
+
+ END_INTEL_CYCLES
+ total = current_time(0) - start;
+
+ /* calculate actual bytes */
+ blocks *= countSz;
+
+ if (base2) {
+ /* determine if we should show as KB or MB */
+ if (blocks > (1024ul * 1024ul)) {
+ blocks /= (1024ul * 1024ul);
+ blockType = "MB";
+ }
+ else if (blocks > 1024) {
+ blocks /= 1024; /* make KB */
+ blockType = "KB";
+ }
+ else {
+ blockType = "bytes";
+ }
}
-#endif /* HAVE_CAVIUM */
+ else {
+ /* determine if we should show as kB or mB */
+ if (blocks > (1000ul * 1000ul)) {
+ blocks /= (1000ul * 1000ul);
+ blockType = "mB";
+ }
+ else if (blocks > 1000) {
+ blocks /= 1000; /* make kB */
+ blockType = "kB";
+ }
+ else {
+ blockType = "bytes";
+ }
+ }
+
+ /* caclulcate blocks per second */
+ if (total > 0) {
+ persec = (1 / total) * blocks;
+ }
+
+ /* format and print to terminal */
+ if (csv_format == 1) {
+ XSNPRINTF(msg, sizeof(msg), "%s,%.3f,", desc, persec);
+ SHOW_INTEL_CYCLES_CSV(msg, sizeof(msg), countSz);
+ } else {
+ XSNPRINTF(msg, sizeof(msg), "%-16s%s %5.0f %s %s %5.3f %s, %8.3f %s/s",
+ desc, BENCH_ASYNC_GET_NAME(doAsync), blocks, blockType, word[0], total, word[1],
+ persec, blockType);
+ SHOW_INTEL_CYCLES(msg, sizeof(msg), countSz);
+ }
+ printf("%s", msg);
+
+ /* show errors */
+ if (ret < 0) {
+ printf("Benchmark %s failed: %d\n", desc, ret);
+ }
+
+ /* Add to thread stats */
+ bench_stats_add(BENCH_STAT_SYM, NULL, 0, desc, doAsync, persec, blockType, ret);
+
+ (void)doAsync;
+ (void)ret;
+
+ TEST_SLEEP();
+}
+
+#ifdef BENCH_ASYM
+#if defined(HAVE_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY) || \
+ defined(WOLFSSL_PUBLIC_MP) || !defined(NO_DH)
+static void bench_stats_asym_finish(const char* algo, int strength,
+ const char* desc, int doAsync, int count, double start, int ret)
+{
+ double total, each = 0, opsSec, milliEach;
+ const char **word = bench_result_words2[lng_index];
+ const char* kOpsSec = "Ops/Sec";
+ char msg[128] = {0};
+
+ total = current_time(0) - start;
+ if (count > 0)
+ each = total / count; /* per second */
+ opsSec = count / total; /* ops second */
+ milliEach = each * 1000; /* milliseconds */
+
+ /* format and print to terminal */
+ if (csv_format == 1) {
+ /* only print out header once */
+ if (csv_header_count == 1) {
+ printf("\nAsymmetric Ciphers:\n\n");
+ printf("Algorithm,avg ms,ops/sec,\n");
+ csv_header_count++;
+ }
+ XSNPRINTF(msg, sizeof(msg), "%s %d %s,%.3f,%.3f,\n", algo, strength, desc, milliEach, opsSec);
+ } else {
+ XSNPRINTF(msg, sizeof(msg), "%-6s %5d %-9s %s %6d %s %5.3f %s, %s %5.3f ms,"
+ " %.3f %s\n", algo, strength, desc, BENCH_ASYNC_GET_NAME(doAsync),
+ count, word[0], total, word[1], word[2], milliEach, opsSec, word[3]);
+ }
+ printf("%s", msg);
+
+ /* show errors */
+ if (ret < 0) {
+ printf("Benchmark %s %s %d failed: %d\n", algo, desc, strength, ret);
+ }
+
+ /* Add to thread stats */
+ bench_stats_add(BENCH_STAT_ASYM, algo, strength, desc, doAsync, opsSec, kOpsSec, ret);
+
+ (void)doAsync;
+ (void)ret;
+
+ TEST_SLEEP();
+}
+#endif
+#endif /* BENCH_ASYM */
+
+static WC_INLINE void bench_stats_free(void)
+{
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+ bench_stats_t* bstat;
+ for (bstat = bench_stats_head; bstat != NULL; ) {
+ bench_stats_t* next = bstat->next;
+ XFREE(bstat, NULL, DYNAMIC_TYPE_INFO);
+ bstat = next;
+ }
+ bench_stats_head = NULL;
+ bench_stats_tail = NULL;
+#endif
+}
+/******************************************************************************/
+/* End Stats Functions */
+/******************************************************************************/
+
+
+static void* benchmarks_do(void* args)
+{
+ int bench_buf_size;
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+#ifndef WC_NO_ASYNC_THREADING
+ ThreadData* threadData = (ThreadData*)args;
+
+ if (wolfAsync_DevOpenThread(&devId, &threadData->thread_id) < 0)
+#else
+ if (wolfAsync_DevOpen(&devId) < 0)
+#endif
+ {
+ printf("Async device open failed\nRunning without async\n");
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+ (void)args;
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (wolfEventQueue_Init(&eventQueue) != 0) {
+ printf("Async event queue init failure!\n");
+ }
+#endif
+
+#ifdef WOLF_CRYPTO_CB
+#ifdef HAVE_INTEL_QA_SYNC
+ devId = wc_CryptoCb_InitIntelQa();
+ if (devId == INVALID_DEVID) {
+ printf("Couldn't init the Intel QA\n");
+ }
+#endif
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+ devId = wc_CryptoCb_InitOcteon();
+ if (devId == INVALID_DEVID) {
+ printf("Couldn't get the Octeon device ID\n");
+ }
+#endif
+#endif
#if defined(HAVE_LOCAL_RNG)
{
- int rngRet = wc_InitRng(&rng);
+ int rngRet;
+
+#ifndef HAVE_FIPS
+ rngRet = wc_InitRng_ex(&gRng, HEAP_HINT, devId);
+#else
+ rngRet = wc_InitRng(&gRng);
+#endif
if (rngRet < 0) {
printf("InitRNG failed\n");
- return rngRet;
+ return NULL;
}
}
#endif
+ /* setup bench plain, cipher, key and iv globals */
+ /* make sure bench buffer is multiple of 16 (AES block size) */
+ bench_buf_size = (int)bench_size + BENCH_CIPHER_ADD;
+ if (bench_buf_size % 16)
+ bench_buf_size += 16 - (bench_buf_size % 16);
+
+#ifdef WOLFSSL_AFALG_XILINX_AES
+ bench_plain = (byte*)aligned_alloc(64, (size_t)bench_buf_size + 16);
+ bench_cipher = (byte*)aligned_alloc(64, (size_t)bench_buf_size + 16);
+#else
+ bench_plain = (byte*)XMALLOC((size_t)bench_buf_size + 16, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+ bench_cipher = (byte*)XMALLOC((size_t)bench_buf_size + 16, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+#endif
+ if (bench_plain == NULL || bench_cipher == NULL) {
+ XFREE(bench_plain, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+ XFREE(bench_cipher, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+ bench_plain = bench_cipher = NULL;
+
+ printf("Benchmark block buffer alloc failed!\n");
+ goto exit;
+ }
+ XMEMSET(bench_plain, 0, (size_t)bench_buf_size);
+ XMEMSET(bench_cipher, 0, (size_t)bench_buf_size);
+
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(HAVE_INTEL_QA_SYNC)
+ bench_key = (byte*)XMALLOC(sizeof(bench_key_buf), HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+ bench_iv = (byte*)XMALLOC(sizeof(bench_iv_buf), HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+ if (bench_key == NULL || bench_iv == NULL) {
+ XFREE(bench_key, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+ XFREE(bench_iv, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+ bench_key = bench_iv = NULL;
+
+ printf("Benchmark cipher buffer alloc failed!\n");
+ goto exit;
+ }
+ XMEMCPY(bench_key, bench_key_buf, sizeof(bench_key_buf));
+ XMEMCPY(bench_iv, bench_iv_buf, sizeof(bench_iv_buf));
+#else
+ bench_key = (byte*)bench_key_buf;
+ bench_iv = (byte*)bench_iv_buf;
+#endif
+
+#ifndef WC_NO_RNG
+ if (bench_all || (bench_other_algs & BENCH_RNG))
+ bench_rng();
+#endif /* WC_NO_RNG */
#ifndef NO_AES
- bench_aes(0);
- bench_aes(1);
+#ifdef HAVE_AES_CBC
+ if (bench_all || (bench_cipher_algs & BENCH_AES_CBC)) {
+ #ifndef NO_SW_BENCH
+ bench_aescbc(0);
+ #endif
+ #if ((defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)) || \
+ defined(HAVE_INTEL_QA_SYNC) || defined(HAVE_CAVIUM_OCTEON_SYNC)) && \
+ !defined(NO_HW_BENCH)
+ bench_aescbc(1);
+ #endif
+ }
#endif
#ifdef HAVE_AESGCM
- bench_aesgcm();
+ if (bench_all || (bench_cipher_algs & BENCH_AES_GCM)) {
+ #ifndef NO_SW_BENCH
+ bench_aesgcm(0);
+ #endif
+ #if ((defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)) || \
+ defined(HAVE_INTEL_QA_SYNC) || defined(HAVE_CAVIUM_OCTEON_SYNC)) && \
+ !defined(NO_HW_BENCH)
+ bench_aesgcm(1);
+ #endif
+ }
+#endif
+#ifdef WOLFSSL_AES_DIRECT
+ if (bench_all || (bench_cipher_algs & BENCH_AES_ECB)) {
+ #ifndef NO_SW_BENCH
+ bench_aesecb(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) && \
+ !defined(NO_HW_BENCH)
+ bench_aesecb(1);
+ #endif
+ }
+#endif
+#ifdef WOLFSSL_AES_XTS
+ if (bench_all || (bench_cipher_algs & BENCH_AES_XTS))
+ bench_aesxts();
+#endif
+#ifdef WOLFSSL_AES_CFB
+ if (bench_all || (bench_cipher_algs & BENCH_AES_CFB))
+ bench_aescfb();
+#endif
+#ifdef WOLFSSL_AES_OFB
+ if (bench_all || (bench_cipher_algs & BENCH_AES_OFB))
+ bench_aesofb();
#endif
-
#ifdef WOLFSSL_AES_COUNTER
- bench_aesctr();
+ if (bench_all || (bench_cipher_algs & BENCH_AES_CTR))
+ bench_aesctr();
#endif
-
#ifdef HAVE_AESCCM
- bench_aesccm();
+ if (bench_all || (bench_cipher_algs & BENCH_AES_CCM))
+ bench_aesccm();
#endif
+#endif /* !NO_AES */
+
#ifdef HAVE_CAMELLIA
- bench_camellia();
+ if (bench_all || (bench_cipher_algs & BENCH_CAMELLIA))
+ bench_camellia();
#endif
#ifndef NO_RC4
- bench_arc4();
+ if (bench_all || (bench_cipher_algs & BENCH_ARC4)) {
+ #ifndef NO_SW_BENCH
+ bench_arc4(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) && \
+ !defined(NO_HW_BENCH)
+ bench_arc4(1);
+ #endif
+ }
#endif
#ifdef HAVE_HC128
- bench_hc128();
+ if (bench_all || (bench_cipher_algs & BENCH_HC128))
+ bench_hc128();
#endif
#ifndef NO_RABBIT
- bench_rabbit();
+ if (bench_all || (bench_cipher_algs & BENCH_RABBIT))
+ bench_rabbit();
#endif
#ifdef HAVE_CHACHA
- bench_chacha();
+ if (bench_all || (bench_cipher_algs & BENCH_CHACHA20))
+ bench_chacha();
#endif
#if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
- bench_chacha20_poly1305_aead();
+ if (bench_all || (bench_cipher_algs & BENCH_CHACHA20_POLY1305))
+ bench_chacha20_poly1305_aead();
#endif
#ifndef NO_DES3
- bench_des();
+ if (bench_all || (bench_cipher_algs & BENCH_DES)) {
+ #ifndef NO_SW_BENCH
+ bench_des(0);
+ #endif
+ #if ((defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)) || \
+ defined(HAVE_INTEL_QA_SYNC) || defined(HAVE_CAVIUM_OCTEON_SYNC)) && \
+ !defined(NO_HW_BENCH)
+ bench_des(1);
+ #endif
+ }
+#endif
+#ifdef HAVE_IDEA
+ if (bench_all || (bench_cipher_algs & BENCH_IDEA))
+ bench_idea();
#endif
-
- printf("\n");
#ifndef NO_MD5
- bench_md5();
+ if (bench_all || (bench_digest_algs & BENCH_MD5)) {
+ #ifndef NO_SW_BENCH
+ bench_md5(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) && \
+ !defined(NO_HW_BENCH)
+ bench_md5(1);
+ #endif
+ }
#endif
#ifdef HAVE_POLY1305
- bench_poly1305();
+ if (bench_all || (bench_digest_algs & BENCH_POLY1305))
+ bench_poly1305();
#endif
#ifndef NO_SHA
- bench_sha();
+ if (bench_all || (bench_digest_algs & BENCH_SHA)) {
+ #ifndef NO_SW_BENCH
+ bench_sha(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA) && \
+ !defined(NO_HW_BENCH)
+ bench_sha(1);
+ #endif
+ }
+#endif
+#ifdef WOLFSSL_SHA224
+ if (bench_all || (bench_digest_algs & BENCH_SHA224)) {
+ #ifndef NO_SW_BENCH
+ bench_sha224(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224) && \
+ !defined(NO_HW_BENCH)
+ bench_sha224(1);
+ #endif
+ }
#endif
#ifndef NO_SHA256
- bench_sha256();
+ if (bench_all || (bench_digest_algs & BENCH_SHA256)) {
+ #ifndef NO_SW_BENCH
+ bench_sha256(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256) && \
+ !defined(NO_HW_BENCH)
+ bench_sha256(1);
+ #endif
+ }
#endif
#ifdef WOLFSSL_SHA384
- bench_sha384();
+ if (bench_all || (bench_digest_algs & BENCH_SHA384)) {
+ #ifndef NO_SW_BENCH
+ bench_sha384(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) && \
+ !defined(NO_HW_BENCH)
+ bench_sha384(1);
+ #endif
+ }
#endif
#ifdef WOLFSSL_SHA512
- bench_sha512();
+ if (bench_all || (bench_digest_algs & BENCH_SHA512)) {
+ #ifndef NO_SW_BENCH
+ bench_sha512(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) && \
+ !defined(NO_HW_BENCH)
+ bench_sha512(1);
+ #endif
+ }
+#endif
+#ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_224
+ if (bench_all || (bench_digest_algs & BENCH_SHA3_224)) {
+ #ifndef NO_SW_BENCH
+ bench_sha3_224(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) && \
+ !defined(NO_HW_BENCH)
+ bench_sha3_224(1);
+ #endif
+ }
+ #endif /* WOLFSSL_NOSHA3_224 */
+ #ifndef WOLFSSL_NOSHA3_256
+ if (bench_all || (bench_digest_algs & BENCH_SHA3_256)) {
+ #ifndef NO_SW_BENCH
+ bench_sha3_256(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) && \
+ !defined(NO_HW_BENCH)
+ bench_sha3_256(1);
+ #endif
+ }
+ #endif /* WOLFSSL_NOSHA3_256 */
+ #ifndef WOLFSSL_NOSHA3_384
+ if (bench_all || (bench_digest_algs & BENCH_SHA3_384)) {
+ #ifndef NO_SW_BENCH
+ bench_sha3_384(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) && \
+ !defined(NO_HW_BENCH)
+ bench_sha3_384(1);
+ #endif
+ }
+ #endif /* WOLFSSL_NOSHA3_384 */
+ #ifndef WOLFSSL_NOSHA3_512
+ if (bench_all || (bench_digest_algs & BENCH_SHA3_512)) {
+ #ifndef NO_SW_BENCH
+ bench_sha3_512(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) && \
+ !defined(NO_HW_BENCH)
+ bench_sha3_512(1);
+ #endif
+ }
+ #endif /* WOLFSSL_NOSHA3_512 */
#endif
#ifdef WOLFSSL_RIPEMD
- bench_ripemd();
+ if (bench_all || (bench_digest_algs & BENCH_RIPEMD))
+ bench_ripemd();
#endif
#ifdef HAVE_BLAKE2
- bench_blake2();
+ if (bench_all || (bench_digest_algs & BENCH_BLAKE2B))
+ bench_blake2b();
+#endif
+#ifdef HAVE_BLAKE2S
+ if (bench_all || (bench_digest_algs & BENCH_BLAKE2S))
+ bench_blake2s();
+#endif
+#ifdef WOLFSSL_CMAC
+ if (bench_all || (bench_mac_algs & BENCH_CMAC))
+ bench_cmac();
#endif
- printf("\n");
+#ifndef NO_HMAC
+ #ifndef NO_MD5
+ if (bench_all || (bench_mac_algs & BENCH_HMAC_MD5)) {
+ #ifndef NO_SW_BENCH
+ bench_hmac_md5(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) && \
+ defined(WC_ASYNC_ENABLE_MD5) && !defined(NO_HW_BENCH)
+ bench_hmac_md5(1);
+ #endif
+ }
+ #endif
+ #ifndef NO_SHA
+ if (bench_all || (bench_mac_algs & BENCH_HMAC_SHA)) {
+ #ifndef NO_SW_BENCH
+ bench_hmac_sha(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) && \
+ defined(WC_ASYNC_ENABLE_SHA) && !defined(NO_HW_BENCH)
+ bench_hmac_sha(1);
+ #endif
+ }
+ #endif
+ #ifdef WOLFSSL_SHA224
+ if (bench_all || (bench_mac_algs & BENCH_HMAC_SHA224)) {
+ #ifndef NO_SW_BENCH
+ bench_hmac_sha224(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) && \
+ defined(WC_ASYNC_ENABLE_SHA224) && !defined(NO_HW_BENCH)
+ bench_hmac_sha224(1);
+ #endif
+ }
+ #endif
+ #ifndef NO_SHA256
+ if (bench_all || (bench_mac_algs & BENCH_HMAC_SHA256)) {
+ #ifndef NO_SW_BENCH
+ bench_hmac_sha256(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) && \
+ defined(WC_ASYNC_ENABLE_SHA256) && !defined(NO_HW_BENCH)
+ bench_hmac_sha256(1);
+ #endif
+ }
+ #endif
+ #ifdef WOLFSSL_SHA384
+ if (bench_all || (bench_mac_algs & BENCH_HMAC_SHA384)) {
+ #ifndef NO_SW_BENCH
+ bench_hmac_sha384(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) && \
+ defined(WC_ASYNC_ENABLE_SHA384) && !defined(NO_HW_BENCH)
+ bench_hmac_sha384(1);
+ #endif
+ }
+ #endif
+ #ifdef WOLFSSL_SHA512
+ if (bench_all || (bench_mac_algs & BENCH_HMAC_SHA512)) {
+ #ifndef NO_SW_BENCH
+ bench_hmac_sha512(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC) && \
+ defined(WC_ASYNC_ENABLE_SHA512) && !defined(NO_HW_BENCH)
+ bench_hmac_sha512(1);
+ #endif
+ }
+ #endif
+ #ifndef NO_PWDBASED
+ if (bench_all || (bench_mac_algs & BENCH_PBKDF2)) {
+ bench_pbkdf2();
+ }
+ #endif
+#endif /* NO_HMAC */
-#ifndef NO_RSA
- bench_rsa();
+#ifdef HAVE_SCRYPT
+ if (bench_all || (bench_other_algs & BENCH_SCRYPT))
+ bench_scrypt();
#endif
-#ifdef HAVE_NTRU
- bench_ntru();
-#endif
+#ifndef NO_RSA
+ #ifdef WOLFSSL_KEY_GEN
+ if (bench_all || (bench_asym_algs & BENCH_RSA_KEYGEN)) {
+ #ifndef NO_SW_BENCH
+ if (bench_asym_algs & BENCH_RSA_SZ) {
+ bench_rsaKeyGen_size(0, bench_size);
+ }
+ else {
+ bench_rsaKeyGen(0);
+ }
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA_KEYGEN) \
+ && !defined(NO_HW_BENCH)
+ if (bench_asym_algs & BENCH_RSA_SZ) {
+ bench_rsaKeyGen_size(1, bench_size);
+ }
+ else {
+ bench_rsaKeyGen(1);
+ }
+ #endif
+ }
+ #endif
+ if (bench_all || (bench_asym_algs & BENCH_RSA)) {
+ #ifndef NO_SW_BENCH
+ bench_rsa(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+ !defined(NO_HW_BENCH)
+ bench_rsa(1);
+ #endif
+ }
-#ifndef NO_DH
- bench_dh();
+ #ifdef WOLFSSL_KEY_GEN
+ if (bench_asym_algs & BENCH_RSA_SZ) {
+ #ifndef NO_SW_BENCH
+ bench_rsa_key(0, bench_size);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+ !defined(NO_HW_BENCH)
+ bench_rsa_key(1, bench_size);
+ #endif
+ }
+ #endif
#endif
-#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)
- bench_rsaKeyGen();
+#ifndef NO_DH
+ if (bench_all || (bench_asym_algs & BENCH_DH)) {
+ #ifndef NO_SW_BENCH
+ bench_dh(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH) && \
+ !defined(NO_HW_BENCH)
+ bench_dh(1);
+ #endif
+ }
#endif
#ifdef HAVE_NTRU
- bench_ntruKeyGen();
+ if (bench_all || (bench_asym_algs & BENCH_NTRU))
+ bench_ntru();
+ if (bench_all || (bench_asym_algs & BENCH_NTRU_KEYGEN))
+ bench_ntruKeyGen();
#endif
#ifdef HAVE_ECC
- bench_eccKeyGen();
- bench_eccKeyAgree();
- #if defined(FP_ECC)
- wc_ecc_fp_free();
+ if (bench_all || (bench_asym_algs & BENCH_ECC_MAKEKEY)) {
+ #ifndef NO_SW_BENCH
+ bench_eccMakeKey(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+ !defined(NO_HW_BENCH)
+ bench_eccMakeKey(1);
+ #endif
+ }
+ if (bench_all || (bench_asym_algs & BENCH_ECC)) {
+ #ifndef NO_SW_BENCH
+ bench_ecc(0);
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+ !defined(NO_HW_BENCH)
+ bench_ecc(1);
+ #endif
+ }
+ #ifdef HAVE_ECC_ENCRYPT
+ if (bench_all || (bench_asym_algs & BENCH_ECC_ENCRYPT))
+ bench_eccEncrypt();
#endif
#endif
#ifdef HAVE_CURVE25519
- bench_curve25519KeyGen();
- bench_curve25519KeyAgree();
+ if (bench_all || (bench_asym_algs & BENCH_CURVE25519_KEYGEN))
+ bench_curve25519KeyGen();
+ #ifdef HAVE_CURVE25519_SHARED_SECRET
+ if (bench_all || (bench_asym_algs & BENCH_CURVE25519_KA))
+ bench_curve25519KeyAgree();
+ #endif
#endif
#ifdef HAVE_ED25519
- bench_ed25519KeyGen();
- bench_ed25519KeySign();
+ if (bench_all || (bench_asym_algs & BENCH_ED25519_KEYGEN))
+ bench_ed25519KeyGen();
+ if (bench_all || (bench_asym_algs & BENCH_ED25519_SIGN))
+ bench_ed25519KeySign();
#endif
-#if defined(HAVE_LOCAL_RNG)
- wc_FreeRng(&rng);
+#ifdef HAVE_CURVE448
+ if (bench_all || (bench_asym_algs & BENCH_CURVE448_KEYGEN))
+ bench_curve448KeyGen();
+ #ifdef HAVE_CURVE448_SHARED_SECRET
+ if (bench_all || (bench_asym_algs & BENCH_CURVE448_KA))
+ bench_curve448KeyAgree();
+ #endif
#endif
- return 0;
-}
+#ifdef HAVE_ED448
+ if (bench_all || (bench_asym_algs & BENCH_ED448_KEYGEN))
+ bench_ed448KeyGen();
+ if (bench_all || (bench_asym_algs & BENCH_ED448_SIGN))
+ bench_ed448KeySign();
+#endif
+exit:
+ /* free benchmark buffers */
+ XFREE(bench_plain, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+ XFREE(bench_cipher, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ XFREE(bench_key, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+ XFREE(bench_iv, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+#endif
-#ifdef BENCH_EMBEDDED
-enum BenchmarkBounds {
- numBlocks = 25, /* how many kB to test (en/de)cryption */
- ntimes = 1,
- genTimes = 5, /* public key iterations */
- agreeTimes = 5
-};
-static const char blockType[] = "kB"; /* used in printf output */
-#else
-enum BenchmarkBounds {
- numBlocks = 50, /* how many megs to test (en/de)cryption */
- ntimes = 100,
- genTimes = 100,
- agreeTimes = 100
-};
-static const char blockType[] = "megs"; /* used in printf output */
+#ifdef WOLF_CRYPTO_CB
+#ifdef HAVE_INTEL_QA_SYNC
+ wc_CryptoCb_CleanupIntelQa(&devId);
+#endif
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+ wc_CryptoCb_CleanupOcteon(&devId);
+#endif
#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+ /* free event queue */
+ wolfEventQueue_Free(&eventQueue);
+#endif
-#ifndef NO_AES
+#if defined(HAVE_LOCAL_RNG)
+ wc_FreeRng(&gRng);
+#endif
-void bench_aes(int show)
+#ifdef WOLFSSL_ASYNC_CRYPT
+ wolfAsync_DevClose(&devId);
+#endif
+
+/* cleanup the thread if fixed point cache is enabled and have thread local */
+#if defined(HAVE_THREAD_LS) && defined(HAVE_ECC) && defined(FP_ECC)
+ wc_ecc_fp_free();
+#endif
+
+ (void)bench_cipher_algs;
+ (void)bench_digest_algs;
+ (void)bench_mac_algs;
+ (void)bench_asym_algs;
+ (void)bench_other_algs;
+
+ return NULL;
+}
+
+int benchmark_init(void)
{
- Aes enc;
- double start, total, persec;
- int i;
- int ret;
+ int ret = 0;
-#ifdef HAVE_CAVIUM
- if (wc_AesInitCavium(&enc, CAVIUM_DEV_ID) != 0) {
- printf("aes init cavium failed\n");
- return;
+ benchmark_static_init();
+
+#ifdef WOLFSSL_STATIC_MEMORY
+ ret = wc_LoadStaticMemory(&HEAP_HINT, gBenchMemory, sizeof(gBenchMemory),
+ WOLFMEM_GENERAL, 1);
+ if (ret != 0) {
+ printf("unable to load static memory %d\n", ret);
+ }
+#endif /* WOLFSSL_STATIC_MEMORY */
+
+ if ((ret = wolfCrypt_Init()) != 0) {
+ printf("wolfCrypt_Init failed %d\n", ret);
+ return EXIT_FAILURE;
}
+
+ bench_stats_init();
+
+#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
+ wolfSSL_Debugging_ON();
#endif
- ret = wc_AesSetKey(&enc, key, 16, iv, AES_ENCRYPTION);
+ if (csv_format == 1) {
+ printf("wolfCrypt Benchmark (block bytes %d, min %.1f sec each)\n",
+ (int)BENCH_SIZE, BENCH_MIN_RUNTIME_SEC);
+ printf("This format allows you to easily copy the output to a csv file.");
+ printf("\n\nSymmetric Ciphers:\n\n");
+ printf("Algorithm,MB/s,Cycles per byte,\n");
+ } else {
+ printf("wolfCrypt Benchmark (block bytes %d, min %.1f sec each)\n",
+ (int)BENCH_SIZE, BENCH_MIN_RUNTIME_SEC);
+ }
+
+#ifdef HAVE_WNR
+ ret = wc_InitNetRandom(wnrConfigFile, NULL, 5000);
if (ret != 0) {
- printf("AesSetKey failed, ret = %d\n", ret);
- return;
+ printf("Whitewood netRandom config init failed %d\n", ret);
}
- start = current_time(1);
- BEGIN_INTEL_CYCLES
+#endif /* HAVE_WNR */
- for(i = 0; i < numBlocks; i++)
- wc_AesCbcEncrypt(&enc, plain, cipher, sizeof(plain));
+ return ret;
+}
- END_INTEL_CYCLES
- total = current_time(0) - start;
+int benchmark_free(void)
+{
+ int ret;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
+#ifdef HAVE_WNR
+ ret = wc_FreeNetRandom();
+ if (ret < 0) {
+ printf("Failed to free netRandom context %d\n", ret);
+ }
#endif
- if (show) {
- printf("AES %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ if (gPrintStats || devId != INVALID_DEVID) {
+ bench_stats_print();
}
-#ifdef HAVE_CAVIUM
- wc_AesFreeCavium(&enc);
+
+ bench_stats_free();
+
+ if ((ret = wolfCrypt_Cleanup()) != 0) {
+ printf("error %d with wolfCrypt_Cleanup\n", ret);
+ }
+
+ return ret;
+}
+
+/* so embedded projects can pull in tests on their own */
+#ifdef HAVE_STACK_SIZE
+THREAD_RETURN WOLFSSL_THREAD benchmark_test(void* args)
+#else
+int benchmark_test(void *args)
#endif
+{
+ int ret;
+
+ (void)args;
+
+ printf("------------------------------------------------------------------------------\n");
+ printf(" wolfSSL version %s\n", LIBWOLFSSL_VERSION_STRING);
+ printf("------------------------------------------------------------------------------\n");
+
+ ret = benchmark_init();
+ if (ret != 0)
+ EXIT_TEST(ret);
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+{
+ int i;
+
+ if (g_threadCount == 0) {
+ #ifdef WC_ASYNC_BENCH_THREAD_COUNT
+ g_threadCount = WC_ASYNC_BENCH_THREAD_COUNT;
+ #else
+ g_threadCount = wc_AsyncGetNumberOfCpus();
+ #endif
+ }
+
+ printf("CPUs: %d\n", g_threadCount);
+
+ g_threadData = (ThreadData*)XMALLOC(sizeof(ThreadData) * g_threadCount,
+ HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (g_threadData == NULL) {
+ printf("Thread data alloc failed!\n");
+ EXIT_TEST(EXIT_FAILURE);
+ }
+
+ /* Create threads */
+ for (i = 0; i < g_threadCount; i++) {
+ ret = wc_AsyncThreadCreate(&g_threadData[i].thread_id,
+ benchmarks_do, &g_threadData[i]);
+ if (ret != 0) {
+ printf("Error creating benchmark thread %d\n", ret);
+ EXIT_TEST(EXIT_FAILURE);
+ }
+ }
+
+ /* Start threads */
+ for (i = 0; i < g_threadCount; i++) {
+ wc_AsyncThreadJoin(&g_threadData[i].thread_id);
+ }
+
+ XFREE(g_threadData, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
}
+#else
+ benchmarks_do(NULL);
#endif
+ printf("Benchmark complete\n");
-#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
- static byte additional[13];
- static byte tag[16];
+ ret = benchmark_free();
+
+ EXIT_TEST(ret);
+}
+
+
+#ifndef WC_NO_RNG
+void bench_rng(void)
+{
+ int ret, i, count;
+ double start;
+ long pos, len, remain;
+ WC_RNG myrng;
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&myrng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&myrng);
#endif
+ if (ret < 0) {
+ printf("InitRNG failed %d\n", ret);
+ return;
+ }
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ /* Split request to handle large RNG request */
+ pos = 0;
+ remain = (int)BENCH_SIZE;
+ while (remain > 0) {
+ len = remain;
+ if (len > RNG_MAX_BLOCK_LEN)
+ len = RNG_MAX_BLOCK_LEN;
+ ret = wc_RNG_GenerateBlock(&myrng, &bench_plain[pos], (word32)len);
+ if (ret < 0)
+ goto exit_rng;
+
+ remain -= len;
+ pos += len;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit_rng:
+ bench_stats_sym_finish("RNG", 0, count, bench_size, start, ret);
-#ifdef HAVE_AESGCM
-void bench_aesgcm(void)
+ wc_FreeRng(&myrng);
+}
+#endif /* WC_NO_RNG */
+
+
+#ifndef NO_AES
+
+#ifdef HAVE_AES_CBC
+static void bench_aescbc_internal(int doAsync, const byte* key, word32 keySz,
+ const byte* iv, const char* encLabel,
+ const char* decLabel)
{
- Aes enc;
- double start, total, persec;
- int i;
+ int ret = 0, i, count = 0, times, pending = 0;
+ Aes enc[BENCH_MAX_PENDING];
+ double start;
+
+ /* clear for done cleanup */
+ XMEMSET(enc, 0, sizeof(enc));
+
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if ((ret = wc_AesInit(&enc[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID)) != 0) {
+ printf("AesInit failed, ret = %d\n", ret);
+ goto exit;
+ }
- wc_AesGcmSetKey(&enc, key, 16);
- start = current_time(1);
- BEGIN_INTEL_CYCLES
+ ret = wc_AesSetKey(&enc[i], key, keySz, iv, AES_ENCRYPTION);
+ if (ret != 0) {
+ printf("AesSetKey failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
- for(i = 0; i < numBlocks; i++)
- wc_AesGcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12,
- tag, 16, additional, 13);
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_AesCbcEncrypt(&enc[i], bench_plain, bench_cipher,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, &pending)) {
+ goto exit_aes_enc;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_aes_enc:
+ bench_stats_sym_finish(encLabel, doAsync, count, bench_size, start, ret);
- END_INTEL_CYCLES
- total = current_time(0) - start;
+ if (ret < 0) {
+ goto exit;
+ }
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
+#ifdef HAVE_AES_DECRYPT
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_AesSetKey(&enc[i], key, keySz, iv, AES_DECRYPTION);
+ if (ret != 0) {
+ printf("AesSetKey failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
- printf("AES-GCM %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_AesCbcDecrypt(&enc[i], bench_plain, bench_cipher,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, &pending)) {
+ goto exit_aes_dec;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_aes_dec:
+ bench_stats_sym_finish(decLabel, doAsync, count, bench_size, start, ret);
+
+#endif /* HAVE_AES_DECRYPT */
+
+ (void)decLabel;
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_AesFree(&enc[i]);
+ }
}
+
+void bench_aescbc(int doAsync)
+{
+#ifdef WOLFSSL_AES_128
+ bench_aescbc_internal(doAsync, bench_key, 16, bench_iv,
+ "AES-128-CBC-enc", "AES-128-CBC-dec");
+#endif
+#ifdef WOLFSSL_AES_192
+ bench_aescbc_internal(doAsync, bench_key, 24, bench_iv,
+ "AES-192-CBC-enc", "AES-192-CBC-dec");
+#endif
+#ifdef WOLFSSL_AES_256
+ bench_aescbc_internal(doAsync, bench_key, 32, bench_iv,
+ "AES-256-CBC-enc", "AES-256-CBC-dec");
#endif
+}
-#ifdef WOLFSSL_AES_COUNTER
-void bench_aesctr(void)
+#endif /* HAVE_AES_CBC */
+
+#ifdef HAVE_AESGCM
+static void bench_aesgcm_internal(int doAsync, const byte* key, word32 keySz,
+ const byte* iv, word32 ivSz,
+ const char* encLabel, const char* decLabel)
{
- Aes enc;
- double start, total, persec;
- int i;
+ int ret = 0, i, count = 0, times, pending = 0;
+ Aes enc[BENCH_MAX_PENDING];
+#ifdef HAVE_AES_DECRYPT
+ Aes dec[BENCH_MAX_PENDING];
+#endif
+ double start;
- wc_AesSetKeyDirect(&enc, key, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
- start = current_time(1);
- BEGIN_INTEL_CYCLES
+ DECLARE_VAR(bench_additional, byte, AES_AUTH_ADD_SZ, HEAP_HINT);
+ DECLARE_VAR(bench_tag, byte, AES_AUTH_TAG_SZ, HEAP_HINT);
- for(i = 0; i < numBlocks; i++)
- wc_AesCtrEncrypt(&enc, plain, cipher, sizeof(plain));
+ /* clear for done cleanup */
+ XMEMSET(enc, 0, sizeof(enc));
+#ifdef HAVE_AES_DECRYPT
+ XMEMSET(dec, 0, sizeof(dec));
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (bench_additional)
+#endif
+ XMEMSET(bench_additional, 0, AES_AUTH_ADD_SZ);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (bench_tag)
+#endif
+ XMEMSET(bench_tag, 0, AES_AUTH_TAG_SZ);
+
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if ((ret = wc_AesInit(&enc[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID)) != 0) {
+ printf("AesInit failed, ret = %d\n", ret);
+ goto exit;
+ }
- END_INTEL_CYCLES
- total = current_time(0) - start;
+ ret = wc_AesGcmSetKey(&enc[i], key, keySz);
+ if (ret != 0) {
+ printf("AesGcmSetKey failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
+ /* GCM uses same routine in backend for both encrypt and decrypt */
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_AesGcmEncrypt(&enc[i], bench_cipher,
+ bench_plain, BENCH_SIZE,
+ iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
+ bench_additional, aesAuthAddSz);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, &pending)) {
+ goto exit_aes_gcm;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_aes_gcm:
+ bench_stats_sym_finish(encLabel, doAsync, count, bench_size, start, ret);
+
+#ifdef HAVE_AES_DECRYPT
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if ((ret = wc_AesInit(&dec[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID)) != 0) {
+ printf("AesInit failed, ret = %d\n", ret);
+ goto exit;
+ }
+
+ ret = wc_AesGcmSetKey(&dec[i], key, keySz);
+ if (ret != 0) {
+ printf("AesGcmSetKey failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&dec[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_AesGcmDecrypt(&dec[i], bench_plain,
+ bench_cipher, BENCH_SIZE,
+ iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
+ bench_additional, aesAuthAddSz);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&dec[i]), 0, &times, &pending)) {
+ goto exit_aes_gcm_dec;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_aes_gcm_dec:
+ bench_stats_sym_finish(decLabel, doAsync, count, bench_size, start, ret);
+#endif /* HAVE_AES_DECRYPT */
+
+ (void)decLabel;
+
+exit:
+
+ if (ret < 0) {
+ printf("bench_aesgcm failed: %d\n", ret);
+ }
+#ifdef HAVE_AES_DECRYPT
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_AesFree(&dec[i]);
+ }
#endif
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_AesFree(&enc[i]);
+ }
- printf("AES-CTR %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ FREE_VAR(bench_additional, HEAP_HINT);
+ FREE_VAR(bench_tag, HEAP_HINT);
}
+
+void bench_aesgcm(int doAsync)
+{
+#if defined(WOLFSSL_AES_128) && !defined(WOLFSSL_AFALG_XILINX_AES)
+ bench_aesgcm_internal(doAsync, bench_key, 16, bench_iv, 12,
+ "AES-128-GCM-enc", "AES-128-GCM-dec");
+#endif
+#if defined(WOLFSSL_AES_192) && !defined(WOLFSSL_AFALG_XILINX_AES)
+ bench_aesgcm_internal(doAsync, bench_key, 24, bench_iv, 12,
+ "AES-192-GCM-enc", "AES-192-GCM-dec");
+#endif
+#ifdef WOLFSSL_AES_256
+ bench_aesgcm_internal(doAsync, bench_key, 32, bench_iv, 12,
+ "AES-256-GCM-enc", "AES-256-GCM-dec");
#endif
+}
+#endif /* HAVE_AESGCM */
+#ifdef WOLFSSL_AES_DIRECT
+static void bench_aesecb_internal(int doAsync, const byte* key, word32 keySz,
+ const char* encLabel, const char* decLabel)
+{
+ int ret, i, count = 0, times, pending = 0;
+ Aes enc[BENCH_MAX_PENDING];
+ double start;
+
+ /* clear for done cleanup */
+ XMEMSET(enc, 0, sizeof(enc));
+
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if ((ret = wc_AesInit(&enc[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID)) != 0) {
+ printf("AesInit failed, ret = %d\n", ret);
+ goto exit;
+ }
-#ifdef HAVE_AESCCM
-void bench_aesccm(void)
+ ret = wc_AesSetKey(&enc[i], key, keySz, bench_iv, AES_ENCRYPTION);
+ if (ret != 0) {
+ printf("AesSetKey failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks, &pending)) {
+ wc_AesEncryptDirect(&enc[i], bench_cipher, bench_plain);
+ ret = 0;
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, &pending)) {
+ goto exit_aes_enc;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_aes_enc:
+ bench_stats_sym_finish(encLabel, doAsync, count, AES_BLOCK_SIZE,
+ start, ret);
+
+#ifdef HAVE_AES_DECRYPT
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_AesSetKey(&enc[i], key, keySz, bench_iv, AES_DECRYPTION);
+ if (ret != 0) {
+ printf("AesSetKey failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks, &pending)) {
+ wc_AesDecryptDirect(&enc[i], bench_plain,
+ bench_cipher);
+ ret = 0;
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, &pending)) {
+ goto exit_aes_dec;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_aes_dec:
+ bench_stats_sym_finish(decLabel, doAsync, count, AES_BLOCK_SIZE,
+ start, ret);
+
+#endif /* HAVE_AES_DECRYPT */
+
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_AesFree(&enc[i]);
+ }
+}
+
+void bench_aesecb(int doAsync)
{
- Aes enc;
- double start, total, persec;
- int i;
+#ifdef WOLFSSL_AES_128
+ bench_aesecb_internal(doAsync, bench_key, 16,
+ "AES-128-ECB-enc", "AES-128-ECB-dec");
+#endif
+#ifdef WOLFSSL_AES_192
+ bench_aesecb_internal(doAsync, bench_key, 24,
+ "AES-192-ECB-enc", "AES-192-ECB-dec");
+#endif
+#ifdef WOLFSSL_AES_256
+ bench_aesecb_internal(doAsync, bench_key, 32,
+ "AES-256-ECB-enc", "AES-256-ECB-dec");
+#endif
+}
+#endif /* WOLFSSL_AES_DIRECT */
- wc_AesCcmSetKey(&enc, key, 16);
- start = current_time(1);
- BEGIN_INTEL_CYCLES
+#ifdef WOLFSSL_AES_CFB
+static void bench_aescfb_internal(const byte* key, word32 keySz, const byte* iv,
+ const char* label)
+{
+ Aes enc;
+ double start;
+ int i, ret, count;
- for(i = 0; i < numBlocks; i++)
- wc_AesCcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12,
- tag, 16, additional, 13);
+ ret = wc_AesSetKey(&enc, key, keySz, iv, AES_ENCRYPTION);
+ if (ret != 0) {
+ printf("AesSetKey failed, ret = %d\n", ret);
+ return;
+ }
- END_INTEL_CYCLES
- total = current_time(0) - start;
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ if((ret = wc_AesCfbEncrypt(&enc, bench_plain, bench_cipher,
+ BENCH_SIZE)) != 0) {
+ printf("wc_AesCfbEncrypt failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish(label, 0, count, bench_size, start, ret);
+}
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
+void bench_aescfb(void)
+{
+#ifdef WOLFSSL_AES_128
+ bench_aescfb_internal(bench_key, 16, bench_iv, "AES-128-CFB");
+#endif
+#ifdef WOLFSSL_AES_192
+ bench_aescfb_internal(bench_key, 24, bench_iv, "AES-192-CFB");
+#endif
+#ifdef WOLFSSL_AES_256
+ bench_aescfb_internal(bench_key, 32, bench_iv, "AES-256-CFB");
#endif
+}
+#endif /* WOLFSSL_AES_CFB */
- printf("AES-CCM %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+
+#ifdef WOLFSSL_AES_OFB
+static void bench_aesofb_internal(const byte* key, word32 keySz, const byte* iv,
+ const char* label)
+{
+ Aes enc;
+ double start;
+ int i, ret, count;
+
+ ret = wc_AesSetKey(&enc, key, keySz, iv, AES_ENCRYPTION);
+ if (ret != 0) {
+ printf("AesSetKey failed, ret = %d\n", ret);
+ return;
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ if((ret = wc_AesOfbEncrypt(&enc, bench_plain, bench_cipher,
+ BENCH_SIZE)) != 0) {
+                printf("wc_AesOfbEncrypt failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish(label, 0, count, bench_size, start, ret);
}
+
+void bench_aesofb(void)
+{
+#ifdef WOLFSSL_AES_128
+ bench_aesofb_internal(bench_key, 16, bench_iv, "AES-128-OFB");
+#endif
+#ifdef WOLFSSL_AES_192
+ bench_aesofb_internal(bench_key, 24, bench_iv, "AES-192-OFB");
#endif
+#ifdef WOLFSSL_AES_256
+ bench_aesofb_internal(bench_key, 32, bench_iv, "AES-256-OFB");
+#endif
+}
+#endif /* WOLFSSL_AES_OFB */
-#ifdef HAVE_POLY1305
-void bench_poly1305()
+#ifdef WOLFSSL_AES_XTS
+void bench_aesxts(void)
{
- Poly1305 enc;
- byte mac[16];
- double start, total, persec;
- int i;
- int ret;
+ XtsAes aes;
+ double start;
+ int i, count, ret;
+
+ static unsigned char k1[] = {
+ 0xa1, 0xb9, 0x0c, 0xba, 0x3f, 0x06, 0xac, 0x35,
+ 0x3b, 0x2c, 0x34, 0x38, 0x76, 0x08, 0x17, 0x62,
+ 0x09, 0x09, 0x23, 0x02, 0x6e, 0x91, 0x77, 0x18,
+ 0x15, 0xf2, 0x9d, 0xab, 0x01, 0x93, 0x2f, 0x2f
+ };
+ static unsigned char i1[] = {
+ 0x4f, 0xae, 0xf7, 0x11, 0x7c, 0xda, 0x59, 0xc6,
+ 0x6e, 0x4b, 0x92, 0x01, 0x3e, 0x76, 0x8a, 0xd5
+ };
- ret = wc_Poly1305SetKey(&enc, key, 32);
+ ret = wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_ENCRYPTION,
+ HEAP_HINT, devId);
if (ret != 0) {
- printf("Poly1305SetKey failed, ret = %d\n", ret);
+ printf("wc_AesXtsSetKey failed, ret = %d\n", ret);
return;
}
- start = current_time(1);
- BEGIN_INTEL_CYCLES
- for(i = 0; i < numBlocks; i++)
- wc_Poly1305Update(&enc, plain, sizeof(plain));
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ if ((ret = wc_AesXtsEncrypt(&aes, bench_plain, bench_cipher,
+ BENCH_SIZE, i1, sizeof(i1))) != 0) {
+ printf("wc_AesXtsEncrypt failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("AES-XTS-enc", 0, count, bench_size, start, ret);
+ wc_AesXtsFree(&aes);
+
+ /* decryption benchmark */
+ ret = wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_DECRYPTION,
+ HEAP_HINT, devId);
+ if (ret != 0) {
+ printf("wc_AesXtsSetKey failed, ret = %d\n", ret);
+ return;
+ }
- wc_Poly1305Final(&enc, mac);
- END_INTEL_CYCLES
- total = current_time(0) - start;
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ if ((ret = wc_AesXtsDecrypt(&aes, bench_plain, bench_cipher,
+ BENCH_SIZE, i1, sizeof(i1))) != 0) {
+ printf("wc_AesXtsDecrypt failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("AES-XTS-dec", 0, count, bench_size, start, ret);
+ wc_AesXtsFree(&aes);
+}
+#endif /* WOLFSSL_AES_XTS */
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
+
+#ifdef WOLFSSL_AES_COUNTER
+static void bench_aesctr_internal(const byte* key, word32 keySz, const byte* iv,
+ const char* label)
+{
+ Aes enc;
+ double start;
+ int i, count, ret = 0;
+
+ wc_AesSetKeyDirect(&enc, key, keySz, iv, AES_ENCRYPTION);
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ if((ret = wc_AesCtrEncrypt(&enc, bench_plain, bench_cipher, BENCH_SIZE)) != 0) {
+ printf("wc_AesCtrEncrypt failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish(label, 0, count, bench_size, start, ret);
+}
+
+void bench_aesctr(void)
+{
+#ifdef WOLFSSL_AES_128
+ bench_aesctr_internal(bench_key, 16, bench_iv, "AES-128-CTR");
#endif
+#ifdef WOLFSSL_AES_192
+ bench_aesctr_internal(bench_key, 24, bench_iv, "AES-192-CTR");
+#endif
+#ifdef WOLFSSL_AES_256
+ bench_aesctr_internal(bench_key, 32, bench_iv, "AES-256-CTR");
+#endif
+}
+#endif /* WOLFSSL_AES_COUNTER */
- printf("POLY1305 %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+
+#ifdef HAVE_AESCCM
+void bench_aesccm(void)
+{
+ Aes enc;
+ double start;
+ int ret, i, count;
+
+ DECLARE_VAR(bench_additional, byte, AES_AUTH_ADD_SZ, HEAP_HINT);
+ DECLARE_VAR(bench_tag, byte, AES_AUTH_TAG_SZ, HEAP_HINT);
+
+ XMEMSET(bench_tag, 0, AES_AUTH_TAG_SZ);
+ XMEMSET(bench_additional, 0, AES_AUTH_ADD_SZ);
+
+ if ((ret = wc_AesCcmSetKey(&enc, bench_key, 16)) != 0) {
+ printf("wc_AesCcmSetKey failed, ret = %d\n", ret);
+ return;
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ wc_AesCcmEncrypt(&enc, bench_cipher, bench_plain, BENCH_SIZE,
+ bench_iv, 12, bench_tag, AES_AUTH_TAG_SZ,
+ bench_additional, aesAuthAddSz);
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("AES-CCM-Enc", 0, count, bench_size, start, ret);
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ wc_AesCcmDecrypt(&enc, bench_plain, bench_cipher, BENCH_SIZE,
+ bench_iv, 12, bench_tag, AES_AUTH_TAG_SZ,
+ bench_additional, aesAuthAddSz);
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("AES-CCM-Dec", 0, count, bench_size, start, ret);
+
+
+ FREE_VAR(bench_additional, HEAP_HINT);
+ FREE_VAR(bench_tag, HEAP_HINT);
+}
+#endif /* HAVE_AESCCM */
+#endif /* !NO_AES */
+
+
+#ifdef HAVE_POLY1305
+void bench_poly1305(void)
+{
+ Poly1305 enc;
+ byte mac[16];
+ double start;
+ int ret = 0, i, count;
+
+ if (digest_stream) {
+ ret = wc_Poly1305SetKey(&enc, bench_key, 32);
+ if (ret != 0) {
+ printf("Poly1305SetKey failed, ret = %d\n", ret);
+ return;
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_Poly1305Update(&enc, bench_plain, BENCH_SIZE);
+ if (ret != 0) {
+ printf("Poly1305Update failed: %d\n", ret);
+ break;
+ }
+ }
+ wc_Poly1305Final(&enc, mac);
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("POLY1305", 0, count, bench_size, start, ret);
+ }
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_Poly1305SetKey(&enc, bench_key, 32);
+ if (ret != 0) {
+ printf("Poly1305SetKey failed, ret = %d\n", ret);
+ return;
+ }
+ ret = wc_Poly1305Update(&enc, bench_plain, BENCH_SIZE);
+ if (ret != 0) {
+ printf("Poly1305Update failed: %d\n", ret);
+ break;
+ }
+ wc_Poly1305Final(&enc, mac);
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("POLY1305", 0, count, bench_size, start, ret);
+ }
}
#endif /* HAVE_POLY1305 */
@@ -592,143 +2757,185 @@ void bench_poly1305()
void bench_camellia(void)
{
Camellia cam;
- double start, total, persec;
- int i, ret;
+ double start;
+ int ret, i, count;
- ret = wc_CamelliaSetKey(&cam, key, 16, iv);
+ ret = wc_CamelliaSetKey(&cam, bench_key, 16, bench_iv);
if (ret != 0) {
printf("CamelliaSetKey failed, ret = %d\n", ret);
return;
}
- start = current_time(1);
- BEGIN_INTEL_CYCLES
- for(i = 0; i < numBlocks; i++)
- wc_CamelliaCbcEncrypt(&cam, plain, cipher, sizeof(plain));
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_CamelliaCbcEncrypt(&cam, bench_plain, bench_cipher,
+ BENCH_SIZE);
+ if (ret < 0) {
+ printf("CamelliaCbcEncrypt failed: %d\n", ret);
+ return;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("Camellia", 0, count, bench_size, start, ret);
+}
+#endif
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
+#ifndef NO_DES3
+void bench_des(int doAsync)
+{
+ int ret = 0, i, count = 0, times, pending = 0;
+ Des3 enc[BENCH_MAX_PENDING];
+ double start;
+
+ /* clear for done cleanup */
+ XMEMSET(enc, 0, sizeof(enc));
+
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if ((ret = wc_Des3Init(&enc[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID)) != 0) {
+ printf("Des3Init failed, ret = %d\n", ret);
+ goto exit;
+ }
- printf("Camellia %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ ret = wc_Des3_SetKey(&enc[i], bench_key, bench_iv, DES_ENCRYPTION);
+ if (ret != 0) {
+ printf("Des3_SetKey failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Des3_CbcEncrypt(&enc[i], bench_plain, bench_cipher,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, &pending)) {
+ goto exit_3des;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_3des:
+ bench_stats_sym_finish("3DES", doAsync, count, bench_size, start, ret);
+
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Des3Free(&enc[i]);
+ }
}
-#endif
+#endif /* !NO_DES3 */
-#ifndef NO_DES3
-void bench_des(void)
+#ifdef HAVE_IDEA
+void bench_idea(void)
{
- Des3 enc;
- double start, total, persec;
- int i, ret;
+ Idea enc;
+ double start;
+ int ret = 0, i, count;
-#ifdef HAVE_CAVIUM
- if (wc_Des3_InitCavium(&enc, CAVIUM_DEV_ID) != 0)
- printf("des3 init cavium failed\n");
-#endif
- ret = wc_Des3_SetKey(&enc, key, iv, DES_ENCRYPTION);
+ ret = wc_IdeaSetKey(&enc, bench_key, IDEA_KEY_SIZE, bench_iv,
+ IDEA_ENCRYPTION);
if (ret != 0) {
printf("Des3_SetKey failed, ret = %d\n", ret);
return;
}
- start = current_time(1);
- BEGIN_INTEL_CYCLES
- for(i = 0; i < numBlocks; i++)
- wc_Des3_CbcEncrypt(&enc, plain, cipher, sizeof(plain));
-
- END_INTEL_CYCLES
- total = current_time(0) - start;
-
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
-
- printf("3DES %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
-#ifdef HAVE_CAVIUM
- wc_Des3_FreeCavium(&enc);
-#endif
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ wc_IdeaCbcEncrypt(&enc, bench_plain, bench_cipher, BENCH_SIZE);
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("IDEA", 0, count, bench_size, start, ret);
}
-#endif
+#endif /* HAVE_IDEA */
#ifndef NO_RC4
-void bench_arc4(void)
+void bench_arc4(int doAsync)
{
- Arc4 enc;
- double start, total, persec;
- int i;
-
-#ifdef HAVE_CAVIUM
- if (wc_Arc4InitCavium(&enc, CAVIUM_DEV_ID) != 0)
- printf("arc4 init cavium failed\n");
-#endif
-
- wc_Arc4SetKey(&enc, key, 16);
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for(i = 0; i < numBlocks; i++)
- wc_Arc4Process(&enc, cipher, plain, sizeof(plain));
+ int ret = 0, i, count = 0, times, pending = 0;
+ Arc4 enc[BENCH_MAX_PENDING];
+ double start;
+
+ /* clear for done cleanup */
+ XMEMSET(enc, 0, sizeof(enc));
+
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if ((ret = wc_Arc4Init(&enc[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID)) != 0) {
+ printf("Arc4Init failed, ret = %d\n", ret);
+ goto exit;
+ }
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
+ ret = wc_Arc4SetKey(&enc[i], bench_key, 16);
+ if (ret != 0) {
+ printf("Arc4SetKey failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
- printf("ARC4 %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
-#ifdef HAVE_CAVIUM
- wc_Arc4FreeCavium(&enc);
-#endif
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Arc4Process(&enc[i], bench_cipher, bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, &pending)) {
+ goto exit_arc4;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_arc4:
+ bench_stats_sym_finish("ARC4", doAsync, count, bench_size, start, ret);
+
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Arc4Free(&enc[i]);
+ }
}
-#endif
+#endif /* !NO_RC4 */
#ifdef HAVE_HC128
void bench_hc128(void)
{
HC128 enc;
- double start, total, persec;
- int i;
-
- wc_Hc128_SetKey(&enc, key, iv);
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for(i = 0; i < numBlocks; i++)
- wc_Hc128_Process(&enc, cipher, plain, sizeof(plain));
+ double start;
+ int i, count;
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
+ wc_Hc128_SetKey(&enc, bench_key, bench_iv);
- printf("HC128 %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ wc_Hc128_Process(&enc, bench_cipher, bench_plain, BENCH_SIZE);
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("HC128", 0, count, bench_size, start, 0);
}
#endif /* HAVE_HC128 */
@@ -736,29 +2943,20 @@ void bench_hc128(void)
#ifndef NO_RABBIT
void bench_rabbit(void)
{
- Rabbit enc;
- double start, total, persec;
- int i;
-
- wc_RabbitSetKey(&enc, key, iv);
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for(i = 0; i < numBlocks; i++)
- wc_RabbitProcess(&enc, cipher, plain, sizeof(plain));
+ Rabbit enc;
+ double start;
+ int i, count;
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
+ wc_RabbitSetKey(&enc, bench_key, bench_iv);
- printf("RABBIT %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ wc_RabbitProcess(&enc, bench_cipher, bench_plain, BENCH_SIZE);
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("RABBIT", 0, count, bench_size, start, 0);
}
#endif /* NO_RABBIT */
@@ -767,356 +2965,1391 @@ void bench_rabbit(void)
void bench_chacha(void)
{
ChaCha enc;
- double start, total, persec;
- int i;
+ double start;
+ int i, count;
- wc_Chacha_SetKey(&enc, key, 16);
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for (i = 0; i < numBlocks; i++) {
- wc_Chacha_SetIV(&enc, iv, 0);
- wc_Chacha_Process(&enc, cipher, plain, sizeof(plain));
- }
-
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
-
- printf("CHACHA %d %s took %5.3f seconds, %8.3f MB/s", numBlocks, blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ wc_Chacha_SetKey(&enc, bench_key, 16);
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ wc_Chacha_SetIV(&enc, bench_iv, 0);
+ wc_Chacha_Process(&enc, bench_cipher, bench_plain, BENCH_SIZE);
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("CHACHA", 0, count, bench_size, start, 0);
}
#endif /* HAVE_CHACHA*/
-#if( defined( HAVE_CHACHA ) && defined( HAVE_POLY1305 ) )
+#if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
void bench_chacha20_poly1305_aead(void)
{
- double start, total, persec;
- int i;
+ double start;
+ int ret = 0, i, count;
byte authTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE];
- XMEMSET( authTag, 0, sizeof( authTag ) );
-
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for (i = 0; i < numBlocks; i++)
- {
- wc_ChaCha20Poly1305_Encrypt(key, iv, NULL, 0, plain, sizeof(plain),
- cipher, authTag );
- }
-
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
-
- printf("CHA-POLY %d %s took %5.3f seconds, %8.3f MB/s",
- numBlocks, blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
-
+ XMEMSET(authTag, 0, sizeof(authTag));
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_ChaCha20Poly1305_Encrypt(bench_key, bench_iv, NULL, 0,
+ bench_plain, BENCH_SIZE, bench_cipher, authTag);
+ if (ret < 0) {
+ printf("wc_ChaCha20Poly1305_Encrypt error: %d\n", ret);
+ break;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("CHA-POLY", 0, count, bench_size, start, ret);
}
#endif /* HAVE_CHACHA && HAVE_POLY1305 */
#ifndef NO_MD5
-void bench_md5(void)
+void bench_md5(int doAsync)
{
- Md5 hash;
- byte digest[MD5_DIGEST_SIZE];
- double start, total, persec;
- int i;
+ wc_Md5 hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_MD5_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitMd5_ex(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitMd5_ex failed, ret = %d\n", ret);
+ goto exit;
+ }
+ #ifdef WOLFSSL_PIC32MZ_HASH
+ wc_Md5SizeSet(&hash[i], numBlocks * BENCH_SIZE);
+ #endif
+ }
- wc_InitMd5(&hash);
- start = current_time(1);
- BEGIN_INTEL_CYCLES
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Md5Update(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_md5;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Md5Final(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_md5;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
+ }
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitMd5_ex(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_Md5Update(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_Md5Final(hash, digest[0]);
+ if (ret != 0)
+ goto exit_md5;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+ }
+exit_md5:
+ bench_stats_sym_finish("MD5", doAsync, count, bench_size, start, ret);
- for(i = 0; i < numBlocks; i++)
- wc_Md5Update(&hash, plain, sizeof(plain));
-
- wc_Md5Final(&hash, digest);
+exit:
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
+#ifdef WOLFSSL_ASYNC_CRYPT
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Md5Free(&hash[i]);
+ }
#endif
- printf("MD5 %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
}
-#endif /* NO_MD5 */
+#endif /* !NO_MD5 */
#ifndef NO_SHA
-void bench_sha(void)
-{
- Sha hash;
- byte digest[SHA_DIGEST_SIZE];
- double start, total, persec;
- int i, ret;
-
- ret = wc_InitSha(&hash);
- if (ret != 0) {
- printf("InitSha failed, ret = %d\n", ret);
- return;
+void bench_sha(int doAsync)
+{
+ wc_Sha hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitSha_ex(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitSha failed, ret = %d\n", ret);
+ goto exit;
+ }
+ #ifdef WOLFSSL_PIC32MZ_HASH
+ wc_ShaSizeSet(&hash[i], numBlocks * BENCH_SIZE);
+ #endif
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_ShaUpdate(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_ShaFinal(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
}
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for(i = 0; i < numBlocks; i++)
- wc_ShaUpdate(&hash, plain, sizeof(plain));
-
- wc_ShaFinal(&hash, digest);
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitSha_ex(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_ShaUpdate(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_ShaFinal(hash, digest[0]);
+ if (ret != 0)
+ goto exit_sha;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+ }
+exit_sha:
+ bench_stats_sym_finish("SHA", doAsync, count, bench_size, start, ret);
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
+exit:
- printf("SHA %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_ShaFree(&hash[i]);
+ }
+
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
}
#endif /* NO_SHA */
-#ifndef NO_SHA256
-void bench_sha256(void)
+#ifdef WOLFSSL_SHA224
+void bench_sha224(int doAsync)
{
- Sha256 hash;
- byte digest[SHA256_DIGEST_SIZE];
- double start, total, persec;
- int i, ret;
+ wc_Sha224 hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA224_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitSha224_ex(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitSha224_ex failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
- ret = wc_InitSha256(&hash);
- if (ret != 0) {
- printf("InitSha256 failed, ret = %d\n", ret);
- return;
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha224Update(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha224;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha224Final(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha224;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
}
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for(i = 0; i < numBlocks; i++) {
- ret = wc_Sha256Update(&hash, plain, sizeof(plain));
- if (ret != 0) {
- printf("Sha256Update failed, ret = %d\n", ret);
- return;
- }
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitSha224_ex(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_Sha224Update(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_Sha224Final(hash, digest[0]);
+ if (ret != 0)
+ goto exit_sha224;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
}
+exit_sha224:
+ bench_stats_sym_finish("SHA-224", doAsync, count, bench_size, start, ret);
- ret = wc_Sha256Final(&hash, digest);
- if (ret != 0) {
- printf("Sha256Final failed, ret = %d\n", ret);
- return;
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Sha224Free(&hash[i]);
}
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+}
#endif
- printf("SHA-256 %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+#ifndef NO_SHA256
+void bench_sha256(int doAsync)
+{
+ wc_Sha256 hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA256_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitSha256_ex(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitSha256_ex failed, ret = %d\n", ret);
+ goto exit;
+ }
+ #ifdef WOLFSSL_PIC32MZ_HASH
+ wc_Sha256SizeSet(&hash[i], numBlocks * BENCH_SIZE);
+ #endif
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha256Update(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha256;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha256Final(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha256;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
+ }
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitSha256_ex(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_Sha256Update(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_Sha256Final(hash, digest[0]);
+ if (ret != 0)
+ goto exit_sha256;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+ }
+exit_sha256:
+ bench_stats_sym_finish("SHA-256", doAsync, count, bench_size, start, ret);
+
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Sha256Free(&hash[i]);
+ }
+
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
}
#endif
#ifdef WOLFSSL_SHA384
-void bench_sha384(void)
+void bench_sha384(int doAsync)
{
- Sha384 hash;
- byte digest[SHA384_DIGEST_SIZE];
- double start, total, persec;
- int i, ret;
+ wc_Sha384 hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA384_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitSha384_ex(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitSha384_ex failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
- ret = wc_InitSha384(&hash);
- if (ret != 0) {
- printf("InitSha384 failed, ret = %d\n", ret);
- return;
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha384Update(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha384;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha384Final(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha384;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
}
- start = current_time(1);
- BEGIN_INTEL_CYCLES
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitSha384_ex(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_Sha384Update(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_Sha384Final(hash, digest[0]);
+ if (ret != 0)
+ goto exit_sha384;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+ }
+exit_sha384:
+ bench_stats_sym_finish("SHA-384", doAsync, count, bench_size, start, ret);
- for(i = 0; i < numBlocks; i++) {
- ret = wc_Sha384Update(&hash, plain, sizeof(plain));
- if (ret != 0) {
- printf("Sha384Update failed, ret = %d\n", ret);
- return;
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Sha384Free(&hash[i]);
+ }
+
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+}
+#endif
+
+#ifdef WOLFSSL_SHA512
+void bench_sha512(int doAsync)
+{
+ wc_Sha512 hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA512_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitSha512_ex(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitSha512_ex failed, ret = %d\n", ret);
+ goto exit;
+ }
}
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha512Update(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha512;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha512Final(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha512;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
+ }
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitSha512_ex(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_Sha512Update(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_Sha512Final(hash, digest[0]);
+ if (ret != 0)
+ goto exit_sha512;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
}
+exit_sha512:
+ bench_stats_sym_finish("SHA-512", doAsync, count, bench_size, start, ret);
- ret = wc_Sha384Final(&hash, digest);
- if (ret != 0) {
- printf("Sha384Final failed, ret = %d\n", ret);
- return;
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Sha512Free(&hash[i]);
}
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+}
#endif
- printf("SHA-384 %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+
+#ifdef WOLFSSL_SHA3
+#ifndef WOLFSSL_NOSHA3_224
+void bench_sha3_224(int doAsync)
+{
+ wc_Sha3 hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA3_224_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitSha3_224(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitSha3_224 failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha3_224_Update(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha3_224;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha3_224_Final(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha3_224;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
+ }
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitSha3_224(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_Sha3_224_Update(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_Sha3_224_Final(hash, digest[0]);
+ if (ret != 0)
+ goto exit_sha3_224;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+ }
+exit_sha3_224:
+ bench_stats_sym_finish("SHA3-224", doAsync, count, bench_size, start, ret);
+
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Sha3_224_Free(&hash[i]);
+ }
+
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
}
-#endif
+#endif /* WOLFSSL_NOSHA3_224 */
-#ifdef WOLFSSL_SHA512
-void bench_sha512(void)
-{
- Sha512 hash;
- byte digest[SHA512_DIGEST_SIZE];
- double start, total, persec;
- int i, ret;
-
- ret = wc_InitSha512(&hash);
- if (ret != 0) {
- printf("InitSha512 failed, ret = %d\n", ret);
- return;
+#ifndef WOLFSSL_NOSHA3_256
+void bench_sha3_256(int doAsync)
+{
+ wc_Sha3 hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA3_256_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitSha3_256(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitSha3_256 failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha3_256_Update(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha3_256;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha3_256_Final(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha3_256;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
}
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for(i = 0; i < numBlocks; i++) {
- ret = wc_Sha512Update(&hash, plain, sizeof(plain));
- if (ret != 0) {
- printf("Sha512Update failed, ret = %d\n", ret);
- return;
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitSha3_256(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_Sha3_256_Update(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_Sha3_256_Final(hash, digest[0]);
+ if (ret != 0)
+ goto exit_sha3_256;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+ }
+exit_sha3_256:
+ bench_stats_sym_finish("SHA3-256", doAsync, count, bench_size, start, ret);
+
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Sha3_256_Free(&hash[i]);
+ }
+
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+}
+#endif /* WOLFSSL_NOSHA3_256 */
+
+#ifndef WOLFSSL_NOSHA3_384
+void bench_sha3_384(int doAsync)
+{
+ wc_Sha3 hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA3_384_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitSha3_384(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitSha3_384 failed, ret = %d\n", ret);
+ goto exit;
+ }
}
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha3_384_Update(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha3_384;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha3_384_Final(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha3_384;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
+ }
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitSha3_384(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_Sha3_384_Update(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_Sha3_384_Final(hash, digest[0]);
+ if (ret != 0)
+ goto exit_sha3_384;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
}
+exit_sha3_384:
+ bench_stats_sym_finish("SHA3-384", doAsync, count, bench_size, start, ret);
- ret = wc_Sha512Final(&hash, digest);
- if (ret != 0) {
- printf("Sha512Final failed, ret = %d\n", ret);
- return;
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Sha3_384_Free(&hash[i]);
}
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+}
+#endif /* WOLFSSL_NOSHA3_384 */
- printf("SHA-512 %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+#ifndef WOLFSSL_NOSHA3_512
+void bench_sha3_512(int doAsync)
+{
+ wc_Sha3 hash[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA3_512_DIGEST_SIZE, HEAP_HINT);
+
+ /* clear for done cleanup */
+ XMEMSET(hash, 0, sizeof(hash));
+
+ if (digest_stream) {
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_InitSha3_512(&hash[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("InitSha3_512 failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha3_512_Update(&hash[i], bench_plain,
+ BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha3_512;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks, &pending)) {
+ ret = wc_Sha3_512_Final(&hash[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) {
+ goto exit_sha3_512;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
+ }
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks; times++) {
+ ret = wc_InitSha3_512(hash, HEAP_HINT, INVALID_DEVID);
+ ret |= wc_Sha3_512_Update(hash, bench_plain, BENCH_SIZE);
+ ret |= wc_Sha3_512_Final(hash, digest[0]);
+ if (ret != 0)
+ goto exit_sha3_512;
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+ }
+exit_sha3_512:
+ bench_stats_sym_finish("SHA3-512", doAsync, count, bench_size, start, ret);
+
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_Sha3_512_Free(&hash[i]);
+ }
+
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
}
+#endif /* WOLFSSL_NOSHA3_512 */
#endif
+
#ifdef WOLFSSL_RIPEMD
-void bench_ripemd(void)
+int bench_ripemd(void)
{
RipeMd hash;
byte digest[RIPEMD_DIGEST_SIZE];
- double start, total, persec;
- int i;
-
- wc_InitRipeMd(&hash);
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for(i = 0; i < numBlocks; i++)
- wc_RipeMdUpdate(&hash, plain, sizeof(plain));
-
- wc_RipeMdFinal(&hash, digest);
+ double start;
+ int i, count, ret = 0;
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
-#endif
+ if (digest_stream) {
+ ret = wc_InitRipeMd(&hash);
+ if (ret != 0) {
+ return ret;
+ }
- printf("RIPEMD %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_RipeMdUpdate(&hash, bench_plain, BENCH_SIZE);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+ ret = wc_RipeMdFinal(&hash, digest);
+ if (ret != 0) {
+ return ret;
+ }
+
+ count += i;
+ } while (bench_stats_sym_check(start));
+ }
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_InitRipeMd(&hash);
+ if (ret != 0) {
+ return ret;
+ }
+ ret = wc_RipeMdUpdate(&hash, bench_plain, BENCH_SIZE);
+ if (ret != 0) {
+ return ret;
+ }
+ ret = wc_RipeMdFinal(&hash, digest);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ }
+ bench_stats_sym_finish("RIPEMD", 0, count, bench_size, start, ret);
+
+ return 0;
}
#endif
#ifdef HAVE_BLAKE2
-void bench_blake2(void)
+void bench_blake2b(void)
{
Blake2b b2b;
byte digest[64];
- double start, total, persec;
- int i, ret;
-
- ret = wc_InitBlake2b(&b2b, 64);
- if (ret != 0) {
- printf("InitBlake2b failed, ret = %d\n", ret);
- return;
+ double start;
+ int ret = 0, i, count;
+
+ if (digest_stream) {
+ ret = wc_InitBlake2b(&b2b, 64);
+ if (ret != 0) {
+ printf("InitBlake2b failed, ret = %d\n", ret);
+ return;
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_Blake2bUpdate(&b2b, bench_plain, BENCH_SIZE);
+ if (ret != 0) {
+ printf("Blake2bUpdate failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ ret = wc_Blake2bFinal(&b2b, digest, 64);
+ if (ret != 0) {
+ printf("Blake2bFinal failed, ret = %d\n", ret);
+ return;
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
}
- start = current_time(1);
- BEGIN_INTEL_CYCLES
-
- for(i = 0; i < numBlocks; i++) {
- ret = wc_Blake2bUpdate(&b2b, plain, sizeof(plain));
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_InitBlake2b(&b2b, 64);
+ if (ret != 0) {
+ printf("InitBlake2b failed, ret = %d\n", ret);
+ return;
+ }
+ ret = wc_Blake2bUpdate(&b2b, bench_plain, BENCH_SIZE);
+ if (ret != 0) {
+ printf("Blake2bUpdate failed, ret = %d\n", ret);
+ return;
+ }
+ ret = wc_Blake2bFinal(&b2b, digest, 64);
+ if (ret != 0) {
+ printf("Blake2bFinal failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ }
+ bench_stats_sym_finish("BLAKE2b", 0, count, bench_size, start, ret);
+}
+#endif
+
+#if defined(HAVE_BLAKE2S)
+void bench_blake2s(void)
+{
+ Blake2s b2s;
+ byte digest[32];
+ double start;
+ int ret = 0, i, count;
+
+ if (digest_stream) {
+ ret = wc_InitBlake2s(&b2s, 32);
if (ret != 0) {
- printf("Blake2bUpdate failed, ret = %d\n", ret);
+ printf("InitBlake2s failed, ret = %d\n", ret);
return;
}
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_Blake2sUpdate(&b2s, bench_plain, BENCH_SIZE);
+ if (ret != 0) {
+ printf("Blake2sUpdate failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ ret = wc_Blake2sFinal(&b2s, digest, 32);
+ if (ret != 0) {
+ printf("Blake2sFinal failed, ret = %d\n", ret);
+ return;
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
}
-
- ret = wc_Blake2bFinal(&b2b, digest, 64);
- if (ret != 0) {
- printf("Blake2bFinal failed, ret = %d\n", ret);
- return;
+ else {
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_InitBlake2s(&b2s, 32);
+ if (ret != 0) {
+                    printf("InitBlake2s failed, ret = %d\n", ret);
+ return;
+ }
+ ret = wc_Blake2sUpdate(&b2s, bench_plain, BENCH_SIZE);
+ if (ret != 0) {
+                    printf("Blake2sUpdate failed, ret = %d\n", ret);
+ return;
+ }
+ ret = wc_Blake2sFinal(&b2s, digest, 32);
+ if (ret != 0) {
+ printf("Blake2sFinal failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
}
+ bench_stats_sym_finish("BLAKE2s", 0, count, bench_size, start, ret);
+}
+#endif
- END_INTEL_CYCLES
- total = current_time(0) - start;
- persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
- /* since using kB, convert to MB/s */
- persec = persec / 1024;
+
+#ifdef WOLFSSL_CMAC
+
+static void bench_cmac_helper(int keySz, const char* outMsg)
+{
+ Cmac cmac;
+ byte digest[AES_BLOCK_SIZE];
+ word32 digestSz = sizeof(digest);
+ double start;
+ int ret, i, count;
+
+ bench_stats_start(&count, &start);
+ do {
+ ret = wc_InitCmac(&cmac, bench_key, keySz, WC_CMAC_AES, NULL);
+ if (ret != 0) {
+ printf("InitCmac failed, ret = %d\n", ret);
+ return;
+ }
+
+ for (i = 0; i < numBlocks; i++) {
+ ret = wc_CmacUpdate(&cmac, bench_plain, BENCH_SIZE);
+ if (ret != 0) {
+ printf("CmacUpdate failed, ret = %d\n", ret);
+ return;
+ }
+ }
+ /* Note: final force zero's the Cmac struct */
+ ret = wc_CmacFinal(&cmac, digest, &digestSz);
+ if (ret != 0) {
+ printf("CmacFinal failed, ret = %d\n", ret);
+ return;
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish(outMsg, 0, count, bench_size, start, ret);
+}
+
+void bench_cmac(void)
+{
+#ifdef WOLFSSL_AES_128
+ bench_cmac_helper(16, "AES-128-CMAC");
+#endif
+#ifdef WOLFSSL_AES_256
+ bench_cmac_helper(32, "AES-256-CMAC");
#endif
- printf("BLAKE2b %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
- blockType, total, persec);
- SHOW_INTEL_CYCLES
- printf("\n");
}
+#endif /* WOLFSSL_CMAC */
+
+#ifdef HAVE_SCRYPT
+
+void bench_scrypt(void)
+{
+ byte derived[64];
+ double start;
+ int ret, i, count;
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < scryptCnt; i++) {
+ ret = wc_scrypt(derived, (byte*)"pleaseletmein", 13,
+ (byte*)"SodiumChloride", 14, 14, 8, 1, sizeof(derived));
+ if (ret != 0) {
+ printf("scrypt failed, ret = %d\n", ret);
+ goto exit;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit:
+ bench_stats_asym_finish("scrypt", 17, "", 0, count, start, ret);
+}
+
+#endif /* HAVE_SCRYPT */
+
+#ifndef NO_HMAC
+
+static void bench_hmac(int doAsync, int type, int digestSz,
+ byte* key, word32 keySz, const char* label)
+{
+ Hmac hmac[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_MAX_DIGEST_SIZE, HEAP_HINT);
+#else
+ byte digest[BENCH_MAX_PENDING][WC_MAX_DIGEST_SIZE];
#endif
+ (void)digestSz;
+
+ /* clear for done cleanup */
+ XMEMSET(hmac, 0, sizeof(hmac));
+
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ ret = wc_HmacInit(&hmac[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0) {
+ printf("wc_HmacInit failed for %s, ret = %d\n", label, ret);
+ goto exit;
+ }
+
+ ret = wc_HmacSetKey(&hmac[i], type, key, keySz);
+ if (ret != 0) {
+ printf("wc_HmacSetKey failed for %s, ret = %d\n", label, ret);
+ goto exit;
+ }
+ }
+
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < numBlocks || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hmac[i]), 0,
+ &times, numBlocks, &pending)) {
+ ret = wc_HmacUpdate(&hmac[i], bench_plain, BENCH_SIZE);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hmac[i]),
+ 0, &times, &pending)) {
+ goto exit_hmac;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+
+ times = 0;
+ do {
+ bench_async_poll(&pending);
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hmac[i]), 0,
+ &times, numBlocks, &pending)) {
+ ret = wc_HmacFinal(&hmac[i], digest[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hmac[i]),
+ 0, &times, &pending)) {
+ goto exit_hmac;
+ }
+ }
+ } /* for i */
+ } while (pending > 0);
+ } while (bench_stats_sym_check(start));
+exit_hmac:
+ bench_stats_sym_finish(label, doAsync, count, bench_size, start, ret);
+
+exit:
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_HmacFree(&hmac[i]);
+ }
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+#endif
+}
+
+#ifndef NO_MD5
+
+void bench_hmac_md5(int doAsync)
+{
+ byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b };
+
+ bench_hmac(doAsync, WC_MD5, WC_MD5_DIGEST_SIZE, key, sizeof(key),
+ "HMAC-MD5");
+}
+
+#endif /* NO_MD5 */
+
+#ifndef NO_SHA
+
+void bench_hmac_sha(int doAsync)
+{
+ byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b };
+
+ bench_hmac(doAsync, WC_SHA, WC_SHA_DIGEST_SIZE, key, sizeof(key),
+ "HMAC-SHA");
+}
+
+#endif /* NO_SHA */
+
+#ifdef WOLFSSL_SHA224
+
+void bench_hmac_sha224(int doAsync)
+{
+ byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b };
+
+ bench_hmac(doAsync, WC_SHA224, WC_SHA224_DIGEST_SIZE, key, sizeof(key),
+ "HMAC-SHA224");
+}
+
+#endif /* WOLFSSL_SHA224 */
+
+#ifndef NO_SHA256
+
+void bench_hmac_sha256(int doAsync)
+{
+ byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b };
+
+ bench_hmac(doAsync, WC_SHA256, WC_SHA256_DIGEST_SIZE, key, sizeof(key),
+ "HMAC-SHA256");
+}
+
+#endif /* NO_SHA256 */
+
+#ifdef WOLFSSL_SHA384
+
+void bench_hmac_sha384(int doAsync)
+{
+ byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b };
+
+ bench_hmac(doAsync, WC_SHA384, WC_SHA384_DIGEST_SIZE, key, sizeof(key),
+ "HMAC-SHA384");
+}
+
+#endif /* WOLFSSL_SHA384 */
+
+#ifdef WOLFSSL_SHA512
+
+void bench_hmac_sha512(int doAsync)
+{
+ byte key[] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b };
+
+ bench_hmac(doAsync, WC_SHA512, WC_SHA512_DIGEST_SIZE, key, sizeof(key),
+ "HMAC-SHA512");
+}
+
+#endif /* WOLFSSL_SHA512 */
+
+#ifndef NO_PWDBASED
+void bench_pbkdf2(void)
+{
+ double start;
+ int ret = 0, count = 0;
+ const char* passwd32 = "passwordpasswordpasswordpassword";
+ const byte salt32[] = { 0x78, 0x57, 0x8E, 0x5a, 0x5d, 0x63, 0xcb, 0x06,
+ 0x78, 0x57, 0x8E, 0x5a, 0x5d, 0x63, 0xcb, 0x06,
+ 0x78, 0x57, 0x8E, 0x5a, 0x5d, 0x63, 0xcb, 0x06,
+ 0x78, 0x57, 0x8E, 0x5a, 0x5d, 0x63, 0xcb, 0x06 };
+ byte derived[32];
+
+ bench_stats_start(&count, &start);
+ do {
+ ret = wc_PBKDF2(derived, (const byte*)passwd32, (int)XSTRLEN(passwd32),
+ salt32, (int)sizeof(salt32), 1000, 32, WC_SHA256);
+ count++;
+ } while (bench_stats_sym_check(start));
+ bench_stats_sym_finish("PBKDF2", 32, count, 32, start, ret);
+}
+#endif /* !NO_PWDBASED */
+
+#endif /* NO_HMAC */
#ifndef NO_RSA
+#if defined(WOLFSSL_KEY_GEN)
+static void bench_rsaKeyGen_helper(int doAsync, int keySz)
+{
+ RsaKey genKey[BENCH_MAX_PENDING];
+ double start;
+ int ret = 0, i, count = 0, times, pending = 0;
+ const long rsa_e_val = WC_RSA_EXPONENT;
+ const char**desc = bench_desc_words[lng_index];
+
+ /* clear for done cleanup */
+ XMEMSET(genKey, 0, sizeof(genKey));
+
+ bench_stats_start(&count, &start);
+ do {
+ /* while free pending slots in queue, submit ops */
+ for (times = 0; times < genTimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times, genTimes, &pending)) {
+
+ wc_FreeRsaKey(&genKey[i]);
+ ret = wc_InitRsaKey_ex(&genKey[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ ret = wc_MakeRsaKey(&genKey[i], keySz, rsa_e_val, &gRng);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times, &pending)) {
+ goto exit;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit:
+ bench_stats_asym_finish("RSA", keySz, desc[2], doAsync, count, start, ret);
+
+ /* cleanup */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_FreeRsaKey(&genKey[i]);
+ }
+}
+
+void bench_rsaKeyGen(int doAsync)
+{
+ int k, keySz;
+#ifndef WOLFSSL_SP_MATH
+ const int keySizes[2] = {1024, 2048};
+#else
+ const int keySizes[1] = {2048};
+#endif
-#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
+ for (k = 0; k < (int)(sizeof(keySizes)/sizeof(int)); k++) {
+ keySz = keySizes[k];
+ bench_rsaKeyGen_helper(doAsync, keySz);
+ }
+}
+
+
+void bench_rsaKeyGen_size(int doAsync, int keySz)
+{
+ bench_rsaKeyGen_helper(doAsync, keySz);
+}
+#endif /* WOLFSSL_KEY_GEN */
+
+#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) && \
+ !defined(USE_CERT_BUFFERS_3072)
#if defined(WOLFSSL_MDK_SHELL)
static char *certRSAname = "certs/rsa2048.der";
/* set by shell command */
@@ -1128,88 +4361,442 @@ void bench_blake2(void)
#endif
#endif
-void bench_rsa(void)
+#define RSA_BUF_SIZE 384 /* for up to 3072 bit */
+
+#if !defined(WOLFSSL_RSA_VERIFY_INLINE) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#elif defined(WOLFSSL_PUBLIC_MP) || !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ #if defined(USE_CERT_BUFFERS_2048)
+static unsigned char rsa_2048_sig[] = {
+ 0x8c, 0x9e, 0x37, 0xbf, 0xc3, 0xa6, 0xba, 0x1c,
+ 0x53, 0x22, 0x40, 0x4b, 0x8b, 0x0d, 0x3c, 0x0e,
+ 0x2e, 0x8c, 0x31, 0x2c, 0x47, 0xbf, 0x03, 0x48,
+ 0x18, 0x46, 0x73, 0x8d, 0xd7, 0xdd, 0x17, 0x64,
+ 0x0d, 0x7f, 0xdc, 0x74, 0xed, 0x80, 0xc3, 0xe8,
+ 0x9a, 0x18, 0x33, 0xd4, 0xe6, 0xc5, 0xe1, 0x54,
+ 0x75, 0xd1, 0xbb, 0x40, 0xde, 0xa8, 0xb9, 0x1b,
+ 0x14, 0xe8, 0xc1, 0x39, 0xeb, 0xa0, 0x69, 0x8a,
+ 0xc6, 0x9b, 0xef, 0x53, 0xb5, 0x23, 0x2b, 0x78,
+ 0x06, 0x43, 0x37, 0x11, 0x81, 0x84, 0x73, 0x33,
+ 0x33, 0xfe, 0xf7, 0x5d, 0x2b, 0x84, 0xd6, 0x83,
+ 0xd6, 0xdd, 0x55, 0x33, 0xef, 0xd1, 0xf7, 0x12,
+ 0xb0, 0xc2, 0x0e, 0xb1, 0x78, 0xd4, 0xa8, 0xa3,
+ 0x25, 0xeb, 0xed, 0x9a, 0xb3, 0xee, 0xc3, 0x7e,
+ 0xce, 0x13, 0x18, 0x86, 0x31, 0xe1, 0xef, 0x01,
+ 0x0f, 0x6e, 0x67, 0x24, 0x74, 0xbd, 0x0b, 0x7f,
+ 0xa9, 0xca, 0x6f, 0xaa, 0x83, 0x28, 0x90, 0x40,
+ 0xf1, 0xb5, 0x10, 0x0e, 0x26, 0x03, 0x05, 0x5d,
+ 0x87, 0xb4, 0xe0, 0x4c, 0x98, 0xd8, 0xc6, 0x42,
+ 0x89, 0x77, 0xeb, 0xb6, 0xd4, 0xe6, 0x26, 0xf3,
+ 0x31, 0x25, 0xde, 0x28, 0x38, 0x58, 0xe8, 0x2c,
+ 0xf4, 0x56, 0x7c, 0xb6, 0xfd, 0x99, 0xb0, 0xb0,
+ 0xf4, 0x83, 0xb6, 0x74, 0xa9, 0x5b, 0x9f, 0xe8,
+ 0xe9, 0xf1, 0xa1, 0x2a, 0xbd, 0xf6, 0x83, 0x28,
+ 0x09, 0xda, 0xa6, 0xd6, 0xcd, 0x61, 0x60, 0xf7,
+ 0x13, 0x4e, 0x46, 0x57, 0x38, 0x1e, 0x11, 0x92,
+ 0x6b, 0x6b, 0xcf, 0xd3, 0xf4, 0x8b, 0x66, 0x03,
+ 0x25, 0xa3, 0x7a, 0x2f, 0xce, 0xc1, 0x85, 0xa5,
+ 0x48, 0x91, 0x8a, 0xb3, 0x4f, 0x5d, 0x98, 0xb1,
+ 0x69, 0x58, 0x47, 0x69, 0x0c, 0x52, 0xdc, 0x42,
+ 0x4c, 0xef, 0xe8, 0xd4, 0x4d, 0x6a, 0x33, 0x7d,
+ 0x9e, 0xd2, 0x51, 0xe6, 0x41, 0xbf, 0x4f, 0xa2
+};
+ #elif defined(USE_CERT_BUFFERS_3072)
+static unsigned char rsa_3072_sig[] = {
+ 0x1a, 0xd6, 0x0d, 0xfd, 0xe3, 0x41, 0x95, 0x76,
+ 0x27, 0x16, 0x7d, 0xc7, 0x94, 0x16, 0xca, 0xa8,
+ 0x26, 0x08, 0xbe, 0x78, 0x87, 0x72, 0x4c, 0xd9,
+ 0xa7, 0xfc, 0x33, 0x77, 0x2d, 0x53, 0x07, 0xb5,
+ 0x8c, 0xce, 0x48, 0x17, 0x9b, 0xff, 0x9f, 0x9b,
+ 0x17, 0xc4, 0xbb, 0x72, 0xed, 0xdb, 0xa0, 0x34,
+ 0x69, 0x5b, 0xc7, 0x4e, 0xbf, 0xec, 0x13, 0xc5,
+ 0x98, 0x71, 0x9a, 0x4e, 0x18, 0x0e, 0xcb, 0xe7,
+ 0xc6, 0xd5, 0x21, 0x31, 0x7c, 0x0d, 0xae, 0x14,
+ 0x2b, 0x87, 0x4f, 0x77, 0x95, 0x2e, 0x26, 0xe2,
+ 0x83, 0xfe, 0x49, 0x1e, 0x87, 0x19, 0x4a, 0x63,
+ 0x73, 0x75, 0xf1, 0xf5, 0x71, 0xd2, 0xce, 0xd4,
+ 0x39, 0x2b, 0xd9, 0xe0, 0x76, 0x70, 0xc8, 0xf8,
+ 0xed, 0xdf, 0x90, 0x57, 0x17, 0xb9, 0x16, 0xf6,
+ 0xe9, 0x49, 0x48, 0xce, 0x5a, 0x8b, 0xe4, 0x84,
+ 0x7c, 0xf3, 0x31, 0x68, 0x97, 0x45, 0x68, 0x38,
+ 0x50, 0x3a, 0x70, 0xbd, 0xb3, 0xd3, 0xd2, 0xe0,
+ 0x56, 0x5b, 0xc2, 0x0c, 0x2c, 0x10, 0x70, 0x7b,
+ 0xd4, 0x99, 0xf9, 0x38, 0x31, 0xb1, 0x86, 0xa0,
+ 0x07, 0xf1, 0xf6, 0x53, 0xb0, 0x44, 0x82, 0x40,
+ 0xd2, 0xab, 0x0e, 0x71, 0x5d, 0xe1, 0xea, 0x3a,
+ 0x77, 0xc9, 0xef, 0xfe, 0x54, 0x65, 0xa3, 0x49,
+ 0xfd, 0xa5, 0x33, 0xaa, 0x16, 0x1a, 0x38, 0xe7,
+ 0xaa, 0xb7, 0x13, 0xb2, 0x3b, 0xc7, 0x00, 0x87,
+ 0x12, 0xfe, 0xfd, 0xf4, 0x55, 0x6d, 0x1d, 0x4a,
+ 0x0e, 0xad, 0xd0, 0x4c, 0x55, 0x91, 0x60, 0xd9,
+ 0xef, 0x74, 0x69, 0x22, 0x8c, 0x51, 0x65, 0xc2,
+ 0x04, 0xac, 0xd3, 0x8d, 0xf7, 0x35, 0x29, 0x13,
+ 0x6d, 0x61, 0x7c, 0x39, 0x2f, 0x41, 0x4c, 0xdf,
+ 0x38, 0xfd, 0x1a, 0x7d, 0x42, 0xa7, 0x6f, 0x3f,
+ 0x3d, 0x9b, 0xd1, 0x97, 0xab, 0xc0, 0xa7, 0x28,
+ 0x1c, 0xc0, 0x02, 0x26, 0xeb, 0xce, 0xf9, 0xe1,
+ 0x34, 0x45, 0xaf, 0xbf, 0x8d, 0xb8, 0xe0, 0xff,
+ 0xd9, 0x6f, 0x77, 0xf3, 0xf7, 0xed, 0x6a, 0xbb,
+ 0x03, 0x52, 0xfb, 0x38, 0xfc, 0xea, 0x9f, 0xc9,
+ 0x98, 0xed, 0x21, 0x45, 0xaf, 0x43, 0x2b, 0x64,
+ 0x96, 0x82, 0x30, 0xe9, 0xb4, 0x36, 0x89, 0x77,
+ 0x07, 0x4a, 0xc6, 0x1f, 0x38, 0x7a, 0xee, 0xb6,
+ 0x86, 0xf6, 0x2f, 0x03, 0xec, 0xa2, 0xe5, 0x48,
+ 0xe5, 0x5a, 0xf5, 0x1c, 0xd2, 0xd9, 0xd8, 0x2d,
+ 0x9d, 0x06, 0x07, 0xc9, 0x8b, 0x5d, 0xe0, 0x0f,
+ 0x5e, 0x0c, 0x53, 0x27, 0xff, 0x23, 0xee, 0xca,
+ 0x5e, 0x4d, 0xf1, 0x95, 0x77, 0x78, 0x1f, 0xf2,
+ 0x44, 0x5b, 0x7d, 0x01, 0x49, 0x61, 0x6f, 0x6d,
+ 0xbf, 0xf5, 0x19, 0x06, 0x39, 0xe9, 0xe9, 0x29,
+ 0xde, 0x47, 0x5e, 0x2e, 0x1f, 0x68, 0xf4, 0x32,
+ 0x5e, 0xe9, 0xd0, 0xa7, 0xb4, 0x2a, 0x45, 0xdf,
+ 0x15, 0x7d, 0x0d, 0x5b, 0xef, 0xc6, 0x23, 0xac
+};
+ #else
+ #error Not Supported Yet!
+ #endif
+#endif
+
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || defined(WOLFSSL_PUBLIC_MP)
+static void bench_rsa_helper(int doAsync, RsaKey rsaKey[BENCH_MAX_PENDING],
+ int rsaKeySz)
{
- int i;
- int ret;
- size_t bytes;
- word32 idx = 0;
- const byte* tmp;
+ int ret = 0, i, times, count = 0, pending = 0;
+ word32 idx = 0;
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ const char* messageStr = "Everyone gets Friday off.";
+ const int len = (int)XSTRLEN((char*)messageStr);
+#endif
+ double start = 0.0f;
+ const char**desc = bench_desc_words[lng_index];
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ DECLARE_VAR_INIT(message, byte, len, messageStr, HEAP_HINT);
+#endif
+ #if !defined(WOLFSSL_MDK5_COMPLv5)
+ /* MDK5 compiler regard this as a executable statement, and does not allow declarations after the line. */
+ DECLARE_ARRAY_DYNAMIC_DEC(enc, byte, BENCH_MAX_PENDING, rsaKeySz, HEAP_HINT);
+ #else
+ byte* enc[BENCH_MAX_PENDING];
+ #endif
+ #if !defined(WOLFSSL_RSA_VERIFY_INLINE) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ #if !defined(WOLFSSL_MDK5_COMPLv5)
+ /* MDK5 compiler regard this as a executable statement, and does not allow declarations after the line. */
+ DECLARE_ARRAY_DYNAMIC_DEC(out, byte, BENCH_MAX_PENDING, rsaKeySz, HEAP_HINT);
+ #else
+ int idxout;
+ byte* out[BENCH_MAX_PENDING];
+ #endif
+ #else
+ byte* out[BENCH_MAX_PENDING];
+ #endif
- byte message[] = "Everyone gets Friday off.";
- byte enc[256]; /* for up to 2048 bit */
- const int len = (int)strlen((char*)message);
- double start, total, each, milliEach;
+ DECLARE_ARRAY_DYNAMIC_EXE(enc, byte, BENCH_MAX_PENDING, rsaKeySz, HEAP_HINT);
+ #if !defined(WOLFSSL_RSA_VERIFY_INLINE) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ DECLARE_ARRAY_DYNAMIC_EXE(out, byte, BENCH_MAX_PENDING, rsaKeySz, HEAP_HINT);
+ #endif
- RsaKey rsaKey;
- int rsaKeySz = 2048; /* used in printf */
+ if (!rsa_sign_verify) {
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ /* begin public RSA */
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < ntimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]),
+ 1, &times, ntimes, &pending)) {
+ ret = wc_RsaPublicEncrypt(message, (word32)len, enc[i],
+ rsaKeySz/8, &rsaKey[i],
+ &gRng);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(
+ &rsaKey[i]), 1, &times, &pending)) {
+ goto exit_rsa_pub;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_rsa_pub:
+ bench_stats_asym_finish("RSA", rsaKeySz, desc[0], doAsync, count,
+ start, ret);
+#endif
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ if (ret < 0) {
+ goto exit;
+ }
+
+ /* capture resulting encrypt length */
+ idx = (word32)(rsaKeySz/8);
+
+ /* begin private async RSA */
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < ntimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]),
+ 1, &times, ntimes, &pending)) {
+ ret = wc_RsaPrivateDecrypt(enc[i], idx, out[i],
+ rsaKeySz/8, &rsaKey[i]);
+ if (!bench_async_handle(&ret,
+ BENCH_ASYNC_GET_DEV(&rsaKey[i]),
+ 1, &times, &pending)) {
+ goto exit;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit:
+ bench_stats_asym_finish("RSA", rsaKeySz, desc[1], doAsync, count,
+ start, ret);
+#endif
+ }
+ else {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ /* begin RSA sign */
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < ntimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]),
+ 1, &times, ntimes, &pending)) {
+ ret = wc_RsaSSL_Sign(message, len, enc[i],
+ rsaKeySz/8, &rsaKey[i], &gRng);
+ if (!bench_async_handle(&ret,
+ BENCH_ASYNC_GET_DEV(&rsaKey[i]),
+ 1, &times, &pending)) {
+ goto exit_rsa_sign;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_rsa_sign:
+ bench_stats_asym_finish("RSA", rsaKeySz, desc[4], doAsync, count, start,
+ ret);
+
+ if (ret < 0) {
+ goto exit;
+ }
+#endif
+
+ /* capture resulting encrypt length */
+ idx = rsaKeySz/8;
+
+ /* begin RSA verify */
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < ntimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]),
+ 1, &times, ntimes, &pending)) {
+ #if !defined(WOLFSSL_RSA_VERIFY_INLINE) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ ret = wc_RsaSSL_Verify(enc[i], idx, out[i],
+ rsaKeySz/8, &rsaKey[i]);
+ #elif defined(USE_CERT_BUFFERS_2048)
+ XMEMCPY(enc[i], rsa_2048_sig, sizeof(rsa_2048_sig));
+ idx = sizeof(rsa_2048_sig);
+ out[i] = NULL;
+ ret = wc_RsaSSL_VerifyInline(enc[i], idx, &out[i],
+ &rsaKey[i]);
+ if (ret > 0)
+ ret = 0;
+ #elif defined(USE_CERT_BUFFERS_3072)
+ XMEMCPY(enc[i], rsa_3072_sig, sizeof(rsa_3072_sig));
+ idx = sizeof(rsa_3072_sig);
+ out[i] = NULL;
+ ret = wc_RsaSSL_VerifyInline(enc[i], idx, &out[i],
+ &rsaKey[i]);
+ if (ret > 0)
+ ret = 0;
+ #endif
+ if (!bench_async_handle(&ret,
+ BENCH_ASYNC_GET_DEV(&rsaKey[i]),
+ 1, &times, &pending)) {
+ goto exit_rsa_verify;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_rsa_verify:
+ bench_stats_asym_finish("RSA", rsaKeySz, desc[5], doAsync, count,
+ start, ret);
+ }
+
+ FREE_ARRAY_DYNAMIC(enc, BENCH_MAX_PENDING, HEAP_HINT);
+#if !defined(WOLFSSL_RSA_VERIFY_INLINE) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ FREE_ARRAY_DYNAMIC(out, BENCH_MAX_PENDING, HEAP_HINT);
+#endif
+ FREE_VAR(message, HEAP_HINT);
+}
+#endif
+
+void bench_rsa(int doAsync)
+{
+ int ret = 0, i;
+ RsaKey rsaKey[BENCH_MAX_PENDING];
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || defined(WOLFSSL_PUBLIC_MP)
+ int rsaKeySz; /* used in printf */
+ size_t bytes;
+ const byte* tmp;
+ word32 idx;
#ifdef USE_CERT_BUFFERS_1024
tmp = rsa_key_der_1024;
- bytes = sizeof_rsa_key_der_1024;
+ bytes = (size_t)sizeof_rsa_key_der_1024;
rsaKeySz = 1024;
#elif defined(USE_CERT_BUFFERS_2048)
tmp = rsa_key_der_2048;
- bytes = sizeof_rsa_key_der_2048;
+ bytes = (size_t)sizeof_rsa_key_der_2048;
+ rsaKeySz = 2048;
+#elif defined(USE_CERT_BUFFERS_3072)
+ tmp = rsa_key_der_3072;
+ bytes = (size_t)sizeof_rsa_key_der_3072;
+ rsaKeySz = 3072;
#else
#error "need a cert buffer size"
#endif /* USE_CERT_BUFFERS */
-
-
-#ifdef HAVE_CAVIUM
- if (wc_RsaInitCavium(&rsaKey, CAVIUM_DEV_ID) != 0)
- printf("RSA init cavium failed\n");
#endif
- ret = wc_InitRsaKey(&rsaKey, 0);
- if (ret < 0) {
- printf("InitRsaKey failed\n");
- return;
- }
- ret = wc_RsaPrivateKeyDecode(tmp, &idx, &rsaKey, (word32)bytes);
- start = current_time(1);
+ /* clear for done cleanup */
+ XMEMSET(rsaKey, 0, sizeof(rsaKey));
- for (i = 0; i < ntimes; i++)
- ret = wc_RsaPublicEncrypt(message,len,enc,sizeof(enc), &rsaKey, &rng);
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ /* setup an async context for each key */
+ if ((ret = wc_InitRsaKey_ex(&rsaKey[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID)) < 0) {
+ goto exit_bench_rsa;
+ }
- total = current_time(0) - start;
- each = total / ntimes; /* per second */
- milliEach = each * 1000; /* milliseconds */
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ #ifdef WC_RSA_BLINDING
+ ret = wc_RsaSetRNG(&rsaKey[i], &gRng);
+ if (ret != 0)
+ goto exit_bench_rsa;
+ #endif
+#endif
- printf("RSA %d encryption took %6.3f milliseconds, avg over %d"
- " iterations\n", rsaKeySz, milliEach, ntimes);
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ /* decode the private key */
+ idx = 0;
+ if ((ret = wc_RsaPrivateKeyDecode(tmp, &idx, &rsaKey[i],
+ (word32)bytes)) != 0) {
+ printf("wc_RsaPrivateKeyDecode failed! %d\n", ret);
+ goto exit_bench_rsa;
+ }
+#elif defined(WOLFSSL_PUBLIC_MP)
+ #ifdef USE_CERT_BUFFERS_2048
+ ret = mp_read_unsigned_bin(&rsaKey[i].n, &tmp[12], 256);
+ if (ret != 0) {
+ printf("wc_RsaPrivateKeyDecode failed! %d\n", ret);
+ goto exit_bench_rsa;
+ }
+ ret = mp_set_int(&rsaKey[i].e, WC_RSA_EXPONENT);
+ if (ret != 0) {
+ printf("wc_RsaPrivateKeyDecode failed! %d\n", ret);
+ goto exit_bench_rsa;
+ }
+ #else
+ #error Not supported yet!
+ #endif
+ (void)idx;
+ (void)bytes;
+#endif
- if (ret < 0) {
- printf("Rsa Public Encrypt failed\n");
- return;
}
- start = current_time(1);
-
- for (i = 0; i < ntimes; i++) {
- byte out[256]; /* for up to 2048 bit */
- wc_RsaPrivateDecrypt(enc, (word32)ret, out, sizeof(out), &rsaKey);
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || defined(WOLFSSL_PUBLIC_MP)
+ bench_rsa_helper(doAsync, rsaKey, rsaKeySz);
+#endif
+exit_bench_rsa:
+ /* cleanup */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_FreeRsaKey(&rsaKey[i]);
}
+}
- total = current_time(0) - start;
- each = total / ntimes; /* per second */
- milliEach = each * 1000; /* milliseconds */
-
- printf("RSA %d decryption took %6.3f milliseconds, avg over %d"
- " iterations\n", rsaKeySz, milliEach, ntimes);
- wc_FreeRsaKey(&rsaKey);
-#ifdef HAVE_CAVIUM
- wc_RsaFreeCavium(&rsaKey);
-#endif
+#ifdef WOLFSSL_KEY_GEN
+/* bench any size of RSA key */
+void bench_rsa_key(int doAsync, int rsaKeySz)
+{
+ int ret = 0, i, pending = 0;
+ RsaKey rsaKey[BENCH_MAX_PENDING];
+ int isPending[BENCH_MAX_PENDING];
+ long exp = 65537l;
+
+ /* clear for done cleanup */
+ XMEMSET(rsaKey, 0, sizeof(rsaKey));
+ XMEMSET(isPending, 0, sizeof(isPending));
+
+ /* init keys */
+ do {
+ pending = 0;
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (!isPending[i]) { /* if making the key is pending then just call
+ * wc_MakeRsaKey again */
+ /* setup an async context for each key */
+ if ((ret = wc_InitRsaKey_ex(&rsaKey[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID)) < 0) {
+ goto exit_bench_rsa_key;
+ }
+
+ #ifdef WC_RSA_BLINDING
+ ret = wc_RsaSetRNG(&rsaKey[i], &gRng);
+ if (ret != 0)
+ goto exit_bench_rsa_key;
+ #endif
+ }
+
+ /* create the RSA key */
+ ret = wc_MakeRsaKey(&rsaKey[i], rsaKeySz, exp, &gRng);
+ if (ret == WC_PENDING_E) {
+ isPending[i] = 1;
+ pending = 1;
+ }
+ else if (ret != 0) {
+ printf("wc_MakeRsaKey failed! %d\n", ret);
+ goto exit_bench_rsa_key;
+ }
+ } /* for i */
+ } while (pending > 0);
+
+ bench_rsa_helper(doAsync, rsaKey, rsaKeySz);
+exit_bench_rsa_key:
+
+ /* cleanup */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_FreeRsaKey(&rsaKey[i]);
+ }
}
-#endif
+#endif /* WOLFSSL_KEY_GEN */
+#endif /* !NO_RSA */
#ifndef NO_DH
-
-#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
+#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) && \
+ !defined(USE_CERT_BUFFERS_3072)
#if defined(WOLFSSL_MDK_SHELL)
static char *certDHname = "certs/dh2048.der";
/* set by shell command */
@@ -1223,134 +4810,186 @@ void bench_rsa(void)
#endif
#endif
-void bench_dh(void)
-{
- int i ;
- size_t bytes;
- word32 idx = 0, pubSz, privSz = 0, pubSz2, privSz2, agreeSz;
- const byte* tmp;
+#define BENCH_DH_KEY_SIZE 384 /* for 3072 bit */
+#define BENCH_DH_PRIV_SIZE (BENCH_DH_KEY_SIZE/8)
- byte pub[256]; /* for 2048 bit */
- byte pub2[256]; /* for 2048 bit */
- byte agree[256]; /* for 2048 bit */
- byte priv[32]; /* for 2048 bit */
- byte priv2[32]; /* for 2048 bit */
+void bench_dh(int doAsync)
+{
+ int ret = 0, i;
+ int count = 0, times, pending = 0;
+ const byte* tmp = NULL;
+ double start = 0.0f;
+ DhKey dhKey[BENCH_MAX_PENDING];
+ int dhKeySz = BENCH_DH_KEY_SIZE * 8; /* used in printf */
+ const char**desc = bench_desc_words[lng_index];
+#ifndef NO_ASN
+ size_t bytes = 0;
+ word32 idx;
+#endif
+ word32 pubSz[BENCH_MAX_PENDING];
+ word32 privSz[BENCH_MAX_PENDING];
+ word32 pubSz2;
+ word32 privSz2;
+ word32 agreeSz[BENCH_MAX_PENDING];
+#ifdef HAVE_FFDHE_2048
+ const DhParams *params = NULL;
+#endif
- double start, total, each, milliEach;
- DhKey dhKey;
- int dhKeySz = 2048; /* used in printf */
+ DECLARE_ARRAY(pub, byte, BENCH_MAX_PENDING, BENCH_DH_KEY_SIZE, HEAP_HINT);
+ DECLARE_VAR(pub2, byte, BENCH_DH_KEY_SIZE, HEAP_HINT);
+ DECLARE_ARRAY(agree, byte, BENCH_MAX_PENDING, BENCH_DH_KEY_SIZE, HEAP_HINT);
+ DECLARE_ARRAY(priv, byte, BENCH_MAX_PENDING, BENCH_DH_PRIV_SIZE, HEAP_HINT);
+ DECLARE_VAR(priv2, byte, BENCH_DH_PRIV_SIZE, HEAP_HINT);
- (void)idx;
(void)tmp;
-
-#ifdef USE_CERT_BUFFERS_1024
- tmp = dh_key_der_1024;
- bytes = sizeof_dh_key_der_1024;
- dhKeySz = 1024;
+ if (!use_ffdhe) {
+#if defined(NO_ASN)
+ dhKeySz = 1024;
+ /* do nothing, but don't use default FILE */
+#elif defined(USE_CERT_BUFFERS_1024)
+ tmp = dh_key_der_1024;
+ bytes = (size_t)sizeof_dh_key_der_1024;
+ dhKeySz = 1024;
#elif defined(USE_CERT_BUFFERS_2048)
- tmp = dh_key_der_2048;
- bytes = sizeof_dh_key_der_2048;
-#elif defined(NO_ASN)
- dhKeySz = 1024;
- /* do nothing, but don't use default FILE */
+ tmp = dh_key_der_2048;
+ bytes = (size_t)sizeof_dh_key_der_2048;
+ dhKeySz = 2048;
+#elif defined(USE_CERT_BUFFERS_3072)
+ tmp = dh_key_der_3072;
+ bytes = (size_t)sizeof_dh_key_der_3072;
+ dhKeySz = 3072;
#else
#error "need to define a cert buffer size"
#endif /* USE_CERT_BUFFERS */
-
-
- wc_InitDhKey(&dhKey);
-#ifdef NO_ASN
- bytes = wc_DhSetKey(&dhKey, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
-#else
- bytes = wc_DhKeyDecode(tmp, &idx, &dhKey, (word32)bytes);
+ }
+#ifdef HAVE_FFDHE_2048
+ else if (use_ffdhe == 2048) {
+ params = wc_Dh_ffdhe2048_Get();
+ dhKeySz = 2048;
+ }
#endif
- if (bytes != 0) {
- printf("dhekydecode failed, can't benchmark\n");
- return;
+#ifdef HAVE_FFDHE_3072
+ else if (use_ffdhe == 3072) {
+ params = wc_Dh_ffdhe3072_Get();
+ dhKeySz = 3072;
}
-
- start = current_time(1);
-
- for (i = 0; i < ntimes; i++)
- wc_DhGenerateKeyPair(&dhKey, &rng, priv, &privSz, pub, &pubSz);
-
- total = current_time(0) - start;
- each = total / ntimes; /* per second */
- milliEach = each * 1000; /* milliseconds */
-
- printf("DH %d key generation %6.3f milliseconds, avg over %d"
- " iterations\n", dhKeySz, milliEach, ntimes);
-
- wc_DhGenerateKeyPair(&dhKey, &rng, priv2, &privSz2, pub2, &pubSz2);
- start = current_time(1);
-
- for (i = 0; i < ntimes; i++)
- wc_DhAgree(&dhKey, agree, &agreeSz, priv, privSz, pub2, pubSz2);
-
- total = current_time(0) - start;
- each = total / ntimes; /* per second */
- milliEach = each * 1000; /* milliseconds */
-
- printf("DH %d key agreement %6.3f milliseconds, avg over %d"
- " iterations\n", dhKeySz, milliEach, ntimes);
-
- wc_FreeDhKey(&dhKey);
-}
#endif
-#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)
-void bench_rsaKeyGen(void)
-{
- RsaKey genKey;
- double start, total, each, milliEach;
- int i;
-
- /* 1024 bit */
- start = current_time(1);
-
- for(i = 0; i < genTimes; i++) {
- wc_InitRsaKey(&genKey, 0);
- wc_MakeRsaKey(&genKey, 1024, 65537, &rng);
- wc_FreeRsaKey(&genKey);
+ /* clear for done cleanup */
+ XMEMSET(dhKey, 0, sizeof(dhKey));
+
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ /* setup an async context for each key */
+ ret = wc_InitDhKey_ex(&dhKey[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID);
+ if (ret != 0)
+ goto exit;
+
+ /* setup key */
+ if (!use_ffdhe) {
+ #ifdef NO_ASN
+ ret = wc_DhSetKey(&dhKey[i], dh_p, sizeof(dh_p), dh_g,
+ sizeof(dh_g));
+ #else
+ idx = 0;
+ ret = wc_DhKeyDecode(tmp, &idx, &dhKey[i], (word32)bytes);
+ #endif
+ }
+ #if defined(HAVE_FFDHE_2048) || defined(HAVE_FFDHE_3072)
+ else if (params != NULL) {
+ ret = wc_DhSetKey(&dhKey[i], params->p, params->p_len, params->g,
+ params->g_len);
+ }
+ #endif
+ if (ret != 0) {
+ printf("DhKeyDecode failed %d, can't benchmark\n", ret);
+ goto exit;
+ }
}
- total = current_time(0) - start;
- each = total / genTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("\n");
- printf("RSA 1024 key generation %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, genTimes);
+ /* Key Gen */
+ bench_stats_start(&count, &start);
+ do {
+ /* while free pending slots in queue, submit ops */
+ for (times = 0; times < genTimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times, genTimes, &pending)) {
+ privSz[i] = 0;
+ ret = wc_DhGenerateKeyPair(&dhKey[i], &gRng, priv[i], &privSz[i],
+ pub[i], &pubSz[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times, &pending)) {
+ goto exit_dh_gen;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_dh_gen:
+ bench_stats_asym_finish("DH", dhKeySz, desc[2], doAsync, count, start, ret);
+
+ if (ret < 0) {
+ goto exit;
+ }
- /* 2048 bit */
- start = current_time(1);
+ /* Generate key to use as other public */
+ ret = wc_DhGenerateKeyPair(&dhKey[0], &gRng, priv2, &privSz2, pub2, &pubSz2);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wc_AsyncWait(ret, &dhKey[0].asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
- for(i = 0; i < genTimes; i++) {
- wc_InitRsaKey(&genKey, 0);
- wc_MakeRsaKey(&genKey, 2048, 65537, &rng);
- wc_FreeRsaKey(&genKey);
+ /* Key Agree */
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < agreeTimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times, agreeTimes, &pending)) {
+ ret = wc_DhAgree(&dhKey[i], agree[i], &agreeSz[i], priv[i], privSz[i],
+ pub2, pubSz2);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times, &pending)) {
+ goto exit;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit:
+ bench_stats_asym_finish("DH", dhKeySz, desc[3], doAsync, count, start, ret);
+
+ /* cleanup */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_FreeDhKey(&dhKey[i]);
}
- total = current_time(0) - start;
- each = total / genTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("RSA 2048 key generation %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, genTimes);
+ FREE_ARRAY(pub, BENCH_MAX_PENDING, HEAP_HINT);
+ FREE_VAR(pub2, HEAP_HINT);
+ FREE_ARRAY(priv, BENCH_MAX_PENDING, HEAP_HINT);
+ FREE_VAR(priv2, HEAP_HINT);
+ FREE_ARRAY(agree, BENCH_MAX_PENDING, HEAP_HINT);
}
-#endif /* WOLFSSL_KEY_GEN */
+#endif /* !NO_DH */
+
#ifdef HAVE_NTRU
byte GetEntropy(ENTROPY_CMD cmd, byte* out);
byte GetEntropy(ENTROPY_CMD cmd, byte* out)
{
if (cmd == INIT)
- return (wc_InitRng(&rng) == 0) ? 1 : 0;
+ return 1; /* using local rng */
if (out == NULL)
return 0;
if (cmd == GET_BYTE_OF_ENTROPY)
- return (wc_RNG_GenerateBlock(&rng, out, 1) == 0) ? 1 : 0;
+ return (wc_RNG_GenerateBlock(&gRng, out, 1) == 0) ? 1 : 0;
if (cmd == GET_NUM_BYTES_PER_BYTE_OF_ENTROPY) {
*out = 1;
@@ -1363,17 +5002,21 @@ byte GetEntropy(ENTROPY_CMD cmd, byte* out)
void bench_ntru(void)
{
int i;
- double start, total, each, milliEach;
+ double start;
- byte public_key[557];
+ byte public_key[1027];
word16 public_key_len = sizeof(public_key);
- byte private_key[607];
+ byte private_key[1120];
word16 private_key_len = sizeof(private_key);
+ word16 ntruBits = 128;
+ word16 type = 0;
+ word32 ret;
- byte ciphertext[552];
+ byte ciphertext[1022];
word16 ciphertext_len;
byte plaintext[16];
word16 plaintext_len;
+ const char**desc = bench_desc_words[lng_index];
DRBG_HANDLE drbg;
static byte const aes_key[] = {
@@ -1381,425 +5024,799 @@ void bench_ntru(void)
0x7b, 0x12, 0x49, 0x88, 0xaf, 0xb3, 0x22, 0xd8
};
- static byte const cyasslStr[] = {
- 'C', 'y', 'a', 'S', 'S', 'L', ' ', 'N', 'T', 'R', 'U'
+ static byte const wolfsslStr[] = {
+ 'w', 'o', 'l', 'f', 'S', 'S', 'L', ' ', 'N', 'T', 'R', 'U'
};
- word32 rc = ntru_crypto_drbg_instantiate(112, cyasslStr, sizeof(cyasslStr),
- (ENTROPY_FN) GetEntropy, &drbg);
- if(rc != DRBG_OK) {
- printf("NTRU drbg instantiate failed\n");
- return;
- }
+ for (ntruBits = 128; ntruBits < 257; ntruBits += 64) {
+ switch (ntruBits) {
+ case 128:
+ type = NTRU_EES439EP1;
+ break;
+ case 192:
+ type = NTRU_EES593EP1;
+ break;
+ case 256:
+ type = NTRU_EES743EP1;
+ break;
+ }
- rc = ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_EES401EP2,
- &public_key_len, NULL, &private_key_len, NULL);
- if (rc != NTRU_OK) {
- ntru_crypto_drbg_uninstantiate(drbg);
- printf("NTRU failed to get key lengths\n");
- return;
- }
+ ret = ntru_crypto_drbg_instantiate(ntruBits, wolfsslStr,
+ sizeof(wolfsslStr), (ENTROPY_FN) GetEntropy, &drbg);
+ if(ret != DRBG_OK) {
+ printf("NTRU drbg instantiate failed\n");
+ return;
+ }
- rc = ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_EES401EP2, &public_key_len,
- public_key, &private_key_len,
- private_key);
+ /* set key sizes */
+ ret = ntru_crypto_ntru_encrypt_keygen(drbg, type, &public_key_len,
+ NULL, &private_key_len, NULL);
+ if (ret != NTRU_OK) {
+ ntru_crypto_drbg_uninstantiate(drbg);
+ printf("NTRU failed to get key lengths\n");
+ return;
+ }
- ntru_crypto_drbg_uninstantiate(drbg);
+ ret = ntru_crypto_ntru_encrypt_keygen(drbg, type, &public_key_len,
+ public_key, &private_key_len,
+ private_key);
- if (rc != NTRU_OK) {
ntru_crypto_drbg_uninstantiate(drbg);
- printf("NTRU keygen failed\n");
- return;
- }
-
- rc = ntru_crypto_drbg_instantiate(112, NULL, 0, (ENTROPY_FN)GetEntropy,
- &drbg);
- if (rc != DRBG_OK) {
- printf("NTRU error occurred during DRBG instantiation\n");
- return;
- }
-
- rc = ntru_crypto_ntru_encrypt(drbg, public_key_len, public_key, sizeof(
- aes_key), aes_key, &ciphertext_len, NULL);
-
- if (rc != NTRU_OK) {
- printf("NTRU error occurred requesting the buffer size needed\n");
- return;
- }
- start = current_time(1);
- for (i = 0; i < ntimes; i++) {
-
- rc = ntru_crypto_ntru_encrypt(drbg, public_key_len, public_key, sizeof(
- aes_key), aes_key, &ciphertext_len, ciphertext);
+ if (ret != NTRU_OK) {
+ printf("NTRU keygen failed\n");
+ return;
+ }
- if (rc != NTRU_OK) {
- printf("NTRU encrypt error\n");
+ ret = ntru_crypto_drbg_instantiate(ntruBits, NULL, 0,
+ (ENTROPY_FN)GetEntropy, &drbg);
+ if (ret != DRBG_OK) {
+ printf("NTRU error occurred during DRBG instantiation\n");
return;
}
- }
- rc = ntru_crypto_drbg_uninstantiate(drbg);
+ ret = ntru_crypto_ntru_encrypt(drbg, public_key_len, public_key,
+ sizeof(aes_key), aes_key, &ciphertext_len, NULL);
- if (rc != DRBG_OK) {
- printf("NTRU error occurred uninstantiating the DRBG\n");
- return;
- }
-
- total = current_time(0) - start;
- each = total / ntimes; /* per second */
- milliEach = each * 1000; /* milliseconds */
+ if (ret != NTRU_OK) {
+ printf("NTRU error occurred requesting the buffer size needed\n");
+ return;
+ }
- printf("NTRU 112 encryption took %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, ntimes);
+ bench_stats_start(&i, &start);
+ for (i = 0; i < ntimes; i++) {
+ ret = ntru_crypto_ntru_encrypt(drbg, public_key_len, public_key,
+ sizeof(aes_key), aes_key, &ciphertext_len, ciphertext);
+ if (ret != NTRU_OK) {
+ printf("NTRU encrypt error\n");
+ return;
+ }
+ }
+ bench_stats_asym_finish("NTRU", ntruBits, desc[6], 0, i, start, ret);
+ ret = ntru_crypto_drbg_uninstantiate(drbg);
+ if (ret != DRBG_OK) {
+ printf("NTRU error occurred uninstantiating the DRBG\n");
+ return;
+ }
- rc = ntru_crypto_ntru_decrypt(private_key_len, private_key, ciphertext_len,
- ciphertext, &plaintext_len, NULL);
+ ret = ntru_crypto_ntru_decrypt(private_key_len, private_key,
+ ciphertext_len, ciphertext, &plaintext_len, NULL);
- if (rc != NTRU_OK) {
- printf("NTRU decrypt error occurred getting the buffer size needed\n");
- return;
- }
+ if (ret != NTRU_OK) {
+ printf("NTRU decrypt error occurred getting the buffer size needed\n");
+ return;
+ }
- plaintext_len = sizeof(plaintext);
- start = current_time(1);
+ plaintext_len = sizeof(plaintext);
- for (i = 0; i < ntimes; i++) {
- rc = ntru_crypto_ntru_decrypt(private_key_len, private_key,
+ bench_stats_start(&i, &start);
+ for (i = 0; i < ntimes; i++) {
+ ret = ntru_crypto_ntru_decrypt(private_key_len, private_key,
ciphertext_len, ciphertext,
&plaintext_len, plaintext);
- if (rc != NTRU_OK) {
- printf("NTRU error occurred decrypting the key\n");
- return;
+ if (ret != NTRU_OK) {
+ printf("NTRU error occurred decrypting the key\n");
+ return;
+ }
}
+ bench_stats_asym_finish("NTRU", ntruBits, desc[7], 0, i, start, ret);
}
- total = current_time(0) - start;
- each = total / ntimes; /* per second */
- milliEach = each * 1000; /* milliseconds */
-
- printf("NTRU 112 decryption took %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, ntimes);
}
void bench_ntruKeyGen(void)
{
- double start, total, each, milliEach;
+ double start;
int i;
- byte public_key[557]; /* 2048 key equivalent to rsa */
+ byte public_key[1027];
word16 public_key_len = sizeof(public_key);
- byte private_key[607];
+ byte private_key[1120];
word16 private_key_len = sizeof(private_key);
+ word16 ntruBits = 128;
+ word16 type = 0;
+ word32 ret;
+ const char**desc = bench_desc_words[lng_index];
DRBG_HANDLE drbg;
static uint8_t const pers_str[] = {
- 'C', 'y', 'a', 'S', 'S', 'L', ' ', 't', 'e', 's', 't'
+ 'w', 'o', 'l', 'f', 'S', 'S', 'L', ' ', 't', 'e', 's', 't'
};
- word32 rc = ntru_crypto_drbg_instantiate(112, pers_str, sizeof(pers_str),
- GetEntropy, &drbg);
- if(rc != DRBG_OK) {
- printf("NTRU drbg instantiate failed\n");
- return;
- }
+ for (ntruBits = 128; ntruBits < 257; ntruBits += 64) {
+ ret = ntru_crypto_drbg_instantiate(ntruBits, pers_str,
+ sizeof(pers_str), GetEntropy, &drbg);
+ if (ret != DRBG_OK) {
+ printf("NTRU drbg instantiate failed\n");
+ return;
+ }
- start = current_time(1);
+ switch (ntruBits) {
+ case 128:
+ type = NTRU_EES439EP1;
+ break;
+ case 192:
+ type = NTRU_EES593EP1;
+ break;
+ case 256:
+ type = NTRU_EES743EP1;
+ break;
+ }
- for(i = 0; i < genTimes; i++) {
- ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_EES401EP2, &public_key_len,
- public_key, &private_key_len,
- private_key);
- }
+ /* set key sizes */
+ ret = ntru_crypto_ntru_encrypt_keygen(drbg, type, &public_key_len,
+ NULL, &private_key_len, NULL);
- total = current_time(0) - start;
+ bench_stats_start(&i, &start);
+ for (i = 0; i < genTimes; i++) {
+ ret = ntru_crypto_ntru_encrypt_keygen(drbg, type, &public_key_len,
+ public_key, &private_key_len,
+ private_key);
+ }
+ bench_stats_asym_finish("NTRU", ntruBits, desc[2], 0, i, start, ret);
- rc = ntru_crypto_drbg_uninstantiate(drbg);
+ if (ret != NTRU_OK) {
+ return;
+ }
- if (rc != NTRU_OK) {
- printf("NTRU drbg uninstantiate failed\n");
- return;
- }
+ ret = ntru_crypto_drbg_uninstantiate(drbg);
- each = total / genTimes;
- milliEach = each * 1000;
+ if (ret != NTRU_OK) {
+ printf("NTRU drbg uninstantiate failed\n");
+ return;
+ }
+ }
+}
+#endif
- printf("\n");
- printf("NTRU 112 key generation %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, genTimes);
+#ifdef HAVE_ECC
-}
+#ifndef BENCH_ECC_SIZE
+ #ifdef HAVE_ECC384
+ #define BENCH_ECC_SIZE 48
+ #else
+ #define BENCH_ECC_SIZE 32
+ #endif
#endif
-#ifdef HAVE_ECC
-void bench_eccKeyGen(void)
-{
- ecc_key genKey;
- double start, total, each, milliEach;
- int i;
-
- /* 256 bit */
- start = current_time(1);
+/* Default to testing P-256 */
+static int bench_ecc_size = 32;
- for(i = 0; i < genTimes; i++) {
- wc_ecc_init(&genKey);
- wc_ecc_make_key(&rng, 32, &genKey);
- wc_ecc_free(&genKey);
+void bench_eccMakeKey(int doAsync)
+{
+ int ret = 0, i, times, count, pending = 0;
+ const int keySize = bench_ecc_size;
+ ecc_key genKey[BENCH_MAX_PENDING];
+ double start;
+ const char**desc = bench_desc_words[lng_index];
+
+ /* clear for done cleanup */
+ XMEMSET(&genKey, 0, sizeof(genKey));
+
+ /* ECC Make Key */
+ bench_stats_start(&count, &start);
+ do {
+ /* while free pending slots in queue, submit ops */
+ for (times = 0; times < genTimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times, genTimes, &pending)) {
+
+ wc_ecc_free(&genKey[i]);
+ ret = wc_ecc_init_ex(&genKey[i], HEAP_HINT, doAsync ? devId : INVALID_DEVID);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ ret = wc_ecc_make_key(&gRng, keySize, &genKey[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times, &pending)) {
+ goto exit;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit:
+ bench_stats_asym_finish("ECC", keySize * 8, desc[2], doAsync, count, start, ret);
+
+ /* cleanup */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_ecc_free(&genKey[i]);
}
-
- total = current_time(0) - start;
- each = total / genTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("\n");
- printf("ECC 256 key generation %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, genTimes);
}
-
-void bench_eccKeyAgree(void)
+void bench_ecc(int doAsync)
{
- ecc_key genKey, genKey2;
- double start, total, each, milliEach;
- int i, ret;
- byte shared[32];
- byte sig[64+16]; /* der encoding too */
- byte digest[32];
- word32 x = 0;
+ int ret = 0, i, times, count, pending = 0;
+ const int keySize = bench_ecc_size;
+ ecc_key genKey[BENCH_MAX_PENDING];
+#ifdef HAVE_ECC_DHE
+ ecc_key genKey2[BENCH_MAX_PENDING];
+#endif
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+#ifdef HAVE_ECC_VERIFY
+ int verify[BENCH_MAX_PENDING];
+#endif
+#endif
+ word32 x[BENCH_MAX_PENDING];
+ double start;
+ const char**desc = bench_desc_words[lng_index];
- wc_ecc_init(&genKey);
- wc_ecc_init(&genKey2);
+#ifdef HAVE_ECC_DHE
+ DECLARE_ARRAY(shared, byte, BENCH_MAX_PENDING, BENCH_ECC_SIZE, HEAP_HINT);
+#endif
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+ DECLARE_ARRAY(sig, byte, BENCH_MAX_PENDING, ECC_MAX_SIG_SIZE, HEAP_HINT);
+ DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, BENCH_ECC_SIZE, HEAP_HINT);
+#endif
- ret = wc_ecc_make_key(&rng, 32, &genKey);
- if (ret != 0) {
- printf("ecc_make_key failed\n");
- return;
+ /* clear for done cleanup */
+ XMEMSET(&genKey, 0, sizeof(genKey));
+#ifdef HAVE_ECC_DHE
+ XMEMSET(&genKey2, 0, sizeof(genKey2));
+#endif
+
+ /* init keys */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ /* setup an context for each key */
+ if ((ret = wc_ecc_init_ex(&genKey[i], HEAP_HINT,
+ doAsync ? devId : INVALID_DEVID)) < 0) {
+ goto exit;
+ }
+ ret = wc_ecc_make_key(&gRng, keySize, &genKey[i]);
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wc_AsyncWait(ret, &genKey[i].asyncDev, WC_ASYNC_FLAG_NONE);
+ #endif
+ if (ret < 0) {
+ goto exit;
+ }
+
+ #ifdef HAVE_ECC_DHE
+ if ((ret = wc_ecc_init_ex(&genKey2[i], HEAP_HINT, INVALID_DEVID)) < 0) {
+ goto exit;
+ }
+ if ((ret = wc_ecc_make_key(&gRng, keySize, &genKey2[i])) > 0) {
+ goto exit;
+ }
+ #endif
}
- ret = wc_ecc_make_key(&rng, 32, &genKey2);
- if (ret != 0) {
- printf("ecc_make_key failed\n");
- return;
+
+#ifdef HAVE_ECC_DHE
+
+ /* ECC Shared Secret */
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < agreeTimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, agreeTimes, &pending)) {
+ x[i] = (word32)keySize;
+ ret = wc_ecc_shared_secret(&genKey[i], &genKey2[i], shared[i], &x[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, &pending)) {
+ goto exit_ecdhe;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_ecdhe:
+ bench_stats_asym_finish("ECDHE", keySize * 8, desc[3], doAsync, count, start, ret);
+
+ if (ret < 0) {
+ goto exit;
}
+#endif /* HAVE_ECC_DHE */
- /* 256 bit */
- start = current_time(1);
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
- for(i = 0; i < agreeTimes; i++) {
- x = sizeof(shared);
- ret = wc_ecc_shared_secret(&genKey, &genKey2, shared, &x);
- if (ret != 0) {
- printf("ecc_shared_secret failed\n");
- return;
+ /* Init digest to sign */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ for (count = 0; count < keySize; count++) {
+ digest[i][count] = (byte)count;
}
}
- total = current_time(0) - start;
- each = total / agreeTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("EC-DHE key agreement %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, agreeTimes);
+ /* ECC Sign */
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < agreeTimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, agreeTimes, &pending)) {
+ if (genKey[i].state == 0)
+ x[i] = ECC_MAX_SIG_SIZE;
+ ret = wc_ecc_sign_hash(digest[i], (word32)keySize, sig[i], &x[i],
+ &gRng, &genKey[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, &pending)) {
+ goto exit_ecdsa_sign;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_ecdsa_sign:
+ bench_stats_asym_finish("ECDSA", keySize * 8, desc[4], doAsync, count, start, ret);
- /* make dummy digest */
- for (i = 0; i < (int)sizeof(digest); i++)
- digest[i] = (byte)i;
+ if (ret < 0) {
+ goto exit;
+ }
+
+#ifdef HAVE_ECC_VERIFY
+
+ /* ECC Verify */
+ bench_stats_start(&count, &start);
+ do {
+ for (times = 0; times < agreeTimes || pending > 0; ) {
+ bench_async_poll(&pending);
+
+ /* while free pending slots in queue, submit ops */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, agreeTimes, &pending)) {
+ if (genKey[i].state == 0)
+ verify[i] = 0;
+ ret = wc_ecc_verify_hash(sig[i], x[i], digest[i],
+ (word32)keySize, &verify[i], &genKey[i]);
+ if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, &pending)) {
+ goto exit_ecdsa_verify;
+ }
+ }
+ } /* for i */
+ } /* for times */
+ count += times;
+ } while (bench_stats_sym_check(start));
+exit_ecdsa_verify:
+ bench_stats_asym_finish("ECDSA", keySize * 8, desc[5], doAsync, count, start, ret);
+#endif /* HAVE_ECC_VERIFY */
+#endif /* !NO_ASN && HAVE_ECC_SIGN */
+
+exit:
+
+ /* cleanup */
+ for (i = 0; i < BENCH_MAX_PENDING; i++) {
+ wc_ecc_free(&genKey[i]);
+ #ifdef HAVE_ECC_DHE
+ wc_ecc_free(&genKey2[i]);
+ #endif
+ }
+#ifdef HAVE_ECC_DHE
+ FREE_ARRAY(shared, BENCH_MAX_PENDING, HEAP_HINT);
+#endif
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+ FREE_ARRAY(sig, BENCH_MAX_PENDING, HEAP_HINT);
+ FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+#endif
+}
- start = current_time(1);
- for(i = 0; i < agreeTimes; i++) {
- x = sizeof(sig);
- ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, &rng, &genKey);
- if (ret != 0) {
- printf("ecc_sign_hash failed\n");
- return;
- }
+#ifdef HAVE_ECC_ENCRYPT
+void bench_eccEncrypt(void)
+{
+ ecc_key userA, userB;
+ const int keySize = bench_ecc_size;
+ byte msg[48];
+ byte out[80];
+ word32 outSz = sizeof(out);
+ word32 bench_plainSz = BENCH_SIZE;
+ int ret, i, count;
+ double start;
+ const char**desc = bench_desc_words[lng_index];
+
+ ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+ if (ret != 0) {
+ printf("wc_ecc_encrypt make key A failed: %d\n", ret);
+ return;
}
- total = current_time(0) - start;
- each = total / agreeTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("EC-DSA sign time %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, agreeTimes);
+ ret = wc_ecc_init_ex(&userB, HEAP_HINT, devId);
+ if (ret != 0) {
+ printf("wc_ecc_encrypt make key B failed: %d\n", ret);
+ wc_ecc_free(&userA);
+ return;
+ }
- start = current_time(1);
+ ret = wc_ecc_make_key(&gRng, keySize, &userA);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0)
+ goto exit;
+ ret = wc_ecc_make_key(&gRng, keySize, &userB);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0)
+ goto exit;
- for(i = 0; i < agreeTimes; i++) {
- int verify = 0;
- ret = wc_ecc_verify_hash(sig, x, digest, sizeof(digest), &verify, &genKey);
- if (ret != 0) {
- printf("ecc_verify_hash failed\n");
- return;
+ for (i = 0; i < (int)sizeof(msg); i++)
+ msg[i] = i;
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < ntimes; i++) {
+ /* encrypt msg to B */
+ ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz, NULL);
+ if (ret != 0) {
+ printf("wc_ecc_encrypt failed! %d\n", ret);
+ goto exit_enc;
+ }
}
- }
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit_enc:
+ bench_stats_asym_finish("ECC", keySize * 8, desc[6], 0, count, start, ret);
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < ntimes; i++) {
+ /* decrypt msg from A */
+ ret = wc_ecc_decrypt(&userB, &userA, out, outSz, bench_plain, &bench_plainSz, NULL);
+ if (ret != 0) {
+ printf("wc_ecc_decrypt failed! %d\n", ret);
+ goto exit_dec;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit_dec:
+ bench_stats_asym_finish("ECC", keySize * 8, desc[7], 0, count, start, ret);
- total = current_time(0) - start;
- each = total / agreeTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("EC-DSA verify time %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, agreeTimes);
+exit:
- wc_ecc_free(&genKey2);
- wc_ecc_free(&genKey);
+ /* cleanup */
+ wc_ecc_free(&userB);
+ wc_ecc_free(&userA);
}
+#endif
#endif /* HAVE_ECC */
#ifdef HAVE_CURVE25519
void bench_curve25519KeyGen(void)
{
curve25519_key genKey;
- double start, total, each, milliEach;
- int i;
-
- /* 256 bit */
- start = current_time(1);
-
- for(i = 0; i < genTimes; i++) {
- wc_curve25519_make_key(&rng, 32, &genKey);
- wc_curve25519_free(&genKey);
- }
-
- total = current_time(0) - start;
- each = total / genTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("\n");
- printf("CURVE25519 256 key generation %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, genTimes);
+ double start;
+ int ret = 0, i, count;
+ const char**desc = bench_desc_words[lng_index];
+
+ /* Key Gen */
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < genTimes; i++) {
+ ret = wc_curve25519_make_key(&gRng, 32, &genKey);
+ wc_curve25519_free(&genKey);
+ if (ret != 0) {
+ printf("wc_curve25519_make_key failed: %d\n", ret);
+ break;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_asym_finish("CURVE", 25519, desc[2], 0, count, start, ret);
}
-
+#ifdef HAVE_CURVE25519_SHARED_SECRET
void bench_curve25519KeyAgree(void)
{
curve25519_key genKey, genKey2;
- double start, total, each, milliEach;
- int i, ret;
+ double start;
+ int ret, i, count;
byte shared[32];
+ const char**desc = bench_desc_words[lng_index];
word32 x = 0;
wc_curve25519_init(&genKey);
wc_curve25519_init(&genKey2);
- ret = wc_curve25519_make_key(&rng, 32, &genKey);
+ ret = wc_curve25519_make_key(&gRng, 32, &genKey);
if (ret != 0) {
printf("curve25519_make_key failed\n");
return;
}
- ret = wc_curve25519_make_key(&rng, 32, &genKey2);
+ ret = wc_curve25519_make_key(&gRng, 32, &genKey2);
if (ret != 0) {
- printf("curve25519_make_key failed\n");
+ printf("curve25519_make_key failed: %d\n", ret);
+ wc_curve25519_free(&genKey);
return;
}
- /* 256 bit */
- start = current_time(1);
-
- for(i = 0; i < agreeTimes; i++) {
- x = sizeof(shared);
- ret = wc_curve25519_shared_secret(&genKey, &genKey2, shared, &x);
- if (ret != 0) {
- printf("curve25519_shared_secret failed\n");
- return;
+ /* Shared secret */
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < agreeTimes; i++) {
+ x = sizeof(shared);
+ ret = wc_curve25519_shared_secret(&genKey, &genKey2, shared, &x);
+ if (ret != 0) {
+ printf("curve25519_shared_secret failed: %d\n", ret);
+ goto exit;
+ }
}
- }
-
- total = current_time(0) - start;
- each = total / agreeTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("CURVE25519 key agreement %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, agreeTimes);
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit:
+ bench_stats_asym_finish("CURVE", 25519, desc[3], 0, count, start, ret);
wc_curve25519_free(&genKey2);
wc_curve25519_free(&genKey);
}
+#endif /* HAVE_CURVE25519_SHARED_SECRET */
#endif /* HAVE_CURVE25519 */
#ifdef HAVE_ED25519
void bench_ed25519KeyGen(void)
{
ed25519_key genKey;
- double start, total, each, milliEach;
- int i;
-
- /* 256 bit */
- start = current_time(1);
-
- for(i = 0; i < genTimes; i++) {
- wc_ed25519_init(&genKey);
- wc_ed25519_make_key(&rng, 32, &genKey);
- wc_ed25519_free(&genKey);
- }
-
- total = current_time(0) - start;
- each = total / genTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("\n");
- printf("ED25519 key generation %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, genTimes);
+ double start;
+ int i, count;
+ const char**desc = bench_desc_words[lng_index];
+
+ /* Key Gen */
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < genTimes; i++) {
+ wc_ed25519_init(&genKey);
+ (void)wc_ed25519_make_key(&gRng, 32, &genKey);
+ wc_ed25519_free(&genKey);
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_asym_finish("ED", 25519, desc[2], 0, count, start, 0);
}
void bench_ed25519KeySign(void)
{
+ int ret;
ed25519_key genKey;
- double start, total, each, milliEach;
- int i, ret;
+#ifdef HAVE_ED25519_SIGN
+ double start;
+ int i, count;
byte sig[ED25519_SIG_SIZE];
byte msg[512];
word32 x = 0;
+#endif
+ const char**desc = bench_desc_words[lng_index];
wc_ed25519_init(&genKey);
- ret = wc_ed25519_make_key(&rng, ED25519_KEY_SIZE, &genKey);
+ ret = wc_ed25519_make_key(&gRng, ED25519_KEY_SIZE, &genKey);
if (ret != 0) {
printf("ed25519_make_key failed\n");
return;
}
+
+#ifdef HAVE_ED25519_SIGN
/* make dummy msg */
for (i = 0; i < (int)sizeof(msg); i++)
msg[i] = (byte)i;
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < agreeTimes; i++) {
+ x = sizeof(sig);
+ ret = wc_ed25519_sign_msg(msg, sizeof(msg), sig, &x, &genKey);
+ if (ret != 0) {
+ printf("ed25519_sign_msg failed\n");
+ goto exit_ed_sign;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit_ed_sign:
+ bench_stats_asym_finish("ED", 25519, desc[4], 0, count, start, ret);
+
+#ifdef HAVE_ED25519_VERIFY
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < agreeTimes; i++) {
+ int verify = 0;
+ ret = wc_ed25519_verify_msg(sig, x, msg, sizeof(msg), &verify,
+ &genKey);
+ if (ret != 0 || verify != 1) {
+ printf("ed25519_verify_msg failed\n");
+ goto exit_ed_verify;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit_ed_verify:
+ bench_stats_asym_finish("ED", 25519, desc[5], 0, count, start, ret);
+#endif /* HAVE_ED25519_VERIFY */
+#endif /* HAVE_ED25519_SIGN */
- start = current_time(1);
+ wc_ed25519_free(&genKey);
+}
+#endif /* HAVE_ED25519 */
- for(i = 0; i < agreeTimes; i++) {
- x = sizeof(sig);
- ret = wc_ed25519_sign_msg(msg, sizeof(msg), sig, &x, &genKey);
- if (ret != 0) {
- printf("ed25519_sign_msg failed\n");
- return;
+#ifdef HAVE_CURVE448
+void bench_curve448KeyGen(void)
+{
+ curve448_key genKey;
+ double start;
+ int ret = 0, i, count;
+ const char**desc = bench_desc_words[lng_index];
+
+ /* Key Gen */
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < genTimes; i++) {
+ ret = wc_curve448_make_key(&gRng, 56, &genKey);
+ wc_curve448_free(&genKey);
+ if (ret != 0) {
+ printf("wc_curve448_make_key failed: %d\n", ret);
+ break;
+ }
}
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_asym_finish("CURVE", 448, desc[2], 0, count, start, ret);
+}
+
+#ifdef HAVE_CURVE448_SHARED_SECRET
+void bench_curve448KeyAgree(void)
+{
+ curve448_key genKey, genKey2;
+ double start;
+ int ret, i, count;
+ byte shared[56];
+ const char**desc = bench_desc_words[lng_index];
+ word32 x = 0;
+
+ wc_curve448_init(&genKey);
+ wc_curve448_init(&genKey2);
+
+ ret = wc_curve448_make_key(&gRng, 56, &genKey);
+ if (ret != 0) {
+ printf("curve448_make_key failed\n");
+ return;
+ }
+ ret = wc_curve448_make_key(&gRng, 56, &genKey2);
+ if (ret != 0) {
+ printf("curve448_make_key failed: %d\n", ret);
+ wc_curve448_free(&genKey);
+ return;
}
- total = current_time(0) - start;
- each = total / agreeTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("ED25519 sign time %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, agreeTimes);
-
- start = current_time(1);
-
- for(i = 0; i < agreeTimes; i++) {
- int verify = 0;
- ret = wc_ed25519_verify_msg(sig, x, msg, sizeof(msg), &verify,
- &genKey);
- if (ret != 0 || verify != 1) {
- printf("ed25519_verify_msg failed\n");
- return;
+ /* Shared secret */
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < agreeTimes; i++) {
+ x = sizeof(shared);
+ ret = wc_curve448_shared_secret(&genKey, &genKey2, shared, &x);
+ if (ret != 0) {
+ printf("curve448_shared_secret failed: %d\n", ret);
+ goto exit;
+ }
}
- }
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit:
+ bench_stats_asym_finish("CURVE", 448, desc[3], 0, count, start, ret);
- total = current_time(0) - start;
- each = total / agreeTimes; /* per second */
- milliEach = each * 1000; /* millisconds */
- printf("ED25519 verify time %6.3f milliseconds, avg over %d"
- " iterations\n", milliEach, agreeTimes);
+ wc_curve448_free(&genKey2);
+ wc_curve448_free(&genKey);
+}
+#endif /* HAVE_CURVE448_SHARED_SECRET */
+#endif /* HAVE_CURVE448 */
- wc_ed25519_free(&genKey);
+#ifdef HAVE_ED448
+void bench_ed448KeyGen(void)
+{
+ ed448_key genKey;
+ double start;
+ int i, count;
+ const char**desc = bench_desc_words[lng_index];
+
+ /* Key Gen */
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < genTimes; i++) {
+ wc_ed448_init(&genKey);
+ (void)wc_ed448_make_key(&gRng, ED448_KEY_SIZE, &genKey);
+ wc_ed448_free(&genKey);
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+ bench_stats_asym_finish("ED", 448, desc[2], 0, count, start, 0);
}
-#endif /* HAVE_ED25519 */
-#ifdef _WIN32
+void bench_ed448KeySign(void)
+{
+ int ret;
+ ed448_key genKey;
+#ifdef HAVE_ED448_SIGN
+ double start;
+ int i, count;
+ byte sig[ED448_SIG_SIZE];
+ byte msg[512];
+ word32 x = 0;
+#endif
+ const char**desc = bench_desc_words[lng_index];
+
+ wc_ed448_init(&genKey);
+
+ ret = wc_ed448_make_key(&gRng, ED448_KEY_SIZE, &genKey);
+ if (ret != 0) {
+ printf("ed448_make_key failed\n");
+ return;
+ }
+
+#ifdef HAVE_ED448_SIGN
+ /* make dummy msg */
+ for (i = 0; i < (int)sizeof(msg); i++)
+ msg[i] = (byte)i;
+
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < agreeTimes; i++) {
+ x = sizeof(sig);
+ ret = wc_ed448_sign_msg(msg, sizeof(msg), sig, &x, &genKey,
+ NULL, 0);
+ if (ret != 0) {
+ printf("ed448_sign_msg failed\n");
+ goto exit_ed_sign;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit_ed_sign:
+ bench_stats_asym_finish("ED", 448, desc[4], 0, count, start, ret);
+
+#ifdef HAVE_ED448_VERIFY
+ bench_stats_start(&count, &start);
+ do {
+ for (i = 0; i < agreeTimes; i++) {
+ int verify = 0;
+ ret = wc_ed448_verify_msg(sig, x, msg, sizeof(msg), &verify,
+ &genKey, NULL, 0);
+ if (ret != 0 || verify != 1) {
+ printf("ed448_verify_msg failed\n");
+ goto exit_ed_verify;
+ }
+ }
+ count += i;
+ } while (bench_stats_sym_check(start));
+exit_ed_verify:
+ bench_stats_asym_finish("ED", 448, desc[5], 0, count, start, ret);
+#endif /* HAVE_ED448_VERIFY */
+#endif /* HAVE_ED448_SIGN */
+
+ wc_ed448_free(&genKey);
+}
+#endif /* HAVE_ED448 */
+
+#ifndef HAVE_STACK_SIZE
+#if defined(_WIN32) && !defined(INTIME_RTOS)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
@@ -1808,7 +5825,7 @@ void bench_ed25519KeySign(void)
{
static int init = 0;
static LARGE_INTEGER freq;
-
+
LARGE_INTEGER count;
(void)reset;
@@ -1827,10 +5844,10 @@ void bench_ed25519KeySign(void)
#if defined(WOLFSSL_MICROCHIP_PIC32MZ)
#define CLOCK 80000000.0
#else
- #include <peripheral/timer.h>
#define CLOCK 40000000.0
#endif
-
+ extern void WriteCoreTimer(word32 t);
+ extern word32 ReadCoreTimer(void);
double current_time(int reset)
{
unsigned int ns;
@@ -1846,11 +5863,19 @@ void bench_ed25519KeySign(void)
return ( ns / CLOCK * 2.0);
}
-#elif defined(WOLFSSL_IAR_ARM_TIME) || defined (WOLFSSL_MDK_ARM) || defined(WOLFSSL_USER_CURRTIME)
- extern double current_time(int reset);
-
-#elif defined FREERTOS
+#elif defined(WOLFSSL_IAR_ARM_TIME) || defined (WOLFSSL_MDK_ARM) || \
+ defined(WOLFSSL_USER_CURRTIME) || defined(WOLFSSL_CURRTIME_REMAP)
+ /* declared above at line 239 */
+ /* extern double current_time(int reset); */
+
+#elif defined(FREERTOS)
+ #include "task.h"
+#if defined(WOLFSSL_ESPIDF)
+ /* proto type definition */
+ int construct_argv();
+ extern char* __argv[22];
+#endif
double current_time(int reset)
{
portTickType tickCount;
@@ -1876,6 +5901,107 @@ void bench_ed25519KeySign(void)
return (double)tv.SECONDS + (double)tv.MILLISECONDS / 1000;
}
+#elif defined(FREESCALE_KSDK_BM)
+
+ double current_time(int reset)
+ {
+ return (double)OSA_TimeGetMsec() / 1000;
+ }
+
+#elif defined(WOLFSSL_EMBOS)
+
+ #include "RTOS.h"
+
+ double current_time(int reset)
+ {
+ double time_now;
+ double current_s = OS_GetTime() / 1000.0;
+ double current_us = OS_GetTime_us() / 1000000.0;
+ time_now = (double)( current_s + current_us);
+
+ (void) reset;
+
+ return time_now;
+ }
+#elif defined(WOLFSSL_SGX)
+ double current_time(int reset);
+
+#elif defined(WOLFSSL_DEOS)
+ double current_time(int reset)
+ {
+ const uint32_t systemTickTimeInHz = 1000000 / systemTickInMicroseconds();
+ uint32_t *systemTickPtr = systemTickPointer();
+
+ (void)reset;
+
+ return (double) *systemTickPtr/systemTickTimeInHz;
+ }
+
+#elif defined(MICRIUM)
+ double current_time(int reset)
+ {
+ CPU_ERR err;
+
+ (void)reset;
+ return (double) CPU_TS_Get32()/CPU_TS_TmrFreqGet(&err);
+ }
+#elif defined(WOLFSSL_ZEPHYR)
+
+ #include <time.h>
+
+ double current_time(int reset)
+ {
+ (void)reset;
+
+ #if defined(CONFIG_ARCH_POSIX)
+ k_cpu_idle();
+ #endif
+
+ return (double)k_uptime_get() / 1000;
+ }
+
+#elif defined(WOLFSSL_NETBURNER)
+ #include <predef.h>
+ #include <utils.h>
+ #include <constants.h>
+
+ double current_time(int reset)
+ {
+ DWORD ticks = TimeTick; /* ticks since system start */
+ (void)reset;
+
+ return (double) ticks/TICKS_PER_SECOND;
+ }
+
+#elif defined(THREADX)
+ #include "tx_api.h"
+ double current_time(int reset)
+ {
+ (void)reset;
+ return (double) tx_time_get() / TX_TIMER_TICKS_PER_SECOND;
+ }
+
+#elif defined(WOLFSSL_XILINX)
+ #ifndef XPAR_CPU_CORTEXA53_0_TIMESTAMP_CLK_FREQ
+ #define XPAR_CPU_CORTEXA53_0_TIMESTAMP_CLK_FREQ 50000000
+ #endif
+ #ifndef COUNTS_PER_SECOND
+ #define COUNTS_PER_SECOND XPAR_CPU_CORTEXA53_0_TIMESTAMP_CLK_FREQ
+ #endif
+
+ double current_time(int reset)
+ {
+ double timer;
+ uint64_t cntPct = 0;
+ asm volatile("mrs %0, CNTPCT_EL0" : "=r" (cntPct));
+
+ /* Convert to milliseconds */
+ timer = (double)(cntPct / (COUNTS_PER_SECOND / 1000));
+ /* Convert to seconds.millisecond */
+ timer /= 1000;
+ return timer;
+ }
+
#else
#include <sys/time.h>
@@ -1892,10 +6018,11 @@ void bench_ed25519KeySign(void)
}
#endif /* _WIN32 */
+#endif /* !HAVE_STACK_SIZE */
-#ifdef HAVE_GET_CYCLES
+#if defined(HAVE_GET_CYCLES)
-static INLINE word64 get_intel_cycles(void)
+static WC_INLINE word64 get_intel_cycles(void)
{
unsigned int lo_c, hi_c;
__asm__ __volatile__ (
@@ -1908,3 +6035,294 @@ static INLINE word64 get_intel_cycles(void)
}
#endif /* HAVE_GET_CYCLES */
+
+void benchmark_configure(int block_size)
+{
+ /* must be greater than 0 */
+ if (block_size > 0) {
+ numBlocks = numBlocks * bench_size / block_size;
+ bench_size = (word32)block_size;
+ }
+}
+
+#ifndef NO_MAIN_DRIVER
+
+#ifndef MAIN_NO_ARGS
+
+#ifndef WOLFSSL_BENCHMARK_ALL
+/* Display the algorithm string and keep to 80 characters per line.
+ *
+ * str Algorithm string to print.
+ * line Length of line used so far.
+ */
+static void print_alg(const char* str, int* line)
+{
+ int optLen;
+
+ optLen = (int)XSTRLEN(str) + 1;
+ if (optLen + *line > 80) {
+ printf("\n ");
+ *line = 13;
+ }
+ *line += optLen;
+ printf(" %s", str);
+}
+#endif
+
+/* Display the usage options of the benchmark program. */
+static void Usage(void)
+{
+#ifndef WOLFSSL_BENCHMARK_ALL
+ int i;
+ int line;
+#endif
+
+ printf("benchmark\n");
+ printf("%s", bench_Usage_msg1[lng_index][0]); /* option -? */
+ printf("%s", bench_Usage_msg1[lng_index][1]); /* option -csv */
+ printf("%s", bench_Usage_msg1[lng_index][2]); /* option -base10 */
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+    printf("%s", bench_Usage_msg1[lng_index][3]); /* option -no_aad */
+#endif
+ printf("%s", bench_Usage_msg1[lng_index][4]); /* option -dgst_full */
+#ifndef NO_RSA
+    printf("%s", bench_Usage_msg1[lng_index][5]); /* option -rsa_sign */
+ #ifdef WOLFSSL_KEY_GEN
+ printf("%s", bench_Usage_msg1[lng_index][6]); /* option -rsa-sz */
+ #endif
+#endif
+#if !defined(NO_DH) && defined(HAVE_FFDHE_2048)
+ printf("%s", bench_Usage_msg1[lng_index][7]); /* option -ffdhe2048 */
+#endif
+#if !defined(NO_DH) && defined(HAVE_FFDHE_3072)
+ printf("%s", bench_Usage_msg1[lng_index][8]); /* option -ffdhe3072 */
+#endif
+#if defined(HAVE_ECC) && !defined(NO_ECC256)
+ printf("%s", bench_Usage_msg1[lng_index][9]); /* option -p256 */
+#endif
+#if defined(HAVE_ECC) && defined(HAVE_ECC384)
+ printf("%s", bench_Usage_msg1[lng_index][10]); /* option -p384 */
+#endif
+#ifndef WOLFSSL_BENCHMARK_ALL
+ printf("%s", bench_Usage_msg1[lng_index][11]); /* option -<alg> */
+ printf(" ");
+ line = 13;
+ for (i=0; bench_cipher_opt[i].str != NULL; i++)
+ print_alg(bench_cipher_opt[i].str + 1, &line);
+ printf("\n ");
+ line = 13;
+ for (i=0; bench_digest_opt[i].str != NULL; i++)
+ print_alg(bench_digest_opt[i].str + 1, &line);
+ printf("\n ");
+ line = 13;
+ for (i=0; bench_mac_opt[i].str != NULL; i++)
+ print_alg(bench_mac_opt[i].str + 1, &line);
+ printf("\n ");
+ line = 13;
+ for (i=0; bench_asym_opt[i].str != NULL; i++)
+ print_alg(bench_asym_opt[i].str + 1, &line);
+ printf("\n ");
+ line = 13;
+ for (i=0; bench_other_opt[i].str != NULL; i++)
+ print_alg(bench_other_opt[i].str + 1, &line);
+ printf("\n");
+#endif
+ printf("%s", bench_Usage_msg1[lng_index][12]); /* option -lng */
+ printf("%s", bench_Usage_msg1[lng_index][13]); /* option <num> */
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+ printf("%s", bench_Usage_msg1[lng_index][14]); /* option -threads <num> */
+#endif
+ printf("%s", bench_Usage_msg1[lng_index][15]); /* option -print */
+}
+
+/* Match the command line argument with the string.
+ *
+ * arg Command line argument.
+ * str String to check for.
+ * return 1 if the command line argument matches the string, 0 otherwise.
+ */
+static int string_matches(const char* arg, const char* str)
+{
+ int len = (int)XSTRLEN(str) + 1;
+ return XSTRNCMP(arg, str, len) == 0;
+}
+#endif /* MAIN_NO_ARGS */
+
+#ifdef WOLFSSL_ESPIDF
+int wolf_benchmark_task( )
+#elif defined(MAIN_NO_ARGS)
+int main()
+#else
+int main(int argc, char** argv)
+#endif
+{
+ int ret = 0;
+#ifndef MAIN_NO_ARGS
+ int optMatched;
+#ifdef WOLFSSL_ESPIDF
+ int argc = construct_argv();
+ char** argv = (char**)__argv;
+#endif
+#ifndef WOLFSSL_BENCHMARK_ALL
+ int i;
+#endif
+#endif
+
+ benchmark_static_init();
+
+#ifndef MAIN_NO_ARGS
+ while (argc > 1) {
+ if (string_matches(argv[1], "-?")) {
+ if(--argc>1){
+ lng_index = XATOI((++argv)[1]);
+ if(lng_index<0||lng_index>1) {
+ lng_index = 0;
+ }
+ }
+ Usage();
+ return 0;
+ }
+ else if (string_matches(argv[1], "-v")) {
+ printf("-----------------------------------------------------------"
+ "-------------------\n wolfSSL version %s\n-----------------"
+ "-----------------------------------------------------------"
+ "--\n", LIBWOLFSSL_VERSION_STRING);
+ return 0;
+ }
+ else if (string_matches(argv[1], "-lng")) {
+ argc--;
+ argv++;
+ if(argc>1) {
+ lng_index = XATOI(argv[1]);
+ if(lng_index<0||lng_index>1){
+ printf("invalid number(%d) is specified. [<num> :0-1]\n",lng_index);
+ lng_index = 0;
+ }
+ }
+ }
+ else if (string_matches(argv[1], "-base10"))
+ base2 = 0;
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+ else if (string_matches(argv[1], "-no_aad"))
+ aesAuthAddSz = 0;
+#endif
+ else if (string_matches(argv[1], "-dgst_full"))
+ digest_stream = 0;
+#ifndef NO_RSA
+ else if (string_matches(argv[1], "-rsa_sign"))
+ rsa_sign_verify = 1;
+#endif
+#if !defined(NO_DH) && defined(HAVE_FFDHE_2048)
+ else if (string_matches(argv[1], "-ffdhe2048"))
+ use_ffdhe = 2048;
+#endif
+#if !defined(NO_DH) && defined(HAVE_FFDHE_3072)
+ else if (string_matches(argv[1], "-ffdhe3072"))
+ use_ffdhe = 3072;
+#endif
+#if defined(HAVE_ECC) && !defined(NO_ECC256)
+ else if (string_matches(argv[1], "-p256"))
+ bench_ecc_size = 32;
+#endif
+#if defined(HAVE_ECC) && defined(HAVE_ECC384)
+ else if (string_matches(argv[1], "-p384"))
+ bench_ecc_size = 48;
+#endif
+#ifdef BENCH_ASYM
+ else if (string_matches(argv[1], "-csv")) {
+ csv_format = 1;
+ csv_header_count = 1;
+ }
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+ else if (string_matches(argv[1], "-threads")) {
+ argc--;
+ argv++;
+ if (argc > 1) {
+ g_threadCount = XATOI(argv[1]);
+ if (g_threadCount < 1 || lng_index > 128){
+ printf("invalid number(%d) is specified. [<num> :1-128]\n",
+ g_threadCount);
+ g_threadCount = 0;
+ }
+ }
+ }
+#endif
+ else if (string_matches(argv[1], "-print")) {
+ gPrintStats = 1;
+ }
+ else if (argv[1][0] == '-') {
+ optMatched = 0;
+#ifndef WOLFSSL_BENCHMARK_ALL
+ /* Check known algorithm choosing command line options. */
+ /* Known cipher algorithms */
+ for (i=0; !optMatched && bench_cipher_opt[i].str != NULL; i++) {
+ if (string_matches(argv[1], bench_cipher_opt[i].str)) {
+ bench_cipher_algs |= bench_cipher_opt[i].val;
+ bench_all = 0;
+ optMatched = 1;
+ }
+ }
+ /* Known digest algorithms */
+ for (i=0; !optMatched && bench_digest_opt[i].str != NULL; i++) {
+ if (string_matches(argv[1], bench_digest_opt[i].str)) {
+ bench_digest_algs |= bench_digest_opt[i].val;
+ bench_all = 0;
+ optMatched = 1;
+ }
+ }
+ /* Known MAC algorithms */
+ for (i=0; !optMatched && bench_mac_opt[i].str != NULL; i++) {
+ if (string_matches(argv[1], bench_mac_opt[i].str)) {
+ bench_mac_algs |= bench_mac_opt[i].val;
+ bench_all = 0;
+ optMatched = 1;
+ }
+ }
+ /* Known asymmetric algorithms */
+ for (i=0; !optMatched && bench_asym_opt[i].str != NULL; i++) {
+ if (string_matches(argv[1], bench_asym_opt[i].str)) {
+ bench_asym_algs |= bench_asym_opt[i].val;
+ bench_all = 0;
+ optMatched = 1;
+ }
+ }
+ /* Other known cryptographic algorithms */
+ for (i=0; !optMatched && bench_other_opt[i].str != NULL; i++) {
+ if (string_matches(argv[1], bench_other_opt[i].str)) {
+ bench_other_algs |= bench_other_opt[i].val;
+ bench_all = 0;
+ optMatched = 1;
+ }
+ }
+#endif
+ if (!optMatched) {
+ printf("Option not recognized: %s\n", argv[1]);
+ Usage();
+ return 1;
+ }
+ }
+ else {
+ /* parse for block size */
+ benchmark_configure(XATOI(argv[1]));
+ }
+ argc--;
+ argv++;
+ }
+#endif /* MAIN_NO_ARGS */
+
+#ifdef HAVE_STACK_SIZE
+ ret = StackSizeCheck(NULL, benchmark_test);
+#else
+ ret = benchmark_test(NULL);
+#endif
+
+ return ret;
+}
+#endif /* !NO_MAIN_DRIVER */
+
+#else
+ #ifndef NO_MAIN_DRIVER
+ int main() { return 0; }
+ #endif
+#endif /* !NO_CRYPT_BENCHMARK */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.sln b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.sln
index e3e9483b8..6c555724a 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.sln
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.sln
@@ -2,17 +2,56 @@
Microsoft Visual Studio Solution File, Format Version 9.00
# Visual C++ Express 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "benchmark", "benchmark.vcproj", "{615AEC46-5595-4DEA-9490-DBD5DE0F8772}"
+ ProjectSection(ProjectDependencies) = postProject
+ {73973223-5EE8-41CA-8E88-1D60E89A237B} = {73973223-5EE8-41CA-8E88-1D60E89A237B}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "wolfssl", "..\..\wolfssl.vcxproj", "{73973223-5EE8-41CA-8E88-1D60E89A237B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
+ Debug|x64 = Debug|x64
+ DLL Debug|Win32 = DLL Debug|Win32
+ DLL Debug|x64 = DLL Debug|x64
+ DLL Release|Win32 = DLL Release|Win32
+ DLL Release|x64 = DLL Release|x64
Release|Win32 = Release|Win32
+ Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{615AEC46-5595-4DEA-9490-DBD5DE0F8772}.Debug|Win32.ActiveCfg = Debug|Win32
{615AEC46-5595-4DEA-9490-DBD5DE0F8772}.Debug|Win32.Build.0 = Debug|Win32
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.Debug|x64.ActiveCfg = Debug|x64
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.Debug|x64.Build.0 = Debug|x64
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.DLL Debug|Win32.ActiveCfg = Debug|Win32
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.DLL Debug|Win32.Build.0 = Debug|Win32
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.DLL Debug|x64.ActiveCfg = Debug|x64
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.DLL Debug|x64.Build.0 = Debug|x64
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.DLL Release|Win32.ActiveCfg = Release|Win32
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.DLL Release|Win32.Build.0 = Release|Win32
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.DLL Release|x64.ActiveCfg = Release|x64
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.DLL Release|x64.Build.0 = Release|x64
{615AEC46-5595-4DEA-9490-DBD5DE0F8772}.Release|Win32.ActiveCfg = Release|Win32
{615AEC46-5595-4DEA-9490-DBD5DE0F8772}.Release|Win32.Build.0 = Release|Win32
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.Release|x64.ActiveCfg = Release|x64
+ {615AEC46-5595-4DEA-9490-DBD5DE0F8772}.Release|x64.Build.0 = Release|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Debug|Win32.ActiveCfg = Debug|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Debug|Win32.Build.0 = Debug|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Debug|x64.ActiveCfg = Debug|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Debug|x64.Build.0 = Debug|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Debug|Win32.ActiveCfg = DLL Debug|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Debug|Win32.Build.0 = DLL Debug|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Debug|x64.ActiveCfg = DLL Debug|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Debug|x64.Build.0 = DLL Debug|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Release|Win32.ActiveCfg = DLL Release|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Release|Win32.Build.0 = DLL Release|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Release|x64.ActiveCfg = DLL Release|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Release|x64.Build.0 = DLL Release|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Release|Win32.ActiveCfg = Release|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Release|Win32.Build.0 = Release|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Release|x64.ActiveCfg = Release|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.vcproj b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.vcproj
index 5db23c372..86c58c985 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.vcproj
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/benchmark.vcproj
@@ -38,8 +38,8 @@
<Tool
Name="VCCLCompilerTool"
Optimization="0"
- AdditionalIncludeDirectories="../include"
- PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;"
+ AdditionalIncludeDirectories="../..;../../IDE/WIN;"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;WOLFSSL_LIB;WOLFSSL_USER_SETTINGS;"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
@@ -59,6 +59,7 @@
/>
<Tool
Name="VCLinkerTool"
+ AdditionalDependencies="Ws2_32.lib"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
@@ -112,8 +113,8 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="../include"
- PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;"
+ AdditionalIncludeDirectories="../..;../../IDE/WIN;"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;WOLFSSL_LIB;WOLFSSL_USER_SETTINGS;"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
@@ -131,6 +132,7 @@
/>
<Tool
Name="VCLinkerTool"
+ AdditionalDependencies="Ws2_32.lib"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/include.am b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/include.am
index db70ba79c..d91a701d7 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/include.am
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/benchmark/include.am
@@ -1,10 +1,14 @@
# vim:ft=automake
# All paths should be given relative to the root
+if BUILD_WOLFCRYPT_TESTS
noinst_PROGRAMS += wolfcrypt/benchmark/benchmark
wolfcrypt_benchmark_benchmark_SOURCES = wolfcrypt/benchmark/benchmark.c
-wolfcrypt_benchmark_benchmark_LDADD = src/libwolfssl.la
+wolfcrypt_benchmark_benchmark_LDADD = src/libwolfssl.la $(LIB_STATIC_ADD)
wolfcrypt_benchmark_benchmark_DEPENDENCIES = src/libwolfssl.la
+noinst_HEADERS += wolfcrypt/benchmark/benchmark.h
+endif
EXTRA_DIST += wolfcrypt/benchmark/benchmark.sln
EXTRA_DIST += wolfcrypt/benchmark/benchmark.vcproj
+EXTRA_DIST += wolfcrypt/benchmark/README.md
DISTCLEANFILES+= wolfcrypt/benchmark/.libs/benchmark
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c
index 85f01a0d1..4b5b437ca 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c
@@ -1,8 +1,8 @@
/* aes.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,194 +16,458 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
-#ifndef NO_AES
+#if !defined(NO_AES)
-#include <wolfssl/wolfcrypt/aes.h>
+/* Tip: Locate the software cipher modes by searching for "Software AES" */
-#ifdef HAVE_FIPS
-int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv,
- int dir)
-{
- return AesSetKey_fips(aes, key, len, iv, dir);
-}
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
-int wc_AesSetIV(Aes* aes, const byte* iv)
-{
- return AesSetIV_fips(aes, iv);
-}
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$g")
+ #pragma const_seg(".fipsB$g")
+ #endif
+#endif
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
-int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
- return AesCbcEncrypt_fips(aes, out, in, sz);
-}
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
-int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
- return AesCbcDecrypt_fips(aes, out, in, sz);
-}
+/* fips wrapper calls, user can call direct */
+#if defined(HAVE_FIPS) && \
+ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
+ int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv,
+ int dir)
+ {
+ if (aes == NULL || !( (len == 16) || (len == 24) || (len == 32)) ) {
+ return BAD_FUNC_ARG;
+ }
-int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
- const byte* key, word32 keySz, const byte* iv)
-{
- return AesCbcDecryptWithKey(out, in, inSz, key, keySz, iv);
-}
+ return AesSetKey_fips(aes, key, len, iv, dir);
+ }
+ int wc_AesSetIV(Aes* aes, const byte* iv)
+ {
+ if (aes == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return AesSetIV_fips(aes, iv);
+ }
+ #ifdef HAVE_AES_CBC
+ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
-/* AES-CTR */
-#ifdef WOLFSSL_AES_COUNTER
-void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
- AesCtrEncrypt(aes, out, in, sz);
-}
-#endif
+ return AesCbcEncrypt_fips(aes, out, in, sz);
+ }
+ #ifdef HAVE_AES_DECRYPT
+ int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ if (aes == NULL || out == NULL || in == NULL
+ || sz % AES_BLOCK_SIZE != 0) {
+ return BAD_FUNC_ARG;
+ }
-/* AES-DIRECT */
-#if defined(WOLFSSL_AES_DIRECT)
-void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
-{
- AesEncryptDirect(aes, out, in);
-}
+ return AesCbcDecrypt_fips(aes, out, in, sz);
+ }
+ #endif /* HAVE_AES_DECRYPT */
+ #endif /* HAVE_AES_CBC */
+ /* AES-CTR */
+ #ifdef WOLFSSL_AES_COUNTER
+ int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
-void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
-{
- AesDecryptDirect(aes, out, in);
-}
+ return AesCtrEncrypt(aes, out, in, sz);
+ }
+ #endif
+ /* AES-DIRECT */
+ #if defined(WOLFSSL_AES_DIRECT)
+ void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
+ {
+ AesEncryptDirect(aes, out, in);
+ }
-int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
- const byte* iv, int dir)
-{
- return AesSetKeyDirect(aes, key, len, iv, dir);
-}
-#endif
+ #ifdef HAVE_AES_DECRYPT
+ void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
+ {
+ AesDecryptDirect(aes, out, in);
+ }
+ #endif /* HAVE_AES_DECRYPT */
+ int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
+ const byte* iv, int dir)
+ {
+ return AesSetKeyDirect(aes, key, len, iv, dir);
+ }
+ #endif /* WOLFSSL_AES_DIRECT */
-#ifdef HAVE_AESGCM
-int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
-{
- return AesGcmSetKey_fips(aes, key, len);
-}
+ /* AES-GCM */
+ #ifdef HAVE_AESGCM
+ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
+ {
+ if (aes == NULL || !( (len == 16) || (len == 24) || (len == 32)) ) {
+ return BAD_FUNC_ARG;
+ }
+ return AesGcmSetKey_fips(aes, key, len);
+ }
+ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+ {
+ if (aes == NULL || authTagSz > AES_BLOCK_SIZE ||
+ authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ ||
+ ivSz == 0 || ivSz > AES_BLOCK_SIZE) {
+ return BAD_FUNC_ARG;
+ }
-int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
- const byte* iv, word32 ivSz,
- byte* authTag, word32 authTagSz,
- const byte* authIn, word32 authInSz)
-{
- return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
- authIn, authInSz);
-}
+ return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
+ authTagSz, authIn, authInSz);
+ }
+ #ifdef HAVE_AES_DECRYPT
+ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ const byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+ {
+ if (aes == NULL || out == NULL || in == NULL || iv == NULL
+ || authTag == NULL || authTagSz > AES_BLOCK_SIZE ||
+ ivSz == 0 || ivSz > AES_BLOCK_SIZE) {
+ return BAD_FUNC_ARG;
+ }
-int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
- const byte* iv, word32 ivSz,
- const byte* authTag, word32 authTagSz,
- const byte* authIn, word32 authInSz)
-{
- return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
- authIn, authInSz);
-}
+ return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
+ authTagSz, authIn, authInSz);
+ }
+ #endif /* HAVE_AES_DECRYPT */
+ int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
+ {
+ if (gmac == NULL || key == NULL || !((len == 16) ||
+ (len == 24) || (len == 32)) ) {
+ return BAD_FUNC_ARG;
+ }
-int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
-{
- return GmacSetKey(gmac, key, len);
-}
+ return GmacSetKey(gmac, key, len);
+ }
+ int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
+ const byte* authIn, word32 authInSz,
+ byte* authTag, word32 authTagSz)
+ {
+ if (gmac == NULL || authTagSz > AES_BLOCK_SIZE ||
+ authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
+ return BAD_FUNC_ARG;
+ }
+ return GmacUpdate(gmac, iv, ivSz, authIn, authInSz,
+ authTag, authTagSz);
+ }
+ #endif /* HAVE_AESGCM */
-int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
- const byte* authIn, word32 authInSz,
- byte* authTag, word32 authTagSz)
-{
- return GmacUpdate(gmac, iv, ivSz, authIn, authInSz,
- authTag, authTagSz);
-}
+ /* AES-CCM */
+ #if defined(HAVE_AESCCM) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+ int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
+ {
+ return AesCcmSetKey(aes, key, keySz);
+ }
+ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
+ const byte* nonce, word32 nonceSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+ {
+ /* sanity check on arguments */
+ if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+ || authTag == NULL || nonceSz < 7 || nonceSz > 13)
+ return BAD_FUNC_ARG;
-#endif /* HAVE_AESGCM */
-#ifdef HAVE_AESCCM
-void wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
-{
- AesCcmSetKey(aes, key, keySz);
-}
+ AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag,
+ authTagSz, authIn, authInSz);
+ return 0;
+ }
+ #ifdef HAVE_AES_DECRYPT
+ int wc_AesCcmDecrypt(Aes* aes, byte* out,
+ const byte* in, word32 inSz,
+ const byte* nonce, word32 nonceSz,
+ const byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+ {
-void wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
- const byte* nonce, word32 nonceSz,
- byte* authTag, word32 authTagSz,
- const byte* authIn, word32 authInSz)
-{
- AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz,
- authIn, authInSz);
-}
+ if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+ || authTag == NULL || nonceSz < 7 || nonceSz > 13) {
+ return BAD_FUNC_ARG;
+ }
+ return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz,
+ authTag, authTagSz, authIn, authInSz);
+ }
+ #endif /* HAVE_AES_DECRYPT */
+ #endif /* HAVE_AESCCM && HAVE_FIPS_VERSION 2 */
-int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
- const byte* nonce, word32 nonceSz,
- const byte* authTag, word32 authTagSz,
- const byte* authIn, word32 authInSz)
-{
- return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz,
- authIn, authInSz);
-}
-#endif /* HAVE_AESCCM */
+ int wc_AesInit(Aes* aes, void* h, int i)
+ {
+ if (aes == NULL)
+ return BAD_FUNC_ARG;
-#ifdef HAVE_CAVIUM
-int wc_AesInitCavium(Aes* aes, int i)
-{
- return AesInitCavium(aes, i);
-}
+ (void)h;
+ (void)i;
+ /* FIPS doesn't support:
+ return AesInit(aes, h, i); */
+ return 0;
+ }
+ void wc_AesFree(Aes* aes)
+ {
+ (void)aes;
+ /* FIPS doesn't support:
+ AesFree(aes); */
+ }
-void wc_AesFreeCavium(Aes* aes)
-{
- AesFreeCavium(aes);
-}
-#endif
-#else /* HAVE_FIPS */
+#else /* else build without fips, or for FIPS v2 */
-#ifdef WOLFSSL_TI_CRYPT
-#include <wolfcrypt/src/port/ti/ti-aes.c>
+
+#if defined(WOLFSSL_TI_CRYPT)
+ #include <wolfcrypt/src/port/ti/ti-aes.c>
#else
-#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/logging.h>
+
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
+
+#if !defined(WOLFSSL_ARMASM)
+
+#ifdef WOLFSSL_IMX6_CAAM_BLOB
+ /* case of possibly not using hardware acceleration for AES but using key
+ blobs */
+ #include <wolfssl/wolfcrypt/port/caam/wolfcaam.h>
+#endif
+
#ifdef DEBUG_AESNI
#include <stdio.h>
#endif
-
#ifdef _MSC_VER
/* 4127 warning constant while(1) */
#pragma warning(disable: 4127)
#endif
-#if defined(STM32F2_CRYPTO)
- /* STM32F2 hardware AES support for CBC, CTR modes through the STM32F2
- * Standard Peripheral Library. Documentation located in STM32F2xx
- * Standard Peripheral Library document (See note in README).
- * NOTE: no support for AES-GCM/CCM/Direct */
- #include "stm32f2xx.h"
- #include "stm32f2xx_cryp.h"
+/* Define AES implementation includes and functions */
+#if defined(STM32_CRYPTO)
+ /* STM32F2/F4/F7/L4 hardware AES support for ECB, CBC, CTR and GCM modes */
+
+#if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+
+ static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_STM32_CUBEMX
+ CRYP_HandleTypeDef hcryp;
+ #else
+ CRYP_InitTypeDef cryptInit;
+ CRYP_KeyInitTypeDef keyInit;
+ #endif
+
+ #ifdef WOLFSSL_STM32_CUBEMX
+ ret = wc_Stm32_Aes_Init(aes, &hcryp);
+ if (ret != 0)
+ return ret;
+
+ #ifdef STM32_CRYPTO_AES_ONLY
+ hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
+ hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_ECB;
+ hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE;
+ #elif defined(STM32_HAL_V2)
+ hcryp.Init.Algorithm = CRYP_AES_ECB;
+ #endif
+ HAL_CRYP_Init(&hcryp);
+
+ #ifdef STM32_CRYPTO_AES_ONLY
+ ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+ outBlock, STM32_HAL_TIMEOUT);
+ #elif defined(STM32_HAL_V2)
+ ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)inBlock, AES_BLOCK_SIZE,
+ (uint32_t*)outBlock, STM32_HAL_TIMEOUT);
+ #else
+ ret = HAL_CRYP_AESECB_Encrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+ outBlock, STM32_HAL_TIMEOUT);
+ #endif
+ if (ret != HAL_OK) {
+ ret = WC_TIMEOUT_E;
+ }
+ HAL_CRYP_DeInit(&hcryp);
+
+ #else /* STD_PERI_LIB */
+ ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+ if (ret != 0)
+ return ret;
+
+ /* reset registers to their default values */
+ CRYP_DeInit();
+
+ /* setup key */
+ CRYP_KeyInit(&keyInit);
+
+ /* set direction and mode */
+ cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt;
+ cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
+ CRYP_Init(&cryptInit);
+
+ /* enable crypto processor */
+ CRYP_Cmd(ENABLE);
+
+ /* flush IN/OUT FIFOs */
+ CRYP_FIFOFlush();
+
+ CRYP_DataIn(*(uint32_t*)&inBlock[0]);
+ CRYP_DataIn(*(uint32_t*)&inBlock[4]);
+ CRYP_DataIn(*(uint32_t*)&inBlock[8]);
+ CRYP_DataIn(*(uint32_t*)&inBlock[12]);
+
+ /* wait until the complete message has been processed */
+ while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+
+ *(uint32_t*)&outBlock[0] = CRYP_DataOut();
+ *(uint32_t*)&outBlock[4] = CRYP_DataOut();
+ *(uint32_t*)&outBlock[8] = CRYP_DataOut();
+ *(uint32_t*)&outBlock[12] = CRYP_DataOut();
+
+ /* disable crypto processor */
+ CRYP_Cmd(DISABLE);
+ #endif /* WOLFSSL_STM32_CUBEMX */
+
+ return ret;
+ }
+#endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */
+
+#ifdef HAVE_AES_DECRYPT
+ #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM)
+ static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_STM32_CUBEMX
+ CRYP_HandleTypeDef hcryp;
+ #else
+ CRYP_InitTypeDef cryptInit;
+ CRYP_KeyInitTypeDef keyInit;
+ #endif
+
+ #ifdef WOLFSSL_STM32_CUBEMX
+ ret = wc_Stm32_Aes_Init(aes, &hcryp);
+ if (ret != 0)
+ return ret;
+
+ #ifdef STM32_CRYPTO_AES_ONLY
+ hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT;
+ hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_ECB;
+ hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE;
+ #elif defined(STM32_HAL_V2)
+ hcryp.Init.Algorithm = CRYP_AES_ECB;
+ #endif
+ HAL_CRYP_Init(&hcryp);
+
+ #ifdef STM32_CRYPTO_AES_ONLY
+ ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+ outBlock, STM32_HAL_TIMEOUT);
+ #elif defined(STM32_HAL_V2)
+ ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)inBlock, AES_BLOCK_SIZE,
+ (uint32_t*)outBlock, STM32_HAL_TIMEOUT);
+ #else
+ ret = HAL_CRYP_AESECB_Decrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
+ outBlock, STM32_HAL_TIMEOUT);
+ #endif
+ if (ret != HAL_OK) {
+ ret = WC_TIMEOUT_E;
+ }
+ HAL_CRYP_DeInit(&hcryp);
+
+ #else /* STD_PERI_LIB */
+ ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+ if (ret != 0)
+ return ret;
+
+ /* reset registers to their default values */
+ CRYP_DeInit();
+
+ /* set direction and key */
+ CRYP_KeyInit(&keyInit);
+ cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
+ cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
+ CRYP_Init(&cryptInit);
+
+ /* enable crypto processor */
+ CRYP_Cmd(ENABLE);
+
+ /* wait until decrypt key has been initialized */
+ while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+
+ /* set direction and mode */
+ cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
+ cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
+ CRYP_Init(&cryptInit);
+
+ /* enable crypto processor */
+ CRYP_Cmd(ENABLE);
+
+ /* flush IN/OUT FIFOs */
+ CRYP_FIFOFlush();
+
+ CRYP_DataIn(*(uint32_t*)&inBlock[0]);
+ CRYP_DataIn(*(uint32_t*)&inBlock[4]);
+ CRYP_DataIn(*(uint32_t*)&inBlock[8]);
+ CRYP_DataIn(*(uint32_t*)&inBlock[12]);
+
+ /* wait until the complete message has been processed */
+ while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+
+ *(uint32_t*)&outBlock[0] = CRYP_DataOut();
+ *(uint32_t*)&outBlock[4] = CRYP_DataOut();
+ *(uint32_t*)&outBlock[8] = CRYP_DataOut();
+ *(uint32_t*)&outBlock[12] = CRYP_DataOut();
+
+ /* disable crypto processor */
+ CRYP_Cmd(DISABLE);
+ #endif /* WOLFSSL_STM32_CUBEMX */
+
+ return ret;
+ }
+ #endif /* WOLFSSL_AES_DIRECT || HAVE_AESCCM */
+#endif /* HAVE_AES_DECRYPT */
+
#elif defined(HAVE_COLDFIRE_SEC)
/* Freescale Coldfire SEC support for CBC mode.
* NOTE: no support for AES-CTR/GCM/CCM/Direct */
@@ -211,34 +475,453 @@ void wc_AesFreeCavium(Aes* aes)
#include "sec.h"
#include "mcf5475_sec.h"
#include "mcf5475_siu.h"
+#elif defined(FREESCALE_LTC)
+ #include "fsl_ltc.h"
+ #if defined(FREESCALE_LTC_AES_GCM)
+ #undef NEED_AES_TABLES
+ #undef GCM_TABLE
+ #else
+ /* if LTC doesn't have GCM, use software with LTC AES ECB mode */
+ static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ wc_AesEncryptDirect(aes, outBlock, inBlock);
+ return 0;
+ }
+ static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ wc_AesDecryptDirect(aes, outBlock, inBlock);
+ return 0;
+ }
+ #endif
#elif defined(FREESCALE_MMCAU)
/* Freescale mmCAU hardware AES support for Direct, CBC, CCM, GCM modes
* through the CAU/mmCAU library. Documentation located in
* ColdFire/ColdFire+ CAU and Kinetis mmCAU Software Library User
- * Guide (See note in README).
- * NOTE: no support for AES-CTR */
- #include "cau_api.h"
+ * Guide (See note in README). */
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ /* MMCAU 1.4 library used with non-KSDK / classic MQX builds */
+ #include "cau_api.h"
+ #else
+ #include "fsl_mmcau.h"
+ #endif
+
+ static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ int ret;
+
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) {
+ WOLFSSL_MSG("Bad cau_aes_encrypt alignment");
+ return BAD_ALIGN_E;
+ }
+ #endif
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if(ret == 0) {
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ cau_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
+ #else
+ MMCAU_AES_EncryptEcb(inBlock, (byte*)aes->key, aes->rounds,
+ outBlock);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
+ }
+ #ifdef HAVE_AES_DECRYPT
+ static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ int ret;
+
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) {
+ WOLFSSL_MSG("Bad cau_aes_decrypt alignment");
+ return BAD_ALIGN_E;
+ }
+ #endif
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if(ret == 0) {
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ cau_aes_decrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
+ #else
+ MMCAU_AES_DecryptEcb(inBlock, (byte*)aes->key, aes->rounds,
+ outBlock);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
+ }
+ #endif /* HAVE_AES_DECRYPT */
+
#elif defined(WOLFSSL_PIC32MZ_CRYPT)
- /* NOTE: no support for AES-CCM/Direct */
- #define DEBUG_WOLFSSL
- #include "wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h"
-#elif defined(HAVE_CAVIUM)
- #include <wolfssl/wolfcrypt/logging.h>
- #include "cavium_common.h"
-
- /* still leave SW crypto available */
+
+ #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+
+ #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)
+ static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0,
+ outBlock, inBlock, AES_BLOCK_SIZE,
+ PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RECB);
+ }
+ #endif
+
+ #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)
+ static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0,
+ outBlock, inBlock, AES_BLOCK_SIZE,
+ PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RECB);
+ }
+ #endif
+
+#elif defined(WOLFSSL_NRF51_AES)
+ /* Use built-in AES hardware - AES 128 ECB Encrypt Only */
+ #include "wolfssl/wolfcrypt/port/nrf51.h"
+
+ static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ return nrf51_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
+ }
+
+ #ifdef HAVE_AES_DECRYPT
+ #error nRF51 AES Hardware does not support decrypt
+ #endif /* HAVE_AES_DECRYPT */
+
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+
+ #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h"
+
+ #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)
+ static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ return wc_esp32AesEncrypt(aes, inBlock, outBlock);
+ }
+ #endif
+
+ #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)
+ static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ return wc_esp32AesDecrypt(aes, inBlock, outBlock);
+ }
+ #endif
+
+#elif defined(WOLFSSL_AESNI)
+
#define NEED_AES_TABLES
- static int wc_AesCaviumSetKey(Aes* aes, const byte* key, word32 length,
- const byte* iv);
- static int wc_AesCaviumCbcEncrypt(Aes* aes, byte* out, const byte* in,
- word32 length);
- static int wc_AesCaviumCbcDecrypt(Aes* aes, byte* out, const byte* in,
- word32 length);
+ /* Each platform needs to query info type 1 from cpuid to see if aesni is
+ * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
+ */
+
+ #ifndef AESNI_ALIGN
+ #define AESNI_ALIGN 16
+ #endif
+
+ #ifdef _MSC_VER
+ #define XASM_LINK(f)
+ #elif defined(__APPLE__)
+ #define XASM_LINK(f) asm("_" f)
+ #else
+ #define XASM_LINK(f) asm(f)
+ #endif /* _MSC_VER */
+
+ static int checkAESNI = 0;
+ static int haveAESNI = 0;
+ static word32 intel_flags = 0;
+
+ static int Check_CPU_support_AES(void)
+ {
+ intel_flags = cpuid_get_flags();
+
+ return IS_INTEL_AESNI(intel_flags) != 0;
+ }
+
+
+ /* tell C compiler these are asm functions in case any mix up of ABI underscore
+ prefix between clang/gcc/llvm etc */
+ #ifdef HAVE_AES_CBC
+ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
+ unsigned char* ivec, unsigned long length,
+ const unsigned char* KS, int nr)
+ XASM_LINK("AES_CBC_encrypt");
+
+ #ifdef HAVE_AES_DECRYPT
+ #if defined(WOLFSSL_AESNI_BY4)
+ void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out,
+ unsigned char* ivec, unsigned long length,
+ const unsigned char* KS, int nr)
+ XASM_LINK("AES_CBC_decrypt_by4");
+ #elif defined(WOLFSSL_AESNI_BY6)
+ void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out,
+ unsigned char* ivec, unsigned long length,
+ const unsigned char* KS, int nr)
+ XASM_LINK("AES_CBC_decrypt_by6");
+ #else /* WOLFSSL_AESNI_BYx */
+ void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out,
+ unsigned char* ivec, unsigned long length,
+ const unsigned char* KS, int nr)
+ XASM_LINK("AES_CBC_decrypt_by8");
+ #endif /* WOLFSSL_AESNI_BYx */
+ #endif /* HAVE_AES_DECRYPT */
+ #endif /* HAVE_AES_CBC */
+
+ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
+ unsigned long length, const unsigned char* KS, int nr)
+ XASM_LINK("AES_ECB_encrypt");
+
+ #ifdef HAVE_AES_DECRYPT
+ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
+ unsigned long length, const unsigned char* KS, int nr)
+ XASM_LINK("AES_ECB_decrypt");
+ #endif
+
+ void AES_128_Key_Expansion(const unsigned char* userkey,
+ unsigned char* key_schedule)
+ XASM_LINK("AES_128_Key_Expansion");
+
+ void AES_192_Key_Expansion(const unsigned char* userkey,
+ unsigned char* key_schedule)
+ XASM_LINK("AES_192_Key_Expansion");
+
+ void AES_256_Key_Expansion(const unsigned char* userkey,
+ unsigned char* key_schedule)
+ XASM_LINK("AES_256_Key_Expansion");
+
+
+ static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ Aes* aes)
+ {
+ int ret;
+
+ if (!userKey || !aes)
+ return BAD_FUNC_ARG;
+
+ switch (bits) {
+ case 128:
+ AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10;
+ return 0;
+ case 192:
+ AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12;
+ return 0;
+ case 256:
+ AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14;
+ return 0;
+ default:
+ ret = BAD_FUNC_ARG;
+ }
+
+ return ret;
+ }
+
+ #ifdef HAVE_AES_DECRYPT
+ static int AES_set_decrypt_key(const unsigned char* userKey,
+ const int bits, Aes* aes)
+ {
+ int nr;
+ Aes temp_key;
+ __m128i *Key_Schedule = (__m128i*)aes->key;
+ __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key;
+
+ if (!userKey || !aes)
+ return BAD_FUNC_ARG;
+
+ if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG)
+ return BAD_FUNC_ARG;
+
+ nr = temp_key.rounds;
+ aes->rounds = nr;
+
+ Key_Schedule[nr] = Temp_Key_Schedule[0];
+ Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
+ Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
+ Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]);
+ Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]);
+ Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]);
+ Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]);
+ Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]);
+ Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]);
+ Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]);
+
+ if (nr>10) {
+ Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]);
+ Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]);
+ }
+
+ if (nr>12) {
+ Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]);
+ Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]);
+ }
+
+ Key_Schedule[0] = Temp_Key_Schedule[nr];
+
+ return 0;
+ }
+ #endif /* HAVE_AES_DECRYPT */
+
+#elif (defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)) || \
+ ((defined(WOLFSSL_AFALG) || defined(WOLFSSL_DEVCRYPTO_AES)) && \
+ defined(HAVE_AESCCM))
+ static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ wc_AesEncryptDirect(aes, outBlock, inBlock);
+ return 0;
+ }
+
+#elif defined(WOLFSSL_AFALG)
+#elif defined(WOLFSSL_DEVCRYPTO_AES)
+
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+ #include "hal_data.h"
+
+ #ifndef WOLFSSL_SCE_AES256_HANDLE
+ #define WOLFSSL_SCE_AES256_HANDLE g_sce_aes_256
+ #endif
+
+ #ifndef WOLFSSL_SCE_AES192_HANDLE
+ #define WOLFSSL_SCE_AES192_HANDLE g_sce_aes_192
+ #endif
+
+ #ifndef WOLFSSL_SCE_AES128_HANDLE
+ #define WOLFSSL_SCE_AES128_HANDLE g_sce_aes_128
+ #endif
+
+ static int AES_ECB_encrypt(Aes* aes, const byte* inBlock, byte* outBlock,
+ int sz)
+ {
+ uint32_t ret;
+
+ if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+ CRYPTO_WORD_ENDIAN_BIG) {
+ ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+ }
+
+ switch (aes->keylen) {
+ #ifdef WOLFSSL_AES_128
+ case AES_128_KEY_SIZE:
+ ret = WOLFSSL_SCE_AES128_HANDLE.p_api->encrypt(
+ WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key,
+ NULL, (sz / sizeof(word32)), (word32*)inBlock,
+ (word32*)outBlock);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES_192_KEY_SIZE:
+ ret = WOLFSSL_SCE_AES192_HANDLE.p_api->encrypt(
+ WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key,
+ NULL, (sz / sizeof(word32)), (word32*)inBlock,
+ (word32*)outBlock);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES_256_KEY_SIZE:
+ ret = WOLFSSL_SCE_AES256_HANDLE.p_api->encrypt(
+ WOLFSSL_SCE_AES256_HANDLE.p_ctrl, aes->key,
+ NULL, (sz / sizeof(word32)), (word32*)inBlock,
+ (word32*)outBlock);
+ break;
+ #endif
+ default:
+ WOLFSSL_MSG("Unknown key size");
+ return BAD_FUNC_ARG;
+ }
+
+ if (ret != SSP_SUCCESS) {
+ /* revert input */
+ ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+ return WC_HW_E;
+ }
+
+ if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+ CRYPTO_WORD_ENDIAN_BIG) {
+ ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz);
+ if (inBlock != outBlock) {
+ /* revert input */
+ ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+ }
+ }
+ return 0;
+ }
+
+ #if defined(HAVE_AES_DECRYPT)
+ static int AES_ECB_decrypt(Aes* aes, const byte* inBlock, byte* outBlock,
+ int sz)
+ {
+ uint32_t ret;
+
+ if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+ CRYPTO_WORD_ENDIAN_BIG) {
+ ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+ }
+
+ switch (aes->keylen) {
+ #ifdef WOLFSSL_AES_128
+ case AES_128_KEY_SIZE:
+ ret = WOLFSSL_SCE_AES128_HANDLE.p_api->decrypt(
+ WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key, aes->reg,
+ (sz / sizeof(word32)), (word32*)inBlock,
+ (word32*)outBlock);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES_192_KEY_SIZE:
+ ret = WOLFSSL_SCE_AES192_HANDLE.p_api->decrypt(
+ WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key, aes->reg,
+ (sz / sizeof(word32)), (word32*)inBlock,
+ (word32*)outBlock);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES_256_KEY_SIZE:
+ ret = WOLFSSL_SCE_AES256_HANDLE.p_api->decrypt(
+ WOLFSSL_SCE_AES256_HANDLE.p_ctrl, aes->key, aes->reg,
+ (sz / sizeof(word32)), (word32*)inBlock,
+ (word32*)outBlock);
+ break;
+ #endif
+ default:
+ WOLFSSL_MSG("Unknown key size");
+ return BAD_FUNC_ARG;
+ }
+ if (ret != SSP_SUCCESS) {
+ return WC_HW_E;
+ }
+
+ if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+ CRYPTO_WORD_ENDIAN_BIG) {
+ ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz);
+ if (inBlock != outBlock) {
+ /* revert input */
+ ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz);
+ }
+ }
+
+ return 0;
+ }
+
+ #endif
+
+ #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)
+ static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ return AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
+ }
+ #endif
+
+ #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)
+ static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
+ {
+ return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
+ }
+ #endif
#else
- /* using CTaoCrypt software AES implementation */
+
+ /* using wolfCrypt software implementation */
#define NEED_AES_TABLES
-#endif /* STM32F2_CRYPTO */
+#endif
+
#ifdef NEED_AES_TABLES
@@ -250,7 +933,8 @@ static const word32 rcon[] = {
/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
};
-static const word32 Te[5][256] = {
+#ifndef WOLFSSL_AES_SMALL_TABLES
+static const word32 Te[4][256] = {
{
0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
@@ -514,76 +1198,11 @@ static const word32 Te[5][256] = {
0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
-},
-{
- 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
- 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
- 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
- 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
- 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
- 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
- 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
- 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
- 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
- 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
- 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
- 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
- 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
- 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
- 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
- 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
- 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
- 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
- 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
- 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
- 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
- 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
- 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
- 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
- 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
- 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
- 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
- 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
- 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
- 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
- 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
- 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
- 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
- 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
- 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
- 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
- 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
- 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
- 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
- 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
- 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
- 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
- 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
- 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
- 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
- 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
- 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
- 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
- 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
- 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
- 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
- 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
- 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
- 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
- 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
- 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
- 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
- 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
- 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
- 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
- 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
- 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
- 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
- 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
}
};
-static const word32 Td[5][256] = {
+#ifdef HAVE_AES_DECRYPT
+static const word32 Td[4][256] = {
{
0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
@@ -848,232 +1467,166 @@ static const word32 Td[5][256] = {
0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
-},
-{
- 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
- 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
- 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
- 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
- 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
- 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
- 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
- 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
- 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
- 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
- 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
- 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
- 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
- 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
- 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
- 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
- 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
- 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
- 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
- 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
- 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
- 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
- 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
- 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
- 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
- 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
- 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
- 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
- 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
- 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
- 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
- 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
- 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
- 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
- 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
- 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
- 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
- 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
- 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
- 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
- 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
- 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
- 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
- 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
- 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
- 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
- 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
- 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
- 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
- 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
- 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
- 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
- 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
- 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
- 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
- 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
- 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
- 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
- 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
- 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
- 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
- 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
- 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
- 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
}
};
+#endif /* HAVE_AES_DECRYPT */
+#endif
-#define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y))))
-
-#ifdef WOLFSSL_AESNI
-
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
-
-#ifndef _MSC_VER
-
- #define cpuid(reg, func)\
- __asm__ __volatile__ ("cpuid":\
- "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
- "a" (func));
-
- #define XASM_LINK(f) asm(f)
-#else
-
- #include <intrin.h>
- #define cpuid(a,b) __cpuid((int*)a,b)
+#ifdef HAVE_AES_DECRYPT
+#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) \
+ || defined(WOLFSSL_AES_DIRECT)
+static const byte Td4[256] =
+{
+ 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+ 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+ 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+ 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+ 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+ 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+ 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+ 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+ 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+ 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+ 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+ 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+ 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+ 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+ 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+ 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+ 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+ 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+ 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+ 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+ 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+ 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+ 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+ 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+ 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+ 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+ 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+ 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+ 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+ 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+ 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+ 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
+};
+#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */
+#endif /* HAVE_AES_DECRYPT */
- #define XASM_LINK(f)
+#define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y))))
-#endif /* _MSC_VER */
+#ifdef WOLFSSL_AES_SMALL_TABLES
+static const byte Tsbox[256] = {
+ 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+ 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
+ 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
+ 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
+ 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
+ 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
+ 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
+ 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
+ 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+ 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
+ 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
+ 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
+ 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
+ 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
+ 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
+ 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
+ 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
+ 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
+ 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
+ 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+ 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
+ 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
+ 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+ 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
+ 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+ 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+ 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
+ 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
+ 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
+ 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
+ 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
+ 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
+};
+#define AES_XTIME(x) ((byte)((byte)((x) << 1) ^ ((0 - ((x) >> 7)) & 0x1b)))
-static int Check_CPU_support_AES(void)
+static word32 col_mul(word32 t, int i2, int i3, int ia, int ib)
{
- unsigned int reg[4]; /* put a,b,c,d into 0,1,2,3 */
- cpuid(reg, 1); /* query info 1 */
-
- if (reg[2] & 0x2000000)
- return 1;
+ byte t3 = GETBYTE(t, i3);
+ byte tm = AES_XTIME(GETBYTE(t, i2) ^ t3);
- return 0;
+ return GETBYTE(t, ia) ^ GETBYTE(t, ib) ^ t3 ^ tm;
}
-static int checkAESNI = 0;
-static int haveAESNI = 0;
-
-
-/* tell C compiler these are asm functions in case any mix up of ABI underscore
- prefix between clang/gcc/llvm etc */
-void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
- unsigned char* ivec, unsigned long length,
- const unsigned char* KS, int nr)
- XASM_LINK("AES_CBC_encrypt");
-
-
-void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
- unsigned char* ivec, unsigned long length,
- const unsigned char* KS, int nr)
- XASM_LINK("AES_CBC_decrypt");
-
-void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
- unsigned long length, const unsigned char* KS, int nr)
- XASM_LINK("AES_ECB_encrypt");
-
-
-void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
- unsigned long length, const unsigned char* KS, int nr)
- XASM_LINK("AES_ECB_decrypt");
-
-void AES_128_Key_Expansion(const unsigned char* userkey,
- unsigned char* key_schedule)
- XASM_LINK("AES_128_Key_Expansion");
+static word32 inv_col_mul(word32 t, int i9, int ib, int id, int ie)
+{
+ byte t9 = GETBYTE(t, i9);
+ byte tb = GETBYTE(t, ib);
+ byte td = GETBYTE(t, id);
+ byte te = GETBYTE(t, ie);
+ byte t0 = t9 ^ tb ^ td;
+ return t0 ^ AES_XTIME(AES_XTIME(AES_XTIME(t0 ^ te) ^ td ^ te) ^ tb ^ te);
+}
+#endif
-void AES_192_Key_Expansion(const unsigned char* userkey,
- unsigned char* key_schedule)
- XASM_LINK("AES_192_Key_Expansion");
+#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM)
-void AES_256_Key_Expansion(const unsigned char* userkey,
- unsigned char* key_schedule)
- XASM_LINK("AES_256_Key_Expansion");
+#ifndef WC_CACHE_LINE_SZ
+ #if defined(__x86_64__) || defined(_M_X64) || \
+ (defined(__ILP32__) && (__ILP32__ >= 1))
+ #define WC_CACHE_LINE_SZ 64
+ #else
+ /* default cache line size */
+ #define WC_CACHE_LINE_SZ 32
+ #endif
+#endif
-static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
- Aes* aes)
+#ifndef WOLFSSL_AES_SMALL_TABLES
+/* load 4 Te Tables into cache by cache line stride */
+static WC_INLINE word32 PreFetchTe(void)
{
- if (!userKey || !aes)
- return BAD_FUNC_ARG;
+ word32 x = 0;
+ int i,j;
- if (bits == 128) {
- AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10;
- return 0;
- }
- else if (bits == 192) {
- AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12;
- return 0;
- }
- else if (bits == 256) {
- AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14;
- return 0;
+ for (i = 0; i < 4; i++) {
+ /* 256 elements, each one is 4 bytes */
+ for (j = 0; j < 256; j += WC_CACHE_LINE_SZ/4) {
+ x &= Te[i][j];
+ }
}
- return BAD_FUNC_ARG;
+ return x;
}
-
-
-static int AES_set_decrypt_key(const unsigned char* userKey, const int bits,
- Aes* aes)
+#else
+/* load sbox into cache by cache line stride */
+static WC_INLINE word32 PreFetchSBox(void)
{
- int nr;
- Aes temp_key;
- __m128i *Key_Schedule = (__m128i*)aes->key;
- __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key;
-
- if (!userKey || !aes)
- return BAD_FUNC_ARG;
-
- if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG)
- return BAD_FUNC_ARG;
-
- nr = temp_key.rounds;
- aes->rounds = nr;
-
- Key_Schedule[nr] = Temp_Key_Schedule[0];
- Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
- Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
- Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]);
- Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]);
- Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]);
- Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]);
- Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]);
- Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]);
- Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]);
-
- if(nr>10) {
- Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]);
- Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]);
- }
+ word32 x = 0;
+ int i;
- if(nr>12) {
- Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]);
- Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]);
+ for (i = 0; i < 256; i += WC_CACHE_LINE_SZ/4) {
+ x &= Tsbox[i];
}
-
- Key_Schedule[0] = Temp_Key_Schedule[nr];
-
- return 0;
+ return x;
}
+#endif
-
-
-#endif /* WOLFSSL_AESNI */
-
-
+/* Software AES - ECB Encrypt */
static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
word32 s0, s1, s2, s3;
word32 t0, t1, t2, t3;
word32 r = aes->rounds >> 1;
-
const word32* rk = aes->key;
+
if (r > 7 || r == 0) {
WOLFSSL_MSG("AesEncrypt encountered improper key, set it up");
- return; /* stop instead of segfaulting, set up your keys! */
+ return; /* stop instead of seg-faulting, set up your keys! */
}
+
#ifdef WOLFSSL_AESNI
if (haveAESNI && aes->use_aesni) {
#ifdef DEBUG_AESNI
@@ -1086,17 +1639,20 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
#endif
/* check alignment, decrypt doesn't need alignment */
- if ((wolfssl_word)inBlock % 16) {
+ if ((wolfssl_word)inBlock % AESNI_ALIGN) {
#ifndef NO_WOLFSSL_ALLOC_ALIGN
- byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE, NULL,
+ byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE + AESNI_ALIGN, aes->heap,
DYNAMIC_TYPE_TMP_BUFFER);
+ byte* tmp_align;
if (tmp == NULL) return;
- XMEMCPY(tmp, inBlock, AES_BLOCK_SIZE);
- AES_ECB_encrypt(tmp, tmp, AES_BLOCK_SIZE, (byte*)aes->key,
- aes->rounds);
- XMEMCPY(outBlock, tmp, AES_BLOCK_SIZE);
- XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
+
+ XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
+ AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE,
+ (byte*)aes->key, aes->rounds);
+ XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
+ XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
return;
#else
WOLFSSL_MSG("AES-ECB encrypt with bad alignment");
@@ -1115,6 +1671,10 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
#endif
}
#endif
+#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+ AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
+ return;
+#endif
/*
* map byte array block to cipher state
@@ -1125,46 +1685,50 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));
- #ifdef LITTLE_ENDIAN_ORDER
- s0 = ByteReverseWord32(s0);
- s1 = ByteReverseWord32(s1);
- s2 = ByteReverseWord32(s2);
- s3 = ByteReverseWord32(s3);
- #endif
+#ifdef LITTLE_ENDIAN_ORDER
+ s0 = ByteReverseWord32(s0);
+ s1 = ByteReverseWord32(s1);
+ s2 = ByteReverseWord32(s2);
+ s3 = ByteReverseWord32(s3);
+#endif
+ /* AddRoundKey */
s0 ^= rk[0];
s1 ^= rk[1];
s2 ^= rk[2];
s3 ^= rk[3];
+#ifndef WOLFSSL_AES_SMALL_TABLES
+ s0 |= PreFetchTe();
+
/*
* Nr - 1 full rounds:
*/
for (;;) {
t0 =
- Te[0][GETBYTE(s0, 3)] ^
- Te[1][GETBYTE(s1, 2)] ^
- Te[2][GETBYTE(s2, 1)] ^
- Te[3][GETBYTE(s3, 0)] ^
+ Te[0][GETBYTE(s0, 3)] ^
+ Te[1][GETBYTE(s1, 2)] ^
+ Te[2][GETBYTE(s2, 1)] ^
+ Te[3][GETBYTE(s3, 0)] ^
rk[4];
t1 =
- Te[0][GETBYTE(s1, 3)] ^
- Te[1][GETBYTE(s2, 2)] ^
- Te[2][GETBYTE(s3, 1)] ^
- Te[3][GETBYTE(s0, 0)] ^
+ Te[0][GETBYTE(s1, 3)] ^
+ Te[1][GETBYTE(s2, 2)] ^
+ Te[2][GETBYTE(s3, 1)] ^
+ Te[3][GETBYTE(s0, 0)] ^
rk[5];
t2 =
Te[0][GETBYTE(s2, 3)] ^
- Te[1][GETBYTE(s3, 2)] ^
- Te[2][GETBYTE(s0, 1)] ^
- Te[3][GETBYTE(s1, 0)] ^
+ Te[1][GETBYTE(s3, 2)] ^
+ Te[2][GETBYTE(s0, 1)] ^
+ Te[3][GETBYTE(s1, 0)] ^
rk[6];
t3 =
Te[0][GETBYTE(s3, 3)] ^
- Te[1][GETBYTE(s0, 2)] ^
- Te[2][GETBYTE(s1, 1)] ^
- Te[3][GETBYTE(s2, 0)] ^
+ Te[1][GETBYTE(s0, 2)] ^
+ Te[2][GETBYTE(s1, 1)] ^
+ Te[3][GETBYTE(s2, 0)] ^
rk[7];
rk += 8;
@@ -1204,44 +1768,158 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
*/
s0 =
- (Te[4][GETBYTE(t0, 3)] & 0xff000000) ^
- (Te[4][GETBYTE(t1, 2)] & 0x00ff0000) ^
- (Te[4][GETBYTE(t2, 1)] & 0x0000ff00) ^
- (Te[4][GETBYTE(t3, 0)] & 0x000000ff) ^
+ (Te[2][GETBYTE(t0, 3)] & 0xff000000) ^
+ (Te[3][GETBYTE(t1, 2)] & 0x00ff0000) ^
+ (Te[0][GETBYTE(t2, 1)] & 0x0000ff00) ^
+ (Te[1][GETBYTE(t3, 0)] & 0x000000ff) ^
rk[0];
s1 =
- (Te[4][GETBYTE(t1, 3)] & 0xff000000) ^
- (Te[4][GETBYTE(t2, 2)] & 0x00ff0000) ^
- (Te[4][GETBYTE(t3, 1)] & 0x0000ff00) ^
- (Te[4][GETBYTE(t0, 0)] & 0x000000ff) ^
+ (Te[2][GETBYTE(t1, 3)] & 0xff000000) ^
+ (Te[3][GETBYTE(t2, 2)] & 0x00ff0000) ^
+ (Te[0][GETBYTE(t3, 1)] & 0x0000ff00) ^
+ (Te[1][GETBYTE(t0, 0)] & 0x000000ff) ^
rk[1];
s2 =
- (Te[4][GETBYTE(t2, 3)] & 0xff000000) ^
- (Te[4][GETBYTE(t3, 2)] & 0x00ff0000) ^
- (Te[4][GETBYTE(t0, 1)] & 0x0000ff00) ^
- (Te[4][GETBYTE(t1, 0)] & 0x000000ff) ^
+ (Te[2][GETBYTE(t2, 3)] & 0xff000000) ^
+ (Te[3][GETBYTE(t3, 2)] & 0x00ff0000) ^
+ (Te[0][GETBYTE(t0, 1)] & 0x0000ff00) ^
+ (Te[1][GETBYTE(t1, 0)] & 0x000000ff) ^
rk[2];
s3 =
- (Te[4][GETBYTE(t3, 3)] & 0xff000000) ^
- (Te[4][GETBYTE(t0, 2)] & 0x00ff0000) ^
- (Te[4][GETBYTE(t1, 1)] & 0x0000ff00) ^
- (Te[4][GETBYTE(t2, 0)] & 0x000000ff) ^
+ (Te[2][GETBYTE(t3, 3)] & 0xff000000) ^
+ (Te[3][GETBYTE(t0, 2)] & 0x00ff0000) ^
+ (Te[0][GETBYTE(t1, 1)] & 0x0000ff00) ^
+ (Te[1][GETBYTE(t2, 0)] & 0x000000ff) ^
rk[3];
+#else
+ s0 |= PreFetchSBox();
+
+ r *= 2;
+ /* Two rounds at a time */
+ for (rk += 4; r > 1; r--, rk += 4) {
+ t0 =
+ ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^
+ ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^
+ ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^
+ ((word32)Tsbox[GETBYTE(s3, 0)]);
+ t1 =
+ ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^
+ ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^
+ ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^
+ ((word32)Tsbox[GETBYTE(s0, 0)]);
+ t2 =
+ ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^
+ ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^
+ ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^
+ ((word32)Tsbox[GETBYTE(s1, 0)]);
+ t3 =
+ ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^
+ ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^
+ ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^
+ ((word32)Tsbox[GETBYTE(s2, 0)]);
+
+ s0 =
+ (col_mul(t0, 3, 2, 0, 1) << 24) ^
+ (col_mul(t0, 2, 1, 0, 3) << 16) ^
+ (col_mul(t0, 1, 0, 2, 3) << 8) ^
+ (col_mul(t0, 0, 3, 2, 1) ) ^
+ rk[0];
+ s1 =
+ (col_mul(t1, 3, 2, 0, 1) << 24) ^
+ (col_mul(t1, 2, 1, 0, 3) << 16) ^
+ (col_mul(t1, 1, 0, 2, 3) << 8) ^
+ (col_mul(t1, 0, 3, 2, 1) ) ^
+ rk[1];
+ s2 =
+ (col_mul(t2, 3, 2, 0, 1) << 24) ^
+ (col_mul(t2, 2, 1, 0, 3) << 16) ^
+ (col_mul(t2, 1, 0, 2, 3) << 8) ^
+ (col_mul(t2, 0, 3, 2, 1) ) ^
+ rk[2];
+ s3 =
+ (col_mul(t3, 3, 2, 0, 1) << 24) ^
+ (col_mul(t3, 2, 1, 0, 3) << 16) ^
+ (col_mul(t3, 1, 0, 2, 3) << 8) ^
+ (col_mul(t3, 0, 3, 2, 1) ) ^
+ rk[3];
+ }
+
+ t0 =
+ ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^
+ ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^
+ ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^
+ ((word32)Tsbox[GETBYTE(s3, 0)]);
+ t1 =
+ ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^
+ ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^
+ ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^
+ ((word32)Tsbox[GETBYTE(s0, 0)]);
+ t2 =
+ ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^
+ ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^
+ ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^
+ ((word32)Tsbox[GETBYTE(s1, 0)]);
+ t3 =
+ ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^
+ ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^
+ ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^
+ ((word32)Tsbox[GETBYTE(s2, 0)]);
+ s0 = t0 ^ rk[0];
+ s1 = t1 ^ rk[1];
+ s2 = t2 ^ rk[2];
+ s3 = t3 ^ rk[3];
+#endif
/* write out */
- #ifdef LITTLE_ENDIAN_ORDER
- s0 = ByteReverseWord32(s0);
- s1 = ByteReverseWord32(s1);
- s2 = ByteReverseWord32(s2);
- s3 = ByteReverseWord32(s3);
- #endif
+#ifdef LITTLE_ENDIAN_ORDER
+ s0 = ByteReverseWord32(s0);
+ s1 = ByteReverseWord32(s1);
+ s2 = ByteReverseWord32(s2);
+ s3 = ByteReverseWord32(s3);
+#endif
XMEMCPY(outBlock, &s0, sizeof(s0));
XMEMCPY(outBlock + sizeof(s0), &s1, sizeof(s1));
XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2));
XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3));
+
}
+#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */
+
+#if defined(HAVE_AES_DECRYPT)
+#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) || \
+ defined(WOLFSSL_AES_DIRECT)
+
+#ifndef WOLFSSL_AES_SMALL_TABLES
+/* load 4 Td Tables into cache by cache line stride */
+static WC_INLINE word32 PreFetchTd(void)
+{
+ word32 x = 0;
+ int i,j;
+ for (i = 0; i < 4; i++) {
+ /* 256 elements, each one is 4 bytes */
+ for (j = 0; j < 256; j += WC_CACHE_LINE_SZ/4) {
+ x &= Td[i][j];
+ }
+ }
+ return x;
+}
+#endif
+
+/* load Td Table4 into cache by cache line stride */
+static WC_INLINE word32 PreFetchTd4(void)
+{
+ word32 x = 0;
+ int i;
+
+ for (i = 0; i < 256; i += WC_CACHE_LINE_SZ) {
+ x &= (word32)Td4[i];
+ }
+ return x;
+}
+
+/* Software AES - ECB Decrypt */
static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
word32 s0, s1, s2, s3;
@@ -1251,7 +1929,7 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
const word32* rk = aes->key;
if (r > 7 || r == 0) {
WOLFSSL_MSG("AesDecrypt encountered improper key, set it up");
- return; /* stop instead of segfaulting, set up your keys! */
+ return; /* stop instead of seg-faulting, set up your keys! */
}
#ifdef WOLFSSL_AESNI
if (haveAESNI && aes->use_aesni) {
@@ -1265,7 +1943,8 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
#endif
/* if input and output same will overwrite input iv */
- XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
+ if ((const byte*)aes->tmp != inBlock)
+ XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
aes->rounds);
return;
@@ -1275,6 +1954,9 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
printf("Skipping AES-NI\n");
#endif
}
+#endif /* WOLFSSL_AESNI */
+#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+ return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE);
#endif
/*
@@ -1286,18 +1968,21 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));
- #ifdef LITTLE_ENDIAN_ORDER
- s0 = ByteReverseWord32(s0);
- s1 = ByteReverseWord32(s1);
- s2 = ByteReverseWord32(s2);
- s3 = ByteReverseWord32(s3);
- #endif
+#ifdef LITTLE_ENDIAN_ORDER
+ s0 = ByteReverseWord32(s0);
+ s1 = ByteReverseWord32(s1);
+ s2 = ByteReverseWord32(s2);
+ s3 = ByteReverseWord32(s3);
+#endif
s0 ^= rk[0];
s1 ^= rk[1];
s2 ^= rk[2];
s3 ^= rk[3];
+#ifndef WOLFSSL_AES_SMALL_TABLES
+ s0 |= PreFetchTd();
+
/*
* Nr - 1 full rounds:
*/
@@ -1362,70 +2047,170 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
* apply last round and
* map cipher state to byte array block:
*/
+
+ t0 |= PreFetchTd4();
+
s0 =
- (Td[4][GETBYTE(t0, 3)] & 0xff000000) ^
- (Td[4][GETBYTE(t3, 2)] & 0x00ff0000) ^
- (Td[4][GETBYTE(t2, 1)] & 0x0000ff00) ^
- (Td[4][GETBYTE(t1, 0)] & 0x000000ff) ^
+ ((word32)Td4[GETBYTE(t0, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(t3, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(t2, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(t1, 0)]) ^
rk[0];
s1 =
- (Td[4][GETBYTE(t1, 3)] & 0xff000000) ^
- (Td[4][GETBYTE(t0, 2)] & 0x00ff0000) ^
- (Td[4][GETBYTE(t3, 1)] & 0x0000ff00) ^
- (Td[4][GETBYTE(t2, 0)] & 0x000000ff) ^
+ ((word32)Td4[GETBYTE(t1, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(t0, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(t3, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(t2, 0)]) ^
rk[1];
s2 =
- (Td[4][GETBYTE(t2, 3)] & 0xff000000) ^
- (Td[4][GETBYTE(t1, 2)] & 0x00ff0000) ^
- (Td[4][GETBYTE(t0, 1)] & 0x0000ff00) ^
- (Td[4][GETBYTE(t3, 0)] & 0x000000ff) ^
+ ((word32)Td4[GETBYTE(t2, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(t1, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(t0, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(t3, 0)]) ^
rk[2];
s3 =
- (Td[4][GETBYTE(t3, 3)] & 0xff000000) ^
- (Td[4][GETBYTE(t2, 2)] & 0x00ff0000) ^
- (Td[4][GETBYTE(t1, 1)] & 0x0000ff00) ^
- (Td[4][GETBYTE(t0, 0)] & 0x000000ff) ^
+ ((word32)Td4[GETBYTE(t3, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(t2, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(t1, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(t0, 0)]) ^
rk[3];
+#else
+ s0 |= PreFetchTd4();
+
+ r *= 2;
+ for (rk += 4; r > 1; r--, rk += 4) {
+ t0 =
+ ((word32)Td4[GETBYTE(s0, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(s3, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(s2, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(s1, 0)]) ^
+ rk[0];
+ t1 =
+ ((word32)Td4[GETBYTE(s1, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(s0, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(s3, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(s2, 0)]) ^
+ rk[1];
+ t2 =
+ ((word32)Td4[GETBYTE(s2, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(s1, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(s0, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(s3, 0)]) ^
+ rk[2];
+ t3 =
+ ((word32)Td4[GETBYTE(s3, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(s2, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(s1, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(s0, 0)]) ^
+ rk[3];
+
+ s0 =
+ (inv_col_mul(t0, 0, 2, 1, 3) << 24) ^
+ (inv_col_mul(t0, 3, 1, 0, 2) << 16) ^
+ (inv_col_mul(t0, 2, 0, 3, 1) << 8) ^
+ (inv_col_mul(t0, 1, 3, 2, 0) );
+ s1 =
+ (inv_col_mul(t1, 0, 2, 1, 3) << 24) ^
+ (inv_col_mul(t1, 3, 1, 0, 2) << 16) ^
+ (inv_col_mul(t1, 2, 0, 3, 1) << 8) ^
+ (inv_col_mul(t1, 1, 3, 2, 0) );
+ s2 =
+ (inv_col_mul(t2, 0, 2, 1, 3) << 24) ^
+ (inv_col_mul(t2, 3, 1, 0, 2) << 16) ^
+ (inv_col_mul(t2, 2, 0, 3, 1) << 8) ^
+ (inv_col_mul(t2, 1, 3, 2, 0) );
+ s3 =
+ (inv_col_mul(t3, 0, 2, 1, 3) << 24) ^
+ (inv_col_mul(t3, 3, 1, 0, 2) << 16) ^
+ (inv_col_mul(t3, 2, 0, 3, 1) << 8) ^
+ (inv_col_mul(t3, 1, 3, 2, 0) );
+ }
+
+ t0 =
+ ((word32)Td4[GETBYTE(s0, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(s3, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(s2, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(s1, 0)]);
+ t1 =
+ ((word32)Td4[GETBYTE(s1, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(s0, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(s3, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(s2, 0)]);
+ t2 =
+ ((word32)Td4[GETBYTE(s2, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(s1, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(s0, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(s3, 0)]);
+ t3 =
+ ((word32)Td4[GETBYTE(s3, 3)] << 24) ^
+ ((word32)Td4[GETBYTE(s2, 2)] << 16) ^
+ ((word32)Td4[GETBYTE(s1, 1)] << 8) ^
+ ((word32)Td4[GETBYTE(s0, 0)]);
+ s0 = t0 ^ rk[0];
+ s1 = t1 ^ rk[1];
+ s2 = t2 ^ rk[2];
+ s3 = t3 ^ rk[3];
+#endif
/* write out */
- #ifdef LITTLE_ENDIAN_ORDER
- s0 = ByteReverseWord32(s0);
- s1 = ByteReverseWord32(s1);
- s2 = ByteReverseWord32(s2);
- s3 = ByteReverseWord32(s3);
- #endif
+#ifdef LITTLE_ENDIAN_ORDER
+ s0 = ByteReverseWord32(s0);
+ s1 = ByteReverseWord32(s1);
+ s2 = ByteReverseWord32(s2);
+ s3 = ByteReverseWord32(s3);
+#endif
XMEMCPY(outBlock, &s0, sizeof(s0));
XMEMCPY(outBlock + sizeof(s0), &s1, sizeof(s1));
XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2));
XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3));
}
+#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */
+#endif /* HAVE_AES_DECRYPT */
#endif /* NEED_AES_TABLES */
+
/* wc_AesSetKey */
-#ifdef STM32F2_CRYPTO
- int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
- int dir)
+#if defined(STM32_CRYPTO)
+
+ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
{
- word32 *rk = aes->key;
+ word32 *rk;
- if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
+ (void)dir;
+
+ if (aes == NULL || (keylen != 16 &&
+ #ifdef WOLFSSL_AES_192
+ keylen != 24 &&
+ #endif
+ keylen != 32)) {
return BAD_FUNC_ARG;
+ }
+ rk = aes->key;
+ aes->keylen = keylen;
aes->rounds = keylen/4 + 6;
XMEMCPY(rk, userKey, keylen);
+ #if !defined(WOLFSSL_STM32_CUBEMX) || defined(STM32_HAL_V2)
ByteReverseWords(rk, rk, keylen);
+ #endif
+ #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+ defined(WOLFSSL_AES_OFB)
+ aes->left = 0;
+ #endif
return wc_AesSetIV(aes, iv);
}
-
- int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
- const byte* iv, int dir)
- {
- return wc_AesSetKey(aes, userKey, keylen, iv, dir);
- }
+ #if defined(WOLFSSL_AES_DIRECT)
+ int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
+ {
+ return wc_AesSetKey(aes, userKey, keylen, iv, dir);
+ }
+ #endif
#elif defined(HAVE_COLDFIRE_SEC)
#if defined (HAVE_THREADX)
@@ -1447,30 +2232,30 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
extern volatile unsigned char __MBAR[];
- int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
- int dir)
+ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
{
if (AESBuffIn == NULL) {
- #if defined (HAVE_THREADX)
- int s1, s2, s3, s4, s5 ;
- s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
- sizeof(SECdescriptorType), TX_NO_WAIT);
- s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn,
- AES_BUFFER_SIZE, TX_NO_WAIT);
- s2 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffOut,
- AES_BUFFER_SIZE, TX_NO_WAIT);
- s3 = tx_byte_allocate(&mp_ncached, (void *)&secKey,
- AES_BLOCK_SIZE*2, TX_NO_WAIT);
- s4 = tx_byte_allocate(&mp_ncached, (void *)&secReg,
- AES_BLOCK_SIZE, TX_NO_WAIT);
-
- if(s1 || s2 || s3 || s4 || s5)
- return BAD_FUNC_ARG;
- #else
- #warning "Allocate non-Cache buffers"
- #endif
+ #if defined (HAVE_THREADX)
+ int s1, s2, s3, s4, s5;
+ s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
+ sizeof(SECdescriptorType), TX_NO_WAIT);
+ s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn,
+ AES_BUFFER_SIZE, TX_NO_WAIT);
+ s2 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffOut,
+ AES_BUFFER_SIZE, TX_NO_WAIT);
+ s3 = tx_byte_allocate(&mp_ncached, (void *)&secKey,
+ AES_BLOCK_SIZE*2, TX_NO_WAIT);
+ s4 = tx_byte_allocate(&mp_ncached, (void *)&secReg,
+ AES_BLOCK_SIZE, TX_NO_WAIT);
+
+ if (s1 || s2 || s3 || s4 || s5)
+ return BAD_FUNC_ARG;
+ #else
+ #warning "Allocate non-Cache buffers"
+ #endif
- InitMutex(&Mutex_AesSEC);
+ wc_InitMutex(&Mutex_AesSEC);
}
if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
@@ -1479,29 +2264,165 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
if (aes == NULL)
return BAD_FUNC_ARG;
+ aes->keylen = keylen;
aes->rounds = keylen/4 + 6;
XMEMCPY(aes->key, userKey, keylen);
if (iv)
XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
+ #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+ defined(WOLFSSL_AES_OFB)
+ aes->left = 0;
+ #endif
+
return 0;
}
-#elif defined(FREESCALE_MMCAU)
+#elif defined(FREESCALE_LTC)
int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
int dir)
{
- byte *rk = (byte*)aes->key;
+ if (aes == NULL || !((keylen == 16) || (keylen == 24) || (keylen == 32)))
+ return BAD_FUNC_ARG;
+
+ aes->rounds = keylen/4 + 6;
+ XMEMCPY(aes->key, userKey, keylen);
+
+ #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+ defined(WOLFSSL_AES_OFB)
+ aes->left = 0;
+ #endif
+
+ return wc_AesSetIV(aes, iv);
+ }
+
+ int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
+ {
+ return wc_AesSetKey(aes, userKey, keylen, iv, dir);
+ }
+#elif defined(FREESCALE_MMCAU)
+ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
+ {
+ int ret;
+ byte* rk;
+ byte* tmpKey = (byte*)userKey;
+ int tmpKeyDynamic = 0;
+ word32 alignOffset = 0;
+
+ (void)dir;
if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
return BAD_FUNC_ARG;
+ if (aes == NULL)
+ return BAD_FUNC_ARG;
+ rk = (byte*)aes->key;
if (rk == NULL)
return BAD_FUNC_ARG;
+ #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+ defined(WOLFSSL_AES_OFB)
+ aes->left = 0;
+ #endif
+
aes->rounds = keylen/4 + 6;
- cau_aes_set_key(userKey, keylen*8, rk);
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ if ((wolfssl_word)userKey % WOLFSSL_MMCAU_ALIGNMENT) {
+ #ifndef NO_WOLFSSL_ALLOC_ALIGN
+ byte* tmp = (byte*)XMALLOC(keylen + WOLFSSL_MMCAU_ALIGNMENT,
+ aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL) {
+ return MEMORY_E;
+ }
+ alignOffset = WOLFSSL_MMCAU_ALIGNMENT -
+ ((wolfssl_word)tmp % WOLFSSL_MMCAU_ALIGNMENT);
+ tmpKey = tmp + alignOffset;
+ XMEMCPY(tmpKey, userKey, keylen);
+ tmpKeyDynamic = 1;
+ #else
+ WOLFSSL_MSG("Bad cau_aes_set_key alignment");
+ return BAD_ALIGN_E;
+ #endif
+ }
+ #endif
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if(ret == 0) {
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ cau_aes_set_key(tmpKey, keylen*8, rk);
+ #else
+ MMCAU_AES_SetKey(tmpKey, keylen, rk);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
+
+ ret = wc_AesSetIV(aes, iv);
+ }
+
+ if (tmpKeyDynamic == 1) {
+ XFREE(tmpKey - alignOffset, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ return ret;
+ }
+
+ int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
+ {
+ return wc_AesSetKey(aes, userKey, keylen, iv, dir);
+ }
+
+#elif defined(WOLFSSL_NRF51_AES)
+ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
+ {
+ int ret;
+
+ (void)dir;
+ (void)iv;
+
+ if (aes == NULL || keylen != 16)
+ return BAD_FUNC_ARG;
+
+ aes->keylen = keylen;
+ aes->rounds = keylen/4 + 6;
+ ret = nrf51_aes_set_key(userKey);
+
+ #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+ defined(WOLFSSL_AES_OFB)
+ aes->left = 0;
+ #endif
+
+ return ret;
+ }
+
+ int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
+ {
+ return wc_AesSetKey(aes, userKey, keylen, iv, dir);
+ }
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+
+ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
+ {
+ (void)dir;
+ (void)iv;
+
+ if (aes == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) {
+ return BAD_FUNC_ARG;
+ }
+
+ aes->keylen = keylen;
+ aes->rounds = keylen/4 + 6;
+
+ XMEMCPY(aes->key, userKey, keylen);
+ #if defined(WOLFSSL_AES_COUNTER)
+ aes->left = 0;
+ #endif
return wc_AesSetIV(aes, iv);
}
@@ -1510,51 +2431,141 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
return wc_AesSetKey(aes, userKey, keylen, iv, dir);
}
+#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES)
+
+ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
+ int dir)
+ {
+ SaSiError_t ret = SASI_OK;
+ SaSiAesIv_t iv_aes;
+
+ if (aes == NULL ||
+ (keylen != AES_128_KEY_SIZE &&
+ keylen != AES_192_KEY_SIZE &&
+ keylen != AES_256_KEY_SIZE)) {
+ return BAD_FUNC_ARG;
+ }
+ #if defined(AES_MAX_KEY_SIZE)
+ if (keylen > (AES_MAX_KEY_SIZE/8)) {
+ return BAD_FUNC_ARG;
+ }
+ #endif
+ if (dir != AES_ENCRYPTION &&
+ dir != AES_DECRYPTION) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (dir == AES_ENCRYPTION) {
+ aes->ctx.mode = SASI_AES_ENCRYPT;
+ SaSi_AesInit(&aes->ctx.user_ctx,
+ SASI_AES_ENCRYPT,
+ SASI_AES_MODE_CBC,
+ SASI_AES_PADDING_NONE);
+ }
+ else {
+ aes->ctx.mode = SASI_AES_DECRYPT;
+ SaSi_AesInit(&aes->ctx.user_ctx,
+ SASI_AES_DECRYPT,
+ SASI_AES_MODE_CBC,
+ SASI_AES_PADDING_NONE);
+ }
+
+ aes->keylen = keylen;
+ aes->rounds = keylen/4 + 6;
+ XMEMCPY(aes->key, userKey, keylen);
+
+ aes->ctx.key.pKey = (uint8_t*)aes->key;
+ aes->ctx.key.keySize= keylen;
+
+ ret = SaSi_AesSetKey(&aes->ctx.user_ctx,
+ SASI_AES_USER_KEY,
+ &aes->ctx.key,
+ sizeof(aes->ctx.key));
+ if (ret != SASI_OK) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wc_AesSetIV(aes, iv);
+
+ if (iv)
+ XMEMCPY(iv_aes, iv, AES_BLOCK_SIZE);
+ else
+ XMEMSET(iv_aes, 0, AES_BLOCK_SIZE);
+
+
+ ret = SaSi_AesSetIv(&aes->ctx.user_ctx, iv_aes);
+ if (ret != SASI_OK) {
+ return ret;
+ }
+ return ret;
+ }
+ #if defined(WOLFSSL_AES_DIRECT)
+ int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
+ {
+ return wc_AesSetKey(aes, userKey, keylen, iv, dir);
+ }
+ #endif
+
+#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
+ /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
+
+#elif defined(WOLFSSL_AFALG)
+ /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_AES)
+ /* implemented in wolfcrypt/src/port/devcrypto/devcrypto_aes.c */
+
#else
+
+ /* Software AES - SetKey */
static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen,
const byte* iv, int dir)
{
- word32 temp, *rk = aes->key;
+ word32 *rk = aes->key;
+ #ifdef NEED_AES_TABLES
+ word32 temp;
unsigned int i = 0;
+ #endif
#ifdef WOLFSSL_AESNI
aes->use_aesni = 0;
#endif /* WOLFSSL_AESNI */
- #ifdef WOLFSSL_AES_COUNTER
+ #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \
+ defined(WOLFSSL_AES_OFB)
aes->left = 0;
- #endif /* WOLFSSL_AES_COUNTER */
+ #endif
- aes->rounds = keylen/4 + 6;
+ aes->keylen = keylen;
+ aes->rounds = (keylen/4) + 6;
XMEMCPY(rk, userKey, keylen);
- #ifdef LITTLE_ENDIAN_ORDER
- ByteReverseWords(rk, rk, keylen);
- #endif
-
- #ifdef WOLFSSL_PIC32MZ_CRYPT
- {
- word32 *akey1 = aes->key_ce;
- word32 *areg = aes->iv_ce ;
- aes->keylen = keylen ;
- XMEMCPY(akey1, userKey, keylen);
- if (iv)
- XMEMCPY(areg, iv, AES_BLOCK_SIZE);
- else
- XMEMSET(areg, 0, AES_BLOCK_SIZE);
- }
- #endif
+ #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT) && \
+ (!defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES))
+ ByteReverseWords(rk, rk, keylen);
+ #endif
- switch(keylen)
- {
+#ifdef NEED_AES_TABLES
+ switch (keylen) {
+ #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \
+ defined(WOLFSSL_AES_128)
case 16:
while (1)
{
temp = rk[3];
rk[4] = rk[0] ^
- (Te[4][GETBYTE(temp, 2)] & 0xff000000) ^
- (Te[4][GETBYTE(temp, 1)] & 0x00ff0000) ^
- (Te[4][GETBYTE(temp, 0)] & 0x0000ff00) ^
- (Te[4][GETBYTE(temp, 3)] & 0x000000ff) ^
+ #ifndef WOLFSSL_AES_SMALL_TABLES
+ (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^
+ (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^
+ (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^
+ (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^
+ #else
+ ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^
+ ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^
+ ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^
+ ((word32)Tsbox[GETBYTE(temp, 3)]) ^
+ #endif
rcon[i];
rk[5] = rk[1] ^ rk[4];
rk[6] = rk[2] ^ rk[5];
@@ -1564,17 +2575,27 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
rk += 4;
}
break;
+ #endif /* 128 */
+ #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192 && \
+ defined(WOLFSSL_AES_192)
case 24:
/* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */
while (1)
{
temp = rk[ 5];
rk[ 6] = rk[ 0] ^
- (Te[4][GETBYTE(temp, 2)] & 0xff000000) ^
- (Te[4][GETBYTE(temp, 1)] & 0x00ff0000) ^
- (Te[4][GETBYTE(temp, 0)] & 0x0000ff00) ^
- (Te[4][GETBYTE(temp, 3)] & 0x000000ff) ^
+ #ifndef WOLFSSL_AES_SMALL_TABLES
+ (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^
+ (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^
+ (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^
+ (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^
+ #else
+ ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^
+ ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^
+ ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^
+ ((word32)Tsbox[GETBYTE(temp, 3)]) ^
+ #endif
rcon[i];
rk[ 7] = rk[ 1] ^ rk[ 6];
rk[ 8] = rk[ 2] ^ rk[ 7];
@@ -1586,16 +2607,26 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
rk += 6;
}
break;
+ #endif /* 192 */
+ #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256 && \
+ defined(WOLFSSL_AES_256)
case 32:
while (1)
{
temp = rk[ 7];
rk[ 8] = rk[ 0] ^
- (Te[4][GETBYTE(temp, 2)] & 0xff000000) ^
- (Te[4][GETBYTE(temp, 1)] & 0x00ff0000) ^
- (Te[4][GETBYTE(temp, 0)] & 0x0000ff00) ^
- (Te[4][GETBYTE(temp, 3)] & 0x000000ff) ^
+ #ifndef WOLFSSL_AES_SMALL_TABLES
+ (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^
+ (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^
+ (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^
+ (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^
+ #else
+ ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^
+ ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^
+ ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^
+ ((word32)Tsbox[GETBYTE(temp, 3)]) ^
+ #endif
rcon[i];
rk[ 9] = rk[ 1] ^ rk[ 8];
rk[10] = rk[ 2] ^ rk[ 9];
@@ -1604,10 +2635,17 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
break;
temp = rk[11];
rk[12] = rk[ 4] ^
- (Te[4][GETBYTE(temp, 3)] & 0xff000000) ^
- (Te[4][GETBYTE(temp, 2)] & 0x00ff0000) ^
- (Te[4][GETBYTE(temp, 1)] & 0x0000ff00) ^
- (Te[4][GETBYTE(temp, 0)] & 0x000000ff);
+ #ifndef WOLFSSL_AES_SMALL_TABLES
+ (Te[2][GETBYTE(temp, 3)] & 0xff000000) ^
+ (Te[3][GETBYTE(temp, 2)] & 0x00ff0000) ^
+ (Te[0][GETBYTE(temp, 1)] & 0x0000ff00) ^
+ (Te[1][GETBYTE(temp, 0)] & 0x000000ff);
+ #else
+ ((word32)Tsbox[GETBYTE(temp, 3)] << 24) ^
+ ((word32)Tsbox[GETBYTE(temp, 2)] << 16) ^
+ ((word32)Tsbox[GETBYTE(temp, 1)] << 8) ^
+ ((word32)Tsbox[GETBYTE(temp, 0)]);
+ #endif
rk[13] = rk[ 5] ^ rk[12];
rk[14] = rk[ 6] ^ rk[13];
rk[15] = rk[ 7] ^ rk[14];
@@ -1615,13 +2653,14 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
rk += 8;
}
break;
+ #endif /* 256 */
default:
return BAD_FUNC_ARG;
- }
+ } /* switch */
- if (dir == AES_DECRYPTION)
- {
+ #if defined(HAVE_AES_DECRYPT)
+ if (dir == AES_DECRYPTION) {
unsigned int j;
rk = aes->key;
@@ -1632,78 +2671,169 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
}
+ #if !defined(WOLFSSL_AES_SMALL_TABLES)
/* apply the inverse MixColumn transform to all round keys but the
first and the last: */
for (i = 1; i < aes->rounds; i++) {
rk += 4;
rk[0] =
- Td[0][Te[4][GETBYTE(rk[0], 3)] & 0xff] ^
- Td[1][Te[4][GETBYTE(rk[0], 2)] & 0xff] ^
- Td[2][Te[4][GETBYTE(rk[0], 1)] & 0xff] ^
- Td[3][Te[4][GETBYTE(rk[0], 0)] & 0xff];
+ Td[0][Te[1][GETBYTE(rk[0], 3)] & 0xff] ^
+ Td[1][Te[1][GETBYTE(rk[0], 2)] & 0xff] ^
+ Td[2][Te[1][GETBYTE(rk[0], 1)] & 0xff] ^
+ Td[3][Te[1][GETBYTE(rk[0], 0)] & 0xff];
rk[1] =
- Td[0][Te[4][GETBYTE(rk[1], 3)] & 0xff] ^
- Td[1][Te[4][GETBYTE(rk[1], 2)] & 0xff] ^
- Td[2][Te[4][GETBYTE(rk[1], 1)] & 0xff] ^
- Td[3][Te[4][GETBYTE(rk[1], 0)] & 0xff];
+ Td[0][Te[1][GETBYTE(rk[1], 3)] & 0xff] ^
+ Td[1][Te[1][GETBYTE(rk[1], 2)] & 0xff] ^
+ Td[2][Te[1][GETBYTE(rk[1], 1)] & 0xff] ^
+ Td[3][Te[1][GETBYTE(rk[1], 0)] & 0xff];
rk[2] =
- Td[0][Te[4][GETBYTE(rk[2], 3)] & 0xff] ^
- Td[1][Te[4][GETBYTE(rk[2], 2)] & 0xff] ^
- Td[2][Te[4][GETBYTE(rk[2], 1)] & 0xff] ^
- Td[3][Te[4][GETBYTE(rk[2], 0)] & 0xff];
+ Td[0][Te[1][GETBYTE(rk[2], 3)] & 0xff] ^
+ Td[1][Te[1][GETBYTE(rk[2], 2)] & 0xff] ^
+ Td[2][Te[1][GETBYTE(rk[2], 1)] & 0xff] ^
+ Td[3][Te[1][GETBYTE(rk[2], 0)] & 0xff];
rk[3] =
- Td[0][Te[4][GETBYTE(rk[3], 3)] & 0xff] ^
- Td[1][Te[4][GETBYTE(rk[3], 2)] & 0xff] ^
- Td[2][Te[4][GETBYTE(rk[3], 1)] & 0xff] ^
- Td[3][Te[4][GETBYTE(rk[3], 0)] & 0xff];
+ Td[0][Te[1][GETBYTE(rk[3], 3)] & 0xff] ^
+ Td[1][Te[1][GETBYTE(rk[3], 2)] & 0xff] ^
+ Td[2][Te[1][GETBYTE(rk[3], 1)] & 0xff] ^
+ Td[3][Te[1][GETBYTE(rk[3], 0)] & 0xff];
}
+ #endif
+ }
+ #else
+ (void)dir;
+ #endif /* HAVE_AES_DECRYPT */
+ (void)temp;
+#endif /* NEED_AES_TABLES */
+
+#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+ XMEMCPY((byte*)aes->key, userKey, keylen);
+ if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == CRYPTO_WORD_ENDIAN_BIG) {
+ ByteReverseWords(aes->key, aes->key, 32);
}
+#endif
return wc_AesSetIV(aes, iv);
}
- int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
- int dir)
+ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
{
+ int ret;
+ #if defined(AES_MAX_KEY_SIZE)
+ const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);
+ #endif
- if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
+ #ifdef WOLFSSL_IMX6_CAAM_BLOB
+ byte local[32];
+ word32 localSz = 32;
+
+ if (keylen == (16 + WC_CAAM_BLOB_SZ) ||
+ keylen == (24 + WC_CAAM_BLOB_SZ) ||
+ keylen == (32 + WC_CAAM_BLOB_SZ)) {
+ if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz) != 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* set local values */
+ userKey = local;
+ keylen = localSz;
+ }
+ #endif
+ if (aes == NULL ||
+ !((keylen == 16) || (keylen == 24) || (keylen == 32))) {
return BAD_FUNC_ARG;
+ }
- #ifdef HAVE_CAVIUM
- if (aes->magic == WOLFSSL_AES_CAVIUM_MAGIC)
- return wc_AesCaviumSetKey(aes, userKey, keylen, iv);
+ #if defined(AES_MAX_KEY_SIZE)
+ /* Check key length */
+ if (keylen > max_key_len) {
+ return BAD_FUNC_ARG;
+ }
+ #endif
+ aes->keylen = keylen;
+ aes->rounds = keylen/4 + 6;
+
+ #if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \
+ (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \
+ (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES))
+ #ifdef WOLF_CRYPTO_CB
+ if (aes->devId != INVALID_DEVID)
#endif
+ {
+ XMEMCPY(aes->devKey, userKey, keylen);
+ }
+ #endif
- #ifdef WOLFSSL_AESNI
+ #ifdef WOLFSSL_AESNI
if (checkAESNI == 0) {
haveAESNI = Check_CPU_support_AES();
checkAESNI = 1;
}
if (haveAESNI) {
+ #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \
+ defined(WOLFSSL_AES_OFB)
+ aes->left = 0;
+ #endif /* WOLFSSL_AES_COUNTER */
aes->use_aesni = 1;
if (iv)
XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
+ else
+ XMEMSET(aes->reg, 0, AES_BLOCK_SIZE);
if (dir == AES_ENCRYPTION)
return AES_set_encrypt_key(userKey, keylen * 8, aes);
+ #ifdef HAVE_AES_DECRYPT
else
return AES_set_decrypt_key(userKey, keylen * 8, aes);
+ #endif
}
- #endif /* WOLFSSL_AESNI */
+ #endif /* WOLFSSL_AESNI */
+
+ ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
- return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
+ #if defined(WOLFSSL_DEVCRYPTO) && \
+ (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))
+ aes->ctx.cfd = -1;
+ #endif
+ #ifdef WOLFSSL_IMX6_CAAM_BLOB
+ ForceZero(local, sizeof(local));
+ #endif
+ return ret;
}
#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
+ /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */
+ int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+ const byte* iv, int dir)
+ {
+ int ret;
- /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */
- int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
- const byte* iv, int dir)
- {
- return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
- }
+ #ifdef WOLFSSL_IMX6_CAAM_BLOB
+ byte local[32];
+ word32 localSz = 32;
+
+ if (keylen == (16 + WC_CAAM_BLOB_SZ) ||
+ keylen == (24 + WC_CAAM_BLOB_SZ) ||
+ keylen == (32 + WC_CAAM_BLOB_SZ)) {
+ if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz)
+ != 0) {
+ return BAD_FUNC_ARG;
+ }
+ /* set local values */
+ userKey = local;
+ keylen = localSz;
+ }
+ #endif
+ ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
+
+ #ifdef WOLFSSL_IMX6_CAAM_BLOB
+ ForceZero(local, sizeof(local));
+ #endif
+
+ return ret;
+ }
#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
-#endif /* STM32F2_CRYPTO, wc_AesSetKey block */
+#endif /* wc_AesSetKey block */
/* wc_AesSetIV is shared between software and hardware */
@@ -1720,160 +2850,265 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
return 0;
}
-
-int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
- const byte* key, word32 keySz, const byte* iv)
-{
- int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- Aes* aes = NULL;
-#else
- Aes aes[1];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
- aes = (Aes*)XMALLOC(sizeof(Aes), NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (aes == NULL)
- return MEMORY_E;
-#endif
-
- ret = wc_AesSetKey(aes, key, keySz, iv, AES_DECRYPTION);
- if (ret == 0)
- ret = wc_AesCbcDecrypt(aes, out, in, inSz);
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(aes, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
- return ret;
-}
-
-
/* AES-DIRECT */
#if defined(WOLFSSL_AES_DIRECT)
- #if defined(FREESCALE_MMCAU)
+ #if defined(HAVE_COLDFIRE_SEC)
+ #error "Coldfire SEC doesn't yet support AES direct"
+ #elif defined(FREESCALE_LTC)
/* Allow direct access to one block encrypt */
void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
{
- byte* key;
+ byte *key;
+ uint32_t keySize;
+
key = (byte*)aes->key;
+ wc_AesGetKeySize(aes, &keySize);
- return cau_aes_encrypt(in, key, aes->rounds, out);
+ LTC_AES_EncryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE,
+ key, keySize);
}
/* Allow direct access to one block decrypt */
void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
{
- byte* key;
+ byte *key;
+ uint32_t keySize;
+
key = (byte*)aes->key;
+ wc_AesGetKeySize(aes, &keySize);
- return cau_aes_decrypt(in, key, aes->rounds, out);
+ LTC_AES_DecryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE,
+ key, keySize, kLTC_EncryptKey);
}
- #elif defined(STM32F2_CRYPTO)
- #error "STM32F2 crypto doesn't yet support AES direct"
+ #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
+ /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
- #elif defined(HAVE_COLDFIRE_SEC)
- #error "Coldfire SEC doesn't yet support AES direct"
+ #elif defined(WOLFSSL_AFALG)
+ /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
- #elif defined(WOLFSSL_PIC32MZ_CRYPT)
- #error "PIC32MZ doesn't yet support AES direct"
+ #elif defined(WOLFSSL_DEVCRYPTO_AES)
+ /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+
+ #elif defined(STM32_CRYPTO)
+ /* Allow direct access to one block encrypt */
+ void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
+ {
+ if (wolfSSL_CryptHwMutexLock() == 0) {
+ wc_AesEncrypt(aes, in, out);
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ }
+ #ifdef HAVE_AES_DECRYPT
+ /* Allow direct access to one block decrypt */
+ void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
+ {
+ if (wolfSSL_CryptHwMutexLock() == 0) {
+ wc_AesDecrypt(aes, in, out);
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ }
+ #endif /* HAVE_AES_DECRYPT */
+ #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+
+ /* Allow direct access to one block encrypt */
+ void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
+ {
+ wc_AesEncrypt(aes, in, out);
+ }
+ #ifdef HAVE_AES_DECRYPT
+ /* Allow direct access to one block decrypt */
+ void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
+ {
+ wc_AesDecrypt(aes, in, out);
+ }
+ #endif /* HAVE_AES_DECRYPT */
#else
/* Allow direct access to one block encrypt */
void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
{
wc_AesEncrypt(aes, in, out);
}
-
+ #ifdef HAVE_AES_DECRYPT
/* Allow direct access to one block decrypt */
void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
{
wc_AesDecrypt(aes, in, out);
}
-
- #endif /* FREESCALE_MMCAU, AES direct block */
+ #endif /* HAVE_AES_DECRYPT */
+ #endif /* AES direct block */
#endif /* WOLFSSL_AES_DIRECT */
/* AES-CBC */
-#ifdef STM32F2_CRYPTO
+#ifdef HAVE_AES_CBC
+#if defined(STM32_CRYPTO)
+
+#ifdef WOLFSSL_STM32_CUBEMX
int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- word32 *enc_key, *iv;
- CRYP_InitTypeDef AES_CRYP_InitStructure;
- CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
- CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
+ int ret = 0;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+ CRYP_HandleTypeDef hcryp;
- enc_key = aes->key;
- iv = aes->reg;
+ ret = wc_Stm32_Aes_Init(aes, &hcryp);
+ if (ret != 0)
+ return ret;
- /* crypto structure initialization */
- CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
- CRYP_StructInit(&AES_CRYP_InitStructure);
- CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
- /* reset registers to their default values */
- CRYP_DeInit();
+ #ifdef STM32_CRYPTO_AES_ONLY
+ hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
+ hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CBC;
+ hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE;
+ #elif defined(STM32_HAL_V2)
+ hcryp.Init.Algorithm = CRYP_AES_CBC;
+ ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE);
+ #endif
+ hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg;
+ HAL_CRYP_Init(&hcryp);
- /* load key into correct registers */
- switch(aes->rounds)
- {
- case 10: /* 128-bit key */
- AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
- AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0];
- AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
- AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2];
- AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
+ while (blocks--) {
+ #ifdef STM32_CRYPTO_AES_ONLY
+ ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
+ out, STM32_HAL_TIMEOUT);
+ #elif defined(STM32_HAL_V2)
+ ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE,
+ (uint32_t*)out, STM32_HAL_TIMEOUT);
+ #else
+ ret = HAL_CRYP_AESCBC_Encrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
+ out, STM32_HAL_TIMEOUT);
+ #endif
+ if (ret != HAL_OK) {
+ ret = WC_TIMEOUT_E;
break;
+ }
- case 12: /* 192-bit key */
- AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
- AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0];
- AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
- AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2];
- AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
- AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4];
- AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
- break;
+ /* store iv for next call */
+ XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
- case 14: /* 256-bit key */
- AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
- AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0];
- AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
- AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2];
- AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
- AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4];
- AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
- AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6];
- AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
- break;
+ sz -= AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
- default:
+ HAL_CRYP_DeInit(&hcryp);
+
+ wolfSSL_CryptHwMutexUnLock();
+
+ return ret;
+ }
+ #ifdef HAVE_AES_DECRYPT
+ int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ int ret = 0;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+ CRYP_HandleTypeDef hcryp;
+
+ ret = wc_Stm32_Aes_Init(aes, &hcryp);
+ if (ret != 0)
+ return ret;
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* if input and output same will overwrite input iv */
+ XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+
+ #ifdef STM32_CRYPTO_AES_ONLY
+ hcryp.Init.OperatingMode = CRYP_ALGOMODE_KEYDERIVATION_DECRYPT;
+ hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CBC;
+ hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE;
+ #elif defined(STM32_HAL_V2)
+ hcryp.Init.Algorithm = CRYP_AES_CBC;
+ ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE);
+ #endif
+
+ hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg;
+ HAL_CRYP_Init(&hcryp);
+
+ while (blocks--) {
+ #ifdef STM32_CRYPTO_AES_ONLY
+ ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
+ out, STM32_HAL_TIMEOUT);
+ #elif defined(STM32_HAL_V2)
+ ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE,
+ (uint32_t*)out, STM32_HAL_TIMEOUT);
+ #else
+ ret = HAL_CRYP_AESCBC_Decrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
+ out, STM32_HAL_TIMEOUT);
+ #endif
+ if (ret != HAL_OK) {
+ ret = WC_TIMEOUT_E;
break;
+ }
+
+ /* store iv for next call */
+ XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
+
+ HAL_CRYP_DeInit(&hcryp);
+ wolfSSL_CryptHwMutexUnLock();
+
+ return ret;
+ }
+ #endif /* HAVE_AES_DECRYPT */
+
+#else /* STD_PERI_LIB */
+ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ int ret;
+ word32 *iv;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+ CRYP_InitTypeDef cryptInit;
+ CRYP_KeyInitTypeDef keyInit;
+ CRYP_IVInitTypeDef ivInit;
+
+ ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+ if (ret != 0)
+ return ret;
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
}
- CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
+
+ /* reset registers to their default values */
+ CRYP_DeInit();
+
+ /* set key */
+ CRYP_KeyInit(&keyInit);
/* set iv */
+ iv = aes->reg;
+ CRYP_IVStructInit(&ivInit);
ByteReverseWords(iv, iv, AES_BLOCK_SIZE);
- AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0];
- AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
- AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2];
- AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
- CRYP_IVInit(&AES_CRYP_IVInitStructure);
-
- /* set direction, mode, and datatype */
- AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt;
- AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
- AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
- CRYP_Init(&AES_CRYP_InitStructure);
+ ivInit.CRYP_IV0Left = iv[0];
+ ivInit.CRYP_IV0Right = iv[1];
+ ivInit.CRYP_IV1Left = iv[2];
+ ivInit.CRYP_IV1Right = iv[3];
+ CRYP_IVInit(&ivInit);
+
+ /* set direction and mode */
+ cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt;
+ cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
+ CRYP_Init(&cryptInit);
/* enable crypto processor */
CRYP_Cmd(ENABLE);
- while (sz > 0)
- {
+ while (blocks--) {
/* flush IN/OUT FIFOs */
CRYP_FIFOFlush();
@@ -1883,7 +3118,7 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
CRYP_DataIn(*(uint32_t*)&in[12]);
/* wait until the complete message has been processed */
- while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+ while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
*(uint32_t*)&out[0] = CRYP_DataOut();
*(uint32_t*)&out[4] = CRYP_DataOut();
@@ -1893,31 +3128,36 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
/* store iv for next call */
XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
- sz -= 16;
- in += 16;
- out += 16;
+ sz -= AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
}
/* disable crypto processor */
CRYP_Cmd(DISABLE);
+ wolfSSL_CryptHwMutexUnLock();
- return 0;
+ return ret;
}
+ #ifdef HAVE_AES_DECRYPT
int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- word32 *dec_key, *iv;
- CRYP_InitTypeDef AES_CRYP_InitStructure;
- CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
- CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
-
- dec_key = aes->key;
- iv = aes->reg;
-
- /* crypto structure initialization */
- CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
- CRYP_StructInit(&AES_CRYP_InitStructure);
- CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
+ int ret;
+ word32 *iv;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+ CRYP_InitTypeDef cryptInit;
+ CRYP_KeyInitTypeDef keyInit;
+ CRYP_IVInitTypeDef ivInit;
+
+ ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+ if (ret != 0)
+ return ret;
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
/* if input and output same will overwrite input iv */
XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
@@ -1925,76 +3165,37 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
/* reset registers to their default values */
CRYP_DeInit();
- /* load key into correct registers */
- switch(aes->rounds)
- {
- case 10: /* 128-bit key */
- AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
- AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[0];
- AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[1];
- AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[2];
- AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[3];
- break;
-
- case 12: /* 192-bit key */
- AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
- AES_CRYP_KeyInitStructure.CRYP_Key1Left = dec_key[0];
- AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[1];
- AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[2];
- AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[3];
- AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[4];
- AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[5];
- break;
-
- case 14: /* 256-bit key */
- AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
- AES_CRYP_KeyInitStructure.CRYP_Key0Left = dec_key[0];
- AES_CRYP_KeyInitStructure.CRYP_Key0Right = dec_key[1];
- AES_CRYP_KeyInitStructure.CRYP_Key1Left = dec_key[2];
- AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[3];
- AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[4];
- AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[5];
- AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[6];
- AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[7];
- break;
-
- default:
- break;
- }
-
- /* set direction, mode, and datatype for key preparation */
- AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
- AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
- AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_32b;
- CRYP_Init(&AES_CRYP_InitStructure);
- CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
+ /* set direction and key */
+ CRYP_KeyInit(&keyInit);
+ cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
+ cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
+ CRYP_Init(&cryptInit);
/* enable crypto processor */
CRYP_Cmd(ENABLE);
/* wait until key has been prepared */
- while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+ while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
- /* set direction, mode, and datatype for decryption */
- AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
- AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
- AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
- CRYP_Init(&AES_CRYP_InitStructure);
+ /* set direction and mode */
+ cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
+ cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
+ CRYP_Init(&cryptInit);
/* set iv */
+ iv = aes->reg;
+ CRYP_IVStructInit(&ivInit);
ByteReverseWords(iv, iv, AES_BLOCK_SIZE);
-
- AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0];
- AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
- AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2];
- AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
- CRYP_IVInit(&AES_CRYP_IVInitStructure);
+ ivInit.CRYP_IV0Left = iv[0];
+ ivInit.CRYP_IV0Right = iv[1];
+ ivInit.CRYP_IV1Left = iv[2];
+ ivInit.CRYP_IV1Right = iv[3];
+ CRYP_IVInit(&ivInit);
/* enable crypto processor */
CRYP_Cmd(ENABLE);
- while (sz > 0)
- {
+ while (blocks--) {
/* flush IN/OUT FIFOs */
CRYP_FIFOFlush();
@@ -2004,7 +3205,7 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
CRYP_DataIn(*(uint32_t*)&in[12]);
/* wait until the complete message has been processed */
- while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+ while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
*(uint32_t*)&out[0] = CRYP_DataOut();
*(uint32_t*)&out[4] = CRYP_DataOut();
@@ -2014,24 +3215,26 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
/* store iv for next call */
XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
- sz -= 16;
- in += 16;
- out += 16;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
}
/* disable crypto processor */
CRYP_Cmd(DISABLE);
+ wolfSSL_CryptHwMutexUnLock();
- return 0;
+ return ret;
}
+ #endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_STM32_CUBEMX */
#elif defined(HAVE_COLDFIRE_SEC)
static int wc_AesCbcCrypt(Aes* aes, byte* po, const byte* pi, word32 sz,
- word32 descHeader)
+ word32 descHeader)
{
#ifdef DEBUG_WOLFSSL
int i; int stat1, stat2; int ret;
- #endif
+ #endif
int size;
volatile int v;
@@ -2039,7 +3242,7 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
if ((pi == NULL) || (po == NULL))
return BAD_FUNC_ARG; /*wrong pointer*/
- LockMutex(&Mutex_AesSEC);
+ wc_LockMutex(&Mutex_AesSEC);
/* Set descriptor for SEC */
secDesc->length1 = 0x0;
@@ -2049,9 +3252,9 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
secDesc->pointer2 = (byte *)secReg; /* Initial Vector */
switch(aes->rounds) {
- case 10: secDesc->length3 = 16 ; break ;
- case 12: secDesc->length3 = 24 ; break ;
- case 14: secDesc->length3 = 32 ; break ;
+ case 10: secDesc->length3 = 16; break;
+ case 12: secDesc->length3 = 24; break;
+ case 14: secDesc->length3 = 32; break;
}
XMEMCPY(secKey, aes->key, secDesc->length3);
@@ -2115,7 +3318,7 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
po += size;
}
- UnLockMutex(&Mutex_AesSEC);
+ wc_UnLockMutex(&Mutex_AesSEC);
return 0;
}
@@ -2124,40 +3327,92 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_ENCRYPT));
}
+ #ifdef HAVE_AES_DECRYPT
int wc_AesCbcDecrypt(Aes* aes, byte* po, const byte* pi, word32 sz)
{
return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_DECRYPT));
}
+ #endif /* HAVE_AES_DECRYPT */
-#elif defined(FREESCALE_MMCAU)
+#elif defined(FREESCALE_LTC)
int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- int i;
- int offset = 0;
- int len = sz;
-
+ uint32_t keySize;
+ status_t status;
byte *iv, *enc_key;
- byte temp_block[AES_BLOCK_SIZE];
+ word32 blocks = (sz / AES_BLOCK_SIZE);
iv = (byte*)aes->reg;
enc_key = (byte*)aes->key;
- if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) {
- WOLFSSL_MSG("Bad cau_aes_encrypt alignment");
- return BAD_ALIGN_E;
+ status = wc_AesGetKeySize(aes, &keySize);
+ if (status != 0) {
+ return status;
}
- while (len > 0)
- {
+ status = LTC_AES_EncryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE,
+ iv, enc_key, keySize);
+
+ /* store iv for next call */
+ if (status == kStatus_Success) {
+ XMEMCPY(iv, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ }
+
+ return (status == kStatus_Success) ? 0 : -1;
+ }
+
+ #ifdef HAVE_AES_DECRYPT
+ int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ uint32_t keySize;
+ status_t status;
+ byte* iv, *dec_key;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+ byte temp_block[AES_BLOCK_SIZE];
+
+ iv = (byte*)aes->reg;
+ dec_key = (byte*)aes->key;
+
+ status = wc_AesGetKeySize(aes, &keySize);
+ if (status != 0) {
+ return status;
+ }
+
+ /* get IV for next call */
+ XMEMCPY(temp_block, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+
+ status = LTC_AES_DecryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE,
+ iv, dec_key, keySize, kLTC_EncryptKey);
+
+ /* store IV for next call */
+ if (status == kStatus_Success) {
+ XMEMCPY(iv, temp_block, AES_BLOCK_SIZE);
+ }
+
+ return (status == kStatus_Success) ? 0 : -1;
+ }
+ #endif /* HAVE_AES_DECRYPT */
+
+#elif defined(FREESCALE_MMCAU)
+ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ int i;
+ int offset = 0;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+ byte *iv;
+ byte temp_block[AES_BLOCK_SIZE];
+
+ iv = (byte*)aes->reg;
+
+ while (blocks--) {
XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE);
/* XOR block with IV for CBC */
for (i = 0; i < AES_BLOCK_SIZE; i++)
temp_block[i] ^= iv[i];
- cau_aes_encrypt(temp_block, enc_key, aes->rounds, out + offset);
+ wc_AesEncrypt(aes, temp_block, out + offset);
- len -= AES_BLOCK_SIZE;
offset += AES_BLOCK_SIZE;
/* store IV for next block */
@@ -2166,29 +3421,21 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
return 0;
}
-
+ #ifdef HAVE_AES_DECRYPT
int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
int i;
int offset = 0;
- int len = sz;
-
- byte* iv, *dec_key;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+ byte* iv;
byte temp_block[AES_BLOCK_SIZE];
iv = (byte*)aes->reg;
- dec_key = (byte*)aes->key;
- if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) {
- WOLFSSL_MSG("Bad cau_aes_decrypt alignment");
- return BAD_ALIGN_E;
- }
-
- while (len > 0)
- {
+ while (blocks--) {
XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE);
- cau_aes_decrypt(in + offset, dec_key, aes->rounds, out + offset);
+ wc_AesDecrypt(aes, in + offset, out + offset);
/* XOR block with IV for CBC */
for (i = 0; i < AES_BLOCK_SIZE; i++)
@@ -2197,132 +3444,131 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
/* store IV for next block */
XMEMCPY(iv, temp_block, AES_BLOCK_SIZE);
- len -= AES_BLOCK_SIZE;
offset += AES_BLOCK_SIZE;
}
return 0;
}
+ #endif /* HAVE_AES_DECRYPT */
#elif defined(WOLFSSL_PIC32MZ_CRYPT)
- /* core hardware crypt engine driver */
- static void wc_AesCrypt(Aes *aes, byte* out, const byte* in, word32 sz,
- int dir, int algo, int cryptoalgo)
+
+ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- securityAssociation *sa_p ;
- bufferDescriptor *bd_p ;
-
- volatile securityAssociation sa __attribute__((aligned (8)));
- volatile bufferDescriptor bd __attribute__((aligned (8)));
- volatile int k ;
-
- /* get uncached address */
- sa_p = KVA0_TO_KVA1(&sa) ;
- bd_p = KVA0_TO_KVA1(&bd) ;
-
- /* Sync cache and physical memory */
- if(PIC32MZ_IF_RAM(in)) {
- XMEMCPY((void *)KVA0_TO_KVA1(in), (void *)in, sz);
- }
- XMEMSET((void *)KVA0_TO_KVA1(out), 0, sz);
- /* Set up the Security Association */
- XMEMSET((byte *)KVA0_TO_KVA1(&sa), 0, sizeof(sa));
- sa_p->SA_CTRL.ALGO = algo ; /* AES */
- sa_p->SA_CTRL.LNC = 1;
- sa_p->SA_CTRL.LOADIV = 1;
- sa_p->SA_CTRL.FB = 1;
- sa_p->SA_CTRL.ENCTYPE = dir ; /* Encryption/Decryption */
- sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo;
-
- if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM){
- switch(aes->keylen) {
- case 32:
- sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_256 ;
- break ;
- case 24:
- sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_192 ;
- break ;
- case 16:
- sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ;
- break ;
- }
- } else
- sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ;
-
- ByteReverseWords(
- (word32 *)KVA0_TO_KVA1(sa.SA_ENCKEY + 8 - aes->keylen/sizeof(word32)),
- (word32 *)aes->key_ce, aes->keylen);
- ByteReverseWords(
- (word32*)KVA0_TO_KVA1(sa.SA_ENCIV), (word32 *)aes->iv_ce, 16);
-
- XMEMSET((byte *)KVA0_TO_KVA1(&bd), 0, sizeof(bd));
- /* Set up the Buffer Descriptor */
- bd_p->BD_CTRL.BUFLEN = sz;
- if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM) {
- if(sz % 0x10)
- bd_p->BD_CTRL.BUFLEN = (sz/0x10 + 1) * 0x10 ;
- }
- bd_p->BD_CTRL.LIFM = 1;
- bd_p->BD_CTRL.SA_FETCH_EN = 1;
- bd_p->BD_CTRL.LAST_BD = 1;
- bd_p->BD_CTRL.DESC_EN = 1;
-
- bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa) ;
- bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in) ;
- bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out);
- bd_p->MSGLEN = sz ;
-
- CECON = 1 << 6;
- while (CECON);
-
- /* Run the engine */
- CEBDPADDR = (unsigned int)KVA_TO_PA(&bd) ;
- CEINTEN = 0x07;
- CECON = 0x27;
-
- WAIT_ENGINE ;
-
- if((cryptoalgo == PIC32_CRYPTOALGO_CBC) ||
- (cryptoalgo == PIC32_CRYPTOALGO_TCBC)||
- (cryptoalgo == PIC32_CRYPTOALGO_RCBC)) {
- /* set iv for the next call */
- if(dir == PIC32_ENCRYPTION) {
- XMEMCPY((void *)aes->iv_ce,
- (void*)KVA0_TO_KVA1(out + sz - AES_BLOCK_SIZE),
- AES_BLOCK_SIZE) ;
- } else {
- ByteReverseWords((word32*)aes->iv_ce,
- (word32 *)KVA0_TO_KVA1(in + sz - AES_BLOCK_SIZE),
- AES_BLOCK_SIZE);
- }
+ int ret;
+
+ /* hardware fails on input that is not a multiple of AES block size */
+ if (sz % AES_BLOCK_SIZE != 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wc_Pic32AesCrypt(
+ aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
+ out, in, sz, PIC32_ENCRYPTION,
+ PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC);
+
+ /* store iv for next call */
+ if (ret == 0) {
+ XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ }
+
+ return ret;
+ }
+ #ifdef HAVE_AES_DECRYPT
+ int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ int ret;
+ byte scratch[AES_BLOCK_SIZE];
+
+ /* hardware fails on input that is not a multiple of AES block size */
+ if (sz % AES_BLOCK_SIZE != 0) {
+ return BAD_FUNC_ARG;
+ }
+ XMEMCPY(scratch, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+
+ ret = wc_Pic32AesCrypt(
+ aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
+ out, in, sz, PIC32_DECRYPTION,
+ PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC);
+
+ /* store iv for next call */
+ if (ret == 0) {
+ XMEMCPY((byte*)aes->reg, scratch, AES_BLOCK_SIZE);
}
- XMEMCPY((byte *)out, (byte *)KVA0_TO_KVA1(out), sz) ;
- ByteReverseWords((word32*)out, (word32 *)out, sz);
+
+ return ret;
}
+ #endif /* HAVE_AES_DECRYPT */
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- wc_AesCrypt(aes, out, in, sz, PIC32_ENCRYPTION, PIC32_ALGO_AES,
- PIC32_CRYPTOALGO_RCBC );
- return 0 ;
+ return wc_esp32AesCbcEncrypt(aes, out, in, sz);
+ }
+ int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ return wc_esp32AesCbcDecrypt(aes, out, in, sz);
+ }
+#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES)
+ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out);
}
-
int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- wc_AesCrypt(aes, out, in, sz, PIC32_DECRYPTION, PIC32_ALGO_AES,
- PIC32_CRYPTOALGO_RCBC);
- return 0 ;
+ return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out);
}
+#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
+ /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
+
+#elif defined(WOLFSSL_AFALG)
+ /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_CBC)
+ /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
#else
+
+ /* Software AES - CBC Encrypt */
int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- word32 blocks = sz / AES_BLOCK_SIZE;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
- #ifdef HAVE_CAVIUM
- if (aes->magic == WOLFSSL_AES_CAVIUM_MAGIC)
- return wc_AesCaviumCbcEncrypt(aes, out, in, sz);
+ #ifdef WOLF_CRYPTO_CB
+ if (aes->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_AesCbcEncrypt(aes, out, in, sz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
#endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+ /* if async and byte count above threshold */
+ if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+ sz >= WC_ASYNC_THRESH_AES_CBC) {
+ #if defined(HAVE_CAVIUM)
+ return NitroxAesCbcEncrypt(aes, out, in, sz);
+ #elif defined(HAVE_INTEL_QA)
+ return IntelQaSymAesCbcEncrypt(&aes->asyncDev, out, in, sz,
+ (const byte*)aes->devKey, aes->keylen,
+ (byte*)aes->reg, AES_BLOCK_SIZE);
+ #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_ENCRYPT)) {
+ WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+ testDev->aes.aes = aes;
+ testDev->aes.out = out;
+ testDev->aes.in = in;
+ testDev->aes.sz = sz;
+ return WC_PENDING_E;
+ }
+ #endif
+ }
+ #endif /* WOLFSSL_ASYNC_CRYPT */
#ifdef WOLFSSL_AESNI
if (haveAESNI) {
@@ -2337,22 +3583,25 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
#endif
/* check alignment, decrypt doesn't need alignment */
- if ((wolfssl_word)in % 16) {
+ if ((wolfssl_word)in % AESNI_ALIGN) {
#ifndef NO_WOLFSSL_ALLOC_ALIGN
- byte* tmp = (byte*)XMALLOC(sz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- WOLFSSL_MSG("AES-CBC encrypt with bad alignment");
+ byte* tmp = (byte*)XMALLOC(sz + AES_BLOCK_SIZE + AESNI_ALIGN,
+ aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ byte* tmp_align;
if (tmp == NULL) return MEMORY_E;
- XMEMCPY(tmp, in, sz);
- AES_CBC_encrypt(tmp, tmp, (byte*)aes->reg, sz, (byte*)aes->key,
- aes->rounds);
+ tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
+ XMEMCPY(tmp_align, in, sz);
+ AES_CBC_encrypt(tmp_align, tmp_align, (byte*)aes->reg, sz,
+ (byte*)aes->key, aes->rounds);
/* store iv for next call */
- XMEMCPY(aes->reg, tmp + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ XMEMCPY(aes->reg, tmp_align + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
- XMEMCPY(out, tmp, sz);
- XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XMEMCPY(out, tmp_align, sz);
+ XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
return 0;
#else
+ WOLFSSL_MSG("AES-CBC encrypt with bad alignment");
return BAD_ALIGN_E;
#endif
}
@@ -2378,13 +3627,46 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
return 0;
}
+ #ifdef HAVE_AES_DECRYPT
+ /* Software AES - CBC Decrypt */
int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- word32 blocks = sz / AES_BLOCK_SIZE;
+ word32 blocks;
+
+ if (aes == NULL || out == NULL || in == NULL
+ || sz % AES_BLOCK_SIZE != 0) {
+ return BAD_FUNC_ARG;
+ }
- #ifdef HAVE_CAVIUM
- if (aes->magic == WOLFSSL_AES_CAVIUM_MAGIC)
- return wc_AesCaviumCbcDecrypt(aes, out, in, sz);
+ #ifdef WOLF_CRYPTO_CB
+ if (aes->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_AesCbcDecrypt(aes, out, in, sz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+ /* if async and byte count above threshold */
+ if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+ sz >= WC_ASYNC_THRESH_AES_CBC) {
+ #if defined(HAVE_CAVIUM)
+ return NitroxAesCbcDecrypt(aes, out, in, sz);
+ #elif defined(HAVE_INTEL_QA)
+ return IntelQaSymAesCbcDecrypt(&aes->asyncDev, out, in, sz,
+ (const byte*)aes->devKey, aes->keylen,
+ (byte*)aes->reg, AES_BLOCK_SIZE);
+ #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_DECRYPT)) {
+ WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+ testDev->aes.aes = aes;
+ testDev->aes.out = out;
+ testDev->aes.in = in;
+ testDev->aes.sz = sz;
+ return WC_PENDING_E;
+ }
+ #endif
+ }
#endif
#ifdef WOLFSSL_AESNI
@@ -2401,18 +3683,28 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
/* if input and output same will overwrite input iv */
XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
- AES_CBC_decrypt(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
+ #if defined(WOLFSSL_AESNI_BY4)
+ AES_CBC_decrypt_by4(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
+ aes->rounds);
+ #elif defined(WOLFSSL_AESNI_BY6)
+ AES_CBC_decrypt_by6(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
aes->rounds);
+ #else /* WOLFSSL_AESNI_BYx */
+ AES_CBC_decrypt_by8(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
+ aes->rounds);
+ #endif /* WOLFSSL_AESNI_BYx */
/* store iv for next call */
XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
return 0;
}
#endif
+ blocks = sz / AES_BLOCK_SIZE;
while (blocks--) {
XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE);
wc_AesDecrypt(aes, (byte*)aes->tmp, out);
xorbuf(out, (byte*)aes->reg, AES_BLOCK_SIZE);
+ /* store iv for next call */
XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
out += AES_BLOCK_SIZE;
@@ -2421,198 +3713,226 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
return 0;
}
+ #endif
-#endif /* STM32F2_CRYPTO, AES-CBC block */
+#endif /* AES-CBC block */
+#endif /* HAVE_AES_CBC */
/* AES-CTR */
-#ifdef WOLFSSL_AES_COUNTER
+#if defined(WOLFSSL_AES_COUNTER)
- #ifdef STM32F2_CRYPTO
- void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
- {
- word32 *enc_key, *iv;
- CRYP_InitTypeDef AES_CRYP_InitStructure;
- CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
- CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
+ #ifdef STM32_CRYPTO
+ #define NEED_AES_CTR_SOFT
+ #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock
- enc_key = aes->key;
- iv = aes->reg;
+ int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_STM32_CUBEMX
+ CRYP_HandleTypeDef hcryp;
+ #ifdef STM32_HAL_V2
+ word32 iv[AES_BLOCK_SIZE/sizeof(word32)];
+ #endif
+ #else
+ word32 *iv;
+ CRYP_InitTypeDef cryptInit;
+ CRYP_KeyInitTypeDef keyInit;
+ CRYP_IVInitTypeDef ivInit;
+ #endif
- /* crypto structure initialization */
- CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
- CRYP_StructInit(&AES_CRYP_InitStructure);
- CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
- /* reset registers to their default values */
- CRYP_DeInit();
+ #ifdef WOLFSSL_STM32_CUBEMX
+ ret = wc_Stm32_Aes_Init(aes, &hcryp);
+ if (ret != 0) {
+ wolfSSL_CryptHwMutexUnLock();
+ return ret;
+ }
- /* load key into correct registers */
- switch(aes->rounds)
- {
- case 10: /* 128-bit key */
- AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
- AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0];
- AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
- AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2];
- AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
- break;
+ #ifdef STM32_CRYPTO_AES_ONLY
+ hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
+ hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CTR;
+ hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE;
+ hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg;
+ #elif defined(STM32_HAL_V2)
+ hcryp.Init.Algorithm = CRYP_AES_CTR;
+ ByteReverseWords(iv, aes->reg, AES_BLOCK_SIZE);
+ hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)iv;
+ #else
+ hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg;
+ #endif
+ HAL_CRYP_Init(&hcryp);
+
+ #ifdef STM32_CRYPTO_AES_ONLY
+ ret = HAL_CRYPEx_AES(&hcryp, (byte*)in, AES_BLOCK_SIZE,
+ out, STM32_HAL_TIMEOUT);
+ #elif defined(STM32_HAL_V2)
+ ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE,
+ (uint32_t*)out, STM32_HAL_TIMEOUT);
+ #else
+ ret = HAL_CRYP_AESCTR_Encrypt(&hcryp, (byte*)in, AES_BLOCK_SIZE,
+ out, STM32_HAL_TIMEOUT);
+ #endif
+ if (ret != HAL_OK) {
+ ret = WC_TIMEOUT_E;
+ }
+ HAL_CRYP_DeInit(&hcryp);
- case 12: /* 192-bit key */
- AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
- AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0];
- AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
- AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2];
- AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
- AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4];
- AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
- break;
+ #else /* STD_PERI_LIB */
+ ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit);
+ if (ret != 0) {
+ wolfSSL_CryptHwMutexUnLock();
+ return ret;
+ }
- case 14: /* 256-bit key */
- AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
- AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0];
- AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
- AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2];
- AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
- AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4];
- AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
- AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6];
- AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
- break;
+ /* reset registers to their default values */
+ CRYP_DeInit();
- default:
- break;
- }
- CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
+ /* set key */
+ CRYP_KeyInit(&keyInit);
/* set iv */
- ByteReverseWords(iv, iv, AES_BLOCK_SIZE);
- AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0];
- AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
- AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2];
- AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
- CRYP_IVInit(&AES_CRYP_IVInitStructure);
-
- /* set direction, mode, and datatype */
- AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt;
- AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR;
- AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
- CRYP_Init(&AES_CRYP_InitStructure);
+ iv = aes->reg;
+ CRYP_IVStructInit(&ivInit);
+ ivInit.CRYP_IV0Left = ByteReverseWord32(iv[0]);
+ ivInit.CRYP_IV0Right = ByteReverseWord32(iv[1]);
+ ivInit.CRYP_IV1Left = ByteReverseWord32(iv[2]);
+ ivInit.CRYP_IV1Right = ByteReverseWord32(iv[3]);
+ CRYP_IVInit(&ivInit);
+
+ /* set direction and mode */
+ cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt;
+ cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR;
+ CRYP_Init(&cryptInit);
/* enable crypto processor */
CRYP_Cmd(ENABLE);
- while (sz > 0)
- {
- /* flush IN/OUT FIFOs */
- CRYP_FIFOFlush();
-
- CRYP_DataIn(*(uint32_t*)&in[0]);
- CRYP_DataIn(*(uint32_t*)&in[4]);
- CRYP_DataIn(*(uint32_t*)&in[8]);
- CRYP_DataIn(*(uint32_t*)&in[12]);
-
- /* wait until the complete message has been processed */
- while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+ /* flush IN/OUT FIFOs */
+ CRYP_FIFOFlush();
- *(uint32_t*)&out[0] = CRYP_DataOut();
- *(uint32_t*)&out[4] = CRYP_DataOut();
- *(uint32_t*)&out[8] = CRYP_DataOut();
- *(uint32_t*)&out[12] = CRYP_DataOut();
+ CRYP_DataIn(*(uint32_t*)&in[0]);
+ CRYP_DataIn(*(uint32_t*)&in[4]);
+ CRYP_DataIn(*(uint32_t*)&in[8]);
+ CRYP_DataIn(*(uint32_t*)&in[12]);
- /* store iv for next call */
- XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ /* wait until the complete message has been processed */
+ while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
- sz -= 16;
- in += 16;
- out += 16;
- }
+ *(uint32_t*)&out[0] = CRYP_DataOut();
+ *(uint32_t*)&out[4] = CRYP_DataOut();
+ *(uint32_t*)&out[8] = CRYP_DataOut();
+ *(uint32_t*)&out[12] = CRYP_DataOut();
/* disable crypto processor */
CRYP_Cmd(DISABLE);
+
+ #endif /* WOLFSSL_STM32_CUBEMX */
+
+ wolfSSL_CryptHwMutexUnLock();
+ return ret;
}
+
#elif defined(WOLFSSL_PIC32MZ_CRYPT)
- void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+
+ #define NEED_AES_CTR_SOFT
+ #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock
+
+ int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in)
{
- int i ;
- char out_block[AES_BLOCK_SIZE] ;
- int odd ;
- int even ;
- char *tmp ; /* (char *)aes->tmp, for short */
-
- tmp = (char *)aes->tmp ;
- if(aes->left) {
- if((aes->left + sz) >= AES_BLOCK_SIZE){
- odd = AES_BLOCK_SIZE - aes->left ;
- } else {
- odd = sz ;
- }
- XMEMCPY(tmp+aes->left, in, odd) ;
- if((odd+aes->left) == AES_BLOCK_SIZE){
- wc_AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE,
- PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
- XMEMCPY(out, out_block+aes->left, odd) ;
- aes->left = 0 ;
- XMEMSET(tmp, 0x0, AES_BLOCK_SIZE) ;
- /* Increment IV */
- for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
- if (++((byte *)aes->iv_ce)[i])
- break ;
- }
- }
- in += odd ;
- out+= odd ;
- sz -= odd ;
- }
- odd = sz % AES_BLOCK_SIZE ; /* if there is tail flagment */
- if(sz / AES_BLOCK_SIZE) {
- even = (sz/AES_BLOCK_SIZE)*AES_BLOCK_SIZE ;
- wc_AesCrypt(aes, out, in, even, PIC32_ENCRYPTION, PIC32_ALGO_AES,
- PIC32_CRYPTOALGO_RCTR);
- out += even ;
- in += even ;
- do { /* Increment IV */
- for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
- if (++((byte *)aes->iv_ce)[i])
- break ;
- }
- even -= AES_BLOCK_SIZE ;
- } while((int)even > 0) ;
- }
- if(odd) {
- XMEMSET(tmp+aes->left, 0x0, AES_BLOCK_SIZE - aes->left) ;
- XMEMCPY(tmp+aes->left, in, odd) ;
- wc_AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE,
- PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
- XMEMCPY(out, out_block+aes->left,odd) ;
- aes->left += odd ;
- }
+ word32 tmpIv[AES_BLOCK_SIZE / sizeof(word32)];
+ XMEMCPY(tmpIv, aes->reg, AES_BLOCK_SIZE);
+ return wc_Pic32AesCrypt(
+ aes->key, aes->keylen, tmpIv, AES_BLOCK_SIZE,
+ out, in, AES_BLOCK_SIZE,
+ PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
}
#elif defined(HAVE_COLDFIRE_SEC)
#error "Coldfire SEC doesn't currently support AES-CTR mode"
- #elif defined(FREESCALE_MMCAU)
- #error "Freescale mmCAU doesn't currently support AES-CTR mode"
+ #elif defined(FREESCALE_LTC)
+ int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ {
+ uint32_t keySize;
+ byte *iv, *enc_key;
+ byte* tmp;
+
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* consume any unused bytes left in aes->tmp */
+ tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
+ while (aes->left && sz) {
+ *(out++) = *(in++) ^ *(tmp++);
+ aes->left--;
+ sz--;
+ }
+
+ if (sz) {
+ iv = (byte*)aes->reg;
+ enc_key = (byte*)aes->key;
+
+ wc_AesGetKeySize(aes, &keySize);
+
+ LTC_AES_CryptCtr(LTC_BASE, in, out, sz,
+ iv, enc_key, keySize, (byte*)aes->tmp,
+ (uint32_t*)&aes->left);
+ }
+
+ return 0;
+ }
+
+ #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
+ /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
+
+ #elif defined(WOLFSSL_AFALG)
+ /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+ #elif defined(WOLFSSL_DEVCRYPTO_AES)
+ /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+
+ #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)
+ /* esp32 doesn't support CRT mode by hw. */
+ /* use aes ecnryption plus sw implementation */
+ #define NEED_AES_CTR_SOFT
#else
+
+ /* Use software based AES counter */
+ #define NEED_AES_CTR_SOFT
+ #endif
+
+ #ifdef NEED_AES_CTR_SOFT
/* Increment AES counter */
- static INLINE void IncrementAesCounter(byte* inOutCtr)
+ static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
{
- int i;
-
/* in network byte order so start at end and work back */
+ int i;
for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
if (++inOutCtr[i]) /* we're done unless we overflow */
return;
}
}
- void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+ /* Software AES - CTR Encrypt */
+ int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- byte* tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
+ byte* tmp;
+ byte scratch[AES_BLOCK_SIZE];
+
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
/* consume any unused bytes left in aes->tmp */
+ tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
while (aes->left && sz) {
*(out++) = *(in++) ^ *(tmp++);
aes->left--;
@@ -2621,17 +3941,23 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
/* do as many block size ops as possible */
while (sz >= AES_BLOCK_SIZE) {
- wc_AesEncrypt(aes, (byte*)aes->reg, out);
+ #ifdef XTRANSFORM_AESCTRBLOCK
+ XTRANSFORM_AESCTRBLOCK(aes, out, in);
+ #else
+ wc_AesEncrypt(aes, (byte*)aes->reg, scratch);
+ xorbuf(scratch, in, AES_BLOCK_SIZE);
+ XMEMCPY(out, scratch, AES_BLOCK_SIZE);
+ #endif
IncrementAesCounter((byte*)aes->reg);
- xorbuf(out, in, AES_BLOCK_SIZE);
out += AES_BLOCK_SIZE;
in += AES_BLOCK_SIZE;
sz -= AES_BLOCK_SIZE;
aes->left = 0;
}
+ ForceZero(scratch, AES_BLOCK_SIZE);
- /* handle non block size remaining and sotre unused byte count in left */
+ /* handle non block size remaining and store unused byte count in left */
if (sz) {
wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp);
IncrementAesCounter((byte*)aes->reg);
@@ -2644,49 +3970,63 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
aes->left--;
}
}
+
+ return 0;
}
- #endif /* STM32F2_CRYPTO, AES-CTR block */
+ #endif /* NEED_AES_CTR_SOFT */
#endif /* WOLFSSL_AES_COUNTER */
+#endif /* !WOLFSSL_ARMASM */
-#ifdef HAVE_AESGCM
/*
- * The IV for AES GCM, stored in struct Aes's member reg, is comprised of
- * three parts in order:
- * 1. The implicit IV. This is generated from the PRF using the shared
- * secrets between endpoints. It is 4 bytes long.
- * 2. The explicit IV. This is set by the user of the AES. It needs to be
- * unique for each call to encrypt. The explicit IV is shared with the
- * other end of the transaction in the clear.
- * 3. The counter. Each block of data is encrypted with its own sequence
- * number counter.
+ * The IV for AES GCM and CCM, stored in struct Aes's member reg, is comprised
+ * of two parts in order:
+ * 1. The fixed field which may be 0 or 4 bytes long. In TLS, this is set
+ * to the implicit IV.
+ * 2. The explicit IV is generated by wolfCrypt. It needs to be managed
+ * by wolfCrypt to ensure the IV is unique for each call to encrypt.
+ * The IV may be a 96-bit random value, or the 32-bit fixed value and a
+ * 64-bit set of 0 or random data. The final 32-bits of reg is used as a
+ * block counter during the encryption.
*/
-#ifdef STM32F2_CRYPTO
- #error "STM32F2 crypto doesn't currently support AES-GCM mode"
+#if (defined(HAVE_AESGCM) && !defined(WC_NO_RNG)) || defined(HAVE_AESCCM)
+static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)
+{
+ int i;
+ for (i = ctrSz-1; i >= 0; i--) {
+ if (++ctr[i])
+ break;
+ }
+}
+#endif /* HAVE_AESGCM || HAVE_AESCCM */
-#elif defined(HAVE_COLDFIRE_SEC)
+
+#ifdef HAVE_AESGCM
+
+#if defined(HAVE_COLDFIRE_SEC)
#error "Coldfire SEC doesn't currently support AES-GCM mode"
+#elif defined(WOLFSSL_NRF51_AES)
+ #error "nRF51 doesn't currently support AES-GCM mode"
+
#endif
-enum {
- CTR_SZ = 4
-};
+#ifdef WOLFSSL_ARMASM
+ /* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */
+#elif defined(WOLFSSL_AFALG)
+ /* implemented in wolfcrypt/src/port/afalg/afalg_aes.c */
-static INLINE void InitGcmCounter(byte* inOutCtr)
-{
- inOutCtr[AES_BLOCK_SIZE - 4] = 0;
- inOutCtr[AES_BLOCK_SIZE - 3] = 0;
- inOutCtr[AES_BLOCK_SIZE - 2] = 0;
- inOutCtr[AES_BLOCK_SIZE - 1] = 1;
-}
+#elif defined(WOLFSSL_DEVCRYPTO_AES)
+ /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+#else /* software + AESNI implementation */
-static INLINE void IncrementGcmCounter(byte* inOutCtr)
+#if !defined(FREESCALE_LTC_AES_GCM)
+static WC_INLINE void IncrementGcmCounter(byte* inOutCtr)
{
int i;
@@ -2696,11 +4036,23 @@ static INLINE void IncrementGcmCounter(byte* inOutCtr)
return;
}
}
+#ifdef STM32_CRYPTO_AES_GCM
+static WC_INLINE void DecrementGcmCounter(byte* inOutCtr)
+{
+ int i;
+ /* in network byte order so start at end and work back */
+ for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) {
+ if (--inOutCtr[i] != 0xFF) /* we're done unless we underflow */
+ return;
+ }
+}
+#endif /* STM32_CRYPTO_AES_GCM */
+#endif /* !FREESCALE_LTC_AES_GCM */
#if defined(GCM_SMALL) || defined(GCM_TABLE)
-static INLINE void FlattenSzInBits(byte* buf, word32 sz)
+static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz)
{
/* Multiply the sz by 8 */
word32 szHi = (sz >> (8*sizeof(sz) - 3));
@@ -2718,7 +4070,7 @@ static INLINE void FlattenSzInBits(byte* buf, word32 sz)
}
-static INLINE void RIGHTSHIFTX(byte* x)
+static WC_INLINE void RIGHTSHIFTX(byte* x)
{
int i;
int carryOut = 0;
@@ -2762,39 +4114,1250 @@ static void GenerateM0(Aes* aes)
#endif /* GCM_TABLE */
-
+/* Software AES - GCM SetKey */
int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
{
int ret;
byte iv[AES_BLOCK_SIZE];
- #ifdef FREESCALE_MMCAU
- byte* rk = (byte*)aes->key;
+ #ifdef WOLFSSL_IMX6_CAAM_BLOB
+ byte local[32];
+ word32 localSz = 32;
+
+ if (len == (16 + WC_CAAM_BLOB_SZ) ||
+ len == (24 + WC_CAAM_BLOB_SZ) ||
+ len == (32 + WC_CAAM_BLOB_SZ)) {
+ if (wc_caamOpenBlob((byte*)key, len, local, &localSz) != 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* set local values */
+ key = local;
+ len = localSz;
+ }
#endif
if (!((len == 16) || (len == 24) || (len == 32)))
return BAD_FUNC_ARG;
+#ifdef OPENSSL_EXTRA
+ if (aes != NULL) {
+ XMEMSET(aes->aadH, 0, sizeof(aes->aadH));
+ aes->aadLen = 0;
+ }
+#endif
XMEMSET(iv, 0, AES_BLOCK_SIZE);
ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION);
+ #ifdef WOLFSSL_AESNI
+ /* AES-NI code generates its own H value. */
+ if (haveAESNI)
+ return ret;
+ #endif /* WOLFSSL_AESNI */
+
+#if !defined(FREESCALE_LTC_AES_GCM)
if (ret == 0) {
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(iv, rk, aes->rounds, aes->H);
- #else
wc_AesEncrypt(aes, iv, aes->H);
- #endif
#ifdef GCM_TABLE
GenerateM0(aes);
#endif /* GCM_TABLE */
}
+#endif /* FREESCALE_LTC_AES_GCM */
+
+#if defined(WOLFSSL_XILINX_CRYPT)
+ wc_AesGcmSetKey_ex(aes, key, len, XSECURE_CSU_AES_KEY_SRC_KUP);
+#elif defined(WOLFSSL_AFALG_XILINX_AES)
+ wc_AesGcmSetKey_ex(aes, key, len, 0);
+#endif
+
+#ifdef WOLF_CRYPTO_CB
+ if (aes->devId != INVALID_DEVID) {
+ XMEMCPY(aes->devKey, key, len);
+ }
+#endif
+
+#ifdef WOLFSSL_IMX6_CAAM_BLOB
+ ForceZero(local, sizeof(local));
+#endif
return ret;
}
-#if defined(GCM_SMALL)
+#ifdef WOLFSSL_AESNI
+
+#if defined(USE_INTEL_SPEEDUP)
+ #define HAVE_INTEL_AVX1
+ #define HAVE_INTEL_AVX2
+#endif /* USE_INTEL_SPEEDUP */
+
+#ifndef _MSC_VER
+
+void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned char* addt, const unsigned char* ivec,
+ unsigned char *tag, unsigned int nbytes,
+ unsigned int abytes, unsigned int ibytes,
+ unsigned int tbytes, const unsigned char* key, int nr)
+ XASM_LINK("AES_GCM_encrypt");
+#ifdef HAVE_INTEL_AVX1
+void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out,
+ const unsigned char* addt, const unsigned char* ivec,
+ unsigned char *tag, unsigned int nbytes,
+ unsigned int abytes, unsigned int ibytes,
+ unsigned int tbytes, const unsigned char* key,
+ int nr)
+ XASM_LINK("AES_GCM_encrypt_avx1");
+#ifdef HAVE_INTEL_AVX2
+void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out,
+ const unsigned char* addt, const unsigned char* ivec,
+ unsigned char *tag, unsigned int nbytes,
+ unsigned int abytes, unsigned int ibytes,
+ unsigned int tbytes, const unsigned char* key,
+ int nr)
+ XASM_LINK("AES_GCM_encrypt_avx2");
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* HAVE_INTEL_AVX1 */
+
+#ifdef HAVE_AES_DECRYPT
+void AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
+ const unsigned char* addt, const unsigned char* ivec,
+ const unsigned char *tag, int nbytes, int abytes,
+ int ibytes, int tbytes, const unsigned char* key, int nr,
+ int* res)
+ XASM_LINK("AES_GCM_decrypt");
+#ifdef HAVE_INTEL_AVX1
+void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out,
+ const unsigned char* addt, const unsigned char* ivec,
+ const unsigned char *tag, int nbytes, int abytes,
+ int ibytes, int tbytes, const unsigned char* key,
+ int nr, int* res)
+ XASM_LINK("AES_GCM_decrypt_avx1");
+#ifdef HAVE_INTEL_AVX2
+void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out,
+ const unsigned char* addt, const unsigned char* ivec,
+ const unsigned char *tag, int nbytes, int abytes,
+ int ibytes, int tbytes, const unsigned char* key,
+ int nr, int* res)
+ XASM_LINK("AES_GCM_decrypt_avx2");
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* HAVE_INTEL_AVX1 */
+#endif /* HAVE_AES_DECRYPT */
+
+#else /* _MSC_VER */
+
+#define S(w,z) ((char)((unsigned long long)(w) >> (8*(7-(z))) & 0xFF))
+#define M128_INIT(x,y) { S((x),7), S((x),6), S((x),5), S((x),4), \
+ S((x),3), S((x),2), S((x),1), S((x),0), \
+ S((y),7), S((y),6), S((y),5), S((y),4), \
+ S((y),3), S((y),2), S((y),1), S((y),0) }
+
+static const __m128i MOD2_128 =
+ M128_INIT(0x1, (long long int)0xc200000000000000UL);
+
+
+/* See Intel® Carry-Less Multiplication Instruction
+ * and its Usage for Computing the GCM Mode White Paper
+ * by Shay Gueron, Intel Mobility Group, Israel Development Center;
+ * and Michael E. Kounavis, Intel Labs, Circuits and Systems Research */
+
+
+/* Figure 9. AES-GCM – Encrypt With Single Block Ghash at a Time */
+
+static const __m128i ONE = M128_INIT(0x0, 0x1);
+#ifndef AES_GCM_AESNI_NO_UNROLL
+static const __m128i TWO = M128_INIT(0x0, 0x2);
+static const __m128i THREE = M128_INIT(0x0, 0x3);
+static const __m128i FOUR = M128_INIT(0x0, 0x4);
+static const __m128i FIVE = M128_INIT(0x0, 0x5);
+static const __m128i SIX = M128_INIT(0x0, 0x6);
+static const __m128i SEVEN = M128_INIT(0x0, 0x7);
+static const __m128i EIGHT = M128_INIT(0x0, 0x8);
+#endif
+static const __m128i BSWAP_EPI64 =
+ M128_INIT(0x0001020304050607, 0x08090a0b0c0d0e0f);
+static const __m128i BSWAP_MASK =
+ M128_INIT(0x08090a0b0c0d0e0f, 0x0001020304050607);
+
+
+/* The following are for MSC based builds which do not allow
+ * inline assembly. Intrinsic functions are used instead. */
+
+#define aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T) \
+do \
+{ \
+ word32 iv12[4]; \
+ iv12[0] = *(word32*)&ivec[0]; \
+ iv12[1] = *(word32*)&ivec[4]; \
+ iv12[2] = *(word32*)&ivec[8]; \
+ iv12[3] = 0x01000000; \
+ Y = _mm_loadu_si128((__m128i*)iv12); \
+ \
+ /* (Compute E[ZERO, KS] and E[Y0, KS] together */ \
+ tmp1 = _mm_load_si128(&KEY[0]); \
+ tmp2 = _mm_xor_si128(Y, KEY[0]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[1]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[2]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[3]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[4]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[5]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[6]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[7]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[8]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[9]); \
+ lastKey = KEY[10]; \
+ if (nr > 10) { \
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey); \
+ tmp2 = _mm_aesenc_si128(tmp2, lastKey); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[11]); \
+ lastKey = KEY[12]; \
+ if (nr > 12) { \
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey); \
+ tmp2 = _mm_aesenc_si128(tmp2, lastKey); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[13]); \
+ lastKey = KEY[14]; \
+ } \
+ } \
+ H = _mm_aesenclast_si128(tmp1, lastKey); \
+ T = _mm_aesenclast_si128(tmp2, lastKey); \
+ H = _mm_shuffle_epi8(H, BSWAP_MASK); \
+} \
+while (0)
+
+#define aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T) \
+do \
+{ \
+ if (ibytes % 16) { \
+ i = ibytes / 16; \
+ for (j=0; j < (int)(ibytes%16); j++) \
+ ((unsigned char*)&last_block)[j] = ivec[i*16+j]; \
+ } \
+ tmp1 = _mm_load_si128(&KEY[0]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); \
+ lastKey = KEY[10]; \
+ if (nr > 10) { \
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); \
+ lastKey = KEY[12]; \
+ if (nr > 12) { \
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); \
+ lastKey = KEY[14]; \
+ } \
+ } \
+ H = _mm_aesenclast_si128(tmp1, lastKey); \
+ H = _mm_shuffle_epi8(H, BSWAP_MASK); \
+ Y = _mm_setzero_si128(); \
+ for (i=0; i < (int)(ibytes/16); i++) { \
+ tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]); \
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); \
+ Y = _mm_xor_si128(Y, tmp1); \
+ Y = gfmul_sw(Y, H); \
+ } \
+ if (ibytes % 16) { \
+ tmp1 = last_block; \
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); \
+ Y = _mm_xor_si128(Y, tmp1); \
+ Y = gfmul_sw(Y, H); \
+ } \
+ tmp1 = _mm_insert_epi64(tmp1, ibytes*8, 0); \
+ tmp1 = _mm_insert_epi64(tmp1, 0, 1); \
+ Y = _mm_xor_si128(Y, tmp1); \
+ Y = gfmul_sw(Y, H); \
+ Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /* Compute E(K, Y0) */ \
+ tmp1 = _mm_xor_si128(Y, KEY[0]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); \
+ lastKey = KEY[10]; \
+ if (nr > 10) { \
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); \
+ lastKey = KEY[12]; \
+ if (nr > 12) { \
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey); \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); \
+ lastKey = KEY[14]; \
+ } \
+ } \
+ T = _mm_aesenclast_si128(tmp1, lastKey); \
+} \
+while (0)
+
+#define AES_ENC_8(j) \
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); \
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); \
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); \
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); \
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); \
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); \
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); \
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[j]);
+
+#define AES_ENC_LAST_8() \
+ tmp1 =_mm_aesenclast_si128(tmp1, lastKey); \
+ tmp2 =_mm_aesenclast_si128(tmp2, lastKey); \
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[i*8+0])); \
+ tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i*)in)[i*8+1])); \
+ _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1); \
+ _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2); \
+ tmp3 =_mm_aesenclast_si128(tmp3, lastKey); \
+ tmp4 =_mm_aesenclast_si128(tmp4, lastKey); \
+ tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i*)in)[i*8+2])); \
+ tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i*)in)[i*8+3])); \
+ _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3); \
+ _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4); \
+ tmp5 =_mm_aesenclast_si128(tmp5, lastKey); \
+ tmp6 =_mm_aesenclast_si128(tmp6, lastKey); \
+ tmp5 = _mm_xor_si128(tmp5, _mm_loadu_si128(&((__m128i*)in)[i*8+4])); \
+ tmp6 = _mm_xor_si128(tmp6, _mm_loadu_si128(&((__m128i*)in)[i*8+5])); \
+ _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5); \
+ _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6); \
+ tmp7 =_mm_aesenclast_si128(tmp7, lastKey); \
+ tmp8 =_mm_aesenclast_si128(tmp8, lastKey); \
+ tmp7 = _mm_xor_si128(tmp7, _mm_loadu_si128(&((__m128i*)in)[i*8+6])); \
+ tmp8 = _mm_xor_si128(tmp8, _mm_loadu_si128(&((__m128i*)in)[i*8+7])); \
+ _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7); \
+ _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8);
+
+
+static __m128i gfmul_sw(__m128i a, __m128i b)
+{
+ __m128i r, t1, t2, t3, t4, t5, t6, t7;
+ t2 = _mm_shuffle_epi32(b, 78);
+ t3 = _mm_shuffle_epi32(a, 78);
+ t2 = _mm_xor_si128(t2, b);
+ t3 = _mm_xor_si128(t3, a);
+ t4 = _mm_clmulepi64_si128(b, a, 0x11);
+ t1 = _mm_clmulepi64_si128(b, a, 0x00);
+ t2 = _mm_clmulepi64_si128(t2, t3, 0x00);
+ t2 = _mm_xor_si128(t2, t1);
+ t2 = _mm_xor_si128(t2, t4);
+ t3 = _mm_slli_si128(t2, 8);
+ t2 = _mm_srli_si128(t2, 8);
+ t1 = _mm_xor_si128(t1, t3);
+ t4 = _mm_xor_si128(t4, t2);
+
+ t5 = _mm_srli_epi32(t1, 31);
+ t6 = _mm_srli_epi32(t4, 31);
+ t1 = _mm_slli_epi32(t1, 1);
+ t4 = _mm_slli_epi32(t4, 1);
+ t7 = _mm_srli_si128(t5, 12);
+ t5 = _mm_slli_si128(t5, 4);
+ t6 = _mm_slli_si128(t6, 4);
+ t4 = _mm_or_si128(t4, t7);
+ t1 = _mm_or_si128(t1, t5);
+ t4 = _mm_or_si128(t4, t6);
+
+ t5 = _mm_slli_epi32(t1, 31);
+ t6 = _mm_slli_epi32(t1, 30);
+ t7 = _mm_slli_epi32(t1, 25);
+ t5 = _mm_xor_si128(t5, t6);
+ t5 = _mm_xor_si128(t5, t7);
+
+ t6 = _mm_srli_si128(t5, 4);
+ t5 = _mm_slli_si128(t5, 12);
+ t1 = _mm_xor_si128(t1, t5);
+ t7 = _mm_srli_epi32(t1, 1);
+ t3 = _mm_srli_epi32(t1, 2);
+ t2 = _mm_srli_epi32(t1, 7);
+
+ t7 = _mm_xor_si128(t7, t3);
+ t7 = _mm_xor_si128(t7, t2);
+ t7 = _mm_xor_si128(t7, t6);
+ t7 = _mm_xor_si128(t7, t1);
+ r = _mm_xor_si128(t4, t7);
+
+ return r;
+}
+
+static void gfmul_only(__m128i a, __m128i b, __m128i* r0, __m128i* r1)
+{
+ __m128i t1, t2, t3, t4;
+
+ /* 128 x 128 Carryless Multiply */
+ t2 = _mm_shuffle_epi32(b, 78);
+ t3 = _mm_shuffle_epi32(a, 78);
+ t2 = _mm_xor_si128(t2, b);
+ t3 = _mm_xor_si128(t3, a);
+ t4 = _mm_clmulepi64_si128(b, a, 0x11);
+ t1 = _mm_clmulepi64_si128(b, a, 0x00);
+ t2 = _mm_clmulepi64_si128(t2, t3, 0x00);
+ t2 = _mm_xor_si128(t2, t1);
+ t2 = _mm_xor_si128(t2, t4);
+ t3 = _mm_slli_si128(t2, 8);
+ t2 = _mm_srli_si128(t2, 8);
+ t1 = _mm_xor_si128(t1, t3);
+ t4 = _mm_xor_si128(t4, t2);
+ *r0 = _mm_xor_si128(t1, *r0);
+ *r1 = _mm_xor_si128(t4, *r1);
+}
+
+static __m128i gfmul_shl1(__m128i a)
+{
+ __m128i t1 = a, t2;
+ t2 = _mm_srli_epi64(t1, 63);
+ t1 = _mm_slli_epi64(t1, 1);
+ t2 = _mm_slli_si128(t2, 8);
+ t1 = _mm_or_si128(t1, t2);
+ /* if (a[1] >> 63) t1 = _mm_xor_si128(t1, MOD2_128); */
+ a = _mm_shuffle_epi32(a, 0xff);
+ a = _mm_srai_epi32(a, 31);
+ a = _mm_and_si128(a, MOD2_128);
+ t1 = _mm_xor_si128(t1, a);
+ return t1;
+}
+
+static __m128i ghash_red(__m128i r0, __m128i r1)
+{
+ __m128i t2, t3;
+ __m128i t5, t6, t7;
+
+ t5 = _mm_slli_epi32(r0, 31);
+ t6 = _mm_slli_epi32(r0, 30);
+ t7 = _mm_slli_epi32(r0, 25);
+ t5 = _mm_xor_si128(t5, t6);
+ t5 = _mm_xor_si128(t5, t7);
+
+ t6 = _mm_srli_si128(t5, 4);
+ t5 = _mm_slli_si128(t5, 12);
+ r0 = _mm_xor_si128(r0, t5);
+ t7 = _mm_srli_epi32(r0, 1);
+ t3 = _mm_srli_epi32(r0, 2);
+ t2 = _mm_srli_epi32(r0, 7);
+
+ t7 = _mm_xor_si128(t7, t3);
+ t7 = _mm_xor_si128(t7, t2);
+ t7 = _mm_xor_si128(t7, t6);
+ t7 = _mm_xor_si128(t7, r0);
+ return _mm_xor_si128(r1, t7);
+}
+
+static __m128i gfmul_shifted(__m128i a, __m128i b)
+{
+ __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128();
+ gfmul_only(a, b, &t0, &t1);
+ return ghash_red(t0, t1);
+}
+
+#ifndef AES_GCM_AESNI_NO_UNROLL
+static __m128i gfmul8(__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+ __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+ __m128i b1, __m128i b2, __m128i b3, __m128i b4,
+ __m128i b5, __m128i b6, __m128i b7, __m128i b8)
+{
+ __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128();
+ gfmul_only(a1, b8, &t0, &t1);
+ gfmul_only(a2, b7, &t0, &t1);
+ gfmul_only(a3, b6, &t0, &t1);
+ gfmul_only(a4, b5, &t0, &t1);
+ gfmul_only(a5, b4, &t0, &t1);
+ gfmul_only(a6, b3, &t0, &t1);
+ gfmul_only(a7, b2, &t0, &t1);
+ gfmul_only(a8, b1, &t0, &t1);
+ return ghash_red(t0, t1);
+}
+#endif
+
+
+/* AES-GCM encryption using AES-NI and PCLMULQDQ intrinsics.
+ *
+ * in     : plaintext, nbytes long
+ * out    : ciphertext output, nbytes long
+ * addt   : additional authenticated data, abytes long
+ * ivec   : IV/nonce, ibytes long (ibytes == GCM_NONCE_MID_SZ, i.e. 12,
+ *          takes the fast IV path)
+ * tag    : receives tbytes of the authentication tag
+ * key/nr : expanded AES round-key schedule and round count (10/12/14)
+ *
+ * NOTE(review): aes_gcm_calc_iv_12/aes_gcm_calc_iv receive H, Y and T by
+ * name and write them, so they are presumably macros - confirm at their
+ * definitions (outside this view). X is the running GHASH state.
+ */
+static void AES_GCM_encrypt(const unsigned char *in,
+ unsigned char *out,
+ const unsigned char* addt,
+ const unsigned char* ivec,
+ unsigned char *tag, unsigned int nbytes,
+ unsigned int abytes, unsigned int ibytes,
+ unsigned int tbytes,
+ const unsigned char* key, int nr)
+{
+ int i, j ,k;
+ __m128i ctr1;
+ __m128i H, Y, T;
+ __m128i X = _mm_setzero_si128();
+ __m128i *KEY = (__m128i*)key, lastKey;
+ __m128i last_block = _mm_setzero_si128();
+ __m128i tmp1, tmp2;
+#ifndef AES_GCM_AESNI_NO_UNROLL
+ __m128i HT[8];
+ __m128i r0, r1;
+ __m128i XV;
+ __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+#endif
+
+ /* Derive hash key H, initial counter block Y and E(K, Y0) into T. */
+ if (ibytes == GCM_NONCE_MID_SZ)
+ aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T);
+ else
+ aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T);
+
+ /* GHASH the full 16-byte blocks of the AAD into X. */
+ for (i=0; i < (int)(abytes/16); i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X, tmp1);
+ X = gfmul_sw(X, H);
+ }
+ /* GHASH any trailing partial AAD block, zero-padded to 16 bytes. */
+ if (abytes%16) {
+ last_block = _mm_setzero_si128();
+ for (j=0; j < (int)(abytes%16); j++)
+ ((unsigned char*)&last_block)[j] = addt[i*16+j];
+ tmp1 = last_block;
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X, tmp1);
+ X = gfmul_sw(X, H);
+ }
+ /* ctr1 holds the CTR counter in CPU order; it is byte-swapped back to
+ * big-endian (BSWAP_EPI64) right before each encryption. H is
+ * pre-processed once here for the gfmul_shifted/gfmul_only variants
+ * used below. */
+ tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
+ ctr1 = _mm_add_epi32(tmp1, ONE);
+ H = gfmul_shl1(H);
+
+#ifndef AES_GCM_AESNI_NO_UNROLL
+ i = 0;
+ if (nbytes >= 16*8) {
+ /* Precompute key powers H^1..H^8 (HT[0]..HT[7]) for 8-block
+ * GHASH aggregation. */
+ HT[0] = H;
+ HT[1] = gfmul_shifted(H, H);
+ HT[2] = gfmul_shifted(H, HT[1]);
+ HT[3] = gfmul_shifted(HT[1], HT[1]);
+ HT[4] = gfmul_shifted(HT[1], HT[2]);
+ HT[5] = gfmul_shifted(HT[2], HT[2]);
+ HT[6] = gfmul_shifted(HT[2], HT[3]);
+ HT[7] = gfmul_shifted(HT[3], HT[3]);
+
+ /* First group of 8 counter blocks: encrypt only - there is no
+ * ciphertext to hash yet. AES_ENC_8/AES_ENC_LAST_8 are macros
+ * (defined outside this view) operating on tmp1..tmp8. */
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp2 = _mm_add_epi32(ctr1, ONE);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
+ tmp3 = _mm_add_epi32(ctr1, TWO);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
+ tmp4 = _mm_add_epi32(ctr1, THREE);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
+ tmp5 = _mm_add_epi32(ctr1, FOUR);
+ tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
+ tmp6 = _mm_add_epi32(ctr1, FIVE);
+ tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
+ tmp7 = _mm_add_epi32(ctr1, SIX);
+ tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
+ tmp8 = _mm_add_epi32(ctr1, SEVEN);
+ tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
+ ctr1 = _mm_add_epi32(ctr1, EIGHT);
+ tmp1 =_mm_xor_si128(tmp1, KEY[0]);
+ tmp2 =_mm_xor_si128(tmp2, KEY[0]);
+ tmp3 =_mm_xor_si128(tmp3, KEY[0]);
+ tmp4 =_mm_xor_si128(tmp4, KEY[0]);
+ tmp5 =_mm_xor_si128(tmp5, KEY[0]);
+ tmp6 =_mm_xor_si128(tmp6, KEY[0]);
+ tmp7 =_mm_xor_si128(tmp7, KEY[0]);
+ tmp8 =_mm_xor_si128(tmp8, KEY[0]);
+ AES_ENC_8(1);
+ AES_ENC_8(2);
+ AES_ENC_8(3);
+ AES_ENC_8(4);
+ AES_ENC_8(5);
+ AES_ENC_8(6);
+ AES_ENC_8(7);
+ AES_ENC_8(8);
+ AES_ENC_8(9);
+ /* Extra rounds for AES-192 (nr == 12) and AES-256 (nr == 14). */
+ lastKey = KEY[10];
+ if (nr > 10) {
+ AES_ENC_8(10);
+ AES_ENC_8(11);
+ lastKey = KEY[12];
+ if (nr > 12) {
+ AES_ENC_8(12);
+ AES_ENC_8(13);
+ lastKey = KEY[14];
+ }
+ }
+ AES_ENC_LAST_8();
+
+ /* Subsequent groups: interleave the AES rounds of this group's 8
+ * counter blocks with GHASHing the previous group's 8 ciphertext
+ * blocks (read back from out[]), one multiply per round. */
+ for (i=1; i < (int)(nbytes/16/8); i++) {
+ r0 = _mm_setzero_si128();
+ r1 = _mm_setzero_si128();
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp2 = _mm_add_epi32(ctr1, ONE);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
+ tmp3 = _mm_add_epi32(ctr1, TWO);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
+ tmp4 = _mm_add_epi32(ctr1, THREE);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
+ tmp5 = _mm_add_epi32(ctr1, FOUR);
+ tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
+ tmp6 = _mm_add_epi32(ctr1, FIVE);
+ tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
+ tmp7 = _mm_add_epi32(ctr1, SIX);
+ tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
+ tmp8 = _mm_add_epi32(ctr1, SEVEN);
+ tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
+ ctr1 = _mm_add_epi32(ctr1, EIGHT);
+ tmp1 =_mm_xor_si128(tmp1, KEY[0]);
+ tmp2 =_mm_xor_si128(tmp2, KEY[0]);
+ tmp3 =_mm_xor_si128(tmp3, KEY[0]);
+ tmp4 =_mm_xor_si128(tmp4, KEY[0]);
+ tmp5 =_mm_xor_si128(tmp5, KEY[0]);
+ tmp6 =_mm_xor_si128(tmp6, KEY[0]);
+ tmp7 =_mm_xor_si128(tmp7, KEY[0]);
+ tmp8 =_mm_xor_si128(tmp8, KEY[0]);
+ /* 128 x 128 Carryless Multiply */
+ /* Only the first (oldest) block is XORed with the running GHASH
+ * state X; the rest fold in via the per-power products. */
+ XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+0]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ XV = _mm_xor_si128(XV, X);
+ gfmul_only(XV, HT[7], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[1]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[1]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[1]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[1]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[1]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[1]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[1]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+1]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[6], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[2]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[2]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[2]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[2]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[2]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[2]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[2]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+2]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[5], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[3]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[3]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[3]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[3]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[3]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[3]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[3]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+3]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[4], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[4]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[4]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[4]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[4]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[4]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[4]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[4]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+4]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[3], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[5]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[5]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[5]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[5]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[5]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[5]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[5]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+5]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[2], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[6]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[6]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[6]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[6]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[6]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[6]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[6]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+6]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[1], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[7]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[7]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[7]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[7]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[7]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[7]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[7]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+7]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[0], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[8]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[8]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[8]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[8]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[8]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[8]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[8]);
+ /* Reduction */
+ X = ghash_red(r0, r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[9]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[9]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[9]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[9]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[9]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[9]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[9]);
+ lastKey = KEY[10];
+ if (nr > 10) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[10]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[10]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[10]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[10]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[10]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[10]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[10]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[10]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[11]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[11]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[11]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[11]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[11]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[11]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[11]);
+ lastKey = KEY[12];
+ if (nr > 12) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[12]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[12]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[12]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[12]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[12]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[12]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[12]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[12]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[13]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[13]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[13]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[13]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[13]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[13]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[13]);
+ lastKey = KEY[14];
+ }
+ }
+ AES_ENC_LAST_8();
+ }
+ /* GHASH the final group's 8 ciphertext blocks, still held in
+ * tmp1..tmp8, in a single aggregated multiply. */
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
+ tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK);
+ tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK);
+ tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK);
+ tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK);
+ tmp1 = _mm_xor_si128(X, tmp1);
+ X = gfmul8(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8,
+ HT[0], HT[1], HT[2], HT[3], HT[4], HT[5], HT[6], HT[7]);
+ }
+ /* Remaining whole 16-byte blocks, one at a time: encrypt, store,
+ * then fold the ciphertext block into X. */
+ for (k = i*8; k < (int)(nbytes/16); k++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr1 = _mm_add_epi32(ctr1, ONE);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+ lastKey = KEY[10];
+ if (nr > 10) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+ lastKey = KEY[12];
+ if (nr > 12) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+ lastKey = KEY[14];
+ }
+ }
+ tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
+ _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X =_mm_xor_si128(X, tmp1);
+ X = gfmul_shifted(X, H);
+ }
+#else /* AES_GCM_AESNI_NO_UNROLL */
+ /* Non-unrolled path. The GHASH multiply for each block is deferred:
+ * each iteration multiplies the state accumulated by the previous
+ * one, and the very last multiply happens after the loop. */
+ for (k = 0; k < (int)(nbytes/16) && k < 1; k++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr1 = _mm_add_epi32(ctr1, ONE);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+ lastKey = KEY[10];
+ if (nr > 10) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+ lastKey = KEY[12];
+ if (nr > 12) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+ lastKey = KEY[14];
+ }
+ }
+ tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
+ _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X =_mm_xor_si128(X, tmp1);
+ }
+ for (; k < (int)(nbytes/16); k++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr1 = _mm_add_epi32(ctr1, ONE);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+ /* Multiply for the previous block, interleaved with AES rounds. */
+ X = gfmul_shifted(X, H);
+ lastKey = KEY[10];
+ if (nr > 10) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+ lastKey = KEY[12];
+ if (nr > 12) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+ lastKey = KEY[14];
+ }
+ }
+ tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
+ tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
+ _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X =_mm_xor_si128(X, tmp1);
+ }
+ if (k > 0) {
+ X = gfmul_shifted(X, H);
+ }
+#endif /* AES_GCM_AESNI_NO_UNROLL */
+
+ /* If one partial block remains */
+ if (nbytes % 16) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+ lastKey = KEY[10];
+ if (nr > 10) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+ lastKey = KEY[12];
+ if (nr > 12) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+ lastKey = KEY[14];
+ }
+ }
+ tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
+ /* XOR keystream with the partial input; last_block carries the
+ * leftover plaintext bytes (keystream bytes beyond nbytes%16 are
+ * left in place but never written out). */
+ last_block = tmp1;
+ for (j=0; j < (int)(nbytes%16); j++)
+ ((unsigned char*)&last_block)[j] = in[k*16+j];
+ tmp1 = _mm_xor_si128(tmp1, last_block);
+ last_block = tmp1;
+ for (j=0; j < (int)(nbytes%16); j++)
+ out[k*16+j] = ((unsigned char*)&last_block)[j];
+ tmp1 = last_block;
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X =_mm_xor_si128(X, tmp1);
+ X = gfmul_shifted(X, H);
+ }
+ /* Hash in the bit lengths of ciphertext (lane 0) and AAD (lane 1),
+ * then XOR with T = E(K, Y0) to produce the tag. */
+ tmp1 = _mm_insert_epi64(tmp1, nbytes*8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, abytes*8, 1);
+ X = _mm_xor_si128(X, tmp1);
+ X = gfmul_shifted(X, H);
+ X = _mm_shuffle_epi8(X, BSWAP_MASK);
+ T = _mm_xor_si128(X, T);
+ /*_mm_storeu_si128((__m128i*)tag, T);*/
+ XMEMCPY(tag, &T, tbytes);
+}
+
+#ifdef HAVE_AES_DECRYPT
+
+/* AES-GCM decryption using AES-NI and PCLMULQDQ intrinsics.
+ *
+ * in     : ciphertext, nbytes long
+ * out    : plaintext output, nbytes long
+ * addt   : additional authenticated data, abytes long
+ * ivec   : IV/nonce, ibytes long (ibytes == GCM_NONCE_MID_SZ takes the
+ *          fast path)
+ * tag    : expected authentication tag, tbytes compared
+ * key/nr : expanded AES round-key schedule and round count (10/12/14)
+ * res    : set to 1 on tag match, 0 on authentication failure
+ *
+ * Unlike encryption, GHASH runs over the input (ciphertext), so hashing
+ * and decryption of the same blocks can be fully interleaved.
+ */
+static void AES_GCM_decrypt(const unsigned char *in,
+ unsigned char *out,
+ const unsigned char* addt,
+ const unsigned char* ivec,
+ const unsigned char *tag, int nbytes, int abytes,
+ int ibytes, word32 tbytes, const unsigned char* key,
+ int nr, int* res)
+{
+ int i, j ,k;
+ __m128i H, Y, T;
+ __m128i *KEY = (__m128i*)key, lastKey;
+ __m128i ctr1;
+ __m128i last_block = _mm_setzero_si128();
+ __m128i X = _mm_setzero_si128();
+ __m128i tmp1, tmp2, XV;
+#ifndef AES_GCM_AESNI_NO_UNROLL
+ __m128i HT[8];
+ __m128i r0, r1;
+ __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+#endif /* AES_GCM_AESNI_NO_UNROLL */
+
+ /* Derive hash key H, initial counter block Y and E(K, Y0) into T. */
+ if (ibytes == GCM_NONCE_MID_SZ)
+ aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T);
+ else
+ aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T);
+
+ /* GHASH the full 16-byte blocks of the AAD into X. */
+ for (i=0; i<abytes/16; i++) {
+ tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X, tmp1);
+ X = gfmul_sw(X, H);
+ }
+ /* GHASH any trailing partial AAD block, zero-padded to 16 bytes. */
+ if (abytes%16) {
+ last_block = _mm_setzero_si128();
+ for (j=0; j<abytes%16; j++)
+ ((unsigned char*)&last_block)[j] = addt[i*16+j];
+ tmp1 = last_block;
+ tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
+ X = _mm_xor_si128(X, tmp1);
+ X = gfmul_sw(X, H);
+ }
+
+ /* ctr1 counts in CPU order; H is pre-processed once for the
+ * gfmul_shifted/gfmul_only variants used below. */
+ tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
+ ctr1 = _mm_add_epi32(tmp1, ONE);
+ H = gfmul_shl1(H);
+ i = 0;
+
+#ifndef AES_GCM_AESNI_NO_UNROLL
+
+ if (0 < nbytes/16/8) {
+ /* Precompute key powers H^1..H^8 (HT[0]..HT[7]) for 8-block
+ * GHASH aggregation. */
+ HT[0] = H;
+ HT[1] = gfmul_shifted(H, H);
+ HT[2] = gfmul_shifted(H, HT[1]);
+ HT[3] = gfmul_shifted(HT[1], HT[1]);
+ HT[4] = gfmul_shifted(HT[1], HT[2]);
+ HT[5] = gfmul_shifted(HT[2], HT[2]);
+ HT[6] = gfmul_shifted(HT[2], HT[3]);
+ HT[7] = gfmul_shifted(HT[3], HT[3]);
+
+ /* Each iteration: GHASH this group's 8 ciphertext blocks (read
+ * from in[]) interleaved with the AES rounds of the matching 8
+ * counter blocks, then XOR keystream to produce plaintext. */
+ for (; i < nbytes/16/8; i++) {
+ r0 = _mm_setzero_si128();
+ r1 = _mm_setzero_si128();
+
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp2 = _mm_add_epi32(ctr1, ONE);
+ tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
+ tmp3 = _mm_add_epi32(ctr1, TWO);
+ tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
+ tmp4 = _mm_add_epi32(ctr1, THREE);
+ tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
+ tmp5 = _mm_add_epi32(ctr1, FOUR);
+ tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
+ tmp6 = _mm_add_epi32(ctr1, FIVE);
+ tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
+ tmp7 = _mm_add_epi32(ctr1, SIX);
+ tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
+ tmp8 = _mm_add_epi32(ctr1, SEVEN);
+ tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
+ ctr1 = _mm_add_epi32(ctr1, EIGHT);
+ tmp1 =_mm_xor_si128(tmp1, KEY[0]);
+ tmp2 =_mm_xor_si128(tmp2, KEY[0]);
+ tmp3 =_mm_xor_si128(tmp3, KEY[0]);
+ tmp4 =_mm_xor_si128(tmp4, KEY[0]);
+ tmp5 =_mm_xor_si128(tmp5, KEY[0]);
+ tmp6 =_mm_xor_si128(tmp6, KEY[0]);
+ tmp7 =_mm_xor_si128(tmp7, KEY[0]);
+ tmp8 =_mm_xor_si128(tmp8, KEY[0]);
+ /* 128 x 128 Carryless Multiply */
+ /* Only the first (oldest) block is XORed with the running GHASH
+ * state X; the rest fold in via the per-power products. */
+ XV = _mm_loadu_si128(&((__m128i*)in)[i*8+0]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ XV = _mm_xor_si128(XV, X);
+ gfmul_only(XV, HT[7], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[1]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[1]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[1]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[1]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[1]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[1]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[1]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)in)[i*8+1]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[6], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[2]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[2]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[2]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[2]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[2]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[2]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[2]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)in)[i*8+2]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[5], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[3]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[3]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[3]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[3]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[3]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[3]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[3]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)in)[i*8+3]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[4], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[4]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[4]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[4]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[4]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[4]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[4]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[4]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)in)[i*8+4]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[3], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[5]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[5]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[5]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[5]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[5]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[5]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[5]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)in)[i*8+5]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[2], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[6]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[6]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[6]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[6]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[6]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[6]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[6]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)in)[i*8+6]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[1], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[7]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[7]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[7]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[7]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[7]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[7]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[7]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)in)[i*8+7]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ gfmul_only(XV, HT[0], &r0, &r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[8]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[8]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[8]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[8]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[8]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[8]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[8]);
+ /* Reduction */
+ X = ghash_red(r0, r1);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[9]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[9]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[9]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[9]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[9]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[9]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[9]);
+ /* Extra rounds for AES-192 (nr == 12) and AES-256 (nr == 14). */
+ lastKey = KEY[10];
+ if (nr > 10) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[10]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[10]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[10]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[10]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[10]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[10]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[10]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[10]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[11]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[11]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[11]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[11]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[11]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[11]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[11]);
+ lastKey = KEY[12];
+ if (nr > 12) {
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[12]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[12]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[12]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[12]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[12]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[12]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[12]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[12]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+ tmp2 = _mm_aesenc_si128(tmp2, KEY[13]);
+ tmp3 = _mm_aesenc_si128(tmp3, KEY[13]);
+ tmp4 = _mm_aesenc_si128(tmp4, KEY[13]);
+ tmp5 = _mm_aesenc_si128(tmp5, KEY[13]);
+ tmp6 = _mm_aesenc_si128(tmp6, KEY[13]);
+ tmp7 = _mm_aesenc_si128(tmp7, KEY[13]);
+ tmp8 = _mm_aesenc_si128(tmp8, KEY[13]);
+ lastKey = KEY[14];
+ }
+ }
+ AES_ENC_LAST_8();
+ }
+ }
+
+#endif /* AES_GCM_AESNI_NO_UNROLL */
+
+ /* Remaining whole 16-byte blocks: GHASH the ciphertext block and
+ * decrypt it with the matching keystream block. */
+ for (k = i*8; k < nbytes/16; k++) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ ctr1 = _mm_add_epi32(ctr1, ONE);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+ /* 128 x 128 Carryless Multiply */
+ XV = _mm_loadu_si128(&((__m128i*)in)[k]);
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ XV = _mm_xor_si128(XV, X);
+ X = gfmul_shifted(XV, H);
+ lastKey = KEY[10];
+ if (nr > 10) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+ lastKey = KEY[12];
+ if (nr > 12) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+ lastKey = KEY[14];
+ }
+ }
+ tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
+ tmp2 = _mm_loadu_si128(&((__m128i*)in)[k]);
+ tmp1 = _mm_xor_si128(tmp1, tmp2);
+ _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
+ }
+
+ /* If one partial block remains */
+ if (nbytes % 16) {
+ tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
+ tmp1 = _mm_xor_si128(tmp1, KEY[0]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
+ lastKey = KEY[10];
+ if (nr > 10) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
+ lastKey = KEY[12];
+ if (nr > 12) {
+ tmp1 = _mm_aesenc_si128(tmp1, lastKey);
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
+ lastKey = KEY[14];
+ }
+ }
+ tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
+ /* XV keeps the zero-padded ciphertext for GHASH; tmp1 becomes the
+ * decrypted partial plaintext. */
+ last_block = _mm_setzero_si128();
+ for (j=0; j < nbytes%16; j++)
+ ((unsigned char*)&last_block)[j] = in[k*16+j];
+ XV = last_block;
+ tmp1 = _mm_xor_si128(tmp1, last_block);
+ last_block = tmp1;
+ for (j=0; j < nbytes%16; j++)
+ out[k*16+j] = ((unsigned char*)&last_block)[j];
+ XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
+ XV = _mm_xor_si128(XV, X);
+ X = gfmul_shifted(XV, H);
+ }
+
+ /* Hash in the bit lengths of ciphertext (lane 0) and AAD (lane 1),
+ * then XOR with T = E(K, Y0) to form the computed tag. */
+ tmp1 = _mm_insert_epi64(tmp1, nbytes*8, 0);
+ tmp1 = _mm_insert_epi64(tmp1, abytes*8, 1);
+ /* 128 x 128 Carryless Multiply */
+ X = _mm_xor_si128(X, tmp1);
+ X = gfmul_shifted(X, H);
+ X = _mm_shuffle_epi8(X, BSWAP_MASK);
+ T = _mm_xor_si128(X, T);
+
+/* if (0xffff !=
+ _mm_movemask_epi8(_mm_cmpeq_epi8(T, _mm_loadu_si128((__m128i*)tag)))) */
+ /* NOTE(review): XMEMCMP is not guaranteed constant-time; a timing-safe
+ * compare (e.g. ConstantCompare) is preferable for tag verification -
+ * confirm against the project's conventions. */
+ if (XMEMCMP(tag, &T, tbytes) != 0)
+ *res = 0; /* in case the authentication failed */
+ else
+ *res = 1; /* when successful returns 1 */
+}
+
+#endif /* HAVE_AES_DECRYPT */
+#endif /* _MSC_VER */
+#endif /* WOLFSSL_AESNI */
+
+
+#if defined(GCM_SMALL)
static void GMULT(byte* X, byte* Y)
{
byte Z[AES_BLOCK_SIZE];
@@ -2820,8 +5383,8 @@ static void GMULT(byte* X, byte* Y)
}
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
- const byte* c, word32 cSz, byte* s, word32 sSz)
+void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+ word32 cSz, byte* s, word32 sSz)
{
byte x[AES_BLOCK_SIZE];
byte scratch[AES_BLOCK_SIZE];
@@ -2969,8 +5532,8 @@ static void GMULT(byte *x, byte m[256][AES_BLOCK_SIZE])
}
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
- const byte* c, word32 cSz, byte* s, word32 sSz)
+void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+ word32 cSz, byte* s, word32 sSz)
{
byte x[AES_BLOCK_SIZE];
byte scratch[AES_BLOCK_SIZE];
@@ -3025,12 +5588,13 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
/* end GCM_TABLE */
#elif defined(WORD64_AVAILABLE) && !defined(GCM_WORD32)
+#if !defined(FREESCALE_LTC_AES_GCM)
static void GMULT(word64* X, word64* Y)
{
word64 Z[2] = {0,0};
- word64 V[2] ;
+ word64 V[2];
int i, j;
- V[0] = X[0] ; V[1] = X[1] ;
+ V[0] = X[0]; V[1] = X[1];
for (i = 0; i < 2; i++)
{
@@ -3044,13 +5608,15 @@ static void GMULT(word64* X, word64* Y)
if (V[1] & 0x0000000000000001) {
V[1] >>= 1;
- V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000ULL : 0);
+ V[1] |= ((V[0] & 0x0000000000000001) ?
+ 0x8000000000000000ULL : 0);
V[0] >>= 1;
V[0] ^= 0xE100000000000000ULL;
}
else {
V[1] >>= 1;
- V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000ULL : 0);
+ V[1] |= ((V[0] & 0x0000000000000001) ?
+ 0x8000000000000000ULL : 0);
V[0] >>= 1;
}
y <<= 1;
@@ -3061,8 +5627,8 @@ static void GMULT(word64* X, word64* Y)
}
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
- const byte* c, word32 cSz, byte* s, word32 sSz)
+void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+ word32 cSz, byte* s, word32 sSz)
{
word64 x[2] = {0,0};
word32 blocks, partial;
@@ -3070,7 +5636,7 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE);
#ifdef LITTLE_ENDIAN_ORDER
- ByteReverseWords64(bigH, bigH, AES_BLOCK_SIZE);
+ ByteReverseWords64(bigH, bigH, AES_BLOCK_SIZE);
#endif
/* Hash in A, the Additional Authentication Data */
@@ -3098,6 +5664,13 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
x[1] ^= bigA[1];
GMULT(x, bigH);
}
+#ifdef OPENSSL_EXTRA
+ /* store AAD partial tag for next call */
+ aes->aadH[0] = (word32)((x[0] & 0xFFFFFFFF00000000) >> 32);
+ aes->aadH[1] = (word32)(x[0] & 0xFFFFFFFF);
+ aes->aadH[2] = (word32)((x[1] & 0xFFFFFFFF00000000) >> 32);
+ aes->aadH[3] = (word32)(x[1] & 0xFFFFFFFF);
+#endif
}
/* Hash in C, the Ciphertext */
@@ -3105,6 +5678,13 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
word64 bigC[2];
blocks = cSz / AES_BLOCK_SIZE;
partial = cSz % AES_BLOCK_SIZE;
+#ifdef OPENSSL_EXTRA
+ /* Start from last AAD partial tag */
+ if(aes->aadLen) {
+ x[0] = ((word64)aes->aadH[0]) << 32 | aes->aadH[1];
+ x[1] = ((word64)aes->aadH[2]) << 32 | aes->aadH[3];
+ }
+#endif
while (blocks--) {
XMEMCPY(bigC, c, AES_BLOCK_SIZE);
#ifdef LITTLE_ENDIAN_ORDER
@@ -3129,9 +5709,12 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
/* Hash in the lengths in bits of A and C */
{
- word64 len[2] ;
- len[0] = aSz ; len[1] = cSz;
-
+ word64 len[2];
+ len[0] = aSz; len[1] = cSz;
+#ifdef OPENSSL_EXTRA
+ if (aes->aadLen)
+ len[0] = (word64)aes->aadLen;
+#endif
/* Lengths are in bytes. Convert to bits. */
len[0] *= 8;
len[1] *= 8;
@@ -3145,6 +5728,7 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
#endif
XMEMCPY(s, x, sSz);
}
+#endif /* !FREESCALE_LTC_AES_GCM */
/* end defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) */
#else /* GCM_WORD32 */
@@ -3152,7 +5736,7 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
static void GMULT(word32* X, word32* Y)
{
word32 Z[4] = {0,0,0,0};
- word32 V[4] ;
+ word32 V[4];
int i, j;
V[0] = X[0]; V[1] = X[1]; V[2] = X[2]; V[3] = X[3];
@@ -3197,8 +5781,8 @@ static void GMULT(word32* X, word32* Y)
}
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
- const byte* c, word32 cSz, byte* s, word32 sSz)
+void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+ word32 cSz, byte* s, word32 sSz)
{
word32 x[4] = {0,0,0,0};
word32 blocks, partial;
@@ -3296,165 +5880,1071 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
#endif /* end GCM_WORD32 */
+#if !defined(WOLFSSL_XILINX_CRYPT) && !defined(WOLFSSL_AFALG_XILINX_AES)
+#ifdef FREESCALE_LTC_AES_GCM
int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
 const byte* iv, word32 ivSz,
 byte* authTag, word32 authTagSz,
 const byte* authIn, word32 authInSz)
{
+ status_t status;
+ word32 keySize;
+
+ /* argument checks */
+ if (aes == NULL || authTagSz > AES_BLOCK_SIZE || ivSz == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
+ WOLFSSL_MSG("GcmEncrypt authTagSz too small error");
+ return BAD_FUNC_ARG;
+ }
+
+ status = wc_AesGetKeySize(aes, &keySize);
+ if (status)
+ return status;
+
+ /* Single hardware call performs both the encryption and the tag
+ * generation on the LTC peripheral. */
+ status = LTC_AES_EncryptTagGcm(LTC_BASE, in, out, sz, iv, ivSz,
+ authIn, authInSz, (byte*)aes->key, keySize, authTag, authTagSz);
+
+ /* Map the hardware status to wolfCrypt's 0-on-success convention. */
+ return (status == kStatus_Success) ? 0 : AES_GCM_AUTH_E;
+}
+
+#else
+
+#ifdef STM32_CRYPTO_AES_GCM
+
+/* this function supports inline encrypt */
+static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+ int ret;
+#ifdef WOLFSSL_STM32_CUBEMX
+ CRYP_HandleTypeDef hcryp;
+#else
+ word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)];
+#endif
+ word32 keySize;
+ int status = HAL_OK;
+ word32 blocks = sz / AES_BLOCK_SIZE;
+ word32 partial = sz % AES_BLOCK_SIZE;
+ byte tag[AES_BLOCK_SIZE];
+ byte partialBlock[AES_BLOCK_SIZE];
+ byte ctr[AES_BLOCK_SIZE];
+ byte* authInPadded = NULL;
+ int authPadSz;
+
+ ret = wc_AesGetKeySize(aes, &keySize);
+ if (ret != 0)
+ return ret;
+
+#ifdef WOLFSSL_STM32_CUBEMX
+ ret = wc_Stm32_Aes_Init(aes, &hcryp);
+ if (ret != 0)
+ return ret;
+#endif
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
+
+ XMEMSET(ctr, 0, AES_BLOCK_SIZE);
+ if (ivSz == GCM_NONCE_MID_SZ) {
+ XMEMCPY(ctr, iv, ivSz);
+ ctr[AES_BLOCK_SIZE - 1] = 1;
+ }
+ else {
+ GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE);
+ }
+ /* Hardware requires counter + 1 */
+ IncrementGcmCounter(ctr);
+
+ if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) {
+ /* Need to pad the AAD to a full block with zeros. */
+ authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
+ authInPadded = (byte*)XMALLOC(authPadSz, aes->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (authInPadded == NULL) {
+ wolfSSL_CryptHwMutexUnLock();
+ return MEMORY_E;
+ }
+ XMEMSET(authInPadded, 0, authPadSz);
+ XMEMCPY(authInPadded, authIn, authInSz);
+ } else {
+ authPadSz = authInSz;
+ authInPadded = (byte*)authIn;
+ }
+
+#ifdef WOLFSSL_STM32_CUBEMX
+ hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr;
+ hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded;
+ hcryp.Init.HeaderSize = authInSz;
+
+#ifdef STM32_CRYPTO_AES_ONLY
+ /* Set the CRYP parameters */
+ hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_GCM_GMAC;
+ hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
+ hcryp.Init.GCMCMACPhase = CRYP_INIT_PHASE;
+ HAL_CRYP_Init(&hcryp);
+
+ /* GCM init phase */
+ status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
+ if (status == HAL_OK) {
+ /* GCM header phase */
+ hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE;
+ status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
+ }
+ if (status == HAL_OK) {
+ /* GCM payload phase - blocks */
+ hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE;
+ if (blocks) {
+ status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in,
+ (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
+ }
+ }
+ if (status == HAL_OK && (partial != 0 || blocks == 0)) {
+ /* GCM payload phase - partial remainder */
+ XMEMSET(partialBlock, 0, sizeof(partialBlock));
+ XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
+ status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial,
+ partialBlock, STM32_HAL_TIMEOUT);
+ XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
+ }
+ if (status == HAL_OK) {
+ /* GCM final phase */
+ hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE;
+ status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
+ }
+#elif defined(STM32_HAL_V2)
+ hcryp.Init.Algorithm = CRYP_AES_GCM;
+ ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE);
+ hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock;
+ HAL_CRYP_Init(&hcryp);
+
+ /* GCM payload phase - can handle partial blocks */
+ status = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in,
+ (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT);
+ if (status == HAL_OK) {
+ /* Compute the authTag */
+ status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag,
+ STM32_HAL_TIMEOUT);
+ }
+#else
+ HAL_CRYP_Init(&hcryp);
+ if (blocks) {
+ /* GCM payload phase - blocks */
+ status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)in,
+ (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
+ }
+ if (status == HAL_OK && (partial != 0 || blocks == 0)) {
+ /* GCM payload phase - partial remainder */
+ XMEMSET(partialBlock, 0, sizeof(partialBlock));
+ XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
+ status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, partialBlock, partial,
+ partialBlock, STM32_HAL_TIMEOUT);
+ XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
+ }
+ if (status == HAL_OK) {
+ /* Compute the authTag */
+ status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
+ }
+#endif
+
+ if (status != HAL_OK)
+ ret = AES_GCM_AUTH_E;
+ HAL_CRYP_DeInit(&hcryp);
+
+#else /* STD_PERI_LIB */
+ ByteReverseWords(keyCopy, (word32*)aes->key, keySize);
+ status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)ctr,
+ (uint8_t*)keyCopy, keySize * 8,
+ (uint8_t*)in, sz,
+ (uint8_t*)authInPadded, authInSz,
+ (uint8_t*)out, tag);
+ if (status != SUCCESS)
+ ret = AES_GCM_AUTH_E;
+#endif /* WOLFSSL_STM32_CUBEMX */
+
+ if (ret == 0) {
+ /* return authTag */
+ if (authTag) {
+ /* STM32 GCM won't compute Auth correctly for partial or
+ when IV != 12, so use software here */
+ if (sz == 0 || partial != 0 || ivSz != GCM_NONCE_MID_SZ) {
+ DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */
+ GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
+ wc_AesEncrypt(aes, ctr, tag);
+ xorbuf(authTag, tag, authTagSz);
+ }
+ else {
+ XMEMCPY(authTag, tag, authTagSz);
+ }
+ }
+ }
+
+ /* Free memory if not a multiple of AES_BLOCK_SZ */
+ if (authInPadded != authIn) {
+ XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ wolfSSL_CryptHwMutexUnLock();
+
+ return ret;
+}
+
+#endif /* STM32_CRYPTO_AES_GCM */
+
+#ifdef WOLFSSL_AESNI
+int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz);
+#else
+static
+#endif
+int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+ int ret = 0;
word32 blocks = sz / AES_BLOCK_SIZE;
word32 partial = sz % AES_BLOCK_SIZE;
const byte* p = in;
byte* c = out;
byte counter[AES_BLOCK_SIZE];
- byte *ctr ;
+ byte initialCounter[AES_BLOCK_SIZE];
+ byte *ctr;
byte scratch[AES_BLOCK_SIZE];
-
-#ifdef FREESCALE_MMCAU
- byte* key = (byte*)aes->key;
+#ifdef OPENSSL_EXTRA
+ word32 aadTemp;
#endif
-
- WOLFSSL_ENTER("AesGcmEncrypt");
+ ctr = counter;
+ XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
+ XMEMSET(scratch, 0, AES_BLOCK_SIZE);
+ if (ivSz == GCM_NONCE_MID_SZ) {
+ XMEMCPY(initialCounter, iv, ivSz);
+ initialCounter[AES_BLOCK_SIZE - 1] = 1;
+ }
+ else {
+#ifdef OPENSSL_EXTRA
+ aadTemp = aes->aadLen;
+ aes->aadLen = 0;
+#endif
+ GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
+#ifdef OPENSSL_EXTRA
+ aes->aadLen = aadTemp;
+#endif
+ }
+ XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);
#ifdef WOLFSSL_PIC32MZ_CRYPT
- ctr = (char *)aes->iv_ce ;
-#else
- ctr = counter ;
+ if (blocks) {
+ /* use initial IV for HW, but don't use it below */
+ XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE);
+
+ ret = wc_Pic32AesCrypt(
+ aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
+ out, in, (blocks * AES_BLOCK_SIZE),
+ PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM);
+ if (ret != 0)
+ return ret;
+ }
+ /* process remainder using partial handling */
#endif
- XMEMSET(ctr, 0, AES_BLOCK_SIZE);
- XMEMCPY(ctr, iv, ivSz);
- InitGcmCounter(ctr);
+#if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT)
+ /* some hardware acceleration can gain performance from doing AES encryption
+ * of the whole buffer at once */
+ if (c != p && blocks > 0) { /* can not handle inline encryption */
+ while (blocks--) {
+ IncrementGcmCounter(ctr);
+ XMEMCPY(c, ctr, AES_BLOCK_SIZE);
+ c += AES_BLOCK_SIZE;
+ }
+
+ /* reset number of blocks and then do encryption */
+ blocks = sz / AES_BLOCK_SIZE;
+ wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks);
+ xorbuf(out, p, AES_BLOCK_SIZE * blocks);
+ p += AES_BLOCK_SIZE * blocks;
+ }
+ else
+#endif /* HAVE_AES_ECB && !WOLFSSL_PIC32MZ_CRYPT */
-#ifdef WOLFSSL_PIC32MZ_CRYPT
- if(blocks)
- wc_AesCrypt(aes, out, in, blocks*AES_BLOCK_SIZE,
- PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM );
-#endif
while (blocks--) {
IncrementGcmCounter(ctr);
- #ifndef WOLFSSL_PIC32MZ_CRYPT
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(ctr, key, aes->rounds, scratch);
- #else
- wc_AesEncrypt(aes, ctr, scratch);
- #endif
+ #if !defined(WOLFSSL_PIC32MZ_CRYPT)
+ wc_AesEncrypt(aes, ctr, scratch);
xorbuf(scratch, p, AES_BLOCK_SIZE);
XMEMCPY(c, scratch, AES_BLOCK_SIZE);
- #endif
+ #endif
p += AES_BLOCK_SIZE;
c += AES_BLOCK_SIZE;
}
if (partial != 0) {
IncrementGcmCounter(ctr);
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(ctr, key, aes->rounds, scratch);
- #else
- wc_AesEncrypt(aes, ctr, scratch);
- #endif
+ wc_AesEncrypt(aes, ctr, scratch);
xorbuf(scratch, p, partial);
XMEMCPY(c, scratch, partial);
+ }
+ if (authTag) {
+ GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
+ wc_AesEncrypt(aes, initialCounter, scratch);
+ xorbuf(authTag, scratch, authTagSz);
+#ifdef OPENSSL_EXTRA
+ if (!in && !sz)
+ /* store AAD size for next call */
+ aes->aadLen = authInSz;
+#endif
+ }
+
+ return ret;
+}
+/* Software AES - GCM Encrypt */
+int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+ /* argument checks */
+ if (aes == NULL || authTagSz > AES_BLOCK_SIZE || ivSz == 0) {
+ return BAD_FUNC_ARG;
}
- GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
- InitGcmCounter(ctr);
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(ctr, key, aes->rounds, scratch);
- #else
- wc_AesEncrypt(aes, ctr, scratch);
+ if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
+ WOLFSSL_MSG("GcmEncrypt authTagSz too small error");
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLF_CRYPTO_CB
+ if (aes->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_AesGcmEncrypt(aes, out, in, sz, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+ /* if async and byte count above threshold */
+ /* only 12-byte IV is supported in HW */
+ if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+ sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) {
+ #if defined(HAVE_CAVIUM)
+ #ifdef HAVE_CAVIUM_V
+ if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
+ return NitroxAesGcmEncrypt(aes, out, in, sz,
+ (const byte*)aes->devKey, aes->keylen, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ }
+ #endif
+ #elif defined(HAVE_INTEL_QA)
+ return IntelQaSymAesGcmEncrypt(&aes->asyncDev, out, in, sz,
+ (const byte*)aes->devKey, aes->keylen, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_ENCRYPT)) {
+ WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+ testDev->aes.aes = aes;
+ testDev->aes.out = out;
+ testDev->aes.in = in;
+ testDev->aes.sz = sz;
+ testDev->aes.iv = iv;
+ testDev->aes.ivSz = ivSz;
+ testDev->aes.authTag = authTag;
+ testDev->aes.authTagSz = authTagSz;
+ testDev->aes.authIn = authIn;
+ testDev->aes.authInSz = authInSz;
+ return WC_PENDING_E;
+ }
#endif
- xorbuf(authTag, scratch, authTagSz);
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
- return 0;
+#ifdef STM32_CRYPTO_AES_GCM
+ /* The STM standard peripheral library API's doesn't support partial blocks */
+ #ifdef STD_PERI_LIB
+ if (partial == 0)
+ #endif
+ {
+ return wc_AesGcmEncrypt_STM32(
+ aes, out, in, sz, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ }
+#endif /* STM32_CRYPTO_AES_GCM */
+
+#ifdef WOLFSSL_AESNI
+ #ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_AVX2(intel_flags)) {
+ AES_GCM_encrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+ authTagSz, (const byte*)aes->key, aes->rounds);
+ return 0;
+ }
+ else
+ #endif
+ #ifdef HAVE_INTEL_AVX1
+ if (IS_INTEL_AVX1(intel_flags)) {
+ AES_GCM_encrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+ authTagSz, (const byte*)aes->key, aes->rounds);
+ return 0;
+ }
+ else
+ #endif
+ if (haveAESNI) {
+ AES_GCM_encrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+ authTagSz, (const byte*)aes->key, aes->rounds);
+ return 0;
+ }
+ else
+#endif
+ {
+ return AES_GCM_encrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
+ authIn, authInSz);
+ }
}
+#endif
+
+/* AES GCM Decrypt */
+#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT)
+#ifdef FREESCALE_LTC_AES_GCM
int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
const byte* iv, word32 ivSz,
const byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz)
{
+ int ret;
+ word32 keySize;
+ status_t status;
+
+ /* argument checks */
+ /* If the sz is non-zero, both in and out must be set. If sz is 0,
+ * in and out are don't cares, as this is is the GMAC case. */
+ if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
+ authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
+ ivSz == 0) {
+
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wc_AesGetKeySize(aes, &keySize);
+ if (ret != 0) {
+ return ret;
+ }
+
+ status = LTC_AES_DecryptTagGcm(LTC_BASE, in, out, sz, iv, ivSz,
+ authIn, authInSz, (byte*)aes->key, keySize, authTag, authTagSz);
+
+ return (status == kStatus_Success) ? 0 : AES_GCM_AUTH_E;
+}
+
+#else
+
+#ifdef STM32_CRYPTO_AES_GCM
+/* this function supports inline decrypt */
+static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
+ const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ const byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+ int ret;
+#ifdef WOLFSSL_STM32_CUBEMX
+ CRYP_HandleTypeDef hcryp;
+#else
+ word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)];
+#endif
+ word32 keySize;
+ int status = HAL_OK;
word32 blocks = sz / AES_BLOCK_SIZE;
word32 partial = sz % AES_BLOCK_SIZE;
- const byte* c = in;
- byte* p = out;
- byte counter[AES_BLOCK_SIZE];
- byte *ctr ;
- byte scratch[AES_BLOCK_SIZE];
-
-#ifdef FREESCALE_MMCAU
- byte* key = (byte*)aes->key;
+ byte tag[AES_BLOCK_SIZE];
+ byte partialBlock[AES_BLOCK_SIZE];
+ byte ctr[AES_BLOCK_SIZE];
+ byte* authInPadded = NULL;
+ int authPadSz;
+
+ ret = wc_AesGetKeySize(aes, &keySize);
+ if (ret != 0)
+ return ret;
+
+#ifdef WOLFSSL_STM32_CUBEMX
+ ret = wc_Stm32_Aes_Init(aes, &hcryp);
+ if (ret != 0)
+ return ret;
#endif
- WOLFSSL_ENTER("AesGcmDecrypt");
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
-#ifdef WOLFSSL_PIC32MZ_CRYPT
- ctr = (char *)aes->iv_ce ;
+ XMEMSET(ctr, 0, AES_BLOCK_SIZE);
+ if (ivSz == GCM_NONCE_MID_SZ) {
+ XMEMCPY(ctr, iv, ivSz);
+ ctr[AES_BLOCK_SIZE - 1] = 1;
+ }
+ else {
+ GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE);
+ }
+ /* Hardware requires counter + 1 */
+ IncrementGcmCounter(ctr);
+
+ if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) {
+ /* Need to pad the AAD to a full block with zeros. */
+ authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
+ authInPadded = (byte*)XMALLOC(authPadSz, aes->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (authInPadded == NULL) {
+ wolfSSL_CryptHwMutexUnLock();
+ return MEMORY_E;
+ }
+ XMEMSET(authInPadded, 0, authPadSz);
+ XMEMCPY(authInPadded, authIn, authInSz);
+ } else {
+ authPadSz = authInSz;
+ authInPadded = (byte*)authIn;
+ }
+
+#ifdef WOLFSSL_STM32_CUBEMX
+ hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr;
+ hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded;
+ hcryp.Init.HeaderSize = authInSz;
+
+#ifdef STM32_CRYPTO_AES_ONLY
+ /* Set the CRYP parameters */
+ hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_GCM_GMAC;
+ hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT;
+ hcryp.Init.GCMCMACPhase = CRYP_INIT_PHASE;
+ HAL_CRYP_Init(&hcryp);
+
+ /* GCM init phase */
+ status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
+ if (status == HAL_OK) {
+ /* GCM header phase */
+ hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE;
+ status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
+ }
+ if (status == HAL_OK) {
+ /* GCM payload phase - blocks */
+ hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE;
+ if (blocks) {
+ status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in,
+ (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
+ }
+ }
+ if (status == HAL_OK && (partial != 0 || blocks == 0)) {
+ /* GCM payload phase - partial remainder */
+ XMEMSET(partialBlock, 0, sizeof(partialBlock));
+ XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
+ status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial,
+ partialBlock, STM32_HAL_TIMEOUT);
+ XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
+ }
+ if (status == HAL_OK) {
+ /* GCM final phase */
+ hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE;
+ status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
+ }
+#elif defined(STM32_HAL_V2)
+ hcryp.Init.Algorithm = CRYP_AES_GCM;
+ ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE);
+ hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock;
+ HAL_CRYP_Init(&hcryp);
+
+ /* GCM payload phase - can handle partial blocks */
+ status = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in,
+ (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT);
+ if (status == HAL_OK) {
+ /* Compute the authTag */
+ status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag,
+ STM32_HAL_TIMEOUT);
+ }
#else
- ctr = counter ;
+ HAL_CRYP_Init(&hcryp);
+ if (blocks) {
+ /* GCM payload phase - blocks */
+ status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)in,
+ (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
+ }
+ if (status == HAL_OK && (partial != 0 || blocks == 0)) {
+ /* GCM payload phase - partial remainder */
+ XMEMSET(partialBlock, 0, sizeof(partialBlock));
+ XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
+ status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, partialBlock, partial,
+ partialBlock, STM32_HAL_TIMEOUT);
+ XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
+ }
+ if (status == HAL_OK) {
+ /* Compute the authTag */
+ status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
+ }
#endif
- XMEMSET(ctr, 0, AES_BLOCK_SIZE);
- XMEMCPY(ctr, iv, ivSz);
- InitGcmCounter(ctr);
+ if (status != HAL_OK)
+ ret = AES_GCM_AUTH_E;
+
+ HAL_CRYP_DeInit(&hcryp);
+
+#else /* STD_PERI_LIB */
+ ByteReverseWords(keyCopy, (word32*)aes->key, aes->keylen);
+
+ /* Input size and auth size need to be the actual sizes, even though
+ * they are not block aligned, because this length (in bits) is used
+ * in the final GHASH. */
+ status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)ctr,
+ (uint8_t*)keyCopy, keySize * 8,
+ (uint8_t*)in, sz,
+ (uint8_t*)authInPadded, authInSz,
+ (uint8_t*)out, tag);
+ if (status != SUCCESS)
+ ret = AES_GCM_AUTH_E;
+#endif /* WOLFSSL_STM32_CUBEMX */
+
+ /* STM32 GCM hardware only supports IV of 12 bytes, so use software for auth */
+ if (sz == 0 || ivSz != GCM_NONCE_MID_SZ) {
+ DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */
+ GHASH(aes, authIn, authInSz, in, sz, tag, sizeof(tag));
+ wc_AesEncrypt(aes, ctr, partialBlock);
+ xorbuf(tag, partialBlock, sizeof(tag));
+ }
- /* Calculate the authTag again using the received auth data and the
- * cipher text. */
- {
- byte Tprime[AES_BLOCK_SIZE];
- byte EKY0[AES_BLOCK_SIZE];
+ if (ConstantCompare(authTag, tag, authTagSz) != 0) {
+ ret = AES_GCM_AUTH_E;
+ }
- GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime));
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(ctr, key, aes->rounds, EKY0);
- #else
- wc_AesEncrypt(aes, ctr, EKY0);
- #endif
- xorbuf(Tprime, EKY0, sizeof(Tprime));
+ /* Free memory if not a multiple of AES_BLOCK_SZ */
+ if (authInPadded != authIn) {
+ XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
- if (ConstantCompare(authTag, Tprime, authTagSz) != 0) {
- return AES_GCM_AUTH_E;
- }
+ wolfSSL_CryptHwMutexUnLock();
+
+ return ret;
+}
+
+#endif /* STM32_CRYPTO_AES_GCM */
+
+#ifdef WOLFSSL_AESNI
+int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ const byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz);
+#else
+static
+#endif
+int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ const byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+ int ret = 0;
+ word32 blocks = sz / AES_BLOCK_SIZE;
+ word32 partial = sz % AES_BLOCK_SIZE;
+ const byte* c = in;
+ byte* p = out;
+ byte counter[AES_BLOCK_SIZE];
+ byte initialCounter[AES_BLOCK_SIZE];
+ byte *ctr;
+ byte scratch[AES_BLOCK_SIZE];
+ byte Tprime[AES_BLOCK_SIZE];
+ byte EKY0[AES_BLOCK_SIZE];
+#ifdef OPENSSL_EXTRA
+ word32 aadTemp;
+#endif
+ ctr = counter;
+ XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
+ if (ivSz == GCM_NONCE_MID_SZ) {
+ XMEMCPY(initialCounter, iv, ivSz);
+ initialCounter[AES_BLOCK_SIZE - 1] = 1;
+ }
+ else {
+#ifdef OPENSSL_EXTRA
+ aadTemp = aes->aadLen;
+ aes->aadLen = 0;
+#endif
+ GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
+#ifdef OPENSSL_EXTRA
+ aes->aadLen = aadTemp;
+#endif
+ }
+ XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);
+
+ /* Calc the authTag again using the received auth data and the cipher text */
+ GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime));
+ wc_AesEncrypt(aes, ctr, EKY0);
+ xorbuf(Tprime, EKY0, sizeof(Tprime));
+
+#ifdef OPENSSL_EXTRA
+ if (!out) {
+ /* authenticated, non-confidential data */
+ /* store AAD size for next call */
+ aes->aadLen = authInSz;
+ }
+#endif
+ if (ConstantCompare(authTag, Tprime, authTagSz) != 0) {
+ return AES_GCM_AUTH_E;
}
-#ifdef WOLFSSL_PIC32MZ_CRYPT
- if(blocks)
- wc_AesCrypt(aes, out, in, blocks*AES_BLOCK_SIZE,
- PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM );
+#if defined(WOLFSSL_PIC32MZ_CRYPT)
+ if (blocks) {
+ /* use initial IV for HW, but don't use it below */
+ XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE);
+
+ ret = wc_Pic32AesCrypt(
+ aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
+ out, in, (blocks * AES_BLOCK_SIZE),
+ PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM);
+ if (ret != 0)
+ return ret;
+ }
+ /* process remainder using partial handling */
#endif
+#if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT)
+ /* some hardware acceleration can gain performance from doing AES encryption
+ * of the whole buffer at once */
+ if (c != p && blocks > 0) { /* can not handle inline decryption */
+ while (blocks--) {
+ IncrementGcmCounter(ctr);
+ XMEMCPY(p, ctr, AES_BLOCK_SIZE);
+ p += AES_BLOCK_SIZE;
+ }
+
+ /* reset number of blocks and then do encryption */
+ blocks = sz / AES_BLOCK_SIZE;
+
+ wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks);
+ xorbuf(out, c, AES_BLOCK_SIZE * blocks);
+ c += AES_BLOCK_SIZE * blocks;
+ }
+ else
+#endif /* HAVE_AES_ECB && !PIC32MZ */
while (blocks--) {
IncrementGcmCounter(ctr);
- #ifndef WOLFSSL_PIC32MZ_CRYPT
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(ctr, key, aes->rounds, scratch);
- #else
- wc_AesEncrypt(aes, ctr, scratch);
- #endif
+ #if !defined(WOLFSSL_PIC32MZ_CRYPT)
+ wc_AesEncrypt(aes, ctr, scratch);
xorbuf(scratch, c, AES_BLOCK_SIZE);
XMEMCPY(p, scratch, AES_BLOCK_SIZE);
- #endif
+ #endif
p += AES_BLOCK_SIZE;
c += AES_BLOCK_SIZE;
}
+
if (partial != 0) {
IncrementGcmCounter(ctr);
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(ctr, key, aes->rounds, scratch);
- #else
- wc_AesEncrypt(aes, ctr, scratch);
- #endif
+ wc_AesEncrypt(aes, ctr, scratch);
xorbuf(scratch, c, partial);
XMEMCPY(p, scratch, partial);
}
- return 0;
+
+ return ret;
+}
+
+/* Software AES - GCM Decrypt */
+int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ const byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+#ifdef WOLFSSL_AESNI
+ int res = AES_GCM_AUTH_E;
+#endif
+
+ /* argument checks */
+ /* If the sz is non-zero, both in and out must be set. If sz is 0,
+ * in and out are don't cares, as this is is the GMAC case. */
+ if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
+ authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
+ ivSz == 0) {
+
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLF_CRYPTO_CB
+ if (aes->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_AesGcmDecrypt(aes, out, in, sz, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+ /* if async and byte count above threshold */
+ /* only 12-byte IV is supported in HW */
+ if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+ sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) {
+ #if defined(HAVE_CAVIUM)
+ #ifdef HAVE_CAVIUM_V
+ if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
+ return NitroxAesGcmDecrypt(aes, out, in, sz,
+ (const byte*)aes->devKey, aes->keylen, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ }
+ #endif
+ #elif defined(HAVE_INTEL_QA)
+ return IntelQaSymAesGcmDecrypt(&aes->asyncDev, out, in, sz,
+ (const byte*)aes->devKey, aes->keylen, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_DECRYPT)) {
+ WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+ testDev->aes.aes = aes;
+ testDev->aes.out = out;
+ testDev->aes.in = in;
+ testDev->aes.sz = sz;
+ testDev->aes.iv = iv;
+ testDev->aes.ivSz = ivSz;
+ testDev->aes.authTag = (byte*)authTag;
+ testDev->aes.authTagSz = authTagSz;
+ testDev->aes.authIn = authIn;
+ testDev->aes.authInSz = authInSz;
+ return WC_PENDING_E;
+ }
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef STM32_CRYPTO_AES_GCM
+ /* The STM standard peripheral library API's doesn't support partial blocks */
+ #ifdef STD_PERI_LIB
+ if (partial == 0)
+ #endif
+ {
+ return wc_AesGcmDecrypt_STM32(
+ aes, out, in, sz, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ }
+#endif /* STM32_CRYPTO_AES_GCM */
+
+#ifdef WOLFSSL_AESNI
+ #ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_AVX2(intel_flags)) {
+ AES_GCM_decrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+ authTagSz, (byte*)aes->key, aes->rounds, &res);
+ if (res == 0)
+ return AES_GCM_AUTH_E;
+ return 0;
+ }
+ else
+ #endif
+ #ifdef HAVE_INTEL_AVX1
+ if (IS_INTEL_AVX1(intel_flags)) {
+ AES_GCM_decrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+ authTagSz, (byte*)aes->key, aes->rounds, &res);
+ if (res == 0)
+ return AES_GCM_AUTH_E;
+ return 0;
+ }
+ else
+ #endif
+ if (haveAESNI) {
+ AES_GCM_decrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
+ authTagSz, (byte*)aes->key, aes->rounds, &res);
+ if (res == 0)
+ return AES_GCM_AUTH_E;
+ return 0;
+ }
+ else
+#endif
+ {
+ return AES_GCM_decrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
+ authIn, authInSz);
+ }
+}
+#endif
+#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
+#endif /* WOLFSSL_XILINX_CRYPT */
+#endif /* end of block for AESGCM implementation selection */
+
+
+/* Common to all, abstract functions that build off of lower level AESGCM
+ * functions */
+#ifndef WC_NO_RNG
+
+int wc_AesGcmSetExtIV(Aes* aes, const byte* iv, word32 ivSz)
+{
+ int ret = 0;
+
+ if (aes == NULL || iv == NULL ||
+ (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ &&
+ ivSz != GCM_NONCE_MAX_SZ)) {
+
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ XMEMCPY((byte*)aes->reg, iv, ivSz);
+
+ /* If the IV is 96, allow for a 2^64 invocation counter.
+ * For any other size for the nonce, limit the invocation
+ * counter to 32-bits. (SP 800-38D 8.3) */
+ aes->invokeCtr[0] = 0;
+ aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ? 0 : 0xFFFFFFFF;
+ aes->nonceSz = ivSz;
+ }
+
+ return ret;
+}
+
+
+int wc_AesGcmSetIV(Aes* aes, word32 ivSz,
+ const byte* ivFixed, word32 ivFixedSz,
+ WC_RNG* rng)
+{
+ int ret = 0;
+
+ if (aes == NULL || rng == NULL ||
+ (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ &&
+ ivSz != GCM_NONCE_MAX_SZ) ||
+ (ivFixed == NULL && ivFixedSz != 0) ||
+ (ivFixed != NULL && ivFixedSz != AES_IV_FIXED_SZ)) {
+
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ byte* iv = (byte*)aes->reg;
+
+ if (ivFixedSz)
+ XMEMCPY(iv, ivFixed, ivFixedSz);
+
+ ret = wc_RNG_GenerateBlock(rng, iv + ivFixedSz, ivSz - ivFixedSz);
+ }
+
+ if (ret == 0) {
+ /* If the IV is 96, allow for a 2^64 invocation counter.
+ * For any other size for the nonce, limit the invocation
+ * counter to 32-bits. (SP 800-38D 8.3) */
+ aes->invokeCtr[0] = 0;
+ aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ? 0 : 0xFFFFFFFF;
+ aes->nonceSz = ivSz;
+ }
+
+ return ret;
+}
+
+
+int wc_AesGcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz,
+ byte* ivOut, word32 ivOutSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+ int ret = 0;
+
+ if (aes == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
+ ivOut == NULL || ivOutSz != aes->nonceSz ||
+ (authIn == NULL && authInSz != 0)) {
+
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ aes->invokeCtr[0]++;
+ if (aes->invokeCtr[0] == 0) {
+ aes->invokeCtr[1]++;
+ if (aes->invokeCtr[1] == 0)
+ ret = AES_GCM_OVERFLOW_E;
+ }
+ }
+
+ if (ret == 0) {
+ XMEMCPY(ivOut, aes->reg, ivOutSz);
+ ret = wc_AesGcmEncrypt(aes, out, in, sz,
+ (byte*)aes->reg, ivOutSz,
+ authTag, authTagSz,
+ authIn, authInSz);
+ if (ret == 0)
+ IncCtr((byte*)aes->reg, ivOutSz);
+ }
+
+ return ret;
+}
+
+int wc_Gmac(const byte* key, word32 keySz, byte* iv, word32 ivSz,
+ const byte* authIn, word32 authInSz,
+ byte* authTag, word32 authTagSz, WC_RNG* rng)
+{
+ Aes aes;
+ int ret;
+
+ if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
+ authTag == NULL || authTagSz == 0 || rng == NULL) {
+
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesGcmSetKey(&aes, key, keySz);
+ if (ret == 0)
+ ret = wc_AesGcmSetIV(&aes, ivSz, NULL, 0, rng);
+ if (ret == 0)
+ ret = wc_AesGcmEncrypt_ex(&aes, NULL, NULL, 0, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ wc_AesFree(&aes);
+ }
+ ForceZero(&aes, sizeof(aes));
+
+ return ret;
}
+int wc_GmacVerify(const byte* key, word32 keySz,
+ const byte* iv, word32 ivSz,
+ const byte* authIn, word32 authInSz,
+ const byte* authTag, word32 authTagSz)
+{
+ int ret;
+#ifndef NO_AES_DECRYPT
+ Aes aes;
+
+ if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
+ authTag == NULL || authTagSz == 0 || authTagSz > AES_BLOCK_SIZE) {
+
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesGcmSetKey(&aes, key, keySz);
+ if (ret == 0)
+ ret = wc_AesGcmDecrypt(&aes, NULL, NULL, 0, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ wc_AesFree(&aes);
+ }
+ ForceZero(&aes, sizeof(aes));
+#else
+ (void)key;
+ (void)keySz;
+ (void)iv;
+ (void)ivSz;
+ (void)authIn;
+ (void)authInSz;
+ (void)authTag;
+ (void)authTagSz;
+ ret = NOT_COMPILED_IN;
+#endif
+ return ret;
+}
+
+#endif /* WC_NO_RNG */
WOLFSSL_API int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
{
+ if (gmac == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
return wc_AesGcmSetKey(&gmac->aes, key, len);
}
@@ -3472,69 +6962,114 @@ WOLFSSL_API int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
#ifdef HAVE_AESCCM
-#ifdef STM32F2_CRYPTO
- #error "STM32F2 crypto doesn't currently support AES-CCM mode"
+int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
+{
+ if (!((keySz == 16) || (keySz == 24) || (keySz == 32)))
+ return BAD_FUNC_ARG;
+
+ return wc_AesSetKey(aes, key, keySz, NULL, AES_ENCRYPTION);
+}
+
+#ifdef WOLFSSL_ARMASM
+ /* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */
#elif defined(HAVE_COLDFIRE_SEC)
#error "Coldfire SEC doesn't currently support AES-CCM mode"
-#elif defined(WOLFSSL_PIC32MZ_CRYPT)
- #error "PIC32MZ doesn't currently support AES-CCM mode"
+#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
+ /* implemented in wolfcrypt/src/port/caam_aes.c */
-#endif
+#elif defined(FREESCALE_LTC)
-void wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
+/* return 0 on success */
+int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
+ const byte* nonce, word32 nonceSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
{
- byte nonce[AES_BLOCK_SIZE];
+ byte *key;
+ uint32_t keySize;
+ status_t status;
- if (!((keySz == 16) || (keySz == 24) || (keySz == 32)))
- return;
+ /* sanity check on arguments */
+ if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+ || authTag == NULL || nonceSz < 7 || nonceSz > 13)
+ return BAD_FUNC_ARG;
+
+ key = (byte*)aes->key;
+
+ status = wc_AesGetKeySize(aes, &keySize);
+ if (status != 0) {
+ return status;
+ }
- XMEMSET(nonce, 0, sizeof(nonce));
- wc_AesSetKey(aes, key, keySz, nonce, AES_ENCRYPTION);
+ status = LTC_AES_EncryptTagCcm(LTC_BASE, in, out, inSz,
+ nonce, nonceSz, authIn, authInSz, key, keySize, authTag, authTagSz);
+
+ return (kStatus_Success == status) ? 0 : BAD_FUNC_ARG;
}
+#ifdef HAVE_AES_DECRYPT
+int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
+ const byte* nonce, word32 nonceSz,
+ const byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+ byte *key;
+ uint32_t keySize;
+ status_t status;
+
+ /* sanity check on arguments */
+ if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+ || authTag == NULL || nonceSz < 7 || nonceSz > 13)
+ return BAD_FUNC_ARG;
+
+ key = (byte*)aes->key;
+ status = wc_AesGetKeySize(aes, &keySize);
+ if (status != 0) {
+ return status;
+ }
+
+ status = LTC_AES_DecryptTagCcm(LTC_BASE, in, out, inSz,
+ nonce, nonceSz, authIn, authInSz, key, keySize, authTag, authTagSz);
+
+ if (status == kStatus_Success) {
+ return 0;
+ }
+ else {
+ XMEMSET(out, 0, inSz);
+ return AES_CCM_AUTH_E;
+ }
+}
+#endif /* HAVE_AES_DECRYPT */
+
+#else
+
+/* Software CCM */
static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out)
{
- #ifdef FREESCALE_MMCAU
- byte* key = (byte*)aes->key;
- #endif
-
/* process the bulk of the data */
while (inSz >= AES_BLOCK_SIZE) {
xorbuf(out, in, AES_BLOCK_SIZE);
in += AES_BLOCK_SIZE;
inSz -= AES_BLOCK_SIZE;
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(out, key, aes->rounds, out);
- #else
- wc_AesEncrypt(aes, out, out);
- #endif
+ wc_AesEncrypt(aes, out, out);
}
/* process remainder of the data */
if (inSz > 0) {
xorbuf(out, in, inSz);
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(out, key, aes->rounds, out);
- #else
- wc_AesEncrypt(aes, out, out);
- #endif
+ wc_AesEncrypt(aes, out, out);
}
}
-
static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out)
{
word32 authLenSz;
word32 remainder;
- #ifdef FREESCALE_MMCAU
- byte* key = (byte*)aes->key;
- #endif
-
/* encode the length in */
if (inSz <= 0xFEFF) {
authLenSz = 2;
@@ -3568,18 +7103,14 @@ static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out)
xorbuf(out + authLenSz, in, inSz);
inSz = 0;
}
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(out, key, aes->rounds, out);
- #else
- wc_AesEncrypt(aes, out, out);
- #endif
+ wc_AesEncrypt(aes, out, out);
if (inSz > 0)
roll_x(aes, in, inSz, out);
}
-static INLINE void AesCcmCtrInc(byte* B, word32 lenSz)
+static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz)
{
word32 i;
@@ -3588,34 +7119,85 @@ static INLINE void AesCcmCtrInc(byte* B, word32 lenSz)
}
}
+#ifdef WOLFSSL_AESNI
+static WC_INLINE void AesCcmCtrIncSet4(byte* B, word32 lenSz)
+{
+ word32 i;
+
+ /* B+1 = B */
+ XMEMCPY(B + AES_BLOCK_SIZE * 1, B, AES_BLOCK_SIZE);
+ /* B+2,B+3 = B,B+1 */
+ XMEMCPY(B + AES_BLOCK_SIZE * 2, B, AES_BLOCK_SIZE * 2);
+
+ for (i = 0; i < lenSz; i++) {
+ if (++B[AES_BLOCK_SIZE * 1 - 1 - i] != 0) break;
+ }
+ B[AES_BLOCK_SIZE * 2 - 1] += 2;
+ if (B[AES_BLOCK_SIZE * 2 - 1] < 2) {
+ for (i = 1; i < lenSz; i++) {
+ if (++B[AES_BLOCK_SIZE * 2 - 1 - i] != 0) break;
+ }
+ }
+ B[AES_BLOCK_SIZE * 3 - 1] += 3;
+ if (B[AES_BLOCK_SIZE * 3 - 1] < 3) {
+ for (i = 1; i < lenSz; i++) {
+ if (++B[AES_BLOCK_SIZE * 3 - 1 - i] != 0) break;
+ }
+ }
+}
+
+static WC_INLINE void AesCcmCtrInc4(byte* B, word32 lenSz)
+{
+ word32 i;
-void wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
+ B[AES_BLOCK_SIZE - 1] += 4;
+ if (B[AES_BLOCK_SIZE - 1] < 4) {
+ for (i = 1; i < lenSz; i++) {
+ if (++B[AES_BLOCK_SIZE - 1 - i] != 0) break;
+ }
+ }
+}
+#endif
+
+/* Software AES - CCM Encrypt */
+/* return 0 on success */
+int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
const byte* nonce, word32 nonceSz,
byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz)
{
+#ifndef WOLFSSL_AESNI
byte A[AES_BLOCK_SIZE];
byte B[AES_BLOCK_SIZE];
+#else
+ ALIGN128 byte A[AES_BLOCK_SIZE * 4];
+ ALIGN128 byte B[AES_BLOCK_SIZE * 4];
+#endif
byte lenSz;
word32 i;
+ byte mask = 0xFF;
+ const word32 wordSz = (word32)sizeof(word32);
- #ifdef FREESCALE_MMCAU
- byte* key = (byte*)aes->key;
- #endif
+ /* sanity check on arguments */
+ if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+ || authTag == NULL || nonceSz < 7 || nonceSz > 13 ||
+ authTagSz > AES_BLOCK_SIZE)
+ return BAD_FUNC_ARG;
+ XMEMSET(A, 0, sizeof(A));
XMEMCPY(B+1, nonce, nonceSz);
lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;
B[0] = (authInSz > 0 ? 64 : 0)
+ (8 * (((byte)authTagSz - 2) / 2))
+ (lenSz - 1);
- for (i = 0; i < lenSz; i++)
- B[AES_BLOCK_SIZE - 1 - i] = (inSz >> (8 * i)) & 0xFF;
+ for (i = 0; i < lenSz; i++) {
+ if (mask && i >= wordSz)
+ mask = 0x00;
+ B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask;
+ }
+
+ wc_AesEncrypt(aes, B, A);
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(B, key, aes->rounds, A);
- #else
- wc_AesEncrypt(aes, B, A);
- #endif
if (authInSz > 0)
roll_auth(aes, authIn, authInSz, A);
if (inSz > 0)
@@ -3625,20 +7207,32 @@ void wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
B[0] = lenSz - 1;
for (i = 0; i < lenSz; i++)
B[AES_BLOCK_SIZE - 1 - i] = 0;
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(B, key, aes->rounds, A);
- #else
- wc_AesEncrypt(aes, B, A);
- #endif
+ wc_AesEncrypt(aes, B, A);
xorbuf(authTag, A, authTagSz);
B[15] = 1;
+#ifdef WOLFSSL_AESNI
+ if (haveAESNI && aes->use_aesni) {
+ while (inSz >= AES_BLOCK_SIZE * 4) {
+ AesCcmCtrIncSet4(B, lenSz);
+
+ AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
+ aes->rounds);
+ xorbuf(A, in, AES_BLOCK_SIZE * 4);
+ XMEMCPY(out, A, AES_BLOCK_SIZE * 4);
+
+ inSz -= AES_BLOCK_SIZE * 4;
+ in += AES_BLOCK_SIZE * 4;
+ out += AES_BLOCK_SIZE * 4;
+
+ if (inSz < AES_BLOCK_SIZE * 4) {
+ AesCcmCtrInc4(B, lenSz);
+ }
+ }
+ }
+#endif
while (inSz >= AES_BLOCK_SIZE) {
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(B, key, aes->rounds, A);
- #else
- wc_AesEncrypt(aes, B, A);
- #endif
+ wc_AesEncrypt(aes, B, A);
xorbuf(A, in, AES_BLOCK_SIZE);
XMEMCPY(out, A, AES_BLOCK_SIZE);
@@ -3648,35 +7242,43 @@ void wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
out += AES_BLOCK_SIZE;
}
if (inSz > 0) {
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(B, key, aes->rounds, A);
- #else
- wc_AesEncrypt(aes, B, A);
- #endif
+ wc_AesEncrypt(aes, B, A);
xorbuf(A, in, inSz);
XMEMCPY(out, A, inSz);
}
ForceZero(A, AES_BLOCK_SIZE);
ForceZero(B, AES_BLOCK_SIZE);
-}
+ return 0;
+}
+#ifdef HAVE_AES_DECRYPT
+/* Software AES - CCM Decrypt */
int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
const byte* nonce, word32 nonceSz,
const byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz)
{
+#ifndef WOLFSSL_AESNI
byte A[AES_BLOCK_SIZE];
byte B[AES_BLOCK_SIZE];
+#else
+ ALIGN128 byte B[AES_BLOCK_SIZE * 4];
+ ALIGN128 byte A[AES_BLOCK_SIZE * 4];
+#endif
byte* o;
byte lenSz;
word32 i, oSz;
int result = 0;
+ byte mask = 0xFF;
+ const word32 wordSz = (word32)sizeof(word32);
- #ifdef FREESCALE_MMCAU
- byte* key = (byte*)aes->key;
- #endif
+ /* sanity check on arguments */
+ if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+ || authTag == NULL || nonceSz < 7 || nonceSz > 13 ||
+ authTagSz > AES_BLOCK_SIZE)
+ return BAD_FUNC_ARG;
o = out;
oSz = inSz;
@@ -3688,12 +7290,28 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
B[AES_BLOCK_SIZE - 1 - i] = 0;
B[15] = 1;
+#ifdef WOLFSSL_AESNI
+ if (haveAESNI && aes->use_aesni) {
+ while (oSz >= AES_BLOCK_SIZE * 4) {
+ AesCcmCtrIncSet4(B, lenSz);
+
+ AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
+ aes->rounds);
+ xorbuf(A, in, AES_BLOCK_SIZE * 4);
+ XMEMCPY(o, A, AES_BLOCK_SIZE * 4);
+
+ oSz -= AES_BLOCK_SIZE * 4;
+ in += AES_BLOCK_SIZE * 4;
+ o += AES_BLOCK_SIZE * 4;
+
+ if (oSz < AES_BLOCK_SIZE * 4) {
+ AesCcmCtrInc4(B, lenSz);
+ }
+ }
+ }
+#endif
while (oSz >= AES_BLOCK_SIZE) {
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(B, key, aes->rounds, A);
- #else
- wc_AesEncrypt(aes, B, A);
- #endif
+ wc_AesEncrypt(aes, B, A);
xorbuf(A, in, AES_BLOCK_SIZE);
XMEMCPY(o, A, AES_BLOCK_SIZE);
@@ -3703,22 +7321,14 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
o += AES_BLOCK_SIZE;
}
if (inSz > 0) {
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(B, key, aes->rounds, A);
- #else
- wc_AesEncrypt(aes, B, A);
- #endif
+ wc_AesEncrypt(aes, B, A);
xorbuf(A, in, oSz);
XMEMCPY(o, A, oSz);
}
for (i = 0; i < lenSz; i++)
B[AES_BLOCK_SIZE - 1 - i] = 0;
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(B, key, aes->rounds, A);
- #else
- wc_AesEncrypt(aes, B, A);
- #endif
+ wc_AesEncrypt(aes, B, A);
o = out;
oSz = inSz;
@@ -3726,14 +7336,14 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
B[0] = (authInSz > 0 ? 64 : 0)
+ (8 * (((byte)authTagSz - 2) / 2))
+ (lenSz - 1);
- for (i = 0; i < lenSz; i++)
- B[AES_BLOCK_SIZE - 1 - i] = (inSz >> (8 * i)) & 0xFF;
+ for (i = 0; i < lenSz; i++) {
+ if (mask && i >= wordSz)
+ mask = 0x00;
+ B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask;
+ }
+
+ wc_AesEncrypt(aes, B, A);
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(B, key, aes->rounds, A);
- #else
- wc_AesEncrypt(aes, B, A);
- #endif
if (authInSz > 0)
roll_auth(aes, authIn, authInSz, A);
if (inSz > 0)
@@ -3742,11 +7352,7 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
B[0] = lenSz - 1;
for (i = 0; i < lenSz; i++)
B[AES_BLOCK_SIZE - 1 - i] = 0;
- #ifdef FREESCALE_MMCAU
- cau_aes_encrypt(B, key, aes->rounds, B);
- #else
- wc_AesEncrypt(aes, B, B);
- #endif
+ wc_AesEncrypt(aes, B, B);
xorbuf(A, B, authTagSz);
if (ConstantCompare(A, authTag, authTagSz) != 0) {
@@ -3764,134 +7370,1335 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
return result;
}
-#endif /* HAVE_AESCCM */
+#endif /* HAVE_AES_DECRYPT */
+#endif /* software CCM */
+/* abstract functions that call lower level AESCCM functions */
+#ifndef WC_NO_RNG
-#ifdef HAVE_CAVIUM
+int wc_AesCcmSetNonce(Aes* aes, const byte* nonce, word32 nonceSz)
+{
+ int ret = 0;
+
+ if (aes == NULL || nonce == NULL ||
+ nonceSz < CCM_NONCE_MIN_SZ || nonceSz > CCM_NONCE_MAX_SZ) {
-#include <wolfssl/ctaocrypt/logging.h>
-#include "cavium_common.h"
+ ret = BAD_FUNC_ARG;
+ }
-/* Initiliaze Aes for use with Nitrox device */
-int wc_AesInitCavium(Aes* aes, int devId)
+ if (ret == 0) {
+ XMEMCPY(aes->reg, nonce, nonceSz);
+ aes->nonceSz = nonceSz;
+
+ /* Invocation counter should be 2^61 */
+ aes->invokeCtr[0] = 0;
+ aes->invokeCtr[1] = 0xE0000000;
+ }
+
+ return ret;
+}
+
+
+int wc_AesCcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz,
+ byte* ivOut, word32 ivOutSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
{
+ int ret = 0;
+
+ if (aes == NULL || out == NULL ||
+ (in == NULL && sz != 0) ||
+ ivOut == NULL ||
+ (authIn == NULL && authInSz != 0) ||
+ (ivOutSz != aes->nonceSz)) {
+
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ aes->invokeCtr[0]++;
+ if (aes->invokeCtr[0] == 0) {
+ aes->invokeCtr[1]++;
+ if (aes->invokeCtr[1] == 0)
+ ret = AES_CCM_OVERFLOW_E;
+ }
+ }
+
+ if (ret == 0) {
+ ret = wc_AesCcmEncrypt(aes, out, in, sz,
+ (byte*)aes->reg, aes->nonceSz,
+ authTag, authTagSz,
+ authIn, authInSz);
+ if (ret == 0) {
+ XMEMCPY(ivOut, aes->reg, aes->nonceSz);
+ IncCtr((byte*)aes->reg, aes->nonceSz);
+ }
+ }
+
+ return ret;
+}
+
+#endif /* WC_NO_RNG */
+
+#endif /* HAVE_AESCCM */
+
+
+/* Initialize Aes for use with async hardware */
+int wc_AesInit(Aes* aes, void* heap, int devId)
+{
+ int ret = 0;
+
if (aes == NULL)
- return -1;
+ return BAD_FUNC_ARG;
- if (CspAllocContext(CONTEXT_SSL, &aes->contextHandle, devId) != 0)
- return -1;
+ aes->heap = heap;
+#ifdef WOLF_CRYPTO_CB
aes->devId = devId;
- aes->magic = WOLFSSL_AES_CAVIUM_MAGIC;
-
- return 0;
+ aes->devCtx = NULL;
+#else
+ (void)devId;
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+ ret = wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES,
+ aes->heap, devId);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef WOLFSSL_AFALG
+ aes->alFd = -1;
+ aes->rdFd = -1;
+#endif
+#if defined(WOLFSSL_DEVCRYPTO) && \
+ (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))
+ aes->ctx.cfd = -1;
+#endif
+#if defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES)
+ XMEMSET(&aes->ctx, 0, sizeof(aes->ctx));
+#endif
+#ifdef HAVE_AESGCM
+#ifdef OPENSSL_EXTRA
+ XMEMSET(aes->aadH, 0, sizeof(aes->aadH));
+ aes->aadLen = 0;
+#endif
+#endif
+ return ret;
}
+#ifdef HAVE_PKCS11
+int wc_AesInit_Id(Aes* aes, unsigned char* id, int len, void* heap, int devId)
+{
+ int ret = 0;
-/* Free Aes from use with Nitrox device */
-void wc_AesFreeCavium(Aes* aes)
+ if (aes == NULL)
+ ret = BAD_FUNC_ARG;
+ if (ret == 0 && (len < 0 || len > AES_MAX_ID_LEN))
+ ret = BUFFER_E;
+
+ if (ret == 0)
+ ret = wc_AesInit(aes, heap, devId);
+ if (ret == 0) {
+ XMEMCPY(aes->id, id, len);
+ aes->idLen = len;
+ }
+
+ return ret;
+}
+#endif
+
+/* Free Aes from use with async hardware */
+void wc_AesFree(Aes* aes)
{
if (aes == NULL)
return;
- if (aes->magic != WOLFSSL_AES_CAVIUM_MAGIC)
- return;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+ wolfAsync_DevCtxFree(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+#if defined(WOLFSSL_AFALG) || defined(WOLFSSL_AFALG_XILINX_AES)
+ if (aes->rdFd > 0) { /* negative is error case */
+ close(aes->rdFd);
+ }
+ if (aes->alFd > 0) {
+ close(aes->alFd);
+ }
+#endif /* WOLFSSL_AFALG */
+#if defined(WOLFSSL_DEVCRYPTO) && \
+ (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))
+ wc_DevCryptoFree(&aes->ctx);
+#endif
+#if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \
+ (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \
+ (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES))
+ ForceZero((byte*)aes->devKey, AES_MAX_KEY_SIZE/WOLFSSL_BIT_SIZE);
+#endif
+}
+
+
+int wc_AesGetKeySize(Aes* aes, word32* keySize)
+{
+ int ret = 0;
+
+ if (aes == NULL || keySize == NULL) {
+ return BAD_FUNC_ARG;
+ }
+#if defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES)
+ *keySize = aes->ctx.key.keySize;
+ return ret;
+#endif
+ switch (aes->rounds) {
+#ifdef WOLFSSL_AES_128
+ case 10:
+ *keySize = 16;
+ break;
+#endif
+#ifdef WOLFSSL_AES_192
+ case 12:
+ *keySize = 24;
+ break;
+#endif
+#ifdef WOLFSSL_AES_256
+ case 14:
+ *keySize = 32;
+ break;
+#endif
+ default:
+ *keySize = 0;
+ ret = BAD_FUNC_ARG;
+ }
+
+ return ret;
+}
+
+#endif /* !WOLFSSL_TI_CRYPT */
+
+#ifdef HAVE_AES_ECB
+#if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
+ /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
- CspFreeContext(CONTEXT_SSL, aes->contextHandle, aes->devId);
- aes->magic = 0;
+#elif defined(WOLFSSL_AFALG)
+ /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_AES)
+ /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */
+
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES)
+
+/* Software AES - ECB */
+int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ if ((in == NULL) || (out == NULL) || (aes == NULL))
+ return BAD_FUNC_ARG;
+
+ return AES_ECB_encrypt(aes, in, out, sz);
}
-static int wc_AesCaviumSetKey(Aes* aes, const byte* key, word32 length,
- const byte* iv)
+int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- if (aes == NULL)
- return -1;
+ if ((in == NULL) || (out == NULL) || (aes == NULL))
+ return BAD_FUNC_ARG;
- XMEMCPY(aes->key, key, length); /* key still holds key, iv still in reg */
- if (length == 16)
- aes->type = AES_128;
- else if (length == 24)
- aes->type = AES_192;
- else if (length == 32)
- aes->type = AES_256;
+ return AES_ECB_decrypt(aes, in, out, sz);
+}
- return wc_AesSetIV(aes, iv);
+#else
+
+/* Software AES - ECB */
+int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ word32 blocks = sz / AES_BLOCK_SIZE;
+
+ if ((in == NULL) || (out == NULL) || (aes == NULL))
+ return BAD_FUNC_ARG;
+ while (blocks>0) {
+ wc_AesEncryptDirect(aes, out, in);
+ out += AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ sz -= AES_BLOCK_SIZE;
+ blocks--;
+ }
+ return 0;
}
-static int AesCaviumCbcEncrypt(Aes* aes, byte* out, const byte* in,
- word32 length)
+int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
- wolfssl_word offset = 0;
- word32 requestId;
+ word32 blocks = sz / AES_BLOCK_SIZE;
- while (length > WOLFSSL_MAX_16BIT) {
- word16 slen = (word16)WOLFSSL_MAX_16BIT;
- if (CspEncryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE,
- aes->type, slen, (byte*)in + offset, out + offset,
- (byte*)aes->reg, (byte*)aes->key, &requestId,
- aes->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium Aes Encrypt");
- return -1;
+ if ((in == NULL) || (out == NULL) || (aes == NULL))
+ return BAD_FUNC_ARG;
+ while (blocks>0) {
+ wc_AesDecryptDirect(aes, out, in);
+ out += AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ sz -= AES_BLOCK_SIZE;
+ blocks--;
+ }
+ return 0;
+}
+#endif
+#endif /* HAVE_AES_ECB */
+
+#if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_OFB)
+/* Feedback AES mode
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt
+ * sz size of input buffer
+ * mode flag to specify AES mode
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - CFB Encrypt */
+static int wc_AesFeedbackEncrypt(Aes* aes, byte* out, const byte* in,
+ word32 sz, byte mode)
+{
+ byte* tmp = NULL;
+#ifdef WOLFSSL_AES_CFB
+ byte* reg = NULL;
+#endif
+
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_AES_CFB
+ if (aes->left && sz) {
+ reg = (byte*)aes->reg + AES_BLOCK_SIZE - aes->left;
+ }
+#endif
+
+ /* consume any unused bytes left in aes->tmp */
+ tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
+ while (aes->left && sz) {
+ *(out) = *(in++) ^ *(tmp++);
+ #ifdef WOLFSSL_AES_CFB
+ if (mode == AES_CFB_MODE) {
+ *(reg++) = *out;
}
- length -= WOLFSSL_MAX_16BIT;
- offset += WOLFSSL_MAX_16BIT;
- XMEMCPY(aes->reg, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ #endif
+ out++;
+ aes->left--;
+ sz--;
}
- if (length) {
- word16 slen = (word16)length;
- if (CspEncryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE,
- aes->type, slen, (byte*)in + offset, out + offset,
- (byte*)aes->reg, (byte*)aes->key, &requestId,
- aes->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium Aes Encrypt");
- return -1;
+
+ while (sz >= AES_BLOCK_SIZE) {
+ /* Using aes->tmp here for inline case i.e. in=out */
+ wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+ #ifdef WOLFSSL_AES_OFB
+ if (mode == AES_OFB_MODE) {
+ XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
}
- XMEMCPY(aes->reg, out + offset+length - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ #endif
+ xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE);
+ #ifdef WOLFSSL_AES_CFB
+ if (mode == AES_CFB_MODE) {
+ XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+ }
+ #endif
+ XMEMCPY(out, aes->tmp, AES_BLOCK_SIZE);
+ out += AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ sz -= AES_BLOCK_SIZE;
+ aes->left = 0;
}
+
+ /* encrypt left over data */
+ if (sz) {
+ wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+ aes->left = AES_BLOCK_SIZE;
+ tmp = (byte*)aes->tmp;
+ #ifdef WOLFSSL_AES_OFB
+ if (mode == AES_OFB_MODE) {
+ XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+ }
+ #endif
+ #ifdef WOLFSSL_AES_CFB
+ reg = (byte*)aes->reg;
+ #endif
+
+ while (sz--) {
+ *(out) = *(in++) ^ *(tmp++);
+ #ifdef WOLFSSL_AES_CFB
+ if (mode == AES_CFB_MODE) {
+ *(reg++) = *out;
+ }
+ #endif
+ out++;
+ aes->left--;
+ }
+ }
+
return 0;
}
-static int AesCaviumCbcDecrypt(Aes* aes, byte* out, const byte* in,
- word32 length)
+
+#ifdef HAVE_AES_DECRYPT
+/* CFB 128
+ *
+ * aes structure holding key to use for decryption
+ * out buffer to hold result of decryption (must be at least as large as input
+ * buffer)
+ * in buffer to decrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - CFB Decrypt */
+static int wc_AesFeedbackDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+ byte mode)
{
- word32 requestId;
- wolfssl_word offset = 0;
+ byte* tmp;
+
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ #ifdef WOLFSSL_AES_CFB
+ /* check if more input needs copied over to aes->reg */
+ if (aes->left && sz && mode == AES_CFB_MODE) {
+ int size = min(aes->left, sz);
+ XMEMCPY((byte*)aes->reg + AES_BLOCK_SIZE - aes->left, in, size);
+ }
+ #endif
+
+ /* consume any unused bytes left in aes->tmp */
+ tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
+ while (aes->left && sz) {
+ *(out++) = *(in++) ^ *(tmp++);
+ aes->left--;
+ sz--;
+ }
- while (length > WOLFSSL_MAX_16BIT) {
- word16 slen = (word16)WOLFSSL_MAX_16BIT;
- XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
- if (CspDecryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE,
- aes->type, slen, (byte*)in + offset, out + offset,
- (byte*)aes->reg, (byte*)aes->key, &requestId,
- aes->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium Aes Decrypt");
- return -1;
+ while (sz > AES_BLOCK_SIZE) {
+ /* Using aes->tmp here for inline case i.e. in=out */
+ wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+ #ifdef WOLFSSL_AES_OFB
+ if (mode == AES_OFB_MODE) {
+ XMEMCPY((byte*)aes->reg, (byte*)aes->tmp, AES_BLOCK_SIZE);
+ }
+ #endif
+ xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE);
+ #ifdef WOLFSSL_AES_CFB
+ if (mode == AES_CFB_MODE) {
+ XMEMCPY(aes->reg, in, AES_BLOCK_SIZE);
}
- length -= WOLFSSL_MAX_16BIT;
- offset += WOLFSSL_MAX_16BIT;
- XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+ #endif
+ XMEMCPY(out, (byte*)aes->tmp, AES_BLOCK_SIZE);
+ out += AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ sz -= AES_BLOCK_SIZE;
+ aes->left = 0;
}
- if (length) {
- word16 slen = (word16)length;
- XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
- if (CspDecryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE,
- aes->type, slen, (byte*)in + offset, out + offset,
- (byte*)aes->reg, (byte*)aes->key, &requestId,
- aes->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium Aes Decrypt");
- return -1;
+
+ /* decrypt left over data */
+ if (sz) {
+ wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+ #ifdef WOLFSSL_AES_CFB
+ if (mode == AES_CFB_MODE) {
+ XMEMCPY(aes->reg, in, sz);
+ }
+ #endif
+ #ifdef WOLFSSL_AES_OFB
+ if (mode == AES_OFB_MODE) {
+ XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
+ }
+ #endif
+
+ aes->left = AES_BLOCK_SIZE;
+ tmp = (byte*)aes->tmp;
+
+ while (sz--) {
+ *(out++) = *(in++) ^ *(tmp++);
+ aes->left--;
}
- XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
}
+
return 0;
}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_CFB */
-#endif /* HAVE_CAVIUM */
+#ifdef WOLFSSL_AES_CFB
+/* CFB 128
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - CFB Encrypt */
+int wc_AesCfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackEncrypt(aes, out, in, sz, AES_CFB_MODE);
+}
-#endif /* WOLFSSL_TI_CRYPT */
-#endif /* HAVE_FIPS */
+#ifdef HAVE_AES_DECRYPT
+/* CFB 128
+ *
+ * aes structure holding key to use for decryption
+ * out buffer to hold result of decryption (must be at least as large as input
+ * buffer)
+ * in buffer to decrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - CFB Decrypt */
+int wc_AesCfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_CFB_MODE);
+}
+#endif /* HAVE_AES_DECRYPT */
+
+
+/* shift the whole AES_BLOCK_SIZE array left by 8 or 1 bits */
+static void shiftLeftArray(byte* ary, byte shift)
+{
+ int i;
+
+ if (shift == WOLFSSL_BIT_SIZE) {
+ /* shifting over by 8 bits */
+ for (i = 0; i < AES_BLOCK_SIZE - 1; i++) {
+ ary[i] = ary[i+1];
+ }
+ ary[i] = 0;
+ }
+ else {
+ byte carry = 0;
+
+ /* shifting over by 7 or less bits */
+ for (i = 0; i < AES_BLOCK_SIZE - 1; i++) {
+ carry = ary[i+1] & (0XFF << (WOLFSSL_BIT_SIZE - shift));
+ carry >>= (WOLFSSL_BIT_SIZE - shift);
+ ary[i] = (ary[i] << shift) + carry;
+ }
+ ary[i] = ary[i] << shift;
+ }
+}
+
+
+/* returns 0 on success and negative values on failure */
+static int wc_AesFeedbackCFB8(Aes* aes, byte* out, const byte* in,
+ word32 sz, byte dir)
+{
+ byte *pt;
+
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (sz == 0) {
+ return 0;
+ }
+
+ while (sz > 0) {
+ wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+ if (dir == AES_DECRYPTION) {
+ pt = (byte*)aes->reg;
+
+ /* LSB + CAT */
+ shiftLeftArray(pt, WOLFSSL_BIT_SIZE);
+ pt[AES_BLOCK_SIZE - 1] = in[0];
+ }
+
+ /* MSB + XOR */
+ out[0] = aes->tmp[0] ^ in[0];
+ if (dir == AES_ENCRYPTION) {
+ pt = (byte*)aes->reg;
+
+ /* LSB + CAT */
+ shiftLeftArray(pt, WOLFSSL_BIT_SIZE);
+ pt[AES_BLOCK_SIZE - 1] = out[0];
+ }
+
+ out += 1;
+ in += 1;
+ sz -= 1;
+ }
+
+ return 0;
+}
+
+
+/* returns 0 on success and negative values on failure */
+static int wc_AesFeedbackCFB1(Aes* aes, byte* out, const byte* in,
+ word32 sz, byte dir)
+{
+ byte tmp;
+ byte cur = 0; /* hold current work in order to handle inline in=out */
+ byte* pt;
+ int bit = 7;
+
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (sz == 0) {
+ return 0;
+ }
+
+ while (sz > 0) {
+ wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+ if (dir == AES_DECRYPTION) {
+ pt = (byte*)aes->reg;
+
+ /* LSB + CAT */
+ tmp = (0X01 << bit) & in[0];
+ tmp = tmp >> bit;
+ tmp &= 0x01;
+ shiftLeftArray((byte*)aes->reg, 1);
+ pt[AES_BLOCK_SIZE - 1] |= tmp;
+ }
-#endif /* NO_AES */
+ /* MSB + XOR */
+ tmp = (0X01 << bit) & in[0];
+ pt = (byte*)aes->tmp;
+ tmp = (pt[0] >> 7) ^ (tmp >> bit);
+ tmp &= 0x01;
+ cur |= (tmp << bit);
+
+
+ if (dir == AES_ENCRYPTION) {
+ pt = (byte*)aes->reg;
+
+ /* LSB + CAT */
+ shiftLeftArray((byte*)aes->reg, 1);
+ pt[AES_BLOCK_SIZE - 1] |= tmp;
+ }
+
+ bit--;
+ if (bit < 0) {
+ out[0] = cur;
+ out += 1;
+ in += 1;
+ sz -= 1;
+ bit = 7;
+ cur = 0;
+ }
+ else {
+ sz -= 1;
+ }
+ }
+
+ if (bit > 0 && bit < 7) {
+ out[0] = cur;
+ }
+
+ return 0;
+}
+
+
+/* CFB 1
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt (packed to left, i.e. 101 is 0x90)
+ * sz size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8)
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb1Encrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackCFB1(aes, out, in, sz, AES_ENCRYPTION);
+}
+
+
+/* CFB 8
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb8Encrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackCFB8(aes, out, in, sz, AES_ENCRYPTION);
+}
+#ifdef HAVE_AES_DECRYPT
+
+/* CFB 1
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt
+ * sz size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8)
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb1Decrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackCFB1(aes, out, in, sz, AES_DECRYPTION);
+}
+
+
+/* CFB 8
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb8Decrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackCFB8(aes, out, in, sz, AES_DECRYPTION);
+}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_CFB */
+
+#ifdef WOLFSSL_AES_OFB
+/* OFB
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - CFB Encrypt */
+int wc_AesOfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackEncrypt(aes, out, in, sz, AES_OFB_MODE);
+}
+
+
+#ifdef HAVE_AES_DECRYPT
+/* OFB
+ *
+ * aes structure holding key to use for decryption
+ * out buffer to hold result of decryption (must be at least as large as input
+ * buffer)
+ * in buffer to decrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - OFB Decrypt */
+int wc_AesOfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_OFB_MODE);
+}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_OFB */
+
+
+#ifdef HAVE_AES_KEYWRAP
+
+/* Initialize key wrap counter with value */
+static WC_INLINE void InitKeyWrapCounter(byte* inOutCtr, word32 value)
+{
+ int i;
+ word32 bytes;
+
+ bytes = sizeof(word32);
+ for (i = 0; i < (int)sizeof(word32); i++) {
+ inOutCtr[i+sizeof(word32)] = (value >> ((bytes - 1) * 8)) & 0xFF;
+ bytes--;
+ }
+}
+
+/* Increment key wrap counter */
+static WC_INLINE void IncrementKeyWrapCounter(byte* inOutCtr)
+{
+ int i;
+
+ /* in network byte order so start at end and work back */
+ for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) {
+ if (++inOutCtr[i]) /* we're done unless we overflow */
+ return;
+ }
+}
+
+/* Decrement key wrap counter */
+static WC_INLINE void DecrementKeyWrapCounter(byte* inOutCtr)
+{
+ int i;
+
+ for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) {
+ if (--inOutCtr[i] != 0xFF) /* we're done unless we underflow */
+ return;
+ }
+}
+
+/* perform AES key wrap (RFC3394), return out sz on success, negative on err
+ *
+ * key    AES key-encryption key
+ * keySz  size of key in bytes
+ * in     plaintext key data to wrap (n 64-bit blocks, n >= 2)
+ * inSz   size of in; must be a multiple of KEYWRAP_BLOCK_SIZE (8)
+ * out    output, receives inSz + 8 bytes of wrapped data
+ * outSz  capacity of out
+ * iv     optional 8-byte alternative IV; NULL uses RFC3394 default 0xA6..A6
+ */
+int wc_AesKeyWrap(const byte* key, word32 keySz, const byte* in, word32 inSz,
+ byte* out, word32 outSz, const byte* iv)
+{
+ Aes aes;
+ byte* r;
+ word32 i;
+ int ret, j;
+
+ byte t[KEYWRAP_BLOCK_SIZE];
+ byte tmp[AES_BLOCK_SIZE];
+
+ /* n must be at least 2, output size is n + 8 bytes */
+ if (key == NULL || in == NULL || inSz < 2 ||
+ out == NULL || outSz < (inSz + KEYWRAP_BLOCK_SIZE))
+ return BAD_FUNC_ARG;
+
+ /* input must be multiple of 64-bits */
+ if (inSz % KEYWRAP_BLOCK_SIZE != 0)
+ return BAD_FUNC_ARG;
+
+ /* user IV is optional; default is the RFC3394 constant A6A6A6A6A6A6A6A6 */
+ if (iv == NULL) {
+ XMEMSET(tmp, 0xA6, KEYWRAP_BLOCK_SIZE);
+ } else {
+ XMEMCPY(tmp, iv, KEYWRAP_BLOCK_SIZE);
+ }
+
+ /* R[1..n] live in the output buffer, after the 8-byte A slot */
+ r = out + 8;
+ XMEMCPY(r, in, inSz);
+ XMEMSET(t, 0, sizeof(t));
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret != 0)
+ return ret;
+
+ ret = wc_AesSetKey(&aes, key, keySz, NULL, AES_ENCRYPTION);
+ if (ret != 0) {
+ wc_AesFree(&aes); /* fix: do not leak the initialized context */
+ return ret;
+ }
+
+ /* RFC3394 2.2.1: six passes over the n 64-bit blocks */
+ for (j = 0; j <= 5; j++) {
+ for (i = 1; i <= inSz / KEYWRAP_BLOCK_SIZE; i++) {
+
+ /* load R[i]; tmp = A | R[i] */
+ XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE);
+
+ /* B = AES(K, A | R[i]) */
+ wc_AesEncryptDirect(&aes, tmp, tmp);
+
+ /* calculate new A = MSB64(B) ^ t, where t = (n*j)+i */
+ IncrementKeyWrapCounter(t);
+ xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE);
+
+ /* save R[i] = LSB64(B) */
+ XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE);
+ r += KEYWRAP_BLOCK_SIZE;
+ }
+ r = out + KEYWRAP_BLOCK_SIZE;
+ }
+
+ /* C[0] = A */
+ XMEMCPY(out, tmp, KEYWRAP_BLOCK_SIZE);
+
+ wc_AesFree(&aes);
+
+ return inSz + KEYWRAP_BLOCK_SIZE;
+}
+
+/* perform AES key unwrap (RFC3394), return unwrapped size on success,
+ * negative on error
+ *
+ * key    AES key-encryption key
+ * keySz  size of key in bytes
+ * in     wrapped data (n+1 64-bit blocks, so inSz >= 24)
+ * inSz   size of in; must be a multiple of KEYWRAP_BLOCK_SIZE (8)
+ * out    output, receives inSz - 8 bytes of unwrapped key data
+ * outSz  capacity of out
+ * iv     optional 8-byte expected IV; NULL checks the RFC3394 default
+ */
+int wc_AesKeyUnWrap(const byte* key, word32 keySz, const byte* in, word32 inSz,
+ byte* out, word32 outSz, const byte* iv)
+{
+ Aes aes;
+ byte* r;
+ word32 i, n;
+ int ret, j;
+
+ byte t[KEYWRAP_BLOCK_SIZE];
+ byte tmp[AES_BLOCK_SIZE];
+
+ const byte* expIv;
+ const byte defaultIV[] = {
+ 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6
+ };
+
+ if (key == NULL || in == NULL || inSz < 3 ||
+ out == NULL || outSz < (inSz - KEYWRAP_BLOCK_SIZE))
+ return BAD_FUNC_ARG;
+
+ /* input must be multiple of 64-bits */
+ if (inSz % KEYWRAP_BLOCK_SIZE != 0)
+ return BAD_FUNC_ARG;
+
+ /* user IV optional */
+ if (iv != NULL) {
+ expIv = iv;
+ } else {
+ expIv = defaultIV;
+ }
+
+ /* A = C[0], R[i] = C[i] */
+ XMEMCPY(tmp, in, KEYWRAP_BLOCK_SIZE);
+ XMEMCPY(out, in + KEYWRAP_BLOCK_SIZE, inSz - KEYWRAP_BLOCK_SIZE);
+ XMEMSET(t, 0, sizeof(t));
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret != 0)
+ return ret;
+
+ ret = wc_AesSetKey(&aes, key, keySz, NULL, AES_DECRYPTION);
+ if (ret != 0) {
+ wc_AesFree(&aes); /* fix: do not leak the initialized context */
+ return ret;
+ }
+
+ /* initialize counter to 6n, then count down through the unwrap passes */
+ n = (inSz - 1) / KEYWRAP_BLOCK_SIZE;
+ InitKeyWrapCounter(t, 6 * n);
+
+ for (j = 5; j >= 0; j--) {
+ for (i = n; i >= 1; i--) {
+
+ /* calculate A = MSB64 input of AES-1, undoing the wrap XOR */
+ xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE);
+ DecrementKeyWrapCounter(t);
+
+ /* load R[i], starting at end of R */
+ r = out + ((i - 1) * KEYWRAP_BLOCK_SIZE);
+ XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE);
+ wc_AesDecryptDirect(&aes, tmp, tmp);
+
+ /* save R[i] */
+ XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE);
+ }
+ }
+
+ wc_AesFree(&aes);
+
+ /* verify recovered A against the expected IV (integrity check) */
+ if (XMEMCMP(tmp, expIv, KEYWRAP_BLOCK_SIZE) != 0)
+ return BAD_KEYWRAP_IV_E;
+
+ return inSz - KEYWRAP_BLOCK_SIZE;
+}
+
+#endif /* HAVE_AES_KEYWRAP */
+
+#ifdef WOLFSSL_AES_XTS
+
+/* Galios Field to use */
+#define GF_XTS 0x87
+
+/* This is to help with setting keys to correct encrypt or decrypt type.
+ *
+ * tweak AES key for tweak in XTS
+ * aes AES key for encrypt/decrypt process
+ * key buffer holding aes key | tweak key
+ * len length of key buffer in bytes. Should be twice that of key size. i.e.
+ * 32 for a 16 byte key.
+ * dir direction, either AES_ENCRYPTION or AES_DECRYPTION
+ * heap heap hint to use for memory. Can be NULL
+ * devId id to use with async crypto. Can be 0
+ *
+ * Note: is up to user to call wc_AesFree on tweak and aes key when done.
+ *
+ * return 0 on success
+ */
+int wc_AesXtsSetKey(XtsAes* aes, const byte* key, word32 len, int dir,
+ void* heap, int devId)
+{
+ word32 keySz;
+ int ret = 0;
+
+ if (aes == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* validate length before any initialization so the bad-size error path
+ * does not leak initialized Aes contexts */
+ keySz = len/2;
+ if (keySz != 16 && keySz != 32) {
+ WOLFSSL_MSG("Unsupported key size");
+ return WC_KEY_SIZE_E;
+ }
+
+ if ((ret = wc_AesInit(&aes->tweak, heap, devId)) != 0) {
+ return ret;
+ }
+ if ((ret = wc_AesInit(&aes->aes, heap, devId)) != 0) {
+ wc_AesFree(&aes->tweak); /* fix: release the first context */
+ return ret;
+ }
+
+ if ((ret = wc_AesSetKey(&aes->aes, key, keySz, NULL, dir)) == 0) {
+ ret = wc_AesSetKey(&aes->tweak, key + keySz, keySz, NULL,
+ AES_ENCRYPTION);
+ }
+ if (ret != 0) {
+ /* fix: release both contexts on failure; the caller only frees
+ * after a successful set up */
+ wc_AesFree(&aes->aes);
+ wc_AesFree(&aes->tweak);
+ }
+
+ return ret;
+}
+
+
+/* This is used to free up resources used by Aes structs
+ *
+ * aes AES keys to free; NULL is accepted and is a no-op
+ *
+ * return 0 on success
+ */
+int wc_AesXtsFree(XtsAes* aes)
+{
+ if (aes != NULL) {
+ /* frees both halves of the XTS key pair set by wc_AesXtsSetKey */
+ wc_AesFree(&aes->aes);
+ wc_AesFree(&aes->tweak);
+ }
+
+ return 0;
+}
+
+
+/* Same process as wc_AesXtsEncrypt but uses a word64 type as the tweak value
+ * instead of a byte array. This just converts the word64 to a byte array and
+ * calls wc_AesXtsEncrypt.
+ *
+ * aes AES keys to use for block encrypt/decrypt
+ * out output buffer to hold cipher text
+ * in input plain text buffer to encrypt
+ * sz size of both out and in buffers
+ * sector value to use for tweak
+ *
+ * returns 0 on success
+ */
+int wc_AesXtsEncryptSector(XtsAes* aes, byte* out, const byte* in,
+ word32 sz, word64 sector)
+{
+ byte* pt;
+ byte i[AES_BLOCK_SIZE];
+
+ XMEMSET(i, 0, AES_BLOCK_SIZE);
+#ifdef BIG_ENDIAN_ORDER
+ /* tweak bytes must be little-endian regardless of host order */
+ sector = ByteReverseWord64(sector);
+#endif
+ /* place the 8-byte sector number in the low half of the 16-byte tweak */
+ pt = (byte*)&sector;
+ XMEMCPY(i, pt, sizeof(word64));
+
+ return wc_AesXtsEncrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE);
+}
+
+
+/* Same process as wc_AesXtsDecrypt but uses a word64 type as the tweak value
+ * instead of a byte array. This just converts the word64 to a byte array.
+ *
+ * aes AES keys to use for block encrypt/decrypt
+ * out output buffer to hold plain text
+ * in input cipher text buffer to encrypt
+ * sz size of both out and in buffers
+ * sector value to use for tweak
+ *
+ * returns 0 on success
+ */
+int wc_AesXtsDecryptSector(XtsAes* aes, byte* out, const byte* in, word32 sz,
+ word64 sector)
+{
+ byte* pt;
+ byte i[AES_BLOCK_SIZE];
+
+ XMEMSET(i, 0, AES_BLOCK_SIZE);
+#ifdef BIG_ENDIAN_ORDER
+ /* tweak bytes must be little-endian regardless of host order */
+ sector = ByteReverseWord64(sector);
+#endif
+ /* place the 8-byte sector number in the low half of the 16-byte tweak */
+ pt = (byte*)&sector;
+ XMEMCPY(i, pt, sizeof(word64));
+
+ return wc_AesXtsDecrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE);
+}
+
+#ifdef HAVE_AES_ECB
+/* helper function for encrypting / decrypting full buffer at once.
+ *
+ * Precondition: out[0..15] already holds the encrypted tweak T0 and
+ * in != out (caller checks). This expands the tweak chain in place:
+ * out block k+1 = 2 * (out block k) in GF(2^128), then XORs the whole
+ * tweak chain with the input and runs one bulk ECB pass over it.
+ */
+static int _AesXtsHelper(Aes* aes, byte* out, const byte* in, word32 sz, int dir)
+{
+ word32 outSz = sz;
+ word32 totalSz = (sz / AES_BLOCK_SIZE) * AES_BLOCK_SIZE; /* total bytes */
+ byte* pt = out;
+
+ /* first tweak block is already present; generate the remaining ones */
+ outSz -= AES_BLOCK_SIZE;
+
+ while (outSz > 0) {
+ word32 j;
+ byte carry = 0;
+
+ /* multiply by shift left and propagate carry (GF(2^128) doubling of
+ * the previous tweak block into the next block) */
+ for (j = 0; j < AES_BLOCK_SIZE && outSz > 0; j++, outSz--) {
+ byte tmpC;
+
+ tmpC = (pt[j] >> 7) & 0x01;
+ pt[j+AES_BLOCK_SIZE] = ((pt[j] << 1) + carry) & 0xFF;
+ carry = tmpC;
+ }
+ if (carry) {
+ /* reduce by the XTS field polynomial */
+ pt[AES_BLOCK_SIZE] ^= GF_XTS;
+ }
+
+ pt += AES_BLOCK_SIZE;
+ }
+
+ xorbuf(out, in, totalSz);
+ if (dir == AES_ENCRYPTION) {
+ return wc_AesEcbEncrypt(aes, out, out, totalSz);
+ }
+ else {
+ return wc_AesEcbDecrypt(aes, out, out, totalSz);
+ }
+}
+#endif /* HAVE_AES_ECB */
+
+
+/* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing.
+ *
+ * xaes AES keys to use for block encrypt/decrypt
+ * out output buffer to hold cipher text
+ * in input plain text buffer to encrypt
+ * sz size of both out and in buffers
+ * i value to use for tweak
+ * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input
+ * adds a sanity check on how the user calls the function.
+ *
+ * returns 0 on success
+ */
+/* Software AES - XTS Encrypt */
+int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
+ const byte* i, word32 iSz)
+{
+ int ret = 0;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+ Aes *aes, *tweak;
+
+ if (xaes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ aes = &xaes->aes;
+ tweak = &xaes->tweak;
+
+ if (iSz < AES_BLOCK_SIZE) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (blocks > 0) {
+ byte tmp[AES_BLOCK_SIZE]; /* running tweak value T */
+
+ XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES
+ * key setup passed to encrypt direct*/
+
+ /* T0 = AES(tweakKey, i) */
+ wc_AesEncryptDirect(tweak, tmp, i);
+
+ #ifdef HAVE_AES_ECB
+ /* encrypt all of buffer at once when possible */
+ if (in != out) { /* can not handle inline */
+ XMEMCPY(out, tmp, AES_BLOCK_SIZE);
+ if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) {
+ return ret;
+ }
+ }
+ #endif
+
+ /* per-block XEX path (also re-derives T for the stealing tail) */
+ while (blocks > 0) {
+ word32 j;
+ byte carry = 0;
+ byte buf[AES_BLOCK_SIZE];
+
+ #ifdef HAVE_AES_ECB
+ if (in == out) { /* check for if inline */
+ #endif
+ /* C = AES(key, P ^ T) ^ T */
+ XMEMCPY(buf, in, AES_BLOCK_SIZE);
+ xorbuf(buf, tmp, AES_BLOCK_SIZE);
+ wc_AesEncryptDirect(aes, out, buf);
+ #ifdef HAVE_AES_ECB
+ }
+ #endif
+ xorbuf(out, tmp, AES_BLOCK_SIZE);
+
+ /* multiply by shift left and propagate carry (T *= 2 in GF(2^128)) */
+ for (j = 0; j < AES_BLOCK_SIZE; j++) {
+ byte tmpC;
+
+ tmpC = (tmp[j] >> 7) & 0x01;
+ tmp[j] = ((tmp[j] << 1) + carry) & 0xFF;
+ carry = tmpC;
+ }
+ if (carry) {
+ tmp[0] ^= GF_XTS;
+ }
+
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ sz -= AES_BLOCK_SIZE;
+ blocks--;
+ }
+
+ /* stealing operation of XTS to handle left overs */
+ if (sz > 0) {
+ byte buf[AES_BLOCK_SIZE];
+
+ /* steal the tail of the previous ciphertext block */
+ XMEMCPY(buf, out - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */
+ return BUFFER_E;
+ }
+ XMEMCPY(out, buf, sz);
+ XMEMCPY(buf, in, sz);
+
+ /* encrypt (partial plaintext | stolen ciphertext) into the slot of
+ * the previous full block */
+ xorbuf(buf, tmp, AES_BLOCK_SIZE);
+ wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf);
+ xorbuf(out - AES_BLOCK_SIZE, tmp, AES_BLOCK_SIZE);
+ }
+ }
+ else {
+ WOLFSSL_MSG("Plain text input too small for encryption");
+ return BAD_FUNC_ARG;
+ }
+
+ return ret;
+}
+
+
+/* Same process as encryption but Aes key is AES_DECRYPTION type.
+ *
+ * xaes AES keys to use for block encrypt/decrypt
+ * out output buffer to hold plain text
+ * in input cipher text buffer to decrypt
+ * sz size of both out and in buffers
+ * i value to use for tweak
+ * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input
+ * adds a sanity check on how the user calls the function.
+ *
+ * returns 0 on success
+ */
+/* Software AES - XTS Decrypt */
+int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
+ const byte* i, word32 iSz)
+{
+ int ret = 0;
+ word32 blocks = (sz / AES_BLOCK_SIZE);
+ Aes *aes, *tweak;
+
+ if (xaes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ aes = &xaes->aes;
+ tweak = &xaes->tweak;
+
+ if (iSz < AES_BLOCK_SIZE) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (blocks > 0) {
+ word32 j;
+ byte carry = 0;
+ byte tmp[AES_BLOCK_SIZE]; /* running tweak value T */
+ byte stl = (sz % AES_BLOCK_SIZE);
+
+ XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES
+ * key setup passed to decrypt direct*/
+
+ /* T0 = AES(tweakKey, i); the tweak is always encrypted */
+ wc_AesEncryptDirect(tweak, tmp, i);
+
+ /* if Stealing then break out of loop one block early to handle special
+ * case */
+ if (stl > 0) {
+ blocks--;
+ }
+
+ #ifdef HAVE_AES_ECB
+ /* decrypt all of buffer at once when possible */
+ if (in != out) { /* can not handle inline */
+ XMEMCPY(out, tmp, AES_BLOCK_SIZE);
+ if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) {
+ return ret;
+ }
+ }
+ #endif
+
+ while (blocks > 0) {
+ byte buf[AES_BLOCK_SIZE];
+
+ #ifdef HAVE_AES_ECB
+ if (in == out) { /* check for if inline */
+ #endif
+ /* P = AES-1(key, C ^ T) ^ T */
+ XMEMCPY(buf, in, AES_BLOCK_SIZE);
+ xorbuf(buf, tmp, AES_BLOCK_SIZE);
+ wc_AesDecryptDirect(aes, out, buf);
+ #ifdef HAVE_AES_ECB
+ }
+ #endif
+ xorbuf(out, tmp, AES_BLOCK_SIZE);
+
+ /* multiply by shift left and propagate carry (T *= 2 in GF(2^128)) */
+ for (j = 0; j < AES_BLOCK_SIZE; j++) {
+ byte tmpC;
+
+ tmpC = (tmp[j] >> 7) & 0x01;
+ tmp[j] = ((tmp[j] << 1) + carry) & 0xFF;
+ carry = tmpC;
+ }
+ if (carry) {
+ tmp[0] ^= GF_XTS;
+ }
+ /* carry is scoped outside this loop; reset it for the stealing
+ * doubling below */
+ carry = 0;
+
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ sz -= AES_BLOCK_SIZE;
+ blocks--;
+ }
+
+ /* stealing operation of XTS to handle left overs */
+ if (sz > 0) {
+ byte buf[AES_BLOCK_SIZE];
+ byte tmp2[AES_BLOCK_SIZE];
+
+ /* multiply by shift left and propagate carry; tmp2 = next tweak
+ * T(n), which decrypts the second-to-last block */
+ for (j = 0; j < AES_BLOCK_SIZE; j++) {
+ byte tmpC;
+
+ tmpC = (tmp[j] >> 7) & 0x01;
+ tmp2[j] = ((tmp[j] << 1) + carry) & 0xFF;
+ carry = tmpC;
+ }
+ if (carry) {
+ tmp2[0] ^= GF_XTS;
+ }
+
+ XMEMCPY(buf, in, AES_BLOCK_SIZE);
+ xorbuf(buf, tmp2, AES_BLOCK_SIZE);
+ wc_AesDecryptDirect(aes, out, buf);
+ xorbuf(out, tmp2, AES_BLOCK_SIZE);
+
+ /* tmp2 holds partial | last */
+ XMEMCPY(tmp2, out, AES_BLOCK_SIZE);
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ sz -= AES_BLOCK_SIZE;
+
+ /* Make buffer with end of cipher text | last */
+ XMEMCPY(buf, tmp2, AES_BLOCK_SIZE);
+ if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */
+ return BUFFER_E;
+ }
+ XMEMCPY(buf, in, sz);
+ XMEMCPY(out, tmp2, sz);
+
+ /* final block uses the earlier tweak T(n-1) still held in tmp */
+ xorbuf(buf, tmp, AES_BLOCK_SIZE);
+ wc_AesDecryptDirect(aes, tmp2, buf);
+ xorbuf(tmp2, tmp, AES_BLOCK_SIZE);
+ XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE);
+ }
+ }
+ else {
+ /* fix: message previously said "Plain text ... for encryption" */
+ WOLFSSL_MSG("Cipher text input too small for decryption");
+ return BAD_FUNC_ARG;
+ }
+
+ return ret;
+}
+
+#endif /* WOLFSSL_AES_XTS */
+
+#endif /* HAVE_FIPS */
+#endif /* !NO_AES */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_asm.S b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_asm.S
new file mode 100644
index 000000000..ae1c801d6
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_asm.S
@@ -0,0 +1,1338 @@
+/* aes_asm.S
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+/* This file is in at&t asm syntax, see .asm for intel syntax */
+
+/* See Intel® Advanced Encryption Standard (AES) Instructions Set White Paper
+ * by Intel Mobility Group, Israel Development Center, Israel Shay Gueron
+ */
+
+
+/*
+AES_CBC_encrypt (const unsigned char *in,
+ unsigned char *out,
+ unsigned char ivec[16],
+ unsigned long length,
+ const unsigned char *KS,
+ int nr)
+*/
+#ifndef __APPLE__
+.globl AES_CBC_encrypt
+AES_CBC_encrypt:
+#else
+.globl _AES_CBC_encrypt
+_AES_CBC_encrypt:
+#endif
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8
+# parameter 6: %r9d
+# SysV AMD64; only volatile registers (rcx, r10, rdi, rsi, xmm1-2) are used,
+# so no callee-saved spills are needed.
+# %rcx = block count: length/16, rounded up when the low 4 bits are nonzero
+# (shlq $60 sets ZF iff length is a multiple of 16).
+movq %rcx, %r10
+shrq $4, %rcx
+shlq $60, %r10
+je NO_PARTS
+addq $1, %rcx
+NO_PARTS:
+subq $16, %rsi
+# NOTE(review): movdqa assumes ivec is 16-byte aligned - confirm callers;
+# movdqu would be safe for an unaligned IV.
+movdqa (%rdx), %xmm1
+LOOP:
+# xmm1 = IV/prev ciphertext ^ plaintext ^ round key 0 (CBC chaining)
+pxor (%rdi), %xmm1
+pxor (%r8), %xmm1
+addq $16,%rsi
+addq $16,%rdi
+# compare nr once; aesenc does not write flags, so jb LAST below still
+# consumes this comparison (jb taken => nr < 12, i.e. AES-128).
+cmpl $12, %r9d
+aesenc 16(%r8),%xmm1
+aesenc 32(%r8),%xmm1
+aesenc 48(%r8),%xmm1
+aesenc 64(%r8),%xmm1
+aesenc 80(%r8),%xmm1
+aesenc 96(%r8),%xmm1
+aesenc 112(%r8),%xmm1
+aesenc 128(%r8),%xmm1
+aesenc 144(%r8),%xmm1
+movdqa 160(%r8),%xmm2
+jb LAST
+cmpl $14, %r9d
+
+# AES-192 rounds; fall through to AES-256 rounds when nr == 14
+aesenc 160(%r8),%xmm1
+aesenc 176(%r8),%xmm1
+movdqa 192(%r8),%xmm2
+jb LAST
+aesenc 192(%r8),%xmm1
+aesenc 208(%r8),%xmm1
+movdqa 224(%r8),%xmm2
+LAST:
+# decq sets ZF for the jne below (aesenclast/movdqu leave flags intact)
+decq %rcx
+aesenclast %xmm2,%xmm1
+movdqu %xmm1,(%rsi)
+jne LOOP
+ret
+
+
+#if defined(WOLFSSL_AESNI_BY4)
+
+/*
+AES_CBC_decrypt_by4 (const unsigned char *in,
+ unsigned char *out,
+ unsigned char ivec[16],
+ unsigned long length,
+ const unsigned char *KS,
+ int nr)
+*/
+#ifndef __APPLE__
+.globl AES_CBC_decrypt_by4
+AES_CBC_decrypt_by4:
+#else
+.globl _AES_CBC_decrypt_by4
+_AES_CBC_decrypt_by4:
+#endif
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8
+# parameter 6: %r9d
+# SysV AMD64. Decrypts 4 blocks per main-loop iteration, remainder one at a
+# time. xmm5 carries the CBC chaining value (IV, then previous ciphertext);
+# xmm6-8/xmm15 keep the raw ciphertext inputs for the post-decrypt XOR.
+
+ # %rcx = total block count (round up on partial block), then split into
+ # %rcx = count/4 (main loop) and %r10 = count%4 (remainder loop)
+ movq %rcx, %r10
+ shrq $4, %rcx
+ shlq $60, %r10
+ je DNO_PARTS_4
+ addq $1, %rcx
+DNO_PARTS_4:
+ movq %rcx, %r10
+ shlq $62, %r10
+ shrq $62, %r10
+ shrq $2, %rcx
+ movdqu (%rdx),%xmm5
+ je DREMAINDER_4
+ subq $64, %rsi
+DLOOP_4:
+ movdqu (%rdi), %xmm1
+ movdqu 16(%rdi), %xmm2
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ # save ciphertext copies; needed as chaining values after decryption
+ movdqa %xmm1, %xmm6
+ movdqa %xmm2, %xmm7
+ movdqa %xmm3, %xmm8
+ movdqa %xmm4, %xmm15
+ movdqa (%r8), %xmm9
+ movdqa 16(%r8), %xmm10
+ movdqa 32(%r8), %xmm11
+ movdqa 48(%r8), %xmm12
+ pxor %xmm9, %xmm1
+ pxor %xmm9, %xmm2
+ pxor %xmm9, %xmm3
+ pxor %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm12, %xmm1
+ aesdec %xmm12, %xmm2
+ aesdec %xmm12, %xmm3
+ aesdec %xmm12, %xmm4
+ movdqa 64(%r8), %xmm9
+ movdqa 80(%r8), %xmm10
+ movdqa 96(%r8), %xmm11
+ movdqa 112(%r8), %xmm12
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm12, %xmm1
+ aesdec %xmm12, %xmm2
+ aesdec %xmm12, %xmm3
+ aesdec %xmm12, %xmm4
+ movdqa 128(%r8), %xmm9
+ movdqa 144(%r8), %xmm10
+ movdqa 160(%r8), %xmm11
+ # flags from this compare survive the aesdec runs (they do not write
+ # flags); jb taken => nr < 12 (AES-128)
+ cmpl $12, %r9d
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ jb DLAST_4
+ movdqa 160(%r8), %xmm9
+ movdqa 176(%r8), %xmm10
+ movdqa 192(%r8), %xmm11
+ cmpl $14, %r9d
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ jb DLAST_4
+ movdqa 192(%r8), %xmm9
+ movdqa 208(%r8), %xmm10
+ movdqa 224(%r8), %xmm11
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+DLAST_4:
+ addq $64, %rdi
+ addq $64, %rsi
+ decq %rcx
+ # xmm11 holds the last-round key for whichever nr path was taken
+ aesdeclast %xmm11, %xmm1
+ aesdeclast %xmm11, %xmm2
+ aesdeclast %xmm11, %xmm3
+ aesdeclast %xmm11, %xmm4
+ # CBC unchain: XOR each decrypted block with the previous ciphertext
+ pxor %xmm5, %xmm1
+ pxor %xmm6, %xmm2
+ pxor %xmm7, %xmm3
+ pxor %xmm8, %xmm4
+ movdqu %xmm1, (%rsi)
+ movdqu %xmm2, 16(%rsi)
+ movdqu %xmm3, 32(%rsi)
+ movdqu %xmm4, 48(%rsi)
+ # carry the 4th ciphertext block forward as the next chaining value
+ movdqa %xmm15,%xmm5
+ jne DLOOP_4
+ addq $64, %rsi
+DREMAINDER_4:
+ cmpq $0, %r10
+ je DEND_4
+DLOOP_4_2:
+ # single-block tail: same structure, one block at a time
+ movdqu (%rdi), %xmm1
+ movdqa %xmm1, %xmm15
+ addq $16, %rdi
+ pxor (%r8), %xmm1
+ movdqu 160(%r8), %xmm2
+ cmpl $12, %r9d
+ aesdec 16(%r8), %xmm1
+ aesdec 32(%r8), %xmm1
+ aesdec 48(%r8), %xmm1
+ aesdec 64(%r8), %xmm1
+ aesdec 80(%r8), %xmm1
+ aesdec 96(%r8), %xmm1
+ aesdec 112(%r8), %xmm1
+ aesdec 128(%r8), %xmm1
+ aesdec 144(%r8), %xmm1
+ jb DLAST_4_2
+ movdqu 192(%r8), %xmm2
+ cmpl $14, %r9d
+ aesdec 160(%r8), %xmm1
+ aesdec 176(%r8), %xmm1
+ jb DLAST_4_2
+ movdqu 224(%r8), %xmm2
+ aesdec 192(%r8), %xmm1
+ aesdec 208(%r8), %xmm1
+DLAST_4_2:
+ aesdeclast %xmm2, %xmm1
+ pxor %xmm5, %xmm1
+ movdqa %xmm15, %xmm5
+ movdqu %xmm1, (%rsi)
+ addq $16, %rsi
+ decq %r10
+ jne DLOOP_4_2
+DEND_4:
+ ret
+
+#elif defined(WOLFSSL_AESNI_BY6)
+
+/*
+AES_CBC_decrypt_by6 (const unsigned char *in,
+ unsigned char *out,
+ unsigned char ivec[16],
+ unsigned long length,
+ const unsigned char *KS,
+ int nr)
+*/
+#ifndef __APPLE__
+.globl AES_CBC_decrypt_by6
+AES_CBC_decrypt_by6:
+#else
+.globl _AES_CBC_decrypt_by6
+_AES_CBC_decrypt_by6:
+#endif
+# parameter 1: %rdi - in
+# parameter 2: %rsi - out
+# parameter 3: %rdx - ivec
+# parameter 4: %rcx - length
+# parameter 5: %r8 - KS
+# parameter 6: %r9d - nr
+# SysV AMD64. Decrypts 6 blocks per main-loop iteration; xmm7 carries the CBC
+# chaining value across iterations.
+
+ # %rcx = total block count, rounded up on a partial final block
+ movq %rcx, %r10
+ shrq $4, %rcx
+ shlq $60, %r10
+ je DNO_PARTS_6
+ addq $1, %rcx
+DNO_PARTS_6:
+ # Split count into count/6 (%rcx) and count%6 (%r10) via div. rax/rdx
+ # must be preserved around div (it uses rdx:rax), and rbx holds the
+ # divisor, so they are parked in r12-r14.
+ # NOTE(review): r12, r13 and r14 are callee-saved in the SysV ABI and
+ # are clobbered here without being saved/restored - confirm callers, or
+ # push/pop them (ABI violation if any caller keeps live values there).
+ movq %rax, %r12
+ movq %rdx, %r13
+ movq %rbx, %r14
+ movq $0, %rdx
+ movq %rcx, %rax
+ movq $6, %rbx
+ div %rbx
+ movq %rax, %rcx
+ movq %rdx, %r10
+ movq %r12, %rax
+ movq %r13, %rdx
+ movq %r14, %rbx
+ cmpq $0, %rcx
+ movdqu (%rdx), %xmm7
+ je DREMAINDER_6
+ subq $96, %rsi
+DLOOP_6:
+ movdqu (%rdi), %xmm1
+ movdqu 16(%rdi), %xmm2
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqa (%r8), %xmm8
+ movdqa 16(%r8), %xmm9
+ movdqa 32(%r8), %xmm10
+ movdqa 48(%r8), %xmm11
+ pxor %xmm8, %xmm1
+ pxor %xmm8, %xmm2
+ pxor %xmm8, %xmm3
+ pxor %xmm8, %xmm4
+ pxor %xmm8, %xmm5
+ pxor %xmm8, %xmm6
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm9, %xmm5
+ aesdec %xmm9, %xmm6
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm10, %xmm5
+ aesdec %xmm10, %xmm6
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm11, %xmm5
+ aesdec %xmm11, %xmm6
+ movdqa 64(%r8), %xmm8
+ movdqa 80(%r8), %xmm9
+ movdqa 96(%r8), %xmm10
+ movdqa 112(%r8), %xmm11
+ aesdec %xmm8, %xmm1
+ aesdec %xmm8, %xmm2
+ aesdec %xmm8, %xmm3
+ aesdec %xmm8, %xmm4
+ aesdec %xmm8, %xmm5
+ aesdec %xmm8, %xmm6
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm9, %xmm5
+ aesdec %xmm9, %xmm6
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm10, %xmm5
+ aesdec %xmm10, %xmm6
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm11, %xmm5
+ aesdec %xmm11, %xmm6
+ movdqa 128(%r8), %xmm8
+ movdqa 144(%r8), %xmm9
+ movdqa 160(%r8), %xmm10
+ # flags survive the aesdec runs; jb taken => nr < 12 (AES-128)
+ cmpl $12, %r9d
+ aesdec %xmm8, %xmm1
+ aesdec %xmm8, %xmm2
+ aesdec %xmm8, %xmm3
+ aesdec %xmm8, %xmm4
+ aesdec %xmm8, %xmm5
+ aesdec %xmm8, %xmm6
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm9, %xmm5
+ aesdec %xmm9, %xmm6
+ jb DLAST_6
+ movdqa 160(%r8), %xmm8
+ movdqa 176(%r8), %xmm9
+ movdqa 192(%r8), %xmm10
+ cmpl $14, %r9d
+ aesdec %xmm8, %xmm1
+ aesdec %xmm8, %xmm2
+ aesdec %xmm8, %xmm3
+ aesdec %xmm8, %xmm4
+ aesdec %xmm8, %xmm5
+ aesdec %xmm8, %xmm6
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm9, %xmm5
+ aesdec %xmm9, %xmm6
+ jb DLAST_6
+ movdqa 192(%r8), %xmm8
+ movdqa 208(%r8), %xmm9
+ movdqa 224(%r8), %xmm10
+ aesdec %xmm8, %xmm1
+ aesdec %xmm8, %xmm2
+ aesdec %xmm8, %xmm3
+ aesdec %xmm8, %xmm4
+ aesdec %xmm8, %xmm5
+ aesdec %xmm8, %xmm6
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm9, %xmm5
+ aesdec %xmm9, %xmm6
+DLAST_6:
+ addq $96, %rsi
+ # xmm10 holds the last-round key for whichever nr path was taken
+ aesdeclast %xmm10, %xmm1
+ aesdeclast %xmm10, %xmm2
+ aesdeclast %xmm10, %xmm3
+ aesdeclast %xmm10, %xmm4
+ aesdeclast %xmm10, %xmm5
+ aesdeclast %xmm10, %xmm6
+ # reload raw ciphertext (rdi not yet advanced) for the CBC unchain XORs
+ movdqu (%rdi), %xmm8
+ movdqu 16(%rdi), %xmm9
+ movdqu 32(%rdi), %xmm10
+ movdqu 48(%rdi), %xmm11
+ movdqu 64(%rdi), %xmm12
+ movdqu 80(%rdi), %xmm13
+ pxor %xmm7, %xmm1
+ pxor %xmm8, %xmm2
+ pxor %xmm9, %xmm3
+ pxor %xmm10, %xmm4
+ pxor %xmm11, %xmm5
+ pxor %xmm12, %xmm6
+ # 6th ciphertext block becomes the next chaining value
+ movdqu %xmm13, %xmm7
+ movdqu %xmm1, (%rsi)
+ movdqu %xmm2, 16(%rsi)
+ movdqu %xmm3, 32(%rsi)
+ movdqu %xmm4, 48(%rsi)
+ movdqu %xmm5, 64(%rsi)
+ movdqu %xmm6, 80(%rsi)
+ addq $96, %rdi
+ decq %rcx
+ jne DLOOP_6
+ addq $96, %rsi
+DREMAINDER_6:
+ cmpq $0, %r10
+ je DEND_6
+DLOOP_6_2:
+ # single-block tail loop
+ movdqu (%rdi), %xmm1
+ movdqa %xmm1, %xmm10
+ addq $16, %rdi
+ pxor (%r8), %xmm1
+ movdqu 160(%r8), %xmm2
+ cmpl $12, %r9d
+ aesdec 16(%r8), %xmm1
+ aesdec 32(%r8), %xmm1
+ aesdec 48(%r8), %xmm1
+ aesdec 64(%r8), %xmm1
+ aesdec 80(%r8), %xmm1
+ aesdec 96(%r8), %xmm1
+ aesdec 112(%r8), %xmm1
+ aesdec 128(%r8), %xmm1
+ aesdec 144(%r8), %xmm1
+ jb DLAST_6_2
+ movdqu 192(%r8), %xmm2
+ cmpl $14, %r9d
+ aesdec 160(%r8), %xmm1
+ aesdec 176(%r8), %xmm1
+ jb DLAST_6_2
+ movdqu 224(%r8), %xmm2
+ aesdec 192(%r8), %xmm1
+ aesdec 208(%r8), %xmm1
+DLAST_6_2:
+ aesdeclast %xmm2, %xmm1
+ pxor %xmm7, %xmm1
+ movdqa %xmm10, %xmm7
+ movdqu %xmm1, (%rsi)
+ addq $16, %rsi
+ decq %r10
+ jne DLOOP_6_2
+DEND_6:
+ ret
+
+#else /* WOLFSSL_AESNI_BYx */
+
+/*
+AES_CBC_decrypt_by8 (const unsigned char *in,
+ unsigned char *out,
+ unsigned char ivec[16],
+ unsigned long length,
+ const unsigned char *KS,
+ int nr)
+*/
+#ifndef __APPLE__
+.globl AES_CBC_decrypt_by8
+AES_CBC_decrypt_by8:
+#else
+.globl _AES_CBC_decrypt_by8
+_AES_CBC_decrypt_by8:
+#endif
+# parameter 1: %rdi - in
+# parameter 2: %rsi - out
+# parameter 3: %rdx - ivec
+# parameter 4: %rcx - length
+# parameter 5: %r8 - KS
+# parameter 6: %r9d - nr
+# SysV AMD64; only volatile registers are used. Decrypts 8 blocks per
+# main-loop iteration; xmm9 carries the CBC chaining value.
+
+ # %rcx = total block count (round up on partial final block), then split
+ # into %rcx = count/8 (main loop) and %r10 = count%8 (remainder loop)
+ movq %rcx, %r10
+ shrq $4, %rcx
+ shlq $60, %r10
+ je DNO_PARTS_8
+ addq $1, %rcx
+DNO_PARTS_8:
+ movq %rcx, %r10
+ shlq $61, %r10
+ shrq $61, %r10
+ shrq $3, %rcx
+ movdqu (%rdx), %xmm9
+ je DREMAINDER_8
+ subq $128, %rsi
+DLOOP_8:
+ movdqu (%rdi), %xmm1
+ movdqu 16(%rdi), %xmm2
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqu 96(%rdi), %xmm7
+ movdqu 112(%rdi), %xmm8
+ movdqa (%r8), %xmm10
+ movdqa 16(%r8), %xmm11
+ movdqa 32(%r8), %xmm12
+ movdqa 48(%r8), %xmm13
+ pxor %xmm10, %xmm1
+ pxor %xmm10, %xmm2
+ pxor %xmm10, %xmm3
+ pxor %xmm10, %xmm4
+ pxor %xmm10, %xmm5
+ pxor %xmm10, %xmm6
+ pxor %xmm10, %xmm7
+ pxor %xmm10, %xmm8
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm11, %xmm5
+ aesdec %xmm11, %xmm6
+ aesdec %xmm11, %xmm7
+ aesdec %xmm11, %xmm8
+ aesdec %xmm12, %xmm1
+ aesdec %xmm12, %xmm2
+ aesdec %xmm12, %xmm3
+ aesdec %xmm12, %xmm4
+ aesdec %xmm12, %xmm5
+ aesdec %xmm12, %xmm6
+ aesdec %xmm12, %xmm7
+ aesdec %xmm12, %xmm8
+ aesdec %xmm13, %xmm1
+ aesdec %xmm13, %xmm2
+ aesdec %xmm13, %xmm3
+ aesdec %xmm13, %xmm4
+ aesdec %xmm13, %xmm5
+ aesdec %xmm13, %xmm6
+ aesdec %xmm13, %xmm7
+ aesdec %xmm13, %xmm8
+ movdqa 64(%r8), %xmm10
+ movdqa 80(%r8), %xmm11
+ movdqa 96(%r8), %xmm12
+ movdqa 112(%r8), %xmm13
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm10, %xmm5
+ aesdec %xmm10, %xmm6
+ aesdec %xmm10, %xmm7
+ aesdec %xmm10, %xmm8
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm11, %xmm5
+ aesdec %xmm11, %xmm6
+ aesdec %xmm11, %xmm7
+ aesdec %xmm11, %xmm8
+ aesdec %xmm12, %xmm1
+ aesdec %xmm12, %xmm2
+ aesdec %xmm12, %xmm3
+ aesdec %xmm12, %xmm4
+ aesdec %xmm12, %xmm5
+ aesdec %xmm12, %xmm6
+ aesdec %xmm12, %xmm7
+ aesdec %xmm12, %xmm8
+ aesdec %xmm13, %xmm1
+ aesdec %xmm13, %xmm2
+ aesdec %xmm13, %xmm3
+ aesdec %xmm13, %xmm4
+ aesdec %xmm13, %xmm5
+ aesdec %xmm13, %xmm6
+ aesdec %xmm13, %xmm7
+ aesdec %xmm13, %xmm8
+ movdqa 128(%r8), %xmm10
+ movdqa 144(%r8), %xmm11
+ movdqa 160(%r8), %xmm12
+ # flags survive the aesdec runs; jb taken => nr < 12 (AES-128)
+ cmpl $12, %r9d
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm10, %xmm5
+ aesdec %xmm10, %xmm6
+ aesdec %xmm10, %xmm7
+ aesdec %xmm10, %xmm8
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm11, %xmm5
+ aesdec %xmm11, %xmm6
+ aesdec %xmm11, %xmm7
+ aesdec %xmm11, %xmm8
+ jb DLAST_8
+ movdqa 160(%r8), %xmm10
+ movdqa 176(%r8), %xmm11
+ movdqa 192(%r8), %xmm12
+ cmpl $14, %r9d
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm10, %xmm5
+ aesdec %xmm10, %xmm6
+ aesdec %xmm10, %xmm7
+ aesdec %xmm10, %xmm8
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm11, %xmm5
+ aesdec %xmm11, %xmm6
+ aesdec %xmm11, %xmm7
+ aesdec %xmm11, %xmm8
+ jb DLAST_8
+ movdqa 192(%r8), %xmm10
+ movdqa 208(%r8), %xmm11
+ movdqa 224(%r8), %xmm12
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm10, %xmm5
+ aesdec %xmm10, %xmm6
+ aesdec %xmm10, %xmm7
+ aesdec %xmm10, %xmm8
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm11, %xmm5
+ aesdec %xmm11, %xmm6
+ aesdec %xmm11, %xmm7
+ aesdec %xmm11, %xmm8
+DLAST_8:
+ addq $128, %rsi
+ # xmm12 holds the last-round key for whichever nr path was taken
+ aesdeclast %xmm12, %xmm1
+ aesdeclast %xmm12, %xmm2
+ aesdeclast %xmm12, %xmm3
+ aesdeclast %xmm12, %xmm4
+ aesdeclast %xmm12, %xmm5
+ aesdeclast %xmm12, %xmm6
+ aesdeclast %xmm12, %xmm7
+ aesdeclast %xmm12, %xmm8
+ # reload raw ciphertext for the CBC unchain XORs; the final load into
+ # xmm9 doubles as the next iteration's chaining value
+ movdqu (%rdi), %xmm10
+ movdqu 16(%rdi), %xmm11
+ movdqu 32(%rdi), %xmm12
+ movdqu 48(%rdi), %xmm13
+ pxor %xmm9, %xmm1
+ pxor %xmm10, %xmm2
+ pxor %xmm11, %xmm3
+ pxor %xmm12, %xmm4
+ pxor %xmm13, %xmm5
+ movdqu 64(%rdi), %xmm10
+ movdqu 80(%rdi), %xmm11
+ movdqu 96(%rdi), %xmm12
+ movdqu 112(%rdi), %xmm9
+ pxor %xmm10, %xmm6
+ pxor %xmm11, %xmm7
+ pxor %xmm12, %xmm8
+ movdqu %xmm1, (%rsi)
+ movdqu %xmm2, 16(%rsi)
+ movdqu %xmm3, 32(%rsi)
+ movdqu %xmm4, 48(%rsi)
+ movdqu %xmm5, 64(%rsi)
+ movdqu %xmm6, 80(%rsi)
+ movdqu %xmm7, 96(%rsi)
+ movdqu %xmm8, 112(%rsi)
+ addq $128, %rdi
+ decq %rcx
+ jne DLOOP_8
+ addq $128, %rsi
+DREMAINDER_8:
+ cmpq $0, %r10
+ je DEND_8
+DLOOP_8_2:
+ # single-block tail loop
+ movdqu (%rdi), %xmm1
+ movdqa %xmm1, %xmm10
+ addq $16, %rdi
+ pxor (%r8), %xmm1
+ movdqu 160(%r8), %xmm2
+ cmpl $12, %r9d
+ aesdec 16(%r8), %xmm1
+ aesdec 32(%r8), %xmm1
+ aesdec 48(%r8), %xmm1
+ aesdec 64(%r8), %xmm1
+ aesdec 80(%r8), %xmm1
+ aesdec 96(%r8), %xmm1
+ aesdec 112(%r8), %xmm1
+ aesdec 128(%r8), %xmm1
+ aesdec 144(%r8), %xmm1
+ jb DLAST_8_2
+ movdqu 192(%r8), %xmm2
+ cmpl $14, %r9d
+ aesdec 160(%r8), %xmm1
+ aesdec 176(%r8), %xmm1
+ jb DLAST_8_2
+ movdqu 224(%r8), %xmm2
+ aesdec 192(%r8), %xmm1
+ aesdec 208(%r8), %xmm1
+DLAST_8_2:
+ aesdeclast %xmm2, %xmm1
+ pxor %xmm9, %xmm1
+ movdqa %xmm10, %xmm9
+ movdqu %xmm1, (%rsi)
+ addq $16, %rsi
+ decq %r10
+ jne DLOOP_8_2
+DEND_8:
+ ret
+
+#endif /* WOLFSSL_AESNI_BYx */
+
+
+/*
+AES_ECB_encrypt (const unsigned char *in,
+ unsigned char *out,
+ unsigned long length,
+ const unsigned char *KS,
+ int nr)
+*/
+#ifndef __APPLE__
+.globl AES_ECB_encrypt
+AES_ECB_encrypt:
+#else
+.globl _AES_ECB_encrypt
+_AES_ECB_encrypt:
+#endif
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8d
+# SysV AMD64; only volatile registers used. Encrypts 4 independent blocks per
+# main-loop iteration (no chaining in ECB), remainder one at a time.
+ # %rdx = total block count (round up on partial block), then split into
+ # %rdx = count/4 (main loop) and %r10 = count%4 (remainder loop)
+ movq %rdx, %r10
+ shrq $4, %rdx
+ shlq $60, %r10
+ je EECB_NO_PARTS_4
+ addq $1, %rdx
+EECB_NO_PARTS_4:
+ movq %rdx, %r10
+ shlq $62, %r10
+ shrq $62, %r10
+ shrq $2, %rdx
+ je EECB_REMAINDER_4
+ subq $64, %rsi
+EECB_LOOP_4:
+ movdqu (%rdi), %xmm1
+ movdqu 16(%rdi), %xmm2
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqa (%rcx), %xmm9
+ movdqa 16(%rcx), %xmm10
+ movdqa 32(%rcx), %xmm11
+ movdqa 48(%rcx), %xmm12
+ pxor %xmm9, %xmm1
+ pxor %xmm9, %xmm2
+ pxor %xmm9, %xmm3
+ pxor %xmm9, %xmm4
+ aesenc %xmm10, %xmm1
+ aesenc %xmm10, %xmm2
+ aesenc %xmm10, %xmm3
+ aesenc %xmm10, %xmm4
+ aesenc %xmm11, %xmm1
+ aesenc %xmm11, %xmm2
+ aesenc %xmm11, %xmm3
+ aesenc %xmm11, %xmm4
+ aesenc %xmm12, %xmm1
+ aesenc %xmm12, %xmm2
+ aesenc %xmm12, %xmm3
+ aesenc %xmm12, %xmm4
+ movdqa 64(%rcx), %xmm9
+ movdqa 80(%rcx), %xmm10
+ movdqa 96(%rcx), %xmm11
+ movdqa 112(%rcx), %xmm12
+ aesenc %xmm9, %xmm1
+ aesenc %xmm9, %xmm2
+ aesenc %xmm9, %xmm3
+ aesenc %xmm9, %xmm4
+ aesenc %xmm10, %xmm1
+ aesenc %xmm10, %xmm2
+ aesenc %xmm10, %xmm3
+ aesenc %xmm10, %xmm4
+ aesenc %xmm11, %xmm1
+ aesenc %xmm11, %xmm2
+ aesenc %xmm11, %xmm3
+ aesenc %xmm11, %xmm4
+ aesenc %xmm12, %xmm1
+ aesenc %xmm12, %xmm2
+ aesenc %xmm12, %xmm3
+ aesenc %xmm12, %xmm4
+ movdqa 128(%rcx), %xmm9
+ movdqa 144(%rcx), %xmm10
+ movdqa 160(%rcx), %xmm11
+ # flags survive the aesenc runs; jb taken => nr < 12 (AES-128)
+ cmpl $12, %r8d
+ aesenc %xmm9, %xmm1
+ aesenc %xmm9, %xmm2
+ aesenc %xmm9, %xmm3
+ aesenc %xmm9, %xmm4
+ aesenc %xmm10, %xmm1
+ aesenc %xmm10, %xmm2
+ aesenc %xmm10, %xmm3
+ aesenc %xmm10, %xmm4
+ jb EECB_LAST_4
+ movdqa 160(%rcx), %xmm9
+ movdqa 176(%rcx), %xmm10
+ movdqa 192(%rcx), %xmm11
+ cmpl $14, %r8d
+ aesenc %xmm9, %xmm1
+ aesenc %xmm9, %xmm2
+ aesenc %xmm9, %xmm3
+ aesenc %xmm9, %xmm4
+ aesenc %xmm10, %xmm1
+ aesenc %xmm10, %xmm2
+ aesenc %xmm10, %xmm3
+ aesenc %xmm10, %xmm4
+ jb EECB_LAST_4
+ movdqa 192(%rcx), %xmm9
+ movdqa 208(%rcx), %xmm10
+ movdqa 224(%rcx), %xmm11
+ aesenc %xmm9, %xmm1
+ aesenc %xmm9, %xmm2
+ aesenc %xmm9, %xmm3
+ aesenc %xmm9, %xmm4
+ aesenc %xmm10, %xmm1
+ aesenc %xmm10, %xmm2
+ aesenc %xmm10, %xmm3
+ aesenc %xmm10, %xmm4
+EECB_LAST_4:
+ addq $64, %rdi
+ addq $64, %rsi
+ decq %rdx
+ # xmm11 holds the last-round key for whichever nr path was taken
+ aesenclast %xmm11, %xmm1
+ aesenclast %xmm11, %xmm2
+ aesenclast %xmm11, %xmm3
+ aesenclast %xmm11, %xmm4
+ movdqu %xmm1, (%rsi)
+ movdqu %xmm2, 16(%rsi)
+ movdqu %xmm3, 32(%rsi)
+ movdqu %xmm4, 48(%rsi)
+ jne EECB_LOOP_4
+ addq $64, %rsi
+EECB_REMAINDER_4:
+ cmpq $0, %r10
+ je EECB_END_4
+EECB_LOOP_4_2:
+ # single-block tail loop
+ movdqu (%rdi), %xmm1
+ addq $16, %rdi
+ pxor (%rcx), %xmm1
+ movdqu 160(%rcx), %xmm2
+ aesenc 16(%rcx), %xmm1
+ aesenc 32(%rcx), %xmm1
+ aesenc 48(%rcx), %xmm1
+ aesenc 64(%rcx), %xmm1
+ aesenc 80(%rcx), %xmm1
+ aesenc 96(%rcx), %xmm1
+ aesenc 112(%rcx), %xmm1
+ aesenc 128(%rcx), %xmm1
+ aesenc 144(%rcx), %xmm1
+ cmpl $12, %r8d
+ jb EECB_LAST_4_2
+ movdqu 192(%rcx), %xmm2
+ aesenc 160(%rcx), %xmm1
+ aesenc 176(%rcx), %xmm1
+ cmpl $14, %r8d
+ jb EECB_LAST_4_2
+ movdqu 224(%rcx), %xmm2
+ aesenc 192(%rcx), %xmm1
+ aesenc 208(%rcx), %xmm1
+EECB_LAST_4_2:
+ aesenclast %xmm2, %xmm1
+ movdqu %xmm1, (%rsi)
+ addq $16, %rsi
+ decq %r10
+ jne EECB_LOOP_4_2
+EECB_END_4:
+ ret
+
+
+/*
+AES_ECB_decrypt (const unsigned char *in,
+ unsigned char *out,
+ unsigned long length,
+ const unsigned char *KS,
+ int nr)
+*/
+#ifndef __APPLE__
+.globl AES_ECB_decrypt
+AES_ECB_decrypt:
+#else
+.globl _AES_ECB_decrypt
+_AES_ECB_decrypt:
+#endif
+# parameter 1: %rdi
+# parameter 2: %rsi
+# parameter 3: %rdx
+# parameter 4: %rcx
+# parameter 5: %r8d
+
+ movq %rdx, %r10
+ shrq $4, %rdx
+ shlq $60, %r10
+ je DECB_NO_PARTS_4
+ addq $1, %rdx
+DECB_NO_PARTS_4:
+ movq %rdx, %r10
+ shlq $62, %r10
+ shrq $62, %r10
+ shrq $2, %rdx
+ je DECB_REMAINDER_4
+ subq $64, %rsi
+DECB_LOOP_4:
+ movdqu (%rdi), %xmm1
+ movdqu 16(%rdi), %xmm2
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqa (%rcx), %xmm9
+ movdqa 16(%rcx), %xmm10
+ movdqa 32(%rcx), %xmm11
+ movdqa 48(%rcx), %xmm12
+ pxor %xmm9, %xmm1
+ pxor %xmm9, %xmm2
+ pxor %xmm9, %xmm3
+ pxor %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm12, %xmm1
+ aesdec %xmm12, %xmm2
+ aesdec %xmm12, %xmm3
+ aesdec %xmm12, %xmm4
+ movdqa 64(%rcx), %xmm9
+ movdqa 80(%rcx), %xmm10
+ movdqa 96(%rcx), %xmm11
+ movdqa 112(%rcx), %xmm12
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ aesdec %xmm11, %xmm1
+ aesdec %xmm11, %xmm2
+ aesdec %xmm11, %xmm3
+ aesdec %xmm11, %xmm4
+ aesdec %xmm12, %xmm1
+ aesdec %xmm12, %xmm2
+ aesdec %xmm12, %xmm3
+ aesdec %xmm12, %xmm4
+ movdqa 128(%rcx), %xmm9
+ movdqa 144(%rcx), %xmm10
+ movdqa 160(%rcx), %xmm11
+ cmpl $12, %r8d
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ jb DECB_LAST_4
+ movdqa 160(%rcx), %xmm9
+ movdqa 176(%rcx), %xmm10
+ movdqa 192(%rcx), %xmm11
+ cmpl $14, %r8d
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+ jb DECB_LAST_4
+ movdqa 192(%rcx), %xmm9
+ movdqa 208(%rcx), %xmm10
+ movdqa 224(%rcx), %xmm11
+ aesdec %xmm9, %xmm1
+ aesdec %xmm9, %xmm2
+ aesdec %xmm9, %xmm3
+ aesdec %xmm9, %xmm4
+ aesdec %xmm10, %xmm1
+ aesdec %xmm10, %xmm2
+ aesdec %xmm10, %xmm3
+ aesdec %xmm10, %xmm4
+DECB_LAST_4:
+ addq $64, %rdi
+ addq $64, %rsi
+ decq %rdx
+ aesdeclast %xmm11, %xmm1
+ aesdeclast %xmm11, %xmm2
+ aesdeclast %xmm11, %xmm3
+ aesdeclast %xmm11, %xmm4
+ movdqu %xmm1, (%rsi)
+ movdqu %xmm2, 16(%rsi)
+ movdqu %xmm3, 32(%rsi)
+ movdqu %xmm4, 48(%rsi)
+ jne DECB_LOOP_4
+ addq $64, %rsi
+DECB_REMAINDER_4:
+ cmpq $0, %r10
+ je DECB_END_4
+DECB_LOOP_4_2:
+ movdqu (%rdi), %xmm1
+ addq $16, %rdi
+ pxor (%rcx), %xmm1
+ movdqu 160(%rcx), %xmm2
+ cmpl $12, %r8d
+ aesdec 16(%rcx), %xmm1
+ aesdec 32(%rcx), %xmm1
+ aesdec 48(%rcx), %xmm1
+ aesdec 64(%rcx), %xmm1
+ aesdec 80(%rcx), %xmm1
+ aesdec 96(%rcx), %xmm1
+ aesdec 112(%rcx), %xmm1
+ aesdec 128(%rcx), %xmm1
+ aesdec 144(%rcx), %xmm1
+ jb DECB_LAST_4_2
+ cmpl $14, %r8d
+ movdqu 192(%rcx), %xmm2
+ aesdec 160(%rcx), %xmm1
+ aesdec 176(%rcx), %xmm1
+ jb DECB_LAST_4_2
+ movdqu 224(%rcx), %xmm2
+ aesdec 192(%rcx), %xmm1
+ aesdec 208(%rcx), %xmm1
+DECB_LAST_4_2:
+ aesdeclast %xmm2, %xmm1
+ movdqu %xmm1, (%rsi)
+ addq $16, %rsi
+ decq %r10
+ jne DECB_LOOP_4_2
+DECB_END_4:
+ ret
+
+
+
+
+/*
+void AES_128_Key_Expansion(const unsigned char* userkey,
+ unsigned char* key_schedule);
+*/
+.align 16,0x90
+#ifndef __APPLE__
+.globl AES_128_Key_Expansion
+AES_128_Key_Expansion:
+#else
+.globl _AES_128_Key_Expansion
+_AES_128_Key_Expansion:
+#endif
+# parameter 1: %rdi
+# parameter 2: %rsi
+movl $10, 240(%rsi)
+
+movdqu (%rdi), %xmm1
+movdqa %xmm1, (%rsi)
+
+
+ASSISTS:
+aeskeygenassist $1, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 16(%rsi)
+aeskeygenassist $2, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 32(%rsi)
+aeskeygenassist $4, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 48(%rsi)
+aeskeygenassist $8, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 64(%rsi)
+aeskeygenassist $16, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 80(%rsi)
+aeskeygenassist $32, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 96(%rsi)
+aeskeygenassist $64, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 112(%rsi)
+aeskeygenassist $0x80, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 128(%rsi)
+aeskeygenassist $0x1b, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 144(%rsi)
+aeskeygenassist $0x36, %xmm1, %xmm2
+call PREPARE_ROUNDKEY_128
+movdqa %xmm1, 160(%rsi)
+ret
+
+PREPARE_ROUNDKEY_128:
+pshufd $255, %xmm2, %xmm2
+movdqa %xmm1, %xmm3
+pslldq $4, %xmm3
+pxor %xmm3, %xmm1
+pslldq $4, %xmm3
+pxor %xmm3, %xmm1
+pslldq $4, %xmm3
+pxor %xmm3, %xmm1
+pxor %xmm2, %xmm1
+ret
+
+
+/*
+void AES_192_Key_Expansion (const unsigned char *userkey,
+ unsigned char *key)
+*/
+#ifndef __APPLE__
+.globl AES_192_Key_Expansion
+AES_192_Key_Expansion:
+#else
+.globl _AES_192_Key_Expansion
+_AES_192_Key_Expansion:
+#endif
+# parameter 1: %rdi
+# parameter 2: %rsi
+
+movdqu (%rdi), %xmm1
+movq 16(%rdi), %xmm3
+movdqa %xmm1, (%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x1, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 16(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 32(%rsi)
+
+aeskeygenassist $0x2, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 48(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x4, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 64(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 80(%rsi)
+
+aeskeygenassist $0x8, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 96(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x10, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 112(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 128(%rsi)
+
+aeskeygenassist $0x20, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 144(%rsi)
+movdqa %xmm3, %xmm5
+
+aeskeygenassist $0x40, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+shufpd $0, %xmm1, %xmm5
+movdqa %xmm5, 160(%rsi)
+movdqa %xmm1, %xmm6
+shufpd $1, %xmm3, %xmm6
+movdqa %xmm6, 176(%rsi)
+
+aeskeygenassist $0x80, %xmm3, %xmm2
+call PREPARE_ROUNDKEY_192
+movdqa %xmm1, 192(%rsi)
+movdqa %xmm3, 208(%rsi)
+ret
+
+PREPARE_ROUNDKEY_192:
+pshufd $0x55, %xmm2, %xmm2
+movdqu %xmm1, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pxor %xmm2, %xmm1
+pshufd $0xff, %xmm1, %xmm2
+movdqu %xmm3, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pxor %xmm2, %xmm3
+ret
+
+
+/*
+void AES_256_Key_Expansion (const unsigned char *userkey,
+ unsigned char *key)
+*/
+#ifndef __APPLE__
+.globl AES_256_Key_Expansion
+AES_256_Key_Expansion:
+#else
+.globl _AES_256_Key_Expansion
+_AES_256_Key_Expansion:
+#endif
+# parameter 1: %rdi
+# parameter 2: %rsi
+
+movdqu (%rdi), %xmm1
+movdqu 16(%rdi), %xmm3
+movdqa %xmm1, (%rsi)
+movdqa %xmm3, 16(%rsi)
+
+aeskeygenassist $0x1, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 32(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 48(%rsi)
+aeskeygenassist $0x2, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 64(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 80(%rsi)
+aeskeygenassist $0x4, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 96(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 112(%rsi)
+aeskeygenassist $0x8, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 128(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 144(%rsi)
+aeskeygenassist $0x10, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 160(%rsi)
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 176(%rsi)
+aeskeygenassist $0x20, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 192(%rsi)
+
+aeskeygenassist $0x0, %xmm1, %xmm2
+call MAKE_RK256_b
+movdqa %xmm3, 208(%rsi)
+aeskeygenassist $0x40, %xmm3, %xmm2
+call MAKE_RK256_a
+movdqa %xmm1, 224(%rsi)
+
+ret
+
+MAKE_RK256_a:
+pshufd $0xff, %xmm2, %xmm2
+movdqa %xmm1, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pslldq $4, %xmm4
+pxor %xmm4, %xmm1
+pxor %xmm2, %xmm1
+ret
+
+MAKE_RK256_b:
+pshufd $0xaa, %xmm2, %xmm2
+movdqa %xmm3, %xmm4
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pslldq $4, %xmm4
+pxor %xmm4, %xmm3
+pxor %xmm2, %xmm3
+ret
+
+#if defined(__linux__) && defined(__ELF__)
+ .section .note.GNU-stack,"",%progbits
+#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_asm.asm b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_asm.asm
index 1e3d2d99e..b3cc94d9e 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_asm.asm
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_asm.asm
@@ -1,31 +1,49 @@
-; /*aes_asm . asm
-; *
-; *Copyright[C]2006 -2014 wolfSSL Inc .
-; *
-; *This file is part of wolfssl. (formerly known as CyaSSL)
-; *
-; *wolfSSL is free software/ you can redistribute it and/or modify
-; *it under the terms of the GNU General Public License as published by
-; *the Free Software Foundation/ either version 2 of the License, or
-; *[at your option]any later version .
-; *
-; *wolfSSL ,is distributed in the hope that it will be useful
-; *but WITHOUT ANY WARRANTY/ without even the implied warranty of
-; *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
-; *GNU General Public License for more details .
-; *
-; *You should have received a copy of the GNU General Public License
-; *along with this program/ if not, write to the Free Software
-; *Foundation,Inc .,51 Franklin Street,Fifth Floor,Boston,MA 02110-1301,USA
-; */
+; /* aes_asm.asm
+; *
+; * Copyright (C) 2006-2020 wolfSSL Inc.
+; *
+; * This file is part of wolfSSL.
+; *
+; * wolfSSL is free software; you can redistribute it and/or modify
+; * it under the terms of the GNU General Public License as published by
+; * the Free Software Foundation; either version 2 of the License, or
+; * (at your option) any later version.
+; *
+; * wolfSSL is distributed in the hope that it will be useful,
+; * but WITHOUT ANY WARRANTY; without even the implied warranty of
+; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; * GNU General Public License for more details.
+; *
+; * You should have received a copy of the GNU General Public License
+; * along with this program; if not, write to the Free Software
+; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+; */
+
;
;
-; /*See IntelA dvanced Encryption Standard[AES]Instructions Set White Paper
-; *by Israel,Intel Mobility Group Development Center,Israel Shay Gueron
-; */
+; /* See Intel Advanced Encryption Standard (AES) Instructions Set White Paper
+; * by Israel, Intel Mobility Group Development Center, Israel Shay Gueron
+; */
;
; /* This file is in intel asm syntax, see .s for at&t syntax */
;
+
+
+fips_version = 0
+IFDEF HAVE_FIPS
+ fips_version = 1
+ IFDEF HAVE_FIPS_VERSION
+ fips_version = HAVE_FIPS_VERSION
+ ENDIF
+ENDIF
+
+IF fips_version GE 2
+ fipsAh SEGMENT ALIAS(".fipsA$h") 'CODE'
+ELSE
+ _text SEGMENT
+ENDIF
+
+
; /*
; AES_CBC_encrypt[const ,unsigned char*in
; unsigned ,char*out
@@ -34,7 +52,6 @@
; const ,unsigned char*KS
; int nr]
; */
-_text SEGMENT
AES_CBC_encrypt PROC
;# parameter 1: rdi
;# parameter 2: rsi
@@ -101,220 +118,753 @@ LAST:
AES_CBC_encrypt ENDP
-
-; /*
-; AES_CBC_decrypt[const ,unsigned char*in
-; unsigned ,char*out
-; unsigned ,char ivec+16
-; unsigned ,long length
-; const ,unsigned char*KS
-; int nr]
-; */
-; . globl AES_CBC_decrypt
-AES_CBC_decrypt PROC
-;# parameter 1: rdi
-;# parameter 2: rsi
-;# parameter 3: rdx
-;# parameter 4: rcx
-;# parameter 5: r8
-;# parameter 6: r9d
-
-; save rdi and rsi to rax and r11, restore before ret
- mov rax,rdi
- mov r11,rsi
-
-; convert to what we had for att&t convention
- mov rdi,rcx
- mov rsi,rdx
- mov rdx,r8
- mov rcx,r9
- mov r8,[rsp+40]
- mov r9d,[rsp+48]
-
-; on microsoft xmm6-xmm15 are non volaitle, let's save on stack and restore at end
- sub rsp,8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
- movdqa [rsp+0], xmm6
- movdqa [rsp+16], xmm7
- movdqa [rsp+32], xmm8
- movdqa [rsp+48], xmm9
- movdqa [rsp+64], xmm10
- movdqa [rsp+80], xmm11
- movdqa [rsp+96], xmm12
- movdqa [rsp+112], xmm15
-
- mov r10,rcx
- shr rcx,4
- shl r10,60
- je DNO_PARTS_4
- add rcx,1
+; void AES_CBC_decrypt_by4(const unsigned char* in,
+; unsigned char* out,
+; unsigned char ivec[16],
+; unsigned long length,
+; const unsigned char* KS,
+; int nr)
+AES_CBC_decrypt_by4 PROC
+; parameter 1: rdi
+; parameter 2: rsi
+; parameter 3: rdx
+; parameter 4: rcx
+; parameter 5: r8
+; parameter 6: r9d
+
+ ; save rdi and rsi to rax and r11, restore before ret
+ mov rax, rdi
+ mov r11, rsi
+ ; convert to what we had for att&t convention
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx,r9
+ mov r8, [rsp+40]
+ mov r9d, [rsp+48]
+ ; on microsoft xmm6-xmm15 are non volatile,
+ ; let's save on stack and restore at end
+ sub rsp, 8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
+ movdqa [rsp+0], xmm6
+ movdqa [rsp+16], xmm7
+ movdqa [rsp+32], xmm8
+ movdqa [rsp+48], xmm9
+ movdqa [rsp+64], xmm10
+ movdqa [rsp+80], xmm11
+ movdqa [rsp+96], xmm12
+ movdqa [rsp+112], xmm15
+ ; back to our original code, more or less
+ mov r10, rcx
+ shr rcx, 4
+ shl r10, 60
+ je DNO_PARTS_4
+ add rcx, 1
DNO_PARTS_4:
- mov r10,rcx
- shl r10,62
- shr r10,62
- shr rcx,2
- movdqu xmm5,[rdx]
- je DREMAINDER_4
- sub rsi,64
+ mov r10, rcx
+ shl r10, 62
+ shr r10, 62
+ shr rcx, 2
+ movdqu xmm5, [rdx]
+ je DREMAINDER_4
+ sub rsi, 64
DLOOP_4:
- movdqu xmm1,[rdi]
- movdqu xmm2,16[rdi]
- movdqu xmm3,32[rdi]
- movdqu xmm4,48[rdi]
- movdqa xmm6,xmm1
- movdqa xmm7,xmm2
- movdqa xmm8,xmm3
- movdqa xmm15,xmm4
- movdqa xmm9,[r8]
- movdqa xmm10,16[r8]
- movdqa xmm11,32[r8]
- movdqa xmm12,48[r8]
- pxor xmm1,xmm9
- pxor xmm2,xmm9
- pxor xmm3,xmm9
-
- pxor xmm4,xmm9
- aesdec xmm1,xmm10
- aesdec xmm2,xmm10
- aesdec xmm3,xmm10
- aesdec xmm4,xmm10
- aesdec xmm1,xmm11
- aesdec xmm2,xmm11
- aesdec xmm3,xmm11
- aesdec xmm4,xmm11
- aesdec xmm1,xmm12
- aesdec xmm2,xmm12
- aesdec xmm3,xmm12
- aesdec xmm4,xmm12
- movdqa xmm9,64[r8]
- movdqa xmm10,80[r8]
- movdqa xmm11,96[r8]
- movdqa xmm12,112[r8]
- aesdec xmm1,xmm9
- aesdec xmm2,xmm9
- aesdec xmm3,xmm9
- aesdec xmm4,xmm9
- aesdec xmm1,xmm10
- aesdec xmm2,xmm10
- aesdec xmm3,xmm10
- aesdec xmm4,xmm10
- aesdec xmm1,xmm11
- aesdec xmm2,xmm11
- aesdec xmm3,xmm11
- aesdec xmm4,xmm11
- aesdec xmm1,xmm12
- aesdec xmm2,xmm12
- aesdec xmm3,xmm12
- aesdec xmm4,xmm12
- movdqa xmm9,128[r8]
- movdqa xmm10,144[r8]
- movdqa xmm11,160[r8]
- cmp r9d,12
- aesdec xmm1,xmm9
- aesdec xmm2,xmm9
- aesdec xmm3,xmm9
- aesdec xmm4,xmm9
- aesdec xmm1,xmm10
- aesdec xmm2,xmm10
- aesdec xmm3,xmm10
- aesdec xmm4,xmm10
- jb DLAST_4
- movdqa xmm9,160[r8]
- movdqa xmm10,176[r8]
- movdqa xmm11,192[r8]
- cmp r9d,14
- aesdec xmm1,xmm9
- aesdec xmm2,xmm9
- aesdec xmm3,xmm9
- aesdec xmm4,xmm9
- aesdec xmm1,xmm10
- aesdec xmm2,xmm10
- aesdec xmm3,xmm10
- aesdec xmm4,xmm10
- jb DLAST_4
-
- movdqa xmm9,192[r8]
- movdqa xmm10,208[r8]
- movdqa xmm11,224[r8]
- aesdec xmm1,xmm9
- aesdec xmm2,xmm9
- aesdec xmm3,xmm9
- aesdec xmm4,xmm9
- aesdec xmm1,xmm10
- aesdec xmm2,xmm10
- aesdec xmm3,xmm10
- aesdec xmm4,xmm10
+ movdqu xmm1, [rdi]
+ movdqu xmm2, 16[rdi]
+ movdqu xmm3, 32[rdi]
+ movdqu xmm4, 48[rdi]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+ movdqa xmm8, xmm3
+ movdqa xmm15, xmm4
+ movdqa xmm9, [r8]
+ movdqa xmm10, 16[r8]
+ movdqa xmm11, 32[r8]
+ movdqa xmm12, 48[r8]
+ pxor xmm1, xmm9
+ pxor xmm2, xmm9
+ pxor xmm3, xmm9
+ pxor xmm4, xmm9
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ aesdec xmm1, xmm11
+ aesdec xmm2, xmm11
+ aesdec xmm3, xmm11
+ aesdec xmm4, xmm11
+ aesdec xmm1, xmm12
+ aesdec xmm2, xmm12
+ aesdec xmm3, xmm12
+ aesdec xmm4, xmm12
+ movdqa xmm9, 64[r8]
+ movdqa xmm10, 80[r8]
+ movdqa xmm11, 96[r8]
+ movdqa xmm12, 112[r8]
+ aesdec xmm1, xmm9
+ aesdec xmm2, xmm9
+ aesdec xmm3, xmm9
+ aesdec xmm4, xmm9
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ aesdec xmm1, xmm11
+ aesdec xmm2, xmm11
+ aesdec xmm3, xmm11
+ aesdec xmm4, xmm11
+ aesdec xmm1, xmm12
+ aesdec xmm2, xmm12
+ aesdec xmm3, xmm12
+ aesdec xmm4, xmm12
+ movdqa xmm9, 128[r8]
+ movdqa xmm10, 144[r8]
+ movdqa xmm11, 160[r8]
+ cmp r9d, 12
+ aesdec xmm1, xmm9
+ aesdec xmm2, xmm9
+ aesdec xmm3, xmm9
+ aesdec xmm4, xmm9
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ jb DLAST_4
+ movdqa xmm9, 160[r8]
+ movdqa xmm10, 176[r8]
+ movdqa xmm11, 192[r8]
+ cmp r9d, 14
+ aesdec xmm1, xmm9
+ aesdec xmm2, xmm9
+ aesdec xmm3, xmm9
+ aesdec xmm4, xmm9
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ jb DLAST_4
+ movdqa xmm9, 192[r8]
+ movdqa xmm10, 208[r8]
+ movdqa xmm11, 224[r8]
+ aesdec xmm1, xmm9
+ aesdec xmm2, xmm9
+ aesdec xmm3, xmm9
+ aesdec xmm4, xmm9
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
DLAST_4:
- add rdi,64
- add rsi,64
- dec rcx
- aesdeclast xmm1,xmm11
- aesdeclast xmm2,xmm11
- aesdeclast xmm3,xmm11
- aesdeclast xmm4,xmm11
- pxor xmm1,xmm5
- pxor xmm2,xmm6
- pxor xmm3,xmm7
- pxor xmm4,xmm8
- movdqu [rsi],xmm1
- movdqu 16[rsi],xmm2
- movdqu 32[rsi],xmm3
- movdqu 48[rsi],xmm4
- movdqa xmm5,xmm15
- jne DLOOP_4
- add rsi,64
+ add rdi, 64
+ add rsi, 64
+ dec rcx
+ aesdeclast xmm1, xmm11
+ aesdeclast xmm2, xmm11
+ aesdeclast xmm3, xmm11
+ aesdeclast xmm4, xmm11
+ pxor xmm1, xmm5
+ pxor xmm2, xmm6
+ pxor xmm3, xmm7
+ pxor xmm4, xmm8
+ movdqu [rsi], xmm1
+ movdqu 16[rsi], xmm2
+ movdqu 32[rsi], xmm3
+ movdqu 48[rsi], xmm4
+ movdqa xmm5, xmm15
+ jne DLOOP_4
+ add rsi, 64
DREMAINDER_4:
- cmp r10,0
- je DEND_4
+ cmp r10, 0
+ je DEND_4
DLOOP_4_2:
- movdqu xmm1,[rdi]
- movdqa xmm15,xmm1
- add rdi,16
- pxor xmm1,[r8]
- movdqu xmm2,160[r8]
- cmp r9d,12
- aesdec xmm1,16[r8]
- aesdec xmm1,32[r8]
- aesdec xmm1,48[r8]
- aesdec xmm1,64[r8]
- aesdec xmm1,80[r8]
- aesdec xmm1,96[r8]
- aesdec xmm1,112[r8]
- aesdec xmm1,128[r8]
- aesdec xmm1,144[r8]
- jb DLAST_4_2
- movdqu xmm2,192[r8]
- cmp r9d,14
- aesdec xmm1,160[r8]
- aesdec xmm1,176[r8]
- jb DLAST_4_2
- movdqu xmm2,224[r8]
- aesdec xmm1,192[r8]
- aesdec xmm1,208[r8]
+ movdqu xmm1, [rdi]
+ movdqa xmm15, xmm1
+ add rdi, 16
+ pxor xmm1, [r8]
+ movdqu xmm2, 160[r8]
+ cmp r9d, 12
+ aesdec xmm1, 16[r8]
+ aesdec xmm1, 32[r8]
+ aesdec xmm1, 48[r8]
+ aesdec xmm1, 64[r8]
+ aesdec xmm1, 80[r8]
+ aesdec xmm1, 96[r8]
+ aesdec xmm1, 112[r8]
+ aesdec xmm1, 128[r8]
+ aesdec xmm1, 144[r8]
+ jb DLAST_4_2
+ movdqu xmm2, 192[r8]
+ cmp r9d, 14
+ aesdec xmm1, 160[r8]
+ aesdec xmm1, 176[r8]
+ jb DLAST_4_2
+ movdqu xmm2, 224[r8]
+ aesdec xmm1, 192[r8]
+ aesdec xmm1, 208[r8]
DLAST_4_2:
- aesdeclast xmm1,xmm2
- pxor xmm1,xmm5
- movdqa xmm5,xmm15
- movdqu [rsi],xmm1
-
- add rsi,16
- dec r10
- jne DLOOP_4_2
+ aesdeclast xmm1, xmm2
+ pxor xmm1, xmm5
+ movdqa xmm5, xmm15
+ movdqu [rsi], xmm1
+ add rsi, 16
+ dec r10
+ jne DLOOP_4_2
DEND_4:
- ; restore non volatile rdi,rsi
- mov rdi,rax
- mov rsi,r11
- ; restore non volatile xmms from stack
- movdqa xmm6, [rsp+0]
- movdqa xmm7, [rsp+16]
- movdqa xmm8, [rsp+32]
- movdqa xmm9, [rsp+48]
- movdqa xmm10, [rsp+64]
- movdqa xmm11, [rsp+80]
- movdqa xmm12, [rsp+96]
- movdqa xmm15, [rsp+112]
- add rsp,8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
- ret
-AES_CBC_decrypt ENDP
+ ; restore non volatile rdi,rsi
+ mov rdi, rax
+ mov rsi, r11
+ ; restore non volatile xmms from stack
+ movdqa xmm6, [rsp+0]
+ movdqa xmm7, [rsp+16]
+ movdqa xmm8, [rsp+32]
+ movdqa xmm9, [rsp+48]
+ movdqa xmm10, [rsp+64]
+ movdqa xmm11, [rsp+80]
+ movdqa xmm12, [rsp+96]
+ movdqa xmm15, [rsp+112]
+ add rsp, 8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
+ ret
+AES_CBC_decrypt_by4 ENDP
+
+
+; void AES_CBC_decrypt_by6(const unsigned char *in,
+; unsigned char *out,
+; unsigned char ivec[16],
+; unsigned long length,
+; const unsigned char *KS,
+; int nr)
+AES_CBC_decrypt_by6 PROC
+; parameter 1: rdi - in
+; parameter 2: rsi - out
+; parameter 3: rdx - ivec
+; parameter 4: rcx - length
+; parameter 5: r8 - KS
+; parameter 6: r9d - nr
+
+ ; save rdi and rsi to rax and r11, restore before ret
+ mov rax, rdi
+ mov r11, rsi
+ ; convert to what we had for att&t convention
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, [rsp+40]
+ mov r9d, [rsp+48]
+ ; on microsoft xmm6-xmm15 are non volatile,
+ ; let's save on stack and restore at end
+ sub rsp, 8+9*16 ; 8 = align stack , 9 xmm6-14 16 bytes each
+ movdqa [rsp+0], xmm6
+ movdqa [rsp+16], xmm7
+ movdqa [rsp+32], xmm8
+ movdqa [rsp+48], xmm9
+ movdqa [rsp+64], xmm10
+ movdqa [rsp+80], xmm11
+ movdqa [rsp+96], xmm12
+ movdqa [rsp+112], xmm13
+ movdqa [rsp+128], xmm14
+ ; back to our original code, more or less
+ mov r10, rcx
+ shr rcx, 4
+ shl r10, 60
+ je DNO_PARTS_6
+ add rcx, 1
+DNO_PARTS_6:
+ mov r12, rax
+ mov r13, rdx
+ mov r14, rbx
+ mov rdx, 0
+ mov rax, rcx
+ mov rbx, 6
+ div rbx
+ mov rcx, rax
+ mov r10, rdx
+ mov rax, r12
+ mov rdx, r13
+ mov rbx, r14
+ cmp rcx, 0
+ movdqu xmm7, [rdx]
+ je DREMAINDER_6
+ sub rsi, 96
+DLOOP_6:
+ movdqu xmm1, [rdi]
+ movdqu xmm2, 16[rdi]
+ movdqu xmm3, 32[rdi]
+ movdqu xmm4, 48[rdi]
+ movdqu xmm5, 64[rdi]
+ movdqu xmm6, 80[rdi]
+ movdqa xmm8, [r8]
+ movdqa xmm9, 16[r8]
+ movdqa xmm10, 32[r8]
+ movdqa xmm11, 48[r8]
+ pxor xmm1, xmm8
+ pxor xmm2, xmm8
+ pxor xmm3, xmm8
+ pxor xmm4, xmm8
+ pxor xmm5, xmm8
+ pxor xmm6, xmm8
+ aesdec xmm1, xmm9
+ aesdec xmm2, xmm9
+ aesdec xmm3, xmm9
+ aesdec xmm4, xmm9
+ aesdec xmm5, xmm9
+ aesdec xmm6, xmm9
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ aesdec xmm5, xmm10
+ aesdec xmm6, xmm10
+ aesdec xmm1, xmm11
+ aesdec xmm2, xmm11
+ aesdec xmm3, xmm11
+ aesdec xmm4, xmm11
+ aesdec xmm5, xmm11
+ aesdec xmm6, xmm11
+ movdqa xmm8, 64[r8]
+ movdqa xmm9, 80[r8]
+ movdqa xmm10, 96[r8]
+ movdqa xmm11, 112[r8]
+ aesdec xmm1, xmm8
+ aesdec xmm2, xmm8
+ aesdec xmm3, xmm8
+ aesdec xmm4, xmm8
+ aesdec xmm5, xmm8
+ aesdec xmm6, xmm8
+ aesdec xmm1, xmm9
+ aesdec xmm2, xmm9
+ aesdec xmm3, xmm9
+ aesdec xmm4, xmm9
+ aesdec xmm5, xmm9
+ aesdec xmm6, xmm9
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ aesdec xmm5, xmm10
+ aesdec xmm6, xmm10
+ aesdec xmm1, xmm11
+ aesdec xmm2, xmm11
+ aesdec xmm3, xmm11
+ aesdec xmm4, xmm11
+ aesdec xmm5, xmm11
+ aesdec xmm6, xmm11
+ movdqa xmm8, 128[r8]
+ movdqa xmm9, 144[r8]
+ movdqa xmm10, 160[r8]
+ cmp r9d, 12
+ aesdec xmm1, xmm8
+ aesdec xmm2, xmm8
+ aesdec xmm3, xmm8
+ aesdec xmm4, xmm8
+ aesdec xmm5, xmm8
+ aesdec xmm6, xmm8
+ aesdec xmm1, xmm9
+ aesdec xmm2, xmm9
+ aesdec xmm3, xmm9
+ aesdec xmm4, xmm9
+ aesdec xmm5, xmm9
+ aesdec xmm6, xmm9
+ jb DLAST_6
+ movdqa xmm8, 160[r8]
+ movdqa xmm9, 176[r8]
+ movdqa xmm10, 192[r8]
+ cmp r9d, 14
+ aesdec xmm1, xmm8
+ aesdec xmm2, xmm8
+ aesdec xmm3, xmm8
+ aesdec xmm4, xmm8
+ aesdec xmm5, xmm8
+ aesdec xmm6, xmm8
+ aesdec xmm1, xmm9
+ aesdec xmm2, xmm9
+ aesdec xmm3, xmm9
+ aesdec xmm4, xmm9
+ aesdec xmm5, xmm9
+ aesdec xmm6, xmm9
+ jb DLAST_6
+ movdqa xmm8, 192[r8]
+ movdqa xmm9, 208[r8]
+ movdqa xmm10, 224[r8]
+ aesdec xmm1, xmm8
+ aesdec xmm2, xmm8
+ aesdec xmm3, xmm8
+ aesdec xmm4, xmm8
+ aesdec xmm5, xmm8
+ aesdec xmm6, xmm8
+ aesdec xmm1, xmm9
+ aesdec xmm2, xmm9
+ aesdec xmm3, xmm9
+ aesdec xmm4, xmm9
+ aesdec xmm5, xmm9
+ aesdec xmm6, xmm9
+DLAST_6:
+ add rsi, 96
+ aesdeclast xmm1, xmm10
+ aesdeclast xmm2, xmm10
+ aesdeclast xmm3, xmm10
+ aesdeclast xmm4, xmm10
+ aesdeclast xmm5, xmm10
+ aesdeclast xmm6, xmm10
+ movdqu xmm8, [rdi]
+ movdqu xmm9, 16[rdi]
+ movdqu xmm10, 32[rdi]
+ movdqu xmm11, 48[rdi]
+ movdqu xmm12, 64[rdi]
+ movdqu xmm13, 80[rdi]
+ pxor xmm1, xmm7
+ pxor xmm2, xmm8
+ pxor xmm3, xmm9
+ pxor xmm4, xmm10
+ pxor xmm5, xmm11
+ pxor xmm6, xmm12
+ movdqu xmm7, xmm13
+ movdqu [rsi], xmm1
+ movdqu 16[rsi], xmm2
+ movdqu 32[rsi], xmm3
+ movdqu 48[rsi], xmm4
+ movdqu 64[rsi], xmm5
+ movdqu 80[rsi], xmm6
+ add rdi, 96
+ dec rcx
+ jne DLOOP_6
+ add rsi, 96
+DREMAINDER_6:
+ cmp r10, 0
+ je DEND_6
+DLOOP_6_2:
+ movdqu xmm1, [rdi]
+ movdqa xmm10, xmm1
+ add rdi, 16
+ pxor xmm1, [r8]
+ movdqu xmm2, 160[r8]
+ cmp r9d, 12
+ aesdec xmm1, 16[r8]
+ aesdec xmm1, 32[r8]
+ aesdec xmm1, 48[r8]
+ aesdec xmm1, 64[r8]
+ aesdec xmm1, 80[r8]
+ aesdec xmm1, 96[r8]
+ aesdec xmm1, 112[r8]
+ aesdec xmm1, 128[r8]
+ aesdec xmm1, 144[r8]
+ jb DLAST_6_2
+ movdqu xmm2, 192[r8]
+ cmp r9d, 14
+ aesdec xmm1, 160[r8]
+ aesdec xmm1, 176[r8]
+ jb DLAST_6_2
+ movdqu xmm2, 224[r8]
+ aesdec xmm1, 192[r8]
+ aesdec xmm1, 208[r8]
+DLAST_6_2:
+ aesdeclast xmm1, xmm2
+ pxor xmm1, xmm7
+ movdqa xmm7, xmm10
+ movdqu [rsi], xmm1
+ add rsi, 16
+ dec r10
+ jne DLOOP_6_2
+DEND_6:
+ ; restore non volatile rdi,rsi
+ mov rdi, rax
+ mov rsi, r11
+ ; restore non volatile xmms from stack
+ movdqa xmm6, [rsp+0]
+ movdqa xmm7, [rsp+16]
+ movdqa xmm8, [rsp+32]
+ movdqa xmm9, [rsp+48]
+ movdqa xmm10, [rsp+64]
+ movdqa xmm11, [rsp+80]
+ movdqa xmm12, [rsp+96]
+ movdqa xmm13, [rsp+112]
+ movdqa xmm14, [rsp+128]
+ add rsp, 8+9*16 ; 8 = align stack , 9 xmm6-14 16 bytes each
+ ret
+AES_CBC_decrypt_by6 ENDP
+
+
+; void AES_CBC_decrypt_by8(const unsigned char *in,
+; unsigned char *out,
+; unsigned char ivec[16],
+; unsigned long length,
+; const unsigned char *KS,
+; int nr)
+AES_CBC_decrypt_by8 PROC
+; parameter 1: rdi - in
+; parameter 2: rsi - out
+; parameter 3: rdx - ivec
+; parameter 4: rcx - length
+; parameter 5: r8 - KS
+; parameter 6: r9d - nr
+
+ ; save rdi and rsi to rax and r11, restore before ret
+ mov rax, rdi
+ mov r11, rsi
+ ; convert to what we had for att&t convention
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx,r9
+ mov r8, [rsp+40]
+ mov r9d, [rsp+48]
+ ; on microsoft xmm6-xmm15 are non volatile,
+ ; let's save on stack and restore at end
+ sub rsp, 8+8*16 ; 8 = align stack , 8 xmm6-13 16 bytes each
+ movdqa [rsp+0], xmm6
+ movdqa [rsp+16], xmm7
+ movdqa [rsp+32], xmm8
+ movdqa [rsp+48], xmm9
+ movdqa [rsp+64], xmm10
+ movdqa [rsp+80], xmm11
+ movdqa [rsp+96], xmm12
+ movdqa [rsp+112], xmm13
+ ; back to our original code, more or less
+ mov r10, rcx
+ shr rcx, 4
+ shl r10, 60
+ je DNO_PARTS_8
+ add rcx, 1
+DNO_PARTS_8:
+ mov r10, rcx
+ shl r10, 61
+ shr r10, 61
+ shr rcx, 3
+ movdqu xmm9, [rdx]
+ je DREMAINDER_8
+ sub rsi, 128
+DLOOP_8:
+ movdqu xmm1, [rdi]
+ movdqu xmm2, 16[rdi]
+ movdqu xmm3, 32[rdi]
+ movdqu xmm4, 48[rdi]
+ movdqu xmm5, 64[rdi]
+ movdqu xmm6, 80[rdi]
+ movdqu xmm7, 96[rdi]
+ movdqu xmm8, 112[rdi]
+ movdqa xmm10, [r8]
+ movdqa xmm11, 16[r8]
+ movdqa xmm12, 32[r8]
+ movdqa xmm13, 48[r8]
+ pxor xmm1, xmm10
+ pxor xmm2, xmm10
+ pxor xmm3, xmm10
+ pxor xmm4, xmm10
+ pxor xmm5, xmm10
+ pxor xmm6, xmm10
+ pxor xmm7, xmm10
+ pxor xmm8, xmm10
+ aesdec xmm1, xmm11
+ aesdec xmm2, xmm11
+ aesdec xmm3, xmm11
+ aesdec xmm4, xmm11
+ aesdec xmm5, xmm11
+ aesdec xmm6, xmm11
+ aesdec xmm7, xmm11
+ aesdec xmm8, xmm11
+ aesdec xmm1, xmm12
+ aesdec xmm2, xmm12
+ aesdec xmm3, xmm12
+ aesdec xmm4, xmm12
+ aesdec xmm5, xmm12
+ aesdec xmm6, xmm12
+ aesdec xmm7, xmm12
+ aesdec xmm8, xmm12
+ aesdec xmm1, xmm13
+ aesdec xmm2, xmm13
+ aesdec xmm3, xmm13
+ aesdec xmm4, xmm13
+ aesdec xmm5, xmm13
+ aesdec xmm6, xmm13
+ aesdec xmm7, xmm13
+ aesdec xmm8, xmm13
+ movdqa xmm10, 64[r8]
+ movdqa xmm11, 80[r8]
+ movdqa xmm12, 96[r8]
+ movdqa xmm13, 112[r8]
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ aesdec xmm5, xmm10
+ aesdec xmm6, xmm10
+ aesdec xmm7, xmm10
+ aesdec xmm8, xmm10
+ aesdec xmm1, xmm11
+ aesdec xmm2, xmm11
+ aesdec xmm3, xmm11
+ aesdec xmm4, xmm11
+ aesdec xmm5, xmm11
+ aesdec xmm6, xmm11
+ aesdec xmm7, xmm11
+ aesdec xmm8, xmm11
+ aesdec xmm1, xmm12
+ aesdec xmm2, xmm12
+ aesdec xmm3, xmm12
+ aesdec xmm4, xmm12
+ aesdec xmm5, xmm12
+ aesdec xmm6, xmm12
+ aesdec xmm7, xmm12
+ aesdec xmm8, xmm12
+ aesdec xmm1, xmm13
+ aesdec xmm2, xmm13
+ aesdec xmm3, xmm13
+ aesdec xmm4, xmm13
+ aesdec xmm5, xmm13
+ aesdec xmm6, xmm13
+ aesdec xmm7, xmm13
+ aesdec xmm8, xmm13
+ movdqa xmm10, 128[r8]
+ movdqa xmm11, 144[r8]
+ movdqa xmm12, 160[r8]
+ cmp r9d, 12
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ aesdec xmm5, xmm10
+ aesdec xmm6, xmm10
+ aesdec xmm7, xmm10
+ aesdec xmm8, xmm10
+ aesdec xmm1, xmm11
+ aesdec xmm2, xmm11
+ aesdec xmm3, xmm11
+ aesdec xmm4, xmm11
+ aesdec xmm5, xmm11
+ aesdec xmm6, xmm11
+ aesdec xmm7, xmm11
+ aesdec xmm8, xmm11
+ jb DLAST_8
+ movdqa xmm10, 160[r8]
+ movdqa xmm11, 176[r8]
+ movdqa xmm12, 192[r8]
+ cmp r9d, 14
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ aesdec xmm5, xmm10
+ aesdec xmm6, xmm10
+ aesdec xmm7, xmm10
+ aesdec xmm8, xmm10
+ aesdec xmm1, xmm11
+ aesdec xmm2, xmm11
+ aesdec xmm3, xmm11
+ aesdec xmm4, xmm11
+ aesdec xmm5, xmm11
+ aesdec xmm6, xmm11
+ aesdec xmm7, xmm11
+ aesdec xmm8, xmm11
+ jb DLAST_8
+ movdqa xmm10, 192[r8]
+ movdqa xmm11, 208[r8]
+ movdqa xmm12, 224[r8]
+ aesdec xmm1, xmm10
+ aesdec xmm2, xmm10
+ aesdec xmm3, xmm10
+ aesdec xmm4, xmm10
+ aesdec xmm5, xmm10
+ aesdec xmm6, xmm10
+ aesdec xmm7, xmm10
+ aesdec xmm8, xmm10
+ aesdec xmm1, xmm11
+ aesdec xmm2, xmm11
+ aesdec xmm3, xmm11
+ aesdec xmm4, xmm11
+ aesdec xmm5, xmm11
+ aesdec xmm6, xmm11
+ aesdec xmm7, xmm11
+ aesdec xmm8, xmm11
+DLAST_8:
+ add rsi, 128
+ aesdeclast xmm1, xmm12
+ aesdeclast xmm2, xmm12
+ aesdeclast xmm3, xmm12
+ aesdeclast xmm4, xmm12
+ aesdeclast xmm5, xmm12
+ aesdeclast xmm6, xmm12
+ aesdeclast xmm7, xmm12
+ aesdeclast xmm8, xmm12
+ movdqu xmm10, [rdi]
+ movdqu xmm11, 16[rdi]
+ movdqu xmm12, 32[rdi]
+ movdqu xmm13, 48[rdi]
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ movdqu xmm10, 64[rdi]
+ movdqu xmm11, 80[rdi]
+ movdqu xmm12, 96[rdi]
+ movdqu xmm9, 112[rdi]
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ pxor xmm8, xmm12
+ movdqu [rsi], xmm1
+ movdqu 16[rsi], xmm2
+ movdqu 32[rsi], xmm3
+ movdqu 48[rsi], xmm4
+ movdqu 64[rsi], xmm5
+ movdqu 80[rsi], xmm6
+ movdqu 96[rsi], xmm7
+ movdqu 112[rsi], xmm8
+ add rdi, 128
+ dec rcx
+ jne DLOOP_8
+ add rsi, 128
+DREMAINDER_8:
+ cmp r10, 0
+ je DEND_8
+DLOOP_8_2:
+ movdqu xmm1, [rdi]
+ movdqa xmm10, xmm1
+ add rdi, 16
+ pxor xmm1, [r8]
+ movdqu xmm2, 160[r8]
+ cmp r9d, 12
+ aesdec xmm1, 16[r8]
+ aesdec xmm1, 32[r8]
+ aesdec xmm1, 48[r8]
+ aesdec xmm1, 64[r8]
+ aesdec xmm1, 80[r8]
+ aesdec xmm1, 96[r8]
+ aesdec xmm1, 112[r8]
+ aesdec xmm1, 128[r8]
+ aesdec xmm1, 144[r8]
+ jb DLAST_8_2
+ movdqu xmm2, 192[r8]
+ cmp r9d, 14
+ aesdec xmm1, 160[r8]
+ aesdec xmm1, 176[r8]
+ jb DLAST_8_2
+ movdqu xmm2, 224[r8]
+ aesdec xmm1, 192[r8]
+ aesdec xmm1, 208[r8]
+DLAST_8_2:
+ aesdeclast xmm1, xmm2
+ pxor xmm1, xmm9
+ movdqa xmm9, xmm10
+ movdqu [rsi], xmm1
+ add rsi, 16
+ dec r10
+ jne DLOOP_8_2
+DEND_8:
+ ; restore non volatile rdi,rsi
+ mov rdi, rax
+ mov rsi, r11
+ ; restore non volatile xmms from stack
+ movdqa xmm6, [rsp+0]
+ movdqa xmm7, [rsp+16]
+ movdqa xmm8, [rsp+32]
+ movdqa xmm9, [rsp+48]
+ movdqa xmm10, [rsp+64]
+ movdqa xmm11, [rsp+80]
+ movdqa xmm12, [rsp+96]
+ movdqa xmm13, [rsp+112]
+ add rsp, 8+8*16 ; 8 = align stack , 8 xmm6-13 16 bytes each
+ ret
+AES_CBC_decrypt_by8 ENDP
+
; /*
; AES_ECB_encrypt[const ,unsigned char*in
@@ -794,7 +1344,7 @@ AES_192_Key_Expansion PROC
movdqa [rsp+0], xmm6
movdqu xmm1,[rdi]
- movdqu xmm3,16[rdi]
+ movq xmm3,qword ptr 16[rdi]
movdqa [rsi],xmm1
movdqa xmm5,xmm3
@@ -969,4 +1519,11 @@ MAKE_RK256_b:
pxor xmm3,xmm2
ret
+
+IF fips_version GE 2
+ fipsAh ENDS
+ELSE
+ _text ENDS
+ENDIF
+
END
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_gcm_asm.S b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_gcm_asm.S
new file mode 100644
index 000000000..e878690e8
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes_gcm_asm.S
@@ -0,0 +1,8733 @@
+/* aes_gcm_asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Compile-time CPU feature selection for this file: the AVX1 code paths
+ * are always enabled; the AVX2 paths are enabled unless the build defines
+ * NO_AVX2_SUPPORT. */
+#ifndef HAVE_INTEL_AVX1
+#define HAVE_INTEL_AVX1
+#endif /* HAVE_INTEL_AVX1 */
+#ifndef NO_AVX2_SUPPORT
+#define HAVE_INTEL_AVX2
+#endif /* NO_AVX2_SUPPORT */
+
+/* 128-bit counter-step constants 1..8: added with paddd to advance the
+ * AES-CTR counter block when processing up to eight blocks in parallel. */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_one:
+.quad 0x0, 0x1
+/* counter step 2 */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_two:
+.quad 0x0, 0x2
+/* counter step 3 */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_three:
+.quad 0x0, 0x3
+/* counter step 4 */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_four:
+.quad 0x0, 0x4
+/* counter step 5 */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_five:
+.quad 0x0, 0x5
+/* counter step 6 */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_six:
+.quad 0x0, 0x6
+/* counter step 7 */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_seven:
+.quad 0x0, 0x7
+/* counter step 8 */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_eight:
+.quad 0x0, 0x8
+/* pshufb mask: byte-reverse each 64-bit half of an XMM register
+ * (indices 7..0 in the low quad, 15..8 in the high quad). */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_bswap_epi64:
+.quad 0x1020304050607, 0x8090a0b0c0d0e0f
+/* pshufb mask: reverse all 16 bytes of an XMM register (full 128-bit
+ * endianness swap), applied to data entering and leaving GHASH. */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_bswap_mask:
+.quad 0x8090a0b0c0d0e0f, 0x1020304050607
+/* GF(2^128) reduction constant (low quad 0x1, high quad 0xc2 << 56);
+ * used via pand when deriving the hash key H from the doubled value
+ * (see the carry-propagating shift sequence in the counter/H setup). */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_aes_gcm_mod2_128:
+.quad 0x1, 0xc200000000000000
+#ifndef __APPLE__
+.text
+.globl AES_GCM_encrypt
+.type AES_GCM_encrypt,@function
+.align 4
+AES_GCM_encrypt:
+#else
+.section __TEXT,__text
+.globl _AES_GCM_encrypt
+.p2align 2
+_AES_GCM_encrypt:
+#endif /* __APPLE__ */
+ pushq %r13
+ pushq %r12
+ pushq %rbx
+ pushq %r14
+ pushq %r15
+ movq %rdx, %r12
+ movq %rcx, %rax
+ movl 48(%rsp), %r11d
+ movl 56(%rsp), %ebx
+ movl 64(%rsp), %r14d
+ movq 72(%rsp), %r15
+ movl 80(%rsp), %r10d
+ subq $0xa0, %rsp
+ pxor %xmm4, %xmm4
+ pxor %xmm6, %xmm6
+ cmpl $12, %ebx
+ movl %ebx, %edx
+ jne L_AES_GCM_encrypt_iv_not_12
+ # # Calculate values when IV is 12 bytes
+ # Set counter based on IV
+ movl $0x1000000, %ecx
+ pinsrq $0x00, (%rax), %xmm4
+ pinsrd $2, 8(%rax), %xmm4
+ pinsrd $3, %ecx, %xmm4
+ # H = Encrypt X(=0) and T = Encrypt counter
+ movdqa %xmm4, %xmm1
+ movdqa (%r15), %xmm5
+ pxor %xmm5, %xmm1
+ movdqa 16(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 32(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 48(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 64(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 80(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 96(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 112(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 128(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 144(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm7
+ jl L_AES_GCM_encrypt_calc_iv_12_last
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 176(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm7
+ jl L_AES_GCM_encrypt_calc_iv_12_last
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 208(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 224(%r15), %xmm7
+L_AES_GCM_encrypt_calc_iv_12_last:
+ aesenclast %xmm7, %xmm5
+ aesenclast %xmm7, %xmm1
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm5
+ movdqa %xmm1, 144(%rsp)
+ jmp L_AES_GCM_encrypt_iv_done
+L_AES_GCM_encrypt_iv_not_12:
+ # Calculate values when IV is not 12 bytes
+ # H = Encrypt X(=0)
+ movdqa (%r15), %xmm5
+ aesenc 16(%r15), %xmm5
+ aesenc 32(%r15), %xmm5
+ aesenc 48(%r15), %xmm5
+ aesenc 64(%r15), %xmm5
+ aesenc 80(%r15), %xmm5
+ aesenc 96(%r15), %xmm5
+ aesenc 112(%r15), %xmm5
+ aesenc 128(%r15), %xmm5
+ aesenc 144(%r15), %xmm5
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last
+ aesenc %xmm9, %xmm5
+ aesenc 176(%r15), %xmm5
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last
+ aesenc %xmm9, %xmm5
+ aesenc 208(%r15), %xmm5
+ movdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last:
+ aesenclast %xmm9, %xmm5
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm5
+ # Calc counter
+ # Initialization vector
+ cmpl $0x00, %edx
+ movq $0x00, %rcx
+ je L_AES_GCM_encrypt_calc_iv_done
+ cmpl $16, %edx
+ jl L_AES_GCM_encrypt_calc_iv_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_encrypt_calc_iv_16_loop:
+ movdqu (%rax,%rcx,1), %xmm8
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm4
+ pshufd $0x4e, %xmm4, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm4, %xmm3
+ pclmulqdq $0x00, %xmm4, %xmm0
+ pxor %xmm4, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm4
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm4, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm4
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm4
+ por %xmm0, %xmm7
+ por %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm4
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_calc_iv_16_loop
+ movl %ebx, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_encrypt_calc_iv_done
+L_AES_GCM_encrypt_calc_iv_lt16:
+ subq $16, %rsp
+ pxor %xmm8, %xmm8
+ xorl %ebx, %ebx
+ movdqa %xmm8, (%rsp)
+L_AES_GCM_encrypt_calc_iv_loop:
+ movzbl (%rax,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_calc_iv_loop
+ movdqa (%rsp), %xmm8
+ addq $16, %rsp
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm4
+ pshufd $0x4e, %xmm4, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm4, %xmm3
+ pclmulqdq $0x00, %xmm4, %xmm0
+ pxor %xmm4, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm4
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm4, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm4
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm4
+ por %xmm0, %xmm7
+ por %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm4
+L_AES_GCM_encrypt_calc_iv_done:
+ # T = Encrypt counter
+ pxor %xmm0, %xmm0
+ shll $3, %edx
+ pinsrq $0x00, %rdx, %xmm0
+ pxor %xmm0, %xmm4
+ pshufd $0x4e, %xmm4, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm4, %xmm3
+ pclmulqdq $0x00, %xmm4, %xmm0
+ pxor %xmm4, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm4
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm4, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm4
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm4
+ por %xmm0, %xmm7
+ por %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm4
+ # Encrypt counter
+ movdqa (%r15), %xmm8
+ pxor %xmm4, %xmm8
+ aesenc 16(%r15), %xmm8
+ aesenc 32(%r15), %xmm8
+ aesenc 48(%r15), %xmm8
+ aesenc 64(%r15), %xmm8
+ aesenc 80(%r15), %xmm8
+ aesenc 96(%r15), %xmm8
+ aesenc 112(%r15), %xmm8
+ aesenc 128(%r15), %xmm8
+ aesenc 144(%r15), %xmm8
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last
+ aesenc %xmm9, %xmm8
+ aesenc 176(%r15), %xmm8
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last
+ aesenc %xmm9, %xmm8
+ aesenc 208(%r15), %xmm8
+ movdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last:
+ aesenclast %xmm9, %xmm8
+ movdqa %xmm8, 144(%rsp)
+L_AES_GCM_encrypt_iv_done:
+ # Additional authentication data
+ movl %r11d, %edx
+ cmpl $0x00, %edx
+ je L_AES_GCM_encrypt_calc_aad_done
+ xorl %ecx, %ecx
+ cmpl $16, %edx
+ jl L_AES_GCM_encrypt_calc_aad_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_encrypt_calc_aad_16_loop:
+ movdqu (%r12,%rcx,1), %xmm8
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm6
+ pshufd $0x4e, %xmm6, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm6, %xmm3
+ pclmulqdq $0x00, %xmm6, %xmm0
+ pxor %xmm6, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm6
+ movdqa %xmm7, %xmm0
+ movdqa %xmm6, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm6
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm6
+ por %xmm0, %xmm7
+ por %xmm1, %xmm6
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm6
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_calc_aad_16_loop
+ movl %r11d, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_encrypt_calc_aad_done
+L_AES_GCM_encrypt_calc_aad_lt16:
+ subq $16, %rsp
+ pxor %xmm8, %xmm8
+ xorl %ebx, %ebx
+ movdqa %xmm8, (%rsp)
+L_AES_GCM_encrypt_calc_aad_loop:
+ movzbl (%r12,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_calc_aad_loop
+ movdqa (%rsp), %xmm8
+ addq $16, %rsp
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm6
+ pshufd $0x4e, %xmm6, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm6, %xmm3
+ pclmulqdq $0x00, %xmm6, %xmm0
+ pxor %xmm6, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm6
+ movdqa %xmm7, %xmm0
+ movdqa %xmm6, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm6
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm6
+ por %xmm0, %xmm7
+ por %xmm1, %xmm6
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm6
+L_AES_GCM_encrypt_calc_aad_done:
+ # Calculate counter and H
+ pshufb L_aes_gcm_bswap_epi64(%rip), %xmm4
+ movdqa %xmm5, %xmm9
+ paddd L_aes_gcm_one(%rip), %xmm4
+ movdqa %xmm5, %xmm8
+ movdqa %xmm4, 128(%rsp)
+ psrlq $63, %xmm9
+ psllq $0x01, %xmm8
+ pslldq $8, %xmm9
+ por %xmm9, %xmm8
+ pshufd $0xff, %xmm5, %xmm5
+ psrad $31, %xmm5
+ pand L_aes_gcm_mod2_128(%rip), %xmm5
+ pxor %xmm8, %xmm5
+ xorq %rbx, %rbx
+ cmpl $0x80, %r9d
+ movl %r9d, %r13d
+ jl L_AES_GCM_encrypt_done_128
+ andl $0xffffff80, %r13d
+ movdqa %xmm6, %xmm2
+ # H ^ 1
+ movdqa %xmm5, (%rsp)
+ # H ^ 2
+ pshufd $0x4e, %xmm5, %xmm9
+ pshufd $0x4e, %xmm5, %xmm10
+ movdqa %xmm5, %xmm11
+ movdqa %xmm5, %xmm8
+ pclmulqdq $0x11, %xmm5, %xmm11
+ pclmulqdq $0x00, %xmm5, %xmm8
+ pxor %xmm5, %xmm9
+ pxor %xmm5, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm0
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm0
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm0
+ movdqa %xmm0, 16(%rsp)
+ # H ^ 3
+ pshufd $0x4e, %xmm5, %xmm9
+ pshufd $0x4e, %xmm0, %xmm10
+ movdqa %xmm0, %xmm11
+ movdqa %xmm0, %xmm8
+ pclmulqdq $0x11, %xmm5, %xmm11
+ pclmulqdq $0x00, %xmm5, %xmm8
+ pxor %xmm5, %xmm9
+ pxor %xmm0, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm1
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm1
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm1
+ movdqa %xmm1, 32(%rsp)
+ # H ^ 4
+ pshufd $0x4e, %xmm0, %xmm9
+ pshufd $0x4e, %xmm0, %xmm10
+ movdqa %xmm0, %xmm11
+ movdqa %xmm0, %xmm8
+ pclmulqdq $0x11, %xmm0, %xmm11
+ pclmulqdq $0x00, %xmm0, %xmm8
+ pxor %xmm0, %xmm9
+ pxor %xmm0, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm3
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm3
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm3
+ movdqa %xmm3, 48(%rsp)
+ # H ^ 5
+ pshufd $0x4e, %xmm0, %xmm9
+ pshufd $0x4e, %xmm1, %xmm10
+ movdqa %xmm1, %xmm11
+ movdqa %xmm1, %xmm8
+ pclmulqdq $0x11, %xmm0, %xmm11
+ pclmulqdq $0x00, %xmm0, %xmm8
+ pxor %xmm0, %xmm9
+ pxor %xmm1, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm7
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm7
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm7
+ movdqa %xmm7, 64(%rsp)
+ # H ^ 6
+ pshufd $0x4e, %xmm1, %xmm9
+ pshufd $0x4e, %xmm1, %xmm10
+ movdqa %xmm1, %xmm11
+ movdqa %xmm1, %xmm8
+ pclmulqdq $0x11, %xmm1, %xmm11
+ pclmulqdq $0x00, %xmm1, %xmm8
+ pxor %xmm1, %xmm9
+ pxor %xmm1, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm7
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm7
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm7
+ movdqa %xmm7, 80(%rsp)
+ # H ^ 7
+ pshufd $0x4e, %xmm1, %xmm9
+ pshufd $0x4e, %xmm3, %xmm10
+ movdqa %xmm3, %xmm11
+ movdqa %xmm3, %xmm8
+ pclmulqdq $0x11, %xmm1, %xmm11
+ pclmulqdq $0x00, %xmm1, %xmm8
+ pxor %xmm1, %xmm9
+ pxor %xmm3, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm7
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm7
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm7
+ movdqa %xmm7, 96(%rsp)
+ # H ^ 8
+ pshufd $0x4e, %xmm3, %xmm9
+ pshufd $0x4e, %xmm3, %xmm10
+ movdqa %xmm3, %xmm11
+ movdqa %xmm3, %xmm8
+ pclmulqdq $0x11, %xmm3, %xmm11
+ pclmulqdq $0x00, %xmm3, %xmm8
+ pxor %xmm3, %xmm9
+ pxor %xmm3, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm7
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm7
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm7
+ movdqa %xmm7, 112(%rsp)
+ # First 128 bytes of input
+ movdqa 128(%rsp), %xmm8
+ movdqa L_aes_gcm_bswap_epi64(%rip), %xmm1
+ movdqa %xmm8, %xmm0
+ pshufb %xmm1, %xmm8
+ movdqa %xmm0, %xmm9
+ paddd L_aes_gcm_one(%rip), %xmm9
+ pshufb %xmm1, %xmm9
+ movdqa %xmm0, %xmm10
+ paddd L_aes_gcm_two(%rip), %xmm10
+ pshufb %xmm1, %xmm10
+ movdqa %xmm0, %xmm11
+ paddd L_aes_gcm_three(%rip), %xmm11
+ pshufb %xmm1, %xmm11
+ movdqa %xmm0, %xmm12
+ paddd L_aes_gcm_four(%rip), %xmm12
+ pshufb %xmm1, %xmm12
+ movdqa %xmm0, %xmm13
+ paddd L_aes_gcm_five(%rip), %xmm13
+ pshufb %xmm1, %xmm13
+ movdqa %xmm0, %xmm14
+ paddd L_aes_gcm_six(%rip), %xmm14
+ pshufb %xmm1, %xmm14
+ movdqa %xmm0, %xmm15
+ paddd L_aes_gcm_seven(%rip), %xmm15
+ pshufb %xmm1, %xmm15
+ paddd L_aes_gcm_eight(%rip), %xmm0
+ movdqa (%r15), %xmm7
+ movdqa %xmm0, 128(%rsp)
+ pxor %xmm7, %xmm8
+ pxor %xmm7, %xmm9
+ pxor %xmm7, %xmm10
+ pxor %xmm7, %xmm11
+ pxor %xmm7, %xmm12
+ pxor %xmm7, %xmm13
+ pxor %xmm7, %xmm14
+ pxor %xmm7, %xmm15
+ movdqa 16(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 32(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 48(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 64(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 80(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 96(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 112(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 128(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 144(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm7
+ jl L_AES_GCM_encrypt_enc_done
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 176(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm7
+ jl L_AES_GCM_encrypt_enc_done
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 208(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 224(%r15), %xmm7
+L_AES_GCM_encrypt_enc_done:
+ aesenclast %xmm7, %xmm8
+ aesenclast %xmm7, %xmm9
+ movdqu (%rdi), %xmm0
+ movdqu 16(%rdi), %xmm1
+ pxor %xmm0, %xmm8
+ pxor %xmm1, %xmm9
+ movdqu %xmm8, (%rsi)
+ movdqu %xmm9, 16(%rsi)
+ aesenclast %xmm7, %xmm10
+ aesenclast %xmm7, %xmm11
+ movdqu 32(%rdi), %xmm0
+ movdqu 48(%rdi), %xmm1
+ pxor %xmm0, %xmm10
+ pxor %xmm1, %xmm11
+ movdqu %xmm10, 32(%rsi)
+ movdqu %xmm11, 48(%rsi)
+ aesenclast %xmm7, %xmm12
+ aesenclast %xmm7, %xmm13
+ movdqu 64(%rdi), %xmm0
+ movdqu 80(%rdi), %xmm1
+ pxor %xmm0, %xmm12
+ pxor %xmm1, %xmm13
+ movdqu %xmm12, 64(%rsi)
+ movdqu %xmm13, 80(%rsi)
+ aesenclast %xmm7, %xmm14
+ aesenclast %xmm7, %xmm15
+ movdqu 96(%rdi), %xmm0
+ movdqu 112(%rdi), %xmm1
+ pxor %xmm0, %xmm14
+ pxor %xmm1, %xmm15
+ movdqu %xmm14, 96(%rsi)
+ movdqu %xmm15, 112(%rsi)
+ cmpl $0x80, %r13d
+ movl $0x80, %ebx
+ jle L_AES_GCM_encrypt_end_128
+ # More 128 bytes of input
+L_AES_GCM_encrypt_ghash_128:
+ leaq (%rdi,%rbx,1), %rcx
+ leaq (%rsi,%rbx,1), %rdx
+ movdqa 128(%rsp), %xmm8
+ movdqa L_aes_gcm_bswap_epi64(%rip), %xmm1
+ movdqa %xmm8, %xmm0
+ pshufb %xmm1, %xmm8
+ movdqa %xmm0, %xmm9
+ paddd L_aes_gcm_one(%rip), %xmm9
+ pshufb %xmm1, %xmm9
+ movdqa %xmm0, %xmm10
+ paddd L_aes_gcm_two(%rip), %xmm10
+ pshufb %xmm1, %xmm10
+ movdqa %xmm0, %xmm11
+ paddd L_aes_gcm_three(%rip), %xmm11
+ pshufb %xmm1, %xmm11
+ movdqa %xmm0, %xmm12
+ paddd L_aes_gcm_four(%rip), %xmm12
+ pshufb %xmm1, %xmm12
+ movdqa %xmm0, %xmm13
+ paddd L_aes_gcm_five(%rip), %xmm13
+ pshufb %xmm1, %xmm13
+ movdqa %xmm0, %xmm14
+ paddd L_aes_gcm_six(%rip), %xmm14
+ pshufb %xmm1, %xmm14
+ movdqa %xmm0, %xmm15
+ paddd L_aes_gcm_seven(%rip), %xmm15
+ pshufb %xmm1, %xmm15
+ paddd L_aes_gcm_eight(%rip), %xmm0
+ movdqa (%r15), %xmm7
+ movdqa %xmm0, 128(%rsp)
+ pxor %xmm7, %xmm8
+ pxor %xmm7, %xmm9
+ pxor %xmm7, %xmm10
+ pxor %xmm7, %xmm11
+ pxor %xmm7, %xmm12
+ pxor %xmm7, %xmm13
+ pxor %xmm7, %xmm14
+ pxor %xmm7, %xmm15
+ movdqa 112(%rsp), %xmm7
+ movdqu -128(%rdx), %xmm0
+ aesenc 16(%r15), %xmm8
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ pxor %xmm2, %xmm0
+ pshufd $0x4e, %xmm7, %xmm1
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm7, %xmm1
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm3
+ pclmulqdq $0x11, %xmm7, %xmm3
+ aesenc 16(%r15), %xmm9
+ aesenc 16(%r15), %xmm10
+ movdqa %xmm0, %xmm2
+ pclmulqdq $0x00, %xmm7, %xmm2
+ aesenc 16(%r15), %xmm11
+ aesenc 16(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm1
+ aesenc 16(%r15), %xmm13
+ aesenc 16(%r15), %xmm14
+ aesenc 16(%r15), %xmm15
+ pxor %xmm2, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa 96(%rsp), %xmm7
+ movdqu -112(%rdx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 32(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 32(%r15), %xmm9
+ aesenc 32(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 32(%r15), %xmm11
+ aesenc 32(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 32(%r15), %xmm13
+ aesenc 32(%r15), %xmm14
+ aesenc 32(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 80(%rsp), %xmm7
+ movdqu -96(%rdx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 48(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 48(%r15), %xmm9
+ aesenc 48(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 48(%r15), %xmm11
+ aesenc 48(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 48(%r15), %xmm13
+ aesenc 48(%r15), %xmm14
+ aesenc 48(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 64(%rsp), %xmm7
+ movdqu -80(%rdx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 64(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 64(%r15), %xmm9
+ aesenc 64(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 64(%r15), %xmm11
+ aesenc 64(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 64(%r15), %xmm13
+ aesenc 64(%r15), %xmm14
+ aesenc 64(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 48(%rsp), %xmm7
+ movdqu -64(%rdx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 80(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 80(%r15), %xmm9
+ aesenc 80(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 80(%r15), %xmm11
+ aesenc 80(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 80(%r15), %xmm13
+ aesenc 80(%r15), %xmm14
+ aesenc 80(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 32(%rsp), %xmm7
+ movdqu -48(%rdx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 96(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 96(%r15), %xmm9
+ aesenc 96(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 96(%r15), %xmm11
+ aesenc 96(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 96(%r15), %xmm13
+ aesenc 96(%r15), %xmm14
+ aesenc 96(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 16(%rsp), %xmm7
+ movdqu -32(%rdx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 112(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 112(%r15), %xmm9
+ aesenc 112(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 112(%r15), %xmm11
+ aesenc 112(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 112(%r15), %xmm13
+ aesenc 112(%r15), %xmm14
+ aesenc 112(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa (%rsp), %xmm7
+ movdqu -16(%rdx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 128(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 128(%r15), %xmm9
+ aesenc 128(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 128(%r15), %xmm11
+ aesenc 128(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 128(%r15), %xmm13
+ aesenc 128(%r15), %xmm14
+ aesenc 128(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa %xmm1, %xmm5
+ psrldq $8, %xmm1
+ pslldq $8, %xmm5
+ aesenc 144(%r15), %xmm8
+ pxor %xmm5, %xmm2
+ pxor %xmm1, %xmm3
+ movdqa %xmm2, %xmm7
+ movdqa %xmm2, %xmm4
+ movdqa %xmm2, %xmm5
+ aesenc 144(%r15), %xmm9
+ pslld $31, %xmm7
+ pslld $30, %xmm4
+ pslld $25, %xmm5
+ aesenc 144(%r15), %xmm10
+ pxor %xmm4, %xmm7
+ pxor %xmm5, %xmm7
+ aesenc 144(%r15), %xmm11
+ movdqa %xmm7, %xmm4
+ pslldq $12, %xmm7
+ psrldq $4, %xmm4
+ aesenc 144(%r15), %xmm12
+ pxor %xmm7, %xmm2
+ movdqa %xmm2, %xmm5
+ movdqa %xmm2, %xmm1
+ movdqa %xmm2, %xmm0
+ aesenc 144(%r15), %xmm13
+ psrld $0x01, %xmm5
+ psrld $2, %xmm1
+ psrld $7, %xmm0
+ aesenc 144(%r15), %xmm14
+ pxor %xmm1, %xmm5
+ pxor %xmm0, %xmm5
+ aesenc 144(%r15), %xmm15
+ pxor %xmm4, %xmm5
+ pxor %xmm5, %xmm2
+ pxor %xmm3, %xmm2
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm7
+ jl L_AES_GCM_encrypt_aesenc_128_ghash_avx_done
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 176(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm7
+ jl L_AES_GCM_encrypt_aesenc_128_ghash_avx_done
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 208(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 224(%r15), %xmm7
+L_AES_GCM_encrypt_aesenc_128_ghash_avx_done:
+ aesenclast %xmm7, %xmm8
+ aesenclast %xmm7, %xmm9
+ movdqu (%rcx), %xmm0
+ movdqu 16(%rcx), %xmm1
+ pxor %xmm0, %xmm8
+ pxor %xmm1, %xmm9
+ movdqu %xmm8, (%rdx)
+ movdqu %xmm9, 16(%rdx)
+ aesenclast %xmm7, %xmm10
+ aesenclast %xmm7, %xmm11
+ movdqu 32(%rcx), %xmm0
+ movdqu 48(%rcx), %xmm1
+ pxor %xmm0, %xmm10
+ pxor %xmm1, %xmm11
+ movdqu %xmm10, 32(%rdx)
+ movdqu %xmm11, 48(%rdx)
+ aesenclast %xmm7, %xmm12
+ aesenclast %xmm7, %xmm13
+ movdqu 64(%rcx), %xmm0
+ movdqu 80(%rcx), %xmm1
+ pxor %xmm0, %xmm12
+ pxor %xmm1, %xmm13
+ movdqu %xmm12, 64(%rdx)
+ movdqu %xmm13, 80(%rdx)
+ aesenclast %xmm7, %xmm14
+ aesenclast %xmm7, %xmm15
+ movdqu 96(%rcx), %xmm0
+ movdqu 112(%rcx), %xmm1
+ pxor %xmm0, %xmm14
+ pxor %xmm1, %xmm15
+ movdqu %xmm14, 96(%rdx)
+ movdqu %xmm15, 112(%rdx)
+ addl $0x80, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_encrypt_ghash_128
+L_AES_GCM_encrypt_end_128:
+ movdqa L_aes_gcm_bswap_mask(%rip), %xmm4
+ pshufb %xmm4, %xmm8
+ pshufb %xmm4, %xmm9
+ pshufb %xmm4, %xmm10
+ pshufb %xmm4, %xmm11
+ pxor %xmm2, %xmm8
+ pshufb %xmm4, %xmm12
+ pshufb %xmm4, %xmm13
+ pshufb %xmm4, %xmm14
+ pshufb %xmm4, %xmm15
+ movdqa 112(%rsp), %xmm7
+ pshufd $0x4e, %xmm8, %xmm1
+ pshufd $0x4e, %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, %xmm8, %xmm3
+ pclmulqdq $0x00, %xmm8, %xmm0
+ pxor %xmm8, %xmm1
+ pxor %xmm7, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm4
+ movdqa %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm4
+ pxor %xmm1, %xmm6
+ movdqa 96(%rsp), %xmm7
+ pshufd $0x4e, %xmm9, %xmm1
+ pshufd $0x4e, %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, %xmm9, %xmm3
+ pclmulqdq $0x00, %xmm9, %xmm0
+ pxor %xmm9, %xmm1
+ pxor %xmm7, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ pxor %xmm0, %xmm4
+ pxor %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm4
+ pxor %xmm1, %xmm6
+ movdqa 80(%rsp), %xmm7
+ pshufd $0x4e, %xmm10, %xmm1
+ pshufd $0x4e, %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, %xmm10, %xmm3
+ pclmulqdq $0x00, %xmm10, %xmm0
+ pxor %xmm10, %xmm1
+ pxor %xmm7, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ pxor %xmm0, %xmm4
+ pxor %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm4
+ pxor %xmm1, %xmm6
+ movdqa 64(%rsp), %xmm7
+ pshufd $0x4e, %xmm11, %xmm1
+ pshufd $0x4e, %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, %xmm11, %xmm3
+ pclmulqdq $0x00, %xmm11, %xmm0
+ pxor %xmm11, %xmm1
+ pxor %xmm7, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ pxor %xmm0, %xmm4
+ pxor %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm4
+ pxor %xmm1, %xmm6
+ movdqa 48(%rsp), %xmm7
+ pshufd $0x4e, %xmm12, %xmm1
+ pshufd $0x4e, %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, %xmm12, %xmm3
+ pclmulqdq $0x00, %xmm12, %xmm0
+ pxor %xmm12, %xmm1
+ pxor %xmm7, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ pxor %xmm0, %xmm4
+ pxor %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm4
+ pxor %xmm1, %xmm6
+ movdqa 32(%rsp), %xmm7
+ pshufd $0x4e, %xmm13, %xmm1
+ pshufd $0x4e, %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, %xmm13, %xmm3
+ pclmulqdq $0x00, %xmm13, %xmm0
+ pxor %xmm13, %xmm1
+ pxor %xmm7, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ pxor %xmm0, %xmm4
+ pxor %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm4
+ pxor %xmm1, %xmm6
+ movdqa 16(%rsp), %xmm7
+ pshufd $0x4e, %xmm14, %xmm1
+ pshufd $0x4e, %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, %xmm14, %xmm3
+ pclmulqdq $0x00, %xmm14, %xmm0
+ pxor %xmm14, %xmm1
+ pxor %xmm7, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ pxor %xmm0, %xmm4
+ pxor %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm4
+ pxor %xmm1, %xmm6
+ movdqa (%rsp), %xmm7
+ pshufd $0x4e, %xmm15, %xmm1
+ pshufd $0x4e, %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x11, %xmm15, %xmm3
+ pclmulqdq $0x00, %xmm15, %xmm0
+ pxor %xmm15, %xmm1
+ pxor %xmm7, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ pxor %xmm0, %xmm4
+ pxor %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm4
+ pxor %xmm1, %xmm6
+ movdqa %xmm4, %xmm0
+ movdqa %xmm4, %xmm1
+ movdqa %xmm4, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm4
+ movdqa %xmm4, %xmm2
+ movdqa %xmm4, %xmm3
+ movdqa %xmm4, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm4, %xmm2
+ pxor %xmm2, %xmm6
+ movdqa (%rsp), %xmm5
+L_AES_GCM_encrypt_done_128:
+ movl %r9d, %edx
+ cmpl %edx, %ebx
+ jge L_AES_GCM_encrypt_done_enc
+ movl %r9d, %r13d
+ andl $0xfffffff0, %r13d
+ cmpl %r13d, %ebx
+ jge L_AES_GCM_encrypt_last_block_done
+ leaq (%rdi,%rbx,1), %rcx
+ leaq (%rsi,%rbx,1), %rdx
+ movdqa 128(%rsp), %xmm8
+ movdqa %xmm8, %xmm9
+ pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8
+ paddd L_aes_gcm_one(%rip), %xmm9
+ pxor (%r15), %xmm8
+ movdqa %xmm9, 128(%rsp)
+ aesenc 16(%r15), %xmm8
+ aesenc 32(%r15), %xmm8
+ aesenc 48(%r15), %xmm8
+ aesenc 64(%r15), %xmm8
+ aesenc 80(%r15), %xmm8
+ aesenc 96(%r15), %xmm8
+ aesenc 112(%r15), %xmm8
+ aesenc 128(%r15), %xmm8
+ aesenc 144(%r15), %xmm8
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last
+ aesenc %xmm9, %xmm8
+ aesenc 176(%r15), %xmm8
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last
+ aesenc %xmm9, %xmm8
+ aesenc 208(%r15), %xmm8
+ movdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last:
+ aesenclast %xmm9, %xmm8
+ movdqu (%rcx), %xmm9
+ pxor %xmm9, %xmm8
+ movdqu %xmm8, (%rdx)
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm6
+ addl $16, %ebx
+ cmpl %r13d, %ebx
+ jge L_AES_GCM_encrypt_last_block_ghash
+L_AES_GCM_encrypt_last_block_start:
+ leaq (%rdi,%rbx,1), %rcx
+ leaq (%rsi,%rbx,1), %rdx
+ movdqa 128(%rsp), %xmm8
+ movdqa %xmm8, %xmm9
+ pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8
+ paddd L_aes_gcm_one(%rip), %xmm9
+ pxor (%r15), %xmm8
+ movdqa %xmm9, 128(%rsp)
+ movdqa %xmm6, %xmm10
+ pclmulqdq $16, %xmm5, %xmm10
+ aesenc 16(%r15), %xmm8
+ aesenc 32(%r15), %xmm8
+ movdqa %xmm6, %xmm11
+ pclmulqdq $0x01, %xmm5, %xmm11
+ aesenc 48(%r15), %xmm8
+ aesenc 64(%r15), %xmm8
+ movdqa %xmm6, %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm12
+ aesenc 80(%r15), %xmm8
+ movdqa %xmm6, %xmm1
+ pclmulqdq $0x11, %xmm5, %xmm1
+ aesenc 96(%r15), %xmm8
+ pxor %xmm11, %xmm10
+ movdqa %xmm10, %xmm2
+ psrldq $8, %xmm10
+ pslldq $8, %xmm2
+ aesenc 112(%r15), %xmm8
+ movdqa %xmm1, %xmm3
+ pxor %xmm12, %xmm2
+ pxor %xmm10, %xmm3
+ movdqa L_aes_gcm_mod2_128(%rip), %xmm0
+ movdqa %xmm2, %xmm11
+ pclmulqdq $16, %xmm0, %xmm11
+ aesenc 128(%r15), %xmm8
+ pshufd $0x4e, %xmm2, %xmm10
+ pxor %xmm11, %xmm10
+ movdqa %xmm10, %xmm11
+ pclmulqdq $16, %xmm0, %xmm11
+ aesenc 144(%r15), %xmm8
+ pshufd $0x4e, %xmm10, %xmm6
+ pxor %xmm11, %xmm6
+ pxor %xmm3, %xmm6
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_aesenc_gfmul_last
+ aesenc %xmm9, %xmm8
+ aesenc 176(%r15), %xmm8
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_aesenc_gfmul_last
+ aesenc %xmm9, %xmm8
+ aesenc 208(%r15), %xmm8
+ movdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_aesenc_gfmul_last:
+ aesenclast %xmm9, %xmm8
+ movdqu (%rcx), %xmm9
+ pxor %xmm9, %xmm8
+ movdqu %xmm8, (%rdx)
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm6
+ addl $16, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_encrypt_last_block_start
+L_AES_GCM_encrypt_last_block_ghash:
+ pshufd $0x4e, %xmm5, %xmm9
+ pshufd $0x4e, %xmm6, %xmm10
+ movdqa %xmm6, %xmm11
+ movdqa %xmm6, %xmm8
+ pclmulqdq $0x11, %xmm5, %xmm11
+ pclmulqdq $0x00, %xmm5, %xmm8
+ pxor %xmm5, %xmm9
+ pxor %xmm6, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm6
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm6
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm6
+L_AES_GCM_encrypt_last_block_done:
+ movl %r9d, %ecx
+ movl %ecx, %edx
+ andl $15, %ecx
+ jz L_AES_GCM_encrypt_aesenc_last15_enc_avx_done
+ movdqa 128(%rsp), %xmm4
+ pshufb L_aes_gcm_bswap_epi64(%rip), %xmm4
+ pxor (%r15), %xmm4
+ aesenc 16(%r15), %xmm4
+ aesenc 32(%r15), %xmm4
+ aesenc 48(%r15), %xmm4
+ aesenc 64(%r15), %xmm4
+ aesenc 80(%r15), %xmm4
+ aesenc 96(%r15), %xmm4
+ aesenc 112(%r15), %xmm4
+ aesenc 128(%r15), %xmm4
+ aesenc 144(%r15), %xmm4
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last
+ aesenc %xmm9, %xmm4
+ aesenc 176(%r15), %xmm4
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last
+ aesenc %xmm9, %xmm4
+ aesenc 208(%r15), %xmm4
+ movdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last:
+ aesenclast %xmm9, %xmm4
+ subq $16, %rsp
+ xorl %ecx, %ecx
+ movdqa %xmm4, (%rsp)
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop:
+ movzbl (%rdi,%rbx,1), %r13d
+ xorb (%rsp,%rcx,1), %r13b
+ movb %r13b, (%rsi,%rbx,1)
+ movb %r13b, (%rsp,%rcx,1)
+ incl %ebx
+ incl %ecx
+ cmpl %edx, %ebx
+ jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop
+ xorq %r13, %r13
+ cmpl $16, %ecx
+ je L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop:
+ movb %r13b, (%rsp,%rcx,1)
+ incl %ecx
+ cmpl $16, %ecx
+ jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc:
+ movdqa (%rsp), %xmm4
+ addq $16, %rsp
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm4
+ pxor %xmm4, %xmm6
+ pshufd $0x4e, %xmm5, %xmm9
+ pshufd $0x4e, %xmm6, %xmm10
+ movdqa %xmm6, %xmm11
+ movdqa %xmm6, %xmm8
+ pclmulqdq $0x11, %xmm5, %xmm11
+ pclmulqdq $0x00, %xmm5, %xmm8
+ pxor %xmm5, %xmm9
+ pxor %xmm6, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm6
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm6
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm6
+L_AES_GCM_encrypt_aesenc_last15_enc_avx_done:
+L_AES_GCM_encrypt_done_enc:
+ movl %r9d, %edx
+ movl %r11d, %ecx
+ shlq $3, %rdx
+ shlq $3, %rcx
+ pinsrq $0x00, %rdx, %xmm0
+ pinsrq $0x01, %rcx, %xmm0
+ pxor %xmm0, %xmm6
+ pshufd $0x4e, %xmm5, %xmm9
+ pshufd $0x4e, %xmm6, %xmm10
+ movdqa %xmm6, %xmm11
+ movdqa %xmm6, %xmm8
+ pclmulqdq $0x11, %xmm5, %xmm11
+ pclmulqdq $0x00, %xmm5, %xmm8
+ pxor %xmm5, %xmm9
+ pxor %xmm6, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm6
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm6
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm6
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm6
+ movdqa 144(%rsp), %xmm0
+ pxor %xmm6, %xmm0
+ cmpl $16, %r14d
+ je L_AES_GCM_encrypt_store_tag_16
+ xorq %rcx, %rcx
+ movdqa %xmm0, (%rsp)
+L_AES_GCM_encrypt_store_tag_loop:
+ movzbl (%rsp,%rcx,1), %r13d
+ movb %r13b, (%r8,%rcx,1)
+ incl %ecx
+ cmpl %r14d, %ecx
+ jne L_AES_GCM_encrypt_store_tag_loop
+ jmp L_AES_GCM_encrypt_store_tag_done
+L_AES_GCM_encrypt_store_tag_16:
+ movdqu %xmm0, (%r8)
+L_AES_GCM_encrypt_store_tag_done:
+ addq $0xa0, %rsp
+ popq %r15
+ popq %r14
+ popq %rbx
+ popq %r12
+ popq %r13
+ repz retq
+#ifndef __APPLE__
+.size AES_GCM_encrypt,.-AES_GCM_encrypt
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl AES_GCM_decrypt
+.type AES_GCM_decrypt,@function
+.align 4
+AES_GCM_decrypt:
+#else
+.section __TEXT,__text
+.globl _AES_GCM_decrypt
+.p2align 2
+_AES_GCM_decrypt:
+#endif /* __APPLE__ */
+ pushq %r13
+ pushq %r12
+ pushq %rbx
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rdx, %r12
+ movq %rcx, %rax
+ movl 56(%rsp), %r11d
+ movl 64(%rsp), %ebx
+ movl 72(%rsp), %r14d
+ movq 80(%rsp), %r15
+ movl 88(%rsp), %r10d
+ movq 96(%rsp), %rbp
+ subq $0xa8, %rsp
+ pxor %xmm4, %xmm4
+ pxor %xmm6, %xmm6
+ cmpl $12, %ebx
+ movl %ebx, %edx
+ jne L_AES_GCM_decrypt_iv_not_12
+ # # Calculate values when IV is 12 bytes
+ # Set counter based on IV
+ movl $0x1000000, %ecx
+ pinsrq $0x00, (%rax), %xmm4
+ pinsrd $2, 8(%rax), %xmm4
+ pinsrd $3, %ecx, %xmm4
+ # H = Encrypt X(=0) and T = Encrypt counter
+ movdqa %xmm4, %xmm1
+ movdqa (%r15), %xmm5
+ pxor %xmm5, %xmm1
+ movdqa 16(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 32(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 48(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 64(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 80(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 96(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 112(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 128(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 144(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm7
+ jl L_AES_GCM_decrypt_calc_iv_12_last
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 176(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm7
+ jl L_AES_GCM_decrypt_calc_iv_12_last
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 208(%r15), %xmm7
+ aesenc %xmm7, %xmm5
+ aesenc %xmm7, %xmm1
+ movdqa 224(%r15), %xmm7
+L_AES_GCM_decrypt_calc_iv_12_last:
+ aesenclast %xmm7, %xmm5
+ aesenclast %xmm7, %xmm1
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm5
+ movdqa %xmm1, 144(%rsp)
+ jmp L_AES_GCM_decrypt_iv_done
+L_AES_GCM_decrypt_iv_not_12:
+ # Calculate values when IV is not 12 bytes
+ # H = Encrypt X(=0)
+ movdqa (%r15), %xmm5
+ aesenc 16(%r15), %xmm5
+ aesenc 32(%r15), %xmm5
+ aesenc 48(%r15), %xmm5
+ aesenc 64(%r15), %xmm5
+ aesenc 80(%r15), %xmm5
+ aesenc 96(%r15), %xmm5
+ aesenc 112(%r15), %xmm5
+ aesenc 128(%r15), %xmm5
+ aesenc 144(%r15), %xmm5
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm9
+ jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last
+ aesenc %xmm9, %xmm5
+ aesenc 176(%r15), %xmm5
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm9
+ jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last
+ aesenc %xmm9, %xmm5
+ aesenc 208(%r15), %xmm5
+ movdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last:
+ aesenclast %xmm9, %xmm5
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm5
+ # Calc counter
+ # Initialization vector
+ cmpl $0x00, %edx
+ movq $0x00, %rcx
+ je L_AES_GCM_decrypt_calc_iv_done
+ cmpl $16, %edx
+ jl L_AES_GCM_decrypt_calc_iv_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_decrypt_calc_iv_16_loop:
+ movdqu (%rax,%rcx,1), %xmm8
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm4
+ pshufd $0x4e, %xmm4, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm4, %xmm3
+ pclmulqdq $0x00, %xmm4, %xmm0
+ pxor %xmm4, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm4
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm4, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm4
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm4
+ por %xmm0, %xmm7
+ por %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm4
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_calc_iv_16_loop
+ movl %ebx, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_decrypt_calc_iv_done
+L_AES_GCM_decrypt_calc_iv_lt16:
+ subq $16, %rsp
+ pxor %xmm8, %xmm8
+ xorl %ebx, %ebx
+ movdqa %xmm8, (%rsp)
+L_AES_GCM_decrypt_calc_iv_loop:
+ movzbl (%rax,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_calc_iv_loop
+ movdqa (%rsp), %xmm8
+ addq $16, %rsp
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm4
+ pshufd $0x4e, %xmm4, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm4, %xmm3
+ pclmulqdq $0x00, %xmm4, %xmm0
+ pxor %xmm4, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm4
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm4, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm4
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm4
+ por %xmm0, %xmm7
+ por %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm4
+L_AES_GCM_decrypt_calc_iv_done:
+ # T = Encrypt counter
+ pxor %xmm0, %xmm0
+ shll $3, %edx
+ pinsrq $0x00, %rdx, %xmm0
+ pxor %xmm0, %xmm4
+ pshufd $0x4e, %xmm4, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm4, %xmm3
+ pclmulqdq $0x00, %xmm4, %xmm0
+ pxor %xmm4, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm4
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm4, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm4
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm4
+ por %xmm0, %xmm7
+ por %xmm1, %xmm4
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm4
+ # Encrypt counter
+ movdqa (%r15), %xmm8
+ pxor %xmm4, %xmm8
+ aesenc 16(%r15), %xmm8
+ aesenc 32(%r15), %xmm8
+ aesenc 48(%r15), %xmm8
+ aesenc 64(%r15), %xmm8
+ aesenc 80(%r15), %xmm8
+ aesenc 96(%r15), %xmm8
+ aesenc 112(%r15), %xmm8
+ aesenc 128(%r15), %xmm8
+ aesenc 144(%r15), %xmm8
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm9
+ jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last
+ aesenc %xmm9, %xmm8
+ aesenc 176(%r15), %xmm8
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm9
+ jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last
+ aesenc %xmm9, %xmm8
+ aesenc 208(%r15), %xmm8
+ movdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last:
+ aesenclast %xmm9, %xmm8
+ movdqa %xmm8, 144(%rsp)
+L_AES_GCM_decrypt_iv_done:
+ # Additional authentication data
+ movl %r11d, %edx
+ cmpl $0x00, %edx
+ je L_AES_GCM_decrypt_calc_aad_done
+ xorl %ecx, %ecx
+ cmpl $16, %edx
+ jl L_AES_GCM_decrypt_calc_aad_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_decrypt_calc_aad_16_loop:
+ movdqu (%r12,%rcx,1), %xmm8
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm6
+ pshufd $0x4e, %xmm6, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm6, %xmm3
+ pclmulqdq $0x00, %xmm6, %xmm0
+ pxor %xmm6, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm6
+ movdqa %xmm7, %xmm0
+ movdqa %xmm6, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm6
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm6
+ por %xmm0, %xmm7
+ por %xmm1, %xmm6
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm6
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_calc_aad_16_loop
+ movl %r11d, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_decrypt_calc_aad_done
+L_AES_GCM_decrypt_calc_aad_lt16:
+ subq $16, %rsp
+ pxor %xmm8, %xmm8
+ xorl %ebx, %ebx
+ movdqa %xmm8, (%rsp)
+L_AES_GCM_decrypt_calc_aad_loop:
+ movzbl (%r12,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_calc_aad_loop
+ movdqa (%rsp), %xmm8
+ addq $16, %rsp
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm8
+ pxor %xmm8, %xmm6
+ pshufd $0x4e, %xmm6, %xmm1
+ pshufd $0x4e, %xmm5, %xmm2
+ movdqa %xmm5, %xmm3
+ movdqa %xmm5, %xmm0
+ pclmulqdq $0x11, %xmm6, %xmm3
+ pclmulqdq $0x00, %xmm6, %xmm0
+ pxor %xmm6, %xmm1
+ pxor %xmm5, %xmm2
+ pclmulqdq $0x00, %xmm2, %xmm1
+ pxor %xmm0, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa %xmm1, %xmm2
+ movdqa %xmm0, %xmm7
+ movdqa %xmm3, %xmm6
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ pxor %xmm2, %xmm7
+ pxor %xmm1, %xmm6
+ movdqa %xmm7, %xmm0
+ movdqa %xmm6, %xmm1
+ psrld $31, %xmm0
+ psrld $31, %xmm1
+ pslld $0x01, %xmm7
+ pslld $0x01, %xmm6
+ movdqa %xmm0, %xmm2
+ pslldq $4, %xmm0
+ psrldq $12, %xmm2
+ pslldq $4, %xmm1
+ por %xmm2, %xmm6
+ por %xmm0, %xmm7
+ por %xmm1, %xmm6
+ movdqa %xmm7, %xmm0
+ movdqa %xmm7, %xmm1
+ movdqa %xmm7, %xmm2
+ pslld $31, %xmm0
+ pslld $30, %xmm1
+ pslld $25, %xmm2
+ pxor %xmm1, %xmm0
+ pxor %xmm2, %xmm0
+ movdqa %xmm0, %xmm1
+ psrldq $4, %xmm1
+ pslldq $12, %xmm0
+ pxor %xmm0, %xmm7
+ movdqa %xmm7, %xmm2
+ movdqa %xmm7, %xmm3
+ movdqa %xmm7, %xmm0
+ psrld $0x01, %xmm2
+ psrld $2, %xmm3
+ psrld $7, %xmm0
+ pxor %xmm3, %xmm2
+ pxor %xmm0, %xmm2
+ pxor %xmm1, %xmm2
+ pxor %xmm7, %xmm2
+ pxor %xmm2, %xmm6
+L_AES_GCM_decrypt_calc_aad_done:
+ # Calculate counter and H
+ pshufb L_aes_gcm_bswap_epi64(%rip), %xmm4
+ movdqa %xmm5, %xmm9
+ paddd L_aes_gcm_one(%rip), %xmm4
+ movdqa %xmm5, %xmm8
+ movdqa %xmm4, 128(%rsp)
+ psrlq $63, %xmm9
+ psllq $0x01, %xmm8
+ pslldq $8, %xmm9
+ por %xmm9, %xmm8
+ pshufd $0xff, %xmm5, %xmm5
+ psrad $31, %xmm5
+ pand L_aes_gcm_mod2_128(%rip), %xmm5
+ pxor %xmm8, %xmm5
+ xorl %ebx, %ebx
+ cmpl $0x80, %r9d
+ movl %r9d, %r13d
+ jl L_AES_GCM_decrypt_done_128
+ andl $0xffffff80, %r13d
+ movdqa %xmm6, %xmm2
+ # H ^ 1
+ movdqa %xmm5, (%rsp)
+ # H ^ 2
+ pshufd $0x4e, %xmm5, %xmm9
+ pshufd $0x4e, %xmm5, %xmm10
+ movdqa %xmm5, %xmm11
+ movdqa %xmm5, %xmm8
+ pclmulqdq $0x11, %xmm5, %xmm11
+ pclmulqdq $0x00, %xmm5, %xmm8
+ pxor %xmm5, %xmm9
+ pxor %xmm5, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm0
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm0
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm0
+ movdqa %xmm0, 16(%rsp)
+ # H ^ 3
+ pshufd $0x4e, %xmm5, %xmm9
+ pshufd $0x4e, %xmm0, %xmm10
+ movdqa %xmm0, %xmm11
+ movdqa %xmm0, %xmm8
+ pclmulqdq $0x11, %xmm5, %xmm11
+ pclmulqdq $0x00, %xmm5, %xmm8
+ pxor %xmm5, %xmm9
+ pxor %xmm0, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm1
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm1
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm1
+ movdqa %xmm1, 32(%rsp)
+ # H ^ 4
+ pshufd $0x4e, %xmm0, %xmm9
+ pshufd $0x4e, %xmm0, %xmm10
+ movdqa %xmm0, %xmm11
+ movdqa %xmm0, %xmm8
+ pclmulqdq $0x11, %xmm0, %xmm11
+ pclmulqdq $0x00, %xmm0, %xmm8
+ pxor %xmm0, %xmm9
+ pxor %xmm0, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm3
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm3
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm3
+ movdqa %xmm3, 48(%rsp)
+ # H ^ 5
+ pshufd $0x4e, %xmm0, %xmm9
+ pshufd $0x4e, %xmm1, %xmm10
+ movdqa %xmm1, %xmm11
+ movdqa %xmm1, %xmm8
+ pclmulqdq $0x11, %xmm0, %xmm11
+ pclmulqdq $0x00, %xmm0, %xmm8
+ pxor %xmm0, %xmm9
+ pxor %xmm1, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm7
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm7
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm7
+ movdqa %xmm7, 64(%rsp)
+ # H ^ 6
+ pshufd $0x4e, %xmm1, %xmm9
+ pshufd $0x4e, %xmm1, %xmm10
+ movdqa %xmm1, %xmm11
+ movdqa %xmm1, %xmm8
+ pclmulqdq $0x11, %xmm1, %xmm11
+ pclmulqdq $0x00, %xmm1, %xmm8
+ pxor %xmm1, %xmm9
+ pxor %xmm1, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm7
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm7
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm7
+ movdqa %xmm7, 80(%rsp)
+ # H ^ 7
+ pshufd $0x4e, %xmm1, %xmm9
+ pshufd $0x4e, %xmm3, %xmm10
+ movdqa %xmm3, %xmm11
+ movdqa %xmm3, %xmm8
+ pclmulqdq $0x11, %xmm1, %xmm11
+ pclmulqdq $0x00, %xmm1, %xmm8
+ pxor %xmm1, %xmm9
+ pxor %xmm3, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm7
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm7
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm7
+ movdqa %xmm7, 96(%rsp)
+ # H ^ 8
+ pshufd $0x4e, %xmm3, %xmm9
+ pshufd $0x4e, %xmm3, %xmm10
+ movdqa %xmm3, %xmm11
+ movdqa %xmm3, %xmm8
+ pclmulqdq $0x11, %xmm3, %xmm11
+ pclmulqdq $0x00, %xmm3, %xmm8
+ pxor %xmm3, %xmm9
+ pxor %xmm3, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm7
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm7
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm7
+ movdqa %xmm7, 112(%rsp)
+L_AES_GCM_decrypt_ghash_128:
+ leaq (%rdi,%rbx,1), %rcx
+ leaq (%rsi,%rbx,1), %rdx
+ movdqa 128(%rsp), %xmm8
+ movdqa L_aes_gcm_bswap_epi64(%rip), %xmm1
+ movdqa %xmm8, %xmm0
+ pshufb %xmm1, %xmm8
+ movdqa %xmm0, %xmm9
+ paddd L_aes_gcm_one(%rip), %xmm9
+ pshufb %xmm1, %xmm9
+ movdqa %xmm0, %xmm10
+ paddd L_aes_gcm_two(%rip), %xmm10
+ pshufb %xmm1, %xmm10
+ movdqa %xmm0, %xmm11
+ paddd L_aes_gcm_three(%rip), %xmm11
+ pshufb %xmm1, %xmm11
+ movdqa %xmm0, %xmm12
+ paddd L_aes_gcm_four(%rip), %xmm12
+ pshufb %xmm1, %xmm12
+ movdqa %xmm0, %xmm13
+ paddd L_aes_gcm_five(%rip), %xmm13
+ pshufb %xmm1, %xmm13
+ movdqa %xmm0, %xmm14
+ paddd L_aes_gcm_six(%rip), %xmm14
+ pshufb %xmm1, %xmm14
+ movdqa %xmm0, %xmm15
+ paddd L_aes_gcm_seven(%rip), %xmm15
+ pshufb %xmm1, %xmm15
+ paddd L_aes_gcm_eight(%rip), %xmm0
+ movdqa (%r15), %xmm7
+ movdqa %xmm0, 128(%rsp)
+ pxor %xmm7, %xmm8
+ pxor %xmm7, %xmm9
+ pxor %xmm7, %xmm10
+ pxor %xmm7, %xmm11
+ pxor %xmm7, %xmm12
+ pxor %xmm7, %xmm13
+ pxor %xmm7, %xmm14
+ pxor %xmm7, %xmm15
+ movdqa 112(%rsp), %xmm7
+ movdqu (%rcx), %xmm0
+ aesenc 16(%r15), %xmm8
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ pxor %xmm2, %xmm0
+ pshufd $0x4e, %xmm7, %xmm1
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm7, %xmm1
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm3
+ pclmulqdq $0x11, %xmm7, %xmm3
+ aesenc 16(%r15), %xmm9
+ aesenc 16(%r15), %xmm10
+ movdqa %xmm0, %xmm2
+ pclmulqdq $0x00, %xmm7, %xmm2
+ aesenc 16(%r15), %xmm11
+ aesenc 16(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm1
+ aesenc 16(%r15), %xmm13
+ aesenc 16(%r15), %xmm14
+ aesenc 16(%r15), %xmm15
+ pxor %xmm2, %xmm1
+ pxor %xmm3, %xmm1
+ movdqa 96(%rsp), %xmm7
+ movdqu 16(%rcx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 32(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 32(%r15), %xmm9
+ aesenc 32(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 32(%r15), %xmm11
+ aesenc 32(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 32(%r15), %xmm13
+ aesenc 32(%r15), %xmm14
+ aesenc 32(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 80(%rsp), %xmm7
+ movdqu 32(%rcx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 48(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 48(%r15), %xmm9
+ aesenc 48(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 48(%r15), %xmm11
+ aesenc 48(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 48(%r15), %xmm13
+ aesenc 48(%r15), %xmm14
+ aesenc 48(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 64(%rsp), %xmm7
+ movdqu 48(%rcx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 64(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 64(%r15), %xmm9
+ aesenc 64(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 64(%r15), %xmm11
+ aesenc 64(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 64(%r15), %xmm13
+ aesenc 64(%r15), %xmm14
+ aesenc 64(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 48(%rsp), %xmm7
+ movdqu 64(%rcx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 80(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 80(%r15), %xmm9
+ aesenc 80(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 80(%r15), %xmm11
+ aesenc 80(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 80(%r15), %xmm13
+ aesenc 80(%r15), %xmm14
+ aesenc 80(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 32(%rsp), %xmm7
+ movdqu 80(%rcx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 96(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 96(%r15), %xmm9
+ aesenc 96(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 96(%r15), %xmm11
+ aesenc 96(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 96(%r15), %xmm13
+ aesenc 96(%r15), %xmm14
+ aesenc 96(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa 16(%rsp), %xmm7
+ movdqu 96(%rcx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 112(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 112(%r15), %xmm9
+ aesenc 112(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 112(%r15), %xmm11
+ aesenc 112(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 112(%r15), %xmm13
+ aesenc 112(%r15), %xmm14
+ aesenc 112(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa (%rsp), %xmm7
+ movdqu 112(%rcx), %xmm0
+ pshufd $0x4e, %xmm7, %xmm4
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm0
+ aesenc 128(%r15), %xmm8
+ pxor %xmm7, %xmm4
+ pshufd $0x4e, %xmm0, %xmm5
+ pxor %xmm0, %xmm5
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm7, %xmm6
+ aesenc 128(%r15), %xmm9
+ aesenc 128(%r15), %xmm10
+ pclmulqdq $0x00, %xmm0, %xmm7
+ aesenc 128(%r15), %xmm11
+ aesenc 128(%r15), %xmm12
+ pclmulqdq $0x00, %xmm5, %xmm4
+ aesenc 128(%r15), %xmm13
+ aesenc 128(%r15), %xmm14
+ aesenc 128(%r15), %xmm15
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm6, %xmm1
+ pxor %xmm6, %xmm3
+ pxor %xmm4, %xmm1
+ movdqa %xmm1, %xmm5
+ psrldq $8, %xmm1
+ pslldq $8, %xmm5
+ aesenc 144(%r15), %xmm8
+ pxor %xmm5, %xmm2
+ pxor %xmm1, %xmm3
+ movdqa %xmm2, %xmm7
+ movdqa %xmm2, %xmm4
+ movdqa %xmm2, %xmm5
+ aesenc 144(%r15), %xmm9
+ pslld $31, %xmm7
+ pslld $30, %xmm4
+ pslld $25, %xmm5
+ aesenc 144(%r15), %xmm10
+ pxor %xmm4, %xmm7
+ pxor %xmm5, %xmm7
+ aesenc 144(%r15), %xmm11
+ movdqa %xmm7, %xmm4
+ pslldq $12, %xmm7
+ psrldq $4, %xmm4
+ aesenc 144(%r15), %xmm12
+ pxor %xmm7, %xmm2
+ movdqa %xmm2, %xmm5
+ movdqa %xmm2, %xmm1
+ movdqa %xmm2, %xmm0
+ aesenc 144(%r15), %xmm13
+ psrld $0x01, %xmm5
+ psrld $2, %xmm1
+ psrld $7, %xmm0
+ aesenc 144(%r15), %xmm14
+ pxor %xmm1, %xmm5
+ pxor %xmm0, %xmm5
+ aesenc 144(%r15), %xmm15
+ pxor %xmm4, %xmm5
+ pxor %xmm5, %xmm2
+ pxor %xmm3, %xmm2
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm7
+ jl L_AES_GCM_decrypt_aesenc_128_ghash_avx_done
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 176(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm7
+ jl L_AES_GCM_decrypt_aesenc_128_ghash_avx_done
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 208(%r15), %xmm7
+ aesenc %xmm7, %xmm8
+ aesenc %xmm7, %xmm9
+ aesenc %xmm7, %xmm10
+ aesenc %xmm7, %xmm11
+ aesenc %xmm7, %xmm12
+ aesenc %xmm7, %xmm13
+ aesenc %xmm7, %xmm14
+ aesenc %xmm7, %xmm15
+ movdqa 224(%r15), %xmm7
+L_AES_GCM_decrypt_aesenc_128_ghash_avx_done:
+ aesenclast %xmm7, %xmm8
+ aesenclast %xmm7, %xmm9
+ movdqu (%rcx), %xmm0
+ movdqu 16(%rcx), %xmm1
+ pxor %xmm0, %xmm8
+ pxor %xmm1, %xmm9
+ movdqu %xmm8, (%rdx)
+ movdqu %xmm9, 16(%rdx)
+ aesenclast %xmm7, %xmm10
+ aesenclast %xmm7, %xmm11
+ movdqu 32(%rcx), %xmm0
+ movdqu 48(%rcx), %xmm1
+ pxor %xmm0, %xmm10
+ pxor %xmm1, %xmm11
+ movdqu %xmm10, 32(%rdx)
+ movdqu %xmm11, 48(%rdx)
+ aesenclast %xmm7, %xmm12
+ aesenclast %xmm7, %xmm13
+ movdqu 64(%rcx), %xmm0
+ movdqu 80(%rcx), %xmm1
+ pxor %xmm0, %xmm12
+ pxor %xmm1, %xmm13
+ movdqu %xmm12, 64(%rdx)
+ movdqu %xmm13, 80(%rdx)
+ aesenclast %xmm7, %xmm14
+ aesenclast %xmm7, %xmm15
+ movdqu 96(%rcx), %xmm0
+ movdqu 112(%rcx), %xmm1
+ pxor %xmm0, %xmm14
+ pxor %xmm1, %xmm15
+ movdqu %xmm14, 96(%rdx)
+ movdqu %xmm15, 112(%rdx)
+ addl $0x80, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_decrypt_ghash_128
+ movdqa %xmm2, %xmm6
+ movdqa (%rsp), %xmm5
+L_AES_GCM_decrypt_done_128:
+ movl %r9d, %edx
+ cmpl %edx, %ebx
+ jge L_AES_GCM_decrypt_done_dec
+ movl %r9d, %r13d
+ andl $0xfffffff0, %r13d
+ cmpl %r13d, %ebx
+ jge L_AES_GCM_decrypt_last_block_done
+L_AES_GCM_decrypt_last_block_start:
+ leaq (%rdi,%rbx,1), %rcx
+ leaq (%rsi,%rbx,1), %rdx
+ movdqu (%rcx), %xmm1
+ movdqa %xmm5, %xmm0
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm1
+ pxor %xmm6, %xmm1
+ movdqa 128(%rsp), %xmm8
+ movdqa %xmm8, %xmm9
+ pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8
+ paddd L_aes_gcm_one(%rip), %xmm9
+ pxor (%r15), %xmm8
+ movdqa %xmm9, 128(%rsp)
+ movdqa %xmm1, %xmm10
+ pclmulqdq $16, %xmm0, %xmm10
+ aesenc 16(%r15), %xmm8
+ aesenc 32(%r15), %xmm8
+ movdqa %xmm1, %xmm11
+ pclmulqdq $0x01, %xmm0, %xmm11
+ aesenc 48(%r15), %xmm8
+ aesenc 64(%r15), %xmm8
+ movdqa %xmm1, %xmm12
+ pclmulqdq $0x00, %xmm0, %xmm12
+ aesenc 80(%r15), %xmm8
+ movdqa %xmm1, %xmm1
+ pclmulqdq $0x11, %xmm0, %xmm1
+ aesenc 96(%r15), %xmm8
+ pxor %xmm11, %xmm10
+ movdqa %xmm10, %xmm2
+ psrldq $8, %xmm10
+ pslldq $8, %xmm2
+ aesenc 112(%r15), %xmm8
+ movdqa %xmm1, %xmm3
+ pxor %xmm12, %xmm2
+ pxor %xmm10, %xmm3
+ movdqa L_aes_gcm_mod2_128(%rip), %xmm0
+ movdqa %xmm2, %xmm11
+ pclmulqdq $16, %xmm0, %xmm11
+ aesenc 128(%r15), %xmm8
+ pshufd $0x4e, %xmm2, %xmm10
+ pxor %xmm11, %xmm10
+ movdqa %xmm10, %xmm11
+ pclmulqdq $16, %xmm0, %xmm11
+ aesenc 144(%r15), %xmm8
+ pshufd $0x4e, %xmm10, %xmm6
+ pxor %xmm11, %xmm6
+ pxor %xmm3, %xmm6
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm9
+ jl L_AES_GCM_decrypt_aesenc_gfmul_last
+ aesenc %xmm9, %xmm8
+ aesenc 176(%r15), %xmm8
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm9
+ jl L_AES_GCM_decrypt_aesenc_gfmul_last
+ aesenc %xmm9, %xmm8
+ aesenc 208(%r15), %xmm8
+ movdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_aesenc_gfmul_last:
+ aesenclast %xmm9, %xmm8
+ movdqu (%rcx), %xmm9
+ pxor %xmm9, %xmm8
+ movdqu %xmm8, (%rdx)
+ addl $16, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_decrypt_last_block_start
+L_AES_GCM_decrypt_last_block_done:
+ movl %r9d, %ecx
+ movl %ecx, %edx
+ andl $15, %ecx
+ jz L_AES_GCM_decrypt_aesenc_last15_dec_avx_done
+ movdqa 128(%rsp), %xmm4
+ pshufb L_aes_gcm_bswap_epi64(%rip), %xmm4
+ pxor (%r15), %xmm4
+ aesenc 16(%r15), %xmm4
+ aesenc 32(%r15), %xmm4
+ aesenc 48(%r15), %xmm4
+ aesenc 64(%r15), %xmm4
+ aesenc 80(%r15), %xmm4
+ aesenc 96(%r15), %xmm4
+ aesenc 112(%r15), %xmm4
+ aesenc 128(%r15), %xmm4
+ aesenc 144(%r15), %xmm4
+ cmpl $11, %r10d
+ movdqa 160(%r15), %xmm9
+ jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last
+ aesenc %xmm9, %xmm4
+ aesenc 176(%r15), %xmm4
+ cmpl $13, %r10d
+ movdqa 192(%r15), %xmm9
+ jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last
+ aesenc %xmm9, %xmm4
+ aesenc 208(%r15), %xmm4
+ movdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last:
+ aesenclast %xmm9, %xmm4
+ subq $32, %rsp
+ xorl %ecx, %ecx
+ movdqa %xmm4, (%rsp)
+ pxor %xmm0, %xmm0
+ movdqa %xmm0, 16(%rsp)
+L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop:
+ movzbl (%rdi,%rbx,1), %r13d
+ movb %r13b, 16(%rsp,%rcx,1)
+ xorb (%rsp,%rcx,1), %r13b
+ movb %r13b, (%rsi,%rbx,1)
+ incl %ebx
+ incl %ecx
+ cmpl %edx, %ebx
+ jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop
+ movdqa 16(%rsp), %xmm4
+ addq $32, %rsp
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm4
+ pxor %xmm4, %xmm6
+ pshufd $0x4e, %xmm5, %xmm9
+ pshufd $0x4e, %xmm6, %xmm10
+ movdqa %xmm6, %xmm11
+ movdqa %xmm6, %xmm8
+ pclmulqdq $0x11, %xmm5, %xmm11
+ pclmulqdq $0x00, %xmm5, %xmm8
+ pxor %xmm5, %xmm9
+ pxor %xmm6, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm6
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm6
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm6
+L_AES_GCM_decrypt_aesenc_last15_dec_avx_done:
+L_AES_GCM_decrypt_done_dec:
+ movl %r9d, %edx
+ movl %r11d, %ecx
+ shlq $3, %rdx
+ shlq $3, %rcx
+ pinsrq $0x00, %rdx, %xmm0
+ pinsrq $0x01, %rcx, %xmm0
+ pxor %xmm0, %xmm6
+ pshufd $0x4e, %xmm5, %xmm9
+ pshufd $0x4e, %xmm6, %xmm10
+ movdqa %xmm6, %xmm11
+ movdqa %xmm6, %xmm8
+ pclmulqdq $0x11, %xmm5, %xmm11
+ pclmulqdq $0x00, %xmm5, %xmm8
+ pxor %xmm5, %xmm9
+ pxor %xmm6, %xmm10
+ pclmulqdq $0x00, %xmm10, %xmm9
+ pxor %xmm8, %xmm9
+ pxor %xmm11, %xmm9
+ movdqa %xmm9, %xmm10
+ movdqa %xmm11, %xmm6
+ pslldq $8, %xmm10
+ psrldq $8, %xmm9
+ pxor %xmm10, %xmm8
+ pxor %xmm9, %xmm6
+ movdqa %xmm8, %xmm12
+ movdqa %xmm8, %xmm13
+ movdqa %xmm8, %xmm14
+ pslld $31, %xmm12
+ pslld $30, %xmm13
+ pslld $25, %xmm14
+ pxor %xmm13, %xmm12
+ pxor %xmm14, %xmm12
+ movdqa %xmm12, %xmm13
+ psrldq $4, %xmm13
+ pslldq $12, %xmm12
+ pxor %xmm12, %xmm8
+ movdqa %xmm8, %xmm14
+ movdqa %xmm8, %xmm10
+ movdqa %xmm8, %xmm9
+ psrld $0x01, %xmm14
+ psrld $2, %xmm10
+ psrld $7, %xmm9
+ pxor %xmm10, %xmm14
+ pxor %xmm9, %xmm14
+ pxor %xmm13, %xmm14
+ pxor %xmm8, %xmm14
+ pxor %xmm14, %xmm6
+ pshufb L_aes_gcm_bswap_mask(%rip), %xmm6
+ movdqa 144(%rsp), %xmm0
+ pxor %xmm6, %xmm0
+ cmpl $16, %r14d
+ je L_AES_GCM_decrypt_cmp_tag_16
+ subq $16, %rsp
+ xorq %rcx, %rcx
+ xorq %rbx, %rbx
+ movdqa %xmm0, (%rsp)
+L_AES_GCM_decrypt_cmp_tag_loop:
+ movzbl (%rsp,%rcx,1), %r13d
+ xorb (%r8,%rcx,1), %r13b
+ orb %r13b, %bl
+ incl %ecx
+ cmpl %r14d, %ecx
+ jne L_AES_GCM_decrypt_cmp_tag_loop
+ cmpb $0x00, %bl
+ sete %bl
+ addq $16, %rsp
+ xorq %rcx, %rcx
+ jmp L_AES_GCM_decrypt_cmp_tag_done
+L_AES_GCM_decrypt_cmp_tag_16:
+ movdqu (%r8), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %rdx
+ # %%edx == 0xFFFF then return 1 else => return 0
+ xorl %ebx, %ebx
+ cmpl $0xffff, %edx
+ sete %bl
+L_AES_GCM_decrypt_cmp_tag_done:
+ movl %ebx, (%rbp)
+ addq $0xa8, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %rbx
+ popq %r12
+ popq %r13
+ repz retq
+#ifndef __APPLE__
+.size AES_GCM_decrypt,.-AES_GCM_decrypt
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX1
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_one:
+.quad 0x0, 0x1
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_two:
+.quad 0x0, 0x2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_three:
+.quad 0x0, 0x3
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_four:
+.quad 0x0, 0x4
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_five:
+.quad 0x0, 0x5
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_six:
+.quad 0x0, 0x6
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_seven:
+.quad 0x0, 0x7
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_eight:
+.quad 0x0, 0x8
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_bswap_epi64:
+.quad 0x1020304050607, 0x8090a0b0c0d0e0f
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_bswap_mask:
+.quad 0x8090a0b0c0d0e0f, 0x1020304050607
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_aes_gcm_mod2_128:
+.quad 0x1, 0xc200000000000000
+#ifndef __APPLE__
+.text
+.globl AES_GCM_encrypt_avx1
+.type AES_GCM_encrypt_avx1,@function
+.align 4
+AES_GCM_encrypt_avx1:
+#else
+.section __TEXT,__text
+.globl _AES_GCM_encrypt_avx1
+.p2align 2
+_AES_GCM_encrypt_avx1:
+#endif /* __APPLE__ */
+ pushq %r13
+ pushq %r12
+ pushq %rbx
+ pushq %r14
+ pushq %r15
+ movq %rdx, %r12
+ movq %rcx, %rax
+ movl 48(%rsp), %r11d
+ movl 56(%rsp), %ebx
+ movl 64(%rsp), %r14d
+ movq 72(%rsp), %r15
+ movl 80(%rsp), %r10d
+ subq $0xa0, %rsp
+ vpxor %xmm4, %xmm4, %xmm4
+ vpxor %xmm6, %xmm6, %xmm6
+ movl %ebx, %edx
+ cmpl $12, %edx
+ jne L_AES_GCM_encrypt_avx1_iv_not_12
+ # # Calculate values when IV is 12 bytes
+ # Set counter based on IV
+ movl $0x1000000, %ecx
+ vpinsrq $0x00, (%rax), %xmm4, %xmm4
+ vpinsrd $2, 8(%rax), %xmm4, %xmm4
+ vpinsrd $3, %ecx, %xmm4, %xmm4
+ # H = Encrypt X(=0) and T = Encrypt counter
+ vmovdqa (%r15), %xmm5
+ vpxor %xmm5, %xmm4, %xmm1
+ vmovdqa 16(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 32(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 48(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 64(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 80(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 96(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 112(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 128(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 144(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm7
+ jl L_AES_GCM_encrypt_avx1_calc_iv_12_last
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 176(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm7
+ jl L_AES_GCM_encrypt_avx1_calc_iv_12_last
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 208(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 224(%r15), %xmm7
+L_AES_GCM_encrypt_avx1_calc_iv_12_last:
+ vaesenclast %xmm7, %xmm5, %xmm5
+ vaesenclast %xmm7, %xmm1, %xmm1
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5
+ vmovdqa %xmm1, 144(%rsp)
+ jmp L_AES_GCM_encrypt_avx1_iv_done
+L_AES_GCM_encrypt_avx1_iv_not_12:
+ # Calculate values when IV is not 12 bytes
+ # H = Encrypt X(=0)
+ vmovdqa (%r15), %xmm5
+ vaesenc 16(%r15), %xmm5, %xmm5
+ vaesenc 32(%r15), %xmm5, %xmm5
+ vaesenc 48(%r15), %xmm5, %xmm5
+ vaesenc 64(%r15), %xmm5, %xmm5
+ vaesenc 80(%r15), %xmm5, %xmm5
+ vaesenc 96(%r15), %xmm5, %xmm5
+ vaesenc 112(%r15), %xmm5, %xmm5
+ vaesenc 128(%r15), %xmm5, %xmm5
+ vaesenc 144(%r15), %xmm5, %xmm5
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last
+ vaesenc %xmm9, %xmm5, %xmm5
+ vaesenc 176(%r15), %xmm5, %xmm5
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last
+ vaesenc %xmm9, %xmm5, %xmm5
+ vaesenc 208(%r15), %xmm5, %xmm5
+ vmovdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last:
+ vaesenclast %xmm9, %xmm5, %xmm5
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5
+ # Calc counter
+ # Initialization vector
+ cmpl $0x00, %edx
+ movq $0x00, %rcx
+ je L_AES_GCM_encrypt_avx1_calc_iv_done
+ cmpl $16, %edx
+ jl L_AES_GCM_encrypt_avx1_calc_iv_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_encrypt_avx1_calc_iv_16_loop:
+ vmovdqu (%rax,%rcx,1), %xmm8
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ vpxor %xmm8, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm4, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpxor %xmm4, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm4
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm4, %xmm4
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm4, %xmm4
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_avx1_calc_iv_16_loop
+ movl %ebx, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_encrypt_avx1_calc_iv_done
+L_AES_GCM_encrypt_avx1_calc_iv_lt16:
+ subq $16, %rsp
+ vpxor %xmm8, %xmm8, %xmm8
+ xorl %ebx, %ebx
+ vmovdqa %xmm8, (%rsp)
+L_AES_GCM_encrypt_avx1_calc_iv_loop:
+ movzbl (%rax,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_avx1_calc_iv_loop
+ vmovdqa (%rsp), %xmm8
+ addq $16, %rsp
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ vpxor %xmm8, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm4, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpxor %xmm4, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm4
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm4, %xmm4
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm4, %xmm4
+L_AES_GCM_encrypt_avx1_calc_iv_done:
+ # T = Encrypt counter
+ vpxor %xmm0, %xmm0, %xmm0
+ shll $3, %edx
+ vpinsrq $0x00, %rdx, %xmm0, %xmm0
+ vpxor %xmm0, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm4, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpxor %xmm4, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm4
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm4, %xmm4
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm4, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4
+ # Encrypt counter
+ vmovdqa (%r15), %xmm8
+ vpxor %xmm4, %xmm8, %xmm8
+ vaesenc 16(%r15), %xmm8, %xmm8
+ vaesenc 32(%r15), %xmm8, %xmm8
+ vaesenc 48(%r15), %xmm8, %xmm8
+ vaesenc 64(%r15), %xmm8, %xmm8
+ vaesenc 80(%r15), %xmm8, %xmm8
+ vaesenc 96(%r15), %xmm8, %xmm8
+ vaesenc 112(%r15), %xmm8, %xmm8
+ vaesenc 128(%r15), %xmm8, %xmm8
+ vaesenc 144(%r15), %xmm8, %xmm8
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 176(%r15), %xmm8, %xmm8
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 208(%r15), %xmm8, %xmm8
+ vmovdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last:
+ vaesenclast %xmm9, %xmm8, %xmm8
+ vmovdqa %xmm8, 144(%rsp)
+L_AES_GCM_encrypt_avx1_iv_done:
+ # Additional authentication data
+ movl %r11d, %edx
+ cmpl $0x00, %edx
+ je L_AES_GCM_encrypt_avx1_calc_aad_done
+ xorl %ecx, %ecx
+ cmpl $16, %edx
+ jl L_AES_GCM_encrypt_avx1_calc_aad_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_encrypt_avx1_calc_aad_16_loop:
+ vmovdqu (%r12,%rcx,1), %xmm8
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ vpxor %xmm8, %xmm6, %xmm6
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm6, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm6, %xmm6
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm6, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm6, %xmm6
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm6, %xmm6
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm6, %xmm6
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm6, %xmm6
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_avx1_calc_aad_16_loop
+ movl %r11d, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_encrypt_avx1_calc_aad_done
+L_AES_GCM_encrypt_avx1_calc_aad_lt16:
+ subq $16, %rsp
+ vpxor %xmm8, %xmm8, %xmm8
+ xorl %ebx, %ebx
+ vmovdqa %xmm8, (%rsp)
+L_AES_GCM_encrypt_avx1_calc_aad_loop:
+ movzbl (%r12,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_avx1_calc_aad_loop
+ vmovdqa (%rsp), %xmm8
+ addq $16, %rsp
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ vpxor %xmm8, %xmm6, %xmm6
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm6, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm6, %xmm6
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm6, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm6, %xmm6
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm6, %xmm6
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm6, %xmm6
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm6, %xmm6
+L_AES_GCM_encrypt_avx1_calc_aad_done:
+ # Calculate counter and H
+ vpsrlq $63, %xmm5, %xmm9
+ vpsllq $0x01, %xmm5, %xmm8
+ vpslldq $8, %xmm9, %xmm9
+ vpor %xmm9, %xmm8, %xmm8
+ vpshufd $0xff, %xmm5, %xmm5
+ vpsrad $31, %xmm5, %xmm5
+ vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4
+ vpand L_avx1_aes_gcm_mod2_128(%rip), %xmm5, %xmm5
+ vpaddd L_avx1_aes_gcm_one(%rip), %xmm4, %xmm4
+ vpxor %xmm8, %xmm5, %xmm5
+ vmovdqa %xmm4, 128(%rsp)
+ xorl %ebx, %ebx
+ cmpl $0x80, %r9d
+ movl %r9d, %r13d
+ jl L_AES_GCM_encrypt_avx1_done_128
+ andl $0xffffff80, %r13d
+ vmovdqa %xmm6, %xmm2
+ # H ^ 1
+ vmovdqa %xmm5, (%rsp)
+ # H ^ 2
+ vpclmulqdq $0x00, %xmm5, %xmm5, %xmm8
+ vpclmulqdq $0x11, %xmm5, %xmm5, %xmm0
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm0, %xmm0
+ vmovdqa %xmm0, 16(%rsp)
+ # H ^ 3
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm5, %xmm9
+ vpshufd $0x4e, %xmm0, %xmm10
+ vpclmulqdq $0x11, %xmm5, %xmm0, %xmm11
+ vpclmulqdq $0x00, %xmm5, %xmm0, %xmm8
+ vpxor %xmm5, %xmm9, %xmm9
+ vpxor %xmm0, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm1
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm1, %xmm1
+ vmovdqa %xmm1, 32(%rsp)
+ # H ^ 4
+ vpclmulqdq $0x00, %xmm0, %xmm0, %xmm8
+ vpclmulqdq $0x11, %xmm0, %xmm0, %xmm3
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm3, %xmm3
+ vmovdqa %xmm3, 48(%rsp)
+ # H ^ 5
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm0, %xmm9
+ vpshufd $0x4e, %xmm1, %xmm10
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm11
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm8
+ vpxor %xmm0, %xmm9, %xmm9
+ vpxor %xmm1, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm7
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm7, %xmm7
+ vmovdqa %xmm7, 64(%rsp)
+ # H ^ 6
+ vpclmulqdq $0x00, %xmm1, %xmm1, %xmm8
+ vpclmulqdq $0x11, %xmm1, %xmm1, %xmm7
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm7, %xmm7
+ vmovdqa %xmm7, 80(%rsp)
+ # H ^ 7
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm1, %xmm9
+ vpshufd $0x4e, %xmm3, %xmm10
+ vpclmulqdq $0x11, %xmm1, %xmm3, %xmm11
+ vpclmulqdq $0x00, %xmm1, %xmm3, %xmm8
+ vpxor %xmm1, %xmm9, %xmm9
+ vpxor %xmm3, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm7
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm7, %xmm7
+ vmovdqa %xmm7, 96(%rsp)
+ # H ^ 8
+ vpclmulqdq $0x00, %xmm3, %xmm3, %xmm8
+ vpclmulqdq $0x11, %xmm3, %xmm3, %xmm7
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm7, %xmm7
+ vmovdqa %xmm7, 112(%rsp)
+ # First 128 bytes of input
+ vmovdqa 128(%rsp), %xmm0
+ vmovdqa L_avx1_aes_gcm_bswap_epi64(%rip), %xmm1
+ vpshufb %xmm1, %xmm0, %xmm8
+ vpaddd L_avx1_aes_gcm_one(%rip), %xmm0, %xmm9
+ vpshufb %xmm1, %xmm9, %xmm9
+ vpaddd L_avx1_aes_gcm_two(%rip), %xmm0, %xmm10
+ vpshufb %xmm1, %xmm10, %xmm10
+ vpaddd L_avx1_aes_gcm_three(%rip), %xmm0, %xmm11
+ vpshufb %xmm1, %xmm11, %xmm11
+ vpaddd L_avx1_aes_gcm_four(%rip), %xmm0, %xmm12
+ vpshufb %xmm1, %xmm12, %xmm12
+ vpaddd L_avx1_aes_gcm_five(%rip), %xmm0, %xmm13
+ vpshufb %xmm1, %xmm13, %xmm13
+ vpaddd L_avx1_aes_gcm_six(%rip), %xmm0, %xmm14
+ vpshufb %xmm1, %xmm14, %xmm14
+ vpaddd L_avx1_aes_gcm_seven(%rip), %xmm0, %xmm15
+ vpshufb %xmm1, %xmm15, %xmm15
+ vpaddd L_avx1_aes_gcm_eight(%rip), %xmm0, %xmm0
+ vmovdqa (%r15), %xmm7
+ vmovdqa %xmm0, 128(%rsp)
+ vpxor %xmm7, %xmm8, %xmm8
+ vpxor %xmm7, %xmm9, %xmm9
+ vpxor %xmm7, %xmm10, %xmm10
+ vpxor %xmm7, %xmm11, %xmm11
+ vpxor %xmm7, %xmm12, %xmm12
+ vpxor %xmm7, %xmm13, %xmm13
+ vpxor %xmm7, %xmm14, %xmm14
+ vpxor %xmm7, %xmm15, %xmm15
+ vmovdqa 16(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 32(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 48(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 64(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 80(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 96(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 112(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 128(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 144(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm7
+ jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 176(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm7
+ jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 208(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 224(%r15), %xmm7
+L_AES_GCM_encrypt_avx1_aesenc_128_enc_done:
+ vaesenclast %xmm7, %xmm8, %xmm8
+ vaesenclast %xmm7, %xmm9, %xmm9
+ vmovdqu (%rdi), %xmm0
+ vmovdqu 16(%rdi), %xmm1
+ vpxor %xmm0, %xmm8, %xmm8
+ vpxor %xmm1, %xmm9, %xmm9
+ vmovdqu %xmm8, (%rsi)
+ vmovdqu %xmm9, 16(%rsi)
+ vaesenclast %xmm7, %xmm10, %xmm10
+ vaesenclast %xmm7, %xmm11, %xmm11
+ vmovdqu 32(%rdi), %xmm0
+ vmovdqu 48(%rdi), %xmm1
+ vpxor %xmm0, %xmm10, %xmm10
+ vpxor %xmm1, %xmm11, %xmm11
+ vmovdqu %xmm10, 32(%rsi)
+ vmovdqu %xmm11, 48(%rsi)
+ vaesenclast %xmm7, %xmm12, %xmm12
+ vaesenclast %xmm7, %xmm13, %xmm13
+ vmovdqu 64(%rdi), %xmm0
+ vmovdqu 80(%rdi), %xmm1
+ vpxor %xmm0, %xmm12, %xmm12
+ vpxor %xmm1, %xmm13, %xmm13
+ vmovdqu %xmm12, 64(%rsi)
+ vmovdqu %xmm13, 80(%rsi)
+ vaesenclast %xmm7, %xmm14, %xmm14
+ vaesenclast %xmm7, %xmm15, %xmm15
+ vmovdqu 96(%rdi), %xmm0
+ vmovdqu 112(%rdi), %xmm1
+ vpxor %xmm0, %xmm14, %xmm14
+ vpxor %xmm1, %xmm15, %xmm15
+ vmovdqu %xmm14, 96(%rsi)
+ vmovdqu %xmm15, 112(%rsi)
+ cmpl $0x80, %r13d
+ movl $0x80, %ebx
+ jle L_AES_GCM_encrypt_avx1_end_128
+ # More 128 bytes of input
+L_AES_GCM_encrypt_avx1_ghash_128:
+ leaq (%rdi,%rbx,1), %rcx
+ leaq (%rsi,%rbx,1), %rdx
+ vmovdqa 128(%rsp), %xmm0
+ vmovdqa L_avx1_aes_gcm_bswap_epi64(%rip), %xmm1
+ vpshufb %xmm1, %xmm0, %xmm8
+ vpaddd L_avx1_aes_gcm_one(%rip), %xmm0, %xmm9
+ vpshufb %xmm1, %xmm9, %xmm9
+ vpaddd L_avx1_aes_gcm_two(%rip), %xmm0, %xmm10
+ vpshufb %xmm1, %xmm10, %xmm10
+ vpaddd L_avx1_aes_gcm_three(%rip), %xmm0, %xmm11
+ vpshufb %xmm1, %xmm11, %xmm11
+ vpaddd L_avx1_aes_gcm_four(%rip), %xmm0, %xmm12
+ vpshufb %xmm1, %xmm12, %xmm12
+ vpaddd L_avx1_aes_gcm_five(%rip), %xmm0, %xmm13
+ vpshufb %xmm1, %xmm13, %xmm13
+ vpaddd L_avx1_aes_gcm_six(%rip), %xmm0, %xmm14
+ vpshufb %xmm1, %xmm14, %xmm14
+ vpaddd L_avx1_aes_gcm_seven(%rip), %xmm0, %xmm15
+ vpshufb %xmm1, %xmm15, %xmm15
+ vpaddd L_avx1_aes_gcm_eight(%rip), %xmm0, %xmm0
+ vmovdqa (%r15), %xmm7
+ vmovdqa %xmm0, 128(%rsp)
+ vpxor %xmm7, %xmm8, %xmm8
+ vpxor %xmm7, %xmm9, %xmm9
+ vpxor %xmm7, %xmm10, %xmm10
+ vpxor %xmm7, %xmm11, %xmm11
+ vpxor %xmm7, %xmm12, %xmm12
+ vpxor %xmm7, %xmm13, %xmm13
+ vpxor %xmm7, %xmm14, %xmm14
+ vpxor %xmm7, %xmm15, %xmm15
+ vmovdqa 112(%rsp), %xmm7
+ vmovdqu -128(%rdx), %xmm0
+ vaesenc 16(%r15), %xmm8, %xmm8
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm3
+ vaesenc 16(%r15), %xmm9, %xmm9
+ vaesenc 16(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm2
+ vaesenc 16(%r15), %xmm11, %xmm11
+ vaesenc 16(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm1, %xmm1
+ vaesenc 16(%r15), %xmm13, %xmm13
+ vaesenc 16(%r15), %xmm14, %xmm14
+ vaesenc 16(%r15), %xmm15, %xmm15
+ vpxor %xmm2, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa 96(%rsp), %xmm7
+ vmovdqu -112(%rdx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 32(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 32(%r15), %xmm9, %xmm9
+ vaesenc 32(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 32(%r15), %xmm11, %xmm11
+ vaesenc 32(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 32(%r15), %xmm13, %xmm13
+ vaesenc 32(%r15), %xmm14, %xmm14
+ vaesenc 32(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 80(%rsp), %xmm7
+ vmovdqu -96(%rdx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 48(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 48(%r15), %xmm9, %xmm9
+ vaesenc 48(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 48(%r15), %xmm11, %xmm11
+ vaesenc 48(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 48(%r15), %xmm13, %xmm13
+ vaesenc 48(%r15), %xmm14, %xmm14
+ vaesenc 48(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 64(%rsp), %xmm7
+ vmovdqu -80(%rdx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 64(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 64(%r15), %xmm9, %xmm9
+ vaesenc 64(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 64(%r15), %xmm11, %xmm11
+ vaesenc 64(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 64(%r15), %xmm13, %xmm13
+ vaesenc 64(%r15), %xmm14, %xmm14
+ vaesenc 64(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 48(%rsp), %xmm7
+ vmovdqu -64(%rdx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 80(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 80(%r15), %xmm9, %xmm9
+ vaesenc 80(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 80(%r15), %xmm11, %xmm11
+ vaesenc 80(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 80(%r15), %xmm13, %xmm13
+ vaesenc 80(%r15), %xmm14, %xmm14
+ vaesenc 80(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 32(%rsp), %xmm7
+ vmovdqu -48(%rdx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 96(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 96(%r15), %xmm9, %xmm9
+ vaesenc 96(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 96(%r15), %xmm11, %xmm11
+ vaesenc 96(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 96(%r15), %xmm13, %xmm13
+ vaesenc 96(%r15), %xmm14, %xmm14
+ vaesenc 96(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 16(%rsp), %xmm7
+ vmovdqu -32(%rdx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 112(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 112(%r15), %xmm9, %xmm9
+ vaesenc 112(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 112(%r15), %xmm11, %xmm11
+ vaesenc 112(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 112(%r15), %xmm13, %xmm13
+ vaesenc 112(%r15), %xmm14, %xmm14
+ vaesenc 112(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa (%rsp), %xmm7
+ vmovdqu -16(%rdx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 128(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 128(%r15), %xmm9, %xmm9
+ vaesenc 128(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 128(%r15), %xmm11, %xmm11
+ vaesenc 128(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 128(%r15), %xmm13, %xmm13
+ vaesenc 128(%r15), %xmm14, %xmm14
+ vaesenc 128(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vpslldq $8, %xmm1, %xmm5
+ vpsrldq $8, %xmm1, %xmm1
+ vaesenc 144(%r15), %xmm8, %xmm8
+ vpxor %xmm5, %xmm2, %xmm2
+ vpxor %xmm1, %xmm3, %xmm3
+ vaesenc 144(%r15), %xmm9, %xmm9
+ vpslld $31, %xmm2, %xmm7
+ vpslld $30, %xmm2, %xmm4
+ vpslld $25, %xmm2, %xmm5
+ vaesenc 144(%r15), %xmm10, %xmm10
+ vpxor %xmm4, %xmm7, %xmm7
+ vpxor %xmm5, %xmm7, %xmm7
+ vaesenc 144(%r15), %xmm11, %xmm11
+ vpsrldq $4, %xmm7, %xmm4
+ vpslldq $12, %xmm7, %xmm7
+ vaesenc 144(%r15), %xmm12, %xmm12
+ vpxor %xmm7, %xmm2, %xmm2
+ vpsrld $0x01, %xmm2, %xmm5
+ vaesenc 144(%r15), %xmm13, %xmm13
+ vpsrld $2, %xmm2, %xmm1
+ vpsrld $7, %xmm2, %xmm0
+ vaesenc 144(%r15), %xmm14, %xmm14
+ vpxor %xmm1, %xmm5, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vaesenc 144(%r15), %xmm15, %xmm15
+ vpxor %xmm4, %xmm5, %xmm5
+ vpxor %xmm5, %xmm2, %xmm2
+ vpxor %xmm3, %xmm2, %xmm2
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm7
+ jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 176(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm7
+ jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 208(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 224(%r15), %xmm7
+L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done:
+ vaesenclast %xmm7, %xmm8, %xmm8
+ vaesenclast %xmm7, %xmm9, %xmm9
+ vmovdqu (%rcx), %xmm0
+ vmovdqu 16(%rcx), %xmm1
+ vpxor %xmm0, %xmm8, %xmm8
+ vpxor %xmm1, %xmm9, %xmm9
+ vmovdqu %xmm8, (%rdx)
+ vmovdqu %xmm9, 16(%rdx)
+ vaesenclast %xmm7, %xmm10, %xmm10
+ vaesenclast %xmm7, %xmm11, %xmm11
+ vmovdqu 32(%rcx), %xmm0
+ vmovdqu 48(%rcx), %xmm1
+ vpxor %xmm0, %xmm10, %xmm10
+ vpxor %xmm1, %xmm11, %xmm11
+ vmovdqu %xmm10, 32(%rdx)
+ vmovdqu %xmm11, 48(%rdx)
+ vaesenclast %xmm7, %xmm12, %xmm12
+ vaesenclast %xmm7, %xmm13, %xmm13
+ vmovdqu 64(%rcx), %xmm0
+ vmovdqu 80(%rcx), %xmm1
+ vpxor %xmm0, %xmm12, %xmm12
+ vpxor %xmm1, %xmm13, %xmm13
+ vmovdqu %xmm12, 64(%rdx)
+ vmovdqu %xmm13, 80(%rdx)
+ vaesenclast %xmm7, %xmm14, %xmm14
+ vaesenclast %xmm7, %xmm15, %xmm15
+ vmovdqu 96(%rcx), %xmm0
+ vmovdqu 112(%rcx), %xmm1
+ vpxor %xmm0, %xmm14, %xmm14
+ vpxor %xmm1, %xmm15, %xmm15
+ vmovdqu %xmm14, 96(%rdx)
+ vmovdqu %xmm15, 112(%rdx)
+ addl $0x80, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_encrypt_avx1_ghash_128
+L_AES_GCM_encrypt_avx1_end_128:
+ vmovdqa L_avx1_aes_gcm_bswap_mask(%rip), %xmm4
+ vpshufb %xmm4, %xmm8, %xmm8
+ vpshufb %xmm4, %xmm9, %xmm9
+ vpshufb %xmm4, %xmm10, %xmm10
+ vpshufb %xmm4, %xmm11, %xmm11
+ vpxor %xmm2, %xmm8, %xmm8
+ vpshufb %xmm4, %xmm12, %xmm12
+ vpshufb %xmm4, %xmm13, %xmm13
+ vpshufb %xmm4, %xmm14, %xmm14
+ vpshufb %xmm4, %xmm15, %xmm15
+ vmovdqa (%rsp), %xmm7
+ vmovdqa 16(%rsp), %xmm5
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm15, %xmm1
+ vpshufd $0x4e, %xmm7, %xmm2
+ vpclmulqdq $0x11, %xmm15, %xmm7, %xmm3
+ vpclmulqdq $0x00, %xmm15, %xmm7, %xmm0
+ vpxor %xmm15, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm4
+ vmovdqa %xmm3, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm4, %xmm4
+ vpxor %xmm1, %xmm6, %xmm6
+ # ghash_gfmul_xor_avx
+ vpshufd $0x4e, %xmm14, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm14, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm14, %xmm5, %xmm0
+ vpxor %xmm14, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vpxor %xmm0, %xmm4, %xmm4
+ vpxor %xmm3, %xmm6, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm4, %xmm4
+ vpxor %xmm1, %xmm6, %xmm6
+ vmovdqa 32(%rsp), %xmm7
+ vmovdqa 48(%rsp), %xmm5
+ # ghash_gfmul_xor_avx
+ vpshufd $0x4e, %xmm13, %xmm1
+ vpshufd $0x4e, %xmm7, %xmm2
+ vpclmulqdq $0x11, %xmm13, %xmm7, %xmm3
+ vpclmulqdq $0x00, %xmm13, %xmm7, %xmm0
+ vpxor %xmm13, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vpxor %xmm0, %xmm4, %xmm4
+ vpxor %xmm3, %xmm6, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm4, %xmm4
+ vpxor %xmm1, %xmm6, %xmm6
+ # ghash_gfmul_xor_avx
+ vpshufd $0x4e, %xmm12, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm12, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm12, %xmm5, %xmm0
+ vpxor %xmm12, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vpxor %xmm0, %xmm4, %xmm4
+ vpxor %xmm3, %xmm6, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm4, %xmm4
+ vpxor %xmm1, %xmm6, %xmm6
+ vmovdqa 64(%rsp), %xmm7
+ vmovdqa 80(%rsp), %xmm5
+ # ghash_gfmul_xor_avx
+ vpshufd $0x4e, %xmm11, %xmm1
+ vpshufd $0x4e, %xmm7, %xmm2
+ vpclmulqdq $0x11, %xmm11, %xmm7, %xmm3
+ vpclmulqdq $0x00, %xmm11, %xmm7, %xmm0
+ vpxor %xmm11, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vpxor %xmm0, %xmm4, %xmm4
+ vpxor %xmm3, %xmm6, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm4, %xmm4
+ vpxor %xmm1, %xmm6, %xmm6
+ # ghash_gfmul_xor_avx
+ vpshufd $0x4e, %xmm10, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm10, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm10, %xmm5, %xmm0
+ vpxor %xmm10, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vpxor %xmm0, %xmm4, %xmm4
+ vpxor %xmm3, %xmm6, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm4, %xmm4
+ vpxor %xmm1, %xmm6, %xmm6
+ vmovdqa 96(%rsp), %xmm7
+ vmovdqa 112(%rsp), %xmm5
+ # ghash_gfmul_xor_avx
+ vpshufd $0x4e, %xmm9, %xmm1
+ vpshufd $0x4e, %xmm7, %xmm2
+ vpclmulqdq $0x11, %xmm9, %xmm7, %xmm3
+ vpclmulqdq $0x00, %xmm9, %xmm7, %xmm0
+ vpxor %xmm9, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vpxor %xmm0, %xmm4, %xmm4
+ vpxor %xmm3, %xmm6, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm4, %xmm4
+ vpxor %xmm1, %xmm6, %xmm6
+ # ghash_gfmul_xor_avx
+ vpshufd $0x4e, %xmm8, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm8, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm8, %xmm5, %xmm0
+ vpxor %xmm8, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vpxor %xmm0, %xmm4, %xmm4
+ vpxor %xmm3, %xmm6, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm4, %xmm4
+ vpxor %xmm1, %xmm6, %xmm6
+ vpslld $31, %xmm4, %xmm0
+ vpslld $30, %xmm4, %xmm1
+ vpslld $25, %xmm4, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm4, %xmm4
+ vpsrld $0x01, %xmm4, %xmm2
+ vpsrld $2, %xmm4, %xmm3
+ vpsrld $7, %xmm4, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm4, %xmm2, %xmm2
+ vpxor %xmm2, %xmm6, %xmm6
+ vmovdqa (%rsp), %xmm5
+L_AES_GCM_encrypt_avx1_done_128:
+ movl %r9d, %edx
+ cmpl %edx, %ebx
+ jge L_AES_GCM_encrypt_avx1_done_enc
+ movl %r9d, %r13d
+ andl $0xfffffff0, %r13d
+ cmpl %r13d, %ebx
+ jge L_AES_GCM_encrypt_avx1_last_block_done
+ vmovdqa 128(%rsp), %xmm9
+ vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8
+ vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9
+ vmovdqa %xmm9, 128(%rsp)
+ vpxor (%r15), %xmm8, %xmm8
+ vaesenc 16(%r15), %xmm8, %xmm8
+ vaesenc 32(%r15), %xmm8, %xmm8
+ vaesenc 48(%r15), %xmm8, %xmm8
+ vaesenc 64(%r15), %xmm8, %xmm8
+ vaesenc 80(%r15), %xmm8, %xmm8
+ vaesenc 96(%r15), %xmm8, %xmm8
+ vaesenc 112(%r15), %xmm8, %xmm8
+ vaesenc 128(%r15), %xmm8, %xmm8
+ vaesenc 144(%r15), %xmm8, %xmm8
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_aesenc_block_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 176(%r15), %xmm8, %xmm8
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_aesenc_block_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 208(%r15), %xmm8, %xmm8
+ vmovdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_avx1_aesenc_block_last:
+ vaesenclast %xmm9, %xmm8, %xmm8
+ vmovdqu (%rdi,%rbx,1), %xmm9
+ vpxor %xmm9, %xmm8, %xmm8
+ vmovdqu %xmm8, (%rsi,%rbx,1)
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ vpxor %xmm8, %xmm6, %xmm6
+ addl $16, %ebx
+ cmpl %r13d, %ebx
+ jge L_AES_GCM_encrypt_avx1_last_block_ghash
+L_AES_GCM_encrypt_avx1_last_block_start:
+ vmovdqu (%rdi,%rbx,1), %xmm13
+ vmovdqa 128(%rsp), %xmm9
+ vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8
+ vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9
+ vmovdqa %xmm9, 128(%rsp)
+ vpxor (%r15), %xmm8, %xmm8
+ vpclmulqdq $16, %xmm5, %xmm6, %xmm10
+ vaesenc 16(%r15), %xmm8, %xmm8
+ vaesenc 32(%r15), %xmm8, %xmm8
+ vpclmulqdq $0x01, %xmm5, %xmm6, %xmm11
+ vaesenc 48(%r15), %xmm8, %xmm8
+ vaesenc 64(%r15), %xmm8, %xmm8
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm12
+ vaesenc 80(%r15), %xmm8, %xmm8
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm1
+ vaesenc 96(%r15), %xmm8, %xmm8
+ vpxor %xmm11, %xmm10, %xmm10
+ vpslldq $8, %xmm10, %xmm2
+ vpsrldq $8, %xmm10, %xmm10
+ vaesenc 112(%r15), %xmm8, %xmm8
+ vpxor %xmm12, %xmm2, %xmm2
+ vpxor %xmm10, %xmm1, %xmm3
+ vmovdqa L_avx1_aes_gcm_mod2_128(%rip), %xmm0
+ vpclmulqdq $16, %xmm0, %xmm2, %xmm11
+ vaesenc 128(%r15), %xmm8, %xmm8
+ vpshufd $0x4e, %xmm2, %xmm10
+ vpxor %xmm11, %xmm10, %xmm10
+ vpclmulqdq $16, %xmm0, %xmm10, %xmm11
+ vaesenc 144(%r15), %xmm8, %xmm8
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpxor %xmm11, %xmm10, %xmm10
+ vpxor %xmm3, %xmm10, %xmm6
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 176(%r15), %xmm8, %xmm8
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 208(%r15), %xmm8, %xmm8
+ vmovdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_avx1_aesenc_gfmul_last:
+ vaesenclast %xmm9, %xmm8, %xmm8
+ vmovdqa %xmm13, %xmm0
+ vpxor %xmm0, %xmm8, %xmm8
+ vmovdqu %xmm8, (%rsi,%rbx,1)
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ addl $16, %ebx
+ vpxor %xmm8, %xmm6, %xmm6
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_encrypt_avx1_last_block_start
+L_AES_GCM_encrypt_avx1_last_block_ghash:
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm5, %xmm9
+ vpshufd $0x4e, %xmm6, %xmm10
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8
+ vpxor %xmm5, %xmm9, %xmm9
+ vpxor %xmm6, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm6
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm6, %xmm6
+L_AES_GCM_encrypt_avx1_last_block_done:
+ movl %r9d, %ecx
+ movl %ecx, %edx
+ andl $15, %ecx
+ jz L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done
+ vmovdqa 128(%rsp), %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4
+ vpxor (%r15), %xmm4, %xmm4
+ vaesenc 16(%r15), %xmm4, %xmm4
+ vaesenc 32(%r15), %xmm4, %xmm4
+ vaesenc 48(%r15), %xmm4, %xmm4
+ vaesenc 64(%r15), %xmm4, %xmm4
+ vaesenc 80(%r15), %xmm4, %xmm4
+ vaesenc 96(%r15), %xmm4, %xmm4
+ vaesenc 112(%r15), %xmm4, %xmm4
+ vaesenc 128(%r15), %xmm4, %xmm4
+ vaesenc 144(%r15), %xmm4, %xmm4
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last
+ vaesenc %xmm9, %xmm4, %xmm4
+ vaesenc 176(%r15), %xmm4, %xmm4
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm9
+ jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last
+ vaesenc %xmm9, %xmm4, %xmm4
+ vaesenc 208(%r15), %xmm4, %xmm4
+ vmovdqa 224(%r15), %xmm9
+L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last:
+ vaesenclast %xmm9, %xmm4, %xmm4
+ subq $16, %rsp
+ xorl %ecx, %ecx
+ vmovdqa %xmm4, (%rsp)
+L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop:
+ movzbl (%rdi,%rbx,1), %r13d
+ xorb (%rsp,%rcx,1), %r13b
+ movb %r13b, (%rsi,%rbx,1)
+ movb %r13b, (%rsp,%rcx,1)
+ incl %ebx
+ incl %ecx
+ cmpl %edx, %ebx
+ jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop
+ xorq %r13, %r13
+ cmpl $16, %ecx
+ je L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc
+L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop:
+ movb %r13b, (%rsp,%rcx,1)
+ incl %ecx
+ cmpl $16, %ecx
+ jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop
+L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc:
+ vmovdqa (%rsp), %xmm4
+ addq $16, %rsp
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4
+ vpxor %xmm4, %xmm6, %xmm6
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm5, %xmm9
+ vpshufd $0x4e, %xmm6, %xmm10
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8
+ vpxor %xmm5, %xmm9, %xmm9
+ vpxor %xmm6, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm6
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm6, %xmm6
+L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done:
+L_AES_GCM_encrypt_avx1_done_enc:
+ movl %r9d, %edx
+ movl %r11d, %ecx
+ shlq $3, %rdx
+ shlq $3, %rcx
+ vpinsrq $0x00, %rdx, %xmm0, %xmm0
+ vpinsrq $0x01, %rcx, %xmm0, %xmm0
+ vpxor %xmm0, %xmm6, %xmm6
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm5, %xmm9
+ vpshufd $0x4e, %xmm6, %xmm10
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8
+ vpxor %xmm5, %xmm9, %xmm9
+ vpxor %xmm6, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm6
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm6, %xmm6
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm6, %xmm6
+ vpxor 144(%rsp), %xmm6, %xmm0
+ cmpl $16, %r14d
+ je L_AES_GCM_encrypt_avx1_store_tag_16
+ xorq %rcx, %rcx
+ vmovdqa %xmm0, (%rsp)
+L_AES_GCM_encrypt_avx1_store_tag_loop:
+ movzbl (%rsp,%rcx,1), %r13d
+ movb %r13b, (%r8,%rcx,1)
+ incl %ecx
+ cmpl %r14d, %ecx
+ jne L_AES_GCM_encrypt_avx1_store_tag_loop
+ jmp L_AES_GCM_encrypt_avx1_store_tag_done
+L_AES_GCM_encrypt_avx1_store_tag_16:
+ vmovdqu %xmm0, (%r8)
+L_AES_GCM_encrypt_avx1_store_tag_done:
+ vzeroupper
+ addq $0xa0, %rsp
+ popq %r15
+ popq %r14
+ popq %rbx
+ popq %r12
+ popq %r13
+ repz retq
+#ifndef __APPLE__
+.size AES_GCM_encrypt_avx1,.-AES_GCM_encrypt_avx1
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl AES_GCM_decrypt_avx1
+.type AES_GCM_decrypt_avx1,@function
+.align 4
+AES_GCM_decrypt_avx1:
+#else
+.section __TEXT,__text
+.globl _AES_GCM_decrypt_avx1
+.p2align 2
+_AES_GCM_decrypt_avx1:
+#endif /* __APPLE__ */
+ pushq %r13
+ pushq %r12
+ pushq %rbx
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rdx, %r12
+ movq %rcx, %rax
+ movl 56(%rsp), %r11d
+ movl 64(%rsp), %ebx
+ movl 72(%rsp), %r14d
+ movq 80(%rsp), %r15
+ movl 88(%rsp), %r10d
+ movq 96(%rsp), %rbp
+ subq $0xa8, %rsp
+ vpxor %xmm4, %xmm4, %xmm4
+ vpxor %xmm6, %xmm6, %xmm6
+ cmpl $12, %ebx
+ movl %ebx, %edx
+ jne L_AES_GCM_decrypt_avx1_iv_not_12
+ # # Calculate values when IV is 12 bytes
+ # Set counter based on IV
+ movl $0x1000000, %ecx
+ vpinsrq $0x00, (%rax), %xmm4, %xmm4
+ vpinsrd $2, 8(%rax), %xmm4, %xmm4
+ vpinsrd $3, %ecx, %xmm4, %xmm4
+ # H = Encrypt X(=0) and T = Encrypt counter
+ vmovdqa (%r15), %xmm5
+ vpxor %xmm5, %xmm4, %xmm1
+ vmovdqa 16(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 32(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 48(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 64(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 80(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 96(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 112(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 128(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 144(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm7
+ jl L_AES_GCM_decrypt_avx1_calc_iv_12_last
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 176(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm7
+ jl L_AES_GCM_decrypt_avx1_calc_iv_12_last
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 208(%r15), %xmm7
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm1, %xmm1
+ vmovdqa 224(%r15), %xmm7
+L_AES_GCM_decrypt_avx1_calc_iv_12_last:
+ vaesenclast %xmm7, %xmm5, %xmm5
+ vaesenclast %xmm7, %xmm1, %xmm1
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5
+ vmovdqa %xmm1, 144(%rsp)
+ jmp L_AES_GCM_decrypt_avx1_iv_done
+L_AES_GCM_decrypt_avx1_iv_not_12:
+ # Calculate values when IV is not 12 bytes
+ # H = Encrypt X(=0)
+ vmovdqa (%r15), %xmm5
+ vaesenc 16(%r15), %xmm5, %xmm5
+ vaesenc 32(%r15), %xmm5, %xmm5
+ vaesenc 48(%r15), %xmm5, %xmm5
+ vaesenc 64(%r15), %xmm5, %xmm5
+ vaesenc 80(%r15), %xmm5, %xmm5
+ vaesenc 96(%r15), %xmm5, %xmm5
+ vaesenc 112(%r15), %xmm5, %xmm5
+ vaesenc 128(%r15), %xmm5, %xmm5
+ vaesenc 144(%r15), %xmm5, %xmm5
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm9
+ jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last
+ vaesenc %xmm9, %xmm5, %xmm5
+ vaesenc 176(%r15), %xmm5, %xmm5
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm9
+ jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last
+ vaesenc %xmm9, %xmm5, %xmm5
+ vaesenc 208(%r15), %xmm5, %xmm5
+ vmovdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last:
+ vaesenclast %xmm9, %xmm5, %xmm5
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5
+ # Calc counter
+ # Initialization vector
+ cmpl $0x00, %edx
+ movq $0x00, %rcx
+ je L_AES_GCM_decrypt_avx1_calc_iv_done
+ cmpl $16, %edx
+ jl L_AES_GCM_decrypt_avx1_calc_iv_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_decrypt_avx1_calc_iv_16_loop:
+ vmovdqu (%rax,%rcx,1), %xmm8
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ vpxor %xmm8, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm4, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpxor %xmm4, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm4
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm4, %xmm4
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm4, %xmm4
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_avx1_calc_iv_16_loop
+ movl %ebx, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_decrypt_avx1_calc_iv_done
+L_AES_GCM_decrypt_avx1_calc_iv_lt16:
+ subq $16, %rsp
+ vpxor %xmm8, %xmm8, %xmm8
+ xorl %ebx, %ebx
+ vmovdqa %xmm8, (%rsp)
+L_AES_GCM_decrypt_avx1_calc_iv_loop:
+ movzbl (%rax,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_avx1_calc_iv_loop
+ vmovdqa (%rsp), %xmm8
+ addq $16, %rsp
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ vpxor %xmm8, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm4, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpxor %xmm4, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm4
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm4, %xmm4
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm4, %xmm4
+L_AES_GCM_decrypt_avx1_calc_iv_done:
+ # T = Encrypt counter
+ vpxor %xmm0, %xmm0, %xmm0
+ shll $3, %edx
+ vpinsrq $0x00, %rdx, %xmm0, %xmm0
+ vpxor %xmm0, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm4, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpxor %xmm4, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm4
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm4, %xmm4
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm4, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4
+ # Encrypt counter
+ vmovdqa (%r15), %xmm8
+ vpxor %xmm4, %xmm8, %xmm8
+ vaesenc 16(%r15), %xmm8, %xmm8
+ vaesenc 32(%r15), %xmm8, %xmm8
+ vaesenc 48(%r15), %xmm8, %xmm8
+ vaesenc 64(%r15), %xmm8, %xmm8
+ vaesenc 80(%r15), %xmm8, %xmm8
+ vaesenc 96(%r15), %xmm8, %xmm8
+ vaesenc 112(%r15), %xmm8, %xmm8
+ vaesenc 128(%r15), %xmm8, %xmm8
+ vaesenc 144(%r15), %xmm8, %xmm8
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm9
+ jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 176(%r15), %xmm8, %xmm8
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm9
+ jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 208(%r15), %xmm8, %xmm8
+ vmovdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last:
+ vaesenclast %xmm9, %xmm8, %xmm8
+ vmovdqa %xmm8, 144(%rsp)
+L_AES_GCM_decrypt_avx1_iv_done:
+ # Additional authentication data
+ movl %r11d, %edx
+ cmpl $0x00, %edx
+ je L_AES_GCM_decrypt_avx1_calc_aad_done
+ xorl %ecx, %ecx
+ cmpl $16, %edx
+ jl L_AES_GCM_decrypt_avx1_calc_aad_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_decrypt_avx1_calc_aad_16_loop:
+ vmovdqu (%r12,%rcx,1), %xmm8
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ vpxor %xmm8, %xmm6, %xmm6
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm6, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm6, %xmm6
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm6, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm6, %xmm6
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm6, %xmm6
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm6, %xmm6
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm6, %xmm6
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_avx1_calc_aad_16_loop
+ movl %r11d, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_decrypt_avx1_calc_aad_done
+L_AES_GCM_decrypt_avx1_calc_aad_lt16:
+ subq $16, %rsp
+ vpxor %xmm8, %xmm8, %xmm8
+ xorl %ebx, %ebx
+ vmovdqa %xmm8, (%rsp)
+L_AES_GCM_decrypt_avx1_calc_aad_loop:
+ movzbl (%r12,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_avx1_calc_aad_loop
+ vmovdqa (%rsp), %xmm8
+ addq $16, %rsp
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8
+ vpxor %xmm8, %xmm6, %xmm6
+ # ghash_gfmul_avx
+ vpshufd $0x4e, %xmm6, %xmm1
+ vpshufd $0x4e, %xmm5, %xmm2
+ vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3
+ vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm5, %xmm2, %xmm2
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa %xmm0, %xmm7
+ vmovdqa %xmm3, %xmm6
+ vpslldq $8, %xmm1, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpxor %xmm2, %xmm7, %xmm7
+ vpxor %xmm1, %xmm6, %xmm6
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm6, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm6, %xmm6
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm6, %xmm6
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm6, %xmm6
+ vpslld $31, %xmm7, %xmm0
+ vpslld $30, %xmm7, %xmm1
+ vpslld $25, %xmm7, %xmm2
+ vpxor %xmm1, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm0, %xmm1
+ vpsrldq $4, %xmm1, %xmm1
+ vpslldq $12, %xmm0, %xmm0
+ vpxor %xmm0, %xmm7, %xmm7
+ vpsrld $0x01, %xmm7, %xmm2
+ vpsrld $2, %xmm7, %xmm3
+ vpsrld $7, %xmm7, %xmm0
+ vpxor %xmm3, %xmm2, %xmm2
+ vpxor %xmm0, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm6, %xmm6
+L_AES_GCM_decrypt_avx1_calc_aad_done:
+ # Calculate counter and H
+ vpsrlq $63, %xmm5, %xmm9
+ vpsllq $0x01, %xmm5, %xmm8
+ vpslldq $8, %xmm9, %xmm9
+ vpor %xmm9, %xmm8, %xmm8
+ vpshufd $0xff, %xmm5, %xmm5
+ vpsrad $31, %xmm5, %xmm5
+ vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4
+ vpand L_avx1_aes_gcm_mod2_128(%rip), %xmm5, %xmm5
+ vpaddd L_avx1_aes_gcm_one(%rip), %xmm4, %xmm4
+ vpxor %xmm8, %xmm5, %xmm5
+ vmovdqa %xmm4, 128(%rsp)
+ xorl %ebx, %ebx
+ cmpl $0x80, %r9d
+ movl %r9d, %r13d
+ jl L_AES_GCM_decrypt_avx1_done_128
+ andl $0xffffff80, %r13d
+ vmovdqa %xmm6, %xmm2
+ # H ^ 1
+ vmovdqa %xmm5, (%rsp)
+ # H ^ 2
+ vpclmulqdq $0x00, %xmm5, %xmm5, %xmm8
+ vpclmulqdq $0x11, %xmm5, %xmm5, %xmm0
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm0, %xmm0
+ vmovdqa %xmm0, 16(%rsp)
+ # H ^ 3
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm5, %xmm9
+ vpshufd $0x4e, %xmm0, %xmm10
+ vpclmulqdq $0x11, %xmm5, %xmm0, %xmm11
+ vpclmulqdq $0x00, %xmm5, %xmm0, %xmm8
+ vpxor %xmm5, %xmm9, %xmm9
+ vpxor %xmm0, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm1
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm1, %xmm1
+ vmovdqa %xmm1, 32(%rsp)
+ # H ^ 4
+ vpclmulqdq $0x00, %xmm0, %xmm0, %xmm8
+ vpclmulqdq $0x11, %xmm0, %xmm0, %xmm3
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm3, %xmm3
+ vmovdqa %xmm3, 48(%rsp)
+ # H ^ 5
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm0, %xmm9
+ vpshufd $0x4e, %xmm1, %xmm10
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm11
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm8
+ vpxor %xmm0, %xmm9, %xmm9
+ vpxor %xmm1, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm7
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm7, %xmm7
+ vmovdqa %xmm7, 64(%rsp)
+ # H ^ 6
+ vpclmulqdq $0x00, %xmm1, %xmm1, %xmm8
+ vpclmulqdq $0x11, %xmm1, %xmm1, %xmm7
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm7, %xmm7
+ vmovdqa %xmm7, 80(%rsp)
+ # H ^ 7
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm1, %xmm9
+ vpshufd $0x4e, %xmm3, %xmm10
+ vpclmulqdq $0x11, %xmm1, %xmm3, %xmm11
+ vpclmulqdq $0x00, %xmm1, %xmm3, %xmm8
+ vpxor %xmm1, %xmm9, %xmm9
+ vpxor %xmm3, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm7
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm7, %xmm7
+ vmovdqa %xmm7, 96(%rsp)
+ # H ^ 8
+ vpclmulqdq $0x00, %xmm3, %xmm3, %xmm8
+ vpclmulqdq $0x11, %xmm3, %xmm3, %xmm7
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm7, %xmm7
+ vmovdqa %xmm7, 112(%rsp)
+L_AES_GCM_decrypt_avx1_ghash_128:
+ leaq (%rdi,%rbx,1), %rcx
+ leaq (%rsi,%rbx,1), %rdx
+ vmovdqa 128(%rsp), %xmm0
+ vmovdqa L_avx1_aes_gcm_bswap_epi64(%rip), %xmm1
+ vpshufb %xmm1, %xmm0, %xmm8
+ vpaddd L_avx1_aes_gcm_one(%rip), %xmm0, %xmm9
+ vpshufb %xmm1, %xmm9, %xmm9
+ vpaddd L_avx1_aes_gcm_two(%rip), %xmm0, %xmm10
+ vpshufb %xmm1, %xmm10, %xmm10
+ vpaddd L_avx1_aes_gcm_three(%rip), %xmm0, %xmm11
+ vpshufb %xmm1, %xmm11, %xmm11
+ vpaddd L_avx1_aes_gcm_four(%rip), %xmm0, %xmm12
+ vpshufb %xmm1, %xmm12, %xmm12
+ vpaddd L_avx1_aes_gcm_five(%rip), %xmm0, %xmm13
+ vpshufb %xmm1, %xmm13, %xmm13
+ vpaddd L_avx1_aes_gcm_six(%rip), %xmm0, %xmm14
+ vpshufb %xmm1, %xmm14, %xmm14
+ vpaddd L_avx1_aes_gcm_seven(%rip), %xmm0, %xmm15
+ vpshufb %xmm1, %xmm15, %xmm15
+ vpaddd L_avx1_aes_gcm_eight(%rip), %xmm0, %xmm0
+ vmovdqa (%r15), %xmm7
+ vmovdqa %xmm0, 128(%rsp)
+ vpxor %xmm7, %xmm8, %xmm8
+ vpxor %xmm7, %xmm9, %xmm9
+ vpxor %xmm7, %xmm10, %xmm10
+ vpxor %xmm7, %xmm11, %xmm11
+ vpxor %xmm7, %xmm12, %xmm12
+ vpxor %xmm7, %xmm13, %xmm13
+ vpxor %xmm7, %xmm14, %xmm14
+ vpxor %xmm7, %xmm15, %xmm15
+ vmovdqa 112(%rsp), %xmm7
+ vmovdqu (%rcx), %xmm0
+ vaesenc 16(%r15), %xmm8, %xmm8
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm3
+ vaesenc 16(%r15), %xmm9, %xmm9
+ vaesenc 16(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm2
+ vaesenc 16(%r15), %xmm11, %xmm11
+ vaesenc 16(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm1, %xmm1
+ vaesenc 16(%r15), %xmm13, %xmm13
+ vaesenc 16(%r15), %xmm14, %xmm14
+ vaesenc 16(%r15), %xmm15, %xmm15
+ vpxor %xmm2, %xmm1, %xmm1
+ vpxor %xmm3, %xmm1, %xmm1
+ vmovdqa 96(%rsp), %xmm7
+ vmovdqu 16(%rcx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 32(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 32(%r15), %xmm9, %xmm9
+ vaesenc 32(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 32(%r15), %xmm11, %xmm11
+ vaesenc 32(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 32(%r15), %xmm13, %xmm13
+ vaesenc 32(%r15), %xmm14, %xmm14
+ vaesenc 32(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 80(%rsp), %xmm7
+ vmovdqu 32(%rcx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 48(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 48(%r15), %xmm9, %xmm9
+ vaesenc 48(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 48(%r15), %xmm11, %xmm11
+ vaesenc 48(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 48(%r15), %xmm13, %xmm13
+ vaesenc 48(%r15), %xmm14, %xmm14
+ vaesenc 48(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 64(%rsp), %xmm7
+ vmovdqu 48(%rcx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 64(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 64(%r15), %xmm9, %xmm9
+ vaesenc 64(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 64(%r15), %xmm11, %xmm11
+ vaesenc 64(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 64(%r15), %xmm13, %xmm13
+ vaesenc 64(%r15), %xmm14, %xmm14
+ vaesenc 64(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 48(%rsp), %xmm7
+ vmovdqu 64(%rcx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 80(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 80(%r15), %xmm9, %xmm9
+ vaesenc 80(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 80(%r15), %xmm11, %xmm11
+ vaesenc 80(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 80(%r15), %xmm13, %xmm13
+ vaesenc 80(%r15), %xmm14, %xmm14
+ vaesenc 80(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 32(%rsp), %xmm7
+ vmovdqu 80(%rcx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 96(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 96(%r15), %xmm9, %xmm9
+ vaesenc 96(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 96(%r15), %xmm11, %xmm11
+ vaesenc 96(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 96(%r15), %xmm13, %xmm13
+ vaesenc 96(%r15), %xmm14, %xmm14
+ vaesenc 96(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa 16(%rsp), %xmm7
+ vmovdqu 96(%rcx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 112(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 112(%r15), %xmm9, %xmm9
+ vaesenc 112(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 112(%r15), %xmm11, %xmm11
+ vaesenc 112(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 112(%r15), %xmm13, %xmm13
+ vaesenc 112(%r15), %xmm14, %xmm14
+ vaesenc 112(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vmovdqa (%rsp), %xmm7
+ vmovdqu 112(%rcx), %xmm0
+ vpshufd $0x4e, %xmm7, %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vaesenc 128(%r15), %xmm8, %xmm8
+ vpxor %xmm7, %xmm4, %xmm4
+ vpshufd $0x4e, %xmm0, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6
+ vaesenc 128(%r15), %xmm9, %xmm9
+ vaesenc 128(%r15), %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7
+ vaesenc 128(%r15), %xmm11, %xmm11
+ vaesenc 128(%r15), %xmm12, %xmm12
+ vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4
+ vaesenc 128(%r15), %xmm13, %xmm13
+ vaesenc 128(%r15), %xmm14, %xmm14
+ vaesenc 128(%r15), %xmm15, %xmm15
+ vpxor %xmm7, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm6, %xmm3, %xmm3
+ vpxor %xmm4, %xmm1, %xmm1
+ vpslldq $8, %xmm1, %xmm5
+ vpsrldq $8, %xmm1, %xmm1
+ vaesenc 144(%r15), %xmm8, %xmm8
+ vpxor %xmm5, %xmm2, %xmm2
+ vpxor %xmm1, %xmm3, %xmm3
+ vaesenc 144(%r15), %xmm9, %xmm9
+ vpslld $31, %xmm2, %xmm7
+ vpslld $30, %xmm2, %xmm4
+ vpslld $25, %xmm2, %xmm5
+ vaesenc 144(%r15), %xmm10, %xmm10
+ vpxor %xmm4, %xmm7, %xmm7
+ vpxor %xmm5, %xmm7, %xmm7
+ vaesenc 144(%r15), %xmm11, %xmm11
+ vpsrldq $4, %xmm7, %xmm4
+ vpslldq $12, %xmm7, %xmm7
+ vaesenc 144(%r15), %xmm12, %xmm12
+ vpxor %xmm7, %xmm2, %xmm2
+ vpsrld $0x01, %xmm2, %xmm5
+ vaesenc 144(%r15), %xmm13, %xmm13
+ vpsrld $2, %xmm2, %xmm1
+ vpsrld $7, %xmm2, %xmm0
+ vaesenc 144(%r15), %xmm14, %xmm14
+ vpxor %xmm1, %xmm5, %xmm5
+ vpxor %xmm0, %xmm5, %xmm5
+ vaesenc 144(%r15), %xmm15, %xmm15
+ vpxor %xmm4, %xmm5, %xmm5
+ vpxor %xmm5, %xmm2, %xmm2
+ vpxor %xmm3, %xmm2, %xmm2
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm7
+ jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 176(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm7
+ jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 208(%r15), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 224(%r15), %xmm7
+L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done:
+ vaesenclast %xmm7, %xmm8, %xmm8
+ vaesenclast %xmm7, %xmm9, %xmm9
+ vmovdqu (%rcx), %xmm0
+ vmovdqu 16(%rcx), %xmm1
+ vpxor %xmm0, %xmm8, %xmm8
+ vpxor %xmm1, %xmm9, %xmm9
+ vmovdqu %xmm8, (%rdx)
+ vmovdqu %xmm9, 16(%rdx)
+ vaesenclast %xmm7, %xmm10, %xmm10
+ vaesenclast %xmm7, %xmm11, %xmm11
+ vmovdqu 32(%rcx), %xmm0
+ vmovdqu 48(%rcx), %xmm1
+ vpxor %xmm0, %xmm10, %xmm10
+ vpxor %xmm1, %xmm11, %xmm11
+ vmovdqu %xmm10, 32(%rdx)
+ vmovdqu %xmm11, 48(%rdx)
+ vaesenclast %xmm7, %xmm12, %xmm12
+ vaesenclast %xmm7, %xmm13, %xmm13
+ vmovdqu 64(%rcx), %xmm0
+ vmovdqu 80(%rcx), %xmm1
+ vpxor %xmm0, %xmm12, %xmm12
+ vpxor %xmm1, %xmm13, %xmm13
+ vmovdqu %xmm12, 64(%rdx)
+ vmovdqu %xmm13, 80(%rdx)
+ vaesenclast %xmm7, %xmm14, %xmm14
+ vaesenclast %xmm7, %xmm15, %xmm15
+ vmovdqu 96(%rcx), %xmm0
+ vmovdqu 112(%rcx), %xmm1
+ vpxor %xmm0, %xmm14, %xmm14
+ vpxor %xmm1, %xmm15, %xmm15
+ vmovdqu %xmm14, 96(%rdx)
+ vmovdqu %xmm15, 112(%rdx)
+ addl $0x80, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_decrypt_avx1_ghash_128
+ vmovdqa %xmm2, %xmm6
+ vmovdqa (%rsp), %xmm5
+L_AES_GCM_decrypt_avx1_done_128:
+ movl %r9d, %edx
+ cmpl %edx, %ebx
+ jge L_AES_GCM_decrypt_avx1_done_dec
+ movl %r9d, %r13d
+ andl $0xfffffff0, %r13d
+ cmpl %r13d, %ebx
+ jge L_AES_GCM_decrypt_avx1_last_block_done
+L_AES_GCM_decrypt_avx1_last_block_start:
+ vmovdqu (%rdi,%rbx,1), %xmm13
+ vmovdqa %xmm5, %xmm0
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm13, %xmm1
+ vpxor %xmm6, %xmm1, %xmm1
+ vmovdqa 128(%rsp), %xmm9
+ vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8
+ vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9
+ vmovdqa %xmm9, 128(%rsp)
+ vpxor (%r15), %xmm8, %xmm8
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm10
+ vaesenc 16(%r15), %xmm8, %xmm8
+ vaesenc 32(%r15), %xmm8, %xmm8
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm11
+ vaesenc 48(%r15), %xmm8, %xmm8
+ vaesenc 64(%r15), %xmm8, %xmm8
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm12
+ vaesenc 80(%r15), %xmm8, %xmm8
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vaesenc 96(%r15), %xmm8, %xmm8
+ vpxor %xmm11, %xmm10, %xmm10
+ vpslldq $8, %xmm10, %xmm2
+ vpsrldq $8, %xmm10, %xmm10
+ vaesenc 112(%r15), %xmm8, %xmm8
+ vpxor %xmm12, %xmm2, %xmm2
+ vpxor %xmm10, %xmm1, %xmm3
+ vmovdqa L_avx1_aes_gcm_mod2_128(%rip), %xmm0
+ vpclmulqdq $16, %xmm0, %xmm2, %xmm11
+ vaesenc 128(%r15), %xmm8, %xmm8
+ vpshufd $0x4e, %xmm2, %xmm10
+ vpxor %xmm11, %xmm10, %xmm10
+ vpclmulqdq $16, %xmm0, %xmm10, %xmm11
+ vaesenc 144(%r15), %xmm8, %xmm8
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpxor %xmm11, %xmm10, %xmm10
+ vpxor %xmm3, %xmm10, %xmm6
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm9
+ jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 176(%r15), %xmm8, %xmm8
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm9
+ jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last
+ vaesenc %xmm9, %xmm8, %xmm8
+ vaesenc 208(%r15), %xmm8, %xmm8
+ vmovdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_avx1_aesenc_gfmul_last:
+ vaesenclast %xmm9, %xmm8, %xmm8
+ vmovdqa %xmm13, %xmm0
+ vpxor %xmm0, %xmm8, %xmm8
+ vmovdqu %xmm8, (%rsi,%rbx,1)
+ addl $16, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_decrypt_avx1_last_block_start
+L_AES_GCM_decrypt_avx1_last_block_done:
+ movl %r9d, %ecx
+ movl %ecx, %edx
+ andl $15, %ecx
+ jz L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done
+ vmovdqa 128(%rsp), %xmm4
+ vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4
+ vpxor (%r15), %xmm4, %xmm4
+ vaesenc 16(%r15), %xmm4, %xmm4
+ vaesenc 32(%r15), %xmm4, %xmm4
+ vaesenc 48(%r15), %xmm4, %xmm4
+ vaesenc 64(%r15), %xmm4, %xmm4
+ vaesenc 80(%r15), %xmm4, %xmm4
+ vaesenc 96(%r15), %xmm4, %xmm4
+ vaesenc 112(%r15), %xmm4, %xmm4
+ vaesenc 128(%r15), %xmm4, %xmm4
+ vaesenc 144(%r15), %xmm4, %xmm4
+ cmpl $11, %r10d
+ vmovdqa 160(%r15), %xmm9
+ jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last
+ vaesenc %xmm9, %xmm4, %xmm4
+ vaesenc 176(%r15), %xmm4, %xmm4
+ cmpl $13, %r10d
+ vmovdqa 192(%r15), %xmm9
+ jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last
+ vaesenc %xmm9, %xmm4, %xmm4
+ vaesenc 208(%r15), %xmm4, %xmm4
+ vmovdqa 224(%r15), %xmm9
+L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last:
+ vaesenclast %xmm9, %xmm4, %xmm4
+ subq $32, %rsp
+ xorl %ecx, %ecx
+ vmovdqa %xmm4, (%rsp)
+ vpxor %xmm0, %xmm0, %xmm0
+ vmovdqa %xmm0, 16(%rsp)
+L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop:
+ movzbl (%rdi,%rbx,1), %r13d
+ movb %r13b, 16(%rsp,%rcx,1)
+ xorb (%rsp,%rcx,1), %r13b
+ movb %r13b, (%rsi,%rbx,1)
+ incl %ebx
+ incl %ecx
+ cmpl %edx, %ebx
+ jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop
+ vmovdqa 16(%rsp), %xmm4
+ addq $32, %rsp
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4
+ vpxor %xmm4, %xmm6, %xmm6
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm5, %xmm9
+ vpshufd $0x4e, %xmm6, %xmm10
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8
+ vpxor %xmm5, %xmm9, %xmm9
+ vpxor %xmm6, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm6
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm6, %xmm6
+L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done:
+L_AES_GCM_decrypt_avx1_done_dec:
+ movl %r9d, %edx
+ movl %r11d, %ecx
+ shlq $3, %rdx
+ shlq $3, %rcx
+ vpinsrq $0x00, %rdx, %xmm0, %xmm0
+ vpinsrq $0x01, %rcx, %xmm0, %xmm0
+ vpxor %xmm0, %xmm6, %xmm6
+ # ghash_gfmul_red_avx
+ vpshufd $0x4e, %xmm5, %xmm9
+ vpshufd $0x4e, %xmm6, %xmm10
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8
+ vpxor %xmm5, %xmm9, %xmm9
+ vpxor %xmm6, %xmm10, %xmm10
+ vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm11, %xmm9, %xmm9
+ vpslldq $8, %xmm9, %xmm10
+ vpsrldq $8, %xmm9, %xmm9
+ vpxor %xmm10, %xmm8, %xmm8
+ vpxor %xmm9, %xmm11, %xmm6
+ vpslld $31, %xmm8, %xmm12
+ vpslld $30, %xmm8, %xmm13
+ vpslld $25, %xmm8, %xmm14
+ vpxor %xmm13, %xmm12, %xmm12
+ vpxor %xmm14, %xmm12, %xmm12
+ vpsrldq $4, %xmm12, %xmm13
+ vpslldq $12, %xmm12, %xmm12
+ vpxor %xmm12, %xmm8, %xmm8
+ vpsrld $0x01, %xmm8, %xmm14
+ vpsrld $2, %xmm8, %xmm10
+ vpsrld $7, %xmm8, %xmm9
+ vpxor %xmm10, %xmm14, %xmm14
+ vpxor %xmm9, %xmm14, %xmm14
+ vpxor %xmm13, %xmm14, %xmm14
+ vpxor %xmm8, %xmm14, %xmm14
+ vpxor %xmm14, %xmm6, %xmm6
+ vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm6, %xmm6
+ vpxor 144(%rsp), %xmm6, %xmm0
+ cmpl $16, %r14d
+ je L_AES_GCM_decrypt_avx1_cmp_tag_16
+ subq $16, %rsp
+ xorq %rcx, %rcx
+ xorq %rbx, %rbx
+ vmovdqa %xmm0, (%rsp)
+L_AES_GCM_decrypt_avx1_cmp_tag_loop:
+ movzbl (%rsp,%rcx,1), %r13d
+ xorb (%r8,%rcx,1), %r13b
+ orb %r13b, %bl
+ incl %ecx
+ cmpl %r14d, %ecx
+ jne L_AES_GCM_decrypt_avx1_cmp_tag_loop
+ cmpb $0x00, %bl
+ sete %bl
+ addq $16, %rsp
+ xorq %rcx, %rcx
+ jmp L_AES_GCM_decrypt_avx1_cmp_tag_done
+L_AES_GCM_decrypt_avx1_cmp_tag_16:
+ vmovdqu (%r8), %xmm1
+ vpcmpeqb %xmm1, %xmm0, %xmm0
+ vpmovmskb %xmm0, %rdx
+ # %%edx == 0xFFFF then return 1 else => return 0
+ xorl %ebx, %ebx
+ cmpl $0xffff, %edx
+ sete %bl
+L_AES_GCM_decrypt_avx1_cmp_tag_done:
+ movl %ebx, (%rbp)
+ vzeroupper
+ addq $0xa8, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %rbx
+ popq %r12
+ popq %r13
+ repz retq
+#ifndef __APPLE__
+.size AES_GCM_decrypt_avx1,.-AES_GCM_decrypt_avx1
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX1 */
+#ifdef HAVE_INTEL_AVX2
+# ---------------------------------------------------------------------------
+# Constant pool for the AVX2 AES-GCM implementation that follows.
+# Each constant is one 16-byte vector emitted as two little-endian quadwords
+# (low quadword first).  Every #ifndef __APPLE__ pair selects the ELF
+# (.data / .align) or Mach-O (.section / .p2align) spelling of the same
+# directive; both request 16-byte alignment, as required by the vmovdqa
+# loads used on these constants in the code below.
+# ---------------------------------------------------------------------------
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# Counter increments 1..8: value N sits in the upper quadword so that,
+# after the counter block has been byte-reversed per 64-bit half with
+# L_avx2_aes_gcm_bswap_epi64, vpaddd steps the 32-bit GCM counter by N
+# (see the aesenc_ctr sequences below).
+L_avx2_aes_gcm_one:
+.quad 0x0, 0x1
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx2_aes_gcm_two:
+.quad 0x0, 0x2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx2_aes_gcm_three:
+.quad 0x0, 0x3
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx2_aes_gcm_four:
+.quad 0x0, 0x4
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx2_aes_gcm_five:
+.quad 0x0, 0x5
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx2_aes_gcm_six:
+.quad 0x0, 0x6
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx2_aes_gcm_seven:
+.quad 0x0, 0x7
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx2_aes_gcm_eight:
+.quad 0x0, 0x8
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# Initial counter value 1 already in big-endian (wire) byte order; blended
+# with a 12-byte IV via vpblendd $7 to build the first counter block
+# (see L_AES_GCM_encrypt_avx2_iv_12).
+L_avx2_aes_gcm_bswap_one:
+.quad 0x0, 0x100000000000000
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# vpshufb mask: byte-reverses each 64-bit half of the vector independently.
+# Used on counter blocks before vpaddd so the 32-bit counter can be stepped
+# with ordinary little-endian integer adds.
+L_avx2_aes_gcm_bswap_epi64:
+.quad 0x1020304050607, 0x8090a0b0c0d0e0f
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# vpshufb mask: full 16-byte reversal, converting between wire byte order
+# and the bit-reflected order used by the PCLMULQDQ-based GHASH below.
+L_avx2_aes_gcm_bswap_mask:
+.quad 0x8090a0b0c0d0e0f, 0x1020304050607
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# GHASH reduction constant (bit-reflected form of the GCM polynomial
+# x^128 + x^7 + x^2 + x + 1); consumed by the vpclmulqdq $16 folds in the
+# "# ghash_red" sequences below.
+L_avx2_aes_gcm_mod2_128:
+.quad 0x1, 0xc200000000000000
+#ifndef __APPLE__
+.text
+.globl AES_GCM_encrypt_avx2
+.type AES_GCM_encrypt_avx2,@function
+.align 4
+AES_GCM_encrypt_avx2:
+#else
+.section __TEXT,__text
+.globl _AES_GCM_encrypt_avx2
+.p2align 2
+_AES_GCM_encrypt_avx2:
+#endif /* __APPLE__ */
+ pushq %r13
+ pushq %r12
+ pushq %r15
+ pushq %rbx
+ pushq %r14
+ movq %rdx, %r12
+ movq %rcx, %rax
+ movq %r8, %r15
+ movq %rsi, %r8
+ movl %r9d, %r10d
+ movl 48(%rsp), %r11d
+ movl 56(%rsp), %ebx
+ movl 64(%rsp), %r14d
+ movq 72(%rsp), %rsi
+ movl 80(%rsp), %r9d
+ subq $0xa0, %rsp
+ vpxor %xmm4, %xmm4, %xmm4
+ vpxor %xmm6, %xmm6, %xmm6
+ movl %ebx, %edx
+ cmpl $12, %edx
+ je L_AES_GCM_encrypt_avx2_iv_12
+ # Calculate values when IV is not 12 bytes
+ # H = Encrypt X(=0)
+ vmovdqa (%rsi), %xmm5
+ vaesenc 16(%rsi), %xmm5, %xmm5
+ vaesenc 32(%rsi), %xmm5, %xmm5
+ vaesenc 48(%rsi), %xmm5, %xmm5
+ vaesenc 64(%rsi), %xmm5, %xmm5
+ vaesenc 80(%rsi), %xmm5, %xmm5
+ vaesenc 96(%rsi), %xmm5, %xmm5
+ vaesenc 112(%rsi), %xmm5, %xmm5
+ vaesenc 128(%rsi), %xmm5, %xmm5
+ vaesenc 144(%rsi), %xmm5, %xmm5
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm0
+ jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc 176(%rsi), %xmm5, %xmm5
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm0
+ jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc 208(%rsi), %xmm5, %xmm5
+ vmovdqa 224(%rsi), %xmm0
+L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last:
+ vaesenclast %xmm0, %xmm5, %xmm5
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5
+ # Calc counter
+ # Initialization vector
+ cmpl $0x00, %edx
+ movq $0x00, %rcx
+ je L_AES_GCM_encrypt_avx2_calc_iv_done
+ cmpl $16, %edx
+ jl L_AES_GCM_encrypt_avx2_calc_iv_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_encrypt_avx2_calc_iv_16_loop:
+ vmovdqu (%rax,%rcx,1), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm0, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm4, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm4
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm4, %xmm4
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_avx2_calc_iv_16_loop
+ movl %ebx, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_encrypt_avx2_calc_iv_done
+L_AES_GCM_encrypt_avx2_calc_iv_lt16:
+ vpxor %xmm0, %xmm0, %xmm0
+ xorl %ebx, %ebx
+ vmovdqa %xmm0, (%rsp)
+L_AES_GCM_encrypt_avx2_calc_iv_loop:
+ movzbl (%rax,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_avx2_calc_iv_loop
+ vmovdqa (%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm0, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm4, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm4
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm4, %xmm4
+L_AES_GCM_encrypt_avx2_calc_iv_done:
+ # T = Encrypt counter
+ vpxor %xmm0, %xmm0, %xmm0
+ shll $3, %edx
+ vpinsrq $0x00, %rdx, %xmm0, %xmm0
+ vpxor %xmm0, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm4, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm4
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm4, %xmm4
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4
+ # Encrypt counter
+ vmovdqa (%rsi), %xmm15
+ vpxor %xmm4, %xmm15, %xmm15
+ vaesenc 16(%rsi), %xmm15, %xmm15
+ vaesenc 32(%rsi), %xmm15, %xmm15
+ vaesenc 48(%rsi), %xmm15, %xmm15
+ vaesenc 64(%rsi), %xmm15, %xmm15
+ vaesenc 80(%rsi), %xmm15, %xmm15
+ vaesenc 96(%rsi), %xmm15, %xmm15
+ vaesenc 112(%rsi), %xmm15, %xmm15
+ vaesenc 128(%rsi), %xmm15, %xmm15
+ vaesenc 144(%rsi), %xmm15, %xmm15
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm0
+ jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last
+ vaesenc %xmm0, %xmm15, %xmm15
+ vaesenc 176(%rsi), %xmm15, %xmm15
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm0
+ jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last
+ vaesenc %xmm0, %xmm15, %xmm15
+ vaesenc 208(%rsi), %xmm15, %xmm15
+ vmovdqa 224(%rsi), %xmm0
+L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last:
+ vaesenclast %xmm0, %xmm15, %xmm15
+ jmp L_AES_GCM_encrypt_avx2_iv_done
+L_AES_GCM_encrypt_avx2_iv_12:
+ # # Calculate values when IV is 12 bytes
+ # Set counter based on IV
+ vmovdqa L_avx2_aes_gcm_bswap_one(%rip), %xmm4
+ vmovdqa (%rsi), %xmm5
+ vpblendd $7, (%rax), %xmm4, %xmm4
+ # H = Encrypt X(=0) and T = Encrypt counter
+ vmovdqa 16(%rsi), %xmm7
+ vpxor %xmm5, %xmm4, %xmm15
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 32(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 48(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 64(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 80(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 96(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 112(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 128(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 144(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm0
+ jl L_AES_GCM_encrypt_avx2_calc_iv_12_last
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 176(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm0
+ jl L_AES_GCM_encrypt_avx2_calc_iv_12_last
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 208(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 224(%rsi), %xmm0
+L_AES_GCM_encrypt_avx2_calc_iv_12_last:
+ vaesenclast %xmm0, %xmm5, %xmm5
+ vaesenclast %xmm0, %xmm15, %xmm15
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5
+L_AES_GCM_encrypt_avx2_iv_done:
+ # Additional authentication data
+ movl %r11d, %edx
+ cmpl $0x00, %edx
+ je L_AES_GCM_encrypt_avx2_calc_aad_done
+ xorl %ecx, %ecx
+ cmpl $16, %edx
+ jl L_AES_GCM_encrypt_avx2_calc_aad_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_encrypt_avx2_calc_aad_16_loop:
+ vmovdqu (%r12,%rcx,1), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm0, %xmm6, %xmm6
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm6, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm6
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm6, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm6, %xmm6
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm6, %xmm6
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm6, %xmm6
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm6, %xmm6
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_avx2_calc_aad_16_loop
+ movl %r11d, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_encrypt_avx2_calc_aad_done
+L_AES_GCM_encrypt_avx2_calc_aad_lt16:
+ vpxor %xmm0, %xmm0, %xmm0
+ xorl %ebx, %ebx
+ vmovdqa %xmm0, (%rsp)
+L_AES_GCM_encrypt_avx2_calc_aad_loop:
+ movzbl (%r12,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_encrypt_avx2_calc_aad_loop
+ vmovdqa (%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm0, %xmm6, %xmm6
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm6, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm6
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm6, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm6, %xmm6
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm6, %xmm6
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm6, %xmm6
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm6, %xmm6
+L_AES_GCM_encrypt_avx2_calc_aad_done:
+ # Calculate counter and H
+ vpsrlq $63, %xmm5, %xmm1
+ vpsllq $0x01, %xmm5, %xmm0
+ vpslldq $8, %xmm1, %xmm1
+ vpor %xmm1, %xmm0, %xmm0
+ vpshufd $0xff, %xmm5, %xmm5
+ vpsrad $31, %xmm5, %xmm5
+ vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4
+ vpand L_avx2_aes_gcm_mod2_128(%rip), %xmm5, %xmm5
+ vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4
+ vpxor %xmm0, %xmm5, %xmm5
+ xorl %ebx, %ebx
+ cmpl $0x80, %r10d
+ movl %r10d, %r13d
+ jl L_AES_GCM_encrypt_avx2_done_128
+ andl $0xffffff80, %r13d
+ vmovdqa %xmm4, 128(%rsp)
+ vmovdqa %xmm15, 144(%rsp)
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm3
+ # H ^ 1 and H ^ 2
+ vpclmulqdq $0x00, %xmm5, %xmm5, %xmm9
+ vpclmulqdq $0x11, %xmm5, %xmm5, %xmm10
+ vpclmulqdq $16, %xmm3, %xmm9, %xmm8
+ vpshufd $0x4e, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpclmulqdq $16, %xmm3, %xmm9, %xmm8
+ vpshufd $0x4e, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm9, %xmm10, %xmm0
+ vmovdqa %xmm5, (%rsp)
+ vmovdqa %xmm0, 16(%rsp)
+ # H ^ 3 and H ^ 4
+ vpclmulqdq $16, %xmm5, %xmm0, %xmm11
+ vpclmulqdq $0x01, %xmm5, %xmm0, %xmm10
+ vpclmulqdq $0x00, %xmm5, %xmm0, %xmm9
+ vpclmulqdq $0x11, %xmm5, %xmm0, %xmm12
+ vpclmulqdq $0x00, %xmm0, %xmm0, %xmm13
+ vpclmulqdq $0x11, %xmm0, %xmm0, %xmm14
+ vpxor %xmm10, %xmm11, %xmm11
+ vpslldq $8, %xmm11, %xmm10
+ vpsrldq $8, %xmm11, %xmm11
+ vpxor %xmm9, %xmm10, %xmm10
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm9, %xmm10, %xmm10
+ vpxor %xmm8, %xmm13, %xmm13
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm11, %xmm12, %xmm12
+ vpxor %xmm8, %xmm13, %xmm13
+ vpxor %xmm12, %xmm10, %xmm10
+ vpxor %xmm14, %xmm13, %xmm2
+ vpxor %xmm9, %xmm10, %xmm1
+ vmovdqa %xmm1, 32(%rsp)
+ vmovdqa %xmm2, 48(%rsp)
+ # H ^ 5 and H ^ 6
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm11
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm10
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm9
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm12
+ vpclmulqdq $0x00, %xmm1, %xmm1, %xmm13
+ vpclmulqdq $0x11, %xmm1, %xmm1, %xmm14
+ vpxor %xmm10, %xmm11, %xmm11
+ vpslldq $8, %xmm11, %xmm10
+ vpsrldq $8, %xmm11, %xmm11
+ vpxor %xmm9, %xmm10, %xmm10
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm9, %xmm10, %xmm10
+ vpxor %xmm8, %xmm13, %xmm13
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm11, %xmm12, %xmm12
+ vpxor %xmm8, %xmm13, %xmm13
+ vpxor %xmm12, %xmm10, %xmm10
+ vpxor %xmm14, %xmm13, %xmm0
+ vpxor %xmm9, %xmm10, %xmm7
+ vmovdqa %xmm7, 64(%rsp)
+ vmovdqa %xmm0, 80(%rsp)
+ # H ^ 7 and H ^ 8
+ vpclmulqdq $16, %xmm1, %xmm2, %xmm11
+ vpclmulqdq $0x01, %xmm1, %xmm2, %xmm10
+ vpclmulqdq $0x00, %xmm1, %xmm2, %xmm9
+ vpclmulqdq $0x11, %xmm1, %xmm2, %xmm12
+ vpclmulqdq $0x00, %xmm2, %xmm2, %xmm13
+ vpclmulqdq $0x11, %xmm2, %xmm2, %xmm14
+ vpxor %xmm10, %xmm11, %xmm11
+ vpslldq $8, %xmm11, %xmm10
+ vpsrldq $8, %xmm11, %xmm11
+ vpxor %xmm9, %xmm10, %xmm10
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm9, %xmm10, %xmm10
+ vpxor %xmm8, %xmm13, %xmm13
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm11, %xmm12, %xmm12
+ vpxor %xmm8, %xmm13, %xmm13
+ vpxor %xmm12, %xmm10, %xmm10
+ vpxor %xmm14, %xmm13, %xmm0
+ vpxor %xmm9, %xmm10, %xmm7
+ vmovdqa %xmm7, 96(%rsp)
+ vmovdqa %xmm0, 112(%rsp)
+ # First 128 bytes of input
+ # aesenc_128
+ # aesenc_ctr
+ vmovdqa 128(%rsp), %xmm0
+ vmovdqa L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1
+ vpaddd L_avx2_aes_gcm_one(%rip), %xmm0, %xmm9
+ vpshufb %xmm1, %xmm0, %xmm8
+ vpaddd L_avx2_aes_gcm_two(%rip), %xmm0, %xmm10
+ vpshufb %xmm1, %xmm9, %xmm9
+ vpaddd L_avx2_aes_gcm_three(%rip), %xmm0, %xmm11
+ vpshufb %xmm1, %xmm10, %xmm10
+ vpaddd L_avx2_aes_gcm_four(%rip), %xmm0, %xmm12
+ vpshufb %xmm1, %xmm11, %xmm11
+ vpaddd L_avx2_aes_gcm_five(%rip), %xmm0, %xmm13
+ vpshufb %xmm1, %xmm12, %xmm12
+ vpaddd L_avx2_aes_gcm_six(%rip), %xmm0, %xmm14
+ vpshufb %xmm1, %xmm13, %xmm13
+ vpaddd L_avx2_aes_gcm_seven(%rip), %xmm0, %xmm15
+ vpshufb %xmm1, %xmm14, %xmm14
+ vpaddd L_avx2_aes_gcm_eight(%rip), %xmm0, %xmm0
+ vpshufb %xmm1, %xmm15, %xmm15
+ # aesenc_xor
+ vmovdqa (%rsi), %xmm7
+ vmovdqa %xmm0, 128(%rsp)
+ vpxor %xmm7, %xmm8, %xmm8
+ vpxor %xmm7, %xmm9, %xmm9
+ vpxor %xmm7, %xmm10, %xmm10
+ vpxor %xmm7, %xmm11, %xmm11
+ vpxor %xmm7, %xmm12, %xmm12
+ vpxor %xmm7, %xmm13, %xmm13
+ vpxor %xmm7, %xmm14, %xmm14
+ vpxor %xmm7, %xmm15, %xmm15
+ vmovdqa 16(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 32(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 48(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 64(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 80(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 96(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 112(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 128(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 144(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm7
+ jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 176(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm7
+ jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 208(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 224(%rsi), %xmm7
+L_AES_GCM_encrypt_avx2_aesenc_128_enc_done:
+ # aesenc_last
+ vaesenclast %xmm7, %xmm8, %xmm8
+ vaesenclast %xmm7, %xmm9, %xmm9
+ vaesenclast %xmm7, %xmm10, %xmm10
+ vaesenclast %xmm7, %xmm11, %xmm11
+ vmovdqu (%rdi), %xmm0
+ vmovdqu 16(%rdi), %xmm1
+ vmovdqu 32(%rdi), %xmm2
+ vmovdqu 48(%rdi), %xmm3
+ vpxor %xmm0, %xmm8, %xmm8
+ vpxor %xmm1, %xmm9, %xmm9
+ vpxor %xmm2, %xmm10, %xmm10
+ vpxor %xmm3, %xmm11, %xmm11
+ vmovdqu %xmm8, (%r8)
+ vmovdqu %xmm9, 16(%r8)
+ vmovdqu %xmm10, 32(%r8)
+ vmovdqu %xmm11, 48(%r8)
+ vaesenclast %xmm7, %xmm12, %xmm12
+ vaesenclast %xmm7, %xmm13, %xmm13
+ vaesenclast %xmm7, %xmm14, %xmm14
+ vaesenclast %xmm7, %xmm15, %xmm15
+ vmovdqu 64(%rdi), %xmm0
+ vmovdqu 80(%rdi), %xmm1
+ vmovdqu 96(%rdi), %xmm2
+ vmovdqu 112(%rdi), %xmm3
+ vpxor %xmm0, %xmm12, %xmm12
+ vpxor %xmm1, %xmm13, %xmm13
+ vpxor %xmm2, %xmm14, %xmm14
+ vpxor %xmm3, %xmm15, %xmm15
+ vmovdqu %xmm12, 64(%r8)
+ vmovdqu %xmm13, 80(%r8)
+ vmovdqu %xmm14, 96(%r8)
+ vmovdqu %xmm15, 112(%r8)
+ cmpl $0x80, %r13d
+ movl $0x80, %ebx
+ jle L_AES_GCM_encrypt_avx2_end_128
+ # More 128 bytes of input
+L_AES_GCM_encrypt_avx2_ghash_128:
+ # aesenc_128_ghash
+ leaq (%rdi,%rbx,1), %rcx
+ leaq (%r8,%rbx,1), %rdx
+ # aesenc_ctr
+ vmovdqa 128(%rsp), %xmm0
+ vmovdqa L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1
+ vpaddd L_avx2_aes_gcm_one(%rip), %xmm0, %xmm9
+ vpshufb %xmm1, %xmm0, %xmm8
+ vpaddd L_avx2_aes_gcm_two(%rip), %xmm0, %xmm10
+ vpshufb %xmm1, %xmm9, %xmm9
+ vpaddd L_avx2_aes_gcm_three(%rip), %xmm0, %xmm11
+ vpshufb %xmm1, %xmm10, %xmm10
+ vpaddd L_avx2_aes_gcm_four(%rip), %xmm0, %xmm12
+ vpshufb %xmm1, %xmm11, %xmm11
+ vpaddd L_avx2_aes_gcm_five(%rip), %xmm0, %xmm13
+ vpshufb %xmm1, %xmm12, %xmm12
+ vpaddd L_avx2_aes_gcm_six(%rip), %xmm0, %xmm14
+ vpshufb %xmm1, %xmm13, %xmm13
+ vpaddd L_avx2_aes_gcm_seven(%rip), %xmm0, %xmm15
+ vpshufb %xmm1, %xmm14, %xmm14
+ vpaddd L_avx2_aes_gcm_eight(%rip), %xmm0, %xmm0
+ vpshufb %xmm1, %xmm15, %xmm15
+ # aesenc_xor
+ vmovdqa (%rsi), %xmm7
+ vmovdqa %xmm0, 128(%rsp)
+ vpxor %xmm7, %xmm8, %xmm8
+ vpxor %xmm7, %xmm9, %xmm9
+ vpxor %xmm7, %xmm10, %xmm10
+ vpxor %xmm7, %xmm11, %xmm11
+ vpxor %xmm7, %xmm12, %xmm12
+ vpxor %xmm7, %xmm13, %xmm13
+ vpxor %xmm7, %xmm14, %xmm14
+ vpxor %xmm7, %xmm15, %xmm15
+ # aesenc_pclmul_1
+ vmovdqu -128(%rdx), %xmm1
+ vmovdqu 16(%rsi), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vmovdqa 112(%rsp), %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm5
+ vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6
+ vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_2
+ vmovdqu -112(%rdx), %xmm1
+ vmovdqa 96(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 32(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu -96(%rdx), %xmm1
+ vmovdqa 80(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 48(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu -80(%rdx), %xmm1
+ vmovdqa 64(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 64(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu -64(%rdx), %xmm1
+ vmovdqa 48(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 80(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu -48(%rdx), %xmm1
+ vmovdqa 32(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 96(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu -32(%rdx), %xmm1
+ vmovdqa 16(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 112(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu -16(%rdx), %xmm1
+ vmovdqa (%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 128(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_l
+ vpxor %xmm2, %xmm5, %xmm5
+ vpxor %xmm4, %xmm6, %xmm6
+ vpxor %xmm3, %xmm5, %xmm5
+ vpslldq $8, %xmm5, %xmm1
+ vpsrldq $8, %xmm5, %xmm5
+ vmovdqa 144(%rsi), %xmm4
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm0
+ vaesenc %xmm4, %xmm8, %xmm8
+ vpxor %xmm1, %xmm6, %xmm6
+ vpxor %xmm5, %xmm7, %xmm7
+ vpclmulqdq $16, %xmm0, %xmm6, %xmm3
+ vaesenc %xmm4, %xmm9, %xmm9
+ vaesenc %xmm4, %xmm10, %xmm10
+ vaesenc %xmm4, %xmm11, %xmm11
+ vpshufd $0x4e, %xmm6, %xmm6
+ vpxor %xmm3, %xmm6, %xmm6
+ vpclmulqdq $16, %xmm0, %xmm6, %xmm3
+ vaesenc %xmm4, %xmm12, %xmm12
+ vaesenc %xmm4, %xmm13, %xmm13
+ vaesenc %xmm4, %xmm14, %xmm14
+ vpshufd $0x4e, %xmm6, %xmm6
+ vpxor %xmm3, %xmm6, %xmm6
+ vpxor %xmm7, %xmm6, %xmm6
+ vaesenc %xmm4, %xmm15, %xmm15
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm7
+ jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 176(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm7
+ jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 208(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 224(%rsi), %xmm7
+L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done:
+ # aesenc_last
+ vaesenclast %xmm7, %xmm8, %xmm8
+ vaesenclast %xmm7, %xmm9, %xmm9
+ vaesenclast %xmm7, %xmm10, %xmm10
+ vaesenclast %xmm7, %xmm11, %xmm11
+ vmovdqu (%rcx), %xmm0
+ vmovdqu 16(%rcx), %xmm1
+ vmovdqu 32(%rcx), %xmm2
+ vmovdqu 48(%rcx), %xmm3
+ vpxor %xmm0, %xmm8, %xmm8
+ vpxor %xmm1, %xmm9, %xmm9
+ vpxor %xmm2, %xmm10, %xmm10
+ vpxor %xmm3, %xmm11, %xmm11
+ vmovdqu %xmm8, (%rdx)
+ vmovdqu %xmm9, 16(%rdx)
+ vmovdqu %xmm10, 32(%rdx)
+ vmovdqu %xmm11, 48(%rdx)
+ vaesenclast %xmm7, %xmm12, %xmm12
+ vaesenclast %xmm7, %xmm13, %xmm13
+ vaesenclast %xmm7, %xmm14, %xmm14
+ vaesenclast %xmm7, %xmm15, %xmm15
+ vmovdqu 64(%rcx), %xmm0
+ vmovdqu 80(%rcx), %xmm1
+ vmovdqu 96(%rcx), %xmm2
+ vmovdqu 112(%rcx), %xmm3
+ vpxor %xmm0, %xmm12, %xmm12
+ vpxor %xmm1, %xmm13, %xmm13
+ vpxor %xmm2, %xmm14, %xmm14
+ vpxor %xmm3, %xmm15, %xmm15
+ vmovdqu %xmm12, 64(%rdx)
+ vmovdqu %xmm13, 80(%rdx)
+ vmovdqu %xmm14, 96(%rdx)
+ vmovdqu %xmm15, 112(%rdx)
+ # aesenc_128_ghash - end
+ addl $0x80, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_encrypt_avx2_ghash_128
+L_AES_GCM_encrypt_avx2_end_128:
+ vmovdqa L_avx2_aes_gcm_bswap_mask(%rip), %xmm4
+ vpshufb %xmm4, %xmm8, %xmm8
+ vpshufb %xmm4, %xmm9, %xmm9
+ vpshufb %xmm4, %xmm10, %xmm10
+ vpshufb %xmm4, %xmm11, %xmm11
+ vpshufb %xmm4, %xmm12, %xmm12
+ vpshufb %xmm4, %xmm13, %xmm13
+ vpshufb %xmm4, %xmm14, %xmm14
+ vpshufb %xmm4, %xmm15, %xmm15
+ vpxor %xmm6, %xmm8, %xmm8
+ vmovdqu (%rsp), %xmm7
+ vpclmulqdq $16, %xmm15, %xmm7, %xmm5
+ vpclmulqdq $0x01, %xmm15, %xmm7, %xmm1
+ vpclmulqdq $0x00, %xmm15, %xmm7, %xmm4
+ vpclmulqdq $0x11, %xmm15, %xmm7, %xmm6
+ vpxor %xmm1, %xmm5, %xmm5
+ vmovdqu 16(%rsp), %xmm7
+ vpclmulqdq $16, %xmm14, %xmm7, %xmm2
+ vpclmulqdq $0x01, %xmm14, %xmm7, %xmm1
+ vpclmulqdq $0x00, %xmm14, %xmm7, %xmm0
+ vpclmulqdq $0x11, %xmm14, %xmm7, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm3, %xmm6, %xmm6
+ vpxor %xmm2, %xmm5, %xmm5
+ vpxor %xmm0, %xmm4, %xmm4
+ vmovdqu 32(%rsp), %xmm15
+ vmovdqu 48(%rsp), %xmm7
+ vpclmulqdq $16, %xmm13, %xmm15, %xmm2
+ vpclmulqdq $0x01, %xmm13, %xmm15, %xmm1
+ vpclmulqdq $0x00, %xmm13, %xmm15, %xmm0
+ vpclmulqdq $0x11, %xmm13, %xmm15, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm3, %xmm6, %xmm6
+ vpxor %xmm2, %xmm5, %xmm5
+ vpxor %xmm0, %xmm4, %xmm4
+ vpclmulqdq $16, %xmm12, %xmm7, %xmm2
+ vpclmulqdq $0x01, %xmm12, %xmm7, %xmm1
+ vpclmulqdq $0x00, %xmm12, %xmm7, %xmm0
+ vpclmulqdq $0x11, %xmm12, %xmm7, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm3, %xmm6, %xmm6
+ vpxor %xmm2, %xmm5, %xmm5
+ vpxor %xmm0, %xmm4, %xmm4
+ vmovdqu 64(%rsp), %xmm15
+ vmovdqu 80(%rsp), %xmm7
+ vpclmulqdq $16, %xmm11, %xmm15, %xmm2
+ vpclmulqdq $0x01, %xmm11, %xmm15, %xmm1
+ vpclmulqdq $0x00, %xmm11, %xmm15, %xmm0
+ vpclmulqdq $0x11, %xmm11, %xmm15, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm3, %xmm6, %xmm6
+ vpxor %xmm2, %xmm5, %xmm5
+ vpxor %xmm0, %xmm4, %xmm4
+ vpclmulqdq $16, %xmm10, %xmm7, %xmm2
+ vpclmulqdq $0x01, %xmm10, %xmm7, %xmm1
+ vpclmulqdq $0x00, %xmm10, %xmm7, %xmm0
+ vpclmulqdq $0x11, %xmm10, %xmm7, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm3, %xmm6, %xmm6
+ vpxor %xmm2, %xmm5, %xmm5
+ vpxor %xmm0, %xmm4, %xmm4
+ vmovdqu 96(%rsp), %xmm15
+ vmovdqu 112(%rsp), %xmm7
+ vpclmulqdq $16, %xmm9, %xmm15, %xmm2
+ vpclmulqdq $0x01, %xmm9, %xmm15, %xmm1
+ vpclmulqdq $0x00, %xmm9, %xmm15, %xmm0
+ vpclmulqdq $0x11, %xmm9, %xmm15, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm3, %xmm6, %xmm6
+ vpxor %xmm2, %xmm5, %xmm5
+ vpxor %xmm0, %xmm4, %xmm4
+ vpclmulqdq $16, %xmm8, %xmm7, %xmm2
+ vpclmulqdq $0x01, %xmm8, %xmm7, %xmm1
+ vpclmulqdq $0x00, %xmm8, %xmm7, %xmm0
+ vpclmulqdq $0x11, %xmm8, %xmm7, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpxor %xmm3, %xmm6, %xmm6
+ vpxor %xmm2, %xmm5, %xmm5
+ vpxor %xmm0, %xmm4, %xmm4
+ vpslldq $8, %xmm5, %xmm7
+ vpsrldq $8, %xmm5, %xmm5
+ vpxor %xmm7, %xmm4, %xmm4
+ vpxor %xmm5, %xmm6, %xmm6
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm4, %xmm0
+ vpshufd $0x4e, %xmm4, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm6, %xmm6
+ vmovdqa (%rsp), %xmm5
+ vmovdqu 128(%rsp), %xmm4
+ vmovdqu 144(%rsp), %xmm15
+L_AES_GCM_encrypt_avx2_done_128:
+ cmpl %r10d, %ebx
+ je L_AES_GCM_encrypt_avx2_done_enc
+ movl %r10d, %r13d
+ andl $0xfffffff0, %r13d
+ cmpl %r13d, %ebx
+ jge L_AES_GCM_encrypt_avx2_last_block_done
+ # aesenc_block
+ vmovdqa %xmm4, %xmm1
+ vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1, %xmm0
+ vpaddd L_avx2_aes_gcm_one(%rip), %xmm1, %xmm1
+ vpxor (%rsi), %xmm0, %xmm0
+ vmovdqa 16(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa 32(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa 48(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa 64(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa 80(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa 96(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa 112(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa 128(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa 144(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa %xmm1, %xmm4
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm1
+ jl L_AES_GCM_encrypt_avx2_aesenc_block_last
+ vaesenc %xmm1, %xmm0, %xmm0
+ vmovdqa 176(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm1
+ jl L_AES_GCM_encrypt_avx2_aesenc_block_last
+ vaesenc %xmm1, %xmm0, %xmm0
+ vmovdqa 208(%rsi), %xmm2
+ vaesenc %xmm2, %xmm0, %xmm0
+ vmovdqa 224(%rsi), %xmm1
+L_AES_GCM_encrypt_avx2_aesenc_block_last:
+ vaesenclast %xmm1, %xmm0, %xmm0
+ vmovdqu (%rdi,%rbx,1), %xmm1
+ vpxor %xmm1, %xmm0, %xmm0
+ vmovdqu %xmm0, (%r8,%rbx,1)
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm0, %xmm6, %xmm6
+ addl $16, %ebx
+ cmpl %r13d, %ebx
+ jge L_AES_GCM_encrypt_avx2_last_block_ghash
+L_AES_GCM_encrypt_avx2_last_block_start:
+ vmovdqu (%rdi,%rbx,1), %xmm12
+ vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm11
+ vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4
+ # aesenc_gfmul_sb
+ vpclmulqdq $0x01, %xmm5, %xmm6, %xmm2
+ vpclmulqdq $16, %xmm5, %xmm6, %xmm3
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm1
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm8
+ vpxor (%rsi), %xmm11, %xmm11
+ vaesenc 16(%rsi), %xmm11, %xmm11
+ vpxor %xmm2, %xmm3, %xmm3
+ vpslldq $8, %xmm3, %xmm2
+ vpsrldq $8, %xmm3, %xmm3
+ vaesenc 32(%rsi), %xmm11, %xmm11
+ vpxor %xmm1, %xmm2, %xmm2
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1
+ vaesenc 48(%rsi), %xmm11, %xmm11
+ vaesenc 64(%rsi), %xmm11, %xmm11
+ vaesenc 80(%rsi), %xmm11, %xmm11
+ vpshufd $0x4e, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1
+ vaesenc 96(%rsi), %xmm11, %xmm11
+ vaesenc 112(%rsi), %xmm11, %xmm11
+ vaesenc 128(%rsi), %xmm11, %xmm11
+ vpshufd $0x4e, %xmm2, %xmm2
+ vaesenc 144(%rsi), %xmm11, %xmm11
+ vpxor %xmm3, %xmm8, %xmm8
+ vpxor %xmm8, %xmm2, %xmm2
+ vmovdqa 160(%rsi), %xmm0
+ cmpl $11, %r9d
+ jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc 176(%rsi), %xmm11, %xmm11
+ vmovdqa 192(%rsi), %xmm0
+ cmpl $13, %r9d
+ jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc 208(%rsi), %xmm11, %xmm11
+ vmovdqa 224(%rsi), %xmm0
+L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last:
+ vaesenclast %xmm0, %xmm11, %xmm11
+ vpxor %xmm1, %xmm2, %xmm6
+ vpxor %xmm12, %xmm11, %xmm11
+ vmovdqu %xmm11, (%r8,%rbx,1)
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm11, %xmm11
+ vpxor %xmm11, %xmm6, %xmm6
+ addl $16, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_encrypt_avx2_last_block_start
+L_AES_GCM_encrypt_avx2_last_block_ghash:
+ # ghash_gfmul_red
+ vpclmulqdq $16, %xmm5, %xmm6, %xmm10
+ vpclmulqdq $0x01, %xmm5, %xmm6, %xmm9
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8
+ vpxor %xmm9, %xmm10, %xmm10
+ vpslldq $8, %xmm10, %xmm9
+ vpsrldq $8, %xmm10, %xmm10
+ vpxor %xmm8, %xmm9, %xmm9
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm9, %xmm8
+ vpshufd $0x4e, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm9, %xmm8
+ vpshufd $0x4e, %xmm9, %xmm9
+ vpxor %xmm10, %xmm6, %xmm6
+ vpxor %xmm9, %xmm6, %xmm6
+ vpxor %xmm8, %xmm6, %xmm6
+L_AES_GCM_encrypt_avx2_last_block_done:
+ movl %r10d, %ecx
+ movl %r10d, %edx
+ andl $15, %ecx
+ jz L_AES_GCM_encrypt_avx2_done_enc
+ # aesenc_last15_enc
+ vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4
+ vpxor (%rsi), %xmm4, %xmm4
+ vaesenc 16(%rsi), %xmm4, %xmm4
+ vaesenc 32(%rsi), %xmm4, %xmm4
+ vaesenc 48(%rsi), %xmm4, %xmm4
+ vaesenc 64(%rsi), %xmm4, %xmm4
+ vaesenc 80(%rsi), %xmm4, %xmm4
+ vaesenc 96(%rsi), %xmm4, %xmm4
+ vaesenc 112(%rsi), %xmm4, %xmm4
+ vaesenc 128(%rsi), %xmm4, %xmm4
+ vaesenc 144(%rsi), %xmm4, %xmm4
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm0
+ jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last
+ vaesenc %xmm0, %xmm4, %xmm4
+ vaesenc 176(%rsi), %xmm4, %xmm4
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm0
+ jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last
+ vaesenc %xmm0, %xmm4, %xmm4
+ vaesenc 208(%rsi), %xmm4, %xmm4
+ vmovdqa 224(%rsi), %xmm0
+L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last:
+ vaesenclast %xmm0, %xmm4, %xmm4
+ xorl %ecx, %ecx
+ vpxor %xmm0, %xmm0, %xmm0
+ vmovdqa %xmm4, (%rsp)
+ vmovdqa %xmm0, 16(%rsp)
+L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop:
+ movzbl (%rdi,%rbx,1), %r13d
+ xorb (%rsp,%rcx,1), %r13b
+ movb %r13b, 16(%rsp,%rcx,1)
+ movb %r13b, (%r8,%rbx,1)
+ incl %ebx
+ incl %ecx
+ cmpl %edx, %ebx
+ jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop
+L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_finish_enc:
+ vmovdqa 16(%rsp), %xmm4
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4
+ vpxor %xmm4, %xmm6, %xmm6
+ # ghash_gfmul_red
+ vpclmulqdq $16, %xmm5, %xmm6, %xmm2
+ vpclmulqdq $0x01, %xmm5, %xmm6, %xmm1
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm2, %xmm6, %xmm6
+ vpxor %xmm1, %xmm6, %xmm6
+ vpxor %xmm0, %xmm6, %xmm6
+L_AES_GCM_encrypt_avx2_done_enc:
+ # calc_tag
+ shlq $3, %r10
+ vpinsrq $0x00, %r10, %xmm0, %xmm0
+ shlq $3, %r11
+ vpinsrq $0x01, %r11, %xmm1, %xmm1
+ vpblendd $12, %xmm1, %xmm0, %xmm0
+ vpxor %xmm6, %xmm0, %xmm0
+ # ghash_gfmul_red
+ vpclmulqdq $16, %xmm5, %xmm0, %xmm4
+ vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3
+ vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2
+ vpxor %xmm3, %xmm4, %xmm4
+ vpslldq $8, %xmm4, %xmm3
+ vpsrldq $8, %xmm4, %xmm4
+ vpxor %xmm2, %xmm3, %xmm3
+ vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2
+ vpshufd $0x4e, %xmm3, %xmm3
+ vpxor %xmm2, %xmm3, %xmm3
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2
+ vpshufd $0x4e, %xmm3, %xmm3
+ vpxor %xmm4, %xmm0, %xmm0
+ vpxor %xmm3, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm15, %xmm0, %xmm0
+ # store_tag
+ cmpl $16, %r14d
+ je L_AES_GCM_encrypt_avx2_store_tag_16
+ xorq %rcx, %rcx
+ vmovdqa %xmm0, (%rsp)
+L_AES_GCM_encrypt_avx2_store_tag_loop:
+ movzbl (%rsp,%rcx,1), %r13d
+ movb %r13b, (%r15,%rcx,1)
+ incl %ecx
+ cmpl %r14d, %ecx
+ jne L_AES_GCM_encrypt_avx2_store_tag_loop
+ jmp L_AES_GCM_encrypt_avx2_store_tag_done
+L_AES_GCM_encrypt_avx2_store_tag_16:
+ vmovdqu %xmm0, (%r15)
+L_AES_GCM_encrypt_avx2_store_tag_done:
+ vzeroupper
+ addq $0xa0, %rsp
+ popq %r14
+ popq %rbx
+ popq %r15
+ popq %r12
+ popq %r13
+ repz retq
+#ifndef __APPLE__
+.size AES_GCM_encrypt_avx2,.-AES_GCM_encrypt_avx2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl AES_GCM_decrypt_avx2
+.type AES_GCM_decrypt_avx2,@function
+.align 4
+AES_GCM_decrypt_avx2:
+#else
+.section __TEXT,__text
+.globl _AES_GCM_decrypt_avx2
+.p2align 2
+_AES_GCM_decrypt_avx2:
+#endif /* __APPLE__ */
+ pushq %r13
+ pushq %r12
+ pushq %r14
+ pushq %rbx
+ pushq %r15
+ pushq %rbp
+ movq %rdx, %r12
+ movq %rcx, %rax
+ movq %r8, %r14
+ movq %rsi, %r8
+ movl %r9d, %r10d
+ movl 56(%rsp), %r11d
+ movl 64(%rsp), %ebx
+ movl 72(%rsp), %r15d
+ movq 80(%rsp), %rsi
+ movl 88(%rsp), %r9d
+ movq 96(%rsp), %rbp
+ subq $0xa8, %rsp
+ vpxor %xmm4, %xmm4, %xmm4
+ vpxor %xmm6, %xmm6, %xmm6
+ movl %ebx, %edx
+ cmpl $12, %edx
+ je L_AES_GCM_decrypt_avx2_iv_12
+ # Calculate values when IV is not 12 bytes
+ # H = Encrypt X(=0)
+ vmovdqa (%rsi), %xmm5
+ vaesenc 16(%rsi), %xmm5, %xmm5
+ vaesenc 32(%rsi), %xmm5, %xmm5
+ vaesenc 48(%rsi), %xmm5, %xmm5
+ vaesenc 64(%rsi), %xmm5, %xmm5
+ vaesenc 80(%rsi), %xmm5, %xmm5
+ vaesenc 96(%rsi), %xmm5, %xmm5
+ vaesenc 112(%rsi), %xmm5, %xmm5
+ vaesenc 128(%rsi), %xmm5, %xmm5
+ vaesenc 144(%rsi), %xmm5, %xmm5
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm0
+ jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc 176(%rsi), %xmm5, %xmm5
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm0
+ jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc 208(%rsi), %xmm5, %xmm5
+ vmovdqa 224(%rsi), %xmm0
+L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last:
+ vaesenclast %xmm0, %xmm5, %xmm5
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5
+ # Calc counter
+ # Initialization vector
+ cmpl $0x00, %edx
+ movq $0x00, %rcx
+ je L_AES_GCM_decrypt_avx2_calc_iv_done
+ cmpl $16, %edx
+ jl L_AES_GCM_decrypt_avx2_calc_iv_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_decrypt_avx2_calc_iv_16_loop:
+ vmovdqu (%rax,%rcx,1), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm0, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm4, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm4
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm4, %xmm4
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_avx2_calc_iv_16_loop
+ movl %ebx, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_decrypt_avx2_calc_iv_done
+L_AES_GCM_decrypt_avx2_calc_iv_lt16:
+ vpxor %xmm0, %xmm0, %xmm0
+ xorl %ebx, %ebx
+ vmovdqa %xmm0, (%rsp)
+L_AES_GCM_decrypt_avx2_calc_iv_loop:
+ movzbl (%rax,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_avx2_calc_iv_loop
+ vmovdqa (%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm0, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm4, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm4
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm4, %xmm4
+L_AES_GCM_decrypt_avx2_calc_iv_done:
+ # T = Encrypt counter
+ vpxor %xmm0, %xmm0, %xmm0
+ shll $3, %edx
+ vpinsrq $0x00, %rdx, %xmm0, %xmm0
+ vpxor %xmm0, %xmm4, %xmm4
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm4, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm4
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm4, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm4, %xmm4
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm4, %xmm4
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm4, %xmm4
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm4, %xmm4
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4
+ # Encrypt counter
+ vmovdqa (%rsi), %xmm15
+ vpxor %xmm4, %xmm15, %xmm15
+ vaesenc 16(%rsi), %xmm15, %xmm15
+ vaesenc 32(%rsi), %xmm15, %xmm15
+ vaesenc 48(%rsi), %xmm15, %xmm15
+ vaesenc 64(%rsi), %xmm15, %xmm15
+ vaesenc 80(%rsi), %xmm15, %xmm15
+ vaesenc 96(%rsi), %xmm15, %xmm15
+ vaesenc 112(%rsi), %xmm15, %xmm15
+ vaesenc 128(%rsi), %xmm15, %xmm15
+ vaesenc 144(%rsi), %xmm15, %xmm15
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm0
+ jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last
+ vaesenc %xmm0, %xmm15, %xmm15
+ vaesenc 176(%rsi), %xmm15, %xmm15
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm0
+ jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last
+ vaesenc %xmm0, %xmm15, %xmm15
+ vaesenc 208(%rsi), %xmm15, %xmm15
+ vmovdqa 224(%rsi), %xmm0
+L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last:
+ vaesenclast %xmm0, %xmm15, %xmm15
+ jmp L_AES_GCM_decrypt_avx2_iv_done
+L_AES_GCM_decrypt_avx2_iv_12:
+ # # Calculate values when IV is 12 bytes
+ # Set counter based on IV
+ vmovdqa L_avx2_aes_gcm_bswap_one(%rip), %xmm4
+ vmovdqa (%rsi), %xmm5
+ vpblendd $7, (%rax), %xmm4, %xmm4
+ # H = Encrypt X(=0) and T = Encrypt counter
+ vmovdqa 16(%rsi), %xmm7
+ vpxor %xmm5, %xmm4, %xmm15
+ vaesenc %xmm7, %xmm5, %xmm5
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 32(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 48(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 64(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 80(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 96(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 112(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 128(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 144(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm0
+ jl L_AES_GCM_decrypt_avx2_calc_iv_12_last
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 176(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm0
+ jl L_AES_GCM_decrypt_avx2_calc_iv_12_last
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 208(%rsi), %xmm0
+ vaesenc %xmm0, %xmm5, %xmm5
+ vaesenc %xmm0, %xmm15, %xmm15
+ vmovdqa 224(%rsi), %xmm0
+L_AES_GCM_decrypt_avx2_calc_iv_12_last:
+ vaesenclast %xmm0, %xmm5, %xmm5
+ vaesenclast %xmm0, %xmm15, %xmm15
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5
+L_AES_GCM_decrypt_avx2_iv_done:
+ # Additional authentication data
+ movl %r11d, %edx
+ cmpl $0x00, %edx
+ je L_AES_GCM_decrypt_avx2_calc_aad_done
+ xorl %ecx, %ecx
+ cmpl $16, %edx
+ jl L_AES_GCM_decrypt_avx2_calc_aad_lt16
+ andl $0xfffffff0, %edx
+L_AES_GCM_decrypt_avx2_calc_aad_16_loop:
+ vmovdqu (%r12,%rcx,1), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm0, %xmm6, %xmm6
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm6, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm6
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm6, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm6, %xmm6
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm6, %xmm6
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm6, %xmm6
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm6, %xmm6
+ addl $16, %ecx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_avx2_calc_aad_16_loop
+ movl %r11d, %edx
+ cmpl %edx, %ecx
+ je L_AES_GCM_decrypt_avx2_calc_aad_done
+L_AES_GCM_decrypt_avx2_calc_aad_lt16:
+ vpxor %xmm0, %xmm0, %xmm0
+ xorl %ebx, %ebx
+ vmovdqa %xmm0, (%rsp)
+L_AES_GCM_decrypt_avx2_calc_aad_loop:
+ movzbl (%r12,%rcx,1), %r13d
+ movb %r13b, (%rsp,%rbx,1)
+ incl %ecx
+ incl %ebx
+ cmpl %edx, %ecx
+ jl L_AES_GCM_decrypt_avx2_calc_aad_loop
+ vmovdqa (%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm0, %xmm6, %xmm6
+ # ghash_gfmul_avx
+ vpclmulqdq $16, %xmm6, %xmm5, %xmm2
+ vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1
+ vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0
+ vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm1, %xmm0, %xmm7
+ vpxor %xmm2, %xmm3, %xmm6
+ # ghash_mid
+ vpsrld $31, %xmm7, %xmm0
+ vpsrld $31, %xmm6, %xmm1
+ vpslld $0x01, %xmm7, %xmm7
+ vpslld $0x01, %xmm6, %xmm6
+ vpsrldq $12, %xmm0, %xmm2
+ vpslldq $4, %xmm0, %xmm0
+ vpslldq $4, %xmm1, %xmm1
+ vpor %xmm2, %xmm6, %xmm6
+ vpor %xmm0, %xmm7, %xmm7
+ vpor %xmm1, %xmm6, %xmm6
+ # ghash_red
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2
+ vpclmulqdq $16, %xmm2, %xmm7, %xmm0
+ vpshufd $0x4e, %xmm7, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpxor %xmm1, %xmm6, %xmm6
+L_AES_GCM_decrypt_avx2_calc_aad_done:
+ # Calculate counter and H
+ vpsrlq $63, %xmm5, %xmm1
+ vpsllq $0x01, %xmm5, %xmm0
+ vpslldq $8, %xmm1, %xmm1
+ vpor %xmm1, %xmm0, %xmm0
+ vpshufd $0xff, %xmm5, %xmm5
+ vpsrad $31, %xmm5, %xmm5
+ vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4
+ vpand L_avx2_aes_gcm_mod2_128(%rip), %xmm5, %xmm5
+ vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4
+ vpxor %xmm0, %xmm5, %xmm5
+ xorl %ebx, %ebx
+ cmpl $0x80, %r10d
+ movl %r10d, %r13d
+ jl L_AES_GCM_decrypt_avx2_done_128
+ andl $0xffffff80, %r13d
+ vmovdqa %xmm4, 128(%rsp)
+ vmovdqa %xmm15, 144(%rsp)
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm3
+ # H ^ 1 and H ^ 2
+ vpclmulqdq $0x00, %xmm5, %xmm5, %xmm9
+ vpclmulqdq $0x11, %xmm5, %xmm5, %xmm10
+ vpclmulqdq $16, %xmm3, %xmm9, %xmm8
+ vpshufd $0x4e, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpclmulqdq $16, %xmm3, %xmm9, %xmm8
+ vpshufd $0x4e, %xmm9, %xmm9
+ vpxor %xmm8, %xmm9, %xmm9
+ vpxor %xmm9, %xmm10, %xmm0
+ vmovdqa %xmm5, (%rsp)
+ vmovdqa %xmm0, 16(%rsp)
+ # H ^ 3 and H ^ 4
+ vpclmulqdq $16, %xmm5, %xmm0, %xmm11
+ vpclmulqdq $0x01, %xmm5, %xmm0, %xmm10
+ vpclmulqdq $0x00, %xmm5, %xmm0, %xmm9
+ vpclmulqdq $0x11, %xmm5, %xmm0, %xmm12
+ vpclmulqdq $0x00, %xmm0, %xmm0, %xmm13
+ vpclmulqdq $0x11, %xmm0, %xmm0, %xmm14
+ vpxor %xmm10, %xmm11, %xmm11
+ vpslldq $8, %xmm11, %xmm10
+ vpsrldq $8, %xmm11, %xmm11
+ vpxor %xmm9, %xmm10, %xmm10
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm9, %xmm10, %xmm10
+ vpxor %xmm8, %xmm13, %xmm13
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm11, %xmm12, %xmm12
+ vpxor %xmm8, %xmm13, %xmm13
+ vpxor %xmm12, %xmm10, %xmm10
+ vpxor %xmm14, %xmm13, %xmm2
+ vpxor %xmm9, %xmm10, %xmm1
+ vmovdqa %xmm1, 32(%rsp)
+ vmovdqa %xmm2, 48(%rsp)
+ # H ^ 5 and H ^ 6
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm11
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm10
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm9
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm12
+ vpclmulqdq $0x00, %xmm1, %xmm1, %xmm13
+ vpclmulqdq $0x11, %xmm1, %xmm1, %xmm14
+ vpxor %xmm10, %xmm11, %xmm11
+ vpslldq $8, %xmm11, %xmm10
+ vpsrldq $8, %xmm11, %xmm11
+ vpxor %xmm9, %xmm10, %xmm10
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm9, %xmm10, %xmm10
+ vpxor %xmm8, %xmm13, %xmm13
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm11, %xmm12, %xmm12
+ vpxor %xmm8, %xmm13, %xmm13
+ vpxor %xmm12, %xmm10, %xmm10
+ vpxor %xmm14, %xmm13, %xmm0
+ vpxor %xmm9, %xmm10, %xmm7
+ vmovdqa %xmm7, 64(%rsp)
+ vmovdqa %xmm0, 80(%rsp)
+ # H ^ 7 and H ^ 8
+ vpclmulqdq $16, %xmm1, %xmm2, %xmm11
+ vpclmulqdq $0x01, %xmm1, %xmm2, %xmm10
+ vpclmulqdq $0x00, %xmm1, %xmm2, %xmm9
+ vpclmulqdq $0x11, %xmm1, %xmm2, %xmm12
+ vpclmulqdq $0x00, %xmm2, %xmm2, %xmm13
+ vpclmulqdq $0x11, %xmm2, %xmm2, %xmm14
+ vpxor %xmm10, %xmm11, %xmm11
+ vpslldq $8, %xmm11, %xmm10
+ vpsrldq $8, %xmm11, %xmm11
+ vpxor %xmm9, %xmm10, %xmm10
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm9, %xmm10, %xmm10
+ vpxor %xmm8, %xmm13, %xmm13
+ vpclmulqdq $16, %xmm3, %xmm10, %xmm9
+ vpclmulqdq $16, %xmm3, %xmm13, %xmm8
+ vpshufd $0x4e, %xmm10, %xmm10
+ vpshufd $0x4e, %xmm13, %xmm13
+ vpxor %xmm11, %xmm12, %xmm12
+ vpxor %xmm8, %xmm13, %xmm13
+ vpxor %xmm12, %xmm10, %xmm10
+ vpxor %xmm14, %xmm13, %xmm0
+ vpxor %xmm9, %xmm10, %xmm7
+ vmovdqa %xmm7, 96(%rsp)
+ vmovdqa %xmm0, 112(%rsp)
+L_AES_GCM_decrypt_avx2_ghash_128:
+ # aesenc_128_ghash
+ leaq (%rdi,%rbx,1), %rcx
+ leaq (%r8,%rbx,1), %rdx
+ # aesenc_ctr
+ vmovdqa 128(%rsp), %xmm0
+ vmovdqa L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1
+ vpaddd L_avx2_aes_gcm_one(%rip), %xmm0, %xmm9
+ vpshufb %xmm1, %xmm0, %xmm8
+ vpaddd L_avx2_aes_gcm_two(%rip), %xmm0, %xmm10
+ vpshufb %xmm1, %xmm9, %xmm9
+ vpaddd L_avx2_aes_gcm_three(%rip), %xmm0, %xmm11
+ vpshufb %xmm1, %xmm10, %xmm10
+ vpaddd L_avx2_aes_gcm_four(%rip), %xmm0, %xmm12
+ vpshufb %xmm1, %xmm11, %xmm11
+ vpaddd L_avx2_aes_gcm_five(%rip), %xmm0, %xmm13
+ vpshufb %xmm1, %xmm12, %xmm12
+ vpaddd L_avx2_aes_gcm_six(%rip), %xmm0, %xmm14
+ vpshufb %xmm1, %xmm13, %xmm13
+ vpaddd L_avx2_aes_gcm_seven(%rip), %xmm0, %xmm15
+ vpshufb %xmm1, %xmm14, %xmm14
+ vpaddd L_avx2_aes_gcm_eight(%rip), %xmm0, %xmm0
+ vpshufb %xmm1, %xmm15, %xmm15
+ # aesenc_xor
+ vmovdqa (%rsi), %xmm7
+ vmovdqa %xmm0, 128(%rsp)
+ vpxor %xmm7, %xmm8, %xmm8
+ vpxor %xmm7, %xmm9, %xmm9
+ vpxor %xmm7, %xmm10, %xmm10
+ vpxor %xmm7, %xmm11, %xmm11
+ vpxor %xmm7, %xmm12, %xmm12
+ vpxor %xmm7, %xmm13, %xmm13
+ vpxor %xmm7, %xmm14, %xmm14
+ vpxor %xmm7, %xmm15, %xmm15
+ # aesenc_pclmul_1
+ vmovdqu (%rcx), %xmm1
+ vmovdqu 16(%rsi), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vmovdqa 112(%rsp), %xmm2
+ vpxor %xmm6, %xmm1, %xmm1
+ vpclmulqdq $16, %xmm2, %xmm1, %xmm5
+ vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3
+ vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6
+ vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_2
+ vmovdqu 16(%rcx), %xmm1
+ vmovdqa 96(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 32(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu 32(%rcx), %xmm1
+ vmovdqa 80(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 48(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu 48(%rcx), %xmm1
+ vmovdqa 64(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 64(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu 64(%rcx), %xmm1
+ vmovdqa 48(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 80(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu 80(%rcx), %xmm1
+ vmovdqa 32(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 96(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu 96(%rcx), %xmm1
+ vmovdqa 16(%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 112(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_n
+ vmovdqu 112(%rcx), %xmm1
+ vmovdqa (%rsp), %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1
+ vpxor %xmm2, %xmm5, %xmm5
+ vpclmulqdq $16, %xmm0, %xmm1, %xmm2
+ vpxor %xmm3, %xmm5, %xmm5
+ vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3
+ vpxor %xmm4, %xmm6, %xmm6
+ vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4
+ vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1
+ vmovdqu 128(%rsi), %xmm0
+ vpxor %xmm1, %xmm7, %xmm7
+ vaesenc %xmm0, %xmm8, %xmm8
+ vaesenc %xmm0, %xmm9, %xmm9
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc %xmm0, %xmm11, %xmm11
+ vaesenc %xmm0, %xmm12, %xmm12
+ vaesenc %xmm0, %xmm13, %xmm13
+ vaesenc %xmm0, %xmm14, %xmm14
+ vaesenc %xmm0, %xmm15, %xmm15
+ # aesenc_pclmul_l
+ vpxor %xmm2, %xmm5, %xmm5
+ vpxor %xmm4, %xmm6, %xmm6
+ vpxor %xmm3, %xmm5, %xmm5
+ vpslldq $8, %xmm5, %xmm1
+ vpsrldq $8, %xmm5, %xmm5
+ vmovdqa 144(%rsi), %xmm4
+ vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm0
+ vaesenc %xmm4, %xmm8, %xmm8
+ vpxor %xmm1, %xmm6, %xmm6
+ vpxor %xmm5, %xmm7, %xmm7
+ vpclmulqdq $16, %xmm0, %xmm6, %xmm3
+ vaesenc %xmm4, %xmm9, %xmm9
+ vaesenc %xmm4, %xmm10, %xmm10
+ vaesenc %xmm4, %xmm11, %xmm11
+ vpshufd $0x4e, %xmm6, %xmm6
+ vpxor %xmm3, %xmm6, %xmm6
+ vpclmulqdq $16, %xmm0, %xmm6, %xmm3
+ vaesenc %xmm4, %xmm12, %xmm12
+ vaesenc %xmm4, %xmm13, %xmm13
+ vaesenc %xmm4, %xmm14, %xmm14
+ vpshufd $0x4e, %xmm6, %xmm6
+ vpxor %xmm3, %xmm6, %xmm6
+ vpxor %xmm7, %xmm6, %xmm6
+ vaesenc %xmm4, %xmm15, %xmm15
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm7
+ jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 176(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm7
+ jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 208(%rsi), %xmm7
+ vaesenc %xmm7, %xmm8, %xmm8
+ vaesenc %xmm7, %xmm9, %xmm9
+ vaesenc %xmm7, %xmm10, %xmm10
+ vaesenc %xmm7, %xmm11, %xmm11
+ vaesenc %xmm7, %xmm12, %xmm12
+ vaesenc %xmm7, %xmm13, %xmm13
+ vaesenc %xmm7, %xmm14, %xmm14
+ vaesenc %xmm7, %xmm15, %xmm15
+ vmovdqa 224(%rsi), %xmm7
+L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done:
+ # aesenc_last
+ vaesenclast %xmm7, %xmm8, %xmm8
+ vaesenclast %xmm7, %xmm9, %xmm9
+ vaesenclast %xmm7, %xmm10, %xmm10
+ vaesenclast %xmm7, %xmm11, %xmm11
+ vmovdqu (%rcx), %xmm0
+ vmovdqu 16(%rcx), %xmm1
+ vmovdqu 32(%rcx), %xmm2
+ vmovdqu 48(%rcx), %xmm3
+ vpxor %xmm0, %xmm8, %xmm8
+ vpxor %xmm1, %xmm9, %xmm9
+ vpxor %xmm2, %xmm10, %xmm10
+ vpxor %xmm3, %xmm11, %xmm11
+ vmovdqu %xmm8, (%rdx)
+ vmovdqu %xmm9, 16(%rdx)
+ vmovdqu %xmm10, 32(%rdx)
+ vmovdqu %xmm11, 48(%rdx)
+ vaesenclast %xmm7, %xmm12, %xmm12
+ vaesenclast %xmm7, %xmm13, %xmm13
+ vaesenclast %xmm7, %xmm14, %xmm14
+ vaesenclast %xmm7, %xmm15, %xmm15
+ vmovdqu 64(%rcx), %xmm0
+ vmovdqu 80(%rcx), %xmm1
+ vmovdqu 96(%rcx), %xmm2
+ vmovdqu 112(%rcx), %xmm3
+ vpxor %xmm0, %xmm12, %xmm12
+ vpxor %xmm1, %xmm13, %xmm13
+ vpxor %xmm2, %xmm14, %xmm14
+ vpxor %xmm3, %xmm15, %xmm15
+ vmovdqu %xmm12, 64(%rdx)
+ vmovdqu %xmm13, 80(%rdx)
+ vmovdqu %xmm14, 96(%rdx)
+ vmovdqu %xmm15, 112(%rdx)
+ # aesenc_128_ghash - end
+ addl $0x80, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_decrypt_avx2_ghash_128
+ vmovdqa (%rsp), %xmm5
+ vmovdqa 128(%rsp), %xmm4
+ vmovdqa 144(%rsp), %xmm15
+L_AES_GCM_decrypt_avx2_done_128:
+ cmpl %r10d, %ebx
+ jge L_AES_GCM_decrypt_avx2_done_dec
+ movl %r10d, %r13d
+ andl $0xfffffff0, %r13d
+ cmpl %r13d, %ebx
+ jge L_AES_GCM_decrypt_avx2_last_block_done
+L_AES_GCM_decrypt_avx2_last_block_start:
+ vmovdqu (%rdi,%rbx,1), %xmm11
+ vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm10
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm11, %xmm12
+ vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4
+ vpxor %xmm6, %xmm12, %xmm12
+ # aesenc_gfmul_sb
+ vpclmulqdq $0x01, %xmm5, %xmm12, %xmm2
+ vpclmulqdq $16, %xmm5, %xmm12, %xmm3
+ vpclmulqdq $0x00, %xmm5, %xmm12, %xmm1
+ vpclmulqdq $0x11, %xmm5, %xmm12, %xmm8
+ vpxor (%rsi), %xmm10, %xmm10
+ vaesenc 16(%rsi), %xmm10, %xmm10
+ vpxor %xmm2, %xmm3, %xmm3
+ vpslldq $8, %xmm3, %xmm2
+ vpsrldq $8, %xmm3, %xmm3
+ vaesenc 32(%rsi), %xmm10, %xmm10
+ vpxor %xmm1, %xmm2, %xmm2
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1
+ vaesenc 48(%rsi), %xmm10, %xmm10
+ vaesenc 64(%rsi), %xmm10, %xmm10
+ vaesenc 80(%rsi), %xmm10, %xmm10
+ vpshufd $0x4e, %xmm2, %xmm2
+ vpxor %xmm1, %xmm2, %xmm2
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1
+ vaesenc 96(%rsi), %xmm10, %xmm10
+ vaesenc 112(%rsi), %xmm10, %xmm10
+ vaesenc 128(%rsi), %xmm10, %xmm10
+ vpshufd $0x4e, %xmm2, %xmm2
+ vaesenc 144(%rsi), %xmm10, %xmm10
+ vpxor %xmm3, %xmm8, %xmm8
+ vpxor %xmm8, %xmm2, %xmm2
+ vmovdqa 160(%rsi), %xmm0
+ cmpl $11, %r9d
+ jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc 176(%rsi), %xmm10, %xmm10
+ vmovdqa 192(%rsi), %xmm0
+ cmpl $13, %r9d
+ jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last
+ vaesenc %xmm0, %xmm10, %xmm10
+ vaesenc 208(%rsi), %xmm10, %xmm10
+ vmovdqa 224(%rsi), %xmm0
+L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last:
+ vaesenclast %xmm0, %xmm10, %xmm10
+ vpxor %xmm1, %xmm2, %xmm6
+ vpxor %xmm11, %xmm10, %xmm10
+ vmovdqu %xmm10, (%r8,%rbx,1)
+ addl $16, %ebx
+ cmpl %r13d, %ebx
+ jl L_AES_GCM_decrypt_avx2_last_block_start
+L_AES_GCM_decrypt_avx2_last_block_done:
+ movl %r10d, %ecx
+ movl %r10d, %edx
+ andl $15, %ecx
+ jz L_AES_GCM_decrypt_avx2_done_dec
+ # aesenc_last15_dec
+ vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4
+ vpxor (%rsi), %xmm4, %xmm4
+ vaesenc 16(%rsi), %xmm4, %xmm4
+ vaesenc 32(%rsi), %xmm4, %xmm4
+ vaesenc 48(%rsi), %xmm4, %xmm4
+ vaesenc 64(%rsi), %xmm4, %xmm4
+ vaesenc 80(%rsi), %xmm4, %xmm4
+ vaesenc 96(%rsi), %xmm4, %xmm4
+ vaesenc 112(%rsi), %xmm4, %xmm4
+ vaesenc 128(%rsi), %xmm4, %xmm4
+ vaesenc 144(%rsi), %xmm4, %xmm4
+ cmpl $11, %r9d
+ vmovdqa 160(%rsi), %xmm1
+ jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last
+ vaesenc %xmm1, %xmm4, %xmm4
+ vaesenc 176(%rsi), %xmm4, %xmm4
+ cmpl $13, %r9d
+ vmovdqa 192(%rsi), %xmm1
+ jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last
+ vaesenc %xmm1, %xmm4, %xmm4
+ vaesenc 208(%rsi), %xmm4, %xmm4
+ vmovdqa 224(%rsi), %xmm1
+L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last:
+ vaesenclast %xmm1, %xmm4, %xmm4
+ xorl %ecx, %ecx
+ vpxor %xmm0, %xmm0, %xmm0
+ vmovdqa %xmm4, (%rsp)
+ vmovdqa %xmm0, 16(%rsp)
+L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop:
+ movzbl (%rdi,%rbx,1), %r13d
+ movb %r13b, 16(%rsp,%rcx,1)
+ xorb (%rsp,%rcx,1), %r13b
+ movb %r13b, (%r8,%rbx,1)
+ incl %ebx
+ incl %ecx
+ cmpl %edx, %ebx
+ jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop
+ vmovdqa 16(%rsp), %xmm4
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4
+ vpxor %xmm4, %xmm6, %xmm6
+ # ghash_gfmul_red
+ vpclmulqdq $16, %xmm5, %xmm6, %xmm2
+ vpclmulqdq $0x01, %xmm5, %xmm6, %xmm1
+ vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0
+ vpxor %xmm1, %xmm2, %xmm2
+ vpslldq $8, %xmm2, %xmm1
+ vpsrldq $8, %xmm2, %xmm2
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm0, %xmm1, %xmm1
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm1, %xmm0
+ vpshufd $0x4e, %xmm1, %xmm1
+ vpxor %xmm2, %xmm6, %xmm6
+ vpxor %xmm1, %xmm6, %xmm6
+ vpxor %xmm0, %xmm6, %xmm6
+L_AES_GCM_decrypt_avx2_done_dec:
+ # calc_tag
+ shlq $3, %r10
+ vpinsrq $0x00, %r10, %xmm0, %xmm0
+ shlq $3, %r11
+ vpinsrq $0x01, %r11, %xmm1, %xmm1
+ vpblendd $12, %xmm1, %xmm0, %xmm0
+ vpxor %xmm6, %xmm0, %xmm0
+ # ghash_gfmul_red
+ vpclmulqdq $16, %xmm5, %xmm0, %xmm4
+ vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3
+ vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2
+ vpxor %xmm3, %xmm4, %xmm4
+ vpslldq $8, %xmm4, %xmm3
+ vpsrldq $8, %xmm4, %xmm4
+ vpxor %xmm2, %xmm3, %xmm3
+ vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2
+ vpshufd $0x4e, %xmm3, %xmm3
+ vpxor %xmm2, %xmm3, %xmm3
+ vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2
+ vpshufd $0x4e, %xmm3, %xmm3
+ vpxor %xmm4, %xmm0, %xmm0
+ vpxor %xmm3, %xmm0, %xmm0
+ vpxor %xmm2, %xmm0, %xmm0
+ vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0
+ vpxor %xmm15, %xmm0, %xmm0
+ # cmp_tag
+ cmpl $16, %r15d
+ je L_AES_GCM_decrypt_avx2_cmp_tag_16
+ xorq %rdx, %rdx
+ xorq %rax, %rax
+ vmovdqa %xmm0, (%rsp)
+L_AES_GCM_decrypt_avx2_cmp_tag_loop:
+ movzbl (%rsp,%rdx,1), %r13d
+ xorb (%r14,%rdx,1), %r13b
+ orb %r13b, %al
+ incl %edx
+ cmpl %r15d, %edx
+ jne L_AES_GCM_decrypt_avx2_cmp_tag_loop
+ cmpb $0x00, %al
+ sete %al
+ jmp L_AES_GCM_decrypt_avx2_cmp_tag_done
+L_AES_GCM_decrypt_avx2_cmp_tag_16:
+ vmovdqu (%r14), %xmm1
+ vpcmpeqb %xmm1, %xmm0, %xmm0
+ vpmovmskb %xmm0, %rdx
+ # %%edx == 0xFFFF then return 1 else => return 0
+ xorl %eax, %eax
+ cmpl $0xffff, %edx
+ sete %al
+L_AES_GCM_decrypt_avx2_cmp_tag_done:
+ movl %eax, (%rbp)
+ vzeroupper
+ addq $0xa8, %rsp
+ popq %rbp
+ popq %r15
+ popq %rbx
+ popq %r14
+ popq %r12
+ popq %r13
+ repz retq
+#ifndef __APPLE__
+.size AES_GCM_decrypt_avx2,.-AES_GCM_decrypt_avx2
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/arc4.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/arc4.c
index 21ed2e79d..7eb8268e3 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/arc4.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/arc4.c
@@ -1,8 +1,8 @@
/* arc4.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -27,23 +28,25 @@
#ifndef NO_RC4
+#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/arc4.h>
-#ifdef HAVE_CAVIUM
- static void wc_Arc4CaviumSetKey(Arc4* arc4, const byte* key, word32 length);
- static void wc_Arc4CaviumProcess(Arc4* arc4, byte* out, const byte* in,
- word32 length);
-#endif
-
-void wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length)
+int wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length)
{
+ int ret = 0;
word32 i;
word32 keyIndex = 0, stateIndex = 0;
-#ifdef HAVE_CAVIUM
- if (arc4->magic == WOLFSSL_ARC4_CAVIUM_MAGIC)
- return wc_Arc4CaviumSetKey(arc4, key, length);
+ if (arc4 == NULL || key == NULL || length == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) && \
+ defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
+ if (arc4->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ARC4) {
+ return NitroxArc4SetKey(arc4, key, length);
+ }
#endif
arc4->x = 1;
@@ -62,10 +65,12 @@ void wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length)
if (++keyIndex >= length)
keyIndex = 0;
}
+
+ return ret;
}
-static INLINE byte MakeByte(word32* x, word32* y, byte* s)
+static WC_INLINE byte MakeByte(word32* x, word32* y, byte* s)
{
word32 a = s[*x], b;
*y = (*y+a) & 0xff;
@@ -79,14 +84,21 @@ static INLINE byte MakeByte(word32* x, word32* y, byte* s)
}
-void wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
+int wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
{
+ int ret = 0;
word32 x;
word32 y;
-#ifdef HAVE_CAVIUM
- if (arc4->magic == WOLFSSL_ARC4_CAVIUM_MAGIC)
- return wc_Arc4CaviumProcess(arc4, out, in, length);
+ if (arc4 == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) && \
+ defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
+ if (arc4->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ARC4) {
+ return NitroxArc4Process(arc4, out, in, length);
+ }
#endif
x = arc4->x;
@@ -97,82 +109,41 @@ void wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
arc4->x = (byte)x;
arc4->y = (byte)y;
-}
-
-#ifdef HAVE_CAVIUM
-
-#include <wolfssl/wolfcrypt/logging.h>
-#include "cavium_common.h"
+ return ret;
+}
-/* Initiliaze Arc4 for use with Nitrox device */
-int wc_Arc4InitCavium(Arc4* arc4, int devId)
+/* Initialize Arc4 for use with async device */
+int wc_Arc4Init(Arc4* arc4, void* heap, int devId)
{
+ int ret = 0;
+
if (arc4 == NULL)
- return -1;
+ return BAD_FUNC_ARG;
- if (CspAllocContext(CONTEXT_SSL, &arc4->contextHandle, devId) != 0)
- return -1;
+ arc4->heap = heap;
- arc4->devId = devId;
- arc4->magic = WOLFSSL_ARC4_CAVIUM_MAGIC;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4)
+ ret = wolfAsync_DevCtxInit(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4,
+ arc4->heap, devId);
+#else
+ (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
- return 0;
+ return ret;
}
-/* Free Arc4 from use with Nitrox device */
-void wc_Arc4FreeCavium(Arc4* arc4)
+/* Free Arc4 from use with async device */
+void wc_Arc4Free(Arc4* arc4)
{
if (arc4 == NULL)
return;
- if (arc4->magic != WOLFSSL_ARC4_CAVIUM_MAGIC)
- return;
-
- CspFreeContext(CONTEXT_SSL, arc4->contextHandle, arc4->devId);
- arc4->magic = 0;
-}
-
-
-static void wc_Arc4CaviumSetKey(Arc4* arc4, const byte* key, word32 length)
-{
- word32 requestId;
-
- if (CspInitializeRc4(CAVIUM_BLOCKING, arc4->contextHandle, length,
- (byte*)key, &requestId, arc4->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium Arc4 Init");
- }
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4)
+ wolfAsync_DevCtxFree(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4);
+#endif /* WOLFSSL_ASYNC_CRYPT */
}
-
-static void wc_Arc4CaviumProcess(Arc4* arc4, byte* out, const byte* in,
- word32 length)
-{
- wolfssl_word offset = 0;
- word32 requestId;
-
- while (length > WOLFSSL_MAX_16BIT) {
- word16 slen = (word16)WOLFSSL_MAX_16BIT;
- if (CspEncryptRc4(CAVIUM_BLOCKING, arc4->contextHandle,CAVIUM_UPDATE,
- slen, (byte*)in + offset, out + offset, &requestId,
- arc4->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium Arc4 Encrypt");
- }
- length -= WOLFSSL_MAX_16BIT;
- offset += WOLFSSL_MAX_16BIT;
- }
- if (length) {
- word16 slen = (word16)length;
- if (CspEncryptRc4(CAVIUM_BLOCKING, arc4->contextHandle,CAVIUM_UPDATE,
- slen, (byte*)in + offset, out + offset, &requestId,
- arc4->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium Arc4 Encrypt");
- }
- }
-}
-
-#endif /* HAVE_CAVIUM */
-
#endif /* NO_RC4 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asm.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asm.c
index 9f8458588..0af4447c7 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asm.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asm.c
@@ -1,8 +1,8 @@
/* asm.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -50,7 +51,7 @@
#else
#include <intrin.h>
- #define cpuid(a,b) __cpuid((int*)a,b)
+ #define cpuid(a,b,c) __cpuidex((int*)a,b,c)
#define XASM_LINK(f)
@@ -58,9 +59,9 @@
#define EAX 0
#define EBX 1
-#define ECX 2
+#define ECX 2
#define EDX 3
-
+
#define CPUID_AVX1 0x1
#define CPUID_AVX2 0x2
#define CPUID_RDRAND 0x4
@@ -74,30 +75,40 @@
#define IS_INTEL_ADX (cpuid_flags&CPUID_ADX)
#define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND)
#define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED)
-#define SET_FLAGS
+#define SET_FLAGS
static word32 cpuid_check = 0 ;
static word32 cpuid_flags = 0 ;
static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
- int got_intel_cpu=0;
- unsigned int reg[5];
-
+ int got_intel_cpu = 0;
+ int got_amd_cpu = 0;
+ unsigned int reg[5];
+
reg[4] = '\0' ;
- cpuid(reg, 0, 0);
- if(memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
- memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
- memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
- got_intel_cpu = 1;
- }
- if (got_intel_cpu) {
+ cpuid(reg, 0, 0);
+
+ /* check for intel cpu */
+ if( memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
+ memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
+ memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
+ got_intel_cpu = 1;
+ }
+
+ /* check for AMD cpu */
+ if( memcmp((char *)&(reg[EBX]), "Auth", 4) == 0 &&
+ memcmp((char *)&(reg[EDX]), "enti", 4) == 0 &&
+ memcmp((char *)&(reg[ECX]), "cAMD", 4) == 0) {
+ got_amd_cpu = 1;
+ }
+ if (got_intel_cpu || got_amd_cpu) {
cpuid(reg, leaf, sub);
return((reg[num]>>bit)&0x1) ;
}
return 0 ;
}
-INLINE static int set_cpuid_flags(void) {
+WC_INLINE static int set_cpuid_flags(void) {
if(cpuid_check == 0) {
if(cpuid_flag(7, 0, EBX, 8)){ cpuid_flags |= CPUID_BMI2 ; }
if(cpuid_flag(7, 0, EBX,19)){ cpuid_flags |= CPUID_ADX ; }
@@ -116,17 +127,17 @@ INLINE static int set_cpuid_flags(void) {
#define IF_HAVE_INTEL_MULX(func, ret)
#endif
-#if defined(TFM_X86) && !defined(TFM_SSE2)
+#if defined(TFM_X86) && !defined(TFM_SSE2)
/* x86-32 code */
-#define MONT_START
+#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
-__asm__( \
+__asm__( \
"movl %5,%%eax \n\t" \
"mull %4 \n\t" \
"addl %1,%%eax \n\t" \
@@ -135,11 +146,11 @@ __asm__( \
"adcl $0,%%edx \n\t" \
"movl %%edx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
-:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
+:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%eax", "%edx", "cc")
#define PROPCARRY \
-__asm__( \
+__asm__( \
"addl %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbl %%al,%1 \n\t" \
@@ -151,14 +162,14 @@ __asm__( \
#elif defined(TFM_X86_64)
/* x86-64 code */
-#define MONT_START
+#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
- mu = c[x] * mp;
+ mu = c[x] * mp
#define INNERMUL \
-__asm__( \
+__asm__( \
"movq %5,%%rax \n\t" \
"mulq %4 \n\t" \
"addq %1,%%rax \n\t" \
@@ -171,70 +182,63 @@ __asm__( \
: "%rax", "%rdx", "cc")
#if defined(HAVE_INTEL_MULX)
-#define MULX_INIT(a0, c0, cy)\
- __asm__ volatile( \
- "xorq %%r10, %%r10\n\t" \
- "movq %1,%%rdx\n\t" \
- "addq %2, %0\n\t" /* c0+=cy; Set CF, OF */ \
- "adoxq %%r10, %%r10\n\t" /* Reset OF */ \
- :"+m"(c0):"r"(a0),"r"(cy):"%r8","%r9", "%r10","%r11","%r12","%rdx") ; \
-
-#define MULX_INNERMUL_R1(c0, c1, pre, rdx)\
- { \
- __asm__ volatile ( \
- "movq %3, %%rdx\n\t" \
- "mulx %%r11,%%r9, %%r8 \n\t" \
- "movq %2, %%r12\n\t" \
- "adoxq %%r9,%0 \n\t" \
- "adcxq %%r8,%1 \n\t" \
- :"+r"(c0),"+r"(c1):"m"(pre),"r"(rdx):"%r8","%r9", "%r10", "%r11","%r12","%rdx" \
- ); }
-
-
-#define MULX_INNERMUL_R2(c0, c1, pre, rdx)\
- { \
- __asm__ volatile ( \
- "movq %3, %%rdx\n\t" \
- "mulx %%r12,%%r9, %%r8 \n\t" \
- "movq %2, %%r11\n\t" \
- "adoxq %%r9,%0 \n\t" \
- "adcxq %%r8,%1 \n\t" \
- :"+r"(c0),"+r"(c1):"m"(pre),"r"(rdx):"%r8","%r9", "%r10", "%r11","%r12","%rdx" \
- ); }
-
-#define MULX_LOAD_R1(val)\
- __asm__ volatile ( \
- "movq %0, %%r11\n\t"\
- ::"m"(val):"%r8","%r9", "%r10", "%r11","%r12","%rdx"\
-) ;
-
-#define MULX_INNERMUL_LAST(c0, c1, rdx)\
- { \
- __asm__ volatile ( \
- "movq %2, %%rdx\n\t" \
- "mulx %%r12,%%r9, %%r8 \n\t" \
- "movq $0, %%r10 \n\t" \
- "adoxq %%r10, %%r9 \n\t" \
- "adcq $0,%%r8 \n\t" \
- "addq %%r9,%0 \n\t" \
- "adcq $0,%%r8 \n\t" \
- "movq %%r8,%1 \n\t" \
- :"+m"(c0),"=m"(c1):"r"(rdx):"%r8","%r9","%r10", "%r11", "%r12","%rdx"\
- ); }
-
-#define MULX_INNERMUL8(x,y,z,cy)\
-{ word64 rdx = y ;\
- MULX_LOAD_R1(x[0]) ;\
- MULX_INIT(y, _c0, cy) ; /* rdx=y; z0+=cy; */ \
- MULX_INNERMUL_R1(_c0, _c1, x[1], rdx) ;\
- MULX_INNERMUL_R2(_c1, _c2, x[2], rdx) ;\
- MULX_INNERMUL_R1(_c2, _c3, x[3], rdx) ;\
- MULX_INNERMUL_R2(_c3, _c4, x[4], rdx) ;\
- MULX_INNERMUL_R1(_c4, _c5, x[5], rdx) ;\
- MULX_INNERMUL_R2(_c5, _c6, x[6], rdx) ;\
- MULX_INNERMUL_R1(_c6, _c7, x[7], rdx) ;\
- MULX_INNERMUL_LAST(_c7, cy, rdx) ;\
-}
+#define MULX_INNERMUL8(x,y,z,cy) \
+ __asm__ volatile ( \
+ "movq %[yn], %%rdx\n\t" \
+ "xorq %%rcx, %%rcx\n\t" \
+ "movq 0(%[c]), %%r8\n\t" \
+ "movq 8(%[c]), %%r9\n\t" \
+ "movq 16(%[c]), %%r10\n\t" \
+ "movq 24(%[c]), %%r11\n\t" \
+ "movq 32(%[c]), %%r12\n\t" \
+ "movq 40(%[c]), %%r13\n\t" \
+ "movq 48(%[c]), %%r14\n\t" \
+ "movq 56(%[c]), %%r15\n\t" \
+ \
+ "mulx 0(%[xp]), %%rax, %%rcx\n\t" \
+ "adcxq %[cy], %%r8\n\t" \
+ "adoxq %%rax, %%r8\n\t" \
+ "mulx 8(%[xp]), %%rax, %[cy]\n\t" \
+ "adcxq %%rcx, %%r9\n\t" \
+ "adoxq %%rax, %%r9\n\t" \
+ "mulx 16(%[xp]), %%rax, %%rcx\n\t" \
+ "adcxq %[cy], %%r10\n\t" \
+ "adoxq %%rax, %%r10\n\t" \
+ "mulx 24(%[xp]), %%rax, %[cy]\n\t" \
+ "adcxq %%rcx, %%r11\n\t" \
+ "adoxq %%rax, %%r11\n\t" \
+ "mulx 32(%[xp]), %%rax, %%rcx\n\t" \
+ "adcxq %[cy], %%r12\n\t" \
+ "adoxq %%rax, %%r12\n\t" \
+ "mulx 40(%[xp]), %%rax, %[cy]\n\t" \
+ "adcxq %%rcx, %%r13\n\t" \
+ "adoxq %%rax, %%r13\n\t" \
+ "mulx 48(%[xp]), %%rax, %%rcx\n\t" \
+ "adcxq %[cy], %%r14\n\t" \
+ "adoxq %%rax, %%r14\n\t" \
+ "adcxq %%rcx, %%r15\n\t" \
+ "mulx 56(%[xp]), %%rax, %[cy]\n\t" \
+ "movq $0, %%rdx\n\t" \
+ "adoxq %%rdx, %%rax\n\t" \
+ "adcxq %%rdx, %[cy]\n\t" \
+ "adoxq %%rdx, %[cy]\n\t" \
+ "addq %%rax, %%r15\n\t" \
+ "adcq $0, %[cy]\n\t" \
+ \
+ "movq %%r8, 0(%[c])\n\t" \
+ "movq %%r9, 8(%[c])\n\t" \
+ "movq %%r10, 16(%[c])\n\t" \
+ "movq %%r11, 24(%[c])\n\t" \
+ "movq %%r12, 32(%[c])\n\t" \
+ "movq %%r13, 40(%[c])\n\t" \
+ "movq %%r14, 48(%[c])\n\t" \
+ "movq %%r15, 56(%[c])\n\t" \
+ : [cy] "+r" (cy) \
+ : [xp] "r" (x), [c] "r" (c_mulx), [yn] "rm" (y) \
+ :"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \
+ "%rdx", "%rax", "%rcx" \
+ )
+
#define INNERMUL8_MULX \
{\
MULX_INNERMUL8(tmpm, mu, _c, cy);\
@@ -242,7 +246,7 @@ __asm__( \
#endif
#define INNERMUL8 \
- __asm__( \
+ __asm__( \
"movq 0(%5),%%rax \n\t" \
"movq 0(%2),%%r10 \n\t" \
"movq 0x8(%5),%%r11 \n\t" \
@@ -332,10 +336,10 @@ __asm__( \
\
:"=r"(_c), "=r"(cy) \
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
-: "%rax", "%rdx", "%r10", "%r11", "cc")\
+: "%rax", "%rdx", "%r10", "%r11", "cc")
#define PROPCARRY \
-__asm__( \
+__asm__( \
"addq %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbq %%al,%1 \n\t" \
@@ -344,7 +348,7 @@ __asm__( \
: "%rax", "cc")
/******************************************************************/
-#elif defined(TFM_SSE2)
+#elif defined(TFM_SSE2)
/* SSE2 code (assumes 32-bit fp_digits) */
/* XMM register assignments:
* xmm0 *tmpm++, then Mu * (*tmpm++)
@@ -361,7 +365,7 @@ __asm__( \
__asm__("emms")
#define LOOP_START \
-__asm__( \
+__asm__( \
"movd %0,%%mm1 \n\t" \
"pxor %%mm3,%%mm3 \n\t" \
"pmuludq %%mm2,%%mm1 \n\t" \
@@ -369,7 +373,7 @@ __asm__( \
/* pmuludq on mmx registers does a 32x32->64 multiply. */
#define INNERMUL \
-__asm__( \
+__asm__( \
"movd %1,%%mm4 \n\t" \
"movd %2,%%mm0 \n\t" \
"paddq %%mm4,%%mm3 \n\t" \
@@ -380,7 +384,7 @@ __asm__( \
:"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
#define INNERMUL8 \
-__asm__( \
+__asm__( \
"movd 0(%1),%%mm4 \n\t" \
"movd 0(%2),%%mm0 \n\t" \
"paddq %%mm4,%%mm3 \n\t" \
@@ -453,7 +457,7 @@ __asm__( \
__asm__( "movd %%mm3,%0 \n" :"=r"(cy))
#define PROPCARRY \
-__asm__( \
+__asm__( \
"addl %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbl %%al,%1 \n\t" \
@@ -465,7 +469,7 @@ __asm__( \
#elif defined(TFM_ARM)
/* ARMv4 code */
-#define MONT_START
+#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
@@ -475,7 +479,7 @@ __asm__( \
#ifdef __thumb__
#define INNERMUL \
-__asm__( \
+__asm__( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" ITE CS \n\t" \
@@ -486,7 +490,7 @@ __asm__( \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0]):"r0","cc");
#define PROPCARRY \
-__asm__( \
+__asm__( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" STR r0,%1 \n\t" \
@@ -502,7 +506,7 @@ __asm__( \
#else /* __thumb__ */
#define INNERMUL \
-__asm__( \
+__asm__( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" MOVCS %0,#1 \n\t" \
@@ -512,7 +516,7 @@ __asm__( \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");
#define PROPCARRY \
-__asm__( \
+__asm__( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" STR r0,%1 \n\t" \
@@ -525,76 +529,72 @@ __asm__( \
#elif defined(TFM_PPC32)
/* PPC32 */
-#define MONT_START
+#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
-__asm__( \
+__asm__( \
" mullw 16,%3,%4 \n\t" \
" mulhwu 17,%3,%4 \n\t" \
- " addc 16,16,%0 \n\t" \
+ " addc 16,16,%2 \n\t" \
" addze 17,17 \n\t" \
- " lwz 18,%1 \n\t" \
- " addc 16,16,18 \n\t" \
+ " addc %1,16,%5 \n\t" \
" addze %0,17 \n\t" \
- " stw 16,%1 \n\t" \
-:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;
+:"=r"(cy),"=r"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "cc"); ++tmpm;
#define PROPCARRY \
-__asm__( \
- " lwz 16,%1 \n\t" \
- " addc 16,16,%0 \n\t" \
- " stw 16,%1 \n\t" \
- " xor %0,%0,%0 \n\t" \
- " addze %0,%0 \n\t" \
-:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
+__asm__( \
+ " addc %1,%3,%2 \n\t" \
+ " xor %0,%2,%2 \n\t" \
+ " addze %0,%2 \n\t" \
+:"=r"(cy),"=r"(_c[0]):"0"(cy),"1"(_c[0]):"cc");
#elif defined(TFM_PPC64)
/* PPC64 */
-#define MONT_START
+#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
-#define INNERMUL \
-__asm__( \
- " mulld 16,%3,%4 \n\t" \
- " mulhdu 17,%3,%4 \n\t" \
- " addc 16,16,%0 \n\t" \
- " addze 17,17 \n\t" \
- " ldx 18,0,%1 \n\t" \
- " addc 16,16,18 \n\t" \
- " addze %0,17 \n\t" \
- " sdx 16,0,%1 \n\t" \
-:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;
-
-#define PROPCARRY \
-__asm__( \
- " ldx 16,0,%1 \n\t" \
- " addc 16,16,%0 \n\t" \
- " sdx 16,0,%1 \n\t" \
- " xor %0,%0,%0 \n\t" \
- " addze %0,%0 \n\t" \
-:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
+#define INNERMUL \
+__asm__( \
+ " mulld r16,%3,%4 \n\t" \
+ " mulhdu r17,%3,%4 \n\t" \
+ " addc r16,16,%0 \n\t" \
+ " addze r17,r17 \n\t" \
+ " ldx r18,0,%1 \n\t" \
+ " addc r16,r16,r18 \n\t" \
+ " addze %0,r17 \n\t" \
+ " sdx r16,0,%1 \n\t" \
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","cc"); ++tmpm;
+
+#define PROPCARRY \
+__asm__( \
+ " ldx r16,0,%1 \n\t" \
+ " addc r16,r16,%0 \n\t" \
+ " sdx r16,0,%1 \n\t" \
+ " xor %0,%0,%0 \n\t" \
+ " addze %0,%0 \n\t" \
+:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","cc");
/******************************************************************/
#elif defined(TFM_AVR32)
/* AVR32 */
-#define MONT_START
+#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
-__asm__( \
+__asm__( \
" ld.w r2,%1 \n\t" \
" add r2,%0 \n\t" \
" eor r3,r3 \n\t" \
@@ -605,7 +605,7 @@ __asm__( \
:"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");
#define PROPCARRY \
-__asm__( \
+__asm__( \
" ld.w r2,%1 \n\t" \
" add r2,%0 \n\t" \
" st.w %1,r2 \n\t" \
@@ -613,10 +613,44 @@ __asm__( \
" acr %0 \n\t" \
:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc");
+/******************************************************************/
+#elif defined(TFM_MIPS)
+
+/* MIPS */
+#define MONT_START
+#define MONT_FINI
+#define LOOP_END
+#define LOOP_START \
+ mu = c[x] * mp
+
+#define INNERMUL \
+__asm__( \
+ " multu %3,%4 \n\t" \
+ " mflo $12 \n\t" \
+ " mfhi $13 \n\t" \
+ " addu $12,$12,%0 \n\t" \
+ " sltu $10,$12,%0 \n\t" \
+ " addu $13,$13,$10 \n\t" \
+ " lw $10,%1 \n\t" \
+ " addu $12,$12,$10 \n\t" \
+ " sltu $10,$12,$10 \n\t" \
+ " addu %0,$13,$10 \n\t" \
+ " sw $12,%1 \n\t" \
+:"+r"(cy),"+m"(_c[0]):""(cy),"r"(mu),"r"(tmpm[0]),""(_c[0]):"$10","$12","$13"); ++tmpm;
+
+#define PROPCARRY \
+__asm__( \
+ " lw $10,%1 \n\t" \
+ " addu $10,$10,%0 \n\t" \
+ " sw $10,%1 \n\t" \
+ " sltu %0,$10,%0 \n\t" \
+:"+r"(cy),"+m"(_c[0]):""(cy),""(_c[0]):"$10");
+
+/******************************************************************/
#else
/* ISO C code */
-#define MONT_START
+#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
@@ -663,7 +697,7 @@ __asm__( \
#define COMBA_FINI
#define SQRADD(i, j) \
-__asm__( \
+__asm__( \
"movl %6,%%eax \n\t" \
"mull %%eax \n\t" \
"addl %%eax,%0 \n\t" \
@@ -672,7 +706,7 @@ __asm__( \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
#define SQRADD2(i, j) \
-__asm__( \
+__asm__( \
"movl %6,%%eax \n\t" \
"mull %7 \n\t" \
"addl %%eax,%0 \n\t" \
@@ -692,10 +726,8 @@ __asm__( \
"xorl %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");
-/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
-
#define SQRADDAC(i, j) \
-__asm__( \
+__asm__( \
"movl %6,%%eax \n\t" \
"mull %7 \n\t" \
"addl %%eax,%0 \n\t" \
@@ -704,7 +736,7 @@ __asm__( \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
#define SQRADDDB \
-__asm__( \
+__asm__( \
"addl %6,%0 \n\t" \
"adcl %7,%1 \n\t" \
"adcl %8,%2 \n\t" \
@@ -733,16 +765,16 @@ __asm__( \
#define COMBA_FINI
#define SQRADD(i, j) \
-__asm__( \
+__asm__( \
"movq %6,%%rax \n\t" \
"mulq %%rax \n\t" \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
- :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");
+ :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "x"(i) :"%rax","%rdx","cc");
#define SQRADD2(i, j) \
-__asm__( \
+__asm__( \
"movq %6,%%rax \n\t" \
"mulq %7 \n\t" \
"addq %%rax,%0 \n\t" \
@@ -754,7 +786,7 @@ __asm__( \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
#define SQRADDSC(i, j) \
-__asm__( \
+__asm__( \
"movq %3,%%rax \n\t" \
"mulq %4 \n\t" \
"movq %%rax,%0 \n\t" \
@@ -762,10 +794,8 @@ __asm__( \
"xorq %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");
-/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
-
#define SQRADDAC(i, j) \
-__asm__( \
+__asm__( \
"movq %6,%%rax \n\t" \
"mulq %7 \n\t" \
"addq %%rax,%0 \n\t" \
@@ -774,7 +804,7 @@ __asm__( \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
#define SQRADDDB \
-__asm__( \
+__asm__( \
"addq %6,%0 \n\t" \
"adcq %7,%1 \n\t" \
"adcq %8,%2 \n\t" \
@@ -804,7 +834,7 @@ __asm__( \
__asm__("emms");
#define SQRADD(i, j) \
-__asm__( \
+__asm__( \
"movd %6,%%mm0 \n\t" \
"pmuludq %%mm0,%%mm0\n\t" \
"movd %%mm0,%%eax \n\t" \
@@ -816,7 +846,7 @@ __asm__( \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");
#define SQRADD2(i, j) \
-__asm__( \
+__asm__( \
"movd %6,%%mm0 \n\t" \
"movd %7,%%mm1 \n\t" \
"pmuludq %%mm1,%%mm0\n\t" \
@@ -832,7 +862,7 @@ __asm__( \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
#define SQRADDSC(i, j) \
-__asm__( \
+__asm__( \
"movd %3,%%mm0 \n\t" \
"movd %4,%%mm1 \n\t" \
"pmuludq %%mm1,%%mm0\n\t" \
@@ -845,7 +875,7 @@ __asm__( \
/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
#define SQRADDAC(i, j) \
-__asm__( \
+__asm__( \
"movd %6,%%mm0 \n\t" \
"movd %7,%%mm1 \n\t" \
"pmuludq %%mm1,%%mm0\n\t" \
@@ -858,7 +888,7 @@ __asm__( \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");
#define SQRADDDB \
-__asm__( \
+__asm__( \
"addl %6,%0 \n\t" \
"adcl %7,%1 \n\t" \
"adcl %8,%2 \n\t" \
@@ -889,16 +919,16 @@ __asm__( \
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
-__asm__( \
+__asm__( \
" UMULL r0,r1,%6,%6 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");
-
+
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
-__asm__( \
+__asm__( \
" UMULL r0,r1,%6,%7 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
@@ -909,7 +939,7 @@ __asm__( \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
#define SQRADDSC(i, j) \
-__asm__( \
+__asm__( \
" UMULL %0,%1,%3,%4 \n\t" \
" SUB %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc");
@@ -917,7 +947,7 @@ __asm__( \
/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
#define SQRADDAC(i, j) \
-__asm__( \
+__asm__( \
" UMULL r0,r1,%6,%7 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
@@ -925,7 +955,7 @@ __asm__( \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");
#define SQRADDDB \
-__asm__( \
+__asm__( \
" ADDS %0,%0,%3 \n\t" \
" ADCS %1,%1,%4 \n\t" \
" ADC %2,%2,%5 \n\t" \
@@ -956,7 +986,7 @@ __asm__( \
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
-__asm__( \
+__asm__( \
" mullw 16,%6,%6 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%6 \n\t" \
@@ -966,7 +996,7 @@ __asm__( \
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
-__asm__( \
+__asm__( \
" mullw 16,%6,%7 \n\t" \
" mulhwu 17,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
@@ -978,14 +1008,14 @@ __asm__( \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
#define SQRADDSC(i, j) \
-__asm__( \
+__asm__( \
" mullw %0,%6,%7 \n\t" \
" mulhwu %1,%6,%7 \n\t" \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
#define SQRADDAC(i, j) \
-__asm__( \
+__asm__( \
" mullw 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%7 \n\t" \
@@ -994,7 +1024,7 @@ __asm__( \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
#define SQRADDDB \
-__asm__( \
+__asm__( \
" addc %0,%0,%3 \n\t" \
" adde %1,%1,%4 \n\t" \
" adde %2,%2,%5 \n\t" \
@@ -1023,46 +1053,46 @@ __asm__( \
#define COMBA_FINI
/* multiplies point i and j, updates carry "c1" and digit c2 */
-#define SQRADD(i, j) \
-__asm__( \
- " mulld 16,%6,%6 \n\t" \
- " addc %0,%0,16 \n\t" \
- " mulhdu 16,%6,%6 \n\t" \
- " adde %1,%1,16 \n\t" \
- " addze %2,%2 \n\t" \
-:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
+#define SQRADD(i, j) \
+__asm__( \
+ " mulld r16,%6,%6 \n\t" \
+ " addc %0,%0,r16 \n\t" \
+ " mulhdu r16,%6,%6 \n\t" \
+ " adde %1,%1,r16 \n\t" \
+ " addze %2,%2 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","cc");
/* for squaring some of the terms are doubled... */
-#define SQRADD2(i, j) \
-__asm__( \
- " mulld 16,%6,%7 \n\t" \
- " mulhdu 17,%6,%7 \n\t" \
- " addc %0,%0,16 \n\t" \
- " adde %1,%1,17 \n\t" \
- " addze %2,%2 \n\t" \
- " addc %0,%0,16 \n\t" \
- " adde %1,%1,17 \n\t" \
- " addze %2,%2 \n\t" \
-:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
+#define SQRADD2(i, j) \
+__asm__( \
+ " mulld r16,%6,%7 \n\t" \
+ " mulhdu r17,%6,%7 \n\t" \
+ " addc %0,%0,r16 \n\t" \
+ " adde %1,%1,r17 \n\t" \
+ " addze %2,%2 \n\t" \
+ " addc %0,%0,r16 \n\t" \
+ " adde %1,%1,r17 \n\t" \
+ " addze %2,%2 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","cc");
#define SQRADDSC(i, j) \
-__asm__( \
+__asm__( \
" mulld %0,%6,%7 \n\t" \
" mulhdu %1,%6,%7 \n\t" \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
-#define SQRADDAC(i, j) \
-__asm__( \
- " mulld 16,%6,%7 \n\t" \
- " addc %0,%0,16 \n\t" \
- " mulhdu 16,%6,%7 \n\t" \
- " adde %1,%1,16 \n\t" \
- " addze %2,%2 \n\t" \
-:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
+#define SQRADDAC(i, j) \
+__asm__( \
+ " mulld r16,%6,%7 \n\t" \
+ " addc %0,%0,r16 \n\t" \
+ " mulhdu r16,%6,%7 \n\t" \
+ " adde %1,%1,r16 \n\t" \
+ " addze %2,%2 \n\t" \
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "cc");
#define SQRADDDB \
-__asm__( \
+__asm__( \
" addc %0,%0,%3 \n\t" \
" adde %1,%1,%4 \n\t" \
" adde %2,%2,%5 \n\t" \
@@ -1094,7 +1124,7 @@ __asm__( \
/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
-__asm__( \
+__asm__( \
" mulu.d r2,%6,%6 \n\t" \
" add %0,%0,r2 \n\t" \
" adc %1,%1,r3 \n\t" \
@@ -1103,7 +1133,7 @@ __asm__( \
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
-__asm__( \
+__asm__( \
" mulu.d r2,%6,%7 \n\t" \
" add %0,%0,r2 \n\t" \
" adc %1,%1,r3 \n\t" \
@@ -1114,7 +1144,7 @@ __asm__( \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3");
#define SQRADDSC(i, j) \
-__asm__( \
+__asm__( \
" mulu.d r2,%6,%7 \n\t" \
" mov %0,r2 \n\t" \
" mov %1,r3 \n\t" \
@@ -1122,7 +1152,7 @@ __asm__( \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3");
#define SQRADDAC(i, j) \
-__asm__( \
+__asm__( \
" mulu.d r2,%6,%7 \n\t" \
" add %0,%0,r2 \n\t" \
" adc %1,%1,r3 \n\t" \
@@ -1130,7 +1160,7 @@ __asm__( \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3");
#define SQRADDDB \
-__asm__( \
+__asm__( \
" add %0,%0,%3 \n\t" \
" adc %1,%1,%4 \n\t" \
" adc %2,%2,%5 \n\t" \
@@ -1139,6 +1169,112 @@ __asm__( \
" adc %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
+#elif defined(TFM_MIPS)
+
+/* MIPS */
+#define COMBA_START
+
+#define CLEAR_CARRY \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define CARRY_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_FINI
+
+/* multiplies point i and j, updates carry "c1" and digit c2 */
+#define SQRADD(i, j) \
+__asm__( \
+ " multu %6,%6 \n\t" \
+ " mflo $12 \n\t" \
+ " mfhi $13 \n\t" \
+ " addu %0,%0,$12 \n\t" \
+ " sltu $12,%0,$12 \n\t" \
+ " addu %1,%1,$13 \n\t" \
+ " sltu $13,%1,$13 \n\t" \
+ " addu %1,%1,$12 \n\t" \
+ " sltu $12,%1,$12 \n\t" \
+ " addu %2,%2,$13 \n\t" \
+ " addu %2,%2,$12 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13");
+
+/* for squaring some of the terms are doubled... */
+#define SQRADD2(i, j) \
+__asm__( \
+ " multu %6,%7 \n\t" \
+ " mflo $12 \n\t" \
+ " mfhi $13 \n\t" \
+ \
+ " addu %0,%0,$12 \n\t" \
+ " sltu $14,%0,$12 \n\t" \
+ " addu %1,%1,$13 \n\t" \
+ " sltu $15,%1,$13 \n\t" \
+ " addu %1,%1,$14 \n\t" \
+ " sltu $14,%1,$14 \n\t" \
+ " addu %2,%2,$15 \n\t" \
+ " addu %2,%2,$14 \n\t" \
+ \
+ " addu %0,%0,$12 \n\t" \
+ " sltu $14,%0,$12 \n\t" \
+ " addu %1,%1,$13 \n\t" \
+ " sltu $15,%1,$13 \n\t" \
+ " addu %1,%1,$14 \n\t" \
+ " sltu $14,%1,$14 \n\t" \
+ " addu %2,%2,$15 \n\t" \
+ " addu %2,%2,$14 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15");
+
+#define SQRADDSC(i, j) \
+__asm__( \
+ " multu %6,%7 \n\t" \
+ " mflo %0 \n\t" \
+ " mfhi %1 \n\t" \
+ " xor %2,%2,%2 \n\t" \
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
+
+#define SQRADDAC(i, j) \
+__asm__( \
+ " multu %6,%7 \n\t" \
+ " mflo $12 \n\t" \
+ " mfhi $13 \n\t" \
+ " addu %0,%0,$12 \n\t" \
+ " sltu $12,%0,$12 \n\t" \
+ " addu %1,%1,$13 \n\t" \
+ " sltu $13,%1,$13 \n\t" \
+ " addu %1,%1,$12 \n\t" \
+ " sltu $12,%1,$12 \n\t" \
+ " addu %2,%2,$13 \n\t" \
+ " addu %2,%2,$12 \n\t" \
+:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14");
+
+#define SQRADDDB \
+__asm__( \
+ " addu %0,%0,%3 \n\t" \
+ " sltu $10,%0,%3 \n\t" \
+ " addu %1,%1,$10 \n\t" \
+ " sltu $10,%1,$10 \n\t" \
+ " addu %1,%1,%4 \n\t" \
+ " sltu $11,%1,%4 \n\t" \
+ " addu %2,%2,$10 \n\t" \
+ " addu %2,%2,$11 \n\t" \
+ " addu %2,%2,%5 \n\t" \
+ \
+ " addu %0,%0,%3 \n\t" \
+ " sltu $10,%0,%3 \n\t" \
+ " addu %1,%1,$10 \n\t" \
+ " sltu $10,%1,$10 \n\t" \
+ " addu %1,%1,%4 \n\t" \
+ " sltu $11,%1,%4 \n\t" \
+ " addu %2,%2,$10 \n\t" \
+ " addu %2,%2,$11 \n\t" \
+ " addu %2,%2,%5 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
#else
@@ -1169,7 +1305,7 @@ __asm__( \
t = c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; \
c2 +=(fp_digit) (t >> DIGIT_BIT); \
} while (0);
-
+
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
@@ -1177,10 +1313,10 @@ __asm__( \
t = ((fp_word)i) * ((fp_word)j); \
tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
- c2 +=(fp_digit)( tt >> DIGIT_BIT); \
+ c2 +=(fp_digit)(tt >> DIGIT_BIT); \
tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
- c2 +=(fp_digit) (tt >> DIGIT_BIT); \
+ c2 +=(fp_digit)(tt >> DIGIT_BIT); \
} while (0);
#define SQRADDSC(i, j) \
@@ -1210,46 +1346,46 @@ __asm__( \
#include "fp_sqr_comba_small_set.i"
#endif
-#if defined(TFM_SQR3)
+#if defined(TFM_SQR3) && FP_SIZE >= 6
#include "fp_sqr_comba_3.i"
#endif
-#if defined(TFM_SQR4)
+#if defined(TFM_SQR4) && FP_SIZE >= 8
#include "fp_sqr_comba_4.i"
#endif
-#if defined(TFM_SQR6)
+#if defined(TFM_SQR6) && FP_SIZE >= 12
#include "fp_sqr_comba_6.i"
#endif
-#if defined(TFM_SQR7)
+#if defined(TFM_SQR7) && FP_SIZE >= 14
#include "fp_sqr_comba_7.i"
#endif
-#if defined(TFM_SQR8)
+#if defined(TFM_SQR8) && FP_SIZE >= 16
#include "fp_sqr_comba_8.i"
#endif
-#if defined(TFM_SQR9)
+#if defined(TFM_SQR9) && FP_SIZE >= 18
#include "fp_sqr_comba_9.i"
#endif
-#if defined(TFM_SQR12)
+#if defined(TFM_SQR12) && FP_SIZE >= 24
#include "fp_sqr_comba_12.i"
#endif
-#if defined(TFM_SQR17)
+#if defined(TFM_SQR17) && FP_SIZE >= 34
#include "fp_sqr_comba_17.i"
#endif
-#if defined(TFM_SQR20)
+#if defined(TFM_SQR20) && FP_SIZE >= 40
#include "fp_sqr_comba_20.i"
#endif
-#if defined(TFM_SQR24)
+#if defined(TFM_SQR24) && FP_SIZE >= 48
#include "fp_sqr_comba_24.i"
#endif
-#if defined(TFM_SQR28)
+#if defined(TFM_SQR28) && FP_SIZE >= 56
#include "fp_sqr_comba_28.i"
#endif
-#if defined(TFM_SQR32)
+#if defined(TFM_SQR32) && FP_SIZE >= 64
#include "fp_sqr_comba_32.i"
#endif
-#if defined(TFM_SQR48)
+#if defined(TFM_SQR48) && FP_SIZE >= 96
#include "fp_sqr_comba_48.i"
#endif
-#if defined(TFM_SQR64)
+#if defined(TFM_SQR64) && FP_SIZE >= 128
#include "fp_sqr_comba_64.i"
#endif
/* end fp_sqr_comba.c asm */
@@ -1283,7 +1419,7 @@ __asm__( \
/* this should multiply i and j */
#define MULADD(i, j) \
-__asm__( \
+__asm__( \
"movl %6,%%eax \n\t" \
"mull %7 \n\t" \
"addl %%eax,%0 \n\t" \
@@ -1318,7 +1454,7 @@ __asm__( \
/* this should multiply i and j */
#define MULADD(i, j) \
-__asm__ ( \
+__asm__ ( \
"movq %6,%%rax \n\t" \
"mulq %7 \n\t" \
"addq %%rax,%0 \n\t" \
@@ -1328,61 +1464,54 @@ __asm__ ( \
#if defined(HAVE_INTEL_MULX)
-#define MULADD_MULX(b0, c0, c1, rdx)\
- __asm__ volatile ( \
- "movq %3, %%rdx\n\t" \
- "mulx %2,%%r9, %%r8 \n\t" \
- "adoxq %%r9,%0 \n\t" \
- "adcxq %%r8,%1 \n\t" \
- :"+r"(c0),"+r"(c1):"r"(b0), "r"(rdx):"%r8","%r9","%r10","%rdx"\
+#define MULADD_BODY(a,b,c) \
+ __asm__ volatile( \
+ "movq %[a0],%%rdx\n\t" \
+ "xorq %%rcx, %%rcx\n\t" \
+ "movq 0(%[cp]),%%r8\n\t" \
+ "movq 8(%[cp]),%%r9\n\t" \
+ "movq 16(%[cp]),%%r10\n\t" \
+ "movq 24(%[cp]),%%r11\n\t" \
+ "movq 32(%[cp]),%%r12\n\t" \
+ "movq 40(%[cp]),%%r13\n\t" \
+ \
+ "mulx (%[bp]),%%rax, %%rbx\n\t" \
+ "adoxq %%rax, %%r8\n\t" \
+ "mulx 8(%[bp]),%%rax, %%rcx\n\t" \
+ "adcxq %%rbx, %%r9\n\t" \
+ "adoxq %%rax, %%r9\n\t" \
+ "mulx 16(%[bp]),%%rax, %%rbx\n\t" \
+ "adcxq %%rcx, %%r10\n\t" \
+ "adoxq %%rax, %%r10\n\t" \
+ "mulx 24(%[bp]),%%rax, %%rcx\n\t" \
+ "adcxq %%rbx, %%r11\n\t" \
+ "adoxq %%rax, %%r11\n\t" \
+ "adcxq %%rcx, %%r12\n\t" \
+ "mov $0, %%rdx\n\t" \
+ "adox %%rdx, %%r12\n\t" \
+ "adcx %%rdx, %%r13\n\t" \
+ \
+ "movq %%r8, 0(%[cp])\n\t" \
+ "movq %%r9, 8(%[cp])\n\t" \
+ "movq %%r10, 16(%[cp])\n\t" \
+ "movq %%r11, 24(%[cp])\n\t" \
+ "movq %%r12, 32(%[cp])\n\t" \
+ "movq %%r13, 40(%[cp])\n\t" \
+ : \
+ : [a0] "r" (a->dp[ix]), [bp] "r" (&(b->dp[iy])), \
+ [cp] "r" (&(c->dp[iz])) \
+ : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", \
+ "%rdx", "%rax", "%rcx", "%rbx" \
)
-
-#define MULADD_MULX_ADD_CARRY(c0, c1)\
- __asm__ volatile(\
- "mov $0, %%r10\n\t"\
- "movq %1, %%r8\n\t"\
- "adox %%r10, %0\n\t"\
- "adcx %%r10, %1\n\t"\
- :"+r"(c0),"+r"(c1)::"%r8","%r9","%r10","%rdx") ;
-
-#define MULADD_SET_A(a0)\
- __asm__ volatile("add $0, %%r8\n\t" \
- "movq %0,%%rdx\n\t" \
- ::"r"(a0):"%r8","%r9","%r10","%rdx") ;
-
-#define MULADD_BODY(a,b,c)\
- { word64 rdx = a->dp[ix] ; \
- cp = &(c->dp[iz]) ; \
- c0 = cp[0] ; c1 = cp[1]; \
- MULADD_SET_A(rdx) ; \
- MULADD_MULX(b0, c0, c1, rdx) ;\
- cp[0]=c0; c0=cp[2]; \
- MULADD_MULX(b1, c1, c0, rdx) ;\
- cp[1]=c1; c1=cp[3]; \
- MULADD_MULX(b2, c0, c1, rdx) ;\
- cp[2]=c0; c0=cp[4]; \
- MULADD_MULX(b3, c1, c0, rdx) ;\
- cp[3]=c1; c1=cp[5]; \
- MULADD_MULX_ADD_CARRY(c0, c1);\
- cp[4]=c0; cp[5]=c1; \
+#define TFM_INTEL_MUL_COMBA(a, b, c) \
+ for (iz=0; iz<pa; iz++) c->dp[iz] = 0; \
+ for (ix=0; ix<a->used; ix++) { \
+ for (iy=0; iy<b->used; iy+=4) { \
+ iz = ix + iy; \
+ MULADD_BODY(a, b, c); \
+ } \
}
-
-#define TFM_INTEL_MUL_COMBA(a, b, c)\
- for(ix=0; ix<pa; ix++)c->dp[ix]=0 ; \
- for(iy=0; (iy<b->used); iy+=4) { \
- fp_digit *bp ; \
- bp = &(b->dp[iy+0]) ; \
- fp_digit b0 = bp[0] , b1= bp[1], \
- b2= bp[2], b3= bp[3]; \
- ix=0, iz=iy; \
- while(ix<a->used) { \
- fp_digit c0, c1; \
- fp_digit *cp ; \
- MULADD_BODY(a,b,c); \
- ix++ ; iz++ ; \
- } \
-};
#endif
#elif defined(TFM_SSE2)
@@ -1413,7 +1542,7 @@ __asm__ ( \
/* this should multiply i and j */
#define MULADD(i, j) \
-__asm__( \
+__asm__( \
"movd %6,%%mm0 \n\t" \
"movd %7,%%mm1 \n\t" \
"pmuludq %%mm1,%%mm0\n\t" \
@@ -1428,7 +1557,7 @@ __asm__( \
#elif defined(TFM_ARM)
/* ARM code */
-#define COMBA_START
+#define COMBA_START
#define COMBA_CLEAR \
c0 = c1 = c2 = 0;
@@ -1445,7 +1574,7 @@ __asm__( \
#define COMBA_FINI
#define MULADD(i, j) \
-__asm__( \
+__asm__( \
" UMULL r0,r1,%6,%7 \n\t" \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
@@ -1469,11 +1598,11 @@ __asm__( \
#define COMBA_STORE2(x) \
x = c1;
-#define COMBA_FINI
-
+#define COMBA_FINI
+
/* untested: will mulhwu change the flags? Docs say no */
-#define MULADD(i, j) \
-__asm__( \
+#define MULADD(i, j) \
+__asm__( \
" mullw 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhwu 16,%6,%7 \n\t" \
@@ -1498,17 +1627,17 @@ __asm__( \
#define COMBA_STORE2(x) \
x = c1;
-#define COMBA_FINI
-
-/* untested: will mulhwu change the flags? Docs say no */
+#define COMBA_FINI
+
+/* untested: will mulhdu change the flags? Docs say no */
#define MULADD(i, j) \
-____asm__( \
- " mulld 16,%6,%7 \n\t" \
- " addc %0,%0,16 \n\t" \
- " mulhdu 16,%6,%7 \n\t" \
- " adde %1,%1,16 \n\t" \
- " addze %2,%2 \n\t" \
-:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
+____asm__( \
+ " mulld r16,%6,%7 \n\t" \
+ " addc %0,%0,16 \n\t" \
+ " mulhdu r16,%6,%7 \n\t" \
+ " adde %1,%1,16 \n\t" \
+ " addze %2,%2 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
#elif defined(TFM_AVR32)
@@ -1528,16 +1657,50 @@ ____asm__( \
#define COMBA_STORE2(x) \
x = c1;
-#define COMBA_FINI
-
+#define COMBA_FINI
+
#define MULADD(i, j) \
-____asm__( \
+____asm__( \
" mulu.d r2,%6,%7 \n\t"\
" add %0,r2 \n\t"\
" adc %1,%1,r3 \n\t"\
" acr %2 \n\t"\
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
+#elif defined(TFM_MIPS)
+
+/* MIPS */
+#define COMBA_START
+
+#define COMBA_CLEAR \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_FORWARD \
+ do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define COMBA_FINI
+
+#define MULADD(i, j) \
+__asm__( \
+ " multu %6,%7 \n\t" \
+ " mflo $12 \n\t" \
+ " mfhi $13 \n\t" \
+ " addu %0,%0,$12 \n\t" \
+ " sltu $12,%0,$12 \n\t" \
+ " addu %1,%1,$13 \n\t" \
+ " sltu $13,%1,$13 \n\t" \
+ " addu %1,%1,$12 \n\t" \
+ " sltu $12,%1,$12 \n\t" \
+ " addu %2,%2,$13 \n\t" \
+ " addu %2,%2,$12 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
+
#else
/* ISO C code */
@@ -1555,13 +1718,15 @@ ____asm__( \
#define COMBA_STORE2(x) \
x = c1;
-#define COMBA_FINI
-
+#define COMBA_FINI
+
#define MULADD(i, j) \
- do { fp_word t; \
- t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; \
- t = (fp_word)c1 + (t >> DIGIT_BIT); \
- c1 = (fp_digit)t; c2 += (fp_digit)(t >> DIGIT_BIT); \
+ do { fp_word t; \
+ t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); \
+ c0 = (fp_digit)t; \
+ t = (fp_word)c1 + (t >> DIGIT_BIT); \
+ c1 = (fp_digit)t; \
+ c2 += (fp_digit)(t >> DIGIT_BIT); \
} while (0);
#endif
@@ -1571,46 +1736,46 @@ ____asm__( \
#include "fp_mul_comba_small_set.i"
#endif
-#if defined(TFM_MUL3)
+#if defined(TFM_MUL3) && FP_SIZE >= 6
#include "fp_mul_comba_3.i"
#endif
-#if defined(TFM_MUL4)
+#if defined(TFM_MUL4) && FP_SIZE >= 8
#include "fp_mul_comba_4.i"
#endif
-#if defined(TFM_MUL6)
+#if defined(TFM_MUL6) && FP_SIZE >= 12
#include "fp_mul_comba_6.i"
#endif
-#if defined(TFM_MUL7)
+#if defined(TFM_MUL7) && FP_SIZE >= 14
#include "fp_mul_comba_7.i"
#endif
-#if defined(TFM_MUL8)
+#if defined(TFM_MUL8) && FP_SIZE >= 16
#include "fp_mul_comba_8.i"
#endif
-#if defined(TFM_MUL9)
+#if defined(TFM_MUL9) && FP_SIZE >= 18
#include "fp_mul_comba_9.i"
#endif
-#if defined(TFM_MUL12)
+#if defined(TFM_MUL12) && FP_SIZE >= 24
#include "fp_mul_comba_12.i"
#endif
-#if defined(TFM_MUL17)
+#if defined(TFM_MUL17) && FP_SIZE >= 34
#include "fp_mul_comba_17.i"
#endif
-#if defined(TFM_MUL20)
+#if defined(TFM_MUL20) && FP_SIZE >= 40
#include "fp_mul_comba_20.i"
#endif
-#if defined(TFM_MUL24)
+#if defined(TFM_MUL24) && FP_SIZE >= 48
#include "fp_mul_comba_24.i"
#endif
-#if defined(TFM_MUL28)
+#if defined(TFM_MUL28) && FP_SIZE >= 56
#include "fp_mul_comba_28.i"
#endif
-#if defined(TFM_MUL32)
+#if defined(TFM_MUL32) && FP_SIZE >= 64
#include "fp_mul_comba_32.i"
#endif
-#if defined(TFM_MUL48)
+#if defined(TFM_MUL48) && FP_SIZE >= 96
#include "fp_mul_comba_48.i"
#endif
-#if defined(TFM_MUL64)
+#if defined(TFM_MUL64) && FP_SIZE >= 128
#include "fp_mul_comba_64.i"
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asn.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asn.c
index e3d9ff44b..c4e60043f 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asn.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/asn.c
@@ -1,8 +1,8 @@
/* asn.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,21 +16,47 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
-#ifndef NO_ASN
+/*
+ASN Options:
+ * NO_ASN_TIME: Disables time parts of the ASN code for systems without an RTC
+ or wishing to save space.
+ * IGNORE_NAME_CONSTRAINTS: Skip ASN name checks.
+ * ASN_DUMP_OID: Allows dump of OID information for debugging.
+ * RSA_DECODE_EXTRA: Decodes extra information in RSA public key.
+ * WOLFSSL_CERT_GEN: Cert generation. Saves extra certificate info in GetName.
+ * WOLFSSL_NO_ASN_STRICT: Disable strict RFC compliance checks to
+ restore 3.13.0 behavior.
+ * WOLFSSL_NO_OCSP_OPTIONAL_CERTS: Skip optional OCSP certs (responder issuer
+ must still be trusted)
+ * WOLFSSL_NO_TRUSTED_CERTS_VERIFY: Workaround for situation where entire cert
+ chain is not loaded. This only matches on subject and public key and
+ does not perform a PKI validation, so it is not a secure solution.
+ Only enabled for OCSP.
+ * WOLFSSL_NO_OCSP_ISSUER_CHECK: Can be defined for backwards compatibility to
+ disable checking of OCSP subject hash with issuer hash.
+ * WOLFSSL_SMALL_CERT_VERIFY: Verify the certificate signature without using
+ DecodedCert. Doubles up on some code but allows smaller dynamic memory
+ usage.
+ * WOLFSSL_NO_OCSP_DATE_CHECK: Disable date checks for OCSP responses. This
+ may be required when the system's real-time clock is not very accurate.
+ It is recommended to enforce the nonce check instead if possible.
+ * WOLFSSL_FORCE_OCSP_NONCE_CHECK: Require nonces to be available in OCSP
+ responses. The nonces are optional and may not be supported by all
+ responders. If it can be ensured that the used responder sends nonces this
+ option may improve security.
+*/
-#ifdef HAVE_RTP_SYS
- #include "os.h" /* dc_rtc_api needs */
- #include "dc_rtc_api.h" /* to get current time */
-#endif
+#ifndef NO_ASN
#include <wolfssl/wolfcrypt/asn.h>
#include <wolfssl/wolfcrypt/coding.h>
@@ -39,17 +65,25 @@
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/pwdbased.h>
#include <wolfssl/wolfcrypt/des3.h>
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/wc_encrypt.h>
#include <wolfssl/wolfcrypt/logging.h>
#include <wolfssl/wolfcrypt/random.h>
-
+#include <wolfssl/wolfcrypt/hash.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
#ifndef NO_RC4
#include <wolfssl/wolfcrypt/arc4.h>
#endif
#ifdef HAVE_NTRU
- #include "ntru_crypto.h"
+ #include "libntruencrypt/ntru_crypto.h"
#endif
#if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)
@@ -64,478 +98,640 @@
#include <wolfssl/wolfcrypt/ecc.h>
#endif
-#ifdef WOLFSSL_DEBUG_ENCODING
- #ifdef FREESCALE_MQX
- #include <fio.h>
- #else
- #include <stdio.h>
- #endif
+#ifdef HAVE_ED25519
+ #include <wolfssl/wolfcrypt/ed25519.h>
#endif
-#ifdef _MSC_VER
- /* 4996 warning to use MS extensions e.g., strcpy_s instead of XSTRNCPY */
- #pragma warning(disable: 4996)
+#ifdef HAVE_ED448
+ #include <wolfssl/wolfcrypt/ed448.h>
#endif
-
-#ifndef TRUE
- #define TRUE 1
+#ifndef NO_RSA
+ #include <wolfssl/wolfcrypt/rsa.h>
+#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_CRYPTOCELL)
+extern int wc_InitRsaHw(RsaKey* key);
#endif
-#ifndef FALSE
- #define FALSE 0
#endif
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
-#ifdef HAVE_RTP_SYS
- /* uses parital <time.h> structures */
- #define XTIME(tl) (0)
- #define XGMTIME(c, t) my_gmtime((c))
- #define XVALIDATE_DATE(d, f, t) ValidateDate((d), (f), (t))
-#elif defined(MICRIUM)
- #if (NET_SECURE_MGR_CFG_EN == DEF_ENABLED)
- #define XVALIDATE_DATE(d,f,t) NetSecure_ValidateDateHandler((d),(f),(t))
- #else
- #define XVALIDATE_DATE(d, f, t) (0)
- #endif
- #define NO_TIME_H
- /* since Micrium not defining XTIME or XGMTIME, CERT_GEN not available */
-#elif defined(MICROCHIP_TCPIP_V5) || defined(MICROCHIP_TCPIP)
- #include <time.h>
- #define XTIME(t1) pic32_time((t1))
- #define XGMTIME(c, t) gmtime((c))
- #define XVALIDATE_DATE(d, f, t) ValidateDate((d), (f), (t))
-#elif defined(FREESCALE_MQX)
- #define XTIME(t1) mqx_time((t1))
- #define XGMTIME(c, t) mqx_gmtime((c), (t))
- #define XVALIDATE_DATE(d, f, t) ValidateDate((d), (f), (t))
-#elif defined(WOLFSSL_MDK_ARM)
- #if defined(WOLFSSL_MDK5)
- #include "cmsis_os.h"
- #else
- #include <rtl.h>
- #endif
- #undef RNG
- #include "wolfssl_MDK_ARM.h"
- #undef RNG
- #define RNG wolfSSL_RNG /*for avoiding name conflict in "stm32f2xx.h" */
- #define XTIME(tl) (0)
- #define XGMTIME(c, t) wolfssl_MDK_gmtime((c))
- #define XVALIDATE_DATE(d, f, t) ValidateDate((d), (f), (t))
-#elif defined(USER_TIME)
- /* user time, and gmtime compatible functions, there is a gmtime
- implementation here that WINCE uses, so really just need some ticks
- since the EPOCH
- */
-
- struct tm {
- int tm_sec; /* seconds after the minute [0-60] */
- int tm_min; /* minutes after the hour [0-59] */
- int tm_hour; /* hours since midnight [0-23] */
- int tm_mday; /* day of the month [1-31] */
- int tm_mon; /* months since January [0-11] */
- int tm_year; /* years since 1900 */
- int tm_wday; /* days since Sunday [0-6] */
- int tm_yday; /* days since January 1 [0-365] */
- int tm_isdst; /* Daylight Savings Time flag */
- long tm_gmtoff; /* offset from CUT in seconds */
- char *tm_zone; /* timezone abbreviation */
- };
- typedef long time_t;
-
- /* forward declaration */
- struct tm* gmtime(const time_t* timer);
- extern time_t XTIME(time_t * timer);
-
- #define XGMTIME(c, t) gmtime((c))
- #define XVALIDATE_DATE(d, f, t) ValidateDate((d), (f), (t))
-
- #ifdef STACK_TRAP
- /* for stack trap tracking, don't call os gmtime on OS X/linux,
- uses a lot of stack spce */
- extern time_t time(time_t * timer);
- #define XTIME(tl) time((tl))
- #endif /* STACK_TRAP */
-
-#elif defined(TIME_OVERRIDES)
- /* user would like to override time() and gmtime() functionality */
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+ #include <wolfssl/openssl/objects.h>
+#endif
- #ifndef HAVE_TIME_T_TYPE
- typedef long time_t;
- #endif
- extern time_t XTIME(time_t * timer);
-
- #ifndef HAVE_TM_TYPE
- struct tm {
- int tm_sec; /* seconds after the minute [0-60] */
- int tm_min; /* minutes after the hour [0-59] */
- int tm_hour; /* hours since midnight [0-23] */
- int tm_mday; /* day of the month [1-31] */
- int tm_mon; /* months since January [0-11] */
- int tm_year; /* years since 1900 */
- int tm_wday; /* days since Sunday [0-6] */
- int tm_yday; /* days since January 1 [0-365] */
- int tm_isdst; /* Daylight Savings Time flag */
- long tm_gmtoff; /* offset from CUT in seconds */
- char *tm_zone; /* timezone abbreviation */
- };
- #endif
- extern struct tm* XGMTIME(const time_t* timer, struct tm* tmp);
+#ifdef _MSC_VER
+ /* 4996 warning to use MS extensions e.g., strcpy_s instead of XSTRNCPY */
+ #pragma warning(disable: 4996)
+#endif
- #ifndef HAVE_VALIDATE_DATE
- #define XVALIDATE_DATE(d, f, t) ValidateDate((d), (f), (t))
+#define ERROR_OUT(err, eLabel) { ret = (err); goto eLabel; }
+
+#if defined(HAVE_SELFTEST) || ( !defined(NO_SKID) && \
+ ( !defined(HAVE_FIPS) || \
+ !defined(HAVE_FIPS_VERSION) ))
+ #ifndef WOLFSSL_AES_KEY_SIZE_ENUM
+ #define WOLFSSL_AES_KEY_SIZE_ENUM
+ enum Asn_Misc {
+ AES_IV_SIZE = 16,
+ AES_128_KEY_SIZE = 16,
+ AES_192_KEY_SIZE = 24,
+ AES_256_KEY_SIZE = 32
+ };
#endif
-#else
- /* default */
- /* uses complete <time.h> facility */
- #include <time.h>
- #define XTIME(tl) time((tl))
- #define XGMTIME(c, t) gmtime((c))
- #define XVALIDATE_DATE(d, f, t) ValidateDate((d), (f), (t))
#endif
-
-
-#ifdef _WIN32_WCE
-/* no time() or gmtime() even though in time.h header?? */
-
-#include <windows.h>
-
-
-time_t time(time_t* timer)
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+void tsip_inform_key_position(const word32 key_n_start,
+ const word32 key_n_len, const word32 key_e_start,
+ const word32 key_e_len);
+int tsip_tls_CertVerify(const byte *cert, word32 certSz,
+ const byte *signature, word32 sigSz,
+ word32 key_n_start, word32 key_n_len,
+ word32 key_e_start, word32 key_e_len,
+ byte *tsip_encRsaKeyIdx);
+#endif
+int GetLength(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx)
{
- SYSTEMTIME sysTime;
- FILETIME fTime;
- ULARGE_INTEGER intTime;
- time_t localTime;
-
- if (timer == NULL)
- timer = &localTime;
-
- GetSystemTime(&sysTime);
- SystemTimeToFileTime(&sysTime, &fTime);
-
- XMEMCPY(&intTime, &fTime, sizeof(FILETIME));
- /* subtract EPOCH */
- intTime.QuadPart -= 0x19db1ded53e8000;
- /* to secs */
- intTime.QuadPart /= 10000000;
- *timer = (time_t)intTime.QuadPart;
-
- return *timer;
+ return GetLength_ex(input, inOutIdx, len, maxIdx, 1);
}
-#endif /* _WIN32_WCE */
-#if defined( _WIN32_WCE ) || defined( USER_TIME )
-struct tm* gmtime(const time_t* timer)
+/* give option to check length value found against index. 1 to check 0 to not */
+int GetLength_ex(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx, int check)
{
- #define YEAR0 1900
- #define EPOCH_YEAR 1970
- #define SECS_DAY (24L * 60L * 60L)
- #define LEAPYEAR(year) (!((year) % 4) && (((year) % 100) || !((year) %400)))
- #define YEARSIZE(year) (LEAPYEAR(year) ? 366 : 365)
+ int length = 0;
+ word32 idx = *inOutIdx;
+ byte b;
- static const int _ytab[2][12] =
- {
- {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
- {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
- };
+ *len = 0; /* default length */
- static struct tm st_time;
- struct tm* ret = &st_time;
- time_t secs = *timer;
- unsigned long dayclock, dayno;
- int year = EPOCH_YEAR;
+ if ((idx + 1) > maxIdx) { /* for first read */
+ WOLFSSL_MSG("GetLength bad index on input");
+ return BUFFER_E;
+ }
- dayclock = (unsigned long)secs % SECS_DAY;
- dayno = (unsigned long)secs / SECS_DAY;
+ b = input[idx++];
+ if (b >= ASN_LONG_LENGTH) {
+ word32 bytes = b & 0x7F;
- ret->tm_sec = (int) dayclock % 60;
- ret->tm_min = (int)(dayclock % 3600) / 60;
- ret->tm_hour = (int) dayclock / 3600;
- ret->tm_wday = (int) (dayno + 4) % 7; /* day 0 a Thursday */
+ if ((idx + bytes) > maxIdx) { /* for reading bytes */
+ WOLFSSL_MSG("GetLength bad long length");
+ return BUFFER_E;
+ }
- while(dayno >= (unsigned long)YEARSIZE(year)) {
- dayno -= YEARSIZE(year);
- year++;
+ if (bytes > sizeof(length)) {
+ return ASN_PARSE_E;
+ }
+ while (bytes--) {
+ b = input[idx++];
+ length = (length << 8) | b;
+ }
+ if (length < 0) {
+ return ASN_PARSE_E;
+ }
}
+ else
+ length = b;
- ret->tm_year = year - YEAR0;
- ret->tm_yday = (int)dayno;
- ret->tm_mon = 0;
-
- while(dayno >= (unsigned long)_ytab[LEAPYEAR(year)][ret->tm_mon]) {
- dayno -= _ytab[LEAPYEAR(year)][ret->tm_mon];
- ret->tm_mon++;
+ if (check && (idx + length) > maxIdx) { /* for user of length */
+ WOLFSSL_MSG("GetLength value exceeds buffer length");
+ return BUFFER_E;
}
- ret->tm_mday = (int)++dayno;
- ret->tm_isdst = 0;
+ *inOutIdx = idx;
+ if (length > 0)
+ *len = length;
- return ret;
+ return length;
}
-#endif /* _WIN32_WCE || USER_TIME */
+/* input : buffer to read from
+ * inOutIdx : index to start reading from, gets advanced by 1 if successful
+ * maxIdx : maximum index value
+ * tag : ASN tag value found
+ *
+ * returns 0 on success
+ */
+int GetASNTag(const byte* input, word32* inOutIdx, byte* tag, word32 maxIdx)
+{
+ word32 idx;
-#ifdef HAVE_RTP_SYS
+ if (tag == NULL || inOutIdx == NULL || input == NULL) {
+ return BAD_FUNC_ARG;
+ }
-#define YEAR0 1900
+ idx = *inOutIdx;
+ if (idx + ASN_TAG_SZ > maxIdx) {
+ WOLFSSL_MSG("Buffer too small for ASN tag");
+ return BUFFER_E;
+ }
-struct tm* my_gmtime(const time_t* timer) /* has a gmtime() but hangs */
-{
- static struct tm st_time;
- struct tm* ret = &st_time;
+ *tag = input[idx];
+ *inOutIdx = idx + ASN_TAG_SZ;
+ return 0;
+}
- DC_RTC_CALENDAR cal;
- dc_rtc_time_get(&cal, TRUE);
- ret->tm_year = cal.year - YEAR0; /* gm starts at 1900 */
- ret->tm_mon = cal.month - 1; /* gm starts at 0 */
- ret->tm_mday = cal.day;
- ret->tm_hour = cal.hour;
- ret->tm_min = cal.minute;
- ret->tm_sec = cal.second;
+static int GetASNHeader_ex(const byte* input, byte tag, word32* inOutIdx, int* len,
+ word32 maxIdx, int check)
+{
+ word32 idx = *inOutIdx;
+ byte tagFound;
+ int length;
- return ret;
-}
+ if (GetASNTag(input, &idx, &tagFound, maxIdx) != 0)
+ return ASN_PARSE_E;
-#endif /* HAVE_RTP_SYS */
+ if (tagFound != tag)
+ return ASN_PARSE_E;
+
+ if (GetLength_ex(input, &idx, &length, maxIdx, check) < 0)
+ return ASN_PARSE_E;
+ *len = length;
+ *inOutIdx = idx;
+ return length;
+}
-#if defined(MICROCHIP_TCPIP_V5) || defined(MICROCHIP_TCPIP)
-/*
- * time() is just a stub in Microchip libraries. We need our own
- * implementation. Use SNTP client to get seconds since epoch.
+/* Get the DER/BER encoding of an ASN.1 header.
+ *
+ * input Buffer holding DER/BER encoded data.
+ * tag ASN.1 tag value expected in header.
+ * inOutIdx Current index into buffer to parse.
+ * len The number of bytes in the ASN.1 data.
+ * maxIdx Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ * ASN_PARSE_E when the expected tag is not found or length is invalid.
+ * Otherwise, the number of bytes in the ASN.1 data.
*/
-time_t pic32_time(time_t* timer)
+static int GetASNHeader(const byte* input, byte tag, word32* inOutIdx, int* len,
+ word32 maxIdx)
{
-#ifdef MICROCHIP_TCPIP_V5
- DWORD sec = 0;
-#else
- uint32_t sec = 0;
-#endif
- time_t localTime;
-
- if (timer == NULL)
- timer = &localTime;
+ return GetASNHeader_ex(input, tag, inOutIdx, len, maxIdx, 1);
+}
-#ifdef MICROCHIP_MPLAB_HARMONY
- sec = TCPIP_SNTP_UTCSecondsGet();
-#else
- sec = SNTPGetUTCSeconds();
-#endif
- *timer = (time_t) sec;
+static int GetHeader(const byte* input, byte* tag, word32* inOutIdx, int* len,
+ word32 maxIdx, int check)
+{
+ word32 idx = *inOutIdx;
+ int length;
- return *timer;
-}
+ if ((idx + 1) > maxIdx)
+ return BUFFER_E;
-#endif /* MICROCHIP_TCPIP */
+ *tag = input[idx++];
+ if (GetLength_ex(input, &idx, &length, maxIdx, check) < 0)
+ return ASN_PARSE_E;
-#ifdef FREESCALE_MQX
+ *len = length;
+ *inOutIdx = idx;
+ return length;
+}
-time_t mqx_time(time_t* timer)
+int GetSequence(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx)
{
- time_t localTime;
- TIME_STRUCT time_s;
-
- if (timer == NULL)
- timer = &localTime;
+ return GetASNHeader(input, ASN_SEQUENCE | ASN_CONSTRUCTED, inOutIdx, len,
+ maxIdx);
+}
- _time_get(&time_s);
- *timer = (time_t) time_s.SECONDS;
- return *timer;
+int GetSequence_ex(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx, int check)
+{
+ return GetASNHeader_ex(input, ASN_SEQUENCE | ASN_CONSTRUCTED, inOutIdx, len,
+ maxIdx, check);
}
-/* CodeWarrior GCC toolchain only has gmtime_r(), no gmtime() */
-struct tm* mqx_gmtime(const time_t* clock, struct tm* tmpTime)
+
+int GetSet(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx)
{
- return gmtime_r(clock, tmpTime);
+ return GetASNHeader(input, ASN_SET | ASN_CONSTRUCTED, inOutIdx, len,
+ maxIdx);
}
-#endif /* FREESCALE_MQX */
-#ifdef WOLFSSL_TIRTOS
+int GetSet_ex(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx, int check)
+{
+ return GetASNHeader_ex(input, ASN_SET | ASN_CONSTRUCTED, inOutIdx, len,
+ maxIdx, check);
+}
-time_t XTIME(time_t * timer)
+/* Get the DER/BER encoded ASN.1 NULL element.
+ * Ensure that the all fields are as expected and move index past the element.
+ *
+ * input Buffer holding DER/BER encoded data.
+ * inOutIdx Current index into buffer to parse.
+ * maxIdx Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ * ASN_TAG_NULL_E when the NULL tag is not found.
+ * ASN_EXPECT_0_E when the length is not zero.
+ * Otherwise, 0 to indicate success.
+ */
+static int GetASNNull(const byte* input, word32* inOutIdx, word32 maxIdx)
{
- time_t sec = 0;
+ word32 idx = *inOutIdx;
+ byte b;
+
+ if ((idx + 2) > maxIdx)
+ return BUFFER_E;
- sec = (time_t) Seconds_get();
+ b = input[idx++];
+ if (b != ASN_TAG_NULL)
+ return ASN_TAG_NULL_E;
- if (timer != NULL)
- *timer = sec;
+ if (input[idx++] != 0)
+ return ASN_EXPECT_0_E;
- return sec;
+ *inOutIdx = idx;
+ return 0;
}
-#endif /* WOLFSSL_TIRTOS */
-
-static INLINE word32 btoi(byte b)
+/* Set the DER/BER encoding of the ASN.1 NULL element.
+ *
+ * output Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+static int SetASNNull(byte* output)
{
- return b - 0x30;
-}
+ output[0] = ASN_TAG_NULL;
+ output[1] = 0;
+ return 2;
+}
-/* two byte date/time, add to value */
-static INLINE void GetTime(int* value, const byte* date, int* idx)
+/* Get the DER/BER encoding of an ASN.1 BOOLEAN.
+ *
+ * input Buffer holding DER/BER encoded data.
+ * inOutIdx Current index into buffer to parse.
+ * maxIdx Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ * ASN_PARSE_E when the BOOLEAN tag is not found or length is not 1.
+ * Otherwise, 0 to indicate the value was false and 1 to indicate true.
+ */
+static int GetBoolean(const byte* input, word32* inOutIdx, word32 maxIdx)
{
- int i = *idx;
+ word32 idx = *inOutIdx;
+ byte b;
- *value += btoi(date[i++]) * 10;
- *value += btoi(date[i++]);
+ if ((idx + 3) > maxIdx)
+ return BUFFER_E;
- *idx = i;
+ b = input[idx++];
+ if (b != ASN_BOOLEAN)
+ return ASN_PARSE_E;
+
+ if (input[idx++] != 1)
+ return ASN_PARSE_E;
+
+ b = input[idx++] != 0;
+
+ *inOutIdx = idx;
+ return b;
}
+#ifdef ASN1_SET_BOOLEAN
+/* Set the DER/BER encoding of the ASN.1 NULL element.
+ * Note: Function not required as yet.
+ *
+ * val Boolean value to encode.
+ * output Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+static int SetBoolean(int val, byte* output)
+{
+ output[0] = ASN_BOOLEAN;
+ output[1] = 1;
+ output[2] = val ? -1 : 0;
-#if defined(MICRIUM)
+ return 3;
+}
+#endif
-CPU_INT32S NetSecure_ValidateDateHandler(CPU_INT08U *date, CPU_INT08U format,
- CPU_INT08U dateType)
+/* Get the DER/BER encoding of an ASN.1 OCTET_STRING header.
+ *
+ * input Buffer holding DER/BER encoded data.
+ * inOutIdx Current index into buffer to parse.
+ * len The number of bytes in the ASN.1 data.
+ * maxIdx Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ * ASN_PARSE_E when the OCTET_STRING tag is not found or length is
+ * invalid.
+ * Otherwise, the number of bytes in the ASN.1 data.
+ */
+int GetOctetString(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx)
{
- CPU_BOOLEAN rtn_code;
- CPU_INT32S i;
- CPU_INT32S val;
- CPU_INT16U year;
- CPU_INT08U month;
- CPU_INT16U day;
- CPU_INT08U hour;
- CPU_INT08U min;
- CPU_INT08U sec;
+ return GetASNHeader(input, ASN_OCTET_STRING, inOutIdx, len, maxIdx);
+}
- i = 0;
- year = 0u;
+/* Get the DER/BER encoding of an ASN.1 INTEGER header.
+ * Removes the leading zero byte when found.
+ *
+ * input Buffer holding DER/BER encoded data.
+ * inOutIdx Current index into buffer to parse.
+ * len The number of bytes in the ASN.1 data (excluding any leading zero).
+ * maxIdx Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ * ASN_PARSE_E when the INTEGER tag is not found, length is invalid,
+ * or invalid use of or missing leading zero.
+ * Otherwise, 0 to indicate success.
+ */
+static int GetASNInt(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx)
+{
+ int ret;
- if (format == ASN_UTC_TIME) {
- if (btoi(date[0]) >= 5)
- year = 1900;
- else
- year = 2000;
- }
- else { /* format == GENERALIZED_TIME */
- year += btoi(date[i++]) * 1000;
- year += btoi(date[i++]) * 100;
- }
+ ret = GetASNHeader(input, ASN_INTEGER, inOutIdx, len, maxIdx);
+ if (ret < 0)
+ return ret;
- val = year;
- GetTime(&val, date, &i);
- year = (CPU_INT16U)val;
+ if (*len > 0) {
+ /* remove leading zero, unless there is only one 0x00 byte */
+ if ((input[*inOutIdx] == 0x00) && (*len > 1)) {
+ (*inOutIdx)++;
+ (*len)--;
- val = 0;
- GetTime(&val, date, &i);
- month = (CPU_INT08U)val;
+ if (*len > 0 && (input[*inOutIdx] & 0x80) == 0)
+ return ASN_PARSE_E;
+ }
+ }
- val = 0;
- GetTime(&val, date, &i);
- day = (CPU_INT16U)val;
+ return 0;
+}
- val = 0;
- GetTime(&val, date, &i);
- hour = (CPU_INT08U)val;
+/* Get the DER/BER encoding of an ASN.1 INTEGER that has a value of no more than
+ * 7 bits.
+ *
+ * input Buffer holding DER/BER encoded data.
+ * inOutIdx Current index into buffer to parse.
+ * maxIdx Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ * ASN_PARSE_E when the INTEGER tag is not found or length is invalid.
+ * Otherwise, the 7-bit value.
+ */
+static int GetInteger7Bit(const byte* input, word32* inOutIdx, word32 maxIdx)
+{
+ word32 idx = *inOutIdx;
+ byte b;
- val = 0;
- GetTime(&val, date, &i);
- min = (CPU_INT08U)val;
+ if ((idx + 3) > maxIdx)
+ return BUFFER_E;
- val = 0;
- GetTime(&val, date, &i);
- sec = (CPU_INT08U)val;
+ if (GetASNTag(input, &idx, &b, maxIdx) != 0)
+ return ASN_PARSE_E;
+ if (b != ASN_INTEGER)
+ return ASN_PARSE_E;
+ if (input[idx++] != 1)
+ return ASN_PARSE_E;
+ b = input[idx++];
- return NetSecure_ValidateDate(year, month, day, hour, min, sec, dateType);
+ *inOutIdx = idx;
+ return b;
}
-#endif /* MICRIUM */
+#if !defined(NO_DSA) && !defined(NO_SHA)
+static const char sigSha1wDsaName[] = "SHAwDSA";
+#endif /* NO_DSA */
+#ifndef NO_RSA
+#ifdef WOLFSSL_MD2
+ static const char sigMd2wRsaName[] = "md2WithRSAEncryption";
+#endif
+#ifndef NO_MD5
+ static const char sigMd5wRsaName[] = "md5WithRSAEncryption";
+#endif
+#ifndef NO_SHA
+ static const char sigSha1wRsaName[] = "sha1WithRSAEncryption";
+#endif
+#ifdef WOLFSSL_SHA224
+ static const char sigSha224wRsaName[] = "sha224WithRSAEncryption";
+#endif
+#ifndef NO_SHA256
+ static const char sigSha256wRsaName[] = "sha256WithRSAEncryption";
+#endif
+#ifdef WOLFSSL_SHA384
+ static const char sigSha384wRsaName[] = "sha384WithRSAEncryption";
+#endif
+#ifdef WOLFSSL_SHA512
+ static const char sigSha512wRsaName[] = "sha512WithRSAEncryption";
+#endif
+#endif /* NO_RSA */
+#ifdef HAVE_ECC
+#ifndef NO_SHA
+ static const char sigSha1wEcdsaName[] = "SHAwECDSA";
+#endif
+#ifdef WOLFSSL_SHA224
+ static const char sigSha224wEcdsaName[] = "SHA224wECDSA";
+#endif
+#ifndef NO_SHA256
+ static const char sigSha256wEcdsaName[] = "SHA256wECDSA";
+#endif
+#ifdef WOLFSSL_SHA384
+ static const char sigSha384wEcdsaName[] = "SHA384wECDSA";
+#endif
+#ifdef WOLFSSL_SHA512
+ static const char sigSha512wEcdsaName[] = "SHA512wECDSA";
+#endif
+#endif /* HAVE_ECC */
+static const char sigUnknownName[] = "Unknown";
-WOLFSSL_LOCAL int GetLength(const byte* input, word32* inOutIdx, int* len,
- word32 maxIdx)
-{
- int length = 0;
- word32 i = *inOutIdx;
- byte b;
-
- *len = 0; /* default length */
- if ( (i+1) > maxIdx) { /* for first read */
- WOLFSSL_MSG("GetLength bad index on input");
- return BUFFER_E;
+/* Get the human readable string for a signature type
+ *
+ * oid Oid value for signature
+ */
+const char* GetSigName(int oid) {
+ switch (oid) {
+ #if !defined(NO_DSA) && !defined(NO_SHA)
+ case CTC_SHAwDSA:
+ return sigSha1wDsaName;
+ #endif /* NO_DSA && NO_SHA */
+ #ifndef NO_RSA
+ #ifdef WOLFSSL_MD2
+ case CTC_MD2wRSA:
+ return sigMd2wRsaName;
+ #endif
+ #ifndef NO_MD5
+ case CTC_MD5wRSA:
+ return sigMd5wRsaName;
+ #endif
+ #ifndef NO_SHA
+ case CTC_SHAwRSA:
+ return sigSha1wRsaName;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ case CTC_SHA224wRSA:
+ return sigSha224wRsaName;
+ #endif
+ #ifndef NO_SHA256
+ case CTC_SHA256wRSA:
+ return sigSha256wRsaName;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case CTC_SHA384wRSA:
+ return sigSha384wRsaName;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case CTC_SHA512wRSA:
+ return sigSha512wRsaName;
+ #endif
+ #endif /* NO_RSA */
+ #ifdef HAVE_ECC
+ #ifndef NO_SHA
+ case CTC_SHAwECDSA:
+ return sigSha1wEcdsaName;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ case CTC_SHA224wECDSA:
+ return sigSha224wEcdsaName;
+ #endif
+ #ifndef NO_SHA256
+ case CTC_SHA256wECDSA:
+ return sigSha256wEcdsaName;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case CTC_SHA384wECDSA:
+ return sigSha384wEcdsaName;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case CTC_SHA512wECDSA:
+ return sigSha512wEcdsaName;
+ #endif
+ #endif /* HAVE_ECC */
+ default:
+ return sigUnknownName;
}
+}
- b = input[i++];
- if (b >= ASN_LONG_LENGTH) {
- word32 bytes = b & 0x7F;
- if ( (i+bytes) > maxIdx) { /* for reading bytes */
- WOLFSSL_MSG("GetLength bad long length");
- return BUFFER_E;
- }
+#if !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_CERTS) || \
+ (!defined(NO_RSA) && \
+ (defined(WOLFSSL_CERT_GEN) || \
+ ((defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)) && !defined(HAVE_USER_RSA))))
+/* Set the DER/BER encoding of the ASN.1 INTEGER header.
+ *
+ * len Length of data to encode.
+ * firstByte First byte of data, most significant byte of integer, to encode.
+ * output Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+static int SetASNInt(int len, byte firstByte, byte* output)
+{
+ word32 idx = 0;
- while (bytes--) {
- b = input[i++];
- length = (length << 8) | b;
- }
- }
- else
- length = b;
-
- if ( (i+length) > maxIdx) { /* for user of length */
- WOLFSSL_MSG("GetLength value exceeds buffer length");
- return BUFFER_E;
+ if (output)
+ output[idx] = ASN_INTEGER;
+ idx++;
+ if (firstByte & 0x80)
+ len++;
+ idx += SetLength(len, output ? output + idx : NULL);
+ if (firstByte & 0x80) {
+ if (output)
+ output[idx] = 0x00;
+ idx++;
}
- *inOutIdx = i;
- if (length > 0)
- *len = length;
-
- return length;
+ return idx;
}
+#endif
-
-WOLFSSL_LOCAL int GetSequence(const byte* input, word32* inOutIdx, int* len,
- word32 maxIdx)
+#if !defined(NO_DSA) || defined(HAVE_ECC) || (defined(WOLFSSL_CERT_GEN) && \
+ !defined(NO_RSA)) || ((defined(WOLFSSL_KEY_GEN) || \
+ defined(OPENSSL_EXTRA)) && !defined(NO_RSA) && !defined(HAVE_USER_RSA))
+/* Set the DER/BER encoding of the ASN.1 INTEGER element with an mp_int.
+ * The number is assumed to be positive.
+ *
+ * n Multi-precision integer to encode.
+ * maxSz Maximum size of the encoded integer.
+ * A negative value indicates no check of length requested.
+ * output Buffer to write into.
+ * returns BUFFER_E when the data is too long for the buffer.
+ * MP_TO_E when encoding the integer fails.
+ * Otherwise, the number of bytes added to the buffer.
+ */
+static int SetASNIntMP(mp_int* n, int maxSz, byte* output)
{
- int length = -1;
- word32 idx = *inOutIdx;
+ int idx = 0;
+ int leadingBit;
+ int length;
+ int err;
- if (input[idx++] != (ASN_SEQUENCE | ASN_CONSTRUCTED) ||
- GetLength(input, &idx, &length, maxIdx) < 0)
- return ASN_PARSE_E;
+ leadingBit = mp_leading_bit(n);
+ length = mp_unsigned_bin_size(n);
+ idx = SetASNInt(length, leadingBit ? 0x80 : 0x00, output);
+ if (maxSz >= 0 && (idx + length) > maxSz)
+ return BUFFER_E;
- *len = length;
- *inOutIdx = idx;
+ if (output) {
+ err = mp_to_unsigned_bin(n, output + idx);
+ if (err != MP_OKAY)
+ return MP_TO_E;
+ }
+ idx += length;
- return length;
+ return idx;
}
+#endif
-
-WOLFSSL_LOCAL int GetSet(const byte* input, word32* inOutIdx, int* len,
- word32 maxIdx)
+#if !defined(NO_RSA) && defined(HAVE_USER_RSA) && \
+ (defined(WOLFSSL_CERT_GEN) || defined(OPENSSL_EXTRA))
+/* Set the DER/BER encoding of the ASN.1 INTEGER element with an mp_int from
+ * an RSA key.
+ * The number is assumed to be positive.
+ *
+ * n Multi-precision integer to encode.
+ * output Buffer to write into.
+ * returns BUFFER_E when the data is too long for the buffer.
+ * MP_TO_E when encoding the integer fails.
+ * Otherwise, the number of bytes added to the buffer.
+ */
+static int SetASNIntRSA(void* n, byte* output)
{
- int length = -1;
- word32 idx = *inOutIdx;
+ int idx = 0;
+ int leadingBit;
+ int length;
+ int err;
- if (input[idx++] != (ASN_SET | ASN_CONSTRUCTED) ||
- GetLength(input, &idx, &length, maxIdx) < 0)
- return ASN_PARSE_E;
+ leadingBit = wc_Rsa_leading_bit(n);
+ length = wc_Rsa_unsigned_bin_size(n);
+ idx = SetASNInt(length, leadingBit ? 0x80 : 0x00, output);
+ if ((idx + length) > MAX_RSA_INT_SZ)
+ return BUFFER_E;
- *len = length;
- *inOutIdx = idx;
+ if (output) {
+ err = wc_Rsa_to_unsigned_bin(n, output + idx, length);
+ if (err != MP_OKAY)
+ return MP_TO_E;
+ }
+ idx += length;
- return length;
+ return idx;
}
+#endif /* !NO_RSA && HAVE_USER_RSA && WOLFSSL_CERT_GEN */
-
-/* winodws header clash for WinCE using GetVersion */
-WOLFSSL_LOCAL int GetMyVersion(const byte* input, word32* inOutIdx, int* version)
+/* Windows header clash for WinCE using GetVersion */
+int GetMyVersion(const byte* input, word32* inOutIdx,
+ int* version, word32 maxIdx)
{
word32 idx = *inOutIdx;
+ byte tag;
- WOLFSSL_ENTER("GetMyVersion");
+ if ((idx + MIN_VERSION_SZ) > maxIdx)
+ return ASN_PARSE_E;
- if (input[idx++] != ASN_INTEGER)
+ if (GetASNTag(input, &idx, &tag, maxIdx) != 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_INTEGER)
return ASN_PARSE_E;
if (input[idx++] != 0x01)
@@ -550,20 +746,31 @@ WOLFSSL_LOCAL int GetMyVersion(const byte* input, word32* inOutIdx, int* version
#ifndef NO_PWDBASED
/* Get small count integer, 32 bits or less */
-static int GetShortInt(const byte* input, word32* inOutIdx, int* number)
+int GetShortInt(const byte* input, word32* inOutIdx, int* number, word32 maxIdx)
{
word32 idx = *inOutIdx;
word32 len;
+ byte tag;
*number = 0;
- if (input[idx++] != ASN_INTEGER)
+ /* check for type and length bytes */
+ if ((idx + 2) > maxIdx)
+ return BUFFER_E;
+
+ if (GetASNTag(input, &idx, &tag, maxIdx) != 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_INTEGER)
return ASN_PARSE_E;
len = input[idx++];
if (len > 4)
return ASN_PARSE_E;
+ if (len + idx > maxIdx)
+ return ASN_PARSE_E;
+
while (len--) {
*number = *number << 8 | input[idx++];
}
@@ -572,18 +779,68 @@ static int GetShortInt(const byte* input, word32* inOutIdx, int* number)
return *number;
}
-#endif /* !NO_PWDBASED */
+/* Set small integer, 32 bits or less. DER encoding with no leading 0s
+ * returns total amount written including ASN tag and length byte on success */
+int SetShortInt(byte* input, word32* inOutIdx, word32 number, word32 maxIdx)
+{
+ word32 idx = *inOutIdx;
+ word32 len = 0;
+ int i;
+ byte ar[MAX_LENGTH_SZ];
+
+ /* check for room for type and length bytes */
+ if ((idx + 2) > maxIdx)
+ return BUFFER_E;
+
+ input[idx++] = ASN_INTEGER;
+ idx++; /* place holder for length byte */
+ if (MAX_LENGTH_SZ + idx > maxIdx)
+ return ASN_PARSE_E;
+
+ /* find first non zero byte */
+ XMEMSET(ar, 0, MAX_LENGTH_SZ);
+ c32toa(number, ar);
+ for (i = 0; i < MAX_LENGTH_SZ; i++) {
+ if (ar[i] != 0) {
+ break;
+ }
+ }
+
+ /* handle case of 0 */
+ if (i == MAX_LENGTH_SZ) {
+ input[idx++] = 0; len++;
+ }
+
+ for (; i < MAX_LENGTH_SZ && idx < maxIdx; i++) {
+ input[idx++] = ar[i]; len++;
+ }
+
+ /* jump back to beginning of input buffer using unaltered inOutIdx value
+ * and set number of bytes for integer, then update the index value */
+ input[*inOutIdx + 1] = (byte)len;
+ *inOutIdx = idx;
+
+ return len + 2; /* size of integer bytes plus ASN TAG and length byte */
+}
+#endif /* !NO_PWDBASED */
+
/* May not have one, not an error */
-static int GetExplicitVersion(const byte* input, word32* inOutIdx, int* version)
+static int GetExplicitVersion(const byte* input, word32* inOutIdx, int* version,
+ word32 maxIdx)
{
word32 idx = *inOutIdx;
+ byte tag;
WOLFSSL_ENTER("GetExplicitVersion");
- if (input[idx++] == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
- *inOutIdx = ++idx; /* eat header */
- return GetMyVersion(input, inOutIdx, version);
+
+ if (GetASNTag(input, &idx, &tag, maxIdx) != 0)
+ return ASN_PARSE_E;
+
+ if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
+ *inOutIdx = ++idx; /* skip header */
+ return GetMyVersion(input, inOutIdx, version, maxIdx);
}
/* go back as is */
@@ -592,257 +849,2267 @@ static int GetExplicitVersion(const byte* input, word32* inOutIdx, int* version)
return 0;
}
-
-WOLFSSL_LOCAL int GetInt(mp_int* mpi, const byte* input, word32* inOutIdx,
- word32 maxIdx)
+int GetInt(mp_int* mpi, const byte* input, word32* inOutIdx, word32 maxIdx)
{
- word32 i = *inOutIdx;
- byte b = input[i++];
+ word32 idx = *inOutIdx;
+ int ret;
int length;
- if (b != ASN_INTEGER)
- return ASN_PARSE_E;
-
- if (GetLength(input, &i, &length, maxIdx) < 0)
- return ASN_PARSE_E;
-
- if ( (b = input[i++]) == 0x00)
- length--;
- else
- i--;
+ ret = GetASNInt(input, &idx, &length, maxIdx);
+ if (ret != 0)
+ return ret;
if (mp_init(mpi) != MP_OKAY)
return MP_INIT_E;
- if (mp_read_unsigned_bin(mpi, (byte*)input + i, length) != 0) {
+ if (mp_read_unsigned_bin(mpi, (byte*)input + idx, length) != 0) {
+ mp_clear(mpi);
+ return ASN_GETINT_E;
+ }
+
+#ifdef HAVE_WOLF_BIGINT
+ if (wc_bigint_from_unsigned_bin(&mpi->raw, input + idx, length) != 0) {
mp_clear(mpi);
return ASN_GETINT_E;
}
+#endif /* HAVE_WOLF_BIGINT */
+
+ *inOutIdx = idx + length;
- *inOutIdx = i + length;
return 0;
}
+#if (!defined(WOLFSSL_KEY_GEN) && !defined(OPENSSL_EXTRA) && defined(RSA_LOW_MEM)) \
+ || defined(WOLFSSL_RSA_PUBLIC_ONLY) || (!defined(NO_DSA) && defined(WOLFSSL_QT))
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+static int SkipInt(const byte* input, word32* inOutIdx, word32 maxIdx)
+{
+ word32 idx = *inOutIdx;
+ int ret;
+ int length;
-static int GetObjectId(const byte* input, word32* inOutIdx, word32* oid,
- word32 maxIdx)
+ ret = GetASNInt(input, &idx, &length, maxIdx);
+ if (ret != 0)
+ return ret;
+
+ *inOutIdx = idx + length;
+
+ return 0;
+}
+#endif
+#endif
+
+static int CheckBitString(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx, int zeroBits, byte* unusedBits)
{
+ word32 idx = *inOutIdx;
int length;
- word32 i = *inOutIdx;
byte b;
- *oid = 0;
-
- b = input[i++];
- if (b != ASN_OBJECT_ID)
- return ASN_OBJECT_ID_E;
-
- if (GetLength(input, &i, &length, maxIdx) < 0)
+
+ if (GetASNTag(input, &idx, &b, maxIdx) != 0) {
+ return ASN_BITSTR_E;
+ }
+
+ if (b != ASN_BIT_STRING) {
+ return ASN_BITSTR_E;
+ }
+
+ if (GetLength(input, &idx, &length, maxIdx) < 0)
return ASN_PARSE_E;
-
- while(length--)
- *oid += input[i++];
- /* just sum it up for now */
-
- *inOutIdx = i;
-
+
+ /* extra sanity check that length is greater than 0 */
+ if (length <= 0) {
+ WOLFSSL_MSG("Error length was 0 in CheckBitString");
+ return BUFFER_E;
+ }
+
+ if (idx + 1 > maxIdx) {
+ WOLFSSL_MSG("Attempted buffer read larger than input buffer");
+ return BUFFER_E;
+ }
+
+ b = input[idx];
+ if (zeroBits && b != 0x00)
+ return ASN_EXPECT_0_E;
+ if (b >= 0x08)
+ return ASN_PARSE_E;
+ if (b != 0) {
+ if ((byte)(input[idx + length - 1] << (8 - b)) != 0)
+ return ASN_PARSE_E;
+ }
+ idx++;
+ length--; /* length has been checked for greater than 0 */
+
+ *inOutIdx = idx;
+ if (len != NULL)
+ *len = length;
+ if (unusedBits != NULL)
+ *unusedBits = b;
+
return 0;
}
+/* RSA (with CertGen or KeyGen) OR ECC OR ED25519 OR ED448 (with CertGen or
+ * KeyGen) */
+#if (!defined(NO_RSA) && !defined(HAVE_USER_RSA) && \
+ (defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA))) || \
+ (defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)) || \
+ ((defined(HAVE_ED25519) || defined(HAVE_ED448)) && \
+ (defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)))
-WOLFSSL_LOCAL int GetAlgoId(const byte* input, word32* inOutIdx, word32* oid,
- word32 maxIdx)
+/* Set the DER/BER encoding of the ASN.1 BIT_STRING header.
+ *
+ * len Length of data to encode.
+ * unusedBits The number of unused bits in the last byte of data.
+ * That is, the number of least significant zero bits before a one.
+ * The last byte is the most-significant non-zero byte of a number.
+ * output Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+word32 SetBitString(word32 len, byte unusedBits, byte* output)
{
- int length;
- word32 i = *inOutIdx;
- byte b;
- *oid = 0;
-
- WOLFSSL_ENTER("GetAlgoId");
+ word32 idx = 0;
- if (GetSequence(input, &i, &length, maxIdx) < 0)
- return ASN_PARSE_E;
-
- b = input[i++];
- if (b != ASN_OBJECT_ID)
- return ASN_OBJECT_ID_E;
-
- if (GetLength(input, &i, &length, maxIdx) < 0)
+ if (output)
+ output[idx] = ASN_BIT_STRING;
+ idx++;
+
+ idx += SetLength(len + 1, output ? output + idx : NULL);
+ if (output)
+ output[idx] = unusedBits;
+ idx++;
+
+ return idx;
+}
+#endif /* !NO_RSA || HAVE_ECC || HAVE_ED25519 || HAVE_ED448 */
+
+#ifdef ASN_BER_TO_DER
+/* Pull informtation from the ASN.1 BER encoded item header */
+static int GetBerHeader(const byte* data, word32* idx, word32 maxIdx,
+ byte* pTag, word32* pLen, int* indef)
+{
+ int len = 0;
+ byte tag;
+ word32 i = *idx;
+
+ *indef = 0;
+
+ /* Check there is enough data for a minimal header */
+ if (i + 2 > maxIdx) {
return ASN_PARSE_E;
-
- while(length--) {
- /* odd HC08 compiler behavior here when input[i++] */
- *oid += input[i];
+ }
+
+ /* Retrieve tag */
+ tag = data[i++];
+
+ /* Indefinite length handled specially */
+ if (data[i] == 0x80) {
+ /* Check valid tag for indefinite */
+ if (((tag & 0xc0) == 0) && ((tag & ASN_CONSTRUCTED) == 0x00)) {
+ return ASN_PARSE_E;
+ }
i++;
+ *indef = 1;
}
- /* just sum it up for now */
-
- /* could have NULL tag and 0 terminator, but may not */
- b = input[i++];
-
- if (b == ASN_TAG_NULL) {
- b = input[i++];
- if (b != 0)
- return ASN_EXPECT_0_E;
+ else if (GetLength(data, &i, &len, maxIdx) < 0) {
+ return ASN_PARSE_E;
}
- else
- /* go back, didn't have it */
- i--;
-
- *inOutIdx = i;
-
+
+ /* Return tag, length and index after BER item header */
+ *pTag = tag;
+ *pLen = len;
+ *idx = i;
return 0;
}
+#ifndef INDEF_ITEMS_MAX
+#define INDEF_ITEMS_MAX 20
+#endif
+
+/* Indef length item data */
+typedef struct Indef {
+ word32 start;
+ int depth;
+ int headerLen;
+ word32 len;
+} Indef;
+
+/* Indef length items */
+typedef struct IndefItems
+{
+ Indef len[INDEF_ITEMS_MAX];
+ int cnt;
+ int idx;
+ int depth;
+} IndefItems;
+
+
+/* Get header length of current item */
+static int IndefItems_HeaderLen(IndefItems* items)
+{
+ return items->len[items->idx].headerLen;
+}
+
+/* Get data length of current item */
+static word32 IndefItems_Len(IndefItems* items)
+{
+ return items->len[items->idx].len;
+}
+
+/* Add a indefinite length item */
+static int IndefItems_AddItem(IndefItems* items, word32 start)
+{
+ int ret = 0;
+ int i;
+
+ if (items->cnt == INDEF_ITEMS_MAX) {
+ ret = MEMORY_E;
+ }
+ else {
+ i = items->cnt++;
+ items->len[i].start = start;
+ items->len[i].depth = items->depth++;
+ items->len[i].headerLen = 1;
+ items->len[i].len = 0;
+ items->idx = i;
+ }
+
+ return ret;
+}
+
+/* Increase data length of current item */
+static void IndefItems_AddData(IndefItems* items, word32 length)
+{
+ items->len[items->idx].len += length;
+}
+
+/* Update header length of current item to reflect data length */
+static void IndefItems_UpdateHeaderLen(IndefItems* items)
+{
+ items->len[items->idx].headerLen +=
+ SetLength(items->len[items->idx].len, NULL);
+}
+
+/* Go to indefinite parent of current item */
+static void IndefItems_Up(IndefItems* items)
+{
+ int i;
+ int depth = items->len[items->idx].depth - 1;
+
+ for (i = items->cnt - 1; i >= 0; i--) {
+ if (items->len[i].depth == depth) {
+ break;
+ }
+ }
+ items->idx = i;
+ items->depth = depth + 1;
+}
+
+/* Calculate final length by adding length of indefinite child items */
+static void IndefItems_CalcLength(IndefItems* items)
+{
+ int i;
+ int idx = items->idx;
+
+ for (i = idx + 1; i < items->cnt; i++) {
+ if (items->len[i].depth == items->depth) {
+ items->len[idx].len += items->len[i].headerLen;
+ items->len[idx].len += items->len[i].len;
+ }
+ }
+ items->len[idx].headerLen += SetLength(items->len[idx].len, NULL);
+}
+
+/* Add more data to indefinite length item */
+static void IndefItems_MoreData(IndefItems* items, word32 length)
+{
+ if (items->cnt > 0 && items->idx >= 0) {
+ items->len[items->idx].len += length;
+ }
+}
+
+/* Convert a BER encoding with indefinite length items to DER.
+ *
+ * ber BER encoded data.
+ * berSz Length of BER encoded data.
+ * der Buffer to hold DER encoded version of data.
+ * NULL indicates only the length is required.
+ * derSz The size of the buffer to hold the DER encoded data.
+ * Will be set if der is NULL, otherwise the value is checked as der is
+ * filled.
+ * returns ASN_PARSE_E if the BER data is invalid and BAD_FUNC_ARG if ber or
+ * derSz are NULL.
+ */
+int wc_BerToDer(const byte* ber, word32 berSz, byte* der, word32* derSz)
+{
+ int ret = 0;
+ word32 i, j;
+#ifdef WOLFSSL_SMALL_STACK
+ IndefItems* indefItems = NULL;
+#else
+ IndefItems indefItems[1];
+#endif
+ byte tag, basic;
+ word32 length;
+ int indef;
+
+ if (ber == NULL || derSz == NULL)
+ return BAD_FUNC_ARG;
+
+#ifdef WOLFSSL_SMALL_STACK
+ indefItems = XMALLOC(sizeof(IndefItems), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (indefItems == NULL) {
+ ret = MEMORY_E;
+ goto end;
+ }
+#endif
+
+ XMEMSET(indefItems, 0, sizeof(*indefItems));
+
+ /* Calculate indefinite item lengths */
+ for (i = 0; i < berSz; ) {
+ word32 start = i;
+
+ /* Get next BER item */
+ ret = GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+ if (ret != 0) {
+ goto end;
+ }
+
+ if (indef) {
+ /* Indefinite item - add to list */
+ ret = IndefItems_AddItem(indefItems, i);
+ if (ret != 0) {
+ goto end;
+ }
+
+ if ((tag & 0xC0) == 0 &&
+ tag != (ASN_SEQUENCE | ASN_CONSTRUCTED) &&
+ tag != (ASN_SET | ASN_CONSTRUCTED)) {
+ /* Constructed basic type - get repeating tag */
+ basic = tag & (~ASN_CONSTRUCTED);
+
+ /* Add up lengths of each item below */
+ for (; i < berSz; ) {
+ /* Get next BER item */
+ ret = GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+ if (ret != 0) {
+ goto end;
+ }
+
+ /* End of content closes item */
+ if (tag == ASN_EOC) {
+ /* Must be zero length */
+ if (length != 0) {
+ ret = ASN_PARSE_E;
+ goto end;
+ }
+ break;
+ }
+
+ /* Must not be indefinite and tag must match parent */
+ if (indef || tag != basic) {
+ ret = ASN_PARSE_E;
+ goto end;
+ }
+
+ /* Add to length */
+ IndefItems_AddData(indefItems, length);
+ /* Skip data */
+ i += length;
+ }
+
+ /* Ensure we got an EOC and not end of data */
+ if (tag != ASN_EOC) {
+ ret = ASN_PARSE_E;
+ goto end;
+ }
+
+ /* Set the header length to include the length field */
+ IndefItems_UpdateHeaderLen(indefItems);
+ /* Go to indefinite parent item */
+ IndefItems_Up(indefItems);
+ }
+ }
+ else if (tag == ASN_EOC) {
+ /* End-of-content must be 0 length */
+ if (length != 0) {
+ ret = ASN_PARSE_E;
+ goto end;
+ }
+ /* Check there is an item to close - missing EOC */
+ if (indefItems->depth == 0) {
+ ret = ASN_PARSE_E;
+ goto end;
+ }
+
+ /* Finish calculation of data length for indefinite item */
+ IndefItems_CalcLength(indefItems);
+ /* Go to indefinite parent item */
+ IndefItems_Up(indefItems);
+ }
+ else {
+ /* Known length item to add in - make sure enough data for it */
+ if (i + length > berSz) {
+ ret = ASN_PARSE_E;
+ goto end;
+ }
+
+ /* Include all data - can't have indefinite inside definite */
+ i += length;
+ /* Add entire item to current indefinite item */
+ IndefItems_MoreData(indefItems, i - start);
+ }
+ }
+ /* Check we had an EOC for each indefinite item */
+ if (indefItems->depth != 0) {
+ ret = ASN_PARSE_E;
+ goto end;
+ }
+
+ /* Write out DER */
+
+ j = 0;
+ /* Reset index */
+ indefItems->idx = 0;
+ for (i = 0; i < berSz; ) {
+ word32 start = i;
+
+ /* Get item - checked above */
+ (void)GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+ if (indef) {
+ if (der != NULL) {
+ /* Check enough space for header */
+ if (j + IndefItems_HeaderLen(indefItems) > *derSz) {
+ ret = BUFFER_E;
+ goto end;
+ }
+
+ if ((tag & 0xC0) == 0 &&
+ tag != (ASN_SEQUENCE | ASN_CONSTRUCTED) &&
+ tag != (ASN_SET | ASN_CONSTRUCTED)) {
+ /* Remove constructed tag for basic types */
+ tag &= ~ASN_CONSTRUCTED;
+ }
+ /* Add tag and length */
+ der[j] = tag;
+ (void)SetLength(IndefItems_Len(indefItems), der + j + 1);
+ }
+ /* Add header length of indefinite item */
+ j += IndefItems_HeaderLen(indefItems);
+
+ if ((tag & 0xC0) == 0 &&
+ tag != (ASN_SEQUENCE | ASN_CONSTRUCTED) &&
+ tag != (ASN_SET | ASN_CONSTRUCTED)) {
+ /* For basic type - get each child item and add data */
+ for (; i < berSz; ) {
+ (void)GetBerHeader(ber, &i, berSz, &tag, &length, &indef);
+ if (tag == ASN_EOC) {
+ break;
+ }
+ if (der != NULL) {
+ if (j + length > *derSz) {
+ ret = BUFFER_E;
+ goto end;
+ }
+ XMEMCPY(der + j, ber + i, length);
+ }
+ j += length;
+ i += length;
+ }
+ }
+
+ /* Move to next indef item in list */
+ indefItems->idx++;
+ }
+ else if (tag == ASN_EOC) {
+ /* End-Of-Content is not written out in DER */
+ }
+ else {
+ /* Write out definite length item as is. */
+ i += length;
+ if (der != NULL) {
+ /* Ensure space for item */
+ if (j + i - start > *derSz) {
+ ret = BUFFER_E;
+ goto end;
+ }
+ /* Copy item as is */
+ XMEMCPY(der + j, ber + start, i - start);
+ }
+ j += i - start;
+ }
+ }
+
+ /* Return the length of the DER encoded ASN.1 */
+ *derSz = j;
+ if (der == NULL) {
+ ret = LENGTH_ONLY_E;
+ }
+end:
+#ifdef WOLFSSL_SMALL_STACK
+ if (indefItems != NULL) {
+ XFREE(indefItems, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+ return ret;
+}
+#endif
+
+#if defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN)
+
+#if (!defined(NO_RSA) && !defined(HAVE_USER_RSA)) || \
+ defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448)
+
+#ifdef WOLFSSL_CERT_EXT
+/* Set the DER/BER encoding of the ASN.1 BIT_STRING with a 16-bit value.
+ *
+ * val 16-bit value to encode.
+ * output Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+static word32 SetBitString16Bit(word16 val, byte* output)
+{
+ word32 idx;
+ int len;
+ byte lastByte;
+ byte unusedBits = 0;
+
+ if ((val >> 8) != 0) {
+ len = 2;
+ lastByte = (byte)(val >> 8);
+ }
+ else {
+ len = 1;
+ lastByte = (byte)val;
+ }
+
+ while (((lastByte >> unusedBits) & 0x01) == 0x00)
+ unusedBits++;
+
+ idx = SetBitString(len, unusedBits, output);
+ output[idx++] = (byte)val;
+ if (len > 1)
+ output[idx++] = (byte)(val >> 8);
+
+ return idx;
+}
+#endif /* WOLFSSL_CERT_EXT */
+#endif /* !NO_RSA || HAVE_ECC || HAVE_ED25519 || defined(HAVE_ED448) */
+#endif /* WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN */
+
+
+
+/* hashType */
+#ifdef WOLFSSL_MD2
+ static const byte hashMd2hOid[] = {42, 134, 72, 134, 247, 13, 2, 2};
+#endif
+#ifndef NO_MD5
+ static const byte hashMd5hOid[] = {42, 134, 72, 134, 247, 13, 2, 5};
+#endif
+#ifndef NO_SHA
+ static const byte hashSha1hOid[] = {43, 14, 3, 2, 26};
+#endif
+#ifdef WOLFSSL_SHA224
+ static const byte hashSha224hOid[] = {96, 134, 72, 1, 101, 3, 4, 2, 4};
+#endif
+#ifndef NO_SHA256
+ static const byte hashSha256hOid[] = {96, 134, 72, 1, 101, 3, 4, 2, 1};
+#endif
+#ifdef WOLFSSL_SHA384
+ static const byte hashSha384hOid[] = {96, 134, 72, 1, 101, 3, 4, 2, 2};
+#endif
+#ifdef WOLFSSL_SHA512
+ static const byte hashSha512hOid[] = {96, 134, 72, 1, 101, 3, 4, 2, 3};
+#endif
+
+/* hmacType */
+#ifndef NO_HMAC
+ #ifdef WOLFSSL_SHA224
+ static const byte hmacSha224Oid[] = {42, 134, 72, 134, 247, 13, 2, 8};
+ #endif
+ #ifndef NO_SHA256
+ static const byte hmacSha256Oid[] = {42, 134, 72, 134, 247, 13, 2, 9};
+ #endif
+ #ifdef WOLFSSL_SHA384
+ static const byte hmacSha384Oid[] = {42, 134, 72, 134, 247, 13, 2, 10};
+ #endif
+ #ifdef WOLFSSL_SHA512
+ static const byte hmacSha512Oid[] = {42, 134, 72, 134, 247, 13, 2, 11};
+ #endif
+#endif
+
+/* sigType */
+#if !defined(NO_DSA) && !defined(NO_SHA)
+ static const byte sigSha1wDsaOid[] = {42, 134, 72, 206, 56, 4, 3};
+#endif /* NO_DSA */
#ifndef NO_RSA
+ #ifdef WOLFSSL_MD2
+ static const byte sigMd2wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1, 2};
+ #endif
+ #ifndef NO_MD5
+ static const byte sigMd5wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1, 4};
+ #endif
+ #ifndef NO_SHA
+ static const byte sigSha1wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1, 5};
+ #endif
+ #ifdef WOLFSSL_SHA224
+ static const byte sigSha224wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1,14};
+ #endif
+ #ifndef NO_SHA256
+ static const byte sigSha256wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1,11};
+ #endif
+ #ifdef WOLFSSL_SHA384
+ static const byte sigSha384wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1,12};
+ #endif
+ #ifdef WOLFSSL_SHA512
+ static const byte sigSha512wRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1,13};
+ #endif
+#endif /* NO_RSA */
+#ifdef HAVE_ECC
+ #ifndef NO_SHA
+ static const byte sigSha1wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 1};
+ #endif
+ #ifdef WOLFSSL_SHA224
+ static const byte sigSha224wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 3, 1};
+ #endif
+ #ifndef NO_SHA256
+ static const byte sigSha256wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 3, 2};
+ #endif
+ #ifdef WOLFSSL_SHA384
+ static const byte sigSha384wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 3, 3};
+ #endif
+ #ifdef WOLFSSL_SHA512
+ static const byte sigSha512wEcdsaOid[] = {42, 134, 72, 206, 61, 4, 3, 4};
+ #endif
+#endif /* HAVE_ECC */
+#ifdef HAVE_ED25519
+ static const byte sigEd25519Oid[] = {43, 101, 112};
+#endif /* HAVE_ED25519 */
+#ifdef HAVE_ED448
+ static const byte sigEd448Oid[] = {43, 101, 113};
+#endif /* HAVE_ED448 */
+
+/* keyType */
+#ifndef NO_DSA
+ static const byte keyDsaOid[] = {42, 134, 72, 206, 56, 4, 1};
+#endif /* NO_DSA */
+#ifndef NO_RSA
+ static const byte keyRsaOid[] = {42, 134, 72, 134, 247, 13, 1, 1, 1};
+#endif /* NO_RSA */
+#ifdef HAVE_NTRU
+ static const byte keyNtruOid[] = {43, 6, 1, 4, 1, 193, 22, 1, 1, 1, 1};
+#endif /* HAVE_NTRU */
+#ifdef HAVE_ECC
+ static const byte keyEcdsaOid[] = {42, 134, 72, 206, 61, 2, 1};
+#endif /* HAVE_ECC */
+#ifdef HAVE_ED25519
+ static const byte keyEd25519Oid[] = {43, 101, 112};
+#endif /* HAVE_ED25519 */
+#ifdef HAVE_ED448
+ static const byte keyEd448Oid[] = {43, 101, 113};
+#endif /* HAVE_ED448 */
+#if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+ static const byte keyDhOid[] = {42, 134, 72, 134, 247, 13, 1, 3, 1};
+#endif /* ! NO_DH ... */
+
+/* curveType */
+#ifdef HAVE_ECC
+ /* See "ecc_sets" table in ecc.c */
+#endif /* HAVE_ECC */
+#ifdef HAVE_AES_CBC
+/* blkType */
+ #ifdef WOLFSSL_AES_128
+ static const byte blkAes128CbcOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 2};
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static const byte blkAes192CbcOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 22};
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static const byte blkAes256CbcOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 42};
+ #endif
+#endif /* HAVE_AES_CBC */
+#ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ static const byte blkAes128GcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 6};
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static const byte blkAes192GcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 26};
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static const byte blkAes256GcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 46};
+ #endif
+#endif /* HAVE_AESGCM */
+#ifdef HAVE_AESCCM
+ #ifdef WOLFSSL_AES_128
+ static const byte blkAes128CcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 7};
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static const byte blkAes192CcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 27};
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static const byte blkAes256CcmOid[] = {96, 134, 72, 1, 101, 3, 4, 1, 47};
+ #endif
+#endif /* HAVE_AESCCM */
-#ifdef HAVE_CAVIUM
+#ifndef NO_DES3
+ static const byte blkDesCbcOid[] = {43, 14, 3, 2, 7};
+ static const byte blkDes3CbcOid[] = {42, 134, 72, 134, 247, 13, 3, 7};
+#endif
-static int GetCaviumInt(byte** buff, word16* buffSz, const byte* input,
- word32* inOutIdx, word32 maxIdx, void* heap)
+/* keyWrapType */
+#ifdef WOLFSSL_AES_128
+ static const byte wrapAes128Oid[] = {96, 134, 72, 1, 101, 3, 4, 1, 5};
+#endif
+#ifdef WOLFSSL_AES_192
+ static const byte wrapAes192Oid[] = {96, 134, 72, 1, 101, 3, 4, 1, 25};
+#endif
+#ifdef WOLFSSL_AES_256
+ static const byte wrapAes256Oid[] = {96, 134, 72, 1, 101, 3, 4, 1, 45};
+#endif
+#ifdef HAVE_PKCS7
+/* From RFC 3211 */
+static const byte wrapPwriKekOid[] = {42, 134, 72, 134, 247, 13, 1, 9, 16, 3,9};
+#endif
+
+/* cmsKeyAgreeType */
+#ifndef NO_SHA
+ static const byte dhSinglePass_stdDH_sha1kdf_Oid[] =
+ {43, 129, 5, 16, 134, 72, 63, 0, 2};
+#endif
+#ifdef WOLFSSL_SHA224
+ static const byte dhSinglePass_stdDH_sha224kdf_Oid[] = {43, 129, 4, 1, 11, 0};
+#endif
+#ifndef NO_SHA256
+ static const byte dhSinglePass_stdDH_sha256kdf_Oid[] = {43, 129, 4, 1, 11, 1};
+#endif
+#ifdef WOLFSSL_SHA384
+ static const byte dhSinglePass_stdDH_sha384kdf_Oid[] = {43, 129, 4, 1, 11, 2};
+#endif
+#ifdef WOLFSSL_SHA512
+ static const byte dhSinglePass_stdDH_sha512kdf_Oid[] = {43, 129, 4, 1, 11, 3};
+#endif
+
+/* ocspType */
+#ifdef HAVE_OCSP
+ static const byte ocspBasicOid[] = {43, 6, 1, 5, 5, 7, 48, 1, 1};
+ static const byte ocspNonceOid[] = {43, 6, 1, 5, 5, 7, 48, 1, 2};
+#endif /* HAVE_OCSP */
+
+/* certExtType */
+static const byte extBasicCaOid[] = {85, 29, 19};
+static const byte extAltNamesOid[] = {85, 29, 17};
+static const byte extCrlDistOid[] = {85, 29, 31};
+static const byte extAuthInfoOid[] = {43, 6, 1, 5, 5, 7, 1, 1};
+static const byte extAuthKeyOid[] = {85, 29, 35};
+static const byte extSubjKeyOid[] = {85, 29, 14};
+static const byte extCertPolicyOid[] = {85, 29, 32};
+static const byte extKeyUsageOid[] = {85, 29, 15};
+static const byte extInhibitAnyOid[] = {85, 29, 54};
+static const byte extExtKeyUsageOid[] = {85, 29, 37};
+#ifndef IGNORE_NAME_CONSTRAINTS
+ static const byte extNameConsOid[] = {85, 29, 30};
+#endif
+
+/* certAuthInfoType */
+#ifdef HAVE_OCSP
+ static const byte extAuthInfoOcspOid[] = {43, 6, 1, 5, 5, 7, 48, 1};
+#endif
+static const byte extAuthInfoCaIssuerOid[] = {43, 6, 1, 5, 5, 7, 48, 2};
+
+/* certPolicyType */
+static const byte extCertPolicyAnyOid[] = {85, 29, 32, 0};
+
+/* certKeyUseType */
+static const byte extAltNamesHwNameOid[] = {43, 6, 1, 5, 5, 7, 8, 4};
+
+/* certKeyUseType */
+static const byte extExtKeyUsageAnyOid[] = {85, 29, 37, 0};
+static const byte extExtKeyUsageServerAuthOid[] = {43, 6, 1, 5, 5, 7, 3, 1};
+static const byte extExtKeyUsageClientAuthOid[] = {43, 6, 1, 5, 5, 7, 3, 2};
+static const byte extExtKeyUsageCodeSigningOid[] = {43, 6, 1, 5, 5, 7, 3, 3};
+static const byte extExtKeyUsageEmailProtectOid[] = {43, 6, 1, 5, 5, 7, 3, 4};
+static const byte extExtKeyUsageTimestampOid[] = {43, 6, 1, 5, 5, 7, 3, 8};
+static const byte extExtKeyUsageOcspSignOid[] = {43, 6, 1, 5, 5, 7, 3, 9};
+
+/* kdfType */
+static const byte pbkdf2Oid[] = {42, 134, 72, 134, 247, 13, 1, 5, 12};
+
+/* PKCS5 */
+#if !defined(NO_DES3) && !defined(NO_SHA)
+static const byte pbeSha1Des[] = {42, 134, 72, 134, 247, 13, 1, 5, 10};
+#endif
+static const byte pbes2[] = {42, 134, 72, 134, 247, 13, 1, 5, 13};
+
+/* PKCS12 */
+#if !defined(NO_RC4) && !defined(NO_SHA)
+static const byte pbeSha1RC4128[] = {42, 134, 72, 134, 247, 13, 1, 12, 1, 1};
+#endif
+#if !defined(NO_DES3) && !defined(NO_SHA)
+static const byte pbeSha1Des3[] = {42, 134, 72, 134, 247, 13, 1, 12, 1, 3};
+#endif
+
+#ifdef HAVE_LIBZ
+/* zlib compression */
+static const byte zlibCompress[] = {42, 134, 72, 134, 247, 13, 1, 9, 16, 3, 8};
+#endif
+#ifdef WOLFSSL_APACHE_HTTPD
+/* tlsExtType */
+static const byte tlsFeatureOid[] = {43, 6, 1, 5, 5, 7, 1, 24};
+/* certNameType */
+static const byte dnsSRVOid[] = {43, 6, 1, 5, 5, 7, 8, 7};
+#endif
+
+
+/* returns a pointer to the OID string on success and NULL on fail */
+const byte* OidFromId(word32 id, word32 type, word32* oidSz)
{
- word32 i = *inOutIdx;
- byte b = input[i++];
+ const byte* oid = NULL;
+
+ *oidSz = 0;
+
+ switch (type) {
+
+ case oidHashType:
+ switch (id) {
+ #ifdef WOLFSSL_MD2
+ case MD2h:
+ oid = hashMd2hOid;
+ *oidSz = sizeof(hashMd2hOid);
+ break;
+ #endif
+ #ifndef NO_MD5
+ case MD5h:
+ oid = hashMd5hOid;
+ *oidSz = sizeof(hashMd5hOid);
+ break;
+ #endif
+ #ifndef NO_SHA
+ case SHAh:
+ oid = hashSha1hOid;
+ *oidSz = sizeof(hashSha1hOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ case SHA224h:
+ oid = hashSha224hOid;
+ *oidSz = sizeof(hashSha224hOid);
+ break;
+ #endif
+ #ifndef NO_SHA256
+ case SHA256h:
+ oid = hashSha256hOid;
+ *oidSz = sizeof(hashSha256hOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case SHA384h:
+ oid = hashSha384hOid;
+ *oidSz = sizeof(hashSha384hOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case SHA512h:
+ oid = hashSha512hOid;
+ *oidSz = sizeof(hashSha512hOid);
+ break;
+ #endif
+ }
+ break;
+
+ case oidSigType:
+ switch (id) {
+ #if !defined(NO_DSA) && !defined(NO_SHA)
+ case CTC_SHAwDSA:
+ oid = sigSha1wDsaOid;
+ *oidSz = sizeof(sigSha1wDsaOid);
+ break;
+ #endif /* NO_DSA */
+ #ifndef NO_RSA
+ #ifdef WOLFSSL_MD2
+ case CTC_MD2wRSA:
+ oid = sigMd2wRsaOid;
+ *oidSz = sizeof(sigMd2wRsaOid);
+ break;
+ #endif
+ #ifndef NO_MD5
+ case CTC_MD5wRSA:
+ oid = sigMd5wRsaOid;
+ *oidSz = sizeof(sigMd5wRsaOid);
+ break;
+ #endif
+ #ifndef NO_SHA
+ case CTC_SHAwRSA:
+ oid = sigSha1wRsaOid;
+ *oidSz = sizeof(sigSha1wRsaOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ case CTC_SHA224wRSA:
+ oid = sigSha224wRsaOid;
+ *oidSz = sizeof(sigSha224wRsaOid);
+ break;
+ #endif
+ #ifndef NO_SHA256
+ case CTC_SHA256wRSA:
+ oid = sigSha256wRsaOid;
+ *oidSz = sizeof(sigSha256wRsaOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case CTC_SHA384wRSA:
+ oid = sigSha384wRsaOid;
+ *oidSz = sizeof(sigSha384wRsaOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case CTC_SHA512wRSA:
+ oid = sigSha512wRsaOid;
+ *oidSz = sizeof(sigSha512wRsaOid);
+ break;
+ #endif /* WOLFSSL_SHA512 */
+ #endif /* NO_RSA */
+ #ifdef HAVE_ECC
+ #ifndef NO_SHA
+ case CTC_SHAwECDSA:
+ oid = sigSha1wEcdsaOid;
+ *oidSz = sizeof(sigSha1wEcdsaOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ case CTC_SHA224wECDSA:
+ oid = sigSha224wEcdsaOid;
+ *oidSz = sizeof(sigSha224wEcdsaOid);
+ break;
+ #endif
+ #ifndef NO_SHA256
+ case CTC_SHA256wECDSA:
+ oid = sigSha256wEcdsaOid;
+ *oidSz = sizeof(sigSha256wEcdsaOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case CTC_SHA384wECDSA:
+ oid = sigSha384wEcdsaOid;
+ *oidSz = sizeof(sigSha384wEcdsaOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case CTC_SHA512wECDSA:
+ oid = sigSha512wEcdsaOid;
+ *oidSz = sizeof(sigSha512wEcdsaOid);
+ break;
+ #endif
+ #endif /* HAVE_ECC */
+ #ifdef HAVE_ED25519
+ case CTC_ED25519:
+ oid = sigEd25519Oid;
+ *oidSz = sizeof(sigEd25519Oid);
+ break;
+ #endif
+ #ifdef HAVE_ED448
+ case CTC_ED448:
+ oid = sigEd448Oid;
+ *oidSz = sizeof(sigEd448Oid);
+ break;
+ #endif
+ default:
+ break;
+ }
+ break;
+
+ case oidKeyType:
+ switch (id) {
+ #ifndef NO_DSA
+ case DSAk:
+ oid = keyDsaOid;
+ *oidSz = sizeof(keyDsaOid);
+ break;
+ #endif /* NO_DSA */
+ #ifndef NO_RSA
+ case RSAk:
+ oid = keyRsaOid;
+ *oidSz = sizeof(keyRsaOid);
+ break;
+ #endif /* NO_RSA */
+ #ifdef HAVE_NTRU
+ case NTRUk:
+ oid = keyNtruOid;
+ *oidSz = sizeof(keyNtruOid);
+ break;
+ #endif /* HAVE_NTRU */
+ #ifdef HAVE_ECC
+ case ECDSAk:
+ oid = keyEcdsaOid;
+ *oidSz = sizeof(keyEcdsaOid);
+ break;
+ #endif /* HAVE_ECC */
+ #ifdef HAVE_ED25519
+ case ED25519k:
+ oid = keyEd25519Oid;
+ *oidSz = sizeof(keyEd25519Oid);
+ break;
+ #endif /* HAVE_ED25519 */
+ #ifdef HAVE_ED448
+ case ED448k:
+ oid = keyEd448Oid;
+ *oidSz = sizeof(keyEd448Oid);
+ break;
+ #endif /* HAVE_ED448 */
+ #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+ case DHk:
+ oid = keyDhOid;
+ *oidSz = sizeof(keyDhOid);
+ break;
+ #endif /* ! NO_DH && (WOLFSSL_QT || OPENSSL_ALL) */
+ default:
+ break;
+ }
+ break;
+
+ #ifdef HAVE_ECC
+ case oidCurveType:
+ if (wc_ecc_get_oid(id, &oid, oidSz) < 0) {
+ WOLFSSL_MSG("ECC OID not found");
+ }
+ break;
+ #endif /* HAVE_ECC */
+
+ case oidBlkType:
+ switch (id) {
+ #ifdef HAVE_AES_CBC
+ #ifdef WOLFSSL_AES_128
+ case AES128CBCb:
+ oid = blkAes128CbcOid;
+ *oidSz = sizeof(blkAes128CbcOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CBCb:
+ oid = blkAes192CbcOid;
+ *oidSz = sizeof(blkAes192CbcOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CBCb:
+ oid = blkAes256CbcOid;
+ *oidSz = sizeof(blkAes256CbcOid);
+ break;
+ #endif
+ #endif /* HAVE_AES_CBC */
+ #ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ case AES128GCMb:
+ oid = blkAes128GcmOid;
+ *oidSz = sizeof(blkAes128GcmOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192GCMb:
+ oid = blkAes192GcmOid;
+ *oidSz = sizeof(blkAes192GcmOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256GCMb:
+ oid = blkAes256GcmOid;
+ *oidSz = sizeof(blkAes256GcmOid);
+ break;
+ #endif
+ #endif /* HAVE_AESGCM */
+ #ifdef HAVE_AESCCM
+ #ifdef WOLFSSL_AES_128
+ case AES128CCMb:
+ oid = blkAes128CcmOid;
+ *oidSz = sizeof(blkAes128CcmOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CCMb:
+ oid = blkAes192CcmOid;
+ *oidSz = sizeof(blkAes192CcmOid);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CCMb:
+ oid = blkAes256CcmOid;
+ *oidSz = sizeof(blkAes256CcmOid);
+ break;
+ #endif
+ #endif /* HAVE_AESCCM */
+ #ifndef NO_DES3
+ case DESb:
+ oid = blkDesCbcOid;
+ *oidSz = sizeof(blkDesCbcOid);
+ break;
+ case DES3b:
+ oid = blkDes3CbcOid;
+ *oidSz = sizeof(blkDes3CbcOid);
+ break;
+ #endif /* !NO_DES3 */
+ }
+ break;
+
+ #ifdef HAVE_OCSP
+ case oidOcspType:
+ switch (id) {
+ case OCSP_BASIC_OID:
+ oid = ocspBasicOid;
+ *oidSz = sizeof(ocspBasicOid);
+ break;
+ case OCSP_NONCE_OID:
+ oid = ocspNonceOid;
+ *oidSz = sizeof(ocspNonceOid);
+ break;
+ }
+ break;
+ #endif /* HAVE_OCSP */
+
+ case oidCertExtType:
+ switch (id) {
+ case BASIC_CA_OID:
+ oid = extBasicCaOid;
+ *oidSz = sizeof(extBasicCaOid);
+ break;
+ case ALT_NAMES_OID:
+ oid = extAltNamesOid;
+ *oidSz = sizeof(extAltNamesOid);
+ break;
+ case CRL_DIST_OID:
+ oid = extCrlDistOid;
+ *oidSz = sizeof(extCrlDistOid);
+ break;
+ case AUTH_INFO_OID:
+ oid = extAuthInfoOid;
+ *oidSz = sizeof(extAuthInfoOid);
+ break;
+ case AUTH_KEY_OID:
+ oid = extAuthKeyOid;
+ *oidSz = sizeof(extAuthKeyOid);
+ break;
+ case SUBJ_KEY_OID:
+ oid = extSubjKeyOid;
+ *oidSz = sizeof(extSubjKeyOid);
+ break;
+ case CERT_POLICY_OID:
+ oid = extCertPolicyOid;
+ *oidSz = sizeof(extCertPolicyOid);
+ break;
+ case KEY_USAGE_OID:
+ oid = extKeyUsageOid;
+ *oidSz = sizeof(extKeyUsageOid);
+ break;
+ case INHIBIT_ANY_OID:
+ oid = extInhibitAnyOid;
+ *oidSz = sizeof(extInhibitAnyOid);
+ break;
+ case EXT_KEY_USAGE_OID:
+ oid = extExtKeyUsageOid;
+ *oidSz = sizeof(extExtKeyUsageOid);
+ break;
+ #ifndef IGNORE_NAME_CONSTRAINTS
+ case NAME_CONS_OID:
+ oid = extNameConsOid;
+ *oidSz = sizeof(extNameConsOid);
+ break;
+ #endif
+ }
+ break;
+
+ case oidCrlExtType:
+ #ifdef HAVE_CRL
+ switch (id) {
+ case AUTH_KEY_OID:
+ oid = extAuthKeyOid;
+ *oidSz = sizeof(extAuthKeyOid);
+ break;
+ }
+ #endif
+ break;
+
+ case oidCertAuthInfoType:
+ switch (id) {
+ #ifdef HAVE_OCSP
+ case AIA_OCSP_OID:
+ oid = extAuthInfoOcspOid;
+ *oidSz = sizeof(extAuthInfoOcspOid);
+ break;
+ #endif
+ case AIA_CA_ISSUER_OID:
+ oid = extAuthInfoCaIssuerOid;
+ *oidSz = sizeof(extAuthInfoCaIssuerOid);
+ break;
+ }
+ break;
+
+ case oidCertPolicyType:
+ switch (id) {
+ case CP_ANY_OID:
+ oid = extCertPolicyAnyOid;
+ *oidSz = sizeof(extCertPolicyAnyOid);
+ break;
+ }
+ break;
+
+ case oidCertAltNameType:
+ switch (id) {
+ case HW_NAME_OID:
+ oid = extAltNamesHwNameOid;
+ *oidSz = sizeof(extAltNamesHwNameOid);
+ break;
+ }
+ break;
+
+ case oidCertKeyUseType:
+ switch (id) {
+ case EKU_ANY_OID:
+ oid = extExtKeyUsageAnyOid;
+ *oidSz = sizeof(extExtKeyUsageAnyOid);
+ break;
+ case EKU_SERVER_AUTH_OID:
+ oid = extExtKeyUsageServerAuthOid;
+ *oidSz = sizeof(extExtKeyUsageServerAuthOid);
+ break;
+ case EKU_CLIENT_AUTH_OID:
+ oid = extExtKeyUsageClientAuthOid;
+ *oidSz = sizeof(extExtKeyUsageClientAuthOid);
+ break;
+ case EKU_CODESIGNING_OID:
+ oid = extExtKeyUsageCodeSigningOid;
+ *oidSz = sizeof(extExtKeyUsageCodeSigningOid);
+ break;
+ case EKU_EMAILPROTECT_OID:
+ oid = extExtKeyUsageEmailProtectOid;
+ *oidSz = sizeof(extExtKeyUsageEmailProtectOid);
+ break;
+ case EKU_TIMESTAMP_OID:
+ oid = extExtKeyUsageTimestampOid;
+ *oidSz = sizeof(extExtKeyUsageTimestampOid);
+ break;
+ case EKU_OCSP_SIGN_OID:
+ oid = extExtKeyUsageOcspSignOid;
+ *oidSz = sizeof(extExtKeyUsageOcspSignOid);
+ break;
+ }
+ break;
+
+ case oidKdfType:
+ switch (id) {
+ case PBKDF2_OID:
+ oid = pbkdf2Oid;
+ *oidSz = sizeof(pbkdf2Oid);
+ break;
+ }
+ break;
+
+ case oidPBEType:
+ switch (id) {
+ #if !defined(NO_SHA) && !defined(NO_RC4)
+ case PBE_SHA1_RC4_128:
+ oid = pbeSha1RC4128;
+ *oidSz = sizeof(pbeSha1RC4128);
+ break;
+ #endif
+ #if !defined(NO_SHA) && !defined(NO_DES3)
+ case PBE_SHA1_DES:
+ oid = pbeSha1Des;
+ *oidSz = sizeof(pbeSha1Des);
+ break;
+
+ #endif
+ #if !defined(NO_SHA) && !defined(NO_DES3)
+ case PBE_SHA1_DES3:
+ oid = pbeSha1Des3;
+ *oidSz = sizeof(pbeSha1Des3);
+ break;
+ #endif
+ case PBES2:
+ oid = pbes2;
+ *oidSz = sizeof(pbes2);
+ break;
+ }
+ break;
+
+ case oidKeyWrapType:
+ switch (id) {
+ #ifdef WOLFSSL_AES_128
+ case AES128_WRAP:
+ oid = wrapAes128Oid;
+ *oidSz = sizeof(wrapAes128Oid);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192_WRAP:
+ oid = wrapAes192Oid;
+ *oidSz = sizeof(wrapAes192Oid);
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256_WRAP:
+ oid = wrapAes256Oid;
+ *oidSz = sizeof(wrapAes256Oid);
+ break;
+ #endif
+ #ifdef HAVE_PKCS7
+ case PWRI_KEK_WRAP:
+ oid = wrapPwriKekOid;
+ *oidSz = sizeof(wrapPwriKekOid);
+ break;
+ #endif
+ }
+ break;
+
+ case oidCmsKeyAgreeType:
+ switch (id) {
+ #ifndef NO_SHA
+ case dhSinglePass_stdDH_sha1kdf_scheme:
+ oid = dhSinglePass_stdDH_sha1kdf_Oid;
+ *oidSz = sizeof(dhSinglePass_stdDH_sha1kdf_Oid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ case dhSinglePass_stdDH_sha224kdf_scheme:
+ oid = dhSinglePass_stdDH_sha224kdf_Oid;
+ *oidSz = sizeof(dhSinglePass_stdDH_sha224kdf_Oid);
+ break;
+ #endif
+ #ifndef NO_SHA256
+ case dhSinglePass_stdDH_sha256kdf_scheme:
+ oid = dhSinglePass_stdDH_sha256kdf_Oid;
+ *oidSz = sizeof(dhSinglePass_stdDH_sha256kdf_Oid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case dhSinglePass_stdDH_sha384kdf_scheme:
+ oid = dhSinglePass_stdDH_sha384kdf_Oid;
+ *oidSz = sizeof(dhSinglePass_stdDH_sha384kdf_Oid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case dhSinglePass_stdDH_sha512kdf_scheme:
+ oid = dhSinglePass_stdDH_sha512kdf_Oid;
+ *oidSz = sizeof(dhSinglePass_stdDH_sha512kdf_Oid);
+ break;
+ #endif
+ }
+ break;
+
+#ifndef NO_HMAC
+ case oidHmacType:
+ switch (id) {
+ #ifdef WOLFSSL_SHA224
+ case HMAC_SHA224_OID:
+ oid = hmacSha224Oid;
+ *oidSz = sizeof(hmacSha224Oid);
+ break;
+ #endif
+ #ifndef NO_SHA256
+ case HMAC_SHA256_OID:
+ oid = hmacSha256Oid;
+ *oidSz = sizeof(hmacSha256Oid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case HMAC_SHA384_OID:
+ oid = hmacSha384Oid;
+ *oidSz = sizeof(hmacSha384Oid);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case HMAC_SHA512_OID:
+ oid = hmacSha512Oid;
+ *oidSz = sizeof(hmacSha512Oid);
+ break;
+ #endif
+ }
+ break;
+#endif /* !NO_HMAC */
+
+#ifdef HAVE_LIBZ
+ case oidCompressType:
+ switch (id) {
+ case ZLIBc:
+ oid = zlibCompress;
+ *oidSz = sizeof(zlibCompress);
+ break;
+ }
+ break;
+#endif /* HAVE_LIBZ */
+#ifdef WOLFSSL_APACHE_HTTPD
+ case oidCertNameType:
+ switch (id) {
+ case NID_id_on_dnsSRV:
+ oid = dnsSRVOid;
+ *oidSz = sizeof(dnsSRVOid);
+ break;
+ }
+ break;
+ case oidTlsExtType:
+ switch (id) {
+ case TLS_FEATURE_OID:
+ oid = tlsFeatureOid;
+ *oidSz = sizeof(tlsFeatureOid);
+ break;
+ }
+ break;
+#endif /* WOLFSSL_APACHE_HTTPD */
+ case oidIgnoreType:
+ default:
+ break;
+ }
+
+ return oid;
+}
+
+#ifdef HAVE_OID_ENCODING
+int EncodeObjectId(const word16* in, word32 inSz, byte* out, word32* outSz)
+{
+ int i, x, len;
+ word32 d, t;
+
+ /* check args */
+ if (in == NULL || outSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* compute length of encoded OID */
+ d = (in[0] * 40) + in[1];
+ len = 0;
+ for (i = 1; i < (int)inSz; i++) {
+ x = 0;
+ t = d;
+ while (t) {
+ x++;
+ t >>= 1;
+ }
+ len += (x / 7) + ((x % 7) ? 1 : 0) + (d == 0 ? 1 : 0);
+
+ if (i < (int)inSz - 1) {
+ d = in[i + 1];
+ }
+ }
+
+ if (out) {
+ /* verify length */
+ if ((int)*outSz < len) {
+ return BUFFER_E; /* buffer provided is not large enough */
+ }
+
+ /* calc first byte */
+ d = (in[0] * 40) + in[1];
+
+ /* encode bytes */
+ x = 0;
+ for (i = 1; i < (int)inSz; i++) {
+ if (d) {
+ int y = x, z;
+ byte mask = 0;
+ while (d) {
+ out[x++] = (byte)((d & 0x7F) | mask);
+ d >>= 7;
+ mask |= 0x80; /* upper bit is set on all but the last byte */
+ }
+ /* now swap bytes y...x-1 */
+ z = x - 1;
+ while (y < z) {
+ mask = out[y];
+ out[y] = out[z];
+ out[z] = mask;
+ ++y;
+ --z;
+ }
+ }
+ else {
+ out[x++] = 0x00; /* zero value */
+ }
+
+ /* next word */
+ if (i < (int)inSz - 1) {
+ d = in[i + 1];
+ }
+ }
+ }
+
+ /* return length */
+ *outSz = len;
+
+ return 0;
+}
+#endif /* HAVE_OID_ENCODING */
+
+#ifdef HAVE_OID_DECODING
+int DecodeObjectId(const byte* in, word32 inSz, word16* out, word32* outSz)
+{
+ int x = 0, y = 0;
+ word32 t = 0;
+
+ /* check args */
+ if (in == NULL || outSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* decode bytes */
+ while (inSz--) {
+ t = (t << 7) | (in[x] & 0x7F);
+ if (!(in[x] & 0x80)) {
+ if (y >= (int)*outSz) {
+ return BUFFER_E;
+ }
+ if (y == 0) {
+ out[0] = (t / 40);
+ out[1] = (t % 40);
+ y = 2;
+ }
+ else {
+ out[y++] = t;
+ }
+ t = 0; /* reset tmp */
+ }
+ x++;
+ }
+
+ /* return length */
+ *outSz = y;
+
+ return 0;
+}
+#endif /* HAVE_OID_DECODING */
+
+/* Get the DER/BER encoding of an ASN.1 OBJECT_ID header.
+ *
+ * input Buffer holding DER/BER encoded data.
+ * inOutIdx Current index into buffer to parse.
+ * len The number of bytes in the ASN.1 data.
+ * maxIdx Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ * ASN_OBJECT_ID_E when the OBJECT_ID tag is not found.
+ * ASN_PARSE_E when length is invalid.
+ * Otherwise, 0 to indicate success.
+ */
+int GetASNObjectId(const byte* input, word32* inOutIdx, int* len,
+ word32 maxIdx)
+{
+ word32 idx = *inOutIdx;
int length;
+ byte tag;
- if (b != ASN_INTEGER)
+ if ((idx + 1) > maxIdx)
+ return BUFFER_E;
+
+ if (GetASNTag(input, &idx, &tag, maxIdx) != 0)
return ASN_PARSE_E;
- if (GetLength(input, &i, &length, maxIdx) < 0)
+ if (tag != ASN_OBJECT_ID)
+ return ASN_OBJECT_ID_E;
+
+ if (GetLength(input, &idx, &length, maxIdx) < 0)
return ASN_PARSE_E;
- if ( (b = input[i++]) == 0x00)
- length--;
- else
- i--;
+ *len = length;
+ *inOutIdx = idx;
+ return 0;
+}
- *buffSz = (word16)length;
- *buff = XMALLOC(*buffSz, heap, DYNAMIC_TYPE_CAVIUM_RSA);
- if (*buff == NULL)
- return MEMORY_E;
+/* Set the DER/BER encoding of the ASN.1 OBJECT_ID header.
+ *
+ * len Length of the OBJECT_ID data.
+ * output Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+int SetObjectId(int len, byte* output)
+{
+ int idx = 0;
+
+ output[idx++] = ASN_OBJECT_ID;
+ idx += SetLength(len, output + idx);
- XMEMCPY(*buff, input + i, *buffSz);
+ return idx;
+}
+
+int GetObjectId(const byte* input, word32* inOutIdx, word32* oid,
+ word32 oidType, word32 maxIdx)
+{
+ int ret = 0, length;
+ word32 idx = *inOutIdx;
+#ifndef NO_VERIFY_OID
+ word32 actualOidSz = 0;
+ const byte* actualOid;
+#endif /* NO_VERIFY_OID */
+
+ (void)oidType;
+ WOLFSSL_ENTER("GetObjectId()");
+ *oid = 0;
+
+ ret = GetASNObjectId(input, &idx, &length, maxIdx);
+ if (ret != 0)
+ return ret;
+
+#ifndef NO_VERIFY_OID
+ actualOid = &input[idx];
+ if (length > 0)
+ actualOidSz = (word32)length;
+#endif /* NO_VERIFY_OID */
+
+ while (length--) {
+ /* odd HC08 compiler behavior here when input[idx++] */
+ *oid += (word32)input[idx];
+ idx++;
+ }
+ /* just sum it up for now */
+
+ *inOutIdx = idx;
+
+#ifndef NO_VERIFY_OID
+ {
+ const byte* checkOid = NULL;
+ word32 checkOidSz;
+ #ifdef ASN_DUMP_OID
+ word32 i;
+ #endif
+
+ if (oidType != oidIgnoreType) {
+ checkOid = OidFromId(*oid, oidType, &checkOidSz);
+
+ #ifdef ASN_DUMP_OID
+ /* support for dumping OID information */
+ printf("OID (Type %d, Sz %d, Sum %d): ", oidType, actualOidSz, *oid);
+ for (i=0; i<actualOidSz; i++) {
+ printf("%d, ", actualOid[i]);
+ }
+ printf("\n");
+ #ifdef HAVE_OID_DECODING
+ {
+ word16 decOid[16];
+ word32 decOidSz = sizeof(decOid);
+ ret = DecodeObjectId(actualOid, actualOidSz, decOid, &decOidSz);
+ if (ret == 0) {
+ printf(" Decoded (Sz %d): ", decOidSz);
+ for (i=0; i<decOidSz; i++) {
+ printf("%d.", decOid[i]);
+ }
+ printf("\n");
+ }
+ else {
+ printf("DecodeObjectId failed: %d\n", ret);
+ }
+ }
+ #endif /* HAVE_OID_DECODING */
+ #endif /* ASN_DUMP_OID */
+
+ if (checkOid != NULL &&
+ (checkOidSz != actualOidSz ||
+ XMEMCMP(actualOid, checkOid, checkOidSz) != 0)) {
+ WOLFSSL_MSG("OID Check Failed");
+ return ASN_UNKNOWN_OID_E;
+ }
+ }
+ }
+#endif /* NO_VERIFY_OID */
+
+ return ret;
+}
+
+static int SkipObjectId(const byte* input, word32* inOutIdx, word32 maxIdx)
+{
+ word32 idx = *inOutIdx;
+ int length;
+ int ret;
+
+ ret = GetASNObjectId(input, &idx, &length, maxIdx);
+ if (ret != 0)
+ return ret;
+
+ idx += length;
+ *inOutIdx = idx;
- *inOutIdx = i + length;
return 0;
}
-static int CaviumRsaPrivateKeyDecode(const byte* input, word32* inOutIdx,
- RsaKey* key, word32 inSz)
+int GetAlgoId(const byte* input, word32* inOutIdx, word32* oid,
+ word32 oidType, word32 maxIdx)
{
- int version, length;
- void* h = key->heap;
+ int length;
+ word32 idx = *inOutIdx;
+ int ret;
+ *oid = 0;
- if (GetSequence(input, inOutIdx, &length, inSz) < 0)
- return ASN_PARSE_E;
+ WOLFSSL_ENTER("GetAlgoId");
- if (GetMyVersion(input, inOutIdx, &version) < 0)
+ if (GetSequence(input, &idx, &length, maxIdx) < 0)
return ASN_PARSE_E;
- key->type = RSA_PRIVATE;
+ if (GetObjectId(input, &idx, oid, oidType, maxIdx) < 0)
+ return ASN_OBJECT_ID_E;
- if (GetCaviumInt(&key->c_n, &key->c_nSz, input, inOutIdx, inSz, h) < 0 ||
- GetCaviumInt(&key->c_e, &key->c_eSz, input, inOutIdx, inSz, h) < 0 ||
- GetCaviumInt(&key->c_d, &key->c_dSz, input, inOutIdx, inSz, h) < 0 ||
- GetCaviumInt(&key->c_p, &key->c_pSz, input, inOutIdx, inSz, h) < 0 ||
- GetCaviumInt(&key->c_q, &key->c_qSz, input, inOutIdx, inSz, h) < 0 ||
- GetCaviumInt(&key->c_dP, &key->c_dP_Sz, input, inOutIdx, inSz, h) < 0 ||
- GetCaviumInt(&key->c_dQ, &key->c_dQ_Sz, input, inOutIdx, inSz, h) < 0 ||
- GetCaviumInt(&key->c_u, &key->c_uSz, input, inOutIdx, inSz, h) < 0 )
- return ASN_RSA_KEY_E;
+ /* could have NULL tag and 0 terminator, but may not */
+ if (idx < maxIdx) {
+ word32 localIdx = idx; /*use localIdx to not advance when checking tag*/
+ byte tag;
+
+ if (GetASNTag(input, &localIdx, &tag, maxIdx) == 0) {
+ if (tag == ASN_TAG_NULL) {
+ ret = GetASNNull(input, &idx, maxIdx);
+ if (ret != 0)
+ return ret;
+ }
+ }
+ }
+
+ *inOutIdx = idx;
return 0;
}
+#ifndef NO_RSA
-#endif /* HAVE_CAVIUM */
-
+#ifndef HAVE_USER_RSA
int wc_RsaPrivateKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
word32 inSz)
{
- int version, length;
-
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC)
- return CaviumRsaPrivateKeyDecode(input, inOutIdx, key, inSz);
-#endif
+ int version, length;
+ if (inOutIdx == NULL) {
+ return BAD_FUNC_ARG;
+ }
if (GetSequence(input, inOutIdx, &length, inSz) < 0)
return ASN_PARSE_E;
- if (GetMyVersion(input, inOutIdx, &version) < 0)
+ if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
return ASN_PARSE_E;
key->type = RSA_PRIVATE;
if (GetInt(&key->n, input, inOutIdx, inSz) < 0 ||
GetInt(&key->e, input, inOutIdx, inSz) < 0 ||
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
GetInt(&key->d, input, inOutIdx, inSz) < 0 ||
GetInt(&key->p, input, inOutIdx, inSz) < 0 ||
- GetInt(&key->q, input, inOutIdx, inSz) < 0 ||
- GetInt(&key->dP, input, inOutIdx, inSz) < 0 ||
+ GetInt(&key->q, input, inOutIdx, inSz) < 0)
+#else
+ SkipInt(input, inOutIdx, inSz) < 0 ||
+ SkipInt(input, inOutIdx, inSz) < 0 ||
+ SkipInt(input, inOutIdx, inSz) < 0 )
+
+#endif
+ return ASN_RSA_KEY_E;
+#if (defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM)) \
+ && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ if (GetInt(&key->dP, input, inOutIdx, inSz) < 0 ||
GetInt(&key->dQ, input, inOutIdx, inSz) < 0 ||
GetInt(&key->u, input, inOutIdx, inSz) < 0 ) return ASN_RSA_KEY_E;
+#else
+ if (SkipInt(input, inOutIdx, inSz) < 0 ||
+ SkipInt(input, inOutIdx, inSz) < 0 ||
+ SkipInt(input, inOutIdx, inSz) < 0 ) return ASN_RSA_KEY_E;
+#endif
+
+#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_CRYPTOCELL)
+ if (wc_InitRsaHw(key) != 0) {
+ return BAD_STATE_E;
+ }
+#endif
return 0;
}
-
+#endif /* HAVE_USER_RSA */
#endif /* NO_RSA */
-/* Remove PKCS8 header, move beginning of traditional to beginning of input */
-int ToTraditional(byte* input, word32 sz)
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
+
+/* Remove PKCS8 header, place inOutIdx at beginning of traditional,
+ * return traditional length on success, negative on error */
+int ToTraditionalInline_ex(const byte* input, word32* inOutIdx, word32 sz,
+ word32* algId)
{
- word32 inOutIdx = 0, oid;
+ word32 idx;
int version, length;
+ int ret;
+ byte tag;
+
+ if (input == NULL || inOutIdx == NULL)
+ return BAD_FUNC_ARG;
+
+ idx = *inOutIdx;
- if (GetSequence(input, &inOutIdx, &length, sz) < 0)
+ if (GetSequence(input, &idx, &length, sz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetMyVersion(input, &idx, &version, sz) < 0)
return ASN_PARSE_E;
- if (GetMyVersion(input, &inOutIdx, &version) < 0)
+ if (GetAlgoId(input, &idx, algId, oidKeyType, sz) < 0)
return ASN_PARSE_E;
- if (GetAlgoId(input, &inOutIdx, &oid, sz) < 0)
+ if (GetASNTag(input, &idx, &tag, sz) < 0)
return ASN_PARSE_E;
+ idx = idx - 1; /* reset idx after finding tag */
- if (input[inOutIdx] == ASN_OBJECT_ID) {
- /* pkcs8 ecc uses slightly different format */
- inOutIdx++; /* past id */
- if (GetLength(input, &inOutIdx, &length, sz) < 0)
+ if (tag == ASN_OBJECT_ID) {
+ if (SkipObjectId(input, &idx, sz) < 0)
return ASN_PARSE_E;
- inOutIdx += length; /* over sub id, key input will verify */
}
- if (input[inOutIdx++] != ASN_OCTET_STRING)
- return ASN_PARSE_E;
+ ret = GetOctetString(input, &idx, &length, sz);
+ if (ret < 0) {
+ if (ret == BUFFER_E)
+ return ASN_PARSE_E;
+ /* Some private keys don't expect an octet string */
+ WOLFSSL_MSG("Couldn't find Octet string");
+ }
- if (GetLength(input, &inOutIdx, &length, sz) < 0)
- return ASN_PARSE_E;
+ *inOutIdx = idx;
+
+ return length;
+}
+
+int ToTraditionalInline(const byte* input, word32* inOutIdx, word32 sz)
+{
+ word32 oid;
+
+ return ToTraditionalInline_ex(input, inOutIdx, sz, &oid);
+}
+
+/* Remove PKCS8 header, move beginning of traditional to beginning of input */
+int ToTraditional_ex(byte* input, word32 sz, word32* algId)
+{
+ word32 inOutIdx = 0;
+ int length;
+
+ if (input == NULL)
+ return BAD_FUNC_ARG;
+
+ length = ToTraditionalInline_ex(input, &inOutIdx, sz, algId);
+ if (length < 0)
+ return length;
XMEMMOVE(input, input + inOutIdx, length);
return length;
}
+int ToTraditional(byte* input, word32 sz)
+{
+ word32 oid;
+
+ return ToTraditional_ex(input, sz, &oid);
+}
+
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
+
+#ifdef HAVE_PKCS8
+
+/* find beginning of traditional key inside PKCS#8 unencrypted buffer
+ * return traditional length on success, with inOutIdx at beginning of
+ * traditional
+ * return negative on failure/error */
+int wc_GetPkcs8TraditionalOffset(byte* input, word32* inOutIdx, word32 sz)
+{
+ int length;
+ word32 algId;
+
+ if (input == NULL || inOutIdx == NULL || (*inOutIdx > sz))
+ return BAD_FUNC_ARG;
+
+ length = ToTraditionalInline_ex(input, inOutIdx, sz, &algId);
+
+ return length;
+}
+
+
+/* PKCS#8 from RFC 5208
+ * This function takes in a DER key and converts it to PKCS#8 format. Used
+ * in creating PKCS#12 shrouded key bags.
+ * Reverse of ToTraditional
+ *
+ * PrivateKeyInfo ::= SEQUENCE {
+ * version Version,
+ * privateKeyAlgorithm PrivateKeyAlgorithmIdentifier,
+ * privateKey PrivateKey,
+ * attributes optional
+ * }
+ * Version ::= INTEGER
+ * PrivateKeyAlgorithmIdentifier ::= AlgorithmIdentifier
+ * PrivateKey ::= OCTET STRING
+ *
+ * out buffer to place result in
+ * outSz size of out buffer
+ * key buffer with DER key
+ * keySz size of key buffer
+ * algoID algorithm ID i.e. RSAk
+ * curveOID ECC curve oid if used. Should be NULL for RSA keys.
+ * oidSz size of curve oid. Is set to 0 if curveOID is NULL.
+ *
+ * Returns the size of PKCS#8 placed into out. In error cases returns negative
+ * values.
+ */
+int wc_CreatePKCS8Key(byte* out, word32* outSz, byte* key, word32 keySz,
+ int algoID, const byte* curveOID, word32 oidSz)
+{
+ word32 keyIdx = 0;
+ word32 tmpSz = 0;
+ word32 sz;
+
+
+ /* If out is NULL then return the max size needed
+ * + 2 for ASN_OBJECT_ID and ASN_OCTET_STRING tags */
+ if (out == NULL && outSz != NULL) {
+ *outSz = keySz + MAX_SEQ_SZ + MAX_VERSION_SZ + MAX_ALGO_SZ
+ + MAX_LENGTH_SZ + MAX_LENGTH_SZ + 2;
+
+ if (curveOID != NULL)
+ *outSz += oidSz + MAX_LENGTH_SZ + 1;
+
+ WOLFSSL_MSG("Checking size of PKCS8");
+
+ return LENGTH_ONLY_E;
+ }
+
+ WOLFSSL_ENTER("wc_CreatePKCS8Key()");
+
+ if (key == NULL || out == NULL || outSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* check the buffer has enough room for largest possible size */
+ if (curveOID != NULL) {
+ if (*outSz < (keySz + MAX_SEQ_SZ + MAX_VERSION_SZ + MAX_ALGO_SZ
+ + MAX_LENGTH_SZ + MAX_LENGTH_SZ + 3 + oidSz + MAX_LENGTH_SZ))
+ return BUFFER_E;
+ }
+ else {
+ oidSz = 0; /* with no curveOID oid size must be 0 */
+ if (*outSz < (keySz + MAX_SEQ_SZ + MAX_VERSION_SZ + MAX_ALGO_SZ
+ + MAX_LENGTH_SZ + MAX_LENGTH_SZ + 2))
+ return BUFFER_E;
+ }
+
+ /* PrivateKeyInfo ::= SEQUENCE */
+ keyIdx += MAX_SEQ_SZ; /* save room for sequence */
+
+ /* version Version
+ * no header information just INTEGER */
+ sz = SetMyVersion(PKCS8v0, out + keyIdx, 0);
+ tmpSz += sz; keyIdx += sz;
+
+ /* privateKeyAlgorithm PrivateKeyAlgorithmIdentifier */
+ sz = 0; /* set sz to 0 and get privateKey oid buffer size needed */
+ if (curveOID != NULL && oidSz > 0) {
+ byte buf[MAX_LENGTH_SZ];
+ sz = SetLength(oidSz, buf);
+ sz += 1; /* plus one for ASN object id */
+ }
+ sz = SetAlgoID(algoID, out + keyIdx, oidKeyType, oidSz + sz);
+ tmpSz += sz; keyIdx += sz;
+
+ /* privateKey PrivateKey *
+ * pkcs8 ecc uses slightly different format. Places curve oid in
+ * buffer */
+ if (curveOID != NULL && oidSz > 0) {
+ sz = SetObjectId(oidSz, out + keyIdx);
+ keyIdx += sz; tmpSz += sz;
+ XMEMCPY(out + keyIdx, curveOID, oidSz);
+ keyIdx += oidSz; tmpSz += oidSz;
+ }
+
+ sz = SetOctetString(keySz, out + keyIdx);
+ keyIdx += sz; tmpSz += sz;
+ XMEMCPY(out + keyIdx, key, keySz);
+ tmpSz += keySz;
+
+ /* attributes optional
+ * No attributes currently added */
+
+ /* rewind and add sequence */
+ sz = SetSequence(tmpSz, out);
+ XMEMMOVE(out + sz, out + MAX_SEQ_SZ, tmpSz);
+
+ return tmpSz + sz;
+}
+
+#endif /* HAVE_PKCS8 */
+
+#if defined(HAVE_PKCS12) || !defined(NO_CHECK_PRIVATE_KEY)
+/* check that the private key is a pair for the public key in certificate
+ * return 1 (true) on match
+ * return 0 or negative value on failure/error
+ *
+ * key : buffer holding DER format key
+ * keySz : size of key buffer
+ * der : a initialized and parsed DecodedCert holding a certificate */
+int wc_CheckPrivateKey(byte* key, word32 keySz, DecodedCert* der)
+{
+ int ret;
+ (void)keySz;
+
+ if (key == NULL || der == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ #if !defined(NO_RSA) && !defined(NO_ASN_CRYPT)
+ /* test if RSA key */
+ if (der->keyOID == RSAk) {
+ #ifdef WOLFSSL_SMALL_STACK
+ RsaKey* a;
+ RsaKey* b = NULL;
+ #else
+ RsaKey a[1], b[1];
+ #endif
+ word32 keyIdx = 0;
+
+ #ifdef WOLFSSL_SMALL_STACK
+ a = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL, DYNAMIC_TYPE_RSA);
+ if (a == NULL)
+ return MEMORY_E;
+ b = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL, DYNAMIC_TYPE_RSA);
+ if (b == NULL) {
+ XFREE(a, NULL, DYNAMIC_TYPE_RSA);
+ return MEMORY_E;
+ }
+ #endif
+
+ if ((ret = wc_InitRsaKey(a, NULL)) < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_RSA);
+ XFREE(a, NULL, DYNAMIC_TYPE_RSA);
+ #endif
+ return ret;
+ }
+ if ((ret = wc_InitRsaKey(b, NULL)) < 0) {
+ wc_FreeRsaKey(a);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_RSA);
+ XFREE(a, NULL, DYNAMIC_TYPE_RSA);
+ #endif
+ return ret;
+ }
+ if ((ret = wc_RsaPrivateKeyDecode(key, &keyIdx, a, keySz)) == 0) {
+ WOLFSSL_MSG("Checking RSA key pair");
+ keyIdx = 0; /* reset to 0 for parsing public key */
+
+ if ((ret = wc_RsaPublicKeyDecode(der->publicKey, &keyIdx, b,
+ der->pubKeySize)) == 0) {
+ /* limit for user RSA crypto because of RsaKey
+ * dereference. */
+ #if defined(HAVE_USER_RSA)
+ WOLFSSL_MSG("Cannot verify RSA pair with user RSA");
+ ret = 1; /* return first RSA cert as match */
+ #else
+ /* both keys extracted successfully now check n and e
+ * values are the same. This is dereferencing RsaKey */
+ if (mp_cmp(&(a->n), &(b->n)) != MP_EQ ||
+ mp_cmp(&(a->e), &(b->e)) != MP_EQ) {
+ ret = MP_CMP_E;
+ }
+ else
+ ret = 1;
+ #endif
+ }
+ }
+ wc_FreeRsaKey(b);
+ wc_FreeRsaKey(a);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_RSA);
+ XFREE(a, NULL, DYNAMIC_TYPE_RSA);
+ #endif
+ }
+ else
+ #endif /* !NO_RSA && !NO_ASN_CRYPT */
+
+ #if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT) && !defined(NO_ASN_CRYPT)
+ if (der->keyOID == ECDSAk) {
+ #ifdef WOLFSSL_SMALL_STACK
+ ecc_key* key_pair;
+ byte* privDer;
+ #else
+ ecc_key key_pair[1];
+ byte privDer[MAX_ECC_BYTES];
+ #endif
+ word32 privSz = MAX_ECC_BYTES;
+ word32 keyIdx = 0;
+
+ #ifdef WOLFSSL_SMALL_STACK
+ key_pair = (ecc_key*)XMALLOC(sizeof(ecc_key), NULL, DYNAMIC_TYPE_ECC);
+ if (key_pair == NULL)
+ return MEMORY_E;
+ privDer = (byte*)XMALLOC(MAX_ECC_BYTES, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (privDer == NULL) {
+ XFREE(key_pair, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+ #endif
+
+ if ((ret = wc_ecc_init(key_pair)) < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(privDer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(key_pair, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ return ret;
+ }
+
+ if ((ret = wc_EccPrivateKeyDecode(key, &keyIdx, key_pair,
+ keySz)) == 0) {
+ WOLFSSL_MSG("Checking ECC key pair");
+
+ if ((ret = wc_ecc_export_private_only(key_pair, privDer, &privSz))
+ == 0) {
+ wc_ecc_free(key_pair);
+ ret = wc_ecc_init(key_pair);
+ if (ret == 0) {
+ ret = wc_ecc_import_private_key((const byte*)privDer,
+ privSz, (const byte*)der->publicKey,
+ der->pubKeySize, key_pair);
+ }
+
+                    /* public and private extracted successfully; now check that
+                     * they are a pair and also do sanity checks on the key.
+                     * wc_ecc_check_key checks that private * base generator
+                     * equals pubkey */
+ if (ret == 0) {
+ if ((ret = wc_ecc_check_key(key_pair)) == 0) {
+ ret = 1;
+ }
+ }
+ ForceZero(privDer, privSz);
+ }
+ }
+ wc_ecc_free(key_pair);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(privDer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(key_pair, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ }
+ else
+ #endif /* HAVE_ECC && HAVE_ECC_KEY_EXPORT && !NO_ASN_CRYPT */
+
+ #if defined(HAVE_ED25519) && !defined(NO_ASN_CRYPT)
+ if (der->keyOID == ED25519k) {
+ #ifdef WOLFSSL_SMALL_STACK
+ ed25519_key* key_pair;
+ #else
+ ed25519_key key_pair[1];
+ #endif
+ word32 keyIdx = 0;
+
+ #ifdef WOLFSSL_SMALL_STACK
+ key_pair = (ed25519_key*)XMALLOC(sizeof(ed25519_key), NULL,
+ DYNAMIC_TYPE_ED25519);
+ if (key_pair == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_ed25519_init(key_pair)) < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(key_pair, NULL, DYNAMIC_TYPE_ED25519);
+ #endif
+ return ret;
+ }
+ if ((ret = wc_Ed25519PrivateKeyDecode(key, &keyIdx, key_pair,
+ keySz)) == 0) {
+ WOLFSSL_MSG("Checking ED25519 key pair");
+ keyIdx = 0;
+ if ((ret = wc_ed25519_import_public(der->publicKey, der->pubKeySize,
+ key_pair)) == 0) {
+                /* public and private extracted successfully; now check that
+                 * they are a pair and also do sanity checks on the key.
+                 * wc_ed25519_check_key checks that private * base generator
+                 * equals pubkey */
+ if ((ret = wc_ed25519_check_key(key_pair)) == 0)
+ ret = 1;
+ }
+ }
+ wc_ed25519_free(key_pair);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(key_pair, NULL, DYNAMIC_TYPE_ED25519);
+ #endif
+ }
+ else
+ #endif /* HAVE_ED25519 && !NO_ASN_CRYPT */
+
+ #if defined(HAVE_ED448) && !defined(NO_ASN_CRYPT)
+ if (der->keyOID == ED448k) {
+ #ifdef WOLFSSL_SMALL_STACK
+ ed448_key* key_pair = NULL;
+ #else
+ ed448_key key_pair[1];
+ #endif
+ word32 keyIdx = 0;
+
+ #ifdef WOLFSSL_SMALL_STACK
+ key_pair = (ed448_key*)XMALLOC(sizeof(ed448_key), NULL,
+ DYNAMIC_TYPE_ED448);
+ if (key_pair == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_ed448_init(key_pair)) < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(key_pair, NULL, DYNAMIC_TYPE_ED448);
+ #endif
+ return ret;
+ }
+ if ((ret = wc_Ed448PrivateKeyDecode(key, &keyIdx, key_pair,
+ keySz)) == 0) {
+ WOLFSSL_MSG("Checking ED448 key pair");
+ keyIdx = 0;
+ if ((ret = wc_ed448_import_public(der->publicKey, der->pubKeySize,
+ key_pair)) == 0) {
+                /* public and private extracted successfully; now check that
+                 * they are a pair and also do sanity checks on the key.
+                 * wc_ed448_check_key checks that private * base generator
+                 * equals pubkey */
+ if ((ret = wc_ed448_check_key(key_pair)) == 0)
+ ret = 1;
+ }
+ }
+ wc_ed448_free(key_pair);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(key_pair, NULL, DYNAMIC_TYPE_ED448);
+ #endif
+ }
+ else
+ #endif /* HAVE_ED448 && !NO_ASN_CRYPT */
+ {
+ ret = 0;
+ }
+
+ (void)keySz;
+
+ return ret;
+}
+
+#endif /* HAVE_PKCS12 || !NO_CHECK_PRIVATE_KEY */
#ifndef NO_PWDBASED
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
/* Check To see if PKCS version algo is supported, set id if it is return 0
< 0 on error */
-static int CheckAlgo(int first, int second, int* id, int* version)
+static int CheckAlgo(int first, int second, int* id, int* version, int* blockSz)
{
*id = ALGO_ID_E;
*version = PKCS5; /* default */
+ if (blockSz) *blockSz = 8; /* default */
if (first == 1) {
switch (second) {
- case 1:
+#if !defined(NO_SHA)
+ #ifndef NO_RC4
+ case PBE_SHA1_RC4_128:
*id = PBE_SHA1_RC4_128;
- *version = PKCS12;
+ *version = PKCS12v1;
return 0;
- case 3:
+ #endif
+ #ifndef NO_DES3
+ case PBE_SHA1_DES3:
*id = PBE_SHA1_DES3;
- *version = PKCS12;
+ *version = PKCS12v1;
+ if (blockSz) *blockSz = DES_BLOCK_SIZE;
return 0;
+ case PBE_SHA1_DES:
+ *id = PBE_SHA1_DES;
+ *version = PKCS12v1;
+ if (blockSz) *blockSz = DES_BLOCK_SIZE;
+ return 0;
+ #endif
+#endif /* !NO_SHA */
default:
return ALGO_ID_E;
}
@@ -857,213 +3124,655 @@ static int CheckAlgo(int first, int second, int* id, int* version)
}
switch (second) {
+#ifndef NO_DES3
+ #ifndef NO_MD5
case 3: /* see RFC 2898 for ids */
*id = PBE_MD5_DES;
+ if (blockSz) *blockSz = DES_BLOCK_SIZE;
return 0;
+ #endif
+ #ifndef NO_SHA
case 10:
*id = PBE_SHA1_DES;
+ if (blockSz) *blockSz = DES_BLOCK_SIZE;
return 0;
+ #endif
+#endif /* !NO_DES3 */
default:
return ALGO_ID_E;
}
}
-
/* Check To see if PKCS v2 algo is supported, set id if it is return 0
< 0 on error */
-static int CheckAlgoV2(int oid, int* id)
+static int CheckAlgoV2(int oid, int* id, int* blockSz)
{
+ if (blockSz) *blockSz = 8; /* default */
+ (void)id; /* not used if AES and DES3 disabled */
switch (oid) {
- case 69:
+#if !defined(NO_DES3) && !defined(NO_SHA)
+ case DESb:
*id = PBE_SHA1_DES;
+ if (blockSz) *blockSz = DES_BLOCK_SIZE;
return 0;
- case 652:
+ case DES3b:
*id = PBE_SHA1_DES3;
+ if (blockSz) *blockSz = DES_BLOCK_SIZE;
return 0;
+#endif
+#ifdef WOLFSSL_AES_256
+ case AES256CBCb:
+ *id = PBE_AES256_CBC;
+ if (blockSz) *blockSz = AES_BLOCK_SIZE;
+ return 0;
+#endif
+#ifdef WOLFSSL_AES_128
+ case AES128CBCb:
+ *id = PBE_AES128_CBC;
+ if (blockSz) *blockSz = AES_BLOCK_SIZE;
+ return 0;
+#endif
default:
+ WOLFSSL_MSG("No PKCS v2 algo found");
return ALGO_ID_E;
}
}
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
+
+#ifdef HAVE_PKCS8
-/* Decrypt intput in place from parameters based on id */
-static int DecryptKey(const char* password, int passwordSz, byte* salt,
- int saltSz, int iterations, int id, byte* input,
- int length, int version, byte* cbcIv)
+int wc_GetKeyOID(byte* key, word32 keySz, const byte** curveOID, word32* oidSz,
+ int* algoID, void* heap)
{
- int typeH;
- int derivedLen;
- int decryptionType;
- int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- byte* key;
-#else
- byte key[MAX_KEY_SIZE];
-#endif
+ word32 tmpIdx = 0;
- switch (id) {
- case PBE_MD5_DES:
- typeH = MD5;
- derivedLen = 16; /* may need iv for v1.5 */
- decryptionType = DES_TYPE;
- break;
+ if (key == NULL || algoID == NULL)
+ return BAD_FUNC_ARG;
- case PBE_SHA1_DES:
- typeH = SHA;
- derivedLen = 16; /* may need iv for v1.5 */
- decryptionType = DES_TYPE;
- break;
+ *algoID = 0;
- case PBE_SHA1_DES3:
- typeH = SHA;
- derivedLen = 32; /* may need iv for v1.5 */
- decryptionType = DES3_TYPE;
- break;
+ #if !defined(NO_RSA) && !defined(NO_ASN_CRYPT)
+ {
+ RsaKey rsa;
- case PBE_SHA1_RC4_128:
- typeH = SHA;
- derivedLen = 16;
- decryptionType = RC4_TYPE;
- break;
+ wc_InitRsaKey(&rsa, heap);
+ if (wc_RsaPrivateKeyDecode(key, &tmpIdx, &rsa, keySz) == 0) {
+ *algoID = RSAk;
+ }
+ else {
+ WOLFSSL_MSG("Not RSA DER key");
+ }
+ wc_FreeRsaKey(&rsa);
+ }
+ #endif /* !NO_RSA && !NO_ASN_CRYPT */
+ #if defined(HAVE_ECC) && !defined(NO_ASN_CRYPT)
+ if (*algoID == 0) {
+ ecc_key ecc;
+
+ tmpIdx = 0;
+ wc_ecc_init_ex(&ecc, heap, INVALID_DEVID);
+ if (wc_EccPrivateKeyDecode(key, &tmpIdx, &ecc, keySz) == 0) {
+ *algoID = ECDSAk;
+
+ /* now find oid */
+ if (wc_ecc_get_oid(ecc.dp->oidSum, curveOID, oidSz) < 0) {
+ WOLFSSL_MSG("Error getting ECC curve OID");
+ wc_ecc_free(&ecc);
+ return BAD_FUNC_ARG;
+ }
+ }
+ else {
+ WOLFSSL_MSG("Not ECC DER key either");
+ }
+ wc_ecc_free(&ecc);
+ }
+#endif /* HAVE_ECC && !NO_ASN_CRYPT */
+#if defined(HAVE_ED25519) && !defined(NO_ASN_CRYPT)
+ if (*algoID != RSAk && *algoID != ECDSAk) {
+ ed25519_key ed25519;
+
+ tmpIdx = 0;
+ if (wc_ed25519_init(&ed25519) == 0) {
+ if (wc_Ed25519PrivateKeyDecode(key, &tmpIdx, &ed25519, keySz)
+ == 0) {
+ *algoID = ED25519k;
+ }
+ else {
+ WOLFSSL_MSG("Not ED25519 DER key");
+ }
+ wc_ed25519_free(&ed25519);
+ }
+ else {
+ WOLFSSL_MSG("GetKeyOID wc_ed25519_init failed");
+ }
+ }
+#endif /* HAVE_ED25519 && !NO_ASN_CRYPT */
+#if defined(HAVE_ED448) && !defined(NO_ASN_CRYPT)
+ if (*algoID != RSAk && *algoID != ECDSAk && *algoID != ED25519k) {
+ ed448_key ed448;
+
+ tmpIdx = 0;
+ if (wc_ed448_init(&ed448) == 0) {
+ if (wc_Ed448PrivateKeyDecode(key, &tmpIdx, &ed448, keySz) == 0) {
+ *algoID = ED448k;
+ }
+ else {
+ WOLFSSL_MSG("Not ED448 DER key");
+ }
+ wc_ed448_free(&ed448);
+ }
+ else {
+ WOLFSSL_MSG("GetKeyOID wc_ed448_init failed");
+ }
+ }
+#endif /* HAVE_ED448 && !NO_ASN_CRYPT */
- default:
- return ALGO_ID_E;
+ /* if flag is not set then is neither RSA or ECC key that could be
+ * found */
+ if (*algoID == 0) {
+ WOLFSSL_MSG("Bad key DER or compile options");
+ return BAD_FUNC_ARG;
}
-#ifdef WOLFSSL_SMALL_STACK
- key = (byte*)XMALLOC(MAX_KEY_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (key == NULL)
- return MEMORY_E;
-#endif
+ (void)tmpIdx;
+ (void)curveOID;
+ (void)oidSz;
+ (void)keySz;
+ (void)heap;
- if (version == PKCS5v2)
- ret = wc_PBKDF2(key, (byte*)password, passwordSz, salt, saltSz, iterations,
- derivedLen, typeH);
-#ifndef NO_SHA
- else if (version == PKCS5)
- ret = wc_PBKDF1(key, (byte*)password, passwordSz, salt, saltSz, iterations,
- derivedLen, typeH);
-#endif
- else if (version == PKCS12) {
- int i, idx = 0;
- byte unicodePasswd[MAX_UNICODE_SZ];
+ return 1;
+}
+
+#endif /* HAVE_PKCS8 */
+
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
+
+#define PKCS8_MIN_BLOCK_SIZE 8
+static int Pkcs8Pad(byte* buf, int sz, int blockSz)
+{
+ int i, padSz;
+
+ /* calculate pad size */
+ padSz = blockSz - (sz & (blockSz - 1));
+
+ /* pad with padSz value */
+ if (buf) {
+ for (i = 0; i < padSz; i++) {
+ buf[sz+i] = (byte)(padSz & 0xFF);
+ }
+ }
+
+ /* return adjusted length */
+ return sz + padSz;
+}
+
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
+
+#ifdef HAVE_PKCS8
+
+/*
+ * Used when creating PKCS12 shrouded key bags
+ * vPKCS is the version of PKCS to use
+ * vAlgo is the algorithm version to use
+ *
+ * if salt is NULL a random number is generated
+ *
+ * returns the size of encrypted data on success
+ */
+int UnTraditionalEnc(byte* key, word32 keySz, byte* out, word32* outSz,
+ const char* password, int passwordSz, int vPKCS, int vAlgo,
+ byte* salt, word32 saltSz, int itt, WC_RNG* rng, void* heap)
+{
+ int algoID = 0;
+ byte* tmp;
+ word32 tmpSz = 0;
+ word32 sz;
+ word32 seqSz;
+ word32 inOutIdx = 0;
+ word32 totalSz = 0;
+ int version, id;
+ int ret;
+ int blockSz = 0;
+
+ const byte* curveOID = NULL;
+ word32 oidSz = 0;
- if ( (passwordSz * 2 + 2) > (int)sizeof(unicodePasswd)) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ byte* saltTmp = NULL;
+ byte* cbcIv = NULL;
+#else
+ byte saltTmp[MAX_IV_SIZE];
+ byte cbcIv[MAX_IV_SIZE];
#endif
- return UNICODE_SIZE_E;
+
+ WOLFSSL_ENTER("UnTraditionalEnc()");
+
+ if (saltSz > MAX_SALT_SIZE)
+ return ASN_PARSE_E;
+
+
+ inOutIdx += MAX_SEQ_SZ; /* leave room for size of finished shroud */
+ if (CheckAlgo(vPKCS, vAlgo, &id, &version, &blockSz) < 0) {
+ WOLFSSL_MSG("Bad/Unsupported algorithm ID");
+ return ASN_INPUT_E; /* Algo ID error */
+ }
+
+ if (out != NULL) {
+ if (*outSz < inOutIdx + MAX_ALGO_SZ + MAX_SALT_SIZE + MAX_SEQ_SZ + 1 +
+ MAX_LENGTH_SZ + MAX_SHORT_SZ + 1)
+ return BUFFER_E;
+
+ if (version == PKCS5v2) {
+ WOLFSSL_MSG("PKCS5v2 Not supported yet\n");
+ return ASN_VERSION_E;
}
- for (i = 0; i < passwordSz; i++) {
- unicodePasswd[idx++] = 0x00;
- unicodePasswd[idx++] = (byte)password[i];
+ if (salt == NULL || saltSz == 0) {
+ saltSz = 8;
+ #ifdef WOLFSSL_SMALL_STACK
+ saltTmp = (byte*)XMALLOC(saltSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (saltTmp == NULL)
+ return MEMORY_E;
+ #endif
+ salt = saltTmp;
+
+ if ((ret = wc_RNG_GenerateBlock(rng, saltTmp, saltSz)) != 0) {
+ WOLFSSL_MSG("Error generating random salt");
+ #ifdef WOLFSSL_SMALL_STACK
+ if (saltTmp != NULL)
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
+ }
}
- /* add trailing NULL */
- unicodePasswd[idx++] = 0x00;
- unicodePasswd[idx++] = 0x00;
- ret = wc_PKCS12_PBKDF(key, unicodePasswd, idx, salt, saltSz,
- iterations, derivedLen, typeH, 1);
- if (decryptionType != RC4_TYPE)
- ret += wc_PKCS12_PBKDF(cbcIv, unicodePasswd, idx, salt, saltSz,
- iterations, 8, typeH, 2);
+
+ /* leave room for a sequence (contains salt and iterations int) */
+ inOutIdx += MAX_SEQ_SZ; sz = 0;
+ inOutIdx += MAX_ALGO_SZ;
+
+ /* place salt in buffer */
+ out[inOutIdx++] = ASN_OCTET_STRING; sz++;
+ tmpSz = SetLength(saltSz, out + inOutIdx);
+ inOutIdx += tmpSz; sz += tmpSz;
+ XMEMCPY(out + inOutIdx, salt, saltSz);
+ inOutIdx += saltSz; sz += saltSz;
+
+ /* place iteration count in buffer */
+ ret = SetShortInt(out, &inOutIdx, itt, *outSz);
+ if (ret < 0) {
+ return ret;
+ }
+ sz += (word32)ret;
+
+ /* wind back index and set sequence then clean up buffer */
+ inOutIdx -= (sz + MAX_SEQ_SZ);
+ tmpSz = SetSequence(sz, out + inOutIdx);
+ XMEMMOVE(out + inOutIdx + tmpSz, out + inOutIdx + MAX_SEQ_SZ, sz);
+ totalSz += tmpSz + sz; sz += tmpSz;
+
+ /* add in algo ID */
+ inOutIdx -= MAX_ALGO_SZ;
+ tmpSz = SetAlgoID(id, out + inOutIdx, oidPBEType, sz);
+ XMEMMOVE(out + inOutIdx + tmpSz, out + inOutIdx + MAX_ALGO_SZ, sz);
+ totalSz += tmpSz; inOutIdx += tmpSz + sz;
+
+ /* octet string containing encrypted key */
+ out[inOutIdx++] = ASN_OCTET_STRING; totalSz++;
}
- else {
+
+ /* check key type and get OID if ECC */
+ if ((ret = wc_GetKeyOID(key, keySz, &curveOID, &oidSz, &algoID, heap))< 0) {
+ WOLFSSL_MSG("Error getting key OID");
+ return ret;
+ }
+
+ /* PKCS#8 wrapping around key */
+ if (wc_CreatePKCS8Key(NULL, &tmpSz, key, keySz, algoID, curveOID, oidSz)
+ != LENGTH_ONLY_E) {
+ #ifdef WOLFSSL_SMALL_STACK
+ if (saltTmp != NULL)
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return MEMORY_E;
+ }
+
+ /* check if should return max size */
+ if (out == NULL) {
+ /* account for salt size */
+ if (salt == NULL || saltSz == 0) {
+ tmpSz += MAX_SALT_SIZE;
+ }
+ else {
+ tmpSz += saltSz;
+ }
+
+ /* plus 3 for tags */
+ *outSz = tmpSz + MAX_ALGO_SZ + MAX_LENGTH_SZ +MAX_LENGTH_SZ + MAX_SEQ_SZ
+ + MAX_LENGTH_SZ + MAX_SEQ_SZ + 3;
+ return LENGTH_ONLY_E;
+ }
+
+ /* reserve buffer for crypto and make sure it supports full blocks */
+ tmp = (byte*)XMALLOC(tmpSz + (blockSz-1), heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL) {
+ #ifdef WOLFSSL_SMALL_STACK
+ if (saltTmp != NULL)
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return MEMORY_E;
+ }
+
+ if ((ret = wc_CreatePKCS8Key(tmp, &tmpSz, key, keySz, algoID, curveOID,
+ oidSz)) < 0) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ WOLFSSL_MSG("Error wrapping key with PKCS#8");
+ #ifdef WOLFSSL_SMALL_STACK
+ if (saltTmp != NULL)
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
+ }
+ tmpSz = ret;
+
+ /* adjust size to pad */
+ tmpSz = Pkcs8Pad(tmp, tmpSz, blockSz);
+
#ifdef WOLFSSL_SMALL_STACK
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (cbcIv == NULL) {
+ if (saltTmp != NULL)
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(salt, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
#endif
- return ALGO_ID_E;
+
+ /* encrypt PKCS#8 wrapped key */
+ if ((ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id,
+ tmp, tmpSz, version, cbcIv, 1, 0)) < 0) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ WOLFSSL_MSG("Error encrypting key");
+ #ifdef WOLFSSL_SMALL_STACK
+ if (saltTmp != NULL)
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (cbcIv != NULL)
+ XFREE(cbcIv, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret; /* encryption failure */
}
+ totalSz += tmpSz;
- if (ret != 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (saltTmp != NULL)
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (cbcIv != NULL)
+ XFREE(cbcIv, heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return ret;
+
+ if (*outSz < inOutIdx + tmpSz + MAX_LENGTH_SZ) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return BUFFER_E;
}
- switch (decryptionType) {
-#ifndef NO_DES3
- case DES_TYPE:
- {
- Des dec;
- byte* desIv = key + 8;
+ /* set length of key and copy over encrypted key */
+ seqSz = SetLength(tmpSz, out + inOutIdx);
+ inOutIdx += seqSz; totalSz += seqSz;
+ XMEMCPY(out + inOutIdx, tmp, tmpSz);
+ XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
- if (version == PKCS5v2 || version == PKCS12)
- desIv = cbcIv;
+ /* set total size at beginning */
+ sz = SetSequence(totalSz, out);
+ XMEMMOVE(out + sz, out + MAX_SEQ_SZ, totalSz);
- ret = wc_Des_SetKey(&dec, key, desIv, DES_DECRYPTION);
- if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ret;
- }
+ (void)rng;
- wc_Des_CbcDecrypt(&dec, input, input, length);
- break;
- }
+ return totalSz + sz;
+}
- case DES3_TYPE:
- {
- Des3 dec;
- byte* desIv = key + 24;
+static int GetAlgoV2(int encAlgId, const byte** oid, int *len, int* id,
+ int *blkSz)
+{
+ int ret = 0;
- if (version == PKCS5v2 || version == PKCS12)
- desIv = cbcIv;
- ret = wc_Des3_SetKey(&dec, key, desIv, DES_DECRYPTION);
- if (ret != 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ switch (encAlgId) {
+#if !defined(NO_DES3) && !defined(NO_SHA)
+ case DESb:
+ *len = sizeof(blkDesCbcOid);
+ *oid = blkDesCbcOid;
+ *id = PBE_SHA1_DES;
+ *blkSz = 8;
+ break;
+ case DES3b:
+ *len = sizeof(blkDes3CbcOid);
+ *oid = blkDes3CbcOid;
+ *id = PBE_SHA1_DES3;
+ *blkSz = 8;
+ break;
#endif
- return ret;
- }
- ret = wc_Des3_CbcDecrypt(&dec, input, input, length);
- if (ret != 0) {
+#if defined(WOLFSSL_AES_256) && defined(HAVE_AES_CBC)
+ case AES256CBCb:
+ *len = sizeof(blkAes256CbcOid);
+ *oid = blkAes256CbcOid;
+ *id = PBE_AES256_CBC;
+ *blkSz = 16;
+ break;
+#endif
+ default:
+ (void)len;
+ (void)oid;
+ (void)id;
+ (void)blkSz;
+ ret = ALGO_ID_E;
+ }
+
+ return ret;
+}
+
+/* Converts Encrypted PKCS#8 to 'traditional' (i.e. PKCS#8 removed from
+ * decrypted key.)
+ */
+int TraditionalEnc(byte* key, word32 keySz, byte* out, word32* outSz,
+ const char* password, int passwordSz, int vPKCS, int vAlgo,
+ int encAlgId, byte* salt, word32 saltSz, int itt, WC_RNG* rng,
+ void* heap)
+{
+ int ret = 0;
+ int version, blockSz, id;
+ word32 idx = 0, encIdx;
#ifdef WOLFSSL_SMALL_STACK
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ byte* saltTmp = NULL;
+#else
+ byte saltTmp[MAX_SALT_SIZE];
#endif
- return ret;
- }
- break;
+ byte cbcIv[MAX_IV_SIZE];
+ byte *pkcs8Key = NULL;
+ word32 pkcs8KeySz = 0, padSz = 0;
+ int algId = 0;
+ const byte* curveOid = NULL;
+ word32 curveOidSz = 0;
+ const byte* pbeOid = NULL;
+ word32 pbeOidSz = 0;
+ const byte* encOid = NULL;
+ int encOidSz = 0;
+ word32 pbeLen = 0, kdfLen = 0, encLen = 0;
+ word32 innerLen = 0, outerLen;
+
+ ret = CheckAlgo(vPKCS, vAlgo, &id, &version, &blockSz);
+ /* create random salt if one not provided */
+ if (ret == 0 && (salt == NULL || saltSz == 0)) {
+ saltSz = 8;
+ #ifdef WOLFSSL_SMALL_STACK
+ saltTmp = (byte*)XMALLOC(saltSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (saltTmp == NULL)
+ return MEMORY_E;
+ #endif
+ salt = saltTmp;
+
+ if ((ret = wc_RNG_GenerateBlock(rng, saltTmp, saltSz)) != 0) {
+ WOLFSSL_MSG("Error generating random salt");
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
}
-#endif
-#ifndef NO_RC4
- case RC4_TYPE:
- {
- Arc4 dec;
+ }
- wc_Arc4SetKey(&dec, key, derivedLen);
- wc_Arc4Process(&dec, input, input, length);
- break;
+ if (ret == 0) {
+ /* check key type and get OID if ECC */
+ ret = wc_GetKeyOID(key, keySz, &curveOid, &curveOidSz, &algId, heap);
+ if (ret == 1)
+ ret = 0;
+ }
+ if (ret == 0) {
+ ret = wc_CreatePKCS8Key(NULL, &pkcs8KeySz, key, keySz, algId, curveOid,
+ curveOidSz);
+ if (ret == LENGTH_ONLY_E)
+ ret = 0;
+ }
+ if (ret == 0) {
+ pkcs8Key = (byte*)XMALLOC(pkcs8KeySz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pkcs8Key == NULL)
+ ret = MEMORY_E;
+ }
+ if (ret == 0) {
+ ret = wc_CreatePKCS8Key(pkcs8Key, &pkcs8KeySz, key, keySz, algId,
+ curveOid, curveOidSz);
+ if (ret >= 0) {
+ pkcs8KeySz = ret;
+ ret = 0;
}
-#endif
+ }
- default:
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ALGO_ID_E;
+ if (ret == 0 && version == PKCS5v2)
+ ret = GetAlgoV2(encAlgId, &encOid, &encOidSz, &id, &blockSz);
+
+ if (ret == 0) {
+ padSz = (blockSz - (pkcs8KeySz & (blockSz - 1))) & (blockSz - 1);
+ /* inner = OCT salt INT itt */
+ innerLen = 2 + saltSz + 2 + (itt < 256 ? 1 : 2);
+
+ if (version != PKCS5v2) {
+ pbeOid = OidFromId(id, oidPBEType, &pbeOidSz);
+ /* pbe = OBJ pbse1 SEQ [ inner ] */
+ pbeLen = 2 + pbeOidSz + 2 + innerLen;
+ }
+ else {
+ pbeOid = pbes2;
+ pbeOidSz = sizeof(pbes2);
+ /* kdf = OBJ pbkdf2 [ SEQ innerLen ] */
+ kdfLen = 2 + sizeof(pbkdf2Oid) + 2 + innerLen;
+ /* enc = OBJ enc_alg OCT iv */
+ encLen = 2 + encOidSz + 2 + blockSz;
+ /* pbe = OBJ pbse2 SEQ [ SEQ [ kdf ] SEQ [ enc ] ] */
+ pbeLen = 2 + sizeof(pbes2) + 2 + 2 + kdfLen + 2 + encLen;
+
+ ret = wc_RNG_GenerateBlock(rng, cbcIv, blockSz);
+ }
+ }
+ if (ret == 0) {
+ /* outer = SEQ [ pbe ] OCT encrypted_PKCS#8_key */
+ outerLen = 2 + pbeLen;
+ outerLen += SetOctetString(pkcs8KeySz + padSz, out);
+ outerLen += pkcs8KeySz + padSz;
+
+ idx += SetSequence(outerLen, out + idx);
+
+ encIdx = idx + outerLen - pkcs8KeySz - padSz;
+ /* Put Encrypted content in place. */
+ XMEMCPY(out + encIdx, pkcs8Key, pkcs8KeySz);
+ if (padSz > 0) {
+ XMEMSET(out + encIdx + pkcs8KeySz, padSz, padSz);
+ pkcs8KeySz += padSz;
+ }
+ ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id,
+ out + encIdx, pkcs8KeySz, version, cbcIv, 1, 0);
+ }
+ if (ret == 0) {
+ if (version != PKCS5v2) {
+ /* PBE algorithm */
+ idx += SetSequence(pbeLen, out + idx);
+ idx += SetObjectId(pbeOidSz, out + idx);
+ XMEMCPY(out + idx, pbeOid, pbeOidSz);
+ idx += pbeOidSz;
+ }
+ else {
+ /* PBES2 algorithm identifier */
+ idx += SetSequence(pbeLen, out + idx);
+ idx += SetObjectId(pbeOidSz, out + idx);
+ XMEMCPY(out + idx, pbeOid, pbeOidSz);
+ idx += pbeOidSz;
+ /* PBES2 Parameters: SEQ [ kdf ] SEQ [ enc ] */
+ idx += SetSequence(2 + kdfLen + 2 + encLen, out + idx);
+ /* KDF Algorithm Identifier */
+ idx += SetSequence(kdfLen, out + idx);
+ idx += SetObjectId(sizeof(pbkdf2Oid), out + idx);
+ XMEMCPY(out + idx, pbkdf2Oid, sizeof(pbkdf2Oid));
+ idx += sizeof(pbkdf2Oid);
+ }
+ idx += SetSequence(innerLen, out + idx);
+ idx += SetOctetString(saltSz, out + idx);
+ XMEMCPY(out + idx, salt, saltSz); idx += saltSz;
+ ret = SetShortInt(out, &idx, itt, *outSz);
+ if (ret > 0)
+ ret = 0;
+ }
+ if (ret == 0) {
+ if (version == PKCS5v2) {
+ /* Encryption Algorithm Identifier */
+ idx += SetSequence(encLen, out + idx);
+ idx += SetObjectId(encOidSz, out + idx);
+ XMEMCPY(out + idx, encOid, encOidSz);
+ idx += encOidSz;
+ /* Encryption Algorithm Parameter: CBC IV */
+ idx += SetOctetString(blockSz, out + idx);
+ XMEMCPY(out + idx, cbcIv, blockSz);
+ idx += blockSz;
+ }
+ idx += SetOctetString(pkcs8KeySz, out + idx);
+ /* Default PRF - no need to write out OID */
+ idx += pkcs8KeySz;
+
+ ret = idx;
}
+ if (pkcs8Key != NULL) {
+ ForceZero(pkcs8Key, pkcs8KeySz);
+ XFREE(pkcs8Key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
#ifdef WOLFSSL_SMALL_STACK
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (saltTmp != NULL) {
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
#endif
- return 0;
+ (void)rng;
+
+ return ret;
}
+#endif /* HAVE_PKCS8 */
-/* Remove Encrypted PKCS8 header, move beginning of traditional to beginning
- of input */
-int ToTraditionalEnc(byte* input, word32 sz,const char* password,int passwordSz)
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
+/* decrypt PKCS
+ *
+ * NOTE: input buffer is overwritten with decrypted data!
+ *
+ * input[in/out] data to decrypt and results are written to
+ * sz size of input buffer
+ * password password if used. Can be NULL for no password
+ * passwordSz size of password buffer
+ *
+ * returns the total size of decrypted content on success.
+ */
+int DecryptContent(byte* input, word32 sz, const char* password, int passwordSz)
{
- word32 inOutIdx = 0, oid;
- int first, second, length, version, saltSz, id;
- int iterations = 0;
+ word32 inOutIdx = 0, seqEnd, oid, shaOid = 0;
+ int ret = 0, first, second, length = 0, version, saltSz, id;
+ int iterations = 0, keySz = 0;
#ifdef WOLFSSL_SMALL_STACK
byte* salt = NULL;
byte* cbcIv = NULL;
@@ -1071,205 +3780,560 @@ int ToTraditionalEnc(byte* input, word32 sz,const char* password,int passwordSz)
byte salt[MAX_SALT_SIZE];
byte cbcIv[MAX_IV_SIZE];
#endif
-
- if (GetSequence(input, &inOutIdx, &length, sz) < 0)
- return ASN_PARSE_E;
+ byte tag;
- if (GetAlgoId(input, &inOutIdx, &oid, sz) < 0)
- return ASN_PARSE_E;
-
- first = input[inOutIdx - 2]; /* PKCS version alwyas 2nd to last byte */
+ if (passwordSz < 0) {
+ WOLFSSL_MSG("Bad password size");
+ return BAD_FUNC_ARG;
+ }
+
+ if (GetAlgoId(input, &inOutIdx, &oid, oidIgnoreType, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
+
+ first = input[inOutIdx - 2]; /* PKCS version always 2nd to last byte */
second = input[inOutIdx - 1]; /* version.algo, algo id last byte */
- if (CheckAlgo(first, second, &id, &version) < 0)
- return ASN_INPUT_E; /* Algo ID error */
+ if (CheckAlgo(first, second, &id, &version, NULL) < 0) {
+ ERROR_OUT(ASN_INPUT_E, exit_dc); /* Algo ID error */
+ }
if (version == PKCS5v2) {
+ if (GetSequence(input, &inOutIdx, &length, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
- if (GetSequence(input, &inOutIdx, &length, sz) < 0)
- return ASN_PARSE_E;
+ if (GetAlgoId(input, &inOutIdx, &oid, oidKdfType, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
- if (GetAlgoId(input, &inOutIdx, &oid, sz) < 0)
- return ASN_PARSE_E;
+ if (oid != PBKDF2_OID) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
+ }
- if (oid != PBKDF2_OID)
- return ASN_PARSE_E;
+ if (GetSequence(input, &inOutIdx, &length, sz) <= 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
}
+ /* Find the end of this SEQUENCE so we can check for the OPTIONAL and
+ * DEFAULT items. */
+ seqEnd = inOutIdx + length;
- if (GetSequence(input, &inOutIdx, &length, sz) < 0)
- return ASN_PARSE_E;
+ ret = GetOctetString(input, &inOutIdx, &saltSz, sz);
+ if (ret < 0)
+ goto exit_dc;
- if (input[inOutIdx++] != ASN_OCTET_STRING)
- return ASN_PARSE_E;
-
- if (GetLength(input, &inOutIdx, &saltSz, sz) < 0)
- return ASN_PARSE_E;
+ if (saltSz > MAX_SALT_SIZE) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
- if (saltSz > MAX_SALT_SIZE)
- return ASN_PARSE_E;
-
#ifdef WOLFSSL_SMALL_STACK
salt = (byte*)XMALLOC(MAX_SALT_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (salt == NULL)
- return MEMORY_E;
+ if (salt == NULL) {
+ ERROR_OUT(MEMORY_E, exit_dc);
+ }
#endif
XMEMCPY(salt, &input[inOutIdx], saltSz);
inOutIdx += saltSz;
- if (GetShortInt(input, &inOutIdx, &iterations) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ASN_PARSE_E;
+ if (GetShortInt(input, &inOutIdx, &iterations, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
+
+ /* OPTIONAL key length */
+ if (seqEnd > inOutIdx) {
+ word32 localIdx = inOutIdx;
+
+ if (GetASNTag(input, &localIdx, &tag, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
+
+ if (tag == ASN_INTEGER &&
+ GetShortInt(input, &inOutIdx, &keySz, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
+ }
+
+ /* DEFAULT HMAC is SHA-1 */
+ if (seqEnd > inOutIdx) {
+ if (GetAlgoId(input, &inOutIdx, &oid, oidHmacType, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
+
+ shaOid = oid;
}
#ifdef WOLFSSL_SMALL_STACK
cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (cbcIv == NULL) {
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
+ ERROR_OUT(MEMORY_E, exit_dc);
}
#endif
if (version == PKCS5v2) {
/* get encryption algo */
- if (GetAlgoId(input, &inOutIdx, &oid, sz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ASN_PARSE_E;
+ if (GetAlgoId(input, &inOutIdx, &oid, oidBlkType, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
}
- if (CheckAlgoV2(oid, &id) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ASN_PARSE_E; /* PKCS v2 algo id error */
+ if (CheckAlgoV2(oid, &id, NULL) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc); /* PKCS v2 algo id error */
}
- if (input[inOutIdx++] != ASN_OCTET_STRING) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ASN_PARSE_E;
- }
-
- if (GetLength(input, &inOutIdx, &length, sz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ASN_PARSE_E;
+ if (shaOid == 0)
+ shaOid = oid;
+
+ ret = GetOctetString(input, &inOutIdx, &length, sz);
+ if (ret < 0)
+ goto exit_dc;
+
+ if (length > MAX_IV_SIZE) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
}
XMEMCPY(cbcIv, &input[inOutIdx], length);
inOutIdx += length;
}
- if (input[inOutIdx++] != ASN_OCTET_STRING) {
+ if (GetASNTag(input, &inOutIdx, &tag, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
+
+ if (tag != (ASN_CONTEXT_SPECIFIC | 0) && tag != ASN_OCTET_STRING) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
+
+ if (GetLength(input, &inOutIdx, &length, sz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_dc);
+ }
+
+ ret = wc_CryptKey(password, passwordSz, salt, saltSz, iterations, id,
+ input + inOutIdx, length, version, cbcIv, 0, shaOid);
+
+exit_dc:
#ifdef WOLFSSL_SMALL_STACK
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return ASN_PARSE_E;
+
+ if (ret == 0) {
+ XMEMMOVE(input, input + inOutIdx, length);
+ ret = length;
}
- if (GetLength(input, &inOutIdx, &length, sz) < 0) {
+ return ret;
+}
+
+
+/* Remove Encrypted PKCS8 header, move beginning of traditional to beginning
+ of input */
+int ToTraditionalEnc(byte* input, word32 sz,const char* password,
+ int passwordSz, word32* algId)
+{
+ int ret, length;
+ word32 inOutIdx = 0;
+
+ if (GetSequence(input, &inOutIdx, &length, sz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+ else {
+ ret = DecryptContent(input + inOutIdx, sz - inOutIdx, password,
+ passwordSz);
+ if (ret > 0) {
+ XMEMMOVE(input, input + inOutIdx, ret);
+ ret = ToTraditional_ex(input, ret, algId);
+ }
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
+
+#ifdef HAVE_PKCS12
+
+/* encrypt PKCS 12 content
+ *
+ * NOTE: if out is NULL then outSz is set with the total buffer size needed and
+ * the error value LENGTH_ONLY_E is returned.
+ *
+ * input data to encrypt
+ * inputSz size of input buffer
+ * out buffer to hold the result
+ * outSz size of out buffer
+ * password password if used. Can be NULL for no password
+ * passwordSz size of password buffer
+ * vPKCS version of PKCS i.e. PKCS5v2
+ * vAlgo algorithm version
+ * salt buffer holding salt if used. If NULL then a random salt is created
+ * saltSz size of salt buffer if it is not NULL
+ * itt number of iterations used
+ * rng random number generator to use
+ * heap possible heap hint for mallocs/frees
+ *
+ * returns the total size of encrypted content on success.
+ *
+ * data returned is :
+ * [ seq - obj [ seq -salt,itt]] , construct with encrypted data
+ */
+int EncryptContent(byte* input, word32 inputSz, byte* out, word32* outSz,
+ const char* password, int passwordSz, int vPKCS, int vAlgo,
+ byte* salt, word32 saltSz, int itt, WC_RNG* rng, void* heap)
+{
+ word32 sz;
+ word32 inOutIdx = 0;
+ word32 tmpIdx = 0;
+ word32 totalSz = 0;
+ word32 seqSz;
+ word32 innerSz;
+ int ret;
+ int version, id, blockSz = 0;
#ifdef WOLFSSL_SMALL_STACK
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ byte* saltTmp = NULL;
+ byte* cbcIv = NULL;
+#else
+ byte saltTmp[MAX_SALT_SIZE];
+ byte cbcIv[MAX_IV_SIZE];
#endif
+ byte seq[MAX_SEQ_SZ];
+ byte shr[MAX_SHORT_SZ];
+ word32 maxShr = MAX_SHORT_SZ;
+ word32 algoSz;
+ const byte* algoName;
+
+ (void)heap;
+
+ WOLFSSL_ENTER("EncryptContent()");
+
+ if (CheckAlgo(vPKCS, vAlgo, &id, &version, &blockSz) < 0)
+ return ASN_INPUT_E; /* Algo ID error */
+
+ if (version == PKCS5v2) {
+ WOLFSSL_MSG("PKCS#5 version 2 not supported yet");
+ return BAD_FUNC_ARG;
+ }
+
+ if (saltSz > MAX_SALT_SIZE)
return ASN_PARSE_E;
+
+ if (outSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* calculate size */
+ /* size of constructed string at end */
+ sz = Pkcs8Pad(NULL, inputSz, blockSz);
+ totalSz = ASN_TAG_SZ;
+ totalSz += SetLength(sz, seq);
+ totalSz += sz;
+
+ /* size of sequence holding object id and sub sequence of salt and itt */
+ algoName = OidFromId(id, oidPBEType, &algoSz);
+ if (algoName == NULL) {
+ WOLFSSL_MSG("Unknown Algorithm");
+ return 0;
+ }
+ innerSz = SetObjectId(algoSz, seq);
+ innerSz += algoSz;
+
+ /* get subsequence of salt and itt */
+ if (salt == NULL || saltSz == 0) {
+ sz = 8;
+ }
+ else {
+ sz = saltSz;
+ }
+ seqSz = SetOctetString(sz, seq);
+ seqSz += sz;
+
+ tmpIdx = 0;
+ seqSz += SetShortInt(shr, &tmpIdx, itt, maxShr);
+ innerSz += seqSz + SetSequence(seqSz, seq);
+ totalSz += innerSz + SetSequence(innerSz, seq);
+
+ if (out == NULL) {
+ *outSz = totalSz;
+ return LENGTH_ONLY_E;
+ }
+
+ inOutIdx = 0;
+ if (totalSz > *outSz)
+ return BUFFER_E;
+
+ inOutIdx += SetSequence(innerSz, out + inOutIdx);
+ inOutIdx += SetObjectId(algoSz, out + inOutIdx);
+ XMEMCPY(out + inOutIdx, algoName, algoSz);
+ inOutIdx += algoSz;
+ inOutIdx += SetSequence(seqSz, out + inOutIdx);
+
+ /* create random salt if one not provided */
+ if (salt == NULL || saltSz == 0) {
+ saltSz = 8;
+ #ifdef WOLFSSL_SMALL_STACK
+ saltTmp = (byte*)XMALLOC(saltSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (saltTmp == NULL)
+ return MEMORY_E;
+ #endif
+ salt = saltTmp;
+
+ if ((ret = wc_RNG_GenerateBlock(rng, saltTmp, saltSz)) != 0) {
+ WOLFSSL_MSG("Error generating random salt");
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
+ }
+ }
+ inOutIdx += SetOctetString(saltSz, out + inOutIdx);
+ if (saltSz + inOutIdx > *outSz) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return BUFFER_E;
}
+ XMEMCPY(out + inOutIdx, salt, saltSz);
+ inOutIdx += saltSz;
- if (DecryptKey(password, passwordSz, salt, saltSz, iterations, id,
- input + inOutIdx, length, version, cbcIv) < 0) {
+ /* place iteration setting in buffer */
+ ret = SetShortInt(out, &inOutIdx, itt, *outSz);
+ if (ret < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
+ }
+
+ if (inOutIdx + 1 > *outSz) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return BUFFER_E;
+ }
+ out[inOutIdx++] = ASN_CONTEXT_SPECIFIC | 0;
+
+ /* get pad size and verify buffer room */
+ sz = Pkcs8Pad(NULL, inputSz, blockSz);
+ if (sz + inOutIdx > *outSz) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return BUFFER_E;
+ }
+ inOutIdx += SetLength(sz, out + inOutIdx);
+
+ /* copy input to output buffer and pad end */
+ XMEMCPY(out + inOutIdx, input, inputSz);
+ sz = Pkcs8Pad(out + inOutIdx, inputSz, blockSz);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ cbcIv = (byte*)XMALLOC(MAX_IV_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (cbcIv == NULL) {
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
#endif
- return ASN_INPUT_E; /* decrypt failure */
+
+ /* encrypt */
+ if ((ret = wc_CryptKey(password, passwordSz, salt, saltSz, itt, id,
+ out + inOutIdx, sz, version, cbcIv, 1, 0)) < 0) {
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(cbcIv, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret; /* encrypt failure */
}
#ifdef WOLFSSL_SMALL_STACK
- XFREE(salt, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(cbcIv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(cbcIv, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(saltTmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- XMEMMOVE(input, input + inOutIdx, length);
- return ToTraditional(input, length);
+ (void)rng;
+
+ return inOutIdx + sz;
}
+
+#endif /* HAVE_PKCS12 */
#endif /* NO_PWDBASED */
#ifndef NO_RSA
-int wc_RsaPublicKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
- word32 inSz)
+#ifndef HAVE_USER_RSA
+#ifdef WOLFSSL_RENESAS_TSIP
+/* This function is to retrieve key position information in a cert.*
+ * The information will be used to call TSIP TLS-linked API for *
+ * certificate verification. */
+static int RsaPublicKeyDecodeRawIndex(const byte* input, word32* inOutIdx,
+ word32 inSz, word32* key_n,
+ word32* key_n_len, word32* key_e,
+ word32* key_e_len)
{
- int length;
+
+ int ret = 0;
+ int length = 0;
+#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
+ byte b;
+#endif
+
+ if (input == NULL || inOutIdx == NULL)
+ return BAD_FUNC_ARG;
if (GetSequence(input, inOutIdx, &length, inSz) < 0)
return ASN_PARSE_E;
- key->type = RSA_PUBLIC;
-
#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
- {
- byte b = input[*inOutIdx];
+ if ((*inOutIdx + 1) > inSz)
+ return BUFFER_E;
+
+ b = input[*inOutIdx];
if (b != ASN_INTEGER) {
/* not from decoded cert, will have algo id, skip past */
if (GetSequence(input, inOutIdx, &length, inSz) < 0)
return ASN_PARSE_E;
-
- b = input[(*inOutIdx)++];
- if (b != ASN_OBJECT_ID)
- return ASN_OBJECT_ID_E;
-
- if (GetLength(input, inOutIdx, &length, inSz) < 0)
+
+ if (SkipObjectId(input, inOutIdx, inSz) < 0)
return ASN_PARSE_E;
-
- *inOutIdx += length; /* skip past */
-
- /* could have NULL tag and 0 terminator, but may not */
- b = input[(*inOutIdx)++];
-
- if (b == ASN_TAG_NULL) {
- b = input[(*inOutIdx)++];
- if (b != 0)
- return ASN_EXPECT_0_E;
+
+ /* Option NULL ASN.1 tag */
+ if (*inOutIdx >= inSz) {
+ return BUFFER_E;
}
- else
- /* go back, didn't have it */
- (*inOutIdx)--;
-
+ if (input[*inOutIdx] == ASN_TAG_NULL) {
+ ret = GetASNNull(input, inOutIdx, inSz);
+ if (ret != 0)
+ return ret;
+ }
+
/* should have bit tag length and seq next */
- b = input[(*inOutIdx)++];
- if (b != ASN_BIT_STRING)
- return ASN_BITSTR_E;
-
- if (GetLength(input, inOutIdx, &length, inSz) < 0)
+ ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL);
+ if (ret != 0)
+ return ret;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
return ASN_PARSE_E;
-
- /* could have 0 */
- b = input[(*inOutIdx)++];
- if (b != 0)
- (*inOutIdx)--;
-
+ }
+#endif /* OPENSSL_EXTRA */
+
+ /* Get modulus */
+ ret = GetASNInt(input, inOutIdx, &length, inSz);
+ *key_n += *inOutIdx;
+ if (ret < 0) {
+ return ASN_RSA_KEY_E;
+ }
+ if (key_n_len)
+ *key_n_len = length;
+ *inOutIdx += length;
+
+ /* Get exponent */
+ ret = GetASNInt(input, inOutIdx, &length, inSz);
+ *key_e += *inOutIdx;
+ if (ret < 0) {
+ return ASN_RSA_KEY_E;
+ }
+ if (key_e_len)
+ *key_e_len = length;
+
+ return ret;
+}
+#endif /* WOLFSSL_RENESAS_TSIP */
+
+int wc_RsaPublicKeyDecode_ex(const byte* input, word32* inOutIdx, word32 inSz,
+ const byte** n, word32* nSz, const byte** e, word32* eSz)
+{
+ int ret = 0;
+ int length = 0;
+#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
+ word32 localIdx;
+ byte tag;
+#endif
+
+ if (input == NULL || inOutIdx == NULL)
+ return BAD_FUNC_ARG;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
+ localIdx = *inOutIdx;
+ if (GetASNTag(input, &localIdx, &tag, inSz) < 0)
+ return BUFFER_E;
+
+ if (tag != ASN_INTEGER) {
+ /* not from decoded cert, will have algo id, skip past */
if (GetSequence(input, inOutIdx, &length, inSz) < 0)
return ASN_PARSE_E;
- } /* end if */
- } /* openssl var block */
+
+ if (SkipObjectId(input, inOutIdx, inSz) < 0)
+ return ASN_PARSE_E;
+
+ /* Option NULL ASN.1 tag */
+ if (*inOutIdx >= inSz) {
+ return BUFFER_E;
+ }
+
+ localIdx = *inOutIdx;
+ if (GetASNTag(input, &localIdx, &tag, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag == ASN_TAG_NULL) {
+ ret = GetASNNull(input, inOutIdx, inSz);
+ if (ret != 0)
+ return ret;
+ }
+
+ /* should have bit tag length and seq next */
+ ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL);
+ if (ret != 0)
+ return ret;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+ }
#endif /* OPENSSL_EXTRA */
- if (GetInt(&key->n, input, inOutIdx, inSz) < 0 ||
- GetInt(&key->e, input, inOutIdx, inSz) < 0 ) return ASN_RSA_KEY_E;
+ /* Get modulus */
+ ret = GetASNInt(input, inOutIdx, &length, inSz);
+ if (ret < 0) {
+ return ASN_RSA_KEY_E;
+ }
+ if (nSz)
+ *nSz = length;
+ if (n)
+ *n = &input[*inOutIdx];
+ *inOutIdx += length;
- return 0;
+ /* Get exponent */
+ ret = GetASNInt(input, inOutIdx, &length, inSz);
+ if (ret < 0) {
+ return ASN_RSA_KEY_E;
+ }
+ if (eSz)
+ *eSz = length;
+ if (e)
+ *e = &input[*inOutIdx];
+ *inOutIdx += length;
+
+ return ret;
+}
+
+int wc_RsaPublicKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
+ word32 inSz)
+{
+ int ret;
+ const byte *n = NULL, *e = NULL;
+ word32 nSz = 0, eSz = 0;
+
+ if (key == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_RsaPublicKeyDecode_ex(input, inOutIdx, inSz, &n, &nSz, &e, &eSz);
+ if (ret == 0) {
+ ret = wc_RsaPublicKeyDecodeRaw(n, nSz, e, eSz, key);
+ }
+
+ return ret;
}
/* import RSA public key elements (n, e) into RsaKey structure (key) */
@@ -1288,6 +4352,12 @@ int wc_RsaPublicKeyDecodeRaw(const byte* n, word32 nSz, const byte* e,
mp_clear(&key->n);
return ASN_GETINT_E;
}
+#ifdef HAVE_WOLF_BIGINT
+ if ((int)nSz > 0 && wc_bigint_from_unsigned_bin(&key->n.raw, n, nSz) != 0) {
+ mp_clear(&key->n);
+ return ASN_GETINT_E;
+ }
+#endif /* HAVE_WOLF_BIGINT */
if (mp_init(&key->e) != MP_OKAY) {
mp_clear(&key->n);
@@ -1299,76 +4369,143 @@ int wc_RsaPublicKeyDecodeRaw(const byte* n, word32 nSz, const byte* e,
mp_clear(&key->e);
return ASN_GETINT_E;
}
+#ifdef HAVE_WOLF_BIGINT
+ if ((int)eSz > 0 && wc_bigint_from_unsigned_bin(&key->e.raw, e, eSz) != 0) {
+ mp_clear(&key->n);
+ mp_clear(&key->e);
+ return ASN_GETINT_E;
+ }
+#endif /* HAVE_WOLF_BIGINT */
+
+#ifdef WOLFSSL_XILINX_CRYPT
+ if (wc_InitRsaHw(key) != 0) {
+ return BAD_STATE_E;
+ }
+#endif
return 0;
}
-
-#endif
+#endif /* HAVE_USER_RSA */
+#endif /* !NO_RSA */
#ifndef NO_DH
int wc_DhKeyDecode(const byte* input, word32* inOutIdx, DhKey* key, word32 inSz)
{
- int length;
+ int ret = 0;
+ int length;
+ #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+ word32 oid = 0, temp = 0;
+ #endif
+
+ WOLFSSL_ENTER("wc_DhKeyDecode");
+
+ if (inOutIdx == NULL)
+ return BAD_FUNC_ARG;
if (GetSequence(input, inOutIdx, &length, inSz) < 0)
return ASN_PARSE_E;
+ #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+ temp = *inOutIdx;
+ #endif
+
+ /* Assume input started after 1.2.840.113549.1.3.1 dhKeyAgreement */
if (GetInt(&key->p, input, inOutIdx, inSz) < 0 ||
- GetInt(&key->g, input, inOutIdx, inSz) < 0 ) return ASN_DH_KEY_E;
+ GetInt(&key->g, input, inOutIdx, inSz) < 0) {
+ ret = ASN_DH_KEY_E;
+ }
- return 0;
+ #if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+ /* If ASN_DH_KEY_E: Check if input started at beginning of key */
+ if (ret == ASN_DH_KEY_E) {
+ /* rewind back to after the first sequence */
+ *inOutIdx = temp;
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ /* Check for dhKeyAgreement */
+ ret = GetObjectId(input, inOutIdx, &oid, oidKeyType, inSz);
+ if (oid != DHk || ret < 0)
+ return ASN_DH_KEY_E;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetInt(&key->p, input, inOutIdx, inSz) < 0 ||
+ GetInt(&key->g, input, inOutIdx, inSz) < 0) {
+ return ASN_DH_KEY_E;
+ }
+ }
+
+ temp = *inOutIdx;
+ ret = (CheckBitString(input, inOutIdx, &length, inSz, 0, NULL) == 0);
+ if (ret > 0) {
+ /* Found Bit String */
+ if (GetInt(&key->pub, input, inOutIdx, inSz) == 0) {
+ WOLFSSL_MSG("Found Public Key");
+ ret = 0;
+ }
+ } else {
+ *inOutIdx = temp;
+ ret = (GetOctetString(input, inOutIdx, &length, inSz) >= 0);
+ if (ret > 0) {
+ /* Found Octet String */
+ if (GetInt(&key->priv, input, inOutIdx, inSz) == 0) {
+ WOLFSSL_MSG("Found Private Key");
+ ret = 0;
+ }
+ } else {
+ /* Don't use length from failed CheckBitString/GetOctetString */
+ *inOutIdx = temp;
+ ret = 0;
+ }
+ }
+ #endif /* WOLFSSL_QT || OPENSSL_ALL */
+
+ WOLFSSL_MSG("wc_DhKeyDecode Success");
+
+ return ret;
}
int wc_DhParamsLoad(const byte* input, word32 inSz, byte* p, word32* pInOutSz,
byte* g, word32* gInOutSz)
{
- word32 i = 0;
- byte b;
+ word32 idx = 0;
+ int ret;
int length;
- if (GetSequence(input, &i, &length, inSz) < 0)
- return ASN_PARSE_E;
-
- b = input[i++];
- if (b != ASN_INTEGER)
- return ASN_PARSE_E;
-
- if (GetLength(input, &i, &length, inSz) < 0)
+ if (GetSequence(input, &idx, &length, inSz) <= 0)
return ASN_PARSE_E;
- if ( (b = input[i++]) == 0x00)
- length--;
- else
- i--;
+ ret = GetASNInt(input, &idx, &length, inSz);
+ if (ret != 0)
+ return ret;
if (length <= (int)*pInOutSz) {
- XMEMCPY(p, &input[i], length);
+ XMEMCPY(p, &input[idx], length);
*pInOutSz = length;
}
- else
+ else {
return BUFFER_E;
+ }
+ idx += length;
- i += length;
-
- b = input[i++];
- if (b != ASN_INTEGER)
- return ASN_PARSE_E;
-
- if (GetLength(input, &i, &length, inSz) < 0)
- return ASN_PARSE_E;
+ ret = GetASNInt(input, &idx, &length, inSz);
+ if (ret != 0)
+ return ret;
if (length <= (int)*gInOutSz) {
- XMEMCPY(g, &input[i], length);
+ XMEMCPY(g, &input[idx], length);
*gInOutSz = length;
}
- else
+ else {
return BUFFER_E;
+ }
return 0;
}
-
#endif /* NO_DH */
@@ -1378,6 +4515,11 @@ int DsaPublicKeyDecode(const byte* input, word32* inOutIdx, DsaKey* key,
word32 inSz)
{
int length;
+ int ret = 0;
+ word32 oid;
+
+ if (input == NULL || inOutIdx == NULL || key == NULL)
+ return BAD_FUNC_ARG;
if (GetSequence(input, inOutIdx, &length, inSz) < 0)
return ASN_PARSE_E;
@@ -1385,148 +4527,431 @@ int DsaPublicKeyDecode(const byte* input, word32* inOutIdx, DsaKey* key,
if (GetInt(&key->p, input, inOutIdx, inSz) < 0 ||
GetInt(&key->q, input, inOutIdx, inSz) < 0 ||
GetInt(&key->g, input, inOutIdx, inSz) < 0 ||
- GetInt(&key->y, input, inOutIdx, inSz) < 0 ) return ASN_DH_KEY_E;
+ GetInt(&key->y, input, inOutIdx, inSz) < 0 )
+ ret = ASN_DH_KEY_E;
+
+ if (ret != 0) {
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ ret = GetObjectId(input, inOutIdx, &oid, oidIgnoreType, inSz);
+ if (ret != 0)
+ return ret;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetInt(&key->p, input, inOutIdx, inSz) < 0 ||
+ GetInt(&key->q, input, inOutIdx, inSz) < 0 ||
+ GetInt(&key->g, input, inOutIdx, inSz) < 0)
+ return ASN_DH_KEY_E;
+
+ if (CheckBitString(input, inOutIdx, &length, inSz, 0, NULL) < 0)
+ return ASN_PARSE_E;
+
+ if (GetInt(&key->y, input, inOutIdx, inSz) < 0 )
+ return ASN_DH_KEY_E;
+
+ ret = 0;
+ }
key->type = DSA_PUBLIC;
- return 0;
+ return ret;
}
int DsaPrivateKeyDecode(const byte* input, word32* inOutIdx, DsaKey* key,
word32 inSz)
{
- int length, version;
+ int length, version, ret = 0, temp = 0;
+
+ /* Sanity checks on input */
+ if (input == NULL || inOutIdx == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
if (GetSequence(input, inOutIdx, &length, inSz) < 0)
return ASN_PARSE_E;
- if (GetMyVersion(input, inOutIdx, &version) < 0)
- return ASN_PARSE_E;
+ temp = (int)*inOutIdx;
- if (GetInt(&key->p, input, inOutIdx, inSz) < 0 ||
- GetInt(&key->q, input, inOutIdx, inSz) < 0 ||
- GetInt(&key->g, input, inOutIdx, inSz) < 0 ||
- GetInt(&key->y, input, inOutIdx, inSz) < 0 ||
- GetInt(&key->x, input, inOutIdx, inSz) < 0 ) return ASN_DH_KEY_E;
+ /* Default case expects a certificate with OctetString but no version ID */
+ ret = GetInt(&key->p, input, inOutIdx, inSz);
+ if (ret < 0) {
+ mp_clear(&key->p);
+ ret = ASN_PARSE_E;
+ }
+ else {
+ ret = GetInt(&key->q, input, inOutIdx, inSz);
+ if (ret < 0) {
+ mp_clear(&key->p);
+ mp_clear(&key->q);
+ ret = ASN_PARSE_E;
+ }
+ else {
+ ret = GetInt(&key->g, input, inOutIdx, inSz);
+ if (ret < 0) {
+ mp_clear(&key->p);
+ mp_clear(&key->q);
+ mp_clear(&key->g);
+ ret = ASN_PARSE_E;
+ }
+ else {
+ ret = GetOctetString(input, inOutIdx, &length, inSz);
+ if (ret < 0) {
+ mp_clear(&key->p);
+ mp_clear(&key->q);
+ mp_clear(&key->g);
+ ret = ASN_PARSE_E;
+ }
+ else {
+ ret = GetInt(&key->y, input, inOutIdx, inSz);
+ if (ret < 0) {
+ mp_clear(&key->p);
+ mp_clear(&key->q);
+ mp_clear(&key->g);
+ mp_clear(&key->y);
+ ret = ASN_PARSE_E;
+ }
+ }
+ }
+ }
+ }
+ /* An alternate pass if default certificate fails parsing */
+ if (ret == ASN_PARSE_E) {
+ *inOutIdx = temp;
+ if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetInt(&key->p, input, inOutIdx, inSz) < 0 ||
+ GetInt(&key->q, input, inOutIdx, inSz) < 0 ||
+ GetInt(&key->g, input, inOutIdx, inSz) < 0 ||
+ GetInt(&key->y, input, inOutIdx, inSz) < 0 ||
+ GetInt(&key->x, input, inOutIdx, inSz) < 0 )
+ return ASN_DH_KEY_E;
+ }
key->type = DSA_PRIVATE;
return 0;
}
-#endif /* NO_DSA */
+static mp_int* GetDsaInt(DsaKey* key, int idx)
+{
+ if (idx == 0)
+ return &key->p;
+ if (idx == 1)
+ return &key->q;
+ if (idx == 2)
+ return &key->g;
+ if (idx == 3)
+ return &key->y;
+ if (idx == 4)
+ return &key->x;
+
+ return NULL;
+}
+
+/* Release Tmp DSA resources */
+static WC_INLINE void FreeTmpDsas(byte** tmps, void* heap)
+{
+ int i;
+
+ for (i = 0; i < DSA_INTS; i++)
+ XFREE(tmps[i], heap, DYNAMIC_TYPE_DSA);
+ (void)heap;
+}
-void InitDecodedCert(DecodedCert* cert, byte* source, word32 inSz, void* heap)
+#if !defined(HAVE_SELFTEST) && defined(WOLFSSL_KEY_GEN)
+/* Write a public DSA key to output */
+int wc_SetDsaPublicKey(byte* output, DsaKey* key,
+ int outLen, int with_header)
{
- cert->publicKey = 0;
- cert->pubKeySize = 0;
- cert->pubKeyStored = 0;
- cert->version = 0;
- cert->signature = 0;
- cert->subjectCN = 0;
- cert->subjectCNLen = 0;
- cert->subjectCNEnc = CTC_UTF8;
- cert->subjectCNStored = 0;
- cert->weOwnAltNames = 0;
- cert->altNames = NULL;
-#ifndef IGNORE_NAME_CONSTRAINTS
- cert->altEmailNames = NULL;
- cert->permittedNames = NULL;
- cert->excludedNames = NULL;
-#endif /* IGNORE_NAME_CONSTRAINTS */
- cert->issuer[0] = '\0';
- cert->subject[0] = '\0';
- cert->source = source; /* don't own */
- cert->srcIdx = 0;
- cert->maxIdx = inSz; /* can't go over this index */
- cert->heap = heap;
- XMEMSET(cert->serial, 0, EXTERNAL_SERIAL_SIZE);
- cert->serialSz = 0;
- cert->extensions = 0;
- cert->extensionsSz = 0;
- cert->extensionsIdx = 0;
- cert->extAuthInfo = NULL;
- cert->extAuthInfoSz = 0;
- cert->extCrlInfo = NULL;
- cert->extCrlInfoSz = 0;
- XMEMSET(cert->extSubjKeyId, 0, KEYID_SIZE);
- cert->extSubjKeyIdSet = 0;
- XMEMSET(cert->extAuthKeyId, 0, KEYID_SIZE);
- cert->extAuthKeyIdSet = 0;
- cert->extKeyUsageSet = 0;
- cert->extKeyUsage = 0;
- cert->extExtKeyUsageSet = 0;
- cert->extExtKeyUsage = 0;
- cert->isCA = 0;
-#ifdef HAVE_PKCS7
- cert->issuerRaw = NULL;
- cert->issuerRawLen = 0;
+ /* p, g, q = DSA params, y = public exponent */
+#ifdef WOLFSSL_SMALL_STACK
+ byte* p = NULL;
+ byte* g = NULL;
+ byte* q = NULL;
+ byte* y = NULL;
+#else
+ byte p[MAX_DSA_INT_SZ];
+ byte g[MAX_DSA_INT_SZ];
+ byte q[MAX_DSA_INT_SZ];
+ byte y[MAX_DSA_INT_SZ];
#endif
-#ifdef WOLFSSL_CERT_GEN
- cert->subjectSN = 0;
- cert->subjectSNLen = 0;
- cert->subjectSNEnc = CTC_UTF8;
- cert->subjectC = 0;
- cert->subjectCLen = 0;
- cert->subjectCEnc = CTC_PRINTABLE;
- cert->subjectL = 0;
- cert->subjectLLen = 0;
- cert->subjectLEnc = CTC_UTF8;
- cert->subjectST = 0;
- cert->subjectSTLen = 0;
- cert->subjectSTEnc = CTC_UTF8;
- cert->subjectO = 0;
- cert->subjectOLen = 0;
- cert->subjectOEnc = CTC_UTF8;
- cert->subjectOU = 0;
- cert->subjectOULen = 0;
- cert->subjectOUEnc = CTC_UTF8;
- cert->subjectEmail = 0;
- cert->subjectEmailLen = 0;
-#endif /* WOLFSSL_CERT_GEN */
- cert->beforeDate = NULL;
- cert->beforeDateLen = 0;
- cert->afterDate = NULL;
- cert->afterDateLen = 0;
-#ifdef OPENSSL_EXTRA
- XMEMSET(&cert->issuerName, 0, sizeof(DecodedName));
- XMEMSET(&cert->subjectName, 0, sizeof(DecodedName));
- cert->extBasicConstSet = 0;
- cert->extBasicConstCrit = 0;
- cert->extBasicConstPlSet = 0;
- cert->pathLength = 0;
- cert->extSubjAltNameSet = 0;
- cert->extSubjAltNameCrit = 0;
- cert->extAuthKeyIdCrit = 0;
- cert->extSubjKeyIdCrit = 0;
- cert->extKeyUsageCrit = 0;
- cert->extExtKeyUsageCrit = 0;
- cert->extExtKeyUsageSrc = NULL;
- cert->extExtKeyUsageSz = 0;
- cert->extExtKeyUsageCount = 0;
- cert->extAuthKeyIdSrc = NULL;
- cert->extAuthKeyIdSz = 0;
- cert->extSubjKeyIdSrc = NULL;
- cert->extSubjKeyIdSz = 0;
-#endif /* OPENSSL_EXTRA */
-#if defined(OPENSSL_EXTRA) || !defined(IGNORE_NAME_CONSTRAINTS)
- cert->extNameConstraintSet = 0;
-#endif /* OPENSSL_EXTRA || !IGNORE_NAME_CONSTRAINTS */
-#ifdef HAVE_ECC
- cert->pkCurveOID = 0;
-#endif /* HAVE_ECC */
-#ifdef WOLFSSL_SEP
- cert->deviceTypeSz = 0;
- cert->deviceType = NULL;
- cert->hwTypeSz = 0;
- cert->hwType = NULL;
- cert->hwSerialNumSz = 0;
- cert->hwSerialNum = NULL;
- #ifdef OPENSSL_EXTRA
- cert->extCertPolicySet = 0;
- cert->extCertPolicyCrit = 0;
- #endif /* OPENSSL_EXTRA */
-#endif /* WOLFSSL_SEP */
+ byte innerSeq[MAX_SEQ_SZ];
+ byte outerSeq[MAX_SEQ_SZ];
+ byte bitString[1 + MAX_LENGTH_SZ + 1];
+ int idx, pSz, gSz, qSz, ySz, innerSeqSz, outerSeqSz, bitStringSz = 0;
+
+ WOLFSSL_ENTER("wc_SetDsaPublicKey");
+
+ if (output == NULL || key == NULL || outLen < MAX_SEQ_SZ) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* p */
+#ifdef WOLFSSL_SMALL_STACK
+ p = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (p == NULL)
+ return MEMORY_E;
+#endif
+ if ((pSz = SetASNIntMP(&key->p, MAX_DSA_INT_SZ, p)) < 0) {
+ WOLFSSL_MSG("SetASNIntMP Error with p");
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return pSz;
+ }
+
+ /* q */
+#ifdef WOLFSSL_SMALL_STACK
+ q = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (q == NULL)
+ return MEMORY_E;
+#endif
+ if ((qSz = SetASNIntMP(&key->q, MAX_DSA_INT_SZ, q)) < 0) {
+ WOLFSSL_MSG("SetASNIntMP Error with q");
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return qSz;
+ }
+
+ /* g */
+#ifdef WOLFSSL_SMALL_STACK
+ g = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (g == NULL)
+ return MEMORY_E;
+#endif
+ if ((gSz = SetASNIntMP(&key->g, MAX_DSA_INT_SZ, g)) < 0) {
+ WOLFSSL_MSG("SetASNIntMP Error with g");
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return gSz;
+ }
+
+ /* y */
+#ifdef WOLFSSL_SMALL_STACK
+ y = (byte*)XMALLOC(MAX_DSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (y == NULL)
+ return MEMORY_E;
+#endif
+ if ((ySz = SetASNIntMP(&key->y, MAX_DSA_INT_SZ, y)) < 0) {
+ WOLFSSL_MSG("SetASNIntMP Error with y");
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return ySz;
+ }
+
+ innerSeqSz = SetSequence(pSz + qSz + gSz, innerSeq);
+
+ /* check output size */
+ if ((innerSeqSz + pSz + qSz + gSz) > outLen) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ WOLFSSL_MSG("Error, output size smaller than outlen");
+ return BUFFER_E;
+ }
+
+ if (with_header) {
+ int algoSz;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* algo = NULL;
+
+ algo = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (algo == NULL) {
+ XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
+#else
+ byte algo[MAX_ALGO_SZ];
+#endif
+ algoSz = SetAlgoID(DSAk, algo, oidKeyType, 0);
+ bitStringSz = SetBitString(ySz, 0, bitString);
+ outerSeqSz = SetSequence(algoSz + innerSeqSz + pSz + qSz + gSz,
+ outerSeq);
+
+ idx = SetSequence(algoSz + innerSeqSz + pSz + qSz + gSz + bitStringSz +
+ ySz + outerSeqSz, output);
+
+ /* check output size */
+ if ((idx + algoSz + bitStringSz + innerSeqSz + pSz + qSz + gSz + ySz) >
+ outLen) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ WOLFSSL_MSG("Error, output size smaller than outlen");
+ return BUFFER_E;
+ }
+
+ /* outerSeq */
+ XMEMCPY(output + idx, outerSeq, outerSeqSz);
+ idx += outerSeqSz;
+ /* algo */
+ XMEMCPY(output + idx, algo, algoSz);
+ idx += algoSz;
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ } else {
+ idx = 0;
+ }
+
+ /* innerSeq */
+ XMEMCPY(output + idx, innerSeq, innerSeqSz);
+ idx += innerSeqSz;
+ /* p */
+ XMEMCPY(output + idx, p, pSz);
+ idx += pSz;
+ /* q */
+ XMEMCPY(output + idx, q, qSz);
+ idx += qSz;
+ /* g */
+ XMEMCPY(output + idx, g, gSz);
+ idx += gSz;
+ /* bit string */
+ XMEMCPY(output + idx, bitString, bitStringSz);
+ idx += bitStringSz;
+ /* y */
+ XMEMCPY(output + idx, y, ySz);
+ idx += ySz;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(p, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(q, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(g, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(y, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return idx;
+}
+
+/* Convert DSA Public key to DER format, write to output (inLen), return bytes
+ written */
+int wc_DsaKeyToPublicDer(DsaKey* key, byte* output, word32 inLen)
+{
+ return wc_SetDsaPublicKey(output, key, inLen, 1);
+}
+#endif /* !HAVE_SELFTEST && WOLFSSL_KEY_GEN */
+
+/* Convert private DsaKey key to DER format, write to output (inLen),
+ return bytes written */
+int wc_DsaKeyToDer(DsaKey* key, byte* output, word32 inLen)
+{
+ word32 seqSz, verSz, rawLen, intTotalLen = 0;
+ word32 sizes[DSA_INTS];
+ int i, j, outLen, ret = 0, mpSz;
+
+ byte seq[MAX_SEQ_SZ];
+ byte ver[MAX_VERSION_SZ];
+ byte* tmps[DSA_INTS];
+
+ if (!key || !output)
+ return BAD_FUNC_ARG;
+
+ if (key->type != DSA_PRIVATE)
+ return BAD_FUNC_ARG;
+
+ for (i = 0; i < DSA_INTS; i++)
+ tmps[i] = NULL;
+
+ /* write all big ints from key to DER tmps */
+ for (i = 0; i < DSA_INTS; i++) {
+ mp_int* keyInt = GetDsaInt(key, i);
+
+ rawLen = mp_unsigned_bin_size(keyInt) + 1;
+ tmps[i] = (byte*)XMALLOC(rawLen + MAX_SEQ_SZ, key->heap,
+ DYNAMIC_TYPE_DSA);
+ if (tmps[i] == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
+
+ mpSz = SetASNIntMP(keyInt, -1, tmps[i]);
+ if (mpSz < 0) {
+ ret = mpSz;
+ break;
+ }
+ intTotalLen += (sizes[i] = mpSz);
+ }
+
+ if (ret != 0) {
+ FreeTmpDsas(tmps, key->heap);
+ return ret;
+ }
+
+ /* make headers */
+ verSz = SetMyVersion(0, ver, FALSE);
+ seqSz = SetSequence(verSz + intTotalLen, seq);
+
+ outLen = seqSz + verSz + intTotalLen;
+ if (outLen > (int)inLen) {
+ FreeTmpDsas(tmps, key->heap);
+ return BAD_FUNC_ARG;
+ }
+
+ /* write to output */
+ XMEMCPY(output, seq, seqSz);
+ j = seqSz;
+ XMEMCPY(output + j, ver, verSz);
+ j += verSz;
+
+ for (i = 0; i < DSA_INTS; i++) {
+ XMEMCPY(output + j, tmps[i], sizes[i]);
+ j += sizes[i];
+ }
+ FreeTmpDsas(tmps, key->heap);
+
+ return outLen;
+}
+
+#endif /* NO_DSA */
+
+void InitDecodedCert(DecodedCert* cert,
+ const byte* source, word32 inSz, void* heap)
+{
+ if (cert != NULL) {
+ XMEMSET(cert, 0, sizeof(DecodedCert));
+
+ cert->subjectCNEnc = CTC_UTF8;
+ cert->issuer[0] = '\0';
+ cert->subject[0] = '\0';
+ cert->source = source; /* don't own */
+ cert->maxIdx = inSz; /* can't go over this index */
+ cert->heap = heap;
+ cert->maxPathLen = WOLFSSL_MAX_PATH_LEN;
+ #ifdef WOLFSSL_CERT_GEN
+ cert->subjectSNEnc = CTC_UTF8;
+ cert->subjectCEnc = CTC_PRINTABLE;
+ cert->subjectLEnc = CTC_UTF8;
+ cert->subjectSTEnc = CTC_UTF8;
+ cert->subjectOEnc = CTC_UTF8;
+ cert->subjectOUEnc = CTC_UTF8;
+ #endif /* WOLFSSL_CERT_GEN */
+
+ #ifndef NO_CERTS
+ InitSignatureCtx(&cert->sigCtx, heap, INVALID_DEVID);
+ #endif
+ }
}
@@ -1562,10 +4987,12 @@ void FreeNameSubtrees(Base_entry* names, void* heap)
void FreeDecodedCert(DecodedCert* cert)
{
+ if (cert == NULL)
+ return;
if (cert->subjectCNStored == 1)
XFREE(cert->subjectCN, cert->heap, DYNAMIC_TYPE_SUBJECT_CN);
if (cert->pubKeyStored == 1)
- XFREE(cert->publicKey, cert->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+ XFREE((void*)cert->publicKey, cert->heap, DYNAMIC_TYPE_PUBLIC_KEY);
if (cert->weOwnAltNames && cert->altNames)
FreeAltNames(cert->altNames, cert->heap);
#ifndef IGNORE_NAME_CONSTRAINTS
@@ -1577,126 +5004,92 @@ void FreeDecodedCert(DecodedCert* cert)
FreeNameSubtrees(cert->excludedNames, cert->heap);
#endif /* IGNORE_NAME_CONSTRAINTS */
#ifdef WOLFSSL_SEP
- XFREE(cert->deviceType, cert->heap, 0);
- XFREE(cert->hwType, cert->heap, 0);
- XFREE(cert->hwSerialNum, cert->heap, 0);
+ XFREE(cert->deviceType, cert->heap, DYNAMIC_TYPE_X509_EXT);
+ XFREE(cert->hwType, cert->heap, DYNAMIC_TYPE_X509_EXT);
+ XFREE(cert->hwSerialNum, cert->heap, DYNAMIC_TYPE_X509_EXT);
#endif /* WOLFSSL_SEP */
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
if (cert->issuerName.fullName != NULL)
- XFREE(cert->issuerName.fullName, NULL, DYNAMIC_TYPE_X509);
+ XFREE(cert->issuerName.fullName, cert->heap, DYNAMIC_TYPE_X509);
if (cert->subjectName.fullName != NULL)
- XFREE(cert->subjectName.fullName, NULL, DYNAMIC_TYPE_X509);
+ XFREE(cert->subjectName.fullName, cert->heap, DYNAMIC_TYPE_X509);
#endif /* OPENSSL_EXTRA */
+#ifdef WOLFSSL_RENESAS_TSIP_TLS
+ if (cert->tsip_encRsaKeyIdx != NULL)
+ XFREE(cert->tsip_encRsaKeyIdx, cert->heap, DYNAMIC_TYPE_RSA);
+#endif
+#ifndef NO_CERTS
+ FreeSignatureCtx(&cert->sigCtx);
+#endif
}
-
static int GetCertHeader(DecodedCert* cert)
{
int ret = 0, len;
- byte serialTmp[EXTERNAL_SERIAL_SIZE];
-#if defined(WOLFSSL_SMALL_STACK) && defined(USE_FAST_MATH)
- mp_int* mpi = NULL;
-#else
- mp_int stack_mpi;
- mp_int* mpi = &stack_mpi;
-#endif
if (GetSequence(cert->source, &cert->srcIdx, &len, cert->maxIdx) < 0)
return ASN_PARSE_E;
+ /* Reset the max index for the size indicated in the outer wrapper. */
+ cert->maxIdx = len + cert->srcIdx;
cert->certBegin = cert->srcIdx;
if (GetSequence(cert->source, &cert->srcIdx, &len, cert->maxIdx) < 0)
return ASN_PARSE_E;
- cert->sigIndex = len + cert->srcIdx;
- if (GetExplicitVersion(cert->source, &cert->srcIdx, &cert->version) < 0)
+ cert->sigIndex = len + cert->srcIdx;
+ if (cert->sigIndex > cert->maxIdx)
return ASN_PARSE_E;
-#if defined(WOLFSSL_SMALL_STACK) && defined(USE_FAST_MATH)
- mpi = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (mpi == NULL)
- return MEMORY_E;
-#endif
-
- if (GetInt(mpi, cert->source, &cert->srcIdx, cert->maxIdx) < 0) {
-#if defined(WOLFSSL_SMALL_STACK) && defined(USE_FAST_MATH)
- XFREE(mpi, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ if (GetExplicitVersion(cert->source, &cert->srcIdx, &cert->version,
+ cert->sigIndex) < 0)
return ASN_PARSE_E;
- }
- len = mp_unsigned_bin_size(mpi);
- if (len < (int)sizeof(serialTmp)) {
- if ( (ret = mp_to_unsigned_bin(mpi, serialTmp)) == MP_OKAY) {
- XMEMCPY(cert->serial, serialTmp, len);
- cert->serialSz = len;
- }
- }
- mp_clear(mpi);
-
-#if defined(WOLFSSL_SMALL_STACK) && defined(USE_FAST_MATH)
- XFREE(mpi, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ if (GetSerialNumber(cert->source, &cert->srcIdx, cert->serial,
+ &cert->serialSz, cert->sigIndex) < 0)
+ return ASN_PARSE_E;
return ret;
}
#if !defined(NO_RSA)
/* Store Rsa Key, may save later, Dsa could use in future */
-static int StoreRsaKey(DecodedCert* cert)
+static int StoreRsaKey(DecodedCert* cert, word32 bitStringEnd)
{
int length;
word32 recvd = cert->srcIdx;
- if (GetSequence(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
+ if (GetSequence(cert->source, &cert->srcIdx, &length, bitStringEnd) < 0)
return ASN_PARSE_E;
-
+
recvd = cert->srcIdx - recvd;
length += recvd;
while (recvd--)
cert->srcIdx--;
-
+#if defined(WOLFSSL_RENESAS_TSIP)
+ cert->sigCtx.pubkey_n_start = cert->sigCtx.pubkey_e_start = cert->srcIdx;
+#endif
cert->pubKeySize = length;
cert->publicKey = cert->source + cert->srcIdx;
cert->srcIdx += length;
return 0;
}
-#endif
-
+#endif /* !NO_RSA */
#ifdef HAVE_ECC
- /* return 0 on sucess if the ECC curve oid sum is supported */
+ /* return 0 on success if the ECC curve oid sum is supported */
static int CheckCurve(word32 oid)
{
int ret = 0;
+ word32 oidSz = 0;
- switch (oid) {
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC160)
- case ECC_160R1:
-#endif
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC192)
- case ECC_192R1:
-#endif
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC224)
- case ECC_224R1:
-#endif
-#if defined(HAVE_ALL_CURVES) || !defined(NO_ECC256)
- case ECC_256R1:
-#endif
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC384)
- case ECC_384R1:
-#endif
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC521)
- case ECC_521R1:
-#endif
- break;
-
- default:
- ret = ALGO_ID_E;
+ ret = wc_ecc_get_oid(oid, NULL, &oidSz);
+ if (ret < 0 || oidSz == 0) {
+ WOLFSSL_MSG("CheckCurve not found");
+ ret = ALGO_ID_E;
}
return ret;
@@ -1704,35 +5097,46 @@ static int StoreRsaKey(DecodedCert* cert)
#endif /* HAVE_ECC */
-
static int GetKey(DecodedCert* cert)
{
int length;
-#ifdef HAVE_NTRU
+#if !defined(NO_DSA) && defined(WOLFSSL_QT)
+ int tmpLen;
+#endif
+#if defined(HAVE_ECC) || defined(HAVE_NTRU)
int tmpIdx = cert->srcIdx;
#endif
if (GetSequence(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
return ASN_PARSE_E;
-
- if (GetAlgoId(cert->source, &cert->srcIdx, &cert->keyOID, cert->maxIdx) < 0)
+
+#if !defined(NO_DSA) && defined(WOLFSSL_QT)
+ tmpLen = length + 4;
+#endif
+
+ if (GetAlgoId(cert->source, &cert->srcIdx,
+ &cert->keyOID, oidKeyType, cert->maxIdx) < 0)
return ASN_PARSE_E;
switch (cert->keyOID) {
#ifndef NO_RSA
case RSAk:
{
- byte b = cert->source[cert->srcIdx++];
- if (b != ASN_BIT_STRING)
- return ASN_BITSTR_E;
+ int ret;
- if (GetLength(cert->source,&cert->srcIdx,&length,cert->maxIdx) < 0)
- return ASN_PARSE_E;
- b = cert->source[cert->srcIdx++];
- if (b != 0x00)
- return ASN_EXPECT_0_E;
-
- return StoreRsaKey(cert);
+ ret = CheckBitString(cert->source, &cert->srcIdx, &length,
+ cert->maxIdx, 1, NULL);
+ if (ret != 0)
+ return ret;
+
+ #ifdef HAVE_OCSP
+ ret = CalcHashId(cert->source + cert->srcIdx, length,
+ cert->subjectKeyHash);
+ if (ret != 0)
+ return ret;
+ #endif
+
+ return StoreRsaKey(cert, cert->srcIdx + length);
}
#endif /* NO_RSA */
@@ -1744,6 +5148,7 @@ static int GetKey(DecodedCert* cert)
word16 keyLen;
word32 rc;
word32 remaining = cert->maxIdx - cert->srcIdx;
+ byte* publicKey;
#ifdef WOLFSSL_SMALL_STACK
byte* keyBlob = NULL;
#else
@@ -1757,8 +5162,8 @@ static int GetKey(DecodedCert* cert)
return ASN_NTRU_KEY_E;
#ifdef WOLFSSL_SMALL_STACK
- keyBlob = (byte*)XMALLOC(MAX_NTRU_KEY_SZ, NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
+ keyBlob = (byte*)XMALLOC(MAX_NTRU_KEY_SZ, cert->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
if (keyBlob == NULL)
return MEMORY_E;
#endif
@@ -1767,34 +5172,35 @@ static int GetKey(DecodedCert* cert)
&keyLen, keyBlob, &next, &remaining);
if (rc != NTRU_OK) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(keyBlob, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyBlob, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return ASN_NTRU_KEY_E;
}
if ( (next - key) < 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(keyBlob, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyBlob, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return ASN_NTRU_KEY_E;
}
cert->srcIdx = tmpIdx + (int)(next - key);
- cert->publicKey = (byte*) XMALLOC(keyLen, cert->heap,
- DYNAMIC_TYPE_PUBLIC_KEY);
- if (cert->publicKey == NULL) {
+ publicKey = (byte*)XMALLOC(keyLen, cert->heap,
+ DYNAMIC_TYPE_PUBLIC_KEY);
+ if (publicKey == NULL) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(keyBlob, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyBlob, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return MEMORY_E;
}
- XMEMCPY(cert->publicKey, keyBlob, keyLen);
+ XMEMCPY(publicKey, keyBlob, keyLen);
+ cert->publicKey = publicKey;
cert->pubKeyStored = 1;
cert->pubKeySize = keyLen;
#ifdef WOLFSSL_SMALL_STACK
- XFREE(keyBlob, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyBlob, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return 0;
@@ -1803,40 +5209,78 @@ static int GetKey(DecodedCert* cert)
#ifdef HAVE_ECC
case ECDSAk:
{
- int oidSz = 0;
- byte b = cert->source[cert->srcIdx++];
-
- if (b != ASN_OBJECT_ID)
- return ASN_OBJECT_ID_E;
-
- if (GetLength(cert->source,&cert->srcIdx,&oidSz,cert->maxIdx) < 0)
+ int ret;
+ byte seq[5];
+ int pubLen = length + 1 + SetLength(length, seq);
+ word32 localIdx;
+ byte* publicKey;
+ byte tag;
+
+ localIdx = cert->srcIdx;
+ if (GetASNTag(cert->source, &localIdx, &tag, cert->maxIdx) < 0)
return ASN_PARSE_E;
- while(oidSz--)
- cert->pkCurveOID += cert->source[cert->srcIdx++];
+ if (tag != (ASN_SEQUENCE | ASN_CONSTRUCTED)) {
+ if (GetObjectId(cert->source, &cert->srcIdx,
+ &cert->pkCurveOID, oidCurveType, cert->maxIdx) < 0)
+ return ASN_PARSE_E;
- if (CheckCurve(cert->pkCurveOID) < 0)
- return ECC_CURVE_OID_E;
+ if (CheckCurve(cert->pkCurveOID) < 0)
+ return ECC_CURVE_OID_E;
+
+ /* key header */
+ ret = CheckBitString(cert->source, &cert->srcIdx, &length,
+ cert->maxIdx, 1, NULL);
+ if (ret != 0)
+ return ret;
+ #ifdef HAVE_OCSP
+ ret = CalcHashId(cert->source + cert->srcIdx, length,
+ cert->subjectKeyHash);
+ if (ret != 0)
+ return ret;
+ #endif
+ }
- /* key header */
- b = cert->source[cert->srcIdx++];
- if (b != ASN_BIT_STRING)
- return ASN_BITSTR_E;
+ publicKey = (byte*)XMALLOC(pubLen, cert->heap,
+ DYNAMIC_TYPE_PUBLIC_KEY);
+ if (publicKey == NULL)
+ return MEMORY_E;
+ XMEMCPY(publicKey, &cert->source[tmpIdx], pubLen);
+ cert->publicKey = publicKey;
+ cert->pubKeyStored = 1;
+ cert->pubKeySize = pubLen;
- if (GetLength(cert->source,&cert->srcIdx,&length,cert->maxIdx) < 0)
- return ASN_PARSE_E;
- b = cert->source[cert->srcIdx++];
- if (b != 0x00)
- return ASN_EXPECT_0_E;
+ cert->srcIdx = tmpIdx + pubLen;
- /* actual key, use length - 1 since ate preceding 0 */
- length -= 1;
+ return 0;
+ }
+ #endif /* HAVE_ECC */
+ #ifdef HAVE_ED25519
+ case ED25519k:
+ {
+ byte* publicKey;
+ int ret;
+
+ cert->pkCurveOID = ED25519k;
- cert->publicKey = (byte*) XMALLOC(length, cert->heap,
- DYNAMIC_TYPE_PUBLIC_KEY);
- if (cert->publicKey == NULL)
+ ret = CheckBitString(cert->source, &cert->srcIdx, &length,
+ cert->maxIdx, 1, NULL);
+ if (ret != 0)
+ return ret;
+
+ #ifdef HAVE_OCSP
+ ret = CalcHashId(cert->source + cert->srcIdx, length,
+ cert->subjectKeyHash);
+ if (ret != 0)
+ return ret;
+ #endif
+
+ publicKey = (byte*) XMALLOC(length, cert->heap,
+ DYNAMIC_TYPE_PUBLIC_KEY);
+ if (publicKey == NULL)
return MEMORY_E;
- XMEMCPY(cert->publicKey, &cert->source[cert->srcIdx], length);
+ XMEMCPY(publicKey, &cert->source[cert->srcIdx], length);
+ cert->publicKey = publicKey;
cert->pubKeyStored = 1;
cert->pubKeySize = length;
@@ -1844,25 +5288,238 @@ static int GetKey(DecodedCert* cert)
return 0;
}
- #endif /* HAVE_ECC */
+ #endif /* HAVE_ED25519 */
+ #ifdef HAVE_ED448
+ case ED448k:
+ {
+ byte* publicKey;
+ int ret;
+
+ cert->pkCurveOID = ED448k;
+
+ ret = CheckBitString(cert->source, &cert->srcIdx, &length,
+ cert->maxIdx, 1, NULL);
+ if (ret != 0)
+ return ret;
+
+ #ifdef HAVE_OCSP
+ ret = CalcHashId(cert->source + cert->srcIdx, length,
+ cert->subjectKeyHash);
+ if (ret != 0)
+ return ret;
+ #endif
+
+ publicKey = (byte*) XMALLOC(length, cert->heap,
+ DYNAMIC_TYPE_PUBLIC_KEY);
+ if (publicKey == NULL)
+ return MEMORY_E;
+ XMEMCPY(publicKey, &cert->source[cert->srcIdx], length);
+ cert->publicKey = publicKey;
+ cert->pubKeyStored = 1;
+ cert->pubKeySize = length;
+
+ cert->srcIdx += length;
+
+ return 0;
+ }
+ #endif /* HAVE_ED448 */
+ #if !defined(NO_DSA) && defined(WOLFSSL_QT)
+ case DSAk:
+ {
+ int ret;
+ ret = GetSequence(cert->source, &cert->srcIdx, &length,
+ cert->maxIdx);
+ if (ret < 0)
+ return ret;
+
+ ret = SkipInt(cert->source, &cert->srcIdx, cert->maxIdx);
+ if (ret != 0)
+ return ret;
+ ret = SkipInt(cert->source, &cert->srcIdx, cert->maxIdx);
+ if (ret != 0)
+ return ret;
+ ret = SkipInt(cert->source, &cert->srcIdx, cert->maxIdx);
+ if (ret != 0)
+ return ret;
+
+ ret = CheckBitString(cert->source, &cert->srcIdx, &length,
+ cert->maxIdx, 1, NULL);
+ if (ret != 0)
+ return ret;
+
+ ret = GetASNInt(cert->source, &cert->srcIdx, &length, cert->maxIdx);
+ if (ret !=0)
+ return ASN_PARSE_E;
+
+ cert->publicKey = cert->source + tmpIdx;
+ cert->pubKeySize = tmpLen;
+ cert->srcIdx += length;
+ return 0;
+ }
+ #endif /* NO_DSA && QT */
default:
return ASN_UNKNOWN_OID_E;
}
}
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+#if defined(HAVE_ECC)
+/* Converts ECC curve enum values in ecc_curve_id to the associated OpenSSL NID
+ value */
+WOLFSSL_API int EccEnumToNID(int n)
+{
+ WOLFSSL_ENTER("EccEnumToNID()");
+
+ switch(n) {
+ case ECC_SECP192R1:
+ return NID_X9_62_prime192v1;
+ case ECC_PRIME192V2:
+ return NID_X9_62_prime192v2;
+ case ECC_PRIME192V3:
+ return NID_X9_62_prime192v3;
+ case ECC_PRIME239V1:
+ return NID_X9_62_prime239v1;
+ case ECC_PRIME239V2:
+ return NID_X9_62_prime239v2;
+ case ECC_PRIME239V3:
+ return NID_X9_62_prime239v3;
+ case ECC_SECP256R1:
+ return NID_X9_62_prime256v1;
+ case ECC_SECP112R1:
+ return NID_secp112r1;
+ case ECC_SECP112R2:
+ return NID_secp112r2;
+ case ECC_SECP128R1:
+ return NID_secp128r1;
+ case ECC_SECP128R2:
+ return NID_secp128r2;
+ case ECC_SECP160R1:
+ return NID_secp160r1;
+ case ECC_SECP160R2:
+ return NID_secp160r2;
+ case ECC_SECP224R1:
+ return NID_secp224r1;
+ case ECC_SECP384R1:
+ return NID_secp384r1;
+ case ECC_SECP521R1:
+ return NID_secp521r1;
+ case ECC_SECP160K1:
+ return NID_secp160k1;
+ case ECC_SECP192K1:
+ return NID_secp192k1;
+ case ECC_SECP224K1:
+ return NID_secp224k1;
+ case ECC_SECP256K1:
+ return NID_secp256k1;
+ case ECC_BRAINPOOLP160R1:
+ return NID_brainpoolP160r1;
+ case ECC_BRAINPOOLP192R1:
+ return NID_brainpoolP192r1;
+ case ECC_BRAINPOOLP224R1:
+ return NID_brainpoolP224r1;
+ case ECC_BRAINPOOLP256R1:
+ return NID_brainpoolP256r1;
+ case ECC_BRAINPOOLP320R1:
+ return NID_brainpoolP320r1;
+ case ECC_BRAINPOOLP384R1:
+ return NID_brainpoolP384r1;
+ case ECC_BRAINPOOLP512R1:
+ return NID_brainpoolP512r1;
+ default:
+ WOLFSSL_MSG("NID not found");
+ return -1;
+ }
+}
+#endif /* HAVE_ECC */
+#endif /* OPENSSL_EXTRA || OPENSSL_EXTRA_X509_SMALL */
+
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+int wc_OBJ_sn2nid(const char *sn)
+{
+ const struct {
+ const char *sn;
+ int nid;
+ } sn2nid[] = {
+ {WOLFSSL_COMMON_NAME, NID_commonName},
+ {WOLFSSL_COUNTRY_NAME, NID_countryName},
+ {WOLFSSL_LOCALITY_NAME, NID_localityName},
+ {WOLFSSL_STATE_NAME, NID_stateOrProvinceName},
+ {WOLFSSL_ORG_NAME, NID_organizationName},
+ {WOLFSSL_ORGUNIT_NAME, NID_organizationalUnitName},
+ {WOLFSSL_EMAIL_ADDR, NID_emailAddress},
+ {NULL, -1}};
+
+ int i;
+ #ifdef HAVE_ECC
+ int eccEnum;
+ #endif
+ WOLFSSL_ENTER("OBJ_sn2nid");
+ for(i=0; sn2nid[i].sn != NULL; i++) {
+ if(XSTRNCMP(sn, sn2nid[i].sn, XSTRLEN(sn2nid[i].sn)) == 0) {
+ return sn2nid[i].nid;
+ }
+ }
+ #ifdef HAVE_ECC
+ /* Nginx uses this OpenSSL string. */
+ if (XSTRNCMP(sn, "prime256v1", 10) == 0)
+ sn = "SECP256R1";
+ if (XSTRNCMP(sn, "secp384r1", 10) == 0)
+ sn = "SECP384R1";
+ /* find based on name and return NID */
+ for (i = 0; ecc_sets[i].size != 0 && ecc_sets[i].name != NULL; i++) {
+ if (XSTRNCMP(sn, ecc_sets[i].name, ECC_MAXNAME) == 0) {
+ eccEnum = ecc_sets[i].id;
+ /* Convert enum value in ecc_curve_id to OpenSSL NID */
+ return EccEnumToNID(eccEnum);
+ }
+ }
+ #endif
+
+ return NID_undef;
+}
+#endif
+
+/* Routine for calculating hashId */
+int CalcHashId(const byte* data, word32 len, byte* hash)
+{
+ int ret;
+
+#ifdef WOLF_CRYPTO_CB
+ /* try to use a registered crypto callback */
+ ret = wc_CryptoCb_Sha256Hash(NULL, data, len, hash);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+#endif
+
+#if defined(NO_SHA) && !defined(NO_SHA256)
+ ret = wc_Sha256Hash(data, len, hash);
+#elif !defined(NO_SHA)
+ ret = wc_ShaHash(data, len, hash);
+#else
+ ret = NOT_COMPILED_IN;
+#endif
+
+ return ret;
+}
/* process NAME, either issuer or subject */
-static int GetName(DecodedCert* cert, int nameType)
+static int GetName(DecodedCert* cert, int nameType, int maxIdx)
{
int length; /* length of all distinguished names */
int dummy;
int ret;
char* full;
byte* hash;
- word32 idx;
- #ifdef OPENSSL_EXTRA
+ word32 idx, localIdx = 0;
+ byte tag;
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
DecodedName* dName =
(nameType == ISSUER) ? &cert->issuerName : &cert->subjectName;
+ int dcnum = 0;
+ #ifdef OPENSSL_EXTRA
+ int count = 0;
+ #endif
#endif /* OPENSSL_EXTRA */
WOLFSSL_MSG("Getting Cert Name");
@@ -1876,13 +5533,20 @@ static int GetName(DecodedCert* cert, int nameType)
hash = cert->subjectHash;
}
- if (cert->source[cert->srcIdx] == ASN_OBJECT_ID) {
+ if (cert->srcIdx >= (word32)maxIdx) {
+ return BUFFER_E;
+ }
+
+ localIdx = cert->srcIdx;
+ if (GetASNTag(cert->source, &localIdx, &tag, maxIdx) < 0) {
+ return ASN_PARSE_E;
+ }
+
+ if (tag == ASN_OBJECT_ID) {
WOLFSSL_MSG("Trying optional prefix...");
- if (GetLength(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
+ if (SkipObjectId(cert->source, &cert->srcIdx, maxIdx) < 0)
return ASN_PARSE_E;
-
- cert->srcIdx += length;
WOLFSSL_MSG("Got optional prefix");
}
@@ -1890,21 +5554,17 @@ static int GetName(DecodedCert* cert, int nameType)
* calculated over the entire DER encoding of the Name field, including
* the tag and length. */
idx = cert->srcIdx;
- if (GetSequence(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
+ if (GetSequence(cert->source, &cert->srcIdx, &length, maxIdx) < 0)
return ASN_PARSE_E;
-#ifdef NO_SHA
- ret = wc_Sha256Hash(&cert->source[idx], length + cert->srcIdx - idx, hash);
-#else
- ret = wc_ShaHash(&cert->source[idx], length + cert->srcIdx - idx, hash);
-#endif
+ ret = CalcHashId(&cert->source[idx], length + cert->srcIdx - idx, hash);
if (ret != 0)
return ret;
length += cert->srcIdx;
idx = 0;
-#ifdef HAVE_PKCS7
+#if defined(HAVE_PKCS7) || defined(WOLFSSL_CERT_EXT)
/* store pointer to raw issuer */
if (nameType == ISSUER) {
cert->issuerRaw = &cert->source[cert->srcIdx];
@@ -1919,45 +5579,39 @@ static int GetName(DecodedCert* cert, int nameType)
#endif
while (cert->srcIdx < (word32)length) {
- byte b;
- byte joint[2];
- byte tooBig = FALSE;
- int oidSz;
-
- if (GetSet(cert->source, &cert->srcIdx, &dummy, cert->maxIdx) < 0) {
+ byte b = 0;
+ byte joint[3];
+ byte tooBig = FALSE;
+ int oidSz;
+ const char* copy = NULL;
+ int copyLen = 0;
+ int strLen = 0;
+ byte id = 0;
+
+ if (GetSet(cert->source, &cert->srcIdx, &dummy, maxIdx) < 0) {
WOLFSSL_MSG("Cert name lacks set header, trying sequence");
}
- if (GetSequence(cert->source, &cert->srcIdx, &dummy, cert->maxIdx) < 0)
+ if (GetSequence(cert->source, &cert->srcIdx, &dummy, maxIdx) <= 0)
return ASN_PARSE_E;
- b = cert->source[cert->srcIdx++];
- if (b != ASN_OBJECT_ID)
- return ASN_OBJECT_ID_E;
+ ret = GetASNObjectId(cert->source, &cert->srcIdx, &oidSz, maxIdx);
+ if (ret != 0)
+ return ret;
- if (GetLength(cert->source, &cert->srcIdx, &oidSz, cert->maxIdx) < 0)
+ /* make sure there is room for joint */
+ if ((cert->srcIdx + sizeof(joint)) > (word32)maxIdx)
return ASN_PARSE_E;
XMEMCPY(joint, &cert->source[cert->srcIdx], sizeof(joint));
/* v1 name types */
if (joint[0] == 0x55 && joint[1] == 0x04) {
- byte id;
- byte copy = FALSE;
- int strLen;
-
- cert->srcIdx += 2;
- id = cert->source[cert->srcIdx++];
- b = cert->source[cert->srcIdx++]; /* encoding */
-
- if (GetLength(cert->source, &cert->srcIdx, &strLen,
- cert->maxIdx) < 0)
+ cert->srcIdx += 3;
+ id = joint[2];
+ if (GetHeader(cert->source, &b, &cert->srcIdx, &strLen,
+ maxIdx, 1) < 0) {
return ASN_PARSE_E;
-
- if ( (strLen + 14) > (int)(ASN_NAME_MAX - idx)) {
- /* include biggest pre fix header too 4 = "/serialNumber=" */
- WOLFSSL_MSG("ASN Name too big, skipping");
- tooBig = TRUE;
}
if (id == ASN_COMMON_NAME) {
@@ -1967,22 +5621,16 @@ static int GetName(DecodedCert* cert, int nameType)
cert->subjectCNEnc = b;
}
- if (!tooBig) {
- XMEMCPY(&full[idx], "/CN=", 4);
- idx += 4;
- copy = TRUE;
- }
- #ifdef OPENSSL_EXTRA
+ copy = WOLFSSL_COMMON_NAME;
+ copyLen = sizeof(WOLFSSL_COMMON_NAME) - 1;
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
dName->cnIdx = cert->srcIdx;
dName->cnLen = strLen;
#endif /* OPENSSL_EXTRA */
}
else if (id == ASN_SUR_NAME) {
- if (!tooBig) {
- XMEMCPY(&full[idx], "/SN=", 4);
- idx += 4;
- copy = TRUE;
- }
+ copy = WOLFSSL_SUR_NAME;
+ copyLen = sizeof(WOLFSSL_SUR_NAME) - 1;
#ifdef WOLFSSL_CERT_GEN
if (nameType == SUBJECT) {
cert->subjectSN = (char*)&cert->source[cert->srcIdx];
@@ -1990,17 +5638,14 @@ static int GetName(DecodedCert* cert, int nameType)
cert->subjectSNEnc = b;
}
#endif /* WOLFSSL_CERT_GEN */
- #ifdef OPENSSL_EXTRA
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
dName->snIdx = cert->srcIdx;
dName->snLen = strLen;
#endif /* OPENSSL_EXTRA */
}
else if (id == ASN_COUNTRY_NAME) {
- if (!tooBig) {
- XMEMCPY(&full[idx], "/C=", 3);
- idx += 3;
- copy = TRUE;
- }
+ copy = WOLFSSL_COUNTRY_NAME;
+ copyLen = sizeof(WOLFSSL_COUNTRY_NAME) - 1;
#ifdef WOLFSSL_CERT_GEN
if (nameType == SUBJECT) {
cert->subjectC = (char*)&cert->source[cert->srcIdx];
@@ -2008,17 +5653,14 @@ static int GetName(DecodedCert* cert, int nameType)
cert->subjectCEnc = b;
}
#endif /* WOLFSSL_CERT_GEN */
- #ifdef OPENSSL_EXTRA
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
dName->cIdx = cert->srcIdx;
dName->cLen = strLen;
#endif /* OPENSSL_EXTRA */
}
else if (id == ASN_LOCALITY_NAME) {
- if (!tooBig) {
- XMEMCPY(&full[idx], "/L=", 3);
- idx += 3;
- copy = TRUE;
- }
+ copy = WOLFSSL_LOCALITY_NAME;
+ copyLen = sizeof(WOLFSSL_LOCALITY_NAME) - 1;
#ifdef WOLFSSL_CERT_GEN
if (nameType == SUBJECT) {
cert->subjectL = (char*)&cert->source[cert->srcIdx];
@@ -2026,17 +5668,14 @@ static int GetName(DecodedCert* cert, int nameType)
cert->subjectLEnc = b;
}
#endif /* WOLFSSL_CERT_GEN */
- #ifdef OPENSSL_EXTRA
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
dName->lIdx = cert->srcIdx;
dName->lLen = strLen;
#endif /* OPENSSL_EXTRA */
}
else if (id == ASN_STATE_NAME) {
- if (!tooBig) {
- XMEMCPY(&full[idx], "/ST=", 4);
- idx += 4;
- copy = TRUE;
- }
+ copy = WOLFSSL_STATE_NAME;
+ copyLen = sizeof(WOLFSSL_STATE_NAME) - 1;
#ifdef WOLFSSL_CERT_GEN
if (nameType == SUBJECT) {
cert->subjectST = (char*)&cert->source[cert->srcIdx];
@@ -2044,17 +5683,14 @@ static int GetName(DecodedCert* cert, int nameType)
cert->subjectSTEnc = b;
}
#endif /* WOLFSSL_CERT_GEN */
- #ifdef OPENSSL_EXTRA
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
dName->stIdx = cert->srcIdx;
dName->stLen = strLen;
#endif /* OPENSSL_EXTRA */
}
else if (id == ASN_ORG_NAME) {
- if (!tooBig) {
- XMEMCPY(&full[idx], "/O=", 3);
- idx += 3;
- copy = TRUE;
- }
+ copy = WOLFSSL_ORG_NAME;
+ copyLen = sizeof(WOLFSSL_ORG_NAME) - 1;
#ifdef WOLFSSL_CERT_GEN
if (nameType == SUBJECT) {
cert->subjectO = (char*)&cert->source[cert->srcIdx];
@@ -2062,17 +5698,14 @@ static int GetName(DecodedCert* cert, int nameType)
cert->subjectOEnc = b;
}
#endif /* WOLFSSL_CERT_GEN */
- #ifdef OPENSSL_EXTRA
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
dName->oIdx = cert->srcIdx;
dName->oLen = strLen;
#endif /* OPENSSL_EXTRA */
}
else if (id == ASN_ORGUNIT_NAME) {
- if (!tooBig) {
- XMEMCPY(&full[idx], "/OU=", 4);
- idx += 4;
- copy = TRUE;
- }
+ copy = WOLFSSL_ORGUNIT_NAME;
+ copyLen = sizeof(WOLFSSL_ORGUNIT_NAME) - 1;
#ifdef WOLFSSL_CERT_GEN
if (nameType == SUBJECT) {
cert->subjectOU = (char*)&cert->source[cert->srcIdx];
@@ -2080,75 +5713,148 @@ static int GetName(DecodedCert* cert, int nameType)
cert->subjectOUEnc = b;
}
#endif /* WOLFSSL_CERT_GEN */
- #ifdef OPENSSL_EXTRA
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
dName->ouIdx = cert->srcIdx;
dName->ouLen = strLen;
#endif /* OPENSSL_EXTRA */
}
else if (id == ASN_SERIAL_NUMBER) {
- if (!tooBig) {
- XMEMCPY(&full[idx], "/serialNumber=", 14);
- idx += 14;
- copy = TRUE;
- }
- #ifdef OPENSSL_EXTRA
+ copy = WOLFSSL_SERIAL_NUMBER;
+ copyLen = sizeof(WOLFSSL_SERIAL_NUMBER) - 1;
+ #ifdef WOLFSSL_CERT_GEN
+ if (nameType == SUBJECT) {
+ cert->subjectSND = (char*)&cert->source[cert->srcIdx];
+ cert->subjectSNDLen = strLen;
+ cert->subjectSNDEnc = b;
+ }
+ #endif /* WOLFSSL_CERT_GEN */
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
dName->snIdx = cert->srcIdx;
dName->snLen = strLen;
#endif /* OPENSSL_EXTRA */
}
+ #ifdef WOLFSSL_CERT_EXT
+ else if (id == ASN_BUS_CAT) {
+ copy = WOLFSSL_BUS_CAT;
+ copyLen = sizeof(WOLFSSL_BUS_CAT) - 1;
+ #ifdef WOLFSSL_CERT_GEN
+ if (nameType == SUBJECT) {
+ cert->subjectBC = (char*)&cert->source[cert->srcIdx];
+ cert->subjectBCLen = strLen;
+ cert->subjectBCEnc = b;
+ }
+ #endif /* WOLFSSL_CERT_GEN */
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+ dName->bcIdx = cert->srcIdx;
+ dName->bcLen = strLen;
+ #endif /* OPENSSL_EXTRA */
+ }
+ #endif /* WOLFSSL_CERT_EXT */
+ }
+ #ifdef WOLFSSL_CERT_EXT
+ else if ((cert->srcIdx + ASN_JOI_PREFIX_SZ + 2 <= (word32)maxIdx) &&
+ (0 == XMEMCMP(&cert->source[cert->srcIdx], ASN_JOI_PREFIX,
+ ASN_JOI_PREFIX_SZ)) &&
+ ((cert->source[cert->srcIdx+ASN_JOI_PREFIX_SZ] == ASN_JOI_C) ||
+ (cert->source[cert->srcIdx+ASN_JOI_PREFIX_SZ] == ASN_JOI_ST)))
+ {
+ cert->srcIdx += ASN_JOI_PREFIX_SZ;
+ id = cert->source[cert->srcIdx++];
+ b = cert->source[cert->srcIdx++]; /* encoding */
- if (copy && !tooBig) {
- XMEMCPY(&full[idx], &cert->source[cert->srcIdx], strLen);
- idx += strLen;
+ if (GetLength(cert->source, &cert->srcIdx, &strLen,
+ maxIdx) < 0)
+ return ASN_PARSE_E;
+
+ /* Check for jurisdiction of incorporation country name */
+ if (id == ASN_JOI_C) {
+ copy = WOLFSSL_JOI_C;
+ copyLen = sizeof(WOLFSSL_JOI_C) - 1;
+ #ifdef WOLFSSL_CERT_GEN
+ if (nameType == SUBJECT) {
+ cert->subjectJC = (char*)&cert->source[cert->srcIdx];
+ cert->subjectJCLen = strLen;
+ cert->subjectJCEnc = b;
+ }
+ #endif /* WOLFSSL_CERT_GEN */
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+ dName->jcIdx = cert->srcIdx;
+ dName->jcLen = strLen;
+ #endif /* OPENSSL_EXTRA */
+ }
+
+ /* Check for jurisdiction of incorporation state name */
+ else if (id == ASN_JOI_ST) {
+ copy = WOLFSSL_JOI_ST;
+ copyLen = sizeof(WOLFSSL_JOI_ST) - 1;
+ #ifdef WOLFSSL_CERT_GEN
+ if (nameType == SUBJECT) {
+ cert->subjectJS = (char*)&cert->source[cert->srcIdx];
+ cert->subjectJSLen = strLen;
+ cert->subjectJSEnc = b;
+ }
+ #endif /* WOLFSSL_CERT_GEN */
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+ dName->jsIdx = cert->srcIdx;
+ dName->jsLen = strLen;
+ #endif /* OPENSSL_EXTRA */
}
- cert->srcIdx += strLen;
+ if ((strLen + copyLen) > (int)(ASN_NAME_MAX - idx)) {
+ WOLFSSL_MSG("ASN Name too big, skipping");
+ tooBig = TRUE;
+ }
}
+ #endif /* WOLFSSL_CERT_EXT */
else {
/* skip */
byte email = FALSE;
- byte uid = FALSE;
- int adv;
+ byte pilot = FALSE;
- if (joint[0] == 0x2a && joint[1] == 0x86) /* email id hdr */
+ if (joint[0] == 0x2a && joint[1] == 0x86) { /* email id hdr */
+ id = ASN_EMAIL_NAME;
email = TRUE;
+ }
- if (joint[0] == 0x9 && joint[1] == 0x92) /* uid id hdr */
- uid = TRUE;
+ if (joint[0] == 0x9 && joint[1] == 0x92) { /* uid id hdr */
+ /* last value of OID is the type of pilot attribute */
+ id = cert->source[cert->srcIdx + oidSz - 1];
+ pilot = TRUE;
+ }
cert->srcIdx += oidSz + 1;
- if (GetLength(cert->source, &cert->srcIdx, &adv, cert->maxIdx) < 0)
+ if (GetLength(cert->source, &cert->srcIdx, &strLen, maxIdx) < 0)
return ASN_PARSE_E;
- if (adv > (int)(ASN_NAME_MAX - idx)) {
+ if (strLen > (int)(ASN_NAME_MAX - idx)) {
WOLFSSL_MSG("ASN name too big, skipping");
tooBig = TRUE;
}
if (email) {
- if ( (14 + adv) > (int)(ASN_NAME_MAX - idx)) {
+ copyLen = sizeof(WOLFSSL_EMAIL_ADDR) - 1;
+ if ((copyLen + strLen) > (int)(ASN_NAME_MAX - idx)) {
WOLFSSL_MSG("ASN name too big, skipping");
tooBig = TRUE;
}
- if (!tooBig) {
- XMEMCPY(&full[idx], "/emailAddress=", 14);
- idx += 14;
+ else {
+ copy = WOLFSSL_EMAIL_ADDR;
}
#ifdef WOLFSSL_CERT_GEN
if (nameType == SUBJECT) {
cert->subjectEmail = (char*)&cert->source[cert->srcIdx];
- cert->subjectEmailLen = adv;
+ cert->subjectEmailLen = strLen;
}
#endif /* WOLFSSL_CERT_GEN */
- #ifdef OPENSSL_EXTRA
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
dName->emailIdx = cert->srcIdx;
- dName->emailLen = adv;
+ dName->emailLen = strLen;
#endif /* OPENSSL_EXTRA */
#ifndef IGNORE_NAME_CONSTRAINTS
{
- DNS_entry* emailName = NULL;
+ DNS_entry* emailName;
emailName = (DNS_entry*)XMALLOC(sizeof(DNS_entry),
cert->heap, DYNAMIC_TYPE_ALTNAME);
@@ -2156,52 +5862,85 @@ static int GetName(DecodedCert* cert, int nameType)
WOLFSSL_MSG("\tOut of Memory");
return MEMORY_E;
}
- emailName->name = (char*)XMALLOC(adv + 1,
+ emailName->type = 0;
+ emailName->name = (char*)XMALLOC(strLen + 1,
cert->heap, DYNAMIC_TYPE_ALTNAME);
if (emailName->name == NULL) {
WOLFSSL_MSG("\tOut of Memory");
+ XFREE(emailName, cert->heap, DYNAMIC_TYPE_ALTNAME);
return MEMORY_E;
}
- XMEMCPY(emailName->name,
- &cert->source[cert->srcIdx], adv);
- emailName->name[adv] = 0;
+ emailName->len = strLen;
+ XMEMCPY(emailName->name, &cert->source[cert->srcIdx],
+ strLen);
+ emailName->name[strLen] = '\0';
emailName->next = cert->altEmailNames;
cert->altEmailNames = emailName;
}
#endif /* IGNORE_NAME_CONSTRAINTS */
- if (!tooBig) {
- XMEMCPY(&full[idx], &cert->source[cert->srcIdx], adv);
- idx += adv;
- }
}
- if (uid) {
- if ( (5 + adv) > (int)(ASN_NAME_MAX - idx)) {
- WOLFSSL_MSG("ASN name too big, skipping");
- tooBig = TRUE;
- }
- if (!tooBig) {
- XMEMCPY(&full[idx], "/UID=", 5);
- idx += 5;
+ if (pilot) {
+ switch (id) {
+ case ASN_USER_ID:
+ copy = WOLFSSL_USER_ID;
+ copyLen = sizeof(WOLFSSL_USER_ID) - 1;
+ #if defined(OPENSSL_EXTRA) || \
+ defined(OPENSSL_EXTRA_X509_SMALL)
+ dName->uidIdx = cert->srcIdx;
+ dName->uidLen = strLen;
+ #endif /* OPENSSL_EXTRA */
+ break;
- XMEMCPY(&full[idx], &cert->source[cert->srcIdx], adv);
- idx += adv;
+ case ASN_DOMAIN_COMPONENT:
+ copy = WOLFSSL_DOMAIN_COMPONENT;
+ copyLen = sizeof(WOLFSSL_DOMAIN_COMPONENT) - 1;
+ #if defined(OPENSSL_EXTRA) || \
+ defined(OPENSSL_EXTRA_X509_SMALL)
+ dName->dcIdx[dcnum] = cert->srcIdx;
+ dName->dcLen[dcnum] = strLen;
+ dName->dcNum = dcnum + 1;
+ dcnum++;
+ #endif /* OPENSSL_EXTRA */
+ break;
+
+ default:
+ WOLFSSL_MSG("Unknown pilot attribute type");
+ return ASN_PARSE_E;
}
- #ifdef OPENSSL_EXTRA
- dName->uidIdx = cert->srcIdx;
- dName->uidLen = adv;
- #endif /* OPENSSL_EXTRA */
}
+ }
+ if ((copyLen + strLen) > (int)(ASN_NAME_MAX - idx))
+ {
+ WOLFSSL_MSG("ASN Name too big, skipping");
+ tooBig = TRUE;
+ }
+ if ((copy != NULL) && !tooBig) {
+ XMEMCPY(&full[idx], copy, copyLen);
+ idx += copyLen;
+ XMEMCPY(&full[idx], &cert->source[cert->srcIdx], strLen);
+ idx += strLen;
- cert->srcIdx += adv;
+ #ifdef OPENSSL_EXTRA
+ if (count < DOMAIN_COMPONENT_MAX) {
+ /* store order that DN was parsed */
+ dName->loc[count++] = id;
+ }
+ #endif
}
+ cert->srcIdx += strLen;
}
full[idx++] = 0;
+#if defined(OPENSSL_EXTRA)
+ /* store order that DN was parsed */
+ dName->locSz = count;
+#endif
- #ifdef OPENSSL_EXTRA
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
{
int totalLen = 0;
+ int i = 0;
if (dName->cnLen != 0)
totalLen += dName->cnLen + 4;
@@ -2223,14 +5962,20 @@ static int GetName(DecodedCert* cert, int nameType)
totalLen += dName->uidLen + 5;
if (dName->serialLen != 0)
totalLen += dName->serialLen + 14;
+ if (dName->dcNum != 0){
+ for (i = 0;i < dName->dcNum;i++)
+ totalLen += dName->dcLen[i] + 4;
+ }
- dName->fullName = (char*)XMALLOC(totalLen + 1, NULL, DYNAMIC_TYPE_X509);
+ dName->fullName = (char*)XMALLOC(totalLen + 1, cert->heap,
+ DYNAMIC_TYPE_X509);
if (dName->fullName != NULL) {
idx = 0;
if (dName->cnLen != 0) {
dName->entryCount++;
- XMEMCPY(&dName->fullName[idx], "/CN=", 4);
+ XMEMCPY(&dName->fullName[idx], WOLFSSL_COMMON_NAME, 4);
+ dName->cnNid = wc_OBJ_sn2nid((const char *)WOLFSSL_COMMON_NAME);
idx += 4;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->cnIdx], dName->cnLen);
@@ -2239,7 +5984,8 @@ static int GetName(DecodedCert* cert, int nameType)
}
if (dName->snLen != 0) {
dName->entryCount++;
- XMEMCPY(&dName->fullName[idx], "/SN=", 4);
+ XMEMCPY(&dName->fullName[idx], WOLFSSL_SUR_NAME, 4);
+ dName->snNid = wc_OBJ_sn2nid((const char *)WOLFSSL_SUR_NAME);
idx += 4;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->snIdx], dName->snLen);
@@ -2248,7 +5994,8 @@ static int GetName(DecodedCert* cert, int nameType)
}
if (dName->cLen != 0) {
dName->entryCount++;
- XMEMCPY(&dName->fullName[idx], "/C=", 3);
+ XMEMCPY(&dName->fullName[idx], WOLFSSL_COUNTRY_NAME, 3);
+ dName->cNid = wc_OBJ_sn2nid((const char *)WOLFSSL_COUNTRY_NAME);
idx += 3;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->cIdx], dName->cLen);
@@ -2257,7 +6004,8 @@ static int GetName(DecodedCert* cert, int nameType)
}
if (dName->lLen != 0) {
dName->entryCount++;
- XMEMCPY(&dName->fullName[idx], "/L=", 3);
+ XMEMCPY(&dName->fullName[idx], WOLFSSL_LOCALITY_NAME, 3);
+ dName->lNid = wc_OBJ_sn2nid((const char *)WOLFSSL_LOCALITY_NAME);
idx += 3;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->lIdx], dName->lLen);
@@ -2266,7 +6014,8 @@ static int GetName(DecodedCert* cert, int nameType)
}
if (dName->stLen != 0) {
dName->entryCount++;
- XMEMCPY(&dName->fullName[idx], "/ST=", 4);
+ XMEMCPY(&dName->fullName[idx], WOLFSSL_STATE_NAME, 4);
+ dName->stNid = wc_OBJ_sn2nid((const char *)WOLFSSL_STATE_NAME);
idx += 4;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->stIdx], dName->stLen);
@@ -2275,7 +6024,8 @@ static int GetName(DecodedCert* cert, int nameType)
}
if (dName->oLen != 0) {
dName->entryCount++;
- XMEMCPY(&dName->fullName[idx], "/O=", 3);
+ XMEMCPY(&dName->fullName[idx], WOLFSSL_ORG_NAME, 3);
+ dName->oNid = wc_OBJ_sn2nid((const char *)WOLFSSL_ORG_NAME);
idx += 3;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->oIdx], dName->oLen);
@@ -2284,7 +6034,8 @@ static int GetName(DecodedCert* cert, int nameType)
}
if (dName->ouLen != 0) {
dName->entryCount++;
- XMEMCPY(&dName->fullName[idx], "/OU=", 4);
+ XMEMCPY(&dName->fullName[idx], WOLFSSL_ORGUNIT_NAME, 4);
+ dName->ouNid = wc_OBJ_sn2nid((const char *)WOLFSSL_ORGUNIT_NAME);
idx += 4;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->ouIdx], dName->ouLen);
@@ -2294,15 +6045,28 @@ static int GetName(DecodedCert* cert, int nameType)
if (dName->emailLen != 0) {
dName->entryCount++;
XMEMCPY(&dName->fullName[idx], "/emailAddress=", 14);
+ dName->emailNid = wc_OBJ_sn2nid((const char *)"/emailAddress=");
idx += 14;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->emailIdx], dName->emailLen);
dName->emailIdx = idx;
idx += dName->emailLen;
}
+ for (i = 0;i < dName->dcNum;i++){
+ if (dName->dcLen[i] != 0) {
+ dName->entryCount++;
+ XMEMCPY(&dName->fullName[idx], WOLFSSL_DOMAIN_COMPONENT, 4);
+ idx += 4;
+ XMEMCPY(&dName->fullName[idx],
+ &cert->source[dName->dcIdx[i]], dName->dcLen[i]);
+ dName->dcIdx[i] = idx;
+ idx += dName->dcLen[i];
+ }
+ }
if (dName->uidLen != 0) {
dName->entryCount++;
XMEMCPY(&dName->fullName[idx], "/UID=", 5);
+ dName->uidNid = wc_OBJ_sn2nid((const char *)"/UID=");
idx += 5;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->uidIdx], dName->uidLen);
@@ -2311,7 +6075,8 @@ static int GetName(DecodedCert* cert, int nameType)
}
if (dName->serialLen != 0) {
dName->entryCount++;
- XMEMCPY(&dName->fullName[idx], "/serialNumber=", 14);
+ XMEMCPY(&dName->fullName[idx], WOLFSSL_SERIAL_NUMBER, 14);
+ dName->serialNid = wc_OBJ_sn2nid((const char *)WOLFSSL_SERIAL_NUMBER);
idx += 14;
XMEMCPY(&dName->fullName[idx],
&cert->source[dName->serialIdx], dName->serialLen);
@@ -2328,17 +6093,203 @@ static int GetName(DecodedCert* cert, int nameType)
}
-#ifndef NO_TIME_H
+#ifndef NO_ASN_TIME
+
+/* two byte date/time, add to value */
+/* Parses the two ASCII digits at date[*idx] and folds them into *value
+ * (accumulating, so the caller may pre-load a century or offset), then
+ * advances *idx past both bytes. Returns 0 on success or ASN_PARSE_E if
+ * either byte is outside '0'..'9'.
+ * NOTE(review): caller must guarantee date[*idx + 1] is readable. */
+static WC_INLINE int GetTime(int* value, const byte* date, int* idx)
+{
+ int i = *idx;
+
+ if (date[i] < 0x30 || date[i] > 0x39 || date[i+1] < 0x30 ||
+ date[i+1] > 0x39) {
+ return ASN_PARSE_E;
+ }
+
+ *value += btoi(date[i++]) * 10;
+ *value += btoi(date[i++]);
+
+ *idx = i;
+
+ return 0;
+}
+
+/* Decode an ASN.1 date body (UTCTime or GeneralizedTime digits) starting at
+ * date[*idx] into certTime, advancing *idx over the digits consumed.
+ * Century handling: UTCTime two-digit years >= 50 map to 19xx, else 20xx;
+ * GeneralizedTime reads the leading two digits as the century directly.
+ * Returns 1 on success, 0 when a non-digit is encountered. */
+int ExtractDate(const unsigned char* date, unsigned char format,
+ struct tm* certTime, int* idx)
+{
+ XMEMSET(certTime, 0, sizeof(struct tm));
+
+ if (format == ASN_UTC_TIME) {
+ if (btoi(date[*idx]) >= 5)
+ certTime->tm_year = 1900;
+ else
+ certTime->tm_year = 2000;
+ }
+ else { /* format == GENERALIZED_TIME */
+ if (GetTime(&certTime->tm_year, date, idx) != 0) return 0;
+ certTime->tm_year *= 100;
+ }
+
+ /* adjust tm_year, tm_mon */
+ if (GetTime(&certTime->tm_year, date, idx) != 0) return 0;
+ certTime->tm_year -= 1900;
+ if (GetTime(&certTime->tm_mon , date, idx) != 0) return 0;
+ certTime->tm_mon -= 1;
+ if (GetTime(&certTime->tm_mday, date, idx) != 0) return 0;
+ if (GetTime(&certTime->tm_hour, date, idx) != 0) return 0;
+ if (GetTime(&certTime->tm_min , date, idx) != 0) return 0;
+ if (GetTime(&certTime->tm_sec , date, idx) != 0) return 0;
+
+ return 1;
+}
+
+
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_MYSQL_COMPATIBLE) || \
+ defined(OPENSSL_EXTRA) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+/* Render an ASN.1 date (must be Zulu/'Z'-terminated) into buf as
+ * "Mon DD HH:MM:SS YYYY GMT". Returns 1 on success, 0 on failure
+ * (bad digits, non-'Z' terminator, or out-of-range month).
+ * NOTE(review): assumes len > 4; XSNPRINTF bounds only the tail after the
+ * 4-byte month prefix written by XSTRNCAT - confirm callers size buf. */
+int GetTimeString(byte* date, int format, char* buf, int len)
+{
+ struct tm t;
+ int idx = 0;
+
+ if (!ExtractDate(date, (unsigned char)format, &t, &idx)) {
+ return 0;
+ }
+
+ if (date[idx] != 'Z') {
+ WOLFSSL_MSG("UTCtime, not Zulu") ;
+ return 0;
+ }
+
+ /* place month in buffer */
+ buf[0] = '\0';
+ switch(t.tm_mon) {
+ case 0: XSTRNCAT(buf, "Jan ", 5); break;
+ case 1: XSTRNCAT(buf, "Feb ", 5); break;
+ case 2: XSTRNCAT(buf, "Mar ", 5); break;
+ case 3: XSTRNCAT(buf, "Apr ", 5); break;
+ case 4: XSTRNCAT(buf, "May ", 5); break;
+ case 5: XSTRNCAT(buf, "Jun ", 5); break;
+ case 6: XSTRNCAT(buf, "Jul ", 5); break;
+ case 7: XSTRNCAT(buf, "Aug ", 5); break;
+ case 8: XSTRNCAT(buf, "Sep ", 5); break;
+ case 9: XSTRNCAT(buf, "Oct ", 5); break;
+ case 10: XSTRNCAT(buf, "Nov ", 5); break;
+ case 11: XSTRNCAT(buf, "Dec ", 5); break;
+ default:
+ return 0;
+
+ }
+ idx = 4; /* use idx now for char buffer */
+
+ XSNPRINTF(buf + idx, len - idx, "%2d %02d:%02d:%02d %d GMT",
+ t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, t.tm_year + 1900);
+
+ return 1;
+}
+#endif /* OPENSSL_ALL || WOLFSSL_MYSQL_COMPATIBLE || WOLFSSL_NGINX || WOLFSSL_HAPROXY */
+
+
+#if !defined(NO_ASN_TIME) && defined(HAVE_PKCS7)
+
+/* Set current time string, either UTC or GeneralizedTime.
+ * (void*) tm should be a pointer to time_t, output is placed in buf.
+ *
+ * Return time string length placed in buf on success, negative on error */
+int GetAsnTimeString(void* currTime, byte* buf, word32 len)
+{
+ struct tm* ts = NULL;
+ struct tm* tmpTime = NULL;
+#if defined(NEED_TMP_TIME)
+ struct tm tmpTimeStorage;
+ tmpTime = &tmpTimeStorage;
+#else
+ (void)tmpTime;
+#endif
+ /* NOTE(review): the declarations below follow a statement inside the
+ * NEED_TMP_TIME branch above (mixed declarations/statements) - not strict
+ * C89; confirm all supported compilers accept this. */
+ byte* data_ptr = buf;
+ word32 data_len = 0;
+ int year, mon, day, hour, mini, sec;
+
+ /* NOTE(review): enter-log label says "SetAsnTimeString" though the
+ * function is GetAsnTimeString - confirm this is intentional. */
+ WOLFSSL_ENTER("SetAsnTimeString");
+
+ if (buf == NULL || len == 0)
+ return BAD_FUNC_ARG;
+
+ ts = (struct tm *)XGMTIME((time_t*)currTime, tmpTime);
+ if (ts == NULL){
+ WOLFSSL_MSG("failed to get time data.");
+ return ASN_TIME_E;
+ }
+
+ /* Note ASN_UTC_TIME_SIZE and ASN_GENERALIZED_TIME_SIZE include space for
+ * the null terminator. ASN encoded values leave off the terminator. */
+
+ if (ts->tm_year >= 50 && ts->tm_year < 150) {
+ /* UTC Time */
+ char utc_str[ASN_UTC_TIME_SIZE];
+ data_len = ASN_UTC_TIME_SIZE - 1 + 2;
+
+ if (len < data_len)
+ return BUFFER_E;
+
+ if (ts->tm_year >= 50 && ts->tm_year < 100) {
+ year = ts->tm_year;
+ } else if (ts->tm_year >= 100 && ts->tm_year < 150) {
+ year = ts->tm_year - 100;
+ }
+ else {
+ WOLFSSL_MSG("unsupported year range");
+ return BAD_FUNC_ARG;
+ }
+ mon = ts->tm_mon + 1;
+ day = ts->tm_mday;
+ hour = ts->tm_hour;
+ mini = ts->tm_min;
+ sec = ts->tm_sec;
+ XSNPRINTF((char *)utc_str, ASN_UTC_TIME_SIZE,
+ "%02d%02d%02d%02d%02d%02dZ", year, mon, day, hour, mini, sec);
+ *data_ptr = (byte) ASN_UTC_TIME; data_ptr++;
+ /* -1 below excludes null terminator */
+ *data_ptr = (byte) ASN_UTC_TIME_SIZE - 1; data_ptr++;
+ XMEMCPY(data_ptr,(byte *)utc_str, ASN_UTC_TIME_SIZE - 1);
+
+ } else {
+ /* GeneralizedTime */
+ char gt_str[ASN_GENERALIZED_TIME_SIZE];
+ data_len = ASN_GENERALIZED_TIME_SIZE - 1 + 2;
+
+ if (len < data_len)
+ return BUFFER_E;
+
+ year = ts->tm_year + 1900;
+ mon = ts->tm_mon + 1;
+ day = ts->tm_mday;
+ hour = ts->tm_hour;
+ mini = ts->tm_min;
+ sec = ts->tm_sec;
+ XSNPRINTF((char *)gt_str, ASN_GENERALIZED_TIME_SIZE,
+ "%4d%02d%02d%02d%02d%02dZ", year, mon, day, hour, mini, sec);
+ *data_ptr = (byte) ASN_GENERALIZED_TIME; data_ptr++;
+ /* -1 below excludes null terminator */
+ *data_ptr = (byte) ASN_GENERALIZED_TIME_SIZE - 1; data_ptr++;
+ XMEMCPY(data_ptr,(byte *)gt_str, ASN_GENERALIZED_TIME_SIZE - 1);
+ }
+
+ return data_len;
+}
+
+#endif /* !NO_ASN_TIME && HAVE_PKCS7 */
+
+
+#if defined(USE_WOLF_VALIDDATE)
/* to the second */
-static int DateGreaterThan(const struct tm* a, const struct tm* b)
+int DateGreaterThan(const struct tm* a, const struct tm* b)
{
if (a->tm_year > b->tm_year)
return 1;
if (a->tm_year == b->tm_year && a->tm_mon > b->tm_mon)
return 1;
-
+
if (a->tm_year == b->tm_year && a->tm_mon == b->tm_mon &&
a->tm_mday > b->tm_mday)
return 1;
@@ -2361,12 +6312,11 @@ static int DateGreaterThan(const struct tm* a, const struct tm* b)
}
+/* DateLessThan(a,b): strict a < b on broken-down times, implemented by
+ * swapping the operands into DateGreaterThan. */
-static INLINE int DateLessThan(const struct tm* a, const struct tm* b)
+static WC_INLINE int DateLessThan(const struct tm* a, const struct tm* b)
{
 return DateGreaterThan(b,a);
}
-
/* like atoi but only use first byte */
/* Make sure before and after dates are valid */
int ValidateDate(const byte* date, byte format, int dateType)
@@ -2374,64 +6324,142 @@ int ValidateDate(const byte* date, byte format, int dateType)
time_t ltime;
struct tm certTime;
struct tm* localTime;
- struct tm* tmpTime = NULL;
+ struct tm* tmpTime;
int i = 0;
+ int timeDiff = 0 ;
+ int diffHH = 0 ; int diffMM = 0 ;
+ int diffSign = 0 ;
-#if defined(FREESCALE_MQX) || defined(TIME_OVERRIDES)
+#if defined(NEED_TMP_TIME)
struct tm tmpTimeStorage;
tmpTime = &tmpTimeStorage;
#else
- (void)tmpTime;
+ tmpTime = NULL;
#endif
+ (void)tmpTime;
ltime = XTIME(0);
- XMEMSET(&certTime, 0, sizeof(certTime));
- if (format == ASN_UTC_TIME) {
- if (btoi(date[0]) >= 5)
- certTime.tm_year = 1900;
- else
- certTime.tm_year = 2000;
+#ifdef WOLFSSL_BEFORE_DATE_CLOCK_SKEW
+ if (dateType == BEFORE) {
+ WOLFSSL_MSG("Skewing local time for before date check");
+ ltime += WOLFSSL_BEFORE_DATE_CLOCK_SKEW;
}
- else { /* format == GENERALIZED_TIME */
- certTime.tm_year += btoi(date[i++]) * 1000;
- certTime.tm_year += btoi(date[i++]) * 100;
+#endif
+
+#ifdef WOLFSSL_AFTER_DATE_CLOCK_SKEW
+ if (dateType == AFTER) {
+ WOLFSSL_MSG("Skewing local time for after date check");
+ ltime -= WOLFSSL_AFTER_DATE_CLOCK_SKEW;
}
+#endif
- /* adjust tm_year, tm_mon */
- GetTime((int*)&certTime.tm_year, date, &i); certTime.tm_year -= 1900;
- GetTime((int*)&certTime.tm_mon, date, &i); certTime.tm_mon -= 1;
- GetTime((int*)&certTime.tm_mday, date, &i);
- GetTime((int*)&certTime.tm_hour, date, &i);
- GetTime((int*)&certTime.tm_min, date, &i);
- GetTime((int*)&certTime.tm_sec, date, &i);
-
- if (date[i] != 'Z') { /* only Zulu supported for this profile */
- WOLFSSL_MSG("Only Zulu time supported for this profile");
+ if (!ExtractDate(date, format, &certTime, &i)) {
+ WOLFSSL_MSG("Error extracting the date");
return 0;
}
+ if ((date[i] == '+') || (date[i] == '-')) {
+ WOLFSSL_MSG("Using time differential, not Zulu") ;
+ diffSign = date[i++] == '+' ? 1 : -1 ;
+ if (GetTime(&diffHH, date, &i) != 0)
+ return 0;
+ if (GetTime(&diffMM, date, &i) != 0)
+ return 0;
+ timeDiff = diffSign * (diffHH*60 + diffMM) * 60 ;
+ } else if (date[i] != 'Z') {
+ WOLFSSL_MSG("UTCtime, neither Zulu or time differential") ;
+ return 0;
+ }
+
+ ltime -= (time_t)timeDiff ;
localTime = XGMTIME(&ltime, tmpTime);
+ if (localTime == NULL) {
+ WOLFSSL_MSG("XGMTIME failed");
+ return 0;
+ }
+
if (dateType == BEFORE) {
- if (DateLessThan(localTime, &certTime))
+ if (DateLessThan(localTime, &certTime)) {
+ WOLFSSL_MSG("Date BEFORE check failed");
return 0;
+ }
}
- else
- if (DateGreaterThan(localTime, &certTime))
+ else { /* dateType == AFTER */
+ if (DateGreaterThan(localTime, &certTime)) {
+ WOLFSSL_MSG("Date AFTER check failed");
return 0;
+ }
+ }
return 1;
}
+#endif /* USE_WOLF_VALIDDATE */
+
+/* wc_GetTime - write the current time (XTIME(0)) into the caller's buffer.
+ * timePtr must be non-NULL and timeSize must be at least sizeof(time_t).
+ * Returns 0 on success, BAD_FUNC_ARG, or BUFFER_E on a too-small buffer. */
+int wc_GetTime(void* timePtr, word32 timeSize)
+{
+ time_t* ltime = (time_t*)timePtr;
+
+ if (timePtr == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if ((word32)sizeof(time_t) > timeSize) {
+ return BUFFER_E;
+ }
+
+ *ltime = XTIME(0);
+
+ return 0;
+}
-#endif /* NO_TIME_H */
+#endif /* !NO_ASN_TIME */
-static int GetDate(DecodedCert* cert, int dateType)
+/* Get date buffer, format and length. Returns 0=success or error */
+/* source: DER buffer, bounded by maxIdx; *idx advances past the entire
+ * date element (tag + length + content). pFormat (ASN tag: UTCTime or
+ * GeneralizedTime), pDate (pointer to the raw content) and pLength
+ * (content length) are each optional and may be NULL. The length is
+ * range-checked against MIN_DATE_SIZE/MAX_DATE_SIZE. */
+static int GetDateInfo(const byte* source, word32* idx, const byte** pDate,
+ byte* pFormat, int* pLength, word32 maxIdx)
{
- int length;
+ int length;
+ byte format;
+
+ if (source == NULL || idx == NULL)
+ return BAD_FUNC_ARG;
+
+ /* get ASN format header */
+ if (*idx+1 > maxIdx)
+ return BUFFER_E;
+ format = source[*idx];
+ *idx += 1;
+ if (format != ASN_UTC_TIME && format != ASN_GENERALIZED_TIME)
+ return ASN_TIME_E;
+
+ /* get length */
+ if (GetLength(source, idx, &length, maxIdx) < 0)
+ return ASN_PARSE_E;
+ if (length > MAX_DATE_SIZE || length < MIN_DATE_SIZE)
+ return ASN_DATE_SZ_E;
+
+ /* return format, date and length */
+ if (pFormat)
+ *pFormat = format;
+ if (pDate)
+ *pDate = &source[*idx];
+ if (pLength)
+ *pLength = length;
+
+ *idx += length;
+
+ return 0;
+}
+
+static int GetDate(DecodedCert* cert, int dateType, int verify, int maxIdx)
+{
+ int ret, length;
+ const byte *datePtr = NULL;
byte date[MAX_DATE_SIZE];
- byte b;
+ byte format;
word32 startIdx = 0;
if (dateType == BEFORE)
@@ -2440,49 +6468,50 @@ static int GetDate(DecodedCert* cert, int dateType)
cert->afterDate = &cert->source[cert->srcIdx];
startIdx = cert->srcIdx;
- b = cert->source[cert->srcIdx++];
- if (b != ASN_UTC_TIME && b != ASN_GENERALIZED_TIME)
- return ASN_TIME_E;
-
- if (GetLength(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
- return ASN_PARSE_E;
-
- if (length > MAX_DATE_SIZE || length < MIN_DATE_SIZE)
- return ASN_DATE_SZ_E;
+ ret = GetDateInfo(cert->source, &cert->srcIdx, &datePtr, &format,
+ &length, maxIdx);
+ if (ret < 0)
+ return ret;
- XMEMCPY(date, &cert->source[cert->srcIdx], length);
- cert->srcIdx += length;
+ XMEMSET(date, 0, MAX_DATE_SIZE);
+ XMEMCPY(date, datePtr, length);
if (dateType == BEFORE)
cert->beforeDateLen = cert->srcIdx - startIdx;
else
cert->afterDateLen = cert->srcIdx - startIdx;
- if (!XVALIDATE_DATE(date, b, dateType)) {
+#ifndef NO_ASN_TIME
+ if (verify != NO_VERIFY && verify != VERIFY_SKIP_DATE &&
+ !XVALIDATE_DATE(date, format, dateType)) {
if (dateType == BEFORE)
return ASN_BEFORE_DATE_E;
else
return ASN_AFTER_DATE_E;
}
+#else
+ (void)verify;
+#endif
return 0;
}
-
-static int GetValidity(DecodedCert* cert, int verify)
+static int GetValidity(DecodedCert* cert, int verify, int maxIdx)
{
int length;
int badDate = 0;
- if (GetSequence(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
+ if (GetSequence(cert->source, &cert->srcIdx, &length, maxIdx) < 0)
return ASN_PARSE_E;
- if (GetDate(cert, BEFORE) < 0 && verify)
- badDate = ASN_BEFORE_DATE_E; /* continue parsing */
-
- if (GetDate(cert, AFTER) < 0 && verify)
+ maxIdx = cert->srcIdx + length;
+
+ if (GetDate(cert, BEFORE, verify, maxIdx) < 0)
+ badDate = ASN_BEFORE_DATE_E; /* continue parsing */
+
+ if (GetDate(cert, AFTER, verify, maxIdx) < 0)
return ASN_AFTER_DATE_E;
-
+
if (badDate != 0)
return badDate;
@@ -2490,32 +6519,111 @@ static int GetValidity(DecodedCert* cert, int verify)
}
-int DecodeToKey(DecodedCert* cert, int verify)
+/* Public wrapper over GetDateInfo: parse a single DER-encoded date element
+ * from certDate (certDateSz bytes), returning its tag format, a pointer to
+ * the raw content and the content length. 0 on success, else negative. */
+int wc_GetDateInfo(const byte* certDate, int certDateSz, const byte** date,
+ byte* format, int* length)
{
- int badDate = 0;
 int ret;
+ word32 idx = 0;
+
+ ret = GetDateInfo(certDate, &idx, date, format, length, certDateSz);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+#ifndef NO_ASN_TIME
+/* Convert a parsed ASN.1 date body to a struct tm via ExtractDate.
+ * length is accepted for API symmetry but unused.
+ * Returns 0 on success or ASN_TIME_E on parse failure. */
+int wc_GetDateAsCalendarTime(const byte* date, int length, byte format,
+ struct tm* timearg)
+{
+ int idx = 0;
+ (void)length;
+ if (!ExtractDate(date, format, timearg, &idx))
+ return ASN_TIME_E;
+ return 0;
+}
+
+#if defined(WOLFSSL_CERT_GEN) && defined(WOLFSSL_ALT_NAMES)
+/* Fill before/after (either may be NULL to skip that field) from the raw
+ * date blobs stored in a Cert, using wc_GetDateInfo to locate the digits
+ * and wc_GetDateAsCalendarTime to convert them.
+ * Returns 0 on success or the first negative error encountered. */
+int wc_GetCertDates(Cert* cert, struct tm* before, struct tm* after)
+{
+ int ret = 0;
+ const byte* date;
+ byte format;
+ int length;
+ if (cert == NULL)
+ return BAD_FUNC_ARG;
+
+ if (before && cert->beforeDateSz > 0) {
+ ret = wc_GetDateInfo(cert->beforeDate, cert->beforeDateSz, &date,
+ &format, &length);
+ if (ret == 0)
+ ret = wc_GetDateAsCalendarTime(date, length, format, before);
+ }
+ if (after && cert->afterDateSz > 0) {
+ ret = wc_GetDateInfo(cert->afterDate, cert->afterDateSz, &date,
+ &format, &length);
+ if (ret == 0)
+ ret = wc_GetDateAsCalendarTime(date, length, format, after);
+ }
+
+ return ret;
+}
+#endif /* WOLFSSL_CERT_GEN && WOLFSSL_ALT_NAMES */
+#endif /* !NO_ASN_TIME */
+
+/* parses certificate up to point of X.509 public key
+ *
+ * if cert date is invalid then badDate gets set to error value, otherwise is 0
+ *
+ * returns a negative value on fail case
+ */
+int wc_GetPubX509(DecodedCert* cert, int verify, int* badDate)
+{
+ int ret;
+
+ if (cert == NULL || badDate == NULL)
+ return BAD_FUNC_ARG;
+
+ *badDate = 0;
 if ( (ret = GetCertHeader(cert)) < 0)
 return ret;
 WOLFSSL_MSG("Got Cert Header");
+ /* Using the sigIndex as the upper bound because that's where the
+ * actual certificate data ends. */
 if ( (ret = GetAlgoId(cert->source, &cert->srcIdx, &cert->signatureOID,
- cert->maxIdx)) < 0)
+ oidSigType, cert->sigIndex)) < 0)
 return ret;
 WOLFSSL_MSG("Got Algo ID");
- if ( (ret = GetName(cert, ISSUER)) < 0)
+ if ( (ret = GetName(cert, ISSUER, cert->sigIndex)) < 0)
 return ret;
- if ( (ret = GetValidity(cert, verify)) < 0)
- badDate = ret;
+ /* a date failure is recorded in *badDate and parsing continues, so the
+ * caller can decide whether an expired/not-yet-valid cert is fatal */
+ if ( (ret = GetValidity(cert, verify, cert->sigIndex)) < 0)
+ *badDate = ret;
- if ( (ret = GetName(cert, SUBJECT)) < 0)
+ if ( (ret = GetName(cert, SUBJECT, cert->sigIndex)) < 0)
 return ret;
 WOLFSSL_MSG("Got Subject Name");
+ return ret;
+}
+
+int DecodeToKey(DecodedCert* cert, int verify)
+{
+ int badDate = 0;
+ int ret;
+
+ if ( (ret = wc_GetPubX509(cert, verify, &badDate)) < 0)
+ return ret;
+
+ /* Determine if self signed */
+ cert->selfSigned = XMEMCMP(cert->issuerHash,
+ cert->subjectHash,
+ KEYID_SIZE) == 0 ? 1 : 0;
if ( (ret = GetKey(cert)) < 0)
return ret;
@@ -2528,40 +6636,36 @@ int DecodeToKey(DecodedCert* cert, int verify)
return ret;
}
-
static int GetSignature(DecodedCert* cert)
{
- int length;
- byte b = cert->source[cert->srcIdx++];
-
- if (b != ASN_BIT_STRING)
- return ASN_BITSTR_E;
-
- if (GetLength(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
- return ASN_PARSE_E;
+ int length;
+ int ret;
+ ret = CheckBitString(cert->source, &cert->srcIdx, &length, cert->maxIdx, 1,
+ NULL);
+ if (ret != 0)
+ return ret;
cert->sigLength = length;
-
- b = cert->source[cert->srcIdx++];
- if (b != 0x00)
- return ASN_EXPECT_0_E;
-
- cert->sigLength--;
cert->signature = &cert->source[cert->srcIdx];
cert->srcIdx += cert->sigLength;
return 0;
}
+static word32 SetOctetString8Bit(word32 len, byte* output)
+{
+ output[0] = ASN_OCTET_STRING;
+ output[1] = (byte)len;
+ return 2;
+}
static word32 SetDigest(const byte* digest, word32 digSz, byte* output)
{
- output[0] = ASN_OCTET_STRING;
- output[1] = (byte)digSz;
- XMEMCPY(&output[2], digest, digSz);
+ word32 idx = SetOctetString8Bit(digSz, output);
+ XMEMCPY(&output[idx], digest, digSz);
- return digSz + 2;
-}
+ return idx + digSz;
+}
static word32 BytePrecision(word32 value)
@@ -2575,17 +6679,23 @@ static word32 BytePrecision(word32 value)
}
-WOLFSSL_LOCAL word32 SetLength(word32 length, byte* output)
+word32 SetLength(word32 length, byte* output)
{
word32 i = 0, j;
- if (length < ASN_LONG_LENGTH)
- output[i++] = (byte)length;
+ if (length < ASN_LONG_LENGTH) {
+ if (output)
+ output[i] = (byte)length;
+ i++;
+ }
else {
- output[i++] = (byte)(BytePrecision(length) | ASN_LONG_LENGTH);
-
+ if (output)
+ output[i] = (byte)(BytePrecision(length) | ASN_LONG_LENGTH);
+ i++;
+
for (j = BytePrecision(length); j; --j) {
- output[i] = (byte)(length >> ((j - 1) * WOLFSSL_BIT_SIZE));
+ if (output)
+ output[i] = (byte)(length >> ((j - 1) * WOLFSSL_BIT_SIZE));
i++;
}
}
@@ -2593,27 +6703,27 @@ WOLFSSL_LOCAL word32 SetLength(word32 length, byte* output)
return i;
}
-
-WOLFSSL_LOCAL word32 SetSequence(word32 len, byte* output)
+word32 SetSequence(word32 len, byte* output)
{
- output[0] = ASN_SEQUENCE | ASN_CONSTRUCTED;
- return SetLength(len, output + 1) + 1;
+ if (output)
+ output[0] = ASN_SEQUENCE | ASN_CONSTRUCTED;
+ return SetLength(len, output ? output + 1 : NULL) + 1;
}
-WOLFSSL_LOCAL word32 SetOctetString(word32 len, byte* output)
+word32 SetOctetString(word32 len, byte* output)
{
output[0] = ASN_OCTET_STRING;
return SetLength(len, output + 1) + 1;
}
/* Write a set header to output */
-WOLFSSL_LOCAL word32 SetSet(word32 len, byte* output)
+word32 SetSet(word32 len, byte* output)
{
output[0] = ASN_SET | ASN_CONSTRUCTED;
return SetLength(len, output + 1) + 1;
}
-WOLFSSL_LOCAL word32 SetImplicit(byte tag, byte number, word32 len, byte* output)
+word32 SetImplicit(byte tag, byte number, word32 len, byte* output)
{
output[0] = ((tag == ASN_SEQUENCE || tag == ASN_SET) ? ASN_CONSTRUCTED : 0)
@@ -2621,323 +6731,121 @@ WOLFSSL_LOCAL word32 SetImplicit(byte tag, byte number, word32 len, byte* output
return SetLength(len, output + 1) + 1;
}
-WOLFSSL_LOCAL word32 SetExplicit(byte number, word32 len, byte* output)
+word32 SetExplicit(byte number, word32 len, byte* output)
{
output[0] = ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | number;
return SetLength(len, output + 1) + 1;
}
-#if defined(HAVE_ECC) && (defined(WOLFSSL_CERT_GEN) || defined(WOLFSSL_KEY_GEN))
+#if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)
-static word32 SetCurve(ecc_key* key, byte* output)
+static int SetCurve(ecc_key* key, byte* output)
{
-
- /* curve types */
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC192)
- static const byte ECC_192v1_AlgoID[] = { 0x2a, 0x86, 0x48, 0xCE, 0x3d,
- 0x03, 0x01, 0x01};
-#endif
-#if defined(HAVE_ALL_CURVES) || !defined(NO_ECC256)
- static const byte ECC_256v1_AlgoID[] = { 0x2a, 0x86, 0x48, 0xCE, 0x3d,
- 0x03, 0x01, 0x07};
-#endif
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC160)
- static const byte ECC_160r1_AlgoID[] = { 0x2b, 0x81, 0x04, 0x00,
- 0x02};
-#endif
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC224)
- static const byte ECC_224r1_AlgoID[] = { 0x2b, 0x81, 0x04, 0x00,
- 0x21};
-#endif
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC384)
- static const byte ECC_384r1_AlgoID[] = { 0x2b, 0x81, 0x04, 0x00,
- 0x22};
-#endif
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC521)
- static const byte ECC_521r1_AlgoID[] = { 0x2b, 0x81, 0x04, 0x00,
- 0x23};
-#endif
-
- int oidSz = 0;
- int idx = 0;
- int lenSz = 0;
- const byte* oid = 0;
-
- output[0] = ASN_OBJECT_ID;
- idx++;
-
- switch (key->dp->size) {
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC160)
- case 20:
- oidSz = sizeof(ECC_160r1_AlgoID);
- oid = ECC_160r1_AlgoID;
- break;
-#endif
-
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC192)
- case 24:
- oidSz = sizeof(ECC_192v1_AlgoID);
- oid = ECC_192v1_AlgoID;
- break;
-#endif
-
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC224)
- case 28:
- oidSz = sizeof(ECC_224r1_AlgoID);
- oid = ECC_224r1_AlgoID;
- break;
+#ifdef HAVE_OID_ENCODING
+ int ret;
#endif
+ int idx = 0;
+ word32 oidSz = 0;
-#if defined(HAVE_ALL_CURVES) || !defined(NO_ECC256)
- case 32:
- oidSz = sizeof(ECC_256v1_AlgoID);
- oid = ECC_256v1_AlgoID;
- break;
-#endif
+ /* validate key */
+ if (key == NULL || key->dp == NULL) {
+ return BAD_FUNC_ARG;
+ }
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC384)
- case 48:
- oidSz = sizeof(ECC_384r1_AlgoID);
- oid = ECC_384r1_AlgoID;
- break;
+#ifdef HAVE_OID_ENCODING
+ ret = EncodeObjectId(key->dp->oid, key->dp->oidSz, NULL, &oidSz);
+ if (ret != 0) {
+ return ret;
+ }
+#else
+ oidSz = key->dp->oidSz;
#endif
-#if defined(HAVE_ALL_CURVES) || defined(HAVE_ECC521)
- case 66:
- oidSz = sizeof(ECC_521r1_AlgoID);
- oid = ECC_521r1_AlgoID;
- break;
-#endif
+ idx += SetObjectId(oidSz, output);
- default:
- return ASN_UNKNOWN_OID_E;
+#ifdef HAVE_OID_ENCODING
+ ret = EncodeObjectId(key->dp->oid, key->dp->oidSz, output+idx, &oidSz);
+ if (ret != 0) {
+ return ret;
}
- lenSz = SetLength(oidSz, output+idx);
- idx += lenSz;
-
- XMEMCPY(output+idx, oid, oidSz);
+#else
+ XMEMCPY(output+idx, key->dp->oid, oidSz);
+#endif
idx += oidSz;
return idx;
}
-#endif /* HAVE_ECC && WOLFSSL_CERT_GEN */
+#endif /* HAVE_ECC && HAVE_ECC_KEY_EXPORT */
-WOLFSSL_LOCAL word32 SetAlgoID(int algoOID, byte* output, int type, int curveSz)
+#ifdef HAVE_ECC
+static WC_INLINE int IsSigAlgoECDSA(int algoOID)
{
- /* adding TAG_NULL and 0 to end */
-
- /* hashTypes */
- static const byte shaAlgoID[] = { 0x2b, 0x0e, 0x03, 0x02, 0x1a,
- 0x05, 0x00 };
- static const byte sha256AlgoID[] = { 0x60, 0x86, 0x48, 0x01, 0x65, 0x03,
- 0x04, 0x02, 0x01, 0x05, 0x00 };
- static const byte sha384AlgoID[] = { 0x60, 0x86, 0x48, 0x01, 0x65, 0x03,
- 0x04, 0x02, 0x02, 0x05, 0x00 };
- static const byte sha512AlgoID[] = { 0x60, 0x86, 0x48, 0x01, 0x65, 0x03,
- 0x04, 0x02, 0x03, 0x05, 0x00 };
- static const byte md5AlgoID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d,
- 0x02, 0x05, 0x05, 0x00 };
- static const byte md2AlgoID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d,
- 0x02, 0x02, 0x05, 0x00};
-
- /* blkTypes, no NULL tags because IV is there instead */
- static const byte desCbcAlgoID[] = { 0x2B, 0x0E, 0x03, 0x02, 0x07 };
- static const byte des3CbcAlgoID[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
- 0x0D, 0x03, 0x07 };
-
- /* RSA sigTypes */
- #ifndef NO_RSA
- static const byte md5wRSA_AlgoID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7,
- 0x0d, 0x01, 0x01, 0x04, 0x05, 0x00};
- static const byte shawRSA_AlgoID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7,
- 0x0d, 0x01, 0x01, 0x05, 0x05, 0x00};
- static const byte sha256wRSA_AlgoID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7,
- 0x0d, 0x01, 0x01, 0x0b, 0x05, 0x00};
- static const byte sha384wRSA_AlgoID[] = {0x2a, 0x86, 0x48, 0x86, 0xf7,
- 0x0d, 0x01, 0x01, 0x0c, 0x05, 0x00};
- static const byte sha512wRSA_AlgoID[] = {0x2a, 0x86, 0x48, 0x86, 0xf7,
- 0x0d, 0x01, 0x01, 0x0d, 0x05, 0x00};
- #endif /* NO_RSA */
-
- /* ECDSA sigTypes */
- #ifdef HAVE_ECC
- static const byte shawECDSA_AlgoID[] = { 0x2a, 0x86, 0x48, 0xCE, 0x3d,
- 0x04, 0x01, 0x05, 0x00};
- static const byte sha256wECDSA_AlgoID[] = { 0x2a, 0x86, 0x48, 0xCE,0x3d,
- 0x04, 0x03, 0x02, 0x05, 0x00};
- static const byte sha384wECDSA_AlgoID[] = { 0x2a, 0x86, 0x48, 0xCE,0x3d,
- 0x04, 0x03, 0x03, 0x05, 0x00};
- static const byte sha512wECDSA_AlgoID[] = { 0x2a, 0x86, 0x48, 0xCE,0x3d,
- 0x04, 0x03, 0x04, 0x05, 0x00};
- #endif /* HAVE_ECC */
-
- /* RSA keyType */
- #ifndef NO_RSA
- static const byte RSA_AlgoID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d,
- 0x01, 0x01, 0x01, 0x05, 0x00};
- #endif /* NO_RSA */
+ /* ECDSA sigAlgo must not have ASN1 NULL parameters */
+ if (algoOID == CTC_SHAwECDSA || algoOID == CTC_SHA256wECDSA ||
+ algoOID == CTC_SHA384wECDSA || algoOID == CTC_SHA512wECDSA) {
+ return 1;
+ }
- #ifdef HAVE_ECC
- /* ECC keyType */
- /* no tags, so set tagSz smaller later */
- static const byte ECC_AlgoID[] = { 0x2a, 0x86, 0x48, 0xCE, 0x3d,
- 0x02, 0x01};
- #endif /* HAVE_ECC */
+ return 0;
+}
+#endif
- int algoSz = 0;
- int tagSz = 2; /* tag null and terminator */
- word32 idSz, seqSz;
+word32 SetAlgoID(int algoOID, byte* output, int type, int curveSz)
+{
+ word32 tagSz, idSz, seqSz, algoSz = 0;
const byte* algoName = 0;
- byte ID_Length[MAX_LENGTH_SZ];
- byte seqArray[MAX_SEQ_SZ + 1]; /* add object_id to end */
-
- if (type == hashType) {
- switch (algoOID) {
- case SHAh:
- algoSz = sizeof(shaAlgoID);
- algoName = shaAlgoID;
- break;
-
- case SHA256h:
- algoSz = sizeof(sha256AlgoID);
- algoName = sha256AlgoID;
- break;
-
- case SHA384h:
- algoSz = sizeof(sha384AlgoID);
- algoName = sha384AlgoID;
- break;
-
- case SHA512h:
- algoSz = sizeof(sha512AlgoID);
- algoName = sha512AlgoID;
- break;
+ byte ID_Length[1 + MAX_LENGTH_SZ];
+ byte seqArray[MAX_SEQ_SZ + 1]; /* add object_id to end */
+ int length = 0;
+
+ tagSz = (type == oidHashType ||
+ (type == oidSigType
+ #ifdef HAVE_ECC
+ && !IsSigAlgoECDSA(algoOID)
+ #endif
+ #ifdef HAVE_ED25519
+ && algoOID != ED25519k
+ #endif
+ #ifdef HAVE_ED448
+ && algoOID != ED448k
+ #endif
+ ) ||
+ (type == oidKeyType && algoOID == RSAk)) ? 2 : 0;
- case MD2h:
- algoSz = sizeof(md2AlgoID);
- algoName = md2AlgoID;
- break;
+ algoName = OidFromId(algoOID, type, &algoSz);
- case MD5h:
- algoSz = sizeof(md5AlgoID);
- algoName = md5AlgoID;
- break;
-
- default:
- WOLFSSL_MSG("Unknown Hash Algo");
- return 0; /* UNKOWN_HASH_E; */
- }
- }
- else if (type == blkType) {
- switch (algoOID) {
- case DESb:
- algoSz = sizeof(desCbcAlgoID);
- algoName = desCbcAlgoID;
- tagSz = 0;
- break;
- case DES3b:
- algoSz = sizeof(des3CbcAlgoID);
- algoName = des3CbcAlgoID;
- tagSz = 0;
- break;
- default:
- WOLFSSL_MSG("Unknown Block Algo");
- return 0;
- }
+ if (algoName == NULL) {
+ WOLFSSL_MSG("Unknown Algorithm");
+ return 0;
}
- else if (type == sigType) { /* sigType */
- switch (algoOID) {
- #ifndef NO_RSA
- case CTC_MD5wRSA:
- algoSz = sizeof(md5wRSA_AlgoID);
- algoName = md5wRSA_AlgoID;
- break;
- case CTC_SHAwRSA:
- algoSz = sizeof(shawRSA_AlgoID);
- algoName = shawRSA_AlgoID;
- break;
-
- case CTC_SHA256wRSA:
- algoSz = sizeof(sha256wRSA_AlgoID);
- algoName = sha256wRSA_AlgoID;
- break;
-
- case CTC_SHA384wRSA:
- algoSz = sizeof(sha384wRSA_AlgoID);
- algoName = sha384wRSA_AlgoID;
- break;
-
- case CTC_SHA512wRSA:
- algoSz = sizeof(sha512wRSA_AlgoID);
- algoName = sha512wRSA_AlgoID;
- break;
- #endif /* NO_RSA */
- #ifdef HAVE_ECC
- case CTC_SHAwECDSA:
- algoSz = sizeof(shawECDSA_AlgoID);
- algoName = shawECDSA_AlgoID;
- break;
-
- case CTC_SHA256wECDSA:
- algoSz = sizeof(sha256wECDSA_AlgoID);
- algoName = sha256wECDSA_AlgoID;
- break;
-
- case CTC_SHA384wECDSA:
- algoSz = sizeof(sha384wECDSA_AlgoID);
- algoName = sha384wECDSA_AlgoID;
- break;
+ idSz = SetObjectId(algoSz, ID_Length);
+ seqSz = SetSequence(idSz + algoSz + tagSz + curveSz, seqArray);
- case CTC_SHA512wECDSA:
- algoSz = sizeof(sha512wECDSA_AlgoID);
- algoName = sha512wECDSA_AlgoID;
- break;
- #endif /* HAVE_ECC */
- default:
- WOLFSSL_MSG("Unknown Signature Algo");
- return 0;
- }
- }
- else if (type == keyType) { /* keyType */
- switch (algoOID) {
- #ifndef NO_RSA
- case RSAk:
- algoSz = sizeof(RSA_AlgoID);
- algoName = RSA_AlgoID;
- break;
- #endif /* NO_RSA */
- #ifdef HAVE_ECC
- case ECDSAk:
- algoSz = sizeof(ECC_AlgoID);
- algoName = ECC_AlgoID;
- tagSz = 0;
- break;
- #endif /* HAVE_ECC */
- default:
- WOLFSSL_MSG("Unknown Key Algo");
- return 0;
- }
+ /* Copy only algo to output for DSA keys */
+ if (algoOID == DSAk && output) {
+ XMEMCPY(output, ID_Length, idSz);
+ XMEMCPY(output + idSz, algoName, algoSz);
+ if (tagSz == 2)
+ SetASNNull(&output[seqSz + idSz + algoSz]);
}
- else {
- WOLFSSL_MSG("Unknown Algo type");
- return 0;
+ else if (output) {
+ XMEMCPY(output, seqArray, seqSz);
+ XMEMCPY(output + seqSz, ID_Length, idSz);
+ XMEMCPY(output + seqSz + idSz, algoName, algoSz);
+ if (tagSz == 2)
+ SetASNNull(&output[seqSz + idSz + algoSz]);
}
- idSz = SetLength(algoSz - tagSz, ID_Length); /* don't include tags */
- seqSz = SetSequence(idSz + algoSz + 1 + curveSz, seqArray);
- /* +1 for object id, curveID of curveSz follows for ecc */
- seqArray[seqSz++] = ASN_OBJECT_ID;
-
- XMEMCPY(output, seqArray, seqSz);
- XMEMCPY(output + seqSz, ID_Length, idSz);
- XMEMCPY(output + seqSz + idSz, algoName, algoSz);
-
- return seqSz + idSz + algoSz;
+ if (algoOID == DSAk)
+ length = idSz + algoSz + tagSz;
+ else
+ length = seqSz + idSz + algoSz + tagSz;
+ return length;
}
@@ -2950,7 +6858,7 @@ word32 wc_EncodeSignature(byte* out, const byte* digest, word32 digSz,
word32 encDigSz, algoSz, seqSz;
encDigSz = SetDigest(digest, digSz, digArray);
- algoSz = SetAlgoID(hashOID, algoArray, hashType, 0);
+ algoSz = SetAlgoID(hashOID, algoArray, oidHashType, 0);
seqSz = SetSequence(encDigSz + algoSz, seqArray);
XMEMCPY(out, seqArray, seqSz);
@@ -2961,288 +6869,569 @@ word32 wc_EncodeSignature(byte* out, const byte* digest, word32 digSz,
}
+#ifndef NO_CERTS
+
int wc_GetCTC_HashOID(int type)
{
- switch (type) {
-#ifdef WOLFSSL_MD2
- case MD2:
- return MD2h;
-#endif
-#ifndef NO_MD5
- case MD5:
- return MD5h;
-#endif
-#ifndef NO_SHA
- case SHA:
- return SHAh;
-#endif
-#ifndef NO_SHA256
- case SHA256:
- return SHA256h;
-#endif
-#ifdef WOLFSSL_SHA384
- case SHA384:
- return SHA384h;
-#endif
-#ifdef WOLFSSL_SHA512
- case SHA512:
- return SHA512h;
-#endif
- default:
- return 0;
- };
+ int ret;
+ enum wc_HashType hType;
+
+ hType = wc_HashTypeConvert(type);
+ ret = wc_HashGetOID(hType);
+ if (ret < 0)
+ ret = 0; /* backwards compatibility */
+
+ return ret;
}
+void InitSignatureCtx(SignatureCtx* sigCtx, void* heap, int devId)
+{
+ if (sigCtx) {
+ XMEMSET(sigCtx, 0, sizeof(SignatureCtx));
+ sigCtx->devId = devId;
+ sigCtx->heap = heap;
+ }
+}
-/* return true (1) or false (0) for Confirmation */
-static int ConfirmSignature(const byte* buf, word32 bufSz,
- const byte* key, word32 keySz, word32 keyOID,
- const byte* sig, word32 sigSz, word32 sigOID,
- void* heap)
+void FreeSignatureCtx(SignatureCtx* sigCtx)
{
- int typeH = 0, digestSz = 0, ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- byte* digest;
-#else
- byte digest[MAX_DIGEST_SIZE];
-#endif
+ if (sigCtx == NULL)
+ return;
-#ifdef WOLFSSL_SMALL_STACK
- digest = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (digest == NULL)
- return 0; /* not confirmed */
+ if (sigCtx->digest) {
+ XFREE(sigCtx->digest, sigCtx->heap, DYNAMIC_TYPE_DIGEST);
+ sigCtx->digest = NULL;
+ }
+#ifndef NO_RSA
+ if (sigCtx->plain) {
+ XFREE(sigCtx->plain, sigCtx->heap, DYNAMIC_TYPE_SIGNATURE);
+ sigCtx->plain = NULL;
+ }
+#endif
+#ifndef NO_ASN_CRYPT
+ if (sigCtx->key.ptr) {
+ switch (sigCtx->keyOID) {
+ #ifndef NO_RSA
+ case RSAk:
+ wc_FreeRsaKey(sigCtx->key.rsa);
+ XFREE(sigCtx->key.ptr, sigCtx->heap, DYNAMIC_TYPE_RSA);
+ break;
+ #endif /* !NO_RSA */
+ #ifdef HAVE_ECC
+ case ECDSAk:
+ wc_ecc_free(sigCtx->key.ecc);
+ XFREE(sigCtx->key.ecc, sigCtx->heap, DYNAMIC_TYPE_ECC);
+ break;
+ #endif /* HAVE_ECC */
+ #ifdef HAVE_ED25519
+ case ED25519k:
+ wc_ed25519_free(sigCtx->key.ed25519);
+ XFREE(sigCtx->key.ed25519, sigCtx->heap, DYNAMIC_TYPE_ED25519);
+ break;
+ #endif /* HAVE_ED25519 */
+ #ifdef HAVE_ED448
+ case ED448k:
+ wc_ed448_free(sigCtx->key.ed448);
+ XFREE(sigCtx->key.ed448, sigCtx->heap, DYNAMIC_TYPE_ED448);
+ break;
+ #endif /* HAVE_ED448 */
+ default:
+ break;
+ } /* switch (keyOID) */
+ sigCtx->key.ptr = NULL;
+ }
#endif
- (void)key;
- (void)keySz;
- (void)sig;
- (void)sigSz;
- (void)heap;
+ /* reset state, we are done */
+ sigCtx->state = SIG_STATE_BEGIN;
+}
+
+#ifndef NO_ASN_CRYPT
+static int HashForSignature(const byte* buf, word32 bufSz, word32 sigOID,
+ byte* digest, int* typeH, int* digestSz, int verify)
+{
+ int ret = 0;
+
+ (void)verify;
switch (sigOID) {
- #ifndef NO_MD5
- case CTC_MD5wRSA:
- if (wc_Md5Hash(buf, bufSz, digest) == 0) {
- typeH = MD5h;
- digestSz = MD5_DIGEST_SIZE;
- }
- break;
- #endif
#if defined(WOLFSSL_MD2)
case CTC_MD2wRSA:
- if (wc_Md2Hash(buf, bufSz, digest) == 0) {
- typeH = MD2h;
- digestSz = MD2_DIGEST_SIZE;
- }
+ if (!verify) {
+ ret = HASH_TYPE_E;
+ WOLFSSL_MSG("MD2 not supported for signing");
+ }
+ else if ((ret = wc_Md2Hash(buf, bufSz, digest)) == 0) {
+ *typeH = MD2h;
+ *digestSz = MD2_DIGEST_SIZE;
+ }
break;
#endif
+ #ifndef NO_MD5
+ case CTC_MD5wRSA:
+ if ((ret = wc_Md5Hash(buf, bufSz, digest)) == 0) {
+ *typeH = MD5h;
+ *digestSz = WC_MD5_DIGEST_SIZE;
+ }
+ break;
+ #endif
#ifndef NO_SHA
case CTC_SHAwRSA:
case CTC_SHAwDSA:
case CTC_SHAwECDSA:
- if (wc_ShaHash(buf, bufSz, digest) == 0) {
- typeH = SHAh;
- digestSz = SHA_DIGEST_SIZE;
- }
- break;
+ if ((ret = wc_ShaHash(buf, bufSz, digest)) == 0) {
+ *typeH = SHAh;
+ *digestSz = WC_SHA_DIGEST_SIZE;
+ }
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ case CTC_SHA224wRSA:
+ case CTC_SHA224wECDSA:
+ if ((ret = wc_Sha224Hash(buf, bufSz, digest)) == 0) {
+ *typeH = SHA224h;
+ *digestSz = WC_SHA224_DIGEST_SIZE;
+ }
+ break;
#endif
#ifndef NO_SHA256
case CTC_SHA256wRSA:
case CTC_SHA256wECDSA:
- if (wc_Sha256Hash(buf, bufSz, digest) == 0) {
- typeH = SHA256h;
- digestSz = SHA256_DIGEST_SIZE;
- }
- break;
+ if ((ret = wc_Sha256Hash(buf, bufSz, digest)) == 0) {
+ *typeH = SHA256h;
+ *digestSz = WC_SHA256_DIGEST_SIZE;
+ }
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case CTC_SHA384wRSA:
+ case CTC_SHA384wECDSA:
+ if ((ret = wc_Sha384Hash(buf, bufSz, digest)) == 0) {
+ *typeH = SHA384h;
+ *digestSz = WC_SHA384_DIGEST_SIZE;
+ }
+ break;
#endif
#ifdef WOLFSSL_SHA512
case CTC_SHA512wRSA:
case CTC_SHA512wECDSA:
- if (wc_Sha512Hash(buf, bufSz, digest) == 0) {
- typeH = SHA512h;
- digestSz = SHA512_DIGEST_SIZE;
- }
- break;
+ if ((ret = wc_Sha512Hash(buf, bufSz, digest)) == 0) {
+ *typeH = SHA512h;
+ *digestSz = WC_SHA512_DIGEST_SIZE;
+ }
+ break;
#endif
- #ifdef WOLFSSL_SHA384
- case CTC_SHA384wRSA:
- case CTC_SHA384wECDSA:
- if (wc_Sha384Hash(buf, bufSz, digest) == 0) {
- typeH = SHA384h;
- digestSz = SHA384_DIGEST_SIZE;
- }
- break;
+ #ifdef HAVE_ED25519
+ case CTC_ED25519:
+ /* Hashes done in signing operation.
+ * Two dependent hashes with prefixes performed.
+ */
+ break;
+ #endif
+ #ifdef HAVE_ED448
+ case CTC_ED448:
+ /* Hashes done in signing operation.
+ * Two dependent hashes with prefixes performed.
+ */
+ break;
#endif
default:
- WOLFSSL_MSG("Verify Signautre has unsupported type");
+ ret = HASH_TYPE_E;
+ WOLFSSL_MSG("Hash for Signature has unsupported type");
}
-
- if (typeH == 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ return ret;
+}
+#endif /* !NO_ASN_CRYPT */
+
+/* Return codes: 0=Success, Negative (see error-crypt.h), ASN_SIG_CONFIRM_E */
+static int ConfirmSignature(SignatureCtx* sigCtx,
+ const byte* buf, word32 bufSz,
+ const byte* key, word32 keySz, word32 keyOID,
+ const byte* sig, word32 sigSz, word32 sigOID, byte* rsaKeyIdx)
+{
+ int ret = 0;
+#ifndef WOLFSSL_RENESAS_TSIP_TLS
+ (void)rsaKeyIdx;
#endif
- return 0; /* not confirmed */
+ if (sigCtx == NULL || buf == NULL || bufSz == 0 || key == NULL ||
+ keySz == 0 || sig == NULL || sigSz == 0) {
+ return BAD_FUNC_ARG;
}
- switch (keyOID) {
- #ifndef NO_RSA
- case RSAk:
+ (void)key;
+ (void)keySz;
+ (void)sig;
+ (void)sigSz;
+
+ WOLFSSL_ENTER("ConfirmSignature");
+
+#ifndef NO_ASN_CRYPT
+ switch (sigCtx->state) {
+ case SIG_STATE_BEGIN:
{
- word32 idx = 0;
- int encodedSigSz, verifySz;
- byte* out;
-#ifdef WOLFSSL_SMALL_STACK
- RsaKey* pubKey;
- byte* plain;
- byte* encodedSig;
-#else
- RsaKey pubKey[1];
- byte plain[MAX_ENCODED_SIG_SZ];
- byte encodedSig[MAX_ENCODED_SIG_SZ];
-#endif
+ sigCtx->keyOID = keyOID; /* must set early for cleanup */
-#ifdef WOLFSSL_SMALL_STACK
- pubKey = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- plain = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
-
- if (pubKey == NULL || plain == NULL || encodedSig == NULL) {
- WOLFSSL_MSG("Failed to allocate memory at ConfirmSignature");
-
- if (pubKey)
- XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (plain)
- XFREE(plain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (encodedSig)
- XFREE(encodedSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
- break; /* not confirmed */
+ sigCtx->digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, sigCtx->heap,
+ DYNAMIC_TYPE_DIGEST);
+ if (sigCtx->digest == NULL) {
+ ERROR_OUT(MEMORY_E, exit_cs);
}
-#endif
- if (sigSz > MAX_ENCODED_SIG_SZ) {
- WOLFSSL_MSG("Verify Signautre is too big");
+ sigCtx->state = SIG_STATE_HASH;
+ } /* SIG_STATE_BEGIN */
+ FALL_THROUGH;
+
+ case SIG_STATE_HASH:
+ {
+ ret = HashForSignature(buf, bufSz, sigOID, sigCtx->digest,
+ &sigCtx->typeH, &sigCtx->digestSz, 1);
+ if (ret != 0) {
+ goto exit_cs;
}
- else if (wc_InitRsaKey(pubKey, heap) != 0) {
- WOLFSSL_MSG("InitRsaKey failed");
+
+ sigCtx->state = SIG_STATE_KEY;
+ } /* SIG_STATE_HASH */
+ FALL_THROUGH;
+
+ case SIG_STATE_KEY:
+ {
+ switch (keyOID) {
+ #ifndef NO_RSA
+ case RSAk:
+ {
+ word32 idx = 0;
+
+ sigCtx->key.rsa = (RsaKey*)XMALLOC(sizeof(RsaKey),
+ sigCtx->heap, DYNAMIC_TYPE_RSA);
+ sigCtx->plain = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
+ sigCtx->heap, DYNAMIC_TYPE_SIGNATURE);
+ if (sigCtx->key.rsa == NULL || sigCtx->plain == NULL) {
+ ERROR_OUT(MEMORY_E, exit_cs);
+ }
+ if ((ret = wc_InitRsaKey_ex(sigCtx->key.rsa, sigCtx->heap,
+ sigCtx->devId)) != 0) {
+ goto exit_cs;
+ }
+ if (sigSz > MAX_ENCODED_SIG_SZ) {
+ WOLFSSL_MSG("Verify Signature is too big");
+ ERROR_OUT(BUFFER_E, exit_cs);
+ }
+ if ((ret = wc_RsaPublicKeyDecode(key, &idx, sigCtx->key.rsa,
+ keySz)) != 0) {
+ WOLFSSL_MSG("ASN Key decode error RSA");
+ goto exit_cs;
+ }
+ XMEMCPY(sigCtx->plain, sig, sigSz);
+ sigCtx->out = NULL;
+
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ sigCtx->asyncDev = &sigCtx->key.rsa->asyncDev;
+ #endif
+ break;
+ }
+ #endif /* !NO_RSA */
+ #ifdef HAVE_ECC
+ case ECDSAk:
+ {
+ word32 idx = 0;
+
+ sigCtx->verify = 0;
+ sigCtx->key.ecc = (ecc_key*)XMALLOC(sizeof(ecc_key),
+ sigCtx->heap, DYNAMIC_TYPE_ECC);
+ if (sigCtx->key.ecc == NULL) {
+ ERROR_OUT(MEMORY_E, exit_cs);
+ }
+ if ((ret = wc_ecc_init_ex(sigCtx->key.ecc, sigCtx->heap,
+ sigCtx->devId)) < 0) {
+ goto exit_cs;
+ }
+ ret = wc_EccPublicKeyDecode(key, &idx, sigCtx->key.ecc,
+ keySz);
+ if (ret < 0) {
+ WOLFSSL_MSG("ASN Key import error ECC");
+ goto exit_cs;
+ }
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ sigCtx->asyncDev = &sigCtx->key.ecc->asyncDev;
+ #endif
+ break;
+ }
+ #endif /* HAVE_ECC */
+ #ifdef HAVE_ED25519
+ case ED25519k:
+ {
+ sigCtx->verify = 0;
+ sigCtx->key.ed25519 = (ed25519_key*)XMALLOC(
+ sizeof(ed25519_key), sigCtx->heap,
+ DYNAMIC_TYPE_ED25519);
+ if (sigCtx->key.ed25519 == NULL) {
+ ERROR_OUT(MEMORY_E, exit_cs);
+ }
+ if ((ret = wc_ed25519_init(sigCtx->key.ed25519)) < 0) {
+ goto exit_cs;
+ }
+ if ((ret = wc_ed25519_import_public(key, keySz,
+ sigCtx->key.ed25519)) < 0) {
+ WOLFSSL_MSG("ASN Key import error ED25519");
+ goto exit_cs;
+ }
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ sigCtx->asyncDev = &sigCtx->key.ed25519->asyncDev;
+ #endif
+ break;
+ }
+ #endif
+ #ifdef HAVE_ED448
+ case ED448k:
+ {
+ sigCtx->verify = 0;
+ sigCtx->key.ed448 = (ed448_key*)XMALLOC(
+ sizeof(ed448_key), sigCtx->heap,
+ DYNAMIC_TYPE_ED448);
+ if (sigCtx->key.ed448 == NULL) {
+ ERROR_OUT(MEMORY_E, exit_cs);
+ }
+ if ((ret = wc_ed448_init(sigCtx->key.ed448)) < 0) {
+ goto exit_cs;
+ }
+ if ((ret = wc_ed448_import_public(key, keySz,
+ sigCtx->key.ed448)) < 0) {
+ WOLFSSL_MSG("ASN Key import error ED448");
+ goto exit_cs;
+ }
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ sigCtx->asyncDev = &sigCtx->key.ed448->asyncDev;
+ #endif
+ break;
+ }
+ #endif
+ default:
+ WOLFSSL_MSG("Verify Key type unknown");
+ ret = ASN_UNKNOWN_OID_E;
+ break;
+ } /* switch (keyOID) */
+
+ if (ret != 0) {
+ goto exit_cs;
}
- else if (wc_RsaPublicKeyDecode(key, &idx, pubKey, keySz) < 0) {
- WOLFSSL_MSG("ASN Key decode error RSA");
+
+ sigCtx->state = SIG_STATE_DO;
+
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ if (sigCtx->devId != INVALID_DEVID && sigCtx->asyncDev && sigCtx->asyncCtx) {
+ /* make sure event is initialized */
+ WOLF_EVENT* event = &sigCtx->asyncDev->event;
+ ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFSSL,
+ sigCtx->asyncCtx, WC_ASYNC_FLAG_CALL_AGAIN);
}
- else {
- XMEMCPY(plain, sig, sigSz);
+ #endif
+ } /* SIG_STATE_KEY */
+ FALL_THROUGH;
- if ((verifySz = wc_RsaSSL_VerifyInline(plain, sigSz, &out,
- pubKey)) < 0) {
- WOLFSSL_MSG("Rsa SSL verify error");
+ case SIG_STATE_DO:
+ {
+ switch (keyOID) {
+ #ifndef NO_RSA
+ case RSAk:
+ {
+ #ifdef HAVE_PK_CALLBACKS
+ if (sigCtx->pkCbRsa) {
+ ret = sigCtx->pkCbRsa(
+ sigCtx->plain, sigSz, &sigCtx->out,
+ key, keySz,
+ sigCtx->pkCtxRsa);
+ }
+ else
+ #endif /* HAVE_PK_CALLBACKS */
+ {
+ #ifdef WOLFSSL_RENESAS_TSIP_TLS
+ if (rsaKeyIdx != NULL)
+ {
+ ret = tsip_tls_CertVerify(buf, bufSz, sigCtx->plain,
+ sigSz,
+ sigCtx->pubkey_n_start - sigCtx->certBegin,
+ sigCtx->pubkey_n_len - 1,
+ sigCtx->pubkey_e_start - sigCtx->certBegin,
+ sigCtx->pubkey_e_len - 1,
+ rsaKeyIdx);
+
+ if (ret == 0){
+ sigCtx->verifyByTSIP = 1;
+ ret = 0;
+ } else {
+ WOLFSSL_MSG("RSA Verify by tsip didn't match");
+ ret = ASN_SIG_CONFIRM_E;
+ }
+ } else
+ #endif
+ ret = wc_RsaSSL_VerifyInline(sigCtx->plain, sigSz,
+ &sigCtx->out, sigCtx->key.rsa);
+ }
+ break;
}
- else {
- /* make sure we're right justified */
- encodedSigSz =
- wc_EncodeSignature(encodedSig, digest, digestSz, typeH);
- if (encodedSigSz != verifySz ||
- XMEMCMP(out, encodedSig, encodedSigSz) != 0) {
- WOLFSSL_MSG("Rsa SSL verify match encode error");
+ #endif /* !NO_RSA */
+ #if defined(HAVE_ECC)
+ case ECDSAk:
+ {
+ #ifdef HAVE_PK_CALLBACKS
+ if (sigCtx->pkCbEcc) {
+ ret = sigCtx->pkCbEcc(
+ sig, sigSz,
+ sigCtx->digest, sigCtx->digestSz,
+ key, keySz, &sigCtx->verify,
+ sigCtx->pkCtxEcc);
}
else
- ret = 1; /* match */
-
- #ifdef WOLFSSL_DEBUG_ENCODING
+ #endif /* HAVE_PK_CALLBACKS */
{
- int x;
-
- printf("wolfssl encodedSig:\n");
+ ret = wc_ecc_verify_hash(sig, sigSz, sigCtx->digest,
+ sigCtx->digestSz, &sigCtx->verify,
+ sigCtx->key.ecc);
+ }
+ break;
+ }
+ #endif /* HAVE_ECC */
+ #ifdef HAVE_ED25519
+ case ED25519k:
+ {
+ ret = wc_ed25519_verify_msg(sig, sigSz, buf, bufSz,
+ &sigCtx->verify, sigCtx->key.ed25519);
+ break;
+ }
+ #endif
+ #ifdef HAVE_ED448
+ case ED448k:
+ {
+ ret = wc_ed448_verify_msg(sig, sigSz, buf, bufSz,
+ &sigCtx->verify, sigCtx->key.ed448,
+ NULL, 0);
+ break;
+ }
+ #endif
+ default:
+ break;
+ } /* switch (keyOID) */
- for (x = 0; x < encodedSigSz; x++) {
- printf("%02x ", encodedSig[x]);
- if ( (x % 16) == 15)
- printf("\n");
- }
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ if (ret == WC_PENDING_E) {
+ goto exit_cs;
+ }
+ #endif
- printf("\n");
- printf("actual digest:\n");
+ if (ret < 0) {
+ /* treat all RSA errors as ASN_SIG_CONFIRM_E */
+ ret = ASN_SIG_CONFIRM_E;
+ goto exit_cs;
+ }
- for (x = 0; x < verifySz; x++) {
- printf("%02x ", out[x]);
- if ( (x % 16) == 15)
- printf("\n");
- }
+ sigCtx->state = SIG_STATE_CHECK;
+ } /* SIG_STATE_DO */
+ FALL_THROUGH;
- printf("\n");
+ case SIG_STATE_CHECK:
+ {
+ switch (keyOID) {
+ #ifndef NO_RSA
+ case RSAk:
+ {
+ int encodedSigSz, verifySz;
+ #ifdef WOLFSSL_RENESAS_TSIP
+ if (sigCtx->verifyByTSIP == 1) break;
+ #endif
+ #ifdef WOLFSSL_SMALL_STACK
+ byte* encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
+ sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (encodedSig == NULL) {
+ ERROR_OUT(MEMORY_E, exit_cs);
}
- #endif /* WOLFSSL_DEBUG_ENCODING */
+ #else
+ byte encodedSig[MAX_ENCODED_SIG_SZ];
+ #endif
- }
+ verifySz = ret;
- }
+ /* make sure we're right justified */
+ encodedSigSz = wc_EncodeSignature(encodedSig,
+ sigCtx->digest, sigCtx->digestSz, sigCtx->typeH);
+ if (encodedSigSz == verifySz && sigCtx->out != NULL &&
+ XMEMCMP(sigCtx->out, encodedSig, encodedSigSz) == 0) {
+ ret = 0;
+ }
+ else {
+ WOLFSSL_MSG("RSA SSL verify match encode error");
+ ret = ASN_SIG_CONFIRM_E;
+ }
- wc_FreeRsaKey(pubKey);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encodedSig, sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ break;
+ }
+ #endif /* NO_RSA */
+ #ifdef HAVE_ECC
+ case ECDSAk:
+ {
+ if (sigCtx->verify == 1) {
+ ret = 0;
+ }
+ else {
+ WOLFSSL_MSG("ECC Verify didn't match");
+ ret = ASN_SIG_CONFIRM_E;
+ }
+ break;
+ }
+ #endif /* HAVE_ECC */
+ #ifdef HAVE_ED25519
+ case ED25519k:
+ {
+ if (sigCtx->verify == 1) {
+ ret = 0;
+ }
+ else {
+ WOLFSSL_MSG("ED25519 Verify didn't match");
+ ret = ASN_SIG_CONFIRM_E;
+ }
+ break;
+ }
+ #endif /* HAVE_ED25519 */
+ #ifdef HAVE_ED448
+ case ED448k:
+ {
+ if (sigCtx->verify == 1) {
+ ret = 0;
+ }
+ else {
+ WOLFSSL_MSG("ED448 Verify didn't match");
+ ret = ASN_SIG_CONFIRM_E;
+ }
+ break;
+ }
+ #endif /* HAVE_ED448 */
+ default:
+ break;
+ } /* switch (keyOID) */
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(plain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(encodedSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
break;
- }
+ } /* SIG_STATE_CHECK */
+ } /* switch (sigCtx->state) */
- #endif /* NO_RSA */
- #ifdef HAVE_ECC
- case ECDSAk:
- {
- int verify = 0;
-#ifdef WOLFSSL_SMALL_STACK
- ecc_key* pubKey;
-#else
- ecc_key pubKey[1];
-#endif
+exit_cs:
-#ifdef WOLFSSL_SMALL_STACK
- pubKey = (ecc_key*)XMALLOC(sizeof(ecc_key), NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- if (pubKey == NULL) {
- WOLFSSL_MSG("Failed to allocate pubKey");
- break; /* not confirmed */
- }
-#endif
+#endif /* !NO_ASN_CRYPT */
- if (wc_ecc_init(pubKey) < 0) {
- WOLFSSL_MSG("Failed to initialize key");
- break; /* not confirmed */
- }
- if (wc_ecc_import_x963(key, keySz, pubKey) < 0) {
- WOLFSSL_MSG("ASN Key import error ECC");
- }
- else {
- if (wc_ecc_verify_hash(sig, sigSz, digest, digestSz, &verify,
- pubKey) != 0) {
- WOLFSSL_MSG("ECC verify hash error");
- }
- else if (1 != verify) {
- WOLFSSL_MSG("ECC Verify didn't match");
- } else
- ret = 1; /* match */
+ (void)keyOID;
+ (void)sigOID;
- }
- wc_ecc_free(pubKey);
+ WOLFSSL_LEAVE("ConfirmSignature", ret);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- break;
- }
- #endif /* HAVE_ECC */
- default:
- WOLFSSL_MSG("Verify Key type unknown");
- }
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (ret == WC_PENDING_E)
+ return ret;
#endif
+ FreeSignatureCtx(sigCtx);
+
return ret;
}
@@ -3250,7 +7439,7 @@ static int ConfirmSignature(const byte* buf, word32 bufSz,
#ifndef IGNORE_NAME_CONSTRAINTS
static int MatchBaseName(int type, const char* name, int nameSz,
- const char* base, int baseSz)
+ const char* base, int baseSz)
{
if (base == NULL || baseSz <= 0 || name == NULL || nameSz <= 0 ||
name[0] == '.' || nameSz < baseSz ||
@@ -3302,7 +7491,7 @@ static int MatchBaseName(int type, const char* name, int nameSz,
}
while (nameSz > 0) {
- if (XTOLOWER((unsigned char)*name++) !=
+ if (XTOLOWER((unsigned char)*name++) !=
XTOLOWER((unsigned char)*base++))
return 0;
nameSz--;
@@ -3322,34 +7511,44 @@ static int ConfirmNameConstraints(Signer* signer, DecodedCert* cert)
Base_entry* base = signer->excludedNames;
while (base != NULL) {
- if (base->type == ASN_DNS_TYPE) {
- DNS_entry* name = cert->altNames;
- while (name != NULL) {
- if (MatchBaseName(ASN_DNS_TYPE,
- name->name, (int)XSTRLEN(name->name),
- base->name, base->nameSz))
- return 0;
- name = name->next;
+ switch (base->type) {
+ case ASN_DNS_TYPE:
+ {
+ DNS_entry* name = cert->altNames;
+ while (name != NULL) {
+ if (MatchBaseName(ASN_DNS_TYPE,
+ name->name, name->len,
+ base->name, base->nameSz)) {
+ return 0;
+ }
+ name = name->next;
+ }
+ break;
}
- }
- else if (base->type == ASN_RFC822_TYPE) {
- DNS_entry* name = cert->altEmailNames;
- while (name != NULL) {
- if (MatchBaseName(ASN_RFC822_TYPE,
- name->name, (int)XSTRLEN(name->name),
- base->name, base->nameSz))
- return 0;
-
- name = name->next;
+ case ASN_RFC822_TYPE:
+ {
+ DNS_entry* name = cert->altEmailNames;
+ while (name != NULL) {
+ if (MatchBaseName(ASN_RFC822_TYPE,
+ name->name, name->len,
+ base->name, base->nameSz)) {
+ return 0;
+ }
+ name = name->next;
+ }
+ break;
}
- }
- else if (base->type == ASN_DIR_TYPE) {
- if (cert->subjectRawLen == base->nameSz &&
- XMEMCMP(cert->subjectRaw, base->name, base->nameSz) == 0) {
-
- return 0;
+ case ASN_DIR_TYPE:
+ {
+ /* allow permitted dirName smaller than actual subject */
+ if (cert->subjectRawLen >= base->nameSz &&
+ XMEMCMP(cert->subjectRaw, base->name,
+ base->nameSz) == 0) {
+ return 0;
+ }
+ break;
}
- }
+ }; /* switch */
base = base->next;
}
}
@@ -3365,47 +7564,56 @@ static int ConfirmNameConstraints(Signer* signer, DecodedCert* cert)
Base_entry* base = signer->permittedNames;
while (base != NULL) {
- if (base->type == ASN_DNS_TYPE) {
- DNS_entry* name = cert->altNames;
+ switch (base->type) {
+ case ASN_DNS_TYPE:
+ {
+ DNS_entry* name = cert->altNames;
- if (name != NULL)
- needDns = 1;
+ if (name != NULL)
+ needDns = 1;
- while (name != NULL) {
- matchDns = MatchBaseName(ASN_DNS_TYPE,
- name->name, (int)XSTRLEN(name->name),
+ while (name != NULL) {
+ matchDns = MatchBaseName(ASN_DNS_TYPE,
+ name->name, name->len,
base->name, base->nameSz);
- name = name->next;
+ name = name->next;
+ }
+ break;
}
- }
- else if (base->type == ASN_RFC822_TYPE) {
- DNS_entry* name = cert->altEmailNames;
+ case ASN_RFC822_TYPE:
+ {
+ DNS_entry* name = cert->altEmailNames;
- if (name != NULL)
- needEmail = 1;
+ if (name != NULL)
+ needEmail = 1;
- while (name != NULL) {
- matchEmail = MatchBaseName(ASN_DNS_TYPE,
- name->name, (int)XSTRLEN(name->name),
+ while (name != NULL) {
+ matchEmail = MatchBaseName(ASN_DNS_TYPE,
+ name->name, name->len,
base->name, base->nameSz);
- name = name->next;
+ name = name->next;
+ }
+ break;
}
- }
- else if (base->type == ASN_DIR_TYPE) {
- needDir = 1;
- if (cert->subjectRaw != NULL &&
- cert->subjectRawLen == base->nameSz &&
- XMEMCMP(cert->subjectRaw, base->name, base->nameSz) == 0) {
-
- matchDir = 1;
+ case ASN_DIR_TYPE:
+ {
+ /* allow permitted dirName smaller than actual subject */
+ needDir = 1;
+ if (cert->subjectRaw != NULL &&
+ cert->subjectRawLen >= base->nameSz &&
+ XMEMCMP(cert->subjectRaw, base->name,
+ base->nameSz) == 0) {
+ matchDir = 1;
+ }
+ break;
}
- }
+ } /* switch */
base = base->next;
}
- if ((needDns && !matchDns) || (needEmail && !matchEmail) ||
- (needDir && !matchDir)) {
-
+ if ((needDns && !matchDns) ||
+ (needEmail && !matchEmail) ||
+ (needDir && !matchDir)) {
return 0;
}
}
@@ -3415,8 +7623,7 @@ static int ConfirmNameConstraints(Signer* signer, DecodedCert* cert)
#endif /* IGNORE_NAME_CONSTRAINTS */
-
-static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
+static int DecodeAltNames(const byte* input, int sz, DecodedCert* cert)
{
word32 idx = 0;
int length = 0;
@@ -3428,10 +7635,17 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
return ASN_PARSE_E;
}
+ if (length == 0) {
+ /* RFC 5280 4.2.1.6. Subject Alternative Name
+ If the subjectAltName extension is present, the sequence MUST
+ contain at least one entry. */
+ return ASN_PARSE_E;
+ }
+
cert->weOwnAltNames = 1;
while (length > 0) {
- byte b = input[idx++];
+ byte b = input[idx++];
length--;
@@ -3452,17 +7666,18 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
DYNAMIC_TYPE_ALTNAME);
if (dnsEntry == NULL) {
WOLFSSL_MSG("\tOut of Memory");
- return ASN_PARSE_E;
+ return MEMORY_E;
}
+ dnsEntry->type = ASN_DNS_TYPE;
dnsEntry->name = (char*)XMALLOC(strLen + 1, cert->heap,
DYNAMIC_TYPE_ALTNAME);
if (dnsEntry->name == NULL) {
WOLFSSL_MSG("\tOut of Memory");
XFREE(dnsEntry, cert->heap, DYNAMIC_TYPE_ALTNAME);
- return ASN_PARSE_E;
+ return MEMORY_E;
}
-
+ dnsEntry->len = strLen;
XMEMCPY(dnsEntry->name, &input[idx], strLen);
dnsEntry->name[strLen] = '\0';
@@ -3472,7 +7687,7 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
length -= strLen;
idx += strLen;
}
-#ifndef IGNORE_NAME_CONSTRAINTS
+ #ifndef IGNORE_NAME_CONSTRAINTS
else if (b == (ASN_CONTEXT_SPECIFIC | ASN_RFC822_TYPE)) {
DNS_entry* emailEntry;
int strLen;
@@ -3488,17 +7703,18 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
DYNAMIC_TYPE_ALTNAME);
if (emailEntry == NULL) {
WOLFSSL_MSG("\tOut of Memory");
- return ASN_PARSE_E;
+ return MEMORY_E;
}
+ emailEntry->type = ASN_RFC822_TYPE;
emailEntry->name = (char*)XMALLOC(strLen + 1, cert->heap,
DYNAMIC_TYPE_ALTNAME);
if (emailEntry->name == NULL) {
WOLFSSL_MSG("\tOut of Memory");
XFREE(emailEntry, cert->heap, DYNAMIC_TYPE_ALTNAME);
- return ASN_PARSE_E;
+ return MEMORY_E;
}
-
+ emailEntry->len = strLen;
XMEMCPY(emailEntry->name, &input[idx], strLen);
emailEntry->name[strLen] = '\0';
@@ -3508,6 +7724,120 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
length -= strLen;
idx += strLen;
}
+ else if (b == (ASN_CONTEXT_SPECIFIC | ASN_URI_TYPE)) {
+ DNS_entry* uriEntry;
+ int strLen;
+ word32 lenStartIdx = idx;
+
+ WOLFSSL_MSG("\tPutting URI into list but not using");
+ if (GetLength(input, &idx, &strLen, sz) < 0) {
+ WOLFSSL_MSG("\tfail: str length");
+ return ASN_PARSE_E;
+ }
+ length -= (idx - lenStartIdx);
+
+ /* check that strLen at index is not past input buffer */
+ if (strLen + (int)idx > sz) {
+ return BUFFER_E;
+ }
+
+ #ifndef WOLFSSL_NO_ASN_STRICT
+ /* Verify RFC 5280 Sec 4.2.1.6 rule:
+ "The name MUST NOT be a relative URI" */
+
+ {
+ int i;
+
+ /* skip past scheme (i.e http,ftp,...) finding first ':' char */
+ for (i = 0; i < strLen; i++) {
+ if (input[idx + i] == ':') {
+ break;
+ }
+ if (input[idx + i] == '/') {
+ i = strLen; /* error, found relative path since '/' was
+ * encountered before ':'. Returning error
+ * value in next if statement. */
+ }
+ }
+
+ /* test if no ':' char was found and test that the next two
+ * chars are // to match the pattern "://" */
+ if (i >= strLen - 2 || (input[idx + i + 1] != '/' ||
+ input[idx + i + 2] != '/')) {
+ WOLFSSL_MSG("\tAlt Name must be absolute URI");
+ return ASN_ALT_NAME_E;
+ }
+ }
+ #endif
+
+ uriEntry = (DNS_entry*)XMALLOC(sizeof(DNS_entry), cert->heap,
+ DYNAMIC_TYPE_ALTNAME);
+ if (uriEntry == NULL) {
+ WOLFSSL_MSG("\tOut of Memory");
+ return MEMORY_E;
+ }
+
+ uriEntry->type = ASN_URI_TYPE;
+ uriEntry->name = (char*)XMALLOC(strLen + 1, cert->heap,
+ DYNAMIC_TYPE_ALTNAME);
+ if (uriEntry->name == NULL) {
+ WOLFSSL_MSG("\tOut of Memory");
+ XFREE(uriEntry, cert->heap, DYNAMIC_TYPE_ALTNAME);
+ return MEMORY_E;
+ }
+ uriEntry->len = strLen;
+ XMEMCPY(uriEntry->name, &input[idx], strLen);
+ uriEntry->name[strLen] = '\0';
+
+ uriEntry->next = cert->altNames;
+ cert->altNames = uriEntry;
+
+ length -= strLen;
+ idx += strLen;
+ }
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+ else if (b == (ASN_CONTEXT_SPECIFIC | ASN_IP_TYPE)) {
+ DNS_entry* ipAddr;
+ int strLen;
+ word32 lenStartIdx = idx;
+ WOLFSSL_MSG("Decoding Subject Alt. Name: IP Address");
+
+ if (GetLength(input, &idx, &strLen, sz) < 0) {
+ WOLFSSL_MSG("\tfail: str length");
+ return ASN_PARSE_E;
+ }
+ length -= (idx - lenStartIdx);
+ /* check that strLen at index is not past input buffer */
+ if (strLen + (int)idx > sz) {
+ return BUFFER_E;
+ }
+
+ ipAddr = (DNS_entry*)XMALLOC(sizeof(DNS_entry), cert->heap,
+ DYNAMIC_TYPE_ALTNAME);
+ if (ipAddr == NULL) {
+ WOLFSSL_MSG("\tOut of Memory");
+ return MEMORY_E;
+ }
+
+ ipAddr->type = ASN_IP_TYPE;
+ ipAddr->name = (char*)XMALLOC(strLen + 1, cert->heap,
+ DYNAMIC_TYPE_ALTNAME);
+ if (ipAddr->name == NULL) {
+ WOLFSSL_MSG("\tOut of Memory");
+ XFREE(ipAddr, cert->heap, DYNAMIC_TYPE_ALTNAME);
+ return MEMORY_E;
+ }
+ ipAddr->len = strLen;
+ XMEMCPY(ipAddr->name, &input[idx], strLen);
+ ipAddr->name[strLen] = '\0';
+
+ ipAddr->next = cert->altNames;
+ cert->altNames = ipAddr;
+
+ length -= strLen;
+ idx += strLen;
+ }
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
#endif /* IGNORE_NAME_CONSTRAINTS */
#ifdef WOLFSSL_SEP
else if (b == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | ASN_OTHER_TYPE))
@@ -3515,6 +7845,8 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
int strLen;
word32 lenStartIdx = idx;
word32 oid = 0;
+ int ret;
+ byte tag;
if (GetLength(input, &idx, &strLen, sz) < 0) {
WOLFSSL_MSG("\tfail: other name length");
@@ -3523,7 +7855,7 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
/* Consume the rest of this sequence. */
length -= (strLen + idx - lenStartIdx);
- if (GetObjectId(input, &idx, &oid, sz) < 0) {
+ if (GetObjectId(input, &idx, &oid, oidCertAltNameType, sz) < 0) {
WOLFSSL_MSG("\tbad OID");
return ASN_PARSE_E;
}
@@ -3533,7 +7865,11 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
return ASN_PARSE_E;
}
- if (input[idx++] != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
+ if (GetASNTag(input, &idx, &tag, sz) < 0) {
+ return ASN_PARSE_E;
+ }
+
+ if (tag != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
WOLFSSL_MSG("\twrong type");
return ASN_PARSE_E;
}
@@ -3548,17 +7884,14 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
return ASN_PARSE_E;
}
- if (input[idx++] != ASN_OBJECT_ID) {
- WOLFSSL_MSG("\texpected OID");
- return ASN_PARSE_E;
- }
-
- if (GetLength(input, &idx, &strLen, sz) < 0) {
- WOLFSSL_MSG("\tfailed: str len");
- return ASN_PARSE_E;
+ ret = GetASNObjectId(input, &idx, &strLen, sz);
+ if (ret != 0) {
+ WOLFSSL_MSG("\tbad OID");
+ return ret;
}
- cert->hwType = (byte*)XMALLOC(strLen, cert->heap, 0);
+ cert->hwType = (byte*)XMALLOC(strLen, cert->heap,
+ DYNAMIC_TYPE_X509_EXT);
if (cert->hwType == NULL) {
WOLFSSL_MSG("\tOut of Memory");
return MEMORY_E;
@@ -3568,17 +7901,12 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
cert->hwTypeSz = strLen;
idx += strLen;
- if (input[idx++] != ASN_OCTET_STRING) {
- WOLFSSL_MSG("\texpected Octet String");
- return ASN_PARSE_E;
- }
-
- if (GetLength(input, &idx, &strLen, sz) < 0) {
- WOLFSSL_MSG("\tfailed: str len");
- return ASN_PARSE_E;
- }
+ ret = GetOctetString(input, &idx, &strLen, sz);
+ if (ret < 0)
+ return ret;
- cert->hwSerialNum = (byte*)XMALLOC(strLen + 1, cert->heap, 0);
+ cert->hwSerialNum = (byte*)XMALLOC(strLen + 1, cert->heap,
+ DYNAMIC_TYPE_X509_EXT);
if (cert->hwSerialNum == NULL) {
WOLFSSL_MSG("\tOut of Memory");
return MEMORY_E;
@@ -3589,7 +7917,7 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
cert->hwSerialNumSz = strLen;
idx += strLen;
}
-#endif /* WOLFSSL_SEP */
+ #endif /* WOLFSSL_SEP */
else {
int strLen;
word32 lenStartIdx = idx;
@@ -3607,13 +7935,14 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
return 0;
}
-
-static int DecodeBasicCaConstraint(byte* input, int sz, DecodedCert* cert)
+static int DecodeBasicCaConstraint(const byte* input, int sz, DecodedCert* cert)
{
word32 idx = 0;
int length = 0;
+ int ret;
WOLFSSL_ENTER("DecodeBasicCaConstraint");
+
if (GetSequence(input, &idx, &length, sz) < 0) {
WOLFSSL_MSG("\tfail: bad SEQUENCE");
return ASN_PARSE_E;
@@ -3625,40 +7954,35 @@ static int DecodeBasicCaConstraint(byte* input, int sz, DecodedCert* cert)
/* If the basic ca constraint is false, this extension may be named, but
* left empty. So, if the length is 0, just return. */
- if (input[idx++] != ASN_BOOLEAN)
- {
- WOLFSSL_MSG("\tfail: constraint not BOOLEAN");
- return ASN_PARSE_E;
- }
+ ret = GetBoolean(input, &idx, sz);
- if (GetLength(input, &idx, &length, sz) < 0)
- {
- WOLFSSL_MSG("\tfail: length");
- return ASN_PARSE_E;
+#ifndef WOLFSSL_X509_BASICCONS_INT
+ if (ret < 0) {
+ WOLFSSL_MSG("\tfail: constraint not valid BOOLEAN");
+ return ret;
}
- if (input[idx++])
- cert->isCA = 1;
-
- #ifdef OPENSSL_EXTRA
- /* If there isn't any more data, return. */
- if (idx >= (word32)sz)
- return 0;
-
- /* Anything left should be the optional pathlength */
- if (input[idx++] != ASN_INTEGER) {
- WOLFSSL_MSG("\tfail: pathlen not INTEGER");
- return ASN_PARSE_E;
- }
+ cert->isCA = (byte)ret;
+#else
+ if (ret < 0) {
+ if(input[idx] == ASN_INTEGER) {
+ /* For OpenSSL compatibility, if ASN_INTEGER it is valid format */
+ cert->isCA = FALSE;
+ } else return ret;
+ } else
+ cert->isCA = (byte)ret;
+#endif
- if (input[idx++] != 1) {
- WOLFSSL_MSG("\tfail: pathlen too long");
- return ASN_PARSE_E;
- }
+ /* If there isn't any more data, return. */
+ if (idx >= (word32)sz) {
+ return 0;
+ }
- cert->pathLength = input[idx];
- cert->extBasicConstPlSet = 1;
- #endif /* OPENSSL_EXTRA */
+ ret = GetInteger7Bit(input, &idx, sz);
+ if (ret < 0)
+ return ret;
+ cert->pathLength = (byte)ret;
+ cert->pathLengthSet = 1;
return 0;
}
@@ -3669,10 +7993,11 @@ static int DecodeBasicCaConstraint(byte* input, int sz, DecodedCert* cert)
#define GENERALNAME_URI 6
/* From RFC3280 SS4.2.1.7, GeneralName */
-static int DecodeCrlDist(byte* input, int sz, DecodedCert* cert)
+static int DecodeCrlDist(const byte* input, int sz, DecodedCert* cert)
{
- word32 idx = 0;
+ word32 idx = 0, localIdx;
int length = 0;
+ byte tag = 0;
WOLFSSL_ENTER("DecodeCrlDist");
@@ -3687,20 +8012,26 @@ static int DecodeCrlDist(byte* input, int sz, DecodedCert* cert)
/* The Distribution Point has three explicit optional members
* First check for a DistributionPointName
*/
- if (input[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+ localIdx = idx;
+ if (GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
{
idx++;
if (GetLength(input, &idx, &length, sz) < 0)
return ASN_PARSE_E;
- if (input[idx] ==
- (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | CRLDP_FULL_NAME))
+ localIdx = idx;
+ if (GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+ tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED |
+ CRLDP_FULL_NAME))
{
idx++;
if (GetLength(input, &idx, &length, sz) < 0)
return ASN_PARSE_E;
- if (input[idx] == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI))
+ localIdx = idx;
+ if (GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+ tag == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI))
{
idx++;
if (GetLength(input, &idx, &length, sz) < 0)
@@ -3714,14 +8045,17 @@ static int DecodeCrlDist(byte* input, int sz, DecodedCert* cert)
/* This isn't a URI, skip it. */
idx += length;
}
- else
+ else {
/* This isn't a FULLNAME, skip it. */
idx += length;
+ }
}
/* Check for reasonFlags */
+ localIdx = idx;
if (idx < (word32)sz &&
- input[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
+ GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
{
idx++;
if (GetLength(input, &idx, &length, sz) < 0)
@@ -3730,8 +8064,10 @@ static int DecodeCrlDist(byte* input, int sz, DecodedCert* cert)
}
/* Check for cRLIssuer */
+ localIdx = idx;
if (idx < (word32)sz &&
- input[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2))
+ GetASNTag(input, &localIdx, &tag, sz) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2))
{
idx++;
if (GetLength(input, &idx, &length, sz) < 0)
@@ -3749,15 +8085,16 @@ static int DecodeCrlDist(byte* input, int sz, DecodedCert* cert)
}
-static int DecodeAuthInfo(byte* input, int sz, DecodedCert* cert)
+static int DecodeAuthInfo(const byte* input, int sz, DecodedCert* cert)
/*
- * Read the first of the Authority Information Access records. If there are
+ * Read Authority Information Access records. If there are
* any issues, return without saving the record.
*/
{
word32 idx = 0;
int length = 0;
- byte b;
+ int count = 0;
+ byte b = 0;
word32 oid;
WOLFSSL_ENTER("DecodeAuthInfo");
@@ -3766,27 +8103,43 @@ static int DecodeAuthInfo(byte* input, int sz, DecodedCert* cert)
if (GetSequence(input, &idx, &length, sz) < 0)
return ASN_PARSE_E;
- while (idx < (word32)sz) {
+ while ((idx < (word32)sz) && (count < MAX_AIA_SZ)) {
/* Unwrap a single AIA */
if (GetSequence(input, &idx, &length, sz) < 0)
return ASN_PARSE_E;
oid = 0;
- if (GetObjectId(input, &idx, &oid, sz) < 0)
+ if (GetObjectId(input, &idx, &oid, oidCertAuthInfoType, sz) < 0)
return ASN_PARSE_E;
/* Only supporting URIs right now. */
- b = input[idx++];
+ if (GetASNTag(input, &idx, &b, sz) < 0)
+ return ASN_PARSE_E;
+
if (GetLength(input, &idx, &length, sz) < 0)
return ASN_PARSE_E;
+ /* Set ocsp entry */
if (b == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI) &&
oid == AIA_OCSP_OID)
{
cert->extAuthInfoSz = length;
cert->extAuthInfo = input + idx;
+ count++;
+ #if !defined(OPENSSL_ALL) || !defined(WOLFSSL_QT)
break;
+ #endif
+ }
+ #if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+ /* Set CaIssuers entry */
+ else if ((b == (ASN_CONTEXT_SPECIFIC | GENERALNAME_URI)) &&
+ oid == AIA_CA_ISSUER_OID)
+ {
+ cert->extAuthInfoCaIssuerSz = length;
+ cert->extAuthInfoCaIssuer = input + idx;
+ count++;
}
+ #endif
idx += length;
}
@@ -3794,10 +8147,11 @@ static int DecodeAuthInfo(byte* input, int sz, DecodedCert* cert)
}
-static int DecodeAuthKeyId(byte* input, int sz, DecodedCert* cert)
+static int DecodeAuthKeyId(const byte* input, int sz, DecodedCert* cert)
{
word32 idx = 0;
int length = 0, ret = 0;
+ byte tag;
WOLFSSL_ENTER("DecodeAuthKeyId");
@@ -3806,124 +8160,107 @@ static int DecodeAuthKeyId(byte* input, int sz, DecodedCert* cert)
return ASN_PARSE_E;
}
- if (input[idx++] != (ASN_CONTEXT_SPECIFIC | 0)) {
+ if (GetASNTag(input, &idx, &tag, sz) < 0) {
+ return ASN_PARSE_E;
+ }
+
+ if (tag != (ASN_CONTEXT_SPECIFIC | 0)) {
WOLFSSL_MSG("\tinfo: OPTIONAL item 0, not available\n");
+ cert->extAuthKeyIdSet = 0;
return 0;
}
- if (GetLength(input, &idx, &length, sz) < 0) {
+ if (GetLength(input, &idx, &length, sz) <= 0) {
WOLFSSL_MSG("\tfail: extension data length");
return ASN_PARSE_E;
}
- #ifdef OPENSSL_EXTRA
- cert->extAuthKeyIdSrc = &input[idx];
- cert->extAuthKeyIdSz = length;
- #endif /* OPENSSL_EXTRA */
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+ cert->extAuthKeyIdSrc = &input[idx];
+ cert->extAuthKeyIdSz = length;
+#endif /* OPENSSL_EXTRA */
if (length == KEYID_SIZE) {
XMEMCPY(cert->extAuthKeyId, input + idx, length);
}
- else {
- #ifdef NO_SHA
- ret = wc_Sha256Hash(input + idx, length, cert->extAuthKeyId);
- #else
- ret = wc_ShaHash(input + idx, length, cert->extAuthKeyId);
- #endif
- }
+ else
+ ret = CalcHashId(input + idx, length, cert->extAuthKeyId);
return ret;
}
-static int DecodeSubjKeyId(byte* input, int sz, DecodedCert* cert)
+static int DecodeSubjKeyId(const byte* input, int sz, DecodedCert* cert)
{
word32 idx = 0;
int length = 0, ret = 0;
WOLFSSL_ENTER("DecodeSubjKeyId");
- if (input[idx++] != ASN_OCTET_STRING) {
- WOLFSSL_MSG("\tfail: should be an OCTET STRING");
+ if (sz <= 0)
return ASN_PARSE_E;
- }
- if (GetLength(input, &idx, &length, sz) < 0) {
- WOLFSSL_MSG("\tfail: extension data length");
- return ASN_PARSE_E;
- }
+ ret = GetOctetString(input, &idx, &length, sz);
+ if (ret < 0)
+ return ret;
- #ifdef OPENSSL_EXTRA
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
cert->extSubjKeyIdSrc = &input[idx];
cert->extSubjKeyIdSz = length;
#endif /* OPENSSL_EXTRA */
- if (length == SIGNER_DIGEST_SIZE) {
+ if (length == KEYID_SIZE) {
XMEMCPY(cert->extSubjKeyId, input + idx, length);
}
- else {
- #ifdef NO_SHA
- ret = wc_Sha256Hash(input + idx, length, cert->extSubjKeyId);
- #else
- ret = wc_ShaHash(input + idx, length, cert->extSubjKeyId);
- #endif
- }
+ else
+ ret = CalcHashId(input + idx, length, cert->extSubjKeyId);
return ret;
}
-static int DecodeKeyUsage(byte* input, int sz, DecodedCert* cert)
+static int DecodeKeyUsage(const byte* input, int sz, DecodedCert* cert)
{
word32 idx = 0;
int length;
- byte unusedBits;
+ int ret;
WOLFSSL_ENTER("DecodeKeyUsage");
- if (input[idx++] != ASN_BIT_STRING) {
- WOLFSSL_MSG("\tfail: key usage expected bit string");
- return ASN_PARSE_E;
- }
-
- if (GetLength(input, &idx, &length, sz) < 0) {
- WOLFSSL_MSG("\tfail: key usage bad length");
- return ASN_PARSE_E;
- }
-
- unusedBits = input[idx++];
- length--;
+ ret = CheckBitString(input, &idx, &length, sz, 0, NULL);
+ if (ret != 0)
+ return ret;
- if (length == 2) {
- cert->extKeyUsage = (word16)((input[idx] << 8) | input[idx+1]);
- cert->extKeyUsage >>= unusedBits;
- }
- else if (length == 1)
- cert->extKeyUsage = (word16)(input[idx] << 1);
+ cert->extKeyUsage = (word16)(input[idx]);
+ if (length == 2)
+ cert->extKeyUsage |= (word16)(input[idx+1] << 8);
return 0;
}
-static int DecodeExtKeyUsage(byte* input, int sz, DecodedCert* cert)
+static int DecodeExtKeyUsage(const byte* input, int sz, DecodedCert* cert)
{
word32 idx = 0, oid;
- int length;
+ int length, ret;
- WOLFSSL_ENTER("DecodeExtKeyUsage");
+ WOLFSSL_MSG("DecodeExtKeyUsage");
if (GetSequence(input, &idx, &length, sz) < 0) {
WOLFSSL_MSG("\tfail: should be a SEQUENCE");
return ASN_PARSE_E;
}
- #ifdef OPENSSL_EXTRA
- cert->extExtKeyUsageSrc = input + idx;
- cert->extExtKeyUsageSz = length;
- #endif
+#if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+ cert->extExtKeyUsageSrc = input + idx;
+ cert->extExtKeyUsageSz = length;
+#endif
while (idx < (word32)sz) {
- if (GetObjectId(input, &idx, &oid, sz) < 0)
- return ASN_PARSE_E;
+ ret = GetObjectId(input, &idx, &oid, oidCertKeyUseType, sz);
+ if (ret == ASN_UNKNOWN_OID_E)
+ continue;
+ else if (ret < 0)
+ return ret;
switch (oid) {
case EKU_ANY_OID:
@@ -3935,14 +8272,23 @@ static int DecodeExtKeyUsage(byte* input, int sz, DecodedCert* cert)
case EKU_CLIENT_AUTH_OID:
cert->extExtKeyUsage |= EXTKEYUSE_CLIENT_AUTH;
break;
+ case EKU_CODESIGNING_OID:
+ cert->extExtKeyUsage |= EXTKEYUSE_CODESIGN;
+ break;
+ case EKU_EMAILPROTECT_OID:
+ cert->extExtKeyUsage |= EXTKEYUSE_EMAILPROT;
+ break;
+ case EKU_TIMESTAMP_OID:
+ cert->extExtKeyUsage |= EXTKEYUSE_TIMESTAMP;
+ break;
case EKU_OCSP_SIGN_OID:
cert->extExtKeyUsage |= EXTKEYUSE_OCSP_SIGN;
break;
}
- #ifdef OPENSSL_EXTRA
- cert->extExtKeyUsageCount++;
- #endif
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+ cert->extExtKeyUsageCount++;
+ #endif
}
return 0;
@@ -3950,7 +8296,9 @@ static int DecodeExtKeyUsage(byte* input, int sz, DecodedCert* cert)
#ifndef IGNORE_NAME_CONSTRAINTS
-static int DecodeSubtree(byte* input, int sz, Base_entry** head, void* heap)
+#define ASN_TYPE_MASK 0xF
+static int DecodeSubtree(const byte* input, int sz,
+ Base_entry** head, void* heap)
{
word32 idx = 0;
@@ -3959,27 +8307,37 @@ static int DecodeSubtree(byte* input, int sz, Base_entry** head, void* heap)
while (idx < (word32)sz) {
int seqLength, strLength;
word32 nameIdx;
- byte b;
+ byte b, bType;
if (GetSequence(input, &idx, &seqLength, sz) < 0) {
WOLFSSL_MSG("\tfail: should be a SEQUENCE");
return ASN_PARSE_E;
}
-
nameIdx = idx;
b = input[nameIdx++];
+
if (GetLength(input, &nameIdx, &strLength, sz) <= 0) {
WOLFSSL_MSG("\tinvalid length");
return ASN_PARSE_E;
}
- if (b == (ASN_CONTEXT_SPECIFIC | ASN_DNS_TYPE) ||
- b == (ASN_CONTEXT_SPECIFIC | ASN_RFC822_TYPE) ||
- b == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | ASN_DIR_TYPE)) {
+ /* Get type, LSB 4-bits */
+ bType = (b & ASN_TYPE_MASK);
+
+ if (bType == ASN_DNS_TYPE || bType == ASN_RFC822_TYPE ||
+ bType == ASN_DIR_TYPE) {
+ Base_entry* entry;
- Base_entry* entry = (Base_entry*)XMALLOC(sizeof(Base_entry),
- heap, DYNAMIC_TYPE_ALTNAME);
+ /* if constructed has leading sequence */
+ if (b & ASN_CONSTRUCTED) {
+ if (GetSequence(input, &nameIdx, &strLength, sz) < 0) {
+ WOLFSSL_MSG("\tfail: constructed be a SEQUENCE");
+ return ASN_PARSE_E;
+ }
+ }
+ entry = (Base_entry*)XMALLOC(sizeof(Base_entry), heap,
+ DYNAMIC_TYPE_ALTNAME);
if (entry == NULL) {
WOLFSSL_MSG("allocate error");
return MEMORY_E;
@@ -3988,12 +8346,13 @@ static int DecodeSubtree(byte* input, int sz, Base_entry** head, void* heap)
entry->name = (char*)XMALLOC(strLength, heap, DYNAMIC_TYPE_ALTNAME);
if (entry->name == NULL) {
WOLFSSL_MSG("allocate error");
+ XFREE(entry, heap, DYNAMIC_TYPE_ALTNAME);
return MEMORY_E;
}
XMEMCPY(entry->name, &input[nameIdx], strLength);
entry->nameSz = strLength;
- entry->type = b & 0x0F;
+ entry->type = bType;
entry->next = *head;
*head = entry;
@@ -4006,7 +8365,7 @@ static int DecodeSubtree(byte* input, int sz, Base_entry** head, void* heap)
}
-static int DecodeNameConstraints(byte* input, int sz, DecodedCert* cert)
+static int DecodeNameConstraints(const byte* input, int sz, DecodedCert* cert)
{
word32 idx = 0;
int length = 0;
@@ -4045,51 +8404,176 @@ static int DecodeNameConstraints(byte* input, int sz, DecodedCert* cert)
}
#endif /* IGNORE_NAME_CONSTRAINTS */
+#if (defined(WOLFSSL_CERT_EXT) && !defined(WOLFSSL_SEP)) || defined(OPENSSL_EXTRA)
-#ifdef WOLFSSL_SEP
- static int DecodeCertPolicy(byte* input, int sz, DecodedCert* cert)
+/* Decode ITU-T X.690 OID format to a string representation
+ * return string length */
+int DecodePolicyOID(char *out, word32 outSz, const byte *in, word32 inSz)
+{
+ word32 val, inIdx = 0, outIdx = 0;
+ int w = 0;
+
+ if (out == NULL || in == NULL || outSz < 4 || inSz < 2)
+ return BAD_FUNC_ARG;
+
+ /* The first byte expands into b/40 dot b%40. */
+ val = in[inIdx++];
+
+ w = XSNPRINTF(out, outSz, "%u.%u", val / 40, val % 40);
+ if (w < 0)
+ goto exit;
+ outIdx += w;
+ val = 0;
+
+ while (inIdx < inSz && outIdx < outSz) {
+ /* extract the next OID digit from in to val */
+ /* first bit is used to set if value is coded on 1 or multiple bytes */
+ if (in[inIdx] & 0x80) {
+ val += in[inIdx] & 0x7F;
+ val *= 128;
+ }
+ else {
+ /* write val as text into out */
+ val += in[inIdx];
+ w = XSNPRINTF(out + outIdx, outSz - outIdx, ".%u", val);
+ if (w < 0)
+ goto exit;
+ outIdx += w;
+ val = 0;
+ }
+ inIdx++;
+ }
+ if (outIdx == outSz)
+ outIdx--;
+ out[outIdx] = 0;
+
+ w = (int)outIdx;
+
+exit:
+ return w;
+}
+#endif /* WOLFSSL_CERT_EXT && !WOLFSSL_SEP */
+
+#if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT) || defined(WOLFSSL_QT)
+ /* Reference: https://tools.ietf.org/html/rfc5280#section-4.2.1.4 */
+ static int DecodeCertPolicy(const byte* input, int sz, DecodedCert* cert)
{
word32 idx = 0;
- int length = 0;
+ word32 oldIdx;
+ int ret;
+ int total_length = 0, policy_length = 0, length = 0;
+ #if !defined(WOLFSSL_SEP) && defined(WOLFSSL_CERT_EXT) && \
+ !defined(WOLFSSL_DUP_CERTPOL)
+ int i;
+ #endif
WOLFSSL_ENTER("DecodeCertPolicy");
+ #if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT)
+ /* Check if cert is null before dereferencing below */
+ if (cert == NULL)
+ return BAD_FUNC_ARG;
+ #endif
- /* Unwrap certificatePolicies */
- if (GetSequence(input, &idx, &length, sz) < 0) {
- WOLFSSL_MSG("\tdeviceType isn't OID");
- return ASN_PARSE_E;
- }
+ #if defined(WOLFSSL_CERT_EXT)
+ cert->extCertPoliciesNb = 0;
+ #endif
- if (GetSequence(input, &idx, &length, sz) < 0) {
- WOLFSSL_MSG("\tdeviceType isn't OID");
+ if (GetSequence(input, &idx, &total_length, sz) < 0) {
+ WOLFSSL_MSG("\tGet CertPolicy total seq failed");
return ASN_PARSE_E;
}
- if (input[idx++] != ASN_OBJECT_ID) {
- WOLFSSL_MSG("\tdeviceType isn't OID");
+ /* Validate total length */
+ if (total_length > (sz - (int)idx)) {
+ WOLFSSL_MSG("\tCertPolicy length mismatch");
return ASN_PARSE_E;
}
- if (GetLength(input, &idx, &length, sz) < 0) {
- WOLFSSL_MSG("\tCouldn't read length of deviceType");
- return ASN_PARSE_E;
- }
+ /* Unwrap certificatePolicies */
+ do {
+ if (GetSequence(input, &idx, &policy_length, sz) < 0) {
+ WOLFSSL_MSG("\tGet CertPolicy seq failed");
+ return ASN_PARSE_E;
+ }
- if (length > 0) {
- cert->deviceType = (byte*)XMALLOC(length, cert->heap, 0);
- if (cert->deviceType == NULL) {
- WOLFSSL_MSG("\tCouldn't alloc memory for deviceType");
- return MEMORY_E;
+ oldIdx = idx;
+ ret = GetASNObjectId(input, &idx, &length, sz);
+ if (ret != 0)
+ return ret;
+ policy_length -= idx - oldIdx;
+
+ if (length > 0) {
+ /* Verify length won't overrun buffer */
+ if (length > (sz - (int)idx)) {
+ WOLFSSL_MSG("\tCertPolicy length exceeds input buffer");
+ return ASN_PARSE_E;
+ }
+
+ #if defined(WOLFSSL_SEP)
+ cert->deviceType = (byte*)XMALLOC(length, cert->heap,
+ DYNAMIC_TYPE_X509_EXT);
+ if (cert->deviceType == NULL) {
+ WOLFSSL_MSG("\tCouldn't alloc memory for deviceType");
+ return MEMORY_E;
+ }
+ cert->deviceTypeSz = length;
+ XMEMCPY(cert->deviceType, input + idx, length);
+ break;
+ #elif defined(WOLFSSL_CERT_EXT)
+ /* decode cert policy */
+ if (DecodePolicyOID(cert->extCertPolicies[
+ cert->extCertPoliciesNb], MAX_CERTPOL_SZ,
+ input + idx, length) <= 0) {
+ WOLFSSL_MSG("\tCouldn't decode CertPolicy");
+ return ASN_PARSE_E;
+ }
+ #ifndef WOLFSSL_DUP_CERTPOL
+ /* From RFC 5280 section 4.2.1.3 "A certificate policy OID MUST
+ * NOT appear more than once in a certificate policies
+ * extension". This is a sanity check for duplicates.
+ * extCertPolicies should only have OID values, additional
+ * qualifiers need to be stored in a separate array. */
+ for (i = 0; i < cert->extCertPoliciesNb; i++) {
+ if (XMEMCMP(cert->extCertPolicies[i],
+ cert->extCertPolicies[cert->extCertPoliciesNb],
+ MAX_CERTPOL_SZ) == 0) {
+ WOLFSSL_MSG("Duplicate policy OIDs not allowed");
+ WOLFSSL_MSG("Use WOLFSSL_DUP_CERTPOL if wanted");
+ return CERTPOLICIES_E;
+ }
+ }
+ #endif /* !WOLFSSL_DUP_CERTPOL */
+ cert->extCertPoliciesNb++;
+ #else
+ WOLFSSL_LEAVE("DecodeCertPolicy : unsupported mode", 0);
+ return 0;
+ #endif
}
- cert->deviceTypeSz = length;
- XMEMCPY(cert->deviceType, input + idx, length);
- }
+ idx += policy_length;
+ } while((int)idx < total_length
+ #if defined(WOLFSSL_CERT_EXT)
+ && cert->extCertPoliciesNb < MAX_CERTPOL_NB
+ #endif
+ );
WOLFSSL_LEAVE("DecodeCertPolicy", 0);
return 0;
}
#endif /* WOLFSSL_SEP */
+/* Macro to check if bit is set, if not sets and return success.
+ Otherwise returns failure */
+/* Macro required here because bit-field operation */
+#ifndef WOLFSSL_NO_ASN_STRICT
+ #define VERIFY_AND_SET_OID(bit) \
+ if (bit == 0) \
+ bit = 1; \
+ else \
+ return ASN_OBJECT_ID_E;
+#else
+ /* With no strict defined, the verify is skipped */
+#define VERIFY_AND_SET_OID(bit) bit = 1;
+#endif
static int DecodeCertExtensions(DecodedCert* cert)
/*
@@ -4097,68 +8581,85 @@ static int DecodeCertExtensions(DecodedCert* cert)
* index. It is works starting with the recorded extensions pointer.
*/
{
+ int ret = 0;
word32 idx = 0;
int sz = cert->extensionsSz;
- byte* input = cert->extensions;
+ const byte* input = cert->extensions;
int length;
word32 oid;
byte critical = 0;
byte criticalFail = 0;
+ byte tag = 0;
WOLFSSL_ENTER("DecodeCertExtensions");
if (input == NULL || sz == 0)
return BAD_FUNC_ARG;
- if (input[idx++] != ASN_EXTENSIONS)
+ if (GetASNTag(input, &idx, &tag, sz) < 0) {
return ASN_PARSE_E;
+ }
- if (GetLength(input, &idx, &length, sz) < 0)
+ if (tag != ASN_EXTENSIONS) {
+ WOLFSSL_MSG("\tfail: should be an EXTENSIONS");
return ASN_PARSE_E;
+ }
- if (GetSequence(input, &idx, &length, sz) < 0)
+ if (GetLength(input, &idx, &length, sz) < 0) {
+ WOLFSSL_MSG("\tfail: invalid length");
return ASN_PARSE_E;
-
+ }
+
+ if (GetSequence(input, &idx, &length, sz) < 0) {
+ WOLFSSL_MSG("\tfail: should be a SEQUENCE (1)");
+ return ASN_PARSE_E;
+ }
+
while (idx < (word32)sz) {
+ word32 localIdx;
+
if (GetSequence(input, &idx, &length, sz) < 0) {
WOLFSSL_MSG("\tfail: should be a SEQUENCE");
return ASN_PARSE_E;
}
oid = 0;
- if (GetObjectId(input, &idx, &oid, sz) < 0) {
+ if ((ret = GetObjectId(input, &idx, &oid, oidCertExtType, sz)) < 0) {
WOLFSSL_MSG("\tfail: OBJECT ID");
- return ASN_PARSE_E;
+ return ret;
}
/* check for critical flag */
critical = 0;
- if (input[idx] == ASN_BOOLEAN) {
- int boolLength = 0;
- idx++;
- if (GetLength(input, &idx, &boolLength, sz) < 0) {
- WOLFSSL_MSG("\tfail: critical boolean length");
- return ASN_PARSE_E;
- }
- if (input[idx++])
- critical = 1;
+ if ((idx + 1) > (word32)sz) {
+ WOLFSSL_MSG("\tfail: malformed buffer");
+ return BUFFER_E;
}
- /* process the extension based on the OID */
- if (input[idx++] != ASN_OCTET_STRING) {
- WOLFSSL_MSG("\tfail: should be an OCTET STRING");
- return ASN_PARSE_E;
+ localIdx = idx;
+ if (GetASNTag(input, &localIdx, &tag, sz) == 0) {
+ if (tag == ASN_BOOLEAN) {
+ ret = GetBoolean(input, &idx, sz);
+ if (ret < 0) {
+ WOLFSSL_MSG("\tfail: critical boolean");
+ return ret;
+ }
+
+ critical = (byte)ret;
+ }
}
- if (GetLength(input, &idx, &length, sz) < 0) {
- WOLFSSL_MSG("\tfail: extension data length");
- return ASN_PARSE_E;
+ /* process the extension based on the OID */
+ ret = GetOctetString(input, &idx, &length, sz);
+ if (ret < 0) {
+ WOLFSSL_MSG("\tfail: bad OCTET STRING");
+ return ret;
}
switch (oid) {
case BASIC_CA_OID:
- #ifdef OPENSSL_EXTRA
- cert->extBasicConstSet = 1;
+ VERIFY_AND_SET_OID(cert->extBasicConstSet);
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
cert->extBasicConstCrit = critical;
#endif
if (DecodeBasicCaConstraint(&input[idx], length, cert) < 0)
@@ -4166,57 +8667,95 @@ static int DecodeCertExtensions(DecodedCert* cert)
break;
case CRL_DIST_OID:
+ VERIFY_AND_SET_OID(cert->extCRLdistSet);
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+ cert->extCRLdistCrit = critical;
+ #endif
if (DecodeCrlDist(&input[idx], length, cert) < 0)
return ASN_PARSE_E;
break;
case AUTH_INFO_OID:
+ VERIFY_AND_SET_OID(cert->extAuthInfoSet);
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
+ cert->extAuthInfoCrit = critical;
+ #endif
if (DecodeAuthInfo(&input[idx], length, cert) < 0)
return ASN_PARSE_E;
break;
case ALT_NAMES_OID:
- #ifdef OPENSSL_EXTRA
- cert->extSubjAltNameSet = 1;
+ VERIFY_AND_SET_OID(cert->extSubjAltNameSet);
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
cert->extSubjAltNameCrit = critical;
#endif
- if (DecodeAltNames(&input[idx], length, cert) < 0)
- return ASN_PARSE_E;
+ ret = DecodeAltNames(&input[idx], length, cert);
+ if (ret < 0)
+ return ret;
break;
case AUTH_KEY_OID:
- cert->extAuthKeyIdSet = 1;
- #ifdef OPENSSL_EXTRA
+ VERIFY_AND_SET_OID(cert->extAuthKeyIdSet);
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
cert->extAuthKeyIdCrit = critical;
#endif
+ #ifndef WOLFSSL_ALLOW_CRIT_SKID
+ /* This check is added due to RFC 5280 section 4.2.1.1
+ * stating that conforming CA's must mark this extension
+ * as non-critical. When parsing extensions check that
+ * certificate was made in compliance with this. */
+ if (critical) {
+ WOLFSSL_MSG("Critical Auth Key ID is not allowed");
+ WOLFSSL_MSG("Use macro WOLFSSL_ALLOW_CRIT_SKID if wanted");
+ return ASN_CRIT_EXT_E;
+ }
+ #endif
if (DecodeAuthKeyId(&input[idx], length, cert) < 0)
return ASN_PARSE_E;
break;
case SUBJ_KEY_OID:
- cert->extSubjKeyIdSet = 1;
- #ifdef OPENSSL_EXTRA
+ VERIFY_AND_SET_OID(cert->extSubjKeyIdSet);
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
cert->extSubjKeyIdCrit = critical;
#endif
+ #ifndef WOLFSSL_ALLOW_CRIT_SKID
+ /* This check is added due to RFC 5280 section 4.2.1.2
+ * stating that conforming CA's must mark this extension
+ * as non-critical. When parsing extensions check that
+ * certificate was made in compliance with this. */
+ if (critical) {
+ WOLFSSL_MSG("Critical Subject Key ID is not allowed");
+ WOLFSSL_MSG("Use macro WOLFSSL_ALLOW_CRIT_SKID if wanted");
+ return ASN_CRIT_EXT_E;
+ }
+ #endif
+
if (DecodeSubjKeyId(&input[idx], length, cert) < 0)
return ASN_PARSE_E;
break;
case CERT_POLICY_OID:
- WOLFSSL_MSG("Certificate Policy extension not supported yet.");
- #ifdef WOLFSSL_SEP
- #ifdef OPENSSL_EXTRA
- cert->extCertPolicySet = 1;
+ #if defined(WOLFSSL_SEP) || defined(WOLFSSL_QT)
+ VERIFY_AND_SET_OID(cert->extCertPolicySet);
+ #if defined(OPENSSL_EXTRA) || \
+ defined(OPENSSL_EXTRA_X509_SMALL)
cert->extCertPolicyCrit = critical;
#endif
- if (DecodeCertPolicy(&input[idx], length, cert) < 0)
+ #endif
+ #if defined(WOLFSSL_SEP) || defined(WOLFSSL_CERT_EXT) || \
+ defined(WOLFSSL_QT)
+ if (DecodeCertPolicy(&input[idx], length, cert) < 0) {
return ASN_PARSE_E;
+ }
+ #else
+ WOLFSSL_MSG("Certificate Policy extension not supported yet.");
#endif
break;
case KEY_USAGE_OID:
- cert->extKeyUsageSet = 1;
- #ifdef OPENSSL_EXTRA
+ VERIFY_AND_SET_OID(cert->extKeyUsageSet);
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
cert->extKeyUsageCrit = critical;
#endif
if (DecodeKeyUsage(&input[idx], length, cert) < 0)
@@ -4224,8 +8763,8 @@ static int DecodeCertExtensions(DecodedCert* cert)
break;
case EXT_KEY_USAGE_OID:
- cert->extExtKeyUsageSet = 1;
- #ifdef OPENSSL_EXTRA
+ VERIFY_AND_SET_OID(cert->extExtKeyUsageSet);
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
cert->extExtKeyUsageCrit = critical;
#endif
if (DecodeExtKeyUsage(&input[idx], length, cert) < 0)
@@ -4234,8 +8773,17 @@ static int DecodeCertExtensions(DecodedCert* cert)
#ifndef IGNORE_NAME_CONSTRAINTS
case NAME_CONS_OID:
- cert->extNameConstraintSet = 1;
- #ifdef OPENSSL_EXTRA
+ #ifndef WOLFSSL_NO_ASN_STRICT
+ /* Verify RFC 5280 Sec 4.2.1.10 rule:
+ "The name constraints extension,
+ which MUST be used only in a CA certificate" */
+ if (!cert->isCA) {
+ WOLFSSL_MSG("Name constraints allowed only for CA certs");
+ return ASN_NAME_INVALID_E;
+ }
+ #endif
+ VERIFY_AND_SET_OID(cert->extNameConstraintSet);
+ #if defined(OPENSSL_EXTRA) || defined(OPENSSL_EXTRA_X509_SMALL)
cert->extNameConstraintCrit = critical;
#endif
if (DecodeNameConstraints(&input[idx], length, cert) < 0)
@@ -4244,17 +8792,31 @@ static int DecodeCertExtensions(DecodedCert* cert)
#endif /* IGNORE_NAME_CONSTRAINTS */
case INHIBIT_ANY_OID:
+ VERIFY_AND_SET_OID(cert->inhibitAnyOidSet);
WOLFSSL_MSG("Inhibit anyPolicy extension not supported yet.");
break;
+ #ifndef IGNORE_NETSCAPE_CERT_TYPE
+ case NETSCAPE_CT_OID:
+ WOLFSSL_MSG("Netscape certificate type extension not supported "
+ "yet.");
+ if (CheckBitString(input, &idx, &length, idx + length, 0,
+ NULL) < 0) {
+ return ASN_PARSE_E;
+ }
+ break;
+ #endif
+
default:
+ #ifndef WOLFSSL_NO_ASN_STRICT
/* While it is a failure to not support critical extensions,
* still parse the certificate ignoring the unsupported
- * extention to allow caller to accept it with the verify
+ * extension to allow caller to accept it with the verify
* callback. */
if (critical)
criticalFail = 1;
- break;
+ #endif
+ break;
}
idx += length;
}
@@ -4262,7 +8824,6 @@ static int DecodeCertExtensions(DecodedCert* cert)
return criticalFail ? ASN_CRIT_EXT_E : 0;
}
-
int ParseCert(DecodedCert* cert, int type, int verify, void* cm)
{
int ret;
@@ -4297,126 +8858,662 @@ int ParseCert(DecodedCert* cert, int type, int verify, void* cm)
return ret;
}
-
/* from SSL proper, for locking can't do find here anymore */
#ifdef __cplusplus
extern "C" {
#endif
- WOLFSSL_LOCAL Signer* GetCA(void* signers, byte* hash);
+ Signer* GetCA(void* signers, byte* hash);
#ifndef NO_SKID
- WOLFSSL_LOCAL Signer* GetCAByName(void* signers, byte* hash);
+ Signer* GetCAByName(void* signers, byte* hash);
#endif
#ifdef __cplusplus
- }
+ }
#endif
+#if defined(WOLFCRYPT_ONLY) || defined(NO_CERTS)
+
+/* dummy functions, not using wolfSSL so don't need actual ones */
+Signer* GetCA(void* signers, byte* hash)
+{
+ (void)hash;
+
+ return (Signer*)signers;
+}
+
+#ifndef NO_SKID
+Signer* GetCAByName(void* signers, byte* hash)
+{
+ (void)hash;
+
+ return (Signer*)signers;
+}
+#endif /* NO_SKID */
+
+#endif /* WOLFCRYPT_ONLY || NO_CERTS */
+
+#if defined(WOLFSSL_NO_TRUSTED_CERTS_VERIFY) && !defined(NO_SKID)
+static Signer* GetCABySubjectAndPubKey(DecodedCert* cert, void* cm)
+{
+ Signer* ca = NULL;
+ if (cert->extSubjKeyIdSet)
+ ca = GetCA(cm, cert->extSubjKeyId);
+ if (ca == NULL)
+ ca = GetCAByName(cm, cert->subjectHash);
+ if (ca) {
+ if ((ca->pubKeySize == cert->pubKeySize) &&
+ (XMEMCMP(ca->publicKey, cert->publicKey, ca->pubKeySize) == 0)) {
+ return ca;
+ }
+ }
+ return NULL;
+}
+#endif
+
+#if defined(WOLFSSL_SMALL_CERT_VERIFY) || defined(OPENSSL_EXTRA)
+/* Only quick step through the certificate to find fields that are then used
+ * in certificate signature verification.
+ * Must use the signature OID from the signed part of the certificate.
+ *
+ * This is only for minimizing dynamic memory usage during TLS certificate
+ * chain processing.
+ * Doesn't support:
+ * OCSP Only: alt lookup using subject and pub key w/o sig check
+ */
+static int CheckCertSignature_ex(const byte* cert, word32 certSz, void* heap,
+ void* cm, const byte* pubKey, word32 pubKeySz, int pubKeyOID)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ SignatureCtx sigCtx[1];
+#else
+ SignatureCtx* sigCtx;
+#endif
+ byte hash[KEYID_SIZE];
+ Signer* ca = NULL;
+ word32 idx = 0;
+ int len;
+ word32 tbsCertIdx = 0;
+ word32 sigIndex = 0;
+ word32 signatureOID = 0;
+ word32 oid = 0;
+ word32 issuerIdx = 0;
+ word32 issuerSz = 0;
+#ifndef NO_SKID
+ int extLen = 0;
+ word32 extIdx = 0;
+ word32 extEndIdx = 0;
+ int extAuthKeyIdSet = 0;
+#endif
+ int ret = 0;
+ word32 localIdx;
+ byte tag;
+
+
+ if (cert == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ sigCtx = (SignatureCtx*)XMALLOC(sizeof(*sigCtx), heap, DYNAMIC_TYPE_SIGNATURE);
+ if (sigCtx == NULL)
+ return MEMORY_E;
+#endif
+ InitSignatureCtx(sigCtx, heap, INVALID_DEVID);
+
+ /* Certificate SEQUENCE */
+ if (GetSequence(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0) {
+ tbsCertIdx = idx;
+
+ /* TBSCertificate SEQUENCE */
+ if (GetSequence(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ sigIndex = len + idx;
+
+ if ((idx + 1) > certSz)
+ ret = BUFFER_E;
+ }
+ if (ret == 0) {
+ /* version - optional */
+ localIdx = idx;
+ if (GetASNTag(cert, &localIdx, &tag, certSz) == 0) {
+ if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED)) {
+ idx++;
+ if (GetLength(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ idx += len;
+ }
+ }
+ }
+
+ if (ret == 0) {
+ /* serialNumber */
+ if (GetASNHeader(cert, ASN_INTEGER, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ idx += len;
+
+ /* signature */
+ if (GetAlgoId(cert, &idx, &signatureOID, oidSigType, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0) {
+ issuerIdx = idx;
+ /* issuer */
+ if (GetSequence(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ issuerSz = len + idx - issuerIdx;
+ }
+#ifndef NO_SKID
+ if (ret == 0) {
+ idx += len;
+
+ /* validity */
+ if (GetSequence(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ idx += len;
+
+ /* subject */
+ if (GetSequence(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ idx += len;
+
+ /* subjectPublicKeyInfo */
+ if (GetSequence(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ idx += len;
+
+ if ((idx + 1) > certSz)
+ ret = BUFFER_E;
+ }
+ if (ret == 0) {
+ /* issuerUniqueID - optional */
+ localIdx = idx;
+ if (GetASNTag(cert, &localIdx, &tag, certSz) == 0) {
+ if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1)) {
+ idx++;
+ if (GetLength(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ idx += len;
+ }
+ }
+ }
+ if (ret == 0) {
+ if ((idx + 1) > certSz)
+ ret = BUFFER_E;
+ }
+ if (ret == 0) {
+ /* subjectUniqueID - optional */
+ localIdx = idx;
+ if (GetASNTag(cert, &localIdx, &tag, certSz) == 0) {
+ if (tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 2)) {
+ idx++;
+ if (GetLength(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ idx += len;
+ }
+ }
+ }
+
+ if (ret == 0) {
+ if ((idx + 1) > certSz)
+ ret = BUFFER_E;
+ }
+ /* extensions - optional */
+ localIdx = idx;
+ if (ret == 0 && GetASNTag(cert, &localIdx, &tag, certSz) == 0 &&
+ tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 3)) {
+ idx++;
+ if (GetLength(cert, &idx, &extLen, certSz) < 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0) {
+ if (GetSequence(cert, &idx, &extLen, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ extEndIdx = idx + extLen;
+
+ /* Check each extension for the ones we want. */
+ while (ret == 0 && idx < extEndIdx) {
+ if (GetSequence(cert, &idx, &len, certSz) < 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0) {
+ extIdx = idx;
+ if (GetObjectId(cert, &extIdx, &oid, oidCertExtType,
+ certSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0) {
+ if ((extIdx + 1) > certSz)
+ ret = BUFFER_E;
+ }
+ }
+
+ if (ret == 0) {
+ localIdx = extIdx;
+ if (GetASNTag(cert, &localIdx, &tag, certSz) == 0 &&
+ tag == ASN_BOOLEAN) {
+ if (GetBoolean(cert, &extIdx, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ }
+ if (ret == 0) {
+ if (GetOctetString(cert, &extIdx, &extLen, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0) {
+ switch (oid) {
+ case AUTH_KEY_OID:
+ if (GetSequence(cert, &extIdx, &extLen, certSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && (extIdx + 1) >= certSz)
+ ret = BUFFER_E;
+
+ if (ret == 0 &&
+ GetASNTag(cert, &extIdx, &tag, certSz) == 0 &&
+ tag == (ASN_CONTEXT_SPECIFIC | 0)) {
+ if (GetLength(cert, &extIdx, &extLen, certSz) <= 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0) {
+ extAuthKeyIdSet = 1;
+ if (extLen == KEYID_SIZE)
+ XMEMCPY(hash, cert + extIdx, extLen);
+ else {
+ ret = CalcHashId(cert + extIdx, extLen,
+ hash);
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ idx += len;
+ }
+ }
+ }
+
+ if (ret == 0 && pubKey == NULL) {
+ if (extAuthKeyIdSet)
+ ca = GetCA(cm, hash);
+ if (ca == NULL) {
+ ret = CalcHashId(cert + issuerIdx, issuerSz, hash);
+ if (ret == 0)
+ ca = GetCAByName(cm, hash);
+ }
+ }
+#else
+ if (ret == 0 && pubKey == NULL) {
+ ret = CalcHashId(cert + issuerIdx, issuerSz, hash);
+ if (ret == 0)
+ ca = GetCA(cm, hash);
+ }
+#endif /* !NO_SKID */
+ if (ca == NULL && pubKey == NULL)
+ ret = ASN_NO_SIGNER_E;
+
+ if (ret == 0) {
+ idx = sigIndex;
+ /* signatureAlgorithm */
+ if (GetAlgoId(cert, &idx, &oid, oidSigType, certSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ if (oid != signatureOID)
+ ret = ASN_SIG_OID_E;
+ }
+ if (ret == 0) {
+ /* signatureValue */
+ if (CheckBitString(cert, &idx, &len, certSz, 1, NULL) < 0)
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0) {
+ if (pubKey != NULL) {
+ ret = ConfirmSignature(sigCtx, cert + tbsCertIdx,
+ sigIndex - tbsCertIdx,
+ pubKey, pubKeySz, pubKeyOID,
+ cert + idx, len, signatureOID, NULL);
+ }
+ else {
+ ret = ConfirmSignature(sigCtx, cert + tbsCertIdx,
+ sigIndex - tbsCertIdx,
+ ca->publicKey, ca->pubKeySize, ca->keyOID,
+ cert + idx, len, signatureOID, NULL);
+ }
+ if (ret != 0) {
+ WOLFSSL_MSG("Confirm signature failed");
+ }
+ }
+
+ FreeSignatureCtx(sigCtx);
+#ifdef WOLFSSL_SMALL_STACK
+ if (sigCtx != NULL)
+ XFREE(sigCtx, heap, DYNAMIC_TYPE_SIGNATURE);
+#endif
+ return ret;
+}
+
+#ifdef OPENSSL_EXTRA
+/* Call CheckCertSignature_ex using a public key buffer for verification
+ */
+int CheckCertSignaturePubKey(const byte* cert, word32 certSz, void* heap,
+ const byte* pubKey, word32 pubKeySz, int pubKeyOID)
+{
+ return CheckCertSignature_ex(cert, certSz, heap, NULL,
+ pubKey, pubKeySz, pubKeyOID);
+}
+#endif /* OPENSSL_EXTRA */
+#ifdef WOLFSSL_SMALL_CERT_VERIFY
+/* Call CheckCertSignature_ex using a certificate manager (cm)
+ */
+int CheckCertSignature(const byte* cert, word32 certSz, void* heap, void* cm)
+{
+ return CheckCertSignature_ex(cert, certSz, heap, cm, NULL, 0, 0);
+}
+#endif /* WOLFSSL_SMALL_CERT_VERIFY */
+#endif /* WOLFSSL_SMALL_CERT_VERIFY || OPENSSL_EXTRA */
int ParseCertRelative(DecodedCert* cert, int type, int verify, void* cm)
{
+ int ret = 0;
+ int checkPathLen = 0;
+ int decrementMaxPathLen = 0;
word32 confirmOID;
- int ret;
- int badDate = 0;
- int criticalExt = 0;
+#if defined(WOLFSSL_RENESAS_TSIP)
+ int idx = 0;
+#endif
+ byte* tsip_encRsaKeyIdx;
- if ((ret = DecodeToKey(cert, verify)) < 0) {
- if (ret == ASN_BEFORE_DATE_E || ret == ASN_AFTER_DATE_E)
- badDate = ret;
- else
- return ret;
+ if (cert == NULL) {
+ return BAD_FUNC_ARG;
}
- WOLFSSL_MSG("Parsed Past Key");
+ if (cert->sigCtx.state == SIG_STATE_BEGIN) {
+ cert->badDate = 0;
+ cert->criticalExt = 0;
+ if ((ret = DecodeToKey(cert, verify)) < 0) {
+ if (ret == ASN_BEFORE_DATE_E || ret == ASN_AFTER_DATE_E)
+ cert->badDate = ret;
+ else
+ return ret;
+ }
+
+ WOLFSSL_MSG("Parsed Past Key");
- if (cert->srcIdx < cert->sigIndex) {
+ if (cert->srcIdx < cert->sigIndex) {
#ifndef ALLOW_V1_EXTENSIONS
if (cert->version < 2) {
- WOLFSSL_MSG(" v1 and v2 certs not allowed extensions");
+ WOLFSSL_MSG("\tv1 and v2 certs not allowed extensions");
return ASN_VERSION_E;
}
#endif
- /* save extensions */
- cert->extensions = &cert->source[cert->srcIdx];
- cert->extensionsSz = cert->sigIndex - cert->srcIdx;
- cert->extensionsIdx = cert->srcIdx; /* for potential later use */
-
- if ((ret = DecodeCertExtensions(cert)) < 0) {
- if (ret == ASN_CRIT_EXT_E)
- criticalExt = ret;
- else
- return ret;
- }
- /* advance past extensions */
- cert->srcIdx = cert->sigIndex;
- }
+ /* save extensions */
+ cert->extensions = &cert->source[cert->srcIdx];
+ cert->extensionsSz = cert->sigIndex - cert->srcIdx;
+ cert->extensionsIdx = cert->srcIdx; /* for potential later use */
- if ((ret = GetAlgoId(cert->source, &cert->srcIdx, &confirmOID,
- cert->maxIdx)) < 0)
- return ret;
+ if ((ret = DecodeCertExtensions(cert)) < 0) {
+ if (ret == ASN_CRIT_EXT_E)
+ cert->criticalExt = ret;
+ else
+ return ret;
+ }
- if ((ret = GetSignature(cert)) < 0)
- return ret;
+ /* advance past extensions */
+ cert->srcIdx = cert->sigIndex;
+ }
+
+ if ((ret = GetAlgoId(cert->source, &cert->srcIdx, &confirmOID,
+ oidSigType, cert->maxIdx)) < 0)
+ return ret;
+
+ if ((ret = GetSignature(cert)) < 0)
+ return ret;
- if (confirmOID != cert->signatureOID)
- return ASN_SIG_OID_E;
+ if (confirmOID != cert->signatureOID)
+ return ASN_SIG_OID_E;
#ifndef NO_SKID
- if (cert->extSubjKeyIdSet == 0
- && cert->publicKey != NULL && cert->pubKeySize > 0) {
- #ifdef NO_SHA
- ret = wc_Sha256Hash(cert->publicKey, cert->pubKeySize,
+ if (cert->extSubjKeyIdSet == 0 && cert->publicKey != NULL &&
+ cert->pubKeySize > 0) {
+ ret = CalcHashId(cert->publicKey, cert->pubKeySize,
cert->extSubjKeyId);
- #else
- ret = wc_ShaHash(cert->publicKey, cert->pubKeySize,
- cert->extSubjKeyId);
- #endif
if (ret != 0)
return ret;
}
- #endif
+ #endif /* !NO_SKID */
- if (verify && type != CA_TYPE) {
- Signer* ca = NULL;
- #ifndef NO_SKID
- if (cert->extAuthKeyIdSet)
- ca = GetCA(cm, cert->extAuthKeyId);
- if (ca == NULL)
- ca = GetCAByName(cm, cert->issuerHash);
- #else /* NO_SKID */
- ca = GetCA(cm, cert->issuerHash);
- #endif /* NO SKID */
- WOLFSSL_MSG("About to verify certificate signature");
-
- if (ca) {
-#ifdef HAVE_OCSP
- /* Need the ca's public key hash for OCSP */
- #ifdef NO_SHA
- ret = wc_Sha256Hash(ca->publicKey, ca->pubKeySize,
- cert->issuerKeyHash);
- #else /* NO_SHA */
- ret = wc_ShaHash(ca->publicKey, ca->pubKeySize,
- cert->issuerKeyHash);
- #endif /* NO_SHA */
- if (ret != 0)
- return ret;
-#endif /* HAVE_OCSP */
- /* try to confirm/verify signature */
- if (!ConfirmSignature(cert->source + cert->certBegin,
+ if (!cert->selfSigned || (verify != NO_VERIFY && type != CA_TYPE &&
+ type != TRUSTED_PEER_TYPE)) {
+ cert->ca = NULL;
+ #ifndef NO_SKID
+ if (cert->extAuthKeyIdSet) {
+ cert->ca = GetCA(cm, cert->extAuthKeyId);
+ }
+ if (cert->ca == NULL && cert->extSubjKeyIdSet
+ && verify != VERIFY_OCSP) {
+ cert->ca = GetCA(cm, cert->extSubjKeyId);
+ }
+ if (cert->ca != NULL && XMEMCMP(cert->issuerHash,
+ cert->ca->subjectNameHash, KEYID_SIZE) != 0) {
+ cert->ca = NULL;
+ }
+ if (cert->ca == NULL) {
+ cert->ca = GetCAByName(cm, cert->issuerHash);
+ /* If AKID is available then this CA doesn't have the public
+ * key required */
+ if (cert->ca && cert->extAuthKeyIdSet) {
+ WOLFSSL_MSG("CA SKID doesn't match AKID");
+ cert->ca = NULL;
+ }
+ }
+
+ /* OCSP Only: alt lookup using subject and pub key w/o sig check */
+ #ifdef WOLFSSL_NO_TRUSTED_CERTS_VERIFY
+ if (cert->ca == NULL && verify == VERIFY_OCSP) {
+ cert->ca = GetCABySubjectAndPubKey(cert, cm);
+ if (cert->ca) {
+ ret = 0; /* success */
+ goto exit_pcr;
+ }
+ }
+ #endif /* WOLFSSL_NO_TRUSTED_CERTS_VERIFY */
+ #else
+ cert->ca = GetCA(cm, cert->issuerHash);
+ #endif /* !NO_SKID */
+ }
+
+ if (cert->selfSigned) {
+ cert->maxPathLen = WOLFSSL_MAX_PATH_LEN;
+ } else {
+ /* RFC 5280 Section 4.2.1.9:
+ *
+ * load/receive check
+ *
+ * 1) Is CA boolean set?
+ * No - SKIP CHECK
+ * Yes - Check key usage
+ * 2) Is Key usage extension present?
+ * No - goto 3
+ * Yes - check keyCertSign assertion
+ * 2.a) Is keyCertSign asserted?
+ * No - goto 4
+ * Yes - goto 3
+ * 3) Is pathLen set?
+ * No - goto 4
+ * Yes - check pathLen against maxPathLen.
+ * 3.a) Is pathLen less than maxPathLen?
+ * No - goto 4
+ * Yes - set maxPathLen to pathLen and EXIT
+ * 4) Is maxPathLen > 0?
+ * Yes - Reduce by 1
+ * No - ERROR
+ */
+
+ if (cert->ca && cert->pathLengthSet) {
+ cert->maxPathLen = cert->pathLength;
+ if (cert->isCA) {
+ WOLFSSL_MSG("\tCA boolean set");
+ if (cert->extKeyUsageSet) {
+ WOLFSSL_MSG("\tExtension Key Usage Set");
+ if ((cert->extKeyUsage & KEYUSE_KEY_CERT_SIGN) != 0) {
+ checkPathLen = 1;
+ } else {
+ decrementMaxPathLen = 1;
+ }
+ } else {
+ checkPathLen = 1;
+ } /* !cert->ca check */
+ } /* cert is not a CA (assuming entity cert) */
+
+ if (checkPathLen && cert->pathLengthSet) {
+ if (cert->pathLength < cert->ca->maxPathLen) {
+ WOLFSSL_MSG("\tmaxPathLen status: set to pathLength");
+ cert->maxPathLen = cert->pathLength;
+ } else {
+ decrementMaxPathLen = 1;
+ }
+ }
+
+ if (decrementMaxPathLen && cert->ca->maxPathLen > 0) {
+ WOLFSSL_MSG("\tmaxPathLen status: reduce by 1");
+ cert->maxPathLen = cert->ca->maxPathLen - 1;
+ if (verify != NO_VERIFY && type != CA_TYPE &&
+ type != TRUSTED_PEER_TYPE) {
+ WOLFSSL_MSG("\tmaxPathLen status: OK");
+ }
+ } else if (decrementMaxPathLen && cert->ca->maxPathLen == 0) {
+ cert->maxPathLen = 0;
+ if (verify != NO_VERIFY && type != CA_TYPE &&
+ type != TRUSTED_PEER_TYPE) {
+ WOLFSSL_MSG("\tNon-entity cert, maxPathLen is 0");
+ WOLFSSL_MSG("\tmaxPathLen status: ERROR");
+ return ASN_PATHLEN_INV_E;
+ }
+ }
+ } else if (cert->ca && cert->isCA) {
+ /* case where cert->pathLength extension is not set */
+ if (cert->ca->maxPathLen > 0) {
+ cert->maxPathLen = cert->ca->maxPathLen - 1;
+ } else {
+ cert->maxPathLen = 0;
+ if (verify != NO_VERIFY && type != CA_TYPE &&
+ type != TRUSTED_PEER_TYPE) {
+ WOLFSSL_MSG("\tNon-entity cert, maxPathLen is 0");
+ WOLFSSL_MSG("\tmaxPathLen status: ERROR");
+ return ASN_PATHLEN_INV_E;
+ }
+ }
+ }
+ #ifdef HAVE_OCSP
+ if (verify != NO_VERIFY && type != CA_TYPE &&
+ type != TRUSTED_PEER_TYPE) {
+ if (cert->ca) {
+ /* Need the CA's public key hash for OCSP */
+ XMEMCPY(cert->issuerKeyHash, cert->ca->subjectKeyHash,
+ KEYID_SIZE);
+ }
+
+ }
+ #endif /* HAVE_OCSP */
+ }
+ }
+#if defined(WOLFSSL_RENESAS_TSIP)
+ /* prepare for TSIP TLS cert verification API use */
+ if (cert->keyOID == RSAk) {
+ /* to call TSIP API, it needs keys position info in bytes */
+ if ((ret = RsaPublicKeyDecodeRawIndex(cert->publicKey, (word32*)&idx,
+ cert->pubKeySize,
+ &cert->sigCtx.pubkey_n_start,
+ &cert->sigCtx.pubkey_n_len,
+ &cert->sigCtx.pubkey_e_start,
+ &cert->sigCtx.pubkey_e_len)) != 0) {
+ WOLFSSL_MSG("Decoding index from cert failed.");
+ return ret;
+ }
+ cert->sigCtx.certBegin = cert->certBegin;
+ }
+ /* check if we can use TSIP for cert verification */
+ /* if the ca is verified as tsip root ca. */
+ /* TSIP can only handle 2048 bits(256 byte) key. */
+ if (cert->ca && tsip_checkCA(cert->ca->cm_idx) != 0 &&
+ cert->sigCtx.pubkey_n_len == 256) {
+
+ /* assign memory to encrypted tsip Rsa key index */
+ if (!cert->tsip_encRsaKeyIdx)
+ cert->tsip_encRsaKeyIdx =
+ (byte*)XMALLOC(TSIP_TLS_ENCPUBKEY_SZ_BY_CERTVRFY,
+ cert->heap, DYNAMIC_TYPE_RSA);
+ if (cert->tsip_encRsaKeyIdx == NULL)
+ return MEMORY_E;
+ } else {
+ if (cert->ca) {
+ /* TSIP isn't usable */
+ if (tsip_checkCA(cert->ca->cm_idx) == 0)
+ WOLFSSL_MSG("TSIP isn't usable because the ca isn't verified "
+ "by TSIP.");
+ else if (cert->sigCtx.pubkey_n_len != 256)
+ WOLFSSL_MSG("TSIP isn't usable because the ca isn't signed by "
+ "RSA 2048.");
+ else
+ WOLFSSL_MSG("TSIP isn't usable");
+ }
+ cert->tsip_encRsaKeyIdx = NULL;
+ }
+
+ tsip_encRsaKeyIdx = cert->tsip_encRsaKeyIdx;
+#else
+ tsip_encRsaKeyIdx = NULL;
+#endif
+
+ if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
+ if (cert->ca) {
+ if (verify == VERIFY || verify == VERIFY_OCSP ||
+ verify == VERIFY_SKIP_DATE) {
+ /* try to confirm/verify signature */
+ if ((ret = ConfirmSignature(&cert->sigCtx,
+ cert->source + cert->certBegin,
cert->sigIndex - cert->certBegin,
- ca->publicKey, ca->pubKeySize, ca->keyOID,
- cert->signature, cert->sigLength, cert->signatureOID,
- cert->heap)) {
- WOLFSSL_MSG("Confirm signature failed");
- return ASN_SIG_CONFIRM_E;
+ cert->ca->publicKey, cert->ca->pubKeySize,
+ cert->ca->keyOID, cert->signature,
+ cert->sigLength, cert->signatureOID,
+ tsip_encRsaKeyIdx)) != 0) {
+ if (ret != 0 && ret != WC_PENDING_E) {
+ WOLFSSL_MSG("Confirm signature failed");
+ }
+ return ret;
+ }
}
-#ifndef IGNORE_NAME_CONSTRAINTS
- /* check that this cert's name is permitted by the signer's
- * name constraints */
- if (!ConfirmNameConstraints(ca, cert)) {
- WOLFSSL_MSG("Confirm name constraint failed");
- return ASN_NAME_INVALID_E;
+ #ifndef IGNORE_NAME_CONSTRAINTS
+ if (verify == VERIFY || verify == VERIFY_OCSP ||
+ verify == VERIFY_NAME || verify == VERIFY_SKIP_DATE) {
+ /* check that this cert's name is permitted by the signer's
+ * name constraints */
+ if (!ConfirmNameConstraints(cert->ca, cert)) {
+ WOLFSSL_MSG("Confirm name constraint failed");
+ return ASN_NAME_INVALID_E;
+ }
}
-#endif /* IGNORE_NAME_CONSTRAINTS */
+ #endif /* IGNORE_NAME_CONSTRAINTS */
}
else {
/* no signer */
@@ -4425,15 +9522,22 @@ int ParseCertRelative(DecodedCert* cert, int type, int verify, void* cm)
}
}
- if (badDate != 0)
- return badDate;
+#if defined(WOLFSSL_NO_TRUSTED_CERTS_VERIFY) && !defined(NO_SKID)
+exit_pcr:
+#endif
- if (criticalExt != 0)
- return criticalExt;
+ if (cert->badDate != 0) {
+ if (verify != VERIFY_SKIP_DATE) {
+ return cert->badDate;
+ }
+ WOLFSSL_MSG("Date error: Verify option is skipping");
+ }
- return 0;
-}
+ if (cert->criticalExt != 0)
+ return cert->criticalExt;
+ return ret;
+}
/* Create and init an new signer */
Signer* MakeSigner(void* heap)
@@ -4441,16 +9545,7 @@ Signer* MakeSigner(void* heap)
Signer* signer = (Signer*) XMALLOC(sizeof(Signer), heap,
DYNAMIC_TYPE_SIGNER);
if (signer) {
- signer->pubKeySize = 0;
- signer->keyOID = 0;
- signer->publicKey = NULL;
- signer->nameLen = 0;
- signer->name = NULL;
- #ifndef IGNORE_NAME_CONSTRAINTS
- signer->permittedNames = NULL;
- signer->excludedNames = NULL;
- #endif /* IGNORE_NAME_CONSTRAINTS */
- signer->next = NULL;
+ XMEMSET(signer, 0, sizeof(Signer));
}
(void)heap;
@@ -4462,13 +9557,16 @@ Signer* MakeSigner(void* heap)
void FreeSigner(Signer* signer, void* heap)
{
XFREE(signer->name, heap, DYNAMIC_TYPE_SUBJECT_CN);
- XFREE(signer->publicKey, heap, DYNAMIC_TYPE_PUBLIC_KEY);
- #ifndef IGNORE_NAME_CONSTRAINTS
- if (signer->permittedNames)
- FreeNameSubtrees(signer->permittedNames, heap);
- if (signer->excludedNames)
- FreeNameSubtrees(signer->excludedNames, heap);
- #endif
+ XFREE((void*)signer->publicKey, heap, DYNAMIC_TYPE_PUBLIC_KEY);
+#ifndef IGNORE_NAME_CONSTRAINTS
+ if (signer->permittedNames)
+ FreeNameSubtrees(signer->permittedNames, heap);
+ if (signer->excludedNames)
+ FreeNameSubtrees(signer->excludedNames, heap);
+#endif
+#ifdef WOLFSSL_SIGNER_DER_CERT
+ FreeDer(&signer->derCert);
+#endif
XFREE(signer, heap, DYNAMIC_TYPE_SIGNER);
(void)heap;
@@ -4491,14 +9589,59 @@ void FreeSignerTable(Signer** table, int rows, void* heap)
}
}
+#ifdef WOLFSSL_TRUST_PEER_CERT
+/* Free an individual trusted peer cert */
+void FreeTrustedPeer(TrustedPeerCert* tp, void* heap)
+{
+ if (tp == NULL) {
+ return;
+ }
+
+ if (tp->name) {
+ XFREE(tp->name, heap, DYNAMIC_TYPE_SUBJECT_CN);
+ }
+
+ if (tp->sig) {
+ XFREE(tp->sig, heap, DYNAMIC_TYPE_SIGNATURE);
+ }
+#ifndef IGNORE_NAME_CONSTRAINTS
+ if (tp->permittedNames)
+ FreeNameSubtrees(tp->permittedNames, heap);
+ if (tp->excludedNames)
+ FreeNameSubtrees(tp->excludedNames, heap);
+#endif
+ XFREE(tp, heap, DYNAMIC_TYPE_CERT);
+
+ (void)heap;
+}
+
+/* Free the whole Trusted Peer linked list */
+void FreeTrustedPeerTable(TrustedPeerCert** table, int rows, void* heap)
+{
+ int i;
+
+ for (i = 0; i < rows; i++) {
+ TrustedPeerCert* tp = table[i];
+ while (tp) {
+ TrustedPeerCert* next = tp->next;
+ FreeTrustedPeer(tp, heap);
+ tp = next;
+ }
+ table[i] = NULL;
+ }
+}
+#endif /* WOLFSSL_TRUST_PEER_CERT */
-WOLFSSL_LOCAL int SetMyVersion(word32 version, byte* output, int header)
+int SetMyVersion(word32 version, byte* output, int header)
{
int i = 0;
+ if (output == NULL)
+ return BAD_FUNC_ARG;
+
if (header) {
output[i++] = ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED;
- output[i++] = ASN_BIT_STRING;
+ output[i++] = 3;
}
output[i++] = ASN_INTEGER;
output[i++] = 0x01;
@@ -4507,65 +9650,558 @@ WOLFSSL_LOCAL int SetMyVersion(word32 version, byte* output, int header)
return i;
}
+int SetSerialNumber(const byte* sn, word32 snSz, byte* output,
+ word32 outputSz, int maxSnSz)
+{
+ int i;
+ int snSzInt = (int)snSz;
+
+ if (sn == NULL || output == NULL || snSzInt < 0)
+ return BAD_FUNC_ARG;
+
+ /* remove leading zeros */
+ while (snSzInt > 0 && sn[0] == 0) {
+ snSzInt--;
+ sn++;
+ }
+ /* RFC 5280 - 4.1.2.2:
+ * Serial numbers must be a positive value (and not zero) */
+ if (snSzInt == 0)
+ return BAD_FUNC_ARG;
+
+ if (sn[0] & 0x80)
+ maxSnSz--;
+ /* truncate if input is too long */
+ if (snSzInt > maxSnSz)
+ snSzInt = maxSnSz;
+
+ i = SetASNInt(snSzInt, sn[0], NULL);
+ /* truncate if input is too long */
+ if (snSzInt > (int)outputSz - i)
+ snSzInt = (int)outputSz - i;
+ /* sanity check number of bytes to copy */
+ if (snSzInt <= 0) {
+ return BUFFER_E;
+ }
+
+ /* write out ASN.1 Integer */
+ (void)SetASNInt(snSzInt, sn[0], output);
+ XMEMCPY(output + i, sn, snSzInt);
-WOLFSSL_LOCAL int SetSerialNumber(const byte* sn, word32 snSz, byte* output)
+ /* compute final length */
+ i += snSzInt;
+
+ return i;
+}
+
+#endif /* !NO_CERTS */
+
+int GetSerialNumber(const byte* input, word32* inOutIdx,
+ byte* serial, int* serialSz, word32 maxIdx)
{
int result = 0;
+ int ret;
+
+ WOLFSSL_ENTER("GetSerialNumber");
+
+ if (serial == NULL || input == NULL || serialSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* First byte is ASN type */
+ if ((*inOutIdx+1) > maxIdx) {
+ WOLFSSL_MSG("Bad idx first");
+ return BUFFER_E;
+ }
+
+ ret = GetASNInt(input, inOutIdx, serialSz, maxIdx);
+ if (ret != 0)
+ return ret;
+
+ if (*serialSz > EXTERNAL_SERIAL_SIZE) {
+ WOLFSSL_MSG("Serial size bad");
+ return ASN_PARSE_E;
+ }
+
+ /* return serial */
+ XMEMCPY(serial, &input[*inOutIdx], *serialSz);
+ *inOutIdx += *serialSz;
+
+ return result;
+}
- WOLFSSL_ENTER("SetSerialNumber");
+#ifndef NO_CERTS
- if (snSz <= EXTERNAL_SERIAL_SIZE) {
- output[0] = ASN_INTEGER;
- /* The serial number is always positive. When encoding the
- * INTEGER, if the MSB is 1, add a padding zero to keep the
- * number positive. */
- if (sn[0] & 0x80) {
- output[1] = (byte)snSz + 1;
- output[2] = 0;
- XMEMCPY(&output[3], sn, snSz);
- result = snSz + 3;
+int AllocDer(DerBuffer** pDer, word32 length, int type, void* heap)
+{
+ int ret = BAD_FUNC_ARG;
+ if (pDer) {
+ int dynType = 0;
+ DerBuffer* der;
+
+ /* Determine dynamic type */
+ switch (type) {
+ case CA_TYPE: dynType = DYNAMIC_TYPE_CA; break;
+ case CERT_TYPE: dynType = DYNAMIC_TYPE_CERT; break;
+ case CRL_TYPE: dynType = DYNAMIC_TYPE_CRL; break;
+ case DSA_TYPE: dynType = DYNAMIC_TYPE_DSA; break;
+ case ECC_TYPE: dynType = DYNAMIC_TYPE_ECC; break;
+ case RSA_TYPE: dynType = DYNAMIC_TYPE_RSA; break;
+ default: dynType = DYNAMIC_TYPE_KEY; break;
}
- else {
- output[1] = (byte)snSz;
- XMEMCPY(&output[2], sn, snSz);
- result = snSz + 2;
+
+ /* Setup new buffer */
+ *pDer = (DerBuffer*)XMALLOC(sizeof(DerBuffer) + length, heap, dynType);
+ if (*pDer == NULL) {
+ return MEMORY_E;
}
+ XMEMSET(*pDer, 0, sizeof(DerBuffer) + length);
+
+ der = *pDer;
+ der->type = type;
+ der->dynType = dynType; /* Cache this for FreeDer */
+ der->heap = heap;
+ der->buffer = (byte*)der + sizeof(DerBuffer);
+ der->length = length;
+ ret = 0; /* Success */
}
- return result;
+ return ret;
}
+void FreeDer(DerBuffer** pDer)
+{
+ if (pDer && *pDer)
+ {
+ DerBuffer* der = (DerBuffer*)*pDer;
+ /* ForceZero private keys */
+ if (der->type == PRIVATEKEY_TYPE) {
+ ForceZero(der->buffer, der->length);
+ }
+ der->buffer = NULL;
+ der->length = 0;
+ XFREE(der, der->heap, der->dynType);
+ *pDer = NULL;
+ }
+}
-#if defined(WOLFSSL_KEY_GEN) || defined(WOLFSSL_CERT_GEN)
+int wc_AllocDer(DerBuffer** pDer, word32 length, int type, void* heap)
+{
+ return AllocDer(pDer, length, type, heap);
+}
+void wc_FreeDer(DerBuffer** pDer)
+{
+ FreeDer(pDer);
+}
+
+
+#if defined(WOLFSSL_PEM_TO_DER) || defined(WOLFSSL_DER_TO_PEM)
+
+/* Max X509 header length indicates the max length + 2 ('\n', '\0') */
+#define MAX_X509_HEADER_SZ (37 + 2)
+
+wcchar BEGIN_CERT = "-----BEGIN CERTIFICATE-----";
+wcchar END_CERT = "-----END CERTIFICATE-----";
+#ifdef WOLFSSL_CERT_REQ
+ wcchar BEGIN_CERT_REQ = "-----BEGIN CERTIFICATE REQUEST-----";
+ wcchar END_CERT_REQ = "-----END CERTIFICATE REQUEST-----";
+#endif
+#ifndef NO_DH
+ wcchar BEGIN_DH_PARAM = "-----BEGIN DH PARAMETERS-----";
+ wcchar END_DH_PARAM = "-----END DH PARAMETERS-----";
+#endif
+#ifndef NO_DSA
+ wcchar BEGIN_DSA_PARAM = "-----BEGIN DSA PARAMETERS-----";
+ wcchar END_DSA_PARAM = "-----END DSA PARAMETERS-----";
+#endif
+wcchar BEGIN_X509_CRL = "-----BEGIN X509 CRL-----";
+wcchar END_X509_CRL = "-----END X509 CRL-----";
+wcchar BEGIN_RSA_PRIV = "-----BEGIN RSA PRIVATE KEY-----";
+wcchar END_RSA_PRIV = "-----END RSA PRIVATE KEY-----";
+wcchar BEGIN_PRIV_KEY = "-----BEGIN PRIVATE KEY-----";
+wcchar END_PRIV_KEY = "-----END PRIVATE KEY-----";
+wcchar BEGIN_ENC_PRIV_KEY = "-----BEGIN ENCRYPTED PRIVATE KEY-----";
+wcchar END_ENC_PRIV_KEY = "-----END ENCRYPTED PRIVATE KEY-----";
+#ifdef HAVE_ECC
+ wcchar BEGIN_EC_PRIV = "-----BEGIN EC PRIVATE KEY-----";
+ wcchar END_EC_PRIV = "-----END EC PRIVATE KEY-----";
+#endif
+#if defined(HAVE_ECC) || defined(HAVE_ED25519) || defined(HAVE_ED448) || \
+ !defined(NO_DSA)
+ wcchar BEGIN_DSA_PRIV = "-----BEGIN DSA PRIVATE KEY-----";
+ wcchar END_DSA_PRIV = "-----END DSA PRIVATE KEY-----";
+#endif
+#ifdef OPENSSL_EXTRA
+ const char BEGIN_PRIV_KEY_PREFIX[] = "-----BEGIN";
+ const char PRIV_KEY_SUFFIX[] = "PRIVATE KEY-----";
+ const char END_PRIV_KEY_PREFIX[] = "-----END";
+#endif
+wcchar BEGIN_PUB_KEY = "-----BEGIN PUBLIC KEY-----";
+wcchar END_PUB_KEY = "-----END PUBLIC KEY-----";
+#if defined(HAVE_ED25519) || defined(HAVE_ED448)
+ wcchar BEGIN_EDDSA_PRIV = "-----BEGIN EDDSA PRIVATE KEY-----";
+ wcchar END_EDDSA_PRIV = "-----END EDDSA PRIVATE KEY-----";
+#endif
+#ifdef HAVE_CRL
+ const char *const BEGIN_CRL = "-----BEGIN X509 CRL-----";
+ wcchar END_CRL = "-----END X509 CRL-----";
+#endif
+
+
+static WC_INLINE char* SkipEndOfLineChars(char* line, const char* endOfLine)
+{
+ /* eat end of line characters */
+ while (line < endOfLine &&
+ (line[0] == '\r' || line[0] == '\n')) {
+ line++;
+ }
+ return line;
+}
+
+int wc_PemGetHeaderFooter(int type, const char** header, const char** footer)
+{
+ int ret = BAD_FUNC_ARG;
+
+ switch (type) {
+ case CA_TYPE: /* same as below */
+ case TRUSTED_PEER_TYPE:
+ case CERT_TYPE:
+ if (header) *header = BEGIN_CERT;
+ if (footer) *footer = END_CERT;
+ ret = 0;
+ break;
+
+ case CRL_TYPE:
+ if (header) *header = BEGIN_X509_CRL;
+ if (footer) *footer = END_X509_CRL;
+ ret = 0;
+ break;
+ #ifndef NO_DH
+ case DH_PARAM_TYPE:
+ if (header) *header = BEGIN_DH_PARAM;
+ if (footer) *footer = END_DH_PARAM;
+ ret = 0;
+ break;
+ #endif
+ #ifndef NO_DSA
+ case DSA_PARAM_TYPE:
+ if (header) *header = BEGIN_DSA_PARAM;
+ if (footer) *footer = END_DSA_PARAM;
+ ret = 0;
+ break;
+ #endif
+ #ifdef WOLFSSL_CERT_REQ
+ case CERTREQ_TYPE:
+ if (header) *header = BEGIN_CERT_REQ;
+ if (footer) *footer = END_CERT_REQ;
+ ret = 0;
+ break;
+ #endif
+ #ifndef NO_DSA
+ case DSA_TYPE:
+ case DSA_PRIVATEKEY_TYPE:
+ if (header) *header = BEGIN_DSA_PRIV;
+ if (footer) *footer = END_DSA_PRIV;
+ ret = 0;
+ break;
+ #endif
+ #ifdef HAVE_ECC
+ case ECC_TYPE:
+ case ECC_PRIVATEKEY_TYPE:
+ if (header) *header = BEGIN_EC_PRIV;
+ if (footer) *footer = END_EC_PRIV;
+ ret = 0;
+ break;
+ #endif
+ case RSA_TYPE:
+ case PRIVATEKEY_TYPE:
+ if (header) *header = BEGIN_RSA_PRIV;
+ if (footer) *footer = END_RSA_PRIV;
+ ret = 0;
+ break;
+ #ifdef HAVE_ED25519
+ case ED25519_TYPE:
+ #endif
+ #ifdef HAVE_ED448
+ case ED448_TYPE:
+ #endif
+ #if defined(HAVE_ED25519) || defined(HAVE_ED448)
+ case EDDSA_PRIVATEKEY_TYPE:
+ if (header) *header = BEGIN_EDDSA_PRIV;
+ if (footer) *footer = END_EDDSA_PRIV;
+ ret = 0;
+ break;
+ #endif
+ case PUBLICKEY_TYPE:
+ case ECC_PUBLICKEY_TYPE:
+ if (header) *header = BEGIN_PUB_KEY;
+ if (footer) *footer = END_PUB_KEY;
+ ret = 0;
+ break;
+ #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+ case DH_PRIVATEKEY_TYPE:
+ #endif
+ case PKCS8_PRIVATEKEY_TYPE:
+ if (header) *header = BEGIN_PRIV_KEY;
+ if (footer) *footer = END_PRIV_KEY;
+ ret = 0;
+ break;
+ case PKCS8_ENC_PRIVATEKEY_TYPE:
+ if (header) *header = BEGIN_ENC_PRIV_KEY;
+ if (footer) *footer = END_ENC_PRIV_KEY;
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+#ifdef WOLFSSL_ENCRYPTED_KEYS
+
+static wcchar kProcTypeHeader = "Proc-Type";
+static wcchar kDecInfoHeader = "DEK-Info";
+
+#ifdef WOLFSSL_PEM_TO_DER
+#ifndef NO_DES3
+ static wcchar kEncTypeDes = "DES-CBC";
+ static wcchar kEncTypeDes3 = "DES-EDE3-CBC";
+#endif
+#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_128)
+ static wcchar kEncTypeAesCbc128 = "AES-128-CBC";
+#endif
+#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_192)
+ static wcchar kEncTypeAesCbc192 = "AES-192-CBC";
+#endif
+#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_256)
+ static wcchar kEncTypeAesCbc256 = "AES-256-CBC";
+#endif
+
+int wc_EncryptedInfoGet(EncryptedInfo* info, const char* cipherInfo)
+{
+ int ret = 0;
+
+ if (info == NULL || cipherInfo == NULL)
+ return BAD_FUNC_ARG;
+
+ /* determine cipher information */
+#ifndef NO_DES3
+ if (XSTRNCMP(cipherInfo, kEncTypeDes, XSTRLEN(kEncTypeDes)) == 0) {
+ info->cipherType = WC_CIPHER_DES;
+ info->keySz = DES_KEY_SIZE;
+ if (info->ivSz == 0) info->ivSz = DES_IV_SIZE;
+ }
+ else if (XSTRNCMP(cipherInfo, kEncTypeDes3, XSTRLEN(kEncTypeDes3)) == 0) {
+ info->cipherType = WC_CIPHER_DES3;
+ info->keySz = DES3_KEY_SIZE;
+ if (info->ivSz == 0) info->ivSz = DES_IV_SIZE;
+ }
+ else
+#endif /* !NO_DES3 */
+#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_128)
+ if (XSTRNCMP(cipherInfo, kEncTypeAesCbc128, XSTRLEN(kEncTypeAesCbc128)) == 0) {
+ info->cipherType = WC_CIPHER_AES_CBC;
+ info->keySz = AES_128_KEY_SIZE;
+ if (info->ivSz == 0) info->ivSz = AES_IV_SIZE;
+ }
+ else
+#endif
+#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_192)
+ if (XSTRNCMP(cipherInfo, kEncTypeAesCbc192, XSTRLEN(kEncTypeAesCbc192)) == 0) {
+ info->cipherType = WC_CIPHER_AES_CBC;
+ info->keySz = AES_192_KEY_SIZE;
+ if (info->ivSz == 0) info->ivSz = AES_IV_SIZE;
+ }
+ else
+#endif
+#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_256)
+ if (XSTRNCMP(cipherInfo, kEncTypeAesCbc256, XSTRLEN(kEncTypeAesCbc256)) == 0) {
+ info->cipherType = WC_CIPHER_AES_CBC;
+ info->keySz = AES_256_KEY_SIZE;
+ if (info->ivSz == 0) info->ivSz = AES_IV_SIZE;
+ }
+ else
+#endif
+ {
+ ret = NOT_COMPILED_IN;
+ }
+ return ret;
+}
+
+int wc_EncryptedInfoParse(EncryptedInfo* info, char** pBuffer, size_t bufSz)
+{
+ int err = 0;
+ char* bufferStart;
+ char* bufferEnd;
+ char* line;
+ word32 lineSz;
+ char* finish;
+ word32 finishSz;
+ char* start = NULL;
+ word32 startSz;
+ char* newline = NULL;
+
+ if (info == NULL || pBuffer == NULL || bufSz == 0)
+ return BAD_FUNC_ARG;
+
+ bufferStart = *pBuffer;
+ bufferEnd = bufferStart + bufSz;
+
+ /* find encrypted info marker */
+ line = XSTRNSTR(bufferStart, kProcTypeHeader,
+ min((word32)bufSz, PEM_LINE_LEN));
+ if (line != NULL) {
+ if (line >= bufferEnd) {
+ return BUFFER_E;
+ }
+
+ lineSz = (word32)(bufferEnd - line);
+
+ /* find DEC-Info marker */
+ start = XSTRNSTR(line, kDecInfoHeader, min(lineSz, PEM_LINE_LEN));
+
+ if (start == NULL)
+ return BUFFER_E;
+
+ /* skip dec-info and ": " */
+ start += XSTRLEN(kDecInfoHeader);
+ if (start >= bufferEnd)
+ return BUFFER_E;
+
+ if (start[0] == ':') {
+ start++;
+ if (start >= bufferEnd)
+ return BUFFER_E;
+ }
+ if (start[0] == ' ')
+ start++;
+
+ startSz = (word32)(bufferEnd - start);
+ finish = XSTRNSTR(start, ",", min(startSz, PEM_LINE_LEN));
+
+ if ((start != NULL) && (finish != NULL) && (start < finish)) {
+ if (finish >= bufferEnd) {
+ return BUFFER_E;
+ }
+
+ finishSz = (word32)(bufferEnd - finish);
+ newline = XSTRNSTR(finish, "\r", min(finishSz, PEM_LINE_LEN));
+
+ /* get cipher name */
+ if (NAME_SZ < (finish - start)) /* buffer size of info->name */
+ return BUFFER_E;
+ if (XMEMCPY(info->name, start, finish - start) == NULL)
+ return BUFFER_E;
+ info->name[finish - start] = '\0'; /* null term */
+
+ /* populate info */
+ err = wc_EncryptedInfoGet(info, info->name);
+ if (err != 0)
+ return err;
+
+ /* get IV */
+ if (finishSz < info->ivSz + 1)
+ return BUFFER_E;
+
+ if (newline == NULL) {
+ newline = XSTRNSTR(finish, "\n", min(finishSz,
+ PEM_LINE_LEN));
+ }
+ if ((newline != NULL) && (newline > finish)) {
+ finish++;
+ info->ivSz = (word32)(newline - finish);
+ if (info->ivSz > IV_SZ)
+ return BUFFER_E;
+ if (XMEMCPY(info->iv, finish, info->ivSz) == NULL)
+ return BUFFER_E;
+ info->set = 1;
+ }
+ else
+ return BUFFER_E;
+ }
+ else
+ return BUFFER_E;
+
+ /* eat end of line characters */
+ newline = SkipEndOfLineChars(newline, bufferEnd);
+
+ /* return new headerEnd */
+
+ *pBuffer = newline;
+ }
+
+ return err;
+}
+#endif /* WOLFSSL_PEM_TO_DER */
+
+#ifdef WOLFSSL_DER_TO_PEM
+static int wc_EncryptedInfoAppend(char* dest, int destSz, char* cipherInfo)
+{
+ if (cipherInfo != NULL) {
+ int cipherInfoStrLen = (int)XSTRLEN((char*)cipherInfo);
+
+ if (cipherInfoStrLen > HEADER_ENCRYPTED_KEY_SIZE - (9+14+10+3))
+ cipherInfoStrLen = HEADER_ENCRYPTED_KEY_SIZE - (9+14+10+3);
+
+ if (destSz - (int)XSTRLEN(dest) >= cipherInfoStrLen + (9+14+8+2+2+1)) {
+ /* strncat's src length needs to include the NULL */
+ XSTRNCAT(dest, kProcTypeHeader, 10);
+ XSTRNCAT(dest, ": 4,ENCRYPTED\n", 15);
+ XSTRNCAT(dest, kDecInfoHeader, 9);
+ XSTRNCAT(dest, ": ", 3);
+ XSTRNCAT(dest, cipherInfo, destSz - (int)XSTRLEN(dest) - 1);
+ XSTRNCAT(dest, "\n\n", 4);
+ }
+ }
+ return 0;
+}
+#endif /* WOLFSSL_DER_TO_PEM */
+#endif /* WOLFSSL_ENCRYPTED_KEYS */
+
+#ifdef WOLFSSL_DER_TO_PEM
+
+/* Used for compatibility API */
+int wc_DerToPem(const byte* der, word32 derSz,
+ byte* output, word32 outSz, int type)
+{
+ return wc_DerToPemEx(der, derSz, output, outSz, NULL, type);
+}
/* convert der buffer to pem into output, can't do inplace, der and output
need to be different */
-int wc_DerToPem(const byte* der, word32 derSz, byte* output, word32 outSz,
- int type)
+int wc_DerToPemEx(const byte* der, word32 derSz, byte* output, word32 outSz,
+ byte *cipher_info, int type)
{
+ const char* headerStr = NULL;
+ const char* footerStr = NULL;
#ifdef WOLFSSL_SMALL_STACK
char* header = NULL;
char* footer = NULL;
#else
- char header[80];
- char footer[80];
+ char header[MAX_X509_HEADER_SZ + HEADER_ENCRYPTED_KEY_SIZE];
+ char footer[MAX_X509_HEADER_SZ];
#endif
-
- int headerLen = 80;
- int footerLen = 80;
+ int headerLen = MAX_X509_HEADER_SZ + HEADER_ENCRYPTED_KEY_SIZE;
+ int footerLen = MAX_X509_HEADER_SZ;
int i;
int err;
int outLen; /* return length or error */
+ (void)cipher_info;
+
if (der == output) /* no in place conversion */
return BAD_FUNC_ARG;
+ err = wc_PemGetHeaderFooter(type, &headerStr, &footerStr);
+ if (err != 0)
+ return err;
+
#ifdef WOLFSSL_SMALL_STACK
header = (char*)XMALLOC(headerLen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (header == NULL)
return MEMORY_E;
-
+
footer = (char*)XMALLOC(footerLen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (footer == NULL) {
XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -4573,39 +10209,44 @@ int wc_DerToPem(const byte* der, word32 derSz, byte* output, word32 outSz,
}
#endif
- if (type == CERT_TYPE) {
- XSTRNCPY(header, "-----BEGIN CERTIFICATE-----\n", headerLen);
- XSTRNCPY(footer, "-----END CERTIFICATE-----\n", footerLen);
- }
- else if (type == PRIVATEKEY_TYPE) {
- XSTRNCPY(header, "-----BEGIN RSA PRIVATE KEY-----\n", headerLen);
- XSTRNCPY(footer, "-----END RSA PRIVATE KEY-----\n", footerLen);
- }
- #ifdef HAVE_ECC
- else if (type == ECC_PRIVATEKEY_TYPE) {
- XSTRNCPY(header, "-----BEGIN EC PRIVATE KEY-----\n", headerLen);
- XSTRNCPY(footer, "-----END EC PRIVATE KEY-----\n", footerLen);
- }
+ /* build header and footer based on type */
+ XSTRNCPY(header, headerStr, headerLen - 1);
+ header[headerLen - 2] = 0;
+ XSTRNCPY(footer, footerStr, footerLen - 1);
+ footer[footerLen - 2] = 0;
+
+ /* add new line to end */
+ XSTRNCAT(header, "\n", 2);
+ XSTRNCAT(footer, "\n", 2);
+
+#ifdef WOLFSSL_ENCRYPTED_KEYS
+ err = wc_EncryptedInfoAppend(header, headerLen, (char*)cipher_info);
+ if (err != 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(footer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- #ifdef WOLFSSL_CERT_REQ
- else if (type == CERTREQ_TYPE)
- {
- XSTRNCPY(header,
- "-----BEGIN CERTIFICATE REQUEST-----\n", headerLen);
- XSTRNCPY(footer, "-----END CERTIFICATE REQUEST-----\n", footerLen);
+ return err;
}
- #endif
- else {
+#endif
+
+ headerLen = (int)XSTRLEN(header);
+ footerLen = (int)XSTRLEN(footer);
+
+ /* if null output and 0 size passed in then return size needed */
+ if (!output && outSz == 0) {
#ifdef WOLFSSL_SMALL_STACK
XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
XFREE(footer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return BAD_FUNC_ARG;
+ outLen = 0;
+ if ((err = Base64_Encode(der, derSz, NULL, (word32*)&outLen))
+ != LENGTH_ONLY_E) {
+ return err;
+ }
+ return headerLen + footerLen + outLen;
}
- headerLen = (int)XSTRLEN(header);
- footerLen = (int)XSTRLEN(footer);
-
if (!der || !output) {
#ifdef WOLFSSL_SMALL_STACK
XFREE(header, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -4657,12 +10298,846 @@ int wc_DerToPem(const byte* der, word32 derSz, byte* output, word32 outSz,
return outLen + headerLen + footerLen;
}
+#endif /* WOLFSSL_DER_TO_PEM */
+
+#ifdef WOLFSSL_PEM_TO_DER
+
+/* Remove PEM header/footer, convert to ASN1, store any encrypted data
+ info->consumed tracks of PEM bytes consumed in case multiple parts */
+int PemToDer(const unsigned char* buff, long longSz, int type,
+ DerBuffer** pDer, void* heap, EncryptedInfo* info, int* keyFormat)
+{
+ const char* header = NULL;
+ const char* footer = NULL;
+ char* headerEnd;
+ char* footerEnd;
+ char* consumedEnd;
+ char* bufferEnd = (char*)(buff + longSz);
+ long neededSz;
+ int ret = 0;
+ int sz = (int)longSz;
+ int encrypted_key = 0;
+ DerBuffer* der;
+#if defined(HAVE_PKCS8) || defined(WOLFSSL_ENCRYPTED_KEYS)
+ word32 algId = 0;
+ #if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_DES3) && !defined(NO_WOLFSSL_SKIP_TRAILING_PAD)
+ int padVal = 0;
+ #endif
+#endif
+#ifdef OPENSSL_EXTRA
+ char beginBuf[PEM_LINE_LEN + 1]; /* add 1 for null terminator */
+ char endBuf[PEM_LINE_LEN + 1]; /* add 1 for null terminator */
+#endif
+
+ WOLFSSL_ENTER("PemToDer");
+
+ /* get PEM header and footer based on type */
+ ret = wc_PemGetHeaderFooter(type, &header, &footer);
+ if (ret != 0)
+ return ret;
+
+ /* map header if not found for type */
+ for (;;) {
+ headerEnd = XSTRNSTR((char*)buff, header, sz);
+
+ if (headerEnd) {
+ break;
+ } else
+ if (type == PRIVATEKEY_TYPE) {
+ if (header == BEGIN_RSA_PRIV) {
+ header = BEGIN_PRIV_KEY; footer = END_PRIV_KEY;
+ } else
+ if (header == BEGIN_PRIV_KEY) {
+ header = BEGIN_ENC_PRIV_KEY; footer = END_ENC_PRIV_KEY;
+ } else
+ #ifdef HAVE_ECC
+ if (header == BEGIN_ENC_PRIV_KEY) {
+ header = BEGIN_EC_PRIV; footer = END_EC_PRIV;
+ } else
+ if (header == BEGIN_EC_PRIV) {
+ header = BEGIN_DSA_PRIV; footer = END_DSA_PRIV;
+ } else
+ #endif
+ #if defined(HAVE_ED25519) || defined(HAVE_ED448)
+ #ifdef HAVE_ECC
+ if (header == BEGIN_DSA_PRIV)
+ #else
+ if (header == BEGIN_ENC_PRIV_KEY)
+ #endif
+ {
+ header = BEGIN_EDDSA_PRIV; footer = END_EDDSA_PRIV;
+ } else
+ #endif
+ {
+ break;
+ }
+ } else
+#ifdef HAVE_CRL
+ if ((type == CRL_TYPE) && (header != BEGIN_CRL)) {
+ header = BEGIN_CRL; footer = END_CRL;
+ } else
+#endif
+ {
+ break;
+ }
+ }
+
+ if (!headerEnd) {
+#ifdef OPENSSL_EXTRA
+ char* beginEnd;
+ int endLen;
+ /* see if there is a -----BEGIN * PRIVATE KEY----- header */
+ headerEnd = XSTRNSTR((char*)buff, PRIV_KEY_SUFFIX, sz);
+ if (headerEnd) {
+ beginEnd = headerEnd + XSTR_SIZEOF(PRIV_KEY_SUFFIX);
+ /* back up to BEGIN_PRIV_KEY_PREFIX */
+ headerEnd -= XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX);
+ while (headerEnd > (char*)buff &&
+ XSTRNCMP(headerEnd, BEGIN_PRIV_KEY_PREFIX,
+ XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX)) != 0) {
+ headerEnd--;
+ }
+ if (headerEnd <= (char*)buff ||
+ XSTRNCMP(headerEnd, BEGIN_PRIV_KEY_PREFIX,
+ XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX)) != 0 ||
+ beginEnd - headerEnd > PEM_LINE_LEN) {
+ WOLFSSL_MSG("Couldn't find PEM header");
+ return ASN_NO_PEM_HEADER;
+ }
+ /* headerEnd now points to beginning of header */
+ XMEMCPY(beginBuf, headerEnd, beginEnd - headerEnd);
+ beginBuf[beginEnd - headerEnd] = '\0';
+ /* look for matching footer */
+ footer = XSTRNSTR(beginEnd,
+ beginBuf + XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX),
+ (unsigned int)((char*)buff + sz - beginEnd));
+ if (!footer) {
+ WOLFSSL_MSG("Couldn't find PEM footer");
+ return ASN_NO_PEM_HEADER;
+ }
+ footer -= XSTR_SIZEOF(END_PRIV_KEY_PREFIX);
+ endLen = (unsigned int)(beginEnd - headerEnd -
+ (XSTR_SIZEOF(BEGIN_PRIV_KEY_PREFIX) -
+ XSTR_SIZEOF(END_PRIV_KEY_PREFIX)));
+ XMEMCPY(endBuf, footer, endLen);
+ endBuf[endLen] = '\0';
+
+ header = beginBuf;
+ footer = endBuf;
+ headerEnd = beginEnd;
+ } else {
+ WOLFSSL_MSG("Couldn't find PEM header");
+ return ASN_NO_PEM_HEADER;
+ }
+#else
+ WOLFSSL_MSG("Couldn't find PEM header");
+ return ASN_NO_PEM_HEADER;
+#endif
+ } else {
+ headerEnd += XSTRLEN(header);
+ }
+
+ /* eat end of line characters */
+ headerEnd = SkipEndOfLineChars(headerEnd, bufferEnd);
+
+ if (type == PRIVATEKEY_TYPE) {
+ /* keyFormat is Key_Sum enum */
+ if (keyFormat) {
+ #ifdef HAVE_ECC
+ if (header == BEGIN_EC_PRIV)
+ *keyFormat = ECDSAk;
+ #endif
+ #if !defined(NO_DSA)
+ if (header == BEGIN_DSA_PRIV)
+ *keyFormat = DSAk;
+ #endif
+ }
+ }
+
+#ifdef WOLFSSL_ENCRYPTED_KEYS
+ if (info) {
+ ret = wc_EncryptedInfoParse(info, &headerEnd, bufferEnd - headerEnd);
+ if (ret < 0)
+ return ret;
+ if (info->set)
+ encrypted_key = 1;
+ }
+#endif /* WOLFSSL_ENCRYPTED_KEYS */
+
+ /* find footer */
+ footerEnd = XSTRNSTR(headerEnd, footer, (unsigned int)((char*)buff + sz - headerEnd));
+ if (!footerEnd) {
+ if (info)
+ info->consumed = longSz; /* No more certs if no footer */
+ return BUFFER_E;
+ }
+
+ consumedEnd = footerEnd + XSTRLEN(footer);
+
+ if (consumedEnd < bufferEnd) { /* handle no end of line on last line */
+ /* eat end of line characters */
+ consumedEnd = SkipEndOfLineChars(consumedEnd, bufferEnd);
+ /* skip possible null term */
+ if (consumedEnd < bufferEnd && consumedEnd[0] == '\0')
+ consumedEnd++;
+ }
+
+ if (info)
+ info->consumed = (long)(consumedEnd - (char*)buff);
+
+ /* set up der buffer */
+ neededSz = (long)(footerEnd - headerEnd);
+ if (neededSz > sz || neededSz <= 0)
+ return BUFFER_E;
+
+ ret = AllocDer(pDer, (word32)neededSz, type, heap);
+ if (ret < 0) {
+ return ret;
+ }
+ der = *pDer;
+
+ if (Base64_Decode((byte*)headerEnd, (word32)neededSz,
+ der->buffer, &der->length) < 0)
+ return BUFFER_E;
+
+ if ((header == BEGIN_PRIV_KEY
+#ifdef OPENSSL_EXTRA
+ || header == beginBuf
+#endif
+#ifdef HAVE_ECC
+ || header == BEGIN_EC_PRIV
+#endif
+ ) && !encrypted_key)
+ {
+ #ifdef HAVE_PKCS8
+ /* pkcs8 key, convert and adjust length */
+ if ((ret = ToTraditional_ex(der->buffer, der->length, &algId)) > 0) {
+ der->length = ret;
+ if (keyFormat) {
+ *keyFormat = algId;
+ }
+ }
+ else {
+ /* ignore failure here and assume key is not pkcs8 wrapped */
+ }
+ #endif
+
+ return 0;
+ }
+
+#ifdef WOLFSSL_ENCRYPTED_KEYS
+ if (encrypted_key || header == BEGIN_ENC_PRIV_KEY) {
+ int passwordSz = NAME_SZ;
+ #ifdef WOLFSSL_SMALL_STACK
+ char* password = NULL;
+ #else
+ char password[NAME_SZ];
+ #endif
+
+ if (!info || !info->passwd_cb) {
+ WOLFSSL_MSG("No password callback set");
+ return NO_PASSWORD;
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ password = (char*)XMALLOC(passwordSz, heap, DYNAMIC_TYPE_STRING);
+ if (password == NULL)
+ return MEMORY_E;
+ #endif
+
+ /* get password */
+ ret = info->passwd_cb(password, passwordSz, PEM_PASS_READ,
+ info->passwd_userdata);
+ if (ret >= 0) {
+ passwordSz = ret;
+
+ /* convert and adjust length */
+ if (header == BEGIN_ENC_PRIV_KEY) {
+ #ifndef NO_PWDBASED
+ ret = ToTraditionalEnc(der->buffer, der->length,
+ password, passwordSz, &algId);
+
+ if (ret >= 0) {
+ der->length = ret;
+ if (keyFormat) {
+ *keyFormat = algId;
+ }
+ ret = 0;
+ }
+ #else
+ ret = NOT_COMPILED_IN;
+ #endif
+ }
+ /* decrypt the key */
+ else {
+ if (passwordSz == 0) {
+ /* The key is encrypted but does not have a password */
+ WOLFSSL_MSG("No password for encrypted key");
+ ret = NO_PASSWORD;
+ }
+ else {
+ ret = wc_BufferKeyDecrypt(info, der->buffer, der->length,
+ (byte*)password, passwordSz, WC_MD5);
+
+#ifndef NO_WOLFSSL_SKIP_TRAILING_PAD
+ #ifndef NO_DES3
+ if (info->cipherType == WC_CIPHER_DES3) {
+ padVal = der->buffer[der->length-1];
+ if (padVal <= DES_BLOCK_SIZE) {
+ der->length -= padVal;
+ }
+ }
+ #endif /* !NO_DES3 */
+#endif /* !NO_WOLFSSL_SKIP_TRAILING_PAD */
+ }
+ }
+#ifdef OPENSSL_EXTRA
+ if (ret) {
+ PEMerr(0, PEM_R_BAD_DECRYPT);
+ }
+#endif
+ ForceZero(password, passwordSz);
+ }
+#ifdef OPENSSL_EXTRA
+ else {
+ PEMerr(0, PEM_R_BAD_PASSWORD_READ);
+ }
+#endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(password, heap, DYNAMIC_TYPE_STRING);
+ #endif
+ }
+#endif /* WOLFSSL_ENCRYPTED_KEYS */
+
+ return ret;
+}
+
+int wc_PemToDer(const unsigned char* buff, long longSz, int type,
+ DerBuffer** pDer, void* heap, EncryptedInfo* info, int* eccKey)
+{
+ return PemToDer(buff, longSz, type, pDer, heap, info, eccKey);
+}
+
+
+/* our KeyPemToDer password callback, password in userData */
+static WC_INLINE int OurPasswordCb(char* passwd, int sz, int rw, void* userdata)
+{
+ (void)rw;
+
+ if (userdata == NULL)
+ return 0;
+
+ XSTRNCPY(passwd, (char*)userdata, sz);
+ return min((word32)sz, (word32)XSTRLEN((char*)userdata));
+}
+
+/* Return bytes written to buff or < 0 for error */
+int wc_KeyPemToDer(const unsigned char* pem, int pemSz,
+ unsigned char* buff, int buffSz, const char* pass)
+{
+ int eccKey = 0;
+ int ret;
+ DerBuffer* der = NULL;
+#ifdef WOLFSSL_SMALL_STACK
+ EncryptedInfo* info = NULL;
+#else
+ EncryptedInfo info[1];
+#endif
+
+ WOLFSSL_ENTER("wc_KeyPemToDer");
+
+ if (pem == NULL || buff == NULL || buffSz <= 0) {
+ WOLFSSL_MSG("Bad pem der args");
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ info = (EncryptedInfo*)XMALLOC(sizeof(EncryptedInfo), NULL,
+ DYNAMIC_TYPE_ENCRYPTEDINFO);
+ if (info == NULL)
+ return MEMORY_E;
+#endif
+
+ XMEMSET(info, 0, sizeof(EncryptedInfo));
+ info->passwd_cb = OurPasswordCb;
+ info->passwd_userdata = (void*)pass;
+
+ ret = PemToDer(pem, pemSz, PRIVATEKEY_TYPE, &der, NULL, info, &eccKey);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(info, NULL, DYNAMIC_TYPE_ENCRYPTEDINFO);
+#endif
+
+ if (ret < 0 || der == NULL) {
+ WOLFSSL_MSG("Bad Pem To Der");
+ }
+ else {
+ if (der->length <= (word32)buffSz) {
+ XMEMCPY(buff, der->buffer, der->length);
+ ret = der->length;
+ }
+ else {
+ WOLFSSL_MSG("Bad der length");
+ ret = BAD_FUNC_ARG;
+ }
+ }
+
+ FreeDer(&der);
+ return ret;
+}
+
+
+/* Return bytes written to buff or < 0 for error */
+int wc_CertPemToDer(const unsigned char* pem, int pemSz,
+ unsigned char* buff, int buffSz, int type)
+{
+ int eccKey = 0;
+ int ret;
+ DerBuffer* der = NULL;
+
+ WOLFSSL_ENTER("wc_CertPemToDer");
+
+ if (pem == NULL || buff == NULL || buffSz <= 0) {
+ WOLFSSL_MSG("Bad pem der args");
+ return BAD_FUNC_ARG;
+ }
+
+ if (type != CERT_TYPE && type != CA_TYPE && type != CERTREQ_TYPE) {
+ WOLFSSL_MSG("Bad cert type");
+ return BAD_FUNC_ARG;
+ }
+
+
+ ret = PemToDer(pem, pemSz, type, &der, NULL, NULL, &eccKey);
+ if (ret < 0 || der == NULL) {
+ WOLFSSL_MSG("Bad Pem To Der");
+ }
+ else {
+ if (der->length <= (word32)buffSz) {
+ XMEMCPY(buff, der->buffer, der->length);
+ ret = der->length;
+ }
+ else {
+ WOLFSSL_MSG("Bad der length");
+ ret = BAD_FUNC_ARG;
+ }
+ }
+
+ FreeDer(&der);
+ return ret;
+}
+
+#endif /* WOLFSSL_PEM_TO_DER */
+#endif /* WOLFSSL_PEM_TO_DER || WOLFSSL_DER_TO_PEM */
+
+
+#ifdef WOLFSSL_PEM_TO_DER
+#if defined(WOLFSSL_CERT_EXT) || defined(WOLFSSL_PUB_PEM_TO_DER)
+/* Return bytes written to buff or < 0 for error */
+int wc_PubKeyPemToDer(const unsigned char* pem, int pemSz,
+ unsigned char* buff, int buffSz)
+{
+ int ret;
+ DerBuffer* der = NULL;
+
+ WOLFSSL_ENTER("wc_PubKeyPemToDer");
+
+ if (pem == NULL || buff == NULL || buffSz <= 0) {
+ WOLFSSL_MSG("Bad pem der args");
+ return BAD_FUNC_ARG;
+ }
+
+ ret = PemToDer(pem, pemSz, PUBLICKEY_TYPE, &der, NULL, NULL, NULL);
+ if (ret < 0 || der == NULL) {
+ WOLFSSL_MSG("Bad Pem To Der");
+ }
+ else {
+ if (der->length <= (word32)buffSz) {
+ XMEMCPY(buff, der->buffer, der->length);
+ ret = der->length;
+ }
+ else {
+ WOLFSSL_MSG("Bad der length");
+ ret = BAD_FUNC_ARG;
+ }
+ }
+
+ FreeDer(&der);
+ return ret;
+}
+#endif /* WOLFSSL_CERT_EXT || WOLFSSL_PUB_PEM_TO_DER */
+#endif /* WOLFSSL_PEM_TO_DER */
+
+#ifndef NO_FILESYSTEM
+
+#ifdef WOLFSSL_CERT_GEN
+/* load pem cert from file into der buffer, return der size or error */
+int wc_PemCertToDer(const char* fileName, unsigned char* derBuf, int derSz)
+{
+#ifdef WOLFSSL_SMALL_STACK
+ byte staticBuffer[1]; /* force XMALLOC */
+#else
+ byte staticBuffer[FILE_BUFFER_SIZE];
+#endif
+ byte* fileBuf = staticBuffer;
+ int dynamic = 0;
+ int ret = 0;
+ long sz = 0;
+ XFILE file;
+ DerBuffer* converted = NULL;
+
+ WOLFSSL_ENTER("wc_PemCertToDer");
+
+ if (fileName == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ file = XFOPEN(fileName, "rb");
+ if (file == XBADFILE) {
+ ret = BUFFER_E;
+ }
+ }
+
+ if (ret == 0) {
+ if(XFSEEK(file, 0, XSEEK_END) != 0)
+ ret = BUFFER_E;
+ sz = XFTELL(file);
+ XREWIND(file);
+
+ if (sz <= 0) {
+ ret = BUFFER_E;
+ }
+ else if (sz > (long)sizeof(staticBuffer)) {
+ #ifdef WOLFSSL_STATIC_MEMORY
+ WOLFSSL_MSG("File was larger then static buffer");
+ return MEMORY_E;
+ #endif
+ fileBuf = (byte*)XMALLOC(sz, NULL, DYNAMIC_TYPE_FILE);
+ if (fileBuf == NULL)
+ ret = MEMORY_E;
+ else
+ dynamic = 1;
+ }
+
+ if (ret == 0) {
+ if ( (ret = (int)XFREAD(fileBuf, 1, sz, file)) != sz) {
+ ret = BUFFER_E;
+ }
+ #ifdef WOLFSSL_PEM_TO_DER
+ else {
+ ret = PemToDer(fileBuf, sz, CA_TYPE, &converted, 0, NULL,NULL);
+ }
+ #endif
+
+ if (ret == 0) {
+ if (converted->length < (word32)derSz) {
+ XMEMCPY(derBuf, converted->buffer, converted->length);
+ ret = converted->length;
+ }
+ else
+ ret = BUFFER_E;
+ }
+
+ FreeDer(&converted);
+ }
+
+ XFCLOSE(file);
+ if (dynamic)
+ XFREE(fileBuf, NULL, DYNAMIC_TYPE_FILE);
+ }
+
+ return ret;
+}
+#endif /* WOLFSSL_CERT_GEN */
+
+#if defined(WOLFSSL_CERT_EXT) || defined(WOLFSSL_PUB_PEM_TO_DER)
+/* load pem public key from file into der buffer, return der size or error */
+int wc_PemPubKeyToDer(const char* fileName,
+ unsigned char* derBuf, int derSz)
+{
+#ifdef WOLFSSL_SMALL_STACK
+ byte staticBuffer[1]; /* force XMALLOC */
+#else
+ byte staticBuffer[FILE_BUFFER_SIZE];
+#endif
+ byte* fileBuf = staticBuffer;
+ int dynamic = 0;
+ int ret = 0;
+ long sz = 0;
+ XFILE file;
+ DerBuffer* converted = NULL;
+
+ WOLFSSL_ENTER("wc_PemPubKeyToDer");
+
+ if (fileName == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ file = XFOPEN(fileName, "rb");
+ if (file == XBADFILE) {
+ ret = BUFFER_E;
+ }
+ }
+
+ if (ret == 0) {
+ if(XFSEEK(file, 0, XSEEK_END) != 0)
+ ret = BUFFER_E;
+ sz = XFTELL(file);
+ XREWIND(file);
+
+ if (sz <= 0) {
+ ret = BUFFER_E;
+ }
+ else if (sz > (long)sizeof(staticBuffer)) {
+ #ifdef WOLFSSL_STATIC_MEMORY
+ WOLFSSL_MSG("File was larger then static buffer");
+ return MEMORY_E;
+ #endif
+ fileBuf = (byte*)XMALLOC(sz, NULL, DYNAMIC_TYPE_FILE);
+ if (fileBuf == NULL)
+ ret = MEMORY_E;
+ else
+ dynamic = 1;
+ }
+ if (ret == 0) {
+ if ( (ret = (int)XFREAD(fileBuf, 1, sz, file)) != sz) {
+ ret = BUFFER_E;
+ }
+ #ifdef WOLFSSL_PEM_TO_DER
+ else {
+ ret = PemToDer(fileBuf, sz, PUBLICKEY_TYPE, &converted,
+ 0, NULL, NULL);
+ }
+ #endif
+
+ if (ret == 0) {
+ if (converted->length < (word32)derSz) {
+ XMEMCPY(derBuf, converted->buffer, converted->length);
+ ret = converted->length;
+ }
+ else
+ ret = BUFFER_E;
+ }
+
+ FreeDer(&converted);
+ }
+
+ XFCLOSE(file);
+ if (dynamic)
+ XFREE(fileBuf, NULL, DYNAMIC_TYPE_FILE);
+ }
+
+ return ret;
+}
+#endif /* WOLFSSL_CERT_EXT || WOLFSSL_PUB_PEM_TO_DER */
+
+#endif /* !NO_FILESYSTEM */
+
+
+#if !defined(NO_RSA) && (defined(WOLFSSL_CERT_GEN) || \
+ ((defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)) && !defined(HAVE_USER_RSA)))
+/* USER RSA ifdef portions used instead of refactor in consideration for
+ possible fips build */
+/* Write a public RSA key to output */
+static int SetRsaPublicKey(byte* output, RsaKey* key,
+ int outLen, int with_header)
+{
+#ifdef WOLFSSL_SMALL_STACK
+ byte* n = NULL;
+ byte* e = NULL;
+#else
+ byte n[MAX_RSA_INT_SZ];
+ byte e[MAX_RSA_E_SZ];
+#endif
+ byte seq[MAX_SEQ_SZ];
+ byte bitString[1 + MAX_LENGTH_SZ + 1];
+ int nSz;
+ int eSz;
+ int seqSz;
+ int bitStringSz;
+ int idx;
+
+ if (output == NULL || key == NULL || outLen < MAX_SEQ_SZ)
+ return BAD_FUNC_ARG;
+
+ /* n */
+#ifdef WOLFSSL_SMALL_STACK
+ n = (byte*)XMALLOC(MAX_RSA_INT_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (n == NULL)
+ return MEMORY_E;
+#endif
+
+#ifdef HAVE_USER_RSA
+ nSz = SetASNIntRSA(key->n, n);
+#else
+ nSz = SetASNIntMP(&key->n, MAX_RSA_INT_SZ, n);
+#endif
+ if (nSz < 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return nSz;
+ }
+
+ /* e */
+#ifdef WOLFSSL_SMALL_STACK
+ e = (byte*)XMALLOC(MAX_RSA_E_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (e == NULL) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return MEMORY_E;
+ }
+#endif
+
+#ifdef HAVE_USER_RSA
+ eSz = SetASNIntRSA(key->e, e);
+#else
+ eSz = SetASNIntMP(&key->e, MAX_RSA_INT_SZ, e);
+#endif
+ if (eSz < 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return eSz;
+ }
+
+ seqSz = SetSequence(nSz + eSz, seq);
+
+ /* check output size */
+ if ( (seqSz + nSz + eSz) > outLen) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return BUFFER_E;
+ }
+
+ /* headers */
+ if (with_header) {
+ int algoSz;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* algo;
+
+ algo = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (algo == NULL) {
+ XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
+#else
+ byte algo[MAX_ALGO_SZ];
+#endif
+ algoSz = SetAlgoID(RSAk, algo, oidKeyType, 0);
+ bitStringSz = SetBitString(seqSz + nSz + eSz, 0, bitString);
+
+ idx = SetSequence(nSz + eSz + seqSz + bitStringSz + algoSz, output);
+
+ /* check output size */
+ if ( (idx + algoSz + bitStringSz + seqSz + nSz + eSz) > outLen) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return BUFFER_E;
+ }
+
+ /* algo */
+ XMEMCPY(output + idx, algo, algoSz);
+ idx += algoSz;
+ /* bit string */
+ XMEMCPY(output + idx, bitString, bitStringSz);
+ idx += bitStringSz;
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ }
+ else
+ idx = 0;
+
+ /* seq */
+ XMEMCPY(output + idx, seq, seqSz);
+ idx += seqSz;
+ /* n */
+ XMEMCPY(output + idx, n, nSz);
+ idx += nSz;
+ /* e */
+ XMEMCPY(output + idx, e, eSz);
+ idx += eSz;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(n, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(e, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return idx;
+}
+
+#endif /* !NO_RSA && (WOLFSSL_CERT_GEN || (WOLFSSL_KEY_GEN &&
+ !HAVE_USER_RSA))) */
+
+#if !defined(NO_RSA) && (defined(WOLFSSL_CERT_GEN) || defined(OPENSSL_EXTRA))
+int wc_RsaPublicKeyDerSize(RsaKey* key, int with_header)
+{
+ int idx = 0;
+ int nSz, eSz, seqSz, bitStringSz, algoSz;
+
+ if (key == NULL)
+ return BAD_FUNC_ARG;
+
+ /* n */
+#ifdef HAVE_USER_RSA
+ nSz = SetASNIntRSA(key->n, NULL);
+#else
+ nSz = SetASNIntMP(&key->n, MAX_RSA_INT_SZ, NULL);
+#endif
+ if (nSz < 0) {
+ return nSz;
+ }
+
+ /* e */
+#ifdef HAVE_USER_RSA
+ eSz = SetASNIntRSA(key->e, NULL);
+#else
+ eSz = SetASNIntMP(&key->e, MAX_RSA_INT_SZ, NULL);
+#endif
+ if (eSz < 0) {
+ return eSz;
+ }
-#endif /* WOLFSSL_KEY_GEN || WOLFSSL_CERT_GEN */
+ seqSz = SetSequence(nSz + eSz, NULL);
+ /* headers */
+ if (with_header) {
+ algoSz = SetAlgoID(RSAk, NULL, oidKeyType, 0);
+ bitStringSz = SetBitString(seqSz + nSz + eSz, 0, NULL);
-#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)
+ idx += SetSequence(nSz + eSz + seqSz + bitStringSz + algoSz, NULL);
+ /* algo */
+ idx += algoSz;
+ /* bit string */
+ idx += bitStringSz;
+ }
+
+ /* seq */
+ idx += seqSz;
+ /* n */
+ idx += nSz;
+ /* e */
+ idx += eSz;
+
+ return idx;
+}
+
+#endif /* !NO_RSA && WOLFSSL_CERT_GEN */
+
+
+#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)
static mp_int* GetRsaInt(RsaKey* key, int idx)
{
@@ -4688,13 +11163,13 @@ static mp_int* GetRsaInt(RsaKey* key, int idx)
/* Release Tmp RSA resources */
-static INLINE void FreeTmpRsas(byte** tmps, void* heap)
+static WC_INLINE void FreeTmpRsas(byte** tmps, void* heap)
{
int i;
(void)heap;
- for (i = 0; i < RSA_INTS; i++)
+ for (i = 0; i < RSA_INTS; i++)
XFREE(tmps[i], heap, DYNAMIC_TYPE_RSA);
}
@@ -4705,13 +11180,13 @@ int wc_RsaKeyToDer(RsaKey* key, byte* output, word32 inLen)
{
word32 seqSz, verSz, rawLen, intTotalLen = 0;
word32 sizes[RSA_INTS];
- int i, j, outLen, ret = 0;
+ int i, j, outLen, ret = 0, mpSz;
byte seq[MAX_SEQ_SZ];
byte ver[MAX_VERSION_SZ];
byte* tmps[RSA_INTS];
- if (!key || !output)
+ if (!key)
return BAD_FUNC_ARG;
if (key->type != RSA_PRIVATE)
@@ -4723,7 +11198,8 @@ int wc_RsaKeyToDer(RsaKey* key, byte* output, word32 inLen)
/* write all big ints from key to DER tmps */
for (i = 0; i < RSA_INTS; i++) {
mp_int* keyInt = GetRsaInt(key, i);
- rawLen = mp_unsigned_bin_size(keyInt);
+
+ rawLen = mp_unsigned_bin_size(keyInt) + 1;
tmps[i] = (byte*)XMALLOC(rawLen + MAX_SEQ_SZ, key->heap,
DYNAMIC_TYPE_RSA);
if (tmps[i] == NULL) {
@@ -4731,24 +11207,12 @@ int wc_RsaKeyToDer(RsaKey* key, byte* output, word32 inLen)
break;
}
- tmps[i][0] = ASN_INTEGER;
- sizes[i] = SetLength(rawLen, tmps[i] + 1) + 1; /* int tag */
-
- if (sizes[i] <= MAX_SEQ_SZ) {
- int err = mp_to_unsigned_bin(keyInt, tmps[i] + sizes[i]);
- if (err == MP_OKAY) {
- sizes[i] += rawLen;
- intTotalLen += sizes[i];
- }
- else {
- ret = err;
- break;
- }
- }
- else {
- ret = ASN_INPUT_E;
+ mpSz = SetASNIntMP(keyInt, MAX_RSA_INT_SZ, tmps[i]);
+ if (mpSz < 0) {
+ ret = mpSz;
break;
}
+ intTotalLen += (sizes[i] = mpSz);
}
if (ret != 0) {
@@ -4761,42 +11225,43 @@ int wc_RsaKeyToDer(RsaKey* key, byte* output, word32 inLen)
seqSz = SetSequence(verSz + intTotalLen, seq);
outLen = seqSz + verSz + intTotalLen;
- if (outLen > (int)inLen)
- return BAD_FUNC_ARG;
+ if (output) {
+ if (outLen > (int)inLen) {
+ FreeTmpRsas(tmps, key->heap);
+ return BAD_FUNC_ARG;
+ }
- /* write to output */
- XMEMCPY(output, seq, seqSz);
- j = seqSz;
- XMEMCPY(output + j, ver, verSz);
- j += verSz;
+ /* write to output */
+ XMEMCPY(output, seq, seqSz);
+ j = seqSz;
+ XMEMCPY(output + j, ver, verSz);
+ j += verSz;
- for (i = 0; i < RSA_INTS; i++) {
- XMEMCPY(output + j, tmps[i], sizes[i]);
- j += sizes[i];
+ for (i = 0; i < RSA_INTS; i++) {
+ XMEMCPY(output + j, tmps[i], sizes[i]);
+ j += sizes[i];
+ }
}
FreeTmpRsas(tmps, key->heap);
return outLen;
}
+#endif
-#endif /* WOLFSSL_KEY_GEN && !NO_RSA */
-
-
-#if defined(WOLFSSL_CERT_GEN) && !defined(NO_RSA)
-
-
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
+#if (defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA)) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+/* Convert Rsa Public key to DER format, write to output (inLen), return bytes
+ written */
+int wc_RsaKeyToPublicDer(RsaKey* key, byte* output, word32 inLen)
+{
+ return SetRsaPublicKey(output, key, inLen, 1);
+}
- static INLINE word32 min(word32 a, word32 b)
- {
- return a > b ? b : a;
- }
+#endif /* (WOLFSSL_KEY_GEN || OPENSSL_EXTRA) && !NO_RSA && !HAVE_USER_RSA */
-#endif /* WOLFSSL_HAVE_MIN */
+#ifdef WOLFSSL_CERT_GEN
-/* Initialize and Set Certficate defaults:
+/* Initialize and Set Certificate defaults:
version = 3 (0x2)
serial = 0
sigType = SHA_WITH_RSA
@@ -4805,57 +11270,57 @@ int wc_RsaKeyToDer(RsaKey* key, byte* output, word32 inLen)
selfSigned = 1 (true) use subject as issuer
subject = blank
*/
-void wc_InitCert(Cert* cert)
+int wc_InitCert(Cert* cert)
{
+#ifdef WOLFSSL_MULTI_ATTRIB
+ int i = 0;
+#endif
+ if (cert == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ XMEMSET(cert, 0, sizeof(Cert));
+
cert->version = 2; /* version 3 is hex 2 */
+#ifndef NO_SHA
cert->sigType = CTC_SHAwRSA;
+#elif !defined(NO_SHA256)
+ cert->sigType = CTC_SHA256wRSA;
+#else
+ cert->sigType = 0;
+#endif
cert->daysValid = 500;
cert->selfSigned = 1;
- cert->isCA = 0;
- cert->bodySz = 0;
-#ifdef WOLFSSL_ALT_NAMES
- cert->altNamesSz = 0;
- cert->beforeDateSz = 0;
- cert->afterDateSz = 0;
-#endif
cert->keyType = RSA_KEY;
- XMEMSET(cert->serial, 0, CTC_SERIAL_SIZE);
- cert->issuer.country[0] = '\0';
cert->issuer.countryEnc = CTC_PRINTABLE;
- cert->issuer.state[0] = '\0';
cert->issuer.stateEnc = CTC_UTF8;
- cert->issuer.locality[0] = '\0';
cert->issuer.localityEnc = CTC_UTF8;
- cert->issuer.sur[0] = '\0';
cert->issuer.surEnc = CTC_UTF8;
- cert->issuer.org[0] = '\0';
cert->issuer.orgEnc = CTC_UTF8;
- cert->issuer.unit[0] = '\0';
cert->issuer.unitEnc = CTC_UTF8;
- cert->issuer.commonName[0] = '\0';
cert->issuer.commonNameEnc = CTC_UTF8;
- cert->issuer.email[0] = '\0';
- cert->subject.country[0] = '\0';
cert->subject.countryEnc = CTC_PRINTABLE;
- cert->subject.state[0] = '\0';
cert->subject.stateEnc = CTC_UTF8;
- cert->subject.locality[0] = '\0';
cert->subject.localityEnc = CTC_UTF8;
- cert->subject.sur[0] = '\0';
cert->subject.surEnc = CTC_UTF8;
- cert->subject.org[0] = '\0';
cert->subject.orgEnc = CTC_UTF8;
- cert->subject.unit[0] = '\0';
cert->subject.unitEnc = CTC_UTF8;
- cert->subject.commonName[0] = '\0';
cert->subject.commonNameEnc = CTC_UTF8;
- cert->subject.email[0] = '\0';
-#ifdef WOLFSSL_CERT_REQ
- cert->challengePw[0] ='\0';
+#ifdef WOLFSSL_MULTI_ATTRIB
+ for (i = 0; i < CTC_MAX_ATTRIB; i++) {
+ cert->issuer.name[i].type = CTC_UTF8;
+ cert->subject.name[i].type = CTC_UTF8;
+ }
+#endif /* WOLFSSL_MULTI_ATTRIB */
+
+#ifdef WOLFSSL_HEAP_TEST
+ cert->heap = (void*)WOLFSSL_HEAP_TEST;
#endif
+
+ return 0;
}
@@ -4863,26 +11328,46 @@ void wc_InitCert(Cert* cert)
typedef struct DerCert {
byte size[MAX_LENGTH_SZ]; /* length encoded */
byte version[MAX_VERSION_SZ]; /* version encoded */
- byte serial[CTC_SERIAL_SIZE + MAX_LENGTH_SZ]; /* serial number encoded */
+ byte serial[(int)CTC_SERIAL_SIZE + (int)MAX_LENGTH_SZ]; /* serial number encoded */
byte sigAlgo[MAX_ALGO_SZ]; /* signature algo encoded */
byte issuer[ASN_NAME_MAX]; /* issuer encoded */
byte subject[ASN_NAME_MAX]; /* subject encoded */
byte validity[MAX_DATE_SIZE*2 + MAX_SEQ_SZ*2]; /* before and after dates */
byte publicKey[MAX_PUBLIC_KEY_SZ]; /* rsa / ntru public key encoded */
byte ca[MAX_CA_SZ]; /* basic constraint CA true size */
- byte extensions[MAX_EXTENSIONS_SZ]; /* all extensions */
+ byte extensions[MAX_EXTENSIONS_SZ]; /* all extensions */
+#ifdef WOLFSSL_CERT_EXT
+ byte skid[MAX_KID_SZ]; /* Subject Key Identifier extension */
+ byte akid[MAX_KID_SZ]; /* Authority Key Identifier extension */
+ byte keyUsage[MAX_KEYUSAGE_SZ]; /* Key Usage extension */
+ byte extKeyUsage[MAX_EXTKEYUSAGE_SZ]; /* Extended Key Usage extension */
+ byte certPolicies[MAX_CERTPOL_NB*MAX_CERTPOL_SZ]; /* Certificate Policies */
+#endif
#ifdef WOLFSSL_CERT_REQ
byte attrib[MAX_ATTRIB_SZ]; /* Cert req attributes encoded */
#endif
+#ifdef WOLFSSL_ALT_NAMES
+ byte altNames[CTC_MAX_ALT_SIZE]; /* Alternative Names encoded */
+#endif
int sizeSz; /* encoded size length */
int versionSz; /* encoded version length */
int serialSz; /* encoded serial length */
- int sigAlgoSz; /* enocded sig alog length */
+ int sigAlgoSz; /* encoded sig algo length */
int issuerSz; /* encoded issuer length */
int subjectSz; /* encoded subject length */
int validitySz; /* encoded validity length */
int publicKeySz; /* encoded public key length */
int caSz; /* encoded CA extension length */
+#ifdef WOLFSSL_CERT_EXT
+ int skidSz; /* encoded SKID extension length */
+ int akidSz; /* encoded SKID extension length */
+ int keyUsageSz; /* encoded KeyUsage extension length */
+ int extKeyUsageSz; /* encoded ExtendedKeyUsage extension length */
+ int certPoliciesSz; /* encoded CertPolicies extension length*/
+#endif
+#ifdef WOLFSSL_ALT_NAMES
+ int altNamesSz; /* encoded AltNames extension length */
+#endif
int extensionsSz; /* encoded extensions total length */
int total; /* total encoded lengths */
#ifdef WOLFSSL_CERT_REQ
@@ -4894,6 +11379,12 @@ typedef struct DerCert {
#ifdef WOLFSSL_CERT_REQ
/* Write a set header to output */
+static word32 SetPrintableString(word32 len, byte* output)
+{
+ output[0] = ASN_PRINTABLE_STRING;
+ return SetLength(len, output + 1) + 1;
+}
+
static word32 SetUTF8String(word32 len, byte* output)
{
output[0] = ASN_UTF8STRING;
@@ -4903,258 +11394,444 @@ static word32 SetUTF8String(word32 len, byte* output)
#endif /* WOLFSSL_CERT_REQ */
-/* Write a serial number to output */
-static int SetSerial(const byte* serial, byte* output)
+#ifndef WOLFSSL_CERT_GEN_CACHE
+/* wc_SetCert_Free is only public when WOLFSSL_CERT_GEN_CACHE is not defined */
+static
+#endif
+void wc_SetCert_Free(Cert* cert)
{
- int length = 0;
+ if (cert != NULL) {
+ cert->der = NULL;
+ if (cert->decodedCert) {
+ FreeDecodedCert((DecodedCert*)cert->decodedCert);
+
+ XFREE(cert->decodedCert, cert->heap, DYNAMIC_TYPE_DCERT);
+ cert->decodedCert = NULL;
+ }
+ }
+}
+
+static int wc_SetCert_LoadDer(Cert* cert, const byte* der, word32 derSz)
+{
+ int ret;
+
+ if (cert == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ /* Allocate DecodedCert struct and Zero */
+ cert->decodedCert = (void*)XMALLOC(sizeof(DecodedCert), cert->heap,
+ DYNAMIC_TYPE_DCERT);
- output[length++] = ASN_INTEGER;
- length += SetLength(CTC_SERIAL_SIZE, &output[length]);
- XMEMCPY(&output[length], serial, CTC_SERIAL_SIZE);
+ if (cert->decodedCert == NULL) {
+ ret = MEMORY_E;
+ }
+ else {
+ XMEMSET(cert->decodedCert, 0, sizeof(DecodedCert));
+
+ InitDecodedCert((DecodedCert*)cert->decodedCert, der, derSz,
+ cert->heap);
+ ret = ParseCertRelative((DecodedCert*)cert->decodedCert,
+ CERT_TYPE, 0, NULL);
+ if (ret >= 0) {
+ cert->der = (byte*)der;
+ }
+ else {
+ wc_SetCert_Free(cert);
+ }
+ }
+ }
- return length + CTC_SERIAL_SIZE;
+ return ret;
}
+#endif /* WOLFSSL_CERT_GEN */
-#ifdef HAVE_ECC
+#if defined(HAVE_ECC) && defined(HAVE_ECC_KEY_EXPORT)
/* Write a public ECC key to output */
-static int SetEccPublicKey(byte* output, ecc_key* key)
+static int SetEccPublicKey(byte* output, ecc_key* key, int with_header)
{
- byte len[MAX_LENGTH_SZ + 1]; /* trailing 0 */
+ byte bitString[1 + MAX_LENGTH_SZ + 1];
int algoSz;
int curveSz;
- int lenSz;
+ int bitStringSz;
int idx;
word32 pubSz = ECC_BUFSIZE;
#ifdef WOLFSSL_SMALL_STACK
byte* algo = NULL;
byte* curve = NULL;
- byte* pub = NULL;
+ byte* pub;
#else
byte algo[MAX_ALGO_SZ];
byte curve[MAX_ALGO_SZ];
byte pub[ECC_BUFSIZE];
#endif
+ int ret;
#ifdef WOLFSSL_SMALL_STACK
- pub = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ pub = (byte*)XMALLOC(ECC_BUFSIZE, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
if (pub == NULL)
return MEMORY_E;
#endif
- int ret = wc_ecc_export_x963(key, pub, &pubSz);
+#ifdef HAVE_SELFTEST
+ /* older version of ecc.c can not handle dp being NULL */
+ if (key != NULL && key->dp == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ ret = wc_ecc_export_x963(key, pub, &pubSz);
+ }
+#else
+ ret = wc_ecc_export_x963(key, pub, &pubSz);
+#endif
if (ret != 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return ret;
}
+ /* headers */
+ if (with_header) {
#ifdef WOLFSSL_SMALL_STACK
- curve = (byte*)XMALLOC(MAX_ALGO_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (curve == NULL) {
- XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
- }
+ curve = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (curve == NULL) {
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
#endif
-
- /* headers */
- curveSz = SetCurve(key, curve);
- if (curveSz <= 0) {
+ curveSz = SetCurve(key, curve);
+ if (curveSz <= 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(curve, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(curve, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return curveSz;
- }
+ return curveSz;
+ }
#ifdef WOLFSSL_SMALL_STACK
- algo = (byte*)XMALLOC(MAX_ALGO_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (algo == NULL) {
- XFREE(curve, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
- }
+ algo = (byte*)XMALLOC(MAX_ALGO_SZ, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (algo == NULL) {
+ XFREE(curve, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
#endif
+ algoSz = SetAlgoID(ECDSAk, algo, oidKeyType, curveSz);
+
+ bitStringSz = SetBitString(pubSz, 0, bitString);
+
+ idx = SetSequence(pubSz + curveSz + bitStringSz + algoSz, output);
+ /* algo */
+ if (output)
+ XMEMCPY(output + idx, algo, algoSz);
+ idx += algoSz;
+ /* curve */
+ if (output)
+ XMEMCPY(output + idx, curve, curveSz);
+ idx += curveSz;
+ /* bit string */
+ if (output)
+ XMEMCPY(output + idx, bitString, bitStringSz);
+ idx += bitStringSz;
+ }
+ else
+ idx = 0;
- algoSz = SetAlgoID(ECDSAk, algo, keyType, curveSz);
- lenSz = SetLength(pubSz + 1, len);
- len[lenSz++] = 0; /* trailing 0 */
-
- /* write */
- idx = SetSequence(pubSz + curveSz + lenSz + 1 + algoSz, output);
- /* 1 is for ASN_BIT_STRING */
- /* algo */
- XMEMCPY(output + idx, algo, algoSz);
- idx += algoSz;
- /* curve */
- XMEMCPY(output + idx, curve, curveSz);
- idx += curveSz;
- /* bit string */
- output[idx++] = ASN_BIT_STRING;
- /* length */
- XMEMCPY(output + idx, len, lenSz);
- idx += lenSz;
/* pub */
- XMEMCPY(output + idx, pub, pubSz);
+ if (output)
+ XMEMCPY(output + idx, pub, pubSz);
idx += pubSz;
#ifdef WOLFSSL_SMALL_STACK
- XFREE(algo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(curve, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (with_header) {
+ XFREE(algo, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(curve, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return idx;
}
-#endif /* HAVE_ECC */
+/* returns the size of buffer used, the public ECC key in DER format is stored
+ in output buffer
+ with_AlgCurve is a flag for when to include a header that has the Algorithm
+ and Curve information */
+int wc_EccPublicKeyToDer(ecc_key* key, byte* output, word32 inLen,
+ int with_AlgCurve)
+{
+ word32 infoSz = 0;
+ word32 keySz = 0;
+ int ret;
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
-/* Write a public RSA key to output */
-static int SetRsaPublicKey(byte* output, RsaKey* key)
+ if (with_AlgCurve) {
+ /* buffer space for algorithm/curve */
+ infoSz += MAX_SEQ_SZ;
+ infoSz += 2 * MAX_ALGO_SZ;
+
+ /* buffer space for public key sequence */
+ infoSz += MAX_SEQ_SZ;
+ infoSz += TRAILING_ZERO;
+ }
+
+#ifdef HAVE_SELFTEST
+ /* older version of ecc.c can not handle dp being NULL */
+ if (key != NULL && key->dp == NULL) {
+ keySz = 1 + 2 * MAX_ECC_BYTES;
+ ret = LENGTH_ONLY_E;
+ }
+ else {
+ ret = wc_ecc_export_x963(key, NULL, &keySz);
+ }
+#else
+ ret = wc_ecc_export_x963(key, NULL, &keySz);
+#endif
+ if (ret != LENGTH_ONLY_E) {
+ WOLFSSL_MSG("Error in getting ECC public key size");
+ return ret;
+ }
+
+ /* if output null then just return size */
+ if (output == NULL) {
+ return keySz + infoSz;
+ }
+
+ if (inLen < keySz + infoSz) {
+ return BUFFER_E;
+ }
+
+ return SetEccPublicKey(output, key, with_AlgCurve);
+}
+
+int wc_EccPublicKeyDerSize(ecc_key* key, int with_AlgCurve)
+{
+ return wc_EccPublicKeyToDer(key, NULL, 0, with_AlgCurve);
+}
+
+#endif /* HAVE_ECC && HAVE_ECC_KEY_EXPORT */
+
+#if defined(HAVE_ED25519) && (defined(WOLFSSL_CERT_GEN) || \
+ defined(WOLFSSL_KEY_GEN))
+
+/* Write a public ECC key to output */
+static int SetEd25519PublicKey(byte* output, ed25519_key* key, int with_header)
{
+ byte bitString[1 + MAX_LENGTH_SZ + 1];
+ int algoSz;
+ int bitStringSz;
+ int idx;
+ word32 pubSz = ED25519_PUB_KEY_SIZE;
#ifdef WOLFSSL_SMALL_STACK
- byte* n = NULL;
- byte* e = NULL;
byte* algo = NULL;
+ byte* pub;
#else
- byte n[MAX_RSA_INT_SZ];
- byte e[MAX_RSA_E_SZ];
byte algo[MAX_ALGO_SZ];
+ byte pub[ED25519_PUB_KEY_SIZE];
#endif
- byte seq[MAX_SEQ_SZ];
- byte len[MAX_LENGTH_SZ + 1]; /* trailing 0 */
- int nSz;
- int eSz;
- int algoSz;
- int seqSz;
- int lenSz;
- int idx;
- int rawLen;
- int leadingBit;
- int err;
- /* n */
#ifdef WOLFSSL_SMALL_STACK
- n = (byte*)XMALLOC(MAX_RSA_INT_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (n == NULL)
+ pub = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pub == NULL)
return MEMORY_E;
#endif
- leadingBit = mp_leading_bit(&key->n);
- rawLen = mp_unsigned_bin_size(&key->n) + leadingBit;
- n[0] = ASN_INTEGER;
- nSz = SetLength(rawLen, n + 1) + 1; /* int tag */
-
- if ( (nSz + rawLen) < MAX_RSA_INT_SZ) {
- if (leadingBit)
- n[nSz] = 0;
- err = mp_to_unsigned_bin(&key->n, n + nSz + leadingBit);
- if (err == MP_OKAY)
- nSz += rawLen;
- else {
+ idx = wc_ed25519_export_public(key, pub, &pubSz);
+ if (idx != 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return MP_TO_E;
+ return idx;
+ }
+
+ /* headers */
+ if (with_header) {
+#ifdef WOLFSSL_SMALL_STACK
+ algo = (byte*)XMALLOC(MAX_ALGO_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (algo == NULL) {
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
}
+#endif
+ algoSz = SetAlgoID(ED25519k, algo, oidKeyType, 0);
+
+ bitStringSz = SetBitString(pubSz, 0, bitString);
+
+ idx = SetSequence(pubSz + bitStringSz + algoSz, output);
+ /* algo */
+ XMEMCPY(output + idx, algo, algoSz);
+ idx += algoSz;
+ /* bit string */
+ XMEMCPY(output + idx, bitString, bitStringSz);
+ idx += bitStringSz;
}
- else {
+ else
+ idx = 0;
+
+ /* pub */
+ XMEMCPY(output + idx, pub, pubSz);
+ idx += pubSz;
+
#ifdef WOLFSSL_SMALL_STACK
- XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (with_header) {
+ XFREE(algo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+
+ return idx;
+}
+
+int wc_Ed25519PublicKeyToDer(ed25519_key* key, byte* output, word32 inLen,
+ int withAlg)
+{
+ word32 infoSz = 0;
+ word32 keySz = 0;
+ int ret;
+
+ if (output == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (withAlg) {
+ /* buffer space for algorithm */
+ infoSz += MAX_SEQ_SZ;
+ infoSz += MAX_ALGO_SZ;
+
+ /* buffer space for public key sequence */
+ infoSz += MAX_SEQ_SZ;
+ infoSz += TRAILING_ZERO;
+ }
+
+ if ((ret = wc_ed25519_export_public(key, output, &keySz)) != BUFFER_E) {
+ WOLFSSL_MSG("Error in getting ECC public key size");
+ return ret;
+ }
+
+ if (inLen < keySz + infoSz) {
return BUFFER_E;
}
- /* e */
-#ifdef WOLFSSL_SMALL_STACK
- e = (byte*)XMALLOC(MAX_RSA_E_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (e == NULL) {
+ return SetEd25519PublicKey(output, key, withAlg);
+}
+#endif /* HAVE_ED25519 && (WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN) */
+#if defined(HAVE_ED448) && (defined(WOLFSSL_CERT_GEN) || \
+ defined(WOLFSSL_KEY_GEN))
+
+/* Write a public ECC key to output */
+static int SetEd448PublicKey(byte* output, ed448_key* key, int with_header)
+{
+ byte bitString[1 + MAX_LENGTH_SZ + 1];
+ int algoSz;
+ int bitStringSz;
+ int idx;
+ word32 pubSz = ED448_PUB_KEY_SIZE;
#ifdef WOLFSSL_SMALL_STACK
- XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ byte* algo = NULL;
+ byte* pub = NULL;
+#else
+ byte algo[MAX_ALGO_SZ];
+ byte pub[ED448_PUB_KEY_SIZE];
#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ pub = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pub == NULL)
return MEMORY_E;
- }
#endif
- leadingBit = mp_leading_bit(&key->e);
- rawLen = mp_unsigned_bin_size(&key->e) + leadingBit;
- e[0] = ASN_INTEGER;
- eSz = SetLength(rawLen, e + 1) + 1; /* int tag */
-
- if ( (eSz + rawLen) < MAX_RSA_E_SZ) {
- if (leadingBit)
- e[eSz] = 0;
- err = mp_to_unsigned_bin(&key->e, e + eSz + leadingBit);
- if (err == MP_OKAY)
- eSz += rawLen;
- else {
+ idx = wc_ed448_export_public(key, pub, &pubSz);
+ if (idx != 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return MP_TO_E;
- }
+ return idx;
}
- else {
+
+ /* headers */
+ if (with_header) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ algo = (byte*)XMALLOC(MAX_ALGO_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (algo == NULL) {
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
#endif
- return BUFFER_E;
+ algoSz = SetAlgoID(ED448k, algo, oidKeyType, 0);
+
+ bitStringSz = SetBitString(pubSz, 0, bitString);
+
+ idx = SetSequence(pubSz + bitStringSz + algoSz, output);
+ /* algo */
+ XMEMCPY(output + idx, algo, algoSz);
+ idx += algoSz;
+ /* bit string */
+ XMEMCPY(output + idx, bitString, bitStringSz);
+ idx += bitStringSz;
}
+ else
+ idx = 0;
+
+ /* pub */
+ XMEMCPY(output + idx, pub, pubSz);
+ idx += pubSz;
#ifdef WOLFSSL_SMALL_STACK
- algo = (byte*)XMALLOC(MAX_ALGO_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (algo == NULL) {
- XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
+ if (with_header) {
+ XFREE(algo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
}
+ XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- /* headers */
- algoSz = SetAlgoID(RSAk, algo, keyType, 0);
- seqSz = SetSequence(nSz + eSz, seq);
- lenSz = SetLength(seqSz + nSz + eSz + 1, len);
- len[lenSz++] = 0; /* trailing 0 */
+ return idx;
+}
- /* write */
- idx = SetSequence(nSz + eSz + seqSz + lenSz + 1 + algoSz, output);
- /* 1 is for ASN_BIT_STRING */
- /* algo */
- XMEMCPY(output + idx, algo, algoSz);
- idx += algoSz;
- /* bit string */
- output[idx++] = ASN_BIT_STRING;
- /* length */
- XMEMCPY(output + idx, len, lenSz);
- idx += lenSz;
- /* seq */
- XMEMCPY(output + idx, seq, seqSz);
- idx += seqSz;
- /* n */
- XMEMCPY(output + idx, n, nSz);
- idx += nSz;
- /* e */
- XMEMCPY(output + idx, e, eSz);
- idx += eSz;
+int wc_Ed448PublicKeyToDer(ed448_key* key, byte* output, word32 inLen,
+ int withAlg)
+{
+ word32 infoSz = 0;
+ word32 keySz = 0;
+ int ret;
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(algo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ if (output == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
- return idx;
+ if (withAlg) {
+ /* buffer space for algorithm */
+ infoSz += MAX_SEQ_SZ;
+ infoSz += MAX_ALGO_SZ;
+
+ /* buffer space for public key sequence */
+ infoSz += MAX_SEQ_SZ;
+ infoSz += TRAILING_ZERO;
+ }
+
+ if ((ret = wc_ed448_export_public(key, output, &keySz)) != BUFFER_E) {
+ WOLFSSL_MSG("Error in getting ECC public key size");
+ return ret;
+ }
+
+ if (inLen < keySz + infoSz) {
+ return BUFFER_E;
+ }
+
+ return SetEd448PublicKey(output, key, withAlg);
}
+#endif /* HAVE_ED448 && (WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN) */
+
+#ifdef WOLFSSL_CERT_GEN
-static INLINE byte itob(int number)
+static WC_INLINE byte itob(int number)
{
return (byte)number + 0x30;
}
@@ -5184,7 +11861,7 @@ static void SetTime(struct tm* date, byte* output)
output[i++] = itob(date->tm_sec / 10);
output[i++] = itob(date->tm_sec % 10);
-
+
output[i] = 'Z'; /* Zulu profile */
}
@@ -5200,28 +11877,19 @@ static int CopyValidity(byte* output, Cert* cert)
/* headers and output */
seqSz = SetSequence(cert->beforeDateSz + cert->afterDateSz, output);
- XMEMCPY(output + seqSz, cert->beforeDate, cert->beforeDateSz);
- XMEMCPY(output + seqSz + cert->beforeDateSz, cert->afterDate,
- cert->afterDateSz);
+ if (output) {
+ XMEMCPY(output + seqSz, cert->beforeDate, cert->beforeDateSz);
+ XMEMCPY(output + seqSz + cert->beforeDateSz, cert->afterDate,
+ cert->afterDateSz);
+ }
return seqSz + cert->beforeDateSz + cert->afterDateSz;
}
#endif
-/* for systems where mktime() doesn't normalize fully */
-static void RebuildTime(time_t* in, struct tm* out)
-{
- #ifdef FREESCALE_MQX
- out = localtime_r(in, out);
- #else
- (void)in;
- (void)out;
- #endif
-}
-
-
-/* Set Date validity from now until now + daysValid */
+/* Set Date validity from now until now + daysValid
+ * return size in bytes written to output, 0 on error */
static int SetValidity(byte* output, int daysValid)
{
byte before[MAX_DATE_SIZE];
@@ -5231,55 +11899,60 @@ static int SetValidity(byte* output, int daysValid)
int afterSz;
int seqSz;
- time_t ticks;
- time_t normalTime;
- struct tm* now;
- struct tm* tmpTime = NULL;
- struct tm local;
+ time_t now;
+ time_t then;
+ struct tm* tmpTime;
+ struct tm* expandedTime;
+ struct tm localTime;
-#if defined(FREESCALE_MQX) || defined(TIME_OVERRIDES)
+#if defined(NEED_TMP_TIME)
/* for use with gmtime_r */
struct tm tmpTimeStorage;
tmpTime = &tmpTimeStorage;
#else
- (void)tmpTime;
+ tmpTime = NULL;
#endif
+ (void)tmpTime;
- ticks = XTIME(0);
- now = XGMTIME(&ticks, tmpTime);
+ now = XTIME(0);
/* before now */
- local = *now;
before[0] = ASN_GENERALIZED_TIME;
- beforeSz = SetLength(ASN_GEN_TIME_SZ, before + 1) + 1; /* gen tag */
-
- /* subtract 1 day for more compliance */
- local.tm_mday -= 1;
- normalTime = mktime(&local);
- RebuildTime(&normalTime, &local);
+ beforeSz = SetLength(ASN_GEN_TIME_SZ, before + 1) + 1; /* gen tag */
+
+ /* subtract 1 day of seconds for more compliance */
+ then = now - 86400;
+ expandedTime = XGMTIME(&then, tmpTime);
+ if (expandedTime == NULL) {
+ WOLFSSL_MSG("XGMTIME failed");
+ return 0; /* error */
+ }
+ localTime = *expandedTime;
/* adjust */
- local.tm_year += 1900;
- local.tm_mon += 1;
+ localTime.tm_year += 1900;
+ localTime.tm_mon += 1;
- SetTime(&local, before + beforeSz);
+ SetTime(&localTime, before + beforeSz);
beforeSz += ASN_GEN_TIME_SZ;
- /* after now + daysValid */
- local = *now;
after[0] = ASN_GENERALIZED_TIME;
afterSz = SetLength(ASN_GEN_TIME_SZ, after + 1) + 1; /* gen tag */
- /* add daysValid */
- local.tm_mday += daysValid;
- normalTime = mktime(&local);
- RebuildTime(&normalTime, &local);
+ /* add daysValid of seconds */
+ then = now + (daysValid * (time_t)86400);
+ expandedTime = XGMTIME(&then, tmpTime);
+ if (expandedTime == NULL) {
+ WOLFSSL_MSG("XGMTIME failed");
+ return 0; /* error */
+ }
+ localTime = *expandedTime;
/* adjust */
- local.tm_year += 1900;
- local.tm_mon += 1;
+ localTime.tm_year += 1900;
+ localTime.tm_mon += 1;
- SetTime(&local, after + afterSz);
+ SetTime(&localTime, after + afterSz);
afterSz += ASN_GEN_TIME_SZ;
/* headers and output */
@@ -5327,6 +12000,16 @@ static const char* GetOneName(CertName* name, int idx)
return name->commonName;
case 7:
+ return name->serialDev;
+
+#ifdef WOLFSSL_CERT_EXT
+ case 8:
+ return name->busCat;
+
+ case 9:
+#else
+ case 8:
+#endif
return name->email;
default:
@@ -5360,6 +12043,20 @@ static char GetNameType(CertName* name, int idx)
case 6:
return name->commonNameEnc;
+ case 7:
+ return name->serialDevEnc;
+
+#ifdef WOLFSSL_CERT_EXT
+ case 8:
+ return name->busCatEnc;
+
+ case 9:
+#else
+ case 8:
+#endif
+ /* FALL THROUGH */
+ /* The last index, email name, does not have encoding type.
+ The empty case here is to keep track of it for future reference. */
default:
return 0;
}
@@ -5392,54 +12089,589 @@ static byte GetNameId(int idx)
return ASN_COMMON_NAME;
case 7:
- /* email uses different id type */
- return 0;
+ return ASN_SERIAL_NUMBER;
+
+#ifdef WOLFSSL_CERT_EXT
+ case 8:
+ return ASN_BUS_CAT;
+
+ case 9:
+#else
+ case 8:
+#endif
+ return ASN_EMAIL_NAME;
default:
return 0;
}
}
+/*
+ Extensions ::= SEQUENCE OF Extension
+
+ Extension ::= SEQUENCE {
+ extnId OBJECT IDENTIFIER,
+ critical BOOLEAN DEFAULT FALSE,
+ extnValue OCTET STRING }
+ */
/* encode all extensions, return total bytes written */
-static int SetExtensions(byte* output, const byte* ext, int extSz, int header)
+static int SetExtensions(byte* out, word32 outSz, int *IdxInOut,
+ const byte* ext, int extSz)
+{
+ if (out == NULL || IdxInOut == NULL || ext == NULL)
+ return BAD_FUNC_ARG;
+
+ if (outSz < (word32)(*IdxInOut+extSz))
+ return BUFFER_E;
+
+ XMEMCPY(&out[*IdxInOut], ext, extSz); /* extensions */
+ *IdxInOut += extSz;
+
+ return *IdxInOut;
+}
+
+/* encode extensions header, return total bytes written */
+static int SetExtensionsHeader(byte* out, word32 outSz, int extSz)
{
byte sequence[MAX_SEQ_SZ];
byte len[MAX_LENGTH_SZ];
+ int seqSz, lenSz, idx = 0;
- int sz = 0;
- int seqSz = SetSequence(extSz, sequence);
+ if (out == NULL)
+ return BAD_FUNC_ARG;
- if (header) {
- int lenSz = SetLength(seqSz + extSz, len);
- output[0] = ASN_EXTENSIONS; /* extensions id */
- sz++;
- XMEMCPY(&output[sz], len, lenSz); /* length */
- sz += lenSz;
- }
- XMEMCPY(&output[sz], sequence, seqSz); /* sequence */
- sz += seqSz;
- XMEMCPY(&output[sz], ext, extSz); /* extensions */
- sz += extSz;
+ if (outSz < 3)
+ return BUFFER_E;
- return sz;
+ seqSz = SetSequence(extSz, sequence);
+
+ /* encode extensions length provided */
+ lenSz = SetLength(extSz+seqSz, len);
+
+ if (outSz < (word32)(lenSz+seqSz+1))
+ return BUFFER_E;
+
+ out[idx++] = ASN_EXTENSIONS; /* extensions id */
+ XMEMCPY(&out[idx], len, lenSz); /* length */
+ idx += lenSz;
+
+ XMEMCPY(&out[idx], sequence, seqSz); /* sequence */
+ idx += seqSz;
+
+ return idx;
}
/* encode CA basic constraint true, return total bytes written */
-static int SetCa(byte* output)
+static int SetCa(byte* out, word32 outSz)
{
- static const byte ca[] = { 0x30, 0x0c, 0x06, 0x03, 0x55, 0x1d, 0x13, 0x04,
+ const byte ca[] = { 0x30, 0x0c, 0x06, 0x03, 0x55, 0x1d, 0x13, 0x04,
0x05, 0x30, 0x03, 0x01, 0x01, 0xff };
-
- XMEMCPY(output, ca, sizeof(ca));
+
+ if (out == NULL)
+ return BAD_FUNC_ARG;
+
+ if (outSz < sizeof(ca))
+ return BUFFER_E;
+
+ XMEMCPY(out, ca, sizeof(ca));
return (int)sizeof(ca);
}
+#ifdef WOLFSSL_CERT_EXT
+/* encode OID and associated value, return total bytes written */
+static int SetOidValue(byte* out, word32 outSz, const byte *oid, word32 oidSz,
+ byte *in, word32 inSz)
+{
+ int idx = 0;
+
+ if (out == NULL || oid == NULL || in == NULL)
+ return BAD_FUNC_ARG;
+
+ if (outSz < 3)
+ return BUFFER_E;
+
+ /* sequence, + 1 => byte to put value size */
+ idx = SetSequence(inSz + oidSz + 1, out);
+
+ if ((idx + inSz + oidSz + 1) > outSz)
+ return BUFFER_E;
+
+ XMEMCPY(out+idx, oid, oidSz);
+ idx += oidSz;
+ out[idx++] = (byte)inSz;
+ XMEMCPY(out+idx, in, inSz);
+
+ return (idx+inSz);
+}
+
+/* encode Subject Key Identifier, return total bytes written
+ * RFC5280 : non-critical */
+static int SetSKID(byte* output, word32 outSz, const byte *input, word32 length)
+{
+ byte skid_len[1 + MAX_LENGTH_SZ];
+ byte skid_enc_len[MAX_LENGTH_SZ];
+ int idx = 0, skid_lenSz, skid_enc_lenSz;
+ const byte skid_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0e, 0x04 };
+
+ if (output == NULL || input == NULL)
+ return BAD_FUNC_ARG;
+
+ /* Octet String header */
+ skid_lenSz = SetOctetString(length, skid_len);
+
+ /* length of encoded value */
+ skid_enc_lenSz = SetLength(length + skid_lenSz, skid_enc_len);
+
+ if (outSz < 3)
+ return BUFFER_E;
+
+ idx = SetSequence(length + sizeof(skid_oid) + skid_lenSz + skid_enc_lenSz,
+ output);
+
+ if ((length + sizeof(skid_oid) + skid_lenSz + skid_enc_lenSz) > outSz)
+ return BUFFER_E;
+
+ /* put oid */
+ XMEMCPY(output+idx, skid_oid, sizeof(skid_oid));
+ idx += sizeof(skid_oid);
+
+ /* put encoded len */
+ XMEMCPY(output+idx, skid_enc_len, skid_enc_lenSz);
+ idx += skid_enc_lenSz;
+
+ /* put octet header */
+ XMEMCPY(output+idx, skid_len, skid_lenSz);
+ idx += skid_lenSz;
+
+ /* put value */
+ XMEMCPY(output+idx, input, length);
+ idx += length;
+
+ return idx;
+}
+
+/* encode Authority Key Identifier, return total bytes written
+ * RFC5280 : non-critical */
+static int SetAKID(byte* output, word32 outSz,
+ byte *input, word32 length, void* heap)
+{
+ byte *enc_val;
+ int ret, enc_valSz;
+ const byte akid_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x23, 0x04 };
+ const byte akid_cs[] = { 0x80 };
+
+ (void)heap;
+
+ if (output == NULL || input == NULL)
+ return BAD_FUNC_ARG;
+
+ enc_valSz = length + 3 + sizeof(akid_cs);
+ enc_val = (byte *)XMALLOC(enc_valSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (enc_val == NULL)
+ return MEMORY_E;
+
+ /* sequence for ContentSpec & value */
+ ret = SetOidValue(enc_val, enc_valSz, akid_cs, sizeof(akid_cs),
+ input, length);
+ if (ret > 0) {
+ enc_valSz = ret;
+
+ ret = SetOidValue(output, outSz, akid_oid, sizeof(akid_oid),
+ enc_val, enc_valSz);
+ }
+
+ XFREE(enc_val, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+}
+
+/* encode Key Usage, return total bytes written
+ * RFC5280 : critical */
+static int SetKeyUsage(byte* output, word32 outSz, word16 input)
+{
+ byte ku[5];
+ int idx;
+ const byte keyusage_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0f,
+ 0x01, 0x01, 0xff, 0x04};
+ if (output == NULL)
+ return BAD_FUNC_ARG;
+
+ idx = SetBitString16Bit(input, ku);
+ return SetOidValue(output, outSz, keyusage_oid, sizeof(keyusage_oid),
+ ku, idx);
+}
+
+static int SetOjectIdValue(byte* output, word32 outSz, int* idx,
+ const byte* oid, word32 oidSz)
+{
+ /* verify room */
+ if (*idx + 2 + oidSz >= outSz)
+ return ASN_PARSE_E;
+
+ *idx += SetObjectId(oidSz, &output[*idx]);
+ XMEMCPY(&output[*idx], oid, oidSz);
+ *idx += oidSz;
+
+ return 0;
+}
+
+/* encode Extended Key Usage (RFC 5280 4.2.1.12), return total bytes written */
+static int SetExtKeyUsage(Cert* cert, byte* output, word32 outSz, byte input)
+{
+ int idx = 0, oidListSz = 0, totalSz, ret = 0;
+ const byte extkeyusage_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x25 };
+
+ if (output == NULL)
+ return BAD_FUNC_ARG;
+
+ /* Skip to OID List */
+ totalSz = 2 + sizeof(extkeyusage_oid) + 4;
+ idx = totalSz;
+
+ /* Build OID List */
+ /* If any set, then just use it */
+ if (input & EXTKEYUSE_ANY) {
+ ret |= SetOjectIdValue(output, outSz, &idx,
+ extExtKeyUsageAnyOid, sizeof(extExtKeyUsageAnyOid));
+ }
+ else {
+ if (input & EXTKEYUSE_SERVER_AUTH)
+ ret |= SetOjectIdValue(output, outSz, &idx,
+ extExtKeyUsageServerAuthOid, sizeof(extExtKeyUsageServerAuthOid));
+ if (input & EXTKEYUSE_CLIENT_AUTH)
+ ret |= SetOjectIdValue(output, outSz, &idx,
+ extExtKeyUsageClientAuthOid, sizeof(extExtKeyUsageClientAuthOid));
+ if (input & EXTKEYUSE_CODESIGN)
+ ret |= SetOjectIdValue(output, outSz, &idx,
+ extExtKeyUsageCodeSigningOid, sizeof(extExtKeyUsageCodeSigningOid));
+ if (input & EXTKEYUSE_EMAILPROT)
+ ret |= SetOjectIdValue(output, outSz, &idx,
+ extExtKeyUsageEmailProtectOid, sizeof(extExtKeyUsageEmailProtectOid));
+ if (input & EXTKEYUSE_TIMESTAMP)
+ ret |= SetOjectIdValue(output, outSz, &idx,
+ extExtKeyUsageTimestampOid, sizeof(extExtKeyUsageTimestampOid));
+ if (input & EXTKEYUSE_OCSP_SIGN)
+ ret |= SetOjectIdValue(output, outSz, &idx,
+ extExtKeyUsageOcspSignOid, sizeof(extExtKeyUsageOcspSignOid));
+ #ifdef WOLFSSL_EKU_OID
+ /* iterate through OID values */
+ if (input & EXTKEYUSE_USER) {
+ int i, sz;
+ for (i = 0; i < CTC_MAX_EKU_NB; i++) {
+ sz = cert->extKeyUsageOIDSz[i];
+ if (sz > 0) {
+ ret |= SetOjectIdValue(output, outSz, &idx,
+ cert->extKeyUsageOID[i], sz);
+ }
+ }
+ }
+ #endif /* WOLFSSL_EKU_OID */
+ }
+ if (ret != 0)
+ return ASN_PARSE_E;
+
+ /* Calculate Sizes */
+ oidListSz = idx - totalSz;
+ totalSz = idx - 2; /* exclude first seq/len (2) */
+
+ /* 1. Seq + Total Len (2) */
+ idx = SetSequence(totalSz, output);
+
+ /* 2. Object ID (2) */
+ XMEMCPY(&output[idx], extkeyusage_oid, sizeof(extkeyusage_oid));
+ idx += sizeof(extkeyusage_oid);
+
+ /* 3. Octet String (2) */
+ idx += SetOctetString(totalSz - idx, &output[idx]);
+
+ /* 4. Seq + OidListLen (2) */
+ idx += SetSequence(oidListSz, &output[idx]);
+
+ /* 5. Oid List (already set in-place above) */
+ idx += oidListSz;
+
+ (void)cert;
+ return idx;
+}
+
+/* encode Certificate Policies, return total bytes written
+ * each input value must be ITU-T X.690 formatted : a.b.c...
+ * input must be an array of values with a NULL terminated for the latest
+ * RFC5280 : non-critical */
+static int SetCertificatePolicies(byte *output,
+ word32 outputSz,
+ char input[MAX_CERTPOL_NB][MAX_CERTPOL_SZ],
+ word16 nb_certpol,
+ void* heap)
+{
+ byte oid[MAX_OID_SZ],
+ der_oid[MAX_CERTPOL_NB][MAX_OID_SZ],
+ out[MAX_CERTPOL_SZ];
+ word32 oidSz;
+ word32 outSz, i = 0, der_oidSz[MAX_CERTPOL_NB];
+ int ret;
+
+ const byte certpol_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x20, 0x04 };
+ const byte oid_oid[] = { 0x06 };
+
+ if (output == NULL || input == NULL || nb_certpol > MAX_CERTPOL_NB)
+ return BAD_FUNC_ARG;
+
+ for (i = 0; i < nb_certpol; i++) {
+ oidSz = sizeof(oid);
+ XMEMSET(oid, 0, oidSz);
+
+ ret = EncodePolicyOID(oid, &oidSz, input[i], heap);
+ if (ret != 0)
+ return ret;
+
+ /* compute sequence value for the oid */
+ ret = SetOidValue(der_oid[i], MAX_OID_SZ, oid_oid,
+ sizeof(oid_oid), oid, oidSz);
+ if (ret <= 0)
+ return ret;
+ else
+ der_oidSz[i] = (word32)ret;
+ }
+
+ /* concatenate oid, keep two byte for sequence/size of the created value */
+ for (i = 0, outSz = 2; i < nb_certpol; i++) {
+ XMEMCPY(out+outSz, der_oid[i], der_oidSz[i]);
+ outSz += der_oidSz[i];
+ }
+
+ /* add sequence */
+ ret = SetSequence(outSz-2, out);
+ if (ret <= 0)
+ return ret;
+
+ /* add Policy OID to compute final value */
+ return SetOidValue(output, outputSz, certpol_oid, sizeof(certpol_oid),
+ out, outSz);
+}
+#endif /* WOLFSSL_CERT_EXT */
+
+
+#ifdef WOLFSSL_ALT_NAMES
+
+/* encode Alternative Names, return total bytes written */
+static int SetAltNames(byte *output, word32 outSz,
+ const byte *input, word32 length)
+{
+ byte san_len[1 + MAX_LENGTH_SZ];
+ int idx = 0, san_lenSz;
+ const byte san_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x11 };
+
+ if (output == NULL || input == NULL)
+ return BAD_FUNC_ARG;
+
+ if (outSz < length)
+ return BUFFER_E;
+
+ /* Octet String header */
+ san_lenSz = SetOctetString(length, san_len);
+
+ if (outSz < MAX_SEQ_SZ)
+ return BUFFER_E;
+
+ idx = SetSequence(length + sizeof(san_oid) + san_lenSz, output);
+
+ if ((length + sizeof(san_oid) + san_lenSz) > outSz)
+ return BUFFER_E;
+
+ /* put oid */
+ XMEMCPY(output+idx, san_oid, sizeof(san_oid));
+ idx += sizeof(san_oid);
+
+ /* put octet header */
+ XMEMCPY(output+idx, san_len, san_lenSz);
+ idx += san_lenSz;
+
+ /* put value */
+ XMEMCPY(output+idx, input, length);
+ idx += length;
+
+ return idx;
+}
+
+
+#ifdef WOLFSSL_CERT_GEN
+
+int FlattenAltNames(byte* output, word32 outputSz, const DNS_entry* names)
+{
+ word32 idx;
+ const DNS_entry* curName;
+ word32 namesSz = 0;
+
+ if (output == NULL)
+ return BAD_FUNC_ARG;
+
+ if (names == NULL)
+ return 0;
+
+ curName = names;
+ do {
+ namesSz += curName->len + 2 +
+ ((curName->len < ASN_LONG_LENGTH) ? 0
+ : BytePrecision(curName->len));
+ curName = curName->next;
+ } while (curName != NULL);
+
+ if (outputSz < MAX_SEQ_SZ + namesSz)
+ return BUFFER_E;
+
+ idx = SetSequence(namesSz, output);
+
+ curName = names;
+ do {
+ output[idx++] = ASN_CONTEXT_SPECIFIC | curName->type;
+ idx += SetLength(curName->len, output + idx);
+ XMEMCPY(output + idx, curName->name, curName->len);
+ idx += curName->len;
+ curName = curName->next;
+ } while (curName != NULL);
+
+ return idx;
+}
+
+#endif /* WOLFSSL_CERT_GEN */
+
+#endif /* WOLFSSL_ALT_NAMES */
+
+/* Encodes one attribute of the name (issuer/subject)
+ *
+ * name structure to hold result of encoding
+ * nameStr value to be encoded
+ * nameType type of encoding i.e CTC_UTF8
+ * type id of attribute i.e ASN_COMMON_NAME
+ *
+ * returns length on success
+ */
+static int wc_EncodeName(EncodedName* name, const char* nameStr, char nameType,
+ byte type)
+{
+ word32 idx = 0;
+
+ if (nameStr) {
+ /* bottom up */
+ byte firstLen[1 + MAX_LENGTH_SZ];
+ byte secondLen[MAX_LENGTH_SZ];
+ byte sequence[MAX_SEQ_SZ];
+ byte set[MAX_SET_SZ];
+
+ int strLen = (int)XSTRLEN(nameStr);
+ int thisLen = strLen;
+ int firstSz, secondSz, seqSz, setSz;
+
+ if (strLen == 0) { /* no user data for this item */
+ name->used = 0;
+ return 0;
+ }
+
+ /* Restrict country code size */
+ if (ASN_COUNTRY_NAME == type && strLen != CTC_COUNTRY_SIZE) {
+ return ASN_COUNTRY_SIZE_E;
+ }
+
+ secondSz = SetLength(strLen, secondLen);
+ thisLen += secondSz;
+ switch (type) {
+ case ASN_EMAIL_NAME: /* email */
+ thisLen += EMAIL_JOINT_LEN;
+ firstSz = EMAIL_JOINT_LEN;
+ break;
+
+ case ASN_DOMAIN_COMPONENT:
+ thisLen += PILOT_JOINT_LEN;
+ firstSz = PILOT_JOINT_LEN;
+ break;
+
+ default:
+ thisLen++; /* str type */
+ thisLen += JOINT_LEN;
+ firstSz = JOINT_LEN + 1;
+ }
+ thisLen++; /* id type */
+ firstSz = SetObjectId(firstSz, firstLen);
+ thisLen += firstSz;
+
+ seqSz = SetSequence(thisLen, sequence);
+ thisLen += seqSz;
+ setSz = SetSet(thisLen, set);
+ thisLen += setSz;
+
+ if (thisLen > (int)sizeof(name->encoded)) {
+ return BUFFER_E;
+ }
+
+ /* store it */
+ idx = 0;
+ /* set */
+ XMEMCPY(name->encoded, set, setSz);
+ idx += setSz;
+ /* seq */
+ XMEMCPY(name->encoded + idx, sequence, seqSz);
+ idx += seqSz;
+ /* asn object id */
+ XMEMCPY(name->encoded + idx, firstLen, firstSz);
+ idx += firstSz;
+ switch (type) {
+ case ASN_EMAIL_NAME:
+ {
+ const byte EMAIL_OID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d,
+ 0x01, 0x09, 0x01, 0x16 };
+ /* email joint id */
+ XMEMCPY(name->encoded + idx, EMAIL_OID, sizeof(EMAIL_OID));
+ idx += (int)sizeof(EMAIL_OID);
+ }
+ break;
+
+ case ASN_DOMAIN_COMPONENT:
+ {
+ const byte PILOT_OID[] = { 0x09, 0x92, 0x26, 0x89,
+ 0x93, 0xF2, 0x2C, 0x64, 0x01
+ };
+
+ XMEMCPY(name->encoded + idx, PILOT_OID,
+ sizeof(PILOT_OID));
+ idx += (int)sizeof(PILOT_OID);
+ /* id type */
+ name->encoded[idx++] = type;
+ /* str type */
+ name->encoded[idx++] = nameType;
+ }
+ break;
+
+ default:
+ name->encoded[idx++] = 0x55;
+ name->encoded[idx++] = 0x04;
+ /* id type */
+ name->encoded[idx++] = type;
+ /* str type */
+ name->encoded[idx++] = nameType;
+ }
+ /* second length */
+ XMEMCPY(name->encoded + idx, secondLen, secondSz);
+ idx += secondSz;
+ /* str value */
+ XMEMCPY(name->encoded + idx, nameStr, strLen);
+ idx += strLen;
+
+ name->type = type;
+ name->totalLen = idx;
+ name->used = 1;
+ }
+ else
+ name->used = 0;
+
+ return idx;
+}
+
/* encode CertName into output, return total bytes written */
-static int SetName(byte* output, CertName* name)
+int SetName(byte* output, word32 outputSz, CertName* name)
{
int totalBytes = 0, i, idx;
#ifdef WOLFSSL_SMALL_STACK
@@ -5447,6 +12679,16 @@ static int SetName(byte* output, CertName* name)
#else
EncodedName names[NAME_ENTRIES];
#endif
+#ifdef WOLFSSL_MULTI_ATTRIB
+ EncodedName addNames[CTC_MAX_ATTRIB];
+ int j, type;
+#endif
+
+ if (output == NULL || name == NULL)
+ return BAD_FUNC_ARG;
+
+ if (outputSz < 3)
+ return BUFFER_E;
#ifdef WOLFSSL_SMALL_STACK
names = (EncodedName*)XMALLOC(sizeof(EncodedName) * NAME_ENTRIES, NULL,
@@ -5456,96 +12698,38 @@ static int SetName(byte* output, CertName* name)
#endif
for (i = 0; i < NAME_ENTRIES; i++) {
+ int ret;
const char* nameStr = GetOneName(name, i);
- if (nameStr) {
- /* bottom up */
- byte firstLen[MAX_LENGTH_SZ];
- byte secondLen[MAX_LENGTH_SZ];
- byte sequence[MAX_SEQ_SZ];
- byte set[MAX_SET_SZ];
-
- int email = i == (NAME_ENTRIES - 1) ? 1 : 0;
- int strLen = (int)XSTRLEN(nameStr);
- int thisLen = strLen;
- int firstSz, secondSz, seqSz, setSz;
-
- if (strLen == 0) { /* no user data for this item */
- names[i].used = 0;
- continue;
- }
-
- secondSz = SetLength(strLen, secondLen);
- thisLen += secondSz;
- if (email) {
- thisLen += EMAIL_JOINT_LEN;
- thisLen ++; /* id type */
- firstSz = SetLength(EMAIL_JOINT_LEN, firstLen);
- }
- else {
- thisLen++; /* str type */
- thisLen++; /* id type */
- thisLen += JOINT_LEN;
- firstSz = SetLength(JOINT_LEN + 1, firstLen);
- }
- thisLen += firstSz;
- thisLen++; /* object id */
-
- seqSz = SetSequence(thisLen, sequence);
- thisLen += seqSz;
- setSz = SetSet(thisLen, set);
- thisLen += setSz;
- if (thisLen > (int)sizeof(names[i].encoded)) {
-#ifdef WOLFSSL_SMALL_STACK
+ ret = wc_EncodeName(&names[i], nameStr, GetNameType(name, i),
+ GetNameId(i));
+ if (ret < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ #endif
+ return BUFFER_E;
+ }
+ totalBytes += ret;
+ }
+#ifdef WOLFSSL_MULTI_ATTRIB
+ for (i = 0; i < CTC_MAX_ATTRIB; i++) {
+ if (name->name[i].sz > 0) {
+ int ret;
+ ret = wc_EncodeName(&addNames[i], name->name[i].value,
+ name->name[i].type, name->name[i].id);
+ if (ret < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
return BUFFER_E;
}
-
- /* store it */
- idx = 0;
- /* set */
- XMEMCPY(names[i].encoded, set, setSz);
- idx += setSz;
- /* seq */
- XMEMCPY(names[i].encoded + idx, sequence, seqSz);
- idx += seqSz;
- /* asn object id */
- names[i].encoded[idx++] = ASN_OBJECT_ID;
- /* first length */
- XMEMCPY(names[i].encoded + idx, firstLen, firstSz);
- idx += firstSz;
- if (email) {
- const byte EMAIL_OID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d,
- 0x01, 0x09, 0x01, 0x16 };
- /* email joint id */
- XMEMCPY(names[i].encoded + idx, EMAIL_OID, sizeof(EMAIL_OID));
- idx += (int)sizeof(EMAIL_OID);
- }
- else {
- /* joint id */
- byte bType = GetNameId(i);
- names[i].encoded[idx++] = 0x55;
- names[i].encoded[idx++] = 0x04;
- /* id type */
- names[i].encoded[idx++] = bType;
- /* str type */
- names[i].encoded[idx++] = GetNameType(name, i);
- }
- /* second length */
- XMEMCPY(names[i].encoded + idx, secondLen, secondSz);
- idx += secondSz;
- /* str value */
- XMEMCPY(names[i].encoded + idx, nameStr, strLen);
- idx += strLen;
-
- totalBytes += idx;
- names[i].totalLen = idx;
- names[i].used = 1;
+ totalBytes += ret;
+ }
+ else {
+ addNames[i].used = 0;
}
- else
- names[i].used = 0;
}
+#endif /* WOLFSSL_MULTI_ATTRIB */
/* header */
idx = SetSequence(totalBytes, output);
@@ -5558,7 +12742,54 @@ static int SetName(byte* output, CertName* name)
}
for (i = 0; i < NAME_ENTRIES; i++) {
+ #ifdef WOLFSSL_MULTI_ATTRIB
+ type = GetNameId(i);
+
+ /* list all DC values before OUs */
+ if (type == ASN_ORGUNIT_NAME) {
+ type = ASN_DOMAIN_COMPONENT;
+ for (j = 0; j < CTC_MAX_ATTRIB; j++) {
+ if (name->name[j].sz > 0 && type == name->name[j].id) {
+ if (outputSz < (word32)(idx+addNames[j].totalLen)) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return BUFFER_E;
+ }
+
+ XMEMCPY(output + idx, addNames[j].encoded,
+ addNames[j].totalLen);
+ idx += addNames[j].totalLen;
+ }
+ }
+ type = ASN_ORGUNIT_NAME;
+ }
+
+ /* write all similar types to the buffer */
+ for (j = 0; j < CTC_MAX_ATTRIB; j++) {
+ if (name->name[j].sz > 0 && type == name->name[j].id) {
+ if (outputSz < (word32)(idx+addNames[j].totalLen)) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return BUFFER_E;
+ }
+
+ XMEMCPY(output + idx, addNames[j].encoded,
+ addNames[j].totalLen);
+ idx += addNames[j].totalLen;
+ }
+ }
+ #endif /* WOLFSSL_MULTI_ATTRIB */
+
if (names[i].used) {
+ if (outputSz < (word32)(idx+names[i].totalLen)) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(names, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return BUFFER_E;
+ }
+
XMEMCPY(output + idx, names[i].encoded, names[i].totalLen);
idx += names[i].totalLen;
}
@@ -5573,13 +12804,19 @@ static int SetName(byte* output, CertName* name)
/* encode info from cert into DER encoded format */
static int EncodeCert(Cert* cert, DerCert* der, RsaKey* rsaKey, ecc_key* eccKey,
- RNG* rng, const byte* ntruKey, word16 ntruSz)
+ WC_RNG* rng, const byte* ntruKey, word16 ntruSz,
+ ed25519_key* ed25519Key, ed448_key* ed448Key)
{
int ret;
- (void)eccKey;
- (void)ntruKey;
- (void)ntruSz;
+ if (cert == NULL || der == NULL || rng == NULL)
+ return BAD_FUNC_ARG;
+
+ /* make sure at least one key type is provided */
+ if (rsaKey == NULL && eccKey == NULL && ed25519Key == NULL &&
+ ed448Key == NULL && ntruKey == NULL) {
+ return PUBLIC_KEY_E;
+ }
/* init */
XMEMSET(der, 0, sizeof(DerCert));
@@ -5587,65 +12824,95 @@ static int EncodeCert(Cert* cert, DerCert* der, RsaKey* rsaKey, ecc_key* eccKey,
/* version */
der->versionSz = SetMyVersion(cert->version, der->version, TRUE);
- /* serial number */
- ret = wc_RNG_GenerateBlock(rng, cert->serial, CTC_SERIAL_SIZE);
- if (ret != 0)
- return ret;
-
- cert->serial[0] = 0x01; /* ensure positive */
- der->serialSz = SetSerial(cert->serial, der->serial);
+ /* serial number (must be positive) */
+ if (cert->serialSz == 0) {
+ /* generate random serial */
+ cert->serialSz = CTC_GEN_SERIAL_SZ;
+ ret = wc_RNG_GenerateBlock(rng, cert->serial, cert->serialSz);
+ if (ret != 0)
+ return ret;
+ /* Clear the top bit to avoid a negative value */
+ cert->serial[0] &= 0x7f;
+ }
+ der->serialSz = SetSerialNumber(cert->serial, cert->serialSz, der->serial,
+ sizeof(der->serial), CTC_SERIAL_SIZE);
+ if (der->serialSz < 0)
+ return der->serialSz;
/* signature algo */
- der->sigAlgoSz = SetAlgoID(cert->sigType, der->sigAlgo, sigType, 0);
- if (der->sigAlgoSz == 0)
+ der->sigAlgoSz = SetAlgoID(cert->sigType, der->sigAlgo, oidSigType, 0);
+ if (der->sigAlgoSz <= 0)
return ALGO_ID_E;
/* public key */
+#ifndef NO_RSA
if (cert->keyType == RSA_KEY) {
if (rsaKey == NULL)
return PUBLIC_KEY_E;
- der->publicKeySz = SetRsaPublicKey(der->publicKey, rsaKey);
- if (der->publicKeySz <= 0)
- return PUBLIC_KEY_E;
+ der->publicKeySz = SetRsaPublicKey(der->publicKey, rsaKey,
+ sizeof(der->publicKey), 1);
}
+#endif
#ifdef HAVE_ECC
if (cert->keyType == ECC_KEY) {
if (eccKey == NULL)
return PUBLIC_KEY_E;
- der->publicKeySz = SetEccPublicKey(der->publicKey, eccKey);
- if (der->publicKeySz <= 0)
+ der->publicKeySz = SetEccPublicKey(der->publicKey, eccKey, 1);
+ }
+#endif
+
+#ifdef HAVE_ED25519
+ if (cert->keyType == ED25519_KEY) {
+ if (ed25519Key == NULL)
return PUBLIC_KEY_E;
+ der->publicKeySz = SetEd25519PublicKey(der->publicKey, ed25519Key, 1);
}
-#endif /* HAVE_ECC */
+#endif
+
+#ifdef HAVE_ED448
+ if (cert->keyType == ED448_KEY) {
+ if (ed448Key == NULL)
+ return PUBLIC_KEY_E;
+ der->publicKeySz = SetEd448PublicKey(der->publicKey, ed448Key, 1);
+ }
+#endif
#ifdef HAVE_NTRU
if (cert->keyType == NTRU_KEY) {
word32 rc;
word16 encodedSz;
- rc = ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo( ntruSz,
+ if (ntruKey == NULL)
+ return PUBLIC_KEY_E;
+
+ rc = ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(ntruSz,
ntruKey, &encodedSz, NULL);
if (rc != NTRU_OK)
return PUBLIC_KEY_E;
if (encodedSz > MAX_PUBLIC_KEY_SZ)
return PUBLIC_KEY_E;
- rc = ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo( ntruSz,
+ rc = ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(ntruSz,
ntruKey, &encodedSz, der->publicKey);
if (rc != NTRU_OK)
return PUBLIC_KEY_E;
der->publicKeySz = encodedSz;
}
+#else
+ (void)ntruSz;
#endif /* HAVE_NTRU */
+ if (der->publicKeySz <= 0)
+ return PUBLIC_KEY_E;
+
der->validitySz = 0;
#ifdef WOLFSSL_ALT_NAMES
/* date validity copy ? */
if (cert->beforeDateSz && cert->afterDateSz) {
der->validitySz = CopyValidity(der->validity, cert);
- if (der->validitySz == 0)
+ if (der->validitySz <= 0)
return DATE_E;
}
#endif
@@ -5653,49 +12920,247 @@ static int EncodeCert(Cert* cert, DerCert* der, RsaKey* rsaKey, ecc_key* eccKey,
/* date validity */
if (der->validitySz == 0) {
der->validitySz = SetValidity(der->validity, cert->daysValid);
- if (der->validitySz == 0)
+ if (der->validitySz <= 0)
return DATE_E;
}
/* subject name */
- der->subjectSz = SetName(der->subject, &cert->subject);
- if (der->subjectSz == 0)
+#ifdef WOLFSSL_CERT_EXT
+ if (XSTRLEN((const char*)cert->sbjRaw) > 0) {
+ /* Use the raw subject */
+ int idx;
+
+ der->subjectSz = min(sizeof(der->subject),
+ (word32)XSTRLEN((const char*)cert->sbjRaw));
+ /* header */
+ idx = SetSequence(der->subjectSz, der->subject);
+ if (der->subjectSz + idx > (int)sizeof(der->subject)) {
+ return SUBJECT_E;
+ }
+
+ XMEMCPY((char*)der->subject + idx, (const char*)cert->sbjRaw,
+ der->subjectSz);
+ der->subjectSz += idx;
+ }
+ else
+#endif
+ {
+ /* Use the name structure */
+ der->subjectSz = SetName(der->subject, sizeof(der->subject),
+ &cert->subject);
+ }
+ if (der->subjectSz <= 0)
return SUBJECT_E;
/* issuer name */
- der->issuerSz = SetName(der->issuer, cert->selfSigned ?
- &cert->subject : &cert->issuer);
- if (der->issuerSz == 0)
+#ifdef WOLFSSL_CERT_EXT
+ if (XSTRLEN((const char*)cert->issRaw) > 0) {
+ /* Use the raw issuer */
+ int idx;
+
+ der->issuerSz = min(sizeof(der->issuer),
+ (word32)XSTRLEN((const char*)cert->issRaw));
+ /* header */
+ idx = SetSequence(der->issuerSz, der->issuer);
+ if (der->issuerSz + idx > (int)sizeof(der->issuer)) {
+ return ISSUER_E;
+ }
+
+ XMEMCPY((char*)der->issuer + idx, (const char*)cert->issRaw,
+ der->issuerSz);
+ der->issuerSz += idx;
+ }
+ else
+#endif
+ {
+ /* Use the name structure */
+ der->issuerSz = SetName(der->issuer, sizeof(der->issuer),
+ cert->selfSigned ? &cert->subject : &cert->issuer);
+ }
+ if (der->issuerSz <= 0)
return ISSUER_E;
+ /* set the extensions */
+ der->extensionsSz = 0;
+
/* CA */
if (cert->isCA) {
- der->caSz = SetCa(der->ca);
- if (der->caSz == 0)
+ der->caSz = SetCa(der->ca, sizeof(der->ca));
+ if (der->caSz <= 0)
return CA_TRUE_E;
+
+ der->extensionsSz += der->caSz;
}
else
der->caSz = 0;
- /* extensions, just CA now */
- if (cert->isCA) {
- der->extensionsSz = SetExtensions(der->extensions,
- der->ca, der->caSz, TRUE);
- if (der->extensionsSz == 0)
- return EXTENSIONS_E;
+#ifdef WOLFSSL_ALT_NAMES
+ /* Alternative Name */
+ if (cert->altNamesSz) {
+ der->altNamesSz = SetAltNames(der->altNames, sizeof(der->altNames),
+ cert->altNames, cert->altNamesSz);
+ if (der->altNamesSz <= 0)
+ return ALT_NAME_E;
+
+ der->extensionsSz += der->altNamesSz;
}
else
- der->extensionsSz = 0;
+ der->altNamesSz = 0;
+#endif
-#ifdef WOLFSSL_ALT_NAMES
- if (der->extensionsSz == 0 && cert->altNamesSz) {
- der->extensionsSz = SetExtensions(der->extensions, cert->altNames,
- cert->altNamesSz, TRUE);
- if (der->extensionsSz == 0)
- return EXTENSIONS_E;
+#ifdef WOLFSSL_CERT_EXT
+ /* SKID */
+ if (cert->skidSz) {
+ /* check the provided SKID size */
+ if (cert->skidSz > (int)min(CTC_MAX_SKID_SIZE, sizeof(der->skid)))
+ return SKID_E;
+
+ /* Note: different skid buffers sizes for der (MAX_KID_SZ) and
+ cert (CTC_MAX_SKID_SIZE). */
+ der->skidSz = SetSKID(der->skid, sizeof(der->skid),
+ cert->skid, cert->skidSz);
+ if (der->skidSz <= 0)
+ return SKID_E;
+
+ der->extensionsSz += der->skidSz;
+ }
+ else
+ der->skidSz = 0;
+
+ /* AKID */
+ if (cert->akidSz) {
+ /* check the provided AKID size */
+ if (cert->akidSz > (int)min(CTC_MAX_AKID_SIZE, sizeof(der->akid)))
+ return AKID_E;
+
+ der->akidSz = SetAKID(der->akid, sizeof(der->akid),
+ cert->akid, cert->akidSz, cert->heap);
+ if (der->akidSz <= 0)
+ return AKID_E;
+
+ der->extensionsSz += der->akidSz;
+ }
+ else
+ der->akidSz = 0;
+
+ /* Key Usage */
+ if (cert->keyUsage != 0){
+ der->keyUsageSz = SetKeyUsage(der->keyUsage, sizeof(der->keyUsage),
+ cert->keyUsage);
+ if (der->keyUsageSz <= 0)
+ return KEYUSAGE_E;
+
+ der->extensionsSz += der->keyUsageSz;
+ }
+ else
+ der->keyUsageSz = 0;
+
+ /* Extended Key Usage */
+ if (cert->extKeyUsage != 0){
+ der->extKeyUsageSz = SetExtKeyUsage(cert, der->extKeyUsage,
+ sizeof(der->extKeyUsage), cert->extKeyUsage);
+ if (der->extKeyUsageSz <= 0)
+ return EXTKEYUSAGE_E;
+
+ der->extensionsSz += der->extKeyUsageSz;
+ }
+ else
+ der->extKeyUsageSz = 0;
+
+ /* Certificate Policies */
+ if (cert->certPoliciesNb != 0) {
+ der->certPoliciesSz = SetCertificatePolicies(der->certPolicies,
+ sizeof(der->certPolicies),
+ cert->certPolicies,
+ cert->certPoliciesNb,
+ cert->heap);
+ if (der->certPoliciesSz <= 0)
+ return CERTPOLICIES_E;
+
+ der->extensionsSz += der->certPoliciesSz;
}
+ else
+ der->certPoliciesSz = 0;
+#endif /* WOLFSSL_CERT_EXT */
+
+ /* put extensions */
+ if (der->extensionsSz > 0) {
+
+ /* put the start of extensions sequence (ID, Size) */
+ der->extensionsSz = SetExtensionsHeader(der->extensions,
+ sizeof(der->extensions),
+ der->extensionsSz);
+ if (der->extensionsSz <= 0)
+ return EXTENSIONS_E;
+
+ /* put CA */
+ if (der->caSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->ca, der->caSz);
+ if (ret == 0)
+ return EXTENSIONS_E;
+ }
+
+#ifdef WOLFSSL_ALT_NAMES
+ /* put Alternative Names */
+ if (der->altNamesSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->altNames, der->altNamesSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
#endif
+#ifdef WOLFSSL_CERT_EXT
+ /* put SKID */
+ if (der->skidSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->skid, der->skidSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+
+ /* put AKID */
+ if (der->akidSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->akid, der->akidSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+
+ /* put KeyUsage */
+ if (der->keyUsageSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->keyUsage, der->keyUsageSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+
+ /* put ExtendedKeyUsage */
+ if (der->extKeyUsageSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->extKeyUsage, der->extKeyUsageSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+
+ /* put Certificate Policies */
+ if (der->certPoliciesSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->certPolicies, der->certPoliciesSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+#endif /* WOLFSSL_CERT_EXT */
+ }
+
der->total = der->versionSz + der->serialSz + der->sigAlgoSz +
der->publicKeySz + der->validitySz + der->subjectSz + der->issuerSz +
der->extensionsSz;
@@ -5705,37 +13170,37 @@ static int EncodeCert(Cert* cert, DerCert* der, RsaKey* rsaKey, ecc_key* eccKey,
/* write DER encoded cert to buffer, size already checked */
-static int WriteCertBody(DerCert* der, byte* buffer)
+static int WriteCertBody(DerCert* der, byte* buf)
{
int idx;
/* signed part header */
- idx = SetSequence(der->total, buffer);
+ idx = SetSequence(der->total, buf);
/* version */
- XMEMCPY(buffer + idx, der->version, der->versionSz);
+ XMEMCPY(buf + idx, der->version, der->versionSz);
idx += der->versionSz;
/* serial */
- XMEMCPY(buffer + idx, der->serial, der->serialSz);
+ XMEMCPY(buf + idx, der->serial, der->serialSz);
idx += der->serialSz;
/* sig algo */
- XMEMCPY(buffer + idx, der->sigAlgo, der->sigAlgoSz);
+ XMEMCPY(buf + idx, der->sigAlgo, der->sigAlgoSz);
idx += der->sigAlgoSz;
/* issuer */
- XMEMCPY(buffer + idx, der->issuer, der->issuerSz);
+ XMEMCPY(buf + idx, der->issuer, der->issuerSz);
idx += der->issuerSz;
/* validity */
- XMEMCPY(buffer + idx, der->validity, der->validitySz);
+ XMEMCPY(buf + idx, der->validity, der->validitySz);
idx += der->validitySz;
/* subject */
- XMEMCPY(buffer + idx, der->subject, der->subjectSz);
+ XMEMCPY(buf + idx, der->subject, der->subjectSz);
idx += der->subjectSz;
/* public key */
- XMEMCPY(buffer + idx, der->publicKey, der->publicKeySz);
+ XMEMCPY(buf + idx, der->publicKey, der->publicKeySz);
idx += der->publicKeySz;
if (der->extensionsSz) {
/* extensions */
- XMEMCPY(buffer + idx, der->extensions, min(der->extensionsSz,
- sizeof(der->extensions)));
+ XMEMCPY(buf + idx, der->extensions, min(der->extensionsSz,
+ (int)sizeof(der->extensions)));
idx += der->extensionsSz;
}
@@ -5744,97 +13209,126 @@ static int WriteCertBody(DerCert* der, byte* buffer)
/* Make RSA signature from buffer (sz), write to sig (sigSz) */
-static int MakeSignature(const byte* buffer, int sz, byte* sig, int sigSz,
- RsaKey* rsaKey, ecc_key* eccKey, RNG* rng,
- int sigAlgoType)
+static int MakeSignature(CertSignCtx* certSignCtx, const byte* buf, int sz,
+ byte* sig, int sigSz, RsaKey* rsaKey, ecc_key* eccKey,
+ ed25519_key* ed25519Key, ed448_key* ed448Key, WC_RNG* rng, int sigAlgoType,
+ void* heap)
{
- int encSigSz, digestSz, typeH = 0, ret = 0;
- byte digest[SHA256_DIGEST_SIZE]; /* max size */
-#ifdef WOLFSSL_SMALL_STACK
- byte* encSig;
-#else
- byte encSig[MAX_ENCODED_DIG_SZ + MAX_ALGO_SZ + MAX_SEQ_SZ];
-#endif
+ int digestSz = 0, typeH = 0, ret = 0;
- (void)digest;
(void)digestSz;
- (void)encSig;
- (void)encSigSz;
(void)typeH;
-
- (void)buffer;
+ (void)buf;
(void)sz;
(void)sig;
(void)sigSz;
(void)rsaKey;
(void)eccKey;
+ (void)ed25519Key;
+ (void)ed448Key;
(void)rng;
+ (void)heap;
- switch (sigAlgoType) {
- #ifndef NO_MD5
- case CTC_MD5wRSA:
- if ((ret = wc_Md5Hash(buffer, sz, digest)) == 0) {
- typeH = MD5h;
- digestSz = MD5_DIGEST_SIZE;
+ switch (certSignCtx->state) {
+ case CERTSIGN_STATE_BEGIN:
+ case CERTSIGN_STATE_DIGEST:
+
+ certSignCtx->state = CERTSIGN_STATE_DIGEST;
+ certSignCtx->digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (certSignCtx->digest == NULL) {
+ ret = MEMORY_E; goto exit_ms;
}
- break;
- #endif
- #ifndef NO_SHA
- case CTC_SHAwRSA:
- case CTC_SHAwECDSA:
- if ((ret = wc_ShaHash(buffer, sz, digest)) == 0) {
- typeH = SHAh;
- digestSz = SHA_DIGEST_SIZE;
+
+ ret = HashForSignature(buf, sz, sigAlgoType, certSignCtx->digest,
+ &typeH, &digestSz, 0);
+        /* set next state, since WC_PENDING_E re-entry for these is not "call again" */
+ certSignCtx->state = CERTSIGN_STATE_ENCODE;
+ if (ret != 0) {
+ goto exit_ms;
}
- break;
- #endif
- #ifndef NO_SHA256
- case CTC_SHA256wRSA:
- case CTC_SHA256wECDSA:
- if ((ret = wc_Sha256Hash(buffer, sz, digest)) == 0) {
- typeH = SHA256h;
- digestSz = SHA256_DIGEST_SIZE;
+ FALL_THROUGH;
+
+ case CERTSIGN_STATE_ENCODE:
+ #ifndef NO_RSA
+ if (rsaKey) {
+ certSignCtx->encSig = (byte*)XMALLOC(MAX_DER_DIGEST_SZ, heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (certSignCtx->encSig == NULL) {
+ ret = MEMORY_E; goto exit_ms;
+ }
+
+ /* signature */
+ certSignCtx->encSigSz = wc_EncodeSignature(certSignCtx->encSig,
+ certSignCtx->digest, digestSz, typeH);
}
+ #endif /* !NO_RSA */
+ FALL_THROUGH;
+
+ case CERTSIGN_STATE_DO:
+ certSignCtx->state = CERTSIGN_STATE_DO;
+ ret = ALGO_ID_E; /* default to error */
+
+ #ifndef NO_RSA
+ if (rsaKey) {
+ /* signature */
+ ret = wc_RsaSSL_Sign(certSignCtx->encSig, certSignCtx->encSigSz,
+ sig, sigSz, rsaKey, rng);
+ }
+ #endif /* !NO_RSA */
+
+ #ifdef HAVE_ECC
+ if (!rsaKey && eccKey) {
+ word32 outSz = sigSz;
+
+ ret = wc_ecc_sign_hash(certSignCtx->digest, digestSz,
+ sig, &outSz, rng, eccKey);
+ if (ret == 0)
+ ret = outSz;
+ }
+ #endif /* HAVE_ECC */
+
+ #ifdef HAVE_ED25519
+ if (!rsaKey && !eccKey && ed25519Key) {
+ word32 outSz = sigSz;
+
+ ret = wc_ed25519_sign_msg(buf, sz, sig, &outSz, ed25519Key);
+ if (ret == 0)
+ ret = outSz;
+ }
+    #endif /* HAVE_ED25519 */
+
+ #ifdef HAVE_ED448
+ if (!rsaKey && !eccKey && !ed25519Key && ed448Key) {
+ word32 outSz = sigSz;
+
+ ret = wc_ed448_sign_msg(buf, sz, sig, &outSz, ed448Key, NULL, 0);
+ if (ret == 0)
+ ret = outSz;
+ }
+    #endif /* HAVE_ED448 */
break;
- #endif
- default:
- WOLFSSL_MSG("MakeSignautre called with unsupported type");
- ret = ALGO_ID_E;
}
-
- if (ret != 0)
+
+exit_ms:
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (ret == WC_PENDING_E) {
return ret;
-
-#ifdef WOLFSSL_SMALL_STACK
- encSig = (byte*)XMALLOC(MAX_ENCODED_DIG_SZ + MAX_ALGO_SZ + MAX_SEQ_SZ,
- NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (encSig == NULL)
- return MEMORY_E;
+ }
#endif
-
- ret = ALGO_ID_E;
-
+
#ifndef NO_RSA
if (rsaKey) {
- /* signature */
- encSigSz = wc_EncodeSignature(encSig, digest, digestSz, typeH);
- ret = wc_RsaSSL_Sign(encSig, encSigSz, sig, sigSz, rsaKey, rng);
+ XFREE(certSignCtx->encSig, heap, DYNAMIC_TYPE_TMP_BUFFER);
}
-#endif
-
-#ifdef HAVE_ECC
- if (!rsaKey && eccKey) {
- word32 outSz = sigSz;
- ret = wc_ecc_sign_hash(digest, digestSz, sig, &outSz, rng, eccKey);
+#endif /* !NO_RSA */
- if (ret == 0)
- ret = outSz;
- }
-#endif
+ XFREE(certSignCtx->digest, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ certSignCtx->digest = NULL;
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ /* reset state */
+ certSignCtx->state = CERTSIGN_STATE_BEGIN;
return ret;
}
@@ -5842,27 +13336,27 @@ static int MakeSignature(const byte* buffer, int sz, byte* sig, int sigSz,
/* add signature to end of buffer, size of buffer assumed checked, return
new length */
-static int AddSignature(byte* buffer, int bodySz, const byte* sig, int sigSz,
+static int AddSignature(byte* buf, int bodySz, const byte* sig, int sigSz,
int sigAlgoType)
{
byte seq[MAX_SEQ_SZ];
int idx = bodySz, seqSz;
/* algo */
- idx += SetAlgoID(sigAlgoType, buffer + idx, sigType, 0);
+ idx += SetAlgoID(sigAlgoType, buf ? buf + idx : NULL, oidSigType, 0);
/* bit string */
- buffer[idx++] = ASN_BIT_STRING;
- /* length */
- idx += SetLength(sigSz + 1, buffer + idx);
- buffer[idx++] = 0; /* trailing 0 */
+ idx += SetBitString(sigSz, 0, buf ? buf + idx : NULL);
/* signature */
- XMEMCPY(buffer + idx, sig, sigSz);
+ if (buf)
+ XMEMCPY(buf + idx, sig, sigSz);
idx += sigSz;
/* make room for overall header */
seqSz = SetSequence(idx, seq);
- XMEMMOVE(buffer + seqSz, buffer, idx);
- XMEMCPY(buffer, seq, seqSz);
+ if (buf) {
+ XMEMMOVE(buf + seqSz, buf, idx);
+ XMEMCPY(buf, seq, seqSz);
+ }
return idx + seqSz;
}
@@ -5870,8 +13364,9 @@ static int AddSignature(byte* buffer, int bodySz, const byte* sig, int sigSz,
/* Make an x509 Certificate v3 any key type from cert input, write to buffer */
static int MakeAnyCert(Cert* cert, byte* derBuffer, word32 derSz,
- RsaKey* rsaKey, ecc_key* eccKey, RNG* rng,
- const byte* ntruKey, word16 ntruSz)
+ RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng,
+ const byte* ntruKey, word16 ntruSz,
+ ed25519_key* ed25519Key, ed448_key* ed448Key)
{
int ret;
#ifdef WOLFSSL_SMALL_STACK
@@ -5880,16 +13375,21 @@ static int MakeAnyCert(Cert* cert, byte* derBuffer, word32 derSz,
DerCert der[1];
#endif
- cert->keyType = eccKey ? ECC_KEY : (rsaKey ? RSA_KEY : NTRU_KEY);
+ if (derBuffer == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ cert->keyType = eccKey ? ECC_KEY : (rsaKey ? RSA_KEY :
+ (ed25519Key ? ED25519_KEY : (ed448Key ? ED448_KEY : NTRU_KEY)));
#ifdef WOLFSSL_SMALL_STACK
- der = (DerCert*)XMALLOC(sizeof(DerCert), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ der = (DerCert*)XMALLOC(sizeof(DerCert), cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
if (der == NULL)
return MEMORY_E;
#endif
- ret = EncodeCert(cert, der, rsaKey, eccKey, rng, ntruKey, ntruSz);
-
+ ret = EncodeCert(cert, der, rsaKey, eccKey, rng, ntruKey, ntruSz,
+ ed25519Key, ed448Key);
if (ret == 0) {
if (der->total + MAX_SEQ_SZ * 2 > (int)derSz)
ret = BUFFER_E;
@@ -5898,7 +13398,7 @@ static int MakeAnyCert(Cert* cert, byte* derBuffer, word32 derSz,
}
#ifdef WOLFSSL_SMALL_STACK
- XFREE(der, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(der, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return ret;
@@ -5906,19 +13406,41 @@ static int MakeAnyCert(Cert* cert, byte* derBuffer, word32 derSz,
/* Make an x509 Certificate v3 RSA or ECC from cert input, write to buffer */
+int wc_MakeCert_ex(Cert* cert, byte* derBuffer, word32 derSz, int keyType,
+ void* key, WC_RNG* rng)
+{
+ RsaKey* rsaKey = NULL;
+ ecc_key* eccKey = NULL;
+ ed25519_key* ed25519Key = NULL;
+ ed448_key* ed448Key = NULL;
+
+ if (keyType == RSA_TYPE)
+ rsaKey = (RsaKey*)key;
+ else if (keyType == ECC_TYPE)
+ eccKey = (ecc_key*)key;
+ else if (keyType == ED25519_TYPE)
+ ed25519Key = (ed25519_key*)key;
+ else if (keyType == ED448_TYPE)
+ ed448Key = (ed448_key*)key;
+
+ return MakeAnyCert(cert, derBuffer, derSz, rsaKey, eccKey, rng, NULL, 0,
+ ed25519Key, ed448Key);
+}
+/* Make an x509 Certificate v3 RSA or ECC from cert input, write to buffer */
int wc_MakeCert(Cert* cert, byte* derBuffer, word32 derSz, RsaKey* rsaKey,
- ecc_key* eccKey, RNG* rng)
+ ecc_key* eccKey, WC_RNG* rng)
{
- return MakeAnyCert(cert, derBuffer, derSz, rsaKey, eccKey, rng, NULL, 0);
+ return MakeAnyCert(cert, derBuffer, derSz, rsaKey, eccKey, rng, NULL, 0,
+ NULL, NULL);
}
#ifdef HAVE_NTRU
int wc_MakeNtruCert(Cert* cert, byte* derBuffer, word32 derSz,
- const byte* ntruKey, word16 keySz, RNG* rng)
+ const byte* ntruKey, word16 keySz, WC_RNG* rng)
{
- return MakeAnyCert(cert, derBuffer, derSz, NULL, NULL, rng, ntruKey, keySz);
+    return MakeAnyCert(cert, derBuffer, derSz, NULL, NULL, rng, ntruKey, keySz, NULL, NULL);
}
#endif /* HAVE_NTRU */
@@ -5926,12 +13448,13 @@ int wc_MakeNtruCert(Cert* cert, byte* derBuffer, word32 derSz,
#ifdef WOLFSSL_CERT_REQ
-static int SetReqAttrib(byte* output, char* pw, int extSz)
+static int SetReqAttrib(byte* output, char* pw, int pwPrintableString,
+ int extSz)
{
- static const byte cpOid[] =
+ const byte cpOid[] =
{ ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
0x09, 0x07 };
- static const byte erOid[] =
+ const byte erOid[] =
{ ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
0x09, 0x0e };
@@ -5955,7 +13478,11 @@ static int SetReqAttrib(byte* output, char* pw, int extSz)
if (pw && pw[0]) {
pwSz = (int)XSTRLEN(pw);
- cpStrSz = SetUTF8String(pwSz, cpStr);
+ if (pwPrintableString) {
+ cpStrSz = SetPrintableString(pwSz, cpStr);
+ } else {
+ cpStrSz = SetUTF8String(pwSz, cpStr);
+ }
cpSetSz = SetSet(cpStrSz + pwSz, cpSet);
cpSeqSz = SetSequence(sizeof(cpOid) + cpSetSz + cpStrSz + pwSz, cpSeq);
cpSz = cpSeqSz + sizeof(cpOid) + cpSetSz + cpStrSz + pwSz;
@@ -5998,10 +13525,21 @@ static int SetReqAttrib(byte* output, char* pw, int extSz)
/* encode info from cert into DER encoded format */
-static int EncodeCertReq(Cert* cert, DerCert* der,
- RsaKey* rsaKey, ecc_key* eccKey)
+static int EncodeCertReq(Cert* cert, DerCert* der, RsaKey* rsaKey,
+ ecc_key* eccKey, ed25519_key* ed25519Key,
+ ed448_key* ed448Key)
{
(void)eccKey;
+ (void)ed25519Key;
+ (void)ed448Key;
+
+ if (cert == NULL || der == NULL)
+ return BAD_FUNC_ARG;
+
+ if (rsaKey == NULL && eccKey == NULL && ed25519Key == NULL &&
+ ed448Key == NULL) {
+ return PUBLIC_KEY_E;
+ }
/* init */
XMEMSET(der, 0, sizeof(DerCert));
@@ -6010,51 +13548,163 @@ static int EncodeCertReq(Cert* cert, DerCert* der,
der->versionSz = SetMyVersion(cert->version, der->version, FALSE);
/* subject name */
- der->subjectSz = SetName(der->subject, &cert->subject);
- if (der->subjectSz == 0)
+ der->subjectSz = SetName(der->subject, sizeof(der->subject), &cert->subject);
+ if (der->subjectSz <= 0)
return SUBJECT_E;
/* public key */
+#ifndef NO_RSA
if (cert->keyType == RSA_KEY) {
if (rsaKey == NULL)
return PUBLIC_KEY_E;
- der->publicKeySz = SetRsaPublicKey(der->publicKey, rsaKey);
- if (der->publicKeySz <= 0)
- return PUBLIC_KEY_E;
+ der->publicKeySz = SetRsaPublicKey(der->publicKey, rsaKey,
+ sizeof(der->publicKey), 1);
}
+#endif
#ifdef HAVE_ECC
if (cert->keyType == ECC_KEY) {
- if (eccKey == NULL)
+ der->publicKeySz = SetEccPublicKey(der->publicKey, eccKey, 1);
+ }
+#endif
+
+#ifdef HAVE_ED25519
+ if (cert->keyType == ED25519_KEY) {
+ if (ed25519Key == NULL)
return PUBLIC_KEY_E;
- der->publicKeySz = SetEccPublicKey(der->publicKey, eccKey);
- if (der->publicKeySz <= 0)
+ der->publicKeySz = SetEd25519PublicKey(der->publicKey, ed25519Key, 1);
+ }
+#endif
+
+#ifdef HAVE_ED448
+ if (cert->keyType == ED448_KEY) {
+ if (ed448Key == NULL)
return PUBLIC_KEY_E;
+ der->publicKeySz = SetEd448PublicKey(der->publicKey, ed448Key, 1);
}
-#endif /* HAVE_ECC */
+#endif
+ if (der->publicKeySz <= 0)
+ return PUBLIC_KEY_E;
+
+ /* set the extensions */
+ der->extensionsSz = 0;
/* CA */
if (cert->isCA) {
- der->caSz = SetCa(der->ca);
- if (der->caSz == 0)
+ der->caSz = SetCa(der->ca, sizeof(der->ca));
+ if (der->caSz <= 0)
return CA_TRUE_E;
+
+ der->extensionsSz += der->caSz;
}
else
der->caSz = 0;
- /* extensions, just CA now */
- if (cert->isCA) {
- der->extensionsSz = SetExtensions(der->extensions,
- der->ca, der->caSz, FALSE);
- if (der->extensionsSz == 0)
- return EXTENSIONS_E;
+#ifdef WOLFSSL_CERT_EXT
+ /* SKID */
+ if (cert->skidSz) {
+ /* check the provided SKID size */
+ if (cert->skidSz > (int)min(CTC_MAX_SKID_SIZE, sizeof(der->skid)))
+ return SKID_E;
+
+ der->skidSz = SetSKID(der->skid, sizeof(der->skid),
+ cert->skid, cert->skidSz);
+ if (der->skidSz <= 0)
+ return SKID_E;
+
+ der->extensionsSz += der->skidSz;
+ }
+ else
+ der->skidSz = 0;
+
+ /* Key Usage */
+ if (cert->keyUsage != 0){
+ der->keyUsageSz = SetKeyUsage(der->keyUsage, sizeof(der->keyUsage),
+ cert->keyUsage);
+ if (der->keyUsageSz <= 0)
+ return KEYUSAGE_E;
+
+ der->extensionsSz += der->keyUsageSz;
}
else
- der->extensionsSz = 0;
+ der->keyUsageSz = 0;
- der->attribSz = SetReqAttrib(der->attrib,
- cert->challengePw, der->extensionsSz);
- if (der->attribSz == 0)
+ /* Extended Key Usage */
+ if (cert->extKeyUsage != 0){
+ der->extKeyUsageSz = SetExtKeyUsage(cert, der->extKeyUsage,
+ sizeof(der->extKeyUsage), cert->extKeyUsage);
+ if (der->extKeyUsageSz <= 0)
+ return EXTKEYUSAGE_E;
+
+ der->extensionsSz += der->extKeyUsageSz;
+ }
+ else
+ der->extKeyUsageSz = 0;
+
+#endif /* WOLFSSL_CERT_EXT */
+
+ /* put extensions */
+ if (der->extensionsSz > 0) {
+ int ret;
+
+ /* put the start of sequence (ID, Size) */
+ der->extensionsSz = SetSequence(der->extensionsSz, der->extensions);
+ if (der->extensionsSz <= 0)
+ return EXTENSIONS_E;
+
+ /* put CA */
+ if (der->caSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->ca, der->caSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+
+#ifdef WOLFSSL_CERT_EXT
+ /* put SKID */
+ if (der->skidSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->skid, der->skidSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+
+ /* put AKID */
+ if (der->akidSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->akid, der->akidSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+
+ /* put KeyUsage */
+ if (der->keyUsageSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->keyUsage, der->keyUsageSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+
+ /* put ExtendedKeyUsage */
+ if (der->extKeyUsageSz) {
+ ret = SetExtensions(der->extensions, sizeof(der->extensions),
+ &der->extensionsSz,
+ der->extKeyUsage, der->extKeyUsageSz);
+ if (ret <= 0)
+ return EXTENSIONS_E;
+ }
+
+#endif /* WOLFSSL_CERT_EXT */
+ }
+
+ der->attribSz = SetReqAttrib(der->attrib, cert->challengePw,
+ cert->challengePwPrintableString,
+ der->extensionsSz);
+ if (der->attribSz <= 0)
return REQ_ATTRIBUTE_E;
der->total = der->versionSz + der->subjectSz + der->publicKeySz +
@@ -6065,28 +13715,33 @@ static int EncodeCertReq(Cert* cert, DerCert* der,
/* write DER encoded cert req to buffer, size already checked */
-static int WriteCertReqBody(DerCert* der, byte* buffer)
+static int WriteCertReqBody(DerCert* der, byte* buf)
{
int idx;
/* signed part header */
- idx = SetSequence(der->total, buffer);
+ idx = SetSequence(der->total, buf);
/* version */
- XMEMCPY(buffer + idx, der->version, der->versionSz);
+ if (buf)
+ XMEMCPY(buf + idx, der->version, der->versionSz);
idx += der->versionSz;
/* subject */
- XMEMCPY(buffer + idx, der->subject, der->subjectSz);
+ if (buf)
+ XMEMCPY(buf + idx, der->subject, der->subjectSz);
idx += der->subjectSz;
/* public key */
- XMEMCPY(buffer + idx, der->publicKey, der->publicKeySz);
+ if (buf)
+ XMEMCPY(buf + idx, der->publicKey, der->publicKeySz);
idx += der->publicKeySz;
/* attributes */
- XMEMCPY(buffer + idx, der->attrib, der->attribSz);
+ if (buf)
+ XMEMCPY(buf + idx, der->attrib, der->attribSz);
idx += der->attribSz;
/* extensions */
if (der->extensionsSz) {
- XMEMCPY(buffer + idx, der->extensions, min(der->extensionsSz,
- sizeof(der->extensions)));
+ if (buf)
+ XMEMCPY(buf + idx, der->extensions, min(der->extensionsSz,
+ (int)sizeof(der->extensions)));
idx += der->extensionsSz;
}
@@ -6094,8 +13749,9 @@ static int WriteCertReqBody(DerCert* der, byte* buffer)
}
-int wc_MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz,
- RsaKey* rsaKey, ecc_key* eccKey)
+static int MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz,
+ RsaKey* rsaKey, ecc_key* eccKey, ed25519_key* ed25519Key,
+ ed448_key* ed448Key)
{
int ret;
#ifdef WOLFSSL_SMALL_STACK
@@ -6104,15 +13760,17 @@ int wc_MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz,
DerCert der[1];
#endif
- cert->keyType = eccKey ? ECC_KEY : RSA_KEY;
+ cert->keyType = eccKey ? ECC_KEY : (ed25519Key ? ED25519_KEY :
+ (ed448Key ? ED448_KEY: RSA_KEY));
#ifdef WOLFSSL_SMALL_STACK
- der = (DerCert*)XMALLOC(sizeof(DerCert), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ der = (DerCert*)XMALLOC(sizeof(DerCert), cert->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
if (der == NULL)
return MEMORY_E;
#endif
- ret = EncodeCertReq(cert, der, rsaKey, eccKey);
+ ret = EncodeCertReq(cert, der, rsaKey, eccKey, ed25519Key, ed448Key);
if (ret == 0) {
if (der->total + MAX_SEQ_SZ * 2 > (int)derSz)
@@ -6122,100 +13780,669 @@ int wc_MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz,
}
#ifdef WOLFSSL_SMALL_STACK
- XFREE(der, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(der, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return ret;
}
+int wc_MakeCertReq_ex(Cert* cert, byte* derBuffer, word32 derSz, int keyType,
+ void* key)
+{
+ RsaKey* rsaKey = NULL;
+ ecc_key* eccKey = NULL;
+ ed25519_key* ed25519Key = NULL;
+ ed448_key* ed448Key = NULL;
+
+ if (keyType == RSA_TYPE)
+ rsaKey = (RsaKey*)key;
+ else if (keyType == ECC_TYPE)
+ eccKey = (ecc_key*)key;
+ else if (keyType == ED25519_TYPE)
+ ed25519Key = (ed25519_key*)key;
+ else if (keyType == ED448_TYPE)
+ ed448Key = (ed448_key*)key;
+
+ return MakeCertReq(cert, derBuffer, derSz, rsaKey, eccKey, ed25519Key,
+ ed448Key);
+}
+
+int wc_MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz,
+ RsaKey* rsaKey, ecc_key* eccKey)
+{
+ return MakeCertReq(cert, derBuffer, derSz, rsaKey, eccKey, NULL, NULL);
+}
#endif /* WOLFSSL_CERT_REQ */
-int wc_SignCert(int requestSz, int sType, byte* buffer, word32 buffSz,
- RsaKey* rsaKey, ecc_key* eccKey, RNG* rng)
+static int SignCert(int requestSz, int sType, byte* buf, word32 buffSz,
+ RsaKey* rsaKey, ecc_key* eccKey, ed25519_key* ed25519Key,
+ ed448_key* ed448Key, WC_RNG* rng)
{
- int sigSz;
-#ifdef WOLFSSL_SMALL_STACK
- byte* sig;
+ int sigSz = 0;
+ void* heap = NULL;
+ CertSignCtx* certSignCtx;
+#ifndef WOLFSSL_ASYNC_CRYPT
+ CertSignCtx certSignCtx_lcl;
+
+ certSignCtx = &certSignCtx_lcl;
+ XMEMSET(certSignCtx, 0, sizeof(CertSignCtx));
#else
- byte sig[MAX_ENCODED_SIG_SZ];
+ certSignCtx = NULL;
#endif
if (requestSz < 0)
return requestSz;
-#ifdef WOLFSSL_SMALL_STACK
- sig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (sig == NULL)
- return MEMORY_E;
+ /* locate ctx */
+ if (rsaKey) {
+ #ifndef NO_RSA
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ certSignCtx = &rsaKey->certSignCtx;
+ #endif
+ heap = rsaKey->heap;
+ #else
+ return NOT_COMPILED_IN;
+ #endif /* NO_RSA */
+ }
+ else if (eccKey) {
+ #ifdef HAVE_ECC
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ certSignCtx = &eccKey->certSignCtx;
+ #endif
+ heap = eccKey->heap;
+ #else
+ return NOT_COMPILED_IN;
+ #endif /* HAVE_ECC */
+ }
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (certSignCtx == NULL) {
+ return BAD_FUNC_ARG;
+ }
#endif
- sigSz = MakeSignature(buffer, requestSz, sig, MAX_ENCODED_SIG_SZ, rsaKey,
- eccKey, rng, sType);
+ if (certSignCtx->sig == NULL) {
+ certSignCtx->sig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (certSignCtx->sig == NULL)
+ return MEMORY_E;
+ }
+
+ sigSz = MakeSignature(certSignCtx, buf, requestSz, certSignCtx->sig,
+ MAX_ENCODED_SIG_SZ, rsaKey, eccKey, ed25519Key, ed448Key, rng, sType,
+ heap);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (sigSz == WC_PENDING_E) {
+ /* Not free'ing certSignCtx->sig here because it could still be in use
+ * with async operations. */
+ return sigSz;
+ }
+#endif
if (sigSz >= 0) {
if (requestSz + MAX_SEQ_SZ * 2 + sigSz > (int)buffSz)
sigSz = BUFFER_E;
else
- sigSz = AddSignature(buffer, requestSz, sig, sigSz, sType);
+ sigSz = AddSignature(buf, requestSz, certSignCtx->sig, sigSz,
+ sType);
}
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(sig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+
+ XFREE(certSignCtx->sig, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ certSignCtx->sig = NULL;
return sigSz;
}
+int wc_SignCert_ex(int requestSz, int sType, byte* buf, word32 buffSz,
+ int keyType, void* key, WC_RNG* rng)
+{
+ RsaKey* rsaKey = NULL;
+ ecc_key* eccKey = NULL;
+ ed25519_key* ed25519Key = NULL;
+ ed448_key* ed448Key = NULL;
+
+ if (keyType == RSA_TYPE)
+ rsaKey = (RsaKey*)key;
+ else if (keyType == ECC_TYPE)
+ eccKey = (ecc_key*)key;
+ else if (keyType == ED25519_TYPE)
+ ed25519Key = (ed25519_key*)key;
+ else if (keyType == ED448_TYPE)
+ ed448Key = (ed448_key*)key;
+
+ return SignCert(requestSz, sType, buf, buffSz, rsaKey, eccKey, ed25519Key,
+ ed448Key, rng);
+}
+
+int wc_SignCert(int requestSz, int sType, byte* buf, word32 buffSz,
+ RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng)
+{
+ return SignCert(requestSz, sType, buf, buffSz, rsaKey, eccKey, NULL, NULL,
+ rng);
+}
-int wc_MakeSelfCert(Cert* cert, byte* buffer, word32 buffSz, RsaKey* key, RNG* rng)
+int wc_MakeSelfCert(Cert* cert, byte* buf, word32 buffSz,
+ RsaKey* key, WC_RNG* rng)
{
- int ret = wc_MakeCert(cert, buffer, buffSz, key, NULL, rng);
+ int ret;
+ ret = wc_MakeCert(cert, buf, buffSz, key, NULL, rng);
if (ret < 0)
return ret;
- return wc_SignCert(cert->bodySz, cert->sigType, buffer, buffSz, key, NULL,rng);
+ return wc_SignCert(cert->bodySz, cert->sigType,
+ buf, buffSz, key, NULL, rng);
}
-#ifdef WOLFSSL_ALT_NAMES
+#ifdef WOLFSSL_CERT_EXT
-/* Set Alt Names from der cert, return 0 on success */
-static int SetAltNamesFromCert(Cert* cert, const byte* der, int derSz)
+/* Get raw subject from cert, which may contain OIDs not parsed by Decode.
+ The raw subject pointer will only be valid while "cert" is valid. */
+int wc_GetSubjectRaw(byte **subjectRaw, Cert *cert)
{
- int ret;
-#ifdef WOLFSSL_SMALL_STACK
- DecodedCert* decoded;
+ int rc = BAD_FUNC_ARG;
+ if ((subjectRaw != NULL) && (cert != NULL)) {
+ *subjectRaw = cert->sbjRaw;
+ rc = 0;
+ }
+ return rc;
+}
+
+/* Set KID from public key */
+static int SetKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey, ecc_key *eckey,
+ byte *ntruKey, word16 ntruKeySz,
+ ed25519_key* ed25519Key, ed448_key* ed448Key,
+ int kid_type)
+{
+ byte *buf;
+ int bufferSz, ret;
+
+ if (cert == NULL ||
+ (rsakey == NULL && eckey == NULL && ntruKey == NULL &&
+ ed25519Key == NULL && ed448Key == NULL) ||
+ (kid_type != SKID_TYPE && kid_type != AKID_TYPE))
+ return BAD_FUNC_ARG;
+
+ buf = (byte *)XMALLOC(MAX_PUBLIC_KEY_SZ, cert->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (buf == NULL)
+ return MEMORY_E;
+
+ /* Public Key */
+ bufferSz = -1;
+#ifndef NO_RSA
+ /* RSA public key */
+ if (rsakey != NULL)
+ bufferSz = SetRsaPublicKey(buf, rsakey, MAX_PUBLIC_KEY_SZ, 0);
+#endif
+#ifdef HAVE_ECC
+ /* ECC public key */
+ if (eckey != NULL)
+ bufferSz = SetEccPublicKey(buf, eckey, 0);
+#endif
+#ifdef HAVE_NTRU
+ /* NTRU public key */
+ if (ntruKey != NULL) {
+ bufferSz = MAX_PUBLIC_KEY_SZ;
+ ret = ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(
+ ntruKeySz, ntruKey, (word16 *)(&bufferSz), buf);
+ if (ret != NTRU_OK)
+ bufferSz = -1;
+ }
#else
- DecodedCert decoded[1];
+ (void)ntruKeySz;
+#endif
+#ifdef HAVE_ED25519
+ /* ED25519 public key */
+ if (ed25519Key != NULL)
+ bufferSz = SetEd25519PublicKey(buf, ed25519Key, 0);
+#endif
+#ifdef HAVE_ED448
+ /* ED448 public key */
+ if (ed448Key != NULL)
+        bufferSz = SetEd448PublicKey(buf, ed448Key, 0);
#endif
- if (derSz < 0)
+ if (bufferSz <= 0) {
+ XFREE(buf, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return PUBLIC_KEY_E;
+ }
+
+ /* Compute SKID by hashing public key */
+ if (kid_type == SKID_TYPE) {
+ ret = CalcHashId(buf, bufferSz, cert->skid);
+ cert->skidSz = KEYID_SIZE;
+ }
+ else if (kid_type == AKID_TYPE) {
+ ret = CalcHashId(buf, bufferSz, cert->akid);
+ cert->akidSz = KEYID_SIZE;
+ }
+ else
+ ret = BAD_FUNC_ARG;
+
+ XFREE(buf, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+}
+
+int wc_SetSubjectKeyIdFromPublicKey_ex(Cert *cert, int keyType, void* key)
+{
+ RsaKey* rsaKey = NULL;
+ ecc_key* eccKey = NULL;
+ ed25519_key* ed25519Key = NULL;
+ ed448_key* ed448Key = NULL;
+
+ if (keyType == RSA_TYPE)
+ rsaKey = (RsaKey*)key;
+ else if (keyType == ECC_TYPE)
+ eccKey = (ecc_key*)key;
+ else if (keyType == ED25519_TYPE)
+ ed25519Key = (ed25519_key*)key;
+ else if (keyType == ED448_TYPE)
+ ed448Key = (ed448_key*)key;
+
+ return SetKeyIdFromPublicKey(cert, rsaKey, eccKey, NULL, 0, ed25519Key,
+ ed448Key, SKID_TYPE);
+}
+
+/* Set SKID from RSA or ECC public key */
+int wc_SetSubjectKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey, ecc_key *eckey)
+{
+ return SetKeyIdFromPublicKey(cert, rsakey, eckey, NULL, 0, NULL, NULL,
+ SKID_TYPE);
+}
+
+#ifdef HAVE_NTRU
+/* Set SKID from NTRU public key */
+int wc_SetSubjectKeyIdFromNtruPublicKey(Cert *cert,
+ byte *ntruKey, word16 ntruKeySz)
+{
+ return SetKeyIdFromPublicKey(cert, NULL,NULL,ntruKey, ntruKeySz, NULL, NULL,
+ SKID_TYPE);
+}
+#endif
+
+int wc_SetAuthKeyIdFromPublicKey_ex(Cert *cert, int keyType, void* key)
+{
+ RsaKey* rsaKey = NULL;
+ ecc_key* eccKey = NULL;
+ ed25519_key* ed25519Key = NULL;
+ ed448_key* ed448Key = NULL;
+
+ if (keyType == RSA_TYPE)
+ rsaKey = (RsaKey*)key;
+ else if (keyType == ECC_TYPE)
+ eccKey = (ecc_key*)key;
+ else if (keyType == ED25519_TYPE)
+ ed25519Key = (ed25519_key*)key;
+ else if (keyType == ED448_TYPE)
+ ed448Key = (ed448_key*)key;
+
+ return SetKeyIdFromPublicKey(cert, rsaKey, eccKey, NULL, 0, ed25519Key,
+ ed448Key, AKID_TYPE);
+}
+
+/* Set SKID from RSA or ECC public key */
+int wc_SetAuthKeyIdFromPublicKey(Cert *cert, RsaKey *rsakey, ecc_key *eckey)
+{
+ return SetKeyIdFromPublicKey(cert, rsakey, eckey, NULL, 0, NULL, NULL,
+ AKID_TYPE);
+}
+
+
+#if !defined(NO_FILESYSTEM) && !defined(NO_ASN_CRYPT)
+
+/* Set SKID from public key file in PEM */
+int wc_SetSubjectKeyId(Cert *cert, const char* file)
+{
+ int ret, derSz;
+ byte* der;
+ word32 idx;
+ RsaKey *rsakey = NULL;
+ ecc_key *eckey = NULL;
+
+ if (cert == NULL || file == NULL)
+ return BAD_FUNC_ARG;
+
+ der = (byte*)XMALLOC(MAX_PUBLIC_KEY_SZ, cert->heap, DYNAMIC_TYPE_CERT);
+ if (der == NULL) {
+ WOLFSSL_MSG("wc_SetSubjectKeyId memory Problem");
+ return MEMORY_E;
+ }
+ derSz = MAX_PUBLIC_KEY_SZ;
+
+ XMEMSET(der, 0, derSz);
+ derSz = wc_PemPubKeyToDer(file, der, derSz);
+ if (derSz <= 0) {
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
return derSz;
+ }
-#ifdef WOLFSSL_SMALL_STACK
- decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- if (decoded == NULL)
+ /* Load PubKey in internal structure */
+#ifndef NO_RSA
+ rsakey = (RsaKey*) XMALLOC(sizeof(RsaKey), cert->heap, DYNAMIC_TYPE_RSA);
+ if (rsakey == NULL) {
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
+ return MEMORY_E;
+ }
+
+ if (wc_InitRsaKey(rsakey, cert->heap) != 0) {
+ WOLFSSL_MSG("wc_InitRsaKey failure");
+ XFREE(rsakey, cert->heap, DYNAMIC_TYPE_RSA);
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
return MEMORY_E;
+ }
+
+ idx = 0;
+ ret = wc_RsaPublicKeyDecode(der, &idx, rsakey, derSz);
+ if (ret != 0)
#endif
-
- InitDecodedCert(decoded, (byte*)der, derSz, 0);
- ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
+ {
+#ifndef NO_RSA
+ WOLFSSL_MSG("wc_RsaPublicKeyDecode failed");
+ wc_FreeRsaKey(rsakey);
+ XFREE(rsakey, cert->heap, DYNAMIC_TYPE_RSA);
+ rsakey = NULL;
+#endif
+#ifdef HAVE_ECC
+ /* Check to load ecc public key */
+ eckey = (ecc_key*) XMALLOC(sizeof(ecc_key), cert->heap,
+ DYNAMIC_TYPE_ECC);
+ if (eckey == NULL) {
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
+ return MEMORY_E;
+ }
- if (ret < 0) {
- WOLFSSL_MSG("ParseCertRelative error");
+ if (wc_ecc_init(eckey) != 0) {
+ WOLFSSL_MSG("wc_ecc_init failure");
+ wc_ecc_free(eckey);
+ XFREE(eckey, cert->heap, DYNAMIC_TYPE_ECC);
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
+ return MEMORY_E;
+ }
+
+ idx = 0;
+ ret = wc_EccPublicKeyDecode(der, &idx, eckey, derSz);
+ if (ret != 0) {
+ WOLFSSL_MSG("wc_EccPublicKeyDecode failed");
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
+ wc_ecc_free(eckey);
+ XFREE(eckey, cert->heap, DYNAMIC_TYPE_ECC);
+ return PUBLIC_KEY_E;
+ }
+#else
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
+ return PUBLIC_KEY_E;
+#endif /* HAVE_ECC */
+ }
+
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
+
+ ret = wc_SetSubjectKeyIdFromPublicKey(cert, rsakey, eckey);
+
+#ifndef NO_RSA
+ wc_FreeRsaKey(rsakey);
+ XFREE(rsakey, cert->heap, DYNAMIC_TYPE_RSA);
+#endif
+#ifdef HAVE_ECC
+ wc_ecc_free(eckey);
+ XFREE(eckey, cert->heap, DYNAMIC_TYPE_ECC);
+#endif
+ return ret;
+}
+
+#endif /* !NO_FILESYSTEM && !NO_ASN_CRYPT */
+
+static int SetAuthKeyIdFromDcert(Cert* cert, DecodedCert* decoded)
+{
+ int ret = 0;
+
+ /* Subject Key Id not found !! */
+ if (decoded->extSubjKeyIdSet == 0) {
+ ret = ASN_NO_SKID;
+ }
+
+ /* SKID invalid size */
+ else if (sizeof(cert->akid) < sizeof(decoded->extSubjKeyId)) {
+ ret = MEMORY_E;
+ }
+
+ else {
+ /* Put the SKID of CA to AKID of certificate */
+ XMEMCPY(cert->akid, decoded->extSubjKeyId, KEYID_SIZE);
+ cert->akidSz = KEYID_SIZE;
+ }
+
+ return ret;
+}
+
+/* Set AKID from certificate contains in buffer (DER encoded) */
+int wc_SetAuthKeyIdFromCert(Cert *cert, const byte *der, int derSz)
+{
+ int ret = 0;
+
+ if (cert == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ /* Check if decodedCert is cached */
+ if (cert->der != der) {
+ /* Allocate cache for the decoded cert */
+ ret = wc_SetCert_LoadDer(cert, der, derSz);
+ }
+
+ if (ret >= 0) {
+ ret = SetAuthKeyIdFromDcert(cert, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+ wc_SetCert_Free(cert);
+#endif
+ }
+ }
+
+ return ret;
+}
+
+
+#ifndef NO_FILESYSTEM
+
+/* Set AKID from certificate file in PEM */
+int wc_SetAuthKeyId(Cert *cert, const char* file)
+{
+ int ret;
+ int derSz;
+ byte* der;
+
+ if (cert == NULL || file == NULL)
+ return BAD_FUNC_ARG;
+
+ der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
+ if (der == NULL) {
+ WOLFSSL_MSG("wc_SetAuthKeyId OOF Problem");
+ return MEMORY_E;
+ }
+
+ derSz = wc_PemCertToDer(file, der, EIGHTK_BUF);
+ if (derSz <= 0)
+ {
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
+ return derSz;
}
- else if (decoded->extensions) {
- byte b;
+
+ ret = wc_SetAuthKeyIdFromCert(cert, der, derSz);
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
+
+ return ret;
+}
+
+#endif /* !NO_FILESYSTEM */
+
+/* Set KeyUsage from human readable string */
+int wc_SetKeyUsage(Cert *cert, const char *value)
+{
+ int ret = 0;
+ char *token, *str, *ptr;
+ word32 len;
+
+ if (cert == NULL || value == NULL)
+ return BAD_FUNC_ARG;
+
+ cert->keyUsage = 0;
+
+ /* duplicate string (including terminator) */
+ len = (word32)XSTRLEN(value);
+ str = (char*)XMALLOC(len+1, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (str == NULL)
+ return MEMORY_E;
+ XMEMCPY(str, value, len+1);
+
+ /* parse value, and set corresponding Key Usage value */
+ if ((token = XSTRTOK(str, ",", &ptr)) == NULL) {
+ XFREE(str, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return KEYUSAGE_E;
+ }
+ while (token != NULL)
+ {
+ len = (word32)XSTRLEN(token);
+
+ if (!XSTRNCASECMP(token, "digitalSignature", len))
+ cert->keyUsage |= KEYUSE_DIGITAL_SIG;
+ else if (!XSTRNCASECMP(token, "nonRepudiation", len) ||
+ !XSTRNCASECMP(token, "contentCommitment", len))
+ cert->keyUsage |= KEYUSE_CONTENT_COMMIT;
+ else if (!XSTRNCASECMP(token, "keyEncipherment", len))
+ cert->keyUsage |= KEYUSE_KEY_ENCIPHER;
+ else if (!XSTRNCASECMP(token, "dataEncipherment", len))
+ cert->keyUsage |= KEYUSE_DATA_ENCIPHER;
+ else if (!XSTRNCASECMP(token, "keyAgreement", len))
+ cert->keyUsage |= KEYUSE_KEY_AGREE;
+ else if (!XSTRNCASECMP(token, "keyCertSign", len))
+ cert->keyUsage |= KEYUSE_KEY_CERT_SIGN;
+ else if (!XSTRNCASECMP(token, "cRLSign", len))
+ cert->keyUsage |= KEYUSE_CRL_SIGN;
+ else if (!XSTRNCASECMP(token, "encipherOnly", len))
+ cert->keyUsage |= KEYUSE_ENCIPHER_ONLY;
+ else if (!XSTRNCASECMP(token, "decipherOnly", len))
+ cert->keyUsage |= KEYUSE_DECIPHER_ONLY;
+ else {
+ ret = KEYUSAGE_E;
+ break;
+ }
+
+ token = XSTRTOK(NULL, ",", &ptr);
+ }
+
+ XFREE(str, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+}
+
+/* Set ExtendedKeyUsage from human readable string */
+int wc_SetExtKeyUsage(Cert *cert, const char *value)
+{
+ int ret = 0;
+ char *token, *str, *ptr;
+ word32 len;
+
+ if (cert == NULL || value == NULL)
+ return BAD_FUNC_ARG;
+
+ cert->extKeyUsage = 0;
+
+ /* duplicate string (including terminator) */
+ len = (word32)XSTRLEN(value);
+ str = (char*)XMALLOC(len+1, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (str == NULL)
+ return MEMORY_E;
+ XMEMCPY(str, value, len+1);
+
+ /* parse value, and set corresponding Key Usage value */
+ if ((token = XSTRTOK(str, ",", &ptr)) == NULL) {
+ XFREE(str, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return EXTKEYUSAGE_E;
+ }
+
+ while (token != NULL)
+ {
+ len = (word32)XSTRLEN(token);
+
+ if (!XSTRNCASECMP(token, "any", len))
+ cert->extKeyUsage |= EXTKEYUSE_ANY;
+ else if (!XSTRNCASECMP(token, "serverAuth", len))
+ cert->extKeyUsage |= EXTKEYUSE_SERVER_AUTH;
+ else if (!XSTRNCASECMP(token, "clientAuth", len))
+ cert->extKeyUsage |= EXTKEYUSE_CLIENT_AUTH;
+ else if (!XSTRNCASECMP(token, "codeSigning", len))
+ cert->extKeyUsage |= EXTKEYUSE_CODESIGN;
+ else if (!XSTRNCASECMP(token, "emailProtection", len))
+ cert->extKeyUsage |= EXTKEYUSE_EMAILPROT;
+ else if (!XSTRNCASECMP(token, "timeStamping", len))
+ cert->extKeyUsage |= EXTKEYUSE_TIMESTAMP;
+ else if (!XSTRNCASECMP(token, "OCSPSigning", len))
+ cert->extKeyUsage |= EXTKEYUSE_OCSP_SIGN;
+ else {
+ ret = EXTKEYUSAGE_E;
+ break;
+ }
+
+ token = XSTRTOK(NULL, ",", &ptr);
+ }
+
+ XFREE(str, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+}
+
+#ifdef WOLFSSL_EKU_OID
+/*
+ * cert structure to set EKU oid in
+ * oid the oid in byte representation
+ * sz size of oid buffer
+ * idx index of array to place oid
+ *
+ * returns 0 on success
+ */
+int wc_SetExtKeyUsageOID(Cert *cert, const char *in, word32 sz, byte idx,
+ void* heap)
+{
+ byte oid[MAX_OID_SZ];
+ word32 oidSz = MAX_OID_SZ;
+
+ if (idx >= CTC_MAX_EKU_NB || sz >= CTC_MAX_EKU_OID_SZ) {
+ WOLFSSL_MSG("Either idx or sz was too large");
+ return BAD_FUNC_ARG;
+ }
+
+ if (EncodePolicyOID(oid, &oidSz, in, heap) != 0) {
+ return BUFFER_E;
+ }
+
+ XMEMCPY(cert->extKeyUsageOID[idx], oid, oidSz);
+ cert->extKeyUsageOIDSz[idx] = oidSz;
+ cert->extKeyUsage |= EXTKEYUSE_USER;
+
+ return 0;
+}
+#endif /* WOLFSSL_EKU_OID */
+#endif /* WOLFSSL_CERT_EXT */
+
+
+#ifdef WOLFSSL_ALT_NAMES
+
+static int SetAltNamesFromDcert(Cert* cert, DecodedCert* decoded)
+{
+ int ret = 0;
+ byte tag;
+
+ if (decoded->extensions) {
int length;
word32 maxExtensionsIdx;
decoded->srcIdx = decoded->extensionsIdx;
- b = decoded->source[decoded->srcIdx++];
-
- if (b != ASN_EXTENSIONS) {
+ if (GetASNTag(decoded->source, &decoded->srcIdx, &tag, decoded->maxIdx)
+ != 0) {
+ return ASN_PARSE_E;
+ }
+
+ if (tag != ASN_EXTENSIONS) {
ret = ASN_PARSE_E;
}
else if (GetLength(decoded->source, &decoded->srcIdx, &length,
@@ -6244,7 +14471,7 @@ static int SetAltNamesFromCert(Cert* cert, const byte* der, int derSz)
decoded->srcIdx = startIdx;
if (GetAlgoId(decoded->source, &decoded->srcIdx, &oid,
- decoded->maxIdx) < 0) {
+ oidCertExtType, decoded->maxIdx) < 0) {
ret = ASN_PARSE_E;
break;
}
@@ -6267,17 +14494,13 @@ static int SetAltNamesFromCert(Cert* cert, const byte* der, int derSz)
}
}
- FreeDecodedCert(decoded);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
- return ret < 0 ? ret : 0;
+ return ret;
}
+#ifndef NO_FILESYSTEM
-/* Set Dates from der cert, return 0 on success */
-static int SetDatesFromCert(Cert* cert, const byte* der, int derSz)
+/* Set Alt Names from der cert, return 0 on success */
+static int SetAltNamesFromCert(Cert* cert, const byte* der, int derSz)
{
int ret;
#ifdef WOLFSSL_SMALL_STACK
@@ -6286,28 +14509,45 @@ static int SetDatesFromCert(Cert* cert, const byte* der, int derSz)
DecodedCert decoded[1];
#endif
- WOLFSSL_ENTER("SetDatesFromCert");
if (derSz < 0)
return derSz;
-
+
#ifdef WOLFSSL_SMALL_STACK
- decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
+ decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), cert->heap,
DYNAMIC_TYPE_TMP_BUFFER);
if (decoded == NULL)
return MEMORY_E;
#endif
- InitDecodedCert(decoded, (byte*)der, derSz, 0);
+ InitDecodedCert(decoded, der, derSz, NULL);
ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
if (ret < 0) {
WOLFSSL_MSG("ParseCertRelative error");
}
- else if (decoded->beforeDate == NULL || decoded->afterDate == NULL) {
+ else {
+ ret = SetAltNamesFromDcert(cert, decoded);
+ }
+
+ FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(decoded, cert->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret < 0 ? ret : 0;
+}
+
+#endif
+
+static int SetDatesFromDcert(Cert* cert, DecodedCert* decoded)
+{
+ int ret = 0;
+
+ if (decoded->beforeDate == NULL || decoded->afterDate == NULL) {
WOLFSSL_MSG("Couldn't extract dates");
ret = -1;
}
- else if (decoded->beforeDateLen > MAX_DATE_SIZE ||
+ else if (decoded->beforeDateLen > MAX_DATE_SIZE ||
decoded->afterDateLen > MAX_DATE_SIZE) {
WOLFSSL_MSG("Bad date size");
ret = -1;
@@ -6320,23 +14560,108 @@ static int SetDatesFromCert(Cert* cert, const byte* der, int derSz)
cert->afterDateSz = decoded->afterDateLen;
}
- FreeDecodedCert(decoded);
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
- return ret < 0 ? ret : 0;
+ return ret;
}
+#endif /* WOLFSSL_ALT_NAMES */
-#endif /* WOLFSSL_ALT_NAMES && !NO_RSA */
+static void SetNameFromDcert(CertName* cn, DecodedCert* decoded)
+{
+ int sz;
+
+ if (decoded->subjectCN) {
+ sz = (decoded->subjectCNLen < CTC_NAME_SIZE) ? decoded->subjectCNLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->commonName, decoded->subjectCN, sz);
+ cn->commonName[sz] = '\0';
+ cn->commonNameEnc = decoded->subjectCNEnc;
+ }
+ if (decoded->subjectC) {
+ sz = (decoded->subjectCLen < CTC_NAME_SIZE) ? decoded->subjectCLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->country, decoded->subjectC, sz);
+ cn->country[sz] = '\0';
+ cn->countryEnc = decoded->subjectCEnc;
+ }
+ if (decoded->subjectST) {
+ sz = (decoded->subjectSTLen < CTC_NAME_SIZE) ? decoded->subjectSTLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->state, decoded->subjectST, sz);
+ cn->state[sz] = '\0';
+ cn->stateEnc = decoded->subjectSTEnc;
+ }
+ if (decoded->subjectL) {
+ sz = (decoded->subjectLLen < CTC_NAME_SIZE) ? decoded->subjectLLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->locality, decoded->subjectL, sz);
+ cn->locality[sz] = '\0';
+ cn->localityEnc = decoded->subjectLEnc;
+ }
+ if (decoded->subjectO) {
+ sz = (decoded->subjectOLen < CTC_NAME_SIZE) ? decoded->subjectOLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->org, decoded->subjectO, sz);
+ cn->org[sz] = '\0';
+ cn->orgEnc = decoded->subjectOEnc;
+ }
+ if (decoded->subjectOU) {
+ sz = (decoded->subjectOULen < CTC_NAME_SIZE) ? decoded->subjectOULen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->unit, decoded->subjectOU, sz);
+ cn->unit[sz] = '\0';
+ cn->unitEnc = decoded->subjectOUEnc;
+ }
+ if (decoded->subjectSN) {
+ sz = (decoded->subjectSNLen < CTC_NAME_SIZE) ? decoded->subjectSNLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->sur, decoded->subjectSN, sz);
+ cn->sur[sz] = '\0';
+ cn->surEnc = decoded->subjectSNEnc;
+ }
+ if (decoded->subjectSND) {
+ sz = (decoded->subjectSNDLen < CTC_NAME_SIZE) ? decoded->subjectSNDLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->serialDev, decoded->subjectSND, sz);
+ cn->serialDev[sz] = '\0';
+ cn->serialDevEnc = decoded->subjectSNDEnc;
+ }
+#ifdef WOLFSSL_CERT_EXT
+ if (decoded->subjectBC) {
+ sz = (decoded->subjectBCLen < CTC_NAME_SIZE) ? decoded->subjectBCLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->busCat, decoded->subjectBC, sz);
+ cn->busCat[sz] = '\0';
+ cn->busCatEnc = decoded->subjectBCEnc;
+ }
+ if (decoded->subjectJC) {
+ sz = (decoded->subjectJCLen < CTC_NAME_SIZE) ? decoded->subjectJCLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->joiC, decoded->subjectJC, sz);
+ cn->joiC[sz] = '\0';
+ cn->joiCEnc = decoded->subjectJCEnc;
+ }
+ if (decoded->subjectJS) {
+ sz = (decoded->subjectJSLen < CTC_NAME_SIZE) ? decoded->subjectJSLen
+ : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->joiSt, decoded->subjectJS, sz);
+ cn->joiSt[sz] = '\0';
+ cn->joiStEnc = decoded->subjectJSEnc;
+ }
+#endif
+ if (decoded->subjectEmail) {
+ sz = (decoded->subjectEmailLen < CTC_NAME_SIZE)
+ ? decoded->subjectEmailLen : CTC_NAME_SIZE - 1;
+ XSTRNCPY(cn->email, decoded->subjectEmail, sz);
+ cn->email[sz] = '\0';
+ }
+}
+#ifndef NO_FILESYSTEM
/* Set cn name from der buffer, return 0 on success */
static int SetNameFromCert(CertName* cn, const byte* der, int derSz)
{
- int ret, sz;
+ int ret;
#ifdef WOLFSSL_SMALL_STACK
DecodedCert* decoded;
#else
@@ -6353,68 +14678,14 @@ static int SetNameFromCert(CertName* cn, const byte* der, int derSz)
return MEMORY_E;
#endif
- InitDecodedCert(decoded, (byte*)der, derSz, 0);
+ InitDecodedCert(decoded, der, derSz, NULL);
ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
if (ret < 0) {
WOLFSSL_MSG("ParseCertRelative error");
}
else {
- if (decoded->subjectCN) {
- sz = (decoded->subjectCNLen < CTC_NAME_SIZE) ? decoded->subjectCNLen
- : CTC_NAME_SIZE - 1;
- strncpy(cn->commonName, decoded->subjectCN, CTC_NAME_SIZE);
- cn->commonName[sz] = 0;
- cn->commonNameEnc = decoded->subjectCNEnc;
- }
- if (decoded->subjectC) {
- sz = (decoded->subjectCLen < CTC_NAME_SIZE) ? decoded->subjectCLen
- : CTC_NAME_SIZE - 1;
- strncpy(cn->country, decoded->subjectC, CTC_NAME_SIZE);
- cn->country[sz] = 0;
- cn->countryEnc = decoded->subjectCEnc;
- }
- if (decoded->subjectST) {
- sz = (decoded->subjectSTLen < CTC_NAME_SIZE) ? decoded->subjectSTLen
- : CTC_NAME_SIZE - 1;
- strncpy(cn->state, decoded->subjectST, CTC_NAME_SIZE);
- cn->state[sz] = 0;
- cn->stateEnc = decoded->subjectSTEnc;
- }
- if (decoded->subjectL) {
- sz = (decoded->subjectLLen < CTC_NAME_SIZE) ? decoded->subjectLLen
- : CTC_NAME_SIZE - 1;
- strncpy(cn->locality, decoded->subjectL, CTC_NAME_SIZE);
- cn->locality[sz] = 0;
- cn->localityEnc = decoded->subjectLEnc;
- }
- if (decoded->subjectO) {
- sz = (decoded->subjectOLen < CTC_NAME_SIZE) ? decoded->subjectOLen
- : CTC_NAME_SIZE - 1;
- strncpy(cn->org, decoded->subjectO, CTC_NAME_SIZE);
- cn->org[sz] = 0;
- cn->orgEnc = decoded->subjectOEnc;
- }
- if (decoded->subjectOU) {
- sz = (decoded->subjectOULen < CTC_NAME_SIZE) ? decoded->subjectOULen
- : CTC_NAME_SIZE - 1;
- strncpy(cn->unit, decoded->subjectOU, CTC_NAME_SIZE);
- cn->unit[sz] = 0;
- cn->unitEnc = decoded->subjectOUEnc;
- }
- if (decoded->subjectSN) {
- sz = (decoded->subjectSNLen < CTC_NAME_SIZE) ? decoded->subjectSNLen
- : CTC_NAME_SIZE - 1;
- strncpy(cn->sur, decoded->subjectSN, CTC_NAME_SIZE);
- cn->sur[sz] = 0;
- cn->surEnc = decoded->subjectSNEnc;
- }
- if (decoded->subjectEmail) {
- sz = (decoded->subjectEmailLen < CTC_NAME_SIZE)
- ? decoded->subjectEmailLen : CTC_NAME_SIZE - 1;
- strncpy(cn->email, decoded->subjectEmail, CTC_NAME_SIZE);
- cn->email[sz] = 0;
- }
+ SetNameFromDcert(cn, decoded);
}
FreeDecodedCert(decoded);
@@ -6426,24 +14697,26 @@ static int SetNameFromCert(CertName* cn, const byte* der, int derSz)
return ret < 0 ? ret : 0;
}
-
-#ifndef NO_FILESYSTEM
-
/* Set cert issuer from issuerFile in PEM */
int wc_SetIssuer(Cert* cert, const char* issuerFile)
{
int ret;
int derSz;
- byte* der = (byte*)XMALLOC(EIGHTK_BUF, NULL, DYNAMIC_TYPE_CERT);
+ byte* der;
+ if (cert == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
if (der == NULL) {
WOLFSSL_MSG("wc_SetIssuer OOF Problem");
return MEMORY_E;
}
- derSz = wolfSSL_PemCertToDer(issuerFile, der, EIGHTK_BUF);
+ derSz = wc_PemCertToDer(issuerFile, der, EIGHTK_BUF);
cert->selfSigned = 0;
ret = SetNameFromCert(&cert->issuer, der, derSz);
- XFREE(der, NULL, DYNAMIC_TYPE_CERT);
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
return ret;
}
@@ -6454,77 +14727,367 @@ int wc_SetSubject(Cert* cert, const char* subjectFile)
{
int ret;
int derSz;
- byte* der = (byte*)XMALLOC(EIGHTK_BUF, NULL, DYNAMIC_TYPE_CERT);
+ byte* der;
+
+ if (cert == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
if (der == NULL) {
WOLFSSL_MSG("wc_SetSubject OOF Problem");
return MEMORY_E;
}
- derSz = wolfSSL_PemCertToDer(subjectFile, der, EIGHTK_BUF);
+
+ derSz = wc_PemCertToDer(subjectFile, der, EIGHTK_BUF);
ret = SetNameFromCert(&cert->subject, der, derSz);
- XFREE(der, NULL, DYNAMIC_TYPE_CERT);
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
return ret;
}
-
#ifdef WOLFSSL_ALT_NAMES
-/* Set atl names from file in PEM */
+/* Set alt names from file in PEM */
int wc_SetAltNames(Cert* cert, const char* file)
{
int ret;
int derSz;
- byte* der = (byte*)XMALLOC(EIGHTK_BUF, NULL, DYNAMIC_TYPE_CERT);
+ byte* der;
+
+ if (cert == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ der = (byte*)XMALLOC(EIGHTK_BUF, cert->heap, DYNAMIC_TYPE_CERT);
if (der == NULL) {
WOLFSSL_MSG("wc_SetAltNames OOF Problem");
return MEMORY_E;
}
- derSz = wolfSSL_PemCertToDer(file, der, EIGHTK_BUF);
+ derSz = wc_PemCertToDer(file, der, EIGHTK_BUF);
ret = SetAltNamesFromCert(cert, der, derSz);
- XFREE(der, NULL, DYNAMIC_TYPE_CERT);
+ XFREE(der, cert->heap, DYNAMIC_TYPE_CERT);
return ret;
}
#endif /* WOLFSSL_ALT_NAMES */
-#endif /* NO_FILESYSTEM */
+#endif /* !NO_FILESYSTEM */
/* Set cert issuer from DER buffer */
int wc_SetIssuerBuffer(Cert* cert, const byte* der, int derSz)
{
- cert->selfSigned = 0;
- return SetNameFromCert(&cert->issuer, der, derSz);
-}
+ int ret = 0;
+
+ if (cert == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ cert->selfSigned = 0;
+
+ /* Check if decodedCert is cached */
+ if (cert->der != der) {
+ /* Allocate cache for the decoded cert */
+ ret = wc_SetCert_LoadDer(cert, der, derSz);
+ }
+ if (ret >= 0) {
+ SetNameFromDcert(&cert->issuer, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+ wc_SetCert_Free(cert);
+#endif
+ }
+ }
+
+ return ret;
+}
/* Set cert subject from DER buffer */
int wc_SetSubjectBuffer(Cert* cert, const byte* der, int derSz)
{
- return SetNameFromCert(&cert->subject, der, derSz);
+ int ret = 0;
+
+ if (cert == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ /* Check if decodedCert is cached */
+ if (cert->der != der) {
+ /* Allocate cache for the decoded cert */
+ ret = wc_SetCert_LoadDer(cert, der, derSz);
+ }
+
+ if (ret >= 0) {
+ SetNameFromDcert(&cert->subject, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+ wc_SetCert_Free(cert);
+#endif
+ }
+ }
+
+ return ret;
+}
+#ifdef WOLFSSL_CERT_EXT
+/* Set cert raw subject from DER buffer */
+int wc_SetSubjectRaw(Cert* cert, const byte* der, int derSz)
+{
+ int ret = 0;
+
+ if (cert == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ /* Check if decodedCert is cached */
+ if (cert->der != der) {
+ /* Allocate cache for the decoded cert */
+ ret = wc_SetCert_LoadDer(cert, der, derSz);
+ }
+
+ if (ret >= 0) {
+ if ((((DecodedCert*)cert->decodedCert)->subjectRaw) &&
+ (((DecodedCert*)cert->decodedCert)->subjectRawLen <=
+ (int)sizeof(CertName))) {
+ XMEMCPY(cert->sbjRaw,
+ ((DecodedCert*)cert->decodedCert)->subjectRaw,
+ ((DecodedCert*)cert->decodedCert)->subjectRawLen);
+ }
+#ifndef WOLFSSL_CERT_GEN_CACHE
+ wc_SetCert_Free(cert);
+#endif
+ }
+ }
+
+ return ret;
}
+/* Set cert raw issuer from DER buffer */
+int wc_SetIssuerRaw(Cert* cert, const byte* der, int derSz)
+{
+ int ret = 0;
+
+ if (cert == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ /* Check if decodedCert is cached */
+ if (cert->der != der) {
+ /* Allocate cache for the decoded cert */
+ ret = wc_SetCert_LoadDer(cert, der, derSz);
+ }
+
+ if (ret >= 0) {
+ if ((((DecodedCert*)cert->decodedCert)->issuerRaw) &&
+ (((DecodedCert*)cert->decodedCert)->issuerRawLen <=
+ (int)sizeof(CertName))) {
+ XMEMCPY(cert->issRaw,
+ ((DecodedCert*)cert->decodedCert)->issuerRaw,
+ ((DecodedCert*)cert->decodedCert)->issuerRawLen);
+ }
+#ifndef WOLFSSL_CERT_GEN_CACHE
+ wc_SetCert_Free(cert);
+#endif
+ }
+ }
+ return ret;
+}
+#endif
#ifdef WOLFSSL_ALT_NAMES
/* Set cert alt names from DER buffer */
int wc_SetAltNamesBuffer(Cert* cert, const byte* der, int derSz)
{
- return SetAltNamesFromCert(cert, der, derSz);
+ int ret = 0;
+
+ if (cert == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ /* Check if decodedCert is cached */
+ if (cert->der != der) {
+ /* Allocate cache for the decoded cert */
+ ret = wc_SetCert_LoadDer(cert, der, derSz);
+ }
+
+ if (ret >= 0) {
+ ret = SetAltNamesFromDcert(cert, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+ wc_SetCert_Free(cert);
+#endif
+ }
+ }
+
+ return(ret);
}
/* Set cert dates from DER buffer */
int wc_SetDatesBuffer(Cert* cert, const byte* der, int derSz)
{
- return SetDatesFromCert(cert, der, derSz);
+ int ret = 0;
+
+ if (cert == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ /* Check if decodedCert is cached */
+ if (cert->der != der) {
+ /* Allocate cache for the decoded cert */
+ ret = wc_SetCert_LoadDer(cert, der, derSz);
+ }
+
+ if (ret >= 0) {
+ ret = SetDatesFromDcert(cert, (DecodedCert*)cert->decodedCert);
+#ifndef WOLFSSL_CERT_GEN_CACHE
+ wc_SetCert_Free(cert);
+#endif
+ }
+ }
+
+ return(ret);
}
#endif /* WOLFSSL_ALT_NAMES */
#endif /* WOLFSSL_CERT_GEN */
+#if (defined(WOLFSSL_CERT_GEN) && defined(WOLFSSL_CERT_EXT)) \
+ || defined(OPENSSL_EXTRA)
+/* Encode OID string representation to ITU-T X.690 format */
+int EncodePolicyOID(byte *out, word32 *outSz, const char *in, void* heap)
+{
+ word32 val, idx = 0, nb_val;
+ char *token, *str, *ptr;
+ word32 len;
+
+ (void)heap;
+
+ if (out == NULL || outSz == NULL || *outSz < 2 || in == NULL)
+ return BAD_FUNC_ARG;
+
+ /* duplicate string (including terminator) */
+ len = (word32)XSTRLEN(in);
+ str = (char *)XMALLOC(len+1, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (str == NULL)
+ return MEMORY_E;
+ XMEMCPY(str, in, len+1);
+
+ nb_val = 0;
+
+ /* parse value, and set corresponding Policy OID value */
+ token = XSTRTOK(str, ".", &ptr);
+ while (token != NULL)
+ {
+ val = (word32)XATOI(token);
+
+ if (nb_val == 0) {
+ if (val > 2) {
+ XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ASN_OBJECT_ID_E;
+ }
+
+ out[idx] = (byte)(40 * val);
+ }
+ else if (nb_val == 1) {
+ if (val > 127) {
+ XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ASN_OBJECT_ID_E;
+ }
+
+ if (idx > *outSz) {
+ XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return BUFFER_E;
+ }
+
+ out[idx++] += (byte)val;
+ }
+ else {
+ word32 tb = 0, x;
+ int i = 0;
+ byte oid[MAX_OID_SZ];
+
+ while (val >= 128) {
+ x = val % 128;
+ val /= 128;
+ oid[i++] = (byte) (((tb++) ? 0x80 : 0) | x);
+ }
+
+ if ((idx+(word32)i) > *outSz) {
+ XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return BUFFER_E;
+ }
+
+ oid[i] = (byte) (((tb++) ? 0x80 : 0) | val);
+
+ /* push value in the right order */
+ while (i >= 0)
+ out[idx++] = oid[i--];
+ }
+
+ token = XSTRTOK(NULL, ".", &ptr);
+ nb_val++;
+ }
+
+ *outSz = idx;
+
+ XFREE(str, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return 0;
+}
+#endif /* WOLFSSL_CERT_EXT || OPENSSL_EXTRA */
+
+#endif /* !NO_CERTS */
+
+#if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+/* Helper function for wolfSSL_i2d_DHparams */
+int StoreDHparams(byte* out, word32* outLen, mp_int* p, mp_int* g)
+{
+ word32 idx = 0;
+ int pSz;
+ int gSz;
+ unsigned int tmp;
+ word32 headerSz = 4; /* 2*ASN_TAG + 2*LEN(ENUM) */
+
+ /* If the leading bit on the INTEGER is a 1, add a leading zero */
+ int pLeadingZero = mp_leading_bit(p);
+ int gLeadingZero = mp_leading_bit(g);
+ int pLen = mp_unsigned_bin_size(p);
+ int gLen = mp_unsigned_bin_size(g);
+
+ WOLFSSL_ENTER("StoreDHparams");
+ if (out == NULL) {
+ WOLFSSL_MSG("Null buffer error");
+ return BUFFER_E;
+ }
+
+ tmp = pLeadingZero + gLeadingZero + pLen + gLen;
+ if (*outLen < (tmp + headerSz)) {
+ return BUFFER_E;
+ }
+
+ /* Set sequence */
+ idx = SetSequence(tmp + headerSz + 2, out);
+
+ /* Encode p */
+ pSz = SetASNIntMP(p, -1, &out[idx]);
+ if (pSz < 0) {
+ WOLFSSL_MSG("SetASNIntMP failed");
+ return pSz;
+ }
+ idx += pSz;
+
+ /* Encode g */
+ gSz = SetASNIntMP(g, -1, &out[idx]);
+ if (gSz < 0) {
+ WOLFSSL_MSG("SetASNIntMP failed");
+ return gSz;
+ }
+ idx += gSz;
+
+ *outLen = idx;
+
+ return 0;
+}
+#endif /* !NO_DH && WOLFSSL_QT || OPENSSL_ALL */
#ifdef HAVE_ECC
@@ -6532,8 +15095,8 @@ int wc_SetDatesBuffer(Cert* cert, const byte* der, int derSz)
int StoreECC_DSA_Sig(byte* out, word32* outLen, mp_int* r, mp_int* s)
{
word32 idx = 0;
- word32 rSz; /* encoding size */
- word32 sSz;
+ int rSz; /* encoding size */
+ int sSz;
word32 headerSz = 4; /* 2*ASN_TAG + 2*LEN(ENUM) */
/* If the leading bit on the INTEGER is a 1, add a leading zero */
@@ -6541,33 +15104,24 @@ int StoreECC_DSA_Sig(byte* out, word32* outLen, mp_int* r, mp_int* s)
int sLeadingZero = mp_leading_bit(s);
int rLen = mp_unsigned_bin_size(r); /* big int size */
int sLen = mp_unsigned_bin_size(s);
- int err;
if (*outLen < (rLen + rLeadingZero + sLen + sLeadingZero +
headerSz + 2)) /* SEQ_TAG + LEN(ENUM) */
- return BAD_FUNC_ARG;
+ return BUFFER_E;
- idx = SetSequence(rLen+rLeadingZero+sLen+sLeadingZero+headerSz, out);
+ idx = SetSequence(rLen + rLeadingZero + sLen+sLeadingZero + headerSz, out);
/* store r */
- out[idx++] = ASN_INTEGER;
- rSz = SetLength(rLen + rLeadingZero, &out[idx]);
+ rSz = SetASNIntMP(r, -1, &out[idx]);
+ if (rSz < 0)
+ return rSz;
idx += rSz;
- if (rLeadingZero)
- out[idx++] = 0;
- err = mp_to_unsigned_bin(r, &out[idx]);
- if (err != MP_OKAY) return err;
- idx += rLen;
/* store s */
- out[idx++] = ASN_INTEGER;
- sSz = SetLength(sLen + sLeadingZero, &out[idx]);
+ sSz = SetASNIntMP(s, -1, &out[idx]);
+ if (sSz < 0)
+ return sSz;
idx += sSz;
- if (sLeadingZero)
- out[idx++] = 0;
- err = mp_to_unsigned_bin(s, &out[idx]);
- if (err != MP_OKAY) return err;
- idx += sLen;
*outLen = idx;
@@ -6575,23 +15129,35 @@ int StoreECC_DSA_Sig(byte* out, word32* outLen, mp_int* r, mp_int* s)
}
-/* Der Decode ECC-DSA Signautre, r & s stored as big ints */
+/* Der Decode ECC-DSA Signature, r & s stored as big ints */
int DecodeECC_DSA_Sig(const byte* sig, word32 sigLen, mp_int* r, mp_int* s)
{
word32 idx = 0;
int len = 0;
- if (GetSequence(sig, &idx, &len, sigLen) < 0)
+ if (GetSequence(sig, &idx, &len, sigLen) < 0) {
return ASN_ECC_KEY_E;
+ }
- if ((word32)len > (sigLen - idx))
+#ifndef NO_STRICT_ECDSA_LEN
+ /* enable strict length checking for signature */
+ if (sigLen != idx + (word32)len) {
+ return ASN_ECC_KEY_E;
+ }
+#else
+ /* allow extra signature bytes at end */
+ if ((word32)len > (sigLen - idx)) {
return ASN_ECC_KEY_E;
+ }
+#endif
- if (GetInt(r, sig, &idx, sigLen) < 0)
+ if (GetInt(r, sig, &idx, sigLen) < 0) {
return ASN_ECC_KEY_E;
+ }
- if (GetInt(s, sig, &idx, sigLen) < 0)
+ if (GetInt(s, sig, &idx, sigLen) < 0) {
return ASN_ECC_KEY_E;
+ }
return 0;
}
@@ -6600,18 +15166,20 @@ int DecodeECC_DSA_Sig(const byte* sig, word32 sigLen, mp_int* r, mp_int* s)
int wc_EccPrivateKeyDecode(const byte* input, word32* inOutIdx, ecc_key* key,
word32 inSz)
{
- word32 oid = 0;
+ word32 oidSum;
int version, length;
- int privSz, pubSz;
+ int privSz, pubSz = 0;
byte b;
int ret = 0;
+ int curve_id = ECC_CURVE_DEF;
#ifdef WOLFSSL_SMALL_STACK
byte* priv;
byte* pub;
#else
- byte priv[ECC_MAXSIZE];
- byte pub[ECC_MAXSIZE * 2 + 1]; /* public key has two parts plus header */
+ byte priv[ECC_MAXSIZE+1];
+ byte pub[2*(ECC_MAXSIZE+1)]; /* public key has two parts plus header */
#endif
+ byte* pubData = NULL;
if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
return BAD_FUNC_ARG;
@@ -6619,14 +15187,17 @@ int wc_EccPrivateKeyDecode(const byte* input, word32* inOutIdx, ecc_key* key,
if (GetSequence(input, inOutIdx, &length, inSz) < 0)
return ASN_PARSE_E;
- if (GetMyVersion(input, inOutIdx, &version) < 0)
+ if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (*inOutIdx >= inSz)
return ASN_PARSE_E;
b = input[*inOutIdx];
*inOutIdx += 1;
/* priv type */
- if (b != 4 && b != 6 && b != 7)
+ if (b != 4 && b != 6 && b != 7)
return ASN_PARSE_E;
if (GetLength(input, inOutIdx, &length, inSz) < 0)
@@ -6636,13 +15207,13 @@ int wc_EccPrivateKeyDecode(const byte* input, word32* inOutIdx, ecc_key* key,
return BUFFER_E;
#ifdef WOLFSSL_SMALL_STACK
- priv = (byte*)XMALLOC(ECC_MAXSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ priv = (byte*)XMALLOC(ECC_MAXSIZE+1, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
if (priv == NULL)
return MEMORY_E;
-
- pub = (byte*)XMALLOC(ECC_MAXSIZE * 2 + 1, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ pub = (byte*)XMALLOC(2*(ECC_MAXSIZE+1), key->heap, DYNAMIC_TYPE_TMP_BUFFER);
if (pub == NULL) {
- XFREE(priv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(priv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
return MEMORY_E;
}
#endif
@@ -6652,36 +15223,30 @@ int wc_EccPrivateKeyDecode(const byte* input, word32* inOutIdx, ecc_key* key,
XMEMCPY(priv, &input[*inOutIdx], privSz);
*inOutIdx += length;
- /* prefix 0, may have */
- b = input[*inOutIdx];
- if (b == ECC_PREFIX_0) {
- *inOutIdx += 1;
-
- if (GetLength(input, inOutIdx, &length, inSz) < 0)
- ret = ASN_PARSE_E;
- else {
- /* object id */
- b = input[*inOutIdx];
+ if ((*inOutIdx + 1) < inSz) {
+ /* prefix 0, may have */
+ b = input[*inOutIdx];
+ if (b == ECC_PREFIX_0) {
*inOutIdx += 1;
- if (b != ASN_OBJECT_ID) {
- ret = ASN_OBJECT_ID_E;
- }
- else if (GetLength(input, inOutIdx, &length, inSz) < 0) {
+ if (GetLength(input, inOutIdx, &length, inSz) <= 0)
ret = ASN_PARSE_E;
- }
else {
- while(length--) {
- oid += input[*inOutIdx];
- *inOutIdx += 1;
+ ret = GetObjectId(input, inOutIdx, &oidSum, oidIgnoreType,
+ inSz);
+ if (ret == 0) {
+ if ((ret = CheckCurve(oidSum)) < 0)
+ ret = ECC_CURVE_OID_E;
+ else {
+ curve_id = ret;
+ ret = 0;
+ }
}
- if (CheckCurve(oid) < 0)
- ret = ECC_CURVE_OID_E;
}
}
}
- if (ret == 0) {
+ if (ret == 0 && (*inOutIdx + 1) < inSz) {
/* prefix 1 */
b = input[*inOutIdx];
*inOutIdx += 1;
@@ -6689,142 +15254,879 @@ int wc_EccPrivateKeyDecode(const byte* input, word32* inOutIdx, ecc_key* key,
if (b != ECC_PREFIX_1) {
ret = ASN_ECC_KEY_E;
}
- else if (GetLength(input, inOutIdx, &length, inSz) < 0) {
+ else if (GetLength(input, inOutIdx, &length, inSz) <= 0) {
ret = ASN_PARSE_E;
}
else {
/* key header */
- b = input[*inOutIdx];
- *inOutIdx += 1;
-
- if (b != ASN_BIT_STRING) {
- ret = ASN_BITSTR_E;
- }
- else if (GetLength(input, inOutIdx, &length, inSz) < 0) {
- ret = ASN_PARSE_E;
- }
- else {
- b = input[*inOutIdx];
- *inOutIdx += 1;
-
- if (b != 0x00) {
- ret = ASN_EXPECT_0_E;
- }
- else {
- /* pub key */
- pubSz = length - 1; /* null prefix */
- if (pubSz < (ECC_MAXSIZE*2 + 1)) {
- XMEMCPY(pub, &input[*inOutIdx], pubSz);
- *inOutIdx += length;
- ret = wc_ecc_import_private_key(priv, privSz, pub, pubSz,
- key);
- } else
- ret = BUFFER_E;
+ ret = CheckBitString(input, inOutIdx, &length, inSz, 0, NULL);
+ if (ret == 0) {
+ /* pub key */
+ pubSz = length;
+ if (pubSz < 2*(ECC_MAXSIZE+1)) {
+ XMEMCPY(pub, &input[*inOutIdx], pubSz);
+ *inOutIdx += length;
+ pubData = pub;
}
+ else
+ ret = BUFFER_E;
}
}
}
+ if (ret == 0) {
+ ret = wc_ecc_import_private_key_ex(priv, privSz, pubData, pubSz, key,
+ curve_id);
+ }
+
#ifdef WOLFSSL_SMALL_STACK
- XFREE(priv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(pub, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(priv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return ret;
}
-#ifdef WOLFSSL_KEY_GEN
+#ifdef WOLFSSL_CUSTOM_CURVES
+static void ByteToHex(byte n, char* str)
+{
+ const char hexChar[] = { '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
-/* Write a Private ecc key to DER format, length on success else < 0 */
-int wc_EccKeyToDer(ecc_key* key, byte* output, word32 inLen)
+ str[0] = hexChar[n >> 4];
+ str[1] = hexChar[n & 0xf];
+}
+
+/* returns 0 on success */
+static int ASNToHexString(const byte* input, word32* inOutIdx, char** out,
+ word32 inSz, void* heap, int heapType)
+{
+ int len;
+ int i;
+ char* str;
+ word32 localIdx;
+ byte tag;
+
+ if (*inOutIdx >= inSz) {
+ return BUFFER_E;
+ }
+
+ localIdx = *inOutIdx;
+ if (GetASNTag(input, &localIdx, &tag, inSz) == 0 && tag == ASN_INTEGER) {
+ if (GetASNInt(input, inOutIdx, &len, inSz) < 0)
+ return ASN_PARSE_E;
+ }
+ else {
+ if (GetOctetString(input, inOutIdx, &len, inSz) < 0)
+ return ASN_PARSE_E;
+ }
+
+ str = (char*)XMALLOC(len * 2 + 1, heap, heapType);
+ for (i=0; i<len; i++)
+ ByteToHex(input[*inOutIdx + i], str + i*2);
+ str[len*2] = '\0';
+
+ *inOutIdx += len;
+ *out = str;
+
+ (void)heap;
+ (void)heapType;
+
+ return 0;
+}
+#endif /* WOLFSSL_CUSTOM_CURVES */
+
+#ifdef WOLFSSL_CUSTOM_CURVES
+static int EccKeyParamCopy(char** dst, char* src)
+{
+ int ret = 0;
+#ifdef WOLFSSL_ECC_CURVE_STATIC
+ word32 length;
+#endif
+
+ if (dst == NULL || src == NULL)
+ return BAD_FUNC_ARG;
+
+#ifndef WOLFSSL_ECC_CURVE_STATIC
+ *dst = src;
+#else
+ length = (int)XSTRLEN(src) + 1;
+ if (length > MAX_ECC_STRING) {
+ WOLFSSL_MSG("ECC Param too large for buffer");
+ ret = BUFFER_E;
+ }
+ else {
+ XSTRNCPY(*dst, src, length);
+ }
+ XFREE(src, key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
+
+ return ret;
+}
+#endif /* WOLFSSL_CUSTOM_CURVES */
+
+int wc_EccPublicKeyDecode(const byte* input, word32* inOutIdx,
+ ecc_key* key, word32 inSz)
+{
+ int length;
+ int ret;
+ int curve_id = ECC_CURVE_DEF;
+ word32 oidSum, localIdx;
+ byte tag;
+
+ if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
+ return BAD_FUNC_ARG;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ ret = SkipObjectId(input, inOutIdx, inSz);
+ if (ret != 0)
+ return ret;
+
+ if (*inOutIdx >= inSz) {
+ return BUFFER_E;
+ }
+
+ localIdx = *inOutIdx;
+ if (GetASNTag(input, &localIdx, &tag, inSz) == 0 &&
+ tag == (ASN_SEQUENCE | ASN_CONSTRUCTED)) {
+#ifdef WOLFSSL_CUSTOM_CURVES
+ ecc_set_type* curve;
+ int len;
+ char* point = NULL;
+
+ ret = 0;
+
+ curve = (ecc_set_type*)XMALLOC(sizeof(*curve), key->heap,
+ DYNAMIC_TYPE_ECC_BUFFER);
+ if (curve == NULL)
+ ret = MEMORY_E;
+
+ if (ret == 0) {
+ static const char customName[] = "Custom";
+ XMEMSET(curve, 0, sizeof(*curve));
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ curve->name = customName;
+ #else
+ XMEMCPY((void*)curve->name, customName, sizeof(customName));
+ #endif
+ curve->id = ECC_CURVE_CUSTOM;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0) {
+ GetInteger7Bit(input, inOutIdx, inSz);
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ char* p = NULL;
+ SkipObjectId(input, inOutIdx, inSz);
+ ret = ASNToHexString(input, inOutIdx, &p, inSz,
+ key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+ if (ret == 0)
+ ret = EccKeyParamCopy((char**)&curve->prime, p);
+ }
+ if (ret == 0) {
+ curve->size = (int)XSTRLEN(curve->prime) / 2;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0) {
+ char* af = NULL;
+ ret = ASNToHexString(input, inOutIdx, &af, inSz,
+ key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+ if (ret == 0)
+ ret = EccKeyParamCopy((char**)&curve->Af, af);
+ }
+ if (ret == 0) {
+ char* bf = NULL;
+ ret = ASNToHexString(input, inOutIdx, &bf, inSz,
+ key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+ if (ret == 0)
+ ret = EccKeyParamCopy((char**)&curve->Bf, bf);
+ }
+ if (ret == 0) {
+ localIdx = *inOutIdx;
+ if (*inOutIdx < inSz && GetASNTag(input, &localIdx, &tag, inSz)
+ == 0 && tag == ASN_BIT_STRING) {
+ len = 0;
+ ret = GetASNHeader(input, ASN_BIT_STRING, inOutIdx, &len, inSz);
+ *inOutIdx += len;
+ }
+ }
+ if (ret == 0) {
+ ret = ASNToHexString(input, inOutIdx, (char**)&point, inSz,
+ key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+
+ /* sanity check that point buffer is not smaller than the expected
+ * size to hold ( 0 4 || Gx || Gy )
+ * where Gx and Gy are each the size of curve->size * 2 */
+ if (ret == 0 && (int)XSTRLEN(point) < (curve->size * 4) + 2) {
+ XFREE(point, key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+ ret = BUFFER_E;
+ }
+ }
+ if (ret == 0) {
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ curve->Gx = (const char*)XMALLOC(curve->size * 2 + 2, key->heap,
+ DYNAMIC_TYPE_ECC_BUFFER);
+ curve->Gy = (const char*)XMALLOC(curve->size * 2 + 2, key->heap,
+ DYNAMIC_TYPE_ECC_BUFFER);
+ if (curve->Gx == NULL || curve->Gy == NULL) {
+ XFREE(point, key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+ ret = MEMORY_E;
+ }
+ #else
+ if (curve->size * 2 + 2 > MAX_ECC_STRING) {
+ WOLFSSL_MSG("curve size is too large to fit in buffer");
+ ret = BUFFER_E;
+ }
+ #endif
+ }
+ if (ret == 0) {
+ char* o = NULL;
+
+ XMEMCPY((char*)curve->Gx, point + 2, curve->size * 2);
+ XMEMCPY((char*)curve->Gy, point + curve->size * 2 + 2,
+ curve->size * 2);
+ ((char*)curve->Gx)[curve->size * 2] = '\0';
+ ((char*)curve->Gy)[curve->size * 2] = '\0';
+ XFREE(point, key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+ ret = ASNToHexString(input, inOutIdx, &o, inSz,
+ key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+ if (ret == 0)
+ ret = EccKeyParamCopy((char**)&curve->order, o);
+ }
+ if (ret == 0) {
+ curve->cofactor = GetInteger7Bit(input, inOutIdx, inSz);
+
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ curve->oid = NULL;
+ #else
+ XMEMSET((void*)curve->oid, 0, sizeof(curve->oid));
+ #endif
+ curve->oidSz = 0;
+ curve->oidSum = 0;
+
+ if (wc_ecc_set_custom_curve(key, curve) < 0) {
+ ret = ASN_PARSE_E;
+ }
+ #ifdef WOLFSSL_CUSTOM_CURVES
+ key->deallocSet = 1;
+ #endif
+ curve = NULL;
+ }
+ if (curve != NULL)
+ wc_ecc_free_curve(curve, key->heap);
+
+ if (ret < 0)
+ return ret;
+#else
+ return ASN_PARSE_E;
+#endif /* WOLFSSL_CUSTOM_CURVES */
+ }
+ else {
+ /* ecc params information */
+ ret = GetObjectId(input, inOutIdx, &oidSum, oidIgnoreType, inSz);
+ if (ret != 0)
+ return ret;
+
+ /* get curve id */
+ curve_id = wc_ecc_get_oid(oidSum, NULL, 0);
+ if (curve_id < 0)
+ return ECC_CURVE_OID_E;
+ }
+
+ /* key header */
+ ret = CheckBitString(input, inOutIdx, &length, inSz, 1, NULL);
+ if (ret != 0)
+ return ret;
+
+ /* This is the raw point data compressed or uncompressed. */
+ if (wc_ecc_import_x963_ex(input + *inOutIdx, length, key,
+ curve_id) != 0) {
+ return ASN_ECC_KEY_E;
+ }
+
+ *inOutIdx += length;
+
+ return 0;
+}
+
+#if defined(HAVE_ECC_KEY_EXPORT) && !defined(NO_ASN_CRYPT)
+/* build DER formatted ECC key, include optional public key if requested,
+ * return length on success, negative on error */
+static int wc_BuildEccKeyDer(ecc_key* key, byte* output, word32 inLen,
+ int pubIn)
{
- byte curve[MAX_ALGO_SZ];
+ byte curve[MAX_ALGO_SZ+2];
byte ver[MAX_VERSION_SZ];
byte seq[MAX_SEQ_SZ];
- int ret;
- int curveSz;
- int verSz;
+ byte *prv = NULL, *pub = NULL;
+ int ret, totalSz, curveSz, verSz;
int privHdrSz = ASN_ECC_HEADER_SZ;
int pubHdrSz = ASN_ECC_CONTEXT_SZ + ASN_ECC_HEADER_SZ;
- int curveHdrSz = ASN_ECC_CONTEXT_SZ;
- word32 seqSz;
- word32 idx = 0;
- word32 pubSz = ECC_BUFSIZE;
- word32 privSz;
- word32 totalSz;
+
+ word32 idx = 0, prvidx = 0, pubidx = 0, curveidx = 0;
+ word32 seqSz, privSz, pubSz = ECC_BUFSIZE;
if (key == NULL || output == NULL || inLen == 0)
return BAD_FUNC_ARG;
- ret = wc_ecc_export_x963(key, NULL, &pubSz);
- if (ret != LENGTH_ONLY_E) {
- return ret;
- }
- curveSz = SetCurve(key, curve);
- if (curveSz < 0) {
+ /* curve */
+ curve[curveidx++] = ECC_PREFIX_0;
+ curveidx++ /* to put the size after computation */;
+ curveSz = SetCurve(key, curve+curveidx);
+ if (curveSz < 0)
return curveSz;
- }
+ /* set computed size */
+ curve[1] = (byte)curveSz;
+ curveidx += curveSz;
+ /* private */
privSz = key->dp->size;
+ prv = (byte*)XMALLOC(privSz + privHdrSz + MAX_SEQ_SZ,
+ key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (prv == NULL) {
+ return MEMORY_E;
+ }
+ prvidx += SetOctetString8Bit(key->dp->size, &prv[prvidx]);
+ ret = wc_ecc_export_private_only(key, prv + prvidx, &privSz);
+ if (ret < 0) {
+ XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+ }
+ prvidx += privSz;
- verSz = SetMyVersion(1, ver, FALSE);
- if (verSz < 0) {
- return verSz;
+ /* pubIn */
+ if (pubIn) {
+ ret = wc_ecc_export_x963(key, NULL, &pubSz);
+ if (ret != LENGTH_ONLY_E) {
+ XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+ }
+
+ pub = (byte*)XMALLOC(pubSz + pubHdrSz + MAX_SEQ_SZ,
+ key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pub == NULL) {
+ XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
+
+ pub[pubidx++] = ECC_PREFIX_1;
+ if (pubSz > 128) /* leading zero + extra size byte */
+ pubidx += SetLength(pubSz + ASN_ECC_CONTEXT_SZ + 2, pub+pubidx);
+ else /* leading zero */
+ pubidx += SetLength(pubSz + ASN_ECC_CONTEXT_SZ + 1, pub+pubidx);
+
+ /* SetBitString adds leading zero */
+ pubidx += SetBitString(pubSz, 0, pub + pubidx);
+ ret = wc_ecc_export_x963(key, pub + pubidx, &pubSz);
+ if (ret != 0) {
+ XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+ }
+ pubidx += pubSz;
}
- totalSz = verSz + privSz + privHdrSz + curveSz + curveHdrSz +
- pubSz + pubHdrSz + 1; /* plus null byte b4 public */
- seqSz = SetSequence(totalSz, seq);
- totalSz += seqSz;
+ /* make headers */
+ verSz = SetMyVersion(1, ver, FALSE);
+ seqSz = SetSequence(verSz + prvidx + pubidx + curveidx, seq);
- if (totalSz > inLen) {
- return BUFFER_E;
+ totalSz = prvidx + pubidx + curveidx + verSz + seqSz;
+ if (totalSz > (int)inLen) {
+ XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pubIn) {
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ return BAD_FUNC_ARG;
}
- /* write it out */
+ /* write out */
/* seq */
XMEMCPY(output + idx, seq, seqSz);
- idx += seqSz;
+ idx = seqSz;
- /* ver */
+ /* ver */
XMEMCPY(output + idx, ver, verSz);
idx += verSz;
/* private */
- output[idx++] = ASN_OCTET_STRING;
- output[idx++] = (byte)privSz;
- ret = wc_ecc_export_private_only(key, output + idx, &privSz);
+ XMEMCPY(output + idx, prv, prvidx);
+ idx += prvidx;
+ XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ /* curve */
+ XMEMCPY(output + idx, curve, curveidx);
+ idx += curveidx;
+
+ /* pubIn */
+ if (pubIn) {
+ XMEMCPY(output + idx, pub, pubidx);
+ /* idx += pubidx; not used after write, if more data remove comment */
+ XFREE(pub, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ return totalSz;
+}
+
+/* Write a Private ecc key, including public to DER format,
+ * length on success else < 0 */
+int wc_EccKeyToDer(ecc_key* key, byte* output, word32 inLen)
+{
+ return wc_BuildEccKeyDer(key, output, inLen, 1);
+}
+
+
+/* Write only private ecc key to DER format,
+ * length on success else < 0 */
+int wc_EccPrivateKeyToDer(ecc_key* key, byte* output, word32 inLen)
+{
+ return wc_BuildEccKeyDer(key, output, inLen, 0);
+}
+
+#ifdef HAVE_PKCS8
+/* Write only private ecc key to unencrypted PKCS#8 format.
+ *
+ * If output is NULL, places required PKCS#8 buffer size in outLen and
+ * returns LENGTH_ONLY_E.
+ *
+ * return length on success else < 0 */
+int wc_EccPrivateKeyToPKCS8(ecc_key* key, byte* output, word32* outLen)
+{
+ int ret, tmpDerSz;
+ int algoID = 0;
+ word32 oidSz = 0;
+ word32 pkcs8Sz = 0;
+ const byte* curveOID = NULL;
+ byte* tmpDer = NULL;
+
+ if (key == NULL || outLen == NULL)
+ return BAD_FUNC_ARG;
+
+ /* set algoID, get curve OID */
+ algoID = ECDSAk;
+ ret = wc_ecc_get_oid(key->dp->oidSum, &curveOID, &oidSz);
+ if (ret < 0)
+ return ret;
+
+ /* temp buffer for plain DER key */
+ tmpDer = (byte*)XMALLOC(ECC_BUFSIZE, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmpDer == NULL)
+ return MEMORY_E;
+
+ XMEMSET(tmpDer, 0, ECC_BUFSIZE);
+
+ tmpDerSz = wc_BuildEccKeyDer(key, tmpDer, ECC_BUFSIZE, 0);
+ if (tmpDerSz < 0) {
+ XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return tmpDerSz;
+ }
+
+ /* get pkcs8 expected output size */
+ ret = wc_CreatePKCS8Key(NULL, &pkcs8Sz, tmpDer, tmpDerSz, algoID,
+ curveOID, oidSz);
+ if (ret != LENGTH_ONLY_E) {
+ XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+ }
+
+ if (output == NULL) {
+ XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ *outLen = pkcs8Sz;
+ return LENGTH_ONLY_E;
+
+ } else if (*outLen < pkcs8Sz) {
+ XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ WOLFSSL_MSG("Input buffer too small for ECC PKCS#8 key");
+ return BUFFER_E;
+ }
+
+ ret = wc_CreatePKCS8Key(output, &pkcs8Sz, tmpDer, tmpDerSz,
+ algoID, curveOID, oidSz);
if (ret < 0) {
+ XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
return ret;
}
- idx += privSz;
- /* curve */
- output[idx++] = ECC_PREFIX_0;
- output[idx++] = (byte)curveSz;
- XMEMCPY(output + idx, curve, curveSz);
- idx += curveSz;
-
- /* public */
- output[idx++] = ECC_PREFIX_1;
- output[idx++] = (byte)pubSz + ASN_ECC_CONTEXT_SZ + 1; /* plus null byte */
- output[idx++] = ASN_BIT_STRING;
- output[idx++] = (byte)pubSz + 1; /* plus null byte */
- output[idx++] = (byte)0; /* null byte */
- ret = wc_ecc_export_x963(key, output + idx, &pubSz);
- if (ret != 0) {
+ XFREE(tmpDer, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ *outLen = ret;
+ return ret;
+}
+#endif /* HAVE_PKCS8 */
+#endif /* HAVE_ECC_KEY_EXPORT && !NO_ASN_CRYPT */
+#endif /* HAVE_ECC */
+
+
+#ifdef HAVE_ED25519
+
+int wc_Ed25519PrivateKeyDecode(const byte* input, word32* inOutIdx,
+ ed25519_key* key, word32 inSz)
+{
+ word32 oid;
+ int ret, version, length, endKeyIdx, privSz, pubSz;
+ const byte* priv;
+ const byte* pub;
+
+ if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
+ return BAD_FUNC_ARG;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) >= 0) {
+ endKeyIdx = *inOutIdx + length;
+
+ if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
+ return ASN_PARSE_E;
+ if (version != 0) {
+ WOLFSSL_MSG("Unrecognized version of ED25519 private key");
+ return ASN_PARSE_E;
+ }
+
+ if (GetAlgoId(input, inOutIdx, &oid, oidKeyType, inSz) < 0)
+ return ASN_PARSE_E;
+ if (oid != ED25519k)
+ return ASN_PARSE_E;
+
+ if (GetOctetString(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0)
+ return ASN_PARSE_E;
+
+ priv = input + *inOutIdx;
+ *inOutIdx += privSz;
+ }
+ else {
+ if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (privSz != 32)
+ return ASN_PARSE_E;
+
+ priv = input + *inOutIdx;
+ *inOutIdx += privSz;
+ endKeyIdx = *inOutIdx;
+ }
+
+ if (endKeyIdx == (int)*inOutIdx) {
+ ret = wc_ed25519_import_private_only(priv, privSz, key);
+ }
+ else {
+ if (GetASNHeader(input, ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1,
+ inOutIdx, &length, inSz) < 0) {
+ return ASN_PARSE_E;
+ }
+ if (GetOctetString(input, inOutIdx, &pubSz, inSz) < 0)
+ return ASN_PARSE_E;
+ pub = input + *inOutIdx;
+ *inOutIdx += pubSz;
+
+ ret = wc_ed25519_import_private_key(priv, privSz, pub, pubSz, key);
+ }
+ if (ret == 0 && endKeyIdx != (int)*inOutIdx)
+ return ASN_PARSE_E;
+
+ return ret;
+}
+
+
+int wc_Ed25519PublicKeyDecode(const byte* input, word32* inOutIdx,
+ ed25519_key* key, word32 inSz)
+{
+ int length;
+ int ret;
+
+ if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
+ return BAD_FUNC_ARG;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ ret = SkipObjectId(input, inOutIdx, inSz);
+ if (ret != 0)
+ return ret;
+
+ /* key header */
+ ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL);
+ if (ret != 0)
return ret;
+
+ /* This is the raw point data compressed or uncompressed. */
+ if (wc_ed25519_import_public(input + *inOutIdx, inSz - *inOutIdx, key) != 0)
+ return ASN_ECC_KEY_E;
+
+ return 0;
+}
+
+
+#ifdef WOLFSSL_KEY_GEN
+
+/* build DER formatted ED25519 key,
+ * return length on success, negative on error */
+static int wc_BuildEd25519KeyDer(ed25519_key* key, byte* output, word32 inLen,
+ int pubOut)
+{
+ byte algoArray[MAX_ALGO_SZ];
+ byte ver[MAX_VERSION_SZ];
+ byte seq[MAX_SEQ_SZ];
+ int ret;
+ word32 idx = 0, seqSz, verSz, algoSz, privSz, pubSz = 0;
+
+ if (key == NULL || output == NULL || inLen == 0)
+ return BAD_FUNC_ARG;
+
+ if (pubOut)
+ pubSz = 2 + 2 + ED25519_PUB_KEY_SIZE;
+ privSz = 2 + 2 + ED25519_KEY_SIZE;
+ algoSz = SetAlgoID(ED25519k, algoArray, oidKeyType, 0);
+ verSz = SetMyVersion(0, ver, FALSE);
+ seqSz = SetSequence(verSz + algoSz + privSz + pubSz, seq);
+
+ if (seqSz + verSz + algoSz + privSz + pubSz > inLen)
+ return BAD_FUNC_ARG;
+
+ /* write out */
+ /* seq */
+ XMEMCPY(output + idx, seq, seqSz);
+ idx = seqSz;
+ /* ver */
+ XMEMCPY(output + idx, ver, verSz);
+ idx += verSz;
+ /* algo */
+ XMEMCPY(output + idx, algoArray, algoSz);
+ idx += algoSz;
+ /* privKey */
+ idx += SetOctetString(2 + ED25519_KEY_SIZE, output + idx);
+ idx += SetOctetString(ED25519_KEY_SIZE, output + idx);
+ ret = wc_ed25519_export_private_only(key, output + idx, &privSz);
+ if (ret != 0)
+ return ret;
+ idx += privSz;
+ /* pubKey */
+ if (pubOut) {
+ idx += SetExplicit(1, 2 + ED25519_PUB_KEY_SIZE, output + idx);
+ idx += SetOctetString(ED25519_KEY_SIZE, output + idx);
+ ret = wc_ed25519_export_public(key, output + idx, &pubSz);
+ if (ret != 0)
+ return ret;
+ idx += pubSz;
}
- /* idx += pubSz if do more later */
- return totalSz;
+ return idx;
+}
+
+/* Write a Private ecc key, including public to DER format,
+ * length on success else < 0 */
+int wc_Ed25519KeyToDer(ed25519_key* key, byte* output, word32 inLen)
+{
+ return wc_BuildEd25519KeyDer(key, output, inLen, 1);
+}
+
+
+
+/* Write only private ecc key to DER format,
+ * length on success else < 0 */
+int wc_Ed25519PrivateKeyToDer(ed25519_key* key, byte* output, word32 inLen)
+{
+ return wc_BuildEd25519KeyDer(key, output, inLen, 0);
}
#endif /* WOLFSSL_KEY_GEN */
-#endif /* HAVE_ECC */
+#endif /* HAVE_ED25519 */
+#ifdef HAVE_ED448
+
+int wc_Ed448PrivateKeyDecode(const byte* input, word32* inOutIdx,
+ ed448_key* key, word32 inSz)
+{
+ word32 oid;
+ int ret, version, length, endKeyIdx, privSz, pubSz;
+ const byte* priv;
+ const byte* pub;
+
+ if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
+ return BAD_FUNC_ARG;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) >= 0) {
+ endKeyIdx = *inOutIdx + length;
+
+ if (GetMyVersion(input, inOutIdx, &version, inSz) < 0)
+ return ASN_PARSE_E;
+ if (version != 0) {
+ WOLFSSL_MSG("Unrecognized version of ED448 private key");
+ return ASN_PARSE_E;
+ }
+
+ if (GetAlgoId(input, inOutIdx, &oid, oidKeyType, inSz) < 0)
+ return ASN_PARSE_E;
+ if (oid != ED448k)
+ return ASN_PARSE_E;
+
+ if (GetOctetString(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0)
+ return ASN_PARSE_E;
+
+ priv = input + *inOutIdx;
+ *inOutIdx += privSz;
+ }
+ else {
+ if (GetOctetString(input, inOutIdx, &privSz, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (privSz != 57)
+ return ASN_PARSE_E;
+
+ priv = input + *inOutIdx;
+ *inOutIdx += privSz;
+ endKeyIdx = *inOutIdx;
+ }
+
+ if (endKeyIdx == (int)*inOutIdx) {
+ ret = wc_ed448_import_private_only(priv, privSz, key);
+ }
+ else {
+ if (GetASNHeader(input, ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1,
+ inOutIdx, &length, inSz) < 0) {
+ return ASN_PARSE_E;
+ }
+ if (GetOctetString(input, inOutIdx, &pubSz, inSz) < 0)
+ return ASN_PARSE_E;
+ pub = input + *inOutIdx;
+ *inOutIdx += pubSz;
+
+ ret = wc_ed448_import_private_key(priv, privSz, pub, pubSz, key);
+ }
+ if (ret == 0 && endKeyIdx != (int)*inOutIdx)
+ return ASN_PARSE_E;
+
+ return ret;
+}
+
+
+int wc_Ed448PublicKeyDecode(const byte* input, word32* inOutIdx,
+ ed448_key* key, word32 inSz)
+{
+ int length;
+ int ret;
+
+ if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
+ return BAD_FUNC_ARG;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetSequence(input, inOutIdx, &length, inSz) < 0)
+ return ASN_PARSE_E;
+
+ ret = SkipObjectId(input, inOutIdx, inSz);
+ if (ret != 0)
+ return ret;
+
+ /* key header */
+ ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL);
+ if (ret != 0)
+ return ret;
+
+ /* This is the raw point data compressed or uncompressed. */
+ if (wc_ed448_import_public(input + *inOutIdx, inSz - *inOutIdx, key) != 0)
+ return ASN_ECC_KEY_E;
+
+ return 0;
+}
+
+
+#ifdef WOLFSSL_KEY_GEN
+
+/* build DER formatted ED448 key,
+ * return length on success, negative on error */
+static int wc_BuildEd448KeyDer(ed448_key* key, byte* output, word32 inLen,
+ int pubOut)
+{
+ byte algoArray[MAX_ALGO_SZ];
+ byte ver[MAX_VERSION_SZ];
+ byte seq[MAX_SEQ_SZ];
+ int ret;
+ word32 idx = 0, seqSz, verSz, algoSz, privSz, pubSz = 0;
+
+ if (key == NULL || output == NULL || inLen == 0)
+ return BAD_FUNC_ARG;
+
+ if (pubOut) {
+ pubSz = 2 + 2 + ED448_PUB_KEY_SIZE;
+ }
+ privSz = 2 + 2 + ED448_KEY_SIZE;
+ algoSz = SetAlgoID(ED448k, algoArray, oidKeyType, 0);
+ verSz = SetMyVersion(0, ver, FALSE);
+ seqSz = SetSequence(verSz + algoSz + privSz + pubSz, seq);
+
+ if (seqSz + verSz + algoSz + privSz + pubSz > inLen)
+ return BAD_FUNC_ARG;
+
+ /* write out */
+ /* seq */
+ XMEMCPY(output + idx, seq, seqSz);
+ idx = seqSz;
+ /* ver */
+ XMEMCPY(output + idx, ver, verSz);
+ idx += verSz;
+ /* algo */
+ XMEMCPY(output + idx, algoArray, algoSz);
+ idx += algoSz;
+ /* privKey */
+ idx += SetOctetString(2 + ED448_KEY_SIZE, output + idx);
+ idx += SetOctetString(ED448_KEY_SIZE, output + idx);
+ ret = wc_ed448_export_private_only(key, output + idx, &privSz);
+ if (ret != 0)
+ return ret;
+ idx += privSz;
+ /* pubKey */
+ if (pubOut) {
+ idx += SetExplicit(1, 2 + ED448_PUB_KEY_SIZE, output + idx);
+ idx += SetOctetString(ED448_KEY_SIZE, output + idx);
+ ret = wc_ed448_export_public(key, output + idx, &pubSz);
+ if (ret != 0)
+ return ret;
+ idx += pubSz;
+ }
+
+ return idx;
+}
+
+/* Write a Private ecc key, including public to DER format,
+ * length on success else < 0 */
+int wc_Ed448KeyToDer(ed448_key* key, byte* output, word32 inLen)
+{
+ return wc_BuildEd448KeyDer(key, output, inLen, 1);
+}
+
+
+
+/* Write only private ecc key to DER format,
+ * length on success else < 0 */
+int wc_Ed448PrivateKeyToDer(ed448_key* key, byte* output, word32 inLen)
+{
+ return wc_BuildEd448KeyDer(key, output, inLen, 0);
+}
+
+#endif /* WOLFSSL_KEY_GEN */
+
+#endif /* HAVE_ED448 */
#if defined(HAVE_OCSP) || defined(HAVE_CRL)
@@ -6832,46 +16134,47 @@ int wc_EccKeyToDer(ecc_key* key, byte* output, word32 inLen)
static int GetBasicDate(const byte* source, word32* idx, byte* date,
byte* format, int maxIdx)
{
- int length;
+ int ret, length;
+ const byte *datePtr = NULL;
WOLFSSL_ENTER("GetBasicDate");
- *format = source[*idx];
- *idx += 1;
- if (*format != ASN_UTC_TIME && *format != ASN_GENERALIZED_TIME)
- return ASN_TIME_E;
-
- if (GetLength(source, idx, &length, maxIdx) < 0)
- return ASN_PARSE_E;
-
- if (length > MAX_DATE_SIZE || length < MIN_DATE_SIZE)
- return ASN_DATE_SZ_E;
+ ret = GetDateInfo(source, idx, &datePtr, format, &length, maxIdx);
+ if (ret < 0)
+ return ret;
- XMEMCPY(date, &source[*idx], length);
- *idx += length;
+ XMEMCPY(date, datePtr, length);
return 0;
}
-#endif
+#endif /* HAVE_OCSP || HAVE_CRL */
#ifdef HAVE_OCSP
-static int GetEnumerated(const byte* input, word32* inOutIdx, int *value)
+static int GetEnumerated(const byte* input, word32* inOutIdx, int *value,
+ int sz)
{
word32 idx = *inOutIdx;
word32 len;
+ byte tag;
WOLFSSL_ENTER("GetEnumerated");
*value = 0;
- if (input[idx++] != ASN_ENUMERATED)
+ if (GetASNTag(input, &idx, &tag, sz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_ENUMERATED)
return ASN_PARSE_E;
+ if ((int)idx >= sz)
+ return BUFFER_E;
+
len = input[idx++];
- if (len > 4)
+ if (len > 4 || (int)(len + idx) > sz)
return ASN_PARSE_E;
while (len--) {
@@ -6887,9 +16190,11 @@ static int GetEnumerated(const byte* input, word32* inOutIdx, int *value)
static int DecodeSingleResponse(byte* source,
word32* ioIndex, OcspResponse* resp, word32 size)
{
- word32 idx = *ioIndex, prevIndex, oid;
+ word32 idx = *ioIndex, prevIndex, oid, localIdx;
int length, wrapperSz;
CertStatus* cs = resp->status;
+ int ret;
+ byte tag;
WOLFSSL_ENTER("DecodeSingleResponse");
@@ -6910,45 +16215,27 @@ static int DecodeSingleResponse(byte* source,
if (GetSequence(source, &idx, &length, size) < 0)
return ASN_PARSE_E;
/* Skip the hash algorithm */
- if (GetAlgoId(source, &idx, &oid, size) < 0)
+ if (GetAlgoId(source, &idx, &oid, oidIgnoreType, size) < 0)
return ASN_PARSE_E;
/* Save reference to the hash of CN */
- if (source[idx++] != ASN_OCTET_STRING)
- return ASN_PARSE_E;
- if (GetLength(source, &idx, &length, size) < 0)
- return ASN_PARSE_E;
+ ret = GetOctetString(source, &idx, &length, size);
+ if (ret < 0)
+ return ret;
resp->issuerHash = source + idx;
idx += length;
/* Save reference to the hash of the issuer public key */
- if (source[idx++] != ASN_OCTET_STRING)
- return ASN_PARSE_E;
- if (GetLength(source, &idx, &length, size) < 0)
- return ASN_PARSE_E;
+ ret = GetOctetString(source, &idx, &length, size);
+ if (ret < 0)
+ return ret;
resp->issuerKeyHash = source + idx;
idx += length;
- /* Read the serial number, it is handled as a string, not as a
- * proper number. Just XMEMCPY the data over, rather than load it
- * as an mp_int. */
- if (source[idx++] != ASN_INTEGER)
- return ASN_PARSE_E;
- if (GetLength(source, &idx, &length, size) < 0)
+ /* Get serial number */
+ if (GetSerialNumber(source, &idx, cs->serial, &cs->serialSz, size) < 0)
return ASN_PARSE_E;
- if (length <= EXTERNAL_SERIAL_SIZE)
- {
- if (source[idx] == 0)
- {
- idx++;
- length--;
- }
- XMEMCPY(cs->serial, source + idx, length);
- cs->serialSz = length;
- }
- else
- {
- return ASN_GETINT_E;
- }
- idx += length;
+
+ if ( idx >= size )
+ return BUFFER_E;
/* CertStatus */
switch (source[idx++])
@@ -6971,27 +16258,66 @@ static int DecodeSingleResponse(byte* source,
return ASN_PARSE_E;
}
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+ cs->thisDateAsn = source + idx;
+ localIdx = 0;
+ if (GetDateInfo(cs->thisDateAsn, &localIdx, NULL,
+ (byte*)&cs->thisDateParsed.type,
+ &cs->thisDateParsed.length, size) < 0)
+ return ASN_PARSE_E;
+ XMEMCPY(cs->thisDateParsed.data,
+ cs->thisDateAsn + localIdx - cs->thisDateParsed.length,
+ cs->thisDateParsed.length);
+#endif
if (GetBasicDate(source, &idx, cs->thisDate,
&cs->thisDateFormat, size) < 0)
return ASN_PARSE_E;
+
+#ifndef NO_ASN_TIME
+#ifndef WOLFSSL_NO_OCSP_DATE_CHECK
if (!XVALIDATE_DATE(cs->thisDate, cs->thisDateFormat, BEFORE))
return ASN_BEFORE_DATE_E;
-
+#endif
+#endif
+
/* The following items are optional. Only check for them if there is more
* unprocessed data in the singleResponse wrapper. */
-
+
+ localIdx = idx;
if (((int)(idx - prevIndex) < wrapperSz) &&
- (source[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)))
+ GetASNTag(source, &localIdx, &tag, size) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
{
idx++;
if (GetLength(source, &idx, &length, size) < 0)
return ASN_PARSE_E;
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY)
+ cs->nextDateAsn = source + idx;
+ localIdx = 0;
+ if (GetDateInfo(cs->nextDateAsn, &localIdx, NULL,
+ (byte*)&cs->nextDateParsed.type,
+ &cs->nextDateParsed.length, size) < 0)
+ return ASN_PARSE_E;
+ XMEMCPY(cs->nextDateParsed.data,
+ cs->nextDateAsn + localIdx - cs->nextDateParsed.length,
+ cs->nextDateParsed.length);
+#endif
if (GetBasicDate(source, &idx, cs->nextDate,
&cs->nextDateFormat, size) < 0)
return ASN_PARSE_E;
+
+#ifndef NO_ASN_TIME
+#ifndef WOLFSSL_NO_OCSP_DATE_CHECK
+ if (!XVALIDATE_DATE(cs->nextDate, cs->nextDateFormat, AFTER))
+ return ASN_AFTER_DATE_E;
+#endif
+#endif
}
+
+ localIdx = idx;
if (((int)(idx - prevIndex) < wrapperSz) &&
- (source[idx] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)))
+ GetASNTag(source, &localIdx, &tag, size) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
{
idx++;
if (GetLength(source, &idx, &length, size) < 0)
@@ -7011,48 +16337,66 @@ static int DecodeOcspRespExtensions(byte* source,
int length;
int ext_bound; /* boundary index for the sequence of extensions */
word32 oid;
+ int ret;
+ byte tag;
WOLFSSL_ENTER("DecodeOcspRespExtensions");
- if (source[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
+ if ((idx + 1) > sz)
+ return BUFFER_E;
+
+ if (GetASNTag(source, &idx, &tag, sz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
return ASN_PARSE_E;
- if (GetLength(source, &idx, &length, sz) < 0) return ASN_PARSE_E;
+ if (GetLength(source, &idx, &length, sz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetSequence(source, &idx, &length, sz) < 0)
+ return ASN_PARSE_E;
- if (GetSequence(source, &idx, &length, sz) < 0) return ASN_PARSE_E;
-
ext_bound = idx + length;
while (idx < (word32)ext_bound) {
+ word32 localIdx;
+
if (GetSequence(source, &idx, &length, sz) < 0) {
WOLFSSL_MSG("\tfail: should be a SEQUENCE");
return ASN_PARSE_E;
}
oid = 0;
- if (GetObjectId(source, &idx, &oid, sz) < 0) {
+ if (GetObjectId(source, &idx, &oid, oidOcspType, sz) < 0) {
WOLFSSL_MSG("\tfail: OBJECT ID");
return ASN_PARSE_E;
}
/* check for critical flag */
- if (source[idx] == ASN_BOOLEAN) {
- WOLFSSL_MSG("\tfound optional critical flag, moving past");
- idx += (ASN_BOOL_SIZE + 1);
+ if ((idx + 1) > (word32)sz) {
+ WOLFSSL_MSG("\tfail: malformed buffer");
+ return BUFFER_E;
}
- /* process the extension based on the OID */
- if (source[idx++] != ASN_OCTET_STRING) {
- WOLFSSL_MSG("\tfail: should be an OCTET STRING");
- return ASN_PARSE_E;
+ localIdx = idx;
+ if (GetASNTag(source, &localIdx, &tag, sz) == 0 && tag == ASN_BOOLEAN) {
+ WOLFSSL_MSG("\tfound optional critical flag, moving past");
+ ret = GetBoolean(source, &idx, sz);
+ if (ret < 0)
+ return ret;
}
- if (GetLength(source, &idx, &length, sz) < 0) {
- WOLFSSL_MSG("\tfail: extension data length");
- return ASN_PARSE_E;
- }
+ ret = GetOctetString(source, &idx, &length, sz);
+ if (ret < 0)
+ return ret;
if (oid == OCSP_NONCE_OID) {
+ /* get data inside extra OCTET_STRING */
+ ret = GetOctetString(source, &idx, &length, sz);
+ if (ret < 0)
+ return ret;
+
resp->nonce = source + idx;
resp->nonceSz = length;
}
@@ -7068,10 +16412,11 @@ static int DecodeOcspRespExtensions(byte* source,
static int DecodeResponseData(byte* source,
word32* ioIndex, OcspResponse* resp, word32 size)
{
- word32 idx = *ioIndex, prev_idx;
+ word32 idx = *ioIndex, prev_idx, localIdx;
int length;
int version;
- word32 responderId = 0;
+ int ret;
+ byte tag;
WOLFSSL_ENTER("DecodeResponseData");
@@ -7085,49 +16430,64 @@ static int DecodeResponseData(byte* source,
* item isn't an EXPLICIT[0], then set version to zero and move
* onto the next item.
*/
- if (source[idx] == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED))
- {
+ localIdx = idx;
+ if (GetASNTag(source, &localIdx, &tag, size) == 0 &&
+ tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED))
+ {
idx += 2; /* Eat the value and length */
- if (GetMyVersion(source, &idx, &version) < 0)
+ if (GetMyVersion(source, &idx, &version, size) < 0)
return ASN_PARSE_E;
} else
version = 0;
- responderId = source[idx++];
- if ((responderId == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1)) ||
- (responderId == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 2)))
+ localIdx = idx;
+ if (GetASNTag(source, &localIdx, &tag, size) == 0 &&
+ ( tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 1) ||
+ tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 2) ))
{
+ idx++; /* advance past ASN tag */
if (GetLength(source, &idx, &length, size) < 0)
return ASN_PARSE_E;
idx += length;
}
else
return ASN_PARSE_E;
-
+
/* save pointer to the producedAt time */
if (GetBasicDate(source, &idx, resp->producedDate,
&resp->producedDateFormat, size) < 0)
return ASN_PARSE_E;
- if (DecodeSingleResponse(source, &idx, resp, size) < 0)
- return ASN_PARSE_E;
+ if ((ret = DecodeSingleResponse(source, &idx, resp, size)) < 0)
+ return ret; /* ASN_PARSE_E, ASN_BEFORE_DATE_E, ASN_AFTER_DATE_E */
- if (DecodeOcspRespExtensions(source, &idx, resp, size) < 0)
- return ASN_PARSE_E;
+ /*
+ * Check the length of the ResponseData against the current index to
+ * see if there are extensions, they are optional.
+ */
+ if (idx - prev_idx < resp->responseSz)
+ if (DecodeOcspRespExtensions(source, &idx, resp, size) < 0)
+ return ASN_PARSE_E;
*ioIndex = idx;
return 0;
}
+#ifndef WOLFSSL_NO_OCSP_OPTIONAL_CERTS
+
static int DecodeCerts(byte* source,
word32* ioIndex, OcspResponse* resp, word32 size)
{
word32 idx = *ioIndex;
+ byte tag;
WOLFSSL_ENTER("DecodeCerts");
- if (source[idx++] == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
+ if (GetASNTag(source, &idx, &tag, size) < 0)
+ return ASN_PARSE_E;
+
+ if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
{
int length;
@@ -7146,14 +16506,20 @@ static int DecodeCerts(byte* source,
return 0;
}
-static int DecodeBasicOcspResponse(byte* source,
- word32* ioIndex, OcspResponse* resp, word32 size)
+#endif /* WOLFSSL_NO_OCSP_OPTIONAL_CERTS */
+
+
+static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
+ OcspResponse* resp, word32 size, void* cm, void* heap, int noVerify)
{
- int length;
+ int length;
word32 idx = *ioIndex;
word32 end_index;
+ int ret;
+ int sigLength;
WOLFSSL_ENTER("DecodeBasicOcspResponse");
+ (void)heap;
if (GetSequence(source, &idx, &length, size) < 0)
return ASN_PARSE_E;
@@ -7162,51 +16528,102 @@ static int DecodeBasicOcspResponse(byte* source,
return ASN_INPUT_E;
end_index = idx + length;
- if (DecodeResponseData(source, &idx, resp, size) < 0)
- return ASN_PARSE_E;
-
+ if ((ret = DecodeResponseData(source, &idx, resp, size)) < 0)
+ return ret; /* ASN_PARSE_E, ASN_BEFORE_DATE_E, ASN_AFTER_DATE_E */
+
/* Get the signature algorithm */
- if (GetAlgoId(source, &idx, &resp->sigOID, size) < 0)
+ if (GetAlgoId(source, &idx, &resp->sigOID, oidSigType, size) < 0)
return ASN_PARSE_E;
- /* Obtain pointer to the start of the signature, and save the size */
- if (source[idx++] == ASN_BIT_STRING)
- {
- int sigLength = 0;
- if (GetLength(source, &idx, &sigLength, size) < 0)
- return ASN_PARSE_E;
- resp->sigSz = sigLength;
- resp->sig = source + idx;
- idx += sigLength;
- }
+ ret = CheckBitString(source, &idx, &sigLength, size, 1, NULL);
+ if (ret != 0)
+ return ret;
+
+ resp->sigSz = sigLength;
+ resp->sig = source + idx;
+ idx += sigLength;
/*
* Check the length of the BasicOcspResponse against the current index to
* see if there are certificates, they are optional.
*/
+#ifndef WOLFSSL_NO_OCSP_OPTIONAL_CERTS
if (idx < end_index)
{
DecodedCert cert;
- int ret;
if (DecodeCerts(source, &idx, resp, size) < 0)
return ASN_PARSE_E;
- InitDecodedCert(&cert, resp->cert, resp->certSz, 0);
- ret = ParseCertRelative(&cert, CA_TYPE, NO_VERIFY, 0);
- if (ret < 0)
+ InitDecodedCert(&cert, resp->cert, resp->certSz, heap);
+
+ /* Don't verify if we don't have access to Cert Manager. */
+ ret = ParseCertRelative(&cert, CERT_TYPE,
+ noVerify ? NO_VERIFY : VERIFY_OCSP, cm);
+ if (ret < 0) {
+ WOLFSSL_MSG("\tOCSP Responder certificate parsing failed");
+ FreeDecodedCert(&cert);
return ret;
+ }
+
+#ifndef WOLFSSL_NO_OCSP_ISSUER_CHECK
+ if ((cert.extExtKeyUsage & EXTKEYUSE_OCSP_SIGN) == 0) {
+ if (XMEMCMP(cert.subjectHash,
+ resp->issuerHash, KEYID_SIZE) == 0) {
+ WOLFSSL_MSG("\tOCSP Response signed by issuer");
+ }
+ else {
+ WOLFSSL_MSG("\tOCSP Responder key usage check failed");
+ #ifdef OPENSSL_EXTRA
+ resp->verifyError = OCSP_BAD_ISSUER;
+ #else
+ FreeDecodedCert(&cert);
+ return BAD_OCSP_RESPONDER;
+ #endif
+ }
+ }
+#endif
+
+ /* ConfirmSignature is blocking here */
+ ret = ConfirmSignature(&cert.sigCtx,
+ resp->response, resp->responseSz,
+ cert.publicKey, cert.pubKeySize, cert.keyOID,
+ resp->sig, resp->sigSz, resp->sigOID, NULL);
- ret = ConfirmSignature(resp->response, resp->responseSz,
- cert.publicKey, cert.pubKeySize, cert.keyOID,
- resp->sig, resp->sigSz, resp->sigOID, NULL);
FreeDecodedCert(&cert);
- if (ret == 0)
- {
+ if (ret != 0) {
+ WOLFSSL_MSG("\tOCSP Confirm signature failed");
+ return ASN_OCSP_CONFIRM_E;
+ }
+ }
+ else
+#endif /* WOLFSSL_NO_OCSP_OPTIONAL_CERTS */
+ {
+ Signer* ca;
+ int sigValid = -1;
+
+ #ifndef NO_SKID
+ ca = GetCA(cm, resp->issuerKeyHash);
+ #else
+ ca = GetCA(cm, resp->issuerHash);
+ #endif
+
+ if (ca) {
+ SignatureCtx sigCtx;
+ InitSignatureCtx(&sigCtx, heap, INVALID_DEVID);
+
+ /* ConfirmSignature is blocking here */
+ sigValid = ConfirmSignature(&sigCtx, resp->response,
+ resp->responseSz, ca->publicKey, ca->pubKeySize, ca->keyOID,
+ resp->sig, resp->sigSz, resp->sigOID, NULL);
+ }
+ if (ca == NULL || sigValid != 0) {
WOLFSSL_MSG("\tOCSP Confirm signature failed");
return ASN_OCSP_CONFIRM_E;
}
+
+ (void)noVerify;
}
*ioIndex = idx;
@@ -7219,39 +16636,34 @@ void InitOcspResponse(OcspResponse* resp, CertStatus* status,
{
WOLFSSL_ENTER("InitOcspResponse");
+ XMEMSET(status, 0, sizeof(CertStatus));
+ XMEMSET(resp, 0, sizeof(OcspResponse));
+
resp->responseStatus = -1;
- resp->response = NULL;
- resp->responseSz = 0;
- resp->producedDateFormat = 0;
- resp->issuerHash = NULL;
- resp->issuerKeyHash = NULL;
- resp->sig = NULL;
- resp->sigSz = 0;
- resp->sigOID = 0;
- resp->status = status;
- resp->nonce = NULL;
- resp->nonceSz = 0;
- resp->source = source;
- resp->maxIdx = inSz;
+ resp->status = status;
+ resp->source = source;
+ resp->maxIdx = inSz;
}
-int OcspResponseDecode(OcspResponse* resp)
+int OcspResponseDecode(OcspResponse* resp, void* cm, void* heap, int noVerify)
{
+ int ret;
int length = 0;
word32 idx = 0;
byte* source = resp->source;
word32 size = resp->maxIdx;
word32 oid;
+ byte tag;
WOLFSSL_ENTER("OcspResponseDecode");
/* peel the outer SEQUENCE wrapper */
if (GetSequence(source, &idx, &length, size) < 0)
return ASN_PARSE_E;
-
+
/* First get the responseStatus, an ENUMERATED */
- if (GetEnumerated(source, &idx, &resp->responseStatus) < 0)
+ if (GetEnumerated(source, &idx, &resp->responseStatus, size) < 0)
return ASN_PARSE_E;
if (resp->responseStatus != OCSP_SUCCESSFUL)
@@ -7260,7 +16672,9 @@ int OcspResponseDecode(OcspResponse* resp)
/* Next is an EXPLICIT record called ResponseBytes, OPTIONAL */
if (idx >= size)
return ASN_INPUT_E;
- if (source[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
+ if (GetASNTag(source, &idx, &tag, size) < 0)
+ return ASN_PARSE_E;
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
return ASN_PARSE_E;
if (GetLength(source, &idx, &length, size) < 0)
return ASN_PARSE_E;
@@ -7270,77 +16684,72 @@ int OcspResponseDecode(OcspResponse* resp)
return ASN_PARSE_E;
/* Check ObjectID for the resposeBytes */
- if (GetObjectId(source, &idx, &oid, size) < 0)
+ if (GetObjectId(source, &idx, &oid, oidOcspType, size) < 0)
return ASN_PARSE_E;
if (oid != OCSP_BASIC_OID)
return ASN_PARSE_E;
- if (source[idx++] != ASN_OCTET_STRING)
- return ASN_PARSE_E;
+ ret = GetOctetString(source, &idx, &length, size);
+ if (ret < 0)
+ return ret;
- if (GetLength(source, &idx, &length, size) < 0)
- return ASN_PARSE_E;
+ ret = DecodeBasicOcspResponse(source, &idx, resp, size, cm, heap, noVerify);
+ if (ret < 0)
+ return ret;
- if (DecodeBasicOcspResponse(source, &idx, resp, size) < 0)
- return ASN_PARSE_E;
-
return 0;
}
-static word32 SetOcspReqExtensions(word32 extSz, byte* output,
- const byte* nonce, word32 nonceSz)
+word32 EncodeOcspRequestExtensions(OcspRequest* req, byte* output, word32 size)
{
- static const byte NonceObjId[] = { 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07,
+ const byte NonceObjId[] = { 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07,
0x30, 0x01, 0x02 };
byte seqArray[5][MAX_SEQ_SZ];
- word32 seqSz[5], totalSz;
+ word32 seqSz[5], totalSz = (word32)sizeof(NonceObjId);
WOLFSSL_ENTER("SetOcspReqExtensions");
- if (nonce == NULL || nonceSz == 0) return 0;
-
- seqArray[0][0] = ASN_OCTET_STRING;
- seqSz[0] = 1 + SetLength(nonceSz, &seqArray[0][1]);
+ if (!req || !output || !req->nonceSz)
+ return 0;
+
+ totalSz += req->nonceSz;
+ totalSz += seqSz[0] = SetOctetString(req->nonceSz, seqArray[0]);
+ totalSz += seqSz[1] = SetOctetString(req->nonceSz + seqSz[0], seqArray[1]);
+ totalSz += seqSz[2] = SetObjectId(sizeof(NonceObjId), seqArray[2]);
+ totalSz += seqSz[3] = SetSequence(totalSz, seqArray[3]);
+ totalSz += seqSz[4] = SetSequence(totalSz, seqArray[4]);
- seqArray[1][0] = ASN_OBJECT_ID;
- seqSz[1] = 1 + SetLength(sizeof(NonceObjId), &seqArray[1][1]);
+ if (totalSz > size)
+ return 0;
- totalSz = seqSz[0] + seqSz[1] + nonceSz + (word32)sizeof(NonceObjId);
+ totalSz = 0;
- seqSz[2] = SetSequence(totalSz, seqArray[2]);
- totalSz += seqSz[2];
+ XMEMCPY(output + totalSz, seqArray[4], seqSz[4]);
+ totalSz += seqSz[4];
- seqSz[3] = SetSequence(totalSz, seqArray[3]);
+ XMEMCPY(output + totalSz, seqArray[3], seqSz[3]);
totalSz += seqSz[3];
- seqArray[4][0] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2);
- seqSz[4] = 1 + SetLength(totalSz, &seqArray[4][1]);
- totalSz += seqSz[4];
+ XMEMCPY(output + totalSz, seqArray[2], seqSz[2]);
+ totalSz += seqSz[2];
- if (totalSz < extSz)
- {
- totalSz = 0;
- XMEMCPY(output + totalSz, seqArray[4], seqSz[4]);
- totalSz += seqSz[4];
- XMEMCPY(output + totalSz, seqArray[3], seqSz[3]);
- totalSz += seqSz[3];
- XMEMCPY(output + totalSz, seqArray[2], seqSz[2]);
- totalSz += seqSz[2];
- XMEMCPY(output + totalSz, seqArray[1], seqSz[1]);
- totalSz += seqSz[1];
- XMEMCPY(output + totalSz, NonceObjId, sizeof(NonceObjId));
- totalSz += (word32)sizeof(NonceObjId);
- XMEMCPY(output + totalSz, seqArray[0], seqSz[0]);
- totalSz += seqSz[0];
- XMEMCPY(output + totalSz, nonce, nonceSz);
- totalSz += nonceSz;
- }
+ XMEMCPY(output + totalSz, NonceObjId, sizeof(NonceObjId));
+ totalSz += (word32)sizeof(NonceObjId);
+
+ XMEMCPY(output + totalSz, seqArray[1], seqSz[1]);
+ totalSz += seqSz[1];
+
+ XMEMCPY(output + totalSz, seqArray[0], seqSz[0]);
+ totalSz += seqSz[0];
+
+ XMEMCPY(output + totalSz, req->nonce, req->nonceSz);
+ totalSz += req->nonceSz;
return totalSz;
}
-int EncodeOcspRequest(OcspRequest* req)
+int EncodeOcspRequest(OcspRequest* req, byte* output, word32 size)
{
byte seqArray[5][MAX_SEQ_SZ];
/* The ASN.1 of the OCSP Request is an onion of sequences */
@@ -7349,66 +16758,65 @@ int EncodeOcspRequest(OcspRequest* req)
byte issuerKeyArray[MAX_ENCODED_DIG_SZ];
byte snArray[MAX_SN_SZ];
byte extArray[MAX_OCSP_EXT_SZ];
- byte* output = req->dest;
- word32 seqSz[5], algoSz, issuerSz, issuerKeySz, snSz, extSz, totalSz;
- int i;
+ word32 seqSz[5], algoSz, issuerSz, issuerKeySz, extSz, totalSz;
+ int i, snSz;
WOLFSSL_ENTER("EncodeOcspRequest");
#ifdef NO_SHA
- algoSz = SetAlgoID(SHA256h, algoArray, hashType, 0);
+ algoSz = SetAlgoID(SHA256h, algoArray, oidHashType, 0);
#else
- algoSz = SetAlgoID(SHAh, algoArray, hashType, 0);
+ algoSz = SetAlgoID(SHAh, algoArray, oidHashType, 0);
#endif
- req->issuerHash = req->cert->issuerHash;
- issuerSz = SetDigest(req->cert->issuerHash, KEYID_SIZE, issuerArray);
-
- req->issuerKeyHash = req->cert->issuerKeyHash;
- issuerKeySz = SetDigest(req->cert->issuerKeyHash,
- KEYID_SIZE, issuerKeyArray);
-
- req->serial = req->cert->serial;
- req->serialSz = req->cert->serialSz;
- snSz = SetSerialNumber(req->cert->serial, req->cert->serialSz, snArray);
-
- extSz = 0;
- if (req->useNonce) {
- RNG rng;
- if (wc_InitRng(&rng) != 0) {
- WOLFSSL_MSG("\tCannot initialize RNG. Skipping the OSCP Nonce.");
- } else {
- if (wc_RNG_GenerateBlock(&rng, req->nonce, MAX_OCSP_NONCE_SZ) != 0)
- WOLFSSL_MSG("\tCannot run RNG. Skipping the OSCP Nonce.");
- else {
- req->nonceSz = MAX_OCSP_NONCE_SZ;
- extSz = SetOcspReqExtensions(MAX_OCSP_EXT_SZ, extArray,
- req->nonce, req->nonceSz);
- }
- wc_FreeRng(&rng);
- }
+ issuerSz = SetDigest(req->issuerHash, KEYID_SIZE, issuerArray);
+ issuerKeySz = SetDigest(req->issuerKeyHash, KEYID_SIZE, issuerKeyArray);
+ snSz = SetSerialNumber(req->serial, req->serialSz, snArray,
+ MAX_SN_SZ, MAX_SN_SZ);
+ extSz = 0;
+
+ if (snSz < 0)
+ return snSz;
+
+ if (req->nonceSz) {
+        /* The TLS extension code uses this function too - wrap the
+         * extensions in an ASN.1 Context Specific [2] tag.
+         */
+ extSz = EncodeOcspRequestExtensions(req, extArray + 2,
+ OCSP_NONCE_EXT_SZ);
+ extSz += SetExplicit(2, extSz, extArray);
}
totalSz = algoSz + issuerSz + issuerKeySz + snSz;
-
for (i = 4; i >= 0; i--) {
seqSz[i] = SetSequence(totalSz, seqArray[i]);
totalSz += seqSz[i];
if (i == 2) totalSz += extSz;
}
+
+ if (output == NULL)
+ return totalSz;
+ if (totalSz > size)
+ return BUFFER_E;
+
totalSz = 0;
for (i = 0; i < 5; i++) {
XMEMCPY(output + totalSz, seqArray[i], seqSz[i]);
totalSz += seqSz[i];
}
+
XMEMCPY(output + totalSz, algoArray, algoSz);
totalSz += algoSz;
+
XMEMCPY(output + totalSz, issuerArray, issuerSz);
totalSz += issuerSz;
+
XMEMCPY(output + totalSz, issuerKeyArray, issuerKeySz);
totalSz += issuerKeySz;
+
XMEMCPY(output + totalSz, snArray, snSz);
totalSz += snSz;
+
if (extSz != 0) {
XMEMCPY(output + totalSz, extArray, extSz);
totalSz += extSz;
@@ -7418,19 +16826,91 @@ int EncodeOcspRequest(OcspRequest* req)
}
-void InitOcspRequest(OcspRequest* req, DecodedCert* cert, byte useNonce,
- byte* dest, word32 destSz)
+int InitOcspRequest(OcspRequest* req, DecodedCert* cert, byte useNonce,
+ void* heap)
{
+ int ret;
+
WOLFSSL_ENTER("InitOcspRequest");
- req->cert = cert;
- req->useNonce = useNonce;
- req->nonceSz = 0;
- req->issuerHash = NULL;
- req->issuerKeyHash = NULL;
- req->serial = NULL;
- req->dest = dest;
- req->destSz = destSz;
+ if (req == NULL)
+ return BAD_FUNC_ARG;
+
+ ForceZero(req, sizeof(OcspRequest));
+ req->heap = heap;
+
+ if (cert) {
+ XMEMCPY(req->issuerHash, cert->issuerHash, KEYID_SIZE);
+ XMEMCPY(req->issuerKeyHash, cert->issuerKeyHash, KEYID_SIZE);
+
+ req->serial = (byte*)XMALLOC(cert->serialSz, req->heap,
+ DYNAMIC_TYPE_OCSP_REQUEST);
+ if (req->serial == NULL)
+ return MEMORY_E;
+
+ XMEMCPY(req->serial, cert->serial, cert->serialSz);
+ req->serialSz = cert->serialSz;
+
+ if (cert->extAuthInfoSz != 0 && cert->extAuthInfo != NULL) {
+ req->url = (byte*)XMALLOC(cert->extAuthInfoSz + 1, req->heap,
+ DYNAMIC_TYPE_OCSP_REQUEST);
+ if (req->url == NULL) {
+ XFREE(req->serial, req->heap, DYNAMIC_TYPE_OCSP);
+ return MEMORY_E;
+ }
+
+ XMEMCPY(req->url, cert->extAuthInfo, cert->extAuthInfoSz);
+ req->urlSz = cert->extAuthInfoSz;
+ req->url[req->urlSz] = 0;
+ }
+ }
+
+ if (useNonce) {
+ WC_RNG rng;
+
+ #ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, req->heap, INVALID_DEVID);
+ #else
+ ret = wc_InitRng(&rng);
+ #endif
+ if (ret != 0) {
+ WOLFSSL_MSG("\tCannot initialize RNG. Skipping the OSCP Nonce.");
+ } else {
+ if (wc_RNG_GenerateBlock(&rng, req->nonce, MAX_OCSP_NONCE_SZ) != 0)
+ WOLFSSL_MSG("\tCannot run RNG. Skipping the OSCP Nonce.");
+ else
+ req->nonceSz = MAX_OCSP_NONCE_SZ;
+
+ wc_FreeRng(&rng);
+ }
+ }
+
+ return 0;
+}
+
+void FreeOcspRequest(OcspRequest* req)
+{
+ WOLFSSL_ENTER("FreeOcspRequest");
+
+ if (req) {
+ if (req->serial)
+ XFREE(req->serial, req->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+ req->serial = NULL;
+
+#ifdef OPENSSL_EXTRA
+ if (req->serialInt) {
+ if (req->serialInt->isDynamic) {
+ XFREE(req->serialInt->data, NULL, DYNAMIC_TYPE_OPENSSL);
+ }
+ XFREE(req->serialInt, NULL, DYNAMIC_TYPE_OPENSSL);
+ }
+ req->serialInt = NULL;
+#endif
+
+ if (req->url)
+ XFREE(req->url, req->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+ req->url = NULL;
+ }
}
@@ -7454,14 +16934,18 @@ int CompareOcspReqResp(OcspRequest* req, OcspResponse* resp)
/* Nonces are not critical. The responder may not necessarily add
* the nonce to the response. */
- if (req->useNonce && resp->nonceSz != 0) {
+ if (req->nonceSz
+#ifndef WOLFSSL_FORCE_OCSP_NONCE_CHECK
+ && resp->nonceSz != 0
+#endif
+ ) {
cmp = req->nonceSz - resp->nonceSz;
if (cmp != 0)
{
WOLFSSL_MSG("\tnonceSz mismatch");
return cmp;
}
-
+
cmp = XMEMCMP(req->nonce, resp->nonce, req->nonceSz);
if (cmp != 0)
{
@@ -7501,20 +16985,22 @@ int CompareOcspReqResp(OcspRequest* req, OcspResponse* resp)
return 0;
}
-#endif
+#endif /* HAVE_OCSP */
-/* store SHA hash of NAME */
-WOLFSSL_LOCAL int GetNameHash(const byte* source, word32* idx, byte* hash,
+/* store WC_SHA hash of NAME */
+int GetNameHash(const byte* source, word32* idx, byte* hash,
int maxIdx)
{
int length; /* length of all distinguished names */
int ret;
word32 dummy;
+ byte tag;
WOLFSSL_ENTER("GetNameHash");
- if (source[*idx] == ASN_OBJECT_ID) {
+ dummy = *idx;
+ if (GetASNTag(source, &dummy, &tag, maxIdx) == 0 && tag == ASN_OBJECT_ID) {
WOLFSSL_MSG("Trying optional prefix...");
if (GetLength(source, idx, &length, maxIdx) < 0)
@@ -7531,11 +17017,7 @@ WOLFSSL_LOCAL int GetNameHash(const byte* source, word32* idx, byte* hash,
if (GetSequence(source, idx, &length, maxIdx) < 0)
return ASN_PARSE_E;
-#ifdef NO_SHA
- ret = wc_Sha256Hash(source + dummy, length + *idx - dummy, hash);
-#else
- ret = wc_ShaHash(source + dummy, length + *idx - dummy, hash);
-#endif
+ ret = CalcHashId(source + dummy, length + *idx - dummy, hash);
*idx += length;
@@ -7546,16 +17028,15 @@ WOLFSSL_LOCAL int GetNameHash(const byte* source, word32* idx, byte* hash,
#ifdef HAVE_CRL
/* initialize decoded CRL */
-void InitDecodedCRL(DecodedCRL* dcrl)
+void InitDecodedCRL(DecodedCRL* dcrl, void* heap)
{
WOLFSSL_MSG("InitDecodedCRL");
- dcrl->certBegin = 0;
- dcrl->sigIndex = 0;
- dcrl->sigLength = 0;
- dcrl->signatureOID = 0;
- dcrl->certs = NULL;
- dcrl->totalCerts = 0;
+ XMEMSET(dcrl, 0, sizeof(DecodedCRL));
+ dcrl->heap = heap;
+ #ifdef WOLFSSL_HEAP_TEST
+ dcrl->heap = (void*)WOLFSSL_HEAP_TEST;
+ #endif
}
@@ -7568,7 +17049,7 @@ void FreeDecodedCRL(DecodedCRL* dcrl)
while(tmp) {
RevokedCert* next = tmp->next;
- XFREE(tmp, NULL, DYNAMIC_TYPE_REVOKED);
+ XFREE(tmp, dcrl->heap, DYNAMIC_TYPE_REVOKED);
tmp = next;
}
}
@@ -7578,7 +17059,7 @@ void FreeDecodedCRL(DecodedCRL* dcrl)
static int GetRevoked(const byte* buff, word32* idx, DecodedCRL* dcrl,
int maxIdx)
{
- int len;
+ int ret, len;
word32 end;
byte b;
RevokedCert* rc;
@@ -7590,56 +17071,33 @@ static int GetRevoked(const byte* buff, word32* idx, DecodedCRL* dcrl,
end = *idx + len;
- /* get serial number */
- b = buff[*idx];
- *idx += 1;
-
- if (b != ASN_INTEGER) {
- WOLFSSL_MSG("Expecting Integer");
- return ASN_PARSE_E;
- }
-
- if (GetLength(buff, idx, &len, maxIdx) < 0)
- return ASN_PARSE_E;
-
- if (len > EXTERNAL_SERIAL_SIZE) {
- WOLFSSL_MSG("Serial Size too big");
- return ASN_PARSE_E;
- }
-
- rc = (RevokedCert*)XMALLOC(sizeof(RevokedCert), NULL, DYNAMIC_TYPE_CRL);
+ rc = (RevokedCert*)XMALLOC(sizeof(RevokedCert), dcrl->heap,
+ DYNAMIC_TYPE_REVOKED);
if (rc == NULL) {
WOLFSSL_MSG("Alloc Revoked Cert failed");
return MEMORY_E;
}
- XMEMCPY(rc->serialNumber, &buff[*idx], len);
- rc->serialSz = len;
+ if (GetSerialNumber(buff, idx, rc->serialNumber, &rc->serialSz,
+ maxIdx) < 0) {
+ XFREE(rc, dcrl->heap, DYNAMIC_TYPE_REVOKED);
+ return ASN_PARSE_E;
+ }
/* add to list */
rc->next = dcrl->certs;
dcrl->certs = rc;
dcrl->totalCerts++;
- *idx += len;
-
/* get date */
- b = buff[*idx];
- *idx += 1;
-
- if (b != ASN_UTC_TIME && b != ASN_GENERALIZED_TIME) {
+ ret = GetDateInfo(buff, idx, NULL, &b, NULL, maxIdx);
+ if (ret < 0) {
WOLFSSL_MSG("Expecting Date");
- return ASN_PARSE_E;
+ return ret;
}
- if (GetLength(buff, idx, &len, maxIdx) < 0)
- return ASN_PARSE_E;
-
- /* skip for now */
- *idx += len;
-
- if (*idx != end) /* skip extensions */
- *idx = end;
+ /* skip extensions */
+ *idx = end;
return 0;
}
@@ -7650,29 +17108,244 @@ static int GetCRL_Signature(const byte* source, word32* idx, DecodedCRL* dcrl,
int maxIdx)
{
int length;
- byte b;
+ int ret;
WOLFSSL_ENTER("GetCRL_Signature");
- b = source[*idx];
- *idx += 1;
- if (b != ASN_BIT_STRING)
- return ASN_BITSTR_E;
+ ret = CheckBitString(source, idx, &length, maxIdx, 1, NULL);
+ if (ret != 0)
+ return ret;
+ dcrl->sigLength = length;
- if (GetLength(source, idx, &length, maxIdx) < 0)
+ dcrl->signature = (byte*)&source[*idx];
+ *idx += dcrl->sigLength;
+
+ return 0;
+}
+
+int VerifyCRL_Signature(SignatureCtx* sigCtx, const byte* toBeSigned,
+ word32 tbsSz, const byte* signature, word32 sigSz,
+ word32 signatureOID, Signer *ca, void* heap)
+{
+ /* try to confirm/verify signature */
+#ifndef IGNORE_KEY_EXTENSIONS
+ if ((ca->keyUsage & KEYUSE_CRL_SIGN) == 0) {
+ WOLFSSL_MSG("CA cannot sign CRLs");
+ return ASN_CRL_NO_SIGNER_E;
+ }
+#endif /* IGNORE_KEY_EXTENSIONS */
+
+ InitSignatureCtx(sigCtx, heap, INVALID_DEVID);
+ if (ConfirmSignature(sigCtx, toBeSigned, tbsSz, ca->publicKey,
+ ca->pubKeySize, ca->keyOID, signature, sigSz,
+ signatureOID, NULL) != 0) {
+ WOLFSSL_MSG("CRL Confirm signature failed");
+ return ASN_CRL_CONFIRM_E;
+ }
+
+ return 0;
+}
+
+
+static int ParseCRL_CertList(DecodedCRL* dcrl, const byte* buf,
+ word32* inOutIdx, int sz)
+{
+ word32 oid, dateIdx, idx, checkIdx;
+ int version, doNextDate = 1;
+ byte tag;
+
+ if (dcrl == NULL || inOutIdx == NULL || buf == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* may have version */
+ idx = *inOutIdx;
+
+ checkIdx = idx;
+ if (GetASNTag(buf, &checkIdx, &tag, sz) == 0 && tag == ASN_INTEGER) {
+ if (GetMyVersion(buf, &idx, &version, sz) < 0)
+ return ASN_PARSE_E;
+ }
+
+ if (GetAlgoId(buf, &idx, &oid, oidIgnoreType, sz) < 0)
return ASN_PARSE_E;
- dcrl->sigLength = length;
+ if (GetNameHash(buf, &idx, dcrl->issuerHash, sz) < 0)
+ return ASN_PARSE_E;
- b = source[*idx];
- *idx += 1;
- if (b != 0x00)
- return ASN_EXPECT_0_E;
+ if (GetBasicDate(buf, &idx, dcrl->lastDate, &dcrl->lastDateFormat, sz) < 0)
+ return ASN_PARSE_E;
- dcrl->sigLength--;
- dcrl->signature = (byte*)&source[*idx];
+ dateIdx = idx;
- *idx += dcrl->sigLength;
+ if (GetBasicDate(buf, &idx, dcrl->nextDate, &dcrl->nextDateFormat, sz) < 0)
+ {
+#ifndef WOLFSSL_NO_CRL_NEXT_DATE
+ (void)dateIdx;
+ return ASN_PARSE_E;
+#else
+ dcrl->nextDateFormat = ASN_OTHER_TYPE; /* skip flag */
+ doNextDate = 0;
+ idx = dateIdx;
+#endif
+ }
+
+ if (doNextDate) {
+#ifndef NO_ASN_TIME
+ if (!XVALIDATE_DATE(dcrl->nextDate, dcrl->nextDateFormat, AFTER)) {
+ WOLFSSL_MSG("CRL after date is no longer valid");
+ return ASN_AFTER_DATE_E;
+ }
+#endif
+ }
+
+ checkIdx = idx;
+ if (idx != dcrl->sigIndex &&
+ GetASNTag(buf, &checkIdx, &tag, sz) == 0 && tag != CRL_EXTENSIONS) {
+
+ int len;
+
+ if (GetSequence(buf, &idx, &len, sz) < 0)
+ return ASN_PARSE_E;
+ len += idx;
+
+ while (idx < (word32)len) {
+ if (GetRevoked(buf, &idx, dcrl, len) < 0)
+ return ASN_PARSE_E;
+ }
+ }
+
+ *inOutIdx = idx;
+
+ return 0;
+}
+
+
+#ifndef NO_SKID
+static int ParseCRL_AuthKeyIdExt(const byte* input, int sz, DecodedCRL* dcrl)
+{
+ word32 idx = 0;
+ int length = 0, ret = 0;
+ byte tag;
+
+ WOLFSSL_ENTER("ParseCRL_AuthKeyIdExt");
+
+ if (GetSequence(input, &idx, &length, sz) < 0) {
+ WOLFSSL_MSG("\tfail: should be a SEQUENCE\n");
+ return ASN_PARSE_E;
+ }
+
+ if (GetASNTag(input, &idx, &tag, sz) < 0) {
+ return ASN_PARSE_E;
+ }
+
+ if (tag != (ASN_CONTEXT_SPECIFIC | 0)) {
+ WOLFSSL_MSG("\tinfo: OPTIONAL item 0, not available\n");
+ return 0;
+ }
+
+ if (GetLength(input, &idx, &length, sz) <= 0) {
+ WOLFSSL_MSG("\tfail: extension data length");
+ return ASN_PARSE_E;
+ }
+
+ dcrl->extAuthKeyIdSet = 1;
+ if (length == KEYID_SIZE) {
+ XMEMCPY(dcrl->extAuthKeyId, input + idx, length);
+ }
+ else {
+ ret = CalcHashId(input + idx, length, dcrl->extAuthKeyId);
+ }
+
+ return ret;
+}
+#endif
+
+
+static int ParseCRL_Extensions(DecodedCRL* dcrl, const byte* buf,
+ word32* inOutIdx, word32 sz)
+{
+ int length;
+ word32 idx;
+ word32 ext_bound; /* boundary index for the sequence of extensions */
+ word32 oid;
+ byte tag;
+
+ WOLFSSL_ENTER("ParseCRL_Extensions");
+ (void)dcrl;
+
+ if (inOutIdx == NULL)
+ return BAD_FUNC_ARG;
+
+ idx = *inOutIdx;
+
+ /* CRL Extensions are optional */
+ if ((idx + 1) > sz)
+ return 0;
+
+ /* CRL Extensions are optional */
+ if (GetASNTag(buf, &idx, &tag, sz) < 0)
+ return 0;
+
+ /* CRL Extensions are optional */
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+ return 0;
+
+ if (GetLength(buf, &idx, &length, sz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetSequence(buf, &idx, &length, sz) < 0)
+ return ASN_PARSE_E;
+
+ ext_bound = idx + length;
+
+ while (idx < (word32)ext_bound) {
+ word32 localIdx;
+ int ret;
+
+ if (GetSequence(buf, &idx, &length, sz) < 0) {
+ WOLFSSL_MSG("\tfail: should be a SEQUENCE");
+ return ASN_PARSE_E;
+ }
+
+ oid = 0;
+ if (GetObjectId(buf, &idx, &oid, oidCrlExtType, sz) < 0) {
+ WOLFSSL_MSG("\tfail: OBJECT ID");
+ return ASN_PARSE_E;
+ }
+
+ /* check for critical flag */
+ if ((idx + 1) > (word32)sz) {
+ WOLFSSL_MSG("\tfail: malformed buffer");
+ return BUFFER_E;
+ }
+
+ localIdx = idx;
+ if (GetASNTag(buf, &localIdx, &tag, sz) == 0 && tag == ASN_BOOLEAN) {
+ WOLFSSL_MSG("\tfound optional critical flag, moving past");
+ ret = GetBoolean(buf, &idx, sz);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = GetOctetString(buf, &idx, &length, sz);
+ if (ret < 0)
+ return ret;
+
+ if (oid == AUTH_KEY_OID) {
+ #ifndef NO_SKID
+ ret = ParseCRL_AuthKeyIdExt(buf + idx, length, dcrl);
+ if (ret < 0) {
+ WOLFSSL_MSG("\tcouldn't parse AuthKeyId extension");
+ return ret;
+ }
+ #endif
+ }
+
+ idx += length;
+ }
+
+ *inOutIdx = idx;
return 0;
}
@@ -7681,15 +17354,16 @@ static int GetCRL_Signature(const byte* source, word32* idx, DecodedCRL* dcrl,
/* prase crl buffer into decoded state, 0 on success */
int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
{
- int version, len;
- word32 oid, idx = 0;
- Signer* ca = NULL;
+ int len;
+ word32 idx = 0;
+ Signer* ca = NULL;
+ SignatureCtx sigCtx;
WOLFSSL_MSG("ParseCRL");
/* raw crl hash */
/* hash here if needed for optimized comparisons
- * Sha sha;
+ * wc_Sha sha;
* wc_InitSha(&sha);
* wc_ShaUpdate(&sha, buff, sz);
* wc_ShaFinal(&sha, dcrl->crlHash); */
@@ -7698,98 +17372,154 @@ int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
return ASN_PARSE_E;
dcrl->certBegin = idx;
+ /* Normalize sz for the length inside the outer sequence. */
+ sz = len + idx;
if (GetSequence(buff, &idx, &len, sz) < 0)
return ASN_PARSE_E;
dcrl->sigIndex = len + idx;
- /* may have version */
- if (buff[idx] == ASN_INTEGER) {
- if (GetMyVersion(buff, &idx, &version) < 0)
- return ASN_PARSE_E;
- }
-
- if (GetAlgoId(buff, &idx, &oid, sz) < 0)
+ if (ParseCRL_CertList(dcrl, buff, &idx, idx + len) < 0)
return ASN_PARSE_E;
- if (GetNameHash(buff, &idx, dcrl->issuerHash, sz) < 0)
+ if (ParseCRL_Extensions(dcrl, buff, &idx, idx + len) < 0)
return ASN_PARSE_E;
- if (GetBasicDate(buff, &idx, dcrl->lastDate, &dcrl->lastDateFormat, sz) < 0)
+ idx = dcrl->sigIndex;
+
+ if (GetAlgoId(buff, &idx, &dcrl->signatureOID, oidSigType, sz) < 0)
return ASN_PARSE_E;
- if (GetBasicDate(buff, &idx, dcrl->nextDate, &dcrl->nextDateFormat, sz) < 0)
+ if (GetCRL_Signature(buff, &idx, dcrl, sz) < 0)
return ASN_PARSE_E;
- if (!XVALIDATE_DATE(dcrl->nextDate, dcrl->nextDateFormat, AFTER)) {
- WOLFSSL_MSG("CRL after date is no longer valid");
- return ASN_AFTER_DATE_E;
+    /* OpenSSL doesn't add an SKID by default for CRLs because Firefox chokes;
+       if experiencing issues, uncomment the NO_SKID define in the CRL section
+       of wolfssl/wolfcrypt/settings.h */
+#ifndef NO_SKID
+ if (dcrl->extAuthKeyIdSet) {
+ ca = GetCA(cm, dcrl->extAuthKeyId); /* more unique than issuerHash */
+ }
+ if (ca != NULL && XMEMCMP(dcrl->issuerHash, ca->subjectNameHash,
+ KEYID_SIZE) != 0) {
+ ca = NULL;
+ }
+ if (ca == NULL) {
+ ca = GetCAByName(cm, dcrl->issuerHash); /* last resort */
+ /* If AKID is available then this CA doesn't have the public
+ * key required */
+ if (ca && dcrl->extAuthKeyIdSet) {
+ WOLFSSL_MSG("CA SKID doesn't match AKID");
+ ca = NULL;
+ }
}
+#else
+ ca = GetCA(cm, dcrl->issuerHash);
+#endif /* !NO_SKID */
+ WOLFSSL_MSG("About to verify CRL signature");
- if (idx != dcrl->sigIndex && buff[idx] != CRL_EXTENSIONS) {
- if (GetSequence(buff, &idx, &len, sz) < 0)
- return ASN_PARSE_E;
+ if (ca == NULL) {
+ WOLFSSL_MSG("Did NOT find CRL issuer CA");
+ return ASN_CRL_NO_SIGNER_E;
+ }
- len += idx;
+ WOLFSSL_MSG("Found CRL issuer CA");
+ return VerifyCRL_Signature(&sigCtx, buff + dcrl->certBegin,
+ dcrl->sigIndex - dcrl->certBegin, dcrl->signature, dcrl->sigLength,
+ dcrl->signatureOID, ca, dcrl->heap);
+}
- while (idx < (word32)len) {
- if (GetRevoked(buff, &idx, dcrl, sz) < 0)
- return ASN_PARSE_E;
+#endif /* HAVE_CRL */
+
+
+
+#ifdef WOLFSSL_CERT_PIV
+
+int wc_ParseCertPIV(wc_CertPIV* piv, const byte* buf, word32 totalSz)
+{
+ int length = 0;
+ word32 idx = 0;
+
+ WOLFSSL_ENTER("wc_ParseCertPIV");
+
+ if (piv == NULL || buf == NULL || totalSz == 0)
+ return BAD_FUNC_ARG;
+
+ XMEMSET(piv, 0, sizeof(wc_CertPIV));
+
+ /* Detect Identiv PIV (with 0x0A, 0x0B and 0x0C sections) */
+ /* Certificate (0A 82 05FA) */
+ if (GetASNHeader(buf, ASN_PIV_CERT, &idx, &length, totalSz) >= 0) {
+ /* Identiv Type PIV card */
+ piv->isIdentiv = 1;
+
+ piv->cert = &buf[idx];
+ piv->certSz = length;
+ idx += length;
+
+ /* Nonce (0B 14) */
+ if (GetASNHeader(buf, ASN_PIV_NONCE, &idx, &length, totalSz) >= 0) {
+ piv->nonce = &buf[idx];
+ piv->nonceSz = length;
+ idx += length;
}
- }
- if (idx != dcrl->sigIndex)
- idx = dcrl->sigIndex; /* skip extensions */
+ /* Signed Nonce (0C 82 0100) */
+ if (GetASNHeader(buf, ASN_PIV_SIGNED_NONCE, &idx, &length, totalSz) >= 0) {
+ piv->signedNonce = &buf[idx];
+ piv->signedNonceSz = length;
+ }
- if (GetAlgoId(buff, &idx, &dcrl->signatureOID, sz) < 0)
- return ASN_PARSE_E;
+ idx = 0;
+ buf = piv->cert;
+ totalSz = piv->certSz;
+ }
- if (GetCRL_Signature(buff, &idx, dcrl, sz) < 0)
+ /* Certificate Buffer Total Size (53 82 05F6) */
+ if (GetASNHeader(buf, ASN_APPLICATION | ASN_PRINTABLE_STRING, &idx,
+ &length, totalSz) < 0) {
return ASN_PARSE_E;
+ }
+ /* PIV Certificate (70 82 05ED) */
+ if (GetASNHeader(buf, ASN_PIV_TAG_CERT, &idx, &length,
+ totalSz) < 0) {
+ return ASN_PARSE_E;
+ }
- /* openssl doesn't add skid by default for CRLs cause firefox chokes
- we're not assuming it's available yet */
- #if !defined(NO_SKID) && defined(CRL_SKID_READY)
- if (dcrl->extAuthKeyIdSet)
- ca = GetCA(cm, dcrl->extAuthKeyId);
- if (ca == NULL)
- ca = GetCAByName(cm, dcrl->issuerHash);
- #else /* NO_SKID */
- ca = GetCA(cm, dcrl->issuerHash);
- #endif /* NO_SKID */
- WOLFSSL_MSG("About to verify CRL signature");
+ /* Capture certificate buffer pointer and length */
+ piv->cert = &buf[idx];
+ piv->certSz = length;
+ idx += length;
- if (ca) {
- WOLFSSL_MSG("Found CRL issuer CA");
- /* try to confirm/verify signature */
- #ifndef IGNORE_KEY_EXTENSIONS
- if ((ca->keyUsage & KEYUSE_CRL_SIGN) == 0) {
- WOLFSSL_MSG("CA cannot sign CRLs");
- return ASN_CRL_NO_SIGNER_E;
- }
- #endif /* IGNORE_KEY_EXTENSIONS */
- if (!ConfirmSignature(buff + dcrl->certBegin,
- dcrl->sigIndex - dcrl->certBegin,
- ca->publicKey, ca->pubKeySize, ca->keyOID,
- dcrl->signature, dcrl->sigLength, dcrl->signatureOID, NULL)) {
- WOLFSSL_MSG("CRL Confirm signature failed");
- return ASN_CRL_CONFIRM_E;
+ /* PIV Certificate Info (71 01 00) */
+ if (GetASNHeader(buf, ASN_PIV_TAG_CERT_INFO, &idx, &length,
+ totalSz) >= 0) {
+ if (length >= 1) {
+ piv->compression = (buf[idx] & ASN_PIV_CERT_INFO_COMPRESSED);
+ piv->isX509 = (buf[idx] & ASN_PIV_CERT_INFO_ISX509);
}
+ idx += length;
}
- else {
- WOLFSSL_MSG("Did NOT find CRL issuer CA");
- return ASN_CRL_NO_SIGNER_E;
+
+ /* PIV Error Detection (FE 00) */
+ if (GetASNHeader(buf, ASN_PIV_TAG_ERR_DET, &idx, &length,
+ totalSz) >= 0) {
+ piv->certErrDet = &buf[idx];
+ piv->certErrDetSz = length;
+ idx += length;
}
return 0;
}
-#endif /* HAVE_CRL */
-#endif
+#endif /* WOLFSSL_CERT_PIV */
-#ifdef WOLFSSL_SEP
+#undef ERROR_OUT
+#endif /* !NO_ASN */
-#endif /* WOLFSSL_SEP */
+#ifdef WOLFSSL_SEP
+
+#endif /* WOLFSSL_SEP */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/async.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/async.c
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/async.c
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/blake2b.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/blake2b.c
index 6ae5afd23..1541947dd 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/blake2b.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/blake2b.c
@@ -12,9 +12,9 @@
*/
/* blake2b.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -28,10 +28,11 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -69,14 +70,14 @@ static const byte blake2b_sigma[12][16] =
};
-static INLINE int blake2b_set_lastnode( blake2b_state *S )
+static WC_INLINE int blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = ~0ULL;
return 0;
}
/* Some helper functions, not necessarily useful */
-static INLINE int blake2b_set_lastblock( blake2b_state *S )
+static WC_INLINE int blake2b_set_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_set_lastnode( S );
@@ -84,7 +85,7 @@ static INLINE int blake2b_set_lastblock( blake2b_state *S )
return 0;
}
-static INLINE int blake2b_increment_counter( blake2b_state *S, const word64
+static WC_INLINE int blake2b_increment_counter( blake2b_state *S, const word64
inc )
{
S->t[0] += inc;
@@ -92,7 +93,7 @@ static INLINE int blake2b_increment_counter( blake2b_state *S, const word64
return 0;
}
-static INLINE int blake2b_init0( blake2b_state *S )
+static WC_INLINE int blake2b_init0( blake2b_state *S )
{
int i;
XMEMSET( S, 0, sizeof( blake2b_state ) );
@@ -106,8 +107,9 @@ static INLINE int blake2b_init0( blake2b_state *S )
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
word32 i;
+ byte *p ;
blake2b_init0( S );
- byte *p = ( byte * )( P );
+ p = ( byte * )( P );
/* IV XOR ParamBlock */
for( i = 0; i < 8; ++i )
@@ -124,6 +126,7 @@ int blake2b_init( blake2b_state *S, const byte outlen )
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
+#ifdef WOLFSSL_BLAKE2B_INIT_EACH_FIELD
P->digest_length = outlen;
P->key_length = 0;
P->fanout = 1;
@@ -135,6 +138,12 @@ int blake2b_init( blake2b_state *S, const byte outlen )
XMEMSET( P->reserved, 0, sizeof( P->reserved ) );
XMEMSET( P->salt, 0, sizeof( P->salt ) );
XMEMSET( P->personal, 0, sizeof( P->personal ) );
+#else
+ XMEMSET( P, 0, sizeof( *P ) );
+ P->digest_length = outlen;
+ P->fanout = 1;
+ P->depth = 1;
+#endif
return blake2b_init_param( S, P );
}
@@ -148,6 +157,7 @@ int blake2b_init_key( blake2b_state *S, const byte outlen, const void *key,
if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;
+#ifdef WOLFSSL_BLAKE2B_INIT_EACH_FIELD
P->digest_length = outlen;
P->key_length = keylen;
P->fanout = 1;
@@ -159,6 +169,13 @@ int blake2b_init_key( blake2b_state *S, const byte outlen, const void *key,
XMEMSET( P->reserved, 0, sizeof( P->reserved ) );
XMEMSET( P->salt, 0, sizeof( P->salt ) );
XMEMSET( P->personal, 0, sizeof( P->personal ) );
+#else
+ XMEMSET( P, 0, sizeof( *P ) );
+ P->digest_length = outlen;
+ P->key_length = keylen;
+ P->fanout = 1;
+ P->depth = 1;
+#endif
if( blake2b_init_param( S, P ) < 0 ) return -1;
@@ -300,8 +317,7 @@ int blake2b_update( blake2b_state *S, const byte *in, word64 inlen )
{
XMEMCPY( S->buf + left, in, (wolfssl_word)inlen );
S->buflen += inlen; /* Be lazy, do not compress */
- in += inlen;
- inlen -= inlen;
+ inlen = 0;
}
}
@@ -387,7 +403,7 @@ int main( int argc, char **argv )
return -1;
}
- if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) )
+ if( 0 != XMEMCMP( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) )
{
puts( "error" );
return -1;
@@ -402,9 +418,12 @@ int main( int argc, char **argv )
/* wolfCrypt API */
-/* Init Blake2b digest, track size incase final doesn't want to "remember" */
+/* Init Blake2b digest, track size in case final doesn't want to "remember" */
int wc_InitBlake2b(Blake2b* b2b, word32 digestSz)
{
+ if (b2b == NULL){
+ return -1;
+ }
b2b->digestSz = digestSz;
return blake2b_init(b2b->S, (byte)digestSz);
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/blake2s.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/blake2s.c
new file mode 100644
index 000000000..651a1d18d
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/blake2s.c
@@ -0,0 +1,446 @@
+/*
+ BLAKE2 reference source code package - reference C implementations
+
+ Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along with
+ this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
+/* blake2s.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_BLAKE2S
+
+#include <wolfssl/wolfcrypt/blake2.h>
+#include <wolfssl/wolfcrypt/blake2-impl.h>
+
+
+static const word32 blake2s_IV[8] =
+{
+ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+};
+
+static const byte blake2s_sigma[10][16] =
+{
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }
+};
+
+
+static WC_INLINE int blake2s_set_lastnode( blake2s_state *S )
+{
+ S->f[1] = ~0;
+ return 0;
+}
+
+/* Some helper functions, not necessarily useful */
+static WC_INLINE int blake2s_set_lastblock( blake2s_state *S )
+{
+ if( S->last_node ) blake2s_set_lastnode( S );
+
+ S->f[0] = ~0;
+ return 0;
+}
+
+static WC_INLINE int blake2s_increment_counter( blake2s_state *S, const word32
+ inc )
+{
+ S->t[0] += inc;
+ S->t[1] += ( S->t[0] < inc );
+ return 0;
+}
+
+static WC_INLINE int blake2s_init0( blake2s_state *S )
+{
+ int i;
+ XMEMSET( S, 0, sizeof( blake2s_state ) );
+
+ for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
+
+ return 0;
+}
+
+/* init xors IV with input parameter block */
+int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
+{
+ word32 i;
+ byte *p ;
+ blake2s_init0( S );
+ p = ( byte * )( P );
+
+ /* IV XOR ParamBlock */
+ for( i = 0; i < 8; ++i )
+ S->h[i] ^= load32( p + sizeof( S->h[i] ) * i );
+
+ return 0;
+}
+
+
+
+int blake2s_init( blake2s_state *S, const byte outlen )
+{
+ blake2s_param P[1];
+
+ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
+
+#ifdef WOLFSSL_BLAKE2S_INIT_EACH_FIELD
+ P->digest_length = outlen;
+ P->key_length = 0;
+ P->fanout = 1;
+ P->depth = 1;
+ store32( &P->leaf_length, 0 );
+ store32( &P->node_offset, 0 );
+ P->node_depth = 0;
+ P->inner_length = 0;
+ XMEMSET( P->reserved, 0, sizeof( P->reserved ) );
+ XMEMSET( P->salt, 0, sizeof( P->salt ) );
+ XMEMSET( P->personal, 0, sizeof( P->personal ) );
+#else
+ XMEMSET( P, 0, sizeof( *P ) );
+ P->digest_length = outlen;
+ P->fanout = 1;
+ P->depth = 1;
+#endif
+ return blake2s_init_param( S, P );
+}
+
+
+int blake2s_init_key( blake2s_state *S, const byte outlen, const void *key,
+ const byte keylen )
+{
+ blake2s_param P[1];
+
+ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
+
+ if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
+
+#ifdef WOLFSSL_BLAKE2S_INIT_EACH_FIELD
+ P->digest_length = outlen;
+ P->key_length = keylen;
+ P->fanout = 1;
+ P->depth = 1;
+ store32( &P->leaf_length, 0 );
+ store64( &P->node_offset, 0 );
+ P->node_depth = 0;
+ P->inner_length = 0;
+ XMEMSET( P->reserved, 0, sizeof( P->reserved ) );
+ XMEMSET( P->salt, 0, sizeof( P->salt ) );
+ XMEMSET( P->personal, 0, sizeof( P->personal ) );
+#else
+ XMEMSET( P, 0, sizeof( *P ) );
+ P->digest_length = outlen;
+ P->key_length = keylen;
+ P->fanout = 1;
+ P->depth = 1;
+#endif
+
+ if( blake2s_init_param( S, P ) < 0 ) return -1;
+
+ {
+#ifdef WOLFSSL_SMALL_STACK
+ byte* block;
+
+ block = (byte*)XMALLOC(BLAKE2S_BLOCKBYTES, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ if ( block == NULL ) return -1;
+#else
+ byte block[BLAKE2S_BLOCKBYTES];
+#endif
+
+ XMEMSET( block, 0, BLAKE2S_BLOCKBYTES );
+ XMEMCPY( block, key, keylen );
+ blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
+ secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from */
+ /* memory */
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(block, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ }
+ return 0;
+}
+
+static int blake2s_compress( blake2s_state *S,
+ const byte block[BLAKE2S_BLOCKBYTES] )
+{
+ int i;
+
+#ifdef WOLFSSL_SMALL_STACK
+ word32* m;
+ word32* v;
+
+ m = (word32*)XMALLOC(sizeof(word32) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ if ( m == NULL ) return -1;
+
+ v = (word32*)XMALLOC(sizeof(word32) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ if ( v == NULL )
+ {
+ XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ return -1;
+ }
+#else
+ word32 m[16];
+ word32 v[16];
+#endif
+
+ for( i = 0; i < 16; ++i )
+ m[i] = load32( block + i * sizeof( m[i] ) );
+
+ for( i = 0; i < 8; ++i )
+ v[i] = S->h[i];
+
+ v[ 8] = blake2s_IV[0];
+ v[ 9] = blake2s_IV[1];
+ v[10] = blake2s_IV[2];
+ v[11] = blake2s_IV[3];
+ v[12] = S->t[0] ^ blake2s_IV[4];
+ v[13] = S->t[1] ^ blake2s_IV[5];
+ v[14] = S->f[0] ^ blake2s_IV[6];
+ v[15] = S->f[1] ^ blake2s_IV[7];
+#define G(r,i,a,b,c,d) \
+ do { \
+ a = a + b + m[blake2s_sigma[r][2*i+0]]; \
+ d = rotr32(d ^ a, 16); \
+ c = c + d; \
+ b = rotr32(b ^ c, 12); \
+ a = a + b + m[blake2s_sigma[r][2*i+1]]; \
+ d = rotr32(d ^ a, 8); \
+ c = c + d; \
+ b = rotr32(b ^ c, 7); \
+ } while(0)
+#define ROUND(r) \
+ do { \
+ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+ G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+ G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+ G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+ G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+ } while(0)
+ ROUND( 0 );
+ ROUND( 1 );
+ ROUND( 2 );
+ ROUND( 3 );
+ ROUND( 4 );
+ ROUND( 5 );
+ ROUND( 6 );
+ ROUND( 7 );
+ ROUND( 8 );
+ ROUND( 9 );
+
+ for( i = 0; i < 8; ++i )
+ S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+
+#undef G
+#undef ROUND
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(v, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return 0;
+}
+
+/* inlen now in bytes */
+int blake2s_update( blake2s_state *S, const byte *in, word32 inlen )
+{
+ while( inlen > 0 )
+ {
+ word32 left = S->buflen;
+ word32 fill = 2 * BLAKE2S_BLOCKBYTES - left;
+
+ if( inlen > fill )
+ {
+ XMEMCPY( S->buf + left, in, (wolfssl_word)fill ); /* Fill buffer */
+ S->buflen += fill;
+ blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
+
+ if ( blake2s_compress( S, S->buf ) < 0 ) return -1; /* Compress */
+
+ XMEMCPY( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
+ /* Shift buffer left */
+ S->buflen -= BLAKE2S_BLOCKBYTES;
+ in += fill;
+ inlen -= fill;
+ }
+ else /* inlen <= fill */
+ {
+ XMEMCPY( S->buf + left, in, (wolfssl_word)inlen );
+ S->buflen += inlen; /* Be lazy, do not compress */
+ inlen = 0;
+ }
+ }
+
+ return 0;
+}
+
+/* Is this correct? */
+int blake2s_final( blake2s_state *S, byte *out, byte outlen )
+{
+ int i;
+ byte buffer[BLAKE2S_BLOCKBYTES];
+
+ if( S->buflen > BLAKE2S_BLOCKBYTES )
+ {
+ blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
+
+ if ( blake2s_compress( S, S->buf ) < 0 ) return -1;
+
+ S->buflen -= BLAKE2S_BLOCKBYTES;
+ XMEMCPY( S->buf, S->buf + BLAKE2S_BLOCKBYTES, (wolfssl_word)S->buflen );
+ }
+
+ blake2s_increment_counter( S, S->buflen );
+ blake2s_set_lastblock( S );
+ XMEMSET( S->buf + S->buflen, 0, (wolfssl_word)(2 * BLAKE2S_BLOCKBYTES - S->buflen) );
+ /* Padding */
+ if ( blake2s_compress( S, S->buf ) < 0 ) return -1;
+
+ for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
+ store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
+
+ XMEMCPY( out, buffer, outlen );
+ return 0;
+}
+
+/* inlen, at least, should be word32. Others can be size_t. */
+int blake2s( byte *out, const void *in, const void *key, const byte outlen,
+ const word32 inlen, byte keylen )
+{
+ blake2s_state S[1];
+
+ /* Verify parameters */
+ if ( NULL == in ) return -1;
+
+ if ( NULL == out ) return -1;
+
+ if( NULL == key ) keylen = 0;
+
+ if( keylen > 0 )
+ {
+ if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
+ }
+ else
+ {
+ if( blake2s_init( S, outlen ) < 0 ) return -1;
+ }
+
+ if ( blake2s_update( S, ( byte * )in, inlen ) < 0) return -1;
+
+ return blake2s_final( S, out, outlen );
+}
+
+#if defined(BLAKE2S_SELFTEST)
+#include <string.h>
+#include "blake2-kat.h"
+int main( int argc, char **argv )
+{
+ byte key[BLAKE2S_KEYBYTES];
+ byte buf[KAT_LENGTH];
+
+ for( word32 i = 0; i < BLAKE2S_KEYBYTES; ++i )
+ key[i] = ( byte )i;
+
+ for( word32 i = 0; i < KAT_LENGTH; ++i )
+ buf[i] = ( byte )i;
+
+ for( word32 i = 0; i < KAT_LENGTH; ++i )
+ {
+ byte hash[BLAKE2S_OUTBYTES];
+ if ( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 )
+ {
+ puts( "error" );
+ return -1;
+ }
+
+ if( 0 != XMEMCMP( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
+ {
+ puts( "error" );
+ return -1;
+ }
+ }
+
+ puts( "ok" );
+ return 0;
+}
+#endif
+
+
+/* wolfCrypt API */
+
+/* Init Blake2s digest, track size in case final doesn't want to "remember" */
+int wc_InitBlake2s(Blake2s* b2s, word32 digestSz)
+{
+ if (b2s == NULL){
+ return -1;
+ }
+ b2s->digestSz = digestSz;
+
+ return blake2s_init(b2s->S, (byte)digestSz);
+}
+
+
+/* Blake2s Update */
+int wc_Blake2sUpdate(Blake2s* b2s, const byte* data, word32 sz)
+{
+ return blake2s_update(b2s->S, data, sz);
+}
+
+
+/* Blake2s Final, if pass in zero size we use init digestSz */
+int wc_Blake2sFinal(Blake2s* b2s, byte* final, word32 requestSz)
+{
+ word32 sz = requestSz ? requestSz : b2s->digestSz;
+
+ return blake2s_final(b2s->S, final, (byte)sz);
+}
+
+
+/* end CTaoCrypt API */
+
+#endif /* HAVE_BLAKE2S */
+
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/camellia.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/camellia.c
index 071019c6c..89ee6617a 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/camellia.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/camellia.c
@@ -27,9 +27,9 @@
/* camellia.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -43,11 +43,12 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
/*
- * Algorithm Specification
+ * Algorithm Specification
* http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html
*/
@@ -66,6 +67,7 @@
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
@@ -510,7 +512,7 @@ static int camellia_setup128(const unsigned char *key, u32 *subkey)
#endif
/**
- * k == kll || klr || krl || krr (|| is concatination)
+ * k == kll || klr || krl || krr (|| is concatenation)
*/
kll = GETU32(key );
klr = GETU32(key + 4);
@@ -744,7 +746,7 @@ static int camellia_setup256(const unsigned char *key, u32 *subkey)
/**
* key = (kll || klr || krl || krr || krll || krlr || krrl || krrr)
- * (|| is concatination)
+ * (|| is concatenation)
*/
kll = GETU32(key );
@@ -1015,7 +1017,7 @@ static int camellia_setup256(const unsigned char *key, u32 *subkey)
CamelliaSubkeyR(30) = CamelliaSubkeyL(30) ^ dw, CamelliaSubkeyL(30) = dw;
dw = CamelliaSubkeyL(31) ^ CamelliaSubkeyR(31), dw = CAMELLIA_RL8(dw);
CamelliaSubkeyR(31) = CamelliaSubkeyL(31) ^ dw,CamelliaSubkeyL(31) = dw;
-
+
#ifdef WOLFSSL_SMALL_STACK
XFREE(subL, NULL, DYNAMIC_TYPE_TMP_BUFFER);
XFREE(subR, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -1029,13 +1031,13 @@ static int camellia_setup192(const unsigned char *key, u32 *subkey)
unsigned char kk[32];
u32 krll, krlr, krrl,krrr;
- memcpy(kk, key, 24);
- memcpy((unsigned char *)&krll, key+16,4);
- memcpy((unsigned char *)&krlr, key+20,4);
+ XMEMCPY(kk, key, 24);
+ XMEMCPY((unsigned char *)&krll, key+16,4);
+ XMEMCPY((unsigned char *)&krlr, key+20,4);
krrl = ~krll;
krrr = ~krlr;
- memcpy(kk+24, (unsigned char *)&krrl, 4);
- memcpy(kk+28, (unsigned char *)&krrr, 4);
+ XMEMCPY(kk+24, (unsigned char *)&krrl, 4);
+ XMEMCPY(kk+28, (unsigned char *)&krrr, 4);
return camellia_setup256(kk, subkey);
}
@@ -1132,14 +1134,14 @@ static void camellia_encrypt128(const u32 *subkey, u32 *io)
io[1] = io[3];
io[2] = t0;
io[3] = t1;
-
+
return;
}
static void camellia_decrypt128(const u32 *subkey, u32 *io)
{
- u32 il,ir,t0,t1; /* temporary valiables */
-
+ u32 il,ir,t0,t1; /* temporary variables */
+
/* pre whitening but absorb kw2*/
io[0] ^= CamelliaSubkeyL(24);
io[1] ^= CamelliaSubkeyR(24);
@@ -1231,7 +1233,7 @@ static void camellia_decrypt128(const u32 *subkey, u32 *io)
*/
static void camellia_encrypt256(const u32 *subkey, u32 *io)
{
- u32 il,ir,t0,t1; /* temporary valiables */
+ u32 il,ir,t0,t1; /* temporary variables */
/* pre whitening but absorb kw2*/
io[0] ^= CamelliaSubkeyL(0);
@@ -1345,12 +1347,12 @@ static void camellia_encrypt256(const u32 *subkey, u32 *io)
static void camellia_decrypt256(const u32 *subkey, u32 *io)
{
- u32 il,ir,t0,t1; /* temporary valiables */
+ u32 il,ir,t0,t1; /* temporary variables */
/* pre whitening but absorb kw2*/
io[0] ^= CamelliaSubkeyL(32);
io[1] ^= CamelliaSubkeyR(32);
-
+
/* main iteration */
CAMELLIA_ROUNDSM(io[0],io[1],
CamelliaSubkeyL(31),CamelliaSubkeyR(31),
@@ -1462,9 +1464,9 @@ static void camellia_decrypt256(const u32 *subkey, u32 *io)
* API for compatibility
*/
-static void Camellia_EncryptBlock(const int keyBitLength,
- const unsigned char *plaintext,
- const KEY_TABLE_TYPE keyTable,
+static void Camellia_EncryptBlock(const int keyBitLength,
+ const unsigned char *plaintext,
+ const KEY_TABLE_TYPE keyTable,
unsigned char *ciphertext)
{
u32 tmp[4];
@@ -1493,9 +1495,9 @@ static void Camellia_EncryptBlock(const int keyBitLength,
PUTU32(ciphertext + 12, tmp[3]);
}
-static void Camellia_DecryptBlock(const int keyBitLength,
- const unsigned char *ciphertext,
- const KEY_TABLE_TYPE keyTable,
+static void Camellia_DecryptBlock(const int keyBitLength,
+ const unsigned char *ciphertext,
+ const KEY_TABLE_TYPE keyTable,
unsigned char *plaintext)
{
u32 tmp[4];
@@ -1572,21 +1574,35 @@ int wc_CamelliaSetIV(Camellia* cam, const byte* iv)
}
-void wc_CamelliaEncryptDirect(Camellia* cam, byte* out, const byte* in)
+int wc_CamelliaEncryptDirect(Camellia* cam, byte* out, const byte* in)
{
+ if (cam == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
Camellia_EncryptBlock(cam->keySz, in, cam->key, out);
+
+ return 0;
}
-void wc_CamelliaDecryptDirect(Camellia* cam, byte* out, const byte* in)
+int wc_CamelliaDecryptDirect(Camellia* cam, byte* out, const byte* in)
{
+ if (cam == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
Camellia_DecryptBlock(cam->keySz, in, cam->key, out);
+
+ return 0;
}
-void wc_CamelliaCbcEncrypt(Camellia* cam, byte* out, const byte* in, word32 sz)
+int wc_CamelliaCbcEncrypt(Camellia* cam, byte* out, const byte* in, word32 sz)
{
- word32 blocks = sz / CAMELLIA_BLOCK_SIZE;
+ word32 blocks;
+ if (cam == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ blocks = sz / CAMELLIA_BLOCK_SIZE;
while (blocks--) {
xorbuf((byte*)cam->reg, in, CAMELLIA_BLOCK_SIZE);
@@ -1597,12 +1613,18 @@ void wc_CamelliaCbcEncrypt(Camellia* cam, byte* out, const byte* in, word32 sz)
out += CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
}
+
+ return 0;
}
-void wc_CamelliaCbcDecrypt(Camellia* cam, byte* out, const byte* in, word32 sz)
+int wc_CamelliaCbcDecrypt(Camellia* cam, byte* out, const byte* in, word32 sz)
{
- word32 blocks = sz / CAMELLIA_BLOCK_SIZE;
+ word32 blocks;
+ if (cam == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ blocks = sz / CAMELLIA_BLOCK_SIZE;
while (blocks--) {
XMEMCPY(cam->tmp, in, CAMELLIA_BLOCK_SIZE);
@@ -1613,6 +1635,8 @@ void wc_CamelliaCbcDecrypt(Camellia* cam, byte* out, const byte* in, word32 sz)
out += CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
}
+
+ return 0;
}
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha.c
index 4e95bdbd0..38a1ede7d 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha.c
@@ -1,8 +1,8 @@
/* chacha.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,8 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
- *
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/*
* based from
* chacha-ref.c version 20080118
* D. J. Bernstein
@@ -25,20 +27,26 @@
*/
+#ifdef WOLFSSL_ARMASM
+ /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
+
+#else
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
-#ifdef HAVE_CHACHA
+#if defined(HAVE_CHACHA) && !defined(WOLFSSL_ARMASM)
#include <wolfssl/wolfcrypt/chacha.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
@@ -46,6 +54,31 @@
#include <stdio.h>
#endif
+#ifdef USE_INTEL_CHACHA_SPEEDUP
+ #include <emmintrin.h>
+ #include <immintrin.h>
+
+ #if defined(__GNUC__) && ((__GNUC__ < 4) || \
+ (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+ #undef NO_AVX2_SUPPORT
+ #define NO_AVX2_SUPPORT
+ #endif
+ #if defined(__clang__) && ((__clang_major__ < 3) || \
+ (__clang_major__ == 3 && __clang_minor__ <= 5))
+ #undef NO_AVX2_SUPPORT
+ #define NO_AVX2_SUPPORT
+ #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
+ #undef NO_AVX2_SUPPORT
+ #endif
+
+ #ifndef NO_AVX2_SUPPORT
+ #define HAVE_INTEL_AVX2
+ #endif
+
+ static int cpuidFlagsSet = 0;
+ static int cpuidFlags = 0;
+#endif
+
#ifdef BIG_ENDIAN_ORDER
#define LITTLE32(x) ByteReverseWord32(x)
#else
@@ -77,12 +110,12 @@
*/
int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
{
- word32 temp[3]; /* used for alignment of memory */
+ word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
#ifdef CHACHA_AEAD_TEST
word32 i;
printf("NONCE : ");
- for (i = 0; i < 12; i++) {
+ for (i = 0; i < CHACHA_IV_BYTES; i++) {
printf("%02x", inIv[i]);
}
printf("\n\n");
@@ -91,12 +124,13 @@ int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
if (ctx == NULL)
return BAD_FUNC_ARG;
- XMEMCPY(temp, inIv, 12);
+ XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
- ctx->X[12] = LITTLE32(counter); /* block counter */
- ctx->X[13] = LITTLE32(temp[0]); /* fixed variable from nonce */
- ctx->X[14] = LITTLE32(temp[1]); /* counter from nonce */
- ctx->X[15] = LITTLE32(temp[2]); /* counter from nonce */
+ ctx->left = 0; /* resets state */
+ ctx->X[CHACHA_IV_BYTES+0] = counter; /* block counter */
+ ctx->X[CHACHA_IV_BYTES+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
+ ctx->X[CHACHA_IV_BYTES+2] = LITTLE32(temp[1]); /* counter from nonce */
+ ctx->X[CHACHA_IV_BYTES+3] = LITTLE32(temp[2]); /* counter from nonce */
return 0;
}
@@ -121,7 +155,7 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
if (ctx == NULL)
return BAD_FUNC_ARG;
- if (keySz != 16 && keySz != 32)
+ if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
return BAD_FUNC_ARG;
#ifdef XSTREAM_ALIGN
@@ -152,7 +186,7 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
ctx->X[5] = U8TO32_LITTLE(k + 4);
ctx->X[6] = U8TO32_LITTLE(k + 8);
ctx->X[7] = U8TO32_LITTLE(k + 12);
- if (keySz == 32) {
+ if (keySz == CHACHA_MAX_KEY_SZ) {
k += 16;
constants = sigma;
}
@@ -167,6 +201,7 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
ctx->X[ 1] = constants[1];
ctx->X[ 2] = constants[2];
ctx->X[ 3] = constants[3];
+ ctx->left = 0; /* resets state */
return 0;
}
@@ -174,12 +209,13 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
/**
* Converts word into bytes with rotations having been done.
*/
-static INLINE void wc_Chacha_wordtobyte(word32 output[16], const word32 input[16])
+static WC_INLINE void wc_Chacha_wordtobyte(word32 output[CHACHA_CHUNK_WORDS],
+ const word32 input[CHACHA_CHUNK_WORDS])
{
- word32 x[16];
+ word32 x[CHACHA_CHUNK_WORDS];
word32 i;
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
x[i] = input[i];
}
@@ -194,54 +230,114 @@ static INLINE void wc_Chacha_wordtobyte(word32 output[16], const word32 input[16
QUARTERROUND(3, 4, 9, 14)
}
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
x[i] = PLUS(x[i], input[i]);
}
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
output[i] = LITTLE32(x[i]);
}
}
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
+ word32 bytes);
+extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
+ word32 bytes);
+extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
+ word32 bytes);
+
+#ifdef __cplusplus
+ } /* extern "C" */
+#endif
+
+
/**
* Encrypt a stream of bytes
*/
static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
- word32 bytes)
+ word32 bytes)
{
byte* output;
- word32 temp[16]; /* used to make sure aligned */
+ word32 temp[CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
word32 i;
- output = (byte*)temp;
+ /* handle left overs */
+ if (bytes > 0 && ctx->left > 0) {
+ wc_Chacha_wordtobyte(temp, ctx->X); /* recreate the stream */
+ output = (byte*)temp + CHACHA_CHUNK_BYTES - ctx->left;
+ for (i = 0; i < bytes && i < ctx->left; i++) {
+ c[i] = m[i] ^ output[i];
+ }
+ ctx->left = ctx->left - i;
- if (!bytes) return;
- for (;;) {
+ /* Used up all of the stream that was left, increment the counter */
+ if (ctx->left == 0) {
+ ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
+ }
+ bytes = bytes - i;
+ c += i;
+ m += i;
+ }
+
+ output = (byte*)temp;
+ while (bytes >= CHACHA_CHUNK_BYTES) {
wc_Chacha_wordtobyte(temp, ctx->X);
- ctx->X[12] = PLUSONE(ctx->X[12]);
- if (bytes <= 64) {
- for (i = 0; i < bytes; ++i) {
- c[i] = m[i] ^ output[i];
- }
- return;
+ ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
+ for (i = 0; i < CHACHA_CHUNK_BYTES; ++i) {
+ c[i] = m[i] ^ output[i];
}
- for (i = 0; i < 64; ++i) {
+ bytes -= CHACHA_CHUNK_BYTES;
+ c += CHACHA_CHUNK_BYTES;
+ m += CHACHA_CHUNK_BYTES;
+ }
+
+ if (bytes) {
+ /* in this case there will always be some left over since bytes is less
+ * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
+ * stream in order for the stream to be recreated on next call */
+ wc_Chacha_wordtobyte(temp, ctx->X);
+ for (i = 0; i < bytes; ++i) {
c[i] = m[i] ^ output[i];
}
- bytes -= 64;
- c += 64;
- m += 64;
+ ctx->left = CHACHA_CHUNK_BYTES - i;
}
}
+
/**
* API to encrypt/decrypt a message of any size.
*/
-int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input, word32 msglen)
+int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
+ word32 msglen)
{
if (ctx == NULL)
return BAD_FUNC_ARG;
+#ifdef USE_INTEL_CHACHA_SPEEDUP
+ if (!cpuidFlagsSet) {
+ cpuidFlags = cpuid_get_flags();
+ cpuidFlagsSet = 1;
+ }
+
+ #ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_AVX2(cpuidFlags)) {
+ chacha_encrypt_avx2(ctx, input, output, msglen);
+ return 0;
+ }
+ #endif
+ if (IS_INTEL_AVX1(cpuidFlags)) {
+ chacha_encrypt_avx1(ctx, input, output, msglen);
+ return 0;
+ }
+ else {
+ chacha_encrypt_x64(ctx, input, output, msglen);
+ return 0;
+ }
+#endif
wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
return 0;
@@ -249,3 +345,4 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input, word32 msgle
#endif /* HAVE_CHACHA*/
+#endif /* WOLFSSL_ARMASM */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha20_poly1305.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha20_poly1305.c
index 4a2b1be22..64bc4c199 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha20_poly1305.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha20_poly1305.c
@@ -1,8 +1,8 @@
/* chacha.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,10 +16,11 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -31,29 +32,15 @@
#include <wolfssl/wolfcrypt/chacha20_poly1305.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/logging.h>
-#include <wolfssl/wolfcrypt/chacha.h>
-#include <wolfssl/wolfcrypt/poly1305.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+#define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
-#ifdef CHACHA_AEAD_TEST
-#include <stdio.h>
-#endif
-
#define CHACHA20_POLY1305_AEAD_INITIAL_COUNTER 0
-#define CHACHA20_POLY1305_MAC_PADDING_ALIGNMENT 16
-
-static void word32ToLittle64(const word32 inLittle32, byte outLittle64[8]);
-static int calculateAuthTag(
- const byte inAuthKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
- const byte* inAAD, const word32 inAADLen,
- const byte *inCiphertext, const word32 inCiphertextLen,
- byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE]);
-
int wc_ChaCha20Poly1305_Encrypt(
const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE],
@@ -62,12 +49,10 @@ int wc_ChaCha20Poly1305_Encrypt(
byte* outCiphertext,
byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE])
{
- int err;
- byte poly1305Key[CHACHA20_POLY1305_AEAD_KEYSIZE];
- ChaCha chaChaCtx;
+ int ret;
+ ChaChaPoly_Aead aead;
/* Validate function arguments */
-
if (!inKey || !inIV ||
!inPlaintext || !inPlaintextLen ||
!outCiphertext ||
@@ -76,35 +61,18 @@ int wc_ChaCha20Poly1305_Encrypt(
return BAD_FUNC_ARG;
}
- XMEMSET(poly1305Key, 0, sizeof(poly1305Key));
-
- /* Create the Poly1305 key */
- err = wc_Chacha_SetKey(&chaChaCtx, inKey, CHACHA20_POLY1305_AEAD_KEYSIZE);
- if (err != 0) return err;
-
- err = wc_Chacha_SetIV(&chaChaCtx, inIV,
- CHACHA20_POLY1305_AEAD_INITIAL_COUNTER);
- if (err != 0) return err;
-
- err = wc_Chacha_Process(&chaChaCtx, poly1305Key, poly1305Key,
- CHACHA20_POLY1305_AEAD_KEYSIZE);
- if (err != 0) return err;
-
- /* Encrypt the plaintext using ChaCha20 */
- err = wc_Chacha_Process(&chaChaCtx, outCiphertext, inPlaintext,
- inPlaintextLen);
- /* Calculate the Poly1305 auth tag */
- if (err == 0)
- err = calculateAuthTag(poly1305Key,
- inAAD, inAADLen,
- outCiphertext, inPlaintextLen,
- outAuthTag);
- ForceZero(poly1305Key, sizeof(poly1305Key));
-
- return err;
+ ret = wc_ChaCha20Poly1305_Init(&aead, inKey, inIV,
+ CHACHA20_POLY1305_AEAD_ENCRYPT);
+ if (ret == 0)
+ ret = wc_ChaCha20Poly1305_UpdateAad(&aead, inAAD, inAADLen);
+ if (ret == 0)
+ ret = wc_ChaCha20Poly1305_UpdateData(&aead, inPlaintext, outCiphertext,
+ inPlaintextLen);
+ if (ret == 0)
+ ret = wc_ChaCha20Poly1305_Final(&aead, outAuthTag);
+ return ret;
}
-
int wc_ChaCha20Poly1305_Decrypt(
const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE],
@@ -113,13 +81,11 @@ int wc_ChaCha20Poly1305_Decrypt(
const byte inAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE],
byte* outPlaintext)
{
- int err;
- byte poly1305Key[CHACHA20_POLY1305_AEAD_KEYSIZE];
- ChaCha chaChaCtx;
+ int ret;
+ ChaChaPoly_Aead aead;
byte calculatedAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE];
/* Validate function arguments */
-
if (!inKey || !inIV ||
!inCiphertext || !inCiphertextLen ||
!inAuthTag ||
@@ -129,146 +95,192 @@ int wc_ChaCha20Poly1305_Decrypt(
}
XMEMSET(calculatedAuthTag, 0, sizeof(calculatedAuthTag));
- XMEMSET(poly1305Key, 0, sizeof(poly1305Key));
-
- /* Create the Poly1305 key */
- err = wc_Chacha_SetKey(&chaChaCtx, inKey, CHACHA20_POLY1305_AEAD_KEYSIZE);
- if (err != 0) return err;
-
- err = wc_Chacha_SetIV(&chaChaCtx, inIV,
- CHACHA20_POLY1305_AEAD_INITIAL_COUNTER);
- if (err != 0) return err;
-
- err = wc_Chacha_Process(&chaChaCtx, poly1305Key, poly1305Key,
- CHACHA20_POLY1305_AEAD_KEYSIZE);
- if (err != 0) return err;
-
- /* Calculate the Poly1305 auth tag */
- err = calculateAuthTag(poly1305Key,
- inAAD, inAADLen,
- inCiphertext, inCiphertextLen,
- calculatedAuthTag);
-
- /* Compare the calculated auth tag with the received one */
- if (err == 0 && ConstantCompare(inAuthTag, calculatedAuthTag,
- CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE) != 0)
- {
- err = MAC_CMP_FAILED_E;
- }
- /* Decrypt the received ciphertext */
- if (err == 0)
- err = wc_Chacha_Process(&chaChaCtx, outPlaintext, inCiphertext,
- inCiphertextLen);
- ForceZero(poly1305Key, sizeof(poly1305Key));
-
- return err;
+ ret = wc_ChaCha20Poly1305_Init(&aead, inKey, inIV,
+ CHACHA20_POLY1305_AEAD_DECRYPT);
+ if (ret == 0)
+ ret = wc_ChaCha20Poly1305_UpdateAad(&aead, inAAD, inAADLen);
+ if (ret == 0)
+ ret = wc_ChaCha20Poly1305_UpdateData(&aead, inCiphertext, outPlaintext,
+ inCiphertextLen);
+ if (ret == 0)
+ ret = wc_ChaCha20Poly1305_Final(&aead, calculatedAuthTag);
+ if (ret == 0)
+ ret = wc_ChaCha20Poly1305_CheckTag(inAuthTag, calculatedAuthTag);
+ return ret;
}
+int wc_ChaCha20Poly1305_CheckTag(
+ const byte authTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE],
+ const byte authTagChk[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE])
+{
+ int ret = 0;
+ if (authTag == NULL || authTagChk == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ if (ConstantCompare(authTag, authTagChk,
+ CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE) != 0) {
+ ret = MAC_CMP_FAILED_E;
+ }
+ return ret;
+}
-static int calculateAuthTag(
- const byte inAuthKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
- const byte *inAAD, const word32 inAADLen,
- const byte *inCiphertext, const word32 inCiphertextLen,
- byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE])
+int wc_ChaCha20Poly1305_Init(ChaChaPoly_Aead* aead,
+ const byte inKey[CHACHA20_POLY1305_AEAD_KEYSIZE],
+ const byte inIV[CHACHA20_POLY1305_AEAD_IV_SIZE],
+ int isEncrypt)
{
- int err;
- Poly1305 poly1305Ctx;
- byte padding[CHACHA20_POLY1305_MAC_PADDING_ALIGNMENT - 1];
- word32 paddingLen;
- byte little64[8];
+ int ret;
+ byte authKey[CHACHA20_POLY1305_AEAD_KEYSIZE];
- XMEMSET(padding, 0, sizeof(padding));
+ /* check arguments */
+ if (aead == NULL || inKey == NULL || inIV == NULL) {
+ return BAD_FUNC_ARG;
+ }
- /* Initialize Poly1305 */
+ /* setup aead context */
+ XMEMSET(aead, 0, sizeof(ChaChaPoly_Aead));
+ XMEMSET(authKey, 0, sizeof(authKey));
+ aead->isEncrypt = isEncrypt;
+
+ /* Initialize the ChaCha20 context (key and iv) */
+ ret = wc_Chacha_SetKey(&aead->chacha, inKey,
+ CHACHA20_POLY1305_AEAD_KEYSIZE);
+ if (ret == 0) {
+ ret = wc_Chacha_SetIV(&aead->chacha, inIV,
+ CHACHA20_POLY1305_AEAD_INITIAL_COUNTER);
+ }
- err = wc_Poly1305SetKey(&poly1305Ctx, inAuthKey,
- CHACHA20_POLY1305_AEAD_KEYSIZE);
- if (err)
- {
- return err;
+ /* Create the Poly1305 key */
+ if (ret == 0) {
+ ret = wc_Chacha_Process(&aead->chacha, authKey, authKey,
+ CHACHA20_POLY1305_AEAD_KEYSIZE);
}
- /* Create the authTag by MAC'ing the following items: */
+ /* Initialize Poly1305 context */
+ if (ret == 0) {
+ ret = wc_Poly1305SetKey(&aead->poly, authKey,
+ CHACHA20_POLY1305_AEAD_KEYSIZE);
+ }
- /* -- AAD */
+ /* advance counter by 1 after creating Poly1305 key */
+ if (ret == 0) {
+ ret = wc_Chacha_SetIV(&aead->chacha, inIV,
+ CHACHA20_POLY1305_AEAD_INITIAL_COUNTER + 1);
+ }
- if (inAAD && inAADLen)
- {
- err = wc_Poly1305Update(&poly1305Ctx, inAAD, inAADLen);
+ if (ret == 0) {
+ aead->state = CHACHA20_POLY1305_STATE_READY;
+ }
- /* -- padding1: pad the AAD to 16 bytes */
+ return ret;
+}
- paddingLen = -inAADLen & (CHACHA20_POLY1305_MAC_PADDING_ALIGNMENT - 1);
- if (paddingLen)
- {
- err += wc_Poly1305Update(&poly1305Ctx, padding, paddingLen);
- }
+/* optional additional authentication data */
+int wc_ChaCha20Poly1305_UpdateAad(ChaChaPoly_Aead* aead,
+ const byte* inAAD, word32 inAADLen)
+{
+ int ret = 0;
- if (err)
- {
- return err;
+ if (aead == NULL || (inAAD == NULL && inAADLen > 0)) {
+ return BAD_FUNC_ARG;
+ }
+ if (aead->state != CHACHA20_POLY1305_STATE_READY &&
+ aead->state != CHACHA20_POLY1305_STATE_AAD) {
+ return BAD_STATE_E;
+ }
+
+ if (inAAD && inAADLen > 0) {
+ ret = wc_Poly1305Update(&aead->poly, inAAD, inAADLen);
+ if (ret == 0) {
+ aead->aadLen += inAADLen;
+ aead->state = CHACHA20_POLY1305_STATE_AAD;
}
}
- /* -- Ciphertext */
+ return ret;
+}
- err = wc_Poly1305Update(&poly1305Ctx, inCiphertext, inCiphertextLen);
- if (err)
- {
- return err;
+/* inData and outData can be same pointer (inline) */
+int wc_ChaCha20Poly1305_UpdateData(ChaChaPoly_Aead* aead,
+ const byte* inData, byte* outData, word32 dataLen)
+{
+ int ret = 0;
+
+ if (aead == NULL || inData == NULL || outData == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ if (aead->state != CHACHA20_POLY1305_STATE_READY &&
+ aead->state != CHACHA20_POLY1305_STATE_AAD &&
+ aead->state != CHACHA20_POLY1305_STATE_DATA) {
+ return BAD_STATE_E;
}
- /* -- padding2: pad the ciphertext to 16 bytes */
+ /* Pad the AAD */
+ if (aead->state == CHACHA20_POLY1305_STATE_AAD) {
+ ret = wc_Poly1305_Pad(&aead->poly, aead->aadLen);
+ }
- paddingLen = -inCiphertextLen &
- (CHACHA20_POLY1305_MAC_PADDING_ALIGNMENT - 1);
- if (paddingLen)
- {
- err = wc_Poly1305Update(&poly1305Ctx, padding, paddingLen);
- if (err)
- {
- return err;
+ /* advance state */
+ aead->state = CHACHA20_POLY1305_STATE_DATA;
+
+ /* Perform ChaCha20 encrypt/decrypt and Poly1305 auth calc */
+ if (ret == 0) {
+ if (aead->isEncrypt) {
+ ret = wc_Chacha_Process(&aead->chacha, outData, inData, dataLen);
+ if (ret == 0)
+ ret = wc_Poly1305Update(&aead->poly, outData, dataLen);
+ }
+ else {
+ ret = wc_Poly1305Update(&aead->poly, inData, dataLen);
+ if (ret == 0)
+ ret = wc_Chacha_Process(&aead->chacha, outData, inData, dataLen);
}
}
+ if (ret == 0) {
+ aead->dataLen += dataLen;
+ }
+ return ret;
+}
- /* -- AAD length as a 64-bit little endian integer */
-
- word32ToLittle64(inAADLen, little64);
+int wc_ChaCha20Poly1305_Final(ChaChaPoly_Aead* aead,
+ byte outAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE])
+{
+ int ret = 0;
- err = wc_Poly1305Update(&poly1305Ctx, little64, sizeof(little64));
- if (err)
- {
- return err;
+ if (aead == NULL || outAuthTag == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ if (aead->state != CHACHA20_POLY1305_STATE_AAD &&
+ aead->state != CHACHA20_POLY1305_STATE_DATA) {
+ return BAD_STATE_E;
}
- /* -- Ciphertext length as a 64-bit little endian integer */
+ /* Pad the AAD - Make sure it is done */
+ if (aead->state == CHACHA20_POLY1305_STATE_AAD) {
+ ret = wc_Poly1305_Pad(&aead->poly, aead->aadLen);
+ }
- word32ToLittle64(inCiphertextLen, little64);
+ /* Pad the ciphertext to 16 bytes */
+ if (ret == 0) {
+ ret = wc_Poly1305_Pad(&aead->poly, aead->dataLen);
+ }
- err = wc_Poly1305Update(&poly1305Ctx, little64, sizeof(little64));
- if (err)
- {
- return err;
+ /* Add the aad length and plaintext/ciphertext length */
+ if (ret == 0) {
+ ret = wc_Poly1305_EncodeSizes(&aead->poly, aead->aadLen,
+ aead->dataLen);
}
/* Finalize the auth tag */
+ if (ret == 0) {
+ ret = wc_Poly1305Final(&aead->poly, outAuthTag);
+ }
- err = wc_Poly1305Final(&poly1305Ctx, outAuthTag);
-
- return err;
-}
-
-
-static void word32ToLittle64(const word32 inLittle32, byte outLittle64[8])
-{
- XMEMSET(outLittle64, 0, 8);
+ /* reset and cleanup sensitive context */
+ ForceZero(aead, sizeof(ChaChaPoly_Aead));
- outLittle64[0] = (inLittle32 & 0x000000FF);
- outLittle64[1] = (inLittle32 & 0x0000FF00) >> 8;
- outLittle64[2] = (inLittle32 & 0x00FF0000) >> 16;
- outLittle64[3] = (inLittle32 & 0xFF000000) >> 24;
+ return ret;
}
-
#endif /* HAVE_CHACHA && HAVE_POLY1305 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha_asm.S b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha_asm.S
new file mode 100644
index 000000000..f9d5fff81
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/chacha_asm.S
@@ -0,0 +1,1420 @@
+/* chacha_asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifndef HAVE_INTEL_AVX1
+#define HAVE_INTEL_AVX1
+#endif /* HAVE_INTEL_AVX1 */
+#ifndef NO_AVX2_SUPPORT
+#define HAVE_INTEL_AVX2
+#endif /* NO_AVX2_SUPPORT */
+
+#ifndef __APPLE__
+.text
+.globl chacha_encrypt_x64
+.type chacha_encrypt_x64,@function
+.align 4
+chacha_encrypt_x64:
+#else
+.section __TEXT,__text
+.globl _chacha_encrypt_x64
+.p2align 2
+_chacha_encrypt_x64:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x40, %rsp
+ cmpl $0x40, %ecx
+ jl L_chacha_x64_small
+L_chacha_x64_start:
+ subq $48, %rsp
+ movq %rdx, 24(%rsp)
+ movq %rsi, 32(%rsp)
+ movq %rcx, 40(%rsp)
+ movq 32(%rdi), %rax
+ movq 40(%rdi), %rbx
+ movq %rax, 8(%rsp)
+ movq %rbx, 16(%rsp)
+ movl (%rdi), %eax
+ movl 4(%rdi), %ebx
+ movl 8(%rdi), %ecx
+ movl 12(%rdi), %edx
+ movl 16(%rdi), %r8d
+ movl 20(%rdi), %r9d
+ movl 24(%rdi), %r10d
+ movl 28(%rdi), %r11d
+ movl 48(%rdi), %r12d
+ movl 52(%rdi), %r13d
+ movl 56(%rdi), %r14d
+ movl 60(%rdi), %r15d
+ movb $10, (%rsp)
+ movl 8(%rsp), %esi
+ movl 12(%rsp), %ebp
+L_chacha_x64_block_crypt_start:
+ addl %r8d, %eax
+ addl %r9d, %ebx
+ xorl %eax, %r12d
+ xorl %ebx, %r13d
+ roll $16, %r12d
+ roll $16, %r13d
+ addl %r12d, %esi
+ addl %r13d, %ebp
+ xorl %esi, %r8d
+ xorl %ebp, %r9d
+ roll $12, %r8d
+ roll $12, %r9d
+ addl %r8d, %eax
+ addl %r9d, %ebx
+ xorl %eax, %r12d
+ xorl %ebx, %r13d
+ roll $8, %r12d
+ roll $8, %r13d
+ addl %r12d, %esi
+ addl %r13d, %ebp
+ xorl %esi, %r8d
+ xorl %ebp, %r9d
+ roll $7, %r8d
+ roll $7, %r9d
+ movl %esi, 8(%rsp)
+ movl %ebp, 12(%rsp)
+ movl 16(%rsp), %esi
+ movl 20(%rsp), %ebp
+ addl %r10d, %ecx
+ addl %r11d, %edx
+ xorl %ecx, %r14d
+ xorl %edx, %r15d
+ roll $16, %r14d
+ roll $16, %r15d
+ addl %r14d, %esi
+ addl %r15d, %ebp
+ xorl %esi, %r10d
+ xorl %ebp, %r11d
+ roll $12, %r10d
+ roll $12, %r11d
+ addl %r10d, %ecx
+ addl %r11d, %edx
+ xorl %ecx, %r14d
+ xorl %edx, %r15d
+ roll $8, %r14d
+ roll $8, %r15d
+ addl %r14d, %esi
+ addl %r15d, %ebp
+ xorl %esi, %r10d
+ xorl %ebp, %r11d
+ roll $7, %r10d
+ roll $7, %r11d
+ addl %r9d, %eax
+ addl %r10d, %ebx
+ xorl %eax, %r15d
+ xorl %ebx, %r12d
+ roll $16, %r15d
+ roll $16, %r12d
+ addl %r15d, %esi
+ addl %r12d, %ebp
+ xorl %esi, %r9d
+ xorl %ebp, %r10d
+ roll $12, %r9d
+ roll $12, %r10d
+ addl %r9d, %eax
+ addl %r10d, %ebx
+ xorl %eax, %r15d
+ xorl %ebx, %r12d
+ roll $8, %r15d
+ roll $8, %r12d
+ addl %r15d, %esi
+ addl %r12d, %ebp
+ xorl %esi, %r9d
+ xorl %ebp, %r10d
+ roll $7, %r9d
+ roll $7, %r10d
+ movl %esi, 16(%rsp)
+ movl %ebp, 20(%rsp)
+ movl 8(%rsp), %esi
+ movl 12(%rsp), %ebp
+ addl %r11d, %ecx
+ addl %r8d, %edx
+ xorl %ecx, %r13d
+ xorl %edx, %r14d
+ roll $16, %r13d
+ roll $16, %r14d
+ addl %r13d, %esi
+ addl %r14d, %ebp
+ xorl %esi, %r11d
+ xorl %ebp, %r8d
+ roll $12, %r11d
+ roll $12, %r8d
+ addl %r11d, %ecx
+ addl %r8d, %edx
+ xorl %ecx, %r13d
+ xorl %edx, %r14d
+ roll $8, %r13d
+ roll $8, %r14d
+ addl %r13d, %esi
+ addl %r14d, %ebp
+ xorl %esi, %r11d
+ xorl %ebp, %r8d
+ roll $7, %r11d
+ roll $7, %r8d
+ decb (%rsp)
+ jnz L_chacha_x64_block_crypt_start
+ movl %esi, 8(%rsp)
+ movl %ebp, 12(%rsp)
+ movq 32(%rsp), %rsi
+ movq 24(%rsp), %rbp
+ addl (%rdi), %eax
+ addl 4(%rdi), %ebx
+ addl 8(%rdi), %ecx
+ addl 12(%rdi), %edx
+ addl 16(%rdi), %r8d
+ addl 20(%rdi), %r9d
+ addl 24(%rdi), %r10d
+ addl 28(%rdi), %r11d
+ addl 48(%rdi), %r12d
+ addl 52(%rdi), %r13d
+ addl 56(%rdi), %r14d
+ addl 60(%rdi), %r15d
+ xorl (%rsi), %eax
+ xorl 4(%rsi), %ebx
+ xorl 8(%rsi), %ecx
+ xorl 12(%rsi), %edx
+ xorl 16(%rsi), %r8d
+ xorl 20(%rsi), %r9d
+ xorl 24(%rsi), %r10d
+ xorl 28(%rsi), %r11d
+ xorl 48(%rsi), %r12d
+ xorl 52(%rsi), %r13d
+ xorl 56(%rsi), %r14d
+ xorl 60(%rsi), %r15d
+ movl %eax, (%rbp)
+ movl %ebx, 4(%rbp)
+ movl %ecx, 8(%rbp)
+ movl %edx, 12(%rbp)
+ movl %r8d, 16(%rbp)
+ movl %r9d, 20(%rbp)
+ movl %r10d, 24(%rbp)
+ movl %r11d, 28(%rbp)
+ movl %r12d, 48(%rbp)
+ movl %r13d, 52(%rbp)
+ movl %r14d, 56(%rbp)
+ movl %r15d, 60(%rbp)
+ movl 8(%rsp), %eax
+ movl 12(%rsp), %ebx
+ movl 16(%rsp), %ecx
+ movl 20(%rsp), %edx
+ addl 32(%rdi), %eax
+ addl 36(%rdi), %ebx
+ addl 40(%rdi), %ecx
+ addl 44(%rdi), %edx
+ xorl 32(%rsi), %eax
+ xorl 36(%rsi), %ebx
+ xorl 40(%rsi), %ecx
+ xorl 44(%rsi), %edx
+ movl %eax, 32(%rbp)
+ movl %ebx, 36(%rbp)
+ movl %ecx, 40(%rbp)
+ movl %edx, 44(%rbp)
+ movq 24(%rsp), %rdx
+ movq 40(%rsp), %rcx
+ addl $0x01, 48(%rdi)
+ addq $48, %rsp
+ subl $0x40, %ecx
+ addq $0x40, %rsi
+ addq $0x40, %rdx
+ cmpl $0x40, %ecx
+ jge L_chacha_x64_start
+L_chacha_x64_small:
+ cmpl $0x00, %ecx
+ je L_chacha_x64_done
+ subq $48, %rsp
+ movq %rdx, 24(%rsp)
+ movq %rsi, 32(%rsp)
+ movq %rcx, 40(%rsp)
+ movq 32(%rdi), %rax
+ movq 40(%rdi), %rbx
+ movq %rax, 8(%rsp)
+ movq %rbx, 16(%rsp)
+ movl (%rdi), %eax
+ movl 4(%rdi), %ebx
+ movl 8(%rdi), %ecx
+ movl 12(%rdi), %edx
+ movl 16(%rdi), %r8d
+ movl 20(%rdi), %r9d
+ movl 24(%rdi), %r10d
+ movl 28(%rdi), %r11d
+ movl 48(%rdi), %r12d
+ movl 52(%rdi), %r13d
+ movl 56(%rdi), %r14d
+ movl 60(%rdi), %r15d
+ movb $10, (%rsp)
+ movl 8(%rsp), %esi
+ movl 12(%rsp), %ebp
+L_chacha_x64_partial_crypt_start:
+ addl %r8d, %eax
+ addl %r9d, %ebx
+ xorl %eax, %r12d
+ xorl %ebx, %r13d
+ roll $16, %r12d
+ roll $16, %r13d
+ addl %r12d, %esi
+ addl %r13d, %ebp
+ xorl %esi, %r8d
+ xorl %ebp, %r9d
+ roll $12, %r8d
+ roll $12, %r9d
+ addl %r8d, %eax
+ addl %r9d, %ebx
+ xorl %eax, %r12d
+ xorl %ebx, %r13d
+ roll $8, %r12d
+ roll $8, %r13d
+ addl %r12d, %esi
+ addl %r13d, %ebp
+ xorl %esi, %r8d
+ xorl %ebp, %r9d
+ roll $7, %r8d
+ roll $7, %r9d
+ movl %esi, 8(%rsp)
+ movl %ebp, 12(%rsp)
+ movl 16(%rsp), %esi
+ movl 20(%rsp), %ebp
+ addl %r10d, %ecx
+ addl %r11d, %edx
+ xorl %ecx, %r14d
+ xorl %edx, %r15d
+ roll $16, %r14d
+ roll $16, %r15d
+ addl %r14d, %esi
+ addl %r15d, %ebp
+ xorl %esi, %r10d
+ xorl %ebp, %r11d
+ roll $12, %r10d
+ roll $12, %r11d
+ addl %r10d, %ecx
+ addl %r11d, %edx
+ xorl %ecx, %r14d
+ xorl %edx, %r15d
+ roll $8, %r14d
+ roll $8, %r15d
+ addl %r14d, %esi
+ addl %r15d, %ebp
+ xorl %esi, %r10d
+ xorl %ebp, %r11d
+ roll $7, %r10d
+ roll $7, %r11d
+ addl %r9d, %eax
+ addl %r10d, %ebx
+ xorl %eax, %r15d
+ xorl %ebx, %r12d
+ roll $16, %r15d
+ roll $16, %r12d
+ addl %r15d, %esi
+ addl %r12d, %ebp
+ xorl %esi, %r9d
+ xorl %ebp, %r10d
+ roll $12, %r9d
+ roll $12, %r10d
+ addl %r9d, %eax
+ addl %r10d, %ebx
+ xorl %eax, %r15d
+ xorl %ebx, %r12d
+ roll $8, %r15d
+ roll $8, %r12d
+ addl %r15d, %esi
+ addl %r12d, %ebp
+ xorl %esi, %r9d
+ xorl %ebp, %r10d
+ roll $7, %r9d
+ roll $7, %r10d
+ movl %esi, 16(%rsp)
+ movl %ebp, 20(%rsp)
+ movl 8(%rsp), %esi
+ movl 12(%rsp), %ebp
+ addl %r11d, %ecx
+ addl %r8d, %edx
+ xorl %ecx, %r13d
+ xorl %edx, %r14d
+ roll $16, %r13d
+ roll $16, %r14d
+ addl %r13d, %esi
+ addl %r14d, %ebp
+ xorl %esi, %r11d
+ xorl %ebp, %r8d
+ roll $12, %r11d
+ roll $12, %r8d
+ addl %r11d, %ecx
+ addl %r8d, %edx
+ xorl %ecx, %r13d
+ xorl %edx, %r14d
+ roll $8, %r13d
+ roll $8, %r14d
+ addl %r13d, %esi
+ addl %r14d, %ebp
+ xorl %esi, %r11d
+ xorl %ebp, %r8d
+ roll $7, %r11d
+ roll $7, %r8d
+ decb (%rsp)
+ jnz L_chacha_x64_partial_crypt_start
+ movl %esi, 8(%rsp)
+ movl %ebp, 12(%rsp)
+ movq 32(%rsp), %rsi
+ addl (%rdi), %eax
+ addl 4(%rdi), %ebx
+ addl 8(%rdi), %ecx
+ addl 12(%rdi), %edx
+ addl 16(%rdi), %r8d
+ addl 20(%rdi), %r9d
+ addl 24(%rdi), %r10d
+ addl 28(%rdi), %r11d
+ addl 48(%rdi), %r12d
+ addl 52(%rdi), %r13d
+ addl 56(%rdi), %r14d
+ addl 60(%rdi), %r15d
+ movl %eax, 48(%rsp)
+ movl %ebx, 52(%rsp)
+ movl %ecx, 56(%rsp)
+ movl %edx, 60(%rsp)
+ movl %r8d, 64(%rsp)
+ movl %r9d, 68(%rsp)
+ movl %r10d, 72(%rsp)
+ movl %r11d, 76(%rsp)
+ movl %r12d, 96(%rsp)
+ movl %r13d, 100(%rsp)
+ movl %r14d, 104(%rsp)
+ movl %r15d, 108(%rsp)
+ movl 8(%rsp), %eax
+ movl 12(%rsp), %ebx
+ movl 16(%rsp), %ecx
+ movl 20(%rsp), %edx
+ addl 32(%rdi), %eax
+ addl 36(%rdi), %ebx
+ addl 40(%rdi), %ecx
+ addl 44(%rdi), %edx
+ movl %eax, 80(%rsp)
+ movl %ebx, 84(%rsp)
+ movl %ecx, 88(%rsp)
+ movl %edx, 92(%rsp)
+ movq 24(%rsp), %rdx
+ movq 40(%rsp), %rcx
+ addl $0x01, 48(%rdi)
+ addq $48, %rsp
+ movl %ecx, %r8d
+ xorq %rbx, %rbx
+ andl $7, %r8d
+ jz L_chacha_x64_partial_start64
+L_chacha_x64_partial_start8:
+ movzbl (%rsp,%rbx,1), %eax
+ xorb (%rsi,%rbx,1), %al
+ movb %al, (%rdx,%rbx,1)
+ incl %ebx
+ cmpl %r8d, %ebx
+ jne L_chacha_x64_partial_start8
+ je L_chacha_x64_partial_end64
+L_chacha_x64_partial_start64:
+ movq (%rsp,%rbx,1), %rax
+ xorq (%rsi,%rbx,1), %rax
+ movq %rax, (%rdx,%rbx,1)
+ addl $8, %ebx
+L_chacha_x64_partial_end64:
+ cmpl %ecx, %ebx
+ jne L_chacha_x64_partial_start64
+L_chacha_x64_done:
+ addq $0x40, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size chacha_encrypt_x64,.-chacha_encrypt_x64
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX1
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_chacha20_avx1_rotl8:
+.quad 0x605040702010003, 0xe0d0c0f0a09080b
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_chacha20_avx1_rotl16:
+.quad 0x504070601000302, 0xd0c0f0e09080b0a
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_chacha20_avx1_add:
+.quad 0x100000000, 0x300000002
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_chacha20_avx1_four:
+.quad 0x400000004, 0x400000004
+#ifndef __APPLE__
+.text
+.globl chacha_encrypt_avx1
+.type chacha_encrypt_avx1,@function
+.align 4
+chacha_encrypt_avx1:
+#else
+.section __TEXT,__text
+.globl _chacha_encrypt_avx1
+.p2align 2
+_chacha_encrypt_avx1:
+#endif /* __APPLE__ */
+ subq $0x190, %rsp
+ movq %rsp, %r9
+ leaq 256(%rsp), %r10
+ andq $-16, %r9
+ andq $-16, %r10
+ movl %ecx, %eax
+ shrl $8, %eax
+ jz L_chacha20_avx1_end128
+ vpshufd $0x00, (%rdi), %xmm0
+ vpshufd $0x00, 4(%rdi), %xmm1
+ vpshufd $0x00, 8(%rdi), %xmm2
+ vpshufd $0x00, 12(%rdi), %xmm3
+ vpshufd $0x00, 16(%rdi), %xmm4
+ vpshufd $0x00, 20(%rdi), %xmm5
+ vpshufd $0x00, 24(%rdi), %xmm6
+ vpshufd $0x00, 28(%rdi), %xmm7
+ vpshufd $0x00, 32(%rdi), %xmm8
+ vpshufd $0x00, 36(%rdi), %xmm9
+ vpshufd $0x00, 40(%rdi), %xmm10
+ vpshufd $0x00, 44(%rdi), %xmm11
+ vpshufd $0x00, 48(%rdi), %xmm12
+ vpshufd $0x00, 52(%rdi), %xmm13
+ vpshufd $0x00, 56(%rdi), %xmm14
+ vpshufd $0x00, 60(%rdi), %xmm15
+ vpaddd L_chacha20_avx1_add(%rip), %xmm12, %xmm12
+ vmovdqa %xmm0, (%r9)
+ vmovdqa %xmm1, 16(%r9)
+ vmovdqa %xmm2, 32(%r9)
+ vmovdqa %xmm3, 48(%r9)
+ vmovdqa %xmm4, 64(%r9)
+ vmovdqa %xmm5, 80(%r9)
+ vmovdqa %xmm6, 96(%r9)
+ vmovdqa %xmm7, 112(%r9)
+ vmovdqa %xmm8, 128(%r9)
+ vmovdqa %xmm9, 144(%r9)
+ vmovdqa %xmm10, 160(%r9)
+ vmovdqa %xmm11, 176(%r9)
+ vmovdqa %xmm12, 192(%r9)
+ vmovdqa %xmm13, 208(%r9)
+ vmovdqa %xmm14, 224(%r9)
+ vmovdqa %xmm15, 240(%r9)
+L_chacha20_avx1_start128:
+ vmovdqa %xmm11, 48(%r10)
+ movb $10, %r8b
+L_chacha20_avx1_loop128:
+ vpaddd %xmm4, %xmm0, %xmm0
+ vpxor %xmm0, %xmm12, %xmm12
+ vmovdqa 48(%r10), %xmm11
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm12, %xmm12
+ vpaddd %xmm12, %xmm8, %xmm8
+ vpxor %xmm8, %xmm4, %xmm4
+ vpaddd %xmm5, %xmm1, %xmm1
+ vpxor %xmm1, %xmm13, %xmm13
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm13, %xmm13
+ vpaddd %xmm13, %xmm9, %xmm9
+ vpxor %xmm9, %xmm5, %xmm5
+ vpaddd %xmm6, %xmm2, %xmm2
+ vpxor %xmm2, %xmm14, %xmm14
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm14, %xmm14
+ vpaddd %xmm14, %xmm10, %xmm10
+ vpxor %xmm10, %xmm6, %xmm6
+ vpaddd %xmm7, %xmm3, %xmm3
+ vpxor %xmm3, %xmm15, %xmm15
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm15, %xmm15
+ vpaddd %xmm15, %xmm11, %xmm11
+ vpxor %xmm11, %xmm7, %xmm7
+ vmovdqa %xmm11, 48(%r10)
+ vpsrld $20, %xmm4, %xmm11
+ vpslld $12, %xmm4, %xmm4
+ vpxor %xmm11, %xmm4, %xmm4
+ vpsrld $20, %xmm5, %xmm11
+ vpslld $12, %xmm5, %xmm5
+ vpxor %xmm11, %xmm5, %xmm5
+ vpsrld $20, %xmm6, %xmm11
+ vpslld $12, %xmm6, %xmm6
+ vpxor %xmm11, %xmm6, %xmm6
+ vpsrld $20, %xmm7, %xmm11
+ vpslld $12, %xmm7, %xmm7
+ vpxor %xmm11, %xmm7, %xmm7
+ vpaddd %xmm4, %xmm0, %xmm0
+ vpxor %xmm0, %xmm12, %xmm12
+ vmovdqa 48(%r10), %xmm11
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm12, %xmm12
+ vpaddd %xmm12, %xmm8, %xmm8
+ vpxor %xmm8, %xmm4, %xmm4
+ vpaddd %xmm5, %xmm1, %xmm1
+ vpxor %xmm1, %xmm13, %xmm13
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm13, %xmm13
+ vpaddd %xmm13, %xmm9, %xmm9
+ vpxor %xmm9, %xmm5, %xmm5
+ vpaddd %xmm6, %xmm2, %xmm2
+ vpxor %xmm2, %xmm14, %xmm14
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm14, %xmm14
+ vpaddd %xmm14, %xmm10, %xmm10
+ vpxor %xmm10, %xmm6, %xmm6
+ vpaddd %xmm7, %xmm3, %xmm3
+ vpxor %xmm3, %xmm15, %xmm15
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm15, %xmm15
+ vpaddd %xmm15, %xmm11, %xmm11
+ vpxor %xmm11, %xmm7, %xmm7
+ vmovdqa %xmm11, 48(%r10)
+ vpsrld $25, %xmm4, %xmm11
+ vpslld $7, %xmm4, %xmm4
+ vpxor %xmm11, %xmm4, %xmm4
+ vpsrld $25, %xmm5, %xmm11
+ vpslld $7, %xmm5, %xmm5
+ vpxor %xmm11, %xmm5, %xmm5
+ vpsrld $25, %xmm6, %xmm11
+ vpslld $7, %xmm6, %xmm6
+ vpxor %xmm11, %xmm6, %xmm6
+ vpsrld $25, %xmm7, %xmm11
+ vpslld $7, %xmm7, %xmm7
+ vpxor %xmm11, %xmm7, %xmm7
+ vpaddd %xmm5, %xmm0, %xmm0
+ vpxor %xmm0, %xmm15, %xmm15
+ vmovdqa 48(%r10), %xmm11
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm15, %xmm15
+ vpaddd %xmm15, %xmm10, %xmm10
+ vpxor %xmm10, %xmm5, %xmm5
+ vpaddd %xmm6, %xmm1, %xmm1
+ vpxor %xmm1, %xmm12, %xmm12
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm12, %xmm12
+ vpaddd %xmm12, %xmm11, %xmm11
+ vpxor %xmm11, %xmm6, %xmm6
+ vpaddd %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm13, %xmm13
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm13, %xmm13
+ vpaddd %xmm13, %xmm8, %xmm8
+ vpxor %xmm8, %xmm7, %xmm7
+ vpaddd %xmm4, %xmm3, %xmm3
+ vpxor %xmm3, %xmm14, %xmm14
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm14, %xmm14
+ vpaddd %xmm14, %xmm9, %xmm9
+ vpxor %xmm9, %xmm4, %xmm4
+ vmovdqa %xmm11, 48(%r10)
+ vpsrld $20, %xmm5, %xmm11
+ vpslld $12, %xmm5, %xmm5
+ vpxor %xmm11, %xmm5, %xmm5
+ vpsrld $20, %xmm6, %xmm11
+ vpslld $12, %xmm6, %xmm6
+ vpxor %xmm11, %xmm6, %xmm6
+ vpsrld $20, %xmm7, %xmm11
+ vpslld $12, %xmm7, %xmm7
+ vpxor %xmm11, %xmm7, %xmm7
+ vpsrld $20, %xmm4, %xmm11
+ vpslld $12, %xmm4, %xmm4
+ vpxor %xmm11, %xmm4, %xmm4
+ vpaddd %xmm5, %xmm0, %xmm0
+ vpxor %xmm0, %xmm15, %xmm15
+ vmovdqa 48(%r10), %xmm11
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm15, %xmm15
+ vpaddd %xmm15, %xmm10, %xmm10
+ vpxor %xmm10, %xmm5, %xmm5
+ vpaddd %xmm6, %xmm1, %xmm1
+ vpxor %xmm1, %xmm12, %xmm12
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm12, %xmm12
+ vpaddd %xmm12, %xmm11, %xmm11
+ vpxor %xmm11, %xmm6, %xmm6
+ vpaddd %xmm7, %xmm2, %xmm2
+ vpxor %xmm2, %xmm13, %xmm13
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm13, %xmm13
+ vpaddd %xmm13, %xmm8, %xmm8
+ vpxor %xmm8, %xmm7, %xmm7
+ vpaddd %xmm4, %xmm3, %xmm3
+ vpxor %xmm3, %xmm14, %xmm14
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm14, %xmm14
+ vpaddd %xmm14, %xmm9, %xmm9
+ vpxor %xmm9, %xmm4, %xmm4
+ vmovdqa %xmm11, 48(%r10)
+ vpsrld $25, %xmm5, %xmm11
+ vpslld $7, %xmm5, %xmm5
+ vpxor %xmm11, %xmm5, %xmm5
+ vpsrld $25, %xmm6, %xmm11
+ vpslld $7, %xmm6, %xmm6
+ vpxor %xmm11, %xmm6, %xmm6
+ vpsrld $25, %xmm7, %xmm11
+ vpslld $7, %xmm7, %xmm7
+ vpxor %xmm11, %xmm7, %xmm7
+ vpsrld $25, %xmm4, %xmm11
+ vpslld $7, %xmm4, %xmm4
+ vpxor %xmm11, %xmm4, %xmm4
+ decb %r8b
+ jnz L_chacha20_avx1_loop128
+ vmovdqa 48(%r10), %xmm11
+ vpaddd (%r9), %xmm0, %xmm0
+ vpaddd 16(%r9), %xmm1, %xmm1
+ vpaddd 32(%r9), %xmm2, %xmm2
+ vpaddd 48(%r9), %xmm3, %xmm3
+ vpaddd 64(%r9), %xmm4, %xmm4
+ vpaddd 80(%r9), %xmm5, %xmm5
+ vpaddd 96(%r9), %xmm6, %xmm6
+ vpaddd 112(%r9), %xmm7, %xmm7
+ vpaddd 128(%r9), %xmm8, %xmm8
+ vpaddd 144(%r9), %xmm9, %xmm9
+ vpaddd 160(%r9), %xmm10, %xmm10
+ vpaddd 176(%r9), %xmm11, %xmm11
+ vpaddd 192(%r9), %xmm12, %xmm12
+ vpaddd 208(%r9), %xmm13, %xmm13
+ vpaddd 224(%r9), %xmm14, %xmm14
+ vpaddd 240(%r9), %xmm15, %xmm15
+ vmovdqa %xmm8, (%r10)
+ vmovdqa %xmm9, 16(%r10)
+ vmovdqa %xmm10, 32(%r10)
+ vmovdqa %xmm11, 48(%r10)
+ vmovdqa %xmm12, 64(%r10)
+ vmovdqa %xmm13, 80(%r10)
+ vmovdqa %xmm14, 96(%r10)
+ vmovdqa %xmm15, 112(%r10)
+ vpunpckldq %xmm1, %xmm0, %xmm8
+ vpunpckldq %xmm3, %xmm2, %xmm9
+ vpunpckhdq %xmm1, %xmm0, %xmm12
+ vpunpckhdq %xmm3, %xmm2, %xmm13
+ vpunpckldq %xmm5, %xmm4, %xmm10
+ vpunpckldq %xmm7, %xmm6, %xmm11
+ vpunpckhdq %xmm5, %xmm4, %xmm14
+ vpunpckhdq %xmm7, %xmm6, %xmm15
+ vpunpcklqdq %xmm9, %xmm8, %xmm0
+ vpunpcklqdq %xmm11, %xmm10, %xmm1
+ vpunpckhqdq %xmm9, %xmm8, %xmm2
+ vpunpckhqdq %xmm11, %xmm10, %xmm3
+ vpunpcklqdq %xmm13, %xmm12, %xmm4
+ vpunpcklqdq %xmm15, %xmm14, %xmm5
+ vpunpckhqdq %xmm13, %xmm12, %xmm6
+ vpunpckhqdq %xmm15, %xmm14, %xmm7
+ vmovdqu (%rsi), %xmm8
+ vmovdqu 16(%rsi), %xmm9
+ vmovdqu 64(%rsi), %xmm10
+ vmovdqu 80(%rsi), %xmm11
+ vmovdqu 128(%rsi), %xmm12
+ vmovdqu 144(%rsi), %xmm13
+ vmovdqu 192(%rsi), %xmm14
+ vmovdqu 208(%rsi), %xmm15
+ vpxor %xmm8, %xmm0, %xmm0
+ vpxor %xmm9, %xmm1, %xmm1
+ vpxor %xmm10, %xmm2, %xmm2
+ vpxor %xmm11, %xmm3, %xmm3
+ vpxor %xmm12, %xmm4, %xmm4
+ vpxor %xmm13, %xmm5, %xmm5
+ vpxor %xmm14, %xmm6, %xmm6
+ vpxor %xmm15, %xmm7, %xmm7
+ vmovdqu %xmm0, (%rdx)
+ vmovdqu %xmm1, 16(%rdx)
+ vmovdqu %xmm2, 64(%rdx)
+ vmovdqu %xmm3, 80(%rdx)
+ vmovdqu %xmm4, 128(%rdx)
+ vmovdqu %xmm5, 144(%rdx)
+ vmovdqu %xmm6, 192(%rdx)
+ vmovdqu %xmm7, 208(%rdx)
+ vmovdqa (%r10), %xmm0
+ vmovdqa 16(%r10), %xmm1
+ vmovdqa 32(%r10), %xmm2
+ vmovdqa 48(%r10), %xmm3
+ vmovdqa 64(%r10), %xmm4
+ vmovdqa 80(%r10), %xmm5
+ vmovdqa 96(%r10), %xmm6
+ vmovdqa 112(%r10), %xmm7
+ vpunpckldq %xmm1, %xmm0, %xmm8
+ vpunpckldq %xmm3, %xmm2, %xmm9
+ vpunpckhdq %xmm1, %xmm0, %xmm12
+ vpunpckhdq %xmm3, %xmm2, %xmm13
+ vpunpckldq %xmm5, %xmm4, %xmm10
+ vpunpckldq %xmm7, %xmm6, %xmm11
+ vpunpckhdq %xmm5, %xmm4, %xmm14
+ vpunpckhdq %xmm7, %xmm6, %xmm15
+ vpunpcklqdq %xmm9, %xmm8, %xmm0
+ vpunpcklqdq %xmm11, %xmm10, %xmm1
+ vpunpckhqdq %xmm9, %xmm8, %xmm2
+ vpunpckhqdq %xmm11, %xmm10, %xmm3
+ vpunpcklqdq %xmm13, %xmm12, %xmm4
+ vpunpcklqdq %xmm15, %xmm14, %xmm5
+ vpunpckhqdq %xmm13, %xmm12, %xmm6
+ vpunpckhqdq %xmm15, %xmm14, %xmm7
+ vmovdqu 32(%rsi), %xmm8
+ vmovdqu 48(%rsi), %xmm9
+ vmovdqu 96(%rsi), %xmm10
+ vmovdqu 112(%rsi), %xmm11
+ vmovdqu 160(%rsi), %xmm12
+ vmovdqu 176(%rsi), %xmm13
+ vmovdqu 224(%rsi), %xmm14
+ vmovdqu 240(%rsi), %xmm15
+ vpxor %xmm8, %xmm0, %xmm0
+ vpxor %xmm9, %xmm1, %xmm1
+ vpxor %xmm10, %xmm2, %xmm2
+ vpxor %xmm11, %xmm3, %xmm3
+ vpxor %xmm12, %xmm4, %xmm4
+ vpxor %xmm13, %xmm5, %xmm5
+ vpxor %xmm14, %xmm6, %xmm6
+ vpxor %xmm15, %xmm7, %xmm7
+ vmovdqu %xmm0, 32(%rdx)
+ vmovdqu %xmm1, 48(%rdx)
+ vmovdqu %xmm2, 96(%rdx)
+ vmovdqu %xmm3, 112(%rdx)
+ vmovdqu %xmm4, 160(%rdx)
+ vmovdqu %xmm5, 176(%rdx)
+ vmovdqu %xmm6, 224(%rdx)
+ vmovdqu %xmm7, 240(%rdx)
+ vmovdqa 192(%r9), %xmm12
+ addq $0x100, %rsi
+ addq $0x100, %rdx
+ vpaddd L_chacha20_avx1_four(%rip), %xmm12, %xmm12
+ subl $0x100, %ecx
+ vmovdqa %xmm12, 192(%r9)
+ cmpl $0x100, %ecx
+ jl L_chacha20_avx1_done128
+ vmovdqa (%r9), %xmm0
+ vmovdqa 16(%r9), %xmm1
+ vmovdqa 32(%r9), %xmm2
+ vmovdqa 48(%r9), %xmm3
+ vmovdqa 64(%r9), %xmm4
+ vmovdqa 80(%r9), %xmm5
+ vmovdqa 96(%r9), %xmm6
+ vmovdqa 112(%r9), %xmm7
+ vmovdqa 128(%r9), %xmm8
+ vmovdqa 144(%r9), %xmm9
+ vmovdqa 160(%r9), %xmm10
+ vmovdqa 176(%r9), %xmm11
+ vmovdqa 192(%r9), %xmm12
+ vmovdqa 208(%r9), %xmm13
+ vmovdqa 224(%r9), %xmm14
+ vmovdqa 240(%r9), %xmm15
+ jmp L_chacha20_avx1_start128
+L_chacha20_avx1_done128:
+ shl $2, %eax
+ addl %eax, 48(%rdi)
+L_chacha20_avx1_end128:
+ cmpl $0x40, %ecx
+ jl L_chacha20_avx1_block_done
+L_chacha20_avx1_block_start:
+ vmovdqu (%rdi), %xmm0
+ vmovdqu 16(%rdi), %xmm1
+ vmovdqu 32(%rdi), %xmm2
+ vmovdqu 48(%rdi), %xmm3
+ vmovdqa %xmm0, %xmm5
+ vmovdqa %xmm1, %xmm6
+ vmovdqa %xmm2, %xmm7
+ vmovdqa %xmm3, %xmm8
+ movb $10, %al
+L_chacha20_avx1_block_crypt_start:
+ vpaddd %xmm1, %xmm0, %xmm0
+ vpxor %xmm0, %xmm3, %xmm3
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm3, %xmm3
+ vpaddd %xmm3, %xmm2, %xmm2
+ vpxor %xmm2, %xmm1, %xmm1
+ vpsrld $20, %xmm1, %xmm4
+ vpslld $12, %xmm1, %xmm1
+ vpxor %xmm4, %xmm1, %xmm1
+ vpaddd %xmm1, %xmm0, %xmm0
+ vpxor %xmm0, %xmm3, %xmm3
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm3, %xmm3
+ vpaddd %xmm3, %xmm2, %xmm2
+ vpxor %xmm2, %xmm1, %xmm1
+ vpsrld $25, %xmm1, %xmm4
+ vpslld $7, %xmm1, %xmm1
+ vpxor %xmm4, %xmm1, %xmm1
+ vpshufd $57, %xmm1, %xmm1
+ vpshufd $0x4e, %xmm2, %xmm2
+ vpshufd $0x93, %xmm3, %xmm3
+ vpaddd %xmm1, %xmm0, %xmm0
+ vpxor %xmm0, %xmm3, %xmm3
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm3, %xmm3
+ vpaddd %xmm3, %xmm2, %xmm2
+ vpxor %xmm2, %xmm1, %xmm1
+ vpsrld $20, %xmm1, %xmm4
+ vpslld $12, %xmm1, %xmm1
+ vpxor %xmm4, %xmm1, %xmm1
+ vpaddd %xmm1, %xmm0, %xmm0
+ vpxor %xmm0, %xmm3, %xmm3
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm3, %xmm3
+ vpaddd %xmm3, %xmm2, %xmm2
+ vpxor %xmm2, %xmm1, %xmm1
+ vpsrld $25, %xmm1, %xmm4
+ vpslld $7, %xmm1, %xmm1
+ vpxor %xmm4, %xmm1, %xmm1
+ vpshufd $0x93, %xmm1, %xmm1
+ vpshufd $0x4e, %xmm2, %xmm2
+ vpshufd $57, %xmm3, %xmm3
+ decb %al
+ jnz L_chacha20_avx1_block_crypt_start
+ vpaddd %xmm5, %xmm0, %xmm0
+ vpaddd %xmm6, %xmm1, %xmm1
+ vpaddd %xmm7, %xmm2, %xmm2
+ vpaddd %xmm8, %xmm3, %xmm3
+ vmovdqu (%rsi), %xmm5
+ vmovdqu 16(%rsi), %xmm6
+ vmovdqu 32(%rsi), %xmm7
+ vmovdqu 48(%rsi), %xmm8
+ vpxor %xmm5, %xmm0, %xmm0
+ vpxor %xmm6, %xmm1, %xmm1
+ vpxor %xmm7, %xmm2, %xmm2
+ vpxor %xmm8, %xmm3, %xmm3
+ vmovdqu %xmm0, (%rdx)
+ vmovdqu %xmm1, 16(%rdx)
+ vmovdqu %xmm2, 32(%rdx)
+ vmovdqu %xmm3, 48(%rdx)
+ addl $0x01, 48(%rdi)
+ subl $0x40, %ecx
+ addq $0x40, %rsi
+ addq $0x40, %rdx
+ cmpl $0x40, %ecx
+ jge L_chacha20_avx1_block_start
+L_chacha20_avx1_block_done:
+ cmpl $0x00, %ecx
+ je L_chacha20_avx1_partial_done
+ vmovdqu (%rdi), %xmm0
+ vmovdqu 16(%rdi), %xmm1
+ vmovdqu 32(%rdi), %xmm2
+ vmovdqu 48(%rdi), %xmm3
+ vmovdqa %xmm0, %xmm5
+ vmovdqa %xmm1, %xmm6
+ vmovdqa %xmm2, %xmm7
+ vmovdqa %xmm3, %xmm8
+ movb $10, %al
+L_chacha20_avx1_partial_crypt_start:
+ vpaddd %xmm1, %xmm0, %xmm0
+ vpxor %xmm0, %xmm3, %xmm3
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm3, %xmm3
+ vpaddd %xmm3, %xmm2, %xmm2
+ vpxor %xmm2, %xmm1, %xmm1
+ vpsrld $20, %xmm1, %xmm4
+ vpslld $12, %xmm1, %xmm1
+ vpxor %xmm4, %xmm1, %xmm1
+ vpaddd %xmm1, %xmm0, %xmm0
+ vpxor %xmm0, %xmm3, %xmm3
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm3, %xmm3
+ vpaddd %xmm3, %xmm2, %xmm2
+ vpxor %xmm2, %xmm1, %xmm1
+ vpsrld $25, %xmm1, %xmm4
+ vpslld $7, %xmm1, %xmm1
+ vpxor %xmm4, %xmm1, %xmm1
+ vpshufd $57, %xmm1, %xmm1
+ vpshufd $0x4e, %xmm2, %xmm2
+ vpshufd $0x93, %xmm3, %xmm3
+ vpaddd %xmm1, %xmm0, %xmm0
+ vpxor %xmm0, %xmm3, %xmm3
+ vpshufb L_chacha20_avx1_rotl16(%rip), %xmm3, %xmm3
+ vpaddd %xmm3, %xmm2, %xmm2
+ vpxor %xmm2, %xmm1, %xmm1
+ vpsrld $20, %xmm1, %xmm4
+ vpslld $12, %xmm1, %xmm1
+ vpxor %xmm4, %xmm1, %xmm1
+ vpaddd %xmm1, %xmm0, %xmm0
+ vpxor %xmm0, %xmm3, %xmm3
+ vpshufb L_chacha20_avx1_rotl8(%rip), %xmm3, %xmm3
+ vpaddd %xmm3, %xmm2, %xmm2
+ vpxor %xmm2, %xmm1, %xmm1
+ vpsrld $25, %xmm1, %xmm4
+ vpslld $7, %xmm1, %xmm1
+ vpxor %xmm4, %xmm1, %xmm1
+ vpshufd $0x93, %xmm1, %xmm1
+ vpshufd $0x4e, %xmm2, %xmm2
+ vpshufd $57, %xmm3, %xmm3
+ decb %al
+ jnz L_chacha20_avx1_partial_crypt_start
+ vpaddd %xmm5, %xmm0, %xmm0
+ vpaddd %xmm6, %xmm1, %xmm1
+ vpaddd %xmm7, %xmm2, %xmm2
+ vpaddd %xmm8, %xmm3, %xmm3
+ vmovdqu %xmm0, (%r10)
+ vmovdqu %xmm1, 16(%r10)
+ vmovdqu %xmm2, 32(%r10)
+ vmovdqu %xmm3, 48(%r10)
+ addl $0x01, 48(%rdi)
+ movl %ecx, %r8d
+ xorq %r11, %r11
+ andl $7, %r8d
+ jz L_chacha20_avx1_partial_start64
+L_chacha20_avx1_partial_start8:
+ movzbl (%r10,%r11,1), %eax
+ xorb (%rsi,%r11,1), %al
+ movb %al, (%rdx,%r11,1)
+ incl %r11d
+ cmpl %r8d, %r11d
+ jne L_chacha20_avx1_partial_start8
+ je L_chacha20_avx1_partial_end64
+L_chacha20_avx1_partial_start64:
+ movq (%r10,%r11,1), %rax
+ xorq (%rsi,%r11,1), %rax
+ movq %rax, (%rdx,%r11,1)
+ addl $8, %r11d
+L_chacha20_avx1_partial_end64:
+ cmpl %ecx, %r11d
+ jne L_chacha20_avx1_partial_start64
+L_chacha20_avx1_partial_done:
+ addq $0x190, %rsp
+ repz retq
+#ifndef __APPLE__
+.size chacha_encrypt_avx1,.-chacha_encrypt_avx1
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX1 */
+#ifdef HAVE_INTEL_AVX2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+/* vpshufb mask: rotate each 32-bit lane left by 8 bits
+ * (byte order 3,0,1,2 within every dword, repeated per 128-bit lane). */
+L_chacha20_avx2_rotl8:
+.quad 0x605040702010003, 0xe0d0c0f0a09080b
+.quad 0x605040702010003, 0xe0d0c0f0a09080b
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+/* vpshufb mask: rotate each 32-bit lane left by 16 bits
+ * (byte order 2,3,0,1 within every dword). */
+L_chacha20_avx2_rotl16:
+.quad 0x504070601000302, 0xd0c0f0e09080b0a
+.quad 0x504070601000302, 0xd0c0f0e09080b0a
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+/* Per-lane block-counter offsets 0..7, added to the broadcast counter
+ * word so the 8 AVX2 lanes hold 8 consecutive ChaCha blocks. */
+L_chacha20_avx2_add:
+.quad 0x100000000, 0x300000002
+.quad 0x500000004, 0x700000006
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+/* Counter increment of 8 in every lane, applied once per 8-block chunk. */
+L_chacha20_avx2_eight:
+.quad 0x800000008, 0x800000008
+.quad 0x800000008, 0x800000008
+/* chacha_encrypt_avx2 -- 8-block (512-byte) AVX2 ChaCha20 bulk path.
+ * SysV AMD64, same contract as the AVX1 routine above:
+ *   rdi = 64-byte ChaCha state (32-bit block counter at offset 48)
+ *   rsi = input pointer, rdx = output pointer, ecx = length in bytes
+ * Processes floor(len/512) chunks here with 8 blocks in flight (one
+ * per ymm lane), then calls chacha_encrypt_avx1 to finish the rest.
+ * Clobbers rax, r8-r11 (r11 via the AVX1 call), ymm0-ymm15, flags.
+ */
+#ifndef __APPLE__
+.text
+.globl chacha_encrypt_avx2
+.type chacha_encrypt_avx2,@function
+.align 4
+chacha_encrypt_avx2:
+#else
+.section __TEXT,__text
+.globl _chacha_encrypt_avx2
+.p2align 2
+_chacha_encrypt_avx2:
+#endif /* __APPLE__ */
+        /* r9 = 512-byte save area for the initial state vectors,
+         * r10 = scratch area; both rounded down to 32-byte alignment. */
+        subq $0x310, %rsp
+        movq %rsp, %r9
+        leaq 512(%rsp), %r10
+        andq $-32, %r9
+        andq $-32, %r10
+        /* eax = number of whole 512-byte (8-block) chunks. */
+        movl %ecx, %eax
+        shrl $9, %eax
+        jz L_chacha20_avx2_end256
+        /* Broadcast each of the 16 state words across all 8 lanes;
+         * the counter word (ymm12) gets per-lane offsets 0..7 added. */
+        vpbroadcastd (%rdi), %ymm0
+        vpbroadcastd 4(%rdi), %ymm1
+        vpbroadcastd 8(%rdi), %ymm2
+        vpbroadcastd 12(%rdi), %ymm3
+        vpbroadcastd 16(%rdi), %ymm4
+        vpbroadcastd 20(%rdi), %ymm5
+        vpbroadcastd 24(%rdi), %ymm6
+        vpbroadcastd 28(%rdi), %ymm7
+        vpbroadcastd 32(%rdi), %ymm8
+        vpbroadcastd 36(%rdi), %ymm9
+        vpbroadcastd 40(%rdi), %ymm10
+        vpbroadcastd 44(%rdi), %ymm11
+        vpbroadcastd 48(%rdi), %ymm12
+        vpbroadcastd 52(%rdi), %ymm13
+        vpbroadcastd 56(%rdi), %ymm14
+        vpbroadcastd 60(%rdi), %ymm15
+        vpaddd L_chacha20_avx2_add(%rip), %ymm12, %ymm12
+        /* Save the chunk's starting state (ymm0..15 -> r9[0..480]) so
+         * it can be re-added after the 20 rounds. */
+        vmovdqa %ymm0, (%r9)
+        vmovdqa %ymm1, 32(%r9)
+        vmovdqa %ymm2, 64(%r9)
+        vmovdqa %ymm3, 96(%r9)
+        vmovdqa %ymm4, 128(%r9)
+        vmovdqa %ymm5, 160(%r9)
+        vmovdqa %ymm6, 192(%r9)
+        vmovdqa %ymm7, 224(%r9)
+        vmovdqa %ymm8, 256(%r9)
+        vmovdqa %ymm9, 288(%r9)
+        vmovdqa %ymm10, 320(%r9)
+        vmovdqa %ymm11, 352(%r9)
+        vmovdqa %ymm12, 384(%r9)
+        vmovdqa %ymm13, 416(%r9)
+        vmovdqa %ymm14, 448(%r9)
+        vmovdqa %ymm15, 480(%r9)
+L_chacha20_avx2_start256:
+        movb $10, %r8b
+        /* All 16 registers hold state, so ymm11 doubles as the rotate
+         * scratch register: it is spilled to 96(%r10) and reloaded
+         * around each shift/shift/xor rotate sequence. */
+        vmovdqa %ymm11, 96(%r10)
+L_chacha20_avx2_loop256:
+        /* One double round per iteration (10 total = 20 rounds).
+         * Column round: a += b; d ^= a; d <<<= 16; c += d; b ^= c;
+         * b <<<= 12; then repeat with rotates 8 and 7. */
+        vpaddd %ymm4, %ymm0, %ymm0
+        vpxor %ymm0, %ymm12, %ymm12
+        vmovdqa 96(%r10), %ymm11
+        vpshufb L_chacha20_avx2_rotl16(%rip), %ymm12, %ymm12
+        vpaddd %ymm12, %ymm8, %ymm8
+        vpxor %ymm8, %ymm4, %ymm4
+        vpaddd %ymm5, %ymm1, %ymm1
+        vpxor %ymm1, %ymm13, %ymm13
+        vpshufb L_chacha20_avx2_rotl16(%rip), %ymm13, %ymm13
+        vpaddd %ymm13, %ymm9, %ymm9
+        vpxor %ymm9, %ymm5, %ymm5
+        vpaddd %ymm6, %ymm2, %ymm2
+        vpxor %ymm2, %ymm14, %ymm14
+        vpshufb L_chacha20_avx2_rotl16(%rip), %ymm14, %ymm14
+        vpaddd %ymm14, %ymm10, %ymm10
+        vpxor %ymm10, %ymm6, %ymm6
+        vpaddd %ymm7, %ymm3, %ymm3
+        vpxor %ymm3, %ymm15, %ymm15
+        vpshufb L_chacha20_avx2_rotl16(%rip), %ymm15, %ymm15
+        vpaddd %ymm15, %ymm11, %ymm11
+        vpxor %ymm11, %ymm7, %ymm7
+        vmovdqa %ymm11, 96(%r10)
+        /* b <<<= 12 for all four columns (shift pair + xor). */
+        vpsrld $20, %ymm4, %ymm11
+        vpslld $12, %ymm4, %ymm4
+        vpxor %ymm11, %ymm4, %ymm4
+        vpsrld $20, %ymm5, %ymm11
+        vpslld $12, %ymm5, %ymm5
+        vpxor %ymm11, %ymm5, %ymm5
+        vpsrld $20, %ymm6, %ymm11
+        vpslld $12, %ymm6, %ymm6
+        vpxor %ymm11, %ymm6, %ymm6
+        vpsrld $20, %ymm7, %ymm11
+        vpslld $12, %ymm7, %ymm7
+        vpxor %ymm11, %ymm7, %ymm7
+        vpaddd %ymm4, %ymm0, %ymm0
+        vpxor %ymm0, %ymm12, %ymm12
+        vmovdqa 96(%r10), %ymm11
+        vpshufb L_chacha20_avx2_rotl8(%rip), %ymm12, %ymm12
+        vpaddd %ymm12, %ymm8, %ymm8
+        vpxor %ymm8, %ymm4, %ymm4
+        vpaddd %ymm5, %ymm1, %ymm1
+        vpxor %ymm1, %ymm13, %ymm13
+        vpshufb L_chacha20_avx2_rotl8(%rip), %ymm13, %ymm13
+        vpaddd %ymm13, %ymm9, %ymm9
+        vpxor %ymm9, %ymm5, %ymm5
+        vpaddd %ymm6, %ymm2, %ymm2
+        vpxor %ymm2, %ymm14, %ymm14
+        vpshufb L_chacha20_avx2_rotl8(%rip), %ymm14, %ymm14
+        vpaddd %ymm14, %ymm10, %ymm10
+        vpxor %ymm10, %ymm6, %ymm6
+        vpaddd %ymm7, %ymm3, %ymm3
+        vpxor %ymm3, %ymm15, %ymm15
+        vpshufb L_chacha20_avx2_rotl8(%rip), %ymm15, %ymm15
+        vpaddd %ymm15, %ymm11, %ymm11
+        vpxor %ymm11, %ymm7, %ymm7
+        vmovdqa %ymm11, 96(%r10)
+        /* b <<<= 7 for all four columns. */
+        vpsrld $25, %ymm4, %ymm11
+        vpslld $7, %ymm4, %ymm4
+        vpxor %ymm11, %ymm4, %ymm4
+        vpsrld $25, %ymm5, %ymm11
+        vpslld $7, %ymm5, %ymm5
+        vpxor %ymm11, %ymm5, %ymm5
+        vpsrld $25, %ymm6, %ymm11
+        vpslld $7, %ymm6, %ymm6
+        vpxor %ymm11, %ymm6, %ymm6
+        vpsrld $25, %ymm7, %ymm11
+        vpslld $7, %ymm7, %ymm7
+        vpxor %ymm11, %ymm7, %ymm7
+        /* Diagonal round: same quarter-round pattern on the rotated
+         * register assignment (0,5,10,15 / 1,6,11,12 / ...). */
+        vpaddd %ymm5, %ymm0, %ymm0
+        vpxor %ymm0, %ymm15, %ymm15
+        vmovdqa 96(%r10), %ymm11
+        vpshufb L_chacha20_avx2_rotl16(%rip), %ymm15, %ymm15
+        vpaddd %ymm15, %ymm10, %ymm10
+        vpxor %ymm10, %ymm5, %ymm5
+        vpaddd %ymm6, %ymm1, %ymm1
+        vpxor %ymm1, %ymm12, %ymm12
+        vpshufb L_chacha20_avx2_rotl16(%rip), %ymm12, %ymm12
+        vpaddd %ymm12, %ymm11, %ymm11
+        vpxor %ymm11, %ymm6, %ymm6
+        vpaddd %ymm7, %ymm2, %ymm2
+        vpxor %ymm2, %ymm13, %ymm13
+        vpshufb L_chacha20_avx2_rotl16(%rip), %ymm13, %ymm13
+        vpaddd %ymm13, %ymm8, %ymm8
+        vpxor %ymm8, %ymm7, %ymm7
+        vpaddd %ymm4, %ymm3, %ymm3
+        vpxor %ymm3, %ymm14, %ymm14
+        vpshufb L_chacha20_avx2_rotl16(%rip), %ymm14, %ymm14
+        vpaddd %ymm14, %ymm9, %ymm9
+        vpxor %ymm9, %ymm4, %ymm4
+        vmovdqa %ymm11, 96(%r10)
+        vpsrld $20, %ymm5, %ymm11
+        vpslld $12, %ymm5, %ymm5
+        vpxor %ymm11, %ymm5, %ymm5
+        vpsrld $20, %ymm6, %ymm11
+        vpslld $12, %ymm6, %ymm6
+        vpxor %ymm11, %ymm6, %ymm6
+        vpsrld $20, %ymm7, %ymm11
+        vpslld $12, %ymm7, %ymm7
+        vpxor %ymm11, %ymm7, %ymm7
+        vpsrld $20, %ymm4, %ymm11
+        vpslld $12, %ymm4, %ymm4
+        vpxor %ymm11, %ymm4, %ymm4
+        vpaddd %ymm5, %ymm0, %ymm0
+        vpxor %ymm0, %ymm15, %ymm15
+        vmovdqa 96(%r10), %ymm11
+        vpshufb L_chacha20_avx2_rotl8(%rip), %ymm15, %ymm15
+        vpaddd %ymm15, %ymm10, %ymm10
+        vpxor %ymm10, %ymm5, %ymm5
+        vpaddd %ymm6, %ymm1, %ymm1
+        vpxor %ymm1, %ymm12, %ymm12
+        vpshufb L_chacha20_avx2_rotl8(%rip), %ymm12, %ymm12
+        vpaddd %ymm12, %ymm11, %ymm11
+        vpxor %ymm11, %ymm6, %ymm6
+        vpaddd %ymm7, %ymm2, %ymm2
+        vpxor %ymm2, %ymm13, %ymm13
+        vpshufb L_chacha20_avx2_rotl8(%rip), %ymm13, %ymm13
+        vpaddd %ymm13, %ymm8, %ymm8
+        vpxor %ymm8, %ymm7, %ymm7
+        vpaddd %ymm4, %ymm3, %ymm3
+        vpxor %ymm3, %ymm14, %ymm14
+        vpshufb L_chacha20_avx2_rotl8(%rip), %ymm14, %ymm14
+        vpaddd %ymm14, %ymm9, %ymm9
+        vpxor %ymm9, %ymm4, %ymm4
+        vmovdqa %ymm11, 96(%r10)
+        vpsrld $25, %ymm5, %ymm11
+        vpslld $7, %ymm5, %ymm5
+        vpxor %ymm11, %ymm5, %ymm5
+        vpsrld $25, %ymm6, %ymm11
+        vpslld $7, %ymm6, %ymm6
+        vpxor %ymm11, %ymm6, %ymm6
+        vpsrld $25, %ymm7, %ymm11
+        vpslld $7, %ymm7, %ymm7
+        vpxor %ymm11, %ymm7, %ymm7
+        vpsrld $25, %ymm4, %ymm11
+        vpslld $7, %ymm4, %ymm4
+        vpxor %ymm11, %ymm4, %ymm4
+        decb %r8b
+        jnz L_chacha20_avx2_loop256
+        vmovdqa 96(%r10), %ymm11
+        /* Final ChaCha addition: add back the saved initial state. */
+        vpaddd (%r9), %ymm0, %ymm0
+        vpaddd 32(%r9), %ymm1, %ymm1
+        vpaddd 64(%r9), %ymm2, %ymm2
+        vpaddd 96(%r9), %ymm3, %ymm3
+        vpaddd 128(%r9), %ymm4, %ymm4
+        vpaddd 160(%r9), %ymm5, %ymm5
+        vpaddd 192(%r9), %ymm6, %ymm6
+        vpaddd 224(%r9), %ymm7, %ymm7
+        vpaddd 256(%r9), %ymm8, %ymm8
+        vpaddd 288(%r9), %ymm9, %ymm9
+        vpaddd 320(%r9), %ymm10, %ymm10
+        vpaddd 352(%r9), %ymm11, %ymm11
+        vpaddd 384(%r9), %ymm12, %ymm12
+        vpaddd 416(%r9), %ymm13, %ymm13
+        vpaddd 448(%r9), %ymm14, %ymm14
+        vpaddd 480(%r9), %ymm15, %ymm15
+        /* Stash state words 8..15; transpose words 0..7 first. */
+        vmovdqa %ymm8, (%r10)
+        vmovdqa %ymm9, 32(%r10)
+        vmovdqa %ymm10, 64(%r10)
+        vmovdqa %ymm11, 96(%r10)
+        vmovdqa %ymm12, 128(%r10)
+        vmovdqa %ymm13, 160(%r10)
+        vmovdqa %ymm14, 192(%r10)
+        vmovdqa %ymm15, 224(%r10)
+        /* 8x8 dword transpose (punpck + vperm2i128) so each ymm holds
+         * 32 contiguous keystream bytes of one block. */
+        vpunpckldq %ymm1, %ymm0, %ymm8
+        vpunpckldq %ymm3, %ymm2, %ymm9
+        vpunpckhdq %ymm1, %ymm0, %ymm12
+        vpunpckhdq %ymm3, %ymm2, %ymm13
+        vpunpckldq %ymm5, %ymm4, %ymm10
+        vpunpckldq %ymm7, %ymm6, %ymm11
+        vpunpckhdq %ymm5, %ymm4, %ymm14
+        vpunpckhdq %ymm7, %ymm6, %ymm15
+        vpunpcklqdq %ymm9, %ymm8, %ymm0
+        vpunpcklqdq %ymm11, %ymm10, %ymm1
+        vpunpckhqdq %ymm9, %ymm8, %ymm2
+        vpunpckhqdq %ymm11, %ymm10, %ymm3
+        vpunpcklqdq %ymm13, %ymm12, %ymm4
+        vpunpcklqdq %ymm15, %ymm14, %ymm5
+        vpunpckhqdq %ymm13, %ymm12, %ymm6
+        vpunpckhqdq %ymm15, %ymm14, %ymm7
+        vperm2i128 $32, %ymm1, %ymm0, %ymm8
+        vperm2i128 $32, %ymm3, %ymm2, %ymm9
+        vperm2i128 $49, %ymm1, %ymm0, %ymm12
+        vperm2i128 $49, %ymm3, %ymm2, %ymm13
+        vperm2i128 $32, %ymm5, %ymm4, %ymm10
+        vperm2i128 $32, %ymm7, %ymm6, %ymm11
+        vperm2i128 $49, %ymm5, %ymm4, %ymm14
+        vperm2i128 $49, %ymm7, %ymm6, %ymm15
+        /* XOR with the message: first 32 bytes of each 64-byte block. */
+        vmovdqu (%rsi), %ymm0
+        vmovdqu 64(%rsi), %ymm1
+        vmovdqu 128(%rsi), %ymm2
+        vmovdqu 192(%rsi), %ymm3
+        vmovdqu 256(%rsi), %ymm4
+        vmovdqu 320(%rsi), %ymm5
+        vmovdqu 384(%rsi), %ymm6
+        vmovdqu 448(%rsi), %ymm7
+        vpxor %ymm0, %ymm8, %ymm8
+        vpxor %ymm1, %ymm9, %ymm9
+        vpxor %ymm2, %ymm10, %ymm10
+        vpxor %ymm3, %ymm11, %ymm11
+        vpxor %ymm4, %ymm12, %ymm12
+        vpxor %ymm5, %ymm13, %ymm13
+        vpxor %ymm6, %ymm14, %ymm14
+        vpxor %ymm7, %ymm15, %ymm15
+        vmovdqu %ymm8, (%rdx)
+        vmovdqu %ymm9, 64(%rdx)
+        vmovdqu %ymm10, 128(%rdx)
+        vmovdqu %ymm11, 192(%rdx)
+        vmovdqu %ymm12, 256(%rdx)
+        vmovdqu %ymm13, 320(%rdx)
+        vmovdqu %ymm14, 384(%rdx)
+        vmovdqu %ymm15, 448(%rdx)
+        /* Same transpose + XOR for state words 8..15 (second 32 bytes
+         * of each block, output offsets 32, 96, ..., 480). */
+        vmovdqa (%r10), %ymm0
+        vmovdqa 32(%r10), %ymm1
+        vmovdqa 64(%r10), %ymm2
+        vmovdqa 96(%r10), %ymm3
+        vmovdqa 128(%r10), %ymm4
+        vmovdqa 160(%r10), %ymm5
+        vmovdqa 192(%r10), %ymm6
+        vmovdqa 224(%r10), %ymm7
+        vpunpckldq %ymm1, %ymm0, %ymm8
+        vpunpckldq %ymm3, %ymm2, %ymm9
+        vpunpckhdq %ymm1, %ymm0, %ymm12
+        vpunpckhdq %ymm3, %ymm2, %ymm13
+        vpunpckldq %ymm5, %ymm4, %ymm10
+        vpunpckldq %ymm7, %ymm6, %ymm11
+        vpunpckhdq %ymm5, %ymm4, %ymm14
+        vpunpckhdq %ymm7, %ymm6, %ymm15
+        vpunpcklqdq %ymm9, %ymm8, %ymm0
+        vpunpcklqdq %ymm11, %ymm10, %ymm1
+        vpunpckhqdq %ymm9, %ymm8, %ymm2
+        vpunpckhqdq %ymm11, %ymm10, %ymm3
+        vpunpcklqdq %ymm13, %ymm12, %ymm4
+        vpunpcklqdq %ymm15, %ymm14, %ymm5
+        vpunpckhqdq %ymm13, %ymm12, %ymm6
+        vpunpckhqdq %ymm15, %ymm14, %ymm7
+        vperm2i128 $32, %ymm1, %ymm0, %ymm8
+        vperm2i128 $32, %ymm3, %ymm2, %ymm9
+        vperm2i128 $49, %ymm1, %ymm0, %ymm12
+        vperm2i128 $49, %ymm3, %ymm2, %ymm13
+        vperm2i128 $32, %ymm5, %ymm4, %ymm10
+        vperm2i128 $32, %ymm7, %ymm6, %ymm11
+        vperm2i128 $49, %ymm5, %ymm4, %ymm14
+        vperm2i128 $49, %ymm7, %ymm6, %ymm15
+        vmovdqu 32(%rsi), %ymm0
+        vmovdqu 96(%rsi), %ymm1
+        vmovdqu 160(%rsi), %ymm2
+        vmovdqu 224(%rsi), %ymm3
+        vmovdqu 288(%rsi), %ymm4
+        vmovdqu 352(%rsi), %ymm5
+        vmovdqu 416(%rsi), %ymm6
+        vmovdqu 480(%rsi), %ymm7
+        vpxor %ymm0, %ymm8, %ymm8
+        vpxor %ymm1, %ymm9, %ymm9
+        vpxor %ymm2, %ymm10, %ymm10
+        vpxor %ymm3, %ymm11, %ymm11
+        vpxor %ymm4, %ymm12, %ymm12
+        vpxor %ymm5, %ymm13, %ymm13
+        vpxor %ymm6, %ymm14, %ymm14
+        vpxor %ymm7, %ymm15, %ymm15
+        vmovdqu %ymm8, 32(%rdx)
+        vmovdqu %ymm9, 96(%rdx)
+        vmovdqu %ymm10, 160(%rdx)
+        vmovdqu %ymm11, 224(%rdx)
+        vmovdqu %ymm12, 288(%rdx)
+        vmovdqu %ymm13, 352(%rdx)
+        vmovdqu %ymm14, 416(%rdx)
+        vmovdqu %ymm15, 480(%rdx)
+        /* Advance: saved counter vector += 8, pointers += 512 bytes. */
+        vmovdqa 384(%r9), %ymm12
+        addq $0x200, %rsi
+        addq $0x200, %rdx
+        vpaddd L_chacha20_avx2_eight(%rip), %ymm12, %ymm12
+        subl $0x200, %ecx
+        vmovdqa %ymm12, 384(%r9)
+        cmpl $0x200, %ecx
+        jl L_chacha20_avx2_done256
+        /* Another full chunk: reload the saved (counter-updated) state. */
+        vmovdqa (%r9), %ymm0
+        vmovdqa 32(%r9), %ymm1
+        vmovdqa 64(%r9), %ymm2
+        vmovdqa 96(%r9), %ymm3
+        vmovdqa 128(%r9), %ymm4
+        vmovdqa 160(%r9), %ymm5
+        vmovdqa 192(%r9), %ymm6
+        vmovdqa 224(%r9), %ymm7
+        vmovdqa 256(%r9), %ymm8
+        vmovdqa 288(%r9), %ymm9
+        vmovdqa 320(%r9), %ymm10
+        vmovdqa 352(%r9), %ymm11
+        vmovdqa 384(%r9), %ymm12
+        vmovdqa 416(%r9), %ymm13
+        vmovdqa 448(%r9), %ymm14
+        vmovdqa 480(%r9), %ymm15
+        jmp L_chacha20_avx2_start256
+L_chacha20_avx2_done256:
+        /* Commit the counter to the state: 8 blocks per chunk done. */
+        shl $3, %eax
+        addl %eax, 48(%rdi)
+L_chacha20_avx2_end256:
+        /* Hand the remaining (< 512) bytes to the AVX1 routine; rdi,
+         * rsi, rdx and ecx already describe the leftover region. */
+#ifndef __APPLE__
+        callq chacha_encrypt_avx1@plt
+#else
+        callq _chacha_encrypt_avx1
+#endif /* __APPLE__ */
+        addq $0x310, %rsp
+        repz retq
+#ifndef __APPLE__
+.size chacha_encrypt_avx2,.-chacha_encrypt_avx2
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cmac.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cmac.c
new file mode 100644
index 000000000..9d30bb5f3
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cmac.c
@@ -0,0 +1,215 @@
+/* cmac.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(WOLFSSL_CMAC) && !defined(NO_AES) && defined(WOLFSSL_AES_DIRECT)
+
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$n")
+ #pragma const_seg(".fipsB$n")
+ #endif
+#endif
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/cmac.h>
+
+
+/* Derive a CMAC subkey: out = in << 1 (128-bit left shift by one bit),
+ * and when the MSB of in was set, XOR the constant Rb = 0x87 into the
+ * last byte (AES-CMAC subkey generation, RFC 4493 / NIST SP 800-38B). */
+static void ShiftAndXorRb(byte* out, byte* in)
+{
+    int  idx;
+    byte carry = 0;
+    byte needRb = (byte)((in[0] & 0x80) ? 1 : 0);
+
+    /* Shift the block left one bit, last byte first, carrying each
+     * byte's MSB into the next byte up. */
+    for (idx = AES_BLOCK_SIZE - 1; idx >= 0; idx--) {
+        byte nextCarry = (byte)((in[idx] & 0x80) ? 1 : 0);
+        out[idx] = (byte)((in[idx] << 1) | carry);
+        carry = nextCarry;
+    }
+
+    /* Rb only has its low byte non-zero, so a single XOR suffices. */
+    if (needRb)
+        out[AES_BLOCK_SIZE - 1] ^= 0x87;
+}
+
+
+/* Initialize a CMAC context: load the AES key, then derive the two
+ * subkeys K1/K2 from L = E_K(0^128).  Only type WC_CMAC_AES is
+ * supported.  Returns 0 on success or a wolfCrypt error code. */
+int wc_InitCmac(Cmac* cmac, const byte* key, word32 keySz,
+                int type, void* unused)
+{
+    byte lBlock[AES_BLOCK_SIZE];
+    int  ret;
+
+    (void)unused;
+
+    if (cmac == NULL || key == NULL || keySz == 0 || type != WC_CMAC_AES)
+        return BAD_FUNC_ARG;
+
+    XMEMSET(cmac, 0, sizeof(Cmac));
+
+    ret = wc_AesSetKey(&cmac->aes, key, keySz, NULL, AES_ENCRYPTION);
+    if (ret != 0)
+        return ret;
+
+    /* L = E_K(0^128); K1 = L<<1 (+Rb); K2 = K1<<1 (+Rb). */
+    XMEMSET(lBlock, 0, AES_BLOCK_SIZE);
+    wc_AesEncryptDirect(&cmac->aes, lBlock, lBlock);
+    ShiftAndXorRb(cmac->k1, lBlock);
+    ShiftAndXorRb(cmac->k2, cmac->k1);
+    ForceZero(lBlock, AES_BLOCK_SIZE);   /* scrub L; it reveals the subkeys */
+
+    return 0;
+}
+
+
+/* Feed message bytes into the CMAC.  Full blocks are CBC-MAC'd as they
+ * fill, EXCEPT the most recent one: the last block seen always stays
+ * buffered, because wc_CmacFinal must apply subkey/padding to it. */
+int wc_CmacUpdate(Cmac* cmac, const byte* in, word32 inSz)
+{
+    if ((cmac == NULL) || (in == NULL && inSz != 0))
+        return BAD_FUNC_ARG;
+
+    while (inSz != 0) {
+        /* Top up the block buffer from the input. */
+        word32 add = min(inSz, AES_BLOCK_SIZE - cmac->bufferSz);
+        XMEMCPY(&cmac->buffer[cmac->bufferSz], in, add);
+
+        cmac->bufferSz += add;
+        in += add;
+        inSz -= add;
+
+        /* Consume a full buffer only when more input follows, so the
+         * final block is deferred to wc_CmacFinal. */
+        if (cmac->bufferSz == AES_BLOCK_SIZE && inSz != 0) {
+            /* CBC-MAC step: digest = E_K(digest ^ block); the XOR is
+             * skipped for the first block since digest is still zero. */
+            if (cmac->totalSz != 0)
+                xorbuf(cmac->buffer, cmac->digest, AES_BLOCK_SIZE);
+            wc_AesEncryptDirect(&cmac->aes,
+                                cmac->digest,
+                                cmac->buffer);
+            cmac->totalSz += AES_BLOCK_SIZE;
+            cmac->bufferSz = 0;
+        }
+    }
+
+    return 0;
+}
+
+
+/* Finalize the CMAC: apply subkey K1 (complete last block) or 10*
+ * padding plus subkey K2 (partial/empty last block), encrypt, and copy
+ * *outSz tag bytes to out.  *outSz must lie within
+ * [WC_CMAC_TAG_MIN_SZ, WC_CMAC_TAG_MAX_SZ].  On success the whole
+ * context (key schedule, subkeys, digest) is zeroized. */
+int wc_CmacFinal(Cmac* cmac, byte* out, word32* outSz)
+{
+    const byte* subKey;
+
+    if (cmac == NULL || out == NULL || outSz == NULL)
+        return BAD_FUNC_ARG;
+
+    if (*outSz < WC_CMAC_TAG_MIN_SZ || *outSz > WC_CMAC_TAG_MAX_SZ)
+        return BUFFER_E;
+
+    if (cmac->bufferSz == AES_BLOCK_SIZE) {
+        /* Complete final block: subkey K1, no padding needed. */
+        subKey = cmac->k1;
+    }
+    else {
+        /* Partial (or empty) final block: 0x80 then zeros, subkey K2. */
+        word32 remainder = AES_BLOCK_SIZE - cmac->bufferSz;
+
+        if (remainder == 0)       /* defensive; bufferSz == block handled above */
+            remainder = AES_BLOCK_SIZE;
+
+        if (remainder > 1)
+            XMEMSET(cmac->buffer + AES_BLOCK_SIZE - remainder, 0, remainder);
+        cmac->buffer[AES_BLOCK_SIZE - remainder] = 0x80;
+        subKey = cmac->k2;
+    }
+    /* Tag = E_K(last-block ^ running-digest ^ subkey). */
+    xorbuf(cmac->buffer, cmac->digest, AES_BLOCK_SIZE);
+    xorbuf(cmac->buffer, subKey, AES_BLOCK_SIZE);
+    wc_AesEncryptDirect(&cmac->aes, cmac->digest, cmac->buffer);
+
+    XMEMCPY(out, cmac->digest, *outSz);
+
+    /* Scrub key material and state from memory. */
+    ForceZero(cmac, sizeof(Cmac));
+
+    return 0;
+}
+
+
+/* One-shot AES-CMAC: MAC inSz bytes of in under key, writing *outSz
+ * tag bytes to out.  Returns 0 on success or a wolfCrypt error code.
+ * The stack context is always zeroized before returning, even on
+ * failure (wc_CmacFinal only clears it on the success path). */
+int wc_AesCmacGenerate(byte* out, word32* outSz,
+                       const byte* in, word32 inSz,
+                       const byte* key, word32 keySz)
+{
+    Cmac cmac;
+    int ret;
+
+    if (out == NULL || (in == NULL && inSz > 0) || key == NULL || keySz == 0)
+        return BAD_FUNC_ARG;
+
+    ret = wc_InitCmac(&cmac, key, keySz, WC_CMAC_AES, NULL);
+    if (ret != 0)
+        return ret;
+
+    ret = wc_CmacUpdate(&cmac, in, inSz);
+    if (ret == 0)
+        ret = wc_CmacFinal(&cmac, out, outSz); /* zeroizes cmac on success */
+
+    if (ret != 0) {
+        /* Don't leave the expanded key and subkeys on the stack when a
+         * step fails before wc_CmacFinal could clear them. */
+        ForceZero(&cmac, sizeof(Cmac));
+    }
+
+    return ret;
+}
+
+
+/* Verify an AES-CMAC tag in constant time.  Returns 0 when the tag
+ * matches, 1 on mismatch, or a negative wolfCrypt error code when
+ * generation fails.  Note: only min(checkSz, AES_BLOCK_SIZE) bytes are
+ * compared, so the caller's checkSz selects the (possibly truncated)
+ * effective tag length. */
+int wc_AesCmacVerify(const byte* check, word32 checkSz,
+                     const byte* in, word32 inSz,
+                     const byte* key, word32 keySz)
+{
+    byte a[AES_BLOCK_SIZE];
+    word32 aSz = sizeof(a);
+    int result;
+
+    if (check == NULL || checkSz == 0 || (in == NULL && inSz != 0) ||
+                                              key == NULL || keySz == 0)
+        return BAD_FUNC_ARG;
+
+    XMEMSET(a, 0, aSz);
+    result = wc_AesCmacGenerate(a, &aSz, in, inSz, key, keySz);
+    if (result == 0) {
+        /* Only compare when generation succeeded; comparing against a
+         * buffer left zeroed by a failed generate would be meaningless. */
+        result = ConstantCompare(check, a, min(checkSz, aSz)) ? 1 : 0;
+    }
+
+    return result;
+}
+
+
+#endif /* WOLFSSL_CMAC && !NO_AES && WOLFSSL_AES_DIRECT */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/coding.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/coding.c
index 6ead79caf..f6c814e01 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/coding.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/coding.c
@@ -1,8 +1,8 @@
/* coding.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -35,10 +36,14 @@
enum {
BAD = 0xFF, /* invalid encoding */
PAD = '=',
- PEM_LINE_SZ = 64
+ PEM_LINE_SZ = 64,
+ BASE64_MIN = 0x2B,
+ BASE16_MIN = 0x30,
};
+#ifdef WOLFSSL_BASE64_DECODE
+
static
const byte base64Decode[] = { 62, BAD, BAD, BAD, 63, /* + starts at 0x2B */
52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
@@ -52,27 +57,81 @@ const byte base64Decode[] = { 62, BAD, BAD, BAD, 63, /* + starts at 0x2B */
46, 47, 48, 49, 50, 51
};
+/* Skip one run of whitespace / line-ending characters (' ', '\r',
+ * '\n') at in[*outJ], advancing *outJ and shrinking *inLen to match.
+ * A run of spaces or a '\r' must terminate in '\n' or it is rejected.
+ * Returns 0 on success, ASN_INPUT_E on a malformed line ending, or
+ * BUFFER_E when the input is exhausted (skipped or not). */
+static WC_INLINE int Base64_SkipNewline(const byte* in, word32 *inLen, word32 *outJ)
+{
+    word32 len = *inLen;
+    word32 j = *outJ;
+    if (len && (in[j] == ' ' || in[j] == '\r' || in[j] == '\n')) {
+        byte endLine = in[j++];
+        len--;
+        while (len && endLine == ' ') { /* allow trailing whitespace */
+            endLine = in[j++];
+            len--;
+        }
+        /* a carriage return must be followed by a line feed */
+        if (endLine == '\r') {
+            if (len) {
+                endLine = in[j++];
+                len--;
+            }
+        }
+        if (endLine != '\n') {
+            WOLFSSL_MSG("Bad end of line in Base64 Decode");
+            return ASN_INPUT_E;
+        }
+    }
+    if (!len) {
+        return BUFFER_E;
+    }
+    /* commit the consumed positions only on success */
+    *inLen = len;
+    *outJ = j;
+    return 0;
+}
int Base64_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
{
word32 i = 0;
word32 j = 0;
word32 plainSz = inLen - ((inLen + (PEM_LINE_SZ - 1)) / PEM_LINE_SZ );
- const byte maxIdx = (byte)sizeof(base64Decode) + 0x2B - 1;
+ int ret;
+ const byte maxIdx = (byte)sizeof(base64Decode) + BASE64_MIN - 1;
plainSz = (plainSz * 3 + 3) / 4;
if (plainSz > *outLen) return BAD_FUNC_ARG;
while (inLen > 3) {
- byte b1, b2, b3;
- byte e1 = in[j++];
- byte e2 = in[j++];
- byte e3 = in[j++];
- byte e4 = in[j++];
-
int pad3 = 0;
int pad4 = 0;
+ byte b1, b2, b3;
+ byte e1, e2, e3, e4;
+ if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) {
+ if (ret == BUFFER_E) {
+ /* Running out of buffer here is not an error */
+ break;
+ }
+ return ret;
+ }
+ e1 = in[j++];
+ if (e1 == '\0') {
+ break;
+ }
+ inLen--;
+ if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) {
+ return ret;
+ }
+ e2 = in[j++];
+ inLen--;
+ if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) {
+ return ret;
+ }
+ e3 = in[j++];
+ inLen--;
+ if ((ret = Base64_SkipNewline(in, &inLen, &j)) != 0) {
+ return ret;
+ }
+ e4 = in[j++];
+ inLen--;
+
if (e1 == 0) /* end file 0's */
break;
if (e3 == PAD)
@@ -80,7 +139,7 @@ int Base64_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
if (e4 == PAD)
pad4 = 1;
- if (e1 < 0x2B || e2 < 0x2B || e3 < 0x2B || e4 < 0x2B) {
+ if (e1 < BASE64_MIN || e2 < BASE64_MIN || e3 < BASE64_MIN || e4 < BASE64_MIN) {
WOLFSSL_MSG("Bad Base64 Decode data, too small");
return ASN_INPUT_E;
}
@@ -90,10 +149,15 @@ int Base64_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
return ASN_INPUT_E;
}
- e1 = base64Decode[e1 - 0x2B];
- e2 = base64Decode[e2 - 0x2B];
- e3 = (e3 == PAD) ? 0 : base64Decode[e3 - 0x2B];
- e4 = (e4 == PAD) ? 0 : base64Decode[e4 - 0x2B];
+ if (i + 1 + !pad3 + !pad4 > *outLen) {
+ WOLFSSL_MSG("Bad Base64 Decode out buffer, too small");
+ return BAD_FUNC_ARG;
+ }
+
+ e1 = base64Decode[e1 - BASE64_MIN];
+ e2 = base64Decode[e2 - BASE64_MIN];
+ e3 = (e3 == PAD) ? 0 : base64Decode[e3 - BASE64_MIN];
+ e4 = (e4 == PAD) ? 0 : base64Decode[e4 - BASE64_MIN];
b1 = (byte)((e1 << 2) | (e2 >> 4));
b2 = (byte)(((e2 & 0xF) << 4) | (e3 >> 2));
@@ -106,32 +170,17 @@ int Base64_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
out[i++] = b3;
else
break;
-
- inLen -= 4;
- if (inLen && (in[j] == ' ' || in[j] == '\r' || in[j] == '\n')) {
- byte endLine = in[j++];
- inLen--;
- while (inLen && endLine == ' ') { /* allow trailing whitespace */
- endLine = in[j++];
- inLen--;
- }
- if (endLine == '\r') {
- if (inLen) {
- endLine = in[j++];
- inLen--;
- }
- }
- if (endLine != '\n') {
- WOLFSSL_MSG("Bad end of line in Base64 Decode");
- return ASN_INPUT_E;
- }
- }
}
+/* If the output buffer has a room for an extra byte, add a null terminator */
+ if (out && *outLen > i)
+ out[i]= '\0';
+
*outLen = i;
return 0;
}
+#endif /* WOLFSSL_BASE64_DECODE */
#if defined(WOLFSSL_BASE64_ENCODE)
@@ -150,7 +199,7 @@ const byte base64Encode[] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
/* make sure *i (idx) won't exceed max, store and possibly escape to out,
* raw means use e w/o decode, 0 on success */
static int CEscape(int escaped, byte e, byte* out, word32* i, word32 max,
- int raw)
+ int raw, int getSzOnly)
{
int doEscape = 0;
word32 needed = 1;
@@ -166,8 +215,8 @@ static int CEscape(int escaped, byte e, byte* out, word32* i, word32 max,
else
basic = base64Encode[e];
- /* check whether to escape */
- if (escaped) {
+ /* check whether to escape. Only escape for EncodeEsc */
+ if (escaped == WC_ESC_NL_ENC) {
switch ((char)basic) {
case '+' :
plus = 1;
@@ -191,31 +240,37 @@ static int CEscape(int escaped, byte e, byte* out, word32* i, word32 max,
}
/* check size */
- if ( (idx+needed) > max) {
+ if ( (idx+needed) > max && !getSzOnly) {
WOLFSSL_MSG("Escape buffer max too small");
return BUFFER_E;
}
/* store it */
if (doEscape == 0) {
- out[idx++] = basic;
+ if(getSzOnly)
+ idx++;
+ else
+ out[idx++] = basic;
}
else {
- out[idx++] = '%'; /* start escape */
-
- if (plus) {
- out[idx++] = '2';
- out[idx++] = 'B';
- }
- else if (equals) {
- out[idx++] = '3';
- out[idx++] = 'D';
- }
- else if (newline) {
- out[idx++] = '0';
- out[idx++] = 'A';
+ if(getSzOnly)
+ idx+=3;
+ else {
+ out[idx++] = '%'; /* start escape */
+
+ if (plus) {
+ out[idx++] = '2';
+ out[idx++] = 'B';
+ }
+ else if (equals) {
+ out[idx++] = '3';
+ out[idx++] = 'D';
+ }
+ else if (newline) {
+ out[idx++] = '0';
+ out[idx++] = 'A';
+ }
}
-
}
*i = idx;
@@ -223,7 +278,8 @@ static int CEscape(int escaped, byte e, byte* out, word32* i, word32 max,
}
-/* internal worker, handles both escaped and normal line endings */
+/* internal worker, handles both escaped and normal line endings.
+ If out buffer is NULL, will return sz needed in outLen */
static int DoBase64_Encode(const byte* in, word32 inLen, byte* out,
word32* outLen, int escaped)
{
@@ -232,18 +288,23 @@ static int DoBase64_Encode(const byte* in, word32 inLen, byte* out,
j = 0,
n = 0; /* new line counter */
+ int getSzOnly = (out == NULL);
+
word32 outSz = (inLen + 3 - 1) / 3 * 4;
word32 addSz = (outSz + PEM_LINE_SZ - 1) / PEM_LINE_SZ; /* new lines */
- if (escaped)
+ if (escaped == WC_ESC_NL_ENC)
addSz *= 3; /* instead of just \n, we're doing %0A triplet */
+ else if (escaped == WC_NO_NL_ENC)
+ addSz = 0; /* encode without \n */
outSz += addSz;
/* if escaped we can't predetermine size for one pass encoding, but
- * make sure we have enough if no escapes are in input */
- if (outSz > *outLen) return BAD_FUNC_ARG;
-
+ * make sure we have enough if no escapes are in input
+ * Also need to ensure outLen valid before dereference */
+ if (!outLen || (outSz > *outLen && !getSzOnly)) return BAD_FUNC_ARG;
+
while (inLen > 2) {
byte b1 = in[j++];
byte b2 = in[j++];
@@ -256,19 +317,20 @@ static int DoBase64_Encode(const byte* in, word32 inLen, byte* out,
byte e4 = b3 & 0x3F;
/* store */
- ret = CEscape(escaped, e1, out, &i, *outLen, 0);
+ ret = CEscape(escaped, e1, out, &i, *outLen, 0, getSzOnly);
if (ret != 0) break;
- ret = CEscape(escaped, e2, out, &i, *outLen, 0);
+ ret = CEscape(escaped, e2, out, &i, *outLen, 0, getSzOnly);
if (ret != 0) break;
- ret = CEscape(escaped, e3, out, &i, *outLen, 0);
+ ret = CEscape(escaped, e3, out, &i, *outLen, 0, getSzOnly);
if (ret != 0) break;
- ret = CEscape(escaped, e4, out, &i, *outLen, 0);
+ ret = CEscape(escaped, e4, out, &i, *outLen, 0, getSzOnly);
if (ret != 0) break;
inLen -= 3;
- if ((++n % (PEM_LINE_SZ / 4)) == 0 && inLen) {
- ret = CEscape(escaped, '\n', out, &i, *outLen, 1);
+ /* Insert newline after PEM_LINE_SZ, unless no \n requested */
+ if (escaped != WC_NO_NL_ENC && (++n % (PEM_LINE_SZ/4)) == 0 && inLen) {
+ ret = CEscape(escaped, '\n', out, &i, *outLen, 1, getSzOnly);
if (ret != 0) break;
}
}
@@ -284,50 +346,61 @@ static int DoBase64_Encode(const byte* in, word32 inLen, byte* out,
byte e2 = (byte)(((b1 & 0x3) << 4) | (b2 >> 4));
byte e3 = (byte)((b2 & 0xF) << 2);
- ret = CEscape(escaped, e1, out, &i, *outLen, 0);
- if (ret == 0)
- ret = CEscape(escaped, e2, out, &i, *outLen, 0);
+ ret = CEscape(escaped, e1, out, &i, *outLen, 0, getSzOnly);
+ if (ret == 0)
+ ret = CEscape(escaped, e2, out, &i, *outLen, 0, getSzOnly);
if (ret == 0) {
/* third */
if (twoBytes)
- ret = CEscape(escaped, e3, out, &i, *outLen, 0);
- else
- ret = CEscape(escaped, '=', out, &i, *outLen, 1);
+ ret = CEscape(escaped, e3, out, &i, *outLen, 0, getSzOnly);
+ else
+ ret = CEscape(escaped, '=', out, &i, *outLen, 1, getSzOnly);
}
/* fourth always pad */
if (ret == 0)
- ret = CEscape(escaped, '=', out, &i, *outLen, 1);
- }
+ ret = CEscape(escaped, '=', out, &i, *outLen, 1, getSzOnly);
+ }
- if (ret == 0)
- ret = CEscape(escaped, '\n', out, &i, *outLen, 1);
+ if (ret == 0 && escaped != WC_NO_NL_ENC)
+ ret = CEscape(escaped, '\n', out, &i, *outLen, 1, getSzOnly);
- if (i != outSz && escaped == 0 && ret == 0)
- return ASN_INPUT_E;
+ if (i != outSz && escaped != 1 && ret == 0)
+ return ASN_INPUT_E;
+/* If the output buffer has a room for an extra byte, add a null terminator */
+ if (out && *outLen > i)
+ out[i]= '\0';
*outLen = i;
- return ret;
+
+ if (ret == 0)
+ return getSzOnly ? LENGTH_ONLY_E : 0;
+
+ return ret;
}
/* Base64 Encode, PEM style, with \n line endings */
int Base64_Encode(const byte* in, word32 inLen, byte* out, word32* outLen)
{
- return DoBase64_Encode(in, inLen, out, outLen, 0);
+ return DoBase64_Encode(in, inLen, out, outLen, WC_STD_ENC);
}
-/* Base64 Encode, with %0A esacped line endings instead of \n */
+/* Base64 Encode, with %0A escaped line endings instead of \n */
int Base64_EncodeEsc(const byte* in, word32 inLen, byte* out, word32* outLen)
{
- return DoBase64_Encode(in, inLen, out, outLen, 1);
+ return DoBase64_Encode(in, inLen, out, outLen, WC_ESC_NL_ENC);
}
+int Base64_Encode_NoNl(const byte* in, word32 inLen, byte* out, word32* outLen)
+{
+ return DoBase64_Encode(in, inLen, out, outLen, WC_NO_NL_ENC);
+}
-#endif /* defined(WOLFSSL_BASE64_ENCODE) */
+#endif /* WOLFSSL_BASE64_ENCODE */
-#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER) || defined(HAVE_FIPS)
+#ifdef WOLFSSL_BASE16
static
const byte hexDecode[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
@@ -345,8 +418,11 @@ int Base16_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
word32 inIdx = 0;
word32 outIdx = 0;
+ if (in == NULL || out == NULL || outLen == NULL)
+ return BAD_FUNC_ARG;
+
if (inLen == 1 && *outLen && in) {
- byte b = in[inIdx++] - 0x30; /* 0 starts at 0x30 */
+ byte b = in[inIdx++] - BASE16_MIN; /* 0 starts at 0x30 */
/* sanity check */
if (b >= sizeof(hexDecode)/sizeof(hexDecode[0]))
@@ -356,7 +432,7 @@ int Base16_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
if (b == BAD)
return ASN_INPUT_E;
-
+
out[outIdx++] = b;
*outLen = outIdx;
@@ -370,8 +446,8 @@ int Base16_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
return BAD_FUNC_ARG;
while (inLen) {
- byte b = in[inIdx++] - 0x30; /* 0 starts at 0x30 */
- byte b2 = in[inIdx++] - 0x30;
+ byte b = in[inIdx++] - BASE16_MIN; /* 0 starts at 0x30 */
+ byte b2 = in[inIdx++] - BASE16_MIN;
/* sanity checks */
if (b >= sizeof(hexDecode)/sizeof(hexDecode[0]))
@@ -384,7 +460,7 @@ int Base16_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
if (b == BAD || b2 == BAD)
return ASN_INPUT_E;
-
+
out[outIdx++] = (byte)((b << 4) | b2);
inLen -= 2;
}
@@ -393,7 +469,43 @@ int Base16_Decode(const byte* in, word32 inLen, byte* out, word32* outLen)
return 0;
}
+/* Encode inLen bytes of in as uppercase hex with a terminating NUL.
+ * *outLen must be at least 2*inLen + 1; on return it holds the number
+ * of bytes written, INCLUDING the trailing NUL.  Returns 0 on success
+ * or BAD_FUNC_ARG on bad pointers / short output buffer. */
+int Base16_Encode(const byte* in, word32 inLen, byte* out, word32* outLen)
+{
+    static const char hexChar[] = "0123456789ABCDEF";
+    word32 i;
+    word32 outIdx = 0;
+
+    if (in == NULL || out == NULL || outLen == NULL)
+        return BAD_FUNC_ARG;
+
+    if (*outLen < (2 * inLen + 1))
+        return BAD_FUNC_ARG;
+
+    /* two hex digits per input byte, high nibble first */
+    for (i = 0; i < inLen; i++) {
+        out[outIdx++] = (byte)hexChar[in[i] >> 4];
+        out[outIdx++] = (byte)hexChar[in[i] & 0x0f];
+    }
+
+    /* force 0 at this end */
+    out[outIdx++] = 0;
+
+    *outLen = outIdx;
+    return 0;
+}
-#endif /* (OPENSSL_EXTRA) || (HAVE_WEBSERVER) || (HAVE_FIPS) */
+#endif /* WOLFSSL_BASE16 */
-#endif /* NO_CODING */
+#endif /* !NO_CODING */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/compress.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/compress.c
index a01c071dc..28d04f02d 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/compress.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/compress.c
@@ -1,9 +1,9 @@
/* compress.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
- wc_*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -16,10 +16,11 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -35,6 +36,7 @@
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
@@ -65,23 +67,24 @@ static void myFree(void* opaque, void* memory)
#endif
-int wc_Compress(byte* out, word32 outSz, const byte* in, word32 inSz, word32 flags)
/*
* out - pointer to destination buffer
* outSz - size of destination buffer
* in - pointer to source buffer to compress
* inSz - size of source to compress
- * flags - flags to control how compress operates
+ * flags - flags to control how compress operates
*
* return:
* negative - error code
* positive - bytes stored in out buffer
- *
+ *
* Note, the output buffer still needs to be larger than the input buffer.
* The right chunk of data won't compress at all, and the lookup table will
* add to the size of the output. The libz code says the compressed
* buffer should be srcSz + 0.1% + 12.
*/
+int wc_Compress_ex(byte* out, word32 outSz, const byte* in, word32 inSz,
+ word32 flags, word32 windowBits)
{
z_stream stream;
int result = 0;
@@ -101,7 +104,8 @@ int wc_Compress(byte* out, word32 outSz, const byte* in, word32 inSz, word32 fla
stream.opaque = (voidpf)0;
if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
- DEFLATE_DEFAULT_WINDOWBITS, DEFLATE_DEFAULT_MEMLEVEL,
+ DEFLATE_DEFAULT_WINDOWBITS | windowBits,
+ DEFLATE_DEFAULT_MEMLEVEL,
flags ? Z_FIXED : Z_DEFAULT_STRATEGY) != Z_OK)
return COMPRESS_INIT_E;
@@ -118,19 +122,37 @@ int wc_Compress(byte* out, word32 outSz, const byte* in, word32 inSz, word32 fla
return result;
}
+int wc_Compress(byte* out, word32 outSz, const byte* in, word32 inSz, word32 flags)
+{
+ return wc_Compress_ex(out, outSz, in, inSz, flags, 0);
+}
-int wc_DeCompress(byte* out, word32 outSz, const byte* in, word32 inSz)
+
+/* windowBits:
+* deflateInit() and inflateInit(), as well as deflateInit2() and inflateInit2()
+ with windowBits in 0..15 all process zlib-wrapped deflate data.
+ (See RFC 1950 and RFC 1951.)
+* deflateInit2() and inflateInit2() with negative windowBits in -1..-15 process
+ raw deflate data with no header or trailer.
+* deflateInit2() and inflateInit2() with windowBits in 16..31, i.e. 16
+ added to 0..15, process gzip-wrapped deflate data (RFC 1952).
+* inflateInit2() with windowBits in 32..47 (32 added to 0..15) will
+ automatically detect either a gzip or zlib header (but not raw deflate
+ data), and decompress accordingly.
+*/
+int wc_DeCompress_ex(byte* out, word32 outSz, const byte* in, word32 inSz,
+ int windowBits)
/*
* out - pointer to destination buffer
* outSz - size of destination buffer
* in - pointer to source buffer to compress
* inSz - size of source to compress
- * flags - flags to control how compress operates
+ * windowBits - flags to control how decompress operates
*
* return:
* negative - error code
* positive - bytes stored in out buffer
- */
+ */
{
z_stream stream;
int result = 0;
@@ -148,14 +170,15 @@ int wc_DeCompress(byte* out, word32 outSz, const byte* in, word32 inSz)
stream.zfree = (free_func)myFree;
stream.opaque = (voidpf)0;
- if (inflateInit2(&stream, DEFLATE_DEFAULT_WINDOWBITS) != Z_OK)
+ if (inflateInit2(&stream, DEFLATE_DEFAULT_WINDOWBITS | windowBits) != Z_OK)
return DECOMPRESS_INIT_E;
- if (inflate(&stream, Z_FINISH) != Z_STREAM_END) {
+ result = inflate(&stream, Z_FINISH);
+ if (result != Z_STREAM_END) {
inflateEnd(&stream);
return DECOMPRESS_E;
}
-
+
result = (int)stream.total_out;
if (inflateEnd(&stream) != Z_OK)
@@ -165,5 +188,11 @@ int wc_DeCompress(byte* out, word32 outSz, const byte* in, word32 inSz)
}
+int wc_DeCompress(byte* out, word32 outSz, const byte* in, word32 inSz)
+{
+ return wc_DeCompress_ex(out, outSz, in, inSz, 0);
+}
+
+
#endif /* HAVE_LIBZ */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cpuid.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cpuid.c
new file mode 100644
index 000000000..85c4bf2d6
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cpuid.c
@@ -0,0 +1,110 @@
+/* cpuid.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include <wolfssl/wolfcrypt/cpuid.h>
+
+#if (defined(WOLFSSL_X86_64_BUILD) || defined(USE_INTEL_SPEEDUP) || \
+ defined(WOLFSSL_AESNI)) && !defined(WOLFSSL_NO_ASM)
+ /* Each platform needs to query info type 1 from cpuid to see if aesni is
+ * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
+ */
+
+ #ifndef _MSC_VER
+ #define cpuid(reg, leaf, sub)\
+ __asm__ __volatile__ ("cpuid":\
+ "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
+ "a" (leaf), "c"(sub));
+
+ #define XASM_LINK(f) asm(f)
+ #else
+ #include <intrin.h>
+
+ #define cpuid(a,b,c) __cpuidex((int*)a,b,c)
+
+ #define XASM_LINK(f)
+ #endif /* _MSC_VER */
+
+ #define EAX 0
+ #define EBX 1
+ #define ECX 2
+ #define EDX 3
+
+ static word32 cpuid_check = 0;
+ static word32 cpuid_flags = 0;
+
+ static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit)
+ {
+ int got_intel_cpu = 0;
+ int got_amd_cpu = 0;
+ unsigned int reg[5];
+ reg[4] = '\0';
+ cpuid(reg, 0, 0);
+
+ /* check for Intel cpu */
+ if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
+ XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
+ XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
+ got_intel_cpu = 1;
+ }
+
+ /* check for AMD cpu */
+ if (XMEMCMP((char *)&(reg[EBX]), "Auth", 4) == 0 &&
+ XMEMCMP((char *)&(reg[EDX]), "enti", 4) == 0 &&
+ XMEMCMP((char *)&(reg[ECX]), "cAMD", 4) == 0) {
+ got_amd_cpu = 1;
+ }
+
+ if (got_intel_cpu || got_amd_cpu) {
+ cpuid(reg, leaf, sub);
+ return ((reg[num] >> bit) & 0x1);
+ }
+ return 0;
+ }
+
+
+ void cpuid_set_flags(void)
+ {
+ if (!cpuid_check) {
+ if (cpuid_flag(1, 0, ECX, 28)) { cpuid_flags |= CPUID_AVX1 ; }
+ if (cpuid_flag(7, 0, EBX, 5)) { cpuid_flags |= CPUID_AVX2 ; }
+ if (cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
+ if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; }
+ if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; }
+ if (cpuid_flag(1, 0, ECX, 25)) { cpuid_flags |= CPUID_AESNI ; }
+ if (cpuid_flag(7, 0, EBX, 19)) { cpuid_flags |= CPUID_ADX ; }
+ cpuid_check = 1;
+ }
+ }
+
+ word32 cpuid_get_flags(void)
+ {
+ if (!cpuid_check)
+ cpuid_set_flags();
+ return cpuid_flags;
+ }
+#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cryptocb.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cryptocb.c
new file mode 100644
index 000000000..79f89dbb1
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/cryptocb.c
@@ -0,0 +1,648 @@
+/* cryptocb.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This framework provides a central place for crypto hardware integration
+ using the devId scheme. If not supported return `CRYPTOCB_UNAVAILABLE`. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLF_CRYPTO_CB
+
+#include <wolfssl/wolfcrypt/cryptocb.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+
+/* TODO: Consider linked list with mutex */
+#ifndef MAX_CRYPTO_DEVID_CALLBACKS
+#define MAX_CRYPTO_DEVID_CALLBACKS 8
+#endif
+
+typedef struct CryptoCb {
+ int devId;
+ CryptoDevCallbackFunc cb;
+ void* ctx;
+} CryptoCb;
+static WOLFSSL_GLOBAL CryptoCb gCryptoDev[MAX_CRYPTO_DEVID_CALLBACKS];
+
+static CryptoCb* wc_CryptoCb_FindDevice(int devId)
+{
+ int i;
+ for (i=0; i<MAX_CRYPTO_DEVID_CALLBACKS; i++) {
+ if (gCryptoDev[i].devId == devId)
+ return &gCryptoDev[i];
+ }
+ return NULL;
+}
+static CryptoCb* wc_CryptoCb_FindDeviceByIndex(int startIdx)
+{
+ int i;
+ for (i=startIdx; i<MAX_CRYPTO_DEVID_CALLBACKS; i++) {
+ if (gCryptoDev[i].devId != INVALID_DEVID)
+ return &gCryptoDev[i];
+ }
+ return NULL;
+}
+
+static WC_INLINE int wc_CryptoCb_TranslateErrorCode(int ret)
+{
+ if (ret == NOT_COMPILED_IN) {
+ /* backwards compatibility for older NOT_COMPILED_IN syntax */
+ ret = CRYPTOCB_UNAVAILABLE;
+ }
+ return ret;
+}
+
+void wc_CryptoCb_Init(void)
+{
+ int i;
+ for (i=0; i<MAX_CRYPTO_DEVID_CALLBACKS; i++) {
+ gCryptoDev[i].devId = INVALID_DEVID;
+ }
+}
+
+int wc_CryptoCb_RegisterDevice(int devId, CryptoDevCallbackFunc cb, void* ctx)
+{
+ /* find existing or new */
+ CryptoCb* dev = wc_CryptoCb_FindDevice(devId);
+ if (dev == NULL)
+ dev = wc_CryptoCb_FindDevice(INVALID_DEVID);
+
+ if (dev == NULL)
+ return BUFFER_E; /* out of devices */
+
+ dev->devId = devId;
+ dev->cb = cb;
+ dev->ctx = ctx;
+
+ return 0;
+}
+
+void wc_CryptoCb_UnRegisterDevice(int devId)
+{
+ CryptoCb* dev = wc_CryptoCb_FindDevice(devId);
+ if (dev) {
+ XMEMSET(dev, 0, sizeof(*dev));
+ dev->devId = INVALID_DEVID;
+ }
+}
+
+#ifndef NO_RSA
+int wc_CryptoCb_Rsa(const byte* in, word32 inLen, byte* out,
+ word32* outLen, int type, RsaKey* key, WC_RNG* rng)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ if (key == NULL)
+ return ret;
+
+ /* locate registered callback */
+ dev = wc_CryptoCb_FindDevice(key->devId);
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+ cryptoInfo.pk.type = WC_PK_TYPE_RSA;
+ cryptoInfo.pk.rsa.in = in;
+ cryptoInfo.pk.rsa.inLen = inLen;
+ cryptoInfo.pk.rsa.out = out;
+ cryptoInfo.pk.rsa.outLen = outLen;
+ cryptoInfo.pk.rsa.type = type;
+ cryptoInfo.pk.rsa.key = key;
+ cryptoInfo.pk.rsa.rng = rng;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+#ifdef WOLFSSL_KEY_GEN
+int wc_CryptoCb_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ if (key == NULL)
+ return ret;
+
+ /* locate registered callback */
+ dev = wc_CryptoCb_FindDevice(key->devId);
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+ cryptoInfo.pk.type = WC_PK_TYPE_RSA_KEYGEN;
+ cryptoInfo.pk.rsakg.key = key;
+ cryptoInfo.pk.rsakg.size = size;
+ cryptoInfo.pk.rsakg.e = e;
+ cryptoInfo.pk.rsakg.rng = rng;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif
+#endif /* !NO_RSA */
+
+#ifdef HAVE_ECC
+int wc_CryptoCb_MakeEccKey(WC_RNG* rng, int keySize, ecc_key* key, int curveId)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ if (key == NULL)
+ return ret;
+
+ /* locate registered callback */
+ dev = wc_CryptoCb_FindDevice(key->devId);
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+ cryptoInfo.pk.type = WC_PK_TYPE_EC_KEYGEN;
+ cryptoInfo.pk.eckg.rng = rng;
+ cryptoInfo.pk.eckg.size = keySize;
+ cryptoInfo.pk.eckg.key = key;
+ cryptoInfo.pk.eckg.curveId = curveId;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_Ecdh(ecc_key* private_key, ecc_key* public_key,
+ byte* out, word32* outlen)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ if (private_key == NULL)
+ return ret;
+
+ /* locate registered callback */
+ dev = wc_CryptoCb_FindDevice(private_key->devId);
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+ cryptoInfo.pk.type = WC_PK_TYPE_ECDH;
+ cryptoInfo.pk.ecdh.private_key = private_key;
+ cryptoInfo.pk.ecdh.public_key = public_key;
+ cryptoInfo.pk.ecdh.out = out;
+ cryptoInfo.pk.ecdh.outlen = outlen;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_EccSign(const byte* in, word32 inlen, byte* out,
+ word32 *outlen, WC_RNG* rng, ecc_key* key)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ if (key == NULL)
+ return ret;
+
+ /* locate registered callback */
+ dev = wc_CryptoCb_FindDevice(key->devId);
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+ cryptoInfo.pk.type = WC_PK_TYPE_ECDSA_SIGN;
+ cryptoInfo.pk.eccsign.in = in;
+ cryptoInfo.pk.eccsign.inlen = inlen;
+ cryptoInfo.pk.eccsign.out = out;
+ cryptoInfo.pk.eccsign.outlen = outlen;
+ cryptoInfo.pk.eccsign.rng = rng;
+ cryptoInfo.pk.eccsign.key = key;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_EccVerify(const byte* sig, word32 siglen,
+ const byte* hash, word32 hashlen, int* res, ecc_key* key)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ if (key == NULL)
+ return ret;
+
+ /* locate registered callback */
+ dev = wc_CryptoCb_FindDevice(key->devId);
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_PK;
+ cryptoInfo.pk.type = WC_PK_TYPE_ECDSA_VERIFY;
+ cryptoInfo.pk.eccverify.sig = sig;
+ cryptoInfo.pk.eccverify.siglen = siglen;
+ cryptoInfo.pk.eccverify.hash = hash;
+ cryptoInfo.pk.eccverify.hashlen = hashlen;
+ cryptoInfo.pk.eccverify.res = res;
+ cryptoInfo.pk.eccverify.key = key;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* HAVE_ECC */
+
+#ifndef NO_AES
+#ifdef HAVE_AESGCM
+int wc_CryptoCb_AesGcmEncrypt(Aes* aes, byte* out,
+ const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ if (aes) {
+ dev = wc_CryptoCb_FindDevice(aes->devId);
+ }
+ else {
+ /* locate first callback and try using it */
+ dev = wc_CryptoCb_FindDeviceByIndex(0);
+ }
+
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+ cryptoInfo.cipher.type = WC_CIPHER_AES_GCM;
+ cryptoInfo.cipher.enc = 1;
+ cryptoInfo.cipher.aesgcm_enc.aes = aes;
+ cryptoInfo.cipher.aesgcm_enc.out = out;
+ cryptoInfo.cipher.aesgcm_enc.in = in;
+ cryptoInfo.cipher.aesgcm_enc.sz = sz;
+ cryptoInfo.cipher.aesgcm_enc.iv = iv;
+ cryptoInfo.cipher.aesgcm_enc.ivSz = ivSz;
+ cryptoInfo.cipher.aesgcm_enc.authTag = authTag;
+ cryptoInfo.cipher.aesgcm_enc.authTagSz = authTagSz;
+ cryptoInfo.cipher.aesgcm_enc.authIn = authIn;
+ cryptoInfo.cipher.aesgcm_enc.authInSz = authInSz;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_AesGcmDecrypt(Aes* aes, byte* out,
+ const byte* in, word32 sz,
+ const byte* iv, word32 ivSz,
+ const byte* authTag, word32 authTagSz,
+ const byte* authIn, word32 authInSz)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ if (aes) {
+ dev = wc_CryptoCb_FindDevice(aes->devId);
+ }
+ else {
+ /* locate first callback and try using it */
+ dev = wc_CryptoCb_FindDeviceByIndex(0);
+ }
+
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+ cryptoInfo.cipher.type = WC_CIPHER_AES_GCM;
+ cryptoInfo.cipher.enc = 0;
+ cryptoInfo.cipher.aesgcm_dec.aes = aes;
+ cryptoInfo.cipher.aesgcm_dec.out = out;
+ cryptoInfo.cipher.aesgcm_dec.in = in;
+ cryptoInfo.cipher.aesgcm_dec.sz = sz;
+ cryptoInfo.cipher.aesgcm_dec.iv = iv;
+ cryptoInfo.cipher.aesgcm_dec.ivSz = ivSz;
+ cryptoInfo.cipher.aesgcm_dec.authTag = authTag;
+ cryptoInfo.cipher.aesgcm_dec.authTagSz = authTagSz;
+ cryptoInfo.cipher.aesgcm_dec.authIn = authIn;
+ cryptoInfo.cipher.aesgcm_dec.authInSz = authInSz;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* HAVE_AESGCM */
+
+#ifdef HAVE_AES_CBC
+int wc_CryptoCb_AesCbcEncrypt(Aes* aes, byte* out,
+ const byte* in, word32 sz)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ if (aes) {
+ dev = wc_CryptoCb_FindDevice(aes->devId);
+ }
+ else {
+ /* locate first callback and try using it */
+ dev = wc_CryptoCb_FindDeviceByIndex(0);
+ }
+
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+ cryptoInfo.cipher.type = WC_CIPHER_AES_CBC;
+ cryptoInfo.cipher.enc = 1;
+ cryptoInfo.cipher.aescbc.aes = aes;
+ cryptoInfo.cipher.aescbc.out = out;
+ cryptoInfo.cipher.aescbc.in = in;
+ cryptoInfo.cipher.aescbc.sz = sz;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_AesCbcDecrypt(Aes* aes, byte* out,
+ const byte* in, word32 sz)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ if (aes) {
+ dev = wc_CryptoCb_FindDevice(aes->devId);
+ }
+ else {
+ /* locate first callback and try using it */
+ dev = wc_CryptoCb_FindDeviceByIndex(0);
+ }
+
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+ cryptoInfo.cipher.type = WC_CIPHER_AES_CBC;
+ cryptoInfo.cipher.enc = 0;
+ cryptoInfo.cipher.aescbc.aes = aes;
+ cryptoInfo.cipher.aescbc.out = out;
+ cryptoInfo.cipher.aescbc.in = in;
+ cryptoInfo.cipher.aescbc.sz = sz;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* HAVE_AES_CBC */
+#endif /* !NO_AES */
+
+#ifndef NO_DES3
+int wc_CryptoCb_Des3Encrypt(Des3* des3, byte* out,
+ const byte* in, word32 sz)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ if (des3) {
+ dev = wc_CryptoCb_FindDevice(des3->devId);
+ }
+ else {
+ /* locate first callback and try using it */
+ dev = wc_CryptoCb_FindDeviceByIndex(0);
+ }
+
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+ cryptoInfo.cipher.type = WC_CIPHER_DES3;
+ cryptoInfo.cipher.enc = 1;
+ cryptoInfo.cipher.des3.des = des3;
+ cryptoInfo.cipher.des3.out = out;
+ cryptoInfo.cipher.des3.in = in;
+ cryptoInfo.cipher.des3.sz = sz;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_Des3Decrypt(Des3* des3, byte* out,
+ const byte* in, word32 sz)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ if (des3) {
+ dev = wc_CryptoCb_FindDevice(des3->devId);
+ }
+ else {
+ /* locate first callback and try using it */
+ dev = wc_CryptoCb_FindDeviceByIndex(0);
+ }
+
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_CIPHER;
+ cryptoInfo.cipher.type = WC_CIPHER_DES3;
+ cryptoInfo.cipher.enc = 0;
+ cryptoInfo.cipher.des3.des = des3;
+ cryptoInfo.cipher.des3.out = out;
+ cryptoInfo.cipher.des3.in = in;
+ cryptoInfo.cipher.des3.sz = sz;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !NO_DES3 */
+
+#ifndef NO_SHA
+int wc_CryptoCb_ShaHash(wc_Sha* sha, const byte* in,
+ word32 inSz, byte* digest)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ if (sha) {
+ dev = wc_CryptoCb_FindDevice(sha->devId);
+ }
+ else {
+ /* locate first callback and try using it */
+ dev = wc_CryptoCb_FindDeviceByIndex(0);
+ }
+
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_HASH;
+ cryptoInfo.hash.type = WC_HASH_TYPE_SHA;
+ cryptoInfo.hash.sha1 = sha;
+ cryptoInfo.hash.in = in;
+ cryptoInfo.hash.inSz = inSz;
+ cryptoInfo.hash.digest = digest;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !NO_SHA */
+
+#ifndef NO_SHA256
+int wc_CryptoCb_Sha256Hash(wc_Sha256* sha256, const byte* in,
+ word32 inSz, byte* digest)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ if (sha256) {
+ dev = wc_CryptoCb_FindDevice(sha256->devId);
+ }
+ else {
+ /* locate first callback and try using it */
+ dev = wc_CryptoCb_FindDeviceByIndex(0);
+ }
+
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_HASH;
+ cryptoInfo.hash.type = WC_HASH_TYPE_SHA256;
+ cryptoInfo.hash.sha256 = sha256;
+ cryptoInfo.hash.in = in;
+ cryptoInfo.hash.inSz = inSz;
+ cryptoInfo.hash.digest = digest;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !NO_SHA256 */
+
+#ifndef NO_HMAC
+int wc_CryptoCb_Hmac(Hmac* hmac, int macType, const byte* in, word32 inSz,
+ byte* digest)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ if (hmac == NULL)
+ return ret;
+
+ /* locate registered callback */
+ dev = wc_CryptoCb_FindDevice(hmac->devId);
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_HMAC;
+ cryptoInfo.hmac.macType = macType;
+ cryptoInfo.hmac.in = in;
+ cryptoInfo.hmac.inSz = inSz;
+ cryptoInfo.hmac.digest = digest;
+ cryptoInfo.hmac.hmac = hmac;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !NO_HMAC */
+
+#ifndef WC_NO_RNG
+int wc_CryptoCb_RandomBlock(WC_RNG* rng, byte* out, word32 sz)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ if (rng) {
+ dev = wc_CryptoCb_FindDevice(rng->devId);
+ }
+ else {
+ /* locate first callback and try using it */
+ dev = wc_CryptoCb_FindDeviceByIndex(0);
+ }
+
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_RNG;
+ cryptoInfo.rng.rng = rng;
+ cryptoInfo.rng.out = out;
+ cryptoInfo.rng.sz = sz;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+
+int wc_CryptoCb_RandomSeed(OS_Seed* os, byte* seed, word32 sz)
+{
+ int ret = CRYPTOCB_UNAVAILABLE;
+ CryptoCb* dev;
+
+ /* locate registered callback */
+ dev = wc_CryptoCb_FindDevice(os->devId);
+ if (dev && dev->cb) {
+ wc_CryptoInfo cryptoInfo;
+ XMEMSET(&cryptoInfo, 0, sizeof(cryptoInfo));
+ cryptoInfo.algo_type = WC_ALGO_TYPE_SEED;
+ cryptoInfo.seed.os = os;
+ cryptoInfo.seed.seed = seed;
+ cryptoInfo.seed.sz = sz;
+
+ ret = dev->cb(dev->devId, &cryptoInfo, dev->ctx);
+ }
+
+ return wc_CryptoCb_TranslateErrorCode(ret);
+}
+#endif /* !WC_NO_RNG */
+
+#endif /* WOLF_CRYPTO_CB */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/curve25519.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/curve25519.c
index b745a046c..39e1216a0 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/curve25519.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/curve25519.c
@@ -1,8 +1,8 @@
/* curve25519.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */
@@ -35,202 +36,449 @@
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
+#if defined(FREESCALE_LTC_ECC)
+ #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
+
const curve25519_set_type curve25519_sets[] = {
-{
- 32,
+ {
+ CURVE25519_KEYSIZE,
"CURVE25519",
-}
+ }
};
+int wc_curve25519_make_key(WC_RNG* rng, int keysize, curve25519_key* key)
+{
+#ifdef FREESCALE_LTC_ECC
+ const ECPoint* basepoint = wc_curve25519_GetBasePoint();
+#else
+ unsigned char basepoint[CURVE25519_KEYSIZE] = {9};
+#endif
+ int ret;
+
+ if (key == NULL || rng == NULL)
+ return BAD_FUNC_ARG;
+ /* currently only a key size of 32 bytes is used */
+ if (keysize != CURVE25519_KEYSIZE)
+ return ECC_BAD_ARG_E;
-int wc_curve25519_make_key(RNG* rng, int keysize, curve25519_key* key)
-{
- unsigned char basepoint[CURVE25519_KEYSIZE] = {9};
- unsigned char n[CURVE25519_KEYSIZE];
- unsigned char p[CURVE25519_KEYSIZE];
- int i;
- int ret;
-
- if (key == NULL || rng == NULL)
- return ECC_BAD_ARG_E;
-
- /* currently only a key size of 32 bytes is used */
- if (keysize != CURVE25519_KEYSIZE)
- return ECC_BAD_ARG_E;
-
- /* get random number from RNG */
- ret = wc_RNG_GenerateBlock(rng, n, keysize);
- if (ret != 0)
- return ret;
-
- for (i = 0; i < keysize; ++i) key->k.point[i] = n[i];
- key->k.point[ 0] &= 248;
- key->k.point[31] &= 127;
- key->k.point[31] |= 64;
-
- /* compute public key */
- ret = curve25519(p, key->k.point, basepoint);
-
- /* store keys in big endian format */
- for (i = 0; i < keysize; ++i) n[i] = key->k.point[i];
- for (i = 0; i < keysize; ++i) {
- key->p.point[keysize - i - 1] = p[i];
- key->k.point[keysize - i - 1] = n[i];
- }
-
- ForceZero(n, keysize);
- ForceZero(p, keysize);
-
- return ret;
+#ifndef FREESCALE_LTC_ECC
+ fe_init();
+#endif
+
+ /* random number for private key */
+ ret = wc_RNG_GenerateBlock(rng, key->k.point, keysize);
+ if (ret != 0)
+ return ret;
+
+ /* Clamp the private key */
+ key->k.point[0] &= 248;
+ key->k.point[CURVE25519_KEYSIZE-1] &= 63; /* same &=127 because |=64 after */
+ key->k.point[CURVE25519_KEYSIZE-1] |= 64;
+
+ /* compute public key */
+ #ifdef FREESCALE_LTC_ECC
+ ret = wc_curve25519(&key->p, key->k.point, basepoint, kLTC_Weierstrass); /* input basepoint on Weierstrass curve */
+ #else
+ ret = curve25519(key->p.point, key->k.point, basepoint);
+ #endif
+ if (ret != 0) {
+ ForceZero(key->k.point, keysize);
+ ForceZero(key->p.point, keysize);
+ return ret;
+ }
+
+ return ret;
}
+#ifdef HAVE_CURVE25519_SHARED_SECRET
int wc_curve25519_shared_secret(curve25519_key* private_key,
curve25519_key* public_key,
byte* out, word32* outlen)
{
- unsigned char k[CURVE25519_KEYSIZE];
- unsigned char p[CURVE25519_KEYSIZE];
- unsigned char o[CURVE25519_KEYSIZE];
+ return wc_curve25519_shared_secret_ex(private_key, public_key,
+ out, outlen, EC25519_BIG_ENDIAN);
+}
+
+int wc_curve25519_shared_secret_ex(curve25519_key* private_key,
+ curve25519_key* public_key,
+ byte* out, word32* outlen, int endian)
+{
+ #ifdef FREESCALE_LTC_ECC
+ ECPoint o = {{0}};
+ #else
+ unsigned char o[CURVE25519_KEYSIZE];
+ #endif
int ret = 0;
- int i;
/* sanity check */
- if (private_key == NULL || public_key == NULL || out == NULL ||
- outlen == NULL)
+ if (private_key == NULL || public_key == NULL ||
+ out == NULL || outlen == NULL || *outlen < CURVE25519_KEYSIZE)
return BAD_FUNC_ARG;
/* avoid implementation fingerprinting */
- if (public_key->p.point[0] > 0x7F)
+ if (public_key->p.point[CURVE25519_KEYSIZE-1] > 0x7F)
return ECC_BAD_ARG_E;
- XMEMSET(p, 0, sizeof(p));
- XMEMSET(k, 0, sizeof(k));
- XMEMSET(out, 0, CURVE25519_KEYSIZE);
+ #ifdef FREESCALE_LTC_ECC
+ ret = wc_curve25519(&o, private_key->k.point, &public_key->p, kLTC_Curve25519 /* input point P on Curve25519 */);
+ #else
+ ret = curve25519(o, private_key->k.point, public_key->p.point);
+ #endif
+ if (ret != 0) {
+ #ifdef FREESCALE_LTC_ECC
+ ForceZero(o.point, CURVE25519_KEYSIZE);
+ ForceZero(o.pointY, CURVE25519_KEYSIZE);
+ #else
+ ForceZero(o, CURVE25519_KEYSIZE);
+ #endif
+ return ret;
+ }
- for (i = 0; i < CURVE25519_KEYSIZE; ++i) {
- p[i] = public_key->p.point [CURVE25519_KEYSIZE - i - 1];
- k[i] = private_key->k.point[CURVE25519_KEYSIZE - i - 1];
+ if (endian == EC25519_BIG_ENDIAN) {
+ int i;
+ /* put shared secret key in Big Endian format */
+ for (i = 0; i < CURVE25519_KEYSIZE; i++)
+ #ifdef FREESCALE_LTC_ECC
+ out[i] = o.point[CURVE25519_KEYSIZE - i -1];
+ #else
+ out[i] = o[CURVE25519_KEYSIZE - i -1];
+ #endif
}
+ else /* put shared secret key in Little Endian format */
+ #ifdef FREESCALE_LTC_ECC
+ XMEMCPY(out, o.point, CURVE25519_KEYSIZE);
+ #else
+ XMEMCPY(out, o, CURVE25519_KEYSIZE);
+ #endif
- ret = curve25519(o , k, p);
*outlen = CURVE25519_KEYSIZE;
- for (i = 0; i < CURVE25519_KEYSIZE; ++i) {
- out[i] = o[CURVE25519_KEYSIZE - i -1];
- }
-
- ForceZero(p, sizeof(p));
- ForceZero(k, sizeof(k));
- ForceZero(o, sizeof(o));
+ #ifdef FREESCALE_LTC_ECC
+ ForceZero(o.point, CURVE25519_KEYSIZE);
+ ForceZero(o.pointY, CURVE25519_KEYSIZE);
+ #else
+ ForceZero(o, CURVE25519_KEYSIZE);
+ #endif
return ret;
}
+#endif /* HAVE_CURVE25519_SHARED_SECRET */
+
+#ifdef HAVE_CURVE25519_KEY_EXPORT
-/* curve25519 uses a serialized string for key representation */
+/* export curve25519 public key (Big endian)
+ * return 0 on success */
int wc_curve25519_export_public(curve25519_key* key, byte* out, word32* outLen)
{
- word32 keySz;
+ return wc_curve25519_export_public_ex(key, out, outLen, EC25519_BIG_ENDIAN);
+}
+/* export curve25519 public key (Big or Little endian)
+ * return 0 on success */
+int wc_curve25519_export_public_ex(curve25519_key* key, byte* out,
+ word32* outLen, int endian)
+{
if (key == NULL || out == NULL || outLen == NULL)
return BAD_FUNC_ARG;
- /* check size of outgoing key */
- keySz = wc_curve25519_size(key);
+ /* check and set outgoing key size */
+ if (*outLen < CURVE25519_KEYSIZE) {
+ *outLen = CURVE25519_KEYSIZE;
+ return ECC_BAD_ARG_E;
+ }
+ *outLen = CURVE25519_KEYSIZE;
- /* copy in public key */
- XMEMCPY(out, key->p.point, keySz);
- *outLen = keySz;
+ if (endian == EC25519_BIG_ENDIAN) {
+ int i;
+
+ /* read keys in Big Endian format */
+ for (i = 0; i < CURVE25519_KEYSIZE; i++)
+ out[i] = key->p.point[CURVE25519_KEYSIZE - i - 1];
+ }
+ else
+ XMEMCPY(out, key->p.point, CURVE25519_KEYSIZE);
return 0;
}
-/* import curve25519 public key
- return 0 on success */
+#endif /* HAVE_CURVE25519_KEY_EXPORT */
+
+#ifdef HAVE_CURVE25519_KEY_IMPORT
+
+/* import curve25519 public key (Big endian)
+ * return 0 on success */
int wc_curve25519_import_public(const byte* in, word32 inLen,
curve25519_key* key)
{
- word32 keySz;
+ return wc_curve25519_import_public_ex(in, inLen, key, EC25519_BIG_ENDIAN);
+}
+/* import curve25519 public key (Big or Little endian)
+ * return 0 on success */
+int wc_curve25519_import_public_ex(const byte* in, word32 inLen,
+ curve25519_key* key, int endian)
+{
/* sanity check */
if (key == NULL || in == NULL)
- return ECC_BAD_ARG_E;
+ return BAD_FUNC_ARG;
/* check size of incoming keys */
- keySz = wc_curve25519_size(key);
- if (inLen != keySz)
+ if (inLen != CURVE25519_KEYSIZE)
return ECC_BAD_ARG_E;
- XMEMCPY(key->p.point, in, inLen);
+ if (endian == EC25519_BIG_ENDIAN) {
+ int i;
+
+ /* read keys in Big Endian format */
+ for (i = 0; i < CURVE25519_KEYSIZE; i++)
+ key->p.point[i] = in[CURVE25519_KEYSIZE - i - 1];
+ }
+ else
+ XMEMCPY(key->p.point, in, inLen);
key->dp = &curve25519_sets[0];
+ /* LTC needs also Y coordinate - let's compute it */
+ #ifdef FREESCALE_LTC_ECC
+ ltc_pkha_ecc_point_t ltcPoint;
+ ltcPoint.X = &key->p.point[0];
+ ltcPoint.Y = &key->p.pointY[0];
+ LTC_PKHA_Curve25519ComputeY(&ltcPoint);
+ #endif
+
return 0;
}
+/* Check the public key value (big or little endian)
+ *
+ * pub Public key bytes.
+ * pubSz Size of public key in bytes.
+ * endian Public key bytes passed in as big-endian or little-endian.
+ * returns BAD_FUNC_ARG when pub is NULL,
+ * BUFFER_E when size of public key is zero;
+ * ECC_OUT_OF_RANGE_E if the high bit is set;
+ * ECC_BAD_ARG_E if key length is not 32 bytes, public key value is
+ * zero or one; and
+ * 0 otherwise.
+ */
+int wc_curve25519_check_public(const byte* pub, word32 pubSz, int endian)
+{
+ word32 i;
+
+ if (pub == NULL)
+ return BAD_FUNC_ARG;
+
+ /* Check for empty key data */
+ if (pubSz == 0)
+ return BUFFER_E;
-/* export curve25519 private key only raw, outLen is in/out size
- return 0 on success */
+ /* Check key length */
+ if (pubSz != CURVE25519_KEYSIZE)
+ return ECC_BAD_ARG_E;
+
+
+ if (endian == EC25519_LITTLE_ENDIAN) {
+ /* Check for value of zero or one */
+ for (i = pubSz - 1; i > 0; i--) {
+ if (pub[i] != 0)
+ break;
+ }
+ if (i == 0 && (pub[0] == 0 || pub[0] == 1))
+ return ECC_BAD_ARG_E;
+
+ /* Check high bit set */
+ if (pub[CURVE25519_KEYSIZE-1] & 0x80)
+ return ECC_OUT_OF_RANGE_E;
+ }
+ else {
+ /* Check for value of zero or one */
+ for (i = 0; i < pubSz-1; i++) {
+ if (pub[i] != 0)
+ break;
+ }
+ if (i == pubSz - 1 && (pub[i] == 0 || pub[i] == 1))
+ return ECC_BAD_ARG_E;
+
+ /* Check high bit set */
+ if (pub[0] & 0x80)
+ return ECC_OUT_OF_RANGE_E;
+ }
+
+ return 0;
+}
+
+#endif /* HAVE_CURVE25519_KEY_IMPORT */
+
+
+#ifdef HAVE_CURVE25519_KEY_EXPORT
+
+/* export curve25519 private key only raw (Big endian)
+ * outLen is in/out size
+ * return 0 on success */
int wc_curve25519_export_private_raw(curve25519_key* key, byte* out,
word32* outLen)
{
- word32 keySz;
+ return wc_curve25519_export_private_raw_ex(key, out, outLen,
+ EC25519_BIG_ENDIAN);
+}
+/* export curve25519 private key only raw (Big or Little endian)
+ * outLen is in/out size
+ * return 0 on success */
+int wc_curve25519_export_private_raw_ex(curve25519_key* key, byte* out,
+ word32* outLen, int endian)
+{
/* sanity check */
if (key == NULL || out == NULL || outLen == NULL)
+ return BAD_FUNC_ARG;
+
+ /* check size of outgoing buffer */
+ if (*outLen < CURVE25519_KEYSIZE) {
+ *outLen = CURVE25519_KEYSIZE;
return ECC_BAD_ARG_E;
+ }
+ *outLen = CURVE25519_KEYSIZE;
+
+ if (endian == EC25519_BIG_ENDIAN) {
+ int i;
- keySz = wc_curve25519_size(key);
- *outLen = keySz;
- XMEMSET(out, 0, keySz);
- XMEMCPY(out, key->k.point, keySz);
+ /* put the key in Big Endian format */
+ for (i = 0; i < CURVE25519_KEYSIZE; i++)
+ out[i] = key->k.point[CURVE25519_KEYSIZE - i - 1];
+ }
+ else
+ XMEMCPY(out, key->k.point, CURVE25519_KEYSIZE);
return 0;
}
+/* curve25519 key pair export (Big or Little endian)
+ * return 0 on success */
+int wc_curve25519_export_key_raw(curve25519_key* key,
+ byte* priv, word32 *privSz,
+ byte* pub, word32 *pubSz)
+{
+ return wc_curve25519_export_key_raw_ex(key, priv, privSz,
+ pub, pubSz, EC25519_BIG_ENDIAN);
+}
-/* curve25519 private key import.
- Public key to match private key needs to be imported too */
+/* curve25519 key pair export (Big or Little endian)
+ * return 0 on success */
+int wc_curve25519_export_key_raw_ex(curve25519_key* key,
+ byte* priv, word32 *privSz,
+ byte* pub, word32 *pubSz,
+ int endian)
+{
+ int ret;
+
+ /* export private part */
+ ret = wc_curve25519_export_private_raw_ex(key, priv, privSz, endian);
+ if (ret != 0)
+ return ret;
+
+ /* export public part */
+ return wc_curve25519_export_public_ex(key, pub, pubSz, endian);
+}
+
+#endif /* HAVE_CURVE25519_KEY_EXPORT */
+
+#ifdef HAVE_CURVE25519_KEY_IMPORT
+
+/* curve25519 private key import (Big endian)
+ * Public key to match private key needs to be imported too
+ * return 0 on success */
int wc_curve25519_import_private_raw(const byte* priv, word32 privSz,
- const byte* pub, word32 pubSz, curve25519_key* key)
+ const byte* pub, word32 pubSz,
+ curve25519_key* key)
{
- int ret = 0;
- word32 keySz;
+ return wc_curve25519_import_private_raw_ex(priv, privSz, pub, pubSz,
+ key, EC25519_BIG_ENDIAN);
+}
+/* curve25519 private key import (Big or Little endian)
+ * Public key to match private key needs to be imported too
+ * return 0 on success */
+int wc_curve25519_import_private_raw_ex(const byte* priv, word32 privSz,
+ const byte* pub, word32 pubSz,
+ curve25519_key* key, int endian)
+{
+ int ret;
+
+ /* import private part */
+ ret = wc_curve25519_import_private_ex(priv, privSz, key, endian);
+ if (ret != 0)
+ return ret;
+
+ /* import public part */
+ return wc_curve25519_import_public_ex(pub, pubSz, key, endian);
+}
+
+/* curve25519 private key import only. (Big endian)
+ * return 0 on success */
+int wc_curve25519_import_private(const byte* priv, word32 privSz,
+ curve25519_key* key)
+{
+ return wc_curve25519_import_private_ex(priv, privSz,
+ key, EC25519_BIG_ENDIAN);
+}
+
+/* curve25519 private key import only. (Big or Little endian)
+ * return 0 on success */
+int wc_curve25519_import_private_ex(const byte* priv, word32 privSz,
+ curve25519_key* key, int endian)
+{
/* sanity check */
- if (key == NULL || priv == NULL || pub == NULL)
- return ECC_BAD_ARG_E;
+ if (key == NULL || priv == NULL)
+ return BAD_FUNC_ARG;
/* check size of incoming keys */
- keySz = wc_curve25519_size(key);
- if (privSz != keySz || pubSz != keySz)
- return ECC_BAD_ARG_E;
+ if ((int)privSz != CURVE25519_KEYSIZE)
+ return ECC_BAD_ARG_E;
- XMEMCPY(key->k.point, priv, privSz);
- XMEMCPY(key->p.point, pub, pubSz);
+ if (endian == EC25519_BIG_ENDIAN) {
+ int i;
- return ret;
+ /* read the key in Big Endian format */
+ for (i = 0; i < CURVE25519_KEYSIZE; i++)
+ key->k.point[i] = priv[CURVE25519_KEYSIZE - i - 1];
+ }
+ else
+ XMEMCPY(key->k.point, priv, CURVE25519_KEYSIZE);
+
+ key->dp = &curve25519_sets[0];
+
+ /* Clamp the key */
+ key->k.point[0] &= 248;
+    key->k.point[privSz-1] &= 63; /* same effect as &=127 since |=64 is applied next */
+ key->k.point[privSz-1] |= 64;
+
+ return 0;
}
+#endif /* HAVE_CURVE25519_KEY_IMPORT */
+
int wc_curve25519_init(curve25519_key* key)
{
- word32 keySz;
-
if (key == NULL)
- return ECC_BAD_ARG_E;
+ return BAD_FUNC_ARG;
+
+ XMEMSET(key, 0, sizeof(*key));
/* currently the format for curve25519 */
key->dp = &curve25519_sets[0];
- keySz = key->dp->size;
- XMEMSET(key->k.point, 0, keySz);
- XMEMSET(key->p.point, 0, keySz);
+#ifndef FREESCALE_LTC_ECC
+ fe_init();
+#endif
return 0;
}
@@ -245,13 +493,18 @@ void wc_curve25519_free(curve25519_key* key)
key->dp = NULL;
ForceZero(key->p.point, sizeof(key->p.point));
ForceZero(key->k.point, sizeof(key->k.point));
+ #ifdef FREESCALE_LTC_ECC
+    ForceZero(key->p.pointY, sizeof(key->p.pointY));
+    ForceZero(key->k.pointY, sizeof(key->k.pointY));
+ #endif
}
/* get key size */
int wc_curve25519_size(curve25519_key* key)
{
- if (key == NULL) return 0;
+ if (key == NULL)
+ return 0;
return key->dp->size;
}
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/curve448.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/curve448.c
new file mode 100644
index 000000000..135f2380e
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/curve448.c
@@ -0,0 +1,635 @@
+/* curve448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implemented to: RFC 7748 */
+
+/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work.
+ * Reworked for curve448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_CURVE448
+
+#include <wolfssl/wolfcrypt/curve448.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+
+/* Make a new curve448 private/public key.
+ *
+ * rng [in] Random number generator.
+ * keysize [in] Size of the key to generate.
+ * key [in] Curve448 key object.
+ * returns BAD_FUNC_ARG when rng or key are NULL,
+ * ECC_BAD_ARG_E when keysize is not CURVE448_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_make_key(WC_RNG* rng, int keysize, curve448_key* key)
+{
+ unsigned char basepoint[CURVE448_KEY_SIZE] = {5};
+ int ret = 0;
+
+ if ((key == NULL) || (rng == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* currently only a key size of 56 bytes is used */
+ if ((ret == 0) && (keysize != CURVE448_KEY_SIZE)) {
+ ret = ECC_BAD_ARG_E;
+ }
+
+ if (ret == 0) {
+ fe448_init();
+
+ /* random number for private key */
+ ret = wc_RNG_GenerateBlock(rng, key->k, keysize);
+ }
+ if (ret == 0) {
+ /* Clamp the private key */
+ key->k[0] &= 0xfc;
+ key->k[CURVE448_KEY_SIZE-1] |= 0x80;
+
+ /* compute public key */
+ ret = curve448(key->p, key->k, basepoint);
+ if (ret != 0) {
+ ForceZero(key->k, keysize);
+ ForceZero(key->p, keysize);
+ }
+ }
+
+ return ret;
+}
+
+#ifdef HAVE_CURVE448_SHARED_SECRET
+
+/* Calculate the shared secret from the private key and peer's public key.
+ * Calculation over curve448.
+ * Secret encoded big-endian.
+ *
+ * private_key [in] Curve448 private key.
+ * public_key [in] Curve448 public key.
+ * out [in] Array to hold shared secret.
+ * outLen [in/out] On in, the number of bytes in array.
+ * On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL or outLen is less than
+ * CURVE448_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_shared_secret(curve448_key* private_key,
+ curve448_key* public_key,
+ byte* out, word32* outLen)
+{
+ return wc_curve448_shared_secret_ex(private_key, public_key, out, outLen,
+ EC448_BIG_ENDIAN);
+}
+
+/* Calculate the shared secret from the private key and peer's public key.
+ * Calculation over curve448.
+ *
+ * private_key [in] Curve448 private key.
+ * public_key [in] Curve448 public key.
+ * out [in] Array to hold shared secret.
+ * outLen [in/out] On in, the number of bytes in array.
+ * On out, the number bytes put into array.
+ * endian [in] Endianness to use when encoding number in array.
+ * returns BAD_FUNC_ARG when a parameter is NULL or outLen is less than
+ * CURVE448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_shared_secret_ex(curve448_key* private_key,
+ curve448_key* public_key,
+ byte* out, word32* outLen, int endian)
+{
+ unsigned char o[CURVE448_PUB_KEY_SIZE];
+ int ret = 0;
+ int i;
+
+ /* sanity check */
+ if ((private_key == NULL) || (public_key == NULL) || (out == NULL) ||
+ (outLen == NULL) || (*outLen < CURVE448_PUB_KEY_SIZE)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ ret = curve448(o, private_key->k, public_key->p);
+ }
+ if (ret == 0) {
+ if (endian == EC448_BIG_ENDIAN) {
+ /* put shared secret key in Big Endian format */
+ for (i = 0; i < CURVE448_PUB_KEY_SIZE; i++) {
+ out[i] = o[CURVE448_PUB_KEY_SIZE - i -1];
+ }
+ }
+ else {
+ /* put shared secret key in Little Endian format */
+ XMEMCPY(out, o, CURVE448_PUB_KEY_SIZE);
+ }
+
+ *outLen = CURVE448_PUB_KEY_SIZE;
+ }
+
+ ForceZero(o, CURVE448_PUB_KEY_SIZE);
+
+ return ret;
+}
+
+#endif /* HAVE_CURVE448_SHARED_SECRET */
+
+#ifdef HAVE_CURVE448_KEY_EXPORT
+
+/* Export the curve448 public key.
+ * Public key encoded big-endian.
+ *
+ * key [in] Curve448 public key.
+ * out [in] Array to hold public key.
+ * outLen [in/out] On in, the number of bytes in array.
+ * On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when outLen is less than CURVE448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_export_public(curve448_key* key, byte* out, word32* outLen)
+{
+ return wc_curve448_export_public_ex(key, out, outLen, EC448_BIG_ENDIAN);
+}
+
+/* Export the curve448 public key.
+ *
+ * key [in] Curve448 public key.
+ * out [in] Array to hold public key.
+ * outLen [in/out] On in, the number of bytes in array.
+ * On out, the number bytes put into array.
+ * endian [in] Endianness to use when encoding number in array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when outLen is less than CURVE448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_export_public_ex(curve448_key* key, byte* out, word32* outLen,
+ int endian)
+{
+ int ret = 0;
+ int i;
+
+ if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* check and set outgoing key size */
+ if ((ret == 0) && (*outLen < CURVE448_PUB_KEY_SIZE)) {
+ *outLen = CURVE448_PUB_KEY_SIZE;
+ ret = ECC_BAD_ARG_E;
+ }
+ if (ret == 0) {
+ *outLen = CURVE448_PUB_KEY_SIZE;
+
+ if (endian == EC448_BIG_ENDIAN) {
+ /* read keys in Big Endian format */
+ for (i = 0; i < CURVE448_PUB_KEY_SIZE; i++) {
+ out[i] = key->p[CURVE448_PUB_KEY_SIZE - i - 1];
+ }
+ }
+ else {
+ XMEMCPY(out, key->p, CURVE448_PUB_KEY_SIZE);
+ }
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_CURVE448_KEY_EXPORT */
+
+#ifdef HAVE_CURVE448_KEY_IMPORT
+
+/* Import a curve448 public key from a byte array.
+ * Public key encoded in big-endian.
+ *
+ * in [in] Array holding public key.
+ * inLen [in] Number of bytes of data in array.
+ * key [in] Curve448 public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when inLen is less than CURVE448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_import_public(const byte* in, word32 inLen, curve448_key* key)
+{
+ return wc_curve448_import_public_ex(in, inLen, key, EC448_BIG_ENDIAN);
+}
+
+/* Import a curve448 public key from a byte array.
+ *
+ * in [in] Array holding public key.
+ * inLen [in] Number of bytes of data in array.
+ * key [in] Curve448 public key.
+ * endian [in] Endianness of encoded number in byte array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when inLen is less than CURVE448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_import_public_ex(const byte* in, word32 inLen,
+ curve448_key* key, int endian)
+{
+ int ret = 0;
+ int i;
+
+ /* sanity check */
+ if ((key == NULL) || (in == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* check size of incoming keys */
+ if ((ret == 0) && (inLen != CURVE448_PUB_KEY_SIZE)) {
+ ret = ECC_BAD_ARG_E;
+ }
+
+ if (ret == 0) {
+ if (endian == EC448_BIG_ENDIAN) {
+ /* read keys in Big Endian format */
+ for (i = 0; i < CURVE448_PUB_KEY_SIZE; i++) {
+ key->p[i] = in[CURVE448_PUB_KEY_SIZE - i - 1];
+ }
+ }
+ else
+ XMEMCPY(key->p, in, inLen);
+ }
+
+ return ret;
+}
+
+/* Check the public key value (big or little endian)
+ *
+ * pub [in] Public key bytes.
+ * pubSz [in] Size of public key in bytes.
+ * endian [in] Public key bytes passed in as big-endian or little-endian.
+ * returns BAD_FUNC_ARG when pub is NULL,
+ * ECC_BAD_ARG_E when key length is not 56 bytes, public key value is
+ * zero or one;
+ * BUFFER_E when size of public key is zero;
+ * 0 otherwise.
+ */
+int wc_curve448_check_public(const byte* pub, word32 pubSz, int endian)
+{
+ int ret = 0;
+ word32 i;
+
+ if (pub == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* Check for empty key data */
+ if ((ret == 0) && (pubSz == 0)) {
+ ret = BUFFER_E;
+ }
+
+ /* Check key length */
+ if ((ret == 0) && (pubSz != CURVE448_PUB_KEY_SIZE)) {
+ ret = ECC_BAD_ARG_E;
+ }
+
+ if (ret == 0) {
+ if (endian == EC448_LITTLE_ENDIAN) {
+ /* Check for value of zero or one */
+ for (i = pubSz - 1; i > 0; i--) {
+ if (pub[i] != 0) {
+ break;
+ }
+ }
+ if ((i == 0) && (pub[0] == 0 || pub[0] == 1)) {
+ return ECC_BAD_ARG_E;
+ }
+ }
+ else {
+ /* Check for value of zero or one */
+ for (i = 0; i < pubSz-1; i++) {
+ if (pub[i] != 0) {
+ break;
+ }
+ }
+ if ((i == pubSz - 1) && (pub[i] == 0 || pub[i] == 1)) {
+ ret = ECC_BAD_ARG_E;
+ }
+ }
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_CURVE448_KEY_IMPORT */
+
+
+#ifdef HAVE_CURVE448_KEY_EXPORT
+
+/* Export the curve448 private key raw form.
+ * Private key encoded big-endian.
+ *
+ * key [in] Curve448 private key.
+ * out [in] Array to hold private key.
+ * outLen [in/out] On in, the number of bytes in array.
+ * On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when outLen is less than CURVE448_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_export_private_raw(curve448_key* key, byte* out, word32* outLen)
+{
+ return wc_curve448_export_private_raw_ex(key, out, outLen,
+ EC448_BIG_ENDIAN);
+}
+
+/* Export the curve448 private key raw form.
+ *
+ * key [in] Curve448 private key.
+ * out [in] Array to hold private key.
+ * outLen [in/out] On in, the number of bytes in array.
+ * On out, the number bytes put into array.
+ * endian [in] Endianness to use when encoding number in array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when outLen is less than CURVE448_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_export_private_raw_ex(curve448_key* key, byte* out,
+ word32* outLen, int endian)
+{
+ int ret = 0;
+ int i;
+
+ /* sanity check */
+ if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* check size of outgoing buffer */
+ if ((ret == 0) && (*outLen < CURVE448_KEY_SIZE)) {
+ *outLen = CURVE448_KEY_SIZE;
+ ret = ECC_BAD_ARG_E;
+ }
+ if (ret == 0) {
+ *outLen = CURVE448_KEY_SIZE;
+
+ if (endian == EC448_BIG_ENDIAN) {
+ /* put the key in Big Endian format */
+ for (i = 0; i < CURVE448_KEY_SIZE; i++) {
+ out[i] = key->k[CURVE448_KEY_SIZE - i - 1];
+ }
+ }
+ else {
+ XMEMCPY(out, key->k, CURVE448_KEY_SIZE);
+ }
+ }
+
+ return ret;
+}
+
+/* Export the curve448 private and public keys in raw form.
+ * Private and public key encoded big-endian.
+ *
+ * key [in] Curve448 private key.
+ * priv [in] Array to hold private key.
+ * privSz [in/out] On in, the number of bytes in private key array.
+ * On out, the number bytes put into private key array.
+ * pub [in] Array to hold public key.
+ * pubSz [in/out] On in, the number of bytes in public key array.
+ * On out, the number bytes put into public key array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is
+ * less than CURVE448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_export_key_raw(curve448_key* key, byte* priv, word32 *privSz,
+ byte* pub, word32 *pubSz)
+{
+ return wc_curve448_export_key_raw_ex(key, priv, privSz, pub, pubSz,
+ EC448_BIG_ENDIAN);
+}
+
+/* Export the curve448 private and public keys in raw form.
+ *
+ * key [in] Curve448 private key.
+ * priv [in] Array to hold private key.
+ * privSz [in/out] On in, the number of bytes in private key array.
+ * On out, the number bytes put into private key array.
+ * pub [in] Array to hold public key.
+ * pubSz [in/out] On in, the number of bytes in public key array.
+ * On out, the number bytes put into public key array.
+ * endian [in] Endianness to use when encoding number in array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is
+ * less than CURVE448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_export_key_raw_ex(curve448_key* key, byte* priv, word32 *privSz,
+ byte* pub, word32 *pubSz, int endian)
+{
+ int ret;
+
+ /* export private part */
+ ret = wc_curve448_export_private_raw_ex(key, priv, privSz, endian);
+ if (ret == 0) {
+ /* export public part */
+ ret = wc_curve448_export_public_ex(key, pub, pubSz, endian);
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_CURVE448_KEY_EXPORT */
+
+#ifdef HAVE_CURVE448_KEY_IMPORT
+
+/* Import curve448 private and public keys from a byte arrays.
+ * Private and public keys encoded in big-endian.
+ *
+ * priv    [in]  Array holding private key.
+ * privSz [in] Number of bytes of data in private key array.
+ * pub [in] Array holding public key.
+ * pubSz [in] Number of bytes of data in public key array.
+ * key [in] Curve448 private/public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is
+ * less than CURVE448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_import_private_raw(const byte* priv, word32 privSz,
+ const byte* pub, word32 pubSz,
+ curve448_key* key)
+{
+ return wc_curve448_import_private_raw_ex(priv, privSz, pub, pubSz, key,
+ EC448_BIG_ENDIAN);
+}
+
+/* Import curve448 private and public keys from a byte arrays.
+ *
+ * priv    [in]  Array holding private key.
+ * privSz [in] Number of bytes of data in private key array.
+ * pub [in] Array holding public key.
+ * pubSz [in] Number of bytes of data in public key array.
+ * key [in] Curve448 private/public key.
+ * endian [in] Endianness of encoded numbers in byte arrays.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE or pubSz is
+ * less than CURVE448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_import_private_raw_ex(const byte* priv, word32 privSz,
+ const byte* pub, word32 pubSz,
+ curve448_key* key, int endian)
+{
+ int ret;
+
+ /* import private part */
+ ret = wc_curve448_import_private_ex(priv, privSz, key, endian);
+ if (ret == 0) {
+ /* import public part */
+ return wc_curve448_import_public_ex(pub, pubSz, key, endian);
+ }
+
+ return ret;
+}
+
+/* Import curve448 private key from a byte array.
+ * Private key encoded in big-endian.
+ *
+ * priv    [in]  Array holding private key.
+ * privSz [in] Number of bytes of data in private key array.
+ * key [in] Curve448 private/public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_import_private(const byte* priv, word32 privSz,
+ curve448_key* key)
+{
+ return wc_curve448_import_private_ex(priv, privSz, key, EC448_BIG_ENDIAN);
+}
+
+/* Import curve448 private key from a byte array.
+ *
+ * priv    [in]  Array holding private key.
+ * privSz [in] Number of bytes of data in private key array.
+ * key [in] Curve448 private/public key.
+ * endian [in] Endianness of encoded number in byte array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when privSz is less than CURVE448_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_curve448_import_private_ex(const byte* priv, word32 privSz,
+ curve448_key* key, int endian)
+{
+ int ret = 0;
+ int i;
+
+ /* sanity check */
+ if ((key == NULL) || (priv == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* check size of incoming keys */
+ if ((ret == 0) && ((int)privSz != CURVE448_KEY_SIZE)) {
+ ret = ECC_BAD_ARG_E;
+ }
+
+ if (ret == 0) {
+ if (endian == EC448_BIG_ENDIAN) {
+ /* read the key in Big Endian format */
+ for (i = 0; i < CURVE448_KEY_SIZE; i++) {
+ key->k[i] = priv[CURVE448_KEY_SIZE - i - 1];
+ }
+ }
+ else {
+ XMEMCPY(key->k, priv, CURVE448_KEY_SIZE);
+ }
+
+ /* Clamp the key */
+ key->k[0] &= 0xfc;
+ key->k[CURVE448_KEY_SIZE-1] |= 0x80;
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_CURVE448_KEY_IMPORT */
+
+
+/* Initialize the curve448 key.
+ *
+ * key [in] Curve448 key object.
+ * returns BAD_FUNC_ARG when key is NULL,
+ * 0 otherwise.
+ */
+int wc_curve448_init(curve448_key* key)
+{
+ int ret = 0;
+
+ if (key == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ XMEMSET(key, 0, sizeof(*key));
+
+ fe448_init();
+ }
+
+ return ret;
+}
+
+
+/* Clears the curve448 key data.
+ *
+ * key [in] Curve448 key object.
+ */
+void wc_curve448_free(curve448_key* key)
+{
+ if (key != NULL) {
+ ForceZero(key->p, sizeof(key->p));
+ ForceZero(key->k, sizeof(key->k));
+ }
+}
+
+
+/* Get the curve448 key's size.
+ *
+ * key [in] Curve448 key object.
+ * returns 0 if key is NULL,
+ * CURVE448_KEY_SIZE otherwise.
+ */
+int wc_curve448_size(curve448_key* key)
+{
+ int ret = 0;
+
+ if (key != NULL) {
+ ret = CURVE448_KEY_SIZE;
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_CURVE448 */
+
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/des3.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/des3.c
index f886ecdc7..b4b0187cd 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/des3.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/des3.c
@@ -1,8 +1,8 @@
/* des3.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,160 +16,149 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
-
-#ifndef NO_DES3
-
-#include <wolfssl/wolfcrypt/des3.h>
-
-#ifdef HAVE_FIPS
-#ifdef HAVE_CAVIUM
- static int wc_Des3_CaviumSetKey(Des3* des3, const byte* key, const byte* iv);
- static int wc_Des3_CaviumCbcEncrypt(Des3* des3, byte* out, const byte* in,
- word32 length);
- static int wc_Des3_CaviumCbcDecrypt(Des3* des3, byte* out, const byte* in,
- word32 length);
-#endif
-
-int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
-{
- return Des_SetKey(des, key, iv, dir);
-}
-
-
-int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
-{
- return Des3_SetKey_fips(des, key, iv, dir);
-}
-
-
-int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
- return Des_CbcEncrypt(des, out, in, sz);
-}
-
-
-int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
- return Des_CbcDecrypt(des, out, in, sz);
-}
-
-
-int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
- return Des3_CbcEncrypt_fips(des, out, in, sz);
-}
-
-
-int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
- return Des3_CbcDecrypt_fips(des, out, in, sz);
-}
-
-
-#ifdef WOLFSSL_DES_ECB
-
-/* One block, compatibility only */
-int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
- return Des_EcbEncrypt(des, out, in, sz);
-}
-
-#endif /* WOLFSSL_DES_ECB */
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
-void wc_Des_SetIV(Des* des, const byte* iv)
-{
- Des_SetIV(des, iv);
-}
+#ifndef NO_DES3
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
-int wc_Des_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
- const byte* key, const byte* iv)
-{
- return Des_CbcDecryptWithKey(out, in, sz, key, iv);
-}
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$i")
+ #pragma const_seg(".fipsB$i")
+ #endif
+#endif
-int wc_Des3_SetIV(Des3* des, const byte* iv)
-{
- return Des3_SetIV_fips(des, iv);
-}
+#include <wolfssl/wolfcrypt/des3.h>
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
-int wc_Des3_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
- const byte* key, const byte* iv)
-{
- return Des3_CbcDecryptWithKey(out, in, sz, key, iv);
-}
+/* fips wrapper calls, user can call direct */
+#if defined(HAVE_FIPS) && \
+ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
+ int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+ {
+ return Des_SetKey(des, key, iv, dir);
+ }
+ int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
+ {
+ if (des == NULL || key == NULL || dir < 0) {
+ return BAD_FUNC_ARG;
+ }
-#ifdef HAVE_CAVIUM
+ return Des3_SetKey_fips(des, key, iv, dir);
+ }
+ int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ return Des_CbcEncrypt(des, out, in, sz);
+ }
+ int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ return Des_CbcDecrypt(des, out, in, sz);
+ }
+ int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+ {
+ if (des == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return Des3_CbcEncrypt_fips(des, out, in, sz);
+ }
+ int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
+ {
+ if (des == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return Des3_CbcDecrypt_fips(des, out, in, sz);
+ }
-/* Initiliaze Des3 for use with Nitrox device */
-int wc_Des3_InitCavium(Des3* des3, int devId)
-{
- return Des3_InitCavium(des3, devId);
-}
+ #ifdef WOLFSSL_DES_ECB
+ /* One block, compatibility only */
+ int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ return Des_EcbEncrypt(des, out, in, sz);
+ }
+ int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+ {
+ return Des3_EcbEncrypt(des, out, in, sz);
+ }
+ #endif /* WOLFSSL_DES_ECB */
+ void wc_Des_SetIV(Des* des, const byte* iv)
+ {
+ Des_SetIV(des, iv);
+ }
+ int wc_Des3_SetIV(Des3* des, const byte* iv)
+ {
+ return Des3_SetIV_fips(des, iv);
+ }
-/* Free Des3 from use with Nitrox device */
-void wc_Des3_FreeCavium(Des3* des3)
-{
- Des3_FreeCavium(des3);
-}
+ int wc_Des3Init(Des3* des3, void* heap, int devId)
+ {
+ (void)des3;
+ (void)heap;
+ (void)devId;
+ /* FIPS doesn't support:
+ return Des3Init(des3, heap, devId); */
+ return 0;
+ }
+ void wc_Des3Free(Des3* des3)
+ {
+ (void)des3;
+ /* FIPS doesn't support:
+ Des3Free(des3); */
+ }
+#else /* else build without fips, or for FIPS v2 */
-#endif /* HAVE_CAVIUM */
-#else /* build without fips */
#if defined(WOLFSSL_TI_CRYPT)
#include <wolfcrypt/src/port/ti/ti-des3.c>
#else
-#include <wolfssl/wolfcrypt/error-crypt.h>
-#include <wolfssl/wolfcrypt/logging.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
-#ifdef HAVE_CAVIUM
- static int wc_Des3_CaviumSetKey(Des3* des3, const byte* key, const byte* iv);
- static int wc_Des3_CaviumCbcEncrypt(Des3* des3, byte* out, const byte* in,
- word32 length);
- static int wc_Des3_CaviumCbcDecrypt(Des3* des3, byte* out, const byte* in,
- word32 length);
-#endif
-
+/* Hardware Acceleration */
+#if defined(STM32_CRYPTO)
-
-
-#ifdef STM32F2_CRYPTO
/*
- * STM32F2 hardware DES/3DES support through the STM32F2 standard
- * peripheral library. Documentation located in STM32F2xx Standard
- * Peripheral Library document (See note in README).
+ * STM32F2/F4 hardware DES/3DES support through the standard
+ * peripheral library. (See note in README).
*/
- #include "stm32f2xx.h"
- #include "stm32f2xx_cryp.h"
int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
{
word32 *dkey = des->key;
+ (void)dir;
+
XMEMCPY(dkey, key, 8);
+ #ifndef WOLFSSL_STM32_CUBEMX
ByteReverseWords(dkey, dkey, 8);
+ #endif
wc_Des_SetIV(des, iv);
@@ -178,29 +167,95 @@ void wc_Des3_FreeCavium(Des3* des3)
int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
{
- word32 *dkey1 = des->key[0];
- word32 *dkey2 = des->key[1];
- word32 *dkey3 = des->key[2];
+ if (des == NULL || key == NULL)
+ return BAD_FUNC_ARG;
- XMEMCPY(dkey1, key, 8); /* set key 1 */
- XMEMCPY(dkey2, key + 8, 8); /* set key 2 */
- XMEMCPY(dkey3, key + 16, 8); /* set key 3 */
+ (void)dir;
+
+ #ifndef WOLFSSL_STM32_CUBEMX
+ {
+ word32 *dkey1 = des->key[0];
+ word32 *dkey2 = des->key[1];
+ word32 *dkey3 = des->key[2];
- ByteReverseWords(dkey1, dkey1, 8);
- ByteReverseWords(dkey2, dkey2, 8);
- ByteReverseWords(dkey3, dkey3, 8);
+ XMEMCPY(dkey1, key, 8); /* set key 1 */
+ XMEMCPY(dkey2, key + 8, 8); /* set key 2 */
+ XMEMCPY(dkey3, key + 16, 8); /* set key 3 */
+
+ ByteReverseWords(dkey1, dkey1, 8);
+ ByteReverseWords(dkey2, dkey2, 8);
+ ByteReverseWords(dkey3, dkey3, 8);
+ }
+ #else
+ XMEMCPY(des->key[0], key, DES3_KEYLEN); /* CUBEMX wants keys in sequential memory */
+ #endif
return wc_Des3_SetIV(des, iv);
}
- void DesCrypt(Des* des, byte* out, const byte* in, word32 sz,
+ static void DesCrypt(Des* des, byte* out, const byte* in, word32 sz,
int dir, int mode)
{
+ int ret;
+ #ifdef WOLFSSL_STM32_CUBEMX
+ CRYP_HandleTypeDef hcryp;
+ #else
word32 *dkey, *iv;
CRYP_InitTypeDef DES_CRYP_InitStructure;
CRYP_KeyInitTypeDef DES_CRYP_KeyInitStructure;
CRYP_IVInitTypeDef DES_CRYP_IVInitStructure;
+ #endif
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return;
+ }
+
+ #ifdef WOLFSSL_STM32_CUBEMX
+ XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
+ hcryp.Instance = CRYP;
+ hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
+ hcryp.Init.DataType = CRYP_DATATYPE_8B;
+ hcryp.Init.pKey = (uint8_t*)des->key;
+ hcryp.Init.pInitVect = (uint8_t*)des->reg;
+
+ HAL_CRYP_Init(&hcryp);
+
+ while (sz > 0) {
+ /* if input and output same will overwrite input iv */
+ XMEMCPY(des->tmp, in + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+
+ if (mode == DES_CBC) {
+ if (dir == DES_ENCRYPTION) {
+ HAL_CRYP_DESCBC_Encrypt(&hcryp, (uint8_t*)in,
+ DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+ }
+ else {
+ HAL_CRYP_DESCBC_Decrypt(&hcryp, (uint8_t*)in,
+ DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+ }
+ }
+ else {
+ if (dir == DES_ENCRYPTION) {
+ HAL_CRYP_DESECB_Encrypt(&hcryp, (uint8_t*)in,
+ DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+ }
+ else {
+ HAL_CRYP_DESECB_Decrypt(&hcryp, (uint8_t*)in,
+ DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+ }
+ }
+
+ /* store iv for next call */
+ XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
+
+ sz -= DES_BLOCK_SIZE;
+ in += DES_BLOCK_SIZE;
+ out += DES_BLOCK_SIZE;
+ }
+
+ HAL_CRYP_DeInit(&hcryp);
+ #else
dkey = des->key;
iv = des->reg;
@@ -242,8 +297,7 @@ void wc_Des3_FreeCavium(Des3* des3)
/* enable crypto processor */
CRYP_Cmd(ENABLE);
- while (sz > 0)
- {
+ while (sz > 0) {
/* flush IN/OUT FIFOs */
CRYP_FIFOFlush();
@@ -269,6 +323,8 @@ void wc_Des3_FreeCavium(Des3* des3)
/* disable crypto processor */
CRYP_Cmd(DISABLE);
+ #endif /* WOLFSSL_STM32_CUBEMX */
+ wolfSSL_CryptHwMutexUnLock();
}
int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
@@ -289,81 +345,121 @@ void wc_Des3_FreeCavium(Des3* des3)
return 0;
}
- void Des3Crypt(Des3* des, byte* out, const byte* in, word32 sz,
+ static void Des3Crypt(Des3* des, byte* out, const byte* in, word32 sz,
int dir)
{
- word32 *dkey1, *dkey2, *dkey3, *iv;
- CRYP_InitTypeDef DES3_CRYP_InitStructure;
- CRYP_KeyInitTypeDef DES3_CRYP_KeyInitStructure;
- CRYP_IVInitTypeDef DES3_CRYP_IVInitStructure;
-
- dkey1 = des->key[0];
- dkey2 = des->key[1];
- dkey3 = des->key[2];
- iv = des->reg;
+ if (des == NULL || out == NULL || in == NULL)
+ return BAD_FUNC_ARG;
- /* crypto structure initialization */
- CRYP_KeyStructInit(&DES3_CRYP_KeyInitStructure);
- CRYP_StructInit(&DES3_CRYP_InitStructure);
- CRYP_IVStructInit(&DES3_CRYP_IVInitStructure);
+ #ifdef WOLFSSL_STM32_CUBEMX
+ {
+ CRYP_HandleTypeDef hcryp;
+
+ XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
+ hcryp.Instance = CRYP;
+ hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
+ hcryp.Init.DataType = CRYP_DATATYPE_8B;
+ hcryp.Init.pKey = (uint8_t*)des->key;
+ hcryp.Init.pInitVect = (uint8_t*)des->reg;
+
+ HAL_CRYP_Init(&hcryp);
+
+ while (sz > 0)
+ {
+ if (dir == DES_ENCRYPTION) {
+ HAL_CRYP_TDESCBC_Encrypt(&hcryp, (byte*)in,
+ DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+ }
+ else {
+ HAL_CRYP_TDESCBC_Decrypt(&hcryp, (byte*)in,
+ DES_BLOCK_SIZE, out, STM32_HAL_TIMEOUT);
+ }
- /* reset registers to their default values */
- CRYP_DeInit();
+ /* store iv for next call */
+ XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
- /* set direction, mode, and datatype */
- if (dir == DES_ENCRYPTION) {
- DES3_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt;
- } else {
- DES3_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
- }
+ sz -= DES_BLOCK_SIZE;
+ in += DES_BLOCK_SIZE;
+ out += DES_BLOCK_SIZE;
+ }
- DES3_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_TDES_CBC;
- DES3_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
- CRYP_Init(&DES3_CRYP_InitStructure);
+ HAL_CRYP_DeInit(&hcryp);
+ }
+ #else
+ {
+ word32 *dkey1, *dkey2, *dkey3, *iv;
+ CRYP_InitTypeDef DES3_CRYP_InitStructure;
+ CRYP_KeyInitTypeDef DES3_CRYP_KeyInitStructure;
+ CRYP_IVInitTypeDef DES3_CRYP_IVInitStructure;
+
+ dkey1 = des->key[0];
+ dkey2 = des->key[1];
+ dkey3 = des->key[2];
+ iv = des->reg;
+
+ /* crypto structure initialization */
+ CRYP_KeyStructInit(&DES3_CRYP_KeyInitStructure);
+ CRYP_StructInit(&DES3_CRYP_InitStructure);
+ CRYP_IVStructInit(&DES3_CRYP_IVInitStructure);
+
+ /* reset registers to their default values */
+ CRYP_DeInit();
+
+ /* set direction, mode, and datatype */
+ if (dir == DES_ENCRYPTION) {
+ DES3_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt;
+ } else {
+ DES3_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt;
+ }
- /* load key into correct registers */
- DES3_CRYP_KeyInitStructure.CRYP_Key1Left = dkey1[0];
- DES3_CRYP_KeyInitStructure.CRYP_Key1Right = dkey1[1];
- DES3_CRYP_KeyInitStructure.CRYP_Key2Left = dkey2[0];
- DES3_CRYP_KeyInitStructure.CRYP_Key2Right = dkey2[1];
- DES3_CRYP_KeyInitStructure.CRYP_Key3Left = dkey3[0];
- DES3_CRYP_KeyInitStructure.CRYP_Key3Right = dkey3[1];
- CRYP_KeyInit(&DES3_CRYP_KeyInitStructure);
+ DES3_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_TDES_CBC;
+ DES3_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
+ CRYP_Init(&DES3_CRYP_InitStructure);
- /* set iv */
- ByteReverseWords(iv, iv, DES_BLOCK_SIZE);
- DES3_CRYP_IVInitStructure.CRYP_IV0Left = iv[0];
- DES3_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
- CRYP_IVInit(&DES3_CRYP_IVInitStructure);
+ /* load key into correct registers */
+ DES3_CRYP_KeyInitStructure.CRYP_Key1Left = dkey1[0];
+ DES3_CRYP_KeyInitStructure.CRYP_Key1Right = dkey1[1];
+ DES3_CRYP_KeyInitStructure.CRYP_Key2Left = dkey2[0];
+ DES3_CRYP_KeyInitStructure.CRYP_Key2Right = dkey2[1];
+ DES3_CRYP_KeyInitStructure.CRYP_Key3Left = dkey3[0];
+ DES3_CRYP_KeyInitStructure.CRYP_Key3Right = dkey3[1];
+ CRYP_KeyInit(&DES3_CRYP_KeyInitStructure);
- /* enable crypto processor */
- CRYP_Cmd(ENABLE);
+ /* set iv */
+ ByteReverseWords(iv, iv, DES_BLOCK_SIZE);
+ DES3_CRYP_IVInitStructure.CRYP_IV0Left = iv[0];
+ DES3_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
+ CRYP_IVInit(&DES3_CRYP_IVInitStructure);
- while (sz > 0)
- {
- /* flush IN/OUT FIFOs */
- CRYP_FIFOFlush();
+ /* enable crypto processor */
+ CRYP_Cmd(ENABLE);
- CRYP_DataIn(*(uint32_t*)&in[0]);
- CRYP_DataIn(*(uint32_t*)&in[4]);
+ while (sz > 0)
+ {
+ /* flush IN/OUT FIFOs */
+ CRYP_FIFOFlush();
- /* wait until the complete message has been processed */
- while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
+ CRYP_DataIn(*(uint32_t*)&in[0]);
+ CRYP_DataIn(*(uint32_t*)&in[4]);
- *(uint32_t*)&out[0] = CRYP_DataOut();
- *(uint32_t*)&out[4] = CRYP_DataOut();
+ /* wait until the complete message has been processed */
+ while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
- /* store iv for next call */
- XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+ *(uint32_t*)&out[0] = CRYP_DataOut();
+ *(uint32_t*)&out[4] = CRYP_DataOut();
- sz -= DES_BLOCK_SIZE;
- in += DES_BLOCK_SIZE;
- out += DES_BLOCK_SIZE;
- }
+ /* store iv for next call */
+ XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
- /* disable crypto processor */
- CRYP_Cmd(DISABLE);
+ sz -= DES_BLOCK_SIZE;
+ in += DES_BLOCK_SIZE;
+ out += DES_BLOCK_SIZE;
+ }
+ /* disable crypto processor */
+ CRYP_Cmd(DISABLE);
+ }
+ #endif /* WOLFSSL_STM32_CUBEMX */
}
int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
@@ -380,238 +476,333 @@ void wc_Des3_FreeCavium(Des3* des3)
#elif defined(HAVE_COLDFIRE_SEC)
-#include <wolfssl/ctaocrypt/types.h>
+ #include <wolfssl/ctaocrypt/types.h>
-#include "sec.h"
-#include "mcf5475_sec.h"
-#include "mcf5475_siu.h"
+ #include "sec.h"
+ #include "mcf5475_sec.h"
+ #include "mcf5475_siu.h"
-#if defined (HAVE_THREADX)
-#include "memory_pools.h"
-extern TX_BYTE_POOL mp_ncached; /* Non Cached memory pool */
-#endif
+ #if defined (HAVE_THREADX)
+ #include "memory_pools.h"
+ extern TX_BYTE_POOL mp_ncached; /* Non Cached memory pool */
+ #endif
-#define DES_BUFFER_SIZE (DES_BLOCK_SIZE * 64)
-static unsigned char *desBuffIn = NULL ;
-static unsigned char *desBuffOut = NULL ;
-static byte *secIV ;
-static byte *secKey ;
-static volatile SECdescriptorType *secDesc ;
+ #define DES_BUFFER_SIZE (DES_BLOCK_SIZE * 64)
+ static unsigned char *desBuffIn = NULL;
+ static unsigned char *desBuffOut = NULL;
+ static byte *secIV;
+ static byte *secKey;
+ static volatile SECdescriptorType *secDesc;
-static wolfSSL_Mutex Mutex_DesSEC ;
+ static wolfSSL_Mutex Mutex_DesSEC;
-#define SEC_DESC_DES_CBC_ENCRYPT 0x20500010
-#define SEC_DESC_DES_CBC_DECRYPT 0x20400010
-#define SEC_DESC_DES3_CBC_ENCRYPT 0x20700010
-#define SEC_DESC_DES3_CBC_DECRYPT 0x20600010
+ #define SEC_DESC_DES_CBC_ENCRYPT 0x20500010
+ #define SEC_DESC_DES_CBC_DECRYPT 0x20400010
+ #define SEC_DESC_DES3_CBC_ENCRYPT 0x20700010
+ #define SEC_DESC_DES3_CBC_DECRYPT 0x20600010
-#define DES_IVLEN 8
-#define DES_KEYLEN 8
-#define DES3_IVLEN 8
-#define DES3_KEYLEN 24
+ #define DES_IVLEN 8
+ #define DES_KEYLEN 8
+ #define DES3_IVLEN 8
+ #define DES3_KEYLEN 24
-extern volatile unsigned char __MBAR[];
+ extern volatile unsigned char __MBAR[];
-static void wc_Des_Cbc(byte* out, const byte* in, word32 sz,
- byte *key, byte *iv, word32 desc)
-{
- #ifdef DEBUG_WOLFSSL
- int ret ; int stat1,stat2 ;
- #endif
- int size ;
- volatile int v ;
-
- LockMutex(&Mutex_DesSEC) ;
-
- secDesc->length1 = 0x0;
- secDesc->pointer1 = NULL;
- if((desc==SEC_DESC_DES_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_DECRYPT)){
- secDesc->length2 = DES_IVLEN ;
- secDesc->length3 = DES_KEYLEN ;
- } else {
- secDesc->length2 = DES3_IVLEN ;
- secDesc->length3 = DES3_KEYLEN ;
- }
- secDesc->pointer2 = secIV ;
- secDesc->pointer3 = secKey;
- secDesc->pointer4 = desBuffIn ;
- secDesc->pointer5 = desBuffOut ;
- secDesc->length6 = 0;
- secDesc->pointer6 = NULL;
- secDesc->length7 = 0x0;
- secDesc->pointer7 = NULL;
- secDesc->nextDescriptorPtr = NULL ;
-
- while(sz) {
- XMEMCPY(secIV, iv, secDesc->length2) ;
- if((sz%DES_BUFFER_SIZE) == sz) {
- size = sz ;
- sz = 0 ;
+ static void wc_Des_Cbc(byte* out, const byte* in, word32 sz,
+ byte *key, byte *iv, word32 desc)
+ {
+ #ifdef DEBUG_WOLFSSL
+ int ret; int stat1,stat2;
+ #endif
+ int size;
+ volatile int v;
+
+ wc_LockMutex(&Mutex_DesSEC) ;
+
+ secDesc->length1 = 0x0;
+ secDesc->pointer1 = NULL;
+ if((desc==SEC_DESC_DES_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_DECRYPT)){
+ secDesc->length2 = DES_IVLEN;
+ secDesc->length3 = DES_KEYLEN;
} else {
- size = DES_BUFFER_SIZE ;
- sz -= DES_BUFFER_SIZE ;
- }
-
- XMEMCPY(desBuffIn, in, size) ;
- XMEMCPY(secKey, key, secDesc->length3) ;
-
- secDesc->header = desc ;
- secDesc->length4 = size;
- secDesc->length5 = size;
- /* Point SEC to the location of the descriptor */
- MCF_SEC_FR0 = (uint32)secDesc;
- /* Initialize SEC and wait for encryption to complete */
- MCF_SEC_CCCR0 = 0x0000001a;
- /* poll SISR to determine when channel is complete */
- v=0 ;
- while((secDesc->header>> 24) != 0xff) {
- if(v++ > 1000)break ;
- }
-
-#ifdef DEBUG_WOLFSSL
- ret = MCF_SEC_SISRH;
- stat1 = MCF_SEC_DSR ;
- stat2 = MCF_SEC_DISR ;
- if(ret & 0xe0000000) {
- /* db_printf("Des_Cbc(%x):ISRH=%08x, DSR=%08x, DISR=%08x\n", desc, ret, stat1, stat2) ; */
+ secDesc->length2 = DES3_IVLEN;
+ secDesc->length3 = DES3_KEYLEN;
}
-#endif
-
- XMEMCPY(out, desBuffOut, size) ;
+ secDesc->pointer2 = secIV;
+ secDesc->pointer3 = secKey;
+ secDesc->pointer4 = desBuffIn;
+ secDesc->pointer5 = desBuffOut;
+ secDesc->length6 = 0;
+ secDesc->pointer6 = NULL;
+ secDesc->length7 = 0x0;
+ secDesc->pointer7 = NULL;
+ secDesc->nextDescriptorPtr = NULL;
+
+ while(sz) {
+ XMEMCPY(secIV, iv, secDesc->length2);
+ if((sz%DES_BUFFER_SIZE) == sz) {
+ size = sz;
+ sz = 0;
+ } else {
+ size = DES_BUFFER_SIZE;
+ sz -= DES_BUFFER_SIZE;
+ }
+
+ XMEMCPY(desBuffIn, in, size);
+ XMEMCPY(secKey, key, secDesc->length3);
+
+ secDesc->header = desc;
+ secDesc->length4 = size;
+ secDesc->length5 = size;
+ /* Point SEC to the location of the descriptor */
+ MCF_SEC_FR0 = (uint32)secDesc;
+ /* Initialize SEC and wait for encryption to complete */
+ MCF_SEC_CCCR0 = 0x0000001a;
+ /* poll SISR to determine when channel is complete */
+ v=0;
+ while((secDesc->header>> 24) != 0xff) {
+ if(v++ > 1000)break;
+ }
+
+ #ifdef DEBUG_WOLFSSL
+ ret = MCF_SEC_SISRH;
+ stat1 = MCF_SEC_DSR;
+ stat2 = MCF_SEC_DISR;
+ if(ret & 0xe0000000) {
+ /* db_printf("Des_Cbc(%x):ISRH=%08x, DSR=%08x, DISR=%08x\n", desc, ret, stat1, stat2); */
+ }
+ #endif
+
+ XMEMCPY(out, desBuffOut, size);
+
+ if ((desc==SEC_DESC_DES3_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_ENCRYPT)) {
+ XMEMCPY((void*)iv, (void*)&(out[size-secDesc->length2]), secDesc->length2);
+ } else {
+ XMEMCPY((void*)iv, (void*)&(in[size-secDesc->length2]), secDesc->length2);
+ }
+
+ in += size;
+ out += size;
- if((desc==SEC_DESC_DES3_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_ENCRYPT)) {
- XMEMCPY((void*)iv, (void*)&(out[size-secDesc->length2]), secDesc->length2) ;
- } else {
- XMEMCPY((void*)iv, (void*)&(in[size-secDesc->length2]), secDesc->length2) ;
}
-
- in += size ;
- out += size ;
-
- }
- UnLockMutex(&Mutex_DesSEC) ;
-
-}
+ wc_UnLockMutex(&Mutex_DesSEC) ;
+ }
-int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
- wc_Des_Cbc(out, in, sz, (byte *)des->key, (byte *)des->reg, SEC_DESC_DES_CBC_ENCRYPT) ;
- return 0;
-}
-int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
- wc_Des_Cbc(out, in, sz, (byte *)des->key, (byte *)des->reg, SEC_DESC_DES_CBC_DECRYPT) ;
- return 0;
-}
+ int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ wc_Des_Cbc(out, in, sz, (byte *)des->key, (byte *)des->reg, SEC_DESC_DES_CBC_ENCRYPT);
+ return 0;
+ }
-int wc_Des3_CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 sz)
-{
- wc_Des_Cbc(out, in, sz, (byte *)des3->key, (byte *)des3->reg, SEC_DESC_DES3_CBC_ENCRYPT) ;
- return 0;
-}
+ int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ wc_Des_Cbc(out, in, sz, (byte *)des->key, (byte *)des->reg, SEC_DESC_DES_CBC_DECRYPT);
+ return 0;
+ }
+ int wc_Des3_CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 sz)
+ {
+ wc_Des_Cbc(out, in, sz, (byte *)des3->key, (byte *)des3->reg, SEC_DESC_DES3_CBC_ENCRYPT);
+ return 0;
+ }
-int wc_Des3_CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 sz)
-{
- wc_Des_Cbc(out, in, sz, (byte *)des3->key, (byte *)des3->reg, SEC_DESC_DES3_CBC_DECRYPT) ;
- return 0;
-}
-static void setParity(byte *buf, int len)
-{
- int i, j ;
- byte v ;
- int bits ;
+ int wc_Des3_CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 sz)
+ {
+ wc_Des_Cbc(out, in, sz, (byte *)des3->key, (byte *)des3->reg, SEC_DESC_DES3_CBC_DECRYPT);
+ return 0;
+ }
- for(i=0; i<len; i++)
+ static void setParity(byte *buf, int len)
{
- v = buf[i] >> 1 ;
- buf[i] = v << 1 ;
- bits = 0 ;
- for(j=0; j<7; j++)
- {
- bits += (v&0x1) ;
- v = v >> 1 ;
+ int i, j;
+ byte v;
+ int bits;
+
+ for (i=0; i<len; i++) {
+ v = buf[i] >> 1;
+ buf[i] = v << 1;
+ bits = 0;
+ for (j=0; j<7; j++) {
+ bits += (v&0x1);
+ v = v >> 1;
+ }
+ buf[i] |= (1 - (bits&0x1));
}
- buf[i] |= (1 - (bits&0x1)) ;
- }
-
-}
+ }
-int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
-{
- if(desBuffIn == NULL) {
+ int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+ {
+ if(desBuffIn == NULL) {
#if defined (HAVE_THREADX)
- int s1, s2, s3, s4, s5 ;
- s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
- sizeof(SECdescriptorType), TX_NO_WAIT);
- s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn, DES_BUFFER_SIZE, TX_NO_WAIT);
- s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
- /* Don't know des or des3 to be used. Allocate larger buffers */
- s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey, DES3_KEYLEN,TX_NO_WAIT);
- s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV, DES3_IVLEN, TX_NO_WAIT);
+ int s1, s2, s3, s4, s5;
+ s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
+ sizeof(SECdescriptorType), TX_NO_WAIT);
+ s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn, DES_BUFFER_SIZE, TX_NO_WAIT);
+ s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
+ /* Don't know des or des3 to be used. Allocate larger buffers */
+ s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey, DES3_KEYLEN,TX_NO_WAIT);
+ s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV, DES3_IVLEN, TX_NO_WAIT);
#else
- #warning "Allocate non-Cache buffers"
+ #warning "Allocate non-Cache buffers"
#endif
-
- InitMutex(&Mutex_DesSEC) ;
- }
-
- XMEMCPY(des->key, key, DES_KEYLEN);
- setParity((byte *)des->key, DES_KEYLEN) ;
-
- if (iv) {
- XMEMCPY(des->reg, iv, DES_IVLEN);
- } else {
- XMEMSET(des->reg, 0x0, DES_IVLEN) ;
+
+ InitMutex(&Mutex_DesSEC);
+ }
+
+ XMEMCPY(des->key, key, DES_KEYLEN);
+ setParity((byte *)des->key, DES_KEYLEN);
+
+ if (iv) {
+ XMEMCPY(des->reg, iv, DES_IVLEN);
+ } else {
+ XMEMSET(des->reg, 0x0, DES_IVLEN);
+ }
+ return 0;
}
- return 0;
-}
-int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
-{
-
- if(desBuffIn == NULL) {
+ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
+ {
+ if (des3 == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (desBuffIn == NULL) {
#if defined (HAVE_THREADX)
- int s1, s2, s3, s4, s5 ;
- s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
- sizeof(SECdescriptorType), TX_NO_WAIT);
- s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn, DES_BUFFER_SIZE, TX_NO_WAIT);
- s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
- s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey, DES3_KEYLEN,TX_NO_WAIT);
- s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV, DES3_IVLEN, TX_NO_WAIT);
+ int s1, s2, s3, s4, s5;
+ s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
+ sizeof(SECdescriptorType), TX_NO_WAIT);
+ s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn, DES_BUFFER_SIZE, TX_NO_WAIT);
+ s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
+ s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey, DES3_KEYLEN,TX_NO_WAIT);
+ s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV, DES3_IVLEN, TX_NO_WAIT);
#else
- #warning "Allocate non-Cache buffers"
+ #warning "Allocate non-Cache buffers"
#endif
-
- InitMutex(&Mutex_DesSEC) ;
+
+ InitMutex(&Mutex_DesSEC);
+ }
+
+ XMEMCPY(des3->key[0], key, DES3_KEYLEN);
+ setParity((byte *)des3->key[0], DES3_KEYLEN);
+
+ if (iv) {
+ XMEMCPY(des3->reg, iv, DES3_IVLEN);
+ } else {
+ XMEMSET(des3->reg, 0x0, DES3_IVLEN);
+ }
+ return 0;
+
+ }
+#elif defined(FREESCALE_LTC_DES)
+
+ #include "fsl_ltc.h"
+ int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+ {
+ byte* dkey;
+
+ if (des == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ dkey = (byte*)des->key;
+
+ XMEMCPY(dkey, key, 8);
+
+ wc_Des_SetIV(des, iv);
+
+ return 0;
+ }
+
+ int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
+ {
+ int ret = 0;
+ byte* dkey1 = (byte*)des->key[0];
+ byte* dkey2 = (byte*)des->key[1];
+ byte* dkey3 = (byte*)des->key[2];
+
+ XMEMCPY(dkey1, key, 8); /* set key 1 */
+ XMEMCPY(dkey2, key + 8, 8); /* set key 2 */
+ XMEMCPY(dkey3, key + 16, 8); /* set key 3 */
+
+ ret = wc_Des3_SetIV(des, iv);
+ if (ret != 0)
+ return ret;
+
+ return ret;
}
-
- XMEMCPY(des3->key[0], key, DES3_KEYLEN);
- setParity((byte *)des3->key[0], DES3_KEYLEN) ;
-
- if (iv) {
- XMEMCPY(des3->reg, iv, DES3_IVLEN);
- } else {
- XMEMSET(des3->reg, 0x0, DES3_IVLEN) ;
+
+ int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ status_t status;
+ status = LTC_DES_EncryptCbc(LTC_BASE, in, out, sz, (byte*)des->reg, (byte*)des->key);
+ if (status == kStatus_Success)
+ return 0;
+ else
+ return -1;
}
- return 0;
-}
+ int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ status_t status;
+ status = LTC_DES_DecryptCbc(LTC_BASE, in, out, sz, (byte*)des->reg, (byte*)des->key);
+ if (status == kStatus_Success)
+ return 0;
+ else
+ return -1;
+ }
+
+ int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+ {
+ status_t status;
+ status = LTC_DES3_EncryptCbc(LTC_BASE,
+ in,
+ out,
+ sz,
+ (byte*)des->reg,
+ (byte*)des->key[0],
+ (byte*)des->key[1],
+ (byte*)des->key[2]);
+ if (status == kStatus_Success)
+ return 0;
+ else
+ return -1;
+ }
+
+ int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
+ {
+ status_t status;
+ status = LTC_DES3_DecryptCbc(LTC_BASE,
+ in,
+ out,
+ sz,
+ (byte*)des->reg,
+ (byte*)des->key[0],
+ (byte*)des->key[1],
+ (byte*)des->key[2]);
+ if (status == kStatus_Success)
+ return 0;
+ else
+ return -1;
-#elif defined FREESCALE_MMCAU
+ }
+
+#elif defined(FREESCALE_MMCAU)
/*
* Freescale mmCAU hardware DES/3DES support through the CAU/mmCAU library.
* Documentation located in ColdFire/ColdFire+ CAU and Kinetis mmCAU
* Software Library User Guide (See note in README).
*/
- #include "cau_api.h"
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ #include "cau_api.h"
+ #else
+ #include "fsl_mmcau.h"
+ #endif
- const unsigned char parityLookup[128] =
- {
+ const unsigned char parityLookup[128] = {
1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
@@ -621,7 +812,14 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
{
int i = 0;
- byte* dkey = (byte*)des->key;
+ byte* dkey;
+
+
+ if (des == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ dkey = (byte*)des->key;
XMEMCPY(dkey, key, 8);
@@ -668,15 +866,18 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
int i;
int offset = 0;
int len = sz;
+ int ret = 0;
byte *iv;
byte temp_block[DES_BLOCK_SIZE];
iv = (byte*)des->reg;
+ #ifdef FREESCALE_MMCAU_CLASSIC
if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) {
- WOLFSSL_MSG("Bad cau_des_encrypt alignment");
+ WOLFSSL_MSG("Bad cau_des_encrypt alignment");
return BAD_ALIGN_E;
}
+ #endif
while (len > 0)
{
@@ -686,7 +887,16 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
for (i = 0; i < DES_BLOCK_SIZE; i++)
temp_block[i] ^= iv[i];
+ ret = wolfSSL_CryptHwMutexLock();
+ if(ret != 0) {
+ return ret;
+ }
+ #ifdef FREESCALE_MMCAU_CLASSIC
cau_des_encrypt(temp_block, (byte*)des->key, out + offset);
+ #else
+ MMCAU_DES_EncryptEcb(temp_block, (byte*)des->key, out + offset);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
len -= DES_BLOCK_SIZE;
offset += DES_BLOCK_SIZE;
@@ -695,7 +905,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
XMEMCPY(iv, out + offset - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
}
- return 0;
+ return ret;
}
int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
@@ -703,21 +913,34 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
int i;
int offset = 0;
int len = sz;
+ int ret = 0;
byte* iv;
byte temp_block[DES_BLOCK_SIZE];
iv = (byte*)des->reg;
+ #ifdef FREESCALE_MMCAU_CLASSIC
if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) {
- WOLFSSL_MSG("Bad cau_des_decrypt alignment");
+ WOLFSSL_MSG("Bad cau_des_decrypt alignment");
return BAD_ALIGN_E;
}
+ #endif
while (len > 0)
{
XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE);
+ ret = wolfSSL_CryptHwMutexLock();
+ if(ret != 0) {
+ return ret;
+ }
+
+ #ifdef FREESCALE_MMCAU_CLASSIC
cau_des_decrypt(in + offset, (byte*)des->key, out + offset);
+ #else
+ MMCAU_DES_DecryptEcb(in + offset, (byte*)des->key, out + offset);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
/* XOR block with IV for CBC */
for (i = 0; i < DES_BLOCK_SIZE; i++)
@@ -730,7 +953,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
offset += DES_BLOCK_SIZE;
}
- return 0;
+ return ret;
}
int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
@@ -738,16 +961,19 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
int i;
int offset = 0;
int len = sz;
+ int ret = 0;
byte *iv;
byte temp_block[DES_BLOCK_SIZE];
iv = (byte*)des->reg;
+ #ifdef FREESCALE_MMCAU_CLASSIC
if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) {
- WOLFSSL_MSG("Bad 3ede cau_des_encrypt alignment");
+ WOLFSSL_MSG("Bad 3ede cau_des_encrypt alignment");
return BAD_ALIGN_E;
}
+ #endif
while (len > 0)
{
@@ -757,9 +983,20 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
for (i = 0; i < DES_BLOCK_SIZE; i++)
temp_block[i] ^= iv[i];
- cau_des_encrypt(temp_block , (byte*)des->key[0], out + offset);
+ ret = wolfSSL_CryptHwMutexLock();
+ if(ret != 0) {
+ return ret;
+ }
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ cau_des_encrypt(temp_block, (byte*)des->key[0], out + offset);
cau_des_decrypt(out + offset, (byte*)des->key[1], out + offset);
cau_des_encrypt(out + offset, (byte*)des->key[2], out + offset);
+ #else
+ MMCAU_DES_EncryptEcb(temp_block , (byte*)des->key[0], out + offset);
+ MMCAU_DES_DecryptEcb(out + offset, (byte*)des->key[1], out + offset);
+ MMCAU_DES_EncryptEcb(out + offset, (byte*)des->key[2], out + offset);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
len -= DES_BLOCK_SIZE;
offset += DES_BLOCK_SIZE;
@@ -768,7 +1005,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
XMEMCPY(iv, out + offset - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
}
- return 0;
+ return ret;
}
int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
@@ -776,24 +1013,38 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
int i;
int offset = 0;
int len = sz;
+ int ret = 0;
byte* iv;
byte temp_block[DES_BLOCK_SIZE];
iv = (byte*)des->reg;
+ #ifdef FREESCALE_MMCAU_CLASSIC
if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) {
- WOLFSSL_MSG("Bad 3ede cau_des_decrypt alignment");
+ WOLFSSL_MSG("Bad 3ede cau_des_decrypt alignment");
return BAD_ALIGN_E;
}
+ #endif
while (len > 0)
{
XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE);
- cau_des_decrypt(in + offset , (byte*)des->key[2], out + offset);
+ ret = wolfSSL_CryptHwMutexLock();
+ if(ret != 0) {
+ return ret;
+ }
+ #ifdef FREESCALE_MMCAU_CLASSIC
+ cau_des_decrypt(in + offset, (byte*)des->key[2], out + offset);
cau_des_encrypt(out + offset, (byte*)des->key[1], out + offset);
cau_des_decrypt(out + offset, (byte*)des->key[0], out + offset);
+ #else
+ MMCAU_DES_DecryptEcb(in + offset , (byte*)des->key[2], out + offset);
+ MMCAU_DES_EncryptEcb(out + offset, (byte*)des->key[1], out + offset);
+ MMCAU_DES_DecryptEcb(out + offset, (byte*)des->key[0], out + offset);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
/* XOR block with IV for CBC */
for (i = 0; i < DES_BLOCK_SIZE; i++)
@@ -806,680 +1057,707 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
offset += DES_BLOCK_SIZE;
}
- return 0;
+ return ret;
}
#elif defined(WOLFSSL_PIC32MZ_CRYPT)
- #include "wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h"
-
-void wc_Des_SetIV(Des* des, const byte* iv);
-int wc_Des3_SetIV(Des3* des, const byte* iv);
+ /* PIC32MZ DES hardware requires size multiple of block size */
+ #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
{
- word32 *dkey = des->key ;
- word32 *dreg = des->reg ;
+ if (des == NULL || key == NULL || iv == NULL)
+ return BAD_FUNC_ARG;
- XMEMCPY((byte *)dkey, (byte *)key, 8);
- ByteReverseWords(dkey, dkey, 8);
- XMEMCPY((byte *)dreg, (byte *)iv, 8);
- ByteReverseWords(dreg, dreg, 8);
+ XMEMCPY(des->key, key, DES_KEYLEN);
+ XMEMCPY(des->reg, iv, DES_IVLEN);
return 0;
}
int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
{
- word32 *dkey1 = des->key[0];
- word32 *dreg = des->reg ;
+ if (des == NULL || key == NULL || iv == NULL)
+ return BAD_FUNC_ARG;
- XMEMCPY(dkey1, key, 24);
- ByteReverseWords(dkey1, dkey1, 24);
- XMEMCPY(dreg, iv, 8);
- ByteReverseWords(dreg, dreg, 8) ;
+ XMEMCPY(des->key[0], key, DES3_KEYLEN);
+ XMEMCPY(des->reg, iv, DES3_IVLEN);
return 0;
}
- void DesCrypt(word32 *key, word32 *iv, byte* out, const byte* in, word32 sz,
- int dir, int algo, int cryptoalgo)
+ int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
{
- securityAssociation *sa_p ;
- bufferDescriptor *bd_p ;
- const byte *in_p, *in_l ;
- byte *out_p, *out_l ;
- volatile securityAssociation sa __attribute__((aligned (8)));
- volatile bufferDescriptor bd __attribute__((aligned (8)));
- volatile int k ;
-
- /* get uncached address */
-
- in_l = in;
- out_l = out ;
- sa_p = KVA0_TO_KVA1(&sa) ;
- bd_p = KVA0_TO_KVA1(&bd) ;
- in_p = KVA0_TO_KVA1(in_l) ;
- out_p= KVA0_TO_KVA1(out_l);
-
- if(PIC32MZ_IF_RAM(in_p))
- XMEMCPY((void *)in_p, (void *)in, sz);
- XMEMSET((void *)out_p, 0, sz);
-
- /* Set up the Security Association */
- XMEMSET((byte *)KVA0_TO_KVA1(&sa), 0, sizeof(sa));
- sa_p->SA_CTRL.ALGO = algo ;
- sa_p->SA_CTRL.LNC = 1;
- sa_p->SA_CTRL.LOADIV = 1;
- sa_p->SA_CTRL.FB = 1;
- sa_p->SA_CTRL.ENCTYPE = dir ; /* Encryption/Decryption */
- sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo;
- sa_p->SA_CTRL.KEYSIZE = 1 ; /* KEY is 192 bits */
- XMEMCPY((byte *)KVA0_TO_KVA1(&sa.SA_ENCKEY[algo==PIC32_ALGO_TDES ? 2 : 6]),
- (byte *)key, algo==PIC32_ALGO_TDES ? 24 : 8);
- XMEMCPY((byte *)KVA0_TO_KVA1(&sa.SA_ENCIV[2]), (byte *)iv, 8);
-
- XMEMSET((byte *)KVA0_TO_KVA1(&bd), 0, sizeof(bd));
- /* Set up the Buffer Descriptor */
- bd_p->BD_CTRL.BUFLEN = sz;
- bd_p->BD_CTRL.LIFM = 1;
- bd_p->BD_CTRL.SA_FETCH_EN = 1;
- bd_p->BD_CTRL.LAST_BD = 1;
- bd_p->BD_CTRL.DESC_EN = 1;
-
- bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa) ; /* (unsigned int)sa_p; */
- bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in) ; /* (unsigned int)in_p; */
- bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out); /* (unsigned int)out_p; */
- bd_p->NXTPTR = (unsigned int)KVA_TO_PA(&bd);
- bd_p->MSGLEN = sz ;
-
- /* Fire in the hole! */
- CECON = 1 << 6;
- while (CECON);
-
- /* Run the engine */
- CEBDPADDR = (unsigned int)KVA_TO_PA(&bd) ; /* (unsigned int)bd_p ; */
- CEINTEN = 0x07;
- CECON = 0x27;
-
- WAIT_ENGINE ;
-
- if((cryptoalgo == PIC32_CRYPTOALGO_CBC) ||
- (cryptoalgo == PIC32_CRYPTOALGO_TCBC)||
- (cryptoalgo == PIC32_CRYPTOALGO_RCBC)) {
- /* set iv for the next call */
- if(dir == PIC32_ENCRYPTION) {
- XMEMCPY((void *)iv, (void*)&(out_p[sz-DES_IVLEN]), DES_IVLEN) ;
- } else {
- ByteReverseWords((word32*)iv, (word32 *)&(in_p[sz-DES_IVLEN]),
- DES_IVLEN);
- }
+ word32 blocks = sz / DES_BLOCK_SIZE;
- }
+ if (des == NULL || out == NULL || in == NULL)
+ return BAD_FUNC_ARG;
- ByteReverseWords((word32*)out, (word32 *)KVA0_TO_KVA1(out), sz);
- }
-
- int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
- {
- DesCrypt(des->key, des->reg, out, in, sz,
- PIC32_ENCRYPTION, PIC32_ALGO_DES, PIC32_CRYPTOALGO_CBC );
- return 0;
+ return wc_Pic32DesCrypt(des->key, DES_KEYLEN, des->reg, DES_IVLEN,
+ out, in, (blocks * DES_BLOCK_SIZE),
+ PIC32_ENCRYPTION, PIC32_ALGO_DES, PIC32_CRYPTOALGO_CBC);
}
int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
{
- DesCrypt(des->key, des->reg, out, in, sz,
- PIC32_DECRYPTION, PIC32_ALGO_DES, PIC32_CRYPTOALGO_CBC);
- return 0;
+ word32 blocks = sz / DES_BLOCK_SIZE;
+
+ if (des == NULL || out == NULL || in == NULL)
+ return BAD_FUNC_ARG;
+
+ return wc_Pic32DesCrypt(des->key, DES_KEYLEN, des->reg, DES_IVLEN,
+ out, in, (blocks * DES_BLOCK_SIZE),
+ PIC32_DECRYPTION, PIC32_ALGO_DES, PIC32_CRYPTOALGO_CBC);
}
int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
{
- DesCrypt(des->key[0], des->reg, out, in, sz,
- PIC32_ENCRYPTION, PIC32_ALGO_TDES, PIC32_CRYPTOALGO_TCBC);
- return 0;
+ word32 blocks = sz / DES_BLOCK_SIZE;
+
+ if (des == NULL || out == NULL || in == NULL)
+ return BAD_FUNC_ARG;
+
+ return wc_Pic32DesCrypt(des->key[0], DES3_KEYLEN, des->reg, DES3_IVLEN,
+ out, in, (blocks * DES_BLOCK_SIZE),
+ PIC32_ENCRYPTION, PIC32_ALGO_TDES, PIC32_CRYPTOALGO_TCBC);
}
int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
{
- DesCrypt(des->key[0], des->reg, out, in, sz,
- PIC32_DECRYPTION, PIC32_ALGO_TDES, PIC32_CRYPTOALGO_TCBC);
- return 0;
+ word32 blocks = sz / DES_BLOCK_SIZE;
+
+ if (des == NULL || out == NULL || in == NULL)
+ return BAD_FUNC_ARG;
+
+ return wc_Pic32DesCrypt(des->key[0], DES3_KEYLEN, des->reg, DES3_IVLEN,
+ out, in, (blocks * DES_BLOCK_SIZE),
+ PIC32_DECRYPTION, PIC32_ALGO_TDES, PIC32_CRYPTOALGO_TCBC);
}
-
-#else /* CTaoCrypt software implementation */
-
-/* permuted choice table (key) */
-static const byte pc1[] = {
- 57, 49, 41, 33, 25, 17, 9,
- 1, 58, 50, 42, 34, 26, 18,
- 10, 2, 59, 51, 43, 35, 27,
- 19, 11, 3, 60, 52, 44, 36,
-
- 63, 55, 47, 39, 31, 23, 15,
- 7, 62, 54, 46, 38, 30, 22,
- 14, 6, 61, 53, 45, 37, 29,
- 21, 13, 5, 28, 20, 12, 4
-};
-
-/* number left rotations of pc1 */
-static const byte totrot[] = {
- 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
-};
-
-/* permuted choice key (table) */
-static const byte pc2[] = {
- 14, 17, 11, 24, 1, 5,
- 3, 28, 15, 6, 21, 10,
- 23, 19, 12, 4, 26, 8,
- 16, 7, 27, 20, 13, 2,
- 41, 52, 31, 37, 47, 55,
- 30, 40, 51, 45, 33, 48,
- 44, 49, 39, 56, 34, 53,
- 46, 42, 50, 36, 29, 32
-};
-
-/* End of DES-defined tables */
-
-/* bit 0 is left-most in byte */
-static const int bytebit[] = {
- 0200,0100,040,020,010,04,02,01
-};
-
-static const word32 Spbox[8][64] = {
-{
-0x01010400,0x00000000,0x00010000,0x01010404,
-0x01010004,0x00010404,0x00000004,0x00010000,
-0x00000400,0x01010400,0x01010404,0x00000400,
-0x01000404,0x01010004,0x01000000,0x00000004,
-0x00000404,0x01000400,0x01000400,0x00010400,
-0x00010400,0x01010000,0x01010000,0x01000404,
-0x00010004,0x01000004,0x01000004,0x00010004,
-0x00000000,0x00000404,0x00010404,0x01000000,
-0x00010000,0x01010404,0x00000004,0x01010000,
-0x01010400,0x01000000,0x01000000,0x00000400,
-0x01010004,0x00010000,0x00010400,0x01000004,
-0x00000400,0x00000004,0x01000404,0x00010404,
-0x01010404,0x00010004,0x01010000,0x01000404,
-0x01000004,0x00000404,0x00010404,0x01010400,
-0x00000404,0x01000400,0x01000400,0x00000000,
-0x00010004,0x00010400,0x00000000,0x01010004},
-{
-0x80108020,0x80008000,0x00008000,0x00108020,
-0x00100000,0x00000020,0x80100020,0x80008020,
-0x80000020,0x80108020,0x80108000,0x80000000,
-0x80008000,0x00100000,0x00000020,0x80100020,
-0x00108000,0x00100020,0x80008020,0x00000000,
-0x80000000,0x00008000,0x00108020,0x80100000,
-0x00100020,0x80000020,0x00000000,0x00108000,
-0x00008020,0x80108000,0x80100000,0x00008020,
-0x00000000,0x00108020,0x80100020,0x00100000,
-0x80008020,0x80100000,0x80108000,0x00008000,
-0x80100000,0x80008000,0x00000020,0x80108020,
-0x00108020,0x00000020,0x00008000,0x80000000,
-0x00008020,0x80108000,0x00100000,0x80000020,
-0x00100020,0x80008020,0x80000020,0x00100020,
-0x00108000,0x00000000,0x80008000,0x00008020,
-0x80000000,0x80100020,0x80108020,0x00108000},
-{
-0x00000208,0x08020200,0x00000000,0x08020008,
-0x08000200,0x00000000,0x00020208,0x08000200,
-0x00020008,0x08000008,0x08000008,0x00020000,
-0x08020208,0x00020008,0x08020000,0x00000208,
-0x08000000,0x00000008,0x08020200,0x00000200,
-0x00020200,0x08020000,0x08020008,0x00020208,
-0x08000208,0x00020200,0x00020000,0x08000208,
-0x00000008,0x08020208,0x00000200,0x08000000,
-0x08020200,0x08000000,0x00020008,0x00000208,
-0x00020000,0x08020200,0x08000200,0x00000000,
-0x00000200,0x00020008,0x08020208,0x08000200,
-0x08000008,0x00000200,0x00000000,0x08020008,
-0x08000208,0x00020000,0x08000000,0x08020208,
-0x00000008,0x00020208,0x00020200,0x08000008,
-0x08020000,0x08000208,0x00000208,0x08020000,
-0x00020208,0x00000008,0x08020008,0x00020200},
-{
-0x00802001,0x00002081,0x00002081,0x00000080,
-0x00802080,0x00800081,0x00800001,0x00002001,
-0x00000000,0x00802000,0x00802000,0x00802081,
-0x00000081,0x00000000,0x00800080,0x00800001,
-0x00000001,0x00002000,0x00800000,0x00802001,
-0x00000080,0x00800000,0x00002001,0x00002080,
-0x00800081,0x00000001,0x00002080,0x00800080,
-0x00002000,0x00802080,0x00802081,0x00000081,
-0x00800080,0x00800001,0x00802000,0x00802081,
-0x00000081,0x00000000,0x00000000,0x00802000,
-0x00002080,0x00800080,0x00800081,0x00000001,
-0x00802001,0x00002081,0x00002081,0x00000080,
-0x00802081,0x00000081,0x00000001,0x00002000,
-0x00800001,0x00002001,0x00802080,0x00800081,
-0x00002001,0x00002080,0x00800000,0x00802001,
-0x00000080,0x00800000,0x00002000,0x00802080},
-{
-0x00000100,0x02080100,0x02080000,0x42000100,
-0x00080000,0x00000100,0x40000000,0x02080000,
-0x40080100,0x00080000,0x02000100,0x40080100,
-0x42000100,0x42080000,0x00080100,0x40000000,
-0x02000000,0x40080000,0x40080000,0x00000000,
-0x40000100,0x42080100,0x42080100,0x02000100,
-0x42080000,0x40000100,0x00000000,0x42000000,
-0x02080100,0x02000000,0x42000000,0x00080100,
-0x00080000,0x42000100,0x00000100,0x02000000,
-0x40000000,0x02080000,0x42000100,0x40080100,
-0x02000100,0x40000000,0x42080000,0x02080100,
-0x40080100,0x00000100,0x02000000,0x42080000,
-0x42080100,0x00080100,0x42000000,0x42080100,
-0x02080000,0x00000000,0x40080000,0x42000000,
-0x00080100,0x02000100,0x40000100,0x00080000,
-0x00000000,0x40080000,0x02080100,0x40000100},
-{
-0x20000010,0x20400000,0x00004000,0x20404010,
-0x20400000,0x00000010,0x20404010,0x00400000,
-0x20004000,0x00404010,0x00400000,0x20000010,
-0x00400010,0x20004000,0x20000000,0x00004010,
-0x00000000,0x00400010,0x20004010,0x00004000,
-0x00404000,0x20004010,0x00000010,0x20400010,
-0x20400010,0x00000000,0x00404010,0x20404000,
-0x00004010,0x00404000,0x20404000,0x20000000,
-0x20004000,0x00000010,0x20400010,0x00404000,
-0x20404010,0x00400000,0x00004010,0x20000010,
-0x00400000,0x20004000,0x20000000,0x00004010,
-0x20000010,0x20404010,0x00404000,0x20400000,
-0x00404010,0x20404000,0x00000000,0x20400010,
-0x00000010,0x00004000,0x20400000,0x00404010,
-0x00004000,0x00400010,0x20004010,0x00000000,
-0x20404000,0x20000000,0x00400010,0x20004010},
-{
-0x00200000,0x04200002,0x04000802,0x00000000,
-0x00000800,0x04000802,0x00200802,0x04200800,
-0x04200802,0x00200000,0x00000000,0x04000002,
-0x00000002,0x04000000,0x04200002,0x00000802,
-0x04000800,0x00200802,0x00200002,0x04000800,
-0x04000002,0x04200000,0x04200800,0x00200002,
-0x04200000,0x00000800,0x00000802,0x04200802,
-0x00200800,0x00000002,0x04000000,0x00200800,
-0x04000000,0x00200800,0x00200000,0x04000802,
-0x04000802,0x04200002,0x04200002,0x00000002,
-0x00200002,0x04000000,0x04000800,0x00200000,
-0x04200800,0x00000802,0x00200802,0x04200800,
-0x00000802,0x04000002,0x04200802,0x04200000,
-0x00200800,0x00000000,0x00000002,0x04200802,
-0x00000000,0x00200802,0x04200000,0x00000800,
-0x04000002,0x04000800,0x00000800,0x00200002},
-{
-0x10001040,0x00001000,0x00040000,0x10041040,
-0x10000000,0x10001040,0x00000040,0x10000000,
-0x00040040,0x10040000,0x10041040,0x00041000,
-0x10041000,0x00041040,0x00001000,0x00000040,
-0x10040000,0x10000040,0x10001000,0x00001040,
-0x00041000,0x00040040,0x10040040,0x10041000,
-0x00001040,0x00000000,0x00000000,0x10040040,
-0x10000040,0x10001000,0x00041040,0x00040000,
-0x00041040,0x00040000,0x10041000,0x00001000,
-0x00000040,0x10040040,0x00001000,0x00041040,
-0x10001000,0x00000040,0x10000040,0x10040000,
-0x10040040,0x10000000,0x00040000,0x10001040,
-0x00000000,0x10041040,0x00040040,0x10000040,
-0x10040000,0x10001000,0x10001040,0x00000000,
-0x10041040,0x00041000,0x00041000,0x00001040,
-0x00001040,0x00040040,0x10000000,0x10041000}
-};
-
-
-static INLINE void IPERM(word32* left, word32* right)
-{
- word32 work;
- *right = rotlFixed(*right, 4U);
- work = (*left ^ *right) & 0xf0f0f0f0;
- *left ^= work;
+ #ifdef WOLFSSL_DES_ECB
+ int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ word32 blocks = sz / DES_BLOCK_SIZE;
- *right = rotrFixed(*right^work, 20U);
- work = (*left ^ *right) & 0xffff0000;
- *left ^= work;
+ if (des == NULL || out == NULL || in == NULL)
+ return BAD_FUNC_ARG;
- *right = rotrFixed(*right^work, 18U);
- work = (*left ^ *right) & 0x33333333;
- *left ^= work;
+ return wc_Pic32DesCrypt(des->key, DES_KEYLEN, des->reg, DES_IVLEN,
+ out, in, (blocks * DES_BLOCK_SIZE),
+ PIC32_ENCRYPTION, PIC32_ALGO_DES, PIC32_CRYPTOALGO_ECB);
+ }
- *right = rotrFixed(*right^work, 6U);
- work = (*left ^ *right) & 0x00ff00ff;
- *left ^= work;
+ int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+ {
+ word32 blocks = sz / DES_BLOCK_SIZE;
- *right = rotlFixed(*right^work, 9U);
- work = (*left ^ *right) & 0xaaaaaaaa;
- *left = rotlFixed(*left^work, 1U);
- *right ^= work;
-}
+ if (des == NULL || out == NULL || in == NULL)
+ return BAD_FUNC_ARG;
+ return wc_Pic32DesCrypt(des->key[0], DES3_KEYLEN, des->reg, DES3_IVLEN,
+ out, in, (blocks * DES_BLOCK_SIZE),
+ PIC32_ENCRYPTION, PIC32_ALGO_TDES, PIC32_CRYPTOALGO_TECB);
+ }
+ #endif /* WOLFSSL_DES_ECB */
-static INLINE void FPERM(word32* left, word32* right)
-{
- word32 work;
+#else
+ #define NEED_SOFT_DES
- *right = rotrFixed(*right, 1U);
- work = (*left ^ *right) & 0xaaaaaaaa;
- *right ^= work;
+#endif
- *left = rotrFixed(*left^work, 9U);
- work = (*left ^ *right) & 0x00ff00ff;
- *right ^= work;
- *left = rotlFixed(*left^work, 6U);
- work = (*left ^ *right) & 0x33333333;
- *right ^= work;
+#ifdef NEED_SOFT_DES
+
+ /* permuted choice table (key) */
+ static const byte pc1[] = {
+ 57, 49, 41, 33, 25, 17, 9,
+ 1, 58, 50, 42, 34, 26, 18,
+ 10, 2, 59, 51, 43, 35, 27,
+ 19, 11, 3, 60, 52, 44, 36,
+
+ 63, 55, 47, 39, 31, 23, 15,
+ 7, 62, 54, 46, 38, 30, 22,
+ 14, 6, 61, 53, 45, 37, 29,
+ 21, 13, 5, 28, 20, 12, 4
+ };
+
+ /* number left rotations of pc1 */
+ static const byte totrot[] = {
+ 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
+ };
+
+ /* permuted choice key (table) */
+ static const byte pc2[] = {
+ 14, 17, 11, 24, 1, 5,
+ 3, 28, 15, 6, 21, 10,
+ 23, 19, 12, 4, 26, 8,
+ 16, 7, 27, 20, 13, 2,
+ 41, 52, 31, 37, 47, 55,
+ 30, 40, 51, 45, 33, 48,
+ 44, 49, 39, 56, 34, 53,
+ 46, 42, 50, 36, 29, 32
+ };
+
+ /* End of DES-defined tables */
+
+ /* bit 0 is left-most in byte */
+ static const int bytebit[] = {
+ 0200,0100,040,020,010,04,02,01
+ };
+
+ static const word32 Spbox[8][64] = {
+ { 0x01010400,0x00000000,0x00010000,0x01010404,
+ 0x01010004,0x00010404,0x00000004,0x00010000,
+ 0x00000400,0x01010400,0x01010404,0x00000400,
+ 0x01000404,0x01010004,0x01000000,0x00000004,
+ 0x00000404,0x01000400,0x01000400,0x00010400,
+ 0x00010400,0x01010000,0x01010000,0x01000404,
+ 0x00010004,0x01000004,0x01000004,0x00010004,
+ 0x00000000,0x00000404,0x00010404,0x01000000,
+ 0x00010000,0x01010404,0x00000004,0x01010000,
+ 0x01010400,0x01000000,0x01000000,0x00000400,
+ 0x01010004,0x00010000,0x00010400,0x01000004,
+ 0x00000400,0x00000004,0x01000404,0x00010404,
+ 0x01010404,0x00010004,0x01010000,0x01000404,
+ 0x01000004,0x00000404,0x00010404,0x01010400,
+ 0x00000404,0x01000400,0x01000400,0x00000000,
+ 0x00010004,0x00010400,0x00000000,0x01010004},
+ { 0x80108020,0x80008000,0x00008000,0x00108020,
+ 0x00100000,0x00000020,0x80100020,0x80008020,
+ 0x80000020,0x80108020,0x80108000,0x80000000,
+ 0x80008000,0x00100000,0x00000020,0x80100020,
+ 0x00108000,0x00100020,0x80008020,0x00000000,
+ 0x80000000,0x00008000,0x00108020,0x80100000,
+ 0x00100020,0x80000020,0x00000000,0x00108000,
+ 0x00008020,0x80108000,0x80100000,0x00008020,
+ 0x00000000,0x00108020,0x80100020,0x00100000,
+ 0x80008020,0x80100000,0x80108000,0x00008000,
+ 0x80100000,0x80008000,0x00000020,0x80108020,
+ 0x00108020,0x00000020,0x00008000,0x80000000,
+ 0x00008020,0x80108000,0x00100000,0x80000020,
+ 0x00100020,0x80008020,0x80000020,0x00100020,
+ 0x00108000,0x00000000,0x80008000,0x00008020,
+ 0x80000000,0x80100020,0x80108020,0x00108000},
+ { 0x00000208,0x08020200,0x00000000,0x08020008,
+ 0x08000200,0x00000000,0x00020208,0x08000200,
+ 0x00020008,0x08000008,0x08000008,0x00020000,
+ 0x08020208,0x00020008,0x08020000,0x00000208,
+ 0x08000000,0x00000008,0x08020200,0x00000200,
+ 0x00020200,0x08020000,0x08020008,0x00020208,
+ 0x08000208,0x00020200,0x00020000,0x08000208,
+ 0x00000008,0x08020208,0x00000200,0x08000000,
+ 0x08020200,0x08000000,0x00020008,0x00000208,
+ 0x00020000,0x08020200,0x08000200,0x00000000,
+ 0x00000200,0x00020008,0x08020208,0x08000200,
+ 0x08000008,0x00000200,0x00000000,0x08020008,
+ 0x08000208,0x00020000,0x08000000,0x08020208,
+ 0x00000008,0x00020208,0x00020200,0x08000008,
+ 0x08020000,0x08000208,0x00000208,0x08020000,
+ 0x00020208,0x00000008,0x08020008,0x00020200},
+ { 0x00802001,0x00002081,0x00002081,0x00000080,
+ 0x00802080,0x00800081,0x00800001,0x00002001,
+ 0x00000000,0x00802000,0x00802000,0x00802081,
+ 0x00000081,0x00000000,0x00800080,0x00800001,
+ 0x00000001,0x00002000,0x00800000,0x00802001,
+ 0x00000080,0x00800000,0x00002001,0x00002080,
+ 0x00800081,0x00000001,0x00002080,0x00800080,
+ 0x00002000,0x00802080,0x00802081,0x00000081,
+ 0x00800080,0x00800001,0x00802000,0x00802081,
+ 0x00000081,0x00000000,0x00000000,0x00802000,
+ 0x00002080,0x00800080,0x00800081,0x00000001,
+ 0x00802001,0x00002081,0x00002081,0x00000080,
+ 0x00802081,0x00000081,0x00000001,0x00002000,
+ 0x00800001,0x00002001,0x00802080,0x00800081,
+ 0x00002001,0x00002080,0x00800000,0x00802001,
+ 0x00000080,0x00800000,0x00002000,0x00802080},
+ { 0x00000100,0x02080100,0x02080000,0x42000100,
+ 0x00080000,0x00000100,0x40000000,0x02080000,
+ 0x40080100,0x00080000,0x02000100,0x40080100,
+ 0x42000100,0x42080000,0x00080100,0x40000000,
+ 0x02000000,0x40080000,0x40080000,0x00000000,
+ 0x40000100,0x42080100,0x42080100,0x02000100,
+ 0x42080000,0x40000100,0x00000000,0x42000000,
+ 0x02080100,0x02000000,0x42000000,0x00080100,
+ 0x00080000,0x42000100,0x00000100,0x02000000,
+ 0x40000000,0x02080000,0x42000100,0x40080100,
+ 0x02000100,0x40000000,0x42080000,0x02080100,
+ 0x40080100,0x00000100,0x02000000,0x42080000,
+ 0x42080100,0x00080100,0x42000000,0x42080100,
+ 0x02080000,0x00000000,0x40080000,0x42000000,
+ 0x00080100,0x02000100,0x40000100,0x00080000,
+ 0x00000000,0x40080000,0x02080100,0x40000100},
+ { 0x20000010,0x20400000,0x00004000,0x20404010,
+ 0x20400000,0x00000010,0x20404010,0x00400000,
+ 0x20004000,0x00404010,0x00400000,0x20000010,
+ 0x00400010,0x20004000,0x20000000,0x00004010,
+ 0x00000000,0x00400010,0x20004010,0x00004000,
+ 0x00404000,0x20004010,0x00000010,0x20400010,
+ 0x20400010,0x00000000,0x00404010,0x20404000,
+ 0x00004010,0x00404000,0x20404000,0x20000000,
+ 0x20004000,0x00000010,0x20400010,0x00404000,
+ 0x20404010,0x00400000,0x00004010,0x20000010,
+ 0x00400000,0x20004000,0x20000000,0x00004010,
+ 0x20000010,0x20404010,0x00404000,0x20400000,
+ 0x00404010,0x20404000,0x00000000,0x20400010,
+ 0x00000010,0x00004000,0x20400000,0x00404010,
+ 0x00004000,0x00400010,0x20004010,0x00000000,
+ 0x20404000,0x20000000,0x00400010,0x20004010},
+ { 0x00200000,0x04200002,0x04000802,0x00000000,
+ 0x00000800,0x04000802,0x00200802,0x04200800,
+ 0x04200802,0x00200000,0x00000000,0x04000002,
+ 0x00000002,0x04000000,0x04200002,0x00000802,
+ 0x04000800,0x00200802,0x00200002,0x04000800,
+ 0x04000002,0x04200000,0x04200800,0x00200002,
+ 0x04200000,0x00000800,0x00000802,0x04200802,
+ 0x00200800,0x00000002,0x04000000,0x00200800,
+ 0x04000000,0x00200800,0x00200000,0x04000802,
+ 0x04000802,0x04200002,0x04200002,0x00000002,
+ 0x00200002,0x04000000,0x04000800,0x00200000,
+ 0x04200800,0x00000802,0x00200802,0x04200800,
+ 0x00000802,0x04000002,0x04200802,0x04200000,
+ 0x00200800,0x00000000,0x00000002,0x04200802,
+ 0x00000000,0x00200802,0x04200000,0x00000800,
+ 0x04000002,0x04000800,0x00000800,0x00200002},
+ { 0x10001040,0x00001000,0x00040000,0x10041040,
+ 0x10000000,0x10001040,0x00000040,0x10000000,
+ 0x00040040,0x10040000,0x10041040,0x00041000,
+ 0x10041000,0x00041040,0x00001000,0x00000040,
+ 0x10040000,0x10000040,0x10001000,0x00001040,
+ 0x00041000,0x00040040,0x10040040,0x10041000,
+ 0x00001040,0x00000000,0x00000000,0x10040040,
+ 0x10000040,0x10001000,0x00041040,0x00040000,
+ 0x00041040,0x00040000,0x10041000,0x00001000,
+ 0x00000040,0x10040040,0x00001000,0x00041040,
+ 0x10001000,0x00000040,0x10000040,0x10040000,
+ 0x10040040,0x10000000,0x00040000,0x10001040,
+ 0x00000000,0x10041040,0x00040040,0x10000040,
+ 0x10040000,0x10001000,0x10001040,0x00000000,
+ 0x10041040,0x00041000,0x00041000,0x00001040,
+ 0x00001040,0x00040040,0x10000000,0x10041000}
+ };
+
+ static WC_INLINE void IPERM(word32* left, word32* right)
+ {
+ word32 work;
- *left = rotlFixed(*left^work, 18U);
- work = (*left ^ *right) & 0xffff0000;
- *right ^= work;
+ *right = rotlFixed(*right, 4U);
+ work = (*left ^ *right) & 0xf0f0f0f0;
+ *left ^= work;
- *left = rotlFixed(*left^work, 20U);
- work = (*left ^ *right) & 0xf0f0f0f0;
- *right ^= work;
+ *right = rotrFixed(*right^work, 20U);
+ work = (*left ^ *right) & 0xffff0000;
+ *left ^= work;
- *left = rotrFixed(*left^work, 4U);
-}
+ *right = rotrFixed(*right^work, 18U);
+ work = (*left ^ *right) & 0x33333333;
+ *left ^= work;
+ *right = rotrFixed(*right^work, 6U);
+ work = (*left ^ *right) & 0x00ff00ff;
+ *left ^= work;
-static int DesSetKey(const byte* key, int dir, word32* out)
-{
-#ifdef WOLFSSL_SMALL_STACK
- byte* buffer = (byte*)XMALLOC(56+56+8, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ *right = rotlFixed(*right^work, 9U);
+ work = (*left ^ *right) & 0xaaaaaaaa;
+ *left = rotlFixed(*left^work, 1U);
+ *right ^= work;
+ }
- if (buffer == NULL)
- return MEMORY_E;
-#else
- byte buffer[56+56+8];
-#endif
+ static WC_INLINE void FPERM(word32* left, word32* right)
+ {
+ word32 work;
+
+ *right = rotrFixed(*right, 1U);
+ work = (*left ^ *right) & 0xaaaaaaaa;
+ *right ^= work;
+
+ *left = rotrFixed(*left^work, 9U);
+ work = (*left ^ *right) & 0x00ff00ff;
+ *right ^= work;
+
+ *left = rotlFixed(*left^work, 6U);
+ work = (*left ^ *right) & 0x33333333;
+ *right ^= work;
+
+ *left = rotlFixed(*left^work, 18U);
+ work = (*left ^ *right) & 0xffff0000;
+ *right ^= work;
+ *left = rotlFixed(*left^work, 20U);
+ work = (*left ^ *right) & 0xf0f0f0f0;
+ *right ^= work;
+
+ *left = rotrFixed(*left^work, 4U);
+ }
+
+ static int DesSetKey(const byte* key, int dir, word32* out)
{
- byte* const pc1m = buffer; /* place to modify pc1 into */
- byte* const pcr = pc1m + 56; /* place to rotate pc1 into */
- byte* const ks = pcr + 56;
- register int i, j, l;
- int m;
-
- for (j = 0; j < 56; j++) { /* convert pc1 to bits of key */
- l = pc1[j] - 1; /* integer bit location */
- m = l & 07; /* find bit */
- pc1m[j] = (key[l >> 3] & /* find which key byte l is in */
- bytebit[m]) /* and which bit of that byte */
- ? 1 : 0; /* and store 1-bit result */
- }
+ #define DES_KEY_BUFFER_SIZE (56+56+8)
+ #ifdef WOLFSSL_SMALL_STACK
+ byte* buffer = (byte*)XMALLOC(DES_KEY_BUFFER_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ if (buffer == NULL)
+ return MEMORY_E;
+ #else
+ byte buffer[DES_KEY_BUFFER_SIZE];
+ #endif
- for (i = 0; i < 16; i++) { /* key chunk for each iteration */
- XMEMSET(ks, 0, 8); /* Clear key schedule */
+ {
+ byte* const pc1m = buffer; /* place to modify pc1 into */
+ byte* const pcr = pc1m + 56; /* place to rotate pc1 into */
+ byte* const ks = pcr + 56;
+ register int i, j, l;
+ int m;
+
+ for (j = 0; j < 56; j++) { /* convert pc1 to bits of key */
+ l = pc1[j] - 1; /* integer bit location */
+ m = l & 07; /* find bit */
+ pc1m[j] = (key[l >> 3] & /* find which key byte l is in */
+ bytebit[m]) /* and which bit of that byte */
+ ? 1 : 0; /* and store 1-bit result */
+ }
- for (j = 0; j < 56; j++) /* rotate pc1 the right amount */
- pcr[j] =
- pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l : l-28];
+ for (i = 0; i < 16; i++) { /* key chunk for each iteration */
+ XMEMSET(ks, 0, 8); /* Clear key schedule */
- /* rotate left and right halves independently */
- for (j = 0; j < 48; j++) { /* select bits individually */
- if (pcr[pc2[j] - 1]) { /* check bit that goes to ks[j] */
- l= j % 6; /* mask it in if it's there */
- ks[j/6] |= bytebit[l] >> 2;
+ for (j = 0; j < 56; j++) /* rotate pc1 the right amount */
+ pcr[j] =
+ pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l : l-28];
+
+ /* rotate left and right halves independently */
+ for (j = 0; j < 48; j++) { /* select bits individually */
+ if (pcr[pc2[j] - 1]) { /* check bit that goes to ks[j] */
+ l= j % 6; /* mask it in if it's there */
+ ks[j/6] |= bytebit[l] >> 2;
+ }
}
- }
- /* Now convert to odd/even interleaved form for use in F */
- out[2*i] = ((word32) ks[0] << 24)
- | ((word32) ks[2] << 16)
- | ((word32) ks[4] << 8)
- | ((word32) ks[6]);
+ /* Now convert to odd/even interleaved form for use in F */
+ out[2*i] = ((word32) ks[0] << 24)
+ | ((word32) ks[2] << 16)
+ | ((word32) ks[4] << 8)
+ | ((word32) ks[6]);
- out[2*i + 1] = ((word32) ks[1] << 24)
- | ((word32) ks[3] << 16)
- | ((word32) ks[5] << 8)
- | ((word32) ks[7]);
- }
+ out[2*i + 1] = ((word32) ks[1] << 24)
+ | ((word32) ks[3] << 16)
+ | ((word32) ks[5] << 8)
+ | ((word32) ks[7]);
+ }
+
+ /* reverse key schedule order */
+ if (dir == DES_DECRYPTION) {
+ for (i = 0; i < 16; i += 2) {
+ word32 swap = out[i];
+ out[i] = out[DES_KS_SIZE - 2 - i];
+ out[DES_KS_SIZE - 2 - i] = swap;
- /* reverse key schedule order */
- if (dir == DES_DECRYPTION) {
- for (i = 0; i < 16; i += 2) {
- word32 swap = out[i];
- out[i] = out[DES_KS_SIZE - 2 - i];
- out[DES_KS_SIZE - 2 - i] = swap;
-
- swap = out[i + 1];
- out[i + 1] = out[DES_KS_SIZE - 1 - i];
- out[DES_KS_SIZE - 1 - i] = swap;
+ swap = out[i + 1];
+ out[i + 1] = out[DES_KS_SIZE - 1 - i];
+ out[DES_KS_SIZE - 1 - i] = swap;
+ }
}
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
}
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ return 0;
}
- return 0;
-}
+ int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+ {
+ wc_Des_SetIV(des, iv);
+ return DesSetKey(key, dir, des->key);
+ }
-static INLINE int Reverse(int dir)
-{
- return !dir;
-}
+ int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
+ {
+ int ret;
+ if (des == NULL || key == NULL || dir < 0) {
+ return BAD_FUNC_ARG;
+ }
-int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
-{
- wc_Des_SetIV(des, iv);
+ #if defined(WOLF_CRYPTO_CB) || \
+ (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES))
+ #ifdef WOLF_CRYPTO_CB
+ if (des->devId != INVALID_DEVID)
+ #endif
+ {
+ XMEMCPY(des->devKey, key, DES3_KEYLEN);
+ }
+ #endif
- return DesSetKey(key, dir, des->key);
-}
+ ret = DesSetKey(key + (dir == DES_ENCRYPTION ? 0:16), dir, des->key[0]);
+ if (ret != 0)
+ return ret;
+ ret = DesSetKey(key + 8, !dir, des->key[1]);
+ if (ret != 0)
+ return ret;
-int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
-{
- int ret;
+ ret = DesSetKey(key + (dir == DES_DECRYPTION ? 0:16), dir, des->key[2]);
+ if (ret != 0)
+ return ret;
-#ifdef HAVE_CAVIUM
- if (des->magic == WOLFSSL_3DES_CAVIUM_MAGIC)
- return wc_Des3_CaviumSetKey(des, key, iv);
-#endif
+ return wc_Des3_SetIV(des, iv);
+ }
- ret = DesSetKey(key + (dir == DES_ENCRYPTION ? 0:16), dir, des->key[0]);
- if (ret != 0)
- return ret;
+ static void DesRawProcessBlock(word32* lIn, word32* rIn, const word32* kptr)
+ {
+ word32 l = *lIn, r = *rIn, i;
- ret = DesSetKey(key + 8, Reverse(dir), des->key[1]);
- if (ret != 0)
- return ret;
+ for (i=0; i<8; i++)
+ {
+ word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0];
+ l ^= Spbox[6][(work) & 0x3f]
+ ^ Spbox[4][(work >> 8) & 0x3f]
+ ^ Spbox[2][(work >> 16) & 0x3f]
+ ^ Spbox[0][(work >> 24) & 0x3f];
+ work = r ^ kptr[4*i+1];
+ l ^= Spbox[7][(work) & 0x3f]
+ ^ Spbox[5][(work >> 8) & 0x3f]
+ ^ Spbox[3][(work >> 16) & 0x3f]
+ ^ Spbox[1][(work >> 24) & 0x3f];
+
+ work = rotrFixed(l, 4U) ^ kptr[4*i+2];
+ r ^= Spbox[6][(work) & 0x3f]
+ ^ Spbox[4][(work >> 8) & 0x3f]
+ ^ Spbox[2][(work >> 16) & 0x3f]
+ ^ Spbox[0][(work >> 24) & 0x3f];
+ work = l ^ kptr[4*i+3];
+ r ^= Spbox[7][(work) & 0x3f]
+ ^ Spbox[5][(work >> 8) & 0x3f]
+ ^ Spbox[3][(work >> 16) & 0x3f]
+ ^ Spbox[1][(work >> 24) & 0x3f];
+ }
- ret = DesSetKey(key + (dir == DES_DECRYPTION ? 0:16), dir, des->key[2]);
- if (ret != 0)
- return ret;
+ *lIn = l; *rIn = r;
+ }
- return wc_Des3_SetIV(des, iv);
-}
+ static void DesProcessBlock(Des* des, const byte* in, byte* out)
+ {
+ word32 l, r;
+ XMEMCPY(&l, in, sizeof(l));
+ XMEMCPY(&r, in + sizeof(l), sizeof(r));
+ #ifdef LITTLE_ENDIAN_ORDER
+ l = ByteReverseWord32(l);
+ r = ByteReverseWord32(r);
+ #endif
+ IPERM(&l,&r);
-static void DesRawProcessBlock(word32* lIn, word32* rIn, const word32* kptr)
-{
- word32 l = *lIn, r = *rIn, i;
+ DesRawProcessBlock(&l, &r, des->key);
- for (i=0; i<8; i++)
- {
- word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0];
- l ^= Spbox[6][(work) & 0x3f]
- ^ Spbox[4][(work >> 8) & 0x3f]
- ^ Spbox[2][(work >> 16) & 0x3f]
- ^ Spbox[0][(work >> 24) & 0x3f];
- work = r ^ kptr[4*i+1];
- l ^= Spbox[7][(work) & 0x3f]
- ^ Spbox[5][(work >> 8) & 0x3f]
- ^ Spbox[3][(work >> 16) & 0x3f]
- ^ Spbox[1][(work >> 24) & 0x3f];
-
- work = rotrFixed(l, 4U) ^ kptr[4*i+2];
- r ^= Spbox[6][(work) & 0x3f]
- ^ Spbox[4][(work >> 8) & 0x3f]
- ^ Spbox[2][(work >> 16) & 0x3f]
- ^ Spbox[0][(work >> 24) & 0x3f];
- work = l ^ kptr[4*i+3];
- r ^= Spbox[7][(work) & 0x3f]
- ^ Spbox[5][(work >> 8) & 0x3f]
- ^ Spbox[3][(work >> 16) & 0x3f]
- ^ Spbox[1][(work >> 24) & 0x3f];
+ FPERM(&l,&r);
+ #ifdef LITTLE_ENDIAN_ORDER
+ l = ByteReverseWord32(l);
+ r = ByteReverseWord32(r);
+ #endif
+ XMEMCPY(out, &r, sizeof(r));
+ XMEMCPY(out + sizeof(r), &l, sizeof(l));
}
- *lIn = l; *rIn = r;
-}
-
+ static void Des3ProcessBlock(Des3* des, const byte* in, byte* out)
+ {
+ word32 l, r;
-static void DesProcessBlock(Des* des, const byte* in, byte* out)
-{
- word32 l, r;
+ XMEMCPY(&l, in, sizeof(l));
+ XMEMCPY(&r, in + sizeof(l), sizeof(r));
+ #ifdef LITTLE_ENDIAN_ORDER
+ l = ByteReverseWord32(l);
+ r = ByteReverseWord32(r);
+ #endif
+ IPERM(&l,&r);
- XMEMCPY(&l, in, sizeof(l));
- XMEMCPY(&r, in + sizeof(l), sizeof(r));
- #ifdef LITTLE_ENDIAN_ORDER
- l = ByteReverseWord32(l);
- r = ByteReverseWord32(r);
- #endif
- IPERM(&l,&r);
-
- DesRawProcessBlock(&l, &r, des->key);
-
- FPERM(&l,&r);
- #ifdef LITTLE_ENDIAN_ORDER
- l = ByteReverseWord32(l);
- r = ByteReverseWord32(r);
- #endif
- XMEMCPY(out, &r, sizeof(r));
- XMEMCPY(out + sizeof(r), &l, sizeof(l));
-}
+ DesRawProcessBlock(&l, &r, des->key[0]);
+ DesRawProcessBlock(&r, &l, des->key[1]);
+ DesRawProcessBlock(&l, &r, des->key[2]);
+ FPERM(&l,&r);
+ #ifdef LITTLE_ENDIAN_ORDER
+ l = ByteReverseWord32(l);
+ r = ByteReverseWord32(r);
+ #endif
+ XMEMCPY(out, &r, sizeof(r));
+ XMEMCPY(out + sizeof(r), &l, sizeof(l));
+ }
-static void Des3ProcessBlock(Des3* des, const byte* in, byte* out)
-{
- word32 l, r;
+ int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ word32 blocks = sz / DES_BLOCK_SIZE;
- XMEMCPY(&l, in, sizeof(l));
- XMEMCPY(&r, in + sizeof(l), sizeof(r));
- #ifdef LITTLE_ENDIAN_ORDER
- l = ByteReverseWord32(l);
- r = ByteReverseWord32(r);
- #endif
- IPERM(&l,&r);
-
- DesRawProcessBlock(&l, &r, des->key[0]);
- DesRawProcessBlock(&r, &l, des->key[1]);
- DesRawProcessBlock(&l, &r, des->key[2]);
-
- FPERM(&l,&r);
- #ifdef LITTLE_ENDIAN_ORDER
- l = ByteReverseWord32(l);
- r = ByteReverseWord32(r);
- #endif
- XMEMCPY(out, &r, sizeof(r));
- XMEMCPY(out + sizeof(r), &l, sizeof(l));
-}
+ while (blocks--) {
+ xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
+ DesProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
+ XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
+ out += DES_BLOCK_SIZE;
+ in += DES_BLOCK_SIZE;
+ }
+ return 0;
+ }
-int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
- word32 blocks = sz / DES_BLOCK_SIZE;
+ int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ word32 blocks = sz / DES_BLOCK_SIZE;
- while (blocks--) {
- xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
- DesProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
- XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
+ while (blocks--) {
+ XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
+ DesProcessBlock(des, (byte*)des->tmp, out);
+ xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
+ XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
- out += DES_BLOCK_SIZE;
- in += DES_BLOCK_SIZE;
+ out += DES_BLOCK_SIZE;
+ in += DES_BLOCK_SIZE;
+ }
+ return 0;
}
- return 0;
-}
+ int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+ {
+ word32 blocks;
+
+ if (des == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
-int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
- word32 blocks = sz / DES_BLOCK_SIZE;
- byte hold[DES_BLOCK_SIZE];
+ #ifdef WOLF_CRYPTO_CB
+ if (des->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_Des3Encrypt(des, out, in, sz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+ #endif
- while (blocks--) {
- XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
- DesProcessBlock(des, (byte*)des->tmp, out);
- xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+ if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES &&
+ sz >= WC_ASYNC_THRESH_DES3_CBC) {
+ #if defined(HAVE_CAVIUM)
+ return NitroxDes3CbcEncrypt(des, out, in, sz);
+ #elif defined(HAVE_INTEL_QA)
+ return IntelQaSymDes3CbcEncrypt(&des->asyncDev, out, in, sz,
+ (const byte*)des->devKey, DES3_KEYLEN, (byte*)des->reg, DES3_IVLEN);
+ #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ if (wc_AsyncTestInit(&des->asyncDev, ASYNC_TEST_DES3_CBC_ENCRYPT)) {
+ WC_ASYNC_TEST* testDev = &des->asyncDev.test;
+ testDev->des.des = des;
+ testDev->des.out = out;
+ testDev->des.in = in;
+ testDev->des.sz = sz;
+ return WC_PENDING_E;
+ }
+ #endif
+ }
+ #endif /* WOLFSSL_ASYNC_CRYPT */
- XMEMCPY(hold, des->reg, DES_BLOCK_SIZE);
- XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
- XMEMCPY(des->tmp, hold, DES_BLOCK_SIZE);
+ blocks = sz / DES_BLOCK_SIZE;
+ while (blocks--) {
+ xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
+ Des3ProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
+ XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
- out += DES_BLOCK_SIZE;
- in += DES_BLOCK_SIZE;
+ out += DES_BLOCK_SIZE;
+ in += DES_BLOCK_SIZE;
+ }
+ return 0;
}
- return 0;
-}
-int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
- word32 blocks;
+ int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
+ {
+ word32 blocks;
-#ifdef HAVE_CAVIUM
- if (des->magic == WOLFSSL_3DES_CAVIUM_MAGIC)
- return wc_Des3_CaviumCbcEncrypt(des, out, in, sz);
-#endif
+ if (des == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
- blocks = sz / DES_BLOCK_SIZE;
- while (blocks--) {
- xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
- Des3ProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
- XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
+ #ifdef WOLF_CRYPTO_CB
+ if (des->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_Des3Decrypt(des, out, in, sz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+ #endif
- out += DES_BLOCK_SIZE;
- in += DES_BLOCK_SIZE;
- }
- return 0;
-}
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+ if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES &&
+ sz >= WC_ASYNC_THRESH_DES3_CBC) {
+ #if defined(HAVE_CAVIUM)
+ return NitroxDes3CbcDecrypt(des, out, in, sz);
+ #elif defined(HAVE_INTEL_QA)
+ return IntelQaSymDes3CbcDecrypt(&des->asyncDev, out, in, sz,
+ (const byte*)des->devKey, DES3_KEYLEN, (byte*)des->reg, DES3_IVLEN);
+ #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ if (wc_AsyncTestInit(&des->asyncDev, ASYNC_TEST_DES3_CBC_DECRYPT)) {
+ WC_ASYNC_TEST* testDev = &des->asyncDev.test;
+ testDev->des.des = des;
+ testDev->des.out = out;
+ testDev->des.in = in;
+ testDev->des.sz = sz;
+ return WC_PENDING_E;
+ }
+ #endif
+ }
+ #endif /* WOLFSSL_ASYNC_CRYPT */
+ blocks = sz / DES_BLOCK_SIZE;
+ while (blocks--) {
+ XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
+ Des3ProcessBlock(des, (byte*)des->tmp, out);
+ xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
+ XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
-int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
- word32 blocks;
+ out += DES_BLOCK_SIZE;
+ in += DES_BLOCK_SIZE;
+ }
+ return 0;
+ }
-#ifdef HAVE_CAVIUM
- if (des->magic == WOLFSSL_3DES_CAVIUM_MAGIC)
- return wc_Des3_CaviumCbcDecrypt(des, out, in, sz);
-#endif
+ #ifdef WOLFSSL_DES_ECB
+ /* One block, compatibility only */
+ int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+ {
+ word32 blocks = sz / DES_BLOCK_SIZE;
- blocks = sz / DES_BLOCK_SIZE;
- while (blocks--) {
- XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
- Des3ProcessBlock(des, (byte*)des->tmp, out);
- xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
- XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
+ if (des == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
- out += DES_BLOCK_SIZE;
- in += DES_BLOCK_SIZE;
- }
- return 0;
-}
+ while (blocks--) {
+ DesProcessBlock(des, in, out);
-#ifdef WOLFSSL_DES_ECB
+ out += DES_BLOCK_SIZE;
+ in += DES_BLOCK_SIZE;
+ }
+ return 0;
+ }
-/* One block, compatibility only */
-int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
- word32 blocks = sz / DES_BLOCK_SIZE;
+ int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+ {
+ word32 blocks = sz / DES_BLOCK_SIZE;
+
+ if (des == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
- while (blocks--) {
- DesProcessBlock(des, in, out);
+ while (blocks--) {
+ Des3ProcessBlock(des, in, out);
- out += DES_BLOCK_SIZE;
- in += DES_BLOCK_SIZE;
- }
- return 0;
-}
+ out += DES_BLOCK_SIZE;
+ in += DES_BLOCK_SIZE;
+ }
+ return 0;
+ }
+ #endif /* WOLFSSL_DES_ECB */
-#endif /* WOLFSSL_DES_ECB */
+#endif /* NEED_SOFT_DES */
-#endif /* STM32F2_CRYPTO */
void wc_Des_SetIV(Des* des, const byte* iv)
{
@@ -1489,37 +1767,11 @@ void wc_Des_SetIV(Des* des, const byte* iv)
XMEMSET(des->reg, 0, DES_BLOCK_SIZE);
}
-
-int wc_Des_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
- const byte* key, const byte* iv)
-{
- int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- Des* des = NULL;
-#else
- Des des[1];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
- des = (Des*)XMALLOC(sizeof(Des), NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (des == NULL)
- return MEMORY_E;
-#endif
-
- ret = wc_Des_SetKey(des, key, iv, DES_DECRYPTION);
- if (ret == 0)
- ret = wc_Des_CbcDecrypt(des, out, in, sz);
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(des, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
- return ret;
-}
-
-
int wc_Des3_SetIV(Des3* des, const byte* iv)
{
+ if (des == NULL) {
+ return BAD_FUNC_ARG;
+ }
if (des && iv)
XMEMCPY(des->reg, iv, DES_BLOCK_SIZE);
else if (des)
@@ -1529,150 +1781,45 @@ int wc_Des3_SetIV(Des3* des, const byte* iv)
}
-int wc_Des3_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
- const byte* key, const byte* iv)
+/* Initialize Des3 for use with async device */
+int wc_Des3Init(Des3* des3, void* heap, int devId)
{
- int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- Des3* des3 = NULL;
-#else
- Des3 des3[1];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
- des3 = (Des3*)XMALLOC(sizeof(Des3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ int ret = 0;
if (des3 == NULL)
- return MEMORY_E;
-#endif
+ return BAD_FUNC_ARG;
- ret = wc_Des3_SetKey(des3, key, iv, DES_DECRYPTION);
- if (ret == 0)
- ret = wc_Des3_CbcDecrypt(des3, out, in, sz);
+ des3->heap = heap;
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(des3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef WOLF_CRYPTO_CB
+ des3->devId = devId;
+ des3->devCtx = NULL;
+#else
+ (void)devId;
#endif
- return ret;
-}
-
-
-#ifdef HAVE_CAVIUM
-
-#include "cavium_common.h"
-
-/* Initiliaze Des3 for use with Nitrox device */
-int wc_Des3_InitCavium(Des3* des3, int devId)
-{
- if (des3 == NULL)
- return -1;
-
- if (CspAllocContext(CONTEXT_SSL, &des3->contextHandle, devId) != 0)
- return -1;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+ ret = wolfAsync_DevCtxInit(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES,
+ des3->heap, devId);
+#endif
- des3->devId = devId;
- des3->magic = WOLFSSL_3DES_CAVIUM_MAGIC;
-
- return 0;
+ return ret;
}
-
-/* Free Des3 from use with Nitrox device */
-void wc_Des3_FreeCavium(Des3* des3)
+/* Free Des3 from use with async device */
+void wc_Des3Free(Des3* des3)
{
if (des3 == NULL)
return;
- if (des3->magic != WOLFSSL_3DES_CAVIUM_MAGIC)
- return;
-
- CspFreeContext(CONTEXT_SSL, des3->contextHandle, des3->devId);
- des3->magic = 0;
-}
-
-
-static int wc_Des3_CaviumSetKey(Des3* des3, const byte* key, const byte* iv)
-{
- if (des3 == NULL)
- return -1;
-
- /* key[0] holds key, iv in reg */
- XMEMCPY(des3->key[0], key, DES_BLOCK_SIZE*3);
-
- return wc_Des3_SetIV(des3, iv);
-}
-
-
-static int wc_Des3_CaviumCbcEncrypt(Des3* des3, byte* out, const byte* in,
- word32 length)
-{
- wolfssl_word offset = 0;
- word32 requestId;
-
- while (length > WOLFSSL_MAX_16BIT) {
- word16 slen = (word16)WOLFSSL_MAX_16BIT;
- if (CspEncrypt3Des(CAVIUM_BLOCKING, des3->contextHandle,
- CAVIUM_NO_UPDATE, slen, (byte*)in + offset,
- out + offset, (byte*)des3->reg, (byte*)des3->key[0],
- &requestId, des3->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium 3DES Cbc Encrypt");
- return -1;
- }
- length -= WOLFSSL_MAX_16BIT;
- offset += WOLFSSL_MAX_16BIT;
- XMEMCPY(des3->reg, out + offset - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
- }
- if (length) {
- word16 slen = (word16)length;
-
- if (CspEncrypt3Des(CAVIUM_BLOCKING, des3->contextHandle,
- CAVIUM_NO_UPDATE, slen, (byte*)in + offset,
- out + offset, (byte*)des3->reg, (byte*)des3->key[0],
- &requestId, des3->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium 3DES Cbc Encrypt");
- return -1;
- }
- XMEMCPY(des3->reg, out+offset+length - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
- }
- return 0;
-}
-
-static int wc_Des3_CaviumCbcDecrypt(Des3* des3, byte* out, const byte* in,
- word32 length)
-{
- word32 requestId;
- wolfssl_word offset = 0;
-
- while (length > WOLFSSL_MAX_16BIT) {
- word16 slen = (word16)WOLFSSL_MAX_16BIT;
- XMEMCPY(des3->tmp, in + offset + slen - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
- if (CspDecrypt3Des(CAVIUM_BLOCKING, des3->contextHandle,
- CAVIUM_NO_UPDATE, slen, (byte*)in+offset, out+offset,
- (byte*)des3->reg, (byte*)des3->key[0], &requestId,
- des3->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium 3Des Decrypt");
- return -1;
- }
- length -= WOLFSSL_MAX_16BIT;
- offset += WOLFSSL_MAX_16BIT;
- XMEMCPY(des3->reg, des3->tmp, DES_BLOCK_SIZE);
- }
- if (length) {
- word16 slen = (word16)length;
- XMEMCPY(des3->tmp, in + offset + slen - DES_BLOCK_SIZE,DES_BLOCK_SIZE);
- if (CspDecrypt3Des(CAVIUM_BLOCKING, des3->contextHandle,
- CAVIUM_NO_UPDATE, slen, (byte*)in+offset, out+offset,
- (byte*)des3->reg, (byte*)des3->key[0], &requestId,
- des3->devId) != 0) {
- WOLFSSL_MSG("Bad Cavium 3Des Decrypt");
- return -1;
- }
- XMEMCPY(des3->reg, des3->tmp, DES_BLOCK_SIZE);
- }
- return 0;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+ wolfAsync_DevCtxFree(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+#if defined(WOLF_CRYPTO_CB) || \
+ (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES))
+ ForceZero(des3->devKey, sizeof(des3->devKey));
+#endif
}
-#endif /* HAVE_CAVIUM */
#endif /* WOLFSSL_TI_CRYPT */
#endif /* HAVE_FIPS */
#endif /* NO_DES3 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dh.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dh.c
index bc4ce11d3..6c53be8f3 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dh.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dh.c
@@ -1,8 +1,8 @@
/* dh.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -27,10 +28,46 @@
#ifndef NO_DH
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$m")
+ #pragma const_seg(".fipsB$m")
+ #endif
+#endif
+
#include <wolfssl/wolfcrypt/dh.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
-#ifndef USER_MATH_LIB
+#ifdef WOLFSSL_HAVE_SP_DH
+#include <wolfssl/wolfcrypt/sp.h>
+#endif
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+
+/*
+Possible DH enable options:
+ * NO_RSA: Overall control of DH default: on (not defined)
+ * WOLFSSL_OLD_PRIME_CHECK: Disables the new prime number check. It does not
+ directly effect this file, but it does speed up DH
+ removing the testing. It is not recommended to
+ disable the prime checking. default: off
+
+*/
+
+
+#if !defined(USER_MATH_LIB) && !defined(WOLFSSL_DH_CONST)
#include <math.h>
#define XPOW(x,y) pow((x),(y))
#define XLOG(x) log((x))
@@ -38,179 +75,2417 @@
/* user's own math lib */
#endif
+#ifdef HAVE_FFDHE_2048
+static const byte dh_ffdhe2048_p[] = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A,
+ 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
+ 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95,
+ 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB,
+ 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
+ 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8,
+ 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A,
+ 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
+ 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0,
+ 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3,
+ 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
+ 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77,
+ 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72,
+ 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
+ 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A,
+ 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61,
+ 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
+ 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68,
+ 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4,
+ 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
+ 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70,
+ 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC,
+ 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
+ 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF,
+ 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83,
+ 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
+ 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05,
+ 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2,
+ 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
+ 0x88, 0x6B, 0x42, 0x38, 0x61, 0x28, 0x5C, 0x97,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+static const byte dh_ffdhe2048_g[] = { 0x02 };
+#ifdef HAVE_FFDHE_Q
+static const byte dh_ffdhe2048_q[] = {
+ 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D,
+ 0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78,
+ 0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A,
+ 0xD4, 0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD,
+ 0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C,
+ 0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC,
+ 0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD,
+ 0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0,
+ 0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68,
+ 0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79,
+ 0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A,
+ 0xCC, 0x27, 0x86, 0x38, 0x70, 0x73, 0x45, 0xBB,
+ 0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39,
+ 0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A,
+ 0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD,
+ 0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0,
+ 0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD,
+ 0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34,
+ 0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA,
+ 0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C,
+ 0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8,
+ 0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76,
+ 0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0,
+ 0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF,
+ 0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1,
+ 0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9,
+ 0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02,
+ 0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9,
+ 0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD,
+ 0x44, 0x35, 0xA1, 0x1C, 0x30, 0x94, 0x2E, 0x4B,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+#endif /* HAVE_FFDHE_Q */
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
+const DhParams* wc_Dh_ffdhe2048_Get(void)
+{
+ static const DhParams ffdhe2048 = {
+ #ifdef HAVE_FFDHE_Q
+ dh_ffdhe2048_q, sizeof(dh_ffdhe2048_q),
+ #endif /* HAVE_FFDHE_Q */
+ dh_ffdhe2048_p, sizeof(dh_ffdhe2048_p),
+ dh_ffdhe2048_g, sizeof(dh_ffdhe2048_g)
+ };
+ return &ffdhe2048;
+}
+#endif
- static INLINE word32 min(word32 a, word32 b)
- {
- return a > b ? b : a;
- }
+#ifdef HAVE_FFDHE_3072
+static const byte dh_ffdhe3072_p[] = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A,
+ 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
+ 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95,
+ 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB,
+ 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
+ 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8,
+ 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A,
+ 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
+ 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0,
+ 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3,
+ 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
+ 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77,
+ 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72,
+ 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
+ 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A,
+ 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61,
+ 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
+ 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68,
+ 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4,
+ 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
+ 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70,
+ 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC,
+ 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
+ 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF,
+ 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83,
+ 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
+ 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05,
+ 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2,
+ 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
+ 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC,
+ 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B,
+ 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38,
+ 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07,
+ 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE,
+ 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C,
+ 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70,
+ 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44,
+ 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3,
+ 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF,
+ 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E,
+ 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D,
+ 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA,
+ 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E,
+ 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF,
+ 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C,
+ 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0xC6, 0x2E, 0x37,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+static const byte dh_ffdhe3072_g[] = { 0x02 };
+#ifdef HAVE_FFDHE_Q
+static const byte dh_ffdhe3072_q[] = {
+ 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D,
+ 0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78,
+ 0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A,
+ 0xD4, 0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD,
+ 0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C,
+ 0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC,
+ 0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD,
+ 0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0,
+ 0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68,
+ 0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79,
+ 0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A,
+ 0xCC, 0x27, 0x86, 0x38, 0x70, 0x73, 0x45, 0xBB,
+ 0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39,
+ 0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A,
+ 0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD,
+ 0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0,
+ 0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD,
+ 0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34,
+ 0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA,
+ 0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C,
+ 0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8,
+ 0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76,
+ 0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0,
+ 0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF,
+ 0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1,
+ 0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9,
+ 0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02,
+ 0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9,
+ 0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD,
+ 0x44, 0x35, 0xA1, 0x1C, 0x30, 0x8F, 0xE7, 0xEE,
+ 0x6F, 0x1A, 0xAD, 0x9D, 0xB2, 0x8C, 0x81, 0xAD,
+ 0xDE, 0x1A, 0x7A, 0x6F, 0x7C, 0xCE, 0x01, 0x1C,
+ 0x30, 0xDA, 0x37, 0xE4, 0xEB, 0x73, 0x64, 0x83,
+ 0xBD, 0x6C, 0x8E, 0x93, 0x48, 0xFB, 0xFB, 0xF7,
+ 0x2C, 0xC6, 0x58, 0x7D, 0x60, 0xC3, 0x6C, 0x8E,
+ 0x57, 0x7F, 0x09, 0x84, 0xC2, 0x89, 0xC9, 0x38,
+ 0x5A, 0x09, 0x86, 0x49, 0xDE, 0x21, 0xBC, 0xA2,
+ 0x7A, 0x7E, 0xA2, 0x29, 0x71, 0x6B, 0xA6, 0xE9,
+ 0xB2, 0x79, 0x71, 0x0F, 0x38, 0xFA, 0xA5, 0xFF,
+ 0xAE, 0x57, 0x41, 0x55, 0xCE, 0x4E, 0xFB, 0x4F,
+ 0x74, 0x36, 0x95, 0xE2, 0x91, 0x1B, 0x1D, 0x06,
+ 0xD5, 0xE2, 0x90, 0xCB, 0xCD, 0x86, 0xF5, 0x6D,
+ 0x0E, 0xDF, 0xCD, 0x21, 0x6A, 0xE2, 0x24, 0x27,
+ 0x05, 0x5E, 0x68, 0x35, 0xFD, 0x29, 0xEE, 0xF7,
+ 0x9E, 0x0D, 0x90, 0x77, 0x1F, 0xEA, 0xCE, 0xBE,
+ 0x12, 0xF2, 0x0E, 0x95, 0xB3, 0x63, 0x17, 0x1B,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+#endif /* HAVE_FFDHE_Q */
-#endif /* WOLFSSL_HAVE_MIN */
+const DhParams* wc_Dh_ffdhe3072_Get(void)
+{
+ static const DhParams ffdhe3072 = {
+ #ifdef HAVE_FFDHE_Q
+ dh_ffdhe3072_q, sizeof(dh_ffdhe3072_q),
+ #endif /* HAVE_FFDHE_Q */
+ dh_ffdhe3072_p, sizeof(dh_ffdhe3072_p),
+ dh_ffdhe3072_g, sizeof(dh_ffdhe3072_g)
+ };
+ return &ffdhe3072;
+}
+#endif
+#ifdef HAVE_FFDHE_4096
+static const byte dh_ffdhe4096_p[] = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A,
+ 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
+ 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95,
+ 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB,
+ 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
+ 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8,
+ 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A,
+ 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
+ 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0,
+ 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3,
+ 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
+ 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77,
+ 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72,
+ 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
+ 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A,
+ 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61,
+ 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
+ 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68,
+ 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4,
+ 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
+ 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70,
+ 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC,
+ 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
+ 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF,
+ 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83,
+ 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
+ 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05,
+ 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2,
+ 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
+ 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC,
+ 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B,
+ 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38,
+ 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07,
+ 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE,
+ 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C,
+ 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70,
+ 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44,
+ 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3,
+ 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF,
+ 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E,
+ 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D,
+ 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA,
+ 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E,
+ 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF,
+ 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C,
+ 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1,
+ 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB,
+ 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6,
+ 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18,
+ 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04,
+ 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A,
+ 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A,
+ 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32,
+ 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4,
+ 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38,
+ 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A,
+ 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C,
+ 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC,
+ 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF,
+ 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B,
+ 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1,
+ 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x65, 0x5F, 0x6A,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+static const byte dh_ffdhe4096_g[] = { 0x02 };
+#ifdef HAVE_FFDHE_Q
+static const byte dh_ffdhe4096_q[] = {
+ 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D,
+ 0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78,
+ 0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A,
+ 0xD4, 0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD,
+ 0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C,
+ 0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC,
+ 0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD,
+ 0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0,
+ 0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68,
+ 0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79,
+ 0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A,
+ 0xCC, 0x27, 0x86, 0x38, 0x70, 0x73, 0x45, 0xBB,
+ 0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39,
+ 0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A,
+ 0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD,
+ 0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0,
+ 0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD,
+ 0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34,
+ 0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA,
+ 0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C,
+ 0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8,
+ 0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76,
+ 0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0,
+ 0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF,
+ 0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1,
+ 0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9,
+ 0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02,
+ 0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9,
+ 0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD,
+ 0x44, 0x35, 0xA1, 0x1C, 0x30, 0x8F, 0xE7, 0xEE,
+ 0x6F, 0x1A, 0xAD, 0x9D, 0xB2, 0x8C, 0x81, 0xAD,
+ 0xDE, 0x1A, 0x7A, 0x6F, 0x7C, 0xCE, 0x01, 0x1C,
+ 0x30, 0xDA, 0x37, 0xE4, 0xEB, 0x73, 0x64, 0x83,
+ 0xBD, 0x6C, 0x8E, 0x93, 0x48, 0xFB, 0xFB, 0xF7,
+ 0x2C, 0xC6, 0x58, 0x7D, 0x60, 0xC3, 0x6C, 0x8E,
+ 0x57, 0x7F, 0x09, 0x84, 0xC2, 0x89, 0xC9, 0x38,
+ 0x5A, 0x09, 0x86, 0x49, 0xDE, 0x21, 0xBC, 0xA2,
+ 0x7A, 0x7E, 0xA2, 0x29, 0x71, 0x6B, 0xA6, 0xE9,
+ 0xB2, 0x79, 0x71, 0x0F, 0x38, 0xFA, 0xA5, 0xFF,
+ 0xAE, 0x57, 0x41, 0x55, 0xCE, 0x4E, 0xFB, 0x4F,
+ 0x74, 0x36, 0x95, 0xE2, 0x91, 0x1B, 0x1D, 0x06,
+ 0xD5, 0xE2, 0x90, 0xCB, 0xCD, 0x86, 0xF5, 0x6D,
+ 0x0E, 0xDF, 0xCD, 0x21, 0x6A, 0xE2, 0x24, 0x27,
+ 0x05, 0x5E, 0x68, 0x35, 0xFD, 0x29, 0xEE, 0xF7,
+ 0x9E, 0x0D, 0x90, 0x77, 0x1F, 0xEA, 0xCE, 0xBE,
+ 0x12, 0xF2, 0x0E, 0x95, 0xB3, 0x4F, 0x0F, 0x78,
+ 0xB7, 0x37, 0xA9, 0x61, 0x8B, 0x26, 0xFA, 0x7D,
+ 0xBC, 0x98, 0x74, 0xF2, 0x72, 0xC4, 0x2B, 0xDB,
+ 0x56, 0x3E, 0xAF, 0xA1, 0x6B, 0x4F, 0xB6, 0x8C,
+ 0x3B, 0xB1, 0xE7, 0x8E, 0xAA, 0x81, 0xA0, 0x02,
+ 0x43, 0xFA, 0xAD, 0xD2, 0xBF, 0x18, 0xE6, 0x3D,
+ 0x38, 0x9A, 0xE4, 0x43, 0x77, 0xDA, 0x18, 0xC5,
+ 0x76, 0xB5, 0x0F, 0x00, 0x96, 0xCF, 0x34, 0x19,
+ 0x54, 0x83, 0xB0, 0x05, 0x48, 0xC0, 0x98, 0x62,
+ 0x36, 0xE3, 0xBC, 0x7C, 0xB8, 0xD6, 0x80, 0x1C,
+ 0x04, 0x94, 0xCC, 0xD1, 0x99, 0xE5, 0xC5, 0xBD,
+ 0x0D, 0x0E, 0xDC, 0x9E, 0xB8, 0xA0, 0x00, 0x1E,
+ 0x15, 0x27, 0x67, 0x54, 0xFC, 0xC6, 0x85, 0x66,
+ 0x05, 0x41, 0x48, 0xE6, 0xE7, 0x64, 0xBE, 0xE7,
+ 0xC7, 0x64, 0xDA, 0xAD, 0x3F, 0xC4, 0x52, 0x35,
+ 0xA6, 0xDA, 0xD4, 0x28, 0xFA, 0x20, 0xC1, 0x70,
+ 0xE3, 0x45, 0x00, 0x3F, 0x2F, 0x32, 0xAF, 0xB5,
+ 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+#endif /* HAVE_FFDHE_Q */
-void wc_InitDhKey(DhKey* key)
+const DhParams* wc_Dh_ffdhe4096_Get(void)
{
- (void)key;
-/* TomsFastMath doesn't use memory allocation */
-#ifndef USE_FAST_MATH
- key->p.dp = 0;
- key->g.dp = 0;
+ static const DhParams ffdhe4096 = {
+ #ifdef HAVE_FFDHE_Q
+ dh_ffdhe4096_q, sizeof(dh_ffdhe4096_q),
+ #endif /* HAVE_FFDHE_Q */
+ dh_ffdhe4096_p, sizeof(dh_ffdhe4096_p),
+ dh_ffdhe4096_g, sizeof(dh_ffdhe4096_g)
+ };
+ return &ffdhe4096;
+}
#endif
+
+#ifdef HAVE_FFDHE_6144
+static const byte dh_ffdhe6144_p[] = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A,
+ 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
+ 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95,
+ 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB,
+ 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
+ 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8,
+ 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A,
+ 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
+ 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0,
+ 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3,
+ 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
+ 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77,
+ 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72,
+ 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
+ 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A,
+ 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61,
+ 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
+ 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68,
+ 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4,
+ 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
+ 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70,
+ 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC,
+ 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
+ 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF,
+ 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83,
+ 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
+ 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05,
+ 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2,
+ 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
+ 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC,
+ 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B,
+ 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38,
+ 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07,
+ 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE,
+ 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C,
+ 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70,
+ 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44,
+ 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3,
+ 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF,
+ 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E,
+ 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D,
+ 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA,
+ 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E,
+ 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF,
+ 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C,
+ 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1,
+ 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB,
+ 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6,
+ 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18,
+ 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04,
+ 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A,
+ 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A,
+ 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32,
+ 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4,
+ 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38,
+ 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A,
+ 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C,
+ 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC,
+ 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF,
+ 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B,
+ 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1,
+ 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02,
+ 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A,
+ 0x4E, 0x67, 0x7D, 0x2C, 0x38, 0x53, 0x2A, 0x3A,
+ 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6,
+ 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8,
+ 0x91, 0x7B, 0xDD, 0x64, 0xB1, 0xC0, 0xFD, 0x4C,
+ 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A,
+ 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71,
+ 0x9B, 0x1F, 0x5C, 0x3E, 0x4E, 0x46, 0x04, 0x1F,
+ 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77,
+ 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10,
+ 0xB8, 0x55, 0x32, 0x2E, 0xDB, 0x63, 0x40, 0xD8,
+ 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3,
+ 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E,
+ 0x7F, 0xB2, 0x9F, 0x8C, 0x18, 0x30, 0x23, 0xC3,
+ 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4,
+ 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1,
+ 0x94, 0xC6, 0x65, 0x1E, 0x77, 0xCA, 0xF9, 0x92,
+ 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6,
+ 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82,
+ 0x0A, 0xE8, 0xDB, 0x58, 0x47, 0xA6, 0x7C, 0xBE,
+ 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C,
+ 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E,
+ 0x62, 0x29, 0x2C, 0x31, 0x15, 0x62, 0xA8, 0x46,
+ 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A,
+ 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17,
+ 0x8C, 0xCF, 0x2D, 0xD5, 0xCA, 0xCE, 0xF4, 0x03,
+ 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04,
+ 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6,
+ 0x3F, 0xDD, 0x4A, 0x8E, 0x9A, 0xDB, 0x1E, 0x69,
+ 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1,
+ 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4,
+ 0xA4, 0x0E, 0x32, 0x9C, 0xD0, 0xE4, 0x0E, 0x65,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+static const byte dh_ffdhe6144_g[] = { 0x02 };
+#ifdef HAVE_FFDHE_Q
+static const byte dh_ffdhe6144_q[] = {
+ 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D,
+ 0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78,
+ 0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A,
+ 0xD4, 0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD,
+ 0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C,
+ 0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC,
+ 0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD,
+ 0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0,
+ 0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68,
+ 0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79,
+ 0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A,
+ 0xCC, 0x27, 0x86, 0x38, 0x70, 0x73, 0x45, 0xBB,
+ 0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39,
+ 0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A,
+ 0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD,
+ 0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0,
+ 0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD,
+ 0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34,
+ 0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA,
+ 0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C,
+ 0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8,
+ 0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76,
+ 0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0,
+ 0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF,
+ 0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1,
+ 0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9,
+ 0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02,
+ 0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9,
+ 0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD,
+ 0x44, 0x35, 0xA1, 0x1C, 0x30, 0x8F, 0xE7, 0xEE,
+ 0x6F, 0x1A, 0xAD, 0x9D, 0xB2, 0x8C, 0x81, 0xAD,
+ 0xDE, 0x1A, 0x7A, 0x6F, 0x7C, 0xCE, 0x01, 0x1C,
+ 0x30, 0xDA, 0x37, 0xE4, 0xEB, 0x73, 0x64, 0x83,
+ 0xBD, 0x6C, 0x8E, 0x93, 0x48, 0xFB, 0xFB, 0xF7,
+ 0x2C, 0xC6, 0x58, 0x7D, 0x60, 0xC3, 0x6C, 0x8E,
+ 0x57, 0x7F, 0x09, 0x84, 0xC2, 0x89, 0xC9, 0x38,
+ 0x5A, 0x09, 0x86, 0x49, 0xDE, 0x21, 0xBC, 0xA2,
+ 0x7A, 0x7E, 0xA2, 0x29, 0x71, 0x6B, 0xA6, 0xE9,
+ 0xB2, 0x79, 0x71, 0x0F, 0x38, 0xFA, 0xA5, 0xFF,
+ 0xAE, 0x57, 0x41, 0x55, 0xCE, 0x4E, 0xFB, 0x4F,
+ 0x74, 0x36, 0x95, 0xE2, 0x91, 0x1B, 0x1D, 0x06,
+ 0xD5, 0xE2, 0x90, 0xCB, 0xCD, 0x86, 0xF5, 0x6D,
+ 0x0E, 0xDF, 0xCD, 0x21, 0x6A, 0xE2, 0x24, 0x27,
+ 0x05, 0x5E, 0x68, 0x35, 0xFD, 0x29, 0xEE, 0xF7,
+ 0x9E, 0x0D, 0x90, 0x77, 0x1F, 0xEA, 0xCE, 0xBE,
+ 0x12, 0xF2, 0x0E, 0x95, 0xB3, 0x4F, 0x0F, 0x78,
+ 0xB7, 0x37, 0xA9, 0x61, 0x8B, 0x26, 0xFA, 0x7D,
+ 0xBC, 0x98, 0x74, 0xF2, 0x72, 0xC4, 0x2B, 0xDB,
+ 0x56, 0x3E, 0xAF, 0xA1, 0x6B, 0x4F, 0xB6, 0x8C,
+ 0x3B, 0xB1, 0xE7, 0x8E, 0xAA, 0x81, 0xA0, 0x02,
+ 0x43, 0xFA, 0xAD, 0xD2, 0xBF, 0x18, 0xE6, 0x3D,
+ 0x38, 0x9A, 0xE4, 0x43, 0x77, 0xDA, 0x18, 0xC5,
+ 0x76, 0xB5, 0x0F, 0x00, 0x96, 0xCF, 0x34, 0x19,
+ 0x54, 0x83, 0xB0, 0x05, 0x48, 0xC0, 0x98, 0x62,
+ 0x36, 0xE3, 0xBC, 0x7C, 0xB8, 0xD6, 0x80, 0x1C,
+ 0x04, 0x94, 0xCC, 0xD1, 0x99, 0xE5, 0xC5, 0xBD,
+ 0x0D, 0x0E, 0xDC, 0x9E, 0xB8, 0xA0, 0x00, 0x1E,
+ 0x15, 0x27, 0x67, 0x54, 0xFC, 0xC6, 0x85, 0x66,
+ 0x05, 0x41, 0x48, 0xE6, 0xE7, 0x64, 0xBE, 0xE7,
+ 0xC7, 0x64, 0xDA, 0xAD, 0x3F, 0xC4, 0x52, 0x35,
+ 0xA6, 0xDA, 0xD4, 0x28, 0xFA, 0x20, 0xC1, 0x70,
+ 0xE3, 0x45, 0x00, 0x3F, 0x2F, 0x06, 0xEC, 0x81,
+ 0x05, 0xFE, 0xB2, 0x5B, 0x22, 0x81, 0xB6, 0x3D,
+ 0x27, 0x33, 0xBE, 0x96, 0x1C, 0x29, 0x95, 0x1D,
+ 0x11, 0xDD, 0x22, 0x21, 0x65, 0x7A, 0x9F, 0x53,
+ 0x1D, 0xDA, 0x2A, 0x19, 0x4D, 0xBB, 0x12, 0x64,
+ 0x48, 0xBD, 0xEE, 0xB2, 0x58, 0xE0, 0x7E, 0xA6,
+ 0x59, 0xC7, 0x46, 0x19, 0xA6, 0x38, 0x0E, 0x1D,
+ 0x66, 0xD6, 0x83, 0x2B, 0xFE, 0x67, 0xF6, 0x38,
+ 0xCD, 0x8F, 0xAE, 0x1F, 0x27, 0x23, 0x02, 0x0F,
+ 0x9C, 0x40, 0xA3, 0xFD, 0xA6, 0x7E, 0xDA, 0x3B,
+ 0xD2, 0x92, 0x38, 0xFB, 0xD4, 0xD4, 0xB4, 0x88,
+ 0x5C, 0x2A, 0x99, 0x17, 0x6D, 0xB1, 0xA0, 0x6C,
+ 0x50, 0x07, 0x78, 0x49, 0x1A, 0x82, 0x88, 0xF1,
+ 0x85, 0x5F, 0x60, 0xFF, 0xFC, 0xF1, 0xD1, 0x37,
+ 0x3F, 0xD9, 0x4F, 0xC6, 0x0C, 0x18, 0x11, 0xE1,
+ 0xAC, 0x3F, 0x1C, 0x6D, 0x00, 0x3B, 0xEC, 0xDA,
+ 0x3B, 0x1F, 0x27, 0x25, 0xCA, 0x59, 0x5D, 0xE0,
+ 0xCA, 0x63, 0x32, 0x8F, 0x3B, 0xE5, 0x7C, 0xC9,
+ 0x77, 0x55, 0x60, 0x11, 0x95, 0x14, 0x0D, 0xFB,
+ 0x59, 0xD3, 0x9C, 0xE0, 0x91, 0x30, 0x8B, 0x41,
+ 0x05, 0x74, 0x6D, 0xAC, 0x23, 0xD3, 0x3E, 0x5F,
+ 0x7C, 0xE4, 0x84, 0x8D, 0xA3, 0x16, 0xA9, 0xC6,
+ 0x6B, 0x95, 0x81, 0xBA, 0x35, 0x73, 0xBF, 0xAF,
+ 0x31, 0x14, 0x96, 0x18, 0x8A, 0xB1, 0x54, 0x23,
+ 0x28, 0x2E, 0xE4, 0x16, 0xDC, 0x2A, 0x19, 0xC5,
+ 0x72, 0x4F, 0xA9, 0x1A, 0xE4, 0xAD, 0xC8, 0x8B,
+ 0xC6, 0x67, 0x96, 0xEA, 0xE5, 0x67, 0x7A, 0x01,
+ 0xF6, 0x4E, 0x8C, 0x08, 0x63, 0x13, 0x95, 0x82,
+ 0x2D, 0x9D, 0xB8, 0xFC, 0xEE, 0x35, 0xC0, 0x6B,
+ 0x1F, 0xEE, 0xA5, 0x47, 0x4D, 0x6D, 0x8F, 0x34,
+ 0xB1, 0x53, 0x4A, 0x93, 0x6A, 0x18, 0xB0, 0xE0,
+ 0xD2, 0x0E, 0xAB, 0x86, 0xBC, 0x9C, 0x6D, 0x6A,
+ 0x52, 0x07, 0x19, 0x4E, 0x68, 0x72, 0x07, 0x32,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+#endif /* HAVE_FFDHE_Q */
+
+const DhParams* wc_Dh_ffdhe6144_Get(void)
+{
+ static const DhParams ffdhe6144 = {
+ #ifdef HAVE_FFDHE_Q
+ dh_ffdhe6144_q, sizeof(dh_ffdhe6144_q),
+ #endif /* HAVE_FFDHE_Q */
+ dh_ffdhe6144_p, sizeof(dh_ffdhe6144_p),
+ dh_ffdhe6144_g, sizeof(dh_ffdhe6144_g)
+ };
+ return &ffdhe6144;
}
+#endif
+#ifdef HAVE_FFDHE_8192
+static const byte dh_ffdhe8192_p[] = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A,
+ 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1,
+ 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95,
+ 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB,
+ 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9,
+ 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8,
+ 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A,
+ 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61,
+ 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0,
+ 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3,
+ 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35,
+ 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77,
+ 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72,
+ 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35,
+ 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A,
+ 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61,
+ 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB,
+ 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68,
+ 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4,
+ 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19,
+ 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70,
+ 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC,
+ 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61,
+ 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF,
+ 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83,
+ 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73,
+ 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05,
+ 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2,
+ 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA,
+ 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC,
+ 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B,
+ 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38,
+ 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07,
+ 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE,
+ 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C,
+ 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70,
+ 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44,
+ 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3,
+ 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF,
+ 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E,
+ 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D,
+ 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA,
+ 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E,
+ 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF,
+ 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C,
+ 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1,
+ 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB,
+ 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6,
+ 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18,
+ 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04,
+ 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A,
+ 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A,
+ 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32,
+ 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4,
+ 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38,
+ 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A,
+ 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C,
+ 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC,
+ 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF,
+ 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B,
+ 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1,
+ 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02,
+ 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A,
+ 0x4E, 0x67, 0x7D, 0x2C, 0x38, 0x53, 0x2A, 0x3A,
+ 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6,
+ 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8,
+ 0x91, 0x7B, 0xDD, 0x64, 0xB1, 0xC0, 0xFD, 0x4C,
+ 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A,
+ 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71,
+ 0x9B, 0x1F, 0x5C, 0x3E, 0x4E, 0x46, 0x04, 0x1F,
+ 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77,
+ 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10,
+ 0xB8, 0x55, 0x32, 0x2E, 0xDB, 0x63, 0x40, 0xD8,
+ 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3,
+ 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E,
+ 0x7F, 0xB2, 0x9F, 0x8C, 0x18, 0x30, 0x23, 0xC3,
+ 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4,
+ 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1,
+ 0x94, 0xC6, 0x65, 0x1E, 0x77, 0xCA, 0xF9, 0x92,
+ 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6,
+ 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82,
+ 0x0A, 0xE8, 0xDB, 0x58, 0x47, 0xA6, 0x7C, 0xBE,
+ 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C,
+ 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E,
+ 0x62, 0x29, 0x2C, 0x31, 0x15, 0x62, 0xA8, 0x46,
+ 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A,
+ 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17,
+ 0x8C, 0xCF, 0x2D, 0xD5, 0xCA, 0xCE, 0xF4, 0x03,
+ 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04,
+ 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6,
+ 0x3F, 0xDD, 0x4A, 0x8E, 0x9A, 0xDB, 0x1E, 0x69,
+ 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1,
+ 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4,
+ 0xA4, 0x0E, 0x32, 0x9C, 0xCF, 0xF4, 0x6A, 0xAA,
+ 0x36, 0xAD, 0x00, 0x4C, 0xF6, 0x00, 0xC8, 0x38,
+ 0x1E, 0x42, 0x5A, 0x31, 0xD9, 0x51, 0xAE, 0x64,
+ 0xFD, 0xB2, 0x3F, 0xCE, 0xC9, 0x50, 0x9D, 0x43,
+ 0x68, 0x7F, 0xEB, 0x69, 0xED, 0xD1, 0xCC, 0x5E,
+ 0x0B, 0x8C, 0xC3, 0xBD, 0xF6, 0x4B, 0x10, 0xEF,
+ 0x86, 0xB6, 0x31, 0x42, 0xA3, 0xAB, 0x88, 0x29,
+ 0x55, 0x5B, 0x2F, 0x74, 0x7C, 0x93, 0x26, 0x65,
+ 0xCB, 0x2C, 0x0F, 0x1C, 0xC0, 0x1B, 0xD7, 0x02,
+ 0x29, 0x38, 0x88, 0x39, 0xD2, 0xAF, 0x05, 0xE4,
+ 0x54, 0x50, 0x4A, 0xC7, 0x8B, 0x75, 0x82, 0x82,
+ 0x28, 0x46, 0xC0, 0xBA, 0x35, 0xC3, 0x5F, 0x5C,
+ 0x59, 0x16, 0x0C, 0xC0, 0x46, 0xFD, 0x82, 0x51,
+ 0x54, 0x1F, 0xC6, 0x8C, 0x9C, 0x86, 0xB0, 0x22,
+ 0xBB, 0x70, 0x99, 0x87, 0x6A, 0x46, 0x0E, 0x74,
+ 0x51, 0xA8, 0xA9, 0x31, 0x09, 0x70, 0x3F, 0xEE,
+ 0x1C, 0x21, 0x7E, 0x6C, 0x38, 0x26, 0xE5, 0x2C,
+ 0x51, 0xAA, 0x69, 0x1E, 0x0E, 0x42, 0x3C, 0xFC,
+ 0x99, 0xE9, 0xE3, 0x16, 0x50, 0xC1, 0x21, 0x7B,
+ 0x62, 0x48, 0x16, 0xCD, 0xAD, 0x9A, 0x95, 0xF9,
+ 0xD5, 0xB8, 0x01, 0x94, 0x88, 0xD9, 0xC0, 0xA0,
+ 0xA1, 0xFE, 0x30, 0x75, 0xA5, 0x77, 0xE2, 0x31,
+ 0x83, 0xF8, 0x1D, 0x4A, 0x3F, 0x2F, 0xA4, 0x57,
+ 0x1E, 0xFC, 0x8C, 0xE0, 0xBA, 0x8A, 0x4F, 0xE8,
+ 0xB6, 0x85, 0x5D, 0xFE, 0x72, 0xB0, 0xA6, 0x6E,
+ 0xDE, 0xD2, 0xFB, 0xAB, 0xFB, 0xE5, 0x8A, 0x30,
+ 0xFA, 0xFA, 0xBE, 0x1C, 0x5D, 0x71, 0xA8, 0x7E,
+ 0x2F, 0x74, 0x1E, 0xF8, 0xC1, 0xFE, 0x86, 0xFE,
+ 0xA6, 0xBB, 0xFD, 0xE5, 0x30, 0x67, 0x7F, 0x0D,
+ 0x97, 0xD1, 0x1D, 0x49, 0xF7, 0xA8, 0x44, 0x3D,
+ 0x08, 0x22, 0xE5, 0x06, 0xA9, 0xF4, 0x61, 0x4E,
+ 0x01, 0x1E, 0x2A, 0x94, 0x83, 0x8F, 0xF8, 0x8C,
+ 0xD6, 0x8C, 0x8B, 0xB7, 0xC5, 0xC6, 0x42, 0x4C,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+static const byte dh_ffdhe8192_g[] = { 0x02 };
+#ifdef HAVE_FFDHE_Q
+static const byte dh_ffdhe8192_q[] = {
+ 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xD6, 0xFC, 0x2A, 0x2C, 0x51, 0x5D, 0xA5, 0x4D,
+ 0x57, 0xEE, 0x2B, 0x10, 0x13, 0x9E, 0x9E, 0x78,
+ 0xEC, 0x5C, 0xE2, 0xC1, 0xE7, 0x16, 0x9B, 0x4A,
+ 0xD4, 0xF0, 0x9B, 0x20, 0x8A, 0x32, 0x19, 0xFD,
+ 0xE6, 0x49, 0xCE, 0xE7, 0x12, 0x4D, 0x9F, 0x7C,
+ 0xBE, 0x97, 0xF1, 0xB1, 0xB1, 0x86, 0x3A, 0xEC,
+ 0x7B, 0x40, 0xD9, 0x01, 0x57, 0x62, 0x30, 0xBD,
+ 0x69, 0xEF, 0x8F, 0x6A, 0xEA, 0xFE, 0xB2, 0xB0,
+ 0x92, 0x19, 0xFA, 0x8F, 0xAF, 0x83, 0x37, 0x68,
+ 0x42, 0xB1, 0xB2, 0xAA, 0x9E, 0xF6, 0x8D, 0x79,
+ 0xDA, 0xAB, 0x89, 0xAF, 0x3F, 0xAB, 0xE4, 0x9A,
+ 0xCC, 0x27, 0x86, 0x38, 0x70, 0x73, 0x45, 0xBB,
+ 0xF1, 0x53, 0x44, 0xED, 0x79, 0xF7, 0xF4, 0x39,
+ 0x0E, 0xF8, 0xAC, 0x50, 0x9B, 0x56, 0xF3, 0x9A,
+ 0x98, 0x56, 0x65, 0x27, 0xA4, 0x1D, 0x3C, 0xBD,
+ 0x5E, 0x05, 0x58, 0xC1, 0x59, 0x92, 0x7D, 0xB0,
+ 0xE8, 0x84, 0x54, 0xA5, 0xD9, 0x64, 0x71, 0xFD,
+ 0xDC, 0xB5, 0x6D, 0x5B, 0xB0, 0x6B, 0xFA, 0x34,
+ 0x0E, 0xA7, 0xA1, 0x51, 0xEF, 0x1C, 0xA6, 0xFA,
+ 0x57, 0x2B, 0x76, 0xF3, 0xB1, 0xB9, 0x5D, 0x8C,
+ 0x85, 0x83, 0xD3, 0xE4, 0x77, 0x05, 0x36, 0xB8,
+ 0x4F, 0x01, 0x7E, 0x70, 0xE6, 0xFB, 0xF1, 0x76,
+ 0x60, 0x1A, 0x02, 0x66, 0x94, 0x1A, 0x17, 0xB0,
+ 0xC8, 0xB9, 0x7F, 0x4E, 0x74, 0xC2, 0xC1, 0xFF,
+ 0xC7, 0x27, 0x89, 0x19, 0x77, 0x79, 0x40, 0xC1,
+ 0xE1, 0xFF, 0x1D, 0x8D, 0xA6, 0x37, 0xD6, 0xB9,
+ 0x9D, 0xDA, 0xFE, 0x5E, 0x17, 0x61, 0x10, 0x02,
+ 0xE2, 0xC7, 0x78, 0xC1, 0xBE, 0x8B, 0x41, 0xD9,
+ 0x63, 0x79, 0xA5, 0x13, 0x60, 0xD9, 0x77, 0xFD,
+ 0x44, 0x35, 0xA1, 0x1C, 0x30, 0x8F, 0xE7, 0xEE,
+ 0x6F, 0x1A, 0xAD, 0x9D, 0xB2, 0x8C, 0x81, 0xAD,
+ 0xDE, 0x1A, 0x7A, 0x6F, 0x7C, 0xCE, 0x01, 0x1C,
+ 0x30, 0xDA, 0x37, 0xE4, 0xEB, 0x73, 0x64, 0x83,
+ 0xBD, 0x6C, 0x8E, 0x93, 0x48, 0xFB, 0xFB, 0xF7,
+ 0x2C, 0xC6, 0x58, 0x7D, 0x60, 0xC3, 0x6C, 0x8E,
+ 0x57, 0x7F, 0x09, 0x84, 0xC2, 0x89, 0xC9, 0x38,
+ 0x5A, 0x09, 0x86, 0x49, 0xDE, 0x21, 0xBC, 0xA2,
+ 0x7A, 0x7E, 0xA2, 0x29, 0x71, 0x6B, 0xA6, 0xE9,
+ 0xB2, 0x79, 0x71, 0x0F, 0x38, 0xFA, 0xA5, 0xFF,
+ 0xAE, 0x57, 0x41, 0x55, 0xCE, 0x4E, 0xFB, 0x4F,
+ 0x74, 0x36, 0x95, 0xE2, 0x91, 0x1B, 0x1D, 0x06,
+ 0xD5, 0xE2, 0x90, 0xCB, 0xCD, 0x86, 0xF5, 0x6D,
+ 0x0E, 0xDF, 0xCD, 0x21, 0x6A, 0xE2, 0x24, 0x27,
+ 0x05, 0x5E, 0x68, 0x35, 0xFD, 0x29, 0xEE, 0xF7,
+ 0x9E, 0x0D, 0x90, 0x77, 0x1F, 0xEA, 0xCE, 0xBE,
+ 0x12, 0xF2, 0x0E, 0x95, 0xB3, 0x4F, 0x0F, 0x78,
+ 0xB7, 0x37, 0xA9, 0x61, 0x8B, 0x26, 0xFA, 0x7D,
+ 0xBC, 0x98, 0x74, 0xF2, 0x72, 0xC4, 0x2B, 0xDB,
+ 0x56, 0x3E, 0xAF, 0xA1, 0x6B, 0x4F, 0xB6, 0x8C,
+ 0x3B, 0xB1, 0xE7, 0x8E, 0xAA, 0x81, 0xA0, 0x02,
+ 0x43, 0xFA, 0xAD, 0xD2, 0xBF, 0x18, 0xE6, 0x3D,
+ 0x38, 0x9A, 0xE4, 0x43, 0x77, 0xDA, 0x18, 0xC5,
+ 0x76, 0xB5, 0x0F, 0x00, 0x96, 0xCF, 0x34, 0x19,
+ 0x54, 0x83, 0xB0, 0x05, 0x48, 0xC0, 0x98, 0x62,
+ 0x36, 0xE3, 0xBC, 0x7C, 0xB8, 0xD6, 0x80, 0x1C,
+ 0x04, 0x94, 0xCC, 0xD1, 0x99, 0xE5, 0xC5, 0xBD,
+ 0x0D, 0x0E, 0xDC, 0x9E, 0xB8, 0xA0, 0x00, 0x1E,
+ 0x15, 0x27, 0x67, 0x54, 0xFC, 0xC6, 0x85, 0x66,
+ 0x05, 0x41, 0x48, 0xE6, 0xE7, 0x64, 0xBE, 0xE7,
+ 0xC7, 0x64, 0xDA, 0xAD, 0x3F, 0xC4, 0x52, 0x35,
+ 0xA6, 0xDA, 0xD4, 0x28, 0xFA, 0x20, 0xC1, 0x70,
+ 0xE3, 0x45, 0x00, 0x3F, 0x2F, 0x06, 0xEC, 0x81,
+ 0x05, 0xFE, 0xB2, 0x5B, 0x22, 0x81, 0xB6, 0x3D,
+ 0x27, 0x33, 0xBE, 0x96, 0x1C, 0x29, 0x95, 0x1D,
+ 0x11, 0xDD, 0x22, 0x21, 0x65, 0x7A, 0x9F, 0x53,
+ 0x1D, 0xDA, 0x2A, 0x19, 0x4D, 0xBB, 0x12, 0x64,
+ 0x48, 0xBD, 0xEE, 0xB2, 0x58, 0xE0, 0x7E, 0xA6,
+ 0x59, 0xC7, 0x46, 0x19, 0xA6, 0x38, 0x0E, 0x1D,
+ 0x66, 0xD6, 0x83, 0x2B, 0xFE, 0x67, 0xF6, 0x38,
+ 0xCD, 0x8F, 0xAE, 0x1F, 0x27, 0x23, 0x02, 0x0F,
+ 0x9C, 0x40, 0xA3, 0xFD, 0xA6, 0x7E, 0xDA, 0x3B,
+ 0xD2, 0x92, 0x38, 0xFB, 0xD4, 0xD4, 0xB4, 0x88,
+ 0x5C, 0x2A, 0x99, 0x17, 0x6D, 0xB1, 0xA0, 0x6C,
+ 0x50, 0x07, 0x78, 0x49, 0x1A, 0x82, 0x88, 0xF1,
+ 0x85, 0x5F, 0x60, 0xFF, 0xFC, 0xF1, 0xD1, 0x37,
+ 0x3F, 0xD9, 0x4F, 0xC6, 0x0C, 0x18, 0x11, 0xE1,
+ 0xAC, 0x3F, 0x1C, 0x6D, 0x00, 0x3B, 0xEC, 0xDA,
+ 0x3B, 0x1F, 0x27, 0x25, 0xCA, 0x59, 0x5D, 0xE0,
+ 0xCA, 0x63, 0x32, 0x8F, 0x3B, 0xE5, 0x7C, 0xC9,
+ 0x77, 0x55, 0x60, 0x11, 0x95, 0x14, 0x0D, 0xFB,
+ 0x59, 0xD3, 0x9C, 0xE0, 0x91, 0x30, 0x8B, 0x41,
+ 0x05, 0x74, 0x6D, 0xAC, 0x23, 0xD3, 0x3E, 0x5F,
+ 0x7C, 0xE4, 0x84, 0x8D, 0xA3, 0x16, 0xA9, 0xC6,
+ 0x6B, 0x95, 0x81, 0xBA, 0x35, 0x73, 0xBF, 0xAF,
+ 0x31, 0x14, 0x96, 0x18, 0x8A, 0xB1, 0x54, 0x23,
+ 0x28, 0x2E, 0xE4, 0x16, 0xDC, 0x2A, 0x19, 0xC5,
+ 0x72, 0x4F, 0xA9, 0x1A, 0xE4, 0xAD, 0xC8, 0x8B,
+ 0xC6, 0x67, 0x96, 0xEA, 0xE5, 0x67, 0x7A, 0x01,
+ 0xF6, 0x4E, 0x8C, 0x08, 0x63, 0x13, 0x95, 0x82,
+ 0x2D, 0x9D, 0xB8, 0xFC, 0xEE, 0x35, 0xC0, 0x6B,
+ 0x1F, 0xEE, 0xA5, 0x47, 0x4D, 0x6D, 0x8F, 0x34,
+ 0xB1, 0x53, 0x4A, 0x93, 0x6A, 0x18, 0xB0, 0xE0,
+ 0xD2, 0x0E, 0xAB, 0x86, 0xBC, 0x9C, 0x6D, 0x6A,
+ 0x52, 0x07, 0x19, 0x4E, 0x67, 0xFA, 0x35, 0x55,
+ 0x1B, 0x56, 0x80, 0x26, 0x7B, 0x00, 0x64, 0x1C,
+ 0x0F, 0x21, 0x2D, 0x18, 0xEC, 0xA8, 0xD7, 0x32,
+ 0x7E, 0xD9, 0x1F, 0xE7, 0x64, 0xA8, 0x4E, 0xA1,
+ 0xB4, 0x3F, 0xF5, 0xB4, 0xF6, 0xE8, 0xE6, 0x2F,
+ 0x05, 0xC6, 0x61, 0xDE, 0xFB, 0x25, 0x88, 0x77,
+ 0xC3, 0x5B, 0x18, 0xA1, 0x51, 0xD5, 0xC4, 0x14,
+ 0xAA, 0xAD, 0x97, 0xBA, 0x3E, 0x49, 0x93, 0x32,
+ 0xE5, 0x96, 0x07, 0x8E, 0x60, 0x0D, 0xEB, 0x81,
+ 0x14, 0x9C, 0x44, 0x1C, 0xE9, 0x57, 0x82, 0xF2,
+ 0x2A, 0x28, 0x25, 0x63, 0xC5, 0xBA, 0xC1, 0x41,
+ 0x14, 0x23, 0x60, 0x5D, 0x1A, 0xE1, 0xAF, 0xAE,
+ 0x2C, 0x8B, 0x06, 0x60, 0x23, 0x7E, 0xC1, 0x28,
+ 0xAA, 0x0F, 0xE3, 0x46, 0x4E, 0x43, 0x58, 0x11,
+ 0x5D, 0xB8, 0x4C, 0xC3, 0xB5, 0x23, 0x07, 0x3A,
+ 0x28, 0xD4, 0x54, 0x98, 0x84, 0xB8, 0x1F, 0xF7,
+ 0x0E, 0x10, 0xBF, 0x36, 0x1C, 0x13, 0x72, 0x96,
+ 0x28, 0xD5, 0x34, 0x8F, 0x07, 0x21, 0x1E, 0x7E,
+ 0x4C, 0xF4, 0xF1, 0x8B, 0x28, 0x60, 0x90, 0xBD,
+ 0xB1, 0x24, 0x0B, 0x66, 0xD6, 0xCD, 0x4A, 0xFC,
+ 0xEA, 0xDC, 0x00, 0xCA, 0x44, 0x6C, 0xE0, 0x50,
+ 0x50, 0xFF, 0x18, 0x3A, 0xD2, 0xBB, 0xF1, 0x18,
+ 0xC1, 0xFC, 0x0E, 0xA5, 0x1F, 0x97, 0xD2, 0x2B,
+ 0x8F, 0x7E, 0x46, 0x70, 0x5D, 0x45, 0x27, 0xF4,
+ 0x5B, 0x42, 0xAE, 0xFF, 0x39, 0x58, 0x53, 0x37,
+ 0x6F, 0x69, 0x7D, 0xD5, 0xFD, 0xF2, 0xC5, 0x18,
+ 0x7D, 0x7D, 0x5F, 0x0E, 0x2E, 0xB8, 0xD4, 0x3F,
+ 0x17, 0xBA, 0x0F, 0x7C, 0x60, 0xFF, 0x43, 0x7F,
+ 0x53, 0x5D, 0xFE, 0xF2, 0x98, 0x33, 0xBF, 0x86,
+ 0xCB, 0xE8, 0x8E, 0xA4, 0xFB, 0xD4, 0x22, 0x1E,
+ 0x84, 0x11, 0x72, 0x83, 0x54, 0xFA, 0x30, 0xA7,
+ 0x00, 0x8F, 0x15, 0x4A, 0x41, 0xC7, 0xFC, 0x46,
+ 0x6B, 0x46, 0x45, 0xDB, 0xE2, 0xE3, 0x21, 0x26,
+ 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+#endif /* HAVE_FFDHE_Q */
-void wc_FreeDhKey(DhKey* key)
+const DhParams* wc_Dh_ffdhe8192_Get(void)
{
- (void)key;
-/* TomsFastMath doesn't use memory allocation */
-#ifndef USE_FAST_MATH
- mp_clear(&key->p);
- mp_clear(&key->g);
+ static const DhParams ffdhe8192 = {
+ #ifdef HAVE_FFDHE_Q
+ dh_ffdhe8192_q, sizeof(dh_ffdhe8192_q),
+ #endif /* HAVE_FFDHE_Q */
+ dh_ffdhe8192_p, sizeof(dh_ffdhe8192_p),
+ dh_ffdhe8192_g, sizeof(dh_ffdhe8192_g)
+ };
+ return &ffdhe8192;
+}
#endif
+
+int wc_InitDhKey_ex(DhKey* key, void* heap, int devId)
+{
+ int ret = 0;
+
+ if (key == NULL)
+ return BAD_FUNC_ARG;
+
+ key->heap = heap; /* for XMALLOC/XFREE in future */
+
+#if !defined(WOLFSSL_QT) && !defined(OPENSSL_ALL)
+ if (mp_init_multi(&key->p, &key->g, &key->q, NULL, NULL, NULL) != MP_OKAY)
+#else
+ if (mp_init_multi(&key->p,&key->g,&key->q,&key->pub,&key->priv,NULL) != MP_OKAY)
+#endif
+ return MEMORY_E;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+ /* handle as async */
+ ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_DH,
+ key->heap, devId);
+#else
+ (void)devId;
+#endif
+
+ return ret;
+}
+
+int wc_InitDhKey(DhKey* key)
+{
+ return wc_InitDhKey_ex(key, NULL, INVALID_DEVID);
}
-static word32 DiscreteLogWorkFactor(word32 n)
+int wc_FreeDhKey(DhKey* key)
{
- /* assuming discrete log takes about the same time as factoring */
- if (n<5)
- return 0;
- else
- return (word32)(2.4 * XPOW((double)n, 1.0/3.0) *
- XPOW(XLOG((double)n), 2.0/3.0) - 5);
+ if (key) {
+ mp_clear(&key->p);
+ mp_clear(&key->g);
+ mp_clear(&key->q);
+
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+ wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_DH);
+ #endif
+ }
+ return 0;
}
-static int GeneratePrivate(DhKey* key, RNG* rng, byte* priv, word32* privSz)
+#ifndef WC_NO_RNG
+/* if defined to not use floating point values do not compile in */
+#ifndef WOLFSSL_DH_CONST
+ static word32 DiscreteLogWorkFactor(word32 n)
+ {
+ /* assuming discrete log takes about the same time as factoring */
+ if (n < 5)
+ return 0;
+ else
+ return (word32)(2.4 * XPOW((double)n, 1.0/3.0) *
+ XPOW(XLOG((double)n), 2.0/3.0) - 5);
+ }
+#endif /* WOLFSSL_DH_CONST*/
+
+
+/* if not using fixed points use DiscreteLogWorkFactor function for unusual size
+ otherwise round up on size needed */
+#ifndef WOLFSSL_DH_CONST
+ #define WOLFSSL_DH_ROUND(x)
+#else
+ #define WOLFSSL_DH_ROUND(x) \
+ do { \
+ if (x % 128) { \
+ x &= 0xffffff80;\
+ x += 128; \
+ } \
+ } \
+ while (0)
+#endif
+
+
+#ifndef WOLFSSL_NO_DH186
+/* validate that (L,N) match allowed sizes from SP 800-56A, Section 5.5.1.1.
+ * modLen - represents L, the size of p in bits
+ * divLen - represents N, the size of q in bits
+ * return 0 on success, -1 on error */
+static int CheckDhLN(int modLen, int divLen)
{
- int ret;
- word32 sz = mp_unsigned_bin_size(&key->p);
- sz = min(sz, 2 * DiscreteLogWorkFactor(sz * WOLFSSL_BIT_SIZE) /
- WOLFSSL_BIT_SIZE + 1);
+ int ret = -1;
- ret = wc_RNG_GenerateBlock(rng, priv, sz);
- if (ret != 0)
- return ret;
+ switch (modLen) {
+ /* FA */
+ case 1024:
+ if (divLen == 160)
+ ret = 0;
+ break;
+ /* FB, FC */
+ case 2048:
+ if (divLen == 224 || divLen == 256)
+ ret = 0;
+ break;
+ default:
+ break;
+ }
- priv[0] |= 0x0C;
+ return ret;
+}
- *privSz = sz;
- return 0;
+/* Create DH private key
+ *
+ * Based on NIST FIPS 186-4,
+ * "B.1.1 Key Pair Generation Using Extra Random Bits"
+ *
+ * dh - pointer to initialized DhKey structure, needs to have dh->q
+ * rng - pointer to initialized WC_RNG structure
+ * priv - output location for generated private key
+ * privSz - IN/OUT, size of priv buffer, size of generated private key
+ *
+ * return 0 on success, negative on error */
+static int GeneratePrivateDh186(DhKey* key, WC_RNG* rng, byte* priv,
+ word32* privSz)
+{
+ byte* cBuf;
+ int qSz, pSz, cSz, err;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* tmpQ = NULL;
+ mp_int* tmpX = NULL;
+#else
+ mp_int tmpQ[1], tmpX[1];
+#endif
+
+ /* Parameters validated in calling functions. */
+
+ if (mp_iszero(&key->q) == MP_YES) {
+ WOLFSSL_MSG("DH q parameter needed for FIPS 186-4 key generation");
+ return BAD_FUNC_ARG;
+ }
+
+ qSz = mp_unsigned_bin_size(&key->q);
+ pSz = mp_unsigned_bin_size(&key->p);
+
+ /* verify (L,N) pair bit lengths */
+ if (CheckDhLN(pSz * WOLFSSL_BIT_SIZE, qSz * WOLFSSL_BIT_SIZE) != 0) {
+ WOLFSSL_MSG("DH param sizes do not match SP 800-56A requirements");
+ return BAD_FUNC_ARG;
+ }
+
+ /* generate extra 64 bits so that bias from mod function is negligible */
+ cSz = qSz + (64 / WOLFSSL_BIT_SIZE);
+ cBuf = (byte*)XMALLOC(cSz, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (cBuf == NULL) {
+ return MEMORY_E;
+ }
+#ifdef WOLFSSL_SMALL_STACK
+ tmpQ = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (tmpQ == NULL) {
+ XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
+ tmpX = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (tmpX == NULL) {
+ XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(tmpQ, key->heap, DYNAMIC_TYPE_DH);
+ return MEMORY_E;
+ }
+#endif
+
+
+ if ((err = mp_init_multi(tmpX, tmpQ, NULL, NULL, NULL, NULL))
+ != MP_OKAY) {
+ XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmpQ, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(tmpX, key->heap, DYNAMIC_TYPE_DH);
+#endif
+ return err;
+ }
+
+ do {
+ /* generate N+64 bits (c) from RBG into tmpX, making sure positive.
+ * Hash_DRBG uses SHA-256 which matches maximum
+ * requested_security_strength of (L,N) */
+ err = wc_RNG_GenerateBlock(rng, cBuf, cSz);
+ if (err == MP_OKAY)
+ err = mp_read_unsigned_bin(tmpX, cBuf, cSz);
+ if (err != MP_OKAY) {
+ mp_clear(tmpX);
+ mp_clear(tmpQ);
+ XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmpQ, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(tmpX, key->heap, DYNAMIC_TYPE_DH);
+#endif
+ return err;
+ }
+ } while (mp_cmp_d(tmpX, 1) != MP_GT);
+
+ ForceZero(cBuf, cSz);
+ XFREE(cBuf, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ /* tmpQ = q - 1 */
+ if (err == MP_OKAY)
+ err = mp_copy(&key->q, tmpQ);
+
+ if (err == MP_OKAY)
+ err = mp_sub_d(tmpQ, 1, tmpQ);
+
+ /* x = c mod (q-1), tmpX holds c */
+ if (err == MP_OKAY)
+ err = mp_mod(tmpX, tmpQ, tmpX);
+
+ /* x = c mod (q-1) + 1 */
+ if (err == MP_OKAY)
+ err = mp_add_d(tmpX, 1, tmpX);
+
+ /* copy tmpX into priv */
+ if (err == MP_OKAY) {
+ pSz = mp_unsigned_bin_size(tmpX);
+ if (pSz > (int)*privSz) {
+ WOLFSSL_MSG("DH private key output buffer too small");
+ err = BAD_FUNC_ARG;
+ } else {
+ *privSz = pSz;
+ err = mp_to_unsigned_bin(tmpX, priv);
+ }
+ }
+
+ mp_forcezero(tmpX);
+ mp_clear(tmpX);
+ mp_clear(tmpQ);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmpQ, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(tmpX, key->heap, DYNAMIC_TYPE_DH);
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_NO_DH186 */
+#endif /* !WC_NO_RNG */
+
+static int GeneratePrivateDh(DhKey* key, WC_RNG* rng, byte* priv,
+ word32* privSz)
+{
+#ifndef WC_NO_RNG
+ int ret = 0;
+ word32 sz = 0;
+
+#ifndef WOLFSSL_NO_DH186
+ if (mp_iszero(&key->q) == MP_NO) {
+
+ /* q param available, use NIST FIPS 186-4, "B.1.1 Key Pair
+ * Generation Using Extra Random Bits" */
+ ret = GeneratePrivateDh186(key, rng, priv, privSz);
+
+ } else
+#endif
+ {
+
+ sz = mp_unsigned_bin_size(&key->p);
+
+ /* Table of predetermined values from the operation
+ 2 * DiscreteLogWorkFactor(sz * WOLFSSL_BIT_SIZE) /
+ WOLFSSL_BIT_SIZE + 1
+ Sizes in table checked against RFC 3526
+ */
+ WOLFSSL_DH_ROUND(sz); /* if using fixed points only, then round up */
+ switch (sz) {
+ case 128: sz = 21; break;
+ case 256: sz = 29; break;
+ case 384: sz = 34; break;
+ case 512: sz = 39; break;
+ case 640: sz = 42; break;
+ case 768: sz = 46; break;
+ case 896: sz = 49; break;
+ case 1024: sz = 52; break;
+ default:
+ #ifndef WOLFSSL_DH_CONST
+ /* if using floating points and size of p is not in table */
+ sz = min(sz, 2 * DiscreteLogWorkFactor(sz * WOLFSSL_BIT_SIZE) /
+ WOLFSSL_BIT_SIZE + 1);
+ break;
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+ }
+
+ ret = wc_RNG_GenerateBlock(rng, priv, sz);
+
+ if (ret == 0) {
+ priv[0] |= 0x0C;
+ *privSz = sz;
+ }
+ }
+
+ return ret;
+#else
+ (void)key;
+ (void)rng;
+ (void)priv;
+ (void)privSz;
+ return NOT_COMPILED_IN;
+#endif /* WC_NO_RNG */
}
-static int GeneratePublic(DhKey* key, const byte* priv, word32 privSz,
- byte* pub, word32* pubSz)
+static int GeneratePublicDh(DhKey* key, byte* priv, word32 privSz,
+ byte* pub, word32* pubSz)
{
int ret = 0;
+#ifndef WOLFSSL_SP_MATH
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* x;
+ mp_int* y;
+#else
+ mp_int x[1];
+ mp_int y[1];
+#endif
+#endif
- mp_int x;
- mp_int y;
+#ifdef WOLFSSL_HAVE_SP_DH
+#ifndef WOLFSSL_SP_NO_2048
+ if (mp_count_bits(&key->p) == 2048)
+ return sp_DhExp_2048(&key->g, priv, privSz, &key->p, pub, pubSz);
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+ if (mp_count_bits(&key->p) == 3072)
+ return sp_DhExp_3072(&key->g, priv, privSz, &key->p, pub, pubSz);
+#endif
+#ifdef WOLFSSL_SP_4096
+ if (mp_count_bits(&key->p) == 4096)
+ return sp_DhExp_4096(&key->g, priv, privSz, &key->p, pub, pubSz);
+#endif
+#endif
- if (mp_init_multi(&x, &y, 0, 0, 0, 0) != MP_OKAY)
+#ifndef WOLFSSL_SP_MATH
+#ifdef WOLFSSL_SMALL_STACK
+ x = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (x == NULL)
+ return MEMORY_E;
+ y = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (y == NULL) {
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+ return MEMORY_E;
+ }
+#endif
+ if (mp_init_multi(x, y, 0, 0, 0, 0) != MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+ #endif
return MP_INIT_E;
+ }
- if (mp_read_unsigned_bin(&x, priv, privSz) != MP_OKAY)
+ if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY)
ret = MP_READ_E;
- if (ret == 0 && mp_exptmod(&key->g, &x, &key->p, &y) != MP_OKAY)
+ if (ret == 0 && mp_exptmod(&key->g, x, &key->p, y) != MP_OKAY)
ret = MP_EXPTMOD_E;
- if (ret == 0 && mp_to_unsigned_bin(&y, pub) != MP_OKAY)
+ if (ret == 0 && mp_to_unsigned_bin(y, pub) != MP_OKAY)
ret = MP_TO_E;
if (ret == 0)
- *pubSz = mp_unsigned_bin_size(&y);
+ *pubSz = mp_unsigned_bin_size(y);
- mp_clear(&y);
- mp_clear(&x);
+ mp_clear(y);
+ mp_clear(x);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+#endif
+#else
+ ret = WC_KEY_SIZE_E;
+#endif
return ret;
}
+static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng,
+ byte* priv, word32* privSz, byte* pub, word32* pubSz)
+{
+ int ret;
-int wc_DhGenerateKeyPair(DhKey* key, RNG* rng, byte* priv, word32* privSz,
- byte* pub, word32* pubSz)
+ if (key == NULL || rng == NULL || priv == NULL || privSz == NULL ||
+ pub == NULL || pubSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = GeneratePrivateDh(key, rng, priv, privSz);
+
+ return (ret != 0) ? ret : GeneratePublicDh(key, priv, *privSz, pub, pubSz);
+}
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+static int wc_DhGenerateKeyPair_Async(DhKey* key, WC_RNG* rng,
+ byte* priv, word32* privSz, byte* pub, word32* pubSz)
{
- int ret = GeneratePrivate(key, rng, priv, privSz);
+ int ret;
+
+#if defined(HAVE_INTEL_QA)
+ word32 pBits;
+
+ /* QAT DH sizes: 768, 1024, 1536, 2048, 3072 and 4096 bits */
+ pBits = mp_unsigned_bin_size(&key->p) * 8;
+ if (pBits == 768 || pBits == 1024 || pBits == 1536 ||
+ pBits == 2048 || pBits == 3072 || pBits == 4096) {
+ mp_int x;
- return (ret != 0) ? ret : GeneratePublic(key, priv, *privSz, pub, pubSz);
+ ret = mp_init(&x);
+ if (ret != MP_OKAY)
+ return ret;
+
+ ret = GeneratePrivateDh(key, rng, priv, privSz);
+ if (ret == 0)
+ ret = mp_read_unsigned_bin(&x, priv, *privSz);
+ if (ret == MP_OKAY)
+ ret = wc_mp_to_bigint(&x, &x.raw);
+ if (ret == MP_OKAY)
+ ret = wc_mp_to_bigint(&key->p, &key->p.raw);
+ if (ret == MP_OKAY)
+ ret = wc_mp_to_bigint(&key->g, &key->g.raw);
+ if (ret == MP_OKAY)
+ ret = IntelQaDhKeyGen(&key->asyncDev, &key->p.raw, &key->g.raw,
+ &x.raw, pub, pubSz);
+ mp_clear(&x);
+
+ return ret;
+ }
+
+#elif defined(HAVE_CAVIUM)
+ /* TODO: Not implemented - use software for now */
+
+#else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_DH_GEN)) {
+ WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+ testDev->dhGen.key = key;
+ testDev->dhGen.rng = rng;
+ testDev->dhGen.priv = priv;
+ testDev->dhGen.privSz = privSz;
+ testDev->dhGen.pub = pub;
+ testDev->dhGen.pubSz = pubSz;
+ return WC_PENDING_E;
+ }
+#endif
+
+ /* otherwise use software DH */
+ ret = wc_DhGenerateKeyPair_Sync(key, rng, priv, privSz, pub, pubSz);
+
+ return ret;
}
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_DH */
-int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
- word32 privSz, const byte* otherPub, word32 pubSz)
+
+/* Check DH Public Key for invalid numbers, optionally allowing
+ * the public key to be checked against the large prime (q).
+ * Checks follow the process in SP 800-56Ar3, section 5.6.2.3.1.
+ *
+ * key DH key group parameters.
+ * pub Public Key.
+ * pubSz Public Key size.
+ * prime Large prime (q), optionally NULL to skip check
+ * primeSz Size of large prime
+ *
+ * returns 0 on success or error code
+ */
+int wc_DhCheckPubKey_ex(DhKey* key, const byte* pub, word32 pubSz,
+ const byte* prime, word32 primeSz)
{
int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* y = NULL;
+ mp_int* p = NULL;
+ mp_int* q = NULL;
+#else
+ mp_int y[1];
+ mp_int p[1];
+ mp_int q[1];
+#endif
+
+ if (key == NULL || pub == NULL) {
+ return BAD_FUNC_ARG;
+ }
- mp_int x;
- mp_int y;
- mp_int z;
+#ifdef WOLFSSL_SMALL_STACK
+ y = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (y == NULL)
+ return MEMORY_E;
+ p = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (p == NULL) {
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ return MEMORY_E;
+ }
+ q = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (q == NULL) {
+ XFREE(p, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ return MEMORY_E;
+ }
+#endif
- if (mp_init_multi(&x, &y, &z, 0, 0, 0) != MP_OKAY)
+ if (mp_init_multi(y, p, q, NULL, NULL, NULL) != MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(q, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(p, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ #endif
return MP_INIT_E;
+ }
- if (mp_read_unsigned_bin(&x, priv, privSz) != MP_OKAY)
+ if (mp_read_unsigned_bin(y, pub, pubSz) != MP_OKAY) {
ret = MP_READ_E;
+ }
+
+ if (ret == 0 && prime != NULL) {
+ if (mp_read_unsigned_bin(q, prime, primeSz) != MP_OKAY)
+ ret = MP_READ_E;
+
+ } else if (mp_iszero(&key->q) == MP_NO) {
+ /* use q available in DhKey */
+ if (mp_copy(&key->q, q) != MP_OKAY)
+ ret = MP_INIT_E;
+ }
+
+ /* SP 800-56Ar3, section 5.6.2.3.1, process step 1 */
+ /* pub (y) should not be 0 or 1 */
+ if (ret == 0 && mp_cmp_d(y, 2) == MP_LT) {
+ ret = MP_CMP_E;
+ }
- if (ret == 0 && mp_read_unsigned_bin(&y, otherPub, pubSz) != MP_OKAY)
+ /* pub (y) shouldn't be greater than or equal to p - 1 */
+ if (ret == 0 && mp_copy(&key->p, p) != MP_OKAY) {
+ ret = MP_INIT_E;
+ }
+ if (ret == 0 && mp_sub_d(p, 2, p) != MP_OKAY) {
+ ret = MP_SUB_E;
+ }
+ if (ret == 0 && mp_cmp(y, p) == MP_GT) {
+ ret = MP_CMP_E;
+ }
+
+ if (ret == 0 && (prime != NULL || (mp_iszero(&key->q) == MP_NO) )) {
+
+ /* restore key->p into p */
+ if (mp_copy(&key->p, p) != MP_OKAY)
+ ret = MP_INIT_E;
+ }
+
+ if (ret == 0 && prime != NULL) {
+#ifdef WOLFSSL_HAVE_SP_DH
+#ifndef WOLFSSL_SP_NO_2048
+ if (mp_count_bits(&key->p) == 2048) {
+ ret = sp_ModExp_2048(y, q, p, y);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ }
+ else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+ if (mp_count_bits(&key->p) == 3072) {
+ ret = sp_ModExp_3072(y, q, p, y);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ }
+ else
+#endif
+#ifdef WOLFSSL_SP_NO_4096
+ if (mp_count_bits(&key->p) == 4096) {
+ ret = sp_ModExp_4096(y, q, p, y);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ }
+ else
+#endif
+#endif
+
+ {
+ /* SP 800-56Ar3, section 5.6.2.3.1, process step 2 */
+#ifndef WOLFSSL_SP_MATH
+ /* calculate (y^q) mod(p), store back into y */
+ if (mp_exptmod(y, q, p, y) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+#else
+ ret = WC_KEY_SIZE_E;
+#endif
+ }
+
+ /* verify above == 1 */
+ if (ret == 0 && mp_cmp_d(y, 1) != MP_EQ)
+ ret = MP_CMP_E;
+ }
+
+ mp_clear(y);
+ mp_clear(p);
+ mp_clear(q);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(q, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(p, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+#endif
+
+ return ret;
+}
+
+
+/* Check DH Public Key for invalid numbers
+ *
+ * key DH key group parameters.
+ * pub Public Key.
+ * pubSz Public Key size.
+ *
+ * returns 0 on success or error code
+ */
+int wc_DhCheckPubKey(DhKey* key, const byte* pub, word32 pubSz)
+{
+ return wc_DhCheckPubKey_ex(key, pub, pubSz, NULL, 0);
+}
+
+
+/**
+ * Quick validity check of public key value against prime.
+ * Checks are:
+ * - Public key not 0 or 1
+ * - Public key not equal to prime or prime - 1
+ * - Public key not bigger than prime.
+ *
+ * prime Big-endian encoding of prime in bytes.
+ * primeSz Size of prime in bytes.
+ * pub Big-endian encoding of public key in bytes.
+ * pubSz Size of public key in bytes.
+ */
+int wc_DhCheckPubValue(const byte* prime, word32 primeSz, const byte* pub,
+ word32 pubSz)
+{
+ int ret = 0;
+ word32 i;
+
+ for (i = 0; i < pubSz && pub[i] == 0; i++) {
+ }
+ pubSz -= i;
+ pub += i;
+
+ if (pubSz == 0 || (pubSz == 1 && pub[0] == 1))
+ ret = MP_VAL;
+ else if (pubSz == primeSz) {
+ for (i = 0; i < pubSz-1 && pub[i] == prime[i]; i++) {
+ }
+ if (i == pubSz-1 && (pub[i] == prime[i] || pub[i] == prime[i] - 1))
+ ret = MP_VAL;
+ else if (pub[i] > prime[i])
+ ret = MP_VAL;
+ }
+ else if (pubSz > primeSz)
+ ret = MP_VAL;
+
+ return ret;
+}
+
+
+/* Check DH Private Key for invalid numbers, optionally allowing
+ * the private key to be checked against the large prime (q).
+ * Checks follow the process in SP 800-56Ar3, section 5.6.2.1.2.
+ *
+ * key DH key group parameters.
+ * priv Private Key.
+ * privSz Private Key size.
+ * prime Large prime (q), optionally NULL to skip check
+ * primeSz Size of large prime
+ *
+ * returns 0 on success or error code
+ */
+int wc_DhCheckPrivKey_ex(DhKey* key, const byte* priv, word32 privSz,
+ const byte* prime, word32 primeSz)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* x = NULL;
+ mp_int* q = NULL;
+#else
+ mp_int x[1];
+ mp_int q[1];
+#endif
+
+ if (key == NULL || priv == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ x = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (x == NULL)
+ return MEMORY_E;
+ q = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (q == NULL) {
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+ return MEMORY_E;
+ }
+#endif
+
+ if (mp_init_multi(x, q, NULL, NULL, NULL, NULL) != MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(q, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+ #endif
+ return MP_INIT_E;
+ }
+
+ if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY) {
ret = MP_READ_E;
+ }
+
+ if (ret == 0) {
+ if (prime != NULL) {
+ if (mp_read_unsigned_bin(q, prime, primeSz) != MP_OKAY)
+ ret = MP_READ_E;
+ }
+ else if (mp_iszero(&key->q) == MP_NO) {
+ /* use q available in DhKey */
+ if (mp_copy(&key->q, q) != MP_OKAY)
+ ret = MP_INIT_E;
+ }
+ }
+
+ /* priv (x) should not be 0 */
+ if (ret == 0) {
+ if (mp_cmp_d(x, 0) == MP_EQ)
+ ret = MP_CMP_E;
+ }
+
+ if (ret == 0) {
+ if (mp_iszero(q) == MP_NO) {
+ /* priv (x) shouldn't be greater than q - 1 */
+ if (ret == 0) {
+ if (mp_copy(&key->q, q) != MP_OKAY)
+ ret = MP_INIT_E;
+ }
+ if (ret == 0) {
+ if (mp_sub_d(q, 1, q) != MP_OKAY)
+ ret = MP_SUB_E;
+ }
+ if (ret == 0) {
+ if (mp_cmp(x, q) == MP_GT)
+ ret = DH_CHECK_PRIV_E;
+ }
+ }
+ }
+
+ mp_clear(x);
+ mp_clear(q);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(q, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+#endif
+
+ return ret;
+}
+
+
+/* Check DH Private Key for invalid numbers
+ *
+ * key DH key group parameters.
+ * priv Private Key.
+ * privSz Private Key size.
+ *
+ * returns 0 on success or error code
+ */
+int wc_DhCheckPrivKey(DhKey* key, const byte* priv, word32 privSz)
+{
+ return wc_DhCheckPrivKey_ex(key, priv, privSz, NULL, 0);
+}
+
- if (ret == 0 && mp_exptmod(&y, &x, &key->p, &z) != MP_OKAY)
+/* Check DH Keys for pair-wise consistency, following the process in
+ * SP 800-56Ar3, section 5.6.2.1.4, method (b) for FFC.
+ *
+ * key DH key group parameters.
+ * pub Public Key.
+ * pubSz Public Key size.
+ * priv Private Key.
+ * privSz Private Key size.
+ *
+ * returns 0 on success or error code
+ */
+int wc_DhCheckKeyPair(DhKey* key, const byte* pub, word32 pubSz,
+ const byte* priv, word32 privSz)
+{
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* publicKey = NULL;
+ mp_int* privateKey = NULL;
+ mp_int* checkKey = NULL;
+#else
+ mp_int publicKey[1];
+ mp_int privateKey[1];
+ mp_int checkKey[1];
+#endif
+ int ret = 0;
+
+ if (key == NULL || pub == NULL || priv == NULL)
+ return BAD_FUNC_ARG;
+
+#ifdef WOLFSSL_SMALL_STACK
+ publicKey = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (publicKey == NULL)
+ return MEMORY_E;
+ privateKey = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (privateKey == NULL) {
+ XFREE(publicKey, key->heap, DYNAMIC_TYPE_DH);
+ return MEMORY_E;
+ }
+ checkKey = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (checkKey == NULL) {
+ XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(publicKey, key->heap, DYNAMIC_TYPE_DH);
+ return MEMORY_E;
+ }
+#endif
+
+ if (mp_init_multi(publicKey, privateKey, checkKey,
+ NULL, NULL, NULL) != MP_OKAY) {
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(publicKey, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(checkKey, key->heap, DYNAMIC_TYPE_DH);
+ #endif
+ return MP_INIT_E;
+ }
+
+ /* Load the private and public keys into big integers. */
+ if (mp_read_unsigned_bin(publicKey, pub, pubSz) != MP_OKAY ||
+ mp_read_unsigned_bin(privateKey, priv, privSz) != MP_OKAY) {
+
+ ret = MP_READ_E;
+ }
+
+ /* Calculate checkKey = g^privateKey mod p */
+ if (ret == 0) {
+#ifdef WOLFSSL_HAVE_SP_DH
+#ifndef WOLFSSL_SP_NO_2048
+ if (mp_count_bits(&key->p) == 2048) {
+ ret = sp_ModExp_2048(&key->g, privateKey, &key->p, checkKey);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ }
+ else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+ if (mp_count_bits(&key->p) == 3072) {
+ ret = sp_ModExp_3072(&key->g, privateKey, &key->p, checkKey);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ }
+ else
+#endif
+#ifdef WOLFSSL_SP_4096
+ if (mp_count_bits(&key->p) == 4096) {
+ ret = sp_ModExp_4096(&key->g, privateKey, &key->p, checkKey);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ }
+ else
+#endif
+#endif
+ {
+#ifndef WOLFSSL_SP_MATH
+ if (mp_exptmod(&key->g, privateKey, &key->p, checkKey) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+#else
+ ret = WC_KEY_SIZE_E;
+#endif
+ }
+ }
+
+ /* Compare the calculated public key to the supplied check value. */
+ if (ret == 0) {
+ if (mp_cmp(checkKey, publicKey) != MP_EQ)
+ ret = MP_CMP_E;
+ }
+
+ mp_forcezero(privateKey);
+ mp_clear(privateKey);
+ mp_clear(publicKey);
+ mp_clear(checkKey);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(checkKey, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(publicKey, key->heap, DYNAMIC_TYPE_DH);
+#endif
+
+ return ret;
+}
+
+
+int wc_DhGenerateKeyPair(DhKey* key, WC_RNG* rng,
+ byte* priv, word32* privSz, byte* pub, word32* pubSz)
+{
+ int ret;
+
+ if (key == NULL || rng == NULL || priv == NULL || privSz == NULL ||
+ pub == NULL || pubSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_DH) {
+ ret = wc_DhGenerateKeyPair_Async(key, rng, priv, privSz, pub, pubSz);
+ }
+ else
+#endif
+ {
+ ret = wc_DhGenerateKeyPair_Sync(key, rng, priv, privSz, pub, pubSz);
+ }
+
+ return ret;
+}
+
+
+static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz,
+ const byte* priv, word32 privSz, const byte* otherPub, word32 pubSz)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* y;
+#ifndef WOLFSSL_SP_MATH
+ mp_int* x;
+ mp_int* z;
+#endif
+#else
+ mp_int y[1];
+#ifndef WOLFSSL_SP_MATH
+ mp_int x[1];
+ mp_int z[1];
+#endif
+#endif
+
+#ifdef WOLFSSL_VALIDATE_FFC_IMPORT
+ if (wc_DhCheckPrivKey(key, priv, privSz) != 0) {
+ WOLFSSL_MSG("wc_DhAgree wc_DhCheckPrivKey failed");
+ return DH_CHECK_PRIV_E;
+ }
+
+ if (wc_DhCheckPubKey(key, otherPub, pubSz) != 0) {
+ WOLFSSL_MSG("wc_DhAgree wc_DhCheckPubKey failed");
+ return DH_CHECK_PUB_E;
+ }
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ y = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (y == NULL)
+ return MEMORY_E;
+#ifndef WOLFSSL_SP_MATH
+ x = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (x == NULL) {
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ return MEMORY_E;
+ }
+ z = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_DH);
+ if (z == NULL) {
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ return MEMORY_E;
+ }
+#endif
+#endif
+
+#ifdef WOLFSSL_HAVE_SP_DH
+#ifndef WOLFSSL_SP_NO_2048
+ if (mp_count_bits(&key->p) == 2048) {
+ if (mp_init(y) != MP_OKAY)
+ return MP_INIT_E;
+
+ if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY)
+ ret = MP_READ_E;
+
+ if (ret == 0)
+ ret = sp_DhExp_2048(y, priv, privSz, &key->p, agree, agreeSz);
+
+ mp_clear(y);
+ #ifdef WOLFSSL_SMALL_STACK
+ #ifndef WOLFSSL_SP_MATH
+ XFREE(z, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+ #endif
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ #endif
+ return ret;
+ }
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+ if (mp_count_bits(&key->p) == 3072) {
+ if (mp_init(y) != MP_OKAY)
+ return MP_INIT_E;
+
+ if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY)
+ ret = MP_READ_E;
+
+ if (ret == 0)
+ ret = sp_DhExp_3072(y, priv, privSz, &key->p, agree, agreeSz);
+
+ mp_clear(y);
+ #ifdef WOLFSSL_SMALL_STACK
+ #ifndef WOLFSSL_SP_MATH
+ XFREE(z, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+ #endif
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ #endif
+ return ret;
+ }
+#endif
+#ifdef WOLFSSL_SP_4096
+ if (mp_count_bits(&key->p) == 4096) {
+ if (mp_init(y) != MP_OKAY)
+ return MP_INIT_E;
+
+ if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY)
+ ret = MP_READ_E;
+
+ if (ret == 0)
+ ret = sp_DhExp_4096(y, priv, privSz, &key->p, agree, agreeSz);
+
+ mp_clear(y);
+ #ifdef WOLFSSL_SMALL_STACK
+ #ifndef WOLFSSL_SP_MATH
+ XFREE(z, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+ #endif
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ #endif
+ return ret;
+ }
+#endif
+#endif
+
+#ifndef WOLFSSL_SP_MATH
+ if (mp_init_multi(x, y, z, 0, 0, 0) != MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(z, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+ #endif
+ return MP_INIT_E;
+ }
+
+ if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY)
+ ret = MP_READ_E;
+
+ if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY)
+ ret = MP_READ_E;
+
+ if (ret == 0 && mp_exptmod(y, x, &key->p, z) != MP_OKAY)
ret = MP_EXPTMOD_E;
- if (ret == 0 && mp_to_unsigned_bin(&z, agree) != MP_OKAY)
+ /* make sure z is not one (SP800-56A, 5.7.1.1) */
+ if (ret == 0 && (mp_cmp_d(z, 1) == MP_EQ))
+ ret = MP_VAL;
+
+ if (ret == 0 && mp_to_unsigned_bin(z, agree) != MP_OKAY)
ret = MP_TO_E;
if (ret == 0)
- *agreeSz = mp_unsigned_bin_size(&z);
+ *agreeSz = mp_unsigned_bin_size(z);
+
+ mp_clear(z);
+ mp_clear(y);
+ mp_forcezero(x);
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+#ifndef WOLFSSL_SP_MATH
+ XFREE(z, key->heap, DYNAMIC_TYPE_DH);
+ XFREE(x, key->heap, DYNAMIC_TYPE_DH);
+#endif
+ XFREE(y, key->heap, DYNAMIC_TYPE_DH);
+#endif
+
+ return ret;
+}
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+static int wc_DhAgree_Async(DhKey* key, byte* agree, word32* agreeSz,
+ const byte* priv, word32 privSz, const byte* otherPub, word32 pubSz)
+{
+ int ret;
+
+#if defined(HAVE_INTEL_QA)
+ word32 pBits;
+
+ /* QAT DH sizes: 768, 1024, 1536, 2048, 3072 and 4096 bits */
+ pBits = mp_unsigned_bin_size(&key->p) * 8;
+ if (pBits == 768 || pBits == 1024 || pBits == 1536 ||
+ pBits == 2048 || pBits == 3072 || pBits == 4096) {
+ ret = wc_mp_to_bigint(&key->p, &key->p.raw);
+ if (ret == MP_OKAY)
+ ret = IntelQaDhAgree(&key->asyncDev, &key->p.raw,
+ agree, agreeSz, priv, privSz, otherPub, pubSz);
+ return ret;
+ }
+
+#elif defined(HAVE_CAVIUM)
+ /* TODO: Not implemented - use software for now */
+
+#else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_DH_AGREE)) {
+ WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+ testDev->dhAgree.key = key;
+ testDev->dhAgree.agree = agree;
+ testDev->dhAgree.agreeSz = agreeSz;
+ testDev->dhAgree.priv = priv;
+ testDev->dhAgree.privSz = privSz;
+ testDev->dhAgree.otherPub = otherPub;
+ testDev->dhAgree.pubSz = pubSz;
+ return WC_PENDING_E;
+ }
+#endif
- mp_clear(&z);
- mp_clear(&y);
- mp_clear(&x);
+ /* otherwise use software DH */
+ ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
return ret;
}
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
+ word32 privSz, const byte* otherPub, word32 pubSz)
+{
+ int ret = 0;
+
+ if (key == NULL || agree == NULL || agreeSz == NULL || priv == NULL ||
+ otherPub == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_DH) {
+ ret = wc_DhAgree_Async(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+ }
+ else
+#endif
+ {
+ ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+ }
+
+ return ret;
+}
+
+#if defined(WOLFSSL_QT) || defined(OPENSSL_ALL)
+/* Sets private and public key in DhKey if both are available, otherwise sets
+ either private or public key, depending on which is available.
+ Returns WOLFSSL_SUCCESS if at least one of the keys was set. */
+WOLFSSL_LOCAL int wc_DhSetFullKeys(DhKey* key,const byte* priv_key,word32 privSz,
+ const byte* pub_key, word32 pubSz)
+{
+ byte havePriv = 0;
+ byte havePub = 0;
+ mp_int* keyPriv = NULL;
+ mp_int* keyPub = NULL;
+
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ havePriv = ( (priv_key != NULL) && (privSz > 0) );
+ havePub = ( (pub_key != NULL) && (pubSz > 0) );
+
+ if (!havePub && !havePriv) {
+ WOLFSSL_MSG("No Public or Private Key to Set");
+ return BAD_FUNC_ARG;
+ }
+ /* Set Private Key */
+ if (havePriv == TRUE) {
+ /* may have leading 0 */
+ if (priv_key[0] == 0) {
+ privSz--; priv_key++;
+ }
+ if (mp_init(&key->priv) != MP_OKAY)
+ havePriv = FALSE;
+ }
+
+ if (havePriv == TRUE) {
+ if (mp_read_unsigned_bin(&key->priv, priv_key, privSz) != MP_OKAY) {
+ havePriv = FALSE;
+ } else {
+ keyPriv = &key->priv;
+ WOLFSSL_MSG("DH Private Key Set.");
+ }
+ }
+
+ /* Set Public Key */
+ if (havePub == TRUE) {
+ /* may have leading 0 */
+ if (pub_key[0] == 0) {
+ pubSz--; pub_key++;
+ }
+ if (mp_init(&key->pub) != MP_OKAY)
+ havePub = FALSE;
+ }
+
+ if (havePub == TRUE) {
+ if (mp_read_unsigned_bin(&key->pub, pub_key, pubSz) != MP_OKAY) {
+ havePub = FALSE;
+ } else {
+ keyPub = &key->pub;
+ WOLFSSL_MSG("DH Public Key Set.");
+ }
+ }
+    /* Free Memory if error occurred */
+ if (havePriv == FALSE && keyPriv != NULL)
+ mp_clear(keyPriv);
+ if (havePub == FALSE && keyPub != NULL)
+ mp_clear(keyPub);
+
+ /* WOLFSSL_SUCCESS if private or public was set else WOLFSSL_FAILURE */
+ return havePriv || havePub;
+}
+#endif
+
+static int _DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
+ word32 gSz, const byte* q, word32 qSz, int trusted,
+ WC_RNG* rng)
+{
+ int ret = 0;
+ mp_int* keyP = NULL;
+ mp_int* keyG = NULL;
+
+ if (key == NULL || p == NULL || g == NULL || pSz == 0 || gSz == 0) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ /* may have leading 0 */
+ if (p[0] == 0) {
+ pSz--; p++;
+ }
+
+ if (g[0] == 0) {
+ gSz--; g++;
+ }
+
+ if (q != NULL) {
+ if (q[0] == 0) {
+ qSz--; q++;
+ }
+ }
+
+ if (mp_init(&key->p) != MP_OKAY)
+ ret = MP_INIT_E;
+ }
+
+ if (ret == 0) {
+ if (mp_read_unsigned_bin(&key->p, p, pSz) != MP_OKAY)
+ ret = ASN_DH_KEY_E;
+ else
+ keyP = &key->p;
+ }
+
+ if (ret == 0 && !trusted) {
+ int isPrime = 0;
+ if (rng != NULL)
+ ret = mp_prime_is_prime_ex(keyP, 8, &isPrime, rng);
+ else
+ ret = mp_prime_is_prime(keyP, 8, &isPrime);
+
+ if (ret == 0 && isPrime == 0)
+ ret = DH_CHECK_PUB_E;
+ }
+
+ if (ret == 0 && mp_init(&key->g) != MP_OKAY)
+ ret = MP_INIT_E;
+ if (ret == 0) {
+ if (mp_read_unsigned_bin(&key->g, g, gSz) != MP_OKAY)
+ ret = ASN_DH_KEY_E;
+ else
+ keyG = &key->g;
+ }
+
+ if (ret == 0 && q != NULL) {
+ if (mp_init(&key->q) != MP_OKAY)
+ ret = MP_INIT_E;
+ }
+ if (ret == 0 && q != NULL) {
+ if (mp_read_unsigned_bin(&key->q, q, qSz) != MP_OKAY)
+ ret = MP_INIT_E;
+ }
+
+ if (ret != 0 && key != NULL) {
+ if (keyG)
+ mp_clear(keyG);
+ if (keyP)
+ mp_clear(keyP);
+ }
+
+ return ret;
+}
+
+
+int wc_DhSetCheckKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
+ word32 gSz, const byte* q, word32 qSz, int trusted,
+ WC_RNG* rng)
+{
+ return _DhSetKey(key, p, pSz, g, gSz, q, qSz, trusted, rng);
+}
+
+
+int wc_DhSetKey_ex(DhKey* key, const byte* p, word32 pSz, const byte* g,
+ word32 gSz, const byte* q, word32 qSz)
+{
+ return _DhSetKey(key, p, pSz, g, gSz, q, qSz, 1, NULL);
+}
/* not in asn anymore since no actual asn types used */
int wc_DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
word32 gSz)
{
- if (key == NULL || p == NULL || g == NULL || pSz == 0 || gSz == 0)
- return BAD_FUNC_ARG;
+ return _DhSetKey(key, p, pSz, g, gSz, NULL, 0, 1, NULL);
+}
+
+
+#ifdef WOLFSSL_KEY_GEN
+
+/* modulus_size in bits */
+int wc_DhGenerateParams(WC_RNG *rng, int modSz, DhKey *dh)
+{
+ mp_int tmp, tmp2;
+ int groupSz = 0, bufSz = 0,
+ primeCheckCount = 0,
+ primeCheck = MP_NO,
+ ret = 0;
+ unsigned char *buf = NULL;
- /* may have leading 0 */
- if (p[0] == 0) {
- pSz--; p++;
+ if (rng == NULL || dh == NULL)
+ ret = BAD_FUNC_ARG;
+
+ /* set group size in bytes from modulus size
+ * FIPS 186-4 defines valid values (1024, 160) (2048, 256) (3072, 256)
+ */
+ if (ret == 0) {
+ switch (modSz) {
+ case 1024:
+ groupSz = 20;
+ break;
+ case 2048:
+ case 3072:
+ groupSz = 32;
+ break;
+ default:
+ ret = BAD_FUNC_ARG;
+ break;
+ }
}
- if (g[0] == 0) {
- gSz--; g++;
+ if (ret == 0) {
+ /* modulus size in bytes */
+ modSz /= WOLFSSL_BIT_SIZE;
+ bufSz = modSz - groupSz;
+
+ /* allocate ram */
+ buf = (unsigned char *)XMALLOC(bufSz,
+ dh->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (buf == NULL)
+ ret = MEMORY_E;
}
- if (mp_init(&key->p) != MP_OKAY)
- return MP_INIT_E;
- if (mp_read_unsigned_bin(&key->p, p, pSz) != 0) {
- mp_clear(&key->p);
- return ASN_DH_KEY_E;
+ /* make a random string that will be multiplied against q */
+ if (ret == 0)
+ ret = wc_RNG_GenerateBlock(rng, buf, bufSz);
+
+ if (ret == 0) {
+ /* force magnitude */
+ buf[0] |= 0xC0;
+ /* force even */
+ buf[bufSz - 1] &= ~1;
+
+ if (mp_init_multi(&tmp, &tmp2, &dh->p, &dh->q, &dh->g, 0)
+ != MP_OKAY) {
+ ret = MP_INIT_E;
+ }
}
- if (mp_init(&key->g) != MP_OKAY) {
- mp_clear(&key->p);
- return MP_INIT_E;
+ if (ret == 0) {
+ if (mp_read_unsigned_bin(&tmp2, buf, bufSz) != MP_OKAY)
+ ret = MP_READ_E;
}
- if (mp_read_unsigned_bin(&key->g, g, gSz) != 0) {
- mp_clear(&key->g);
- mp_clear(&key->p);
- return ASN_DH_KEY_E;
+
+ /* make our prime q */
+ if (ret == 0) {
+ if (mp_rand_prime(&dh->q, groupSz, rng, NULL) != MP_OKAY)
+ ret = PRIME_GEN_E;
}
- return 0;
+ /* p = random * q */
+ if (ret == 0) {
+ if (mp_mul(&dh->q, &tmp2, &dh->p) != MP_OKAY)
+ ret = MP_MUL_E;
+ }
+
+ /* p = random * q + 1, so q is a prime divisor of p-1 */
+ if (ret == 0) {
+ if (mp_add_d(&dh->p, 1, &dh->p) != MP_OKAY)
+ ret = MP_ADD_E;
+ }
+
+ /* tmp = 2q */
+ if (ret == 0) {
+ if (mp_add(&dh->q, &dh->q, &tmp) != MP_OKAY)
+ ret = MP_ADD_E;
+ }
+
+ /* loop until p is prime */
+ if (ret == 0) {
+ do {
+ if (mp_prime_is_prime_ex(&dh->p, 8, &primeCheck, rng) != MP_OKAY)
+ ret = PRIME_GEN_E;
+
+ if (primeCheck != MP_YES) {
+ /* p += 2q */
+ if (mp_add(&tmp, &dh->p, &dh->p) != MP_OKAY)
+ ret = MP_ADD_E;
+ else
+ primeCheckCount++;
+ }
+ } while (ret == 0 && primeCheck == MP_NO);
+ }
+
+ /* tmp2 += (2*loop_check_prime)
+ * to have p = (q * tmp2) + 1 prime
+ */
+ if ((ret == 0) && (primeCheckCount)) {
+ if (mp_add_d(&tmp2, 2 * primeCheckCount, &tmp2) != MP_OKAY)
+ ret = MP_ADD_E;
+ }
+
+ /* find a value g for which g^tmp2 != 1 */
+ if ((ret == 0) && (mp_set(&dh->g, 1) != MP_OKAY))
+ ret = MP_ZERO_E;
+
+ if (ret == 0) {
+ do {
+ if (mp_add_d(&dh->g, 1, &dh->g) != MP_OKAY)
+ ret = MP_ADD_E;
+ else if (mp_exptmod(&dh->g, &tmp2, &dh->p, &tmp) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+ } while (ret == 0 && mp_cmp_d(&tmp, 1) == MP_EQ);
+ }
+
+ if (ret == 0) {
+ /* at this point tmp generates a group of order q mod p */
+ mp_exch(&tmp, &dh->g);
+ }
+
+ /* clear the parameters if there was an error */
+ if ((ret != 0) && (dh != NULL)) {
+ mp_clear(&dh->q);
+ mp_clear(&dh->p);
+ mp_clear(&dh->g);
+ }
+
+ if (buf != NULL) {
+ ForceZero(buf, bufSz);
+ if (dh != NULL) {
+ XFREE(buf, dh->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ }
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+
+ return ret;
}
-#endif /* NO_DH */
+/* Export raw DH parameters from DhKey structure
+ *
+ * dh - pointer to initialized DhKey structure
+ * p - output location for DH (p) parameter
+ * pSz - [IN/OUT] size of output buffer for p, size of p
+ * q - output location for DH (q) parameter
+ * qSz - [IN/OUT] size of output buffer for q, size of q
+ * g - output location for DH (g) parameter
+ * gSz - [IN/OUT] size of output buffer for g, size of g
+ *
+ * If p, q, and g pointers are all passed in as NULL, the function
+ * will set pSz, qSz, and gSz to the required output buffer sizes for p,
+ * q, and g. In this case, the function will return LENGTH_ONLY_E.
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DhExportParamsRaw(DhKey* dh, byte* p, word32* pSz,
+ byte* q, word32* qSz, byte* g, word32* gSz)
+{
+ int ret = 0;
+ word32 pLen = 0, qLen = 0, gLen = 0;
+
+ if (dh == NULL || pSz == NULL || qSz == NULL || gSz == NULL)
+ ret = BAD_FUNC_ARG;
+
+ /* get required output buffer sizes */
+ if (ret == 0) {
+ pLen = mp_unsigned_bin_size(&dh->p);
+ qLen = mp_unsigned_bin_size(&dh->q);
+ gLen = mp_unsigned_bin_size(&dh->g);
+
+ /* return buffer sizes and LENGTH_ONLY_E if buffers are NULL */
+ if (p == NULL && q == NULL && g == NULL) {
+ *pSz = pLen;
+ *qSz = qLen;
+ *gSz = gLen;
+ ret = LENGTH_ONLY_E;
+ }
+ }
+
+ if (ret == 0) {
+ if (p == NULL || q == NULL || g == NULL)
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* export p */
+ if (ret == 0) {
+ if (*pSz < pLen) {
+ WOLFSSL_MSG("Output buffer for DH p parameter too small, "
+ "required size placed into pSz");
+ *pSz = pLen;
+ ret = BUFFER_E;
+ }
+ }
+
+ if (ret == 0) {
+ *pSz = pLen;
+ if (mp_to_unsigned_bin(&dh->p, p) != MP_OKAY)
+ ret = MP_TO_E;
+ }
+
+ /* export q */
+ if (ret == 0) {
+ if (*qSz < qLen) {
+ WOLFSSL_MSG("Output buffer for DH q parameter too small, "
+ "required size placed into qSz");
+ *qSz = qLen;
+ ret = BUFFER_E;
+ }
+ }
+
+ if (ret == 0) {
+ *qSz = qLen;
+ if (mp_to_unsigned_bin(&dh->q, q) != MP_OKAY)
+ ret = MP_TO_E;
+ }
+
+ /* export g */
+ if (ret == 0) {
+ if (*gSz < gLen) {
+ WOLFSSL_MSG("Output buffer for DH g parameter too small, "
+ "required size placed into gSz");
+ *gSz = gLen;
+ ret = BUFFER_E;
+ }
+ }
+
+ if (ret == 0) {
+ *gSz = gLen;
+ if (mp_to_unsigned_bin(&dh->g, g) != MP_OKAY)
+ ret = MP_TO_E;
+ }
+
+ return ret;
+}
+#endif /* WOLFSSL_KEY_GEN */
+
+#endif /* NO_DH */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dsa.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dsa.c
index f2124b197..4b83a571d 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dsa.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/dsa.c
@@ -1,8 +1,8 @@
/* dsa.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -27,97 +28,703 @@
#ifndef NO_DSA
-#include <wolfssl/wolfcrypt/dsa.h>
-#include <wolfssl/wolfcrypt/sha.h>
#include <wolfssl/wolfcrypt/random.h>
+#include <wolfssl/wolfcrypt/integer.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/sha.h>
+#include <wolfssl/wolfcrypt/dsa.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
-enum {
- DSA_HALF_SIZE = 20, /* r and s size */
- DSA_SIG_SIZE = 40 /* signature size */
-};
-
-
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
-
- static INLINE word32 min(word32 a, word32 b)
- {
- return a > b ? b : a;
- }
+int wc_InitDsaKey(DsaKey* key)
+{
+ if (key == NULL)
+ return BAD_FUNC_ARG;
-#endif /* WOLFSSL_HAVE_MIN */
+ key->type = -1; /* haven't decided yet */
+ key->heap = NULL;
+
+ return mp_init_multi(
+ /* public alloc parts */
+ &key->p,
+ &key->q,
+ &key->g,
+ &key->y,
+
+ /* private alloc parts */
+ &key->x,
+ NULL
+ );
+}
-void wc_InitDsaKey(DsaKey* key)
+int wc_InitDsaKey_h(DsaKey* key, void* h)
{
- key->type = -1; /* haven't decided yet */
-
-/* TomsFastMath doesn't use memory allocation */
-#ifndef USE_FAST_MATH
- key->p.dp = 0; /* public alloc parts */
- key->q.dp = 0;
- key->g.dp = 0;
- key->y.dp = 0;
+ int ret = wc_InitDsaKey(key);
+ if (ret == 0)
+ key->heap = h;
- key->x.dp = 0; /* private alloc parts */
-#endif
+ return ret;
}
void wc_FreeDsaKey(DsaKey* key)
{
- (void)key;
-/* TomsFastMath doesn't use memory allocation */
-#ifndef USE_FAST_MATH
+ if (key == NULL)
+ return;
+
if (key->type == DSA_PRIVATE)
- mp_clear(&key->x);
+ mp_forcezero(&key->x);
+
+ mp_clear(&key->x);
mp_clear(&key->y);
mp_clear(&key->g);
mp_clear(&key->q);
mp_clear(&key->p);
-#endif
}
-int wc_DsaSign(const byte* digest, byte* out, DsaKey* key, RNG* rng)
+/* validate that (L,N) match allowed sizes from FIPS 186-4, Section 4.2.
+ * modLen - represents L, the size of p (prime modulus) in bits
+ * divLen - represents N, the size of q (prime divisor) in bits
+ * return 0 on success, -1 on error */
+static int CheckDsaLN(int modLen, int divLen)
+{
+ int ret = -1;
+
+ switch (modLen) {
+ case 1024:
+ if (divLen == 160)
+ ret = 0;
+ break;
+ case 2048:
+ if (divLen == 224 || divLen == 256)
+ ret = 0;
+ break;
+ case 3072:
+ if (divLen == 256)
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+
+#ifdef WOLFSSL_KEY_GEN
+
+/* Create DSA key pair (&dsa->x, &dsa->y)
+ *
+ * Based on NIST FIPS 186-4,
+ * "B.1.1 Key Pair Generation Using Extra Random Bits"
+ *
+ * rng - pointer to initialized WC_RNG structure
+ * dsa - pointer to initialized DsaKey structure, will hold generated key
+ *
+ * return 0 on success, negative on error */
+int wc_MakeDsaKey(WC_RNG *rng, DsaKey *dsa)
+{
+ byte* cBuf;
+ int qSz, pSz, cSz, err;
+ mp_int tmpQ;
+
+ if (rng == NULL || dsa == NULL)
+ return BAD_FUNC_ARG;
+
+ qSz = mp_unsigned_bin_size(&dsa->q);
+ pSz = mp_unsigned_bin_size(&dsa->p);
+
+ /* verify (L,N) pair bit lengths */
+ if (CheckDsaLN(pSz * WOLFSSL_BIT_SIZE, qSz * WOLFSSL_BIT_SIZE) != 0)
+ return BAD_FUNC_ARG;
+
+ /* generate extra 64 bits so that bias from mod function is negligible */
+ cSz = qSz + (64 / WOLFSSL_BIT_SIZE);
+ cBuf = (byte*)XMALLOC(cSz, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (cBuf == NULL) {
+ return MEMORY_E;
+ }
+
+ if ((err = mp_init_multi(&dsa->x, &dsa->y, &tmpQ, NULL, NULL, NULL))
+ != MP_OKAY) {
+ XFREE(cBuf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return err;
+ }
+
+ do {
+ /* generate N+64 bits (c) from RBG into &dsa->x, making sure positive.
+ * Hash_DRBG uses SHA-256 which matches maximum
+ * requested_security_strength of (L,N) */
+ err = wc_RNG_GenerateBlock(rng, cBuf, cSz);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->x);
+ mp_clear(&dsa->y);
+ mp_clear(&tmpQ);
+ XFREE(cBuf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return err;
+ }
+
+ err = mp_read_unsigned_bin(&dsa->x, cBuf, cSz);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->x);
+ mp_clear(&dsa->y);
+ mp_clear(&tmpQ);
+ XFREE(cBuf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return err;
+ }
+ } while (mp_cmp_d(&dsa->x, 1) != MP_GT);
+
+ XFREE(cBuf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ /* tmpQ = q - 1 */
+ if (err == MP_OKAY)
+ err = mp_copy(&dsa->q, &tmpQ);
+
+ if (err == MP_OKAY)
+ err = mp_sub_d(&tmpQ, 1, &tmpQ);
+
+ /* x = c mod (q-1), &dsa->x holds c */
+ if (err == MP_OKAY)
+ err = mp_mod(&dsa->x, &tmpQ, &dsa->x);
+
+ /* x = c mod (q-1) + 1 */
+ if (err == MP_OKAY)
+ err = mp_add_d(&dsa->x, 1, &dsa->x);
+
+ /* public key : y = g^x mod p */
+ if (err == MP_OKAY)
+ err = mp_exptmod_ex(&dsa->g, &dsa->x, dsa->q.used, &dsa->p, &dsa->y);
+
+ if (err == MP_OKAY)
+ dsa->type = DSA_PRIVATE;
+
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->x);
+ mp_clear(&dsa->y);
+ }
+ mp_clear(&tmpQ);
+
+ return err;
+}
+
+
+/* modulus_size in bits */
+int wc_MakeDsaParameters(WC_RNG *rng, int modulus_size, DsaKey *dsa)
+{
+ mp_int tmp, tmp2;
+ int err, msize, qsize,
+ loop_check_prime = 0,
+ check_prime = MP_NO;
+ unsigned char *buf;
+
+ if (rng == NULL || dsa == NULL)
+ return BAD_FUNC_ARG;
+
+ /* set group size in bytes from modulus size
+ * FIPS 186-4 defines valid values (1024, 160) (2048, 256) (3072, 256)
+ */
+ switch (modulus_size) {
+ case 1024:
+ qsize = 20;
+ break;
+ case 2048:
+ case 3072:
+ qsize = 32;
+ break;
+ default:
+ return BAD_FUNC_ARG;
+ }
+
+ /* modulus size in bytes */
+ msize = modulus_size / WOLFSSL_BIT_SIZE;
+
+ /* allocate ram */
+ buf = (unsigned char *)XMALLOC(msize - qsize,
+ dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (buf == NULL) {
+ return MEMORY_E;
+ }
+
+ /* make a random string that will be multiplied against q */
+ err = wc_RNG_GenerateBlock(rng, buf, msize - qsize);
+ if (err != MP_OKAY) {
+ XFREE(buf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return err;
+ }
+
+ /* force magnitude */
+ buf[0] |= 0xC0;
+
+ /* force even */
+ buf[msize - qsize - 1] &= ~1;
+
+ if (mp_init_multi(&tmp2, &dsa->p, &dsa->q, 0, 0, 0) != MP_OKAY) {
+ mp_clear(&dsa->q);
+ XFREE(buf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MP_INIT_E;
+ }
+
+ err = mp_read_unsigned_bin(&tmp2, buf, msize - qsize);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp2);
+ XFREE(buf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return err;
+ }
+ XFREE(buf, dsa->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ /* make our prime q */
+ err = mp_rand_prime(&dsa->q, qsize, rng, NULL);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp2);
+ return err;
+ }
+
+ /* p = random * q */
+ err = mp_mul(&dsa->q, &tmp2, &dsa->p);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp2);
+ return err;
+ }
+
+ /* p = random * q + 1, so q is a prime divisor of p-1 */
+ err = mp_add_d(&dsa->p, 1, &dsa->p);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp2);
+ return err;
+ }
+
+ if (mp_init(&tmp) != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp2);
+ return MP_INIT_E;
+ }
+
+ /* tmp = 2q */
+ err = mp_add(&dsa->q, &dsa->q, &tmp);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+ return err;
+ }
+
+ /* loop until p is prime */
+ while (check_prime == MP_NO) {
+ err = mp_prime_is_prime_ex(&dsa->p, 8, &check_prime, rng);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+ return err;
+ }
+
+ if (check_prime != MP_YES) {
+ /* p += 2q */
+ err = mp_add(&tmp, &dsa->p, &dsa->p);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+ return err;
+ }
+
+ loop_check_prime++;
+ }
+ }
+
+ /* tmp2 += (2*loop_check_prime)
+ * to have p = (q * tmp2) + 1 prime
+ */
+ if (loop_check_prime) {
+ err = mp_add_d(&tmp2, 2*loop_check_prime, &tmp2);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+ return err;
+ }
+ }
+
+ if (mp_init(&dsa->g) != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+ return MP_INIT_E;
+ }
+
+ /* find a value g for which g^tmp2 != 1 */
+ if (mp_set(&dsa->g, 1) != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+ return MP_INIT_E;
+ }
+
+ do {
+ err = mp_add_d(&dsa->g, 1, &dsa->g);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&dsa->g);
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+ return err;
+ }
+
+ err = mp_exptmod(&dsa->g, &tmp2, &dsa->p, &tmp);
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->p);
+ mp_clear(&dsa->g);
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+ return err;
+ }
+
+ } while (mp_cmp_d(&tmp, 1) == MP_EQ);
+
+ /* at this point tmp generates a group of order q mod p */
+ mp_exch(&tmp, &dsa->g);
+
+ mp_clear(&tmp);
+ mp_clear(&tmp2);
+
+ return MP_OKAY;
+}
+#endif /* WOLFSSL_KEY_GEN */
+
+
+static int _DsaImportParamsRaw(DsaKey* dsa, const char* p, const char* q,
+ const char* g, int trusted, WC_RNG* rng)
+{
+ int err;
+ word32 pSz, qSz;
+
+ if (dsa == NULL || p == NULL || q == NULL || g == NULL)
+ return BAD_FUNC_ARG;
+
+ /* read p */
+ err = mp_read_radix(&dsa->p, p, MP_RADIX_HEX);
+ if (err == MP_OKAY && !trusted) {
+ int isPrime = 1;
+ if (rng == NULL)
+ err = mp_prime_is_prime(&dsa->p, 8, &isPrime);
+ else
+ err = mp_prime_is_prime_ex(&dsa->p, 8, &isPrime, rng);
+
+ if (err == MP_OKAY) {
+ if (!isPrime)
+ err = DH_CHECK_PUB_E;
+ }
+ }
+
+ /* read q */
+ if (err == MP_OKAY)
+ err = mp_read_radix(&dsa->q, q, MP_RADIX_HEX);
+
+ /* read g */
+ if (err == MP_OKAY)
+ err = mp_read_radix(&dsa->g, g, MP_RADIX_HEX);
+
+ /* verify (L,N) pair bit lengths */
+ pSz = mp_unsigned_bin_size(&dsa->p);
+ qSz = mp_unsigned_bin_size(&dsa->q);
+
+ if (CheckDsaLN(pSz * WOLFSSL_BIT_SIZE, qSz * WOLFSSL_BIT_SIZE) != 0) {
+ WOLFSSL_MSG("Invalid DSA p or q parameter size");
+ err = BAD_FUNC_ARG;
+ }
+
+ if (err != MP_OKAY) {
+ mp_clear(&dsa->p);
+ mp_clear(&dsa->q);
+ mp_clear(&dsa->g);
+ }
+
+ return err;
+}
+
+
+/* Import raw DSA parameters into DsaKey structure for use with wc_MakeDsaKey(),
+ * input parameters (p,q,g) should be represented as ASCII hex values.
+ *
+ * dsa - pointer to initialized DsaKey structure
+ * p - DSA (p) parameter, ASCII hex string
+ * pSz - length of p
+ * q - DSA (q) parameter, ASCII hex string
+ * qSz - length of q
+ * g - DSA (g) parameter, ASCII hex string
+ * gSz - length of g
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DsaImportParamsRaw(DsaKey* dsa, const char* p, const char* q,
+ const char* g)
+{
+ return _DsaImportParamsRaw(dsa, p, q, g, 1, NULL);
+}
+
+
+/* Import raw DSA parameters into DsaKey structure for use with wc_MakeDsaKey(),
+ * input parameters (p,q,g) should be represented as ASCII hex values. Check
+ * that the p value is probably prime.
+ *
+ * dsa - pointer to initialized DsaKey structure
+ * p - DSA (p) parameter, ASCII hex string
+ * pSz - length of p
+ * q - DSA (q) parameter, ASCII hex string
+ * qSz - length of q
+ * g - DSA (g) parameter, ASCII hex string
+ * gSz - length of g
+ * trusted - trust that p is OK
+ * rng - random number generator for the prime test
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DsaImportParamsRawCheck(DsaKey* dsa, const char* p, const char* q,
+ const char* g, int trusted, WC_RNG* rng)
+{
+ return _DsaImportParamsRaw(dsa, p, q, g, trusted, rng);
+}
+
+
+/* Export raw DSA parameters from DsaKey structure
+ *
+ * dsa - pointer to initialized DsaKey structure
+ * p - output location for DSA (p) parameter
+ * pSz - [IN/OUT] size of output buffer for p, size of p
+ * q - output location for DSA (q) parameter
+ * qSz - [IN/OUT] size of output buffer for q, size of q
+ * g - output location for DSA (g) parameter
+ * gSz - [IN/OUT] size of output buffer for g, size of g
+ *
+ * If p, q, and g pointers are all passed in as NULL, the function
+ * will set pSz, qSz, and gSz to the required output buffer sizes for p,
+ * q, and g. In this case, the function will return LENGTH_ONLY_E.
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DsaExportParamsRaw(DsaKey* dsa, byte* p, word32* pSz,
+ byte* q, word32* qSz, byte* g, word32* gSz)
+{
+ int err;
+ word32 pLen, qLen, gLen;
+
+ if (dsa == NULL || pSz == NULL || qSz == NULL || gSz == NULL)
+ return BAD_FUNC_ARG;
+
+ /* get required output buffer sizes */
+ pLen = mp_unsigned_bin_size(&dsa->p);
+ qLen = mp_unsigned_bin_size(&dsa->q);
+ gLen = mp_unsigned_bin_size(&dsa->g);
+
+ /* return buffer sizes and LENGTH_ONLY_E if buffers are NULL */
+ if (p == NULL && q == NULL && g == NULL) {
+ *pSz = pLen;
+ *qSz = qLen;
+ *gSz = gLen;
+ return LENGTH_ONLY_E;
+ }
+
+ if (p == NULL || q == NULL || g == NULL)
+ return BAD_FUNC_ARG;
+
+ /* export p */
+ if (*pSz < pLen) {
+ WOLFSSL_MSG("Output buffer for DSA p parameter too small, "
+ "required size placed into pSz");
+ *pSz = pLen;
+ return BUFFER_E;
+ }
+ *pSz = pLen;
+ err = mp_to_unsigned_bin(&dsa->p, p);
+
+ /* export q */
+ if (err == MP_OKAY) {
+ if (*qSz < qLen) {
+ WOLFSSL_MSG("Output buffer for DSA q parameter too small, "
+ "required size placed into qSz");
+ *qSz = qLen;
+ return BUFFER_E;
+ }
+ *qSz = qLen;
+ err = mp_to_unsigned_bin(&dsa->q, q);
+ }
+
+ /* export g */
+ if (err == MP_OKAY) {
+ if (*gSz < gLen) {
+ WOLFSSL_MSG("Output buffer for DSA g parameter too small, "
+ "required size placed into gSz");
+ *gSz = gLen;
+ return BUFFER_E;
+ }
+ *gSz = gLen;
+ err = mp_to_unsigned_bin(&dsa->g, g);
+ }
+
+ return err;
+}
+
+
+/* Export raw DSA key (x, y) from DsaKey structure
+ *
+ * dsa - pointer to initialized DsaKey structure
+ * x - output location for private key
+ * xSz - [IN/OUT] size of output buffer for x, size of x
+ * y - output location for public key
+ * ySz - [IN/OUT] size of output buffer for y, size of y
+ *
+ * If x and y pointers are all passed in as NULL, the function
+ * will set xSz and ySz to the required output buffer sizes for x
+ * and y. In this case, the function will return LENGTH_ONLY_E.
+ *
+ * returns 0 on success, negative upon failure
+ */
+int wc_DsaExportKeyRaw(DsaKey* dsa, byte* x, word32* xSz, byte* y, word32* ySz)
+{
+ int err;
+ word32 xLen, yLen;
+
+ if (dsa == NULL || xSz == NULL || ySz == NULL)
+ return BAD_FUNC_ARG;
+
+ /* get required output buffer sizes */
+ xLen = mp_unsigned_bin_size(&dsa->x);
+ yLen = mp_unsigned_bin_size(&dsa->y);
+
+ /* return buffer sizes and LENGTH_ONLY_E if buffers are NULL */
+ if (x == NULL && y == NULL) {
+ *xSz = xLen;
+ *ySz = yLen;
+ return LENGTH_ONLY_E;
+ }
+
+ if (x == NULL || y == NULL)
+ return BAD_FUNC_ARG;
+
+ /* export x */
+ if (*xSz < xLen) {
+ WOLFSSL_MSG("Output buffer for DSA private key (x) too small, "
+ "required size placed into xSz");
+ *xSz = xLen;
+ return BUFFER_E;
+ }
+ *xSz = xLen;
+ err = mp_to_unsigned_bin(&dsa->x, x);
+
+ /* export y */
+ if (err == MP_OKAY) {
+ if (*ySz < yLen) {
+ WOLFSSL_MSG("Output buffer to DSA public key (y) too small, "
+ "required size placed into ySz");
+ *ySz = yLen;
+ return BUFFER_E;
+ }
+ *ySz = yLen;
+ err = mp_to_unsigned_bin(&dsa->y, y);
+ }
+
+ return err;
+}
+
+
+int wc_DsaSign(const byte* digest, byte* out, DsaKey* key, WC_RNG* rng)
{
- mp_int k, kInv, r, s, H;
- int ret, sz;
- byte buffer[DSA_HALF_SIZE];
+ mp_int k, kInv, r, s, H;
+#ifndef WOLFSSL_MP_INVMOD_CONSTANT_TIME
+ mp_int b;
+#endif
+ mp_int* qMinus1;
+ int ret = 0, sz;
+ byte buffer[DSA_HALF_SIZE];
+ byte* tmp; /* initial output pointer */
- sz = min(sizeof(buffer), mp_unsigned_bin_size(&key->q));
+ if (digest == NULL || out == NULL || key == NULL || rng == NULL) {
+ return BAD_FUNC_ARG;
+ }
- /* generate k */
- ret = wc_RNG_GenerateBlock(rng, buffer, sz);
- if (ret != 0)
- return ret;
+ tmp = out;
- buffer[0] |= 0x0C;
+ sz = min((int)sizeof(buffer), mp_unsigned_bin_size(&key->q));
+#ifdef WOLFSSL_MP_INVMOD_CONSTANT_TIME
if (mp_init_multi(&k, &kInv, &r, &s, &H, 0) != MP_OKAY)
+#else
+ if (mp_init_multi(&k, &kInv, &r, &s, &H, &b) != MP_OKAY)
+#endif
+ {
return MP_INIT_E;
+ }
+ qMinus1 = &kInv;
- if (mp_read_unsigned_bin(&k, buffer, sz) != MP_OKAY)
- ret = MP_READ_E;
+ /* NIST FIPS 186-4: B.2.2
+ * Per-Message Secret Number Generation by Testing Candidates
+ * Generate k in range [1, q-1].
+ * Check that k is less than q-1: range [0, q-2].
+ * Add 1 to k: range [1, q-1].
+ */
+ if (mp_sub_d(&key->q, 1, qMinus1))
+ ret = MP_SUB_E;
- if (ret == 0 && mp_cmp_d(&k, 1) != MP_GT)
- ret = MP_CMP_E;
+ if (ret == 0) {
+ do {
+ /* Step 4: generate k */
+ ret = wc_RNG_GenerateBlock(rng, buffer, sz);
+
+ /* Step 5 */
+ if (ret == 0 && mp_read_unsigned_bin(&k, buffer, sz) != MP_OKAY)
+ ret = MP_READ_E;
+
+            /* k is a random number and it should be less than q-1;
+             * if k is greater, repeat
+             */
+ /* Step 6 */
+ } while (ret == 0 && mp_cmp(&k, qMinus1) != MP_LT);
+ }
+ /* Step 7 */
+ if (ret == 0 && mp_add_d(&k, 1, &k) != MP_OKAY)
+ ret = MP_MOD_E;
+#ifdef WOLFSSL_MP_INVMOD_CONSTANT_TIME
/* inverse k mod q */
if (ret == 0 && mp_invmod(&k, &key->q, &kInv) != MP_OKAY)
ret = MP_INVMOD_E;
/* generate r, r = (g exp k mod p) mod q */
- if (ret == 0 && mp_exptmod(&key->g, &k, &key->p, &r) != MP_OKAY)
+ if (ret == 0 && mp_exptmod_ex(&key->g, &k, key->q.used, &key->p,
+ &r) != MP_OKAY) {
ret = MP_EXPTMOD_E;
+ }
if (ret == 0 && mp_mod(&r, &key->q, &r) != MP_OKAY)
ret = MP_MOD_E;
/* generate H from sha digest */
- if (ret == 0 && mp_read_unsigned_bin(&H, digest,SHA_DIGEST_SIZE) != MP_OKAY)
+ if (ret == 0 && mp_read_unsigned_bin(&H, digest,WC_SHA_DIGEST_SIZE) != MP_OKAY)
ret = MP_READ_E;
/* generate s, s = (kInv * (H + x*r)) % q */
@@ -129,28 +736,105 @@ int wc_DsaSign(const byte* digest, byte* out, DsaKey* key, RNG* rng)
if (ret == 0 && mp_mulmod(&s, &kInv, &key->q, &s) != MP_OKAY)
ret = MP_MULMOD_E;
+#else
+ /* Blinding value
+ * Generate b in range [1, q-1].
+ */
+ if (ret == 0) {
+ do {
+ ret = wc_RNG_GenerateBlock(rng, buffer, sz);
+ if (ret == 0 && mp_read_unsigned_bin(&b, buffer, sz) != MP_OKAY)
+ ret = MP_READ_E;
+ } while (ret == 0 && mp_cmp(&b, qMinus1) != MP_LT);
+ }
+ if (ret == 0 && mp_add_d(&b, 1, &b) != MP_OKAY)
+ ret = MP_MOD_E;
+
+ /* set H from sha digest */
+ if (ret == 0 && mp_read_unsigned_bin(&H, digest,
+ WC_SHA_DIGEST_SIZE) != MP_OKAY) {
+ ret = MP_READ_E;
+ }
+
+ /* generate r, r = (g exp k mod p) mod q */
+ if (ret == 0 && mp_exptmod_ex(&key->g, &k, key->q.used, &key->p,
+ &r) != MP_OKAY) {
+ ret = MP_EXPTMOD_E;
+ }
+
+ /* calculate s = (H + xr)/k
+ = b.(H/k.b + x.r/k.b) */
+
+ /* k = k.b */
+ if (ret == 0 && mp_mulmod(&k, &b, &key->q, &k) != MP_OKAY)
+ ret = MP_MULMOD_E;
+
+ /* kInv = 1/k.b mod q */
+ if (ret == 0 && mp_invmod(&k, &key->q, &kInv) != MP_OKAY)
+ ret = MP_INVMOD_E;
+
+ if (ret == 0 && mp_mod(&r, &key->q, &r) != MP_OKAY)
+ ret = MP_MOD_E;
+
+ /* s = x.r */
+ if (ret == 0 && mp_mul(&key->x, &r, &s) != MP_OKAY)
+ ret = MP_MUL_E;
+
+ /* s = x.r/k.b */
+ if (ret == 0 && mp_mulmod(&s, &kInv, &key->q, &s) != MP_OKAY)
+ ret = MP_MULMOD_E;
+
+ /* H = H/k.b */
+ if (ret == 0 && mp_mulmod(&H, &kInv, &key->q, &H) != MP_OKAY)
+ ret = MP_MULMOD_E;
+
+ /* s = H/k.b + x.r/k.b
+ = (H + x.r)/k.b */
+ if (ret == 0 && mp_add(&s, &H, &s) != MP_OKAY)
+ ret = MP_ADD_E;
+
+ /* s = b.(e + x.r)/k.b
+ = (e + x.r)/k */
+ if (ret == 0 && mp_mulmod(&s, &b, &key->q, &s) != MP_OKAY)
+ ret = MP_MULMOD_E;
+
+ /* s = (e + x.r)/k */
+ if (ret == 0 && mp_mod(&s, &key->q, &s) != MP_OKAY)
+ ret = MP_MOD_E;
+#endif
+
+ /* detect zero r or s */
+ if (ret == 0 && (mp_iszero(&r) == MP_YES || mp_iszero(&s) == MP_YES))
+ ret = MP_ZERO_E;
/* write out */
if (ret == 0) {
int rSz = mp_unsigned_bin_size(&r);
int sSz = mp_unsigned_bin_size(&s);
- if (rSz == DSA_HALF_SIZE - 1) {
- out[0] = 0;
- out++;
+ while (rSz++ < DSA_HALF_SIZE) {
+ *out++ = 0x00; /* pad front with zeros */
}
if (mp_to_unsigned_bin(&r, out) != MP_OKAY)
ret = MP_TO_E;
else {
- if (sSz == DSA_HALF_SIZE - 1) {
- out[rSz] = 0;
- out++;
- }
- ret = mp_to_unsigned_bin(&s, out + rSz);
+ out = tmp + DSA_HALF_SIZE; /* advance to s in output */
+ while (sSz++ < DSA_HALF_SIZE) {
+ *out++ = 0x00; /* pad front with zeros */
+ }
+ ret = mp_to_unsigned_bin(&s, out);
}
}
+ ForceZero(buffer, sz);
+ mp_forcezero(&kInv);
+ mp_forcezero(&k);
+#ifndef WOLFSSL_MP_INVMOD_CONSTANT_TIME
+ mp_forcezero(&b);
+
+ mp_clear(&b);
+#endif
mp_clear(&H);
mp_clear(&s);
mp_clear(&r);
@@ -166,6 +850,10 @@ int wc_DsaVerify(const byte* digest, const byte* sig, DsaKey* key, int* answer)
mp_int w, u1, u2, v, r, s;
int ret = 0;
+ if (digest == NULL || sig == NULL || key == NULL || answer == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
if (mp_init_multi(&w, &u1, &u2, &v, &r, &s) != MP_OKAY)
return MP_INIT_E;
@@ -183,7 +871,7 @@ int wc_DsaVerify(const byte* digest, const byte* sig, DsaKey* key, int* answer)
}
/* put H into u1 from sha digest */
- if (ret == 0 && mp_read_unsigned_bin(&u1,digest,SHA_DIGEST_SIZE) != MP_OKAY)
+ if (ret == 0 && mp_read_unsigned_bin(&u1,digest,WC_SHA_DIGEST_SIZE) != MP_OKAY)
ret = MP_READ_E;
/* w = s invmod q */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ecc.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ecc.c
index 897d46adf..22db7f167 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ecc.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ecc.c
@@ -1,8 +1,8 @@
/* ecc.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,10 +16,11 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -27,31 +28,149 @@
/* in case user set HAVE_ECC there */
#include <wolfssl/wolfcrypt/settings.h>
+/* public ASN interface */
+#include <wolfssl/wolfcrypt/asn_public.h>
+
+/*
+Possible ECC enable options:
+ * HAVE_ECC: Overall control of ECC default: on
+ * HAVE_ECC_ENCRYPT: ECC encrypt/decrypt w/AES and HKDF default: off
+ * HAVE_ECC_SIGN: ECC sign default: on
+ * HAVE_ECC_VERIFY: ECC verify default: on
+ * HAVE_ECC_DHE: ECC build shared secret default: on
+ * HAVE_ECC_CDH: ECC cofactor DH shared secret default: off
+ * HAVE_ECC_KEY_IMPORT: ECC Key import default: on
+ * HAVE_ECC_KEY_EXPORT: ECC Key export default: on
+ * ECC_SHAMIR: Enables Shamir calc method default: on
+ * HAVE_COMP_KEY: Enables compressed key default: off
+ * WOLFSSL_VALIDATE_ECC_IMPORT: Validate ECC key on import default: off
+ * WOLFSSL_VALIDATE_ECC_KEYGEN: Validate ECC key gen default: off
+ * WOLFSSL_CUSTOM_CURVES: Allow non-standard curves. default: off
+ * Includes the curve "a" variable in calculation
+ * ECC_DUMP_OID: Enables dump of OID encoding and sum default: off
+ * ECC_CACHE_CURVE: Enables cache of curve info to improve performance
+ default: off
+ * FP_ECC: ECC Fixed Point Cache default: off
+ * USE_ECC_B_PARAM: Enable ECC curve B param default: off
+ (on for HAVE_COMP_KEY)
+ * WOLFSSL_ECC_CURVE_STATIC: default off (on for windows)
+ For the ECC curve parameters `ecc_set_type` use fixed
+ array for hex string
+ */
+
+/*
+ECC Curve Types:
+ * NO_ECC_SECP Disables SECP curves default: off (not defined)
+ * HAVE_ECC_SECPR2 Enables SECP R2 curves default: off
+ * HAVE_ECC_SECPR3 Enables SECP R3 curves default: off
+ * HAVE_ECC_BRAINPOOL Enables Brainpool curves default: off
+ * HAVE_ECC_KOBLITZ Enables Koblitz curves default: off
+ */
+
+/*
+ECC Curve Sizes:
+ * ECC_USER_CURVES: Allows custom combination of key sizes below
+ * HAVE_ALL_CURVES: Enable all key sizes (on unless ECC_USER_CURVES is defined)
+ * HAVE_ECC112: 112 bit key
+ * HAVE_ECC128: 128 bit key
+ * HAVE_ECC160: 160 bit key
+ * HAVE_ECC192: 192 bit key
+ * HAVE_ECC224: 224 bit key
+ * HAVE_ECC239: 239 bit key
+ * NO_ECC256: Disables 256 bit key (on by default)
+ * HAVE_ECC320: 320 bit key
+ * HAVE_ECC384: 384 bit key
+ * HAVE_ECC512: 512 bit key
+ * HAVE_ECC521: 521 bit key
+ */
+
+
#ifdef HAVE_ECC
+/* Make sure custom curves is enabled for Brainpool or Koblitz curve types */
+#if (defined(HAVE_ECC_BRAINPOOL) || defined(HAVE_ECC_KOBLITZ)) &&\
+ !defined(WOLFSSL_CUSTOM_CURVES)
+ #error Brainpool and Koblitz curves requires WOLFSSL_CUSTOM_CURVES
+#endif
+
+#if defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$f")
+ #pragma const_seg(".fipsB$f")
+ #endif
+#endif
+
#include <wolfssl/wolfcrypt/ecc.h>
#include <wolfssl/wolfcrypt/asn.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/types.h>
+
+#ifdef WOLFSSL_HAVE_SP_ECC
+#include <wolfssl/wolfcrypt/sp.h>
+#endif
#ifdef HAVE_ECC_ENCRYPT
#include <wolfssl/wolfcrypt/hmac.h>
#include <wolfssl/wolfcrypt/aes.h>
#endif
+#ifdef HAVE_X963_KDF
+ #include <wolfssl/wolfcrypt/hash.h>
+#endif
+
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
-/* map
+#if defined(FREESCALE_LTC_ECC)
+ #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
- ptmul -> mulmod
+#if defined(WOLFSSL_STM32_PKA)
+ #include <wolfssl/wolfcrypt/port/st/stm32.h>
+#endif
-*/
+#ifdef WOLFSSL_SP_MATH
+ #define GEN_MEM_ERR MP_MEM
+#elif defined(USE_FAST_MATH)
+ #define GEN_MEM_ERR FP_MEM
+#else
+ #define GEN_MEM_ERR MP_MEM
+#endif
+
+
+/* internal ECC states */
+enum {
+ ECC_STATE_NONE = 0,
+
+ ECC_STATE_SHARED_SEC_GEN,
+ ECC_STATE_SHARED_SEC_RES,
+
+ ECC_STATE_SIGN_DO,
+ ECC_STATE_SIGN_ENCODE,
+ ECC_STATE_VERIFY_DECODE,
+ ECC_STATE_VERIFY_DO,
+ ECC_STATE_VERIFY_RES,
+};
+
+
+/* map
+ ptmul -> mulmod
+*/
-/* p256 curve on by default whether user curves or not */
+/* 256-bit curve on by default whether user curves or not */
#if defined(HAVE_ECC112) || defined(HAVE_ALL_CURVES)
#define ECC112
#endif
@@ -67,939 +186,2334 @@
#if defined(HAVE_ECC224) || defined(HAVE_ALL_CURVES)
#define ECC224
#endif
+#if defined(HAVE_ECC239) || defined(HAVE_ALL_CURVES)
+ #define ECC239
+#endif
#if !defined(NO_ECC256) || defined(HAVE_ALL_CURVES)
#define ECC256
#endif
+#if defined(HAVE_ECC320) || defined(HAVE_ALL_CURVES)
+ #define ECC320
+#endif
#if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
#define ECC384
#endif
+#if defined(HAVE_ECC512) || defined(HAVE_ALL_CURVES)
+ #define ECC512
+#endif
#if defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES)
#define ECC521
#endif
-
-
-/* This holds the key settings. ***MUST*** be organized by size from
- smallest to largest. */
+/* The encoded OID's for ECC curves */
+#ifdef ECC112
+ #ifndef NO_ECC_SECP
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP112R1 {1,3,132,0,6}
+ #define CODED_SECP112R1_SZ 5
+ #else
+ #define CODED_SECP112R1 {0x2B,0x81,0x04,0x00,0x06}
+ #define CODED_SECP112R1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp112r1[] = CODED_SECP112R1;
+ #else
+ #define ecc_oid_secp112r1 CODED_SECP112R1
+ #endif
+ #define ecc_oid_secp112r1_sz CODED_SECP112R1_SZ
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP112R2 {1,3,132,0,7}
+ #define CODED_SECP112R2_SZ 5
+ #else
+ #define CODED_SECP112R2 {0x2B,0x81,0x04,0x00,0x07}
+ #define CODED_SECP112R2_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp112r2[] = CODED_SECP112R2;
+ #else
+ #define ecc_oid_secp112r2 CODED_SECP112R2
+ #endif
+ #define ecc_oid_secp112r2_sz CODED_SECP112R2_SZ
+ #endif /* HAVE_ECC_SECPR2 */
+#endif /* ECC112 */
+#ifdef ECC128
+ #ifndef NO_ECC_SECP
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP128R1 {1,3,132,0,28}
+ #define CODED_SECP128R1_SZ 5
+ #else
+ #define CODED_SECP128R1 {0x2B,0x81,0x04,0x00,0x1C}
+ #define CODED_SECP128R1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp128r1[] = CODED_SECP128R1;
+ #else
+ #define ecc_oid_secp128r1 CODED_SECP128R1
+ #endif
+ #define ecc_oid_secp128r1_sz CODED_SECP128R1_SZ
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP128R2 {1,3,132,0,29}
+ #define CODED_SECP128R2_SZ 5
+ #else
+ #define CODED_SECP128R2 {0x2B,0x81,0x04,0x00,0x1D}
+ #define CODED_SECP128R2_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp128r2[] = CODED_SECP128R2;
+ #else
+ #define ecc_oid_secp128r2 CODED_SECP128R2
+ #endif
+ #define ecc_oid_secp128r2_sz CODED_SECP128R2_SZ
+ #endif /* HAVE_ECC_SECPR2 */
+#endif /* ECC128 */
+#ifdef ECC160
+ #ifndef NO_ECC_SECP
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP160R1 {1,3,132,0,8}
+ #define CODED_SECP160R1_SZ 5
+ #else
+ #define CODED_SECP160R1 {0x2B,0x81,0x04,0x00,0x08}
+ #define CODED_SECP160R1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp160r1[] = CODED_SECP160R1;
+ #else
+ #define ecc_oid_secp160r1 CODED_SECP160R1
+ #endif
+ #define ecc_oid_secp160r1_sz CODED_SECP160R1_SZ
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP160R2 {1,3,132,0,30}
+ #define CODED_SECP160R2_SZ 5
+ #else
+ #define CODED_SECP160R2 {0x2B,0x81,0x04,0x00,0x1E}
+ #define CODED_SECP160R2_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp160r2[] = CODED_SECP160R2;
+ #else
+ #define ecc_oid_secp160r2 CODED_SECP160R2
+ #endif
+ #define ecc_oid_secp160r2_sz CODED_SECP160R2_SZ
+ #endif /* HAVE_ECC_SECPR2 */
+ #ifdef HAVE_ECC_KOBLITZ
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP160K1 {1,3,132,0,9}
+ #define CODED_SECP160K1_SZ 5
+ #else
+ #define CODED_SECP160K1 {0x2B,0x81,0x04,0x00,0x09}
+ #define CODED_SECP160K1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp160k1[] = CODED_SECP160K1;
+ #else
+ #define ecc_oid_secp160k1 CODED_SECP160K1
+ #endif
+ #define ecc_oid_secp160k1_sz CODED_SECP160K1_SZ
+ #endif /* HAVE_ECC_KOBLITZ */
+ #ifdef HAVE_ECC_BRAINPOOL
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_BRAINPOOLP160R1 {1,3,36,3,3,2,8,1,1,1}
+ #define CODED_BRAINPOOLP160R1_SZ 10
+ #else
+ #define CODED_BRAINPOOLP160R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x01}
+ #define CODED_BRAINPOOLP160R1_SZ 9
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_brainpoolp160r1[] = CODED_BRAINPOOLP160R1;
+ #else
+ #define ecc_oid_brainpoolp160r1 CODED_BRAINPOOLP160R1
+ #endif
+ #define ecc_oid_brainpoolp160r1_sz CODED_BRAINPOOLP160R1_SZ
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC160 */
+#ifdef ECC192
+ #ifndef NO_ECC_SECP
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP192R1 {1,2,840,10045,3,1,1}
+ #define CODED_SECP192R1_SZ 7
+ #else
+ #define CODED_SECP192R1 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x01}
+ #define CODED_SECP192R1_SZ 8
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp192r1[] = CODED_SECP192R1;
+ #else
+ #define ecc_oid_secp192r1 CODED_SECP192R1
+ #endif
+ #define ecc_oid_secp192r1_sz CODED_SECP192R1_SZ
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_PRIME192V2 {1,2,840,10045,3,1,2}
+ #define CODED_PRIME192V2_SZ 7
+ #else
+ #define CODED_PRIME192V2 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x02}
+ #define CODED_PRIME192V2_SZ 8
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_prime192v2[] = CODED_PRIME192V2;
+ #else
+ #define ecc_oid_prime192v2 CODED_PRIME192V2
+ #endif
+ #define ecc_oid_prime192v2_sz CODED_PRIME192V2_SZ
+ #endif /* HAVE_ECC_SECPR2 */
+ #ifdef HAVE_ECC_SECPR3
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_PRIME192V3 {1,2,840,10045,3,1,3}
+ #define CODED_PRIME192V3_SZ 7
+ #else
+ #define CODED_PRIME192V3 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x03}
+ #define CODED_PRIME192V3_SZ 8
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_prime192v3[] = CODED_PRIME192V3;
+ #else
+ #define ecc_oid_prime192v3 CODED_PRIME192V3
+ #endif
+ #define ecc_oid_prime192v3_sz CODED_PRIME192V3_SZ
+ #endif /* HAVE_ECC_SECPR3 */
+ #ifdef HAVE_ECC_KOBLITZ
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP192K1 {1,3,132,0,31}
+ #define CODED_SECP192K1_SZ 5
+ #else
+ #define CODED_SECP192K1 {0x2B,0x81,0x04,0x00,0x1F}
+ #define CODED_SECP192K1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp192k1[] = CODED_SECP192K1;
+ #else
+ #define ecc_oid_secp192k1 CODED_SECP192K1
+ #endif
+ #define ecc_oid_secp192k1_sz CODED_SECP192K1_SZ
+ #endif /* HAVE_ECC_KOBLITZ */
+ #ifdef HAVE_ECC_BRAINPOOL
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_BRAINPOOLP192R1 {1,3,36,3,3,2,8,1,1,3}
+ #define CODED_BRAINPOOLP192R1_SZ 10
+ #else
+ #define CODED_BRAINPOOLP192R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x03}
+ #define CODED_BRAINPOOLP192R1_SZ 9
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_brainpoolp192r1[] = CODED_BRAINPOOLP192R1;
+ #else
+ #define ecc_oid_brainpoolp192r1 CODED_BRAINPOOLP192R1
+ #endif
+ #define ecc_oid_brainpoolp192r1_sz CODED_BRAINPOOLP192R1_SZ
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC192 */
+#ifdef ECC224
+ #ifndef NO_ECC_SECP
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP224R1 {1,3,132,0,33}
+ #define CODED_SECP224R1_SZ 5
+ #else
+ #define CODED_SECP224R1 {0x2B,0x81,0x04,0x00,0x21}
+ #define CODED_SECP224R1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp224r1[] = CODED_SECP224R1;
+ #else
+ #define ecc_oid_secp224r1 CODED_SECP224R1
+ #endif
+ #define ecc_oid_secp224r1_sz CODED_SECP224R1_SZ
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_KOBLITZ
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP224K1 {1,3,132,0,32}
+ #define CODED_SECP224K1_SZ 5
+ #else
+ #define CODED_SECP224K1 {0x2B,0x81,0x04,0x00,0x20}
+ #define CODED_SECP224K1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp224k1[] = CODED_SECP224K1;
+ #else
+ #define ecc_oid_secp224k1 CODED_SECP224K1
+ #endif
+ #define ecc_oid_secp224k1_sz CODED_SECP224K1_SZ
+ #endif /* HAVE_ECC_KOBLITZ */
+ #ifdef HAVE_ECC_BRAINPOOL
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_BRAINPOOLP224R1 {1,3,36,3,3,2,8,1,1,5}
+ #define CODED_BRAINPOOLP224R1_SZ 10
+ #else
+ #define CODED_BRAINPOOLP224R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x05}
+ #define CODED_BRAINPOOLP224R1_SZ 9
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_brainpoolp224r1[] = CODED_BRAINPOOLP224R1;
+ #else
+ #define ecc_oid_brainpoolp224r1 CODED_BRAINPOOLP224R1
+ #endif
+ #define ecc_oid_brainpoolp224r1_sz CODED_BRAINPOOLP224R1_SZ
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC224 */
+#ifdef ECC239
+ #ifndef NO_ECC_SECP
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_PRIME239V1 {1,2,840,10045,3,1,4}
+ #define CODED_PRIME239V1_SZ 7
+ #else
+ #define CODED_PRIME239V1 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x04}
+ #define CODED_PRIME239V1_SZ 8
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_prime239v1[] = CODED_PRIME239V1;
+ #else
+ #define ecc_oid_prime239v1 CODED_PRIME239V1
+ #endif
+ #define ecc_oid_prime239v1_sz CODED_PRIME239V1_SZ
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_PRIME239V2 {1,2,840,10045,3,1,5}
+ #define CODED_PRIME239V2_SZ 7
+ #else
+ #define CODED_PRIME239V2 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x05}
+ #define CODED_PRIME239V2_SZ 8
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_prime239v2[] = CODED_PRIME239V2;
+ #else
+ #define ecc_oid_prime239v2 CODED_PRIME239V2
+ #endif
+ #define ecc_oid_prime239v2_sz CODED_PRIME239V2_SZ
+ #endif /* HAVE_ECC_SECPR2 */
+ #ifdef HAVE_ECC_SECPR3
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_PRIME239V3 {1,2,840,10045,3,1,6}
+ #define CODED_PRIME239V3_SZ 7
+ #else
+ #define CODED_PRIME239V3 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x06}
+ #define CODED_PRIME239V3_SZ 8
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_prime239v3[] = CODED_PRIME239V3;
+ #else
+ #define ecc_oid_prime239v3 CODED_PRIME239V3
+ #endif
+ #define ecc_oid_prime239v3_sz CODED_PRIME239V3_SZ
+ #endif /* HAVE_ECC_SECPR3 */
+#endif /* ECC239 */
+#ifdef ECC256
+ #ifndef NO_ECC_SECP
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP256R1 {1,2,840,10045,3,1,7}
+ #define CODED_SECP256R1_SZ 7
+ #else
+ #define CODED_SECP256R1 {0x2A,0x86,0x48,0xCE,0x3D,0x03,0x01,0x07}
+ #define CODED_SECP256R1_SZ 8
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp256r1[] = CODED_SECP256R1;
+ #else
+ #define ecc_oid_secp256r1 CODED_SECP256R1
+ #endif
+ #define ecc_oid_secp256r1_sz CODED_SECP256R1_SZ
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_KOBLITZ
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP256K1 {1,3,132,0,10}
+ #define CODED_SECP256K1_SZ 5
+ #else
+ #define CODED_SECP256K1 {0x2B,0x81,0x04,0x00,0x0A}
+ #define CODED_SECP256K1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp256k1[] = CODED_SECP256K1;
+ #else
+ #define ecc_oid_secp256k1 CODED_SECP256K1
+ #endif
+ #define ecc_oid_secp256k1_sz CODED_SECP256K1_SZ
+ #endif /* HAVE_ECC_KOBLITZ */
+ #ifdef HAVE_ECC_BRAINPOOL
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_BRAINPOOLP256R1 {1,3,36,3,3,2,8,1,1,7}
+ #define CODED_BRAINPOOLP256R1_SZ 10
+ #else
+ #define CODED_BRAINPOOLP256R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x07}
+ #define CODED_BRAINPOOLP256R1_SZ 9
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_brainpoolp256r1[] = CODED_BRAINPOOLP256R1;
+ #else
+ #define ecc_oid_brainpoolp256r1 CODED_BRAINPOOLP256R1
+ #endif
+ #define ecc_oid_brainpoolp256r1_sz CODED_BRAINPOOLP256R1_SZ
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC256 */
+#ifdef ECC320
+ #ifdef HAVE_ECC_BRAINPOOL
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_BRAINPOOLP320R1 {1,3,36,3,3,2,8,1,1,9}
+ #define CODED_BRAINPOOLP320R1_SZ 10
+ #else
+ #define CODED_BRAINPOOLP320R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x09}
+ #define CODED_BRAINPOOLP320R1_SZ 9
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_brainpoolp320r1[] = CODED_BRAINPOOLP320R1;
+ #else
+ #define ecc_oid_brainpoolp320r1 CODED_BRAINPOOLP320R1
+ #endif
+ #define ecc_oid_brainpoolp320r1_sz CODED_BRAINPOOLP320R1_SZ
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC320 */
+#ifdef ECC384
+ #ifndef NO_ECC_SECP
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP384R1 {1,3,132,0,34}
+ #define CODED_SECP384R1_SZ 5
+ #else
+ #define CODED_SECP384R1 {0x2B,0x81,0x04,0x00,0x22}
+ #define CODED_SECP384R1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp384r1[] = CODED_SECP384R1;
+ #define CODED_SECP384R1_OID ecc_oid_secp384r1
+ #else
+ #define ecc_oid_secp384r1 CODED_SECP384R1
+ #endif
+ #define ecc_oid_secp384r1_sz CODED_SECP384R1_SZ
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_BRAINPOOL
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_BRAINPOOLP384R1 {1,3,36,3,3,2,8,1,1,11}
+ #define CODED_BRAINPOOLP384R1_SZ 10
+ #else
+ #define CODED_BRAINPOOLP384R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0B}
+ #define CODED_BRAINPOOLP384R1_SZ 9
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_brainpoolp384r1[] = CODED_BRAINPOOLP384R1;
+ #else
+ #define ecc_oid_brainpoolp384r1 CODED_BRAINPOOLP384R1
+ #endif
+ #define ecc_oid_brainpoolp384r1_sz CODED_BRAINPOOLP384R1_SZ
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC384 */
+#ifdef ECC512
+ #ifdef HAVE_ECC_BRAINPOOL
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_BRAINPOOLP512R1 {1,3,36,3,3,2,8,1,1,13}
+ #define CODED_BRAINPOOLP512R1_SZ 10
+ #else
+ #define CODED_BRAINPOOLP512R1 {0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0D}
+ #define CODED_BRAINPOOLP512R1_SZ 9
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_brainpoolp512r1[] = CODED_BRAINPOOLP512R1;
+ #else
+ #define ecc_oid_brainpoolp512r1 CODED_BRAINPOOLP512R1
+ #endif
+ #define ecc_oid_brainpoolp512r1_sz CODED_BRAINPOOLP512R1_SZ
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC512 */
+#ifdef ECC521
+ #ifndef NO_ECC_SECP
+ #ifdef HAVE_OID_ENCODING
+ #define CODED_SECP521R1 {1,3,132,0,35}
+ #define CODED_SECP521R1_SZ 5
+ #else
+ #define CODED_SECP521R1 {0x2B,0x81,0x04,0x00,0x23}
+ #define CODED_SECP521R1_SZ 5
+ #endif
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ static const ecc_oid_t ecc_oid_secp521r1[] = CODED_SECP521R1;
+ #else
+ #define ecc_oid_secp521r1 CODED_SECP521R1
+ #endif
+ #define ecc_oid_secp521r1_sz CODED_SECP521R1_SZ
+ #endif /* !NO_ECC_SECP */
+#endif /* ECC521 */
+
+
+/* This holds the key settings.
+ ***MUST*** be organized by size from smallest to largest. */
const ecc_set_type ecc_sets[] = {
#ifdef ECC112
-{
- 14,
- "SECP112R1",
- "DB7C2ABF62E35E668076BEAD208B",
- "DB7C2ABF62E35E668076BEAD2088",
- "659EF8BA043916EEDE8911702B22",
- "DB7C2ABF62E35E7628DFAC6561C5",
- "09487239995A5EE76B55F9C2F098",
- "A89CE5AF8724C0A23E0E0FF77500"
-},
-#endif
+ #ifndef NO_ECC_SECP
+ {
+ 14, /* size/bytes */
+ ECC_SECP112R1, /* ID */
+ "SECP112R1", /* curve name */
+ "DB7C2ABF62E35E668076BEAD208B", /* prime */
+ "DB7C2ABF62E35E668076BEAD2088", /* A */
+ "659EF8BA043916EEDE8911702B22", /* B */
+ "DB7C2ABF62E35E7628DFAC6561C5", /* order */
+ "9487239995A5EE76B55F9C2F098", /* Gx */
+ "A89CE5AF8724C0A23E0E0FF77500", /* Gy */
+ ecc_oid_secp112r1, /* oid/oidSz */
+ ecc_oid_secp112r1_sz,
+ ECC_SECP112R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ {
+ 14, /* size/bytes */
+ ECC_SECP112R2, /* ID */
+ "SECP112R2", /* curve name */
+ "DB7C2ABF62E35E668076BEAD208B", /* prime */
+ "6127C24C05F38A0AAAF65C0EF02C", /* A */
+ "51DEF1815DB5ED74FCC34C85D709", /* B */
+ "36DF0AAFD8B8D7597CA10520D04B", /* order */
+ "4BA30AB5E892B4E1649DD0928643", /* Gx */
+ "ADCD46F5882E3747DEF36E956E97", /* Gy */
+ ecc_oid_secp112r2, /* oid/oidSz */
+ ecc_oid_secp112r2_sz,
+ ECC_SECP112R2_OID, /* oid sum */
+ 4, /* cofactor */
+ },
+ #endif /* HAVE_ECC_SECPR2 */
+#endif /* ECC112 */
#ifdef ECC128
-{
- 16,
- "SECP128R1",
- "FFFFFFFDFFFFFFFFFFFFFFFFFFFFFFFF",
- "FFFFFFFDFFFFFFFFFFFFFFFFFFFFFFFC",
- "E87579C11079F43DD824993C2CEE5ED3",
- "FFFFFFFE0000000075A30D1B9038A115",
- "161FF7528B899B2D0C28607CA52C5B86",
- "CF5AC8395BAFEB13C02DA292DDED7A83",
-},
-#endif
+ #ifndef NO_ECC_SECP
+ {
+ 16, /* size/bytes */
+ ECC_SECP128R1, /* ID */
+ "SECP128R1", /* curve name */
+ "FFFFFFFDFFFFFFFFFFFFFFFFFFFFFFFF", /* prime */
+ "FFFFFFFDFFFFFFFFFFFFFFFFFFFFFFFC", /* A */
+ "E87579C11079F43DD824993C2CEE5ED3", /* B */
+ "FFFFFFFE0000000075A30D1B9038A115", /* order */
+ "161FF7528B899B2D0C28607CA52C5B86", /* Gx */
+ "CF5AC8395BAFEB13C02DA292DDED7A83", /* Gy */
+ ecc_oid_secp128r1, /* oid/oidSz */
+ ecc_oid_secp128r1_sz,
+ ECC_SECP128R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ {
+ 16, /* size/bytes */
+ ECC_SECP128R2, /* ID */
+ "SECP128R2", /* curve name */
+ "FFFFFFFDFFFFFFFFFFFFFFFFFFFFFFFF", /* prime */
+ "D6031998D1B3BBFEBF59CC9BBFF9AEE1", /* A */
+ "5EEEFCA380D02919DC2C6558BB6D8A5D", /* B */
+ "3FFFFFFF7FFFFFFFBE0024720613B5A3", /* order */
+ "7B6AA5D85E572983E6FB32A7CDEBC140", /* Gx */
+ "27B6916A894D3AEE7106FE805FC34B44", /* Gy */
+ ecc_oid_secp128r2, /* oid/oidSz */
+ ecc_oid_secp128r2_sz,
+ ECC_SECP128R2_OID, /* oid sum */
+ 4, /* cofactor */
+ },
+ #endif /* HAVE_ECC_SECPR2 */
+#endif /* ECC128 */
#ifdef ECC160
-{
- 20,
- "SECP160R1",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF7FFFFFFF",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF7FFFFFFC",
- "1C97BEFC54BD7A8B65ACF89F81D4D4ADC565FA45",
- "0100000000000000000001F4C8F927AED3CA752257",
- "4A96B5688EF573284664698968C38BB913CBFC82",
- "23A628553168947D59DCC912042351377AC5FB32",
-},
-#endif
+ #ifndef NO_ECC_SECP
+ {
+ 20, /* size/bytes */
+ ECC_SECP160R1, /* ID */
+ "SECP160R1", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF7FFFFFFF", /* prime */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF7FFFFFFC", /* A */
+ "1C97BEFC54BD7A8B65ACF89F81D4D4ADC565FA45", /* B */
+ "100000000000000000001F4C8F927AED3CA752257",/* order */
+ "4A96B5688EF573284664698968C38BB913CBFC82", /* Gx */
+ "23A628553168947D59DCC912042351377AC5FB32", /* Gy */
+ ecc_oid_secp160r1, /* oid/oidSz */
+ ecc_oid_secp160r1_sz,
+ ECC_SECP160R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ {
+ 20, /* size/bytes */
+ ECC_SECP160R2, /* ID */
+ "SECP160R2", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFAC73", /* prime */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFAC70", /* A */
+ "B4E134D3FB59EB8BAB57274904664D5AF50388BA", /* B */
+ "100000000000000000000351EE786A818F3A1A16B",/* order */
+ "52DCB034293A117E1F4FF11B30F7199D3144CE6D", /* Gx */
+ "FEAFFEF2E331F296E071FA0DF9982CFEA7D43F2E", /* Gy */
+ ecc_oid_secp160r2, /* oid/oidSz */
+ ecc_oid_secp160r2_sz,
+ ECC_SECP160R2_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_SECPR2 */
+ #ifdef HAVE_ECC_KOBLITZ
+ {
+ 20, /* size/bytes */
+ ECC_SECP160K1, /* ID */
+ "SECP160K1", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFAC73", /* prime */
+ "0000000000000000000000000000000000000000", /* A */
+ "0000000000000000000000000000000000000007", /* B */
+ "100000000000000000001B8FA16DFAB9ACA16B6B3",/* order */
+ "3B4C382CE37AA192A4019E763036F4F5DD4D7EBB", /* Gx */
+ "938CF935318FDCED6BC28286531733C3F03C4FEE", /* Gy */
+ ecc_oid_secp160k1, /* oid/oidSz */
+ ecc_oid_secp160k1_sz,
+ ECC_SECP160K1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_KOBLITZ */
+ #ifdef HAVE_ECC_BRAINPOOL
+ {
+ 20, /* size/bytes */
+ ECC_BRAINPOOLP160R1, /* ID */
+ "BRAINPOOLP160R1", /* curve name */
+ "E95E4A5F737059DC60DFC7AD95B3D8139515620F", /* prime */
+ "340E7BE2A280EB74E2BE61BADA745D97E8F7C300", /* A */
+ "1E589A8595423412134FAA2DBDEC95C8D8675E58", /* B */
+ "E95E4A5F737059DC60DF5991D45029409E60FC09", /* order */
+ "BED5AF16EA3F6A4F62938C4631EB5AF7BDBCDBC3", /* Gx */
+ "1667CB477A1A8EC338F94741669C976316DA6321", /* Gy */
+ ecc_oid_brainpoolp160r1, /* oid/oidSz */
+ ecc_oid_brainpoolp160r1_sz,
+ ECC_BRAINPOOLP160R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC160 */
#ifdef ECC192
-{
- 24,
- "ECC-192",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFC",
- "64210519E59C80E70FA7E9AB72243049FEB8DEECC146B9B1",
- "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22831",
- "188DA80EB03090F67CBF20EB43A18800F4FF0AFD82FF1012",
- "7192B95FFC8DA78631011ED6B24CDD573F977A11E794811",
-},
-#endif
+ #ifndef NO_ECC_SECP
+ {
+ 24, /* size/bytes */
+ ECC_SECP192R1, /* ID */
+ "SECP192R1", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF", /* prime */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFC", /* A */
+ "64210519E59C80E70FA7E9AB72243049FEB8DEECC146B9B1", /* B */
+ "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22831", /* order */
+ "188DA80EB03090F67CBF20EB43A18800F4FF0AFD82FF1012", /* Gx */
+ "7192B95FFC8DA78631011ED6B24CDD573F977A11E794811", /* Gy */
+ ecc_oid_secp192r1, /* oid/oidSz */
+ ecc_oid_secp192r1_sz,
+ ECC_SECP192R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ {
+ 24, /* size/bytes */
+ ECC_PRIME192V2, /* ID */
+ "PRIME192V2", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF", /* prime */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFC", /* A */
+ "CC22D6DFB95C6B25E49C0D6364A4E5980C393AA21668D953", /* B */
+ "FFFFFFFFFFFFFFFFFFFFFFFE5FB1A724DC80418648D8DD31", /* order */
+ "EEA2BAE7E1497842F2DE7769CFE9C989C072AD696F48034A", /* Gx */
+ "6574D11D69B6EC7A672BB82A083DF2F2B0847DE970B2DE15", /* Gy */
+ ecc_oid_prime192v2, /* oid/oidSz */
+ ecc_oid_prime192v2_sz,
+ ECC_PRIME192V2_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_SECPR2 */
+ #ifdef HAVE_ECC_SECPR3
+ {
+ 24, /* size/bytes */
+ ECC_PRIME192V3, /* ID */
+ "PRIME192V3", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF", /* prime */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFC", /* A */
+ "22123DC2395A05CAA7423DAECCC94760A7D462256BD56916", /* B */
+ "FFFFFFFFFFFFFFFFFFFFFFFF7A62D031C83F4294F640EC13", /* order */
+ "7D29778100C65A1DA1783716588DCE2B8B4AEE8E228F1896", /* Gx */
+ "38A90F22637337334B49DCB66A6DC8F9978ACA7648A943B0", /* Gy */
+ ecc_oid_prime192v3, /* oid/oidSz */
+ ecc_oid_prime192v3_sz,
+ ECC_PRIME192V3_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_SECPR3 */
+ #ifdef HAVE_ECC_KOBLITZ
+ {
+ 24, /* size/bytes */
+ ECC_SECP192K1, /* ID */
+ "SECP192K1", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFEE37", /* prime */
+ "000000000000000000000000000000000000000000000000", /* A */
+ "000000000000000000000000000000000000000000000003", /* B */
+ "FFFFFFFFFFFFFFFFFFFFFFFE26F2FC170F69466A74DEFD8D", /* order */
+ "DB4FF10EC057E9AE26B07D0280B7F4341DA5D1B1EAE06C7D", /* Gx */
+ "9B2F2F6D9C5628A7844163D015BE86344082AA88D95E2F9D", /* Gy */
+ ecc_oid_secp192k1, /* oid/oidSz */
+ ecc_oid_secp192k1_sz,
+ ECC_SECP192K1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_KOBLITZ */
+ #ifdef HAVE_ECC_BRAINPOOL
+ {
+ 24, /* size/bytes */
+ ECC_BRAINPOOLP192R1, /* ID */
+ "BRAINPOOLP192R1", /* curve name */
+ "C302F41D932A36CDA7A3463093D18DB78FCE476DE1A86297", /* prime */
+ "6A91174076B1E0E19C39C031FE8685C1CAE040E5C69A28EF", /* A */
+ "469A28EF7C28CCA3DC721D044F4496BCCA7EF4146FBF25C9", /* B */
+ "C302F41D932A36CDA7A3462F9E9E916B5BE8F1029AC4ACC1", /* order */
+ "C0A0647EAAB6A48753B033C56CB0F0900A2F5C4853375FD6", /* Gx */
+ "14B690866ABD5BB88B5F4828C1490002E6773FA2FA299B8F", /* Gy */
+ ecc_oid_brainpoolp192r1, /* oid/oidSz */
+ ecc_oid_brainpoolp192r1_sz,
+ ECC_BRAINPOOLP192R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC192 */
#ifdef ECC224
-{
- 28,
- "ECC-224",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFE",
- "B4050A850C04B3ABF54132565044B0B7D7BFD8BA270B39432355FFB4",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3D",
- "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D21",
- "BD376388B5F723FB4C22DFE6CD4375A05A07476444D5819985007E34",
-},
-#endif
+ #ifndef NO_ECC_SECP
+ {
+ 28, /* size/bytes */
+ ECC_SECP224R1, /* ID */
+ "SECP224R1", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001", /* prime */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFE", /* A */
+ "B4050A850C04B3ABF54132565044B0B7D7BFD8BA270B39432355FFB4", /* B */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3D", /* order */
+ "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D21", /* Gx */
+ "BD376388B5F723FB4C22DFE6CD4375A05A07476444D5819985007E34", /* Gy */
+ ecc_oid_secp224r1, /* oid/oidSz */
+ ecc_oid_secp224r1_sz,
+ ECC_SECP224R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_KOBLITZ
+ {
+ 28, /* size/bytes */
+ ECC_SECP224K1, /* ID */
+ "SECP224K1", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFE56D", /* prime */
+ "00000000000000000000000000000000000000000000000000000000", /* A */
+ "00000000000000000000000000000000000000000000000000000005", /* B */
+ "10000000000000000000000000001DCE8D2EC6184CAF0A971769FB1F7",/* order */
+ "A1455B334DF099DF30FC28A169A467E9E47075A90F7E650EB6B7A45C", /* Gx */
+ "7E089FED7FBA344282CAFBD6F7E319F7C0B0BD59E2CA4BDB556D61A5", /* Gy */
+ ecc_oid_secp224k1, /* oid/oidSz */
+ ecc_oid_secp224k1_sz,
+ ECC_SECP224K1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_KOBLITZ */
+ #ifdef HAVE_ECC_BRAINPOOL
+ {
+ 28, /* size/bytes */
+ ECC_BRAINPOOLP224R1, /* ID */
+ "BRAINPOOLP224R1", /* curve name */
+ "D7C134AA264366862A18302575D1D787B09F075797DA89F57EC8C0FF", /* prime */
+ "68A5E62CA9CE6C1C299803A6C1530B514E182AD8B0042A59CAD29F43", /* A */
+ "2580F63CCFE44138870713B1A92369E33E2135D266DBB372386C400B", /* B */
+ "D7C134AA264366862A18302575D0FB98D116BC4B6DDEBCA3A5A7939F", /* order */
+ "0D9029AD2C7E5CF4340823B2A87DC68C9E4CE3174C1E6EFDEE12C07D", /* Gx */
+ "58AA56F772C0726F24C6B89E4ECDAC24354B9E99CAA3F6D3761402CD", /* Gy */
+ ecc_oid_brainpoolp224r1, /* oid/oidSz */
+ ecc_oid_brainpoolp224r1_sz,
+ ECC_BRAINPOOLP224R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC224 */
+#ifdef ECC239
+ #ifndef NO_ECC_SECP
+ {
+ 30, /* size/bytes */
+ ECC_PRIME239V1, /* ID */
+ "PRIME239V1", /* curve name */
+ "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFF", /* prime */
+ "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFC", /* A */
+ "6B016C3BDCF18941D0D654921475CA71A9DB2FB27D1D37796185C2942C0A", /* B */
+ "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFF9E5E9A9F5D9071FBD1522688909D0B", /* order */
+ "0FFA963CDCA8816CCC33B8642BEDF905C3D358573D3F27FBBD3B3CB9AAAF", /* Gx */
+ "7DEBE8E4E90A5DAE6E4054CA530BA04654B36818CE226B39FCCB7B02F1AE", /* Gy */
+ ecc_oid_prime239v1, /* oid/oidSz */
+ ecc_oid_prime239v1_sz,
+ ECC_PRIME239V1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_SECPR2
+ {
+ 30, /* size/bytes */
+ ECC_PRIME239V2, /* ID */
+ "PRIME239V2", /* curve name */
+ "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFF", /* prime */
+ "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFC", /* A */
+ "617FAB6832576CBBFED50D99F0249C3FEE58B94BA0038C7AE84C8C832F2C", /* B */
+ "7FFFFFFFFFFFFFFFFFFFFFFF800000CFA7E8594377D414C03821BC582063", /* order */
+ "38AF09D98727705120C921BB5E9E26296A3CDCF2F35757A0EAFD87B830E7", /* Gx */
+ "5B0125E4DBEA0EC7206DA0FC01D9B081329FB555DE6EF460237DFF8BE4BA", /* Gy */
+ ecc_oid_prime239v2, /* oid/oidSz */
+ ecc_oid_prime239v2_sz,
+ ECC_PRIME239V2_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_SECPR2 */
+ #ifdef HAVE_ECC_SECPR3
+ {
+ 30, /* size/bytes */
+ ECC_PRIME239V3, /* ID */
+ "PRIME239V3", /* curve name */
+ "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFF", /* prime */
+ "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFFFFFFFF8000000000007FFFFFFFFFFC", /* A */
+ "255705FA2A306654B1F4CB03D6A750A30C250102D4988717D9BA15AB6D3E", /* B */
+ "7FFFFFFFFFFFFFFFFFFFFFFF7FFFFF975DEB41B3A6057C3C432146526551", /* order */
+ "6768AE8E18BB92CFCF005C949AA2C6D94853D0E660BBF854B1C9505FE95A", /* Gx */
+ "1607E6898F390C06BC1D552BAD226F3B6FCFE48B6E818499AF18E3ED6CF3", /* Gy */
+ ecc_oid_prime239v3, /* oid/oidSz */
+ ecc_oid_prime239v3_sz,
+ ECC_PRIME239V3_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_SECPR3 */
+#endif /* ECC239 */
#ifdef ECC256
-{
- 32,
- "ECC-256",
- "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF",
- "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC",
- "5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B",
- "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551",
- "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296",
- "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5",
-},
-#endif
+ #ifndef NO_ECC_SECP
+ {
+ 32, /* size/bytes */
+ ECC_SECP256R1, /* ID */
+ "SECP256R1", /* curve name */
+ "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF", /* prime */
+ "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC", /* A */
+ "5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B", /* B */
+ "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551", /* order */
+ "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", /* Gx */
+ "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", /* Gy */
+ ecc_oid_secp256r1, /* oid/oidSz */
+ ecc_oid_secp256r1_sz,
+ ECC_SECP256R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_KOBLITZ
+ {
+ 32, /* size/bytes */
+ ECC_SECP256K1, /* ID */
+ "SECP256K1", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F", /* prime */
+ "0000000000000000000000000000000000000000000000000000000000000000", /* A */
+ "0000000000000000000000000000000000000000000000000000000000000007", /* B */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", /* order */
+ "79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798", /* Gx */
+ "483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8", /* Gy */
+ ecc_oid_secp256k1, /* oid/oidSz */
+ ecc_oid_secp256k1_sz,
+ ECC_SECP256K1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_KOBLITZ */
+ #ifdef HAVE_ECC_BRAINPOOL
+ {
+ 32, /* size/bytes */
+ ECC_BRAINPOOLP256R1, /* ID */
+ "BRAINPOOLP256R1", /* curve name */
+ "A9FB57DBA1EEA9BC3E660A909D838D726E3BF623D52620282013481D1F6E5377", /* prime */
+ "7D5A0975FC2C3057EEF67530417AFFE7FB8055C126DC5C6CE94A4B44F330B5D9", /* A */
+ "26DC5C6CE94A4B44F330B5D9BBD77CBF958416295CF7E1CE6BCCDC18FF8C07B6", /* B */
+ "A9FB57DBA1EEA9BC3E660A909D838D718C397AA3B561A6F7901E0E82974856A7", /* order */
+ "8BD2AEB9CB7E57CB2C4B482FFC81B7AFB9DE27E1E3BD23C23A4453BD9ACE3262", /* Gx */
+ "547EF835C3DAC4FD97F8461A14611DC9C27745132DED8E545C1D54C72F046997", /* Gy */
+ ecc_oid_brainpoolp256r1, /* oid/oidSz */
+ ecc_oid_brainpoolp256r1_sz,
+ ECC_BRAINPOOLP256R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC256 */
+#ifdef ECC320
+ #ifdef HAVE_ECC_BRAINPOOL
+ {
+ 40, /* size/bytes */
+ ECC_BRAINPOOLP320R1, /* ID */
+ "BRAINPOOLP320R1", /* curve name */
+ "D35E472036BC4FB7E13C785ED201E065F98FCFA6F6F40DEF4F92B9EC7893EC28FCD412B1F1B32E27", /* prime */
+ "3EE30B568FBAB0F883CCEBD46D3F3BB8A2A73513F5EB79DA66190EB085FFA9F492F375A97D860EB4", /* A */
+ "520883949DFDBC42D3AD198640688A6FE13F41349554B49ACC31DCCD884539816F5EB4AC8FB1F1A6", /* B */
+ "D35E472036BC4FB7E13C785ED201E065F98FCFA5B68F12A32D482EC7EE8658E98691555B44C59311", /* order */
+ "43BD7E9AFB53D8B85289BCC48EE5BFE6F20137D10A087EB6E7871E2A10A599C710AF8D0D39E20611", /* Gx */
+ "14FDD05545EC1CC8AB4093247F77275E0743FFED117182EAA9C77877AAAC6AC7D35245D1692E8EE1", /* Gy */
+ ecc_oid_brainpoolp320r1, ecc_oid_brainpoolp320r1_sz, /* oid/oidSz */
+ ECC_BRAINPOOLP320R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC320 */
#ifdef ECC384
-{
- 48,
- "ECC-384",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFC",
- "B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF",
- "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973",
- "AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7",
- "3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F",
-},
-#endif
+ #ifndef NO_ECC_SECP
+ {
+ 48, /* size/bytes */
+ ECC_SECP384R1, /* ID */
+ "SECP384R1", /* curve name */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF", /* prime */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFC", /* A */
+ "B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF", /* B */
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973", /* order */
+ "AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7", /* Gx */
+ "3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F", /* Gy */
+ ecc_oid_secp384r1, ecc_oid_secp384r1_sz, /* oid/oidSz */
+ ECC_SECP384R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* !NO_ECC_SECP */
+ #ifdef HAVE_ECC_BRAINPOOL
+ {
+ 48, /* size/bytes */
+ ECC_BRAINPOOLP384R1, /* ID */
+ "BRAINPOOLP384R1", /* curve name */
+ "8CB91E82A3386D280F5D6F7E50E641DF152F7109ED5456B412B1DA197FB71123ACD3A729901D1A71874700133107EC53", /* prime */
+ "7BC382C63D8C150C3C72080ACE05AFA0C2BEA28E4FB22787139165EFBA91F90F8AA5814A503AD4EB04A8C7DD22CE2826", /* A */
+ "04A8C7DD22CE28268B39B55416F0447C2FB77DE107DCD2A62E880EA53EEB62D57CB4390295DBC9943AB78696FA504C11", /* B */
+ "8CB91E82A3386D280F5D6F7E50E641DF152F7109ED5456B31F166E6CAC0425A7CF3AB6AF6B7FC3103B883202E9046565", /* order */
+ "1D1C64F068CF45FFA2A63A81B7C13F6B8847A3E77EF14FE3DB7FCAFE0CBD10E8E826E03436D646AAEF87B2E247D4AF1E", /* Gx */
+ "8ABE1D7520F9C2A45CB1EB8E95CFD55262B70B29FEEC5864E19C054FF99129280E4646217791811142820341263C5315", /* Gy */
+ ecc_oid_brainpoolp384r1, ecc_oid_brainpoolp384r1_sz, /* oid/oidSz */
+ ECC_BRAINPOOLP384R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC384 */
+#ifdef ECC512
+ #ifdef HAVE_ECC_BRAINPOOL
+ {
+ 64, /* size/bytes */
+ ECC_BRAINPOOLP512R1, /* ID */
+ "BRAINPOOLP512R1", /* curve name */
+ "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA703308717D4D9B009BC66842AECDA12AE6A380E62881FF2F2D82C68528AA6056583A48F3", /* prime */
+ "7830A3318B603B89E2327145AC234CC594CBDD8D3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CA", /* A */
+ "3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CADC083E67984050B75EBAE5DD2809BD638016F723", /* B */
+ "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA70330870553E5C414CA92619418661197FAC10471DB1D381085DDADDB58796829CA90069", /* order */
+ "81AEE4BDD82ED9645A21322E9C4C6A9385ED9F70B5D916C1B43B62EEF4D0098EFF3B1F78E2D0D48D50D1687B93B97D5F7C6D5047406A5E688B352209BCB9F822", /* Gx */
+ "7DDE385D566332ECC0EABFA9CF7822FDF209F70024A57B1AA000C55B881F8111B2DCDE494A5F485E5BCA4BD88A2763AED1CA2B2FA8F0540678CD1E0F3AD80892", /* Gy */
+ ecc_oid_brainpoolp512r1, ecc_oid_brainpoolp512r1_sz, /* oid/oidSz */
+ ECC_BRAINPOOLP512R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* HAVE_ECC_BRAINPOOL */
+#endif /* ECC512 */
#ifdef ECC521
-{
- 66,
- "ECC-521",
- "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
- "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC",
- "51953EB9618E1C9A1F929A21A0B68540EEA2DA725B99B315F3B8B489918EF109E156193951EC7E937B1652C0BD3BB1BF073573DF883D2C34F1EF451FD46B503F00",
- "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5C9B8899C47AEBB6FB71E91386409",
- "C6858E06B70404E9CD9E3ECB662395B4429C648139053FB521F828AF606B4D3DBAA14B5E77EFE75928FE1DC127A2FFA8DE3348B3C1856A429BF97E7E31C2E5BD66",
- "11839296A789A3BC0045C8A5FB42C7D1BD998F54449579B446817AFBD17273E662C97EE72995EF42640C550B9013FAD0761353C7086A272C24088BE94769FD16650",
-},
+ #ifndef NO_ECC_SECP
+ {
+ 66, /* size/bytes */
+ ECC_SECP521R1, /* ID */
+ "SECP521R1", /* curve name */
+ "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", /* prime */
+ "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC", /* A */
+ "51953EB9618E1C9A1F929A21A0B68540EEA2DA725B99B315F3B8B489918EF109E156193951EC7E937B1652C0BD3BB1BF073573DF883D2C34F1EF451FD46B503F00", /* B */
+ "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5C9B8899C47AEBB6FB71E91386409", /* order */
+ "C6858E06B70404E9CD9E3ECB662395B4429C648139053FB521F828AF606B4D3DBAA14B5E77EFE75928FE1DC127A2FFA8DE3348B3C1856A429BF97E7E31C2E5BD66", /* Gx */
+ "11839296A789A3BC0045C8A5FB42C7D1BD998F54449579B446817AFBD17273E662C97EE72995EF42640C550B9013FAD0761353C7086A272C24088BE94769FD16650", /* Gy */
+ ecc_oid_secp521r1, ecc_oid_secp521r1_sz, /* oid/oidSz */
+ ECC_SECP521R1_OID, /* oid sum */
+ 1, /* cofactor */
+ },
+ #endif /* !NO_ECC_SECP */
+#endif /* ECC521 */
+#if defined(WOLFSSL_CUSTOM_CURVES) && defined(ECC_CACHE_CURVE)
+ /* place holder for custom curve index for cache */
+ {
+ 1, /* non-zero */
+ ECC_CURVE_CUSTOM,
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ #else
+ {0},{0},{0},{0},{0},{0},{0},{0},
+ #endif
+ 0, 0, 0
+ },
#endif
-{
- 0,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL
-}
+ {
+ 0,
+ ECC_CURVE_INVALID,
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ #else
+ {0},{0},{0},{0},{0},{0},{0},{0},
+ #endif
+ 0, 0, 0
+ }
};
+#define ECC_SET_COUNT (sizeof(ecc_sets)/sizeof(ecc_set_type))
+const size_t ecc_sets_count = ECC_SET_COUNT - 1;
-ecc_point* ecc_new_point(void);
-void ecc_del_point(ecc_point* p);
-int ecc_map(ecc_point*, mp_int*, mp_digit*);
-int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R,
- mp_int* modulus, mp_digit* mp);
-int ecc_projective_dbl_point(ecc_point* P, ecc_point* R, mp_int* modulus,
- mp_digit* mp);
-static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
- int map);
-static int ecc_check_pubkey_order(ecc_key* key, mp_int* prime, mp_int* order);
-#ifdef ECC_SHAMIR
-static int ecc_mul2add(ecc_point* A, mp_int* kA, ecc_point* B, mp_int* kB,
- ecc_point* C, mp_int* modulus);
+#ifdef HAVE_OID_ENCODING
+ /* encoded OID cache */
+ typedef struct {
+ word32 oidSz;
+ byte oid[ECC_MAX_OID_LEN];
+ } oid_cache_t;
+ static oid_cache_t ecc_oid_cache[ECC_SET_COUNT];
#endif
-int mp_jacobi(mp_int* a, mp_int* p, int* c);
-int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret);
-int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d);
#ifdef HAVE_COMP_KEY
static int wc_ecc_export_x963_compressed(ecc_key*, byte* out, word32* outLen);
#endif
-/* helper for either lib */
-static int get_digit_count(mp_int* a)
+
+#if (defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || !defined(WOLFSSL_SP_MATH)) && \
+ !defined(WOLFSSL_ATECC508A)
+static int ecc_check_pubkey_order(ecc_key* key, ecc_point* pubkey, mp_int* a,
+ mp_int* prime, mp_int* order);
+#endif
+
+int mp_jacobi(mp_int* a, mp_int* n, int* c);
+int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret);
+
+
+/* Curve Specs */
+typedef struct ecc_curve_spec {
+ const ecc_set_type* dp;
+
+ mp_int* prime;
+ mp_int* Af;
+ #ifdef USE_ECC_B_PARAM
+ mp_int* Bf;
+ #endif
+ mp_int* order;
+ mp_int* Gx;
+ mp_int* Gy;
+
+#ifdef ECC_CACHE_CURVE
+ mp_int prime_lcl;
+ mp_int Af_lcl;
+ #ifdef USE_ECC_B_PARAM
+ mp_int Bf_lcl;
+ #endif
+ mp_int order_lcl;
+ mp_int Gx_lcl;
+ mp_int Gy_lcl;
+#else
+ mp_int* spec_ints;
+ word32 spec_count;
+ word32 spec_use;
+#endif
+
+ byte load_mask;
+} ecc_curve_spec;
+
+enum ecc_curve_load_mask {
+ ECC_CURVE_FIELD_NONE = 0x00,
+ ECC_CURVE_FIELD_PRIME = 0x01,
+ ECC_CURVE_FIELD_AF = 0x02,
+#ifdef USE_ECC_B_PARAM
+ ECC_CURVE_FIELD_BF = 0x04,
+#endif
+ ECC_CURVE_FIELD_ORDER = 0x08,
+ ECC_CURVE_FIELD_GX = 0x10,
+ ECC_CURVE_FIELD_GY = 0x20,
+#ifdef USE_ECC_B_PARAM
+ ECC_CURVE_FIELD_ALL = 0x3F,
+ ECC_CURVE_FIELD_COUNT = 6,
+#else
+ ECC_CURVE_FIELD_ALL = 0x3B,
+ ECC_CURVE_FIELD_COUNT = 5,
+#endif
+};
+
+#ifdef ECC_CACHE_CURVE
+ /* cache (mp_int) of the curve parameters */
+ static ecc_curve_spec* ecc_curve_spec_cache[ECC_SET_COUNT];
+ #ifndef SINGLE_THREADED
+ static wolfSSL_Mutex ecc_curve_cache_mutex;
+ #endif
+
+ #define DECLARE_CURVE_SPECS(curve, intcount) ecc_curve_spec* curve = NULL
+ #define ALLOC_CURVE_SPECS(intcount)
+ #define FREE_CURVE_SPECS()
+#elif defined(WOLFSSL_SMALL_STACK)
+ #define DECLARE_CURVE_SPECS(curve, intcount) \
+ mp_int* spec_ints = NULL; \
+ ecc_curve_spec curve_lcl; \
+ ecc_curve_spec* curve = &curve_lcl; \
+ XMEMSET(curve, 0, sizeof(ecc_curve_spec)); \
+ curve->spec_count = intcount
+
+ #define ALLOC_CURVE_SPECS(intcount) \
+ spec_ints = (mp_int*)XMALLOC(sizeof(mp_int) * (intcount), NULL, \
+ DYNAMIC_TYPE_ECC); \
+ if (spec_ints == NULL) \
+ return MEMORY_E; \
+ curve->spec_ints = spec_ints
+ #define FREE_CURVE_SPECS() \
+ XFREE(spec_ints, NULL, DYNAMIC_TYPE_ECC)
+#else
+ #define DECLARE_CURVE_SPECS(curve, intcount) \
+ mp_int spec_ints[(intcount)]; \
+ ecc_curve_spec curve_lcl; \
+ ecc_curve_spec* curve = &curve_lcl; \
+ XMEMSET(curve, 0, sizeof(ecc_curve_spec)); \
+ curve->spec_ints = spec_ints; \
+ curve->spec_count = intcount
+ #define ALLOC_CURVE_SPECS(intcount)
+ #define FREE_CURVE_SPECS()
+#endif /* ECC_CACHE_CURVE */
+
+static void _wc_ecc_curve_free(ecc_curve_spec* curve)
{
- if (a == NULL)
- return 0;
+ if (curve == NULL) {
+ return;
+ }
- return a->used;
+ if (curve->load_mask & ECC_CURVE_FIELD_PRIME)
+ mp_clear(curve->prime);
+ if (curve->load_mask & ECC_CURVE_FIELD_AF)
+ mp_clear(curve->Af);
+#ifdef USE_ECC_B_PARAM
+ if (curve->load_mask & ECC_CURVE_FIELD_BF)
+ mp_clear(curve->Bf);
+#endif
+ if (curve->load_mask & ECC_CURVE_FIELD_ORDER)
+ mp_clear(curve->order);
+ if (curve->load_mask & ECC_CURVE_FIELD_GX)
+ mp_clear(curve->Gx);
+ if (curve->load_mask & ECC_CURVE_FIELD_GY)
+ mp_clear(curve->Gy);
+
+ curve->load_mask = 0;
}
-/* helper for either lib */
-static mp_digit get_digit(mp_int* a, int n)
+static void wc_ecc_curve_free(ecc_curve_spec* curve)
{
- if (a == NULL)
- return 0;
-
- return (n >= a->used || n < 0) ? 0 : a->dp[n];
+ /* don't free cached curves */
+#ifndef ECC_CACHE_CURVE
+ _wc_ecc_curve_free(curve);
+#endif
+ (void)curve;
}
+static int wc_ecc_curve_load_item(const char* src, mp_int** dst,
+ ecc_curve_spec* curve, byte mask)
+{
+ int err;
-#if defined(USE_FAST_MATH)
+#ifndef ECC_CACHE_CURVE
+ /* get mp_int from temp */
+ if (curve->spec_use >= curve->spec_count) {
+ WOLFSSL_MSG("Invalid DECLARE_CURVE_SPECS count");
+ return ECC_BAD_ARG_E;
+ }
+ *dst = &curve->spec_ints[curve->spec_use++];
+#endif
-/* fast math accelerated version, but not for fp ecc yet */
+ err = mp_init(*dst);
+ if (err == MP_OKAY) {
+ curve->load_mask |= mask;
-/**
- Add two ECC points
- P The point to add
- Q The point to add
- R [out] The destination of the double
- modulus The modulus of the field the ECC curve is in
- mp The "b" value from montgomery_setup()
- return MP_OKAY on success
-*/
-int ecc_projective_add_point(ecc_point *P, ecc_point *Q, ecc_point *R,
- mp_int* modulus, mp_digit* mp)
-{
- fp_int t1, t2, x, y, z;
- int err;
+ err = mp_read_radix(*dst, src, MP_RADIX_HEX);
- if (P == NULL || Q == NULL || R == NULL || modulus == NULL || mp == NULL)
- return ECC_BAD_ARG_E;
+ #ifdef HAVE_WOLF_BIGINT
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(*dst, &(*dst)->raw);
+ #endif
+ }
+ return err;
+}
- if ((err = mp_init_multi(&t1, &t2, &x, &y, &z, NULL)) != MP_OKAY) {
- return err;
- }
+static int wc_ecc_curve_load(const ecc_set_type* dp, ecc_curve_spec** pCurve,
+ byte load_mask)
+{
+ int ret = 0, x;
+ ecc_curve_spec* curve;
+ byte load_items = 0; /* mask of items to load */
- /* should we dbl instead? */
- fp_sub(modulus, Q->y, &t1);
- if ( (fp_cmp(P->x, Q->x) == FP_EQ) &&
- (get_digit_count(Q->z) && fp_cmp(P->z, Q->z) == FP_EQ) &&
- (fp_cmp(P->y, Q->y) == FP_EQ || fp_cmp(P->y, &t1) == FP_EQ)) {
- return ecc_projective_dbl_point(P, R, modulus, mp);
- }
+ if (dp == NULL || pCurve == NULL)
+ return BAD_FUNC_ARG;
- fp_copy(P->x, &x);
- fp_copy(P->y, &y);
- fp_copy(P->z, &z);
+#ifdef ECC_CACHE_CURVE
+ x = wc_ecc_get_curve_idx(dp->id);
+ if (x == ECC_CURVE_INVALID)
+ return ECC_BAD_ARG_E;
- /* if Z is one then these are no-operations */
- if (get_digit_count(Q->z)) {
- /* T1 = Z' * Z' */
- fp_sqr(Q->z, &t1);
- fp_montgomery_reduce(&t1, modulus, *mp);
- /* X = X * T1 */
- fp_mul(&t1, &x, &x);
- fp_montgomery_reduce(&x, modulus, *mp);
- /* T1 = Z' * T1 */
- fp_mul(Q->z, &t1, &t1);
- fp_montgomery_reduce(&t1, modulus, *mp);
- /* Y = Y * T1 */
- fp_mul(&t1, &y, &y);
- fp_montgomery_reduce(&y, modulus, *mp);
- }
+#if !defined(SINGLE_THREADED)
+ ret = wc_LockMutex(&ecc_curve_cache_mutex);
+ if (ret != 0) {
+ return ret;
+ }
+#endif
- /* T1 = Z*Z */
- fp_sqr(&z, &t1);
- fp_montgomery_reduce(&t1, modulus, *mp);
- /* T2 = X' * T1 */
- fp_mul(Q->x, &t1, &t2);
- fp_montgomery_reduce(&t2, modulus, *mp);
- /* T1 = Z * T1 */
- fp_mul(&z, &t1, &t1);
- fp_montgomery_reduce(&t1, modulus, *mp);
- /* T1 = Y' * T1 */
- fp_mul(Q->y, &t1, &t1);
- fp_montgomery_reduce(&t1, modulus, *mp);
+ /* make sure cache has been allocated */
+ if (ecc_curve_spec_cache[x] == NULL) {
+ ecc_curve_spec_cache[x] = (ecc_curve_spec*)XMALLOC(
+ sizeof(ecc_curve_spec), NULL, DYNAMIC_TYPE_ECC);
+ if (ecc_curve_spec_cache[x] == NULL) {
+ #if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+ wc_UnLockMutex(&ecc_curve_cache_mutex);
+ #endif
+ return MEMORY_E;
+ }
+ XMEMSET(ecc_curve_spec_cache[x], 0, sizeof(ecc_curve_spec));
+ }
- /* Y = Y - T1 */
- fp_sub(&y, &t1, &y);
- if (fp_cmp_d(&y, 0) == FP_LT) {
- fp_add(&y, modulus, &y);
- }
- /* T1 = 2T1 */
- fp_add(&t1, &t1, &t1);
- if (fp_cmp(&t1, modulus) != FP_LT) {
- fp_sub(&t1, modulus, &t1);
- }
- /* T1 = Y + T1 */
- fp_add(&t1, &y, &t1);
- if (fp_cmp(&t1, modulus) != FP_LT) {
- fp_sub(&t1, modulus, &t1);
- }
- /* X = X - T2 */
- fp_sub(&x, &t2, &x);
- if (fp_cmp_d(&x, 0) == FP_LT) {
- fp_add(&x, modulus, &x);
- }
- /* T2 = 2T2 */
- fp_add(&t2, &t2, &t2);
- if (fp_cmp(&t2, modulus) != FP_LT) {
- fp_sub(&t2, modulus, &t2);
- }
- /* T2 = X + T2 */
- fp_add(&t2, &x, &t2);
- if (fp_cmp(&t2, modulus) != FP_LT) {
- fp_sub(&t2, modulus, &t2);
- }
+ /* set curve pointer to cache */
+ *pCurve = ecc_curve_spec_cache[x];
+
+#endif /* ECC_CACHE_CURVE */
+ curve = *pCurve;
+
+ /* make sure the curve is initialized */
+ if (curve->dp != dp) {
+ curve->load_mask = 0;
+
+ #ifdef ECC_CACHE_CURVE
+ curve->prime = &curve->prime_lcl;
+ curve->Af = &curve->Af_lcl;
+ #ifdef USE_ECC_B_PARAM
+ curve->Bf = &curve->Bf_lcl;
+ #endif
+ curve->order = &curve->order_lcl;
+ curve->Gx = &curve->Gx_lcl;
+ curve->Gy = &curve->Gy_lcl;
+ #endif
+ }
+ curve->dp = dp; /* set dp info */
+
+ /* determine items to load */
+ load_items = (((byte)~(word32)curve->load_mask) & load_mask);
+ curve->load_mask |= load_items;
+
+ /* load items */
+ x = 0;
+ if (load_items & ECC_CURVE_FIELD_PRIME)
+ x += wc_ecc_curve_load_item(dp->prime, &curve->prime, curve,
+ ECC_CURVE_FIELD_PRIME);
+ if (load_items & ECC_CURVE_FIELD_AF)
+ x += wc_ecc_curve_load_item(dp->Af, &curve->Af, curve,
+ ECC_CURVE_FIELD_AF);
+#ifdef USE_ECC_B_PARAM
+ if (load_items & ECC_CURVE_FIELD_BF)
+ x += wc_ecc_curve_load_item(dp->Bf, &curve->Bf, curve,
+ ECC_CURVE_FIELD_BF);
+#endif
+ if (load_items & ECC_CURVE_FIELD_ORDER)
+ x += wc_ecc_curve_load_item(dp->order, &curve->order, curve,
+ ECC_CURVE_FIELD_ORDER);
+ if (load_items & ECC_CURVE_FIELD_GX)
+ x += wc_ecc_curve_load_item(dp->Gx, &curve->Gx, curve,
+ ECC_CURVE_FIELD_GX);
+ if (load_items & ECC_CURVE_FIELD_GY)
+ x += wc_ecc_curve_load_item(dp->Gy, &curve->Gy, curve,
+ ECC_CURVE_FIELD_GY);
+
+ /* check for error */
+ if (x != 0) {
+ wc_ecc_curve_free(curve);
+ ret = MP_READ_E;
+ }
- /* if Z' != 1 */
- if (get_digit_count(Q->z)) {
- /* Z = Z * Z' */
- fp_mul(&z, Q->z, &z);
- fp_montgomery_reduce(&z, modulus, *mp);
- }
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+ wc_UnLockMutex(&ecc_curve_cache_mutex);
+#endif
- /* Z = Z * X */
- fp_mul(&z, &x, &z);
- fp_montgomery_reduce(&z, modulus, *mp);
+ return ret;
+}
- /* T1 = T1 * X */
- fp_mul(&t1, &x, &t1);
- fp_montgomery_reduce(&t1, modulus, *mp);
- /* X = X * X */
- fp_sqr(&x, &x);
- fp_montgomery_reduce(&x, modulus, *mp);
- /* T2 = T2 * x */
- fp_mul(&t2, &x, &t2);
- fp_montgomery_reduce(&t2, modulus, *mp);
- /* T1 = T1 * X */
- fp_mul(&t1, &x, &t1);
- fp_montgomery_reduce(&t1, modulus, *mp);
-
- /* X = Y*Y */
- fp_sqr(&y, &x);
- fp_montgomery_reduce(&x, modulus, *mp);
- /* X = X - T2 */
- fp_sub(&x, &t2, &x);
- if (fp_cmp_d(&x, 0) == FP_LT) {
- fp_add(&x, modulus, &x);
- }
+#ifdef ECC_CACHE_CURVE
+int wc_ecc_curve_cache_init(void)
+{
+ int ret = 0;
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+ ret = wc_InitMutex(&ecc_curve_cache_mutex);
+#endif
+ return ret;
+}
- /* T2 = T2 - X */
- fp_sub(&t2, &x, &t2);
- if (fp_cmp_d(&t2, 0) == FP_LT) {
- fp_add(&t2, modulus, &t2);
- }
- /* T2 = T2 - X */
- fp_sub(&t2, &x, &t2);
- if (fp_cmp_d(&t2, 0) == FP_LT) {
- fp_add(&t2, modulus, &t2);
- }
- /* T2 = T2 * Y */
- fp_mul(&t2, &y, &t2);
- fp_montgomery_reduce(&t2, modulus, *mp);
- /* Y = T2 - T1 */
- fp_sub(&t2, &t1, &y);
- if (fp_cmp_d(&y, 0) == FP_LT) {
- fp_add(&y, modulus, &y);
- }
- /* Y = Y/2 */
- if (fp_isodd(&y)) {
- fp_add(&y, modulus, &y);
- }
- fp_div_2(&y, &y);
+void wc_ecc_curve_cache_free(void)
+{
+ int x;
+
+ /* free all ECC curve caches */
+ for (x = 0; x < (int)ECC_SET_COUNT; x++) {
+ if (ecc_curve_spec_cache[x]) {
+ _wc_ecc_curve_free(ecc_curve_spec_cache[x]);
+ XFREE(ecc_curve_spec_cache[x], NULL, DYNAMIC_TYPE_ECC);
+ ecc_curve_spec_cache[x] = NULL;
+ }
+ }
- fp_copy(&x, R->x);
- fp_copy(&y, R->y);
- fp_copy(&z, R->z);
-
- return MP_OKAY;
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+ wc_FreeMutex(&ecc_curve_cache_mutex);
+#endif
}
+#endif /* ECC_CACHE_CURVE */
-/**
- Double an ECC point
- P The point to double
- R [out] The destination of the double
- modulus The modulus of the field the ECC curve is in
- mp The "b" value from montgomery_setup()
- return MP_OKAY on success
-*/
-int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus,
- mp_digit* mp)
+/* Retrieve the curve name for the ECC curve id.
+ *
+ * curve_id The id of the curve.
+ * returns the name stored from the curve if available, otherwise NULL.
+ */
+const char* wc_ecc_get_name(int curve_id)
{
- fp_int t1, t2;
- int err;
+ int curve_idx = wc_ecc_get_curve_idx(curve_id);
+ if (curve_idx == ECC_CURVE_INVALID)
+ return NULL;
+ return ecc_sets[curve_idx].name;
+}
- if (P == NULL || R == NULL || modulus == NULL || mp == NULL)
- return ECC_BAD_ARG_E;
+int wc_ecc_set_curve(ecc_key* key, int keysize, int curve_id)
+{
+ if (keysize <= 0 && curve_id < 0) {
+ return BAD_FUNC_ARG;
+ }
- if (P != R) {
- fp_copy(P->x, R->x);
- fp_copy(P->y, R->y);
- fp_copy(P->z, R->z);
- }
+ if (keysize > ECC_MAXSIZE) {
+ return ECC_BAD_ARG_E;
+ }
- if ((err = mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
- return err;
- }
+ /* handle custom case */
+ if (key->idx != ECC_CUSTOM_IDX) {
+ int x;
- /* t1 = Z * Z */
- fp_sqr(R->z, &t1);
- fp_montgomery_reduce(&t1, modulus, *mp);
- /* Z = Y * Z */
- fp_mul(R->z, R->y, R->z);
- fp_montgomery_reduce(R->z, modulus, *mp);
- /* Z = 2Z */
- fp_add(R->z, R->z, R->z);
- if (fp_cmp(R->z, modulus) != FP_LT) {
- fp_sub(R->z, modulus, R->z);
- }
-
- /* &t2 = X - T1 */
- fp_sub(R->x, &t1, &t2);
- if (fp_cmp_d(&t2, 0) == FP_LT) {
- fp_add(&t2, modulus, &t2);
- }
- /* T1 = X + T1 */
- fp_add(&t1, R->x, &t1);
- if (fp_cmp(&t1, modulus) != FP_LT) {
- fp_sub(&t1, modulus, &t1);
- }
- /* T2 = T1 * T2 */
- fp_mul(&t1, &t2, &t2);
- fp_montgomery_reduce(&t2, modulus, *mp);
- /* T1 = 2T2 */
- fp_add(&t2, &t2, &t1);
- if (fp_cmp(&t1, modulus) != FP_LT) {
- fp_sub(&t1, modulus, &t1);
- }
- /* T1 = T1 + T2 */
- fp_add(&t1, &t2, &t1);
- if (fp_cmp(&t1, modulus) != FP_LT) {
- fp_sub(&t1, modulus, &t1);
- }
+ /* default values */
+ key->idx = 0;
+ key->dp = NULL;
- /* Y = 2Y */
- fp_add(R->y, R->y, R->y);
- if (fp_cmp(R->y, modulus) != FP_LT) {
- fp_sub(R->y, modulus, R->y);
- }
- /* Y = Y * Y */
- fp_sqr(R->y, R->y);
- fp_montgomery_reduce(R->y, modulus, *mp);
- /* T2 = Y * Y */
- fp_sqr(R->y, &t2);
- fp_montgomery_reduce(&t2, modulus, *mp);
- /* T2 = T2/2 */
- if (fp_isodd(&t2)) {
- fp_add(&t2, modulus, &t2);
- }
- fp_div_2(&t2, &t2);
- /* Y = Y * X */
- fp_mul(R->y, R->x, R->y);
- fp_montgomery_reduce(R->y, modulus, *mp);
+ /* find ecc_set based on curve_id or key size */
+ for (x = 0; ecc_sets[x].size != 0; x++) {
+ if (curve_id > ECC_CURVE_DEF) {
+ if (curve_id == ecc_sets[x].id)
+ break;
+ }
+ else if (keysize <= ecc_sets[x].size) {
+ break;
+ }
+ }
+ if (ecc_sets[x].size == 0) {
+ WOLFSSL_MSG("ECC Curve not found");
+ return ECC_CURVE_OID_E;
+ }
- /* X = T1 * T1 */
- fp_sqr(&t1, R->x);
- fp_montgomery_reduce(R->x, modulus, *mp);
- /* X = X - Y */
- fp_sub(R->x, R->y, R->x);
- if (fp_cmp_d(R->x, 0) == FP_LT) {
- fp_add(R->x, modulus, R->x);
- }
- /* X = X - Y */
- fp_sub(R->x, R->y, R->x);
- if (fp_cmp_d(R->x, 0) == FP_LT) {
- fp_add(R->x, modulus, R->x);
- }
+ key->idx = x;
+ key->dp = &ecc_sets[x];
+ }
- /* Y = Y - X */
- fp_sub(R->y, R->x, R->y);
- if (fp_cmp_d(R->y, 0) == FP_LT) {
- fp_add(R->y, modulus, R->y);
- }
- /* Y = Y * T1 */
- fp_mul(R->y, &t1, R->y);
- fp_montgomery_reduce(R->y, modulus, *mp);
- /* Y = Y - T2 */
- fp_sub(R->y, &t2, R->y);
- if (fp_cmp_d(R->y, 0) == FP_LT) {
- fp_add(R->y, modulus, R->y);
- }
-
- return MP_OKAY;
+ return 0;
}
-#else /* USE_FAST_MATH */
+
+#ifdef ALT_ECC_SIZE
+static void alt_fp_init(mp_int* a)
+{
+ a->size = FP_SIZE_ECC;
+ mp_zero(a);
+}
+#endif /* ALT_ECC_SIZE */
+
+
+#ifndef WOLFSSL_ATECC508A
+
+#if !defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_PUBLIC_ECC_ADD_DBL)
/**
Add two ECC points
P The point to add
Q The point to add
R [out] The destination of the double
+ a ECC curve parameter a
modulus The modulus of the field the ECC curve is in
mp The "b" value from montgomery_setup()
return MP_OKAY on success
*/
int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R,
- mp_int* modulus, mp_digit* mp)
+ mp_int* a, mp_int* modulus, mp_digit mp)
{
- mp_int t1;
- mp_int t2;
- mp_int x;
- mp_int y;
- mp_int z;
- int err;
+#ifndef WOLFSSL_SP_MATH
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* t1 = NULL;
+ mp_int* t2 = NULL;
+#ifdef ALT_ECC_SIZE
+ mp_int* rx = NULL;
+ mp_int* ry = NULL;
+ mp_int* rz = NULL;
+#endif
+#else
+ mp_int t1[1], t2[1];
+#ifdef ALT_ECC_SIZE
+ mp_int rx[1], ry[1], rz[1];
+#endif
+#endif
+ mp_int *x, *y, *z;
+ int err;
- if (P == NULL || Q == NULL || R == NULL || modulus == NULL || mp == NULL)
+ if (P == NULL || Q == NULL || R == NULL || modulus == NULL) {
return ECC_BAD_ARG_E;
+ }
+
+ /* if Q == R then swap P and Q, so we don't require a local x,y,z */
+ if (Q == R) {
+ ecc_point* tPt = P;
+ P = Q;
+ Q = tPt;
+ }
- if ((err = mp_init_multi(&t1, &t2, &x, &y, &z, NULL)) != MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (R->key != NULL) {
+ t1 = R->key->t1;
+ t2 = R->key->t2;
+#ifdef ALT_ECC_SIZE
+ rx = R->key->x;
+ ry = R->key->y;
+ rz = R->key->z;
+#endif
+ }
+ else
+#endif /* WOLFSSL_SMALL_STACK_CACHE */
+ {
+ t1 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ t2 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (t1 == NULL || t2 == NULL) {
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#ifdef ALT_ECC_SIZE
+ rx = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ ry = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ rz = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (rx == NULL || ry == NULL || rz == NULL) {
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#endif
+ }
+#endif /* WOLFSSL_SMALL_STACK */
+
+ if ((err = mp_init_multi(t1, t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ #ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (R->key == NULL)
+ #endif
+ {
+ #ifdef ALT_ECC_SIZE
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
return err;
}
-
- /* should we dbl instead? */
- err = mp_sub(modulus, Q->y, &t1);
+ /* should we dbl instead? */
+ if (err == MP_OKAY)
+ err = mp_sub(modulus, Q->y, t1);
if (err == MP_OKAY) {
- if ( (mp_cmp(P->x, Q->x) == MP_EQ) &&
+ if ( (mp_cmp(P->x, Q->x) == MP_EQ) &&
(get_digit_count(Q->z) && mp_cmp(P->z, Q->z) == MP_EQ) &&
- (mp_cmp(P->y, Q->y) == MP_EQ || mp_cmp(P->y, &t1) == MP_EQ)) {
- mp_clear(&t1);
- mp_clear(&t2);
- mp_clear(&x);
- mp_clear(&y);
- mp_clear(&z);
-
- return ecc_projective_dbl_point(P, R, modulus, mp);
+ (mp_cmp(P->y, Q->y) == MP_EQ || mp_cmp(P->y, t1) == MP_EQ)) {
+ mp_clear(t1);
+ mp_clear(t2);
+ #ifdef WOLFSSL_SMALL_STACK
+ #ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (R->key == NULL)
+ #endif
+ {
+ #ifdef ALT_ECC_SIZE
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ }
+ #endif
+ return ecc_projective_dbl_point(P, R, a, modulus, mp);
}
}
+ if (err != MP_OKAY) {
+ goto done;
+ }
+
+/* If use ALT_ECC_SIZE we need to use local stack variable since
+ ecc_point x,y,z is reduced size */
+#ifdef ALT_ECC_SIZE
+ /* Use local stack variable */
+ x = rx;
+ y = ry;
+ z = rz;
+
+ if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) {
+ goto done;
+ }
+#else
+ /* Use destination directly */
+ x = R->x;
+ y = R->y;
+ z = R->z;
+#endif
+
if (err == MP_OKAY)
- err = mp_copy(P->x, &x);
+ err = mp_copy(P->x, x);
if (err == MP_OKAY)
- err = mp_copy(P->y, &y);
+ err = mp_copy(P->y, y);
if (err == MP_OKAY)
- err = mp_copy(P->z, &z);
+ err = mp_copy(P->z, z);
/* if Z is one then these are no-operations */
if (err == MP_OKAY) {
- if (get_digit_count(Q->z)) {
+ if (!mp_iszero(Q->z)) {
/* T1 = Z' * Z' */
- err = mp_sqr(Q->z, &t1);
+ err = mp_sqr(Q->z, t1);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t1, modulus, *mp);
+ err = mp_montgomery_reduce(t1, modulus, mp);
/* X = X * T1 */
if (err == MP_OKAY)
- err = mp_mul(&t1, &x, &x);
+ err = mp_mul(t1, x, x);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&x, modulus, *mp);
+ err = mp_montgomery_reduce(x, modulus, mp);
/* T1 = Z' * T1 */
if (err == MP_OKAY)
- err = mp_mul(Q->z, &t1, &t1);
+ err = mp_mul(Q->z, t1, t1);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t1, modulus, *mp);
+ err = mp_montgomery_reduce(t1, modulus, mp);
/* Y = Y * T1 */
if (err == MP_OKAY)
- err = mp_mul(&t1, &y, &y);
+ err = mp_mul(t1, y, y);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&y, modulus, *mp);
+ err = mp_montgomery_reduce(y, modulus, mp);
}
}
/* T1 = Z*Z */
if (err == MP_OKAY)
- err = mp_sqr(&z, &t1);
+ err = mp_sqr(z, t1);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t1, modulus, *mp);
+ err = mp_montgomery_reduce(t1, modulus, mp);
/* T2 = X' * T1 */
if (err == MP_OKAY)
- err = mp_mul(Q->x, &t1, &t2);
+ err = mp_mul(Q->x, t1, t2);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t2, modulus, *mp);
+ err = mp_montgomery_reduce(t2, modulus, mp);
/* T1 = Z * T1 */
if (err == MP_OKAY)
- err = mp_mul(&z, &t1, &t1);
+ err = mp_mul(z, t1, t1);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t1, modulus, *mp);
+ err = mp_montgomery_reduce(t1, modulus, mp);
/* T1 = Y' * T1 */
if (err == MP_OKAY)
- err = mp_mul(Q->y, &t1, &t1);
+ err = mp_mul(Q->y, t1, t1);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t1, modulus, *mp);
+ err = mp_montgomery_reduce(t1, modulus, mp);
/* Y = Y - T1 */
if (err == MP_OKAY)
- err = mp_sub(&y, &t1, &y);
+ err = mp_sub(y, t1, y);
if (err == MP_OKAY) {
- if (mp_cmp_d(&y, 0) == MP_LT)
- err = mp_add(&y, modulus, &y);
+ if (mp_isneg(y))
+ err = mp_add(y, modulus, y);
}
/* T1 = 2T1 */
if (err == MP_OKAY)
- err = mp_add(&t1, &t1, &t1);
+ err = mp_add(t1, t1, t1);
if (err == MP_OKAY) {
- if (mp_cmp(&t1, modulus) != MP_LT)
- err = mp_sub(&t1, modulus, &t1);
+ if (mp_cmp(t1, modulus) != MP_LT)
+ err = mp_sub(t1, modulus, t1);
}
/* T1 = Y + T1 */
if (err == MP_OKAY)
- err = mp_add(&t1, &y, &t1);
+ err = mp_add(t1, y, t1);
if (err == MP_OKAY) {
- if (mp_cmp(&t1, modulus) != MP_LT)
- err = mp_sub(&t1, modulus, &t1);
+ if (mp_cmp(t1, modulus) != MP_LT)
+ err = mp_sub(t1, modulus, t1);
}
/* X = X - T2 */
if (err == MP_OKAY)
- err = mp_sub(&x, &t2, &x);
+ err = mp_sub(x, t2, x);
if (err == MP_OKAY) {
- if (mp_cmp_d(&x, 0) == MP_LT)
- err = mp_add(&x, modulus, &x);
+ if (mp_isneg(x))
+ err = mp_add(x, modulus, x);
}
/* T2 = 2T2 */
if (err == MP_OKAY)
- err = mp_add(&t2, &t2, &t2);
+ err = mp_add(t2, t2, t2);
if (err == MP_OKAY) {
- if (mp_cmp(&t2, modulus) != MP_LT)
- err = mp_sub(&t2, modulus, &t2);
+ if (mp_cmp(t2, modulus) != MP_LT)
+ err = mp_sub(t2, modulus, t2);
}
/* T2 = X + T2 */
if (err == MP_OKAY)
- err = mp_add(&t2, &x, &t2);
+ err = mp_add(t2, x, t2);
if (err == MP_OKAY) {
- if (mp_cmp(&t2, modulus) != MP_LT)
- err = mp_sub(&t2, modulus, &t2);
+ if (mp_cmp(t2, modulus) != MP_LT)
+ err = mp_sub(t2, modulus, t2);
}
if (err == MP_OKAY) {
- if (get_digit_count(Q->z)) {
+ if (!mp_iszero(Q->z)) {
/* Z = Z * Z' */
- err = mp_mul(&z, Q->z, &z);
+ err = mp_mul(z, Q->z, z);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&z, modulus, *mp);
+ err = mp_montgomery_reduce(z, modulus, mp);
}
}
/* Z = Z * X */
if (err == MP_OKAY)
- err = mp_mul(&z, &x, &z);
+ err = mp_mul(z, x, z);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&z, modulus, *mp);
+ err = mp_montgomery_reduce(z, modulus, mp);
/* T1 = T1 * X */
if (err == MP_OKAY)
- err = mp_mul(&t1, &x, &t1);
+ err = mp_mul(t1, x, t1);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t1, modulus, *mp);
+ err = mp_montgomery_reduce(t1, modulus, mp);
/* X = X * X */
if (err == MP_OKAY)
- err = mp_sqr(&x, &x);
+ err = mp_sqr(x, x);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&x, modulus, *mp);
-
+ err = mp_montgomery_reduce(x, modulus, mp);
+
/* T2 = T2 * x */
if (err == MP_OKAY)
- err = mp_mul(&t2, &x, &t2);
+ err = mp_mul(t2, x, t2);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t2, modulus, *mp);
+ err = mp_montgomery_reduce(t2, modulus, mp);
/* T1 = T1 * X */
if (err == MP_OKAY)
- err = mp_mul(&t1, &x, &t1);
+ err = mp_mul(t1, x, t1);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t1, modulus, *mp);
-
+ err = mp_montgomery_reduce(t1, modulus, mp);
+
/* X = Y*Y */
if (err == MP_OKAY)
- err = mp_sqr(&y, &x);
+ err = mp_sqr(y, x);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&x, modulus, *mp);
+ err = mp_montgomery_reduce(x, modulus, mp);
/* X = X - T2 */
if (err == MP_OKAY)
- err = mp_sub(&x, &t2, &x);
+ err = mp_sub(x, t2, x);
if (err == MP_OKAY) {
- if (mp_cmp_d(&x, 0) == MP_LT)
- err = mp_add(&x, modulus, &x);
+ if (mp_isneg(x))
+ err = mp_add(x, modulus, x);
}
/* T2 = T2 - X */
if (err == MP_OKAY)
- err = mp_sub(&t2, &x, &t2);
+ err = mp_sub(t2, x, t2);
if (err == MP_OKAY) {
- if (mp_cmp_d(&t2, 0) == MP_LT)
- err = mp_add(&t2, modulus, &t2);
- }
+ if (mp_isneg(t2))
+ err = mp_add(t2, modulus, t2);
+ }
/* T2 = T2 - X */
if (err == MP_OKAY)
- err = mp_sub(&t2, &x, &t2);
+ err = mp_sub(t2, x, t2);
if (err == MP_OKAY) {
- if (mp_cmp_d(&t2, 0) == MP_LT)
- err = mp_add(&t2, modulus, &t2);
+ if (mp_isneg(t2))
+ err = mp_add(t2, modulus, t2);
}
/* T2 = T2 * Y */
if (err == MP_OKAY)
- err = mp_mul(&t2, &y, &t2);
+ err = mp_mul(t2, y, t2);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t2, modulus, *mp);
+ err = mp_montgomery_reduce(t2, modulus, mp);
/* Y = T2 - T1 */
if (err == MP_OKAY)
- err = mp_sub(&t2, &t1, &y);
+ err = mp_sub(t2, t1, y);
if (err == MP_OKAY) {
- if (mp_cmp_d(&y, 0) == MP_LT)
- err = mp_add(&y, modulus, &y);
+ if (mp_isneg(y))
+ err = mp_add(y, modulus, y);
}
/* Y = Y/2 */
if (err == MP_OKAY) {
- if (mp_isodd(&y))
- err = mp_add(&y, modulus, &y);
+ if (mp_isodd(y) == MP_YES)
+ err = mp_add(y, modulus, y);
}
if (err == MP_OKAY)
- err = mp_div_2(&y, &y);
+ err = mp_div_2(y, y);
+#ifdef ALT_ECC_SIZE
if (err == MP_OKAY)
- err = mp_copy(&x, R->x);
+ err = mp_copy(x, R->x);
if (err == MP_OKAY)
- err = mp_copy(&y, R->y);
+ err = mp_copy(y, R->y);
if (err == MP_OKAY)
- err = mp_copy(&z, R->z);
+ err = mp_copy(z, R->z);
+#endif
+
+done:
/* clean up */
- mp_clear(&t1);
- mp_clear(&t2);
- mp_clear(&x);
- mp_clear(&y);
- mp_clear(&z);
+ mp_clear(t1);
+ mp_clear(t2);
+#ifdef WOLFSSL_SMALL_STACK
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (R->key == NULL)
+#endif
+ {
+ #ifdef ALT_ECC_SIZE
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
return err;
+#else
+ if (P == NULL || Q == NULL || R == NULL || modulus == NULL) {
+ return ECC_BAD_ARG_E;
+ }
+
+ (void)a;
+ (void)mp;
+
+#ifndef WOLFSSL_SP_NO_256
+ if (mp_count_bits(modulus) == 256) {
+ return sp_ecc_proj_add_point_256(P->x, P->y, P->z, Q->x, Q->y, Q->z,
+ R->x, R->y, R->z);
+ }
+#endif
+#ifdef WOLFSSL_SP_384
+ if (mp_count_bits(modulus) == 384) {
+ return sp_ecc_proj_add_point_384(P->x, P->y, P->z, Q->x, Q->y, Q->z,
+ R->x, R->y, R->z);
+ }
+#endif
+ return ECC_BAD_ARG_E;
+#endif
}
+/* ### Point doubling in Jacobian coordinate system ###
+ *
+ * let us have a curve: y^2 = x^3 + a*x + b
+ * in Jacobian coordinates it becomes: y^2 = x^3 + a*x*z^4 + b*z^6
+ *
+ * The doubling of P = (Xp, Yp, Zp) is given by R = (Xr, Yr, Zr) where:
+ * Xr = M^2 - 2*S
+ * Yr = M * (S - Xr) - 8*T
+ * Zr = 2 * Yp * Zp
+ *
+ * M = 3 * Xp^2 + a*Zp^4
+ * T = Yp^4
+ * S = 4 * Xp * Yp^2
+ *
+ * SPECIAL CASE: when a == 3 we can compute M as
+ * M = 3 * (Xp^2 - Zp^4) = 3 * (Xp + Zp^2) * (Xp - Zp^2)
+ */
/**
Double an ECC point
P The point to double
R [out] The destination of the double
+ a ECC curve parameter a
modulus The modulus of the field the ECC curve is in
mp The "b" value from montgomery_setup()
return MP_OKAY on success
*/
-int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus,
- mp_digit* mp)
+int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* a,
+ mp_int* modulus, mp_digit mp)
{
- mp_int t1;
- mp_int t2;
+#ifndef WOLFSSL_SP_MATH
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* t1 = NULL;
+ mp_int* t2 = NULL;
+#ifdef ALT_ECC_SIZE
+ mp_int* rx = NULL;
+ mp_int* ry = NULL;
+ mp_int* rz = NULL;
+#endif
+#else
+ mp_int t1[1], t2[1];
+#ifdef ALT_ECC_SIZE
+ mp_int rx[1], ry[1], rz[1];
+#endif
+#endif
+ mp_int *x, *y, *z;
int err;
- if (P == NULL || R == NULL || modulus == NULL || mp == NULL)
+ if (P == NULL || R == NULL || modulus == NULL)
return ECC_BAD_ARG_E;
- if ((err = mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (R->key != NULL) {
+ t1 = R->key->t1;
+ t2 = R->key->t2;
+ #ifdef ALT_ECC_SIZE
+ rx = R->key->x;
+ ry = R->key->y;
+ rz = R->key->z;
+ #endif
+ }
+ else
+#endif /* WOLFSSL_SMALL_STACK_CACHE */
+ {
+ t1 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ t2 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (t1 == NULL || t2 == NULL) {
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+ #ifdef ALT_ECC_SIZE
+ rx = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ ry = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ rz = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (rx == NULL || ry == NULL || rz == NULL) {
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+ #endif
+ }
+#endif
+
+ if ((err = mp_init_multi(t1, t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (R->key == NULL)
+#endif
+ {
+ #ifdef ALT_ECC_SIZE
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
return err;
}
- if (P != R) {
- err = mp_copy(P->x, R->x);
- if (err == MP_OKAY)
- err = mp_copy(P->y, R->y);
- if (err == MP_OKAY)
- err = mp_copy(P->z, R->z);
+/* If use ALT_ECC_SIZE we need to use local stack variable since
+ ecc_point x,y,z is reduced size */
+#ifdef ALT_ECC_SIZE
+ /* Use local stack variable */
+ x = rx;
+ y = ry;
+ z = rz;
+
+ if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) {
+ mp_clear(t1);
+ mp_clear(t2);
+ #ifdef WOLFSSL_SMALL_STACK
+ #ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (R->key == NULL)
+ #endif
+ {
+ #ifdef ALT_ECC_SIZE
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ }
+ #endif
+ return err;
}
+#else
+ /* Use destination directly */
+ x = R->x;
+ y = R->y;
+ z = R->z;
+#endif
- /* t1 = Z * Z */
if (err == MP_OKAY)
- err = mp_sqr(R->z, &t1);
+ err = mp_copy(P->x, x);
+ if (err == MP_OKAY)
+ err = mp_copy(P->y, y);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t1, modulus, *mp);
+ err = mp_copy(P->z, z);
+
+ /* T1 = Z * Z */
+ if (err == MP_OKAY)
+ err = mp_sqr(z, t1);
+ if (err == MP_OKAY)
+ err = mp_montgomery_reduce(t1, modulus, mp);
/* Z = Y * Z */
if (err == MP_OKAY)
- err = mp_mul(R->z, R->y, R->z);
+ err = mp_mul(z, y, z);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(R->z, modulus, *mp);
+ err = mp_montgomery_reduce(z, modulus, mp);
/* Z = 2Z */
if (err == MP_OKAY)
- err = mp_add(R->z, R->z, R->z);
+ err = mp_add(z, z, z);
if (err == MP_OKAY) {
- if (mp_cmp(R->z, modulus) != MP_LT)
- err = mp_sub(R->z, modulus, R->z);
+ if (mp_cmp(z, modulus) != MP_LT)
+ err = mp_sub(z, modulus, z);
}
- /* T2 = X - T1 */
- if (err == MP_OKAY)
- err = mp_sub(R->x, &t1, &t2);
- if (err == MP_OKAY) {
- if (mp_cmp_d(&t2, 0) == MP_LT)
- err = mp_add(&t2, modulus, &t2);
- }
- /* T1 = X + T1 */
- if (err == MP_OKAY)
- err = mp_add(&t1, R->x, &t1);
+ /* Determine if curve "a" should be used in calc */
+#ifdef WOLFSSL_CUSTOM_CURVES
if (err == MP_OKAY) {
- if (mp_cmp(&t1, modulus) != MP_LT)
- err = mp_sub(&t1, modulus, &t1);
+ /* Use a and prime to determine if a == 3 */
+ err = mp_submod(modulus, a, modulus, t2);
}
- /* T2 = T1 * T2 */
- if (err == MP_OKAY)
- err = mp_mul(&t1, &t2, &t2);
- if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t2, modulus, *mp);
+ if (err == MP_OKAY && mp_cmp_d(t2, 3) != MP_EQ) {
+ /* use "a" in calc */
- /* T1 = 2T2 */
- if (err == MP_OKAY)
- err = mp_add(&t2, &t2, &t1);
- if (err == MP_OKAY) {
- if (mp_cmp(&t1, modulus) != MP_LT)
- err = mp_sub(&t1, modulus, &t1);
+ /* T2 = T1 * T1 */
+ if (err == MP_OKAY)
+ err = mp_sqr(t1, t2);
+ if (err == MP_OKAY)
+ err = mp_montgomery_reduce(t2, modulus, mp);
+ /* T1 = T2 * a */
+ if (err == MP_OKAY)
+ err = mp_mulmod(t2, a, modulus, t1);
+ /* T2 = X * X */
+ if (err == MP_OKAY)
+ err = mp_sqr(x, t2);
+ if (err == MP_OKAY)
+ err = mp_montgomery_reduce(t2, modulus, mp);
+ /* T1 = T2 + T1 */
+ if (err == MP_OKAY)
+ err = mp_add(t1, t2, t1);
+ if (err == MP_OKAY) {
+ if (mp_cmp(t1, modulus) != MP_LT)
+ err = mp_sub(t1, modulus, t1);
+ }
+ /* T1 = T2 + T1 */
+ if (err == MP_OKAY)
+ err = mp_add(t1, t2, t1);
+ if (err == MP_OKAY) {
+ if (mp_cmp(t1, modulus) != MP_LT)
+ err = mp_sub(t1, modulus, t1);
+ }
+ /* T1 = T2 + T1 */
+ if (err == MP_OKAY)
+ err = mp_add(t1, t2, t1);
+ if (err == MP_OKAY) {
+ if (mp_cmp(t1, modulus) != MP_LT)
+ err = mp_sub(t1, modulus, t1);
+ }
}
- /* T1 = T1 + T2 */
- if (err == MP_OKAY)
- err = mp_add(&t1, &t2, &t1);
- if (err == MP_OKAY) {
- if (mp_cmp(&t1, modulus) != MP_LT)
- err = mp_sub(&t1, modulus, &t1);
+ else
+#endif /* WOLFSSL_CUSTOM_CURVES */
+ {
+ /* assumes "a" == 3 */
+ (void)a;
+
+ /* T2 = X - T1 */
+ if (err == MP_OKAY)
+ err = mp_sub(x, t1, t2);
+ if (err == MP_OKAY) {
+ if (mp_isneg(t2))
+ err = mp_add(t2, modulus, t2);
+ }
+ /* T1 = X + T1 */
+ if (err == MP_OKAY)
+ err = mp_add(t1, x, t1);
+ if (err == MP_OKAY) {
+ if (mp_cmp(t1, modulus) != MP_LT)
+ err = mp_sub(t1, modulus, t1);
+ }
+ /* T2 = T1 * T2 */
+ if (err == MP_OKAY)
+ err = mp_mul(t1, t2, t2);
+ if (err == MP_OKAY)
+ err = mp_montgomery_reduce(t2, modulus, mp);
+
+ /* T1 = 2T2 */
+ if (err == MP_OKAY)
+ err = mp_add(t2, t2, t1);
+ if (err == MP_OKAY) {
+ if (mp_cmp(t1, modulus) != MP_LT)
+ err = mp_sub(t1, modulus, t1);
+ }
+ /* T1 = T1 + T2 */
+ if (err == MP_OKAY)
+ err = mp_add(t1, t2, t1);
+ if (err == MP_OKAY) {
+ if (mp_cmp(t1, modulus) != MP_LT)
+ err = mp_sub(t1, modulus, t1);
+ }
}
+
/* Y = 2Y */
if (err == MP_OKAY)
- err = mp_add(R->y, R->y, R->y);
+ err = mp_add(y, y, y);
if (err == MP_OKAY) {
- if (mp_cmp(R->y, modulus) != MP_LT)
- err = mp_sub(R->y, modulus, R->y);
+ if (mp_cmp(y, modulus) != MP_LT)
+ err = mp_sub(y, modulus, y);
}
/* Y = Y * Y */
if (err == MP_OKAY)
- err = mp_sqr(R->y, R->y);
+ err = mp_sqr(y, y);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(R->y, modulus, *mp);
-
+ err = mp_montgomery_reduce(y, modulus, mp);
+
/* T2 = Y * Y */
if (err == MP_OKAY)
- err = mp_sqr(R->y, &t2);
+ err = mp_sqr(y, t2);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(&t2, modulus, *mp);
+ err = mp_montgomery_reduce(t2, modulus, mp);
/* T2 = T2/2 */
if (err == MP_OKAY) {
- if (mp_isodd(&t2))
- err = mp_add(&t2, modulus, &t2);
+ if (mp_isodd(t2) == MP_YES)
+ err = mp_add(t2, modulus, t2);
}
if (err == MP_OKAY)
- err = mp_div_2(&t2, &t2);
-
+ err = mp_div_2(t2, t2);
+
/* Y = Y * X */
if (err == MP_OKAY)
- err = mp_mul(R->y, R->x, R->y);
+ err = mp_mul(y, x, y);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(R->y, modulus, *mp);
+ err = mp_montgomery_reduce(y, modulus, mp);
- /* X = T1 * T1 */
+ /* X = T1 * T1 */
if (err == MP_OKAY)
- err = mp_sqr(&t1, R->x);
+ err = mp_sqr(t1, x);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(R->x, modulus, *mp);
+ err = mp_montgomery_reduce(x, modulus, mp);
/* X = X - Y */
if (err == MP_OKAY)
- err = mp_sub(R->x, R->y, R->x);
+ err = mp_sub(x, y, x);
if (err == MP_OKAY) {
- if (mp_cmp_d(R->x, 0) == MP_LT)
- err = mp_add(R->x, modulus, R->x);
+ if (mp_isneg(x))
+ err = mp_add(x, modulus, x);
}
/* X = X - Y */
if (err == MP_OKAY)
- err = mp_sub(R->x, R->y, R->x);
+ err = mp_sub(x, y, x);
if (err == MP_OKAY) {
- if (mp_cmp_d(R->x, 0) == MP_LT)
- err = mp_add(R->x, modulus, R->x);
+ if (mp_isneg(x))
+ err = mp_add(x, modulus, x);
}
- /* Y = Y - X */
+
+ /* Y = Y - X */
if (err == MP_OKAY)
- err = mp_sub(R->y, R->x, R->y);
+ err = mp_sub(y, x, y);
if (err == MP_OKAY) {
- if (mp_cmp_d(R->y, 0) == MP_LT)
- err = mp_add(R->y, modulus, R->y);
+ if (mp_isneg(y))
+ err = mp_add(y, modulus, y);
}
/* Y = Y * T1 */
if (err == MP_OKAY)
- err = mp_mul(R->y, &t1, R->y);
+ err = mp_mul(y, t1, y);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(R->y, modulus, *mp);
+ err = mp_montgomery_reduce(y, modulus, mp);
/* Y = Y - T2 */
if (err == MP_OKAY)
- err = mp_sub(R->y, &t2, R->y);
+ err = mp_sub(y, t2, y);
if (err == MP_OKAY) {
- if (mp_cmp_d(R->y, 0) == MP_LT)
- err = mp_add(R->y, modulus, R->y);
+ if (mp_isneg(y))
+ err = mp_add(y, modulus, y);
}
- /* clean up */
- mp_clear(&t1);
- mp_clear(&t2);
+#ifdef ALT_ECC_SIZE
+ if (err == MP_OKAY)
+ err = mp_copy(x, R->x);
+ if (err == MP_OKAY)
+ err = mp_copy(y, R->y);
+ if (err == MP_OKAY)
+ err = mp_copy(z, R->z);
+#endif
+
+ /* clean up */
+ mp_clear(t1);
+ mp_clear(t2);
+
+#ifdef WOLFSSL_SMALL_STACK
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (R->key == NULL)
+#endif
+ {
+ #ifdef ALT_ECC_SIZE
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
return err;
+#else
+ if (P == NULL || R == NULL || modulus == NULL)
+ return ECC_BAD_ARG_E;
+
+ (void)a;
+ (void)mp;
+
+#ifndef WOLFSSL_SP_NO_256
+ if (mp_count_bits(modulus) == 256) {
+ return sp_ecc_proj_dbl_point_256(P->x, P->y, P->z, R->x, R->y, R->z);
+ }
+#endif
+#ifdef WOLFSSL_SP_384
+ if (mp_count_bits(modulus) == 384) {
+ return sp_ecc_proj_dbl_point_384(P->x, P->y, P->z, R->x, R->y, R->z);
+ }
+#endif
+ return ECC_BAD_ARG_E;
+#endif
}
-#endif /* USE_FAST_MATH */
/**
- Map a projective jacbobian point back to affine space
+ Map a projective Jacobian point back to affine space
P [in/out] The point to map
modulus The modulus of the field the ECC curve is in
mp The "b" value from montgomery_setup()
+ ct Operation should be constant time.
return MP_OKAY on success
*/
-int ecc_map(ecc_point* P, mp_int* modulus, mp_digit* mp)
+int ecc_map_ex(ecc_point* P, mp_int* modulus, mp_digit mp, int ct)
{
- mp_int t1;
- mp_int t2;
+#ifndef WOLFSSL_SP_MATH
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* t1 = NULL;
+ mp_int* t2 = NULL;
+#ifdef ALT_ECC_SIZE
+ mp_int* rx = NULL;
+ mp_int* ry = NULL;
+ mp_int* rz = NULL;
+#endif
+#else
+ mp_int t1[1], t2[1];
+#ifdef ALT_ECC_SIZE
+ mp_int rx[1], ry[1], rz[1];
+#endif
+#endif /* WOLFSSL_SMALL_STACK */
+ mp_int *x, *y, *z;
int err;
- if (P == NULL || mp == NULL || modulus == NULL)
+ (void)ct;
+
+ if (P == NULL || modulus == NULL)
return ECC_BAD_ARG_E;
/* special case for point at infinity */
if (mp_cmp_d(P->z, 0) == MP_EQ) {
- mp_set(P->x, 0);
- mp_set(P->y, 0);
- mp_set(P->z, 1);
- return MP_OKAY;
+ err = mp_set(P->x, 0);
+ if (err == MP_OKAY)
+ err = mp_set(P->y, 0);
+ if (err == MP_OKAY)
+ err = mp_set(P->z, 1);
+ return err;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (P->key != NULL) {
+ t1 = P->key->t1;
+ t2 = P->key->t2;
+ #ifdef ALT_ECC_SIZE
+ rx = P->key->x;
+ ry = P->key->y;
+ rz = P->key->z;
+ #endif
+ }
+ else
+#endif /* WOLFSSL_SMALL_STACK_CACHE */
+ {
+ t1 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ t2 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (t1 == NULL || t2 == NULL) {
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#ifdef ALT_ECC_SIZE
+ rx = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ ry = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ rz = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (rx == NULL || ry == NULL || rz == NULL) {
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#endif
}
+#endif /* WOLFSSL_SMALL_STACK */
- if ((err = mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+ if ((err = mp_init_multi(t1, t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (P->key == NULL)
+#endif
+ {
+ #ifdef ALT_ECC_SIZE
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
return MEMORY_E;
}
- /* first map z back to normal */
- err = mp_montgomery_reduce(P->z, modulus, *mp);
+#ifdef ALT_ECC_SIZE
+ /* Use local stack variable */
+ x = rx;
+ y = ry;
+ z = rz;
- /* get 1/z */
+ if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) {
+ goto done;
+ }
+
+ if (err == MP_OKAY)
+ err = mp_copy(P->x, x);
+ if (err == MP_OKAY)
+ err = mp_copy(P->y, y);
if (err == MP_OKAY)
- err = mp_invmod(P->z, modulus, &t1);
-
+ err = mp_copy(P->z, z);
+
+ if (err != MP_OKAY) {
+ goto done;
+ }
+#else
+ /* Use destination directly */
+ x = P->x;
+ y = P->y;
+ z = P->z;
+#endif
+
+ /* get 1/z */
+ if (err == MP_OKAY) {
+#if defined(ECC_TIMING_RESISTANT) && defined(USE_FAST_MATH)
+ if (ct) {
+ err = mp_invmod_mont_ct(z, modulus, t1, mp);
+ if (err == MP_OKAY)
+ err = mp_montgomery_reduce(t1, modulus, mp);
+ }
+ else
+#endif
+ {
+ /* first map z back to normal */
+ err = mp_montgomery_reduce(z, modulus, mp);
+ if (err == MP_OKAY)
+ err = mp_invmod(z, modulus, t1);
+ }
+ }
+
/* get 1/z^2 and 1/z^3 */
if (err == MP_OKAY)
- err = mp_sqr(&t1, &t2);
+ err = mp_sqr(t1, t2);
if (err == MP_OKAY)
- err = mp_mod(&t2, modulus, &t2);
+ err = mp_mod(t2, modulus, t2);
if (err == MP_OKAY)
- err = mp_mul(&t1, &t2, &t1);
+ err = mp_mul(t1, t2, t1);
if (err == MP_OKAY)
- err = mp_mod(&t1, modulus, &t1);
+ err = mp_mod(t1, modulus, t1);
/* multiply against x/y */
if (err == MP_OKAY)
- err = mp_mul(P->x, &t2, P->x);
+ err = mp_mul(x, t2, x);
+ if (err == MP_OKAY)
+ err = mp_montgomery_reduce(x, modulus, mp);
+ if (err == MP_OKAY)
+ err = mp_mul(y, t1, y);
+ if (err == MP_OKAY)
+ err = mp_montgomery_reduce(y, modulus, mp);
+
if (err == MP_OKAY)
- err = mp_montgomery_reduce(P->x, modulus, *mp);
+ err = mp_set(z, 1);
+
+#ifdef ALT_ECC_SIZE
+ /* return result */
if (err == MP_OKAY)
- err = mp_mul(P->y, &t1, P->y);
+ err = mp_copy(x, P->x);
if (err == MP_OKAY)
- err = mp_montgomery_reduce(P->y, modulus, *mp);
-
+ err = mp_copy(y, P->y);
if (err == MP_OKAY)
- mp_set(P->z, 1);
+ err = mp_copy(z, P->z);
+
+done:
+#endif
/* clean up */
- mp_clear(&t1);
- mp_clear(&t2);
+ mp_clear(t1);
+ mp_clear(t2);
+
+#ifdef WOLFSSL_SMALL_STACK
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (P->key == NULL)
+#endif
+ {
+ #ifdef ALT_ECC_SIZE
+ XFREE(rz, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(ry, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rx, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
return err;
-}
+#else
+ if (P == NULL || modulus == NULL)
+ return ECC_BAD_ARG_E;
+ (void)mp;
-#ifndef ECC_TIMING_RESISTANT
+#ifndef WOLFSSL_SP_NO_256
+ if (mp_count_bits(modulus) == 256) {
+ return sp_ecc_map_256(P->x, P->y, P->z);
+ }
+#endif
+#ifdef WOLFSSL_SP_384
+ if (mp_count_bits(modulus) == 384) {
+ return sp_ecc_map_384(P->x, P->y, P->z);
+ }
+#endif
+ return ECC_BAD_ARG_E;
+#endif
+}
+
+int ecc_map(ecc_point* P, mp_int* modulus, mp_digit mp)
+{
+ return ecc_map_ex(P, modulus, mp, 0);
+}
+#endif /* !WOLFSSL_SP_MATH || WOLFSSL_PUBLIC_ECC_ADD_DBL */
-/* size of sliding window, don't change this! */
-#define WINSIZE 4
+#if !defined(FREESCALE_LTC_ECC) && !defined(WOLFSSL_STM32_PKA)
+#if !defined(FP_ECC) || !defined(WOLFSSL_SP_MATH)
/**
- Perform a point multiplication
+ Perform a point multiplication
k The scalar to multiply by
G The base point
R [out] Destination for kG
+ a ECC curve parameter a
modulus The modulus of the field the ECC curve is in
map Boolean whether to map back to affine or not
(1==map, 0 == leave in projective)
@@ -1007,83 +2521,182 @@ int ecc_map(ecc_point* P, mp_int* modulus, mp_digit* mp)
*/
#ifdef FP_ECC
static int normal_ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R,
- mp_int* modulus, int map)
+ mp_int* a, mp_int* modulus, int map,
+ void* heap)
#else
-static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
- int map)
+int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R,
+ mp_int* a, mp_int* modulus, int map,
+ void* heap)
#endif
{
- ecc_point *tG, *M[8];
- int i, j, err;
- mp_int mu;
+#ifndef WOLFSSL_SP_MATH
+#ifndef ECC_TIMING_RESISTANT
+ /* size of sliding window, don't change this! */
+ #define WINSIZE 4
+ #define M_POINTS 8
+ int first = 1, bitbuf = 0, bitcpy = 0, j;
+#elif defined(WC_NO_CACHE_RESISTANT)
+ #define M_POINTS 4
+#else
+ #define M_POINTS 5
+#endif
+
+ ecc_point *tG, *M[M_POINTS];
+ int i, err;
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ ecc_key key;
+#endif
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* mu = NULL;
+#else
+ mp_int mu[1];
+#endif
mp_digit mp;
mp_digit buf;
- int first = 1, bitbuf = 0, bitcpy = 0, bitcnt = 0, mode = 0,
- digidx = 0;
+ int bitcnt = 0, mode = 0, digidx = 0;
- if (k == NULL || G == NULL || R == NULL || modulus == NULL)
+ if (k == NULL || G == NULL || R == NULL || modulus == NULL) {
return ECC_BAD_ARG_E;
+ }
+
+ /* init variables */
+ tG = NULL;
+ XMEMSET(M, 0, sizeof(M));
+#ifdef WOLFSSL_SMALL_STACK
+ mu = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+ if (mu == NULL)
+ return MEMORY_E;
+#endif
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ key.t1 = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+ key.t2 = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+#ifdef ALT_ECC_SIZE
+ key.x = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+ key.y = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+ key.z = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+#endif
+ if (key.t1 == NULL || key.t2 == NULL
+#ifdef ALT_ECC_SIZE
+ || key.x == NULL || key.y == NULL || key.z == NULL
+#endif
+ ) {
+#ifdef ALT_ECC_SIZE
+ XFREE(key.z, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.y, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.x, heap, DYNAMIC_TYPE_ECC);
+#endif
+ XFREE(key.t2, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.t1, heap, DYNAMIC_TYPE_ECC);
+ XFREE(mu, heap, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#endif /* WOLFSSL_SMALL_STACK_CACHE */
/* init montgomery reduction */
if ((err = mp_montgomery_setup(modulus, &mp)) != MP_OKAY) {
- return err;
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+#ifdef ALT_ECC_SIZE
+ XFREE(key.z, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.y, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.x, heap, DYNAMIC_TYPE_ECC);
+#endif
+ XFREE(key.t2, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.t1, heap, DYNAMIC_TYPE_ECC);
+#endif /* WOLFSSL_SMALL_STACK_CACHE */
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(mu, heap, DYNAMIC_TYPE_ECC);
+#endif
+ return err;
}
- if ((err = mp_init(&mu)) != MP_OKAY) {
- return err;
+
+ if ((err = mp_init(mu)) != MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+#ifdef ALT_ECC_SIZE
+ XFREE(key.z, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.y, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.x, heap, DYNAMIC_TYPE_ECC);
+#endif
+ XFREE(key.t2, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.t1, heap, DYNAMIC_TYPE_ECC);
+#endif /* WOLFSSL_SMALL_STACK_CACHE */
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(mu, heap, DYNAMIC_TYPE_ECC);
+#endif
+ return err;
}
- if ((err = mp_montgomery_calc_normalization(&mu, modulus)) != MP_OKAY) {
- mp_clear(&mu);
- return err;
+ if ((err = mp_montgomery_calc_normalization(mu, modulus)) != MP_OKAY) {
+ mp_clear(mu);
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+#ifdef ALT_ECC_SIZE
+ XFREE(key.z, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.y, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.x, heap, DYNAMIC_TYPE_ECC);
+#endif
+ XFREE(key.t2, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.t1, heap, DYNAMIC_TYPE_ECC);
+#endif /* WOLFSSL_SMALL_STACK_CACHE */
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(mu, heap, DYNAMIC_TYPE_ECC);
+#endif
+ return err;
}
-
+
/* alloc ram for window temps */
- for (i = 0; i < 8; i++) {
- M[i] = ecc_new_point();
+ for (i = 0; i < M_POINTS; i++) {
+ M[i] = wc_ecc_new_point_h(heap);
if (M[i] == NULL) {
- for (j = 0; j < i; j++) {
- ecc_del_point(M[j]);
- }
- mp_clear(&mu);
- return MEMORY_E;
+ mp_clear(mu);
+ err = MEMORY_E; goto exit;
}
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ M[i]->key = &key;
+#endif
}
- /* make a copy of G incase R==G */
- tG = ecc_new_point();
+ /* make a copy of G in case R==G */
+ tG = wc_ecc_new_point_h(heap);
if (tG == NULL)
err = MEMORY_E;
/* tG = G and convert to montgomery */
if (err == MP_OKAY) {
- if (mp_cmp_d(&mu, 1) == MP_EQ) {
+ if (mp_cmp_d(mu, 1) == MP_EQ) {
err = mp_copy(G->x, tG->x);
if (err == MP_OKAY)
err = mp_copy(G->y, tG->y);
if (err == MP_OKAY)
err = mp_copy(G->z, tG->z);
} else {
- err = mp_mulmod(G->x, &mu, modulus, tG->x);
+ err = mp_mulmod(G->x, mu, modulus, tG->x);
if (err == MP_OKAY)
- err = mp_mulmod(G->y, &mu, modulus, tG->y);
+ err = mp_mulmod(G->y, mu, modulus, tG->y);
if (err == MP_OKAY)
- err = mp_mulmod(G->z, &mu, modulus, tG->z);
+ err = mp_mulmod(G->z, mu, modulus, tG->z);
}
}
- mp_clear(&mu);
-
+
+ /* done with mu */
+ mp_clear(mu);
+
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ R->key = &key;
+#endif
+#ifndef ECC_TIMING_RESISTANT
+
/* calc the M tab, which holds kG for k==8..15 */
/* M[0] == 8G */
if (err == MP_OKAY)
- err = ecc_projective_dbl_point(tG, M[0], modulus, &mp);
+ err = ecc_projective_dbl_point(tG, M[0], a, modulus, mp);
if (err == MP_OKAY)
- err = ecc_projective_dbl_point(M[0], M[0], modulus, &mp);
+ err = ecc_projective_dbl_point(M[0], M[0], a, modulus, mp);
if (err == MP_OKAY)
- err = ecc_projective_dbl_point(M[0], M[0], modulus, &mp);
+ err = ecc_projective_dbl_point(M[0], M[0], a, modulus, mp);
/* now find (8+k)G for k=1..7 */
if (err == MP_OKAY)
for (j = 9; j < 16; j++) {
- err = ecc_projective_add_point(M[j-9], tG, M[j-8], modulus, &mp);
+ err = ecc_projective_add_point(M[j-9], tG, M[j-M_POINTS], a, modulus,
+ mp);
if (err != MP_OKAY) break;
}
@@ -1104,7 +2717,7 @@ static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
break;
}
buf = get_digit(k, digidx);
- bitcnt = (int) DIGIT_BIT;
+ bitcnt = (int) DIGIT_BIT;
--digidx;
}
@@ -1118,7 +2731,7 @@ static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
/* if the bit is zero and mode == 1 then we double */
if (mode == 1 && i == 0) {
- err = ecc_projective_dbl_point(R, R, modulus, &mp);
+ err = ecc_projective_dbl_point(R, R, a, modulus, mp);
if (err != MP_OKAY) break;
continue;
}
@@ -1131,26 +2744,27 @@ static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
/* if this is the first window we do a simple copy */
if (first == 1) {
/* R = kG [k = first window] */
- err = mp_copy(M[bitbuf-8]->x, R->x);
+ err = mp_copy(M[bitbuf-M_POINTS]->x, R->x);
if (err != MP_OKAY) break;
- err = mp_copy(M[bitbuf-8]->y, R->y);
+ err = mp_copy(M[bitbuf-M_POINTS]->y, R->y);
if (err != MP_OKAY) break;
- err = mp_copy(M[bitbuf-8]->z, R->z);
+ err = mp_copy(M[bitbuf-M_POINTS]->z, R->z);
first = 0;
} else {
/* normal window */
/* ok window is filled so double as required and add */
/* double first */
for (j = 0; j < WINSIZE; j++) {
- err = ecc_projective_dbl_point(R, R, modulus, &mp);
+ err = ecc_projective_dbl_point(R, R, a, modulus, mp);
if (err != MP_OKAY) break;
}
if (err != MP_OKAY) break; /* out of first for(;;) */
- /* then add, bitbuf will be 8..15 [8..2^WINSIZE] guaranted */
- err = ecc_projective_add_point(R,M[bitbuf-8],R,modulus,&mp);
+ /* then add, bitbuf will be 8..15 [8..2^WINSIZE] guaranteed */
+ err = ecc_projective_add_point(R, M[bitbuf-M_POINTS], R, a,
+ modulus, mp);
}
if (err != MP_OKAY) break;
/* empty window and reset */
@@ -1167,7 +2781,7 @@ static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
for (j = 0; j < bitcpy; j++) {
/* only double if we have had at least one add first */
if (first == 0) {
- err = ecc_projective_dbl_point(R, R, modulus, &mp);
+ err = ecc_projective_dbl_point(R, R, a, modulus, mp);
if (err != MP_OKAY) break;
}
@@ -1186,7 +2800,7 @@ static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
first = 0;
} else {
/* then add */
- err = ecc_projective_add_point(R, tG, R, modulus, &mp);
+ err = ecc_projective_add_point(R, tG, R, a, modulus, mp);
if (err != MP_OKAY) break;
}
}
@@ -1194,89 +2808,10 @@ static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
}
}
- /* map R back from projective space */
- if (err == MP_OKAY && map)
- err = ecc_map(R, modulus, &mp);
-
- mp_clear(&mu);
- ecc_del_point(tG);
- for (i = 0; i < 8; i++) {
- ecc_del_point(M[i]);
- }
- return err;
-}
-
-#undef WINSIZE
+ #undef WINSIZE
#else /* ECC_TIMING_RESISTANT */
-/**
- Perform a point multiplication (timing resistant)
- k The scalar to multiply by
- G The base point
- R [out] Destination for kG
- modulus The modulus of the field the ECC curve is in
- map Boolean whether to map back to affine or not
- (1==map, 0 == leave in projective)
- return MP_OKAY on success
-*/
-#ifdef FP_ECC
-static int normal_ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R,
- mp_int* modulus, int map)
-#else
-static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
- int map)
-#endif
-{
- ecc_point *tG, *M[3];
- int i, j, err;
- mp_int mu;
- mp_digit mp;
- mp_digit buf;
- int bitcnt = 0, mode = 0, digidx = 0;
-
- if (k == NULL || G == NULL || R == NULL || modulus == NULL)
- return ECC_BAD_ARG_E;
-
- /* init montgomery reduction */
- if ((err = mp_montgomery_setup(modulus, &mp)) != MP_OKAY) {
- return err;
- }
- if ((err = mp_init(&mu)) != MP_OKAY) {
- return err;
- }
- if ((err = mp_montgomery_calc_normalization(&mu, modulus)) != MP_OKAY) {
- mp_clear(&mu);
- return err;
- }
-
- /* alloc ram for window temps */
- for (i = 0; i < 3; i++) {
- M[i] = ecc_new_point();
- if (M[i] == NULL) {
- for (j = 0; j < i; j++) {
- ecc_del_point(M[j]);
- }
- mp_clear(&mu);
- return MEMORY_E;
- }
- }
-
- /* make a copy of G incase R==G */
- tG = ecc_new_point();
- if (tG == NULL)
- err = MEMORY_E;
-
- /* tG = G and convert to montgomery */
- if (err == MP_OKAY) {
- err = mp_mulmod(G->x, &mu, modulus, tG->x);
- if (err == MP_OKAY)
- err = mp_mulmod(G->y, &mu, modulus, tG->y);
- if (err == MP_OKAY)
- err = mp_mulmod(G->z, &mu, modulus, tG->z);
- }
- mp_clear(&mu);
-
/* calc the M tab */
/* M[0] == G */
if (err == MP_OKAY)
@@ -1288,13 +2823,24 @@ static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
/* M[1] == 2G */
if (err == MP_OKAY)
- err = ecc_projective_dbl_point(tG, M[1], modulus, &mp);
+ err = ecc_projective_dbl_point(tG, M[1], a, modulus, mp);
+#ifdef WC_NO_CACHE_RESISTANT
+ if (err == MP_OKAY)
+ err = wc_ecc_copy_point(M[0], M[2]);
+#else
+ if (err == MP_OKAY)
+ err = wc_ecc_copy_point(M[0], M[3]);
+ if (err == MP_OKAY)
+ err = wc_ecc_copy_point(M[1], M[4]);
+#endif
/* setup sliding window */
mode = 0;
bitcnt = 1;
buf = 0;
- digidx = get_digit_count(k) - 1;
+ digidx = get_digit_count(modulus) - 1;
+ /* The order MAY be 1 bit longer than the modulus. */
+ digidx += (modulus->dp[digidx] >> (DIGIT_BIT-1));
/* perform ops */
if (err == MP_OKAY) {
@@ -1305,43 +2851,92 @@ static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
break;
}
buf = get_digit(k, digidx);
- bitcnt = (int) DIGIT_BIT;
+ bitcnt = (int)DIGIT_BIT;
--digidx;
}
- /* grab the next msb from the ltiplicand */
+ /* grab the next msb from the multiplicand */
i = (buf >> (DIGIT_BIT - 1)) & 1;
buf <<= 1;
- if (mode == 0 && i == 0) {
- /* dummy operations */
- if (err == MP_OKAY)
- err = ecc_projective_add_point(M[0], M[1], M[2], modulus,
- &mp);
+#ifdef WC_NO_CACHE_RESISTANT
+ if (mode == 0) {
+ /* timing resistant - dummy operations */
if (err == MP_OKAY)
- err = ecc_projective_dbl_point(M[1], M[2], modulus, &mp);
+ err = ecc_projective_add_point(M[1], M[2], M[2], a, modulus,
+ mp);
if (err == MP_OKAY)
- continue;
+ err = ecc_projective_dbl_point(M[2], M[3], a, modulus, mp);
}
-
- if (mode == 0 && i == 1) {
- mode = 1;
- /* dummy operations */
- if (err == MP_OKAY)
- err = ecc_projective_add_point(M[0], M[1], M[2], modulus,
- &mp);
+ else {
if (err == MP_OKAY)
- err = ecc_projective_dbl_point(M[1], M[2], modulus, &mp);
+ err = ecc_projective_add_point(M[0], M[1], M[i^1], a,
+ modulus, mp);
if (err == MP_OKAY)
- continue;
+ err = ecc_projective_dbl_point(M[i], M[i], a, modulus, mp);
}
+#else
+ if (err == MP_OKAY)
+ err = ecc_projective_add_point(M[0], M[1], M[2], a, modulus, mp);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->x, i, M[0]->x);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->y, i, M[0]->y);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->z, i, M[0]->z);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->x, i ^ 1, M[1]->x);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->y, i ^ 1, M[1]->y);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->z, i ^ 1, M[1]->z);
+
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[0]->x, i ^ 1, M[2]->x);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[0]->y, i ^ 1, M[2]->y);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[0]->z, i ^ 1, M[2]->z);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[1]->x, i, M[2]->x);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[1]->y, i, M[2]->y);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[1]->z, i, M[2]->z);
+
+ if (err == MP_OKAY)
+ err = ecc_projective_dbl_point(M[2], M[2], a, modulus, mp);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->x, i ^ 1, M[0]->x);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->y, i ^ 1, M[0]->y);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->z, i ^ 1, M[0]->z);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->x, i, M[1]->x);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->y, i, M[1]->y);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[2]->z, i, M[1]->z);
if (err == MP_OKAY)
- err = ecc_projective_add_point(M[0], M[1], M[i^1], modulus, &mp);
+ err = mp_cond_copy(M[3]->x, (mode ^ 1) & i, M[0]->x);
if (err == MP_OKAY)
- err = ecc_projective_dbl_point(M[i], M[i], modulus, &mp);
+ err = mp_cond_copy(M[3]->y, (mode ^ 1) & i, M[0]->y);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[3]->z, (mode ^ 1) & i, M[0]->z);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[4]->x, (mode ^ 1) & i, M[1]->x);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[4]->y, (mode ^ 1) & i, M[1]->y);
+ if (err == MP_OKAY)
+ err = mp_cond_copy(M[4]->z, (mode ^ 1) & i, M[1]->z);
+#endif /* WC_NO_CACHE_RESISTANT */
+
if (err != MP_OKAY)
break;
+
+ mode |= i;
} /* end for */
}
@@ -1353,56 +2948,96 @@ static int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
if (err == MP_OKAY)
err = mp_copy(M[0]->z, R->z);
+#endif /* ECC_TIMING_RESISTANT */
+
/* map R back from projective space */
if (err == MP_OKAY && map)
- err = ecc_map(R, modulus, &mp);
+ err = ecc_map(R, modulus, mp);
+
+exit:
/* done */
- mp_clear(&mu);
- ecc_del_point(tG);
- for (i = 0; i < 3; i++) {
- ecc_del_point(M[i]);
+ wc_ecc_del_point_h(tG, heap);
+ for (i = 0; i < M_POINTS; i++) {
+ wc_ecc_del_point_h(M[i], heap);
}
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ R->key = NULL;
+#ifdef ALT_ECC_SIZE
+ XFREE(key.z, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.y, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.x, heap, DYNAMIC_TYPE_ECC);
+#endif
+ XFREE(key.t2, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.t1, heap, DYNAMIC_TYPE_ECC);
+#endif /* WOLFSSL_SMALL_STACK_CACHE */
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(mu, heap, DYNAMIC_TYPE_ECC);
+#endif
+
return err;
-}
+#else
+ if (k == NULL || G == NULL || R == NULL || modulus == NULL) {
+ return ECC_BAD_ARG_E;
+ }
-#endif /* ECC_TIMING_RESISTANT */
+ (void)a;
+#ifndef WOLFSSL_SP_NO_256
+ if (mp_count_bits(modulus) == 256) {
+ return sp_ecc_mulmod_256(k, G, R, map, heap);
+ }
+#endif
+#ifdef WOLFSSL_SP_384
+ if (mp_count_bits(modulus) == 384) {
+ return sp_ecc_mulmod_384(k, G, R, map, heap);
+ }
+#endif
+ return ECC_BAD_ARG_E;
+#endif
+}
-#ifdef ALT_ECC_SIZE
+#endif /* !FP_ECC || !WOLFSSL_SP_MATH */
+
+#endif /* !FREESCALE_LTC_ECC && !WOLFSSL_STM32_PKA */
-static void alt_fp_init(fp_int* a)
+/** ECC Fixed Point mulmod global
+ k The multiplicand
+ G Base point to multiply
+ R [out] Destination of product
+ a ECC curve parameter a
+ modulus The modulus for the curve
+ map [boolean] If non-zero maps the point back to affine coordinates,
+ otherwise it's left in jacobian-montgomery form
+ return MP_OKAY if successful
+*/
+int wc_ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* a,
+ mp_int* modulus, int map)
{
- a->size = FP_SIZE_ECC;
- fp_zero(a);
+ return wc_ecc_mulmod_ex(k, G, R, a, modulus, map, NULL);
}
-#endif /* ALT_ECC_SIZE */
-
+#endif /* !WOLFSSL_ATECC508A */
/**
- Allocate a new ECC point
- return A newly allocated point or NULL on error
-*/
-ecc_point* ecc_new_point(void)
+ * use a heap hint when creating new ecc_point
+ * return an allocated point on success or NULL on failure
+ */
+ecc_point* wc_ecc_new_point_h(void* heap)
{
ecc_point* p;
- p = (ecc_point*)XMALLOC(sizeof(ecc_point), 0, DYNAMIC_TYPE_ECC);
+ (void)heap;
+
+ p = (ecc_point*)XMALLOC(sizeof(ecc_point), heap, DYNAMIC_TYPE_ECC);
if (p == NULL) {
return NULL;
}
XMEMSET(p, 0, sizeof(ecc_point));
-#ifndef USE_FAST_MATH
- p->x->dp = NULL;
- p->y->dp = NULL;
- p->z->dp = NULL;
-#endif
-
#ifndef ALT_ECC_SIZE
if (mp_init_multi(p->x, p->y, p->z, NULL, NULL, NULL) != MP_OKAY) {
- XFREE(p, 0, DYNAMIC_TYPE_ECC);
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
return NULL;
}
#else
@@ -1417,26 +3052,97 @@ ecc_point* ecc_new_point(void)
return p;
}
-/** Free an ECC point from memory
- p The point to free
+
+/**
+ Allocate a new ECC point
+ return A newly allocated point or NULL on error
*/
-void ecc_del_point(ecc_point* p)
+ecc_point* wc_ecc_new_point(void)
+{
+ return wc_ecc_new_point_h(NULL);
+}
+
+
+void wc_ecc_del_point_h(ecc_point* p, void* heap)
{
/* prevents free'ing null arguments */
if (p != NULL) {
mp_clear(p->x);
mp_clear(p->y);
mp_clear(p->z);
- XFREE(p, 0, DYNAMIC_TYPE_ECC);
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
}
+ (void)heap;
+}
+
+
+/** Free an ECC point from memory
+ p The point to free
+*/
+void wc_ecc_del_point(ecc_point* p)
+{
+ wc_ecc_del_point_h(p, NULL);
+}
+
+
+/** Copy the value of a point to an other one
+ p The point to copy
+ r The created point
+*/
+int wc_ecc_copy_point(ecc_point* p, ecc_point *r)
+{
+ int ret;
+
+ /* prevents null arguments */
+ if (p == NULL || r == NULL)
+ return ECC_BAD_ARG_E;
+
+ ret = mp_copy(p->x, r->x);
+ if (ret != MP_OKAY)
+ return ret;
+ ret = mp_copy(p->y, r->y);
+ if (ret != MP_OKAY)
+ return ret;
+ ret = mp_copy(p->z, r->z);
+ if (ret != MP_OKAY)
+ return ret;
+
+ return MP_OKAY;
+}
+
+/** Compare the value of a point with an other one
+ a The point to compare
+ b The other point to compare
+
+ return MP_EQ if equal, MP_LT/MP_GT if not, < 0 in case of error
+ */
+int wc_ecc_cmp_point(ecc_point* a, ecc_point *b)
+{
+ int ret;
+
+ /* prevents null arguments */
+ if (a == NULL || b == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = mp_cmp(a->x, b->x);
+ if (ret != MP_EQ)
+ return ret;
+ ret = mp_cmp(a->y, b->y);
+ if (ret != MP_EQ)
+ return ret;
+ ret = mp_cmp(a->z, b->z);
+ if (ret != MP_EQ)
+ return ret;
+
+ return MP_EQ;
}
/** Returns whether an ECC idx is valid or not
n The idx number to check
return 1 if valid, 0 if not
-*/
-static int ecc_is_valid_idx(int n)
+*/
+int wc_ecc_is_valid_idx(int n)
{
int x;
@@ -1444,84 +3150,741 @@ static int ecc_is_valid_idx(int n)
;
/* -1 is a valid index --- indicating that the domain params
were supplied by the user */
- if ((n >= -1) && (n < x)) {
+ if ((n >= ECC_CUSTOM_IDX) && (n < x)) {
return 1;
}
+
+ return 0;
+}
+
+int wc_ecc_get_curve_idx(int curve_id)
+{
+ int curve_idx;
+ for (curve_idx = 0; ecc_sets[curve_idx].size != 0; curve_idx++) {
+ if (curve_id == ecc_sets[curve_idx].id)
+ break;
+ }
+ if (ecc_sets[curve_idx].size == 0) {
+ return ECC_CURVE_INVALID;
+ }
+ return curve_idx;
+}
+
+int wc_ecc_get_curve_id(int curve_idx)
+{
+ if (wc_ecc_is_valid_idx(curve_idx)) {
+ return ecc_sets[curve_idx].id;
+ }
+ return ECC_CURVE_INVALID;
+}
+
+/* Returns the curve size that corresponds to a given ecc_curve_id identifier
+ *
+ * id curve id, from ecc_curve_id enum in ecc.h
+ * return curve size, from ecc_sets[] on success, negative on error
+ */
+int wc_ecc_get_curve_size_from_id(int curve_id)
+{
+ int curve_idx = wc_ecc_get_curve_idx(curve_id);
+ if (curve_idx == ECC_CURVE_INVALID)
+ return ECC_BAD_ARG_E;
+ return ecc_sets[curve_idx].size;
+}
+
+/* Returns the curve index that corresponds to a given curve name in
+ * ecc_sets[] of ecc.c
+ *
+ * name curve name, from ecc_sets[].name in ecc.c
+ * return curve index in ecc_sets[] on success, negative on error
+ */
+int wc_ecc_get_curve_idx_from_name(const char* curveName)
+{
+ int curve_idx;
+ word32 len;
+
+ if (curveName == NULL)
+ return BAD_FUNC_ARG;
+
+ len = (word32)XSTRLEN(curveName);
+
+ for (curve_idx = 0; ecc_sets[curve_idx].size != 0; curve_idx++) {
+ if (
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ ecc_sets[curve_idx].name &&
+ #endif
+ XSTRNCASECMP(ecc_sets[curve_idx].name, curveName, len) == 0) {
+ break;
+ }
+ }
+ if (ecc_sets[curve_idx].size == 0) {
+ WOLFSSL_MSG("ecc_set curve name not found");
+ return ECC_CURVE_INVALID;
+ }
+ return curve_idx;
+}
+
+/* Returns the curve size that corresponds to a given curve name,
+ * as listed in ecc_sets[] of ecc.c.
+ *
+ * name curve name, from ecc_sets[].name in ecc.c
+ * return curve size, from ecc_sets[] on success, negative on error
+ */
+int wc_ecc_get_curve_size_from_name(const char* curveName)
+{
+ int curve_idx;
+
+ if (curveName == NULL)
+ return BAD_FUNC_ARG;
+
+ curve_idx = wc_ecc_get_curve_idx_from_name(curveName);
+ if (curve_idx < 0)
+ return curve_idx;
+
+ return ecc_sets[curve_idx].size;
+}
+
+/* Returns the curve id that corresponds to a given curve name,
+ * as listed in ecc_sets[] of ecc.c.
+ *
+ * name curve name, from ecc_sets[].name in ecc.c
+ * return curve id, from ecc_sets[] on success, negative on error
+ */
+int wc_ecc_get_curve_id_from_name(const char* curveName)
+{
+ int curve_idx;
+
+ if (curveName == NULL)
+ return BAD_FUNC_ARG;
+
+ curve_idx = wc_ecc_get_curve_idx_from_name(curveName);
+ if (curve_idx < 0)
+ return curve_idx;
+
+ return ecc_sets[curve_idx].id;
+}
+
+/* Compares a curve parameter (hex, from ecc_sets[]) to given input
+ * parameter for equality.
+ * encType is WC_TYPE_UNSIGNED_BIN or WC_TYPE_HEX_STR
+ * Returns MP_EQ on success, negative on error */
+static int wc_ecc_cmp_param(const char* curveParam,
+ const byte* param, word32 paramSz, int encType)
+{
+ int err = MP_OKAY;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* a = NULL;
+ mp_int* b = NULL;
+#else
+ mp_int a[1], b[1];
+#endif
+
+ if (param == NULL || curveParam == NULL)
+ return BAD_FUNC_ARG;
+
+ if (encType == WC_TYPE_HEX_STR)
+ return XSTRNCMP(curveParam, (char*) param, paramSz);
+
+#ifdef WOLFSSL_SMALL_STACK
+ a = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (a == NULL)
+ return MEMORY_E;
+ b = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (b == NULL) {
+ XFREE(a, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#endif
+
+ if ((err = mp_init_multi(a, b, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(a, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(b, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ return err;
+ }
+
+ if (err == MP_OKAY) {
+ err = mp_read_unsigned_bin(a, param, paramSz);
+ }
+ if (err == MP_OKAY)
+ err = mp_read_radix(b, curveParam, MP_RADIX_HEX);
+
+ if (err == MP_OKAY) {
+ if (mp_cmp(a, b) != MP_EQ) {
+ err = -1;
+ } else {
+ err = MP_EQ;
+ }
+ }
+
+ mp_clear(a);
+ mp_clear(b);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(a, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+ return err;
+}
+
+/* Returns the curve id in ecc_sets[] that corresponds to a given set of
+ * curve parameters.
+ *
+ * fieldSize the field size in bits
+ * prime prime of the finite field
+ * primeSz size of prime in octets
+ * Af first coefficient a of the curve
+ * AfSz size of Af in octets
+ * Bf second coefficient b of the curve
+ * BfSz size of Bf in octets
+ * order curve order
+ * orderSz size of curve in octets
+ * Gx affine x coordinate of base point
+ * GxSz size of Gx in octets
+ * Gy affine y coordinate of base point
+ * GySz size of Gy in octets
+ * cofactor curve cofactor
+ *
+ * return curve id, from ecc_sets[] on success, negative on error
+ */
+int wc_ecc_get_curve_id_from_params(int fieldSize,
+ const byte* prime, word32 primeSz, const byte* Af, word32 AfSz,
+ const byte* Bf, word32 BfSz, const byte* order, word32 orderSz,
+ const byte* Gx, word32 GxSz, const byte* Gy, word32 GySz, int cofactor)
+{
+ int idx;
+ int curveSz;
+
+ if (prime == NULL || Af == NULL || Bf == NULL || order == NULL ||
+ Gx == NULL || Gy == NULL)
+ return BAD_FUNC_ARG;
+
+ curveSz = (fieldSize + 1) / 8; /* round up */
+
+ for (idx = 0; ecc_sets[idx].size != 0; idx++) {
+ if (curveSz == ecc_sets[idx].size) {
+ if ((wc_ecc_cmp_param(ecc_sets[idx].prime, prime,
+ primeSz, WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].Af, Af, AfSz,
+ WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].Bf, Bf, BfSz,
+ WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].order, order,
+ orderSz, WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].Gx, Gx, GxSz,
+ WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].Gy, Gy, GySz,
+ WC_TYPE_UNSIGNED_BIN) == MP_EQ) &&
+ (cofactor == ecc_sets[idx].cofactor)) {
+ break;
+ }
+ }
+ }
+
+ if (ecc_sets[idx].size == 0)
+ return ECC_CURVE_INVALID;
+
+ return ecc_sets[idx].id;
+}
+
+/* Returns the curve id in ecc_sets[] that corresponds
+ * to a given domain parameters pointer.
+ *
+ * dp domain parameters pointer
+ *
+ * return curve id, from ecc_sets[] on success, negative on error
+ */
+int wc_ecc_get_curve_id_from_dp_params(const ecc_set_type* dp)
+{
+ int idx;
+
+ if (dp == NULL
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ || dp->prime == NULL || dp->Af == NULL ||
+ dp->Bf == NULL || dp->order == NULL || dp->Gx == NULL || dp->Gy == NULL
+ #endif
+ ) {
+ return BAD_FUNC_ARG;
+ }
+
+ for (idx = 0; ecc_sets[idx].size != 0; idx++) {
+ if (dp->size == ecc_sets[idx].size) {
+ if ((wc_ecc_cmp_param(ecc_sets[idx].prime, (const byte*)dp->prime,
+ (word32)XSTRLEN(dp->prime), WC_TYPE_HEX_STR) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].Af, (const byte*)dp->Af,
+ (word32)XSTRLEN(dp->Af),WC_TYPE_HEX_STR) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].Bf, (const byte*)dp->Bf,
+ (word32)XSTRLEN(dp->Bf),WC_TYPE_HEX_STR) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].order, (const byte*)dp->order,
+ (word32)XSTRLEN(dp->order),WC_TYPE_HEX_STR) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].Gx, (const byte*)dp->Gx,
+ (word32)XSTRLEN(dp->Gx),WC_TYPE_HEX_STR) == MP_EQ) &&
+ (wc_ecc_cmp_param(ecc_sets[idx].Gy, (const byte*)dp->Gy,
+ (word32)XSTRLEN(dp->Gy),WC_TYPE_HEX_STR) == MP_EQ) &&
+ (dp->cofactor == ecc_sets[idx].cofactor)) {
+ break;
+ }
+ }
+ }
+
+ if (ecc_sets[idx].size == 0)
+ return ECC_CURVE_INVALID;
+
+ return ecc_sets[idx].id;
+}
+
+/* Returns the curve id that corresponds to a given OID,
+ * as listed in ecc_sets[] of ecc.c.
+ *
+ * oid OID, from ecc_sets[].name in ecc.c
+ * len OID len, from ecc_sets[].name in ecc.c
+ * return curve id, from ecc_sets[] on success, negative on error
+ */
+int wc_ecc_get_curve_id_from_oid(const byte* oid, word32 len)
+{
+ int curve_idx;
+
+ if (oid == NULL)
+ return BAD_FUNC_ARG;
+
+ for (curve_idx = 0; ecc_sets[curve_idx].size != 0; curve_idx++) {
+ if (
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ ecc_sets[curve_idx].oid &&
+ #endif
+ ecc_sets[curve_idx].oidSz == len &&
+ XMEMCMP(ecc_sets[curve_idx].oid, oid, len) == 0) {
+ break;
+ }
+ }
+ if (ecc_sets[curve_idx].size == 0) {
+ WOLFSSL_MSG("ecc_set curve name not found");
+ return ECC_CURVE_INVALID;
+ }
+
+ return ecc_sets[curve_idx].id;
+}
+
+/* Get curve parameters using curve index */
+const ecc_set_type* wc_ecc_get_curve_params(int curve_idx)
+{
+ const ecc_set_type* ecc_set = NULL;
+
+ if (curve_idx >= 0 && curve_idx < (int)ECC_SET_COUNT) {
+ ecc_set = &ecc_sets[curve_idx];
+ }
+ return ecc_set;
+}
+
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+static WC_INLINE int wc_ecc_alloc_mpint(ecc_key* key, mp_int** mp)
+{
+ if (key == NULL || mp == NULL)
+ return BAD_FUNC_ARG;
+ if (*mp == NULL) {
+ *mp = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_BIGINT);
+ if (*mp == NULL) {
+ return MEMORY_E;
+ }
+ XMEMSET(*mp, 0, sizeof(mp_int));
+ }
return 0;
}
+static WC_INLINE void wc_ecc_free_mpint(ecc_key* key, mp_int** mp)
+{
+ if (key && mp && *mp) {
+ mp_clear(*mp);
+ XFREE(*mp, key->heap, DYNAMIC_TYPE_BIGINT);
+ *mp = NULL;
+ }
+}
+
+static int wc_ecc_alloc_async(ecc_key* key)
+{
+ int err = wc_ecc_alloc_mpint(key, &key->r);
+ if (err == 0)
+ err = wc_ecc_alloc_mpint(key, &key->s);
+ return err;
+}
+
+static void wc_ecc_free_async(ecc_key* key)
+{
+ wc_ecc_free_mpint(key, &key->r);
+ wc_ecc_free_mpint(key, &key->s);
+#ifdef HAVE_CAVIUM_V
+ wc_ecc_free_mpint(key, &key->e);
+ wc_ecc_free_mpint(key, &key->signK);
+#endif /* HAVE_CAVIUM_V */
+}
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
+#ifdef HAVE_ECC_DHE
/**
Create an ECC shared secret between two keys
- private_key The private ECC key
+ private_key The private ECC key (heap hint based off of private key)
public_key The public key
out [out] Destination of the shared secret
- Conforms to EC-DH from ANSI X9.63
+ Conforms to EC-DH from ANSI X9.63
outlen [in/out] The max size and resulting size of the shared secret
return MP_OKAY if successful
*/
int wc_ecc_shared_secret(ecc_key* private_key, ecc_key* public_key, byte* out,
word32* outlen)
{
- word32 x = 0;
- ecc_point* result;
- mp_int prime;
- int err;
-
+ int err;
+#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+ CRYS_ECDH_TempData_t tempBuff;
+#endif
if (private_key == NULL || public_key == NULL || out == NULL ||
- outlen == NULL)
+ outlen == NULL) {
return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLF_CRYPTO_CB
+ if (private_key->devId != INVALID_DEVID) {
+ err = wc_CryptoCb_Ecdh(private_key, public_key, out, outlen);
+ if (err != CRYPTOCB_UNAVAILABLE)
+ return err;
+ /* fall-through when unavailable */
+ }
+#endif
/* type valid? */
- if (private_key->type != ECC_PRIVATEKEY) {
+ if (private_key->type != ECC_PRIVATEKEY &&
+ private_key->type != ECC_PRIVATEKEY_ONLY) {
return ECC_BAD_ARG_E;
}
- if (ecc_is_valid_idx(private_key->idx) == 0 ||
- ecc_is_valid_idx(public_key->idx) == 0)
+ /* Verify domain params supplied */
+ if (wc_ecc_is_valid_idx(private_key->idx) == 0 ||
+ wc_ecc_is_valid_idx(public_key->idx) == 0) {
return ECC_BAD_ARG_E;
+ }
- if (XSTRNCMP(private_key->dp->name, public_key->dp->name, ECC_MAXNAME) != 0)
+ /* Verify curve id matches */
+ if (private_key->dp->id != public_key->dp->id) {
return ECC_BAD_ARG_E;
-
- /* make new point */
- result = ecc_new_point();
- if (result == NULL) {
- return MEMORY_E;
}
- if ((err = mp_init(&prime)) != MP_OKAY) {
- ecc_del_point(result);
- return err;
+#ifdef WOLFSSL_ATECC508A
+ /* For SECP256R1 use hardware */
+ if (private_key->dp->id == ECC_SECP256R1) {
+ err = atmel_ecc_create_pms(private_key->slot, public_key->pubkey_raw, out);
+ *outlen = private_key->dp->size;
+ }
+ else {
+ err = NOT_COMPILED_IN;
}
+#elif defined(WOLFSSL_CRYPTOCELL)
- err = mp_read_radix(&prime, (char *)private_key->dp->prime, 16);
+ /* generate a secret*/
+ err = CRYS_ECDH_SVDP_DH(&public_key->ctx.pubKey,
+ &private_key->ctx.privKey,
+ out,
+ outlen,
+ &tempBuff);
- if (err == MP_OKAY)
- err = ecc_mulmod(&private_key->k, &public_key->pubkey, result, &prime,1);
+ if (err != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_ECDH_SVDP_DH for secret failed");
+ return err;
+ }
- if (err == MP_OKAY) {
- x = mp_unsigned_bin_size(&prime);
- if (*outlen < x)
- err = BUFFER_E;
- }
+#else
+ err = wc_ecc_shared_secret_ex(private_key, &public_key->pubkey, out, outlen);
+#endif /* WOLFSSL_ATECC508A */
- if (err == MP_OKAY) {
- XMEMSET(out, 0, x);
- err = mp_to_unsigned_bin(result->x,out + (x -
- mp_unsigned_bin_size(result->x)));
- *outlen = x;
- }
+ return err;
+}
- mp_clear(&prime);
- ecc_del_point(result);
- return err;
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
+
+static int wc_ecc_shared_secret_gen_sync(ecc_key* private_key, ecc_point* point,
+ byte* out, word32* outlen, ecc_curve_spec* curve)
+{
+ int err;
+#ifndef WOLFSSL_SP_MATH
+ ecc_point* result = NULL;
+ word32 x = 0;
+#endif
+ mp_int* k = &private_key->k;
+#ifdef HAVE_ECC_CDH
+ mp_int k_lcl;
+
+ /* if cofactor flag has been set */
+ if (private_key->flags & WC_ECC_FLAG_COFACTOR) {
+ mp_digit cofactor = (mp_digit)private_key->dp->cofactor;
+ /* only perform cofactor calc if not equal to 1 */
+ if (cofactor != 1) {
+ k = &k_lcl;
+ if (mp_init(k) != MP_OKAY)
+ return MEMORY_E;
+ /* multiply cofactor times private key "k" */
+ err = mp_mul_d(&private_key->k, cofactor, k);
+ if (err != MP_OKAY) {
+ mp_clear(k);
+ return err;
+ }
+ }
+ }
+#endif
+
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+ if (private_key->idx != ECC_CUSTOM_IDX &&
+ ecc_sets[private_key->idx].id == ECC_SECP256R1) {
+ err = sp_ecc_secret_gen_256(k, point, out, outlen, private_key->heap);
+ }
+ else
+#endif
+#ifdef WOLFSSL_SP_384
+ if (private_key->idx != ECC_CUSTOM_IDX &&
+ ecc_sets[private_key->idx].id == ECC_SECP384R1) {
+ err = sp_ecc_secret_gen_384(k, point, out, outlen, private_key->heap);
+ }
+ else
+#endif
+#endif
+#ifdef WOLFSSL_SP_MATH
+ {
+ err = WC_KEY_SIZE_E;
+
+ (void)curve;
+ }
+#else
+ {
+ mp_digit mp = 0;
+
+ /* make new point */
+ result = wc_ecc_new_point_h(private_key->heap);
+ if (result == NULL) {
+#ifdef HAVE_ECC_CDH
+ if (k == &k_lcl)
+ mp_clear(k);
+#endif
+ return MEMORY_E;
+ }
+
+ /* Map in a separate call as this should be constant time */
+ err = wc_ecc_mulmod_ex(k, point, result, curve->Af, curve->prime, 0,
+ private_key->heap);
+ if (err == MP_OKAY) {
+ err = mp_montgomery_setup(curve->prime, &mp);
+ }
+ if (err == MP_OKAY) {
+ /* Use constant time map if compiled in */
+ err = ecc_map_ex(result, curve->prime, mp, 1);
+ }
+ if (err == MP_OKAY) {
+ x = mp_unsigned_bin_size(curve->prime);
+ if (*outlen < x || (int)x < mp_unsigned_bin_size(result->x)) {
+ err = BUFFER_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(out, 0, x);
+ err = mp_to_unsigned_bin(result->x,out +
+ (x - mp_unsigned_bin_size(result->x)));
+ }
+ *outlen = x;
+
+ wc_ecc_del_point_h(result, private_key->heap);
+ }
+#endif
+#ifdef HAVE_ECC_CDH
+ if (k == &k_lcl)
+ mp_clear(k);
+#endif
+
+ return err;
+}
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+static int wc_ecc_shared_secret_gen_async(ecc_key* private_key,
+ ecc_point* point, byte* out, word32 *outlen,
+ ecc_curve_spec* curve)
+{
+ int err;
+
+#if defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA)
+#ifdef HAVE_CAVIUM_V
+ /* verify the curve is supported by hardware */
+ if (NitroxEccIsCurveSupported(private_key))
+#endif
+ {
+ word32 keySz = private_key->dp->size;
+
+ /* sync public key x/y */
+ err = wc_mp_to_bigint_sz(&private_key->k, &private_key->k.raw, keySz);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint_sz(point->x, &point->x->raw, keySz);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint_sz(point->y, &point->y->raw, keySz);
+ #ifdef HAVE_CAVIUM_V
+ /* allocate buffer for output */
+ if (err == MP_OKAY)
+ err = wc_ecc_alloc_mpint(private_key, &private_key->e);
+ if (err == MP_OKAY)
+ err = wc_bigint_alloc(&private_key->e->raw,
+ NitroxEccGetSize(private_key)*2);
+ if (err == MP_OKAY)
+ err = NitroxEcdh(private_key,
+ &private_key->k.raw, &point->x->raw, &point->y->raw,
+ private_key->e->raw.buf, &private_key->e->raw.len,
+ &curve->prime->raw);
+ #else
+ if (err == MP_OKAY)
+ err = wc_ecc_curve_load(private_key->dp, &curve, ECC_CURVE_FIELD_BF);
+ if (err == MP_OKAY)
+ err = IntelQaEcdh(&private_key->asyncDev,
+ &private_key->k.raw, &point->x->raw, &point->y->raw,
+ out, outlen,
+ &curve->Af->raw, &curve->Bf->raw, &curve->prime->raw,
+ private_key->dp->cofactor);
+ #endif
+ return err;
+ }
+#elif defined(WOLFSSL_ASYNC_CRYPT_TEST)
+ if (wc_AsyncTestInit(&private_key->asyncDev, ASYNC_TEST_ECC_SHARED_SEC)) {
+ WC_ASYNC_TEST* testDev = &private_key->asyncDev.test;
+ testDev->eccSharedSec.private_key = private_key;
+ testDev->eccSharedSec.public_point = point;
+ testDev->eccSharedSec.out = out;
+ testDev->eccSharedSec.outLen = outlen;
+ return WC_PENDING_E;
+ }
+#endif
+
+ /* use sync in other cases */
+ err = wc_ecc_shared_secret_gen_sync(private_key, point, out, outlen, curve);
+
+ return err;
+}
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
+
+int wc_ecc_shared_secret_gen(ecc_key* private_key, ecc_point* point,
+ byte* out, word32 *outlen)
+{
+ int err;
+ DECLARE_CURVE_SPECS(curve, 2);
+
+ if (private_key == NULL || point == NULL || out == NULL ||
+ outlen == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* load curve info */
+ ALLOC_CURVE_SPECS(2);
+ err = wc_ecc_curve_load(private_key->dp, &curve,
+ (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF));
+ if (err != MP_OKAY) {
+ FREE_CURVE_SPECS();
+ return err;
+ }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+ err = wc_ecc_shared_secret_gen_async(private_key, point,
+ out, outlen, curve);
+ }
+ else
+#endif
+ {
+ err = wc_ecc_shared_secret_gen_sync(private_key, point,
+ out, outlen, curve);
+ }
+
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
+
+ return err;
+}
+
+/**
+ Create an ECC shared secret between private key and public point
+ private_key The private ECC key (heap hint based on private key)
+ point The point to use (public key)
+ out [out] Destination of the shared secret
+ Conforms to EC-DH from ANSI X9.63
+ outlen [in/out] The max size and resulting size of the shared secret
+ return MP_OKAY if successful
+*/
+int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point,
+ byte* out, word32 *outlen)
+{
+ int err;
+
+ if (private_key == NULL || point == NULL || out == NULL ||
+ outlen == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* type valid? */
+ if (private_key->type != ECC_PRIVATEKEY &&
+ private_key->type != ECC_PRIVATEKEY_ONLY) {
+ return ECC_BAD_ARG_E;
+ }
+
+ /* Verify domain params supplied */
+ if (wc_ecc_is_valid_idx(private_key->idx) == 0)
+ return ECC_BAD_ARG_E;
+
+ switch(private_key->state) {
+ case ECC_STATE_NONE:
+ case ECC_STATE_SHARED_SEC_GEN:
+ private_key->state = ECC_STATE_SHARED_SEC_GEN;
+
+ err = wc_ecc_shared_secret_gen(private_key, point, out, outlen);
+ if (err < 0) {
+ break;
+ }
+ FALL_THROUGH;
+
+ case ECC_STATE_SHARED_SEC_RES:
+ private_key->state = ECC_STATE_SHARED_SEC_RES;
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+ #ifdef HAVE_CAVIUM_V
+ /* verify the curve is supported by hardware */
+ if (NitroxEccIsCurveSupported(private_key)) {
+ /* copy output */
+ *outlen = private_key->dp->size;
+ XMEMCPY(out, private_key->e->raw.buf, *outlen);
+ }
+ #endif /* HAVE_CAVIUM_V */
+ }
+ #endif /* WOLFSSL_ASYNC_CRYPT */
+ err = 0;
+ break;
+
+ default:
+ err = BAD_STATE_E;
+ } /* switch */
+
+ /* if async pending then return and skip done cleanup below */
+ if (err == WC_PENDING_E) {
+ private_key->state++;
+ return err;
+ }
+
+ /* cleanup */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ wc_ecc_free_async(private_key);
+#endif
+ private_key->state = ECC_STATE_NONE;
+
+ return err;
}
+#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */
+#endif /* HAVE_ECC_DHE */
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
/* return 1 if point is at infinity, 0 if not, < 0 on error */
-static int ecc_point_is_at_infinity(ecc_point* p)
+int wc_ecc_point_is_at_infinity(ecc_point* p)
{
if (p == NULL)
return BAD_FUNC_ARG;
@@ -1532,317 +3895,1461 @@ static int ecc_point_is_at_infinity(ecc_point* p)
return 0;
}
+/* generate random and ensure its greater than 0 and less than order */
+int wc_ecc_gen_k(WC_RNG* rng, int size, mp_int* k, mp_int* order)
+{
+#ifndef WC_NO_RNG
+ int err;
+ byte buf[ECC_MAXSIZE_GEN];
-int wc_ecc_make_key_ex(RNG* rng, ecc_key* key, const ecc_set_type* dp);
+ /*generate 8 extra bytes to mitigate bias from the modulo operation below*/
+ /*see section A.1.2 in 'Suite B Implementor's Guide to FIPS 186-3 (ECDSA)'*/
+ size += 8;
-/**
- Make a new ECC key
- rng An active RNG state
- keysize The keysize for the new key (in octets from 20 to 65 bytes)
- key [out] Destination of the newly created key
- return MP_OKAY if successful,
- upon error all allocated memory will be freed
-*/
-int wc_ecc_make_key(RNG* rng, int keysize, ecc_key* key)
-{
- int x, err;
+ /* make up random string */
+ err = wc_RNG_GenerateBlock(rng, buf, size);
- if (key == NULL || rng == NULL)
- return ECC_BAD_ARG_E;
+ /* load random buffer data into k */
+ if (err == 0)
+ err = mp_read_unsigned_bin(k, (byte*)buf, size);
- /* find key size */
- for (x = 0; (keysize > ecc_sets[x].size) && (ecc_sets[x].size != 0); x++)
- ;
- keysize = ecc_sets[x].size;
+ /* the key should be smaller than the order of base point */
+ if (err == MP_OKAY) {
+ if (mp_cmp(k, order) != MP_LT) {
+ err = mp_mod(k, order, k);
+ }
+ }
- if (keysize > ECC_MAXSIZE || ecc_sets[x].size == 0) {
- return BAD_FUNC_ARG;
- }
- err = wc_ecc_make_key_ex(rng, key, &ecc_sets[x]);
- key->idx = x;
+ /* quick sanity check to make sure we're not dealing with a 0 key */
+ if (err == MP_OKAY) {
+ if (mp_iszero(k) == MP_YES)
+ err = MP_ZERO_E;
+ }
- return err;
+ ForceZero(buf, ECC_MAXSIZE);
+
+ return err;
+#else
+ (void)rng;
+ (void)size;
+ (void)k;
+ (void)order;
+ return NOT_COMPILED_IN;
+#endif /* !WC_NO_RNG */
}
+#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */
-int wc_ecc_make_key_ex(RNG* rng, ecc_key* key, const ecc_set_type* dp)
+static WC_INLINE void wc_ecc_reset(ecc_key* key)
{
- int err;
- ecc_point* base;
- mp_int prime;
- mp_int order;
-#ifdef WOLFSSL_SMALL_STACK
- byte* buf;
-#else
- byte buf[ECC_MAXSIZE];
-#endif
- int keysize;
- int po_init = 0; /* prime order Init flag for clear */
+ /* make sure required key variables are reset */
+ key->state = ECC_STATE_NONE;
+}
- if (key == NULL || rng == NULL || dp == NULL)
- return ECC_BAD_ARG_E;
-#ifdef WOLFSSL_SMALL_STACK
- buf = (byte*)XMALLOC(ECC_MAXSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (buf == NULL)
- return MEMORY_E;
+/* create the public ECC key from a private key
+ *
+ * key an initialized private key to generate public part from
+ * curveIn [in]curve for key, can be NULL
+ * pubOut [out]ecc_point holding the public key, if NULL then public key part
+ * is cached in key instead.
+ *
+ * Note this function is local to the file because of the argument type
+ * ecc_curve_spec. Having this argument allows for not having to load the
+ * curve type multiple times when generating a key with wc_ecc_make_key().
+ *
+ * returns MP_OKAY on success
+ */
+static int wc_ecc_make_pub_ex(ecc_key* key, ecc_curve_spec* curveIn,
+ ecc_point* pubOut)
+{
+ int err = MP_OKAY;
+#ifndef WOLFSSL_ATECC508A
+#ifndef WOLFSSL_SP_MATH
+ ecc_point* base = NULL;
#endif
+ ecc_point* pub;
+ DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT);
+#endif /* !WOLFSSL_ATECC508A */
+
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifndef WOLFSSL_ATECC508A
+
+ /* if ecc_point passed in then use it as output for public key point */
+ if (pubOut != NULL) {
+ pub = pubOut;
+ }
+ else {
+ /* caching public key making it a ECC_PRIVATEKEY instead of
+ ECC_PRIVATEKEY_ONLY */
+ pub = &key->pubkey;
+ key->type = ECC_PRIVATEKEY_ONLY;
+ }
- key->idx = -1;
- key->dp = dp;
- keysize = dp->size;
+ /* avoid loading the curve unless it is not passed in */
+ if (curveIn != NULL) {
+ curve = curveIn;
+ }
+ else {
+ /* load curve info */
+ if (err == MP_OKAY) {
+ ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
+ err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+ }
+ }
- /* allocate ram */
- base = NULL;
+ if (err == MP_OKAY) {
+ #ifndef ALT_ECC_SIZE
+ err = mp_init_multi(pub->x, pub->y, pub->z, NULL, NULL, NULL);
+ #else
+ pub->x = (mp_int*)&pub->xyz[0];
+ pub->y = (mp_int*)&pub->xyz[1];
+ pub->z = (mp_int*)&pub->xyz[2];
+ alt_fp_init(pub->x);
+ alt_fp_init(pub->y);
+ alt_fp_init(pub->z);
+ #endif
+ }
- /* make up random string */
- err = wc_RNG_GenerateBlock(rng, buf, keysize);
- if (err == 0)
- buf[0] |= 0x0c;
- /* setup the key variables */
- if (err == 0) {
-#ifndef ALT_ECC_SIZE
- err = mp_init_multi(key->pubkey.x, key->pubkey.y, key->pubkey.z,
- &key->k, &prime, &order);
-#else
- key->pubkey.x = (mp_int*)&key->pubkey.xyz[0];
- key->pubkey.y = (mp_int*)&key->pubkey.xyz[1];
- key->pubkey.z = (mp_int*)&key->pubkey.xyz[2];
- alt_fp_init(key->pubkey.x);
- alt_fp_init(key->pubkey.y);
- alt_fp_init(key->pubkey.z);
- err = mp_init_multi(&key->k, &prime, &order, NULL, NULL, NULL);
+ if (err != MP_OKAY) {
+ }
+ else
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
+ err = sp_ecc_mulmod_base_256(&key->k, pub, 1, key->heap);
+ }
+ else
#endif
- if (err != MP_OKAY)
- err = MEMORY_E;
- else
- po_init = 1;
- }
+#ifdef WOLFSSL_SP_384
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+ err = sp_ecc_mulmod_base_384(&key->k, pub, 1, key->heap);
+ }
+ else
+#endif
+#endif
+#ifdef WOLFSSL_SP_MATH
+ err = WC_KEY_SIZE_E;
+#else
+ {
+ mp_digit mp;
- if (err == MP_OKAY) {
- base = ecc_new_point();
- if (base == NULL)
- err = MEMORY_E;
- }
+ base = wc_ecc_new_point_h(key->heap);
+ if (base == NULL)
+ err = MEMORY_E;
+ /* read in the x/y for this key */
+ if (err == MP_OKAY)
+ err = mp_copy(curve->Gx, base->x);
+ if (err == MP_OKAY)
+ err = mp_copy(curve->Gy, base->y);
+ if (err == MP_OKAY)
+ err = mp_set(base->z, 1);
- /* read in the specs for this key */
- if (err == MP_OKAY)
- err = mp_read_radix(&prime, (char *)key->dp->prime, 16);
- if (err == MP_OKAY)
- err = mp_read_radix(&order, (char *)key->dp->order, 16);
- if (err == MP_OKAY)
- err = mp_read_radix(base->x, (char *)key->dp->Gx, 16);
- if (err == MP_OKAY)
- err = mp_read_radix(base->y, (char *)key->dp->Gy, 16);
-
- if (err == MP_OKAY)
- mp_set(base->z, 1);
- if (err == MP_OKAY)
- err = mp_read_unsigned_bin(&key->k, (byte*)buf, keysize);
-
- /* the key should be smaller than the order of base point */
- if (err == MP_OKAY) {
- if (mp_cmp(&key->k, &order) != MP_LT)
- err = mp_mod(&key->k, &order, &key->k);
- }
- /* make the public key */
- if (err == MP_OKAY)
- err = ecc_mulmod(&key->k, base, &key->pubkey, &prime, 1);
+ /* make the public key */
+ if (err == MP_OKAY) {
+ /* Map in a separate call as this should be constant time */
+ err = wc_ecc_mulmod_ex(&key->k, base, pub, curve->Af, curve->prime,
+ 0, key->heap);
+ if (err == MP_MEM) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ err = mp_montgomery_setup(curve->prime, &mp);
+ }
+ if (err == MP_OKAY) {
+ /* Use constant time map if compiled in */
+ err = ecc_map_ex(pub, curve->prime, mp, 1);
+ }
+
+ wc_ecc_del_point_h(base, key->heap);
+ }
+#endif
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
- /* validate the public key, order * pubkey = point at infinity */
- if (err == MP_OKAY)
- err = ecc_check_pubkey_order(key, &prime, &order);
+ /* validate the public key, order * pubkey = point at infinity */
+ if (err == MP_OKAY)
+ err = ecc_check_pubkey_order(key, pub, curve->Af, curve->prime,
+ curve->order);
#endif /* WOLFSSL_VALIDATE_KEYGEN */
- if (err == MP_OKAY)
- key->type = ECC_PRIVATEKEY;
+ if (err != MP_OKAY) {
+ /* clean up if failed */
+ #ifndef ALT_ECC_SIZE
+ mp_clear(pub->x);
+ mp_clear(pub->y);
+ mp_clear(pub->z);
+ #endif
+ }
- if (err != MP_OKAY) {
- /* clean up */
- mp_clear(key->pubkey.x);
- mp_clear(key->pubkey.y);
- mp_clear(key->pubkey.z);
- mp_clear(&key->k);
+ /* free up local curve */
+ if (curveIn == NULL) {
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
+ }
+
+#else
+ (void)curveIn;
+ err = NOT_COMPILED_IN;
+#endif /* WOLFSSL_ATECC508A */
+
+ /* change key state if public part is cached */
+ if (key->type == ECC_PRIVATEKEY_ONLY && pubOut == NULL) {
+ key->type = ECC_PRIVATEKEY;
+ }
+
+ return err;
+}
+
+
+/* create the public ECC key from a private key
+ *
+ * key an initialized private key to generate public part from
+ * pubOut [out]ecc_point holding the public key, if NULL then public key part
+ * is cached in key instead.
+ *
+ *
+ * returns MP_OKAY on success
+ */
+int wc_ecc_make_pub(ecc_key* key, ecc_point* pubOut)
+{
+ WOLFSSL_ENTER("wc_ecc_make_pub");
+
+ return wc_ecc_make_pub_ex(key, NULL, pubOut);
+}
+
+
+WOLFSSL_ABI
+int wc_ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key, int curve_id)
+{
+ int err;
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
+#ifndef WOLFSSL_SP_MATH
+ DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT);
+#endif
+#endif /* !WOLFSSL_ATECC508A */
+#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+ const CRYS_ECPKI_Domain_t* pDomain;
+ CRYS_ECPKI_KG_TempData_t tempBuff;
+ CRYS_ECPKI_KG_FipsContext_t fipsCtx;
+ byte ucompressed_key[ECC_MAX_CRYPTO_HW_SIZE*2 + 1];
+ word32 raw_size = 0;
+#endif
+ if (key == NULL || rng == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* make sure required variables are reset */
+ wc_ecc_reset(key);
+
+ err = wc_ecc_set_curve(key, keysize, curve_id);
+ if (err != 0) {
+ return err;
+ }
+
+#ifdef WOLF_CRYPTO_CB
+ if (key->devId != INVALID_DEVID) {
+ err = wc_CryptoCb_MakeEccKey(rng, keysize, key, curve_id);
+ if (err != CRYPTOCB_UNAVAILABLE)
+ return err;
+ /* fall-through when unavailable */
+ }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+ #ifdef HAVE_CAVIUM
+ /* TODO: Not implemented */
+ #elif defined(HAVE_INTEL_QA)
+ /* TODO: Not implemented */
+ #else
+ if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_ECC_MAKE)) {
+ WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+ testDev->eccMake.rng = rng;
+ testDev->eccMake.key = key;
+ testDev->eccMake.size = keysize;
+ testDev->eccMake.curve_id = curve_id;
+ return WC_PENDING_E;
+ }
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
+
+#ifdef WOLFSSL_ATECC508A
+ if (key->dp->id == ECC_SECP256R1) {
+ key->type = ECC_PRIVATEKEY;
+ key->slot = atmel_ecc_alloc(ATMEL_SLOT_ECDHE);
+ err = atmel_ecc_create_key(key->slot, key->pubkey_raw);
+
+ /* populate key->pubkey */
+ if (err == 0
+ #ifdef ALT_ECC_SIZE
+ && key->pubkey.x
+ #endif
+ ) {
+ err = mp_read_unsigned_bin(key->pubkey.x, key->pubkey_raw,
+ ECC_MAX_CRYPTO_HW_SIZE);
+ }
+ if (err == 0
+ #ifdef ALT_ECC_SIZE
+ && key->pubkey.y
+ #endif
+ ) {
+ err = mp_read_unsigned_bin(key->pubkey.y,
+ key->pubkey_raw + ECC_MAX_CRYPTO_HW_SIZE,
+ ECC_MAX_CRYPTO_HW_SIZE);
+ }
}
- ecc_del_point(base);
- if (po_init) {
- mp_clear(&prime);
- mp_clear(&order);
+ else {
+ err = NOT_COMPILED_IN;
}
+#elif defined(WOLFSSL_CRYPTOCELL)
- ForceZero(buf, ECC_MAXSIZE);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(buf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ pDomain = CRYS_ECPKI_GetEcDomain(cc310_mapCurve(curve_id));
+ raw_size = (word32)(key->dp->size)*2 + 1;
+
+ /* generate first key pair */
+ err = CRYS_ECPKI_GenKeyPair(&wc_rndState,
+ wc_rndGenVectFunc,
+ pDomain,
+ &key->ctx.privKey,
+ &key->ctx.pubKey,
+ &tempBuff,
+ &fipsCtx);
+
+ if (err != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_ECPKI_GenKeyPair for key pair failed");
+ return err;
+ }
+ key->type = ECC_PRIVATEKEY;
+
+ err = CRYS_ECPKI_ExportPublKey(&key->ctx.pubKey,
+ CRYS_EC_PointUncompressed,
+ &ucompressed_key[0],
+ &raw_size);
+
+ if (err == SA_SILIB_RET_OK && key->pubkey.x && key->pubkey.y) {
+ err = mp_read_unsigned_bin(key->pubkey.x,
+ &ucompressed_key[1], key->dp->size);
+ if (err == MP_OKAY) {
+ err = mp_read_unsigned_bin(key->pubkey.y,
+ &ucompressed_key[1+key->dp->size],key->dp->size);
+ }
+ }
+ raw_size = key->dp->size;
+ if (err == MP_OKAY) {
+ err = CRYS_ECPKI_ExportPrivKey(&key->ctx.privKey,
+ ucompressed_key,
+ &raw_size);
+ }
+
+ if (err == SA_SILIB_RET_OK) {
+ err = mp_read_unsigned_bin(&key->k, ucompressed_key, raw_size);
+ }
+
+#else
+
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
+ err = sp_ecc_make_key_256(rng, &key->k, &key->pubkey, key->heap);
+ if (err == MP_OKAY) {
+ key->type = ECC_PRIVATEKEY;
+ }
+ }
+ else
+#endif
+#ifdef WOLFSSL_SP_384
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+ err = sp_ecc_make_key_384(rng, &key->k, &key->pubkey, key->heap);
+ if (err == MP_OKAY) {
+ key->type = ECC_PRIVATEKEY;
+ }
+ }
+ else
#endif
+#endif /* WOLFSSL_HAVE_SP_ECC */
- return err;
+ { /* software key gen */
+#ifdef WOLFSSL_SP_MATH
+ err = WC_KEY_SIZE_E;
+#else
+
+ /* setup the key variables */
+ err = mp_init(&key->k);
+
+ /* load curve info */
+ if (err == MP_OKAY) {
+ ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
+ err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+ }
+
+ /* generate k */
+ if (err == MP_OKAY)
+ err = wc_ecc_gen_k(rng, key->dp->size, &key->k, curve->order);
+
+ /* generate public key from k */
+ if (err == MP_OKAY)
+ err = wc_ecc_make_pub_ex(key, curve, NULL);
+
+ if (err == MP_OKAY)
+ key->type = ECC_PRIVATEKEY;
+
+ /* cleanup these on failure case only */
+ if (err != MP_OKAY) {
+ /* clean up */
+ mp_forcezero(&key->k);
+ }
+
+ /* cleanup allocations */
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
+#endif /* WOLFSSL_SP_MATH */
+ }
+
+#ifdef HAVE_WOLF_BIGINT
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(&key->k, &key->k.raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(key->pubkey.x, &key->pubkey.x->raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(key->pubkey.y, &key->pubkey.y->raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(key->pubkey.z, &key->pubkey.z->raw);
+#endif
+
+#endif /* WOLFSSL_ATECC508A */
+
+ return err;
}
+#ifdef ECC_DUMP_OID
+/* Optional dump of encoded OID for adding new curves */
+static int mOidDumpDone;
+static void wc_ecc_dump_oids(void)
+{
+ int x;
-/* Setup dynamic pointers is using normal math for proper freeing */
-int wc_ecc_init(ecc_key* key)
+ if (mOidDumpDone) {
+ return;
+ }
+
+ /* find matching OID sum (based on encoded value) */
+ for (x = 0; ecc_sets[x].size != 0; x++) {
+ int i;
+ byte* oid;
+ word32 oidSz, sum = 0;
+
+ printf("ECC %s (%d):\n", ecc_sets[x].name, x);
+
+ #ifdef HAVE_OID_ENCODING
+ byte oidEnc[ECC_MAX_OID_LEN];
+
+ oid = oidEnc;
+ oidSz = ECC_MAX_OID_LEN;
+
+ printf("OID: ");
+ for (i = 0; i < (int)ecc_sets[x].oidSz; i++) {
+ printf("%d.", ecc_sets[x].oid[i]);
+ }
+ printf("\n");
+
+ EncodeObjectId(ecc_sets[x].oid, ecc_sets[x].oidSz, oidEnc, &oidSz);
+ #else
+ oid = (byte*)ecc_sets[x].oid;
+ oidSz = ecc_sets[x].oidSz;
+ #endif
+
+ printf("OID Encoded: ");
+ for (i = 0; i < (int)oidSz; i++) {
+ printf("0x%02X,", oid[i]);
+ }
+ printf("\n");
+
+ for (i = 0; i < (int)oidSz; i++) {
+ sum += oid[i];
+ }
+ printf("Sum: %d\n", sum);
+
+ /* validate sum */
+ if (ecc_sets[x].oidSum != sum) {
+ printf(" Sum %d Not Valid!\n", ecc_sets[x].oidSum);
+ }
+ }
+ mOidDumpDone = 1;
+}
+#endif /* ECC_DUMP_OID */
+
+
+WOLFSSL_ABI
+ecc_key* wc_ecc_key_new(void* heap)
+{
+ ecc_key* key;
+
+ key = (ecc_key*)XMALLOC(sizeof(ecc_key), heap, DYNAMIC_TYPE_ECC);
+ if (key) {
+ if (wc_ecc_init_ex(key, heap, INVALID_DEVID) != 0) {
+ XFREE(key, heap, DYNAMIC_TYPE_ECC);
+ key = NULL;
+ }
+ }
+
+ return key;
+}
+
+
+WOLFSSL_ABI
+void wc_ecc_key_free(ecc_key* key)
+{
+ if (key) {
+ void* heap = key->heap;
+
+ wc_ecc_free(key);
+ ForceZero(key, sizeof(ecc_key));
+ XFREE(key, heap, DYNAMIC_TYPE_ECC);
+ (void)heap;
+ }
+}
+
+
+/**
+ Make a new ECC key
+ rng An active RNG state
+ keysize The keysize for the new key (in octets from 20 to 65 bytes)
+ key [out] Destination of the newly created key
+ return MP_OKAY if successful,
+ upon error all allocated memory will be freed
+ */
+int wc_ecc_make_key(WC_RNG* rng, int keysize, ecc_key* key)
+{
+ return wc_ecc_make_key_ex(rng, keysize, key, ECC_CURVE_DEF);
+}
+
+/* Setup dynamic pointers if using normal math for proper freeing */
+WOLFSSL_ABI
+int wc_ecc_init_ex(ecc_key* key, void* heap, int devId)
{
- (void)key;
+ int ret = 0;
-#ifndef USE_FAST_MATH
- key->pubkey.x->dp = NULL;
- key->pubkey.y->dp = NULL;
- key->pubkey.z->dp = NULL;
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
- key->k.dp = NULL;
+#ifdef ECC_DUMP_OID
+ wc_ecc_dump_oids();
#endif
-#ifdef ALT_ECC_SIZE
- if (mp_init(&key->k) != MP_OKAY)
- return MEMORY_E;
+ XMEMSET(key, 0, sizeof(ecc_key));
+ key->state = ECC_STATE_NONE;
+#if defined(PLUTON_CRYPTO_ECC) || defined(WOLF_CRYPTO_CB)
+ key->devId = devId;
+#else
+ (void)devId;
+#endif
+
+#ifdef WOLFSSL_ATECC508A
+ key->slot = ATECC_INVALID_SLOT;
+#else
+#ifdef ALT_ECC_SIZE
key->pubkey.x = (mp_int*)&key->pubkey.xyz[0];
key->pubkey.y = (mp_int*)&key->pubkey.xyz[1];
key->pubkey.z = (mp_int*)&key->pubkey.xyz[2];
alt_fp_init(key->pubkey.x);
alt_fp_init(key->pubkey.y);
alt_fp_init(key->pubkey.z);
+ ret = mp_init(&key->k);
+ if (ret != MP_OKAY) {
+ return MEMORY_E;
+ }
+#else
+ ret = mp_init_multi(&key->k, key->pubkey.x, key->pubkey.y, key->pubkey.z,
+ NULL, NULL);
+ if (ret != MP_OKAY) {
+ return MEMORY_E;
+ }
+#endif /* ALT_ECC_SIZE */
+#endif /* WOLFSSL_ATECC508A */
+
+#ifdef WOLFSSL_HEAP_TEST
+ key->heap = (void*)WOLFSSL_HEAP_TEST;
+#else
+ key->heap = heap;
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ /* handle as async */
+ ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC,
+ key->heap, devId);
+#endif
+
+#if defined(WOLFSSL_DSP)
+ key->handle = -1;
+#endif
+ return ret;
+}
+
+int wc_ecc_init(ecc_key* key)
+{
+ return wc_ecc_init_ex(key, NULL, INVALID_DEVID);
+}
+
+#ifdef HAVE_PKCS11
+int wc_ecc_init_id(ecc_key* key, unsigned char* id, int len, void* heap,
+ int devId)
+{
+ int ret = 0;
+
+ if (key == NULL)
+ ret = BAD_FUNC_ARG;
+ if (ret == 0 && (len < 0 || len > ECC_MAX_ID_LEN))
+ ret = BUFFER_E;
+
+ if (ret == 0)
+ ret = wc_ecc_init_ex(key, heap, devId);
+
+ if (ret == 0 && id != NULL && len != 0) {
+ XMEMCPY(key->id, id, len);
+ key->idLen = len;
+ }
+
+ return ret;
+}
#endif
+int wc_ecc_set_flags(ecc_key* key, word32 flags)
+{
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ key->flags |= flags;
return 0;
}
+static int wc_ecc_get_curve_order_bit_count(const ecc_set_type* dp)
+{
+ int err;
+ word32 orderBits;
+ DECLARE_CURVE_SPECS(curve, 1);
+
+ ALLOC_CURVE_SPECS(1);
+ err = wc_ecc_curve_load(dp, &curve, ECC_CURVE_FIELD_ORDER);
+ if (err != 0) {
+ FREE_CURVE_SPECS();
+ return err;
+ }
+ orderBits = mp_count_bits(curve->order);
+
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
+ return (int)orderBits;
+}
+
+#ifdef HAVE_ECC_SIGN
+
+#ifndef NO_ASN
+
+#if defined(WOLFSSL_ATECC508A) || defined(PLUTON_CRYPTO_ECC) || \
+ defined(WOLFSSL_CRYPTOCELL)
+static int wc_ecc_sign_hash_hw(const byte* in, word32 inlen,
+ mp_int* r, mp_int* s, byte* out, word32 *outlen, WC_RNG* rng,
+ ecc_key* key)
+{
+ int err;
+#ifdef PLUTON_CRYPTO_ECC
+ if (key->devId != INVALID_DEVID) /* use hardware */
+#endif
+ {
+ #if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+ CRYS_ECDSA_SignUserContext_t sigCtxTemp;
+ word32 raw_sig_size = *outlen;
+ word32 msgLenInBytes = inlen;
+ CRYS_ECPKI_HASH_OpMode_t hash_mode;
+ #endif
+ word32 keysize = (word32)key->dp->size;
+ word32 orderBits = wc_ecc_get_curve_order_bit_count(key->dp);
+
+ /* Check args */
+ if (keysize > ECC_MAX_CRYPTO_HW_SIZE || *outlen < keysize*2) {
+ return ECC_BAD_ARG_E;
+ }
+
+ #if defined(WOLFSSL_ATECC508A)
+ key->slot = atmel_ecc_alloc(ATMEL_SLOT_DEVICE);
+ if (key->slot == ATECC_INVALID_SLOT) {
+ return ECC_BAD_ARG_E;
+ }
+
+ /* Sign: Result is 32-bytes of R then 32-bytes of S */
+ err = atmel_ecc_sign(key->slot, in, out);
+ if (err != 0) {
+ return err;
+ }
+ #elif defined(PLUTON_CRYPTO_ECC)
+ {
+ /* if the input is larger than curve order, we must truncate */
+ if ((inlen * WOLFSSL_BIT_SIZE) > orderBits) {
+ inlen = (orderBits + WOLFSSL_BIT_SIZE - 1) / WOLFSSL_BIT_SIZE;
+ }
+
+ /* perform ECC sign */
+ word32 raw_sig_size = *outlen;
+ err = Crypto_EccSign(in, inlen, out, &raw_sig_size);
+ if (err != CRYPTO_RES_SUCCESS || raw_sig_size != keysize*2){
+ return BAD_COND_E;
+ }
+ }
+ #elif defined(WOLFSSL_CRYPTOCELL)
+
+ hash_mode = cc310_hashModeECC(msgLenInBytes);
+ if (hash_mode == CRYS_ECPKI_HASH_OpModeLast) {
+ hash_mode = cc310_hashModeECC(keysize);
+ hash_mode = CRYS_ECPKI_HASH_SHA256_mode;
+ }
+
+ /* truncate if hash is longer than key size */
+ if (msgLenInBytes > keysize) {
+ msgLenInBytes = keysize;
+ }
+
+ /* create signature from an input buffer using a private key*/
+ err = CRYS_ECDSA_Sign(&wc_rndState,
+ wc_rndGenVectFunc,
+ &sigCtxTemp,
+ &key->ctx.privKey,
+ hash_mode,
+ (byte*)in,
+ msgLenInBytes,
+ out,
+ &raw_sig_size);
+
+ if (err != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_ECDSA_Sign failed");
+ return err;
+ }
+ #endif
+
+ /* Load R and S */
+ err = mp_read_unsigned_bin(r, &out[0], keysize);
+ if (err != MP_OKAY) {
+ return err;
+ }
+ err = mp_read_unsigned_bin(s, &out[keysize], keysize);
+ if (err != MP_OKAY) {
+ return err;
+ }
+
+ /* Check for zeros */
+ if (mp_iszero(r) || mp_iszero(s)) {
+ return MP_ZERO_E;
+ }
+ }
+#ifdef PLUTON_CRYPTO_ECC
+ else {
+ err = wc_ecc_sign_hash_ex(in, inlen, rng, key, r, s);
+ }
+#endif
+ (void)rng;
+
+ return err;
+}
+#endif /* WOLFSSL_ATECC508A || PLUTON_CRYPTO_ECC || WOLFSSL_CRYPTOCELL */
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+static int wc_ecc_sign_hash_async(const byte* in, word32 inlen, byte* out,
+ word32 *outlen, WC_RNG* rng, ecc_key* key)
+{
+ int err;
+ mp_int *r = NULL, *s = NULL;
+
+ if (in == NULL || out == NULL || outlen == NULL || key == NULL ||
+ rng == NULL) {
+ return ECC_BAD_ARG_E;
+ }
+
+ err = wc_ecc_alloc_async(key);
+ if (err != 0) {
+ return err;
+ }
+ r = key->r;
+ s = key->s;
+
+ switch(key->state) {
+ case ECC_STATE_NONE:
+ case ECC_STATE_SIGN_DO:
+ key->state = ECC_STATE_SIGN_DO;
+
+ if ((err = mp_init_multi(r, s, NULL, NULL, NULL, NULL)) != MP_OKAY){
+ break;
+ }
+
+ err = wc_ecc_sign_hash_ex(in, inlen, rng, key, r, s);
+ if (err < 0) {
+ break;
+ }
+
+ FALL_THROUGH;
+
+ case ECC_STATE_SIGN_ENCODE:
+ key->state = ECC_STATE_SIGN_ENCODE;
+
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+ #ifdef HAVE_CAVIUM_V
+ /* Nitrox requires r and s in sep buffer, so split it */
+ NitroxEccRsSplit(key, &r->raw, &s->raw);
+ #endif
+ #ifndef WOLFSSL_ASYNC_CRYPT_TEST
+ /* only do this if not simulator, since it overwrites result */
+ wc_bigint_to_mp(&r->raw, r);
+ wc_bigint_to_mp(&s->raw, s);
+ #endif
+ }
+
+ /* encoded with DSA header */
+ err = StoreECC_DSA_Sig(out, outlen, r, s);
+
+ /* done with R/S */
+ mp_clear(r);
+ mp_clear(s);
+ break;
+
+ default:
+ err = BAD_STATE_E;
+ break;
+ }
+
+ /* if async pending then return and skip done cleanup below */
+ if (err == WC_PENDING_E) {
+ key->state++;
+ return err;
+ }
+
+ /* cleanup */
+ wc_ecc_free_async(key);
+ key->state = ECC_STATE_NONE;
+
+ return err;
+}
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
+
+/**
+ Sign a message digest
+ in The message digest to sign
+ inlen The length of the digest
+ out [out] The destination for the signature
+ outlen [in/out] The max size and resulting size of the signature
+ key A private ECC key
+ return MP_OKAY if successful
+ */
+WOLFSSL_ABI
+int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
+ WC_RNG* rng, ecc_key* key)
+{
+ int err;
+#if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(WC_ASYNC_ENABLE_ECC)
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int *r = NULL, *s = NULL;
+#else
+ mp_int r[1], s[1];
+#endif
+#endif
+
+ if (in == NULL || out == NULL || outlen == NULL || key == NULL ||
+ rng == NULL) {
+ return ECC_BAD_ARG_E;
+ }
+
+#ifdef WOLF_CRYPTO_CB
+ if (key->devId != INVALID_DEVID) {
+ err = wc_CryptoCb_EccSign(in, inlen, out, outlen, rng, key);
+ if (err != CRYPTOCB_UNAVAILABLE)
+ return err;
+ /* fall-through when unavailable */
+ }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ /* handle async cases */
+ err = wc_ecc_sign_hash_async(in, inlen, out, outlen, rng, key);
+#else
+
+#ifdef WOLFSSL_SMALL_STACK
+ r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (r == NULL)
+ return MEMORY_E;
+ s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (s == NULL) {
+ XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#endif
+ XMEMSET(r, 0, sizeof(mp_int));
+ XMEMSET(s, 0, sizeof(mp_int));
+
+ if ((err = mp_init_multi(r, s, NULL, NULL, NULL, NULL)) != MP_OKAY){
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
+ XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+ #endif
+ return err;
+ }
+
+/* hardware crypto */
+#if defined(WOLFSSL_ATECC508A) || defined(PLUTON_CRYPTO_ECC) || defined(WOLFSSL_CRYPTOCELL)
+ err = wc_ecc_sign_hash_hw(in, inlen, r, s, out, outlen, rng, key);
+#else
+ err = wc_ecc_sign_hash_ex(in, inlen, rng, key, r, s);
+#endif
+ if (err < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
+ XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+ #endif
+ return err;
+ }
+
+ /* encoded with DSA header */
+ err = StoreECC_DSA_Sig(out, outlen, r, s);
+
+ /* cleanup */
+ mp_clear(r);
+ mp_clear(s);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
+ XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+#endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+ return err;
+}
+#endif /* !NO_ASN */
+
+#if defined(WOLFSSL_STM32_PKA)
+int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
+ ecc_key* key, mp_int *r, mp_int *s)
+{
+ return stm32_ecc_sign_hash_ex(in, inlen, rng, key, r, s);
+}
+#elif !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
/**
Sign a message digest
in The message digest to sign
inlen The length of the digest
- out [out] The destination for the signature
- outlen [in/out] The max size and resulting size of the signature
key A private ECC key
+ r [out] The destination for r component of the signature
+ s [out] The destination for s component of the signature
return MP_OKAY if successful
*/
-int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
- RNG* rng, ecc_key* key)
+int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
+ ecc_key* key, mp_int *r, mp_int *s)
{
- mp_int r;
- mp_int s;
- mp_int e;
- mp_int p;
- int err;
+ int err = 0;
+#ifndef WOLFSSL_SP_MATH
+ mp_int* e;
+#if (!defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)) && \
+ !defined(WOLFSSL_SMALL_STACK)
+ mp_int e_lcl;
+#endif
- if (in == NULL || out == NULL || outlen == NULL || key == NULL || rng ==NULL)
+#if defined(WOLFSSL_ECDSA_SET_K) || \
+ (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+ (defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA)))
+ DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT);
+#else
+ DECLARE_CURVE_SPECS(curve, 1);
+#endif
+#endif /* !WOLFSSL_SP_MATH */
+
+ if (in == NULL || r == NULL || s == NULL || key == NULL || rng == NULL) {
return ECC_BAD_ARG_E;
+ }
/* is this a private key? */
- if (key->type != ECC_PRIVATEKEY) {
+ if (key->type != ECC_PRIVATEKEY && key->type != ECC_PRIVATEKEY_ONLY) {
return ECC_BAD_ARG_E;
}
-
+
/* is the IDX valid ? */
- if (ecc_is_valid_idx(key->idx) != 1) {
+ if (wc_ecc_is_valid_idx(key->idx) != 1) {
return ECC_BAD_ARG_E;
}
+#ifdef WOLFSSL_SP_MATH
+#ifndef WOLFSSL_SP_NO_256
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
+ #ifndef WOLFSSL_ECDSA_SET_K
+ return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, NULL, key->heap);
+ #else
+ return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, key->sign_k,
+ key->heap);
+ #endif
+ }
+#endif
+#ifdef WOLFSSL_SP_384
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+ #ifndef WOLFSSL_ECDSA_SET_K
+ return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, NULL, key->heap);
+ #else
+ return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, key->sign_k,
+ key->heap);
+ #endif
+ }
+#endif
+ return WC_KEY_SIZE_E;
+#else
+#ifdef WOLFSSL_HAVE_SP_ECC
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC)
+ #endif
+ {
+#ifndef WOLFSSL_SP_NO_256
+ if (key->idx != ECC_CUSTOM_IDX &&
+ ecc_sets[key->idx].id == ECC_SECP256R1) {
+ #ifndef WOLFSSL_ECDSA_SET_K
+ return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, NULL,
+ key->heap);
+ #else
+ return sp_ecc_sign_256(in, inlen, rng, &key->k, r, s, key->sign_k,
+ key->heap);
+ #endif
+ }
+#endif
+#ifdef WOLFSSL_SP_384
+ if (key->idx != ECC_CUSTOM_IDX &&
+ ecc_sets[key->idx].id == ECC_SECP384R1) {
+ #ifndef WOLFSSL_ECDSA_SET_K
+ return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, NULL,
+ key->heap);
+ #else
+ return sp_ecc_sign_384(in, inlen, rng, &key->k, r, s, key->sign_k,
+ key->heap);
+ #endif
+ }
+#endif
+ }
+#endif /* WOLFSSL_HAVE_SP_ECC */
+
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+ defined(WOLFSSL_ASYNC_CRYPT_TEST)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+ if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_ECC_SIGN)) {
+ WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+ testDev->eccSign.in = in;
+ testDev->eccSign.inSz = inlen;
+ testDev->eccSign.rng = rng;
+ testDev->eccSign.key = key;
+ testDev->eccSign.r = r;
+ testDev->eccSign.s = s;
+ return WC_PENDING_E;
+ }
+ }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM_V)
+ err = wc_ecc_alloc_mpint(key, &key->e);
+ if (err != 0) {
+ return err;
+ }
+ e = key->e;
+#elif !defined(WOLFSSL_SMALL_STACK)
+ e = &e_lcl;
+#else
+ e = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (e == NULL) {
+ return MEMORY_E;
+ }
+#endif
+
/* get the hash and load it as a bignum into 'e' */
/* init the bignums */
- if ((err = mp_init_multi(&r, &s, &p, &e, NULL, NULL)) != MP_OKAY) {
+ if ((err = mp_init(e)) != MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(e, key->heap, DYNAMIC_TYPE_ECC);
+ #endif
return err;
}
- err = mp_read_radix(&p, (char *)key->dp->order, 16);
+ /* load curve info */
+#if defined(WOLFSSL_ECDSA_SET_K)
+ ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
+ err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+#else
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+ (defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA))
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+ ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
+ err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+ }
+ else
+ #endif
+ {
+ ALLOC_CURVE_SPECS(1);
+ err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ORDER);
+ }
+#endif
+
+ /* load digest into e */
if (err == MP_OKAY) {
/* we may need to truncate if hash is longer than key size */
- word32 orderBits = mp_count_bits(&p);
+ word32 orderBits = mp_count_bits(curve->order);
/* truncate down to byte size, may be all that's needed */
- if ( (WOLFSSL_BIT_SIZE * inlen) > orderBits)
- inlen = (orderBits + WOLFSSL_BIT_SIZE - 1)/WOLFSSL_BIT_SIZE;
- err = mp_read_unsigned_bin(&e, (byte*)in, inlen);
+ if ((WOLFSSL_BIT_SIZE * inlen) > orderBits)
+ inlen = (orderBits + WOLFSSL_BIT_SIZE - 1) / WOLFSSL_BIT_SIZE;
+ err = mp_read_unsigned_bin(e, (byte*)in, inlen);
/* may still need bit truncation too */
if (err == MP_OKAY && (WOLFSSL_BIT_SIZE * inlen) > orderBits)
- mp_rshb(&e, WOLFSSL_BIT_SIZE - (orderBits & 0x7));
+ mp_rshb(e, WOLFSSL_BIT_SIZE - (orderBits & 0x7));
}
/* make up a key and export the public copy */
if (err == MP_OKAY) {
- int loop_check = 0;
- ecc_key pubkey;
- wc_ecc_init(&pubkey);
- for (;;) {
- if (++loop_check > 64) {
- err = RNG_FAILURE_E;
- break;
+ int loop_check = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ ecc_key* pubkey;
+ #else
+ ecc_key pubkey[1];
+ #endif
+
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+ #if defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA)
+ #ifdef HAVE_CAVIUM_V
+ if (NitroxEccIsCurveSupported(key))
+ #endif
+ {
+ word32 keySz = key->dp->size;
+ mp_int* k;
+ #ifdef HAVE_CAVIUM_V
+ err = wc_ecc_alloc_mpint(key, &key->signK);
+ if (err != 0)
+ return err;
+ k = key->signK;
+ #else
+ mp_int k_lcl;
+ k = &k_lcl;
+ #endif
+
+ err = mp_init(k);
+
+ /* make sure r and s are allocated */
+ #ifdef HAVE_CAVIUM_V
+ /* Nitrox V needs single buffer for R and S */
+ if (err == MP_OKAY)
+ err = wc_bigint_alloc(&key->r->raw, NitroxEccGetSize(key)*2);
+ /* Nitrox V only needs Prime and Order */
+ if (err == MP_OKAY)
+ err = wc_ecc_curve_load(key->dp, &curve,
+ (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_ORDER));
+ #else
+ if (err == MP_OKAY)
+ err = wc_bigint_alloc(&key->r->raw, key->dp->size);
+ if (err == MP_OKAY)
+ err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+ #endif
+ if (err == MP_OKAY)
+ err = wc_bigint_alloc(&key->s->raw, key->dp->size);
+
+ /* load e and k */
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint_sz(e, &e->raw, keySz);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint_sz(&key->k, &key->k.raw, keySz);
+ if (err == MP_OKAY)
+ err = wc_ecc_gen_k(rng, key->dp->size, k, curve->order);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint_sz(k, &k->raw, keySz);
+
+ #ifdef HAVE_CAVIUM_V
+ if (err == MP_OKAY)
+ err = NitroxEcdsaSign(key, &e->raw, &key->k.raw, &k->raw,
+ &r->raw, &s->raw, &curve->prime->raw, &curve->order->raw);
+ #else
+ if (err == MP_OKAY)
+ err = IntelQaEcdsaSign(&key->asyncDev, &e->raw, &key->k.raw,
+ &k->raw, &r->raw, &s->raw, &curve->Af->raw, &curve->Bf->raw,
+ &curve->prime->raw, &curve->order->raw, &curve->Gx->raw,
+ &curve->Gy->raw);
+ #endif
+
+ #ifndef HAVE_CAVIUM_V
+ mp_clear(e);
+ mp_clear(k);
+ #endif
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
+
+ return err;
}
- err = wc_ecc_make_key_ex(rng, &pubkey, key->dp);
- if (err != MP_OKAY) break;
+ #endif /* HAVE_CAVIUM_V || HAVE_INTEL_QA */
+ }
+ #endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
- /* find r = x1 mod n */
- err = mp_mod(pubkey.pubkey.x, &p, &r);
- if (err != MP_OKAY) break;
+ #ifdef WOLFSSL_SMALL_STACK
+ pubkey = (ecc_key*)XMALLOC(sizeof(ecc_key), key->heap, DYNAMIC_TYPE_ECC);
+ if (pubkey == NULL)
+ err = MEMORY_E;
+ #endif
+
+ /* don't use async for key, since we don't support async return here */
+ if (err == MP_OKAY && (err = wc_ecc_init_ex(pubkey, key->heap,
+ INVALID_DEVID)) == MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ mp_int* b = NULL;
+ #else
+ mp_int b[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ if (err == MP_OKAY) {
+ b = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
+ DYNAMIC_TYPE_ECC);
+ if (b == NULL)
+ err = MEMORY_E;
+ }
+ #endif
- if (mp_iszero(&r) == MP_YES) {
- mp_clear(pubkey.pubkey.x);
- mp_clear(pubkey.pubkey.y);
- mp_clear(pubkey.pubkey.z);
- mp_clear(&pubkey.k);
+ if (err == MP_OKAY) {
+ err = mp_init(b);
}
- else {
- /* find s = (e + xr)/k */
- err = mp_invmod(&pubkey.k, &p, &pubkey.k);
- if (err != MP_OKAY) break;
- err = mp_mulmod(&key->k, &r, &p, &s); /* s = xr */
- if (err != MP_OKAY) break;
-
- err = mp_add(&e, &s, &s); /* s = e + xr */
- if (err != MP_OKAY) break;
+ #ifdef WOLFSSL_CUSTOM_CURVES
+ /* if custom curve, apply params to pubkey */
+ if (err == MP_OKAY && key->idx == ECC_CUSTOM_IDX) {
+ err = wc_ecc_set_custom_curve(pubkey, key->dp);
+ }
+ #endif
+
+ if (err == MP_OKAY) {
+ /* Generate blinding value - non-zero value. */
+ do {
+ if (++loop_check > 64) {
+ err = RNG_FAILURE_E;
+ break;
+ }
+
+ err = wc_ecc_gen_k(rng, key->dp->size, b, curve->order);
+ }
+ while (err == MP_ZERO_E);
+ loop_check = 0;
+ }
+
+ for (; err == MP_OKAY;) {
+ if (++loop_check > 64) {
+ err = RNG_FAILURE_E;
+ break;
+ }
+ #ifdef WOLFSSL_ECDSA_SET_K
+ if (key->sign_k != NULL) {
+ if (loop_check > 1) {
+ err = RNG_FAILURE_E;
+ break;
+ }
+
+ err = mp_copy(key->sign_k, &pubkey->k);
+ if (err != MP_OKAY) break;
- err = mp_mod(&s, &p, &s); /* s = e + xr */
+ mp_forcezero(key->sign_k);
+ mp_free(key->sign_k);
+ XFREE(key->sign_k, key->heap, DYNAMIC_TYPE_ECC);
+ key->sign_k = NULL;
+ err = wc_ecc_make_pub_ex(pubkey, curve, NULL);
+ }
+ else
+ #endif
+ {
+ err = wc_ecc_make_key_ex(rng, key->dp->size, pubkey,
+ key->dp->id);
+ }
if (err != MP_OKAY) break;
- err = mp_mulmod(&s, &pubkey.k, &p, &s); /* s = (e + xr)/k */
+ /* find r = x1 mod n */
+ err = mp_mod(pubkey->pubkey.x, curve->order, r);
if (err != MP_OKAY) break;
- if (mp_iszero(&s) == MP_NO)
- break;
- }
+ if (mp_iszero(r) == MP_YES) {
+ #ifndef ALT_ECC_SIZE
+ mp_clear(pubkey->pubkey.x);
+ mp_clear(pubkey->pubkey.y);
+ mp_clear(pubkey->pubkey.z);
+ #endif
+ mp_forcezero(&pubkey->k);
+ }
+ else {
+ /* find s = (e + xr)/k
+ = b.(e/k.b + x.r/k.b) */
+
+ /* k = k.b */
+ err = mp_mulmod(&pubkey->k, b, curve->order, &pubkey->k);
+ if (err != MP_OKAY) break;
+
+ /* k = 1/k.b */
+ err = mp_invmod(&pubkey->k, curve->order, &pubkey->k);
+ if (err != MP_OKAY) break;
+
+ /* s = x.r */
+ err = mp_mulmod(&key->k, r, curve->order, s);
+ if (err != MP_OKAY) break;
+
+ /* s = x.r/k.b */
+ err = mp_mulmod(&pubkey->k, s, curve->order, s);
+ if (err != MP_OKAY) break;
+
+ /* e = e/k.b */
+ err = mp_mulmod(&pubkey->k, e, curve->order, e);
+ if (err != MP_OKAY) break;
+
+ /* s = e/k.b + x.r/k.b
+ = (e + x.r)/k.b */
+ err = mp_add(e, s, s);
+ if (err != MP_OKAY) break;
+
+ /* s = b.(e + x.r)/k.b
+ = (e + x.r)/k */
+ err = mp_mulmod(s, b, curve->order, s);
+ if (err != MP_OKAY) break;
+
+ /* s = (e + xr)/k */
+ err = mp_mod(s, curve->order, s);
+ if (err != MP_OKAY) break;
+
+ if (mp_iszero(s) == MP_NO)
+ break;
+ }
+ }
+ mp_clear(b);
+ mp_free(b);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, key->heap, DYNAMIC_TYPE_ECC);
+ #endif
+ wc_ecc_free(pubkey);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(pubkey, key->heap, DYNAMIC_TYPE_ECC);
+ #endif
}
- wc_ecc_free(&pubkey);
}
- /* store as SEQUENCE { r, s -- integer } */
- if (err == MP_OKAY)
- err = StoreECC_DSA_Sig(out, outlen, &r, &s);
-
- mp_clear(&r);
- mp_clear(&s);
- mp_clear(&p);
- mp_clear(&e);
+ mp_clear(e);
+ wc_ecc_curve_free(curve);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(e, key->heap, DYNAMIC_TYPE_ECC);
+#endif
+ FREE_CURVE_SPECS();
+#endif /* WOLFSSL_SP_MATH */
return err;
}
+#ifdef WOLFSSL_ECDSA_SET_K
+int wc_ecc_sign_set_k(const byte* k, word32 klen, ecc_key* key)
+{
+ int ret = 0;
+
+ if (k == NULL || klen == 0 || key == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ if (key->sign_k == NULL) {
+ key->sign_k = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
+ DYNAMIC_TYPE_ECC);
+ if (key->sign_k == NULL) {
+ ret = MEMORY_E;
+ }
+ }
+ }
+
+ if (ret == 0) {
+ ret = mp_init(key->sign_k);
+ }
+ if (ret == 0) {
+ ret = mp_read_unsigned_bin(key->sign_k, k, klen);
+ }
+
+ return ret;
+}
+#endif /* WOLFSSL_ECDSA_SET_K */
+#endif /* WOLFSSL_ATECC508A && WOLFSSL_CRYPTOCELL*/
+
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef WOLFSSL_CUSTOM_CURVES
+void wc_ecc_free_curve(const ecc_set_type* curve, void* heap)
+{
+#ifndef WOLFSSL_ECC_CURVE_STATIC
+ if (curve->prime != NULL)
+ XFREE((void*)curve->prime, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ if (curve->Af != NULL)
+ XFREE((void*)curve->Af, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ if (curve->Bf != NULL)
+ XFREE((void*)curve->Bf, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ if (curve->order != NULL)
+ XFREE((void*)curve->order, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ if (curve->Gx != NULL)
+ XFREE((void*)curve->Gx, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ if (curve->Gy != NULL)
+ XFREE((void*)curve->Gy, heap, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
+
+ XFREE((void*)curve, heap, DYNAMIC_TYPE_ECC_BUFFER);
+
+ (void)heap;
+}
+#endif /* WOLFSSL_CUSTOM_CURVES */
/**
Free an ECC key from memory
key The key you wish to free
*/
-void wc_ecc_free(ecc_key* key)
+WOLFSSL_ABI
+int wc_ecc_free(ecc_key* key)
{
- if (key == NULL)
- return;
+ if (key == NULL) {
+ return 0;
+ }
- mp_clear(key->pubkey.x);
- mp_clear(key->pubkey.y);
- mp_clear(key->pubkey.z);
- mp_clear(&key->k);
-}
+#ifdef WOLFSSL_ECDSA_SET_K
+ if (key->sign_k != NULL) {
+ mp_forcezero(key->sign_k);
+ mp_free(key->sign_k);
+ XFREE(key->sign_k, key->heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ #ifdef WC_ASYNC_ENABLE_ECC
+ wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC);
+ #endif
+ wc_ecc_free_async(key);
+#endif
-#ifdef USE_FAST_MATH
- #define GEN_MEM_ERR FP_MEM
-#else
- #define GEN_MEM_ERR MP_MEM
+#ifdef WOLFSSL_ATECC508A
+ atmel_ecc_free(key->slot);
+ key->slot = ATECC_INVALID_SLOT;
+#endif /* WOLFSSL_ATECC508A */
+
+ mp_clear(key->pubkey.x);
+ mp_clear(key->pubkey.y);
+ mp_clear(key->pubkey.z);
+
+ mp_forcezero(&key->k);
+
+#ifdef WOLFSSL_CUSTOM_CURVES
+ if (key->deallocSet && key->dp != NULL)
+ wc_ecc_free_curve(key->dp, key->heap);
#endif
+ return 0;
+}
+
+#if !defined(WOLFSSL_SP_MATH) && !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
#ifdef ECC_SHAMIR
/** Computes kA*A + kB*B = C using Shamir's Trick
@@ -1851,47 +5358,97 @@ void wc_ecc_free(ecc_key* key)
B Second point to multiply
kB What to multiple B by
C [out] Destination point (can overlap with A or B)
- modulus Modulus for curve
+ a ECC curve parameter a
+ modulus Modulus for curve
return MP_OKAY on success
*/
#ifdef FP_ECC
static int normal_ecc_mul2add(ecc_point* A, mp_int* kA,
ecc_point* B, mp_int* kB,
- ecc_point* C, mp_int* modulus)
+ ecc_point* C, mp_int* a, mp_int* modulus,
+ void* heap)
#else
-static int ecc_mul2add(ecc_point* A, mp_int* kA,
+int ecc_mul2add(ecc_point* A, mp_int* kA,
ecc_point* B, mp_int* kB,
- ecc_point* C, mp_int* modulus)
+ ecc_point* C, mp_int* a, mp_int* modulus,
+ void* heap)
#endif
{
- ecc_point* precomp[16];
- unsigned bitbufA, bitbufB, lenA, lenB, len, x, y, nA, nB, nibble;
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ ecc_key key;
+#endif
+#ifdef WOLFSSL_SMALL_STACK
+ ecc_point** precomp = NULL;
+#else
+ ecc_point* precomp[SHAMIR_PRECOMP_SZ];
+#endif
+ unsigned bitbufA, bitbufB, lenA, lenB, len, nA, nB, nibble;
unsigned char* tA;
unsigned char* tB;
- int err = MP_OKAY, first;
- int muInit = 0;
- int tableInit = 0;
- mp_digit mp;
- mp_int mu;
-
- /* argchks */
- if (A == NULL || kA == NULL || B == NULL || kB == NULL || C == NULL ||
- modulus == NULL)
- return ECC_BAD_ARG_E;
+ int err = MP_OKAY, first, x, y;
+ mp_digit mp = 0;
+ /* argchks */
+ if (A == NULL || kA == NULL || B == NULL || kB == NULL || C == NULL ||
+ modulus == NULL) {
+ return ECC_BAD_ARG_E;
+ }
/* allocate memory */
- tA = (unsigned char*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ tA = (unsigned char*)XMALLOC(ECC_BUFSIZE, heap, DYNAMIC_TYPE_ECC_BUFFER);
if (tA == NULL) {
return GEN_MEM_ERR;
}
- tB = (unsigned char*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ tB = (unsigned char*)XMALLOC(ECC_BUFSIZE, heap, DYNAMIC_TYPE_ECC_BUFFER);
if (tB == NULL) {
- XFREE(tA, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(tA, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ return GEN_MEM_ERR;
+ }
+#ifdef WOLFSSL_SMALL_STACK
+ precomp = (ecc_point**)XMALLOC(sizeof(ecc_point*) * SHAMIR_PRECOMP_SZ, heap,
+ DYNAMIC_TYPE_ECC_BUFFER);
+ if (precomp == NULL) {
+ XFREE(tB, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ XFREE(tA, heap, DYNAMIC_TYPE_ECC_BUFFER);
return GEN_MEM_ERR;
}
+#endif
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ key.t1 = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+ key.t2 = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+#ifdef ALT_ECC_SIZE
+ key.x = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+ key.y = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+ key.z = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+#endif
+ if (key.t1 == NULL || key.t2 == NULL
+#ifdef ALT_ECC_SIZE
+ || key.x == NULL || key.y == NULL || key.z == NULL
+#endif
+ ) {
+#ifdef ALT_ECC_SIZE
+ XFREE(key.z, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.y, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.x, heap, DYNAMIC_TYPE_ECC);
+#endif
+ XFREE(key.t2, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.t1, heap, DYNAMIC_TYPE_ECC);
+ XFREE(precomp, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ XFREE(tB, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ XFREE(tA, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ return MEMORY_E;
+ }
+ C->key = &key;
+#endif /* WOLFSSL_SMALL_STACK_CACHE */
+
+ /* init variables */
XMEMSET(tA, 0, ECC_BUFSIZE);
XMEMSET(tB, 0, ECC_BUFSIZE);
+#ifndef WOLFSSL_SMALL_STACK
+ XMEMSET(precomp, 0, sizeof(precomp));
+#else
+ XMEMSET(precomp, 0, sizeof(ecc_point*) * SHAMIR_PRECOMP_SZ);
+#endif
/* get sizes */
lenA = mp_unsigned_bin_size(kA);
@@ -1900,7 +5457,7 @@ static int ecc_mul2add(ecc_point* A, mp_int* kA,
/* sanity check */
if ((lenA > ECC_BUFSIZE) || (lenB > ECC_BUFSIZE)) {
- err = BAD_FUNC_ARG;
+ err = BAD_FUNC_ARG;
}
if (err == MP_OKAY) {
@@ -1913,75 +5470,92 @@ static int ecc_mul2add(ecc_point* A, mp_int* kA,
/* allocate the table */
if (err == MP_OKAY) {
- for (x = 0; x < 16; x++) {
- precomp[x] = ecc_new_point();
+ for (x = 0; x < SHAMIR_PRECOMP_SZ; x++) {
+ precomp[x] = wc_ecc_new_point_h(heap);
if (precomp[x] == NULL) {
- for (y = 0; y < x; ++y) {
- ecc_del_point(precomp[y]);
- }
err = GEN_MEM_ERR;
break;
}
+ #ifdef WOLFSSL_SMALL_STACK_CACHE
+ precomp[x]->key = &key;
+ #endif
}
}
}
if (err == MP_OKAY)
- tableInit = 1;
+ /* init montgomery reduction */
+ err = mp_montgomery_setup(modulus, &mp);
- if (err == MP_OKAY)
- /* init montgomery reduction */
- err = mp_montgomery_setup(modulus, &mp);
-
- if (err == MP_OKAY)
- err = mp_init(&mu);
- if (err == MP_OKAY)
- muInit = 1;
+ if (err == MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ mp_int* mu;
+ #else
+ mp_int mu[1];
+ #endif
+ #ifdef WOLFSSL_SMALL_STACK
+ mu = (mp_int*)XMALLOC(sizeof(mp_int), heap, DYNAMIC_TYPE_ECC);
+ if (mu == NULL)
+ err = MEMORY_E;
+ #endif
+ if (err == MP_OKAY) {
+ err = mp_init(mu);
+ }
+ if (err == MP_OKAY) {
+ err = mp_montgomery_calc_normalization(mu, modulus);
- if (err == MP_OKAY)
- err = mp_montgomery_calc_normalization(&mu, modulus);
+ if (err == MP_OKAY)
+ /* copy ones ... */
+ err = mp_mulmod(A->x, mu, modulus, precomp[1]->x);
- if (err == MP_OKAY)
- /* copy ones ... */
- err = mp_mulmod(A->x, &mu, modulus, precomp[1]->x);
+ if (err == MP_OKAY)
+ err = mp_mulmod(A->y, mu, modulus, precomp[1]->y);
+ if (err == MP_OKAY)
+ err = mp_mulmod(A->z, mu, modulus, precomp[1]->z);
- if (err == MP_OKAY)
- err = mp_mulmod(A->y, &mu, modulus, precomp[1]->y);
- if (err == MP_OKAY)
- err = mp_mulmod(A->z, &mu, modulus, precomp[1]->z);
+ if (err == MP_OKAY)
+ err = mp_mulmod(B->x, mu, modulus, precomp[1<<2]->x);
+ if (err == MP_OKAY)
+ err = mp_mulmod(B->y, mu, modulus, precomp[1<<2]->y);
+ if (err == MP_OKAY)
+ err = mp_mulmod(B->z, mu, modulus, precomp[1<<2]->z);
- if (err == MP_OKAY)
- err = mp_mulmod(B->x, &mu, modulus, precomp[1<<2]->x);
- if (err == MP_OKAY)
- err = mp_mulmod(B->y, &mu, modulus, precomp[1<<2]->y);
- if (err == MP_OKAY)
- err = mp_mulmod(B->z, &mu, modulus, precomp[1<<2]->z);
+ /* done with mu */
+ mp_clear(mu);
+ }
+ #ifdef WOLFSSL_SMALL_STACK
+ if (mu != NULL) {
+ XFREE(mu, heap, DYNAMIC_TYPE_ECC);
+ }
+ #endif
+ }
if (err == MP_OKAY)
/* precomp [i,0](A + B) table */
- err = ecc_projective_dbl_point(precomp[1], precomp[2], modulus, &mp);
+ err = ecc_projective_dbl_point(precomp[1], precomp[2], a, modulus, mp);
if (err == MP_OKAY)
err = ecc_projective_add_point(precomp[1], precomp[2], precomp[3],
- modulus, &mp);
+ a, modulus, mp);
if (err == MP_OKAY)
/* precomp [0,i](A + B) table */
- err = ecc_projective_dbl_point(precomp[1<<2], precomp[2<<2], modulus, &mp);
+ err = ecc_projective_dbl_point(precomp[1<<2], precomp[2<<2], a, modulus, mp);
if (err == MP_OKAY)
err = ecc_projective_add_point(precomp[1<<2], precomp[2<<2], precomp[3<<2],
- modulus, &mp);
+ a, modulus, mp);
if (err == MP_OKAY) {
/* precomp [i,j](A + B) table (i != 0, j != 0) */
for (x = 1; x < 4; x++) {
- for (y = 1; y < 4; y++) {
- if (err == MP_OKAY)
- err = ecc_projective_add_point(precomp[x], precomp[(y<<2)],
- precomp[x+(y<<2)], modulus, &mp);
+ for (y = 1; y < 4; y++) {
+ if (err == MP_OKAY) {
+ err = ecc_projective_add_point(precomp[x], precomp[(y<<2)],
+ precomp[x+(y<<2)], a, modulus, mp);
}
- }
- }
+ }
+ }
+ }
if (err == MP_OKAY) {
nibble = 3;
@@ -1990,20 +5564,21 @@ static int ecc_mul2add(ecc_point* A, mp_int* kA,
bitbufB = tB[0];
/* for every byte of the multiplicands */
- for (x = -1;; ) {
+ for (x = 0;; ) {
/* grab a nibble */
if (++nibble == 4) {
- ++x; if (x == len) break;
+ if (x == (int)len) break;
bitbufA = tA[x];
bitbufB = tB[x];
nibble = 0;
+ x++;
}
/* extract two bits from both, shift/update */
nA = (bitbufA >> 6) & 0x03;
nB = (bitbufB >> 6) & 0x03;
- bitbufA = (bitbufA << 2) & 0xFF;
- bitbufB = (bitbufB << 2) & 0xFF;
+ bitbufA = (bitbufA << 2) & 0xFF;
+ bitbufB = (bitbufB << 2) & 0xFF;
/* if both zero, if first, continue */
if ((nA == 0) && (nB == 0) && (first == 1)) {
@@ -2014,9 +5589,9 @@ static int ecc_mul2add(ecc_point* A, mp_int* kA,
if (first == 0) {
/* double twice */
if (err == MP_OKAY)
- err = ecc_projective_dbl_point(C, C, modulus, &mp);
+ err = ecc_projective_dbl_point(C, C, a, modulus, mp);
if (err == MP_OKAY)
- err = ecc_projective_dbl_point(C, C, modulus, &mp);
+ err = ecc_projective_dbl_point(C, C, a, modulus, mp);
else
break;
}
@@ -2040,44 +5615,76 @@ static int ecc_mul2add(ecc_point* A, mp_int* kA,
/* if not first, add from table */
if (err == MP_OKAY)
err = ecc_projective_add_point(C, precomp[nA + (nB<<2)], C,
- modulus, &mp);
- else
+ a, modulus, mp);
+ if (err != MP_OKAY)
break;
+ if (mp_iszero(C->z)) {
+ /* When all zero then should have done an add */
+ if (mp_iszero(C->x) && mp_iszero(C->y)) {
+ err = ecc_projective_dbl_point(precomp[nA + (nB<<2)], C,
+ a, modulus, mp);
+ if (err != MP_OKAY)
+ break;
+ }
+ /* When only Z zero then result is infinity */
+ else {
+ err = mp_set(C->x, 0);
+ if (err != MP_OKAY)
+ break;
+ err = mp_set(C->y, 0);
+ if (err != MP_OKAY)
+ break;
+ err = mp_set(C->z, 1);
+ if (err != MP_OKAY)
+ break;
+ first = 1;
+ }
+ }
}
}
}
}
+ /* reduce to affine */
if (err == MP_OKAY)
- /* reduce to affine */
- err = ecc_map(C, modulus, &mp);
+ err = ecc_map(C, modulus, mp);
/* clean up */
- if (muInit)
- mp_clear(&mu);
-
- if (tableInit) {
- for (x = 0; x < 16; x++) {
- ecc_del_point(precomp[x]);
- }
+ for (x = 0; x < SHAMIR_PRECOMP_SZ; x++) {
+ wc_ecc_del_point_h(precomp[x], heap);
}
- ForceZero(tA, ECC_BUFSIZE);
- ForceZero(tB, ECC_BUFSIZE);
- XFREE(tA, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(tB, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return err;
-}
+ ForceZero(tA, ECC_BUFSIZE);
+ ForceZero(tB, ECC_BUFSIZE);
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+#ifdef ALT_ECC_SIZE
+ XFREE(key.z, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.y, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.x, heap, DYNAMIC_TYPE_ECC);
+#endif
+ XFREE(key.t2, heap, DYNAMIC_TYPE_ECC);
+ XFREE(key.t1, heap, DYNAMIC_TYPE_ECC);
+ C->key = NULL;
+#endif
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(precomp, heap, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
+ XFREE(tB, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ XFREE(tA, heap, DYNAMIC_TYPE_ECC_BUFFER);
+ return err;
+}
#endif /* ECC_SHAMIR */
+#endif /* !WOLFSSL_SP_MATH && !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCEL*/
-
-/* verify
+#ifdef HAVE_ECC_VERIFY
+#ifndef NO_ASN
+/* verify
*
* w = s^-1 mod n
- * u1 = xw
+ * u1 = xw
* u2 = rw
* X = u1*G + u2*Q
* v = X_x1 mod n
@@ -2085,120 +5692,476 @@ static int ecc_mul2add(ecc_point* A, mp_int* kA,
*/
/**
+ Verify an ECC signature
+ sig The signature to verify
+ siglen The length of the signature (octets)
+ hash The hash (message digest) that was signed
+ hashlen The length of the hash (octets)
+ res Result of signature, 1==valid, 0==invalid
+ key The corresponding public ECC key
+ return MP_OKAY if successful (even if the signature is not valid)
+ */
+int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
+ word32 hashlen, int* res, ecc_key* key)
+{
+ int err;
+ mp_int *r = NULL, *s = NULL;
+#if (!defined(WOLFSSL_ASYNC_CRYPT) || !defined(WC_ASYNC_ENABLE_ECC)) && \
+ !defined(WOLFSSL_SMALL_STACK)
+ mp_int r_lcl, s_lcl;
+#endif
+
+ if (sig == NULL || hash == NULL || res == NULL || key == NULL) {
+ return ECC_BAD_ARG_E;
+ }
+
+#ifdef WOLF_CRYPTO_CB
+ if (key->devId != INVALID_DEVID) {
+ err = wc_CryptoCb_EccVerify(sig, siglen, hash, hashlen, res, key);
+ if (err != CRYPTOCB_UNAVAILABLE)
+ return err;
+ /* fall-through when unavailable */
+ }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ err = wc_ecc_alloc_async(key);
+ if (err != 0)
+ return err;
+ r = key->r;
+ s = key->s;
+#else
+ #ifndef WOLFSSL_SMALL_STACK
+ r = &r_lcl;
+ s = &s_lcl;
+ #else
+ r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (r == NULL)
+ return MEMORY_E;
+ s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (s == NULL) {
+ XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+ #endif
+ XMEMSET(r, 0, sizeof(mp_int));
+ XMEMSET(s, 0, sizeof(mp_int));
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+ switch (key->state) {
+ case ECC_STATE_NONE:
+ case ECC_STATE_VERIFY_DECODE:
+ key->state = ECC_STATE_VERIFY_DECODE;
+
+ /* default to invalid signature */
+ *res = 0;
+
+ /* Note, DecodeECC_DSA_Sig() calls mp_init() on r and s.
+ * If either of those don't allocate correctly, none of
+ * the rest of this function will execute, and everything
+ * gets cleaned up at the end. */
+ /* decode DSA header */
+ err = DecodeECC_DSA_Sig(sig, siglen, r, s);
+ if (err < 0) {
+ break;
+ }
+ FALL_THROUGH;
+
+ case ECC_STATE_VERIFY_DO:
+ key->state = ECC_STATE_VERIFY_DO;
+
+ err = wc_ecc_verify_hash_ex(r, s, hash, hashlen, res, key);
+
+ #ifndef WOLFSSL_ASYNC_CRYPT
+ /* done with R/S */
+ mp_clear(r);
+ mp_clear(s);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
+ XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+ r = NULL;
+ s = NULL;
+ #endif
+ #endif
+
+ if (err < 0) {
+ break;
+ }
+ FALL_THROUGH;
+
+ case ECC_STATE_VERIFY_RES:
+ key->state = ECC_STATE_VERIFY_RES;
+ err = 0;
+ break;
+
+ default:
+ err = BAD_STATE_E;
+ }
+
+ /* if async pending then return and skip done cleanup below */
+ if (err == WC_PENDING_E) {
+ key->state++;
+ return err;
+ }
+
+ /* cleanup */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ wc_ecc_free_async(key);
+#elif defined(WOLFSSL_SMALL_STACK)
+ XFREE(s, key->heap, DYNAMIC_TYPE_ECC);
+ XFREE(r, key->heap, DYNAMIC_TYPE_ECC);
+ r = NULL;
+ s = NULL;
+#endif
+
+ key->state = ECC_STATE_NONE;
+
+ return err;
+}
+#endif /* !NO_ASN */
+
+
+/**
Verify an ECC signature
- sig The signature to verify
- siglen The length of the signature (octets)
+ r The signature R component to verify
+ s The signature S component to verify
hash The hash (message digest) that was signed
hashlen The length of the hash (octets)
- stat Result of signature, 1==valid, 0==invalid
+ res Result of signature, 1==valid, 0==invalid
key The corresponding public ECC key
return MP_OKAY if successful (even if the signature is not valid)
*/
-int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
- word32 hashlen, int* stat, ecc_key* key)
-{
- ecc_point *mG, *mQ;
- mp_int r;
- mp_int s;
- mp_int v;
- mp_int w;
- mp_int u1;
- mp_int u2;
- mp_int e;
- mp_int p;
- mp_int m;
+
+int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
+ word32 hashlen, int* res, ecc_key* key)
+#if defined(WOLFSSL_STM32_PKA)
+{
+ return stm32_ecc_verify_hash_ex(r, s, hash, hashlen, res, key);
+}
+#else
+{
int err;
+ word32 keySz;
+#ifdef WOLFSSL_ATECC508A
+ byte sigRS[ATECC_KEY_SIZE*2];
+#elif defined(WOLFSSL_CRYPTOCELL)
+ byte sigRS[ECC_MAX_CRYPTO_HW_SIZE*2];
+ CRYS_ECDSA_VerifyUserContext_t sigCtxTemp;
+ word32 msgLenInBytes = hashlen;
+ CRYS_ECPKI_HASH_OpMode_t hash_mode;
+#elif !defined(WOLFSSL_SP_MATH) || defined(FREESCALE_LTC_ECC)
+ int did_init = 0;
+ ecc_point *mG = NULL, *mQ = NULL;
+ #ifdef WOLFSSL_SMALL_STACK
+ mp_int* v = NULL;
+ mp_int* w = NULL;
+ mp_int* u1 = NULL;
+ mp_int* u2 = NULL;
+ #if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)
+ mp_int* e_lcl = NULL;
+ #endif
+ #else /* WOLFSSL_SMALL_STACK */
+ mp_int v[1];
+ mp_int w[1];
+ mp_int u1[1];
+ mp_int u2[1];
+ #if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)
+ mp_int e_lcl[1];
+ #endif
+ #endif /* WOLFSSL_SMALL_STACK */
+ mp_int* e;
+ DECLARE_CURVE_SPECS(curve, ECC_CURVE_FIELD_COUNT);
+#endif
- if (sig == NULL || hash == NULL || stat == NULL || key == NULL)
- return ECC_BAD_ARG_E;
+ if (r == NULL || s == NULL || hash == NULL || res == NULL || key == NULL)
+ return ECC_BAD_ARG_E;
/* default to invalid signature */
- *stat = 0;
+ *res = 0;
/* is the IDX valid ? */
- if (ecc_is_valid_idx(key->idx) != 1) {
+ if (wc_ecc_is_valid_idx(key->idx) != 1) {
return ECC_BAD_ARG_E;
}
- /* allocate ints */
- if ((err = mp_init_multi(&v, &w, &u1, &u2, &p, &e)) != MP_OKAY) {
- return MEMORY_E;
+ keySz = key->dp->size;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+ defined(WOLFSSL_ASYNC_CRYPT_TEST)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+ if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_ECC_VERIFY)) {
+ WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+ testDev->eccVerify.r = r;
+ testDev->eccVerify.s = s;
+ testDev->eccVerify.hash = hash;
+ testDev->eccVerify.hashlen = hashlen;
+ testDev->eccVerify.stat = res;
+ testDev->eccVerify.key = key;
+ return WC_PENDING_E;
+ }
+ }
+#endif
+
+#ifdef WOLFSSL_ATECC508A
+ /* Extract R and S */
+ err = mp_to_unsigned_bin(r, &sigRS[0]);
+ if (err != MP_OKAY) {
+ return err;
+ }
+ err = mp_to_unsigned_bin(s, &sigRS[keySz]);
+ if (err != MP_OKAY) {
+ return err;
+ }
+
+ err = atmel_ecc_verify(hash, sigRS, key->pubkey_raw, res);
+ if (err != 0) {
+ return err;
+ }
+ (void)hashlen;
+#elif defined(WOLFSSL_CRYPTOCELL)
+
+ /* Extract R and S */
+
+ err = mp_to_unsigned_bin(r, &sigRS[0]);
+ if (err != MP_OKAY) {
+ return err;
+ }
+ err = mp_to_unsigned_bin(s, &sigRS[keySz]);
+ if (err != MP_OKAY) {
+ return err;
}
- if ((err = mp_init(&m)) != MP_OKAY) {
- mp_clear(&v);
- mp_clear(&w);
- mp_clear(&u1);
- mp_clear(&u2);
- mp_clear(&p);
- mp_clear(&e);
- return MEMORY_E;
+ hash_mode = cc310_hashModeECC(msgLenInBytes);
+ if (hash_mode == CRYS_ECPKI_HASH_OpModeLast) {
+ /* hash_mode = */ cc310_hashModeECC(keySz);
+ hash_mode = CRYS_ECPKI_HASH_SHA256_mode;
+ }
+ /* truncate if hash is longer than key size */
+ if (msgLenInBytes > keySz) {
+ msgLenInBytes = keySz;
}
- /* allocate points */
- mG = ecc_new_point();
- mQ = ecc_new_point();
- if (mQ == NULL || mG == NULL)
- err = MEMORY_E;
-
- /* Note, DecodeECC_DSA_Sig() calls mp_init() on r and s.
- * If either of those don't allocate correctly, none of
- * the rest of this function will execute, and everything
- * gets cleaned up at the end. */
- XMEMSET(&r, 0, sizeof(r));
- XMEMSET(&s, 0, sizeof(s));
- if (err == MP_OKAY)
- err = DecodeECC_DSA_Sig(sig, siglen, &r, &s);
+ /* verify the signature using the public key */
+ err = CRYS_ECDSA_Verify(&sigCtxTemp,
+ &key->ctx.pubKey,
+ hash_mode,
+ &sigRS[0],
+ keySz*2,
+ (byte*)hash,
+ msgLenInBytes);
- /* get the order */
- if (err == MP_OKAY)
- err = mp_read_radix(&p, (char *)key->dp->order, 16);
+ if (err != SA_SILIB_RET_OK) {
+ WOLFSSL_MSG("CRYS_ECDSA_Verify failed");
+ return err;
+ }
+ /* valid signature if we get to this point */
+ *res = 1;
+#else
+ /* checking if private key with no public part */
+ if (key->type == ECC_PRIVATEKEY_ONLY) {
+ WOLFSSL_MSG("Verify called with private key, generating public part");
+ err = wc_ecc_make_pub_ex(key, NULL, NULL);
+ if (err != MP_OKAY) {
+ WOLFSSL_MSG("Unable to extract public key");
+ return err;
+ }
+ }
- /* get the modulus */
- if (err == MP_OKAY)
- err = mp_read_radix(&m, (char *)key->dp->prime, 16);
+#if defined(WOLFSSL_DSP) && !defined(FREESCALE_LTC_ECC)
+ if (key->handle != -1) {
+ return sp_dsp_ecc_verify_256(key->handle, hash, hashlen, key->pubkey.x, key->pubkey.y,
+ key->pubkey.z, r, s, res, key->heap);
+ }
+ if (wolfSSL_GetHandleCbSet() == 1) {
+ return sp_dsp_ecc_verify_256(0, hash, hashlen, key->pubkey.x, key->pubkey.y,
+ key->pubkey.z, r, s, res, key->heap);
+ }
+#endif
+#if defined(WOLFSSL_SP_MATH) && !defined(FREESCALE_LTC_ECC)
+#ifndef WOLFSSL_SP_NO_256
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
+ return sp_ecc_verify_256(hash, hashlen, key->pubkey.x, key->pubkey.y,
+ key->pubkey.z, r, s, res, key->heap);
+ }
+#endif
+#ifdef WOLFSSL_SP_384
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+ return sp_ecc_verify_384(hash, hashlen, key->pubkey.x, key->pubkey.y,
+ key->pubkey.z, r, s, res, key->heap);
+ }
+#endif
+ return WC_KEY_SIZE_E;
+#else
+#if defined WOLFSSL_HAVE_SP_ECC && !defined(FREESCALE_LTC_ECC)
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC)
+ #endif
+ {
+#ifndef WOLFSSL_SP_NO_256
+ if (key->idx != ECC_CUSTOM_IDX &&
+ ecc_sets[key->idx].id == ECC_SECP256R1) {
+ return sp_ecc_verify_256(hash, hashlen, key->pubkey.x,
+ key->pubkey.y, key->pubkey.z,r, s, res,
+ key->heap);
+ }
+#endif /* WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+ if (key->idx != ECC_CUSTOM_IDX &&
+ ecc_sets[key->idx].id == ECC_SECP384R1) {
+ return sp_ecc_verify_384(hash, hashlen, key->pubkey.x,
+ key->pubkey.y, key->pubkey.z,r, s, res,
+ key->heap);
+ }
+#endif /* WOLFSSL_SP_384 */
+ }
+#endif /* WOLFSSL_HAVE_SP_ECC */
+
+ ALLOC_CURVE_SPECS(ECC_CURVE_FIELD_COUNT);
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM_V)
+ err = wc_ecc_alloc_mpint(key, &key->e);
+ if (err != 0) {
+ FREE_CURVE_SPECS();
+ return err;
+ }
+ e = key->e;
+#else
+#ifdef WOLFSSL_SMALL_STACK
+ e_lcl = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (e_lcl == NULL) {
+ FREE_CURVE_SPECS();
+ return MEMORY_E;
+ }
+#endif
+ e = e_lcl;
+#endif /* WOLFSSL_ASYNC_CRYPT && HAVE_CAVIUM_V */
+
+ err = mp_init(e);
+ if (err != MP_OKAY)
+ return MEMORY_E;
+
+ /* read in the specs for this curve */
+ err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
/* check for zero */
if (err == MP_OKAY) {
- if (mp_iszero(&r) || mp_iszero(&s) || mp_cmp(&r, &p) != MP_LT ||
- mp_cmp(&s, &p) != MP_LT)
- err = MP_ZERO_E;
+ if (mp_iszero(r) == MP_YES || mp_iszero(s) == MP_YES ||
+ mp_cmp(r, curve->order) != MP_LT ||
+ mp_cmp(s, curve->order) != MP_LT) {
+ err = MP_ZERO_E;
+ }
}
+
/* read hash */
if (err == MP_OKAY) {
/* we may need to truncate if hash is longer than key size */
- unsigned int orderBits = mp_count_bits(&p);
+ unsigned int orderBits = mp_count_bits(curve->order);
/* truncate down to byte size, may be all that's needed */
if ( (WOLFSSL_BIT_SIZE * hashlen) > orderBits)
- hashlen = (orderBits + WOLFSSL_BIT_SIZE - 1)/WOLFSSL_BIT_SIZE;
- err = mp_read_unsigned_bin(&e, hash, hashlen);
+ hashlen = (orderBits + WOLFSSL_BIT_SIZE - 1) / WOLFSSL_BIT_SIZE;
+ err = mp_read_unsigned_bin(e, hash, hashlen);
/* may still need bit truncation too */
if (err == MP_OKAY && (WOLFSSL_BIT_SIZE * hashlen) > orderBits)
- mp_rshb(&e, WOLFSSL_BIT_SIZE - (orderBits & 0x7));
+ mp_rshb(e, WOLFSSL_BIT_SIZE - (orderBits & 0x7));
+ }
+
+ /* check for async hardware acceleration */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+ #if defined(HAVE_CAVIUM_V) || defined(HAVE_INTEL_QA)
+ #ifdef HAVE_CAVIUM_V
+ if (NitroxEccIsCurveSupported(key))
+ #endif
+ {
+ err = wc_mp_to_bigint_sz(e, &e->raw, keySz);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint_sz(key->pubkey.x, &key->pubkey.x->raw, keySz);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint_sz(key->pubkey.y, &key->pubkey.y->raw, keySz);
+ if (err == MP_OKAY)
+ #ifdef HAVE_CAVIUM_V
+ err = NitroxEcdsaVerify(key, &e->raw, &key->pubkey.x->raw,
+ &key->pubkey.y->raw, &r->raw, &s->raw,
+ &curve->prime->raw, &curve->order->raw, res);
+ #else
+ err = IntelQaEcdsaVerify(&key->asyncDev, &e->raw, &key->pubkey.x->raw,
+ &key->pubkey.y->raw, &r->raw, &s->raw, &curve->Af->raw,
+ &curve->Bf->raw, &curve->prime->raw, &curve->order->raw,
+ &curve->Gx->raw, &curve->Gy->raw, res);
+ #endif
+
+ #ifndef HAVE_CAVIUM_V
+ mp_clear(e);
+ #endif
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
+
+ return err;
+ }
+ #endif /* HAVE_CAVIUM_V || HAVE_INTEL_QA */
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_ECC */
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (err == MP_OKAY) {
+ v = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (v == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ w = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (w == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ u1 = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (u1 == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ u2 = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (u2 == NULL)
+ err = MEMORY_E;
+ }
+#endif
+
+ /* allocate ints */
+ if (err == MP_OKAY) {
+ if ((err = mp_init_multi(v, w, u1, u2, NULL, NULL)) != MP_OKAY) {
+ err = MEMORY_E;
+ }
+ did_init = 1;
+ }
+
+ /* allocate points */
+ if (err == MP_OKAY) {
+ mG = wc_ecc_new_point_h(key->heap);
+ mQ = wc_ecc_new_point_h(key->heap);
+ if (mQ == NULL || mG == NULL)
+ err = MEMORY_E;
}
/* w = s^-1 mod n */
if (err == MP_OKAY)
- err = mp_invmod(&s, &p, &w);
+ err = mp_invmod(s, curve->order, w);
/* u1 = ew */
if (err == MP_OKAY)
- err = mp_mulmod(&e, &w, &p, &u1);
+ err = mp_mulmod(e, w, curve->order, u1);
/* u2 = rw */
if (err == MP_OKAY)
- err = mp_mulmod(&r, &w, &p, &u2);
+ err = mp_mulmod(r, w, curve->order, u2);
/* find mG and mQ */
if (err == MP_OKAY)
- err = mp_read_radix(mG->x, (char *)key->dp->Gx, 16);
-
+ err = mp_copy(curve->Gx, mG->x);
if (err == MP_OKAY)
- err = mp_read_radix(mG->y, (char *)key->dp->Gy, 16);
+ err = mp_copy(curve->Gy, mG->y);
if (err == MP_OKAY)
- mp_set(mG->z, 1);
+ err = mp_set(mG->z, 1);
if (err == MP_OKAY)
err = mp_copy(key->pubkey.x, mQ->x);
@@ -2207,75 +6170,458 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
if (err == MP_OKAY)
err = mp_copy(key->pubkey.z, mQ->z);
+#if defined(FREESCALE_LTC_ECC)
+ /* use PKHA to compute u1*mG + u2*mQ */
+ if (err == MP_OKAY)
+ err = wc_ecc_mulmod_ex(u1, mG, mG, curve->Af, curve->prime, 0, key->heap);
+ if (err == MP_OKAY)
+ err = wc_ecc_mulmod_ex(u2, mQ, mQ, curve->Af, curve->prime, 0, key->heap);
+ if (err == MP_OKAY)
+ err = wc_ecc_point_add(mG, mQ, mG, curve->prime);
+#else
#ifndef ECC_SHAMIR
+ if (err == MP_OKAY)
{
- mp_digit mp;
+ mp_digit mp = 0;
- /* compute u1*mG + u2*mQ = mG */
- if (err == MP_OKAY)
- err = ecc_mulmod(&u1, mG, mG, &m, 0);
- if (err == MP_OKAY)
- err = ecc_mulmod(&u2, mQ, mQ, &m, 0);
-
- /* find the montgomery mp */
- if (err == MP_OKAY)
- err = mp_montgomery_setup(&m, &mp);
+ if (!mp_iszero(u1)) {
+ /* compute u1*mG + u2*mQ = mG */
+ err = wc_ecc_mulmod_ex(u1, mG, mG, curve->Af, curve->prime, 0,
+ key->heap);
+ if (err == MP_OKAY) {
+ err = wc_ecc_mulmod_ex(u2, mQ, mQ, curve->Af, curve->prime, 0,
+ key->heap);
+ }
- /* add them */
- if (err == MP_OKAY)
- err = ecc_projective_add_point(mQ, mG, mG, &m, &mp);
-
- /* reduce */
- if (err == MP_OKAY)
- err = ecc_map(mG, &m, &mp);
+ /* find the montgomery mp */
+ if (err == MP_OKAY)
+ err = mp_montgomery_setup(curve->prime, &mp);
+
+ /* add them */
+ if (err == MP_OKAY)
+ err = ecc_projective_add_point(mQ, mG, mG, curve->Af,
+ curve->prime, mp);
+ if (err == MP_OKAY && mp_iszero(mG->z)) {
+ /* When all zero then should have done an add */
+ if (mp_iszero(mG->x) && mp_iszero(mG->y)) {
+ err = ecc_projective_dbl_point(mQ, mG, curve->Af,
+ curve->prime, mp);
+ }
+ /* When only Z zero then result is infinity */
+ else {
+ err = mp_set(mG->x, 0);
+ if (err == MP_OKAY)
+ err = mp_set(mG->y, 0);
+ if (err == MP_OKAY)
+ err = mp_set(mG->z, 1);
+ }
+ }
+ }
+ else {
+ /* compute 0*mG + u2*mQ = mG */
+ err = wc_ecc_mulmod_ex(u2, mQ, mG, curve->Af, curve->prime, 0,
+ key->heap);
+ /* find the montgomery mp */
+ if (err == MP_OKAY)
+ err = mp_montgomery_setup(curve->prime, &mp);
+ }
+
+ /* reduce */
+ if (err == MP_OKAY)
+ err = ecc_map(mG, curve->prime, mp);
}
#else
- /* use Shamir's trick to compute u1*mG + u2*mQ using half the doubles */
- if (err == MP_OKAY)
- err = ecc_mul2add(mG, &u1, mQ, &u2, mG, &m);
-#endif /* ECC_SHAMIR */
-
+ /* use Shamir's trick to compute u1*mG + u2*mQ using half the doubles */
+ if (err == MP_OKAY) {
+ err = ecc_mul2add(mG, u1, mQ, u2, mG, curve->Af, curve->prime,
+ key->heap);
+ }
+#endif /* ECC_SHAMIR */
+#endif /* FREESCALE_LTC_ECC */
/* v = X_x1 mod n */
if (err == MP_OKAY)
- err = mp_mod(mG->x, &p, &v);
+ err = mp_mod(mG->x, curve->order, v);
/* does v == r */
if (err == MP_OKAY) {
- if (mp_cmp(&v, &r) == MP_EQ)
- *stat = 1;
+ if (mp_cmp(v, r) == MP_EQ)
+ *res = 1;
}
- ecc_del_point(mG);
- ecc_del_point(mQ);
+ /* cleanup */
+ wc_ecc_del_point_h(mG, key->heap);
+ wc_ecc_del_point_h(mQ, key->heap);
+
+ mp_clear(e);
+ if (did_init) {
+ mp_clear(v);
+ mp_clear(w);
+ mp_clear(u1);
+ mp_clear(u2);
+ }
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(u2, key->heap, DYNAMIC_TYPE_ECC);
+ XFREE(u1, key->heap, DYNAMIC_TYPE_ECC);
+ XFREE(w, key->heap, DYNAMIC_TYPE_ECC);
+ XFREE(v, key->heap, DYNAMIC_TYPE_ECC);
+#if !defined(WOLFSSL_ASYNC_CRYPT) || !defined(HAVE_CAVIUM_V)
+ XFREE(e_lcl, key->heap, DYNAMIC_TYPE_ECC);
+#endif
+#endif
+
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
- mp_clear(&r);
- mp_clear(&s);
- mp_clear(&v);
- mp_clear(&w);
- mp_clear(&u1);
- mp_clear(&u2);
- mp_clear(&p);
- mp_clear(&e);
- mp_clear(&m);
+#endif /* WOLFSSL_SP_MATH */
+#endif /* WOLFSSL_ATECC508A */
+
+ (void)keySz;
+ (void)hashlen;
return err;
}
+#endif /* WOLFSSL_STM32_PKA */
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_KEY_IMPORT
+/* import point from der
+ * if shortKeySize != 0 then keysize is always (inLen-1)>>1 */
+int wc_ecc_import_point_der_ex(byte* in, word32 inLen, const int curve_idx,
+ ecc_point* point, int shortKeySize)
+{
+ int err = 0;
+#ifdef HAVE_COMP_KEY
+ int compressed = 0;
+#endif
+ int keysize;
+ byte pointType;
+
+#ifndef HAVE_COMP_KEY
+ (void)shortKeySize;
+#endif
+
+ if (in == NULL || point == NULL || (curve_idx < 0) ||
+ (wc_ecc_is_valid_idx(curve_idx) == 0))
+ return ECC_BAD_ARG_E;
+
+ /* must be odd */
+ if ((inLen & 1) == 0) {
+ return ECC_BAD_ARG_E;
+ }
+
+ /* init point */
+#ifdef ALT_ECC_SIZE
+ point->x = (mp_int*)&point->xyz[0];
+ point->y = (mp_int*)&point->xyz[1];
+ point->z = (mp_int*)&point->xyz[2];
+ alt_fp_init(point->x);
+ alt_fp_init(point->y);
+ alt_fp_init(point->z);
+#else
+ err = mp_init_multi(point->x, point->y, point->z, NULL, NULL, NULL);
+#endif
+ if (err != MP_OKAY)
+ return MEMORY_E;
+
+ /* check for point type (4, 2, or 3) */
+ pointType = in[0];
+ if (pointType != ECC_POINT_UNCOMP && pointType != ECC_POINT_COMP_EVEN &&
+ pointType != ECC_POINT_COMP_ODD) {
+ err = ASN_PARSE_E;
+ }
+
+ if (pointType == ECC_POINT_COMP_EVEN || pointType == ECC_POINT_COMP_ODD) {
+#ifdef HAVE_COMP_KEY
+ compressed = 1;
+#else
+ err = NOT_COMPILED_IN;
+#endif
+ }
+
+ /* adjust to skip first byte */
+ inLen -= 1;
+ in += 1;
+
+ /* calculate key size based on inLen / 2 if uncompressed or shortKeySize
+ * is true */
+#ifdef HAVE_COMP_KEY
+ keysize = compressed && !shortKeySize ? inLen : inLen>>1;
+#else
+ keysize = inLen>>1;
+#endif
+
+ /* read data */
+ if (err == MP_OKAY)
+ err = mp_read_unsigned_bin(point->x, (byte*)in, keysize);
+
+#ifdef HAVE_COMP_KEY
+ if (err == MP_OKAY && compressed == 1) { /* build y */
+#ifndef WOLFSSL_SP_MATH
+ int did_init = 0;
+ mp_int t1, t2;
+ DECLARE_CURVE_SPECS(curve, 3);
+
+ ALLOC_CURVE_SPECS(3);
+
+ if (mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL) != MP_OKAY)
+ err = MEMORY_E;
+ else
+ did_init = 1;
+
+ /* load curve info */
+ if (err == MP_OKAY)
+ err = wc_ecc_curve_load(&ecc_sets[curve_idx], &curve,
+ (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF |
+ ECC_CURVE_FIELD_BF));
+
+ /* compute x^3 */
+ if (err == MP_OKAY)
+ err = mp_sqr(point->x, &t1);
+ if (err == MP_OKAY)
+ err = mp_mulmod(&t1, point->x, curve->prime, &t1);
+
+ /* compute x^3 + a*x */
+ if (err == MP_OKAY)
+ err = mp_mulmod(curve->Af, point->x, curve->prime, &t2);
+ if (err == MP_OKAY)
+ err = mp_add(&t1, &t2, &t1);
+
+ /* compute x^3 + a*x + b */
+ if (err == MP_OKAY)
+ err = mp_add(&t1, curve->Bf, &t1);
+
+ /* compute sqrt(x^3 + a*x + b) */
+ if (err == MP_OKAY)
+ err = mp_sqrtmod_prime(&t1, curve->prime, &t2);
+
+ /* adjust y */
+ if (err == MP_OKAY) {
+ if ((mp_isodd(&t2) == MP_YES && pointType == ECC_POINT_COMP_ODD) ||
+ (mp_isodd(&t2) == MP_NO && pointType == ECC_POINT_COMP_EVEN)) {
+ err = mp_mod(&t2, curve->prime, point->y);
+ }
+ else {
+ err = mp_submod(curve->prime, &t2, curve->prime, point->y);
+ }
+ }
+
+ if (did_init) {
+ mp_clear(&t2);
+ mp_clear(&t1);
+ }
+
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
+#else
+ #ifndef WOLFSSL_SP_NO_256
+ if (curve_idx != ECC_CUSTOM_IDX &&
+ ecc_sets[curve_idx].id == ECC_SECP256R1) {
+ sp_ecc_uncompress_256(point->x, pointType, point->y);
+ }
+ else
+ #endif
+ #ifdef WOLFSSL_SP_384
+ if (curve_idx != ECC_CUSTOM_IDX &&
+ ecc_sets[curve_idx].id == ECC_SECP384R1) {
+ sp_ecc_uncompress_384(point->x, pointType, point->y);
+ }
+ else
+ #endif
+ {
+ err = WC_KEY_SIZE_E;
+ }
+#endif
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef HAVE_COMP_KEY
+ if (compressed == 0)
+#endif
+ err = mp_read_unsigned_bin(point->y, (byte*)in + keysize, keysize);
+ }
+ if (err == MP_OKAY)
+ err = mp_set(point->z, 1);
+
+ if (err != MP_OKAY) {
+ mp_clear(point->x);
+ mp_clear(point->y);
+ mp_clear(point->z);
+ }
+
+ return err;
+}
+
+/* function for backwards compatibility with previous implementations */
+int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
+ ecc_point* point)
+{
+ return wc_ecc_import_point_der_ex(in, inLen, curve_idx, point, 1);
+}
+#endif /* HAVE_ECC_KEY_IMPORT */
+
+#ifdef HAVE_ECC_KEY_EXPORT
+/* export point to der */
+
+int wc_ecc_export_point_der_ex(const int curve_idx, ecc_point* point, byte* out,
+ word32* outLen, int compressed)
+{
+ if (compressed == 0)
+ return wc_ecc_export_point_der(curve_idx, point, out, outLen);
+#ifdef HAVE_COMP_KEY
+ else
+ return wc_ecc_export_point_der_compressed(curve_idx, point, out, outLen);
+#else
+ return NOT_COMPILED_IN;
+#endif
+}
+
+int wc_ecc_export_point_der(const int curve_idx, ecc_point* point, byte* out,
+ word32* outLen)
+{
+ int ret = MP_OKAY;
+ word32 numlen;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* buf;
+#else
+ byte buf[ECC_BUFSIZE];
+#endif
+
+ if ((curve_idx < 0) || (wc_ecc_is_valid_idx(curve_idx) == 0))
+ return ECC_BAD_ARG_E;
+
+ numlen = ecc_sets[curve_idx].size;
+
+ /* return length needed only */
+ if (point != NULL && out == NULL && outLen != NULL) {
+ *outLen = 1 + 2*numlen;
+ return LENGTH_ONLY_E;
+ }
+
+ if (point == NULL || out == NULL || outLen == NULL)
+ return ECC_BAD_ARG_E;
+
+ if (*outLen < (1 + 2*numlen)) {
+ *outLen = 1 + 2*numlen;
+ return BUFFER_E;
+ }
+
+ /* store byte point type */
+ out[0] = ECC_POINT_UNCOMP;
+
+#ifdef WOLFSSL_SMALL_STACK
+ buf = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+ if (buf == NULL)
+ return MEMORY_E;
+#endif
+
+ /* pad and store x */
+ XMEMSET(buf, 0, ECC_BUFSIZE);
+ ret = mp_to_unsigned_bin(point->x, buf +
+ (numlen - mp_unsigned_bin_size(point->x)));
+ if (ret != MP_OKAY)
+ goto done;
+ XMEMCPY(out+1, buf, numlen);
+
+ /* pad and store y */
+ XMEMSET(buf, 0, ECC_BUFSIZE);
+ ret = mp_to_unsigned_bin(point->y, buf +
+ (numlen - mp_unsigned_bin_size(point->y)));
+ if (ret != MP_OKAY)
+ goto done;
+ XMEMCPY(out+1+numlen, buf, numlen);
+
+ *outLen = 1 + 2*numlen;
+
+done:
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(buf, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
+
+ return ret;
+}
+
+
+/* export point to der */
+#ifdef HAVE_COMP_KEY
+int wc_ecc_export_point_der_compressed(const int curve_idx, ecc_point* point,
+ byte* out, word32* outLen)
+{
+ int ret = MP_OKAY;
+ word32 numlen;
+ word32 output_len;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* buf;
+#else
+ byte buf[ECC_BUFSIZE];
+#endif
+
+ if ((curve_idx < 0) || (wc_ecc_is_valid_idx(curve_idx) == 0))
+ return ECC_BAD_ARG_E;
+
+ numlen = ecc_sets[curve_idx].size;
+ output_len = 1 + numlen; /* y point type + x */
+
+ /* return length needed only */
+ if (point != NULL && out == NULL && outLen != NULL) {
+ *outLen = output_len;
+ return LENGTH_ONLY_E;
+ }
+
+ if (point == NULL || out == NULL || outLen == NULL)
+ return ECC_BAD_ARG_E;
+
+
+ if (*outLen < output_len) {
+ *outLen = output_len;
+ return BUFFER_E;
+ }
+
+ /* store byte point type */
+ out[0] = mp_isodd(point->y) == MP_YES ? ECC_POINT_COMP_ODD :
+ ECC_POINT_COMP_EVEN;
+
+#ifdef WOLFSSL_SMALL_STACK
+ buf = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+ if (buf == NULL)
+ return MEMORY_E;
+#endif
+ /* pad and store x */
+ XMEMSET(buf, 0, ECC_BUFSIZE);
+ ret = mp_to_unsigned_bin(point->x, buf +
+ (numlen - mp_unsigned_bin_size(point->x)));
+ if (ret != MP_OKAY)
+ goto done;
+ XMEMCPY(out+1, buf, numlen);
+
+ *outLen = output_len;
+
+done:
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(buf, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+#endif
+
+ return ret;
+}
+#endif /* HAVE_COMP_KEY */
/* export public ECC key in ANSI X9.63 format */
int wc_ecc_export_x963(ecc_key* key, byte* out, word32* outLen)
{
+ int ret = MP_OKAY;
+ word32 numlen;
#ifdef WOLFSSL_SMALL_STACK
byte* buf;
#else
byte buf[ECC_BUFSIZE];
#endif
- word32 numlen;
- int ret = MP_OKAY;
+ word32 pubxlen, pubylen;
/* return length needed only */
if (key != NULL && out == NULL && outLen != NULL) {
- numlen = key->dp->size;
+ /* if key hasn't been setup assume max bytes for size estimation */
+ numlen = key->dp ? key->dp->size : MAX_ECC_BYTES;
*outLen = 1 + 2*numlen;
return LENGTH_ONLY_E;
}
@@ -2283,47 +6629,56 @@ int wc_ecc_export_x963(ecc_key* key, byte* out, word32* outLen)
if (key == NULL || out == NULL || outLen == NULL)
return ECC_BAD_ARG_E;
- if (ecc_is_valid_idx(key->idx) == 0) {
+ if (key->type == ECC_PRIVATEKEY_ONLY)
+ return ECC_PRIVATEONLY_E;
+
+ if (wc_ecc_is_valid_idx(key->idx) == 0 || key->dp == NULL) {
return ECC_BAD_ARG_E;
}
numlen = key->dp->size;
+ /* verify room in out buffer */
if (*outLen < (1 + 2*numlen)) {
*outLen = 1 + 2*numlen;
return BUFFER_E;
}
- /* store byte 0x04 */
- out[0] = 0x04;
+ /* verify public key length is less than key size */
+ pubxlen = mp_unsigned_bin_size(key->pubkey.x);
+ pubylen = mp_unsigned_bin_size(key->pubkey.y);
+ if ((pubxlen > numlen) || (pubylen > numlen)) {
+ WOLFSSL_MSG("Public key x/y invalid!");
+ return BUFFER_E;
+ }
+
+ /* store byte point type */
+ out[0] = ECC_POINT_UNCOMP;
#ifdef WOLFSSL_SMALL_STACK
- buf = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ buf = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
if (buf == NULL)
return MEMORY_E;
#endif
- do {
- /* pad and store x */
- XMEMSET(buf, 0, ECC_BUFSIZE);
- ret = mp_to_unsigned_bin(key->pubkey.x,
- buf + (numlen - mp_unsigned_bin_size(key->pubkey.x)));
- if (ret != MP_OKAY)
- break;
- XMEMCPY(out+1, buf, numlen);
-
- /* pad and store y */
- XMEMSET(buf, 0, ECC_BUFSIZE);
- ret = mp_to_unsigned_bin(key->pubkey.y,
- buf + (numlen - mp_unsigned_bin_size(key->pubkey.y)));
- if (ret != MP_OKAY)
- break;
- XMEMCPY(out+1+numlen, buf, numlen);
-
- *outLen = 1 + 2*numlen;
- } while (0);
-
+ /* pad and store x */
+ XMEMSET(buf, 0, ECC_BUFSIZE);
+ ret = mp_to_unsigned_bin(key->pubkey.x, buf + (numlen - pubxlen));
+ if (ret != MP_OKAY)
+ goto done;
+ XMEMCPY(out+1, buf, numlen);
+
+ /* pad and store y */
+ XMEMSET(buf, 0, ECC_BUFSIZE);
+ ret = mp_to_unsigned_bin(key->pubkey.y, buf + (numlen - pubylen));
+ if (ret != MP_OKAY)
+ goto done;
+ XMEMCPY(out+1+numlen, buf, numlen);
+
+ *outLen = 1 + 2*numlen;
+
+done:
#ifdef WOLFSSL_SMALL_STACK
- XFREE(buf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(buf, NULL, DYNAMIC_TYPE_ECC_BUFFER);
#endif
return ret;
@@ -2332,158 +6687,264 @@ int wc_ecc_export_x963(ecc_key* key, byte* out, word32* outLen)
/* export public ECC key in ANSI X9.63 format, extended with
* compression option */
-int wc_ecc_export_x963_ex(ecc_key* key, byte* out, word32* outLen, int compressed)
+int wc_ecc_export_x963_ex(ecc_key* key, byte* out, word32* outLen,
+ int compressed)
{
if (compressed == 0)
return wc_ecc_export_x963(key, out, outLen);
#ifdef HAVE_COMP_KEY
else
return wc_ecc_export_x963_compressed(key, out, outLen);
-#endif
-
+#else
return NOT_COMPILED_IN;
+#endif
}
+#endif /* HAVE_ECC_KEY_EXPORT */
+
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
-/* is ec point on curve descriped by dp ? */
-static int ecc_is_point(const ecc_set_type* dp, ecc_point* ecp, mp_int* prime)
+/* is ecc point on curve described by dp ? */
+int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime)
{
- mp_int b, t1, t2;
+#ifndef WOLFSSL_SP_MATH
int err;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* t1;
+ mp_int* t2;
+#else
+ mp_int t1[1], t2[1];
+#endif
- if ((err = mp_init_multi(&b, &t1, &t2, NULL, NULL, NULL)) != MP_OKAY) {
- return err;
+#ifdef WOLFSSL_SMALL_STACK
+ t1 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (t1 == NULL)
+ return MEMORY_E;
+ t2 = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (t2 == NULL) {
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
}
+#endif
- /* load b */
- err = mp_read_radix(&b, dp->Bf, 16);
+ if ((err = mp_init_multi(t1, t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ return err;
+ }
/* compute y^2 */
if (err == MP_OKAY)
- err = mp_sqr(ecp->y, &t1);
+ err = mp_sqr(ecp->y, t1);
/* compute x^3 */
if (err == MP_OKAY)
- err = mp_sqr(ecp->x, &t2);
+ err = mp_sqr(ecp->x, t2);
if (err == MP_OKAY)
- err = mp_mod(&t2, prime, &t2);
+ err = mp_mod(t2, prime, t2);
if (err == MP_OKAY)
- err = mp_mul(ecp->x, &t2, &t2);
+ err = mp_mul(ecp->x, t2, t2);
/* compute y^2 - x^3 */
if (err == MP_OKAY)
- err = mp_sub(&t1, &t2, &t1);
+ err = mp_sub(t1, t2, t1);
- /* compute y^2 - x^3 + 3x */
- if (err == MP_OKAY)
- err = mp_add(&t1, ecp->x, &t1);
- if (err == MP_OKAY)
- err = mp_add(&t1, ecp->x, &t1);
- if (err == MP_OKAY)
- err = mp_add(&t1, ecp->x, &t1);
- if (err == MP_OKAY)
- err = mp_mod(&t1, prime, &t1);
+ /* Determine if curve "a" should be used in calc */
+#ifdef WOLFSSL_CUSTOM_CURVES
+ if (err == MP_OKAY) {
+ /* Use a and prime to determine if a == 3 */
+ err = mp_set(t2, 0);
+ if (err == MP_OKAY)
+ err = mp_submod(prime, a, prime, t2);
+ }
+ if (err == MP_OKAY && mp_cmp_d(t2, 3) != MP_EQ) {
+ /* compute y^2 - x^3 + a*x */
+ if (err == MP_OKAY)
+ err = mp_mulmod(t2, ecp->x, prime, t2);
+ if (err == MP_OKAY)
+ err = mp_addmod(t1, t2, prime, t1);
+ }
+ else
+#endif /* WOLFSSL_CUSTOM_CURVES */
+ {
+ /* assumes "a" == 3 */
+ (void)a;
+
+ /* compute y^2 - x^3 + 3x */
+ if (err == MP_OKAY)
+ err = mp_add(t1, ecp->x, t1);
+ if (err == MP_OKAY)
+ err = mp_add(t1, ecp->x, t1);
+ if (err == MP_OKAY)
+ err = mp_add(t1, ecp->x, t1);
+ if (err == MP_OKAY)
+ err = mp_mod(t1, prime, t1);
+ }
- while (err == MP_OKAY && mp_cmp_d(&t1, 0) == MP_LT) {
- err = mp_add(&t1, prime, &t1);
+ /* adjust range (0, prime) */
+ while (err == MP_OKAY && mp_isneg(t1)) {
+ err = mp_add(t1, prime, t1);
}
- while (err == MP_OKAY && mp_cmp(&t1, prime) != MP_LT) {
- err = mp_sub(&t1, prime, &t1);
+ while (err == MP_OKAY && mp_cmp(t1, prime) != MP_LT) {
+ err = mp_sub(t1, prime, t1);
}
/* compare to b */
if (err == MP_OKAY) {
- if (mp_cmp(&t1, &b) != MP_EQ) {
+ if (mp_cmp(t1, b) != MP_EQ) {
err = MP_VAL;
} else {
err = MP_OKAY;
}
}
- mp_clear(&b);
- mp_clear(&t1);
- mp_clear(&t2);
+ mp_clear(t1);
+ mp_clear(t2);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t2, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
+#endif
return err;
-}
+#else
+ (void)a;
+ (void)b;
+#ifndef WOLFSSL_SP_NO_256
+ if (mp_count_bits(prime) == 256) {
+ return sp_ecc_is_point_256(ecp->x, ecp->y);
+ }
+#endif
+#ifdef WOLFSSL_SP_384
+ if (mp_count_bits(prime) == 384) {
+ return sp_ecc_is_point_384(ecp->x, ecp->y);
+ }
+#endif
+ return WC_KEY_SIZE_E;
+#endif
+}
+#ifndef WOLFSSL_SP_MATH
/* validate privkey * generator == pubkey, 0 on success */
-static int ecc_check_privkey_gen(ecc_key* key, mp_int* prime)
+static int ecc_check_privkey_gen(ecc_key* key, mp_int* a, mp_int* prime)
{
+ int err = MP_OKAY;
ecc_point* base = NULL;
ecc_point* res = NULL;
- int err;
+ DECLARE_CURVE_SPECS(curve, 2);
if (key == NULL)
return BAD_FUNC_ARG;
- base = ecc_new_point();
- if (base == NULL)
- return MEMORY_E;
+ ALLOC_CURVE_SPECS(2);
- /* set up base generator */
- err = mp_read_radix(base->x, (char*)key->dp->Gx, 16);
- if (err == MP_OKAY)
- err = mp_read_radix(base->y, (char*)key->dp->Gy, 16);
- if (err == MP_OKAY)
- mp_set(base->z, 1);
+ res = wc_ecc_new_point_h(key->heap);
+ if (res == NULL)
+ err = MEMORY_E;
- if (err == MP_OKAY) {
- res = ecc_new_point();
- if (res == NULL)
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
+ if (err == MP_OKAY) {
+ err = sp_ecc_mulmod_base_256(&key->k, res, 1, key->heap);
+ }
+ }
+ else
+#endif
+#ifdef WOLFSSL_SP_384
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+ if (err == MP_OKAY) {
+ err = sp_ecc_mulmod_base_384(&key->k, res, 1, key->heap);
+ }
+ }
+ else
+#endif
+#endif
+ {
+ base = wc_ecc_new_point_h(key->heap);
+ if (base == NULL)
err = MEMORY_E;
- else {
- err = ecc_mulmod(&key->k, base, res, prime, 1);
- if (err == MP_OKAY) {
- /* compare result to public key */
- if (mp_cmp(res->x, key->pubkey.x) != MP_EQ ||
- mp_cmp(res->y, key->pubkey.y) != MP_EQ ||
- mp_cmp(res->z, key->pubkey.z) != MP_EQ) {
- /* didn't match */
- err = ECC_PRIV_KEY_E;
- }
- }
+
+ if (err == MP_OKAY) {
+ /* load curve info */
+ err = wc_ecc_curve_load(key->dp, &curve,
+ (ECC_CURVE_FIELD_GX | ECC_CURVE_FIELD_GY));
+ }
+
+ /* set up base generator */
+ if (err == MP_OKAY)
+ err = mp_copy(curve->Gx, base->x);
+ if (err == MP_OKAY)
+ err = mp_copy(curve->Gy, base->y);
+ if (err == MP_OKAY)
+ err = mp_set(base->z, 1);
+
+ if (err == MP_OKAY)
+ err = wc_ecc_mulmod_ex(&key->k, base, res, a, prime, 1, key->heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* compare result to public key */
+ if (mp_cmp(res->x, key->pubkey.x) != MP_EQ ||
+ mp_cmp(res->y, key->pubkey.y) != MP_EQ ||
+ mp_cmp(res->z, key->pubkey.z) != MP_EQ) {
+ /* didn't match */
+ err = ECC_PRIV_KEY_E;
}
}
- ecc_del_point(res);
- ecc_del_point(base);
+ wc_ecc_curve_free(curve);
+ wc_ecc_del_point_h(res, key->heap);
+ wc_ecc_del_point_h(base, key->heap);
+ FREE_CURVE_SPECS();
return err;
}
-
+#endif
#ifdef WOLFSSL_VALIDATE_ECC_IMPORT
/* check privkey generator helper, creates prime needed */
static int ecc_check_privkey_gen_helper(ecc_key* key)
{
- mp_int prime;
int err;
+#ifndef WOLFSSL_ATECC508A
+ DECLARE_CURVE_SPECS(curve, 2);
+#endif
if (key == NULL)
return BAD_FUNC_ARG;
- err = mp_init(&prime);
- if (err != MP_OKAY)
- return err;
+#ifdef WOLFSSL_ATECC508A
+ /* Hardware based private key, so this operation is not supported */
+ err = MP_OKAY; /* just report success */
+
+#else
+ ALLOC_CURVE_SPECS(2);
- err = mp_read_radix(&prime, (char*)key->dp->prime, 16);
+ /* load curve info */
+ err = wc_ecc_curve_load(key->dp, &curve,
+ (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF));
+
+ if (err == MP_OKAY)
+ err = ecc_check_privkey_gen(key, curve->Af, curve->prime);
- if (err == MP_OKAY);
- err = ecc_check_privkey_gen(key, &prime);
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
- mp_clear(&prime);
+#endif /* WOLFSSL_ATECC508A */
return err;
}
#endif /* WOLFSSL_VALIDATE_ECC_IMPORT */
-
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || !defined(WOLFSSL_SP_MATH)
/* validate order * pubkey = point at infinity, 0 on success */
-static int ecc_check_pubkey_order(ecc_key* key, mp_int* prime, mp_int* order)
+static int ecc_check_pubkey_order(ecc_key* key, ecc_point* pubkey, mp_int* a,
+ mp_int* prime, mp_int* order)
{
ecc_point* inf = NULL;
int err;
@@ -2491,258 +6952,613 @@ static int ecc_check_pubkey_order(ecc_key* key, mp_int* prime, mp_int* order)
if (key == NULL)
return BAD_FUNC_ARG;
- inf = ecc_new_point();
+ inf = wc_ecc_new_point_h(key->heap);
if (inf == NULL)
err = MEMORY_E;
else {
- err = ecc_mulmod(order, &key->pubkey, inf, prime, 1);
- if (err == MP_OKAY && !ecc_point_is_at_infinity(inf))
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+ if (key->idx != ECC_CUSTOM_IDX &&
+ ecc_sets[key->idx].id == ECC_SECP256R1) {
+ err = sp_ecc_mulmod_256(order, pubkey, inf, 1, key->heap);
+ }
+ else
+#endif
+#ifdef WOLFSSL_SP_384
+ if (key->idx != ECC_CUSTOM_IDX &&
+ ecc_sets[key->idx].id == ECC_SECP384R1) {
+ err = sp_ecc_mulmod_384(order, pubkey, inf, 1, key->heap);
+ }
+ else
+#endif
+#endif
+#ifndef WOLFSSL_SP_MATH
+ err = wc_ecc_mulmod_ex(order, pubkey, inf, a, prime, 1, key->heap);
+ if (err == MP_OKAY && !wc_ecc_point_is_at_infinity(inf))
err = ECC_INF_E;
+#else
+ (void)a;
+ (void)prime;
+
+ err = WC_KEY_SIZE_E;
+#endif
}
- ecc_del_point(inf);
+ wc_ecc_del_point_h(inf, key->heap);
return err;
}
+#endif
+#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL*/
+
+#ifdef OPENSSL_EXTRA
+int wc_ecc_get_generator(ecc_point* ecp, int curve_idx)
+{
+ int err = MP_OKAY;
+ DECLARE_CURVE_SPECS(curve, 2);
+
+ if (!ecp || curve_idx < 0 || curve_idx > (int)(ECC_SET_COUNT-1))
+ return BAD_FUNC_ARG;
+ ALLOC_CURVE_SPECS(2);
+
+ err = wc_ecc_curve_load(&ecc_sets[curve_idx], &curve,
+ (ECC_CURVE_FIELD_GX | ECC_CURVE_FIELD_GY));
+ if (err == MP_OKAY)
+ err = mp_copy(curve->Gx, ecp->x);
+ if (err == MP_OKAY)
+ err = mp_copy(curve->Gy, ecp->y);
+ if (err == MP_OKAY)
+ err = mp_set(ecp->z, 1);
+
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
+
+ return err;
+}
+#endif /* OPENSSL_EXTRA */
-/* perform sanity checks on ec key validity, 0 on success */
+/* perform sanity checks on ecc key validity, 0 on success */
int wc_ecc_check_key(ecc_key* key)
{
- mp_int prime; /* used by multiple calls so let's cache */
- mp_int order; /* other callers have, so let's gen here */
int err;
+#ifndef WOLFSSL_SP_MATH
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
+ mp_int* b = NULL;
+#ifdef USE_ECC_B_PARAM
+ DECLARE_CURVE_SPECS(curve, 4);
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ mp_int b_lcl;
+#endif
+ DECLARE_CURVE_SPECS(curve, 3);
+#endif /* USE_ECC_B_PARAM */
+#endif /* WOLFSSL_ATECC508A */
if (key == NULL)
return BAD_FUNC_ARG;
- /* pubkey point cannot be at inifinity */
- if (ecc_point_is_at_infinity(&key->pubkey))
- return ECC_INF_E;
+#if defined(WOLFSSL_ATECC508A) || defined(WOLFSSL_CRYPTOCELL)
- err = mp_init_multi(&prime, &order, NULL, NULL, NULL, NULL);
- if (err != MP_OKAY)
- return err;
+ err = 0; /* consider key check success on ATECC508A */
- err = mp_read_radix(&prime, (char*)key->dp->prime, 16);
+#else
+ #ifdef USE_ECC_B_PARAM
+ ALLOC_CURVE_SPECS(4);
+ #else
+ ALLOC_CURVE_SPECS(3);
+ #ifndef WOLFSSL_SMALL_STACK
+ b = &b_lcl;
+ #else
+ b = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_ECC);
+ if (b == NULL) {
+ FREE_CURVE_SPECS();
+ return MEMORY_E;
+ }
+ #endif
+ XMEMSET(b, 0, sizeof(mp_int));
+ #endif
- /* make sure point is actually on curve */
+ /* SP 800-56Ar3, section 5.6.2.3.3, process step 1 */
+ /* pubkey point cannot be at infinity */
+ if (wc_ecc_point_is_at_infinity(&key->pubkey)) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, key->heap, DYNAMIC_TYPE_ECC);
+ #endif
+ FREE_CURVE_SPECS();
+ return ECC_INF_E;
+ }
+
+ /* load curve info */
+ err = wc_ecc_curve_load(key->dp, &curve, (ECC_CURVE_FIELD_PRIME |
+ ECC_CURVE_FIELD_AF | ECC_CURVE_FIELD_ORDER
+#ifdef USE_ECC_B_PARAM
+ | ECC_CURVE_FIELD_BF
+#endif
+ ));
+
+#ifndef USE_ECC_B_PARAM
+ /* load curve b parameter */
+ if (err == MP_OKAY)
+ err = mp_init(b);
if (err == MP_OKAY)
- err = ecc_is_point(key->dp, &key->pubkey, &prime);
+ err = mp_read_radix(b, key->dp->Bf, MP_RADIX_HEX);
+#else
+ if (err == MP_OKAY)
+ b = curve->Bf;
+#endif
+
+ /* SP 800-56Ar3, section 5.6.2.3.3, process step 2 */
+ /* Qx must be in the range [0, p-1] */
+ if (err == MP_OKAY) {
+ if (mp_cmp(key->pubkey.x, curve->prime) != MP_LT)
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ /* Qy must be in the range [0, p-1] */
+ if (err == MP_OKAY) {
+ if (mp_cmp(key->pubkey.y, curve->prime) != MP_LT)
+ err = ECC_OUT_OF_RANGE_E;
+ }
+
+ /* SP 800-56Ar3, section 5.6.2.3.3, process step 3 */
+ /* make sure point is actually on curve */
if (err == MP_OKAY)
- err = mp_read_radix(&order, (char*)key->dp->order, 16);
+ err = wc_ecc_is_point(&key->pubkey, curve->Af, b, curve->prime);
+ /* SP 800-56Ar3, section 5.6.2.3.3, process step 4 */
/* pubkey * order must be at infinity */
if (err == MP_OKAY)
- err = ecc_check_pubkey_order(key, &prime, &order);
+ err = ecc_check_pubkey_order(key, &key->pubkey, curve->Af, curve->prime,
+ curve->order);
+ /* SP 800-56Ar3, section 5.6.2.1.4, method (b) for ECC */
/* private * base generator must equal pubkey */
if (err == MP_OKAY && key->type == ECC_PRIVATEKEY)
- err = ecc_check_privkey_gen(key, &prime);
+ err = ecc_check_privkey_gen(key, curve->Af, curve->prime);
+
+ wc_ecc_curve_free(curve);
+
+#ifndef USE_ECC_B_PARAM
+ mp_clear(b);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, key->heap, DYNAMIC_TYPE_ECC);
+ #endif
+#endif
+
+ FREE_CURVE_SPECS();
+
+#endif /* WOLFSSL_ATECC508A */
+#else
+ if (key == NULL)
+ return BAD_FUNC_ARG;
- mp_clear(&order);
- mp_clear(&prime);
+ /* pubkey point cannot be at infinity */
+#ifndef WOLFSSL_SP_NO_256
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP256R1) {
+ err = sp_ecc_check_key_256(key->pubkey.x, key->pubkey.y, &key->k,
+ key->heap);
+ }
+ else
+#endif
+#ifdef WOLFSSL_SP_384
+ if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) {
+ err = sp_ecc_check_key_384(key->pubkey.x, key->pubkey.y, &key->k,
+ key->heap);
+ }
+ else
+#endif
+ {
+ err = WC_KEY_SIZE_E;
+ }
+#endif
return err;
}
-
+#ifdef HAVE_ECC_KEY_IMPORT
/* import public ECC key in ANSI X9.63 format */
-int wc_ecc_import_x963(const byte* in, word32 inLen, ecc_key* key)
+int wc_ecc_import_x963_ex(const byte* in, word32 inLen, ecc_key* key,
+ int curve_id)
{
- int x, err;
- int compressed = 0;
-
- if (in == NULL || key == NULL)
- return ECC_BAD_ARG_E;
+ int err = MP_OKAY;
+#ifdef HAVE_COMP_KEY
+ int compressed = 0;
+#endif
+ int keysize = 0;
+ byte pointType;
- /* must be odd */
- if ((inLen & 1) == 0) {
- return ECC_BAD_ARG_E;
- }
+ if (in == NULL || key == NULL)
+ return BAD_FUNC_ARG;
- /* init key */
-#ifdef ALT_ECC_SIZE
- key->pubkey.x = (mp_int*)&key->pubkey.xyz[0];
- key->pubkey.y = (mp_int*)&key->pubkey.xyz[1];
- key->pubkey.z = (mp_int*)&key->pubkey.xyz[2];
- alt_fp_init(key->pubkey.x);
- alt_fp_init(key->pubkey.y);
- alt_fp_init(key->pubkey.z);
- err = mp_init(&key->k);
-#else
- err = mp_init_multi(key->pubkey.x, key->pubkey.y, key->pubkey.z, &key->k,
- NULL, NULL);
-#endif
- if (err != MP_OKAY)
- return MEMORY_E;
+ /* must be odd */
+ if ((inLen & 1) == 0) {
+ return ECC_BAD_ARG_E;
+ }
- /* check for 4, 2, or 3 */
- if (in[0] != 0x04 && in[0] != 0x02 && in[0] != 0x03) {
- err = ASN_PARSE_E;
- }
+ /* make sure required variables are reset */
+ wc_ecc_reset(key);
- if (in[0] == 0x02 || in[0] == 0x03) {
-#ifdef HAVE_COMP_KEY
- compressed = 1;
-#else
- err = NOT_COMPILED_IN;
-#endif
- }
+ /* init key */
+ #ifdef ALT_ECC_SIZE
+ key->pubkey.x = (mp_int*)&key->pubkey.xyz[0];
+ key->pubkey.y = (mp_int*)&key->pubkey.xyz[1];
+ key->pubkey.z = (mp_int*)&key->pubkey.xyz[2];
+ alt_fp_init(key->pubkey.x);
+ alt_fp_init(key->pubkey.y);
+ alt_fp_init(key->pubkey.z);
+ err = mp_init(&key->k);
+ #else
+ err = mp_init_multi(&key->k,
+ key->pubkey.x, key->pubkey.y, key->pubkey.z, NULL, NULL);
+ #endif
+ if (err != MP_OKAY)
+ return MEMORY_E;
- if (err == MP_OKAY) {
- /* determine the idx */
+ /* check for point type (4, 2, or 3) */
+ pointType = in[0];
+ if (pointType != ECC_POINT_UNCOMP && pointType != ECC_POINT_COMP_EVEN &&
+ pointType != ECC_POINT_COMP_ODD) {
+ err = ASN_PARSE_E;
+ }
- if (compressed)
- inLen = (inLen-1)*2 + 1; /* used uncompressed len */
+ if (pointType == ECC_POINT_COMP_EVEN || pointType == ECC_POINT_COMP_ODD) {
+ #ifdef HAVE_COMP_KEY
+ compressed = 1;
+ #else
+ err = NOT_COMPILED_IN;
+ #endif
+ }
- for (x = 0; ecc_sets[x].size != 0; x++) {
- if ((unsigned)ecc_sets[x].size >= ((inLen-1)>>1)) {
- break;
- }
- }
- if (ecc_sets[x].size == 0) {
- err = ASN_PARSE_E;
- } else {
- /* set the idx */
- key->idx = x;
- key->dp = &ecc_sets[x];
- key->type = ECC_PUBLICKEY;
- }
- }
+ /* adjust to skip first byte */
+ inLen -= 1;
+ in += 1;
- /* read data */
- if (err == MP_OKAY)
- err = mp_read_unsigned_bin(key->pubkey.x, (byte*)in+1, (inLen-1)>>1);
+#ifdef WOLFSSL_ATECC508A
+ /* For SECP256R1 only save raw public key for hardware */
+ if (curve_id == ECC_SECP256R1 && inLen <= sizeof(key->pubkey_raw)) {
+ #ifdef HAVE_COMP_KEY
+ if (!compressed)
+ #endif
+ XMEMCPY(key->pubkey_raw, (byte*)in, inLen);
+ }
+#endif
-#ifdef HAVE_COMP_KEY
- if (err == MP_OKAY && compressed == 1) { /* build y */
- mp_int t1, t2, prime, a, b;
+ if (err == MP_OKAY) {
+ #ifdef HAVE_COMP_KEY
+ /* adjust inLen if compressed */
+ if (compressed)
+ inLen = inLen*2 + 1; /* used uncompressed len */
+ #endif
- if (mp_init_multi(&t1, &t2, &prime, &a, &b, NULL) != MP_OKAY)
- err = MEMORY_E;
+ /* determine key size */
+ keysize = (inLen>>1);
+ err = wc_ecc_set_curve(key, keysize, curve_id);
+ key->type = ECC_PUBLICKEY;
+ }
- /* load prime */
- if (err == MP_OKAY)
- err = mp_read_radix(&prime, (char *)key->dp->prime, 16);
+ /* read data */
+ if (err == MP_OKAY)
+ err = mp_read_unsigned_bin(key->pubkey.x, (byte*)in, keysize);
- /* load a */
- if (err == MP_OKAY)
- err = mp_read_radix(&a, (char *)key->dp->Af, 16);
+#ifdef HAVE_COMP_KEY
+ if (err == MP_OKAY && compressed == 1) { /* build y */
+#ifndef WOLFSSL_SP_MATH
+ mp_int t1, t2;
+ int did_init = 0;
+
+ DECLARE_CURVE_SPECS(curve, 3);
+ ALLOC_CURVE_SPECS(3);
+
+ if (mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL) != MP_OKAY)
+ err = MEMORY_E;
+ else
+ did_init = 1;
- /* load b */
+ /* load curve info */
if (err == MP_OKAY)
- err = mp_read_radix(&b, (char *)key->dp->Bf, 16);
+ err = wc_ecc_curve_load(key->dp, &curve,
+ (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF |
+ ECC_CURVE_FIELD_BF));
/* compute x^3 */
if (err == MP_OKAY)
err = mp_sqr(key->pubkey.x, &t1);
-
if (err == MP_OKAY)
- err = mp_mulmod(&t1, key->pubkey.x, &prime, &t1);
+ err = mp_mulmod(&t1, key->pubkey.x, curve->prime, &t1);
/* compute x^3 + a*x */
if (err == MP_OKAY)
- err = mp_mulmod(&a, key->pubkey.x, &prime, &t2);
-
+ err = mp_mulmod(curve->Af, key->pubkey.x, curve->prime, &t2);
if (err == MP_OKAY)
err = mp_add(&t1, &t2, &t1);
/* compute x^3 + a*x + b */
if (err == MP_OKAY)
- err = mp_add(&t1, &b, &t1);
+ err = mp_add(&t1, curve->Bf, &t1);
/* compute sqrt(x^3 + a*x + b) */
if (err == MP_OKAY)
- err = mp_sqrtmod_prime(&t1, &prime, &t2);
+ err = mp_sqrtmod_prime(&t1, curve->prime, &t2);
/* adjust y */
if (err == MP_OKAY) {
- if ((mp_isodd(&t2) && in[0] == 0x03) ||
- (!mp_isodd(&t2) && in[0] == 0x02)) {
- err = mp_mod(&t2, &prime, key->pubkey.y);
+ if ((mp_isodd(&t2) == MP_YES && pointType == ECC_POINT_COMP_ODD) ||
+ (mp_isodd(&t2) == MP_NO && pointType == ECC_POINT_COMP_EVEN)) {
+ err = mp_mod(&t2, curve->prime, &t2);
}
else {
- err = mp_submod(&prime, &t2, &prime, key->pubkey.y);
+ err = mp_submod(curve->prime, &t2, curve->prime, &t2);
}
+ if (err == MP_OKAY)
+ err = mp_copy(&t2, key->pubkey.y);
}
- mp_clear(&a);
- mp_clear(&b);
- mp_clear(&prime);
- mp_clear(&t2);
- mp_clear(&t1);
- }
+ if (did_init) {
+ mp_clear(&t2);
+ mp_clear(&t1);
+ }
+
+ wc_ecc_curve_free(curve);
+ FREE_CURVE_SPECS();
+#else
+ #ifndef WOLFSSL_SP_NO_256
+ if (key->dp->id == ECC_SECP256R1) {
+ sp_ecc_uncompress_256(key->pubkey.x, pointType, key->pubkey.y);
+ }
+ else
+ #endif
+ #ifdef WOLFSSL_SP_384
+ if (key->dp->id == ECC_SECP384R1) {
+ sp_ecc_uncompress_384(key->pubkey.x, pointType, key->pubkey.y);
+ }
+ else
+ #endif
+ {
+ err = WC_KEY_SIZE_E;
+ }
#endif
+ }
+#endif /* HAVE_COMP_KEY */
- if (err == MP_OKAY && compressed == 0)
- err = mp_read_unsigned_bin(key->pubkey.y, (byte*)in+1+((inLen-1)>>1),
- (inLen-1)>>1);
- if (err == MP_OKAY)
- mp_set(key->pubkey.z, 1);
+ if (err == MP_OKAY) {
+ #ifdef HAVE_COMP_KEY
+ if (compressed == 0)
+ #endif
+ {
+ err = mp_read_unsigned_bin(key->pubkey.y, (byte*)in + keysize,
+ keysize);
+ }
+ }
+ if (err == MP_OKAY)
+ err = mp_set(key->pubkey.z, 1);
#ifdef WOLFSSL_VALIDATE_ECC_IMPORT
- if (err == MP_OKAY)
- err = wc_ecc_check_key(key);
+ if (err == MP_OKAY)
+ err = wc_ecc_check_key(key);
#endif
- if (err != MP_OKAY) {
- mp_clear(key->pubkey.x);
- mp_clear(key->pubkey.y);
- mp_clear(key->pubkey.z);
- mp_clear(&key->k);
- }
+ if (err != MP_OKAY) {
+ mp_clear(key->pubkey.x);
+ mp_clear(key->pubkey.y);
+ mp_clear(key->pubkey.z);
+ mp_clear(&key->k);
+ }
- return err;
+ return err;
+}
+
+WOLFSSL_ABI
+int wc_ecc_import_x963(const byte* in, word32 inLen, ecc_key* key)
+{
+ return wc_ecc_import_x963_ex(in, inLen, key, ECC_CURVE_DEF);
+}
+#endif /* HAVE_ECC_KEY_IMPORT */
+
+#ifdef HAVE_ECC_KEY_EXPORT
+
+/* export ecc key to component form, d is optional if only exporting public
+ * encType is WC_TYPE_UNSIGNED_BIN or WC_TYPE_HEX_STR
+ * return MP_OKAY on success */
+int wc_ecc_export_ex(ecc_key* key, byte* qx, word32* qxLen,
+ byte* qy, word32* qyLen, byte* d, word32* dLen, int encType)
+{
+ int err = 0;
+ word32 keySz;
+
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (wc_ecc_is_valid_idx(key->idx) == 0) {
+ return ECC_BAD_ARG_E;
+ }
+ keySz = key->dp->size;
+
+ /* private key, d */
+ if (d != NULL) {
+ if (dLen == NULL ||
+ (key->type != ECC_PRIVATEKEY && key->type != ECC_PRIVATEKEY_ONLY))
+ return BAD_FUNC_ARG;
+
+ #ifdef WOLFSSL_ATECC508A
+ /* Hardware cannot export private portion */
+ return NOT_COMPILED_IN;
+ #else
+ err = wc_export_int(&key->k, d, dLen, keySz, encType);
+ if (err != MP_OKAY)
+ return err;
+ #endif
+ }
+
+ /* public x component */
+ if (qx != NULL) {
+ if (qxLen == NULL || key->type == ECC_PRIVATEKEY_ONLY)
+ return BAD_FUNC_ARG;
+
+ err = wc_export_int(key->pubkey.x, qx, qxLen, keySz, encType);
+ if (err != MP_OKAY)
+ return err;
+ }
+
+ /* public y component */
+ if (qy != NULL) {
+ if (qyLen == NULL || key->type == ECC_PRIVATEKEY_ONLY)
+ return BAD_FUNC_ARG;
+
+ err = wc_export_int(key->pubkey.y, qy, qyLen, keySz, encType);
+ if (err != MP_OKAY)
+ return err;
+ }
+
+ return err;
}
-/* export ecc private key only raw, outLen is in/out size
+/* export ecc private key only raw, outLen is in/out size as unsigned bin
return MP_OKAY on success */
int wc_ecc_export_private_only(ecc_key* key, byte* out, word32* outLen)
{
- word32 numlen;
+ if (out == NULL || outLen == NULL) {
+ return BAD_FUNC_ARG;
+ }
- if (key == NULL || out == NULL || outLen == NULL)
- return ECC_BAD_ARG_E;
+ return wc_ecc_export_ex(key, NULL, NULL, NULL, NULL, out, outLen,
+ WC_TYPE_UNSIGNED_BIN);
+}
- if (ecc_is_valid_idx(key->idx) == 0) {
- return ECC_BAD_ARG_E;
- }
- numlen = key->dp->size;
+/* export public key to raw elements including public (Qx,Qy) as unsigned bin
+ * return MP_OKAY on success, negative on error */
+int wc_ecc_export_public_raw(ecc_key* key, byte* qx, word32* qxLen,
+ byte* qy, word32* qyLen)
+{
+ if (qx == NULL || qxLen == NULL || qy == NULL || qyLen == NULL) {
+ return BAD_FUNC_ARG;
+ }
- if (*outLen < numlen) {
- *outLen = numlen;
- return BUFFER_E;
- }
- *outLen = numlen;
- XMEMSET(out, 0, *outLen);
- return mp_to_unsigned_bin(&key->k, out + (numlen -
- mp_unsigned_bin_size(&key->k)));
+ return wc_ecc_export_ex(key, qx, qxLen, qy, qyLen, NULL, NULL,
+ WC_TYPE_UNSIGNED_BIN);
}
+/* export ecc key to raw elements including public (Qx,Qy) and
+ * private (d) as unsigned bin
+ * return MP_OKAY on success, negative on error */
+int wc_ecc_export_private_raw(ecc_key* key, byte* qx, word32* qxLen,
+ byte* qy, word32* qyLen, byte* d, word32* dLen)
+{
+ return wc_ecc_export_ex(key, qx, qxLen, qy, qyLen, d, dLen,
+ WC_TYPE_UNSIGNED_BIN);
+}
-/* ecc private key import, public key in ANSI X9.63 format, private raw */
-int wc_ecc_import_private_key(const byte* priv, word32 privSz, const byte* pub,
- word32 pubSz, ecc_key* key)
+#endif /* HAVE_ECC_KEY_EXPORT */
+
+#ifdef HAVE_ECC_KEY_IMPORT
+/* import private key, public part optional if (pub) passed as NULL */
+int wc_ecc_import_private_key_ex(const byte* priv, word32 privSz,
+ const byte* pub, word32 pubSz, ecc_key* key,
+ int curve_id)
{
- int ret = wc_ecc_import_x963(pub, pubSz, key);
+ int ret;
+#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+ const CRYS_ECPKI_Domain_t* pDomain;
+ CRYS_ECPKI_BUILD_TempData_t tempBuff;
+#endif
+ if (key == NULL || priv == NULL)
+ return BAD_FUNC_ARG;
+
+ /* public optional, NULL if only importing private */
+ if (pub != NULL) {
+ #ifndef NO_ASN
+ word32 idx = 0;
+ ret = wc_ecc_import_x963_ex(pub, pubSz, key, curve_id);
+ if (ret < 0)
+ ret = wc_EccPublicKeyDecode(pub, &idx, key, pubSz);
+ key->type = ECC_PRIVATEKEY;
+ #else
+ ret = NOT_COMPILED_IN;
+ #endif
+ }
+ else {
+ /* make sure required variables are reset */
+ wc_ecc_reset(key);
+
+ /* set key size */
+ ret = wc_ecc_set_curve(key, privSz, curve_id);
+ key->type = ECC_PRIVATEKEY_ONLY;
+ }
+
if (ret != 0)
return ret;
- key->type = ECC_PRIVATEKEY;
+#ifdef WOLFSSL_ATECC508A
+ /* Hardware does not support loading private keys */
+ return NOT_COMPILED_IN;
+#elif defined(WOLFSSL_CRYPTOCELL)
+ pDomain = CRYS_ECPKI_GetEcDomain(cc310_mapCurve(curve_id));
+
+ if (pub != NULL && pub[0] != '\0') {
+ /* create public key from external key buffer */
+ ret = CRYS_ECPKI_BuildPublKeyFullCheck(pDomain,
+ (byte*)pub,
+ pubSz,
+ &key->ctx.pubKey,
+ &tempBuff);
+
+ if (ret != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_ECPKI_BuildPublKeyFullCheck failed");
+ return ret;
+ }
+ }
+ /* import private key */
+ if (priv != NULL && priv[0] != '\0') {
+
+ /* Create private key from external key buffer*/
+ ret = CRYS_ECPKI_BuildPrivKey(pDomain,
+ priv,
+ privSz,
+ &key->ctx.privKey);
+
+ if (ret != SA_SILIB_RET_OK) {
+ WOLFSSL_MSG("CRYS_ECPKI_BuildPrivKey failed");
+ return ret;
+ }
+
+ ret = mp_read_unsigned_bin(&key->k, priv, privSz);
+ }
+
+#else
ret = mp_read_unsigned_bin(&key->k, priv, privSz);
+#ifdef HAVE_WOLF_BIGINT
+ if (ret == 0 &&
+ wc_bigint_from_unsigned_bin(&key->k.raw, priv, privSz) != 0) {
+ mp_clear(&key->k);
+ ret = ASN_GETINT_E;
+ }
+#endif /* HAVE_WOLF_BIGINT */
+
+
+#endif /* WOLFSSL_ATECC508A */
#ifdef WOLFSSL_VALIDATE_ECC_IMPORT
- if (ret == MP_OKAY)
+ if ((pub != NULL) && (ret == MP_OKAY))
+ /* public key needed to perform key validation */
ret = ecc_check_privkey_gen_helper(key);
#endif
return ret;
}
+/* ecc private key import, public key in ANSI X9.63 format, private raw */
+int wc_ecc_import_private_key(const byte* priv, word32 privSz, const byte* pub,
+ word32 pubSz, ecc_key* key)
+{
+ return wc_ecc_import_private_key_ex(priv, privSz, pub, pubSz, key,
+ ECC_CURVE_DEF);
+}
+#endif /* HAVE_ECC_KEY_IMPORT */
+
+#ifndef NO_ASN
/**
Convert ECC R,S to signature
r R component of signature
@@ -2754,53 +7570,232 @@ int wc_ecc_import_private_key(const byte* priv, word32 privSz, const byte* pub,
int wc_ecc_rs_to_sig(const char* r, const char* s, byte* out, word32* outlen)
{
int err;
- mp_int rtmp;
- mp_int stmp;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* rtmp = NULL;
+ mp_int* stmp = NULL;
+#else
+ mp_int rtmp[1];
+ mp_int stmp[1];
+#endif
if (r == NULL || s == NULL || out == NULL || outlen == NULL)
return ECC_BAD_ARG_E;
- err = mp_init_multi(&rtmp, &stmp, NULL, NULL, NULL, NULL);
- if (err != MP_OKAY)
+#ifdef WOLFSSL_SMALL_STACK
+ rtmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (rtmp == NULL)
+ return MEMORY_E;
+ stmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (stmp == NULL) {
+ XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#endif
+
+ err = mp_init_multi(rtmp, stmp, NULL, NULL, NULL, NULL);
+ if (err != MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(stmp, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC);
+ #endif
+ return err;
+ }
+
+ err = mp_read_radix(rtmp, r, MP_RADIX_HEX);
+ if (err == MP_OKAY)
+ err = mp_read_radix(stmp, s, MP_RADIX_HEX);
+
+ /* convert mp_ints to ECDSA sig, initializes rtmp and stmp internally */
+ if (err == MP_OKAY)
+ err = StoreECC_DSA_Sig(out, outlen, rtmp, stmp);
+
+ if (err == MP_OKAY) {
+ if (mp_iszero(rtmp) == MP_YES || mp_iszero(stmp) == MP_YES)
+ err = MP_ZERO_E;
+ }
+
+ mp_clear(rtmp);
+ mp_clear(stmp);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(stmp, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+ return err;
+}
+
+/**
+ Convert ECC R,S raw unsigned bin to signature
+ r R component of signature
+ rSz R size
+ s S component of signature
+ sSz S size
+ out DER-encoded ECDSA signature
+ outlen [in/out] output buffer size, output signature size
+ return MP_OKAY on success
+*/
+int wc_ecc_rs_raw_to_sig(const byte* r, word32 rSz, const byte* s, word32 sSz,
+ byte* out, word32* outlen)
+{
+ int err;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* rtmp = NULL;
+ mp_int* stmp = NULL;
+#else
+ mp_int rtmp[1];
+ mp_int stmp[1];
+#endif
+
+ if (r == NULL || s == NULL || out == NULL || outlen == NULL)
+ return ECC_BAD_ARG_E;
+
+#ifdef WOLFSSL_SMALL_STACK
+ rtmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (rtmp == NULL)
+ return MEMORY_E;
+ stmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (stmp == NULL) {
+ XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#endif
+
+ err = mp_init_multi(rtmp, stmp, NULL, NULL, NULL, NULL);
+ if (err != MP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(stmp, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC);
+ #endif
return err;
+ }
- err = mp_read_radix(&rtmp, r, 16);
+ err = mp_read_unsigned_bin(rtmp, r, rSz);
if (err == MP_OKAY)
- err = mp_read_radix(&stmp, s, 16);
+ err = mp_read_unsigned_bin(stmp, s, sSz);
/* convert mp_ints to ECDSA sig, initializes rtmp and stmp internally */
if (err == MP_OKAY)
- err = StoreECC_DSA_Sig(out, outlen, &rtmp, &stmp);
+ err = StoreECC_DSA_Sig(out, outlen, rtmp, stmp);
if (err == MP_OKAY) {
- if (mp_iszero(&rtmp) || mp_iszero(&stmp))
+ if (mp_iszero(rtmp) == MP_YES || mp_iszero(stmp) == MP_YES)
err = MP_ZERO_E;
}
- mp_clear(&rtmp);
- mp_clear(&stmp);
+ mp_clear(rtmp);
+ mp_clear(stmp);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(stmp, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC);
+#endif
return err;
}
/**
- Import raw ECC key
- key The destination ecc_key structure
- qx x component of base point, as ASCII hex string
- qy y component of base point, as ASCII hex string
- d private key, as ASCII hex string
- curveName ECC curve name, from ecc_sets[]
- return MP_OKAY on success
+ Convert ECC signature to R,S
+ sig DER-encoded ECDSA signature
+ sigLen length of signature in octets
+ r R component of signature
+ rLen [in/out] output "r" buffer size, output "r" size
+ s S component of signature
+ sLen [in/out] output "s" buffer size, output "s" size
+ return MP_OKAY on success, negative on error
*/
-int wc_ecc_import_raw(ecc_key* key, const char* qx, const char* qy,
- const char* d, const char* curveName)
+int wc_ecc_sig_to_rs(const byte* sig, word32 sigLen, byte* r, word32* rLen,
+ byte* s, word32* sLen)
{
- int err, x;
+ int err;
+ int tmp_valid = 0;
+ word32 x = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* rtmp = NULL;
+ mp_int* stmp = NULL;
+#else
+ mp_int rtmp[1];
+ mp_int stmp[1];
+#endif
- if (key == NULL || qx == NULL || qy == NULL || d == NULL ||
- curveName == NULL)
+ if (sig == NULL || r == NULL || rLen == NULL || s == NULL || sLen == NULL)
return ECC_BAD_ARG_E;
+#ifdef WOLFSSL_SMALL_STACK
+ rtmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (rtmp == NULL)
+ return MEMORY_E;
+ stmp = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_ECC);
+ if (stmp == NULL) {
+ XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC);
+ return MEMORY_E;
+ }
+#endif
+
+ err = DecodeECC_DSA_Sig(sig, sigLen, rtmp, stmp);
+
+ /* rtmp and stmp are initialized */
+ if (err == MP_OKAY) {
+ tmp_valid = 1;
+
+ /* extract r */
+ x = mp_unsigned_bin_size(rtmp);
+ if (*rLen < x)
+ err = BUFFER_E;
+ }
+ if (err == MP_OKAY) {
+ *rLen = x;
+ err = mp_to_unsigned_bin(rtmp, r);
+ }
+
+ /* extract s */
+ if (err == MP_OKAY) {
+ x = mp_unsigned_bin_size(stmp);
+ if (*sLen < x)
+ err = BUFFER_E;
+
+ if (err == MP_OKAY) {
+ *sLen = x;
+ err = mp_to_unsigned_bin(stmp, s);
+ }
+ }
+
+ if (tmp_valid) {
+ mp_clear(rtmp);
+ mp_clear(stmp);
+ }
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(stmp, NULL, DYNAMIC_TYPE_ECC);
+ XFREE(rtmp, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+ return err;
+}
+#endif /* !NO_ASN */
+
+#ifdef HAVE_ECC_KEY_IMPORT
+static int wc_ecc_import_raw_private(ecc_key* key, const char* qx,
+ const char* qy, const char* d, int curve_id, int encType)
+{
+ int err = MP_OKAY;
+#if defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_ATECC508A)
+ const CRYS_ECPKI_Domain_t* pDomain;
+ CRYS_ECPKI_BUILD_TempData_t tempBuff;
+ byte key_raw[ECC_MAX_CRYPTO_HW_SIZE*2 + 1];
+ word32 keySz = 0;
+#endif
+ /* if d is NULL, only import as public key using Qx,Qy */
+ if (key == NULL || qx == NULL || qy == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* make sure required variables are reset */
+ wc_ecc_reset(key);
+
+ /* set curve type and index */
+ err = wc_ecc_set_curve(key, 0, curve_id);
+ if (err != 0) {
+ return err;
+ }
+
/* init key */
#ifdef ALT_ECC_SIZE
key->pubkey.x = (mp_int*)&key->pubkey.xyz[0];
@@ -2811,45 +7806,120 @@ int wc_ecc_import_raw(ecc_key* key, const char* qx, const char* qy,
alt_fp_init(key->pubkey.z);
err = mp_init(&key->k);
#else
- err = mp_init_multi(key->pubkey.x, key->pubkey.y, key->pubkey.z, &key->k,
- NULL, NULL);
+ err = mp_init_multi(&key->k, key->pubkey.x, key->pubkey.y, key->pubkey.z,
+ NULL, NULL);
#endif
if (err != MP_OKAY)
return MEMORY_E;
/* read Qx */
- if (err == MP_OKAY)
- err = mp_read_radix(key->pubkey.x, qx, 16);
+ if (err == MP_OKAY) {
+ if (encType == WC_TYPE_HEX_STR)
+ err = mp_read_radix(key->pubkey.x, qx, MP_RADIX_HEX);
+ else
+ err = mp_read_unsigned_bin(key->pubkey.x, (const byte*)qx,
+ key->dp->size);
+ }
/* read Qy */
- if (err == MP_OKAY)
- err = mp_read_radix(key->pubkey.y, qy, 16);
+ if (err == MP_OKAY) {
+ if (encType == WC_TYPE_HEX_STR)
+ err = mp_read_radix(key->pubkey.y, qy, MP_RADIX_HEX);
+ else
+ err = mp_read_unsigned_bin(key->pubkey.y, (const byte*)qy,
+ key->dp->size);
- if (err == MP_OKAY)
- mp_set(key->pubkey.z, 1);
+ }
- /* read and set the curve */
+ if (err == MP_OKAY)
+ err = mp_set(key->pubkey.z, 1);
+
+#ifdef WOLFSSL_ATECC508A
+ /* For SECP256R1 only save raw public key for hardware */
+ if (err == MP_OKAY && curve_id == ECC_SECP256R1) {
+ word32 keySz = key->dp->size;
+ err = wc_export_int(key->pubkey.x, key->pubkey_raw,
+ &keySz, keySz, WC_TYPE_UNSIGNED_BIN);
+ if (err == MP_OKAY)
+ err = wc_export_int(key->pubkey.y, &key->pubkey_raw[keySz],
+ &keySz, keySz, WC_TYPE_UNSIGNED_BIN);
+ }
+#elif defined(WOLFSSL_CRYPTOCELL)
if (err == MP_OKAY) {
- for (x = 0; ecc_sets[x].size != 0; x++) {
- if (XSTRNCMP(ecc_sets[x].name, curveName,
- XSTRLEN(curveName)) == 0) {
- break;
- }
+ key_raw[0] = ECC_POINT_UNCOMP;
+ keySz = (word32)key->dp->size;
+ err = wc_export_int(key->pubkey.x, &key_raw[1], &keySz, keySz,
+ WC_TYPE_UNSIGNED_BIN);
+ if (err == MP_OKAY) {
+ err = wc_export_int(key->pubkey.y, &key_raw[1+keySz],
+ &keySz, keySz, WC_TYPE_UNSIGNED_BIN);
}
- if (ecc_sets[x].size == 0) {
- err = ASN_PARSE_E;
- } else {
- /* set the curve */
- key->idx = x;
- key->dp = &ecc_sets[x];
- key->type = ECC_PUBLICKEY;
+
+ if (err == MP_OKAY) {
+ pDomain = CRYS_ECPKI_GetEcDomain(cc310_mapCurve(curve_id));
+
+ /* create public key from external key buffer */
+ err = CRYS_ECPKI_BuildPublKeyFullCheck(pDomain,
+ key_raw,
+ keySz*2 + 1,
+ &key->ctx.pubKey,
+ &tempBuff);
+ }
+
+ if (err != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_ECPKI_BuildPublKeyFullCheck failed");
+ return err;
}
}
+#endif
+
/* import private key */
if (err == MP_OKAY) {
- key->type = ECC_PRIVATEKEY;
- err = mp_read_radix(&key->k, d, 16);
+ if (d != NULL && d[0] != '\0') {
+ #ifdef WOLFSSL_ATECC508A
+ /* Hardware doesn't support loading private key */
+ err = NOT_COMPILED_IN;
+
+ #elif defined(WOLFSSL_CRYPTOCELL)
+
+ key->type = ECC_PRIVATEKEY;
+
+ if (encType == WC_TYPE_HEX_STR)
+ err = mp_read_radix(&key->k, d, MP_RADIX_HEX);
+ else
+ err = mp_read_unsigned_bin(&key->k, (const byte*)d,
+ key->dp->size);
+ if (err == MP_OKAY) {
+ err = wc_export_int(&key->k, &key_raw[0], &keySz, keySz,
+ WC_TYPE_UNSIGNED_BIN);
+ }
+
+ if (err == MP_OKAY) {
+ /* Create private key from external key buffer*/
+ err = CRYS_ECPKI_BuildPrivKey(pDomain,
+ key_raw,
+ keySz,
+ &key->ctx.privKey);
+
+ if (err != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_ECPKI_BuildPrivKey failed");
+ return err;
+ }
+ }
+
+ #else
+ key->type = ECC_PRIVATEKEY;
+
+ if (encType == WC_TYPE_HEX_STR)
+ err = mp_read_radix(&key->k, d, MP_RADIX_HEX);
+ else
+ err = mp_read_unsigned_bin(&key->k, (const byte*)d,
+ key->dp->size);
+ #endif /* WOLFSSL_ATECC508A */
+ } else {
+ key->type = ECC_PUBLICKEY;
+ }
}
#ifdef WOLFSSL_VALIDATE_ECC_IMPORT
@@ -2867,25 +7937,127 @@ int wc_ecc_import_raw(ecc_key* key, const char* qx, const char* qy,
return err;
}
+/**
+ Import raw ECC key
+ key The destination ecc_key structure
+ qx x component of the public key, as ASCII hex string
+ qy y component of the public key, as ASCII hex string
+ d private key, as ASCII hex string, optional if importing public
+ key only
+ dp Custom ecc_set_type
+ return MP_OKAY on success
+*/
+int wc_ecc_import_raw_ex(ecc_key* key, const char* qx, const char* qy,
+ const char* d, int curve_id)
+{
+ return wc_ecc_import_raw_private(key, qx, qy, d, curve_id,
+ WC_TYPE_HEX_STR);
+
+}
+
+/* Import x, y and optional private (d) as unsigned binary */
+int wc_ecc_import_unsigned(ecc_key* key, byte* qx, byte* qy,
+ byte* d, int curve_id)
+{
+ return wc_ecc_import_raw_private(key, (const char*)qx, (const char*)qy,
+ (const char*)d, curve_id, WC_TYPE_UNSIGNED_BIN);
+}
+
+/**
+ Import raw ECC key
+ key The destination ecc_key structure
+ qx x component of the public key, as ASCII hex string
+ qy y component of the public key, as ASCII hex string
+ d private key, as ASCII hex string, optional if importing public
+ key only
+ curveName ECC curve name, from ecc_sets[]
+ return MP_OKAY on success
+*/
+int wc_ecc_import_raw(ecc_key* key, const char* qx, const char* qy,
+ const char* d, const char* curveName)
+{
+ int err, x;
+
+ /* if d is NULL, only import as public key using Qx,Qy */
+ if (key == NULL || qx == NULL || qy == NULL || curveName == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* set curve type and index */
+ for (x = 0; ecc_sets[x].size != 0; x++) {
+ if (XSTRNCMP(ecc_sets[x].name, curveName,
+ XSTRLEN(curveName)) == 0) {
+ break;
+ }
+ }
+
+ if (ecc_sets[x].size == 0) {
+ WOLFSSL_MSG("ecc_set curve name not found");
+ err = ASN_PARSE_E;
+ } else {
+ return wc_ecc_import_raw_private(key, qx, qy, d, ecc_sets[x].id,
+ WC_TYPE_HEX_STR);
+ }
+
+ return err;
+}
+#endif /* HAVE_ECC_KEY_IMPORT */
/* key size in octets */
int wc_ecc_size(ecc_key* key)
{
- if (key == NULL) return 0;
+ if (key == NULL)
+ return 0;
return key->dp->size;
}
+/* maximum signature size based on key size */
+int wc_ecc_sig_size_calc(int sz)
+{
+ int maxSigSz = 0;
+
+ /* calculate based on key bits */
+ /* maximum possible signature header size is 7 bytes plus 2 bytes padding */
+ maxSigSz = (sz * 2) + SIG_HEADER_SZ + ECC_MAX_PAD_SZ;
-/* worst case estimate, check actual return from wc_ecc_sign_hash for actual value
- of signature size in octets */
+ /* if total length is less than 128 + SEQ(1)+LEN(1) then subtract 1 */
+ if (maxSigSz < (128 + 2)) {
+ maxSigSz -= 1;
+ }
+
+ return maxSigSz;
+}
+
+/* maximum signature size based on actual key curve */
int wc_ecc_sig_size(ecc_key* key)
{
- int sz = wc_ecc_size(key);
- if (sz <= 0)
- return sz;
+ int maxSigSz;
+ int orderBits, keySz;
+
+ if (key == NULL || key->dp == NULL)
+ return 0;
+
+ /* the signature r and s will always be less than order */
+ /* if the order MSB (top bit of byte) is set then ASN encoding needs
+ extra byte for r and s, so add 2 */
+ keySz = key->dp->size;
+ orderBits = wc_ecc_get_curve_order_bit_count(key->dp);
+ if (orderBits > keySz * 8) {
+ keySz = (orderBits + 7) / 8;
+ }
+ /* maximum possible signature header size is 7 bytes */
+ maxSigSz = (keySz * 2) + SIG_HEADER_SZ;
+ if ((orderBits % 8) == 0) {
+ /* MSB can be set, so add 2 */
+ maxSigSz += ECC_MAX_PAD_SZ;
+ }
+ /* if total length is less than 128 + SEQ(1)+LEN(1) then subtract 1 */
+ if (maxSigSz < (128 + 2)) {
+ maxSigSz -= 1;
+ }
- return sz * 2 + SIG_HEADER_SZ + 4; /* (4) worst case estimate */
+ return maxSigSz;
}
@@ -2894,7 +8066,7 @@ int wc_ecc_sig_size(ecc_key* key)
/* fixed point ECC cache */
/* number of entries in the cache */
#ifndef FP_ENTRIES
- #define FP_ENTRIES 16
+ #define FP_ENTRIES 15
#endif
/* number of bits in LUT */
@@ -2914,10 +8086,12 @@ int wc_ecc_sig_size(ecc_key* key)
#endif
+#ifndef WOLFSSL_SP_MATH
+
/** Our FP cache */
typedef struct {
ecc_point* g; /* cached COPY of base point */
- ecc_point* LUT[1U<<FP_LUT]; /* fixed point lookup */
+ ecc_point* LUT[1U<<FP_LUT]; /* fixed point lookup */
mp_int mu; /* copy of the montgomery constant */
int lru_count; /* amount of times this entry has been used */
int lock; /* flag to indicate cache eviction */
@@ -2936,524 +8110,524 @@ static THREAD_LS_T fp_cache_t fp_cache[FP_ENTRIES];
static const struct {
int ham, terma, termb;
} lut_orders[] = {
- { 0, 0, 0 }, { 1, 0, 0 }, { 1, 0, 0 }, { 2, 1, 2 }, { 1, 0, 0 }, { 2, 1, 4 }, { 2, 2, 4 }, { 3, 3, 4 },
- { 1, 0, 0 }, { 2, 1, 8 }, { 2, 2, 8 }, { 3, 3, 8 }, { 2, 4, 8 }, { 3, 5, 8 }, { 3, 6, 8 }, { 4, 7, 8 },
- { 1, 0, 0 }, { 2, 1, 16 }, { 2, 2, 16 }, { 3, 3, 16 }, { 2, 4, 16 }, { 3, 5, 16 }, { 3, 6, 16 }, { 4, 7, 16 },
- { 2, 8, 16 }, { 3, 9, 16 }, { 3, 10, 16 }, { 4, 11, 16 }, { 3, 12, 16 }, { 4, 13, 16 }, { 4, 14, 16 }, { 5, 15, 16 },
- { 1, 0, 0 }, { 2, 1, 32 }, { 2, 2, 32 }, { 3, 3, 32 }, { 2, 4, 32 }, { 3, 5, 32 }, { 3, 6, 32 }, { 4, 7, 32 },
- { 2, 8, 32 }, { 3, 9, 32 }, { 3, 10, 32 }, { 4, 11, 32 }, { 3, 12, 32 }, { 4, 13, 32 }, { 4, 14, 32 }, { 5, 15, 32 },
- { 2, 16, 32 }, { 3, 17, 32 }, { 3, 18, 32 }, { 4, 19, 32 }, { 3, 20, 32 }, { 4, 21, 32 }, { 4, 22, 32 }, { 5, 23, 32 },
- { 3, 24, 32 }, { 4, 25, 32 }, { 4, 26, 32 }, { 5, 27, 32 }, { 4, 28, 32 }, { 5, 29, 32 }, { 5, 30, 32 }, { 6, 31, 32 },
+ { 0, 0, 0 }, { 1, 0, 0 }, { 1, 0, 0 }, { 2, 1, 2 }, { 1, 0, 0 }, { 2, 1, 4 }, { 2, 2, 4 }, { 3, 3, 4 },
+ { 1, 0, 0 }, { 2, 1, 8 }, { 2, 2, 8 }, { 3, 3, 8 }, { 2, 4, 8 }, { 3, 5, 8 }, { 3, 6, 8 }, { 4, 7, 8 },
+ { 1, 0, 0 }, { 2, 1, 16 }, { 2, 2, 16 }, { 3, 3, 16 }, { 2, 4, 16 }, { 3, 5, 16 }, { 3, 6, 16 }, { 4, 7, 16 },
+ { 2, 8, 16 }, { 3, 9, 16 }, { 3, 10, 16 }, { 4, 11, 16 }, { 3, 12, 16 }, { 4, 13, 16 }, { 4, 14, 16 }, { 5, 15, 16 },
+ { 1, 0, 0 }, { 2, 1, 32 }, { 2, 2, 32 }, { 3, 3, 32 }, { 2, 4, 32 }, { 3, 5, 32 }, { 3, 6, 32 }, { 4, 7, 32 },
+ { 2, 8, 32 }, { 3, 9, 32 }, { 3, 10, 32 }, { 4, 11, 32 }, { 3, 12, 32 }, { 4, 13, 32 }, { 4, 14, 32 }, { 5, 15, 32 },
+ { 2, 16, 32 }, { 3, 17, 32 }, { 3, 18, 32 }, { 4, 19, 32 }, { 3, 20, 32 }, { 4, 21, 32 }, { 4, 22, 32 }, { 5, 23, 32 },
+ { 3, 24, 32 }, { 4, 25, 32 }, { 4, 26, 32 }, { 5, 27, 32 }, { 4, 28, 32 }, { 5, 29, 32 }, { 5, 30, 32 }, { 6, 31, 32 },
#if FP_LUT > 6
- { 1, 0, 0 }, { 2, 1, 64 }, { 2, 2, 64 }, { 3, 3, 64 }, { 2, 4, 64 }, { 3, 5, 64 }, { 3, 6, 64 }, { 4, 7, 64 },
- { 2, 8, 64 }, { 3, 9, 64 }, { 3, 10, 64 }, { 4, 11, 64 }, { 3, 12, 64 }, { 4, 13, 64 }, { 4, 14, 64 }, { 5, 15, 64 },
- { 2, 16, 64 }, { 3, 17, 64 }, { 3, 18, 64 }, { 4, 19, 64 }, { 3, 20, 64 }, { 4, 21, 64 }, { 4, 22, 64 }, { 5, 23, 64 },
- { 3, 24, 64 }, { 4, 25, 64 }, { 4, 26, 64 }, { 5, 27, 64 }, { 4, 28, 64 }, { 5, 29, 64 }, { 5, 30, 64 }, { 6, 31, 64 },
- { 2, 32, 64 }, { 3, 33, 64 }, { 3, 34, 64 }, { 4, 35, 64 }, { 3, 36, 64 }, { 4, 37, 64 }, { 4, 38, 64 }, { 5, 39, 64 },
- { 3, 40, 64 }, { 4, 41, 64 }, { 4, 42, 64 }, { 5, 43, 64 }, { 4, 44, 64 }, { 5, 45, 64 }, { 5, 46, 64 }, { 6, 47, 64 },
- { 3, 48, 64 }, { 4, 49, 64 }, { 4, 50, 64 }, { 5, 51, 64 }, { 4, 52, 64 }, { 5, 53, 64 }, { 5, 54, 64 }, { 6, 55, 64 },
- { 4, 56, 64 }, { 5, 57, 64 }, { 5, 58, 64 }, { 6, 59, 64 }, { 5, 60, 64 }, { 6, 61, 64 }, { 6, 62, 64 }, { 7, 63, 64 },
+ { 1, 0, 0 }, { 2, 1, 64 }, { 2, 2, 64 }, { 3, 3, 64 }, { 2, 4, 64 }, { 3, 5, 64 }, { 3, 6, 64 }, { 4, 7, 64 },
+ { 2, 8, 64 }, { 3, 9, 64 }, { 3, 10, 64 }, { 4, 11, 64 }, { 3, 12, 64 }, { 4, 13, 64 }, { 4, 14, 64 }, { 5, 15, 64 },
+ { 2, 16, 64 }, { 3, 17, 64 }, { 3, 18, 64 }, { 4, 19, 64 }, { 3, 20, 64 }, { 4, 21, 64 }, { 4, 22, 64 }, { 5, 23, 64 },
+ { 3, 24, 64 }, { 4, 25, 64 }, { 4, 26, 64 }, { 5, 27, 64 }, { 4, 28, 64 }, { 5, 29, 64 }, { 5, 30, 64 }, { 6, 31, 64 },
+ { 2, 32, 64 }, { 3, 33, 64 }, { 3, 34, 64 }, { 4, 35, 64 }, { 3, 36, 64 }, { 4, 37, 64 }, { 4, 38, 64 }, { 5, 39, 64 },
+ { 3, 40, 64 }, { 4, 41, 64 }, { 4, 42, 64 }, { 5, 43, 64 }, { 4, 44, 64 }, { 5, 45, 64 }, { 5, 46, 64 }, { 6, 47, 64 },
+ { 3, 48, 64 }, { 4, 49, 64 }, { 4, 50, 64 }, { 5, 51, 64 }, { 4, 52, 64 }, { 5, 53, 64 }, { 5, 54, 64 }, { 6, 55, 64 },
+ { 4, 56, 64 }, { 5, 57, 64 }, { 5, 58, 64 }, { 6, 59, 64 }, { 5, 60, 64 }, { 6, 61, 64 }, { 6, 62, 64 }, { 7, 63, 64 },
#if FP_LUT > 7
- { 1, 0, 0 }, { 2, 1, 128 }, { 2, 2, 128 }, { 3, 3, 128 }, { 2, 4, 128 }, { 3, 5, 128 }, { 3, 6, 128 }, { 4, 7, 128 },
- { 2, 8, 128 }, { 3, 9, 128 }, { 3, 10, 128 }, { 4, 11, 128 }, { 3, 12, 128 }, { 4, 13, 128 }, { 4, 14, 128 }, { 5, 15, 128 },
- { 2, 16, 128 }, { 3, 17, 128 }, { 3, 18, 128 }, { 4, 19, 128 }, { 3, 20, 128 }, { 4, 21, 128 }, { 4, 22, 128 }, { 5, 23, 128 },
- { 3, 24, 128 }, { 4, 25, 128 }, { 4, 26, 128 }, { 5, 27, 128 }, { 4, 28, 128 }, { 5, 29, 128 }, { 5, 30, 128 }, { 6, 31, 128 },
- { 2, 32, 128 }, { 3, 33, 128 }, { 3, 34, 128 }, { 4, 35, 128 }, { 3, 36, 128 }, { 4, 37, 128 }, { 4, 38, 128 }, { 5, 39, 128 },
- { 3, 40, 128 }, { 4, 41, 128 }, { 4, 42, 128 }, { 5, 43, 128 }, { 4, 44, 128 }, { 5, 45, 128 }, { 5, 46, 128 }, { 6, 47, 128 },
- { 3, 48, 128 }, { 4, 49, 128 }, { 4, 50, 128 }, { 5, 51, 128 }, { 4, 52, 128 }, { 5, 53, 128 }, { 5, 54, 128 }, { 6, 55, 128 },
- { 4, 56, 128 }, { 5, 57, 128 }, { 5, 58, 128 }, { 6, 59, 128 }, { 5, 60, 128 }, { 6, 61, 128 }, { 6, 62, 128 }, { 7, 63, 128 },
- { 2, 64, 128 }, { 3, 65, 128 }, { 3, 66, 128 }, { 4, 67, 128 }, { 3, 68, 128 }, { 4, 69, 128 }, { 4, 70, 128 }, { 5, 71, 128 },
- { 3, 72, 128 }, { 4, 73, 128 }, { 4, 74, 128 }, { 5, 75, 128 }, { 4, 76, 128 }, { 5, 77, 128 }, { 5, 78, 128 }, { 6, 79, 128 },
- { 3, 80, 128 }, { 4, 81, 128 }, { 4, 82, 128 }, { 5, 83, 128 }, { 4, 84, 128 }, { 5, 85, 128 }, { 5, 86, 128 }, { 6, 87, 128 },
- { 4, 88, 128 }, { 5, 89, 128 }, { 5, 90, 128 }, { 6, 91, 128 }, { 5, 92, 128 }, { 6, 93, 128 }, { 6, 94, 128 }, { 7, 95, 128 },
- { 3, 96, 128 }, { 4, 97, 128 }, { 4, 98, 128 }, { 5, 99, 128 }, { 4, 100, 128 }, { 5, 101, 128 }, { 5, 102, 128 }, { 6, 103, 128 },
- { 4, 104, 128 }, { 5, 105, 128 }, { 5, 106, 128 }, { 6, 107, 128 }, { 5, 108, 128 }, { 6, 109, 128 }, { 6, 110, 128 }, { 7, 111, 128 },
- { 4, 112, 128 }, { 5, 113, 128 }, { 5, 114, 128 }, { 6, 115, 128 }, { 5, 116, 128 }, { 6, 117, 128 }, { 6, 118, 128 }, { 7, 119, 128 },
- { 5, 120, 128 }, { 6, 121, 128 }, { 6, 122, 128 }, { 7, 123, 128 }, { 6, 124, 128 }, { 7, 125, 128 }, { 7, 126, 128 }, { 8, 127, 128 },
+ { 1, 0, 0 }, { 2, 1, 128 }, { 2, 2, 128 }, { 3, 3, 128 }, { 2, 4, 128 }, { 3, 5, 128 }, { 3, 6, 128 }, { 4, 7, 128 },
+ { 2, 8, 128 }, { 3, 9, 128 }, { 3, 10, 128 }, { 4, 11, 128 }, { 3, 12, 128 }, { 4, 13, 128 }, { 4, 14, 128 }, { 5, 15, 128 },
+ { 2, 16, 128 }, { 3, 17, 128 }, { 3, 18, 128 }, { 4, 19, 128 }, { 3, 20, 128 }, { 4, 21, 128 }, { 4, 22, 128 }, { 5, 23, 128 },
+ { 3, 24, 128 }, { 4, 25, 128 }, { 4, 26, 128 }, { 5, 27, 128 }, { 4, 28, 128 }, { 5, 29, 128 }, { 5, 30, 128 }, { 6, 31, 128 },
+ { 2, 32, 128 }, { 3, 33, 128 }, { 3, 34, 128 }, { 4, 35, 128 }, { 3, 36, 128 }, { 4, 37, 128 }, { 4, 38, 128 }, { 5, 39, 128 },
+ { 3, 40, 128 }, { 4, 41, 128 }, { 4, 42, 128 }, { 5, 43, 128 }, { 4, 44, 128 }, { 5, 45, 128 }, { 5, 46, 128 }, { 6, 47, 128 },
+ { 3, 48, 128 }, { 4, 49, 128 }, { 4, 50, 128 }, { 5, 51, 128 }, { 4, 52, 128 }, { 5, 53, 128 }, { 5, 54, 128 }, { 6, 55, 128 },
+ { 4, 56, 128 }, { 5, 57, 128 }, { 5, 58, 128 }, { 6, 59, 128 }, { 5, 60, 128 }, { 6, 61, 128 }, { 6, 62, 128 }, { 7, 63, 128 },
+ { 2, 64, 128 }, { 3, 65, 128 }, { 3, 66, 128 }, { 4, 67, 128 }, { 3, 68, 128 }, { 4, 69, 128 }, { 4, 70, 128 }, { 5, 71, 128 },
+ { 3, 72, 128 }, { 4, 73, 128 }, { 4, 74, 128 }, { 5, 75, 128 }, { 4, 76, 128 }, { 5, 77, 128 }, { 5, 78, 128 }, { 6, 79, 128 },
+ { 3, 80, 128 }, { 4, 81, 128 }, { 4, 82, 128 }, { 5, 83, 128 }, { 4, 84, 128 }, { 5, 85, 128 }, { 5, 86, 128 }, { 6, 87, 128 },
+ { 4, 88, 128 }, { 5, 89, 128 }, { 5, 90, 128 }, { 6, 91, 128 }, { 5, 92, 128 }, { 6, 93, 128 }, { 6, 94, 128 }, { 7, 95, 128 },
+ { 3, 96, 128 }, { 4, 97, 128 }, { 4, 98, 128 }, { 5, 99, 128 }, { 4, 100, 128 }, { 5, 101, 128 }, { 5, 102, 128 }, { 6, 103, 128 },
+ { 4, 104, 128 }, { 5, 105, 128 }, { 5, 106, 128 }, { 6, 107, 128 }, { 5, 108, 128 }, { 6, 109, 128 }, { 6, 110, 128 }, { 7, 111, 128 },
+ { 4, 112, 128 }, { 5, 113, 128 }, { 5, 114, 128 }, { 6, 115, 128 }, { 5, 116, 128 }, { 6, 117, 128 }, { 6, 118, 128 }, { 7, 119, 128 },
+ { 5, 120, 128 }, { 6, 121, 128 }, { 6, 122, 128 }, { 7, 123, 128 }, { 6, 124, 128 }, { 7, 125, 128 }, { 7, 126, 128 }, { 8, 127, 128 },
#if FP_LUT > 8
- { 1, 0, 0 }, { 2, 1, 256 }, { 2, 2, 256 }, { 3, 3, 256 }, { 2, 4, 256 }, { 3, 5, 256 }, { 3, 6, 256 }, { 4, 7, 256 },
- { 2, 8, 256 }, { 3, 9, 256 }, { 3, 10, 256 }, { 4, 11, 256 }, { 3, 12, 256 }, { 4, 13, 256 }, { 4, 14, 256 }, { 5, 15, 256 },
- { 2, 16, 256 }, { 3, 17, 256 }, { 3, 18, 256 }, { 4, 19, 256 }, { 3, 20, 256 }, { 4, 21, 256 }, { 4, 22, 256 }, { 5, 23, 256 },
- { 3, 24, 256 }, { 4, 25, 256 }, { 4, 26, 256 }, { 5, 27, 256 }, { 4, 28, 256 }, { 5, 29, 256 }, { 5, 30, 256 }, { 6, 31, 256 },
- { 2, 32, 256 }, { 3, 33, 256 }, { 3, 34, 256 }, { 4, 35, 256 }, { 3, 36, 256 }, { 4, 37, 256 }, { 4, 38, 256 }, { 5, 39, 256 },
- { 3, 40, 256 }, { 4, 41, 256 }, { 4, 42, 256 }, { 5, 43, 256 }, { 4, 44, 256 }, { 5, 45, 256 }, { 5, 46, 256 }, { 6, 47, 256 },
- { 3, 48, 256 }, { 4, 49, 256 }, { 4, 50, 256 }, { 5, 51, 256 }, { 4, 52, 256 }, { 5, 53, 256 }, { 5, 54, 256 }, { 6, 55, 256 },
- { 4, 56, 256 }, { 5, 57, 256 }, { 5, 58, 256 }, { 6, 59, 256 }, { 5, 60, 256 }, { 6, 61, 256 }, { 6, 62, 256 }, { 7, 63, 256 },
- { 2, 64, 256 }, { 3, 65, 256 }, { 3, 66, 256 }, { 4, 67, 256 }, { 3, 68, 256 }, { 4, 69, 256 }, { 4, 70, 256 }, { 5, 71, 256 },
- { 3, 72, 256 }, { 4, 73, 256 }, { 4, 74, 256 }, { 5, 75, 256 }, { 4, 76, 256 }, { 5, 77, 256 }, { 5, 78, 256 }, { 6, 79, 256 },
- { 3, 80, 256 }, { 4, 81, 256 }, { 4, 82, 256 }, { 5, 83, 256 }, { 4, 84, 256 }, { 5, 85, 256 }, { 5, 86, 256 }, { 6, 87, 256 },
- { 4, 88, 256 }, { 5, 89, 256 }, { 5, 90, 256 }, { 6, 91, 256 }, { 5, 92, 256 }, { 6, 93, 256 }, { 6, 94, 256 }, { 7, 95, 256 },
- { 3, 96, 256 }, { 4, 97, 256 }, { 4, 98, 256 }, { 5, 99, 256 }, { 4, 100, 256 }, { 5, 101, 256 }, { 5, 102, 256 }, { 6, 103, 256 },
- { 4, 104, 256 }, { 5, 105, 256 }, { 5, 106, 256 }, { 6, 107, 256 }, { 5, 108, 256 }, { 6, 109, 256 }, { 6, 110, 256 }, { 7, 111, 256 },
- { 4, 112, 256 }, { 5, 113, 256 }, { 5, 114, 256 }, { 6, 115, 256 }, { 5, 116, 256 }, { 6, 117, 256 }, { 6, 118, 256 }, { 7, 119, 256 },
- { 5, 120, 256 }, { 6, 121, 256 }, { 6, 122, 256 }, { 7, 123, 256 }, { 6, 124, 256 }, { 7, 125, 256 }, { 7, 126, 256 }, { 8, 127, 256 },
- { 2, 128, 256 }, { 3, 129, 256 }, { 3, 130, 256 }, { 4, 131, 256 }, { 3, 132, 256 }, { 4, 133, 256 }, { 4, 134, 256 }, { 5, 135, 256 },
- { 3, 136, 256 }, { 4, 137, 256 }, { 4, 138, 256 }, { 5, 139, 256 }, { 4, 140, 256 }, { 5, 141, 256 }, { 5, 142, 256 }, { 6, 143, 256 },
- { 3, 144, 256 }, { 4, 145, 256 }, { 4, 146, 256 }, { 5, 147, 256 }, { 4, 148, 256 }, { 5, 149, 256 }, { 5, 150, 256 }, { 6, 151, 256 },
- { 4, 152, 256 }, { 5, 153, 256 }, { 5, 154, 256 }, { 6, 155, 256 }, { 5, 156, 256 }, { 6, 157, 256 }, { 6, 158, 256 }, { 7, 159, 256 },
- { 3, 160, 256 }, { 4, 161, 256 }, { 4, 162, 256 }, { 5, 163, 256 }, { 4, 164, 256 }, { 5, 165, 256 }, { 5, 166, 256 }, { 6, 167, 256 },
- { 4, 168, 256 }, { 5, 169, 256 }, { 5, 170, 256 }, { 6, 171, 256 }, { 5, 172, 256 }, { 6, 173, 256 }, { 6, 174, 256 }, { 7, 175, 256 },
- { 4, 176, 256 }, { 5, 177, 256 }, { 5, 178, 256 }, { 6, 179, 256 }, { 5, 180, 256 }, { 6, 181, 256 }, { 6, 182, 256 }, { 7, 183, 256 },
- { 5, 184, 256 }, { 6, 185, 256 }, { 6, 186, 256 }, { 7, 187, 256 }, { 6, 188, 256 }, { 7, 189, 256 }, { 7, 190, 256 }, { 8, 191, 256 },
- { 3, 192, 256 }, { 4, 193, 256 }, { 4, 194, 256 }, { 5, 195, 256 }, { 4, 196, 256 }, { 5, 197, 256 }, { 5, 198, 256 }, { 6, 199, 256 },
- { 4, 200, 256 }, { 5, 201, 256 }, { 5, 202, 256 }, { 6, 203, 256 }, { 5, 204, 256 }, { 6, 205, 256 }, { 6, 206, 256 }, { 7, 207, 256 },
- { 4, 208, 256 }, { 5, 209, 256 }, { 5, 210, 256 }, { 6, 211, 256 }, { 5, 212, 256 }, { 6, 213, 256 }, { 6, 214, 256 }, { 7, 215, 256 },
- { 5, 216, 256 }, { 6, 217, 256 }, { 6, 218, 256 }, { 7, 219, 256 }, { 6, 220, 256 }, { 7, 221, 256 }, { 7, 222, 256 }, { 8, 223, 256 },
- { 4, 224, 256 }, { 5, 225, 256 }, { 5, 226, 256 }, { 6, 227, 256 }, { 5, 228, 256 }, { 6, 229, 256 }, { 6, 230, 256 }, { 7, 231, 256 },
- { 5, 232, 256 }, { 6, 233, 256 }, { 6, 234, 256 }, { 7, 235, 256 }, { 6, 236, 256 }, { 7, 237, 256 }, { 7, 238, 256 }, { 8, 239, 256 },
- { 5, 240, 256 }, { 6, 241, 256 }, { 6, 242, 256 }, { 7, 243, 256 }, { 6, 244, 256 }, { 7, 245, 256 }, { 7, 246, 256 }, { 8, 247, 256 },
- { 6, 248, 256 }, { 7, 249, 256 }, { 7, 250, 256 }, { 8, 251, 256 }, { 7, 252, 256 }, { 8, 253, 256 }, { 8, 254, 256 }, { 9, 255, 256 },
+ { 1, 0, 0 }, { 2, 1, 256 }, { 2, 2, 256 }, { 3, 3, 256 }, { 2, 4, 256 }, { 3, 5, 256 }, { 3, 6, 256 }, { 4, 7, 256 },
+ { 2, 8, 256 }, { 3, 9, 256 }, { 3, 10, 256 }, { 4, 11, 256 }, { 3, 12, 256 }, { 4, 13, 256 }, { 4, 14, 256 }, { 5, 15, 256 },
+ { 2, 16, 256 }, { 3, 17, 256 }, { 3, 18, 256 }, { 4, 19, 256 }, { 3, 20, 256 }, { 4, 21, 256 }, { 4, 22, 256 }, { 5, 23, 256 },
+ { 3, 24, 256 }, { 4, 25, 256 }, { 4, 26, 256 }, { 5, 27, 256 }, { 4, 28, 256 }, { 5, 29, 256 }, { 5, 30, 256 }, { 6, 31, 256 },
+ { 2, 32, 256 }, { 3, 33, 256 }, { 3, 34, 256 }, { 4, 35, 256 }, { 3, 36, 256 }, { 4, 37, 256 }, { 4, 38, 256 }, { 5, 39, 256 },
+ { 3, 40, 256 }, { 4, 41, 256 }, { 4, 42, 256 }, { 5, 43, 256 }, { 4, 44, 256 }, { 5, 45, 256 }, { 5, 46, 256 }, { 6, 47, 256 },
+ { 3, 48, 256 }, { 4, 49, 256 }, { 4, 50, 256 }, { 5, 51, 256 }, { 4, 52, 256 }, { 5, 53, 256 }, { 5, 54, 256 }, { 6, 55, 256 },
+ { 4, 56, 256 }, { 5, 57, 256 }, { 5, 58, 256 }, { 6, 59, 256 }, { 5, 60, 256 }, { 6, 61, 256 }, { 6, 62, 256 }, { 7, 63, 256 },
+ { 2, 64, 256 }, { 3, 65, 256 }, { 3, 66, 256 }, { 4, 67, 256 }, { 3, 68, 256 }, { 4, 69, 256 }, { 4, 70, 256 }, { 5, 71, 256 },
+ { 3, 72, 256 }, { 4, 73, 256 }, { 4, 74, 256 }, { 5, 75, 256 }, { 4, 76, 256 }, { 5, 77, 256 }, { 5, 78, 256 }, { 6, 79, 256 },
+ { 3, 80, 256 }, { 4, 81, 256 }, { 4, 82, 256 }, { 5, 83, 256 }, { 4, 84, 256 }, { 5, 85, 256 }, { 5, 86, 256 }, { 6, 87, 256 },
+ { 4, 88, 256 }, { 5, 89, 256 }, { 5, 90, 256 }, { 6, 91, 256 }, { 5, 92, 256 }, { 6, 93, 256 }, { 6, 94, 256 }, { 7, 95, 256 },
+ { 3, 96, 256 }, { 4, 97, 256 }, { 4, 98, 256 }, { 5, 99, 256 }, { 4, 100, 256 }, { 5, 101, 256 }, { 5, 102, 256 }, { 6, 103, 256 },
+ { 4, 104, 256 }, { 5, 105, 256 }, { 5, 106, 256 }, { 6, 107, 256 }, { 5, 108, 256 }, { 6, 109, 256 }, { 6, 110, 256 }, { 7, 111, 256 },
+ { 4, 112, 256 }, { 5, 113, 256 }, { 5, 114, 256 }, { 6, 115, 256 }, { 5, 116, 256 }, { 6, 117, 256 }, { 6, 118, 256 }, { 7, 119, 256 },
+ { 5, 120, 256 }, { 6, 121, 256 }, { 6, 122, 256 }, { 7, 123, 256 }, { 6, 124, 256 }, { 7, 125, 256 }, { 7, 126, 256 }, { 8, 127, 256 },
+ { 2, 128, 256 }, { 3, 129, 256 }, { 3, 130, 256 }, { 4, 131, 256 }, { 3, 132, 256 }, { 4, 133, 256 }, { 4, 134, 256 }, { 5, 135, 256 },
+ { 3, 136, 256 }, { 4, 137, 256 }, { 4, 138, 256 }, { 5, 139, 256 }, { 4, 140, 256 }, { 5, 141, 256 }, { 5, 142, 256 }, { 6, 143, 256 },
+ { 3, 144, 256 }, { 4, 145, 256 }, { 4, 146, 256 }, { 5, 147, 256 }, { 4, 148, 256 }, { 5, 149, 256 }, { 5, 150, 256 }, { 6, 151, 256 },
+ { 4, 152, 256 }, { 5, 153, 256 }, { 5, 154, 256 }, { 6, 155, 256 }, { 5, 156, 256 }, { 6, 157, 256 }, { 6, 158, 256 }, { 7, 159, 256 },
+ { 3, 160, 256 }, { 4, 161, 256 }, { 4, 162, 256 }, { 5, 163, 256 }, { 4, 164, 256 }, { 5, 165, 256 }, { 5, 166, 256 }, { 6, 167, 256 },
+ { 4, 168, 256 }, { 5, 169, 256 }, { 5, 170, 256 }, { 6, 171, 256 }, { 5, 172, 256 }, { 6, 173, 256 }, { 6, 174, 256 }, { 7, 175, 256 },
+ { 4, 176, 256 }, { 5, 177, 256 }, { 5, 178, 256 }, { 6, 179, 256 }, { 5, 180, 256 }, { 6, 181, 256 }, { 6, 182, 256 }, { 7, 183, 256 },
+ { 5, 184, 256 }, { 6, 185, 256 }, { 6, 186, 256 }, { 7, 187, 256 }, { 6, 188, 256 }, { 7, 189, 256 }, { 7, 190, 256 }, { 8, 191, 256 },
+ { 3, 192, 256 }, { 4, 193, 256 }, { 4, 194, 256 }, { 5, 195, 256 }, { 4, 196, 256 }, { 5, 197, 256 }, { 5, 198, 256 }, { 6, 199, 256 },
+ { 4, 200, 256 }, { 5, 201, 256 }, { 5, 202, 256 }, { 6, 203, 256 }, { 5, 204, 256 }, { 6, 205, 256 }, { 6, 206, 256 }, { 7, 207, 256 },
+ { 4, 208, 256 }, { 5, 209, 256 }, { 5, 210, 256 }, { 6, 211, 256 }, { 5, 212, 256 }, { 6, 213, 256 }, { 6, 214, 256 }, { 7, 215, 256 },
+ { 5, 216, 256 }, { 6, 217, 256 }, { 6, 218, 256 }, { 7, 219, 256 }, { 6, 220, 256 }, { 7, 221, 256 }, { 7, 222, 256 }, { 8, 223, 256 },
+ { 4, 224, 256 }, { 5, 225, 256 }, { 5, 226, 256 }, { 6, 227, 256 }, { 5, 228, 256 }, { 6, 229, 256 }, { 6, 230, 256 }, { 7, 231, 256 },
+ { 5, 232, 256 }, { 6, 233, 256 }, { 6, 234, 256 }, { 7, 235, 256 }, { 6, 236, 256 }, { 7, 237, 256 }, { 7, 238, 256 }, { 8, 239, 256 },
+ { 5, 240, 256 }, { 6, 241, 256 }, { 6, 242, 256 }, { 7, 243, 256 }, { 6, 244, 256 }, { 7, 245, 256 }, { 7, 246, 256 }, { 8, 247, 256 },
+ { 6, 248, 256 }, { 7, 249, 256 }, { 7, 250, 256 }, { 8, 251, 256 }, { 7, 252, 256 }, { 8, 253, 256 }, { 8, 254, 256 }, { 9, 255, 256 },
#if FP_LUT > 9
- { 1, 0, 0 }, { 2, 1, 512 }, { 2, 2, 512 }, { 3, 3, 512 }, { 2, 4, 512 }, { 3, 5, 512 }, { 3, 6, 512 }, { 4, 7, 512 },
- { 2, 8, 512 }, { 3, 9, 512 }, { 3, 10, 512 }, { 4, 11, 512 }, { 3, 12, 512 }, { 4, 13, 512 }, { 4, 14, 512 }, { 5, 15, 512 },
- { 2, 16, 512 }, { 3, 17, 512 }, { 3, 18, 512 }, { 4, 19, 512 }, { 3, 20, 512 }, { 4, 21, 512 }, { 4, 22, 512 }, { 5, 23, 512 },
- { 3, 24, 512 }, { 4, 25, 512 }, { 4, 26, 512 }, { 5, 27, 512 }, { 4, 28, 512 }, { 5, 29, 512 }, { 5, 30, 512 }, { 6, 31, 512 },
- { 2, 32, 512 }, { 3, 33, 512 }, { 3, 34, 512 }, { 4, 35, 512 }, { 3, 36, 512 }, { 4, 37, 512 }, { 4, 38, 512 }, { 5, 39, 512 },
- { 3, 40, 512 }, { 4, 41, 512 }, { 4, 42, 512 }, { 5, 43, 512 }, { 4, 44, 512 }, { 5, 45, 512 }, { 5, 46, 512 }, { 6, 47, 512 },
- { 3, 48, 512 }, { 4, 49, 512 }, { 4, 50, 512 }, { 5, 51, 512 }, { 4, 52, 512 }, { 5, 53, 512 }, { 5, 54, 512 }, { 6, 55, 512 },
- { 4, 56, 512 }, { 5, 57, 512 }, { 5, 58, 512 }, { 6, 59, 512 }, { 5, 60, 512 }, { 6, 61, 512 }, { 6, 62, 512 }, { 7, 63, 512 },
- { 2, 64, 512 }, { 3, 65, 512 }, { 3, 66, 512 }, { 4, 67, 512 }, { 3, 68, 512 }, { 4, 69, 512 }, { 4, 70, 512 }, { 5, 71, 512 },
- { 3, 72, 512 }, { 4, 73, 512 }, { 4, 74, 512 }, { 5, 75, 512 }, { 4, 76, 512 }, { 5, 77, 512 }, { 5, 78, 512 }, { 6, 79, 512 },
- { 3, 80, 512 }, { 4, 81, 512 }, { 4, 82, 512 }, { 5, 83, 512 }, { 4, 84, 512 }, { 5, 85, 512 }, { 5, 86, 512 }, { 6, 87, 512 },
- { 4, 88, 512 }, { 5, 89, 512 }, { 5, 90, 512 }, { 6, 91, 512 }, { 5, 92, 512 }, { 6, 93, 512 }, { 6, 94, 512 }, { 7, 95, 512 },
- { 3, 96, 512 }, { 4, 97, 512 }, { 4, 98, 512 }, { 5, 99, 512 }, { 4, 100, 512 }, { 5, 101, 512 }, { 5, 102, 512 }, { 6, 103, 512 },
- { 4, 104, 512 }, { 5, 105, 512 }, { 5, 106, 512 }, { 6, 107, 512 }, { 5, 108, 512 }, { 6, 109, 512 }, { 6, 110, 512 }, { 7, 111, 512 },
- { 4, 112, 512 }, { 5, 113, 512 }, { 5, 114, 512 }, { 6, 115, 512 }, { 5, 116, 512 }, { 6, 117, 512 }, { 6, 118, 512 }, { 7, 119, 512 },
- { 5, 120, 512 }, { 6, 121, 512 }, { 6, 122, 512 }, { 7, 123, 512 }, { 6, 124, 512 }, { 7, 125, 512 }, { 7, 126, 512 }, { 8, 127, 512 },
- { 2, 128, 512 }, { 3, 129, 512 }, { 3, 130, 512 }, { 4, 131, 512 }, { 3, 132, 512 }, { 4, 133, 512 }, { 4, 134, 512 }, { 5, 135, 512 },
- { 3, 136, 512 }, { 4, 137, 512 }, { 4, 138, 512 }, { 5, 139, 512 }, { 4, 140, 512 }, { 5, 141, 512 }, { 5, 142, 512 }, { 6, 143, 512 },
- { 3, 144, 512 }, { 4, 145, 512 }, { 4, 146, 512 }, { 5, 147, 512 }, { 4, 148, 512 }, { 5, 149, 512 }, { 5, 150, 512 }, { 6, 151, 512 },
- { 4, 152, 512 }, { 5, 153, 512 }, { 5, 154, 512 }, { 6, 155, 512 }, { 5, 156, 512 }, { 6, 157, 512 }, { 6, 158, 512 }, { 7, 159, 512 },
- { 3, 160, 512 }, { 4, 161, 512 }, { 4, 162, 512 }, { 5, 163, 512 }, { 4, 164, 512 }, { 5, 165, 512 }, { 5, 166, 512 }, { 6, 167, 512 },
- { 4, 168, 512 }, { 5, 169, 512 }, { 5, 170, 512 }, { 6, 171, 512 }, { 5, 172, 512 }, { 6, 173, 512 }, { 6, 174, 512 }, { 7, 175, 512 },
- { 4, 176, 512 }, { 5, 177, 512 }, { 5, 178, 512 }, { 6, 179, 512 }, { 5, 180, 512 }, { 6, 181, 512 }, { 6, 182, 512 }, { 7, 183, 512 },
- { 5, 184, 512 }, { 6, 185, 512 }, { 6, 186, 512 }, { 7, 187, 512 }, { 6, 188, 512 }, { 7, 189, 512 }, { 7, 190, 512 }, { 8, 191, 512 },
- { 3, 192, 512 }, { 4, 193, 512 }, { 4, 194, 512 }, { 5, 195, 512 }, { 4, 196, 512 }, { 5, 197, 512 }, { 5, 198, 512 }, { 6, 199, 512 },
- { 4, 200, 512 }, { 5, 201, 512 }, { 5, 202, 512 }, { 6, 203, 512 }, { 5, 204, 512 }, { 6, 205, 512 }, { 6, 206, 512 }, { 7, 207, 512 },
- { 4, 208, 512 }, { 5, 209, 512 }, { 5, 210, 512 }, { 6, 211, 512 }, { 5, 212, 512 }, { 6, 213, 512 }, { 6, 214, 512 }, { 7, 215, 512 },
- { 5, 216, 512 }, { 6, 217, 512 }, { 6, 218, 512 }, { 7, 219, 512 }, { 6, 220, 512 }, { 7, 221, 512 }, { 7, 222, 512 }, { 8, 223, 512 },
- { 4, 224, 512 }, { 5, 225, 512 }, { 5, 226, 512 }, { 6, 227, 512 }, { 5, 228, 512 }, { 6, 229, 512 }, { 6, 230, 512 }, { 7, 231, 512 },
- { 5, 232, 512 }, { 6, 233, 512 }, { 6, 234, 512 }, { 7, 235, 512 }, { 6, 236, 512 }, { 7, 237, 512 }, { 7, 238, 512 }, { 8, 239, 512 },
- { 5, 240, 512 }, { 6, 241, 512 }, { 6, 242, 512 }, { 7, 243, 512 }, { 6, 244, 512 }, { 7, 245, 512 }, { 7, 246, 512 }, { 8, 247, 512 },
- { 6, 248, 512 }, { 7, 249, 512 }, { 7, 250, 512 }, { 8, 251, 512 }, { 7, 252, 512 }, { 8, 253, 512 }, { 8, 254, 512 }, { 9, 255, 512 },
- { 2, 256, 512 }, { 3, 257, 512 }, { 3, 258, 512 }, { 4, 259, 512 }, { 3, 260, 512 }, { 4, 261, 512 }, { 4, 262, 512 }, { 5, 263, 512 },
- { 3, 264, 512 }, { 4, 265, 512 }, { 4, 266, 512 }, { 5, 267, 512 }, { 4, 268, 512 }, { 5, 269, 512 }, { 5, 270, 512 }, { 6, 271, 512 },
- { 3, 272, 512 }, { 4, 273, 512 }, { 4, 274, 512 }, { 5, 275, 512 }, { 4, 276, 512 }, { 5, 277, 512 }, { 5, 278, 512 }, { 6, 279, 512 },
- { 4, 280, 512 }, { 5, 281, 512 }, { 5, 282, 512 }, { 6, 283, 512 }, { 5, 284, 512 }, { 6, 285, 512 }, { 6, 286, 512 }, { 7, 287, 512 },
- { 3, 288, 512 }, { 4, 289, 512 }, { 4, 290, 512 }, { 5, 291, 512 }, { 4, 292, 512 }, { 5, 293, 512 }, { 5, 294, 512 }, { 6, 295, 512 },
- { 4, 296, 512 }, { 5, 297, 512 }, { 5, 298, 512 }, { 6, 299, 512 }, { 5, 300, 512 }, { 6, 301, 512 }, { 6, 302, 512 }, { 7, 303, 512 },
- { 4, 304, 512 }, { 5, 305, 512 }, { 5, 306, 512 }, { 6, 307, 512 }, { 5, 308, 512 }, { 6, 309, 512 }, { 6, 310, 512 }, { 7, 311, 512 },
- { 5, 312, 512 }, { 6, 313, 512 }, { 6, 314, 512 }, { 7, 315, 512 }, { 6, 316, 512 }, { 7, 317, 512 }, { 7, 318, 512 }, { 8, 319, 512 },
- { 3, 320, 512 }, { 4, 321, 512 }, { 4, 322, 512 }, { 5, 323, 512 }, { 4, 324, 512 }, { 5, 325, 512 }, { 5, 326, 512 }, { 6, 327, 512 },
- { 4, 328, 512 }, { 5, 329, 512 }, { 5, 330, 512 }, { 6, 331, 512 }, { 5, 332, 512 }, { 6, 333, 512 }, { 6, 334, 512 }, { 7, 335, 512 },
- { 4, 336, 512 }, { 5, 337, 512 }, { 5, 338, 512 }, { 6, 339, 512 }, { 5, 340, 512 }, { 6, 341, 512 }, { 6, 342, 512 }, { 7, 343, 512 },
- { 5, 344, 512 }, { 6, 345, 512 }, { 6, 346, 512 }, { 7, 347, 512 }, { 6, 348, 512 }, { 7, 349, 512 }, { 7, 350, 512 }, { 8, 351, 512 },
- { 4, 352, 512 }, { 5, 353, 512 }, { 5, 354, 512 }, { 6, 355, 512 }, { 5, 356, 512 }, { 6, 357, 512 }, { 6, 358, 512 }, { 7, 359, 512 },
- { 5, 360, 512 }, { 6, 361, 512 }, { 6, 362, 512 }, { 7, 363, 512 }, { 6, 364, 512 }, { 7, 365, 512 }, { 7, 366, 512 }, { 8, 367, 512 },
- { 5, 368, 512 }, { 6, 369, 512 }, { 6, 370, 512 }, { 7, 371, 512 }, { 6, 372, 512 }, { 7, 373, 512 }, { 7, 374, 512 }, { 8, 375, 512 },
- { 6, 376, 512 }, { 7, 377, 512 }, { 7, 378, 512 }, { 8, 379, 512 }, { 7, 380, 512 }, { 8, 381, 512 }, { 8, 382, 512 }, { 9, 383, 512 },
- { 3, 384, 512 }, { 4, 385, 512 }, { 4, 386, 512 }, { 5, 387, 512 }, { 4, 388, 512 }, { 5, 389, 512 }, { 5, 390, 512 }, { 6, 391, 512 },
- { 4, 392, 512 }, { 5, 393, 512 }, { 5, 394, 512 }, { 6, 395, 512 }, { 5, 396, 512 }, { 6, 397, 512 }, { 6, 398, 512 }, { 7, 399, 512 },
- { 4, 400, 512 }, { 5, 401, 512 }, { 5, 402, 512 }, { 6, 403, 512 }, { 5, 404, 512 }, { 6, 405, 512 }, { 6, 406, 512 }, { 7, 407, 512 },
- { 5, 408, 512 }, { 6, 409, 512 }, { 6, 410, 512 }, { 7, 411, 512 }, { 6, 412, 512 }, { 7, 413, 512 }, { 7, 414, 512 }, { 8, 415, 512 },
- { 4, 416, 512 }, { 5, 417, 512 }, { 5, 418, 512 }, { 6, 419, 512 }, { 5, 420, 512 }, { 6, 421, 512 }, { 6, 422, 512 }, { 7, 423, 512 },
- { 5, 424, 512 }, { 6, 425, 512 }, { 6, 426, 512 }, { 7, 427, 512 }, { 6, 428, 512 }, { 7, 429, 512 }, { 7, 430, 512 }, { 8, 431, 512 },
- { 5, 432, 512 }, { 6, 433, 512 }, { 6, 434, 512 }, { 7, 435, 512 }, { 6, 436, 512 }, { 7, 437, 512 }, { 7, 438, 512 }, { 8, 439, 512 },
- { 6, 440, 512 }, { 7, 441, 512 }, { 7, 442, 512 }, { 8, 443, 512 }, { 7, 444, 512 }, { 8, 445, 512 }, { 8, 446, 512 }, { 9, 447, 512 },
- { 4, 448, 512 }, { 5, 449, 512 }, { 5, 450, 512 }, { 6, 451, 512 }, { 5, 452, 512 }, { 6, 453, 512 }, { 6, 454, 512 }, { 7, 455, 512 },
- { 5, 456, 512 }, { 6, 457, 512 }, { 6, 458, 512 }, { 7, 459, 512 }, { 6, 460, 512 }, { 7, 461, 512 }, { 7, 462, 512 }, { 8, 463, 512 },
- { 5, 464, 512 }, { 6, 465, 512 }, { 6, 466, 512 }, { 7, 467, 512 }, { 6, 468, 512 }, { 7, 469, 512 }, { 7, 470, 512 }, { 8, 471, 512 },
- { 6, 472, 512 }, { 7, 473, 512 }, { 7, 474, 512 }, { 8, 475, 512 }, { 7, 476, 512 }, { 8, 477, 512 }, { 8, 478, 512 }, { 9, 479, 512 },
- { 5, 480, 512 }, { 6, 481, 512 }, { 6, 482, 512 }, { 7, 483, 512 }, { 6, 484, 512 }, { 7, 485, 512 }, { 7, 486, 512 }, { 8, 487, 512 },
- { 6, 488, 512 }, { 7, 489, 512 }, { 7, 490, 512 }, { 8, 491, 512 }, { 7, 492, 512 }, { 8, 493, 512 }, { 8, 494, 512 }, { 9, 495, 512 },
- { 6, 496, 512 }, { 7, 497, 512 }, { 7, 498, 512 }, { 8, 499, 512 }, { 7, 500, 512 }, { 8, 501, 512 }, { 8, 502, 512 }, { 9, 503, 512 },
- { 7, 504, 512 }, { 8, 505, 512 }, { 8, 506, 512 }, { 9, 507, 512 }, { 8, 508, 512 }, { 9, 509, 512 }, { 9, 510, 512 }, { 10, 511, 512 },
+ { 1, 0, 0 }, { 2, 1, 512 }, { 2, 2, 512 }, { 3, 3, 512 }, { 2, 4, 512 }, { 3, 5, 512 }, { 3, 6, 512 }, { 4, 7, 512 },
+ { 2, 8, 512 }, { 3, 9, 512 }, { 3, 10, 512 }, { 4, 11, 512 }, { 3, 12, 512 }, { 4, 13, 512 }, { 4, 14, 512 }, { 5, 15, 512 },
+ { 2, 16, 512 }, { 3, 17, 512 }, { 3, 18, 512 }, { 4, 19, 512 }, { 3, 20, 512 }, { 4, 21, 512 }, { 4, 22, 512 }, { 5, 23, 512 },
+ { 3, 24, 512 }, { 4, 25, 512 }, { 4, 26, 512 }, { 5, 27, 512 }, { 4, 28, 512 }, { 5, 29, 512 }, { 5, 30, 512 }, { 6, 31, 512 },
+ { 2, 32, 512 }, { 3, 33, 512 }, { 3, 34, 512 }, { 4, 35, 512 }, { 3, 36, 512 }, { 4, 37, 512 }, { 4, 38, 512 }, { 5, 39, 512 },
+ { 3, 40, 512 }, { 4, 41, 512 }, { 4, 42, 512 }, { 5, 43, 512 }, { 4, 44, 512 }, { 5, 45, 512 }, { 5, 46, 512 }, { 6, 47, 512 },
+ { 3, 48, 512 }, { 4, 49, 512 }, { 4, 50, 512 }, { 5, 51, 512 }, { 4, 52, 512 }, { 5, 53, 512 }, { 5, 54, 512 }, { 6, 55, 512 },
+ { 4, 56, 512 }, { 5, 57, 512 }, { 5, 58, 512 }, { 6, 59, 512 }, { 5, 60, 512 }, { 6, 61, 512 }, { 6, 62, 512 }, { 7, 63, 512 },
+ { 2, 64, 512 }, { 3, 65, 512 }, { 3, 66, 512 }, { 4, 67, 512 }, { 3, 68, 512 }, { 4, 69, 512 }, { 4, 70, 512 }, { 5, 71, 512 },
+ { 3, 72, 512 }, { 4, 73, 512 }, { 4, 74, 512 }, { 5, 75, 512 }, { 4, 76, 512 }, { 5, 77, 512 }, { 5, 78, 512 }, { 6, 79, 512 },
+ { 3, 80, 512 }, { 4, 81, 512 }, { 4, 82, 512 }, { 5, 83, 512 }, { 4, 84, 512 }, { 5, 85, 512 }, { 5, 86, 512 }, { 6, 87, 512 },
+ { 4, 88, 512 }, { 5, 89, 512 }, { 5, 90, 512 }, { 6, 91, 512 }, { 5, 92, 512 }, { 6, 93, 512 }, { 6, 94, 512 }, { 7, 95, 512 },
+ { 3, 96, 512 }, { 4, 97, 512 }, { 4, 98, 512 }, { 5, 99, 512 }, { 4, 100, 512 }, { 5, 101, 512 }, { 5, 102, 512 }, { 6, 103, 512 },
+ { 4, 104, 512 }, { 5, 105, 512 }, { 5, 106, 512 }, { 6, 107, 512 }, { 5, 108, 512 }, { 6, 109, 512 }, { 6, 110, 512 }, { 7, 111, 512 },
+ { 4, 112, 512 }, { 5, 113, 512 }, { 5, 114, 512 }, { 6, 115, 512 }, { 5, 116, 512 }, { 6, 117, 512 }, { 6, 118, 512 }, { 7, 119, 512 },
+ { 5, 120, 512 }, { 6, 121, 512 }, { 6, 122, 512 }, { 7, 123, 512 }, { 6, 124, 512 }, { 7, 125, 512 }, { 7, 126, 512 }, { 8, 127, 512 },
+ { 2, 128, 512 }, { 3, 129, 512 }, { 3, 130, 512 }, { 4, 131, 512 }, { 3, 132, 512 }, { 4, 133, 512 }, { 4, 134, 512 }, { 5, 135, 512 },
+ { 3, 136, 512 }, { 4, 137, 512 }, { 4, 138, 512 }, { 5, 139, 512 }, { 4, 140, 512 }, { 5, 141, 512 }, { 5, 142, 512 }, { 6, 143, 512 },
+ { 3, 144, 512 }, { 4, 145, 512 }, { 4, 146, 512 }, { 5, 147, 512 }, { 4, 148, 512 }, { 5, 149, 512 }, { 5, 150, 512 }, { 6, 151, 512 },
+ { 4, 152, 512 }, { 5, 153, 512 }, { 5, 154, 512 }, { 6, 155, 512 }, { 5, 156, 512 }, { 6, 157, 512 }, { 6, 158, 512 }, { 7, 159, 512 },
+ { 3, 160, 512 }, { 4, 161, 512 }, { 4, 162, 512 }, { 5, 163, 512 }, { 4, 164, 512 }, { 5, 165, 512 }, { 5, 166, 512 }, { 6, 167, 512 },
+ { 4, 168, 512 }, { 5, 169, 512 }, { 5, 170, 512 }, { 6, 171, 512 }, { 5, 172, 512 }, { 6, 173, 512 }, { 6, 174, 512 }, { 7, 175, 512 },
+ { 4, 176, 512 }, { 5, 177, 512 }, { 5, 178, 512 }, { 6, 179, 512 }, { 5, 180, 512 }, { 6, 181, 512 }, { 6, 182, 512 }, { 7, 183, 512 },
+ { 5, 184, 512 }, { 6, 185, 512 }, { 6, 186, 512 }, { 7, 187, 512 }, { 6, 188, 512 }, { 7, 189, 512 }, { 7, 190, 512 }, { 8, 191, 512 },
+ { 3, 192, 512 }, { 4, 193, 512 }, { 4, 194, 512 }, { 5, 195, 512 }, { 4, 196, 512 }, { 5, 197, 512 }, { 5, 198, 512 }, { 6, 199, 512 },
+ { 4, 200, 512 }, { 5, 201, 512 }, { 5, 202, 512 }, { 6, 203, 512 }, { 5, 204, 512 }, { 6, 205, 512 }, { 6, 206, 512 }, { 7, 207, 512 },
+ { 4, 208, 512 }, { 5, 209, 512 }, { 5, 210, 512 }, { 6, 211, 512 }, { 5, 212, 512 }, { 6, 213, 512 }, { 6, 214, 512 }, { 7, 215, 512 },
+ { 5, 216, 512 }, { 6, 217, 512 }, { 6, 218, 512 }, { 7, 219, 512 }, { 6, 220, 512 }, { 7, 221, 512 }, { 7, 222, 512 }, { 8, 223, 512 },
+ { 4, 224, 512 }, { 5, 225, 512 }, { 5, 226, 512 }, { 6, 227, 512 }, { 5, 228, 512 }, { 6, 229, 512 }, { 6, 230, 512 }, { 7, 231, 512 },
+ { 5, 232, 512 }, { 6, 233, 512 }, { 6, 234, 512 }, { 7, 235, 512 }, { 6, 236, 512 }, { 7, 237, 512 }, { 7, 238, 512 }, { 8, 239, 512 },
+ { 5, 240, 512 }, { 6, 241, 512 }, { 6, 242, 512 }, { 7, 243, 512 }, { 6, 244, 512 }, { 7, 245, 512 }, { 7, 246, 512 }, { 8, 247, 512 },
+ { 6, 248, 512 }, { 7, 249, 512 }, { 7, 250, 512 }, { 8, 251, 512 }, { 7, 252, 512 }, { 8, 253, 512 }, { 8, 254, 512 }, { 9, 255, 512 },
+ { 2, 256, 512 }, { 3, 257, 512 }, { 3, 258, 512 }, { 4, 259, 512 }, { 3, 260, 512 }, { 4, 261, 512 }, { 4, 262, 512 }, { 5, 263, 512 },
+ { 3, 264, 512 }, { 4, 265, 512 }, { 4, 266, 512 }, { 5, 267, 512 }, { 4, 268, 512 }, { 5, 269, 512 }, { 5, 270, 512 }, { 6, 271, 512 },
+ { 3, 272, 512 }, { 4, 273, 512 }, { 4, 274, 512 }, { 5, 275, 512 }, { 4, 276, 512 }, { 5, 277, 512 }, { 5, 278, 512 }, { 6, 279, 512 },
+ { 4, 280, 512 }, { 5, 281, 512 }, { 5, 282, 512 }, { 6, 283, 512 }, { 5, 284, 512 }, { 6, 285, 512 }, { 6, 286, 512 }, { 7, 287, 512 },
+ { 3, 288, 512 }, { 4, 289, 512 }, { 4, 290, 512 }, { 5, 291, 512 }, { 4, 292, 512 }, { 5, 293, 512 }, { 5, 294, 512 }, { 6, 295, 512 },
+ { 4, 296, 512 }, { 5, 297, 512 }, { 5, 298, 512 }, { 6, 299, 512 }, { 5, 300, 512 }, { 6, 301, 512 }, { 6, 302, 512 }, { 7, 303, 512 },
+ { 4, 304, 512 }, { 5, 305, 512 }, { 5, 306, 512 }, { 6, 307, 512 }, { 5, 308, 512 }, { 6, 309, 512 }, { 6, 310, 512 }, { 7, 311, 512 },
+ { 5, 312, 512 }, { 6, 313, 512 }, { 6, 314, 512 }, { 7, 315, 512 }, { 6, 316, 512 }, { 7, 317, 512 }, { 7, 318, 512 }, { 8, 319, 512 },
+ { 3, 320, 512 }, { 4, 321, 512 }, { 4, 322, 512 }, { 5, 323, 512 }, { 4, 324, 512 }, { 5, 325, 512 }, { 5, 326, 512 }, { 6, 327, 512 },
+ { 4, 328, 512 }, { 5, 329, 512 }, { 5, 330, 512 }, { 6, 331, 512 }, { 5, 332, 512 }, { 6, 333, 512 }, { 6, 334, 512 }, { 7, 335, 512 },
+ { 4, 336, 512 }, { 5, 337, 512 }, { 5, 338, 512 }, { 6, 339, 512 }, { 5, 340, 512 }, { 6, 341, 512 }, { 6, 342, 512 }, { 7, 343, 512 },
+ { 5, 344, 512 }, { 6, 345, 512 }, { 6, 346, 512 }, { 7, 347, 512 }, { 6, 348, 512 }, { 7, 349, 512 }, { 7, 350, 512 }, { 8, 351, 512 },
+ { 4, 352, 512 }, { 5, 353, 512 }, { 5, 354, 512 }, { 6, 355, 512 }, { 5, 356, 512 }, { 6, 357, 512 }, { 6, 358, 512 }, { 7, 359, 512 },
+ { 5, 360, 512 }, { 6, 361, 512 }, { 6, 362, 512 }, { 7, 363, 512 }, { 6, 364, 512 }, { 7, 365, 512 }, { 7, 366, 512 }, { 8, 367, 512 },
+ { 5, 368, 512 }, { 6, 369, 512 }, { 6, 370, 512 }, { 7, 371, 512 }, { 6, 372, 512 }, { 7, 373, 512 }, { 7, 374, 512 }, { 8, 375, 512 },
+ { 6, 376, 512 }, { 7, 377, 512 }, { 7, 378, 512 }, { 8, 379, 512 }, { 7, 380, 512 }, { 8, 381, 512 }, { 8, 382, 512 }, { 9, 383, 512 },
+ { 3, 384, 512 }, { 4, 385, 512 }, { 4, 386, 512 }, { 5, 387, 512 }, { 4, 388, 512 }, { 5, 389, 512 }, { 5, 390, 512 }, { 6, 391, 512 },
+ { 4, 392, 512 }, { 5, 393, 512 }, { 5, 394, 512 }, { 6, 395, 512 }, { 5, 396, 512 }, { 6, 397, 512 }, { 6, 398, 512 }, { 7, 399, 512 },
+ { 4, 400, 512 }, { 5, 401, 512 }, { 5, 402, 512 }, { 6, 403, 512 }, { 5, 404, 512 }, { 6, 405, 512 }, { 6, 406, 512 }, { 7, 407, 512 },
+ { 5, 408, 512 }, { 6, 409, 512 }, { 6, 410, 512 }, { 7, 411, 512 }, { 6, 412, 512 }, { 7, 413, 512 }, { 7, 414, 512 }, { 8, 415, 512 },
+ { 4, 416, 512 }, { 5, 417, 512 }, { 5, 418, 512 }, { 6, 419, 512 }, { 5, 420, 512 }, { 6, 421, 512 }, { 6, 422, 512 }, { 7, 423, 512 },
+ { 5, 424, 512 }, { 6, 425, 512 }, { 6, 426, 512 }, { 7, 427, 512 }, { 6, 428, 512 }, { 7, 429, 512 }, { 7, 430, 512 }, { 8, 431, 512 },
+ { 5, 432, 512 }, { 6, 433, 512 }, { 6, 434, 512 }, { 7, 435, 512 }, { 6, 436, 512 }, { 7, 437, 512 }, { 7, 438, 512 }, { 8, 439, 512 },
+ { 6, 440, 512 }, { 7, 441, 512 }, { 7, 442, 512 }, { 8, 443, 512 }, { 7, 444, 512 }, { 8, 445, 512 }, { 8, 446, 512 }, { 9, 447, 512 },
+ { 4, 448, 512 }, { 5, 449, 512 }, { 5, 450, 512 }, { 6, 451, 512 }, { 5, 452, 512 }, { 6, 453, 512 }, { 6, 454, 512 }, { 7, 455, 512 },
+ { 5, 456, 512 }, { 6, 457, 512 }, { 6, 458, 512 }, { 7, 459, 512 }, { 6, 460, 512 }, { 7, 461, 512 }, { 7, 462, 512 }, { 8, 463, 512 },
+ { 5, 464, 512 }, { 6, 465, 512 }, { 6, 466, 512 }, { 7, 467, 512 }, { 6, 468, 512 }, { 7, 469, 512 }, { 7, 470, 512 }, { 8, 471, 512 },
+ { 6, 472, 512 }, { 7, 473, 512 }, { 7, 474, 512 }, { 8, 475, 512 }, { 7, 476, 512 }, { 8, 477, 512 }, { 8, 478, 512 }, { 9, 479, 512 },
+ { 5, 480, 512 }, { 6, 481, 512 }, { 6, 482, 512 }, { 7, 483, 512 }, { 6, 484, 512 }, { 7, 485, 512 }, { 7, 486, 512 }, { 8, 487, 512 },
+ { 6, 488, 512 }, { 7, 489, 512 }, { 7, 490, 512 }, { 8, 491, 512 }, { 7, 492, 512 }, { 8, 493, 512 }, { 8, 494, 512 }, { 9, 495, 512 },
+ { 6, 496, 512 }, { 7, 497, 512 }, { 7, 498, 512 }, { 8, 499, 512 }, { 7, 500, 512 }, { 8, 501, 512 }, { 8, 502, 512 }, { 9, 503, 512 },
+ { 7, 504, 512 }, { 8, 505, 512 }, { 8, 506, 512 }, { 9, 507, 512 }, { 8, 508, 512 }, { 9, 509, 512 }, { 9, 510, 512 }, { 10, 511, 512 },
#if FP_LUT > 10
- { 1, 0, 0 }, { 2, 1, 1024 }, { 2, 2, 1024 }, { 3, 3, 1024 }, { 2, 4, 1024 }, { 3, 5, 1024 }, { 3, 6, 1024 }, { 4, 7, 1024 },
- { 2, 8, 1024 }, { 3, 9, 1024 }, { 3, 10, 1024 }, { 4, 11, 1024 }, { 3, 12, 1024 }, { 4, 13, 1024 }, { 4, 14, 1024 }, { 5, 15, 1024 },
- { 2, 16, 1024 }, { 3, 17, 1024 }, { 3, 18, 1024 }, { 4, 19, 1024 }, { 3, 20, 1024 }, { 4, 21, 1024 }, { 4, 22, 1024 }, { 5, 23, 1024 },
- { 3, 24, 1024 }, { 4, 25, 1024 }, { 4, 26, 1024 }, { 5, 27, 1024 }, { 4, 28, 1024 }, { 5, 29, 1024 }, { 5, 30, 1024 }, { 6, 31, 1024 },
- { 2, 32, 1024 }, { 3, 33, 1024 }, { 3, 34, 1024 }, { 4, 35, 1024 }, { 3, 36, 1024 }, { 4, 37, 1024 }, { 4, 38, 1024 }, { 5, 39, 1024 },
- { 3, 40, 1024 }, { 4, 41, 1024 }, { 4, 42, 1024 }, { 5, 43, 1024 }, { 4, 44, 1024 }, { 5, 45, 1024 }, { 5, 46, 1024 }, { 6, 47, 1024 },
- { 3, 48, 1024 }, { 4, 49, 1024 }, { 4, 50, 1024 }, { 5, 51, 1024 }, { 4, 52, 1024 }, { 5, 53, 1024 }, { 5, 54, 1024 }, { 6, 55, 1024 },
- { 4, 56, 1024 }, { 5, 57, 1024 }, { 5, 58, 1024 }, { 6, 59, 1024 }, { 5, 60, 1024 }, { 6, 61, 1024 }, { 6, 62, 1024 }, { 7, 63, 1024 },
- { 2, 64, 1024 }, { 3, 65, 1024 }, { 3, 66, 1024 }, { 4, 67, 1024 }, { 3, 68, 1024 }, { 4, 69, 1024 }, { 4, 70, 1024 }, { 5, 71, 1024 },
- { 3, 72, 1024 }, { 4, 73, 1024 }, { 4, 74, 1024 }, { 5, 75, 1024 }, { 4, 76, 1024 }, { 5, 77, 1024 }, { 5, 78, 1024 }, { 6, 79, 1024 },
- { 3, 80, 1024 }, { 4, 81, 1024 }, { 4, 82, 1024 }, { 5, 83, 1024 }, { 4, 84, 1024 }, { 5, 85, 1024 }, { 5, 86, 1024 }, { 6, 87, 1024 },
- { 4, 88, 1024 }, { 5, 89, 1024 }, { 5, 90, 1024 }, { 6, 91, 1024 }, { 5, 92, 1024 }, { 6, 93, 1024 }, { 6, 94, 1024 }, { 7, 95, 1024 },
- { 3, 96, 1024 }, { 4, 97, 1024 }, { 4, 98, 1024 }, { 5, 99, 1024 }, { 4, 100, 1024 }, { 5, 101, 1024 }, { 5, 102, 1024 }, { 6, 103, 1024 },
- { 4, 104, 1024 }, { 5, 105, 1024 }, { 5, 106, 1024 }, { 6, 107, 1024 }, { 5, 108, 1024 }, { 6, 109, 1024 }, { 6, 110, 1024 }, { 7, 111, 1024 },
- { 4, 112, 1024 }, { 5, 113, 1024 }, { 5, 114, 1024 }, { 6, 115, 1024 }, { 5, 116, 1024 }, { 6, 117, 1024 }, { 6, 118, 1024 }, { 7, 119, 1024 },
- { 5, 120, 1024 }, { 6, 121, 1024 }, { 6, 122, 1024 }, { 7, 123, 1024 }, { 6, 124, 1024 }, { 7, 125, 1024 }, { 7, 126, 1024 }, { 8, 127, 1024 },
- { 2, 128, 1024 }, { 3, 129, 1024 }, { 3, 130, 1024 }, { 4, 131, 1024 }, { 3, 132, 1024 }, { 4, 133, 1024 }, { 4, 134, 1024 }, { 5, 135, 1024 },
- { 3, 136, 1024 }, { 4, 137, 1024 }, { 4, 138, 1024 }, { 5, 139, 1024 }, { 4, 140, 1024 }, { 5, 141, 1024 }, { 5, 142, 1024 }, { 6, 143, 1024 },
- { 3, 144, 1024 }, { 4, 145, 1024 }, { 4, 146, 1024 }, { 5, 147, 1024 }, { 4, 148, 1024 }, { 5, 149, 1024 }, { 5, 150, 1024 }, { 6, 151, 1024 },
- { 4, 152, 1024 }, { 5, 153, 1024 }, { 5, 154, 1024 }, { 6, 155, 1024 }, { 5, 156, 1024 }, { 6, 157, 1024 }, { 6, 158, 1024 }, { 7, 159, 1024 },
- { 3, 160, 1024 }, { 4, 161, 1024 }, { 4, 162, 1024 }, { 5, 163, 1024 }, { 4, 164, 1024 }, { 5, 165, 1024 }, { 5, 166, 1024 }, { 6, 167, 1024 },
- { 4, 168, 1024 }, { 5, 169, 1024 }, { 5, 170, 1024 }, { 6, 171, 1024 }, { 5, 172, 1024 }, { 6, 173, 1024 }, { 6, 174, 1024 }, { 7, 175, 1024 },
- { 4, 176, 1024 }, { 5, 177, 1024 }, { 5, 178, 1024 }, { 6, 179, 1024 }, { 5, 180, 1024 }, { 6, 181, 1024 }, { 6, 182, 1024 }, { 7, 183, 1024 },
- { 5, 184, 1024 }, { 6, 185, 1024 }, { 6, 186, 1024 }, { 7, 187, 1024 }, { 6, 188, 1024 }, { 7, 189, 1024 }, { 7, 190, 1024 }, { 8, 191, 1024 },
- { 3, 192, 1024 }, { 4, 193, 1024 }, { 4, 194, 1024 }, { 5, 195, 1024 }, { 4, 196, 1024 }, { 5, 197, 1024 }, { 5, 198, 1024 }, { 6, 199, 1024 },
- { 4, 200, 1024 }, { 5, 201, 1024 }, { 5, 202, 1024 }, { 6, 203, 1024 }, { 5, 204, 1024 }, { 6, 205, 1024 }, { 6, 206, 1024 }, { 7, 207, 1024 },
- { 4, 208, 1024 }, { 5, 209, 1024 }, { 5, 210, 1024 }, { 6, 211, 1024 }, { 5, 212, 1024 }, { 6, 213, 1024 }, { 6, 214, 1024 }, { 7, 215, 1024 },
- { 5, 216, 1024 }, { 6, 217, 1024 }, { 6, 218, 1024 }, { 7, 219, 1024 }, { 6, 220, 1024 }, { 7, 221, 1024 }, { 7, 222, 1024 }, { 8, 223, 1024 },
- { 4, 224, 1024 }, { 5, 225, 1024 }, { 5, 226, 1024 }, { 6, 227, 1024 }, { 5, 228, 1024 }, { 6, 229, 1024 }, { 6, 230, 1024 }, { 7, 231, 1024 },
- { 5, 232, 1024 }, { 6, 233, 1024 }, { 6, 234, 1024 }, { 7, 235, 1024 }, { 6, 236, 1024 }, { 7, 237, 1024 }, { 7, 238, 1024 }, { 8, 239, 1024 },
- { 5, 240, 1024 }, { 6, 241, 1024 }, { 6, 242, 1024 }, { 7, 243, 1024 }, { 6, 244, 1024 }, { 7, 245, 1024 }, { 7, 246, 1024 }, { 8, 247, 1024 },
- { 6, 248, 1024 }, { 7, 249, 1024 }, { 7, 250, 1024 }, { 8, 251, 1024 }, { 7, 252, 1024 }, { 8, 253, 1024 }, { 8, 254, 1024 }, { 9, 255, 1024 },
- { 2, 256, 1024 }, { 3, 257, 1024 }, { 3, 258, 1024 }, { 4, 259, 1024 }, { 3, 260, 1024 }, { 4, 261, 1024 }, { 4, 262, 1024 }, { 5, 263, 1024 },
- { 3, 264, 1024 }, { 4, 265, 1024 }, { 4, 266, 1024 }, { 5, 267, 1024 }, { 4, 268, 1024 }, { 5, 269, 1024 }, { 5, 270, 1024 }, { 6, 271, 1024 },
- { 3, 272, 1024 }, { 4, 273, 1024 }, { 4, 274, 1024 }, { 5, 275, 1024 }, { 4, 276, 1024 }, { 5, 277, 1024 }, { 5, 278, 1024 }, { 6, 279, 1024 },
- { 4, 280, 1024 }, { 5, 281, 1024 }, { 5, 282, 1024 }, { 6, 283, 1024 }, { 5, 284, 1024 }, { 6, 285, 1024 }, { 6, 286, 1024 }, { 7, 287, 1024 },
- { 3, 288, 1024 }, { 4, 289, 1024 }, { 4, 290, 1024 }, { 5, 291, 1024 }, { 4, 292, 1024 }, { 5, 293, 1024 }, { 5, 294, 1024 }, { 6, 295, 1024 },
- { 4, 296, 1024 }, { 5, 297, 1024 }, { 5, 298, 1024 }, { 6, 299, 1024 }, { 5, 300, 1024 }, { 6, 301, 1024 }, { 6, 302, 1024 }, { 7, 303, 1024 },
- { 4, 304, 1024 }, { 5, 305, 1024 }, { 5, 306, 1024 }, { 6, 307, 1024 }, { 5, 308, 1024 }, { 6, 309, 1024 }, { 6, 310, 1024 }, { 7, 311, 1024 },
- { 5, 312, 1024 }, { 6, 313, 1024 }, { 6, 314, 1024 }, { 7, 315, 1024 }, { 6, 316, 1024 }, { 7, 317, 1024 }, { 7, 318, 1024 }, { 8, 319, 1024 },
- { 3, 320, 1024 }, { 4, 321, 1024 }, { 4, 322, 1024 }, { 5, 323, 1024 }, { 4, 324, 1024 }, { 5, 325, 1024 }, { 5, 326, 1024 }, { 6, 327, 1024 },
- { 4, 328, 1024 }, { 5, 329, 1024 }, { 5, 330, 1024 }, { 6, 331, 1024 }, { 5, 332, 1024 }, { 6, 333, 1024 }, { 6, 334, 1024 }, { 7, 335, 1024 },
- { 4, 336, 1024 }, { 5, 337, 1024 }, { 5, 338, 1024 }, { 6, 339, 1024 }, { 5, 340, 1024 }, { 6, 341, 1024 }, { 6, 342, 1024 }, { 7, 343, 1024 },
- { 5, 344, 1024 }, { 6, 345, 1024 }, { 6, 346, 1024 }, { 7, 347, 1024 }, { 6, 348, 1024 }, { 7, 349, 1024 }, { 7, 350, 1024 }, { 8, 351, 1024 },
- { 4, 352, 1024 }, { 5, 353, 1024 }, { 5, 354, 1024 }, { 6, 355, 1024 }, { 5, 356, 1024 }, { 6, 357, 1024 }, { 6, 358, 1024 }, { 7, 359, 1024 },
- { 5, 360, 1024 }, { 6, 361, 1024 }, { 6, 362, 1024 }, { 7, 363, 1024 }, { 6, 364, 1024 }, { 7, 365, 1024 }, { 7, 366, 1024 }, { 8, 367, 1024 },
- { 5, 368, 1024 }, { 6, 369, 1024 }, { 6, 370, 1024 }, { 7, 371, 1024 }, { 6, 372, 1024 }, { 7, 373, 1024 }, { 7, 374, 1024 }, { 8, 375, 1024 },
- { 6, 376, 1024 }, { 7, 377, 1024 }, { 7, 378, 1024 }, { 8, 379, 1024 }, { 7, 380, 1024 }, { 8, 381, 1024 }, { 8, 382, 1024 }, { 9, 383, 1024 },
- { 3, 384, 1024 }, { 4, 385, 1024 }, { 4, 386, 1024 }, { 5, 387, 1024 }, { 4, 388, 1024 }, { 5, 389, 1024 }, { 5, 390, 1024 }, { 6, 391, 1024 },
- { 4, 392, 1024 }, { 5, 393, 1024 }, { 5, 394, 1024 }, { 6, 395, 1024 }, { 5, 396, 1024 }, { 6, 397, 1024 }, { 6, 398, 1024 }, { 7, 399, 1024 },
- { 4, 400, 1024 }, { 5, 401, 1024 }, { 5, 402, 1024 }, { 6, 403, 1024 }, { 5, 404, 1024 }, { 6, 405, 1024 }, { 6, 406, 1024 }, { 7, 407, 1024 },
- { 5, 408, 1024 }, { 6, 409, 1024 }, { 6, 410, 1024 }, { 7, 411, 1024 }, { 6, 412, 1024 }, { 7, 413, 1024 }, { 7, 414, 1024 }, { 8, 415, 1024 },
- { 4, 416, 1024 }, { 5, 417, 1024 }, { 5, 418, 1024 }, { 6, 419, 1024 }, { 5, 420, 1024 }, { 6, 421, 1024 }, { 6, 422, 1024 }, { 7, 423, 1024 },
- { 5, 424, 1024 }, { 6, 425, 1024 }, { 6, 426, 1024 }, { 7, 427, 1024 }, { 6, 428, 1024 }, { 7, 429, 1024 }, { 7, 430, 1024 }, { 8, 431, 1024 },
- { 5, 432, 1024 }, { 6, 433, 1024 }, { 6, 434, 1024 }, { 7, 435, 1024 }, { 6, 436, 1024 }, { 7, 437, 1024 }, { 7, 438, 1024 }, { 8, 439, 1024 },
- { 6, 440, 1024 }, { 7, 441, 1024 }, { 7, 442, 1024 }, { 8, 443, 1024 }, { 7, 444, 1024 }, { 8, 445, 1024 }, { 8, 446, 1024 }, { 9, 447, 1024 },
- { 4, 448, 1024 }, { 5, 449, 1024 }, { 5, 450, 1024 }, { 6, 451, 1024 }, { 5, 452, 1024 }, { 6, 453, 1024 }, { 6, 454, 1024 }, { 7, 455, 1024 },
- { 5, 456, 1024 }, { 6, 457, 1024 }, { 6, 458, 1024 }, { 7, 459, 1024 }, { 6, 460, 1024 }, { 7, 461, 1024 }, { 7, 462, 1024 }, { 8, 463, 1024 },
- { 5, 464, 1024 }, { 6, 465, 1024 }, { 6, 466, 1024 }, { 7, 467, 1024 }, { 6, 468, 1024 }, { 7, 469, 1024 }, { 7, 470, 1024 }, { 8, 471, 1024 },
- { 6, 472, 1024 }, { 7, 473, 1024 }, { 7, 474, 1024 }, { 8, 475, 1024 }, { 7, 476, 1024 }, { 8, 477, 1024 }, { 8, 478, 1024 }, { 9, 479, 1024 },
- { 5, 480, 1024 }, { 6, 481, 1024 }, { 6, 482, 1024 }, { 7, 483, 1024 }, { 6, 484, 1024 }, { 7, 485, 1024 }, { 7, 486, 1024 }, { 8, 487, 1024 },
- { 6, 488, 1024 }, { 7, 489, 1024 }, { 7, 490, 1024 }, { 8, 491, 1024 }, { 7, 492, 1024 }, { 8, 493, 1024 }, { 8, 494, 1024 }, { 9, 495, 1024 },
- { 6, 496, 1024 }, { 7, 497, 1024 }, { 7, 498, 1024 }, { 8, 499, 1024 }, { 7, 500, 1024 }, { 8, 501, 1024 }, { 8, 502, 1024 }, { 9, 503, 1024 },
- { 7, 504, 1024 }, { 8, 505, 1024 }, { 8, 506, 1024 }, { 9, 507, 1024 }, { 8, 508, 1024 }, { 9, 509, 1024 }, { 9, 510, 1024 }, { 10, 511, 1024 },
- { 2, 512, 1024 }, { 3, 513, 1024 }, { 3, 514, 1024 }, { 4, 515, 1024 }, { 3, 516, 1024 }, { 4, 517, 1024 }, { 4, 518, 1024 }, { 5, 519, 1024 },
- { 3, 520, 1024 }, { 4, 521, 1024 }, { 4, 522, 1024 }, { 5, 523, 1024 }, { 4, 524, 1024 }, { 5, 525, 1024 }, { 5, 526, 1024 }, { 6, 527, 1024 },
- { 3, 528, 1024 }, { 4, 529, 1024 }, { 4, 530, 1024 }, { 5, 531, 1024 }, { 4, 532, 1024 }, { 5, 533, 1024 }, { 5, 534, 1024 }, { 6, 535, 1024 },
- { 4, 536, 1024 }, { 5, 537, 1024 }, { 5, 538, 1024 }, { 6, 539, 1024 }, { 5, 540, 1024 }, { 6, 541, 1024 }, { 6, 542, 1024 }, { 7, 543, 1024 },
- { 3, 544, 1024 }, { 4, 545, 1024 }, { 4, 546, 1024 }, { 5, 547, 1024 }, { 4, 548, 1024 }, { 5, 549, 1024 }, { 5, 550, 1024 }, { 6, 551, 1024 },
- { 4, 552, 1024 }, { 5, 553, 1024 }, { 5, 554, 1024 }, { 6, 555, 1024 }, { 5, 556, 1024 }, { 6, 557, 1024 }, { 6, 558, 1024 }, { 7, 559, 1024 },
- { 4, 560, 1024 }, { 5, 561, 1024 }, { 5, 562, 1024 }, { 6, 563, 1024 }, { 5, 564, 1024 }, { 6, 565, 1024 }, { 6, 566, 1024 }, { 7, 567, 1024 },
- { 5, 568, 1024 }, { 6, 569, 1024 }, { 6, 570, 1024 }, { 7, 571, 1024 }, { 6, 572, 1024 }, { 7, 573, 1024 }, { 7, 574, 1024 }, { 8, 575, 1024 },
- { 3, 576, 1024 }, { 4, 577, 1024 }, { 4, 578, 1024 }, { 5, 579, 1024 }, { 4, 580, 1024 }, { 5, 581, 1024 }, { 5, 582, 1024 }, { 6, 583, 1024 },
- { 4, 584, 1024 }, { 5, 585, 1024 }, { 5, 586, 1024 }, { 6, 587, 1024 }, { 5, 588, 1024 }, { 6, 589, 1024 }, { 6, 590, 1024 }, { 7, 591, 1024 },
- { 4, 592, 1024 }, { 5, 593, 1024 }, { 5, 594, 1024 }, { 6, 595, 1024 }, { 5, 596, 1024 }, { 6, 597, 1024 }, { 6, 598, 1024 }, { 7, 599, 1024 },
- { 5, 600, 1024 }, { 6, 601, 1024 }, { 6, 602, 1024 }, { 7, 603, 1024 }, { 6, 604, 1024 }, { 7, 605, 1024 }, { 7, 606, 1024 }, { 8, 607, 1024 },
- { 4, 608, 1024 }, { 5, 609, 1024 }, { 5, 610, 1024 }, { 6, 611, 1024 }, { 5, 612, 1024 }, { 6, 613, 1024 }, { 6, 614, 1024 }, { 7, 615, 1024 },
- { 5, 616, 1024 }, { 6, 617, 1024 }, { 6, 618, 1024 }, { 7, 619, 1024 }, { 6, 620, 1024 }, { 7, 621, 1024 }, { 7, 622, 1024 }, { 8, 623, 1024 },
- { 5, 624, 1024 }, { 6, 625, 1024 }, { 6, 626, 1024 }, { 7, 627, 1024 }, { 6, 628, 1024 }, { 7, 629, 1024 }, { 7, 630, 1024 }, { 8, 631, 1024 },
- { 6, 632, 1024 }, { 7, 633, 1024 }, { 7, 634, 1024 }, { 8, 635, 1024 }, { 7, 636, 1024 }, { 8, 637, 1024 }, { 8, 638, 1024 }, { 9, 639, 1024 },
- { 3, 640, 1024 }, { 4, 641, 1024 }, { 4, 642, 1024 }, { 5, 643, 1024 }, { 4, 644, 1024 }, { 5, 645, 1024 }, { 5, 646, 1024 }, { 6, 647, 1024 },
- { 4, 648, 1024 }, { 5, 649, 1024 }, { 5, 650, 1024 }, { 6, 651, 1024 }, { 5, 652, 1024 }, { 6, 653, 1024 }, { 6, 654, 1024 }, { 7, 655, 1024 },
- { 4, 656, 1024 }, { 5, 657, 1024 }, { 5, 658, 1024 }, { 6, 659, 1024 }, { 5, 660, 1024 }, { 6, 661, 1024 }, { 6, 662, 1024 }, { 7, 663, 1024 },
- { 5, 664, 1024 }, { 6, 665, 1024 }, { 6, 666, 1024 }, { 7, 667, 1024 }, { 6, 668, 1024 }, { 7, 669, 1024 }, { 7, 670, 1024 }, { 8, 671, 1024 },
- { 4, 672, 1024 }, { 5, 673, 1024 }, { 5, 674, 1024 }, { 6, 675, 1024 }, { 5, 676, 1024 }, { 6, 677, 1024 }, { 6, 678, 1024 }, { 7, 679, 1024 },
- { 5, 680, 1024 }, { 6, 681, 1024 }, { 6, 682, 1024 }, { 7, 683, 1024 }, { 6, 684, 1024 }, { 7, 685, 1024 }, { 7, 686, 1024 }, { 8, 687, 1024 },
- { 5, 688, 1024 }, { 6, 689, 1024 }, { 6, 690, 1024 }, { 7, 691, 1024 }, { 6, 692, 1024 }, { 7, 693, 1024 }, { 7, 694, 1024 }, { 8, 695, 1024 },
- { 6, 696, 1024 }, { 7, 697, 1024 }, { 7, 698, 1024 }, { 8, 699, 1024 }, { 7, 700, 1024 }, { 8, 701, 1024 }, { 8, 702, 1024 }, { 9, 703, 1024 },
- { 4, 704, 1024 }, { 5, 705, 1024 }, { 5, 706, 1024 }, { 6, 707, 1024 }, { 5, 708, 1024 }, { 6, 709, 1024 }, { 6, 710, 1024 }, { 7, 711, 1024 },
- { 5, 712, 1024 }, { 6, 713, 1024 }, { 6, 714, 1024 }, { 7, 715, 1024 }, { 6, 716, 1024 }, { 7, 717, 1024 }, { 7, 718, 1024 }, { 8, 719, 1024 },
- { 5, 720, 1024 }, { 6, 721, 1024 }, { 6, 722, 1024 }, { 7, 723, 1024 }, { 6, 724, 1024 }, { 7, 725, 1024 }, { 7, 726, 1024 }, { 8, 727, 1024 },
- { 6, 728, 1024 }, { 7, 729, 1024 }, { 7, 730, 1024 }, { 8, 731, 1024 }, { 7, 732, 1024 }, { 8, 733, 1024 }, { 8, 734, 1024 }, { 9, 735, 1024 },
- { 5, 736, 1024 }, { 6, 737, 1024 }, { 6, 738, 1024 }, { 7, 739, 1024 }, { 6, 740, 1024 }, { 7, 741, 1024 }, { 7, 742, 1024 }, { 8, 743, 1024 },
- { 6, 744, 1024 }, { 7, 745, 1024 }, { 7, 746, 1024 }, { 8, 747, 1024 }, { 7, 748, 1024 }, { 8, 749, 1024 }, { 8, 750, 1024 }, { 9, 751, 1024 },
- { 6, 752, 1024 }, { 7, 753, 1024 }, { 7, 754, 1024 }, { 8, 755, 1024 }, { 7, 756, 1024 }, { 8, 757, 1024 }, { 8, 758, 1024 }, { 9, 759, 1024 },
- { 7, 760, 1024 }, { 8, 761, 1024 }, { 8, 762, 1024 }, { 9, 763, 1024 }, { 8, 764, 1024 }, { 9, 765, 1024 }, { 9, 766, 1024 }, { 10, 767, 1024 },
- { 3, 768, 1024 }, { 4, 769, 1024 }, { 4, 770, 1024 }, { 5, 771, 1024 }, { 4, 772, 1024 }, { 5, 773, 1024 }, { 5, 774, 1024 }, { 6, 775, 1024 },
- { 4, 776, 1024 }, { 5, 777, 1024 }, { 5, 778, 1024 }, { 6, 779, 1024 }, { 5, 780, 1024 }, { 6, 781, 1024 }, { 6, 782, 1024 }, { 7, 783, 1024 },
- { 4, 784, 1024 }, { 5, 785, 1024 }, { 5, 786, 1024 }, { 6, 787, 1024 }, { 5, 788, 1024 }, { 6, 789, 1024 }, { 6, 790, 1024 }, { 7, 791, 1024 },
- { 5, 792, 1024 }, { 6, 793, 1024 }, { 6, 794, 1024 }, { 7, 795, 1024 }, { 6, 796, 1024 }, { 7, 797, 1024 }, { 7, 798, 1024 }, { 8, 799, 1024 },
- { 4, 800, 1024 }, { 5, 801, 1024 }, { 5, 802, 1024 }, { 6, 803, 1024 }, { 5, 804, 1024 }, { 6, 805, 1024 }, { 6, 806, 1024 }, { 7, 807, 1024 },
- { 5, 808, 1024 }, { 6, 809, 1024 }, { 6, 810, 1024 }, { 7, 811, 1024 }, { 6, 812, 1024 }, { 7, 813, 1024 }, { 7, 814, 1024 }, { 8, 815, 1024 },
- { 5, 816, 1024 }, { 6, 817, 1024 }, { 6, 818, 1024 }, { 7, 819, 1024 }, { 6, 820, 1024 }, { 7, 821, 1024 }, { 7, 822, 1024 }, { 8, 823, 1024 },
- { 6, 824, 1024 }, { 7, 825, 1024 }, { 7, 826, 1024 }, { 8, 827, 1024 }, { 7, 828, 1024 }, { 8, 829, 1024 }, { 8, 830, 1024 }, { 9, 831, 1024 },
- { 4, 832, 1024 }, { 5, 833, 1024 }, { 5, 834, 1024 }, { 6, 835, 1024 }, { 5, 836, 1024 }, { 6, 837, 1024 }, { 6, 838, 1024 }, { 7, 839, 1024 },
- { 5, 840, 1024 }, { 6, 841, 1024 }, { 6, 842, 1024 }, { 7, 843, 1024 }, { 6, 844, 1024 }, { 7, 845, 1024 }, { 7, 846, 1024 }, { 8, 847, 1024 },
- { 5, 848, 1024 }, { 6, 849, 1024 }, { 6, 850, 1024 }, { 7, 851, 1024 }, { 6, 852, 1024 }, { 7, 853, 1024 }, { 7, 854, 1024 }, { 8, 855, 1024 },
- { 6, 856, 1024 }, { 7, 857, 1024 }, { 7, 858, 1024 }, { 8, 859, 1024 }, { 7, 860, 1024 }, { 8, 861, 1024 }, { 8, 862, 1024 }, { 9, 863, 1024 },
- { 5, 864, 1024 }, { 6, 865, 1024 }, { 6, 866, 1024 }, { 7, 867, 1024 }, { 6, 868, 1024 }, { 7, 869, 1024 }, { 7, 870, 1024 }, { 8, 871, 1024 },
- { 6, 872, 1024 }, { 7, 873, 1024 }, { 7, 874, 1024 }, { 8, 875, 1024 }, { 7, 876, 1024 }, { 8, 877, 1024 }, { 8, 878, 1024 }, { 9, 879, 1024 },
- { 6, 880, 1024 }, { 7, 881, 1024 }, { 7, 882, 1024 }, { 8, 883, 1024 }, { 7, 884, 1024 }, { 8, 885, 1024 }, { 8, 886, 1024 }, { 9, 887, 1024 },
- { 7, 888, 1024 }, { 8, 889, 1024 }, { 8, 890, 1024 }, { 9, 891, 1024 }, { 8, 892, 1024 }, { 9, 893, 1024 }, { 9, 894, 1024 }, { 10, 895, 1024 },
- { 4, 896, 1024 }, { 5, 897, 1024 }, { 5, 898, 1024 }, { 6, 899, 1024 }, { 5, 900, 1024 }, { 6, 901, 1024 }, { 6, 902, 1024 }, { 7, 903, 1024 },
- { 5, 904, 1024 }, { 6, 905, 1024 }, { 6, 906, 1024 }, { 7, 907, 1024 }, { 6, 908, 1024 }, { 7, 909, 1024 }, { 7, 910, 1024 }, { 8, 911, 1024 },
- { 5, 912, 1024 }, { 6, 913, 1024 }, { 6, 914, 1024 }, { 7, 915, 1024 }, { 6, 916, 1024 }, { 7, 917, 1024 }, { 7, 918, 1024 }, { 8, 919, 1024 },
- { 6, 920, 1024 }, { 7, 921, 1024 }, { 7, 922, 1024 }, { 8, 923, 1024 }, { 7, 924, 1024 }, { 8, 925, 1024 }, { 8, 926, 1024 }, { 9, 927, 1024 },
- { 5, 928, 1024 }, { 6, 929, 1024 }, { 6, 930, 1024 }, { 7, 931, 1024 }, { 6, 932, 1024 }, { 7, 933, 1024 }, { 7, 934, 1024 }, { 8, 935, 1024 },
- { 6, 936, 1024 }, { 7, 937, 1024 }, { 7, 938, 1024 }, { 8, 939, 1024 }, { 7, 940, 1024 }, { 8, 941, 1024 }, { 8, 942, 1024 }, { 9, 943, 1024 },
- { 6, 944, 1024 }, { 7, 945, 1024 }, { 7, 946, 1024 }, { 8, 947, 1024 }, { 7, 948, 1024 }, { 8, 949, 1024 }, { 8, 950, 1024 }, { 9, 951, 1024 },
- { 7, 952, 1024 }, { 8, 953, 1024 }, { 8, 954, 1024 }, { 9, 955, 1024 }, { 8, 956, 1024 }, { 9, 957, 1024 }, { 9, 958, 1024 }, { 10, 959, 1024 },
- { 5, 960, 1024 }, { 6, 961, 1024 }, { 6, 962, 1024 }, { 7, 963, 1024 }, { 6, 964, 1024 }, { 7, 965, 1024 }, { 7, 966, 1024 }, { 8, 967, 1024 },
- { 6, 968, 1024 }, { 7, 969, 1024 }, { 7, 970, 1024 }, { 8, 971, 1024 }, { 7, 972, 1024 }, { 8, 973, 1024 }, { 8, 974, 1024 }, { 9, 975, 1024 },
- { 6, 976, 1024 }, { 7, 977, 1024 }, { 7, 978, 1024 }, { 8, 979, 1024 }, { 7, 980, 1024 }, { 8, 981, 1024 }, { 8, 982, 1024 }, { 9, 983, 1024 },
- { 7, 984, 1024 }, { 8, 985, 1024 }, { 8, 986, 1024 }, { 9, 987, 1024 }, { 8, 988, 1024 }, { 9, 989, 1024 }, { 9, 990, 1024 }, { 10, 991, 1024 },
- { 6, 992, 1024 }, { 7, 993, 1024 }, { 7, 994, 1024 }, { 8, 995, 1024 }, { 7, 996, 1024 }, { 8, 997, 1024 }, { 8, 998, 1024 }, { 9, 999, 1024 },
- { 7, 1000, 1024 }, { 8, 1001, 1024 }, { 8, 1002, 1024 }, { 9, 1003, 1024 }, { 8, 1004, 1024 }, { 9, 1005, 1024 }, { 9, 1006, 1024 }, { 10, 1007, 1024 },
- { 7, 1008, 1024 }, { 8, 1009, 1024 }, { 8, 1010, 1024 }, { 9, 1011, 1024 }, { 8, 1012, 1024 }, { 9, 1013, 1024 }, { 9, 1014, 1024 }, { 10, 1015, 1024 },
- { 8, 1016, 1024 }, { 9, 1017, 1024 }, { 9, 1018, 1024 }, { 10, 1019, 1024 }, { 9, 1020, 1024 }, { 10, 1021, 1024 }, { 10, 1022, 1024 }, { 11, 1023, 1024 },
+ { 1, 0, 0 }, { 2, 1, 1024 }, { 2, 2, 1024 }, { 3, 3, 1024 }, { 2, 4, 1024 }, { 3, 5, 1024 }, { 3, 6, 1024 }, { 4, 7, 1024 },
+ { 2, 8, 1024 }, { 3, 9, 1024 }, { 3, 10, 1024 }, { 4, 11, 1024 }, { 3, 12, 1024 }, { 4, 13, 1024 }, { 4, 14, 1024 }, { 5, 15, 1024 },
+ { 2, 16, 1024 }, { 3, 17, 1024 }, { 3, 18, 1024 }, { 4, 19, 1024 }, { 3, 20, 1024 }, { 4, 21, 1024 }, { 4, 22, 1024 }, { 5, 23, 1024 },
+ { 3, 24, 1024 }, { 4, 25, 1024 }, { 4, 26, 1024 }, { 5, 27, 1024 }, { 4, 28, 1024 }, { 5, 29, 1024 }, { 5, 30, 1024 }, { 6, 31, 1024 },
+ { 2, 32, 1024 }, { 3, 33, 1024 }, { 3, 34, 1024 }, { 4, 35, 1024 }, { 3, 36, 1024 }, { 4, 37, 1024 }, { 4, 38, 1024 }, { 5, 39, 1024 },
+ { 3, 40, 1024 }, { 4, 41, 1024 }, { 4, 42, 1024 }, { 5, 43, 1024 }, { 4, 44, 1024 }, { 5, 45, 1024 }, { 5, 46, 1024 }, { 6, 47, 1024 },
+ { 3, 48, 1024 }, { 4, 49, 1024 }, { 4, 50, 1024 }, { 5, 51, 1024 }, { 4, 52, 1024 }, { 5, 53, 1024 }, { 5, 54, 1024 }, { 6, 55, 1024 },
+ { 4, 56, 1024 }, { 5, 57, 1024 }, { 5, 58, 1024 }, { 6, 59, 1024 }, { 5, 60, 1024 }, { 6, 61, 1024 }, { 6, 62, 1024 }, { 7, 63, 1024 },
+ { 2, 64, 1024 }, { 3, 65, 1024 }, { 3, 66, 1024 }, { 4, 67, 1024 }, { 3, 68, 1024 }, { 4, 69, 1024 }, { 4, 70, 1024 }, { 5, 71, 1024 },
+ { 3, 72, 1024 }, { 4, 73, 1024 }, { 4, 74, 1024 }, { 5, 75, 1024 }, { 4, 76, 1024 }, { 5, 77, 1024 }, { 5, 78, 1024 }, { 6, 79, 1024 },
+ { 3, 80, 1024 }, { 4, 81, 1024 }, { 4, 82, 1024 }, { 5, 83, 1024 }, { 4, 84, 1024 }, { 5, 85, 1024 }, { 5, 86, 1024 }, { 6, 87, 1024 },
+ { 4, 88, 1024 }, { 5, 89, 1024 }, { 5, 90, 1024 }, { 6, 91, 1024 }, { 5, 92, 1024 }, { 6, 93, 1024 }, { 6, 94, 1024 }, { 7, 95, 1024 },
+ { 3, 96, 1024 }, { 4, 97, 1024 }, { 4, 98, 1024 }, { 5, 99, 1024 }, { 4, 100, 1024 }, { 5, 101, 1024 }, { 5, 102, 1024 }, { 6, 103, 1024 },
+ { 4, 104, 1024 }, { 5, 105, 1024 }, { 5, 106, 1024 }, { 6, 107, 1024 }, { 5, 108, 1024 }, { 6, 109, 1024 }, { 6, 110, 1024 }, { 7, 111, 1024 },
+ { 4, 112, 1024 }, { 5, 113, 1024 }, { 5, 114, 1024 }, { 6, 115, 1024 }, { 5, 116, 1024 }, { 6, 117, 1024 }, { 6, 118, 1024 }, { 7, 119, 1024 },
+ { 5, 120, 1024 }, { 6, 121, 1024 }, { 6, 122, 1024 }, { 7, 123, 1024 }, { 6, 124, 1024 }, { 7, 125, 1024 }, { 7, 126, 1024 }, { 8, 127, 1024 },
+ { 2, 128, 1024 }, { 3, 129, 1024 }, { 3, 130, 1024 }, { 4, 131, 1024 }, { 3, 132, 1024 }, { 4, 133, 1024 }, { 4, 134, 1024 }, { 5, 135, 1024 },
+ { 3, 136, 1024 }, { 4, 137, 1024 }, { 4, 138, 1024 }, { 5, 139, 1024 }, { 4, 140, 1024 }, { 5, 141, 1024 }, { 5, 142, 1024 }, { 6, 143, 1024 },
+ { 3, 144, 1024 }, { 4, 145, 1024 }, { 4, 146, 1024 }, { 5, 147, 1024 }, { 4, 148, 1024 }, { 5, 149, 1024 }, { 5, 150, 1024 }, { 6, 151, 1024 },
+ { 4, 152, 1024 }, { 5, 153, 1024 }, { 5, 154, 1024 }, { 6, 155, 1024 }, { 5, 156, 1024 }, { 6, 157, 1024 }, { 6, 158, 1024 }, { 7, 159, 1024 },
+ { 3, 160, 1024 }, { 4, 161, 1024 }, { 4, 162, 1024 }, { 5, 163, 1024 }, { 4, 164, 1024 }, { 5, 165, 1024 }, { 5, 166, 1024 }, { 6, 167, 1024 },
+ { 4, 168, 1024 }, { 5, 169, 1024 }, { 5, 170, 1024 }, { 6, 171, 1024 }, { 5, 172, 1024 }, { 6, 173, 1024 }, { 6, 174, 1024 }, { 7, 175, 1024 },
+ { 4, 176, 1024 }, { 5, 177, 1024 }, { 5, 178, 1024 }, { 6, 179, 1024 }, { 5, 180, 1024 }, { 6, 181, 1024 }, { 6, 182, 1024 }, { 7, 183, 1024 },
+ { 5, 184, 1024 }, { 6, 185, 1024 }, { 6, 186, 1024 }, { 7, 187, 1024 }, { 6, 188, 1024 }, { 7, 189, 1024 }, { 7, 190, 1024 }, { 8, 191, 1024 },
+ { 3, 192, 1024 }, { 4, 193, 1024 }, { 4, 194, 1024 }, { 5, 195, 1024 }, { 4, 196, 1024 }, { 5, 197, 1024 }, { 5, 198, 1024 }, { 6, 199, 1024 },
+ { 4, 200, 1024 }, { 5, 201, 1024 }, { 5, 202, 1024 }, { 6, 203, 1024 }, { 5, 204, 1024 }, { 6, 205, 1024 }, { 6, 206, 1024 }, { 7, 207, 1024 },
+ { 4, 208, 1024 }, { 5, 209, 1024 }, { 5, 210, 1024 }, { 6, 211, 1024 }, { 5, 212, 1024 }, { 6, 213, 1024 }, { 6, 214, 1024 }, { 7, 215, 1024 },
+ { 5, 216, 1024 }, { 6, 217, 1024 }, { 6, 218, 1024 }, { 7, 219, 1024 }, { 6, 220, 1024 }, { 7, 221, 1024 }, { 7, 222, 1024 }, { 8, 223, 1024 },
+ { 4, 224, 1024 }, { 5, 225, 1024 }, { 5, 226, 1024 }, { 6, 227, 1024 }, { 5, 228, 1024 }, { 6, 229, 1024 }, { 6, 230, 1024 }, { 7, 231, 1024 },
+ { 5, 232, 1024 }, { 6, 233, 1024 }, { 6, 234, 1024 }, { 7, 235, 1024 }, { 6, 236, 1024 }, { 7, 237, 1024 }, { 7, 238, 1024 }, { 8, 239, 1024 },
+ { 5, 240, 1024 }, { 6, 241, 1024 }, { 6, 242, 1024 }, { 7, 243, 1024 }, { 6, 244, 1024 }, { 7, 245, 1024 }, { 7, 246, 1024 }, { 8, 247, 1024 },
+ { 6, 248, 1024 }, { 7, 249, 1024 }, { 7, 250, 1024 }, { 8, 251, 1024 }, { 7, 252, 1024 }, { 8, 253, 1024 }, { 8, 254, 1024 }, { 9, 255, 1024 },
+ { 2, 256, 1024 }, { 3, 257, 1024 }, { 3, 258, 1024 }, { 4, 259, 1024 }, { 3, 260, 1024 }, { 4, 261, 1024 }, { 4, 262, 1024 }, { 5, 263, 1024 },
+ { 3, 264, 1024 }, { 4, 265, 1024 }, { 4, 266, 1024 }, { 5, 267, 1024 }, { 4, 268, 1024 }, { 5, 269, 1024 }, { 5, 270, 1024 }, { 6, 271, 1024 },
+ { 3, 272, 1024 }, { 4, 273, 1024 }, { 4, 274, 1024 }, { 5, 275, 1024 }, { 4, 276, 1024 }, { 5, 277, 1024 }, { 5, 278, 1024 }, { 6, 279, 1024 },
+ { 4, 280, 1024 }, { 5, 281, 1024 }, { 5, 282, 1024 }, { 6, 283, 1024 }, { 5, 284, 1024 }, { 6, 285, 1024 }, { 6, 286, 1024 }, { 7, 287, 1024 },
+ { 3, 288, 1024 }, { 4, 289, 1024 }, { 4, 290, 1024 }, { 5, 291, 1024 }, { 4, 292, 1024 }, { 5, 293, 1024 }, { 5, 294, 1024 }, { 6, 295, 1024 },
+ { 4, 296, 1024 }, { 5, 297, 1024 }, { 5, 298, 1024 }, { 6, 299, 1024 }, { 5, 300, 1024 }, { 6, 301, 1024 }, { 6, 302, 1024 }, { 7, 303, 1024 },
+ { 4, 304, 1024 }, { 5, 305, 1024 }, { 5, 306, 1024 }, { 6, 307, 1024 }, { 5, 308, 1024 }, { 6, 309, 1024 }, { 6, 310, 1024 }, { 7, 311, 1024 },
+ { 5, 312, 1024 }, { 6, 313, 1024 }, { 6, 314, 1024 }, { 7, 315, 1024 }, { 6, 316, 1024 }, { 7, 317, 1024 }, { 7, 318, 1024 }, { 8, 319, 1024 },
+ { 3, 320, 1024 }, { 4, 321, 1024 }, { 4, 322, 1024 }, { 5, 323, 1024 }, { 4, 324, 1024 }, { 5, 325, 1024 }, { 5, 326, 1024 }, { 6, 327, 1024 },
+ { 4, 328, 1024 }, { 5, 329, 1024 }, { 5, 330, 1024 }, { 6, 331, 1024 }, { 5, 332, 1024 }, { 6, 333, 1024 }, { 6, 334, 1024 }, { 7, 335, 1024 },
+ { 4, 336, 1024 }, { 5, 337, 1024 }, { 5, 338, 1024 }, { 6, 339, 1024 }, { 5, 340, 1024 }, { 6, 341, 1024 }, { 6, 342, 1024 }, { 7, 343, 1024 },
+ { 5, 344, 1024 }, { 6, 345, 1024 }, { 6, 346, 1024 }, { 7, 347, 1024 }, { 6, 348, 1024 }, { 7, 349, 1024 }, { 7, 350, 1024 }, { 8, 351, 1024 },
+ { 4, 352, 1024 }, { 5, 353, 1024 }, { 5, 354, 1024 }, { 6, 355, 1024 }, { 5, 356, 1024 }, { 6, 357, 1024 }, { 6, 358, 1024 }, { 7, 359, 1024 },
+ { 5, 360, 1024 }, { 6, 361, 1024 }, { 6, 362, 1024 }, { 7, 363, 1024 }, { 6, 364, 1024 }, { 7, 365, 1024 }, { 7, 366, 1024 }, { 8, 367, 1024 },
+ { 5, 368, 1024 }, { 6, 369, 1024 }, { 6, 370, 1024 }, { 7, 371, 1024 }, { 6, 372, 1024 }, { 7, 373, 1024 }, { 7, 374, 1024 }, { 8, 375, 1024 },
+ { 6, 376, 1024 }, { 7, 377, 1024 }, { 7, 378, 1024 }, { 8, 379, 1024 }, { 7, 380, 1024 }, { 8, 381, 1024 }, { 8, 382, 1024 }, { 9, 383, 1024 },
+ { 3, 384, 1024 }, { 4, 385, 1024 }, { 4, 386, 1024 }, { 5, 387, 1024 }, { 4, 388, 1024 }, { 5, 389, 1024 }, { 5, 390, 1024 }, { 6, 391, 1024 },
+ { 4, 392, 1024 }, { 5, 393, 1024 }, { 5, 394, 1024 }, { 6, 395, 1024 }, { 5, 396, 1024 }, { 6, 397, 1024 }, { 6, 398, 1024 }, { 7, 399, 1024 },
+ { 4, 400, 1024 }, { 5, 401, 1024 }, { 5, 402, 1024 }, { 6, 403, 1024 }, { 5, 404, 1024 }, { 6, 405, 1024 }, { 6, 406, 1024 }, { 7, 407, 1024 },
+ { 5, 408, 1024 }, { 6, 409, 1024 }, { 6, 410, 1024 }, { 7, 411, 1024 }, { 6, 412, 1024 }, { 7, 413, 1024 }, { 7, 414, 1024 }, { 8, 415, 1024 },
+ { 4, 416, 1024 }, { 5, 417, 1024 }, { 5, 418, 1024 }, { 6, 419, 1024 }, { 5, 420, 1024 }, { 6, 421, 1024 }, { 6, 422, 1024 }, { 7, 423, 1024 },
+ { 5, 424, 1024 }, { 6, 425, 1024 }, { 6, 426, 1024 }, { 7, 427, 1024 }, { 6, 428, 1024 }, { 7, 429, 1024 }, { 7, 430, 1024 }, { 8, 431, 1024 },
+ { 5, 432, 1024 }, { 6, 433, 1024 }, { 6, 434, 1024 }, { 7, 435, 1024 }, { 6, 436, 1024 }, { 7, 437, 1024 }, { 7, 438, 1024 }, { 8, 439, 1024 },
+ { 6, 440, 1024 }, { 7, 441, 1024 }, { 7, 442, 1024 }, { 8, 443, 1024 }, { 7, 444, 1024 }, { 8, 445, 1024 }, { 8, 446, 1024 }, { 9, 447, 1024 },
+ { 4, 448, 1024 }, { 5, 449, 1024 }, { 5, 450, 1024 }, { 6, 451, 1024 }, { 5, 452, 1024 }, { 6, 453, 1024 }, { 6, 454, 1024 }, { 7, 455, 1024 },
+ { 5, 456, 1024 }, { 6, 457, 1024 }, { 6, 458, 1024 }, { 7, 459, 1024 }, { 6, 460, 1024 }, { 7, 461, 1024 }, { 7, 462, 1024 }, { 8, 463, 1024 },
+ { 5, 464, 1024 }, { 6, 465, 1024 }, { 6, 466, 1024 }, { 7, 467, 1024 }, { 6, 468, 1024 }, { 7, 469, 1024 }, { 7, 470, 1024 }, { 8, 471, 1024 },
+ { 6, 472, 1024 }, { 7, 473, 1024 }, { 7, 474, 1024 }, { 8, 475, 1024 }, { 7, 476, 1024 }, { 8, 477, 1024 }, { 8, 478, 1024 }, { 9, 479, 1024 },
+ { 5, 480, 1024 }, { 6, 481, 1024 }, { 6, 482, 1024 }, { 7, 483, 1024 }, { 6, 484, 1024 }, { 7, 485, 1024 }, { 7, 486, 1024 }, { 8, 487, 1024 },
+ { 6, 488, 1024 }, { 7, 489, 1024 }, { 7, 490, 1024 }, { 8, 491, 1024 }, { 7, 492, 1024 }, { 8, 493, 1024 }, { 8, 494, 1024 }, { 9, 495, 1024 },
+ { 6, 496, 1024 }, { 7, 497, 1024 }, { 7, 498, 1024 }, { 8, 499, 1024 }, { 7, 500, 1024 }, { 8, 501, 1024 }, { 8, 502, 1024 }, { 9, 503, 1024 },
+ { 7, 504, 1024 }, { 8, 505, 1024 }, { 8, 506, 1024 }, { 9, 507, 1024 }, { 8, 508, 1024 }, { 9, 509, 1024 }, { 9, 510, 1024 }, { 10, 511, 1024 },
+ { 2, 512, 1024 }, { 3, 513, 1024 }, { 3, 514, 1024 }, { 4, 515, 1024 }, { 3, 516, 1024 }, { 4, 517, 1024 }, { 4, 518, 1024 }, { 5, 519, 1024 },
+ { 3, 520, 1024 }, { 4, 521, 1024 }, { 4, 522, 1024 }, { 5, 523, 1024 }, { 4, 524, 1024 }, { 5, 525, 1024 }, { 5, 526, 1024 }, { 6, 527, 1024 },
+ { 3, 528, 1024 }, { 4, 529, 1024 }, { 4, 530, 1024 }, { 5, 531, 1024 }, { 4, 532, 1024 }, { 5, 533, 1024 }, { 5, 534, 1024 }, { 6, 535, 1024 },
+ { 4, 536, 1024 }, { 5, 537, 1024 }, { 5, 538, 1024 }, { 6, 539, 1024 }, { 5, 540, 1024 }, { 6, 541, 1024 }, { 6, 542, 1024 }, { 7, 543, 1024 },
+ { 3, 544, 1024 }, { 4, 545, 1024 }, { 4, 546, 1024 }, { 5, 547, 1024 }, { 4, 548, 1024 }, { 5, 549, 1024 }, { 5, 550, 1024 }, { 6, 551, 1024 },
+ { 4, 552, 1024 }, { 5, 553, 1024 }, { 5, 554, 1024 }, { 6, 555, 1024 }, { 5, 556, 1024 }, { 6, 557, 1024 }, { 6, 558, 1024 }, { 7, 559, 1024 },
+ { 4, 560, 1024 }, { 5, 561, 1024 }, { 5, 562, 1024 }, { 6, 563, 1024 }, { 5, 564, 1024 }, { 6, 565, 1024 }, { 6, 566, 1024 }, { 7, 567, 1024 },
+ { 5, 568, 1024 }, { 6, 569, 1024 }, { 6, 570, 1024 }, { 7, 571, 1024 }, { 6, 572, 1024 }, { 7, 573, 1024 }, { 7, 574, 1024 }, { 8, 575, 1024 },
+ { 3, 576, 1024 }, { 4, 577, 1024 }, { 4, 578, 1024 }, { 5, 579, 1024 }, { 4, 580, 1024 }, { 5, 581, 1024 }, { 5, 582, 1024 }, { 6, 583, 1024 },
+ { 4, 584, 1024 }, { 5, 585, 1024 }, { 5, 586, 1024 }, { 6, 587, 1024 }, { 5, 588, 1024 }, { 6, 589, 1024 }, { 6, 590, 1024 }, { 7, 591, 1024 },
+ { 4, 592, 1024 }, { 5, 593, 1024 }, { 5, 594, 1024 }, { 6, 595, 1024 }, { 5, 596, 1024 }, { 6, 597, 1024 }, { 6, 598, 1024 }, { 7, 599, 1024 },
+ { 5, 600, 1024 }, { 6, 601, 1024 }, { 6, 602, 1024 }, { 7, 603, 1024 }, { 6, 604, 1024 }, { 7, 605, 1024 }, { 7, 606, 1024 }, { 8, 607, 1024 },
+ { 4, 608, 1024 }, { 5, 609, 1024 }, { 5, 610, 1024 }, { 6, 611, 1024 }, { 5, 612, 1024 }, { 6, 613, 1024 }, { 6, 614, 1024 }, { 7, 615, 1024 },
+ { 5, 616, 1024 }, { 6, 617, 1024 }, { 6, 618, 1024 }, { 7, 619, 1024 }, { 6, 620, 1024 }, { 7, 621, 1024 }, { 7, 622, 1024 }, { 8, 623, 1024 },
+ { 5, 624, 1024 }, { 6, 625, 1024 }, { 6, 626, 1024 }, { 7, 627, 1024 }, { 6, 628, 1024 }, { 7, 629, 1024 }, { 7, 630, 1024 }, { 8, 631, 1024 },
+ { 6, 632, 1024 }, { 7, 633, 1024 }, { 7, 634, 1024 }, { 8, 635, 1024 }, { 7, 636, 1024 }, { 8, 637, 1024 }, { 8, 638, 1024 }, { 9, 639, 1024 },
+ { 3, 640, 1024 }, { 4, 641, 1024 }, { 4, 642, 1024 }, { 5, 643, 1024 }, { 4, 644, 1024 }, { 5, 645, 1024 }, { 5, 646, 1024 }, { 6, 647, 1024 },
+ { 4, 648, 1024 }, { 5, 649, 1024 }, { 5, 650, 1024 }, { 6, 651, 1024 }, { 5, 652, 1024 }, { 6, 653, 1024 }, { 6, 654, 1024 }, { 7, 655, 1024 },
+ { 4, 656, 1024 }, { 5, 657, 1024 }, { 5, 658, 1024 }, { 6, 659, 1024 }, { 5, 660, 1024 }, { 6, 661, 1024 }, { 6, 662, 1024 }, { 7, 663, 1024 },
+ { 5, 664, 1024 }, { 6, 665, 1024 }, { 6, 666, 1024 }, { 7, 667, 1024 }, { 6, 668, 1024 }, { 7, 669, 1024 }, { 7, 670, 1024 }, { 8, 671, 1024 },
+ { 4, 672, 1024 }, { 5, 673, 1024 }, { 5, 674, 1024 }, { 6, 675, 1024 }, { 5, 676, 1024 }, { 6, 677, 1024 }, { 6, 678, 1024 }, { 7, 679, 1024 },
+ { 5, 680, 1024 }, { 6, 681, 1024 }, { 6, 682, 1024 }, { 7, 683, 1024 }, { 6, 684, 1024 }, { 7, 685, 1024 }, { 7, 686, 1024 }, { 8, 687, 1024 },
+ { 5, 688, 1024 }, { 6, 689, 1024 }, { 6, 690, 1024 }, { 7, 691, 1024 }, { 6, 692, 1024 }, { 7, 693, 1024 }, { 7, 694, 1024 }, { 8, 695, 1024 },
+ { 6, 696, 1024 }, { 7, 697, 1024 }, { 7, 698, 1024 }, { 8, 699, 1024 }, { 7, 700, 1024 }, { 8, 701, 1024 }, { 8, 702, 1024 }, { 9, 703, 1024 },
+ { 4, 704, 1024 }, { 5, 705, 1024 }, { 5, 706, 1024 }, { 6, 707, 1024 }, { 5, 708, 1024 }, { 6, 709, 1024 }, { 6, 710, 1024 }, { 7, 711, 1024 },
+ { 5, 712, 1024 }, { 6, 713, 1024 }, { 6, 714, 1024 }, { 7, 715, 1024 }, { 6, 716, 1024 }, { 7, 717, 1024 }, { 7, 718, 1024 }, { 8, 719, 1024 },
+ { 5, 720, 1024 }, { 6, 721, 1024 }, { 6, 722, 1024 }, { 7, 723, 1024 }, { 6, 724, 1024 }, { 7, 725, 1024 }, { 7, 726, 1024 }, { 8, 727, 1024 },
+ { 6, 728, 1024 }, { 7, 729, 1024 }, { 7, 730, 1024 }, { 8, 731, 1024 }, { 7, 732, 1024 }, { 8, 733, 1024 }, { 8, 734, 1024 }, { 9, 735, 1024 },
+ { 5, 736, 1024 }, { 6, 737, 1024 }, { 6, 738, 1024 }, { 7, 739, 1024 }, { 6, 740, 1024 }, { 7, 741, 1024 }, { 7, 742, 1024 }, { 8, 743, 1024 },
+ { 6, 744, 1024 }, { 7, 745, 1024 }, { 7, 746, 1024 }, { 8, 747, 1024 }, { 7, 748, 1024 }, { 8, 749, 1024 }, { 8, 750, 1024 }, { 9, 751, 1024 },
+ { 6, 752, 1024 }, { 7, 753, 1024 }, { 7, 754, 1024 }, { 8, 755, 1024 }, { 7, 756, 1024 }, { 8, 757, 1024 }, { 8, 758, 1024 }, { 9, 759, 1024 },
+ { 7, 760, 1024 }, { 8, 761, 1024 }, { 8, 762, 1024 }, { 9, 763, 1024 }, { 8, 764, 1024 }, { 9, 765, 1024 }, { 9, 766, 1024 }, { 10, 767, 1024 },
+ { 3, 768, 1024 }, { 4, 769, 1024 }, { 4, 770, 1024 }, { 5, 771, 1024 }, { 4, 772, 1024 }, { 5, 773, 1024 }, { 5, 774, 1024 }, { 6, 775, 1024 },
+ { 4, 776, 1024 }, { 5, 777, 1024 }, { 5, 778, 1024 }, { 6, 779, 1024 }, { 5, 780, 1024 }, { 6, 781, 1024 }, { 6, 782, 1024 }, { 7, 783, 1024 },
+ { 4, 784, 1024 }, { 5, 785, 1024 }, { 5, 786, 1024 }, { 6, 787, 1024 }, { 5, 788, 1024 }, { 6, 789, 1024 }, { 6, 790, 1024 }, { 7, 791, 1024 },
+ { 5, 792, 1024 }, { 6, 793, 1024 }, { 6, 794, 1024 }, { 7, 795, 1024 }, { 6, 796, 1024 }, { 7, 797, 1024 }, { 7, 798, 1024 }, { 8, 799, 1024 },
+ { 4, 800, 1024 }, { 5, 801, 1024 }, { 5, 802, 1024 }, { 6, 803, 1024 }, { 5, 804, 1024 }, { 6, 805, 1024 }, { 6, 806, 1024 }, { 7, 807, 1024 },
+ { 5, 808, 1024 }, { 6, 809, 1024 }, { 6, 810, 1024 }, { 7, 811, 1024 }, { 6, 812, 1024 }, { 7, 813, 1024 }, { 7, 814, 1024 }, { 8, 815, 1024 },
+ { 5, 816, 1024 }, { 6, 817, 1024 }, { 6, 818, 1024 }, { 7, 819, 1024 }, { 6, 820, 1024 }, { 7, 821, 1024 }, { 7, 822, 1024 }, { 8, 823, 1024 },
+ { 6, 824, 1024 }, { 7, 825, 1024 }, { 7, 826, 1024 }, { 8, 827, 1024 }, { 7, 828, 1024 }, { 8, 829, 1024 }, { 8, 830, 1024 }, { 9, 831, 1024 },
+ { 4, 832, 1024 }, { 5, 833, 1024 }, { 5, 834, 1024 }, { 6, 835, 1024 }, { 5, 836, 1024 }, { 6, 837, 1024 }, { 6, 838, 1024 }, { 7, 839, 1024 },
+ { 5, 840, 1024 }, { 6, 841, 1024 }, { 6, 842, 1024 }, { 7, 843, 1024 }, { 6, 844, 1024 }, { 7, 845, 1024 }, { 7, 846, 1024 }, { 8, 847, 1024 },
+ { 5, 848, 1024 }, { 6, 849, 1024 }, { 6, 850, 1024 }, { 7, 851, 1024 }, { 6, 852, 1024 }, { 7, 853, 1024 }, { 7, 854, 1024 }, { 8, 855, 1024 },
+ { 6, 856, 1024 }, { 7, 857, 1024 }, { 7, 858, 1024 }, { 8, 859, 1024 }, { 7, 860, 1024 }, { 8, 861, 1024 }, { 8, 862, 1024 }, { 9, 863, 1024 },
+ { 5, 864, 1024 }, { 6, 865, 1024 }, { 6, 866, 1024 }, { 7, 867, 1024 }, { 6, 868, 1024 }, { 7, 869, 1024 }, { 7, 870, 1024 }, { 8, 871, 1024 },
+ { 6, 872, 1024 }, { 7, 873, 1024 }, { 7, 874, 1024 }, { 8, 875, 1024 }, { 7, 876, 1024 }, { 8, 877, 1024 }, { 8, 878, 1024 }, { 9, 879, 1024 },
+ { 6, 880, 1024 }, { 7, 881, 1024 }, { 7, 882, 1024 }, { 8, 883, 1024 }, { 7, 884, 1024 }, { 8, 885, 1024 }, { 8, 886, 1024 }, { 9, 887, 1024 },
+ { 7, 888, 1024 }, { 8, 889, 1024 }, { 8, 890, 1024 }, { 9, 891, 1024 }, { 8, 892, 1024 }, { 9, 893, 1024 }, { 9, 894, 1024 }, { 10, 895, 1024 },
+ { 4, 896, 1024 }, { 5, 897, 1024 }, { 5, 898, 1024 }, { 6, 899, 1024 }, { 5, 900, 1024 }, { 6, 901, 1024 }, { 6, 902, 1024 }, { 7, 903, 1024 },
+ { 5, 904, 1024 }, { 6, 905, 1024 }, { 6, 906, 1024 }, { 7, 907, 1024 }, { 6, 908, 1024 }, { 7, 909, 1024 }, { 7, 910, 1024 }, { 8, 911, 1024 },
+ { 5, 912, 1024 }, { 6, 913, 1024 }, { 6, 914, 1024 }, { 7, 915, 1024 }, { 6, 916, 1024 }, { 7, 917, 1024 }, { 7, 918, 1024 }, { 8, 919, 1024 },
+ { 6, 920, 1024 }, { 7, 921, 1024 }, { 7, 922, 1024 }, { 8, 923, 1024 }, { 7, 924, 1024 }, { 8, 925, 1024 }, { 8, 926, 1024 }, { 9, 927, 1024 },
+ { 5, 928, 1024 }, { 6, 929, 1024 }, { 6, 930, 1024 }, { 7, 931, 1024 }, { 6, 932, 1024 }, { 7, 933, 1024 }, { 7, 934, 1024 }, { 8, 935, 1024 },
+ { 6, 936, 1024 }, { 7, 937, 1024 }, { 7, 938, 1024 }, { 8, 939, 1024 }, { 7, 940, 1024 }, { 8, 941, 1024 }, { 8, 942, 1024 }, { 9, 943, 1024 },
+ { 6, 944, 1024 }, { 7, 945, 1024 }, { 7, 946, 1024 }, { 8, 947, 1024 }, { 7, 948, 1024 }, { 8, 949, 1024 }, { 8, 950, 1024 }, { 9, 951, 1024 },
+ { 7, 952, 1024 }, { 8, 953, 1024 }, { 8, 954, 1024 }, { 9, 955, 1024 }, { 8, 956, 1024 }, { 9, 957, 1024 }, { 9, 958, 1024 }, { 10, 959, 1024 },
+ { 5, 960, 1024 }, { 6, 961, 1024 }, { 6, 962, 1024 }, { 7, 963, 1024 }, { 6, 964, 1024 }, { 7, 965, 1024 }, { 7, 966, 1024 }, { 8, 967, 1024 },
+ { 6, 968, 1024 }, { 7, 969, 1024 }, { 7, 970, 1024 }, { 8, 971, 1024 }, { 7, 972, 1024 }, { 8, 973, 1024 }, { 8, 974, 1024 }, { 9, 975, 1024 },
+ { 6, 976, 1024 }, { 7, 977, 1024 }, { 7, 978, 1024 }, { 8, 979, 1024 }, { 7, 980, 1024 }, { 8, 981, 1024 }, { 8, 982, 1024 }, { 9, 983, 1024 },
+ { 7, 984, 1024 }, { 8, 985, 1024 }, { 8, 986, 1024 }, { 9, 987, 1024 }, { 8, 988, 1024 }, { 9, 989, 1024 }, { 9, 990, 1024 }, { 10, 991, 1024 },
+ { 6, 992, 1024 }, { 7, 993, 1024 }, { 7, 994, 1024 }, { 8, 995, 1024 }, { 7, 996, 1024 }, { 8, 997, 1024 }, { 8, 998, 1024 }, { 9, 999, 1024 },
+ { 7, 1000, 1024 }, { 8, 1001, 1024 }, { 8, 1002, 1024 }, { 9, 1003, 1024 }, { 8, 1004, 1024 }, { 9, 1005, 1024 }, { 9, 1006, 1024 }, { 10, 1007, 1024 },
+ { 7, 1008, 1024 }, { 8, 1009, 1024 }, { 8, 1010, 1024 }, { 9, 1011, 1024 }, { 8, 1012, 1024 }, { 9, 1013, 1024 }, { 9, 1014, 1024 }, { 10, 1015, 1024 },
+ { 8, 1016, 1024 }, { 9, 1017, 1024 }, { 9, 1018, 1024 }, { 10, 1019, 1024 }, { 9, 1020, 1024 }, { 10, 1021, 1024 }, { 10, 1022, 1024 }, { 11, 1023, 1024 },
#if FP_LUT > 11
- { 1, 0, 0 }, { 2, 1, 2048 }, { 2, 2, 2048 }, { 3, 3, 2048 }, { 2, 4, 2048 }, { 3, 5, 2048 }, { 3, 6, 2048 }, { 4, 7, 2048 },
- { 2, 8, 2048 }, { 3, 9, 2048 }, { 3, 10, 2048 }, { 4, 11, 2048 }, { 3, 12, 2048 }, { 4, 13, 2048 }, { 4, 14, 2048 }, { 5, 15, 2048 },
- { 2, 16, 2048 }, { 3, 17, 2048 }, { 3, 18, 2048 }, { 4, 19, 2048 }, { 3, 20, 2048 }, { 4, 21, 2048 }, { 4, 22, 2048 }, { 5, 23, 2048 },
- { 3, 24, 2048 }, { 4, 25, 2048 }, { 4, 26, 2048 }, { 5, 27, 2048 }, { 4, 28, 2048 }, { 5, 29, 2048 }, { 5, 30, 2048 }, { 6, 31, 2048 },
- { 2, 32, 2048 }, { 3, 33, 2048 }, { 3, 34, 2048 }, { 4, 35, 2048 }, { 3, 36, 2048 }, { 4, 37, 2048 }, { 4, 38, 2048 }, { 5, 39, 2048 },
- { 3, 40, 2048 }, { 4, 41, 2048 }, { 4, 42, 2048 }, { 5, 43, 2048 }, { 4, 44, 2048 }, { 5, 45, 2048 }, { 5, 46, 2048 }, { 6, 47, 2048 },
- { 3, 48, 2048 }, { 4, 49, 2048 }, { 4, 50, 2048 }, { 5, 51, 2048 }, { 4, 52, 2048 }, { 5, 53, 2048 }, { 5, 54, 2048 }, { 6, 55, 2048 },
- { 4, 56, 2048 }, { 5, 57, 2048 }, { 5, 58, 2048 }, { 6, 59, 2048 }, { 5, 60, 2048 }, { 6, 61, 2048 }, { 6, 62, 2048 }, { 7, 63, 2048 },
- { 2, 64, 2048 }, { 3, 65, 2048 }, { 3, 66, 2048 }, { 4, 67, 2048 }, { 3, 68, 2048 }, { 4, 69, 2048 }, { 4, 70, 2048 }, { 5, 71, 2048 },
- { 3, 72, 2048 }, { 4, 73, 2048 }, { 4, 74, 2048 }, { 5, 75, 2048 }, { 4, 76, 2048 }, { 5, 77, 2048 }, { 5, 78, 2048 }, { 6, 79, 2048 },
- { 3, 80, 2048 }, { 4, 81, 2048 }, { 4, 82, 2048 }, { 5, 83, 2048 }, { 4, 84, 2048 }, { 5, 85, 2048 }, { 5, 86, 2048 }, { 6, 87, 2048 },
- { 4, 88, 2048 }, { 5, 89, 2048 }, { 5, 90, 2048 }, { 6, 91, 2048 }, { 5, 92, 2048 }, { 6, 93, 2048 }, { 6, 94, 2048 }, { 7, 95, 2048 },
- { 3, 96, 2048 }, { 4, 97, 2048 }, { 4, 98, 2048 }, { 5, 99, 2048 }, { 4, 100, 2048 }, { 5, 101, 2048 }, { 5, 102, 2048 }, { 6, 103, 2048 },
- { 4, 104, 2048 }, { 5, 105, 2048 }, { 5, 106, 2048 }, { 6, 107, 2048 }, { 5, 108, 2048 }, { 6, 109, 2048 }, { 6, 110, 2048 }, { 7, 111, 2048 },
- { 4, 112, 2048 }, { 5, 113, 2048 }, { 5, 114, 2048 }, { 6, 115, 2048 }, { 5, 116, 2048 }, { 6, 117, 2048 }, { 6, 118, 2048 }, { 7, 119, 2048 },
- { 5, 120, 2048 }, { 6, 121, 2048 }, { 6, 122, 2048 }, { 7, 123, 2048 }, { 6, 124, 2048 }, { 7, 125, 2048 }, { 7, 126, 2048 }, { 8, 127, 2048 },
- { 2, 128, 2048 }, { 3, 129, 2048 }, { 3, 130, 2048 }, { 4, 131, 2048 }, { 3, 132, 2048 }, { 4, 133, 2048 }, { 4, 134, 2048 }, { 5, 135, 2048 },
- { 3, 136, 2048 }, { 4, 137, 2048 }, { 4, 138, 2048 }, { 5, 139, 2048 }, { 4, 140, 2048 }, { 5, 141, 2048 }, { 5, 142, 2048 }, { 6, 143, 2048 },
- { 3, 144, 2048 }, { 4, 145, 2048 }, { 4, 146, 2048 }, { 5, 147, 2048 }, { 4, 148, 2048 }, { 5, 149, 2048 }, { 5, 150, 2048 }, { 6, 151, 2048 },
- { 4, 152, 2048 }, { 5, 153, 2048 }, { 5, 154, 2048 }, { 6, 155, 2048 }, { 5, 156, 2048 }, { 6, 157, 2048 }, { 6, 158, 2048 }, { 7, 159, 2048 },
- { 3, 160, 2048 }, { 4, 161, 2048 }, { 4, 162, 2048 }, { 5, 163, 2048 }, { 4, 164, 2048 }, { 5, 165, 2048 }, { 5, 166, 2048 }, { 6, 167, 2048 },
- { 4, 168, 2048 }, { 5, 169, 2048 }, { 5, 170, 2048 }, { 6, 171, 2048 }, { 5, 172, 2048 }, { 6, 173, 2048 }, { 6, 174, 2048 }, { 7, 175, 2048 },
- { 4, 176, 2048 }, { 5, 177, 2048 }, { 5, 178, 2048 }, { 6, 179, 2048 }, { 5, 180, 2048 }, { 6, 181, 2048 }, { 6, 182, 2048 }, { 7, 183, 2048 },
- { 5, 184, 2048 }, { 6, 185, 2048 }, { 6, 186, 2048 }, { 7, 187, 2048 }, { 6, 188, 2048 }, { 7, 189, 2048 }, { 7, 190, 2048 }, { 8, 191, 2048 },
- { 3, 192, 2048 }, { 4, 193, 2048 }, { 4, 194, 2048 }, { 5, 195, 2048 }, { 4, 196, 2048 }, { 5, 197, 2048 }, { 5, 198, 2048 }, { 6, 199, 2048 },
- { 4, 200, 2048 }, { 5, 201, 2048 }, { 5, 202, 2048 }, { 6, 203, 2048 }, { 5, 204, 2048 }, { 6, 205, 2048 }, { 6, 206, 2048 }, { 7, 207, 2048 },
- { 4, 208, 2048 }, { 5, 209, 2048 }, { 5, 210, 2048 }, { 6, 211, 2048 }, { 5, 212, 2048 }, { 6, 213, 2048 }, { 6, 214, 2048 }, { 7, 215, 2048 },
- { 5, 216, 2048 }, { 6, 217, 2048 }, { 6, 218, 2048 }, { 7, 219, 2048 }, { 6, 220, 2048 }, { 7, 221, 2048 }, { 7, 222, 2048 }, { 8, 223, 2048 },
- { 4, 224, 2048 }, { 5, 225, 2048 }, { 5, 226, 2048 }, { 6, 227, 2048 }, { 5, 228, 2048 }, { 6, 229, 2048 }, { 6, 230, 2048 }, { 7, 231, 2048 },
- { 5, 232, 2048 }, { 6, 233, 2048 }, { 6, 234, 2048 }, { 7, 235, 2048 }, { 6, 236, 2048 }, { 7, 237, 2048 }, { 7, 238, 2048 }, { 8, 239, 2048 },
- { 5, 240, 2048 }, { 6, 241, 2048 }, { 6, 242, 2048 }, { 7, 243, 2048 }, { 6, 244, 2048 }, { 7, 245, 2048 }, { 7, 246, 2048 }, { 8, 247, 2048 },
- { 6, 248, 2048 }, { 7, 249, 2048 }, { 7, 250, 2048 }, { 8, 251, 2048 }, { 7, 252, 2048 }, { 8, 253, 2048 }, { 8, 254, 2048 }, { 9, 255, 2048 },
- { 2, 256, 2048 }, { 3, 257, 2048 }, { 3, 258, 2048 }, { 4, 259, 2048 }, { 3, 260, 2048 }, { 4, 261, 2048 }, { 4, 262, 2048 }, { 5, 263, 2048 },
- { 3, 264, 2048 }, { 4, 265, 2048 }, { 4, 266, 2048 }, { 5, 267, 2048 }, { 4, 268, 2048 }, { 5, 269, 2048 }, { 5, 270, 2048 }, { 6, 271, 2048 },
- { 3, 272, 2048 }, { 4, 273, 2048 }, { 4, 274, 2048 }, { 5, 275, 2048 }, { 4, 276, 2048 }, { 5, 277, 2048 }, { 5, 278, 2048 }, { 6, 279, 2048 },
- { 4, 280, 2048 }, { 5, 281, 2048 }, { 5, 282, 2048 }, { 6, 283, 2048 }, { 5, 284, 2048 }, { 6, 285, 2048 }, { 6, 286, 2048 }, { 7, 287, 2048 },
- { 3, 288, 2048 }, { 4, 289, 2048 }, { 4, 290, 2048 }, { 5, 291, 2048 }, { 4, 292, 2048 }, { 5, 293, 2048 }, { 5, 294, 2048 }, { 6, 295, 2048 },
- { 4, 296, 2048 }, { 5, 297, 2048 }, { 5, 298, 2048 }, { 6, 299, 2048 }, { 5, 300, 2048 }, { 6, 301, 2048 }, { 6, 302, 2048 }, { 7, 303, 2048 },
- { 4, 304, 2048 }, { 5, 305, 2048 }, { 5, 306, 2048 }, { 6, 307, 2048 }, { 5, 308, 2048 }, { 6, 309, 2048 }, { 6, 310, 2048 }, { 7, 311, 2048 },
- { 5, 312, 2048 }, { 6, 313, 2048 }, { 6, 314, 2048 }, { 7, 315, 2048 }, { 6, 316, 2048 }, { 7, 317, 2048 }, { 7, 318, 2048 }, { 8, 319, 2048 },
- { 3, 320, 2048 }, { 4, 321, 2048 }, { 4, 322, 2048 }, { 5, 323, 2048 }, { 4, 324, 2048 }, { 5, 325, 2048 }, { 5, 326, 2048 }, { 6, 327, 2048 },
- { 4, 328, 2048 }, { 5, 329, 2048 }, { 5, 330, 2048 }, { 6, 331, 2048 }, { 5, 332, 2048 }, { 6, 333, 2048 }, { 6, 334, 2048 }, { 7, 335, 2048 },
- { 4, 336, 2048 }, { 5, 337, 2048 }, { 5, 338, 2048 }, { 6, 339, 2048 }, { 5, 340, 2048 }, { 6, 341, 2048 }, { 6, 342, 2048 }, { 7, 343, 2048 },
- { 5, 344, 2048 }, { 6, 345, 2048 }, { 6, 346, 2048 }, { 7, 347, 2048 }, { 6, 348, 2048 }, { 7, 349, 2048 }, { 7, 350, 2048 }, { 8, 351, 2048 },
- { 4, 352, 2048 }, { 5, 353, 2048 }, { 5, 354, 2048 }, { 6, 355, 2048 }, { 5, 356, 2048 }, { 6, 357, 2048 }, { 6, 358, 2048 }, { 7, 359, 2048 },
- { 5, 360, 2048 }, { 6, 361, 2048 }, { 6, 362, 2048 }, { 7, 363, 2048 }, { 6, 364, 2048 }, { 7, 365, 2048 }, { 7, 366, 2048 }, { 8, 367, 2048 },
- { 5, 368, 2048 }, { 6, 369, 2048 }, { 6, 370, 2048 }, { 7, 371, 2048 }, { 6, 372, 2048 }, { 7, 373, 2048 }, { 7, 374, 2048 }, { 8, 375, 2048 },
- { 6, 376, 2048 }, { 7, 377, 2048 }, { 7, 378, 2048 }, { 8, 379, 2048 }, { 7, 380, 2048 }, { 8, 381, 2048 }, { 8, 382, 2048 }, { 9, 383, 2048 },
- { 3, 384, 2048 }, { 4, 385, 2048 }, { 4, 386, 2048 }, { 5, 387, 2048 }, { 4, 388, 2048 }, { 5, 389, 2048 }, { 5, 390, 2048 }, { 6, 391, 2048 },
- { 4, 392, 2048 }, { 5, 393, 2048 }, { 5, 394, 2048 }, { 6, 395, 2048 }, { 5, 396, 2048 }, { 6, 397, 2048 }, { 6, 398, 2048 }, { 7, 399, 2048 },
- { 4, 400, 2048 }, { 5, 401, 2048 }, { 5, 402, 2048 }, { 6, 403, 2048 }, { 5, 404, 2048 }, { 6, 405, 2048 }, { 6, 406, 2048 }, { 7, 407, 2048 },
- { 5, 408, 2048 }, { 6, 409, 2048 }, { 6, 410, 2048 }, { 7, 411, 2048 }, { 6, 412, 2048 }, { 7, 413, 2048 }, { 7, 414, 2048 }, { 8, 415, 2048 },
- { 4, 416, 2048 }, { 5, 417, 2048 }, { 5, 418, 2048 }, { 6, 419, 2048 }, { 5, 420, 2048 }, { 6, 421, 2048 }, { 6, 422, 2048 }, { 7, 423, 2048 },
- { 5, 424, 2048 }, { 6, 425, 2048 }, { 6, 426, 2048 }, { 7, 427, 2048 }, { 6, 428, 2048 }, { 7, 429, 2048 }, { 7, 430, 2048 }, { 8, 431, 2048 },
- { 5, 432, 2048 }, { 6, 433, 2048 }, { 6, 434, 2048 }, { 7, 435, 2048 }, { 6, 436, 2048 }, { 7, 437, 2048 }, { 7, 438, 2048 }, { 8, 439, 2048 },
- { 6, 440, 2048 }, { 7, 441, 2048 }, { 7, 442, 2048 }, { 8, 443, 2048 }, { 7, 444, 2048 }, { 8, 445, 2048 }, { 8, 446, 2048 }, { 9, 447, 2048 },
- { 4, 448, 2048 }, { 5, 449, 2048 }, { 5, 450, 2048 }, { 6, 451, 2048 }, { 5, 452, 2048 }, { 6, 453, 2048 }, { 6, 454, 2048 }, { 7, 455, 2048 },
- { 5, 456, 2048 }, { 6, 457, 2048 }, { 6, 458, 2048 }, { 7, 459, 2048 }, { 6, 460, 2048 }, { 7, 461, 2048 }, { 7, 462, 2048 }, { 8, 463, 2048 },
- { 5, 464, 2048 }, { 6, 465, 2048 }, { 6, 466, 2048 }, { 7, 467, 2048 }, { 6, 468, 2048 }, { 7, 469, 2048 }, { 7, 470, 2048 }, { 8, 471, 2048 },
- { 6, 472, 2048 }, { 7, 473, 2048 }, { 7, 474, 2048 }, { 8, 475, 2048 }, { 7, 476, 2048 }, { 8, 477, 2048 }, { 8, 478, 2048 }, { 9, 479, 2048 },
- { 5, 480, 2048 }, { 6, 481, 2048 }, { 6, 482, 2048 }, { 7, 483, 2048 }, { 6, 484, 2048 }, { 7, 485, 2048 }, { 7, 486, 2048 }, { 8, 487, 2048 },
- { 6, 488, 2048 }, { 7, 489, 2048 }, { 7, 490, 2048 }, { 8, 491, 2048 }, { 7, 492, 2048 }, { 8, 493, 2048 }, { 8, 494, 2048 }, { 9, 495, 2048 },
- { 6, 496, 2048 }, { 7, 497, 2048 }, { 7, 498, 2048 }, { 8, 499, 2048 }, { 7, 500, 2048 }, { 8, 501, 2048 }, { 8, 502, 2048 }, { 9, 503, 2048 },
- { 7, 504, 2048 }, { 8, 505, 2048 }, { 8, 506, 2048 }, { 9, 507, 2048 }, { 8, 508, 2048 }, { 9, 509, 2048 }, { 9, 510, 2048 }, { 10, 511, 2048 },
- { 2, 512, 2048 }, { 3, 513, 2048 }, { 3, 514, 2048 }, { 4, 515, 2048 }, { 3, 516, 2048 }, { 4, 517, 2048 }, { 4, 518, 2048 }, { 5, 519, 2048 },
- { 3, 520, 2048 }, { 4, 521, 2048 }, { 4, 522, 2048 }, { 5, 523, 2048 }, { 4, 524, 2048 }, { 5, 525, 2048 }, { 5, 526, 2048 }, { 6, 527, 2048 },
- { 3, 528, 2048 }, { 4, 529, 2048 }, { 4, 530, 2048 }, { 5, 531, 2048 }, { 4, 532, 2048 }, { 5, 533, 2048 }, { 5, 534, 2048 }, { 6, 535, 2048 },
- { 4, 536, 2048 }, { 5, 537, 2048 }, { 5, 538, 2048 }, { 6, 539, 2048 }, { 5, 540, 2048 }, { 6, 541, 2048 }, { 6, 542, 2048 }, { 7, 543, 2048 },
- { 3, 544, 2048 }, { 4, 545, 2048 }, { 4, 546, 2048 }, { 5, 547, 2048 }, { 4, 548, 2048 }, { 5, 549, 2048 }, { 5, 550, 2048 }, { 6, 551, 2048 },
- { 4, 552, 2048 }, { 5, 553, 2048 }, { 5, 554, 2048 }, { 6, 555, 2048 }, { 5, 556, 2048 }, { 6, 557, 2048 }, { 6, 558, 2048 }, { 7, 559, 2048 },
- { 4, 560, 2048 }, { 5, 561, 2048 }, { 5, 562, 2048 }, { 6, 563, 2048 }, { 5, 564, 2048 }, { 6, 565, 2048 }, { 6, 566, 2048 }, { 7, 567, 2048 },
- { 5, 568, 2048 }, { 6, 569, 2048 }, { 6, 570, 2048 }, { 7, 571, 2048 }, { 6, 572, 2048 }, { 7, 573, 2048 }, { 7, 574, 2048 }, { 8, 575, 2048 },
- { 3, 576, 2048 }, { 4, 577, 2048 }, { 4, 578, 2048 }, { 5, 579, 2048 }, { 4, 580, 2048 }, { 5, 581, 2048 }, { 5, 582, 2048 }, { 6, 583, 2048 },
- { 4, 584, 2048 }, { 5, 585, 2048 }, { 5, 586, 2048 }, { 6, 587, 2048 }, { 5, 588, 2048 }, { 6, 589, 2048 }, { 6, 590, 2048 }, { 7, 591, 2048 },
- { 4, 592, 2048 }, { 5, 593, 2048 }, { 5, 594, 2048 }, { 6, 595, 2048 }, { 5, 596, 2048 }, { 6, 597, 2048 }, { 6, 598, 2048 }, { 7, 599, 2048 },
- { 5, 600, 2048 }, { 6, 601, 2048 }, { 6, 602, 2048 }, { 7, 603, 2048 }, { 6, 604, 2048 }, { 7, 605, 2048 }, { 7, 606, 2048 }, { 8, 607, 2048 },
- { 4, 608, 2048 }, { 5, 609, 2048 }, { 5, 610, 2048 }, { 6, 611, 2048 }, { 5, 612, 2048 }, { 6, 613, 2048 }, { 6, 614, 2048 }, { 7, 615, 2048 },
- { 5, 616, 2048 }, { 6, 617, 2048 }, { 6, 618, 2048 }, { 7, 619, 2048 }, { 6, 620, 2048 }, { 7, 621, 2048 }, { 7, 622, 2048 }, { 8, 623, 2048 },
- { 5, 624, 2048 }, { 6, 625, 2048 }, { 6, 626, 2048 }, { 7, 627, 2048 }, { 6, 628, 2048 }, { 7, 629, 2048 }, { 7, 630, 2048 }, { 8, 631, 2048 },
- { 6, 632, 2048 }, { 7, 633, 2048 }, { 7, 634, 2048 }, { 8, 635, 2048 }, { 7, 636, 2048 }, { 8, 637, 2048 }, { 8, 638, 2048 }, { 9, 639, 2048 },
- { 3, 640, 2048 }, { 4, 641, 2048 }, { 4, 642, 2048 }, { 5, 643, 2048 }, { 4, 644, 2048 }, { 5, 645, 2048 }, { 5, 646, 2048 }, { 6, 647, 2048 },
- { 4, 648, 2048 }, { 5, 649, 2048 }, { 5, 650, 2048 }, { 6, 651, 2048 }, { 5, 652, 2048 }, { 6, 653, 2048 }, { 6, 654, 2048 }, { 7, 655, 2048 },
- { 4, 656, 2048 }, { 5, 657, 2048 }, { 5, 658, 2048 }, { 6, 659, 2048 }, { 5, 660, 2048 }, { 6, 661, 2048 }, { 6, 662, 2048 }, { 7, 663, 2048 },
- { 5, 664, 2048 }, { 6, 665, 2048 }, { 6, 666, 2048 }, { 7, 667, 2048 }, { 6, 668, 2048 }, { 7, 669, 2048 }, { 7, 670, 2048 }, { 8, 671, 2048 },
- { 4, 672, 2048 }, { 5, 673, 2048 }, { 5, 674, 2048 }, { 6, 675, 2048 }, { 5, 676, 2048 }, { 6, 677, 2048 }, { 6, 678, 2048 }, { 7, 679, 2048 },
- { 5, 680, 2048 }, { 6, 681, 2048 }, { 6, 682, 2048 }, { 7, 683, 2048 }, { 6, 684, 2048 }, { 7, 685, 2048 }, { 7, 686, 2048 }, { 8, 687, 2048 },
- { 5, 688, 2048 }, { 6, 689, 2048 }, { 6, 690, 2048 }, { 7, 691, 2048 }, { 6, 692, 2048 }, { 7, 693, 2048 }, { 7, 694, 2048 }, { 8, 695, 2048 },
- { 6, 696, 2048 }, { 7, 697, 2048 }, { 7, 698, 2048 }, { 8, 699, 2048 }, { 7, 700, 2048 }, { 8, 701, 2048 }, { 8, 702, 2048 }, { 9, 703, 2048 },
- { 4, 704, 2048 }, { 5, 705, 2048 }, { 5, 706, 2048 }, { 6, 707, 2048 }, { 5, 708, 2048 }, { 6, 709, 2048 }, { 6, 710, 2048 }, { 7, 711, 2048 },
- { 5, 712, 2048 }, { 6, 713, 2048 }, { 6, 714, 2048 }, { 7, 715, 2048 }, { 6, 716, 2048 }, { 7, 717, 2048 }, { 7, 718, 2048 }, { 8, 719, 2048 },
- { 5, 720, 2048 }, { 6, 721, 2048 }, { 6, 722, 2048 }, { 7, 723, 2048 }, { 6, 724, 2048 }, { 7, 725, 2048 }, { 7, 726, 2048 }, { 8, 727, 2048 },
- { 6, 728, 2048 }, { 7, 729, 2048 }, { 7, 730, 2048 }, { 8, 731, 2048 }, { 7, 732, 2048 }, { 8, 733, 2048 }, { 8, 734, 2048 }, { 9, 735, 2048 },
- { 5, 736, 2048 }, { 6, 737, 2048 }, { 6, 738, 2048 }, { 7, 739, 2048 }, { 6, 740, 2048 }, { 7, 741, 2048 }, { 7, 742, 2048 }, { 8, 743, 2048 },
- { 6, 744, 2048 }, { 7, 745, 2048 }, { 7, 746, 2048 }, { 8, 747, 2048 }, { 7, 748, 2048 }, { 8, 749, 2048 }, { 8, 750, 2048 }, { 9, 751, 2048 },
- { 6, 752, 2048 }, { 7, 753, 2048 }, { 7, 754, 2048 }, { 8, 755, 2048 }, { 7, 756, 2048 }, { 8, 757, 2048 }, { 8, 758, 2048 }, { 9, 759, 2048 },
- { 7, 760, 2048 }, { 8, 761, 2048 }, { 8, 762, 2048 }, { 9, 763, 2048 }, { 8, 764, 2048 }, { 9, 765, 2048 }, { 9, 766, 2048 }, { 10, 767, 2048 },
- { 3, 768, 2048 }, { 4, 769, 2048 }, { 4, 770, 2048 }, { 5, 771, 2048 }, { 4, 772, 2048 }, { 5, 773, 2048 }, { 5, 774, 2048 }, { 6, 775, 2048 },
- { 4, 776, 2048 }, { 5, 777, 2048 }, { 5, 778, 2048 }, { 6, 779, 2048 }, { 5, 780, 2048 }, { 6, 781, 2048 }, { 6, 782, 2048 }, { 7, 783, 2048 },
- { 4, 784, 2048 }, { 5, 785, 2048 }, { 5, 786, 2048 }, { 6, 787, 2048 }, { 5, 788, 2048 }, { 6, 789, 2048 }, { 6, 790, 2048 }, { 7, 791, 2048 },
- { 5, 792, 2048 }, { 6, 793, 2048 }, { 6, 794, 2048 }, { 7, 795, 2048 }, { 6, 796, 2048 }, { 7, 797, 2048 }, { 7, 798, 2048 }, { 8, 799, 2048 },
- { 4, 800, 2048 }, { 5, 801, 2048 }, { 5, 802, 2048 }, { 6, 803, 2048 }, { 5, 804, 2048 }, { 6, 805, 2048 }, { 6, 806, 2048 }, { 7, 807, 2048 },
- { 5, 808, 2048 }, { 6, 809, 2048 }, { 6, 810, 2048 }, { 7, 811, 2048 }, { 6, 812, 2048 }, { 7, 813, 2048 }, { 7, 814, 2048 }, { 8, 815, 2048 },
- { 5, 816, 2048 }, { 6, 817, 2048 }, { 6, 818, 2048 }, { 7, 819, 2048 }, { 6, 820, 2048 }, { 7, 821, 2048 }, { 7, 822, 2048 }, { 8, 823, 2048 },
- { 6, 824, 2048 }, { 7, 825, 2048 }, { 7, 826, 2048 }, { 8, 827, 2048 }, { 7, 828, 2048 }, { 8, 829, 2048 }, { 8, 830, 2048 }, { 9, 831, 2048 },
- { 4, 832, 2048 }, { 5, 833, 2048 }, { 5, 834, 2048 }, { 6, 835, 2048 }, { 5, 836, 2048 }, { 6, 837, 2048 }, { 6, 838, 2048 }, { 7, 839, 2048 },
- { 5, 840, 2048 }, { 6, 841, 2048 }, { 6, 842, 2048 }, { 7, 843, 2048 }, { 6, 844, 2048 }, { 7, 845, 2048 }, { 7, 846, 2048 }, { 8, 847, 2048 },
- { 5, 848, 2048 }, { 6, 849, 2048 }, { 6, 850, 2048 }, { 7, 851, 2048 }, { 6, 852, 2048 }, { 7, 853, 2048 }, { 7, 854, 2048 }, { 8, 855, 2048 },
- { 6, 856, 2048 }, { 7, 857, 2048 }, { 7, 858, 2048 }, { 8, 859, 2048 }, { 7, 860, 2048 }, { 8, 861, 2048 }, { 8, 862, 2048 }, { 9, 863, 2048 },
- { 5, 864, 2048 }, { 6, 865, 2048 }, { 6, 866, 2048 }, { 7, 867, 2048 }, { 6, 868, 2048 }, { 7, 869, 2048 }, { 7, 870, 2048 }, { 8, 871, 2048 },
- { 6, 872, 2048 }, { 7, 873, 2048 }, { 7, 874, 2048 }, { 8, 875, 2048 }, { 7, 876, 2048 }, { 8, 877, 2048 }, { 8, 878, 2048 }, { 9, 879, 2048 },
- { 6, 880, 2048 }, { 7, 881, 2048 }, { 7, 882, 2048 }, { 8, 883, 2048 }, { 7, 884, 2048 }, { 8, 885, 2048 }, { 8, 886, 2048 }, { 9, 887, 2048 },
- { 7, 888, 2048 }, { 8, 889, 2048 }, { 8, 890, 2048 }, { 9, 891, 2048 }, { 8, 892, 2048 }, { 9, 893, 2048 }, { 9, 894, 2048 }, { 10, 895, 2048 },
- { 4, 896, 2048 }, { 5, 897, 2048 }, { 5, 898, 2048 }, { 6, 899, 2048 }, { 5, 900, 2048 }, { 6, 901, 2048 }, { 6, 902, 2048 }, { 7, 903, 2048 },
- { 5, 904, 2048 }, { 6, 905, 2048 }, { 6, 906, 2048 }, { 7, 907, 2048 }, { 6, 908, 2048 }, { 7, 909, 2048 }, { 7, 910, 2048 }, { 8, 911, 2048 },
- { 5, 912, 2048 }, { 6, 913, 2048 }, { 6, 914, 2048 }, { 7, 915, 2048 }, { 6, 916, 2048 }, { 7, 917, 2048 }, { 7, 918, 2048 }, { 8, 919, 2048 },
- { 6, 920, 2048 }, { 7, 921, 2048 }, { 7, 922, 2048 }, { 8, 923, 2048 }, { 7, 924, 2048 }, { 8, 925, 2048 }, { 8, 926, 2048 }, { 9, 927, 2048 },
- { 5, 928, 2048 }, { 6, 929, 2048 }, { 6, 930, 2048 }, { 7, 931, 2048 }, { 6, 932, 2048 }, { 7, 933, 2048 }, { 7, 934, 2048 }, { 8, 935, 2048 },
- { 6, 936, 2048 }, { 7, 937, 2048 }, { 7, 938, 2048 }, { 8, 939, 2048 }, { 7, 940, 2048 }, { 8, 941, 2048 }, { 8, 942, 2048 }, { 9, 943, 2048 },
- { 6, 944, 2048 }, { 7, 945, 2048 }, { 7, 946, 2048 }, { 8, 947, 2048 }, { 7, 948, 2048 }, { 8, 949, 2048 }, { 8, 950, 2048 }, { 9, 951, 2048 },
- { 7, 952, 2048 }, { 8, 953, 2048 }, { 8, 954, 2048 }, { 9, 955, 2048 }, { 8, 956, 2048 }, { 9, 957, 2048 }, { 9, 958, 2048 }, { 10, 959, 2048 },
- { 5, 960, 2048 }, { 6, 961, 2048 }, { 6, 962, 2048 }, { 7, 963, 2048 }, { 6, 964, 2048 }, { 7, 965, 2048 }, { 7, 966, 2048 }, { 8, 967, 2048 },
- { 6, 968, 2048 }, { 7, 969, 2048 }, { 7, 970, 2048 }, { 8, 971, 2048 }, { 7, 972, 2048 }, { 8, 973, 2048 }, { 8, 974, 2048 }, { 9, 975, 2048 },
- { 6, 976, 2048 }, { 7, 977, 2048 }, { 7, 978, 2048 }, { 8, 979, 2048 }, { 7, 980, 2048 }, { 8, 981, 2048 }, { 8, 982, 2048 }, { 9, 983, 2048 },
- { 7, 984, 2048 }, { 8, 985, 2048 }, { 8, 986, 2048 }, { 9, 987, 2048 }, { 8, 988, 2048 }, { 9, 989, 2048 }, { 9, 990, 2048 }, { 10, 991, 2048 },
- { 6, 992, 2048 }, { 7, 993, 2048 }, { 7, 994, 2048 }, { 8, 995, 2048 }, { 7, 996, 2048 }, { 8, 997, 2048 }, { 8, 998, 2048 }, { 9, 999, 2048 },
- { 7, 1000, 2048 }, { 8, 1001, 2048 }, { 8, 1002, 2048 }, { 9, 1003, 2048 }, { 8, 1004, 2048 }, { 9, 1005, 2048 }, { 9, 1006, 2048 }, { 10, 1007, 2048 },
- { 7, 1008, 2048 }, { 8, 1009, 2048 }, { 8, 1010, 2048 }, { 9, 1011, 2048 }, { 8, 1012, 2048 }, { 9, 1013, 2048 }, { 9, 1014, 2048 }, { 10, 1015, 2048 },
- { 8, 1016, 2048 }, { 9, 1017, 2048 }, { 9, 1018, 2048 }, { 10, 1019, 2048 }, { 9, 1020, 2048 }, { 10, 1021, 2048 }, { 10, 1022, 2048 }, { 11, 1023, 2048 },
- { 2, 1024, 2048 }, { 3, 1025, 2048 }, { 3, 1026, 2048 }, { 4, 1027, 2048 }, { 3, 1028, 2048 }, { 4, 1029, 2048 }, { 4, 1030, 2048 }, { 5, 1031, 2048 },
- { 3, 1032, 2048 }, { 4, 1033, 2048 }, { 4, 1034, 2048 }, { 5, 1035, 2048 }, { 4, 1036, 2048 }, { 5, 1037, 2048 }, { 5, 1038, 2048 }, { 6, 1039, 2048 },
- { 3, 1040, 2048 }, { 4, 1041, 2048 }, { 4, 1042, 2048 }, { 5, 1043, 2048 }, { 4, 1044, 2048 }, { 5, 1045, 2048 }, { 5, 1046, 2048 }, { 6, 1047, 2048 },
- { 4, 1048, 2048 }, { 5, 1049, 2048 }, { 5, 1050, 2048 }, { 6, 1051, 2048 }, { 5, 1052, 2048 }, { 6, 1053, 2048 }, { 6, 1054, 2048 }, { 7, 1055, 2048 },
- { 3, 1056, 2048 }, { 4, 1057, 2048 }, { 4, 1058, 2048 }, { 5, 1059, 2048 }, { 4, 1060, 2048 }, { 5, 1061, 2048 }, { 5, 1062, 2048 }, { 6, 1063, 2048 },
- { 4, 1064, 2048 }, { 5, 1065, 2048 }, { 5, 1066, 2048 }, { 6, 1067, 2048 }, { 5, 1068, 2048 }, { 6, 1069, 2048 }, { 6, 1070, 2048 }, { 7, 1071, 2048 },
- { 4, 1072, 2048 }, { 5, 1073, 2048 }, { 5, 1074, 2048 }, { 6, 1075, 2048 }, { 5, 1076, 2048 }, { 6, 1077, 2048 }, { 6, 1078, 2048 }, { 7, 1079, 2048 },
- { 5, 1080, 2048 }, { 6, 1081, 2048 }, { 6, 1082, 2048 }, { 7, 1083, 2048 }, { 6, 1084, 2048 }, { 7, 1085, 2048 }, { 7, 1086, 2048 }, { 8, 1087, 2048 },
- { 3, 1088, 2048 }, { 4, 1089, 2048 }, { 4, 1090, 2048 }, { 5, 1091, 2048 }, { 4, 1092, 2048 }, { 5, 1093, 2048 }, { 5, 1094, 2048 }, { 6, 1095, 2048 },
- { 4, 1096, 2048 }, { 5, 1097, 2048 }, { 5, 1098, 2048 }, { 6, 1099, 2048 }, { 5, 1100, 2048 }, { 6, 1101, 2048 }, { 6, 1102, 2048 }, { 7, 1103, 2048 },
- { 4, 1104, 2048 }, { 5, 1105, 2048 }, { 5, 1106, 2048 }, { 6, 1107, 2048 }, { 5, 1108, 2048 }, { 6, 1109, 2048 }, { 6, 1110, 2048 }, { 7, 1111, 2048 },
- { 5, 1112, 2048 }, { 6, 1113, 2048 }, { 6, 1114, 2048 }, { 7, 1115, 2048 }, { 6, 1116, 2048 }, { 7, 1117, 2048 }, { 7, 1118, 2048 }, { 8, 1119, 2048 },
- { 4, 1120, 2048 }, { 5, 1121, 2048 }, { 5, 1122, 2048 }, { 6, 1123, 2048 }, { 5, 1124, 2048 }, { 6, 1125, 2048 }, { 6, 1126, 2048 }, { 7, 1127, 2048 },
- { 5, 1128, 2048 }, { 6, 1129, 2048 }, { 6, 1130, 2048 }, { 7, 1131, 2048 }, { 6, 1132, 2048 }, { 7, 1133, 2048 }, { 7, 1134, 2048 }, { 8, 1135, 2048 },
- { 5, 1136, 2048 }, { 6, 1137, 2048 }, { 6, 1138, 2048 }, { 7, 1139, 2048 }, { 6, 1140, 2048 }, { 7, 1141, 2048 }, { 7, 1142, 2048 }, { 8, 1143, 2048 },
- { 6, 1144, 2048 }, { 7, 1145, 2048 }, { 7, 1146, 2048 }, { 8, 1147, 2048 }, { 7, 1148, 2048 }, { 8, 1149, 2048 }, { 8, 1150, 2048 }, { 9, 1151, 2048 },
- { 3, 1152, 2048 }, { 4, 1153, 2048 }, { 4, 1154, 2048 }, { 5, 1155, 2048 }, { 4, 1156, 2048 }, { 5, 1157, 2048 }, { 5, 1158, 2048 }, { 6, 1159, 2048 },
- { 4, 1160, 2048 }, { 5, 1161, 2048 }, { 5, 1162, 2048 }, { 6, 1163, 2048 }, { 5, 1164, 2048 }, { 6, 1165, 2048 }, { 6, 1166, 2048 }, { 7, 1167, 2048 },
- { 4, 1168, 2048 }, { 5, 1169, 2048 }, { 5, 1170, 2048 }, { 6, 1171, 2048 }, { 5, 1172, 2048 }, { 6, 1173, 2048 }, { 6, 1174, 2048 }, { 7, 1175, 2048 },
- { 5, 1176, 2048 }, { 6, 1177, 2048 }, { 6, 1178, 2048 }, { 7, 1179, 2048 }, { 6, 1180, 2048 }, { 7, 1181, 2048 }, { 7, 1182, 2048 }, { 8, 1183, 2048 },
- { 4, 1184, 2048 }, { 5, 1185, 2048 }, { 5, 1186, 2048 }, { 6, 1187, 2048 }, { 5, 1188, 2048 }, { 6, 1189, 2048 }, { 6, 1190, 2048 }, { 7, 1191, 2048 },
- { 5, 1192, 2048 }, { 6, 1193, 2048 }, { 6, 1194, 2048 }, { 7, 1195, 2048 }, { 6, 1196, 2048 }, { 7, 1197, 2048 }, { 7, 1198, 2048 }, { 8, 1199, 2048 },
- { 5, 1200, 2048 }, { 6, 1201, 2048 }, { 6, 1202, 2048 }, { 7, 1203, 2048 }, { 6, 1204, 2048 }, { 7, 1205, 2048 }, { 7, 1206, 2048 }, { 8, 1207, 2048 },
- { 6, 1208, 2048 }, { 7, 1209, 2048 }, { 7, 1210, 2048 }, { 8, 1211, 2048 }, { 7, 1212, 2048 }, { 8, 1213, 2048 }, { 8, 1214, 2048 }, { 9, 1215, 2048 },
- { 4, 1216, 2048 }, { 5, 1217, 2048 }, { 5, 1218, 2048 }, { 6, 1219, 2048 }, { 5, 1220, 2048 }, { 6, 1221, 2048 }, { 6, 1222, 2048 }, { 7, 1223, 2048 },
- { 5, 1224, 2048 }, { 6, 1225, 2048 }, { 6, 1226, 2048 }, { 7, 1227, 2048 }, { 6, 1228, 2048 }, { 7, 1229, 2048 }, { 7, 1230, 2048 }, { 8, 1231, 2048 },
- { 5, 1232, 2048 }, { 6, 1233, 2048 }, { 6, 1234, 2048 }, { 7, 1235, 2048 }, { 6, 1236, 2048 }, { 7, 1237, 2048 }, { 7, 1238, 2048 }, { 8, 1239, 2048 },
- { 6, 1240, 2048 }, { 7, 1241, 2048 }, { 7, 1242, 2048 }, { 8, 1243, 2048 }, { 7, 1244, 2048 }, { 8, 1245, 2048 }, { 8, 1246, 2048 }, { 9, 1247, 2048 },
- { 5, 1248, 2048 }, { 6, 1249, 2048 }, { 6, 1250, 2048 }, { 7, 1251, 2048 }, { 6, 1252, 2048 }, { 7, 1253, 2048 }, { 7, 1254, 2048 }, { 8, 1255, 2048 },
- { 6, 1256, 2048 }, { 7, 1257, 2048 }, { 7, 1258, 2048 }, { 8, 1259, 2048 }, { 7, 1260, 2048 }, { 8, 1261, 2048 }, { 8, 1262, 2048 }, { 9, 1263, 2048 },
- { 6, 1264, 2048 }, { 7, 1265, 2048 }, { 7, 1266, 2048 }, { 8, 1267, 2048 }, { 7, 1268, 2048 }, { 8, 1269, 2048 }, { 8, 1270, 2048 }, { 9, 1271, 2048 },
- { 7, 1272, 2048 }, { 8, 1273, 2048 }, { 8, 1274, 2048 }, { 9, 1275, 2048 }, { 8, 1276, 2048 }, { 9, 1277, 2048 }, { 9, 1278, 2048 }, { 10, 1279, 2048 },
- { 3, 1280, 2048 }, { 4, 1281, 2048 }, { 4, 1282, 2048 }, { 5, 1283, 2048 }, { 4, 1284, 2048 }, { 5, 1285, 2048 }, { 5, 1286, 2048 }, { 6, 1287, 2048 },
- { 4, 1288, 2048 }, { 5, 1289, 2048 }, { 5, 1290, 2048 }, { 6, 1291, 2048 }, { 5, 1292, 2048 }, { 6, 1293, 2048 }, { 6, 1294, 2048 }, { 7, 1295, 2048 },
- { 4, 1296, 2048 }, { 5, 1297, 2048 }, { 5, 1298, 2048 }, { 6, 1299, 2048 }, { 5, 1300, 2048 }, { 6, 1301, 2048 }, { 6, 1302, 2048 }, { 7, 1303, 2048 },
- { 5, 1304, 2048 }, { 6, 1305, 2048 }, { 6, 1306, 2048 }, { 7, 1307, 2048 }, { 6, 1308, 2048 }, { 7, 1309, 2048 }, { 7, 1310, 2048 }, { 8, 1311, 2048 },
- { 4, 1312, 2048 }, { 5, 1313, 2048 }, { 5, 1314, 2048 }, { 6, 1315, 2048 }, { 5, 1316, 2048 }, { 6, 1317, 2048 }, { 6, 1318, 2048 }, { 7, 1319, 2048 },
- { 5, 1320, 2048 }, { 6, 1321, 2048 }, { 6, 1322, 2048 }, { 7, 1323, 2048 }, { 6, 1324, 2048 }, { 7, 1325, 2048 }, { 7, 1326, 2048 }, { 8, 1327, 2048 },
- { 5, 1328, 2048 }, { 6, 1329, 2048 }, { 6, 1330, 2048 }, { 7, 1331, 2048 }, { 6, 1332, 2048 }, { 7, 1333, 2048 }, { 7, 1334, 2048 }, { 8, 1335, 2048 },
- { 6, 1336, 2048 }, { 7, 1337, 2048 }, { 7, 1338, 2048 }, { 8, 1339, 2048 }, { 7, 1340, 2048 }, { 8, 1341, 2048 }, { 8, 1342, 2048 }, { 9, 1343, 2048 },
- { 4, 1344, 2048 }, { 5, 1345, 2048 }, { 5, 1346, 2048 }, { 6, 1347, 2048 }, { 5, 1348, 2048 }, { 6, 1349, 2048 }, { 6, 1350, 2048 }, { 7, 1351, 2048 },
- { 5, 1352, 2048 }, { 6, 1353, 2048 }, { 6, 1354, 2048 }, { 7, 1355, 2048 }, { 6, 1356, 2048 }, { 7, 1357, 2048 }, { 7, 1358, 2048 }, { 8, 1359, 2048 },
- { 5, 1360, 2048 }, { 6, 1361, 2048 }, { 6, 1362, 2048 }, { 7, 1363, 2048 }, { 6, 1364, 2048 }, { 7, 1365, 2048 }, { 7, 1366, 2048 }, { 8, 1367, 2048 },
- { 6, 1368, 2048 }, { 7, 1369, 2048 }, { 7, 1370, 2048 }, { 8, 1371, 2048 }, { 7, 1372, 2048 }, { 8, 1373, 2048 }, { 8, 1374, 2048 }, { 9, 1375, 2048 },
- { 5, 1376, 2048 }, { 6, 1377, 2048 }, { 6, 1378, 2048 }, { 7, 1379, 2048 }, { 6, 1380, 2048 }, { 7, 1381, 2048 }, { 7, 1382, 2048 }, { 8, 1383, 2048 },
- { 6, 1384, 2048 }, { 7, 1385, 2048 }, { 7, 1386, 2048 }, { 8, 1387, 2048 }, { 7, 1388, 2048 }, { 8, 1389, 2048 }, { 8, 1390, 2048 }, { 9, 1391, 2048 },
- { 6, 1392, 2048 }, { 7, 1393, 2048 }, { 7, 1394, 2048 }, { 8, 1395, 2048 }, { 7, 1396, 2048 }, { 8, 1397, 2048 }, { 8, 1398, 2048 }, { 9, 1399, 2048 },
- { 7, 1400, 2048 }, { 8, 1401, 2048 }, { 8, 1402, 2048 }, { 9, 1403, 2048 }, { 8, 1404, 2048 }, { 9, 1405, 2048 }, { 9, 1406, 2048 }, { 10, 1407, 2048 },
- { 4, 1408, 2048 }, { 5, 1409, 2048 }, { 5, 1410, 2048 }, { 6, 1411, 2048 }, { 5, 1412, 2048 }, { 6, 1413, 2048 }, { 6, 1414, 2048 }, { 7, 1415, 2048 },
- { 5, 1416, 2048 }, { 6, 1417, 2048 }, { 6, 1418, 2048 }, { 7, 1419, 2048 }, { 6, 1420, 2048 }, { 7, 1421, 2048 }, { 7, 1422, 2048 }, { 8, 1423, 2048 },
- { 5, 1424, 2048 }, { 6, 1425, 2048 }, { 6, 1426, 2048 }, { 7, 1427, 2048 }, { 6, 1428, 2048 }, { 7, 1429, 2048 }, { 7, 1430, 2048 }, { 8, 1431, 2048 },
- { 6, 1432, 2048 }, { 7, 1433, 2048 }, { 7, 1434, 2048 }, { 8, 1435, 2048 }, { 7, 1436, 2048 }, { 8, 1437, 2048 }, { 8, 1438, 2048 }, { 9, 1439, 2048 },
- { 5, 1440, 2048 }, { 6, 1441, 2048 }, { 6, 1442, 2048 }, { 7, 1443, 2048 }, { 6, 1444, 2048 }, { 7, 1445, 2048 }, { 7, 1446, 2048 }, { 8, 1447, 2048 },
- { 6, 1448, 2048 }, { 7, 1449, 2048 }, { 7, 1450, 2048 }, { 8, 1451, 2048 }, { 7, 1452, 2048 }, { 8, 1453, 2048 }, { 8, 1454, 2048 }, { 9, 1455, 2048 },
- { 6, 1456, 2048 }, { 7, 1457, 2048 }, { 7, 1458, 2048 }, { 8, 1459, 2048 }, { 7, 1460, 2048 }, { 8, 1461, 2048 }, { 8, 1462, 2048 }, { 9, 1463, 2048 },
- { 7, 1464, 2048 }, { 8, 1465, 2048 }, { 8, 1466, 2048 }, { 9, 1467, 2048 }, { 8, 1468, 2048 }, { 9, 1469, 2048 }, { 9, 1470, 2048 }, { 10, 1471, 2048 },
- { 5, 1472, 2048 }, { 6, 1473, 2048 }, { 6, 1474, 2048 }, { 7, 1475, 2048 }, { 6, 1476, 2048 }, { 7, 1477, 2048 }, { 7, 1478, 2048 }, { 8, 1479, 2048 },
- { 6, 1480, 2048 }, { 7, 1481, 2048 }, { 7, 1482, 2048 }, { 8, 1483, 2048 }, { 7, 1484, 2048 }, { 8, 1485, 2048 }, { 8, 1486, 2048 }, { 9, 1487, 2048 },
- { 6, 1488, 2048 }, { 7, 1489, 2048 }, { 7, 1490, 2048 }, { 8, 1491, 2048 }, { 7, 1492, 2048 }, { 8, 1493, 2048 }, { 8, 1494, 2048 }, { 9, 1495, 2048 },
- { 7, 1496, 2048 }, { 8, 1497, 2048 }, { 8, 1498, 2048 }, { 9, 1499, 2048 }, { 8, 1500, 2048 }, { 9, 1501, 2048 }, { 9, 1502, 2048 }, { 10, 1503, 2048 },
- { 6, 1504, 2048 }, { 7, 1505, 2048 }, { 7, 1506, 2048 }, { 8, 1507, 2048 }, { 7, 1508, 2048 }, { 8, 1509, 2048 }, { 8, 1510, 2048 }, { 9, 1511, 2048 },
- { 7, 1512, 2048 }, { 8, 1513, 2048 }, { 8, 1514, 2048 }, { 9, 1515, 2048 }, { 8, 1516, 2048 }, { 9, 1517, 2048 }, { 9, 1518, 2048 }, { 10, 1519, 2048 },
- { 7, 1520, 2048 }, { 8, 1521, 2048 }, { 8, 1522, 2048 }, { 9, 1523, 2048 }, { 8, 1524, 2048 }, { 9, 1525, 2048 }, { 9, 1526, 2048 }, { 10, 1527, 2048 },
- { 8, 1528, 2048 }, { 9, 1529, 2048 }, { 9, 1530, 2048 }, { 10, 1531, 2048 }, { 9, 1532, 2048 }, { 10, 1533, 2048 }, { 10, 1534, 2048 }, { 11, 1535, 2048 },
- { 3, 1536, 2048 }, { 4, 1537, 2048 }, { 4, 1538, 2048 }, { 5, 1539, 2048 }, { 4, 1540, 2048 }, { 5, 1541, 2048 }, { 5, 1542, 2048 }, { 6, 1543, 2048 },
- { 4, 1544, 2048 }, { 5, 1545, 2048 }, { 5, 1546, 2048 }, { 6, 1547, 2048 }, { 5, 1548, 2048 }, { 6, 1549, 2048 }, { 6, 1550, 2048 }, { 7, 1551, 2048 },
- { 4, 1552, 2048 }, { 5, 1553, 2048 }, { 5, 1554, 2048 }, { 6, 1555, 2048 }, { 5, 1556, 2048 }, { 6, 1557, 2048 }, { 6, 1558, 2048 }, { 7, 1559, 2048 },
- { 5, 1560, 2048 }, { 6, 1561, 2048 }, { 6, 1562, 2048 }, { 7, 1563, 2048 }, { 6, 1564, 2048 }, { 7, 1565, 2048 }, { 7, 1566, 2048 }, { 8, 1567, 2048 },
- { 4, 1568, 2048 }, { 5, 1569, 2048 }, { 5, 1570, 2048 }, { 6, 1571, 2048 }, { 5, 1572, 2048 }, { 6, 1573, 2048 }, { 6, 1574, 2048 }, { 7, 1575, 2048 },
- { 5, 1576, 2048 }, { 6, 1577, 2048 }, { 6, 1578, 2048 }, { 7, 1579, 2048 }, { 6, 1580, 2048 }, { 7, 1581, 2048 }, { 7, 1582, 2048 }, { 8, 1583, 2048 },
- { 5, 1584, 2048 }, { 6, 1585, 2048 }, { 6, 1586, 2048 }, { 7, 1587, 2048 }, { 6, 1588, 2048 }, { 7, 1589, 2048 }, { 7, 1590, 2048 }, { 8, 1591, 2048 },
- { 6, 1592, 2048 }, { 7, 1593, 2048 }, { 7, 1594, 2048 }, { 8, 1595, 2048 }, { 7, 1596, 2048 }, { 8, 1597, 2048 }, { 8, 1598, 2048 }, { 9, 1599, 2048 },
- { 4, 1600, 2048 }, { 5, 1601, 2048 }, { 5, 1602, 2048 }, { 6, 1603, 2048 }, { 5, 1604, 2048 }, { 6, 1605, 2048 }, { 6, 1606, 2048 }, { 7, 1607, 2048 },
- { 5, 1608, 2048 }, { 6, 1609, 2048 }, { 6, 1610, 2048 }, { 7, 1611, 2048 }, { 6, 1612, 2048 }, { 7, 1613, 2048 }, { 7, 1614, 2048 }, { 8, 1615, 2048 },
- { 5, 1616, 2048 }, { 6, 1617, 2048 }, { 6, 1618, 2048 }, { 7, 1619, 2048 }, { 6, 1620, 2048 }, { 7, 1621, 2048 }, { 7, 1622, 2048 }, { 8, 1623, 2048 },
- { 6, 1624, 2048 }, { 7, 1625, 2048 }, { 7, 1626, 2048 }, { 8, 1627, 2048 }, { 7, 1628, 2048 }, { 8, 1629, 2048 }, { 8, 1630, 2048 }, { 9, 1631, 2048 },
- { 5, 1632, 2048 }, { 6, 1633, 2048 }, { 6, 1634, 2048 }, { 7, 1635, 2048 }, { 6, 1636, 2048 }, { 7, 1637, 2048 }, { 7, 1638, 2048 }, { 8, 1639, 2048 },
- { 6, 1640, 2048 }, { 7, 1641, 2048 }, { 7, 1642, 2048 }, { 8, 1643, 2048 }, { 7, 1644, 2048 }, { 8, 1645, 2048 }, { 8, 1646, 2048 }, { 9, 1647, 2048 },
- { 6, 1648, 2048 }, { 7, 1649, 2048 }, { 7, 1650, 2048 }, { 8, 1651, 2048 }, { 7, 1652, 2048 }, { 8, 1653, 2048 }, { 8, 1654, 2048 }, { 9, 1655, 2048 },
- { 7, 1656, 2048 }, { 8, 1657, 2048 }, { 8, 1658, 2048 }, { 9, 1659, 2048 }, { 8, 1660, 2048 }, { 9, 1661, 2048 }, { 9, 1662, 2048 }, { 10, 1663, 2048 },
- { 4, 1664, 2048 }, { 5, 1665, 2048 }, { 5, 1666, 2048 }, { 6, 1667, 2048 }, { 5, 1668, 2048 }, { 6, 1669, 2048 }, { 6, 1670, 2048 }, { 7, 1671, 2048 },
- { 5, 1672, 2048 }, { 6, 1673, 2048 }, { 6, 1674, 2048 }, { 7, 1675, 2048 }, { 6, 1676, 2048 }, { 7, 1677, 2048 }, { 7, 1678, 2048 }, { 8, 1679, 2048 },
- { 5, 1680, 2048 }, { 6, 1681, 2048 }, { 6, 1682, 2048 }, { 7, 1683, 2048 }, { 6, 1684, 2048 }, { 7, 1685, 2048 }, { 7, 1686, 2048 }, { 8, 1687, 2048 },
- { 6, 1688, 2048 }, { 7, 1689, 2048 }, { 7, 1690, 2048 }, { 8, 1691, 2048 }, { 7, 1692, 2048 }, { 8, 1693, 2048 }, { 8, 1694, 2048 }, { 9, 1695, 2048 },
- { 5, 1696, 2048 }, { 6, 1697, 2048 }, { 6, 1698, 2048 }, { 7, 1699, 2048 }, { 6, 1700, 2048 }, { 7, 1701, 2048 }, { 7, 1702, 2048 }, { 8, 1703, 2048 },
- { 6, 1704, 2048 }, { 7, 1705, 2048 }, { 7, 1706, 2048 }, { 8, 1707, 2048 }, { 7, 1708, 2048 }, { 8, 1709, 2048 }, { 8, 1710, 2048 }, { 9, 1711, 2048 },
- { 6, 1712, 2048 }, { 7, 1713, 2048 }, { 7, 1714, 2048 }, { 8, 1715, 2048 }, { 7, 1716, 2048 }, { 8, 1717, 2048 }, { 8, 1718, 2048 }, { 9, 1719, 2048 },
- { 7, 1720, 2048 }, { 8, 1721, 2048 }, { 8, 1722, 2048 }, { 9, 1723, 2048 }, { 8, 1724, 2048 }, { 9, 1725, 2048 }, { 9, 1726, 2048 }, { 10, 1727, 2048 },
- { 5, 1728, 2048 }, { 6, 1729, 2048 }, { 6, 1730, 2048 }, { 7, 1731, 2048 }, { 6, 1732, 2048 }, { 7, 1733, 2048 }, { 7, 1734, 2048 }, { 8, 1735, 2048 },
- { 6, 1736, 2048 }, { 7, 1737, 2048 }, { 7, 1738, 2048 }, { 8, 1739, 2048 }, { 7, 1740, 2048 }, { 8, 1741, 2048 }, { 8, 1742, 2048 }, { 9, 1743, 2048 },
- { 6, 1744, 2048 }, { 7, 1745, 2048 }, { 7, 1746, 2048 }, { 8, 1747, 2048 }, { 7, 1748, 2048 }, { 8, 1749, 2048 }, { 8, 1750, 2048 }, { 9, 1751, 2048 },
- { 7, 1752, 2048 }, { 8, 1753, 2048 }, { 8, 1754, 2048 }, { 9, 1755, 2048 }, { 8, 1756, 2048 }, { 9, 1757, 2048 }, { 9, 1758, 2048 }, { 10, 1759, 2048 },
- { 6, 1760, 2048 }, { 7, 1761, 2048 }, { 7, 1762, 2048 }, { 8, 1763, 2048 }, { 7, 1764, 2048 }, { 8, 1765, 2048 }, { 8, 1766, 2048 }, { 9, 1767, 2048 },
- { 7, 1768, 2048 }, { 8, 1769, 2048 }, { 8, 1770, 2048 }, { 9, 1771, 2048 }, { 8, 1772, 2048 }, { 9, 1773, 2048 }, { 9, 1774, 2048 }, { 10, 1775, 2048 },
- { 7, 1776, 2048 }, { 8, 1777, 2048 }, { 8, 1778, 2048 }, { 9, 1779, 2048 }, { 8, 1780, 2048 }, { 9, 1781, 2048 }, { 9, 1782, 2048 }, { 10, 1783, 2048 },
- { 8, 1784, 2048 }, { 9, 1785, 2048 }, { 9, 1786, 2048 }, { 10, 1787, 2048 }, { 9, 1788, 2048 }, { 10, 1789, 2048 }, { 10, 1790, 2048 }, { 11, 1791, 2048 },
- { 4, 1792, 2048 }, { 5, 1793, 2048 }, { 5, 1794, 2048 }, { 6, 1795, 2048 }, { 5, 1796, 2048 }, { 6, 1797, 2048 }, { 6, 1798, 2048 }, { 7, 1799, 2048 },
- { 5, 1800, 2048 }, { 6, 1801, 2048 }, { 6, 1802, 2048 }, { 7, 1803, 2048 }, { 6, 1804, 2048 }, { 7, 1805, 2048 }, { 7, 1806, 2048 }, { 8, 1807, 2048 },
- { 5, 1808, 2048 }, { 6, 1809, 2048 }, { 6, 1810, 2048 }, { 7, 1811, 2048 }, { 6, 1812, 2048 }, { 7, 1813, 2048 }, { 7, 1814, 2048 }, { 8, 1815, 2048 },
- { 6, 1816, 2048 }, { 7, 1817, 2048 }, { 7, 1818, 2048 }, { 8, 1819, 2048 }, { 7, 1820, 2048 }, { 8, 1821, 2048 }, { 8, 1822, 2048 }, { 9, 1823, 2048 },
- { 5, 1824, 2048 }, { 6, 1825, 2048 }, { 6, 1826, 2048 }, { 7, 1827, 2048 }, { 6, 1828, 2048 }, { 7, 1829, 2048 }, { 7, 1830, 2048 }, { 8, 1831, 2048 },
- { 6, 1832, 2048 }, { 7, 1833, 2048 }, { 7, 1834, 2048 }, { 8, 1835, 2048 }, { 7, 1836, 2048 }, { 8, 1837, 2048 }, { 8, 1838, 2048 }, { 9, 1839, 2048 },
- { 6, 1840, 2048 }, { 7, 1841, 2048 }, { 7, 1842, 2048 }, { 8, 1843, 2048 }, { 7, 1844, 2048 }, { 8, 1845, 2048 }, { 8, 1846, 2048 }, { 9, 1847, 2048 },
- { 7, 1848, 2048 }, { 8, 1849, 2048 }, { 8, 1850, 2048 }, { 9, 1851, 2048 }, { 8, 1852, 2048 }, { 9, 1853, 2048 }, { 9, 1854, 2048 }, { 10, 1855, 2048 },
- { 5, 1856, 2048 }, { 6, 1857, 2048 }, { 6, 1858, 2048 }, { 7, 1859, 2048 }, { 6, 1860, 2048 }, { 7, 1861, 2048 }, { 7, 1862, 2048 }, { 8, 1863, 2048 },
- { 6, 1864, 2048 }, { 7, 1865, 2048 }, { 7, 1866, 2048 }, { 8, 1867, 2048 }, { 7, 1868, 2048 }, { 8, 1869, 2048 }, { 8, 1870, 2048 }, { 9, 1871, 2048 },
- { 6, 1872, 2048 }, { 7, 1873, 2048 }, { 7, 1874, 2048 }, { 8, 1875, 2048 }, { 7, 1876, 2048 }, { 8, 1877, 2048 }, { 8, 1878, 2048 }, { 9, 1879, 2048 },
- { 7, 1880, 2048 }, { 8, 1881, 2048 }, { 8, 1882, 2048 }, { 9, 1883, 2048 }, { 8, 1884, 2048 }, { 9, 1885, 2048 }, { 9, 1886, 2048 }, { 10, 1887, 2048 },
- { 6, 1888, 2048 }, { 7, 1889, 2048 }, { 7, 1890, 2048 }, { 8, 1891, 2048 }, { 7, 1892, 2048 }, { 8, 1893, 2048 }, { 8, 1894, 2048 }, { 9, 1895, 2048 },
- { 7, 1896, 2048 }, { 8, 1897, 2048 }, { 8, 1898, 2048 }, { 9, 1899, 2048 }, { 8, 1900, 2048 }, { 9, 1901, 2048 }, { 9, 1902, 2048 }, { 10, 1903, 2048 },
- { 7, 1904, 2048 }, { 8, 1905, 2048 }, { 8, 1906, 2048 }, { 9, 1907, 2048 }, { 8, 1908, 2048 }, { 9, 1909, 2048 }, { 9, 1910, 2048 }, { 10, 1911, 2048 },
- { 8, 1912, 2048 }, { 9, 1913, 2048 }, { 9, 1914, 2048 }, { 10, 1915, 2048 }, { 9, 1916, 2048 }, { 10, 1917, 2048 }, { 10, 1918, 2048 }, { 11, 1919, 2048 },
- { 5, 1920, 2048 }, { 6, 1921, 2048 }, { 6, 1922, 2048 }, { 7, 1923, 2048 }, { 6, 1924, 2048 }, { 7, 1925, 2048 }, { 7, 1926, 2048 }, { 8, 1927, 2048 },
- { 6, 1928, 2048 }, { 7, 1929, 2048 }, { 7, 1930, 2048 }, { 8, 1931, 2048 }, { 7, 1932, 2048 }, { 8, 1933, 2048 }, { 8, 1934, 2048 }, { 9, 1935, 2048 },
- { 6, 1936, 2048 }, { 7, 1937, 2048 }, { 7, 1938, 2048 }, { 8, 1939, 2048 }, { 7, 1940, 2048 }, { 8, 1941, 2048 }, { 8, 1942, 2048 }, { 9, 1943, 2048 },
- { 7, 1944, 2048 }, { 8, 1945, 2048 }, { 8, 1946, 2048 }, { 9, 1947, 2048 }, { 8, 1948, 2048 }, { 9, 1949, 2048 }, { 9, 1950, 2048 }, { 10, 1951, 2048 },
- { 6, 1952, 2048 }, { 7, 1953, 2048 }, { 7, 1954, 2048 }, { 8, 1955, 2048 }, { 7, 1956, 2048 }, { 8, 1957, 2048 }, { 8, 1958, 2048 }, { 9, 1959, 2048 },
- { 7, 1960, 2048 }, { 8, 1961, 2048 }, { 8, 1962, 2048 }, { 9, 1963, 2048 }, { 8, 1964, 2048 }, { 9, 1965, 2048 }, { 9, 1966, 2048 }, { 10, 1967, 2048 },
- { 7, 1968, 2048 }, { 8, 1969, 2048 }, { 8, 1970, 2048 }, { 9, 1971, 2048 }, { 8, 1972, 2048 }, { 9, 1973, 2048 }, { 9, 1974, 2048 }, { 10, 1975, 2048 },
- { 8, 1976, 2048 }, { 9, 1977, 2048 }, { 9, 1978, 2048 }, { 10, 1979, 2048 }, { 9, 1980, 2048 }, { 10, 1981, 2048 }, { 10, 1982, 2048 }, { 11, 1983, 2048 },
- { 6, 1984, 2048 }, { 7, 1985, 2048 }, { 7, 1986, 2048 }, { 8, 1987, 2048 }, { 7, 1988, 2048 }, { 8, 1989, 2048 }, { 8, 1990, 2048 }, { 9, 1991, 2048 },
- { 7, 1992, 2048 }, { 8, 1993, 2048 }, { 8, 1994, 2048 }, { 9, 1995, 2048 }, { 8, 1996, 2048 }, { 9, 1997, 2048 }, { 9, 1998, 2048 }, { 10, 1999, 2048 },
- { 7, 2000, 2048 }, { 8, 2001, 2048 }, { 8, 2002, 2048 }, { 9, 2003, 2048 }, { 8, 2004, 2048 }, { 9, 2005, 2048 }, { 9, 2006, 2048 }, { 10, 2007, 2048 },
- { 8, 2008, 2048 }, { 9, 2009, 2048 }, { 9, 2010, 2048 }, { 10, 2011, 2048 }, { 9, 2012, 2048 }, { 10, 2013, 2048 }, { 10, 2014, 2048 }, { 11, 2015, 2048 },
- { 7, 2016, 2048 }, { 8, 2017, 2048 }, { 8, 2018, 2048 }, { 9, 2019, 2048 }, { 8, 2020, 2048 }, { 9, 2021, 2048 }, { 9, 2022, 2048 }, { 10, 2023, 2048 },
- { 8, 2024, 2048 }, { 9, 2025, 2048 }, { 9, 2026, 2048 }, { 10, 2027, 2048 }, { 9, 2028, 2048 }, { 10, 2029, 2048 }, { 10, 2030, 2048 }, { 11, 2031, 2048 },
- { 8, 2032, 2048 }, { 9, 2033, 2048 }, { 9, 2034, 2048 }, { 10, 2035, 2048 }, { 9, 2036, 2048 }, { 10, 2037, 2048 }, { 10, 2038, 2048 }, { 11, 2039, 2048 },
- { 9, 2040, 2048 }, { 10, 2041, 2048 }, { 10, 2042, 2048 }, { 11, 2043, 2048 }, { 10, 2044, 2048 }, { 11, 2045, 2048 }, { 11, 2046, 2048 }, { 12, 2047, 2048 },
+ { 1, 0, 0 }, { 2, 1, 2048 }, { 2, 2, 2048 }, { 3, 3, 2048 }, { 2, 4, 2048 }, { 3, 5, 2048 }, { 3, 6, 2048 }, { 4, 7, 2048 },
+ { 2, 8, 2048 }, { 3, 9, 2048 }, { 3, 10, 2048 }, { 4, 11, 2048 }, { 3, 12, 2048 }, { 4, 13, 2048 }, { 4, 14, 2048 }, { 5, 15, 2048 },
+ { 2, 16, 2048 }, { 3, 17, 2048 }, { 3, 18, 2048 }, { 4, 19, 2048 }, { 3, 20, 2048 }, { 4, 21, 2048 }, { 4, 22, 2048 }, { 5, 23, 2048 },
+ { 3, 24, 2048 }, { 4, 25, 2048 }, { 4, 26, 2048 }, { 5, 27, 2048 }, { 4, 28, 2048 }, { 5, 29, 2048 }, { 5, 30, 2048 }, { 6, 31, 2048 },
+ { 2, 32, 2048 }, { 3, 33, 2048 }, { 3, 34, 2048 }, { 4, 35, 2048 }, { 3, 36, 2048 }, { 4, 37, 2048 }, { 4, 38, 2048 }, { 5, 39, 2048 },
+ { 3, 40, 2048 }, { 4, 41, 2048 }, { 4, 42, 2048 }, { 5, 43, 2048 }, { 4, 44, 2048 }, { 5, 45, 2048 }, { 5, 46, 2048 }, { 6, 47, 2048 },
+ { 3, 48, 2048 }, { 4, 49, 2048 }, { 4, 50, 2048 }, { 5, 51, 2048 }, { 4, 52, 2048 }, { 5, 53, 2048 }, { 5, 54, 2048 }, { 6, 55, 2048 },
+ { 4, 56, 2048 }, { 5, 57, 2048 }, { 5, 58, 2048 }, { 6, 59, 2048 }, { 5, 60, 2048 }, { 6, 61, 2048 }, { 6, 62, 2048 }, { 7, 63, 2048 },
+ { 2, 64, 2048 }, { 3, 65, 2048 }, { 3, 66, 2048 }, { 4, 67, 2048 }, { 3, 68, 2048 }, { 4, 69, 2048 }, { 4, 70, 2048 }, { 5, 71, 2048 },
+ { 3, 72, 2048 }, { 4, 73, 2048 }, { 4, 74, 2048 }, { 5, 75, 2048 }, { 4, 76, 2048 }, { 5, 77, 2048 }, { 5, 78, 2048 }, { 6, 79, 2048 },
+ { 3, 80, 2048 }, { 4, 81, 2048 }, { 4, 82, 2048 }, { 5, 83, 2048 }, { 4, 84, 2048 }, { 5, 85, 2048 }, { 5, 86, 2048 }, { 6, 87, 2048 },
+ { 4, 88, 2048 }, { 5, 89, 2048 }, { 5, 90, 2048 }, { 6, 91, 2048 }, { 5, 92, 2048 }, { 6, 93, 2048 }, { 6, 94, 2048 }, { 7, 95, 2048 },
+ { 3, 96, 2048 }, { 4, 97, 2048 }, { 4, 98, 2048 }, { 5, 99, 2048 }, { 4, 100, 2048 }, { 5, 101, 2048 }, { 5, 102, 2048 }, { 6, 103, 2048 },
+ { 4, 104, 2048 }, { 5, 105, 2048 }, { 5, 106, 2048 }, { 6, 107, 2048 }, { 5, 108, 2048 }, { 6, 109, 2048 }, { 6, 110, 2048 }, { 7, 111, 2048 },
+ { 4, 112, 2048 }, { 5, 113, 2048 }, { 5, 114, 2048 }, { 6, 115, 2048 }, { 5, 116, 2048 }, { 6, 117, 2048 }, { 6, 118, 2048 }, { 7, 119, 2048 },
+ { 5, 120, 2048 }, { 6, 121, 2048 }, { 6, 122, 2048 }, { 7, 123, 2048 }, { 6, 124, 2048 }, { 7, 125, 2048 }, { 7, 126, 2048 }, { 8, 127, 2048 },
+ { 2, 128, 2048 }, { 3, 129, 2048 }, { 3, 130, 2048 }, { 4, 131, 2048 }, { 3, 132, 2048 }, { 4, 133, 2048 }, { 4, 134, 2048 }, { 5, 135, 2048 },
+ { 3, 136, 2048 }, { 4, 137, 2048 }, { 4, 138, 2048 }, { 5, 139, 2048 }, { 4, 140, 2048 }, { 5, 141, 2048 }, { 5, 142, 2048 }, { 6, 143, 2048 },
+ { 3, 144, 2048 }, { 4, 145, 2048 }, { 4, 146, 2048 }, { 5, 147, 2048 }, { 4, 148, 2048 }, { 5, 149, 2048 }, { 5, 150, 2048 }, { 6, 151, 2048 },
+ { 4, 152, 2048 }, { 5, 153, 2048 }, { 5, 154, 2048 }, { 6, 155, 2048 }, { 5, 156, 2048 }, { 6, 157, 2048 }, { 6, 158, 2048 }, { 7, 159, 2048 },
+ { 3, 160, 2048 }, { 4, 161, 2048 }, { 4, 162, 2048 }, { 5, 163, 2048 }, { 4, 164, 2048 }, { 5, 165, 2048 }, { 5, 166, 2048 }, { 6, 167, 2048 },
+ { 4, 168, 2048 }, { 5, 169, 2048 }, { 5, 170, 2048 }, { 6, 171, 2048 }, { 5, 172, 2048 }, { 6, 173, 2048 }, { 6, 174, 2048 }, { 7, 175, 2048 },
+ { 4, 176, 2048 }, { 5, 177, 2048 }, { 5, 178, 2048 }, { 6, 179, 2048 }, { 5, 180, 2048 }, { 6, 181, 2048 }, { 6, 182, 2048 }, { 7, 183, 2048 },
+ { 5, 184, 2048 }, { 6, 185, 2048 }, { 6, 186, 2048 }, { 7, 187, 2048 }, { 6, 188, 2048 }, { 7, 189, 2048 }, { 7, 190, 2048 }, { 8, 191, 2048 },
+ { 3, 192, 2048 }, { 4, 193, 2048 }, { 4, 194, 2048 }, { 5, 195, 2048 }, { 4, 196, 2048 }, { 5, 197, 2048 }, { 5, 198, 2048 }, { 6, 199, 2048 },
+ { 4, 200, 2048 }, { 5, 201, 2048 }, { 5, 202, 2048 }, { 6, 203, 2048 }, { 5, 204, 2048 }, { 6, 205, 2048 }, { 6, 206, 2048 }, { 7, 207, 2048 },
+ { 4, 208, 2048 }, { 5, 209, 2048 }, { 5, 210, 2048 }, { 6, 211, 2048 }, { 5, 212, 2048 }, { 6, 213, 2048 }, { 6, 214, 2048 }, { 7, 215, 2048 },
+ { 5, 216, 2048 }, { 6, 217, 2048 }, { 6, 218, 2048 }, { 7, 219, 2048 }, { 6, 220, 2048 }, { 7, 221, 2048 }, { 7, 222, 2048 }, { 8, 223, 2048 },
+ { 4, 224, 2048 }, { 5, 225, 2048 }, { 5, 226, 2048 }, { 6, 227, 2048 }, { 5, 228, 2048 }, { 6, 229, 2048 }, { 6, 230, 2048 }, { 7, 231, 2048 },
+ { 5, 232, 2048 }, { 6, 233, 2048 }, { 6, 234, 2048 }, { 7, 235, 2048 }, { 6, 236, 2048 }, { 7, 237, 2048 }, { 7, 238, 2048 }, { 8, 239, 2048 },
+ { 5, 240, 2048 }, { 6, 241, 2048 }, { 6, 242, 2048 }, { 7, 243, 2048 }, { 6, 244, 2048 }, { 7, 245, 2048 }, { 7, 246, 2048 }, { 8, 247, 2048 },
+ { 6, 248, 2048 }, { 7, 249, 2048 }, { 7, 250, 2048 }, { 8, 251, 2048 }, { 7, 252, 2048 }, { 8, 253, 2048 }, { 8, 254, 2048 }, { 9, 255, 2048 },
+ { 2, 256, 2048 }, { 3, 257, 2048 }, { 3, 258, 2048 }, { 4, 259, 2048 }, { 3, 260, 2048 }, { 4, 261, 2048 }, { 4, 262, 2048 }, { 5, 263, 2048 },
+ { 3, 264, 2048 }, { 4, 265, 2048 }, { 4, 266, 2048 }, { 5, 267, 2048 }, { 4, 268, 2048 }, { 5, 269, 2048 }, { 5, 270, 2048 }, { 6, 271, 2048 },
+ { 3, 272, 2048 }, { 4, 273, 2048 }, { 4, 274, 2048 }, { 5, 275, 2048 }, { 4, 276, 2048 }, { 5, 277, 2048 }, { 5, 278, 2048 }, { 6, 279, 2048 },
+ { 4, 280, 2048 }, { 5, 281, 2048 }, { 5, 282, 2048 }, { 6, 283, 2048 }, { 5, 284, 2048 }, { 6, 285, 2048 }, { 6, 286, 2048 }, { 7, 287, 2048 },
+ { 3, 288, 2048 }, { 4, 289, 2048 }, { 4, 290, 2048 }, { 5, 291, 2048 }, { 4, 292, 2048 }, { 5, 293, 2048 }, { 5, 294, 2048 }, { 6, 295, 2048 },
+ { 4, 296, 2048 }, { 5, 297, 2048 }, { 5, 298, 2048 }, { 6, 299, 2048 }, { 5, 300, 2048 }, { 6, 301, 2048 }, { 6, 302, 2048 }, { 7, 303, 2048 },
+ { 4, 304, 2048 }, { 5, 305, 2048 }, { 5, 306, 2048 }, { 6, 307, 2048 }, { 5, 308, 2048 }, { 6, 309, 2048 }, { 6, 310, 2048 }, { 7, 311, 2048 },
+ { 5, 312, 2048 }, { 6, 313, 2048 }, { 6, 314, 2048 }, { 7, 315, 2048 }, { 6, 316, 2048 }, { 7, 317, 2048 }, { 7, 318, 2048 }, { 8, 319, 2048 },
+ { 3, 320, 2048 }, { 4, 321, 2048 }, { 4, 322, 2048 }, { 5, 323, 2048 }, { 4, 324, 2048 }, { 5, 325, 2048 }, { 5, 326, 2048 }, { 6, 327, 2048 },
+ { 4, 328, 2048 }, { 5, 329, 2048 }, { 5, 330, 2048 }, { 6, 331, 2048 }, { 5, 332, 2048 }, { 6, 333, 2048 }, { 6, 334, 2048 }, { 7, 335, 2048 },
+ { 4, 336, 2048 }, { 5, 337, 2048 }, { 5, 338, 2048 }, { 6, 339, 2048 }, { 5, 340, 2048 }, { 6, 341, 2048 }, { 6, 342, 2048 }, { 7, 343, 2048 },
+ { 5, 344, 2048 }, { 6, 345, 2048 }, { 6, 346, 2048 }, { 7, 347, 2048 }, { 6, 348, 2048 }, { 7, 349, 2048 }, { 7, 350, 2048 }, { 8, 351, 2048 },
+ { 4, 352, 2048 }, { 5, 353, 2048 }, { 5, 354, 2048 }, { 6, 355, 2048 }, { 5, 356, 2048 }, { 6, 357, 2048 }, { 6, 358, 2048 }, { 7, 359, 2048 },
+ { 5, 360, 2048 }, { 6, 361, 2048 }, { 6, 362, 2048 }, { 7, 363, 2048 }, { 6, 364, 2048 }, { 7, 365, 2048 }, { 7, 366, 2048 }, { 8, 367, 2048 },
+ { 5, 368, 2048 }, { 6, 369, 2048 }, { 6, 370, 2048 }, { 7, 371, 2048 }, { 6, 372, 2048 }, { 7, 373, 2048 }, { 7, 374, 2048 }, { 8, 375, 2048 },
+ { 6, 376, 2048 }, { 7, 377, 2048 }, { 7, 378, 2048 }, { 8, 379, 2048 }, { 7, 380, 2048 }, { 8, 381, 2048 }, { 8, 382, 2048 }, { 9, 383, 2048 },
+ { 3, 384, 2048 }, { 4, 385, 2048 }, { 4, 386, 2048 }, { 5, 387, 2048 }, { 4, 388, 2048 }, { 5, 389, 2048 }, { 5, 390, 2048 }, { 6, 391, 2048 },
+ { 4, 392, 2048 }, { 5, 393, 2048 }, { 5, 394, 2048 }, { 6, 395, 2048 }, { 5, 396, 2048 }, { 6, 397, 2048 }, { 6, 398, 2048 }, { 7, 399, 2048 },
+ { 4, 400, 2048 }, { 5, 401, 2048 }, { 5, 402, 2048 }, { 6, 403, 2048 }, { 5, 404, 2048 }, { 6, 405, 2048 }, { 6, 406, 2048 }, { 7, 407, 2048 },
+ { 5, 408, 2048 }, { 6, 409, 2048 }, { 6, 410, 2048 }, { 7, 411, 2048 }, { 6, 412, 2048 }, { 7, 413, 2048 }, { 7, 414, 2048 }, { 8, 415, 2048 },
+ { 4, 416, 2048 }, { 5, 417, 2048 }, { 5, 418, 2048 }, { 6, 419, 2048 }, { 5, 420, 2048 }, { 6, 421, 2048 }, { 6, 422, 2048 }, { 7, 423, 2048 },
+ { 5, 424, 2048 }, { 6, 425, 2048 }, { 6, 426, 2048 }, { 7, 427, 2048 }, { 6, 428, 2048 }, { 7, 429, 2048 }, { 7, 430, 2048 }, { 8, 431, 2048 },
+ { 5, 432, 2048 }, { 6, 433, 2048 }, { 6, 434, 2048 }, { 7, 435, 2048 }, { 6, 436, 2048 }, { 7, 437, 2048 }, { 7, 438, 2048 }, { 8, 439, 2048 },
+ { 6, 440, 2048 }, { 7, 441, 2048 }, { 7, 442, 2048 }, { 8, 443, 2048 }, { 7, 444, 2048 }, { 8, 445, 2048 }, { 8, 446, 2048 }, { 9, 447, 2048 },
+ { 4, 448, 2048 }, { 5, 449, 2048 }, { 5, 450, 2048 }, { 6, 451, 2048 }, { 5, 452, 2048 }, { 6, 453, 2048 }, { 6, 454, 2048 }, { 7, 455, 2048 },
+ { 5, 456, 2048 }, { 6, 457, 2048 }, { 6, 458, 2048 }, { 7, 459, 2048 }, { 6, 460, 2048 }, { 7, 461, 2048 }, { 7, 462, 2048 }, { 8, 463, 2048 },
+ { 5, 464, 2048 }, { 6, 465, 2048 }, { 6, 466, 2048 }, { 7, 467, 2048 }, { 6, 468, 2048 }, { 7, 469, 2048 }, { 7, 470, 2048 }, { 8, 471, 2048 },
+ { 6, 472, 2048 }, { 7, 473, 2048 }, { 7, 474, 2048 }, { 8, 475, 2048 }, { 7, 476, 2048 }, { 8, 477, 2048 }, { 8, 478, 2048 }, { 9, 479, 2048 },
+ { 5, 480, 2048 }, { 6, 481, 2048 }, { 6, 482, 2048 }, { 7, 483, 2048 }, { 6, 484, 2048 }, { 7, 485, 2048 }, { 7, 486, 2048 }, { 8, 487, 2048 },
+ { 6, 488, 2048 }, { 7, 489, 2048 }, { 7, 490, 2048 }, { 8, 491, 2048 }, { 7, 492, 2048 }, { 8, 493, 2048 }, { 8, 494, 2048 }, { 9, 495, 2048 },
+ { 6, 496, 2048 }, { 7, 497, 2048 }, { 7, 498, 2048 }, { 8, 499, 2048 }, { 7, 500, 2048 }, { 8, 501, 2048 }, { 8, 502, 2048 }, { 9, 503, 2048 },
+ { 7, 504, 2048 }, { 8, 505, 2048 }, { 8, 506, 2048 }, { 9, 507, 2048 }, { 8, 508, 2048 }, { 9, 509, 2048 }, { 9, 510, 2048 }, { 10, 511, 2048 },
+ { 2, 512, 2048 }, { 3, 513, 2048 }, { 3, 514, 2048 }, { 4, 515, 2048 }, { 3, 516, 2048 }, { 4, 517, 2048 }, { 4, 518, 2048 }, { 5, 519, 2048 },
+ { 3, 520, 2048 }, { 4, 521, 2048 }, { 4, 522, 2048 }, { 5, 523, 2048 }, { 4, 524, 2048 }, { 5, 525, 2048 }, { 5, 526, 2048 }, { 6, 527, 2048 },
+ { 3, 528, 2048 }, { 4, 529, 2048 }, { 4, 530, 2048 }, { 5, 531, 2048 }, { 4, 532, 2048 }, { 5, 533, 2048 }, { 5, 534, 2048 }, { 6, 535, 2048 },
+ { 4, 536, 2048 }, { 5, 537, 2048 }, { 5, 538, 2048 }, { 6, 539, 2048 }, { 5, 540, 2048 }, { 6, 541, 2048 }, { 6, 542, 2048 }, { 7, 543, 2048 },
+ { 3, 544, 2048 }, { 4, 545, 2048 }, { 4, 546, 2048 }, { 5, 547, 2048 }, { 4, 548, 2048 }, { 5, 549, 2048 }, { 5, 550, 2048 }, { 6, 551, 2048 },
+ { 4, 552, 2048 }, { 5, 553, 2048 }, { 5, 554, 2048 }, { 6, 555, 2048 }, { 5, 556, 2048 }, { 6, 557, 2048 }, { 6, 558, 2048 }, { 7, 559, 2048 },
+ { 4, 560, 2048 }, { 5, 561, 2048 }, { 5, 562, 2048 }, { 6, 563, 2048 }, { 5, 564, 2048 }, { 6, 565, 2048 }, { 6, 566, 2048 }, { 7, 567, 2048 },
+ { 5, 568, 2048 }, { 6, 569, 2048 }, { 6, 570, 2048 }, { 7, 571, 2048 }, { 6, 572, 2048 }, { 7, 573, 2048 }, { 7, 574, 2048 }, { 8, 575, 2048 },
+ { 3, 576, 2048 }, { 4, 577, 2048 }, { 4, 578, 2048 }, { 5, 579, 2048 }, { 4, 580, 2048 }, { 5, 581, 2048 }, { 5, 582, 2048 }, { 6, 583, 2048 },
+ { 4, 584, 2048 }, { 5, 585, 2048 }, { 5, 586, 2048 }, { 6, 587, 2048 }, { 5, 588, 2048 }, { 6, 589, 2048 }, { 6, 590, 2048 }, { 7, 591, 2048 },
+ { 4, 592, 2048 }, { 5, 593, 2048 }, { 5, 594, 2048 }, { 6, 595, 2048 }, { 5, 596, 2048 }, { 6, 597, 2048 }, { 6, 598, 2048 }, { 7, 599, 2048 },
+ { 5, 600, 2048 }, { 6, 601, 2048 }, { 6, 602, 2048 }, { 7, 603, 2048 }, { 6, 604, 2048 }, { 7, 605, 2048 }, { 7, 606, 2048 }, { 8, 607, 2048 },
+ { 4, 608, 2048 }, { 5, 609, 2048 }, { 5, 610, 2048 }, { 6, 611, 2048 }, { 5, 612, 2048 }, { 6, 613, 2048 }, { 6, 614, 2048 }, { 7, 615, 2048 },
+ { 5, 616, 2048 }, { 6, 617, 2048 }, { 6, 618, 2048 }, { 7, 619, 2048 }, { 6, 620, 2048 }, { 7, 621, 2048 }, { 7, 622, 2048 }, { 8, 623, 2048 },
+ { 5, 624, 2048 }, { 6, 625, 2048 }, { 6, 626, 2048 }, { 7, 627, 2048 }, { 6, 628, 2048 }, { 7, 629, 2048 }, { 7, 630, 2048 }, { 8, 631, 2048 },
+ { 6, 632, 2048 }, { 7, 633, 2048 }, { 7, 634, 2048 }, { 8, 635, 2048 }, { 7, 636, 2048 }, { 8, 637, 2048 }, { 8, 638, 2048 }, { 9, 639, 2048 },
+ { 3, 640, 2048 }, { 4, 641, 2048 }, { 4, 642, 2048 }, { 5, 643, 2048 }, { 4, 644, 2048 }, { 5, 645, 2048 }, { 5, 646, 2048 }, { 6, 647, 2048 },
+ { 4, 648, 2048 }, { 5, 649, 2048 }, { 5, 650, 2048 }, { 6, 651, 2048 }, { 5, 652, 2048 }, { 6, 653, 2048 }, { 6, 654, 2048 }, { 7, 655, 2048 },
+ { 4, 656, 2048 }, { 5, 657, 2048 }, { 5, 658, 2048 }, { 6, 659, 2048 }, { 5, 660, 2048 }, { 6, 661, 2048 }, { 6, 662, 2048 }, { 7, 663, 2048 },
+ { 5, 664, 2048 }, { 6, 665, 2048 }, { 6, 666, 2048 }, { 7, 667, 2048 }, { 6, 668, 2048 }, { 7, 669, 2048 }, { 7, 670, 2048 }, { 8, 671, 2048 },
+ { 4, 672, 2048 }, { 5, 673, 2048 }, { 5, 674, 2048 }, { 6, 675, 2048 }, { 5, 676, 2048 }, { 6, 677, 2048 }, { 6, 678, 2048 }, { 7, 679, 2048 },
+ { 5, 680, 2048 }, { 6, 681, 2048 }, { 6, 682, 2048 }, { 7, 683, 2048 }, { 6, 684, 2048 }, { 7, 685, 2048 }, { 7, 686, 2048 }, { 8, 687, 2048 },
+ { 5, 688, 2048 }, { 6, 689, 2048 }, { 6, 690, 2048 }, { 7, 691, 2048 }, { 6, 692, 2048 }, { 7, 693, 2048 }, { 7, 694, 2048 }, { 8, 695, 2048 },
+ { 6, 696, 2048 }, { 7, 697, 2048 }, { 7, 698, 2048 }, { 8, 699, 2048 }, { 7, 700, 2048 }, { 8, 701, 2048 }, { 8, 702, 2048 }, { 9, 703, 2048 },
+ { 4, 704, 2048 }, { 5, 705, 2048 }, { 5, 706, 2048 }, { 6, 707, 2048 }, { 5, 708, 2048 }, { 6, 709, 2048 }, { 6, 710, 2048 }, { 7, 711, 2048 },
+ { 5, 712, 2048 }, { 6, 713, 2048 }, { 6, 714, 2048 }, { 7, 715, 2048 }, { 6, 716, 2048 }, { 7, 717, 2048 }, { 7, 718, 2048 }, { 8, 719, 2048 },
+ { 5, 720, 2048 }, { 6, 721, 2048 }, { 6, 722, 2048 }, { 7, 723, 2048 }, { 6, 724, 2048 }, { 7, 725, 2048 }, { 7, 726, 2048 }, { 8, 727, 2048 },
+ { 6, 728, 2048 }, { 7, 729, 2048 }, { 7, 730, 2048 }, { 8, 731, 2048 }, { 7, 732, 2048 }, { 8, 733, 2048 }, { 8, 734, 2048 }, { 9, 735, 2048 },
+ { 5, 736, 2048 }, { 6, 737, 2048 }, { 6, 738, 2048 }, { 7, 739, 2048 }, { 6, 740, 2048 }, { 7, 741, 2048 }, { 7, 742, 2048 }, { 8, 743, 2048 },
+ { 6, 744, 2048 }, { 7, 745, 2048 }, { 7, 746, 2048 }, { 8, 747, 2048 }, { 7, 748, 2048 }, { 8, 749, 2048 }, { 8, 750, 2048 }, { 9, 751, 2048 },
+ { 6, 752, 2048 }, { 7, 753, 2048 }, { 7, 754, 2048 }, { 8, 755, 2048 }, { 7, 756, 2048 }, { 8, 757, 2048 }, { 8, 758, 2048 }, { 9, 759, 2048 },
+ { 7, 760, 2048 }, { 8, 761, 2048 }, { 8, 762, 2048 }, { 9, 763, 2048 }, { 8, 764, 2048 }, { 9, 765, 2048 }, { 9, 766, 2048 }, { 10, 767, 2048 },
+ { 3, 768, 2048 }, { 4, 769, 2048 }, { 4, 770, 2048 }, { 5, 771, 2048 }, { 4, 772, 2048 }, { 5, 773, 2048 }, { 5, 774, 2048 }, { 6, 775, 2048 },
+ { 4, 776, 2048 }, { 5, 777, 2048 }, { 5, 778, 2048 }, { 6, 779, 2048 }, { 5, 780, 2048 }, { 6, 781, 2048 }, { 6, 782, 2048 }, { 7, 783, 2048 },
+ { 4, 784, 2048 }, { 5, 785, 2048 }, { 5, 786, 2048 }, { 6, 787, 2048 }, { 5, 788, 2048 }, { 6, 789, 2048 }, { 6, 790, 2048 }, { 7, 791, 2048 },
+ { 5, 792, 2048 }, { 6, 793, 2048 }, { 6, 794, 2048 }, { 7, 795, 2048 }, { 6, 796, 2048 }, { 7, 797, 2048 }, { 7, 798, 2048 }, { 8, 799, 2048 },
+ { 4, 800, 2048 }, { 5, 801, 2048 }, { 5, 802, 2048 }, { 6, 803, 2048 }, { 5, 804, 2048 }, { 6, 805, 2048 }, { 6, 806, 2048 }, { 7, 807, 2048 },
+ { 5, 808, 2048 }, { 6, 809, 2048 }, { 6, 810, 2048 }, { 7, 811, 2048 }, { 6, 812, 2048 }, { 7, 813, 2048 }, { 7, 814, 2048 }, { 8, 815, 2048 },
+ { 5, 816, 2048 }, { 6, 817, 2048 }, { 6, 818, 2048 }, { 7, 819, 2048 }, { 6, 820, 2048 }, { 7, 821, 2048 }, { 7, 822, 2048 }, { 8, 823, 2048 },
+ { 6, 824, 2048 }, { 7, 825, 2048 }, { 7, 826, 2048 }, { 8, 827, 2048 }, { 7, 828, 2048 }, { 8, 829, 2048 }, { 8, 830, 2048 }, { 9, 831, 2048 },
+ { 4, 832, 2048 }, { 5, 833, 2048 }, { 5, 834, 2048 }, { 6, 835, 2048 }, { 5, 836, 2048 }, { 6, 837, 2048 }, { 6, 838, 2048 }, { 7, 839, 2048 },
+ { 5, 840, 2048 }, { 6, 841, 2048 }, { 6, 842, 2048 }, { 7, 843, 2048 }, { 6, 844, 2048 }, { 7, 845, 2048 }, { 7, 846, 2048 }, { 8, 847, 2048 },
+ { 5, 848, 2048 }, { 6, 849, 2048 }, { 6, 850, 2048 }, { 7, 851, 2048 }, { 6, 852, 2048 }, { 7, 853, 2048 }, { 7, 854, 2048 }, { 8, 855, 2048 },
+ { 6, 856, 2048 }, { 7, 857, 2048 }, { 7, 858, 2048 }, { 8, 859, 2048 }, { 7, 860, 2048 }, { 8, 861, 2048 }, { 8, 862, 2048 }, { 9, 863, 2048 },
+ { 5, 864, 2048 }, { 6, 865, 2048 }, { 6, 866, 2048 }, { 7, 867, 2048 }, { 6, 868, 2048 }, { 7, 869, 2048 }, { 7, 870, 2048 }, { 8, 871, 2048 },
+ { 6, 872, 2048 }, { 7, 873, 2048 }, { 7, 874, 2048 }, { 8, 875, 2048 }, { 7, 876, 2048 }, { 8, 877, 2048 }, { 8, 878, 2048 }, { 9, 879, 2048 },
+ { 6, 880, 2048 }, { 7, 881, 2048 }, { 7, 882, 2048 }, { 8, 883, 2048 }, { 7, 884, 2048 }, { 8, 885, 2048 }, { 8, 886, 2048 }, { 9, 887, 2048 },
+ { 7, 888, 2048 }, { 8, 889, 2048 }, { 8, 890, 2048 }, { 9, 891, 2048 }, { 8, 892, 2048 }, { 9, 893, 2048 }, { 9, 894, 2048 }, { 10, 895, 2048 },
+ { 4, 896, 2048 }, { 5, 897, 2048 }, { 5, 898, 2048 }, { 6, 899, 2048 }, { 5, 900, 2048 }, { 6, 901, 2048 }, { 6, 902, 2048 }, { 7, 903, 2048 },
+ { 5, 904, 2048 }, { 6, 905, 2048 }, { 6, 906, 2048 }, { 7, 907, 2048 }, { 6, 908, 2048 }, { 7, 909, 2048 }, { 7, 910, 2048 }, { 8, 911, 2048 },
+ { 5, 912, 2048 }, { 6, 913, 2048 }, { 6, 914, 2048 }, { 7, 915, 2048 }, { 6, 916, 2048 }, { 7, 917, 2048 }, { 7, 918, 2048 }, { 8, 919, 2048 },
+ { 6, 920, 2048 }, { 7, 921, 2048 }, { 7, 922, 2048 }, { 8, 923, 2048 }, { 7, 924, 2048 }, { 8, 925, 2048 }, { 8, 926, 2048 }, { 9, 927, 2048 },
+ { 5, 928, 2048 }, { 6, 929, 2048 }, { 6, 930, 2048 }, { 7, 931, 2048 }, { 6, 932, 2048 }, { 7, 933, 2048 }, { 7, 934, 2048 }, { 8, 935, 2048 },
+ { 6, 936, 2048 }, { 7, 937, 2048 }, { 7, 938, 2048 }, { 8, 939, 2048 }, { 7, 940, 2048 }, { 8, 941, 2048 }, { 8, 942, 2048 }, { 9, 943, 2048 },
+ { 6, 944, 2048 }, { 7, 945, 2048 }, { 7, 946, 2048 }, { 8, 947, 2048 }, { 7, 948, 2048 }, { 8, 949, 2048 }, { 8, 950, 2048 }, { 9, 951, 2048 },
+ { 7, 952, 2048 }, { 8, 953, 2048 }, { 8, 954, 2048 }, { 9, 955, 2048 }, { 8, 956, 2048 }, { 9, 957, 2048 }, { 9, 958, 2048 }, { 10, 959, 2048 },
+ { 5, 960, 2048 }, { 6, 961, 2048 }, { 6, 962, 2048 }, { 7, 963, 2048 }, { 6, 964, 2048 }, { 7, 965, 2048 }, { 7, 966, 2048 }, { 8, 967, 2048 },
+ { 6, 968, 2048 }, { 7, 969, 2048 }, { 7, 970, 2048 }, { 8, 971, 2048 }, { 7, 972, 2048 }, { 8, 973, 2048 }, { 8, 974, 2048 }, { 9, 975, 2048 },
+ { 6, 976, 2048 }, { 7, 977, 2048 }, { 7, 978, 2048 }, { 8, 979, 2048 }, { 7, 980, 2048 }, { 8, 981, 2048 }, { 8, 982, 2048 }, { 9, 983, 2048 },
+ { 7, 984, 2048 }, { 8, 985, 2048 }, { 8, 986, 2048 }, { 9, 987, 2048 }, { 8, 988, 2048 }, { 9, 989, 2048 }, { 9, 990, 2048 }, { 10, 991, 2048 },
+ { 6, 992, 2048 }, { 7, 993, 2048 }, { 7, 994, 2048 }, { 8, 995, 2048 }, { 7, 996, 2048 }, { 8, 997, 2048 }, { 8, 998, 2048 }, { 9, 999, 2048 },
+ { 7, 1000, 2048 }, { 8, 1001, 2048 }, { 8, 1002, 2048 }, { 9, 1003, 2048 }, { 8, 1004, 2048 }, { 9, 1005, 2048 }, { 9, 1006, 2048 }, { 10, 1007, 2048 },
+ { 7, 1008, 2048 }, { 8, 1009, 2048 }, { 8, 1010, 2048 }, { 9, 1011, 2048 }, { 8, 1012, 2048 }, { 9, 1013, 2048 }, { 9, 1014, 2048 }, { 10, 1015, 2048 },
+ { 8, 1016, 2048 }, { 9, 1017, 2048 }, { 9, 1018, 2048 }, { 10, 1019, 2048 }, { 9, 1020, 2048 }, { 10, 1021, 2048 }, { 10, 1022, 2048 }, { 11, 1023, 2048 },
+ { 2, 1024, 2048 }, { 3, 1025, 2048 }, { 3, 1026, 2048 }, { 4, 1027, 2048 }, { 3, 1028, 2048 }, { 4, 1029, 2048 }, { 4, 1030, 2048 }, { 5, 1031, 2048 },
+ { 3, 1032, 2048 }, { 4, 1033, 2048 }, { 4, 1034, 2048 }, { 5, 1035, 2048 }, { 4, 1036, 2048 }, { 5, 1037, 2048 }, { 5, 1038, 2048 }, { 6, 1039, 2048 },
+ { 3, 1040, 2048 }, { 4, 1041, 2048 }, { 4, 1042, 2048 }, { 5, 1043, 2048 }, { 4, 1044, 2048 }, { 5, 1045, 2048 }, { 5, 1046, 2048 }, { 6, 1047, 2048 },
+ { 4, 1048, 2048 }, { 5, 1049, 2048 }, { 5, 1050, 2048 }, { 6, 1051, 2048 }, { 5, 1052, 2048 }, { 6, 1053, 2048 }, { 6, 1054, 2048 }, { 7, 1055, 2048 },
+ { 3, 1056, 2048 }, { 4, 1057, 2048 }, { 4, 1058, 2048 }, { 5, 1059, 2048 }, { 4, 1060, 2048 }, { 5, 1061, 2048 }, { 5, 1062, 2048 }, { 6, 1063, 2048 },
+ { 4, 1064, 2048 }, { 5, 1065, 2048 }, { 5, 1066, 2048 }, { 6, 1067, 2048 }, { 5, 1068, 2048 }, { 6, 1069, 2048 }, { 6, 1070, 2048 }, { 7, 1071, 2048 },
+ { 4, 1072, 2048 }, { 5, 1073, 2048 }, { 5, 1074, 2048 }, { 6, 1075, 2048 }, { 5, 1076, 2048 }, { 6, 1077, 2048 }, { 6, 1078, 2048 }, { 7, 1079, 2048 },
+ { 5, 1080, 2048 }, { 6, 1081, 2048 }, { 6, 1082, 2048 }, { 7, 1083, 2048 }, { 6, 1084, 2048 }, { 7, 1085, 2048 }, { 7, 1086, 2048 }, { 8, 1087, 2048 },
+ { 3, 1088, 2048 }, { 4, 1089, 2048 }, { 4, 1090, 2048 }, { 5, 1091, 2048 }, { 4, 1092, 2048 }, { 5, 1093, 2048 }, { 5, 1094, 2048 }, { 6, 1095, 2048 },
+ { 4, 1096, 2048 }, { 5, 1097, 2048 }, { 5, 1098, 2048 }, { 6, 1099, 2048 }, { 5, 1100, 2048 }, { 6, 1101, 2048 }, { 6, 1102, 2048 }, { 7, 1103, 2048 },
+ { 4, 1104, 2048 }, { 5, 1105, 2048 }, { 5, 1106, 2048 }, { 6, 1107, 2048 }, { 5, 1108, 2048 }, { 6, 1109, 2048 }, { 6, 1110, 2048 }, { 7, 1111, 2048 },
+ { 5, 1112, 2048 }, { 6, 1113, 2048 }, { 6, 1114, 2048 }, { 7, 1115, 2048 }, { 6, 1116, 2048 }, { 7, 1117, 2048 }, { 7, 1118, 2048 }, { 8, 1119, 2048 },
+ { 4, 1120, 2048 }, { 5, 1121, 2048 }, { 5, 1122, 2048 }, { 6, 1123, 2048 }, { 5, 1124, 2048 }, { 6, 1125, 2048 }, { 6, 1126, 2048 }, { 7, 1127, 2048 },
+ { 5, 1128, 2048 }, { 6, 1129, 2048 }, { 6, 1130, 2048 }, { 7, 1131, 2048 }, { 6, 1132, 2048 }, { 7, 1133, 2048 }, { 7, 1134, 2048 }, { 8, 1135, 2048 },
+ { 5, 1136, 2048 }, { 6, 1137, 2048 }, { 6, 1138, 2048 }, { 7, 1139, 2048 }, { 6, 1140, 2048 }, { 7, 1141, 2048 }, { 7, 1142, 2048 }, { 8, 1143, 2048 },
+ { 6, 1144, 2048 }, { 7, 1145, 2048 }, { 7, 1146, 2048 }, { 8, 1147, 2048 }, { 7, 1148, 2048 }, { 8, 1149, 2048 }, { 8, 1150, 2048 }, { 9, 1151, 2048 },
+ { 3, 1152, 2048 }, { 4, 1153, 2048 }, { 4, 1154, 2048 }, { 5, 1155, 2048 }, { 4, 1156, 2048 }, { 5, 1157, 2048 }, { 5, 1158, 2048 }, { 6, 1159, 2048 },
+ { 4, 1160, 2048 }, { 5, 1161, 2048 }, { 5, 1162, 2048 }, { 6, 1163, 2048 }, { 5, 1164, 2048 }, { 6, 1165, 2048 }, { 6, 1166, 2048 }, { 7, 1167, 2048 },
+ { 4, 1168, 2048 }, { 5, 1169, 2048 }, { 5, 1170, 2048 }, { 6, 1171, 2048 }, { 5, 1172, 2048 }, { 6, 1173, 2048 }, { 6, 1174, 2048 }, { 7, 1175, 2048 },
+ { 5, 1176, 2048 }, { 6, 1177, 2048 }, { 6, 1178, 2048 }, { 7, 1179, 2048 }, { 6, 1180, 2048 }, { 7, 1181, 2048 }, { 7, 1182, 2048 }, { 8, 1183, 2048 },
+ { 4, 1184, 2048 }, { 5, 1185, 2048 }, { 5, 1186, 2048 }, { 6, 1187, 2048 }, { 5, 1188, 2048 }, { 6, 1189, 2048 }, { 6, 1190, 2048 }, { 7, 1191, 2048 },
+ { 5, 1192, 2048 }, { 6, 1193, 2048 }, { 6, 1194, 2048 }, { 7, 1195, 2048 }, { 6, 1196, 2048 }, { 7, 1197, 2048 }, { 7, 1198, 2048 }, { 8, 1199, 2048 },
+ { 5, 1200, 2048 }, { 6, 1201, 2048 }, { 6, 1202, 2048 }, { 7, 1203, 2048 }, { 6, 1204, 2048 }, { 7, 1205, 2048 }, { 7, 1206, 2048 }, { 8, 1207, 2048 },
+ { 6, 1208, 2048 }, { 7, 1209, 2048 }, { 7, 1210, 2048 }, { 8, 1211, 2048 }, { 7, 1212, 2048 }, { 8, 1213, 2048 }, { 8, 1214, 2048 }, { 9, 1215, 2048 },
+ { 4, 1216, 2048 }, { 5, 1217, 2048 }, { 5, 1218, 2048 }, { 6, 1219, 2048 }, { 5, 1220, 2048 }, { 6, 1221, 2048 }, { 6, 1222, 2048 }, { 7, 1223, 2048 },
+ { 5, 1224, 2048 }, { 6, 1225, 2048 }, { 6, 1226, 2048 }, { 7, 1227, 2048 }, { 6, 1228, 2048 }, { 7, 1229, 2048 }, { 7, 1230, 2048 }, { 8, 1231, 2048 },
+ { 5, 1232, 2048 }, { 6, 1233, 2048 }, { 6, 1234, 2048 }, { 7, 1235, 2048 }, { 6, 1236, 2048 }, { 7, 1237, 2048 }, { 7, 1238, 2048 }, { 8, 1239, 2048 },
+ { 6, 1240, 2048 }, { 7, 1241, 2048 }, { 7, 1242, 2048 }, { 8, 1243, 2048 }, { 7, 1244, 2048 }, { 8, 1245, 2048 }, { 8, 1246, 2048 }, { 9, 1247, 2048 },
+ { 5, 1248, 2048 }, { 6, 1249, 2048 }, { 6, 1250, 2048 }, { 7, 1251, 2048 }, { 6, 1252, 2048 }, { 7, 1253, 2048 }, { 7, 1254, 2048 }, { 8, 1255, 2048 },
+ { 6, 1256, 2048 }, { 7, 1257, 2048 }, { 7, 1258, 2048 }, { 8, 1259, 2048 }, { 7, 1260, 2048 }, { 8, 1261, 2048 }, { 8, 1262, 2048 }, { 9, 1263, 2048 },
+ { 6, 1264, 2048 }, { 7, 1265, 2048 }, { 7, 1266, 2048 }, { 8, 1267, 2048 }, { 7, 1268, 2048 }, { 8, 1269, 2048 }, { 8, 1270, 2048 }, { 9, 1271, 2048 },
+ { 7, 1272, 2048 }, { 8, 1273, 2048 }, { 8, 1274, 2048 }, { 9, 1275, 2048 }, { 8, 1276, 2048 }, { 9, 1277, 2048 }, { 9, 1278, 2048 }, { 10, 1279, 2048 },
+ { 3, 1280, 2048 }, { 4, 1281, 2048 }, { 4, 1282, 2048 }, { 5, 1283, 2048 }, { 4, 1284, 2048 }, { 5, 1285, 2048 }, { 5, 1286, 2048 }, { 6, 1287, 2048 },
+ { 4, 1288, 2048 }, { 5, 1289, 2048 }, { 5, 1290, 2048 }, { 6, 1291, 2048 }, { 5, 1292, 2048 }, { 6, 1293, 2048 }, { 6, 1294, 2048 }, { 7, 1295, 2048 },
+ { 4, 1296, 2048 }, { 5, 1297, 2048 }, { 5, 1298, 2048 }, { 6, 1299, 2048 }, { 5, 1300, 2048 }, { 6, 1301, 2048 }, { 6, 1302, 2048 }, { 7, 1303, 2048 },
+ { 5, 1304, 2048 }, { 6, 1305, 2048 }, { 6, 1306, 2048 }, { 7, 1307, 2048 }, { 6, 1308, 2048 }, { 7, 1309, 2048 }, { 7, 1310, 2048 }, { 8, 1311, 2048 },
+ { 4, 1312, 2048 }, { 5, 1313, 2048 }, { 5, 1314, 2048 }, { 6, 1315, 2048 }, { 5, 1316, 2048 }, { 6, 1317, 2048 }, { 6, 1318, 2048 }, { 7, 1319, 2048 },
+ { 5, 1320, 2048 }, { 6, 1321, 2048 }, { 6, 1322, 2048 }, { 7, 1323, 2048 }, { 6, 1324, 2048 }, { 7, 1325, 2048 }, { 7, 1326, 2048 }, { 8, 1327, 2048 },
+ { 5, 1328, 2048 }, { 6, 1329, 2048 }, { 6, 1330, 2048 }, { 7, 1331, 2048 }, { 6, 1332, 2048 }, { 7, 1333, 2048 }, { 7, 1334, 2048 }, { 8, 1335, 2048 },
+ { 6, 1336, 2048 }, { 7, 1337, 2048 }, { 7, 1338, 2048 }, { 8, 1339, 2048 }, { 7, 1340, 2048 }, { 8, 1341, 2048 }, { 8, 1342, 2048 }, { 9, 1343, 2048 },
+ { 4, 1344, 2048 }, { 5, 1345, 2048 }, { 5, 1346, 2048 }, { 6, 1347, 2048 }, { 5, 1348, 2048 }, { 6, 1349, 2048 }, { 6, 1350, 2048 }, { 7, 1351, 2048 },
+ { 5, 1352, 2048 }, { 6, 1353, 2048 }, { 6, 1354, 2048 }, { 7, 1355, 2048 }, { 6, 1356, 2048 }, { 7, 1357, 2048 }, { 7, 1358, 2048 }, { 8, 1359, 2048 },
+ { 5, 1360, 2048 }, { 6, 1361, 2048 }, { 6, 1362, 2048 }, { 7, 1363, 2048 }, { 6, 1364, 2048 }, { 7, 1365, 2048 }, { 7, 1366, 2048 }, { 8, 1367, 2048 },
+ { 6, 1368, 2048 }, { 7, 1369, 2048 }, { 7, 1370, 2048 }, { 8, 1371, 2048 }, { 7, 1372, 2048 }, { 8, 1373, 2048 }, { 8, 1374, 2048 }, { 9, 1375, 2048 },
+ { 5, 1376, 2048 }, { 6, 1377, 2048 }, { 6, 1378, 2048 }, { 7, 1379, 2048 }, { 6, 1380, 2048 }, { 7, 1381, 2048 }, { 7, 1382, 2048 }, { 8, 1383, 2048 },
+ { 6, 1384, 2048 }, { 7, 1385, 2048 }, { 7, 1386, 2048 }, { 8, 1387, 2048 }, { 7, 1388, 2048 }, { 8, 1389, 2048 }, { 8, 1390, 2048 }, { 9, 1391, 2048 },
+ { 6, 1392, 2048 }, { 7, 1393, 2048 }, { 7, 1394, 2048 }, { 8, 1395, 2048 }, { 7, 1396, 2048 }, { 8, 1397, 2048 }, { 8, 1398, 2048 }, { 9, 1399, 2048 },
+ { 7, 1400, 2048 }, { 8, 1401, 2048 }, { 8, 1402, 2048 }, { 9, 1403, 2048 }, { 8, 1404, 2048 }, { 9, 1405, 2048 }, { 9, 1406, 2048 }, { 10, 1407, 2048 },
+ { 4, 1408, 2048 }, { 5, 1409, 2048 }, { 5, 1410, 2048 }, { 6, 1411, 2048 }, { 5, 1412, 2048 }, { 6, 1413, 2048 }, { 6, 1414, 2048 }, { 7, 1415, 2048 },
+ { 5, 1416, 2048 }, { 6, 1417, 2048 }, { 6, 1418, 2048 }, { 7, 1419, 2048 }, { 6, 1420, 2048 }, { 7, 1421, 2048 }, { 7, 1422, 2048 }, { 8, 1423, 2048 },
+ { 5, 1424, 2048 }, { 6, 1425, 2048 }, { 6, 1426, 2048 }, { 7, 1427, 2048 }, { 6, 1428, 2048 }, { 7, 1429, 2048 }, { 7, 1430, 2048 }, { 8, 1431, 2048 },
+ { 6, 1432, 2048 }, { 7, 1433, 2048 }, { 7, 1434, 2048 }, { 8, 1435, 2048 }, { 7, 1436, 2048 }, { 8, 1437, 2048 }, { 8, 1438, 2048 }, { 9, 1439, 2048 },
+ { 5, 1440, 2048 }, { 6, 1441, 2048 }, { 6, 1442, 2048 }, { 7, 1443, 2048 }, { 6, 1444, 2048 }, { 7, 1445, 2048 }, { 7, 1446, 2048 }, { 8, 1447, 2048 },
+ { 6, 1448, 2048 }, { 7, 1449, 2048 }, { 7, 1450, 2048 }, { 8, 1451, 2048 }, { 7, 1452, 2048 }, { 8, 1453, 2048 }, { 8, 1454, 2048 }, { 9, 1455, 2048 },
+ { 6, 1456, 2048 }, { 7, 1457, 2048 }, { 7, 1458, 2048 }, { 8, 1459, 2048 }, { 7, 1460, 2048 }, { 8, 1461, 2048 }, { 8, 1462, 2048 }, { 9, 1463, 2048 },
+ { 7, 1464, 2048 }, { 8, 1465, 2048 }, { 8, 1466, 2048 }, { 9, 1467, 2048 }, { 8, 1468, 2048 }, { 9, 1469, 2048 }, { 9, 1470, 2048 }, { 10, 1471, 2048 },
+ { 5, 1472, 2048 }, { 6, 1473, 2048 }, { 6, 1474, 2048 }, { 7, 1475, 2048 }, { 6, 1476, 2048 }, { 7, 1477, 2048 }, { 7, 1478, 2048 }, { 8, 1479, 2048 },
+ { 6, 1480, 2048 }, { 7, 1481, 2048 }, { 7, 1482, 2048 }, { 8, 1483, 2048 }, { 7, 1484, 2048 }, { 8, 1485, 2048 }, { 8, 1486, 2048 }, { 9, 1487, 2048 },
+ { 6, 1488, 2048 }, { 7, 1489, 2048 }, { 7, 1490, 2048 }, { 8, 1491, 2048 }, { 7, 1492, 2048 }, { 8, 1493, 2048 }, { 8, 1494, 2048 }, { 9, 1495, 2048 },
+ { 7, 1496, 2048 }, { 8, 1497, 2048 }, { 8, 1498, 2048 }, { 9, 1499, 2048 }, { 8, 1500, 2048 }, { 9, 1501, 2048 }, { 9, 1502, 2048 }, { 10, 1503, 2048 },
+ { 6, 1504, 2048 }, { 7, 1505, 2048 }, { 7, 1506, 2048 }, { 8, 1507, 2048 }, { 7, 1508, 2048 }, { 8, 1509, 2048 }, { 8, 1510, 2048 }, { 9, 1511, 2048 },
+ { 7, 1512, 2048 }, { 8, 1513, 2048 }, { 8, 1514, 2048 }, { 9, 1515, 2048 }, { 8, 1516, 2048 }, { 9, 1517, 2048 }, { 9, 1518, 2048 }, { 10, 1519, 2048 },
+ { 7, 1520, 2048 }, { 8, 1521, 2048 }, { 8, 1522, 2048 }, { 9, 1523, 2048 }, { 8, 1524, 2048 }, { 9, 1525, 2048 }, { 9, 1526, 2048 }, { 10, 1527, 2048 },
+ { 8, 1528, 2048 }, { 9, 1529, 2048 }, { 9, 1530, 2048 }, { 10, 1531, 2048 }, { 9, 1532, 2048 }, { 10, 1533, 2048 }, { 10, 1534, 2048 }, { 11, 1535, 2048 },
+ { 3, 1536, 2048 }, { 4, 1537, 2048 }, { 4, 1538, 2048 }, { 5, 1539, 2048 }, { 4, 1540, 2048 }, { 5, 1541, 2048 }, { 5, 1542, 2048 }, { 6, 1543, 2048 },
+ { 4, 1544, 2048 }, { 5, 1545, 2048 }, { 5, 1546, 2048 }, { 6, 1547, 2048 }, { 5, 1548, 2048 }, { 6, 1549, 2048 }, { 6, 1550, 2048 }, { 7, 1551, 2048 },
+ { 4, 1552, 2048 }, { 5, 1553, 2048 }, { 5, 1554, 2048 }, { 6, 1555, 2048 }, { 5, 1556, 2048 }, { 6, 1557, 2048 }, { 6, 1558, 2048 }, { 7, 1559, 2048 },
+ { 5, 1560, 2048 }, { 6, 1561, 2048 }, { 6, 1562, 2048 }, { 7, 1563, 2048 }, { 6, 1564, 2048 }, { 7, 1565, 2048 }, { 7, 1566, 2048 }, { 8, 1567, 2048 },
+ { 4, 1568, 2048 }, { 5, 1569, 2048 }, { 5, 1570, 2048 }, { 6, 1571, 2048 }, { 5, 1572, 2048 }, { 6, 1573, 2048 }, { 6, 1574, 2048 }, { 7, 1575, 2048 },
+ { 5, 1576, 2048 }, { 6, 1577, 2048 }, { 6, 1578, 2048 }, { 7, 1579, 2048 }, { 6, 1580, 2048 }, { 7, 1581, 2048 }, { 7, 1582, 2048 }, { 8, 1583, 2048 },
+ { 5, 1584, 2048 }, { 6, 1585, 2048 }, { 6, 1586, 2048 }, { 7, 1587, 2048 }, { 6, 1588, 2048 }, { 7, 1589, 2048 }, { 7, 1590, 2048 }, { 8, 1591, 2048 },
+ { 6, 1592, 2048 }, { 7, 1593, 2048 }, { 7, 1594, 2048 }, { 8, 1595, 2048 }, { 7, 1596, 2048 }, { 8, 1597, 2048 }, { 8, 1598, 2048 }, { 9, 1599, 2048 },
+ { 4, 1600, 2048 }, { 5, 1601, 2048 }, { 5, 1602, 2048 }, { 6, 1603, 2048 }, { 5, 1604, 2048 }, { 6, 1605, 2048 }, { 6, 1606, 2048 }, { 7, 1607, 2048 },
+ { 5, 1608, 2048 }, { 6, 1609, 2048 }, { 6, 1610, 2048 }, { 7, 1611, 2048 }, { 6, 1612, 2048 }, { 7, 1613, 2048 }, { 7, 1614, 2048 }, { 8, 1615, 2048 },
+ { 5, 1616, 2048 }, { 6, 1617, 2048 }, { 6, 1618, 2048 }, { 7, 1619, 2048 }, { 6, 1620, 2048 }, { 7, 1621, 2048 }, { 7, 1622, 2048 }, { 8, 1623, 2048 },
+ { 6, 1624, 2048 }, { 7, 1625, 2048 }, { 7, 1626, 2048 }, { 8, 1627, 2048 }, { 7, 1628, 2048 }, { 8, 1629, 2048 }, { 8, 1630, 2048 }, { 9, 1631, 2048 },
+ { 5, 1632, 2048 }, { 6, 1633, 2048 }, { 6, 1634, 2048 }, { 7, 1635, 2048 }, { 6, 1636, 2048 }, { 7, 1637, 2048 }, { 7, 1638, 2048 }, { 8, 1639, 2048 },
+ { 6, 1640, 2048 }, { 7, 1641, 2048 }, { 7, 1642, 2048 }, { 8, 1643, 2048 }, { 7, 1644, 2048 }, { 8, 1645, 2048 }, { 8, 1646, 2048 }, { 9, 1647, 2048 },
+ { 6, 1648, 2048 }, { 7, 1649, 2048 }, { 7, 1650, 2048 }, { 8, 1651, 2048 }, { 7, 1652, 2048 }, { 8, 1653, 2048 }, { 8, 1654, 2048 }, { 9, 1655, 2048 },
+ { 7, 1656, 2048 }, { 8, 1657, 2048 }, { 8, 1658, 2048 }, { 9, 1659, 2048 }, { 8, 1660, 2048 }, { 9, 1661, 2048 }, { 9, 1662, 2048 }, { 10, 1663, 2048 },
+ { 4, 1664, 2048 }, { 5, 1665, 2048 }, { 5, 1666, 2048 }, { 6, 1667, 2048 }, { 5, 1668, 2048 }, { 6, 1669, 2048 }, { 6, 1670, 2048 }, { 7, 1671, 2048 },
+ { 5, 1672, 2048 }, { 6, 1673, 2048 }, { 6, 1674, 2048 }, { 7, 1675, 2048 }, { 6, 1676, 2048 }, { 7, 1677, 2048 }, { 7, 1678, 2048 }, { 8, 1679, 2048 },
+ { 5, 1680, 2048 }, { 6, 1681, 2048 }, { 6, 1682, 2048 }, { 7, 1683, 2048 }, { 6, 1684, 2048 }, { 7, 1685, 2048 }, { 7, 1686, 2048 }, { 8, 1687, 2048 },
+ { 6, 1688, 2048 }, { 7, 1689, 2048 }, { 7, 1690, 2048 }, { 8, 1691, 2048 }, { 7, 1692, 2048 }, { 8, 1693, 2048 }, { 8, 1694, 2048 }, { 9, 1695, 2048 },
+ { 5, 1696, 2048 }, { 6, 1697, 2048 }, { 6, 1698, 2048 }, { 7, 1699, 2048 }, { 6, 1700, 2048 }, { 7, 1701, 2048 }, { 7, 1702, 2048 }, { 8, 1703, 2048 },
+ { 6, 1704, 2048 }, { 7, 1705, 2048 }, { 7, 1706, 2048 }, { 8, 1707, 2048 }, { 7, 1708, 2048 }, { 8, 1709, 2048 }, { 8, 1710, 2048 }, { 9, 1711, 2048 },
+ { 6, 1712, 2048 }, { 7, 1713, 2048 }, { 7, 1714, 2048 }, { 8, 1715, 2048 }, { 7, 1716, 2048 }, { 8, 1717, 2048 }, { 8, 1718, 2048 }, { 9, 1719, 2048 },
+ { 7, 1720, 2048 }, { 8, 1721, 2048 }, { 8, 1722, 2048 }, { 9, 1723, 2048 }, { 8, 1724, 2048 }, { 9, 1725, 2048 }, { 9, 1726, 2048 }, { 10, 1727, 2048 },
+ { 5, 1728, 2048 }, { 6, 1729, 2048 }, { 6, 1730, 2048 }, { 7, 1731, 2048 }, { 6, 1732, 2048 }, { 7, 1733, 2048 }, { 7, 1734, 2048 }, { 8, 1735, 2048 },
+ { 6, 1736, 2048 }, { 7, 1737, 2048 }, { 7, 1738, 2048 }, { 8, 1739, 2048 }, { 7, 1740, 2048 }, { 8, 1741, 2048 }, { 8, 1742, 2048 }, { 9, 1743, 2048 },
+ { 6, 1744, 2048 }, { 7, 1745, 2048 }, { 7, 1746, 2048 }, { 8, 1747, 2048 }, { 7, 1748, 2048 }, { 8, 1749, 2048 }, { 8, 1750, 2048 }, { 9, 1751, 2048 },
+ { 7, 1752, 2048 }, { 8, 1753, 2048 }, { 8, 1754, 2048 }, { 9, 1755, 2048 }, { 8, 1756, 2048 }, { 9, 1757, 2048 }, { 9, 1758, 2048 }, { 10, 1759, 2048 },
+ { 6, 1760, 2048 }, { 7, 1761, 2048 }, { 7, 1762, 2048 }, { 8, 1763, 2048 }, { 7, 1764, 2048 }, { 8, 1765, 2048 }, { 8, 1766, 2048 }, { 9, 1767, 2048 },
+ { 7, 1768, 2048 }, { 8, 1769, 2048 }, { 8, 1770, 2048 }, { 9, 1771, 2048 }, { 8, 1772, 2048 }, { 9, 1773, 2048 }, { 9, 1774, 2048 }, { 10, 1775, 2048 },
+ { 7, 1776, 2048 }, { 8, 1777, 2048 }, { 8, 1778, 2048 }, { 9, 1779, 2048 }, { 8, 1780, 2048 }, { 9, 1781, 2048 }, { 9, 1782, 2048 }, { 10, 1783, 2048 },
+ { 8, 1784, 2048 }, { 9, 1785, 2048 }, { 9, 1786, 2048 }, { 10, 1787, 2048 }, { 9, 1788, 2048 }, { 10, 1789, 2048 }, { 10, 1790, 2048 }, { 11, 1791, 2048 },
+ { 4, 1792, 2048 }, { 5, 1793, 2048 }, { 5, 1794, 2048 }, { 6, 1795, 2048 }, { 5, 1796, 2048 }, { 6, 1797, 2048 }, { 6, 1798, 2048 }, { 7, 1799, 2048 },
+ { 5, 1800, 2048 }, { 6, 1801, 2048 }, { 6, 1802, 2048 }, { 7, 1803, 2048 }, { 6, 1804, 2048 }, { 7, 1805, 2048 }, { 7, 1806, 2048 }, { 8, 1807, 2048 },
+ { 5, 1808, 2048 }, { 6, 1809, 2048 }, { 6, 1810, 2048 }, { 7, 1811, 2048 }, { 6, 1812, 2048 }, { 7, 1813, 2048 }, { 7, 1814, 2048 }, { 8, 1815, 2048 },
+ { 6, 1816, 2048 }, { 7, 1817, 2048 }, { 7, 1818, 2048 }, { 8, 1819, 2048 }, { 7, 1820, 2048 }, { 8, 1821, 2048 }, { 8, 1822, 2048 }, { 9, 1823, 2048 },
+ { 5, 1824, 2048 }, { 6, 1825, 2048 }, { 6, 1826, 2048 }, { 7, 1827, 2048 }, { 6, 1828, 2048 }, { 7, 1829, 2048 }, { 7, 1830, 2048 }, { 8, 1831, 2048 },
+ { 6, 1832, 2048 }, { 7, 1833, 2048 }, { 7, 1834, 2048 }, { 8, 1835, 2048 }, { 7, 1836, 2048 }, { 8, 1837, 2048 }, { 8, 1838, 2048 }, { 9, 1839, 2048 },
+ { 6, 1840, 2048 }, { 7, 1841, 2048 }, { 7, 1842, 2048 }, { 8, 1843, 2048 }, { 7, 1844, 2048 }, { 8, 1845, 2048 }, { 8, 1846, 2048 }, { 9, 1847, 2048 },
+ { 7, 1848, 2048 }, { 8, 1849, 2048 }, { 8, 1850, 2048 }, { 9, 1851, 2048 }, { 8, 1852, 2048 }, { 9, 1853, 2048 }, { 9, 1854, 2048 }, { 10, 1855, 2048 },
+ { 5, 1856, 2048 }, { 6, 1857, 2048 }, { 6, 1858, 2048 }, { 7, 1859, 2048 }, { 6, 1860, 2048 }, { 7, 1861, 2048 }, { 7, 1862, 2048 }, { 8, 1863, 2048 },
+ { 6, 1864, 2048 }, { 7, 1865, 2048 }, { 7, 1866, 2048 }, { 8, 1867, 2048 }, { 7, 1868, 2048 }, { 8, 1869, 2048 }, { 8, 1870, 2048 }, { 9, 1871, 2048 },
+ { 6, 1872, 2048 }, { 7, 1873, 2048 }, { 7, 1874, 2048 }, { 8, 1875, 2048 }, { 7, 1876, 2048 }, { 8, 1877, 2048 }, { 8, 1878, 2048 }, { 9, 1879, 2048 },
+ { 7, 1880, 2048 }, { 8, 1881, 2048 }, { 8, 1882, 2048 }, { 9, 1883, 2048 }, { 8, 1884, 2048 }, { 9, 1885, 2048 }, { 9, 1886, 2048 }, { 10, 1887, 2048 },
+ { 6, 1888, 2048 }, { 7, 1889, 2048 }, { 7, 1890, 2048 }, { 8, 1891, 2048 }, { 7, 1892, 2048 }, { 8, 1893, 2048 }, { 8, 1894, 2048 }, { 9, 1895, 2048 },
+ { 7, 1896, 2048 }, { 8, 1897, 2048 }, { 8, 1898, 2048 }, { 9, 1899, 2048 }, { 8, 1900, 2048 }, { 9, 1901, 2048 }, { 9, 1902, 2048 }, { 10, 1903, 2048 },
+ { 7, 1904, 2048 }, { 8, 1905, 2048 }, { 8, 1906, 2048 }, { 9, 1907, 2048 }, { 8, 1908, 2048 }, { 9, 1909, 2048 }, { 9, 1910, 2048 }, { 10, 1911, 2048 },
+ { 8, 1912, 2048 }, { 9, 1913, 2048 }, { 9, 1914, 2048 }, { 10, 1915, 2048 }, { 9, 1916, 2048 }, { 10, 1917, 2048 }, { 10, 1918, 2048 }, { 11, 1919, 2048 },
+ { 5, 1920, 2048 }, { 6, 1921, 2048 }, { 6, 1922, 2048 }, { 7, 1923, 2048 }, { 6, 1924, 2048 }, { 7, 1925, 2048 }, { 7, 1926, 2048 }, { 8, 1927, 2048 },
+ { 6, 1928, 2048 }, { 7, 1929, 2048 }, { 7, 1930, 2048 }, { 8, 1931, 2048 }, { 7, 1932, 2048 }, { 8, 1933, 2048 }, { 8, 1934, 2048 }, { 9, 1935, 2048 },
+ { 6, 1936, 2048 }, { 7, 1937, 2048 }, { 7, 1938, 2048 }, { 8, 1939, 2048 }, { 7, 1940, 2048 }, { 8, 1941, 2048 }, { 8, 1942, 2048 }, { 9, 1943, 2048 },
+ { 7, 1944, 2048 }, { 8, 1945, 2048 }, { 8, 1946, 2048 }, { 9, 1947, 2048 }, { 8, 1948, 2048 }, { 9, 1949, 2048 }, { 9, 1950, 2048 }, { 10, 1951, 2048 },
+ { 6, 1952, 2048 }, { 7, 1953, 2048 }, { 7, 1954, 2048 }, { 8, 1955, 2048 }, { 7, 1956, 2048 }, { 8, 1957, 2048 }, { 8, 1958, 2048 }, { 9, 1959, 2048 },
+ { 7, 1960, 2048 }, { 8, 1961, 2048 }, { 8, 1962, 2048 }, { 9, 1963, 2048 }, { 8, 1964, 2048 }, { 9, 1965, 2048 }, { 9, 1966, 2048 }, { 10, 1967, 2048 },
+ { 7, 1968, 2048 }, { 8, 1969, 2048 }, { 8, 1970, 2048 }, { 9, 1971, 2048 }, { 8, 1972, 2048 }, { 9, 1973, 2048 }, { 9, 1974, 2048 }, { 10, 1975, 2048 },
+ { 8, 1976, 2048 }, { 9, 1977, 2048 }, { 9, 1978, 2048 }, { 10, 1979, 2048 }, { 9, 1980, 2048 }, { 10, 1981, 2048 }, { 10, 1982, 2048 }, { 11, 1983, 2048 },
+ { 6, 1984, 2048 }, { 7, 1985, 2048 }, { 7, 1986, 2048 }, { 8, 1987, 2048 }, { 7, 1988, 2048 }, { 8, 1989, 2048 }, { 8, 1990, 2048 }, { 9, 1991, 2048 },
+ { 7, 1992, 2048 }, { 8, 1993, 2048 }, { 8, 1994, 2048 }, { 9, 1995, 2048 }, { 8, 1996, 2048 }, { 9, 1997, 2048 }, { 9, 1998, 2048 }, { 10, 1999, 2048 },
+ { 7, 2000, 2048 }, { 8, 2001, 2048 }, { 8, 2002, 2048 }, { 9, 2003, 2048 }, { 8, 2004, 2048 }, { 9, 2005, 2048 }, { 9, 2006, 2048 }, { 10, 2007, 2048 },
+ { 8, 2008, 2048 }, { 9, 2009, 2048 }, { 9, 2010, 2048 }, { 10, 2011, 2048 }, { 9, 2012, 2048 }, { 10, 2013, 2048 }, { 10, 2014, 2048 }, { 11, 2015, 2048 },
+ { 7, 2016, 2048 }, { 8, 2017, 2048 }, { 8, 2018, 2048 }, { 9, 2019, 2048 }, { 8, 2020, 2048 }, { 9, 2021, 2048 }, { 9, 2022, 2048 }, { 10, 2023, 2048 },
+ { 8, 2024, 2048 }, { 9, 2025, 2048 }, { 9, 2026, 2048 }, { 10, 2027, 2048 }, { 9, 2028, 2048 }, { 10, 2029, 2048 }, { 10, 2030, 2048 }, { 11, 2031, 2048 },
+ { 8, 2032, 2048 }, { 9, 2033, 2048 }, { 9, 2034, 2048 }, { 10, 2035, 2048 }, { 9, 2036, 2048 }, { 10, 2037, 2048 }, { 10, 2038, 2048 }, { 11, 2039, 2048 },
+ { 9, 2040, 2048 }, { 10, 2041, 2048 }, { 10, 2042, 2048 }, { 11, 2043, 2048 }, { 10, 2044, 2048 }, { 11, 2045, 2048 }, { 11, 2046, 2048 }, { 12, 2047, 2048 },
#endif
#endif
#endif
@@ -3462,6 +8636,7 @@ static const struct {
#endif
};
+
/* find a hole and free as required, return -1 if no hole found */
static int find_hole(void)
{
@@ -3484,10 +8659,10 @@ static int find_hole(void)
/* free entry z */
if (z >= 0 && fp_cache[z].g) {
mp_clear(&fp_cache[z].mu);
- ecc_del_point(fp_cache[z].g);
+ wc_ecc_del_point(fp_cache[z].g);
fp_cache[z].g = NULL;
for (x = 0; x < (1U<<FP_LUT); x++) {
- ecc_del_point(fp_cache[z].LUT[x]);
+ wc_ecc_del_point(fp_cache[z].LUT[x]);
fp_cache[z].LUT[x] = NULL;
}
fp_cache[z].lru_count = 0;
@@ -3500,9 +8675,9 @@ static int find_base(ecc_point* g)
{
int x;
for (x = 0; x < FP_ENTRIES; x++) {
- if (fp_cache[x].g != NULL &&
- mp_cmp(fp_cache[x].g->x, g->x) == MP_EQ &&
- mp_cmp(fp_cache[x].g->y, g->y) == MP_EQ &&
+ if (fp_cache[x].g != NULL &&
+ mp_cmp(fp_cache[x].g->x, g->x) == MP_EQ &&
+ mp_cmp(fp_cache[x].g->y, g->y) == MP_EQ &&
mp_cmp(fp_cache[x].g->z, g->z) == MP_EQ) {
break;
}
@@ -3519,7 +8694,7 @@ static int add_entry(int idx, ecc_point *g)
unsigned x, y;
/* allocate base and LUT */
- fp_cache[idx].g = ecc_new_point();
+ fp_cache[idx].g = wc_ecc_new_point();
if (fp_cache[idx].g == NULL) {
return GEN_MEM_ERR;
}
@@ -3528,38 +8703,42 @@ static int add_entry(int idx, ecc_point *g)
if ((mp_copy(g->x, fp_cache[idx].g->x) != MP_OKAY) ||
(mp_copy(g->y, fp_cache[idx].g->y) != MP_OKAY) ||
(mp_copy(g->z, fp_cache[idx].g->z) != MP_OKAY)) {
- ecc_del_point(fp_cache[idx].g);
+ wc_ecc_del_point(fp_cache[idx].g);
fp_cache[idx].g = NULL;
return GEN_MEM_ERR;
- }
+ }
for (x = 0; x < (1U<<FP_LUT); x++) {
- fp_cache[idx].LUT[x] = ecc_new_point();
+ fp_cache[idx].LUT[x] = wc_ecc_new_point();
if (fp_cache[idx].LUT[x] == NULL) {
for (y = 0; y < x; y++) {
- ecc_del_point(fp_cache[idx].LUT[y]);
+ wc_ecc_del_point(fp_cache[idx].LUT[y]);
fp_cache[idx].LUT[y] = NULL;
}
- ecc_del_point(fp_cache[idx].g);
+ wc_ecc_del_point(fp_cache[idx].g);
fp_cache[idx].g = NULL;
fp_cache[idx].lru_count = 0;
return GEN_MEM_ERR;
}
}
-
+
fp_cache[idx].lru_count = 0;
return MP_OKAY;
}
+#endif
-/* build the LUT by spacing the bits of the input by #modulus/FP_LUT bits apart
- *
- * The algorithm builds patterns in increasing bit order by first making all
+#ifndef WOLFSSL_SP_MATH
+/* build the LUT by spacing the bits of the input by #modulus/FP_LUT bits apart
+ *
+ * The algorithm builds patterns in increasing bit order by first making all
* single bit input patterns, then all two bit input patterns and so on
*/
-static int build_lut(int idx, mp_int* modulus, mp_digit* mp, mp_int* mu)
-{
- unsigned x, y, err, bitlen, lut_gap;
+static int build_lut(int idx, mp_int* a, mp_int* modulus, mp_digit mp,
+ mp_int* mu)
+{
+ int err;
+ unsigned x, y, bitlen, lut_gap;
mp_int tmp;
if (mp_init(&tmp) != MP_OKAY)
@@ -3570,82 +8749,80 @@ static int build_lut(int idx, mp_int* modulus, mp_digit* mp, mp_int* mu)
if ((sizeof(lut_orders) / sizeof(lut_orders[0])) < (1U<<FP_LUT)) {
err = BAD_FUNC_ARG;
}
- else {
+ else {
/* get bitlen and round up to next multiple of FP_LUT */
bitlen = mp_unsigned_bin_size(modulus) << 3;
x = bitlen % FP_LUT;
if (x) {
bitlen += FP_LUT - x;
- }
+ }
lut_gap = bitlen / FP_LUT;
/* init the mu */
err = mp_init_copy(&fp_cache[idx].mu, mu);
}
-
+
/* copy base */
if (err == MP_OKAY) {
if ((mp_mulmod(fp_cache[idx].g->x, mu, modulus,
- fp_cache[idx].LUT[1]->x) != MP_OKAY) ||
+ fp_cache[idx].LUT[1]->x) != MP_OKAY) ||
(mp_mulmod(fp_cache[idx].g->y, mu, modulus,
- fp_cache[idx].LUT[1]->y) != MP_OKAY) ||
+ fp_cache[idx].LUT[1]->y) != MP_OKAY) ||
(mp_mulmod(fp_cache[idx].g->z, mu, modulus,
fp_cache[idx].LUT[1]->z) != MP_OKAY)) {
- err = MP_MULMOD_E;
+ err = MP_MULMOD_E;
}
}
-
+
/* make all single bit entries */
for (x = 1; x < FP_LUT; x++) {
if (err != MP_OKAY)
break;
if ((mp_copy(fp_cache[idx].LUT[1<<(x-1)]->x,
- fp_cache[idx].LUT[1<<x]->x) != MP_OKAY) ||
+ fp_cache[idx].LUT[1<<x]->x) != MP_OKAY) ||
(mp_copy(fp_cache[idx].LUT[1<<(x-1)]->y,
- fp_cache[idx].LUT[1<<x]->y) != MP_OKAY) ||
+ fp_cache[idx].LUT[1<<x]->y) != MP_OKAY) ||
(mp_copy(fp_cache[idx].LUT[1<<(x-1)]->z,
fp_cache[idx].LUT[1<<x]->z) != MP_OKAY)){
err = MP_INIT_E;
break;
} else {
-
+
/* now double it bitlen/FP_LUT times */
for (y = 0; y < lut_gap; y++) {
if ((err = ecc_projective_dbl_point(fp_cache[idx].LUT[1<<x],
- fp_cache[idx].LUT[1<<x], modulus, mp)) != MP_OKAY) {
+ fp_cache[idx].LUT[1<<x], a, modulus, mp)) != MP_OKAY) {
break;
}
}
}
}
-
+
/* now make all entries in increase order of hamming weight */
for (x = 2; x <= FP_LUT; x++) {
if (err != MP_OKAY)
break;
for (y = 0; y < (1UL<<FP_LUT); y++) {
- if (err != MP_OKAY)
- break;
if (lut_orders[y].ham != (int)x) continue;
-
+
/* perform the add */
if ((err = ecc_projective_add_point(
fp_cache[idx].LUT[lut_orders[y].terma],
fp_cache[idx].LUT[lut_orders[y].termb],
- fp_cache[idx].LUT[y], modulus, mp)) != MP_OKAY) {
+ fp_cache[idx].LUT[y], a, modulus, mp)) != MP_OKAY) {
break;
}
}
}
-
+
/* now map all entries back to affine space to make point addition faster */
for (x = 1; x < (1UL<<FP_LUT); x++) {
if (err != MP_OKAY)
break;
/* convert z to normal from montgomery */
- err = mp_montgomery_reduce(fp_cache[idx].LUT[x]->z, modulus, *mp);
-
+ err = mp_montgomery_reduce(fp_cache[idx].LUT[x]->z, modulus, mp);
+
/* invert it */
if (err == MP_OKAY)
err = mp_invmod(fp_cache[idx].LUT[x]->z, modulus,
@@ -3654,7 +8831,7 @@ static int build_lut(int idx, mp_int* modulus, mp_digit* mp, mp_int* mu)
if (err == MP_OKAY)
/* now square it */
err = mp_sqrmod(fp_cache[idx].LUT[x]->z, modulus, &tmp);
-
+
if (err == MP_OKAY)
/* fix x */
err = mp_mulmod(fp_cache[idx].LUT[x]->x, &tmp, modulus,
@@ -3673,6 +8850,7 @@ static int build_lut(int idx, mp_int* modulus, mp_digit* mp, mp_int* mu)
/* free z */
mp_clear(fp_cache[idx].LUT[x]->z);
}
+
mp_clear(&tmp);
if (err == MP_OKAY)
@@ -3680,107 +8858,99 @@ static int build_lut(int idx, mp_int* modulus, mp_digit* mp, mp_int* mu)
/* err cleanup */
for (y = 0; y < (1U<<FP_LUT); y++) {
- ecc_del_point(fp_cache[idx].LUT[y]);
+ wc_ecc_del_point(fp_cache[idx].LUT[y]);
fp_cache[idx].LUT[y] = NULL;
}
- ecc_del_point(fp_cache[idx].g);
+ wc_ecc_del_point(fp_cache[idx].g);
fp_cache[idx].g = NULL;
fp_cache[idx].lru_count = 0;
mp_clear(&fp_cache[idx].mu);
- mp_clear(&tmp);
return err;
}
/* perform a fixed point ECC mulmod */
-static int accel_fp_mul(int idx, mp_int* k, ecc_point *R, mp_int* modulus,
- mp_digit* mp, int map)
+static int accel_fp_mul(int idx, mp_int* k, ecc_point *R, mp_int* a,
+ mp_int* modulus, mp_digit mp, int map)
{
#define KB_SIZE 128
#ifdef WOLFSSL_SMALL_STACK
- unsigned char* kb;
+ unsigned char* kb = NULL;
#else
- unsigned char kb[128];
+ unsigned char kb[KB_SIZE];
#endif
- int x;
- unsigned y, z, err, bitlen, bitpos, lut_gap, first;
- mp_int tk;
+ int x, err;
+ unsigned y, z = 0, bitlen, bitpos, lut_gap, first;
+ mp_int tk, order;
- if (mp_init(&tk) != MP_OKAY)
+ if (mp_init_multi(&tk, &order, NULL, NULL, NULL, NULL) != MP_OKAY)
return MP_INIT_E;
/* if it's smaller than modulus we fine */
if (mp_unsigned_bin_size(k) > mp_unsigned_bin_size(modulus)) {
- mp_int order;
- if (mp_init(&order) != MP_OKAY) {
- mp_clear(&tk);
- return MP_INIT_E;
- }
-
/* find order */
y = mp_unsigned_bin_size(modulus);
for (x = 0; ecc_sets[x].size; x++) {
if (y <= (unsigned)ecc_sets[x].size) break;
}
-
+
/* back off if we are on the 521 bit curve */
if (y == 66) --x;
-
- if ((err = mp_read_radix(&order, ecc_sets[x].order, 16)) != MP_OKAY) {
- mp_clear(&order);
- mp_clear(&tk);
- return err;
+
+ if ((err = mp_read_radix(&order, ecc_sets[x].order,
+ MP_RADIX_HEX)) != MP_OKAY) {
+ goto done;
}
/* k must be less than modulus */
if (mp_cmp(k, &order) != MP_LT) {
if ((err = mp_mod(k, &order, &tk)) != MP_OKAY) {
- mp_clear(&tk);
- mp_clear(&order);
- return err;
+ goto done;
}
} else {
- mp_copy(k, &tk);
+ if ((err = mp_copy(k, &tk)) != MP_OKAY) {
+ goto done;
+ }
}
- mp_clear(&order);
} else {
- mp_copy(k, &tk);
- }
-
+ if ((err = mp_copy(k, &tk)) != MP_OKAY) {
+ goto done;
+ }
+ }
+
/* get bitlen and round up to next multiple of FP_LUT */
bitlen = mp_unsigned_bin_size(modulus) << 3;
x = bitlen % FP_LUT;
if (x) {
bitlen += FP_LUT - x;
- }
+ }
lut_gap = bitlen / FP_LUT;
-
+
/* get the k value */
if (mp_unsigned_bin_size(&tk) > (int)(KB_SIZE - 2)) {
- mp_clear(&tk);
- return BUFFER_E;
+ err = BUFFER_E; goto done;
}
-
+
/* store k */
#ifdef WOLFSSL_SMALL_STACK
- kb = (unsigned char*)XMALLOC(KB_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (kb == NULL)
- return MEMORY_E;
+ kb = (unsigned char*)XMALLOC(KB_SIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+ if (kb == NULL) {
+ err = MEMORY_E; goto done;
+ }
#endif
XMEMSET(kb, 0, KB_SIZE);
- if ((err = mp_to_unsigned_bin(&tk, kb)) != MP_OKAY) {
- mp_clear(&tk);
- }
- else {
+ if ((err = mp_to_unsigned_bin(&tk, kb)) == MP_OKAY) {
/* let's reverse kb so it's little endian */
x = 0;
- y = mp_unsigned_bin_size(&tk) - 1;
- mp_clear(&tk);
+ y = mp_unsigned_bin_size(&tk);
+ if (y > 0) {
+ y -= 1;
+ }
while ((unsigned)x < y) {
- z = kb[x]; kb[x] = kb[y]; kb[y] = z;
+ z = kb[x]; kb[x] = kb[y]; kb[y] = (byte)z;
++x; --y;
}
@@ -3798,7 +8968,7 @@ static int accel_fp_mul(int idx, mp_int* k, ecc_point *R, mp_int* modulus,
/* double if not first */
if (!first) {
- if ((err = ecc_projective_dbl_point(R, R, modulus,
+ if ((err = ecc_projective_dbl_point(R, R, a, modulus,
mp)) != MP_OKAY) {
break;
}
@@ -3806,10 +8976,35 @@ static int accel_fp_mul(int idx, mp_int* k, ecc_point *R, mp_int* modulus,
/* add if not first, otherwise copy */
if (!first && z) {
- if ((err = ecc_projective_add_point(R, fp_cache[idx].LUT[z], R,
+ if ((err = ecc_projective_add_point(R, fp_cache[idx].LUT[z], R, a,
modulus, mp)) != MP_OKAY) {
break;
}
+ if (mp_iszero(R->z)) {
+ /* When all zero then should have done an add */
+ if (mp_iszero(R->x) && mp_iszero(R->y)) {
+ if ((err = ecc_projective_dbl_point(fp_cache[idx].LUT[z],
+ R, a, modulus, mp)) != MP_OKAY) {
+ break;
+ }
+ }
+ /* When only Z zero then result is infinity */
+ else {
+ err = mp_set(R->x, 0);
+ if (err != MP_OKAY) {
+ break;
+ }
+ err = mp_set(R->y, 0);
+ if (err != MP_OKAY) {
+ break;
+ }
+ err = mp_copy(&fp_cache[idx].mu, R->z);
+ if (err != MP_OKAY) {
+ break;
+ }
+ first = 1;
+ }
+ }
} else if (z) {
if ((mp_copy(fp_cache[idx].LUT[z]->x, R->x) != MP_OKAY) ||
(mp_copy(fp_cache[idx].LUT[z]->y, R->y) != MP_OKAY) ||
@@ -3817,14 +9012,15 @@ static int accel_fp_mul(int idx, mp_int* k, ecc_point *R, mp_int* modulus,
err = GEN_MEM_ERR;
break;
}
- first = 0;
+ first = 0;
}
}
}
if (err == MP_OKAY) {
- z = 0;
+ (void) z; /* Acknowledge the unused assignment */
ForceZero(kb, KB_SIZE);
+
/* map R back from projective space */
if (map) {
err = ecc_map(R, modulus, mp);
@@ -3833,35 +9029,41 @@ static int accel_fp_mul(int idx, mp_int* k, ecc_point *R, mp_int* modulus,
}
}
+done:
+ /* cleanup */
+ mp_clear(&order);
+ mp_clear(&tk);
+
#ifdef WOLFSSL_SMALL_STACK
- XFREE(kb, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(kb, NULL, DYNAMIC_TYPE_ECC_BUFFER);
#endif
#undef KB_SIZE
return err;
}
+#endif
#ifdef ECC_SHAMIR
+#ifndef WOLFSSL_SP_MATH
/* perform a fixed point ECC mulmod */
-static int accel_fp_mul2add(int idx1, int idx2,
+static int accel_fp_mul2add(int idx1, int idx2,
mp_int* kA, mp_int* kB,
- ecc_point *R, mp_int* modulus, mp_digit* mp)
+ ecc_point *R, mp_int* a,
+ mp_int* modulus, mp_digit mp)
{
#define KB_SIZE 128
#ifdef WOLFSSL_SMALL_STACK
- unsigned char* kb[2];
+ unsigned char* kb[2] = {NULL, NULL};
#else
- unsigned char kb[2][128];
+ unsigned char kb[2][KB_SIZE];
#endif
- int x;
- unsigned y, z, err, bitlen, bitpos, lut_gap, first, zA, zB;
- mp_int tka;
- mp_int tkb;
- mp_int order;
+ int x, err;
+ unsigned y, z, bitlen, bitpos, lut_gap, first, zA, zB;
+ mp_int tka, tkb, order;
- if (mp_init_multi(&tka, &tkb, 0, 0, 0, 0) != MP_OKAY)
+ if (mp_init_multi(&tka, &tkb, &order, NULL, NULL, NULL) != MP_OKAY)
return MP_INIT_E;
/* if it's smaller than modulus we fine */
@@ -3871,37 +9073,30 @@ static int accel_fp_mul2add(int idx1, int idx2,
for (x = 0; ecc_sets[x].size; x++) {
if (y <= (unsigned)ecc_sets[x].size) break;
}
-
+
/* back off if we are on the 521 bit curve */
if (y == 66) --x;
-
- if ((err = mp_init(&order)) != MP_OKAY) {
- mp_clear(&tkb);
- mp_clear(&tka);
- return err;
- }
- if ((err = mp_read_radix(&order, ecc_sets[x].order, 16)) != MP_OKAY) {
- mp_clear(&tkb);
- mp_clear(&tka);
- mp_clear(&order);
- return err;
+
+ if ((err = mp_read_radix(&order, ecc_sets[x].order,
+ MP_RADIX_HEX)) != MP_OKAY) {
+ goto done;
}
/* kA must be less than modulus */
if (mp_cmp(kA, &order) != MP_LT) {
if ((err = mp_mod(kA, &order, &tka)) != MP_OKAY) {
- mp_clear(&tkb);
- mp_clear(&tka);
- mp_clear(&order);
- return err;
+ goto done;
}
} else {
- mp_copy(kA, &tka);
+ if ((err = mp_copy(kA, &tka)) != MP_OKAY) {
+ goto done;
+ }
}
- mp_clear(&order);
} else {
- mp_copy(kA, &tka);
- }
+ if ((err = mp_copy(kA, &tka)) != MP_OKAY) {
+ goto done;
+ }
+ }
/* if it's smaller than modulus we fine */
if (mp_unsigned_bin_size(kB) > mp_unsigned_bin_size(modulus)) {
@@ -3910,97 +9105,88 @@ static int accel_fp_mul2add(int idx1, int idx2,
for (x = 0; ecc_sets[x].size; x++) {
if (y <= (unsigned)ecc_sets[x].size) break;
}
-
+
/* back off if we are on the 521 bit curve */
if (y == 66) --x;
-
- if ((err = mp_init(&order)) != MP_OKAY) {
- mp_clear(&tkb);
- mp_clear(&tka);
- return err;
- }
- if ((err = mp_read_radix(&order, ecc_sets[x].order, 16)) != MP_OKAY) {
- mp_clear(&tkb);
- mp_clear(&tka);
- mp_clear(&order);
- return err;
+
+ if ((err = mp_read_radix(&order, ecc_sets[x].order,
+ MP_RADIX_HEX)) != MP_OKAY) {
+ goto done;
}
/* kB must be less than modulus */
if (mp_cmp(kB, &order) != MP_LT) {
if ((err = mp_mod(kB, &order, &tkb)) != MP_OKAY) {
- mp_clear(&tkb);
- mp_clear(&tka);
- mp_clear(&order);
- return err;
+ goto done;
}
} else {
- mp_copy(kB, &tkb);
+ if ((err = mp_copy(kB, &tkb)) != MP_OKAY) {
+ goto done;
+ }
}
- mp_clear(&order);
} else {
- mp_copy(kB, &tkb);
- }
+ if ((err = mp_copy(kB, &tkb)) != MP_OKAY) {
+ goto done;
+ }
+ }
/* get bitlen and round up to next multiple of FP_LUT */
bitlen = mp_unsigned_bin_size(modulus) << 3;
x = bitlen % FP_LUT;
if (x) {
bitlen += FP_LUT - x;
- }
+ }
lut_gap = bitlen / FP_LUT;
-
+
/* get the k value */
if ((mp_unsigned_bin_size(&tka) > (int)(KB_SIZE - 2)) ||
(mp_unsigned_bin_size(&tkb) > (int)(KB_SIZE - 2)) ) {
- mp_clear(&tka);
- mp_clear(&tkb);
- return BUFFER_E;
+ err = BUFFER_E; goto done;
}
-
+
/* store k */
#ifdef WOLFSSL_SMALL_STACK
- kb[0] = (unsigned char*)XMALLOC(KB_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (kb[0] == NULL)
- return MEMORY_E;
+ kb[0] = (unsigned char*)XMALLOC(KB_SIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+ if (kb[0] == NULL) {
+ err = MEMORY_E; goto done;
+ }
#endif
XMEMSET(kb[0], 0, KB_SIZE);
if ((err = mp_to_unsigned_bin(&tka, kb[0])) != MP_OKAY) {
- mp_clear(&tka);
- mp_clear(&tkb);
- XFREE(kb[0], NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return err;
+ goto done;
}
-
+
/* let's reverse kb so it's little endian */
x = 0;
- y = mp_unsigned_bin_size(&tka) - 1;
+ y = mp_unsigned_bin_size(&tka);
+ if (y > 0) {
+ y -= 1;
+ }
mp_clear(&tka);
while ((unsigned)x < y) {
- z = kb[0][x]; kb[0][x] = kb[0][y]; kb[0][y] = z;
+ z = kb[0][x]; kb[0][x] = kb[0][y]; kb[0][y] = (byte)z;
++x; --y;
- }
-
+ }
+
/* store b */
#ifdef WOLFSSL_SMALL_STACK
- kb[1] = (unsigned char*)XMALLOC(KB_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ kb[1] = (unsigned char*)XMALLOC(KB_SIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
if (kb[1] == NULL) {
- XFREE(kb[0], NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
+ err = MEMORY_E; goto done;
}
#endif
XMEMSET(kb[1], 0, KB_SIZE);
- if ((err = mp_to_unsigned_bin(&tkb, kb[1])) != MP_OKAY) {
- mp_clear(&tkb);
- }
- else {
+ if ((err = mp_to_unsigned_bin(&tkb, kb[1])) == MP_OKAY) {
x = 0;
- y = mp_unsigned_bin_size(&tkb) - 1;
- mp_clear(&tkb);
+ y = mp_unsigned_bin_size(&tkb);
+ if (y > 0) {
+ y -= 1;
+ }
+
while ((unsigned)x < y) {
- z = kb[1][x]; kb[1][x] = kb[1][y]; kb[1][y] = z;
+ z = kb[1][x]; kb[1][x] = kb[1][y]; kb[1][y] = (byte)z;
++x; --y;
}
@@ -4019,31 +9205,82 @@ static int accel_fp_mul2add(int idx1, int idx2,
/* double if not first */
if (!first) {
- if ((err = ecc_projective_dbl_point(R, R, modulus,
+ if ((err = ecc_projective_dbl_point(R, R, a, modulus,
mp)) != MP_OKAY) {
break;
}
- }
- /* add if not first, otherwise copy */
- if (!first) {
+ /* add if not first, otherwise copy */
if (zA) {
if ((err = ecc_projective_add_point(R, fp_cache[idx1].LUT[zA],
- R, modulus, mp)) != MP_OKAY) {
+ R, a, modulus, mp)) != MP_OKAY) {
break;
}
+ if (mp_iszero(R->z)) {
+ /* When all zero then should have done an add */
+ if (mp_iszero(R->x) && mp_iszero(R->y)) {
+ if ((err = ecc_projective_dbl_point(
+ fp_cache[idx1].LUT[zA], R,
+ a, modulus, mp)) != MP_OKAY) {
+ break;
+ }
+ }
+ /* When only Z zero then result is infinity */
+ else {
+ err = mp_set(R->x, 0);
+ if (err != MP_OKAY) {
+ break;
+ }
+ err = mp_set(R->y, 0);
+ if (err != MP_OKAY) {
+ break;
+ }
+ err = mp_copy(&fp_cache[idx1].mu, R->z);
+ if (err != MP_OKAY) {
+ break;
+ }
+ first = 1;
+ }
+ }
}
+
if (zB) {
if ((err = ecc_projective_add_point(R, fp_cache[idx2].LUT[zB],
- R, modulus, mp)) != MP_OKAY) {
+ R, a, modulus, mp)) != MP_OKAY) {
break;
}
+ if (mp_iszero(R->z)) {
+ /* When all zero then should have done an add */
+ if (mp_iszero(R->x) && mp_iszero(R->y)) {
+ if ((err = ecc_projective_dbl_point(
+ fp_cache[idx2].LUT[zB], R,
+ a, modulus, mp)) != MP_OKAY) {
+ break;
+ }
+ }
+ /* When only Z zero then result is infinity */
+ else {
+ err = mp_set(R->x, 0);
+ if (err != MP_OKAY) {
+ break;
+ }
+ err = mp_set(R->y, 0);
+ if (err != MP_OKAY) {
+ break;
+ }
+ err = mp_copy(&fp_cache[idx2].mu, R->z);
+ if (err != MP_OKAY) {
+ break;
+ }
+ first = 1;
+ }
+ }
}
} else {
if (zA) {
if ((mp_copy(fp_cache[idx1].LUT[zA]->x, R->x) != MP_OKAY) ||
- (mp_copy(fp_cache[idx1].LUT[zA]->y, R->y) != MP_OKAY) ||
- (mp_copy(&fp_cache[idx1].mu, R->z) != MP_OKAY)) {
+ (mp_copy(fp_cache[idx1].LUT[zA]->y, R->y) != MP_OKAY) ||
+ (mp_copy(&fp_cache[idx1].mu, R->z) != MP_OKAY)) {
err = GEN_MEM_ERR;
break;
}
@@ -4052,14 +9289,40 @@ static int accel_fp_mul2add(int idx1, int idx2,
if (zB && first == 0) {
if (zB) {
if ((err = ecc_projective_add_point(R,
- fp_cache[idx2].LUT[zB], R, modulus, mp)) != MP_OKAY){
+ fp_cache[idx2].LUT[zB], R, a, modulus, mp)) != MP_OKAY){
break;
}
+ if (mp_iszero(R->z)) {
+ /* When all zero then should have done an add */
+ if (mp_iszero(R->x) && mp_iszero(R->y)) {
+ if ((err = ecc_projective_dbl_point(
+ fp_cache[idx2].LUT[zB], R,
+ a, modulus, mp)) != MP_OKAY) {
+ break;
+ }
+ }
+ /* When only Z zero then result is infinity */
+ else {
+ err = mp_set(R->x, 0);
+ if (err != MP_OKAY) {
+ break;
+ }
+ err = mp_set(R->y, 0);
+ if (err != MP_OKAY) {
+ break;
+ }
+ err = mp_copy(&fp_cache[idx2].mu, R->z);
+ if (err != MP_OKAY) {
+ break;
+ }
+ first = 1;
+ }
+ }
}
} else if (zB && first == 1) {
if ((mp_copy(fp_cache[idx2].LUT[zB]->x, R->x) != MP_OKAY) ||
- (mp_copy(fp_cache[idx2].LUT[zB]->y, R->y) != MP_OKAY) ||
- (mp_copy(&fp_cache[idx2].mu, R->z) != MP_OKAY)) {
+ (mp_copy(fp_cache[idx2].LUT[zB]->y, R->y) != MP_OKAY) ||
+ (mp_copy(&fp_cache[idx2].mu, R->z) != MP_OKAY)) {
err = GEN_MEM_ERR;
break;
}
@@ -4069,47 +9332,64 @@ static int accel_fp_mul2add(int idx1, int idx2,
}
}
- ForceZero(kb[0], KB_SIZE);
- ForceZero(kb[1], KB_SIZE);
+done:
+ /* cleanup */
+ mp_clear(&tkb);
+ mp_clear(&tka);
+ mp_clear(&order);
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (kb[0])
+#endif
+ ForceZero(kb[0], KB_SIZE);
+#ifdef WOLFSSL_SMALL_STACK
+ if (kb[1])
+#endif
+ ForceZero(kb[1], KB_SIZE);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(kb[0], NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(kb[1], NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(kb[0], NULL, DYNAMIC_TYPE_ECC_BUFFER);
+ XFREE(kb[1], NULL, DYNAMIC_TYPE_ECC_BUFFER);
#endif
#undef KB_SIZE
+ if (err != MP_OKAY)
+ return err;
+
return ecc_map(R, modulus, mp);
}
-/** ECC Fixed Point mulmod global
+
+/** ECC Fixed Point mulmod global, using the supplied heap hint
Computes kA*A + kB*B = C using Shamir's Trick
A First point to multiply
kA What to multiple A by
B Second point to multiply
kB What to multiple B by
C [out] Destination point (can overlap with A or B)
- modulus Modulus for curve
+ a ECC curve parameter a
+ modulus Modulus for curve
return MP_OKAY on success
-*/
+*/
int ecc_mul2add(ecc_point* A, mp_int* kA,
ecc_point* B, mp_int* kB,
- ecc_point* C, mp_int* modulus)
+ ecc_point* C, mp_int* a, mp_int* modulus, void* heap)
{
- int idx1 = -1, idx2 = -1, err = MP_OKAY, mpInit = 0;
+ int idx1 = -1, idx2 = -1, err, mpInit = 0;
mp_digit mp;
mp_int mu;
-
+
err = mp_init(&mu);
if (err != MP_OKAY)
return err;
#ifndef HAVE_THREAD_LS
if (initMutex == 0) {
- InitMutex(&ecc_fp_lock);
+ wc_InitMutex(&ecc_fp_lock);
initMutex = 1;
}
- if (LockMutex(&ecc_fp_lock) != 0)
+ if (wc_LockMutex(&ecc_fp_lock) != 0)
return BAD_MUTEX_E;
#endif /* HAVE_THREAD_LS */
@@ -4128,11 +9408,10 @@ int ecc_mul2add(ecc_point* A, mp_int* kA,
++(fp_cache[idx1].lru_count);
}
- if (err == MP_OKAY)
+ if (err == MP_OKAY) {
/* find point */
idx2 = find_base(B);
- if (err == MP_OKAY) {
/* no entry? */
if (idx2 == -1) {
/* find hole and add it */
@@ -4156,10 +9435,10 @@ int ecc_mul2add(ecc_point* A, mp_int* kA,
mpInit = 1;
err = mp_montgomery_calc_normalization(&mu, modulus);
}
-
+
if (err == MP_OKAY)
/* build the LUT */
- err = build_lut(idx1, modulus, &mp, &mu);
+ err = build_lut(idx1, a, modulus, mp, &mu);
}
}
@@ -4174,10 +9453,10 @@ int ecc_mul2add(ecc_point* A, mp_int* kA,
err = mp_montgomery_calc_normalization(&mu, modulus);
}
}
-
- if (err == MP_OKAY)
+
+ if (err == MP_OKAY)
/* build the LUT */
- err = build_lut(idx2, modulus, &mp, &mu);
+ err = build_lut(idx2, a, modulus, mp, &mu);
}
}
@@ -4190,48 +9469,55 @@ int ecc_mul2add(ecc_point* A, mp_int* kA,
err = mp_montgomery_setup(modulus, &mp);
}
if (err == MP_OKAY)
- err = accel_fp_mul2add(idx1, idx2, kA, kB, C, modulus, &mp);
+ err = accel_fp_mul2add(idx1, idx2, kA, kB, C, a, modulus, mp);
} else {
- err = normal_ecc_mul2add(A, kA, B, kB, C, modulus);
+ err = normal_ecc_mul2add(A, kA, B, kB, C, a, modulus, heap);
}
}
#ifndef HAVE_THREAD_LS
- UnLockMutex(&ecc_fp_lock);
+ wc_UnLockMutex(&ecc_fp_lock);
#endif /* HAVE_THREAD_LS */
mp_clear(&mu);
return err;
}
#endif
+#endif /* ECC_SHAMIR */
/** ECC Fixed Point mulmod global
k The multiplicand
G Base point to multiply
R [out] Destination of product
+ a ECC curve parameter a
modulus The modulus for the curve
- map [boolean] If non-zero maps the point back to affine co-ordinates,
+ map [boolean] If non-zero maps the point back to affine coordinates,
otherwise it's left in jacobian-montgomery form
return MP_OKAY if successful
-*/
-int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
- int map)
+*/
+int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R, mp_int* a,
+ mp_int* modulus, int map, void* heap)
{
+#ifndef WOLFSSL_SP_MATH
int idx, err = MP_OKAY;
mp_digit mp;
mp_int mu;
int mpSetup = 0;
+ if (k == NULL || G == NULL || R == NULL || a == NULL || modulus == NULL) {
+ return ECC_BAD_ARG_E;
+ }
+
if (mp_init(&mu) != MP_OKAY)
return MP_INIT_E;
-
+
#ifndef HAVE_THREAD_LS
if (initMutex == 0) {
- InitMutex(&ecc_fp_lock);
+ wc_InitMutex(&ecc_fp_lock);
initMutex = 1;
}
-
- if (LockMutex(&ecc_fp_lock) != 0)
+
+ if (wc_LockMutex(&ecc_fp_lock) != 0)
return BAD_MUTEX_E;
#endif /* HAVE_THREAD_LS */
@@ -4252,7 +9538,7 @@ int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
}
- if (err == MP_OKAY) {
+ if (err == MP_OKAY) {
/* if it's 2 build the LUT, if it's higher just use the LUT */
if (idx >= 0 && fp_cache[idx].lru_count == 2) {
/* compute mp */
@@ -4263,34 +9549,52 @@ int ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* modulus,
mpSetup = 1;
err = mp_montgomery_calc_normalization(&mu, modulus);
}
-
- if (err == MP_OKAY)
+
+ if (err == MP_OKAY)
/* build the LUT */
- err = build_lut(idx, modulus, &mp, &mu);
+ err = build_lut(idx, a, modulus, mp, &mu);
}
}
- if (err == MP_OKAY) {
+ if (err == MP_OKAY) {
if (idx >= 0 && fp_cache[idx].lru_count >= 2) {
if (mpSetup == 0) {
/* compute mp */
err = mp_montgomery_setup(modulus, &mp);
}
if (err == MP_OKAY)
- err = accel_fp_mul(idx, k, R, modulus, &mp, map);
+ err = accel_fp_mul(idx, k, R, a, modulus, mp, map);
} else {
- err = normal_ecc_mulmod(k, G, R, modulus, map);
+ err = normal_ecc_mulmod(k, G, R, a, modulus, map, heap);
}
}
#ifndef HAVE_THREAD_LS
- UnLockMutex(&ecc_fp_lock);
+ wc_UnLockMutex(&ecc_fp_lock);
#endif /* HAVE_THREAD_LS */
mp_clear(&mu);
return err;
+#else
+ if (k == NULL || G == NULL || R == NULL || a == NULL || modulus == NULL) {
+ return ECC_BAD_ARG_E;
+ }
+
+#ifndef WOLFSSL_SP_NO_256
+ if (mp_count_bits(modulus) == 256) {
+ return sp_ecc_mulmod_256(k, G, R, map, heap);
+ }
+#endif
+#ifdef WOLFSSL_SP_384
+ if (mp_count_bits(modulus) == 384) {
+ return sp_ecc_mulmod_384(k, G, R, map, heap);
+ }
+#endif
+ return WC_KEY_SIZE_E;
+#endif
}
+#ifndef WOLFSSL_SP_MATH
/* helper function for freeing the cache ...
must be called with the cache mutex locked */
static void wc_ecc_fp_free_cache(void)
@@ -4299,38 +9603,41 @@ static void wc_ecc_fp_free_cache(void)
for (x = 0; x < FP_ENTRIES; x++) {
if (fp_cache[x].g != NULL) {
for (y = 0; y < (1U<<FP_LUT); y++) {
- ecc_del_point(fp_cache[x].LUT[y]);
+ wc_ecc_del_point(fp_cache[x].LUT[y]);
fp_cache[x].LUT[y] = NULL;
}
- ecc_del_point(fp_cache[x].g);
+ wc_ecc_del_point(fp_cache[x].g);
fp_cache[x].g = NULL;
mp_clear(&fp_cache[x].mu);
fp_cache[x].lru_count = 0;
fp_cache[x].lock = 0;
- }
+ }
}
-}
+}
+#endif
/** Free the Fixed Point cache */
void wc_ecc_fp_free(void)
{
+#ifndef WOLFSSL_SP_MATH
#ifndef HAVE_THREAD_LS
if (initMutex == 0) {
- InitMutex(&ecc_fp_lock);
+ wc_InitMutex(&ecc_fp_lock);
initMutex = 1;
}
-
- if (LockMutex(&ecc_fp_lock) == 0) {
+
+ if (wc_LockMutex(&ecc_fp_lock) == 0) {
#endif /* HAVE_THREAD_LS */
wc_ecc_fp_free_cache();
#ifndef HAVE_THREAD_LS
- UnLockMutex(&ecc_fp_lock);
- FreeMutex(&ecc_fp_lock);
+ wc_UnLockMutex(&ecc_fp_lock);
+ wc_FreeMutex(&ecc_fp_lock);
initMutex = 0;
}
#endif /* HAVE_THREAD_LS */
+#endif
}
@@ -4340,21 +9647,21 @@ void wc_ecc_fp_free(void)
enum ecCliState {
- ecCLI_INIT = 1,
- ecCLI_SALT_GET = 2,
- ecCLI_SALT_SET = 3,
- ecCLI_SENT_REQ = 4,
- ecCLI_RECV_RESP = 5,
- ecCLI_BAD_STATE = 99
+ ecCLI_INIT = 1,
+ ecCLI_SALT_GET = 2,
+ ecCLI_SALT_SET = 3,
+ ecCLI_SENT_REQ = 4,
+ ecCLI_RECV_RESP = 5,
+ ecCLI_BAD_STATE = 99
};
enum ecSrvState {
- ecSRV_INIT = 1,
- ecSRV_SALT_GET = 2,
- ecSRV_SALT_SET = 3,
- ecSRV_RECV_REQ = 4,
- ecSRV_SENT_RESP = 5,
- ecSRV_BAD_STATE = 99
+ ecSRV_INIT = 1,
+ ecSRV_SALT_GET = 2,
+ ecSRV_SALT_SET = 3,
+ ecSRV_RECV_REQ = 4,
+ ecSRV_SENT_RESP = 5,
+ ecSRV_BAD_STATE = 99
};
@@ -4365,6 +9672,7 @@ struct ecEncCtx {
word32 kdfSaltSz; /* size of kdfSalt */
word32 kdfInfoSz; /* size of kdfInfo */
word32 macSaltSz; /* size of macSalt */
+ void* heap; /* heap hint for memory used */
byte clientSalt[EXCHANGE_SALT_SZ]; /* for msg exchange */
byte serverSalt[EXCHANGE_SALT_SZ]; /* for msg exchange */
byte encAlgo; /* which encryption type */
@@ -4435,7 +9743,7 @@ int wc_ecc_ctx_set_peer_salt(ecEncCtx* ctx, const byte* salt)
ctx->cliSt = ecCLI_SALT_SET;
else {
ctx->cliSt = ecCLI_BAD_STATE;
- return BAD_ENC_STATE_E;
+ return BAD_STATE_E;
}
}
else {
@@ -4444,7 +9752,7 @@ int wc_ecc_ctx_set_peer_salt(ecEncCtx* ctx, const byte* salt)
ctx->srvSt = ecSRV_SALT_SET;
else {
ctx->srvSt = ecSRV_BAD_STATE;
- return BAD_ENC_STATE_E;
+ return BAD_STATE_E;
}
}
@@ -4470,11 +9778,11 @@ int wc_ecc_ctx_set_peer_salt(ecEncCtx* ctx, const byte* salt)
}
-static int ecc_ctx_set_salt(ecEncCtx* ctx, int flags, RNG* rng)
+static int ecc_ctx_set_salt(ecEncCtx* ctx, int flags, WC_RNG* rng)
{
byte* saltBuffer = NULL;
- if (ctx == NULL || rng == NULL || flags == 0)
+ if (ctx == NULL || rng == NULL || flags == 0)
return BAD_FUNC_ARG;
saltBuffer = (flags == REQ_RESP_CLIENT) ? ctx->clientSalt : ctx->serverSalt;
@@ -4501,8 +9809,8 @@ static void ecc_ctx_init(ecEncCtx* ctx, int flags)
}
-/* allow ecc context reset so user doesn't have to init/free for resue */
-int wc_ecc_ctx_reset(ecEncCtx* ctx, RNG* rng)
+/* allow ecc context reset so user doesn't have to init/free for reuse */
+int wc_ecc_ctx_reset(ecEncCtx* ctx, WC_RNG* rng)
{
if (ctx == NULL || rng == NULL)
return BAD_FUNC_ARG;
@@ -4512,14 +9820,16 @@ int wc_ecc_ctx_reset(ecEncCtx* ctx, RNG* rng)
}
-/* alloc/init and set defaults, return new Context */
-ecEncCtx* wc_ecc_ctx_new(int flags, RNG* rng)
+ecEncCtx* wc_ecc_ctx_new_ex(int flags, WC_RNG* rng, void* heap)
{
int ret = 0;
- ecEncCtx* ctx = (ecEncCtx*)XMALLOC(sizeof(ecEncCtx), 0, DYNAMIC_TYPE_ECC);
+ ecEncCtx* ctx = (ecEncCtx*)XMALLOC(sizeof(ecEncCtx), heap,
+ DYNAMIC_TYPE_ECC);
- if (ctx)
+ if (ctx) {
ctx->protocol = (byte)flags;
+ ctx->heap = heap;
+ }
ret = wc_ecc_ctx_reset(ctx, rng);
if (ret != 0) {
@@ -4531,12 +9841,19 @@ ecEncCtx* wc_ecc_ctx_new(int flags, RNG* rng)
}
+/* alloc/init and set defaults, return new Context */
+ecEncCtx* wc_ecc_ctx_new(int flags, WC_RNG* rng)
+{
+ return wc_ecc_ctx_new_ex(flags, rng, NULL);
+}
+
+
/* free any resources, clear any keys */
void wc_ecc_ctx_free(ecEncCtx* ctx)
{
if (ctx) {
ForceZero(ctx, sizeof(ecEncCtx));
- XFREE(ctx, 0, DYNAMIC_TYPE_ECC);
+ XFREE(ctx, ctx->heap, DYNAMIC_TYPE_ECC);
}
}
@@ -4548,7 +9865,7 @@ static int ecc_get_key_sizes(ecEncCtx* ctx, int* encKeySz, int* ivSz,
switch (ctx->encAlgo) {
case ecAES_128_CBC:
*encKeySz = KEY_SIZE_128;
- *ivSz = IV_SIZE_64;
+ *ivSz = IV_SIZE_128;
*blockSz = AES_BLOCK_SIZE;
break;
default:
@@ -4557,7 +9874,7 @@ static int ecc_get_key_sizes(ecEncCtx* ctx, int* encKeySz, int* ivSz,
switch (ctx->macAlgo) {
case ecHMAC_SHA256:
- *digestSz = SHA256_DIGEST_SIZE;
+ *digestSz = WC_SHA256_DIGEST_SIZE;
break;
default:
return BAD_FUNC_ARG;
@@ -4573,12 +9890,12 @@ static int ecc_get_key_sizes(ecEncCtx* ctx, int* encKeySz, int* ivSz,
/* ecc encrypt with shared secret run through kdf
ctx holds non default algos and inputs
- msgSz should be the right size for encAlgo, i.e., already padded
+ msgSz should be the right size for encAlgo, i.e., already padded
return 0 on success */
int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
word32 msgSz, byte* out, word32* outSz, ecEncCtx* ctx)
{
- int ret;
+ int ret = 0;
word32 blockSz;
word32 digestSz;
ecEncCtx localCtx;
@@ -4604,9 +9921,9 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
if (ctx == NULL) { /* use defaults */
ecc_ctx_init(&localCtx, 0);
- ctx = &localCtx;
+ ctx = &localCtx;
}
-
+
ret = ecc_get_key_sizes(ctx, &encKeySz, &ivSz, &keysLen, &digestSz,
&blockSz);
if (ret != 0)
@@ -4617,20 +9934,20 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
keysLen *= 2;
if (ctx->srvSt != ecSRV_RECV_REQ)
- return BAD_ENC_STATE_E;
+ return BAD_STATE_E;
ctx->srvSt = ecSRV_BAD_STATE; /* we're done no more ops allowed */
}
else if (ctx->protocol == REQ_RESP_CLIENT) {
if (ctx->cliSt != ecCLI_SALT_SET)
- return BAD_ENC_STATE_E;
+ return BAD_STATE_E;
ctx->cliSt = ecCLI_SENT_REQ; /* only do this once */
}
-
+
if (keysLen > ECC_BUFSIZE) /* keys size */
return BUFFER_E;
-
+
if ( (msgSz%blockSz) != 0)
return BAD_PADDING_E;
@@ -4638,23 +9955,29 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
return BUFFER_E;
#ifdef WOLFSSL_SMALL_STACK
- sharedSecret = (byte*)XMALLOC(ECC_MAXSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ sharedSecret = (byte*)XMALLOC(ECC_MAXSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
if (sharedSecret == NULL)
return MEMORY_E;
- keys = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ keys = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
if (keys == NULL) {
- XFREE(sharedSecret, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(sharedSecret, NULL, DYNAMIC_TYPE_ECC_BUFFER);
return MEMORY_E;
}
#endif
- ret = wc_ecc_shared_secret(privKey, pubKey, sharedSecret, &sharedSz);
-
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ ret = wc_AsyncWait(ret, &privKey->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ if (ret != 0)
+ break;
+ #endif
+ ret = wc_ecc_shared_secret(privKey, pubKey, sharedSecret, &sharedSz);
+ } while (ret == WC_PENDING_E);
if (ret == 0) {
switch (ctx->kdfAlgo) {
case ecHKDF_SHA256 :
- ret = wc_HKDF(SHA256, sharedSecret, sharedSz, ctx->kdfSalt,
+ ret = wc_HKDF(WC_SHA256, sharedSecret, sharedSz, ctx->kdfSalt,
ctx->kdfSaltSz, ctx->kdfInfo, ctx->kdfInfoSz,
keys, keysLen);
break;
@@ -4674,11 +9997,21 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
case ecAES_128_CBC:
{
Aes aes;
- ret = wc_AesSetKey(&aes, encKey, KEY_SIZE_128, encIv,
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesSetKey(&aes, encKey, KEY_SIZE_128, encIv,
AES_ENCRYPTION);
+ if (ret == 0) {
+ ret = wc_AesCbcEncrypt(&aes, out, msg, msgSz);
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+ ret = wc_AsyncWait(ret, &aes.asyncDev,
+ WC_ASYNC_FLAG_NONE);
+ #endif
+ }
+ wc_AesFree(&aes);
+ }
if (ret != 0)
- break;
- ret = wc_AesCbcEncrypt(&aes, out, msg, msgSz);
+ break;
}
break;
@@ -4693,16 +10026,17 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
case ecHMAC_SHA256:
{
Hmac hmac;
- ret = wc_HmacSetKey(&hmac, SHA256, macKey, SHA256_DIGEST_SIZE);
- if (ret != 0)
- break;
- ret = wc_HmacUpdate(&hmac, out, msgSz);
- if (ret != 0)
- break;
- ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz);
- if (ret != 0)
- break;
- ret = wc_HmacFinal(&hmac, out+msgSz);
+ ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_HmacSetKey(&hmac, WC_SHA256, macKey, WC_SHA256_DIGEST_SIZE);
+ if (ret == 0)
+ ret = wc_HmacUpdate(&hmac, out, msgSz);
+ if (ret == 0)
+ ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz);
+ if (ret == 0)
+ ret = wc_HmacFinal(&hmac, out+msgSz);
+ wc_HmacFree(&hmac);
+ }
}
break;
@@ -4716,8 +10050,8 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
*outSz = msgSz + digestSz;
#ifdef WOLFSSL_SMALL_STACK
- XFREE(sharedSecret, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keys, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(sharedSecret, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+ XFREE(keys, NULL, DYNAMIC_TYPE_ECC_BUFFER);
#endif
return ret;
@@ -4730,7 +10064,7 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
word32 msgSz, byte* out, word32* outSz, ecEncCtx* ctx)
{
- int ret;
+ int ret = 0;
word32 blockSz;
word32 digestSz;
ecEncCtx localCtx;
@@ -4756,33 +10090,33 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
if (ctx == NULL) { /* use defaults */
ecc_ctx_init(&localCtx, 0);
- ctx = &localCtx;
+ ctx = &localCtx;
}
-
+
ret = ecc_get_key_sizes(ctx, &encKeySz, &ivSz, &keysLen, &digestSz,
&blockSz);
if (ret != 0)
return ret;
-
+
if (ctx->protocol == REQ_RESP_CLIENT) {
offset = keysLen;
keysLen *= 2;
if (ctx->cliSt != ecCLI_SENT_REQ)
- return BAD_ENC_STATE_E;
+ return BAD_STATE_E;
ctx->cliSt = ecSRV_BAD_STATE; /* we're done no more ops allowed */
}
else if (ctx->protocol == REQ_RESP_SERVER) {
if (ctx->srvSt != ecSRV_SALT_SET)
- return BAD_ENC_STATE_E;
+ return BAD_STATE_E;
ctx->srvSt = ecSRV_RECV_REQ; /* only do this once */
}
-
+
if (keysLen > ECC_BUFSIZE) /* keys size */
return BUFFER_E;
-
+
if ( ((msgSz-digestSz) % blockSz) != 0)
return BAD_PADDING_E;
@@ -4790,23 +10124,29 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
return BUFFER_E;
#ifdef WOLFSSL_SMALL_STACK
- sharedSecret = (byte*)XMALLOC(ECC_MAXSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ sharedSecret = (byte*)XMALLOC(ECC_MAXSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
if (sharedSecret == NULL)
return MEMORY_E;
- keys = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ keys = (byte*)XMALLOC(ECC_BUFSIZE, NULL, DYNAMIC_TYPE_ECC_BUFFER);
if (keys == NULL) {
- XFREE(sharedSecret, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(sharedSecret, NULL, DYNAMIC_TYPE_ECC_BUFFER);
return MEMORY_E;
}
#endif
- ret = wc_ecc_shared_secret(privKey, pubKey, sharedSecret, &sharedSz);
-
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+ ret = wc_AsyncWait(ret, &privKey->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ if (ret != 0)
+ break;
+ #endif
+ ret = wc_ecc_shared_secret(privKey, pubKey, sharedSecret, &sharedSz);
+ } while (ret == WC_PENDING_E);
if (ret == 0) {
switch (ctx->kdfAlgo) {
case ecHKDF_SHA256 :
- ret = wc_HKDF(SHA256, sharedSecret, sharedSz, ctx->kdfSalt,
+ ret = wc_HKDF(WC_SHA256, sharedSecret, sharedSz, ctx->kdfSalt,
ctx->kdfSaltSz, ctx->kdfInfo, ctx->kdfInfoSz,
keys, keysLen);
break;
@@ -4824,25 +10164,28 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
switch (ctx->macAlgo) {
case ecHMAC_SHA256:
- {
- byte verify[SHA256_DIGEST_SIZE];
- Hmac hmac;
- ret = wc_HmacSetKey(&hmac, SHA256, macKey, SHA256_DIGEST_SIZE);
- if (ret != 0)
- break;
- ret = wc_HmacUpdate(&hmac, msg, msgSz-digestSz);
- if (ret != 0)
- break;
- ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz);
- if (ret != 0)
- break;
- ret = wc_HmacFinal(&hmac, verify);
- if (ret != 0)
- break;
- if (memcmp(verify, msg + msgSz - digestSz, digestSz) != 0)
- ret = -1;
+ {
+ byte verify[WC_SHA256_DIGEST_SIZE];
+ Hmac hmac;
+
+ ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_HmacSetKey(&hmac, WC_SHA256, macKey, WC_SHA256_DIGEST_SIZE);
+ if (ret == 0)
+ ret = wc_HmacUpdate(&hmac, msg, msgSz-digestSz);
+ if (ret == 0)
+ ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz);
+ if (ret == 0)
+ ret = wc_HmacFinal(&hmac, verify);
+ if (ret == 0) {
+ if (XMEMCMP(verify, msg + msgSz - digestSz, digestSz) != 0)
+ ret = -1;
+ }
+
+ wc_HmacFree(&hmac);
}
break;
+ }
default:
ret = BAD_FUNC_ARG;
@@ -4852,6 +10195,7 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
if (ret == 0) {
switch (ctx->encAlgo) {
+ #ifdef HAVE_AES_CBC
case ecAES_128_CBC:
{
Aes aes;
@@ -4860,9 +10204,12 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
if (ret != 0)
break;
ret = wc_AesCbcDecrypt(&aes, out, msg, msgSz-digestSz);
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+ ret = wc_AsyncWait(ret, &aes.asyncDev, WC_ASYNC_FLAG_NONE);
+ #endif
}
break;
-
+ #endif
default:
ret = BAD_FUNC_ARG;
break;
@@ -4873,8 +10220,8 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
*outSz = msgSz - digestSz;
#ifdef WOLFSSL_SMALL_STACK
- XFREE(sharedSecret, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keys, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(sharedSecret, NULL, DYNAMIC_TYPE_ECC_BUFFER);
+ XFREE(keys, NULL, DYNAMIC_TYPE_ECC_BUFFER);
#endif
return ret;
@@ -4885,25 +10232,36 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
#ifdef HAVE_COMP_KEY
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
-/* computes the jacobi c = (a | n) (or Legendre if n is prime)
- * HAC pp. 73 Algorithm 2.149
- */
-int mp_jacobi(mp_int* a, mp_int* p, int* c)
+#ifndef WOLFSSL_SP_MATH
+int do_mp_jacobi(mp_int* a, mp_int* n, int* c);
+
+int do_mp_jacobi(mp_int* a, mp_int* n, int* c)
{
- mp_int a1, p1;
- int k, s, r, res;
+ int k, s, res;
+ int r = 0; /* initialize to help static analysis out */
mp_digit residue;
- /* if p <= 0 return MP_VAL */
- if (mp_cmp_d(p, 0) != MP_GT) {
+ /* if a < 0 return MP_VAL */
+ if (mp_isneg(a) == MP_YES) {
return MP_VAL;
}
- /* step 1. if a == 0, return 0 */
- if (mp_iszero (a) == 1) {
- *c = 0;
- return MP_OKAY;
+ /* if n <= 0 return MP_VAL */
+ if (mp_cmp_d(n, 0) != MP_GT) {
+ return MP_VAL;
+ }
+
+ /* step 1. handle case of a == 0 */
+ if (mp_iszero (a) == MP_YES) {
+ /* special case of a == 0 and n == 1 */
+ if (mp_cmp_d (n, 1) == MP_EQ) {
+ *c = 1;
+ } else {
+ *c = 0;
+ }
+ return MP_OKAY;
}
/* step 2. if a == 1, return 1 */
@@ -4915,19 +10273,9 @@ int mp_jacobi(mp_int* a, mp_int* p, int* c)
/* default */
s = 0;
- /* step 3. write a = a1 * 2**k */
- if ((res = mp_init_copy (&a1, a)) != MP_OKAY) {
- return res;
- }
-
- if ((res = mp_init (&p1)) != MP_OKAY) {
- mp_clear(&a1);
- return res;
- }
-
/* divide out larger power of two */
- k = mp_cnt_lsb(&a1);
- res = mp_div_2d(&a1, k, &a1, NULL);
+ k = mp_cnt_lsb(a);
+ res = mp_div_2d(a, k, a, NULL);
if (res == MP_OKAY) {
/* step 4. if e is even set s=1 */
@@ -4935,7 +10283,7 @@ int mp_jacobi(mp_int* a, mp_int* p, int* c)
s = 1;
} else {
/* else set s=1 if p = 1/7 (mod 8) or s=-1 if p = 3/5 (mod 8) */
- residue = p->dp[0] & 7;
+ residue = n->dp[0] & 7;
if (residue == 1 || residue == 7) {
s = 1;
@@ -4944,50 +10292,114 @@ int mp_jacobi(mp_int* a, mp_int* p, int* c)
}
}
- /* step 5. if p == 3 (mod 4) *and* a1 == 3 (mod 4) then s = -s */
- if ( ((p->dp[0] & 3) == 3) && ((a1.dp[0] & 3) == 3)) {
+ /* step 5. if p == 3 (mod 4) *and* a == 3 (mod 4) then s = -s */
+ if ( ((n->dp[0] & 3) == 3) && ((a->dp[0] & 3) == 3)) {
s = -s;
}
}
if (res == MP_OKAY) {
- /* if a1 == 1 we're done */
- if (mp_cmp_d (&a1, 1) == MP_EQ) {
+ /* if a == 1 we're done */
+ if (mp_cmp_d(a, 1) == MP_EQ) {
*c = s;
} else {
- /* n1 = n mod a1 */
- res = mp_mod (p, &a1, &p1);
+ /* n1 = n mod a */
+ res = mp_mod (n, a, n);
if (res == MP_OKAY)
- res = mp_jacobi (&p1, &a1, &r);
+ res = do_mp_jacobi(n, a, &r);
if (res == MP_OKAY)
- *c = s * r;
+ *c = s * r;
}
}
- /* done */
- mp_clear (&p1);
- mp_clear (&a1);
+ return res;
+}
+
+
+/* computes the jacobi c = (a | n) (or Legendre if n is prime)
+ * HAC pp. 73 Algorithm 2.149
+ * HAC is wrong here, as the special case of (0 | 1) is not
+ * handled correctly.
+ */
+int mp_jacobi(mp_int* a, mp_int* n, int* c)
+{
+ mp_int a1, n1;
+ int res;
+
+ /* step 3. write a = a1 * 2**k */
+ if ((res = mp_init_multi(&a1, &n1, NULL, NULL, NULL, NULL)) != MP_OKAY) {
+ return res;
+ }
+
+ if ((res = mp_copy(a, &a1)) != MP_OKAY) {
+ goto done;
+ }
+
+ if ((res = mp_copy(n, &n1)) != MP_OKAY) {
+ goto done;
+ }
+
+ res = do_mp_jacobi(&a1, &n1, c);
+
+done:
+ /* cleanup */
+ mp_clear(&n1);
+ mp_clear(&a1);
return res;
}
+/* Solves the modular equation x^2 = n (mod p)
+ * where the prime is greater than 2 (an odd prime).
+ * The result is returned in the third argument x
+ * the function returns MP_OKAY on success, MP_VAL or another error on failure
+ */
int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret)
{
+#ifdef SQRTMOD_USE_MOD_EXP
+ int res;
+
+ mp_int e;
+
+ res = mp_init(&e);
+ if (res == MP_OKAY)
+ res = mp_add_d(prime, 1, &e);
+ if (res == MP_OKAY)
+ res = mp_div_2d(&e, 2, &e, NULL);
+ if (res == MP_OKAY)
+ res = mp_exptmod(n, &e, prime, ret);
+
+ mp_clear(&e);
+
+ return res;
+#else
int res, legendre, done = 0;
mp_int t1, C, Q, S, Z, M, T, R, two;
mp_digit i;
- /* first handle the simple cases */
+ /* first handle the simple cases: n = 0 or n = 1 */
if (mp_cmp_d(n, 0) == MP_EQ) {
mp_zero(ret);
return MP_OKAY;
}
- if (mp_cmp_d(prime, 2) == MP_EQ) return MP_VAL; /* prime must be odd */
- /* TAO removed
- if ((res = mp_jacobi(n, prime, &legendre)) != MP_OKAY) return res;
- if (legendre == -1) return MP_VAL; */ /* quadratic non-residue mod prime */
+ if (mp_cmp_d(n, 1) == MP_EQ) {
+ return mp_set(ret, 1);
+ }
+
+ /* prime must be odd */
+ if (mp_cmp_d(prime, 2) == MP_EQ) {
+ return MP_VAL;
+ }
+
+ /* is quadratic non-residue mod prime */
+ if ((res = mp_jacobi(n, prime, &legendre)) != MP_OKAY) {
+ return res;
+ }
+ if (legendre == -1) {
+ return MP_VAL;
+ }
if ((res = mp_init_multi(&t1, &C, &Q, &S, &Z, &M)) != MP_OKAY)
return res;
@@ -5018,61 +10430,72 @@ int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret)
}
/* NOW: TonelliShanks algorithm */
+ if (res == MP_OKAY && done == 0) {
- if (res == MP_OKAY && done == 0) {
-
- /* factor out powers of 2 from prime-1, defining Q and S
+ /* factor out powers of 2 from prime-1, defining Q and S
* as: prime-1 = Q*2^S */
+ /* Q = prime - 1 */
res = mp_copy(prime, &Q);
if (res == MP_OKAY)
res = mp_sub_d(&Q, 1, &Q);
- /* Q = prime - 1 */
+
+ /* S = 0 */
if (res == MP_OKAY)
mp_zero(&S);
- /* S = 0 */
- while (res == MP_OKAY && mp_iseven(&Q)) {
- res = mp_div_2(&Q, &Q);
+
+ while (res == MP_OKAY && mp_iseven(&Q) == MP_YES) {
/* Q = Q / 2 */
+ res = mp_div_2(&Q, &Q);
+
+ /* S = S + 1 */
if (res == MP_OKAY)
res = mp_add_d(&S, 1, &S);
- /* S = S + 1 */
}
/* find a Z such that the Legendre symbol (Z|prime) == -1 */
+ /* Z = 2 */
if (res == MP_OKAY)
res = mp_set_int(&Z, 2);
- /* Z = 2 */
+
while (res == MP_OKAY) {
res = mp_jacobi(&Z, prime, &legendre);
if (res == MP_OKAY && legendre == -1)
break;
+
+ /* Z = Z + 1 */
if (res == MP_OKAY)
res = mp_add_d(&Z, 1, &Z);
- /* Z = Z + 1 */
}
+ /* C = Z ^ Q mod prime */
if (res == MP_OKAY)
res = mp_exptmod(&Z, &Q, prime, &C);
- /* C = Z ^ Q mod prime */
+
+ /* t1 = (Q + 1) / 2 */
if (res == MP_OKAY)
res = mp_add_d(&Q, 1, &t1);
if (res == MP_OKAY)
res = mp_div_2(&t1, &t1);
- /* t1 = (Q + 1) / 2 */
+
+ /* R = n ^ ((Q + 1) / 2) mod prime */
if (res == MP_OKAY)
res = mp_exptmod(n, &t1, prime, &R);
- /* R = n ^ ((Q + 1) / 2) mod prime */
+
+ /* T = n ^ Q mod prime */
if (res == MP_OKAY)
res = mp_exptmod(n, &Q, prime, &T);
- /* T = n ^ Q mod prime */
+
+ /* M = S */
if (res == MP_OKAY)
res = mp_copy(&S, &M);
- /* M = S */
+
if (res == MP_OKAY)
res = mp_set_int(&two, 2);
while (res == MP_OKAY && done == 0) {
res = mp_copy(&T, &t1);
+
+ /* reduce to 1 and count */
i = 0;
while (res == MP_OKAY) {
if (mp_cmp_d(&t1, 1) == MP_EQ)
@@ -5082,34 +10505,38 @@ int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret)
i++;
}
if (res == MP_OKAY && i == 0) {
- mp_copy(&R, ret);
- res = MP_OKAY;
+ res = mp_copy(&R, ret);
done = 1;
}
if (done == 0) {
+ /* t1 = 2 ^ (M - i - 1) */
if (res == MP_OKAY)
res = mp_sub_d(&M, i, &t1);
if (res == MP_OKAY)
res = mp_sub_d(&t1, 1, &t1);
if (res == MP_OKAY)
res = mp_exptmod(&two, &t1, prime, &t1);
- /* t1 = 2 ^ (M - i - 1) */
- if (res == MP_OKAY)
- res = mp_exptmod(&C, &t1, prime, &t1);
+
/* t1 = C ^ (2 ^ (M - i - 1)) mod prime */
if (res == MP_OKAY)
- res = mp_sqrmod(&t1, prime, &C);
+ res = mp_exptmod(&C, &t1, prime, &t1);
+
/* C = (t1 * t1) mod prime */
if (res == MP_OKAY)
- res = mp_mulmod(&R, &t1, prime, &R);
+ res = mp_sqrmod(&t1, prime, &C);
+
/* R = (R * t1) mod prime */
if (res == MP_OKAY)
- res = mp_mulmod(&T, &C, prime, &T);
+ res = mp_mulmod(&R, &t1, prime, &R);
+
/* T = (T * C) mod prime */
if (res == MP_OKAY)
- mp_set(&M, i);
+ res = mp_mulmod(&T, &C, prime, &T);
+
/* M = i */
+ if (res == MP_OKAY)
+ res = mp_set(&M, i);
}
}
}
@@ -5126,19 +10553,22 @@ int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret)
mp_clear(&two);
return res;
+#endif
}
+#endif
+#endif /* !WOLFSSL_ATECC508A && !WOLFSSL_CRYPTOCELL */
/* export public ECC key in ANSI X9.63 format compressed */
-int wc_ecc_export_x963_compressed(ecc_key* key, byte* out, word32* outLen)
+static int wc_ecc_export_x963_compressed(ecc_key* key, byte* out, word32* outLen)
{
word32 numlen;
int ret = MP_OKAY;
if (key == NULL || out == NULL || outLen == NULL)
- return ECC_BAD_ARG_E;
+ return BAD_FUNC_ARG;
- if (ecc_is_valid_idx(key->idx) == 0) {
+ if (wc_ecc_is_valid_idx(key->idx) == 0) {
return ECC_BAD_ARG_E;
}
numlen = key->dp->size;
@@ -5149,38 +10579,183 @@ int wc_ecc_export_x963_compressed(ecc_key* key, byte* out, word32* outLen)
}
/* store first byte */
- out[0] = mp_isodd(key->pubkey.y) ? 0x03 : 0x02;
+ out[0] = mp_isodd(key->pubkey.y) == MP_YES ? ECC_POINT_COMP_ODD : ECC_POINT_COMP_EVEN;
/* pad and store x */
XMEMSET(out+1, 0, numlen);
ret = mp_to_unsigned_bin(key->pubkey.x,
out+1 + (numlen - mp_unsigned_bin_size(key->pubkey.x)));
*outLen = 1 + numlen;
+
return ret;
}
+#endif /* HAVE_COMP_KEY */
+
-/* d = a - b (mod c) */
-int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
+int wc_ecc_get_oid(word32 oidSum, const byte** oid, word32* oidSz)
{
- int res;
- mp_int t;
+ int x;
- if ((res = mp_init (&t)) != MP_OKAY) {
- return res;
- }
+ if (oidSum == 0) {
+ return BAD_FUNC_ARG;
+ }
- if ((res = mp_sub (a, b, &t)) != MP_OKAY) {
- mp_clear (&t);
- return res;
- }
- res = mp_mod (&t, c, d);
- mp_clear (&t);
+ /* find matching OID sum (based on encoded value) */
+ for (x = 0; ecc_sets[x].size != 0; x++) {
+ if (ecc_sets[x].oidSum == oidSum) {
+ int ret;
+ #ifdef HAVE_OID_ENCODING
+ ret = 0;
+ /* check cache */
+ oid_cache_t* o = &ecc_oid_cache[x];
+ if (o->oidSz == 0) {
+ o->oidSz = sizeof(o->oid);
+ ret = EncodeObjectId(ecc_sets[x].oid, ecc_sets[x].oidSz,
+ o->oid, &o->oidSz);
+ }
+ if (oidSz) {
+ *oidSz = o->oidSz;
+ }
+ if (oid) {
+ *oid = o->oid;
+ }
+ /* on success return curve id */
+ if (ret == 0) {
+ ret = ecc_sets[x].id;
+ }
+ #else
+ if (oidSz) {
+ *oidSz = ecc_sets[x].oidSz;
+ }
+ if (oid) {
+ *oid = ecc_sets[x].oid;
+ }
+ ret = ecc_sets[x].id;
+ #endif
+ return ret;
+ }
+ }
- return res;
+ return NOT_COMPILED_IN;
}
+#ifdef WOLFSSL_CUSTOM_CURVES
+int wc_ecc_set_custom_curve(ecc_key* key, const ecc_set_type* dp)
+{
+ if (key == NULL || dp == NULL) {
+ return BAD_FUNC_ARG;
+ }
-#endif /* HAVE_COMP_KEY */
+ key->idx = ECC_CUSTOM_IDX;
+ key->dp = dp;
+
+ return 0;
+}
+#endif /* WOLFSSL_CUSTOM_CURVES */
+
+#ifdef HAVE_X963_KDF
+
+static WC_INLINE void IncrementX963KdfCounter(byte* inOutCtr)
+{
+ int i;
+
+ /* in network byte order so start at end and work back */
+ for (i = 3; i >= 0; i--) {
+ if (++inOutCtr[i]) /* we're done unless we overflow */
+ return;
+ }
+}
+
+/* ANSI X9.63 Key Derivation Function (SEC1) */
+int wc_X963_KDF(enum wc_HashType type, const byte* secret, word32 secretSz,
+ const byte* sinfo, word32 sinfoSz, byte* out, word32 outSz)
+{
+ int ret, i;
+ int digestSz, copySz;
+ int remaining = outSz;
+ byte* outIdx;
+ byte counter[4];
+ byte tmp[WC_MAX_DIGEST_SIZE];
+
+#ifdef WOLFSSL_SMALL_STACK
+ wc_HashAlg* hash;
+#else
+ wc_HashAlg hash[1];
+#endif
+
+ if (secret == NULL || secretSz == 0 || out == NULL)
+ return BAD_FUNC_ARG;
+
+ /* X9.63 allowed algos only */
+ if (type != WC_HASH_TYPE_SHA && type != WC_HASH_TYPE_SHA224 &&
+ type != WC_HASH_TYPE_SHA256 && type != WC_HASH_TYPE_SHA384 &&
+ type != WC_HASH_TYPE_SHA512)
+ return BAD_FUNC_ARG;
+
+ digestSz = wc_HashGetDigestSize(type);
+ if (digestSz < 0)
+ return digestSz;
+
+#ifdef WOLFSSL_SMALL_STACK
+ hash = (wc_HashAlg*)XMALLOC(sizeof(wc_HashAlg), NULL,
+ DYNAMIC_TYPE_HASHES);
+ if (hash == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_HashInit(hash, type);
+ if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(hash, NULL, DYNAMIC_TYPE_HASHES);
+#endif
+ return ret;
+ }
+
+ outIdx = out;
+ XMEMSET(counter, 0, sizeof(counter));
+
+ for (i = 1; remaining > 0; i++) {
+
+ IncrementX963KdfCounter(counter);
+
+ ret = wc_HashUpdate(hash, type, secret, secretSz);
+ if (ret != 0) {
+ break;
+ }
+
+ ret = wc_HashUpdate(hash, type, counter, sizeof(counter));
+ if (ret != 0) {
+ break;
+ }
+
+ if (sinfo) {
+ ret = wc_HashUpdate(hash, type, sinfo, sinfoSz);
+ if (ret != 0) {
+ break;
+ }
+ }
+
+ ret = wc_HashFinal(hash, type, tmp);
+ if (ret != 0) {
+ break;
+ }
+
+ copySz = min(remaining, digestSz);
+ XMEMCPY(outIdx, tmp, copySz);
+
+ remaining -= copySz;
+ outIdx += copySz;
+ }
+
+ wc_HashFree(hash, type);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(hash, NULL, DYNAMIC_TYPE_HASHES);
+#endif
+
+ return ret;
+}
+#endif /* HAVE_X963_KDF */
#endif /* HAVE_ECC */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ed25519.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ed25519.c
index ba0dcbe53..8057caa7c 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ed25519.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ed25519.c
@@ -1,8 +1,8 @@
/* ed25519.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
/* Based On Daniel J Bernstein's ed25519 Public Domain ref10 work. */
#ifdef HAVE_CONFIG_H
@@ -32,22 +33,67 @@
#include <wolfssl/wolfcrypt/ed25519.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
+#ifdef FREESCALE_LTC_ECC
+ #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
-/*
- generate an ed25519 key pair.
- returns 0 on success
- */
-int wc_ed25519_make_key(RNG* rng, int keySz, ed25519_key* key)
+#if defined(HAVE_ED25519_SIGN) || defined(HAVE_ED25519_VERIFY)
+#define ED25519CTX_SIZE 32
+
+static const byte ed25519Ctx[ED25519CTX_SIZE+1] =
+ "SigEd25519 no Ed25519 collisions";
+#endif
+
+int wc_ed25519_make_public(ed25519_key* key, unsigned char* pubKey,
+ word32 pubKeySz)
{
- byte az[64];
- int ret;
+ int ret = 0;
+ byte az[ED25519_PRV_KEY_SIZE];
+#if !defined(FREESCALE_LTC_ECC)
ge_p3 A;
+#endif
+
+ if (key == NULL || pubKeySz != ED25519_PUB_KEY_SIZE)
+ ret = BAD_FUNC_ARG;
+
+ if (ret == 0)
+ ret = wc_Sha512Hash(key->k, ED25519_KEY_SIZE, az);
+ if (ret == 0) {
+ /* apply clamp */
+ az[0] &= 248;
+        az[31] &= 63; /* same as az[31] &= 127 because of az[31] |= 64 */
+ az[31] |= 64;
+
+ #ifdef FREESCALE_LTC_ECC
+ ltc_pkha_ecc_point_t publicKey = {0};
+ publicKey.X = key->pointX;
+ publicKey.Y = key->pointY;
+ LTC_PKHA_Ed25519_PointMul(LTC_PKHA_Ed25519_BasePoint(), az,
+ ED25519_KEY_SIZE, &publicKey, kLTC_Ed25519 /* result on Ed25519 */);
+ LTC_PKHA_Ed25519_Compress(&publicKey, pubKey);
+ #else
+ ge_scalarmult_base(&A, az);
+ ge_p3_tobytes(pubKey, &A);
+ #endif
+ }
+
+ return ret;
+}
+
+/* generate an ed25519 key pair.
+ * returns 0 on success
+ */
+int wc_ed25519_make_key(WC_RNG* rng, int keySz, ed25519_key* key)
+{
+ int ret;
if (rng == NULL || key == NULL)
return BAD_FUNC_ARG;
@@ -56,142 +102,412 @@ int wc_ed25519_make_key(RNG* rng, int keySz, ed25519_key* key)
if (keySz != ED25519_KEY_SIZE)
return BAD_FUNC_ARG;
- ret = 0;
- ret |= wc_RNG_GenerateBlock(rng, key->k, 32);
- ret |= wc_Sha512Hash(key->k, 32, az);
- az[0] &= 248;
- az[31] &= 63;
- az[31] |= 64;
+ ret = wc_RNG_GenerateBlock(rng, key->k, ED25519_KEY_SIZE);
+ if (ret != 0)
+ return ret;
+
+ ret = wc_ed25519_make_public(key, key->p, ED25519_PUB_KEY_SIZE);
+ if (ret != 0) {
+ ForceZero(key->k, ED25519_KEY_SIZE);
+ return ret;
+ }
+
+ /* put public key after private key, on the same buffer */
+ XMEMMOVE(key->k + ED25519_KEY_SIZE, key->p, ED25519_PUB_KEY_SIZE);
- ge_scalarmult_base(&A, az);
- ge_p3_tobytes(key->p, &A);
- XMEMMOVE(key->k + 32, key->p, 32);
+ key->pubKeySet = 1;
return ret;
}
+#ifdef HAVE_ED25519_SIGN
/*
- in contains the message to sign
- inlen is the length of the message to sign
- out is the buffer to write the signature
- outlen [in/out] input size of out buf
- output gets set as the final length of out
- key is the ed25519 key to use when signing
+ in contains the message to sign
+ inLen is the length of the message to sign
+ out is the buffer to write the signature
+ outLen [in/out] input size of out buf
+ output gets set as the final length of out
+ key is the ed25519 key to use when signing
+ type one of Ed25519, Ed25519ctx or Ed25519ph
+ context extra signing data
+ contextLen length of extra signing data
return 0 on success
*/
-int wc_ed25519_sign_msg(const byte* in, word32 inlen, byte* out,
- word32 *outlen, ed25519_key* key)
+static int ed25519_sign_msg(const byte* in, word32 inLen, byte* out,
+ word32 *outLen, ed25519_key* key, byte type,
+ const byte* context, byte contextLen)
{
+#ifdef FREESCALE_LTC_ECC
+ byte tempBuf[ED25519_PRV_KEY_SIZE];
+#else
ge_p3 R;
- byte nonce[SHA512_DIGEST_SIZE];
- byte hram[SHA512_DIGEST_SIZE];
- byte az[64];
- word32 sigSz;
- Sha512 sha;
- int ret = 0;
+#endif
+ byte nonce[WC_SHA512_DIGEST_SIZE];
+ byte hram[WC_SHA512_DIGEST_SIZE];
+ byte az[ED25519_PRV_KEY_SIZE];
+ wc_Sha512 sha;
+ int ret;
/* sanity check on arguments */
- if (in == NULL || out == NULL || outlen == NULL || key == NULL)
+ if (in == NULL || out == NULL || outLen == NULL || key == NULL ||
+ (context == NULL && contextLen != 0)) {
+ return BAD_FUNC_ARG;
+ }
+ if (!key->pubKeySet)
return BAD_FUNC_ARG;
/* check and set up out length */
- ret = 0;
- sigSz = wc_ed25519_sig_size(key);
- if (*outlen < sigSz)
- return BAD_FUNC_ARG;
- *outlen = sigSz;
+ if (*outLen < ED25519_SIG_SIZE) {
+ *outLen = ED25519_SIG_SIZE;
+ return BUFFER_E;
+ }
+ *outLen = ED25519_SIG_SIZE;
/* step 1: create nonce to use where nonce is r in
r = H(h_b, ... ,h_2b-1,M) */
- ret |= wc_Sha512Hash(key->k,32,az);
+ ret = wc_Sha512Hash(key->k, ED25519_KEY_SIZE, az);
+ if (ret != 0)
+ return ret;
+
+ /* apply clamp */
az[0] &= 248;
- az[31] &= 63;
+    az[31] &= 63; /* same as az[31] &= 127 because of az[31] |= 64 */
az[31] |= 64;
- ret |= wc_InitSha512(&sha);
- ret |= wc_Sha512Update(&sha, az + 32, 32);
- ret |= wc_Sha512Update(&sha, in, inlen);
- ret |= wc_Sha512Final(&sha, nonce);
+
+ ret = wc_InitSha512(&sha);
+ if (ret != 0)
+ return ret;
+ if (type == Ed25519ctx || type == Ed25519ph) {
+ ret = wc_Sha512Update(&sha, ed25519Ctx, ED25519CTX_SIZE);
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, &type, sizeof(type));
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, &contextLen, sizeof(contextLen));
+ if (ret == 0 && context != NULL)
+ ret = wc_Sha512Update(&sha, context, contextLen);
+ }
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, az + ED25519_KEY_SIZE, ED25519_KEY_SIZE);
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, in, inLen);
+ if (ret == 0)
+ ret = wc_Sha512Final(&sha, nonce);
+ wc_Sha512Free(&sha);
+ if (ret != 0)
+ return ret;
+
+#ifdef FREESCALE_LTC_ECC
+ ltc_pkha_ecc_point_t ltcPoint = {0};
+ ltcPoint.X = &tempBuf[0];
+ ltcPoint.Y = &tempBuf[32];
+ LTC_PKHA_sc_reduce(nonce);
+ LTC_PKHA_Ed25519_PointMul(LTC_PKHA_Ed25519_BasePoint(), nonce,
+ ED25519_KEY_SIZE, &ltcPoint, kLTC_Ed25519 /* result on Ed25519 */);
+ LTC_PKHA_Ed25519_Compress(&ltcPoint, out);
+#else
sc_reduce(nonce);
/* step 2: computing R = rB where rB is the scalar multiplication of
r and B */
ge_scalarmult_base(&R,nonce);
ge_p3_tobytes(out,&R);
+#endif
/* step 3: hash R + public key + message getting H(R,A,M) then
creating S = (r + H(R,A,M)a) mod l */
- ret |= wc_InitSha512(&sha);
- ret |= wc_Sha512Update(&sha, out, 32);
- ret |= wc_Sha512Update(&sha, key->p, 32);
- ret |= wc_Sha512Update(&sha, in, inlen);
- ret |= wc_Sha512Final(&sha, hram);
+ ret = wc_InitSha512(&sha);
+ if (ret != 0)
+ return ret;
+ if (type == Ed25519ctx || type == Ed25519ph) {
+ ret = wc_Sha512Update(&sha, ed25519Ctx, ED25519CTX_SIZE);
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, &type, sizeof(type));
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, &contextLen, sizeof(contextLen));
+ if (ret == 0 && context != NULL)
+ ret = wc_Sha512Update(&sha, context, contextLen);
+ }
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, out, ED25519_SIG_SIZE/2);
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, key->p, ED25519_PUB_KEY_SIZE);
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, in, inLen);
+ if (ret == 0)
+ ret = wc_Sha512Final(&sha, hram);
+ wc_Sha512Free(&sha);
+ if (ret != 0)
+ return ret;
+
+#ifdef FREESCALE_LTC_ECC
+ LTC_PKHA_sc_reduce(hram);
+ LTC_PKHA_sc_muladd(out + (ED25519_SIG_SIZE/2), hram, az, nonce);
+#else
sc_reduce(hram);
- sc_muladd(out + 32, hram, az, nonce);
+ sc_muladd(out + (ED25519_SIG_SIZE/2), hram, az, nonce);
+#endif
return ret;
}
+/*
+ in contains the message to sign
+ inLen is the length of the message to sign
+ out is the buffer to write the signature
+ outLen [in/out] input size of out buf
+ output gets set as the final length of out
+ key is the ed25519 key to use when signing
+ return 0 on success
+ */
+int wc_ed25519_sign_msg(const byte* in, word32 inLen, byte* out,
+ word32 *outLen, ed25519_key* key)
+{
+ return ed25519_sign_msg(in, inLen, out, outLen, key, (byte)Ed25519, NULL, 0);
+}
+
+/*
+ in contains the message to sign
+ inLen is the length of the message to sign
+ out is the buffer to write the signature
+ outLen [in/out] input size of out buf
+ output gets set as the final length of out
+ key is the ed25519 key to use when signing
+ context extra signing data
+ contextLen length of extra signing data
+ return 0 on success
+ */
+int wc_ed25519ctx_sign_msg(const byte* in, word32 inLen, byte* out,
+ word32 *outLen, ed25519_key* key,
+ const byte* context, byte contextLen)
+{
+ return ed25519_sign_msg(in, inLen, out, outLen, key, Ed25519ctx, context,
+ contextLen);
+}
+
+/*
+ hash contains the SHA-512 hash of the message to sign
+ hashLen is the length of the SHA-512 hash of the message to sign
+ out is the buffer to write the signature
+ outLen [in/out] input size of out buf
+ output gets set as the final length of out
+ key is the ed25519 key to use when signing
+ context extra signing data
+ contextLen length of extra signing data
+ return 0 on success
+ */
+int wc_ed25519ph_sign_hash(const byte* hash, word32 hashLen, byte* out,
+ word32 *outLen, ed25519_key* key,
+ const byte* context, byte contextLen)
+{
+ return ed25519_sign_msg(hash, hashLen, out, outLen, key, Ed25519ph, context,
+ contextLen);
+}
+
+/*
+ in contains the message to sign
+ inLen is the length of the message to sign
+ out is the buffer to write the signature
+ outLen [in/out] input size of out buf
+ output gets set as the final length of out
+ key is the ed25519 key to use when signing
+ context extra signing data
+ contextLen length of extra signing data
+ return 0 on success
+ */
+int wc_ed25519ph_sign_msg(const byte* in, word32 inLen, byte* out,
+ word32 *outLen, ed25519_key* key,
+ const byte* context, byte contextLen)
+{
+ int ret;
+ byte hash[WC_SHA512_DIGEST_SIZE];
+
+ ret = wc_Sha512Hash(in, inLen, hash);
+ if (ret != 0)
+ return ret;
+
+ return wc_ed25519ph_sign_hash(hash, sizeof(hash), out, outLen, key, context,
+ contextLen);
+}
+#endif /* HAVE_ED25519_SIGN */
+
+#ifdef HAVE_ED25519_VERIFY
/*
sig is array of bytes containing the signature
- siglen is the length of sig byte array
+ sigLen is the length of sig byte array
msg the array of bytes containing the message
- msglen length of msg array
- stat will be 1 on successful verify and 0 on unsuccessful
+ msgLen length of msg array
+ res will be 1 on successful verify and 0 on unsuccessful
+ key Ed25519 public key
+ return 0 and res of 1 on success
*/
-int wc_ed25519_verify_msg(byte* sig, word32 siglen, const byte* msg,
- word32 msglen, int* stat, ed25519_key* key)
+static int ed25519_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+ word32 msgLen, int* res, ed25519_key* key,
+ byte type, const byte* context, byte contextLen)
{
- byte rcheck[32];
- byte h[SHA512_DIGEST_SIZE];
+ byte rcheck[ED25519_KEY_SIZE];
+ byte h[WC_SHA512_DIGEST_SIZE];
+#ifndef FREESCALE_LTC_ECC
ge_p3 A;
ge_p2 R;
- word32 sigSz;
+#endif
int ret;
- Sha512 sha;
+ wc_Sha512 sha;
/* sanity check on arguments */
- if (sig == NULL || msg == NULL || stat == NULL || key == NULL)
+ if (sig == NULL || msg == NULL || res == NULL || key == NULL ||
+ (context == NULL && contextLen != 0)) {
return BAD_FUNC_ARG;
+ }
- ret = 0;
- *stat = 0;
- sigSz = wc_ed25519_size(key);
+ /* set verification failed by default */
+ *res = 0;
/* check on basics needed to verify signature */
- if (siglen < sigSz)
- return BAD_FUNC_ARG;
- if (sig[63] & 224)
+ if (sigLen < ED25519_SIG_SIZE || (sig[ED25519_SIG_SIZE-1] & 224))
return BAD_FUNC_ARG;
/* uncompress A (public key), test if valid, and negate it */
+#ifndef FREESCALE_LTC_ECC
if (ge_frombytes_negate_vartime(&A, key->p) != 0)
return BAD_FUNC_ARG;
+#endif
/* find H(R,A,M) and store it as h */
- ret |= wc_InitSha512(&sha);
- ret |= wc_Sha512Update(&sha, sig, 32);
- ret |= wc_Sha512Update(&sha, key->p, 32);
- ret |= wc_Sha512Update(&sha, msg, msglen);
- ret |= wc_Sha512Final(&sha, h);
+ ret = wc_InitSha512(&sha);
+ if (ret != 0)
+ return ret;
+ if (type == Ed25519ctx || type == Ed25519ph) {
+ ret = wc_Sha512Update(&sha, ed25519Ctx, ED25519CTX_SIZE);
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, &type, sizeof(type));
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, &contextLen, sizeof(contextLen));
+ if (ret == 0 && context != NULL)
+ ret = wc_Sha512Update(&sha, context, contextLen);
+ }
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, sig, ED25519_SIG_SIZE/2);
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, key->p, ED25519_PUB_KEY_SIZE);
+ if (ret == 0)
+ ret = wc_Sha512Update(&sha, msg, msgLen);
+ if (ret == 0)
+ ret = wc_Sha512Final(&sha, h);
+ wc_Sha512Free(&sha);
+ if (ret != 0)
+ return ret;
+
+#ifdef FREESCALE_LTC_ECC
+ LTC_PKHA_sc_reduce(h);
+ LTC_PKHA_SignatureForVerify(rcheck, h, sig + (ED25519_SIG_SIZE/2), key);
+#else
sc_reduce(h);
/*
Uses a fast single-signature verification SB = R + H(R,A,M)A becomes
SB - H(R,A,M)A saving decompression of R
*/
- ret |= ge_double_scalarmult_vartime(&R, h, &A, sig + 32);
+ ret = ge_double_scalarmult_vartime(&R, h, &A, sig + (ED25519_SIG_SIZE/2));
+ if (ret != 0)
+ return ret;
+
ge_tobytes(rcheck, &R);
+#endif /* FREESCALE_LTC_ECC */
/* comparison of R created to R in sig */
- ret |= ConstantCompare(rcheck, sig, 32);
+ ret = ConstantCompare(rcheck, sig, ED25519_SIG_SIZE/2);
+ if (ret != 0)
+ return SIG_VERIFY_E;
- *stat = (ret == 0)? 1: 0;
+ /* set the verification status */
+ *res = 1;
return ret;
}
+/*
+ sig is array of bytes containing the signature
+ sigLen is the length of sig byte array
+ msg the array of bytes containing the message
+ msgLen length of msg array
+ res will be 1 on successful verify and 0 on unsuccessful
+ key Ed25519 public key
+ return 0 and res of 1 on success
+*/
+int wc_ed25519_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+ word32 msgLen, int* res, ed25519_key* key)
+{
+ return ed25519_verify_msg(sig, sigLen, msg, msgLen, res, key, (byte)Ed25519,
+ NULL, 0);
+}
+
+/*
+ sig is array of bytes containing the signature
+ sigLen is the length of sig byte array
+ msg the array of bytes containing the message
+ msgLen length of msg array
+ res will be 1 on successful verify and 0 on unsuccessful
+ key Ed25519 public key
+   context   extra signing data
+   contextLen length of extra signing data
+ return 0 and res of 1 on success
+*/
+int wc_ed25519ctx_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+ word32 msgLen, int* res, ed25519_key* key,
+ const byte* context, byte contextLen)
+{
+ return ed25519_verify_msg(sig, sigLen, msg, msgLen, res, key, Ed25519ctx,
+ context, contextLen);
+}
+
+/*
+ sig is array of bytes containing the signature
+ sigLen is the length of sig byte array
+ hash the array of bytes containing the SHA-512 hash of the message
+ hashLen length of hash array
+ res will be 1 on successful verify and 0 on unsuccessful
+ key Ed25519 public key
+   context   extra signing data
+   contextLen length of extra signing data
+ return 0 and res of 1 on success
+*/
+int wc_ed25519ph_verify_hash(const byte* sig, word32 sigLen, const byte* hash,
+ word32 hashLen, int* res, ed25519_key* key,
+ const byte* context, byte contextLen)
+{
+ return ed25519_verify_msg(sig, sigLen, hash, hashLen, res, key, Ed25519ph,
+ context, contextLen);
+}
+
+/*
+ sig is array of bytes containing the signature
+ sigLen is the length of sig byte array
+ msg the array of bytes containing the message
+ msgLen length of msg array
+ res will be 1 on successful verify and 0 on unsuccessful
+ key Ed25519 public key
+   context   extra signing data
+   contextLen length of extra signing data
+ return 0 and res of 1 on success
+*/
+int wc_ed25519ph_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+ word32 msgLen, int* res, ed25519_key* key,
+ const byte* context, byte contextLen)
+{
+ int ret;
+ byte hash[WC_SHA512_DIGEST_SIZE];
+
+ ret = wc_Sha512Hash(msg, msgLen, hash);
+ if (ret != 0)
+ return ret;
+
+ return wc_ed25519ph_verify_hash(sig, sigLen, hash, sizeof(hash), res, key,
+ context, contextLen);
+}
+#endif /* HAVE_ED25519_VERIFY */
+
/* initialize information and memory for key */
int wc_ed25519_init(ed25519_key* key)
@@ -201,6 +517,10 @@ int wc_ed25519_init(ed25519_key* key)
XMEMSET(key, 0, sizeof(ed25519_key));
+#ifndef FREESCALE_LTC_ECC
+ fe_init();
+#endif
+
return 0;
}
@@ -215,6 +535,8 @@ void wc_ed25519_free(ed25519_key* key)
}
+#ifdef HAVE_ED25519_KEY_EXPORT
+
/*
outLen should contain the size of out buffer when input. outLen is than set
to the final output length.
@@ -222,24 +544,25 @@ void wc_ed25519_free(ed25519_key* key)
*/
int wc_ed25519_export_public(ed25519_key* key, byte* out, word32* outLen)
{
- word32 keySz;
-
/* sanity check on arguments */
if (key == NULL || out == NULL || outLen == NULL)
return BAD_FUNC_ARG;
- keySz = wc_ed25519_size(key);
- if (*outLen < keySz) {
- *outLen = keySz;
+ if (*outLen < ED25519_PUB_KEY_SIZE) {
+ *outLen = ED25519_PUB_KEY_SIZE;
return BUFFER_E;
}
- *outLen = keySz;
- XMEMCPY(out, key->p, keySz);
+
+ *outLen = ED25519_PUB_KEY_SIZE;
+ XMEMCPY(out, key->p, ED25519_PUB_KEY_SIZE);
return 0;
}
+#endif /* HAVE_ED25519_KEY_EXPORT */
+
+#ifdef HAVE_ED25519_KEY_IMPORT
/*
Imports a compressed/uncompressed public key.
in the byte array containing the public key
@@ -248,37 +571,65 @@ int wc_ed25519_export_public(ed25519_key* key, byte* out, word32* outLen)
*/
int wc_ed25519_import_public(const byte* in, word32 inLen, ed25519_key* key)
{
- word32 keySz;
int ret;
/* sanity check on arguments */
if (in == NULL || key == NULL)
return BAD_FUNC_ARG;
- keySz = wc_ed25519_size(key);
-
- if (inLen < keySz)
+ if (inLen < ED25519_PUB_KEY_SIZE)
return BAD_FUNC_ARG;
/* compressed prefix according to draft
http://www.ietf.org/id/draft-koch-eddsa-for-openpgp-02.txt */
- if (in[0] == 0x40) {
+ if (in[0] == 0x40 && inLen > ED25519_PUB_KEY_SIZE) {
/* key is stored in compressed format so just copy in */
- XMEMCPY(key->p, (in + 1), keySz);
+ XMEMCPY(key->p, (in + 1), ED25519_PUB_KEY_SIZE);
+#ifdef FREESCALE_LTC_ECC
+ /* recover X coordinate */
+ ltc_pkha_ecc_point_t pubKey;
+ pubKey.X = key->pointX;
+ pubKey.Y = key->pointY;
+ LTC_PKHA_Ed25519_PointDecompress(key->p, ED25519_PUB_KEY_SIZE, &pubKey);
+#endif
+ key->pubKeySet = 1;
return 0;
}
/* importing uncompressed public key */
- if (in[0] == 0x04) {
+ if (in[0] == 0x04 && inLen > 2*ED25519_PUB_KEY_SIZE) {
+#ifdef FREESCALE_LTC_ECC
+ /* reverse bytes for little endian byte order */
+ for (int i = 0; i < ED25519_KEY_SIZE; i++)
+ {
+ key->pointX[i] = *(in + ED25519_KEY_SIZE - i);
+ key->pointY[i] = *(in + 2*ED25519_KEY_SIZE - i);
+ }
+ XMEMCPY(key->p, key->pointY, ED25519_KEY_SIZE);
+ key->pubKeySet = 1;
+ ret = 0;
+#else
/* pass in (x,y) and store compressed key */
- ret = ge_compress_key(key->p, (in+1), (in+1+keySz), keySz);
+ ret = ge_compress_key(key->p, in+1,
+ in+1+ED25519_PUB_KEY_SIZE, ED25519_PUB_KEY_SIZE);
+ if (ret == 0)
+ key->pubKeySet = 1;
+#endif /* FREESCALE_LTC_ECC */
return ret;
}
/* if not specified compressed or uncompressed check key size
if key size is equal to compressed key size copy in key */
- if (inLen == keySz) {
- XMEMCPY(key->p, in, keySz);
+ if (inLen == ED25519_PUB_KEY_SIZE) {
+ XMEMCPY(key->p, in, ED25519_PUB_KEY_SIZE);
+#ifdef FREESCALE_LTC_ECC
+ /* recover X coordinate */
+ ltc_pkha_ecc_point_t pubKey;
+ pubKey.X = key->pointX;
+ pubKey.Y = key->pointY;
+ LTC_PKHA_Ed25519_PointDecompress(key->p, ED25519_PUB_KEY_SIZE, &pubKey);
+#endif
+ key->pubKeySet = 1;
return 0;
}
@@ -288,82 +639,175 @@ int wc_ed25519_import_public(const byte* in, word32 inLen, ed25519_key* key)
/*
+ For importing a private key.
+ */
+int wc_ed25519_import_private_only(const byte* priv, word32 privSz,
+ ed25519_key* key)
+{
+ /* sanity check on arguments */
+ if (priv == NULL || key == NULL)
+ return BAD_FUNC_ARG;
+
+ /* key size check */
+ if (privSz < ED25519_KEY_SIZE)
+ return BAD_FUNC_ARG;
+
+ XMEMCPY(key->k, priv, ED25519_KEY_SIZE);
+
+ return 0;
+}
+
+/*
For importing a private key and its associated public key.
*/
int wc_ed25519_import_private_key(const byte* priv, word32 privSz,
const byte* pub, word32 pubSz, ed25519_key* key)
{
- word32 keySz;
int ret;
/* sanity check on arguments */
if (priv == NULL || pub == NULL || key == NULL)
return BAD_FUNC_ARG;
- keySz = wc_ed25519_size(key);
-
/* key size check */
- if (privSz < keySz || pubSz < keySz)
+ if (privSz < ED25519_KEY_SIZE || pubSz < ED25519_PUB_KEY_SIZE)
return BAD_FUNC_ARG;
- XMEMCPY(key->k, priv, keySz);
+ /* import public key */
ret = wc_ed25519_import_public(pub, pubSz, key);
- XMEMCPY((key->k + keySz), key->p, keySz);
+ if (ret != 0)
+ return ret;
+
+ /* make the private key (priv + pub) */
+ XMEMCPY(key->k, priv, ED25519_KEY_SIZE);
+ XMEMCPY(key->k + ED25519_KEY_SIZE, key->p, ED25519_PUB_KEY_SIZE);
return ret;
}
+#endif /* HAVE_ED25519_KEY_IMPORT */
+
+
+#ifdef HAVE_ED25519_KEY_EXPORT
/*
- outLen should contain the size of out buffer when input. outLen is than set
- to the final output length.
- returns 0 on success
+ export private key only (secret part so 32 bytes)
+ outLen should contain the size of out buffer when input. outLen is then set
+ to the final output length.
+ returns 0 on success
*/
int wc_ed25519_export_private_only(ed25519_key* key, byte* out, word32* outLen)
{
- word32 keySz;
+ /* sanity checks on arguments */
+ if (key == NULL || out == NULL || outLen == NULL)
+ return BAD_FUNC_ARG;
+
+ if (*outLen < ED25519_KEY_SIZE) {
+ *outLen = ED25519_KEY_SIZE;
+ return BUFFER_E;
+ }
+
+ *outLen = ED25519_KEY_SIZE;
+ XMEMCPY(out, key->k, ED25519_KEY_SIZE);
+ return 0;
+}
+
+/*
+ export private key, including public part
+ outLen should contain the size of out buffer when input. outLen is then set
+ to the final output length.
+ returns 0 on success
+ */
+int wc_ed25519_export_private(ed25519_key* key, byte* out, word32* outLen)
+{
/* sanity checks on arguments */
if (key == NULL || out == NULL || outLen == NULL)
return BAD_FUNC_ARG;
- keySz = wc_ed25519_size(key);
- if (*outLen < keySz) {
- *outLen = keySz;
+ if (*outLen < ED25519_PRV_KEY_SIZE) {
+ *outLen = ED25519_PRV_KEY_SIZE;
return BUFFER_E;
}
- *outLen = keySz;
- XMEMCPY(out, key->k, keySz);
+
+ *outLen = ED25519_PRV_KEY_SIZE;
+ XMEMCPY(out, key->k, ED25519_PRV_KEY_SIZE);
return 0;
}
+/* export full private key and public key
+ return 0 on success
+ */
+int wc_ed25519_export_key(ed25519_key* key,
+ byte* priv, word32 *privSz,
+ byte* pub, word32 *pubSz)
+{
+ int ret;
+
+ /* export 'full' private part */
+ ret = wc_ed25519_export_private(key, priv, privSz);
+ if (ret != 0)
+ return ret;
-/* is the compressed key size in bytes */
-int wc_ed25519_size(ed25519_key* key)
+ /* export public part */
+ ret = wc_ed25519_export_public(key, pub, pubSz);
+
+ return ret;
+}
+
+#endif /* HAVE_ED25519_KEY_EXPORT */
+
+/* check the private and public keys match */
+int wc_ed25519_check_key(ed25519_key* key)
{
- word32 keySz;
+ int ret = 0;
+ unsigned char pubKey[ED25519_PUB_KEY_SIZE];
+
+ if (!key->pubKeySet)
+ ret = PUBLIC_KEY_E;
+ if (ret == 0)
+ ret = wc_ed25519_make_public(key, pubKey, sizeof(pubKey));
+ if (ret == 0 && XMEMCMP(pubKey, key->p, ED25519_PUB_KEY_SIZE) != 0)
+ ret = PUBLIC_KEY_E;
+ return ret;
+}
+
+/* returns the private key size (secret only) in bytes */
+int wc_ed25519_size(ed25519_key* key)
+{
if (key == NULL)
return BAD_FUNC_ARG;
- keySz = ED25519_KEY_SIZE;
+ return ED25519_KEY_SIZE;
+}
- return keySz;
+/* returns the private key size (secret + public) in bytes */
+int wc_ed25519_priv_size(ed25519_key* key)
+{
+ if (key == NULL)
+ return BAD_FUNC_ARG;
+
+ return ED25519_PRV_KEY_SIZE;
}
+/* returns the compressed key size in bytes (public key) */
+int wc_ed25519_pub_size(ed25519_key* key)
+{
+ if (key == NULL)
+ return BAD_FUNC_ARG;
+
+ return ED25519_PUB_KEY_SIZE;
+}
/* returns the size of signature in bytes */
int wc_ed25519_sig_size(ed25519_key* key)
{
- word32 sigSz;
-
if (key == NULL)
return BAD_FUNC_ARG;
- sigSz = ED25519_SIG_SIZE;
-
- return sigSz;
+ return ED25519_SIG_SIZE;
}
#endif /* HAVE_ED25519 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ed448.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ed448.c
new file mode 100644
index 000000000..125ee3852
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ed448.c
@@ -0,0 +1,917 @@
+/* ed448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implemented to: RFC 8032 */
+
+/* Based On Daniel J Bernstein's ed25519 Public Domain ref10 work.
+ * Reworked for curve448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+/* in case user set HAVE_ED448 there */
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_ED448
+
+#include <wolfssl/wolfcrypt/ed448.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(HAVE_ED448_SIGN) || defined(HAVE_ED448_VERIFY)
+/* Size of context bytes to use with hash when signing and verifying. */
+#define ED448CTX_SIZE 8
+/* Context to pass to hash when signing and verifying. */
+static const byte ed448Ctx[ED448CTX_SIZE+1] = "SigEd448";
+#endif
+
+/* Derive the public key for the private key.
+ *
+ * key [in] Ed448 key object.
+ * pubKey   [in]  Byte array to hold the public key.
+ * pubKeySz [in] Size of the array in bytes.
+ * returns BAD_FUNC_ARG when key is NULL or pubKeySz is not equal to
+ * ED448_PUB_KEY_SIZE,
+ * other -ve value on hash failure,
+ * 0 otherwise.
+ */
+int wc_ed448_make_public(ed448_key* key, unsigned char* pubKey, word32 pubKeySz)
+{
+ int ret = 0;
+ byte az[ED448_PRV_KEY_SIZE];
+ ge448_p2 A;
+
+ if ((key == NULL) || (pubKeySz != ED448_PUB_KEY_SIZE)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ ret = wc_Shake256Hash(key->k, ED448_KEY_SIZE, az, sizeof(az));
+ }
+ if (ret == 0) {
+ /* apply clamp */
+ az[0] &= 0xfc;
+ az[55] |= 0x80;
+ az[56] = 0x00;
+
+ ge448_scalarmult_base(&A, az);
+ ge448_to_bytes(pubKey, &A);
+ }
+
+ return ret;
+}
+
+/* Make a new ed448 private/public key.
+ *
+ * rng [in] Random number generator.
+ * keysize [in] Size of the key to generate.
+ * key [in] Ed448 key object.
+ * returns BAD_FUNC_ARG when rng or key is NULL or keySz is not equal to
+ * ED448_KEY_SIZE,
+ * other -ve value on random number or hash failure,
+ * 0 otherwise.
+ */
+int wc_ed448_make_key(WC_RNG* rng, int keySz, ed448_key* key)
+{
+ int ret = 0;
+
+ if ((rng == NULL) || (key == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* ed448 has 57 byte key sizes */
+ if ((ret == 0) && (keySz != ED448_KEY_SIZE)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ ret = wc_RNG_GenerateBlock(rng, key->k, ED448_KEY_SIZE);
+ }
+ if (ret == 0) {
+ ret = wc_ed448_make_public(key, key->p, ED448_PUB_KEY_SIZE);
+ if (ret != 0) {
+ ForceZero(key->k, ED448_KEY_SIZE);
+ }
+ }
+ if (ret == 0) {
+ /* put public key after private key, on the same buffer */
+ XMEMMOVE(key->k + ED448_KEY_SIZE, key->p, ED448_PUB_KEY_SIZE);
+
+ key->pubKeySet = 1;
+ }
+
+ return ret;
+}
+
+
+#ifdef HAVE_ED448_SIGN
+/* Sign the message using the ed448 private key.
+ *
+ * in [in] Message to sign.
+ * inLen [in] Length of the message in bytes.
+ * out [in] Buffer to write signature into.
+ * outLen [in/out] On in, size of buffer.
+ * On out, the length of the signature in bytes.
+ * key [in] Ed448 key to use when signing
+ * type [in] Type of signature to perform: Ed448 or Ed448ph
+ * context [in] Context of signing.
+ * contextLen [in] Length of context in bytes.
+ * returns BAD_FUNC_ARG when a parameter is NULL, context is NULL while
+ *         contextLen is non-zero, or the public key is not set,
+ * BUFFER_E when outLen is less than ED448_SIG_SIZE,
+ * other -ve values when hash fails,
+ * 0 otherwise.
+ */
+static int ed448_sign_msg(const byte* in, word32 inLen, byte* out,
+ word32 *outLen, ed448_key* key, byte type,
+ const byte* context, byte contextLen)
+{
+ ge448_p2 R;
+ byte nonce[ED448_SIG_SIZE];
+ byte hram[ED448_SIG_SIZE];
+ byte az[ED448_PRV_KEY_SIZE];
+ wc_Shake sha;
+ int ret = 0;
+
+ /* sanity check on arguments */
+ if ((in == NULL) || (out == NULL) || (outLen == NULL) || (key == NULL) ||
+ ((context == NULL) && (contextLen != 0))) {
+ ret = BAD_FUNC_ARG;
+ }
+ if ((ret == 0) && (!key->pubKeySet)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* check and set up out length */
+ if ((ret == 0) && (*outLen < ED448_SIG_SIZE)) {
+ *outLen = ED448_SIG_SIZE;
+ ret = BUFFER_E;
+ }
+
+ if (ret == 0) {
+ *outLen = ED448_SIG_SIZE;
+
+ /* step 1: create nonce to use where nonce is r in
+ r = H(h_b, ... ,h_2b-1,M) */
+ ret = wc_Shake256Hash(key->k, ED448_KEY_SIZE, az, sizeof(az));
+ }
+ if (ret == 0) {
+ /* apply clamp */
+ az[0] &= 0xfc;
+ az[55] |= 0x80;
+ az[56] = 0x00;
+
+ ret = wc_InitShake256(&sha, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, ed448Ctx, ED448CTX_SIZE);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, &type, sizeof(type));
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, &contextLen, sizeof(contextLen));
+ }
+ if (ret == 0 && context != NULL) {
+ ret = wc_Shake256_Update(&sha, context, contextLen);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, az + ED448_KEY_SIZE, ED448_KEY_SIZE);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, in, inLen);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Final(&sha, nonce, sizeof(nonce));
+ }
+ wc_Shake256_Free(&sha);
+ }
+ if (ret == 0) {
+ sc448_reduce(nonce);
+
+ /* step 2: computing R = rB where rB is the scalar multiplication of
+ r and B */
+ ge448_scalarmult_base(&R,nonce);
+ ge448_to_bytes(out,&R);
+
+ /* step 3: hash R + public key + message getting H(R,A,M) then
+ creating S = (r + H(R,A,M)a) mod l */
+ ret = wc_InitShake256(&sha, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, ed448Ctx, ED448CTX_SIZE);
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, &type, sizeof(type));
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, &contextLen, sizeof(contextLen));
+ }
+ if (ret == 0 && context != NULL) {
+ ret = wc_Shake256_Update(&sha, context, contextLen);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, out, ED448_SIG_SIZE/2);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, key->p, ED448_PUB_KEY_SIZE);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, in, inLen);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Final(&sha, hram, sizeof(hram));
+ }
+ wc_Shake256_Free(&sha);
+ }
+ }
+
+ if (ret == 0) {
+ sc448_reduce(hram);
+ sc448_muladd(out + (ED448_SIG_SIZE/2), hram, az, nonce);
+ }
+
+ return ret;
+}
+
+/* Sign the message using the ed448 private key.
+ * Signature type is Ed448.
+ *
+ * in [in] Message to sign.
+ * inLen [in] Length of the message in bytes.
+ * out [in] Buffer to write signature into.
+ * outLen [in/out] On in, size of buffer.
+ * On out, the length of the signature in bytes.
+ * key [in] Ed448 key to use when signing
+ * context [in] Context of signing.
+ * contextLen [in] Length of context in bytes.
+ * returns BAD_FUNC_ARG when a parameter is NULL, context is NULL while
+ *         contextLen is non-zero, or the public key is not set,
+ * BUFFER_E when outLen is less than ED448_SIG_SIZE,
+ * other -ve values when hash fails,
+ * 0 otherwise.
+ */
+int wc_ed448_sign_msg(const byte* in, word32 inLen, byte* out, word32 *outLen,
+ ed448_key* key, const byte* context, byte contextLen)
+{
+ return ed448_sign_msg(in, inLen, out, outLen, key, Ed448, context,
+ contextLen);
+}
+
+/* Sign the hash using the ed448 private key.
+ * Signature type is Ed448ph.
+ *
+ * hash [in] Hash of message to sign.
+ * hashLen [in] Length of hash of message in bytes.
+ * out [in] Buffer to write signature into.
+ * outLen [in/out] On in, size of buffer.
+ * On out, the length of the signature in bytes.
+ * key [in] Ed448 key to use when signing
+ * context [in] Context of signing.
+ * contextLen [in] Length of context in bytes.
+ * returns BAD_FUNC_ARG when a parameter is NULL, or contextLen is non-zero
+ * when context is NULL, or public key not set,
+ * BUFFER_E when outLen is less than ED448_SIG_SIZE,
+ * other -ve values when hash fails,
+ * 0 otherwise.
+ */
+int wc_ed448ph_sign_hash(const byte* hash, word32 hashLen, byte* out,
+ word32 *outLen, ed448_key* key,
+ const byte* context, byte contextLen)
+{
+ return ed448_sign_msg(hash, hashLen, out, outLen, key, Ed448ph, context,
+ contextLen);
+}
+
+/* Sign the message using the ed448 private key.
+ * Signature type is Ed448ph.
+ *
+ * in [in] Message to sign.
+ * inLen [in] Length of the message to sign in bytes.
+ * out [in] Buffer to write signature into.
+ * outLen [in/out] On in, size of buffer.
+ * On out, the length of the signature in bytes.
+ * key [in] Ed448 key to use when signing
+ * context [in] Context of signing.
+ * contextLen [in] Length of context in bytes.
+ * returns BAD_FUNC_ARG when a parameter is NULL, or contextLen is non-zero
+ * when context is NULL, or public key not set,
+ * BUFFER_E when outLen is less than ED448_SIG_SIZE,
+ * other -ve values when hash fails,
+ * 0 otherwise.
+ */
+int wc_ed448ph_sign_msg(const byte* in, word32 inLen, byte* out, word32 *outLen,
+ ed448_key* key, const byte* context, byte contextLen)
+{
+ int ret = 0;
+ byte hash[64];
+
+ ret = wc_Shake256Hash(in, inLen, hash, sizeof(hash));
+ if (ret == 0) {
+ ret = wc_ed448ph_sign_hash(hash, sizeof(hash), out, outLen, key,
+ context, contextLen);
+ }
+
+ return ret;
+}
+#endif /* HAVE_ED448_SIGN */
+
+#ifdef HAVE_ED448_VERIFY
+
+/* Verify the message using the ed448 public key.
+ *
+ * sig [in] Signature to verify.
+ * sigLen [in] Size of signature in bytes.
+ * msg [in] Message to verify.
+ * msgLen [in] Length of the message in bytes.
+ * key [in] Ed448 key to use to verify.
+ * type [in] Type of signature to verify: Ed448 or Ed448ph
+ * context [in] Context of verification.
+ * contextLen [in] Length of context in bytes.
+ * returns BAD_FUNC_ARG when a parameter is NULL, or contextLen is non-zero
+ * when context is NULL, or public key not set,
+ * BUFFER_E when sigLen is less than ED448_SIG_SIZE,
+ * other -ve values when hash fails,
+ * 0 otherwise.
+ */
+static int ed448_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+ word32 msgLen, int* res, ed448_key* key,
+ byte type, const byte* context, byte contextLen)
+{
+ byte rcheck[ED448_KEY_SIZE];
+ byte h[ED448_SIG_SIZE];
+ ge448_p2 A;
+ ge448_p2 R;
+ int ret = 0;
+ wc_Shake sha;
+
+ /* sanity check on arguments */
+ if ((sig == NULL) || (msg == NULL) || (res == NULL) || (key == NULL) ||
+ ((context == NULL) && (contextLen != 0))) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ /* set verification failed by default */
+ *res = 0;
+
+ /* check on basics needed to verify signature */
+ if (sigLen < ED448_SIG_SIZE) {
+ ret = BAD_FUNC_ARG;
+ }
+ }
+
+ /* uncompress A (public key), test if valid, and negate it */
+ if ((ret == 0) && (ge448_from_bytes_negate_vartime(&A, key->p) != 0)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ /* find H(R,A,M) and store it as h */
+ ret = wc_InitShake256(&sha, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, ed448Ctx, ED448CTX_SIZE);
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, &type, sizeof(type));
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, &contextLen, sizeof(contextLen));
+ }
+ if (ret == 0 && context != NULL) {
+ ret = wc_Shake256_Update(&sha, context, contextLen);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, sig, ED448_SIG_SIZE/2);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, key->p, ED448_PUB_KEY_SIZE);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Update(&sha, msg, msgLen);
+ }
+ if (ret == 0) {
+ ret = wc_Shake256_Final(&sha, h, sizeof(h));
+ }
+ wc_Shake256_Free(&sha);
+ }
+ }
+ if (ret == 0) {
+ sc448_reduce(h);
+
+ /* Uses a fast single-signature verification SB = R + H(R,A,M)A becomes
+ * SB - H(R,A,M)A saving decompression of R
+ */
+ ret = ge448_double_scalarmult_vartime(&R, h, &A,
+ sig + (ED448_SIG_SIZE/2));
+ }
+
+ if (ret == 0) {
+ ge448_to_bytes(rcheck, &R);
+
+ /* comparison of R created to R in sig */
+ if (ConstantCompare(rcheck, sig, ED448_SIG_SIZE/2) != 0) {
+ ret = SIG_VERIFY_E;
+ }
+ else {
+ /* set the verification status */
+ *res = 1;
+ }
+ }
+
+ return ret;
+}
+
+/* Verify the message using the ed448 public key.
+ * Signature type is Ed448.
+ *
+ * sig [in] Signature to verify.
+ * sigLen [in] Size of signature in bytes.
+ * msg [in] Message to verify.
+ * msgLen [in] Length of the message in bytes.
+ * key [in] Ed448 key to use to verify.
+ * context [in] Context of verification.
+ * contextLen [in] Length of context in bytes.
+ * returns BAD_FUNC_ARG when a parameter is NULL, or contextLen is non-zero
+ * when context is NULL, or public key not set,
+ * BUFFER_E when sigLen is less than ED448_SIG_SIZE,
+ * other -ve values when hash fails,
+ * 0 otherwise.
+ */
+int wc_ed448_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+ word32 msgLen, int* res, ed448_key* key,
+ const byte* context, byte contextLen)
+{
+ return ed448_verify_msg(sig, sigLen, msg, msgLen, res, key, Ed448,
+ context, contextLen);
+}
+
+/* Verify the hash using the ed448 public key.
+ * Signature type is Ed448ph.
+ *
+ * sig [in] Signature to verify.
+ * sigLen [in] Size of signature in bytes.
+ * hash [in] Hash of message to verify.
+ * hashLen [in] Length of the hash in bytes.
+ * key [in] Ed448 key to use to verify.
+ * context [in] Context of verification.
+ * contextLen [in] Length of context in bytes.
+ * returns BAD_FUNC_ARG when a parameter is NULL, or contextLen is non-zero
+ * when context is NULL, or public key not set,
+ * BUFFER_E when sigLen is less than ED448_SIG_SIZE,
+ * other -ve values when hash fails,
+ * 0 otherwise.
+ */
+int wc_ed448ph_verify_hash(const byte* sig, word32 sigLen, const byte* hash,
+ word32 hashLen, int* res, ed448_key* key,
+ const byte* context, byte contextLen)
+{
+ return ed448_verify_msg(sig, sigLen, hash, hashLen, res, key, Ed448ph,
+ context, contextLen);
+}
+
+/* Verify the message using the ed448 public key.
+ * Signature type is Ed448ph.
+ *
+ * sig [in] Signature to verify.
+ * sigLen [in] Size of signature in bytes.
+ * msg [in] Message to verify.
+ * msgLen [in] Length of the message in bytes.
+ * key [in] Ed448 key to use to verify.
+ * context [in] Context of verification.
+ * contextLen [in] Length of context in bytes.
+ * returns BAD_FUNC_ARG when a parameter is NULL, or contextLen is non-zero
+ * when context is NULL, or public key not set,
+ * BUFFER_E when sigLen is less than ED448_SIG_SIZE,
+ * other -ve values when hash fails,
+ * 0 otherwise.
+ */
+int wc_ed448ph_verify_msg(const byte* sig, word32 sigLen, const byte* msg,
+ word32 msgLen, int* res, ed448_key* key,
+ const byte* context, byte contextLen)
+{
+ int ret = 0;
+ byte hash[64];
+
+ ret = wc_Shake256Hash(msg, msgLen, hash, sizeof(hash));
+ if (ret == 0) {
+ ret = wc_ed448ph_verify_hash(sig, sigLen, hash, sizeof(hash), res, key,
+ context, contextLen);
+ }
+
+ return ret;
+}
+#endif /* HAVE_ED448_VERIFY */
+
+/* Initialize the ed448 private/public key.
+ *
+ * key [in] Ed448 key.
+ * returns BAD_FUNC_ARG when key is NULL
+ */
+int wc_ed448_init(ed448_key* key)
+{
+ int ret = 0;
+
+ if (key == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ XMEMSET(key, 0, sizeof(ed448_key));
+
+ fe448_init();
+ }
+
+ return ret;
+}
+
+
+/* Clears the ed448 key data
+ *
+ * key [in] Ed448 key.
+ */
+void wc_ed448_free(ed448_key* key)
+{
+ if (key != NULL) {
+ ForceZero(key, sizeof(ed448_key));
+ }
+}
+
+
+#ifdef HAVE_ED448_KEY_EXPORT
+
+/* Export the ed448 public key.
+ *
+ * key [in] Ed448 public key.
+ * out [in] Array to hold public key.
+ * outLen [in/out] On in, the number of bytes in array.
+ * On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when outLen is less than ED448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_ed448_export_public(ed448_key* key, byte* out, word32* outLen)
+{
+ int ret = 0;
+
+ /* sanity check on arguments */
+ if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if ((ret == 0) && (*outLen < ED448_PUB_KEY_SIZE)) {
+ *outLen = ED448_PUB_KEY_SIZE;
+ ret = BUFFER_E;
+ }
+
+ if (ret == 0) {
+ *outLen = ED448_PUB_KEY_SIZE;
+ XMEMCPY(out, key->p, ED448_PUB_KEY_SIZE);
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_ED448_KEY_EXPORT */
+
+
+#ifdef HAVE_ED448_KEY_IMPORT
+/* Import a compressed or uncompressed ed448 public key from a byte array.
+ * Public key encoded in big-endian.
+ *
+ * in [in] Array holding public key.
+ * inLen [in] Number of bytes of data in array.
+ * key [in] Ed448 public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL or key format is not supported,
+ * 0 otherwise.
+ */
+int wc_ed448_import_public(const byte* in, word32 inLen, ed448_key* key)
+{
+ int ret = 0;
+
+ /* sanity check on arguments */
+ if ((in == NULL) || (key == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ /* compressed prefix according to draft
+ * https://tools.ietf.org/html/draft-ietf-openpgp-rfc4880bis-06 */
+ if (in[0] == 0x40 && inLen > ED448_PUB_KEY_SIZE) {
+ /* key is stored in compressed format so just copy in */
+ XMEMCPY(key->p, (in + 1), ED448_PUB_KEY_SIZE);
+ key->pubKeySet = 1;
+ }
+ /* importing uncompressed public key */
+ else if (in[0] == 0x04 && inLen > 2*ED448_PUB_KEY_SIZE) {
+ /* pass in (x,y) and store compressed key */
+ ret = ge448_compress_key(key->p, in+1, in+1+ED448_PUB_KEY_SIZE);
+ if (ret == 0)
+ key->pubKeySet = 1;
+ }
+ else if (inLen == ED448_PUB_KEY_SIZE) {
+ /* if not specified compressed or uncompressed check key size
+ * if key size is equal to compressed key size copy in key */
+ XMEMCPY(key->p, in, ED448_PUB_KEY_SIZE);
+ key->pubKeySet = 1;
+ }
+ else {
+ /* bad public key format */
+ ret = BAD_FUNC_ARG;
+ }
+ }
+
+ return ret;
+}
+
+
+/* Import an ed448 private key from a byte array.
+ *
+ * priv [in] Array holding private key.
+ * privSz [in] Number of bytes of data in array.
+ * key [in] Ed448 private key.
+ * returns BAD_FUNC_ARG when a parameter is NULL or privSz is less than
+ * ED448_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_ed448_import_private_only(const byte* priv, word32 privSz,
+ ed448_key* key)
+{
+ int ret = 0;
+
+ /* sanity check on arguments */
+ if ((priv == NULL) || (key == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* key size check */
+ if ((ret == 0) && (privSz < ED448_KEY_SIZE)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ XMEMCPY(key->k, priv, ED448_KEY_SIZE);
+ }
+
+ return ret;
+}
+
+/* Import an ed448 private and public keys from a byte arrays.
+ *
+ * priv [in] Array holding private key.
+ * privSz [in] Number of bytes of data in private key array.
+ * pub [in] Array holding public key.
+ * pubSz [in] Number of bytes of data in public key array.
+ * key [in] Ed448 private/public key.
+ * returns BAD_FUNC_ARG when a parameter is NULL or privSz is less than
+ * ED448_KEY_SIZE or pubSz is less than ED448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_ed448_import_private_key(const byte* priv, word32 privSz,
+ const byte* pub, word32 pubSz, ed448_key* key)
+{
+ int ret = 0;
+
+ /* sanity check on arguments */
+ if ((priv == NULL) || (pub == NULL) || (key == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* key size check */
+ if ((ret == 0) && (privSz < ED448_KEY_SIZE || pubSz < ED448_PUB_KEY_SIZE)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ /* import public key */
+ ret = wc_ed448_import_public(pub, pubSz, key);
+ }
+ if (ret == 0) {
+ /* make the private key (priv + pub) */
+ XMEMCPY(key->k, priv, ED448_KEY_SIZE);
+ XMEMCPY(key->k + ED448_KEY_SIZE, key->p, ED448_PUB_KEY_SIZE);
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_ED448_KEY_IMPORT */
+
+
+#ifdef HAVE_ED448_KEY_EXPORT
+
+/* Export the ed448 private key.
+ *
+ * key [in] Ed448 private key.
+ * out [in] Array to hold private key.
+ * outLen [in/out] On in, the number of bytes in array.
+ * On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * ECC_BAD_ARG_E when outLen is less than ED448_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_ed448_export_private_only(ed448_key* key, byte* out, word32* outLen)
+{
+ int ret = 0;
+
+ /* sanity checks on arguments */
+ if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if ((ret == 0) && (*outLen < ED448_KEY_SIZE)) {
+ *outLen = ED448_KEY_SIZE;
+ ret = BUFFER_E;
+ }
+
+ if (ret == 0) {
+ *outLen = ED448_KEY_SIZE;
+ XMEMCPY(out, key->k, ED448_KEY_SIZE);
+ }
+
+ return ret;
+}
+
+/* Export the ed448 private and public key.
+ *
+ * key [in] Ed448 private/public key.
+ * out [in] Array to hold private and public key.
+ * outLen [in/out] On in, the number of bytes in array.
+ * On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * BUFFER_E when outLen is less than ED448_PRV_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_ed448_export_private(ed448_key* key, byte* out, word32* outLen)
+{
+ int ret = 0;
+
+ /* sanity checks on arguments */
+ if ((key == NULL) || (out == NULL) || (outLen == NULL)) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if ((ret == 0) && (*outLen < ED448_PRV_KEY_SIZE)) {
+ *outLen = ED448_PRV_KEY_SIZE;
+ ret = BUFFER_E;
+ }
+
+ if (ret == 0) {
+ *outLen = ED448_PRV_KEY_SIZE;
+ XMEMCPY(out, key->k, ED448_PRV_KEY_SIZE);
+ }
+
+ return ret;
+}
+
+/* Export the ed448 private and public key.
+ *
+ * key [in] Ed448 private/public key.
+ * priv [in] Array to hold private key.
+ * privSz [in/out] On in, the number of bytes in private key array.
+ * pub [in] Array to hold public key.
+ * pubSz [in/out] On in, the number of bytes in public key array.
+ * On out, the number bytes put into array.
+ * returns BAD_FUNC_ARG when a parameter is NULL,
+ * BUFFER_E when privSz is less than ED448_PRV_KEY_SIZE or pubSz is less
+ * than ED448_PUB_KEY_SIZE,
+ * 0 otherwise.
+ */
+int wc_ed448_export_key(ed448_key* key, byte* priv, word32 *privSz,
+ byte* pub, word32 *pubSz)
+{
+ int ret = 0;
+
+ /* export 'full' private part */
+ ret = wc_ed448_export_private(key, priv, privSz);
+ if (ret == 0) {
+ /* export public part */
+ ret = wc_ed448_export_public(key, pub, pubSz);
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_ED448_KEY_EXPORT */
+
+/* Check the public key of the ed448 key matches the private key.
+ *
+ * key [in] Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ * PUBLIC_KEY_E when the public key is not set or doesn't match,
+ * other -ve value on hash failure,
+ * 0 otherwise.
+ */
+int wc_ed448_check_key(ed448_key* key)
+{
+    int ret = 0;
+    unsigned char pubKey[ED448_PUB_KEY_SIZE];
+
+    if (key == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if ((ret == 0) && (!key->pubKeySet)) { /* ret check avoids NULL deref */
+        ret = PUBLIC_KEY_E;
+    }
+    if (ret == 0) {
+        ret = wc_ed448_make_public(key, pubKey, sizeof(pubKey));
+    }
+    if ((ret == 0) && (XMEMCMP(pubKey, key->p, ED448_PUB_KEY_SIZE) != 0)) {
+        ret = PUBLIC_KEY_E;
+    }
+
+    return ret;
+}
+
+/* Returns the size of an ed448 private key.
+ *
+ * key [in] Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ * ED448_KEY_SIZE otherwise.
+ */
+int wc_ed448_size(ed448_key* key)
+{
+ int ret = ED448_KEY_SIZE;
+
+ if (key == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ return ret;
+}
+
+/* Returns the size of an ed448 private plus public key.
+ *
+ * key [in] Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ * ED448_PRV_KEY_SIZE otherwise.
+ */
+int wc_ed448_priv_size(ed448_key* key)
+{
+ int ret = ED448_PRV_KEY_SIZE;
+
+ if (key == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ return ret;
+}
+
+/* Returns the size of an ed448 public key.
+ *
+ * key [in] Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ * ED448_PUB_KEY_SIZE otherwise.
+ */
+int wc_ed448_pub_size(ed448_key* key)
+{
+ int ret = ED448_PUB_KEY_SIZE;
+
+ if (key == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ return ret;
+}
+
+/* Returns the size of an ed448 signature.
+ *
+ * key [in] Ed448 private/public key.
+ * returns BAD_FUNC_ARG when key is NULL,
+ * ED448_SIG_SIZE otherwise.
+ */
+int wc_ed448_sig_size(ed448_key* key)
+{
+ int ret = ED448_SIG_SIZE;
+
+ if (key == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_ED448 */
+
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/error.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/error.c
index 7d1d5ebe7..87ded35d6 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/error.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/error.c
@@ -1,8 +1,8 @@
/* error.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -32,15 +33,9 @@
#pragma warning(disable: 4996)
#endif
+#ifndef NO_ERROR_STRINGS
const char* wc_GetErrorString(int error)
{
-#ifdef NO_ERROR_STRINGS
-
- (void)error;
- return "no support for error strings built in";
-
-#else
-
switch (error) {
case OPEN_RAN_E :
@@ -61,6 +56,15 @@ const char* wc_GetErrorString(int error)
case BAD_MUTEX_E :
return "Bad mutex, operation failed";
+ case WC_TIMEOUT_E:
+ return "Timeout error";
+
+ case WC_PENDING_E:
+ return "wolfCrypt Operation Pending (would block / eagain) error";
+
+ case WC_NOT_PENDING_E:
+ return "wolfCrypt operation not pending error";
+
case MP_INIT_E :
return "mp_init error state";
@@ -100,6 +104,9 @@ const char* wc_GetErrorString(int error)
case MEMORY_E :
return "out of memory error";
+ case VAR_STATE_CHANGE_E :
+ return "Variable state modified by different thread";
+
case RSA_WRONG_TYPE_E :
return "RSA wrong block type for RSA function";
@@ -110,7 +117,7 @@ const char* wc_GetErrorString(int error)
return "Buffer error, output too small or input too big";
case ALGO_ID_E :
- return "Setting Cert AlogID error";
+ return "Setting Cert AlgoID error";
case PUBLIC_KEY_E :
return "Setting Cert Public Key error";
@@ -170,7 +177,7 @@ const char* wc_GetErrorString(int error)
return "ASN signature error, mismatched oid";
case ASN_TIME_E :
- return "ASN time error, unkown time type";
+ return "ASN time error, unknown time type";
case ASN_INPUT_E :
return "ASN input error, not enough data";
@@ -191,7 +198,10 @@ const char* wc_GetErrorString(int error)
return "ASN NTRU key decode error, invalid input";
case ASN_CRIT_EXT_E:
- return "X.509 Critical extension ignored";
+ return "X.509 Critical extension ignored or invalid";
+
+ case ASN_ALT_NAME_E:
+ return "ASN alternate name error";
case ECC_BAD_ARG_E :
return "ECC input argument wrong type, invalid input";
@@ -223,8 +233,8 @@ const char* wc_GetErrorString(int error)
case AES_CCM_AUTH_E:
return "AES-CCM Authentication check fail";
- case CAVIUM_INIT_E:
- return "Cavium Init type error";
+ case ASYNC_INIT_E:
+ return "Async Init error";
case COMPRESS_INIT_E:
return "Compress Init error";
@@ -253,8 +263,11 @@ const char* wc_GetErrorString(int error)
case ASN_OCSP_CONFIRM_E :
return "ASN OCSP sig error, confirm failure";
- case BAD_ENC_STATE_E:
- return "Bad ecc encrypt state operation";
+ case ASN_NO_PEM_HEADER:
+ return "ASN no PEM Header Error";
+
+ case BAD_STATE_E:
+ return "Bad state operation";
case BAD_PADDING_E:
return "Bad padding, message wrong length";
@@ -268,6 +281,9 @@ const char* wc_GetErrorString(int error)
case PKCS7_RECIP_E:
return "PKCS#7 error: no matching recipient found";
+ case WC_PKCS7_WANT_READ_E:
+ return "PKCS#7 operations wants more input, call again";
+
case FIPS_NOT_ALLOWED_E:
return "FIPS mode not allowed error";
@@ -325,19 +341,190 @@ const char* wc_GetErrorString(int error)
case ECC_INF_E:
return " ECC point at infinity error";
+ case ECC_OUT_OF_RANGE_E:
+ return " ECC Qx or Qy out of range error";
+
case ECC_PRIV_KEY_E:
return " ECC private key is not valid error";
+ case SRP_CALL_ORDER_E:
+ return "SRP function called in the wrong order error";
+
+ case SRP_VERIFY_E:
+ return "SRP proof verification error";
+
+ case SRP_BAD_KEY_E:
+ return "SRP bad key values error";
+
+ case ASN_NO_SKID:
+ return "ASN no Subject Key Identifier found error";
+
+ case ASN_NO_AKID:
+ return "ASN no Authority Key Identifier found error";
+
+ case ASN_NO_KEYUSAGE:
+ return "ASN no Key Usage found error";
+
+ case SKID_E:
+ return "Setting Subject Key Identifier error";
+
+ case AKID_E:
+ return "Setting Authority Key Identifier error";
+
+ case KEYUSAGE_E:
+ return "Key Usage value error";
+
+ case EXTKEYUSAGE_E:
+ return "Extended Key Usage value error";
+
+ case CERTPOLICIES_E:
+ return "Setting Certificate Policies error";
+
+ case WC_INIT_E:
+ return "wolfCrypt Initialize Failure error";
+
+ case SIG_VERIFY_E:
+ return "Signature verify error";
+
+ case BAD_COND_E:
+ return "Bad condition variable operation error";
+
+ case SIG_TYPE_E:
+ return "Signature type not enabled/available";
+
+ case HASH_TYPE_E:
+ return "Hash type not enabled/available";
+
+ case WC_KEY_SIZE_E:
+ return "Key size error, either too small or large";
+
+ case ASN_COUNTRY_SIZE_E:
+ return "Country code size error, either too small or large";
+
+ case MISSING_RNG_E:
+ return "RNG required but not provided";
+
+ case ASN_PATHLEN_SIZE_E:
+ return "ASN CA path length value too large error";
+
+ case ASN_PATHLEN_INV_E:
+ return "ASN CA path length larger than signer error";
+
+ case BAD_KEYWRAP_ALG_E:
+ return "Unsupported key wrap algorithm error";
+
+ case BAD_KEYWRAP_IV_E:
+ return "Decrypted AES key wrap IV does not match expected";
+
+ case WC_CLEANUP_E:
+ return "wolfcrypt cleanup failed";
+
+ case ECC_CDH_KAT_FIPS_E:
+ return "wolfcrypt FIPS ECC CDH Known Answer Test Failure";
+
+ case DH_CHECK_PUB_E:
+ return "DH Check Public Key failure";
+
+ case BAD_PATH_ERROR:
+ return "Bad path for opendir error";
+
+ case ASYNC_OP_E:
+ return "Async operation error";
+
+ case BAD_OCSP_RESPONDER:
+ return "Invalid OCSP Responder, missing specific key usage extensions";
+
+ case ECC_PRIVATEONLY_E:
+ return "Invalid use of private only ECC key";
+
+ case WC_HW_E:
+ return "Error with hardware crypto use";
+
+ case WC_HW_WAIT_E:
+ return "Hardware waiting on resource";
+
+ case PSS_SALTLEN_E:
+ return "PSS - Length of salt is too big for hash algorithm";
+
+ case PRIME_GEN_E:
+ return "Unable to find a prime for RSA key";
+
+ case BER_INDEF_E:
+ return "Unable to decode an indefinite length encoded message";
+
+ case RSA_OUT_OF_RANGE_E:
+ return "Ciphertext to decrypt is out of range";
+
+ case RSAPSS_PAT_FIPS_E:
+ return "wolfcrypt FIPS RSA-PSS Pairwise Agreement Test Failure";
+
+ case ECDSA_PAT_FIPS_E:
+ return "wolfcrypt FIPS ECDSA Pairwise Agreement Test Failure";
+
+ case DH_KAT_FIPS_E:
+ return "wolfcrypt FIPS DH Known Answer Test Failure";
+
+ case AESCCM_KAT_FIPS_E:
+ return "AESCCM Known Answer Test check FIPS error";
+
+ case SHA3_KAT_FIPS_E:
+ return "SHA-3 Known Answer Test check FIPS error";
+
+ case ECDHE_KAT_FIPS_E:
+ return "wolfcrypt FIPS ECDHE Known Answer Test Failure";
+
+ case AES_GCM_OVERFLOW_E:
+ return "AES-GCM invocation counter overflow";
+
+ case AES_CCM_OVERFLOW_E:
+ return "AES-CCM invocation counter overflow";
+
+ case RSA_KEY_PAIR_E:
+ return "RSA Key Pair-Wise Consistency check fail";
+
+ case DH_CHECK_PRIV_E:
+ return "DH Check Private Key failure";
+
+ case WC_AFALG_SOCK_E:
+ return "AF_ALG socket error";
+
+ case WC_DEVCRYPTO_E:
+ return "Error with /dev/crypto";
+
+ case ZLIB_INIT_ERROR:
+ return "zlib init error";
+
+ case ZLIB_COMPRESS_ERROR:
+ return "zlib compress error";
+
+ case ZLIB_DECOMPRESS_ERROR:
+ return "zlib decompress error";
+
+ case PKCS7_NO_SIGNER_E:
+ return "No signer in PKCS#7 signed data";
+
+ case CRYPTOCB_UNAVAILABLE:
+ return "Crypto callback unavailable";
+
+ case PKCS7_SIGNEEDS_CHECK:
+ return "Signature found but no certificate to verify";
+
+ case PSS_SALTLEN_RECOVER_E:
+ return "PSS - Salt length unable to be recovered";
+
+ case ASN_SELF_SIGNED_E:
+ return "ASN self-signed certificate error";
+
default:
return "unknown error number";
}
-
-#endif /* NO_ERROR_STRINGS */
-
}
void wc_ErrorString(int error, char* buffer)
{
XSTRNCPY(buffer, wc_GetErrorString(error), WOLFSSL_MAX_ERROR_SZ);
+ buffer[WOLFSSL_MAX_ERROR_SZ-1] = 0;
}
+#endif /* !NO_ERROR_STRINGS */
+
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/evp.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/evp.c
new file mode 100644
index 000000000..d9207900c
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/evp.c
@@ -0,0 +1,6595 @@
+/* evp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#if !defined(WOLFSSL_EVP_INCLUDED)
+ #ifndef WOLFSSL_IGNORE_FILE_WARN
+ #warning evp.c does not need to be compiled separately from ssl.c
+ #endif
+#elif defined(WOLFCRYPT_ONLY)
+#else
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include <wolfssl/openssl/ecdsa.h>
+#include <wolfssl/openssl/evp.h>
+
+#if defined(OPENSSL_EXTRA)
+
+#ifndef NO_AES
+ #ifdef HAVE_AES_CBC
+ #ifdef WOLFSSL_AES_128
+ static char *EVP_AES_128_CBC = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static char *EVP_AES_192_CBC = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static char *EVP_AES_256_CBC = NULL;
+ #endif
+ #endif /* HAVE_AES_CBC */
+
+ #ifdef WOLFSSL_AES_OFB
+ #ifdef WOLFSSL_AES_128
+ static char *EVP_AES_128_OFB = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static char *EVP_AES_192_OFB = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static char *EVP_AES_256_OFB = NULL;
+ #endif
+ #endif /* WOLFSSL_AES_OFB */
+
+ #ifdef WOLFSSL_AES_XTS
+ #ifdef WOLFSSL_AES_128
+ static char *EVP_AES_128_XTS = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static char *EVP_AES_256_XTS = NULL;
+ #endif
+ #endif /* WOLFSSL_AES_XTS */
+
+ #ifdef WOLFSSL_AES_CFB
+ #ifdef WOLFSSL_AES_128
+ static char *EVP_AES_128_CFB1 = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static char *EVP_AES_192_CFB1 = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static char *EVP_AES_256_CFB1 = NULL;
+ #endif
+
+ #ifdef WOLFSSL_AES_128
+ static char *EVP_AES_128_CFB8 = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static char *EVP_AES_192_CFB8 = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static char *EVP_AES_256_CFB8 = NULL;
+ #endif
+
+ #ifdef WOLFSSL_AES_128
+ static char *EVP_AES_128_CFB128 = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static char *EVP_AES_192_CFB128 = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static char *EVP_AES_256_CFB128 = NULL;
+ #endif
+ #endif /* WOLFSSL_AES_CFB */
+
+ #ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ static char *EVP_AES_128_GCM = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static char *EVP_AES_192_GCM = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static char *EVP_AES_256_GCM = NULL;
+ #endif
+ #endif /* HAVE_AESGCM */
+ #ifdef WOLFSSL_AES_128
+ static char *EVP_AES_128_CTR = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static char *EVP_AES_192_CTR = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static char *EVP_AES_256_CTR = NULL;
+ #endif
+
+ #ifdef WOLFSSL_AES_128
+ static char *EVP_AES_128_ECB = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ static char *EVP_AES_192_ECB = NULL;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ static char *EVP_AES_256_ECB = NULL;
+ #endif
+ #define EVP_AES_SIZE 11
+ #ifdef WOLFSSL_AES_CFB
+ #define EVP_AESCFB_SIZE 14
+ #endif
+#endif
+
+#ifndef NO_DES3
+ static char *EVP_DES_CBC = NULL;
+ static char *EVP_DES_ECB = NULL;
+
+ static char *EVP_DES_EDE3_CBC = NULL;
+ static char *EVP_DES_EDE3_ECB = NULL;
+
+ #define EVP_DES_SIZE 7
+ #define EVP_DES_EDE3_SIZE 12
+#endif
+
+#ifdef HAVE_IDEA
+ static char *EVP_IDEA_CBC;
+ #define EVP_IDEA_SIZE 8
+#endif
+
+static unsigned int cipherType(const WOLFSSL_EVP_CIPHER *cipher);
+
+
+/* Getter function for cipher key length
+ *
+ * c WOLFSSL_EVP_CIPHER structure to get key length from
+ *
+ * NOTE: OpenSSL_add_all_ciphers() should be called first before using this
+ * function
+ *
+ * Returns size of key in bytes
+ */
+int wolfSSL_EVP_Cipher_key_length(const WOLFSSL_EVP_CIPHER* c)
+{
+ WOLFSSL_ENTER("wolfSSL_EVP_Cipher_key_length");
+
+ if (c == NULL) {
+ return 0;
+ }
+
+ switch (cipherType(c)) {
+#if !defined(NO_AES)
+ #if defined(HAVE_AES_CBC)
+ case AES_128_CBC_TYPE: return 16;
+ case AES_192_CBC_TYPE: return 24;
+ case AES_256_CBC_TYPE: return 32;
+ #endif
+ #if defined(WOLFSSL_AES_CFB)
+ case AES_128_CFB1_TYPE: return 16;
+ case AES_192_CFB1_TYPE: return 24;
+ case AES_256_CFB1_TYPE: return 32;
+ case AES_128_CFB8_TYPE: return 16;
+ case AES_192_CFB8_TYPE: return 24;
+ case AES_256_CFB8_TYPE: return 32;
+ case AES_128_CFB128_TYPE: return 16;
+ case AES_192_CFB128_TYPE: return 24;
+ case AES_256_CFB128_TYPE: return 32;
+ #endif
+ #if defined(WOLFSSL_AES_OFB)
+ case AES_128_OFB_TYPE: return 16;
+ case AES_192_OFB_TYPE: return 24;
+ case AES_256_OFB_TYPE: return 32;
+ #endif
+ #if defined(WOLFSSL_AES_XTS)
+ case AES_128_XTS_TYPE: return 16;
+ case AES_256_XTS_TYPE: return 32;
+ #endif
+ #if defined(HAVE_AESGCM)
+ case AES_128_GCM_TYPE: return 16;
+ case AES_192_GCM_TYPE: return 24;
+ case AES_256_GCM_TYPE: return 32;
+ #endif
+ #if defined(WOLFSSL_AES_COUNTER)
+ case AES_128_CTR_TYPE: return 16;
+ case AES_192_CTR_TYPE: return 24;
+ case AES_256_CTR_TYPE: return 32;
+ #endif
+ #if defined(HAVE_AES_ECB)
+ case AES_128_ECB_TYPE: return 16;
+ case AES_192_ECB_TYPE: return 24;
+ case AES_256_ECB_TYPE: return 32;
+ #endif
+#endif /* !NO_AES */
+ #ifndef NO_DES3
+ case DES_CBC_TYPE: return 8;
+ case DES_EDE3_CBC_TYPE: return 24;
+ case DES_ECB_TYPE: return 8;
+ case DES_EDE3_ECB_TYPE: return 24;
+ #endif
+ default:
+ return 0;
+ }
+}
+
+
+int wolfSSL_EVP_EncryptInit(WOLFSSL_EVP_CIPHER_CTX* ctx,
+ const WOLFSSL_EVP_CIPHER* type,
+ const unsigned char* key,
+ const unsigned char* iv)
+{
+ return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 1);
+}
+
+/* wolfSSL_EVP_EncryptInit_ex: as wolfSSL_EVP_EncryptInit; the engine
+ * argument is accepted for API compatibility and ignored. */
+int wolfSSL_EVP_EncryptInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                               const WOLFSSL_EVP_CIPHER* type,
+                               WOLFSSL_ENGINE *impl,
+                               const unsigned char* key,
+                               const unsigned char* iv)
+{
+    (void)impl;  /* engines are not supported */
+    return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 1);
+}
+
+/* wolfSSL_EVP_DecryptInit: configure ctx for decryption with the given
+ * cipher, key and IV.  Thin wrapper over wolfSSL_EVP_CipherInit with
+ * enc = 0. */
+int wolfSSL_EVP_DecryptInit(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                            const WOLFSSL_EVP_CIPHER* type,
+                            const unsigned char* key,
+                            const unsigned char* iv)
+{
+    /* Fix: the trace previously logged "wolfSSL_EVP_CipherInit",
+     * mislabelling this entry point in debug output. */
+    WOLFSSL_ENTER("wolfSSL_EVP_DecryptInit");
+    return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 0);
+}
+
+/* wolfSSL_EVP_DecryptInit_ex: as wolfSSL_EVP_DecryptInit; the engine
+ * argument is accepted for API compatibility and ignored. */
+int wolfSSL_EVP_DecryptInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                               const WOLFSSL_EVP_CIPHER* type,
+                               WOLFSSL_ENGINE *impl,
+                               const unsigned char* key,
+                               const unsigned char* iv)
+{
+    (void)impl;  /* engines are not supported */
+    WOLFSSL_ENTER("wolfSSL_EVP_DecryptInit");
+    return wolfSSL_EVP_CipherInit(ctx, type, (byte*)key, (byte*)iv, 0);
+}
+
+
+/* Allocate and initialize a fresh cipher context.
+ * Returns NULL on allocation failure. */
+WOLFSSL_EVP_CIPHER_CTX *wolfSSL_EVP_CIPHER_CTX_new(void)
+{
+    WOLFSSL_EVP_CIPHER_CTX *ctx;
+
+    ctx = (WOLFSSL_EVP_CIPHER_CTX*)XMALLOC(sizeof *ctx, NULL,
+                                           DYNAMIC_TYPE_TMP_BUFFER);
+    if (ctx != NULL) {
+        WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_new");
+        wolfSSL_EVP_CIPHER_CTX_init(ctx);
+    }
+    return ctx;
+}
+
+/* Clean up and release a cipher context; NULL is a no-op. */
+void wolfSSL_EVP_CIPHER_CTX_free(WOLFSSL_EVP_CIPHER_CTX *ctx)
+{
+    if (ctx == NULL)
+        return;
+    WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_free");
+    wolfSSL_EVP_CIPHER_CTX_cleanup(ctx);
+    XFREE(ctx, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+}
+
+/* Reset a cipher context to its just-initialized state.
+ * Returns WOLFSSL_SUCCESS, or WOLFSSL_FAILURE when ctx is NULL. */
+int wolfSSL_EVP_CIPHER_CTX_reset(WOLFSSL_EVP_CIPHER_CTX *ctx)
+{
+    if (ctx == NULL)
+        return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_reset");
+    wolfSSL_EVP_CIPHER_CTX_cleanup(ctx);
+    return WOLFSSL_SUCCESS;
+}
+
+/* Extract the cipher-mode bits from the context flags (0 for NULL). */
+unsigned long wolfSSL_EVP_CIPHER_CTX_mode(const WOLFSSL_EVP_CIPHER_CTX *ctx)
+{
+    return (ctx != NULL) ? (ctx->flags & WOLFSSL_EVP_CIPH_MODE) : 0;
+}
+
+/* Finish an encrypt operation; fails if ctx is NULL or was set up for
+ * decryption. */
+int wolfSSL_EVP_EncryptFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                                   unsigned char *out, int *outl)
+{
+    if (ctx == NULL || !ctx->enc)
+        return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("wolfSSL_EVP_EncryptFinal");
+    return wolfSSL_EVP_CipherFinal(ctx, out, outl);
+}
+
+
+/* wolfSSL_EVP_CipherInit_ex: as wolfSSL_EVP_CipherInit; the engine
+ * argument is accepted for API compatibility and ignored. */
+int wolfSSL_EVP_CipherInit_ex(WOLFSSL_EVP_CIPHER_CTX* ctx,
+                              const WOLFSSL_EVP_CIPHER* type,
+                              WOLFSSL_ENGINE *impl,
+                              const unsigned char* key,
+                              const unsigned char* iv,
+                              int enc)
+{
+    (void)impl;  /* engines are not supported */
+    return wolfSSL_EVP_CipherInit(ctx, type, key, iv, enc);
+}
+
+/* Finish an encrypt operation (extended form); fails if ctx is NULL or
+ * was set up for decryption. */
+int wolfSSL_EVP_EncryptFinal_ex(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                                unsigned char *out, int *outl)
+{
+    if (ctx == NULL || !ctx->enc)
+        return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("wolfSSL_EVP_EncryptFinal_ex");
+    return wolfSSL_EVP_CipherFinal(ctx, out, outl);
+}
+
+/* Finish a decrypt operation; fails if ctx is NULL or was set up for
+ * encryption. */
+int wolfSSL_EVP_DecryptFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                             unsigned char *out, int *outl)
+{
+    if (ctx == NULL || ctx->enc)
+        return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal");
+    return wolfSSL_EVP_CipherFinal(ctx, out, outl);
+}
+
+/* Finish a decrypt operation (extended form); fails if ctx is NULL or
+ * was set up for encryption. */
+int wolfSSL_EVP_DecryptFinal_ex(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                                unsigned char *out, int *outl)
+{
+    if (ctx == NULL || ctx->enc)
+        return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal_ex");
+    return wolfSSL_EVP_CipherFinal(ctx, out, outl);
+}
+
+
+/* wolfSSL_EVP_DigestInit_ex: as wolfSSL_EVP_DigestInit; the engine
+ * argument is accepted for API compatibility and ignored. */
+int wolfSSL_EVP_DigestInit_ex(WOLFSSL_EVP_MD_CTX* ctx,
+                              const WOLFSSL_EVP_MD* type,
+                              WOLFSSL_ENGINE *impl)
+{
+    (void)impl;  /* engines are not supported */
+    WOLFSSL_ENTER("wolfSSL_EVP_DigestInit_ex");
+    return wolfSSL_EVP_DigestInit(ctx, type);
+}
+
+/* Debug helper: hex-dump sz bytes of buffer b (8 per line) when
+ * DEBUG_WOLFSSL_EVP is defined; compiles to nothing otherwise. */
+#ifdef DEBUG_WOLFSSL_EVP
+#define PRINT_BUF(b, sz) { int _i; for(_i=0; _i<(sz); _i++) { \
+  printf("%02x(%c),", (b)[_i], (b)[_i]); if ((_i+1)%8==0)printf("\n");}}
+#else
+#define PRINT_BUF(b, sz)
+#endif
+
+/* Copy as many bytes of in as fit into the partial block held in
+ * ctx->buf (at most block_size - bufUsed), advancing ctx->bufUsed.
+ * Returns the number of bytes consumed (0 when sz <= 0). */
+static int fillBuff(WOLFSSL_EVP_CIPHER_CTX *ctx, const unsigned char *in, int sz)
+{
+    int take;
+
+    if (sz <= 0)
+        return 0;
+
+    take = ctx->block_size - ctx->bufUsed;
+    if (sz + ctx->bufUsed <= ctx->block_size)
+        take = sz;          /* everything fits; consume it all */
+    XMEMCPY(&(ctx->buf[ctx->bufUsed]), in, take);
+    ctx->bufUsed += take;
+    return take;
+}
+
+/* Drive the context's underlying wolfCrypt cipher over inl bytes.
+ * For AES-GCM, out == NULL means in/inl carry additional authenticated
+ * data (AAD) rather than plaintext/ciphertext.
+ * Returns WOLFSSL_SUCCESS (1) on success and WOLFSSL_FAILURE (0) on a
+ * wolfCrypt error or unknown cipher type. */
+static int evpCipherBlock(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                          unsigned char *out,
+                          const unsigned char *in, int inl)
+{
+    int ret = 0;
+
+    switch (ctx->cipherType) {
+#if !defined(NO_AES)
+    #if defined(HAVE_AES_CBC)
+        case AES_128_CBC_TYPE:
+        case AES_192_CBC_TYPE:
+        case AES_256_CBC_TYPE:
+            if (ctx->enc)
+                ret = wc_AesCbcEncrypt(&ctx->cipher.aes, out, in, inl);
+            else
+                ret = wc_AesCbcDecrypt(&ctx->cipher.aes, out, in, inl);
+            break;
+    #endif
+    #if defined(HAVE_AESGCM)
+        case AES_128_GCM_TYPE:
+        case AES_192_GCM_TYPE:
+        case AES_256_GCM_TYPE:
+            if (ctx->enc) {
+                if (out){
+                    /* encrypt confidential data*/
+                    ret = wc_AesGcmEncrypt(&ctx->cipher.aes, out, in, inl,
+                              ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+                              NULL, 0);
+                }
+                else {
+                    /* authenticated, non-confidential data */
+                    ret = wc_AesGcmEncrypt(&ctx->cipher.aes, NULL, NULL, 0,
+                              ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+                              in, inl);
+                    /* Reset partial authTag error for AAD*/
+                    if (ret == AES_GCM_AUTH_E)
+                        ret = 0;
+                }
+            }
+            else {
+                if (out){
+                    /* decrypt confidential data*/
+                    ret = wc_AesGcmDecrypt(&ctx->cipher.aes, out, in, inl,
+                              ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+                              NULL, 0);
+                }
+                else {
+                    /* authenticated, non-confidential data*/
+                    ret = wc_AesGcmDecrypt(&ctx->cipher.aes, NULL, NULL, 0,
+                              ctx->iv, ctx->ivSz,
+                              ctx->authTag, ctx->authTagSz,
+                              in, inl);
+                    /* Reset partial authTag error for AAD*/
+                    if (ret == AES_GCM_AUTH_E)
+                        ret = 0;
+                }
+            }
+            break;
+    #endif
+    #if defined(WOLFSSL_AES_COUNTER)
+        case AES_128_CTR_TYPE:
+        case AES_192_CTR_TYPE:
+        case AES_256_CTR_TYPE:
+            /* CTR mode: the same keystream routine handles both
+             * directions. */
+            ret = wc_AesCtrEncrypt(&ctx->cipher.aes, out, in, inl);
+            break;
+    #endif
+    #if defined(HAVE_AES_ECB)
+        case AES_128_ECB_TYPE:
+        case AES_192_ECB_TYPE:
+        case AES_256_ECB_TYPE:
+            if (ctx->enc)
+                ret = wc_AesEcbEncrypt(&ctx->cipher.aes, out, in, inl);
+            else
+                ret = wc_AesEcbDecrypt(&ctx->cipher.aes, out, in, inl);
+            break;
+    #endif
+    #if defined(WOLFSSL_AES_OFB)
+        case AES_128_OFB_TYPE:
+        case AES_192_OFB_TYPE:
+        case AES_256_OFB_TYPE:
+            if (ctx->enc)
+                ret = wc_AesOfbEncrypt(&ctx->cipher.aes, out, in, inl);
+            else
+                ret = wc_AesOfbDecrypt(&ctx->cipher.aes, out, in, inl);
+            break;
+    #endif
+    #if defined(WOLFSSL_AES_CFB)
+    #if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+        case AES_128_CFB1_TYPE:
+        case AES_192_CFB1_TYPE:
+        case AES_256_CFB1_TYPE:
+            /* CFB1 operates on bits, so scale the byte count. */
+            if (ctx->enc)
+                ret = wc_AesCfb1Encrypt(&ctx->cipher.aes, out, in,
+                        inl * WOLFSSL_BIT_SIZE);
+            else
+                ret = wc_AesCfb1Decrypt(&ctx->cipher.aes, out, in,
+                        inl * WOLFSSL_BIT_SIZE);
+            break;
+
+        case AES_128_CFB8_TYPE:
+        case AES_192_CFB8_TYPE:
+        case AES_256_CFB8_TYPE:
+            if (ctx->enc)
+                ret = wc_AesCfb8Encrypt(&ctx->cipher.aes, out, in, inl);
+            else
+                ret = wc_AesCfb8Decrypt(&ctx->cipher.aes, out, in, inl);
+            break;
+    #endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+
+        case AES_128_CFB128_TYPE:
+        case AES_192_CFB128_TYPE:
+        case AES_256_CFB128_TYPE:
+            if (ctx->enc)
+                ret = wc_AesCfbEncrypt(&ctx->cipher.aes, out, in, inl);
+            else
+                ret = wc_AesCfbDecrypt(&ctx->cipher.aes, out, in, inl);
+            break;
+    #endif
+#if defined(WOLFSSL_AES_XTS)
+        case AES_128_XTS_TYPE:
+        case AES_256_XTS_TYPE:
+            /* XTS takes the tweak (iv) explicitly on every call. */
+            if (ctx->enc)
+                ret = wc_AesXtsEncrypt(&ctx->cipher.xts, out, in, inl,
+                        ctx->iv, ctx->ivSz);
+            else
+                ret = wc_AesXtsDecrypt(&ctx->cipher.xts, out, in, inl,
+                        ctx->iv, ctx->ivSz);
+            break;
+#endif
+#endif /* !NO_AES */
+    #ifndef NO_DES3
+        case DES_CBC_TYPE:
+            if (ctx->enc)
+                ret = wc_Des_CbcEncrypt(&ctx->cipher.des, out, in, inl);
+            else
+                ret = wc_Des_CbcDecrypt(&ctx->cipher.des, out, in, inl);
+            break;
+        case DES_EDE3_CBC_TYPE:
+            if (ctx->enc)
+                ret = wc_Des3_CbcEncrypt(&ctx->cipher.des3, out, in, inl);
+            else
+                ret = wc_Des3_CbcDecrypt(&ctx->cipher.des3, out, in, inl);
+            break;
+    #if defined(WOLFSSL_DES_ECB)
+        /* NOTE(review): DES ECB calls the Encrypt routine regardless of
+         * ctx->enc -- presumably direction is fixed by the key schedule
+         * at setup; confirm against wc_Des_EcbEncrypt docs. */
+        case DES_ECB_TYPE:
+            ret = wc_Des_EcbEncrypt(&ctx->cipher.des, out, in, inl);
+            break;
+        case DES_EDE3_ECB_TYPE:
+            ret = wc_Des3_EcbEncrypt(&ctx->cipher.des3, out, in, inl);
+            break;
+    #endif
+    #endif
+    #ifndef NO_RC4
+        case ARC4_TYPE:
+            /* Stream cipher: same operation both directions, no error
+             * return. */
+            wc_Arc4Process(&ctx->cipher.arc4, out, in, inl);
+            break;
+    #endif
+        default:
+            return WOLFSSL_FAILURE;
+    }
+
+    if (ret != 0)
+        return WOLFSSL_FAILURE; /* failure */
+
+    (void)in;
+    (void)inl;
+    (void)out;
+
+    return WOLFSSL_SUCCESS; /* success */
+}
+
+#if defined(HAVE_AESGCM)
+/* GCM update path: one evpCipherBlock call handles both AAD
+ * (out == NULL) and confidential data; no block buffering is needed
+ * since GCM is a stream-style mode here. */
+static int wolfSSL_EVP_CipherUpdate_GCM(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                                   unsigned char *out, int *outl,
+                                   const unsigned char *in, int inl)
+{
+    if (evpCipherBlock(ctx, out, in, inl) == 0)
+        return WOLFSSL_FAILURE;
+    *outl = inl;    /* output length always equals input length */
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
+/* Buffered cipher update.  Partial blocks are carried in ctx->buf
+ * between calls; during decryption with padding enabled the last
+ * complete block is withheld in ctx->lastBlock so CipherFinal can
+ * verify and strip the padding.
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure */
+WOLFSSL_API int wolfSSL_EVP_CipherUpdate(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                                   unsigned char *out, int *outl,
+                                   const unsigned char *in, int inl)
+{
+    int blocks;
+    int fill;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_CipherUpdate");
+    if ((ctx == NULL) || (inl < 0) || (outl == NULL)|| (in == NULL)) {
+        WOLFSSL_MSG("Bad argument");
+        return WOLFSSL_FAILURE;
+    }
+
+    *outl = 0;
+    if (inl == 0) {
+        return WOLFSSL_SUCCESS;
+    }
+
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+    switch (ctx->cipherType) {
+        case AES_128_GCM_TYPE:
+        case AES_192_GCM_TYPE:
+        case AES_256_GCM_TYPE:
+/* if out == NULL, in/inl contains the additional authenticated data for GCM */
+            return wolfSSL_EVP_CipherUpdate_GCM(ctx, out, outl, in, inl);
+        default:
+            /* fall-through */
+            break;
+    }
+#endif /* !defined(NO_AES) && defined(HAVE_AESGCM) */
+
+    /* out == NULL is only meaningful for GCM AAD (handled above). */
+    if (out == NULL) {
+        return WOLFSSL_FAILURE;
+    }
+
+
+    if (ctx->bufUsed > 0) { /* concatenate them if there is anything */
+        fill = fillBuff(ctx, in, inl);
+        inl -= fill;
+        in  += fill;
+    }
+
+    /* check if the buff is full, and if so flush it out */
+    if (ctx->bufUsed == ctx->block_size) {
+        byte* output = out;
+
+        /* During decryption we save the last block to check padding on Final.
+         * Update the last block stored if one has already been stored */
+        if (ctx->enc == 0) {
+            if (ctx->lastUsed == 1) {
+                XMEMCPY(out, ctx->lastBlock, ctx->block_size);
+                *outl+= ctx->block_size;
+                out  += ctx->block_size;
+            }
+            output = ctx->lastBlock; /* redirect output to last block buffer */
+            ctx->lastUsed = 1;
+        }
+
+        PRINT_BUF(ctx->buf, ctx->block_size);
+        if (evpCipherBlock(ctx, output, ctx->buf, ctx->block_size) == 0) {
+            return WOLFSSL_FAILURE;
+        }
+        PRINT_BUF(out, ctx->block_size);
+        ctx->bufUsed = 0;
+
+        /* if doing encryption update the new output block, decryption will
+         * always have the last block saved for when Final is called */
+        if ((ctx->enc != 0)) {
+            *outl+= ctx->block_size;
+            out  += ctx->block_size;
+        }
+    }
+
+    blocks = inl / ctx->block_size;
+    if (blocks > 0) {
+        /* During decryption we save the last block to check padding on Final.
+         * Update the last block stored if one has already been stored */
+        if ((ctx->enc == 0) && (ctx->lastUsed == 1)) {
+            PRINT_BUF(ctx->lastBlock, ctx->block_size);
+            XMEMCPY(out, ctx->lastBlock, ctx->block_size);
+            *outl += ctx->block_size;
+            out += ctx->block_size;
+            ctx->lastUsed = 0;
+        }
+
+        /* process blocks */
+        if (evpCipherBlock(ctx, out, in, blocks * ctx->block_size) == 0) {
+            return WOLFSSL_FAILURE;
+        }
+        PRINT_BUF(in, ctx->block_size*blocks);
+        PRINT_BUF(out,ctx->block_size*blocks);
+        inl -= ctx->block_size * blocks;
+        in  += ctx->block_size * blocks;
+        if (ctx->enc == 0) {
+            if ((ctx->flags & WOLFSSL_EVP_CIPH_NO_PADDING) ||
+                    (ctx->block_size == 1)) {
+                /* no padding to verify later; release everything now */
+                ctx->lastUsed = 0;
+                *outl += ctx->block_size * blocks;
+            } else {
+                /* in the case of decryption and padding, store the last block
+                 * here in order to verify the padding when Final is called */
+                if (inl == 0) { /* if not 0 then we know leftovers are checked*/
+                    ctx->lastUsed = 1;
+                    blocks = blocks - 1; /* save last block to check padding in
+                                          * EVP_CipherFinal call */
+                    XMEMCPY(ctx->lastBlock, &out[ctx->block_size * blocks],
+                            ctx->block_size);
+                }
+                *outl += ctx->block_size * blocks;
+            }
+        } else {
+            *outl += ctx->block_size * blocks;
+        }
+    }
+
+
+    if (inl > 0) {
+        /* put fraction into buff */
+        fillBuff(ctx, in, inl);
+        /* no increase of outl */
+    }
+    (void)out; /* silence warning in case not read */
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Apply PKCS#7 padding: fill the unused tail of ctx->buf with the pad
+ * count (block_size - bufUsed). */
+static void padBlock(WOLFSSL_EVP_CIPHER_CTX *ctx)
+{
+    byte pad = (byte)(ctx->block_size - ctx->bufUsed);
+    int  idx;
+
+    for (idx = ctx->bufUsed; idx < ctx->block_size; idx++)
+        ctx->buf[idx] = pad;
+}
+
+/* Verify PKCS#7 padding in the final decrypted block.
+ * Returns the number of plaintext bytes in the block
+ * (block_size - pad count) on success, or -1 for malformed padding. */
+static int checkPad(WOLFSSL_EVP_CIPHER_CTX *ctx, unsigned char *buff)
+{
+    int i;
+    int n;
+    n = buff[ctx->block_size-1];
+    /* A valid PKCS#7 pad byte is in [1, block_size].  Previously n == 0
+     * slipped through (the loop below runs zero times), silently
+     * accepting an invalid block as "fully padded". */
+    if (n > ctx->block_size || n < 1) return -1;
+    for (i = 0; i < n; i++) {
+        if (buff[ctx->block_size-i-1] != n)
+            return -1;
+    }
+    return ctx->block_size - n;
+}
+
+/* Finish a buffered cipher operation: for encryption, pad and emit the
+ * final block (unless padding is disabled); for decryption, verify and
+ * strip the padding held back in ctx->lastBlock.  On success the
+ * cipher state is re-initialized for reuse.
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE. */
+int wolfSSL_EVP_CipherFinal(WOLFSSL_EVP_CIPHER_CTX *ctx,
+                                   unsigned char *out, int *outl)
+{
+    int fl;
+    int ret = WOLFSSL_SUCCESS;
+    if (!ctx || !outl)
+        return WOLFSSL_FAILURE;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_CipherFinal");
+
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+    switch (ctx->cipherType) {
+        case AES_128_GCM_TYPE:
+        case AES_192_GCM_TYPE:
+        case AES_256_GCM_TYPE:
+            /* GCM produces no trailing block. */
+            *outl = 0;
+            /* Clear IV, since IV reuse is not recommended for AES GCM. */
+            XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE);
+            return WOLFSSL_SUCCESS;
+        default:
+            /* fall-through */
+            break;
+    }
+#endif /* !NO_AES && HAVE_AESGCM */
+
+    if (!out)
+        return WOLFSSL_FAILURE;
+
+    if (ctx->flags & WOLFSSL_EVP_CIPH_NO_PADDING) {
+        /* without padding the input must have been block-aligned */
+        if (ctx->bufUsed != 0) return WOLFSSL_FAILURE;
+        *outl = 0;
+    }
+    else if (ctx->enc) {
+        if (ctx->block_size == 1) {
+            *outl = 0;
+        }
+        else if ((ctx->bufUsed >= 0) && (ctx->block_size != 1)) {
+            /* pad whatever is buffered (possibly an empty block) and
+             * encrypt it as the final block */
+            padBlock(ctx);
+            PRINT_BUF(ctx->buf, ctx->block_size);
+            if (evpCipherBlock(ctx, out, ctx->buf, ctx->block_size) == 0) {
+                WOLFSSL_MSG("Final Cipher Block failed");
+                ret = WOLFSSL_FAILURE;
+            }
+            else {
+                PRINT_BUF(out, ctx->block_size);
+                *outl = ctx->block_size;
+            }
+        }
+    }
+    else {
+        if (ctx->block_size == 1) {
+            *outl = 0;
+        }
+        else if ((ctx->bufUsed % ctx->block_size) != 0) {
+            *outl = 0;
+            /* not enough padding for decrypt */
+            WOLFSSL_MSG("Final Cipher Block not enough padding");
+            ret = WOLFSSL_FAILURE;
+        }
+        else if (ctx->lastUsed) {
+            PRINT_BUF(ctx->lastBlock, ctx->block_size);
+            if ((fl = checkPad(ctx, ctx->lastBlock)) >= 0) {
+                XMEMCPY(out, ctx->lastBlock, fl);
+                *outl = fl;
+                if (ctx->lastUsed == 0 && ctx->bufUsed == 0) {
+                    /* return error in cases where the block length is incorrect */
+                    /* NOTE(review): this branch appears unreachable --
+                     * ctx->lastUsed is nonzero in this arm; confirm
+                     * intent. */
+                    WOLFSSL_MSG("Final Cipher Block bad length");
+                    ret = WOLFSSL_FAILURE;
+                }
+            }
+            else {
+                ret = WOLFSSL_FAILURE;
+            }
+        }
+        else if (ctx->lastUsed == 0 && ctx->bufUsed == 0) {
+            /* return error in cases where the block length is incorrect */
+            ret = WOLFSSL_FAILURE;
+        }
+    }
+    if (ret == WOLFSSL_SUCCESS) {
+        /* reset cipher state after final */
+        wolfSSL_EVP_CipherInit(ctx, NULL, NULL, NULL, -1);
+    }
+    return ret;
+}
+
+
+#ifdef WOLFSSL_EVP_DECRYPT_LEGACY
+/* This is a version of DecryptFinal to work with data encrypted with
+ * wolfSSL_EVP_EncryptFinal() with the broken padding. (pre-v3.12.0)
+ * Only call this after wolfSSL_EVP_CipherFinal() fails on a decrypt.
+ * Note, you don't know if the padding is good or bad with the old
+ * encrypt, but it is likely to be bad. It will update the output
+ * length with the block_size so the last block is still captured. */
+WOLFSSL_API int wolfSSL_EVP_DecryptFinal_legacy(WOLFSSL_EVP_CIPHER_CTX *ctx,
+        unsigned char *out, int *outl)
+{
+    int fl;
+    if (ctx == NULL || out == NULL || outl == NULL)
+        return BAD_FUNC_ARG;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_DecryptFinal_legacy");
+    if (ctx->block_size == 1) {
+        /* stream ciphers have nothing left to flush */
+        *outl = 0;
+        return WOLFSSL_SUCCESS;
+    }
+    if ((ctx->bufUsed % ctx->block_size) != 0) {
+        *outl = 0;
+        /* not enough padding for decrypt */
+        return WOLFSSL_FAILURE;
+    }
+    /* The original behavior of CipherFinal() was like it is now,
+     * but checkPad would return 0 in case of a bad pad. It would
+     * treat the pad as 0, and leave the data in the output buffer,
+     * and not try to copy anything. This converts checkPad's -1 error
+     * code to block_size.
+     */
+    if (ctx->lastUsed) {
+        PRINT_BUF(ctx->lastBlock, ctx->block_size);
+        if ((fl = checkPad(ctx, ctx->lastBlock)) < 0) {
+            /* bad pad: report the whole block as data (legacy quirk) */
+            fl = ctx->block_size;
+        }
+        else {
+            XMEMCPY(out, ctx->lastBlock, fl);
+        }
+        *outl = fl;
+    }
+    /* return error in cases where the block length is incorrect */
+    if (ctx->lastUsed == 0 && ctx->bufUsed == 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
+
+/* Report the block size recorded in the context for recognized cipher
+ * types; returns 0 for unknown types and BAD_FUNC_ARG for NULL.
+ * NOTE(review): the AES ECB cases are not wrapped in a HAVE_AES_ECB
+ * guard, unlike the other AES modes here -- confirm the enum values
+ * exist in all configurations. */
+int wolfSSL_EVP_CIPHER_CTX_block_size(const WOLFSSL_EVP_CIPHER_CTX *ctx)
+{
+    if (ctx == NULL) return BAD_FUNC_ARG;
+    switch (ctx->cipherType) {
+#if !defined(NO_AES) || !defined(NO_DES3)
+#if !defined(NO_AES)
+#if defined(HAVE_AES_CBC)
+    case AES_128_CBC_TYPE:
+    case AES_192_CBC_TYPE:
+    case AES_256_CBC_TYPE:
+#endif
+#if defined(HAVE_AESGCM)
+    case AES_128_GCM_TYPE:
+    case AES_192_GCM_TYPE:
+    case AES_256_GCM_TYPE:
+#endif
+#if defined(WOLFSSL_AES_COUNTER)
+    case AES_128_CTR_TYPE:
+    case AES_192_CTR_TYPE:
+    case AES_256_CTR_TYPE:
+#endif
+#if defined(WOLFSSL_AES_CFB)
+    case AES_128_CFB1_TYPE:
+    case AES_192_CFB1_TYPE:
+    case AES_256_CFB1_TYPE:
+    case AES_128_CFB8_TYPE:
+    case AES_192_CFB8_TYPE:
+    case AES_256_CFB8_TYPE:
+    case AES_128_CFB128_TYPE:
+    case AES_192_CFB128_TYPE:
+    case AES_256_CFB128_TYPE:
+#endif
+#if defined(WOLFSSL_AES_OFB)
+    case AES_128_OFB_TYPE:
+    case AES_192_OFB_TYPE:
+    case AES_256_OFB_TYPE:
+#endif
+#if defined(WOLFSSL_AES_XTS)
+    case AES_128_XTS_TYPE:
+    case AES_256_XTS_TYPE:
+#endif
+
+    case AES_128_ECB_TYPE:
+    case AES_192_ECB_TYPE:
+    case AES_256_ECB_TYPE:
+#endif /* !NO_AES */
+#ifndef NO_DES3
+    case DES_CBC_TYPE:
+    case DES_ECB_TYPE:
+    case DES_EDE3_CBC_TYPE:
+    case DES_EDE3_ECB_TYPE:
+#endif
+        /* all recognized types share the size stored at init time */
+        return ctx->block_size;
+#endif /* !NO_AES || !NO_DES3 */
+    default:
+        return 0;
+    }
+}
+
+/* Map an EVP cipher handle to the internal *_TYPE enum value.
+ * The handle is compared via XSTRNCMP against the EVP_* name constants,
+ * so WOLFSSL_EVP_CIPHER is presumably a name string in this build --
+ * TODO confirm.  Returns 0 for NULL or an unrecognized cipher. */
+static unsigned int cipherType(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    if (cipher == NULL) return 0; /* dummy for #ifdef */
+#ifndef NO_DES3
+    else if (EVP_DES_CBC && XSTRNCMP(cipher, EVP_DES_CBC, EVP_DES_SIZE) == 0)
+        return DES_CBC_TYPE;
+    else if (EVP_DES_EDE3_CBC && XSTRNCMP(cipher, EVP_DES_EDE3_CBC, EVP_DES_EDE3_SIZE) == 0)
+        return DES_EDE3_CBC_TYPE;
+#if !defined(NO_DES3)
+    /* NOTE(review): this nested !NO_DES3 guard is redundant (already
+     * inside #ifndef NO_DES3). */
+    else if (EVP_DES_ECB && XSTRNCMP(cipher, EVP_DES_ECB, EVP_DES_SIZE) == 0)
+        return DES_ECB_TYPE;
+    else if (EVP_DES_EDE3_ECB && XSTRNCMP(cipher, EVP_DES_EDE3_ECB, EVP_DES_EDE3_SIZE) == 0)
+        return DES_EDE3_ECB_TYPE;
+#endif /* !NO_DES3 */
+#endif
+#if !defined(NO_AES)
+#if defined(HAVE_AES_CBC)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CBC && XSTRNCMP(cipher, EVP_AES_128_CBC, EVP_AES_SIZE) == 0)
+        return AES_128_CBC_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CBC && XSTRNCMP(cipher, EVP_AES_192_CBC, EVP_AES_SIZE) == 0)
+        return AES_192_CBC_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CBC && XSTRNCMP(cipher, EVP_AES_256_CBC, EVP_AES_SIZE) == 0)
+        return AES_256_CBC_TYPE;
+    #endif
+#endif /* HAVE_AES_CBC */
+#if defined(HAVE_AESGCM)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_GCM && XSTRNCMP(cipher, EVP_AES_128_GCM, EVP_AES_SIZE) == 0)
+        return AES_128_GCM_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_GCM && XSTRNCMP(cipher, EVP_AES_192_GCM, EVP_AES_SIZE) == 0)
+        return AES_192_GCM_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_GCM && XSTRNCMP(cipher, EVP_AES_256_GCM, EVP_AES_SIZE) == 0)
+        return AES_256_GCM_TYPE;
+    #endif
+#endif /* HAVE_AESGCM */
+#if defined(WOLFSSL_AES_COUNTER)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CTR && XSTRNCMP(cipher, EVP_AES_128_CTR, EVP_AES_SIZE) == 0)
+        return AES_128_CTR_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CTR && XSTRNCMP(cipher, EVP_AES_192_CTR, EVP_AES_SIZE) == 0)
+        return AES_192_CTR_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CTR && XSTRNCMP(cipher, EVP_AES_256_CTR, EVP_AES_SIZE) == 0)
+        return AES_256_CTR_TYPE;
+    #endif
+#endif /* WOLFSSL_AES_COUNTER */
+#if defined(HAVE_AES_ECB)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_ECB && XSTRNCMP(cipher, EVP_AES_128_ECB, EVP_AES_SIZE) == 0)
+        return AES_128_ECB_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_ECB && XSTRNCMP(cipher, EVP_AES_192_ECB, EVP_AES_SIZE) == 0)
+        return AES_192_ECB_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_ECB && XSTRNCMP(cipher, EVP_AES_256_ECB, EVP_AES_SIZE) == 0)
+        return AES_256_ECB_TYPE;
+    #endif
+#endif /* HAVE_AES_ECB */
+#if defined(WOLFSSL_AES_XTS)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_XTS && XSTRNCMP(cipher, EVP_AES_128_XTS, EVP_AES_SIZE) == 0)
+        return AES_128_XTS_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_XTS && XSTRNCMP(cipher, EVP_AES_256_XTS, EVP_AES_SIZE) == 0)
+        return AES_256_XTS_TYPE;
+    #endif
+#endif /* WOLFSSL_AES_XTS */
+#if defined(WOLFSSL_AES_CFB)
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CFB1 && XSTRNCMP(cipher, EVP_AES_128_CFB1, EVP_AESCFB_SIZE) == 0)
+        return AES_128_CFB1_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CFB1 && XSTRNCMP(cipher, EVP_AES_192_CFB1, EVP_AESCFB_SIZE) == 0)
+        return AES_192_CFB1_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CFB1 && XSTRNCMP(cipher, EVP_AES_256_CFB1, EVP_AESCFB_SIZE) == 0)
+        return AES_256_CFB1_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CFB8 && XSTRNCMP(cipher, EVP_AES_128_CFB8, EVP_AESCFB_SIZE) == 0)
+        return AES_128_CFB8_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CFB8 && XSTRNCMP(cipher, EVP_AES_192_CFB8, EVP_AESCFB_SIZE) == 0)
+        return AES_192_CFB8_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CFB8 && XSTRNCMP(cipher, EVP_AES_256_CFB8, EVP_AESCFB_SIZE) == 0)
+        return AES_256_CFB8_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_128
+    else if (EVP_AES_128_CFB128 && XSTRNCMP(cipher, EVP_AES_128_CFB128, EVP_AESCFB_SIZE) == 0)
+        return AES_128_CFB128_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    else if (EVP_AES_192_CFB128 && XSTRNCMP(cipher, EVP_AES_192_CFB128, EVP_AESCFB_SIZE) == 0)
+        return AES_192_CFB128_TYPE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    else if (EVP_AES_256_CFB128 && XSTRNCMP(cipher, EVP_AES_256_CFB128, EVP_AESCFB_SIZE) == 0)
+        return AES_256_CFB128_TYPE;
+    #endif
+#endif /* WOLFSSL_AES_CFB */
+#endif /* !NO_AES */
+    else return 0;
+}
+
+/* Block size in bytes for a cipher handle: AES_BLOCK_SIZE (16) for the
+ * listed AES modes, 8 for the DES variants, BAD_FUNC_ARG for NULL.
+ * CFB/OFB/XTS types are not listed and fall through to the default,
+ * returning 0. */
+int wolfSSL_EVP_CIPHER_block_size(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    if (cipher == NULL) return BAD_FUNC_ARG;
+    switch (cipherType(cipher)) {
+#if !defined(NO_AES)
+  #if defined(HAVE_AES_CBC)
+      case AES_128_CBC_TYPE:
+      case AES_192_CBC_TYPE:
+      case AES_256_CBC_TYPE:
+          return AES_BLOCK_SIZE;
+  #endif
+  #if defined(HAVE_AESGCM)
+      case AES_128_GCM_TYPE:
+      case AES_192_GCM_TYPE:
+      case AES_256_GCM_TYPE:
+          return AES_BLOCK_SIZE;
+  #endif
+  #if defined(WOLFSSL_AES_COUNTER)
+      case AES_128_CTR_TYPE:
+      case AES_192_CTR_TYPE:
+      case AES_256_CTR_TYPE:
+          return AES_BLOCK_SIZE;
+  #endif
+  #if defined(HAVE_AES_ECB)
+      case AES_128_ECB_TYPE:
+      case AES_192_ECB_TYPE:
+      case AES_256_ECB_TYPE:
+          return AES_BLOCK_SIZE;
+  #endif
+#endif /* !NO_AES */
+  #ifndef NO_DES3
+      case DES_CBC_TYPE: return 8;
+      case DES_EDE3_CBC_TYPE: return 8;
+      case DES_ECB_TYPE: return 8;
+      case DES_EDE3_ECB_TYPE: return 8;
+  #endif
+      default:
+          return 0;
+  }
+}
+
+/* Classify a cipher handle as one of the WOLFSSL_EVP_CIPH_*_MODE
+ * values; 0 for NULL or unknown.  CFB/OFB/XTS names are not listed and
+ * therefore report 0.
+ * NOTE(review): the AES ECB cases lack a HAVE_AES_ECB guard, unlike
+ * sibling functions -- confirm the enum values exist in all configs. */
+unsigned long WOLFSSL_CIPHER_mode(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    switch (cipherType(cipher)) {
+#if !defined(NO_AES)
+    #if defined(HAVE_AES_CBC)
+        case AES_128_CBC_TYPE:
+        case AES_192_CBC_TYPE:
+        case AES_256_CBC_TYPE:
+            return WOLFSSL_EVP_CIPH_CBC_MODE;
+    #endif
+    #if defined(HAVE_AESGCM)
+        case AES_128_GCM_TYPE:
+        case AES_192_GCM_TYPE:
+        case AES_256_GCM_TYPE:
+            return WOLFSSL_EVP_CIPH_GCM_MODE;
+    #endif
+    #if defined(WOLFSSL_AES_COUNTER)
+        case AES_128_CTR_TYPE:
+        case AES_192_CTR_TYPE:
+        case AES_256_CTR_TYPE:
+            return WOLFSSL_EVP_CIPH_CTR_MODE;
+    #endif
+        case AES_128_ECB_TYPE:
+        case AES_192_ECB_TYPE:
+        case AES_256_ECB_TYPE:
+            return WOLFSSL_EVP_CIPH_ECB_MODE;
+#endif /* !NO_AES */
+    #ifndef NO_DES3
+        case DES_CBC_TYPE:
+        case DES_EDE3_CBC_TYPE:
+            return WOLFSSL_EVP_CIPH_CBC_MODE;
+        case DES_ECB_TYPE:
+        case DES_EDE3_ECB_TYPE:
+            return WOLFSSL_EVP_CIPH_ECB_MODE;
+    #endif
+    #ifndef NO_RC4
+        case ARC4_TYPE:
+            return EVP_CIPH_STREAM_CIPHER;
+    #endif
+        default:
+            return 0;
+    }
+}
+
+/* NULL-safe wrapper over WOLFSSL_CIPHER_mode. */
+unsigned long WOLFSSL_EVP_CIPHER_mode(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    return (cipher != NULL) ? WOLFSSL_CIPHER_mode(cipher) : 0;
+}
+
+/* OR the given flag bits into the context flags; NULL ctx is a no-op. */
+void wolfSSL_EVP_CIPHER_CTX_set_flags(WOLFSSL_EVP_CIPHER_CTX *ctx, int flags)
+{
+    if (ctx == NULL)
+        return;
+    ctx->flags |= flags;
+}
+
+/* Clear the given flag bits from the context flags; NULL ctx is a
+ * no-op. */
+void wolfSSL_EVP_CIPHER_CTX_clear_flags(WOLFSSL_EVP_CIPHER_CTX *ctx, int flags)
+{
+    if (ctx == NULL)
+        return;
+    ctx->flags &= ~flags;
+}
+
+/* Report cipher flags; only the mode classification is tracked, so
+ * this is equivalent to WOLFSSL_CIPHER_mode.  Returns 0 for NULL. */
+unsigned long wolfSSL_EVP_CIPHER_flags(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    return (cipher != NULL) ? WOLFSSL_CIPHER_mode(cipher) : 0;
+}
+
+/* Enable (padding != 0) or disable (padding == 0) PKCS#7 padding by
+ * toggling WOLFSSL_EVP_CIPH_NO_PADDING.  Returns 1, or BAD_FUNC_ARG
+ * for a NULL ctx. */
+int wolfSSL_EVP_CIPHER_CTX_set_padding(WOLFSSL_EVP_CIPHER_CTX *ctx, int padding)
+{
+    if (ctx == NULL) return BAD_FUNC_ARG;
+    if (padding == 0)
+        ctx->flags |= WOLFSSL_EVP_CIPH_NO_PADDING;
+    else
+        ctx->flags &= ~WOLFSSL_EVP_CIPH_NO_PADDING;
+    return 1;
+}
+
+/* Digest registration is a no-op in wolfSSL's compat layer; kept for
+ * OpenSSL API compatibility.  Always returns 0. */
+int wolfSSL_EVP_add_digest(const WOLFSSL_EVP_MD *digest)
+{
+    (void)digest;
+    return 0;
+}
+
+
+/* Frees the WOLFSSL_EVP_PKEY_CTX passed in, releasing its key and any
+ * peer key references.
+ *
+ * return WOLFSSL_SUCCESS on success; returns 0 (not WOLFSSL_FAILURE)
+ * when ctx is NULL
+ */
+int wolfSSL_EVP_PKEY_CTX_free(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    if (ctx == NULL) return 0;
+    WOLFSSL_ENTER("EVP_PKEY_CTX_free");
+    if (ctx->pkey != NULL)
+        wolfSSL_EVP_PKEY_free(ctx->pkey);
+    if (ctx->peerKey != NULL)
+        wolfSSL_EVP_PKEY_free(ctx->peerKey);
+    XFREE(ctx, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
+    return WOLFSSL_SUCCESS;
+}
+
+
+/* Creates a new WOLFSSL_EVP_PKEY_CTX structure.
+ *
+ * pkey  key structure to use with new WOLFSSL_EVP_PKEY_CTX
+ * e     engine to use. It should be NULL at this time.
+ *
+ * return the new structure on success and NULL if failed.
+ */
+WOLFSSL_EVP_PKEY_CTX *wolfSSL_EVP_PKEY_CTX_new(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_ENGINE *e)
+{
+    WOLFSSL_EVP_PKEY_CTX* ctx;
+    int type = NID_undef;
+
+    if (pkey == NULL) return 0;
+    if (e != NULL) return 0;
+    WOLFSSL_ENTER("EVP_PKEY_CTX_new");
+
+    ctx = (WOLFSSL_EVP_PKEY_CTX*)XMALLOC(sizeof(WOLFSSL_EVP_PKEY_CTX), NULL,
+            DYNAMIC_TYPE_PUBLIC_KEY);
+    if (ctx == NULL) return NULL;
+    XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_PKEY_CTX));
+    ctx->pkey = pkey;
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+    /* default padding mode for RSA operations */
+    ctx->padding = RSA_PKCS1_PADDING;
+#endif
+    type = wolfSSL_EVP_PKEY_type(pkey->type);
+
+    /* NOTE(review): the reference count is only bumped for recognized
+     * key types, yet ctx->pkey is set unconditionally -- for an
+     * unrecognized type, freeing both the ctx and the caller's pkey
+     * would over-release; confirm intended ownership. */
+    if (type != NID_undef) {
+        if (wc_LockMutex(&pkey->refMutex) != 0) {
+            WOLFSSL_MSG("Couldn't lock pkey mutex");
+        }
+        pkey->references++;
+
+        wc_UnLockMutex(&pkey->refMutex);
+    }
+    return ctx;
+}
+
+
+/* Record the RSA padding mode to use for subsequent operations on this
+ * context.
+ *
+ * ctx      structure to set padding in.
+ * padding  RSA padding type
+ *
+ * returns WOLFSSL_SUCCESS on success, 0 when ctx is NULL.
+ */
+int wolfSSL_EVP_PKEY_CTX_set_rsa_padding(WOLFSSL_EVP_PKEY_CTX *ctx, int padding)
+{
+    if (ctx == NULL)
+        return 0;
+    WOLFSSL_ENTER("EVP_PKEY_CTX_set_rsa_padding");
+    ctx->padding = padding;
+    return WOLFSSL_SUCCESS;
+}
+
+/* Create a PKEY context for the given algorithm id.  An interim pkey
+ * of that type is allocated and handed to wolfSSL_EVP_PKEY_CTX_new;
+ * it is freed again if context creation fails.  Returns NULL on
+ * failure. */
+WOLFSSL_EVP_PKEY_CTX *wolfSSL_EVP_PKEY_CTX_new_id(int id, WOLFSSL_ENGINE *e)
+{
+    WOLFSSL_EVP_PKEY_CTX* ctx = NULL;
+    WOLFSSL_EVP_PKEY*     pkey;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_CTX_new_id");
+
+    pkey = wolfSSL_EVP_PKEY_new_ex(NULL);
+    if (pkey != NULL) {
+        pkey->type = id;
+        ctx = wolfSSL_EVP_PKEY_CTX_new(pkey, e);
+        if (ctx == NULL)
+            wolfSSL_EVP_PKEY_free(pkey);
+    }
+    return ctx;
+}
+
+/* Record the RSA modulus size (bits) for a later keygen.
+ * Always returns WOLFSSL_SUCCESS, even for a NULL ctx (original
+ * behavior kept for compatibility). */
+int wolfSSL_EVP_PKEY_CTX_set_rsa_keygen_bits(WOLFSSL_EVP_PKEY_CTX *ctx, int bits)
+{
+    if (ctx != NULL)
+        ctx->nbits = bits;
+    return WOLFSSL_SUCCESS;
+}
+
+
+/* Put the context into key-derivation mode, dropping any peer key and
+ * padding/size settings left from a previous operation. */
+int wolfSSL_EVP_PKEY_derive_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_derive_init");
+
+    if (ctx == NULL)
+        return WOLFSSL_FAILURE;
+
+    wolfSSL_EVP_PKEY_free(ctx->peerKey);
+    ctx->op      = EVP_PKEY_OP_DERIVE;
+    ctx->padding = 0;
+    ctx->nbits   = 0;
+    return WOLFSSL_SUCCESS;
+}
+
+/* Attach the peer's public key to an in-progress derive operation.
+ * Any previously set peer key is released, and a new reference is
+ * taken on peer (up_ref) so the caller keeps its own reference.
+ * Requires a prior wolfSSL_EVP_PKEY_derive_init.
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE. */
+int wolfSSL_EVP_PKEY_derive_set_peer(WOLFSSL_EVP_PKEY_CTX *ctx, WOLFSSL_EVP_PKEY *peer)
+{
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_derive_set_peer");
+
+    if (!ctx || ctx->op != EVP_PKEY_OP_DERIVE) {
+        return WOLFSSL_FAILURE;
+    }
+    wolfSSL_EVP_PKEY_free(ctx->peerKey);
+    ctx->peerKey = peer;
+    /* failure to take a reference would leave a borrowed pointer;
+     * clear it so CTX_free cannot over-release */
+    if (!wolfSSL_EVP_PKEY_up_ref(peer)) {
+        ctx->peerKey = NULL;
+        return WOLFSSL_FAILURE;
+    }
+    return WOLFSSL_SUCCESS;
+}
+
+#if !defined(NO_DH) && defined(HAVE_ECC)
+/* Derive a shared secret using DH or ECDH, depending on the key type.
+ * With key == NULL, only the required buffer length is written to
+ * *keylen (OpenSSL convention); otherwise *keylen is checked as the
+ * buffer capacity on input and set to the secret length on output.
+ * Both keys must be present and of the same type.
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE. */
+int wolfSSL_EVP_PKEY_derive(WOLFSSL_EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen)
+{
+    int len;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_derive");
+
+    if (!ctx || ctx->op != EVP_PKEY_OP_DERIVE || !ctx->pkey || !ctx->peerKey || !keylen
+            || ctx->pkey->type != ctx->peerKey->type) {
+        return WOLFSSL_FAILURE;
+    }
+    switch (ctx->pkey->type) {
+#ifndef NO_DH
+    case EVP_PKEY_DH:
+        /* Use DH */
+        if (!ctx->pkey->dh || !ctx->peerKey->dh || !ctx->peerKey->dh->pub_key) {
+            return WOLFSSL_FAILURE;
+        }
+        /* secret length equals the DH prime size */
+        if ((len = wolfSSL_DH_size(ctx->pkey->dh)) <= 0) {
+            return WOLFSSL_FAILURE;
+        }
+        if (key) {
+            if (*keylen < (size_t)len) {
+                return WOLFSSL_FAILURE;
+            }
+            if (wolfSSL_DH_compute_key(key, ctx->peerKey->dh->pub_key,
+                                       ctx->pkey->dh) != len) {
+                return WOLFSSL_FAILURE;
+            }
+        }
+        *keylen = (size_t)len;
+        break;
+#endif
+#ifdef HAVE_ECC
+    case EVP_PKEY_EC:
+        /* Use ECDH */
+        if (!ctx->pkey->ecc || !ctx->peerKey->ecc) {
+            return WOLFSSL_FAILURE;
+        }
+        /* set internal key if not done */
+        if (!ctx->pkey->ecc->inSet) {
+            if (SetECKeyInternal(ctx->pkey->ecc) != WOLFSSL_SUCCESS) {
+                WOLFSSL_MSG("SetECKeyInternal failed");
+                return WOLFSSL_FAILURE;
+            }
+        }
+        /* make sure the peer's public point is populated too */
+        if (!ctx->peerKey->ecc->exSet || !ctx->peerKey->ecc->pub_key->internal) {
+            if (SetECKeyExternal(ctx->peerKey->ecc) != WOLFSSL_SUCCESS) {
+                WOLFSSL_MSG("SetECKeyExternal failed");
+                return WOLFSSL_FAILURE;
+            }
+        }
+        if (!(len = wc_ecc_size((ecc_key*)ctx->pkey->ecc->internal))) {
+            return WOLFSSL_FAILURE;
+        }
+        if (key) {
+            word32 len32 = (word32)len;
+            if (*keylen < len32) {
+                WOLFSSL_MSG("buffer too short");
+                return WOLFSSL_FAILURE;
+            }
+            if (wc_ecc_shared_secret_ssh((ecc_key*)ctx->pkey->ecc->internal,
+                    (ecc_point*)ctx->peerKey->ecc->pub_key->internal,
+                    key, &len32) != MP_OKAY) {
+                WOLFSSL_MSG("wc_ecc_shared_secret failed");
+                return WOLFSSL_FAILURE;
+            }
+            /* the shared-secret call may shorten the output */
+            len = (int)len32;
+        }
+        *keylen = (size_t)len;
+        break;
+#endif
+    default:
+        WOLFSSL_MSG("Unknown key type");
+        return WOLFSSL_FAILURE;
+    }
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
+/* Uses the WOLFSSL_EVP_PKEY_CTX to decrypt a buffer.
+ *
+ * ctx    structure to decrypt with
+ * out    buffer to hold the results
+ * outlen initially holds size of out buffer and gets set to decrypt result size
+ * in     buffer to decrypt
+ * inlen  length of in buffer
+ *
+ * Only RSA keys are supported; EC decrypt is not implemented.
+ * NOTE(review): unlike wolfSSL_EVP_PKEY_encrypt, ctx->op is not
+ * checked here, so decryption proceeds without a prior decrypt_init --
+ * confirm whether callers rely on that.  ctx->pkey is dereferenced
+ * without a NULL check (presumably guaranteed by CTX creation).
+ *
+ * returns WOLFSSL_SUCCESS on success.
+ */
+int wolfSSL_EVP_PKEY_decrypt(WOLFSSL_EVP_PKEY_CTX *ctx,
+                     unsigned char *out, size_t *outlen,
+                     const unsigned char *in, size_t inlen)
+{
+    int len = 0;
+
+    if (ctx == NULL) return 0;
+    WOLFSSL_ENTER("EVP_PKEY_decrypt");
+
+    (void)out;
+    (void)outlen;
+    (void)in;
+    (void)inlen;
+    (void)len;
+
+    switch (ctx->pkey->type) {
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+    case EVP_PKEY_RSA:
+        len = wolfSSL_RSA_private_decrypt((int)inlen, (unsigned char*)in, out,
+              ctx->pkey->rsa, ctx->padding);
+        if (len < 0) break;
+        else {
+            *outlen = len;
+            return WOLFSSL_SUCCESS;
+        }
+#endif /* NO_RSA */
+
+    case EVP_PKEY_EC:
+        WOLFSSL_MSG("not implemented");
+        FALL_THROUGH;
+    default:
+        break;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+/* Initialize a WOLFSSL_EVP_PKEY_CTX structure for decryption.
+ * Only RSA keys are supported; EC decrypt is not implemented.
+ *
+ * Returns WOLFSSL_FAILURE on failure and WOLFSSL_SUCCESS on success.
+ */
+int wolfSSL_EVP_PKEY_decrypt_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    if (ctx == NULL) return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_PKEY_decrypt_init");
+
+    if (ctx->pkey->type == EVP_PKEY_RSA) {
+        ctx->op = EVP_PKEY_OP_DECRYPT;
+        return WOLFSSL_SUCCESS;
+    }
+    if (ctx->pkey->type == EVP_PKEY_EC) {
+        WOLFSSL_MSG("not implemented");
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+/* Use a WOLFSSL_EVP_PKEY_CTX structure to encrypt data
+ *
+ * ctx    WOLFSSL_EVP_PKEY_CTX structure to use with encryption
+ * out    buffer to hold encrypted data
+ * outlen set to the length of the encrypted result on success
+ * in     data to be encrypted
+ * inlen  length of in buffer
+ *
+ * Requires a prior wolfSSL_EVP_PKEY_encrypt_init (ctx->op check).
+ * Only RSA keys are supported; EC encrypt is not implemented.
+ *
+ * Returns WOLFSSL_FAILURE on failure and WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_EVP_PKEY_encrypt(WOLFSSL_EVP_PKEY_CTX *ctx,
+                     unsigned char *out, size_t *outlen,
+                     const unsigned char *in, size_t inlen)
+{
+    int len = 0;
+    if (ctx == NULL) return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_PKEY_encrypt");
+    if (ctx->op != EVP_PKEY_OP_ENCRYPT) return WOLFSSL_FAILURE;
+
+    (void)out;
+    (void)outlen;
+    (void)in;
+    (void)inlen;
+    (void)len;
+    switch (ctx->pkey->type) {
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+    case EVP_PKEY_RSA:
+        len = wolfSSL_RSA_public_encrypt((int)inlen, (unsigned char *)in, out,
+              ctx->pkey->rsa, ctx->padding);
+        if (len < 0)
+            break;
+        else {
+            *outlen = len;
+            return WOLFSSL_SUCCESS;
+        }
+#endif /* NO_RSA */
+
+    case EVP_PKEY_EC:
+        WOLFSSL_MSG("not implemented");
+        FALL_THROUGH;
+    default:
+        break;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+/* Initialize a WOLFSSL_EVP_PKEY_CTX structure to encrypt data
+ *
+ * ctx WOLFSSL_EVP_PKEY_CTX structure to use with encryption
+ *
+ * Returns WOLFSSL_FAILURE on failure and WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_EVP_PKEY_encrypt_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    /* fix: ctx->pkey is dereferenced below; reject NULL key as well as
+     * NULL ctx to avoid a crash */
+    if (ctx == NULL || ctx->pkey == NULL) return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_PKEY_encrypt_init");
+
+    switch (ctx->pkey->type) {
+    case EVP_PKEY_RSA:
+        ctx->op = EVP_PKEY_OP_ENCRYPT;
+        return WOLFSSL_SUCCESS;
+    case EVP_PKEY_EC:
+        WOLFSSL_MSG("not implemented");
+        FALL_THROUGH;
+    default:
+        break;
+    }
+    return WOLFSSL_FAILURE;
+}
+/******************************************************************************
+* wolfSSL_EVP_PKEY_sign_init - initializes a public key algorithm context for
+* a signing operation.
+*
+* RETURNS:
+* returns WOLFSSL_SUCCESS on success, otherwise returns -2
+*/
+WOLFSSL_API int wolfSSL_EVP_PKEY_sign_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_PKEY_sign_init");
+
+    /* a missing context or key cannot be initialized; -2 mirrors
+     * OpenSSL's "operation not supported" convention */
+    if (ctx == NULL || ctx->pkey == NULL)
+        return -2;
+
+    /* only RSA signing is supported so far */
+    if (ctx->pkey->type == EVP_PKEY_RSA) {
+        ctx->op = EVP_PKEY_OP_SIGN;
+        return WOLFSSL_SUCCESS;
+    }
+
+    if (ctx->pkey->type == EVP_PKEY_EC)
+        WOLFSSL_MSG("not implemented");
+
+    return -2;
+}
+/******************************************************************************
+* wolfSSL_EVP_PKEY_sign - performs a public key signing operation using ctx
+* The data to be signed should be hashed since the function does not hash the data.
+*
+* RETURNS:
+* returns WOLFSSL_SUCCESS on success, otherwise returns WOLFSSL_FAILURE
+*/
+
+/* Performs a raw public-key signing operation on an already-hashed input.
+ * Requires a prior successful wolfSSL_EVP_PKEY_sign_init (checked via
+ * ctx->op). On success *siglen is set to the signature length. */
+WOLFSSL_API int wolfSSL_EVP_PKEY_sign(WOLFSSL_EVP_PKEY_CTX *ctx, unsigned char *sig,
+ size_t *siglen, const unsigned char *tbs, size_t tbslen)
+{
+ int len = 0;
+
+ WOLFSSL_MSG("wolfSSL_EVP_PKEY_sign");
+
+ if (!ctx || ctx->op != EVP_PKEY_OP_SIGN || !ctx->pkey)
+ return WOLFSSL_FAILURE;
+
+ /* silence unused warnings when all key types are compiled out */
+ (void)sig;
+ (void)siglen;
+ (void)tbs;
+ (void)tbslen;
+ (void)len;
+
+ switch (ctx->pkey->type) {
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+ case EVP_PKEY_RSA:
+ /* RSA "sign" here is a private-key encrypt of the digest bytes;
+ * returns signature length or negative on error */
+ len = wolfSSL_RSA_private_encrypt((int)tbslen, (unsigned char*)tbs, sig,
+ ctx->pkey->rsa, ctx->padding);
+ if (len < 0)
+ break;
+ else {
+ *siglen = len;
+ return WOLFSSL_SUCCESS;
+ }
+#endif /* NO_RSA */
+
+ case EVP_PKEY_EC:
+ WOLFSSL_MSG("not implemented");
+ FALL_THROUGH;
+ default:
+ break;
+ }
+ return WOLFSSL_FAILURE;
+}
+
+/* Get the size in bits for WOLFSSL_EVP_PKEY key
+ *
+ * pkey WOLFSSL_EVP_PKEY structure to get key size of
+ *
+ * returns the size in bits of key on success
+ */
+int wolfSSL_EVP_PKEY_bits(const WOLFSSL_EVP_PKEY *pkey)
+{
+    int sz;
+
+    if (pkey == NULL) return 0;
+    WOLFSSL_ENTER("EVP_PKEY_bits");
+
+    /* key size in bits is simply the byte size times eight */
+    sz = wolfSSL_EVP_PKEY_size((WOLFSSL_EVP_PKEY*)pkey);
+    if (sz == 0)
+        return 0;
+    return sz * 8;
+}
+
+
+/* No per-context keygen setup is needed; present for OpenSSL API
+ * compatibility and always reports success. */
+int wolfSSL_EVP_PKEY_keygen_init(WOLFSSL_EVP_PKEY_CTX *ctx)
+{
+    (void)ctx;
+    return WOLFSSL_SUCCESS;
+}
+
+/* Generate a key of the type recorded in *ppkey (or a fresh pkey when
+ * *ppkey is NULL).
+ *
+ * ctx   context; ctx->nbits selects the RSA key size
+ * ppkey in/out key holder; on failure a key we allocated is freed and
+ *       *ppkey is set to NULL, a caller-supplied key is left untouched
+ *
+ * Returns WOLFSSL_SUCCESS on success, BAD_FUNC_ARG on bad arguments and
+ * WOLFSSL_FAILURE otherwise.
+ */
+int wolfSSL_EVP_PKEY_keygen(WOLFSSL_EVP_PKEY_CTX *ctx,
+    WOLFSSL_EVP_PKEY **ppkey)
+{
+    int ret = WOLFSSL_FAILURE;
+    int ownPkey = 0;
+    WOLFSSL_EVP_PKEY* pkey;
+
+    if (ctx == NULL || ppkey == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    pkey = *ppkey;
+    if (pkey == NULL) {
+        /* no key supplied: allocate one we own until success */
+        ownPkey = 1;
+        pkey = wolfSSL_EVP_PKEY_new();
+
+        if (pkey == NULL)
+            return ret;
+    }
+
+    switch (pkey->type) {
+#if !defined(HAVE_FAST_RSA) && defined(WOLFSSL_KEY_GEN) && \
+    !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+    case EVP_PKEY_RSA:
+        pkey->rsa = wolfSSL_RSA_generate_key(ctx->nbits, WC_RSA_EXPONENT,
+            NULL, NULL);
+        if (pkey->rsa) {
+            pkey->ownRsa = 1;
+            /* cache the DER private-key encoding alongside the key */
+            pkey->pkey_sz = wolfSSL_i2d_RSAPrivateKey(pkey->rsa,
+                (unsigned char**)&pkey->pkey.ptr);
+            ret = WOLFSSL_SUCCESS;
+        }
+        break;
+#endif
+#ifdef HAVE_ECC
+    case EVP_PKEY_EC:
+        pkey->ecc = wolfSSL_EC_KEY_new();
+        if (pkey->ecc) {
+            ret = wolfSSL_EC_KEY_generate_key(pkey->ecc);
+            if (ret == WOLFSSL_SUCCESS) {
+                pkey->ownEcc = 1;
+            }
+        }
+        break; /* fix: case previously fell through into default without
+                * a break or FALL_THROUGH annotation */
+#endif
+    default:
+        break;
+    }
+
+    if (ret != WOLFSSL_SUCCESS && ownPkey) {
+        /* free only keys we allocated, never the caller's */
+        wolfSSL_EVP_PKEY_free(pkey);
+        pkey = NULL;
+    }
+
+    *ppkey = pkey;
+
+    return ret;
+}
+
+/* Get the size in bytes for WOLFSSL_EVP_PKEY key
+ *
+ * pkey WOLFSSL_EVP_PKEY structure to get key size of
+ *
+ * returns the size of a key on success which is the maximum size of a
+ * signature
+ */
+int wolfSSL_EVP_PKEY_size(WOLFSSL_EVP_PKEY *pkey)
+{
+ if (pkey == NULL) return 0;
+ WOLFSSL_ENTER("EVP_PKEY_size");
+
+ switch (pkey->type) {
+#ifndef NO_RSA
+ case EVP_PKEY_RSA:
+ /* RSA: size of the modulus in bytes */
+ return (int)wolfSSL_RSA_size((const WOLFSSL_RSA*)(pkey->rsa));
+#endif /* !NO_RSA */
+
+#ifdef HAVE_ECC
+ case EVP_PKEY_EC:
+ if (pkey->ecc == NULL || pkey->ecc->internal == NULL) {
+ WOLFSSL_MSG("No ECC key has been set");
+ break;
+ }
+ /* ECC: curve size in bytes from the underlying wolfcrypt key */
+ return wc_ecc_size((ecc_key*)(pkey->ecc->internal));
+#endif /* HAVE_ECC */
+
+ default:
+ break;
+ }
+ /* unknown type or missing key material */
+ return 0;
+}
+
+#ifndef NO_WOLFSSL_STUB
+/* Stub for OpenSSL compatibility: wolfSSL does not track missing key
+ * parameters, so this always reports "nothing missing" (returns 0). */
+WOLFSSL_API int wolfSSL_EVP_PKEY_missing_parameters(WOLFSSL_EVP_PKEY *pkey)
+{
+ (void)pkey;
+ /* not using missing params callback and returning zero to indicate success */
+ return 0;
+}
+#endif
+
+/* Compare two keys: returns 0 when they have the same type, the same key
+ * size and (when both encodings are present) identical encoded key bytes;
+ * returns -1 on any mismatch or bad argument. */
+WOLFSSL_API int wolfSSL_EVP_PKEY_cmp(const WOLFSSL_EVP_PKEY *a, const WOLFSSL_EVP_PKEY *b)
+{
+ int ret = -1; /* failure */
+ int a_sz = 0, b_sz = 0;
+
+ if (a == NULL || b == NULL)
+ return ret;
+
+ /* check its the same type of key */
+ if (a->type != b->type)
+ return ret;
+
+ /* get size based on key type */
+ switch (a->type) {
+#ifndef NO_RSA
+ case EVP_PKEY_RSA:
+ a_sz = (int)wolfSSL_RSA_size((const WOLFSSL_RSA*)(a->rsa));
+ b_sz = (int)wolfSSL_RSA_size((const WOLFSSL_RSA*)(b->rsa));
+ break;
+#endif /* !NO_RSA */
+#ifdef HAVE_ECC
+ case EVP_PKEY_EC:
+ if (a->ecc == NULL || a->ecc->internal == NULL ||
+ b->ecc == NULL || b->ecc->internal == NULL) {
+ return ret;
+ }
+ a_sz = wc_ecc_size((ecc_key*)(a->ecc->internal));
+ b_sz = wc_ecc_size((ecc_key*)(b->ecc->internal));
+ break;
+#endif /* HAVE_ECC */
+ default:
+ break;
+ } /* switch (a->type) */
+
+ /* check size; unknown types leave sizes at 0 and fail here */
+ if (a_sz <= 0 || b_sz <= 0 || a_sz != b_sz) {
+ return ret;
+ }
+
+ /* check public key size */
+ if (a->pkey_sz > 0 && b->pkey_sz > 0 && a->pkey_sz != b->pkey_sz) {
+ return ret;
+ }
+
+ /* check public key; only compared when both encodings are available */
+ if (a->pkey.ptr && b->pkey.ptr) {
+ if (XMEMCMP(a->pkey.ptr, b->pkey.ptr, a->pkey_sz) != 0) {
+ return ret;
+ }
+ }
+ ret = 0; /* success */
+
+ return ret;
+}
+
+/* Initialize structure for signing
+ *
+ * ctx WOLFSSL_EVP_MD_CTX structure to initialize
+ * type is the type of message digest to use
+ *
+ * returns WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_EVP_SignInit(WOLFSSL_EVP_MD_CTX *ctx, const WOLFSSL_EVP_MD *type)
+{
+    if (ctx == NULL)
+        return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_SignInit");
+    /* signing begins as an ordinary digest operation */
+    return wolfSSL_EVP_DigestInit(ctx, type);
+}
+
+WOLFSSL_API int wolfSSL_EVP_SignInit_ex(WOLFSSL_EVP_MD_CTX* ctx,
+                                     const WOLFSSL_EVP_MD* type,
+                                     WOLFSSL_ENGINE *impl)
+{
+    if (ctx == NULL) return WOLFSSL_FAILURE;
+    /* fix: log message previously said "EVP_SignInit" */
+    WOLFSSL_ENTER("EVP_SignInit_ex");
+    return wolfSSL_EVP_DigestInit_ex(ctx,type,impl);
+}
+
+
+/* Update structure with data for signing
+ *
+ * ctx WOLFSSL_EVP_MD_CTX structure to update
+ * data buffer holding data to update with for sign
+ * len length of data buffer
+ *
+ * returns WOLFSSL_SUCCESS on success
+ */
+/* Feed data into a signing digest; returns WOLFSSL_SUCCESS on success. */
+int wolfSSL_EVP_SignUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *data, size_t len)
+{
+    if (ctx == NULL) return 0;
+    /* fix: log message had a stray '(' -- was "EVP_SignUpdate(" */
+    WOLFSSL_ENTER("EVP_SignUpdate");
+    return wolfSSL_EVP_DigestUpdate(ctx, data, len);
+}
+
+/* Digest-name lookup table: maps an EVP digest name string to its
+ * wolfcrypt hash type and OpenSSL NID. Terminated by a NULL-name entry. */
+static const struct s_ent {
+    const int macType;
+    const int nid;
+    const char *name;
+} md_tbl[] = {
+#ifndef NO_MD4
+    {WC_HASH_TYPE_MD4, NID_md4, "MD4"},
+#endif /* NO_MD4 */
+
+#ifndef NO_MD5
+    {WC_HASH_TYPE_MD5, NID_md5, "MD5"},
+#endif /* NO_MD5 */
+
+#ifndef NO_SHA
+    {WC_HASH_TYPE_SHA, NID_sha1, "SHA"},
+#endif /* NO_SHA */
+
+#ifdef WOLFSSL_SHA224
+    {WC_HASH_TYPE_SHA224, NID_sha224, "SHA224"},
+#endif /* WOLFSSL_SHA224 */
+#ifndef NO_SHA256
+    {WC_HASH_TYPE_SHA256, NID_sha256, "SHA256"},
+#endif
+
+#ifdef WOLFSSL_SHA384
+    {WC_HASH_TYPE_SHA384, NID_sha384, "SHA384"},
+#endif /* WOLFSSL_SHA384 */
+#ifdef WOLFSSL_SHA512
+    {WC_HASH_TYPE_SHA512, NID_sha512, "SHA512"},
+#endif /* WOLFSSL_SHA512 */
+#ifndef WOLFSSL_NOSHA3_224
+    {WC_HASH_TYPE_SHA3_224, NID_sha3_224, "SHA3_224"},
+#endif
+#ifndef WOLFSSL_NOSHA3_256
+    {WC_HASH_TYPE_SHA3_256, NID_sha3_256, "SHA3_256"},
+#endif
+/* fix: SHA3-384 entry was unconditional while its 224/256/512 siblings
+ * are guarded; builds defining WOLFSSL_NOSHA3_384 would fail */
+#ifndef WOLFSSL_NOSHA3_384
+    {WC_HASH_TYPE_SHA3_384, NID_sha3_384, "SHA3_384"},
+#endif
+#ifndef WOLFSSL_NOSHA3_512
+    {WC_HASH_TYPE_SHA3_512, NID_sha3_512, "SHA3_512"},
+#endif
+    {0, 0, NULL}
+};
+
+/* Translate an EVP digest (a name string in wolfSSL) into the wolfcrypt
+ * hash type, or WC_HASH_TYPE_NONE when unknown or NULL. */
+static int wolfSSL_EVP_md2macType(const WOLFSSL_EVP_MD *md)
+{
+    const struct s_ent *e;
+
+    if (md == NULL)
+        return WC_HASH_TYPE_NONE;
+
+    for (e = md_tbl; e->name != NULL; e++) {
+        /* compare including the terminating NUL so "SHA" != "SHA256" */
+        if (XSTRNCMP((const char *)md, e->name, XSTRLEN(e->name)+1) == 0)
+            return e->macType;
+    }
+    return WC_HASH_TYPE_NONE;
+}
+
+/* Finalize structure for signing
+ *
+ * ctx WOLFSSL_EVP_MD_CTX structure to finalize
+ * sigret buffer to hold resulting signature
+ * siglen length of sigret buffer
+ * pkey key to sign with
+ *
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_EVP_SignFinal(WOLFSSL_EVP_MD_CTX *ctx, unsigned char *sigret,
+                          unsigned int *siglen, WOLFSSL_EVP_PKEY *pkey)
+{
+    unsigned int mdsize;
+    unsigned char md[WC_MAX_DIGEST_SIZE];
+    int ret;
+
+    /* fix: pkey->type is read below; the original checked only ctx and
+     * would dereference a NULL pkey */
+    if (ctx == NULL || pkey == NULL) return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_SignFinal");
+
+    /* finish the running digest first; the signature is over the digest */
+    ret = wolfSSL_EVP_DigestFinal(ctx, md, &mdsize);
+    if (ret <= 0) return ret;
+
+    (void)sigret;
+    (void)siglen;
+
+    switch (pkey->type) {
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+    case EVP_PKEY_RSA: {
+        /* the digest NID lets RSA_sign build the correct DigestInfo */
+        int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx));
+        if (nid < 0) break;
+        return wolfSSL_RSA_sign(nid, md, mdsize, sigret,
+                                siglen, pkey->rsa);
+    }
+#endif /* NO_RSA */
+
+    case EVP_PKEY_DSA:
+    case EVP_PKEY_EC:
+        WOLFSSL_MSG("not implemented");
+        FALL_THROUGH;
+    default:
+        break;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+
+/* Initialize structure for verifying signature
+ *
+ * ctx WOLFSSL_EVP_MD_CTX structure to initialize
+ * type is the type of message digest to use
+ *
+ * returns WOLFSSL_SUCCESS on success
+ */
+int wolfSSL_EVP_VerifyInit(WOLFSSL_EVP_MD_CTX *ctx, const WOLFSSL_EVP_MD *type)
+{
+    if (ctx == NULL)
+        return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_VerifyInit");
+    /* verification begins as an ordinary digest operation */
+    return wolfSSL_EVP_DigestInit(ctx, type);
+}
+
+
+/* Update structure for verifying signature
+ *
+ * ctx WOLFSSL_EVP_MD_CTX structure to update
+ * data buffer holding data to update with for verify
+ * len length of data buffer
+ *
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_EVP_VerifyUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *data, size_t len)
+{
+    if (ctx == NULL)
+        return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_VerifyUpdate");
+    /* feed the data into the running digest */
+    return wolfSSL_EVP_DigestUpdate(ctx, data, len);
+}
+
+
+/* Finalize structure for verifying signature
+ *
+ * ctx WOLFSSL_EVP_MD_CTX structure to finalize
+ * sig buffer holding signature
+ * siglen length of sig buffer
+ * pkey key to verify with
+ *
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_EVP_VerifyFinal(WOLFSSL_EVP_MD_CTX *ctx,
+        unsigned char*sig, unsigned int siglen, WOLFSSL_EVP_PKEY *pkey)
+{
+    int ret;
+    unsigned char md[WC_MAX_DIGEST_SIZE];
+    unsigned int mdsize;
+
+    /* fix: pkey->type is read below; the original checked only ctx and
+     * would dereference a NULL pkey */
+    if (ctx == NULL || pkey == NULL) return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("EVP_VerifyFinal");
+    /* finish the running digest; verification is over the digest */
+    ret = wolfSSL_EVP_DigestFinal(ctx, md, &mdsize);
+    if (ret <= 0) return ret;
+
+    (void)sig;
+    (void)siglen;
+
+    switch (pkey->type) {
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+    case EVP_PKEY_RSA: {
+        /* the digest NID lets RSA_verify check the DigestInfo encoding */
+        int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx));
+        if (nid < 0) break;
+        return wolfSSL_RSA_verify(nid, md, mdsize, sig,
+                (unsigned int)siglen, pkey->rsa);
+    }
+#endif /* NO_RSA */
+
+    case EVP_PKEY_DSA:
+    case EVP_PKEY_EC:
+        WOLFSSL_MSG("not implemented");
+        FALL_THROUGH;
+    default:
+        break;
+    }
+    return WOLFSSL_FAILURE;
+}
+
+/* No-op for OpenSSL compatibility: wolfSSL's cipher table is static, so
+ * there is nothing to register. Always returns 0 as the original did. */
+int wolfSSL_EVP_add_cipher(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    (void)cipher;
+    return 0;
+}
+
+
+/* Create an EVP_PKEY wrapping a raw MAC key. Only EVP_PKEY_HMAC is
+ * supported; key may be NULL only when keylen is 0. Returns a new key
+ * owning a copy of the key bytes, or NULL on bad args / allocation failure. */
+WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new_mac_key(int type, ENGINE* e,
+ const unsigned char* key, int keylen)
+{
+ WOLFSSL_EVP_PKEY* pkey;
+
+ (void)e;
+
+ if (type != EVP_PKEY_HMAC || (key == NULL && keylen != 0))
+ return NULL;
+
+ pkey = wolfSSL_EVP_PKEY_new();
+ if (pkey != NULL) {
+ pkey->pkey.ptr = (char*)XMALLOC(keylen, NULL, DYNAMIC_TYPE_PUBLIC_KEY);
+ /* a NULL pointer with keylen == 0 is acceptable (empty key) */
+ if (pkey->pkey.ptr == NULL && keylen > 0) {
+ wolfSSL_EVP_PKEY_free(pkey);
+ pkey = NULL;
+ }
+ else {
+ XMEMCPY(pkey->pkey.ptr, key, keylen);
+ pkey->pkey_sz = keylen;
+ pkey->type = pkey->save_type = type;
+ }
+ }
+
+ return pkey;
+}
+
+
+/* Return a non-owning pointer to the HMAC key bytes stored in pkey and
+ * write their length to *len. Returns NULL on bad arguments. */
+const unsigned char* wolfSSL_EVP_PKEY_get0_hmac(const WOLFSSL_EVP_PKEY* pkey,
+                                                size_t* len)
+{
+    if (pkey == NULL || len == NULL)
+        return NULL;
+
+    *len = (size_t)pkey->pkey_sz;
+    return (const unsigned char*)pkey->pkey.ptr;
+}
+
+
+/* Initialize an EVP_DigestSign/Verify operation.
+ * Initialize a digest for RSA and ECC keys, or HMAC for HMAC key.
+ */
+static int wolfSSL_evp_digest_pk_init(WOLFSSL_EVP_MD_CTX *ctx,
+ WOLFSSL_EVP_PKEY_CTX **pctx,
+ const WOLFSSL_EVP_MD *type,
+ WOLFSSL_ENGINE *e,
+ WOLFSSL_EVP_PKEY *pkey)
+{
+ if (pkey->type == EVP_PKEY_HMAC) {
+ int hashType;
+ const unsigned char* key;
+ size_t keySz;
+
+ /* map the digest name to a wolfcrypt hash type; the order of these
+ * prefix comparisons matters (see the "SHA" note below) */
+ if (XSTRNCMP(type, "SHA256", 6) == 0) {
+ hashType = WC_SHA256;
+ }
+ #ifdef WOLFSSL_SHA224
+ else if (XSTRNCMP(type, "SHA224", 6) == 0) {
+ hashType = WC_SHA224;
+ }
+ #endif
+ #ifdef WOLFSSL_SHA384
+ else if (XSTRNCMP(type, "SHA384", 6) == 0) {
+ hashType = WC_SHA384;
+ }
+ #endif
+ #ifdef WOLFSSL_SHA512
+ else if (XSTRNCMP(type, "SHA512", 6) == 0) {
+ hashType = WC_SHA512;
+ }
+ #endif
+ #ifndef NO_MD5
+ else if (XSTRNCMP(type, "MD5", 3) == 0) {
+ hashType = WC_MD5;
+ }
+ #endif
+ #ifndef NO_SHA
+ /* has to be last since would pick or 224, 256, 384, or 512 too */
+ else if (XSTRNCMP(type, "SHA", 3) == 0) {
+ hashType = WC_SHA;
+ }
+ #endif /* NO_SHA */
+ else
+ return BAD_FUNC_ARG;
+
+ /* non-owning view of the raw key bytes stored in the pkey */
+ key = wolfSSL_EVP_PKEY_get0_hmac(pkey, &keySz);
+
+ if (wc_HmacInit(&ctx->hash.hmac, NULL, INVALID_DEVID) != 0)
+ return WOLFSSL_FAILURE;
+
+ if (wc_HmacSetKey(&ctx->hash.hmac, hashType, key, (word32)keySz) != 0)
+ return WOLFSSL_FAILURE;
+
+ /* mark the ctx as an HMAC operation for the update/final helpers */
+ ctx->macType = NID_hmac;
+ }
+ else {
+ int ret;
+
+ /* RSA/ECC path: attach a pkey ctx (created on first use) and run a
+ * plain digest; the key is applied at final time */
+ if (ctx->pctx == NULL) {
+ ctx->pctx = wolfSSL_EVP_PKEY_CTX_new(pkey, e);
+ if (ctx->pctx == NULL)
+ return WOLFSSL_FAILURE;
+ }
+
+ ret = wolfSSL_EVP_DigestInit(ctx, type);
+ if (ret == WOLFSSL_SUCCESS && pctx != NULL)
+ *pctx = ctx->pctx;
+ return ret;
+ }
+
+ return WOLFSSL_SUCCESS;
+}
+
+/* Update an EVP_DigestSign/Verify operation.
+ * Update a digest for RSA and ECC keys, or HMAC for HMAC key.
+ */
+/* Update step shared by DigestSign/DigestVerify: digest update for
+ * RSA/ECC keys (pctx set), HMAC update otherwise. */
+static int wolfssl_evp_digest_pk_update(WOLFSSL_EVP_MD_CTX *ctx,
+                                        const void *d, unsigned int cnt)
+{
+    if (ctx->pctx != NULL)
+        return wolfSSL_EVP_DigestUpdate(ctx, d, cnt);
+
+    /* no pkey ctx: this must have been initialized as an HMAC operation */
+    if (ctx->macType != NID_hmac)
+        return WOLFSSL_FAILURE;
+
+    if (wc_HmacUpdate(&ctx->hash.hmac, (const byte *)d, cnt) != 0)
+        return WOLFSSL_FAILURE;
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Finalize an EVP_DigestSign/Verify operation - common part only.
+ * Finalize a digest for RSA and ECC keys, or HMAC for HMAC key.
+ * Copies the digest so that you can keep updating.
+ */
+static int wolfssl_evp_digest_pk_final(WOLFSSL_EVP_MD_CTX *ctx,
+ unsigned char *md, unsigned int* mdlen)
+{
+ int ret;
+
+ if (ctx->pctx == NULL) {
+ Hmac hmacCopy;
+
+ if (ctx->macType != NID_hmac)
+ return WOLFSSL_FAILURE;
+
+ /* finalize a copy so the caller can keep updating the original;
+ * note mdlen is NOT set on this path (wc_HmacFinal takes no length) */
+ if (wolfSSL_HmacCopy(&hmacCopy, &ctx->hash.hmac) != WOLFSSL_SUCCESS)
+ return WOLFSSL_FAILURE;
+ ret = wc_HmacFinal(&hmacCopy, md) == 0;
+ wc_HmacFree(&hmacCopy);
+ return ret;
+ }
+ else {
+ WOLFSSL_EVP_MD_CTX ctxCopy;
+
+ /* same copy trick for the digest path; mdlen IS set here */
+ if (wolfSSL_EVP_MD_CTX_copy_ex(&ctxCopy, ctx) != WOLFSSL_SUCCESS)
+ return WOLFSSL_FAILURE;
+
+ ret = wolfSSL_EVP_DigestFinal(&ctxCopy, md, mdlen);
+ wolfSSL_EVP_MD_CTX_cleanup(&ctxCopy);
+ return ret;
+ }
+}
+
+/* Get the length of the mac based on the digest algorithm. */
+static int wolfssl_mac_len(unsigned char macType)
+{
+ int hashLen;
+
+ switch (macType) {
+ #ifndef NO_MD5
+ case WC_MD5:
+ hashLen = WC_MD5_DIGEST_SIZE;
+ break;
+ #endif /* !NO_MD5 */
+
+ #ifndef NO_SHA
+ case WC_SHA:
+ hashLen = WC_SHA_DIGEST_SIZE;
+ break;
+ #endif /* !NO_SHA */
+
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ hashLen = WC_SHA224_DIGEST_SIZE;
+ break;
+ #endif /* WOLFSSL_SHA224 */
+
+ #ifndef NO_SHA256
+ case WC_SHA256:
+ hashLen = WC_SHA256_DIGEST_SIZE;
+ break;
+ #endif /* !NO_SHA256 */
+
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
+ hashLen = WC_SHA384_DIGEST_SIZE;
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ #ifdef WOLFSSL_SHA512
+ case WC_SHA512:
+ hashLen = WC_SHA512_DIGEST_SIZE;
+ break;
+ #endif /* WOLFSSL_SHA512 */
+
+ #ifdef HAVE_BLAKE2
+ case BLAKE2B_ID:
+ hashLen = BLAKE2B_OUTBYTES;
+ break;
+ #endif /* HAVE_BLAKE2 */
+
+ default:
+ hashLen = 0;
+ }
+
+ return hashLen;
+}
+
+int wolfSSL_EVP_DigestSignInit(WOLFSSL_EVP_MD_CTX *ctx,
+                               WOLFSSL_EVP_PKEY_CTX **pctx,
+                               const WOLFSSL_EVP_MD *type,
+                               WOLFSSL_ENGINE *e,
+                               WOLFSSL_EVP_PKEY *pkey)
+{
+    WOLFSSL_ENTER("EVP_DigestSignInit");
+
+    if (ctx == NULL || type == NULL)
+        return BAD_FUNC_ARG;
+    if (pkey == NULL)
+        return BAD_FUNC_ARG;
+
+    /* shared helper handles both HMAC keys and RSA/ECC digest setup */
+    return wolfSSL_evp_digest_pk_init(ctx, pctx, type, e, pkey);
+}
+
+
+int wolfSSL_EVP_DigestSignUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *d,
+                                 unsigned int cnt)
+{
+    WOLFSSL_ENTER("EVP_DigestSignUpdate");
+
+    if (ctx == NULL)
+        return BAD_FUNC_ARG;
+    if (d == NULL)
+        return BAD_FUNC_ARG;
+
+    /* dispatches to HMAC or digest update depending on the init */
+    return wolfssl_evp_digest_pk_update(ctx, d, cnt);
+}
+
+int wolfSSL_EVP_DigestSignFinal(WOLFSSL_EVP_MD_CTX *ctx, unsigned char *sig,
+ size_t *siglen)
+{
+ unsigned char digest[WC_MAX_DIGEST_SIZE];
+ unsigned int hashLen;
+ int ret = WOLFSSL_FAILURE;
+
+ WOLFSSL_ENTER("EVP_DigestSignFinal");
+
+ if (ctx == NULL || siglen == NULL)
+ return WOLFSSL_FAILURE;
+
+ /* Return the maximum size of the signature when sig is NULL. */
+ if (ctx->pctx == NULL) {
+ /* HMAC path: the "signature" is the MAC itself */
+ if (ctx->macType != NID_hmac)
+ return WOLFSSL_FAILURE;
+
+ hashLen = wolfssl_mac_len(ctx->hash.hmac.macType);
+
+ if (sig == NULL) {
+ *siglen = hashLen;
+ return WOLFSSL_SUCCESS;
+ }
+ }
+#ifndef NO_RSA
+ else if (ctx->pctx->pkey->type == EVP_PKEY_RSA) {
+ if (sig == NULL) {
+ *siglen = wolfSSL_RSA_size(ctx->pctx->pkey->rsa);
+ return WOLFSSL_SUCCESS;
+ }
+ }
+#endif /* !NO_RSA */
+#ifdef HAVE_ECC
+ else if (ctx->pctx->pkey->type == EVP_PKEY_EC) {
+ if (sig == NULL) {
+ /* upper bound on the DER-encoded signature: SEQ + INT + INT */
+ *siglen = ecc_sets[ctx->pctx->pkey->ecc->group->curve_idx].size * 2
+ + 8;
+ return WOLFSSL_SUCCESS;
+ }
+ }
+#endif
+
+ /* finalize on a copy so further updates remain possible */
+ if (wolfssl_evp_digest_pk_final(ctx, digest, &hashLen) <= 0)
+ return WOLFSSL_FAILURE;
+
+ if (ctx->pctx == NULL) {
+ /* Copy the HMAC result as signature; *siglen is the caller's
+ * buffer size on input here */
+ if ((unsigned int)(*siglen) > hashLen)
+ *siglen = hashLen;
+ /* May be a truncated signature. */
+
+ XMEMCPY(sig, digest, *siglen);
+ ret = WOLFSSL_SUCCESS;
+ }
+ else {
+ /* Sign the digest. */
+ switch (ctx->pctx->pkey->type) {
+ #if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+ case EVP_PKEY_RSA: {
+ unsigned int sigSz;
+ int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx));
+ if (nid < 0)
+ break;
+ ret = wolfSSL_RSA_sign(nid, digest, hashLen, sig, &sigSz,
+ ctx->pctx->pkey->rsa);
+ if (ret >= 0)
+ *siglen = sigSz;
+ break;
+ }
+ #endif /* NO_RSA */
+
+ #ifdef HAVE_ECC
+ case EVP_PKEY_EC: {
+ WOLFSSL_ECDSA_SIG *ecdsaSig;
+ ecdsaSig = wolfSSL_ECDSA_do_sign(digest, hashLen,
+ ctx->pctx->pkey->ecc);
+ if (ecdsaSig == NULL)
+ break;
+ /* DER-encode (r, s) directly into the caller's buffer */
+ *siglen = wolfSSL_i2d_ECDSA_SIG(ecdsaSig, &sig);
+ wolfSSL_ECDSA_SIG_free(ecdsaSig);
+ ret = WOLFSSL_SUCCESS;
+ break;
+ }
+ #endif
+ default:
+ break;
+ }
+ }
+
+ /* scrub the digest from the stack before returning */
+ ForceZero(digest, sizeof(digest));
+ return ret;
+}
+int wolfSSL_EVP_DigestVerifyInit(WOLFSSL_EVP_MD_CTX *ctx,
+                                 WOLFSSL_EVP_PKEY_CTX **pctx,
+                                 const WOLFSSL_EVP_MD *type,
+                                 WOLFSSL_ENGINE *e,
+                                 WOLFSSL_EVP_PKEY *pkey)
+{
+    WOLFSSL_ENTER("EVP_DigestVerifyInit");
+
+    if (ctx == NULL || type == NULL)
+        return BAD_FUNC_ARG;
+    if (pkey == NULL)
+        return BAD_FUNC_ARG;
+
+    /* shared helper handles both HMAC keys and RSA/ECC digest setup */
+    return wolfSSL_evp_digest_pk_init(ctx, pctx, type, e, pkey);
+}
+
+
+int wolfSSL_EVP_DigestVerifyUpdate(WOLFSSL_EVP_MD_CTX *ctx, const void *d,
+                                   size_t cnt)
+{
+    WOLFSSL_ENTER("EVP_DigestVerifyUpdate");
+
+    if (ctx == NULL)
+        return BAD_FUNC_ARG;
+    if (d == NULL)
+        return BAD_FUNC_ARG;
+
+    /* dispatches to HMAC or digest update depending on the init */
+    return wolfssl_evp_digest_pk_update(ctx, d, (unsigned int)cnt);
+}
+
+
+int wolfSSL_EVP_DigestVerifyFinal(WOLFSSL_EVP_MD_CTX *ctx,
+ const unsigned char *sig, size_t siglen)
+{
+ unsigned char digest[WC_MAX_DIGEST_SIZE];
+ unsigned int hashLen;
+
+ WOLFSSL_ENTER("EVP_DigestVerifyFinal");
+
+ if (ctx == NULL || sig == NULL)
+ return WOLFSSL_FAILURE;
+
+ if (ctx->pctx == NULL) {
+ /* HMAC path: the signature is the MAC itself */
+ if (ctx->macType != NID_hmac)
+ return WOLFSSL_FAILURE;
+
+ hashLen = wolfssl_mac_len(ctx->hash.hmac.macType);
+
+ if (siglen > hashLen)
+ return WOLFSSL_FAILURE;
+ /* May be a truncated signature. */
+ }
+
+ /* finalize on a copy so further updates remain possible */
+ if (wolfssl_evp_digest_pk_final(ctx, digest, &hashLen) <= 0)
+ return WOLFSSL_FAILURE;
+
+ if (ctx->pctx == NULL) {
+ /* Check HMAC result matches the signature.
+ * NOTE(review): XMEMCMP is not constant-time; consider a
+ * constant-time compare here to avoid a timing side channel --
+ * verify against project policy. */
+ if (XMEMCMP(sig, digest, siglen) == 0)
+ return WOLFSSL_SUCCESS;
+ return WOLFSSL_FAILURE;
+ }
+ else {
+ /* Verify the signature with the digest. */
+ switch (ctx->pctx->pkey->type) {
+ #if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
+ case EVP_PKEY_RSA: {
+ int nid = wolfSSL_EVP_MD_type(wolfSSL_EVP_MD_CTX_md(ctx));
+ if (nid < 0)
+ return WOLFSSL_FAILURE;
+ return wolfSSL_RSA_verify(nid, digest, hashLen, sig,
+ (unsigned int)siglen,
+ ctx->pctx->pkey->rsa);
+ }
+ #endif /* NO_RSA */
+
+ #ifdef HAVE_ECC
+ case EVP_PKEY_EC: {
+ int ret;
+ WOLFSSL_ECDSA_SIG *ecdsaSig;
+ /* decode the DER (r, s) pair before verifying */
+ ecdsaSig = wolfSSL_d2i_ECDSA_SIG(NULL, &sig, (long)siglen);
+ if (ecdsaSig == NULL)
+ return WOLFSSL_FAILURE;
+ ret = wolfSSL_ECDSA_do_verify(digest, hashLen, ecdsaSig,
+ ctx->pctx->pkey->ecc);
+ wolfSSL_ECDSA_SIG_free(ecdsaSig);
+ return ret;
+ }
+ #endif
+ default:
+ break;
+ }
+ }
+
+ return WOLFSSL_FAILURE;
+}
+
+
+#ifdef WOLFSSL_APACHE_HTTPD
+#if !defined(USE_WINDOWS_API) && !defined(MICROCHIP_PIC32)
+ #include <termios.h>
+#endif
+
+#ifndef XGETPASSWD
+ /* Read a line from stdin into buf (at most bufSz bytes) with terminal
+ * echo disabled, restoring the terminal settings afterwards.
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE. */
+ static int XGETPASSWD(char* buf, int bufSz) {
+ int ret = WOLFSSL_SUCCESS;
+
+ /* turn off echo for passwords */
+ #ifdef USE_WINDOWS_API
+ DWORD originalTerm;
+ DWORD newTerm;
+ CONSOLE_SCREEN_BUFFER_INFO screenOrig;
+ HANDLE stdinHandle = GetStdHandle(STD_INPUT_HANDLE);
+ if (GetConsoleMode(stdinHandle, &originalTerm) == 0) {
+ WOLFSSL_MSG("Couldn't get the original terminal settings");
+ return WOLFSSL_FAILURE;
+ }
+ newTerm = originalTerm;
+ newTerm &= ~ENABLE_ECHO_INPUT;
+ if (SetConsoleMode(stdinHandle, newTerm) == 0) {
+ WOLFSSL_MSG("Couldn't turn off echo");
+ return WOLFSSL_FAILURE;
+ }
+ #else
+ struct termios originalTerm;
+ struct termios newTerm;
+ if (tcgetattr(STDIN_FILENO, &originalTerm) != 0) {
+ WOLFSSL_MSG("Couldn't get the original terminal settings");
+ return WOLFSSL_FAILURE;
+ }
+ XMEMCPY(&newTerm, &originalTerm, sizeof(struct termios));
+
+ /* disable echo but keep line editing and echo the final newline */
+ newTerm.c_lflag &= ~ECHO;
+ newTerm.c_lflag |= (ICANON | ECHONL);
+ if (tcsetattr(STDIN_FILENO, TCSANOW, &newTerm) != 0) {
+ WOLFSSL_MSG("Couldn't turn off echo");
+ return WOLFSSL_FAILURE;
+ }
+ #endif
+
+ if (XFGETS(buf, bufSz, stdin) == NULL) {
+ ret = WOLFSSL_FAILURE;
+ }
+
+ /* restore default echo */
+ #ifdef USE_WINDOWS_API
+ if (SetConsoleMode(stdinHandle, originalTerm) == 0) {
+ WOLFSSL_MSG("Couldn't restore the terminal settings");
+ return WOLFSSL_FAILURE;
+ }
+ #else
+ if (tcsetattr(STDIN_FILENO, TCSANOW, &originalTerm) != 0) {
+ WOLFSSL_MSG("Couldn't restore the terminal settings");
+ return WOLFSSL_FAILURE;
+ }
+ #endif
+ return ret;
+ }
+#endif
+
+/* Prompt with banner and read a password from stdin with echo disabled.
+ * v is unused (XFGETS already bounds the read).
+ * Returns 0 on success and -1 on failure. */
+int wolfSSL_EVP_read_pw_string(char* buf, int bufSz, const char* banner, int v)
+{
+    printf("%s", banner);
+    /* fix: flush so the prompt (which has no newline) is visible before
+     * blocking on input when stdout is line/fully buffered */
+    fflush(stdout);
+    if (XGETPASSWD(buf, bufSz) == WOLFSSL_FAILURE) {
+        return -1;
+    }
+    (void)v; /* fgets always sanity checks size of input vs buffer */
+    return 0;
+}
+#endif /* WOLFSSL_APACHE_HTTPD */
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+/* PBKDF2 with HMAC-SHA1 (OpenSSL compatible). A NULL pass means an empty
+ * password; passlen == -1 means measure the NUL-terminated password.
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE. */
+int wolfSSL_PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen,
+                                   const unsigned char *salt,
+                                   int saltlen, int iter,
+                                   int keylen, unsigned char *out)
+{
+    static const char empty[] = "";
+
+    if (pass == NULL) {
+        pass = empty;
+        passlen = 0;
+    }
+    else if (passlen == -1) {
+        passlen = (int)XSTRLEN(pass);
+    }
+
+    if (wc_PBKDF2((byte*)out, (byte*)pass, passlen, (byte*)salt, saltlen,
+                  iter, keylen, WC_SHA) == 0)
+        return WOLFSSL_SUCCESS;
+
+    return WOLFSSL_FAILURE;
+}
+#endif /* !NO_PWDBASED !NO_SHA*/
+
+#if !defined(NO_PWDBASED)
+/* PBKDF2 with a caller-chosen digest (OpenSSL compatible). A NULL pass
+ * means an empty password; passlen == -1 means measure the NUL-terminated
+ * password. Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE. */
+WOLFSSL_API int wolfSSL_PKCS5_PBKDF2_HMAC(const char *pass, int passlen,
+                                          const unsigned char *salt,
+                                          int saltlen, int iter,
+                                          const WOLFSSL_EVP_MD *digest,
+                                          int keylen, unsigned char *out)
+{
+    static const char empty[] = "";
+
+    if (pass == NULL) {
+        pass = empty;
+        passlen = 0;
+    }
+    else if (passlen == -1) {
+        passlen = (int)XSTRLEN(pass);
+    }
+
+    /* map the EVP digest name to the wolfcrypt hash type */
+    if (wc_PBKDF2((byte*)out, (byte*)pass, passlen, (byte*)salt, saltlen,
+                  iter, keylen, wolfSSL_EVP_md2macType(digest)) == 0)
+        return WOLFSSL_SUCCESS;
+
+    return WOLFSSL_FAILURE;
+}
+#endif /* !NO_PWDBASED */
+
+/* Cipher lookup table: maps the internal cipher type byte to the
+ * canonical EVP name string and OpenSSL NID. Terminated by a zero entry. */
+static const struct cipher{
+ unsigned char type;
+ const char *name;
+ int nid;
+} cipher_tbl[] = {
+
+#ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ {AES_128_CBC_TYPE, "AES-128-CBC", NID_aes_128_cbc},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {AES_192_CBC_TYPE, "AES-192-CBC", NID_aes_192_cbc},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {AES_256_CBC_TYPE, "AES-256-CBC", NID_aes_256_cbc},
+ #endif
+
+ #ifdef WOLFSSL_AES_128
+ {AES_128_CFB1_TYPE, "AES-128-CFB1", NID_aes_128_cfb1},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {AES_192_CFB1_TYPE, "AES-192-CFB1", NID_aes_192_cfb1},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {AES_256_CFB1_TYPE, "AES-256-CFB1", NID_aes_256_cfb1},
+ #endif
+
+ #ifdef WOLFSSL_AES_128
+ {AES_128_CFB8_TYPE, "AES-128-CFB8", NID_aes_128_cfb8},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {AES_192_CFB8_TYPE, "AES-192-CFB8", NID_aes_192_cfb8},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {AES_256_CFB8_TYPE, "AES-256-CFB8", NID_aes_256_cfb8},
+ #endif
+
+ #ifdef WOLFSSL_AES_128
+ {AES_128_CFB128_TYPE, "AES-128-CFB128", NID_aes_128_cfb128},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {AES_192_CFB128_TYPE, "AES-192-CFB128", NID_aes_192_cfb128},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {AES_256_CFB128_TYPE, "AES-256-CFB128", NID_aes_256_cfb128},
+ #endif
+
+ #ifdef WOLFSSL_AES_128
+ {AES_128_OFB_TYPE, "AES-128-OFB", NID_aes_128_ofb},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {AES_192_OFB_TYPE, "AES-192-OFB", NID_aes_192_ofb},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {AES_256_OFB_TYPE, "AES-256-OFB", NID_aes_256_ofb},
+ #endif
+
+ /* XTS has no 192-bit variant */
+ #ifdef WOLFSSL_AES_128
+ {AES_128_XTS_TYPE, "AES-128-XTS", NID_aes_128_xts},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {AES_256_XTS_TYPE, "AES-256-XTS", NID_aes_256_xts},
+ #endif
+
+ #ifdef WOLFSSL_AES_128
+ {AES_128_GCM_TYPE, "AES-128-GCM", NID_aes_128_gcm},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {AES_192_GCM_TYPE, "AES-192-GCM", NID_aes_192_gcm},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {AES_256_GCM_TYPE, "AES-256-GCM", NID_aes_256_gcm},
+ #endif
+ #ifdef WOLFSSL_AES_128
+ {AES_128_CTR_TYPE, "AES-128-CTR", NID_aes_128_ctr},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {AES_192_CTR_TYPE, "AES-192-CTR", NID_aes_192_ctr},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {AES_256_CTR_TYPE, "AES-256-CTR", NID_aes_256_ctr},
+ #endif
+
+ #ifdef WOLFSSL_AES_128
+ {AES_128_ECB_TYPE, "AES-128-ECB", NID_aes_128_ecb},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {AES_192_ECB_TYPE, "AES-192-ECB", NID_aes_192_ecb},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {AES_256_ECB_TYPE, "AES-256-ECB", NID_aes_256_ecb},
+ #endif
+
+#endif
+
+#ifndef NO_DES3
+ {DES_CBC_TYPE, "DES-CBC", NID_des_cbc},
+ {DES_ECB_TYPE, "DES-ECB", NID_des_ecb},
+
+ {DES_EDE3_CBC_TYPE, "DES-EDE3-CBC", NID_des_ede3_cbc},
+ {DES_EDE3_ECB_TYPE, "DES-EDE3-ECB", NID_des_ede3_ecb},
+#endif
+
+#ifndef NO_RC4
+ /* RC4 has no NID of its own here */
+ {ARC4_TYPE, "ARC4", NID_undef},
+#endif
+
+#ifdef HAVE_IDEA
+ {IDEA_CBC_TYPE, "IDEA-CBC", NID_idea_cbc},
+#endif
+ { 0, NULL, 0}
+};
+
+/* returns cipher using provided ctx type */
+const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_CIPHER_CTX_cipher(
+ const WOLFSSL_EVP_CIPHER_CTX *ctx)
+{
+ const struct cipher* c;
+
+ if (!ctx || !ctx->cipherType) {
+ return NULL;
+ }
+
+ for (c = cipher_tbl; c->type != 0; c++) {
+ if (ctx->cipherType == c->type) {
+ return wolfSSL_EVP_get_cipherbyname(c->name);
+ }
+ }
+
+ return NULL;
+}
+
+/* Return the OpenSSL NID for an EVP cipher (identified by its name
+ * string), or 0 when the cipher is NULL or unknown. */
+int wolfSSL_EVP_CIPHER_nid(const WOLFSSL_EVP_CIPHER *cipher)
+{
+    const struct cipher* ent;
+
+    if (cipher == NULL) {
+        return 0;
+    }
+
+    for (ent = cipher_tbl; ent->type != 0; ent++) {
+        /* compare including the terminating NUL for an exact match */
+        if (XSTRNCMP(cipher, ent->name, XSTRLEN(ent->name)+1) == 0) {
+            return ent->nid;
+        }
+    }
+
+    return 0;
+}
+
+/* Look up an EVP cipher by name, accepting common OpenSSL aliases
+ * (e.g. "des3", "aes128-cbc"). Returns the canonical name string used as
+ * the WOLFSSL_EVP_CIPHER handle, or NULL when unknown. */
+const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_get_cipherbyname(const char *name)
+{
+
+ /* alias -> canonical-name table, searched before the cipher table */
+ static const struct alias {
+ const char *name;
+ const char *alias;
+ } alias_tbl[] =
+ {
+#ifndef NO_DES3
+ {"DES-CBC", "DES"},
+ {"DES-CBC", "des"},
+ {"DES-ECB", "DES-ECB"},
+ {"DES-ECB", "des-ecb"},
+ {"DES-EDE3-CBC", "DES3"},
+ {"DES-EDE3-CBC", "des3"},
+ {"DES-EDE3-ECB", "DES-EDE3"},
+ {"DES-EDE3-ECB", "des-ede3"},
+ {"DES-EDE3-ECB", "des-ede3-ecb"},
+#endif
+#ifdef HAVE_IDEA
+ {"IDEA-CBC", "IDEA"},
+ {"IDEA-CBC", "idea"},
+#endif
+#ifndef NO_AES
+ #ifdef HAVE_AES_CBC
+ #ifdef WOLFSSL_AES_128
+ {"AES-128-CBC", "AES128-CBC"},
+ {"AES-128-CBC", "aes128-cbc"},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {"AES-192-CBC", "AES192-CBC"},
+ {"AES-192-CBC", "aes192-cbc"},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {"AES-256-CBC", "AES256-CBC"},
+ {"AES-256-CBC", "aes256-cbc"},
+ #endif
+ #endif
+ #ifdef WOLFSSL_AES_128
+ {"AES-128-ECB", "AES128-ECB"},
+ {"AES-128-ECB", "aes128-ecb"},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {"AES-192-ECB", "AES192-ECB"},
+ {"AES-192-ECB", "aes192-ecb"},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {"AES-256-ECB", "AES256-ECB"},
+ #endif
+ #ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ {"AES-128-GCM", "aes-128-gcm"},
+ {"AES-128-GCM", "id-aes128-GCM"},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {"AES-192-GCM", "aes-192-gcm"},
+ {"AES-192-GCM", "id-aes192-GCM"},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {"AES-256-GCM", "aes-256-gcm"},
+ {"AES-256-GCM", "id-aes256-GCM"},
+ #endif
+ #endif
+#endif
+#ifndef NO_RC4
+ {"ARC4", "RC4"},
+#endif
+ { NULL, NULL}
+ };
+
+ const struct cipher *ent;
+ const struct alias *al;
+
+ WOLFSSL_ENTER("EVP_get_cipherbyname");
+
+ /* first resolve any alias to the canonical name */
+ for( al = alias_tbl; al->name != NULL; al++)
+ if(XSTRNCMP(name, al->alias, XSTRLEN(al->alias)+1) == 0) {
+ name = al->name;
+ break;
+ }
+
+ /* the table's name string itself serves as the EVP_CIPHER handle */
+ for( ent = cipher_tbl; ent->name != NULL; ent++)
+ if(XSTRNCMP(name, ent->name, XSTRLEN(ent->name)+1) == 0) {
+ return (WOLFSSL_EVP_CIPHER *)ent->name;
+ }
+
+ return NULL;
+}
+
+/*
+ * return an EVP_CIPHER structure when cipher NID is passed.
+ *
+ * id cipher NID
+ *
+ * return WOLFSSL_EVP_CIPHER
+*/
+const WOLFSSL_EVP_CIPHER *wolfSSL_EVP_get_cipherbynid(int id)
+{
+ WOLFSSL_ENTER("EVP_get_cipherbynid");
+
+ /* note: only CBC/CTR/ECB/GCM AES modes, DES(3) and IDEA are handled
+ * here; CFB/OFB/XTS NIDs are not mapped and return NULL */
+ switch(id) {
+
+#ifndef NO_AES
+ #ifdef HAVE_AES_CBC
+ #ifdef WOLFSSL_AES_128
+ case NID_aes_128_cbc:
+ return wolfSSL_EVP_aes_128_cbc();
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case NID_aes_192_cbc:
+ return wolfSSL_EVP_aes_192_cbc();
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case NID_aes_256_cbc:
+ return wolfSSL_EVP_aes_256_cbc();
+ #endif
+ #endif
+ #ifdef WOLFSSL_AES_COUNTER
+ #ifdef WOLFSSL_AES_128
+ case NID_aes_128_ctr:
+ return wolfSSL_EVP_aes_128_ctr();
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case NID_aes_192_ctr:
+ return wolfSSL_EVP_aes_192_ctr();
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case NID_aes_256_ctr:
+ return wolfSSL_EVP_aes_256_ctr();
+ #endif
+ #endif /* WOLFSSL_AES_COUNTER */
+ #ifdef HAVE_AES_ECB
+ #ifdef WOLFSSL_AES_128
+ case NID_aes_128_ecb:
+ return wolfSSL_EVP_aes_128_ecb();
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case NID_aes_192_ecb:
+ return wolfSSL_EVP_aes_192_ecb();
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case NID_aes_256_ecb:
+ return wolfSSL_EVP_aes_256_ecb();
+ #endif
+ #endif /* HAVE_AES_ECB */
+ #ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ case NID_aes_128_gcm:
+ return wolfSSL_EVP_aes_128_gcm();
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case NID_aes_192_gcm:
+ return wolfSSL_EVP_aes_192_gcm();
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case NID_aes_256_gcm:
+ return wolfSSL_EVP_aes_256_gcm();
+ #endif
+ #endif
+#endif
+
+#ifndef NO_DES3
+ case NID_des_cbc:
+ return wolfSSL_EVP_des_cbc();
+#ifdef WOLFSSL_DES_ECB
+ case NID_des_ecb:
+ return wolfSSL_EVP_des_ecb();
+#endif
+ case NID_des_ede3_cbc:
+ return wolfSSL_EVP_des_ede3_cbc();
+#ifdef WOLFSSL_DES_ECB
+ case NID_des_ede3_ecb:
+ return wolfSSL_EVP_des_ede3_ecb();
+#endif
+#endif /*NO_DES3*/
+
+#ifdef HAVE_IDEA
+ case NID_idea_cbc:
+ return wolfSSL_EVP_idea_cbc();
+#endif
+
+ default:
+ WOLFSSL_MSG("Bad cipher id value");
+ }
+
+ return NULL;
+}
+
+/* Populate the cached EVP_* cipher-name pointers (EVP_AES_128_CBC, ...)
+ * by resolving each canonical name through EVP_get_cipherbyname().
+ * Invoked lazily from the individual cipher getters when a cached
+ * pointer is still NULL. */
+void wolfSSL_EVP_init(void)
+{
+#ifndef NO_AES
+    #ifdef HAVE_AES_CBC
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CBC = (char *)EVP_get_cipherbyname("AES-128-CBC");
+        #endif
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CBC = (char *)EVP_get_cipherbyname("AES-192-CBC");
+        #endif
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CBC = (char *)EVP_get_cipherbyname("AES-256-CBC");
+        #endif
+    #endif /* HAVE_AES_CBC */
+
+    #ifdef WOLFSSL_AES_CFB
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CFB1 = (char *)EVP_get_cipherbyname("AES-128-CFB1");
+        #endif
+
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CFB1 = (char *)EVP_get_cipherbyname("AES-192-CFB1");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CFB1 = (char *)EVP_get_cipherbyname("AES-256-CFB1");
+        #endif
+
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CFB8 = (char *)EVP_get_cipherbyname("AES-128-CFB8");
+        #endif
+
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CFB8 = (char *)EVP_get_cipherbyname("AES-192-CFB8");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CFB8 = (char *)EVP_get_cipherbyname("AES-256-CFB8");
+        #endif
+
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CFB128 = (char *)EVP_get_cipherbyname("AES-128-CFB128");
+        #endif
+
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CFB128 = (char *)EVP_get_cipherbyname("AES-192-CFB128");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CFB128 = (char *)EVP_get_cipherbyname("AES-256-CFB128");
+        #endif
+    #endif /* WOLFSSL_AES_CFB */
+
+    #ifdef WOLFSSL_AES_OFB
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_OFB = (char *)EVP_get_cipherbyname("AES-128-OFB");
+        #endif
+
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_OFB = (char *)EVP_get_cipherbyname("AES-192-OFB");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_OFB = (char *)EVP_get_cipherbyname("AES-256-OFB");
+        #endif
+    #endif /* WOLFSSL_AES_OFB */
+
+    #ifdef WOLFSSL_AES_XTS
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_XTS = (char *)EVP_get_cipherbyname("AES-128-XTS");
+        #endif
+
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_XTS = (char *)EVP_get_cipherbyname("AES-256-XTS");
+        #endif
+    #endif /* WOLFSSL_AES_XTS */
+
+    #ifdef HAVE_AESGCM
+        #ifdef WOLFSSL_AES_128
+        EVP_AES_128_GCM = (char *)EVP_get_cipherbyname("AES-128-GCM");
+        #endif
+        #ifdef WOLFSSL_AES_192
+        EVP_AES_192_GCM = (char *)EVP_get_cipherbyname("AES-192-GCM");
+        #endif
+        #ifdef WOLFSSL_AES_256
+        EVP_AES_256_GCM = (char *)EVP_get_cipherbyname("AES-256-GCM");
+        #endif
+    #endif /* HAVE_AESGCM*/
+    #ifdef WOLFSSL_AES_128
+        EVP_AES_128_CTR = (char *)EVP_get_cipherbyname("AES-128-CTR");
+    #endif
+    #ifdef WOLFSSL_AES_192
+        EVP_AES_192_CTR = (char *)EVP_get_cipherbyname("AES-192-CTR");
+    #endif
+    #ifdef WOLFSSL_AES_256
+        EVP_AES_256_CTR = (char *)EVP_get_cipherbyname("AES-256-CTR");
+    #endif
+
+    #ifdef WOLFSSL_AES_128
+        EVP_AES_128_ECB = (char *)EVP_get_cipherbyname("AES-128-ECB");
+    #endif
+    #ifdef WOLFSSL_AES_192
+        EVP_AES_192_ECB = (char *)EVP_get_cipherbyname("AES-192-ECB");
+    #endif
+    #ifdef WOLFSSL_AES_256
+        EVP_AES_256_ECB = (char *)EVP_get_cipherbyname("AES-256-ECB");
+    #endif
+#endif /* ifndef NO_AES*/
+
+#ifndef NO_DES3
+    EVP_DES_CBC = (char *)EVP_get_cipherbyname("DES-CBC");
+    EVP_DES_ECB = (char *)EVP_get_cipherbyname("DES-ECB");
+
+    EVP_DES_EDE3_CBC = (char *)EVP_get_cipherbyname("DES-EDE3-CBC");
+    EVP_DES_EDE3_ECB = (char *)EVP_get_cipherbyname("DES-EDE3-ECB");
+#endif
+
+#ifdef HAVE_IDEA
+    EVP_IDEA_CBC = (char *)EVP_get_cipherbyname("IDEA-CBC");
+#endif
+}
+
+#if !defined(NO_PWDBASED)
+/* Map an EVP_MD name string (e.g. "SHA256") to the wolfCrypt hash type and
+ * digest size.
+ *
+ * evp      digest name (an EVP_MD is a name string in this compat layer)
+ * pHash    optional out: wc_HashType value
+ * pHashSz  optional out: digest size in bytes
+ *
+ * return WOLFSSL_SUCCESS on success, WOLFSSL_FAILURE otherwise */
+int wolfSSL_EVP_get_hashinfo(const WOLFSSL_EVP_MD* evp,
+    int* pHash, int* pHashSz)
+{
+    enum wc_HashType hash = WC_HASH_TYPE_NONE;
+    int hashSz;
+
+    if (evp == NULL) {
+        /* guard: XSTRLEN/XSTRNCMP below would dereference NULL */
+        return WOLFSSL_FAILURE;
+    }
+
+    if (XSTRLEN(evp) < 3) {
+        /* do not try comparing strings if size is too small */
+        return WOLFSSL_FAILURE;
+    }
+
+    if (XSTRNCMP("SHA", evp, 3) == 0) {
+        if (XSTRLEN(evp) > 3) {
+        #ifndef NO_SHA256
+            if (XSTRNCMP("SHA256", evp, 6) == 0) {
+                hash = WC_HASH_TYPE_SHA256;
+            }
+            else
+        #endif
+        #ifdef WOLFSSL_SHA384
+            if (XSTRNCMP("SHA384", evp, 6) == 0) {
+                hash = WC_HASH_TYPE_SHA384;
+            }
+            else
+        #endif
+        #ifdef WOLFSSL_SHA512
+            if (XSTRNCMP("SHA512", evp, 6) == 0) {
+                hash = WC_HASH_TYPE_SHA512;
+            }
+            else
+        #endif
+            {
+                WOLFSSL_MSG("Unknown SHA hash");
+            }
+        }
+        else {
+            /* bare "SHA" means SHA-1 */
+            hash = WC_HASH_TYPE_SHA;
+        }
+    }
+#ifdef WOLFSSL_MD2
+    else if (XSTRNCMP("MD2", evp, 3) == 0) {
+        hash = WC_HASH_TYPE_MD2;
+    }
+#endif
+#ifndef NO_MD4
+    else if (XSTRNCMP("MD4", evp, 3) == 0) {
+        hash = WC_HASH_TYPE_MD4;
+    }
+#endif
+#ifndef NO_MD5
+    else if (XSTRNCMP("MD5", evp, 3) == 0) {
+        hash = WC_HASH_TYPE_MD5;
+    }
+#endif
+
+    if (pHash)
+        *pHash = hash;
+
+    /* wc_HashGetDigestSize returns a negative error code for
+     * WC_HASH_TYPE_NONE / unsupported types */
+    hashSz = wc_HashGetDigestSize(hash);
+    if (pHashSz)
+        *pHashSz = hashSz;
+
+    if (hashSz < 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* this function makes the assumption that out buffer is big enough for digest*/
+/* One-shot digest: hashes in[0..inSz) with the named digest into out.
+ * outSz (optional) receives the digest length; eng is ignored.
+ * Returns WOLFSSL_SUCCESS or WOLFSSL_FAILURE. */
+int wolfSSL_EVP_Digest(const unsigned char* in, int inSz, unsigned char* out,
+                              unsigned int* outSz, const WOLFSSL_EVP_MD* evp,
+                              WOLFSSL_ENGINE* eng)
+{
+    int err;
+    int hashType = WC_HASH_TYPE_NONE;
+    int hashSz;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_Digest");
+    if (in == NULL || out == NULL || evp == NULL) {
+        WOLFSSL_MSG("Null argument passed in");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* resolve digest name to wolfCrypt hash type + size */
+    err = wolfSSL_EVP_get_hashinfo(evp, &hashType, &hashSz);
+    if (err != WOLFSSL_SUCCESS)
+        return err;
+
+    if (wc_Hash((enum wc_HashType)hashType, in, inSz, out, hashSz) != 0) {
+        return WOLFSSL_FAILURE;
+    }
+
+    if (outSz != NULL)
+        *outSz = hashSz;
+
+    (void)eng;
+    return WOLFSSL_SUCCESS;
+}
+#endif
+
+/* Look up a digest by name, accepting OpenSSL-style aliases
+ * (e.g. "SHA1" / "ssl3-sha1" -> "SHA").  Returns the canonical name
+ * string from md_tbl (the EVP_MD representation here), or NULL. */
+const WOLFSSL_EVP_MD *wolfSSL_EVP_get_digestbyname(const char *name)
+{
+    static const struct alias {
+        const char *name;
+        const char *alias;
+    } alias_tbl[] =
+    {
+        {"MD4", "ssl3-md4"},
+        {"MD5", "ssl3-md5"},
+        {"SHA", "ssl3-sha1"},
+        {"SHA", "SHA1"},
+        { NULL, NULL}
+    };
+
+    const struct alias  *al;
+    const struct s_ent *ent;
+
+
+    /* first translate any alias into its canonical name */
+    for (al = alias_tbl; al->name != NULL; al++)
+        if(XSTRNCMP(name, al->alias, XSTRLEN(al->alias)+1) == 0) {
+            name = al->name;
+            break;
+        }
+
+    for (ent = md_tbl; ent->name != NULL; ent++)
+        if(XSTRNCMP(name, ent->name, XSTRLEN(ent->name)+1) == 0) {
+            return (EVP_MD *)ent->name;
+        }
+    return NULL;
+}
+
+/* Return the NID for a digest (looked up by its name string in md_tbl);
+ * 0 if the name is unknown. */
+int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
+{
+    const struct s_ent *ent ;
+    WOLFSSL_ENTER("EVP_MD_type");
+    for( ent = md_tbl; ent->name != NULL; ent++){
+        if(XSTRNCMP((const char *)md, ent->name, XSTRLEN(ent->name)+1) == 0) {
+            return ent->nid;
+        }
+    }
+    return 0;
+}
+
+/* Digest getters: each returns the table-resident name string that serves
+ * as the EVP_MD handle for that algorithm, via EVP_get_digestbyname(). */
+#ifndef NO_MD4
+
+    /* return a pointer to MD4 EVP type */
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_md4(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_md4");
+        return EVP_get_digestbyname("MD4");
+    }
+
+#endif /* !NO_MD4 */
+
+
+#ifndef NO_MD5
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_md5(void)
+    {
+        WOLFSSL_ENTER("EVP_md5");
+        return EVP_get_digestbyname("MD5");
+    }
+
+#endif /* !NO_MD5 */
+
+
+#ifndef NO_WOLFSSL_STUB
+    /* MDC-2 is not implemented; stub always returns NULL */
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_mdc2(void)
+    {
+        WOLFSSL_STUB("EVP_mdc2");
+        return NULL;
+    }
+#endif
+
+#ifndef NO_SHA
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha1(void)
+    {
+        WOLFSSL_ENTER("EVP_sha1");
+        return EVP_get_digestbyname("SHA");
+    }
+#endif /* NO_SHA */
+
+#ifdef WOLFSSL_SHA224
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha224(void)
+    {
+        WOLFSSL_ENTER("EVP_sha224");
+        return EVP_get_digestbyname("SHA224");
+    }
+
+#endif /* WOLFSSL_SHA224 */
+
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha256(void)
+    {
+        WOLFSSL_ENTER("EVP_sha256");
+        return EVP_get_digestbyname("SHA256");
+    }
+
+#ifdef WOLFSSL_SHA384
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha384(void)
+    {
+        WOLFSSL_ENTER("EVP_sha384");
+        return EVP_get_digestbyname("SHA384");
+    }
+
+#endif /* WOLFSSL_SHA384 */
+
+#ifdef WOLFSSL_SHA512
+
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha512(void)
+    {
+        WOLFSSL_ENTER("EVP_sha512");
+        return EVP_get_digestbyname("SHA512");
+    }
+
+#endif /* WOLFSSL_SHA512 */
+
+#ifdef WOLFSSL_SHA3
+#ifndef WOLFSSL_NOSHA3_224
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_224(void)
+    {
+        WOLFSSL_ENTER("EVP_sha3_224");
+        return EVP_get_digestbyname("SHA3_224");
+    }
+#endif /* WOLFSSL_NOSHA3_224 */
+
+
+#ifndef WOLFSSL_NOSHA3_256
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_256(void)
+    {
+        WOLFSSL_ENTER("EVP_sha3_256");
+        return EVP_get_digestbyname("SHA3_256");
+    }
+#endif /* WOLFSSL_NOSHA3_256 */
+
+    /* note: SHA3-384 has no WOLFSSL_NOSHA3_384 opt-out guard here */
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_384(void)
+    {
+        WOLFSSL_ENTER("EVP_sha3_384");
+        return EVP_get_digestbyname("SHA3_384");
+    }
+
+#ifndef WOLFSSL_NOSHA3_512
+    const WOLFSSL_EVP_MD* wolfSSL_EVP_sha3_512(void)
+    {
+        WOLFSSL_ENTER("EVP_sha3_512");
+        return EVP_get_digestbyname("SHA3_512");
+    }
+#endif /* WOLFSSL_NOSHA3_512 */
+#endif /* WOLFSSL_SHA3 */
+
+    /* Allocate and zero-initialize a digest context.
+     * Returns NULL on allocation failure; free with EVP_MD_CTX_free(). */
+    WOLFSSL_EVP_MD_CTX *wolfSSL_EVP_MD_CTX_new(void)
+    {
+        WOLFSSL_EVP_MD_CTX* ctx;
+        WOLFSSL_ENTER("EVP_MD_CTX_new");
+        ctx = (WOLFSSL_EVP_MD_CTX*)XMALLOC(sizeof *ctx, NULL,
+                                                       DYNAMIC_TYPE_OPENSSL);
+        if (ctx){
+            wolfSSL_EVP_MD_CTX_init(ctx);
+        }
+        return ctx;
+    }
+
+    /* Clean up (free hash state, zero the struct) and free a digest
+     * context created by EVP_MD_CTX_new(); NULL is a safe no-op. */
+    WOLFSSL_API void wolfSSL_EVP_MD_CTX_free(WOLFSSL_EVP_MD_CTX *ctx)
+    {
+        if (ctx) {
+            WOLFSSL_ENTER("EVP_MD_CTX_free");
+                wolfSSL_EVP_MD_CTX_cleanup(ctx);
+                XFREE(ctx, NULL, DYNAMIC_TYPE_OPENSSL);
+            }
+    }
+
+    /* returns the NID of message digest used by the ctx */
+    int wolfSSL_EVP_MD_CTX_type(const WOLFSSL_EVP_MD_CTX *ctx) {
+        const struct s_ent *ent;
+
+        WOLFSSL_ENTER("EVP_MD_CTX_type");
+
+        if (ctx) {
+            for(ent = md_tbl; ent->name != NULL; ent++) {
+                if (ctx->macType == ent->macType) {
+                    return ent->nid;
+                }
+            }
+            /* Return whatever we got (macType not found in the table,
+             * e.g. NID_hmac) */
+            return ctx->macType;
+        }
+        return 0;
+    }
+
+
+    /* returns WOLFSSL_SUCCESS on success */
+    /* Non-_ex copy: identical to copy_ex (no ctx reset is performed). */
+    int wolfSSL_EVP_MD_CTX_copy(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in)
+    {
+        return wolfSSL_EVP_MD_CTX_copy_ex(out, in);
+    }
+
+    /* returns digest size */
+    int wolfSSL_EVP_MD_CTX_size(const WOLFSSL_EVP_MD_CTX *ctx) {
+        return(wolfSSL_EVP_MD_size(wolfSSL_EVP_MD_CTX_md(ctx)));
+    }
+    /* returns block size */
+    int wolfSSL_EVP_MD_CTX_block_size(const WOLFSSL_EVP_MD_CTX *ctx) {
+        return(wolfSSL_EVP_MD_block_size(wolfSSL_EVP_MD_CTX_md(ctx)));
+    }
+
+    /* Deep copy of EVP_MD hasher
+     * return WOLFSSL_SUCCESS on success */
+    /* Called after a shallow XMEMCPY of the ctx: re-copies the underlying
+     * wolfCrypt hash/HMAC state so des owns independent internal buffers.
+     * Returns WOLFSSL_FAILURE for unknown macType. */
+    static int wolfSSL_EVP_MD_Copy_Hasher(WOLFSSL_EVP_MD_CTX* des,
+            const WOLFSSL_EVP_MD_CTX* src)
+    {
+        if (src->macType == NID_hmac) {
+            wolfSSL_HmacCopy(&des->hash.hmac, (Hmac*)&src->hash.hmac);
+        }
+        else {
+            switch (src->macType) {
+            #ifndef NO_MD5
+                case WC_HASH_TYPE_MD5:
+                    wc_Md5Copy((wc_Md5*)&src->hash.digest,
+                            (wc_Md5*)&des->hash.digest);
+                    break;
+            #endif /* !NO_MD5 */
+
+            #ifndef NO_SHA
+                case WC_HASH_TYPE_SHA:
+                    wc_ShaCopy((wc_Sha*)&src->hash.digest,
+                            (wc_Sha*)&des->hash.digest);
+                    break;
+            #endif /* !NO_SHA */
+
+            #ifdef WOLFSSL_SHA224
+                case WC_HASH_TYPE_SHA224:
+                    wc_Sha224Copy((wc_Sha224*)&src->hash.digest,
+                            (wc_Sha224*)&des->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA224 */
+
+            #ifndef NO_SHA256
+                case WC_HASH_TYPE_SHA256:
+                    wc_Sha256Copy((wc_Sha256*)&src->hash.digest,
+                            (wc_Sha256*)&des->hash.digest);
+                    break;
+            #endif /* !NO_SHA256 */
+
+            #ifdef WOLFSSL_SHA384
+                case WC_HASH_TYPE_SHA384:
+                    wc_Sha384Copy((wc_Sha384*)&src->hash.digest,
+                            (wc_Sha384*)&des->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA384 */
+            #ifdef WOLFSSL_SHA512
+                case WC_HASH_TYPE_SHA512:
+                    wc_Sha512Copy((wc_Sha512*)&src->hash.digest,
+                            (wc_Sha512*)&des->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA512 */
+            #ifdef WOLFSSL_SHA3
+            #ifndef WOLFSSL_NOSHA3_224
+                case WC_HASH_TYPE_SHA3_224:
+                    wc_Sha3_224_Copy((wc_Sha3*)&src->hash.digest,
+                            (wc_Sha3*)&des->hash.digest);
+                    break;
+            #endif
+
+            #ifndef WOLFSSL_NOSHA3_256
+                case WC_HASH_TYPE_SHA3_256:
+                    wc_Sha3_256_Copy((wc_Sha3*)&src->hash.digest,
+                            (wc_Sha3*)&des->hash.digest);
+                    break;
+            #endif
+
+                case WC_HASH_TYPE_SHA3_384:
+                    wc_Sha3_384_Copy((wc_Sha3*)&src->hash.digest,
+                            (wc_Sha3*)&des->hash.digest);
+                    break;
+
+            #ifndef WOLFSSL_NOSHA3_512
+                case WC_HASH_TYPE_SHA3_512:
+                    wc_Sha3_512_Copy((wc_Sha3*)&src->hash.digest,
+                            (wc_Sha3*)&des->hash.digest);
+                    break;
+            #endif
+            #endif
+                default:
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* copies structure in to the structure out
+     *
+     * returns WOLFSSL_SUCCESS on success */
+    int wolfSSL_EVP_MD_CTX_copy_ex(WOLFSSL_EVP_MD_CTX *out, const WOLFSSL_EVP_MD_CTX *in)
+    {
+        if ((out == NULL) || (in == NULL)) return WOLFSSL_FAILURE;
+        WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_copy_ex");
+        /* shallow copy first, then deep-copy the pkey ctx and hash state */
+        XMEMCPY(out, in, sizeof(WOLFSSL_EVP_MD_CTX));
+        if (in->pctx != NULL) {
+            out->pctx = wolfSSL_EVP_PKEY_CTX_new(in->pctx->pkey, NULL);
+            if (out->pctx == NULL)
+                return WOLFSSL_FAILURE;
+        }
+        return wolfSSL_EVP_MD_Copy_Hasher(out, (WOLFSSL_EVP_MD_CTX*)in);
+    }
+
+    /* Zero the entire digest context (caller must ensure ctx is non-NULL). */
+    void wolfSSL_EVP_MD_CTX_init(WOLFSSL_EVP_MD_CTX* ctx)
+    {
+        WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_init");
+        XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_MD_CTX));
+    }
+
+    /* Return the EVP_MD (table name string) matching ctx->macType,
+     * or NULL when ctx is NULL or the type is not in md_tbl. */
+    const WOLFSSL_EVP_MD *wolfSSL_EVP_MD_CTX_md(const WOLFSSL_EVP_MD_CTX *ctx)
+    {
+        const struct s_ent *ent;
+        if (ctx == NULL)
+            return NULL;
+        WOLFSSL_ENTER("EVP_MD_CTX_md");
+        for(ent = md_tbl; ent->name != NULL; ent++) {
+            if(ctx->macType == ent->macType) {
+                return (const WOLFSSL_EVP_MD *)ent->name;
+            }
+        }
+        return (WOLFSSL_EVP_MD *)NULL;
+    }
+
+    /* AES cipher getters: each returns the cached name-string handle,
+     * lazily populating the cache via wolfSSL_EVP_init() on first use. */
+    #ifndef NO_AES
+
+    #ifdef HAVE_AES_CBC
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cbc");
+        if (EVP_AES_128_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CBC;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cbc");
+        if (EVP_AES_192_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CBC;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cbc");
+        if (EVP_AES_256_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CBC;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* HAVE_AES_CBC */
+
+    #ifdef WOLFSSL_AES_CFB
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb1(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cfb1");
+        if (EVP_AES_128_CFB1 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CFB1;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb1(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cfb1");
+        if (EVP_AES_192_CFB1 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CFB1;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb1(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cfb1");
+        if (EVP_AES_256_CFB1 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CFB1;
+    }
+    #endif /* WOLFSSL_AES_256 */
+
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb8(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cfb8");
+        if (EVP_AES_128_CFB8 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CFB8;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb8(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cfb8");
+        if (EVP_AES_192_CFB8 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CFB8;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb8(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cfb8");
+        if (EVP_AES_256_CFB8 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CFB8;
+    }
+    #endif /* WOLFSSL_AES_256 */
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_cfb128(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_cfb128");
+        if (EVP_AES_128_CFB128 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CFB128;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_cfb128(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_cfb128");
+        if (EVP_AES_192_CFB128 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CFB128;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_cfb128(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_cfb128");
+        if (EVP_AES_256_CFB128 == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CFB128;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* WOLFSSL_AES_CFB */
+
+    #ifdef WOLFSSL_AES_OFB
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ofb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ofb");
+        if (EVP_AES_128_OFB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_OFB;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ofb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ofb");
+        if (EVP_AES_192_OFB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_OFB;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ofb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ofb");
+        if (EVP_AES_256_OFB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_OFB;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* WOLFSSL_AES_OFB */
+
+    #ifdef WOLFSSL_AES_XTS
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_xts(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_xts");
+        if (EVP_AES_128_XTS == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_XTS;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_xts(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_xts");
+        if (EVP_AES_256_XTS == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_XTS;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* WOLFSSL_AES_XTS */
+
+    #ifdef HAVE_AESGCM
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_gcm(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_gcm");
+        if (EVP_AES_128_GCM == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_GCM;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_gcm(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_gcm");
+        if (EVP_AES_192_GCM == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_GCM;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_gcm(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_gcm");
+        if (EVP_AES_256_GCM == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_GCM;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* HAVE_AESGCM */
+
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ctr(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ctr");
+        if (EVP_AES_128_CTR == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_CTR;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ctr(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ctr");
+        if (EVP_AES_192_CTR == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_CTR;
+    }
+    #endif /* WOLFSSL_AES_192 */
+
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ctr(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ctr");
+        if (EVP_AES_256_CTR == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_CTR;
+    }
+    #endif /* WOLFSSL_AES_256 */
+
+    #ifdef WOLFSSL_AES_128
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_128_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_128_ecb");
+        if (EVP_AES_128_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_128_ECB;
+    }
+    #endif /* WOLFSSL_AES_128 */
+
+
+    #ifdef WOLFSSL_AES_192
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_192_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_192_ecb");
+        if (EVP_AES_192_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_192_ECB;
+    }
+    #endif /* WOLFSSL_AES_192*/
+
+
+    #ifdef WOLFSSL_AES_256
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_aes_256_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_aes_256_ecb");
+        if (EVP_AES_256_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_AES_256_ECB;
+    }
+    #endif /* WOLFSSL_AES_256 */
+    #endif /* NO_AES */
+
+/* DES/3DES, RC4, IDEA and null-cipher getters, same lazy-init pattern. */
+#ifndef NO_DES3
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_des_cbc");
+        if (EVP_DES_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_DES_CBC;
+    }
+#ifdef WOLFSSL_DES_ECB
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_des_ecb");
+        if (EVP_DES_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_DES_ECB;
+    }
+#endif
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ede3_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_des_ede3_cbc");
+        if (EVP_DES_EDE3_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_DES_EDE3_CBC;
+    }
+#ifdef WOLFSSL_DES_ECB
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_des_ede3_ecb(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_des_ede3_ecb");
+        if (EVP_DES_EDE3_ECB == NULL)
+            wolfSSL_EVP_init();
+        return EVP_DES_EDE3_ECB;
+    }
+#endif
+#endif /* NO_DES3 */
+
+#ifndef NO_RC4
+    /* RC4 is stream-based; handle is a static name, no lazy init needed */
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_rc4(void)
+    {
+        static const char* type = "ARC4";
+        WOLFSSL_ENTER("wolfSSL_EVP_rc4");
+        return type;
+    }
+#endif
+
+#ifdef HAVE_IDEA
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_idea_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_idea_cbc");
+        if (EVP_IDEA_CBC == NULL)
+            wolfSSL_EVP_init();
+        return EVP_IDEA_CBC;
+    }
+#endif
+    /* identity "cipher" used by some OpenSSL callers */
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_enc_null(void)
+    {
+        static const char* type = "NULL";
+        WOLFSSL_ENTER("wolfSSL_EVP_enc_null");
+        return type;
+    }
+
+    /* Release the resources held by a digest context: frees the pkey ctx,
+     * frees the algorithm-specific hash/HMAC state, then zeroizes the
+     * struct.  Returns 1 on success, WOLFSSL_FAILURE on unknown macType.
+     * NOTE(review): ctx is dereferenced without a NULL check — callers
+     * (e.g. EVP_MD_CTX_free) guard it. */
+    int wolfSSL_EVP_MD_CTX_cleanup(WOLFSSL_EVP_MD_CTX* ctx)
+    {
+        WOLFSSL_ENTER("EVP_MD_CTX_cleanup");
+        if (ctx->pctx != NULL)
+            wolfSSL_EVP_PKEY_CTX_free(ctx->pctx);
+
+        if (ctx->macType == NID_hmac) {
+            wc_HmacFree(&ctx->hash.hmac);
+        }
+        else {
+            switch (ctx->macType) {
+            #ifndef NO_MD5
+                case WC_HASH_TYPE_MD5:
+                    wc_Md5Free((wc_Md5*)&ctx->hash.digest);
+                    break;
+            #endif /* !NO_MD5 */
+
+            #ifndef NO_SHA
+                case WC_HASH_TYPE_SHA:
+                    wc_ShaFree((wc_Sha*)&ctx->hash.digest);
+                    break;
+            #endif /* !NO_SHA */
+
+            #ifdef WOLFSSL_SHA224
+                case WC_HASH_TYPE_SHA224:
+                    wc_Sha224Free((wc_Sha224*)&ctx->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA224 */
+
+            #ifndef NO_SHA256
+                case WC_HASH_TYPE_SHA256:
+                    wc_Sha256Free((wc_Sha256*)&ctx->hash.digest);
+                    break;
+            #endif /* !NO_SHA256 */
+
+            #ifdef WOLFSSL_SHA384
+                case WC_HASH_TYPE_SHA384:
+                    wc_Sha384Free((wc_Sha384*)&ctx->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA384 */
+            #ifdef WOLFSSL_SHA512
+                case WC_HASH_TYPE_SHA512:
+                    wc_Sha512Free((wc_Sha512*)&ctx->hash.digest);
+                    break;
+            #endif /* WOLFSSL_SHA512 */
+            #ifdef WOLFSSL_SHA3
+            #ifndef WOLFSSL_NOSHA3_224
+                case WC_HASH_TYPE_SHA3_224:
+                    wc_Sha3_224_Free((wc_Sha3*)&ctx->hash.digest);
+                    break;
+            #endif
+
+            #ifndef WOLFSSL_NOSHA3_256
+                case WC_HASH_TYPE_SHA3_256:
+                    wc_Sha3_256_Free((wc_Sha3*)&ctx->hash.digest);
+                    break;
+            #endif
+
+                case WC_HASH_TYPE_SHA3_384:
+                    wc_Sha3_384_Free((wc_Sha3*)&ctx->hash.digest);
+                    break;
+
+            #ifndef WOLFSSL_NOSHA3_512
+                case WC_HASH_TYPE_SHA3_512:
+                    wc_Sha3_512_Free((wc_Sha3*)&ctx->hash.digest);
+                    break;
+            #endif
+            #endif
+                default:
+                    return WOLFSSL_FAILURE;
+            }
+        }
+        /* scrub any key/hash material left in the context */
+        ForceZero(ctx, sizeof(*ctx));
+        ctx->macType = WC_HASH_TYPE_NONE;
+        return 1;
+    }
+
+    /* Reset a cipher context to a pristine state; NULL ctx is a no-op. */
+    void wolfSSL_EVP_CIPHER_CTX_init(WOLFSSL_EVP_CIPHER_CTX* ctx)
+    {
+        WOLFSSL_ENTER("EVP_CIPHER_CTX_init");
+        if (ctx) {
+            XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_CIPHER_CTX));
+            ctx->cipherType = WOLFSSL_EVP_CIPH_TYPE_INIT;   /* not yet initialized */
+            ctx->keyLen     = 0;
+            ctx->enc        = 1;      /* start in encrypt mode */
+        }
+    }
+
+#if defined(HAVE_AESGCM) && !defined(HAVE_SELFTEST)
+    /* Increment a big-endian counter of ctrSz bytes in place, propagating
+     * the carry from the last byte toward the first. */
+    static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)
+    {
+        int i;
+        for (i = ctrSz-1; i >= 0; i--) {
+            if (++ctr[i])
+                break;                      /* no carry, done */
+        }
+    }
+#endif
+
+    /* This function allows cipher specific parameters to be
+    determined and set. */
+    /* Returns WOLFSSL_SUCCESS / WOLFSSL_FAILURE.  arg/ptr meaning depends
+     * on the control 'type', mirroring OpenSSL's EVP_CIPHER_CTX_ctrl. */
+    int wolfSSL_EVP_CIPHER_CTX_ctrl(WOLFSSL_EVP_CIPHER_CTX *ctx, int type, \
+                                     int arg, void *ptr)
+    {
+        int ret = WOLFSSL_FAILURE;
+#if defined(HAVE_AESGCM) && !defined(HAVE_SELFTEST) && !defined(WC_NO_RNG)
+        WC_RNG rng;
+#endif
+        if (ctx == NULL)
+            return WOLFSSL_FAILURE;
+
+        (void)arg;
+        (void)ptr;
+
+        WOLFSSL_ENTER("EVP_CIPHER_CTX_ctrl");
+
+        switch(type) {
+            case EVP_CTRL_INIT:
+                wolfSSL_EVP_CIPHER_CTX_init(ctx);
+                /* NOTE(review): ctx already checked non-NULL above, so
+                 * this condition is always true */
+                if(ctx)
+                    ret = WOLFSSL_SUCCESS;
+                break;
+            case EVP_CTRL_SET_KEY_LENGTH:
+                ret = wolfSSL_EVP_CIPHER_CTX_set_key_length(ctx, arg);
+                break;
+#if defined(HAVE_AESGCM) && !defined(HAVE_SELFTEST) && !defined(WC_NO_RNG)
+            case EVP_CTRL_GCM_SET_IVLEN:
+                /* GCM IV length bounded to 1..16 bytes here */
+                if(arg <= 0 || arg > 16)
+                    return WOLFSSL_FAILURE;
+                ret = wolfSSL_EVP_CIPHER_CTX_set_iv_length(ctx, arg);
+                break;
+            case EVP_CTRL_AEAD_SET_IV_FIXED:
+                if (arg == -1) {
+                    /* arg == -1 copies ctx->ivSz from ptr */
+                    ret = wolfSSL_EVP_CIPHER_CTX_set_iv(ctx, (byte*)ptr, ctx->ivSz);
+                }
+                else {
+                    /*
+                     * Fixed field must be at least 4 bytes and invocation
+                     * field at least 8.
+                     */
+                    if ((arg < 4) || (ctx->ivSz - arg) < 8) {
+                        WOLFSSL_MSG("Fixed field or invocation field too short");
+                        ret = WOLFSSL_FAILURE;
+                        break;
+                    }
+                    if (wc_InitRng(&rng) != 0) {
+                        WOLFSSL_MSG("wc_InitRng failed");
+                        ret = WOLFSSL_FAILURE;
+                        break;
+                    }
+                    if (arg) {
+                        /* copy caller-supplied fixed field prefix */
+                        XMEMCPY(ctx->iv, ptr, arg);
+                    }
+                    /* randomize the remaining (invocation) portion */
+                    if (wc_RNG_GenerateBlock(&rng, ctx->iv   + arg,
+                                                   ctx->ivSz - arg) != 0) {
+                        /* rng is freed immediately after if block so no need
+                         * to do it here
+                         */
+                        WOLFSSL_MSG("wc_RNG_GenerateBlock failed");
+                        ret = WOLFSSL_FAILURE;
+                    }
+
+                    if (wc_FreeRng(&rng) != 0) {
+                        WOLFSSL_MSG("wc_FreeRng failed");
+                        ret = WOLFSSL_FAILURE;
+                        break;
+                    }
+                }
+                break;
+#if !defined(_WIN32) && !defined(HAVE_FIPS)
+            case EVP_CTRL_GCM_IV_GEN:
+                if (ctx->cipher.aes.keylen == 0 || ctx->ivSz == 0) {
+                    ret = WOLFSSL_FAILURE;
+                    WOLFSSL_MSG("Key or IV not set");
+                    break;
+                }
+                /* NOTE(review): ret briefly holds the wc_ error code here
+                 * before being normalized to WOLFSSL_FAILURE; success path
+                 * relies on wc_AesGcmSetExtIV returning 0 == ret value
+                 * expectations of the caller — confirm against callers */
+                if ((ret = wc_AesGcmSetExtIV(&ctx->cipher.aes, ctx->iv, ctx->ivSz)) != 0) {
+                    WOLFSSL_MSG("wc_AesGcmSetIV failed");
+                    ret = WOLFSSL_FAILURE;
+                }
+                /* OpenSSL increments the IV. Not sure why */
+                IncCtr(ctx->iv, ctx->ivSz);
+                break;
+#endif
+            case EVP_CTRL_AEAD_SET_TAG:
+                if(arg <= 0 || arg > 16 || (ptr == NULL))
+                    return WOLFSSL_FAILURE;
+
+                XMEMCPY(ctx->authTag, ptr, arg);
+                ctx->authTagSz = arg;
+                ret = WOLFSSL_SUCCESS;
+
+                break;
+            case EVP_CTRL_AEAD_GET_TAG:
+                if(arg <= 0 || arg > 16)
+                    return WOLFSSL_FAILURE;
+
+                XMEMCPY(ptr, ctx->authTag, arg);
+                ret = WOLFSSL_SUCCESS;
+                break;
+#endif /* HAVE_AESGCM && !HAVE_SELFTEST && !WC_NO_RNG */
+            default:
+                WOLFSSL_MSG("EVP_CIPHER_CTX_ctrl operation not yet handled");
+                ret = WOLFSSL_FAILURE;
+        }
+        return ret;
+    }
+
+    /* WOLFSSL_SUCCESS on ok */
+    /* Marks the context uninitialized; does not scrub key material
+     * (contrast with EVP_MD_CTX_cleanup's ForceZero). */
+    int wolfSSL_EVP_CIPHER_CTX_cleanup(WOLFSSL_EVP_CIPHER_CTX* ctx)
+    {
+        WOLFSSL_ENTER("EVP_CIPHER_CTX_cleanup");
+        if (ctx) {
+            ctx->cipherType = WOLFSSL_EVP_CIPH_TYPE_INIT;  /* not yet initialized  */
+            ctx->keyLen     = 0;
+        }
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* Permanent stub for Qt compilation. */
+    #if defined(WOLFSSL_QT) && !defined(NO_WOLFSSL_STUB)
+    /* RC2-CBC is not implemented; always returns NULL. */
+    const WOLFSSL_EVP_CIPHER* wolfSSL_EVP_rc2_cbc(void)
+    {
+        WOLFSSL_ENTER("wolfSSL_EVP_rc2_cbc");
+        WOLFSSL_STUB("EVP_rc2_cbc");
+        return NULL;
+    }
+    #endif
+
+#if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_PWDBASED)
+
+    /* OpenSSL-compatible EVP_BytesToKey: derive key and IV for 'type'
+     * from password data using repeated hashing (PBKDF1-style).
+     * With data == NULL only the key size is returned.
+     * Returns the derived key size on success, 0 on failure
+     * (OpenSSL-compatible failure value). */
+    int wolfSSL_EVP_BytesToKey(const WOLFSSL_EVP_CIPHER* type,
+                       const WOLFSSL_EVP_MD* md, const byte* salt,
+                       const byte* data, int sz, int count, byte* key, byte* iv)
+    {
+        int ret;
+        int hashType = WC_HASH_TYPE_NONE;
+    #ifdef WOLFSSL_SMALL_STACK
+        EncryptedInfo* info;
+    #else
+        EncryptedInfo  info[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        info = (EncryptedInfo*)XMALLOC(sizeof(EncryptedInfo), NULL,
+                                       DYNAMIC_TYPE_ENCRYPTEDINFO);
+        if (info == NULL) {
+            WOLFSSL_MSG("malloc failed");
+            return WOLFSSL_FAILURE;
+        }
+    #endif
+
+        XMEMSET(info, 0, sizeof(EncryptedInfo));
+
+        /* map the cipher name to its key/IV sizes */
+        ret = wc_EncryptedInfoGet(info, type);
+        if (ret < 0)
+            goto end;
+
+        if (data == NULL) {
+            /* query mode: report required key size only */
+            ret = info->keySz;
+            goto end;
+        }
+
+        ret = wolfSSL_EVP_get_hashinfo(md, &hashType, NULL);
+        if (ret == WOLFSSL_FAILURE)
+            goto end;
+
+        ret = wc_PBKDF1_ex(key, info->keySz, iv, info->ivSz, data, sz, salt,
+                           EVP_SALT_SIZE, count, hashType, NULL);
+        if (ret == 0)
+            ret = info->keySz;
+
+    end:
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(info, NULL, DYNAMIC_TYPE_ENCRYPTEDINFO);
+    #endif
+        if (ret < 0)
+            return 0; /* failure - for compatibility */
+
+        return ret;
+    }
+
+#endif /* WOLFSSL_ENCRYPTED_KEYS && !NO_PWDBASED */
+
+#ifndef NO_AES
+    /* Wrapper around wc_AesSetKey/wc_AesSetKeyDirect that preserves the
+     * existing IV register when iv == NULL (OpenSSL semantics), since
+     * wc_AesSetKey would clear it.  Returns wolfCrypt 0/err code. */
+    static int   AesSetKey_ex(Aes* aes, const byte* key, word32 len,
+                              const byte* iv, int dir, int direct)
+    {
+        int ret;
+        /* wc_AesSetKey clear aes.reg if iv == NULL.
+           Keep IV for openSSL compatibility */
+        if (iv == NULL)
+            XMEMCPY((byte *)aes->tmp, (byte *)aes->reg, AES_BLOCK_SIZE);
+        if (direct) {
+        #if defined(WOLFSSL_AES_DIRECT)
+            ret = wc_AesSetKeyDirect(aes, key, len, iv, dir);
+        #else
+            ret = NOT_COMPILED_IN;
+        #endif
+        }
+        else {
+            ret = wc_AesSetKey(aes, key, len, iv, dir);
+        }
+        if (iv == NULL)
+            XMEMCPY((byte *)aes->reg, (byte *)aes->tmp, AES_BLOCK_SIZE);
+        return ret;
+    }
+#endif
+
+ /* return WOLFSSL_SUCCESS on ok, 0 on failure to match API compatibility */
+ int wolfSSL_EVP_CipherInit(WOLFSSL_EVP_CIPHER_CTX* ctx,
+ const WOLFSSL_EVP_CIPHER* type, const byte* key,
+ const byte* iv, int enc)
+ {
+ int ret = 0;
+ (void)key;
+ (void)iv;
+ (void)enc;
+
+ WOLFSSL_ENTER("wolfSSL_EVP_CipherInit");
+ if (ctx == NULL) {
+ WOLFSSL_MSG("no ctx");
+ return WOLFSSL_FAILURE;
+ }
+
+ if (type == NULL && ctx->cipherType == WOLFSSL_EVP_CIPH_TYPE_INIT) {
+ WOLFSSL_MSG("no type set");
+ return WOLFSSL_FAILURE;
+ }
+ if (ctx->cipherType == WOLFSSL_EVP_CIPH_TYPE_INIT){
+ /* only first EVP_CipherInit invoke. ctx->cipherType is set below */
+ XMEMSET(&ctx->cipher, 0, sizeof(ctx->cipher));
+ ctx->flags = 0;
+ }
+ /* always clear buffer state */
+ ctx->bufUsed = 0;
+ ctx->lastUsed = 0;
+
+#ifdef HAVE_WOLFSSL_EVP_CIPHER_CTX_IV
+ if (!iv && ctx->ivSz) {
+ iv = ctx->iv;
+ }
+#endif
+
+#ifndef NO_AES
+ #ifdef HAVE_AES_CBC
+ #ifdef WOLFSSL_AES_128
+ if (ctx->cipherType == AES_128_CBC_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_128_CBC, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_128_CBC");
+ ctx->cipherType = AES_128_CBC_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE;
+ ctx->keyLen = 16;
+ ctx->block_size = AES_BLOCK_SIZE;
+ ctx->ivSz = AES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_128 */
+ #ifdef WOLFSSL_AES_192
+ if (ctx->cipherType == AES_192_CBC_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_192_CBC, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_192_CBC");
+ ctx->cipherType = AES_192_CBC_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = AES_BLOCK_SIZE;
+ ctx->ivSz = AES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_192 */
+ #ifdef WOLFSSL_AES_256
+ if (ctx->cipherType == AES_256_CBC_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_256_CBC, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_256_CBC");
+ ctx->cipherType = AES_256_CBC_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE;
+ ctx->keyLen = 32;
+ ctx->block_size = AES_BLOCK_SIZE;
+ ctx->ivSz = AES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 0);
+ if (ret != 0){
+ WOLFSSL_MSG("AesSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0){
+ WOLFSSL_MSG("wc_AesSetIV() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ }
+ #endif /* WOLFSSL_AES_256 */
+ #endif /* HAVE_AES_CBC */
+#if !defined(_WIN32) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+ #ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ if (ctx->cipherType == AES_128_GCM_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_128_GCM, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_128_GCM");
+ ctx->cipherType = AES_128_GCM_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_GCM_MODE;
+ ctx->keyLen = 16;
+ ctx->block_size = AES_BLOCK_SIZE;
+ ctx->authTagSz = AES_BLOCK_SIZE;
+ ctx->ivSz = GCM_NONCE_MID_SZ;
+
+ XMEMSET(ctx->authTag, 0, ctx->authTagSz);
+ if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) {
+ WOLFSSL_MSG("wc_AesGcmSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) {
+ WOLFSSL_MSG("wc_AesGcmSetExtIV() failed");
+ return WOLFSSL_FAILURE;
+ }
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ }
+ #endif /* WOLFSSL_AES_128 */
+ #ifdef WOLFSSL_AES_192
+ if (ctx->cipherType == AES_192_GCM_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_192_GCM, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_192_GCM");
+ ctx->cipherType = AES_192_GCM_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_GCM_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = AES_BLOCK_SIZE;
+ ctx->authTagSz = AES_BLOCK_SIZE;
+ ctx->ivSz = GCM_NONCE_MID_SZ;
+
+ XMEMSET(ctx->authTag, 0, ctx->authTagSz);
+ if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) {
+ WOLFSSL_MSG("wc_AesGcmSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) {
+ WOLFSSL_MSG("wc_AesGcmSetExtIV() failed");
+ return WOLFSSL_FAILURE;
+ }
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ }
+ #endif /* WOLFSSL_AES_192 */
+ #ifdef WOLFSSL_AES_256
+ if (ctx->cipherType == AES_256_GCM_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_256_GCM, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_256_GCM");
+ ctx->cipherType = AES_256_GCM_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_GCM_MODE;
+ ctx->keyLen = 32;
+ ctx->block_size = AES_BLOCK_SIZE;
+ ctx->authTagSz = AES_BLOCK_SIZE;
+ ctx->ivSz = GCM_NONCE_MID_SZ;
+
+ XMEMSET(ctx->authTag, 0, ctx->authTagSz);
+ if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) {
+ WOLFSSL_MSG("wc_AesGcmSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) {
+ WOLFSSL_MSG("wc_AesGcmSetExtIV() failed");
+ return WOLFSSL_FAILURE;
+ }
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ }
+ #endif /* WOLFSSL_AES_256 */
+ #endif /* HAVE_AESGCM */
+#endif /* !defined(_WIN32) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST) */
+#ifdef WOLFSSL_AES_COUNTER
+ #ifdef WOLFSSL_AES_128
+ if (ctx->cipherType == AES_128_CTR_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_128_CTR, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_128_CTR");
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->cipherType = AES_128_CTR_TYPE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CTR_MODE;
+ ctx->keyLen = 16;
+ ctx->block_size = NO_PADDING_BLOCK_SIZE;
+ ctx->ivSz = AES_BLOCK_SIZE;
+#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB)
+ ctx->cipher.aes.left = 0;
+#endif
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 1);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_128 */
+ #ifdef WOLFSSL_AES_192
+ if (ctx->cipherType == AES_192_CTR_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_192_CTR, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_192_CTR");
+ ctx->cipherType = AES_192_CTR_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CTR_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = NO_PADDING_BLOCK_SIZE;
+ ctx->ivSz = AES_BLOCK_SIZE;
+#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB)
+ ctx->cipher.aes.left = 0;
+#endif
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 1);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_192 */
+ #ifdef WOLFSSL_AES_256
+ if (ctx->cipherType == AES_256_CTR_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_256_CTR, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_256_CTR");
+ ctx->cipherType = AES_256_CTR_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CTR_MODE;
+ ctx->keyLen = 32;
+ ctx->block_size = NO_PADDING_BLOCK_SIZE;
+ ctx->ivSz = AES_BLOCK_SIZE;
+#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB)
+ ctx->cipher.aes.left = 0;
+#endif
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 1);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_256 */
+#endif /* WOLFSSL_AES_COUNTER */
+ #ifdef WOLFSSL_AES_128
+ if (ctx->cipherType == AES_128_ECB_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_128_ECB, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_128_ECB");
+ ctx->cipherType = AES_128_ECB_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE;
+ ctx->keyLen = 16;
+ ctx->block_size = AES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL,
+ ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 1);
+ }
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ #endif /* WOLFSSL_AES_128 */
+ #ifdef WOLFSSL_AES_192
+ if (ctx->cipherType == AES_192_ECB_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_192_ECB, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_192_ECB");
+ ctx->cipherType = AES_192_ECB_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = AES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL,
+ ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 1);
+ }
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ #endif /* WOLFSSL_AES_192 */
+ #ifdef WOLFSSL_AES_256
+ if (ctx->cipherType == AES_256_ECB_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_256_ECB, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_256_ECB");
+ ctx->cipherType = AES_256_ECB_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE;
+ ctx->keyLen = 32;
+ ctx->block_size = AES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL,
+ ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 1);
+ }
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ #endif /* WOLFSSL_AES_256 */
+ #ifdef WOLFSSL_AES_CFB
+ #ifdef WOLFSSL_AES_128
+ if (ctx->cipherType == AES_128_CFB1_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_128_CFB1, EVP_AESCFB_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_128_CFB1");
+ ctx->cipherType = AES_128_CFB1_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE;
+ ctx->keyLen = 16;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_128 */
+ #ifdef WOLFSSL_AES_192
+ if (ctx->cipherType == AES_192_CFB1_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_192_CFB1, EVP_AESCFB_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_192_CFB1");
+ ctx->cipherType = AES_192_CFB1_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_192 */
+ #ifdef WOLFSSL_AES_256
+ if (ctx->cipherType == AES_256_CFB1_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_256_CFB1, EVP_AESCFB_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_256_CFB1");
+ ctx->cipherType = AES_256_CFB1_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE;
+ ctx->keyLen = 32;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0){
+ WOLFSSL_MSG("AesSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0){
+ WOLFSSL_MSG("wc_AesSetIV() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ }
+ #endif /* WOLFSSL_AES_256 */
+ #ifdef WOLFSSL_AES_128
+ if (ctx->cipherType == AES_128_CFB8_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_128_CFB8, EVP_AESCFB_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_128_CFB8");
+ ctx->cipherType = AES_128_CFB8_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE;
+ ctx->keyLen = 16;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_128 */
+ #ifdef WOLFSSL_AES_192
+ if (ctx->cipherType == AES_192_CFB8_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_192_CFB8, EVP_AESCFB_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_192_CFB8");
+ ctx->cipherType = AES_192_CFB8_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_192 */
+ #ifdef WOLFSSL_AES_256
+ if (ctx->cipherType == AES_256_CFB8_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_256_CFB8, EVP_AESCFB_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_256_CFB8");
+ ctx->cipherType = AES_256_CFB8_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE;
+ ctx->keyLen = 32;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0){
+ WOLFSSL_MSG("AesSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0){
+ WOLFSSL_MSG("wc_AesSetIV() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ }
+ #endif /* WOLFSSL_AES_256 */
+ #ifdef WOLFSSL_AES_128
+ if (ctx->cipherType == AES_128_CFB128_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_128_CFB128, EVP_AESCFB_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_128_CFB128");
+ ctx->cipherType = AES_128_CFB128_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE;
+ ctx->keyLen = 16;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_128 */
+ #ifdef WOLFSSL_AES_192
+ if (ctx->cipherType == AES_192_CFB128_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_192_CFB128, EVP_AESCFB_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_192_CFB128");
+ ctx->cipherType = AES_192_CFB128_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_192 */
+ #ifdef WOLFSSL_AES_256
+ if (ctx->cipherType == AES_256_CFB128_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_256_CFB128, EVP_AESCFB_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_256_CFB128");
+ ctx->cipherType = AES_256_CFB128_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CFB_MODE;
+ ctx->keyLen = 32;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0){
+ WOLFSSL_MSG("AesSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0){
+ WOLFSSL_MSG("wc_AesSetIV() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ }
+ #endif /* WOLFSSL_AES_256 */
+ #endif /* WOLFSSL_AES_CFB */
+ #ifdef WOLFSSL_AES_OFB
+ #ifdef WOLFSSL_AES_128
+ if (ctx->cipherType == AES_128_OFB_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_128_OFB, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_128_OFB");
+ ctx->cipherType = AES_128_OFB_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_OFB_MODE;
+ ctx->keyLen = 16;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_128 */
+ #ifdef WOLFSSL_AES_192
+ if (ctx->cipherType == AES_192_OFB_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_192_OFB, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_192_OFB");
+ ctx->cipherType = AES_192_OFB_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_OFB_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ #endif /* WOLFSSL_AES_192 */
+ #ifdef WOLFSSL_AES_256
+ if (ctx->cipherType == AES_256_OFB_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_256_OFB, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_256_OFB");
+ ctx->cipherType = AES_256_OFB_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_OFB_MODE;
+ ctx->keyLen = 32;
+ ctx->block_size = 1;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv,
+ AES_ENCRYPTION, 0);
+ if (ret != 0){
+ WOLFSSL_MSG("AesSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ if (iv && key == NULL) {
+ ret = wc_AesSetIV(&ctx->cipher.aes, iv);
+ if (ret != 0){
+ WOLFSSL_MSG("wc_AesSetIV() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ }
+ #endif /* WOLFSSL_AES_256 */
+ #endif /* WOLFSSL_AES_OFB */
+ #ifdef WOLFSSL_AES_XTS
+ #ifdef WOLFSSL_AES_128
+ if (ctx->cipherType == AES_128_XTS_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_128_XTS, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_128_XTS");
+ ctx->cipherType = AES_128_XTS_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_XTS_MODE;
+ ctx->keyLen = 32;
+ ctx->block_size = 1;
+ ctx->ivSz = AES_BLOCK_SIZE;
+
+ if (iv != NULL) {
+ if (iv != ctx->iv) /* Valgrind error when src == dst */
+ XMEMCPY(ctx->iv, iv, ctx->ivSz);
+ }
+ else
+ XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE);
+
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = wc_AesXtsSetKey(&ctx->cipher.xts, key, ctx->keyLen,
+ ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, NULL, 0);
+ if (ret != 0) {
+ WOLFSSL_MSG("wc_AesXtsSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ }
+ #endif /* WOLFSSL_AES_128 */
+ #ifdef WOLFSSL_AES_256
+ if (ctx->cipherType == AES_256_XTS_TYPE ||
+ (type && XSTRNCMP(type, EVP_AES_256_XTS, EVP_AES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_AES_256_XTS");
+ ctx->cipherType = AES_256_XTS_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_XTS_MODE;
+ ctx->keyLen = 64;
+ ctx->block_size = 1;
+ ctx->ivSz = AES_BLOCK_SIZE;
+
+ if (iv != NULL) {
+ if (iv != ctx->iv) /* Valgrind error when src == dst */
+ XMEMCPY(ctx->iv, iv, ctx->ivSz);
+ }
+ else
+ XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE);
+
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = wc_AesXtsSetKey(&ctx->cipher.xts, key, ctx->keyLen,
+ ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, NULL, 0);
+ if (ret != 0) {
+ WOLFSSL_MSG("wc_AesXtsSetKey() failed");
+ return WOLFSSL_FAILURE;
+ }
+ }
+ }
+ #endif /* WOLFSSL_AES_256 */
+ #endif /* WOLFSSL_AES_XTS */
+#endif /* NO_AES */
+
+#ifndef NO_DES3
+ if (ctx->cipherType == DES_CBC_TYPE ||
+ (type && XSTRNCMP(type, EVP_DES_CBC, EVP_DES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_DES_CBC");
+ ctx->cipherType = DES_CBC_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE;
+ ctx->keyLen = 8;
+ ctx->block_size = DES_BLOCK_SIZE;
+ ctx->ivSz = DES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = wc_Des_SetKey(&ctx->cipher.des, key, iv,
+ ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+
+ if (iv && key == NULL)
+ wc_Des_SetIV(&ctx->cipher.des, iv);
+ }
+#ifdef WOLFSSL_DES_ECB
+ else if (ctx->cipherType == DES_ECB_TYPE ||
+ (type && XSTRNCMP(type, EVP_DES_ECB, EVP_DES_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_DES_ECB");
+ ctx->cipherType = DES_ECB_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE;
+ ctx->keyLen = 8;
+ ctx->block_size = DES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ WOLFSSL_MSG("Des_SetKey");
+ ret = wc_Des_SetKey(&ctx->cipher.des, key, NULL,
+ ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+#endif
+ else if (ctx->cipherType == DES_EDE3_CBC_TYPE ||
+ (type &&
+ XSTRNCMP(type, EVP_DES_EDE3_CBC, EVP_DES_EDE3_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_DES_EDE3_CBC");
+ ctx->cipherType = DES_EDE3_CBC_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = DES_BLOCK_SIZE;
+ ctx->ivSz = DES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = wc_Des3_SetKey(&ctx->cipher.des3, key, iv,
+ ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+
+ if (iv && key == NULL) {
+ ret = wc_Des3_SetIV(&ctx->cipher.des3, iv);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+ else if (ctx->cipherType == DES_EDE3_ECB_TYPE ||
+ (type &&
+ XSTRNCMP(type, EVP_DES_EDE3_ECB, EVP_DES_EDE3_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_DES_EDE3_ECB");
+ ctx->cipherType = DES_EDE3_ECB_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_ECB_MODE;
+ ctx->keyLen = 24;
+ ctx->block_size = DES_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = wc_Des3_SetKey(&ctx->cipher.des3, key, NULL,
+ ctx->enc ? DES_ENCRYPTION : DES_DECRYPTION);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+ }
+#endif /* NO_DES3 */
+#ifndef NO_RC4
+ if (ctx->cipherType == ARC4_TYPE || (type &&
+ XSTRNCMP(type, "ARC4", 4) == 0)) {
+ WOLFSSL_MSG("ARC4");
+ ctx->cipherType = ARC4_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_STREAM_CIPHER;
+ ctx->block_size = 1;
+ if (ctx->keyLen == 0) /* user may have already set */
+ ctx->keyLen = 16; /* default to 128 */
+ if (key)
+ wc_Arc4SetKey(&ctx->cipher.arc4, key, ctx->keyLen);
+ }
+#endif /* NO_RC4 */
+#ifdef HAVE_IDEA
+ if (ctx->cipherType == IDEA_CBC_TYPE ||
+ (type && XSTRNCMP(type, EVP_IDEA_CBC, EVP_IDEA_SIZE) == 0)) {
+ WOLFSSL_MSG("EVP_IDEA_CBC");
+ ctx->cipherType = IDEA_CBC_TYPE;
+ ctx->flags &= ~WOLFSSL_EVP_CIPH_MODE;
+ ctx->flags |= WOLFSSL_EVP_CIPH_CBC_MODE;
+ ctx->keyLen = IDEA_KEY_SIZE;
+ ctx->block_size = 8;
+ ctx->ivSz = IDEA_BLOCK_SIZE;
+ if (enc == 0 || enc == 1)
+ ctx->enc = enc ? 1 : 0;
+ if (key) {
+ ret = wc_IdeaSetKey(&ctx->cipher.idea, key, (word16)ctx->keyLen,
+ iv, ctx->enc ? IDEA_ENCRYPTION :
+ IDEA_DECRYPTION);
+ if (ret != 0)
+ return WOLFSSL_FAILURE;
+ }
+
+ if (iv && key == NULL)
+ wc_IdeaSetIV(&ctx->cipher.idea, iv);
+ }
+#endif /* HAVE_IDEA */
+ if (ctx->cipherType == NULL_CIPHER_TYPE || (type &&
+ XSTRNCMP(type, "NULL", 4) == 0)) {
+ WOLFSSL_MSG("NULL cipher");
+ ctx->cipherType = NULL_CIPHER_TYPE;
+ ctx->keyLen = 0;
+ ctx->block_size = 16;
+ }
+#ifdef HAVE_WOLFSSL_EVP_CIPHER_CTX_IV
+ if (iv && iv != ctx->iv) {
+ if (wolfSSL_StoreExternalIV(ctx) != WOLFSSL_SUCCESS) {
+ return WOLFSSL_FAILURE;
+ }
+ }
+#endif
+ (void)ret; /* remove warning. If execution reaches this point, ret=0 */
+ return WOLFSSL_SUCCESS;
+ }
+
+ /* Return the cipher context's configured key length in bytes, or 0 when
+  * ctx is NULL. NOTE: unlike most wrappers in this file the return value is
+  * the length itself, not a WOLFSSL_SUCCESS/WOLFSSL_FAILURE status code. */
+ int wolfSSL_EVP_CIPHER_CTX_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx)
+ {
+ WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_key_length");
+ if (ctx)
+ return ctx->keyLen;
+
+ return 0; /* failure */
+ }
+
+ /* Set the cipher context's key length (bytes).
+  * Returns WOLFSSL_SUCCESS on ok, 0 when ctx is NULL.
+  * NOTE(review): keylen is stored as-is; it is not validated against the
+  * currently selected cipher type. */
+ int wolfSSL_EVP_CIPHER_CTX_set_key_length(WOLFSSL_EVP_CIPHER_CTX* ctx,
+ int keylen)
+ {
+ WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_key_length");
+ if (ctx == NULL)
+ return 0; /* failure */
+
+ ctx->keyLen = keylen;
+ return WOLFSSL_SUCCESS;
+ }
+#if defined(HAVE_AESGCM)
+ /* Set the cipher context's IV length (bytes).
+  * Returns WOLFSSL_SUCCESS on success, otherwise WOLFSSL_FAILURE (NULL ctx).
+  * NOTE(review): ivLen is stored as-is with no range check for the cipher. */
+ int wolfSSL_EVP_CIPHER_CTX_set_iv_length(WOLFSSL_EVP_CIPHER_CTX* ctx,
+ int ivLen)
+ {
+ WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_iv_length");
+ if (ctx == NULL)
+ return WOLFSSL_FAILURE;
+
+ ctx->ivSz = ivLen;
+ return WOLFSSL_SUCCESS;
+ }
+
+ /* Install a caller-supplied IV into the cipher context.
+  * iv/ivLen must match the context's current IV length exactly; the IV is
+  * applied by re-running wolfSSL_EVP_CipherInit with key/type NULL and
+  * enc = -1 (leaving key and direction untouched).
+  * Returns WOLFSSL_SUCCESS on success, otherwise WOLFSSL_FAILURE. */
+ int wolfSSL_EVP_CIPHER_CTX_set_iv(WOLFSSL_EVP_CIPHER_CTX* ctx, byte* iv,
+ int ivLen)
+ {
+ int expectedIvLen;
+
+ /* fix: previously logged "wolfSSL_EVP_CIPHER_CTX_set_iv_length" */
+ WOLFSSL_ENTER("wolfSSL_EVP_CIPHER_CTX_set_iv");
+ if (!ctx || !iv || !ivLen) {
+ return WOLFSSL_FAILURE;
+ }
+
+ expectedIvLen = wolfSSL_EVP_CIPHER_CTX_iv_length(ctx);
+
+ /* reject mismatched lengths; 0 means the cipher takes no IV */
+ if (expectedIvLen == 0 || expectedIvLen != ivLen) {
+ WOLFSSL_MSG("Wrong ivLen value");
+ return WOLFSSL_FAILURE;
+ }
+
+ return wolfSSL_EVP_CipherInit(ctx, NULL, NULL, iv, -1);
+ }
+#endif
+
+ /* One-shot cipher operation on len bytes of src into dst using the cipher
+  * and direction (ctx->enc) previously configured by EVP_CipherInit.
+  * Special case: for AES-GCM types, dst == NULL means src/len are treated
+  * as additional authenticated data (AAD) only. Returns WOLFSSL_SUCCESS on
+  * ok, 0 on failure. */
+ int wolfSSL_EVP_Cipher(WOLFSSL_EVP_CIPHER_CTX* ctx, byte* dst, byte* src,
+ word32 len)
+ {
+ int ret = 0;
+ WOLFSSL_ENTER("wolfSSL_EVP_Cipher");
+
+ /* dst may only be NULL for the GCM AAD-only path */
+ if (ctx == NULL || src == NULL ||
+ (dst == NULL &&
+ ctx->cipherType != AES_128_GCM_TYPE &&
+ ctx->cipherType != AES_192_GCM_TYPE &&
+ ctx->cipherType != AES_256_GCM_TYPE)) {
+ WOLFSSL_MSG("Bad function argument");
+ return 0; /* failure */
+ }
+
+ /* 0xff: cipherType never set — presumably WOLFSSL_EVP_CIPH_TYPE_INIT;
+  * TODO confirm the sentinel value matches the init macro */
+ if (ctx->cipherType == 0xff) {
+ WOLFSSL_MSG("no init");
+ return 0; /* failure */
+ }
+
+ switch (ctx->cipherType) {
+
+#ifndef NO_AES
+#ifdef HAVE_AES_CBC
+ case AES_128_CBC_TYPE :
+ case AES_192_CBC_TYPE :
+ case AES_256_CBC_TYPE :
+ WOLFSSL_MSG("AES CBC");
+ if (ctx->enc)
+ ret = wc_AesCbcEncrypt(&ctx->cipher.aes, dst, src, len);
+ else
+ ret = wc_AesCbcDecrypt(&ctx->cipher.aes, dst, src, len);
+ break;
+#endif /* HAVE_AES_CBC */
+
+#ifdef WOLFSSL_AES_CFB
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+ case AES_128_CFB1_TYPE:
+ case AES_192_CFB1_TYPE:
+ case AES_256_CFB1_TYPE:
+ WOLFSSL_MSG("AES CFB1");
+ if (ctx->enc)
+ ret = wc_AesCfb1Encrypt(&ctx->cipher.aes, dst, src, len);
+ else
+ ret = wc_AesCfb1Decrypt(&ctx->cipher.aes, dst, src, len);
+ break;
+ case AES_128_CFB8_TYPE:
+ case AES_192_CFB8_TYPE:
+ case AES_256_CFB8_TYPE:
+ WOLFSSL_MSG("AES CFB8");
+ if (ctx->enc)
+ ret = wc_AesCfb8Encrypt(&ctx->cipher.aes, dst, src, len);
+ else
+ ret = wc_AesCfb8Decrypt(&ctx->cipher.aes, dst, src, len);
+ break;
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+ case AES_128_CFB128_TYPE:
+ case AES_192_CFB128_TYPE:
+ case AES_256_CFB128_TYPE:
+ WOLFSSL_MSG("AES CFB128");
+ if (ctx->enc)
+ ret = wc_AesCfbEncrypt(&ctx->cipher.aes, dst, src, len);
+ else
+ ret = wc_AesCfbDecrypt(&ctx->cipher.aes, dst, src, len);
+ break;
+#endif /* WOLFSSL_AES_CFB */
+#if defined(WOLFSSL_AES_OFB)
+ case AES_128_OFB_TYPE:
+ case AES_192_OFB_TYPE:
+ case AES_256_OFB_TYPE:
+ WOLFSSL_MSG("AES OFB");
+ if (ctx->enc)
+ ret = wc_AesOfbEncrypt(&ctx->cipher.aes, dst, src, len);
+ else
+ ret = wc_AesOfbDecrypt(&ctx->cipher.aes, dst, src, len);
+ break;
+#endif /* WOLFSSL_AES_OFB */
+#if defined(WOLFSSL_AES_XTS)
+ case AES_128_XTS_TYPE:
+ case AES_256_XTS_TYPE:
+ WOLFSSL_MSG("AES XTS");
+ /* XTS passes ctx->iv as the tweak for every call */
+ if (ctx->enc)
+ ret = wc_AesXtsEncrypt(&ctx->cipher.xts, dst, src, len,
+ ctx->iv, ctx->ivSz);
+ else
+ ret = wc_AesXtsDecrypt(&ctx->cipher.xts, dst, src, len,
+ ctx->iv, ctx->ivSz);
+ break;
+#endif /* WOLFSSL_AES_XTS */
+
+#ifdef HAVE_AESGCM
+ case AES_128_GCM_TYPE :
+ case AES_192_GCM_TYPE :
+ case AES_256_GCM_TYPE :
+ WOLFSSL_MSG("AES GCM");
+ if (ctx->enc) {
+ if (dst){
+ /* encrypt confidential data*/
+ ret = wc_AesGcmEncrypt(&ctx->cipher.aes, dst, src, len,
+ ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+ NULL, 0);
+ }
+ else {
+ /* authenticated, non-confidential data */
+ ret = wc_AesGcmEncrypt(&ctx->cipher.aes, NULL, NULL, 0,
+ ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+ src, len);
+ /* Reset partial authTag error for AAD*/
+ if (ret == AES_GCM_AUTH_E)
+ ret = 0;
+ }
+ }
+ else {
+ if (dst){
+ /* decrypt confidential data*/
+ ret = wc_AesGcmDecrypt(&ctx->cipher.aes, dst, src, len,
+ ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz,
+ NULL, 0);
+ }
+ else {
+ /* authenticated, non-confidential data*/
+ ret = wc_AesGcmDecrypt(&ctx->cipher.aes, NULL, NULL, 0,
+ ctx->iv, ctx->ivSz,
+ ctx->authTag, ctx->authTagSz,
+ src, len);
+ /* Reset partial authTag error for AAD*/
+ if (ret == AES_GCM_AUTH_E)
+ ret = 0;
+ }
+ }
+ break;
+#endif /* HAVE_AESGCM */
+#ifdef HAVE_AES_ECB
+ case AES_128_ECB_TYPE :
+ case AES_192_ECB_TYPE :
+ case AES_256_ECB_TYPE :
+ WOLFSSL_MSG("AES ECB");
+ if (ctx->enc)
+ ret = wc_AesEcbEncrypt(&ctx->cipher.aes, dst, src, len);
+ else
+ ret = wc_AesEcbDecrypt(&ctx->cipher.aes, dst, src, len);
+ break;
+#endif
+#ifdef WOLFSSL_AES_COUNTER
+ case AES_128_CTR_TYPE :
+ case AES_192_CTR_TYPE :
+ case AES_256_CTR_TYPE :
+ WOLFSSL_MSG("AES CTR");
+ /* CTR encrypt and decrypt are the same keystream XOR */
+ ret = wc_AesCtrEncrypt(&ctx->cipher.aes, dst, src, len);
+ break;
+#endif /* WOLFSSL_AES_COUNTER */
+#endif /* NO_AES */
+
+#ifndef NO_DES3
+ case DES_CBC_TYPE :
+ WOLFSSL_MSG("DES CBC");
+ /* NOTE(review): wc_Des_Cbc* return values are not captured here,
+  * unlike the DES3 case below — confirm intentional */
+ if (ctx->enc)
+ wc_Des_CbcEncrypt(&ctx->cipher.des, dst, src, len);
+ else
+ wc_Des_CbcDecrypt(&ctx->cipher.des, dst, src, len);
+ break;
+ case DES_EDE3_CBC_TYPE :
+ WOLFSSL_MSG("DES3 CBC");
+ if (ctx->enc)
+ ret = wc_Des3_CbcEncrypt(&ctx->cipher.des3, dst, src, len);
+ else
+ ret = wc_Des3_CbcDecrypt(&ctx->cipher.des3, dst, src, len);
+ break;
+#ifdef WOLFSSL_DES_ECB
+ case DES_ECB_TYPE :
+ WOLFSSL_MSG("DES ECB");
+ ret = wc_Des_EcbEncrypt(&ctx->cipher.des, dst, src, len);
+ break;
+ case DES_EDE3_ECB_TYPE :
+ WOLFSSL_MSG("DES3 ECB");
+ ret = wc_Des3_EcbEncrypt(&ctx->cipher.des3, dst, src, len);
+ break;
+#endif
+#endif /* !NO_DES3 */
+
+#ifndef NO_RC4
+ case ARC4_TYPE :
+ WOLFSSL_MSG("ARC4");
+ wc_Arc4Process(&ctx->cipher.arc4, dst, src, len);
+ break;
+#endif
+
+#ifdef HAVE_IDEA
+ case IDEA_CBC_TYPE :
+ WOLFSSL_MSG("IDEA CBC");
+ if (ctx->enc)
+ wc_IdeaCbcEncrypt(&ctx->cipher.idea, dst, src, len);
+ else
+ wc_IdeaCbcDecrypt(&ctx->cipher.idea, dst, src, len);
+ break;
+#endif
+ case NULL_CIPHER_TYPE :
+ WOLFSSL_MSG("NULL CIPHER");
+ /* NULL cipher is a plain copy */
+ XMEMCPY(dst, src, len);
+ break;
+
+ default: {
+ WOLFSSL_MSG("bad type");
+ return 0; /* failure */
+ }
+ }
+
+ if (ret != 0) {
+ WOLFSSL_MSG("wolfSSL_EVP_Cipher failure");
+ return 0; /* failure */
+ }
+
+ /* copy the cipher's updated internal IV back out to ctx->iv */
+ if (wolfSSL_StoreExternalIV(ctx) != WOLFSSL_SUCCESS) {
+ return WOLFSSL_FAILURE;
+ }
+
+ WOLFSSL_MSG("wolfSSL_EVP_Cipher success");
+ return WOLFSSL_SUCCESS; /* success */
+ }
+
+ /* Initialize a digest context for the hash named by md (md is matched as a
+  * string, e.g. "SHA256"). Sets ctx->macType and runs the matching *_Init.
+  * Returns WOLFSSL_SUCCESS on ok, BAD_FUNC_ARG on NULL args or unknown md,
+  * otherwise the Init function's result. */
+ int wolfSSL_EVP_DigestInit(WOLFSSL_EVP_MD_CTX* ctx,
+ const WOLFSSL_EVP_MD* md)
+ {
+ int ret = WOLFSSL_SUCCESS;
+
+ WOLFSSL_ENTER("EVP_DigestInit");
+
+ if (ctx == NULL || md == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ /* compile-time validation of ASYNC_CTX_SIZE */
+ typedef char async_test[WC_ASYNC_DEV_SIZE >= sizeof(WC_ASYNC_DEV) ?
+ 1 : -1];
+ (void)sizeof(async_test);
+ #endif
+
+ /* Set to 0 if no match */
+ ctx->macType = wolfSSL_EVP_md2macType(md);
+ if (XSTRNCMP(md, "SHA256", 6) == 0) {
+ ret = wolfSSL_SHA256_Init(&(ctx->hash.digest.sha256));
+ }
+ #ifdef WOLFSSL_SHA224
+ else if (XSTRNCMP(md, "SHA224", 6) == 0) {
+ ret = wolfSSL_SHA224_Init(&(ctx->hash.digest.sha224));
+ }
+ #endif
+ #ifdef WOLFSSL_SHA384
+ else if (XSTRNCMP(md, "SHA384", 6) == 0) {
+ ret = wolfSSL_SHA384_Init(&(ctx->hash.digest.sha384));
+ }
+ #endif
+ #ifdef WOLFSSL_SHA512
+ else if (XSTRNCMP(md, "SHA512", 6) == 0) {
+ ret = wolfSSL_SHA512_Init(&(ctx->hash.digest.sha512));
+ }
+ #endif
+ #ifndef NO_MD4
+ /* NOTE(review): MD4 Init result is not assigned to ret, unlike the
+  * other branches — confirm wolfSSL_MD4_Init cannot fail */
+ else if (XSTRNCMP(md, "MD4", 3) == 0) {
+ wolfSSL_MD4_Init(&(ctx->hash.digest.md4));
+ }
+ #endif
+ #ifndef NO_MD5
+ else if (XSTRNCMP(md, "MD5", 3) == 0) {
+ ret = wolfSSL_MD5_Init(&(ctx->hash.digest.md5));
+ }
+ #endif
+#ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_224
+ else if (XSTRNCMP(md, "SHA3_224", 8) == 0) {
+ ret = wolfSSL_SHA3_224_Init(&(ctx->hash.digest.sha3_224));
+ }
+ #endif
+ #ifndef WOLFSSL_NOSHA3_256
+ else if (XSTRNCMP(md, "SHA3_256", 8) == 0) {
+ ret = wolfSSL_SHA3_256_Init(&(ctx->hash.digest.sha3_256));
+ }
+ #endif
+ /* NOTE(review): SHA3_384 has no WOLFSSL_NOSHA3_384 guard, unlike its
+  * 224/256/512 siblings — confirm intentional */
+ else if (XSTRNCMP(md, "SHA3_384", 8) == 0) {
+ ret = wolfSSL_SHA3_384_Init(&(ctx->hash.digest.sha3_384));
+ }
+ #ifndef WOLFSSL_NOSHA3_512
+ else if (XSTRNCMP(md, "SHA3_512", 8) == 0) {
+ ret = wolfSSL_SHA3_512_Init(&(ctx->hash.digest.sha3_512));
+ }
+ #endif
+#endif
+ #ifndef NO_SHA
+ /* has to be last since would pick or 224, 256, 384, or 512 too */
+ else if (XSTRNCMP(md, "SHA", 3) == 0) {
+ ret = wolfSSL_SHA_Init(&(ctx->hash.digest.sha));
+ }
+ #endif /* NO_SHA */
+ else {
+ /* unknown digest name: clear macType and reject */
+ ctx->macType = WC_HASH_TYPE_NONE;
+ return BAD_FUNC_ARG;
+ }
+
+ return ret;
+ }
+
+ /* Feed sz bytes of data into the digest selected at DigestInit time.
+  * Dispatches on the context's mac type; the per-hash Update results are
+  * not checked, so this returns WOLFSSL_SUCCESS whenever the type matched
+  * a compiled-in hash and WOLFSSL_FAILURE otherwise. */
+ int wolfSSL_EVP_DigestUpdate(WOLFSSL_EVP_MD_CTX* ctx, const void* data,
+ size_t sz)
+ {
+ int macType;
+
+ WOLFSSL_ENTER("EVP_DigestUpdate");
+
+ macType = wolfSSL_EVP_md2macType(EVP_MD_CTX_md(ctx));
+ switch (macType) {
+#ifndef NO_MD4
+ case WC_HASH_TYPE_MD4:
+ wolfSSL_MD4_Update((MD4_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+#endif
+#ifndef NO_MD5
+ case WC_HASH_TYPE_MD5:
+ wolfSSL_MD5_Update((MD5_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+#endif
+#ifndef NO_SHA
+ case WC_HASH_TYPE_SHA:
+ wolfSSL_SHA_Update((SHA_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+#endif
+#ifdef WOLFSSL_SHA224
+ case WC_HASH_TYPE_SHA224:
+ wolfSSL_SHA224_Update((SHA224_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+#endif
+#ifndef NO_SHA256
+ case WC_HASH_TYPE_SHA256:
+ wolfSSL_SHA256_Update((SHA256_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+#endif /* !NO_SHA256 */
+#ifdef WOLFSSL_SHA384
+ case WC_HASH_TYPE_SHA384:
+ wolfSSL_SHA384_Update((SHA384_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+#endif
+#ifdef WOLFSSL_SHA512
+ case WC_HASH_TYPE_SHA512:
+ wolfSSL_SHA512_Update((SHA512_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+#endif /* WOLFSSL_SHA512 */
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_224
+ case WC_HASH_TYPE_SHA3_224:
+ wolfSSL_SHA3_224_Update((SHA3_224_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_HASH_TYPE_SHA3_256:
+ wolfSSL_SHA3_256_Update((SHA3_256_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+ #endif
+ case WC_HASH_TYPE_SHA3_384:
+ wolfSSL_SHA3_384_Update((SHA3_384_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_HASH_TYPE_SHA3_512:
+ wolfSSL_SHA3_512_Update((SHA3_512_CTX*)&ctx->hash, data,
+ (unsigned long)sz);
+ break;
+ #endif
+ #endif
+ default:
+ /* unknown or not-compiled-in digest type */
+ return WOLFSSL_FAILURE;
+ }
+
+ return WOLFSSL_SUCCESS;
+ }
+
+    /* Finalize the digest: write the hash value to md and, when s is
+     * non-NULL, store the digest length in *s.
+     * Returns WOLFSSL_SUCCESS on ok, WOLFSSL_FAILURE on unknown digest. */
+    int wolfSSL_EVP_DigestFinal(WOLFSSL_EVP_MD_CTX* ctx, unsigned char* md,
+                               unsigned int* s)
+    {
+        int macType;
+
+        WOLFSSL_ENTER("EVP_DigestFinal");
+        macType = wolfSSL_EVP_md2macType(EVP_MD_CTX_md(ctx));
+        switch (macType) {
+#ifndef NO_MD4
+            case WC_HASH_TYPE_MD4:
+                wolfSSL_MD4_Final(md, (MD4_CTX*)&ctx->hash);
+                if (s) *s = MD4_DIGEST_SIZE;
+                break;
+#endif
+#ifndef NO_MD5
+            case WC_HASH_TYPE_MD5:
+                wolfSSL_MD5_Final(md, (MD5_CTX*)&ctx->hash);
+                if (s) *s = WC_MD5_DIGEST_SIZE;
+                break;
+#endif
+#ifndef NO_SHA
+            case WC_HASH_TYPE_SHA:
+                wolfSSL_SHA_Final(md, (SHA_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA_DIGEST_SIZE;
+                break;
+#endif
+#ifdef WOLFSSL_SHA224
+            case WC_HASH_TYPE_SHA224:
+                wolfSSL_SHA224_Final(md, (SHA224_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA224_DIGEST_SIZE;
+                break;
+#endif
+#ifndef NO_SHA256
+            case WC_HASH_TYPE_SHA256:
+                wolfSSL_SHA256_Final(md, (SHA256_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA256_DIGEST_SIZE;
+                break;
+#endif /* !NO_SHA256 */
+#ifdef WOLFSSL_SHA384
+            case WC_HASH_TYPE_SHA384:
+                wolfSSL_SHA384_Final(md, (SHA384_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA384_DIGEST_SIZE;
+                break;
+#endif
+#ifdef WOLFSSL_SHA512
+            case WC_HASH_TYPE_SHA512:
+                wolfSSL_SHA512_Final(md, (SHA512_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA512_DIGEST_SIZE;
+                break;
+#endif /* WOLFSSL_SHA512 */
+    #ifdef WOLFSSL_SHA3
+        #ifndef WOLFSSL_NOSHA3_224
+            case WC_HASH_TYPE_SHA3_224:
+                wolfSSL_SHA3_224_Final(md, (SHA3_224_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA3_224_DIGEST_SIZE;
+                break;
+        #endif
+        #ifndef WOLFSSL_NOSHA3_256
+            case WC_HASH_TYPE_SHA3_256:
+                wolfSSL_SHA3_256_Final(md, (SHA3_256_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA3_256_DIGEST_SIZE;
+                break;
+        #endif
+        /* Fix: guard SHA3-384 like the other SHA3 sizes so builds that
+         * define WOLFSSL_NOSHA3_384 still compile. */
+        #ifndef WOLFSSL_NOSHA3_384
+            case WC_HASH_TYPE_SHA3_384:
+                wolfSSL_SHA3_384_Final(md, (SHA3_384_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA3_384_DIGEST_SIZE;
+                break;
+        #endif
+        #ifndef WOLFSSL_NOSHA3_512
+            case WC_HASH_TYPE_SHA3_512:
+                wolfSSL_SHA3_512_Final(md, (SHA3_512_CTX*)&ctx->hash);
+                if (s) *s = WC_SHA3_512_DIGEST_SIZE;
+                break;
+        #endif
+    #endif
+            default:
+                return WOLFSSL_FAILURE;
+        }
+
+        return WOLFSSL_SUCCESS;
+    }
+
+    /* OpenSSL-compat alias: identical to wolfSSL_EVP_DigestFinal (wolfSSL
+     * does not reuse the ctx, so _ex has no extra semantics here).
+     * Returns WOLFSSL_SUCCESS on ok. */
+    int wolfSSL_EVP_DigestFinal_ex(WOLFSSL_EVP_MD_CTX* ctx, unsigned char* md,
+                                   unsigned int* s)
+    {
+        WOLFSSL_ENTER("EVP_DigestFinal_ex");
+        return EVP_DigestFinal(ctx, md, s);
+    }
+
+    /* OpenSSL-compat no-op: wolfSSL keeps no global EVP state to release. */
+    void wolfSSL_EVP_cleanup(void)
+    {
+        /* nothing to do here */
+    }
+
+/* Map a digest NID onto its EVP_MD handle; NULL when the NID is unknown
+ * or the digest is compiled out. */
+const WOLFSSL_EVP_MD* wolfSSL_EVP_get_digestbynid(int id)
+{
+    WOLFSSL_MSG("wolfSSL_get_digestbynid");
+
+#ifndef NO_MD5
+    if (id == NID_md5) {
+        return wolfSSL_EVP_md5();
+    }
+#endif
+#ifndef NO_SHA
+    if (id == NID_sha1) {
+        return wolfSSL_EVP_sha1();
+    }
+#endif
+    WOLFSSL_MSG("Bad digest id value");
+    return NULL;
+}
+
+#ifndef NO_RSA
+/* Borrow the pkey's RSA handle (no reference/ownership change).
+ * Returns NULL when pkey is NULL or holds no RSA key. */
+WOLFSSL_RSA* wolfSSL_EVP_PKEY_get0_RSA(WOLFSSL_EVP_PKEY *pkey)
+{
+    return (pkey != NULL) ? pkey->rsa : NULL;
+}
+
+/* Build a fresh WOLFSSL_RSA copy from the DER encoding cached in the pkey
+ * (caller owns and must free the result). NULL on any failure. */
+WOLFSSL_RSA* wolfSSL_EVP_PKEY_get1_RSA(WOLFSSL_EVP_PKEY* key)
+{
+    WOLFSSL_RSA* rsaDup;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_RSA");
+
+    if (key == NULL)
+        return NULL;
+
+    rsaDup = wolfSSL_RSA_new();
+    if (rsaDup == NULL) {
+        WOLFSSL_MSG("Error creating a new WOLFSSL_RSA structure");
+        return NULL;
+    }
+
+    if (key->type != EVP_PKEY_RSA) {
+        WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold an RSA key");
+        wolfSSL_RSA_free(rsaDup);
+        return NULL;
+    }
+
+    /* try the private encoding first, then fall back to public */
+    if (wolfSSL_RSA_LoadDer(rsaDup, (const unsigned char*)key->pkey.ptr,
+            key->pkey_sz) != SSL_SUCCESS &&
+        wolfSSL_RSA_LoadDer_ex(rsaDup, (const unsigned char*)key->pkey.ptr,
+            key->pkey_sz, WOLFSSL_RSA_LOAD_PUBLIC) != SSL_SUCCESS) {
+        wolfSSL_RSA_free(rsaDup);
+        rsaDup = NULL;
+    }
+
+    return rsaDup;
+}
+
+/* with set1 functions the pkey struct does not own the RSA structure
+ *
+ * Stores key in pkey (borrowed) and, when key-gen support is built, caches
+ * the key's DER encoding in pkey->pkey.ptr / pkey->pkey_sz.
+ *
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_EVP_PKEY_set1_RSA(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_RSA *key)
+{
+#if defined(WOLFSSL_KEY_GEN) && !defined(HAVE_USER_RSA)
+    int derMax = 0;
+    int derSz  = 0;
+    byte* derBuf = NULL;
+    RsaKey* rsa  = NULL;
+#endif
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_RSA");
+    if ((pkey == NULL) || (key == NULL))
+        return WOLFSSL_FAILURE;
+
+    /* drop any RSA object the pkey previously owned */
+    if (pkey->rsa != NULL && pkey->ownRsa == 1) {
+        wolfSSL_RSA_free(pkey->rsa);
+    }
+    pkey->rsa    = key;
+    pkey->ownRsa = 0; /* pkey does not own RSA */
+    pkey->type   = EVP_PKEY_RSA;
+    if (key->inSet == 0) {
+        if (SetRsaInternal(key) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetRsaInternal failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+
+#if defined(WOLFSSL_KEY_GEN) && !defined(HAVE_USER_RSA)
+    rsa = (RsaKey*)key->internal;
+    /* 5 > size of n, d, p, q, d%(p-1), d(q-1), 1/q%p, e + ASN.1 additional
+     * information */
+    derMax = 5 * wolfSSL_RSA_size(key) + (2 * AES_BLOCK_SIZE);
+
+    derBuf = (byte*)XMALLOC(derMax, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("malloc failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (rsa->type == RSA_PRIVATE) {
+        /* Private key to DER */
+        derSz = wc_RsaKeyToDer(rsa, derBuf, derMax);
+    }
+    else {
+        /* Public key to DER */
+        derSz = wc_RsaKeyToPublicDer(rsa, derBuf, derMax);
+    }
+
+    if (derSz < 0) {
+        if (rsa->type == RSA_PRIVATE) {
+            WOLFSSL_MSG("wc_RsaKeyToDer failed");
+        }
+        else {
+            WOLFSSL_MSG("wc_RsaKeyToPublicDer failed");
+        }
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Fix: release any previously cached encoding instead of leaking it
+     * when set1 is called on an already-populated pkey. */
+    if (pkey->pkey.ptr != NULL) {
+        XFREE(pkey->pkey.ptr, pkey->heap, DYNAMIC_TYPE_DER);
+        pkey->pkey.ptr = NULL;
+    }
+
+    pkey->pkey.ptr = (char*)XMALLOC(derSz, pkey->heap, DYNAMIC_TYPE_DER);
+    if (pkey->pkey.ptr == NULL) {
+        WOLFSSL_MSG("key malloc failed");
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+    pkey->pkey_sz = derSz;
+    XMEMCPY(pkey->pkey.ptr, derBuf, derSz);
+    XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif /* WOLFSSL_KEY_GEN && !HAVE_USER_RSA */
+
+#ifdef WC_RSA_BLINDING
+    if (key->ownRng == 0) {
+        /* blinding needs an RNG; attach the pkey's RNG */
+        if (wc_RsaSetRNG((RsaKey*)(pkey->rsa->internal), &(pkey->rng)) != 0) {
+            WOLFSSL_MSG("Error setting RSA rng");
+            return WOLFSSL_FAILURE;
+        }
+    }
+#endif
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_RSA */
+
+#if !defined (NO_DSA) && !defined(HAVE_SELFTEST) && defined(WOLFSSL_KEY_GEN)
+/* with set1 functions the pkey struct does not own the DSA structure
+ *
+ * Stores key in pkey (borrowed) and caches the key's DER encoding in
+ * pkey->pkey.ptr / pkey->pkey_sz.
+ *
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_EVP_PKEY_set1_DSA(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_DSA *key)
+{
+    int derMax = 0;
+    int derSz  = 0;
+    DsaKey* dsa  = NULL;
+    byte* derBuf = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_DSA");
+
+    if ((pkey == NULL) || (key == NULL))
+        return WOLFSSL_FAILURE;
+
+    /* drop any DSA object the pkey previously owned */
+    if (pkey->dsa != NULL && pkey->ownDsa == 1) {
+        wolfSSL_DSA_free(pkey->dsa);
+    }
+    pkey->dsa    = key;
+    pkey->ownDsa = 0; /* pkey does not own DSA */
+    pkey->type   = EVP_PKEY_DSA;
+    if (key->inSet == 0) {
+        if (SetDsaInternal(key) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetDsaInternal failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+    dsa = (DsaKey*)key->internal;
+
+    /* 4 > size of pub, priv, p, q, g + ASN.1 additional information */
+    derMax = 4 * wolfSSL_BN_num_bytes(key->g) + AES_BLOCK_SIZE;
+
+    derBuf = (byte*)XMALLOC(derMax, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("malloc failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    if (dsa->type == DSA_PRIVATE) {
+        /* Private key to DER */
+        derSz = wc_DsaKeyToDer(dsa, derBuf, derMax);
+    }
+    else {
+        /* Public key to DER */
+        derSz = wc_DsaKeyToPublicDer(dsa, derBuf, derMax);
+    }
+
+    if (derSz < 0) {
+        if (dsa->type == DSA_PRIVATE) {
+            WOLFSSL_MSG("wc_DsaKeyToDer failed");
+        }
+        else {
+            WOLFSSL_MSG("wc_DsaKeyToPublicDer failed");
+        }
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Fix: release any previously cached encoding instead of leaking it
+     * when set1 is called on an already-populated pkey. */
+    if (pkey->pkey.ptr != NULL) {
+        XFREE(pkey->pkey.ptr, pkey->heap, DYNAMIC_TYPE_DER);
+        pkey->pkey.ptr = NULL;
+    }
+
+    pkey->pkey.ptr = (char*)XMALLOC(derSz, pkey->heap, DYNAMIC_TYPE_DER);
+    if (pkey->pkey.ptr == NULL) {
+        WOLFSSL_MSG("key malloc failed");
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+    pkey->pkey_sz = derSz;
+    XMEMCPY(pkey->pkey.ptr, derBuf, derSz);
+    XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Build a fresh WOLFSSL_DSA copy from the DER encoding cached in the pkey
+ * (caller owns and must free the result). NULL on any failure. */
+WOLFSSL_DSA* wolfSSL_EVP_PKEY_get1_DSA(WOLFSSL_EVP_PKEY* key)
+{
+    WOLFSSL_DSA* dsaDup;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_DSA");
+
+    if (key == NULL) {
+        WOLFSSL_MSG("Bad function argument");
+        return NULL;
+    }
+
+    dsaDup = wolfSSL_DSA_new();
+    if (dsaDup == NULL) {
+        WOLFSSL_MSG("Error creating a new WOLFSSL_DSA structure");
+        return NULL;
+    }
+
+    if (key->type != EVP_PKEY_DSA) {
+        WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold a DSA key");
+        wolfSSL_DSA_free(dsaDup);
+        return NULL;
+    }
+
+    /* try the private encoding first, then fall back to public */
+    if (wolfSSL_DSA_LoadDer(dsaDup, (const unsigned char*)key->pkey.ptr,
+            key->pkey_sz) != SSL_SUCCESS &&
+        wolfSSL_DSA_LoadDer_ex(dsaDup, (const unsigned char*)key->pkey.ptr,
+            key->pkey_sz, WOLFSSL_DSA_LOAD_PUBLIC) != SSL_SUCCESS) {
+        wolfSSL_DSA_free(dsaDup);
+        dsaDup = NULL;
+    }
+
+    return dsaDup;
+}
+#endif /* !NO_DSA && !HAVE_SELFTEST && WOLFSSL_KEY_GEN */
+
+#ifdef HAVE_ECC
+/* Borrow the pkey's EC key handle (no reference/ownership change).
+ * Returns NULL when pkey is NULL or holds no EC key.
+ * Fix: removed the redundant inner "#ifdef HAVE_ECC" -- this whole section
+ * is already inside "#ifdef HAVE_ECC". */
+WOLFSSL_EC_KEY *wolfSSL_EVP_PKEY_get0_EC_KEY(WOLFSSL_EVP_PKEY *pkey)
+{
+    WOLFSSL_EC_KEY *eckey = NULL;
+    if (pkey) {
+        eckey = pkey->ecc;
+    }
+    return eckey;
+}
+
+/* Build a fresh WOLFSSL_EC_KEY copy from the DER encoding cached in the
+ * pkey (caller owns and must free the result). Under OPENSSL_ALL, falls
+ * back to duplicating the pkey's EC key object when DER loading failed. */
+WOLFSSL_EC_KEY* wolfSSL_EVP_PKEY_get1_EC_KEY(WOLFSSL_EVP_PKEY* key)
+{
+    WOLFSSL_EC_KEY* ecDup;
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_EC_KEY");
+
+    if (key == NULL) {
+        return NULL;
+    }
+
+    ecDup = wolfSSL_EC_KEY_new();
+    if (ecDup == NULL) {
+        WOLFSSL_MSG("Error creating a new WOLFSSL_EC_KEY structure");
+        return NULL;
+    }
+
+    if (key->type == EVP_PKEY_EC) {
+        int loaded = wolfSSL_EC_KEY_LoadDer(ecDup,
+                (const unsigned char*)key->pkey.ptr,
+                key->pkey_sz) == SSL_SUCCESS;
+        if (!loaded) {
+            /* retry as a public key */
+            loaded = wolfSSL_EC_KEY_LoadDer_ex(ecDup,
+                    (const unsigned char*)key->pkey.ptr, key->pkey_sz,
+                    WOLFSSL_EC_KEY_LOAD_PUBLIC) == SSL_SUCCESS;
+        }
+        if (!loaded) {
+            wolfSSL_EC_KEY_free(ecDup);
+            ecDup = NULL;
+        }
+    }
+    else {
+        WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold an EC key");
+        wolfSSL_EC_KEY_free(ecDup);
+        ecDup = NULL;
+    }
+#ifdef OPENSSL_ALL
+    if (!ecDup && key->ecc) {
+        ecDup = wolfSSL_EC_KEY_dup(key->ecc);
+    }
+#endif
+    return ecDup;
+}
+#endif /* HAVE_ECC */
+
+#if defined(OPENSSL_ALL) || defined(WOLFSSL_QT)
+#if !defined(NO_DH) && !defined(NO_FILESYSTEM)
+/* with set1 functions the pkey struct does not own the DH structure
+ * Build the following DH Key format from the passed in WOLFSSL_DH
+ * then store in WOLFSSL_EVP_PKEY in DER format.
+ *
+ * returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure
+ */
+int wolfSSL_EVP_PKEY_set1_DH(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_DH *key)
+{
+    byte havePublic = 0, havePrivate = 0;
+    int ret;
+    word32 derSz = 0;
+    byte* derBuf = NULL;
+    DhKey* dhkey = NULL;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_DH");
+
+    if (pkey == NULL || key == NULL)
+        return WOLFSSL_FAILURE;
+
+    /* drop any DH object the pkey previously owned */
+    if (pkey->dh != NULL && pkey->ownDh == 1)
+        wolfSSL_DH_free(pkey->dh);
+
+    pkey->dh    = key;
+    pkey->ownDh = 0; /* pkey does not own DH */
+    pkey->type  = EVP_PKEY_DH;
+    if (key->inSet == 0) {
+        if (SetDhInternal(key) != WOLFSSL_SUCCESS) {
+            WOLFSSL_MSG("SetDhInternal failed");
+            return WOLFSSL_FAILURE;
+        }
+    }
+
+    dhkey = (DhKey*)key->internal;
+
+    havePublic  = mp_unsigned_bin_size(&dhkey->pub)  > 0;
+    havePrivate = mp_unsigned_bin_size(&dhkey->priv) > 0;
+
+    /* Get size of DER buffer only */
+    if (havePublic && !havePrivate) {
+        ret = wc_DhPubKeyToDer(dhkey, NULL, &derSz);
+    } else if (havePrivate && !havePublic) {
+        ret = wc_DhPrivKeyToDer(dhkey, NULL, &derSz);
+    } else {
+        ret = wc_DhParamsToDer(dhkey, NULL, &derSz);
+    }
+
+    /* Fix: derSz is an unsigned word32, so the old "derSz <= 0" test only
+     * ever caught 0; check the return code first, then a zero size. */
+    if (ret != LENGTH_ONLY_E || derSz == 0) {
+        WOLFSSL_MSG("Failed to get size of DH Key");
+        return WOLFSSL_FAILURE;
+    }
+
+    derBuf = (byte*)XMALLOC(derSz, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (derBuf == NULL) {
+        WOLFSSL_MSG("malloc failed");
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Fill DER buffer */
+    if (havePublic && !havePrivate) {
+        ret = wc_DhPubKeyToDer(dhkey, derBuf, &derSz);
+    } else if (havePrivate && !havePublic) {
+        ret = wc_DhPrivKeyToDer(dhkey, derBuf, &derSz);
+    } else {
+        ret = wc_DhParamsToDer(dhkey, derBuf, &derSz);
+    }
+
+    if (ret <= 0) {
+        WOLFSSL_MSG("Failed to export DH Key");
+        XFREE(derBuf, pkey->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return WOLFSSL_FAILURE;
+    }
+
+    /* Fix: release any previously cached encoding instead of leaking it. */
+    if (pkey->pkey.ptr != NULL) {
+        XFREE(pkey->pkey.ptr, pkey->heap, DYNAMIC_TYPE_DER);
+        pkey->pkey.ptr = NULL;
+    }
+
+    /* Store DH key into pkey (DER format) */
+    pkey->pkey.ptr = (char*)derBuf;
+    pkey->pkey_sz  = derSz;
+
+    return WOLFSSL_SUCCESS;
+}
+
+/* Borrow the pkey's DH handle (no reference/ownership change).
+ * Returns NULL when key is NULL. */
+WOLFSSL_DH* wolfSSL_EVP_PKEY_get0_DH(WOLFSSL_EVP_PKEY* key)
+{
+    return (key != NULL) ? key->dh : NULL;
+}
+
+/* Build a fresh WOLFSSL_DH copy from the DER encoding cached in the pkey
+ * (caller owns and must free the result). NULL on any failure. */
+WOLFSSL_DH* wolfSSL_EVP_PKEY_get1_DH(WOLFSSL_EVP_PKEY* key)
+{
+    WOLFSSL_DH* dhDup;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_get1_DH");
+
+    if (key == NULL || key->dh == NULL) {
+        WOLFSSL_MSG("Bad function argument");
+        return NULL;
+    }
+
+    if (key->type != EVP_PKEY_DH) {
+        WOLFSSL_MSG("WOLFSSL_EVP_PKEY does not hold a DH key");
+        return NULL;
+    }
+
+    dhDup = wolfSSL_DH_new();
+    if (dhDup == NULL) {
+        WOLFSSL_MSG("Error creating a new WOLFSSL_DH structure");
+        return NULL;
+    }
+
+    if (wolfSSL_DH_LoadDer(dhDup, (const unsigned char*)key->pkey.ptr,
+            key->pkey_sz) != SSL_SUCCESS) {
+        wolfSSL_DH_free(dhDup);
+        WOLFSSL_MSG("Error wolfSSL_DH_LoadDer");
+        dhDup = NULL;
+    }
+
+    return dhDup;
+}
+#endif /* !NO_DH && !NO_FILESYSTEM */
+
+/* Dispatch to the type-specific assign function; pkey takes ownership of
+ * key on success. Returns the assign function's result, or WOLFSSL_FAILURE
+ * for an unknown/disabled type. */
+int wolfSSL_EVP_PKEY_assign(WOLFSSL_EVP_PKEY *pkey, int type, void *key)
+{
+    int ret;
+
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_assign");
+
+    /* pkey and key checked if NULL in subsequent assign functions */
+    switch (type) {
+    #ifndef NO_RSA
+        case EVP_PKEY_RSA:
+            ret = wolfSSL_EVP_PKEY_assign_RSA(pkey, (WOLFSSL_RSA*)key);
+            break;
+    #endif
+    #ifndef NO_DSA
+        case EVP_PKEY_DSA:
+            ret = wolfSSL_EVP_PKEY_assign_DSA(pkey, (WOLFSSL_DSA*)key);
+            break;
+    #endif
+    #ifdef HAVE_ECC
+        case EVP_PKEY_EC:
+            ret = wolfSSL_EVP_PKEY_assign_EC_KEY(pkey, (WOLFSSL_EC_KEY*)key);
+            break;
+    #endif
+    /* Fix: was "#ifdef NO_DH", which compiled the DH case only when DH is
+     * DISABLED (where wolfSSL_EVP_PKEY_assign_DH does not exist). */
+    #ifndef NO_DH
+        case EVP_PKEY_DH:
+            ret = wolfSSL_EVP_PKEY_assign_DH(pkey, (WOLFSSL_DH*)key);
+            break;
+    #endif
+        default:
+            WOLFSSL_MSG("Unknown EVP_PKEY type in wolfSSL_EVP_PKEY_assign.");
+            ret = WOLFSSL_FAILURE;
+    }
+
+    return ret;
+}
+#endif /* WOLFSSL_QT || OPENSSL_ALL */
+
+#if defined(HAVE_ECC)
+/* try and populate public pkey_sz and pkey.ptr with the EC public key DER
+ * encoding (best effort: failures are silently ignored and the pkey's
+ * existing cache is left untouched). */
+static void ECC_populate_EVP_PKEY(EVP_PKEY* pkey, ecc_key* ecc)
+{
+    int ret;
+    if (!pkey || !ecc)
+        return;
+    /* size the public-key DER first (second arg 1 = include curve info) */
+    if ((ret = wc_EccPublicKeyDerSize(ecc, 1)) > 0) {
+        int derSz = ret;
+        char* derBuf = (char*)XMALLOC(derSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (derBuf) {
+            ret = wc_EccPublicKeyToDer(ecc, (byte*)derBuf, derSz, 1);
+            if (ret >= 0) {
+                /* NOTE(review): old buffer freed as DYNAMIC_TYPE_OPENSSL but
+                 * the replacement was allocated as DYNAMIC_TYPE_TMP_BUFFER;
+                 * confirm the intended allocation type for pkey->pkey.ptr. */
+                if (pkey->pkey.ptr) {
+                    XFREE(pkey->pkey.ptr, NULL, DYNAMIC_TYPE_OPENSSL);
+                }
+                pkey->pkey_sz = ret;
+                pkey->pkey.ptr = derBuf;
+            }
+            else { /* failure - okay to ignore */
+                XFREE(derBuf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                derBuf = NULL;
+            }
+        }
+    }
+}
+
+/* Store key in pkey as a borrowed EC key (pkey does NOT take ownership),
+ * releasing any key object of another type the pkey previously owned, and
+ * best-effort caching the public key DER in pkey->pkey.ptr.
+ * Returns WOLFSSL_SUCCESS on success, WOLFSSL_FAILURE on NULL args or when
+ * ECC support is compiled out. */
+WOLFSSL_API int wolfSSL_EVP_PKEY_set1_EC_KEY(WOLFSSL_EVP_PKEY *pkey, WOLFSSL_EC_KEY *key)
+{
+#ifdef HAVE_ECC
+    if((pkey == NULL) || (key ==NULL))return WOLFSSL_FAILURE;
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_set1_EC_KEY");
+    /* drop previously owned key material of every other type so the pkey
+     * does not leak it when re-pointed at an EC key */
+#ifndef NO_RSA
+    if (pkey->rsa != NULL && pkey->ownRsa == 1) {
+        wolfSSL_RSA_free(pkey->rsa);
+    }
+    pkey->ownRsa = 0;
+#endif
+#ifndef NO_DSA
+    if (pkey->dsa != NULL && pkey->ownDsa == 1) {
+        wolfSSL_DSA_free(pkey->dsa);
+    }
+    pkey->ownDsa = 0;
+#endif
+#ifndef NO_DH
+    if (pkey->dh != NULL && pkey->ownDh == 1) {
+        wolfSSL_DH_free(pkey->dh);
+    }
+    pkey->ownDh = 0;
+#endif
+    if (pkey->ecc != NULL && pkey->ownEcc == 1) {
+        wolfSSL_EC_KEY_free(pkey->ecc);
+    }
+    pkey->ecc = key;
+    pkey->ownEcc = 0; /* pkey does not own EC key */
+    pkey->type = EVP_PKEY_EC;
+    /* best-effort: cache the public key DER in the pkey */
+    ECC_populate_EVP_PKEY(pkey, (ecc_key*)key->internal);
+    return WOLFSSL_SUCCESS;
+#else
+    (void)pkey;
+    (void)key;
+    return WOLFSSL_FAILURE;
+#endif
+}
+
+/* Expose the cipher's internal "x" state; only ARC4 has one.
+ * Returns NULL for a NULL ctx or any non-ARC4 cipher type. */
+void* wolfSSL_EVP_X_STATE(const WOLFSSL_EVP_CIPHER_CTX* ctx)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_X_STATE");
+
+    if (ctx == NULL)
+        return NULL;
+
+    if (ctx->cipherType == ARC4_TYPE) {
+        WOLFSSL_MSG("returning arc4 state");
+        return (void*)&ctx->cipher.arc4.x;
+    }
+
+    WOLFSSL_MSG("bad x state type");
+    return NULL;
+}
+/* Give ownership of key to pkey (assign semantics: pkey will free it).
+ * Returns WOLFSSL_SUCCESS, or WOLFSSL_FAILURE on NULL arguments. */
+int wolfSSL_EVP_PKEY_assign_EC_KEY(EVP_PKEY* pkey, WOLFSSL_EC_KEY* key)
+{
+    if ((pkey == NULL) || (key == NULL))
+        return WOLFSSL_FAILURE;
+
+    pkey->type   = EVP_PKEY_EC;
+    pkey->ecc    = key;
+    pkey->ownEcc = 1;
+
+    /* best-effort: cache the public key DER in the pkey */
+    ECC_populate_EVP_PKEY(pkey, (ecc_key*)key->internal);
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* HAVE_ECC */
+
+#ifndef NO_WOLFSSL_STUB
+/* Stub: RIPEMD-160 is not available as an EVP digest; always returns NULL. */
+const WOLFSSL_EVP_MD* wolfSSL_EVP_ripemd160(void)
+{
+    WOLFSSL_MSG("wolfSSL_ripemd160");
+    WOLFSSL_STUB("EVP_ripemd160");
+    return NULL;
+}
+#endif
+
+
+/* Return the input block size in bytes for the named digest. An EVP_MD is
+ * represented here as the digest's name string, matched by prefix.
+ * Returns BAD_FUNC_ARG for NULL or unrecognized/disabled digest names. */
+int wolfSSL_EVP_MD_block_size(const WOLFSSL_EVP_MD* type)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_MD_block_size");
+
+    if (type == NULL) {
+        WOLFSSL_MSG("No md type arg");
+        return BAD_FUNC_ARG;
+    }
+
+    if (XSTRNCMP(type, "SHA256", 6) == 0) {
+        return WC_SHA256_BLOCK_SIZE;
+    }
+#ifndef NO_MD5
+    else if (XSTRNCMP(type, "MD5", 3) == 0) {
+        return WC_MD5_BLOCK_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA224
+    else if (XSTRNCMP(type, "SHA224", 6) == 0) {
+        return WC_SHA224_BLOCK_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA384
+    else if (XSTRNCMP(type, "SHA384", 6) == 0) {
+        return WC_SHA384_BLOCK_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA512
+    else if (XSTRNCMP(type, "SHA512", 6) == 0) {
+        return WC_SHA512_BLOCK_SIZE;
+    }
+#endif
+#ifndef NO_SHA
+    /* has to be last since "SHA" would also match SHA256/384/512 */
+    else if (XSTRNCMP(type, "SHA", 3) == 0) {
+        return WC_SHA_BLOCK_SIZE;
+    }
+#endif
+
+    return BAD_FUNC_ARG;
+}
+
+/* Return the digest (output) size in bytes for the named digest. An EVP_MD
+ * is represented here as the digest's name string, matched by prefix.
+ * Returns BAD_FUNC_ARG for NULL or unrecognized/disabled digest names. */
+int wolfSSL_EVP_MD_size(const WOLFSSL_EVP_MD* type)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_MD_size");
+
+    if (type == NULL) {
+        WOLFSSL_MSG("No md type arg");
+        return BAD_FUNC_ARG;
+    }
+
+    if (XSTRNCMP(type, "SHA256", 6) == 0) {
+        return WC_SHA256_DIGEST_SIZE;
+    }
+#ifndef NO_MD5
+    else if (XSTRNCMP(type, "MD5", 3) == 0) {
+        return WC_MD5_DIGEST_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA224
+    else if (XSTRNCMP(type, "SHA224", 6) == 0) {
+        return WC_SHA224_DIGEST_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA384
+    else if (XSTRNCMP(type, "SHA384", 6) == 0) {
+        return WC_SHA384_DIGEST_SIZE;
+    }
+#endif
+#ifdef WOLFSSL_SHA512
+    else if (XSTRNCMP(type, "SHA512", 6) == 0) {
+        return WC_SHA512_DIGEST_SIZE;
+    }
+#endif
+#ifndef NO_SHA
+    /* has to be last since "SHA" would also match SHA256/384/512 */
+    else if (XSTRNCMP(type, "SHA", 3) == 0) {
+        return WC_SHA_DIGEST_SIZE;
+    }
+#endif
+
+    return BAD_FUNC_ARG;
+}
+
+
+/* Return the IV length in bytes for the ctx's configured cipher type;
+ * 0 for stream/NULL ciphers, unknown types, or a NULL ctx. */
+int wolfSSL_EVP_CIPHER_CTX_iv_length(const WOLFSSL_EVP_CIPHER_CTX* ctx)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_CIPHER_CTX_iv_length");
+
+    /* Fix: sibling accessors NULL-check ctx; this one dereferenced it. */
+    if (ctx == NULL) {
+        WOLFSSL_MSG("No context");
+        return 0;
+    }
+
+    switch (ctx->cipherType) {
+
+#ifdef HAVE_AES_CBC
+        case AES_128_CBC_TYPE :
+        case AES_192_CBC_TYPE :
+        case AES_256_CBC_TYPE :
+            WOLFSSL_MSG("AES CBC");
+            return AES_BLOCK_SIZE;
+#endif
+#ifdef HAVE_AESGCM
+        case AES_128_GCM_TYPE :
+        case AES_192_GCM_TYPE :
+        case AES_256_GCM_TYPE :
+            WOLFSSL_MSG("AES GCM");
+            return GCM_NONCE_MID_SZ;
+#endif
+#ifdef WOLFSSL_AES_COUNTER
+        case AES_128_CTR_TYPE :
+        case AES_192_CTR_TYPE :
+        case AES_256_CTR_TYPE :
+            WOLFSSL_MSG("AES CTR");
+            return AES_BLOCK_SIZE;
+#endif
+#ifndef NO_DES3
+        case DES_CBC_TYPE :
+            WOLFSSL_MSG("DES CBC");
+            return DES_BLOCK_SIZE;
+
+        case DES_EDE3_CBC_TYPE :
+            WOLFSSL_MSG("DES EDE3 CBC");
+            return DES_BLOCK_SIZE;
+#endif
+#ifdef HAVE_IDEA
+        case IDEA_CBC_TYPE :
+            WOLFSSL_MSG("IDEA CBC");
+            return IDEA_BLOCK_SIZE;
+#endif
+#ifndef NO_RC4
+        case ARC4_TYPE :
+            WOLFSSL_MSG("ARC4");
+            return 0;       /* stream cipher: no IV */
+#endif
+#ifdef WOLFSSL_AES_CFB
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+        case AES_128_CFB1_TYPE:
+        case AES_192_CFB1_TYPE:
+        case AES_256_CFB1_TYPE:
+            WOLFSSL_MSG("AES CFB1");
+            return AES_BLOCK_SIZE;
+        case AES_128_CFB8_TYPE:
+        case AES_192_CFB8_TYPE:
+        case AES_256_CFB8_TYPE:
+            WOLFSSL_MSG("AES CFB8");
+            return AES_BLOCK_SIZE;
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+        case AES_128_CFB128_TYPE:
+        case AES_192_CFB128_TYPE:
+        case AES_256_CFB128_TYPE:
+            WOLFSSL_MSG("AES CFB128");
+            return AES_BLOCK_SIZE;
+#endif /* WOLFSSL_AES_CFB */
+#if defined(WOLFSSL_AES_OFB)
+        case AES_128_OFB_TYPE:
+        case AES_192_OFB_TYPE:
+        case AES_256_OFB_TYPE:
+            WOLFSSL_MSG("AES OFB");
+            return AES_BLOCK_SIZE;
+#endif /* WOLFSSL_AES_OFB */
+#ifdef WOLFSSL_AES_XTS
+        case AES_128_XTS_TYPE:
+        case AES_256_XTS_TYPE:
+            WOLFSSL_MSG("AES XTS");
+            return AES_BLOCK_SIZE;
+#endif /* WOLFSSL_AES_XTS */
+
+        case NULL_CIPHER_TYPE :
+            WOLFSSL_MSG("NULL");
+            return 0;
+
+        default: {
+            WOLFSSL_MSG("bad type");
+        }
+    }
+    return 0;
+}
+
+/* Return the IV length in bytes for the given EVP_CIPHER, which wolfSSL
+ * represents as the cipher's name string. 0 for NULL, stream ciphers, or
+ * unrecognized/disabled names. */
+int wolfSSL_EVP_CIPHER_iv_length(const WOLFSSL_EVP_CIPHER* cipher)
+{
+    const char *name = (const char *)cipher;
+    WOLFSSL_MSG("wolfSSL_EVP_CIPHER_iv_length");
+
+    /* Fix: a NULL cipher crashed in the first compiled XSTRNCMP below. */
+    if (name == NULL)
+        return 0;
+
+#ifndef NO_AES
+#ifdef HAVE_AES_CBC
+    #ifdef WOLFSSL_AES_128
+    if (EVP_AES_128_CBC && XSTRNCMP(name, EVP_AES_128_CBC, XSTRLEN(EVP_AES_128_CBC)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    if (EVP_AES_192_CBC && XSTRNCMP(name, EVP_AES_192_CBC, XSTRLEN(EVP_AES_192_CBC)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    if (EVP_AES_256_CBC && XSTRNCMP(name, EVP_AES_256_CBC, XSTRLEN(EVP_AES_256_CBC)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+#endif /* HAVE_AES_CBC */
+#ifdef HAVE_AESGCM
+    #ifdef WOLFSSL_AES_128
+    if (EVP_AES_128_GCM && XSTRNCMP(name, EVP_AES_128_GCM, XSTRLEN(EVP_AES_128_GCM)) == 0)
+        return GCM_NONCE_MID_SZ;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    if (EVP_AES_192_GCM && XSTRNCMP(name, EVP_AES_192_GCM, XSTRLEN(EVP_AES_192_GCM)) == 0)
+        return GCM_NONCE_MID_SZ;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    if (EVP_AES_256_GCM && XSTRNCMP(name, EVP_AES_256_GCM, XSTRLEN(EVP_AES_256_GCM)) == 0)
+        return GCM_NONCE_MID_SZ;
+    #endif
+#endif /* HAVE_AESGCM */
+#ifdef WOLFSSL_AES_COUNTER
+    #ifdef WOLFSSL_AES_128
+    if (EVP_AES_128_CTR && XSTRNCMP(name, EVP_AES_128_CTR, XSTRLEN(EVP_AES_128_CTR)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+    #ifdef WOLFSSL_AES_192
+    if (EVP_AES_192_CTR && XSTRNCMP(name, EVP_AES_192_CTR, XSTRLEN(EVP_AES_192_CTR)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+    #ifdef WOLFSSL_AES_256
+    if (EVP_AES_256_CTR && XSTRNCMP(name, EVP_AES_256_CTR, XSTRLEN(EVP_AES_256_CTR)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif
+#endif
+#ifdef WOLFSSL_AES_XTS
+    #ifdef WOLFSSL_AES_128
+    if (EVP_AES_128_XTS && XSTRNCMP(name, EVP_AES_128_XTS, XSTRLEN(EVP_AES_128_XTS)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif /* WOLFSSL_AES_128 */
+
+    #ifdef WOLFSSL_AES_256
+    if (EVP_AES_256_XTS && XSTRNCMP(name, EVP_AES_256_XTS, XSTRLEN(EVP_AES_256_XTS)) == 0)
+        return AES_BLOCK_SIZE;
+    #endif /* WOLFSSL_AES_256 */
+#endif /* WOLFSSL_AES_XTS */
+
+#endif
+
+#ifndef NO_DES3
+    if ((EVP_DES_CBC && XSTRNCMP(name, EVP_DES_CBC, XSTRLEN(EVP_DES_CBC)) == 0) ||
+        (EVP_DES_EDE3_CBC && XSTRNCMP(name, EVP_DES_EDE3_CBC, XSTRLEN(EVP_DES_EDE3_CBC)) == 0)) {
+        return DES_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef HAVE_IDEA
+    if (EVP_IDEA_CBC && XSTRNCMP(name, EVP_IDEA_CBC, XSTRLEN(EVP_IDEA_CBC)) == 0)
+        return IDEA_BLOCK_SIZE;
+#endif
+
+    (void)name;     /* silence unused warning when all ciphers disabled */
+
+    return 0;
+}
+
+
+/* Size in bytes of the cipher's internal "x" state; only ARC4 has one.
+ * Returns 0 for a NULL ctx or any non-ARC4 cipher type. */
+int wolfSSL_EVP_X_STATE_LEN(const WOLFSSL_EVP_CIPHER_CTX* ctx)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_X_STATE_LEN");
+
+    if (ctx == NULL)
+        return 0;
+
+    if (ctx->cipherType == ARC4_TYPE) {
+        WOLFSSL_MSG("returning arc4 state size");
+        return sizeof(Arc4);
+    }
+
+    WOLFSSL_MSG("bad x state type");
+    return 0;
+}
+
+
+/* Echo back a recognized EVP_PKEY type value (e.g. EVP_PKEY_RSA).
+ *
+ * type  candidate EVP_PKEY type
+ *
+ * returns the same value when recognized, otherwise NID_undef
+ */
+int wolfSSL_EVP_PKEY_type(int type)
+{
+    WOLFSSL_MSG("wolfSSL_EVP_PKEY_type");
+
+    if (type == EVP_PKEY_RSA || type == EVP_PKEY_DSA ||
+        type == EVP_PKEY_EC  || type == EVP_PKEY_DH) {
+        return type;
+    }
+    return NID_undef;
+}
+
+
+/* Raw pkey type field; 0 when pkey is NULL. */
+int wolfSSL_EVP_PKEY_id(const EVP_PKEY *pkey)
+{
+    return (pkey != NULL) ? pkey->type : 0;
+}
+
+
+/* pkey type normalized through wolfSSL_EVP_PKEY_type; NID_undef on NULL. */
+int wolfSSL_EVP_PKEY_base_id(const EVP_PKEY *pkey)
+{
+    return (pkey == NULL) ? NID_undef : wolfSSL_EVP_PKEY_type(pkey->type);
+}
+
+
+/* increments ref count of WOLFSSL_EVP_PKEY. Return 1 on success, 0 on error */
+int wolfSSL_EVP_PKEY_up_ref(WOLFSSL_EVP_PKEY* pkey)
+{
+    if (pkey) {
+        /* Fix: do not touch the count (or unlock) unless the mutex was
+         * actually acquired; the old code incremented and unlocked anyway,
+         * racing with wolfSSL_EVP_PKEY_free(). */
+        if (wc_LockMutex(&pkey->refMutex) != 0) {
+            WOLFSSL_MSG("Failed to lock pkey mutex");
+            return 0;
+        }
+        pkey->references++;
+        wc_UnLockMutex(&pkey->refMutex);
+
+        return 1;
+    }
+
+    return 0;
+}
+
+#ifndef NO_RSA
+/* Give ownership of key to pkey (assign semantics: pkey will free it) and
+ * best-effort cache the public key DER in pkey->pkey.ptr / pkey->pkey_sz.
+ * Returns WOLFSSL_SUCCESS, or WOLFSSL_FAILURE on NULL arguments. */
+int wolfSSL_EVP_PKEY_assign_RSA(EVP_PKEY* pkey, WOLFSSL_RSA* key)
+{
+    if (pkey == NULL || key == NULL)
+        return WOLFSSL_FAILURE;
+
+    pkey->type = EVP_PKEY_RSA;
+    pkey->rsa = key;
+    pkey->ownRsa = 1;
+
+    /* try and populate public pkey_sz and pkey.ptr */
+    if (key->internal) {
+        RsaKey* rsa = (RsaKey*)key->internal;
+        int ret = wc_RsaPublicKeyDerSize(rsa, 1);
+        if (ret > 0) {
+            int derSz = ret;
+            char* derBuf = (char*)XMALLOC(derSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (derBuf) {
+                ret = wc_RsaKeyToPublicDer(rsa, (byte*)derBuf, derSz);
+                if (ret >= 0) {
+                    /* NOTE(review): a previously set pkey->pkey.ptr is
+                     * overwritten here without being freed -- confirm
+                     * whether callers can reach this with a populated
+                     * pkey (potential leak). */
+                    pkey->pkey_sz = ret;
+                    pkey->pkey.ptr = derBuf;
+                }
+                else { /* failure - okay to ignore */
+                    XFREE(derBuf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                    derBuf = NULL;
+                }
+            }
+        }
+    }
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_RSA */
+
+#ifndef NO_DSA
+/* Give ownership of key to pkey (assign semantics: pkey will free it).
+ * Returns WOLFSSL_SUCCESS, or WOLFSSL_FAILURE on NULL arguments. */
+int wolfSSL_EVP_PKEY_assign_DSA(EVP_PKEY* pkey, WOLFSSL_DSA* key)
+{
+    if ((pkey == NULL) || (key == NULL))
+        return WOLFSSL_FAILURE;
+
+    pkey->type   = EVP_PKEY_DSA;
+    pkey->dsa    = key;
+    pkey->ownDsa = 1;
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_DSA */
+
+#ifndef NO_DH
+/* Give ownership of key to pkey (assign semantics: pkey will free it).
+ * Returns WOLFSSL_SUCCESS, or WOLFSSL_FAILURE on NULL arguments. */
+int wolfSSL_EVP_PKEY_assign_DH(EVP_PKEY* pkey, WOLFSSL_DH* key)
+{
+    if ((pkey == NULL) || (key == NULL))
+        return WOLFSSL_FAILURE;
+
+    pkey->type  = EVP_PKEY_DH;
+    pkey->dh    = key;
+    pkey->ownDh = 1;
+
+    return WOLFSSL_SUCCESS;
+}
+#endif /* !NO_DH */
+
+#endif /* OPENSSL_EXTRA */
+
+#if defined(OPENSSL_EXTRA_X509_SMALL)
+/* Subset of OPENSSL_EXTRA for PKEY operations PKEY free is needed by the
+ * subset of X509 API */
+
+/* Allocate a new WOLFSSL_EVP_PKEY on the default heap; NULL on failure. */
+WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new(void){
+    return wolfSSL_EVP_PKEY_new_ex(NULL);
+}
+
+/* Allocate and zero-initialize a WOLFSSL_EVP_PKEY on the given heap, seed
+ * its per-key RNG, and start the reference count at 1.
+ * Returns the new key, or NULL on allocation/RNG failure. */
+WOLFSSL_EVP_PKEY* wolfSSL_EVP_PKEY_new_ex(void* heap)
+{
+    WOLFSSL_EVP_PKEY* pkey;
+    int ret;
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_new_ex");
+    pkey = (WOLFSSL_EVP_PKEY*)XMALLOC(sizeof(WOLFSSL_EVP_PKEY), heap,
+            DYNAMIC_TYPE_PUBLIC_KEY);
+    if (pkey != NULL) {
+        XMEMSET(pkey, 0, sizeof(WOLFSSL_EVP_PKEY));
+        pkey->heap = heap;
+        pkey->type = WOLFSSL_EVP_PKEY_DEFAULT;
+#ifndef HAVE_FIPS
+        ret = wc_InitRng_ex(&pkey->rng, heap, INVALID_DEVID);
+#else
+        /* FIPS builds lack the heap-aware initializer */
+        ret = wc_InitRng(&pkey->rng);
+#endif
+        if (ret != 0){
+            /* NOTE(review): references is still 0 here, so the decrement in
+             * wolfSSL_EVP_PKEY_free makes it -1 and the object is never
+             * actually freed on this path -- confirm (possible leak). */
+            wolfSSL_EVP_PKEY_free(pkey);
+            WOLFSSL_MSG("memory failure");
+            return NULL;
+        }
+        pkey->references = 1;
+        /* NOTE(review): wc_InitMutex return value is ignored -- confirm
+         * that a mutex-init failure is acceptable here. */
+        wc_InitMutex(&pkey->refMutex);
+    }
+    else {
+        WOLFSSL_MSG("memory failure");
+    }
+
+    return pkey;
+}
+
+/* Drop one reference to key; when the count reaches zero, release the RNG,
+ * the cached DER encoding, any owned type-specific key object, the mutex,
+ * and the structure itself. Safe to call with NULL. */
+void wolfSSL_EVP_PKEY_free(WOLFSSL_EVP_PKEY* key)
+{
+    int doFree = 0;
+    WOLFSSL_ENTER("wolfSSL_EVP_PKEY_free");
+    if (key != NULL) {
+        if (wc_LockMutex(&key->refMutex) != 0) {
+            WOLFSSL_MSG("Couldn't lock pkey mutex");
+        }
+
+        /* only free if all references to it are done */
+        key->references--;
+        if (key->references == 0) {
+            doFree = 1;
+        }
+        wc_UnLockMutex(&key->refMutex);
+
+        if (doFree) {
+            wc_FreeRng(&key->rng);
+
+            /* cached DER encoding (see the set1/assign functions) */
+            if (key->pkey.ptr != NULL) {
+                XFREE(key->pkey.ptr, key->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+                key->pkey.ptr = NULL;
+            }
+            /* release the owned type-specific key object, if any */
+            switch(key->type)
+            {
+                #ifndef NO_RSA
+                case EVP_PKEY_RSA:
+                    if (key->rsa != NULL && key->ownRsa == 1) {
+                        wolfSSL_RSA_free(key->rsa);
+                        key->rsa = NULL;
+                    }
+                    break;
+                #endif /* NO_RSA */
+
+                #if defined(HAVE_ECC) && defined(OPENSSL_EXTRA)
+                case EVP_PKEY_EC:
+                    if (key->ecc != NULL && key->ownEcc == 1) {
+                        wolfSSL_EC_KEY_free(key->ecc);
+                        key->ecc = NULL;
+                    }
+                    break;
+                #endif /* HAVE_ECC && OPENSSL_EXTRA */
+
+                #ifndef NO_DSA
+                case EVP_PKEY_DSA:
+                    if (key->dsa != NULL && key->ownDsa == 1) {
+                        wolfSSL_DSA_free(key->dsa);
+                        key->dsa = NULL;
+                    }
+                    break;
+                #endif /* NO_DSA */
+
+                #if !defined(NO_DH) && (defined(WOLFSSL_QT) || defined(OPENSSL_ALL))
+                case EVP_PKEY_DH:
+                    if (key->dh != NULL && key->ownDh == 1) {
+                        wolfSSL_DH_free(key->dh);
+                        key->dh = NULL;
+                    }
+                    break;
+                #endif /* ! NO_DH ... */
+
+                default:
+                    break;
+            }
+
+            if (wc_FreeMutex(&key->refMutex) != 0) {
+                WOLFSSL_MSG("Couldn't free pkey mutex");
+            }
+            XFREE(key, key->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+        }
+    }
+}
+
+#endif /* OPENSSL_EXTRA_X509_SMALL */
+
+#endif /* WOLFSSL_EVP_INCLUDED */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_448.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_448.c
new file mode 100644
index 000000000..bc38c112f
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_448.c
@@ -0,0 +1,2458 @@
+/* fe_448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work.
+ * Small implementation based on Daniel Beer's curve25519 public domain work.
+ * Reworked for curve448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(HAVE_CURVE448) || defined(HAVE_ED448)
+
+#include <wolfssl/wolfcrypt/fe_448.h>
+#include <stdint.h>
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(CURVE448_SMALL) || defined(ED448_SMALL)
+
+/* Initialize the field element operations.
+ */
+void fe448_init(void)
+{
+}
+
+/* Normalize the field element.
+ * Ensure result is in range: 0..2^448-2^224-2
+ *
+ * a [in] Field element in range 0..2^448-1.
+ */
+void fe448_norm(uint8_t* a)
+{
+ int i;
+ int16_t c = 0;
+ int16_t o = 0;
+
+ for (i = 0; i < 56; i++) {
+ c += a[i];
+ if ((i == 0) || (i == 28))
+ c += 1;
+ c >>= 8;
+ }
+
+ for (i = 0; i < 56; i++) {
+ if ((i == 0) || (i == 28)) o += c;
+ o += a[i];
+ a[i] = (uint8_t)o;
+ o >>= 8;
+ }
+}
+
+/* Copy one field element into another: d = a.
+ *
+ * d [in] Destination field element.
+ * a [in] Source field element.
+ */
+void fe448_copy(uint8_t* d, const uint8_t* a)
+{
+ int i;
+ for (i = 0; i < 56; i++) {
+ d[i] = a[i];
+ }
+}
+
+/* Conditionally swap the elements.
+ * Constant time implementation.
+ *
+ * a [in] First field element.
+ * b [in] Second field element.
+ * c [in] Swap when 1. Valid values: 0, 1.
+ */
+static void fe448_cswap(uint8_t* a, uint8_t* b, int c)
+{
+ int i;
+ uint8_t mask = -(uint8_t)c;
+ uint8_t t[56];
+
+ for (i = 0; i < 56; i++)
+ t[i] = (a[i] ^ b[i]) & mask;
+ for (i = 0; i < 56; i++)
+ a[i] ^= t[i];
+ for (i = 0; i < 56; i++)
+ b[i] ^= t[i];
+}
+
+/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold sum.
+ * a [in] Field element to add.
+ * b [in] Field element to add.
+ */
+void fe448_add(uint8_t* r, const uint8_t* a, const uint8_t* b)
+{
+ int i;
+ int16_t c = 0;
+ int16_t o = 0;
+
+ for (i = 0; i < 56; i++) {
+ c += a[i];
+ c += b[i];
+ r[i] = (uint8_t)c;
+ c >>= 8;
+ }
+
+ for (i = 0; i < 56; i++) {
+ if ((i == 0) || (i == 28)) o += c;
+ o += r[i];
+ r[i] = (uint8_t)o;
+ o >>= 8;
+ }
+}
+
+/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold difference.
+ * a [in] Field element to subtract from.
+ * b [in] Field element to subtract.
+ */
+void fe448_sub(uint8_t* r, const uint8_t* a, const uint8_t* b)
+{
+ int i;
+ int16_t c = 0;
+ int16_t o = 0;
+
+ for (i = 0; i < 56; i++) {
+ if (i == 28)
+ c += 0x1fc;
+ else
+ c += 0x1fe;
+ c += a[i];
+ c -= b[i];
+ r[i] = (uint8_t)c;
+ c >>= 8;
+ }
+
+ for (i = 0; i < 56; i++) {
+ if ((i == 0) || (i == 28)) o += c;
+ o += r[i];
+ r[i] = (uint8_t)o;
+ o >>= 8;
+ }
+}
+
+/* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ */
+void fe448_mul39081(uint8_t* r, const uint8_t* a)
+{
+ int i;
+ int32_t c = 0;
+ int32_t o = 0;
+
+ for (i = 0; i < 56; i++) {
+ c += a[i] * (int32_t)39081;
+ r[i] = (uint8_t)c;
+ c >>= 8;
+ }
+
+ for (i = 0; i < 56; i++) {
+ if ((i == 0) || (i == 28)) o += c;
+ o += r[i];
+ r[i] = (uint8_t)o;
+ o >>= 8;
+ }
+}
+
+/* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ * b [in] Field element to multiply.
+ */
+void fe448_mul(uint8_t* r, const uint8_t* a, const uint8_t* b)
+{
+ int i, k;
+ int32_t c = 0;
+ int16_t o = 0, cc = 0;
+ uint8_t t[112];
+
+ for (k = 0; k < 56; k++) {
+ i = 0;
+ for (; i <= k; i++) {
+ c += (int32_t)a[i] * b[k - i];
+ }
+ t[k] = (uint8_t)c;
+ c >>= 8;
+ }
+ for (; k < 111; k++) {
+ i = k - 55;
+ for (; i < 56; i++) {
+ c += (int32_t)a[i] * b[k - i];
+ }
+ t[k] = (uint8_t)c;
+ c >>= 8;
+ }
+ t[k] = (uint8_t)c;
+
+ for (i = 0; i < 28; i++) {
+ o += t[i];
+ o += t[i + 56];
+ o += t[i + 84];
+ r[i] = (uint8_t)o;
+ o >>= 8;
+ }
+ for (i = 28; i < 56; i++) {
+ o += t[i];
+ o += t[i + 56];
+ o += t[i + 28];
+ o += t[i + 56];
+ r[i] = (uint8_t)o;
+ o >>= 8;
+ }
+ for (i = 0; i < 56; i++) {
+ if ((i == 0) || (i == 28)) cc += o;
+ cc += r[i];
+ r[i] = (uint8_t)cc;
+ cc >>= 8;
+ }
+}
+
+/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to square.
+ */
+void fe448_sqr(uint8_t* r, const uint8_t* a)
+{
+ int i, k;
+ int32_t c = 0;
+ int32_t p;
+ int16_t o = 0, cc = 0;
+ uint8_t t[112];
+
+ for (k = 0; k < 56; k++) {
+ i = 0;
+ for (; i <= k; i++) {
+ if (k - i < i)
+ break;
+ p = (int32_t)a[i] * a[k - i];
+ if (k - i != i)
+ p *= 2;
+ c += p;
+ }
+ t[k] = (uint8_t)c;
+ c >>= 8;
+ }
+ for (; k < 111; k++) {
+ i = k - 55;
+ for (; i < 56; i++) {
+ if (k - i < i)
+ break;
+ p = (int32_t)a[i] * a[k - i];
+ if (k - i != i)
+ p *= 2;
+ c += p;
+ }
+ t[k] = (uint8_t)c;
+ c >>= 8;
+ }
+ t[k] = (uint8_t)c;
+
+ for (i = 0; i < 28; i++) {
+ o += t[i];
+ o += t[i + 56];
+ o += t[i + 84];
+ r[i] = (uint8_t)o;
+ o >>= 8;
+ }
+ for (i = 28; i < 56; i++) {
+ o += t[i];
+ o += t[i + 56];
+ o += t[i + 28];
+ o += t[i + 56];
+ r[i] = (uint8_t)o;
+ o >>= 8;
+ }
+ for (i = 0; i < 56; i++) {
+ if ((i == 0) || (i == 28)) cc += o;
+ cc += r[i];
+ r[i] = (uint8_t)cc;
+ cc >>= 8;
+ }
+ fe448_norm(r);
+}
+
+/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
+ * Constant time implementation - using Fermat's little theorem:
+ * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
+ * For curve448: p - 2 = 2^448 - 2^224 - 3
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to invert.
+ */
+void fe448_invert(uint8_t* r, const uint8_t* a)
+{
+ int i;
+ uint8_t t[56];
+
+ fe448_sqr(t, a);
+ fe448_mul(t, t, a);
+ for (i = 0; i < 221; i++) {
+ fe448_sqr(t, t);
+ fe448_mul(t, t, a);
+ }
+ fe448_sqr(t, t);
+ for (i = 0; i < 222; i++) {
+ fe448_sqr(t, t);
+ fe448_mul(t, t, a);
+ }
+ fe448_sqr(t, t);
+ fe448_sqr(t, t);
+ fe448_mul(r, t, a);
+}
+
+/* Scalar multiply the point by a number. r = n.a
+ * Uses Montgomery ladder and only requires the x-ordinate.
+ *
+ * r [in] Field element to hold result.
+ * n [in] Scalar as an array of bytes.
+ * a [in] Point to multiply - x-ordinate only.
+ */
+int curve448(byte* r, const byte* n, const byte* a)
+{
+ uint8_t x1[56];
+ uint8_t x2[56] = {1};
+ uint8_t z2[56] = {0};
+ uint8_t x3[56];
+ uint8_t z3[56] = {1};
+ uint8_t t0[56];
+ uint8_t t1[56];
+ int i;
+ unsigned int swap;
+ unsigned int b;
+
+ fe448_copy(x1, a);
+ fe448_copy(x3, a);
+
+ swap = 0;
+ for (i = 447; i >= 0; --i) {
+ b = (n[i >> 3] >> (i & 7)) & 1;
+ swap ^= b;
+ fe448_cswap(x2, x3, swap);
+ fe448_cswap(z2, z3, swap);
+ swap = b;
+
+ /* Montgomery Ladder - double and add */
+ fe448_add(t0, x2, z2);
+ fe448_add(t1, x3, z3);
+ fe448_sub(x2, x2, z2);
+ fe448_sub(x3, x3, z3);
+ fe448_mul(t1, t1, x2);
+ fe448_mul(z3, x3, t0);
+ fe448_sqr(t0, t0);
+ fe448_sqr(x2, x2);
+ fe448_add(x3, z3, t1);
+ fe448_sqr(x3, x3);
+ fe448_sub(z3, z3, t1);
+ fe448_sqr(z3, z3);
+ fe448_mul(z3, z3, x1);
+ fe448_sub(t1, t0, x2);
+ fe448_mul(x2, t0, x2);
+ fe448_mul39081(z2, t1);
+ fe448_add(z2, t0, z2);
+ fe448_mul(z2, z2, t1);
+ }
+ fe448_cswap(x2, x3, swap);
+ fe448_cswap(z2, z3, swap);
+
+ fe448_invert(z2, z2);
+ fe448_mul(r, x2, z2);
+ fe448_norm(r);
+
+ return 0;
+}
+
+#ifdef HAVE_ED448
+/* Check whether field element is not 0.
+ * Field element must have been normalized before call.
+ *
+ * a [in] Field element.
+ * returns 0 when zero, and any other value otherwise.
+ */
+int fe448_isnonzero(const uint8_t* a)
+{
+ int i;
+ uint8_t c = 0;
+ for (i = 0; i < 56; i++)
+ c |= a[i];
+ return c;
+}
+
+/* Negates the field element. r = -a mod (2^448 - 2^224 - 1)
+ * Add 0x200 to each element and subtract 2 from next.
+ * Top element overflow handled by subtracting 2 from index 0 and 28.
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element.
+ */
+void fe448_neg(uint8_t* r, const uint8_t* a)
+{
+ int i;
+ int16_t c = 0;
+ int16_t o = 0;
+
+ for (i = 0; i < 56; i++) {
+ if (i == 28)
+ c += 0x1fc;
+ else
+ c += 0x1fe;
+ c -= a[i];
+ r[i] = (uint8_t)c;
+ c >>= 8;
+ }
+
+ for (i = 0; i < 56; i++) {
+ if ((i == 0) || (i == 28)) o += c;
+ o += r[i];
+ r[i] = (uint8_t)o;
+ o >>= 8;
+ }
+}
+
+/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
+ * Used for calculating y-ordinate from x-ordinate for Ed448.
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to exponentiate.
+ */
+void fe448_pow_2_446_222_1(uint8_t* r, const uint8_t* a)
+{
+ int i;
+ uint8_t t[56];
+
+ fe448_sqr(t, a);
+ fe448_mul(t, t, a);
+ for (i = 0; i < 221; i++) {
+ fe448_sqr(t, t);
+ fe448_mul(t, t, a);
+ }
+ fe448_sqr(t, t);
+ for (i = 0; i < 221; i++) {
+ fe448_sqr(t, t);
+ fe448_mul(t, t, a);
+ }
+ fe448_sqr(t, t);
+ fe448_mul(r, t, a);
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ *
+ * a A field element.
+ * b A field element.
+ * c If 1 then copy and if 0 then don't copy.
+ */
+void fe448_cmov(uint8_t* a, const uint8_t* b, int c)
+{
+ int i;
+ uint8_t m = -(uint8_t)c;
+ uint8_t t[56];
+
+ for (i = 0; i < 56; i++)
+ t[i] = m & (a[i] ^ b[i]);
+ for (i = 0; i < 56; i++)
+ a[i] ^= t[i];
+}
+
+#endif /* HAVE_ED448 */
+#elif defined(CURVED448_128BIT)
+
+/* Initialize the field element operations.
+ */
+void fe448_init(void)
+{
+}
+
+/* Convert the field element from a byte array to an array of 56-bits.
+ *
+ * r [in] Array to encode into.
+ * b [in] Byte array.
+ */
+void fe448_from_bytes(int64_t* r, const unsigned char* b)
+{
+ r[ 0] = ((int64_t) (b[ 0]) << 0)
+ | ((int64_t) (b[ 1]) << 8)
+ | ((int64_t) (b[ 2]) << 16)
+ | ((int64_t) (b[ 3]) << 24)
+ | ((int64_t) (b[ 4]) << 32)
+ | ((int64_t) (b[ 5]) << 40)
+ | ((int64_t) (b[ 6]) << 48);
+ r[ 1] = ((int64_t) (b[ 7]) << 0)
+ | ((int64_t) (b[ 8]) << 8)
+ | ((int64_t) (b[ 9]) << 16)
+ | ((int64_t) (b[10]) << 24)
+ | ((int64_t) (b[11]) << 32)
+ | ((int64_t) (b[12]) << 40)
+ | ((int64_t) (b[13]) << 48);
+ r[ 2] = ((int64_t) (b[14]) << 0)
+ | ((int64_t) (b[15]) << 8)
+ | ((int64_t) (b[16]) << 16)
+ | ((int64_t) (b[17]) << 24)
+ | ((int64_t) (b[18]) << 32)
+ | ((int64_t) (b[19]) << 40)
+ | ((int64_t) (b[20]) << 48);
+ r[ 3] = ((int64_t) (b[21]) << 0)
+ | ((int64_t) (b[22]) << 8)
+ | ((int64_t) (b[23]) << 16)
+ | ((int64_t) (b[24]) << 24)
+ | ((int64_t) (b[25]) << 32)
+ | ((int64_t) (b[26]) << 40)
+ | ((int64_t) (b[27]) << 48);
+ r[ 4] = ((int64_t) (b[28]) << 0)
+ | ((int64_t) (b[29]) << 8)
+ | ((int64_t) (b[30]) << 16)
+ | ((int64_t) (b[31]) << 24)
+ | ((int64_t) (b[32]) << 32)
+ | ((int64_t) (b[33]) << 40)
+ | ((int64_t) (b[34]) << 48);
+ r[ 5] = ((int64_t) (b[35]) << 0)
+ | ((int64_t) (b[36]) << 8)
+ | ((int64_t) (b[37]) << 16)
+ | ((int64_t) (b[38]) << 24)
+ | ((int64_t) (b[39]) << 32)
+ | ((int64_t) (b[40]) << 40)
+ | ((int64_t) (b[41]) << 48);
+ r[ 6] = ((int64_t) (b[42]) << 0)
+ | ((int64_t) (b[43]) << 8)
+ | ((int64_t) (b[44]) << 16)
+ | ((int64_t) (b[45]) << 24)
+ | ((int64_t) (b[46]) << 32)
+ | ((int64_t) (b[47]) << 40)
+ | ((int64_t) (b[48]) << 48);
+ r[ 7] = ((int64_t) (b[49]) << 0)
+ | ((int64_t) (b[50]) << 8)
+ | ((int64_t) (b[51]) << 16)
+ | ((int64_t) (b[52]) << 24)
+ | ((int64_t) (b[53]) << 32)
+ | ((int64_t) (b[54]) << 40)
+ | ((int64_t) (b[55]) << 48);
+}
+
+/* Convert the field element to a byte array from an array of 56-bits.
+ *
+ * b [in] Byte array.
+ * a [in] Array to encode into.
+ */
+void fe448_to_bytes(unsigned char* b, const int64_t* a)
+{
+ int128_t t;
+ /* Mod */
+ int64_t in0 = a[0];
+ int64_t in1 = a[1];
+ int64_t in2 = a[2];
+ int64_t in3 = a[3];
+ int64_t in4 = a[4];
+ int64_t in5 = a[5];
+ int64_t in6 = a[6];
+ int64_t in7 = a[7];
+ int64_t o = in7 >> 56;
+ in7 -= o << 56;
+ in0 += o;
+ in4 += o;
+ o = (in0 + 1) >> 56;
+ o = (o + in1) >> 56;
+ o = (o + in2) >> 56;
+ o = (o + in3) >> 56;
+ o = (o + in4 + 1) >> 56;
+ o = (o + in5) >> 56;
+ o = (o + in6) >> 56;
+ o = (o + in7) >> 56;
+ in0 += o;
+ in4 += o;
+ in7 -= o << 56;
+ o = in0 >> 56; in1 += o; t = o << 56; in0 -= t;
+ o = in1 >> 56; in2 += o; t = o << 56; in1 -= t;
+ o = in2 >> 56; in3 += o; t = o << 56; in2 -= t;
+ o = in3 >> 56; in4 += o; t = o << 56; in3 -= t;
+ o = in4 >> 56; in5 += o; t = o << 56; in4 -= t;
+ o = in5 >> 56; in6 += o; t = o << 56; in5 -= t;
+ o = in6 >> 56; in7 += o; t = o << 56; in6 -= t;
+ o = in7 >> 56; in0 += o;
+ in4 += o; t = o << 56; in7 -= t;
+
+ /* Output as bytes */
+ b[ 0] = (in0 >> 0);
+ b[ 1] = (in0 >> 8);
+ b[ 2] = (in0 >> 16);
+ b[ 3] = (in0 >> 24);
+ b[ 4] = (in0 >> 32);
+ b[ 5] = (in0 >> 40);
+ b[ 6] = (in0 >> 48);
+ b[ 7] = (in1 >> 0);
+ b[ 8] = (in1 >> 8);
+ b[ 9] = (in1 >> 16);
+ b[10] = (in1 >> 24);
+ b[11] = (in1 >> 32);
+ b[12] = (in1 >> 40);
+ b[13] = (in1 >> 48);
+ b[14] = (in2 >> 0);
+ b[15] = (in2 >> 8);
+ b[16] = (in2 >> 16);
+ b[17] = (in2 >> 24);
+ b[18] = (in2 >> 32);
+ b[19] = (in2 >> 40);
+ b[20] = (in2 >> 48);
+ b[21] = (in3 >> 0);
+ b[22] = (in3 >> 8);
+ b[23] = (in3 >> 16);
+ b[24] = (in3 >> 24);
+ b[25] = (in3 >> 32);
+ b[26] = (in3 >> 40);
+ b[27] = (in3 >> 48);
+ b[28] = (in4 >> 0);
+ b[29] = (in4 >> 8);
+ b[30] = (in4 >> 16);
+ b[31] = (in4 >> 24);
+ b[32] = (in4 >> 32);
+ b[33] = (in4 >> 40);
+ b[34] = (in4 >> 48);
+ b[35] = (in5 >> 0);
+ b[36] = (in5 >> 8);
+ b[37] = (in5 >> 16);
+ b[38] = (in5 >> 24);
+ b[39] = (in5 >> 32);
+ b[40] = (in5 >> 40);
+ b[41] = (in5 >> 48);
+ b[42] = (in6 >> 0);
+ b[43] = (in6 >> 8);
+ b[44] = (in6 >> 16);
+ b[45] = (in6 >> 24);
+ b[46] = (in6 >> 32);
+ b[47] = (in6 >> 40);
+ b[48] = (in6 >> 48);
+ b[49] = (in7 >> 0);
+ b[50] = (in7 >> 8);
+ b[51] = (in7 >> 16);
+ b[52] = (in7 >> 24);
+ b[53] = (in7 >> 32);
+ b[54] = (in7 >> 40);
+ b[55] = (in7 >> 48);
+}
+
+/* Set the field element to 1.
+ *
+ * a [in] Field element.
+ */
+void fe448_1(int64_t* a)
+{
+ a[0] = 1;
+ a[1] = 0;
+ a[2] = 0;
+ a[3] = 0;
+ a[4] = 0;
+ a[5] = 0;
+ a[6] = 0;
+ a[7] = 0;
+}
+
+/* Set the field element to 0.
+ *
+ * a [in] Field element.
+ */
+void fe448_0(int64_t* a)
+{
+ a[0] = 0;
+ a[1] = 0;
+ a[2] = 0;
+ a[3] = 0;
+ a[4] = 0;
+ a[5] = 0;
+ a[6] = 0;
+ a[7] = 0;
+}
+
+/* Copy one field element into another: d = a.
+ *
+ * d [in] Destination field element.
+ * a [in] Source field element.
+ */
+void fe448_copy(int64_t* d, const int64_t* a)
+{
+ d[0] = a[0];
+ d[1] = a[1];
+ d[2] = a[2];
+ d[3] = a[3];
+ d[4] = a[4];
+ d[5] = a[5];
+ d[6] = a[6];
+ d[7] = a[7];
+}
+
+/* Conditionally swap the elements.
+ * Constant time implementation.
+ *
+ * a [in] First field element.
+ * b [in] Second field element.
+ * c [in] Swap when 1. Valid values: 0, 1.
+ */
+static void fe448_cswap(int64_t* a, int64_t* b, int c)
+{
+ int64_t mask = -(int64_t)c;
+ int64_t t0 = (a[0] ^ b[0]) & mask;
+ int64_t t1 = (a[1] ^ b[1]) & mask;
+ int64_t t2 = (a[2] ^ b[2]) & mask;
+ int64_t t3 = (a[3] ^ b[3]) & mask;
+ int64_t t4 = (a[4] ^ b[4]) & mask;
+ int64_t t5 = (a[5] ^ b[5]) & mask;
+ int64_t t6 = (a[6] ^ b[6]) & mask;
+ int64_t t7 = (a[7] ^ b[7]) & mask;
+ a[0] ^= t0;
+ a[1] ^= t1;
+ a[2] ^= t2;
+ a[3] ^= t3;
+ a[4] ^= t4;
+ a[5] ^= t5;
+ a[6] ^= t6;
+ a[7] ^= t7;
+ b[0] ^= t0;
+ b[1] ^= t1;
+ b[2] ^= t2;
+ b[3] ^= t3;
+ b[4] ^= t4;
+ b[5] ^= t5;
+ b[6] ^= t6;
+ b[7] ^= t7;
+}
+
+/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold sum.
+ * a [in] Field element to add.
+ * b [in] Field element to add.
+ */
+void fe448_add(int64_t* r, const int64_t* a, const int64_t* b)
+{
+ r[0] = a[0] + b[0];
+ r[1] = a[1] + b[1];
+ r[2] = a[2] + b[2];
+ r[3] = a[3] + b[3];
+ r[4] = a[4] + b[4];
+ r[5] = a[5] + b[5];
+ r[6] = a[6] + b[6];
+ r[7] = a[7] + b[7];
+}
+
+/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold difference.
+ * a [in] Field element to subtract from.
+ * b [in] Field element to subtract.
+ */
+void fe448_sub(int64_t* r, const int64_t* a, const int64_t* b)
+{
+ r[0] = a[0] - b[0];
+ r[1] = a[1] - b[1];
+ r[2] = a[2] - b[2];
+ r[3] = a[3] - b[3];
+ r[4] = a[4] - b[4];
+ r[5] = a[5] - b[5];
+ r[6] = a[6] - b[6];
+ r[7] = a[7] - b[7];
+}
+
+/* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ */
+void fe448_mul39081(int64_t* r, const int64_t* a)
+{
+ int128_t t;
+ int64_t o;
+ int128_t t0 = a[0] * (int128_t)39081;
+ int128_t t1 = a[1] * (int128_t)39081;
+ int128_t t2 = a[2] * (int128_t)39081;
+ int128_t t3 = a[3] * (int128_t)39081;
+ int128_t t4 = a[4] * (int128_t)39081;
+ int128_t t5 = a[5] * (int128_t)39081;
+ int128_t t6 = a[6] * (int128_t)39081;
+ int128_t t7 = a[7] * (int128_t)39081;
+ o = t0 >> 56; t1 += o; t = (int128_t)o << 56; t0 -= t;
+ o = t1 >> 56; t2 += o; t = (int128_t)o << 56; t1 -= t;
+ o = t2 >> 56; t3 += o; t = (int128_t)o << 56; t2 -= t;
+ o = t3 >> 56; t4 += o; t = (int128_t)o << 56; t3 -= t;
+ o = t4 >> 56; t5 += o; t = (int128_t)o << 56; t4 -= t;
+ o = t5 >> 56; t6 += o; t = (int128_t)o << 56; t5 -= t;
+ o = t6 >> 56; t7 += o; t = (int128_t)o << 56; t6 -= t;
+ o = t7 >> 56; t0 += o;
+ t4 += o; t = (int128_t)o << 56; t7 -= t;
+
+ /* Store */
+ r[0] = t0;
+ r[1] = t1;
+ r[2] = t2;
+ r[3] = t3;
+ r[4] = t4;
+ r[5] = t5;
+ r[6] = t6;
+ r[7] = t7;
+}
+
+/* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ * b [in] Field element to multiply.
+ */
+void fe448_mul(int64_t* r, const int64_t* a, const int64_t* b)
+{
+ int128_t t;
+ int64_t o;
+ int128_t t0 = (int128_t)a[ 0] * b[ 0];
+ int128_t t1 = (int128_t)a[ 0] * b[ 1];
+ int128_t t101 = (int128_t)a[ 1] * b[ 0];
+ int128_t t2 = (int128_t)a[ 0] * b[ 2];
+ int128_t t102 = (int128_t)a[ 1] * b[ 1];
+ int128_t t202 = (int128_t)a[ 2] * b[ 0];
+ int128_t t3 = (int128_t)a[ 0] * b[ 3];
+ int128_t t103 = (int128_t)a[ 1] * b[ 2];
+ int128_t t203 = (int128_t)a[ 2] * b[ 1];
+ int128_t t303 = (int128_t)a[ 3] * b[ 0];
+ int128_t t4 = (int128_t)a[ 0] * b[ 4];
+ int128_t t104 = (int128_t)a[ 1] * b[ 3];
+ int128_t t204 = (int128_t)a[ 2] * b[ 2];
+ int128_t t304 = (int128_t)a[ 3] * b[ 1];
+ int128_t t404 = (int128_t)a[ 4] * b[ 0];
+ int128_t t5 = (int128_t)a[ 0] * b[ 5];
+ int128_t t105 = (int128_t)a[ 1] * b[ 4];
+ int128_t t205 = (int128_t)a[ 2] * b[ 3];
+ int128_t t305 = (int128_t)a[ 3] * b[ 2];
+ int128_t t405 = (int128_t)a[ 4] * b[ 1];
+ int128_t t505 = (int128_t)a[ 5] * b[ 0];
+ int128_t t6 = (int128_t)a[ 0] * b[ 6];
+ int128_t t106 = (int128_t)a[ 1] * b[ 5];
+ int128_t t206 = (int128_t)a[ 2] * b[ 4];
+ int128_t t306 = (int128_t)a[ 3] * b[ 3];
+ int128_t t406 = (int128_t)a[ 4] * b[ 2];
+ int128_t t506 = (int128_t)a[ 5] * b[ 1];
+ int128_t t606 = (int128_t)a[ 6] * b[ 0];
+ int128_t t7 = (int128_t)a[ 0] * b[ 7];
+ int128_t t107 = (int128_t)a[ 1] * b[ 6];
+ int128_t t207 = (int128_t)a[ 2] * b[ 5];
+ int128_t t307 = (int128_t)a[ 3] * b[ 4];
+ int128_t t407 = (int128_t)a[ 4] * b[ 3];
+ int128_t t507 = (int128_t)a[ 5] * b[ 2];
+ int128_t t607 = (int128_t)a[ 6] * b[ 1];
+ int128_t t707 = (int128_t)a[ 7] * b[ 0];
+ int128_t t8 = (int128_t)a[ 1] * b[ 7];
+ int128_t t108 = (int128_t)a[ 2] * b[ 6];
+ int128_t t208 = (int128_t)a[ 3] * b[ 5];
+ int128_t t308 = (int128_t)a[ 4] * b[ 4];
+ int128_t t408 = (int128_t)a[ 5] * b[ 3];
+ int128_t t508 = (int128_t)a[ 6] * b[ 2];
+ int128_t t608 = (int128_t)a[ 7] * b[ 1];
+ int128_t t9 = (int128_t)a[ 2] * b[ 7];
+ int128_t t109 = (int128_t)a[ 3] * b[ 6];
+ int128_t t209 = (int128_t)a[ 4] * b[ 5];
+ int128_t t309 = (int128_t)a[ 5] * b[ 4];
+ int128_t t409 = (int128_t)a[ 6] * b[ 3];
+ int128_t t509 = (int128_t)a[ 7] * b[ 2];
+ int128_t t10 = (int128_t)a[ 3] * b[ 7];
+ int128_t t110 = (int128_t)a[ 4] * b[ 6];
+ int128_t t210 = (int128_t)a[ 5] * b[ 5];
+ int128_t t310 = (int128_t)a[ 6] * b[ 4];
+ int128_t t410 = (int128_t)a[ 7] * b[ 3];
+ int128_t t11 = (int128_t)a[ 4] * b[ 7];
+ int128_t t111 = (int128_t)a[ 5] * b[ 6];
+ int128_t t211 = (int128_t)a[ 6] * b[ 5];
+ int128_t t311 = (int128_t)a[ 7] * b[ 4];
+ int128_t t12 = (int128_t)a[ 5] * b[ 7];
+ int128_t t112 = (int128_t)a[ 6] * b[ 6];
+ int128_t t212 = (int128_t)a[ 7] * b[ 5];
+ int128_t t13 = (int128_t)a[ 6] * b[ 7];
+ int128_t t113 = (int128_t)a[ 7] * b[ 6];
+ int128_t t14 = (int128_t)a[ 7] * b[ 7];
+ t1 += t101;
+ t2 += t102; t2 += t202;
+ t3 += t103; t3 += t203; t3 += t303;
+ t4 += t104; t4 += t204; t4 += t304; t4 += t404;
+ t5 += t105; t5 += t205; t5 += t305; t5 += t405; t5 += t505;
+ t6 += t106; t6 += t206; t6 += t306; t6 += t406; t6 += t506;
+ t6 += t606;
+ t7 += t107; t7 += t207; t7 += t307; t7 += t407; t7 += t507;
+ t7 += t607;
+ t7 += t707;
+ t8 += t108; t8 += t208; t8 += t308; t8 += t408; t8 += t508;
+ t8 += t608;
+ t9 += t109; t9 += t209; t9 += t309; t9 += t409; t9 += t509;
+ t10 += t110; t10 += t210; t10 += t310; t10 += t410;
+ t11 += t111; t11 += t211; t11 += t311;
+ t12 += t112; t12 += t212;
+ t13 += t113;
+
+ /* Reduce */
+ t0 += t8 + t12;
+ t1 += t9 + t13;
+ t2 += t10 + t14;
+ t3 += t11;
+ t4 += t12 + t8 + t12;
+ t5 += t13 + t9 + t13;
+ t6 += t14 + t10 + t14;
+ t7 += t11;
+ o = t7 >> 56; t0 += o;
+ t4 += o; t = (int128_t)o << 56; t7 -= t;
+ o = t0 >> 56; t1 += o; t = (int128_t)o << 56; t0 -= t;
+ o = t1 >> 56; t2 += o; t = (int128_t)o << 56; t1 -= t;
+ o = t2 >> 56; t3 += o; t = (int128_t)o << 56; t2 -= t;
+ o = t3 >> 56; t4 += o; t = (int128_t)o << 56; t3 -= t;
+ o = t4 >> 56; t5 += o; t = (int128_t)o << 56; t4 -= t;
+ o = t5 >> 56; t6 += o; t = (int128_t)o << 56; t5 -= t;
+ o = t6 >> 56; t7 += o; t = (int128_t)o << 56; t6 -= t;
+ o = t7 >> 56; t0 += o;
+ t4 += o; t = (int128_t)o << 56; t7 -= t;
+
+ /* Store */
+ r[0] = t0;
+ r[1] = t1;
+ r[2] = t2;
+ r[3] = t3;
+ r[4] = t4;
+ r[5] = t5;
+ r[6] = t6;
+ r[7] = t7;
+}
+
+/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to square.
+ */
+void fe448_sqr(int64_t* r, const int64_t* a)
+{
+ int128_t t;
+ int64_t o;
+ int128_t t0 = (int128_t)a[ 0] * a[ 0];
+ int128_t t1 = 2 * (int128_t)a[ 0] * a[ 1];
+ int128_t t2 = 2 * (int128_t)a[ 0] * a[ 2];
+ int128_t t102 = (int128_t)a[ 1] * a[ 1];
+ int128_t t3 = 2 * (int128_t)a[ 0] * a[ 3];
+ int128_t t103 = 2 * (int128_t)a[ 1] * a[ 2];
+ int128_t t4 = 2 * (int128_t)a[ 0] * a[ 4];
+ int128_t t104 = 2 * (int128_t)a[ 1] * a[ 3];
+ int128_t t204 = (int128_t)a[ 2] * a[ 2];
+ int128_t t5 = 2 * (int128_t)a[ 0] * a[ 5];
+ int128_t t105 = 2 * (int128_t)a[ 1] * a[ 4];
+ int128_t t205 = 2 * (int128_t)a[ 2] * a[ 3];
+ int128_t t6 = 2 * (int128_t)a[ 0] * a[ 6];
+ int128_t t106 = 2 * (int128_t)a[ 1] * a[ 5];
+ int128_t t206 = 2 * (int128_t)a[ 2] * a[ 4];
+ int128_t t306 = (int128_t)a[ 3] * a[ 3];
+ int128_t t7 = 2 * (int128_t)a[ 0] * a[ 7];
+ int128_t t107 = 2 * (int128_t)a[ 1] * a[ 6];
+ int128_t t207 = 2 * (int128_t)a[ 2] * a[ 5];
+ int128_t t307 = 2 * (int128_t)a[ 3] * a[ 4];
+ int128_t t8 = 2 * (int128_t)a[ 1] * a[ 7];
+ int128_t t108 = 2 * (int128_t)a[ 2] * a[ 6];
+ int128_t t208 = 2 * (int128_t)a[ 3] * a[ 5];
+ int128_t t308 = (int128_t)a[ 4] * a[ 4];
+ int128_t t9 = 2 * (int128_t)a[ 2] * a[ 7];
+ int128_t t109 = 2 * (int128_t)a[ 3] * a[ 6];
+ int128_t t209 = 2 * (int128_t)a[ 4] * a[ 5];
+ int128_t t10 = 2 * (int128_t)a[ 3] * a[ 7];
+ int128_t t110 = 2 * (int128_t)a[ 4] * a[ 6];
+ int128_t t210 = (int128_t)a[ 5] * a[ 5];
+ int128_t t11 = 2 * (int128_t)a[ 4] * a[ 7];
+ int128_t t111 = 2 * (int128_t)a[ 5] * a[ 6];
+ int128_t t12 = 2 * (int128_t)a[ 5] * a[ 7];
+ int128_t t112 = (int128_t)a[ 6] * a[ 6];
+ int128_t t13 = 2 * (int128_t)a[ 6] * a[ 7];
+ int128_t t14 = (int128_t)a[ 7] * a[ 7];
+ t2 += t102;
+ t3 += t103;
+ t4 += t104; t4 += t204;
+ t5 += t105; t5 += t205;
+ t6 += t106; t6 += t206; t6 += t306;
+ t7 += t107; t7 += t207; t7 += t307;
+ t8 += t108; t8 += t208; t8 += t308;
+ t9 += t109; t9 += t209;
+ t10 += t110; t10 += t210;
+ t11 += t111;
+ t12 += t112;
+
+ /* Reduce */
+ t0 += t8 + t12;
+ t1 += t9 + t13;
+ t2 += t10 + t14;
+ t3 += t11;
+ t4 += t12 + t8 + t12;
+ t5 += t13 + t9 + t13;
+ t6 += t14 + t10 + t14;
+ t7 += t11;
+ o = t7 >> 56; t0 += o;
+ t4 += o; t = (int128_t)o << 56; t7 -= t;
+ o = t0 >> 56; t1 += o; t = (int128_t)o << 56; t0 -= t;
+ o = t1 >> 56; t2 += o; t = (int128_t)o << 56; t1 -= t;
+ o = t2 >> 56; t3 += o; t = (int128_t)o << 56; t2 -= t;
+ o = t3 >> 56; t4 += o; t = (int128_t)o << 56; t3 -= t;
+ o = t4 >> 56; t5 += o; t = (int128_t)o << 56; t4 -= t;
+ o = t5 >> 56; t6 += o; t = (int128_t)o << 56; t5 -= t;
+ o = t6 >> 56; t7 += o; t = (int128_t)o << 56; t6 -= t;
+ o = t7 >> 56; t0 += o;
+ t4 += o; t = (int128_t)o << 56; t7 -= t;
+
+ /* Store */
+ r[0] = t0;
+ r[1] = t1;
+ r[2] = t2;
+ r[3] = t3;
+ r[4] = t4;
+ r[5] = t5;
+ r[6] = t6;
+ r[7] = t7;
+}
+
+/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
+ * Constant time implementation - using Fermat's little theorem:
+ * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
+ * For curve448: p - 2 = 2^448 - 2^224 - 3
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to invert.
+ */
+void fe448_invert(int64_t* r, const int64_t* a)
+{
+ int64_t t1[8];
+ int64_t t2[8];
+ int64_t t3[8];
+ int64_t t4[8];
+ int i;
+
+ fe448_sqr(t1, a);
+ /* t1 = 2 */
+ fe448_mul(t1, t1, a);
+ /* t1 = 3 */
+ fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
+ /* t2 = c */
+ fe448_mul(t3, t2, a);
+ /* t3 = d */
+ fe448_mul(t1, t2, t1);
+ /* t1 = f */
+ fe448_sqr(t2, t1);
+ /* t2 = 1e */
+ fe448_mul(t4, t2, a);
+ /* t4 = 1f */
+ fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+ /* t2 = 3e0 */
+ fe448_mul(t1, t2, t4);
+ /* t1 = 3ff */
+ fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
+ /* t2 = ffc00 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = fffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+ /* t2 = 1ffffe0 */
+ fe448_mul(t1, t2, t4);
+ /* t1 = 1ffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
+ /* t2 = 3fffffe000000 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = 3ffffffffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+ /* t2 = 7fffffffffffe0 */
+ fe448_mul(t1, t2, t4);
+ /* t1 = 7fffffffffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
+ /* t2 = 3fffffffffffff80000000000000 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = 3fffffffffffffffffffffffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
+ /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
+ /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+ fe448_mul(t1, t3, a);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
+ fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
+ fe448_mul(r, t3, t1);
+ /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+}
+
+/* Scalar multiply the point by a number. r = n.a
+ * Uses Montgomery ladder and only requires the x-ordinate.
+ *
+ * r [in] Field element to hold result.
+ * n [in] Scalar as an array of bytes.
+ * a [in] Point to multiply - x-ordinate only.
+ */
+int curve448(byte* r, const byte* n, const byte* a)
+{
+ int64_t x1[8];
+ int64_t x2[8];
+ int64_t z2[8];
+ int64_t x3[8];
+ int64_t z3[8];
+ int64_t t0[8];
+ int64_t t1[8];
+ int i;
+ unsigned int swap;
+ unsigned int b;
+
+ fe448_from_bytes(x1, a);
+ fe448_1(x2);
+ fe448_0(z2);
+ fe448_copy(x3, x1);
+ fe448_1(z3);
+
+ swap = 0;
+ for (i = 447; i >= 0; --i) {
+ b = (n[i >> 3] >> (i & 7)) & 1;
+ swap ^= b;
+ fe448_cswap(x2, x3, swap);
+ fe448_cswap(z2, z3, swap);
+ swap = b;
+
+ /* Montgomery Ladder - double and add */
+ fe448_add(t0, x2, z2);
+ fe448_reduce(t0);
+ fe448_add(t1, x3, z3);
+ fe448_reduce(t1);
+ fe448_sub(x2, x2, z2);
+ fe448_sub(x3, x3, z3);
+ fe448_mul(t1, t1, x2);
+ fe448_mul(z3, x3, t0);
+ fe448_sqr(t0, t0);
+ fe448_sqr(x2, x2);
+ fe448_add(x3, z3, t1);
+ fe448_reduce(x3);
+ fe448_sqr(x3, x3);
+ fe448_sub(z3, z3, t1);
+ fe448_sqr(z3, z3);
+ fe448_mul(z3, z3, x1);
+ fe448_sub(t1, t0, x2);
+ fe448_mul(x2, t0, x2);
+ fe448_mul39081(z2, t1);
+ fe448_add(z2, t0, z2);
+ fe448_mul(z2, z2, t1);
+ }
+ /* Last two bits are 0 - no final swap check required. */
+
+ fe448_invert(z2, z2);
+ fe448_mul(x2, x2, z2);
+ fe448_to_bytes(r, x2);
+
+ return 0;
+}
+
+#ifdef HAVE_ED448
+/* Check whether field element is not 0.
+ * Must convert to a normalized form before checking.
+ *
+ * a [in] Field element.
+ * returns 0 when zero, and any other value otherwise.
+ */
+int fe448_isnonzero(const int64_t* a)
+{
+ uint8_t b[56];
+ int i;
+ uint8_t c = 0;
+ fe448_to_bytes(b, a);
+ for (i = 0; i < 56; i++)
+ c |= b[i];
+ return c;
+}
+
+/* Check whether field element is negative.
+ * Must convert to a normalized form before checking.
+ *
+ * a [in] Field element.
+ * returns 1 when negative, and 0 otherwise.
+ */
+int fe448_isnegative(const int64_t* a)
+{
+ uint8_t b[56];
+ fe448_to_bytes(b, a);
+ return b[0] & 1;
+}
+
+/* Negates the field element. r = -a
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element.
+ */
+void fe448_neg(int64_t* r, const int64_t* a)
+{
+ r[0] = -a[0];
+ r[1] = -a[1];
+ r[2] = -a[2];
+ r[3] = -a[3];
+ r[4] = -a[4];
+ r[5] = -a[5];
+ r[6] = -a[6];
+ r[7] = -a[7];
+}
+
+/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
+ * Used for calculating y-ordinate from x-ordinate for Ed448.
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to exponentiate.
+ */
+void fe448_pow_2_446_222_1(int64_t* r, const int64_t* a)
+{
+ int64_t t1[8];
+ int64_t t2[8];
+ int64_t t3[8];
+ int64_t t4[8];
+ int64_t t5[8];
+ int i;
+
+ fe448_sqr(t3, a);
+ /* t3 = 2 */
+ fe448_mul(t1, t3, a);
+ /* t1 = 3 */
+ fe448_sqr(t5, t1);
+ /* t5 = 6 */
+ fe448_mul(t5, t5, a);
+ /* t5 = 7 */
+ fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
+ /* t2 = c */
+ fe448_mul(t3, t2, t3);
+ /* t3 = e */
+ fe448_mul(t1, t2, t1);
+ /* t1 = f */
+ fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
+ /* t2 = 78 */
+ fe448_mul(t5, t2, t5);
+ /* t5 = 7f */
+ fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
+ /* t2 = f0 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = ff */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fe */
+ fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
+ /* t2 = 7f80 */
+ fe448_mul(t5, t2, t5);
+ /* t5 = 7fff */
+ fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
+ /* t2 = ff00 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = ffff */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fffe */
+ fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
+ /* t2 = 3fff8000 */
+ fe448_mul(t5, t2, t5);
+ /* t5 = 3fffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
+ /* t2 = ffff0000 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = ffffffff */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fffffffe */
+ fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
+ /* t2 = ffffffff00000000 */
+ fe448_mul(t2, t2, t1);
+ /* t2 = ffffffffffffffff */
+ fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
+ /* t1 = ffffffffffffffff0000000000000000 */
+ fe448_mul(t1, t1, t2);
+ /* t1 = ffffffffffffffffffffffffffffffff */
+ fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
+ /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
+ fe448_mul(t4, t1, t2);
+ /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
+ fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
+ /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
+ fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
+ fe448_mul(t1, t1, t4);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
+ fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
+ /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
+ fe448_mul(r, t5, t1);
+ /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ *
+ * a A field element.
+ * b A field element.
+ * c If 1 then copy and if 0 then don't copy.
+ */
+void fe448_cmov(int64_t* a, const int64_t* b, int c)
+{
+ int64_t m = -(int64_t)c;
+ int64_t t0 = m & (a[0] ^ b[0]);
+ int64_t t1 = m & (a[1] ^ b[1]);
+ int64_t t2 = m & (a[2] ^ b[2]);
+ int64_t t3 = m & (a[3] ^ b[3]);
+ int64_t t4 = m & (a[4] ^ b[4]);
+ int64_t t5 = m & (a[5] ^ b[5]);
+ int64_t t6 = m & (a[6] ^ b[6]);
+ int64_t t7 = m & (a[7] ^ b[7]);
+
+ a[0] ^= t0;
+ a[1] ^= t1;
+ a[2] ^= t2;
+ a[3] ^= t3;
+ a[4] ^= t4;
+ a[5] ^= t5;
+ a[6] ^= t6;
+ a[7] ^= t7;
+}
+
+#endif /* HAVE_ED448 */
+#else
+
+/* Initialize the field element operations.
+ */
+void fe448_init(void)
+{
+}
+
+/* Convert the field element from a byte array to an array of 28-bits.
+ *
+ * r [in] Array to encode into.
+ * b [in] Byte array.
+ */
+void fe448_from_bytes(int32_t* r, const unsigned char* b)
+{
+ r[ 0] = (((int32_t)((b[ 0] ) >> 0)) << 0)
+ | (((int32_t)((b[ 1] ) >> 0)) << 8)
+ | (((int32_t)((b[ 2] ) >> 0)) << 16)
+ | ((((int32_t)((b[ 3] & 0xf )) >> 0)) << 24);
+ r[ 1] = (((int32_t)((b[ 3] ) >> 4)) << 0)
+ | (((int32_t)((b[ 4] ) >> 0)) << 4)
+ | (((int32_t)((b[ 5] ) >> 0)) << 12)
+ | (((int32_t)((b[ 6] ) >> 0)) << 20);
+ r[ 2] = (((int32_t)((b[ 7] ) >> 0)) << 0)
+ | (((int32_t)((b[ 8] ) >> 0)) << 8)
+ | (((int32_t)((b[ 9] ) >> 0)) << 16)
+ | ((((int32_t)((b[10] & 0xf )) >> 0)) << 24);
+ r[ 3] = (((int32_t)((b[10] ) >> 4)) << 0)
+ | (((int32_t)((b[11] ) >> 0)) << 4)
+ | (((int32_t)((b[12] ) >> 0)) << 12)
+ | (((int32_t)((b[13] ) >> 0)) << 20);
+ r[ 4] = (((int32_t)((b[14] ) >> 0)) << 0)
+ | (((int32_t)((b[15] ) >> 0)) << 8)
+ | (((int32_t)((b[16] ) >> 0)) << 16)
+ | ((((int32_t)((b[17] & 0xf )) >> 0)) << 24);
+ r[ 5] = (((int32_t)((b[17] ) >> 4)) << 0)
+ | (((int32_t)((b[18] ) >> 0)) << 4)
+ | (((int32_t)((b[19] ) >> 0)) << 12)
+ | (((int32_t)((b[20] ) >> 0)) << 20);
+ r[ 6] = (((int32_t)((b[21] ) >> 0)) << 0)
+ | (((int32_t)((b[22] ) >> 0)) << 8)
+ | (((int32_t)((b[23] ) >> 0)) << 16)
+ | ((((int32_t)((b[24] & 0xf )) >> 0)) << 24);
+ r[ 7] = (((int32_t)((b[24] ) >> 4)) << 0)
+ | (((int32_t)((b[25] ) >> 0)) << 4)
+ | (((int32_t)((b[26] ) >> 0)) << 12)
+ | (((int32_t)((b[27] ) >> 0)) << 20);
+ r[ 8] = (((int32_t)((b[28] ) >> 0)) << 0)
+ | (((int32_t)((b[29] ) >> 0)) << 8)
+ | (((int32_t)((b[30] ) >> 0)) << 16)
+ | ((((int32_t)((b[31] & 0xf )) >> 0)) << 24);
+ r[ 9] = (((int32_t)((b[31] ) >> 4)) << 0)
+ | (((int32_t)((b[32] ) >> 0)) << 4)
+ | (((int32_t)((b[33] ) >> 0)) << 12)
+ | (((int32_t)((b[34] ) >> 0)) << 20);
+ r[10] = (((int32_t)((b[35] ) >> 0)) << 0)
+ | (((int32_t)((b[36] ) >> 0)) << 8)
+ | (((int32_t)((b[37] ) >> 0)) << 16)
+ | ((((int32_t)((b[38] & 0xf )) >> 0)) << 24);
+ r[11] = (((int32_t)((b[38] ) >> 4)) << 0)
+ | (((int32_t)((b[39] ) >> 0)) << 4)
+ | (((int32_t)((b[40] ) >> 0)) << 12)
+ | (((int32_t)((b[41] ) >> 0)) << 20);
+ r[12] = (((int32_t)((b[42] ) >> 0)) << 0)
+ | (((int32_t)((b[43] ) >> 0)) << 8)
+ | (((int32_t)((b[44] ) >> 0)) << 16)
+ | ((((int32_t)((b[45] & 0xf )) >> 0)) << 24);
+ r[13] = (((int32_t)((b[45] ) >> 4)) << 0)
+ | (((int32_t)((b[46] ) >> 0)) << 4)
+ | (((int32_t)((b[47] ) >> 0)) << 12)
+ | (((int32_t)((b[48] ) >> 0)) << 20);
+ r[14] = (((int32_t)((b[49] ) >> 0)) << 0)
+ | (((int32_t)((b[50] ) >> 0)) << 8)
+ | (((int32_t)((b[51] ) >> 0)) << 16)
+ | ((((int32_t)((b[52] & 0xf )) >> 0)) << 24);
+ r[15] = (((int32_t)((b[52] ) >> 4)) << 0)
+ | (((int32_t)((b[53] ) >> 0)) << 4)
+ | (((int32_t)((b[54] ) >> 0)) << 12)
+ | (((int32_t)((b[55] ) >> 0)) << 20);
+}
+
+/* Convert the field element to a byte array from an array of 28-bits.
+ *
+ * b [in] Byte array.
+ * a [in] Array to encode into.
+ */
+void fe448_to_bytes(unsigned char* b, const int32_t* a)
+{
+ int64_t t;
+ /* Mod */
+ int32_t in0 = a[0];
+ int32_t in1 = a[1];
+ int32_t in2 = a[2];
+ int32_t in3 = a[3];
+ int32_t in4 = a[4];
+ int32_t in5 = a[5];
+ int32_t in6 = a[6];
+ int32_t in7 = a[7];
+ int32_t in8 = a[8];
+ int32_t in9 = a[9];
+ int32_t in10 = a[10];
+ int32_t in11 = a[11];
+ int32_t in12 = a[12];
+ int32_t in13 = a[13];
+ int32_t in14 = a[14];
+ int32_t in15 = a[15];
+ int32_t o = in15 >> 28;
+ in15 -= o << 28;
+ in0 += o;
+ in8 += o;
+ o = (in0 + 1) >> 28;
+ o = (o + in1) >> 28;
+ o = (o + in2) >> 28;
+ o = (o + in3) >> 28;
+ o = (o + in4) >> 28;
+ o = (o + in5) >> 28;
+ o = (o + in6) >> 28;
+ o = (o + in7) >> 28;
+ o = (o + in8 + 1) >> 28;
+ o = (o + in9) >> 28;
+ o = (o + in10) >> 28;
+ o = (o + in11) >> 28;
+ o = (o + in12) >> 28;
+ o = (o + in13) >> 28;
+ o = (o + in14) >> 28;
+ o = (o + in15) >> 28;
+ in0 += o;
+ in8 += o;
+ in15 -= o << 28;
+ o = in0 >> 28; in1 += o; t = o << 28; in0 -= t;
+ o = in1 >> 28; in2 += o; t = o << 28; in1 -= t;
+ o = in2 >> 28; in3 += o; t = o << 28; in2 -= t;
+ o = in3 >> 28; in4 += o; t = o << 28; in3 -= t;
+ o = in4 >> 28; in5 += o; t = o << 28; in4 -= t;
+ o = in5 >> 28; in6 += o; t = o << 28; in5 -= t;
+ o = in6 >> 28; in7 += o; t = o << 28; in6 -= t;
+ o = in7 >> 28; in8 += o; t = o << 28; in7 -= t;
+ o = in8 >> 28; in9 += o; t = o << 28; in8 -= t;
+ o = in9 >> 28; in10 += o; t = o << 28; in9 -= t;
+ o = in10 >> 28; in11 += o; t = o << 28; in10 -= t;
+ o = in11 >> 28; in12 += o; t = o << 28; in11 -= t;
+ o = in12 >> 28; in13 += o; t = o << 28; in12 -= t;
+ o = in13 >> 28; in14 += o; t = o << 28; in13 -= t;
+ o = in14 >> 28; in15 += o; t = o << 28; in14 -= t;
+ o = in15 >> 28; in0 += o;
+ in8 += o; t = o << 28; in15 -= t;
+
+ /* Output as bytes */
+ b[ 0] = (in0 >> 0);
+ b[ 1] = (in0 >> 8);
+ b[ 2] = (in0 >> 16);
+ b[ 3] = (in0 >> 24) + ((in1 >> 0) << 4);
+ b[ 4] = (in1 >> 4);
+ b[ 5] = (in1 >> 12);
+ b[ 6] = (in1 >> 20);
+ b[ 7] = (in2 >> 0);
+ b[ 8] = (in2 >> 8);
+ b[ 9] = (in2 >> 16);
+ b[10] = (in2 >> 24) + ((in3 >> 0) << 4);
+ b[11] = (in3 >> 4);
+ b[12] = (in3 >> 12);
+ b[13] = (in3 >> 20);
+ b[14] = (in4 >> 0);
+ b[15] = (in4 >> 8);
+ b[16] = (in4 >> 16);
+ b[17] = (in4 >> 24) + ((in5 >> 0) << 4);
+ b[18] = (in5 >> 4);
+ b[19] = (in5 >> 12);
+ b[20] = (in5 >> 20);
+ b[21] = (in6 >> 0);
+ b[22] = (in6 >> 8);
+ b[23] = (in6 >> 16);
+ b[24] = (in6 >> 24) + ((in7 >> 0) << 4);
+ b[25] = (in7 >> 4);
+ b[26] = (in7 >> 12);
+ b[27] = (in7 >> 20);
+ b[28] = (in8 >> 0);
+ b[29] = (in8 >> 8);
+ b[30] = (in8 >> 16);
+ b[31] = (in8 >> 24) + ((in9 >> 0) << 4);
+ b[32] = (in9 >> 4);
+ b[33] = (in9 >> 12);
+ b[34] = (in9 >> 20);
+ b[35] = (in10 >> 0);
+ b[36] = (in10 >> 8);
+ b[37] = (in10 >> 16);
+ b[38] = (in10 >> 24) + ((in11 >> 0) << 4);
+ b[39] = (in11 >> 4);
+ b[40] = (in11 >> 12);
+ b[41] = (in11 >> 20);
+ b[42] = (in12 >> 0);
+ b[43] = (in12 >> 8);
+ b[44] = (in12 >> 16);
+ b[45] = (in12 >> 24) + ((in13 >> 0) << 4);
+ b[46] = (in13 >> 4);
+ b[47] = (in13 >> 12);
+ b[48] = (in13 >> 20);
+ b[49] = (in14 >> 0);
+ b[50] = (in14 >> 8);
+ b[51] = (in14 >> 16);
+ b[52] = (in14 >> 24) + ((in15 >> 0) << 4);
+ b[53] = (in15 >> 4);
+ b[54] = (in15 >> 12);
+ b[55] = (in15 >> 20);
+}
+
+/* Set the field element to 1.
+ *
+ * a [in] Field element.
+ */
+void fe448_1(int32_t* a)
+{
+ a[0] = 1;
+ a[1] = 0;
+ a[2] = 0;
+ a[3] = 0;
+ a[4] = 0;
+ a[5] = 0;
+ a[6] = 0;
+ a[7] = 0;
+ a[8] = 0;
+ a[9] = 0;
+ a[10] = 0;
+ a[11] = 0;
+ a[12] = 0;
+ a[13] = 0;
+ a[14] = 0;
+ a[15] = 0;
+}
+
+/* Set the field element to 0.
+ *
+ * a [in] Field element.
+ */
+void fe448_0(int32_t* a)
+{
+ a[0] = 0;
+ a[1] = 0;
+ a[2] = 0;
+ a[3] = 0;
+ a[4] = 0;
+ a[5] = 0;
+ a[6] = 0;
+ a[7] = 0;
+ a[8] = 0;
+ a[9] = 0;
+ a[10] = 0;
+ a[11] = 0;
+ a[12] = 0;
+ a[13] = 0;
+ a[14] = 0;
+ a[15] = 0;
+}
+
+/* Copy one field element into another: d = a.
+ *
+ * d [in] Destination field element.
+ * a [in] Source field element.
+ */
+void fe448_copy(int32_t* d, const int32_t* a)
+{
+ d[0] = a[0];
+ d[1] = a[1];
+ d[2] = a[2];
+ d[3] = a[3];
+ d[4] = a[4];
+ d[5] = a[5];
+ d[6] = a[6];
+ d[7] = a[7];
+ d[8] = a[8];
+ d[9] = a[9];
+ d[10] = a[10];
+ d[11] = a[11];
+ d[12] = a[12];
+ d[13] = a[13];
+ d[14] = a[14];
+ d[15] = a[15];
+}
+
+/* Conditionally swap the elements.
+ * Constant time implementation.
+ *
+ * a [in] First field element.
+ * b [in] Second field element.
+ * c [in] Swap when 1. Valid values: 0, 1.
+ */
+static void fe448_cswap(int32_t* a, int32_t* b, int c)
+{
+ int32_t mask = -(int32_t)c;
+ int32_t t0 = (a[0] ^ b[0]) & mask;
+ int32_t t1 = (a[1] ^ b[1]) & mask;
+ int32_t t2 = (a[2] ^ b[2]) & mask;
+ int32_t t3 = (a[3] ^ b[3]) & mask;
+ int32_t t4 = (a[4] ^ b[4]) & mask;
+ int32_t t5 = (a[5] ^ b[5]) & mask;
+ int32_t t6 = (a[6] ^ b[6]) & mask;
+ int32_t t7 = (a[7] ^ b[7]) & mask;
+ int32_t t8 = (a[8] ^ b[8]) & mask;
+ int32_t t9 = (a[9] ^ b[9]) & mask;
+ int32_t t10 = (a[10] ^ b[10]) & mask;
+ int32_t t11 = (a[11] ^ b[11]) & mask;
+ int32_t t12 = (a[12] ^ b[12]) & mask;
+ int32_t t13 = (a[13] ^ b[13]) & mask;
+ int32_t t14 = (a[14] ^ b[14]) & mask;
+ int32_t t15 = (a[15] ^ b[15]) & mask;
+ a[0] ^= t0;
+ a[1] ^= t1;
+ a[2] ^= t2;
+ a[3] ^= t3;
+ a[4] ^= t4;
+ a[5] ^= t5;
+ a[6] ^= t6;
+ a[7] ^= t7;
+ a[8] ^= t8;
+ a[9] ^= t9;
+ a[10] ^= t10;
+ a[11] ^= t11;
+ a[12] ^= t12;
+ a[13] ^= t13;
+ a[14] ^= t14;
+ a[15] ^= t15;
+ b[0] ^= t0;
+ b[1] ^= t1;
+ b[2] ^= t2;
+ b[3] ^= t3;
+ b[4] ^= t4;
+ b[5] ^= t5;
+ b[6] ^= t6;
+ b[7] ^= t7;
+ b[8] ^= t8;
+ b[9] ^= t9;
+ b[10] ^= t10;
+ b[11] ^= t11;
+ b[12] ^= t12;
+ b[13] ^= t13;
+ b[14] ^= t14;
+ b[15] ^= t15;
+}
+
+/* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold sum.
+ * a [in] Field element to add.
+ * b [in] Field element to add.
+ */
+void fe448_add(int32_t* r, const int32_t* a, const int32_t* b)
+{
+ r[0] = a[0] + b[0];
+ r[1] = a[1] + b[1];
+ r[2] = a[2] + b[2];
+ r[3] = a[3] + b[3];
+ r[4] = a[4] + b[4];
+ r[5] = a[5] + b[5];
+ r[6] = a[6] + b[6];
+ r[7] = a[7] + b[7];
+ r[8] = a[8] + b[8];
+ r[9] = a[9] + b[9];
+ r[10] = a[10] + b[10];
+ r[11] = a[11] + b[11];
+ r[12] = a[12] + b[12];
+ r[13] = a[13] + b[13];
+ r[14] = a[14] + b[14];
+ r[15] = a[15] + b[15];
+}
+
+/* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold difference.
+ * a [in] Field element to subtract from.
+ * b [in] Field element to subtract.
+ */
+void fe448_sub(int32_t* r, const int32_t* a, const int32_t* b)
+{
+ r[0] = a[0] - b[0];
+ r[1] = a[1] - b[1];
+ r[2] = a[2] - b[2];
+ r[3] = a[3] - b[3];
+ r[4] = a[4] - b[4];
+ r[5] = a[5] - b[5];
+ r[6] = a[6] - b[6];
+ r[7] = a[7] - b[7];
+ r[8] = a[8] - b[8];
+ r[9] = a[9] - b[9];
+ r[10] = a[10] - b[10];
+ r[11] = a[11] - b[11];
+ r[12] = a[12] - b[12];
+ r[13] = a[13] - b[13];
+ r[14] = a[14] - b[14];
+ r[15] = a[15] - b[15];
+}
+
+/* Reduce the field element so that each 28-bit word is within range.
+ * Carries are propagated from word 0 upward; the carry out of the top
+ * word is folded back into words 0 and 8, since
+ * 2^448 mod (2^448 - 2^224 - 1) = 2^224 + 1.
+ *
+ * a [in] Field element to reduce in place.
+ */
+void fe448_reduce(int32_t* a)
+{
+    int64_t o;
+
+    o = a[0 ] >> 28; a[1 ] += o; a[0 ] -= o << 28;
+    o = a[1 ] >> 28; a[2 ] += o; a[1 ] -= o << 28;
+    o = a[2 ] >> 28; a[3 ] += o; a[2 ] -= o << 28;
+    o = a[3 ] >> 28; a[4 ] += o; a[3 ] -= o << 28;
+    o = a[4 ] >> 28; a[5 ] += o; a[4 ] -= o << 28;
+    o = a[5 ] >> 28; a[6 ] += o; a[5 ] -= o << 28;
+    o = a[6 ] >> 28; a[7 ] += o; a[6 ] -= o << 28;
+    o = a[7 ] >> 28; a[8 ] += o; a[7 ] -= o << 28;
+    o = a[8 ] >> 28; a[9 ] += o; a[8 ] -= o << 28;
+    o = a[9 ] >> 28; a[10] += o; a[9 ] -= o << 28;
+    o = a[10] >> 28; a[11] += o; a[10] -= o << 28;
+    o = a[11] >> 28; a[12] += o; a[11] -= o << 28;
+    o = a[12] >> 28; a[13] += o; a[12] -= o << 28;
+    o = a[13] >> 28; a[14] += o; a[13] -= o << 28;
+    o = a[14] >> 28; a[15] += o; a[14] -= o << 28;
+    o = a[15] >> 28; a[0] += o;
+                     a[8] += o; a[15] -= o << 28;
+}
+/* Multiply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ */
+void fe448_mul39081(int32_t* r, const int32_t* a)
+{
+ int64_t t;
+ int32_t o;
+ int64_t t0 = a[0] * (int64_t)39081;
+ int64_t t1 = a[1] * (int64_t)39081;
+ int64_t t2 = a[2] * (int64_t)39081;
+ int64_t t3 = a[3] * (int64_t)39081;
+ int64_t t4 = a[4] * (int64_t)39081;
+ int64_t t5 = a[5] * (int64_t)39081;
+ int64_t t6 = a[6] * (int64_t)39081;
+ int64_t t7 = a[7] * (int64_t)39081;
+ int64_t t8 = a[8] * (int64_t)39081;
+ int64_t t9 = a[9] * (int64_t)39081;
+ int64_t t10 = a[10] * (int64_t)39081;
+ int64_t t11 = a[11] * (int64_t)39081;
+ int64_t t12 = a[12] * (int64_t)39081;
+ int64_t t13 = a[13] * (int64_t)39081;
+ int64_t t14 = a[14] * (int64_t)39081;
+ int64_t t15 = a[15] * (int64_t)39081;
+ o = t0 >> 28; t1 += o; t = (int64_t)o << 28; t0 -= t;
+ o = t1 >> 28; t2 += o; t = (int64_t)o << 28; t1 -= t;
+ o = t2 >> 28; t3 += o; t = (int64_t)o << 28; t2 -= t;
+ o = t3 >> 28; t4 += o; t = (int64_t)o << 28; t3 -= t;
+ o = t4 >> 28; t5 += o; t = (int64_t)o << 28; t4 -= t;
+ o = t5 >> 28; t6 += o; t = (int64_t)o << 28; t5 -= t;
+ o = t6 >> 28; t7 += o; t = (int64_t)o << 28; t6 -= t;
+ o = t7 >> 28; t8 += o; t = (int64_t)o << 28; t7 -= t;
+ o = t8 >> 28; t9 += o; t = (int64_t)o << 28; t8 -= t;
+ o = t9 >> 28; t10 += o; t = (int64_t)o << 28; t9 -= t;
+ o = t10 >> 28; t11 += o; t = (int64_t)o << 28; t10 -= t;
+ o = t11 >> 28; t12 += o; t = (int64_t)o << 28; t11 -= t;
+ o = t12 >> 28; t13 += o; t = (int64_t)o << 28; t12 -= t;
+ o = t13 >> 28; t14 += o; t = (int64_t)o << 28; t13 -= t;
+ o = t14 >> 28; t15 += o; t = (int64_t)o << 28; t14 -= t;
+ o = t15 >> 28; t0 += o;
+ t8 += o; t = (int64_t)o << 28; t15 -= t;
+
+ /* Store */
+ r[0] = t0;
+ r[1] = t1;
+ r[2] = t2;
+ r[3] = t3;
+ r[4] = t4;
+ r[5] = t5;
+ r[6] = t6;
+ r[7] = t7;
+ r[8] = t8;
+ r[9] = t9;
+ r[10] = t10;
+ r[11] = t11;
+ r[12] = t12;
+ r[13] = t13;
+ r[14] = t14;
+ r[15] = t15;
+}
+
+/* Multiply two field elements. r = a * b
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ * b [in] Field element to multiply.
+ */
+static WC_INLINE void fe448_mul_8(int32_t* r, const int32_t* a, const int32_t* b)
+{
+ int64_t t;
+ int64_t t0 = (int64_t)a[ 0] * b[ 0];
+ int64_t t1 = (int64_t)a[ 0] * b[ 1];
+ int64_t t101 = (int64_t)a[ 1] * b[ 0];
+ int64_t t2 = (int64_t)a[ 0] * b[ 2];
+ int64_t t102 = (int64_t)a[ 1] * b[ 1];
+ int64_t t202 = (int64_t)a[ 2] * b[ 0];
+ int64_t t3 = (int64_t)a[ 0] * b[ 3];
+ int64_t t103 = (int64_t)a[ 1] * b[ 2];
+ int64_t t203 = (int64_t)a[ 2] * b[ 1];
+ int64_t t303 = (int64_t)a[ 3] * b[ 0];
+ int64_t t4 = (int64_t)a[ 0] * b[ 4];
+ int64_t t104 = (int64_t)a[ 1] * b[ 3];
+ int64_t t204 = (int64_t)a[ 2] * b[ 2];
+ int64_t t304 = (int64_t)a[ 3] * b[ 1];
+ int64_t t404 = (int64_t)a[ 4] * b[ 0];
+ int64_t t5 = (int64_t)a[ 0] * b[ 5];
+ int64_t t105 = (int64_t)a[ 1] * b[ 4];
+ int64_t t205 = (int64_t)a[ 2] * b[ 3];
+ int64_t t305 = (int64_t)a[ 3] * b[ 2];
+ int64_t t405 = (int64_t)a[ 4] * b[ 1];
+ int64_t t505 = (int64_t)a[ 5] * b[ 0];
+ int64_t t6 = (int64_t)a[ 0] * b[ 6];
+ int64_t t106 = (int64_t)a[ 1] * b[ 5];
+ int64_t t206 = (int64_t)a[ 2] * b[ 4];
+ int64_t t306 = (int64_t)a[ 3] * b[ 3];
+ int64_t t406 = (int64_t)a[ 4] * b[ 2];
+ int64_t t506 = (int64_t)a[ 5] * b[ 1];
+ int64_t t606 = (int64_t)a[ 6] * b[ 0];
+ int64_t t7 = (int64_t)a[ 0] * b[ 7];
+ int64_t t107 = (int64_t)a[ 1] * b[ 6];
+ int64_t t207 = (int64_t)a[ 2] * b[ 5];
+ int64_t t307 = (int64_t)a[ 3] * b[ 4];
+ int64_t t407 = (int64_t)a[ 4] * b[ 3];
+ int64_t t507 = (int64_t)a[ 5] * b[ 2];
+ int64_t t607 = (int64_t)a[ 6] * b[ 1];
+ int64_t t707 = (int64_t)a[ 7] * b[ 0];
+ int64_t t8 = (int64_t)a[ 1] * b[ 7];
+ int64_t t108 = (int64_t)a[ 2] * b[ 6];
+ int64_t t208 = (int64_t)a[ 3] * b[ 5];
+ int64_t t308 = (int64_t)a[ 4] * b[ 4];
+ int64_t t408 = (int64_t)a[ 5] * b[ 3];
+ int64_t t508 = (int64_t)a[ 6] * b[ 2];
+ int64_t t608 = (int64_t)a[ 7] * b[ 1];
+ int64_t t9 = (int64_t)a[ 2] * b[ 7];
+ int64_t t109 = (int64_t)a[ 3] * b[ 6];
+ int64_t t209 = (int64_t)a[ 4] * b[ 5];
+ int64_t t309 = (int64_t)a[ 5] * b[ 4];
+ int64_t t409 = (int64_t)a[ 6] * b[ 3];
+ int64_t t509 = (int64_t)a[ 7] * b[ 2];
+ int64_t t10 = (int64_t)a[ 3] * b[ 7];
+ int64_t t110 = (int64_t)a[ 4] * b[ 6];
+ int64_t t210 = (int64_t)a[ 5] * b[ 5];
+ int64_t t310 = (int64_t)a[ 6] * b[ 4];
+ int64_t t410 = (int64_t)a[ 7] * b[ 3];
+ int64_t t11 = (int64_t)a[ 4] * b[ 7];
+ int64_t t111 = (int64_t)a[ 5] * b[ 6];
+ int64_t t211 = (int64_t)a[ 6] * b[ 5];
+ int64_t t311 = (int64_t)a[ 7] * b[ 4];
+ int64_t t12 = (int64_t)a[ 5] * b[ 7];
+ int64_t t112 = (int64_t)a[ 6] * b[ 6];
+ int64_t t212 = (int64_t)a[ 7] * b[ 5];
+ int64_t t13 = (int64_t)a[ 6] * b[ 7];
+ int64_t t113 = (int64_t)a[ 7] * b[ 6];
+ int64_t t14 = (int64_t)a[ 7] * b[ 7];
+ t1 += t101;
+ t2 += t102; t2 += t202;
+ t3 += t103; t3 += t203; t3 += t303;
+ t4 += t104; t4 += t204; t4 += t304; t4 += t404;
+ t5 += t105; t5 += t205; t5 += t305; t5 += t405; t5 += t505;
+ t6 += t106; t6 += t206; t6 += t306; t6 += t406; t6 += t506;
+ t6 += t606;
+ t7 += t107; t7 += t207; t7 += t307; t7 += t407; t7 += t507;
+ t7 += t607;
+ t7 += t707;
+ t8 += t108; t8 += t208; t8 += t308; t8 += t408; t8 += t508;
+ t8 += t608;
+ t9 += t109; t9 += t209; t9 += t309; t9 += t409; t9 += t509;
+ t10 += t110; t10 += t210; t10 += t310; t10 += t410;
+ t11 += t111; t11 += t211; t11 += t311;
+ t12 += t112; t12 += t212;
+ t13 += t113;
+ int64_t o = t14 >> 28;
+ int64_t t15 = o;
+ t14 -= o << 28;
+ o = t0 >> 28; t1 += o; t = (int64_t)o << 28; t0 -= t;
+ o = t1 >> 28; t2 += o; t = (int64_t)o << 28; t1 -= t;
+ o = t2 >> 28; t3 += o; t = (int64_t)o << 28; t2 -= t;
+ o = t3 >> 28; t4 += o; t = (int64_t)o << 28; t3 -= t;
+ o = t4 >> 28; t5 += o; t = (int64_t)o << 28; t4 -= t;
+ o = t5 >> 28; t6 += o; t = (int64_t)o << 28; t5 -= t;
+ o = t6 >> 28; t7 += o; t = (int64_t)o << 28; t6 -= t;
+ o = t7 >> 28; t8 += o; t = (int64_t)o << 28; t7 -= t;
+ o = t8 >> 28; t9 += o; t = (int64_t)o << 28; t8 -= t;
+ o = t9 >> 28; t10 += o; t = (int64_t)o << 28; t9 -= t;
+ o = t10 >> 28; t11 += o; t = (int64_t)o << 28; t10 -= t;
+ o = t11 >> 28; t12 += o; t = (int64_t)o << 28; t11 -= t;
+ o = t12 >> 28; t13 += o; t = (int64_t)o << 28; t12 -= t;
+ o = t13 >> 28; t14 += o; t = (int64_t)o << 28; t13 -= t;
+ o = t14 >> 28; t15 += o; t = (int64_t)o << 28; t14 -= t;
+ o = t15 >> 28; t0 += o;
+ t8 += o; t = (int64_t)o << 28; t15 -= t;
+
+ /* Store */
+ r[0] = t0;
+ r[1] = t1;
+ r[2] = t2;
+ r[3] = t3;
+ r[4] = t4;
+ r[5] = t5;
+ r[6] = t6;
+ r[7] = t7;
+ r[8] = t8;
+ r[9] = t9;
+ r[10] = t10;
+ r[11] = t11;
+ r[12] = t12;
+ r[13] = t13;
+ r[14] = t14;
+ r[15] = t15;
+}
+
+/* Multiply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to multiply.
+ * b [in] Field element to multiply.
+ */
+void fe448_mul(int32_t* r, const int32_t* a, const int32_t* b)
+{
+ int32_t r0[16];
+ int32_t r1[16];
+ int32_t* a1 = r1;
+ int32_t b1[8];
+ int32_t r2[16];
+ a1[0] = a[0] + a[8];
+ a1[1] = a[1] + a[9];
+ a1[2] = a[2] + a[10];
+ a1[3] = a[3] + a[11];
+ a1[4] = a[4] + a[12];
+ a1[5] = a[5] + a[13];
+ a1[6] = a[6] + a[14];
+ a1[7] = a[7] + a[15];
+ b1[0] = b[0] + b[8];
+ b1[1] = b[1] + b[9];
+ b1[2] = b[2] + b[10];
+ b1[3] = b[3] + b[11];
+ b1[4] = b[4] + b[12];
+ b1[5] = b[5] + b[13];
+ b1[6] = b[6] + b[14];
+ b1[7] = b[7] + b[15];
+ fe448_mul_8(r2, a + 8, b + 8);
+ fe448_mul_8(r0, a, b);
+ fe448_mul_8(r1, a1, b1);
+ r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
+ r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
+ r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
+ r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
+ r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
+ r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
+ r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
+ r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
+ r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8];
+ r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9];
+ r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10];
+ r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11];
+ r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12];
+ r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13];
+ r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14];
+ r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15];
+}
+
+/* Square a field element. r = a * a
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to square.
+ */
+static WC_INLINE void fe448_sqr_8(int32_t* r, const int32_t* a)
+{
+ int64_t t;
+ int64_t t0 = (int64_t)a[ 0] * a[ 0];
+ int64_t t1 = 2 * (int64_t)a[ 0] * a[ 1];
+ int64_t t2 = 2 * (int64_t)a[ 0] * a[ 2];
+ int64_t t102 = (int64_t)a[ 1] * a[ 1];
+ int64_t t3 = 2 * (int64_t)a[ 0] * a[ 3];
+ int64_t t103 = 2 * (int64_t)a[ 1] * a[ 2];
+ int64_t t4 = 2 * (int64_t)a[ 0] * a[ 4];
+ int64_t t104 = 2 * (int64_t)a[ 1] * a[ 3];
+ int64_t t204 = (int64_t)a[ 2] * a[ 2];
+ int64_t t5 = 2 * (int64_t)a[ 0] * a[ 5];
+ int64_t t105 = 2 * (int64_t)a[ 1] * a[ 4];
+ int64_t t205 = 2 * (int64_t)a[ 2] * a[ 3];
+ int64_t t6 = 2 * (int64_t)a[ 0] * a[ 6];
+ int64_t t106 = 2 * (int64_t)a[ 1] * a[ 5];
+ int64_t t206 = 2 * (int64_t)a[ 2] * a[ 4];
+ int64_t t306 = (int64_t)a[ 3] * a[ 3];
+ int64_t t7 = 2 * (int64_t)a[ 0] * a[ 7];
+ int64_t t107 = 2 * (int64_t)a[ 1] * a[ 6];
+ int64_t t207 = 2 * (int64_t)a[ 2] * a[ 5];
+ int64_t t307 = 2 * (int64_t)a[ 3] * a[ 4];
+ int64_t t8 = 2 * (int64_t)a[ 1] * a[ 7];
+ int64_t t108 = 2 * (int64_t)a[ 2] * a[ 6];
+ int64_t t208 = 2 * (int64_t)a[ 3] * a[ 5];
+ int64_t t308 = (int64_t)a[ 4] * a[ 4];
+ int64_t t9 = 2 * (int64_t)a[ 2] * a[ 7];
+ int64_t t109 = 2 * (int64_t)a[ 3] * a[ 6];
+ int64_t t209 = 2 * (int64_t)a[ 4] * a[ 5];
+ int64_t t10 = 2 * (int64_t)a[ 3] * a[ 7];
+ int64_t t110 = 2 * (int64_t)a[ 4] * a[ 6];
+ int64_t t210 = (int64_t)a[ 5] * a[ 5];
+ int64_t t11 = 2 * (int64_t)a[ 4] * a[ 7];
+ int64_t t111 = 2 * (int64_t)a[ 5] * a[ 6];
+ int64_t t12 = 2 * (int64_t)a[ 5] * a[ 7];
+ int64_t t112 = (int64_t)a[ 6] * a[ 6];
+ int64_t t13 = 2 * (int64_t)a[ 6] * a[ 7];
+ int64_t t14 = (int64_t)a[ 7] * a[ 7];
+ t2 += t102;
+ t3 += t103;
+ t4 += t104; t4 += t204;
+ t5 += t105; t5 += t205;
+ t6 += t106; t6 += t206; t6 += t306;
+ t7 += t107; t7 += t207; t7 += t307;
+ t8 += t108; t8 += t208; t8 += t308;
+ t9 += t109; t9 += t209;
+ t10 += t110; t10 += t210;
+ t11 += t111;
+ t12 += t112;
+ int64_t o = t14 >> 28;
+ int64_t t15 = o;
+ t14 -= o << 28;
+ o = t0 >> 28; t1 += o; t = (int64_t)o << 28; t0 -= t;
+ o = t1 >> 28; t2 += o; t = (int64_t)o << 28; t1 -= t;
+ o = t2 >> 28; t3 += o; t = (int64_t)o << 28; t2 -= t;
+ o = t3 >> 28; t4 += o; t = (int64_t)o << 28; t3 -= t;
+ o = t4 >> 28; t5 += o; t = (int64_t)o << 28; t4 -= t;
+ o = t5 >> 28; t6 += o; t = (int64_t)o << 28; t5 -= t;
+ o = t6 >> 28; t7 += o; t = (int64_t)o << 28; t6 -= t;
+ o = t7 >> 28; t8 += o; t = (int64_t)o << 28; t7 -= t;
+ o = t8 >> 28; t9 += o; t = (int64_t)o << 28; t8 -= t;
+ o = t9 >> 28; t10 += o; t = (int64_t)o << 28; t9 -= t;
+ o = t10 >> 28; t11 += o; t = (int64_t)o << 28; t10 -= t;
+ o = t11 >> 28; t12 += o; t = (int64_t)o << 28; t11 -= t;
+ o = t12 >> 28; t13 += o; t = (int64_t)o << 28; t12 -= t;
+ o = t13 >> 28; t14 += o; t = (int64_t)o << 28; t13 -= t;
+ o = t14 >> 28; t15 += o; t = (int64_t)o << 28; t14 -= t;
+ o = t15 >> 28; t0 += o;
+ t8 += o; t = (int64_t)o << 28; t15 -= t;
+
+ /* Store */
+ r[0] = t0;
+ r[1] = t1;
+ r[2] = t2;
+ r[3] = t3;
+ r[4] = t4;
+ r[5] = t5;
+ r[6] = t6;
+ r[7] = t7;
+ r[8] = t8;
+ r[9] = t9;
+ r[10] = t10;
+ r[11] = t11;
+ r[12] = t12;
+ r[13] = t13;
+ r[14] = t14;
+ r[15] = t15;
+}
+
+/* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to square.
+ */
+void fe448_sqr(int32_t* r, const int32_t* a)
+{
+ int32_t r0[16];
+ int32_t r1[16];
+ int32_t* a1 = r1;
+ int32_t r2[16];
+ a1[0] = a[0] + a[8];
+ a1[1] = a[1] + a[9];
+ a1[2] = a[2] + a[10];
+ a1[3] = a[3] + a[11];
+ a1[4] = a[4] + a[12];
+ a1[5] = a[5] + a[13];
+ a1[6] = a[6] + a[14];
+ a1[7] = a[7] + a[15];
+ fe448_sqr_8(r2, a + 8);
+ fe448_sqr_8(r0, a);
+ fe448_sqr_8(r1, a1);
+ r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
+ r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
+ r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
+ r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
+ r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
+ r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
+ r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
+ r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
+ r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8];
+ r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9];
+ r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10];
+ r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11];
+ r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12];
+ r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13];
+ r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14];
+ r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15];
+}
+
+/* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
+ * Constant time implementation - using Fermat's little theorem:
+ * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
+ * For curve448: p - 2 = 2^448 - 2^224 - 3
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to invert.
+ */
+void fe448_invert(int32_t* r, const int32_t* a)
+{
+ int32_t t1[16];
+ int32_t t2[16];
+ int32_t t3[16];
+ int32_t t4[16];
+ int i;
+
+ fe448_sqr(t1, a);
+ /* t1 = 2 */
+ fe448_mul(t1, t1, a);
+ /* t1 = 3 */
+ fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
+ /* t2 = c */
+ fe448_mul(t3, t2, a);
+ /* t3 = d */
+ fe448_mul(t1, t2, t1);
+ /* t1 = f */
+ fe448_sqr(t2, t1);
+ /* t2 = 1e */
+ fe448_mul(t4, t2, a);
+ /* t4 = 1f */
+ fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+ /* t2 = 3e0 */
+ fe448_mul(t1, t2, t4);
+ /* t1 = 3ff */
+ fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
+ /* t2 = ffc00 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = fffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+ /* t2 = 1ffffe0 */
+ fe448_mul(t1, t2, t4);
+ /* t1 = 1ffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
+ /* t2 = 3fffffe000000 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = 3ffffffffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
+ /* t2 = 7fffffffffffe0 */
+ fe448_mul(t1, t2, t4);
+ /* t1 = 7fffffffffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
+ /* t2 = 3fffffffffffff80000000000000 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = 3fffffffffffffffffffffffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
+ /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
+ /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+ fe448_mul(t1, t3, a);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
+ fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
+ fe448_mul(r, t3, t1);
+ /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
+}
+
+/* Scalar multiply the point by a number. r = n.a
+ * Uses the Montgomery ladder and only requires the x-ordinate.
+ *
+ * r [in] Field element to hold result.
+ * n [in] Scalar as an array of bytes.
+ * a [in] Point to multiply - x-ordinate only.
+ */
+int curve448(byte* r, const byte* n, const byte* a)
+{
+ int32_t x1[16];
+ int32_t x2[16];
+ int32_t z2[16];
+ int32_t x3[16];
+ int32_t z3[16];
+ int32_t t0[16];
+ int32_t t1[16];
+ int i;
+ unsigned int swap;
+ unsigned int b;
+
+ fe448_from_bytes(x1, a);
+ fe448_1(x2);
+ fe448_0(z2);
+ fe448_copy(x3, x1);
+ fe448_1(z3);
+
+ swap = 0;
+ for (i = 447; i >= 0; --i) {
+ b = (n[i >> 3] >> (i & 7)) & 1;
+ swap ^= b;
+ fe448_cswap(x2, x3, swap);
+ fe448_cswap(z2, z3, swap);
+ swap = b;
+
+ /* Montgomery Ladder - double and add */
+ fe448_add(t0, x2, z2);
+ fe448_reduce(t0);
+ fe448_add(t1, x3, z3);
+ fe448_reduce(t1);
+ fe448_sub(x2, x2, z2);
+ fe448_sub(x3, x3, z3);
+ fe448_mul(t1, t1, x2);
+ fe448_mul(z3, x3, t0);
+ fe448_sqr(t0, t0);
+ fe448_sqr(x2, x2);
+ fe448_add(x3, z3, t1);
+ fe448_reduce(x3);
+ fe448_sqr(x3, x3);
+ fe448_sub(z3, z3, t1);
+ fe448_sqr(z3, z3);
+ fe448_mul(z3, z3, x1);
+ fe448_sub(t1, t0, x2);
+ fe448_mul(x2, t0, x2);
+ fe448_mul39081(z2, t1);
+ fe448_add(z2, t0, z2);
+ fe448_mul(z2, z2, t1);
+ }
+ /* Last two bits are 0 - no final swap check required. */
+
+ fe448_invert(z2, z2);
+ fe448_mul(x2, x2, z2);
+ fe448_to_bytes(r, x2);
+
+ return 0;
+}
+
+#ifdef HAVE_ED448
+/* Check whether field element is not 0.
+ * Must convert to a normalized form before checking.
+ *
+ * a [in] Field element.
+ * returns 0 when zero, and any other value otherwise.
+ */
+int fe448_isnonzero(const int32_t* a)
+{
+ uint8_t b[56];
+ int i;
+ uint8_t c = 0;
+ fe448_to_bytes(b, a);
+ for (i = 0; i < 56; i++)
+ c |= b[i];
+ return c;
+}
+
+/* Check whether field element is negative.
+ * Must convert to a normalized form before checking.
+ *
+ * a [in] Field element.
+ * returns 1 when negative, and 0 otherwise.
+ */
+int fe448_isnegative(const int32_t* a)
+{
+ uint8_t b[56];
+ fe448_to_bytes(b, a);
+ return b[0] & 1;
+}
+
+/* Negates the field element. r = -a
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element.
+ */
+void fe448_neg(int32_t* r, const int32_t* a)
+{
+ r[0] = -a[0];
+ r[1] = -a[1];
+ r[2] = -a[2];
+ r[3] = -a[3];
+ r[4] = -a[4];
+ r[5] = -a[5];
+ r[6] = -a[6];
+ r[7] = -a[7];
+ r[8] = -a[8];
+ r[9] = -a[9];
+ r[10] = -a[10];
+ r[11] = -a[11];
+ r[12] = -a[12];
+ r[13] = -a[13];
+ r[14] = -a[14];
+ r[15] = -a[15];
+}
+
+/* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
+ * Used for calculating the y-ordinate from the x-ordinate for Ed448.
+ *
+ * r [in] Field element to hold result.
+ * a [in] Field element to exponentiate.
+ */
+void fe448_pow_2_446_222_1(int32_t* r, const int32_t* a)
+{
+ int32_t t1[16];
+ int32_t t2[16];
+ int32_t t3[16];
+ int32_t t4[16];
+ int32_t t5[16];
+ int i;
+
+ fe448_sqr(t3, a);
+ /* t3 = 2 */
+ fe448_mul(t1, t3, a);
+ /* t1 = 3 */
+ fe448_sqr(t5, t1);
+ /* t5 = 6 */
+ fe448_mul(t5, t5, a);
+ /* t5 = 7 */
+ fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
+ /* t2 = c */
+ fe448_mul(t3, t2, t3);
+ /* t3 = e */
+ fe448_mul(t1, t2, t1);
+ /* t1 = f */
+ fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
+ /* t2 = 78 */
+ fe448_mul(t5, t2, t5);
+ /* t5 = 7f */
+ fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
+ /* t2 = f0 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = ff */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fe */
+ fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
+ /* t2 = 7f80 */
+ fe448_mul(t5, t2, t5);
+ /* t5 = 7fff */
+ fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
+ /* t2 = ff00 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = ffff */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fffe */
+ fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
+ /* t2 = 3fff8000 */
+ fe448_mul(t5, t2, t5);
+ /* t5 = 3fffffff */
+ fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
+ /* t2 = ffff0000 */
+ fe448_mul(t1, t2, t1);
+ /* t1 = ffffffff */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fffffffe */
+ fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
+ /* t2 = ffffffff00000000 */
+ fe448_mul(t2, t2, t1);
+ /* t2 = ffffffffffffffff */
+ fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
+ /* t1 = ffffffffffffffff0000000000000000 */
+ fe448_mul(t1, t1, t2);
+ /* t1 = ffffffffffffffffffffffffffffffff */
+ fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
+ /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
+ fe448_mul(t4, t1, t2);
+ /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
+ fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
+ /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
+ fe448_mul(t3, t3, t2);
+ /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
+ fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
+ fe448_mul(t1, t1, t4);
+ /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
+ fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
+ /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
+ fe448_mul(r, t5, t1);
+ /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ *
+ * a A field element.
+ * b A field element.
+ * c If 1 then copy and if 0 then don't copy.
+ */
+void fe448_cmov(int32_t* a, const int32_t* b, int c)
+{
+ int32_t m = -(int32_t)c;
+ int32_t t0 = m & (a[0] ^ b[0]);
+ int32_t t1 = m & (a[1] ^ b[1]);
+ int32_t t2 = m & (a[2] ^ b[2]);
+ int32_t t3 = m & (a[3] ^ b[3]);
+ int32_t t4 = m & (a[4] ^ b[4]);
+ int32_t t5 = m & (a[5] ^ b[5]);
+ int32_t t6 = m & (a[6] ^ b[6]);
+ int32_t t7 = m & (a[7] ^ b[7]);
+ int32_t t8 = m & (a[8] ^ b[8]);
+ int32_t t9 = m & (a[9] ^ b[9]);
+ int32_t t10 = m & (a[10] ^ b[10]);
+ int32_t t11 = m & (a[11] ^ b[11]);
+ int32_t t12 = m & (a[12] ^ b[12]);
+ int32_t t13 = m & (a[13] ^ b[13]);
+ int32_t t14 = m & (a[14] ^ b[14]);
+ int32_t t15 = m & (a[15] ^ b[15]);
+
+ a[0] ^= t0;
+ a[1] ^= t1;
+ a[2] ^= t2;
+ a[3] ^= t3;
+ a[4] ^= t4;
+ a[5] ^= t5;
+ a[6] ^= t6;
+ a[7] ^= t7;
+ a[8] ^= t8;
+ a[9] ^= t9;
+ a[10] ^= t10;
+ a[11] ^= t11;
+ a[12] ^= t12;
+ a[13] ^= t13;
+ a[14] ^= t14;
+ a[15] ^= t15;
+}
+
+#endif /* HAVE_ED448 */
+#endif
+
+#endif /* HAVE_CURVE448 || HAVE_ED448 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_low_mem.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_low_mem.c
index 80834f54c..13c88cbb4 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_low_mem.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_low_mem.c
@@ -1,8 +1,8 @@
/* fe_low_mem.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,10 +16,11 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
-/* Based from Daniel Beer's public domain word. */
+
+/* Based from Daniel Beer's public domain work. */
#ifdef HAVE_CONFIG_H
#include <config.h>
@@ -27,17 +28,18 @@
#include <wolfssl/wolfcrypt/settings.h>
-#if defined(HAVE_ED25519) || defined(HAVE_CURVE25519)
+#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
+#if defined(CURVE25519_SMALL) || defined(ED25519_SMALL) /* use slower code that takes less memory */
#include <wolfssl/wolfcrypt/fe_operations.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
-
void fprime_copy(byte *x, const byte *a)
{
int i;
@@ -46,13 +48,24 @@ void fprime_copy(byte *x, const byte *a)
}
-void fe_copy(fe x, const fe a)
+void lm_copy(byte* x, const byte* a)
{
int i;
for (i = 0; i < F25519_SIZE; i++)
x[i] = a[i];
}
+#if ((defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL)) || \
+ (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) && \
+ !defined(FREESCALE_LTC_ECC)
+ /* to be Complementary to fe_low_mem.c */
+#else
+void fe_init(void)
+{
+}
+#endif
+
+#ifdef CURVE25519_SMALL
/* Double an X-coordinate */
static void xc_double(byte *x3, byte *z3,
@@ -74,12 +87,12 @@ static void xc_double(byte *x3, byte *z3,
fe_mul__distinct(z1sq, z1, z1);
fe_mul__distinct(x1z1, x1, z1);
- fe_sub(a, x1sq, z1sq);
+ lm_sub(a, x1sq, z1sq);
fe_mul__distinct(x3, a, a);
fe_mul_c(a, x1z1, 486662);
- fe_add(a, x1sq, a);
- fe_add(a, z1sq, a);
+ lm_add(a, x1sq, a);
+ lm_add(a, z1sq, a);
fe_mul__distinct(x1sq, x1z1, a);
fe_mul_c(z3, x1sq, 4);
}
@@ -110,24 +123,24 @@ static void xc_diffadd(byte *x5, byte *z5,
byte a[F25519_SIZE];
byte b[F25519_SIZE];
- fe_add(a, x2, z2);
- fe_sub(b, x3, z3); /* D */
+ lm_add(a, x2, z2);
+ lm_sub(b, x3, z3); /* D */
fe_mul__distinct(da, a, b);
- fe_sub(b, x2, z2);
- fe_add(a, x3, z3); /* C */
+ lm_sub(b, x2, z2);
+ lm_add(a, x3, z3); /* C */
fe_mul__distinct(cb, a, b);
- fe_add(a, da, cb);
+ lm_add(a, da, cb);
fe_mul__distinct(b, a, a);
fe_mul__distinct(x5, z1, b);
- fe_sub(a, da, cb);
+ lm_sub(a, da, cb);
fe_mul__distinct(b, a, a);
fe_mul__distinct(z5, x1, b);
}
-
+#ifndef FREESCALE_LTC_ECC
int curve25519(byte *result, byte *e, byte *q)
{
/* Current point: P_m */
@@ -141,7 +154,7 @@ int curve25519(byte *result, byte *e, byte *q)
int i;
/* Note: bit 254 is assumed to be 1 */
- fe_copy(xm, q);
+ lm_copy(xm, q);
for (i = 253; i >= 0; i--) {
const int bit = (e[i >> 3] >> (i & 7)) & 1;
@@ -171,6 +184,8 @@ int curve25519(byte *result, byte *e, byte *q)
fe_normalize(result);
return 0;
}
+#endif /* !FREESCALE_LTC_ECC */
+#endif /* CURVE25519_SMALL */
static void raw_add(byte *x, const byte *p)
@@ -180,7 +195,7 @@ static void raw_add(byte *x, const byte *p)
for (i = 0; i < F25519_SIZE; i++) {
c += ((word16)x[i]) + ((word16)p[i]);
- x[i] = c;
+ x[i] = (byte)c;
c >>= 8;
}
}
@@ -194,11 +209,11 @@ static void raw_try_sub(byte *x, const byte *p)
for (i = 0; i < F25519_SIZE; i++) {
c = ((word16)x[i]) - ((word16)p[i]) - c;
- minusp[i] = c;
+ minusp[i] = (byte)c;
c = (c >> 8) & 1;
}
- fprime_select(x, minusp, x, c);
+ fprime_select(x, minusp, x, (byte)c);
}
@@ -211,7 +226,7 @@ static int prime_msb(const byte *p)
/*
Test for any hot bits.
- As soon as one instance is incountered set shift to 0.
+ As soon as one instance is encountered set shift to 0.
*/
for (i = F25519_SIZE - 1; i >= 0; i--) {
shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
@@ -268,7 +283,7 @@ void fprime_mul(byte *r, const byte *a, const byte *b,
for (j = 0; j < F25519_SIZE; j++) {
c |= ((word16)r[j]) << 1;
- r[j] = c;
+ r[j] = (byte)c;
c >>= 8;
}
raw_try_sub(r, modulus);
@@ -307,7 +322,7 @@ void fe_normalize(byte *x)
for (i = 0; i < F25519_SIZE; i++) {
c += x[i];
- x[i] = c;
+ x[i] = (byte)c;
c >>= 8;
}
@@ -319,12 +334,12 @@ void fe_normalize(byte *x)
for (i = 0; i + 1 < F25519_SIZE; i++) {
c += x[i];
- minusp[i] = c;
+ minusp[i] = (byte)c;
c >>= 8;
}
c += ((word16)x[i]) - 128;
- minusp[31] = c;
+ minusp[31] = (byte)c;
/* Load x-p if no underflow */
fe_select(x, minusp, x, (c >> 15) & 1);
@@ -343,7 +358,7 @@ void fe_select(byte *dst,
}
-void fe_add(fe r, const fe a, const fe b)
+void lm_add(byte* r, const byte* a, const byte* b)
{
word16 c = 0;
int i;
@@ -352,7 +367,7 @@ void fe_add(fe r, const fe a, const fe b)
for (i = 0; i < F25519_SIZE; i++) {
c >>= 8;
c += ((word16)a[i]) + ((word16)b[i]);
- r[i] = c;
+ r[i] = (byte)c;
}
/* Reduce with 2^255 = 19 mod p */
@@ -361,13 +376,13 @@ void fe_add(fe r, const fe a, const fe b)
for (i = 0; i < F25519_SIZE; i++) {
c += r[i];
- r[i] = c;
+ r[i] = (byte)c;
c >>= 8;
}
}
-void fe_sub(fe r, const fe a, const fe b)
+void lm_sub(byte* r, const byte* a, const byte* b)
{
word32 c = 0;
int i;
@@ -392,7 +407,7 @@ void fe_sub(fe r, const fe a, const fe b)
}
-void fe_neg(fe r, const fe a)
+void lm_neg(byte* r, const byte* a)
{
word32 c = 0;
int i;
@@ -447,12 +462,12 @@ void fe_mul__distinct(byte *r, const byte *a, const byte *b)
}
-void fe_mul(fe r, const fe a, const fe b)
+void lm_mul(byte *r, const byte* a, const byte *b)
{
byte tmp[F25519_SIZE];
fe_mul__distinct(tmp, a, b);
- fe_copy(r, tmp);
+ lm_copy(r, tmp);
}
@@ -530,12 +545,12 @@ void fe_inv__distinct(byte *r, const byte *x)
}
-void fe_invert(fe r, const fe x)
+void lm_invert(byte *r, const byte *x)
{
byte tmp[F25519_SIZE];
fe_inv__distinct(tmp, x);
- fe_copy(r, tmp);
+ lm_copy(r, tmp);
}
@@ -585,12 +600,12 @@ void fe_sqrt(byte *r, const byte *a)
fe_mul__distinct(y, v, v);
fe_mul__distinct(i, x, y);
fe_load(y, 1);
- fe_sub(i, i, y);
+ lm_sub(i, i, y);
/* r = avi */
fe_mul__distinct(x, v, a);
fe_mul__distinct(r, x, i);
}
-#endif /* HAVE_CURVE25519 or HAVE_ED25519 */
-
+#endif /* CURVE25519_SMALL || ED25519_SMALL */
+#endif /* HAVE_CURVE25519 || HAVE_ED25519 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_operations.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_operations.c
index da07c951c..1e1c92bf2 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_operations.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_operations.c
@@ -1,8 +1,8 @@
/* fe_operations.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
/* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */
#ifdef HAVE_CONFIG_H
@@ -27,7 +28,8 @@
#include <wolfssl/wolfcrypt/settings.h>
-#if defined(HAVE_ED25519) || defined(HAVE_CURVE25519)
+#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
+#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) /* run when not defined to use small memory math */
#include <wolfssl/wolfcrypt/fe_operations.h>
#include <stdint.h>
@@ -35,9 +37,20 @@
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
+#ifdef CURVED25519_X64
+/* Assembly code in fe_x25519_asm.* */
+#elif defined(WOLFSSL_ARMASM)
+/* Assembly code in fe_armv[78]_x25519.* */
+#elif defined(CURVED25519_128BIT)
+#include "fe_x25519_128.i"
+#else
+
+#if defined(HAVE_CURVE25519) || \
+ (defined(HAVE_ED25519) && !defined(ED25519_SMALL))
/*
fe means field element.
Here the field is \Z/(2^255-19).
@@ -65,7 +78,7 @@ uint64_t load_4(const unsigned char *in)
result |= ((uint64_t) in[3]) << 24;
return result;
}
-
+#endif
/*
h = 1
@@ -105,25 +118,43 @@ void fe_0(fe h)
}
+#if ((defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL)) || \
+ (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) && \
+ !defined(FREESCALE_LTC_ECC)
+/* to be Complementary to fe_low_mem.c */
+void fe_init(void)
+{
+}
+#endif
+
+#if defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL) && \
+ !defined(FREESCALE_LTC_ECC)
int curve25519(byte* q, byte* n, byte* p)
{
+#if 0
unsigned char e[32];
- unsigned int i;
- fe x1;
- fe x2;
- fe z2;
- fe x3;
- fe z3;
- fe tmp0;
- fe tmp1;
- int pos;
- unsigned int swap;
- unsigned int b;
-
- for (i = 0;i < 32;++i) e[i] = n[i];
- e[0] &= 248;
- e[31] &= 127;
- e[31] |= 64;
+#endif
+ fe x1 = {0};
+ fe x2 = {0};
+ fe z2 = {0};
+ fe x3 = {0};
+ fe z3 = {0};
+ fe tmp0 = {0};
+ fe tmp1 = {0};
+ int pos = 0;
+ unsigned int swap = 0;
+ unsigned int b = 0;
+
+ /* Clamp already done during key generation and import */
+#if 0
+ {
+ unsigned int i;
+ for (i = 0;i < 32;++i) e[i] = n[i];
+ e[0] &= 248;
+ e[31] &= 127;
+ e[31] |= 64;
+ }
+#endif
fe_frombytes(x1,p);
fe_1(x2);
@@ -133,7 +164,11 @@ int curve25519(byte* q, byte* n, byte* p)
swap = 0;
for (pos = 254;pos >= 0;--pos) {
+#if 0
b = e[pos / 8] >> (pos & 7);
+#else
+ b = n[pos / 8] >> (pos & 7);
+#endif
b &= 1;
swap ^= b;
fe_cswap(x2,x3,swap);
@@ -169,6 +204,7 @@ int curve25519(byte* q, byte* n, byte* p)
return 0;
}
+#endif /* HAVE_CURVE25519 && !CURVE25519_SMALL && !FREESCALE_LTC_ECC */
/*
@@ -287,24 +323,24 @@ void fe_sq(fe h,const fe f)
int64_t carry8;
int64_t carry9;
- carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
- carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+ carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+ carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
- carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
- carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+ carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+ carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
- carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
- carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+ carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+ carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
- carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
- carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+ carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+ carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
- carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
- carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+ carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+ carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
- carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+ carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
- carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+ carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
h[0] = (int32_t)h0;
h[1] = (int32_t)h1;
@@ -460,38 +496,38 @@ void fe_tobytes(unsigned char *s,const fe h)
Goal: Output h0+...+2^230 h9.
*/
- s[0] = h0 >> 0;
- s[1] = h0 >> 8;
- s[2] = h0 >> 16;
- s[3] = (h0 >> 24) | (h1 << 2);
- s[4] = h1 >> 6;
- s[5] = h1 >> 14;
- s[6] = (h1 >> 22) | (h2 << 3);
- s[7] = h2 >> 5;
- s[8] = h2 >> 13;
- s[9] = (h2 >> 21) | (h3 << 5);
- s[10] = h3 >> 3;
- s[11] = h3 >> 11;
- s[12] = (h3 >> 19) | (h4 << 6);
- s[13] = h4 >> 2;
- s[14] = h4 >> 10;
- s[15] = h4 >> 18;
- s[16] = h5 >> 0;
- s[17] = h5 >> 8;
- s[18] = h5 >> 16;
- s[19] = (h5 >> 24) | (h6 << 1);
- s[20] = h6 >> 7;
- s[21] = h6 >> 15;
- s[22] = (h6 >> 23) | (h7 << 3);
- s[23] = h7 >> 5;
- s[24] = h7 >> 13;
- s[25] = (h7 >> 21) | (h8 << 4);
- s[26] = h8 >> 4;
- s[27] = h8 >> 12;
- s[28] = (h8 >> 20) | (h9 << 6);
- s[29] = h9 >> 2;
- s[30] = h9 >> 10;
- s[31] = h9 >> 18;
+ s[0] = (byte)(h0 >> 0);
+ s[1] = (byte)(h0 >> 8);
+ s[2] = (byte)(h0 >> 16);
+ s[3] = (byte)((h0 >> 24) | (h1 << 2));
+ s[4] = (byte)(h1 >> 6);
+ s[5] = (byte)(h1 >> 14);
+ s[6] = (byte)((h1 >> 22) | (h2 << 3));
+ s[7] = (byte)(h2 >> 5);
+ s[8] = (byte)(h2 >> 13);
+ s[9] = (byte)((h2 >> 21) | (h3 << 5));
+ s[10] = (byte)(h3 >> 3);
+ s[11] = (byte)(h3 >> 11);
+ s[12] = (byte)((h3 >> 19) | (h4 << 6));
+ s[13] = (byte)(h4 >> 2);
+ s[14] = (byte)(h4 >> 10);
+ s[15] = (byte)(h4 >> 18);
+ s[16] = (byte)(h5 >> 0);
+ s[17] = (byte)(h5 >> 8);
+ s[18] = (byte)(h5 >> 16);
+ s[19] = (byte)((h5 >> 24) | (h6 << 1));
+ s[20] = (byte)(h6 >> 7);
+ s[21] = (byte)(h6 >> 15);
+ s[22] = (byte)((h6 >> 23) | (h7 << 3));
+ s[23] = (byte)(h7 >> 5);
+ s[24] = (byte)(h7 >> 13);
+ s[25] = (byte)((h7 >> 21) | (h8 << 4));
+ s[26] = (byte)(h8 >> 4);
+ s[27] = (byte)(h8 >> 12);
+ s[28] = (byte)((h8 >> 20) | (h9 << 6));
+ s[29] = (byte)(h9 >> 2);
+ s[30] = (byte)(h9 >> 10);
+ s[31] = (byte)(h9 >> 18);
}
@@ -552,6 +588,8 @@ void fe_sub(fe h,const fe f,const fe g)
}
+#if defined(HAVE_CURVE25519) || \
+ (defined(HAVE_ED25519) && !defined(ED25519_SMALL))
/*
Ignores top bit of h.
*/
@@ -579,17 +617,17 @@ void fe_frombytes(fe h,const unsigned char *s)
int64_t carry8;
int64_t carry9;
- carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
- carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
- carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
- carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
- carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+ carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+ carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+ carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+ carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+ carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
- carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
- carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
- carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
- carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
- carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+ carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+ carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+ carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+ carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+ carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
h[0] = (int32_t)h0;
h[1] = (int32_t)h1;
@@ -602,15 +640,16 @@ void fe_frombytes(fe h,const unsigned char *s)
h[8] = (int32_t)h8;
h[9] = (int32_t)h9;
}
+#endif
void fe_invert(fe out,const fe z)
{
- fe t0;
- fe t1;
- fe t2;
- fe t3;
- int i;
+ fe t0 = {0};
+ fe t1 = {0};
+ fe t2 = {0};
+ fe t3 = {0};
+ int i = 0;
/* pow225521 */
fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
@@ -865,46 +904,46 @@ void fe_mul(fe h,const fe f,const fe g)
i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
*/
- carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
- carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+ carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+ carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
/* |h0| <= 2^25 */
/* |h4| <= 2^25 */
/* |h1| <= 1.71*2^59 */
/* |h5| <= 1.71*2^59 */
- carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
- carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+ carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+ carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
/* |h1| <= 2^24; from now on fits into int32 */
/* |h5| <= 2^24; from now on fits into int32 */
/* |h2| <= 1.41*2^60 */
/* |h6| <= 1.41*2^60 */
- carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
- carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+ carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+ carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
/* |h2| <= 2^25; from now on fits into int32 unchanged */
/* |h6| <= 2^25; from now on fits into int32 unchanged */
/* |h3| <= 1.71*2^59 */
/* |h7| <= 1.71*2^59 */
- carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
- carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+ carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+ carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
/* |h3| <= 2^24; from now on fits into int32 unchanged */
/* |h7| <= 2^24; from now on fits into int32 unchanged */
/* |h4| <= 1.72*2^34 */
/* |h8| <= 1.41*2^60 */
- carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
- carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+ carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+ carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
/* |h4| <= 2^25; from now on fits into int32 unchanged */
/* |h8| <= 2^25; from now on fits into int32 unchanged */
/* |h5| <= 1.01*2^24 */
/* |h9| <= 1.71*2^59 */
- carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+ carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
/* |h9| <= 2^24; from now on fits into int32 unchanged */
/* |h0| <= 1.1*2^39 */
- carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+ carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
/* |h0| <= 2^25; from now on fits into int32 unchanged */
/* |h1| <= 1.01*2^24 */
@@ -928,7 +967,7 @@ replace (f,g) with (f,g) if b == 0.
Preconditions: b in {0,1}.
*/
-void fe_cswap(fe f,fe g,unsigned int b)
+void fe_cswap(fe f, fe g, int b)
{
int32_t f0 = f[0];
int32_t f1 = f[1];
@@ -1038,17 +1077,17 @@ void fe_mul121666(fe h,fe f)
int64_t carry8;
int64_t carry9;
- carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
- carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
- carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
- carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
- carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+ carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+ carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+ carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+ carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+ carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
- carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
- carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
- carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
- carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
- carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+ carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+ carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+ carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+ carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+ carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
h[0] = (int32_t)h0;
h[1] = (int32_t)h1;
@@ -1190,24 +1229,24 @@ void fe_sq2(fe h,const fe f)
h8 += h8;
h9 += h9;
- carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
- carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+ carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+ carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
- carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
- carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+ carry1 = (h1 + (int64_t) (1UL<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+ carry5 = (h5 + (int64_t) (1UL<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
- carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
- carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+ carry2 = (h2 + (int64_t) (1UL<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+ carry6 = (h6 + (int64_t) (1UL<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
- carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
- carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+ carry3 = (h3 + (int64_t) (1UL<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+ carry7 = (h7 + (int64_t) (1UL<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
- carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
- carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+ carry4 = (h4 + (int64_t) (1UL<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+ carry8 = (h8 + (int64_t) (1UL<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
- carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
+ carry9 = (h9 + (int64_t) (1UL<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
- carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+ carry0 = (h0 + (int64_t) (1UL<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
h[0] = (int32_t)h0;
h[1] = (int32_t)h1;
@@ -1224,10 +1263,10 @@ void fe_sq2(fe h,const fe f)
void fe_pow22523(fe out,const fe z)
{
- fe t0;
- fe t1;
- fe t2;
- int i;
+ fe t0 = {0};
+ fe t1 = {0};
+ fe t2 = {0};
+ int i = 0;
fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
@@ -1306,7 +1345,7 @@ Preconditions:
|f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
*/
-static const unsigned char zero[32];
+static const unsigned char zero[32] = {0};
int fe_isnonzero(const fe f)
{
@@ -1339,7 +1378,7 @@ replace (f,g) with (f,g) if b == 0.
Preconditions: b in {0,1}.
*/
-void fe_cmov(fe f,const fe g,unsigned int b)
+void fe_cmov(fe f, const fe g, int b)
{
int32_t f0 = f[0];
int32_t f1 = f[1];
@@ -1393,5 +1432,7 @@ void fe_cmov(fe f,const fe g,unsigned int b)
f[8] = f8 ^ x8;
f[9] = f9 ^ x9;
}
-#endif /* HAVE ED25519 or CURVE25519 */
+#endif
+#endif /* !CURVE25519_SMALL || !ED25519_SMALL */
+#endif /* HAVE_CURVE25519 || HAVE_ED25519 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_x25519_128.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_x25519_128.i
new file mode 100644
index 000000000..10e43d9cd
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_x25519_128.i
@@ -0,0 +1,625 @@
+/* fe_x25519_128.i
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+void fe_init(void)
+{
+}
+
+/* Convert a number represented as an array of bytes to an array of words with
+ * 51-bits of data in each word.
+ *
+ * in An array of bytes.
+ * out An array of words.
+ */
+void fe_frombytes(fe out, const unsigned char *in)
+{
+ out[0] = (((int64_t)((in[ 0] ) )) )
+ | (((int64_t)((in[ 1] ) )) << 8)
+ | (((int64_t)((in[ 2] ) )) << 16)
+ | (((int64_t)((in[ 3] ) )) << 24)
+ | (((int64_t)((in[ 4] ) )) << 32)
+ | (((int64_t)((in[ 5] ) )) << 40)
+ | (((int64_t)((in[ 6] ) & 0x07)) << 48);
+ out[1] = (((int64_t)((in[ 6] >> 3) & 0x1f)) )
+ | (((int64_t)((in[ 7] ) )) << 5)
+ | (((int64_t)((in[ 8] ) )) << 13)
+ | (((int64_t)((in[ 9] ) )) << 21)
+ | (((int64_t)((in[10] ) )) << 29)
+ | (((int64_t)((in[11] ) )) << 37)
+ | (((int64_t)((in[12] ) & 0x3f)) << 45);
+ out[2] = (((int64_t)((in[12] >> 6) & 0x03)) )
+ | (((int64_t)((in[13] ) )) << 2)
+ | (((int64_t)((in[14] ) )) << 10)
+ | (((int64_t)((in[15] ) )) << 18)
+ | (((int64_t)((in[16] ) )) << 26)
+ | (((int64_t)((in[17] ) )) << 34)
+ | (((int64_t)((in[18] ) )) << 42)
+ | (((int64_t)((in[19] ) & 0x01)) << 50);
+ out[3] = (((int64_t)((in[19] >> 1) & 0x7f)) )
+ | (((int64_t)((in[20] ) )) << 7)
+ | (((int64_t)((in[21] ) )) << 15)
+ | (((int64_t)((in[22] ) )) << 23)
+ | (((int64_t)((in[23] ) )) << 31)
+ | (((int64_t)((in[24] ) )) << 39)
+ | (((int64_t)((in[25] ) & 0x0f)) << 47);
+ out[4] = (((int64_t)((in[25] >> 4) & 0x0f)) )
+ | (((int64_t)((in[26] ) )) << 4)
+ | (((int64_t)((in[27] ) )) << 12)
+ | (((int64_t)((in[28] ) )) << 20)
+ | (((int64_t)((in[29] ) )) << 28)
+ | (((int64_t)((in[30] ) )) << 36)
+ | (((int64_t)((in[31] ) & 0x7f)) << 44);
+}
+
+/* Convert a number represented as an array of words to an array of bytes.
+ * The array of words is normalized to an array of 51-bit data words and if
+ * greater than the mod, modulo reduced by the prime 2^255 - 1.
+ *
+ * n An array of words.
+ * out An array of bytes.
+ */
+void fe_tobytes(unsigned char *out, const fe n)
+{
+ fe in;
+ int64_t c;
+
+ in[0] = n[0];
+ in[1] = n[1];
+ in[2] = n[2];
+ in[3] = n[3];
+ in[4] = n[4];
+
+ /* Normalize to 51-bits of data per word. */
+ in[0] += (in[4] >> 51) * 19; in[4] &= 0x7ffffffffffff;
+
+ in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
+ in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
+ in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
+ in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
+ in[0] += (in[4] >> 51) * 19;
+ in[4] &= 0x7ffffffffffff;
+
+ c = (in[0] + 19) >> 51;
+ c = (in[1] + c) >> 51;
+ c = (in[2] + c) >> 51;
+ c = (in[3] + c) >> 51;
+ c = (in[4] + c) >> 51;
+ in[0] += c * 19;
+ in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
+ in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
+ in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
+ in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
+ in[4] &= 0x7ffffffffffff;
+
+ out[ 0] = (((byte)((in[0] ) )) );
+ out[ 1] = (((byte)((in[0] >> 8) )) );
+ out[ 2] = (((byte)((in[0] >> 16) )) );
+ out[ 3] = (((byte)((in[0] >> 24) )) );
+ out[ 4] = (((byte)((in[0] >> 32) )) );
+ out[ 5] = (((byte)((in[0] >> 40) )) );
+ out[ 6] = (((byte)((in[0] >> 48) & 0x07)) )
+ | (((byte)((in[1] ) & 0x1f)) << 3);
+ out[ 7] = (((byte)((in[1] >> 5) )) );
+ out[ 8] = (((byte)((in[1] >> 13) )) );
+ out[ 9] = (((byte)((in[1] >> 21) )) );
+ out[10] = (((byte)((in[1] >> 29) )) );
+ out[11] = (((byte)((in[1] >> 37) )) );
+ out[12] = (((byte)((in[1] >> 45) & 0x3f)) )
+ | (((byte)((in[2] ) & 0x03)) << 6);
+ out[13] = (((byte)((in[2] >> 2) )) );
+ out[14] = (((byte)((in[2] >> 10) )) );
+ out[15] = (((byte)((in[2] >> 18) )) );
+ out[16] = (((byte)((in[2] >> 26) )) );
+ out[17] = (((byte)((in[2] >> 34) )) );
+ out[18] = (((byte)((in[2] >> 42) )) );
+ out[19] = (((byte)((in[2] >> 50) & 0x01)) )
+ | (((byte)((in[3] ) & 0x7f)) << 1);
+ out[20] = (((byte)((in[3] >> 7) )) );
+ out[21] = (((byte)((in[3] >> 15) )) );
+ out[22] = (((byte)((in[3] >> 23) )) );
+ out[23] = (((byte)((in[3] >> 31) )) );
+ out[24] = (((byte)((in[3] >> 39) )) );
+ out[25] = (((byte)((in[3] >> 47) & 0x0f)) )
+ | (((byte)((in[4] ) & 0x0f)) << 4);
+ out[26] = (((byte)((in[4] >> 4) )) );
+ out[27] = (((byte)((in[4] >> 12) )) );
+ out[28] = (((byte)((in[4] >> 20) )) );
+ out[29] = (((byte)((in[4] >> 28) )) );
+ out[30] = (((byte)((in[4] >> 36) )) );
+ out[31] = (((byte)((in[4] >> 44) & 0x7f)) );
+}
+
+/* Set the field element to 1.
+ *
+ * n The field element number.
+ */
+void fe_1(fe n)
+{
+ n[0] = 0x0000000000001;
+ n[1] = 0x0000000000000;
+ n[2] = 0x0000000000000;
+ n[3] = 0x0000000000000;
+ n[4] = 0x0000000000000;
+}
+
+/* Set the field element to 0.
+ *
+ * n The field element number.
+ */
+void fe_0(fe n)
+{
+ n[0] = 0x0000000000000;
+ n[1] = 0x0000000000000;
+ n[2] = 0x0000000000000;
+ n[3] = 0x0000000000000;
+ n[4] = 0x0000000000000;
+}
+
+/* Copy field element a into field element r.
+ *
+ * r Field element to copy into.
+ * a Field element to copy.
+ */
+void fe_copy(fe r, const fe a)
+{
+ r[0] = a[0];
+ r[1] = a[1];
+ r[2] = a[2];
+ r[3] = a[3];
+ r[4] = a[4];
+}
+
+/* Constant time, conditional swap of field elements a and b.
+ *
+ * a A field element.
+ * b A field element.
+ * c If 1 then swap and if 0 then don't swap.
+ */
+void fe_cswap(fe a, fe b, int c)
+{
+ int64_t m = c;
+ int64_t t0, t1, t2, t3, t4;
+
+ /* Convert conditional into mask. */
+ m = -m;
+ t0 = m & (a[0] ^ b[0]);
+ t1 = m & (a[1] ^ b[1]);
+ t2 = m & (a[2] ^ b[2]);
+ t3 = m & (a[3] ^ b[3]);
+ t4 = m & (a[4] ^ b[4]);
+
+ a[0] ^= t0;
+ a[1] ^= t1;
+ a[2] ^= t2;
+ a[3] ^= t3;
+ a[4] ^= t4;
+
+ b[0] ^= t0;
+ b[1] ^= t1;
+ b[2] ^= t2;
+ b[3] ^= t3;
+ b[4] ^= t4;
+}
+
+/* Subtract b from a into r. (r = a - b)
+ *
+ * r A field element.
+ * a A field element.
+ * b A field element.
+ */
+void fe_sub(fe r, const fe a, const fe b)
+{
+ r[0] = a[0] - b[0];
+ r[1] = a[1] - b[1];
+ r[2] = a[2] - b[2];
+ r[3] = a[3] - b[3];
+ r[4] = a[4] - b[4];
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A field element.
+ * a A field element.
+ * b A field element.
+ */
+void fe_add(fe r, const fe a, const fe b)
+{
+ r[0] = a[0] + b[0];
+ r[1] = a[1] + b[1];
+ r[2] = a[2] + b[2];
+ r[3] = a[3] + b[3];
+ r[4] = a[4] + b[4];
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A field element.
+ * a A field element.
+ * b A field element.
+ */
+void fe_mul(fe r, const fe a, const fe b)
+{
+ const __int128_t k19 = 19;
+ __int128_t t0 = ((__int128_t)a[0]) * b[0];
+ __int128_t t1 = ((__int128_t)a[0]) * b[1]
+ + ((__int128_t)a[1]) * b[0];
+ __int128_t t2 = ((__int128_t)a[0]) * b[2]
+ + ((__int128_t)a[1]) * b[1]
+ + ((__int128_t)a[2]) * b[0];
+ __int128_t t3 = ((__int128_t)a[0]) * b[3]
+ + ((__int128_t)a[1]) * b[2]
+ + ((__int128_t)a[2]) * b[1]
+ + ((__int128_t)a[3]) * b[0];
+ __int128_t t4 = ((__int128_t)a[0]) * b[4]
+ + ((__int128_t)a[1]) * b[3]
+ + ((__int128_t)a[2]) * b[2]
+ + ((__int128_t)a[3]) * b[1]
+ + ((__int128_t)a[4]) * b[0];
+ __int128_t t5 = ((__int128_t)a[1]) * b[4]
+ + ((__int128_t)a[2]) * b[3]
+ + ((__int128_t)a[3]) * b[2]
+ + ((__int128_t)a[4]) * b[1];
+ __int128_t t6 = ((__int128_t)a[2]) * b[4]
+ + ((__int128_t)a[3]) * b[3]
+ + ((__int128_t)a[4]) * b[2];
+ __int128_t t7 = ((__int128_t)a[3]) * b[4]
+ + ((__int128_t)a[4]) * b[3];
+ __int128_t t8 = ((__int128_t)a[4]) * b[4];
+
+ /* Modulo reduce double long word. */
+ t0 += t5 * k19;
+ t1 += t6 * k19;
+ t2 += t7 * k19;
+ t3 += t8 * k19;
+
+ /* Normalize to 51-bits of data per word. */
+ t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
+
+ t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+ t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+ t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+ t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+ r[0] += (t4 >> 51) * k19;
+ r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A field element.
+ * a A field element.
+ * b A field element.
+ */
+void fe_sq(fe r, const fe a)
+{
+ const __int128_t k19 = 19;
+ const __int128_t k2 = 2;
+ __int128_t t0 = ((__int128_t)a[0]) * a[0];
+ __int128_t t1 = ((__int128_t)a[0]) * a[1] * k2;
+ __int128_t t2 = ((__int128_t)a[0]) * a[2] * k2
+ + ((__int128_t)a[1]) * a[1];
+ __int128_t t3 = ((__int128_t)a[0]) * a[3] * k2
+ + ((__int128_t)a[1]) * a[2] * k2;
+ __int128_t t4 = ((__int128_t)a[0]) * a[4] * k2
+ + ((__int128_t)a[1]) * a[3] * k2
+ + ((__int128_t)a[2]) * a[2];
+ __int128_t t5 = ((__int128_t)a[1]) * a[4] * k2
+ + ((__int128_t)a[2]) * a[3] * k2;
+ __int128_t t6 = ((__int128_t)a[2]) * a[4] * k2
+ + ((__int128_t)a[3]) * a[3];
+ __int128_t t7 = ((__int128_t)a[3]) * a[4] * k2;
+ __int128_t t8 = ((__int128_t)a[4]) * a[4];
+
+ /* Modulo reduce double long word. */
+ t0 += t5 * k19;
+ t1 += t6 * k19;
+ t2 += t7 * k19;
+ t3 += t8 * k19;
+
+ /* Normalize to 51-bits of data per word. */
+ t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
+
+ t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+ t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+ t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+ t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+ r[0] += (t4 >> 51) * k19;
+ r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Multiply a by 121666 and put result in r. (r = 121666 * a)
+ *
+ * r A field element.
+ * a A field element.
+ * b A field element.
+ */
+void fe_mul121666(fe r, fe a)
+{
+ const __int128_t k19 = 19;
+ const __int128_t k121666 = 121666;
+ __int128_t t0 = ((__int128_t)a[0]) * k121666;
+ __int128_t t1 = ((__int128_t)a[1]) * k121666;
+ __int128_t t2 = ((__int128_t)a[2]) * k121666;
+ __int128_t t3 = ((__int128_t)a[3]) * k121666;
+ __int128_t t4 = ((__int128_t)a[4]) * k121666;
+
+ /* Normalize to 51-bits of data per word. */
+ t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
+
+ t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+ t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+ t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+ t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+ r[0] += (t4 >> 51) * k19;
+ r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Find the inverse of a modulo 2^255 - 1 and put result in r.
+ * (r * a) mod (2^255 - 1) = 1
+ * Implementation is constant time.
+ *
+ * r A field element.
+ * a A field element.
+ */
+void fe_invert(fe r, const fe a)
+{
+ fe t0, t1, t2, t3;
+ int i;
+
+ /* a ^ (2^255 - 21) */
+ fe_sq(t0, a); for (i = 1; i < 1; ++i) fe_sq(t0, t0);
+ fe_sq(t1, t0); for (i = 1; i < 2; ++i) fe_sq(t1, t1); fe_mul(t1, a, t1);
+ fe_mul(t0, t0, t1);
+ fe_sq(t2, t0); for (i = 1; i < 1; ++i) fe_sq(t2, t2); fe_mul(t1, t1, t2);
+ fe_sq(t2, t1); for (i = 1; i < 5; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+ fe_sq(t2, t1); for (i = 1; i < 10; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
+ fe_sq(t3, t2); for (i = 1; i < 20; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
+ fe_sq(t2, t2); for (i = 1; i < 10; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+ fe_sq(t2, t1); for (i = 1; i < 50; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
+ fe_sq(t3, t2); for (i = 1; i < 100; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
+ fe_sq(t2, t2); for (i = 1; i < 50; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+ fe_sq(t1, t1); for (i = 1; i < 5; ++i) fe_sq(t1, t1); fe_mul( r, t1, t0);
+}
+
+#ifndef CURVE25519_SMALL
+/* Scalar multiply the field element a by n using Montgomery Ladder and places
+ * result in r.
+ *
+ * r A field element as an array of bytes.
+ * n The scalar as an array of bytes.
+ * a A field element as an array of bytes.
+ */
+int curve25519(byte* r, byte* n, byte* a)
+{
+ fe x1, x2, z2, x3, z3;
+ fe t0, t1;
+ int pos;
+ unsigned int swap;
+ unsigned int b;
+
+ fe_frombytes(x1, a);
+ fe_1(x2);
+ fe_0(z2);
+ fe_copy(x3, x1);
+ fe_1(z3);
+
+ swap = 0;
+ for (pos = 254;pos >= 0;--pos) {
+ b = n[pos / 8] >> (pos & 7);
+ b &= 1;
+ swap ^= b;
+ fe_cswap(x2, x3, swap);
+ fe_cswap(z2, z3, swap);
+ swap = b;
+
+ fe_sub(t0, x3, z3);
+ fe_sub(t1, x2, z2);
+ fe_add(x2, x2, z2);
+ fe_add(z2, x3, z3);
+ fe_mul(z3, t0, x2);
+ fe_mul(z2, z2, t1);
+ fe_sq(t0, t1);
+ fe_sq(t1, x2);
+ fe_add(x3, z3, z2);
+ fe_sub(z2, z3, z2);
+ fe_mul(x2, t1, t0);
+ fe_sub(t1, t1, t0);
+ fe_sq(z2, z2);
+ fe_mul121666(z3, t1);
+ fe_sq(x3, x3);
+ fe_add(t0, t0, z3);
+ fe_mul(z3, x1, z2);
+ fe_mul(z2, t1, t0);
+ }
+ fe_cswap(x2, x3, swap);
+ fe_cswap(z2, z3, swap);
+
+ fe_invert(z2, z2);
+ fe_mul(x2, x2, z2);
+ fe_tobytes(r, x2);
+
+ return 0;
+}
+#endif /* !CURVE25519_SMALL */
+
+/* The field element value 0 as an array of bytes. */
+static const unsigned char zero[32] = {0};
+
+/* Constant time check as to whether a is not 0.
+ *
+ * a A field element.
+ */
+int fe_isnonzero(const fe a)
+{
+ unsigned char s[32];
+ fe_tobytes(s, a);
+ return ConstantCompare(s, zero, 32);
+}
+
+/* Checks whether a is negative.
+ *
+ * a A field element.
+ */
+int fe_isnegative(const fe a)
+{
+ unsigned char s[32];
+ fe_tobytes(s, a);
+ return s[0] & 1;
+}
+
+/* Negates field element a and stores the result in r.
+ *
+ * r A field element.
+ * a A field element.
+ */
+void fe_neg(fe r, const fe a)
+{
+ r[0] = -a[0];
+ r[1] = -a[1];
+ r[2] = -a[2];
+ r[3] = -a[3];
+ r[4] = -a[4];
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ *
+ * a A field element.
+ * b A field element.
+ * c If 1 then copy and if 0 then don't copy.
+ */
+void fe_cmov(fe a, const fe b, int c)
+{
+ int64_t m = c;
+ int64_t t0, t1, t2, t3, t4;
+
+ /* Convert conditional into mask. */
+ m = -m;
+ t0 = m & (a[0] ^ b[0]);
+ t1 = m & (a[1] ^ b[1]);
+ t2 = m & (a[2] ^ b[2]);
+ t3 = m & (a[3] ^ b[3]);
+ t4 = m & (a[4] ^ b[4]);
+
+ a[0] ^= t0;
+ a[1] ^= t1;
+ a[2] ^= t2;
+ a[3] ^= t3;
+ a[4] ^= t4;
+}
+
+void fe_pow22523(fe r, const fe a)
+{
+ fe t0, t1, t2;
+ int i;
+
+ /* a ^ (2^255 - 23) */
+ fe_sq(t0, a); for (i = 1; i < 1; ++i) fe_sq(t0, t0);
+ fe_sq(t1, t0); for (i = 1; i < 2; ++i) fe_sq(t1, t1); fe_mul(t1, a, t1);
+ fe_mul(t0, t0, t1);
+ fe_sq(t0, t0); for (i = 1; i < 1; ++i) fe_sq(t0, t0); fe_mul(t0, t1, t0);
+ fe_sq(t1, t0); for (i = 1; i < 5; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
+ fe_sq(t1, t0); for (i = 1; i < 10; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
+ fe_sq(t2, t1); for (i = 1; i < 20; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+ fe_sq(t1, t1); for (i = 1; i < 10; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
+ fe_sq(t1, t0); for (i = 1; i < 50; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
+ fe_sq(t2, t1); for (i = 1; i < 100; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+ fe_sq(t1, t1); for (i = 1; i < 50; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
+ fe_sq(t0, t0); for (i = 1; i < 2; ++i) fe_sq(t0, t0); fe_mul( r, t0, a);
+
+ return;
+}
+
+/* Double the square of a and put result in r. (r = 2 * a * a)
+ *
+ * r A field element.
+ * a A field element.
+ * b A field element.
+ */
+void fe_sq2(fe r, const fe a)
+{
+ const __int128_t k2 = 2;
+ const __int128_t k19 = 19;
+ __int128_t t0 = k2 * (((__int128_t)a[0]) * a[0]);
+ __int128_t t1 = k2 * (((__int128_t)a[0]) * a[1] * k2);
+ __int128_t t2 = k2 * (((__int128_t)a[0]) * a[2] * k2
+ + ((__int128_t)a[1]) * a[1]);
+ __int128_t t3 = k2 * (((__int128_t)a[0]) * a[3] * k2
+ + ((__int128_t)a[1]) * a[2] * k2);
+ __int128_t t4 = k2 * (((__int128_t)a[0]) * a[4] * k2
+ + ((__int128_t)a[1]) * a[3] * k2
+ + ((__int128_t)a[2]) * a[2]);
+ __int128_t t5 = k2 * (((__int128_t)a[1]) * a[4] * k2
+ + ((__int128_t)a[2]) * a[3] * k2);
+ __int128_t t6 = k2 * (((__int128_t)a[2]) * a[4] * k2
+ + ((__int128_t)a[3]) * a[3]);
+ __int128_t t7 = k2 * (((__int128_t)a[3]) * a[4] * k2);
+ __int128_t t8 = k2 * (((__int128_t)a[4]) * a[4]);
+
+ /* Modulo reduce double long word. */
+ t0 += t5 * k19;
+ t1 += t6 * k19;
+ t2 += t7 * k19;
+ t3 += t8 * k19;
+
+ /* Normalize to 51-bits of data per word. */
+ t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
+
+ t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+ t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+ t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+ t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+ r[0] += (t4 >> 51) * k19;
+ r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Load 3 little endian bytes into a 64-bit word.
+ *
+ * in An array of bytes.
+ * returns a 64-bit word.
+ */
+uint64_t load_3(const unsigned char *in)
+{
+ uint64_t result;
+
+ result = ((((uint64_t)in[0]) ) |
+ (((uint64_t)in[1]) << 8) |
+ (((uint64_t)in[2]) << 16));
+
+ return result;
+}
+
+/* Load 4 little endian bytes into a 64-bit word.
+ *
+ * in An array of bytes.
+ * returns a 64-bit word.
+ */
+uint64_t load_4(const unsigned char *in)
+{
+ uint64_t result;
+
+ result = ((((uint64_t)in[0]) ) |
+ (((uint64_t)in[1]) << 8) |
+ (((uint64_t)in[2]) << 16) |
+ (((uint64_t)in[3]) << 24));
+
+ return result;
+}
+
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_x25519_asm.S b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_x25519_asm.S
new file mode 100644
index 000000000..6d0f638b5
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fe_x25519_asm.S
@@ -0,0 +1,16542 @@
+/* fe_x25519_asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifndef HAVE_INTEL_AVX1
+#define HAVE_INTEL_AVX1
+#endif /* HAVE_INTEL_AVX1 */
+#ifndef NO_AVX2_SUPPORT
+#define HAVE_INTEL_AVX2
+#endif /* NO_AVX2_SUPPORT */
+
+#ifndef __APPLE__
+.text
+.globl fe_init
+.type fe_init,@function
+.align 4
+fe_init:
+#else
+.section __TEXT,__text
+.globl _fe_init
+.p2align 2
+_fe_init:
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+#ifndef __APPLE__
+ movq cpuFlagsSet@GOTPCREL(%rip), %rax
+ movl (%rax), %eax
+#else
+ movl _cpuFlagsSet(%rip), %eax
+#endif /* __APPLE__ */
+ testl %eax, %eax
+ je L_fe_init_get_flags
+ repz retq
+L_fe_init_get_flags:
+#ifndef __APPLE__
+ callq cpuid_get_flags@plt
+#else
+ callq _cpuid_get_flags
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq intelFlags@GOTPCREL(%rip), %rdx
+ movl %eax, (%rdx)
+#else
+ movl %eax, _intelFlags(%rip)
+#endif /* __APPLE__ */
+ andl $0x50, %eax
+ cmpl $0x50, %eax
+ jne L_fe_init_flags_done
+#ifndef __APPLE__
+ movq fe_mul_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_mul_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_mul_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_mul_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_sq_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_sq_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_sq_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_sq_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_mul121666_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_mul121666_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_mul121666_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_mul121666_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_sq2_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_sq2_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_sq2_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_sq2_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_invert_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_invert_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_invert_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_invert_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq curve25519_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _curve25519_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq curve25519_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _curve25519_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_pow22523_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_pow22523_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_pow22523_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_pow22523_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_to_p2_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_ge_to_p2_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_to_p2_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_ge_to_p2_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_to_p3_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_ge_to_p3_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_to_p3_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_ge_to_p3_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_dbl_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_ge_dbl_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_dbl_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_ge_dbl_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_madd_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_ge_madd_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_madd_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_ge_madd_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_msub_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_ge_msub_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_msub_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_ge_msub_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_add_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_ge_add_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_add_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_ge_add_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_sub_avx2@GOTPCREL(%rip), %rax
+#else
+ leaq _fe_ge_sub_avx2(%rip), %rax
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ movq fe_ge_sub_p@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+#else
+ movq %rax, _fe_ge_sub_p(%rip)
+#endif /* __APPLE__ */
+L_fe_init_flags_done:
+#ifndef __APPLE__
+ movq cpuFlagsSet@GOTPCREL(%rip), %rdx
+ movl $0x1, (%rdx)
+#else
+ movl $0x1, _cpuFlagsSet(%rip)
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+ repz retq
+#ifndef __APPLE__
+.size fe_init,.-fe_init
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_frombytes
+.type fe_frombytes,@function
+.align 4
+fe_frombytes:
+#else
+.section __TEXT,__text
+.globl _fe_frombytes
+.p2align 2
+_fe_frombytes:
+#endif /* __APPLE__ */
+ movq $0x7fffffffffffffff, %r9
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ andq %r9, %r8
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %rcx, 16(%rdi)
+ movq %r8, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size fe_frombytes,.-fe_frombytes
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_tobytes
+.type fe_tobytes,@function
+.align 4
+fe_tobytes:
+#else
+.section __TEXT,__text
+.globl _fe_tobytes
+.p2align 2
+_fe_tobytes:
+#endif /* __APPLE__ */
+ movq $0x7fffffffffffffff, %r10
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ addq $19, %rdx
+ adcq $0x00, %rax
+ adcq $0x00, %rcx
+ adcq $0x00, %r8
+ shrq $63, %r8
+ imulq $19, %r8, %r9
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ addq %r9, %rdx
+ adcq $0x00, %rax
+ adcq $0x00, %rcx
+ adcq $0x00, %r8
+ andq %r10, %r8
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %rcx, 16(%rdi)
+ movq %r8, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size fe_tobytes,.-fe_tobytes
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_1
+.type fe_1,@function
+.align 4
+fe_1:
+#else
+.section __TEXT,__text
+.globl _fe_1
+.p2align 2
+_fe_1:
+#endif /* __APPLE__ */
+ # Set one
+ movq $0x01, (%rdi)
+ movq $0x00, 8(%rdi)
+ movq $0x00, 16(%rdi)
+ movq $0x00, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size fe_1,.-fe_1
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_0
+.type fe_0,@function
+.align 4
+fe_0:
+#else
+.section __TEXT,__text
+.globl _fe_0
+.p2align 2
+_fe_0:
+#endif /* __APPLE__ */
+ # Set zero
+ movq $0x00, (%rdi)
+ movq $0x00, 8(%rdi)
+ movq $0x00, 16(%rdi)
+ movq $0x00, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size fe_0,.-fe_0
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_copy
+.type fe_copy,@function
+.align 4
+fe_copy:
+#else
+.section __TEXT,__text
+.globl _fe_copy
+.p2align 2
+_fe_copy:
+#endif /* __APPLE__ */
+ # Copy
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %rcx, 16(%rdi)
+ movq %r8, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size fe_copy,.-fe_copy
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_sub
+.type fe_sub,@function
+.align 4
+fe_sub:
+#else
+.section __TEXT,__text
+.globl _fe_sub
+.p2align 2
+_fe_sub:
+#endif /* __APPLE__ */
+ pushq %r12
+ # Sub
+ movq (%rsi), %rax
+ movq 8(%rsi), %rcx
+ movq 16(%rsi), %r8
+ movq 24(%rsi), %r9
+ subq (%rdx), %rax
+ movq $0x00, %r10
+ sbbq 8(%rdx), %rcx
+ movq $-19, %r11
+ sbbq 16(%rdx), %r8
+ movq $0x7fffffffffffffff, %r12
+ sbbq 24(%rdx), %r9
+ sbbq $0x00, %r10
+ # Mask the modulus
+ andq %r10, %r11
+ andq %r10, %r12
+ # Add modulus (if underflow)
+ addq %r11, %rax
+ adcq %r10, %rcx
+ adcq %r10, %r8
+ adcq %r12, %r9
+ movq %rax, (%rdi)
+ movq %rcx, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_sub,.-fe_sub
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_add
+.type fe_add,@function
+.align 4
+fe_add:
+#else
+.section __TEXT,__text
+.globl _fe_add
+.p2align 2
+_fe_add:
+#endif /* __APPLE__ */
+ pushq %r12
+ # Add
+ movq (%rsi), %rax
+ movq 8(%rsi), %rcx
+ addq (%rdx), %rax
+ movq 16(%rsi), %r8
+ adcq 8(%rdx), %rcx
+ movq 24(%rsi), %r10
+ adcq 16(%rdx), %r8
+ movq $-19, %r11
+ adcq 24(%rdx), %r10
+ movq $0x7fffffffffffffff, %r12
+ movq %r10, %r9
+ sarq $63, %r10
+ # Mask the modulus
+ andq %r10, %r11
+ andq %r10, %r12
+ # Sub modulus (if overflow)
+ subq %r11, %rax
+ sbbq %r10, %rcx
+ sbbq %r10, %r8
+ sbbq %r12, %r9
+ movq %rax, (%rdi)
+ movq %rcx, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_add,.-fe_add
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_neg
+.type fe_neg,@function
+.align 4
+fe_neg:
+#else
+.section __TEXT,__text
+.globl _fe_neg
+.p2align 2
+_fe_neg:
+#endif /* __APPLE__ */
+ movq $-19, %rdx
+ movq $-1, %rax
+ movq $-1, %rcx
+ movq $0x7fffffffffffffff, %r8
+ subq (%rsi), %rdx
+ sbbq 8(%rsi), %rax
+ sbbq 16(%rsi), %rcx
+ sbbq 24(%rsi), %r8
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %rcx, 16(%rdi)
+ movq %r8, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size fe_neg,.-fe_neg
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_cmov
+.type fe_cmov,@function
+.align 4
+fe_cmov:
+#else
+.section __TEXT,__text
+.globl _fe_cmov
+.p2align 2
+_fe_cmov:
+#endif /* __APPLE__ */
+ cmpl $0x01, %edx
+ movq (%rdi), %rcx
+ movq 8(%rdi), %r8
+ movq 16(%rdi), %r9
+ movq 24(%rdi), %r10
+ cmoveq (%rsi), %rcx
+ cmoveq 8(%rsi), %r8
+ cmoveq 16(%rsi), %r9
+ cmoveq 24(%rsi), %r10
+ movq %rcx, (%rdi)
+ movq %r8, 8(%rdi)
+ movq %r9, 16(%rdi)
+ movq %r10, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size fe_cmov,.-fe_cmov
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_isnonzero
+.type fe_isnonzero,@function
+.align 4
+fe_isnonzero:
+#else
+.section __TEXT,__text
+.globl _fe_isnonzero
+.p2align 2
+_fe_isnonzero:
+#endif /* __APPLE__ */
+ movq $0x7fffffffffffffff, %r10
+ movq (%rdi), %rax
+ movq 8(%rdi), %rdx
+ movq 16(%rdi), %rcx
+ movq 24(%rdi), %r8
+ addq $19, %rax
+ adcq $0x00, %rdx
+ adcq $0x00, %rcx
+ adcq $0x00, %r8
+ shrq $63, %r8
+ imulq $19, %r8, %r9
+ movq (%rdi), %rax
+ movq 8(%rdi), %rdx
+ movq 16(%rdi), %rcx
+ movq 24(%rdi), %r8
+ addq %r9, %rax
+ adcq $0x00, %rdx
+ adcq $0x00, %rcx
+ adcq $0x00, %r8
+ andq %r10, %r8
+ orq %rdx, %rax
+ orq %rcx, %rax
+ orq %r8, %rax
+ repz retq
+#ifndef __APPLE__
+.size fe_isnonzero,.-fe_isnonzero
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_isnegative
+.type fe_isnegative,@function
+.align 4
+fe_isnegative:
+#else
+.section __TEXT,__text
+.globl _fe_isnegative
+.p2align 2
+_fe_isnegative:
+#endif /* __APPLE__ */
+ movq $0x7fffffffffffffff, %r11
+ movq (%rdi), %rdx
+ movq 8(%rdi), %rcx
+ movq 16(%rdi), %r8
+ movq 24(%rdi), %r9
+ movq %rdx, %rax
+ addq $19, %rdx
+ adcq $0x00, %rcx
+ adcq $0x00, %r8
+ adcq $0x00, %r9
+ shrq $63, %r9
+ imulq $19, %r9, %r10
+ addq %r10, %rax
+ andq $0x01, %rax
+ repz retq
+#ifndef __APPLE__
+.size fe_isnegative,.-fe_isnegative
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_cmov_table
+.type fe_cmov_table,@function
+.align 4
+fe_cmov_table:
+#else
+.section __TEXT,__text
+.globl _fe_cmov_table
+.p2align 2
+_fe_cmov_table:
+#endif /* __APPLE__ */
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rdx, %rcx
+ movsbq %cl, %rax
+ cdq
+ xorb %dl, %al
+ subb %dl, %al
+ movb %al, %r15b
+ movq $0x01, %rax
+ xorq %rdx, %rdx
+ xorq %r8, %r8
+ xorq %r9, %r9
+ movq $0x01, %r10
+ xorq %r11, %r11
+ xorq %r12, %r12
+ xorq %r13, %r13
+ cmpb $0x01, %r15b
+ movq (%rsi), %r14
+ cmoveq %r14, %rax
+ movq 8(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 16(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 24(%rsi), %r14
+ cmoveq %r14, %r9
+ movq 32(%rsi), %r14
+ cmoveq %r14, %r10
+ movq 40(%rsi), %r14
+ cmoveq %r14, %r11
+ movq 48(%rsi), %r14
+ cmoveq %r14, %r12
+ movq 56(%rsi), %r14
+ cmoveq %r14, %r13
+ cmpb $2, %r15b
+ movq 96(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 104(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 112(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 120(%rsi), %r14
+ cmoveq %r14, %r9
+ movq 128(%rsi), %r14
+ cmoveq %r14, %r10
+ movq 136(%rsi), %r14
+ cmoveq %r14, %r11
+ movq 144(%rsi), %r14
+ cmoveq %r14, %r12
+ movq 152(%rsi), %r14
+ cmoveq %r14, %r13
+ cmpb $3, %r15b
+ movq 192(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 200(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 208(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 216(%rsi), %r14
+ cmoveq %r14, %r9
+ movq 224(%rsi), %r14
+ cmoveq %r14, %r10
+ movq 232(%rsi), %r14
+ cmoveq %r14, %r11
+ movq 240(%rsi), %r14
+ cmoveq %r14, %r12
+ movq 248(%rsi), %r14
+ cmoveq %r14, %r13
+ cmpb $4, %r15b
+ movq 288(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 296(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 304(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 312(%rsi), %r14
+ cmoveq %r14, %r9
+ movq 320(%rsi), %r14
+ cmoveq %r14, %r10
+ movq 328(%rsi), %r14
+ cmoveq %r14, %r11
+ movq 336(%rsi), %r14
+ cmoveq %r14, %r12
+ movq 344(%rsi), %r14
+ cmoveq %r14, %r13
+ cmpb $5, %r15b
+ movq 384(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 392(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 400(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 408(%rsi), %r14
+ cmoveq %r14, %r9
+ movq 416(%rsi), %r14
+ cmoveq %r14, %r10
+ movq 424(%rsi), %r14
+ cmoveq %r14, %r11
+ movq 432(%rsi), %r14
+ cmoveq %r14, %r12
+ movq 440(%rsi), %r14
+ cmoveq %r14, %r13
+ cmpb $6, %r15b
+ movq 480(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 488(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 496(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 504(%rsi), %r14
+ cmoveq %r14, %r9
+ movq 512(%rsi), %r14
+ cmoveq %r14, %r10
+ movq 520(%rsi), %r14
+ cmoveq %r14, %r11
+ movq 528(%rsi), %r14
+ cmoveq %r14, %r12
+ movq 536(%rsi), %r14
+ cmoveq %r14, %r13
+ cmpb $7, %r15b
+ movq 576(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 584(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 592(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 600(%rsi), %r14
+ cmoveq %r14, %r9
+ movq 608(%rsi), %r14
+ cmoveq %r14, %r10
+ movq 616(%rsi), %r14
+ cmoveq %r14, %r11
+ movq 624(%rsi), %r14
+ cmoveq %r14, %r12
+ movq 632(%rsi), %r14
+ cmoveq %r14, %r13
+ cmpb $8, %r15b
+ movq 672(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 680(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 688(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 696(%rsi), %r14
+ cmoveq %r14, %r9
+ movq 704(%rsi), %r14
+ cmoveq %r14, %r10
+ movq 712(%rsi), %r14
+ cmoveq %r14, %r11
+ movq 720(%rsi), %r14
+ cmoveq %r14, %r12
+ movq 728(%rsi), %r14
+ cmoveq %r14, %r13
+ cmpb $0x00, %cl
+ movq %rax, %r14
+ cmovlq %r10, %rax
+ cmovlq %r14, %r10
+ movq %rdx, %r14
+ cmovlq %r11, %rdx
+ cmovlq %r14, %r11
+ movq %r8, %r14
+ cmovlq %r12, %r8
+ cmovlq %r14, %r12
+ movq %r9, %r14
+ cmovlq %r13, %r9
+ cmovlq %r14, %r13
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ movq %r10, 32(%rdi)
+ movq %r11, 40(%rdi)
+ movq %r12, 48(%rdi)
+ movq %r13, 56(%rdi)
+ xorq %rax, %rax
+ xorq %rdx, %rdx
+ xorq %r8, %r8
+ xorq %r9, %r9
+ cmpb $0x01, %r15b
+ movq 64(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 72(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 80(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 88(%rsi), %r14
+ cmoveq %r14, %r9
+ cmpb $2, %r15b
+ movq 160(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 168(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 176(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 184(%rsi), %r14
+ cmoveq %r14, %r9
+ cmpb $3, %r15b
+ movq 256(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 264(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 272(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 280(%rsi), %r14
+ cmoveq %r14, %r9
+ cmpb $4, %r15b
+ movq 352(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 360(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 368(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 376(%rsi), %r14
+ cmoveq %r14, %r9
+ cmpb $5, %r15b
+ movq 448(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 456(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 464(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 472(%rsi), %r14
+ cmoveq %r14, %r9
+ cmpb $6, %r15b
+ movq 544(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 552(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 560(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 568(%rsi), %r14
+ cmoveq %r14, %r9
+ cmpb $7, %r15b
+ movq 640(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 648(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 656(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 664(%rsi), %r14
+ cmoveq %r14, %r9
+ cmpb $8, %r15b
+ movq 736(%rsi), %r14
+ cmoveq %r14, %rax
+ movq 744(%rsi), %r14
+ cmoveq %r14, %rdx
+ movq 752(%rsi), %r14
+ cmoveq %r14, %r8
+ movq 760(%rsi), %r14
+ cmoveq %r14, %r9
+ movq $-19, %r10
+ movq $-1, %r11
+ movq $-1, %r12
+ movq $0x7fffffffffffffff, %r13
+ subq %rax, %r10
+ sbbq %rdx, %r11
+ sbbq %r8, %r12
+ sbbq %r9, %r13
+ cmpb $0x00, %cl
+ cmovlq %r10, %rax
+ cmovlq %r11, %rdx
+ cmovlq %r12, %r8
+ cmovlq %r13, %r9
+ movq %rax, 64(%rdi)
+ movq %rdx, 72(%rdi)
+ movq %r8, 80(%rdi)
+ movq %r9, 88(%rdi)
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_cmov_table,.-fe_cmov_table
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_mul
+.type fe_mul,@function
+.align 4
+fe_mul:
+#else
+.section __TEXT,__text
+.globl _fe_mul
+.p2align 2
+_fe_mul:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_mul_p(%rip)
+#else
+ jmpq *_fe_mul_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_mul,.-fe_mul
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_sq
+.type fe_sq,@function
+.align 4
+fe_sq:
+#else
+.section __TEXT,__text
+.globl _fe_sq
+.p2align 2
+_fe_sq:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_sq_p(%rip)
+#else
+ jmpq *_fe_sq_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_sq,.-fe_sq
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_mul121666
+.type fe_mul121666,@function
+.align 4
+fe_mul121666:
+#else
+.section __TEXT,__text
+.globl _fe_mul121666
+.p2align 2
+_fe_mul121666:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_mul121666_p(%rip)
+#else
+ jmpq *_fe_mul121666_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_mul121666,.-fe_mul121666
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_sq2
+.type fe_sq2,@function
+.align 4
+fe_sq2:
+#else
+.section __TEXT,__text
+.globl _fe_sq2
+.p2align 2
+_fe_sq2:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_sq2_p(%rip)
+#else
+ jmpq *_fe_sq2_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_sq2,.-fe_sq2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_invert
+.type fe_invert,@function
+.align 4
+fe_invert:
+#else
+.section __TEXT,__text
+.globl _fe_invert
+.p2align 2
+_fe_invert:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_invert_p(%rip)
+#else
+ jmpq *_fe_invert_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_invert,.-fe_invert
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl curve25519
+.type curve25519,@function
+.align 4
+curve25519:
+#else
+.section __TEXT,__text
+.globl _curve25519
+.p2align 2
+_curve25519:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *curve25519_p(%rip)
+#else
+ jmpq *_curve25519_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size curve25519,.-curve25519
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_pow22523
+.type fe_pow22523,@function
+.align 4
+fe_pow22523:
+#else
+.section __TEXT,__text
+.globl _fe_pow22523
+.p2align 2
+_fe_pow22523:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_pow22523_p(%rip)
+#else
+ jmpq *_fe_pow22523_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_pow22523,.-fe_pow22523
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_to_p2
+.type fe_ge_to_p2,@function
+.align 4
+fe_ge_to_p2:
+#else
+.section __TEXT,__text
+.globl _fe_ge_to_p2
+.p2align 2
+_fe_ge_to_p2:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_ge_to_p2_p(%rip)
+#else
+ jmpq *_fe_ge_to_p2_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_ge_to_p2,.-fe_ge_to_p2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_to_p3
+.type fe_ge_to_p3,@function
+.align 4
+fe_ge_to_p3:
+#else
+.section __TEXT,__text
+.globl _fe_ge_to_p3
+.p2align 2
+_fe_ge_to_p3:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_ge_to_p3_p(%rip)
+#else
+ jmpq *_fe_ge_to_p3_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_ge_to_p3,.-fe_ge_to_p3
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_dbl
+.type fe_ge_dbl,@function
+.align 4
+fe_ge_dbl:
+#else
+.section __TEXT,__text
+.globl _fe_ge_dbl
+.p2align 2
+_fe_ge_dbl:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_ge_dbl_p(%rip)
+#else
+ jmpq *_fe_ge_dbl_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_ge_dbl,.-fe_ge_dbl
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_madd
+.type fe_ge_madd,@function
+.align 4
+fe_ge_madd:
+#else
+.section __TEXT,__text
+.globl _fe_ge_madd
+.p2align 2
+_fe_ge_madd:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_ge_madd_p(%rip)
+#else
+ jmpq *_fe_ge_madd_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_ge_madd,.-fe_ge_madd
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_msub
+.type fe_ge_msub,@function
+.align 4
+fe_ge_msub:
+#else
+.section __TEXT,__text
+.globl _fe_ge_msub
+.p2align 2
+_fe_ge_msub:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ jmpq *fe_ge_msub_p(%rip)
+#else
+ jmpq *_fe_ge_msub_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_ge_msub,.-fe_ge_msub
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_add
+.type fe_ge_add,@function
+.align 4
+fe_ge_add:
+#else
+.section __TEXT,__text
+.globl _fe_ge_add
+.p2align 2
+_fe_ge_add:
+#endif /* __APPLE__ */
+ # Dispatch stub: tail-jump through the fe_ge_add_p pointer to the
+ # implementation selected for this CPU (initialised to fe_ge_add_x64 below).
+#ifndef __APPLE__
+ jmpq *fe_ge_add_p(%rip)
+#else
+ jmpq *_fe_ge_add_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_ge_add,.-fe_ge_add
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_sub
+.type fe_ge_sub,@function
+.align 4
+fe_ge_sub:
+#else
+.section __TEXT,__text
+.globl _fe_ge_sub
+.p2align 2
+_fe_ge_sub:
+#endif /* __APPLE__ */
+ # Dispatch stub: tail-jump through the fe_ge_sub_p pointer to the
+ # implementation selected for this CPU (initialised to fe_ge_sub_x64 below).
+#ifndef __APPLE__
+ jmpq *fe_ge_sub_p(%rip)
+#else
+ jmpq *_fe_ge_sub_p(%rip)
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.size fe_ge_sub,.-fe_ge_sub
+#endif /* __APPLE__ */
+ # ---------------------------------------------------------------------
+ # Run-time CPU dispatch state and function-pointer table.
+ # cpuFlagsSet / intelFlags are 32-bit flags, zero-initialised; the *_p
+ # pointers below start out at the plain x64 implementations and are
+ # presumably repointed once a faster variant is selected at init time --
+ # the selection code is not in view here, so confirm against the caller.
+ # ---------------------------------------------------------------------
+#ifndef __APPLE__
+.data
+.type cpuFlagsSet, @object
+.size cpuFlagsSet,4
+cpuFlagsSet:
+ .long 0
+#else
+.section __DATA,__data
+.p2align 2
+_cpuFlagsSet:
+ .long 0
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type intelFlags, @object
+.size intelFlags,4
+intelFlags:
+ .long 0
+#else
+.section __DATA,__data
+.p2align 2
+_intelFlags:
+ .long 0
+#endif /* __APPLE__ */
+ # Function-pointer table: one 8-byte slot per dispatched routine,
+ # each initialised to the corresponding *_x64 implementation.
+#ifndef __APPLE__
+.data
+.type fe_mul_p, @object
+.size fe_mul_p,8
+fe_mul_p:
+ .quad fe_mul_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_mul_p:
+ .quad _fe_mul_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_sq_p, @object
+.size fe_sq_p,8
+fe_sq_p:
+ .quad fe_sq_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_sq_p:
+ .quad _fe_sq_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_mul121666_p, @object
+.size fe_mul121666_p,8
+fe_mul121666_p:
+ .quad fe_mul121666_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_mul121666_p:
+ .quad _fe_mul121666_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_sq2_p, @object
+.size fe_sq2_p,8
+fe_sq2_p:
+ .quad fe_sq2_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_sq2_p:
+ .quad _fe_sq2_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_invert_p, @object
+.size fe_invert_p,8
+fe_invert_p:
+ .quad fe_invert_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_invert_p:
+ .quad _fe_invert_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type curve25519_p, @object
+.size curve25519_p,8
+curve25519_p:
+ .quad curve25519_x64
+#else
+.section __DATA,__data
+.p2align 2
+_curve25519_p:
+ .quad _curve25519_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_pow22523_p, @object
+.size fe_pow22523_p,8
+fe_pow22523_p:
+ .quad fe_pow22523_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_pow22523_p:
+ .quad _fe_pow22523_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_ge_to_p2_p, @object
+.size fe_ge_to_p2_p,8
+fe_ge_to_p2_p:
+ .quad fe_ge_to_p2_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_ge_to_p2_p:
+ .quad _fe_ge_to_p2_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_ge_to_p3_p, @object
+.size fe_ge_to_p3_p,8
+fe_ge_to_p3_p:
+ .quad fe_ge_to_p3_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_ge_to_p3_p:
+ .quad _fe_ge_to_p3_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_ge_dbl_p, @object
+.size fe_ge_dbl_p,8
+fe_ge_dbl_p:
+ .quad fe_ge_dbl_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_ge_dbl_p:
+ .quad _fe_ge_dbl_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_ge_madd_p, @object
+.size fe_ge_madd_p,8
+fe_ge_madd_p:
+ .quad fe_ge_madd_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_ge_madd_p:
+ .quad _fe_ge_madd_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_ge_msub_p, @object
+.size fe_ge_msub_p,8
+fe_ge_msub_p:
+ .quad fe_ge_msub_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_ge_msub_p:
+ .quad _fe_ge_msub_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_ge_add_p, @object
+.size fe_ge_add_p,8
+fe_ge_add_p:
+ .quad fe_ge_add_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_ge_add_p:
+ .quad _fe_ge_add_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+.type fe_ge_sub_p, @object
+.size fe_ge_sub_p,8
+fe_ge_sub_p:
+ .quad fe_ge_sub_x64
+#else
+.section __DATA,__data
+.p2align 2
+_fe_ge_sub_p:
+ .quad _fe_ge_sub_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_mul_x64
+.type fe_mul_x64,@function
+.align 4
+fe_mul_x64:
+#else
+.section __TEXT,__text
+.globl _fe_mul_x64
+.p2align 2
+_fe_mul_x64:
+#endif /* __APPLE__ */
+ # fe_mul_x64(r, a, b): r = a * b mod 2^255-19.
+ # ABI:  SysV AMD64.  rdi = r, rsi = a, rdx = b; each is 4 64-bit limbs.
+ # b is moved to rcx up front because mulq writes rdx.
+ # Straight-line schoolbook 4x4 multiply into r8..r15, then reduce:
+ # since 2^255 = 19 (mod p), the top 256 bits are folded back in * 19.
+ # Callee-saved r12-r15/rbx are preserved; rax/rcx/rdx/r8-r11 clobbered.
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbx
+ movq %rdx, %rcx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rcx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rcx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rcx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rcx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rcx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rcx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rcx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ # rbx = 2^63-1 mask used to strip bit 255.
+ movq $0x7fffffffffffffff, %rbx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ # Branch-free second fold keeps the routine constant-time.
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ popq %rbx
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_mul_x64,.-fe_mul_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_sq_x64
+.type fe_sq_x64,@function
+.align 4
+fe_sq_x64:
+#else
+.section __TEXT,__text
+.globl _fe_sq_x64
+.p2align 2
+_fe_sq_x64:
+#endif /* __APPLE__ */
+ # fe_sq_x64(r, a): r = a^2 mod 2^255-19.
+ # ABI:  SysV AMD64.  rdi = r, rsi = a; 4 64-bit limbs each.
+ # Computes the off-diagonal products once, doubles them, then adds the
+ # diagonal squares; reduction folds the top 256 bits back in * 19
+ # (2^255 = 19 mod p).  r12-r15 preserved; rax/rcx/rdx/r8-r11 clobbered.
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ # Square
+ # A[0] * A[1]
+ movq (%rsi), %rax
+ mulq 8(%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * A[2]
+ movq (%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[0] * A[3]
+ movq (%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * A[2]
+ movq 8(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[1] * A[3]
+ movq 8(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[2] * A[3]
+ movq 16(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r13, %r13
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # Double
+ xorq %r14, %r14
+ addq %r8, %r8
+ adcq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq $0x00, %r14
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ movq %rax, %rcx
+ movq %rdx, %r15
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %r15, %r8
+ adcq %rax, %r9
+ adcq $0x00, %rdx
+ movq %rdx, %r15
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %r15, %r10
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ movq %rdx, %r15
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %r13
+ adcq %rdx, %r14
+ addq %r15, %r12
+ adcq $0x00, %r13
+ adcq $0x00, %r14
+ # Reduce
+ # r15 = 2^63-1 mask used to strip bit 255.
+ movq $0x7fffffffffffffff, %r15
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ shldq $0x01, %r10, %r11
+ andq %r15, %r10
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r11
+ xorq %r11, %r11
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r11
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ # Add remaining product results in
+ addq %r11, %r8
+ adcq %r12, %r9
+ adcq %r13, %r10
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r10, %rdx
+ imulq $19, %rdx, %rax
+ andq %r15, %r10
+ addq %rax, %rcx
+ adcq $0x00, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ # Reduce if top bit set
+ # Branch-free second fold keeps the routine constant-time.
+ movq %r10, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %r15, %r10
+ addq %rax, %rcx
+ adcq $0x00, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ # Store
+ movq %rcx, (%rdi)
+ movq %r8, 8(%rdi)
+ movq %r9, 16(%rdi)
+ movq %r10, 24(%rdi)
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_sq_x64,.-fe_sq_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_sq_n_x64
+.type fe_sq_n_x64,@function
+.align 4
+fe_sq_n_x64:
+#else
+.section __TEXT,__text
+.globl _fe_sq_n_x64
+.p2align 2
+_fe_sq_n_x64:
+#endif /* __APPLE__ */
+ # fe_sq_n_x64(r, a, n): repeatedly square -- r = a^(2^n) mod 2^255-19.
+ # ABI:  SysV AMD64.  rdi = r, rsi = a, rdx = n (count, moved to rcx).
+ # Only the low byte of the count is used (decb %cl), so n must be
+ # 1..255; callers in this file pass 4..99.  After the first pass the
+ # result is read back from rdi, so the loop squares in place when
+ # rdi == rsi (how every caller here invokes it -- note the loop reads
+ # rsi each iteration, so distinct rdi/rsi would NOT iterate correctly;
+ # TODO confirm intended contract).
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbx
+ movq %rdx, %rcx
+L_fe_sq_n_x64:
+ # Square
+ # A[0] * A[1]
+ movq (%rsi), %rax
+ mulq 8(%rsi)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * A[2]
+ movq (%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[0] * A[3]
+ movq (%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * A[2]
+ movq 8(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * A[3]
+ movq 8(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[2] * A[3]
+ movq 16(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Double
+ xorq %r15, %r15
+ addq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq $0x00, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ movq %rax, %r8
+ movq %rdx, %rbx
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %rbx, %r9
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ movq %rdx, %rbx
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %rbx, %r11
+ adcq %rax, %r12
+ adcq $0x00, %rdx
+ movq %rdx, %rbx
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %r14
+ adcq %rdx, %r15
+ addq %rbx, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Reduce
+ # rbx = 2^63-1 mask used to strip bit 255.
+ movq $0x7fffffffffffffff, %rbx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Loop until the (byte-sized) count is exhausted.
+ decb %cl
+ jnz L_fe_sq_n_x64
+ popq %rbx
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_sq_n_x64,.-fe_sq_n_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_mul121666_x64
+.type fe_mul121666_x64,@function
+.align 4
+fe_mul121666_x64:
+#else
+.section __TEXT,__text
+.globl _fe_mul121666_x64
+.p2align 2
+_fe_mul121666_x64:
+#endif /* __APPLE__ */
+ # fe_mul121666_x64(r, a): r = a * 121666 mod 2^255-19.
+ # 121666 = 0x1db42 = (486662+2)/4, the curve constant used in the
+ # Montgomery-ladder step for Curve25519 (RFC 7748).
+ # ABI:  SysV AMD64.  rdi = r, rsi = a.  Only r12 needs saving.
+ pushq %r12
+ # Multiply by 121666
+ movq $0x1db42, %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ movq %rax, %r8
+ movq %rdx, %r9
+ movq $0x1db42, %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ movq $0x1db42, %rax
+ mulq 16(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ movq $0x1db42, %rax
+ mulq 24(%rsi)
+ # Fold the overflow above bit 255 back in * 19 (2^255 = 19 mod p).
+ movq $0x7fffffffffffffff, %rcx
+ addq %rax, %r11
+ adcq %rdx, %r12
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ movq $19, %rax
+ mulq %r12
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_mul121666_x64,.-fe_mul121666_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_sq2_x64
+.type fe_sq2_x64,@function
+.align 4
+fe_sq2_x64:
+#else
+.section __TEXT,__text
+.globl _fe_sq2_x64
+.p2align 2
+_fe_sq2_x64:
+#endif /* __APPLE__ */
+ # fe_sq2_x64(r, a): r = 2 * a^2 mod 2^255-19.
+ # ABI:  SysV AMD64.  rdi = r, rsi = a; 4 64-bit limbs each.
+ # Same square as fe_sq_x64, but the doubling-by-2 is merged into the
+ # reduction shifts (shld by 2 instead of 1 on the high limbs), so the
+ # top bits spill 3 positions into rax and are folded in * 19*19.
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbx
+ # Square * 2
+ # A[0] * A[1]
+ movq (%rsi), %rax
+ mulq 8(%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * A[2]
+ movq (%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[0] * A[3]
+ movq (%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * A[2]
+ movq 8(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[1] * A[3]
+ movq 8(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[2] * A[3]
+ movq 16(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r13, %r13
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # Double
+ xorq %r14, %r14
+ addq %r8, %r8
+ adcq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq $0x00, %r14
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ movq %rax, %rcx
+ movq %rdx, %r15
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %r15, %r8
+ adcq %rax, %r9
+ adcq $0x00, %rdx
+ movq %rdx, %r15
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %r15, %r10
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ movq %rdx, %r15
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %r13
+ adcq %rdx, %r14
+ addq %r15, %r12
+ adcq $0x00, %r13
+ adcq $0x00, %r14
+ # Reduce
+ movq $0x7fffffffffffffff, %rbx
+ xorq %rax, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ # High limbs shift by 2 (the extra *2), low limbs by 1; rax collects
+ # the 3 bits that spill off the very top.
+ shldq $3, %r14, %rax
+ shldq $2, %r13, %r14
+ shldq $2, %r12, %r13
+ shldq $2, %r11, %r12
+ shldq $2, %r10, %r11
+ shldq $0x01, %r9, %r10
+ shldq $0x01, %r8, %r9
+ shldq $0x01, %rcx, %r8
+ shlq $0x01, %rcx
+ andq %rbx, %r10
+ # Two out left, one in right
+ andq %rbx, %r14
+ # Multiply top bits by 19*19
+ # 0x169 = 361 = 19*19: these bits are 2^255 twice removed.
+ imulq $0x169, %rax, %r15
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r11
+ xorq %r11, %r11
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r11
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ # Add remaining produce results in
+ addq %r15, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ adcq %r13, %r10
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r10, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r10
+ addq %rax, %rcx
+ adcq $0x00, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ # Reduce if top bit set
+ movq %r10, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r10
+ addq %rax, %rcx
+ adcq $0x00, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ # Store
+ movq %rcx, (%rdi)
+ movq %r8, 8(%rdi)
+ movq %r9, 16(%rdi)
+ movq %r10, 24(%rdi)
+ popq %rbx
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_sq2_x64,.-fe_sq2_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_invert_x64
+.type fe_invert_x64,@function
+.align 4
+fe_invert_x64:
+#else
+.section __TEXT,__text
+.globl _fe_invert_x64
+.p2align 2
+_fe_invert_x64:
+#endif /* __APPLE__ */
+ # fe_invert_x64(r, a): r = a^-1 mod 2^255-19, via a^(p-2) (Fermat),
+ # using the standard fixed square-and-multiply addition chain.
+ # ABI:  SysV AMD64.  rdi = r, rsi = a.
+ # Frame: 0x90 bytes -- temporaries t0/t1/t2/t3 at rsp+0/32/64/96,
+ # saved rdi at 128(%rsp) and rsi at 136(%rsp) (restored across the
+ # fe_sq/fe_mul/fe_sq_n calls, which clobber the argument registers).
+ subq $0x90, %rsp
+ # Invert
+ movq %rdi, 128(%rsp)
+ movq %rsi, 136(%rsp)
+ movq %rsp, %rdi
+ movq 136(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ movq 136(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $4, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $19, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $0x63, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq $4, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ movq 128(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # Restore the caller's argument registers before tearing down the frame.
+ movq 136(%rsp), %rsi
+ movq 128(%rsp), %rdi
+ addq $0x90, %rsp
+ repz retq
+#ifndef __APPLE__
+.size fe_invert_x64,.-fe_invert_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl curve25519_x64
+.type curve25519_x64,@function
+.align 4
+curve25519_x64:
+#else
+.section __TEXT,__text
+.globl _curve25519_x64
+.p2align 2
+_curve25519_x64:
+#endif /* __APPLE__ */
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbx
+ pushq %rbp
+ movq %rdx, %r8
+ subq $0xb8, %rsp
+ xorq %rbx, %rbx
+ movq %rdi, 176(%rsp)
+ # Set one
+ movq $0x01, (%rdi)
+ movq $0x00, 8(%rdi)
+ movq $0x00, 16(%rdi)
+ movq $0x00, 24(%rdi)
+ # Set zero
+ movq $0x00, (%rsp)
+ movq $0x00, 8(%rsp)
+ movq $0x00, 16(%rsp)
+ movq $0x00, 24(%rsp)
+ # Set one
+ movq $0x01, 32(%rsp)
+ movq $0x00, 40(%rsp)
+ movq $0x00, 48(%rsp)
+ movq $0x00, 56(%rsp)
+ # Copy
+ movq (%r8), %rcx
+ movq 8(%r8), %r9
+ movq 16(%r8), %r10
+ movq 24(%r8), %r11
+ movq %rcx, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq %r10, 80(%rsp)
+ movq %r11, 88(%rsp)
+ movb $62, 168(%rsp)
+ movq $3, 160(%rsp)
+L_curve25519_x64_words:
+L_curve25519_x64_bits:
+ movq 160(%rsp), %r9
+ movb 168(%rsp), %cl
+ movq (%rsi,%r9,8), %rbp
+ shrq %cl, %rbp
+ andq $0x01, %rbp
+ xorq %rbp, %rbx
+ negq %rbx
+ # Conditional Swap
+ movq (%rdi), %rcx
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %r11
+ xorq 64(%rsp), %rcx
+ xorq 72(%rsp), %r9
+ xorq 80(%rsp), %r10
+ xorq 88(%rsp), %r11
+ andq %rbx, %rcx
+ andq %rbx, %r9
+ andq %rbx, %r10
+ andq %rbx, %r11
+ xorq %rcx, (%rdi)
+ xorq %r9, 8(%rdi)
+ xorq %r10, 16(%rdi)
+ xorq %r11, 24(%rdi)
+ xorq %rcx, 64(%rsp)
+ xorq %r9, 72(%rsp)
+ xorq %r10, 80(%rsp)
+ xorq %r11, 88(%rsp)
+ # Conditional Swap
+ movq (%rsp), %rcx
+ movq 8(%rsp), %r9
+ movq 16(%rsp), %r10
+ movq 24(%rsp), %r11
+ xorq 32(%rsp), %rcx
+ xorq 40(%rsp), %r9
+ xorq 48(%rsp), %r10
+ xorq 56(%rsp), %r11
+ andq %rbx, %rcx
+ andq %rbx, %r9
+ andq %rbx, %r10
+ andq %rbx, %r11
+ xorq %rcx, (%rsp)
+ xorq %r9, 8(%rsp)
+ xorq %r10, 16(%rsp)
+ xorq %r11, 24(%rsp)
+ xorq %rcx, 32(%rsp)
+ xorq %r9, 40(%rsp)
+ xorq %r10, 48(%rsp)
+ xorq %r11, 56(%rsp)
+ movq %rbp, %rbx
+ # Add
+ movq (%rdi), %rcx
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %rbp
+ movq %rcx, %r12
+ addq (%rsp), %rcx
+ movq %r9, %r13
+ adcq 8(%rsp), %r9
+ movq %r10, %r14
+ adcq 16(%rsp), %r10
+ movq %rbp, %r15
+ adcq 24(%rsp), %rbp
+ movq $-19, %rax
+ movq %rbp, %r11
+ movq $0x7fffffffffffffff, %rdx
+ sarq $63, %rbp
+ # Mask the modulus
+ andq %rbp, %rax
+ andq %rbp, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %rcx
+ sbbq %rbp, %r9
+ sbbq %rbp, %r10
+ sbbq %rdx, %r11
+ # Sub
+ subq (%rsp), %r12
+ movq $0x00, %rbp
+ sbbq 8(%rsp), %r13
+ movq $-19, %rax
+ sbbq 16(%rsp), %r14
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rsp), %r15
+ sbbq $0x00, %rbp
+ # Mask the modulus
+ andq %rbp, %rax
+ andq %rbp, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r12
+ adcq %rbp, %r13
+ adcq %rbp, %r14
+ adcq %rdx, %r15
+ movq %rcx, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, 128(%rsp)
+ movq %r13, 136(%rsp)
+ movq %r14, 144(%rsp)
+ movq %r15, 152(%rsp)
+ # Add
+ movq 64(%rsp), %rcx
+ movq 72(%rsp), %r9
+ movq 80(%rsp), %r10
+ movq 88(%rsp), %rbp
+ movq %rcx, %r12
+ addq 32(%rsp), %rcx
+ movq %r9, %r13
+ adcq 40(%rsp), %r9
+ movq %r10, %r14
+ adcq 48(%rsp), %r10
+ movq %rbp, %r15
+ adcq 56(%rsp), %rbp
+ movq $-19, %rax
+ movq %rbp, %r11
+ movq $0x7fffffffffffffff, %rdx
+ sarq $63, %rbp
+ # Mask the modulus
+ andq %rbp, %rax
+ andq %rbp, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %rcx
+ sbbq %rbp, %r9
+ sbbq %rbp, %r10
+ sbbq %rdx, %r11
+ # Sub
+ subq 32(%rsp), %r12
+ movq $0x00, %rbp
+ sbbq 40(%rsp), %r13
+ movq $-19, %rax
+ sbbq 48(%rsp), %r14
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 56(%rsp), %r15
+ sbbq $0x00, %rbp
+ # Mask the modulus
+ andq %rbp, %rax
+ andq %rbp, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r12
+ adcq %rbp, %r13
+ adcq %rbp, %r14
+ adcq %rdx, %r15
+ movq %rcx, (%rsp)
+ movq %r9, 8(%rsp)
+ movq %r10, 16(%rsp)
+ movq %r11, 24(%rsp)
+ movq %r12, 96(%rsp)
+ movq %r13, 104(%rsp)
+ movq %r14, 112(%rsp)
+ movq %r15, 120(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq (%rdi), %rax
+ mulq 96(%rsp)
+ movq %rax, %rcx
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rdi), %rax
+ mulq 96(%rsp)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rdi), %rax
+ mulq 104(%rsp)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rdi), %rax
+ mulq 96(%rsp)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rdi), %rax
+ mulq 104(%rsp)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rdi), %rax
+ mulq 112(%rsp)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rdi), %rax
+ mulq 96(%rsp)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rdi), %rax
+ mulq 104(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rdi), %rax
+ mulq 112(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rdi), %rax
+ mulq 120(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rdi), %rax
+ mulq 104(%rsp)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rdi), %rax
+ mulq 112(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rdi), %rax
+ mulq 120(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rdi), %rax
+ mulq 112(%rsp)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rdi), %rax
+ mulq 120(%rsp)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rdi), %rax
+ mulq 120(%rsp)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq %r10, 48(%rsp)
+ movq %r11, 56(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq 128(%rsp), %rax
+ mulq (%rsp)
+ movq %rax, %rcx
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 136(%rsp), %rax
+ mulq (%rsp)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq 128(%rsp), %rax
+ mulq 8(%rsp)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 144(%rsp), %rax
+ mulq (%rsp)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 136(%rsp), %rax
+ mulq 8(%rsp)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq 128(%rsp), %rax
+ mulq 16(%rsp)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 152(%rsp), %rax
+ mulq (%rsp)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 144(%rsp), %rax
+ mulq 8(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 136(%rsp), %rax
+ mulq 16(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq 128(%rsp), %rax
+ mulq 24(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 152(%rsp), %rax
+ mulq 8(%rsp)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 144(%rsp), %rax
+ mulq 16(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 136(%rsp), %rax
+ mulq 24(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 152(%rsp), %rax
+ mulq 16(%rsp)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 144(%rsp), %rax
+ mulq 24(%rsp)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 152(%rsp), %rax
+ mulq 24(%rsp)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, (%rsp)
+ movq %r9, 8(%rsp)
+ movq %r10, 16(%rsp)
+ movq %r11, 24(%rsp)
+ # Square
+ # A[0] * A[1]
+ movq 128(%rsp), %rax
+ mulq 136(%rsp)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * A[2]
+ movq 128(%rsp), %rax
+ mulq 144(%rsp)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[0] * A[3]
+ movq 128(%rsp), %rax
+ mulq 152(%rsp)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * A[2]
+ movq 136(%rsp), %rax
+ mulq 144(%rsp)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * A[3]
+ movq 136(%rsp), %rax
+ mulq 152(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[2] * A[3]
+ movq 144(%rsp), %rax
+ mulq 152(%rsp)
+ xorq %r14, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Double
+ xorq %r15, %r15
+ addq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq $0x00, %r15
+ # A[0] * A[0]
+ movq 128(%rsp), %rax
+ mulq %rax
+ movq %rax, %rcx
+ movq %rdx, %rbp
+ # A[1] * A[1]
+ movq 136(%rsp), %rax
+ mulq %rax
+ addq %rbp, %r9
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ movq %rdx, %rbp
+ # A[2] * A[2]
+ movq 144(%rsp), %rax
+ mulq %rax
+ addq %rbp, %r11
+ adcq %rax, %r12
+ adcq $0x00, %rdx
+ movq %rdx, %rbp
+ # A[3] * A[3]
+ movq 152(%rsp), %rax
+ mulq %rax
+ addq %rax, %r14
+ adcq %rdx, %r15
+ addq %rbp, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq %r10, 112(%rsp)
+ movq %r11, 120(%rsp)
+ # Square
+ # A[0] * A[1]
+ movq (%rdi), %rax
+ mulq 8(%rdi)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * A[2]
+ movq (%rdi), %rax
+ mulq 16(%rdi)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[0] * A[3]
+ movq (%rdi), %rax
+ mulq 24(%rdi)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * A[2]
+ movq 8(%rdi), %rax
+ mulq 16(%rdi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * A[3]
+ movq 8(%rdi), %rax
+ mulq 24(%rdi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[2] * A[3]
+ movq 16(%rdi), %rax
+ mulq 24(%rdi)
+ xorq %r14, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Double
+ xorq %r15, %r15
+ addq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq $0x00, %r15
+ # A[0] * A[0]
+ movq (%rdi), %rax
+ mulq %rax
+ movq %rax, %rcx
+ movq %rdx, %rbp
+ # A[1] * A[1]
+ movq 8(%rdi), %rax
+ mulq %rax
+ addq %rbp, %r9
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ movq %rdx, %rbp
+ # A[2] * A[2]
+ movq 16(%rdi), %rax
+ mulq %rax
+ addq %rbp, %r11
+ adcq %rax, %r12
+ adcq $0x00, %rdx
+ movq %rdx, %rbp
+ # A[3] * A[3]
+ movq 24(%rdi), %rax
+ mulq %rax
+ addq %rax, %r14
+ adcq %rdx, %r15
+ addq %rbp, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, 128(%rsp)
+ movq %r9, 136(%rsp)
+ movq %r10, 144(%rsp)
+ movq %r11, 152(%rsp)
+ # Add
+ movq 32(%rsp), %rcx
+ movq 40(%rsp), %r9
+ movq 48(%rsp), %r10
+ movq 56(%rsp), %rbp
+ movq %rcx, %r12
+ addq (%rsp), %rcx
+ movq %r9, %r13
+ adcq 8(%rsp), %r9
+ movq %r10, %r14
+ adcq 16(%rsp), %r10
+ movq %rbp, %r15
+ adcq 24(%rsp), %rbp
+ movq $-19, %rax
+ movq %rbp, %r11
+ movq $0x7fffffffffffffff, %rdx
+ sarq $63, %rbp
+ # Mask the modulus
+ andq %rbp, %rax
+ andq %rbp, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %rcx
+ sbbq %rbp, %r9
+ sbbq %rbp, %r10
+ sbbq %rdx, %r11
+ # Sub
+ subq (%rsp), %r12
+ movq $0x00, %rbp
+ sbbq 8(%rsp), %r13
+ movq $-19, %rax
+ sbbq 16(%rsp), %r14
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rsp), %r15
+ sbbq $0x00, %rbp
+ # Mask the modulus
+ andq %rbp, %rax
+ andq %rbp, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r12
+ adcq %rbp, %r13
+ adcq %rbp, %r14
+ adcq %rdx, %r15
+ movq %rcx, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq %r10, 80(%rsp)
+ movq %r11, 88(%rsp)
+ movq %r12, (%rsp)
+ movq %r13, 8(%rsp)
+ movq %r14, 16(%rsp)
+ movq %r15, 24(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq 96(%rsp), %rax
+ mulq 128(%rsp)
+ movq %rax, %rcx
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 104(%rsp), %rax
+ mulq 128(%rsp)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq 96(%rsp), %rax
+ mulq 136(%rsp)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 112(%rsp), %rax
+ mulq 128(%rsp)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 104(%rsp), %rax
+ mulq 136(%rsp)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq 96(%rsp), %rax
+ mulq 144(%rsp)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 120(%rsp), %rax
+ mulq 128(%rsp)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 112(%rsp), %rax
+ mulq 136(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 104(%rsp), %rax
+ mulq 144(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq 96(%rsp), %rax
+ mulq 152(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 120(%rsp), %rax
+ mulq 136(%rsp)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 112(%rsp), %rax
+ mulq 144(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 104(%rsp), %rax
+ mulq 152(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 120(%rsp), %rax
+ mulq 144(%rsp)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 112(%rsp), %rax
+ mulq 152(%rsp)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 120(%rsp), %rax
+ mulq 152(%rsp)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Sub
+ movq 128(%rsp), %rcx
+ movq 136(%rsp), %r9
+ movq 144(%rsp), %r10
+ movq 152(%rsp), %r11
+ subq 96(%rsp), %rcx
+ movq $0x00, %rbp
+ sbbq 104(%rsp), %r9
+ movq $-19, %rax
+ sbbq 112(%rsp), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 120(%rsp), %r11
+ sbbq $0x00, %rbp
+ # Mask the modulus
+ andq %rbp, %rax
+ andq %rbp, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %rcx
+ adcq %rbp, %r9
+ adcq %rbp, %r10
+ adcq %rdx, %r11
+ movq %rcx, 128(%rsp)
+ movq %r9, 136(%rsp)
+ movq %r10, 144(%rsp)
+ movq %r11, 152(%rsp)
+ # Square
+ # A[0] * A[1]
+ movq (%rsp), %rax
+ mulq 8(%rsp)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * A[2]
+ movq (%rsp), %rax
+ mulq 16(%rsp)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[0] * A[3]
+ movq (%rsp), %rax
+ mulq 24(%rsp)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * A[2]
+ movq 8(%rsp), %rax
+ mulq 16(%rsp)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * A[3]
+ movq 8(%rsp), %rax
+ mulq 24(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[2] * A[3]
+ movq 16(%rsp), %rax
+ mulq 24(%rsp)
+ xorq %r14, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Double
+ xorq %r15, %r15
+ addq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq $0x00, %r15
+ # A[0] * A[0]
+ movq (%rsp), %rax
+ mulq %rax
+ movq %rax, %rcx
+ movq %rdx, %rbp
+ # A[1] * A[1]
+ movq 8(%rsp), %rax
+ mulq %rax
+ addq %rbp, %r9
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ movq %rdx, %rbp
+ # A[2] * A[2]
+ movq 16(%rsp), %rax
+ mulq %rax
+ addq %rbp, %r11
+ adcq %rax, %r12
+ adcq $0x00, %rdx
+ movq %rdx, %rbp
+ # A[3] * A[3]
+ movq 24(%rsp), %rax
+ mulq %rax
+ addq %rax, %r14
+ adcq %rdx, %r15
+ addq %rbp, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, (%rsp)
+ movq %r9, 8(%rsp)
+ movq %r10, 16(%rsp)
+ movq %r11, 24(%rsp)
+ # Multiply by 121666
+ movq $0x1db42, %rax
+ mulq 128(%rsp)
+ xorq %r10, %r10
+ movq %rax, %rcx
+ movq %rdx, %r9
+ movq $0x1db42, %rax
+ mulq 136(%rsp)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ movq $0x1db42, %rax
+ mulq 144(%rsp)
+ xorq %r13, %r13
+ addq %rax, %r10
+ adcq %rdx, %r11
+ movq $0x1db42, %rax
+ mulq 152(%rsp)
+ movq $0x7fffffffffffffff, %r12
+ addq %rax, %r11
+ adcq %rdx, %r13
+ shldq $0x01, %r11, %r13
+ andq %r12, %r11
+ movq $19, %rax
+ mulq %r13
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ movq %rcx, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq %r10, 48(%rsp)
+ movq %r11, 56(%rsp)
+ # Square
+ # A[0] * A[1]
+ movq 64(%rsp), %rax
+ mulq 72(%rsp)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * A[2]
+ movq 64(%rsp), %rax
+ mulq 80(%rsp)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[0] * A[3]
+ movq 64(%rsp), %rax
+ mulq 88(%rsp)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * A[2]
+ movq 72(%rsp), %rax
+ mulq 80(%rsp)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * A[3]
+ movq 72(%rsp), %rax
+ mulq 88(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[2] * A[3]
+ movq 80(%rsp), %rax
+ mulq 88(%rsp)
+ xorq %r14, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Double
+ xorq %r15, %r15
+ addq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq $0x00, %r15
+ # A[0] * A[0]
+ movq 64(%rsp), %rax
+ mulq %rax
+ movq %rax, %rcx
+ movq %rdx, %rbp
+ # A[1] * A[1]
+ movq 72(%rsp), %rax
+ mulq %rax
+ addq %rbp, %r9
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ movq %rdx, %rbp
+ # A[2] * A[2]
+ movq 80(%rsp), %rax
+ mulq %rax
+ addq %rbp, %r11
+ adcq %rax, %r12
+ adcq $0x00, %rdx
+ movq %rdx, %rbp
+ # A[3] * A[3]
+ movq 88(%rsp), %rax
+ mulq %rax
+ addq %rax, %r14
+ adcq %rdx, %r15
+ addq %rbp, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq %r10, 80(%rsp)
+ movq %r11, 88(%rsp)
+ # Add
+ movq 96(%rsp), %rcx
+ movq 104(%rsp), %r9
+ addq 32(%rsp), %rcx
+ movq 112(%rsp), %r10
+ adcq 40(%rsp), %r9
+ movq 120(%rsp), %rbp
+ adcq 48(%rsp), %r10
+ movq $-19, %rax
+ adcq 56(%rsp), %rbp
+ movq $0x7fffffffffffffff, %rdx
+ movq %rbp, %r11
+ sarq $63, %rbp
+ # Mask the modulus
+ andq %rbp, %rax
+ andq %rbp, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %rcx
+ sbbq %rbp, %r9
+ sbbq %rbp, %r10
+ sbbq %rdx, %r11
+ movq %rcx, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq %r10, 112(%rsp)
+ movq %r11, 120(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq (%rsp), %rax
+ mulq (%r8)
+ movq %rax, %rcx
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rsp), %rax
+ mulq (%r8)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rsp), %rax
+ mulq 8(%r8)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rsp), %rax
+ mulq (%r8)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rsp), %rax
+ mulq 8(%r8)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rsp), %rax
+ mulq 16(%r8)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rsp), %rax
+ mulq (%r8)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rsp), %rax
+ mulq 8(%r8)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rsp), %rax
+ mulq 16(%r8)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rsp), %rax
+ mulq 24(%r8)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rsp), %rax
+ mulq 8(%r8)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rsp), %rax
+ mulq 16(%r8)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rsp), %rax
+ mulq 24(%r8)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rsp), %rax
+ mulq 16(%r8)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rsp), %rax
+ mulq 24(%r8)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rsp), %rax
+ mulq 24(%r8)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq %r10, 48(%rsp)
+ movq %r11, 56(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq 96(%rsp), %rax
+ mulq 128(%rsp)
+ movq %rax, %rcx
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 104(%rsp), %rax
+ mulq 128(%rsp)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq 96(%rsp), %rax
+ mulq 136(%rsp)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 112(%rsp), %rax
+ mulq 128(%rsp)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 104(%rsp), %rax
+ mulq 136(%rsp)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq 96(%rsp), %rax
+ mulq 144(%rsp)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 120(%rsp), %rax
+ mulq 128(%rsp)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 112(%rsp), %rax
+ mulq 136(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 104(%rsp), %rax
+ mulq 144(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq 96(%rsp), %rax
+ mulq 152(%rsp)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 120(%rsp), %rax
+ mulq 136(%rsp)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 112(%rsp), %rax
+ mulq 144(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 104(%rsp), %rax
+ mulq 152(%rsp)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 120(%rsp), %rax
+ mulq 144(%rsp)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 112(%rsp), %rax
+ mulq 152(%rsp)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 120(%rsp), %rax
+ mulq 152(%rsp)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, (%rsp)
+ movq %r9, 8(%rsp)
+ movq %r10, 16(%rsp)
+ movq %r11, 24(%rsp)
+ decb 168(%rsp)
+ jge L_curve25519_x64_bits
+ movq $63, 168(%rsp)
+ decb 160(%rsp)
+ jge L_curve25519_x64_words
+ # Invert
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ movq %rsp, %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 96(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $4, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 128(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 128(%rsp), %rdi
+ leaq 128(%rsp), %rsi
+ movq $19, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 128(%rsp), %rsi
+ leaq 96(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 128(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 128(%rsp), %rdi
+ leaq 128(%rsp), %rsi
+ movq $0x63, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 128(%rsp), %rsi
+ leaq 96(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $4, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ movq %rsp, %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ movq 176(%rsp), %rdi
+ # Multiply
+ # A[0] * B[0]
+ movq (%rsp), %rax
+ mulq (%rdi)
+ movq %rax, %rcx
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rsp), %rax
+ mulq (%rdi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rsp), %rax
+ mulq 8(%rdi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rsp), %rax
+ mulq (%rdi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rsp), %rax
+ mulq 8(%rdi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rsp), %rax
+ mulq 16(%rdi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rsp), %rax
+ mulq (%rdi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rsp), %rax
+ mulq 8(%rdi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rsp), %rax
+ mulq 16(%rdi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rsp), %rax
+ mulq 24(%rdi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rsp), %rax
+ mulq 8(%rdi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rsp), %rax
+ mulq 16(%rdi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rsp), %rax
+ mulq 24(%rdi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rsp), %rax
+ mulq 16(%rdi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rsp), %rax
+ mulq 24(%rdi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rsp), %rax
+ mulq 24(%rdi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbp
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rbp, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %rcx
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbp, %r11
+ addq %rax, %rcx
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %rcx, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ xorq %rax, %rax
+ addq $0xb8, %rsp
+ popq %rbp
+ popq %rbx
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size curve25519_x64,.-curve25519_x64
+#endif /* __APPLE__ */
+# fe_pow22523_x64(fe r /* %rdi */, const fe a /* %rsi */)  -- SysV AMD64
+# Computes r = a^((p-5)/8) = a^(2^252 - 3) mod p, where p = 2^255 - 19,
+# via a fixed square-and-multiply addition chain (the classic "pow22523"
+# chain used for Ed25519 square-root extraction / point decompression).
+# The chain is data-independent, so the sequence is constant-time given
+# constant-time helpers.
+# Stack frame (0x70 bytes): t0 at 0(%rsp), t1 at 32(%rsp), t2 at 64(%rsp),
+# saved output pointer at 96(%rsp), saved input pointer at 104(%rsp).
+# NOTE(review): fe_sq_x64(r,a), fe_sq_n_x64(r,a,n) and fe_mul_x64(r,a,b)
+# are assumed to square/multiply 4x64-bit field elements with SysV argument
+# order -- confirm against their definitions (not visible in this chunk).
+# NOTE(review): unlike the sibling functions in this file, no
+# ".size fe_pow22523_x64,.-fe_pow22523_x64" follows the epilogue here --
+# confirm whether that ELF size directive was intentionally omitted.
+#ifndef __APPLE__
+.text
+.globl fe_pow22523_x64
+.type fe_pow22523_x64,@function
+.align 4
+fe_pow22523_x64:
+#else
+.section __TEXT,__text
+.globl _fe_pow22523_x64
+.p2align 2
+_fe_pow22523_x64:
+#endif /* __APPLE__ */
+ subq $0x70, %rsp
+ # pow22523
+ # Spill the output (r) and input (a) pointers; %rdi/%rsi are
+ # caller-saved and will be clobbered by the helper calls below.
+ movq %rdi, 96(%rsp)
+ movq %rsi, 104(%rsp)
+ # t0 = a^2
+ movq %rsp, %rdi
+ movq 104(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t1 = t0^2 = a^4
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t1 = t1^2 = a^8
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t1 = a * t1 = a^9
+ leaq 32(%rsp), %rdi
+ movq 104(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t0 = t0 * t1 = a^11
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t0 = t0^2 = a^22
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t0 = t1 * t0 = a^31 = a^(2^5 - 1)
+ movq %rsp, %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t1 = t0^2
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t1 = t1^(2^4), i.e. t1 = t0^(2^5) after 1+4 squarings
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq $4, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ # t0 = t1 * t0 = a^(2^10 - 1)
+ movq %rsp, %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t1 = t0^2
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t1 = t1^(2^9), i.e. t1 = t0^(2^10) after 1+9 squarings
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ # t1 = t1 * t0 = a^(2^20 - 1)
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t2 = t1^2
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t2 = t2^(2^19), i.e. t2 = t1^(2^20) after 1+19 squarings
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $19, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ # t1 = t2 * t1 = a^(2^40 - 1)
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t1 = t1^2
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t1 = t1^(2^9): 1+9 squarings, discarding the low ten bits' worth
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ # t0 = t1 * t0 = a^(2^50 - 1)
+ movq %rsp, %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t1 = t0^2
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t1 = t1^(2^49), i.e. t1 = t0^(2^50) after 1+49 squarings
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ # t1 = t1 * t0 = a^(2^100 - 1)
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t2 = t1^2
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t2 = t2^(2^99), i.e. t2 = t1^(2^100) after 1+0x63 squarings
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $0x63, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ # t1 = t2 * t1 = a^(2^200 - 1)
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t1 = t1^2
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t1 = t1^(2^49): 1+49 squarings, shifting up by 2^50
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_x64@plt
+#else
+ callq _fe_sq_n_x64
+#endif /* __APPLE__ */
+ # t0 = t1 * t0 = a^(2^250 - 1)
+ movq %rsp, %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # t0 = t0^2 = a^(2^251 - 2)
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # t0 = t0^2 = a^(2^252 - 4)
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_x64@plt
+#else
+ callq _fe_sq_x64
+#endif /* __APPLE__ */
+ # r = t0 * a = a^(2^252 - 3)
+ movq 96(%rsp), %rdi
+ movq %rsp, %rsi
+ movq 104(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_x64@plt
+#else
+ callq _fe_mul_x64
+#endif /* __APPLE__ */
+ # Restore the argument registers to their entry values (callers of
+ # these internal helpers appear to rely on this) and drop the frame.
+ movq 104(%rsp), %rsi
+ movq 96(%rsp), %rdi
+ addq $0x70, %rsp
+ # "repz ret": two-byte return, historically friendlier to AMD branch
+ # predictors than a bare ret after a branch target.
+ repz retq
+#ifndef __APPLE__
+.text
+.globl fe_ge_to_p2_x64
+.type fe_ge_to_p2_x64,@function
+.align 4
+fe_ge_to_p2_x64:
+#else
+.section __TEXT,__text
+.globl _fe_ge_to_p2_x64
+.p2align 2
+_fe_ge_to_p2_x64:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $40, %rsp
+ movq %rsi, (%rsp)
+ movq %rdx, 8(%rsp)
+ movq %rcx, 16(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+ movq 16(%rsp), %rsi
+ movq 88(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq (%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 8(%rsp), %rdi
+ movq 32(%rsp), %rsi
+ movq 88(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ addq $40, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_to_p2_x64,.-fe_ge_to_p2_x64
+#endif /* __APPLE__ */
+# fe_ge_to_p3_x64(rx, ry, rz, rt, px, py, pz, pt)
+# Point conversion via four 256-bit field multiplications mod 2^255-19:
+#   rx = px * pt ; ry = py * pz ; rz = pz * pt ; rt = px * py
+# SysV AMD64: rdi=rx, rsi=ry, rdx=rz, rcx=rt, r8=px, r9=py; the 7th and
+# 8th arguments (pz, pt) are on the caller's stack and, after the
+# prologue below (5 pushes + 40-byte frame + return address = 88 bytes),
+# sit at 88(%rsp) and 96(%rsp) respectively.
+# NOTE(review): argument names inferred from the matching ge_p1p1_to_p3
+# C routine (rX=pX*pT, rY=pY*pZ, rZ=pZ*pT, rT=pX*pY) -- confirm against
+# the caller.
+# Callee-saved rbx/r12-r15 are pushed/popped; rax, rcx, rdx and flags are
+# clobbered.  Straight-line code: no data-dependent branches or indices.
+#ifndef __APPLE__
+.text
+.globl fe_ge_to_p3_x64
+.type fe_ge_to_p3_x64,@function
+.align 4
+fe_ge_to_p3_x64:
+#else
+.section __TEXT,__text
+.globl _fe_ge_to_p3_x64
+.p2align 2
+_fe_ge_to_p3_x64:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $40, %rsp
+ # Spill the register pointer arguments so each multiply below can
+ # reload the pair it needs; args 7 and 8 stay at 88/96(%rsp).
+ movq %rsi, (%rsp)
+ movq %rdx, 8(%rsp)
+ movq %rcx, 16(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+ # First product: rx (entry %rdi) = arg5 (24(%rsp)) * arg8 (96(%rsp))
+ movq 24(%rsp), %rsi
+ movq 96(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ # 512-bit product in r8..r15; fold high half times 19 back into the
+ # low half (2^255 == 19 mod p), then clear any remaining top bits.
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Second product: arg2 ((%rsp)) = arg6 (32(%rsp)) * arg7 (88(%rsp)).
+ # Same schoolbook multiply + reduce pattern as above.
+ movq (%rsp), %rdi
+ movq 32(%rsp), %rsi
+ movq 88(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Third product: arg3 (8(%rsp)) = arg8 (96(%rsp)) * arg7 (88(%rsp)).
+ # Same schoolbook multiply + reduce pattern as above.
+ movq 8(%rsp), %rdi
+ movq 88(%rsp), %rsi
+ movq 96(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Fourth product: arg4 (16(%rsp)) = arg5 (24(%rsp)) * arg6 (32(%rsp)).
+ # Same schoolbook multiply + reduce pattern as above.
+ movq 16(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Epilogue: restore callee-saved registers in reverse push order.
+ addq $40, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_to_p3_x64,.-fe_ge_to_p3_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_dbl_x64
+.type fe_ge_dbl_x64,@function
+.align 4
+fe_ge_dbl_x64:
+#else
+.section __TEXT,__text
+.globl _fe_ge_dbl_x64
+.p2align 2
+_fe_ge_dbl_x64:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x50, %rsp
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq (%rsp), %rdi
+ movq 32(%rsp), %rsi
+ # Square
+ # A[0] * A[1]
+ movq (%rsi), %rax
+ mulq 8(%rsi)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * A[2]
+ movq (%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[0] * A[3]
+ movq (%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * A[2]
+ movq 8(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * A[3]
+ movq 8(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[2] * A[3]
+ movq 16(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Double
+ xorq %r15, %r15
+ addq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq $0x00, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ movq %rax, %r8
+ movq %rdx, %rcx
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %rcx, %r9
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ movq %rdx, %rcx
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %rcx, %r11
+ adcq %rax, %r12
+ adcq $0x00, %rdx
+ movq %rdx, %rcx
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %r14
+ adcq %rdx, %r15
+ addq %rcx, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 16(%rsp), %rdi
+ movq 40(%rsp), %rsi
+ # Square
+ # A[0] * A[1]
+ movq (%rsi), %rax
+ mulq 8(%rsi)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * A[2]
+ movq (%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[0] * A[3]
+ movq (%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * A[2]
+ movq 8(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * A[3]
+ movq 8(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[2] * A[3]
+ movq 16(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Double
+ xorq %r15, %r15
+ addq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq $0x00, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ movq %rax, %r8
+ movq %rdx, %rcx
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %rcx, %r9
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ movq %rdx, %rcx
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %rcx, %r11
+ adcq %rax, %r12
+ adcq $0x00, %rdx
+ movq %rdx, %rcx
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %r14
+ adcq %rdx, %r15
+ addq %rcx, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 24(%rsp), %rdi
+ movq 128(%rsp), %rsi
+ # Square * 2
+ # A[0] * A[1]
+ movq (%rsi), %rax
+ mulq 8(%rsi)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * A[2]
+ movq (%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[0] * A[3]
+ movq (%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * A[2]
+ movq 8(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * A[3]
+ movq 8(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[2] * A[3]
+ movq 16(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Double
+ xorq %r15, %r15
+ addq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq $0x00, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ movq %rax, %r8
+ movq %rdx, %rcx
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %rcx, %r9
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ movq %rdx, %rcx
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %rcx, %r11
+ adcq %rax, %r12
+ adcq $0x00, %rdx
+ movq %rdx, %rcx
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %r14
+ adcq %rdx, %r15
+ addq %rcx, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbx
+ xorq %rax, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $3, %r15, %rax
+ shldq $2, %r14, %r15
+ shldq $2, %r13, %r14
+ shldq $2, %r12, %r13
+ shldq $2, %r11, %r12
+ shldq $0x01, %r10, %r11
+ shldq $0x01, %r9, %r10
+ shldq $0x01, %r8, %r9
+ shlq $0x01, %r8
+ andq %rbx, %r11
+ # Two out left, one in right
+ andq %rbx, %r15
+ # Multiply top bits by 19*19
+ imulq $0x169, %rax, %rcx
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %rcx, %r8
+ adcq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 8(%rsp), %rdi
+ movq 32(%rsp), %rsi
+ movq 40(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ leaq 48(%rsp), %rdi
+ movq 8(%rsp), %rsi
+ # Square
+ # A[0] * A[1]
+ movq (%rsi), %rax
+ mulq 8(%rsi)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * A[2]
+ movq (%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[0] * A[3]
+ movq (%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * A[2]
+ movq 8(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * A[3]
+ movq 8(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[2] * A[3]
+ movq 16(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Double
+ xorq %r15, %r15
+ addq %r9, %r9
+ adcq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq $0x00, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ movq %rax, %r8
+ movq %rdx, %rcx
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %rcx, %r9
+ adcq %rax, %r10
+ adcq $0x00, %rdx
+ movq %rdx, %rcx
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %rcx, %r11
+ adcq %rax, %r12
+ adcq $0x00, %rdx
+ movq %rdx, %rcx
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %r14
+ adcq %rdx, %r15
+ addq %rcx, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 8(%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq (%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 16(%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq (%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq (%rsp), %rdi
+ leaq 48(%rsp), %rsi
+ movq 8(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 24(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ addq $0x50, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_dbl_x64,.-fe_ge_dbl_x64
+#endif /* __APPLE__ */
+# fe_ge_madd_x64 -- Ed25519 group operation over GF(2^255-19), field
+# elements stored as four 64-bit little-endian limbs.  The modulus is
+# grounded by the constants used below ($-19, $0x7fffffffffffffff, and the
+# multiply-top-half-by-19 folding).  NOTE(review): presumably this is the
+# ref10-style ge_madd (mixed addition with a precomputed point) -- confirm
+# against the C prototype in the corresponding header.
+# ABI: SysV AMD64.  Six pointer args arrive in rdi,rsi,rdx,rcx,r8,r9 and
+# are spilled to the local frame at 0..40(%rsp); remaining pointer args are
+# read from the caller's stack at 128..160(%rsp) (offset = 8 return addr
+# + 40 bytes of pushed regs + 0x50 locals).  48(%rsp) is a 32-byte field-
+# element temporary.  Callee-saved rbx,r12-r15 are preserved;
+# rax,rcx,rdx,rsi,rdi,r8-r11 and flags are clobbered.
+#ifndef __APPLE__
+.text
+.globl fe_ge_madd_x64
+.type fe_ge_madd_x64,@function
+.align 4
+fe_ge_madd_x64:
+#else
+.section __TEXT,__text
+.globl _fe_ge_madd_x64
+.p2align 2
+_fe_ge_madd_x64:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x50, %rsp
+ # Spill the six register args so rdi/rsi/rbx can be reused as the
+ # dst/src1/src2 pointers for each field operation below.
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ # arg1 <- arg6 + arg5  (mod p)
+ movq (%rsp), %rdi
+ movq 40(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg2 <- arg6 - arg5  (mod p)
+ movq 8(%rsp), %rdi
+ movq 40(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg3 <- arg1 * arg10  (mod p); schoolbook 4x4 mul then fold by 19
+ movq 16(%rsp), %rdi
+ movq (%rsp), %rsi
+ movq 152(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg2 <- arg2 * arg11  (mod p)
+ movq 8(%rsp), %rdi
+ movq 8(%rsp), %rsi
+ movq 160(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg4 <- arg9 * arg8  (mod p)
+ movq 24(%rsp), %rdi
+ movq 144(%rsp), %rsi
+ movq 136(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # tmp (48(%rsp)) <- arg7 + arg7  (mod p), i.e. 2*arg7
+ leaq 48(%rsp), %rdi
+ movq 128(%rsp), %rsi
+ movq 128(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg1 <- arg3 - arg2  (mod p)
+ movq (%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 8(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg2 <- arg3 + arg2  (mod p)
+ movq 8(%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 8(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg3 <- tmp + arg4  (mod p)
+ movq 16(%rsp), %rdi
+ leaq 48(%rsp), %rsi
+ movq 24(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg4 <- tmp - arg4  (mod p)
+ movq 24(%rsp), %rdi
+ leaq 48(%rsp), %rsi
+ movq 24(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ addq $0x50, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ # "rep ret": two-byte return, a legacy AMD branch-predictor idiom
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_madd_x64,.-fe_ge_madd_x64
+#endif /* __APPLE__ */
+# fe_ge_msub_x64 -- Ed25519 group operation over GF(2^255-19), field
+# elements stored as four 64-bit little-endian limbs (modulus grounded by
+# the $-19 / $0x7fffffffffffffff constants and *19 folding below).
+# NOTE(review): presumably this is the ref10-style ge_msub (mixed
+# subtraction with a precomputed point) -- the only differences from
+# fe_ge_madd_x64 visible here are the swapped multiplier operands
+# (160 vs 152(%rsp)) and the swapped sign of the final tmp +/- arg4 pair.
+# Confirm against the C prototype.
+# ABI: SysV AMD64.  Six pointer args in rdi,rsi,rdx,rcx,r8,r9, spilled to
+# 0..40(%rsp); remaining pointer args read from the caller's stack at
+# 128..160(%rsp) (8 return addr + 40 pushed regs + 0x50 locals).
+# 48(%rsp) is a 32-byte field-element temporary.  rbx,r12-r15 preserved;
+# rax,rcx,rdx,rsi,rdi,r8-r11 and flags clobbered.
+#ifndef __APPLE__
+.text
+.globl fe_ge_msub_x64
+.type fe_ge_msub_x64,@function
+.align 4
+fe_ge_msub_x64:
+#else
+.section __TEXT,__text
+.globl _fe_ge_msub_x64
+.p2align 2
+_fe_ge_msub_x64:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x50, %rsp
+ # Spill the six register args so rdi/rsi/rbx can be reused as the
+ # dst/src1/src2 pointers for each field operation below.
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ # arg1 <- arg6 + arg5  (mod p)
+ movq (%rsp), %rdi
+ movq 40(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg2 <- arg6 - arg5  (mod p)
+ movq 8(%rsp), %rdi
+ movq 40(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg3 <- arg1 * arg11  (mod p); schoolbook 4x4 mul then fold by 19
+ movq 16(%rsp), %rdi
+ movq (%rsp), %rsi
+ movq 160(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg2 <- arg2 * arg10  (mod p)
+ movq 8(%rsp), %rdi
+ movq 8(%rsp), %rsi
+ movq 152(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg4 <- arg9 * arg8  (mod p)
+ movq 24(%rsp), %rdi
+ movq 144(%rsp), %rsi
+ movq 136(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # tmp (48(%rsp)) <- arg7 + arg7  (mod p), i.e. 2*arg7
+ leaq 48(%rsp), %rdi
+ movq 128(%rsp), %rsi
+ movq 128(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg1 <- arg3 - arg2  (mod p)
+ movq (%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 8(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg2 <- arg3 + arg2  (mod p)
+ movq 8(%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 8(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg3 <- tmp - arg4  (mod p)  [sign swapped vs fe_ge_madd_x64]
+ movq 16(%rsp), %rdi
+ leaq 48(%rsp), %rsi
+ movq 24(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg4 <- tmp + arg4  (mod p)  [sign swapped vs fe_ge_madd_x64]
+ movq 24(%rsp), %rdi
+ leaq 48(%rsp), %rsi
+ movq 24(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ addq $0x50, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ # "rep ret": two-byte return, a legacy AMD branch-predictor idiom
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_msub_x64,.-fe_ge_msub_x64
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_add_x64
+.type fe_ge_add_x64,@function
+.align 4
+fe_ge_add_x64:
+#else
+.section __TEXT,__text
+.globl _fe_ge_add_x64
+.p2align 2
+_fe_ge_add_x64:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x50, %rsp
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq (%rsp), %rdi
+ movq 40(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 8(%rsp), %rdi
+ movq 40(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 16(%rsp), %rdi
+ movq (%rsp), %rsi
+ movq 160(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 8(%rsp), %rdi
+ movq 8(%rsp), %rsi
+ movq 168(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 24(%rsp), %rdi
+ movq 152(%rsp), %rsi
+ movq 136(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq (%rsp), %rdi
+ movq 128(%rsp), %rsi
+ movq 144(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ leaq 48(%rsp), %rdi
+ movq (%rsp), %rsi
+ movq (%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq (%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 8(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 8(%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 8(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 16(%rsp), %rdi
+ leaq 48(%rsp), %rsi
+ movq 24(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 24(%rsp), %rdi
+ leaq 48(%rsp), %rsi
+ movq 24(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ addq $0x50, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_add_x64,.-fe_ge_add_x64
+#endif /* __APPLE__ */
+# fe_ge_sub_x64 -- Ed25519 group-element subtraction step (x64 mul/adc version).
+# All twelve parameters are pointers to 4x64-bit field elements in GF(2^255-19).
+# SysV AMD64 ABI: args 1-6 arrive in rdi,rsi,rdx,rcx,r8,r9; args 7-12 arrive on
+# the caller's stack and are read below at 128(%rsp)..168(%rsp)
+# (0x50 bytes of locals + 5 pushes + return address = 128 from post-prologue rsp).
+# Locals (0x50 bytes): 0..40(%rsp) cache the six register args; 48(%rsp) holds a
+# temporary field element.  Straight-line, branch-free code: constant time.
+# Callee-saved rbx, r12-r15 are preserved.
+# NOTE(review): the exact role of each operand (qyplusx/qyminusx/qt2d/...) is
+# fixed by the fe_ge_sub() C prototype -- confirm against the generating script.
+#ifndef __APPLE__
+.text
+.globl fe_ge_sub_x64
+.type fe_ge_sub_x64,@function
+.align 4
+fe_ge_sub_x64:
+#else
+.section __TEXT,__text
+.globl _fe_ge_sub_x64
+.p2align 2
+_fe_ge_sub_x64:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x50, %rsp
+ # Cache the six register arguments in the local frame
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ # arg1 <- arg6 + arg5  (mod p)
+ movq (%rsp), %rdi
+ movq 40(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg2 <- arg6 - arg5  (mod p)
+ movq 8(%rsp), %rdi
+ movq 40(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg3 <- arg1 * stack arg 12 (168(%rsp))
+ movq 16(%rsp), %rdi
+ movq (%rsp), %rsi
+ movq 168(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg2 <- arg2 * stack arg 11 (160(%rsp))
+ movq 8(%rsp), %rdi
+ movq 8(%rsp), %rsi
+ movq 160(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg4 <- stack arg 10 (152(%rsp)) * stack arg 8 (136(%rsp))
+ movq 24(%rsp), %rdi
+ movq 152(%rsp), %rsi
+ movq 136(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg1 <- stack arg 7 (128(%rsp)) * stack arg 9 (144(%rsp))
+ movq (%rsp), %rdi
+ movq 128(%rsp), %rsi
+ movq 144(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rax
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rbx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r9
+ adcq %rdx, %r10
+ # A[1] * B[0]
+ movq (%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r11, %r11
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0x00, %r11
+ # A[0] * B[2]
+ movq 16(%rbx), %rax
+ mulq (%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[1]
+ movq 8(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[2] * B[0]
+ movq (%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ # A[0] * B[3]
+ movq 24(%rbx), %rax
+ mulq (%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[2]
+ movq 16(%rbx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[2] * B[1]
+ movq 8(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[3] * B[0]
+ movq (%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ # A[1] * B[3]
+ movq 24(%rbx), %rax
+ mulq 8(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[2]
+ movq 16(%rbx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[3] * B[1]
+ movq 8(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ # A[2] * B[3]
+ movq 24(%rbx), %rax
+ mulq 16(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[2]
+ movq 16(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0x00, %r15
+ # A[3] * B[3]
+ movq 24(%rbx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rax
+ mulq %r12
+ xorq %r12, %r12
+ addq %rax, %r8
+ movq $19, %rax
+ adcq %rdx, %r12
+ mulq %r13
+ xorq %r13, %r13
+ addq %rax, %r9
+ movq $19, %rax
+ adcq %rdx, %r13
+ mulq %r14
+ xorq %r14, %r14
+ addq %rax, %r10
+ movq $19, %rax
+ adcq %rdx, %r14
+ mulq %r15
+ # Add remaining product results in
+ addq %r12, %r9
+ adcq %r13, %r10
+ adcq %r14, %r11
+ adcq %rax, %r11
+ adcq $0x00, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # tmp (48(%rsp)) <- 2 * arg1
+ leaq 48(%rsp), %rdi
+ movq (%rsp), %rsi
+ movq (%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg1 <- arg3 - arg2  (mod p)
+ movq (%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 8(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg2 <- arg3 + arg2  (mod p)
+ movq 8(%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 8(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg3 <- tmp - arg4  (mod p)
+ movq 16(%rsp), %rdi
+ leaq 48(%rsp), %rsi
+ movq 24(%rsp), %rbx
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rbx), %r8
+ movq $0x00, %rcx
+ sbbq 8(%rbx), %r9
+ movq $-19, %rax
+ sbbq 16(%rbx), %r10
+ movq $0x7fffffffffffffff, %rdx
+ sbbq 24(%rbx), %r11
+ sbbq $0x00, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Add modulus (if underflow)
+ addq %rax, %r8
+ adcq %rcx, %r9
+ adcq %rcx, %r10
+ adcq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # arg4 <- tmp + arg4  (mod p)
+ movq 24(%rsp), %rdi
+ leaq 48(%rsp), %rsi
+ movq 24(%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rcx
+ adcq 16(%rbx), %r10
+ movq $-19, %rax
+ adcq 24(%rbx), %rcx
+ movq $0x7fffffffffffffff, %rdx
+ movq %rcx, %r11
+ sarq $63, %rcx
+ # Mask the modulus
+ andq %rcx, %rax
+ andq %rcx, %rdx
+ # Sub modulus (if overflow)
+ subq %rax, %r8
+ sbbq %rcx, %r9
+ sbbq %rcx, %r10
+ sbbq %rdx, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ addq $0x50, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ # "rep ret": two-byte return, historic AMD branch-predictor workaround
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_sub_x64,.-fe_ge_sub_x64
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+# fe_mul_avx2(fe r, const fe a, const fe b)
+# r = a * b mod 2^255-19 using BMI2/ADX (mulx with dual adcx/adox carry chains).
+# SysV AMD64 ABI: rdi = r, rsi = a, rdx = b.  b is moved to rbx immediately
+# because mulx takes one multiplicand implicitly from rdx.
+# Callee-saved rbx, r12-r15 are preserved.  Straight-line, branch-free code:
+# constant time.
+#ifndef __APPLE__
+.text
+.globl fe_mul_avx2
+.type fe_mul_avx2,@function
+.align 4
+fe_mul_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_mul_avx2
+.p2align 2
+_fe_mul_avx2:
+#endif /* __APPLE__ */
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbx
+ # rbx = b; frees rdx for use as the implicit mulx operand
+ movq %rdx, %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rax, %rcx
+ # zero r15 and clear CF/OF for the adcx/adox chains below
+ xorq %r15, %r15
+ adcxq %rax, %r9
+ # A[1] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rcx, %r10
+ # A[0] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq (%rsi), %rax, %rcx
+ adoxq %rax, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rax, %r14
+ adoxq %rcx, %r10
+ adcxq %rax, %r11
+ # A[1] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rsi), %rax, %rcx
+ adcxq %r14, %r12
+ adoxq %rax, %r11
+ adcxq %r15, %r13
+ adoxq %rcx, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rax, %rcx
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rax, %r10
+ # A[1] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq 8(%rsi), %rdx, %rax
+ adcxq %rcx, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbx), %rdx
+ adoxq %rax, %r11
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r12
+ # A[2] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rax
+ adcxq %rcx, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbx), %rdx
+ adoxq %rax, %r13
+ mulxq 24(%rsi), %rax, %rcx
+ adoxq %r15, %r14
+ adcxq %rax, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rax
+ adcxq %rcx, %r15
+ xorq %rcx, %rcx
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbx), %rdx
+ adcxq %rax, %r12
+ mulxq 24(%rsi), %rdx, %rax
+ adoxq %rdx, %r11
+ adoxq %rax, %r12
+ # A[2] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rax
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbx), %rdx
+ adcxq %rax, %r14
+ mulxq 24(%rsi), %rax, %rdx
+ adcxq %rcx, %r15
+ adoxq %rax, %r13
+ adoxq %rdx, %r14
+ adoxq %rcx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r12, %rax, %r12
+ adcxq %rax, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rax, %r13
+ adcxq %rax, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rax, %r14
+ adcxq %rax, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ popq %rbx
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_mul_avx2,.-fe_mul_avx2
+#endif /* __APPLE__ */
+# fe_sq_avx2(r /*rdi*/, a /*rsi*/)
+# Square a 4x64-bit field element and reduce modulo 2^255-19.
+# Computes the off-diagonal products once, doubles them with the carry
+# chain, adds the diagonal squares, then folds the top half back in
+# multiplied by 19.  Callee-saved rbx, r12-r15 preserved by push/pop.
+#ifndef __APPLE__
+.text
+.globl fe_sq_avx2
+.type fe_sq_avx2,@function
+.align 4
+fe_sq_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_sq_avx2
+.p2align 2
+_fe_sq_avx2:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ # Square
+ # A[0] * A[1]
+ movq (%rsi), %rdx
+ mulxq 8(%rsi), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rsi), %r11, %r12
+ # A[2] * A[1]
+ movq 16(%rsi), %rdx
+ mulxq 8(%rsi), %rcx, %rbx
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ # A[2] * A[3]
+ mulxq 24(%rsi), %r13, %r14
+ adoxq %rbx, %r12
+ # A[2] * A[0]
+ mulxq (%rsi), %rcx, %rbx
+ adoxq %r15, %r13
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ # A[1] * A[3]
+ movq 8(%rsi), %rdx
+ mulxq 24(%rsi), %rax, %r8
+ adcxq %rbx, %r11
+ adcxq %rax, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %r8, %rax
+ adcxq %r9, %r9
+ # A[1] * A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rcx, %rbx
+ adcxq %r10, %r10
+ adoxq %rax, %r9
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adcxq %r12, %r12
+ adoxq %rbx, %r11
+ adcxq %r13, %r13
+ adoxq %rax, %r12
+ # A[3] * A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rax, %rbx
+ adcxq %r14, %r14
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rax, %r14
+ adoxq %rbx, %r15
+ # Reduce: fold bits >= 255 back in with weight 19
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r12, %rax, %r12
+ adcxq %rax, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rax, %r13
+ adcxq %rax, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rax, %r14
+ adcxq %rax, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size fe_sq_avx2,.-fe_sq_avx2
+#endif /* __APPLE__ */
+# fe_sq_n_avx2(r /*rdi*/, a /*rsi*/, n /*rdx*/)
+# Square the field element n times: r = a^(2^n) mod 2^255-19.
+# First iteration reads from a (rsi); every store goes to r (rdi), and
+# callers in this file (fe_invert_avx2) pass rdi == rsi so subsequent
+# iterations square the running result in place.
+# NOTE(review): only the low byte of n is used as the loop counter
+# (decb %bpl) - callers here pass small counts (4..0x63); confirm no
+# caller passes n >= 256.
+#ifndef __APPLE__
+.text
+.globl fe_sq_n_avx2
+.type fe_sq_n_avx2,@function
+.align 4
+fe_sq_n_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_sq_n_avx2
+.p2align 2
+_fe_sq_n_avx2:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ # keep the iteration count out of rdx (mulx clobbers it)
+ movq %rdx, %rbp
+L_fe_sq_n_avx2:
+ # Square
+ # A[0] * A[1]
+ movq (%rsi), %rdx
+ mulxq 8(%rsi), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rsi), %r11, %r12
+ # A[2] * A[1]
+ movq 16(%rsi), %rdx
+ mulxq 8(%rsi), %rcx, %rbx
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ # A[2] * A[3]
+ mulxq 24(%rsi), %r13, %r14
+ adoxq %rbx, %r12
+ # A[2] * A[0]
+ mulxq (%rsi), %rcx, %rbx
+ adoxq %r15, %r13
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ # A[1] * A[3]
+ movq 8(%rsi), %rdx
+ mulxq 24(%rsi), %rax, %r8
+ adcxq %rbx, %r11
+ adcxq %rax, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %r8, %rax
+ adcxq %r9, %r9
+ # A[1] * A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rcx, %rbx
+ adcxq %r10, %r10
+ adoxq %rax, %r9
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adcxq %r12, %r12
+ adoxq %rbx, %r11
+ adcxq %r13, %r13
+ adoxq %rax, %r12
+ # A[3] * A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rax, %rbx
+ adcxq %r14, %r14
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rax, %r14
+ adoxq %rbx, %r15
+ # Reduce: fold bits >= 255 back in with weight 19
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r12, %rax, %r12
+ adcxq %rax, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rax, %r13
+ adcxq %rax, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rax, %r14
+ adcxq %rax, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ decb %bpl
+ jnz L_fe_sq_n_avx2
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size fe_sq_n_avx2,.-fe_sq_n_avx2
+#endif /* __APPLE__ */
+# fe_mul121666_avx2(r /*rdi*/, a /*rsi*/)
+# Multiply a field element by the Curve25519 constant 121666
+# (0x1db42) and reduce modulo 2^255-19.  Single mulx pass followed by
+# one fold of the carried-out bits multiplied by 19.
+#ifndef __APPLE__
+.text
+.globl fe_mul121666_avx2
+.type fe_mul121666_avx2,@function
+.align 4
+fe_mul121666_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_mul121666_avx2
+.p2align 2
+_fe_mul121666_avx2:
+#endif /* __APPLE__ */
+ pushq %r12
+ pushq %r13
+ # 0x1db42 = 121666
+ movq $0x1db42, %rdx
+ mulxq (%rsi), %rax, %r13
+ mulxq 8(%rsi), %rcx, %r12
+ mulxq 16(%rsi), %r8, %r11
+ mulxq 24(%rsi), %r9, %r10
+ addq %r13, %rcx
+ adcq %r12, %r8
+ adcq %r11, %r9
+ adcq $0x00, %r10
+ movq $0x7fffffffffffffff, %r13
+ shldq $0x01, %r9, %r10
+ andq %r13, %r9
+ imulq $19, %r10, %r10
+ addq %r10, %rax
+ adcq $0x00, %rcx
+ adcq $0x00, %r8
+ adcq $0x00, %r9
+ movq %rax, (%rdi)
+ movq %rcx, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ popq %r13
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size fe_mul121666_avx2,.-fe_mul121666_avx2
+#endif /* __APPLE__ */
+# fe_sq2_avx2(r /*rdi*/, a /*rsi*/)
+# Compute r = 2 * a^2 mod 2^255-19.  Same off-diagonal/diagonal
+# squaring as fe_sq_avx2, but the reduction shifts incorporate the
+# extra doubling (shld by 2 on the top limbs, by 1 on the bottom) and
+# the bits shifted above 2^255 twice are folded in as 19*19 = 0x169.
+#ifndef __APPLE__
+.text
+.globl fe_sq2_avx2
+.type fe_sq2_avx2,@function
+.align 4
+fe_sq2_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_sq2_avx2
+.p2align 2
+_fe_sq2_avx2:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ # Square * 2
+ # A[0] * A[1]
+ movq (%rsi), %rdx
+ mulxq 8(%rsi), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rsi), %r11, %r12
+ # A[2] * A[1]
+ movq 16(%rsi), %rdx
+ mulxq 8(%rsi), %rcx, %rbx
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ # A[2] * A[3]
+ mulxq 24(%rsi), %r13, %r14
+ adoxq %rbx, %r12
+ # A[2] * A[0]
+ mulxq (%rsi), %rcx, %rbx
+ adoxq %r15, %r13
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ # A[1] * A[3]
+ movq 8(%rsi), %rdx
+ mulxq 24(%rsi), %rax, %r8
+ adcxq %rbx, %r11
+ adcxq %rax, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %r8, %rax
+ adcxq %r9, %r9
+ # A[1] * A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rcx, %rbx
+ adcxq %r10, %r10
+ adoxq %rax, %r9
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adcxq %r12, %r12
+ adoxq %rbx, %r11
+ adcxq %r13, %r13
+ adoxq %rax, %r12
+ # A[3] * A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rax, %rbx
+ adcxq %r14, %r14
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rax, %r14
+ adoxq %rbx, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rbx
+ xorq %rax, %rax
+ # Move top half into t4-t7 and remove top bit from t3 and double
+ shldq $3, %r15, %rax
+ shldq $2, %r14, %r15
+ shldq $2, %r13, %r14
+ shldq $2, %r12, %r13
+ shldq $2, %r11, %r12
+ shldq $0x01, %r10, %r11
+ shldq $0x01, %r9, %r10
+ shldq $0x01, %r8, %r9
+ shlq $0x01, %r8
+ andq %rbx, %r11
+ # Two out left, one in right
+ andq %rbx, %r15
+ # Multiply top bits by 19*19 (0x169 = 361): these bits were shifted
+ # past 2^255 twice, so they pick up the factor 19 twice
+ imulq $0x169, %rax, %rcx
+ xorq %rbx, %rbx
+ # Multiply top half by 19
+ movq $19, %rdx
+ adoxq %rcx, %r8
+ mulxq %r12, %rax, %r12
+ adcxq %rax, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rax, %r13
+ adcxq %rax, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rax, %r14
+ adcxq %rax, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rbx, %rdx
+ adcxq %rbx, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rbx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rbx, %r11
+ addq %rax, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size fe_sq2_avx2,.-fe_sq2_avx2
+#endif /* __APPLE__ */
+# fe_invert_avx2(r /*rdi*/, a /*rsi*/)
+# Field inversion via a fixed square-and-multiply chain over the
+# AVX2/ADX primitives (fe_sq_avx2 / fe_sq_n_avx2 / fe_mul_avx2) -
+# presumably computing a^(p-2) mod p = 2^255-19 by Fermat's little
+# theorem; the chain of square counts (1,2,4,9,19,49,0x63,...) matches
+# that exponent.
+# Stack frame (0x90 bytes): four 32-byte temporaries at rsp+0, +32,
+# +64, +96; saved r pointer at 128(rsp) and a pointer at 136(rsp),
+# reloaded after each helper call since rdi/rsi are clobbered.
+# Fix: added the missing ELF .size directive after the final ret so
+# this function carries symbol-size metadata like its siblings
+# (fe_mul_avx2, fe_sq_avx2, ...).
+#ifndef __APPLE__
+.text
+.globl fe_invert_avx2
+.type fe_invert_avx2,@function
+.align 4
+fe_invert_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_invert_avx2
+.p2align 2
+_fe_invert_avx2:
+#endif /* __APPLE__ */
+ subq $0x90, %rsp
+ # Invert
+ movq %rdi, 128(%rsp)
+ movq %rsi, 136(%rsp)
+ movq %rsp, %rdi
+ movq 136(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ movq 136(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $4, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $19, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $0x63, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq $4, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ movq 128(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ movq 136(%rsp), %rsi
+ movq 128(%rsp), %rdi
+ addq $0x90, %rsp
+ repz retq
+#ifndef __APPLE__
+.size fe_invert_avx2,.-fe_invert_avx2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl curve25519_avx2
+.type curve25519_avx2,@function
+.align 4
+curve25519_avx2:
+#else
+.section __TEXT,__text
+.globl _curve25519_avx2
+.p2align 2
+_curve25519_avx2:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rdx, %r8
+ subq $0xc0, %rsp
+ movq $0x00, 184(%rsp)
+ movq %rdi, 176(%rsp)
+ # Set one
+ movq $0x01, (%rdi)
+ movq $0x00, 8(%rdi)
+ movq $0x00, 16(%rdi)
+ movq $0x00, 24(%rdi)
+ # Set zero
+ movq $0x00, (%rsp)
+ movq $0x00, 8(%rsp)
+ movq $0x00, 16(%rsp)
+ movq $0x00, 24(%rsp)
+ # Set one
+ movq $0x01, 32(%rsp)
+ movq $0x00, 40(%rsp)
+ movq $0x00, 48(%rsp)
+ movq $0x00, 56(%rsp)
+ # Copy
+ movq (%r8), %r9
+ movq 8(%r8), %r10
+ movq 16(%r8), %r11
+ movq 24(%r8), %r12
+ movq %r9, 64(%rsp)
+ movq %r10, 72(%rsp)
+ movq %r11, 80(%rsp)
+ movq %r12, 88(%rsp)
+ movb $62, 168(%rsp)
+ movq $3, 160(%rsp)
+L_curve25519_avx2_words:
+L_curve25519_avx2_bits:
+ movq 184(%rsp), %rbx
+ movq 160(%rsp), %r9
+ movb 168(%rsp), %cl
+ movq (%rsi,%r9,8), %rax
+ shrq %cl, %rax
+ andq $0x01, %rax
+ xorq %rax, %rbx
+ negq %rbx
+ # Conditional Swap
+ movq (%rdi), %r9
+ movq 8(%rdi), %r10
+ movq 16(%rdi), %r11
+ movq 24(%rdi), %r12
+ xorq 64(%rsp), %r9
+ xorq 72(%rsp), %r10
+ xorq 80(%rsp), %r11
+ xorq 88(%rsp), %r12
+ andq %rbx, %r9
+ andq %rbx, %r10
+ andq %rbx, %r11
+ andq %rbx, %r12
+ xorq %r9, (%rdi)
+ xorq %r10, 8(%rdi)
+ xorq %r11, 16(%rdi)
+ xorq %r12, 24(%rdi)
+ xorq %r9, 64(%rsp)
+ xorq %r10, 72(%rsp)
+ xorq %r11, 80(%rsp)
+ xorq %r12, 88(%rsp)
+ # Conditional Swap
+ movq (%rsp), %r9
+ movq 8(%rsp), %r10
+ movq 16(%rsp), %r11
+ movq 24(%rsp), %r12
+ xorq 32(%rsp), %r9
+ xorq 40(%rsp), %r10
+ xorq 48(%rsp), %r11
+ xorq 56(%rsp), %r12
+ andq %rbx, %r9
+ andq %rbx, %r10
+ andq %rbx, %r11
+ andq %rbx, %r12
+ xorq %r9, (%rsp)
+ xorq %r10, 8(%rsp)
+ xorq %r11, 16(%rsp)
+ xorq %r12, 24(%rsp)
+ xorq %r9, 32(%rsp)
+ xorq %r10, 40(%rsp)
+ xorq %r11, 48(%rsp)
+ xorq %r12, 56(%rsp)
+ movq %rax, 184(%rsp)
+ # Add
+ movq (%rdi), %r9
+ movq 8(%rdi), %r10
+ movq 16(%rdi), %r11
+ movq 24(%rdi), %rax
+ movq %r9, %r13
+ addq (%rsp), %r9
+ movq %r10, %r14
+ adcq 8(%rsp), %r10
+ movq %r11, %r15
+ adcq 16(%rsp), %r11
+ movq %rax, %rbp
+ adcq 24(%rsp), %rax
+ movq $-19, %rcx
+ movq %rax, %r12
+ movq $0x7fffffffffffffff, %rbx
+ sarq $63, %rax
+ # Mask the modulus
+ andq %rax, %rcx
+ andq %rax, %rbx
+ # Sub modulus (if overflow)
+ subq %rcx, %r9
+ sbbq %rax, %r10
+ sbbq %rax, %r11
+ sbbq %rbx, %r12
+ # Sub
+ subq (%rsp), %r13
+ movq $0x00, %rax
+ sbbq 8(%rsp), %r14
+ movq $-19, %rcx
+ sbbq 16(%rsp), %r15
+ movq $0x7fffffffffffffff, %rbx
+ sbbq 24(%rsp), %rbp
+ sbbq $0x00, %rax
+ # Mask the modulus
+ andq %rax, %rcx
+ andq %rax, %rbx
+ # Add modulus (if underflow)
+ addq %rcx, %r13
+ adcq %rax, %r14
+ adcq %rax, %r15
+ adcq %rbx, %rbp
+ movq %r9, (%rdi)
+ movq %r10, 8(%rdi)
+ movq %r11, 16(%rdi)
+ movq %r12, 24(%rdi)
+ movq %r13, 128(%rsp)
+ movq %r14, 136(%rsp)
+ movq %r15, 144(%rsp)
+ movq %rbp, 152(%rsp)
+ # Add
+ movq 64(%rsp), %r9
+ movq 72(%rsp), %r10
+ movq 80(%rsp), %r11
+ movq 88(%rsp), %rax
+ movq %r9, %r13
+ addq 32(%rsp), %r9
+ movq %r10, %r14
+ adcq 40(%rsp), %r10
+ movq %r11, %r15
+ adcq 48(%rsp), %r11
+ movq %rax, %rbp
+ adcq 56(%rsp), %rax
+ movq $-19, %rcx
+ movq %rax, %r12
+ movq $0x7fffffffffffffff, %rbx
+ sarq $63, %rax
+ # Mask the modulus
+ andq %rax, %rcx
+ andq %rax, %rbx
+ # Sub modulus (if overflow)
+ subq %rcx, %r9
+ sbbq %rax, %r10
+ sbbq %rax, %r11
+ sbbq %rbx, %r12
+ # Sub
+ subq 32(%rsp), %r13
+ movq $0x00, %rax
+ sbbq 40(%rsp), %r14
+ movq $-19, %rcx
+ sbbq 48(%rsp), %r15
+ movq $0x7fffffffffffffff, %rbx
+ sbbq 56(%rsp), %rbp
+ sbbq $0x00, %rax
+ # Mask the modulus
+ andq %rax, %rcx
+ andq %rax, %rbx
+ # Add modulus (if underflow)
+ addq %rcx, %r13
+ adcq %rax, %r14
+ adcq %rax, %r15
+ adcq %rbx, %rbp
+ movq %r9, (%rsp)
+ movq %r10, 8(%rsp)
+ movq %r11, 16(%rsp)
+ movq %r12, 24(%rsp)
+ movq %r13, 96(%rsp)
+ movq %r14, 104(%rsp)
+ movq %r15, 112(%rsp)
+ movq %rbp, 120(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq (%rdi), %rdx
+ mulxq 96(%rsp), %r9, %r10
+ # A[2] * B[0]
+ mulxq 112(%rsp), %r11, %r12
+ # A[1] * B[0]
+ mulxq 104(%rsp), %rcx, %rbx
+ xorq %rbp, %rbp
+ adcxq %rcx, %r10
+ # A[1] * B[3]
+ movq 24(%rdi), %rdx
+ mulxq 104(%rsp), %r13, %r14
+ adcxq %rbx, %r11
+ # A[0] * B[1]
+ movq 8(%rdi), %rdx
+ mulxq 96(%rsp), %rcx, %rbx
+ adoxq %rcx, %r10
+ # A[2] * B[1]
+ mulxq 112(%rsp), %rcx, %r15
+ adoxq %rbx, %r11
+ adcxq %rcx, %r12
+ # A[1] * B[2]
+ movq 16(%rdi), %rdx
+ mulxq 104(%rsp), %rcx, %rbx
+ adcxq %r15, %r13
+ adoxq %rcx, %r12
+ adcxq %rbp, %r14
+ adoxq %rbx, %r13
+ # A[0] * B[2]
+ mulxq 96(%rsp), %rcx, %rbx
+ adoxq %rbp, %r14
+ xorq %r15, %r15
+ adcxq %rcx, %r11
+ # A[1] * B[1]
+ movq 8(%rdi), %rdx
+ mulxq 104(%rsp), %rdx, %rcx
+ adcxq %rbx, %r12
+ adoxq %rdx, %r11
+ # A[3] * B[1]
+ movq 8(%rdi), %rdx
+ adoxq %rcx, %r12
+ mulxq 120(%rsp), %rcx, %rbx
+ adcxq %rcx, %r13
+ # A[2] * B[2]
+ movq 16(%rdi), %rdx
+ mulxq 112(%rsp), %rdx, %rcx
+ adcxq %rbx, %r14
+ adoxq %rdx, %r13
+ # A[3] * B[3]
+ movq 24(%rdi), %rdx
+ adoxq %rcx, %r14
+ mulxq 120(%rsp), %rcx, %rbx
+ adoxq %rbp, %r15
+ adcxq %rcx, %r15
+ # A[0] * B[3]
+ mulxq 96(%rsp), %rdx, %rcx
+ adcxq %rbx, %rbp
+ xorq %rbx, %rbx
+ adcxq %rdx, %r12
+ # A[3] * B[0]
+ movq (%rdi), %rdx
+ adcxq %rcx, %r13
+ mulxq 120(%rsp), %rdx, %rcx
+ adoxq %rdx, %r12
+ adoxq %rcx, %r13
+ # A[2] * B[3]
+ movq 24(%rdi), %rdx
+ mulxq 112(%rsp), %rdx, %rcx
+ adcxq %rdx, %r14
+ # A[3] * B[2]
+ movq 16(%rdi), %rdx
+ adcxq %rcx, %r15
+ mulxq 120(%rsp), %rcx, %rdx
+ adcxq %rbx, %rbp
+ adoxq %rcx, %r14
+ adoxq %rdx, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rbx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rbx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rbx, %rbx
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rcx, %r15
+ adcxq %rcx, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rbx, %rdx
+ adcxq %rbx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rbx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, 32(%rsp)
+ movq %r10, 40(%rsp)
+ movq %r11, 48(%rsp)
+ movq %r12, 56(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq 128(%rsp), %rdx
+ mulxq (%rsp), %r9, %r10
+ # A[2] * B[0]
+ mulxq 16(%rsp), %r11, %r12
+ # A[1] * B[0]
+ mulxq 8(%rsp), %rcx, %rbx
+ xorq %rbp, %rbp
+ adcxq %rcx, %r10
+ # A[1] * B[3]
+ movq 152(%rsp), %rdx
+ mulxq 8(%rsp), %r13, %r14
+ adcxq %rbx, %r11
+ # A[0] * B[1]
+ movq 136(%rsp), %rdx
+ mulxq (%rsp), %rcx, %rbx
+ adoxq %rcx, %r10
+ # A[2] * B[1]
+ mulxq 16(%rsp), %rcx, %r15
+ adoxq %rbx, %r11
+ adcxq %rcx, %r12
+ # A[1] * B[2]
+ movq 144(%rsp), %rdx
+ mulxq 8(%rsp), %rcx, %rbx
+ adcxq %r15, %r13
+ adoxq %rcx, %r12
+ adcxq %rbp, %r14
+ adoxq %rbx, %r13
+ # A[0] * B[2]
+ mulxq (%rsp), %rcx, %rbx
+ adoxq %rbp, %r14
+ xorq %r15, %r15
+ adcxq %rcx, %r11
+ # A[1] * B[1]
+ movq 136(%rsp), %rdx
+ mulxq 8(%rsp), %rdx, %rcx
+ adcxq %rbx, %r12
+ adoxq %rdx, %r11
+ # A[3] * B[1]
+ movq 136(%rsp), %rdx
+ adoxq %rcx, %r12
+ mulxq 24(%rsp), %rcx, %rbx
+ adcxq %rcx, %r13
+ # A[2] * B[2]
+ movq 144(%rsp), %rdx
+ mulxq 16(%rsp), %rdx, %rcx
+ adcxq %rbx, %r14
+ adoxq %rdx, %r13
+ # A[3] * B[3]
+ movq 152(%rsp), %rdx
+ adoxq %rcx, %r14
+ mulxq 24(%rsp), %rcx, %rbx
+ adoxq %rbp, %r15
+ adcxq %rcx, %r15
+ # A[0] * B[3]
+ mulxq (%rsp), %rdx, %rcx
+ adcxq %rbx, %rbp
+ xorq %rbx, %rbx
+ adcxq %rdx, %r12
+ # A[3] * B[0]
+ movq 128(%rsp), %rdx
+ adcxq %rcx, %r13
+ mulxq 24(%rsp), %rdx, %rcx
+ adoxq %rdx, %r12
+ adoxq %rcx, %r13
+ # A[2] * B[3]
+ movq 152(%rsp), %rdx
+ mulxq 16(%rsp), %rdx, %rcx
+ adcxq %rdx, %r14
+ # A[3] * B[2]
+ movq 144(%rsp), %rdx
+ adcxq %rcx, %r15
+ mulxq 24(%rsp), %rcx, %rdx
+ adcxq %rbx, %rbp
+ adoxq %rcx, %r14
+ adoxq %rdx, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rbx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rbx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rbx, %rbx
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rcx, %r15
+ adcxq %rcx, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rbx, %rdx
+ adcxq %rbx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rbx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, (%rsp)
+ movq %r10, 8(%rsp)
+ movq %r11, 16(%rsp)
+ movq %r12, 24(%rsp)
+ # Square
+ # A[0] * A[1]
+ movq 128(%rsp), %rdx
+ mulxq 136(%rsp), %r10, %r11
+ # A[0] * A[3]
+ mulxq 152(%rsp), %r12, %r13
+ # A[2] * A[1]
+ movq 144(%rsp), %rdx
+ mulxq 136(%rsp), %rcx, %rbx
+ xorq %rbp, %rbp
+ adoxq %rcx, %r12
+ # A[2] * A[3]
+ mulxq 152(%rsp), %r14, %r15
+ adoxq %rbx, %r13
+ # A[2] * A[0]
+ mulxq 128(%rsp), %rcx, %rbx
+ adoxq %rbp, %r14
+ adcxq %rcx, %r11
+ adoxq %rbp, %r15
+ # A[1] * A[3]
+ movq 136(%rsp), %rdx
+ mulxq 152(%rsp), %rax, %r9
+ adcxq %rbx, %r12
+ adcxq %rax, %r13
+ adcxq %r9, %r14
+ adcxq %rbp, %r15
+ # Double with Carry Flag
+ xorq %rbp, %rbp
+ # A[0] * A[0]
+ movq 128(%rsp), %rdx
+ mulxq %rdx, %r9, %rax
+ adcxq %r10, %r10
+ # A[1] * A[1]
+ movq 136(%rsp), %rdx
+ mulxq %rdx, %rcx, %rbx
+ adcxq %r11, %r11
+ adoxq %rax, %r10
+ adcxq %r12, %r12
+ adoxq %rcx, %r11
+ # A[2] * A[2]
+ movq 144(%rsp), %rdx
+ mulxq %rdx, %rax, %rcx
+ adcxq %r13, %r13
+ adoxq %rbx, %r12
+ adcxq %r14, %r14
+ adoxq %rax, %r13
+ # A[3] * A[3]
+ movq 152(%rsp), %rdx
+ mulxq %rdx, %rax, %rbx
+ adcxq %r15, %r15
+ adoxq %rcx, %r14
+ adcxq %rbp, %rbp
+ adoxq %rax, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rcx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r13, %rax, %r13
+ adcxq %rax, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rax, %r14
+ adcxq %rax, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rax, %r15
+ adcxq %rax, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r12
+ addq %rax, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r12
+ addq %rax, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, 96(%rsp)
+ movq %r10, 104(%rsp)
+ movq %r11, 112(%rsp)
+ movq %r12, 120(%rsp)
+ # Square
+ # A[0] * A[1]
+ movq (%rdi), %rdx
+ mulxq 8(%rdi), %r10, %r11
+ # A[0] * A[3]
+ mulxq 24(%rdi), %r12, %r13
+ # A[2] * A[1]
+ movq 16(%rdi), %rdx
+ mulxq 8(%rdi), %rcx, %rbx
+ xorq %rbp, %rbp
+ adoxq %rcx, %r12
+ # A[2] * A[3]
+ mulxq 24(%rdi), %r14, %r15
+ adoxq %rbx, %r13
+ # A[2] * A[0]
+ mulxq (%rdi), %rcx, %rbx
+ adoxq %rbp, %r14
+ adcxq %rcx, %r11
+ adoxq %rbp, %r15
+ # A[1] * A[3]
+ movq 8(%rdi), %rdx
+ mulxq 24(%rdi), %rax, %r9
+ adcxq %rbx, %r12
+ adcxq %rax, %r13
+ adcxq %r9, %r14
+ adcxq %rbp, %r15
+ # Double with Carry Flag
+ xorq %rbp, %rbp
+ # A[0] * A[0]
+ movq (%rdi), %rdx
+ mulxq %rdx, %r9, %rax
+ adcxq %r10, %r10
+ # A[1] * A[1]
+ movq 8(%rdi), %rdx
+ mulxq %rdx, %rcx, %rbx
+ adcxq %r11, %r11
+ adoxq %rax, %r10
+ adcxq %r12, %r12
+ adoxq %rcx, %r11
+ # A[2] * A[2]
+ movq 16(%rdi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adcxq %r13, %r13
+ adoxq %rbx, %r12
+ adcxq %r14, %r14
+ adoxq %rax, %r13
+ # A[3] * A[3]
+ movq 24(%rdi), %rdx
+ mulxq %rdx, %rax, %rbx
+ adcxq %r15, %r15
+ adoxq %rcx, %r14
+ adcxq %rbp, %rbp
+ adoxq %rax, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rcx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r13, %rax, %r13
+ adcxq %rax, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rax, %r14
+ adcxq %rax, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rax, %r15
+ adcxq %rax, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r12
+ addq %rax, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r12
+ addq %rax, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, 128(%rsp)
+ movq %r10, 136(%rsp)
+ movq %r11, 144(%rsp)
+ movq %r12, 152(%rsp)
+ # Add
+ movq 32(%rsp), %r9
+ movq 40(%rsp), %r10
+ movq 48(%rsp), %r11
+ movq 56(%rsp), %rax
+ movq %r9, %r13
+ addq (%rsp), %r9
+ movq %r10, %r14
+ adcq 8(%rsp), %r10
+ movq %r11, %r15
+ adcq 16(%rsp), %r11
+ movq %rax, %rbp
+ adcq 24(%rsp), %rax
+ movq $-19, %rcx
+ movq %rax, %r12
+ movq $0x7fffffffffffffff, %rbx
+ sarq $63, %rax
+ # Mask the modulus
+ andq %rax, %rcx
+ andq %rax, %rbx
+ # Sub modulus (if overflow)
+ subq %rcx, %r9
+ sbbq %rax, %r10
+ sbbq %rax, %r11
+ sbbq %rbx, %r12
+ # Sub
+ subq (%rsp), %r13
+ movq $0x00, %rax
+ sbbq 8(%rsp), %r14
+ movq $-19, %rcx
+ sbbq 16(%rsp), %r15
+ movq $0x7fffffffffffffff, %rbx
+ sbbq 24(%rsp), %rbp
+ sbbq $0x00, %rax
+ # Mask the modulus
+ andq %rax, %rcx
+ andq %rax, %rbx
+ # Add modulus (if underflow)
+ addq %rcx, %r13
+ adcq %rax, %r14
+ adcq %rax, %r15
+ adcq %rbx, %rbp
+ movq %r9, 64(%rsp)
+ movq %r10, 72(%rsp)
+ movq %r11, 80(%rsp)
+ movq %r12, 88(%rsp)
+ movq %r13, (%rsp)
+ movq %r14, 8(%rsp)
+ movq %r15, 16(%rsp)
+ movq %rbp, 24(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq 96(%rsp), %rdx
+ mulxq 128(%rsp), %r9, %r10
+ # A[2] * B[0]
+ mulxq 144(%rsp), %r11, %r12
+ # A[1] * B[0]
+ mulxq 136(%rsp), %rcx, %rbx
+ xorq %rbp, %rbp
+ adcxq %rcx, %r10
+ # A[1] * B[3]
+ movq 120(%rsp), %rdx
+ mulxq 136(%rsp), %r13, %r14
+ adcxq %rbx, %r11
+ # A[0] * B[1]
+ movq 104(%rsp), %rdx
+ mulxq 128(%rsp), %rcx, %rbx
+ adoxq %rcx, %r10
+ # A[2] * B[1]
+ mulxq 144(%rsp), %rcx, %r15
+ adoxq %rbx, %r11
+ adcxq %rcx, %r12
+ # A[1] * B[2]
+ movq 112(%rsp), %rdx
+ mulxq 136(%rsp), %rcx, %rbx
+ adcxq %r15, %r13
+ adoxq %rcx, %r12
+ adcxq %rbp, %r14
+ adoxq %rbx, %r13
+ # A[0] * B[2]
+ mulxq 128(%rsp), %rcx, %rbx
+ adoxq %rbp, %r14
+ xorq %r15, %r15
+ adcxq %rcx, %r11
+ # A[1] * B[1]
+ movq 104(%rsp), %rdx
+ mulxq 136(%rsp), %rdx, %rcx
+ adcxq %rbx, %r12
+ adoxq %rdx, %r11
+ # A[3] * B[1]
+ movq 104(%rsp), %rdx
+ adoxq %rcx, %r12
+ mulxq 152(%rsp), %rcx, %rbx
+ adcxq %rcx, %r13
+ # A[2] * B[2]
+ movq 112(%rsp), %rdx
+ mulxq 144(%rsp), %rdx, %rcx
+ adcxq %rbx, %r14
+ adoxq %rdx, %r13
+ # A[3] * B[3]
+ movq 120(%rsp), %rdx
+ adoxq %rcx, %r14
+ mulxq 152(%rsp), %rcx, %rbx
+ adoxq %rbp, %r15
+ adcxq %rcx, %r15
+ # A[0] * B[3]
+ mulxq 128(%rsp), %rdx, %rcx
+ adcxq %rbx, %rbp
+ xorq %rbx, %rbx
+ adcxq %rdx, %r12
+ # A[3] * B[0]
+ movq 96(%rsp), %rdx
+ adcxq %rcx, %r13
+ mulxq 152(%rsp), %rdx, %rcx
+ adoxq %rdx, %r12
+ adoxq %rcx, %r13
+ # A[2] * B[3]
+ movq 120(%rsp), %rdx
+ mulxq 144(%rsp), %rdx, %rcx
+ adcxq %rdx, %r14
+ # A[3] * B[2]
+ movq 112(%rsp), %rdx
+ adcxq %rcx, %r15
+ mulxq 152(%rsp), %rcx, %rdx
+ adcxq %rbx, %rbp
+ adoxq %rcx, %r14
+ adoxq %rdx, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rbx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rbx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rbx, %rbx
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rcx, %r15
+ adcxq %rcx, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rbx, %rdx
+ adcxq %rbx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rbx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, (%rdi)
+ movq %r10, 8(%rdi)
+ movq %r11, 16(%rdi)
+ movq %r12, 24(%rdi)
+ # Sub
+ movq 128(%rsp), %r9
+ movq 136(%rsp), %r10
+ movq 144(%rsp), %r11
+ movq 152(%rsp), %r12
+ subq 96(%rsp), %r9
+ movq $0x00, %rax
+ sbbq 104(%rsp), %r10
+ movq $-19, %rcx
+ sbbq 112(%rsp), %r11
+ movq $0x7fffffffffffffff, %rbx
+ sbbq 120(%rsp), %r12
+ sbbq $0x00, %rax
+ # Mask the modulus
+ andq %rax, %rcx
+ andq %rax, %rbx
+ # Add modulus (if underflow)
+ addq %rcx, %r9
+ adcq %rax, %r10
+ adcq %rax, %r11
+ adcq %rbx, %r12
+ movq %r9, 128(%rsp)
+ movq %r10, 136(%rsp)
+ movq %r11, 144(%rsp)
+ movq %r12, 152(%rsp)
+ # Square
+ # A[0] * A[1]
+ movq (%rsp), %rdx
+ mulxq 8(%rsp), %r10, %r11
+ # A[0] * A[3]
+ mulxq 24(%rsp), %r12, %r13
+ # A[2] * A[1]
+ movq 16(%rsp), %rdx
+ mulxq 8(%rsp), %rcx, %rbx
+ xorq %rbp, %rbp
+ adoxq %rcx, %r12
+ # A[2] * A[3]
+ mulxq 24(%rsp), %r14, %r15
+ adoxq %rbx, %r13
+ # A[2] * A[0]
+ mulxq (%rsp), %rcx, %rbx
+ adoxq %rbp, %r14
+ adcxq %rcx, %r11
+ adoxq %rbp, %r15
+ # A[1] * A[3]
+ movq 8(%rsp), %rdx
+ mulxq 24(%rsp), %rax, %r9
+ adcxq %rbx, %r12
+ adcxq %rax, %r13
+ adcxq %r9, %r14
+ adcxq %rbp, %r15
+ # Double with Carry Flag
+ xorq %rbp, %rbp
+ # A[0] * A[0]
+ movq (%rsp), %rdx
+ mulxq %rdx, %r9, %rax
+ adcxq %r10, %r10
+ # A[1] * A[1]
+ movq 8(%rsp), %rdx
+ mulxq %rdx, %rcx, %rbx
+ adcxq %r11, %r11
+ adoxq %rax, %r10
+ adcxq %r12, %r12
+ adoxq %rcx, %r11
+ # A[2] * A[2]
+ movq 16(%rsp), %rdx
+ mulxq %rdx, %rax, %rcx
+ adcxq %r13, %r13
+ adoxq %rbx, %r12
+ adcxq %r14, %r14
+ adoxq %rax, %r13
+ # A[3] * A[3]
+ movq 24(%rsp), %rdx
+ mulxq %rdx, %rax, %rbx
+ adcxq %r15, %r15
+ adoxq %rcx, %r14
+ adcxq %rbp, %rbp
+ adoxq %rax, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rcx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r13, %rax, %r13
+ adcxq %rax, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rax, %r14
+ adcxq %rax, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rax, %r15
+ adcxq %rax, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r12
+ addq %rax, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r12
+ addq %rax, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, (%rsp)
+ movq %r10, 8(%rsp)
+ movq %r11, 16(%rsp)
+ movq %r12, 24(%rsp)
+ movq $0x1db42, %rdx
+ mulxq 128(%rsp), %r9, %rbp
+ mulxq 136(%rsp), %r10, %r15
+ mulxq 144(%rsp), %r11, %r14
+ mulxq 152(%rsp), %r12, %r13
+ addq %rbp, %r10
+ adcq %r15, %r11
+ adcq %r14, %r12
+ adcq $0x00, %r13
+ movq $0x7fffffffffffffff, %rbp
+ shldq $0x01, %r12, %r13
+ andq %rbp, %r12
+ imulq $19, %r13, %r13
+ addq %r13, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ movq %r9, 32(%rsp)
+ movq %r10, 40(%rsp)
+ movq %r11, 48(%rsp)
+ movq %r12, 56(%rsp)
+ # Square
+ # A[0] * A[1]
+ movq 64(%rsp), %rdx
+ mulxq 72(%rsp), %r10, %r11
+ # A[0] * A[3]
+ mulxq 88(%rsp), %r12, %r13
+ # A[2] * A[1]
+ movq 80(%rsp), %rdx
+ mulxq 72(%rsp), %rcx, %rbx
+ xorq %rbp, %rbp
+ adoxq %rcx, %r12
+ # A[2] * A[3]
+ mulxq 88(%rsp), %r14, %r15
+ adoxq %rbx, %r13
+ # A[2] * A[0]
+ mulxq 64(%rsp), %rcx, %rbx
+ adoxq %rbp, %r14
+ adcxq %rcx, %r11
+ adoxq %rbp, %r15
+ # A[1] * A[3]
+ movq 72(%rsp), %rdx
+ mulxq 88(%rsp), %rax, %r9
+ adcxq %rbx, %r12
+ adcxq %rax, %r13
+ adcxq %r9, %r14
+ adcxq %rbp, %r15
+ # Double with Carry Flag
+ xorq %rbp, %rbp
+ # A[0] * A[0]
+ movq 64(%rsp), %rdx
+ mulxq %rdx, %r9, %rax
+ adcxq %r10, %r10
+ # A[1] * A[1]
+ movq 72(%rsp), %rdx
+ mulxq %rdx, %rcx, %rbx
+ adcxq %r11, %r11
+ adoxq %rax, %r10
+ adcxq %r12, %r12
+ adoxq %rcx, %r11
+ # A[2] * A[2]
+ movq 80(%rsp), %rdx
+ mulxq %rdx, %rax, %rcx
+ adcxq %r13, %r13
+ adoxq %rbx, %r12
+ adcxq %r14, %r14
+ adoxq %rax, %r13
+ # A[3] * A[3]
+ movq 88(%rsp), %rdx
+ mulxq %rdx, %rax, %rbx
+ adcxq %r15, %r15
+ adoxq %rcx, %r14
+ adcxq %rbp, %rbp
+ adoxq %rax, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rcx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r13, %rax, %r13
+ adcxq %rax, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rax, %r14
+ adcxq %rax, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rax, %r15
+ adcxq %rax, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r12
+ addq %rax, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rax
+ andq %rcx, %r12
+ addq %rax, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, 64(%rsp)
+ movq %r10, 72(%rsp)
+ movq %r11, 80(%rsp)
+ movq %r12, 88(%rsp)
+ # Add
+ movq 96(%rsp), %r9
+ movq 104(%rsp), %r10
+ addq 32(%rsp), %r9
+ movq 112(%rsp), %r11
+ adcq 40(%rsp), %r10
+ movq 120(%rsp), %rax
+ adcq 48(%rsp), %r11
+ movq $-19, %rcx
+ adcq 56(%rsp), %rax
+ movq $0x7fffffffffffffff, %rbx
+ movq %rax, %r12
+ sarq $63, %rax
+ # Mask the modulus
+ andq %rax, %rcx
+ andq %rax, %rbx
+ # Sub modulus (if overflow)
+ subq %rcx, %r9
+ sbbq %rax, %r10
+ sbbq %rax, %r11
+ sbbq %rbx, %r12
+ movq %r9, 96(%rsp)
+ movq %r10, 104(%rsp)
+ movq %r11, 112(%rsp)
+ movq %r12, 120(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq (%rsp), %rdx
+ mulxq (%r8), %r9, %r10
+ # A[2] * B[0]
+ mulxq 16(%r8), %r11, %r12
+ # A[1] * B[0]
+ mulxq 8(%r8), %rcx, %rbx
+ xorq %rbp, %rbp
+ adcxq %rcx, %r10
+ # A[1] * B[3]
+ movq 24(%rsp), %rdx
+ mulxq 8(%r8), %r13, %r14
+ adcxq %rbx, %r11
+ # A[0] * B[1]
+ movq 8(%rsp), %rdx
+ mulxq (%r8), %rcx, %rbx
+ adoxq %rcx, %r10
+ # A[2] * B[1]
+ mulxq 16(%r8), %rcx, %r15
+ adoxq %rbx, %r11
+ adcxq %rcx, %r12
+ # A[1] * B[2]
+ movq 16(%rsp), %rdx
+ mulxq 8(%r8), %rcx, %rbx
+ adcxq %r15, %r13
+ adoxq %rcx, %r12
+ adcxq %rbp, %r14
+ adoxq %rbx, %r13
+ # A[0] * B[2]
+ mulxq (%r8), %rcx, %rbx
+ adoxq %rbp, %r14
+ xorq %r15, %r15
+ adcxq %rcx, %r11
+ # A[1] * B[1]
+ movq 8(%rsp), %rdx
+ mulxq 8(%r8), %rdx, %rcx
+ adcxq %rbx, %r12
+ adoxq %rdx, %r11
+ # A[3] * B[1]
+ movq 8(%rsp), %rdx
+ adoxq %rcx, %r12
+ mulxq 24(%r8), %rcx, %rbx
+ adcxq %rcx, %r13
+ # A[2] * B[2]
+ movq 16(%rsp), %rdx
+ mulxq 16(%r8), %rdx, %rcx
+ adcxq %rbx, %r14
+ adoxq %rdx, %r13
+ # A[3] * B[3]
+ movq 24(%rsp), %rdx
+ adoxq %rcx, %r14
+ mulxq 24(%r8), %rcx, %rbx
+ adoxq %rbp, %r15
+ adcxq %rcx, %r15
+ # A[0] * B[3]
+ mulxq (%r8), %rdx, %rcx
+ adcxq %rbx, %rbp
+ xorq %rbx, %rbx
+ adcxq %rdx, %r12
+ # A[3] * B[0]
+ movq (%rsp), %rdx
+ adcxq %rcx, %r13
+ mulxq 24(%r8), %rdx, %rcx
+ adoxq %rdx, %r12
+ adoxq %rcx, %r13
+ # A[2] * B[3]
+ movq 24(%rsp), %rdx
+ mulxq 16(%r8), %rdx, %rcx
+ adcxq %rdx, %r14
+ # A[3] * B[2]
+ movq 16(%rsp), %rdx
+ adcxq %rcx, %r15
+ mulxq 24(%r8), %rcx, %rdx
+ adcxq %rbx, %rbp
+ adoxq %rcx, %r14
+ adoxq %rdx, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rbx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rbx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rbx, %rbx
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rcx, %r15
+ adcxq %rcx, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rbx, %rdx
+ adcxq %rbx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rbx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, 32(%rsp)
+ movq %r10, 40(%rsp)
+ movq %r11, 48(%rsp)
+ movq %r12, 56(%rsp)
+ # Multiply
+ # A[0] * B[0]
+ movq 96(%rsp), %rdx
+ mulxq 128(%rsp), %r9, %r10
+ # A[2] * B[0]
+ mulxq 144(%rsp), %r11, %r12
+ # A[1] * B[0]
+ mulxq 136(%rsp), %rcx, %rbx
+ xorq %rbp, %rbp
+ adcxq %rcx, %r10
+ # A[1] * B[3]
+ movq 120(%rsp), %rdx
+ mulxq 136(%rsp), %r13, %r14
+ adcxq %rbx, %r11
+ # A[0] * B[1]
+ movq 104(%rsp), %rdx
+ mulxq 128(%rsp), %rcx, %rbx
+ adoxq %rcx, %r10
+ # A[2] * B[1]
+ mulxq 144(%rsp), %rcx, %r15
+ adoxq %rbx, %r11
+ adcxq %rcx, %r12
+ # A[1] * B[2]
+ movq 112(%rsp), %rdx
+ mulxq 136(%rsp), %rcx, %rbx
+ adcxq %r15, %r13
+ adoxq %rcx, %r12
+ adcxq %rbp, %r14
+ adoxq %rbx, %r13
+ # A[0] * B[2]
+ mulxq 128(%rsp), %rcx, %rbx
+ adoxq %rbp, %r14
+ xorq %r15, %r15
+ adcxq %rcx, %r11
+ # A[1] * B[1]
+ movq 104(%rsp), %rdx
+ mulxq 136(%rsp), %rdx, %rcx
+ adcxq %rbx, %r12
+ adoxq %rdx, %r11
+ # A[3] * B[1]
+ movq 104(%rsp), %rdx
+ adoxq %rcx, %r12
+ mulxq 152(%rsp), %rcx, %rbx
+ adcxq %rcx, %r13
+ # A[2] * B[2]
+ movq 112(%rsp), %rdx
+ mulxq 144(%rsp), %rdx, %rcx
+ adcxq %rbx, %r14
+ adoxq %rdx, %r13
+ # A[3] * B[3]
+ movq 120(%rsp), %rdx
+ adoxq %rcx, %r14
+ mulxq 152(%rsp), %rcx, %rbx
+ adoxq %rbp, %r15
+ adcxq %rcx, %r15
+ # A[0] * B[3]
+ mulxq 128(%rsp), %rdx, %rcx
+ adcxq %rbx, %rbp
+ xorq %rbx, %rbx
+ adcxq %rdx, %r12
+ # A[3] * B[0]
+ movq 96(%rsp), %rdx
+ adcxq %rcx, %r13
+ mulxq 152(%rsp), %rdx, %rcx
+ adoxq %rdx, %r12
+ adoxq %rcx, %r13
+ # A[2] * B[3]
+ movq 120(%rsp), %rdx
+ mulxq 144(%rsp), %rdx, %rcx
+ adcxq %rdx, %r14
+ # A[3] * B[2]
+ movq 112(%rsp), %rdx
+ adcxq %rcx, %r15
+ mulxq 152(%rsp), %rcx, %rdx
+ adcxq %rbx, %rbp
+ adoxq %rcx, %r14
+ adoxq %rdx, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rbx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rbx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rbx, %rbx
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rcx, %r15
+ adcxq %rcx, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rbx, %rdx
+ adcxq %rbx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rbx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, (%rsp)
+ movq %r10, 8(%rsp)
+ movq %r11, 16(%rsp)
+ movq %r12, 24(%rsp)
+ decb 168(%rsp)
+ jge L_curve25519_avx2_bits
+ movq $63, 168(%rsp)
+ decb 160(%rsp)
+ jge L_curve25519_avx2_words
+ # Invert
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ movq %rsp, %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 96(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $4, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 128(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 128(%rsp), %rdi
+ leaq 128(%rsp), %rsi
+ movq $19, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 128(%rsp), %rsi
+ leaq 96(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $9, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 128(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 128(%rsp), %rdi
+ leaq 128(%rsp), %rsi
+ movq $0x63, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 128(%rsp), %rsi
+ leaq 96(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 96(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ movq $49, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 96(%rsp), %rsi
+ leaq 64(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movq $4, %rdx
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ movq %rsp, %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ movq 176(%rsp), %rdi
+ # Multiply
+ # A[0] * B[0]
+ movq (%rsp), %rdx
+ mulxq (%rdi), %r9, %r10
+ # A[2] * B[0]
+ mulxq 16(%rdi), %r11, %r12
+ # A[1] * B[0]
+ mulxq 8(%rdi), %rcx, %rbx
+ xorq %rbp, %rbp
+ adcxq %rcx, %r10
+ # A[1] * B[3]
+ movq 24(%rsp), %rdx
+ mulxq 8(%rdi), %r13, %r14
+ adcxq %rbx, %r11
+ # A[0] * B[1]
+ movq 8(%rsp), %rdx
+ mulxq (%rdi), %rcx, %rbx
+ adoxq %rcx, %r10
+ # A[2] * B[1]
+ mulxq 16(%rdi), %rcx, %r15
+ adoxq %rbx, %r11
+ adcxq %rcx, %r12
+ # A[1] * B[2]
+ movq 16(%rsp), %rdx
+ mulxq 8(%rdi), %rcx, %rbx
+ adcxq %r15, %r13
+ adoxq %rcx, %r12
+ adcxq %rbp, %r14
+ adoxq %rbx, %r13
+ # A[0] * B[2]
+ mulxq (%rdi), %rcx, %rbx
+ adoxq %rbp, %r14
+ xorq %r15, %r15
+ adcxq %rcx, %r11
+ # A[1] * B[1]
+ movq 8(%rsp), %rdx
+ mulxq 8(%rdi), %rdx, %rcx
+ adcxq %rbx, %r12
+ adoxq %rdx, %r11
+ # A[3] * B[1]
+ movq 8(%rsp), %rdx
+ adoxq %rcx, %r12
+ mulxq 24(%rdi), %rcx, %rbx
+ adcxq %rcx, %r13
+ # A[2] * B[2]
+ movq 16(%rsp), %rdx
+ mulxq 16(%rdi), %rdx, %rcx
+ adcxq %rbx, %r14
+ adoxq %rdx, %r13
+ # A[3] * B[3]
+ movq 24(%rsp), %rdx
+ adoxq %rcx, %r14
+ mulxq 24(%rdi), %rcx, %rbx
+ adoxq %rbp, %r15
+ adcxq %rcx, %r15
+ # A[0] * B[3]
+ mulxq (%rdi), %rdx, %rcx
+ adcxq %rbx, %rbp
+ xorq %rbx, %rbx
+ adcxq %rdx, %r12
+ # A[3] * B[0]
+ movq (%rsp), %rdx
+ adcxq %rcx, %r13
+ mulxq 24(%rdi), %rdx, %rcx
+ adoxq %rdx, %r12
+ adoxq %rcx, %r13
+ # A[2] * B[3]
+ movq 24(%rsp), %rdx
+ mulxq 16(%rdi), %rdx, %rcx
+ adcxq %rdx, %r14
+ # A[3] * B[2]
+ movq 16(%rsp), %rdx
+ adcxq %rcx, %r15
+ mulxq 24(%rdi), %rcx, %rdx
+ adcxq %rbx, %rbp
+ adoxq %rcx, %r14
+ adoxq %rdx, %r15
+ adoxq %rbx, %rbp
+ # Reduce
+ movq $0x7fffffffffffffff, %rbx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r15, %rbp
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ andq %rbx, %r12
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rbx, %rbx
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %rcx, %r15
+ adcxq %rcx, %r11
+ adoxq %r15, %r12
+ mulxq %rbp, %rbp, %rdx
+ adcxq %rbp, %r12
+ adoxq %rbx, %rdx
+ adcxq %rbx, %rdx
+ # Overflow
+ shldq $0x01, %r12, %rdx
+ movq $0x7fffffffffffffff, %rbx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Reduce if top bit set
+ movq %r12, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rbx, %r12
+ addq %rcx, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Store
+ movq %r9, (%rdi)
+ movq %r10, 8(%rdi)
+ movq %r11, 16(%rdi)
+ movq %r12, 24(%rdi)
+ xorq %rax, %rax
+ addq $0xc0, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size curve25519_avx2,.-curve25519_avx2
+#endif /* __APPLE__ */
+# void fe_pow22523_avx2(fe r, const fe a)
+# ABI:   SysV AMD64.  In: rdi = r (out, 4x64-bit limbs), rsi = a (in).
+# Out:   r = a^(2^252 - 3) mod 2^255-19 (Ed25519 pow22523; the square/
+#        multiply schedule below is the standard 2^252-3 addition chain,
+#        verified by following the temporaries: 2,4,8,9,11,22,31=2^5-1,
+#        2^10-1, 2^20-1, 2^40-1, 2^50-1, 2^100-1, 2^200-1, 2^250-1,
+#        two squarings, final multiply by a).
+# No explicit return value is produced (rax not set).
+# Stack frame (0x70 bytes): t0 @ 0(%rsp), t1 @ 32(%rsp), t2 @ 64(%rsp);
+# saved r @ 96(%rsp), saved a @ 104(%rsp).  All field arithmetic is
+# delegated to fe_sq_avx2 / fe_sq_n_avx2 / fe_mul_avx2; no callee-saved
+# registers are used here, so no push/pop is needed.  Note the 0x70
+# adjustment keeps rsp 16-byte aligned at every callq (entry rsp%16==8).
+#ifndef __APPLE__
+.text
+.globl fe_pow22523_avx2
+.type fe_pow22523_avx2,@function
+.align 4
+fe_pow22523_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_pow22523_avx2
+.p2align 2
+_fe_pow22523_avx2:
+#endif /* __APPLE__ */
+ subq $0x70, %rsp
+ # pow22523
+ movq %rdi, 96(%rsp)
+ movq %rsi, 104(%rsp)
+ # t0 = a^2
+ movq %rsp, %rdi
+ movq 104(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ # t1 = t0^2 = a^4
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ # t1 = t1^2 = a^8
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ # t1 = a * t1 = a^9
+ leaq 32(%rsp), %rdi
+ movq 104(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t0 = t0 * t1 = a^11
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t0 = t0^2 = a^22
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ # t0 = t1 * t0 = a^31 = a^(2^5-1)
+ movq %rsp, %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t1 = t0^(2^5) (one square plus 4 more via fe_sq_n; count in dl)
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movb $4, %dl
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ # t0 = t1 * t0 = a^(2^10-1)
+ movq %rsp, %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t1 = t0^(2^10); t1 = t1 * t0 = a^(2^20-1)
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movb $9, %dl
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t2 = t1^(2^20); t1 = t2 * t1 = a^(2^40-1)
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movb $19, %dl
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t1 = t1^(2^10); t0 = t1 * t0 = a^(2^50-1)
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movb $9, %dl
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ movq %rsp, %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t1 = t0^(2^50); t1 = t1 * t0 = a^(2^100-1)
+ leaq 32(%rsp), %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movb $49, %dl
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t2 = t1^(2^100) (0x63 = 99 extra squares); t1 = t2 * t1 = a^(2^200-1)
+ leaq 64(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 64(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ movb $0x63, %dl
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 64(%rsp), %rsi
+ leaq 32(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t1 = t1^(2^50); t0 = t1 * t0 = a^(2^250-1)
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ leaq 32(%rsp), %rdi
+ leaq 32(%rsp), %rsi
+ movb $49, %dl
+#ifndef __APPLE__
+ callq fe_sq_n_avx2@plt
+#else
+ callq _fe_sq_n_avx2
+#endif /* __APPLE__ */
+ movq %rsp, %rdi
+ leaq 32(%rsp), %rsi
+ movq %rsp, %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # t0 = t0^(2^2) = a^(2^252-4)
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ movq %rsp, %rdi
+ movq %rsp, %rsi
+#ifndef __APPLE__
+ callq fe_sq_avx2@plt
+#else
+ callq _fe_sq_avx2
+#endif /* __APPLE__ */
+ # r = t0 * a = a^(2^252-3)
+ movq 96(%rsp), %rdi
+ movq %rsp, %rsi
+ movq 104(%rsp), %rdx
+#ifndef __APPLE__
+ callq fe_mul_avx2@plt
+#else
+ callq _fe_mul_avx2
+#endif /* __APPLE__ */
+ # Restore caller-visible rdi/rsi (matches the generator's pattern;
+ # harmless even though the values are dead after ret).
+ movq 104(%rsp), %rsi
+ movq 96(%rsp), %rdi
+ addq $0x70, %rsp
+ repz retq
+#ifndef __APPLE__
+.size fe_pow22523_avx2,.-fe_pow22523_avx2
+#endif /* __APPLE__ */
+# void fe_ge_to_p2_avx2(fe rx, fe ry, fe rz,
+#                       const fe px, const fe py, const fe pz, const fe pt)
+# ABI: SysV AMD64; args rdi,rsi,rdx,rcx,r8,r9 plus one stack arg (pt).
+# Computes three products mod 2^255-19 (register flow below shows):
+#   rx (arg1) = px (arg4) * pt (arg7)
+#   ry (arg2) = py (arg5) * pz (arg6)
+#   rz (arg3) = pt (arg7) * pz (arg6)
+# i.e. the standard ge p1p1 -> p2 conversion.
+# Each product is a 4x4-limb schoolbook multiply using MULX with two
+# independent ADCX/ADOX carry chains; the instruction order IS the
+# carry-chain correctness invariant -- do not reorder.
+# Clobbers: rax, rcx, rdx, rsi, rdi, r8-r15 (rbx/r12-r15 saved/restored).
+# Stack: 5 pushes + 40-byte frame; rsp stays 16-aligned (leaf, no calls).
+# 88(%rsp) = entry 8(%rsp) = the 7th argument (pt).
+#ifndef __APPLE__
+.text
+.globl fe_ge_to_p2_avx2
+.type fe_ge_to_p2_avx2,@function
+.align 4
+fe_ge_to_p2_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_ge_to_p2_avx2
+.p2align 2
+_fe_ge_to_p2_avx2:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $40, %rsp
+ # Spill pointer args 2..6 so rsi/rdx/rcx can be used as scratch.
+ movq %rsi, (%rsp)
+ movq %rdx, 8(%rsp)
+ movq %rcx, 16(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+ # --- rx (rdi) = px (arg4, rsi) * pt (arg7, rbx) ---
+ movq 16(%rsp), %rsi
+ movq 88(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbx), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbx), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbx), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbx), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce: 512-bit product in r8..r15 folded mod 2^255-19.
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # --- ry (rdi = arg2) = py (arg5, rsi) * pz (arg6, rbx) ---
+ movq (%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbx), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbx), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbx), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbx), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce (same mod 2^255-19 fold as above)
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # --- rz (rdi = arg3) = pt (arg7, rsi) * pz (arg6, rbx unchanged) ---
+ movq 8(%rsp), %rdi
+ movq 88(%rsp), %rsi
+ # Multiply
+ # A[0] * B[0]
+ movq (%rsi), %rdx
+ mulxq (%rbx), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rbx), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rbx), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rsi), %rdx
+ mulxq 8(%rbx), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rsi), %rdx
+ mulxq (%rbx), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rbx), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rsi), %rdx
+ mulxq 8(%rbx), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rbx), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rsi), %rdx
+ mulxq 8(%rbx), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rsi), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rbx), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rsi), %rdx
+ mulxq 16(%rbx), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rsi), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rbx), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rbx), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rsi), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rbx), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rsi), %rdx
+ mulxq 16(%rbx), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rsi), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rbx), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce (same mod 2^255-19 fold as above)
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Epilogue: restore callee-saved registers in reverse push order.
+ addq $40, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_to_p2_avx2,.-fe_ge_to_p2_avx2
+#endif /* __APPLE__ */
+ # fe_ge_to_p3_avx2 -- group-element conversion helper built from four
+ # 255-bit field multiplications using BMI2/ADX (mulx + dual adcx/adox
+ # carry chains).  Field elements are four 64-bit limbs; each 512-bit
+ # product is reduced mod 2^255-19 (top half multiplied by 19, top bit
+ # masked with 0x7fffffffffffffff -- see the "Reduce" sections below).
+ # SysV AMD64 ABI.  The pointer args in rsi,rdx,rcx,r8,r9 are spilled to
+ # the local frame because rdx/rcx are consumed by mulx; the stack-passed
+ # args are then read from 88(%rsp)/96(%rsp) (frame = 5 pushes + 40 bytes,
+ # so the caller's stack args sit 88 bytes above the new rsp).
+ # Callee-saved rbx,r12-r15 are saved/restored; rax,rcx,rdx,r8-r11 clobbered.
+#ifndef __APPLE__
+.text
+.globl fe_ge_to_p3_avx2
+.type fe_ge_to_p3_avx2,@function
+.align 4
+fe_ge_to_p3_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_ge_to_p3_avx2
+.p2align 2
+_fe_ge_to_p3_avx2:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $40, %rsp
+ # Spill register args so rdx/rcx are free for mulx below.
+ movq %rsi, (%rsp)
+ movq %rdx, 8(%rsp)
+ movq %rcx, 16(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+ # Product 1: arg5 (24(%rsp)) * stack arg at 96(%rsp) -> (%rdi) (arg1).
+ movq 24(%rsp), %rsi
+ movq 96(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ # xor clears both CF and OF: the adcx and adox chains start clean.
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbx), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbx), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbx), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbx), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce the 512-bit product r8..r15 mod 2^255-19:
+ # split at bit 255, fold the high 257 bits back in multiplied by 19.
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow: fold any carry past bit 255 back in, times 19.
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Product 2: arg6 (32(%rsp)) * stack arg at 88(%rsp) -> arg2 ((%rsp)).
+ movq (%rsp), %rdi
+ movq 32(%rsp), %rsi
+ movq 88(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbx), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbx), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbx), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbx), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Product 3: stack arg at 96(%rsp) * stack arg at 88(%rsp) (rbx still
+ # holds 88(%rsp) from above) -> arg3 (8(%rsp)).
+ movq 8(%rsp), %rdi
+ movq 96(%rsp), %rsi
+ # Multiply
+ # A[0] * B[0]
+ movq (%rsi), %rdx
+ mulxq (%rbx), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rbx), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rbx), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rsi), %rdx
+ mulxq 8(%rbx), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rsi), %rdx
+ mulxq (%rbx), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rbx), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rsi), %rdx
+ mulxq 8(%rbx), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rbx), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rsi), %rdx
+ mulxq 8(%rbx), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rsi), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rbx), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rsi), %rdx
+ mulxq 16(%rbx), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rsi), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rbx), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rbx), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rsi), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rbx), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rsi), %rdx
+ mulxq 16(%rbx), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rsi), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rbx), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Product 4: arg5 (24(%rsp)) * arg6 (32(%rsp)) -> arg4 (16(%rsp)).
+ movq 16(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 32(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbx), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbx), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbx), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbx), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Epilogue: restore callee-saved registers in reverse push order.
+ addq $40, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ # Two-byte "rep ret" return idiom.
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_to_p3_avx2,.-fe_ge_to_p3_avx2
+#endif /* __APPLE__ */
+ # fe_ge_dbl_avx2 -- group-element doubling helper built from 255-bit
+ # field squarings, additions and subtractions using BMI2/ADX
+ # (mulx + dual adcx/adox carry chains).  Field elements are four 64-bit
+ # limbs reduced mod 2^255-19.
+ # SysV AMD64 ABI.  All six register args (rdi..r9) are spilled to the
+ # local frame at (%rsp)..40(%rsp); the 7th (stack) arg is read from
+ # 104(%rsp) (frame = 6 pushes + 48 bytes).  Callee-saved
+ # rbp,rbx,r12-r15 are saved/restored; rax,rcx,rdx,r8-r11 clobbered.
+#ifndef __APPLE__
+.text
+.globl fe_ge_dbl_avx2
+.type fe_ge_dbl_avx2,@function
+.align 4
+fe_ge_dbl_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_ge_dbl_avx2
+.p2align 2
+_fe_ge_dbl_avx2:
+#endif /* __APPLE__ */
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $48, %rsp
+ # Spill all six register args so rdx/rcx are free for mulx below.
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ # Square arg5 (32(%rsp)) -> (%rdi) (arg1, still live in rdi).
+ movq 32(%rsp), %rsi
+ # Square
+ # A[0] * A[1]
+ movq (%rsi), %rdx
+ mulxq 8(%rsi), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rsi), %r11, %r12
+ # A[2] * A[1]
+ movq 16(%rsi), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ # xor clears both CF and OF: the adcx and adox chains start clean.
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ # A[2] * A[3]
+ mulxq 24(%rsi), %r13, %r14
+ adoxq %rax, %r12
+ # A[2] * A[0]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ # A[1] * A[3]
+ movq 8(%rsi), %rdx
+ mulxq 24(%rsi), %rbp, %r8
+ adcxq %rax, %r11
+ adcxq %rbp, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %r8, %rbp
+ adcxq %r9, %r9
+ # A[1] * A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rcx, %rax
+ adcxq %r10, %r10
+ adoxq %rbp, %r9
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rbp, %rcx
+ adcxq %r12, %r12
+ adoxq %rax, %r11
+ adcxq %r13, %r13
+ adoxq %rbp, %r12
+ # A[3] * A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rbp, %rax
+ adcxq %r14, %r14
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rbp, %r14
+ adoxq %rax, %r15
+ # Reduce the 512-bit square r8..r15 mod 2^255-19.
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r12, %rbp, %r12
+ adcxq %rbp, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rbp, %r13
+ adcxq %rbp, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rbp, %r14
+ adcxq %rbp, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rbp
+ andq %rcx, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rbp
+ andq %rcx, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Square arg6 (40(%rsp)) -> arg3 (16(%rsp)).
+ movq 16(%rsp), %rdi
+ movq 40(%rsp), %rbx
+ # Square
+ # A[0] * A[1]
+ movq (%rbx), %rdx
+ mulxq 8(%rbx), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rbx), %r11, %r12
+ # A[2] * A[1]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rbx), %rcx, %rax
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ # A[2] * A[3]
+ mulxq 24(%rbx), %r13, %r14
+ adoxq %rax, %r12
+ # A[2] * A[0]
+ mulxq (%rbx), %rcx, %rax
+ adoxq %r15, %r13
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ # A[1] * A[3]
+ movq 8(%rbx), %rdx
+ mulxq 24(%rbx), %rbp, %r8
+ adcxq %rax, %r11
+ adcxq %rbp, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rbx), %rdx
+ mulxq %rdx, %r8, %rbp
+ adcxq %r9, %r9
+ # A[1] * A[1]
+ movq 8(%rbx), %rdx
+ mulxq %rdx, %rcx, %rax
+ adcxq %r10, %r10
+ adoxq %rbp, %r9
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rbx), %rdx
+ mulxq %rdx, %rbp, %rcx
+ adcxq %r12, %r12
+ adoxq %rax, %r11
+ adcxq %r13, %r13
+ adoxq %rbp, %r12
+ # A[3] * A[3]
+ movq 24(%rbx), %rdx
+ mulxq %rdx, %rbp, %rax
+ adcxq %r14, %r14
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rbp, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r12, %rbp, %r12
+ adcxq %rbp, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rbp, %r13
+ adcxq %rbp, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rbp, %r14
+ adcxq %rbp, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rbp
+ andq %rcx, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rbp
+ andq %rcx, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Add arg5 + arg6 (rsi/rbx still point at them) -> arg2 (8(%rsp)),
+ # with conditional subtraction of the modulus (masked, branch-free).
+ movq 8(%rsp), %rdi
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq (%rbx), %r8
+ movq 16(%rsi), %r10
+ adcq 8(%rbx), %r9
+ movq 24(%rsi), %rdx
+ adcq 16(%rbx), %r10
+ movq $-19, %rcx
+ adcq 24(%rbx), %rdx
+ movq $0x7fffffffffffffff, %rax
+ movq %rdx, %r11
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Square that sum ((%rdi)) -> arg4 (24(%rsp)).
+ movq 24(%rsp), %rsi
+ # Square
+ # A[0] * A[1]
+ movq (%rdi), %rdx
+ mulxq 8(%rdi), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rdi), %r11, %r12
+ # A[2] * A[1]
+ movq 16(%rdi), %rdx
+ mulxq 8(%rdi), %rcx, %rax
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ # A[2] * A[3]
+ mulxq 24(%rdi), %r13, %r14
+ adoxq %rax, %r12
+ # A[2] * A[0]
+ mulxq (%rdi), %rcx, %rax
+ adoxq %r15, %r13
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ # A[1] * A[3]
+ movq 8(%rdi), %rdx
+ mulxq 24(%rdi), %rbp, %r8
+ adcxq %rax, %r11
+ adcxq %rbp, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rdi), %rdx
+ mulxq %rdx, %r8, %rbp
+ adcxq %r9, %r9
+ # A[1] * A[1]
+ movq 8(%rdi), %rdx
+ mulxq %rdx, %rcx, %rax
+ adcxq %r10, %r10
+ adoxq %rbp, %r9
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rdi), %rdx
+ mulxq %rdx, %rbp, %rcx
+ adcxq %r12, %r12
+ adoxq %rax, %r11
+ adcxq %r13, %r13
+ adoxq %rbp, %r12
+ # A[3] * A[3]
+ movq 24(%rdi), %rdx
+ mulxq %rdx, %rbp, %rax
+ adcxq %r14, %r14
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rbp, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rcx
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rcx, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rcx, %rcx
+ mulxq %r12, %rbp, %r12
+ adcxq %rbp, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rbp, %r13
+ adcxq %rbp, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rbp, %r14
+ adcxq %rbp, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rcx, %rdx
+ adcxq %rcx, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rcx
+ imulq $19, %rdx, %rbp
+ andq %rcx, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rbp
+ andq %rcx, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rsi)
+ movq %r9, 8(%rsi)
+ movq %r10, 16(%rsi)
+ movq %r11, 24(%rsi)
+ # Combined add/sub: arg3 + arg1 -> (%rdi) (arg2) in r8..r11,
+ # arg3 - arg1 -> (%rsi) (arg3) in r12..r15, each reduced branch-free.
+ movq 16(%rsp), %rsi
+ movq (%rsp), %rbx
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %rdx
+ movq %r8, %r12
+ addq (%rbx), %r8
+ movq %r9, %r13
+ adcq 8(%rbx), %r9
+ movq %r10, %r14
+ adcq 16(%rbx), %r10
+ movq %rdx, %r15
+ adcq 24(%rbx), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rbx), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rbx), %r13
+ movq $-19, %rcx
+ sbbq 16(%rbx), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rbx), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, (%rsi)
+ movq %r13, 8(%rsi)
+ movq %r14, 16(%rsi)
+ movq %r15, 24(%rsi)
+ # Subtract: arg4 (24(%rsp)) - (%rdi) -> (%rbx) (arg1).
+ movq 24(%rsp), %rsi
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rdi), %r8
+ movq $0x00, %rdx
+ sbbq 8(%rdi), %r9
+ movq $-19, %rcx
+ sbbq 16(%rdi), %r10
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rdi), %r11
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r8
+ adcq %rdx, %r9
+ adcq %rdx, %r10
+ adcq %rax, %r11
+ movq %r8, (%rbx)
+ movq %r9, 8(%rbx)
+ movq %r10, 16(%rbx)
+ movq %r11, 24(%rbx)
+ # 2 * square of the 7th (stack) arg at 104(%rsp) -> (%rsi) (arg4).
+ movq 104(%rsp), %rdi
+ # Square * 2
+ # A[0] * A[1]
+ movq (%rdi), %rdx
+ mulxq 8(%rdi), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rdi), %r11, %r12
+ # A[2] * A[1]
+ movq 16(%rdi), %rdx
+ mulxq 8(%rdi), %rcx, %rax
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ # A[2] * A[3]
+ mulxq 24(%rdi), %r13, %r14
+ adoxq %rax, %r12
+ # A[2] * A[0]
+ mulxq (%rdi), %rcx, %rax
+ adoxq %r15, %r13
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ # A[1] * A[3]
+ movq 8(%rdi), %rdx
+ mulxq 24(%rdi), %rbp, %r8
+ adcxq %rax, %r11
+ adcxq %rbp, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rdi), %rdx
+ mulxq %rdx, %r8, %rbp
+ adcxq %r9, %r9
+ # A[1] * A[1]
+ movq 8(%rdi), %rdx
+ mulxq %rdx, %rcx, %rax
+ adcxq %r10, %r10
+ adoxq %rbp, %r9
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rdi), %rdx
+ mulxq %rdx, %rbp, %rcx
+ adcxq %r12, %r12
+ adoxq %rax, %r11
+ adcxq %r13, %r13
+ adoxq %rbp, %r12
+ # A[3] * A[3]
+ movq 24(%rdi), %rdx
+ mulxq %rdx, %rbp, %rax
+ adcxq %r14, %r14
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rbp, %r14
+ adoxq %rax, %r15
+ # Reduce; the doubling is folded into the shifts below (low limbs
+ # shifted left by 1, high limbs by 2, overflow bits by 3).
+ movq $0x7fffffffffffffff, %rax
+ xorq %rbp, %rbp
+ # Move top half into t4-t7 and remove top bit from t3 and double
+ shldq $3, %r15, %rbp
+ shldq $2, %r14, %r15
+ shldq $2, %r13, %r14
+ shldq $2, %r12, %r13
+ shldq $2, %r11, %r12
+ shldq $0x01, %r10, %r11
+ shldq $0x01, %r9, %r10
+ shldq $0x01, %r8, %r9
+ shlq $0x01, %r8
+ andq %rax, %r11
+ # Two out left, one in right
+ andq %rax, %r15
+ # Multiply top bits by 19*19
+ imulq $0x169, %rbp, %rcx
+ xorq %rax, %rax
+ # Multiply top half by 19
+ movq $19, %rdx
+ adoxq %rcx, %r8
+ mulxq %r12, %rbp, %r12
+ adcxq %rbp, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rbp, %r13
+ adcxq %rbp, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rbp, %r14
+ adcxq %rbp, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rbp
+ andq %rax, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rbp
+ andq %rax, %r11
+ addq %rbp, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rsi)
+ movq %r9, 8(%rsi)
+ movq %r10, 16(%rsi)
+ movq %r11, 24(%rsi)
+ # Subtract: (%rsi) (arg4) - arg3 (16(%rsp)) -> (%rsi) in place.
+ movq 16(%rsp), %rdi
+ # Sub
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq (%rdi), %r8
+ movq $0x00, %rdx
+ sbbq 8(%rdi), %r9
+ movq $-19, %rcx
+ sbbq 16(%rdi), %r10
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rdi), %r11
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r8
+ adcq %rdx, %r9
+ adcq %rdx, %r10
+ adcq %rax, %r11
+ movq %r8, (%rsi)
+ movq %r9, 8(%rsi)
+ movq %r10, 16(%rsi)
+ movq %r11, 24(%rsi)
+ # Epilogue: restore callee-saved registers in reverse push order.
+ addq $48, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ # Two-byte "rep ret" return idiom.
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_dbl_avx2,.-fe_ge_dbl_avx2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_madd_avx2
+.type fe_ge_madd_avx2,@function
+.align 4
+fe_ge_madd_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_ge_madd_avx2
+.p2align 2
+_fe_ge_madd_avx2:
+#endif /* __APPLE__ */
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $48, %rsp
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 8(%rsp), %rsi
+ movq 40(%rsp), %rbx
+ movq 32(%rsp), %rbp
+ # Add
+ movq (%rbx), %r8
+ movq 8(%rbx), %r9
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %rdx
+ movq %r8, %r12
+ addq (%rbp), %r8
+ movq %r9, %r13
+ adcq 8(%rbp), %r9
+ movq %r10, %r14
+ adcq 16(%rbp), %r10
+ movq %rdx, %r15
+ adcq 24(%rbp), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rbp), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rbp), %r13
+ movq $-19, %rcx
+ sbbq 16(%rbp), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rbp), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, (%rsi)
+ movq %r13, 8(%rsi)
+ movq %r14, 16(%rsi)
+ movq %r15, 24(%rsi)
+ movq 16(%rsp), %rbx
+ movq 128(%rsp), %rbp
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbp), %rdx
+ mulxq (%rdi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rdi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rdi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 8(%rdi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq (%rdi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rdi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 8(%rdi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rdi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq 8(%rdi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbp), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rdi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 16(%rdi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbp), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rdi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rdi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbp), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rdi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 16(%rdi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbp), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rdi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rbx)
+ movq %r9, 8(%rbx)
+ movq %r10, 16(%rbx)
+ movq %r11, 24(%rbx)
+ movq 136(%rsp), %rdi
+ # Multiply
+ # A[0] * B[0]
+ movq (%rdi), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rdi), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rdi), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rdi), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rdi), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rdi), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rdi), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rdi), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rdi), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rdi), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rdi), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rsi)
+ movq %r9, 8(%rsi)
+ movq %r10, 16(%rsi)
+ movq %r11, 24(%rsi)
+ movq 24(%rsp), %rdi
+ movq 120(%rsp), %rsi
+ movq 112(%rsp), %rbp
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbp), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbp), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbp), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbp), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbp), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 8(%rsp), %rdi
+ movq (%rsp), %rsi
+ # Add
+ movq (%rbx), %r8
+ movq 8(%rbx), %r9
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %rdx
+ movq %r8, %r12
+ addq (%rdi), %r8
+ movq %r9, %r13
+ adcq 8(%rdi), %r9
+ movq %r10, %r14
+ adcq 16(%rdi), %r10
+ movq %rdx, %r15
+ adcq 24(%rdi), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rdi), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rdi), %r13
+ movq $-19, %rcx
+ sbbq 16(%rdi), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rdi), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, (%rsi)
+ movq %r13, 8(%rsi)
+ movq %r14, 16(%rsi)
+ movq %r15, 24(%rsi)
+ movq 104(%rsp), %rdi
+ # Double
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ addq %r8, %r8
+ movq 16(%rdi), %r10
+ adcq %r9, %r9
+ movq 24(%rdi), %rdx
+ adcq %r10, %r10
+ movq $-19, %rcx
+ adcq %rdx, %rdx
+ movq $0x7fffffffffffffff, %rax
+ movq %rdx, %r11
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ movq %r8, (%rbx)
+ movq %r9, 8(%rbx)
+ movq %r10, 16(%rbx)
+ movq %r11, 24(%rbx)
+ movq 24(%rsp), %rdi
+ # Add
+ movq (%rbx), %r8
+ movq 8(%rbx), %r9
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %rdx
+ movq %r8, %r12
+ addq (%rdi), %r8
+ movq %r9, %r13
+ adcq 8(%rdi), %r9
+ movq %r10, %r14
+ adcq 16(%rdi), %r10
+ movq %rdx, %r15
+ adcq 24(%rdi), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rdi), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rdi), %r13
+ movq $-19, %rcx
+ sbbq 16(%rdi), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rdi), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rbx)
+ movq %r9, 8(%rbx)
+ movq %r10, 16(%rbx)
+ movq %r11, 24(%rbx)
+ movq %r12, (%rdi)
+ movq %r13, 8(%rdi)
+ movq %r14, 16(%rdi)
+ movq %r15, 24(%rdi)
+ addq $48, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_madd_avx2,.-fe_ge_madd_avx2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_msub_avx2
+.type fe_ge_msub_avx2,@function
+.align 4
+fe_ge_msub_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_ge_msub_avx2
+.p2align 2
+_fe_ge_msub_avx2:
+#endif /* __APPLE__ */
+ # fe_ge_msub_avx2 -- one "mixed subtract" step of point arithmetic over
+ # GF(2^255 - 19); each field element is 4 x 64-bit little-endian limbs.
+ # The modulus is fixed by the $19 / $0x7fffffffffffffff constants used
+ # in the reduction code below.  BMI2/ADX code path: mulx products are
+ # accumulated on dual adcx/adox carry chains.
+ # SysV AMD64 ABI: six pointer args arrive in rdi,rsi,rdx,rcx,r8,r9 and
+ # are spilled to the local frame at 0..40(%rsp); the remaining pointer
+ # args are read from the caller's stack at 104..136(%rsp)
+ # (= 8 ret addr + 48 of pushes + 48 frame).  The exact role of each
+ # operand is defined by the C prototype -- confirm against fe_ge_msub
+ # in the C sources (ge operations).
+ # Structure: add/sub pair, three 4x4-limb multiplies with reduction,
+ # a double, then a final add/sub pair.
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $48, %rsp
+ # Spill register args so rdi/rsi/rbx/rbp can be reused as pointers.
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 8(%rsp), %rsi
+ movq 40(%rsp), %rbx
+ movq 32(%rsp), %rbp
+ # Add
+ # Computes (a+b) mod p into rdi[] and (a-b) mod p into rsi[] in one
+ # pass; each result is conditionally corrected by +/- p using the
+ # sign/carry mask built in rdx.
+ movq (%rbx), %r8
+ movq 8(%rbx), %r9
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %rdx
+ movq %r8, %r12
+ addq (%rbp), %r8
+ movq %r9, %r13
+ adcq 8(%rbp), %r9
+ movq %r10, %r14
+ adcq 16(%rbp), %r10
+ movq %rdx, %r15
+ adcq 24(%rbp), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rbp), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rbp), %r13
+ movq $-19, %rcx
+ sbbq 16(%rbp), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rbp), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, (%rsi)
+ movq %r13, 8(%rsi)
+ movq %r14, 16(%rsi)
+ movq %r15, 24(%rsi)
+ # First multiply: B operand from the 5th stack-passed arg (136(%rsp)),
+ # A operand is the sum just stored at rdi[]; result goes to rbx[].
+ movq 16(%rsp), %rbx
+ movq 136(%rsp), %rbp
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbp), %rdx
+ mulxq (%rdi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rdi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rdi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 8(%rdi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq (%rdi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rdi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 8(%rdi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rdi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq 8(%rdi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbp), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rdi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 16(%rdi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbp), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rdi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rdi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbp), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rdi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 16(%rdi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbp), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rdi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ # Fold the 512-bit product r8..r15 back below 2^255: shift the top
+ # 257 bits down and add them times 19 into the low half
+ # (2^255 == 19 mod p), then do two cheap conditional fix-ups.
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rbx)
+ movq %r9, 8(%rbx)
+ movq %r10, 16(%rbx)
+ movq %r11, 24(%rbx)
+ # Second multiply: B operand from the 4th stack-passed arg
+ # (128(%rsp)); A is the difference at rsi[]; result overwrites rsi[].
+ movq 128(%rsp), %rdi
+ # Multiply
+ # A[0] * B[0]
+ movq (%rdi), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rdi), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rdi), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rdi), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rdi), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rdi), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rdi), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rdi), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rdi), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rdi), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rdi), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rsi)
+ movq %r9, 8(%rsi)
+ movq %r10, 16(%rsi)
+ movq %r11, 24(%rsi)
+ # Third multiply: operands from the 2nd and 3rd stack-passed args
+ # (112/120(%rsp)); result goes to the 4th register arg's buffer.
+ movq 24(%rsp), %rdi
+ movq 120(%rsp), %rsi
+ movq 112(%rsp), %rbp
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbp), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbp), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbp), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbp), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbp), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 8(%rsp), %rsi
+ movq (%rsp), %rbp
+ # Add
+ movq (%rbx), %r8
+ movq 8(%rbx), %r9
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %rdx
+ movq %r8, %r12
+ addq (%rsi), %r8
+ movq %r9, %r13
+ adcq 8(%rsi), %r9
+ movq %r10, %r14
+ adcq 16(%rsi), %r10
+ movq %rdx, %r15
+ adcq 24(%rsi), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rsi), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rsi), %r13
+ movq $-19, %rcx
+ sbbq 16(%rsi), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rsi), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rsi)
+ movq %r9, 8(%rsi)
+ movq %r10, 16(%rsi)
+ movq %r11, 24(%rsi)
+ movq %r12, (%rbp)
+ movq %r13, 8(%rbp)
+ movq %r14, 16(%rbp)
+ movq %r15, 24(%rbp)
+ # Double the element referenced by the 1st stack-passed arg
+ # (104(%rsp)); result goes to rbx[].
+ movq 104(%rsp), %rsi
+ # Double
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ addq %r8, %r8
+ movq 16(%rsi), %r10
+ adcq %r9, %r9
+ movq 24(%rsi), %rdx
+ adcq %r10, %r10
+ movq $-19, %rcx
+ adcq %rdx, %rdx
+ movq $0x7fffffffffffffff, %rax
+ movq %rdx, %r11
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ movq %r8, (%rbx)
+ movq %r9, 8(%rbx)
+ movq %r10, 16(%rbx)
+ movq %r11, 24(%rbx)
+ # Add
+ movq (%rbx), %r8
+ movq 8(%rbx), %r9
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %rdx
+ movq %r8, %r12
+ addq (%rdi), %r8
+ movq %r9, %r13
+ adcq 8(%rdi), %r9
+ movq %r10, %r14
+ adcq 16(%rdi), %r10
+ movq %rdx, %r15
+ adcq 24(%rdi), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rdi), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rdi), %r13
+ movq $-19, %rcx
+ sbbq 16(%rdi), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rdi), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, (%rbx)
+ movq %r13, 8(%rbx)
+ movq %r14, 16(%rbx)
+ movq %r15, 24(%rbx)
+ # Epilogue: drop the frame and restore callee-saved regs in exact
+ # reverse push order; "repz retq" is a plain ret with a benign prefix.
+ addq $48, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_msub_avx2,.-fe_ge_msub_avx2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_add_avx2
+.type fe_ge_add_avx2,@function
+.align 4
+fe_ge_add_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_ge_add_avx2
+.p2align 2
+_fe_ge_add_avx2:
+#endif /* __APPLE__ */
+ # fe_ge_add_avx2 -- one point-addition step over GF(2^255 - 19);
+ # each field element is 4 x 64-bit little-endian limbs, modulus fixed
+ # by the $19 / $0x7fffffffffffffff constants in the reductions below.
+ # BMI2/ADX code path: mulx products on dual adcx/adox carry chains.
+ # SysV AMD64 ABI: six pointer args in rdi,rsi,rdx,rcx,r8,r9, spilled
+ # to 0..40(%rsp); six more pointer args on the caller's stack at
+ # 136..176(%rsp) (= 8 ret addr + 48 of pushes + 0x50 frame).  The
+ # frame also holds a scratch field element at 48(%rsp) used by the
+ # Double step.  Exact operand roles are defined by the C prototype --
+ # confirm against fe_ge_add in the C sources (ge operations).
+ # Structure: add/sub pair, four 4x4-limb multiplies with reduction,
+ # a double into the scratch slot, then two final add/sub pairs.
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x50, %rsp
+ # Spill register args so rdi/rsi/rbx/rbp can be reused as pointers.
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 8(%rsp), %rsi
+ movq 40(%rsp), %rbx
+ movq 32(%rsp), %rbp
+ # Add
+ # Computes (a+b) mod p into rdi[] and (a-b) mod p into rsi[] in one
+ # pass; each result is conditionally corrected by +/- p using the
+ # sign/carry mask built in rdx.
+ movq (%rbx), %r8
+ movq 8(%rbx), %r9
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %rdx
+ movq %r8, %r12
+ addq (%rbp), %r8
+ movq %r9, %r13
+ adcq 8(%rbp), %r9
+ movq %r10, %r14
+ adcq 16(%rbp), %r10
+ movq %rdx, %r15
+ adcq 24(%rbp), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rbp), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rbp), %r13
+ movq $-19, %rcx
+ sbbq 16(%rbp), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rbp), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, (%rsi)
+ movq %r13, 8(%rsi)
+ movq %r14, 16(%rsi)
+ movq %r15, 24(%rsi)
+ # First multiply: B operand from the stack-passed arg at 168(%rsp);
+ # A is the sum just stored at rdi[]; result goes to rbx[].
+ movq 16(%rsp), %rbx
+ movq 168(%rsp), %rbp
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbp), %rdx
+ mulxq (%rdi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rdi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rdi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 8(%rdi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq (%rdi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rdi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 8(%rdi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rdi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq 8(%rdi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbp), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rdi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 16(%rdi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbp), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rdi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rdi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbp), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rdi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 16(%rdi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbp), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rdi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ # Fold the 512-bit product r8..r15 back below 2^255: shift the top
+ # 257 bits down and add them times 19 into the low half
+ # (2^255 == 19 mod p), then do two cheap conditional fix-ups.
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rbx)
+ movq %r9, 8(%rbx)
+ movq %r10, 16(%rbx)
+ movq %r11, 24(%rbx)
+ # Second multiply: B operand from the stack-passed arg at 176(%rsp);
+ # A is the difference at rsi[]; result overwrites rsi[].
+ movq 176(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbx), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbx), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbx), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbx), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rsi)
+ movq %r9, 8(%rsi)
+ movq %r10, 16(%rsi)
+ movq %r11, 24(%rsi)
+ # Third multiply: operands from the stack-passed args at 144(%rsp)
+ # and 160(%rsp); result goes to the 4th register arg's buffer.
+ movq 24(%rsp), %rsi
+ movq 160(%rsp), %rbx
+ movq 144(%rsp), %rbp
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbp), %rdx
+ mulxq (%rbx), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rbx), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rbx), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 8(%rbx), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq (%rbx), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rbx), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 8(%rbx), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rbx), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq 8(%rbx), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbp), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rbx), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 16(%rbx), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbp), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rbx), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rbx), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbp), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rbx), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 16(%rbx), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbp), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rbx), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rsi)
+ movq %r9, 8(%rsi)
+ movq %r10, 16(%rsi)
+ movq %r11, 24(%rsi)
+ # Fourth multiply: operands from the stack-passed args at 136(%rsp)
+ # and 152(%rsp); result goes to the 1st register arg's buffer (rdi).
+ movq 136(%rsp), %rsi
+ movq 152(%rsp), %rbx
+ # Multiply
+ # A[0] * B[0]
+ movq (%rbx), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rcx, %rax
+ xorq %r15, %r15
+ adcxq %rcx, %r9
+ # A[1] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rax, %r10
+ # A[0] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq (%rsi), %rcx, %rax
+ adoxq %rcx, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rcx, %r14
+ adoxq %rax, %r10
+ adcxq %rcx, %r11
+ # A[1] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 8(%rsi), %rcx, %rax
+ adcxq %r14, %r12
+ adoxq %rcx, %r11
+ adcxq %r15, %r13
+ adoxq %rax, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rcx, %rax
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rcx, %r10
+ # A[1] * B[1]
+ movq 8(%rbx), %rdx
+ mulxq 8(%rsi), %rdx, %rcx
+ adcxq %rax, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbx), %rdx
+ adoxq %rcx, %r11
+ mulxq 24(%rsi), %rcx, %rax
+ adcxq %rcx, %r12
+ # A[2] * B[2]
+ movq 16(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rax, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbx), %rdx
+ adoxq %rcx, %r13
+ mulxq 24(%rsi), %rcx, %rax
+ adoxq %r15, %r14
+ adcxq %rcx, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rcx
+ adcxq %rax, %r15
+ xorq %rax, %rax
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq (%rbx), %rdx
+ adcxq %rcx, %r12
+ mulxq 24(%rsi), %rdx, %rcx
+ adoxq %rdx, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[3]
+ movq 24(%rbx), %rdx
+ mulxq 16(%rsi), %rdx, %rcx
+ adcxq %rdx, %r13
+ # A[3] * B[2]
+ movq 16(%rbx), %rdx
+ adcxq %rcx, %r14
+ mulxq 24(%rsi), %rcx, %rdx
+ adcxq %rax, %r15
+ adoxq %rcx, %r13
+ adoxq %rdx, %r14
+ adoxq %rax, %r15
+ # Reduce
+ movq $0x7fffffffffffffff, %rax
+ # Move top half into t4-t7 and remove top bit from t3
+ shldq $0x01, %r14, %r15
+ shldq $0x01, %r13, %r14
+ shldq $0x01, %r12, %r13
+ shldq $0x01, %r11, %r12
+ andq %rax, %r11
+ # Multiply top half by 19
+ movq $19, %rdx
+ xorq %rax, %rax
+ mulxq %r12, %rcx, %r12
+ adcxq %rcx, %r8
+ adoxq %r12, %r9
+ mulxq %r13, %rcx, %r13
+ adcxq %rcx, %r9
+ adoxq %r13, %r10
+ mulxq %r14, %rcx, %r14
+ adcxq %rcx, %r10
+ adoxq %r14, %r11
+ mulxq %r15, %r15, %rdx
+ adcxq %r15, %r11
+ adoxq %rax, %rdx
+ adcxq %rax, %rdx
+ # Overflow
+ shldq $0x01, %r11, %rdx
+ movq $0x7fffffffffffffff, %rax
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Reduce if top bit set
+ movq %r11, %rdx
+ shrq $63, %rdx
+ imulq $19, %rdx, %rcx
+ andq %rax, %r11
+ addq %rcx, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ adcq $0x00, %r11
+ # Store
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ # Double rdi[] into the local scratch field element at 48(%rsp).
+ leaq 48(%rsp), %rsi
+ # Double
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ addq %r8, %r8
+ movq 16(%rdi), %r10
+ adcq %r9, %r9
+ movq 24(%rdi), %rdx
+ adcq %r10, %r10
+ movq $-19, %rcx
+ adcq %rdx, %rdx
+ movq $0x7fffffffffffffff, %rax
+ movq %rdx, %r11
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ movq %r8, (%rsi)
+ movq %r9, 8(%rsi)
+ movq %r10, 16(%rsi)
+ movq %r11, 24(%rsi)
+ movq 8(%rsp), %rbx
+ movq 16(%rsp), %rbp
+ # Add
+ movq (%rbp), %r8
+ movq 8(%rbp), %r9
+ movq 16(%rbp), %r10
+ movq 24(%rbp), %rdx
+ movq %r8, %r12
+ addq (%rbx), %r8
+ movq %r9, %r13
+ adcq 8(%rbx), %r9
+ movq %r10, %r14
+ adcq 16(%rbx), %r10
+ movq %rdx, %r15
+ adcq 24(%rbx), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rbx), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rbx), %r13
+ movq $-19, %rcx
+ sbbq 16(%rbx), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rbx), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rbx)
+ movq %r9, 8(%rbx)
+ movq %r10, 16(%rbx)
+ movq %r11, 24(%rbx)
+ movq %r12, (%rdi)
+ movq %r13, 8(%rdi)
+ movq %r14, 16(%rdi)
+ movq %r15, 24(%rdi)
+ # Final add/sub: combine the doubled scratch value at rsi with the
+ # 4th register arg's buffer.
+ movq 24(%rsp), %rdi
+ # Add
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %rdx
+ movq %r8, %r12
+ addq (%rdi), %r8
+ movq %r9, %r13
+ adcq 8(%rdi), %r9
+ movq %r10, %r14
+ adcq 16(%rdi), %r10
+ movq %rdx, %r15
+ adcq 24(%rdi), %rdx
+ movq $-19, %rcx
+ movq %rdx, %r11
+ movq $0x7fffffffffffffff, %rax
+ sarq $63, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Sub modulus (if overflow)
+ subq %rcx, %r8
+ sbbq %rdx, %r9
+ sbbq %rdx, %r10
+ sbbq %rax, %r11
+ # Sub
+ subq (%rdi), %r12
+ movq $0x00, %rdx
+ sbbq 8(%rdi), %r13
+ movq $-19, %rcx
+ sbbq 16(%rdi), %r14
+ movq $0x7fffffffffffffff, %rax
+ sbbq 24(%rdi), %r15
+ sbbq $0x00, %rdx
+ # Mask the modulus
+ andq %rdx, %rcx
+ andq %rdx, %rax
+ # Add modulus (if underflow)
+ addq %rcx, %r12
+ adcq %rdx, %r13
+ adcq %rdx, %r14
+ adcq %rax, %r15
+ movq %r8, (%rbp)
+ movq %r9, 8(%rbp)
+ movq %r10, 16(%rbp)
+ movq %r11, 24(%rbp)
+ movq %r12, (%rdi)
+ movq %r13, 8(%rdi)
+ movq %r14, 16(%rdi)
+ movq %r15, 24(%rdi)
+ # Epilogue: drop the frame and restore callee-saved regs in exact
+ # reverse push order; "repz retq" is a plain ret with a benign prefix.
+ addq $0x50, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size fe_ge_add_avx2,.-fe_ge_add_avx2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl fe_ge_sub_avx2
+.type fe_ge_sub_avx2,@function
+.align 4
+fe_ge_sub_avx2:
+#else
+.section __TEXT,__text
+.globl _fe_ge_sub_avx2
+.p2align 2
+_fe_ge_sub_avx2:
+#endif /* __APPLE__ */
+	pushq	%rbx    # save SysV callee-saved registers
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	subq	$0x50, %rsp    # 0x50 locals: 6 spilled reg args + temp at 48(%rsp)
+	movq	%rdi, (%rsp)    # spill arg1
+	movq	%rsi, 8(%rsp)    # spill arg2
+	movq	%rdx, 16(%rsp)    # spill arg3
+	movq	%rcx, 24(%rsp)    # spill arg4
+	movq	%r8, 32(%rsp)    # spill arg5
+	movq	%r9, 40(%rsp)    # spill arg6
+	movq	8(%rsp), %rsi
+	movq	40(%rsp), %rbx
+	movq	32(%rsp), %rbp
+	# Add
+	movq	(%rbx), %r8
+	movq	8(%rbx), %r9
+	movq	16(%rbx), %r10
+	movq	24(%rbx), %rdx
+	movq	%r8, %r12    # keep a copy of the operand for the Sub below
+	addq	(%rbp), %r8
+	movq	%r9, %r13
+	adcq	8(%rbp), %r9
+	movq	%r10, %r14
+	adcq	16(%rbp), %r10
+	movq	%rdx, %r15
+	adcq	24(%rbp), %rdx
+	movq	$-19, %rcx    # 2^255-19: low word of the modulus image used below
+	movq	%rdx, %r11
+	movq	$0x7fffffffffffffff, %rax    # top-word mask of 2^255-19
+	sarq	$63, %rdx    # rdx = 0 or -1: sign of top word selects conditional reduce
+	# Mask the modulus
+	andq	%rdx, %rcx
+	andq	%rdx, %rax
+	# Sub modulus (if overflow)
+	subq	%rcx, %r8
+	sbbq	%rdx, %r9
+	sbbq	%rdx, %r10
+	sbbq	%rax, %r11
+	# Sub
+	subq	(%rbp), %r12
+	movq	$0x00, %rdx
+	sbbq	8(%rbp), %r13
+	movq	$-19, %rcx
+	sbbq	16(%rbp), %r14
+	movq	$0x7fffffffffffffff, %rax
+	sbbq	24(%rbp), %r15
+	sbbq	$0x00, %rdx    # rdx = 0 or -1 borrow mask
+	# Mask the modulus
+	andq	%rdx, %rcx
+	andq	%rdx, %rax
+	# Add modulus (if underflow)
+	addq	%rcx, %r12
+	adcq	%rdx, %r13
+	adcq	%rdx, %r14
+	adcq	%rax, %r15
+	movq	%r8, (%rdi)    # store sum result
+	movq	%r9, 8(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%r12, (%rsi)    # store difference result
+	movq	%r13, 8(%rsi)
+	movq	%r14, 16(%rsi)
+	movq	%r15, 24(%rsi)
+	movq	16(%rsp), %rbx
+	movq	176(%rsp), %rbp    # 6th stack arg: 0x50 locals + 48 pushes + 8 ret = 136 is the 1st
+	# Multiply
+	# A[0] * B[0]
+	movq	(%rbp), %rdx    # mulx takes implicit multiplicand in rdx
+	mulxq	(%rdi), %r8, %r9
+	# A[2] * B[0]
+	mulxq	16(%rdi), %r10, %r11
+	# A[1] * B[0]
+	mulxq	8(%rdi), %rcx, %rax
+	xorq	%r15, %r15    # zero r15 and clear CF/OF for the adcx/adox chains
+	adcxq	%rcx, %r9
+	# A[1] * B[3]
+	movq	24(%rbp), %rdx
+	mulxq	8(%rdi), %r12, %r13
+	adcxq	%rax, %r10
+	# A[0] * B[1]
+	movq	8(%rbp), %rdx
+	mulxq	(%rdi), %rcx, %rax
+	adoxq	%rcx, %r9
+	# A[2] * B[1]
+	mulxq	16(%rdi), %rcx, %r14
+	adoxq	%rax, %r10
+	adcxq	%rcx, %r11
+	# A[1] * B[2]
+	movq	16(%rbp), %rdx
+	mulxq	8(%rdi), %rcx, %rax
+	adcxq	%r14, %r12
+	adoxq	%rcx, %r11
+	adcxq	%r15, %r13
+	adoxq	%rax, %r12
+	# A[0] * B[2]
+	mulxq	(%rdi), %rcx, %rax
+	adoxq	%r15, %r13
+	xorq	%r14, %r14    # restart both carry chains
+	adcxq	%rcx, %r10
+	# A[1] * B[1]
+	movq	8(%rbp), %rdx
+	mulxq	8(%rdi), %rdx, %rcx
+	adcxq	%rax, %r11
+	adoxq	%rdx, %r10
+	# A[3] * B[1]
+	movq	8(%rbp), %rdx
+	adoxq	%rcx, %r11
+	mulxq	24(%rdi), %rcx, %rax
+	adcxq	%rcx, %r12
+	# A[2] * B[2]
+	movq	16(%rbp), %rdx
+	mulxq	16(%rdi), %rdx, %rcx
+	adcxq	%rax, %r13
+	adoxq	%rdx, %r12
+	# A[3] * B[3]
+	movq	24(%rbp), %rdx
+	adoxq	%rcx, %r13
+	mulxq	24(%rdi), %rcx, %rax
+	adoxq	%r15, %r14
+	adcxq	%rcx, %r14
+	# A[0] * B[3]
+	mulxq	(%rdi), %rdx, %rcx
+	adcxq	%rax, %r15
+	xorq	%rax, %rax    # restart both carry chains
+	adcxq	%rdx, %r11
+	# A[3] * B[0]
+	movq	(%rbp), %rdx
+	adcxq	%rcx, %r12
+	mulxq	24(%rdi), %rdx, %rcx
+	adoxq	%rdx, %r11
+	adoxq	%rcx, %r12
+	# A[2] * B[3]
+	movq	24(%rbp), %rdx
+	mulxq	16(%rdi), %rdx, %rcx
+	adcxq	%rdx, %r13
+	# A[3] * B[2]
+	movq	16(%rbp), %rdx
+	adcxq	%rcx, %r14
+	mulxq	24(%rdi), %rcx, %rdx
+	adcxq	%rax, %r15
+	adoxq	%rcx, %r13
+	adoxq	%rdx, %r14
+	adoxq	%rax, %r15    # r8..r15 = 512-bit product
+	# Reduce
+	movq	$0x7fffffffffffffff, %rax
+	# Move top half into t4-t7 and remove top bit from t3
+	shldq	$0x01, %r14, %r15
+	shldq	$0x01, %r13, %r14
+	shldq	$0x01, %r12, %r13
+	shldq	$0x01, %r11, %r12
+	andq	%rax, %r11
+	# Multiply top half by 19
+	movq	$19, %rdx    # 2^255 == 19 (mod 2^255-19), so fold top*19 into bottom
+	xorq	%rax, %rax
+	mulxq	%r12, %rcx, %r12
+	adcxq	%rcx, %r8
+	adoxq	%r12, %r9
+	mulxq	%r13, %rcx, %r13
+	adcxq	%rcx, %r9
+	adoxq	%r13, %r10
+	mulxq	%r14, %rcx, %r14
+	adcxq	%rcx, %r10
+	adoxq	%r14, %r11
+	mulxq	%r15, %r15, %rdx
+	adcxq	%r15, %r11
+	adoxq	%rax, %rdx
+	adcxq	%rax, %rdx
+	# Overflow
+	shldq	$0x01, %r11, %rdx
+	movq	$0x7fffffffffffffff, %rax
+	imulq	$19, %rdx, %rcx
+	andq	%rax, %r11
+	addq	%rcx, %r8
+	adcq	$0x00, %r9
+	adcq	$0x00, %r10
+	adcq	$0x00, %r11
+	# Reduce if top bit set
+	movq	%r11, %rdx
+	shrq	$63, %rdx
+	imulq	$19, %rdx, %rcx
+	andq	%rax, %r11
+	addq	%rcx, %r8
+	adcq	$0x00, %r9
+	adcq	$0x00, %r10
+	adcq	$0x00, %r11
+	# Store
+	movq	%r8, (%rbx)
+	movq	%r9, 8(%rbx)
+	movq	%r10, 16(%rbx)
+	movq	%r11, 24(%rbx)
+	movq	168(%rsp), %rbx    # 5th stack arg
+	# Multiply
+	# A[0] * B[0]
+	movq	(%rbx), %rdx
+	mulxq	(%rsi), %r8, %r9
+	# A[2] * B[0]
+	mulxq	16(%rsi), %r10, %r11
+	# A[1] * B[0]
+	mulxq	8(%rsi), %rcx, %rax
+	xorq	%r15, %r15    # zero r15 and clear CF/OF for the adcx/adox chains
+	adcxq	%rcx, %r9
+	# A[1] * B[3]
+	movq	24(%rbx), %rdx
+	mulxq	8(%rsi), %r12, %r13
+	adcxq	%rax, %r10
+	# A[0] * B[1]
+	movq	8(%rbx), %rdx
+	mulxq	(%rsi), %rcx, %rax
+	adoxq	%rcx, %r9
+	# A[2] * B[1]
+	mulxq	16(%rsi), %rcx, %r14
+	adoxq	%rax, %r10
+	adcxq	%rcx, %r11
+	# A[1] * B[2]
+	movq	16(%rbx), %rdx
+	mulxq	8(%rsi), %rcx, %rax
+	adcxq	%r14, %r12
+	adoxq	%rcx, %r11
+	adcxq	%r15, %r13
+	adoxq	%rax, %r12
+	# A[0] * B[2]
+	mulxq	(%rsi), %rcx, %rax
+	adoxq	%r15, %r13
+	xorq	%r14, %r14    # restart both carry chains
+	adcxq	%rcx, %r10
+	# A[1] * B[1]
+	movq	8(%rbx), %rdx
+	mulxq	8(%rsi), %rdx, %rcx
+	adcxq	%rax, %r11
+	adoxq	%rdx, %r10
+	# A[3] * B[1]
+	movq	8(%rbx), %rdx
+	adoxq	%rcx, %r11
+	mulxq	24(%rsi), %rcx, %rax
+	adcxq	%rcx, %r12
+	# A[2] * B[2]
+	movq	16(%rbx), %rdx
+	mulxq	16(%rsi), %rdx, %rcx
+	adcxq	%rax, %r13
+	adoxq	%rdx, %r12
+	# A[3] * B[3]
+	movq	24(%rbx), %rdx
+	adoxq	%rcx, %r13
+	mulxq	24(%rsi), %rcx, %rax
+	adoxq	%r15, %r14
+	adcxq	%rcx, %r14
+	# A[0] * B[3]
+	mulxq	(%rsi), %rdx, %rcx
+	adcxq	%rax, %r15
+	xorq	%rax, %rax    # restart both carry chains
+	adcxq	%rdx, %r11
+	# A[3] * B[0]
+	movq	(%rbx), %rdx
+	adcxq	%rcx, %r12
+	mulxq	24(%rsi), %rdx, %rcx
+	adoxq	%rdx, %r11
+	adoxq	%rcx, %r12
+	# A[2] * B[3]
+	movq	24(%rbx), %rdx
+	mulxq	16(%rsi), %rdx, %rcx
+	adcxq	%rdx, %r13
+	# A[3] * B[2]
+	movq	16(%rbx), %rdx
+	adcxq	%rcx, %r14
+	mulxq	24(%rsi), %rcx, %rdx
+	adcxq	%rax, %r15
+	adoxq	%rcx, %r13
+	adoxq	%rdx, %r14
+	adoxq	%rax, %r15    # r8..r15 = 512-bit product
+	# Reduce
+	movq	$0x7fffffffffffffff, %rax
+	# Move top half into t4-t7 and remove top bit from t3
+	shldq	$0x01, %r14, %r15
+	shldq	$0x01, %r13, %r14
+	shldq	$0x01, %r12, %r13
+	shldq	$0x01, %r11, %r12
+	andq	%rax, %r11
+	# Multiply top half by 19
+	movq	$19, %rdx    # 2^255 == 19 (mod 2^255-19)
+	xorq	%rax, %rax
+	mulxq	%r12, %rcx, %r12
+	adcxq	%rcx, %r8
+	adoxq	%r12, %r9
+	mulxq	%r13, %rcx, %r13
+	adcxq	%rcx, %r9
+	adoxq	%r13, %r10
+	mulxq	%r14, %rcx, %r14
+	adcxq	%rcx, %r10
+	adoxq	%r14, %r11
+	mulxq	%r15, %r15, %rdx
+	adcxq	%r15, %r11
+	adoxq	%rax, %rdx
+	adcxq	%rax, %rdx
+	# Overflow
+	shldq	$0x01, %r11, %rdx
+	movq	$0x7fffffffffffffff, %rax
+	imulq	$19, %rdx, %rcx
+	andq	%rax, %r11
+	addq	%rcx, %r8
+	adcq	$0x00, %r9
+	adcq	$0x00, %r10
+	adcq	$0x00, %r11
+	# Reduce if top bit set
+	movq	%r11, %rdx
+	shrq	$63, %rdx
+	imulq	$19, %rdx, %rcx
+	andq	%rax, %r11
+	addq	%rcx, %r8
+	adcq	$0x00, %r9
+	adcq	$0x00, %r10
+	adcq	$0x00, %r11
+	# Store
+	movq	%r8, (%rsi)
+	movq	%r9, 8(%rsi)
+	movq	%r10, 16(%rsi)
+	movq	%r11, 24(%rsi)
+	movq	24(%rsp), %rsi
+	movq	160(%rsp), %rbx    # 4th stack arg
+	movq	144(%rsp), %rbp    # 2nd stack arg
+	# Multiply
+	# A[0] * B[0]
+	movq	(%rbp), %rdx
+	mulxq	(%rbx), %r8, %r9
+	# A[2] * B[0]
+	mulxq	16(%rbx), %r10, %r11
+	# A[1] * B[0]
+	mulxq	8(%rbx), %rcx, %rax
+	xorq	%r15, %r15    # zero r15 and clear CF/OF for the adcx/adox chains
+	adcxq	%rcx, %r9
+	# A[1] * B[3]
+	movq	24(%rbp), %rdx
+	mulxq	8(%rbx), %r12, %r13
+	adcxq	%rax, %r10
+	# A[0] * B[1]
+	movq	8(%rbp), %rdx
+	mulxq	(%rbx), %rcx, %rax
+	adoxq	%rcx, %r9
+	# A[2] * B[1]
+	mulxq	16(%rbx), %rcx, %r14
+	adoxq	%rax, %r10
+	adcxq	%rcx, %r11
+	# A[1] * B[2]
+	movq	16(%rbp), %rdx
+	mulxq	8(%rbx), %rcx, %rax
+	adcxq	%r14, %r12
+	adoxq	%rcx, %r11
+	adcxq	%r15, %r13
+	adoxq	%rax, %r12
+	# A[0] * B[2]
+	mulxq	(%rbx), %rcx, %rax
+	adoxq	%r15, %r13
+	xorq	%r14, %r14    # restart both carry chains
+	adcxq	%rcx, %r10
+	# A[1] * B[1]
+	movq	8(%rbp), %rdx
+	mulxq	8(%rbx), %rdx, %rcx
+	adcxq	%rax, %r11
+	adoxq	%rdx, %r10
+	# A[3] * B[1]
+	movq	8(%rbp), %rdx
+	adoxq	%rcx, %r11
+	mulxq	24(%rbx), %rcx, %rax
+	adcxq	%rcx, %r12
+	# A[2] * B[2]
+	movq	16(%rbp), %rdx
+	mulxq	16(%rbx), %rdx, %rcx
+	adcxq	%rax, %r13
+	adoxq	%rdx, %r12
+	# A[3] * B[3]
+	movq	24(%rbp), %rdx
+	adoxq	%rcx, %r13
+	mulxq	24(%rbx), %rcx, %rax
+	adoxq	%r15, %r14
+	adcxq	%rcx, %r14
+	# A[0] * B[3]
+	mulxq	(%rbx), %rdx, %rcx
+	adcxq	%rax, %r15
+	xorq	%rax, %rax    # restart both carry chains
+	adcxq	%rdx, %r11
+	# A[3] * B[0]
+	movq	(%rbp), %rdx
+	adcxq	%rcx, %r12
+	mulxq	24(%rbx), %rdx, %rcx
+	adoxq	%rdx, %r11
+	adoxq	%rcx, %r12
+	# A[2] * B[3]
+	movq	24(%rbp), %rdx
+	mulxq	16(%rbx), %rdx, %rcx
+	adcxq	%rdx, %r13
+	# A[3] * B[2]
+	movq	16(%rbp), %rdx
+	adcxq	%rcx, %r14
+	mulxq	24(%rbx), %rcx, %rdx
+	adcxq	%rax, %r15
+	adoxq	%rcx, %r13
+	adoxq	%rdx, %r14
+	adoxq	%rax, %r15    # r8..r15 = 512-bit product
+	# Reduce
+	movq	$0x7fffffffffffffff, %rax
+	# Move top half into t4-t7 and remove top bit from t3
+	shldq	$0x01, %r14, %r15
+	shldq	$0x01, %r13, %r14
+	shldq	$0x01, %r12, %r13
+	shldq	$0x01, %r11, %r12
+	andq	%rax, %r11
+	# Multiply top half by 19
+	movq	$19, %rdx    # 2^255 == 19 (mod 2^255-19)
+	xorq	%rax, %rax
+	mulxq	%r12, %rcx, %r12
+	adcxq	%rcx, %r8
+	adoxq	%r12, %r9
+	mulxq	%r13, %rcx, %r13
+	adcxq	%rcx, %r9
+	adoxq	%r13, %r10
+	mulxq	%r14, %rcx, %r14
+	adcxq	%rcx, %r10
+	adoxq	%r14, %r11
+	mulxq	%r15, %r15, %rdx
+	adcxq	%r15, %r11
+	adoxq	%rax, %rdx
+	adcxq	%rax, %rdx
+	# Overflow
+	shldq	$0x01, %r11, %rdx
+	movq	$0x7fffffffffffffff, %rax
+	imulq	$19, %rdx, %rcx
+	andq	%rax, %r11
+	addq	%rcx, %r8
+	adcq	$0x00, %r9
+	adcq	$0x00, %r10
+	adcq	$0x00, %r11
+	# Reduce if top bit set
+	movq	%r11, %rdx
+	shrq	$63, %rdx
+	imulq	$19, %rdx, %rcx
+	andq	%rax, %r11
+	addq	%rcx, %r8
+	adcq	$0x00, %r9
+	adcq	$0x00, %r10
+	adcq	$0x00, %r11
+	# Store
+	movq	%r8, (%rsi)
+	movq	%r9, 8(%rsi)
+	movq	%r10, 16(%rsi)
+	movq	%r11, 24(%rsi)
+	movq	136(%rsp), %rsi    # 1st stack arg
+	movq	152(%rsp), %rbx    # 3rd stack arg
+	# Multiply
+	# A[0] * B[0]
+	movq	(%rbx), %rdx
+	mulxq	(%rsi), %r8, %r9
+	# A[2] * B[0]
+	mulxq	16(%rsi), %r10, %r11
+	# A[1] * B[0]
+	mulxq	8(%rsi), %rcx, %rax
+	xorq	%r15, %r15    # zero r15 and clear CF/OF for the adcx/adox chains
+	adcxq	%rcx, %r9
+	# A[1] * B[3]
+	movq	24(%rbx), %rdx
+	mulxq	8(%rsi), %r12, %r13
+	adcxq	%rax, %r10
+	# A[0] * B[1]
+	movq	8(%rbx), %rdx
+	mulxq	(%rsi), %rcx, %rax
+	adoxq	%rcx, %r9
+	# A[2] * B[1]
+	mulxq	16(%rsi), %rcx, %r14
+	adoxq	%rax, %r10
+	adcxq	%rcx, %r11
+	# A[1] * B[2]
+	movq	16(%rbx), %rdx
+	mulxq	8(%rsi), %rcx, %rax
+	adcxq	%r14, %r12
+	adoxq	%rcx, %r11
+	adcxq	%r15, %r13
+	adoxq	%rax, %r12
+	# A[0] * B[2]
+	mulxq	(%rsi), %rcx, %rax
+	adoxq	%r15, %r13
+	xorq	%r14, %r14    # restart both carry chains
+	adcxq	%rcx, %r10
+	# A[1] * B[1]
+	movq	8(%rbx), %rdx
+	mulxq	8(%rsi), %rdx, %rcx
+	adcxq	%rax, %r11
+	adoxq	%rdx, %r10
+	# A[3] * B[1]
+	movq	8(%rbx), %rdx
+	adoxq	%rcx, %r11
+	mulxq	24(%rsi), %rcx, %rax
+	adcxq	%rcx, %r12
+	# A[2] * B[2]
+	movq	16(%rbx), %rdx
+	mulxq	16(%rsi), %rdx, %rcx
+	adcxq	%rax, %r13
+	adoxq	%rdx, %r12
+	# A[3] * B[3]
+	movq	24(%rbx), %rdx
+	adoxq	%rcx, %r13
+	mulxq	24(%rsi), %rcx, %rax
+	adoxq	%r15, %r14
+	adcxq	%rcx, %r14
+	# A[0] * B[3]
+	mulxq	(%rsi), %rdx, %rcx
+	adcxq	%rax, %r15
+	xorq	%rax, %rax    # restart both carry chains
+	adcxq	%rdx, %r11
+	# A[3] * B[0]
+	movq	(%rbx), %rdx
+	adcxq	%rcx, %r12
+	mulxq	24(%rsi), %rdx, %rcx
+	adoxq	%rdx, %r11
+	adoxq	%rcx, %r12
+	# A[2] * B[3]
+	movq	24(%rbx), %rdx
+	mulxq	16(%rsi), %rdx, %rcx
+	adcxq	%rdx, %r13
+	# A[3] * B[2]
+	movq	16(%rbx), %rdx
+	adcxq	%rcx, %r14
+	mulxq	24(%rsi), %rcx, %rdx
+	adcxq	%rax, %r15
+	adoxq	%rcx, %r13
+	adoxq	%rdx, %r14
+	adoxq	%rax, %r15    # r8..r15 = 512-bit product
+	# Reduce
+	movq	$0x7fffffffffffffff, %rax
+	# Move top half into t4-t7 and remove top bit from t3
+	shldq	$0x01, %r14, %r15
+	shldq	$0x01, %r13, %r14
+	shldq	$0x01, %r12, %r13
+	shldq	$0x01, %r11, %r12
+	andq	%rax, %r11
+	# Multiply top half by 19
+	movq	$19, %rdx    # 2^255 == 19 (mod 2^255-19)
+	xorq	%rax, %rax
+	mulxq	%r12, %rcx, %r12
+	adcxq	%rcx, %r8
+	adoxq	%r12, %r9
+	mulxq	%r13, %rcx, %r13
+	adcxq	%rcx, %r9
+	adoxq	%r13, %r10
+	mulxq	%r14, %rcx, %r14
+	adcxq	%rcx, %r10
+	adoxq	%r14, %r11
+	mulxq	%r15, %r15, %rdx
+	adcxq	%r15, %r11
+	adoxq	%rax, %rdx
+	adcxq	%rax, %rdx
+	# Overflow
+	shldq	$0x01, %r11, %rdx
+	movq	$0x7fffffffffffffff, %rax
+	imulq	$19, %rdx, %rcx
+	andq	%rax, %r11
+	addq	%rcx, %r8
+	adcq	$0x00, %r9
+	adcq	$0x00, %r10
+	adcq	$0x00, %r11
+	# Reduce if top bit set
+	movq	%r11, %rdx
+	shrq	$63, %rdx
+	imulq	$19, %rdx, %rcx
+	andq	%rax, %r11
+	addq	%rcx, %r8
+	adcq	$0x00, %r9
+	adcq	$0x00, %r10
+	adcq	$0x00, %r11
+	# Store
+	movq	%r8, (%rdi)
+	movq	%r9, 8(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%r11, 24(%rdi)
+	leaq	48(%rsp), %rsi    # temp field element in local stack space
+	# Double
+	movq	(%rdi), %r8
+	movq	8(%rdi), %r9
+	addq	%r8, %r8    # shift-free doubling via self-add with carry chain
+	movq	16(%rdi), %r10
+	adcq	%r9, %r9
+	movq	24(%rdi), %rdx
+	adcq	%r10, %r10
+	movq	$-19, %rcx
+	adcq	%rdx, %rdx
+	movq	$0x7fffffffffffffff, %rax
+	movq	%rdx, %r11
+	sarq	$63, %rdx    # rdx = 0 or -1: conditional-reduce mask
+	# Mask the modulus
+	andq	%rdx, %rcx
+	andq	%rdx, %rax
+	# Sub modulus (if overflow)
+	subq	%rcx, %r8
+	sbbq	%rdx, %r9
+	sbbq	%rdx, %r10
+	sbbq	%rax, %r11
+	movq	%r8, (%rsi)
+	movq	%r9, 8(%rsi)
+	movq	%r10, 16(%rsi)
+	movq	%r11, 24(%rsi)
+	movq	8(%rsp), %rbx
+	movq	16(%rsp), %rbp
+	# Add
+	movq	(%rbp), %r8
+	movq	8(%rbp), %r9
+	movq	16(%rbp), %r10
+	movq	24(%rbp), %rdx
+	movq	%r8, %r12    # keep a copy of the operand for the Sub below
+	addq	(%rbx), %r8
+	movq	%r9, %r13
+	adcq	8(%rbx), %r9
+	movq	%r10, %r14
+	adcq	16(%rbx), %r10
+	movq	%rdx, %r15
+	adcq	24(%rbx), %rdx
+	movq	$-19, %rcx
+	movq	%rdx, %r11
+	movq	$0x7fffffffffffffff, %rax
+	sarq	$63, %rdx    # rdx = 0 or -1: conditional-reduce mask
+	# Mask the modulus
+	andq	%rdx, %rcx
+	andq	%rdx, %rax
+	# Sub modulus (if overflow)
+	subq	%rcx, %r8
+	sbbq	%rdx, %r9
+	sbbq	%rdx, %r10
+	sbbq	%rax, %r11
+	# Sub
+	subq	(%rbx), %r12
+	movq	$0x00, %rdx
+	sbbq	8(%rbx), %r13
+	movq	$-19, %rcx
+	sbbq	16(%rbx), %r14
+	movq	$0x7fffffffffffffff, %rax
+	sbbq	24(%rbx), %r15
+	sbbq	$0x00, %rdx    # rdx = 0 or -1 borrow mask
+	# Mask the modulus
+	andq	%rdx, %rcx
+	andq	%rdx, %rax
+	# Add modulus (if underflow)
+	addq	%rcx, %r12
+	adcq	%rdx, %r13
+	adcq	%rdx, %r14
+	adcq	%rax, %r15
+	movq	%r8, (%rbx)
+	movq	%r9, 8(%rbx)
+	movq	%r10, 16(%rbx)
+	movq	%r11, 24(%rbx)
+	movq	%r12, (%rdi)
+	movq	%r13, 8(%rdi)
+	movq	%r14, 16(%rdi)
+	movq	%r15, 24(%rdi)
+	movq	24(%rsp), %rdi
+	# Add
+	movq	(%rsi), %r8
+	movq	8(%rsi), %r9
+	movq	16(%rsi), %r10
+	movq	24(%rsi), %rdx
+	movq	%r8, %r12    # keep a copy of the operand for the Sub below
+	addq	(%rdi), %r8
+	movq	%r9, %r13
+	adcq	8(%rdi), %r9
+	movq	%r10, %r14
+	adcq	16(%rdi), %r10
+	movq	%rdx, %r15
+	adcq	24(%rdi), %rdx
+	movq	$-19, %rcx
+	movq	%rdx, %r11
+	movq	$0x7fffffffffffffff, %rax
+	sarq	$63, %rdx    # rdx = 0 or -1: conditional-reduce mask
+	# Mask the modulus
+	andq	%rdx, %rcx
+	andq	%rdx, %rax
+	# Sub modulus (if overflow)
+	subq	%rcx, %r8
+	sbbq	%rdx, %r9
+	sbbq	%rdx, %r10
+	sbbq	%rax, %r11
+	# Sub
+	subq	(%rdi), %r12
+	movq	$0x00, %rdx
+	sbbq	8(%rdi), %r13
+	movq	$-19, %rcx
+	sbbq	16(%rdi), %r14
+	movq	$0x7fffffffffffffff, %rax
+	sbbq	24(%rdi), %r15
+	sbbq	$0x00, %rdx    # rdx = 0 or -1 borrow mask
+	# Mask the modulus
+	andq	%rdx, %rcx
+	andq	%rdx, %rax
+	# Add modulus (if underflow)
+	addq	%rcx, %r12
+	adcq	%rdx, %r13
+	adcq	%rdx, %r14
+	adcq	%rax, %r15
+	movq	%r8, (%rdi)
+	movq	%r9, 8(%rdi)
+	movq	%r10, 16(%rdi)
+	movq	%r11, 24(%rdi)
+	movq	%r12, (%rbp)
+	movq	%r13, 8(%rbp)
+	movq	%r14, 16(%rbp)
+	movq	%r15, 24(%rbp)
+	addq	$0x50, %rsp    # release locals
+	popq	%r15    # restore callee-saved registers
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbp
+	popq	%rbx
+	repz retq    # rep ret: branch-predictor-friendly return on older AMD cores
+#ifndef __APPLE__
+.size fe_ge_sub_avx2,.-fe_ge_sub_avx2
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* HAVE_INTEL_AVX2 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fips.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fips.c
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fips.c
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fips_test.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fips_test.c
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fips_test.c
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mont_small.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mont_small.i
index c4a339b9b..380b0a25b 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mont_small.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mont_small.i
@@ -1,8 +1,8 @@
/* fp_mont_small.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,21 +16,34 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SMALL_MONT_SET
/* computes x/R == x (mod N) via Montgomery Reduction */
-void fp_montgomery_reduce_small(fp_int *a, fp_int *m, fp_digit mp)
+int fp_montgomery_reduce_small(fp_int *a, fp_int *m, fp_digit mp)
{
- fp_digit c[FP_SIZE], *_c, *tmpm, mu, cy;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit c[FP_SIZE];
+#else
+ fp_digit *c;
+#endif
+ fp_digit *_c, *tmpm, mu, cy;
int oldused, x, y, pa;
-#if defined(USE_MEMSET)
- /* now zero the buff */
- memset(c, 0, sizeof c);
+#ifdef WOLFSSL_SMALL_STACK
+ /* only allocate space for what's needed for window plus res */
+ c = (fp_digit*)XMALLOC(sizeof(fp_digit)*FP_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (c == NULL) {
+ return FP_MEM;
+ }
#endif
+
+ /* now zero the buff */
+ XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE));
+
pa = m->used;
/* copy the input */
@@ -38,11 +51,7 @@ void fp_montgomery_reduce_small(fp_int *a, fp_int *m, fp_digit mp)
for (x = 0; x < oldused; x++) {
c[x] = a->dp[x];
}
-#if !defined(USE_MEMSET)
- for (; x < 2*pa+3; x++) {
- c[x] = 0;
- }
-#endif
+
MONT_START;
switch (pa) {
@@ -3855,6 +3864,11 @@ void fp_montgomery_reduce_small(fp_int *a, fp_int *m, fp_digit mp)
if (fp_cmp_mag (a, m) != FP_LT) {
s_fp_sub (a, m, a);
}
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(c, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_12.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_12.i
index b25ce5c4a..0f0683d74 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_12.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_12.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_12.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL12
-void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[24];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[24];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 24, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 12 * sizeof(fp_digit));
- memcpy(at+12, B->dp, 12 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 12 * sizeof(fp_digit));
+ XMEMCPY(at+12, B->dp, 12 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -126,5 +138,10 @@ void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_17.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_17.i
index fe12f0602..fb3205515 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_17.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_17.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_17.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL17
-void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[34];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[34];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 34, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 17 * sizeof(fp_digit));
- memcpy(at+17, B->dp, 17 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 17 * sizeof(fp_digit));
+ XMEMCPY(at+17, B->dp, 17 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -166,5 +178,10 @@ void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_20.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_20.i
index cd07e5dfd..372f51f41 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_20.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_20.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_20.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,16 +16,28 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL20
-void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[40];
-
- memcpy(at, A->dp, 20 * sizeof(fp_digit));
- memcpy(at+20, B->dp, 20 * sizeof(fp_digit));
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[40];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 40, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
+
+ XMEMCPY(at, A->dp, 20 * sizeof(fp_digit));
+ XMEMCPY(at+20, B->dp, 20 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -189,5 +201,10 @@ void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_24.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_24.i
index 2576d27aa..17705f7df 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_24.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_24.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_24.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL24
-void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[48];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[48];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 48, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 24 * sizeof(fp_digit));
- memcpy(at+24, B->dp, 24 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 24 * sizeof(fp_digit));
+ XMEMCPY(at+24, B->dp, 24 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -222,5 +234,10 @@ void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_28.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_28.i
index 822dd14c7..594db74ef 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_28.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_28.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_28.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL28
-void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[56];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[56];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 56, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 28 * sizeof(fp_digit));
- memcpy(at+28, B->dp, 28 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 28 * sizeof(fp_digit));
+ XMEMCPY(at+28, B->dp, 28 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -254,5 +266,10 @@ void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_3.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_3.i
index 440291e38..0befff860 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_3.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_3.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_3.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,18 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL3
-void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[6];
- memcpy(at, A->dp, 3 * sizeof(fp_digit));
- memcpy(at+3, B->dp, 3 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 3 * sizeof(fp_digit));
+ XMEMCPY(at+3, B->dp, 3 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -54,5 +55,7 @@ void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_32.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_32.i
index 905028d17..97dc076be 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_32.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_32.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_32.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,19 +16,31 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL32
-void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[64];
int out_size;
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[64];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
out_size = A->used + B->used;
- memcpy(at, A->dp, 32 * sizeof(fp_digit));
- memcpy(at+32, B->dp, 32 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 32 * sizeof(fp_digit));
+ XMEMCPY(at+32, B->dp, 32 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -189,7 +201,7 @@ void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
COMBA_STORE(C->dp[38]);
/* early out at 40 digits, 40*32==1280, or two 640 bit operands */
- if (out_size <= 40) { COMBA_STORE2(C->dp[39]); C->used = 40; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return; }
+ if (out_size <= 40) { COMBA_STORE2(C->dp[39]); C->used = 40; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return FP_OKAY; }
/* 39 */
COMBA_FORWARD;
@@ -225,7 +237,7 @@ void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
COMBA_STORE(C->dp[46]);
/* early out at 48 digits, 48*32==1536, or two 768 bit operands */
- if (out_size <= 48) { COMBA_STORE2(C->dp[47]); C->used = 48; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return; }
+ if (out_size <= 48) { COMBA_STORE2(C->dp[47]); C->used = 48; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return FP_OKAY; }
/* 47 */
COMBA_FORWARD;
@@ -261,7 +273,7 @@ void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
COMBA_STORE(C->dp[54]);
/* early out at 56 digits, 56*32==1792, or two 896 bit operands */
- if (out_size <= 56) { COMBA_STORE2(C->dp[55]); C->used = 56; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return; }
+ if (out_size <= 56) { COMBA_STORE2(C->dp[55]); C->used = 56; C->sign = A->sign ^ B->sign; fp_clamp(C); COMBA_FINI; return FP_OKAY; }
/* 55 */
COMBA_FORWARD;
@@ -300,5 +312,10 @@ void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_4.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_4.i
index e981eb1f0..803c6151a 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_4.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_4.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_4.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL4
-void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[8];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[8];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 8, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 4 * sizeof(fp_digit));
- memcpy(at+4, B->dp, 4 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 4 * sizeof(fp_digit));
+ XMEMCPY(at+4, B->dp, 4 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -62,5 +74,10 @@ void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_48.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_48.i
index 79e43b8d0..0d1533458 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_48.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_48.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_48.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL48
-void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[96];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[96];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 96, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 48 * sizeof(fp_digit));
- memcpy(at+48, B->dp, 48 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 48 * sizeof(fp_digit));
+ XMEMCPY(at+48, B->dp, 48 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -414,5 +426,10 @@ void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_6.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_6.i
index 165c270b7..815badcb8 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_6.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_6.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_6.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL6
-void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[12];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[12];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 12, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 6 * sizeof(fp_digit));
- memcpy(at+6, B->dp, 6 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 6 * sizeof(fp_digit));
+ XMEMCPY(at+6, B->dp, 6 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -78,5 +90,10 @@ void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_64.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_64.i
index 76d7c2114..7080fa2a3 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_64.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_64.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_64.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL64
-void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[128];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[128];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 128, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 64 * sizeof(fp_digit));
- memcpy(at+64, B->dp, 64 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 64 * sizeof(fp_digit));
+ XMEMCPY(at+64, B->dp, 64 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -542,5 +554,10 @@ void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_7.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_7.i
index eed886315..b969a9a3b 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_7.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_7.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_7.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL7
-void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[14];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[14];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 14, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 7 * sizeof(fp_digit));
- memcpy(at+7, B->dp, 7 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 7 * sizeof(fp_digit));
+ XMEMCPY(at+7, B->dp, 7 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -86,5 +98,10 @@ void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_8.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_8.i
index fa578a839..1d61a7781 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_8.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_8.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_8.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL8
-void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[16];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[16];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 8 * sizeof(fp_digit));
- memcpy(at+8, B->dp, 8 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 8 * sizeof(fp_digit));
+ XMEMCPY(at+8, B->dp, 8 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -94,5 +106,10 @@ void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_9.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_9.i
index 755067f86..0eedd7597 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_9.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_9.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_9.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_MUL9
-void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[18];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[18];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 18, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
- memcpy(at, A->dp, 9 * sizeof(fp_digit));
- memcpy(at+9, B->dp, 9 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 9 * sizeof(fp_digit));
+ XMEMCPY(at+9, B->dp, 9 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -102,5 +114,10 @@ void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C)
C->sign = A->sign ^ B->sign;
fp_clamp(C);
COMBA_FINI;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_small_set.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_small_set.i
index deea5932c..62ab909cf 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_small_set.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_mul_comba_small_set.i
@@ -1,8 +1,8 @@
/* fp_mul_comba_small_set.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,19 +16,32 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#if defined(TFM_SMALL_SET)
-void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
{
- fp_digit c0, c1, c2, at[32];
+ fp_digit c0, c1, c2;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit at[32];
+#else
+ fp_digit *at;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ at = (fp_digit*)XMALLOC(sizeof(fp_digit) * 32, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (at == NULL)
+ return FP_MEM;
+#endif
+
switch (MAX(A->used, B->used)) {
case 1:
- memcpy(at, A->dp, 1 * sizeof(fp_digit));
- memcpy(at+1, B->dp, 1 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 1 * sizeof(fp_digit));
+ XMEMCPY(at+1, B->dp, 1 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -43,8 +56,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 2:
- memcpy(at, A->dp, 2 * sizeof(fp_digit));
- memcpy(at+2, B->dp, 2 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 2 * sizeof(fp_digit));
+ XMEMCPY(at+2, B->dp, 2 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -67,8 +80,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 3:
- memcpy(at, A->dp, 3 * sizeof(fp_digit));
- memcpy(at+3, B->dp, 3 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 3 * sizeof(fp_digit));
+ XMEMCPY(at+3, B->dp, 3 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -99,8 +112,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 4:
- memcpy(at, A->dp, 4 * sizeof(fp_digit));
- memcpy(at+4, B->dp, 4 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 4 * sizeof(fp_digit));
+ XMEMCPY(at+4, B->dp, 4 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -139,8 +152,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 5:
- memcpy(at, A->dp, 5 * sizeof(fp_digit));
- memcpy(at+5, B->dp, 5 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 5 * sizeof(fp_digit));
+ XMEMCPY(at+5, B->dp, 5 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -187,8 +200,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 6:
- memcpy(at, A->dp, 6 * sizeof(fp_digit));
- memcpy(at+6, B->dp, 6 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 6 * sizeof(fp_digit));
+ XMEMCPY(at+6, B->dp, 6 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -243,8 +256,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 7:
- memcpy(at, A->dp, 7 * sizeof(fp_digit));
- memcpy(at+7, B->dp, 7 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 7 * sizeof(fp_digit));
+ XMEMCPY(at+7, B->dp, 7 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -307,8 +320,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 8:
- memcpy(at, A->dp, 8 * sizeof(fp_digit));
- memcpy(at+8, B->dp, 8 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 8 * sizeof(fp_digit));
+ XMEMCPY(at+8, B->dp, 8 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -379,8 +392,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 9:
- memcpy(at, A->dp, 9 * sizeof(fp_digit));
- memcpy(at+9, B->dp, 9 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 9 * sizeof(fp_digit));
+ XMEMCPY(at+9, B->dp, 9 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -459,8 +472,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 10:
- memcpy(at, A->dp, 10 * sizeof(fp_digit));
- memcpy(at+10, B->dp, 10 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 10 * sizeof(fp_digit));
+ XMEMCPY(at+10, B->dp, 10 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -547,8 +560,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 11:
- memcpy(at, A->dp, 11 * sizeof(fp_digit));
- memcpy(at+11, B->dp, 11 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 11 * sizeof(fp_digit));
+ XMEMCPY(at+11, B->dp, 11 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -643,8 +656,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 12:
- memcpy(at, A->dp, 12 * sizeof(fp_digit));
- memcpy(at+12, B->dp, 12 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 12 * sizeof(fp_digit));
+ XMEMCPY(at+12, B->dp, 12 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -747,8 +760,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 13:
- memcpy(at, A->dp, 13 * sizeof(fp_digit));
- memcpy(at+13, B->dp, 13 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 13 * sizeof(fp_digit));
+ XMEMCPY(at+13, B->dp, 13 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -859,8 +872,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 14:
- memcpy(at, A->dp, 14 * sizeof(fp_digit));
- memcpy(at+14, B->dp, 14 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 14 * sizeof(fp_digit));
+ XMEMCPY(at+14, B->dp, 14 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -979,8 +992,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 15:
- memcpy(at, A->dp, 15 * sizeof(fp_digit));
- memcpy(at+15, B->dp, 15 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 15 * sizeof(fp_digit));
+ XMEMCPY(at+15, B->dp, 15 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -1107,8 +1120,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
break;
case 16:
- memcpy(at, A->dp, 16 * sizeof(fp_digit));
- memcpy(at+16, B->dp, 16 * sizeof(fp_digit));
+ XMEMCPY(at, A->dp, 16 * sizeof(fp_digit));
+ XMEMCPY(at+16, B->dp, 16 * sizeof(fp_digit));
COMBA_START;
COMBA_CLEAR;
@@ -1245,6 +1258,11 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
default:
break;
}
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(at, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_12.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_12.i
index 078b8986d..cded4b123 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_12.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_12.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_12.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,30 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR12
-void fp_sqr_comba12(fp_int *A, fp_int *B)
+int fp_sqr_comba12(fp_int *A, fp_int *B)
{
- fp_digit *a, b[24], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
fp_word tt;
#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[24];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 24, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
+
a = A->dp;
COMBA_START;
@@ -151,8 +164,13 @@ void fp_sqr_comba12(fp_int *A, fp_int *B)
B->used = 24;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 24 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 24 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_17.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_17.i
index d5f4674fb..d2418d931 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_17.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_17.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_17.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR17
-void fp_sqr_comba17(fp_int *A, fp_int *B)
+int fp_sqr_comba17(fp_int *A, fp_int *B)
{
- fp_digit *a, b[34], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
fp_word tt;
#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[34];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 34, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -202,8 +214,13 @@ void fp_sqr_comba17(fp_int *A, fp_int *B)
B->used = 34;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 34 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 34 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_20.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_20.i
index dcd9f318f..78fd3fd96 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_20.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_20.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_20.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR20
-void fp_sqr_comba20(fp_int *A, fp_int *B)
+int fp_sqr_comba20(fp_int *A, fp_int *B)
{
- fp_digit *a, b[40], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
- fp_word tt;
-#endif
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[40];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 40, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -232,8 +244,13 @@ void fp_sqr_comba20(fp_int *A, fp_int *B)
B->used = 40;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 40 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 40 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_24.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_24.i
index cf512e3c3..602b36c09 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_24.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_24.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_24.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR24
-void fp_sqr_comba24(fp_int *A, fp_int *B)
+int fp_sqr_comba24(fp_int *A, fp_int *B)
{
- fp_digit *a, b[48], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
- fp_word tt;
-#endif
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[48];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 48, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -272,8 +284,13 @@ void fp_sqr_comba24(fp_int *A, fp_int *B)
B->used = 48;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 48 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 48 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_28.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_28.i
index 08e9bc4d5..57c1acc30 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_28.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_28.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_28.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR28
-void fp_sqr_comba28(fp_int *A, fp_int *B)
+int fp_sqr_comba28(fp_int *A, fp_int *B)
{
- fp_digit *a, b[56], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
- fp_word tt;
-#endif
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[56];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 56, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -312,8 +324,13 @@ void fp_sqr_comba28(fp_int *A, fp_int *B)
B->used = 56;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 56 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 56 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_3.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_3.i
index b4754093d..51c3d7422 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_3.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_3.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_3.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,12 +16,13 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR3
-void fp_sqr_comba3(fp_int *A, fp_int *B)
+int fp_sqr_comba3(fp_int *A, fp_int *B)
{
fp_digit *a, b[6], c0, c1, c2;
#ifdef TFM_ISO
@@ -62,8 +63,10 @@ void fp_sqr_comba3(fp_int *A, fp_int *B)
B->used = 6;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 6 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 6 * sizeof(fp_digit));
fp_clamp(B);
+
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_32.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_32.i
index 4a35d7477..4fcf3497b 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_32.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_32.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_32.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR32
-void fp_sqr_comba32(fp_int *A, fp_int *B)
+int fp_sqr_comba32(fp_int *A, fp_int *B)
{
- fp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
- fp_word tt;
-#endif
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[64];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -352,8 +364,13 @@ void fp_sqr_comba32(fp_int *A, fp_int *B)
B->used = 64;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 64 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 64 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_4.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_4.i
index bb09dc891..b7f257288 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_4.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_4.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_4.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR4
-void fp_sqr_comba4(fp_int *A, fp_int *B)
+int fp_sqr_comba4(fp_int *A, fp_int *B)
{
- fp_digit *a, b[8], c0, c1, c2;
+ fp_digit *a, c0, c1, c2;
#ifdef TFM_ISO
fp_word tt;
#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[8];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 8, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -72,8 +84,13 @@ void fp_sqr_comba4(fp_int *A, fp_int *B)
B->used = 8;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 8 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 8 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_48.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_48.i
index cbaac02cc..0f24532b1 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_48.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_48.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_48.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR48
-void fp_sqr_comba48(fp_int *A, fp_int *B)
+int fp_sqr_comba48(fp_int *A, fp_int *B)
{
- fp_digit *a, b[96], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
- fp_word tt;
-#endif
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[96];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 96, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -512,8 +524,13 @@ void fp_sqr_comba48(fp_int *A, fp_int *B)
B->used = 96;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 96 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 96 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_6.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_6.i
index bb2fd743e..b36416844 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_6.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_6.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_6.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR6
-void fp_sqr_comba6(fp_int *A, fp_int *B)
+int fp_sqr_comba6(fp_int *A, fp_int *B)
{
- fp_digit *a, b[12], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
fp_word tt;
#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[12];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 12, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -92,8 +104,13 @@ void fp_sqr_comba6(fp_int *A, fp_int *B)
B->used = 12;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 12 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 12 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_64.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_64.i
index b74367a7c..b9b2c8ab7 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_64.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_64.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_64.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR64
-void fp_sqr_comba64(fp_int *A, fp_int *B)
+int fp_sqr_comba64(fp_int *A, fp_int *B)
{
- fp_digit *a, b[128], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
- fp_word tt;
-#endif
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[128];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 128, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -672,8 +684,13 @@ void fp_sqr_comba64(fp_int *A, fp_int *B)
B->used = 128;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 128 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 128 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_7.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_7.i
index 8ddef1a9b..09bf9954a 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_7.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_7.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_7.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR7
-void fp_sqr_comba7(fp_int *A, fp_int *B)
+int fp_sqr_comba7(fp_int *A, fp_int *B)
{
- fp_digit *a, b[14], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
fp_word tt;
#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[14];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 14, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -102,8 +114,13 @@ void fp_sqr_comba7(fp_int *A, fp_int *B)
B->used = 14;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 14 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 14 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_8.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_8.i
index f9a72bcf6..23fd8e41d 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_8.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_8.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_8.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR8
-void fp_sqr_comba8(fp_int *A, fp_int *B)
+int fp_sqr_comba8(fp_int *A, fp_int *B)
{
- fp_digit *a, b[16], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
- fp_word tt;
-#endif
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[16];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -112,8 +124,13 @@ void fp_sqr_comba8(fp_int *A, fp_int *B)
B->used = 16;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 16 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 16 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_9.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_9.i
index 94a5d2e92..ed6451a77 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_9.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_9.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_9.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,29 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef TFM_SQR9
-void fp_sqr_comba9(fp_int *A, fp_int *B)
+int fp_sqr_comba9(fp_int *A, fp_int *B)
{
- fp_digit *a, b[18], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
fp_word tt;
#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[18];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 18, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
a = A->dp;
COMBA_START;
@@ -122,8 +134,13 @@ void fp_sqr_comba9(fp_int *A, fp_int *B)
B->used = 18;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 18 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 18 * sizeof(fp_digit));
fp_clamp(B);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_small_set.i b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_small_set.i
index 9918b2ee8..a81ee10e2 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_small_set.i
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/fp_sqr_comba_small_set.i
@@ -1,8 +1,8 @@
/* fp_sqr_comba_small_set.i
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,17 +16,30 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#if defined(TFM_SMALL_SET)
-void fp_sqr_comba_small(fp_int *A, fp_int *B)
+int fp_sqr_comba_small(fp_int *A, fp_int *B)
{
- fp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;
+ fp_digit *a, c0, c1, c2, sc0 = 0, sc1 = 0, sc2 = 0;
#ifdef TFM_ISO
- fp_word tt;
-#endif
+ fp_word tt;
+#endif
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit b[32];
+#else
+ fp_digit *b;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_digit*)XMALLOC(sizeof(fp_digit) * 32, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL)
+ return FP_MEM;
+#endif
+
switch (A->used) {
case 1:
a = A->dp;
@@ -43,7 +56,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 2;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 2 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 2 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -72,7 +85,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 4;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 4 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 4 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -111,7 +124,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 6;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 6 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 6 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -160,7 +173,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 8;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 8 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 8 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -219,7 +232,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 10;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 10 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 10 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -288,7 +301,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 12;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 12 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 12 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -367,7 +380,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 14;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 14 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 14 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -456,7 +469,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 16;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 16 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 16 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -555,7 +568,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 18;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 18 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 18 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -664,7 +677,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 20;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 20 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 20 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -783,7 +796,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 22;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 22 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 22 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -912,7 +925,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 24;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 24 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 24 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -1051,7 +1064,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 26;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 26 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 26 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -1200,7 +1213,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 28;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 28 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 28 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -1359,7 +1372,7 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 30;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 30 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 30 * sizeof(fp_digit));
fp_clamp(B);
break;
@@ -1528,13 +1541,18 @@ void fp_sqr_comba_small(fp_int *A, fp_int *B)
B->used = 32;
B->sign = FP_ZPOS;
- memcpy(B->dp, b, 32 * sizeof(fp_digit));
+ XMEMCPY(B->dp, b, 32 * sizeof(fp_digit));
fp_clamp(B);
break;
default:
break;
-}
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return FP_OKAY;
}
#endif /* TFM_SMALL_SET */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_448.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_448.c
new file mode 100644
index 000000000..7795a962d
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_448.c
@@ -0,0 +1,10780 @@
+/* ge_448.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Based On Daniel J Bernstein's ed25519 Public Domain ref10 work.
+ * Small implementation based on Daniel Beer's ed25519 public domain work.
+ * Reworked for ed448 by Sean Parkinson.
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_ED448
+
+#include <wolfssl/wolfcrypt/ge_448.h>
+#include <wolfssl/wolfcrypt/ed448.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+/*
+sc means scalar.
+ge means group element.
+
+Here the group is the set of pairs (x,y) of field elements (see ge_448.h)
+satisfying -x^2 + y^2 = 1 + d x^2y^2
+where d = -39081
+
+Representations:
+ ge448_p2 (projective) : (X:Y:Z) satisfying x=X/Z, y=Y/Z
+ ge448_precomp (affine): (X:Y)
+*/
+
+
+#ifdef ED448_SMALL
+
+/* Base point of ed448 */
+static const ge448_p2 ed448_base = {
+ { 0x5e, 0xc0, 0x0c, 0xc7, 0x2b, 0xa8, 0x26, 0x26, 0x8e, 0x93, 0x00, 0x8b,
+ 0xe1, 0x80, 0x3b, 0x43, 0x11, 0x65, 0xb6, 0x2a, 0xf7, 0x1a, 0xae, 0x12,
+ 0x64, 0xa4, 0xd3, 0xa3, 0x24, 0xe3, 0x6d, 0xea, 0x67, 0x17, 0x0f, 0x47,
+ 0x70, 0x65, 0x14, 0x9e, 0xda, 0x36, 0xbf, 0x22, 0xa6, 0x15, 0x1d, 0x22,
+ 0xed, 0x0d, 0xed, 0x6b, 0xc6, 0x70, 0x19, 0x4f },
+ { 0x14, 0xfa, 0x30, 0xf2, 0x5b, 0x79, 0x08, 0x98, 0xad, 0xc8, 0xd7, 0x4e,
+ 0x2c, 0x13, 0xbd, 0xfd, 0xc4, 0x39, 0x7c, 0xe6, 0x1c, 0xff, 0xd3, 0x3a,
+ 0xd7, 0xc2, 0xa0, 0x05, 0x1e, 0x9c, 0x78, 0x87, 0x40, 0x98, 0xa3, 0x6c,
+ 0x73, 0x73, 0xea, 0x4b, 0x62, 0xc7, 0xc9, 0x56, 0x37, 0x20, 0x76, 0x88,
+ 0x24, 0xbc, 0xb6, 0x6e, 0x71, 0x46, 0x3f, 0x69 },
+ { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
+};
+
+/* Part of order of ed448 that needs to be multiplied when reducing */
+static const uint8_t ed448_order_mul[56] = {
+ 0x0d, 0xbb, 0xa7, 0x54, 0x6d, 0x3d, 0x87, 0xdc, 0xaa, 0x70, 0x3a, 0x72,
+ 0x8d, 0x3d, 0x93, 0xde, 0x6f, 0xc9, 0x29, 0x51, 0xb6, 0x24, 0xb1, 0x3b,
+ 0x16, 0xdc, 0x35, 0x83,
+};
+
+/* Reduce scalar mod the order of the curve.
+ * Scalar will be 114 bytes.
+ *
+ * b [in] Scalar to reduce.
+ */
+void sc448_reduce(uint8_t* b)
+{
+ int i, j;
+ uint32_t t[114];
+ uint8_t o;
+
+ for (i = 0; i < 86; i++) {
+ t[i] = b[i];
+ }
+ for (i = 0; i < 58; i++) {
+ for (j = 0; j < 28; j++)
+ t[i+j] += b[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+ t[i+56] = 0;
+ }
+ for (i = 54; i < 87; i++) {
+ t[i+1] += t[i] >> 8;
+ t[i] &= 0xff;
+ }
+ for (i = 0; i < 31; i++) {
+ for (j = 0; j < 28; j++)
+ t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+ t[i+56] = 0;
+ }
+ for (i = 54; i < 60; i++) {
+ t[i+1] += t[i] >> 8;
+ t[i] &= 0xff;
+ }
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 28; j++)
+ t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+ t[i+56] = 0;
+ }
+ for (i = 0; i < 55; i++) {
+ t[i+1] += t[i] >> 8;
+ t[i] &= 0xff;
+ }
+ o = t[55] >> 6;
+ t[55] &= 0x3f;
+ for (j = 0; j < 28; j++)
+ t[j] += o * (uint32_t)ed448_order_mul[j];
+ for (i = 0; i < 55; i++) {
+ t[i+1] += t[i] >> 8;
+ b[i] = t[i] & 0xff;
+ }
+ b[i] = t[i] & 0xff;
+ b[i+1] = 0;
+}
+
+/* Multiply a by b and add d. r = (a * b + d) mod order
+ *
+ * r [in] Scalar to hold result.
+ * a [in] Scalar to multiply.
+ * b [in] Scalar to multiply.
+ * d [in] Scalar to add to multiplicative result.
+ */
+void sc448_muladd(uint8_t* r, const uint8_t* a, const uint8_t* b,
+ const uint8_t* d)
+{
+ int i, j;
+ uint32_t t[112];
+ uint8_t o;
+
+ /* a * b + d */
+ for (i = 0; i < 56; i++)
+ t[i] = d[i];
+ for (i = 0; i < 56; i++) {
+ for (j = 0; j < 56; j++)
+ t[i+j] += (int16_t)a[i] * b[j];
+ t[i+56] = 0;
+ }
+
+ for (i = 0; i < 111; i++) {
+ t[i+1] += t[i] >> 8;
+ t[i] &= 0xff;
+ }
+ for (i = 0; i < 56; i++) {
+ for (j = 0; j < 28; j++)
+ t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+ t[i+56] = 0;
+ }
+ for (i = 54; i < 85; i++) {
+ t[i+1] += t[i] >> 8;
+ t[i] &= 0xff;
+ }
+ for (i = 0; i < 29; i++) {
+ for (j = 0; j < 28; j++)
+ t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+ t[i+56] = 0;
+ }
+ for (i = 54; i < 58; i++) {
+ t[i+1] += t[i] >> 8;
+ t[i] &= 0xff;
+ }
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 28; j++)
+ t[i+j] += t[i+56] * ((uint32_t)ed448_order_mul[j] << 2);
+ t[i+56] = 0;
+ }
+ for (i = 0; i < 55; i++) {
+ t[i+1] += t[i] >> 8;
+ t[i] &= 0xff;
+ }
+ o = t[55] >> 6;
+ t[55] &= 0x3f;
+ for (j = 0; j < 28; j++)
+ t[j] += o * (uint32_t)ed448_order_mul[j];
+ for (i = 0; i < 55; i++) {
+ t[i+1] += t[i] >> 8;
+ r[i] = t[i] & 0xff;
+ }
+ r[i] = t[i] & 0xff;
+ r[i+1] = 0;
+}
+
+/* Double the point on the Twisted Edwards curve. r = 2.p
+ *
+ * r [in] Point to hold result.
+ * p [in] Point to double.
+ */
+static WC_INLINE void ge448_dbl(ge448_p2 *r,const ge448_p2 *p)
+{
+ ge448 t0[GE448_WORDS];
+ ge448 t1[GE448_WORDS];
+
+ fe448_add(t0, p->X, p->Y); /* t0 = B1 = X1+Y1 */
+ fe448_reduce(t0);
+ fe448_sqr(t0, t0); /* t0 = B = (X1+Y1)^2 */
+ fe448_sqr(r->X, p->X); /* r->X = C = X1^2 */
+ fe448_sqr(r->Y, p->Y); /* r->Y = D = Y1^2 */
+ fe448_add(t1, r->X, r->Y); /* t1 = E = C+D */
+ fe448_reduce(t1);
+ fe448_sub(r->Y, r->X, r->Y); /* r->Y = Y31 = C-D */
+ fe448_sqr(r->Z, p->Z); /* r->Z = H = Z1^2 */
+ fe448_add(r->Z, r->Z, r->Z); /* r->Z = J1 = 2*H */
+ fe448_sub(r->Z, t1, r->Z); /* r->Z = J = E-2*H */
+ fe448_reduce(r->Z);
+ fe448_sub(r->X, t0, t1); /* r->X = X31 = B-E */
+ fe448_mul(r->X, r->X, r->Z); /* r->X = X3 = (B-E)*J */
+ fe448_mul(r->Y, r->Y, t1); /* r->Y = Y3 = E*(C-D) */
+ fe448_mul(r->Z, t1, r->Z); /* r->Z = Z3 = E*J */
+}
+
+/* Add two points on the Twisted Edwards curve. r = p + q
+ *
+ * r [in] Point to hold result.
+ * p [in] Point to add.
+ * q [in] Point to add.
+ */
+static WC_INLINE void ge448_add(ge448_p2* r, const ge448_p2* p,
+ const ge448_p2* q)
+{
+ ge448 t0[GE448_WORDS];
+ ge448 t1[GE448_WORDS];
+ ge448 t2[GE448_WORDS];
+ ge448 t3[GE448_WORDS];
+ ge448 t4[GE448_WORDS];
+
+ fe448_mul(t1, p->X, q->X); /* t1 = C = X1*X2 */
+ fe448_mul(t2, p->Y, q->Y); /* t2 = D = Y1*Y2 */
+ fe448_mul(t3, t1, t2); /* t3 = E1 = C*D */
+ fe448_mul39081(t3, t3); /* t3 = E = d*C*D */
+ fe448_mul(r->Z, p->Z, q->Z); /* r->Z = A = Z1*Z2 */
+ fe448_sqr(t0, r->Z); /* t0 = B = A^2 */
+ fe448_add(t4, t0, t3); /* t4 = F = B-(-E) */
+ fe448_sub(t0, t0, t3); /* t0 = G = B+(-E) */
+ fe448_reduce(t0);
+ fe448_add(r->X, p->X, p->Y); /* r->X = H1 = X1+Y1 */
+ fe448_reduce(r->X);
+ fe448_add(r->Y, q->X, q->Y); /* r->Y = H2 = X2+Y2 */
+ fe448_reduce(r->Y);
+ fe448_mul(r->X, r->X, r->Y); /* r->X = H = (X1+Y1)*(X2+Y2) */
+ fe448_sub(r->X, r->X, t1); /* r->X = X31 = H-C */
+ fe448_sub(r->X, r->X, t2); /* r->X = X32 = H-C-D */
+ fe448_reduce(r->X);
+ fe448_mul(r->X, r->X, t4); /* r->X = X33 = F*(H-C-D) */
+ fe448_mul(r->X, r->X, r->Z); /* r->X = X3 = A*F*(H-C-D) */
+ fe448_sub(r->Y, t2, t1); /* r->Y = Y31 = D-C */
+ fe448_reduce(r->Y);
+ fe448_mul(r->Y, r->Y, t0); /* r->Y = Y32 = G*(D-C) */
+ fe448_mul(r->Y, r->Y, r->Z); /* r->Y = Y3 = A*F*(D-C) */
+ fe448_mul(r->Z, t4, t0); /* r->Z = Z3 = F*G */
+}
+
+/* Convert point to byte array assuming projective coordinates.
+ *
+ * s [out] Array of bytes to hold compressed point.
+ * h [in]  Point to convert.
+ */
+void ge448_to_bytes(uint8_t *s, const ge448_p2 *h)
+{
+ ge448 recip[56];
+ ge448 x[56];
+
+ fe448_invert(recip, h->Z);
+ fe448_mul(x, h->X, recip);
+ fe448_mul(s, h->Y, recip);
+ fe448_norm(x);
+ fe448_norm(s);
+ s[56] = (x[0] & 1) << 7;
+}
+
+/* Compress the point to y-ordinate and negative bit.
+ *
+ * out [in] Array of bytes to hold compressed key.
+ * xIn [in] The x-ordinate.
+ * yIn [in] The y-ordinate.
+ */
+int ge448_compress_key(uint8_t* out, const uint8_t* xIn, const uint8_t* yIn)
+{
+ ge448 x[56];
+
+ fe448_copy(x, xIn);
+ fe448_copy(out, yIn);
+ fe448_norm(x);
+ fe448_norm(out);
+ out[56] = (x[0] & 1) << 7;
+
+ return 0;
+}
+
+/* Perform a scalar multiplication of a point. h = a * p
+ *
+ * r [in] Point to hold result.
+ * a [in] Scalar to multiply by.
+ */
+static void ge448_scalarmult(ge448_p2* h, const ge448_p2* p, const uint8_t* a)
+{
+ ge448_p2 r;
+ ge448_p2 s;
+ int i;
+
+ XMEMSET(&r, 0, sizeof(r));
+ r.Y[0] = 1;
+ r.Z[0] = 1;
+
+ for (i = 447; i >= 0; i--) {
+ const byte bit = (a[i >> 3] >> (i & 7)) & 1;
+
+ ge448_dbl(&r, &r);
+ ge448_add(&s, &r, p);
+
+ fe448_cmov(r.X, s.X, bit);
+ fe448_cmov(r.Y, s.Y, bit);
+ fe448_cmov(r.Z, s.Z, bit);
+ }
+
+ XMEMCPY(h, &r, sizeof(r));
+}
+
+/* Perform a scalar multiplication of the base point. r = a * base
+ *
+ * r [in] Point to hold result.
+ * a [in] Scalar to multiply by.
+ */
+void ge448_scalarmult_base(ge448_p2* h, const uint8_t* a)
+{
+ ge448_scalarmult(h, &ed448_base, a);
+}
+
+/* Perform a scalar multiplication of the base point and public point.
+ * r = a * p + b * base
+ * Uses a sliding window of 5 bits.
+ * Not constant time.
+ *
+ * r [in] Point to hold result.
+ * a [in] Scalar to multiply by.
+ */
+int ge448_double_scalarmult_vartime(ge448_p2 *r, const uint8_t *a,
+ const ge448_p2 *A, const uint8_t *b)
+{
+ ge448_p2 t;
+
+ ge448_scalarmult(&t, &ed448_base, b);
+ ge448_scalarmult(r, A, a);
+ ge448_add(r, r, &t);
+
+ return 0;
+}
+
+/* Convert compressed point to negative of affine point.
+ * Calculates x from the y and the negative bit.
+ * Not constant time.
+ *
+ * r [in] Uncompressed point.
+ * b [in] Array of bytes representing point.
+ * returns 0 on success and -1 on failure.
+ */
+int ge448_from_bytes_negate_vartime(ge448_p2 *r, const uint8_t *b)
+{
+ int ret = 0;
+ ge448 u[GE448_WORDS];
+ ge448 v[GE448_WORDS];
+ ge448 u3[GE448_WORDS];
+ ge448 vxx[GE448_WORDS];
+ ge448 check[GE448_WORDS];
+
+ fe448_copy(r->Y, b);
+ XMEMSET(r->Z, 0, sizeof(r->Z));
+ r->Z[0] = 1;
+ fe448_sqr(u, r->Y); /* u = y^2 */
+ fe448_mul39081(v, u); /* v = 39081.y^2 */
+ fe448_sub(u, u, r->Z); /* u = y^2-1 */
+ fe448_add(v, v, r->Z); /* v = 39081.y^2-1 */
+ fe448_neg(v, v); /* v = -39081.y^2-1 = d.y^2-1 */
+
+ fe448_sqr(r->X, v); /* x = v^2 */
+ fe448_mul(r->X, r->X, v); /* x = v^3 */
+ fe448_sqr(u3, u); /* x = u^2.v^3 */
+ fe448_mul(r->X, r->X, u3); /* x = u^2.v^3 */
+ fe448_mul(u3, u3, u); /* u3 = u^3 */
+ fe448_mul(r->X, r->X, u3); /* x = u^5.v^3 */
+
+ fe448_pow_2_446_222_1(r->X, r->X); /* x = (u^5.v^3)^((q-3)/4) */
+ fe448_mul(r->X, r->X, u3); /* x = u^3(u^5.v^3)^((q-3)/4) */
+ fe448_mul(r->X, r->X, v); /* x = u^3.v(u^5.v^3)^((q-3)/4) */
+
+ fe448_sqr(vxx, r->X);
+ fe448_mul(vxx, vxx, v);
+ fe448_sub(check, vxx, u); /* check = v.x^2-u */
+ fe448_norm(check);
+ fe448_norm(r->X);
+ fe448_norm(r->Y);
+ /* Note; vx^2+u is NOT correct. */
+ if (fe448_isnonzero(check)) {
+ ret = -1;
+ }
+
+ /* Calculating negative of point in bytes - negate only if X is correct. */
+ if ((r->X[0] & 1) == (b[56] >> 7)) {
+ fe448_neg(r->X, r->X);
+ }
+
+ return ret;
+}
+
+#else /* !ED448_SMALL */
+
+#if defined(CURVED448_128BIT)
+
+/* Reduce scalar mod the order of the curve.
+ * Scalar will be 114 bytes.
+ *
+ * b [in] Scalar to reduce.
+ */
+void sc448_reduce(uint8_t* b)
+{
+ uint64_t d[8];
+ uint128_t t[17];
+ uint128_t c;
+ uint64_t o;
+
+ /* Load from bytes */
+ t[ 0] = ((int64_t) (b[ 0]) << 0)
+ | ((int64_t) (b[ 1]) << 8)
+ | ((int64_t) (b[ 2]) << 16)
+ | ((int64_t) (b[ 3]) << 24)
+ | ((int64_t) (b[ 4]) << 32)
+ | ((int64_t) (b[ 5]) << 40)
+ | ((int64_t) (b[ 6]) << 48);
+ t[ 1] = ((int64_t) (b[ 7]) << 0)
+ | ((int64_t) (b[ 8]) << 8)
+ | ((int64_t) (b[ 9]) << 16)
+ | ((int64_t) (b[10]) << 24)
+ | ((int64_t) (b[11]) << 32)
+ | ((int64_t) (b[12]) << 40)
+ | ((int64_t) (b[13]) << 48);
+ t[ 2] = ((int64_t) (b[14]) << 0)
+ | ((int64_t) (b[15]) << 8)
+ | ((int64_t) (b[16]) << 16)
+ | ((int64_t) (b[17]) << 24)
+ | ((int64_t) (b[18]) << 32)
+ | ((int64_t) (b[19]) << 40)
+ | ((int64_t) (b[20]) << 48);
+ t[ 3] = ((int64_t) (b[21]) << 0)
+ | ((int64_t) (b[22]) << 8)
+ | ((int64_t) (b[23]) << 16)
+ | ((int64_t) (b[24]) << 24)
+ | ((int64_t) (b[25]) << 32)
+ | ((int64_t) (b[26]) << 40)
+ | ((int64_t) (b[27]) << 48);
+ t[ 4] = ((int64_t) (b[28]) << 0)
+ | ((int64_t) (b[29]) << 8)
+ | ((int64_t) (b[30]) << 16)
+ | ((int64_t) (b[31]) << 24)
+ | ((int64_t) (b[32]) << 32)
+ | ((int64_t) (b[33]) << 40)
+ | ((int64_t) (b[34]) << 48);
+ t[ 5] = ((int64_t) (b[35]) << 0)
+ | ((int64_t) (b[36]) << 8)
+ | ((int64_t) (b[37]) << 16)
+ | ((int64_t) (b[38]) << 24)
+ | ((int64_t) (b[39]) << 32)
+ | ((int64_t) (b[40]) << 40)
+ | ((int64_t) (b[41]) << 48);
+ t[ 6] = ((int64_t) (b[42]) << 0)
+ | ((int64_t) (b[43]) << 8)
+ | ((int64_t) (b[44]) << 16)
+ | ((int64_t) (b[45]) << 24)
+ | ((int64_t) (b[46]) << 32)
+ | ((int64_t) (b[47]) << 40)
+ | ((int64_t) (b[48]) << 48);
+ t[ 7] = ((int64_t) (b[49]) << 0)
+ | ((int64_t) (b[50]) << 8)
+ | ((int64_t) (b[51]) << 16)
+ | ((int64_t) (b[52]) << 24)
+ | ((int64_t) (b[53]) << 32)
+ | ((int64_t) (b[54]) << 40)
+ | ((int64_t) (b[55]) << 48);
+ t[ 8] = ((int64_t) (b[56]) << 0)
+ | ((int64_t) (b[57]) << 8)
+ | ((int64_t) (b[58]) << 16)
+ | ((int64_t) (b[59]) << 24)
+ | ((int64_t) (b[60]) << 32)
+ | ((int64_t) (b[61]) << 40)
+ | ((int64_t) (b[62]) << 48);
+ t[ 9] = ((int64_t) (b[63]) << 0)
+ | ((int64_t) (b[64]) << 8)
+ | ((int64_t) (b[65]) << 16)
+ | ((int64_t) (b[66]) << 24)
+ | ((int64_t) (b[67]) << 32)
+ | ((int64_t) (b[68]) << 40)
+ | ((int64_t) (b[69]) << 48);
+ t[10] = ((int64_t) (b[70]) << 0)
+ | ((int64_t) (b[71]) << 8)
+ | ((int64_t) (b[72]) << 16)
+ | ((int64_t) (b[73]) << 24)
+ | ((int64_t) (b[74]) << 32)
+ | ((int64_t) (b[75]) << 40)
+ | ((int64_t) (b[76]) << 48);
+ t[11] = ((int64_t) (b[77]) << 0)
+ | ((int64_t) (b[78]) << 8)
+ | ((int64_t) (b[79]) << 16)
+ | ((int64_t) (b[80]) << 24)
+ | ((int64_t) (b[81]) << 32)
+ | ((int64_t) (b[82]) << 40)
+ | ((int64_t) (b[83]) << 48);
+ t[12] = ((int64_t) (b[84]) << 0)
+ | ((int64_t) (b[85]) << 8)
+ | ((int64_t) (b[86]) << 16)
+ | ((int64_t) (b[87]) << 24)
+ | ((int64_t) (b[88]) << 32)
+ | ((int64_t) (b[89]) << 40)
+ | ((int64_t) (b[90]) << 48);
+ t[13] = ((int64_t) (b[91]) << 0)
+ | ((int64_t) (b[92]) << 8)
+ | ((int64_t) (b[93]) << 16)
+ | ((int64_t) (b[94]) << 24)
+ | ((int64_t) (b[95]) << 32)
+ | ((int64_t) (b[96]) << 40)
+ | ((int64_t) (b[97]) << 48);
+ t[14] = ((int64_t) (b[98]) << 0)
+ | ((int64_t) (b[99]) << 8)
+ | ((int64_t) (b[100]) << 16)
+ | ((int64_t) (b[101]) << 24)
+ | ((int64_t) (b[102]) << 32)
+ | ((int64_t) (b[103]) << 40)
+ | ((int64_t) (b[104]) << 48);
+ t[15] = ((int64_t) (b[105]) << 0)
+ | ((int64_t) (b[106]) << 8)
+ | ((int64_t) (b[107]) << 16)
+ | ((int64_t) (b[108]) << 24)
+ | ((int64_t) (b[109]) << 32)
+ | ((int64_t) (b[110]) << 40)
+ | ((int64_t) (b[111]) << 48);
+ t[16] = ((int64_t) (b[112]) << 0)
+ | ((int64_t) (b[113]) << 8);
+
+ /* Mod curve order */
+ /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */
+ /* Mod top half of extra words */
+ t[ 4] += (int128_t)0x21cf5b5529eec34L * t[12];
+ t[ 5] += (int128_t)0x0f635c8e9c2ab70L * t[12];
+ t[ 6] += (int128_t)0x2d944a725bf7a4cL * t[12];
+ t[ 7] += (int128_t)0x20cd77058eec490L * t[12];
+ t[ 5] += (int128_t)0x21cf5b5529eec34L * t[13];
+ t[ 6] += (int128_t)0x0f635c8e9c2ab70L * t[13];
+ t[ 7] += (int128_t)0x2d944a725bf7a4cL * t[13];
+ t[ 8] += (int128_t)0x20cd77058eec490L * t[13];
+ t[ 6] += (int128_t)0x21cf5b5529eec34L * t[14];
+ t[ 7] += (int128_t)0x0f635c8e9c2ab70L * t[14];
+ t[ 8] += (int128_t)0x2d944a725bf7a4cL * t[14];
+ t[ 9] += (int128_t)0x20cd77058eec490L * t[14];
+ t[ 7] += (int128_t)0x21cf5b5529eec34L * t[15];
+ t[ 8] += (int128_t)0x0f635c8e9c2ab70L * t[15];
+ t[ 9] += (int128_t)0x2d944a725bf7a4cL * t[15];
+ t[10] += (int128_t)0x20cd77058eec490L * t[15];
+ t[ 8] += (int128_t)0x21cf5b5529eec34L * t[16];
+ t[ 9] += (int128_t)0x0f635c8e9c2ab70L * t[16];
+ t[10] += (int128_t)0x2d944a725bf7a4cL * t[16];
+ t[11] += (int128_t)0x20cd77058eec490L * t[16];
+ t[12] = 0;
+ /* Propagate carries */
+ c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff;
+ c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff;
+ c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff;
+ c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff;
+ c = t[ 8] >> 56; t[ 9] += c; t[ 8] = t[ 8] & 0xffffffffffffff;
+ c = t[ 9] >> 56; t[10] += c; t[ 9] = t[ 9] & 0xffffffffffffff;
+ c = t[10] >> 56; t[11] += c; t[10] = t[10] & 0xffffffffffffff;
+ c = t[11] >> 56; t[12] += c; t[11] = t[11] & 0xffffffffffffff;
+ /* Mod bottom half of extra words */
+ t[ 0] += (int128_t)0x21cf5b5529eec34L * t[ 8];
+ t[ 1] += (int128_t)0x0f635c8e9c2ab70L * t[ 8];
+ t[ 2] += (int128_t)0x2d944a725bf7a4cL * t[ 8];
+ t[ 3] += (int128_t)0x20cd77058eec490L * t[ 8];
+ t[ 1] += (int128_t)0x21cf5b5529eec34L * t[ 9];
+ t[ 2] += (int128_t)0x0f635c8e9c2ab70L * t[ 9];
+ t[ 3] += (int128_t)0x2d944a725bf7a4cL * t[ 9];
+ t[ 4] += (int128_t)0x20cd77058eec490L * t[ 9];
+ t[ 2] += (int128_t)0x21cf5b5529eec34L * t[10];
+ t[ 3] += (int128_t)0x0f635c8e9c2ab70L * t[10];
+ t[ 4] += (int128_t)0x2d944a725bf7a4cL * t[10];
+ t[ 5] += (int128_t)0x20cd77058eec490L * t[10];
+ t[ 3] += (int128_t)0x21cf5b5529eec34L * t[11];
+ t[ 4] += (int128_t)0x0f635c8e9c2ab70L * t[11];
+ t[ 5] += (int128_t)0x2d944a725bf7a4cL * t[11];
+ t[ 6] += (int128_t)0x20cd77058eec490L * t[11];
+ t[ 4] += (int128_t)0x21cf5b5529eec34L * t[12];
+ t[ 5] += (int128_t)0x0f635c8e9c2ab70L * t[12];
+ t[ 6] += (int128_t)0x2d944a725bf7a4cL * t[12];
+ t[ 7] += (int128_t)0x20cd77058eec490L * t[12];
+ t[ 8] = 0;
+ /* Propagate carries */
+ c = t[ 0] >> 56; t[ 1] += c; t[ 0] = t[ 0] & 0xffffffffffffff;
+ c = t[ 1] >> 56; t[ 2] += c; t[ 1] = t[ 1] & 0xffffffffffffff;
+ c = t[ 2] >> 56; t[ 3] += c; t[ 2] = t[ 2] & 0xffffffffffffff;
+ c = t[ 3] >> 56; t[ 4] += c; t[ 3] = t[ 3] & 0xffffffffffffff;
+ c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff;
+ c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff;
+ c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff;
+ c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff;
+ t[ 0] += (int128_t)0x21cf5b5529eec34L * t[ 8];
+ t[ 1] += (int128_t)0x0f635c8e9c2ab70L * t[ 8];
+ t[ 2] += (int128_t)0x2d944a725bf7a4cL * t[ 8];
+ t[ 3] += (int128_t)0x20cd77058eec490L * t[ 8];
+ /* Propagate carries */
+ c = t[ 0] >> 56; t[ 1] += c; d[ 0] = (int64_t)(t[ 0] & 0xffffffffffffff);
+ c = t[ 1] >> 56; t[ 2] += c; d[ 1] = (int64_t)(t[ 1] & 0xffffffffffffff);
+ c = t[ 2] >> 56; t[ 3] += c; d[ 2] = (int64_t)(t[ 2] & 0xffffffffffffff);
+ c = t[ 3] >> 56; t[ 4] += c; d[ 3] = (int64_t)(t[ 3] & 0xffffffffffffff);
+ c = t[ 4] >> 56; t[ 5] += c; d[ 4] = (int64_t)(t[ 4] & 0xffffffffffffff);
+ c = t[ 5] >> 56; t[ 6] += c; d[ 5] = (int64_t)(t[ 5] & 0xffffffffffffff);
+ c = t[ 6] >> 56; t[ 7] += c; d[ 6] = (int64_t)(t[ 6] & 0xffffffffffffff);
+ d[ 7] = t[7];
+ /* Mod bits over 56 in last word */
+ o = d[7] >> 54; d[ 7] &= 0x3fffffffffffff;
+ d[ 0] += 0x873d6d54a7bb0dL * o;
+ d[ 1] += 0x3d8d723a70aadcL * o;
+ d[ 2] += 0xb65129c96fde93L * o;
+ d[ 3] += 0x8335dc163bb124L * o;
+ /* Propagate carries */
+ o = d[ 0] >> 56; d[ 1] += o; d[ 0] = d[ 0] & 0xffffffffffffff;
+ o = d[ 1] >> 56; d[ 2] += o; d[ 1] = d[ 1] & 0xffffffffffffff;
+ o = d[ 2] >> 56; d[ 3] += o; d[ 2] = d[ 2] & 0xffffffffffffff;
+ o = d[ 3] >> 56; d[ 4] += o; d[ 3] = d[ 3] & 0xffffffffffffff;
+ o = d[ 4] >> 56; d[ 5] += o; d[ 4] = d[ 4] & 0xffffffffffffff;
+ o = d[ 5] >> 56; d[ 6] += o; d[ 5] = d[ 5] & 0xffffffffffffff;
+ o = d[ 6] >> 56; d[ 7] += o; d[ 6] = d[ 6] & 0xffffffffffffff;
+
+ /* Convert to bytes */
+ b[ 0] = (d[0 ] >> 0);
+ b[ 1] = (d[0 ] >> 8);
+ b[ 2] = (d[0 ] >> 16);
+ b[ 3] = (d[0 ] >> 24);
+ b[ 4] = (d[0 ] >> 32);
+ b[ 5] = (d[0 ] >> 40);
+ b[ 6] = (d[0 ] >> 48);
+ b[ 7] = (d[1 ] >> 0);
+ b[ 8] = (d[1 ] >> 8);
+ b[ 9] = (d[1 ] >> 16);
+ b[10] = (d[1 ] >> 24);
+ b[11] = (d[1 ] >> 32);
+ b[12] = (d[1 ] >> 40);
+ b[13] = (d[1 ] >> 48);
+ b[14] = (d[2 ] >> 0);
+ b[15] = (d[2 ] >> 8);
+ b[16] = (d[2 ] >> 16);
+ b[17] = (d[2 ] >> 24);
+ b[18] = (d[2 ] >> 32);
+ b[19] = (d[2 ] >> 40);
+ b[20] = (d[2 ] >> 48);
+ b[21] = (d[3 ] >> 0);
+ b[22] = (d[3 ] >> 8);
+ b[23] = (d[3 ] >> 16);
+ b[24] = (d[3 ] >> 24);
+ b[25] = (d[3 ] >> 32);
+ b[26] = (d[3 ] >> 40);
+ b[27] = (d[3 ] >> 48);
+ b[28] = (d[4 ] >> 0);
+ b[29] = (d[4 ] >> 8);
+ b[30] = (d[4 ] >> 16);
+ b[31] = (d[4 ] >> 24);
+ b[32] = (d[4 ] >> 32);
+ b[33] = (d[4 ] >> 40);
+ b[34] = (d[4 ] >> 48);
+ b[35] = (d[5 ] >> 0);
+ b[36] = (d[5 ] >> 8);
+ b[37] = (d[5 ] >> 16);
+ b[38] = (d[5 ] >> 24);
+ b[39] = (d[5 ] >> 32);
+ b[40] = (d[5 ] >> 40);
+ b[41] = (d[5 ] >> 48);
+ b[42] = (d[6 ] >> 0);
+ b[43] = (d[6 ] >> 8);
+ b[44] = (d[6 ] >> 16);
+ b[45] = (d[6 ] >> 24);
+ b[46] = (d[6 ] >> 32);
+ b[47] = (d[6 ] >> 40);
+ b[48] = (d[6 ] >> 48);
+ b[49] = (d[7 ] >> 0);
+ b[50] = (d[7 ] >> 8);
+ b[51] = (d[7 ] >> 16);
+ b[52] = (d[7 ] >> 24);
+ b[53] = (d[7 ] >> 32);
+ b[54] = (d[7 ] >> 40);
+ b[55] = (d[7 ] >> 48);
+ b[56] = 0;
+}
+
+/* Multiply a by b and add d. r = (a * b + d) mod order
+ *
+ * Scalars are little-endian byte arrays. Internally each input is split
+ * into eight 56-bit limbs, a schoolbook product is formed, and the result
+ * is reduced modulo the Curve448 group order
+ * 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d.
+ *
+ * r [out] Scalar to hold result.
+ * a [in] Scalar to multiply.
+ * b [in] Scalar to multiply.
+ * d [in] Scalar to add to multiplicative result.
+ */
+void sc448_muladd(uint8_t* r, const uint8_t* a, const uint8_t* b,
+ const uint8_t* d)
+{
+ uint64_t ad[8], bd[8], dd[8], rd[8];
+ uint128_t t[16];
+ uint128_t c;
+ uint64_t o;
+
+ /* Each 56-byte input is unpacked into eight 56-bit little-endian limbs
+ * (7 bytes per limb). */
+ /* Load from bytes */
+ ad[ 0] = ((int64_t) (a[ 0]) << 0)
+ | ((int64_t) (a[ 1]) << 8)
+ | ((int64_t) (a[ 2]) << 16)
+ | ((int64_t) (a[ 3]) << 24)
+ | ((int64_t) (a[ 4]) << 32)
+ | ((int64_t) (a[ 5]) << 40)
+ | ((int64_t) (a[ 6]) << 48);
+ ad[ 1] = ((int64_t) (a[ 7]) << 0)
+ | ((int64_t) (a[ 8]) << 8)
+ | ((int64_t) (a[ 9]) << 16)
+ | ((int64_t) (a[10]) << 24)
+ | ((int64_t) (a[11]) << 32)
+ | ((int64_t) (a[12]) << 40)
+ | ((int64_t) (a[13]) << 48);
+ ad[ 2] = ((int64_t) (a[14]) << 0)
+ | ((int64_t) (a[15]) << 8)
+ | ((int64_t) (a[16]) << 16)
+ | ((int64_t) (a[17]) << 24)
+ | ((int64_t) (a[18]) << 32)
+ | ((int64_t) (a[19]) << 40)
+ | ((int64_t) (a[20]) << 48);
+ ad[ 3] = ((int64_t) (a[21]) << 0)
+ | ((int64_t) (a[22]) << 8)
+ | ((int64_t) (a[23]) << 16)
+ | ((int64_t) (a[24]) << 24)
+ | ((int64_t) (a[25]) << 32)
+ | ((int64_t) (a[26]) << 40)
+ | ((int64_t) (a[27]) << 48);
+ ad[ 4] = ((int64_t) (a[28]) << 0)
+ | ((int64_t) (a[29]) << 8)
+ | ((int64_t) (a[30]) << 16)
+ | ((int64_t) (a[31]) << 24)
+ | ((int64_t) (a[32]) << 32)
+ | ((int64_t) (a[33]) << 40)
+ | ((int64_t) (a[34]) << 48);
+ ad[ 5] = ((int64_t) (a[35]) << 0)
+ | ((int64_t) (a[36]) << 8)
+ | ((int64_t) (a[37]) << 16)
+ | ((int64_t) (a[38]) << 24)
+ | ((int64_t) (a[39]) << 32)
+ | ((int64_t) (a[40]) << 40)
+ | ((int64_t) (a[41]) << 48);
+ ad[ 6] = ((int64_t) (a[42]) << 0)
+ | ((int64_t) (a[43]) << 8)
+ | ((int64_t) (a[44]) << 16)
+ | ((int64_t) (a[45]) << 24)
+ | ((int64_t) (a[46]) << 32)
+ | ((int64_t) (a[47]) << 40)
+ | ((int64_t) (a[48]) << 48);
+ ad[ 7] = ((int64_t) (a[49]) << 0)
+ | ((int64_t) (a[50]) << 8)
+ | ((int64_t) (a[51]) << 16)
+ | ((int64_t) (a[52]) << 24)
+ | ((int64_t) (a[53]) << 32)
+ | ((int64_t) (a[54]) << 40)
+ | ((int64_t) (a[55]) << 48);
+ /* Load from bytes */
+ bd[ 0] = ((int64_t) (b[ 0]) << 0)
+ | ((int64_t) (b[ 1]) << 8)
+ | ((int64_t) (b[ 2]) << 16)
+ | ((int64_t) (b[ 3]) << 24)
+ | ((int64_t) (b[ 4]) << 32)
+ | ((int64_t) (b[ 5]) << 40)
+ | ((int64_t) (b[ 6]) << 48);
+ bd[ 1] = ((int64_t) (b[ 7]) << 0)
+ | ((int64_t) (b[ 8]) << 8)
+ | ((int64_t) (b[ 9]) << 16)
+ | ((int64_t) (b[10]) << 24)
+ | ((int64_t) (b[11]) << 32)
+ | ((int64_t) (b[12]) << 40)
+ | ((int64_t) (b[13]) << 48);
+ bd[ 2] = ((int64_t) (b[14]) << 0)
+ | ((int64_t) (b[15]) << 8)
+ | ((int64_t) (b[16]) << 16)
+ | ((int64_t) (b[17]) << 24)
+ | ((int64_t) (b[18]) << 32)
+ | ((int64_t) (b[19]) << 40)
+ | ((int64_t) (b[20]) << 48);
+ bd[ 3] = ((int64_t) (b[21]) << 0)
+ | ((int64_t) (b[22]) << 8)
+ | ((int64_t) (b[23]) << 16)
+ | ((int64_t) (b[24]) << 24)
+ | ((int64_t) (b[25]) << 32)
+ | ((int64_t) (b[26]) << 40)
+ | ((int64_t) (b[27]) << 48);
+ bd[ 4] = ((int64_t) (b[28]) << 0)
+ | ((int64_t) (b[29]) << 8)
+ | ((int64_t) (b[30]) << 16)
+ | ((int64_t) (b[31]) << 24)
+ | ((int64_t) (b[32]) << 32)
+ | ((int64_t) (b[33]) << 40)
+ | ((int64_t) (b[34]) << 48);
+ bd[ 5] = ((int64_t) (b[35]) << 0)
+ | ((int64_t) (b[36]) << 8)
+ | ((int64_t) (b[37]) << 16)
+ | ((int64_t) (b[38]) << 24)
+ | ((int64_t) (b[39]) << 32)
+ | ((int64_t) (b[40]) << 40)
+ | ((int64_t) (b[41]) << 48);
+ bd[ 6] = ((int64_t) (b[42]) << 0)
+ | ((int64_t) (b[43]) << 8)
+ | ((int64_t) (b[44]) << 16)
+ | ((int64_t) (b[45]) << 24)
+ | ((int64_t) (b[46]) << 32)
+ | ((int64_t) (b[47]) << 40)
+ | ((int64_t) (b[48]) << 48);
+ bd[ 7] = ((int64_t) (b[49]) << 0)
+ | ((int64_t) (b[50]) << 8)
+ | ((int64_t) (b[51]) << 16)
+ | ((int64_t) (b[52]) << 24)
+ | ((int64_t) (b[53]) << 32)
+ | ((int64_t) (b[54]) << 40)
+ | ((int64_t) (b[55]) << 48);
+ /* Load from bytes */
+ dd[ 0] = ((int64_t) (d[ 0]) << 0)
+ | ((int64_t) (d[ 1]) << 8)
+ | ((int64_t) (d[ 2]) << 16)
+ | ((int64_t) (d[ 3]) << 24)
+ | ((int64_t) (d[ 4]) << 32)
+ | ((int64_t) (d[ 5]) << 40)
+ | ((int64_t) (d[ 6]) << 48);
+ dd[ 1] = ((int64_t) (d[ 7]) << 0)
+ | ((int64_t) (d[ 8]) << 8)
+ | ((int64_t) (d[ 9]) << 16)
+ | ((int64_t) (d[10]) << 24)
+ | ((int64_t) (d[11]) << 32)
+ | ((int64_t) (d[12]) << 40)
+ | ((int64_t) (d[13]) << 48);
+ dd[ 2] = ((int64_t) (d[14]) << 0)
+ | ((int64_t) (d[15]) << 8)
+ | ((int64_t) (d[16]) << 16)
+ | ((int64_t) (d[17]) << 24)
+ | ((int64_t) (d[18]) << 32)
+ | ((int64_t) (d[19]) << 40)
+ | ((int64_t) (d[20]) << 48);
+ dd[ 3] = ((int64_t) (d[21]) << 0)
+ | ((int64_t) (d[22]) << 8)
+ | ((int64_t) (d[23]) << 16)
+ | ((int64_t) (d[24]) << 24)
+ | ((int64_t) (d[25]) << 32)
+ | ((int64_t) (d[26]) << 40)
+ | ((int64_t) (d[27]) << 48);
+ dd[ 4] = ((int64_t) (d[28]) << 0)
+ | ((int64_t) (d[29]) << 8)
+ | ((int64_t) (d[30]) << 16)
+ | ((int64_t) (d[31]) << 24)
+ | ((int64_t) (d[32]) << 32)
+ | ((int64_t) (d[33]) << 40)
+ | ((int64_t) (d[34]) << 48);
+ dd[ 5] = ((int64_t) (d[35]) << 0)
+ | ((int64_t) (d[36]) << 8)
+ | ((int64_t) (d[37]) << 16)
+ | ((int64_t) (d[38]) << 24)
+ | ((int64_t) (d[39]) << 32)
+ | ((int64_t) (d[40]) << 40)
+ | ((int64_t) (d[41]) << 48);
+ dd[ 6] = ((int64_t) (d[42]) << 0)
+ | ((int64_t) (d[43]) << 8)
+ | ((int64_t) (d[44]) << 16)
+ | ((int64_t) (d[45]) << 24)
+ | ((int64_t) (d[46]) << 32)
+ | ((int64_t) (d[47]) << 40)
+ | ((int64_t) (d[48]) << 48);
+ dd[ 7] = ((int64_t) (d[49]) << 0)
+ | ((int64_t) (d[50]) << 8)
+ | ((int64_t) (d[51]) << 16)
+ | ((int64_t) (d[52]) << 24)
+ | ((int64_t) (d[53]) << 32)
+ | ((int64_t) (d[54]) << 40)
+ | ((int64_t) (d[55]) << 48);
+
+ /* Schoolbook 8x8-limb multiply with dd added into the low limbs; the
+ * double-width product occupies t[0..14], and t[15] is zeroed so the
+ * folding below can treat t as a full 16-limb value. */
+ /* a * b + d */
+ t[ 0] = dd[ 0] + (int128_t)ad[ 0] * bd[ 0];
+ t[ 1] = dd[ 1] + (int128_t)ad[ 0] * bd[ 1]
+ + (int128_t)ad[ 1] * bd[ 0];
+ t[ 2] = dd[ 2] + (int128_t)ad[ 0] * bd[ 2]
+ + (int128_t)ad[ 1] * bd[ 1]
+ + (int128_t)ad[ 2] * bd[ 0];
+ t[ 3] = dd[ 3] + (int128_t)ad[ 0] * bd[ 3]
+ + (int128_t)ad[ 1] * bd[ 2]
+ + (int128_t)ad[ 2] * bd[ 1]
+ + (int128_t)ad[ 3] * bd[ 0];
+ t[ 4] = dd[ 4] + (int128_t)ad[ 0] * bd[ 4]
+ + (int128_t)ad[ 1] * bd[ 3]
+ + (int128_t)ad[ 2] * bd[ 2]
+ + (int128_t)ad[ 3] * bd[ 1]
+ + (int128_t)ad[ 4] * bd[ 0];
+ t[ 5] = dd[ 5] + (int128_t)ad[ 0] * bd[ 5]
+ + (int128_t)ad[ 1] * bd[ 4]
+ + (int128_t)ad[ 2] * bd[ 3]
+ + (int128_t)ad[ 3] * bd[ 2]
+ + (int128_t)ad[ 4] * bd[ 1]
+ + (int128_t)ad[ 5] * bd[ 0];
+ t[ 6] = dd[ 6] + (int128_t)ad[ 0] * bd[ 6]
+ + (int128_t)ad[ 1] * bd[ 5]
+ + (int128_t)ad[ 2] * bd[ 4]
+ + (int128_t)ad[ 3] * bd[ 3]
+ + (int128_t)ad[ 4] * bd[ 2]
+ + (int128_t)ad[ 5] * bd[ 1]
+ + (int128_t)ad[ 6] * bd[ 0];
+ t[ 7] = dd[ 7] + (int128_t)ad[ 0] * bd[ 7]
+ + (int128_t)ad[ 1] * bd[ 6]
+ + (int128_t)ad[ 2] * bd[ 5]
+ + (int128_t)ad[ 3] * bd[ 4]
+ + (int128_t)ad[ 4] * bd[ 3]
+ + (int128_t)ad[ 5] * bd[ 2]
+ + (int128_t)ad[ 6] * bd[ 1]
+ + (int128_t)ad[ 7] * bd[ 0];
+ t[ 8] = (int128_t)ad[ 1] * bd[ 7]
+ + (int128_t)ad[ 2] * bd[ 6]
+ + (int128_t)ad[ 3] * bd[ 5]
+ + (int128_t)ad[ 4] * bd[ 4]
+ + (int128_t)ad[ 5] * bd[ 3]
+ + (int128_t)ad[ 6] * bd[ 2]
+ + (int128_t)ad[ 7] * bd[ 1];
+ t[ 9] = (int128_t)ad[ 2] * bd[ 7]
+ + (int128_t)ad[ 3] * bd[ 6]
+ + (int128_t)ad[ 4] * bd[ 5]
+ + (int128_t)ad[ 5] * bd[ 4]
+ + (int128_t)ad[ 6] * bd[ 3]
+ + (int128_t)ad[ 7] * bd[ 2];
+ t[10] = (int128_t)ad[ 3] * bd[ 7]
+ + (int128_t)ad[ 4] * bd[ 6]
+ + (int128_t)ad[ 5] * bd[ 5]
+ + (int128_t)ad[ 6] * bd[ 4]
+ + (int128_t)ad[ 7] * bd[ 3];
+ t[11] = (int128_t)ad[ 4] * bd[ 7]
+ + (int128_t)ad[ 5] * bd[ 6]
+ + (int128_t)ad[ 6] * bd[ 5]
+ + (int128_t)ad[ 7] * bd[ 4];
+ t[12] = (int128_t)ad[ 5] * bd[ 7]
+ + (int128_t)ad[ 6] * bd[ 6]
+ + (int128_t)ad[ 7] * bd[ 5];
+ t[13] = (int128_t)ad[ 6] * bd[ 7]
+ + (int128_t)ad[ 7] * bd[ 6];
+ t[14] = (int128_t)ad[ 7] * bd[ 7];
+ t[15] = 0;
+
+ /* Mod curve order */
+ /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */
+ /* Propagate carries */
+ c = t[ 0] >> 56; t[ 1] += c; t[ 0] = t[ 0] & 0xffffffffffffff;
+ c = t[ 1] >> 56; t[ 2] += c; t[ 1] = t[ 1] & 0xffffffffffffff;
+ c = t[ 2] >> 56; t[ 3] += c; t[ 2] = t[ 2] & 0xffffffffffffff;
+ c = t[ 3] >> 56; t[ 4] += c; t[ 3] = t[ 3] & 0xffffffffffffff;
+ c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff;
+ c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff;
+ c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff;
+ c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff;
+ c = t[ 8] >> 56; t[ 9] += c; t[ 8] = t[ 8] & 0xffffffffffffff;
+ c = t[ 9] >> 56; t[10] += c; t[ 9] = t[ 9] & 0xffffffffffffff;
+ c = t[10] >> 56; t[11] += c; t[10] = t[10] & 0xffffffffffffff;
+ c = t[11] >> 56; t[12] += c; t[11] = t[11] & 0xffffffffffffff;
+ c = t[12] >> 56; t[13] += c; t[12] = t[12] & 0xffffffffffffff;
+ c = t[13] >> 56; t[14] += c; t[13] = t[13] & 0xffffffffffffff;
+ c = t[14] >> 56; t[15] += c; t[14] = t[14] & 0xffffffffffffff;
+ /* NOTE(review): with c = 2^446 - order, each limb t[i] for i >= 8 has
+ * weight 2^(56*i) = 2^448 * 2^(56*(i-8)), and 2^448 == 4*c (mod order).
+ * The four constants below appear to be 4*c split at 56-bit boundaries
+ * (e.g. 0x21cf5b5529eec34 == 4 * 0x873d6d54a7bb0d) — confirm against the
+ * matching fold in the reduce routine. */
+ /* Mod top half of extra words */
+ t[ 4] += (int128_t)0x21cf5b5529eec34L * t[12];
+ t[ 5] += (int128_t)0x0f635c8e9c2ab70L * t[12];
+ t[ 6] += (int128_t)0x2d944a725bf7a4cL * t[12];
+ t[ 7] += (int128_t)0x20cd77058eec490L * t[12];
+ t[ 5] += (int128_t)0x21cf5b5529eec34L * t[13];
+ t[ 6] += (int128_t)0x0f635c8e9c2ab70L * t[13];
+ t[ 7] += (int128_t)0x2d944a725bf7a4cL * t[13];
+ t[ 8] += (int128_t)0x20cd77058eec490L * t[13];
+ t[ 6] += (int128_t)0x21cf5b5529eec34L * t[14];
+ t[ 7] += (int128_t)0x0f635c8e9c2ab70L * t[14];
+ t[ 8] += (int128_t)0x2d944a725bf7a4cL * t[14];
+ t[ 9] += (int128_t)0x20cd77058eec490L * t[14];
+ t[ 7] += (int128_t)0x21cf5b5529eec34L * t[15];
+ t[ 8] += (int128_t)0x0f635c8e9c2ab70L * t[15];
+ t[ 9] += (int128_t)0x2d944a725bf7a4cL * t[15];
+ t[10] += (int128_t)0x20cd77058eec490L * t[15];
+ /* Propagate carries */
+ c = t[ 4] >> 56; t[ 5] += c; t[ 4] = t[ 4] & 0xffffffffffffff;
+ c = t[ 5] >> 56; t[ 6] += c; t[ 5] = t[ 5] & 0xffffffffffffff;
+ c = t[ 6] >> 56; t[ 7] += c; t[ 6] = t[ 6] & 0xffffffffffffff;
+ c = t[ 7] >> 56; t[ 8] += c; t[ 7] = t[ 7] & 0xffffffffffffff;
+ c = t[ 8] >> 56; t[ 9] += c; t[ 8] = t[ 8] & 0xffffffffffffff;
+ c = t[ 9] >> 56; t[10] += c; t[ 9] = t[ 9] & 0xffffffffffffff;
+ c = t[10] >> 56; t[11] += c; t[10] = t[10] & 0xffffffffffffff;
+ /* Mod bottom half of extra words */
+ t[ 0] += (int128_t)0x21cf5b5529eec34L * t[ 8];
+ t[ 1] += (int128_t)0x0f635c8e9c2ab70L * t[ 8];
+ t[ 2] += (int128_t)0x2d944a725bf7a4cL * t[ 8];
+ t[ 3] += (int128_t)0x20cd77058eec490L * t[ 8];
+ t[ 1] += (int128_t)0x21cf5b5529eec34L * t[ 9];
+ t[ 2] += (int128_t)0x0f635c8e9c2ab70L * t[ 9];
+ t[ 3] += (int128_t)0x2d944a725bf7a4cL * t[ 9];
+ t[ 4] += (int128_t)0x20cd77058eec490L * t[ 9];
+ t[ 2] += (int128_t)0x21cf5b5529eec34L * t[10];
+ t[ 3] += (int128_t)0x0f635c8e9c2ab70L * t[10];
+ t[ 4] += (int128_t)0x2d944a725bf7a4cL * t[10];
+ t[ 5] += (int128_t)0x20cd77058eec490L * t[10];
+ t[ 3] += (int128_t)0x21cf5b5529eec34L * t[11];
+ t[ 4] += (int128_t)0x0f635c8e9c2ab70L * t[11];
+ t[ 5] += (int128_t)0x2d944a725bf7a4cL * t[11];
+ t[ 6] += (int128_t)0x20cd77058eec490L * t[11];
+ /* Propagate carries */
+ c = t[ 0] >> 56; t[ 1] += c; rd[ 0] = (int64_t)(t[ 0] & 0xffffffffffffff);
+ c = t[ 1] >> 56; t[ 2] += c; rd[ 1] = (int64_t)(t[ 1] & 0xffffffffffffff);
+ c = t[ 2] >> 56; t[ 3] += c; rd[ 2] = (int64_t)(t[ 2] & 0xffffffffffffff);
+ c = t[ 3] >> 56; t[ 4] += c; rd[ 3] = (int64_t)(t[ 3] & 0xffffffffffffff);
+ c = t[ 4] >> 56; t[ 5] += c; rd[ 4] = (int64_t)(t[ 4] & 0xffffffffffffff);
+ c = t[ 5] >> 56; t[ 6] += c; rd[ 5] = (int64_t)(t[ 5] & 0xffffffffffffff);
+ c = t[ 6] >> 56; t[ 7] += c; rd[ 6] = (int64_t)(t[ 6] & 0xffffffffffffff);
+ rd[ 7] = t[7];
+ /* The order is just below 2^446 = 2^(7*56 + 54), so the top limb holds
+ * only 54 value bits; bits 54+ are folded back using 2^446 == c (mod
+ * order), with c's 56-bit limbs given in the constants below (they match
+ * the order stated in the comment above). */
+ /* Mod bits over 56 in last word */
+ o = rd[7] >> 54; rd[ 7] &= 0x3fffffffffffff;
+ rd[ 0] += 0x873d6d54a7bb0dL * o;
+ rd[ 1] += 0x3d8d723a70aadcL * o;
+ rd[ 2] += 0xb65129c96fde93L * o;
+ rd[ 3] += 0x8335dc163bb124L * o;
+ /* Propagate carries */
+ o = rd[ 0] >> 56; rd[ 1] += o; rd[ 0] = rd[ 0] & 0xffffffffffffff;
+ o = rd[ 1] >> 56; rd[ 2] += o; rd[ 1] = rd[ 1] & 0xffffffffffffff;
+ o = rd[ 2] >> 56; rd[ 3] += o; rd[ 2] = rd[ 2] & 0xffffffffffffff;
+ o = rd[ 3] >> 56; rd[ 4] += o; rd[ 3] = rd[ 3] & 0xffffffffffffff;
+ o = rd[ 4] >> 56; rd[ 5] += o; rd[ 4] = rd[ 4] & 0xffffffffffffff;
+ o = rd[ 5] >> 56; rd[ 6] += o; rd[ 5] = rd[ 5] & 0xffffffffffffff;
+ o = rd[ 6] >> 56; rd[ 7] += o; rd[ 6] = rd[ 6] & 0xffffffffffffff;
+
+ /* Pack the eight 56-bit limbs back into 56 little-endian bytes.
+ * NOTE(review): the value is reduced to fit 446 bits but may not be
+ * the fully canonical residue below the order in all cases — presumably
+ * acceptable for the callers; confirm against the reduce routine. */
+ /* Convert to bytes */
+ r[ 0] = (rd[0 ] >> 0);
+ r[ 1] = (rd[0 ] >> 8);
+ r[ 2] = (rd[0 ] >> 16);
+ r[ 3] = (rd[0 ] >> 24);
+ r[ 4] = (rd[0 ] >> 32);
+ r[ 5] = (rd[0 ] >> 40);
+ r[ 6] = (rd[0 ] >> 48);
+ r[ 7] = (rd[1 ] >> 0);
+ r[ 8] = (rd[1 ] >> 8);
+ r[ 9] = (rd[1 ] >> 16);
+ r[10] = (rd[1 ] >> 24);
+ r[11] = (rd[1 ] >> 32);
+ r[12] = (rd[1 ] >> 40);
+ r[13] = (rd[1 ] >> 48);
+ r[14] = (rd[2 ] >> 0);
+ r[15] = (rd[2 ] >> 8);
+ r[16] = (rd[2 ] >> 16);
+ r[17] = (rd[2 ] >> 24);
+ r[18] = (rd[2 ] >> 32);
+ r[19] = (rd[2 ] >> 40);
+ r[20] = (rd[2 ] >> 48);
+ r[21] = (rd[3 ] >> 0);
+ r[22] = (rd[3 ] >> 8);
+ r[23] = (rd[3 ] >> 16);
+ r[24] = (rd[3 ] >> 24);
+ r[25] = (rd[3 ] >> 32);
+ r[26] = (rd[3 ] >> 40);
+ r[27] = (rd[3 ] >> 48);
+ r[28] = (rd[4 ] >> 0);
+ r[29] = (rd[4 ] >> 8);
+ r[30] = (rd[4 ] >> 16);
+ r[31] = (rd[4 ] >> 24);
+ r[32] = (rd[4 ] >> 32);
+ r[33] = (rd[4 ] >> 40);
+ r[34] = (rd[4 ] >> 48);
+ r[35] = (rd[5 ] >> 0);
+ r[36] = (rd[5 ] >> 8);
+ r[37] = (rd[5 ] >> 16);
+ r[38] = (rd[5 ] >> 24);
+ r[39] = (rd[5 ] >> 32);
+ r[40] = (rd[5 ] >> 40);
+ r[41] = (rd[5 ] >> 48);
+ r[42] = (rd[6 ] >> 0);
+ r[43] = (rd[6 ] >> 8);
+ r[44] = (rd[6 ] >> 16);
+ r[45] = (rd[6 ] >> 24);
+ r[46] = (rd[6 ] >> 32);
+ r[47] = (rd[6 ] >> 40);
+ r[48] = (rd[6 ] >> 48);
+ r[49] = (rd[7 ] >> 0);
+ r[50] = (rd[7 ] >> 8);
+ r[51] = (rd[7 ] >> 16);
+ r[52] = (rd[7 ] >> 24);
+ r[53] = (rd[7 ] >> 32);
+ r[54] = (rd[7 ] >> 40);
+ r[55] = (rd[7 ] >> 48);
+ /* Zero the 57th byte — presumably because Ed448 scalars are encoded in
+ * 57 bytes and the reduced value fits in 56; confirm against callers. */
+ r[56] = 0;
+}
+
+/* Precomputed multiples of the base point. */
+static const ge448_precomp base[58][8] = {
+{
+ {
+ { 0x26a82bc70cc05eL, 0x80e18b00938e26L, 0xf72ab66511433bL,
+ 0xa3d3a46412ae1aL, 0x0f1767ea6de324L, 0x36da9e14657047L,
+ 0xed221d15a622bfL, 0x4f1970c66bed0dL },
+ { 0x08795bf230fa14L, 0x132c4ed7c8ad98L, 0x1ce67c39c4fdbdL,
+ 0x05a0c2d73ad3ffL, 0xa3984087789c1eL, 0xc7624bea73736cL,
+ 0x248876203756c9L, 0x693f46716eb6bcL }
+ },
+ {
+ { 0x55555555555555L, 0x55555555555555L, 0x55555555555555L,
+ 0x55555555555555L, 0xaaaaaaaaaaaaa9L, 0xaaaaaaaaaaaaaaL,
+ 0xaaaaaaaaaaaaaaL, 0xaaaaaaaaaaaaaaL },
+ { 0xeafbcdea9386edL, 0xb2bed1cda06bdaL, 0x833a2a3098bbbcL,
+ 0x8ad8c4b80d6565L, 0x884dd7b7e36d72L, 0xc2b0036ed7a035L,
+ 0x8db359d6205086L, 0xae05e9634ad704L }
+ },
+ {
+ { 0x28173286ff2f8fL, 0xb769465da85757L, 0xf7f6271fd6e862L,
+ 0x4a3fcfe8daa9cbL, 0xda82c7e2ba077aL, 0x943332241b8b8cL,
+ 0x6455bd64316cb6L, 0x0865886b9108afL },
+ { 0x22ac13588ed6fcL, 0x9a68fed02dafb8L, 0x1bdb6767f0bffaL,
+ 0xec4e1d58bb3a33L, 0x56c3b9fce43c82L, 0xa6449a4a8d9523L,
+ 0xf706cbda7ad43aL, 0xe005a8dbd5125cL }
+ },
+ {
+ { 0xce42ac48ba7f30L, 0xe1798949e120e2L, 0xf1515dd8ba21aeL,
+ 0x70c74cc301b7bdL, 0x0891c693fda4beL, 0x29ea255a09cf4eL,
+ 0x2c1419a17226f9L, 0x49dcbc5c6c0cceL },
+ { 0xe236f86de51839L, 0x44285d0d4f5b32L, 0x7ea1ca9472b5d4L,
+ 0x7b8a5bc1c0d8f9L, 0x57d845c90dc322L, 0x1b979cb7c02f04L,
+ 0x27164b33a5de02L, 0xd49077e4accde5L }
+ },
+ {
+ { 0xa99d1092030034L, 0x2d8cefc6f950d0L, 0x7a920c3c96f07bL,
+ 0x958812808bc0d5L, 0x62ada756d761e8L, 0x0def80cbcf7285L,
+ 0x0e2ba7601eedb5L, 0x7a9f9335a48dcbL },
+ { 0xb4731472f435ebL, 0x5512881f225443L, 0xee59d2b33c5840L,
+ 0xb698017127d7a4L, 0xb18fced86551f7L, 0x0ade260ca1823aL,
+ 0xd3b9109ce4fd58L, 0xadfd751a2517edL }
+ },
+ {
+ { 0x7fd7652abef79cL, 0x6c20a07443a878L, 0x5c1840d12a7109L,
+ 0x4a06e4a876451cL, 0x3bed0b4ad95f65L, 0x25d2e673fb0260L,
+ 0x2e00349aebd971L, 0x54523e04498b72L },
+ { 0xea5d1da07c7bccL, 0xcce776938ea98cL, 0x80284e861d2b3eL,
+ 0x48de76b6e1ff1bL, 0x7b121869c58522L, 0xbfd053a2765a1aL,
+ 0x2d743ec056c667L, 0x3f99b9cd8ab61cL }
+ },
+ {
+ { 0xdf9567ceb5eaf7L, 0x110a6b478ac7d7L, 0x2d335014706e0bL,
+ 0x0df9c7b0b5a209L, 0xba4223d568e684L, 0xd78af2d8c3719bL,
+ 0x77467b9a5291b6L, 0x079748e5c89befL },
+ { 0xe20d3fadac377fL, 0x34e866972b5c09L, 0xd8687a3c40bbb7L,
+ 0x7b3946fd2f84c9L, 0xd00e40ca78f50eL, 0xb87594417e7179L,
+ 0x9c7373bcb23583L, 0x7ddeda3c90fd69L }
+ },
+ {
+ { 0x2538a67153bde0L, 0x223aca9406b696L, 0xf9080dc1ad713eL,
+ 0x6c4cb47d816a64L, 0xbc285685dc8b97L, 0xd97b037c08e2d7L,
+ 0x5b63fb45d0e66bL, 0xd1f1bc5520e8a3L },
+ { 0x4eb873ce69e09bL, 0x1663164bc8ee45L, 0x08f7003ba8d89fL,
+ 0x4b98ead386ad82L, 0xa4b93b7bd94c7bL, 0x46ba408c6b38b3L,
+ 0xdae87d1f3574ffL, 0xc7564f4e9bea9bL }
+ },
+},
+{
+ {
+ { 0x2e4fdb25bfac1cL, 0xf0d79aaf5f3bcaL, 0xe756b0d20fb7ccL,
+ 0xe3696beb39609aL, 0xa019fc35a5ab58L, 0xa2b24853b281ddL,
+ 0xe3e2be761ac0a2L, 0xf19c34feb56730L },
+ { 0x2d25ce8a30241eL, 0xf5661eab73d7a1L, 0x4611ed0daac9f4L,
+ 0xd5442344ced72cL, 0xce78f52e92e985L, 0x6fe5dd44da4aadL,
+ 0xfcaddc61d363ceL, 0x3beb69cc9111bfL }
+ },
+ {
+ { 0xd2e7660940ebc9L, 0xe032018b17bbe0L, 0xad4939175c0575L,
+ 0xdd0b14721c7f34L, 0x52c2ba43e147e0L, 0x7dd03c60ee8973L,
+ 0x5472e8decf2754L, 0x17a1cd1d6482bbL },
+ { 0xdd43b848128b3fL, 0xf0cae34ea7dd25L, 0x81ca99fff07df2L,
+ 0x1c8959792ebbdcL, 0x45c7a6872155e6L, 0x907a50e39ddd08L,
+ 0xbe398c2bb2d89bL, 0x38063f91b3b536L }
+ },
+ {
+ { 0x149fafbf843b23L, 0x00ab582ac7f22aL, 0xa3b981bf2f4d4cL,
+ 0x2ce1a654341a22L, 0x68a40747c03b63L, 0x63206a212f2cf8L,
+ 0xc9961d35149741L, 0xfb85430bc7099eL },
+ { 0x9c9107290a9e59L, 0x734e94a06de367L, 0x5cf3cbedb99214L,
+ 0xc6bce3245b1fb9L, 0x1a82abedd7be0dL, 0xf74976aede7d1cL,
+ 0x7025b7c21503bdL, 0xf7894910d096abL }
+ },
+ {
+ { 0x6bd48bb555a41bL, 0xfbdd0d067de206L, 0x98bc477dd6dfd1L,
+ 0x1d0693b3e40b8aL, 0x6e15563da32ae4L, 0x0194a20fcebaa2L,
+ 0xda116150980a93L, 0x8e119200109cecL },
+ { 0x8ea0552ffb9726L, 0xeba50a4047e44bL, 0xc050d2460ddf76L,
+ 0xe009204ac690e0L, 0x47b86399b18edcL, 0x2f5b76ac77f23fL,
+ 0x4296c240792905L, 0x73f6b4a06f6dc7L }
+ },
+ {
+ { 0xb6ef9ea3b10cadL, 0x312843df7c8fceL, 0x5bdcd528bedf86L,
+ 0x2889059f6dd823L, 0x04578e908bfde0L, 0x3245df3123e2e5L,
+ 0xbf461d57ee9e3aL, 0xddec2d46f94cebL },
+ { 0x21b43b9145768fL, 0xe79a8f9dae962aL, 0xff1972bcbb043fL,
+ 0xe3dcf6d239649bL, 0xed592bdc533b85L, 0x14ff94fdbe22d0L,
+ 0x6c4eb87f1d8e22L, 0xd8d4c71d18cf6dL }
+ },
+ {
+ { 0xcda666c8d96345L, 0x9ecaa25836cd21L, 0x6e885bd984606eL,
+ 0x1dd5fef804f054L, 0x9dfff6b6959ae4L, 0x99b9cf8c9b55ccL,
+ 0xb4716b062b9b80L, 0x13ec87c554b128L },
+ { 0xe696d1f75aacc2L, 0xf78c99387fc5ffL, 0x76c09473809d42L,
+ 0x99ce62db618fa8L, 0x35e3e022f53341L, 0x62fc1ac0db6c5eL,
+ 0xa1fb8e600d8b47L, 0x0bc107058f0d1eL }
+ },
+ {
+ { 0x1f4526916da513L, 0x1f2fc04f5cf341L, 0xae9208664d23e0L,
+ 0x4e33082da8a113L, 0x2688ec61cfc085L, 0x6f2e8de6e5327fL,
+ 0x2070db3b4e48a8L, 0xd6626973240adeL },
+ { 0xa6b317ffbd997bL, 0x9fa1b5649e26bdL, 0xcbf0d258cba0f3L,
+ 0x4a7791b17b4745L, 0x25f555b5c9e190L, 0x7cd3940923ec4cL,
+ 0x16f4c6ae98f1b6L, 0x7962116bcd4e0fL }
+ },
+ {
+ { 0x8d58fa302491e3L, 0x7cf76c67ab3898L, 0xbc2f657647ebc7L,
+ 0x5f4bfe0d25f5a3L, 0x503f478d69505dL, 0x4a889fc3fb6645L,
+ 0x33e1bc1fa86b18L, 0xabb234f5508dd8L },
+ { 0x5348e1b9a05b48L, 0x57ac5f164dc858L, 0x21f4d38ec8a2d3L,
+ 0x5ec6d3ca3a3e9dL, 0xcd4062e560a0b8L, 0x49b74f73433f59L,
+ 0xefd9d87cab14e3L, 0x858ce7feb964f5L }
+ },
+},
+{
+ {
+ { 0x7577254eb731b4L, 0x9fff1fb4e2397eL, 0x749b145c821715L,
+ 0x40619fe2e65e67L, 0x57b82812e618d8L, 0x063186c707b83eL,
+ 0xcfc80cb31b24a2L, 0xcca6185ac75169L },
+ { 0x6539f44b255818L, 0x5895da00368bceL, 0x841a30917c7482L,
+ 0x85469e1b1a9c9eL, 0x05664c0e4f7d9dL, 0x8a063187b35cc0L,
+ 0x214763aa0e9b0aL, 0x1bd872c4b26ac2L }
+ },
+ {
+ { 0x3578f97a93762bL, 0x434f69a72d52bcL, 0xddcca4022cb565L,
+ 0xa7d1e41ff20544L, 0x823475d8a66588L, 0x9fc97c799d7bafL,
+ 0x15542f1660e421L, 0xa7d1f60843faf6L },
+ { 0xbbfaab54063cccL, 0x3ad9bada49855aL, 0xffd5f1c5bddbfeL,
+ 0x0e419c2ae87e59L, 0xdce6ed6f89956bL, 0xf047c21ccd8951L,
+ 0x6ed4a1ba83c991L, 0x85af86e2d28e0aL }
+ },
+ {
+ { 0x04433c49ed48a8L, 0xeffa8580bc375dL, 0xfb0e1b2fa6e3b5L,
+ 0x51483a2a1aaddaL, 0x733448df8b2ea8L, 0xaa0513cf639f0cL,
+ 0x6bc61a3a23bf84L, 0x3e64f68dc2430dL },
+ { 0x51bf502c5876b1L, 0x6b833751c0dd2aL, 0xe597be1342914fL,
+ 0x43d5ab0f8e632cL, 0x2696715d62587bL, 0xe87d20aed34f24L,
+ 0x25b7e14e18baf7L, 0xf5eb753e22e084L }
+ },
+ {
+ { 0x51da71724d8295L, 0xd478e4318d1340L, 0xacf94f42cf7f66L,
+ 0x230d7d13760711L, 0x078a66a5abc626L, 0xd78b0bd6b5f6daL,
+ 0x23a971396d1d0bL, 0x87623d64bd960fL },
+ { 0x0841a9977db53fL, 0x23c1a53f4d03eeL, 0x2f62c2e1f95df1L,
+ 0xd1e2ec1116f4e7L, 0x896d2fe34811a9L, 0xad65e2bec8096eL,
+ 0x09d36f9b1744a6L, 0x564bac7ff5ddf7L }
+ },
+ {
+ { 0x48b41e2c3f77cbL, 0x52276730968938L, 0xff1b899fd9b452L,
+ 0x67cf3bf2e03908L, 0x3731d90248a6fbL, 0xd800a05256598fL,
+ 0x347d2f2bdc8530L, 0xc72a3007ad08a1L },
+ { 0x5e5be741d65f73L, 0x183d4ae4206eadL, 0xcb50c1cade4013L,
+ 0x39db43d3102483L, 0x0eb49fa70d6325L, 0xa18f6a2c1f02b9L,
+ 0x3e6fe30dbf5e66L, 0xac4eeb93a82aa5L }
+ },
+ {
+ { 0x295affd3613d47L, 0x7b7e68ab56f343L, 0x980629692b173bL,
+ 0x937061ebad35fbL, 0x25019785c21eeaL, 0xe92721b787a746L,
+ 0x463c46c3651631L, 0x6da4b5dc6f2d5aL },
+ { 0xcb67cc16e6d18cL, 0x1b30d520010588L, 0x1bb6ea6db1d1e8L,
+ 0x9c6308aad11474L, 0xc3167413d19b1cL, 0xf2e84d7be4fb79L,
+ 0xeccb873e050f77L, 0xf7c8d80cc2bf86L }
+ },
+ {
+ { 0x16fe2e17ab20e5L, 0x274deadecf3a92L, 0x9f434870972f67L,
+ 0x9a65a454605751L, 0x9351f07b8980b2L, 0x412962b0eb08a5L,
+ 0xb8c9bfd733f440L, 0xac2cd641ca250fL },
+ { 0x68cdd0f2ba7d26L, 0xd3d2a4a4e0beeaL, 0x50135c19f4a258L,
+ 0xb475e53f0d02e4L, 0x432d8c6589283aL, 0x29141bfa0a2b6cL,
+ 0xd7379ec13704bcL, 0x831562c52459bfL }
+ },
+ {
+ { 0x676b366eeec506L, 0xdd6cad545da557L, 0x9de39cb77057d2L,
+ 0x388c5fedf05bf1L, 0x6e55650dfb1f03L, 0xdbceffa52126c9L,
+ 0xe4d187b3a4a220L, 0xac914f9eb27020L },
+ { 0x3f4ab98d2e5f30L, 0x6ae97dadd94451L, 0x64af6950d80981L,
+ 0x36b4b90f2aa2ceL, 0x6adcd7a18fcf59L, 0x3ddfe6dc116c81L,
+ 0x661072b549b9e3L, 0xd9e3134ec4584dL }
+ },
+},
+{
+ {
+ { 0x6e46707a1e400cL, 0xcdc990b551e806L, 0xfa512513a07724L,
+ 0x500553f1b3e4f5L, 0x67e8b58ef4dac3L, 0x958349f2cb4cc7L,
+ 0x948b4ed7f9143cL, 0xe646d092b7822bL },
+ { 0xd185dd52bc3c26L, 0x34ba16ec837fc9L, 0x516d4ba5a788b7L,
+ 0x72f2de756142b0L, 0x5846f61f445b3dL, 0xdaec5c9f4631a1L,
+ 0xa10b18d169ea9bL, 0x85d2998af6751bL }
+ },
+ {
+ { 0xda0cac443ddf31L, 0x0966e171860911L, 0x9c3a7173cba600L,
+ 0x5781880571f895L, 0x5e2a927737ac21L, 0x8a461486c253fbL,
+ 0xe801cf595ee626L, 0x271166a5f84fc0L },
+ { 0x306937fba856bdL, 0x80cb179be80a43L, 0x70393b2ffb5980L,
+ 0xa8e4a1c660fc64L, 0x5078abfc0d5c98L, 0x62ba530fbd31ffL,
+ 0xda608449e51b88L, 0xdb6ecb0355ae15L }
+ },
+ {
+ { 0xbcbb6ea23c5d49L, 0x08906ba87959bcL, 0x61cc0880991665L,
+ 0x21d6b41d90d13cL, 0x0c27ac1d03afe9L, 0x159995f5cfea52L,
+ 0x4057e20bdfe220L, 0xdd1b349cbdf058L },
+ { 0x0cd66262e37159L, 0x8cea8e43eb0d17L, 0x553af085bce7f0L,
+ 0xb94cb5f5b6511dL, 0x7b8d3a550e0330L, 0x415911057ab7e7L,
+ 0x320820e6aa886fL, 0x130d4d6c5b6b81L }
+ },
+ {
+ { 0x2f98059c7bb2edL, 0x33ebf4ca49bdfbL, 0x04c72a1b0a675bL,
+ 0x94f9ea4adb6c14L, 0x03376d8cf728c0L, 0x5c059d34c6eb6aL,
+ 0x0178408eb8da48L, 0x8bf607b2956817L },
+ { 0x7ad2822ceb3d28L, 0xd07a40337ae653L, 0xbc68739c1e46b2L,
+ 0x15d7cca9154ba9L, 0x6b97103a26617dL, 0xa610314b2e0d28L,
+ 0x52a08bafd4d363L, 0x80c2638c7dc2afL }
+ },
+ {
+ { 0x0cde7ef3187140L, 0x93b92ca4b70acdL, 0x5696e507a79cdcL,
+ 0x73cc9728eaab66L, 0x6b8c5b68f1b0c7L, 0xb39a3184f7e0b1L,
+ 0x72cfb0d376108aL, 0x0c53efc98536a7L },
+ { 0x03b52a824c2f1eL, 0x717132e6399b78L, 0x31ebd25349a85dL,
+ 0x265ee811a200d4L, 0x0b1aad2407d7adL, 0x9a9ebc894d2962L,
+ 0x994e6cd41171d9L, 0x09178d86c8fa83L }
+ },
+ {
+ { 0x7d1d238a2593a1L, 0x863e93ab38fb19L, 0xd23a4cce7712a9L,
+ 0x7477b1327efcd5L, 0x3ba69ff1392f6cL, 0x63e0c32f7bb5a5L,
+ 0x20412c0026effdL, 0xd3ee8e4ef424abL },
+ { 0x14c0b2d64e5174L, 0x2a611f2e58c47bL, 0xaa58a06c1e8635L,
+ 0x1870c3ecf17034L, 0xb0d5e3483f1bf3L, 0xb19905c16c7eb3L,
+ 0xbf85d626efa4caL, 0xfd16b2f180f92bL }
+ },
+ {
+ { 0xc0431af3adcb48L, 0xc9a7a8dba90496L, 0xd765a163895294L,
+ 0xb02a41a551de70L, 0xb71b261749b8a1L, 0x0dfa89ec6f3e47L,
+ 0x392c0d80f5d9ceL, 0x43c59d831aee3cL },
+ { 0x94bfb6d4d76f49L, 0xe8f5b8227d68a5L, 0x78ae1d9630fd08L,
+ 0x1379029ce1bdaeL, 0x9689da066715dcL, 0x5d4cb24d3278c7L,
+ 0x77c98339e84fbcL, 0xc8478dcea1048cL }
+ },
+ {
+ { 0xe4b8f31770d2baL, 0x744f65242ea095L, 0xd06e090036f138L,
+ 0xd3a3d5b3b078caL, 0xc7ae54178b8417L, 0xad6c5d4c738fd7L,
+ 0x61789844676454L, 0xfbf34235d9a392L },
+ { 0x8e451a7fff772fL, 0x8605bb75ffbeadL, 0x6f75cc1930d59fL,
+ 0xd4f47558f3f460L, 0xefd2d796700c8aL, 0xceb462a2406421L,
+ 0x8ed0f979dfe8f1L, 0x0280bf1d1d7600L }
+ },
+},
+{
+ {
+ { 0x761c219dd9a54dL, 0x1127fcb86a39c0L, 0x7d0e4f04c9beddL,
+ 0x27c017a4d976b6L, 0x800c973da042cfL, 0xe7419af2593f11L,
+ 0xbd49448ae67960L, 0xd3b60b7744fd85L },
+ { 0x5e74ed961676feL, 0x7383ef339af627L, 0x34407e05e62df7L,
+ 0xb0534618bf3196L, 0xd6b7184583b407L, 0xe3d068555011beL,
+ 0x94083d02124b52L, 0xa908324f780aafL }
+ },
+ {
+ { 0xb27af1a73ec9c3L, 0xb66ad9f70fa725L, 0x07724f58cf73e4L,
+ 0xc3fcd579949358L, 0x06efb79da0cc01L, 0x1e977d210597c9L,
+ 0xcd732be703e8d6L, 0x6fd29bf6d0b69eL },
+ { 0xca658ac667128eL, 0xca0036ac7872b3L, 0xc9698585355837L,
+ 0x59f3be8075cf1cL, 0x9f1b9b03809a11L, 0x6881ced9733871L,
+ 0x8cda0fbe902a5fL, 0x4d8c69b4e3871eL }
+ },
+ {
+ { 0x5c3bd07ddee82fL, 0xe52dd312f9723bL, 0xcf8761174f1be8L,
+ 0xd9ecbd835f8657L, 0x4f77393fbfea17L, 0xec9579fd78fe2cL,
+ 0x320de920fb0450L, 0xbfc9b8d95d9c47L },
+ { 0x818bd425e1b4c3L, 0x0e0c41c40e2c78L, 0x0f7ce9abccb0d0L,
+ 0xc7e9fa45ef81fbL, 0x2561d6f73574adL, 0xa2d8d99d2efb0bL,
+ 0xcf8f316e96cd0aL, 0x088f0f14964807L }
+ },
+ {
+ { 0x0a8498945d5a19L, 0x47ab39c6c2131fL, 0x5c02824f3fc35dL,
+ 0x3be77c89ee8127L, 0xa8491b7c90b80aL, 0x5397631a28aa93L,
+ 0x54d6e816c0b344L, 0x22878be876d0e4L },
+ { 0xeecb8a46db3bf6L, 0x340f29554577a3L, 0xa7798689a00f85L,
+ 0x98465d74bb9147L, 0x9532d7dda3c736L, 0x6d574f17504b20L,
+ 0x6e356f4d86e435L, 0x70c2e8d4533887L }
+ },
+ {
+ { 0xdce5a0ad293980L, 0x32d7210069010eL, 0x64af59f06deaaaL,
+ 0xd6b43c459239e4L, 0x74bf2559199c29L, 0x3efff4111e1e2bL,
+ 0x1aa7b5ecb0f8d8L, 0x9baa22b989e395L },
+ { 0xf78db807b33ac1L, 0x05a3b4354ce80aL, 0x371defc7bc8e12L,
+ 0x63305a01224610L, 0x028b1ae6d697efL, 0x7aba39c1cd8051L,
+ 0x76ed7a928ee4b4L, 0x31bd02a7f99901L }
+ },
+ {
+ { 0xf9dab7af075566L, 0x84e29a5f56f18bL, 0x3a4c45af64e56dL,
+ 0xcf3644a6a7302dL, 0xfb40808156b658L, 0xf33ef9cf96be52L,
+ 0xfe92038caa2f08L, 0xcfaf2e3b261894L },
+ { 0xf2a0dbc224ce3fL, 0xed05009592eb27L, 0x501743f95889d0L,
+ 0xa88a47877c95c2L, 0x86755fbdd63da9L, 0x9024acfc7ee828L,
+ 0x634b020f38113bL, 0x3c5aacc6056e64L }
+ },
+ {
+ { 0xe03ff3aa2ef760L, 0x3b95767b1c3bacL, 0x51ce6aa940d754L,
+ 0x7cbac3f47a9a3dL, 0xa864ac434f8d1aL, 0x1eff3f280dbd47L,
+ 0xd8ab6607ebd5caL, 0xc4df5c405b07edL },
+ { 0x3dc92dfa4f095bL, 0x5ae36a57cdbd9aL, 0x7ff29737891e04L,
+ 0x37c03130a5fe7bL, 0x210d7b0aa6e35eL, 0x6edfb53bf200d8L,
+ 0x787b68d84afb85L, 0x9b5c49b72c6de3L }
+ },
+ {
+ { 0x51857164010f4eL, 0xe0b144b0536ebeL, 0xacabb14887d663L,
+ 0xac1caededf584fL, 0xb43fb8faf175a3L, 0x310b6d5f992a3cL,
+ 0xf2c4aa285178a4L, 0x69c99698bd56bfL },
+ { 0x73d6372a4d972eL, 0x3d5bb2e9583803L, 0x7bf7d18d891581L,
+ 0xa5ce5d7568a34aL, 0x670b4331f45c81L, 0x97265a71f96910L,
+ 0xdb14eb3b07c1eaL, 0xdf008eafed447cL }
+ },
+},
+{
+ {
+ { 0x0379f5a00c2f10L, 0xb320b4fd350285L, 0x74e560e8efdd7dL,
+ 0xf2f017ef46a140L, 0x2ced1a60f34624L, 0x7c4b4e3ca08ec9L,
+ 0xdffc2a15d8bc6bL, 0xcc8f3f3527b007L },
+ { 0x59f8ac4861fe83L, 0x8d48d2cd03144cL, 0xa8457d2bfa6dceL,
+ 0xd7ed333677c136L, 0xcb8e219c228e18L, 0x5f70bc916ab1e4L,
+ 0x2ae3a3d3780370L, 0x9f3365488f17adL }
+ },
+ {
+ { 0xeab0710960e4bbL, 0xc668a78ab9cfd3L, 0x2e85553b0ef946L,
+ 0xa43c4b98df5df3L, 0x0ecd5593cb3646L, 0x6f543c418dbe71L,
+ 0xee7edaaf59818bL, 0xc44e8d290911c1L },
+ { 0xafb38b1269b509L, 0x9e2737c52afe2cL, 0x5b2ef02ccfa664L,
+ 0x1e0aeace1cc58bL, 0x37a57e95ea134eL, 0xc9c465a83b9fc2L,
+ 0x4b9e8c76e3eccaL, 0xca07dbe9bdbab5L }
+ },
+ {
+ { 0xd297f3cb0d7807L, 0xee441a5f59ce61L, 0x728553bb2db844L,
+ 0x90f87e5640e9e0L, 0xaa72cbfcb76dffL, 0x065c6864012d57L,
+ 0xd5ee88f9678b44L, 0x3d74b852177603L },
+ { 0x3f9c947748b68eL, 0x03856d98f44d44L, 0xde34b84462426cL,
+ 0xc16d1bb845ab29L, 0x9df6217d2e18deL, 0xec6d219b154643L,
+ 0x22a8ec32ee0f8fL, 0x632ad3891c5175L }
+ },
+ {
+ { 0x19d9d236869267L, 0x628df94fe5532aL, 0x458d76c6dc9a01L,
+ 0x405fe6c2cc39c8L, 0x7dddc67f3a04baL, 0xfee630312500c7L,
+ 0x580b6f0a50e9deL, 0xfb5918a6090604L },
+ { 0xd7159253af6b2dL, 0x83d62d61c7d1ecL, 0x94398c185858c4L,
+ 0x94643dc14bfb64L, 0x758fa38af7db80L, 0xe2d7d93a8a1557L,
+ 0xa569e853562af1L, 0xd226bdd84346aaL }
+ },
+ {
+ { 0xc2d0a5ed0ccd20L, 0xeb9adb85dbc0cfL, 0xe0a29ee26d7e88L,
+ 0x8bb39f884a8e98L, 0x511f1c137396eaL, 0xbc9ec5ac8b2fb3L,
+ 0x299d81c090e5bcL, 0xe1dfe344cdd587L },
+ { 0x80f61f45e465b7L, 0x5699c531bad59eL, 0x85e92e4b79ff92L,
+ 0x1e64fce9db244cL, 0x3748574a22097dL, 0xe2aa6b9efff24eL,
+ 0xb951be70a10bc6L, 0x66853269067a1cL }
+ },
+ {
+ { 0xf716ddfa6114d3L, 0x9e515f5037ec1fL, 0x773454144944a6L,
+ 0x1540c4caba97ccL, 0xe41e5488b54bb7L, 0x4363156cae37bcL,
+ 0xc384eaff3d2ce8L, 0x72a4f454c58ba4L },
+ { 0x0ceb530dcaf3fcL, 0x72d536578dcdbbL, 0x9b44084c6320faL,
+ 0x6262d34eb74c70L, 0x8abac85608e6dcL, 0x82a526410dd38dL,
+ 0xbc39911a819b8dL, 0xbda15fe03ad0d9L }
+ },
+ {
+ { 0xadbf587f9dc60bL, 0xf9d814f7d846d2L, 0xccdd241b77bde0L,
+ 0x89cb6d72242f50L, 0x95c0e3ee6360a8L, 0x7c7dd5adf49713L,
+ 0x68e0e4957d5814L, 0x3aa097d0c16571L },
+ { 0xb56b672267d03aL, 0x4f557088c44af4L, 0x67c49e7f3252a5L,
+ 0x871d6cfc94a469L, 0x57ae99801fbfaaL, 0x5c0e48f48a5d8eL,
+ 0xe9bf9c85e240b9L, 0xa41018999d41caL }
+ },
+ {
+ { 0x6beb0c7b2889b4L, 0x78b7f899455370L, 0xd43421447ca364L,
+ 0xdd9d2da9f21e5bL, 0xa0c7c180a7e4aaL, 0x022c0d4da1660cL,
+ 0xe1f5c165a57002L, 0x51c7c9e518f68fL },
+ { 0x6d521b62586502L, 0xa0f2cb3183ec1bL, 0x578b4e0caa5e16L,
+ 0x7bd4fbd764997fL, 0x7ec56c364b1804L, 0xb75a2540ee08e4L,
+ 0x6bf74a6dc19080L, 0x6ec793d97d6e59L }
+ },
+},
+{
+ {
+ { 0x16789d60a4beb9L, 0x512b2cd9b9c801L, 0xf8b6d108c7bb9cL,
+ 0xd85651e9ebdc8cL, 0xc9450829ba971aL, 0x852d9ea7e1cf78L,
+ 0x6a45e350af01e2L, 0xe6cdadf6151dcfL },
+ { 0xc454bb42b8c01bL, 0x59e0c493d54cd2L, 0x8e1e686454d608L,
+ 0x0dbae4bd8c6103L, 0xa5603a16c18b18L, 0x227a6b23369093L,
+ 0xf1e89295f3de1cL, 0x42f0b588ab63c5L }
+ },
+ {
+ { 0xf1974cc5b596d8L, 0xee8093f44719f0L, 0x40ba933f6f5b54L,
+ 0xd6e53652f3d654L, 0x9aeb83526d73b8L, 0x50ed5350776382L,
+ 0x3be47d6ad43875L, 0x21d56dfc786e48L },
+ { 0x8a75e18b73bb39L, 0x9eba84cf265a78L, 0x7c02a4d2e772e7L,
+ 0xf7df6d44c1ecd2L, 0xa8d9ea06cef71bL, 0x86e8f91cae3b68L,
+ 0x2fd141199efefaL, 0x0b36ab2214e6f6L }
+ },
+ {
+ { 0xd79065cbdce61cL, 0xcb562ffdecb229L, 0xef5d3d14600849L,
+ 0x348b31b1d23ac8L, 0xb2ea69915c36b8L, 0x268683d4822836L,
+ 0x083edbec6f0b7dL, 0xaf4f39d1a7821cL },
+ { 0x23be6e84e64841L, 0xe9e246365bf791L, 0xa3208ac02bfd7cL,
+ 0x231989cd01357dL, 0x79b8aad6422ab4L, 0x57d2b7e91b8564L,
+ 0x28ebbcc8c04421L, 0xdc787d87d09c05L }
+ },
+ {
+ { 0xeb99f626c7bed5L, 0x326b15f39cd0e8L, 0xd9d53dcd860615L,
+ 0xdf636e71bf4205L, 0x1eaa0bf0752209L, 0x17ce69a4744abbL,
+ 0x474572df3ea2fbL, 0xc4f6f73224a7f3L },
+ { 0x7ed86ad63081b4L, 0xcd4cdc74a20afbL, 0x7563831b301b2eL,
+ 0x5b4d2b1e038699L, 0xa15d1fa802a15fL, 0x6687aaf13e9172L,
+ 0x3eccd36ba6da90L, 0x34e829d7474e83L }
+ },
+ {
+ { 0x4cea19b19c9b27L, 0xa14c37a5f52523L, 0x248b16d726625cL,
+ 0x8c40f9f6cabc21L, 0x918470c32a5c65L, 0x314056b2a98d5bL,
+ 0x6c974cf34a0714L, 0x0c8f8a94f6314aL },
+ { 0x484455770bccfdL, 0xf5835db740c9fdL, 0x12e59b5a21407cL,
+ 0xbe338e0db1689dL, 0x5a50ce9dd5e915L, 0xb1780e9ef99f39L,
+ 0x1262b55ee4d833L, 0x4be3f2289c5340L }
+ },
+ {
+ { 0xbb99b906c4b858L, 0xa7724d1550ca53L, 0x7d31f5a826962eL,
+ 0xf239322a5804daL, 0x3e113200275048L, 0xcbb1bb83ee4cb6L,
+ 0xdb865251331191L, 0xb7caf9e7d1d903L },
+ { 0x06e3b0577d7a9dL, 0x7a132b0b3bbbf5L, 0xd61fbc57c50575L,
+ 0x393f712af4b646L, 0xef77972cb7efe9L, 0x20e6d5d5ea4995L,
+ 0x0ac23d4fbbe4c6L, 0x8456617c807f2aL }
+ },
+ {
+ { 0x4995fb35396143L, 0xa8b4bd1b99dc46L, 0x2293e8e4150064L,
+ 0x2f77d4922a3545L, 0xe866b03b2192c4L, 0x58b01f05e0aa38L,
+ 0xe406b232ed246bL, 0x447edb3ed60974L },
+ { 0xf541b338869703L, 0x6959fe0383420aL, 0xd6b39db4be4e48L,
+ 0x048f3b4b5714efL, 0x68b49685d9e4b8L, 0xbda8e6c2177963L,
+ 0x5094e35c4211feL, 0xea591c32d46d1aL }
+ },
+ {
+ { 0x3a768ff2fef780L, 0x4218d2832970c6L, 0xce598e4ec6da17L,
+ 0xf675645fbb126aL, 0xb04c23f0427617L, 0xc9f93fbe4fce74L,
+ 0x44a414b3c91b00L, 0x4d982f31d3b3ccL },
+ { 0xb1d40e8b24cce0L, 0x5a21c07133e73dL, 0x6e9358e0bb589dL,
+ 0x39cfb172399844L, 0x83f7647166080eL, 0xcfe7bf8450b468L,
+ 0x2a288f71e8434fL, 0xd39f1e521a81e3L }
+ },
+},
+{
+ {
+ { 0x78c6f13528af6fL, 0x0001fe294b74d9L, 0xae7742501aab44L,
+ 0x7cbe937ef0039cL, 0xaf3e4f00fa2a67L, 0xe28175fda1378eL,
+ 0x72adeed8ccd90eL, 0x16a8ce100af22fL },
+ { 0x69fae17cbf63ddL, 0x67861729e39e26L, 0xe92b3d5f827a18L,
+ 0x4d75e418403682L, 0x01a4fd99056a79L, 0x89efb2d20008f5L,
+ 0xa2f6918b78ff15L, 0xf41c870a3437f5L }
+ },
+ {
+ { 0xc840ae57be353cL, 0x465a5eb3fb2691L, 0x34a89f07eba833L,
+ 0xf620896013346eL, 0x563b5f0e875df2L, 0x5f7fc8bfbc44ceL,
+ 0x22fcb5acfedf9dL, 0x7cf68d47dc691bL },
+ { 0x37f7c2d76a103fL, 0x728a128fd87b7dL, 0x7db2ad8ccf2132L,
+ 0xa4c13feb100e63L, 0xcd28a517b511d5L, 0xb910280721ca5cL,
+ 0xec1305fd84bd52L, 0xb9646422729791L }
+ },
+ {
+ { 0x83fccdf5bc7462L, 0x01f3ddad6f012fL, 0x57f11713a6a87cL,
+ 0xedb47ceff403acL, 0x6c184e5baab073L, 0x5b17c7d6f0d6a1L,
+ 0x45a4c4f3ef2c91L, 0x26c3f7e86a8f41L },
+ { 0x81a6db0b646514L, 0xf84059fca8b9aeL, 0xd73dab69f02305L,
+ 0x0de3faec4b7c6cL, 0x18abb88696df2fL, 0x45dd1b975d7740L,
+ 0x3aeccc69ee35bcL, 0x478252eb029f88L }
+ },
+ {
+ { 0x66bf85b8b2ce15L, 0x1175425335709dL, 0x00169ef8123874L,
+ 0xfd3c18c9b89868L, 0xb3612f9775204eL, 0x4b8d09dc2cd510L,
+ 0xafa12e614559adL, 0x1ddaa889657493L },
+ { 0x87d700b1e77a08L, 0xaf4cf2f14d2e71L, 0xe00835dbf90c94L,
+ 0xb16a6ec6dc8429L, 0x02a7210f8a4d92L, 0x5a5ab403d0c48dL,
+ 0x0052b3ab5b9beaL, 0x6242739e138f89L }
+ },
+ {
+ { 0x7c215d316b2819L, 0xdacb65efeb9d7aL, 0xc3c569ed833423L,
+ 0xbc08435886a058L, 0x132c4db7e5cb61L, 0x6373a279422affL,
+ 0x43b9d7efca9fc4L, 0xe3319a5dbe465fL },
+ { 0x51d36870b39da7L, 0xcb6d7984b75492L, 0x77eb272eadd87aL,
+ 0xf2fb47de0d3f6cL, 0x807fd86f9f791cL, 0xf01086b975e885L,
+ 0xf9314b5b6a3604L, 0x8cd453867be852L }
+ },
+ {
+ { 0x7c1e6b3858f79bL, 0xf0477c4938caf9L, 0xb311bbf3e88c44L,
+ 0x9234c091e3a3c1L, 0x531af2b95a1d4dL, 0xf3cc969b8d1c64L,
+ 0x6f3c328b51e78dL, 0x5a1bd6c34e8881L },
+ { 0x2e312393a9336fL, 0x020f0cc5ced897L, 0x4b45d7b5fab121L,
+ 0x8068b1c1841210L, 0x1bd85fc8349170L, 0xfe816d80f97fe5L,
+ 0x108981814b84fcL, 0x1d4fabbb93cd48L }
+ },
+ {
+ { 0x1f11d45aef599eL, 0x8d91243b09c58aL, 0xd2eec7bd08c3c3L,
+ 0x5a6039b3b02793L, 0xb27fed58fb2c00L, 0xb5de44de8acf5eL,
+ 0x2c3e0cd6e6c698L, 0x2f96ed4777180dL },
+ { 0x67de8bf96d0e36L, 0xd36a2b6c9b6d65L, 0x8df5d37637d59cL,
+ 0x951899fc8d9878L, 0x0fa090db13fcf8L, 0xa5270811f5c7b4L,
+ 0x56a6560513a37aL, 0xc6f553014dc1feL }
+ },
+ {
+ { 0x7f6def794945d6L, 0x2f52fe38cc8832L, 0x0228ad9a812ff5L,
+ 0xcd282e5bb8478aL, 0xa0bc9afbe91b07L, 0x0360cdc11165e2L,
+ 0xb5240fd7b857e4L, 0x67f1665fa36b08L },
+ { 0x84ce588ad2c93fL, 0x94db722e8ff4c0L, 0xad2edbb489c8a3L,
+ 0x6b2d5b87e5f278L, 0x0265e58d1d0798L, 0xd2c9f264c5589eL,
+ 0xde81f094e4074dL, 0xc539595303089fL }
+ },
+},
+{
+ {
+ { 0x183492f83e882cL, 0x4d58203b5e6c12L, 0x1ac96c3efec20bL,
+ 0xabd5a5be1cd15eL, 0x7e1e242cbbb14bL, 0x9f03f45d0543b3L,
+ 0xc94bc47d678158L, 0x7917be0a446cadL },
+ { 0x53f2be29b37394L, 0x0cb0a6c064cc76L, 0x3a857bcfba3da3L,
+ 0xac86bc580fcb49L, 0x9d5336e30ab146L, 0xafb093d5bc1270L,
+ 0x996689de5c3b6eL, 0x55189faea076baL }
+ },
+ {
+ { 0x99ef986646ce03L, 0xa155f8130e6100L, 0x75bef1729b6b07L,
+ 0xc46f08e1de077bL, 0xf52fdc57ed0526L, 0xe09d98961a299aL,
+ 0x95273297b8e93aL, 0x11255b50acd185L },
+ { 0x57919db4a6acddL, 0x708a5784451d74L, 0x5b0bd01283f7b3L,
+ 0xe82f40cc3d9260L, 0x2ab96ec82bbdc2L, 0x921f680c164d87L,
+ 0xf0f7883c17a6a9L, 0xc366478382a001L }
+ },
+ {
+ { 0x5c9aa072e40791L, 0xf0b72d6a0776bfL, 0x445f9b2eaa50dcL,
+ 0xa929fa96bda47fL, 0x539dc713bbfc49L, 0x4f16dd0006a78bL,
+ 0x331ba3deef39c7L, 0xbfa0a24c34157cL },
+ { 0x0220beb6a3b482L, 0x3164d4d6c43885L, 0xa03bb5dacdea23L,
+ 0xd6b8b5a9d8f450L, 0xd218e65bd208feL, 0x43948ed35c476fL,
+ 0x29a0dd80a2ed2bL, 0xa6ccf3325295b7L }
+ },
+ {
+ { 0xf68f15fac38939L, 0xb3dd5a2f8010c1L, 0xf7ac290a35f141L,
+ 0xdc8f3b27388574L, 0x7ec3de1e95fed2L, 0xc625451257ac7dL,
+ 0x66fc33e664e55aL, 0xd3968d34832ba5L },
+ { 0x980291bc026448L, 0xfcb212524da4a5L, 0xbca7df4827a360L,
+ 0xfcc395c85ca63bL, 0xcf566ec8e9f733L, 0x835ee9bd465f70L,
+ 0xe66d111372f916L, 0xc066cf904d9211L }
+ },
+ {
+ { 0xb9763a38b48818L, 0xa6d23cc4288f96L, 0xe27fcf5ed3a229L,
+ 0x6aebf9cabaff00L, 0xf3375038131cd1L, 0x13ad41dffabd58L,
+ 0x1bee6af861c83bL, 0x274fe969c142e7L },
+ { 0x70ebcc99b84b5bL, 0xe1a57d78191cfcL, 0x46ccd06cbf00b8L,
+ 0xc233e8eefe402dL, 0xb4ab215beebeb3L, 0xb7424eabd14e7bL,
+ 0x351259aa679578L, 0x6d6d01e471d684L }
+ },
+ {
+ { 0x755c465815ae38L, 0xadc3e85611db56L, 0x633999b188dd50L,
+ 0xfdf7509c12d907L, 0x25bcfde238b6afL, 0x50d705d397f5e7L,
+ 0xb65f60b944c974L, 0x8867fc327ac325L },
+ { 0x2edc4413763effL, 0x892c0b3341fb63L, 0xb34b83ab3a7f28L,
+ 0x9aa106d15c2f18L, 0x720bbc61bb2277L, 0x637f72a5cfaefdL,
+ 0xf57db6ef43e565L, 0xceb7c67b58e772L }
+ },
+ {
+ { 0x2793da56ecc1deL, 0x4e1097438f31b2L, 0x4229b4f8781267L,
+ 0xe5d2272dec04a1L, 0x6abb463ec17cffL, 0x28aaa7e0cbb048L,
+ 0x41dc081d22ef85L, 0xcbc361e5e63d0fL },
+ { 0xb78aafcad5dbaaL, 0x0111505fc1edc3L, 0x63ed66d92c7bfaL,
+ 0x2982284e468919L, 0x30f1f21b8c0d8cL, 0xf0567472685093L,
+ 0x0e085b6f03dd0fL, 0xa8c8db85581e66L }
+ },
+ {
+ { 0x42009a6264ad0cL, 0x13bf2b8593bef4L, 0x1d111905d4e8b1L,
+ 0xfe3e940ef7bddcL, 0xa012275624e62cL, 0xcb659241d6d3ccL,
+ 0xc7bcc70edb7ab6L, 0xff9fafbb750b1cL },
+ { 0xf65df297fea84bL, 0x17c84a890b0e02L, 0xa92a859301e821L,
+ 0xbee8cb2fb480d1L, 0x7010b8c59c604eL, 0x47bf3f4e803c43L,
+ 0xd64514247b3fffL, 0xc4c5dcb9f0da13L }
+ },
+},
+{
+ {
+ { 0x8af700cb5253b3L, 0x31ca605206957aL, 0x25744393eafdcdL,
+ 0x2ba5ae1d3ae15eL, 0x710b7385b82579L, 0x145ab57112b95aL,
+ 0x4b133a038c55c5L, 0xf7559c92a16fefL },
+ { 0x70c3e68d9ba896L, 0x475dd32c33d07aL, 0xe084e473a41e40L,
+ 0xddc9382fd2e706L, 0x34b727579510bdL, 0x5e78a69a5f901eL,
+ 0x429dfd7dcfb823L, 0x1d9dc18014f0a3L }
+ },
+ {
+ { 0x364fcdfaf403d7L, 0xd9ea4ffb7d7b34L, 0x21a3426cbb1dacL,
+ 0xfa51052143b4f5L, 0x2bca0736df2409L, 0x7e6985a8ad7285L,
+ 0x3a1a9d04aaa27fL, 0x1a815e19fc0c6cL },
+ { 0xfab6147bb65bb3L, 0xa36dc0d33ced0bL, 0x26a88592062d78L,
+ 0x343861728a5fb7L, 0xe82da254ebb1adL, 0x70f5071d05aa11L,
+ 0x0b7f847adaac48L, 0xeb812bc93cb269L }
+ },
+ {
+ { 0xcb317ccf7cacccL, 0xd3410d9cf85098L, 0xca68c8d7f078d7L,
+ 0xfe9e812b782efcL, 0x32e7c0f5f544b5L, 0x44fe95a3a7b7f2L,
+ 0xf4f1543e91327bL, 0x27d118d76645edL },
+ { 0x690547cd7abc2cL, 0xf64680fb53c8afL, 0xbe0cbe079ea989L,
+ 0x6cf0ccea91af28L, 0xa3b85a29daa2f9L, 0xd4b663c91faed0L,
+ 0x782c7b7a8b20baL, 0xf494fafb8d98ceL }
+ },
+ {
+ { 0x080c0d7002f55aL, 0xf4f8f142d6d9ddL, 0xb326229382f025L,
+ 0x58fd0b5ad28c20L, 0x704b9928d06a15L, 0xf4545d97fbd8e4L,
+ 0xc32fa63ed55581L, 0x3ab793601ac0fdL },
+ { 0x13ece526099fd1L, 0x776dba89c79178L, 0x8d28212ce26c45L,
+ 0x09fddaf60d739cL, 0xf9931eda84826eL, 0x6e73d90b29439eL,
+ 0x94cfefc9095e61L, 0x3050d16802f474L }
+ },
+ {
+ { 0x0898f8f9f6394bL, 0x48b8cea88b0e91L, 0x4bc99254c1b362L,
+ 0xe3fccb4827d9ecL, 0x5d4cf9ad950d6aL, 0xa16f1ef39b5b38L,
+ 0x3c76d1d620f288L, 0x9fdd059e119390L },
+ { 0x7b5de9efb5edf8L, 0x3e290b9769d14eL, 0x4df3a916bd10b5L,
+ 0xae99bca82f8f7bL, 0x5481d5dc9524afL, 0xf112e4f69504f1L,
+ 0xb048f0951931ecL, 0xbff876a18f51b1L }
+ },
+ {
+ { 0x932e2a746c1c37L, 0x903ad529aea4c1L, 0x717ac918f161f2L,
+ 0xa57d197f425e2aL, 0xae89dac7f39e0eL, 0x91655c0baa2a58L,
+ 0xe3dc28654836ddL, 0xb5f0baaa9ec9e6L },
+ { 0xf7c4662bdbda04L, 0xbe5393b51059c0L, 0xb16d552dd95b0fL,
+ 0xde495b31b3bd96L, 0xb2a6e02c0206c5L, 0x045cc09014d3a9L,
+ 0xf66a3152a2f490L, 0x208c108c5dea05L }
+ },
+ {
+ { 0x6e38b6865237eaL, 0x93a13039f27fc6L, 0x9a6d510a95068aL,
+ 0x6fbf216e7c9e54L, 0x7824290571ac1dL, 0x8cb23ba91c2a0cL,
+ 0x611202ec7e434dL, 0x8f901bf76058b4L },
+ { 0xef0ac050849588L, 0xe0d2ddedd31804L, 0xaf5417ceb2ca81L,
+ 0x420ac065d1a509L, 0x46e345e9683bb6L, 0x6daf635f613f7fL,
+ 0xc9e829148a9576L, 0x5f9f1d1176d147L }
+ },
+ {
+ { 0xd24ae1d77e9709L, 0x77751dc0047b8aL, 0xe325334c6a1593L,
+ 0x9baf962671f86aL, 0x425af6ac29a15eL, 0x31086002796e33L,
+ 0xb6ea78cfc253a5L, 0x4c733e0afae0eaL },
+ { 0x4b7443a97c99b9L, 0xc14e9e450203a6L, 0xd1bb51552680baL,
+ 0xa56a3efd55533aL, 0xa66e38c169e1a0L, 0xb3e4df9eed7da0L,
+ 0x022c937ddce3d9L, 0x8552089f6e36b4L }
+ },
+},
+{
+ {
+ { 0x8e4bf95f5cc82eL, 0x2ad80c3c3ed6c9L, 0xf2e5b2cc9045e1L,
+ 0x42c906559b06d4L, 0xc1f73797b43b84L, 0x1710dbf72d7992L,
+ 0xe98cf47767b41cL, 0xe713fce7bfb9e9L },
+ { 0x9f54ae99fa5134L, 0x3002fd8de40d0eL, 0xdc282b79311334L,
+ 0x5519810bfeb360L, 0x31539c70f96ffeL, 0x04eacc0d27777bL,
+ 0x59824108ff5053L, 0x598236632b67adL }
+ },
+ {
+ { 0x6eb45546bea5c2L, 0x82cfae0d509a33L, 0x6a69bd8394bb59L,
+ 0x1880d8d5770ee1L, 0x63518447dacf9eL, 0x5b1ecc5f02b891L,
+ 0xeb7d900b6c9a5aL, 0xdab8a768897da8L },
+ { 0x28c7be598851a6L, 0x0101d4f4d73c3bL, 0x3c2569c5084996L,
+ 0xb9bc911280bde0L, 0x513a22acd0d4f9L, 0xdf2986d2a15f3bL,
+ 0x231c28f2aa4943L, 0x29623ad0333870L }
+ },
+ {
+ { 0x2ceb1784084416L, 0x924cf1c49516cdL, 0x76536c04be856fL,
+ 0x11b59cd47a265bL, 0x720dc844999494L, 0x910f794007b795L,
+ 0x8434e142d3df83L, 0x8f53878bd478d3L },
+ { 0xd9b072eaeb9c2fL, 0x16f87eafd8a29fL, 0x8c42f9b2fd0de1L,
+ 0x916721e0e816efL, 0x2ecb47018bde37L, 0xcde3b7a2375da2L,
+ 0x30d0657ef94281L, 0x51054565cd7af8L }
+ },
+ {
+ { 0x7230b334bdced3L, 0x0c6a3e10838569L, 0xf19c9ece3493b8L,
+ 0xf2759270d97c57L, 0xf14181e0c862ebL, 0xfd3bac132c72bcL,
+ 0x620563ff3be362L, 0x672ccaf47283b7L },
+ { 0x191e3fa2b7bf16L, 0xf838633520dad7L, 0xd3dde553629d87L,
+ 0x14d8836af86ebeL, 0x3db7dfb221b2ceL, 0x3872abb0aed72aL,
+ 0xb60de528c665b7L, 0x89c259644982cbL }
+ },
+ {
+ { 0x799a2de4dbba25L, 0xd818aaea42715eL, 0xbc88f4df55c362L,
+ 0x142a163713c9aeL, 0x411e8eefbfb33fL, 0x34b46296bb684aL,
+ 0x4344becdc81817L, 0xcc9573d17f9d46L },
+ { 0xf85f8bcff38a7dL, 0xa14bf730caf117L, 0x126874f4ba6429L,
+ 0xcc9bf22aa5db97L, 0x62b56df6aba827L, 0xfee1cb89c9772aL,
+ 0xe36838f177e541L, 0x698815dadd438fL }
+ },
+ {
+ { 0xc9fd89438ed1adL, 0x73cd79d7b6a601L, 0x2210e6205e8d20L,
+ 0x72384ac3592af5L, 0x5ccc079763d07eL, 0x2f31a4aa5f79ebL,
+ 0x693f4ed2945a95L, 0xc7120178056fdcL },
+ { 0x361ecd2df4b09aL, 0xa5644eab7d929aL, 0x34abc0b3fabe9aL,
+ 0x1a2473ce942a8cL, 0xe00c9246454bc3L, 0xab324bcdff7366L,
+ 0xe1412f121b8f99L, 0x970b572e33551eL }
+ },
+ {
+ { 0x6ca4cacbd0a6b5L, 0x5584787921d654L, 0x18e5253c809bdaL,
+ 0x01b32c3f0cbe5eL, 0xb9aa7540f987ddL, 0x628f4bb6dfa4dbL,
+ 0x0255f0b891890bL, 0x25b7df4874e590L },
+ { 0xbded3188ed5f95L, 0x9dc428dca93023L, 0xc68f25abccf520L,
+ 0xc4f3764e616e6cL, 0xd9a57f1a1d9993L, 0xd1964a5533431bL,
+ 0x06cd77f02ab6d0L, 0xa66079103e52e0L }
+ },
+ {
+ { 0xab088645f72700L, 0xf77b2ff0a1a44eL, 0x43ebdd8c2a24b5L,
+ 0xa6d67114f564d7L, 0x495df63f414160L, 0xf5bacd776f6de6L,
+ 0x3011aff7c2b43dL, 0xbb1e64c3241928L },
+ { 0xf70c5725034073L, 0x891c62a68f1e97L, 0xed8eb2eb22e374L,
+ 0xd3a53e97dbcc2fL, 0x1d06281dc8f220L, 0x9eef48face4393L,
+ 0x96014f5d2abecdL, 0x1da7e092653cebL }
+ },
+},
+{
+ {
+ { 0x7593318d00bc94L, 0x586f3c6c7262a2L, 0xea68f52958ad31L,
+ 0x6707fccd4e8bedL, 0xb7e35d6cb3f9ceL, 0x2cbb6f7f4b1be8L,
+ 0xa5352687b41aeeL, 0x1d77845f7b39b8L },
+ { 0xb1f3995eaf9554L, 0x3250f70fe9e7d4L, 0x62e5d1ba00c23cL,
+ 0x5e422f5c10e3bfL, 0x7a18039c25cec4L, 0xb4e66a17cc4d5bL,
+ 0xad7c5f636d0e0cL, 0x9f40b12a4cf347L }
+ },
+ {
+ { 0x697f88251e3696L, 0xc89bc40ab0a648L, 0x8f261a59785804L,
+ 0x4c7f900b51a2bdL, 0xd00e7af8a2dfcfL, 0xf9c534db642aebL,
+ 0xea2a79fb63df0eL, 0x392a69af2f64a4L },
+ { 0x0c0f01cc331b6cL, 0x414bf2e6a5edb5L, 0xfe5ed815068391L,
+ 0x0a8078d62fbc34L, 0x78a438254bca98L, 0xf7a49ae3d727c7L,
+ 0x96c1de1ab4dffeL, 0x45901f73b9440aL }
+ },
+ {
+ { 0x3f1189facfe46eL, 0xdca6f464467443L, 0xac385422eb5bcfL,
+ 0xb02dce9906bf72L, 0xdd8cdacfe1d454L, 0xc26f04c65f7218L,
+ 0xb4748596ea145dL, 0xc53dc6b5bdb315L },
+ { 0xbe5be749ad7197L, 0x627e91918b5eccL, 0x57c889c9ea405dL,
+ 0x2e5650c1a5360bL, 0x42290df1b30b27L, 0x4a071575242687L,
+ 0x553ed1fd379133L, 0xb9d7a0701db019L }
+ },
+ {
+ { 0xcfe551c56597dcL, 0x81af92a925ebd6L, 0x83efe16f4e8d57L,
+ 0x61bb4311f640d3L, 0xf80440f78b414aL, 0x72f3c636c9e3b4L,
+ 0xb55f43a6a03c66L, 0x47a9dede417037L },
+ { 0x1a7e287dbb612bL, 0x895c3c7dbb9220L, 0xd50c86e6c04764L,
+ 0xed5269853cf7caL, 0xc78d799f74af55L, 0xb2ba0f2b969ff2L,
+ 0x06d48151c6530bL, 0x764a1fe165a575L }
+ },
+ {
+ { 0x4383a3bc1b5eceL, 0x0563c8854ff148L, 0x9a452795af796eL,
+ 0xffba7c088e9953L, 0xfe9fb5eb6a3001L, 0x795098825b6b19L,
+ 0x67c899ad81be5eL, 0xc89ac8d2f9d29bL },
+ { 0x7c76ba329ab8f7L, 0xb2a18c96e40f74L, 0x1b5056e3864d9bL,
+ 0xdfa503d9b582b8L, 0xfb035197c9c68eL, 0xdc501316b3c22bL,
+ 0x38ab231a6c96ffL, 0x4ea527c8cb1c10L }
+ },
+ {
+ { 0xd632f20c05b4edL, 0xe0199fab2a032dL, 0x373295626812d7L,
+ 0x2aed855013df13L, 0x92ca24b39f96acL, 0x620273dbb9751aL,
+ 0x5d0d21ef7437a1L, 0x9de2a43077de56L },
+ { 0x0569b1211a4674L, 0xfc3923e89c3989L, 0x3d127042c5c770L,
+ 0x0072b9084e8c37L, 0x7178d4dac39f9aL, 0x5f8292f778d345L,
+ 0x9e5bf0f77c7307L, 0x7691610c3a20f5L }
+ },
+ {
+ { 0x7c4ead5705fe96L, 0x377ec35c8e464cL, 0x3e5b9907689954L,
+ 0xc0f6949a2d31eaL, 0x839d395c580671L, 0x2f347a6b215b09L,
+ 0xfdcfa33683df83L, 0x6e12cc26af39a8L },
+ { 0xae46ec813a3bd2L, 0x03a7d3b59366f8L, 0xe2029d5b87aed4L,
+ 0xbdc4e43fe1b83dL, 0x768437cdb8a1a8L, 0xe47acc3ea0dd7fL,
+ 0x550e0cc62a0af4L, 0xcaf2cbc1a20962L }
+ },
+ {
+ { 0x5a784f7f28a78fL, 0x952a9b507e9724L, 0x8ac5e411bab7a3L,
+ 0x1251e3fb7bc1e1L, 0xe360f82dc15e22L, 0x3ac72da95213f5L,
+ 0x65ee9ba4dcd47bL, 0xdfeab7b3af5952L },
+ { 0x34c5c8026fd3c6L, 0xd977b08f3ac7eeL, 0x003bd017dba2f6L,
+ 0xcfc5cf8ac98c8dL, 0x05eb6040e46922L, 0xc248b17faa9352L,
+ 0xfa41c0f395c7a7L, 0x29931d4b71ee44L }
+ },
+},
+{
+ {
+ { 0xac087bb07861c5L, 0x3bd37db5ae8240L, 0x94c68ecf94518fL,
+ 0xd32a378ff88a5bL, 0x42c8aaf9b441d1L, 0x089db70fc07f12L,
+ 0x211c386d3d4455L, 0x1db9af7546b158L },
+ { 0xdfd1b6551bc927L, 0x69c04930733df4L, 0xdc72cd42aeb586L,
+ 0xeebdace823aa13L, 0x51b3b3c56ad643L, 0xb983a99d4e0426L,
+ 0xa1e5b6c69c4eccL, 0x37cd38245e6668L }
+ },
+ {
+ { 0x158ce6d9f73aeaL, 0x36a774914ff475L, 0x0d4e424dc0b018L,
+ 0xc2c44483946f09L, 0x7a7de3ffacda62L, 0x49a19e6b486709L,
+ 0x65094d8db61da7L, 0x09edfd98f5ee87L },
+ { 0xe460fcfb37226dL, 0x3b9d03969bf470L, 0x3d4d511247ca22L,
+ 0xc7248d6c782cb1L, 0x91189a000ad293L, 0x1244942e8abe75L,
+ 0x9f88d12bf52cdbL, 0x368463ebbbcadfL }
+ },
+ {
+ { 0x419e4b38074f45L, 0xd3f8e2e0771c83L, 0xd2743b42e68d34L,
+ 0xc68b7dbb116a00L, 0xfad2cf7d84cc37L, 0xcfd27c0b7a0f4dL,
+ 0x3b9e23f190e587L, 0x7bab499751ca9eL },
+ { 0x3270861a8f12eeL, 0xee1f38d31b36d5L, 0x748bb31e4c0eedL,
+ 0x9be5c9b110ebadL, 0x728660bc8b6cb6L, 0x7bc9df793d914aL,
+ 0x73a4f2cc88c859L, 0xbe4a2fdb4e7f0eL }
+ },
+ {
+ { 0xe566ff8a450e77L, 0xb0b40066a13abaL, 0x483a510cd7dc90L,
+ 0xb1a20135fa9cccL, 0xeb0b631a80e67cL, 0x7c34e1f020801aL,
+ 0x0257dc8f4e447cL, 0x7abe7d174c6f0fL },
+ { 0xf115a3ab19a576L, 0x8f0474a064ca0eL, 0x999bb6b351f99bL,
+ 0x855254b773edc3L, 0x49f6c2f427d717L, 0x9f682532e0cef2L,
+ 0x1fe126c2ee34f5L, 0x1ec2cae80150f7L }
+ },
+ {
+ { 0x862c5afc005b7aL, 0x61adea7ec4ef17L, 0xf885fd3007b446L,
+ 0x25c129d9b0e30eL, 0xbc10f25feec7e0L, 0x3901ac4df79ee1L,
+ 0xad49db7fe9e19fL, 0xc8624d9360d050L },
+ { 0xc74a576bf3260bL, 0xbde80248c010c2L, 0xf15532909b6977L,
+ 0x6a5a82ed52dcf8L, 0x4fbf59d29b9dfcL, 0x337d049c7b730cL,
+ 0xb3deac63a89cd4L, 0x1e07595ad2f2ebL }
+ },
+ {
+ { 0xa0b0a4d3b7c84eL, 0xf132c378cf2b00L, 0x192814beaaa8ecL,
+ 0xe7929f97b4b5dfL, 0xf08a68e42d0ab7L, 0x814afb17b60cddL,
+ 0x78c348c7d9c160L, 0xf8a948844db217L },
+ { 0xcdefd88eaa2578L, 0xf717f56bd0e260L, 0x7754e131694d02L,
+ 0x1254c14181dbd8L, 0x0dacdd26e5f312L, 0xb8abdfbcef87bfL,
+ 0xb985972e74e2eaL, 0x1717621002b424L }
+ },
+ {
+ { 0x92cc75e162df70L, 0x1e20c0618ee849L, 0xc036b4626aa590L,
+ 0x31be67e4da5155L, 0x04911b5f7213b0L, 0x39261d7bb2e72eL,
+ 0x9e844665c015a3L, 0x2f59fc0298ae67L },
+ { 0xa3ea7ba1701fccL, 0x87a5fa90ebd651L, 0xa607ed4301d7b1L,
+ 0xbd4ec5f3b2e271L, 0x732a1a2dc4180fL, 0xbe15d82feaa8c1L,
+ 0x103670266f2f3fL, 0xccfd3979e79ce8L }
+ },
+ {
+ { 0x82ab83570a54adL, 0x5c1dee8e3bec75L, 0xf583ff454b556bL,
+ 0x9220199f461e60L, 0xdf61ca887fc4e7L, 0x6641fd20776dadL,
+ 0x00c6edd8edd061L, 0xaf9b14255f7e87L },
+ { 0x73f15e49bbe3ecL, 0xdd3b788f8bc1faL, 0xb24cc071b8ff86L,
+ 0x6c260d241be58bL, 0xec1c4e36b10adaL, 0xf6b42097fdb985L,
+ 0x0d0ac85d47c212L, 0x967191c07d78d1L }
+ },
+},
+{
+ {
+ { 0x3b11638843d0f3L, 0x4b89297f27f10eL, 0x477236e863ba2aL,
+ 0x1949622add280cL, 0x7cd523504da757L, 0xe0e99d279e4ff7L,
+ 0xb4ef894537da41L, 0xc55dde45a24ff1L },
+ { 0x18d8e21b587521L, 0x8010b5d3777833L, 0x4af522dd3a54c8L,
+ 0x7cd476b4c0ac13L, 0x4587e614099f67L, 0x494d0ed605ee64L,
+ 0x3218ba2cc80903L, 0x5ff56aa0b2e169L }
+ },
+ {
+ { 0x51ec94e3a06c69L, 0xa26d7be5e65c52L, 0x156f113d44ee96L,
+ 0x70f0968bf5b9b4L, 0x9b7e4695f5332dL, 0x36c295f6703829L,
+ 0x1522690d04f492L, 0xcf35ca4728043bL },
+ { 0xf9ca3e1190a7c3L, 0x53d2413f971b07L, 0xae596529c48b49L,
+ 0x74672b8fefff5cL, 0x0a3018ba7643b0L, 0x51919e83e9b0a8L,
+ 0x89ad33dc932fb5L, 0x52a4419643e687L }
+ },
+ {
+ { 0x7778990d2d0acdL, 0x3bdbcce487fdf1L, 0xdc413ca2b03dd2L,
+ 0x278755b9a2b7d0L, 0x4ebb8b535ddd7fL, 0x0465152bcbdb92L,
+ 0x34f22d6671d051L, 0x1ba04c787192b9L },
+ { 0xb1693f483560c1L, 0xe08a5937d174e9L, 0x47ffdc464dc9afL,
+ 0x1123596ce8126cL, 0x632d95f1124628L, 0x66287abfee7c76L,
+ 0xb40fe60c552332L, 0x3f11729e304e1eL }
+ },
+ {
+ { 0x97a6ea05030a8cL, 0x692419809c27b2L, 0x3308501ac9dd5dL,
+ 0x9fed7fabe73fdcL, 0xea555440535286L, 0xc7c07ab6c9b832L,
+ 0x178c882c51b967L, 0x6fa0c6986ee075L },
+ { 0xbaa4a15b8b5c4aL, 0xf83c0ea3130c0aL, 0xcf8624b2800331L,
+ 0xade85cd7ccbcb8L, 0x971d7f6f08445dL, 0xfd480b76a546dcL,
+ 0xdc15a38c93761cL, 0xc4c495c9d04631L }
+ },
+ {
+ { 0x5f4cee89470efeL, 0x9fe896188d93adL, 0x24783b3f4e49ceL,
+ 0x1bc7ed752ffb3eL, 0xa3abe6a6d81e17L, 0xd6bb8b47a333c3L,
+ 0x3485c0b10a3527L, 0x7cddc9c31a9d10L },
+ { 0x0c78112c38ca37L, 0x10e249ddd2f8d8L, 0x72c88ccc511911L,
+ 0x4d75b5a29a6c84L, 0xc74b267a227b1eL, 0x698390cf8e35adL,
+ 0x8f27edfe98d230L, 0xec922f26bdc7f4L }
+ },
+ {
+ { 0xac34023fc32e11L, 0xe0ae2f547200d1L, 0xa7c7492bd98c82L,
+ 0x3910b687b02154L, 0x6fdd06ce28ab6dL, 0xd3a7e49d98b012L,
+ 0x4c1c82b9f54207L, 0xef5bbe645c176fL },
+ { 0x3d17960d3e71ebL, 0x90d7e84080e70cL, 0x83e6438bff5d9eL,
+ 0x1877e1f535d85cL, 0x931ed6efbb69ccL, 0xcf962651247848L,
+ 0x76d618b750da4eL, 0xc076708717fbf6L }
+ },
+ {
+ { 0x80a5ac5eec5126L, 0x6d05dd13379c80L, 0x514b0892336d32L,
+ 0x586c0066725137L, 0xab2365a574f954L, 0x3c89ea0ac7d356L,
+ 0xf1f2edd27460baL, 0xf200ddbab9870fL },
+ { 0xc8f1b2ca35e885L, 0x5d22f86e6e7550L, 0x24b9a409554615L,
+ 0xcb41107616314fL, 0xca752f0c976a11L, 0x3e2f839a08291aL,
+ 0x0cff22ff2c420eL, 0xafd603e82b9747L }
+ },
+ {
+ { 0xaddeddc810a3daL, 0x78b6c2dd3a87bfL, 0xbc7020bde3a04cL,
+ 0x47ab9739b6d045L, 0x3b046d60959358L, 0x0f953e7509ee3eL,
+ 0x803dc8669fc61bL, 0xcceaec0893c8d4L },
+ { 0x21f8c40b048a45L, 0xb535073fcaea8aL, 0xe712c3590e360bL,
+ 0x5d0f3f48403338L, 0xe0ea26c7207f2dL, 0x20f6b57ffd9e05L,
+ 0xb97d68e4788b00L, 0xb1215541889cceL }
+ },
+},
+{
+ {
+ { 0x0079817464238eL, 0x21103020d381caL, 0x1cc4c6ed9f01b5L,
+ 0x5e35dc55a131b1L, 0xb61848d06944ebL, 0x83792a029631a3L,
+ 0xbe1017fafca0ddL, 0x70aaa01782fcbbL },
+ { 0xc63b7a099945e7L, 0xe9164ecc4486c1L, 0xb133e35885f2c1L,
+ 0x186f0d3c99ae02L, 0x2fca4922bf53e6L, 0xf922aa248a02bcL,
+ 0x4fe64900dd3dcaL, 0xe8c313ff6a8207L }
+ },
+ {
+ { 0xc5b358397caf1eL, 0xa001922922a4b6L, 0x67e36bedf07c95L,
+ 0xabaa0aeb2f4f34L, 0x66dc926dedc333L, 0x82021c438ec5b3L,
+ 0x82b4f2600ab176L, 0x1b7c22e69c45afL },
+ { 0x07b0dbe0924ad9L, 0xe030936a407ddeL, 0x66e1ce926ccd06L,
+ 0xb50c108e3505a9L, 0x8b921e1da98f51L, 0x449ca1a20cf7c7L,
+ 0xadb80c7e67d079L, 0x205aa54834372dL }
+ },
+ {
+ { 0x1482b4819bf847L, 0xd6c16ab5906f0fL, 0x323fb1723ad060L,
+ 0x0346389c832be7L, 0xe71b2d82ee45bfL, 0x761c37dfb22276L,
+ 0xa9b33345d70be2L, 0x81a06565a0627aL },
+ { 0x337750399a6282L, 0xafc8d2ed0436f0L, 0x22f71d3c53342fL,
+ 0x66ca56d8939ad3L, 0x15a919230e09baL, 0x261091ea6de890L,
+ 0x609d700e78f2d5L, 0x8aa52ee8eaaf78L }
+ },
+ {
+ { 0xa398788ce76258L, 0x3031d07494b975L, 0x4a6d652043dfe2L,
+ 0xdb1a849b4401ecL, 0xf81ebbbce8bbccL, 0x937dd4716efe9eL,
+ 0x9c19350ef85eccL, 0x260d932214273bL },
+ { 0x1d7e21e77bf1a3L, 0x199d689a544eb7L, 0x9da594194ced50L,
+ 0x71a60be8a0aeaaL, 0x183a0ae26d3b51L, 0x49f176a8df9728L,
+ 0x744376e3230674L, 0xb2cb21ae25541cL }
+ },
+ {
+ { 0x7a721589a0071fL, 0xe19dd29e7d2a6bL, 0x3deb34e55113f0L,
+ 0xef1f8ebede573bL, 0xa8f7ff95665e37L, 0xa2c21eaf2d7777L,
+ 0x1387afa91e2e39L, 0x04057b97db68f6L },
+ { 0x8b9d5ae1c241f7L, 0x689588a8e75993L, 0x79585b45c0e2d4L,
+ 0xba1ef167b64974L, 0x72685bc1c08a75L, 0xf0a5814d572eddL,
+ 0x71464a35ab0e70L, 0xc93c92b339aea7L }
+ },
+ {
+ { 0x1917e2a5b8a87dL, 0xea5db763a82756L, 0x5bba2fb6420e2bL,
+ 0x5cc0501019372aL, 0xb1ef8beccc5efdL, 0xaf06393f49c57dL,
+ 0x3ab1adf87a0bc4L, 0x2ee4cca34fe6b6L },
+ { 0xd1606686b8ba9bL, 0xef137d97efec13L, 0x7b6046550abb76L,
+ 0xb40ec2bf753a00L, 0x696ed22eaf8f1dL, 0x398c91fd8ba3d8L,
+ 0x11f203437db313L, 0xe1ec33bfe5079eL }
+ },
+ {
+ { 0x8a10c00bdc81f0L, 0x5f392566fe8e05L, 0xa595dab14a368eL,
+ 0x32b318138cec6bL, 0xd77afde1b00d00L, 0x3c979284d9923dL,
+ 0x78f0e7a76e13ddL, 0x5ee8e59bf75675L },
+ { 0x49ec89391b130cL, 0x9416182a47a441L, 0x54555b576e2ce8L,
+ 0xcbdd2fd349c40bL, 0x10ae7379392bbeL, 0x270b1112e2dab0L,
+ 0x5cb7712af293f4L, 0xfc22a33d6095c6L }
+ },
+ {
+ { 0xdcb5bbd0f15878L, 0xbcf27adb6bba48L, 0x979913e7b70ebaL,
+ 0x4c0f34b158578aL, 0x53f59a76ed6088L, 0x19b3b2c75b0fc2L,
+ 0xad628dc0153f3cL, 0x5195a2bcec1607L },
+ { 0x95f8b84dfe0f7aL, 0x935c6b0152920bL, 0x25f9e314da1056L,
+ 0x4910a94b28c229L, 0x54b03b48ee4d6eL, 0xc991fc3694e3edL,
+ 0x68c4c26dbe5709L, 0xc9cfce463d7657L }
+ },
+},
+{
+ {
+ { 0x21c9227f52a44eL, 0x7f105a2e85bfbdL, 0x887781f6268fc2L,
+ 0x56ee808a2d7e35L, 0x14f9de52d3930fL, 0x4a4e356dcb561aL,
+ 0x87362267f95598L, 0x211c3425f34151L },
+ { 0x8fcb75b0eaf9cbL, 0xcc9edf93d60ce2L, 0x54412c9a5fe627L,
+ 0x6036a72842dd09L, 0x71ce668a6c6099L, 0x02b30d75386764L,
+ 0xb69bed36f18e23L, 0x124c9b1d1de9f4L }
+ },
+ {
+ { 0xe8f8d95e69b531L, 0xe1e115eaff1049L, 0x9087cd1eddea0cL,
+ 0x8ed55a57449916L, 0x8009f547808404L, 0x990f21617fea55L,
+ 0x68ba624fe8ecf9L, 0x8ac295056d1f47L },
+ { 0x3257887529dfb0L, 0xc4a613f244c080L, 0xabb1ac028672faL,
+ 0xb2915c531eb291L, 0x6e368ca8fababaL, 0x6b8c2591fde498L,
+ 0x67724a1f2a548cL, 0x6b3b7e8f90409bL }
+ },
+ {
+ { 0x5415003fae20aaL, 0x95858a985df5ceL, 0x42bc9870ac6beeL,
+ 0x8d843c539ea1a9L, 0x5de200cb571043L, 0x084fcd51741a33L,
+ 0xe1ca20c0009d1cL, 0x0271d28e957e6dL },
+ { 0x84cbf809e3be55L, 0xc804dda1c578c6L, 0xea85489409a93aL,
+ 0x64a450a972021dL, 0xc6a2161e681312L, 0x280bff965bc111L,
+ 0xd358a4b0f8526fL, 0xd967be8953a3abL }
+ },
+ {
+ { 0x4c5e6157dd066cL, 0x37afd33634c8d4L, 0xa3ac88a42d8b87L,
+ 0x9681e9b938b607L, 0x7a286ab37fe4c8L, 0xdeee5742494245L,
+ 0x184b9d36af75a8L, 0x20f696a3670c04L },
+ { 0x1340adfa39e8b9L, 0x03c19290850b2eL, 0x435ebd42c0e1efL,
+ 0x49de18b142ee9bL, 0xb440b273f116f2L, 0xd94e9fa2214463L,
+ 0x1b0ddd36311543L, 0x1ae042a991ba3cL }
+ },
+ {
+ { 0xbc322f85bb47aaL, 0x9e2562554a5845L, 0x96b65ae21115f3L,
+ 0x46fbed4bb5757bL, 0x18aec4f4c42dceL, 0xc59caf68d801f0L,
+ 0x91894631205521L, 0x66bd8e089feb7aL },
+ { 0x39ebe95c529ee7L, 0x28d89928eadb99L, 0x6058c786927544L,
+ 0x877e7a5d3808ecL, 0x8f651111c52eafL, 0xfb59812ae221cdL,
+ 0x22289c6f890391L, 0xa97695b4966e92L }
+ },
+ {
+ { 0xf0a91226ff10f0L, 0x49a931ba2a65c8L, 0x3fcebbcb1d3cb0L,
+ 0x70eb79bca9685fL, 0x82520b5ab38cb6L, 0xccf991b76304c3L,
+ 0x575aab1af8b07cL, 0xec8166a5ed5efbL },
+ { 0xddc5698c8689b1L, 0x227c949b2e78d7L, 0x61323218e07d91L,
+ 0x658a11d22cfd62L, 0x908fb44004dd5fL, 0xe3d14f090d21b1L,
+ 0x6f3db9da6a1639L, 0x09d86c0333a525L }
+ },
+ {
+ { 0xd83eaf06f043f7L, 0x88ab648b52d5f6L, 0x67c664d57144d7L,
+ 0x55d7644eafc8b5L, 0x1c89f20cceb291L, 0x51aec7b831ac47L,
+ 0x51172fa6148854L, 0x8fabf7ef6d7bfeL },
+ { 0x5910316477ee27L, 0x5f299dd20fe61eL, 0x48079a842826abL,
+ 0xf4a83ba22591faL, 0x8fac66055482ecL, 0x48fd5f16b65b3bL,
+ 0x4288a7c9fd9e19L, 0x27db8199377894L }
+ },
+ {
+ { 0x2936ee47fd9dd6L, 0xcce5f0e9ec87c6L, 0x15a50e3db6e3b4L,
+ 0x61df105ad701c8L, 0x3601add1dff1f7L, 0xb761e06e8a16e1L,
+ 0x4341e021af3f91L, 0x9156a4a933fa3fL },
+ { 0x9dc46ae54bc01dL, 0x605577a64eb910L, 0x22b99f85a59a99L,
+ 0xab2dbaf0a229d8L, 0xa8bfb656599364L, 0x39ed4a5e94ebf0L,
+ 0x7b46a1e0dbb23eL, 0x117b1958751422L }
+ },
+},
+{
+ {
+ { 0xd19e8fd423bddfL, 0x9d77042387ef59L, 0x315cbdd849590aL,
+ 0xfdc637c7866c1eL, 0x72be83d03515a6L, 0xd44a4a00376780L,
+ 0x3b9613119e0c2bL, 0x023aca37b1a689L },
+ { 0xf5f368782282eaL, 0x44710898a8b5c7L, 0xcd2f00a17a3066L,
+ 0x754e11281ed681L, 0x9c6c70c0bfcefdL, 0xd6aced03b6f29bL,
+ 0xe443d562817a2aL, 0xe590ef4e7c0012L }
+ },
+ {
+ { 0xc2f96763e62e2aL, 0x661816eb2daa26L, 0x3515fd2dd5f512L,
+ 0xdc36e2756b6e75L, 0x0bdde4674cc658L, 0x102908600e7644L,
+ 0xfdf00451694a09L, 0x454bcb6ceac169L },
+ { 0xf4c92ab6481eb6L, 0x8b77afa09750e7L, 0xe6f42316362d6dL,
+ 0x0d45deef53a3aeL, 0xdac7aacd7dcf98L, 0x628cb7f125ec4aL,
+ 0x41e8a20aec0320L, 0x7418c7eea2e35bL }
+ },
+ {
+ { 0x4d649abdf40519L, 0x8cb22d43525833L, 0x15f6d137a5333fL,
+ 0x8c3991b72c23eeL, 0x248b9a50cd44a3L, 0x6b4c4e0ccc1a75L,
+ 0x3221efb15c99a9L, 0x236d5040a9c504L },
+ { 0x401c7fbd559100L, 0xcf0e07507c524dL, 0x39647c034a9275L,
+ 0x2355422f7e8683L, 0x3e0a16eb3ae670L, 0x1c83bcbad61b7fL,
+ 0x491bcb19ca6cbeL, 0xe668dc45e29458L }
+ },
+ {
+ { 0xe44c65b219379eL, 0x211381bbb607eeL, 0xd4c7428b7bc6dbL,
+ 0xba62a03b76a2e8L, 0xe1729c98bb0b31L, 0x3caeb50c6bbc10L,
+ 0x6c66727b0187aaL, 0xbf9d2f0fb90dcfL },
+ { 0xec693501184dc6L, 0xd58d2a32698eb5L, 0xb366d8da316b07L,
+ 0xe1e39bb251c017L, 0xbe44ba9adb157fL, 0xbaa9a9a8a8b06cL,
+ 0xd0f46356e473e1L, 0xd25a8f61d681c6L }
+ },
+ {
+ { 0xba39d5fcb102c7L, 0x66eba21d8aa1ebL, 0xcc2591a697fbf4L,
+ 0x5adb5792317f54L, 0xa01ae71f76c6f9L, 0x2c525de5042705L,
+ 0xc8f42724f4479fL, 0x26ab54ae6d7a5bL },
+ { 0xda217b5dc28106L, 0xc7cadeaeb2ae6aL, 0x0b1609453ea3b2L,
+ 0xcddcc1ccc6111bL, 0x5c47affa7a7bebL, 0xf9931bd0e52dabL,
+ 0x5231835c6dcf96L, 0x7095bdef27ea4eL }
+ },
+ {
+ { 0xee8adaec33b4e2L, 0x300665163ceb44L, 0xf1476fb880b086L,
+ 0x07033289569ce8L, 0x2cabf9a238b595L, 0x85017bc26c8158L,
+ 0x420b5b568d5144L, 0xa9f5f1ef9c696fL },
+ { 0x1409c3ac8fec5aL, 0x541516f28e9579L, 0x06573f70e1f446L,
+ 0x3e3c7062311b96L, 0x0033f1a3c2ffd8L, 0x8e808fcca6711cL,
+ 0x716752d07aef98L, 0x5e53e9a92525b3L }
+ },
+ {
+ { 0xce98a425a1c29fL, 0xaa703483ca6dc9L, 0xe77d822edfa48bL,
+ 0xd2e3455068abcaL, 0xb456e81482cfcaL, 0xc5aa9817fbfb08L,
+ 0x8979f258243194L, 0x727f2172cd043dL },
+ { 0x7cca616aa53923L, 0x387c5aee9bcb72L, 0x0173fd437580bbL,
+ 0xdd7795b75fc0d9L, 0x47d1c37345deaeL, 0x2eb5d7fb0d1c03L,
+ 0xf7a1b92958f002L, 0x7365cf48f61b67L }
+ },
+ {
+ { 0x4b22c3b562a5edL, 0x711216f5c7cd07L, 0x51f72c49ba0648L,
+ 0xc10d0930de9e6fL, 0xaca479bfda63baL, 0x4722a55af532b0L,
+ 0x8d59eb77236f39L, 0x5cad8744465c34L },
+ { 0xa2119e5722b0c1L, 0xb670264f343ea4L, 0x6910f02c19f387L,
+ 0xcfec5bc0381fbaL, 0x5f5de0d52c0a1dL, 0x4e474d56378cb6L,
+ 0x2fc802727e2ba3L, 0xa215da3159b541L }
+ },
+},
+{
+ {
+ { 0xed535858499895L, 0xa0aefd565c998dL, 0x210d8502d5a561L,
+ 0xc2cc23ca2cd9d6L, 0x2371d46c4d297eL, 0x88b2143d18d441L,
+ 0xbebdad9043993dL, 0x6ba91e7ad5f28dL },
+ { 0xc2bb3f13a731f4L, 0xd35cfac5d0d5c3L, 0x995099835ac427L,
+ 0x8938bb55458adbL, 0x0bd738cab26f3bL, 0x56db3d5a28cd8dL,
+ 0x87eb95fa1d8b4bL, 0xd6700efe7f3b4bL }
+ },
+ {
+ { 0x962c920ea1e57bL, 0xd3be37e6dded6dL, 0xf499b622c96a73L,
+ 0x3eaf7b46c99752L, 0xa310c89025590bL, 0x535aa4a721db23L,
+ 0x56ab57819714a0L, 0xeecb4fad4048c1L },
+ { 0x7b79ec4470c466L, 0xc4e8f2e1383ceeL, 0x0f5d7765750c45L,
+ 0xa3b3bc3725527dL, 0x2f5deb66d00cceL, 0x5d5a0f495a8d81L,
+ 0x50a442ee02b824L, 0xafb04462a11628L }
+ },
+ {
+ { 0x72b67bc0c613deL, 0x0150d4be6f0b24L, 0x847854e8ed289dL,
+ 0xe08292fa320f88L, 0xd5b6da329c6160L, 0x2a48e2d4fb9d06L,
+ 0x55d9e412de087cL, 0x65683b54f02100L },
+ { 0x4dc8c2ea8886c6L, 0xe966dd220d6114L, 0x99745eba57af97L,
+ 0x23a9a71b854725L, 0x8effe05621a047L, 0xf16d284049a4beL,
+ 0x95828c25b0660fL, 0xd5b69ba56e96b0L }
+ },
+ {
+ { 0x0b5b4244ffa0b8L, 0x0585b45096cc5eL, 0x413e1aef505d37L,
+ 0xe5652a30c7ab8dL, 0xab32fb72990120L, 0x6b8b16e3f09368L,
+ 0xbf9fadbefe128eL, 0x85f366b14b7671L },
+ { 0xcb2f294090608dL, 0x25e2769ac3045fL, 0x069c4f06131904L,
+ 0x1c57cf1329a779L, 0x72fe0d5b7cace7L, 0x04d9f430897a45L,
+ 0xbaf32f6359a645L, 0x0fa854ffa7485aL }
+ },
+ {
+ { 0xae3533c5f56f60L, 0x9773bbb0ad9360L, 0x769b34a38fbe6bL,
+ 0xb5ba8e9ffb0c00L, 0xa93931875472e4L, 0x12cac92ce5f30fL,
+ 0x514fc06a9e7dbcL, 0xd7ca86558b4734L },
+ { 0xd101ff365a730bL, 0x92da451abe70e9L, 0xfb5f94aef7bf4bL,
+ 0x8c3ef4c1d56c7bL, 0xb0857668435c10L, 0x7fbbbdae7ed4ccL,
+ 0x1da6eaf24f372fL, 0x0ab2c1f59b8ae3L }
+ },
+ {
+ { 0x63a1a78f10a4b9L, 0xbb5278d0c7e510L, 0x97b224ef874142L,
+ 0x0a9ff52b2517b1L, 0x1b5a485c5cd920L, 0x1a8e2eba1823b9L,
+ 0x2b088c00e914a8L, 0xe5ec3adcf13432L },
+ { 0x0d6ab3e6e7e253L, 0x9f0f5cd6f18458L, 0x839a744f459a6dL,
+ 0xb4b4f941eb15f7L, 0xe0313acc72cb14L, 0x58ee933b20472dL,
+ 0x5f73d7a872543eL, 0xb1700c5501f067L }
+ },
+ {
+ { 0xb70428e085f67fL, 0x5441d5143cabe5L, 0x4d0e8c2e0a6055L,
+ 0x8d39a080882e4fL, 0x615bb32c1cb39dL, 0x113f18df7a1642L,
+ 0xbab8cf5250681fL, 0x3017ba2677b72aL },
+ { 0xcd2b6e95a3a876L, 0x04765012035a69L, 0x31d6440efa2ea0L,
+ 0xde8f8d156874d5L, 0xcbc71cd0199d4aL, 0xc546b61e7f2170L,
+ 0x4e57e4e112c4c3L, 0x58955a8d1622baL }
+ },
+ {
+ { 0x0064cd704e2f6fL, 0xe9d458de0edd38L, 0xeb1a5977e0a5c8L,
+ 0xe322ece01fc0a8L, 0x8b9d1661032a19L, 0x3e7b539a89de94L,
+ 0xfa30262001c754L, 0xe33de4ddb588f6L },
+ { 0x4dafbdb954eb94L, 0xbb436480584c1bL, 0x622c93e5dbe29bL,
+ 0x968f9e3f57b931L, 0x98f03be0f6453bL, 0xb0ecc7f08f696cL,
+ 0x5af55f4a505335L, 0x028533efb3fa9bL }
+ },
+},
+{
+ {
+ { 0x3bc8e6827e8d86L, 0x4e43b3063f105aL, 0x5301b7d4981250L,
+ 0x8b0a75e9f72fa8L, 0x88f59db357348cL, 0x5f0ebb1ec4208eL,
+ 0x4712561c043d3bL, 0x9e5ded0c806b97L },
+ { 0xf9bd0a62121d09L, 0x1759ecbe337cd1L, 0xd1acc0ee945542L,
+ 0x3683febbd2f63aL, 0x44f1bccda5dfe9L, 0xa3606c9707f22fL,
+ 0x45ef0642d96ca5L, 0xfc3107d9022df9L }
+ },
+ {
+ { 0xe81320b44be755L, 0xdf213d55c7c761L, 0xf43d2d5b4e5db9L,
+ 0x3bcfd828dedcd2L, 0xdf368a6d37a9ecL, 0xfef20aef475a77L,
+ 0x22f5894162c064L, 0x956bc660142a7dL },
+ { 0xaaa10e27daec78L, 0x3cb9b72b6e9a78L, 0xa740bade383f72L,
+ 0xc31b4017759007L, 0xdada964a7afc50L, 0x6bf062cfd3d11fL,
+ 0x9470d535db3679L, 0x339447303abf13L }
+ },
+ {
+ { 0x533f44046e5d7fL, 0xd1793e349048c8L, 0x59e11501929b94L,
+ 0xcddbbcb8364134L, 0x795c794582774fL, 0x114dfc4e03081aL,
+ 0x541ef68ef54042L, 0x159295b23f18cdL },
+ { 0xfb7e2ba48a2c8cL, 0xe2d4572bb6d116L, 0x7bb0b22d750b53L,
+ 0xc58888cd142ee8L, 0xd11537a90c9e2dL, 0x77d5858d02eb9eL,
+ 0x1fa4c75d444a79L, 0xf19b2d3d58a68dL }
+ },
+ {
+ { 0x37e5b73eb8b90fL, 0x3737f7a3f2a963L, 0x87913fa9de35e0L,
+ 0xec7f9928731eddL, 0x6e6259e219491eL, 0xb2148a04de236cL,
+ 0x89700e8fdd309bL, 0x9ce51e49f0bf80L },
+ { 0xe7ec421301f17bL, 0xa4b570a3bc5f4fL, 0xc2b1b2a1285ee2L,
+ 0x5e86bc8c53db73L, 0xb65fceaf24fa90L, 0x9e74c5608ab024L,
+ 0x5c8003df9ed877L, 0xa632e9e4a2cbbcL }
+ },
+ {
+ { 0x32a4546c91c8b5L, 0xc122b5ac969363L, 0xbbbec5e3648b3aL,
+ 0xd5a365e25143b0L, 0xcf3e46454157ceL, 0x9712f04f9bab64L,
+ 0xc12d43a04b4008L, 0x51932d72edf1c7L },
+ { 0xaef1655b2f8470L, 0xaa8e3f36c24aceL, 0x7da75da6b4e761L,
+ 0xd371827b90bca2L, 0x84db4500afb45cL, 0xae12045ef46b5dL,
+ 0x91639a5d962f98L, 0x669cbe672f2ac0L }
+ },
+ {
+ { 0x851bb3183a4356L, 0x7d436bf9a1bf15L, 0x46a3f0e120b378L,
+ 0x9302abc3f5b357L, 0x1e0672693fef53L, 0xb12f4a95fd2ee9L,
+ 0x94a884c7de9433L, 0x2645234a6f2874L },
+ { 0x6fb56f5cdb8dfaL, 0x4a17dfc9e0ee4eL, 0xe269d8383ab01eL,
+ 0xda932dab77c10fL, 0x463af0c0321243L, 0xbe1d68216fc8a3L,
+ 0x2eae3ea48b39e3L, 0x94230213b03e7bL }
+ },
+ {
+ { 0xaeb507cb22f28aL, 0xa77458b49a6b44L, 0x232ed5ac03dc17L,
+ 0x79dfc169c61ac6L, 0x7c48be9cd71b93L, 0x983d68ac429cd9L,
+ 0x7709c4798ae2c8L, 0xe4765c0a5df075L },
+ { 0x23c4deb3367f33L, 0xbdf2b7e37d72a7L, 0xbaab5c70af2d26L,
+ 0xd609f7ffd026abL, 0x23b72b2541b039L, 0x8d06bac83be852L,
+ 0x911d4a9cb23d1cL, 0xeae815cfb0dbd7L }
+ },
+ {
+ { 0x487c35c2c33481L, 0xffab636b6136dbL, 0xccd4daea3d3aa4L,
+ 0x87149bbc3704e0L, 0x9de8119c0e8396L, 0xd49357a58e7ca6L,
+ 0x68789181562d75L, 0xc7453815ab1fadL },
+ { 0x0f1579802c9b91L, 0x7ffc3f0b1ddde5L, 0xa01d5e06aae50dL,
+ 0x6a97e65e279873L, 0x4bcf42fb5b1b41L, 0x1c6410f32f5982L,
+ 0xd4f760050701c8L, 0xff02663873b90dL }
+ },
+},
+{
+ {
+ { 0xdc53ea2e5b2de2L, 0x94b352d38acecbL, 0x37d960b0d9d5e5L,
+ 0xabd868f90bd997L, 0x781668f35a7376L, 0x043d59710118bfL,
+ 0xd4da719f57928aL, 0x01942f6983e46cL },
+ { 0xab97fc8728bd76L, 0x825956b4b5c1c5L, 0x202809fc82a104L,
+ 0xdb63e9cc8e3132L, 0xa41c701c2181afL, 0xd28018043e066aL,
+ 0xc734e4124044ceL, 0x4d9ab23505193cL }
+ },
+ {
+ { 0x0bcd42af9f0c3fL, 0xda21a46b94a218L, 0xe55243c0ffc788L,
+ 0x318aae647a5551L, 0x8c2938b79af9cbL, 0x5d15232ec1dce5L,
+ 0x3d310ba8ad2e5cL, 0xd3d972494f792aL },
+ { 0xdeb4ca112a9553L, 0x2f1ed04eb54d9dL, 0xaa9c9cf69fb7a1L,
+ 0xeb73c3a54dcd3aL, 0xee3eddcf5f201fL, 0x35f9e1cba7d234L,
+ 0x1d1d04cd2e242fL, 0x48df9d80df7515L }
+ },
+ {
+ { 0x4ecc77da81dd9aL, 0xa6ac4bb03aa015L, 0x7645842bbc4fedL,
+ 0x9ae34cd9d6cf52L, 0xf8ff0335917e0bL, 0x7c9da37c2cc175L,
+ 0x1e74dccaaacfbeL, 0xa8f2df07999af8L },
+ { 0xd06c4ea102a466L, 0x2156e87ae190ddL, 0xc95db8aec4a863L,
+ 0x49edffd244a6feL, 0x110fae6904f81eL, 0xbaa3e50a1cd104L,
+ 0x5bd38a20478b65L, 0x2b57d05daefbccL }
+ },
+ {
+ { 0x1ce92ba86f4534L, 0xb2a8592414f5e3L, 0xdd7a4c69979436L,
+ 0x7599aff3f0add7L, 0xe0ce4d3e2d4f64L, 0x74475cc401a29fL,
+ 0xaef6541a2377d9L, 0x54048f53f917b6L },
+ { 0x1b86b2205312ecL, 0x779ba2231493cbL, 0xc718369aac9320L,
+ 0xeab01a8617fce4L, 0x17b1f10f7187faL, 0xe68eda0a1aca46L,
+ 0x61033fe2586342L, 0xfc14e790b6ca43L }
+ },
+ {
+ { 0x9f2231913d2491L, 0x66bdb537997202L, 0x0bafb0c4617f34L,
+ 0x5917831f3bb7b3L, 0x6feb2a6b45bddbL, 0x08662b30202c19L,
+ 0x0bc2b5705852f6L, 0x2c00fd491818c2L },
+ { 0xca7672cda37dacL, 0xfe4c04c5a30865L, 0x5f1399f322e92aL,
+ 0xe7d67ea25b1bebL, 0xe08b014dce7f68L, 0x24df52af2f2b3cL,
+ 0x2028b23750ecd1L, 0x9b25d4bc810a45L }
+ },
+ {
+ { 0xa35b7157a9d799L, 0x6da1eb301f9c99L, 0x33ef91ce363ba8L,
+ 0x21c0e2ece140daL, 0xb0b11bf158cd84L, 0x6a8744293da438L,
+ 0x924f10d3db585bL, 0xf5ddd7310c6159L },
+ { 0xb72dcb86a74c21L, 0x6d14198cc8f79fL, 0x99f4b6c9c5a8d6L,
+ 0x063968890e135cL, 0x330edb883f6385L, 0xe1a5a6b9079675L,
+ 0x6e37fa8b8f5fe0L, 0x60e2fd961dca1eL }
+ },
+ {
+ { 0xc6cb40366c395eL, 0x03b21a7b51d0f1L, 0xbc478a5e693181L,
+ 0x0017c2fc6cff33L, 0x740a5b839d8d1eL, 0x3968d664d9ec6dL,
+ 0xfd53738b0ef1b0L, 0x73ca8fd1ed0a04L },
+ { 0x4ace93875ab371L, 0xd602936ddad7e9L, 0x1f5424a750bcc2L,
+ 0xfe09b3668c7a17L, 0x165f7de58341ecL, 0x95b825a6ce61e5L,
+ 0x9d31e1966c83c4L, 0x65b3e08cc5887bL }
+ },
+ {
+ { 0xd37e93221482d1L, 0x9af659708b6380L, 0x279426a7d61e4bL,
+ 0x80dd0ec80997adL, 0x7239b0dd5b76d4L, 0x92e6c73e76c098L,
+ 0xeeb2321eab3e1dL, 0xa69c4a7eb1a910L },
+ { 0x46d6aa7833d9aeL, 0x3ee6957572b0feL, 0x44ccbedcdb3d97L,
+ 0x342f29dcbea01bL, 0x0d518c58926876L, 0xaaabae75585d2cL,
+ 0xc548c77e008f58L, 0x819e2fa21fab2cL }
+ },
+},
+{
+ {
+ { 0x468e149c16e981L, 0x286c7909ddbb7cL, 0x2a92d47db7a38aL,
+ 0xde614e68a27cb2L, 0x8dc8822e5b0ab6L, 0x38441aecf48565L,
+ 0x11ed5c9089435bL, 0x238928682d0d31L },
+ { 0xc6698d472f2f31L, 0x295242c56d76afL, 0x4099205eba563bL,
+ 0xae7de5a3ab7384L, 0xccdf127d0ed86cL, 0xb9b6d5b965c3c3L,
+ 0xe351a8f2c31ad7L, 0xa761dd8ac12f13L }
+ },
+ {
+ { 0xda115ddf171ab7L, 0x2de17b1401f93dL, 0x95019ca40964b4L,
+ 0x169d1f465ba3c3L, 0x534a0070090d08L, 0x805c5e282bf410L,
+ 0x15dfe1165f8d90L, 0x827a416ca72456L },
+ { 0x5af888433a36c4L, 0x8bfa54cd8ee604L, 0x08fd1419ce290fL,
+ 0x2db5e8c287b3a6L, 0xe5be98103cdad2L, 0x155b874bf810b9L,
+ 0x2ae42de670f473L, 0x22185847f74657L }
+ },
+ {
+ { 0x54b2a5023ffa43L, 0xcf87b16a24d919L, 0x1ff540263524e8L,
+ 0x73c94e056d1e54L, 0x76515523899fb5L, 0x13a721418723bfL,
+ 0x39afbdd3561517L, 0x49b790a9f2862eL },
+ { 0xc8c1f4f527d2ceL, 0x1997aec7609bb7L, 0x583ad8002a3400L,
+ 0xac2374e4f79706L, 0xbf1f9a821b7183L, 0x06158ab6600fe0L,
+ 0xfcc9b2ebd56751L, 0xe1de5acddaaec7L }
+ },
+ {
+ { 0x230baa1788fdabL, 0xf30860a7d04597L, 0xa2c7ece99f4caaL,
+ 0xbd39f106ad065eL, 0xfd92f5d3bef7bdL, 0x6069fad96d2203L,
+ 0xbff38cac4d9e0dL, 0x419a0171fda313L },
+ { 0x5d77fd8572f035L, 0x5af99f2b282b40L, 0x7257d3b23facffL,
+ 0xf2ee22358c90afL, 0xcc2687d9b6a52aL, 0x140892c302430eL,
+ 0xa934d5e3ec4f38L, 0xc087d7c3bd18beL }
+ },
+ {
+ { 0x7e94138a2c5ed7L, 0xbc8ceef53610bfL, 0xe89356bd86f803L,
+ 0x9a3a3805a55330L, 0xe894aba11ad648L, 0x2e68fbaba95918L,
+ 0x643e2bafcad344L, 0x0dd025661640aaL },
+ { 0xc02e479e25cbddL, 0xd78c4d813a1b3fL, 0xa6dae8fcca9692L,
+ 0x3dd91e9e5de8a0L, 0x78ae0ce764ea36L, 0xb4ad99985dbc5eL,
+ 0x967ff23e82a169L, 0xaeb26ecbaee1fcL }
+ },
+ {
+ { 0x8c502559a6f90cL, 0x56e7abe0ea374aL, 0x675c72256413b2L,
+ 0xd3fc17e946753fL, 0x28c4e1fe235f7cL, 0xe209bcdb028eb0L,
+ 0x7d0f93a489fe88L, 0xb966a2e063706aL },
+ { 0xb6c228c4a30319L, 0x6868efeca6d674L, 0x0610a70057311aL,
+ 0x0808112bad7f89L, 0x2a2462c1dd6181L, 0x52ed9feb58e88aL,
+ 0xbbff16f33821a2L, 0xda53e9617f882aL }
+ },
+ {
+ { 0xb6ffca38c30e5dL, 0xa90f9915c905f5L, 0x72fb200d753e88L,
+ 0xe509d4c7256c6aL, 0x369e552d866500L, 0xee4b7e033cf8aeL,
+ 0x280d954efcf6ebL, 0x5b275d3d557f0eL },
+ { 0xeb17211b5cecf8L, 0xd6ad50fbdb2f8dL, 0x2478c7b35e04b7L,
+ 0x97e7143ac73bd3L, 0x09d6ede4817e24L, 0x68fea712c405e1L,
+ 0x34adbc905f67a1L, 0xd20ab7073edf99L }
+ },
+ {
+ { 0xe116a96569f191L, 0xb3f0bce4d6e29aL, 0x30b9e1af51dbabL,
+ 0x1dd36f3346d276L, 0x83151030749a27L, 0x242f148ab47f70L,
+ 0xe8a5bcf5585681L, 0x8b801845ed79baL },
+ { 0xa4042fd3894ad1L, 0x82f781d2b88bc6L, 0x2d34cacbe4c397L,
+ 0x8731aeadd99c9fL, 0x0f95498ef1d382L, 0xcaba2e1dd0bbc9L,
+ 0x78889e954064e8L, 0x8cd9c9761a8ab9L }
+ },
+},
+{
+ {
+ { 0xf31f53ffa0459eL, 0xf8742a1315cd6bL, 0xabe2f50ae64e97L,
+ 0xbd787419b9da48L, 0x4521a3351e526eL, 0xfa05935e10ba45L,
+ 0x5c947e1e8f903cL, 0x0aa47d15a754eeL },
+ { 0xb2849efd814825L, 0x9c2a5d25c9968dL, 0x24dbb2604e634cL,
+ 0x33f3a4cdb38194L, 0xe04f609c8a2b6bL, 0xcaefd8eabbbfdbL,
+ 0x683119a404498bL, 0x24ab7a98b21cbdL }
+ },
+ {
+ { 0x6f1326921fa2ddL, 0xd79e61cc10a4bcL, 0xac4b3ce4bd6d46L,
+ 0x52459b6bd3f37bL, 0xce0f0a3a396966L, 0x050d1d5a1ed488L,
+ 0x1b9c403e0b17faL, 0xee1abd004a2e66L },
+ { 0x97065c35cf3e3bL, 0x6513d5fbe33441L, 0xcd3463479047aeL,
+ 0x45cbb1cfd22df1L, 0x7a173ae967b17cL, 0x75f5ba72223cdaL,
+ 0xe3d12dbefe0a73L, 0x3b7f94dfd7adcfL }
+ },
+ {
+ { 0xd596a13f1e9b7dL, 0x04f5bdd6734e0cL, 0x18b694f8be163aL,
+ 0x15620c7d959fa3L, 0x65fc2c553d2a3bL, 0xd44a364c4d36f2L,
+ 0xc8b421f268ceabL, 0x564139abfe2bd4L },
+ { 0xb52461019d4633L, 0x5ab3f886346934L, 0x96691fe9819422L,
+ 0xdfdec898b39b82L, 0x84b1c7997cfb27L, 0xe59a98d4d6d004L,
+ 0x5e5d0c612c350fL, 0xb431220d415774L }
+ },
+ {
+ { 0x3d0ca736aae0a2L, 0x7b1991f48c2d8cL, 0x00ae8565cdae72L,
+ 0xdbb6ca0bd55128L, 0x3c2ab2a45c82bfL, 0xea5a55979545caL,
+ 0xeba9a26d5927d0L, 0xb52e40183257fcL },
+ { 0x55ed517ca9650aL, 0xbdaa081e3ebff2L, 0x8cf7ce49f8831bL,
+ 0x1d0b5bd6e3b8d3L, 0xa314a9fd8fc869L, 0x07f2079b892babL,
+ 0xb700dbfa0cc9d9L, 0x7105a086dc0a39L }
+ },
+ {
+ { 0x0c7e05d8c7d901L, 0xa7ff681af3182bL, 0xb88e3caf9a0d06L,
+ 0xfe20a12c343b7fL, 0x9f0257703251f9L, 0xf225dedc40c5ebL,
+ 0x50e0cecb208ea7L, 0x5b250f0e6eeb65L },
+ { 0x807a1534806b6eL, 0xded120afa94139L, 0x237ddc749366fbL,
+ 0xdd3674e5a34bcbL, 0xef6cdff9c4a61dL, 0x036194bb2fb896L,
+ 0x38659539528cd9L, 0x0723c596936a52L }
+ },
+ {
+ { 0x1f84cd5e17719dL, 0x545939bc73b394L, 0xefbf3c583e84e7L,
+ 0x6cc46f1f77fd66L, 0xa629f591383ab8L, 0x9177ffacd35cd2L,
+ 0x039187f9dd411bL, 0xa9cf1cf7b7eea8L },
+ { 0xa3b105aac47e5dL, 0xa755bead0a9da4L, 0x50cfbae73da15eL,
+ 0x9456cbc60b628cL, 0x7ffc3629b7a910L, 0x30b5924cd6d6a4L,
+ 0x198629f0b04ab6L, 0xc74609c624dea9L }
+ },
+ {
+ { 0x27d4d77af12fa6L, 0xdd8a216690aeb2L, 0xe48fc02fe24417L,
+ 0x1970403720e17eL, 0x95013fdce37b42L, 0x06817d2de4bd9bL,
+ 0xc5863e763d0ba2L, 0xa1bafc0a556f5dL },
+ { 0xf28ec7b410a78aL, 0x0dcac420a01a63L, 0xfcd3fa4b5bce11L,
+ 0x054d7e5d278b89L, 0x5195db85ce49e3L, 0x4c0b1672c73d96L,
+ 0xd94307720a1bdbL, 0x66fa8b359c77a7L }
+ },
+ {
+ { 0xb9e93aed7462feL, 0xbfe54b218dde4fL, 0xaabb5283dbb08eL,
+ 0x8c367020e5fc45L, 0x35028888e69be3L, 0x6d2efc1c12a11dL,
+ 0xfce5cebf265e30L, 0x58c8bb35742c7eL },
+ { 0x32e89dcccf7fa0L, 0xa811f33dd020a4L, 0xa10d6205129fe5L,
+ 0x3841c88e4ed29bL, 0xf3303a9d8b1ea6L, 0xa9a0cad1781f58L,
+ 0x4502b388f3ef0bL, 0x2b7587e74c6d35L }
+ },
+},
+{
+ {
+ { 0xc6eaea123ae7cdL, 0xa1884d473c0caaL, 0x901e76fef1ea88L,
+ 0xdb9935ca14269dL, 0xe8b2486947f1deL, 0x4ad56f4a657588L,
+ 0xe7680542913fb1L, 0x2abff5d37600daL },
+ { 0xa814813a81a797L, 0x63e76a446acb69L, 0xb1038394ab8277L,
+ 0x587de349d8e759L, 0xdfaeb8dddf62dfL, 0x24fe1cf9239d49L,
+ 0x7de7409e130d1cL, 0x3ecfef9581d070L }
+ },
+ {
+ { 0x8d177a0f87c72dL, 0xae7e5818c6d1deL, 0x0077b5f8cece85L,
+ 0x382483832d2187L, 0x49d8b156db2bd2L, 0xe9e5513c8d85b9L,
+ 0x63c410ce05c53fL, 0xceaf2fbd86f752L },
+ { 0x0b432fe93806c5L, 0x18eb15d3d06c75L, 0xcaad82612cfc02L,
+ 0x581e0401e2d045L, 0xd573cb595edcfdL, 0xce71948dbc66e3L,
+ 0xcf68721acc14eaL, 0xf68bea26cac4dcL }
+ },
+ {
+ { 0xd8576afcb74da2L, 0x8771c29c433f46L, 0x7315af6e2f5b8eL,
+ 0xc195481ba33928L, 0xb77dcc22fb1f94L, 0xcb3e57ca610f75L,
+ 0xeb2a92753907dfL, 0x916f14923eff95L },
+ { 0xbb378e4b6cd291L, 0xa2a5e2b2f13ce1L, 0xa8a0e60bcd00b0L,
+ 0x5902741682b75aL, 0xa0882c93f65a77L, 0x2069f75c93cfffL,
+ 0x1ede40570c0cb9L, 0x13840c90d526c4L }
+ },
+ {
+ { 0xdc2caaa03ced48L, 0x2079219a0315beL, 0xca493563b1f642L,
+ 0x0202dc7b0665f2L, 0xe5d6bbdb7a5238L, 0x36fbd5e26eab32L,
+ 0xb3988f1f5819b4L, 0x5b15dc84aa4d69L },
+ { 0xa52feed54e5c24L, 0x927471be91a797L, 0xd119bfdd57f677L,
+ 0xde38f7b78e4c4fL, 0xa7af516b150bc3L, 0x403b21e26b76c2L,
+ 0x589067d92300dcL, 0x04e406a066802aL }
+ },
+ {
+ { 0x28e7d09a9ca9bbL, 0xaa84fd5fccf4a0L, 0xdbe9fb8635b7edL,
+ 0x9ede3f5d56fc7cL, 0xa4b5031b01cb29L, 0x584299d7f93703L,
+ 0xbd28868b6fe825L, 0x1d385d48b9c2d9L },
+ { 0x6606f4a822be80L, 0xb5a0165626d0fdL, 0x9920a2014568adL,
+ 0x7d430f41c6d174L, 0xc243e16e02e9e9L, 0x367f1d2a6bd649L,
+ 0x693910071b8c36L, 0x2ede1314de2984L }
+ },
+ {
+ { 0xdc781875beec32L, 0x1fff0cca525ff4L, 0x6e86425676df34L,
+ 0x2b4e8a63f638e1L, 0xc4991d29b1e59fL, 0x399d0011589717L,
+ 0x406464ebe041cdL, 0x901cb3d9e65bb0L },
+ { 0xf5f4572fb42307L, 0xf81b3b0f1b7307L, 0x8fb695cf2094d1L,
+ 0x7db4792db56f7bL, 0x36836d55a794e0L, 0x2da477b09bc879L,
+ 0x1cdfadb1887c40L, 0x65dc6c2f2699b6L }
+ },
+ {
+ { 0x36f9f214737972L, 0x48f0c8b7a387b0L, 0xa156ed339a1d24L,
+ 0x375293a0fed268L, 0xf679f487ff75cbL, 0xd15a00f1cc9e62L,
+ 0x92a7dc722c3877L, 0xe9870636fb0ed4L },
+ { 0xfd8e59c16f5f3cL, 0x375732eaeeb48eL, 0x2dd9213ca1ab42L,
+ 0xcb062099ffcceaL, 0xfc611f6b23edfdL, 0x271634999b060eL,
+ 0xb938b5d820de8aL, 0x138f6e7eb49a32L }
+ },
+ {
+ { 0x7feda63e485f70L, 0x646380aeb27b2cL, 0xcf8fe32c4511c7L,
+ 0x2c68e1eff9406aL, 0xa9f2fd920b6020L, 0x1c98fc63b3e465L,
+ 0xb8dac3593e53aaL, 0x2fb47b6a750e96L },
+ { 0xea373ef1950bb3L, 0x81566944ac7aecL, 0x8d6b3c2b55b931L,
+ 0x5d13f2db62ef7dL, 0x4647f2aab9182bL, 0x8f56c5a33bf07cL,
+ 0xc5ab284b35a221L, 0x0747ab75a46a6bL }
+ },
+},
+{
+ {
+ { 0x5b9236c86b85c5L, 0x5967a0dc482448L, 0x397c9557df6ae0L,
+ 0xf83ee1c5378f2bL, 0xf82df656e05dd1L, 0x4c424f619d7c8bL,
+ 0xa612550a6d5f2aL, 0xfe8482a63c3ebfL },
+ { 0xcb8d4030142c82L, 0x08b06623679e6cL, 0x3ea51463eca5eeL,
+ 0x089eb3b1370500L, 0xcbfb19c5a0d306L, 0x2f6858842a65bbL,
+ 0xe3e1db5e51e119L, 0x2c150e7110895eL }
+ },
+ {
+ { 0xf323488f6d4c4cL, 0x5fc931f63b87e2L, 0x8867da035c759fL,
+ 0xb6f1eff9746d4cL, 0x8a8172d990be0aL, 0x1113eee5c407b4L,
+ 0xd80dacf378ed8aL, 0x99b57cf3fa7fd1L },
+ { 0xf5bb6d95176405L, 0x6b8963a92e83b5L, 0xac55b6b8a7ef8dL,
+ 0xe73fa126c1fbf0L, 0xdb3756060148dfL, 0x72f1a98f3f1fbaL,
+ 0x1f71d0aea550f2L, 0xc3ea4f09544a87L }
+ },
+ {
+ { 0x5b09da24322bf3L, 0x2a573d561264e1L, 0x93cb2e1803acc4L,
+ 0x397b4fbe502fc6L, 0xddfb21239e0ebcL, 0xeccd8f5bbcbc57L,
+ 0x49d3bed4663788L, 0x37192aa1218df9L },
+ { 0x8a05bc92ffa3c6L, 0xc38c28123ebf4dL, 0xc80d547fe343a8L,
+ 0xa8d5a5b6c63516L, 0xc5d8ce18d8fa6bL, 0xeb5e87224a87c0L,
+ 0x9806e9e75bfa23L, 0x11f0889689469aL }
+ },
+ {
+ { 0x81005f68e75666L, 0xb84d861d349505L, 0xe0832829f321eaL,
+ 0xb751d7acfa33a1L, 0x793cf6f067c550L, 0x073a6b21027e56L,
+ 0x53f40ee66a6012L, 0x70bfaa8c210fa9L },
+ { 0x1518e39e4b5998L, 0x8f0b53024b8d9cL, 0xd91c281afdf923L,
+ 0xc5cfb2824e3f69L, 0x63a529a870871fL, 0x3d3e8872128dadL,
+ 0xed658dccb30cceL, 0xf9373b9afb7baeL }
+ },
+ {
+ { 0x22d4dbede58ed2L, 0x4fefc1d03f8789L, 0x6b0a1fe344817fL,
+ 0x96bef40a56b0b2L, 0x32684eeda249faL, 0x8298864524a91bL,
+ 0xa958baf0c736a1L, 0xd033a7def2f3e5L },
+ { 0x5be3edc43f4d6aL, 0x326a39d9c89abbL, 0x90c44f755d997aL,
+ 0x20581066e966c2L, 0xdbae4906548038L, 0xac7bc97d473fc1L,
+ 0xb34488b4b2603aL, 0x27aea275e9bb98L }
+ },
+ {
+ { 0xa59e7281b88773L, 0xe2f05d40c241f6L, 0xa56229e4e75749L,
+ 0x8f00c0b1b10705L, 0x855994619394d3L, 0x0d7e352aaf5e32L,
+ 0x526c462787b8eaL, 0x89297d9a179d48L },
+ { 0xeff17e6ef43892L, 0x17091eb221f841L, 0x82f5eb34a4b848L,
+ 0x6bea4778eb7b76L, 0x21f227176c536cL, 0xd9ef2c896c81bbL,
+ 0x7c2754654bf4d3L, 0x9dd4662d7c28c8L }
+ },
+ {
+ { 0xe7fff0020e1a6bL, 0x26a35c6a08d467L, 0xb3c773d3248c91L,
+ 0xa646615ba7d935L, 0xa91f453b0d26faL, 0xdcf9c3460c6d32L,
+ 0x63668619e3e3dcL, 0x3012813f30f3e2L },
+ { 0xac6623dc2fc61aL, 0x108dc252bfd2ffL, 0xd7f5c0d231d6eaL,
+ 0xa904f9aad1107eL, 0x46941c20d1e9c8L, 0xe5b6451c810cf2L,
+ 0xaba8e674f511d1L, 0x5b4b94f08373feL }
+ },
+ {
+ { 0x002d4e2849c230L, 0x9bed0efd8ba391L, 0x745e0c0828e319L,
+ 0xcd40907ca58de2L, 0x2c87ab11abaa4aL, 0x3c17a97db64391L,
+ 0x36b184e86c72d2L, 0xb03d202485f7aaL },
+ { 0x2b6b79bde24abaL, 0xdcb78542325fb2L, 0xf5d1db966ebae2L,
+ 0x35a4d5b903840aL, 0x7afeb09190e9daL, 0x1818f6a35c1792L,
+ 0x90091fa3faa269L, 0xc4ccff62570235L }
+ },
+},
+{
+ {
+ { 0xa177619ec85940L, 0xfca24db7ef7eeeL, 0xb2450f37a90c11L,
+ 0x29d256ddbf4f85L, 0x920c8d051316c3L, 0x2f7f7ba04474daL,
+ 0x308117f2ec9a0bL, 0xd0a231ad0d2085L },
+ { 0xf3288fc7ab641dL, 0xc68bade9f4fa32L, 0x768f014bbf8253L,
+ 0x5eff260c0a33f0L, 0xc71b4536bb93ceL, 0xa71d045680697fL,
+ 0xb62444cce72bc3L, 0x11f03e8d1379f3L }
+ },
+ {
+ { 0x1f54789c16df92L, 0x874c642e3ed142L, 0x6699f60fa2a9f1L,
+ 0xbd1b8d33fecfc1L, 0x59682d58a3d953L, 0xf17c0214a36b81L,
+ 0xeb9621d181a666L, 0x7c2c3ab3cf1ad8L },
+ { 0xe6888c3e529f7cL, 0x197b66ab355315L, 0x63b558a83e31acL,
+ 0x4aa7bc5891c68eL, 0xc17d989592e360L, 0xc750a291363666L,
+ 0x0d534704909ac0L, 0xd6d02724594a10L }
+ },
+ {
+ { 0x35c541b3fbb635L, 0x50016d05982afaL, 0x58ebce496b0ca0L,
+ 0xb940027577ea56L, 0xf29d305e38480fL, 0x43705b0ebd6a2cL,
+ 0x0e4acdae90c639L, 0xbe94a29f56e05eL },
+ { 0xc61f4a030659adL, 0x39074adc402211L, 0xfe0d8d551b621dL,
+ 0x2d02e8dd1d5222L, 0x05ece3c46c2683L, 0xf70705ac689d41L,
+ 0xe3caf444d837bfL, 0xfda058475ba6d0L }
+ },
+ {
+ { 0x1098163cb7d458L, 0x12b645ff5ba834L, 0x70a318128af72cL,
+ 0x5f4727ef32e5ddL, 0x7cbae1510a21b4L, 0xa80bf806785389L,
+ 0x9827402b8f93b7L, 0xe385f8208349daL },
+ { 0x2d054619589f6eL, 0x6aa5b26e7c0191L, 0xe79ae12bd5574dL,
+ 0x5d13f914148e61L, 0x7b2be0f13716ffL, 0x82b0fe680bb81fL,
+ 0x697633c3e2569cL, 0x6c1f083873f8b3L }
+ },
+ {
+ { 0x6e26d850be1674L, 0xe4e47f6ab8044fL, 0xfdf46e882fc434L,
+ 0x639ae2cc89cadcL, 0x2244a524b85bdcL, 0xb1e4790b7cf4eaL,
+ 0x51dce037e0bb8fL, 0xdd143352716ceeL },
+ { 0x1c049b48e8841dL, 0x6bf26dcb97c621L, 0x21d6255ba01178L,
+ 0x477258a8e4f0e4L, 0xf5e437e68f8ef1L, 0xd118fbc8b03e1eL,
+ 0x3d6bc51e1c91b3L, 0xa259486d5b6907L }
+ },
+ {
+ { 0x4159cfc7b6f5dcL, 0x05a52b3493694aL, 0xeeb511c83b8883L,
+ 0x19d79e42b06400L, 0x8e503a2738f37eL, 0xa30e5795a94ad9L,
+ 0x3981c75262618dL, 0x06b6c692dcba19L },
+ { 0xd7242ee4d1b051L, 0x6274ccb3b350c4L, 0x66df0bbf540019L,
+ 0x4d66be65ae12d5L, 0xcea29601049cbaL, 0x40473398df84b3L,
+ 0x7d6c96b75a31c8L, 0xbb80159874174cL }
+ },
+ {
+ { 0xf0f7be059f1aa4L, 0x798f39adcff451L, 0x96763ff8014e1eL,
+ 0x03987a809cc5ecL, 0x4919656893650aL, 0x92e8eef75e24dfL,
+ 0x54e97cde89d639L, 0x8081d067682cc0L },
+ { 0xb9ef41aa8ceb71L, 0xb8173a4a4d7aaaL, 0x93d81b1c54ee10L,
+ 0xabe180570a445aL, 0xac0ff9764d569dL, 0x86946b23e570beL,
+ 0x8e11dd24180641L, 0x3d0b33c99f67dcL }
+ },
+ {
+ { 0x2c9637e48bf5a4L, 0x9fdec19ccaf112L, 0xe5cde9d5c42023L,
+ 0x9869620878f0ccL, 0xcf970a21fe6ebaL, 0x1df5ec854e678bL,
+ 0x4667f0128d00ddL, 0xfa7260db0b3fa8L },
+ { 0x6bd2895b34239bL, 0x04c8bc52d2a50dL, 0x14e55ef6cb23e2L,
+ 0x6440c273a278d5L, 0xf4b12e32193046L, 0x46adf645dd4c08L,
+ 0x70e29984656e8cL, 0xe7b36eae4acd44L }
+ },
+},
+{
+ {
+ { 0xea64a5716cf664L, 0x8497ee426fd357L, 0x44d94b4814e851L,
+ 0xf4aac225a6a2cfL, 0x947b30980c301fL, 0xf390ba17865383L,
+ 0x16c4fc6d1773d3L, 0x61b98146227220L },
+ { 0x07dd03a1dd0270L, 0x290ca820f160dfL, 0x8f2205444ba955L,
+ 0x4e85e450b6f1b3L, 0xfd73ce9ad78089L, 0x67c12702f2cb0eL,
+ 0xa7de0d7ee33a61L, 0x6a811cc6553261L }
+ },
+ {
+ { 0x5ef05742d0a427L, 0xe8d2e95220a341L, 0xdd28cbf8044886L,
+ 0xdad7b4ba1aa58bL, 0xb28f3738ec901bL, 0x1841a935bbe3dbL,
+ 0x8fd7cd1a075feeL, 0x93b603fc0d3cddL },
+ { 0xca54fd55edd859L, 0xa4cb05f64ed687L, 0x3138668ed1a3d7L,
+ 0x1224fdaee32be5L, 0xf1f532bc80aeb3L, 0xa4f65d0e8d4d69L,
+ 0xc697a015905fe5L, 0x514da7a6690ce4L }
+ },
+ {
+ { 0xc7b9af83de4a55L, 0xc79bad7b318d93L, 0x1808071f5b1c83L,
+ 0x92112efb965b16L, 0x655ab387bb740aL, 0x53dbc8b384ff87L,
+ 0xd153c2872dc6f2L, 0x2ec20e199c7819L },
+ { 0x65e46ea3b854b5L, 0x272d5aec711db5L, 0xfd1bb5326e19e8L,
+ 0x33280b83dc0665L, 0x95b986eb8f1c4aL, 0xa671fc4a685c4aL,
+ 0xa03cbd583bdbbfL, 0xd329402ab77544L }
+ },
+ {
+ { 0x40fa6518e62b35L, 0x3913b11f9e55a6L, 0x4e8089b5270a41L,
+ 0x565f52a80d1886L, 0x93b5f05512749bL, 0x35c869c141c547L,
+ 0x9a44a1af86717fL, 0x2b9984b9c2b2cbL },
+ { 0x61fb6074952322L, 0x2d4072f7af1464L, 0x9b2fa8c600eb30L,
+ 0x6071fb7f10668eL, 0x27cc24d90634caL, 0x3875bc2471d32bL,
+ 0x678590ba11210cL, 0x352b447fcc5a9aL }
+ },
+ {
+ { 0x795d5415fa3200L, 0xadaa557a92949fL, 0x42fff063cc88c4L,
+ 0x26d683171b68a5L, 0x3286549e67ad8cL, 0x5bf636386396b2L,
+ 0x41229b6e12c8eaL, 0x05320c9748952eL },
+ { 0xae36b63900b460L, 0x9354ff2f2b6affL, 0x10b810b065ee0cL,
+ 0x4d6925fcc8bb38L, 0x31c03fd7a22f14L, 0x76b7f4457544e8L,
+ 0x3a9123cc0eed26L, 0x77acd67e0cd1ccL }
+ },
+ {
+ { 0x2e9053007ec527L, 0x32388ef62937cfL, 0xa445389e229188L,
+ 0xa44b68e33bcebeL, 0x5a8722e4c4e701L, 0xfd066e8cf07e41L,
+ 0xa3c1a4f95fab62L, 0xb4d6a1be542f24L },
+ { 0xe6a92e4af6c9b5L, 0x9452484c83d61dL, 0x422b55b0062276L,
+ 0x261973a5279688L, 0xde8be263999fb2L, 0x64e96287b029caL,
+ 0xd8edfaa06897d4L, 0x408319c6955511L }
+ },
+ {
+ { 0xff6baed50a5632L, 0x922b7d05c5885aL, 0xdf0f3b31b45864L,
+ 0x27e49c0c04340eL, 0x618c566122c447L, 0x7863a38eafee7eL,
+ 0x7143affb828cb0L, 0x51fcf4cf9d054eL },
+ { 0xc4a4b3127f5e09L, 0x021f47a90be2bdL, 0x1a060197ab956dL,
+ 0xe77fa1586ea86bL, 0x9ccde87d550ef3L, 0x7dee53a6532654L,
+ 0x8b4f060e826387L, 0xda38637ad077b5L }
+ },
+ {
+ { 0xbc901b30e9fac8L, 0xfa082046fb2a2aL, 0x92f68ab5e04efcL,
+ 0x184a30a9ac12d0L, 0x1aa11aab25d479L, 0x8bc5f4c0f03161L,
+ 0x7e3a083cfc8817L, 0x84d9355597f93fL },
+ { 0xc014478239abc6L, 0xb226b098d37b04L, 0xb056942f575789L,
+ 0x816b95aba745ebL, 0x2a49d39b98ddb6L, 0xc41ca26291af81L,
+ 0xb3afe99ab26347L, 0x59c31bc604b638L }
+ },
+},
+{
+ {
+ { 0xa16a8b9c42befdL, 0x731c9c92052f00L, 0x1ad49b41f5dfa0L,
+ 0x7a289e3bffce36L, 0x868fac00c79cf1L, 0x6d6d28486721abL,
+ 0x590f928e726c94L, 0x0e802cb51f3841L },
+ { 0x6a6a57a0b694bcL, 0xb9bb0cd8120fb8L, 0xad96ac79c05826L,
+ 0x294da8c7768df0L, 0xfe32311b56c6c6L, 0x291c2c6ae8d050L,
+ 0x1c765e7e7db4c9L, 0xe058298d65f9f7L }
+ },
+ {
+ { 0x4bfa85b7e8d345L, 0xa04ef95de1dfc8L, 0xb5f7f21324ace3L,
+ 0x4b350a1574b14aL, 0x11436bff8e5c8dL, 0x1c789f97642369L,
+ 0xeb5e335fb623ceL, 0x9deacd2442d562L },
+ { 0x4ff989f531ee71L, 0x43e2c49aacb52aL, 0xa76319885bfadcL,
+ 0x08b6d5cd0161a0L, 0x010e3fa541f197L, 0x83a589e3279a16L,
+ 0xf0991376309f9bL, 0x07c093bf1cea10L }
+ },
+ {
+ { 0x1ce3f0f33d2192L, 0x07b559ac37ce73L, 0xaa2ad38207be27L,
+ 0x84f053b7ed93deL, 0xbc5c7973b98a4bL, 0xc92346163aa9b9L,
+ 0x807cc16231a10cL, 0x8ffdf57a061209L },
+ { 0xa9ca741497070fL, 0xf608ec9d113b3aL, 0x51327268d0384dL,
+ 0x96686acf5ec307L, 0x437bbbd71c4665L, 0xdef09d57c379caL,
+ 0xf8be033621747cL, 0x2775b378ae8047L }
+ },
+ {
+ { 0x4009798b2c4fc2L, 0x148d7d1203772eL, 0x9d9392df8423fbL,
+ 0xa5bd72eaf8cef4L, 0x579d58d4380b53L, 0x2ff88f18c39d24L,
+ 0x9ca2fbc5706466L, 0xb42987d1e56af2L },
+ { 0xcc2556e5d94ea8L, 0x4e5c2b35369d76L, 0x5de35742a94f9cL,
+ 0x8d068c95cb4145L, 0x4d553ff51bfcbfL, 0x3ab71648a23fceL,
+ 0xc9cb3a9d0fa7f3L, 0xf81209bed9ced1L }
+ },
+ {
+ { 0xde7356ee5b66f5L, 0x7b2bf1ae8a25e0L, 0x09a444a2c9b725L,
+ 0xfd8a2f44906c55L, 0x409cc8082514f3L, 0x47e009928999a9L,
+ 0x0a582a66a312f4L, 0xf7946f8f6723deL },
+ { 0xa55f6ba92d8affL, 0xb62c3c8a544b1cL, 0xa1d14115c16a94L,
+ 0xc3783192ad5e71L, 0x13d784706b1dd6L, 0x99005f8ee7ff55L,
+ 0xfb5ea3f8a1e7d8L, 0xdc7f53cb4cac39L }
+ },
+ {
+ { 0x482abaf36e3794L, 0xc23e9e5c74684fL, 0x4544cf6f1629beL,
+ 0xd8a8ee52f40374L, 0x2eea87ff433bdbL, 0x489a99cae9990eL,
+ 0xefc131e54b23b6L, 0x25fe6998600270L },
+ { 0x03d2d9ec059a7eL, 0xa6445b56979c3cL, 0x491a10c9bfbceaL,
+ 0x15b5974e937af1L, 0x4be8002797c7fcL, 0xbed8a49fedcfeeL,
+ 0x35751cea9e0691L, 0xe9a9fa39ef5982L }
+ },
+ {
+ { 0xeffeaca3065de7L, 0x841d544ac4d4e2L, 0x8144679caf199fL,
+ 0x98cf4f9443967aL, 0x8cd57f4f33183cL, 0x390832ac1b15ebL,
+ 0xc4b1feaa53b500L, 0xd762a10dff24b5L },
+ { 0xccd3eedb0ee2a9L, 0xa6dd4a9362d485L, 0xeb4ff26f1d047aL,
+ 0xc0771fd23860fcL, 0xdbb4e394b64114L, 0x2ff3f244d29b29L,
+ 0x9cac005387b365L, 0x05b7aa6de5994aL }
+ },
+ {
+ { 0x5e71752c03dd63L, 0xad10fe9bc74687L, 0x51a5b0c54c76abL,
+ 0x763fd501f586d4L, 0xc7bd5ce816048bL, 0x8fc83d23f744dcL,
+ 0x0561802109df9aL, 0x18fb01fccf0e43L },
+ { 0xe4606fc038ab23L, 0x5878f1fa664c98L, 0x3aedbbd5da7356L,
+ 0x3c578f5516746aL, 0x259477f1a17210L, 0xc7a869d028248fL,
+ 0x6517a6148cbf95L, 0xbc5f91d3d04d47L }
+ },
+},
+{
+ {
+ { 0x15fd9a9083ca53L, 0x1161da02697ca6L, 0xf516af356b676cL,
+ 0x8a420d575eec13L, 0x72d67421a9526bL, 0x8d8c29e76b463fL,
+ 0x38a4f588815627L, 0xf7e528be0650f9L },
+ { 0x2cfa78e382edcaL, 0x638d183c4ad83cL, 0x96d3b9de4a0119L,
+ 0x5769ccba7c1101L, 0xc3b3b792b8d04aL, 0x96212f64951bdeL,
+ 0xad7905a481161eL, 0x8fd676241c5edfL }
+ },
+ {
+ { 0xf7b063539d6cdeL, 0x69d0549115a84aL, 0x4a976c6cbd9fe4L,
+ 0xc92953f950ff96L, 0x1d7f0fe654d127L, 0x7293870da0f75dL,
+ 0x7bb3652cf2277fL, 0x64798c9834484fL },
+ { 0xb94d8bfac3a76cL, 0xf5721a97ff776bL, 0x23a6e9f2722e31L,
+ 0xe9da9969a5c034L, 0xb9bbf83456ebc3L, 0x239f58a96956a4L,
+ 0x8b75beb18b7f00L, 0x6c2b5b8a51cb97L }
+ },
+ {
+ { 0x78b1c627eb41f3L, 0x0638fcf17c4352L, 0x939edd80c5709cL,
+ 0x0a8dfc3edc906cL, 0x3942f47efb01edL, 0x4c8275749986feL,
+ 0x792545c4dffa57L, 0xeee68836c3ff26L },
+ { 0x824d08e12b1218L, 0x515a478902457fL, 0xc70cc9cbae55b3L,
+ 0x1240737bcef9d4L, 0xf22e6162f9db7fL, 0x98c4f0291f8da2L,
+ 0xa89219cafaaa67L, 0xf35fd87e7d27e2L }
+ },
+ {
+ { 0x19b0cd701b80d0L, 0x3d7e29df9aebd1L, 0xd39c9ca0477cbcL,
+ 0xac0f6155ff0d3dL, 0x8a51993520fd01L, 0x508ff54b22d6fbL,
+ 0x8786c47318d3abL, 0x4312c464a683f8L },
+ { 0x73b1d3995359f6L, 0x0d94fa5963011eL, 0x5723af29bfe83eL,
+ 0xafa90016841df3L, 0x791e92ab7c498aL, 0xbc931ad7ea4253L,
+ 0x438e016b783c06L, 0x1347db22ca662bL }
+ },
+ {
+ { 0x41df37dfbaa861L, 0x98ecb23329e4deL, 0xdaf1560507e018L,
+ 0xa902269b088e32L, 0xad898a5e4cab2fL, 0xd84e9ed02c1e1bL,
+ 0xc20a5d58488af3L, 0xc7165af6cc77c6L },
+ { 0x8526f3adeb7461L, 0x03577b14a2d332L, 0x28e469de4760b5L,
+ 0x442c7f9b276266L, 0x90d5c77f9c90faL, 0x7aa87163e211bdL,
+ 0x56d8ff05decfd6L, 0xa204b56ee23e6eL }
+ },
+ {
+ { 0x2e4374e4aceafcL, 0x978743b6fcd5e5L, 0xa0f6345c4855caL,
+ 0x9bc7e4fe98074bL, 0x3835d57c33d08aL, 0xeec7c8b6f00566L,
+ 0x71628a21acf55cL, 0x5da375097fb19eL },
+ { 0x6904a8e01a7125L, 0xad33c85e6e3780L, 0x1702928c19f94aL,
+ 0xb424ff27c04b3dL, 0xb212e3919e2ba3L, 0x4cca8e8c9af4c9L,
+ 0x98ab7aefd9bf0eL, 0x21d245d9799db5L }
+ },
+ {
+ { 0x6b034dcec08806L, 0xfd763f2b40f2d9L, 0x5e16de029cb906L,
+ 0x02b70148a0e16aL, 0x463c8eee071e12L, 0x644728125ad509L,
+ 0x9ee6f2ddc0e07aL, 0x188895c68d4d97L },
+ { 0x092fff3b27f971L, 0xb3c159fc9b7722L, 0xe27d8ff3cae42dL,
+ 0xf8a5ed6e87071dL, 0x318388f607ebd2L, 0x924967b53486f1L,
+ 0x77304947c46e1fL, 0xf279c60f21d196L }
+ },
+ {
+ { 0xef2bc0384f3201L, 0xf8750c71f94c51L, 0xbaa4f5a986ec65L,
+ 0x6f8a5de2732a33L, 0x0f13d80299e365L, 0x2709530e85261fL,
+ 0x097d922f527d56L, 0x4969687be1f3f8L },
+ { 0x9f3f5043e1708dL, 0xac67b874aa4be4L, 0x75fb042320a87eL,
+ 0xa361ad36e2cad6L, 0xcb01470203e9f6L, 0xe3807b7c9b76c6L,
+ 0xf086833b907c09L, 0xe9bed3c7e85a01L }
+ },
+},
+{
+ {
+ { 0xa7ea98991780c7L, 0x04e4eccd2476b6L, 0x0af9f58c494b68L,
+ 0xe0f269fdee64fdL, 0x85a61f6021bd26L, 0xc265c35b5d284bL,
+ 0x58755ea3775afdL, 0x617f1742ecf2c6L },
+ { 0x50109e25ec556aL, 0x235366bfd57e39L, 0x7b3c97644b6b2eL,
+ 0xf7f9e82b2b7b9cL, 0xb6196ab0ec6409L, 0x88f1d160a20d9eL,
+ 0xe3be3b4586f761L, 0x9983c26e26395dL }
+ },
+ {
+ { 0x1d7605c6909ee2L, 0xfc4d970995ec8aL, 0x2d82e9dcf2b361L,
+ 0x07f0ef61225f55L, 0xa240c13aee9c55L, 0xd449d1e5627b54L,
+ 0x07164a73a44575L, 0x61a15fdbd4bd71L },
+ { 0x30696b9d3a9fe4L, 0x68308c77e7e326L, 0x3ac222bce0b8c8L,
+ 0x83ee319304db8eL, 0xeca503b5e5db0bL, 0x78a8dceb1c6539L,
+ 0x4a8b05e2d256bcL, 0xa1c3cb8bd9fd57L }
+ },
+ {
+ { 0x5685531d95aa96L, 0xc6f11746bd51ffL, 0xb38308ac9c2343L,
+ 0x52ee64a2921841L, 0x60809c478f3b01L, 0xe297a99ae403acL,
+ 0x7edc18fcb09a5bL, 0x4808bcb81ac92aL },
+ { 0x3ec1bb234dc89aL, 0x1e8b42e4e39da5L, 0xde67d5ee526486L,
+ 0x237654876f0684L, 0x0a583bd285a3ddL, 0x3d8b87dfe9b009L,
+ 0x45bd7360413979L, 0xb5d5f9038a727fL }
+ },
+ {
+ { 0x7b8820f4bde3eeL, 0xea712ef24d5170L, 0x517f88cdf6ec7bL,
+ 0xb15cecf983ea9aL, 0x9eeee4431a4592L, 0x786c784ebb013eL,
+ 0x2f06cb31f4e15dL, 0x5603fd84f4fda1L },
+ { 0xf6790e99e1321fL, 0x274c66a74a4c09L, 0xa4b70b49a41a4eL,
+ 0x7700bddada5157L, 0xe54a60d51be8dcL, 0xfaf92761a477e0L,
+ 0x6661c72b027eacL, 0x50e2340280b917L }
+ },
+ {
+ { 0x635f40f96ec123L, 0x4a331337a766a4L, 0x9ce4416b935587L,
+ 0xbb6e1f595d97e4L, 0x26147239d4197dL, 0xabd4478490e896L,
+ 0xf6a1b2a8bba895L, 0x401fa405e27a45L },
+ { 0x7354ba50620900L, 0xc443a29385678bL, 0x48aba1053cf5faL,
+ 0xd67e723bbe152dL, 0x4b858e02a63d68L, 0x174e1ee72be4eeL,
+ 0xad0fbb39ab8d46L, 0xa0fdffbce17dd7L }
+ },
+ {
+ { 0xa1ea3259c46fd8L, 0xeca122e9fb96efL, 0xf9074a26767acdL,
+ 0x9b004a22787082L, 0x389f8077f3ba8eL, 0x6463de90d5aabeL,
+ 0xf30ceaab090585L, 0x71b31e85634ab8L },
+ { 0x0dee65caf02aedL, 0x506886e20ac252L, 0x0665f7886b8a59L,
+ 0xb9b784df2bb328L, 0x46e443adc6b089L, 0x3d5de1966c27fdL,
+ 0x0419265f0fde70L, 0xed946122b5c034L }
+ },
+ {
+ { 0x5a52ad213b0056L, 0x9fbeb92b909ee3L, 0xb42ba18bdaab08L,
+ 0xec127c4ffc8a77L, 0xc6d2985fda906aL, 0x5355547994bbe7L,
+ 0xa7470c09cdfd62L, 0x31a3971d2e675aL },
+ { 0x8d8311ccc8b356L, 0xabb0bf801b4372L, 0x33c1cad0294566L,
+ 0xe2e649ce07b672L, 0x9084d882ae3284L, 0x7a90d4c1835ce2L,
+ 0xb4d1cd5809d44cL, 0x78227149f0528fL }
+ },
+ {
+ { 0xca884cfbf5844bL, 0x9dd05c48524cf9L, 0xdbffa1936ba889L,
+ 0xef94fdd29e7666L, 0x358f81b3eaf48fL, 0x96734d51530d56L,
+ 0x378b2d14adf9e5L, 0x2f850464731f61L },
+ { 0xd6ae90599dcb83L, 0xa4f89e06199239L, 0x64052498f0f958L,
+ 0x2866d99cc27707L, 0x64681a2f551c0fL, 0x2c7b0d04c37080L,
+ 0x218925b00ac301L, 0x8d57fb354df895L }
+ },
+},
+{
+ {
+ { 0xdaebde0809c8d7L, 0x58c761c0e95ea1L, 0xbd9965000ae5e2L,
+ 0x6117a85cd51acdL, 0xc4424d87c55d56L, 0xe9b1ddedfbeeafL,
+ 0xda98bb50db4791L, 0xff3a5a63fca108L },
+ { 0x172fb8e5ccbea1L, 0x9fe12a7a9f6cc9L, 0x1de4b0b8967ce2L,
+ 0xc1ab60f671dbc6L, 0x338385a5dedcdaL, 0x647a4203a043feL,
+ 0xe9abc6428ebc89L, 0xc357ff003ba3c8L }
+ },
+ {
+ { 0x37061e7de39ebdL, 0xebb91352be567aL, 0xa9a6f6bd6bb80aL,
+ 0x039345d99f0ba2L, 0x215494e98bbf47L, 0xf2cb7a4a2a1ccbL,
+ 0xf51aa1037f67c9L, 0xd29c85c17fff71L },
+ { 0x8d4e4f24d30b87L, 0x20fdf5593a8309L, 0x9b9f9cf757075cL,
+ 0x09142adcd70101L, 0x901d0ee766ca55L, 0x6a5d86a32e418bL,
+ 0x550ad92d7fcaecL, 0x64e8818d91b26eL }
+ },
+ {
+ { 0x5cea0f747e5ee5L, 0x8ca1d31be99699L, 0x52db8465c136c7L,
+ 0x8cecb3890e0d74L, 0xb8efe9dede2ad8L, 0x18d6ff8f17ade8L,
+ 0xd2227352d66c20L, 0xc46593ef2005fdL },
+ { 0xe5ebe6ff7141e1L, 0xc968315e0126f2L, 0x95adc731cb91b6L,
+ 0x753b54c38a6003L, 0xa6141254230a61L, 0x23ac6eb559feceL,
+ 0x9816b603865c23L, 0x567014e543a570L }
+ },
+ {
+ { 0xd46091ddd2b71fL, 0x3999a5d97d24ffL, 0xce2a4f11ecff3cL,
+ 0xab2687c581c6f0L, 0xa9fb2ebcba70b4L, 0x6fde35642093e1L,
+ 0x00253ecaee724aL, 0xa08ce3c2b81bddL },
+ { 0xa251238935a2b3L, 0x8cae1d4584f750L, 0x011469e988a219L,
+ 0x61f7ed35a6a50eL, 0xe13ebaa01fcebdL, 0x794b97631d8867L,
+ 0xf25755ccda32e7L, 0x368a97b4564cd1L }
+ },
+ {
+ { 0x0d22224aa3397bL, 0x1dbb3e638066dbL, 0xfe0b5ee0ce8e32L,
+ 0x09c17c87bab4dcL, 0x5cc65ddf188b64L, 0x74c4abf211b5faL,
+ 0xdcc17b7ab0ba86L, 0xfbdf46fa535501L },
+ { 0x4775087aca569eL, 0x6575f9006a1718L, 0xb5c45a9b94de93L,
+ 0x0fc80068497171L, 0x775d965489f7abL, 0x8775b58f5c0c89L,
+ 0x05d4e201a06254L, 0x8cab349b6d73a5L }
+ },
+ {
+ { 0xca7816339465b0L, 0x3ef914814498fdL, 0x9ca1f346255c11L,
+ 0x389fd15b7f38f1L, 0xdac2089354b8f3L, 0x82d07fca840a70L,
+ 0xf53fd731dd483aL, 0xa6e4eae1590578L },
+ { 0x7bf65af3c01b77L, 0x27542f3a75c982L, 0xc5bd947716cfceL,
+ 0xba5fe76884b9e7L, 0x39bae14d55725dL, 0x982f64efae0eabL,
+ 0xcfae6627a5293aL, 0x22a25a1d60f464L }
+ },
+ {
+ { 0x74caecc7dd5e16L, 0x23678a2ce7bca3L, 0x467393257f1ba1L,
+ 0x4eb9948a4c1697L, 0x5d400e8eaba18dL, 0x128d1c89807871L,
+ 0x78f9627bff38a6L, 0xf80b813a39d4ccL },
+ { 0x8aeefa031d3aadL, 0x504219927db664L, 0x244fc694cb6383L,
+ 0x319047772192a3L, 0xcc86075bbfb57bL, 0xbae3a134451511L,
+ 0x16cf416f6174f0L, 0xb343cc0d376813L }
+ },
+ {
+ { 0x31ac9b9d1824b7L, 0x6282260ec8f61aL, 0xbbeb9f8c781765L,
+ 0x06ab5c02d110daL, 0xd583e2247146b8L, 0x79a16084100d05L,
+ 0x16dbbb4f0a5c95L, 0xfe2af1de331667L },
+ { 0x26f0364af8710eL, 0x1cb8c91eec08feL, 0x436bce61d95e9fL,
+ 0xfe9050c57944a0L, 0x5f45acf07b626bL, 0x48dc93f9cf1276L,
+ 0x4491371a05bfb7L, 0x51063044bcf785L }
+ },
+},
+{
+ {
+ { 0xac2e294ed0b3b6L, 0x5c5ade6671637bL, 0x2f289ce1140677L,
+ 0xaf446e2754eb53L, 0x70911b720421adL, 0x4b73836e0b7556L,
+ 0xcadf1042a97827L, 0x4824e498005bc6L },
+ { 0xb0eeccd937c28aL, 0x1ce061d0c3ee97L, 0xcb076319f33faaL,
+ 0x9980bf4aea66dcL, 0x2bd0755d111d98L, 0x43feaf67fe4de0L,
+ 0xe76fb80b077b2fL, 0x227dc9f5793b04L }
+ },
+ {
+ { 0xea24ae514f49baL, 0xbc39ea611436e7L, 0x9d7fed278485d8L,
+ 0xb6ef00cdf8b131L, 0x0237b4bfdbc7afL, 0x08745b564ccd27L,
+ 0xaf8595dafc5a76L, 0x43657af29f5500L },
+ { 0x300718348470f8L, 0x51f91fd640fd53L, 0x859c807be15512L,
+ 0x7d1a474ab3e9c5L, 0x5d714d981553e5L, 0x07573436f62310L,
+ 0xedc5be06b02a62L, 0x5a4b9b7ea47832L }
+ },
+ {
+ { 0x03e0a24e93dbb3L, 0x25841dccadc884L, 0xabc1a818d10ad5L,
+ 0x207e38a2042dddL, 0x7fffbdbfeba8d8L, 0x74efebba3ec9b5L,
+ 0x0bc39ca0b40a9fL, 0x69ee9c90267febL },
+ { 0xd402facbc62919L, 0xe9f8fc11cf53c6L, 0xe76fa5a7cc7d81L,
+ 0x4f2d87696bb19dL, 0xd4fb7f9adc67c7L, 0x40621d596702dcL,
+ 0x5b6a98e438f6c5L, 0xa7c64def1a1036L }
+ },
+ {
+ { 0x84c5e809a092c7L, 0x9e40e0a11c22b7L, 0x820a091d06c99bL,
+ 0x45fdc77eecca8fL, 0xfe1b8a35794f16L, 0x31f7e5b4ce3d6dL,
+ 0xfd5e01082c74c8L, 0xfdabf30c1f6f7dL },
+ { 0xbfa6017b9248a0L, 0xe898d30546b941L, 0x878c492207ff65L,
+ 0xbf22e8db874e64L, 0x43fdb1b53a547eL, 0xb66deda5fbd464L,
+ 0x59127a6c7ae1b5L, 0xa4636466a7515aL }
+ },
+ {
+ { 0x22c4e66de9ab2eL, 0xfaf60c20203c58L, 0xed2d7bf0d5c5edL,
+ 0xdbc16fe4ca0f19L, 0x54e8ef6465b979L, 0xe2d64b1a310ef9L,
+ 0xa0f2c953778636L, 0xf3b4aa4281883bL },
+ { 0x4ac9af09be6629L, 0xba455e11ca90c5L, 0x0147538856f492L,
+ 0xc80db7eabd7840L, 0xb3526d96beb9cdL, 0x37657fb9d81503L,
+ 0x8729a16193cec3L, 0xd9a93fbd69952aL }
+ },
+ {
+ { 0xfce017594f47c6L, 0x228da21e366d05L, 0x27ce0b2dc8baf3L,
+ 0x8cc660b6b4a951L, 0xf678947384bb01L, 0xc629d7d44d980cL,
+ 0x47980e4e85e81fL, 0xa2e636a1cd723eL },
+ { 0x6b6ebae77fb207L, 0x70179614c92891L, 0x5569541b4d279cL,
+ 0xbb6b36a41758cbL, 0xecaa22227a8e30L, 0x8b6746ab470ad9L,
+ 0x4c4601763e2d3dL, 0xe19c4edd3edaecL }
+ },
+ {
+ { 0x0b43fec34718c8L, 0x553c407f33499fL, 0x8272efb970d1dbL,
+ 0x008c62ca8e8d1cL, 0xe4b79d763eec45L, 0x1fd4230f2d71a3L,
+ 0x090fdafa368c36L, 0xf62c101fca7baaL },
+ { 0x1c9e6c8d2395b3L, 0x671ed6304c5513L, 0x577d933299a465L,
+ 0x286890e63f9986L, 0xd92a95dbfc979cL, 0xcebd79d2b51019L,
+ 0xe74d88b3d07251L, 0x8b6db73906f9adL }
+ },
+ {
+ { 0xc0c43db7b3d90cL, 0x85d154e4304a06L, 0xe8aceefaf2f38eL,
+ 0x5e0429383d9459L, 0x65e5e32431afd1L, 0x9e5f050a900a65L,
+ 0xcbaa1718a26671L, 0x33d0b249c93de7L },
+ { 0x3dcbf92d5b6680L, 0xc47e5ec20006f9L, 0xc9711299a51924L,
+ 0x665d9b8cd0ed46L, 0xed2d63fa5fcab6L, 0xa817eb6cfbfc5aL,
+ 0xb38169fb76eb76L, 0x8b93544f11160bL }
+ },
+},
+{
+ {
+ { 0x02eca52693bdcdL, 0xbbf09232ae01d6L, 0x0b0a2de8b44b3eL,
+ 0xdb82449b250dffL, 0x0c42b866e1c530L, 0xcd226dca64c2c4L,
+ 0xcfb2bb1f046b5fL, 0x97e2fae3fccb0dL },
+ { 0xdf9290745ed156L, 0x224dcb9f641229L, 0x2126abc5f1f67eL,
+ 0xa7eed5ae9c8a6bL, 0x40abedc9857d9bL, 0x3f9c7f6de941c6L,
+ 0x2158d42d725ddfL, 0xbdd10158c69543L }
+ },
+ {
+ { 0xa7dd24e8df2fbcL, 0x3adbcfd13d1aeeL, 0xf6a32d113b2177L,
+ 0x89a72327a9a14cL, 0xe3aef43dc65df9L, 0xeaec3e3a64d74cL,
+ 0x4d387d84fec33bL, 0xaba2a0521a2128L },
+ { 0x2382c226b85e30L, 0x4352d85cd2aad3L, 0xb0c6001d9772c4L,
+ 0x7ed82635f3653fL, 0x3626a6f0300f47L, 0x23909de6ca7e4eL,
+ 0xb43dd81c154141L, 0x9a49fad7e4bc68L }
+ },
+ {
+ { 0xa3661df2428f88L, 0xbe48b0256e0db2L, 0x3cd1871ce79aa9L,
+ 0x90ab87123dddacL, 0x9c58fb971871a6L, 0xf031f7fa34910eL,
+ 0xb501eea81060e4L, 0xdb668ba791224eL },
+ { 0x240bbcb6a705bcL, 0x7e76fbd2d1865eL, 0x6e2cd022513641L,
+ 0xe6c522546365c9L, 0xe46a8b8a5a01fbL, 0x696fa7bb67618bL,
+ 0x418b3b90db6792L, 0x7204acd7108b9cL }
+ },
+ {
+ { 0xb5a143b8456b45L, 0x8a3ab25f53b4d9L, 0xb112a58e13a570L,
+ 0x613ca3281487d2L, 0x837d8233b1e7c9L, 0x592baded41e9d5L,
+ 0xdc1893a5cd02f2L, 0x08795028972e23L },
+ { 0x7003c08cb76261L, 0x14bde9e332a5e0L, 0x14b2872cbbd78eL,
+ 0x5594061de238e8L, 0xad12645067466cL, 0xa8d0e64f5e4952L,
+ 0x5b44b82c7f8d06L, 0xb51bea8fb1b828L }
+ },
+ {
+ { 0xebad6853f0daccL, 0x5c31b8b1cbebbcL, 0x6746975fa5a2dcL,
+ 0x2d9596531d9faaL, 0x343797d00fc0e4L, 0x38d821c55fe01bL,
+ 0x0bfdb247323aa0L, 0x42613c4f962a8eL },
+ { 0x599a211e134bc0L, 0x75fa4a147a7084L, 0x6e719487f734b5L,
+ 0xd5ced2d6dfca2bL, 0x9fa0fdc8aeabd2L, 0x5e6b03f12361daL,
+ 0xad23d315859fcfL, 0x3120ef125a5fc8L }
+ },
+ {
+ { 0x990ef628e9f638L, 0xfdaa240626a60cL, 0x4a3de202abddabL,
+ 0xd5d10b7d8872b2L, 0xa01b7301ea5880L, 0x481697fa81b9d8L,
+ 0x29841533471ed8L, 0xefd73f8292d37cL },
+ { 0xdda76269994bebL, 0xa0377036a4f865L, 0xda992ece5b47d5L,
+ 0x912a427e53edbaL, 0x64675989264e45L, 0xd3b68c3af71222L,
+ 0x9d3436c6dedc5fL, 0x1e027af076b2adL }
+ },
+ {
+ { 0xd56fca14382f4aL, 0x83712a48966b7bL, 0xd6b2cf5a4c9ddbL,
+ 0xa66be29f602875L, 0x70e4266894f3d0L, 0x007d220b3195caL,
+ 0xba38d8f82c74d4L, 0xdccc5fcd975cbdL },
+ { 0x03e1610c88b38bL, 0xeb9f9a152e0d8dL, 0x6a57ecab646eb7L,
+ 0x161641fc76b6c1L, 0xf9025adbd2e12bL, 0x87c74db5c0e26dL,
+ 0xed5cb51bfeca74L, 0x603dfb6e34a08cL }
+ },
+ {
+ { 0xc4be728cb03307L, 0xde34c0ec2741ccL, 0xe01db05a74eb17L,
+ 0x1bfce0c8905e4bL, 0xb18830ad1b1826L, 0xcacbb41e87bbfbL,
+ 0x8696842d2f1a79L, 0xa80e5fb08c83eaL },
+ { 0xe48f1633f1439cL, 0xc1d4108cd6987bL, 0x05705c4b751814L,
+ 0xa9bffd0c1c622dL, 0x23de4af46cd053L, 0xf782f5e39457c3L,
+ 0x815276b5e5d243L, 0x31320416161ae3L }
+ },
+},
+{
+ {
+ { 0x245966177f2542L, 0x203be7e8372b25L, 0xc7c9426ee2007bL,
+ 0xc5641380621799L, 0xda56589c28c3ceL, 0x13e8a7c7afc1e3L,
+ 0xdba81e9e352082L, 0xf43054904435c7L },
+ { 0x4d26533691de4aL, 0x364408cfb777abL, 0xccdfb43eae7f88L,
+ 0xbc40f44a525b11L, 0x8e112a53c60627L, 0x7f7c581e17e696L,
+ 0x0fd78781ea774aL, 0xd09e6320b1f582L }
+ },
+ {
+ { 0x44390bd70aab15L, 0x41112bc889c3f2L, 0x6b02894d685349L,
+ 0x71030015584dfeL, 0x373cb1b1ba7887L, 0x53d286c2a017c7L,
+ 0x2ed03883c81fdcL, 0x3bfc5e3fbcc6fcL },
+ { 0xd38ac6ffd6418dL, 0xc667e96bfad89eL, 0x46f4f77eab4d66L,
+ 0x194c04f0911293L, 0x0fd09cf68c48d5L, 0x6f5b05563cf7f4L,
+ 0x0c0a8c4acd562fL, 0x94c1d8336d965dL }
+ },
+ {
+ { 0x94fc8f0caa127aL, 0xc762d5dd803690L, 0x8bfdfd11ebf0d3L,
+ 0xa98cdf248eac50L, 0x3d7365d8b5ff10L, 0x20dc29bc65b4deL,
+ 0x62ac28e8ec7c68L, 0x7f5a13290372d2L },
+ { 0xf3d8a253246658L, 0xa4bebd39ac202aL, 0x078ede75cc1697L,
+ 0x5525800c8fc022L, 0x302a8025fae77bL, 0x018013957917b6L,
+ 0x7c8806d864bf55L, 0x4e2d87812f06f1L }
+ },
+ {
+ { 0x8d351183d66e88L, 0xfb861a1a91d02aL, 0x8c27c2a7850e5fL,
+ 0x9fd6399a5496f6L, 0x52152ae8080049L, 0x600e2fffd1c2dcL,
+ 0xc75902affe8b2eL, 0x5c4d2cce03b175L },
+ { 0x8ad7c424f57e78L, 0x77cf6061736f87L, 0x2876012f85038aL,
+ 0xff328451b97b95L, 0x3cc6dd5392dfc8L, 0x72f1363a6f5075L,
+ 0x028ec4471de894L, 0x7030f2f6f45a86L }
+ },
+ {
+ { 0x66400f59695817L, 0xeda0a7df20ea36L, 0x855be51d394992L,
+ 0x2d082c18336f62L, 0x30944ddf28c868L, 0xfb5f8530dc86d0L,
+ 0x9562ae5564a0bdL, 0x1f7ea12b6b9b51L },
+ { 0x5bd74e0d0a7148L, 0x6c8247fb91e572L, 0x699aba547da498L,
+ 0xed825811f7c814L, 0x434674b62057b9L, 0x8b4df5e15c15b4L,
+ 0x2a97da1b110081L, 0x2a96b0c4c417feL }
+ },
+ {
+ { 0x4f75dfc237639dL, 0xe5ad6bc1db7029L, 0xd43e06eb3d28f7L,
+ 0x89f3bb5e447989L, 0xc426a2c01a1a6eL, 0x33ea71c315878fL,
+ 0x8a7784ab1b5705L, 0xa59e86e77ca811L },
+ { 0xddb133c36ae155L, 0x49f1d4c0d51b42L, 0x55080829d05519L,
+ 0x20e23be5291816L, 0x35047ec67181ecL, 0x6237dc47aad091L,
+ 0xa1d3ce1e2e25a2L, 0x1de05220d3db4cL }
+ },
+ {
+ { 0xe9a5e19d9fd423L, 0x0c2c3d09801e43L, 0x043c2dd28df2daL,
+ 0x4eecab4e1ad12aL, 0x97e17979615aa5L, 0xe57b879ca7bb5eL,
+ 0xa2a903ccc92619L, 0x5cef370aa56e93L },
+ { 0xbef29fa7f3232cL, 0x1cf35ed2b7ad5cL, 0x35c48933b6077aL,
+ 0xe0651487a1d47dL, 0xedb4673ce14572L, 0xdc9e98c0b17629L,
+ 0xef98ebe9a02a5cL, 0x1f772e311d03c0L }
+ },
+ {
+ { 0xcbdbdcd4608f72L, 0xb4352235a13c6fL, 0xa6497f64bb3c21L,
+ 0x3af238312c15c9L, 0xfbbf4b36322d11L, 0x520a5c6c641775L,
+ 0x18cd967e81e0e1L, 0x980b2c63de3871L },
+ { 0xfa9db619ae44a2L, 0x0281dd2176bc56L, 0xfd037118a7f817L,
+ 0x9c485454129b30L, 0xb439648039626dL, 0x355050ee4ada6bL,
+ 0xc9c16d67f5d98cL, 0xf53ccc318c4d5eL }
+ },
+},
+{
+ {
+ { 0x50ae9423ffb20bL, 0xa6c0b426865eb4L, 0x4677f7d09930f1L,
+ 0x742e0b64a16427L, 0x521d18ef976f9aL, 0x43ac9cfa454749L,
+ 0xda3a91dc51f50dL, 0xf657029ad6f954L },
+ { 0xfe5f0646b4f99aL, 0xd92a5d963ad4ceL, 0xfcb55092e0e081L,
+ 0xadc85ab8d8a858L, 0x8e9b9660632f0fL, 0xe7a4f168d7216dL,
+ 0x00a4cc559c3b99L, 0xed6d0bdba09dc1L }
+ },
+ {
+ { 0x7236d141621bebL, 0x1751fd4bc7ca95L, 0xaa619d12f5319cL,
+ 0xfc2b15b4e9316fL, 0x2d1a9069fd4d33L, 0x28c3bac8ced829L,
+ 0xf2efab51dd998fL, 0x2c133303b149edL },
+ { 0x65237c9f601ac6L, 0xb54dd6507d6a45L, 0xa1ce391fb1a4cfL,
+ 0x2957533115f67eL, 0x6456da8465279bL, 0x02890aaa993e02L,
+ 0x6891853b7175e4L, 0x3fda2030f3e59bL }
+ },
+ {
+ { 0xe99fe12d8c6e0bL, 0x7cb07ff5341c56L, 0xc292c7bdf77b24L,
+ 0xf52dfd0ca29906L, 0x4a6aa26772f02cL, 0x26f7684e1bbd09L,
+ 0xec56b2bee7c2a8L, 0x67709e6ad4a312L },
+ { 0x99c57b2c570263L, 0xeb0100b2faafaeL, 0x980d5d1ff25ecaL,
+ 0xace35e682cf936L, 0x5a82ce544679edL, 0x5c76a41074b81eL,
+ 0xf36fa43a00abb1L, 0x064281904ffb2dL }
+ },
+ {
+ { 0x68f6bc804bdd28L, 0xc311d96b5dc7adL, 0xff0d646ed32e45L,
+ 0xaf3cdc6e0f712dL, 0xd4508e9d483861L, 0xb624be50e1c277L,
+ 0xc510275c5dd841L, 0x451c5c3298dc02L },
+ { 0xf87d479dd34d6bL, 0xda7f293dd06a38L, 0x575e129b699e9fL,
+ 0x79e5fb2215b2ccL, 0xd280028657e690L, 0x7fecd09e702a71L,
+ 0x85160abfa13677L, 0x5de3427ce65f64L }
+ },
+ {
+ { 0x84e4bf6e8fff38L, 0x16f3725b358b1cL, 0x360371c3b472a5L,
+ 0xe64c06152f217aL, 0x8e673790501241L, 0x88e81d6ab2dd96L,
+ 0xf3e218a1385604L, 0x9736cafe84184dL },
+ { 0xb55a043dbb93a3L, 0x335088f9301088L, 0xcea7a2db2a4959L,
+ 0x48e5d4ab882c33L, 0x114f09bad46179L, 0x4416467b446576L,
+ 0x01cb23e34c6c2fL, 0xddebf04a02db8aL }
+ },
+ {
+ { 0x36d60cc9bde8a1L, 0x20fd2f2676e4adL, 0xebdcfb78936581L,
+ 0x245d0d5dbfc2c3L, 0x104c62ca9f82e5L, 0x7387457d654d9bL,
+ 0xe966777ae7f10eL, 0xefeb16f1d8e582L },
+ { 0x4faf4f170364b5L, 0x0e1ab58d612472L, 0x11bbfe7fed6085L,
+ 0xb360a14a59a09aL, 0x61d96e9722fdb6L, 0x16a12f194068bdL,
+ 0x225bf07f73c2beL, 0x1e64665c8bd24eL }
+ },
+ {
+ { 0x27a478a3698c75L, 0x778ccd36202aa2L, 0x0149c638d87f1fL,
+ 0xa660e5f784edaeL, 0xe0d4d2f82adfa8L, 0xf512dd61ba1f9dL,
+ 0x90cfed96245c58L, 0x6c3a54818b53ddL },
+ { 0x833f70cbdc094fL, 0xa5f26f5b1514e7L, 0x93e7cf51c8cf13L,
+ 0x1436601186ec43L, 0x81924ace78170aL, 0xcc880a08694368L,
+ 0x2dfa9550b62cbbL, 0x0bc6aa496b4a2cL }
+ },
+ {
+ { 0x5157a7e3561aa2L, 0x525c5008645c1eL, 0x22feb4ece7cbb3L,
+ 0x36d0d25c89a58bL, 0x43131f7c9bde9cL, 0x74afdda881f731L,
+ 0x99ab87c7c8e36aL, 0xf07a476c1d4fb2L },
+ { 0x1b82056bebc606L, 0x95a1e5afcf089fL, 0xc5bccfa2b55d5cL,
+ 0x8fbc18e00eb0b1L, 0x93a06fe9efb483L, 0xcafd7252d74c57L,
+ 0xc7518f03de4350L, 0x9a719bfc6fd762L }
+ },
+},
+{
+ {
+ { 0x5ee0d832362087L, 0x7f2c0d70b167e8L, 0xb7327895e0e865L,
+ 0xef5b2e898c4e65L, 0x222797d8fe9cc1L, 0xfe6d73e82d1e15L,
+ 0xc7c0e9cf62dc4bL, 0x962acfe937cedaL },
+ { 0xd763711c1e85c7L, 0x8f2dbbc2836978L, 0xbadc0558c44e98L,
+ 0xed63eaba3e93f8L, 0x807e85741b55c7L, 0xd51ae5e6d1207bL,
+ 0xa0ef9a639d541bL, 0x58855f9a0c56a5L }
+ },
+ {
+ { 0x7d88eaa213091dL, 0xcbdfee745b6a0dL, 0x826a0124f5e077L,
+ 0xb04fc1390f1e4cL, 0x1961ac3aea69aaL, 0x3afb719d5bb63eL,
+ 0x2a378374ac7e5cL, 0x78efcc1c50ca45L },
+ { 0x346e8f0b8abdefL, 0x27e3dbd88095d0L, 0x56d3379ffc6c22L,
+ 0x67d416cfa4b291L, 0xc3baaf63b1b373L, 0x0184e1fdf73baeL,
+ 0x38ae8f79167528L, 0x7329d4c35d6297L }
+ },
+ {
+ { 0x45d2ac9f568c52L, 0x51348149808593L, 0x0c92d8331b7ed8L,
+ 0x921327a0876ecdL, 0xf752d75052736aL, 0x7b56487bc6b837L,
+ 0x6b1a320a23b4ccL, 0x1983937ec0d665L },
+ { 0x2c3017c08554abL, 0x40ad955366e87fL, 0x88c4edf8ed7f02L,
+ 0x64a7db13cc5e6dL, 0x5ac91fa2dc978bL, 0x016a20d925d2a2L,
+ 0x3604dfeabb57b4L, 0xc3683ecd7e2e85L }
+ },
+ {
+ { 0xc47150a4c0c6d0L, 0x30af45ee22adcfL, 0x39b5acb022ea4bL,
+ 0xfbe318577203b5L, 0xe5aaa346fd9b59L, 0x0062c90dd1c8dcL,
+ 0xcf113f354049acL, 0xd8fba4d63a31b5L },
+ { 0x73b54881056a69L, 0x3be6cbcd780bdaL, 0x5776ec230ba2b9L,
+ 0xbe883cf8e8d6f7L, 0x64efe945c2be6fL, 0x064f704f1ade8dL,
+ 0x41cfd17743110eL, 0xaac94114c20abeL }
+ },
+ {
+ { 0x91f9192f1c1468L, 0x8176e744563e13L, 0xa48b5f90bda15dL,
+ 0x2a085aeda42af6L, 0xfd38ab2425c018L, 0x2884ba408abafbL,
+ 0x356f318cbd091dL, 0x454e450817871bL },
+ { 0xe080e818ada531L, 0xa40f1eb3152ba8L, 0x051049f0c38eb1L,
+ 0x37e4bb3bd45003L, 0x6d0980454a01e5L, 0x6de932feeb824aL,
+ 0xccdef37dc93481L, 0x8633e0793a05e8L }
+ },
+ {
+ { 0xbe94256034675cL, 0x376c01d08db789L, 0x8707ee79af1b6bL,
+ 0x633b3ef11bfbacL, 0x694f33fd06db60L, 0x2a68bfcbb13407L,
+ 0x1c860c9da27c3aL, 0xbca16ded701ac3L },
+ { 0x2b76cfac59ffd0L, 0xf9a116554d718dL, 0xf86a1db67f0878L,
+ 0xe313e05af34e85L, 0xa1888113343159L, 0xdbe4c3f0bb7ed1L,
+ 0x73b67e80c732bcL, 0xa4e1c87e74110eL }
+ },
+ {
+ { 0xce1106b5c6770cL, 0x422c70b5c0bcb7L, 0x32a39908195e7fL,
+ 0xa24968d1ccd4aaL, 0x8f08ecf720e557L, 0x5da10a454bcc81L,
+ 0x9d3c73b6cd846eL, 0xaeb12c7368d065L },
+ { 0x2110859cf9fd1bL, 0xd2a4801ee2bd6dL, 0x376e556e9466acL,
+ 0x767803b3b5aa35L, 0x343f842b8a89baL, 0x3263cc16726bbfL,
+ 0x26caf1725871b0L, 0xef66ad641b8578L }
+ },
+ {
+ { 0xc9f2249638068cL, 0x96d282c1ccf9afL, 0x71df30c69b435aL,
+ 0x88c943acb9d5c9L, 0xbf98ef12a8f378L, 0xffc1824114c6ffL,
+ 0xda3ad2cd52e8c7L, 0xf1222bc1afcb59L },
+ { 0x459e94b0ee334aL, 0xd4477b8421933aL, 0x60fb7b0a1e401eL,
+ 0xfde6e820d1e330L, 0xcecfe9b3233fdeL, 0x09ec4662e93523L,
+ 0xa5ba64930775b9L, 0xcc397e5adf80f2L }
+ },
+},
+{
+ {
+ { 0x2fe182d4ddc8a8L, 0x88d6e79ac056bfL, 0xc3ff2d10e41e4eL,
+ 0x32ec7f92c3679fL, 0x3561f094e61051L, 0x4553f5a6c6250aL,
+ 0x2b765efdd25c5bL, 0xe3a40a26a1cd7fL },
+ { 0xb27309b5d821ddL, 0x950fb8dc2c17caL, 0xfeed0158fb0d4cL,
+ 0x762c479f550179L, 0x306cf44e095840L, 0x84b413ad379e66L,
+ 0xd6e5d5abb2e4f1L, 0x8bc12b794b085dL }
+ },
+ {
+ { 0xc0d4cb804b5532L, 0x7a31525b9940a6L, 0x010e7dd68c69d1L,
+ 0xd81f29d2a18c35L, 0x08ae7703f11e73L, 0x5358f876e55106L,
+ 0x299e8cac960ef5L, 0x89a6fb4acfc8dcL },
+ { 0x5996a406dc7d4aL, 0x21e5112e51b96eL, 0x95b8c3d09a202bL,
+ 0x306ab0fd441f1fL, 0x2834fed98d4245L, 0xc29c387d0abbdeL,
+ 0xf6a9bf1b805c15L, 0x602f4f8c4e458dL }
+ },
+ {
+ { 0xf041486e5a893aL, 0x53b891d8934327L, 0x11e000d4000758L,
+ 0xa4ccde8662bad9L, 0xe34d3edb9a1b64L, 0x72d967584e7a6dL,
+ 0x773da2f6627be4L, 0xa11c946e835ae3L },
+ { 0x02e8203650bc15L, 0x2d35936e58b78dL, 0xe9cfbe8f21a3ccL,
+ 0x55ad8311049222L, 0xbf99de438fff47L, 0xebbfd803831db5L,
+ 0xe990636af2af42L, 0xc26ae52b7f5a0eL }
+ },
+ {
+ { 0xb5d85b1fa8f846L, 0x4166489b3b1455L, 0x768260dd36a305L,
+ 0xc6a82354ff5645L, 0xd241cd8d6e93e5L, 0xeed9aa1a406e74L,
+ 0x9e96ab05f600d9L, 0xa26b8b56eca2a1L },
+ { 0x78321cfd705aefL, 0xc4fb6b3c0161ecL, 0xdc324415199cf1L,
+ 0x33627d0d0a5067L, 0x13490cb15143eeL, 0x77e0ede85b4f44L,
+ 0x904f12e394b165L, 0x90f50f5efab32dL }
+ },
+ {
+ { 0x4aa0a16bc2de96L, 0x172596aaa9c12bL, 0xd512e1e60e8a29L,
+ 0x77d35c1f637e83L, 0xbb0d141d2aae0bL, 0x8a878a58c03738L,
+ 0x6d24c01ab0e525L, 0xb7d3136f760887L },
+ { 0xdbc3f8f3f91b7cL, 0xe7b4bcaa8722c0L, 0x3286a91da0ae65L,
+ 0x8372274225b084L, 0x5884cd5ae1886cL, 0xb4e63ef3a23cf7L,
+ 0xfe5f202f2dd0daL, 0x951fac9653916cL }
+ },
+ {
+ { 0x05e2e8f854fa4eL, 0xf411f941edaf10L, 0x26cc562a0a928dL,
+ 0x78fd34e4abce65L, 0x1d8760998a32e2L, 0x85dc76f4c37518L,
+ 0xdcaeef500e8021L, 0x7fcb2f84e9b2a5L },
+ { 0x9eba91ef382c06L, 0x2052e8524cae53L, 0x617336ef5c1519L,
+ 0xf1546d5b4e632bL, 0xa9edc81d7b8ffdL, 0xdb2914f29ab68cL,
+ 0xe805070debbabaL, 0x775e53bc3b719eL }
+ },
+ {
+ { 0xa40e294065256aL, 0x9f113868fb031aL, 0xac03af8059667cL,
+ 0x432eb3a0475f58L, 0x22332bf01faad0L, 0xc8132e9bc57a11L,
+ 0x27d5a173bc3f8bL, 0x5471fc6930bf3eL },
+ { 0xba28bc0e6bff40L, 0x198d57e555e564L, 0x13ce8319c65b8fL,
+ 0xb0a5c9d5681b51L, 0x467588bdeb9e11L, 0xf1891a7bb4250bL,
+ 0x10b938bd12b433L, 0x0b8c80224dcda4L }
+ },
+ {
+ { 0xc428703cf332d3L, 0x9d0053cf2a5b98L, 0x4e4c6207838a15L,
+ 0x2e92919fbf8a43L, 0x39ad52421cd9a5L, 0x584ed6c1561588L,
+ 0x20af30517a95c8L, 0xa223077b70e1c8L },
+ { 0x679cfea2fa4871L, 0x54f2a46ac633c7L, 0x60306514cdc5f1L,
+ 0xc4facda75a1dc7L, 0x710a2882d07d19L, 0xd55864e6b44992L,
+ 0x44d4b6c454c5b2L, 0x2855d2872f9981L }
+ },
+},
+{
+ {
+ { 0x4071b3ec7b0674L, 0x800eb14f8794d5L, 0x70573afbe6783eL,
+ 0xafaa4407785901L, 0x112d2a1405f32cL, 0x3761a52169b3e2L,
+ 0xe168b31842a366L, 0x5bc322f9bf4734L },
+ { 0x36ef240976c4a0L, 0x066f3d6fea4e64L, 0x0e954bda989e57L,
+ 0xe36ef5ef9466e4L, 0x6bb615abeb9226L, 0x5571e5f3d5a2caL,
+ 0xa86efe24897a86L, 0xed7e9cf28a9f77L }
+ },
+ {
+ { 0xdf10c971f82c68L, 0x796ba1e3b597e6L, 0x1ac77ece718cbfL,
+ 0xc8175bb410eac8L, 0x0cdf9a1bc555efL, 0x6b889f17524e05L,
+ 0x6bf1e61ae26d82L, 0xb3f6ad5d2e97d9L },
+ { 0x94dcff9f226487L, 0x60e6356be03ddeL, 0xda1f93b6a3dd7dL,
+ 0xf1be72179ca90cL, 0x05ed3131e6bce5L, 0xcf50908d48af3eL,
+ 0x3b0e85c61e554fL, 0xfe7e35ba2778d3L }
+ },
+ {
+ { 0x42c503275ac5a9L, 0xa66a66dda062c2L, 0xa4f4f82caa7023L,
+ 0x489d47664b4f86L, 0x10b108897311adL, 0x55dd637177b2ecL,
+ 0xa5ccff09a267b1L, 0xf07690bff327b0L },
+ { 0x39162ed2250cd2L, 0x1426de08b255f1L, 0xf227afd1bdd731L,
+ 0x78f8a36fa4c844L, 0x267a211157379cL, 0x3f05f92cc04acbL,
+ 0x374496cfc69caeL, 0xbf2c5d016ebfecL }
+ },
+ {
+ { 0x605418bd0518d1L, 0x3237f809e1cbc6L, 0x37a7005286c019L,
+ 0xf1fb0e0b15af0bL, 0xfc3b97caa853c0L, 0x1f48bd0e6beba2L,
+ 0x8e5d7c5e6a72f1L, 0x575e66d26ebf0cL },
+ { 0x099477662eae3dL, 0x53f074f96c9c65L, 0x6cfbfdbb81badeL,
+ 0x98b4efe3fed7d1L, 0xdaa112338c3382L, 0xdf88b7347b8ec6L,
+ 0x9b0fe4b9504a4fL, 0x2e7df4cf30c1c3L }
+ },
+ {
+ { 0x25380cb2fc1833L, 0xb8e248c18d62deL, 0x91c8f59d82f9dbL,
+ 0x5ec2b202444750L, 0x3f3a1f766b6f74L, 0x0180aa9dd7d14dL,
+ 0xd0a342d2956b9cL, 0x26e910e7139873L },
+ { 0x2261dc4139e23dL, 0x7edb181b8343ddL, 0xfcf1073b4038ddL,
+ 0x88870efa3bfea3L, 0x4e98ba964a263eL, 0x3c6e5dc70811f5L,
+ 0x17d28f5f86055dL, 0xca9c27666e4199L }
+ },
+ {
+ { 0x0b2d8bd964ef8cL, 0x5a99b8588e2ba6L, 0x9e927b204498ceL,
+ 0x9ff20c5756eb25L, 0x97cc27b3f27736L, 0xf32dd6d4729583L,
+ 0xbdc26580381a94L, 0x70fef15ef2c06fL },
+ { 0x50a619149252ccL, 0x9eb4a14236b4b9L, 0x9b1b2158e00f78L,
+ 0x27add366ea9c23L, 0xef61763c3a8e79L, 0xed4542fd82ce56L,
+ 0xa8737e70caed75L, 0xeca0ac2d452d76L }
+ },
+ {
+ { 0x20c07793d082d0L, 0x6e3ce64c9e9f3bL, 0xb3a4dce75a195fL,
+ 0x3a3c305bdd9f24L, 0xe2545c88688942L, 0xa463c82080f32bL,
+ 0x442974842686b8L, 0xf50e20d7213866L },
+ { 0x265ac523826e74L, 0x26fba57228e8ecL, 0x8a1e1dbe6b3ed8L,
+ 0x7c7b278f0fe65aL, 0x9a6df23c395234L, 0x99562060b0f114L,
+ 0x440c8c4ef90837L, 0x21ad22a3645f65L }
+ },
+ {
+ { 0x1e023a6edd31b2L, 0xf76d1459ff8668L, 0x970705617b45c8L,
+ 0x06120781e88e37L, 0x85c51c8922faacL, 0x4df392e22756d9L,
+ 0x8907fd0a03c98eL, 0x626f46a52ea51cL },
+ { 0xf8f766a486c8a2L, 0x8c499a288ed18cL, 0x44d2dc63c4f0deL,
+ 0x47dde686f2a0b6L, 0x9a655f84a973fdL, 0x3e7124e786ac80L,
+ 0x699e61ce8a0574L, 0xdf0ba9a31cdd0dL }
+ },
+},
+{
+ {
+ { 0x76270add73e69bL, 0x991120fc67d38aL, 0x7be58309469f0cL,
+ 0x93aba597db40acL, 0x2b707bc822fc08L, 0x4199fc069551cdL,
+ 0x38deed4f367324L, 0xca518e12228787L },
+ { 0x72f1befd9a9277L, 0x57d4aabe49ae90L, 0x13810d5db23478L,
+ 0x2a8b7809b4b77fL, 0xb542f4e1b4e004L, 0x4080fd03ec77f0L,
+ 0xb49e9fecec6596L, 0x20338d33f16037L }
+ },
+ {
+ { 0x4adcdae53554b0L, 0xfea4906e04c4dbL, 0x0808bec7748233L,
+ 0xde7477c47148d7L, 0xdd9124c03da38cL, 0x6b2503125ee8e9L,
+ 0xae67399b0d6161L, 0x70c4acd82203b6L },
+ { 0x9683916d31dae8L, 0x34775031ac7f69L, 0x9553153988e4adL,
+ 0xb58f41153a15e1L, 0xb65a2d492ba2ddL, 0x7c3efb1a90169cL,
+ 0x210f45e6b1747dL, 0x16e8d1bcff488dL }
+ },
+ {
+ { 0x252adf89d703dbL, 0x259ac1dfdfeb39L, 0x7faf6af115e806L,
+ 0x7aaefd6c1aff21L, 0x80542107c0113dL, 0x481f1a5e19b4b1L,
+ 0x7c17d43fcc8c61L, 0x8b04452bb0bbbeL },
+ { 0xe51e5f54cebae1L, 0x05341ba56a414cL, 0x0083a2c7fb8a30L,
+ 0xb4663f277f4952L, 0xce72eec4bb0074L, 0x74fdd66a3584d1L,
+ 0x6b9e58eb02e076L, 0x5be45d53b961f4L }
+ },
+ {
+ { 0xc7474f31ab2e0bL, 0x2838ccbf4bf454L, 0x634392ef3c3eacL,
+ 0x440e40a137602bL, 0xeea67e9d1ae8e3L, 0xafdf93a77e221eL,
+ 0x3c9f3da2719a10L, 0x466ecef32c8256L },
+ { 0x1061c19f9c432fL, 0xa1332d9b1c7d98L, 0xbc735f2a425c2cL,
+ 0x1429cdf4b1bccbL, 0x77b42a16bbb5f9L, 0x30078e35955ae4L,
+ 0x8acd77721cc315L, 0xaa90d5fe86fa99L }
+ },
+ {
+ { 0xfcfd460721115aL, 0x6a7de3e08269b8L, 0xe5964a696dd47eL,
+ 0x6717cd58dca975L, 0x7ea4ebe98b149eL, 0x6f894d5b7b8057L,
+ 0xbd6f9607f30e31L, 0x61ca45323df092L },
+ { 0x32241f99d782f3L, 0x55173b02abfae2L, 0x0abe0edd15bbbdL,
+ 0xb6d3c0ab438abbL, 0x62fb4679ffa20bL, 0x30926b5d31560aL,
+ 0x44bf27c2a0aa6dL, 0xf7473131a4cb97L }
+ },
+ {
+ { 0xa2f6c0db0535deL, 0xcb02ae1c855166L, 0xc699e6bb3422f0L,
+ 0x774febe281ba8aL, 0x1d9d24fffabcc7L, 0x0b31ba1fe12ba5L,
+ 0x4c8680313d0af7L, 0x90640d32f47160L },
+ { 0xa0c4bf45876603L, 0x717f6fa950ab08L, 0xf12bb53a710de8L,
+ 0xc500c616a88f50L, 0x0070f992645351L, 0x57aab5d2446893L,
+ 0xd553fa8b68f657L, 0xe8537c1693c55dL }
+ },
+ {
+ { 0x58e86eb7fc7684L, 0xdf330f7bfc73a9L, 0x41e337dcc11936L,
+ 0x36d92006e35759L, 0x01327033500d8bL, 0xfa684059483354L,
+ 0xc8f2980667851bL, 0x538ec8918296b0L },
+ { 0xa2a2c4fcff55f9L, 0xb260d4d60d20bdL, 0x3ed576fd9cc59fL,
+ 0x4ed8c64d514fccL, 0x37ebfb2c22b315L, 0xca67a3694c212cL,
+ 0x4f8e08c3a1795eL, 0x498f9264e7261fL }
+ },
+ {
+ { 0xfea7382c59b3d4L, 0xb9942ed3f2925fL, 0xe4b00dc8ea77e8L,
+ 0x74a18ec3cab02eL, 0xbbbb752ef16d0bL, 0x639da4fffab032L,
+ 0xc371a4a3aa30f0L, 0x8e26b22caa175bL },
+ { 0x94e41567e2b62eL, 0x7cceea625a794cL, 0x931d2f4479f015L,
+ 0x946183d90b25b2L, 0x1504e9768a2807L, 0xa7577d3fa49dddL,
+ 0x24fc87edd48699L, 0x9edefd63d7d99cL }
+ },
+},
+{
+ {
+ { 0x0508b340f0b450L, 0xe0069a5c36f7f4L, 0x26556642a5a761L,
+ 0x0193fd8848e04dL, 0xc108cf573fe2e7L, 0x05eb0ecfd787d4L,
+ 0x1555ccbff28985L, 0xb5af09f651b995L },
+ { 0x167d72ce1134beL, 0xd6d98bf57c669aL, 0x40fb7166dd76faL,
+ 0xeabbf202a41b31L, 0x300ff0e09b75b0L, 0x32b6fadd9a0c1eL,
+ 0x805188365a80e0L, 0x8bef69332110feL }
+ },
+ {
+ { 0x637802fbef47d4L, 0xfac114b2d16eaaL, 0x7b3f3ab0415644L,
+ 0x17ab8d12dd895bL, 0x271b7fe87195f3L, 0xa3f867ea71f65fL,
+ 0x39ba40cc80583aL, 0x6db067256e1fccL },
+ { 0x4feab4e06662a8L, 0xc857415c74bd46L, 0x18032ed732b126L,
+ 0x87c8aea7a099eaL, 0xb4a753536fe0a8L, 0x33a98da27673f6L,
+ 0x3e40c022b8e549L, 0x2def1af9a4c587L }
+ },
+ {
+ { 0x9618b68a8c9ad9L, 0xd70b4aa49defdaL, 0xae8b1385f788efL,
+ 0x87c3542dd523f4L, 0xe42c7055c5b004L, 0x6303360fa7df57L,
+ 0x33e27a75f6d068L, 0x9b3268e8ff331aL },
+ { 0x845cc9623ee0c3L, 0x003af70ac80084L, 0x6a9f931530c41dL,
+ 0xa1d7051bb127f0L, 0x642ce05ca36245L, 0xc34205b0323ee9L,
+ 0x7cc8912b7b3513L, 0x6252cc8076cbdbL }
+ },
+ {
+ { 0x10e68a07089522L, 0x36c136158fc658L, 0x490397d74723a4L,
+ 0x42692c0519d56cL, 0x69d251bf1ff235L, 0xe689d03c2cbf37L,
+ 0xf04ceba825b7f4L, 0xd6b9bee2281c2eL },
+ { 0xc52ef3fe0043abL, 0x351bf28d1d1be8L, 0x277615f0f18a5aL,
+ 0x31f717f5d6800fL, 0xf5fb82dab922e2L, 0x99aee2f2d6ae43L,
+ 0x42477fec63b982L, 0x904aeb1a594a01L }
+ },
+ {
+ { 0xaa82174eb39974L, 0xbc38e6195e6aa0L, 0x6a3df8a25c0675L,
+ 0xf324203ffbe739L, 0xfa5a0b4a3f0649L, 0x79c87327a7a6b8L,
+ 0xeb65ecd40ad3f5L, 0x718d416e4e45c5L },
+ { 0x029dbf4e2326fdL, 0x0c63416e7942f0L, 0x6d0c7286f4e678L,
+ 0x59f0b10a138601L, 0x8a1d9788d92ea9L, 0x9f8d712c22eca5L,
+ 0x73970447b6b96bL, 0xa2d49eee6fb955L }
+ },
+ {
+ { 0x249f900bf14a19L, 0xd3522da63a8cd2L, 0x28a32f386964d2L,
+ 0xacf712bc1fa743L, 0x98a9bfc0bb94d3L, 0x318ece1bc06824L,
+ 0xfc476754fce7f0L, 0x19caec9e4135b7L },
+ { 0x6de68a8c6817bbL, 0x7121960f3b6d89L, 0xa7d4261f5a818eL,
+ 0x0c0ba519157455L, 0x78b6acf450d5ffL, 0x198b4934e8649aL,
+ 0x0941a3cfd05da3L, 0x264ea4adb55951L }
+ },
+ {
+ { 0xcfee91c46e5a31L, 0x47b6806fff7366L, 0xdb14be45df849dL,
+ 0x3c5e22bac66cc7L, 0x7f3f284a5f4769L, 0x4e00815383be36L,
+ 0x39a9f0b8072b0bL, 0x9887cd5c7eadd6L },
+ { 0x7dd8f05b659511L, 0x15c796dd2e1cb9L, 0xe5edb0c0d31345L,
+ 0x2025df06939c60L, 0x6314c08bf15de1L, 0x03c154804c7fb5L,
+ 0x413337fbb5d3edL, 0xfc20b40477e983L }
+ },
+ {
+ { 0x7f968805db0ef9L, 0x05562dee9c2a70L, 0x071e5bc7dae133L,
+ 0xa8cdd12237fc4aL, 0x6d565e74ea492bL, 0xa17cf94381ee52L,
+ 0x6ab8a4e9f5c546L, 0xbb642f340288efL },
+ { 0x64e59215df5c2dL, 0x43696e3bb906f4L, 0x73a841a74ae46cL,
+ 0xe264883c506b8aL, 0x9542e1aa1be548L, 0x89385395e81b4aL,
+ 0x5642cfaeaca6ceL, 0xed8077b806e0f9L }
+ },
+},
+{
+ {
+ { 0x1c776c47e13597L, 0x0ec8b289e584fdL, 0x0bb6043b8b61e8L,
+ 0xdcc17489cd835bL, 0x493e6ac39fef9aL, 0xb44eb34d133e17L,
+ 0xfebcd0071cb6f9L, 0xe6cf543d20eff2L },
+ { 0xf265cad0a004c7L, 0x9b06c9dd35cc12L, 0x769f985cb4ea53L,
+ 0x29160a20993434L, 0xdf8dd108d939c4L, 0xefa177c6711e2fL,
+ 0x1695790cd7a2cdL, 0x38da3d777f6642L }
+ },
+ {
+ { 0x9bfcfd96307b74L, 0xc26a36dbfdabc3L, 0x9341be04abe28eL,
+ 0xdb20b5273d1387L, 0xf8d229c3d1949cL, 0xf1e0afeb8b3a41L,
+ 0x29c60dfed565d0L, 0x6930bb58b43b2cL },
+ { 0x1d76527fc0718fL, 0xdb981431f67189L, 0x0c62f6451f32ccL,
+ 0x70a66268bd35e5L, 0x1725641c1cece7L, 0x7f130a8f96f4a4L,
+ 0x72319e9f06ee98L, 0x215b73867bf9b2L }
+ },
+ {
+ { 0x8d1bec20aaddd7L, 0xfb8b95bb8be4f9L, 0xeac193efde1026L,
+ 0xa5edea79d5860cL, 0x4adbaea44280d3L, 0xce8b67038f4798L,
+ 0x914c107ec30deaL, 0xbdc5cf7000776bL },
+ { 0xb6fd7d1a206a13L, 0x9941ebadae986eL, 0x76c27a81f1caaaL,
+ 0x6967c123f108b4L, 0x6f115284aea2d0L, 0x9bb4319144ddacL,
+ 0x1a4d3eac8ec6fcL, 0xfe4b0b8bf37420L }
+ },
+ {
+ { 0x5d9a4a1ec0ac6fL, 0x84b79f2fc7c80dL, 0x64222f7c14fac3L,
+ 0xdd9e039c23b3f2L, 0x4a84abdea956bbL, 0x370dcbaebe09dcL,
+ 0x79a9ea8e0eaf82L, 0x4cfb60aaee375fL },
+ { 0x6a10dbf9106827L, 0xa3ba5cf43f305bL, 0x481b885c1bb083L,
+ 0x2f52380b3117b1L, 0x0066122ddd6791L, 0x4f8923e63bace3L,
+ 0x5c5f499ecb88d4L, 0xfdc780a3bac146L }
+ },
+ {
+ { 0x34b70ae7ba1f71L, 0x909182945bd184L, 0x3b39778e707313L,
+ 0xdeefc5e6164e91L, 0xbb55bed4971f39L, 0x7d523398dafc8bL,
+ 0x82391bfa6adf0fL, 0xfd6f90ae319522L },
+ { 0x60fdf77f29bbc9L, 0xeff9ed8aaa4030L, 0x978e045f8c0d3fL,
+ 0xe0502c3eed65cdL, 0x3104d8f3cfd4c8L, 0xab1be44a639005L,
+ 0xe83f4319eeab3fL, 0x01970e8451d797L }
+ },
+ {
+ { 0xbc972f83180f4bL, 0xac053c0617779dL, 0x89392c57fa149fL,
+ 0xdc4699bbcb6263L, 0x0ae8b28ce12882L, 0xdca19a7af1a4dcL,
+ 0xd3d719f64e1a74L, 0xbb50201affdd5dL },
+ { 0x56f73107ac30e9L, 0x65cc9c71878900L, 0x83f586627338a3L,
+ 0x122adefac5bb13L, 0x97de2001bcd4d5L, 0x6ed3985b8aa3a0L,
+ 0x8680f1d6821f9bL, 0xcb42028dda9f98L }
+ },
+ {
+ { 0xcdb07080ec2db3L, 0xe28c8333dad1a1L, 0x2093e32de2da07L,
+ 0x731707383b8987L, 0xad17871f552b8dL, 0x846da9851cf70aL,
+ 0xf94a16e5c4f5e1L, 0x84299960f8348aL },
+ { 0x4bf3f6898db78aL, 0xad77fa83d19b52L, 0x69767728b972dcL,
+ 0x7dfa35a5321be0L, 0x9881846dd344a6L, 0xe550292ad4e2a8L,
+ 0x8075217bc68bf1L, 0xdd837c4893be15L }
+ },
+ {
+ { 0x09c931ed4fab5bL, 0xb2dcf08b77a0f1L, 0x7dac5c0e0d38a6L,
+ 0xa5570b00ae73afL, 0xc7c19d3f5aed28L, 0x575fa6f5251e92L,
+ 0xb843cd6cdf7275L, 0xd9d3d8e9a01287L },
+ { 0xf94e356b3c370bL, 0xc62b99ffe464b0L, 0x7792650a986057L,
+ 0xeaa67d5c4b1874L, 0xba1ba4d0b07078L, 0xdbf636d7a03699L,
+ 0x1a16c34edd32a3L, 0x6ce2495a45cb5dL }
+ },
+},
+{
+ {
+ { 0xd7c4d9aa684441L, 0xce62af630cd42aL, 0xcd2669b43014c4L,
+ 0xce7e7116f65b24L, 0x1847ce9576fa19L, 0x82585ac9dd8ca6L,
+ 0x3009096b42e1dbL, 0x2b2c83e384ab8bL },
+ { 0xe171ffcb4e9a6eL, 0x9de42187374b40L, 0x5701f9fdb1d616L,
+ 0x211e122a3e8cbcL, 0x04e8c1a1e400bfL, 0x02974700f37159L,
+ 0x41775d13df8c28L, 0xcfaad4a61ac2dbL }
+ },
+ {
+ { 0x6341b4d7dc0f49L, 0xaff6c2df471a53L, 0x20ec795fb8e91eL,
+ 0x4c7a4dfc3b7b62L, 0x9f33ff2d374938L, 0x38f8c653a60f2eL,
+ 0xc1168ac2efef73L, 0x046146fce408eeL },
+ { 0x9b39ac0308b0c3L, 0xe032d6136b8570L, 0xee07d8dfc4aacfL,
+ 0x0a82acbd5a41ddL, 0xbe0ded27c3d726L, 0xce51d60b926ce9L,
+ 0xfa2f7f45806c1eL, 0xe367c6d1dec59cL }
+ },
+ {
+ { 0x64511b6da2547bL, 0x76a349c0761405L, 0x37d662601223abL,
+ 0x0e243c1f4d7c48L, 0xdc9c8b4da756a0L, 0xc7430dfd72e7e9L,
+ 0x0eb130827b4210L, 0x7a9c044cf11cbdL },
+ { 0x2c08ff6e8dd150L, 0x18b738c2932fc6L, 0x07d565104513e8L,
+ 0x0ca5cffaa40a17L, 0xd48634101baa8fL, 0xfb20fafb72b79eL,
+ 0x1a051e5654020fL, 0xe3b33174e17f23L }
+ },
+ {
+ { 0x05910484de9428L, 0x620542a5abdf97L, 0xaa0ededa16a4d1L,
+ 0xa93f71c6d65bb9L, 0x88be135b8dfaf9L, 0x1d9f4e557ca8eeL,
+ 0x4c896aa26781adL, 0xd3fbe316c6c49fL },
+ { 0x088d8522c34c3dL, 0xbb6d645badff1eL, 0xe3080b8385450dL,
+ 0x5ccc54c50ab1f3L, 0x4e07e6eac0657dL, 0xa7ba596b7ef2c0L,
+ 0xcceca8a73a81e9L, 0xa0b804c8284c35L }
+ },
+ {
+ { 0x7c55956f17a6a2L, 0xb451d81789cfa8L, 0xdf414e82506eaaL,
+ 0x6ef40fbae96562L, 0x63ea2830e0297eL, 0xf5df26e73c46faL,
+ 0xe00641caac8bceL, 0xc89ed8f64371f3L },
+ { 0xd22b08e793202eL, 0x39a9033875cb50L, 0xe64eec0f85ddb4L,
+ 0xdce45a77acf7b5L, 0x39d1e71b9b802dL, 0xafdfe7cbd559acL,
+ 0x17ec1f8809eeb5L, 0x8c0e38a4889b8cL }
+ },
+ {
+ { 0x47eabfe17089daL, 0x2d18466ec90c50L, 0xa511aa45861531L,
+ 0xebb3d348c39b39L, 0xa0ac4daf1b5282L, 0xea26be7a9dadbaL,
+ 0x8992ba8554d86eL, 0x7fcbdb6d5f2ef5L },
+ { 0x320e79b56863e7L, 0xeb9d0c0a7dce2dL, 0xb9f4031784cbc6L,
+ 0x68823ee7ac1f81L, 0xa6b6f4f9d87497L, 0x83c67b657f9b6eL,
+ 0x37357470fef2a7L, 0xf38028f59596e2L }
+ },
+ {
+ { 0x9ea57ab7e82886L, 0x18221c548c44d5L, 0xbf8e6cf314a24fL,
+ 0x70ff18efd025e5L, 0x08d03de5334468L, 0x2b206d57404fb7L,
+ 0xb92327155e36b0L, 0xcc7604ab88ddd9L },
+ { 0x3df51524a746f0L, 0x8fdebd8168e3fcL, 0xffc550c7f8c32cL,
+ 0x1dbbc17148743eL, 0xd48af29b88e18bL, 0x8dca11c750027cL,
+ 0x717f9db1832be3L, 0x22923e02b06019L }
+ },
+ {
+ { 0xd4e06f5c1cc4d3L, 0x0fa32e32b4f03aL, 0x956b9afc4628d0L,
+ 0x95c39ce939dad1L, 0x39d41e08a00416L, 0xfd7ff266fb01aaL,
+ 0xc6033d545af340L, 0x2f655428e36584L },
+ { 0x14cfb1f8dff960L, 0x7236ffcda81474L, 0xc6a6788d452d0fL,
+ 0x2ad4a5277f6094L, 0x369d65a07eea74L, 0x27c6c38d6229aaL,
+ 0xe590e098863976L, 0x361ca6eb38b142L }
+ },
+},
+{
+ {
+ { 0x6803413dfeb7efL, 0xb669d71d3f4fadL, 0x5df402ac941606L,
+ 0xe5d17768e6c5b7L, 0x131bcb392ab236L, 0x7f1fb31ce2e0e0L,
+ 0xa2c020d9e98c35L, 0x33b23c0f28657bL },
+ { 0xed14e739cf7879L, 0x10d4867b4357b3L, 0x127cea331e4e04L,
+ 0xc60d25faa5f8a7L, 0xfef840a025b987L, 0x78081d666f2a0aL,
+ 0x0fa0b97ac36198L, 0xe0bb919134dc9fL }
+ },
+ {
+ { 0xc1d2461cc32eaeL, 0x0fdbfdf0f79a37L, 0x70f2bc21c95f02L,
+ 0x7d68bec372cddfL, 0x44f78178439342L, 0xa3d56784843a6cL,
+ 0xbadf77a07f8959L, 0xf45819873db4caL },
+ { 0xe8eaaf3d54f805L, 0x2f529d1b84c1e7L, 0x404e32e21e535cL,
+ 0xabac85c159b5f5L, 0x4e8e594b00466fL, 0x40fcaabc941873L,
+ 0x3b4e370be407c6L, 0xccd57885b2e58dL }
+ },
+ {
+ { 0x3ee615e88b74a8L, 0xd7d6608eab4e69L, 0x27cf9f1e4ace36L,
+ 0x282359e7aebabbL, 0x96e509bf6d162fL, 0xad906f3f1a290aL,
+ 0xe7d6c4f1314a58L, 0xeecffe4218431dL },
+ { 0xa66e0e9e2cfed9L, 0xb0887ec71f0544L, 0xd34e36ba04c5d7L,
+ 0x094daa5ed4392dL, 0xcda83adc8aa925L, 0x1adef91b979786L,
+ 0x3124dcbfddc5d6L, 0x5cc27ed0b70c14L }
+ },
+ {
+ { 0x386dbc00eac2d8L, 0xa716ecbc50ca30L, 0x9e3fc0580d9f04L,
+ 0x37dde44cfeacebL, 0xd88d74da3522d5L, 0x6bb9e9f2cf239aL,
+ 0x9e7fb49a7cbfecL, 0xe1a75f00a5c0efL },
+ { 0x6e434e7fb9229dL, 0x0ec6df5c8a79b3L, 0x7046380d3fb311L,
+ 0xe957ef052e20faL, 0x0f4fe9a9ef4614L, 0x1b37d9c54d8f2bL,
+ 0x23b2dc139d84a2L, 0xf62c4f6724e713L }
+ },
+ {
+ { 0xbd6922c747e219L, 0x34d14383869b7bL, 0x8c875a596f2272L,
+ 0xd9602c03fe361eL, 0x081348f744839fL, 0x61bd16c61ac1f1L,
+ 0x993b727d8da4e1L, 0xbb40ba87741271L },
+ { 0xe6dcc9881dcfffL, 0x9f513f593ce616L, 0xdc09683618cd8fL,
+ 0xc3b1d1026639beL, 0xe8f149fc762ee2L, 0x59f26efb244aaeL,
+ 0x3f2de27693dd96L, 0xd8b68f79c3a7deL }
+ },
+ {
+ { 0x6fa20b9970bd5bL, 0x87242d775f6179L, 0xa95a6c672d9308L,
+ 0x6eb251837a8a58L, 0xfdea12ac59562cL, 0x4419c1e20f1fc3L,
+ 0x0c1bd999d66788L, 0x4b7428832c0547L },
+ { 0x4f38accdf479abL, 0x01f6271c52a942L, 0xe3298f402ca9a7L,
+ 0x533dacab718fc8L, 0x133602ab093ca8L, 0xc04da808f98104L,
+ 0xd0f2e23af08620L, 0x882c817178b164L }
+ },
+ {
+ { 0x28e6678ec30a71L, 0xe646879f78aca1L, 0x868a64b88fa078L,
+ 0x671030afee3433L, 0xb2a06bb87c0211L, 0x202eca946c406aL,
+ 0x64d6284e4f0f59L, 0x56ae4a23c9f907L },
+ { 0x5abbb561dcc100L, 0x6fef6cf07c7784L, 0xb6e25cddb7302dL,
+ 0xa26785b42980e8L, 0xe7d4043fb96801L, 0x46df55d8e4282bL,
+ 0x9c0a5f5c602d6eL, 0xf06560475dfe29L }
+ },
+ {
+ { 0x0e82a1a3dcbc90L, 0xb1ee285656feacL, 0xfa4353b0d3d3b2L,
+ 0xc2e7a6edd5c5dfL, 0x13707e1416ce53L, 0xc84ce0787ebc07L,
+ 0xdd273ce8a9a834L, 0x432a6175e8e1e7L },
+ { 0xa359670bd0064aL, 0xc899dd56534516L, 0x666560edb27169L,
+ 0x1537b22a19a068L, 0x3420507eac7527L, 0x479f25e6fc13a7L,
+ 0xc847acc1bc19b3L, 0xecdecf00b20d45L }
+ },
+},
+{
+ {
+ { 0x6f241004acea57L, 0xdace1c6da68597L, 0xea7dd4150ce77fL,
+ 0x1aecb841585884L, 0x92ff208ea4a85cL, 0xde9433c88eebd2L,
+ 0x53cd3183f4d289L, 0x397085826539afL },
+ { 0x4b57599b827d87L, 0xdc82ac03d77638L, 0x694336652f6e61L,
+ 0xb8fc4b0ad5e8a6L, 0x1b6f7dcf388642L, 0x6f24533a74dd57L,
+ 0xc66937841750cfL, 0x06757eb28a37afL }
+ },
+ {
+ { 0x0e70d53c133995L, 0x88a5e0c7c8c97dL, 0x4e59dbf85f3be3L,
+ 0x0f364ac0e92698L, 0x3a1e79bef6940fL, 0xc8a3941d85d23aL,
+ 0x143bb999a00e58L, 0x61cf7d6c6f2f10L },
+ { 0x979c99485150feL, 0xcfd0df259d773fL, 0xce97b9daab7bcdL,
+ 0xc9fff8e6afd8fcL, 0x246befd89a4628L, 0xf6302821567090L,
+ 0x15393426749c58L, 0xff47d0ea0f3fd3L }
+ },
+ {
+ { 0x09b0bfd35f6706L, 0x74645812c82e69L, 0xb60729f50d5fe9L,
+ 0xf13324595c74f1L, 0x33647e3bb76c89L, 0x01264045a9afccL,
+ 0x46d57ee0f154abL, 0x2efa55525680a4L },
+ { 0x12ebfc65329d90L, 0xcb37ae579800afL, 0x5bb53496f8e310L,
+ 0x9b59c63f1bb936L, 0x5b49baaf4610e9L, 0x2bbeeef4f2d6acL,
+ 0x87ee21e0badc67L, 0x12e2aadf1ddfa0L }
+ },
+ {
+ { 0x5b4668fa9109eeL, 0xfa951338a6cea2L, 0xe45e6fc4068e16L,
+ 0x8ae9a0c0205ed8L, 0x2993b96679b79bL, 0xc6b878fed604d3L,
+ 0x01d020832c77f3L, 0xd45d890495a1abL },
+ { 0x99348fa29d2030L, 0x961f9a661f8f7aL, 0xfd53212674f74bL,
+ 0x45cee23b3e72bcL, 0x3fccb86b77e2d5L, 0xdff03104219cb7L,
+ 0x233771dc056871L, 0x1214e327d2c521L }
+ },
+ {
+ { 0x9f51e15ff2a8e1L, 0x86571c5138bc70L, 0xbfc4caf0c09d46L,
+ 0x65e33fec2a0c18L, 0x8214392426867dL, 0x51ce6c080ae4edL,
+ 0x6cbe8d7b110de6L, 0x7f6e947fd22ea4L },
+ { 0x7373a75cadefc4L, 0x6fca1d2b0c682fL, 0xcd2140df3c7c1eL,
+ 0x8653a37558b7a5L, 0x653e74e55eb321L, 0xbe0c6b3c31af73L,
+ 0x3376379f4fc365L, 0x3570b3771add4dL }
+ },
+ {
+ { 0x9061ec183c3494L, 0xaf2f28d677bc95L, 0x6fe72793bf8768L,
+ 0xc5f50e30fa86d8L, 0x6c03060a3293ceL, 0x4d53357e2355a6L,
+ 0x43a59eae4df931L, 0x6f48f5d13b79c6L },
+ { 0xa4d073dddc5192L, 0x6d0e318a65773fL, 0x1008792765de9eL,
+ 0xa724ed239a0375L, 0x510ff1497d7c9eL, 0x251f6225baa863L,
+ 0x86464fe648a351L, 0xf85e98fd50fd91L }
+ },
+ {
+ { 0x29c963486ee987L, 0x93e8e5210dcc9fL, 0xa1fc4d1c910b1fL,
+ 0x015acacfeb603eL, 0xc9f25f80844a5fL, 0x50de93c73f4dacL,
+ 0x1758783310a4aaL, 0x544d570358f106L },
+ { 0x4eeec7b1dc68caL, 0x6238e6fe00fbcbL, 0x34d394cb4e83c9L,
+ 0x764ffa22292656L, 0x5614cd1f641f2eL, 0x4252eb69e07234L,
+ 0xcbaef4568d2ba4L, 0x8c9c5508a98b17L }
+ },
+ {
+ { 0xf235d9d4106140L, 0x1bf2fc39eb601eL, 0x6fb6ca9375e0c3L,
+ 0x4bf5492c0024d2L, 0x3d97093eb54cc6L, 0xc60931f5c90cb5L,
+ 0xfa88808fbe0f1aL, 0xc22b83dd33e7d4L },
+ { 0x9cfec53c0abbf5L, 0x52c3f0a93723dfL, 0x0622b7e39b96b6L,
+ 0x300de281667270L, 0x50b66c79ef426aL, 0x8849189c6eb295L,
+ 0xeaec3a98914a7eL, 0x7ed56b0c4c99e0L }
+ },
+},
+{
+ {
+ { 0x7926403687e557L, 0xa3498165310017L, 0x1b06e91d43a8fdL,
+ 0xf201db46ac23cbL, 0x6f172ad4f48750L, 0x5ed8c8ce74bd3eL,
+ 0x492a654daba648L, 0x123010ba9b64ffL },
+ { 0xa83125b6e89f93L, 0x3a3b0b0398378aL, 0x9622e0b0aebe7cL,
+ 0xb9cbfdc49512a4L, 0x13edffd6aaf12aL, 0x555dff59f5eafdL,
+ 0x3cba6fe1212efaL, 0xd07b744d9bb0f8L }
+ },
+ {
+ { 0x45732b09a48920L, 0xf3080fc13ff36dL, 0x9347395de8f950L,
+ 0x14d025a382b897L, 0x60c5a7404d72adL, 0x30be7e511a9c71L,
+ 0x43ffabd31ac33aL, 0x97b06f335cbb14L },
+ { 0xe4ff5c57740de9L, 0x5fed090aacf81eL, 0x97196eee8b7c9dL,
+ 0x316dcd1045910bL, 0x7a2b2f55ad8c63L, 0x674fffdc5b03bbL,
+ 0xc1cd133e65953cL, 0x3c060520a83556L }
+ },
+ {
+ { 0x797c3f6091c23dL, 0x2ea2de339c9c05L, 0x5d958b4a31f67cL,
+ 0xf97afe5d5f088cL, 0xbcfbd2a0b37243L, 0xc43ad3eeca630cL,
+ 0xb92a33742845e0L, 0x970bff7a9a0f16L },
+ { 0x86355115970a79L, 0xcee332ef205928L, 0x2c58d70c04c208L,
+ 0xdbfe19a3f5e5bfL, 0x8f8f2c88e51c56L, 0xb61f58e8e2da75L,
+ 0x4046a19624d93fL, 0x7de64dbe1f9538L }
+ },
+ {
+ { 0xd018e1cc2d850eL, 0x8cdb64363a723cL, 0x9a65abe90a42afL,
+ 0xfeece9616f20ccL, 0xc906800d5cff56L, 0x0acf23a3f0deedL,
+ 0x2143061728dd3aL, 0x66276e2b8ce34cL },
+ { 0x23700dc73cc9c7L, 0xdb448515b1778bL, 0x330f41e4aab669L,
+ 0x2f5aabcf5282a4L, 0xff837a930f9e01L, 0x1a1eb2f901cc98L,
+ 0xd3f4ed9e69bd7fL, 0xa6b11418a72a7dL }
+ },
+ {
+ { 0x34bde809ea3b43L, 0x5ddcb705ced6aeL, 0x8257f5b95a6cb8L,
+ 0xaac205dc77dcb8L, 0x77d740d035b397L, 0xca7847fcf7e0a6L,
+ 0x9404dd6085601bL, 0x0a5046c457e4f9L },
+ { 0xcaee868bc11470L, 0xb118796005c5f6L, 0xcc04976ec79173L,
+ 0x7f51ba721f6827L, 0xa8e3f0c486ff7eL, 0x327163af87838cL,
+ 0xcf2883e6d039fdL, 0x6fb7ab6db8b0e2L }
+ },
+ {
+ { 0x8ca5bac620d669L, 0xff707c8ed7caa9L, 0xdaefa2b927909bL,
+ 0x1d2f9557029da3L, 0x52a3ba46d131a0L, 0xe5a94fd3ab1041L,
+ 0x508917799bc0aeL, 0xf750354fa1bd16L },
+ { 0xdd4e83a6cd31fdL, 0xd33505392fac84L, 0xf914cbc1691382L,
+ 0x669683fda6ade6L, 0x69446438878513L, 0x429d3cc4b1a72dL,
+ 0x655c46a61eec36L, 0x881eded4bc4970L }
+ },
+ {
+ { 0x5b39d377ca647fL, 0x41533c1e917b34L, 0xea2aeb57daf734L,
+ 0xf1ef1eb1286560L, 0x582f2e008e0473L, 0x5913d7d5edc74aL,
+ 0x588c7ec3c1e754L, 0xbd6db057146fe1L },
+ { 0x3b0d49e7634907L, 0x4c65ce4e43b9ccL, 0xb87e9582d92d5bL,
+ 0x05135727ab1519L, 0x03ec0848c3aed0L, 0x4d7aa21561a641L,
+ 0xe5f821199e92adL, 0x379b55f48a457cL }
+ },
+ {
+ { 0x8317c34d6a8442L, 0xb0ab4a5ae499daL, 0xebcb16e720e8ebL,
+ 0xfd5c5639a96908L, 0xcab4d67ad23acfL, 0xa600a79bcdf748L,
+ 0x18a6340a2a6a51L, 0xf2f415c3aabd69L },
+ { 0xdb38a4f747258aL, 0xb6ea5602e24415L, 0xfad1ea9f1f7655L,
+ 0x4e27eb5c957684L, 0xf8283e1b2e1cfcL, 0x8f83bd6aa6291cL,
+ 0x28d23b55619e84L, 0xb9f34e893770a4L }
+ },
+},
+{
+ {
+ { 0x1bb84377515fb1L, 0xac73f2a7b860a6L, 0x78afdfa22b390fL,
+ 0x815502b66048aaL, 0xf513b9785bf620L, 0x2524e653fc5d7cL,
+ 0xa10adc0178c969L, 0xa1d53965391c8dL },
+ { 0x09fccc5a8bcc45L, 0xa1f97d67710e1eL, 0xd694442897d0a1L,
+ 0x7030beb5f42400L, 0xdebe08c7127908L, 0x96b715c2187637L,
+ 0xc598250b528129L, 0x0f62f45a1ccb07L }
+ },
+ {
+ { 0x8404941b765479L, 0xfdecff45837dc4L, 0x1796372adbd465L,
+ 0x5f84c793159806L, 0x6d2e46b6aaad34L, 0xd303b4a384b375L,
+ 0x440acd5b392002L, 0x4f2a4a7c475e87L },
+ { 0x038e1da5606fc2L, 0x2d821c29c2f050L, 0xc074cb3f139db4L,
+ 0xde2fee74ec59beL, 0x5a819eea84ed59L, 0xd65c62c3e98711L,
+ 0x72eb440b9723c1L, 0xb92775401be611L }
+ },
+ {
+ { 0x929fe64ab9e9fcL, 0x04379fd0bf1e85L, 0xb322093bc28ee3L,
+ 0x78ac4e2e4555e1L, 0xdb42b58abc5588L, 0x1c1b5e177c8b12L,
+ 0xf6d78dd40366c4L, 0xc21ff75bdae22eL },
+ { 0x1e3d28ea211df2L, 0xc5a65a13617c0aL, 0x3fa02c058140d5L,
+ 0x155c346b62d10cL, 0xc9cf142e48268fL, 0xdc140831993bc3L,
+ 0x07c44d40ee69dcL, 0x61699505e2ac46L }
+ },
+ {
+ { 0x44e4a51d0fb585L, 0x00846bef1f3ce8L, 0xedef39a8e2de1eL,
+ 0x430afe333b3934L, 0xac78b054337188L, 0x0f39de4c9a3f24L,
+ 0x039edddc9ae6a4L, 0xf4701578eacd51L },
+ { 0x1e396949a2f31aL, 0xc8a40f4b19a8b1L, 0xdddd10c9d239d8L,
+ 0xf974245887e066L, 0xfdb51113ea28c6L, 0xb5af0fbe1122a9L,
+ 0xd30c89f36e0267L, 0x7b1c0f774f024cL }
+ },
+ {
+ { 0x1ec995607a39bfL, 0x1c3ecf23a68d15L, 0xd8a5c4e4f59fe9L,
+ 0xacb2032271abc3L, 0xbc6bdf071ef239L, 0x660d7abb39b391L,
+ 0x2e73bb2b627a0eL, 0x3464d7e248fc7eL },
+ { 0xaa492491666760L, 0xa257b6a8582659L, 0xf572cef5593089L,
+ 0x2f51bde73ca6bfL, 0x234b63f764cff5L, 0x29f48ead411a35L,
+ 0xd837840afe1db1L, 0x58ec0b1d9f4c4bL }
+ },
+ {
+ { 0x8e1deba5e6f3dcL, 0xc636cf406a5ff7L, 0xe172b06c80ca0fL,
+ 0x56dc0985ffb90aL, 0x895c2189a05e83L, 0x6ddfaec7561ac2L,
+ 0xaa3574996283a0L, 0x6dfb2627e7cd43L },
+ { 0x6576de52c8ca27L, 0x6a4a87249018ebL, 0x00c275c5c34342L,
+ 0xe34805ad2d90c4L, 0x651b161d8743c4L, 0xb3b9d9b7312bf3L,
+ 0x5d4b8e20bf7e00L, 0x8899bdf78d3d7eL }
+ },
+ {
+ { 0x9644ad8faa9cd1L, 0x34c98bf6e0e58eL, 0x6022aad404c637L,
+ 0x2a11a737ac013bL, 0x5bdd1035540899L, 0x2e675721e022a4L,
+ 0xe32045db834c33L, 0x74a260c2f2d01cL },
+ { 0x20d59e9c48841cL, 0x05045dde560359L, 0xeba779cac998acL,
+ 0x5bed10c00a6218L, 0x25d4f8e5327ef4L, 0xa2784744597794L,
+ 0xefd68ca831d11eL, 0x9ad370d934446aL }
+ },
+ {
+ { 0x3089b3e73c92acL, 0x0ff3f27957a75cL, 0x843d3d9d676f50L,
+ 0xe547a19d496d43L, 0x68911c98e924a4L, 0xfab38f885b5522L,
+ 0x104881183e0ac5L, 0xcaccea9dc788c4L },
+ { 0xfbe2e95e3c6aadL, 0xa7b3992b3a6cf1L, 0x5302ec587d78b1L,
+ 0xf589a0e1826100L, 0x2acdb978610632L, 0x1e4ea8f9232b26L,
+ 0xb21194e9c09a15L, 0xab13645849b909L }
+ },
+},
+{
+ {
+ { 0x92e5d6df3a71c1L, 0x349ed29297d661L, 0xe58bd521713fc9L,
+ 0xad999a7b9ddfb5L, 0x271c30f3c28ce0L, 0xf6cd7dc2a9d460L,
+ 0xaf728e9207dec7L, 0x9c2a532fcb8bf0L },
+ { 0xd70218468bf486L, 0x73b45be7ab8ea8L, 0xddfc6581795c93L,
+ 0x79416606bb8da2L, 0x658f19788e07a2L, 0xa9d5b0826d3d12L,
+ 0x4d7c95f9535b52L, 0xad55e25268ef8aL }
+ },
+ {
+ { 0x94a9b0ba2bc326L, 0x485ecc5167e5f3L, 0x8340bc7c97fc74L,
+ 0x06f882b07aaa5cL, 0x4b57455849698aL, 0xd9281ebb36a0baL,
+ 0x8918c6c8b8108fL, 0xedd1eea5b50d1dL },
+ { 0x94d737d2a25f50L, 0x0e5a8232446ad0L, 0x02a54357ced3e2L,
+ 0xb09a92a4af8cedL, 0x85fc498eeecef2L, 0x06a02b9e71e3d4L,
+ 0x00ad30784bb49aL, 0xf61585e64a5b4aL }
+ },
+ {
+ { 0x915f6d8b86a4c9L, 0x944bc6ba861e1fL, 0x3091ca754465efL,
+ 0x11df859eb53a38L, 0xd44dde50144679L, 0x6c8da9a0994eddL,
+ 0xeebcebf91241efL, 0xc419354c2f6859L },
+ { 0x1f4969349581b6L, 0x5712b10bb26cb4L, 0x8fcaa41b09fd59L,
+ 0xbd39aad72e22e3L, 0xf70e794b1199b0L, 0xdf63c0cc6f863dL,
+ 0xd58166fee9df4fL, 0xb9224eac45e70bL }
+ },
+ {
+ { 0x80072face525f4L, 0x8597bd666a5502L, 0xf65e203dbc9725L,
+ 0xeccfbe3f2222a4L, 0x490aa422339834L, 0x134889162489e8L,
+ 0xaff3f80a735084L, 0x69d53d2f3f1bd6L },
+ { 0xb123ffc813341aL, 0x359084c1173848L, 0x751425ed29b08dL,
+ 0x1edda523890ad4L, 0xb64974c607cf20L, 0xa8c8cb8b42ac7cL,
+ 0xd5cb305edd42e5L, 0xf3034dc44c090aL }
+ },
+ {
+ { 0x428921dbb18e19L, 0x4cfd680fed2127L, 0x671144d92ac8c3L,
+ 0x2121901132c894L, 0x25d0e567604cd9L, 0xa372223afbc2a0L,
+ 0xcf98a5256c16f7L, 0x71f129ab5459e1L },
+ { 0xf4afdc5b668b2eL, 0xc5d937a0c2d410L, 0xe2cc4af285d54aL,
+ 0x1c827778c53e18L, 0x270f2c369a92f6L, 0x799f9ac616327aL,
+ 0xce658d9d4246f2L, 0x0fb681ffb12e36L }
+ },
+ {
+ { 0xc5ab11ee0690feL, 0x80261e33f74249L, 0x8eb4b4758c1cf2L,
+ 0x4895a80184ae9bL, 0x4a4bdb6d3e27ebL, 0xa7a1638bfd251cL,
+ 0x29ec144417a7e3L, 0xd0736093f1b960L },
+ { 0xcb1ed8349c73d1L, 0x33fc84a8d1945aL, 0x9f668dbe965118L,
+ 0x3331743a82811fL, 0xf394dec28ba540L, 0x44ce601654a454L,
+ 0x240dbb63623645L, 0xf07e7f22e61048L }
+ },
+ {
+ { 0x7c9f1763d45213L, 0x3eefa709c1f77fL, 0xde3c3c51b48350L,
+ 0x4a2bc649d481a7L, 0xfd4a58a7874f3dL, 0x96655d4037b302L,
+ 0x945252868bf5abL, 0x1b6d46a75177f6L },
+ { 0x7de6763efb8d00L, 0xb2c1ba7a741b7bL, 0xcca6af47bae6edL,
+ 0xe4378ca5b68b3fL, 0xfb757deaf71948L, 0x7f07b5ebc6ac99L,
+ 0x752a56827d636dL, 0xc8b7d1d4b8a34fL }
+ },
+ {
+ { 0x76cb78e325331bL, 0x41f41c9add2eedL, 0x03db2385c5f623L,
+ 0xbbc1d177102fa2L, 0x80f137a60182ecL, 0xfdd856955adf15L,
+ 0x4f53f5ee3373dcL, 0xec6faf021b669bL },
+ { 0x7d4e9830b86081L, 0x10d3cd9f2d979cL, 0x0f48f5824a22c8L,
+ 0x86c540c02f99eeL, 0xf4c66545e6c5fcL, 0xaf0c588bc404c8L,
+ 0x2e6edbd423118aL, 0x86e32e90690eabL }
+ },
+},
+{
+ {
+ { 0x1d12656dfbfa6fL, 0xa4980957646018L, 0x2f1071bc3597d0L,
+ 0x3df83f91dda80aL, 0x5853e28f3ae449L, 0xb853d319e19aadL,
+ 0x863f01ba0d8a46L, 0xa84fca62fef108L },
+ { 0xbe4c0b7fb84de9L, 0x40a03dcc0727bfL, 0x781f841b18575cL,
+ 0x6a63045466cddbL, 0x6be758205dc7a2L, 0x420f87f07ae811L,
+ 0x28082423bf96c8L, 0x723998c51c6821L }
+ },
+ {
+ { 0x38ab64181f5863L, 0xd82ecbd05ff9e1L, 0x339c94ea065856L,
+ 0x143054aa45156dL, 0xe6d64bf065628cL, 0xe530086a938589L,
+ 0x22d3a49385d79bL, 0x0b107900ab8245L },
+ { 0xb0d80fbca387b5L, 0x698206e35551d7L, 0x199685da10bb73L,
+ 0xa8e5fa89107378L, 0x36e5724d99dbbfL, 0xd67f476d581b03L,
+ 0x7a15be788dd1e6L, 0x8dac8e4e5baa31L }
+ },
+ {
+ { 0x4d5d88fe170ef8L, 0xb6ba5de1e9e600L, 0x4a89d41edeabc5L,
+ 0x737c66b8fac936L, 0x8d05b2365c3125L, 0x85a5cbcb61b68eL,
+ 0x8fea62620a6af9L, 0x85115ded8b50ecL },
+ { 0x5430c8d6a6f30bL, 0x8bef9cf8474295L, 0x0648f5bbe77f38L,
+ 0xfe2b72f9e47bd7L, 0xad6c5da93106e2L, 0x4fa6f3dfa7a6c3L,
+ 0xdcd2ed8b396650L, 0x7de1cce1157ef9L }
+ },
+ {
+ { 0x70a5f6c1f241d1L, 0x6c354d8798cd5cL, 0x23c78381a729fbL,
+ 0xcff8f15523cbdaL, 0x5683ff43493697L, 0xef7dbab7534f53L,
+ 0xd7bd08e2243d53L, 0x6f644cbf8072a9L },
+ { 0xac960f9b22db63L, 0xa97f41723af04dL, 0x692b652d9798afL,
+ 0x0e35967fedb156L, 0x14b5e50dfe6ee8L, 0x7597edeb411070L,
+ 0x116f3ce442b3f9L, 0xe9b5ae81b2b6dbL }
+ },
+ {
+ { 0xf4385ee2315930L, 0xc8d029827a8740L, 0x7907a8dd934a43L,
+ 0x20bc946c582191L, 0xa4acb3e6a405e7L, 0x8c1d6c843df2f5L,
+ 0x9df1593991f0b5L, 0xbb9df984d9be9dL },
+ { 0x63620088e4b190L, 0xee1421eada3a88L, 0xb84f0ccf93b027L,
+ 0x7a5d6678e95091L, 0x3974462f3e3704L, 0xfa6fb5ec593e98L,
+ 0x44b6cf7a6477d2L, 0xe885b57b09a562L }
+ },
+ {
+ { 0x6e339e909a0c02L, 0x57afff00e75f29L, 0x797d8d6fb7db03L,
+ 0xc6e11a3d25a236L, 0x643ce1c0107260L, 0xe644ec462eae1cL,
+ 0x821d5b83f5a3f5L, 0xa8ad453c0579d6L },
+ { 0x6518ed417d43a4L, 0x46e76a53f87ccdL, 0xd6cbaabf9bef95L,
+ 0x25688324f7cbcfL, 0x367159a08476b4L, 0x1d1b401be6d324L,
+ 0x348cb98a605026L, 0x144f3fe43b6b1eL }
+ },
+ {
+ { 0xbabbd787b1822cL, 0xd34ba7e2aa51f8L, 0x086f1cc41fbea4L,
+ 0x96f7eac746f3d9L, 0xad97f26281ecafL, 0x751a905a14ee2cL,
+ 0xb4e7fe90d7335fL, 0x0d97b8f4892ff0L },
+ { 0xdb8a3155a5c40eL, 0x64e5de77ba567bL, 0x4f155f71eefe88L,
+ 0xe2297e9fb6fbf4L, 0xfe24bf96c16be5L, 0x2251847cdd83e2L,
+ 0x13ac2c85eda444L, 0x49d1b85283275fL }
+ },
+ {
+ { 0xca08731423e08fL, 0x7046bb087d2f14L, 0x876f10c3bc846cL,
+ 0x2202b76358fbe3L, 0x0d4fc1c0e26ac6L, 0x1fc748bb986881L,
+ 0x609e61c8384a18L, 0x28a72d60d88e00L },
+ { 0x1332a3178c6e2fL, 0x0367919b3526a4L, 0x53989e4698fe3eL,
+ 0x14b1145b16a99bL, 0xef9ec80ddbb75fL, 0x76256240e53955L,
+ 0x54e087a8744ae1L, 0xce50e8a672b875L }
+ },
+},
+{
+ {
+ { 0x4c88b2ba29629cL, 0x946559c7b2642fL, 0x933d432f7ebe4cL,
+ 0x97109b663632c9L, 0x799b3fbe53184dL, 0xd4628710f069a6L,
+ 0x0c182a13a68351L, 0x974a8399a2437aL },
+ { 0x29f19972a70278L, 0x01b98b6d9c424bL, 0xd85a60b08f4c37L,
+ 0xcc3523f2b1da15L, 0xf922115ddffb0fL, 0xee0fe4dde84ae2L,
+ 0x810440c55365beL, 0xd2f66391a457e8L }
+ },
+ {
+ { 0x5e6879fe2ddd05L, 0x92a7545abdfc61L, 0x7dedd63a5cede8L,
+ 0x8a03b3f70df4bdL, 0xa5d1f6591f6cbbL, 0x372fde610f3fb2L,
+ 0x4537f9ea9dee05L, 0x7eb85bbdf7aa50L },
+ { 0x963edf8e8c504dL, 0x53c8dcae7bdb6bL, 0xa246e4c6fedf2dL,
+ 0x75533400c55bdeL, 0x2aa748d0270a54L, 0xadb6cf005860ddL,
+ 0x8d314509b84763L, 0x626720deb405efL }
+ },
+ {
+ { 0xa3709ae6601328L, 0x68e94fd2ac2478L, 0x38793439d5d247L,
+ 0xfa467af392c198L, 0x49e7b0d15df607L, 0x8c5812261792a8L,
+ 0x79f76581d3762fL, 0xaa38895244a39dL },
+ { 0xef60af9c5cd0bcL, 0x2b0db53a33b3bbL, 0xe3e0b1f251015dL,
+ 0xc608afce64489eL, 0xe52b05703651aaL, 0x1dda8b91c6f7b9L,
+ 0x833f022ff41893L, 0x58eb0a0192818cL }
+ },
+ {
+ { 0x6c1300cfc7b5a7L, 0x6d2ffe1a83ab33L, 0x7b3cd019c02eefL,
+ 0x6c64559ba60d55L, 0x2e9c16c19e2f73L, 0x11b24aedbe47b1L,
+ 0xc10a2ee1b8153bL, 0x35c0e081e02e1aL },
+ { 0xa9f470c1dd6f16L, 0x4ea93b6f41a290L, 0xac240f825ee03fL,
+ 0x6cd88adb85aabdL, 0x378a64a1be2f8fL, 0xbf254da417bac1L,
+ 0x7e4e5a59231142L, 0x057aadc3b8c057L }
+ },
+ {
+ { 0x607c77a80af479L, 0xd3e01ff5ccdf74L, 0x9680aaf101b4c7L,
+ 0xd2a7be12fc50a6L, 0x92a788db72d782L, 0x35daf2e4640b52L,
+ 0xc170d6939e601cL, 0x16e05f57b25c2fL },
+ { 0x47a42a66fe37f8L, 0xeb74271beca298L, 0x401e11e179da16L,
+ 0xfb8da82aa53873L, 0xd657d635bb4783L, 0x6847758fcea0b1L,
+ 0x2f261fb0993154L, 0x868abe3592853aL }
+ },
+ {
+ { 0x1a4c54335766abL, 0xa1c84d66f4e4eaL, 0x5d737a660ba199L,
+ 0x4a7b1e298b15a2L, 0x207877ffd967d3L, 0xcaec82dc262b4dL,
+ 0x0b278494f2a37dL, 0x34781416ac1711L },
+ { 0x28e3df18fc6856L, 0xbec03f816d003fL, 0x2bd705bff39ebdL,
+ 0x1dcb53b2d776d3L, 0xabafa7d5c0e7ceL, 0x5b9c8c24a53332L,
+ 0xe9f90d99d90214L, 0x789747ec129690L }
+ },
+ {
+ { 0x94d3c3954e2dfaL, 0x919f406afb2a8fL, 0x159ef0534e3927L,
+ 0xcdb4d14a165c37L, 0xa23e5e8288f337L, 0x95867c00f90242L,
+ 0x2528150e34e781L, 0x104e5016657b95L },
+ { 0x695a6c9bcdda24L, 0x609b99523eb5faL, 0xcbce4f516a60f8L,
+ 0xec63f7df084a29L, 0x3075ada20c811fL, 0x129a1928c716a1L,
+ 0xd65f4d4cd4cd4aL, 0xe18fa9c62188beL }
+ },
+ {
+ { 0x1672757bac60e3L, 0x525b3b9577144bL, 0x38fc997887055bL,
+ 0x7a7712631e4408L, 0x884f173cba2fcfL, 0x783cbdc5962ac0L,
+ 0x4f3ed0a22287dcL, 0x8a73e3450e20e6L },
+ { 0xe7a1cd0d764583L, 0x8997d8d0d58ee6L, 0x0ea08e9aa13ed6L,
+ 0xed478d0cf363cbL, 0x068523d5b37bf4L, 0x8b5a9e8783f13cL,
+ 0xde47bbd87528a9L, 0xd6499cccaec313L }
+ },
+},
+{
+ {
+ { 0x54781bbe09859dL, 0x89b6e067f5e648L, 0xb006dfe7075824L,
+ 0x17316600717f68L, 0x9c865540b4efe2L, 0xdbdb2575e30d8eL,
+ 0xa6a5db13b4d50fL, 0x3b5662cfa47bebL },
+ { 0x9d4091f89d4a59L, 0x790517b550a7dcL, 0x19eae96c52965eL,
+ 0x1a7b3c5b5ed7a4L, 0x19e9ac6eb16541L, 0x5f6262fef66852L,
+ 0x1b83091c4cda27L, 0xa4adf6f3bf742bL }
+ },
+ {
+ { 0x8cc2365a5100e7L, 0x3026f508592422L, 0xa4de79a3d714d0L,
+ 0xefa0d3f90fcb30L, 0x126d559474ada0L, 0xd68fa77c94350aL,
+ 0xfa80e570c7cb45L, 0xe042bb83985fbfL },
+ { 0x51c80f1fe13dbaL, 0xeace234cf055d7L, 0x6b8197b73f95f7L,
+ 0x9ca5a89dcdbe89L, 0x2124d5fdfd9896L, 0x7c695569e7ca37L,
+ 0x58e806a8babb37L, 0x91b4cc7baf99ceL }
+ },
+ {
+ { 0x874e253197e968L, 0x36277f53160668L, 0x0b65dda8b95dbeL,
+ 0x477a792f0872a1L, 0x03a7e3a314268dL, 0xa96c8420c805c7L,
+ 0xb941968b7bc4a8L, 0x79dce3075db390L },
+ { 0x577d4ef6f4cc14L, 0x5b0d205b5d1107L, 0x64ff20f9f93624L,
+ 0x0b15e315034a2fL, 0x3a0f6bb8b6f35cL, 0x0399a84e0d0ec5L,
+ 0xd0e58230d5d521L, 0xdeb3da1cb1dd54L }
+ },
+ {
+ { 0x24684ae182401aL, 0x0b79c1c21a706fL, 0xe1d81f8d8998afL,
+ 0xadf870f4bb069fL, 0xd57f85cf3dd7aaL, 0x62d8e06e4a40f8L,
+ 0x0c5228c8b55aa1L, 0xc34244aa9c0a1aL },
+ { 0xb5c6cf968f544eL, 0xa560533de23ab7L, 0xaa5512047c690cL,
+ 0x20eda5b12aaaa6L, 0xea0a49a751a6a0L, 0x6d6cfff2baa272L,
+ 0x95b756ebf4c28aL, 0xd747074e6178a4L }
+ },
+ {
+ { 0xa27b453221a94bL, 0xd56ad13e635f20L, 0x03574b08c95117L,
+ 0xf0ee953ed30b70L, 0xb48d733957796fL, 0xf5d958358c336bL,
+ 0x6170cd882db529L, 0xcd3ef00ec9d1eaL },
+ { 0xd1bea0de4d105fL, 0xd2d670fad6a559L, 0x652d01252f9690L,
+ 0x5f51fb2c2529b0L, 0x5e88bf0e89df2aL, 0x9a90684cd686e4L,
+ 0xf519ccd882c7a1L, 0x933a0dfc2f4d37L }
+ },
+ {
+ { 0x0720a9f3f66938L, 0x99356b6d8149dfL, 0xb89c419a3d7f61L,
+ 0xe6581344ba6e31L, 0xd130561ab936c8L, 0x0625f6c40dbef1L,
+ 0x7b2d6a2b6bb847L, 0x3ca8b2984d506bL },
+ { 0x6bf729afb011b0L, 0x01c307833448c9L, 0x6ae95080837420L,
+ 0xf781a8da207fb8L, 0xcc54d5857562a9L, 0xc9b7364858c5abL,
+ 0xdfb5035359908fL, 0x8bf77fd9631138L }
+ },
+ {
+ { 0xf523365c13fbb1L, 0x88532ea9993ed5L, 0x5318b025a73492L,
+ 0x94bff5ce5a8f3cL, 0x73f9e61306c2a0L, 0x00abbacf2668a3L,
+ 0x23ce332076237dL, 0xc867f1734c0f9bL },
+ { 0x1e50995cfd2136L, 0x0026a6eb2b70f8L, 0x66cb1845077a7dL,
+ 0xc31b2b8a3b498eL, 0xc12035b260ec86L, 0x1cbee81e1b3df0L,
+ 0xfd7b8048d55a42L, 0x912a41cf47a8c8L }
+ },
+ {
+ { 0xab9ffe79e157e3L, 0x9cfe46d44dc158L, 0x435551c8a4a3efL,
+ 0x638acc03b7e3a8L, 0x08a4ebd49954a7L, 0x295390c13194f7L,
+ 0x3a2b68b253892aL, 0xc1662c225d5b11L },
+ { 0xcfba0723a5d2bbL, 0xffaf6d3cc327c9L, 0x6c6314bc67e254L,
+ 0x66616312f32208L, 0xf780f97bea72e1L, 0x495af40002122fL,
+ 0x3562f247578a99L, 0x5f479a377ce51eL }
+ },
+},
+{
+ {
+ { 0x91a58841a82a12L, 0xa75417580f3a62L, 0x399009ff73417aL,
+ 0x2db1fb90a8c5cdL, 0x82c8912c046d51L, 0x0a3f5778f18274L,
+ 0x2ad0ede26ccae2L, 0x7d6bd8b8a4e9c2L },
+ { 0xaa0d7974b3de44L, 0xf8658b996ac9bbL, 0x31e7be25f6c334L,
+ 0x23836ce4df12c9L, 0x029027b59eb5c9L, 0x2f225315b8649dL,
+ 0xa0fdf03d907162L, 0x101d9df9e80226L }
+ },
+ {
+ { 0xf12037a9a90835L, 0xd2d0882f0222a7L, 0xeaf8d40c3814e2L,
+ 0xa986dc68b8146bL, 0x147a3318504653L, 0x734e0032feaf67L,
+ 0x6f27bbf602bec5L, 0xa1e21f16a688f3L },
+ { 0x5a8eeab73c4ae5L, 0x4dbaddbe70b412L, 0x871cebacfd2af1L,
+ 0x18603827d7a286L, 0x024059db5bb401L, 0x2557c093c39b73L,
+ 0xfc5a7116681697L, 0xf881c0f891b57cL }
+ },
+ {
+ { 0x3c443f18ea191aL, 0x76faa58d700ad0L, 0x6fe6cfabe7fcbfL,
+ 0xaefc5288990ef7L, 0x44e30fa80004ccL, 0xc744adc6d8ef85L,
+ 0xafcd931912df70L, 0xf62a9d1572a6d8L },
+ { 0x47158a03219f27L, 0x76fb27ead73136L, 0x41bb2adcc2d614L,
+ 0x8858cb9de1ec21L, 0xab402c45f15866L, 0x6675d5bbc82bbfL,
+ 0x4ee9dd6f1b28d3L, 0x875884fe373c17L }
+ },
+ {
+ { 0x17806dd2a67d36L, 0xaa23a8632c9ec1L, 0xd914126fc1ee55L,
+ 0xbf8f7bd653701bL, 0x9b0111aea71367L, 0x61fd4aba98e417L,
+ 0xeb45298561c5a5L, 0x2187b0ae7af394L },
+ { 0x71f12db1616ddeL, 0x061760907da7b4L, 0x414d37602ddb04L,
+ 0x1100be7286fb58L, 0xd7cf88d6f0d95bL, 0x8539d23746d703L,
+ 0xdccc9d64e23d73L, 0xaeef1d2ec89680L }
+ },
+ {
+ { 0x82ccf1a336508dL, 0xa128c1f5bad150L, 0x551d8c029a188dL,
+ 0xef13dd4771404fL, 0xdd67696c37b993L, 0x428c0e20dddad2L,
+ 0x222278d038c94cL, 0x1a24a51078e3f2L },
+ { 0xd297fe6edb0db9L, 0x00988d28251a87L, 0xbb946f8bfaa0d7L,
+ 0x380f7b9df45ea0L, 0x8526415afccf5eL, 0x909bfbfe9ec7bcL,
+ 0x2ed7093124755cL, 0x436802889404e2L }
+ },
+ {
+ { 0x21b9fa036d9ef1L, 0xfd64b7ce433526L, 0xd9d7eb76544849L,
+ 0x201620cd5b54b3L, 0x25fab3dbb61159L, 0x90d4eb0c53e0d3L,
+ 0xba098319e74772L, 0x8749658ec1681cL },
+ { 0xa354349fec316bL, 0x639a9b1a743ea2L, 0x2e514ca37c50e6L,
+ 0x9f4a4fddbaf6c5L, 0x0df87ef6f511c9L, 0xadd4cef0c00d95L,
+ 0x401c0ebaa1433fL, 0x3c3a59ebb38af9L }
+ },
+ {
+ { 0x8706245f0e7dcaL, 0xad238cd3fb29caL, 0x03304439b7d8f0L,
+ 0xfdcd6e6154f495L, 0xc67e24a7d4ad09L, 0x1b209e85438390L,
+ 0xf893b81b0c211eL, 0x1aa86f07e11e36L },
+ { 0x2cca3ffedea8b1L, 0x7eedd073b306cdL, 0x78e37bc12ee222L,
+ 0x257870bbc42a1dL, 0x5fb2bb91fbd397L, 0x470247009d6c60L,
+ 0x11748a320bdc36L, 0x3ff24dc04280e8L }
+ },
+ {
+ { 0x0eb1c679839b52L, 0x5bcca27acfbd32L, 0xb506c1674898e3L,
+ 0x37d662e2489e5eL, 0x8dc0731f694887L, 0x571149ef43f1dcL,
+ 0x6430a3766d63dcL, 0x0d2640eb50dd70L },
+ { 0x2b561493b2675bL, 0x1b4806588c604fL, 0x55c86a8aafbabcL,
+ 0xa7b9447608aabaL, 0xa42f63504cad8cL, 0x0f72b1dcee7788L,
+ 0x1d68374755d99aL, 0xd7cdd8f5be2531L }
+ },
+},
+{
+ {
+ { 0x67873bdbcdfee1L, 0xa5a0c0afcd0a3fL, 0x59389f93cfa3d4L,
+ 0x14e945ce1c865cL, 0x62d2f8e1d588ccL, 0xfd02f8a8e228b4L,
+ 0x208f791b42b649L, 0x0e0dff1ab397adL },
+ { 0x30ac3d90bc6eb1L, 0xf14f16a5f313bbL, 0x70fa447e2a0ad2L,
+ 0x6e406855a0db84L, 0xd52282be32e1e7L, 0x315a02a15ca330L,
+ 0x9a57a70867c2feL, 0x55f07650054923L }
+ },
+ {
+ { 0x2d729f6c0cf08fL, 0x6b80138ebaf57fL, 0x6285bcc0200c25L,
+ 0xee845192cd2ac7L, 0x28fce4d922778aL, 0x761325ccd1011cL,
+ 0xd01f2475100e47L, 0xc7a1665c60d8e1L },
+ { 0x950966d7ceb064L, 0x0a88e8578420dbL, 0x44f2cfce096f29L,
+ 0x9d9325f640f1d2L, 0x6a4a81fd2426f1L, 0x3ed6b189c905acL,
+ 0xba3c0e2008854dL, 0x1df0bd6a0d321bL }
+ },
+ {
+ { 0x0117ad63feb1e7L, 0xa058ba2f1ae02fL, 0x5eee5aa31b3f06L,
+ 0x540d9d4afacd4dL, 0x38992f41571d91L, 0xef2738ebf2c7deL,
+ 0x28bfcab92a798dL, 0x37c7c5d2286733L },
+ { 0xb99936e6470df0L, 0x3d762d58af6a42L, 0xa8c357ac74eec5L,
+ 0x9917bebf13afbcL, 0x28f0941f2dc073L, 0x306abf36ce7df7L,
+ 0xa3c5f6fd6973c8L, 0x640209b3677632L }
+ },
+ {
+ { 0xee872a2e23aef7L, 0xb497b6feb9b08eL, 0xfb94d973f33c63L,
+ 0x9ea1ff42b32315L, 0x537b49249a4166L, 0x89c7fe6ab4f8beL,
+ 0xf68007fdad8f0fL, 0xe56ef0b71b8474L },
+ { 0x478b2e83f333f9L, 0x144e718b2607f5L, 0x13aa605a4c7ab5L,
+ 0xfc1fc991d0730dL, 0xe7a04375ab3ea1L, 0xc59986a306d8d3L,
+ 0x24f6111702a8b1L, 0x7741394e040ad2L }
+ },
+ {
+ { 0x34c6a2560723a7L, 0x8aabd0df4ea691L, 0x9d676a55d7497fL,
+ 0x12c09577d91fa4L, 0x581c7a86479284L, 0xa54f3daf4fd449L,
+ 0x2f89f3c4ef44cfL, 0xfc266b5c9ec97cL },
+ { 0xfcd3fbe88b142aL, 0x9f3109f4bd69c1L, 0x08839c0b5f5a6aL,
+ 0x63ca8502e68303L, 0x2f0628dbba0a74L, 0x743cccf5d56b54L,
+ 0xbd4b06613e09fdL, 0x7a8415bde2ba3eL }
+ },
+ {
+ { 0x2234a3bc076ab2L, 0xd6953e54977a98L, 0xc12215831ebe2eL,
+ 0x632145fbad78e2L, 0xd7ba78aa5c4b08L, 0x6f4ea71998e32aL,
+ 0x25900d23485a63L, 0x97ac6286a5176fL },
+ { 0x5df91181093f7bL, 0x2bf9829c844563L, 0x525d99d6272449L,
+ 0x4281cb5b5c8a18L, 0x35df2780544a08L, 0xf4c3d2dbaeb8f4L,
+ 0xc7ff3175230447L, 0x6b4d7645d2fbffL }
+ },
+ {
+ { 0x4837f802b0c9cbL, 0xb65f8168ce8418L, 0xdf66ea99fc1428L,
+ 0x9788ee804ea7e8L, 0x9eae9008334e3cL, 0xbc91058d6ba1b6L,
+ 0x634aba1d7064b6L, 0x12d9bb3397b368L },
+ { 0x0645c85c413aa8L, 0xb09dea6ac6b5e3L, 0x29a620d289a50bL,
+ 0x104db3bbbcceb1L, 0x42e479287b3309L, 0xdfc373eec97f01L,
+ 0xe953f94b93f84eL, 0x3274b7f052dfbfL }
+ },
+ {
+ { 0x9d5670a1bd6fa9L, 0xec42fc9db6c4d4L, 0xaecd4ed1b42845L,
+ 0x4eed90e1b03549L, 0xeb3225cbbab1faL, 0x5345e1d28a2816L,
+ 0x3741cfa0b77d2aL, 0x712b19f7ea8caaL },
+ { 0x42e6844661853eL, 0x4cf4126e4a6e5dL, 0x196a9cfc3649f6L,
+ 0x06621bcf21b6b1L, 0x887021c32e29eaL, 0x5703aeb8c5680fL,
+ 0x974be24660f6d7L, 0xaf09badc71864eL }
+ },
+},
+{
+ {
+ { 0x3483535a81b6d3L, 0x19e7301ca037dcL, 0x748cab763ddfebL,
+ 0xe5d87f66f01a38L, 0xbba4a5c2795cd6L, 0x411c5d4615c36cL,
+ 0xff48efc706f412L, 0x205bafc4b519dfL },
+ { 0xfcaa5be5227110L, 0x7832f463ad0af0L, 0x34ef2c42642b1bL,
+ 0x7bbef7b072f822L, 0x93cb0a8923a616L, 0x5df02366d91ba7L,
+ 0x5da94f142f7d21L, 0x3478298a14e891L }
+ },
+ {
+ { 0xad79a0fc831d39L, 0x24d19484803c44L, 0x4f8a86486aeeb2L,
+ 0x0ca284b926f6b9L, 0x501829c1acd7cdL, 0x9f6038b3d12c52L,
+ 0x77223abf371ef5L, 0x2e0351613bf4deL },
+ { 0x7a5a4f2b4468ccL, 0xdcea921470ae46L, 0xf23b7e811be696L,
+ 0xe59ad0d720d6fbL, 0x9eacac22983469L, 0x4dd4110c4397eeL,
+ 0x4ef85bdcbe2675L, 0xe4999f7aa7c74bL }
+ },
+ {
+ { 0x031838c8ea1e98L, 0x539b38304d96a2L, 0x5fbdef0163956eL,
+ 0x6bd4d35ce3f52aL, 0xe538c2355e897fL, 0x6078d3a472dd3fL,
+ 0x590241eca9f452L, 0x2bc8495fd7fc07L },
+ { 0x23d0c89ead4c8cL, 0x1ea55a9601c66eL, 0x41493c94f5b833L,
+ 0xc49a300aa5a978L, 0xc98bdc90c69594L, 0x4e44cedccbdc8cL,
+ 0xb0d4e916adccbfL, 0xd56e36b32c37aeL }
+ },
+ {
+ { 0x052bd405b93152L, 0x688b1d44f1dbfaL, 0xe77ba1abe5cc5fL,
+ 0x11f8a38a6ac543L, 0x3355fd6e4bb988L, 0xdf29c5af8dffb4L,
+ 0x751f58981f20eeL, 0x22a0f74da9b7fbL },
+ { 0xec8f2bc6397b49L, 0xff59fc93639201L, 0xb7f130aa048264L,
+ 0xe156a63afdc4ccL, 0x0fd7c34b13acafL, 0x87698d40cb4999L,
+ 0x6d6ecae7f26f24L, 0xae51fad0f296e2L }
+ },
+ {
+ { 0xd0ad5ebdd0f58dL, 0x6ec6a2c5c67880L, 0xe1ce0349af1e0fL,
+ 0x08014853996d32L, 0x59af51e5e69d20L, 0x0ef743aaa48ecfL,
+ 0x8d3d2ea7dafcb0L, 0x4ac4fad89189b6L },
+ { 0x92d91c2eae97f1L, 0xef5eca262b4662L, 0x440b213b38b10aL,
+ 0xec90187fc661daL, 0x85f3f25f64cf8dL, 0xcee53ca457ad1bL,
+ 0x8deed4bf517672L, 0x7706fb34761828L }
+ },
+ {
+ { 0x1577d9117494feL, 0x52d29be2fd7239L, 0x9a0eef00186d37L,
+ 0x241d0f527fe108L, 0x42824bae6fb59fL, 0xb8d33df0d48c25L,
+ 0xfffdb0a47af4b0L, 0x534c601073b0b6L },
+ { 0xe6df35951c033bL, 0x3e1002b86c0f94L, 0xa7cb55548fb9b6L,
+ 0x999818ba7bbff8L, 0xe4ba3d684d8bf2L, 0x53dbb326358f0aL,
+ 0xeebc1e2f2568e8L, 0xc6917ebb3e0f68L }
+ },
+ {
+ { 0xbe1bbfc19f8d13L, 0xc3951b62d4795cL, 0x9371c49ed535a9L,
+ 0x77c389f68cebeaL, 0xfc1a947a141d0eL, 0x4b48d7ade44f8bL,
+ 0x3db1f058580a26L, 0xeed1466258b5fcL },
+ { 0x5daa4a19854b21L, 0x5bfa46f1ab1eadL, 0xc152e3559957ebL,
+ 0xdc84277ea48adaL, 0x68709cffc169b5L, 0xde50ce3720e617L,
+ 0xe42f262dd9a832L, 0xddffd4d2d6ce29L }
+ },
+ {
+ { 0xd5ba5578fa0a56L, 0x0d7d0f1fafaf4cL, 0x7666e4138b63edL,
+ 0x04e65135d87f02L, 0xdca8866c958f32L, 0xaa8486d3ce2686L,
+ 0xe3785caf1cbcd3L, 0x8a9b11403c8335L },
+ { 0x5c1dca22e0ef60L, 0x775af5b7d3fb20L, 0xe690ffc2b373a8L,
+ 0x30fe15d28330e6L, 0x8a1022bdd0f393L, 0x6bd7364966a828L,
+ 0x8d4b154949208aL, 0xfb38c6bb9d9828L }
+ },
+},
+{
+ {
+ { 0x6d197640340ac2L, 0x969f473ecab5ffL, 0xead46f7c458e42L,
+ 0x168646a1d00eedL, 0xf70c878e0ce0cfL, 0xa7291d38d8d15aL,
+ 0x92cf916fdd10ccL, 0x6d3613424f86d5L },
+ { 0xba50d172d5c4b4L, 0xe0af5024626f15L, 0x76f3809d76098aL,
+ 0x433dc27d6caaa8L, 0x72dc67a70d97a7L, 0x935b360f5c7355L,
+ 0xdbaac93179bb31L, 0x76738487ed1a33L }
+ },
+ {
+ { 0x8d1ca668f9fa0dL, 0x4ed95d8a02f2bfL, 0xd19fc79f630d7bL,
+ 0x0448ec4f46fa51L, 0xb371dd8623bf3fL, 0xe94fabcd650e94L,
+ 0x3af3fcacd90a70L, 0x0f720c403ce3b7L },
+ { 0x590814cd636c3bL, 0xcf6928d4469945L, 0x5843aaf484a4c6L,
+ 0xb5a4c1af9b4722L, 0x25116b36cfb2f9L, 0xf248cf032c2640L,
+ 0x8cd059e27412a1L, 0x866d536862fc5dL }
+ },
+ {
+ { 0x156e62f6de4a2eL, 0x0365af7aafcc78L, 0x65c861819e925eL,
+ 0x4db5c01f8b2191L, 0x1fd26d1ad564faL, 0x16bbc5319c8610L,
+ 0x0718eef815f262L, 0x8684f4727f83d1L },
+ { 0xa30fd28b0f48dbL, 0x6fef5066ab8278L, 0xd164e771a652dfL,
+ 0x5a486f3c6ebc8cL, 0xb68b498dc3132bL, 0x264b6efd73323fL,
+ 0xc261eb669b2262L, 0xd17015f2a35748L }
+ },
+ {
+ { 0x4241f657c4bb1dL, 0x5671702f5187c4L, 0x8a9449f3973753L,
+ 0x272f772cc0c0cdL, 0x1b7efee58e280cL, 0x7b323494b5ee9cL,
+ 0xf23af4731142a5L, 0x80c0e1dd62cc9eL },
+ { 0xcbc05bf675ffe3L, 0x66215cf258ce3cL, 0xc5d223928c9110L,
+ 0x30e12a32a69bc2L, 0x5ef5e8076a9f48L, 0x77964ed2329d5fL,
+ 0xdf81ba58a72cf2L, 0x38ea70d6e1b365L }
+ },
+ {
+ { 0x1b186802f75c80L, 0x0c153a0698665aL, 0x6f5a7fe522e8ddL,
+ 0x96738668ddfc27L, 0x7e421d50d3bdceL, 0x2d737cf25001b2L,
+ 0x568840f0e8490cL, 0xea2610be30c8daL },
+ { 0xe7b1bc09561fd4L, 0xeda786c26decb0L, 0x22369906a76160L,
+ 0x371c71478a3da3L, 0x1db8fce2a2d9bfL, 0x59d7b843292f92L,
+ 0x8097af95a665f9L, 0x7cb4662542b7a9L }
+ },
+ {
+ { 0xa5c53aec6b0c2fL, 0xc4b87327312d84L, 0xfc374cbc732736L,
+ 0xa8d78fe9310cc0L, 0xd980e8665d1752L, 0xa62692d6004727L,
+ 0x5d079280146220L, 0xbd1fedb860fea5L },
+ { 0xcbc4f8ab35d111L, 0x5ba8cdf3e32f77L, 0xd5b71adb614b93L,
+ 0x7b3a2df2f8808dL, 0x09b89c26ef2721L, 0x55a505447c3030L,
+ 0x21044312986ae6L, 0x427a0112367d4cL }
+ },
+ {
+ { 0xe9fe256c1942d8L, 0x9e7377d96e3546L, 0x43e734cb0c1744L,
+ 0x5f46821211fbcaL, 0x44f83dc32b6203L, 0x84513086ad1d96L,
+ 0x54dd5192fbb455L, 0xc2a18222f10089L },
+ { 0x01055a21855bfaL, 0x9e6d7b477078b4L, 0x3f8df6d30cea0eL,
+ 0x81c215032973f7L, 0x17dd761c0b3d40L, 0x040424c50d0abeL,
+ 0x5599413783deabL, 0xde9271e8f3146fL }
+ },
+ {
+ { 0x5edfd25af4a11dL, 0x3a3c5307846783L, 0xb20086873edd31L,
+ 0x74e00ecfe0eef8L, 0xba65d2f3dd78c7L, 0xab1364371999f1L,
+ 0xfa9be5dde9a7e8L, 0xeb146ce87a8609L },
+ { 0x76afd6565353e9L, 0xfa7023dd51ba1cL, 0x7a09f2237ede4fL,
+ 0xca085760ba7a1bL, 0xd973882b99950aL, 0xe894266ea5057aL,
+ 0xd01c4217f55e49L, 0x69cfb9c5555679L }
+ },
+},
+{
+ {
+ { 0x67867e7c5d631aL, 0x1de88c55bcf47bL, 0x8366d06afd1352L,
+ 0xd7dbdef6e20337L, 0xb0f9e2f1253ec7L, 0x1be984510ad240L,
+ 0x63ec533f4a6118L, 0xd5e4c5b96ce633L },
+ { 0x1d0b6c34df4a25L, 0xef9486a5a1b554L, 0x2f0e59e47b6ef3L,
+ 0x4d8042f2ff84d7L, 0x3e74aa3da359c9L, 0x1baa16fd21c160L,
+ 0xb4cff210191cbaL, 0x50032d8ebc6472L }
+ },
+ {
+ { 0xb6833e01fc1b13L, 0x8a8b7ba1a5ad8fL, 0xc0cafa2622b820L,
+ 0xc6663af738ed20L, 0xd8944868b18f97L, 0xcf0c1f9774fbe4L,
+ 0xeedd4355be814fL, 0xd81c02db57e543L },
+ { 0x5e32afc310bad8L, 0x065bc819b813d1L, 0x8efc5fc3142795L,
+ 0x5006514732d59cL, 0x91e39df2b5a3ceL, 0x2ad4477faf4204L,
+ 0x1a96b184d9bd4fL, 0xc3fee95a4d9c07L }
+ },
+ {
+ { 0xfac7df06b4ba61L, 0xa6ed551061aaefL, 0x35aa2d6133f609L,
+ 0x420cfba20ed13dL, 0x861c63eea03d0cL, 0x75f0c56f936d6eL,
+ 0xa25f68f3d9a3d5L, 0xba0b7fecd9f66eL },
+ { 0x292e1354680772L, 0x6f6a2dba73f405L, 0xca6add924ea9e4L,
+ 0x81cfd61268daaaL, 0x7a4cb6ce6f147aL, 0x8ec3454bded8f5L,
+ 0xc8a893b11d61cbL, 0x2256ffc7656022L }
+ },
+ {
+ { 0x6b33271575cb78L, 0x560d305adcd23eL, 0xeedbd3ad6d834bL,
+ 0x614a64a5a31e27L, 0xe40b47647ee0c8L, 0x8ef4ff68bd7c2cL,
+ 0xa5297fc0b77727L, 0x8759208baf88adL },
+ { 0x86cfe64918df68L, 0x9d60a73cdd882eL, 0x546b642b953014L,
+ 0xbaceae38bbef55L, 0xdf58e43f1c3467L, 0x99a83fee9f9babL,
+ 0xcd52cbf57a4a8bL, 0xf744e968ae36ecL }
+ },
+ {
+ { 0xb945869a607124L, 0x810dbe9440e6f6L, 0x9911e60738e381L,
+ 0x51df68c343b80bL, 0xe424336f7a3f39L, 0x2d32acb989015cL,
+ 0xa69b14931019e8L, 0x8a31a38ec12f93L },
+ { 0x0d0d36997c916aL, 0xdc95f3b8885372L, 0xcf1a2613549040L,
+ 0x60f6f5eabe95a2L, 0xa909e9fe141325L, 0x7d598f2355c865L,
+ 0x70c6442931a9c9L, 0x2354a85b423850L }
+ },
+ {
+ { 0x4cdd22497f9619L, 0x4776fffc22162eL, 0xee5ec330cd31c2L,
+ 0x7c04c10f209bb8L, 0x35bbfde579e211L, 0x0e3832515cdfc2L,
+ 0x657e6d3e26ffa7L, 0xc66a7c3c65c604L },
+ { 0x322acd7b45e567L, 0x1589cf0296db9bL, 0x1fd0bd3ba1db73L,
+ 0xe8826109337a40L, 0xf505a50b3035c7L, 0x4d5af066ed08d7L,
+ 0xb3c376b5eda400L, 0x9c7b7001944748L }
+ },
+ {
+ { 0xd76832570c3716L, 0xda62af0dd540e0L, 0x76b155d6580feaL,
+ 0x4f42acc32b5464L, 0x881bb603f5b72bL, 0x09c130ee68b9baL,
+ 0x37ede3b5c50342L, 0xce61a9cfd15e7dL },
+ { 0xfff1d8572605d0L, 0x62ac2d3062abc2L, 0xa85e02efbe43ddL,
+ 0x859d2baa947020L, 0x2ebc8a9111c20bL, 0x7f590a7a656f66L,
+ 0x0e1384316b21a6L, 0x29b30c500c7db6L }
+ },
+ {
+ { 0x61e55e2906b8deL, 0x6a97e96949974dL, 0x24b52b526eef67L,
+ 0x512f5361aa595aL, 0x81cc7b83c48fcbL, 0xa64af2328115adL,
+ 0x9edf6f93d44b8eL, 0x68d7f7c1fe22e3L },
+ { 0x2b2116a520d151L, 0x66a0b7d6aa3efbL, 0x48ae70a9b0f791L,
+ 0xcf12174037db88L, 0x36868cd317d9f3L, 0xb57305922fc344L,
+ 0xbaa852646a5d23L, 0xad6569137fc10dL }
+ },
+},
+{
+ {
+ { 0xcf8e5f512c78d5L, 0xeb94d98805cdbdL, 0xad1dcdf2ab50b5L,
+ 0xf33c136f33cd31L, 0x0d6226b10aeff5L, 0xf7ff493f2f8fc5L,
+ 0x7e520d4df57165L, 0x41fbae505271a7L },
+ { 0x72c898776480baL, 0x260835925f4523L, 0xed36b8d49f5f01L,
+ 0x3bc1dcef3d49ebL, 0x30c1c1a4940322L, 0x78c1cda7e0f731L,
+ 0x51f2dc86d05a31L, 0x57b0aa807f3522L }
+ },
+ {
+ { 0x7ab628e71f88bcL, 0xcf585f38018f21L, 0xdbbe3a413d64f6L,
+ 0x0f86df1ec493a5L, 0x8355e6c7725de9L, 0x3954ffee00fe1eL,
+ 0xbb8978f9924e32L, 0x1c192987812714L },
+ { 0x7c4ce3eaabca8bL, 0xf861eb59bf7019L, 0x31a84fc682e541L,
+ 0x2307ca9acd1b92L, 0x6f8b6ce4bf2842L, 0xde252accb9f9a9L,
+ 0x7f0611d93c46d1L, 0x8e2bd80751dc98L }
+ },
+ {
+ { 0xf2fd8fbe27d54bL, 0x2a1e37ec248071L, 0x2fcc888ab8f49aL,
+ 0x42c62a3c18a9e5L, 0xe30290870b2446L, 0x90277fac5ac55dL,
+ 0x8d97d56d6dde41L, 0xf4cf8a95db04feL },
+ { 0x3e280f5d30d077L, 0x2c903073cb3293L, 0xe0be2ac24eb0ddL,
+ 0xa2d1a498bcb4f0L, 0x16db466cd0cd45L, 0x3b28aa79a80232L,
+ 0xdd7e52f17b008eL, 0x20685f2868e4daL }
+ },
+ {
+ { 0x0a68c147c7a486L, 0xd8ef234c429633L, 0x470667bffe7506L,
+ 0x55a13c88828d51L, 0x5f327412e44befL, 0x537d92a5929f92L,
+ 0x0a01d5b31c5cd5L, 0xb77aa7867eb3d7L },
+ { 0x36ec45f8b82e4dL, 0x6821da0b37b199L, 0x8af37aad7fa94eL,
+ 0xf0206421085010L, 0x9b886787e56851L, 0x35f394452948ceL,
+ 0x125c2baafc1361L, 0x8a57d0e453e332L }
+ },
+ {
+ { 0xefe99488043664L, 0xb8b8509db1aa55L, 0x1a2e5a9332523fL,
+ 0x5e255dd1045c0fL, 0xe68dd8a7ae7180L, 0x55f1cf345bf532L,
+ 0xe00722ee63a716L, 0xd1c21386116bacL },
+ { 0x626221f1c6d1f4L, 0x240b8303773278L, 0xe393a0d88def16L,
+ 0x229266eca0495cL, 0x7b5c6c9d3e4608L, 0xdc559cb7927190L,
+ 0x06afe42c7b3c57L, 0x8a2ad0bb439c9bL }
+ },
+ {
+ { 0xd7360fbffc3e2fL, 0xf721317fbd2e95L, 0x8cacbab5748e69L,
+ 0x7c89f279054bb9L, 0xcbe50faaa86881L, 0x7aa05d375206e4L,
+ 0x1ea01bcc752c66L, 0x5968cde1f2c2bcL },
+ { 0x487c55f09a853eL, 0x82cbef1e09204bL, 0xad5c492abd8670L,
+ 0x7175963f12dcb3L, 0x7a85762bf6aa06L, 0x02e5697f8d5237L,
+ 0xccf7d1937c6157L, 0x3b14ca6c2fd59cL }
+ },
+ {
+ { 0x5e610d81b9f77fL, 0x85876d0051b02fL, 0x5d81c63b8020ddL,
+ 0xd0b4116d6ce614L, 0x91810e5aa8bf0cL, 0xf27f91fcbf8c66L,
+ 0x2e5dc5f38480aeL, 0x0a13ffebec7633L },
+ { 0x61ff6492bf6af8L, 0xe6aef2d641f827L, 0xad5708a5de5f04L,
+ 0xe5c3a80cdfee20L, 0x88466e268fcfa2L, 0x8e5bb3ad6e1d7bL,
+ 0xa514f06ed236b8L, 0x51c9c7ba5f5274L }
+ },
+ {
+ { 0xa19d228f9bc3d8L, 0xf89c3f03381069L, 0xfee890e5c3f379L,
+ 0x3d3ef3d32fb857L, 0x39988495b418ddL, 0x6786f73c46e89aL,
+ 0x79691a59e0f12fL, 0x76916bf3bc022bL },
+ { 0xea073b62cd8a0aL, 0x1fbedd4102fdbcL, 0x1888b14cb9d015L,
+ 0x98f2cfd76655f7L, 0xb9b591059f0494L, 0xa3dbbe1e6986a3L,
+ 0xef016a5eaf2b04L, 0xf671ba7cd2d876L }
+ },
+},
+{
+ {
+ { 0x1dae3bf1ae05e9L, 0x6a029961f21fefL, 0x95df2b97aec3c6L,
+ 0x9abbc5ad83189bL, 0xaf994af2d13140L, 0xc3f884686aa406L,
+ 0xcd77e5075284c5L, 0x1c1e13d2a9a4d7L },
+ { 0x7f8815d744b89dL, 0xb1891332ba673eL, 0x55ea93cd594570L,
+ 0x19c8a18d61b041L, 0x938ebaa8d2c580L, 0x9b4344d05ba078L,
+ 0x622da438eaf9b7L, 0x809b8079fea368L }
+ },
+ {
+ { 0x3780e51c33b7a2L, 0xd7a205c387b1c8L, 0x79515f84be60e4L,
+ 0xde02a8b1e18277L, 0x4645c96f0d9150L, 0x45f8acbe0b3fd1L,
+ 0x5d532ba9b53ac3L, 0x7984dcdb0557c9L },
+ { 0x5ae5ca68a92f01L, 0xd2fbb3c9d569caL, 0x668cc570c297c1L,
+ 0xa4829436295e89L, 0xf646bc1a33ad40L, 0x066aaa4c3f425dL,
+ 0x23434cdd005de2L, 0x5aca9e9db35af4L }
+ },
+ {
+ { 0x2bca35c6877c56L, 0xab864b4f0ddd7dL, 0x5f6aa74404f46cL,
+ 0x72be164539c279L, 0x1b1d73ee0283cfL, 0xe550f46ad583d9L,
+ 0x4ac6518e739ad1L, 0x6b6def78d42100L },
+ { 0x4d36b8cfa8468dL, 0x2cb37735a3d7b8L, 0x577f86f5016281L,
+ 0xdb6fe5f9124733L, 0xacb6d2ae29e039L, 0x2ab8330580b8a1L,
+ 0x130a4ac643b2d0L, 0xa7996e35e6884eL }
+ },
+ {
+ { 0x6fb627760a0aa8L, 0xe046843cbe04f0L, 0xc01d120e6ad443L,
+ 0xa42a05cabef2fcL, 0x6b793f112ff09cL, 0x5734ea8a3e5854L,
+ 0xe482b36775f0adL, 0x2f4f60df864a34L },
+ { 0xf521c5884f2449L, 0x58734a99186a71L, 0x157f5d5ac5eaccL,
+ 0x858d9a4248ee61L, 0x0727e6d48149c3L, 0xd5c3eaaac9ec50L,
+ 0xa63a64a20ee9b5L, 0x3f0dfc487be9deL }
+ },
+ {
+ { 0x836349db13e3f4L, 0xebdd0263e9316dL, 0x3fd61e8324fd6cL,
+ 0x85dddfa0964f41L, 0x06e72de52add1bL, 0xb752cff8c4a9e2L,
+ 0x53b0894fdf09f7L, 0xd5220ab0bc24fdL },
+ { 0x8442b35fb1981aL, 0xa733a373edd701L, 0x42b60c3d0ef089L,
+ 0xa1b16ec46e7bcaL, 0xc0df179a09aaf4L, 0xcd4f187638f3a1L,
+ 0x9af64f79eab1c2L, 0x86fed79d1d78e3L }
+ },
+ {
+ { 0x42c8d86fe29980L, 0x6657b816575660L, 0x82d52c680f92caL,
+ 0x8587af102d42beL, 0xb5151316e8bdf0L, 0x706e2d9c333495L,
+ 0xd53601a9673064L, 0x27b1fbb8219099L },
+ { 0x3f0929d705f7c8L, 0xff40b10f3d6e6fL, 0x673c703026af5cL,
+ 0x2c1dce4e25a422L, 0x5348bd73dad8b6L, 0xc39b6b6be2c329L,
+ 0x47854ffb921084L, 0xb347b8bb391f20L }
+ },
+ {
+ { 0x79fc841eb9b774L, 0xf32da25b4b6c1dL, 0xcbba76bfe492cbL,
+ 0x76c51fcd623903L, 0x114cf6fcf0705aL, 0x6b720497815dafL,
+ 0x630b362473382eL, 0xbf40c3a9704db5L },
+ { 0xa8a9ddcc5456ebL, 0x2b4472a72f2dc1L, 0x9874444d6d6ef3L,
+ 0x27e8d85a0ba5edL, 0x5d225b4194849fL, 0xe852cd6ebaa40dL,
+ 0xb669c248d4bf3fL, 0xa8601eb2343991L }
+ },
+ {
+ { 0x8a0485459502d3L, 0xcab27eee269a7bL, 0x41793074875adaL,
+ 0x179e685e2405f9L, 0x0d7b6987b28963L, 0x80c9db8422a43eL,
+ 0xf5ff318a0f43eeL, 0x7a928054ba7aa7L },
+ { 0xa5c79fe0c0834eL, 0x837ca0d1f849ecL, 0xfe0d7fa628ab7bL,
+ 0x94bcb956edd19aL, 0xa18bc932226fbfL, 0x2795379aad54a3L,
+ 0xceeacf8371129eL, 0x65ca57fa588be5L }
+ },
+},
+{
+ {
+ { 0x7a578b52caa330L, 0x7c21944d8ca34aL, 0x6c0fbbb6447282L,
+ 0xa8a9957f90b2e5L, 0xbbe10666586b71L, 0x716a90249138a2L,
+ 0x2fa6034e7ed66dL, 0x56f77ed2b9916aL },
+ { 0x69f1e26bddefb3L, 0xa4978098c08420L, 0xc3377eb09bc184L,
+ 0x796ce0cbe6dadeL, 0x3be0625d103bbbL, 0x01be27c992685cL,
+ 0xc0e25597755f9fL, 0x165c40d1c0dbfaL }
+ },
+ {
+ { 0xc63a397659c761L, 0x10a0e5b630fbadL, 0xf21e8a6655ac56L,
+ 0xe8580fac1181e2L, 0xbfc2d9c0a84b5cL, 0x2cdbaff7afd5d1L,
+ 0x95f1182f61e85aL, 0x1173e96719eaf4L },
+ { 0xc06d55ec6de8b9L, 0x1b4c8ebafcbcaaL, 0x52af5cbbc2bbcdL,
+ 0x564fab877bcd10L, 0xfd53a18ae85a6eL, 0x225785994c712fL,
+ 0x29b11d71352121L, 0xab1cb76c40491aL }
+ },
+ {
+ { 0xb4e8ca8ce32eb4L, 0x7e484acb250b49L, 0x062c6f7a3e31a2L,
+ 0x497fd83625d1fcL, 0x98f821c362dda7L, 0xcae1f8f6be3111L,
+ 0x9077e955d4fa42L, 0xa589971a65855aL },
+ { 0xda6321d28832a9L, 0xf9ef5dc3936e9eL, 0xa37f117c9797efL,
+ 0x0eb3c80db581beL, 0x207c5c4baa0002L, 0xc0401b5f38faa0L,
+ 0xceee523d0f1e6eL, 0x8d27a5fd1f0045L }
+ },
+ {
+ { 0x9411063cf0af29L, 0x304385789a6693L, 0x9a9fb8f640145eL,
+ 0x7d82fe954832ebL, 0xf2789e1898c520L, 0x448b402f948dc0L,
+ 0xeca8fdf68996ddL, 0x22227e9a149b2fL },
+ { 0x63509ff8e62d6aL, 0xe98d81c8c9c57fL, 0xd3874071fe3bedL,
+ 0xf1db013539538fL, 0xb04092e48418ceL, 0xbbf8e76d6d9d4dL,
+ 0x2ea9cda2cec5aeL, 0x8414b3e5078fa9L }
+ },
+ {
+ { 0x5ad1cdbd68a073L, 0xd4cedafc18b591L, 0x78267078e4c1c9L,
+ 0x9b8d9209ca302aL, 0x3101bd2326115bL, 0x6f154b54c2717aL,
+ 0x618c31b263e84bL, 0x12c4138bbd6942L },
+ { 0xf9ead2580da426L, 0xe748e9947d9680L, 0x9b396a38a4210eL,
+ 0xfaf03ddf4b8f72L, 0xbd94a5266159e7L, 0x5e730491d4c7cbL,
+ 0x31d1f9a7910f38L, 0x4fd10ca08d6dd1L }
+ },
+ {
+ { 0x4f510ac9f2331eL, 0xee872dc7e3dcc2L, 0x4a11a32a0a0c73L,
+ 0x27e5803aa5a630L, 0xe5ae5037af4a8aL, 0x2dcdeba9fffeb0L,
+ 0x8c27748719d91fL, 0xd3b5b62b9cc61cL },
+ { 0x998ac90cca7939L, 0xc22b59864514e5L, 0x950aaa1b35738aL,
+ 0x4b208bbdab0264L, 0x6677931a557d2eL, 0x2c696d8f7c17d3L,
+ 0x1672d4a3e15c51L, 0x95fab663db0e82L }
+ },
+ {
+ { 0x3d427346ff205eL, 0x7f187d90ea9fbeL, 0xbd9367f466b2afL,
+ 0x188e53203daf2fL, 0xefe132927b54d8L, 0x14faf85ef70435L,
+ 0xa5061281ec95c4L, 0xad01705c22cba7L },
+ { 0x7d2dfa66197333L, 0xedd7f078b4f6edL, 0xe0cb68575df105L,
+ 0x47c9ddb80f76bcL, 0x49ab5319073c54L, 0x845255ae607f44L,
+ 0x0b4ed9fcc74b7cL, 0xcfb52d50f5c3a6L }
+ },
+ {
+ { 0x545c7c6c278776L, 0x92a39ae98c30f0L, 0x8aa8c01d2f4680L,
+ 0xa5409ed6b7f840L, 0x0c450acdcb24e7L, 0x5da6fb2c5770d9L,
+ 0x5b8e8be8658333L, 0xb26bf4a67ea4adL },
+ { 0x2e30c81c7d91faL, 0x6e50a490eeb69fL, 0x9458c2bee4bc26L,
+ 0x419acf233be250L, 0x79d6f8187881abL, 0x694565d403b1beL,
+ 0x34b3990234fe1dL, 0x60997d72132b38L }
+ },
+},
+{
+ {
+ { 0x00a974126975dcL, 0x42161c46cf94e7L, 0xcc9fe4bc64ed99L,
+ 0x020019a4680570L, 0x885595a698da0dL, 0x008444b77dd962L,
+ 0xbf3c22da4fea0eL, 0xc4630482c81245L },
+ { 0xcb248c5793ab18L, 0x4dc7a20eb4320bL, 0x9a0906f1572b7dL,
+ 0xd5b3019f9ac20fL, 0x79b1bf534520a3L, 0x788dfe869b5322L,
+ 0x9a05298455b7e2L, 0x2f4aecb016bca9L }
+ },
+ {
+ { 0x414d3798745618L, 0x64ba22eb7c983cL, 0x9a5d19f9f9d532L,
+ 0x81a00d844a80c8L, 0xb9e24f5cae98d6L, 0x6c3769caca965aL,
+ 0x50d6081f6e4e6dL, 0x0d9698054422a6L },
+ { 0xbd7e7925cdd790L, 0xcff65da6a35219L, 0x40dc3638b60ebeL,
+ 0x84bee7492a50dcL, 0x57d4be415ad65eL, 0xc54256b1a6d1d3L,
+ 0x141c64945717ccL, 0x05eb609cd1c736L }
+ },
+ {
+ { 0xfd52eab1e3c7ecL, 0xa4a5eca9f24895L, 0xaaa2a8d79fdb83L,
+ 0xd105e6072bdfdaL, 0x59e6ae2681d97eL, 0xfedf8e08e8077fL,
+ 0xb06d0ad629e462L, 0x8c7c2d096fa863L },
+ { 0x5eecc4cee8fc91L, 0x5e83ab29e61174L, 0x1fd8925b28c02dL,
+ 0x93be5382072864L, 0xda0c88624c984eL, 0xdcf9f0ca008286L,
+ 0x1ecb5a6a58ba75L, 0x1d9b890c2e3c83L }
+ },
+ {
+ { 0x19e866eeeee062L, 0x31c1c7f4f7b387L, 0x9be60181c06652L,
+ 0xc00a93a2b68bbbL, 0x54c65d69d52b2bL, 0x4591416e8b744aL,
+ 0x641bcca9a64ab6L, 0xf22bcb1ab08098L },
+ { 0x3c0db8ff1f726cL, 0x4f5739e9d2e6a6L, 0x5cb669b45c9530L,
+ 0x861b04e7b472d0L, 0x3e30515894da77L, 0x3344685c9ac39bL,
+ 0x9e1730573bdd29L, 0x9cac12c808dc85L }
+ },
+ {
+ { 0xf152b865e27087L, 0x267bd8590a580eL, 0xba79cec8baafc1L,
+ 0x6140ab19442686L, 0xa67090c5b31693L, 0x50a103a28b4117L,
+ 0x7722e610ddc08fL, 0x5d19d43e6569b2L },
+ { 0x70e0c525962bf6L, 0x808e316fb5fb02L, 0x3fb80da5b667beL,
+ 0x8aa366efcfacecL, 0xcb0b3e7134280eL, 0x0bf1de4cd7d944L,
+ 0x0cd23bed092df5L, 0xc9a6a79a153a0cL }
+ },
+ {
+ { 0x1c69ad02d5a4b7L, 0x4bb28d0d9e6f4aL, 0x815308ca984fc6L,
+ 0x40929c79037ca5L, 0x0ea2b491bd0357L, 0xec17e5b42aad4eL,
+ 0x1f32ade18e7235L, 0xbc60b05a96a9d3L },
+ { 0x3b0229ae20f707L, 0xd63505056bdfadL, 0xac2d922d8b2e1eL,
+ 0x92b2998235c748L, 0x6002c3ad766f97L, 0x99198001a2a862L,
+ 0x2af7567b58b684L, 0xd8fe707aaafce5L }
+ },
+ {
+ { 0x54487ab5df7a4bL, 0x51cccdec57ccc2L, 0x23943277510b53L,
+ 0x3a09f02f555de3L, 0xa696aec1be484dL, 0x56f459f37817a2L,
+ 0x8d8f61c623dcb4L, 0xc52223c5335656L },
+ { 0xf634111b49914aL, 0xbf8e1ab8e4f9bbL, 0x2f59578f4dba02L,
+ 0x2a94199e004319L, 0x87931f0654d005L, 0x7df57d96fa0814L,
+ 0xc8da316a154031L, 0x2a44ac041f658bL }
+ },
+ {
+ { 0xfb5f4f89e34ac6L, 0x0a1b10b97790f2L, 0x58fe4e74b8a06cL,
+ 0x10c1710955f27cL, 0x77b798ad5ebe19L, 0xaf1c35b1f1c2dcL,
+ 0xc25b8e6a1f8d69L, 0x49cf751f76bf23L },
+ { 0x15cb2db436f7b7L, 0x186d7c27e74d1aL, 0x60731dec00a415L,
+ 0xea1e15615f0772L, 0xf02d591714463fL, 0x26a0c6451adeb1L,
+ 0x20174cdcc5229eL, 0xb817e50efd512aL }
+ },
+},
+};
+
+/* Precomputed point table "base_i": 16 entries of type ge448_precomp, each
+ * holding two coordinates of 8 limbs. Every limb value fits in 56 bits
+ * (at most 14 hex digits), matching the 8x56-bit reduced-radix field
+ * representation used elsewhere in this file.
+ *
+ * NOTE(review): presumably these are precomputed odd multiples (or window
+ * entries) of the Ed448 base point used to speed up scalar multiplication
+ * of the generator — confirm against the table-lookup code that indexes
+ * base_i. The first entry's coordinates match the pattern of a base point
+ * (x, y) pair; the exact multiple each of the 16 entries represents is not
+ * visible from this chunk.
+ */
+static const ge448_precomp base_i[16] = {
+    {
+        { 0x26a82bc70cc05eL, 0x80e18b00938e26L, 0xf72ab66511433bL,
+          0xa3d3a46412ae1aL, 0x0f1767ea6de324L, 0x36da9e14657047L,
+          0xed221d15a622bfL, 0x4f1970c66bed0dL },
+        { 0x08795bf230fa14L, 0x132c4ed7c8ad98L, 0x1ce67c39c4fdbdL,
+          0x05a0c2d73ad3ffL, 0xa3984087789c1eL, 0xc7624bea73736cL,
+          0x248876203756c9L, 0x693f46716eb6bcL }
+    },
+    {
+        { 0x28173286ff2f8fL, 0xb769465da85757L, 0xf7f6271fd6e862L,
+          0x4a3fcfe8daa9cbL, 0xda82c7e2ba077aL, 0x943332241b8b8cL,
+          0x6455bd64316cb6L, 0x0865886b9108afL },
+        { 0x22ac13588ed6fcL, 0x9a68fed02dafb8L, 0x1bdb6767f0bffaL,
+          0xec4e1d58bb3a33L, 0x56c3b9fce43c82L, 0xa6449a4a8d9523L,
+          0xf706cbda7ad43aL, 0xe005a8dbd5125cL }
+    },
+    {
+        { 0xa99d1092030034L, 0x2d8cefc6f950d0L, 0x7a920c3c96f07bL,
+          0x958812808bc0d5L, 0x62ada756d761e8L, 0x0def80cbcf7285L,
+          0x0e2ba7601eedb5L, 0x7a9f9335a48dcbL },
+        { 0xb4731472f435ebL, 0x5512881f225443L, 0xee59d2b33c5840L,
+          0xb698017127d7a4L, 0xb18fced86551f7L, 0x0ade260ca1823aL,
+          0xd3b9109ce4fd58L, 0xadfd751a2517edL }
+    },
+    {
+        { 0xdf9567ceb5eaf7L, 0x110a6b478ac7d7L, 0x2d335014706e0bL,
+          0x0df9c7b0b5a209L, 0xba4223d568e684L, 0xd78af2d8c3719bL,
+          0x77467b9a5291b6L, 0x079748e5c89befL },
+        { 0xe20d3fadac377fL, 0x34e866972b5c09L, 0xd8687a3c40bbb7L,
+          0x7b3946fd2f84c9L, 0xd00e40ca78f50eL, 0xb87594417e7179L,
+          0x9c7373bcb23583L, 0x7ddeda3c90fd69L }
+    },
+    {
+        { 0x3d0def76ab686bL, 0x1a467ec49f7c79L, 0x3e53f4fc8989edL,
+          0x101e344430a0d9L, 0xa3ae7318ad44eeL, 0xaefa6cdae1d134L,
+          0xaa8cd7d824ad4dL, 0xef1650ced584fcL },
+        { 0xa74df674f4754fL, 0xf52cea8ef3fb8bL, 0x47c32d42971140L,
+          0x391c15da256fbbL, 0xc165faba605671L, 0xf2518c687993b9L,
+          0x2daf7acbd5a84dL, 0x1560b6298f12aeL }
+    },
+    {
+        { 0xef4da0254dc10aL, 0x63118655940db8L, 0xe20b14982f2948L,
+          0x67b93775581dbaL, 0x422ee7104f5029L, 0x5d440db5122d34L,
+          0xb1e56d71a4c640L, 0xbf12abbc2408eeL },
+        { 0x0cc9f86016af01L, 0x88366abf3d8cabL, 0x85dda13a2efe12L,
+          0x390df605d00674L, 0xf18f5806d187f7L, 0x28c900ff0c5d20L,
+          0xad308123e01733L, 0x42d35b554bf2fdL }
+    },
+    {
+        { 0x009135f2ffb1f1L, 0x099fc7e8f9c605L, 0xcc67da626bfa5aL,
+          0xc186d12344552bL, 0xb5232501b339e1L, 0x70a544fc9708c5L,
+          0x06baaec1e928e7L, 0x0baedd2ef0f50fL },
+        { 0x535d6d8bf479e5L, 0x156e536e4ec3e9L, 0x3165741ddb9be2L,
+          0x988af7159fd736L, 0x13d8a782e33dddL, 0x54604214e69002L,
+          0x34d56e0804a268L, 0xc59b84f0e52a4cL }
+    },
+    {
+        { 0x525d45f24729d9L, 0x5768aba8712327L, 0xa25e43b43035dbL,
+          0x15a1ee8927ef21L, 0xa785d216056112L, 0x45e2fbfd508af9L,
+          0xb6f721a37ba969L, 0x30d6d8c216d8d3L },
+        { 0x3065e0852074c3L, 0xfa40b4a2a0684eL, 0x851325a763f955L,
+          0xd4ef19c9f25900L, 0x799c869f665756L, 0x7b052223312990L,
+          0xc986c2b28db802L, 0xf48fb8f28ade0aL }
+    },
+    {
+        { 0x1e461731649b68L, 0xa96e5d65beb9dcL, 0x765ddff481935dL,
+          0x6cf132c9f3bf2aL, 0x9f6c5c97c35658L, 0x99cd1394696e60L,
+          0x99fa9249c0d5e4L, 0x1acd0638845a95L },
+        { 0x0b065413636087L, 0xea20e78ea17b7fL, 0x20afc5f6161967L,
+          0xfd6c8a2dc81028L, 0x4ef1357e32c8fdL, 0x8aa400400e4a88L,
+          0xd6fcaef48cb82fL, 0x7ba7c6db3cd4faL }
+    },
+    {
+        { 0xf843473d19c7abL, 0x968e76dc655c4dL, 0x52c87d9c4b9c2fL,
+          0x65f641ae4aa082L, 0x491a39733c3603L, 0xa606ffe5810098L,
+          0x09920e68bf8ad4L, 0x691a0c86db7882L },
+        { 0x5205883a4d3ef5L, 0xee839b7acf2efeL, 0x4b78e2ac00ca66L,
+          0xbe3f071f9fcb91L, 0x61e66c9bf6943aL, 0xe9b4e57061b79dL,
+          0x8d1b01b56c06bdL, 0x0dfa315df76ae5L }
+    },
+    {
+        { 0x803df65f1fd093L, 0x1cd6523489b77eL, 0x2cd2e15c20e295L,
+          0xcd490be9b912d1L, 0xdd9a2ff2e886d2L, 0xa3c836dfe9d72aL,
+          0xfcad5f2298e0c1L, 0xed126e24bcf067L },
+        { 0x1e339533dc81bcL, 0xbea4d76ece6a08L, 0x1d15de3991b252L,
+          0x74cc5cfe6daf97L, 0x5ad343f0826493L, 0x2d38a471064049L,
+          0xf7f47b9ffcfa4dL, 0xef14490418066cL }
+    },
+    {
+        { 0x4e7f86b9bb55abL, 0x310d7853f496a3L, 0xbd682fc0dec42cL,
+          0xbde047a411d32aL, 0xea639b4c5a5ea2L, 0x5052078ba08fa1L,
+          0xc968b2307729f2L, 0x567b5a623d3e28L },
+        { 0x171e825977fbf7L, 0x0319c70be990aaL, 0x8f65023e12cd69L,
+          0x1fb9b19f5015e6L, 0x0083f603568a7cL, 0xba3d30b1f3c5acL,
+          0xe7b509d3d7a988L, 0x2318b99cd0f6b6L }
+    },
+    {
+        { 0x54d3b8793ab2cfL, 0x366abead2d8306L, 0x66e8eb6d7a4977L,
+          0xa61888cae0072eL, 0x9eeeef5dbc3315L, 0x93f09db163e7f5L,
+          0xee9095959ade9aL, 0xaf7f578ce59be0L },
+        { 0x24bfd8d5ece59eL, 0x8aa698b3689523L, 0xa9a65de2de92cfL,
+          0xec11dbca6ad300L, 0x217f3fa09f88caL, 0xf6c33e3b4d6af7L,
+          0xcd3bfa21d86d2dL, 0x1497f835f13f25L }
+    },
+    {
+        { 0xa579568cd03d1dL, 0xd717cdae158af6L, 0x59eda97389a19fL,
+          0xb32c370099e99cL, 0xa2dba91dabb591L, 0x6d697d577c2c97L,
+          0x5423fc2d43fa6dL, 0x56ea8a50b382bfL },
+        { 0x4a987bad80c11aL, 0xe4cde217d590a5L, 0x3dd8860f97e559L,
+          0xff45e2543b593cL, 0x00eb4535343cb5L, 0x06b9b997bbfbddL,
+          0x4da36b716aea24L, 0x247651757a624eL }
+    },
+    {
+        { 0x32207d03474e0dL, 0x3ffbf04b41cc73L, 0x5c4dc45319eb39L,
+          0xfee29be758b463L, 0xcc8a381c30c7a7L, 0x147f4e49fe0e53L,
+          0x05b2e26e35a2deL, 0x4362f0292f3666L },
+        { 0x0476d0c8474b85L, 0x9d8c65fccaf108L, 0xf58d4041d54b6aL,
+          0x3ee6862f38e4b0L, 0x7c7c9d53b44f54L, 0x36a3fd80fb0db5L,
+          0xfcd94ba18a8ac8L, 0xc1b1d568f35c05L }
+    },
+    {
+        { 0x16539fc1bdd30dL, 0x1356e538df4afbL, 0xc0545d85a1aedbL,
+          0xeb2037a489396bL, 0x897fcbd5660894L, 0x02a58a9b7d104aL,
+          0x57fa24cc96b980L, 0xf6448e35bd8946L },
+        { 0xee727418805c83L, 0x10fa274992cfc6L, 0x95141939e66b21L,
+          0xe0ffa44bd08009L, 0x174332220da22bL, 0x4891ff359e6831L,
+          0x407ed73a7d687bL, 0x2fb4e0751d99cfL }
+    },
+};
+#else
+
+/* Reduce the scalar mod the order of the curve.
+ * The scalar is expected to be 114 bytes long.
+ *
+ * b  [in]  Scalar to reduce.
+ */
+void sc448_reduce(uint8_t* b)
+{
+ uint32_t d[16];
+ uint64_t t[33];
+ uint64_t c;
+ uint32_t o;
+
+ /* Load from bytes */
+ t[ 0] = (((int32_t)((b[ 0] ) >> 0)) << 0)
+ | (((int32_t)((b[ 1] ) >> 0)) << 8)
+ | (((int32_t)((b[ 2] ) >> 0)) << 16)
+ | ((((int32_t)((b[ 3] & 0xf )) >> 0)) << 24);
+ t[ 1] = (((int32_t)((b[ 3] ) >> 4)) << 0)
+ | (((int32_t)((b[ 4] ) >> 0)) << 4)
+ | (((int32_t)((b[ 5] ) >> 0)) << 12)
+ | (((int32_t)((b[ 6] ) >> 0)) << 20);
+ t[ 2] = (((int32_t)((b[ 7] ) >> 0)) << 0)
+ | (((int32_t)((b[ 8] ) >> 0)) << 8)
+ | (((int32_t)((b[ 9] ) >> 0)) << 16)
+ | ((((int32_t)((b[10] & 0xf )) >> 0)) << 24);
+ t[ 3] = (((int32_t)((b[10] ) >> 4)) << 0)
+ | (((int32_t)((b[11] ) >> 0)) << 4)
+ | (((int32_t)((b[12] ) >> 0)) << 12)
+ | (((int32_t)((b[13] ) >> 0)) << 20);
+ t[ 4] = (((int32_t)((b[14] ) >> 0)) << 0)
+ | (((int32_t)((b[15] ) >> 0)) << 8)
+ | (((int32_t)((b[16] ) >> 0)) << 16)
+ | ((((int32_t)((b[17] & 0xf )) >> 0)) << 24);
+ t[ 5] = (((int32_t)((b[17] ) >> 4)) << 0)
+ | (((int32_t)((b[18] ) >> 0)) << 4)
+ | (((int32_t)((b[19] ) >> 0)) << 12)
+ | (((int32_t)((b[20] ) >> 0)) << 20);
+ t[ 6] = (((int32_t)((b[21] ) >> 0)) << 0)
+ | (((int32_t)((b[22] ) >> 0)) << 8)
+ | (((int32_t)((b[23] ) >> 0)) << 16)
+ | ((((int32_t)((b[24] & 0xf )) >> 0)) << 24);
+ t[ 7] = (((int32_t)((b[24] ) >> 4)) << 0)
+ | (((int32_t)((b[25] ) >> 0)) << 4)
+ | (((int32_t)((b[26] ) >> 0)) << 12)
+ | (((int32_t)((b[27] ) >> 0)) << 20);
+ t[ 8] = (((int32_t)((b[28] ) >> 0)) << 0)
+ | (((int32_t)((b[29] ) >> 0)) << 8)
+ | (((int32_t)((b[30] ) >> 0)) << 16)
+ | ((((int32_t)((b[31] & 0xf )) >> 0)) << 24);
+ t[ 9] = (((int32_t)((b[31] ) >> 4)) << 0)
+ | (((int32_t)((b[32] ) >> 0)) << 4)
+ | (((int32_t)((b[33] ) >> 0)) << 12)
+ | (((int32_t)((b[34] ) >> 0)) << 20);
+ t[10] = (((int32_t)((b[35] ) >> 0)) << 0)
+ | (((int32_t)((b[36] ) >> 0)) << 8)
+ | (((int32_t)((b[37] ) >> 0)) << 16)
+ | ((((int32_t)((b[38] & 0xf )) >> 0)) << 24);
+ t[11] = (((int32_t)((b[38] ) >> 4)) << 0)
+ | (((int32_t)((b[39] ) >> 0)) << 4)
+ | (((int32_t)((b[40] ) >> 0)) << 12)
+ | (((int32_t)((b[41] ) >> 0)) << 20);
+ t[12] = (((int32_t)((b[42] ) >> 0)) << 0)
+ | (((int32_t)((b[43] ) >> 0)) << 8)
+ | (((int32_t)((b[44] ) >> 0)) << 16)
+ | ((((int32_t)((b[45] & 0xf )) >> 0)) << 24);
+ t[13] = (((int32_t)((b[45] ) >> 4)) << 0)
+ | (((int32_t)((b[46] ) >> 0)) << 4)
+ | (((int32_t)((b[47] ) >> 0)) << 12)
+ | (((int32_t)((b[48] ) >> 0)) << 20);
+ t[14] = (((int32_t)((b[49] ) >> 0)) << 0)
+ | (((int32_t)((b[50] ) >> 0)) << 8)
+ | (((int32_t)((b[51] ) >> 0)) << 16)
+ | ((((int32_t)((b[52] & 0xf )) >> 0)) << 24);
+ t[15] = (((int32_t)((b[52] ) >> 4)) << 0)
+ | (((int32_t)((b[53] ) >> 0)) << 4)
+ | (((int32_t)((b[54] ) >> 0)) << 12)
+ | (((int32_t)((b[55] ) >> 0)) << 20);
+ t[16] = (((int32_t)((b[56] ) >> 0)) << 0)
+ | (((int32_t)((b[57] ) >> 0)) << 8)
+ | (((int32_t)((b[58] ) >> 0)) << 16)
+ | ((((int32_t)((b[59] & 0xf )) >> 0)) << 24);
+ t[17] = (((int32_t)((b[59] ) >> 4)) << 0)
+ | (((int32_t)((b[60] ) >> 0)) << 4)
+ | (((int32_t)((b[61] ) >> 0)) << 12)
+ | (((int32_t)((b[62] ) >> 0)) << 20);
+ t[18] = (((int32_t)((b[63] ) >> 0)) << 0)
+ | (((int32_t)((b[64] ) >> 0)) << 8)
+ | (((int32_t)((b[65] ) >> 0)) << 16)
+ | ((((int32_t)((b[66] & 0xf )) >> 0)) << 24);
+ t[19] = (((int32_t)((b[66] ) >> 4)) << 0)
+ | (((int32_t)((b[67] ) >> 0)) << 4)
+ | (((int32_t)((b[68] ) >> 0)) << 12)
+ | (((int32_t)((b[69] ) >> 0)) << 20);
+ t[20] = (((int32_t)((b[70] ) >> 0)) << 0)
+ | (((int32_t)((b[71] ) >> 0)) << 8)
+ | (((int32_t)((b[72] ) >> 0)) << 16)
+ | ((((int32_t)((b[73] & 0xf )) >> 0)) << 24);
+ t[21] = (((int32_t)((b[73] ) >> 4)) << 0)
+ | (((int32_t)((b[74] ) >> 0)) << 4)
+ | (((int32_t)((b[75] ) >> 0)) << 12)
+ | (((int32_t)((b[76] ) >> 0)) << 20);
+ t[22] = (((int32_t)((b[77] ) >> 0)) << 0)
+ | (((int32_t)((b[78] ) >> 0)) << 8)
+ | (((int32_t)((b[79] ) >> 0)) << 16)
+ | ((((int32_t)((b[80] & 0xf )) >> 0)) << 24);
+ t[23] = (((int32_t)((b[80] ) >> 4)) << 0)
+ | (((int32_t)((b[81] ) >> 0)) << 4)
+ | (((int32_t)((b[82] ) >> 0)) << 12)
+ | (((int32_t)((b[83] ) >> 0)) << 20);
+ t[24] = (((int32_t)((b[84] ) >> 0)) << 0)
+ | (((int32_t)((b[85] ) >> 0)) << 8)
+ | (((int32_t)((b[86] ) >> 0)) << 16)
+ | ((((int32_t)((b[87] & 0xf )) >> 0)) << 24);
+ t[25] = (((int32_t)((b[87] ) >> 4)) << 0)
+ | (((int32_t)((b[88] ) >> 0)) << 4)
+ | (((int32_t)((b[89] ) >> 0)) << 12)
+ | (((int32_t)((b[90] ) >> 0)) << 20);
+ t[26] = (((int32_t)((b[91] ) >> 0)) << 0)
+ | (((int32_t)((b[92] ) >> 0)) << 8)
+ | (((int32_t)((b[93] ) >> 0)) << 16)
+ | ((((int32_t)((b[94] & 0xf )) >> 0)) << 24);
+ t[27] = (((int32_t)((b[94] ) >> 4)) << 0)
+ | (((int32_t)((b[95] ) >> 0)) << 4)
+ | (((int32_t)((b[96] ) >> 0)) << 12)
+ | (((int32_t)((b[97] ) >> 0)) << 20);
+ t[28] = (((int32_t)((b[98] ) >> 0)) << 0)
+ | (((int32_t)((b[99] ) >> 0)) << 8)
+ | (((int32_t)((b[100] ) >> 0)) << 16)
+ | ((((int32_t)((b[101] & 0xf )) >> 0)) << 24);
+ t[29] = (((int32_t)((b[101] ) >> 4)) << 0)
+ | (((int32_t)((b[102] ) >> 0)) << 4)
+ | (((int32_t)((b[103] ) >> 0)) << 12)
+ | (((int32_t)((b[104] ) >> 0)) << 20);
+ t[30] = (((int32_t)((b[105] ) >> 0)) << 0)
+ | (((int32_t)((b[106] ) >> 0)) << 8)
+ | (((int32_t)((b[107] ) >> 0)) << 16)
+ | ((((int32_t)((b[108] & 0xf )) >> 0)) << 24);
+ t[31] = (((int32_t)((b[108] ) >> 4)) << 0)
+ | (((int32_t)((b[109] ) >> 0)) << 4)
+ | (((int32_t)((b[110] ) >> 0)) << 12)
+ | (((int32_t)((b[111] ) >> 0)) << 20);
+ t[32] = (((int32_t)((b[112] ) >> 0)) << 0)
+ | (((int32_t)((b[113] ) >> 0)) << 8);
+
+ /* Mod curve order */
+ /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */
+ /* Mod top half of extra words */
+ t[ 8] += (int64_t)0x129eec34 * t[24];
+ t[ 9] += (int64_t)0x21cf5b54 * t[24];
+ t[10] += (int64_t)0x29c2ab70 * t[24];
+ t[11] += (int64_t)0x0f635c8c * t[24];
+ t[12] += (int64_t)0x25bf7a4c * t[24];
+ t[13] += (int64_t)0x2d944a70 * t[24];
+ t[14] += (int64_t)0x18eec490 * t[24];
+ t[15] += (int64_t)0x20cd7704 * t[24];
+ t[ 9] += (int64_t)0x129eec34 * t[25];
+ t[10] += (int64_t)0x21cf5b54 * t[25];
+ t[11] += (int64_t)0x29c2ab70 * t[25];
+ t[12] += (int64_t)0x0f635c8c * t[25];
+ t[13] += (int64_t)0x25bf7a4c * t[25];
+ t[14] += (int64_t)0x2d944a70 * t[25];
+ t[15] += (int64_t)0x18eec490 * t[25];
+ t[16] += (int64_t)0x20cd7704 * t[25];
+ t[10] += (int64_t)0x129eec34 * t[26];
+ t[11] += (int64_t)0x21cf5b54 * t[26];
+ t[12] += (int64_t)0x29c2ab70 * t[26];
+ t[13] += (int64_t)0x0f635c8c * t[26];
+ t[14] += (int64_t)0x25bf7a4c * t[26];
+ t[15] += (int64_t)0x2d944a70 * t[26];
+ t[16] += (int64_t)0x18eec490 * t[26];
+ t[17] += (int64_t)0x20cd7704 * t[26];
+ t[11] += (int64_t)0x129eec34 * t[27];
+ t[12] += (int64_t)0x21cf5b54 * t[27];
+ t[13] += (int64_t)0x29c2ab70 * t[27];
+ t[14] += (int64_t)0x0f635c8c * t[27];
+ t[15] += (int64_t)0x25bf7a4c * t[27];
+ t[16] += (int64_t)0x2d944a70 * t[27];
+ t[17] += (int64_t)0x18eec490 * t[27];
+ t[18] += (int64_t)0x20cd7704 * t[27];
+ t[12] += (int64_t)0x129eec34 * t[28];
+ t[13] += (int64_t)0x21cf5b54 * t[28];
+ t[14] += (int64_t)0x29c2ab70 * t[28];
+ t[15] += (int64_t)0x0f635c8c * t[28];
+ t[16] += (int64_t)0x25bf7a4c * t[28];
+ t[17] += (int64_t)0x2d944a70 * t[28];
+ t[18] += (int64_t)0x18eec490 * t[28];
+ t[19] += (int64_t)0x20cd7704 * t[28];
+ t[13] += (int64_t)0x129eec34 * t[29];
+ t[14] += (int64_t)0x21cf5b54 * t[29];
+ t[15] += (int64_t)0x29c2ab70 * t[29];
+ t[16] += (int64_t)0x0f635c8c * t[29];
+ t[17] += (int64_t)0x25bf7a4c * t[29];
+ t[18] += (int64_t)0x2d944a70 * t[29];
+ t[19] += (int64_t)0x18eec490 * t[29];
+ t[20] += (int64_t)0x20cd7704 * t[29];
+ t[14] += (int64_t)0x129eec34 * t[30];
+ t[15] += (int64_t)0x21cf5b54 * t[30];
+ t[16] += (int64_t)0x29c2ab70 * t[30];
+ t[17] += (int64_t)0x0f635c8c * t[30];
+ t[18] += (int64_t)0x25bf7a4c * t[30];
+ t[19] += (int64_t)0x2d944a70 * t[30];
+ t[20] += (int64_t)0x18eec490 * t[30];
+ t[21] += (int64_t)0x20cd7704 * t[30];
+ t[15] += (int64_t)0x129eec34 * t[31];
+ t[16] += (int64_t)0x21cf5b54 * t[31];
+ t[17] += (int64_t)0x29c2ab70 * t[31];
+ t[18] += (int64_t)0x0f635c8c * t[31];
+ t[19] += (int64_t)0x25bf7a4c * t[31];
+ t[20] += (int64_t)0x2d944a70 * t[31];
+ t[21] += (int64_t)0x18eec490 * t[31];
+ t[22] += (int64_t)0x20cd7704 * t[31];
+ t[16] += (int64_t)0x129eec34 * t[32];
+ t[17] += (int64_t)0x21cf5b54 * t[32];
+ t[18] += (int64_t)0x29c2ab70 * t[32];
+ t[19] += (int64_t)0x0f635c8c * t[32];
+ t[20] += (int64_t)0x25bf7a4c * t[32];
+ t[21] += (int64_t)0x2d944a70 * t[32];
+ t[22] += (int64_t)0x18eec490 * t[32];
+ t[23] += (int64_t)0x20cd7704 * t[32];
+ t[24] = 0;
+ /* Propagate carries */
+ c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff;
+ c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff;
+ c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff;
+ c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff;
+ c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff;
+ c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff;
+ c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff;
+ c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff;
+ c = t[16] >> 28; t[17] += c; t[16] = t[16] & 0xfffffff;
+ c = t[17] >> 28; t[18] += c; t[17] = t[17] & 0xfffffff;
+ c = t[18] >> 28; t[19] += c; t[18] = t[18] & 0xfffffff;
+ c = t[19] >> 28; t[20] += c; t[19] = t[19] & 0xfffffff;
+ c = t[20] >> 28; t[21] += c; t[20] = t[20] & 0xfffffff;
+ c = t[21] >> 28; t[22] += c; t[21] = t[21] & 0xfffffff;
+ c = t[22] >> 28; t[23] += c; t[22] = t[22] & 0xfffffff;
+ c = t[23] >> 28; t[24] += c; t[23] = t[23] & 0xfffffff;
+ /* Mod bottom half of extra words */
+ t[ 0] += (int64_t)0x129eec34 * t[16];
+ t[ 1] += (int64_t)0x21cf5b54 * t[16];
+ t[ 2] += (int64_t)0x29c2ab70 * t[16];
+ t[ 3] += (int64_t)0x0f635c8c * t[16];
+ t[ 4] += (int64_t)0x25bf7a4c * t[16];
+ t[ 5] += (int64_t)0x2d944a70 * t[16];
+ t[ 6] += (int64_t)0x18eec490 * t[16];
+ t[ 7] += (int64_t)0x20cd7704 * t[16];
+ t[ 1] += (int64_t)0x129eec34 * t[17];
+ t[ 2] += (int64_t)0x21cf5b54 * t[17];
+ t[ 3] += (int64_t)0x29c2ab70 * t[17];
+ t[ 4] += (int64_t)0x0f635c8c * t[17];
+ t[ 5] += (int64_t)0x25bf7a4c * t[17];
+ t[ 6] += (int64_t)0x2d944a70 * t[17];
+ t[ 7] += (int64_t)0x18eec490 * t[17];
+ t[ 8] += (int64_t)0x20cd7704 * t[17];
+ t[ 2] += (int64_t)0x129eec34 * t[18];
+ t[ 3] += (int64_t)0x21cf5b54 * t[18];
+ t[ 4] += (int64_t)0x29c2ab70 * t[18];
+ t[ 5] += (int64_t)0x0f635c8c * t[18];
+ t[ 6] += (int64_t)0x25bf7a4c * t[18];
+ t[ 7] += (int64_t)0x2d944a70 * t[18];
+ t[ 8] += (int64_t)0x18eec490 * t[18];
+ t[ 9] += (int64_t)0x20cd7704 * t[18];
+ t[ 3] += (int64_t)0x129eec34 * t[19];
+ t[ 4] += (int64_t)0x21cf5b54 * t[19];
+ t[ 5] += (int64_t)0x29c2ab70 * t[19];
+ t[ 6] += (int64_t)0x0f635c8c * t[19];
+ t[ 7] += (int64_t)0x25bf7a4c * t[19];
+ t[ 8] += (int64_t)0x2d944a70 * t[19];
+ t[ 9] += (int64_t)0x18eec490 * t[19];
+ t[10] += (int64_t)0x20cd7704 * t[19];
+ t[ 4] += (int64_t)0x129eec34 * t[20];
+ t[ 5] += (int64_t)0x21cf5b54 * t[20];
+ t[ 6] += (int64_t)0x29c2ab70 * t[20];
+ t[ 7] += (int64_t)0x0f635c8c * t[20];
+ t[ 8] += (int64_t)0x25bf7a4c * t[20];
+ t[ 9] += (int64_t)0x2d944a70 * t[20];
+ t[10] += (int64_t)0x18eec490 * t[20];
+ t[11] += (int64_t)0x20cd7704 * t[20];
+ t[ 5] += (int64_t)0x129eec34 * t[21];
+ t[ 6] += (int64_t)0x21cf5b54 * t[21];
+ t[ 7] += (int64_t)0x29c2ab70 * t[21];
+ t[ 8] += (int64_t)0x0f635c8c * t[21];
+ t[ 9] += (int64_t)0x25bf7a4c * t[21];
+ t[10] += (int64_t)0x2d944a70 * t[21];
+ t[11] += (int64_t)0x18eec490 * t[21];
+ t[12] += (int64_t)0x20cd7704 * t[21];
+ t[ 6] += (int64_t)0x129eec34 * t[22];
+ t[ 7] += (int64_t)0x21cf5b54 * t[22];
+ t[ 8] += (int64_t)0x29c2ab70 * t[22];
+ t[ 9] += (int64_t)0x0f635c8c * t[22];
+ t[10] += (int64_t)0x25bf7a4c * t[22];
+ t[11] += (int64_t)0x2d944a70 * t[22];
+ t[12] += (int64_t)0x18eec490 * t[22];
+ t[13] += (int64_t)0x20cd7704 * t[22];
+ t[ 7] += (int64_t)0x129eec34 * t[23];
+ t[ 8] += (int64_t)0x21cf5b54 * t[23];
+ t[ 9] += (int64_t)0x29c2ab70 * t[23];
+ t[10] += (int64_t)0x0f635c8c * t[23];
+ t[11] += (int64_t)0x25bf7a4c * t[23];
+ t[12] += (int64_t)0x2d944a70 * t[23];
+ t[13] += (int64_t)0x18eec490 * t[23];
+ t[14] += (int64_t)0x20cd7704 * t[23];
+ t[ 8] += (int64_t)0x129eec34 * t[24];
+ t[ 9] += (int64_t)0x21cf5b54 * t[24];
+ t[10] += (int64_t)0x29c2ab70 * t[24];
+ t[11] += (int64_t)0x0f635c8c * t[24];
+ t[12] += (int64_t)0x25bf7a4c * t[24];
+ t[13] += (int64_t)0x2d944a70 * t[24];
+ t[14] += (int64_t)0x18eec490 * t[24];
+ t[15] += (int64_t)0x20cd7704 * t[24];
+ t[16] = 0;
+ /* Propagate carries */
+ c = t[ 0] >> 28; t[ 1] += c; t[ 0] = t[ 0] & 0xfffffff;
+ c = t[ 1] >> 28; t[ 2] += c; t[ 1] = t[ 1] & 0xfffffff;
+ c = t[ 2] >> 28; t[ 3] += c; t[ 2] = t[ 2] & 0xfffffff;
+ c = t[ 3] >> 28; t[ 4] += c; t[ 3] = t[ 3] & 0xfffffff;
+ c = t[ 4] >> 28; t[ 5] += c; t[ 4] = t[ 4] & 0xfffffff;
+ c = t[ 5] >> 28; t[ 6] += c; t[ 5] = t[ 5] & 0xfffffff;
+ c = t[ 6] >> 28; t[ 7] += c; t[ 6] = t[ 6] & 0xfffffff;
+ c = t[ 7] >> 28; t[ 8] += c; t[ 7] = t[ 7] & 0xfffffff;
+ c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff;
+ c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff;
+ c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff;
+ c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff;
+ c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff;
+ c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff;
+ c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff;
+ c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff;
+ t[ 0] += (int64_t)0x129eec34 * t[16];
+ t[ 1] += (int64_t)0x21cf5b54 * t[16];
+ t[ 2] += (int64_t)0x29c2ab70 * t[16];
+ t[ 3] += (int64_t)0x0f635c8c * t[16];
+ t[ 4] += (int64_t)0x25bf7a4c * t[16];
+ t[ 5] += (int64_t)0x2d944a70 * t[16];
+ t[ 6] += (int64_t)0x18eec490 * t[16];
+ t[ 7] += (int64_t)0x20cd7704 * t[16];
+ /* Propagate carries */
+ c = t[ 0] >> 28; t[ 1] += c; d[ 0] = (int32_t)(t[ 0] & 0xfffffff);
+ c = t[ 1] >> 28; t[ 2] += c; d[ 1] = (int32_t)(t[ 1] & 0xfffffff);
+ c = t[ 2] >> 28; t[ 3] += c; d[ 2] = (int32_t)(t[ 2] & 0xfffffff);
+ c = t[ 3] >> 28; t[ 4] += c; d[ 3] = (int32_t)(t[ 3] & 0xfffffff);
+ c = t[ 4] >> 28; t[ 5] += c; d[ 4] = (int32_t)(t[ 4] & 0xfffffff);
+ c = t[ 5] >> 28; t[ 6] += c; d[ 5] = (int32_t)(t[ 5] & 0xfffffff);
+ c = t[ 6] >> 28; t[ 7] += c; d[ 6] = (int32_t)(t[ 6] & 0xfffffff);
+ c = t[ 7] >> 28; t[ 8] += c; d[ 7] = (int32_t)(t[ 7] & 0xfffffff);
+ c = t[ 8] >> 28; t[ 9] += c; d[ 8] = (int32_t)(t[ 8] & 0xfffffff);
+ c = t[ 9] >> 28; t[10] += c; d[ 9] = (int32_t)(t[ 9] & 0xfffffff);
+ c = t[10] >> 28; t[11] += c; d[10] = (int32_t)(t[10] & 0xfffffff);
+ c = t[11] >> 28; t[12] += c; d[11] = (int32_t)(t[11] & 0xfffffff);
+ c = t[12] >> 28; t[13] += c; d[12] = (int32_t)(t[12] & 0xfffffff);
+ c = t[13] >> 28; t[14] += c; d[13] = (int32_t)(t[13] & 0xfffffff);
+ c = t[14] >> 28; t[15] += c; d[14] = (int32_t)(t[14] & 0xfffffff);
+ d[15] = t[15];
+ /* Mod bits over 28 in last word */
+ o = d[15] >> 26; d[15] &= 0x3ffffff;
+ d[ 0] += 0x4a7bb0d * o;
+ d[ 1] += 0x873d6d5 * o;
+ d[ 2] += 0xa70aadc * o;
+ d[ 3] += 0x3d8d723 * o;
+ d[ 4] += 0x96fde93 * o;
+ d[ 5] += 0xb65129c * o;
+ d[ 6] += 0x63bb124 * o;
+ d[ 7] += 0x8335dc1 * o;
+ /* Propagate carries */
+ o = d[ 0] >> 28; d[ 1] += o; d[ 0] = d[ 0] & 0xfffffff;
+ o = d[ 1] >> 28; d[ 2] += o; d[ 1] = d[ 1] & 0xfffffff;
+ o = d[ 2] >> 28; d[ 3] += o; d[ 2] = d[ 2] & 0xfffffff;
+ o = d[ 3] >> 28; d[ 4] += o; d[ 3] = d[ 3] & 0xfffffff;
+ o = d[ 4] >> 28; d[ 5] += o; d[ 4] = d[ 4] & 0xfffffff;
+ o = d[ 5] >> 28; d[ 6] += o; d[ 5] = d[ 5] & 0xfffffff;
+ o = d[ 6] >> 28; d[ 7] += o; d[ 6] = d[ 6] & 0xfffffff;
+ o = d[ 7] >> 28; d[ 8] += o; d[ 7] = d[ 7] & 0xfffffff;
+ o = d[ 8] >> 28; d[ 9] += o; d[ 8] = d[ 8] & 0xfffffff;
+ o = d[ 9] >> 28; d[10] += o; d[ 9] = d[ 9] & 0xfffffff;
+ o = d[10] >> 28; d[11] += o; d[10] = d[10] & 0xfffffff;
+ o = d[11] >> 28; d[12] += o; d[11] = d[11] & 0xfffffff;
+ o = d[12] >> 28; d[13] += o; d[12] = d[12] & 0xfffffff;
+ o = d[13] >> 28; d[14] += o; d[13] = d[13] & 0xfffffff;
+ o = d[14] >> 28; d[15] += o; d[14] = d[14] & 0xfffffff;
+
+ /* Convert to bytes */
+ b[ 0] = (d[0 ] >> 0);
+ b[ 1] = (d[0 ] >> 8);
+ b[ 2] = (d[0 ] >> 16);
+ b[ 3] = (d[0 ] >> 24) + ((d[1 ] >> 0) << 4);
+ b[ 4] = (d[1 ] >> 4);
+ b[ 5] = (d[1 ] >> 12);
+ b[ 6] = (d[1 ] >> 20);
+ b[ 7] = (d[2 ] >> 0);
+ b[ 8] = (d[2 ] >> 8);
+ b[ 9] = (d[2 ] >> 16);
+ b[10] = (d[2 ] >> 24) + ((d[3 ] >> 0) << 4);
+ b[11] = (d[3 ] >> 4);
+ b[12] = (d[3 ] >> 12);
+ b[13] = (d[3 ] >> 20);
+ b[14] = (d[4 ] >> 0);
+ b[15] = (d[4 ] >> 8);
+ b[16] = (d[4 ] >> 16);
+ b[17] = (d[4 ] >> 24) + ((d[5 ] >> 0) << 4);
+ b[18] = (d[5 ] >> 4);
+ b[19] = (d[5 ] >> 12);
+ b[20] = (d[5 ] >> 20);
+ b[21] = (d[6 ] >> 0);
+ b[22] = (d[6 ] >> 8);
+ b[23] = (d[6 ] >> 16);
+ b[24] = (d[6 ] >> 24) + ((d[7 ] >> 0) << 4);
+ b[25] = (d[7 ] >> 4);
+ b[26] = (d[7 ] >> 12);
+ b[27] = (d[7 ] >> 20);
+ b[28] = (d[8 ] >> 0);
+ b[29] = (d[8 ] >> 8);
+ b[30] = (d[8 ] >> 16);
+ b[31] = (d[8 ] >> 24) + ((d[9 ] >> 0) << 4);
+ b[32] = (d[9 ] >> 4);
+ b[33] = (d[9 ] >> 12);
+ b[34] = (d[9 ] >> 20);
+ b[35] = (d[10] >> 0);
+ b[36] = (d[10] >> 8);
+ b[37] = (d[10] >> 16);
+ b[38] = (d[10] >> 24) + ((d[11] >> 0) << 4);
+ b[39] = (d[11] >> 4);
+ b[40] = (d[11] >> 12);
+ b[41] = (d[11] >> 20);
+ b[42] = (d[12] >> 0);
+ b[43] = (d[12] >> 8);
+ b[44] = (d[12] >> 16);
+ b[45] = (d[12] >> 24) + ((d[13] >> 0) << 4);
+ b[46] = (d[13] >> 4);
+ b[47] = (d[13] >> 12);
+ b[48] = (d[13] >> 20);
+ b[49] = (d[14] >> 0);
+ b[50] = (d[14] >> 8);
+ b[51] = (d[14] >> 16);
+ b[52] = (d[14] >> 24) + ((d[15] >> 0) << 4);
+ b[53] = (d[15] >> 4);
+ b[54] = (d[15] >> 12);
+ b[55] = (d[15] >> 20);
+ b[56] = 0;
+}
+
+/* Multiply a by b and add d. r = (a * b + d) mod order
+ *
+ * r [out] Scalar to hold result.
+ * a [in] Scalar to multiply.
+ * b [in] Scalar to multiply.
+ * d [in] Scalar to add to multiplicative result.
+ */
+void sc448_muladd(uint8_t* r, const uint8_t* a, const uint8_t* b,
+    const uint8_t* d)
+{
+    uint32_t ad[16], bd[16], dd[16], rd[16];
+    uint64_t t[32];
+    uint64_t c;
+    uint32_t o;
+
+    /* Scalars are 56-byte little-endian values unpacked into 16 limbs of
+     * 28 bits each (ad, bd, dd); rd receives the reduced result and t
+     * holds the 32-limb double-width product during reduction. */
+    /* Load from bytes */
+    ad[ 0] = (((int32_t)((a[ 0] ) >> 0)) << 0)
+ | (((int32_t)((a[ 1] ) >> 0)) << 8)
+ | (((int32_t)((a[ 2] ) >> 0)) << 16)
+ | ((((int32_t)((a[ 3] & 0xf )) >> 0)) << 24);
+    ad[ 1] = (((int32_t)((a[ 3] ) >> 4)) << 0)
+ | (((int32_t)((a[ 4] ) >> 0)) << 4)
+ | (((int32_t)((a[ 5] ) >> 0)) << 12)
+ | (((int32_t)((a[ 6] ) >> 0)) << 20);
+    ad[ 2] = (((int32_t)((a[ 7] ) >> 0)) << 0)
+ | (((int32_t)((a[ 8] ) >> 0)) << 8)
+ | (((int32_t)((a[ 9] ) >> 0)) << 16)
+ | ((((int32_t)((a[10] & 0xf )) >> 0)) << 24);
+    ad[ 3] = (((int32_t)((a[10] ) >> 4)) << 0)
+ | (((int32_t)((a[11] ) >> 0)) << 4)
+ | (((int32_t)((a[12] ) >> 0)) << 12)
+ | (((int32_t)((a[13] ) >> 0)) << 20);
+    ad[ 4] = (((int32_t)((a[14] ) >> 0)) << 0)
+ | (((int32_t)((a[15] ) >> 0)) << 8)
+ | (((int32_t)((a[16] ) >> 0)) << 16)
+ | ((((int32_t)((a[17] & 0xf )) >> 0)) << 24);
+    ad[ 5] = (((int32_t)((a[17] ) >> 4)) << 0)
+ | (((int32_t)((a[18] ) >> 0)) << 4)
+ | (((int32_t)((a[19] ) >> 0)) << 12)
+ | (((int32_t)((a[20] ) >> 0)) << 20);
+    ad[ 6] = (((int32_t)((a[21] ) >> 0)) << 0)
+ | (((int32_t)((a[22] ) >> 0)) << 8)
+ | (((int32_t)((a[23] ) >> 0)) << 16)
+ | ((((int32_t)((a[24] & 0xf )) >> 0)) << 24);
+    ad[ 7] = (((int32_t)((a[24] ) >> 4)) << 0)
+ | (((int32_t)((a[25] ) >> 0)) << 4)
+ | (((int32_t)((a[26] ) >> 0)) << 12)
+ | (((int32_t)((a[27] ) >> 0)) << 20);
+    ad[ 8] = (((int32_t)((a[28] ) >> 0)) << 0)
+ | (((int32_t)((a[29] ) >> 0)) << 8)
+ | (((int32_t)((a[30] ) >> 0)) << 16)
+ | ((((int32_t)((a[31] & 0xf )) >> 0)) << 24);
+    ad[ 9] = (((int32_t)((a[31] ) >> 4)) << 0)
+ | (((int32_t)((a[32] ) >> 0)) << 4)
+ | (((int32_t)((a[33] ) >> 0)) << 12)
+ | (((int32_t)((a[34] ) >> 0)) << 20);
+    ad[10] = (((int32_t)((a[35] ) >> 0)) << 0)
+ | (((int32_t)((a[36] ) >> 0)) << 8)
+ | (((int32_t)((a[37] ) >> 0)) << 16)
+ | ((((int32_t)((a[38] & 0xf )) >> 0)) << 24);
+    ad[11] = (((int32_t)((a[38] ) >> 4)) << 0)
+ | (((int32_t)((a[39] ) >> 0)) << 4)
+ | (((int32_t)((a[40] ) >> 0)) << 12)
+ | (((int32_t)((a[41] ) >> 0)) << 20);
+    ad[12] = (((int32_t)((a[42] ) >> 0)) << 0)
+ | (((int32_t)((a[43] ) >> 0)) << 8)
+ | (((int32_t)((a[44] ) >> 0)) << 16)
+ | ((((int32_t)((a[45] & 0xf )) >> 0)) << 24);
+    ad[13] = (((int32_t)((a[45] ) >> 4)) << 0)
+ | (((int32_t)((a[46] ) >> 0)) << 4)
+ | (((int32_t)((a[47] ) >> 0)) << 12)
+ | (((int32_t)((a[48] ) >> 0)) << 20);
+    ad[14] = (((int32_t)((a[49] ) >> 0)) << 0)
+ | (((int32_t)((a[50] ) >> 0)) << 8)
+ | (((int32_t)((a[51] ) >> 0)) << 16)
+ | ((((int32_t)((a[52] & 0xf )) >> 0)) << 24);
+    ad[15] = (((int32_t)((a[52] ) >> 4)) << 0)
+ | (((int32_t)((a[53] ) >> 0)) << 4)
+ | (((int32_t)((a[54] ) >> 0)) << 12)
+ | (((int32_t)((a[55] ) >> 0)) << 20);
+    /* Load from bytes */
+    bd[ 0] = (((int32_t)((b[ 0] ) >> 0)) << 0)
+ | (((int32_t)((b[ 1] ) >> 0)) << 8)
+ | (((int32_t)((b[ 2] ) >> 0)) << 16)
+ | ((((int32_t)((b[ 3] & 0xf )) >> 0)) << 24);
+    bd[ 1] = (((int32_t)((b[ 3] ) >> 4)) << 0)
+ | (((int32_t)((b[ 4] ) >> 0)) << 4)
+ | (((int32_t)((b[ 5] ) >> 0)) << 12)
+ | (((int32_t)((b[ 6] ) >> 0)) << 20);
+    bd[ 2] = (((int32_t)((b[ 7] ) >> 0)) << 0)
+ | (((int32_t)((b[ 8] ) >> 0)) << 8)
+ | (((int32_t)((b[ 9] ) >> 0)) << 16)
+ | ((((int32_t)((b[10] & 0xf )) >> 0)) << 24);
+    bd[ 3] = (((int32_t)((b[10] ) >> 4)) << 0)
+ | (((int32_t)((b[11] ) >> 0)) << 4)
+ | (((int32_t)((b[12] ) >> 0)) << 12)
+ | (((int32_t)((b[13] ) >> 0)) << 20);
+    bd[ 4] = (((int32_t)((b[14] ) >> 0)) << 0)
+ | (((int32_t)((b[15] ) >> 0)) << 8)
+ | (((int32_t)((b[16] ) >> 0)) << 16)
+ | ((((int32_t)((b[17] & 0xf )) >> 0)) << 24);
+    bd[ 5] = (((int32_t)((b[17] ) >> 4)) << 0)
+ | (((int32_t)((b[18] ) >> 0)) << 4)
+ | (((int32_t)((b[19] ) >> 0)) << 12)
+ | (((int32_t)((b[20] ) >> 0)) << 20);
+    bd[ 6] = (((int32_t)((b[21] ) >> 0)) << 0)
+ | (((int32_t)((b[22] ) >> 0)) << 8)
+ | (((int32_t)((b[23] ) >> 0)) << 16)
+ | ((((int32_t)((b[24] & 0xf )) >> 0)) << 24);
+    bd[ 7] = (((int32_t)((b[24] ) >> 4)) << 0)
+ | (((int32_t)((b[25] ) >> 0)) << 4)
+ | (((int32_t)((b[26] ) >> 0)) << 12)
+ | (((int32_t)((b[27] ) >> 0)) << 20);
+    bd[ 8] = (((int32_t)((b[28] ) >> 0)) << 0)
+ | (((int32_t)((b[29] ) >> 0)) << 8)
+ | (((int32_t)((b[30] ) >> 0)) << 16)
+ | ((((int32_t)((b[31] & 0xf )) >> 0)) << 24);
+    bd[ 9] = (((int32_t)((b[31] ) >> 4)) << 0)
+ | (((int32_t)((b[32] ) >> 0)) << 4)
+ | (((int32_t)((b[33] ) >> 0)) << 12)
+ | (((int32_t)((b[34] ) >> 0)) << 20);
+    bd[10] = (((int32_t)((b[35] ) >> 0)) << 0)
+ | (((int32_t)((b[36] ) >> 0)) << 8)
+ | (((int32_t)((b[37] ) >> 0)) << 16)
+ | ((((int32_t)((b[38] & 0xf )) >> 0)) << 24);
+    bd[11] = (((int32_t)((b[38] ) >> 4)) << 0)
+ | (((int32_t)((b[39] ) >> 0)) << 4)
+ | (((int32_t)((b[40] ) >> 0)) << 12)
+ | (((int32_t)((b[41] ) >> 0)) << 20);
+    bd[12] = (((int32_t)((b[42] ) >> 0)) << 0)
+ | (((int32_t)((b[43] ) >> 0)) << 8)
+ | (((int32_t)((b[44] ) >> 0)) << 16)
+ | ((((int32_t)((b[45] & 0xf )) >> 0)) << 24);
+    bd[13] = (((int32_t)((b[45] ) >> 4)) << 0)
+ | (((int32_t)((b[46] ) >> 0)) << 4)
+ | (((int32_t)((b[47] ) >> 0)) << 12)
+ | (((int32_t)((b[48] ) >> 0)) << 20);
+    bd[14] = (((int32_t)((b[49] ) >> 0)) << 0)
+ | (((int32_t)((b[50] ) >> 0)) << 8)
+ | (((int32_t)((b[51] ) >> 0)) << 16)
+ | ((((int32_t)((b[52] & 0xf )) >> 0)) << 24);
+    bd[15] = (((int32_t)((b[52] ) >> 4)) << 0)
+ | (((int32_t)((b[53] ) >> 0)) << 4)
+ | (((int32_t)((b[54] ) >> 0)) << 12)
+ | (((int32_t)((b[55] ) >> 0)) << 20);
+    /* Load from bytes */
+    dd[ 0] = (((int32_t)((d[ 0] ) >> 0)) << 0)
+ | (((int32_t)((d[ 1] ) >> 0)) << 8)
+ | (((int32_t)((d[ 2] ) >> 0)) << 16)
+ | ((((int32_t)((d[ 3] & 0xf )) >> 0)) << 24);
+    dd[ 1] = (((int32_t)((d[ 3] ) >> 4)) << 0)
+ | (((int32_t)((d[ 4] ) >> 0)) << 4)
+ | (((int32_t)((d[ 5] ) >> 0)) << 12)
+ | (((int32_t)((d[ 6] ) >> 0)) << 20);
+    dd[ 2] = (((int32_t)((d[ 7] ) >> 0)) << 0)
+ | (((int32_t)((d[ 8] ) >> 0)) << 8)
+ | (((int32_t)((d[ 9] ) >> 0)) << 16)
+ | ((((int32_t)((d[10] & 0xf )) >> 0)) << 24);
+    dd[ 3] = (((int32_t)((d[10] ) >> 4)) << 0)
+ | (((int32_t)((d[11] ) >> 0)) << 4)
+ | (((int32_t)((d[12] ) >> 0)) << 12)
+ | (((int32_t)((d[13] ) >> 0)) << 20);
+    dd[ 4] = (((int32_t)((d[14] ) >> 0)) << 0)
+ | (((int32_t)((d[15] ) >> 0)) << 8)
+ | (((int32_t)((d[16] ) >> 0)) << 16)
+ | ((((int32_t)((d[17] & 0xf )) >> 0)) << 24);
+    dd[ 5] = (((int32_t)((d[17] ) >> 4)) << 0)
+ | (((int32_t)((d[18] ) >> 0)) << 4)
+ | (((int32_t)((d[19] ) >> 0)) << 12)
+ | (((int32_t)((d[20] ) >> 0)) << 20);
+    dd[ 6] = (((int32_t)((d[21] ) >> 0)) << 0)
+ | (((int32_t)((d[22] ) >> 0)) << 8)
+ | (((int32_t)((d[23] ) >> 0)) << 16)
+ | ((((int32_t)((d[24] & 0xf )) >> 0)) << 24);
+    dd[ 7] = (((int32_t)((d[24] ) >> 4)) << 0)
+ | (((int32_t)((d[25] ) >> 0)) << 4)
+ | (((int32_t)((d[26] ) >> 0)) << 12)
+ | (((int32_t)((d[27] ) >> 0)) << 20);
+    dd[ 8] = (((int32_t)((d[28] ) >> 0)) << 0)
+ | (((int32_t)((d[29] ) >> 0)) << 8)
+ | (((int32_t)((d[30] ) >> 0)) << 16)
+ | ((((int32_t)((d[31] & 0xf )) >> 0)) << 24);
+    dd[ 9] = (((int32_t)((d[31] ) >> 4)) << 0)
+ | (((int32_t)((d[32] ) >> 0)) << 4)
+ | (((int32_t)((d[33] ) >> 0)) << 12)
+ | (((int32_t)((d[34] ) >> 0)) << 20);
+    dd[10] = (((int32_t)((d[35] ) >> 0)) << 0)
+ | (((int32_t)((d[36] ) >> 0)) << 8)
+ | (((int32_t)((d[37] ) >> 0)) << 16)
+ | ((((int32_t)((d[38] & 0xf )) >> 0)) << 24);
+    dd[11] = (((int32_t)((d[38] ) >> 4)) << 0)
+ | (((int32_t)((d[39] ) >> 0)) << 4)
+ | (((int32_t)((d[40] ) >> 0)) << 12)
+ | (((int32_t)((d[41] ) >> 0)) << 20);
+    dd[12] = (((int32_t)((d[42] ) >> 0)) << 0)
+ | (((int32_t)((d[43] ) >> 0)) << 8)
+ | (((int32_t)((d[44] ) >> 0)) << 16)
+ | ((((int32_t)((d[45] & 0xf )) >> 0)) << 24);
+    dd[13] = (((int32_t)((d[45] ) >> 4)) << 0)
+ | (((int32_t)((d[46] ) >> 0)) << 4)
+ | (((int32_t)((d[47] ) >> 0)) << 12)
+ | (((int32_t)((d[48] ) >> 0)) << 20);
+    dd[14] = (((int32_t)((d[49] ) >> 0)) << 0)
+ | (((int32_t)((d[50] ) >> 0)) << 8)
+ | (((int32_t)((d[51] ) >> 0)) << 16)
+ | ((((int32_t)((d[52] & 0xf )) >> 0)) << 24);
+    dd[15] = (((int32_t)((d[52] ) >> 4)) << 0)
+ | (((int32_t)((d[53] ) >> 0)) << 4)
+ | (((int32_t)((d[54] ) >> 0)) << 12)
+ | (((int32_t)((d[55] ) >> 0)) << 20);
+
+    /* a * b + d */
+    /* Schoolbook multiply into the 31 product columns t[0..30]; the
+     * addend dd is folded into the low 16 columns as they are formed. */
+    t[ 0] = dd[ 0] + (int64_t)ad[ 0] * bd[ 0];
+    t[ 1] = dd[ 1] + (int64_t)ad[ 0] * bd[ 1]
+ + (int64_t)ad[ 1] * bd[ 0];
+    t[ 2] = dd[ 2] + (int64_t)ad[ 0] * bd[ 2]
+ + (int64_t)ad[ 1] * bd[ 1]
+ + (int64_t)ad[ 2] * bd[ 0];
+    t[ 3] = dd[ 3] + (int64_t)ad[ 0] * bd[ 3]
+ + (int64_t)ad[ 1] * bd[ 2]
+ + (int64_t)ad[ 2] * bd[ 1]
+ + (int64_t)ad[ 3] * bd[ 0];
+    t[ 4] = dd[ 4] + (int64_t)ad[ 0] * bd[ 4]
+ + (int64_t)ad[ 1] * bd[ 3]
+ + (int64_t)ad[ 2] * bd[ 2]
+ + (int64_t)ad[ 3] * bd[ 1]
+ + (int64_t)ad[ 4] * bd[ 0];
+    t[ 5] = dd[ 5] + (int64_t)ad[ 0] * bd[ 5]
+ + (int64_t)ad[ 1] * bd[ 4]
+ + (int64_t)ad[ 2] * bd[ 3]
+ + (int64_t)ad[ 3] * bd[ 2]
+ + (int64_t)ad[ 4] * bd[ 1]
+ + (int64_t)ad[ 5] * bd[ 0];
+    t[ 6] = dd[ 6] + (int64_t)ad[ 0] * bd[ 6]
+ + (int64_t)ad[ 1] * bd[ 5]
+ + (int64_t)ad[ 2] * bd[ 4]
+ + (int64_t)ad[ 3] * bd[ 3]
+ + (int64_t)ad[ 4] * bd[ 2]
+ + (int64_t)ad[ 5] * bd[ 1]
+ + (int64_t)ad[ 6] * bd[ 0];
+    t[ 7] = dd[ 7] + (int64_t)ad[ 0] * bd[ 7]
+ + (int64_t)ad[ 1] * bd[ 6]
+ + (int64_t)ad[ 2] * bd[ 5]
+ + (int64_t)ad[ 3] * bd[ 4]
+ + (int64_t)ad[ 4] * bd[ 3]
+ + (int64_t)ad[ 5] * bd[ 2]
+ + (int64_t)ad[ 6] * bd[ 1]
+ + (int64_t)ad[ 7] * bd[ 0];
+    t[ 8] = dd[ 8] + (int64_t)ad[ 0] * bd[ 8]
+ + (int64_t)ad[ 1] * bd[ 7]
+ + (int64_t)ad[ 2] * bd[ 6]
+ + (int64_t)ad[ 3] * bd[ 5]
+ + (int64_t)ad[ 4] * bd[ 4]
+ + (int64_t)ad[ 5] * bd[ 3]
+ + (int64_t)ad[ 6] * bd[ 2]
+ + (int64_t)ad[ 7] * bd[ 1]
+ + (int64_t)ad[ 8] * bd[ 0];
+    t[ 9] = dd[ 9] + (int64_t)ad[ 0] * bd[ 9]
+ + (int64_t)ad[ 1] * bd[ 8]
+ + (int64_t)ad[ 2] * bd[ 7]
+ + (int64_t)ad[ 3] * bd[ 6]
+ + (int64_t)ad[ 4] * bd[ 5]
+ + (int64_t)ad[ 5] * bd[ 4]
+ + (int64_t)ad[ 6] * bd[ 3]
+ + (int64_t)ad[ 7] * bd[ 2]
+ + (int64_t)ad[ 8] * bd[ 1]
+ + (int64_t)ad[ 9] * bd[ 0];
+    t[10] = dd[10] + (int64_t)ad[ 0] * bd[10]
+ + (int64_t)ad[ 1] * bd[ 9]
+ + (int64_t)ad[ 2] * bd[ 8]
+ + (int64_t)ad[ 3] * bd[ 7]
+ + (int64_t)ad[ 4] * bd[ 6]
+ + (int64_t)ad[ 5] * bd[ 5]
+ + (int64_t)ad[ 6] * bd[ 4]
+ + (int64_t)ad[ 7] * bd[ 3]
+ + (int64_t)ad[ 8] * bd[ 2]
+ + (int64_t)ad[ 9] * bd[ 1]
+ + (int64_t)ad[10] * bd[ 0];
+    t[11] = dd[11] + (int64_t)ad[ 0] * bd[11]
+ + (int64_t)ad[ 1] * bd[10]
+ + (int64_t)ad[ 2] * bd[ 9]
+ + (int64_t)ad[ 3] * bd[ 8]
+ + (int64_t)ad[ 4] * bd[ 7]
+ + (int64_t)ad[ 5] * bd[ 6]
+ + (int64_t)ad[ 6] * bd[ 5]
+ + (int64_t)ad[ 7] * bd[ 4]
+ + (int64_t)ad[ 8] * bd[ 3]
+ + (int64_t)ad[ 9] * bd[ 2]
+ + (int64_t)ad[10] * bd[ 1]
+ + (int64_t)ad[11] * bd[ 0];
+    t[12] = dd[12] + (int64_t)ad[ 0] * bd[12]
+ + (int64_t)ad[ 1] * bd[11]
+ + (int64_t)ad[ 2] * bd[10]
+ + (int64_t)ad[ 3] * bd[ 9]
+ + (int64_t)ad[ 4] * bd[ 8]
+ + (int64_t)ad[ 5] * bd[ 7]
+ + (int64_t)ad[ 6] * bd[ 6]
+ + (int64_t)ad[ 7] * bd[ 5]
+ + (int64_t)ad[ 8] * bd[ 4]
+ + (int64_t)ad[ 9] * bd[ 3]
+ + (int64_t)ad[10] * bd[ 2]
+ + (int64_t)ad[11] * bd[ 1]
+ + (int64_t)ad[12] * bd[ 0];
+    t[13] = dd[13] + (int64_t)ad[ 0] * bd[13]
+ + (int64_t)ad[ 1] * bd[12]
+ + (int64_t)ad[ 2] * bd[11]
+ + (int64_t)ad[ 3] * bd[10]
+ + (int64_t)ad[ 4] * bd[ 9]
+ + (int64_t)ad[ 5] * bd[ 8]
+ + (int64_t)ad[ 6] * bd[ 7]
+ + (int64_t)ad[ 7] * bd[ 6]
+ + (int64_t)ad[ 8] * bd[ 5]
+ + (int64_t)ad[ 9] * bd[ 4]
+ + (int64_t)ad[10] * bd[ 3]
+ + (int64_t)ad[11] * bd[ 2]
+ + (int64_t)ad[12] * bd[ 1]
+ + (int64_t)ad[13] * bd[ 0];
+    t[14] = dd[14] + (int64_t)ad[ 0] * bd[14]
+ + (int64_t)ad[ 1] * bd[13]
+ + (int64_t)ad[ 2] * bd[12]
+ + (int64_t)ad[ 3] * bd[11]
+ + (int64_t)ad[ 4] * bd[10]
+ + (int64_t)ad[ 5] * bd[ 9]
+ + (int64_t)ad[ 6] * bd[ 8]
+ + (int64_t)ad[ 7] * bd[ 7]
+ + (int64_t)ad[ 8] * bd[ 6]
+ + (int64_t)ad[ 9] * bd[ 5]
+ + (int64_t)ad[10] * bd[ 4]
+ + (int64_t)ad[11] * bd[ 3]
+ + (int64_t)ad[12] * bd[ 2]
+ + (int64_t)ad[13] * bd[ 1]
+ + (int64_t)ad[14] * bd[ 0];
+    t[15] = dd[15] + (int64_t)ad[ 0] * bd[15]
+ + (int64_t)ad[ 1] * bd[14]
+ + (int64_t)ad[ 2] * bd[13]
+ + (int64_t)ad[ 3] * bd[12]
+ + (int64_t)ad[ 4] * bd[11]
+ + (int64_t)ad[ 5] * bd[10]
+ + (int64_t)ad[ 6] * bd[ 9]
+ + (int64_t)ad[ 7] * bd[ 8]
+ + (int64_t)ad[ 8] * bd[ 7]
+ + (int64_t)ad[ 9] * bd[ 6]
+ + (int64_t)ad[10] * bd[ 5]
+ + (int64_t)ad[11] * bd[ 4]
+ + (int64_t)ad[12] * bd[ 3]
+ + (int64_t)ad[13] * bd[ 2]
+ + (int64_t)ad[14] * bd[ 1]
+ + (int64_t)ad[15] * bd[ 0];
+    t[16] = (int64_t)ad[ 1] * bd[15]
+ + (int64_t)ad[ 2] * bd[14]
+ + (int64_t)ad[ 3] * bd[13]
+ + (int64_t)ad[ 4] * bd[12]
+ + (int64_t)ad[ 5] * bd[11]
+ + (int64_t)ad[ 6] * bd[10]
+ + (int64_t)ad[ 7] * bd[ 9]
+ + (int64_t)ad[ 8] * bd[ 8]
+ + (int64_t)ad[ 9] * bd[ 7]
+ + (int64_t)ad[10] * bd[ 6]
+ + (int64_t)ad[11] * bd[ 5]
+ + (int64_t)ad[12] * bd[ 4]
+ + (int64_t)ad[13] * bd[ 3]
+ + (int64_t)ad[14] * bd[ 2]
+ + (int64_t)ad[15] * bd[ 1];
+    t[17] = (int64_t)ad[ 2] * bd[15]
+ + (int64_t)ad[ 3] * bd[14]
+ + (int64_t)ad[ 4] * bd[13]
+ + (int64_t)ad[ 5] * bd[12]
+ + (int64_t)ad[ 6] * bd[11]
+ + (int64_t)ad[ 7] * bd[10]
+ + (int64_t)ad[ 8] * bd[ 9]
+ + (int64_t)ad[ 9] * bd[ 8]
+ + (int64_t)ad[10] * bd[ 7]
+ + (int64_t)ad[11] * bd[ 6]
+ + (int64_t)ad[12] * bd[ 5]
+ + (int64_t)ad[13] * bd[ 4]
+ + (int64_t)ad[14] * bd[ 3]
+ + (int64_t)ad[15] * bd[ 2];
+    t[18] = (int64_t)ad[ 3] * bd[15]
+ + (int64_t)ad[ 4] * bd[14]
+ + (int64_t)ad[ 5] * bd[13]
+ + (int64_t)ad[ 6] * bd[12]
+ + (int64_t)ad[ 7] * bd[11]
+ + (int64_t)ad[ 8] * bd[10]
+ + (int64_t)ad[ 9] * bd[ 9]
+ + (int64_t)ad[10] * bd[ 8]
+ + (int64_t)ad[11] * bd[ 7]
+ + (int64_t)ad[12] * bd[ 6]
+ + (int64_t)ad[13] * bd[ 5]
+ + (int64_t)ad[14] * bd[ 4]
+ + (int64_t)ad[15] * bd[ 3];
+    t[19] = (int64_t)ad[ 4] * bd[15]
+ + (int64_t)ad[ 5] * bd[14]
+ + (int64_t)ad[ 6] * bd[13]
+ + (int64_t)ad[ 7] * bd[12]
+ + (int64_t)ad[ 8] * bd[11]
+ + (int64_t)ad[ 9] * bd[10]
+ + (int64_t)ad[10] * bd[ 9]
+ + (int64_t)ad[11] * bd[ 8]
+ + (int64_t)ad[12] * bd[ 7]
+ + (int64_t)ad[13] * bd[ 6]
+ + (int64_t)ad[14] * bd[ 5]
+ + (int64_t)ad[15] * bd[ 4];
+    t[20] = (int64_t)ad[ 5] * bd[15]
+ + (int64_t)ad[ 6] * bd[14]
+ + (int64_t)ad[ 7] * bd[13]
+ + (int64_t)ad[ 8] * bd[12]
+ + (int64_t)ad[ 9] * bd[11]
+ + (int64_t)ad[10] * bd[10]
+ + (int64_t)ad[11] * bd[ 9]
+ + (int64_t)ad[12] * bd[ 8]
+ + (int64_t)ad[13] * bd[ 7]
+ + (int64_t)ad[14] * bd[ 6]
+ + (int64_t)ad[15] * bd[ 5];
+    t[21] = (int64_t)ad[ 6] * bd[15]
+ + (int64_t)ad[ 7] * bd[14]
+ + (int64_t)ad[ 8] * bd[13]
+ + (int64_t)ad[ 9] * bd[12]
+ + (int64_t)ad[10] * bd[11]
+ + (int64_t)ad[11] * bd[10]
+ + (int64_t)ad[12] * bd[ 9]
+ + (int64_t)ad[13] * bd[ 8]
+ + (int64_t)ad[14] * bd[ 7]
+ + (int64_t)ad[15] * bd[ 6];
+    t[22] = (int64_t)ad[ 7] * bd[15]
+ + (int64_t)ad[ 8] * bd[14]
+ + (int64_t)ad[ 9] * bd[13]
+ + (int64_t)ad[10] * bd[12]
+ + (int64_t)ad[11] * bd[11]
+ + (int64_t)ad[12] * bd[10]
+ + (int64_t)ad[13] * bd[ 9]
+ + (int64_t)ad[14] * bd[ 8]
+ + (int64_t)ad[15] * bd[ 7];
+    t[23] = (int64_t)ad[ 8] * bd[15]
+ + (int64_t)ad[ 9] * bd[14]
+ + (int64_t)ad[10] * bd[13]
+ + (int64_t)ad[11] * bd[12]
+ + (int64_t)ad[12] * bd[11]
+ + (int64_t)ad[13] * bd[10]
+ + (int64_t)ad[14] * bd[ 9]
+ + (int64_t)ad[15] * bd[ 8];
+    t[24] = (int64_t)ad[ 9] * bd[15]
+ + (int64_t)ad[10] * bd[14]
+ + (int64_t)ad[11] * bd[13]
+ + (int64_t)ad[12] * bd[12]
+ + (int64_t)ad[13] * bd[11]
+ + (int64_t)ad[14] * bd[10]
+ + (int64_t)ad[15] * bd[ 9];
+    t[25] = (int64_t)ad[10] * bd[15]
+ + (int64_t)ad[11] * bd[14]
+ + (int64_t)ad[12] * bd[13]
+ + (int64_t)ad[13] * bd[12]
+ + (int64_t)ad[14] * bd[11]
+ + (int64_t)ad[15] * bd[10];
+    t[26] = (int64_t)ad[11] * bd[15]
+ + (int64_t)ad[12] * bd[14]
+ + (int64_t)ad[13] * bd[13]
+ + (int64_t)ad[14] * bd[12]
+ + (int64_t)ad[15] * bd[11];
+    t[27] = (int64_t)ad[12] * bd[15]
+ + (int64_t)ad[13] * bd[14]
+ + (int64_t)ad[14] * bd[13]
+ + (int64_t)ad[15] * bd[12];
+    t[28] = (int64_t)ad[13] * bd[15]
+ + (int64_t)ad[14] * bd[14]
+ + (int64_t)ad[15] * bd[13];
+    t[29] = (int64_t)ad[14] * bd[15]
+ + (int64_t)ad[15] * bd[14];
+    t[30] = (int64_t)ad[15] * bd[15];
+    t[31] = 0;
+
+    /* Mod curve order */
+    /* 2^446 - 0x8335dc163bb124b65129c96fde933d8d723a70aadc873d6d54a7bb0d */
+    /* The fold constants used below (0x129eec34, ...) are 4x the 28-bit
+     * words of (2^446 - order): limb t[i+16] carries weight
+     * 2^(28*i) * 2^448 and 2^448 == 4 * (2^446 - order) (mod order). */
+    /* Propagate carries */
+    c = t[ 0] >> 28; t[ 1] += c; t[ 0] = t[ 0] & 0xfffffff;
+    c = t[ 1] >> 28; t[ 2] += c; t[ 1] = t[ 1] & 0xfffffff;
+    c = t[ 2] >> 28; t[ 3] += c; t[ 2] = t[ 2] & 0xfffffff;
+    c = t[ 3] >> 28; t[ 4] += c; t[ 3] = t[ 3] & 0xfffffff;
+    c = t[ 4] >> 28; t[ 5] += c; t[ 4] = t[ 4] & 0xfffffff;
+    c = t[ 5] >> 28; t[ 6] += c; t[ 5] = t[ 5] & 0xfffffff;
+    c = t[ 6] >> 28; t[ 7] += c; t[ 6] = t[ 6] & 0xfffffff;
+    c = t[ 7] >> 28; t[ 8] += c; t[ 7] = t[ 7] & 0xfffffff;
+    c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff;
+    c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff;
+    c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff;
+    c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff;
+    c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff;
+    c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff;
+    c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff;
+    c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff;
+    c = t[16] >> 28; t[17] += c; t[16] = t[16] & 0xfffffff;
+    c = t[17] >> 28; t[18] += c; t[17] = t[17] & 0xfffffff;
+    c = t[18] >> 28; t[19] += c; t[18] = t[18] & 0xfffffff;
+    c = t[19] >> 28; t[20] += c; t[19] = t[19] & 0xfffffff;
+    c = t[20] >> 28; t[21] += c; t[20] = t[20] & 0xfffffff;
+    c = t[21] >> 28; t[22] += c; t[21] = t[21] & 0xfffffff;
+    c = t[22] >> 28; t[23] += c; t[22] = t[22] & 0xfffffff;
+    c = t[23] >> 28; t[24] += c; t[23] = t[23] & 0xfffffff;
+    c = t[24] >> 28; t[25] += c; t[24] = t[24] & 0xfffffff;
+    c = t[25] >> 28; t[26] += c; t[25] = t[25] & 0xfffffff;
+    c = t[26] >> 28; t[27] += c; t[26] = t[26] & 0xfffffff;
+    c = t[27] >> 28; t[28] += c; t[27] = t[27] & 0xfffffff;
+    c = t[28] >> 28; t[29] += c; t[28] = t[28] & 0xfffffff;
+    c = t[29] >> 28; t[30] += c; t[29] = t[29] & 0xfffffff;
+    c = t[30] >> 28; t[31] += c; t[30] = t[30] & 0xfffffff;
+    /* Mod top half of extra words */
+    t[ 8] += (int64_t)0x129eec34 * t[24];
+    t[ 9] += (int64_t)0x21cf5b54 * t[24];
+    t[10] += (int64_t)0x29c2ab70 * t[24];
+    t[11] += (int64_t)0x0f635c8c * t[24];
+    t[12] += (int64_t)0x25bf7a4c * t[24];
+    t[13] += (int64_t)0x2d944a70 * t[24];
+    t[14] += (int64_t)0x18eec490 * t[24];
+    t[15] += (int64_t)0x20cd7704 * t[24];
+    t[ 9] += (int64_t)0x129eec34 * t[25];
+    t[10] += (int64_t)0x21cf5b54 * t[25];
+    t[11] += (int64_t)0x29c2ab70 * t[25];
+    t[12] += (int64_t)0x0f635c8c * t[25];
+    t[13] += (int64_t)0x25bf7a4c * t[25];
+    t[14] += (int64_t)0x2d944a70 * t[25];
+    t[15] += (int64_t)0x18eec490 * t[25];
+    t[16] += (int64_t)0x20cd7704 * t[25];
+    t[10] += (int64_t)0x129eec34 * t[26];
+    t[11] += (int64_t)0x21cf5b54 * t[26];
+    t[12] += (int64_t)0x29c2ab70 * t[26];
+    t[13] += (int64_t)0x0f635c8c * t[26];
+    t[14] += (int64_t)0x25bf7a4c * t[26];
+    t[15] += (int64_t)0x2d944a70 * t[26];
+    t[16] += (int64_t)0x18eec490 * t[26];
+    t[17] += (int64_t)0x20cd7704 * t[26];
+    t[11] += (int64_t)0x129eec34 * t[27];
+    t[12] += (int64_t)0x21cf5b54 * t[27];
+    t[13] += (int64_t)0x29c2ab70 * t[27];
+    t[14] += (int64_t)0x0f635c8c * t[27];
+    t[15] += (int64_t)0x25bf7a4c * t[27];
+    t[16] += (int64_t)0x2d944a70 * t[27];
+    t[17] += (int64_t)0x18eec490 * t[27];
+    t[18] += (int64_t)0x20cd7704 * t[27];
+    t[12] += (int64_t)0x129eec34 * t[28];
+    t[13] += (int64_t)0x21cf5b54 * t[28];
+    t[14] += (int64_t)0x29c2ab70 * t[28];
+    t[15] += (int64_t)0x0f635c8c * t[28];
+    t[16] += (int64_t)0x25bf7a4c * t[28];
+    t[17] += (int64_t)0x2d944a70 * t[28];
+    t[18] += (int64_t)0x18eec490 * t[28];
+    t[19] += (int64_t)0x20cd7704 * t[28];
+    t[13] += (int64_t)0x129eec34 * t[29];
+    t[14] += (int64_t)0x21cf5b54 * t[29];
+    t[15] += (int64_t)0x29c2ab70 * t[29];
+    t[16] += (int64_t)0x0f635c8c * t[29];
+    t[17] += (int64_t)0x25bf7a4c * t[29];
+    t[18] += (int64_t)0x2d944a70 * t[29];
+    t[19] += (int64_t)0x18eec490 * t[29];
+    t[20] += (int64_t)0x20cd7704 * t[29];
+    t[14] += (int64_t)0x129eec34 * t[30];
+    t[15] += (int64_t)0x21cf5b54 * t[30];
+    t[16] += (int64_t)0x29c2ab70 * t[30];
+    t[17] += (int64_t)0x0f635c8c * t[30];
+    t[18] += (int64_t)0x25bf7a4c * t[30];
+    t[19] += (int64_t)0x2d944a70 * t[30];
+    t[20] += (int64_t)0x18eec490 * t[30];
+    t[21] += (int64_t)0x20cd7704 * t[30];
+    t[15] += (int64_t)0x129eec34 * t[31];
+    t[16] += (int64_t)0x21cf5b54 * t[31];
+    t[17] += (int64_t)0x29c2ab70 * t[31];
+    t[18] += (int64_t)0x0f635c8c * t[31];
+    t[19] += (int64_t)0x25bf7a4c * t[31];
+    t[20] += (int64_t)0x2d944a70 * t[31];
+    t[21] += (int64_t)0x18eec490 * t[31];
+    t[22] += (int64_t)0x20cd7704 * t[31];
+    /* Propagate carries */
+    c = t[ 8] >> 28; t[ 9] += c; t[ 8] = t[ 8] & 0xfffffff;
+    c = t[ 9] >> 28; t[10] += c; t[ 9] = t[ 9] & 0xfffffff;
+    c = t[10] >> 28; t[11] += c; t[10] = t[10] & 0xfffffff;
+    c = t[11] >> 28; t[12] += c; t[11] = t[11] & 0xfffffff;
+    c = t[12] >> 28; t[13] += c; t[12] = t[12] & 0xfffffff;
+    c = t[13] >> 28; t[14] += c; t[13] = t[13] & 0xfffffff;
+    c = t[14] >> 28; t[15] += c; t[14] = t[14] & 0xfffffff;
+    c = t[15] >> 28; t[16] += c; t[15] = t[15] & 0xfffffff;
+    c = t[16] >> 28; t[17] += c; t[16] = t[16] & 0xfffffff;
+    c = t[17] >> 28; t[18] += c; t[17] = t[17] & 0xfffffff;
+    c = t[18] >> 28; t[19] += c; t[18] = t[18] & 0xfffffff;
+    c = t[19] >> 28; t[20] += c; t[19] = t[19] & 0xfffffff;
+    c = t[20] >> 28; t[21] += c; t[20] = t[20] & 0xfffffff;
+    c = t[21] >> 28; t[22] += c; t[21] = t[21] & 0xfffffff;
+    c = t[22] >> 28; t[23] += c; t[22] = t[22] & 0xfffffff;
+    /* Mod bottom half of extra words */
+    t[ 0] += (int64_t)0x129eec34 * t[16];
+    t[ 1] += (int64_t)0x21cf5b54 * t[16];
+    t[ 2] += (int64_t)0x29c2ab70 * t[16];
+    t[ 3] += (int64_t)0x0f635c8c * t[16];
+    t[ 4] += (int64_t)0x25bf7a4c * t[16];
+    t[ 5] += (int64_t)0x2d944a70 * t[16];
+    t[ 6] += (int64_t)0x18eec490 * t[16];
+    t[ 7] += (int64_t)0x20cd7704 * t[16];
+    t[ 1] += (int64_t)0x129eec34 * t[17];
+    t[ 2] += (int64_t)0x21cf5b54 * t[17];
+    t[ 3] += (int64_t)0x29c2ab70 * t[17];
+    t[ 4] += (int64_t)0x0f635c8c * t[17];
+    t[ 5] += (int64_t)0x25bf7a4c * t[17];
+    t[ 6] += (int64_t)0x2d944a70 * t[17];
+    t[ 7] += (int64_t)0x18eec490 * t[17];
+    t[ 8] += (int64_t)0x20cd7704 * t[17];
+    t[ 2] += (int64_t)0x129eec34 * t[18];
+    t[ 3] += (int64_t)0x21cf5b54 * t[18];
+    t[ 4] += (int64_t)0x29c2ab70 * t[18];
+    t[ 5] += (int64_t)0x0f635c8c * t[18];
+    t[ 6] += (int64_t)0x25bf7a4c * t[18];
+    t[ 7] += (int64_t)0x2d944a70 * t[18];
+    t[ 8] += (int64_t)0x18eec490 * t[18];
+    t[ 9] += (int64_t)0x20cd7704 * t[18];
+    t[ 3] += (int64_t)0x129eec34 * t[19];
+    t[ 4] += (int64_t)0x21cf5b54 * t[19];
+    t[ 5] += (int64_t)0x29c2ab70 * t[19];
+    t[ 6] += (int64_t)0x0f635c8c * t[19];
+    t[ 7] += (int64_t)0x25bf7a4c * t[19];
+    t[ 8] += (int64_t)0x2d944a70 * t[19];
+    t[ 9] += (int64_t)0x18eec490 * t[19];
+    t[10] += (int64_t)0x20cd7704 * t[19];
+    t[ 4] += (int64_t)0x129eec34 * t[20];
+    t[ 5] += (int64_t)0x21cf5b54 * t[20];
+    t[ 6] += (int64_t)0x29c2ab70 * t[20];
+    t[ 7] += (int64_t)0x0f635c8c * t[20];
+    t[ 8] += (int64_t)0x25bf7a4c * t[20];
+    t[ 9] += (int64_t)0x2d944a70 * t[20];
+    t[10] += (int64_t)0x18eec490 * t[20];
+    t[11] += (int64_t)0x20cd7704 * t[20];
+    t[ 5] += (int64_t)0x129eec34 * t[21];
+    t[ 6] += (int64_t)0x21cf5b54 * t[21];
+    t[ 7] += (int64_t)0x29c2ab70 * t[21];
+    t[ 8] += (int64_t)0x0f635c8c * t[21];
+    t[ 9] += (int64_t)0x25bf7a4c * t[21];
+    t[10] += (int64_t)0x2d944a70 * t[21];
+    t[11] += (int64_t)0x18eec490 * t[21];
+    t[12] += (int64_t)0x20cd7704 * t[21];
+    t[ 6] += (int64_t)0x129eec34 * t[22];
+    t[ 7] += (int64_t)0x21cf5b54 * t[22];
+    t[ 8] += (int64_t)0x29c2ab70 * t[22];
+    t[ 9] += (int64_t)0x0f635c8c * t[22];
+    t[10] += (int64_t)0x25bf7a4c * t[22];
+    t[11] += (int64_t)0x2d944a70 * t[22];
+    t[12] += (int64_t)0x18eec490 * t[22];
+    t[13] += (int64_t)0x20cd7704 * t[22];
+    t[ 7] += (int64_t)0x129eec34 * t[23];
+    t[ 8] += (int64_t)0x21cf5b54 * t[23];
+    t[ 9] += (int64_t)0x29c2ab70 * t[23];
+    t[10] += (int64_t)0x0f635c8c * t[23];
+    t[11] += (int64_t)0x25bf7a4c * t[23];
+    t[12] += (int64_t)0x2d944a70 * t[23];
+    t[13] += (int64_t)0x18eec490 * t[23];
+    t[14] += (int64_t)0x20cd7704 * t[23];
+    /* Propagate carries */
+    c = t[ 0] >> 28; t[ 1] += c; rd[ 0] = (int32_t)(t[ 0] & 0xfffffff);
+    c = t[ 1] >> 28; t[ 2] += c; rd[ 1] = (int32_t)(t[ 1] & 0xfffffff);
+    c = t[ 2] >> 28; t[ 3] += c; rd[ 2] = (int32_t)(t[ 2] & 0xfffffff);
+    c = t[ 3] >> 28; t[ 4] += c; rd[ 3] = (int32_t)(t[ 3] & 0xfffffff);
+    c = t[ 4] >> 28; t[ 5] += c; rd[ 4] = (int32_t)(t[ 4] & 0xfffffff);
+    c = t[ 5] >> 28; t[ 6] += c; rd[ 5] = (int32_t)(t[ 5] & 0xfffffff);
+    c = t[ 6] >> 28; t[ 7] += c; rd[ 6] = (int32_t)(t[ 6] & 0xfffffff);
+    c = t[ 7] >> 28; t[ 8] += c; rd[ 7] = (int32_t)(t[ 7] & 0xfffffff);
+    c = t[ 8] >> 28; t[ 9] += c; rd[ 8] = (int32_t)(t[ 8] & 0xfffffff);
+    c = t[ 9] >> 28; t[10] += c; rd[ 9] = (int32_t)(t[ 9] & 0xfffffff);
+    c = t[10] >> 28; t[11] += c; rd[10] = (int32_t)(t[10] & 0xfffffff);
+    c = t[11] >> 28; t[12] += c; rd[11] = (int32_t)(t[11] & 0xfffffff);
+    c = t[12] >> 28; t[13] += c; rd[12] = (int32_t)(t[12] & 0xfffffff);
+    c = t[13] >> 28; t[14] += c; rd[13] = (int32_t)(t[13] & 0xfffffff);
+    c = t[14] >> 28; t[15] += c; rd[14] = (int32_t)(t[14] & 0xfffffff);
+    rd[15] = t[15];
+    /* Mod bits over 28 in last word */
+    /* 446 = 15*28 + 26, so the top limb holds only 26 bits; the overflow
+     * o is folded back using the 1x words of (2^446 - order). */
+    o = rd[15] >> 26; rd[15] &= 0x3ffffff;
+    rd[ 0] += 0x4a7bb0d * o;
+    rd[ 1] += 0x873d6d5 * o;
+    rd[ 2] += 0xa70aadc * o;
+    rd[ 3] += 0x3d8d723 * o;
+    rd[ 4] += 0x96fde93 * o;
+    rd[ 5] += 0xb65129c * o;
+    rd[ 6] += 0x63bb124 * o;
+    rd[ 7] += 0x8335dc1 * o;
+    /* Propagate carries */
+    o = rd[ 0] >> 28; rd[ 1] += o; rd[ 0] = rd[ 0] & 0xfffffff;
+    o = rd[ 1] >> 28; rd[ 2] += o; rd[ 1] = rd[ 1] & 0xfffffff;
+    o = rd[ 2] >> 28; rd[ 3] += o; rd[ 2] = rd[ 2] & 0xfffffff;
+    o = rd[ 3] >> 28; rd[ 4] += o; rd[ 3] = rd[ 3] & 0xfffffff;
+    o = rd[ 4] >> 28; rd[ 5] += o; rd[ 4] = rd[ 4] & 0xfffffff;
+    o = rd[ 5] >> 28; rd[ 6] += o; rd[ 5] = rd[ 5] & 0xfffffff;
+    o = rd[ 6] >> 28; rd[ 7] += o; rd[ 6] = rd[ 6] & 0xfffffff;
+    o = rd[ 7] >> 28; rd[ 8] += o; rd[ 7] = rd[ 7] & 0xfffffff;
+    o = rd[ 8] >> 28; rd[ 9] += o; rd[ 8] = rd[ 8] & 0xfffffff;
+    o = rd[ 9] >> 28; rd[10] += o; rd[ 9] = rd[ 9] & 0xfffffff;
+    o = rd[10] >> 28; rd[11] += o; rd[10] = rd[10] & 0xfffffff;
+    o = rd[11] >> 28; rd[12] += o; rd[11] = rd[11] & 0xfffffff;
+    o = rd[12] >> 28; rd[13] += o; rd[12] = rd[12] & 0xfffffff;
+    o = rd[13] >> 28; rd[14] += o; rd[13] = rd[13] & 0xfffffff;
+    o = rd[14] >> 28; rd[15] += o; rd[14] = rd[14] & 0xfffffff;
+
+    /* NOTE(review): the result fits in 446 bits but may not be fully
+     * reduced below the group order -- confirm callers tolerate a
+     * non-canonical scalar here. */
+    /* Convert to bytes */
+    r[ 0] = (rd[0 ] >> 0);
+    r[ 1] = (rd[0 ] >> 8);
+    r[ 2] = (rd[0 ] >> 16);
+    r[ 3] = (rd[0 ] >> 24) + ((rd[1 ] >> 0) << 4);
+    r[ 4] = (rd[1 ] >> 4);
+    r[ 5] = (rd[1 ] >> 12);
+    r[ 6] = (rd[1 ] >> 20);
+    r[ 7] = (rd[2 ] >> 0);
+    r[ 8] = (rd[2 ] >> 8);
+    r[ 9] = (rd[2 ] >> 16);
+    r[10] = (rd[2 ] >> 24) + ((rd[3 ] >> 0) << 4);
+    r[11] = (rd[3 ] >> 4);
+    r[12] = (rd[3 ] >> 12);
+    r[13] = (rd[3 ] >> 20);
+    r[14] = (rd[4 ] >> 0);
+    r[15] = (rd[4 ] >> 8);
+    r[16] = (rd[4 ] >> 16);
+    r[17] = (rd[4 ] >> 24) + ((rd[5 ] >> 0) << 4);
+    r[18] = (rd[5 ] >> 4);
+    r[19] = (rd[5 ] >> 12);
+    r[20] = (rd[5 ] >> 20);
+    r[21] = (rd[6 ] >> 0);
+    r[22] = (rd[6 ] >> 8);
+    r[23] = (rd[6 ] >> 16);
+    r[24] = (rd[6 ] >> 24) + ((rd[7 ] >> 0) << 4);
+    r[25] = (rd[7 ] >> 4);
+    r[26] = (rd[7 ] >> 12);
+    r[27] = (rd[7 ] >> 20);
+    r[28] = (rd[8 ] >> 0);
+    r[29] = (rd[8 ] >> 8);
+    r[30] = (rd[8 ] >> 16);
+    r[31] = (rd[8 ] >> 24) + ((rd[9 ] >> 0) << 4);
+    r[32] = (rd[9 ] >> 4);
+    r[33] = (rd[9 ] >> 12);
+    r[34] = (rd[9 ] >> 20);
+    r[35] = (rd[10] >> 0);
+    r[36] = (rd[10] >> 8);
+    r[37] = (rd[10] >> 16);
+    r[38] = (rd[10] >> 24) + ((rd[11] >> 0) << 4);
+    r[39] = (rd[11] >> 4);
+    r[40] = (rd[11] >> 12);
+    r[41] = (rd[11] >> 20);
+    r[42] = (rd[12] >> 0);
+    r[43] = (rd[12] >> 8);
+    r[44] = (rd[12] >> 16);
+    r[45] = (rd[12] >> 24) + ((rd[13] >> 0) << 4);
+    r[46] = (rd[13] >> 4);
+    r[47] = (rd[13] >> 12);
+    r[48] = (rd[13] >> 20);
+    r[49] = (rd[14] >> 0);
+    r[50] = (rd[14] >> 8);
+    r[51] = (rd[14] >> 16);
+    r[52] = (rd[14] >> 24) + ((rd[15] >> 0) << 4);
+    r[53] = (rd[15] >> 4);
+    r[54] = (rd[15] >> 12);
+    r[55] = (rd[15] >> 20);
+    r[56] = 0;
+}
+
+/* Precomputed multiples of the base point. */
+static const ge448_precomp base[58][8] = {
+{
+ {
+ { 0x70cc05e, 0x26a82bc, 0x0938e26, 0x80e18b0, 0x511433b, 0xf72ab66,
+ 0x412ae1a, 0xa3d3a46, 0xa6de324, 0x0f1767e, 0x4657047, 0x36da9e1,
+ 0x5a622bf, 0xed221d1, 0x66bed0d, 0x4f1970c },
+ { 0x230fa14, 0x08795bf, 0x7c8ad98, 0x132c4ed, 0x9c4fdbd, 0x1ce67c3,
+ 0x73ad3ff, 0x05a0c2d, 0x7789c1e, 0xa398408, 0xa73736c, 0xc7624be,
+ 0x03756c9, 0x2488762, 0x16eb6bc, 0x693f467 }
+ },
+ {
+ { 0x5555555, 0x5555555, 0x5555555, 0x5555555, 0x5555555, 0x5555555,
+ 0x5555555, 0x5555555, 0xaaaaaa9, 0xaaaaaaa, 0xaaaaaaa, 0xaaaaaaa,
+ 0xaaaaaaa, 0xaaaaaaa, 0xaaaaaaa, 0xaaaaaaa },
+ { 0xa9386ed, 0xeafbcde, 0xda06bda, 0xb2bed1c, 0x098bbbc, 0x833a2a3,
+ 0x80d6565, 0x8ad8c4b, 0x7e36d72, 0x884dd7b, 0xed7a035, 0xc2b0036,
+ 0x6205086, 0x8db359d, 0x34ad704, 0xae05e96 }
+ },
+ {
+ { 0x6ff2f8f, 0x2817328, 0xda85757, 0xb769465, 0xfd6e862, 0xf7f6271,
+ 0x8daa9cb, 0x4a3fcfe, 0x2ba077a, 0xda82c7e, 0x41b8b8c, 0x9433322,
+ 0x4316cb6, 0x6455bd6, 0xb9108af, 0x0865886 },
+ { 0x88ed6fc, 0x22ac135, 0x02dafb8, 0x9a68fed, 0x7f0bffa, 0x1bdb676,
+ 0x8bb3a33, 0xec4e1d5, 0xce43c82, 0x56c3b9f, 0xa8d9523, 0xa6449a4,
+ 0xa7ad43a, 0xf706cbd, 0xbd5125c, 0xe005a8d }
+ },
+ {
+ { 0x8ba7f30, 0xce42ac4, 0x9e120e2, 0xe179894, 0x8ba21ae, 0xf1515dd,
+ 0x301b7bd, 0x70c74cc, 0x3fda4be, 0x0891c69, 0xa09cf4e, 0x29ea255,
+ 0x17226f9, 0x2c1419a, 0xc6c0cce, 0x49dcbc5 },
+ { 0xde51839, 0xe236f86, 0xd4f5b32, 0x44285d0, 0x472b5d4, 0x7ea1ca9,
+ 0x1c0d8f9, 0x7b8a5bc, 0x90dc322, 0x57d845c, 0x7c02f04, 0x1b979cb,
+ 0x3a5de02, 0x27164b3, 0x4accde5, 0xd49077e }
+ },
+ {
+ { 0x2030034, 0xa99d109, 0x6f950d0, 0x2d8cefc, 0xc96f07b, 0x7a920c3,
+ 0x08bc0d5, 0x9588128, 0x6d761e8, 0x62ada75, 0xbcf7285, 0x0def80c,
+ 0x01eedb5, 0x0e2ba76, 0x5a48dcb, 0x7a9f933 },
+ { 0x2f435eb, 0xb473147, 0xf225443, 0x5512881, 0x33c5840, 0xee59d2b,
+ 0x127d7a4, 0xb698017, 0x86551f7, 0xb18fced, 0xca1823a, 0x0ade260,
+ 0xce4fd58, 0xd3b9109, 0xa2517ed, 0xadfd751 }
+ },
+ {
+ { 0xabef79c, 0x7fd7652, 0x443a878, 0x6c20a07, 0x12a7109, 0x5c1840d,
+ 0x876451c, 0x4a06e4a, 0xad95f65, 0x3bed0b4, 0x3fb0260, 0x25d2e67,
+ 0xaebd971, 0x2e00349, 0x4498b72, 0x54523e0 },
+ { 0x07c7bcc, 0xea5d1da, 0x38ea98c, 0xcce7769, 0x61d2b3e, 0x80284e8,
+ 0x6e1ff1b, 0x48de76b, 0x9c58522, 0x7b12186, 0x2765a1a, 0xbfd053a,
+ 0x056c667, 0x2d743ec, 0xd8ab61c, 0x3f99b9c }
+ },
+ {
+ { 0xeb5eaf7, 0xdf9567c, 0x78ac7d7, 0x110a6b4, 0x4706e0b, 0x2d33501,
+ 0x0b5a209, 0x0df9c7b, 0x568e684, 0xba4223d, 0x8c3719b, 0xd78af2d,
+ 0xa5291b6, 0x77467b9, 0x5c89bef, 0x079748e },
+ { 0xdac377f, 0xe20d3fa, 0x72b5c09, 0x34e8669, 0xc40bbb7, 0xd8687a3,
+ 0xd2f84c9, 0x7b3946f, 0xa78f50e, 0xd00e40c, 0x17e7179, 0xb875944,
+ 0xcb23583, 0x9c7373b, 0xc90fd69, 0x7ddeda3 }
+ },
+ {
+ { 0x153bde0, 0x2538a67, 0x406b696, 0x223aca9, 0x1ad713e, 0xf9080dc,
+ 0xd816a64, 0x6c4cb47, 0x5dc8b97, 0xbc28568, 0xc08e2d7, 0xd97b037,
+ 0x5d0e66b, 0x5b63fb4, 0x520e8a3, 0xd1f1bc5 },
+ { 0xe69e09b, 0x4eb873c, 0xbc8ee45, 0x1663164, 0xba8d89f, 0x08f7003,
+ 0x386ad82, 0x4b98ead, 0xbd94c7b, 0xa4b93b7, 0xc6b38b3, 0x46ba408,
+ 0xf3574ff, 0xdae87d1, 0xe9bea9b, 0xc7564f4 }
+ },
+},
+{
+ {
+ { 0x5bfac1c, 0x2e4fdb2, 0xf5f3bca, 0xf0d79aa, 0x20fb7cc, 0xe756b0d,
+ 0xb39609a, 0xe3696be, 0x5a5ab58, 0xa019fc3, 0x3b281dd, 0xa2b2485,
+ 0x61ac0a2, 0xe3e2be7, 0xeb56730, 0xf19c34f },
+ { 0xa30241e, 0x2d25ce8, 0xb73d7a1, 0xf5661ea, 0xdaac9f4, 0x4611ed0,
+ 0x4ced72c, 0xd544234, 0xe92e985, 0xce78f52, 0x4da4aad, 0x6fe5dd4,
+ 0x1d363ce, 0xfcaddc6, 0xc9111bf, 0x3beb69c }
+ },
+ {
+ { 0x940ebc9, 0xd2e7660, 0xb17bbe0, 0xe032018, 0x75c0575, 0xad49391,
+ 0x21c7f34, 0xdd0b147, 0x3e147e0, 0x52c2ba4, 0x0ee8973, 0x7dd03c6,
+ 0xecf2754, 0x5472e8d, 0xd6482bb, 0x17a1cd1 },
+ { 0x8128b3f, 0xdd43b84, 0xea7dd25, 0xf0cae34, 0xff07df2, 0x81ca99f,
+ 0x92ebbdc, 0x1c89597, 0x72155e6, 0x45c7a68, 0x39ddd08, 0x907a50e,
+ 0xbb2d89b, 0xbe398c2, 0x1b3b536, 0x38063f9 }
+ },
+ {
+ { 0xf843b23, 0x149fafb, 0xac7f22a, 0x00ab582, 0xf2f4d4c, 0xa3b981b,
+ 0x4341a22, 0x2ce1a65, 0x7c03b63, 0x68a4074, 0x12f2cf8, 0x63206a2,
+ 0x5149741, 0xc9961d3, 0xbc7099e, 0xfb85430 },
+ { 0x90a9e59, 0x9c91072, 0x06de367, 0x734e94a, 0xdb99214, 0x5cf3cbe,
+ 0x45b1fb9, 0xc6bce32, 0xdd7be0d, 0x1a82abe, 0xede7d1c, 0xf74976a,
+ 0x21503bd, 0x7025b7c, 0x0d096ab, 0xf789491 }
+ },
+ {
+ { 0x555a41b, 0x6bd48bb, 0x67de206, 0xfbdd0d0, 0xdd6dfd1, 0x98bc477,
+ 0x3e40b8a, 0x1d0693b, 0xda32ae4, 0x6e15563, 0xfcebaa2, 0x0194a20,
+ 0x0980a93, 0xda11615, 0x0109cec, 0x8e11920 },
+ { 0xffb9726, 0x8ea0552, 0x047e44b, 0xeba50a4, 0x60ddf76, 0xc050d24,
+ 0xac690e0, 0xe009204, 0x9b18edc, 0x47b8639, 0xc77f23f, 0x2f5b76a,
+ 0x0792905, 0x4296c24, 0x06f6dc7, 0x73f6b4a }
+ },
+ {
+ { 0x3b10cad, 0xb6ef9ea, 0xf7c8fce, 0x312843d, 0x8bedf86, 0x5bdcd52,
+ 0xf6dd823, 0x2889059, 0x08bfde0, 0x04578e9, 0x123e2e5, 0x3245df3,
+ 0x7ee9e3a, 0xbf461d5, 0x6f94ceb, 0xddec2d4 },
+ { 0x145768f, 0x21b43b9, 0xdae962a, 0xe79a8f9, 0xcbb043f, 0xff1972b,
+ 0x239649b, 0xe3dcf6d, 0xc533b85, 0xed592bd, 0xdbe22d0, 0x14ff94f,
+ 0xf1d8e22, 0x6c4eb87, 0xd18cf6d, 0xd8d4c71 }
+ },
+ {
+ { 0x8d96345, 0xcda666c, 0x836cd21, 0x9ecaa25, 0x984606e, 0x6e885bd,
+ 0x804f054, 0x1dd5fef, 0x6959ae4, 0x9dfff6b, 0xc9b55cc, 0x99b9cf8,
+ 0x62b9b80, 0xb4716b0, 0x554b128, 0x13ec87c },
+ { 0x75aacc2, 0xe696d1f, 0x87fc5ff, 0xf78c993, 0x3809d42, 0x76c0947,
+ 0xb618fa8, 0x99ce62d, 0x2f53341, 0x35e3e02, 0x0db6c5e, 0x62fc1ac,
+ 0x00d8b47, 0xa1fb8e6, 0x58f0d1e, 0x0bc1070 }
+ },
+ {
+ { 0x16da513, 0x1f45269, 0xf5cf341, 0x1f2fc04, 0x64d23e0, 0xae92086,
+ 0xda8a113, 0x4e33082, 0x1cfc085, 0x2688ec6, 0x6e5327f, 0x6f2e8de,
+ 0xb4e48a8, 0x2070db3, 0x3240ade, 0xd662697 },
+ { 0xfbd997b, 0xa6b317f, 0x49e26bd, 0x9fa1b56, 0x8cba0f3, 0xcbf0d25,
+ 0x17b4745, 0x4a7791b, 0x5c9e190, 0x25f555b, 0x923ec4c, 0x7cd3940,
+ 0xe98f1b6, 0x16f4c6a, 0xbcd4e0f, 0x7962116 }
+ },
+ {
+ { 0x02491e3, 0x8d58fa3, 0x7ab3898, 0x7cf76c6, 0x647ebc7, 0xbc2f657,
+ 0xd25f5a3, 0x5f4bfe0, 0xd69505d, 0x503f478, 0x3fb6645, 0x4a889fc,
+ 0xfa86b18, 0x33e1bc1, 0x5508dd8, 0xabb234f },
+ { 0x9a05b48, 0x5348e1b, 0x64dc858, 0x57ac5f1, 0xec8a2d3, 0x21f4d38,
+ 0xa3a3e9d, 0x5ec6d3c, 0x560a0b8, 0xcd4062e, 0x3433f59, 0x49b74f7,
+ 0xcab14e3, 0xefd9d87, 0xeb964f5, 0x858ce7f }
+ },
+},
+{
+ {
+ { 0xeb731b4, 0x7577254, 0x4e2397e, 0x9fff1fb, 0xc821715, 0x749b145,
+ 0x2e65e67, 0x40619fe, 0x2e618d8, 0x57b8281, 0x707b83e, 0x063186c,
+ 0x31b24a2, 0xcfc80cb, 0xac75169, 0xcca6185 },
+ { 0xb255818, 0x6539f44, 0x0368bce, 0x5895da0, 0x17c7482, 0x841a309,
+ 0xb1a9c9e, 0x85469e1, 0xe4f7d9d, 0x05664c0, 0x7b35cc0, 0x8a06318,
+ 0xa0e9b0a, 0x214763a, 0x4b26ac2, 0x1bd872c }
+ },
+ {
+ { 0xa93762b, 0x3578f97, 0x72d52bc, 0x434f69a, 0x22cb565, 0xddcca40,
+ 0xff20544, 0xa7d1e41, 0x8a66588, 0x823475d, 0x99d7baf, 0x9fc97c7,
+ 0x660e421, 0x15542f1, 0x843faf6, 0xa7d1f60 },
+ { 0x4063ccc, 0xbbfaab5, 0xa49855a, 0x3ad9bad, 0x5bddbfe, 0xffd5f1c,
+ 0xae87e59, 0x0e419c2, 0xf89956b, 0xdce6ed6, 0xccd8951, 0xf047c21,
+ 0xa83c991, 0x6ed4a1b, 0x2d28e0a, 0x85af86e }
+ },
+ {
+ { 0x9ed48a8, 0x04433c4, 0x0bc375d, 0xeffa858, 0xfa6e3b5, 0xfb0e1b2,
+ 0xa1aadda, 0x51483a2, 0xf8b2ea8, 0x733448d, 0xf639f0c, 0xaa0513c,
+ 0xa23bf84, 0x6bc61a3, 0xdc2430d, 0x3e64f68 },
+ { 0xc5876b1, 0x51bf502, 0x1c0dd2a, 0x6b83375, 0x342914f, 0xe597be1,
+ 0xf8e632c, 0x43d5ab0, 0xd62587b, 0x2696715, 0xed34f24, 0xe87d20a,
+ 0xe18baf7, 0x25b7e14, 0xe22e084, 0xf5eb753 }
+ },
+ {
+ { 0x24d8295, 0x51da717, 0x18d1340, 0xd478e43, 0x2cf7f66, 0xacf94f4,
+ 0x3760711, 0x230d7d1, 0x5abc626, 0x078a66a, 0x6b5f6da, 0xd78b0bd,
+ 0x96d1d0b, 0x23a9713, 0x4bd960f, 0x87623d6 },
+ { 0x77db53f, 0x0841a99, 0xf4d03ee, 0x23c1a53, 0x1f95df1, 0x2f62c2e,
+ 0x116f4e7, 0xd1e2ec1, 0x34811a9, 0x896d2fe, 0xec8096e, 0xad65e2b,
+ 0xb1744a6, 0x09d36f9, 0xff5ddf7, 0x564bac7 }
+ },
+ {
+ { 0xc3f77cb, 0x48b41e2, 0x0968938, 0x5227673, 0xfd9b452, 0xff1b899,
+ 0x2e03908, 0x67cf3bf, 0x248a6fb, 0x3731d90, 0x256598f, 0xd800a05,
+ 0xbdc8530, 0x347d2f2, 0x7ad08a1, 0xc72a300 },
+ { 0x1d65f73, 0x5e5be74, 0x4206ead, 0x183d4ae, 0xade4013, 0xcb50c1c,
+ 0x3102483, 0x39db43d, 0x70d6325, 0x0eb49fa, 0xc1f02b9, 0xa18f6a2,
+ 0xdbf5e66, 0x3e6fe30, 0x3a82aa5, 0xac4eeb9 }
+ },
+ {
+ { 0x3613d47, 0x295affd, 0xb56f343, 0x7b7e68a, 0x92b173b, 0x9806296,
+ 0xbad35fb, 0x937061e, 0x5c21eea, 0x2501978, 0x787a746, 0xe92721b,
+ 0x3651631, 0x463c46c, 0xc6f2d5a, 0x6da4b5d },
+ { 0x6e6d18c, 0xcb67cc1, 0x0010588, 0x1b30d52, 0xdb1d1e8, 0x1bb6ea6,
+ 0xad11474, 0x9c6308a, 0x3d19b1c, 0xc316741, 0xbe4fb79, 0xf2e84d7,
+ 0xe050f77, 0xeccb873, 0xcc2bf86, 0xf7c8d80 }
+ },
+ {
+ { 0x7ab20e5, 0x16fe2e1, 0xecf3a92, 0x274dead, 0x0972f67, 0x9f43487,
+ 0x4605751, 0x9a65a45, 0xb8980b2, 0x9351f07, 0x0eb08a5, 0x412962b,
+ 0x733f440, 0xb8c9bfd, 0x1ca250f, 0xac2cd64 },
+ { 0x2ba7d26, 0x68cdd0f, 0x4e0beea, 0xd3d2a4a, 0x9f4a258, 0x50135c1,
+ 0xf0d02e4, 0xb475e53, 0x589283a, 0x432d8c6, 0xa0a2b6c, 0x29141bf,
+ 0x13704bc, 0xd7379ec, 0x52459bf, 0x831562c }
+ },
+ {
+ { 0xeeec506, 0x676b366, 0x45da557, 0xdd6cad5, 0x77057d2, 0x9de39cb,
+ 0xdf05bf1, 0x388c5fe, 0xdfb1f03, 0x6e55650, 0x52126c9, 0xdbceffa,
+ 0x3a4a220, 0xe4d187b, 0xeb27020, 0xac914f9 },
+ { 0xd2e5f30, 0x3f4ab98, 0xdd94451, 0x6ae97da, 0x0d80981, 0x64af695,
+ 0xf2aa2ce, 0x36b4b90, 0x18fcf59, 0x6adcd7a, 0xc116c81, 0x3ddfe6d,
+ 0x549b9e3, 0x661072b, 0xec4584d, 0xd9e3134 }
+ },
+},
+{
+ {
+ { 0xa1e400c, 0x6e46707, 0x551e806, 0xcdc990b, 0x3a07724, 0xfa51251,
+ 0x1b3e4f5, 0x500553f, 0xef4dac3, 0x67e8b58, 0x2cb4cc7, 0x958349f,
+ 0x7f9143c, 0x948b4ed, 0x2b7822b, 0xe646d09 },
+ { 0x2bc3c26, 0xd185dd5, 0xc837fc9, 0x34ba16e, 0x5a788b7, 0x516d4ba,
+ 0x56142b0, 0x72f2de7, 0xf445b3d, 0x5846f61, 0xf4631a1, 0xdaec5c9,
+ 0x169ea9b, 0xa10b18d, 0xaf6751b, 0x85d2998 }
+ },
+ {
+ { 0x43ddf31, 0xda0cac4, 0x1860911, 0x0966e17, 0x3cba600, 0x9c3a717,
+ 0x571f895, 0x5781880, 0x737ac21, 0x5e2a927, 0x6c253fb, 0x8a46148,
+ 0x95ee626, 0xe801cf5, 0x5f84fc0, 0x271166a },
+ { 0xba856bd, 0x306937f, 0xbe80a43, 0x80cb179, 0xffb5980, 0x70393b2,
+ 0x660fc64, 0xa8e4a1c, 0xc0d5c98, 0x5078abf, 0xfbd31ff, 0x62ba530,
+ 0x9e51b88, 0xda60844, 0x355ae15, 0xdb6ecb0 }
+ },
+ {
+ { 0x23c5d49, 0xbcbb6ea, 0x87959bc, 0x08906ba, 0x0991665, 0x61cc088,
+ 0xd90d13c, 0x21d6b41, 0xd03afe9, 0x0c27ac1, 0x5cfea52, 0x159995f,
+ 0xbdfe220, 0x4057e20, 0xcbdf058, 0xdd1b349 },
+ { 0x2e37159, 0x0cd6626, 0x3eb0d17, 0x8cea8e4, 0x5bce7f0, 0x553af08,
+ 0x5b6511d, 0xb94cb5f, 0x50e0330, 0x7b8d3a5, 0x57ab7e7, 0x4159110,
+ 0x6aa886f, 0x320820e, 0xc5b6b81, 0x130d4d6 }
+ },
+ {
+ { 0xc7bb2ed, 0x2f98059, 0xa49bdfb, 0x33ebf4c, 0xb0a675b, 0x04c72a1,
+ 0xadb6c14, 0x94f9ea4, 0xcf728c0, 0x03376d8, 0x4c6eb6a, 0x5c059d3,
+ 0xeb8da48, 0x0178408, 0x2956817, 0x8bf607b },
+ { 0xceb3d28, 0x7ad2822, 0x37ae653, 0xd07a403, 0xc1e46b2, 0xbc68739,
+ 0x9154ba9, 0x15d7cca, 0xa26617d, 0x6b97103, 0xb2e0d28, 0xa610314,
+ 0xfd4d363, 0x52a08ba, 0xc7dc2af, 0x80c2638 }
+ },
+ {
+ { 0x3187140, 0x0cde7ef, 0x4b70acd, 0x93b92ca, 0x7a79cdc, 0x5696e50,
+ 0x8eaab66, 0x73cc972, 0x8f1b0c7, 0x6b8c5b6, 0x4f7e0b1, 0xb39a318,
+ 0x376108a, 0x72cfb0d, 0x98536a7, 0x0c53efc },
+ { 0x24c2f1e, 0x03b52a8, 0x6399b78, 0x717132e, 0x349a85d, 0x31ebd25,
+ 0x1a200d4, 0x265ee81, 0x407d7ad, 0x0b1aad2, 0x94d2962, 0x9a9ebc8,
+ 0x41171d9, 0x994e6cd, 0x6c8fa83, 0x09178d8 }
+ },
+ {
+ { 0xa2593a1, 0x7d1d238, 0xb38fb19, 0x863e93a, 0xe7712a9, 0xd23a4cc,
+ 0x27efcd5, 0x7477b13, 0x1392f6c, 0x3ba69ff, 0xf7bb5a5, 0x63e0c32,
+ 0x026effd, 0x20412c0, 0xef424ab, 0xd3ee8e4 },
+ { 0x64e5174, 0x14c0b2d, 0xe58c47b, 0x2a611f2, 0xc1e8635, 0xaa58a06,
+ 0xcf17034, 0x1870c3e, 0x83f1bf3, 0xb0d5e34, 0x16c7eb3, 0xb19905c,
+ 0x6efa4ca, 0xbf85d62, 0x180f92b, 0xfd16b2f }
+ },
+ {
+ { 0x3adcb48, 0xc0431af, 0xba90496, 0xc9a7a8d, 0x3895294, 0xd765a16,
+ 0x551de70, 0xb02a41a, 0x749b8a1, 0xb71b261, 0xc6f3e47, 0x0dfa89e,
+ 0x0f5d9ce, 0x392c0d8, 0x31aee3c, 0x43c59d8 },
+ { 0x4d76f49, 0x94bfb6d, 0x27d68a5, 0xe8f5b82, 0x630fd08, 0x78ae1d9,
+ 0xce1bdae, 0x1379029, 0x66715dc, 0x9689da0, 0xd3278c7, 0x5d4cb24,
+ 0x9e84fbc, 0x77c9833, 0xea1048c, 0xc8478dc }
+ },
+ {
+ { 0x770d2ba, 0xe4b8f31, 0x42ea095, 0x744f652, 0x036f138, 0xd06e090,
+ 0x3b078ca, 0xd3a3d5b, 0x78b8417, 0xc7ae541, 0xc738fd7, 0xad6c5d4,
+ 0x4676454, 0x6178984, 0x5d9a392, 0xfbf3423 },
+ { 0xfff772f, 0x8e451a7, 0x5ffbead, 0x8605bb7, 0x930d59f, 0x6f75cc1,
+ 0x8f3f460, 0xd4f4755, 0x6700c8a, 0xefd2d79, 0x2406421, 0xceb462a,
+ 0x9dfe8f1, 0x8ed0f97, 0xd1d7600, 0x0280bf1 }
+ },
+},
+{
+ {
+ { 0xdd9a54d, 0x761c219, 0x86a39c0, 0x1127fcb, 0x4c9bedd, 0x7d0e4f0,
+ 0x4d976b6, 0x27c017a, 0xda042cf, 0x800c973, 0x2593f11, 0xe7419af,
+ 0xae67960, 0xbd49448, 0x744fd85, 0xd3b60b7 },
+ { 0x61676fe, 0x5e74ed9, 0x39af627, 0x7383ef3, 0x5e62df7, 0x34407e0,
+ 0x8bf3196, 0xb053461, 0x583b407, 0xd6b7184, 0x55011be, 0xe3d0685,
+ 0x2124b52, 0x94083d0, 0xf780aaf, 0xa908324 }
+ },
+ {
+ { 0x73ec9c3, 0xb27af1a, 0x70fa725, 0xb66ad9f, 0x8cf73e4, 0x07724f5,
+ 0x9949358, 0xc3fcd57, 0xda0cc01, 0x06efb79, 0x10597c9, 0x1e977d2,
+ 0x703e8d6, 0xcd732be, 0x6d0b69e, 0x6fd29bf },
+ { 0x667128e, 0xca658ac, 0xc7872b3, 0xca0036a, 0x5355837, 0xc969858,
+ 0x075cf1c, 0x59f3be8, 0x3809a11, 0x9f1b9b0, 0x9733871, 0x6881ced,
+ 0xe902a5f, 0x8cda0fb, 0x4e3871e, 0x4d8c69b }
+ },
+ {
+ { 0xddee82f, 0x5c3bd07, 0x2f9723b, 0xe52dd31, 0x74f1be8, 0xcf87611,
+ 0x35f8657, 0xd9ecbd8, 0xfbfea17, 0x4f77393, 0xd78fe2c, 0xec9579f,
+ 0x0fb0450, 0x320de92, 0x95d9c47, 0xbfc9b8d },
+ { 0x5e1b4c3, 0x818bd42, 0x40e2c78, 0x0e0c41c, 0xbccb0d0, 0x0f7ce9a,
+ 0x5ef81fb, 0xc7e9fa4, 0x73574ad, 0x2561d6f, 0xd2efb0b, 0xa2d8d99,
+ 0xe96cd0a, 0xcf8f316, 0x4964807, 0x088f0f1 }
+ },
+ {
+ { 0x45d5a19, 0x0a84989, 0x6c2131f, 0x47ab39c, 0xf3fc35d, 0x5c02824,
+ 0x9ee8127, 0x3be77c8, 0xc90b80a, 0xa8491b7, 0xa28aa93, 0x5397631,
+ 0x6c0b344, 0x54d6e81, 0x876d0e4, 0x22878be },
+ { 0x6db3bf6, 0xeecb8a4, 0x54577a3, 0x340f295, 0x9a00f85, 0xa779868,
+ 0x4bb9147, 0x98465d7, 0xda3c736, 0x9532d7d, 0x7504b20, 0x6d574f1,
+ 0xd86e435, 0x6e356f4, 0x4533887, 0x70c2e8d }
+ },
+ {
+ { 0xd293980, 0xdce5a0a, 0x069010e, 0x32d7210, 0x06deaaa, 0x64af59f,
+ 0x59239e4, 0xd6b43c4, 0x9199c29, 0x74bf255, 0x11e1e2b, 0x3efff41,
+ 0xcb0f8d8, 0x1aa7b5e, 0x989e395, 0x9baa22b },
+ { 0x7b33ac1, 0xf78db80, 0x54ce80a, 0x05a3b43, 0x7bc8e12, 0x371defc,
+ 0x1224610, 0x63305a0, 0x6d697ef, 0x028b1ae, 0x1cd8051, 0x7aba39c,
+ 0x28ee4b4, 0x76ed7a9, 0x7f99901, 0x31bd02a }
+ },
+ {
+ { 0xf075566, 0xf9dab7a, 0xf56f18b, 0x84e29a5, 0xf64e56d, 0x3a4c45a,
+ 0x6a7302d, 0xcf3644a, 0x156b658, 0xfb40808, 0xf96be52, 0xf33ef9c,
+ 0xcaa2f08, 0xfe92038, 0xb261894, 0xcfaf2e3 },
+ { 0x224ce3f, 0xf2a0dbc, 0x592eb27, 0xed05009, 0x95889d0, 0x501743f,
+ 0x77c95c2, 0xa88a478, 0xdd63da9, 0x86755fb, 0xc7ee828, 0x9024acf,
+ 0xf38113b, 0x634b020, 0x6056e64, 0x3c5aacc }
+ },
+ {
+ { 0xa2ef760, 0xe03ff3a, 0xb1c3bac, 0x3b95767, 0x940d754, 0x51ce6aa,
+ 0x47a9a3d, 0x7cbac3f, 0x34f8d1a, 0xa864ac4, 0x80dbd47, 0x1eff3f2,
+ 0x7ebd5ca, 0xd8ab660, 0x05b07ed, 0xc4df5c4 },
+ { 0xa4f095b, 0x3dc92df, 0x7cdbd9a, 0x5ae36a5, 0x7891e04, 0x7ff2973,
+ 0x0a5fe7b, 0x37c0313, 0xaa6e35e, 0x210d7b0, 0xbf200d8, 0x6edfb53,
+ 0x84afb85, 0x787b68d, 0x72c6de3, 0x9b5c49b }
+ },
+ {
+ { 0x4010f4e, 0x5185716, 0x0536ebe, 0xe0b144b, 0x887d663, 0xacabb14,
+ 0xedf584f, 0xac1caed, 0xaf175a3, 0xb43fb8f, 0xf992a3c, 0x310b6d5,
+ 0x85178a4, 0xf2c4aa2, 0x8bd56bf, 0x69c9969 },
+ { 0xa4d972e, 0x73d6372, 0x9583803, 0x3d5bb2e, 0xd891581, 0x7bf7d18,
+ 0x568a34a, 0xa5ce5d7, 0x1f45c81, 0x670b433, 0x1f96910, 0x97265a7,
+ 0xb07c1ea, 0xdb14eb3, 0xfed447c, 0xdf008ea }
+ },
+},
+{
+ {
+ { 0x00c2f10, 0x0379f5a, 0xd350285, 0xb320b4f, 0x8efdd7d, 0x74e560e,
+ 0xf46a140, 0xf2f017e, 0x0f34624, 0x2ced1a6, 0xca08ec9, 0x7c4b4e3,
+ 0x5d8bc6b, 0xdffc2a1, 0x527b007, 0xcc8f3f3 },
+ { 0x861fe83, 0x59f8ac4, 0xd03144c, 0x8d48d2c, 0xbfa6dce, 0xa8457d2,
+ 0x677c136, 0xd7ed333, 0xc228e18, 0xcb8e219, 0x16ab1e4, 0x5f70bc9,
+ 0x3780370, 0x2ae3a3d, 0x88f17ad, 0x9f33654 }
+ },
+ {
+ { 0x960e4bb, 0xeab0710, 0xab9cfd3, 0xc668a78, 0xb0ef946, 0x2e85553,
+ 0x8df5df3, 0xa43c4b9, 0x3cb3646, 0x0ecd559, 0x18dbe71, 0x6f543c4,
+ 0xf59818b, 0xee7edaa, 0x90911c1, 0xc44e8d2 },
+ { 0x269b509, 0xafb38b1, 0x52afe2c, 0x9e2737c, 0xccfa664, 0x5b2ef02,
+ 0xe1cc58b, 0x1e0aeac, 0x5ea134e, 0x37a57e9, 0x83b9fc2, 0xc9c465a,
+ 0x6e3ecca, 0x4b9e8c7, 0x9bdbab5, 0xca07dbe }
+ },
+ {
+ { 0xb0d7807, 0xd297f3c, 0xf59ce61, 0xee441a5, 0xb2db844, 0x728553b,
+ 0x640e9e0, 0x90f87e5, 0xcb76dff, 0xaa72cbf, 0x4012d57, 0x065c686,
+ 0x9678b44, 0xd5ee88f, 0x2177603, 0x3d74b85 },
+ { 0x748b68e, 0x3f9c947, 0x8f44d44, 0x03856d9, 0x462426c, 0xde34b84,
+ 0x845ab29, 0xc16d1bb, 0xd2e18de, 0x9df6217, 0xb154643, 0xec6d219,
+ 0x2ee0f8f, 0x22a8ec3, 0x91c5175, 0x632ad38 }
+ },
+ {
+ { 0x6869267, 0x19d9d23, 0xfe5532a, 0x628df94, 0x6dc9a01, 0x458d76c,
+ 0x2cc39c8, 0x405fe6c, 0xf3a04ba, 0x7dddc67, 0x12500c7, 0xfee6303,
+ 0xa50e9de, 0x580b6f0, 0x6090604, 0xfb5918a },
+ { 0x3af6b2d, 0xd715925, 0x1c7d1ec, 0x83d62d6, 0x85858c4, 0x94398c1,
+ 0x14bfb64, 0x94643dc, 0xaf7db80, 0x758fa38, 0xa8a1557, 0xe2d7d93,
+ 0x3562af1, 0xa569e85, 0x84346aa, 0xd226bdd }
+ },
+ {
+ { 0xd0ccd20, 0xc2d0a5e, 0x5dbc0cf, 0xeb9adb8, 0x26d7e88, 0xe0a29ee,
+ 0x84a8e98, 0x8bb39f8, 0x37396ea, 0x511f1c1, 0xc8b2fb3, 0xbc9ec5a,
+ 0x090e5bc, 0x299d81c, 0x4cdd587, 0xe1dfe34 },
+ { 0x5e465b7, 0x80f61f4, 0x1bad59e, 0x5699c53, 0xb79ff92, 0x85e92e4,
+ 0x9db244c, 0x1e64fce, 0xa22097d, 0x3748574, 0xefff24e, 0xe2aa6b9,
+ 0x0a10bc6, 0xb951be7, 0x9067a1c, 0x6685326 }
+ },
+ {
+ { 0xa6114d3, 0xf716ddf, 0x037ec1f, 0x9e515f5, 0x44944a6, 0x7734541,
+ 0xaba97cc, 0x1540c4c, 0x8b54bb7, 0xe41e548, 0xcae37bc, 0x4363156,
+ 0xf3d2ce8, 0xc384eaf, 0x4c58ba4, 0x72a4f45 },
+ { 0xdcaf3fc, 0x0ceb530, 0x78dcdbb, 0x72d5365, 0xc6320fa, 0x9b44084,
+ 0xeb74c70, 0x6262d34, 0x608e6dc, 0x8abac85, 0x10dd38d, 0x82a5264,
+ 0xa819b8d, 0xbc39911, 0x03ad0d9, 0xbda15fe }
+ },
+ {
+ { 0xf9dc60b, 0xadbf587, 0x7d846d2, 0xf9d814f, 0xb77bde0, 0xccdd241,
+ 0x2242f50, 0x89cb6d7, 0xe6360a8, 0x95c0e3e, 0xdf49713, 0x7c7dd5a,
+ 0x57d5814, 0x68e0e49, 0x0c16571, 0x3aa097d },
+ { 0x267d03a, 0xb56b672, 0x8c44af4, 0x4f55708, 0xf3252a5, 0x67c49e7,
+ 0xc94a469, 0x871d6cf, 0x01fbfaa, 0x57ae998, 0x48a5d8e, 0x5c0e48f,
+ 0x5e240b9, 0xe9bf9c8, 0x99d41ca, 0xa410189 }
+ },
+ {
+ { 0xb2889b4, 0x6beb0c7, 0x9455370, 0x78b7f89, 0x47ca364, 0xd434214,
+ 0x9f21e5b, 0xdd9d2da, 0x0a7e4aa, 0xa0c7c18, 0xda1660c, 0x022c0d4,
+ 0x5a57002, 0xe1f5c16, 0x518f68f, 0x51c7c9e },
+ { 0x2586502, 0x6d521b6, 0x183ec1b, 0xa0f2cb3, 0xcaa5e16, 0x578b4e0,
+ 0x764997f, 0x7bd4fbd, 0x64b1804, 0x7ec56c3, 0x0ee08e4, 0xb75a254,
+ 0xdc19080, 0x6bf74a6, 0x97d6e59, 0x6ec793d }
+ },
+},
+{
+ {
+ { 0x0a4beb9, 0x16789d6, 0x9b9c801, 0x512b2cd, 0x8c7bb9c, 0xf8b6d10,
+ 0x9ebdc8c, 0xd85651e, 0x9ba971a, 0xc945082, 0x7e1cf78, 0x852d9ea,
+ 0x0af01e2, 0x6a45e35, 0x6151dcf, 0xe6cdadf },
+ { 0x2b8c01b, 0xc454bb4, 0x3d54cd2, 0x59e0c49, 0x454d608, 0x8e1e686,
+ 0xd8c6103, 0x0dbae4b, 0x6c18b18, 0xa5603a1, 0x3369093, 0x227a6b2,
+ 0x5f3de1c, 0xf1e8929, 0x8ab63c5, 0x42f0b58 }
+ },
+ {
+ { 0x5b596d8, 0xf1974cc, 0x44719f0, 0xee8093f, 0xf6f5b54, 0x40ba933,
+ 0x2f3d654, 0xd6e5365, 0x26d73b8, 0x9aeb835, 0x0776382, 0x50ed535,
+ 0xad43875, 0x3be47d6, 0xc786e48, 0x21d56df },
+ { 0xb73bb39, 0x8a75e18, 0xf265a78, 0x9eba84c, 0x2e772e7, 0x7c02a4d,
+ 0x4c1ecd2, 0xf7df6d4, 0x6cef71b, 0xa8d9ea0, 0xcae3b68, 0x86e8f91,
+ 0x99efefa, 0x2fd1411, 0x214e6f6, 0x0b36ab2 }
+ },
+ {
+ { 0xbdce61c, 0xd79065c, 0xdecb229, 0xcb562ff, 0x4600849, 0xef5d3d1,
+ 0x1d23ac8, 0x348b31b, 0x15c36b8, 0xb2ea699, 0x4822836, 0x268683d,
+ 0xc6f0b7d, 0x083edbe, 0x1a7821c, 0xaf4f39d },
+ { 0x4e64841, 0x23be6e8, 0x65bf791, 0xe9e2463, 0x02bfd7c, 0xa3208ac,
+ 0xd01357d, 0x231989c, 0x6422ab4, 0x79b8aad, 0x91b8564, 0x57d2b7e,
+ 0x8c04421, 0x28ebbcc, 0x7d09c05, 0xdc787d8 }
+ },
+ {
+ { 0x6c7bed5, 0xeb99f62, 0x39cd0e8, 0x326b15f, 0xd860615, 0xd9d53dc,
+ 0x1bf4205, 0xdf636e7, 0x0752209, 0x1eaa0bf, 0x4744abb, 0x17ce69a,
+ 0xf3ea2fb, 0x474572d, 0x224a7f3, 0xc4f6f73 },
+ { 0x63081b4, 0x7ed86ad, 0x4a20afb, 0xcd4cdc7, 0xb301b2e, 0x7563831,
+ 0xe038699, 0x5b4d2b1, 0x802a15f, 0xa15d1fa, 0x13e9172, 0x6687aaf,
+ 0xba6da90, 0x3eccd36, 0x7474e83, 0x34e829d }
+ },
+ {
+ { 0x19c9b27, 0x4cea19b, 0x5f52523, 0xa14c37a, 0x726625c, 0x248b16d,
+ 0x6cabc21, 0x8c40f9f, 0x32a5c65, 0x918470c, 0x2a98d5b, 0x314056b,
+ 0x34a0714, 0x6c974cf, 0x4f6314a, 0x0c8f8a9 },
+ { 0x70bccfd, 0x4844557, 0x740c9fd, 0xf5835db, 0xa21407c, 0x12e59b5,
+ 0xdb1689d, 0xbe338e0, 0xdd5e915, 0x5a50ce9, 0xef99f39, 0xb1780e9,
+ 0xee4d833, 0x1262b55, 0x89c5340, 0x4be3f22 }
+ },
+ {
+ { 0x6c4b858, 0xbb99b90, 0x550ca53, 0xa7724d1, 0x826962e, 0x7d31f5a,
+ 0xa5804da, 0xf239322, 0x0275048, 0x3e11320, 0x3ee4cb6, 0xcbb1bb8,
+ 0x1331191, 0xdb86525, 0x7d1d903, 0xb7caf9e },
+ { 0x77d7a9d, 0x06e3b05, 0xb3bbbf5, 0x7a132b0, 0x7c50575, 0xd61fbc5,
+ 0xaf4b646, 0x393f712, 0xcb7efe9, 0xef77972, 0x5ea4995, 0x20e6d5d,
+ 0xfbbe4c6, 0x0ac23d4, 0xc807f2a, 0x8456617 }
+ },
+ {
+ { 0x5396143, 0x4995fb3, 0xb99dc46, 0xa8b4bd1, 0x4150064, 0x2293e8e,
+ 0x22a3545, 0x2f77d49, 0xb2192c4, 0xe866b03, 0x5e0aa38, 0x58b01f0,
+ 0x2ed246b, 0xe406b23, 0xed60974, 0x447edb3 },
+ { 0x8869703, 0xf541b33, 0x383420a, 0x6959fe0, 0x4be4e48, 0xd6b39db,
+ 0xb5714ef, 0x048f3b4, 0x5d9e4b8, 0x68b4968, 0x2177963, 0xbda8e6c,
+ 0xc4211fe, 0x5094e35, 0x2d46d1a, 0xea591c3 }
+ },
+ {
+ { 0x2fef780, 0x3a768ff, 0x32970c6, 0x4218d28, 0xec6da17, 0xce598e4,
+ 0xfbb126a, 0xf675645, 0x0427617, 0xb04c23f, 0xe4fce74, 0xc9f93fb,
+ 0x3c91b00, 0x44a414b, 0x1d3b3cc, 0x4d982f3 },
+ { 0xb24cce0, 0xb1d40e8, 0x133e73d, 0x5a21c07, 0x0bb589d, 0x6e9358e,
+ 0x2399844, 0x39cfb17, 0x166080e, 0x83f7647, 0x450b468, 0xcfe7bf8,
+ 0x1e8434f, 0x2a288f7, 0x21a81e3, 0xd39f1e5 }
+ },
+},
+{
+ {
+ { 0x528af6f, 0x78c6f13, 0x94b74d9, 0x0001fe2, 0x01aab44, 0xae77425,
+ 0xef0039c, 0x7cbe937, 0x0fa2a67, 0xaf3e4f0, 0xda1378e, 0xe28175f,
+ 0x8ccd90e, 0x72adeed, 0x00af22f, 0x16a8ce1 },
+ { 0xcbf63dd, 0x69fae17, 0x9e39e26, 0x6786172, 0xf827a18, 0xe92b3d5,
+ 0x8403682, 0x4d75e41, 0x9056a79, 0x01a4fd9, 0x20008f5, 0x89efb2d,
+ 0xb78ff15, 0xa2f6918, 0xa3437f5, 0xf41c870 }
+ },
+ {
+ { 0x7be353c, 0xc840ae5, 0x3fb2691, 0x465a5eb, 0x7eba833, 0x34a89f0,
+ 0x013346e, 0xf620896, 0xe875df2, 0x563b5f0, 0xfbc44ce, 0x5f7fc8b,
+ 0xcfedf9d, 0x22fcb5a, 0x7dc691b, 0x7cf68d4 },
+ { 0x76a103f, 0x37f7c2d, 0xfd87b7d, 0x728a128, 0xccf2132, 0x7db2ad8,
+ 0xb100e63, 0xa4c13fe, 0x7b511d5, 0xcd28a51, 0x721ca5c, 0xb910280,
+ 0xd84bd52, 0xec1305f, 0x2729791, 0xb964642 }
+ },
+ {
+ { 0x5bc7462, 0x83fccdf, 0xd6f012f, 0x01f3dda, 0x3a6a87c, 0x57f1171,
+ 0xff403ac, 0xedb47ce, 0xbaab073, 0x6c184e5, 0x6f0d6a1, 0x5b17c7d,
+ 0x3ef2c91, 0x45a4c4f, 0x86a8f41, 0x26c3f7e },
+ { 0xb646514, 0x81a6db0, 0xca8b9ae, 0xf84059f, 0x9f02305, 0xd73dab6,
+ 0xc4b7c6c, 0x0de3fae, 0x696df2f, 0x18abb88, 0x75d7740, 0x45dd1b9,
+ 0x9ee35bc, 0x3aeccc6, 0xb029f88, 0x478252e }
+ },
+ {
+ { 0x8b2ce15, 0x66bf85b, 0x335709d, 0x1175425, 0x8123874, 0x00169ef,
+ 0x9b89868, 0xfd3c18c, 0x775204e, 0xb3612f9, 0xc2cd510, 0x4b8d09d,
+ 0x14559ad, 0xafa12e6, 0x9657493, 0x1ddaa88 },
+ { 0x1e77a08, 0x87d700b, 0x14d2e71, 0xaf4cf2f, 0xbf90c94, 0xe00835d,
+ 0x6dc8429, 0xb16a6ec, 0xf8a4d92, 0x02a7210, 0x3d0c48d, 0x5a5ab40,
+ 0xb5b9bea, 0x0052b3a, 0xe138f89, 0x6242739 }
+ },
+ {
+ { 0x16b2819, 0x7c215d3, 0xfeb9d7a, 0xdacb65e, 0xd833423, 0xc3c569e,
+ 0x886a058, 0xbc08435, 0x7e5cb61, 0x132c4db, 0x9422aff, 0x6373a27,
+ 0xfca9fc4, 0x43b9d7e, 0xdbe465f, 0xe3319a5 },
+ { 0x0b39da7, 0x51d3687, 0x4b75492, 0xcb6d798, 0xeadd87a, 0x77eb272,
+ 0xe0d3f6c, 0xf2fb47d, 0xf9f791c, 0x807fd86, 0x975e885, 0xf01086b,
+ 0xb6a3604, 0xf9314b5, 0x67be852, 0x8cd4538 }
+ },
+ {
+ { 0x858f79b, 0x7c1e6b3, 0x938caf9, 0xf0477c4, 0x3e88c44, 0xb311bbf,
+ 0x1e3a3c1, 0x9234c09, 0x95a1d4d, 0x531af2b, 0xb8d1c64, 0xf3cc969,
+ 0xb51e78d, 0x6f3c328, 0x34e8881, 0x5a1bd6c },
+ { 0x3a9336f, 0x2e31239, 0x5ced897, 0x020f0cc, 0x5fab121, 0x4b45d7b,
+ 0x1841210, 0x8068b1c, 0x8349170, 0x1bd85fc, 0x0f97fe5, 0xfe816d8,
+ 0x14b84fc, 0x1089818, 0xb93cd48, 0x1d4fabb }
+ },
+ {
+ { 0xaef599e, 0x1f11d45, 0xb09c58a, 0x8d91243, 0xd08c3c3, 0xd2eec7b,
+ 0x3b02793, 0x5a6039b, 0x8fb2c00, 0xb27fed5, 0xe8acf5e, 0xb5de44d,
+ 0x6e6c698, 0x2c3e0cd, 0x777180d, 0x2f96ed4 },
+ { 0x96d0e36, 0x67de8bf, 0xc9b6d65, 0xd36a2b6, 0x637d59c, 0x8df5d37,
+ 0xc8d9878, 0x951899f, 0xb13fcf8, 0x0fa090d, 0x1f5c7b4, 0xa527081,
+ 0x513a37a, 0x56a6560, 0x14dc1fe, 0xc6f5530 }
+ },
+ {
+ { 0x94945d6, 0x7f6def7, 0x8cc8832, 0x2f52fe3, 0xa812ff5, 0x0228ad9,
+ 0xbb8478a, 0xcd282e5, 0xbe91b07, 0xa0bc9af, 0x11165e2, 0x0360cdc,
+ 0x7b857e4, 0xb5240fd, 0xfa36b08, 0x67f1665 },
+ { 0xad2c93f, 0x84ce588, 0xe8ff4c0, 0x94db722, 0x489c8a3, 0xad2edbb,
+ 0x7e5f278, 0x6b2d5b8, 0xd1d0798, 0x0265e58, 0x4c5589e, 0xd2c9f26,
+ 0x4e4074d, 0xde81f09, 0x303089f, 0xc539595 }
+ },
+},
+{
+ {
+ { 0x83e882c, 0x183492f, 0xb5e6c12, 0x4d58203, 0xefec20b, 0x1ac96c3,
+ 0xe1cd15e, 0xabd5a5b, 0xcbbb14b, 0x7e1e242, 0xd0543b3, 0x9f03f45,
+ 0xd678158, 0xc94bc47, 0xa446cad, 0x7917be0 },
+ { 0x9b37394, 0x53f2be2, 0x064cc76, 0x0cb0a6c, 0xfba3da3, 0x3a857bc,
+ 0x80fcb49, 0xac86bc5, 0x30ab146, 0x9d5336e, 0x5bc1270, 0xafb093d,
+ 0xe5c3b6e, 0x996689d, 0xea076ba, 0x55189fa }
+ },
+ {
+ { 0x646ce03, 0x99ef986, 0x30e6100, 0xa155f81, 0x29b6b07, 0x75bef17,
+ 0x1de077b, 0xc46f08e, 0x7ed0526, 0xf52fdc5, 0x61a299a, 0xe09d989,
+ 0x7b8e93a, 0x9527329, 0x0acd185, 0x11255b5 },
+ { 0x4a6acdd, 0x57919db, 0x4451d74, 0x708a578, 0x283f7b3, 0x5b0bd01,
+ 0xc3d9260, 0xe82f40c, 0x82bbdc2, 0x2ab96ec, 0xc164d87, 0x921f680,
+ 0xc17a6a9, 0xf0f7883, 0x382a001, 0xc366478 }
+ },
+ {
+ { 0x2e40791, 0x5c9aa07, 0xa0776bf, 0xf0b72d6, 0xeaa50dc, 0x445f9b2,
+ 0x6bda47f, 0xa929fa9, 0x3bbfc49, 0x539dc71, 0x006a78b, 0x4f16dd0,
+ 0xeef39c7, 0x331ba3d, 0xc34157c, 0xbfa0a24 },
+ { 0x6a3b482, 0x0220beb, 0x6c43885, 0x3164d4d, 0xacdea23, 0xa03bb5d,
+ 0x9d8f450, 0xd6b8b5a, 0xbd208fe, 0xd218e65, 0x35c476f, 0x43948ed,
+ 0x0a2ed2b, 0x29a0dd8, 0x25295b7, 0xa6ccf33 }
+ },
+ {
+ { 0xac38939, 0xf68f15f, 0xf8010c1, 0xb3dd5a2, 0xa35f141, 0xf7ac290,
+ 0x7388574, 0xdc8f3b2, 0xe95fed2, 0x7ec3de1, 0x257ac7d, 0xc625451,
+ 0x664e55a, 0x66fc33e, 0x4832ba5, 0xd3968d3 },
+ { 0xc026448, 0x980291b, 0x24da4a5, 0xfcb2125, 0x827a360, 0xbca7df4,
+ 0x85ca63b, 0xfcc395c, 0x8e9f733, 0xcf566ec, 0xd465f70, 0x835ee9b,
+ 0x372f916, 0xe66d111, 0x04d9211, 0xc066cf9 }
+ },
+ {
+ { 0x8b48818, 0xb9763a3, 0x4288f96, 0xa6d23cc, 0xed3a229, 0xe27fcf5,
+ 0xabaff00, 0x6aebf9c, 0x8131cd1, 0xf337503, 0xffabd58, 0x13ad41d,
+ 0x861c83b, 0x1bee6af, 0x9c142e7, 0x274fe96 },
+ { 0x9b84b5b, 0x70ebcc9, 0x8191cfc, 0xe1a57d7, 0xcbf00b8, 0x46ccd06,
+ 0xefe402d, 0xc233e8e, 0xbeebeb3, 0xb4ab215, 0xbd14e7b, 0xb7424ea,
+ 0xa679578, 0x351259a, 0x471d684, 0x6d6d01e }
+ },
+ {
+ { 0x815ae38, 0x755c465, 0x611db56, 0xadc3e85, 0x188dd50, 0x633999b,
+ 0xc12d907, 0xfdf7509, 0x238b6af, 0x25bcfde, 0x397f5e7, 0x50d705d,
+ 0x944c974, 0xb65f60b, 0x27ac325, 0x8867fc3 },
+ { 0x3763eff, 0x2edc441, 0x341fb63, 0x892c0b3, 0xb3a7f28, 0xb34b83a,
+ 0x15c2f18, 0x9aa106d, 0x1bb2277, 0x720bbc6, 0x5cfaefd, 0x637f72a,
+ 0xf43e565, 0xf57db6e, 0xb58e772, 0xceb7c67 }
+ },
+ {
+ { 0x6ecc1de, 0x2793da5, 0x38f31b2, 0x4e10974, 0x8781267, 0x4229b4f,
+ 0xdec04a1, 0xe5d2272, 0xec17cff, 0x6abb463, 0x0cbb048, 0x28aaa7e,
+ 0xd22ef85, 0x41dc081, 0x5e63d0f, 0xcbc361e },
+ { 0xad5dbaa, 0xb78aafc, 0xfc1edc3, 0x0111505, 0x92c7bfa, 0x63ed66d,
+ 0xe468919, 0x2982284, 0xb8c0d8c, 0x30f1f21, 0x2685093, 0xf056747,
+ 0xf03dd0f, 0x0e085b6, 0x5581e66, 0xa8c8db8 }
+ },
+ {
+ { 0x264ad0c, 0x42009a6, 0x593bef4, 0x13bf2b8, 0x5d4e8b1, 0x1d11190,
+ 0xef7bddc, 0xfe3e940, 0x624e62c, 0xa012275, 0x1d6d3cc, 0xcb65924,
+ 0xedb7ab6, 0xc7bcc70, 0xb750b1c, 0xff9fafb },
+ { 0x7fea84b, 0xf65df29, 0x90b0e02, 0x17c84a8, 0x301e821, 0xa92a859,
+ 0xfb480d1, 0xbee8cb2, 0x59c604e, 0x7010b8c, 0xe803c43, 0x47bf3f4,
+ 0x47b3fff, 0xd645142, 0x9f0da13, 0xc4c5dcb }
+ },
+},
+{
+ {
+ { 0xb5253b3, 0x8af700c, 0x206957a, 0x31ca605, 0x3eafdcd, 0x2574439,
+ 0xd3ae15e, 0x2ba5ae1, 0x5b82579, 0x710b738, 0x112b95a, 0x145ab57,
+ 0x38c55c5, 0x4b133a0, 0x2a16fef, 0xf7559c9 },
+ { 0xd9ba896, 0x70c3e68, 0xc33d07a, 0x475dd32, 0x3a41e40, 0xe084e47,
+ 0xfd2e706, 0xddc9382, 0x79510bd, 0x34b7275, 0xa5f901e, 0x5e78a69,
+ 0xdcfb823, 0x429dfd7, 0x014f0a3, 0x1d9dc18 }
+ },
+ {
+ { 0xaf403d7, 0x364fcdf, 0xb7d7b34, 0xd9ea4ff, 0xcbb1dac, 0x21a3426,
+ 0x143b4f5, 0xfa51052, 0x6df2409, 0x2bca073, 0x8ad7285, 0x7e6985a,
+ 0x4aaa27f, 0x3a1a9d0, 0x9fc0c6c, 0x1a815e1 },
+ { 0xbb65bb3, 0xfab6147, 0x33ced0b, 0xa36dc0d, 0x2062d78, 0x26a8859,
+ 0x28a5fb7, 0x3438617, 0x4ebb1ad, 0xe82da25, 0xd05aa11, 0x70f5071,
+ 0xadaac48, 0x0b7f847, 0x93cb269, 0xeb812bc }
+ },
+ {
+ { 0xf7caccc, 0xcb317cc, 0xcf85098, 0xd3410d9, 0x7f078d7, 0xca68c8d,
+ 0xb782efc, 0xfe9e812, 0x5f544b5, 0x32e7c0f, 0x3a7b7f2, 0x44fe95a,
+ 0xe91327b, 0xf4f1543, 0x76645ed, 0x27d118d },
+ { 0xd7abc2c, 0x690547c, 0xb53c8af, 0xf64680f, 0x79ea989, 0xbe0cbe0,
+ 0xa91af28, 0x6cf0cce, 0x9daa2f9, 0xa3b85a2, 0x91faed0, 0xd4b663c,
+ 0xa8b20ba, 0x782c7b7, 0xb8d98ce, 0xf494faf }
+ },
+ {
+ { 0x002f55a, 0x080c0d7, 0x2d6d9dd, 0xf4f8f14, 0x382f025, 0xb326229,
+ 0xad28c20, 0x58fd0b5, 0x8d06a15, 0x704b992, 0x7fbd8e4, 0xf4545d9,
+ 0xed55581, 0xc32fa63, 0x01ac0fd, 0x3ab7936 },
+ { 0x6099fd1, 0x13ece52, 0x9c79178, 0x776dba8, 0xce26c45, 0x8d28212,
+ 0x60d739c, 0x09fddaf, 0xa84826e, 0xf9931ed, 0xb29439e, 0x6e73d90,
+ 0x9095e61, 0x94cfefc, 0x802f474, 0x3050d16 }
+ },
+ {
+ { 0x9f6394b, 0x0898f8f, 0x88b0e91, 0x48b8cea, 0x4c1b362, 0x4bc9925,
+ 0x827d9ec, 0xe3fccb4, 0xd950d6a, 0x5d4cf9a, 0x39b5b38, 0xa16f1ef,
+ 0x620f288, 0x3c76d1d, 0xe119390, 0x9fdd059 },
+ { 0xfb5edf8, 0x7b5de9e, 0x769d14e, 0x3e290b9, 0x6bd10b5, 0x4df3a91,
+ 0x82f8f7b, 0xae99bca, 0xc9524af, 0x5481d5d, 0x69504f1, 0xf112e4f,
+ 0x51931ec, 0xb048f09, 0x18f51b1, 0xbff876a }
+ },
+ {
+ { 0x46c1c37, 0x932e2a7, 0x9aea4c1, 0x903ad52, 0x8f161f2, 0x717ac91,
+ 0xf425e2a, 0xa57d197, 0x7f39e0e, 0xae89dac, 0xbaa2a58, 0x91655c0,
+ 0x54836dd, 0xe3dc286, 0xa9ec9e6, 0xb5f0baa },
+ { 0xbdbda04, 0xf7c4662, 0x51059c0, 0xbe5393b, 0xdd95b0f, 0xb16d552,
+ 0x1b3bd96, 0xde495b3, 0xc0206c5, 0xb2a6e02, 0x014d3a9, 0x045cc09,
+ 0x2a2f490, 0xf66a315, 0xc5dea05, 0x208c108 }
+ },
+ {
+ { 0x65237ea, 0x6e38b68, 0x9f27fc6, 0x93a1303, 0xa95068a, 0x9a6d510,
+ 0xe7c9e54, 0x6fbf216, 0x571ac1d, 0x7824290, 0x91c2a0c, 0x8cb23ba,
+ 0xc7e434d, 0x611202e, 0x76058b4, 0x8f901bf },
+ { 0x0849588, 0xef0ac05, 0xdd31804, 0xe0d2dde, 0xeb2ca81, 0xaf5417c,
+ 0x5d1a509, 0x420ac06, 0x9683bb6, 0x46e345e, 0xf613f7f, 0x6daf635,
+ 0x48a9576, 0xc9e8291, 0x176d147, 0x5f9f1d1 }
+ },
+ {
+ { 0x77e9709, 0xd24ae1d, 0x0047b8a, 0x77751dc, 0xc6a1593, 0xe325334,
+ 0x671f86a, 0x9baf962, 0xc29a15e, 0x425af6a, 0x2796e33, 0x3108600,
+ 0xfc253a5, 0xb6ea78c, 0xafae0ea, 0x4c733e0 },
+ { 0x97c99b9, 0x4b7443a, 0x50203a6, 0xc14e9e4, 0x52680ba, 0xd1bb515,
+ 0xd55533a, 0xa56a3ef, 0x169e1a0, 0xa66e38c, 0xeed7da0, 0xb3e4df9,
+ 0xddce3d9, 0x022c937, 0xf6e36b4, 0x8552089 }
+ },
+},
+{
+ {
+ { 0xf5cc82e, 0x8e4bf95, 0xc3ed6c9, 0x2ad80c3, 0xc9045e1, 0xf2e5b2c,
+ 0x59b06d4, 0x42c9065, 0x7b43b84, 0xc1f7379, 0x72d7992, 0x1710dbf,
+ 0x767b41c, 0xe98cf47, 0x7bfb9e9, 0xe713fce },
+ { 0x9fa5134, 0x9f54ae9, 0xde40d0e, 0x3002fd8, 0x9311334, 0xdc282b7,
+ 0xbfeb360, 0x5519810, 0x0f96ffe, 0x31539c7, 0xd27777b, 0x04eacc0,
+ 0x8ff5053, 0x5982410, 0x32b67ad, 0x5982366 }
+ },
+ {
+ { 0x6bea5c2, 0x6eb4554, 0xd509a33, 0x82cfae0, 0x394bb59, 0x6a69bd8,
+ 0x5770ee1, 0x1880d8d, 0x7dacf9e, 0x6351844, 0xf02b891, 0x5b1ecc5,
+ 0xb6c9a5a, 0xeb7d900, 0x8897da8, 0xdab8a76 },
+ { 0x98851a6, 0x28c7be5, 0x4d73c3b, 0x0101d4f, 0x5084996, 0x3c2569c,
+ 0x280bde0, 0xb9bc911, 0xcd0d4f9, 0x513a22a, 0x2a15f3b, 0xdf2986d,
+ 0x2aa4943, 0x231c28f, 0x0333870, 0x29623ad }
+ },
+ {
+ { 0x4084416, 0x2ceb178, 0x49516cd, 0x924cf1c, 0x4be856f, 0x76536c0,
+ 0x47a265b, 0x11b59cd, 0x4999494, 0x720dc84, 0x007b795, 0x910f794,
+ 0x2d3df83, 0x8434e14, 0xbd478d3, 0x8f53878 },
+ { 0xaeb9c2f, 0xd9b072e, 0xfd8a29f, 0x16f87ea, 0x2fd0de1, 0x8c42f9b,
+ 0x0e816ef, 0x916721e, 0x18bde37, 0x2ecb470, 0x2375da2, 0xcde3b7a,
+ 0xef94281, 0x30d0657, 0x5cd7af8, 0x5105456 }
+ },
+ {
+ { 0x4bdced3, 0x7230b33, 0x0838569, 0x0c6a3e1, 0xe3493b8, 0xf19c9ec,
+ 0x0d97c57, 0xf275927, 0x0c862eb, 0xf14181e, 0x32c72bc, 0xfd3bac1,
+ 0xf3be362, 0x620563f, 0x47283b7, 0x672ccaf },
+ { 0x2b7bf16, 0x191e3fa, 0x520dad7, 0xf838633, 0x3629d87, 0xd3dde55,
+ 0xaf86ebe, 0x14d8836, 0x221b2ce, 0x3db7dfb, 0x0aed72a, 0x3872abb,
+ 0x8c665b7, 0xb60de52, 0x44982cb, 0x89c2596 }
+ },
+ {
+ { 0x4dbba25, 0x799a2de, 0xa42715e, 0xd818aae, 0xf55c362, 0xbc88f4d,
+ 0x713c9ae, 0x142a163, 0xfbfb33f, 0x411e8ee, 0x6bb684a, 0x34b4629,
+ 0xdc81817, 0x4344bec, 0x17f9d46, 0xcc9573d },
+ { 0xff38a7d, 0xf85f8bc, 0x0caf117, 0xa14bf73, 0x4ba6429, 0x126874f,
+ 0xaa5db97, 0xcc9bf22, 0x6aba827, 0x62b56df, 0x9c9772a, 0xfee1cb8,
+ 0x177e541, 0xe36838f, 0xadd438f, 0x698815d }
+ },
+ {
+ { 0x38ed1ad, 0xc9fd894, 0x7b6a601, 0x73cd79d, 0x05e8d20, 0x2210e62,
+ 0x3592af5, 0x72384ac, 0x763d07e, 0x5ccc079, 0xa5f79eb, 0x2f31a4a,
+ 0x2945a95, 0x693f4ed, 0x8056fdc, 0xc712017 },
+ { 0xdf4b09a, 0x361ecd2, 0xb7d929a, 0xa5644ea, 0x3fabe9a, 0x34abc0b,
+ 0xe942a8c, 0x1a2473c, 0x6454bc3, 0xe00c924, 0xdff7366, 0xab324bc,
+ 0x21b8f99, 0xe1412f1, 0xe33551e, 0x970b572 }
+ },
+ {
+ { 0xbd0a6b5, 0x6ca4cac, 0x921d654, 0x5584787, 0xc809bda, 0x18e5253,
+ 0xf0cbe5e, 0x01b32c3, 0x0f987dd, 0xb9aa754, 0x6dfa4db, 0x628f4bb,
+ 0x891890b, 0x0255f0b, 0x874e590, 0x25b7df4 },
+ { 0x8ed5f95, 0xbded318, 0xca93023, 0x9dc428d, 0xbccf520, 0xc68f25a,
+ 0xe616e6c, 0xc4f3764, 0xa1d9993, 0xd9a57f1, 0x533431b, 0xd1964a5,
+ 0x02ab6d0, 0x06cd77f, 0x03e52e0, 0xa660791 }
+ },
+ {
+ { 0x5f72700, 0xab08864, 0x0a1a44e, 0xf77b2ff, 0xc2a24b5, 0x43ebdd8,
+ 0x4f564d7, 0xa6d6711, 0xf414160, 0x495df63, 0x76f6de6, 0xf5bacd7,
+ 0x7c2b43d, 0x3011aff, 0x3241928, 0xbb1e64c },
+ { 0x5034073, 0xf70c572, 0x68f1e97, 0x891c62a, 0xb22e374, 0xed8eb2e,
+ 0x7dbcc2f, 0xd3a53e9, 0xdc8f220, 0x1d06281, 0xace4393, 0x9eef48f,
+ 0xd2abecd, 0x96014f5, 0x2653ceb, 0x1da7e09 }
+ },
+},
+{
+ {
+ { 0xd00bc94, 0x7593318, 0xc7262a2, 0x586f3c6, 0x958ad31, 0xea68f52,
+ 0xd4e8bed, 0x6707fcc, 0xcb3f9ce, 0xb7e35d6, 0xf4b1be8, 0x2cbb6f7,
+ 0x7b41aee, 0xa535268, 0xf7b39b8, 0x1d77845 },
+ { 0xeaf9554, 0xb1f3995, 0xfe9e7d4, 0x3250f70, 0xa00c23c, 0x62e5d1b,
+ 0xc10e3bf, 0x5e422f5, 0xc25cec4, 0x7a18039, 0x7cc4d5b, 0xb4e66a1,
+ 0x36d0e0c, 0xad7c5f6, 0xa4cf347, 0x9f40b12 }
+ },
+ {
+ { 0x51e3696, 0x697f882, 0xab0a648, 0xc89bc40, 0x9785804, 0x8f261a5,
+ 0xb51a2bd, 0x4c7f900, 0x8a2dfcf, 0xd00e7af, 0xb642aeb, 0xf9c534d,
+ 0xb63df0e, 0xea2a79f, 0xf2f64a4, 0x392a69a },
+ { 0xc331b6c, 0x0c0f01c, 0x6a5edb5, 0x414bf2e, 0x5068391, 0xfe5ed81,
+ 0x62fbc34, 0x0a8078d, 0x54bca98, 0x78a4382, 0x3d727c7, 0xf7a49ae,
+ 0xab4dffe, 0x96c1de1, 0x3b9440a, 0x45901f7 }
+ },
+ {
+ { 0xacfe46e, 0x3f1189f, 0x4467443, 0xdca6f46, 0x2eb5bcf, 0xac38542,
+ 0x906bf72, 0xb02dce9, 0xfe1d454, 0xdd8cdac, 0x65f7218, 0xc26f04c,
+ 0x6ea145d, 0xb474859, 0x5bdb315, 0xc53dc6b },
+ { 0x9ad7197, 0xbe5be74, 0x18b5ecc, 0x627e919, 0x9ea405d, 0x57c889c,
+ 0x1a5360b, 0x2e5650c, 0x1b30b27, 0x42290df, 0x5242687, 0x4a07157,
+ 0xd379133, 0x553ed1f, 0x01db019, 0xb9d7a07 }
+ },
+ {
+ { 0x56597dc, 0xcfe551c, 0x925ebd6, 0x81af92a, 0xf4e8d57, 0x83efe16,
+ 0x1f640d3, 0x61bb431, 0x78b414a, 0xf80440f, 0x6c9e3b4, 0x72f3c63,
+ 0x6a03c66, 0xb55f43a, 0xe417037, 0x47a9ded },
+ { 0xdbb612b, 0x1a7e287, 0xdbb9220, 0x895c3c7, 0x6c04764, 0xd50c86e,
+ 0x53cf7ca, 0xed52698, 0xf74af55, 0xc78d799, 0xb969ff2, 0xb2ba0f2,
+ 0x1c6530b, 0x06d4815, 0x165a575, 0x764a1fe }
+ },
+ {
+ { 0xc1b5ece, 0x4383a3b, 0x54ff148, 0x0563c88, 0x5af796e, 0x9a45279,
+ 0x88e9953, 0xffba7c0, 0xb6a3001, 0xfe9fb5e, 0x25b6b19, 0x7950988,
+ 0xd81be5e, 0x67c899a, 0x2f9d29b, 0xc89ac8d },
+ { 0x29ab8f7, 0x7c76ba3, 0x6e40f74, 0xb2a18c9, 0x3864d9b, 0x1b5056e,
+ 0x9b582b8, 0xdfa503d, 0x7c9c68e, 0xfb03519, 0x6b3c22b, 0xdc50131,
+ 0xa6c96ff, 0x38ab231, 0x8cb1c10, 0x4ea527c }
+ },
+ {
+ { 0xc05b4ed, 0xd632f20, 0xb2a032d, 0xe0199fa, 0x26812d7, 0x3732956,
+ 0x013df13, 0x2aed855, 0x39f96ac, 0x92ca24b, 0xbb9751a, 0x620273d,
+ 0xf7437a1, 0x5d0d21e, 0x077de56, 0x9de2a43 },
+ { 0x11a4674, 0x0569b12, 0x89c3989, 0xfc3923e, 0x2c5c770, 0x3d12704,
+ 0x84e8c37, 0x0072b90, 0xac39f9a, 0x7178d4d, 0x778d345, 0x5f8292f,
+ 0x77c7307, 0x9e5bf0f, 0xc3a20f5, 0x7691610 }
+ },
+ {
+ { 0x705fe96, 0x7c4ead5, 0xc8e464c, 0x377ec35, 0x7689954, 0x3e5b990,
+ 0xa2d31ea, 0xc0f6949, 0xc580671, 0x839d395, 0xb215b09, 0x2f347a6,
+ 0x683df83, 0xfdcfa33, 0x6af39a8, 0x6e12cc2 },
+ { 0x13a3bd2, 0xae46ec8, 0x59366f8, 0x03a7d3b, 0xb87aed4, 0xe2029d5,
+ 0xfe1b83d, 0xbdc4e43, 0xdb8a1a8, 0x768437c, 0xea0dd7f, 0xe47acc3,
+ 0x62a0af4, 0x550e0cc, 0x1a20962, 0xcaf2cbc }
+ },
+ {
+ { 0xf28a78f, 0x5a784f7, 0x07e9724, 0x952a9b5, 0x1bab7a3, 0x8ac5e41,
+ 0xb7bc1e1, 0x1251e3f, 0xdc15e22, 0xe360f82, 0x95213f5, 0x3ac72da,
+ 0x4dcd47b, 0x65ee9ba, 0x3af5952, 0xdfeab7b },
+ { 0x26fd3c6, 0x34c5c80, 0xf3ac7ee, 0xd977b08, 0x7dba2f6, 0x003bd01,
+ 0xac98c8d, 0xcfc5cf8, 0x0e46922, 0x05eb604, 0xfaa9352, 0xc248b17,
+ 0x395c7a7, 0xfa41c0f, 0xb71ee44, 0x29931d4 }
+ },
+},
+{
+ {
+ { 0x07861c5, 0xac087bb, 0x5ae8240, 0x3bd37db, 0xf94518f, 0x94c68ec,
+ 0xff88a5b, 0xd32a378, 0x9b441d1, 0x42c8aaf, 0xfc07f12, 0x089db70,
+ 0xd3d4455, 0x211c386, 0x546b158, 0x1db9af7 },
+ { 0x51bc927, 0xdfd1b65, 0x0733df4, 0x69c0493, 0x2aeb586, 0xdc72cd4,
+ 0x823aa13, 0xeebdace, 0x56ad643, 0x51b3b3c, 0xd4e0426, 0xb983a99,
+ 0x69c4ecc, 0xa1e5b6c, 0x45e6668, 0x37cd382 }
+ },
+ {
+ { 0x9f73aea, 0x158ce6d, 0x14ff475, 0x36a7749, 0xdc0b018, 0x0d4e424,
+ 0x3946f09, 0xc2c4448, 0xfacda62, 0x7a7de3f, 0xb486709, 0x49a19e6,
+ 0xdb61da7, 0x65094d8, 0x8f5ee87, 0x09edfd9 },
+ { 0xb37226d, 0xe460fcf, 0x69bf470, 0x3b9d039, 0x247ca22, 0x3d4d511,
+ 0xc782cb1, 0xc7248d6, 0x00ad293, 0x91189a0, 0xe8abe75, 0x1244942,
+ 0xbf52cdb, 0x9f88d12, 0xbbbcadf, 0x368463e }
+ },
+ {
+ { 0x8074f45, 0x419e4b3, 0x0771c83, 0xd3f8e2e, 0x2e68d34, 0xd2743b4,
+ 0xb116a00, 0xc68b7db, 0xd84cc37, 0xfad2cf7, 0xb7a0f4d, 0xcfd27c0,
+ 0x190e587, 0x3b9e23f, 0x751ca9e, 0x7bab499 },
+ { 0xa8f12ee, 0x3270861, 0x31b36d5, 0xee1f38d, 0xe4c0eed, 0x748bb31,
+ 0x110ebad, 0x9be5c9b, 0xc8b6cb6, 0x728660b, 0x93d914a, 0x7bc9df7,
+ 0xc88c859, 0x73a4f2c, 0xb4e7f0e, 0xbe4a2fd }
+ },
+ {
+ { 0xa450e77, 0xe566ff8, 0x6a13aba, 0xb0b4006, 0xcd7dc90, 0x483a510,
+ 0x5fa9ccc, 0xb1a2013, 0xa80e67c, 0xeb0b631, 0x020801a, 0x7c34e1f,
+ 0xf4e447c, 0x0257dc8, 0x74c6f0f, 0x7abe7d1 },
+ { 0xb19a576, 0xf115a3a, 0x064ca0e, 0x8f0474a, 0x351f99b, 0x999bb6b,
+ 0x773edc3, 0x855254b, 0x427d717, 0x49f6c2f, 0x2e0cef2, 0x9f68253,
+ 0x2ee34f5, 0x1fe126c, 0x80150f7, 0x1ec2cae }
+ },
+ {
+ { 0xc005b7a, 0x862c5af, 0xec4ef17, 0x61adea7, 0x007b446, 0xf885fd3,
+ 0x9b0e30e, 0x25c129d, 0xfeec7e0, 0xbc10f25, 0xdf79ee1, 0x3901ac4,
+ 0xfe9e19f, 0xad49db7, 0x360d050, 0xc8624d9 },
+ { 0xbf3260b, 0xc74a576, 0x8c010c2, 0xbde8024, 0x09b6977, 0xf155329,
+ 0xd52dcf8, 0x6a5a82e, 0x29b9dfc, 0x4fbf59d, 0xc7b730c, 0x337d049,
+ 0x3a89cd4, 0xb3deac6, 0xad2f2eb, 0x1e07595 }
+ },
+ {
+ { 0x3b7c84e, 0xa0b0a4d, 0x8cf2b00, 0xf132c37, 0xeaaa8ec, 0x192814b,
+ 0x7b4b5df, 0xe7929f9, 0x42d0ab7, 0xf08a68e, 0x7b60cdd, 0x814afb1,
+ 0x7d9c160, 0x78c348c, 0x44db217, 0xf8a9488 },
+ { 0xeaa2578, 0xcdefd88, 0xbd0e260, 0xf717f56, 0x1694d02, 0x7754e13,
+ 0x181dbd8, 0x1254c14, 0x6e5f312, 0x0dacdd2, 0xcef87bf, 0xb8abdfb,
+ 0xe74e2ea, 0xb985972, 0x002b424, 0x1717621 }
+ },
+ {
+ { 0x162df70, 0x92cc75e, 0x18ee849, 0x1e20c06, 0x26aa590, 0xc036b46,
+ 0x4da5155, 0x31be67e, 0xf7213b0, 0x04911b5, 0xbb2e72e, 0x39261d7,
+ 0x5c015a3, 0x9e84466, 0x298ae67, 0x2f59fc0 },
+ { 0x1701fcc, 0xa3ea7ba, 0x0ebd651, 0x87a5fa9, 0x301d7b1, 0xa607ed4,
+ 0x3b2e271, 0xbd4ec5f, 0xdc4180f, 0x732a1a2, 0xfeaa8c1, 0xbe15d82,
+ 0x66f2f3f, 0x1036702, 0x9e79ce8, 0xccfd397 }
+ },
+ {
+ { 0x70a54ad, 0x82ab835, 0xe3bec75, 0x5c1dee8, 0x54b556b, 0xf583ff4,
+ 0xf461e60, 0x9220199, 0x87fc4e7, 0xdf61ca8, 0x0776dad, 0x6641fd2,
+ 0x8edd061, 0x00c6edd, 0x55f7e87, 0xaf9b142 },
+ { 0x9bbe3ec, 0x73f15e4, 0xf8bc1fa, 0xdd3b788, 0x1b8ff86, 0xb24cc07,
+ 0x41be58b, 0x6c260d2, 0x6b10ada, 0xec1c4e3, 0x7fdb985, 0xf6b4209,
+ 0xd47c212, 0x0d0ac85, 0x07d78d1, 0x967191c }
+ },
+},
+{
+ {
+ { 0x843d0f3, 0x3b11638, 0xf27f10e, 0x4b89297, 0x863ba2a, 0x477236e,
+ 0xadd280c, 0x1949622, 0x04da757, 0x7cd5235, 0x79e4ff7, 0xe0e99d2,
+ 0x537da41, 0xb4ef894, 0x5a24ff1, 0xc55dde4 },
+ { 0xb587521, 0x18d8e21, 0x3777833, 0x8010b5d, 0xd3a54c8, 0x4af522d,
+ 0x4c0ac13, 0x7cd476b, 0x4099f67, 0x4587e61, 0x605ee64, 0x494d0ed,
+ 0xcc80903, 0x3218ba2, 0x0b2e169, 0x5ff56aa }
+ },
+ {
+ { 0x3a06c69, 0x51ec94e, 0x5e65c52, 0xa26d7be, 0xd44ee96, 0x156f113,
+ 0xbf5b9b4, 0x70f0968, 0x5f5332d, 0x9b7e469, 0x6703829, 0x36c295f,
+ 0xd04f492, 0x1522690, 0x728043b, 0xcf35ca4 },
+ { 0x190a7c3, 0xf9ca3e1, 0xf971b07, 0x53d2413, 0x9c48b49, 0xae59652,
+ 0xfefff5c, 0x74672b8, 0xa7643b0, 0x0a3018b, 0x3e9b0a8, 0x51919e8,
+ 0xc932fb5, 0x89ad33d, 0x643e687, 0x52a4419 }
+ },
+ {
+ { 0xd2d0acd, 0x7778990, 0x487fdf1, 0x3bdbcce, 0x2b03dd2, 0xdc413ca,
+ 0x9a2b7d0, 0x278755b, 0x35ddd7f, 0x4ebb8b5, 0xbcbdb92, 0x0465152,
+ 0x671d051, 0x34f22d6, 0x87192b9, 0x1ba04c7 },
+ { 0x83560c1, 0xb1693f4, 0x7d174e9, 0xe08a593, 0x64dc9af, 0x47ffdc4,
+ 0xce8126c, 0x1123596, 0x1124628, 0x632d95f, 0xfee7c76, 0x66287ab,
+ 0xc552332, 0xb40fe60, 0xe304e1e, 0x3f11729 }
+ },
+ {
+ { 0x5030a8c, 0x97a6ea0, 0x09c27b2, 0x6924198, 0xac9dd5d, 0x3308501,
+ 0xbe73fdc, 0x9fed7fa, 0x0535286, 0xea55544, 0x6c9b832, 0xc7c07ab,
+ 0xc51b967, 0x178c882, 0x86ee075, 0x6fa0c69 },
+ { 0xb8b5c4a, 0xbaa4a15, 0x3130c0a, 0xf83c0ea, 0x2800331, 0xcf8624b,
+ 0x7ccbcb8, 0xade85cd, 0xf08445d, 0x971d7f6, 0x6a546dc, 0xfd480b7,
+ 0xc93761c, 0xdc15a38, 0x9d04631, 0xc4c495c }
+ },
+ {
+ { 0x9470efe, 0x5f4cee8, 0x88d93ad, 0x9fe8961, 0xf4e49ce, 0x24783b3,
+ 0x52ffb3e, 0x1bc7ed7, 0x6d81e17, 0xa3abe6a, 0x7a333c3, 0xd6bb8b4,
+ 0x10a3527, 0x3485c0b, 0x31a9d10, 0x7cddc9c },
+ { 0xc38ca37, 0x0c78112, 0xdd2f8d8, 0x10e249d, 0xc511911, 0x72c88cc,
+ 0x29a6c84, 0x4d75b5a, 0xa227b1e, 0xc74b267, 0xf8e35ad, 0x698390c,
+ 0xe98d230, 0x8f27edf, 0x6bdc7f4, 0xec922f2 }
+ },
+ {
+ { 0xfc32e11, 0xac34023, 0x47200d1, 0xe0ae2f5, 0xbd98c82, 0xa7c7492,
+ 0x7b02154, 0x3910b68, 0xe28ab6d, 0x6fdd06c, 0xd98b012, 0xd3a7e49,
+ 0x9f54207, 0x4c1c82b, 0x45c176f, 0xef5bbe6 },
+ { 0xd3e71eb, 0x3d17960, 0x080e70c, 0x90d7e84, 0xbff5d9e, 0x83e6438,
+ 0x535d85c, 0x1877e1f, 0xfbb69cc, 0x931ed6e, 0x1247848, 0xcf96265,
+ 0x750da4e, 0x76d618b, 0x717fbf6, 0xc076708 }
+ },
+ {
+ { 0xeec5126, 0x80a5ac5, 0x3379c80, 0x6d05dd1, 0x2336d32, 0x514b089,
+ 0x6725137, 0x586c006, 0x574f954, 0xab2365a, 0xac7d356, 0x3c89ea0,
+ 0x27460ba, 0xf1f2edd, 0xab9870f, 0xf200ddb },
+ { 0xa35e885, 0xc8f1b2c, 0xe6e7550, 0x5d22f86, 0x9554615, 0x24b9a40,
+ 0x616314f, 0xcb41107, 0xc976a11, 0xca752f0, 0xa08291a, 0x3e2f839,
+ 0xf2c420e, 0x0cff22f, 0x82b9747, 0xafd603e }
+ },
+ {
+ { 0x810a3da, 0xaddeddc, 0xd3a87bf, 0x78b6c2d, 0xde3a04c, 0xbc7020b,
+ 0x9b6d045, 0x47ab973, 0x0959358, 0x3b046d6, 0x509ee3e, 0x0f953e7,
+ 0x69fc61b, 0x803dc86, 0x893c8d4, 0xcceaec0 },
+ { 0xb048a45, 0x21f8c40, 0xfcaea8a, 0xb535073, 0x90e360b, 0xe712c35,
+ 0x8403338, 0x5d0f3f4, 0x7207f2d, 0xe0ea26c, 0xffd9e05, 0x20f6b57,
+ 0x4788b00, 0xb97d68e, 0x1889cce, 0xb121554 }
+ },
+},
+{
+ {
+ { 0x464238e, 0x0079817, 0x0d381ca, 0x2110302, 0xd9f01b5, 0x1cc4c6e,
+ 0x5a131b1, 0x5e35dc5, 0x06944eb, 0xb61848d, 0x29631a3, 0x83792a0,
+ 0xafca0dd, 0xbe1017f, 0x782fcbb, 0x70aaa01 },
+ { 0x99945e7, 0xc63b7a0, 0xc4486c1, 0xe9164ec, 0x885f2c1, 0xb133e35,
+ 0xc99ae02, 0x186f0d3, 0x2bf53e6, 0x2fca492, 0x48a02bc, 0xf922aa2,
+ 0x0dd3dca, 0x4fe6490, 0xf6a8207, 0xe8c313f }
+ },
+ {
+ { 0x97caf1e, 0xc5b3583, 0x922a4b6, 0xa001922, 0xdf07c95, 0x67e36be,
+ 0xb2f4f34, 0xabaa0ae, 0xdedc333, 0x66dc926, 0x38ec5b3, 0x82021c4,
+ 0x00ab176, 0x82b4f26, 0x69c45af, 0x1b7c22e },
+ { 0x0924ad9, 0x07b0dbe, 0xa407dde, 0xe030936, 0x26ccd06, 0x66e1ce9,
+ 0xe3505a9, 0xb50c108, 0xda98f51, 0x8b921e1, 0x20cf7c7, 0x449ca1a,
+ 0xe67d079, 0xadb80c7, 0x834372d, 0x205aa54 }
+ },
+ {
+ { 0x19bf847, 0x1482b48, 0x5906f0f, 0xd6c16ab, 0x23ad060, 0x323fb17,
+ 0xc832be7, 0x0346389, 0x2ee45bf, 0xe71b2d8, 0xfb22276, 0x761c37d,
+ 0x5d70be2, 0xa9b3334, 0x5a0627a, 0x81a0656 },
+ { 0x99a6282, 0x3377503, 0xd0436f0, 0xafc8d2e, 0xc53342f, 0x22f71d3,
+ 0x8939ad3, 0x66ca56d, 0x30e09ba, 0x15a9192, 0xa6de890, 0x261091e,
+ 0xe78f2d5, 0x609d700, 0x8eaaf78, 0x8aa52ee }
+ },
+ {
+ { 0xce76258, 0xa398788, 0x494b975, 0x3031d07, 0x043dfe2, 0x4a6d652,
+ 0xb4401ec, 0xdb1a849, 0xce8bbcc, 0xf81ebbb, 0x16efe9e, 0x937dd47,
+ 0xef85ecc, 0x9c19350, 0x214273b, 0x260d932 },
+ { 0x77bf1a3, 0x1d7e21e, 0xa544eb7, 0x199d689, 0x94ced50, 0x9da5941,
+ 0x8a0aeaa, 0x71a60be, 0x26d3b51, 0x183a0ae, 0x8df9728, 0x49f176a,
+ 0x3230674, 0x744376e, 0xe25541c, 0xb2cb21a }
+ },
+ {
+ { 0x9a0071f, 0x7a72158, 0xe7d2a6b, 0xe19dd29, 0x55113f0, 0x3deb34e,
+ 0xede573b, 0xef1f8eb, 0x5665e37, 0xa8f7ff9, 0xf2d7777, 0xa2c21ea,
+ 0x91e2e39, 0x1387afa, 0x7db68f6, 0x04057b9 },
+ { 0x1c241f7, 0x8b9d5ae, 0x8e75993, 0x689588a, 0x5c0e2d4, 0x79585b4,
+ 0x7b64974, 0xba1ef16, 0x1c08a75, 0x72685bc, 0xd572edd, 0xf0a5814,
+ 0x5ab0e70, 0x71464a3, 0x339aea7, 0xc93c92b }
+ },
+ {
+ { 0x5b8a87d, 0x1917e2a, 0x3a82756, 0xea5db76, 0x6420e2b, 0x5bba2fb,
+ 0x019372a, 0x5cc0501, 0xccc5efd, 0xb1ef8be, 0xf49c57d, 0xaf06393,
+ 0x87a0bc4, 0x3ab1adf, 0x34fe6b6, 0x2ee4cca },
+ { 0x6b8ba9b, 0xd160668, 0x7efec13, 0xef137d9, 0x50abb76, 0x7b60465,
+ 0xf753a00, 0xb40ec2b, 0xeaf8f1d, 0x696ed22, 0xd8ba3d8, 0x398c91f,
+ 0x37db313, 0x11f2034, 0xfe5079e, 0xe1ec33b }
+ },
+ {
+ { 0xbdc81f0, 0x8a10c00, 0x6fe8e05, 0x5f39256, 0x14a368e, 0xa595dab,
+ 0x38cec6b, 0x32b3181, 0x1b00d00, 0xd77afde, 0x4d9923d, 0x3c97928,
+ 0x76e13dd, 0x78f0e7a, 0xbf75675, 0x5ee8e59 },
+ { 0x91b130c, 0x49ec893, 0xa47a441, 0x9416182, 0x76e2ce8, 0x54555b5,
+ 0x349c40b, 0xcbdd2fd, 0x9392bbe, 0x10ae737, 0x2e2dab0, 0x270b111,
+ 0xaf293f4, 0x5cb7712, 0xd6095c6, 0xfc22a33 }
+ },
+ {
+ { 0x0f15878, 0xdcb5bbd, 0xb6bba48, 0xbcf27ad, 0x7b70eba, 0x979913e,
+ 0x158578a, 0x4c0f34b, 0x6ed6088, 0x53f59a7, 0x75b0fc2, 0x19b3b2c,
+ 0x0153f3c, 0xad628dc, 0xcec1607, 0x5195a2b },
+ { 0xdfe0f7a, 0x95f8b84, 0x152920b, 0x935c6b0, 0x4da1056, 0x25f9e31,
+ 0xb28c229, 0x4910a94, 0x8ee4d6e, 0x54b03b4, 0x694e3ed, 0xc991fc3,
+ 0xdbe5709, 0x68c4c26, 0x63d7657, 0xc9cfce4 }
+ },
+},
+{
+ {
+ { 0xf52a44e, 0x21c9227, 0xe85bfbd, 0x7f105a2, 0x6268fc2, 0x887781f,
+ 0xa2d7e35, 0x56ee808, 0x2d3930f, 0x14f9de5, 0xdcb561a, 0x4a4e356,
+ 0x7f95598, 0x8736226, 0x5f34151, 0x211c342 },
+ { 0x0eaf9cb, 0x8fcb75b, 0x3d60ce2, 0xcc9edf9, 0xa5fe627, 0x54412c9,
+ 0x842dd09, 0x6036a72, 0xa6c6099, 0x71ce668, 0x5386764, 0x02b30d7,
+ 0x6f18e23, 0xb69bed3, 0xd1de9f4, 0x124c9b1 }
+ },
+ {
+ { 0xe69b531, 0xe8f8d95, 0xaff1049, 0xe1e115e, 0xeddea0c, 0x9087cd1,
+ 0x7449916, 0x8ed55a5, 0x7808404, 0x8009f54, 0x17fea55, 0x990f216,
+ 0xfe8ecf9, 0x68ba624, 0x56d1f47, 0x8ac2950 },
+ { 0x529dfb0, 0x3257887, 0x244c080, 0xc4a613f, 0x28672fa, 0xabb1ac0,
+ 0x31eb291, 0xb2915c5, 0x8fababa, 0x6e368ca, 0x1fde498, 0x6b8c259,
+ 0xf2a548c, 0x67724a1, 0xf90409b, 0x6b3b7e8 }
+ },
+ {
+ { 0xfae20aa, 0x5415003, 0x85df5ce, 0x95858a9, 0x0ac6bee, 0x42bc987,
+ 0x39ea1a9, 0x8d843c5, 0xb571043, 0x5de200c, 0x1741a33, 0x084fcd5,
+ 0x0009d1c, 0xe1ca20c, 0xe957e6d, 0x0271d28 },
+ { 0x9e3be55, 0x84cbf80, 0x1c578c6, 0xc804dda, 0x409a93a, 0xea85489,
+ 0x972021d, 0x64a450a, 0xe681312, 0xc6a2161, 0x65bc111, 0x280bff9,
+ 0x0f8526f, 0xd358a4b, 0x953a3ab, 0xd967be8 }
+ },
+ {
+ { 0x7dd066c, 0x4c5e615, 0x634c8d4, 0x37afd33, 0x42d8b87, 0xa3ac88a,
+ 0x938b607, 0x9681e9b, 0x37fe4c8, 0x7a286ab, 0x2494245, 0xdeee574,
+ 0x6af75a8, 0x184b9d3, 0x3670c04, 0x20f696a },
+ { 0xa39e8b9, 0x1340adf, 0x0850b2e, 0x03c1929, 0x2c0e1ef, 0x435ebd4,
+ 0x142ee9b, 0x49de18b, 0x3f116f2, 0xb440b27, 0x2214463, 0xd94e9fa,
+ 0x6311543, 0x1b0ddd3, 0x991ba3c, 0x1ae042a }
+ },
+ {
+ { 0x5bb47aa, 0xbc322f8, 0x54a5845, 0x9e25625, 0x21115f3, 0x96b65ae,
+ 0xbb5757b, 0x46fbed4, 0x4c42dce, 0x18aec4f, 0x8d801f0, 0xc59caf6,
+ 0x1205521, 0x9189463, 0x89feb7a, 0x66bd8e0 },
+ { 0xc529ee7, 0x39ebe95, 0x8eadb99, 0x28d8992, 0x6927544, 0x6058c78,
+ 0xd3808ec, 0x877e7a5, 0x1c52eaf, 0x8f65111, 0xae221cd, 0xfb59812,
+ 0xf890391, 0x22289c6, 0x4966e92, 0xa97695b }
+ },
+ {
+ { 0x6ff10f0, 0xf0a9122, 0xa2a65c8, 0x49a931b, 0xb1d3cb0, 0x3fcebbc,
+ 0xca9685f, 0x70eb79b, 0xab38cb6, 0x82520b5, 0x76304c3, 0xccf991b,
+ 0xaf8b07c, 0x575aab1, 0x5ed5efb, 0xec8166a },
+ { 0xc8689b1, 0xddc5698, 0xb2e78d7, 0x227c949, 0x8e07d91, 0x6132321,
+ 0x22cfd62, 0x658a11d, 0x004dd5f, 0x908fb44, 0x90d21b1, 0xe3d14f0,
+ 0xa6a1639, 0x6f3db9d, 0x333a525, 0x09d86c0 }
+ },
+ {
+ { 0x6f043f7, 0xd83eaf0, 0xb52d5f6, 0x88ab648, 0x57144d7, 0x67c664d,
+ 0xeafc8b5, 0x55d7644, 0xcceb291, 0x1c89f20, 0x831ac47, 0x51aec7b,
+ 0x6148854, 0x51172fa, 0xf6d7bfe, 0x8fabf7e },
+ { 0x477ee27, 0x5910316, 0x20fe61e, 0x5f299dd, 0x42826ab, 0x48079a8,
+ 0x22591fa, 0xf4a83ba, 0x55482ec, 0x8fac660, 0x6b65b3b, 0x48fd5f1,
+ 0x9fd9e19, 0x4288a7c, 0x9377894, 0x27db819 }
+ },
+ {
+ { 0x7fd9dd6, 0x2936ee4, 0x9ec87c6, 0xcce5f0e, 0xdb6e3b4, 0x15a50e3,
+ 0xad701c8, 0x61df105, 0x1dff1f7, 0x3601add, 0xe8a16e1, 0xb761e06,
+ 0x1af3f91, 0x4341e02, 0x933fa3f, 0x9156a4a },
+ { 0x54bc01d, 0x9dc46ae, 0x64eb910, 0x605577a, 0x5a59a99, 0x22b99f8,
+ 0x0a229d8, 0xab2dbaf, 0x6599364, 0xa8bfb65, 0xe94ebf0, 0x39ed4a5,
+ 0x0dbb23e, 0x7b46a1e, 0x8751422, 0x117b195 }
+ },
+},
+{
+ {
+ { 0x423bddf, 0xd19e8fd, 0x387ef59, 0x9d77042, 0x849590a, 0x315cbdd,
+ 0x7866c1e, 0xfdc637c, 0x03515a6, 0x72be83d, 0x0376780, 0xd44a4a0,
+ 0x19e0c2b, 0x3b96131, 0x7b1a689, 0x023aca3 },
+ { 0x82282ea, 0xf5f3687, 0x8a8b5c7, 0x4471089, 0x17a3066, 0xcd2f00a,
+ 0x81ed681, 0x754e112, 0x0bfcefd, 0x9c6c70c, 0x3b6f29b, 0xd6aced0,
+ 0x2817a2a, 0xe443d56, 0xe7c0012, 0xe590ef4 }
+ },
+ {
+ { 0x3e62e2a, 0xc2f9676, 0xb2daa26, 0x661816e, 0xdd5f512, 0x3515fd2,
+ 0x56b6e75, 0xdc36e27, 0x74cc658, 0x0bdde46, 0x00e7644, 0x1029086,
+ 0x1694a09, 0xfdf0045, 0xceac169, 0x454bcb6 },
+ { 0x6481eb6, 0xf4c92ab, 0x09750e7, 0x8b77afa, 0x6362d6d, 0xe6f4231,
+ 0xf53a3ae, 0x0d45dee, 0xd7dcf98, 0xdac7aac, 0x125ec4a, 0x628cb7f,
+ 0xaec0320, 0x41e8a20, 0xea2e35b, 0x7418c7e }
+ },
+ {
+ { 0xdf40519, 0x4d649ab, 0x3525833, 0x8cb22d4, 0x7a5333f, 0x15f6d13,
+ 0x72c23ee, 0x8c3991b, 0x0cd44a3, 0x248b9a5, 0xccc1a75, 0x6b4c4e0,
+ 0x15c99a9, 0x3221efb, 0x0a9c504, 0x236d504 },
+ { 0xd559100, 0x401c7fb, 0x07c524d, 0xcf0e075, 0x34a9275, 0x39647c0,
+ 0xf7e8683, 0x2355422, 0xb3ae670, 0x3e0a16e, 0xad61b7f, 0x1c83bcb,
+ 0x9ca6cbe, 0x491bcb1, 0x5e29458, 0xe668dc4 }
+ },
+ {
+ { 0x219379e, 0xe44c65b, 0xbb607ee, 0x211381b, 0xb7bc6db, 0xd4c7428,
+ 0xb76a2e8, 0xba62a03, 0x8bb0b31, 0xe1729c9, 0xc6bbc10, 0x3caeb50,
+ 0xb0187aa, 0x6c66727, 0xfb90dcf, 0xbf9d2f0 },
+ { 0x1184dc6, 0xec69350, 0x2698eb5, 0xd58d2a3, 0xa316b07, 0xb366d8d,
+ 0x251c017, 0xe1e39bb, 0xadb157f, 0xbe44ba9, 0x8a8b06c, 0xbaa9a9a,
+ 0x6e473e1, 0xd0f4635, 0x1d681c6, 0xd25a8f6 }
+ },
+ {
+ { 0xcb102c7, 0xba39d5f, 0xd8aa1eb, 0x66eba21, 0x697fbf4, 0xcc2591a,
+ 0x2317f54, 0x5adb579, 0xf76c6f9, 0xa01ae71, 0x5042705, 0x2c525de,
+ 0x4f4479f, 0xc8f4272, 0xe6d7a5b, 0x26ab54a },
+ { 0xdc28106, 0xda217b5, 0xeb2ae6a, 0xc7cadea, 0x53ea3b2, 0x0b16094,
+ 0xcc6111b, 0xcddcc1c, 0xa7a7beb, 0x5c47aff, 0x0e52dab, 0xf9931bd,
+ 0xc6dcf96, 0x5231835, 0xf27ea4e, 0x7095bde }
+ },
+ {
+ { 0xc33b4e2, 0xee8adae, 0x63ceb44, 0x3006651, 0x880b086, 0xf1476fb,
+ 0x9569ce8, 0x0703328, 0x238b595, 0x2cabf9a, 0x26c8158, 0x85017bc,
+ 0x68d5144, 0x420b5b5, 0xf9c696f, 0xa9f5f1e },
+ { 0xc8fec5a, 0x1409c3a, 0x28e9579, 0x541516f, 0x0e1f446, 0x06573f7,
+ 0x2311b96, 0x3e3c706, 0x3c2ffd8, 0x0033f1a, 0xca6711c, 0x8e808fc,
+ 0x07aef98, 0x716752d, 0x92525b3, 0x5e53e9a }
+ },
+ {
+ { 0x5a1c29f, 0xce98a42, 0x3ca6dc9, 0xaa70348, 0xedfa48b, 0xe77d822,
+ 0x068abca, 0xd2e3455, 0x482cfca, 0xb456e81, 0x7fbfb08, 0xc5aa981,
+ 0x8243194, 0x8979f25, 0x2cd043d, 0x727f217 },
+ { 0xaa53923, 0x7cca616, 0xe9bcb72, 0x387c5ae, 0x37580bb, 0x0173fd4,
+ 0x75fc0d9, 0xdd7795b, 0x345deae, 0x47d1c37, 0xb0d1c03, 0x2eb5d7f,
+ 0x958f002, 0xf7a1b92, 0x8f61b67, 0x7365cf4 }
+ },
+ {
+ { 0x562a5ed, 0x4b22c3b, 0x5c7cd07, 0x711216f, 0x9ba0648, 0x51f72c4,
+ 0x0de9e6f, 0xc10d093, 0xfda63ba, 0xaca479b, 0xaf532b0, 0x4722a55,
+ 0x7236f39, 0x8d59eb7, 0x4465c34, 0x5cad874 },
+ { 0x722b0c1, 0xa2119e5, 0xf343ea4, 0xb670264, 0xc19f387, 0x6910f02,
+ 0x0381fba, 0xcfec5bc, 0x52c0a1d, 0x5f5de0d, 0x6378cb6, 0x4e474d5,
+ 0x27e2ba3, 0x2fc8027, 0x159b541, 0xa215da3 }
+ },
+},
+{
+ {
+ { 0x8499895, 0xed53585, 0x65c998d, 0xa0aefd5, 0x2d5a561, 0x210d850,
+ 0xa2cd9d6, 0xc2cc23c, 0xc4d297e, 0x2371d46, 0xd18d441, 0x88b2143,
+ 0x043993d, 0xbebdad9, 0xad5f28d, 0x6ba91e7 },
+ { 0x3a731f4, 0xc2bb3f1, 0x5d0d5c3, 0xd35cfac, 0x35ac427, 0x9950998,
+ 0x5458adb, 0x8938bb5, 0xab26f3b, 0x0bd738c, 0xa28cd8d, 0x56db3d5,
+ 0xa1d8b4b, 0x87eb95f, 0xe7f3b4b, 0xd6700ef }
+ },
+ {
+ { 0xea1e57b, 0x962c920, 0x6dded6d, 0xd3be37e, 0x2c96a73, 0xf499b62,
+ 0x6c99752, 0x3eaf7b4, 0x025590b, 0xa310c89, 0x721db23, 0x535aa4a,
+ 0x19714a0, 0x56ab578, 0xd4048c1, 0xeecb4fa },
+ { 0x470c466, 0x7b79ec4, 0x1383cee, 0xc4e8f2e, 0x5750c45, 0x0f5d776,
+ 0x725527d, 0xa3b3bc3, 0x6d00cce, 0x2f5deb6, 0x95a8d81, 0x5d5a0f4,
+ 0xe02b824, 0x50a442e, 0x2a11628, 0xafb0446 }
+ },
+ {
+ { 0x0c613de, 0x72b67bc, 0xe6f0b24, 0x0150d4b, 0x8ed289d, 0x847854e,
+ 0xa320f88, 0xe08292f, 0x29c6160, 0xd5b6da3, 0x4fb9d06, 0x2a48e2d,
+ 0x2de087c, 0x55d9e41, 0x4f02100, 0x65683b5 },
+ { 0xa8886c6, 0x4dc8c2e, 0x20d6114, 0xe966dd2, 0xa57af97, 0x99745eb,
+ 0xb854725, 0x23a9a71, 0x621a047, 0x8effe05, 0x049a4be, 0xf16d284,
+ 0x5b0660f, 0x95828c2, 0x56e96b0, 0xd5b69ba }
+ },
+ {
+ { 0x4ffa0b8, 0x0b5b424, 0x096cc5e, 0x0585b45, 0xf505d37, 0x413e1ae,
+ 0x0c7ab8d, 0xe5652a3, 0x2990120, 0xab32fb7, 0x3f09368, 0x6b8b16e,
+ 0xefe128e, 0xbf9fadb, 0x14b7671, 0x85f366b },
+ { 0x090608d, 0xcb2f294, 0xac3045f, 0x25e2769, 0x6131904, 0x069c4f0,
+ 0x329a779, 0x1c57cf1, 0xb7cace7, 0x72fe0d5, 0x0897a45, 0x04d9f43,
+ 0x359a645, 0xbaf32f6, 0xfa7485a, 0x0fa854f }
+ },
+ {
+ { 0x5f56f60, 0xae3533c, 0x0ad9360, 0x9773bbb, 0x38fbe6b, 0x769b34a,
+ 0xffb0c00, 0xb5ba8e9, 0x75472e4, 0xa939318, 0xce5f30f, 0x12cac92,
+ 0xa9e7dbc, 0x514fc06, 0x58b4734, 0xd7ca865 },
+ { 0x65a730b, 0xd101ff3, 0xabe70e9, 0x92da451, 0xef7bf4b, 0xfb5f94a,
+ 0x1d56c7b, 0x8c3ef4c, 0x8435c10, 0xb085766, 0xe7ed4cc, 0x7fbbbda,
+ 0x24f372f, 0x1da6eaf, 0x59b8ae3, 0x0ab2c1f }
+ },
+ {
+ { 0xf10a4b9, 0x63a1a78, 0x0c7e510, 0xbb5278d, 0xf874142, 0x97b224e,
+ 0xb2517b1, 0x0a9ff52, 0xc5cd920, 0x1b5a485, 0xa1823b9, 0x1a8e2eb,
+ 0x0e914a8, 0x2b088c0, 0xcf13432, 0xe5ec3ad },
+ { 0x6e7e253, 0x0d6ab3e, 0x6f18458, 0x9f0f5cd, 0xf459a6d, 0x839a744,
+ 0x1eb15f7, 0xb4b4f94, 0xc72cb14, 0xe0313ac, 0xb20472d, 0x58ee933,
+ 0x872543e, 0x5f73d7a, 0x501f067, 0xb1700c5 }
+ },
+ {
+ { 0x085f67f, 0xb70428e, 0x43cabe5, 0x5441d51, 0xe0a6055, 0x4d0e8c2,
+ 0x0882e4f, 0x8d39a08, 0xc1cb39d, 0x615bb32, 0xf7a1642, 0x113f18d,
+ 0x250681f, 0xbab8cf5, 0x677b72a, 0x3017ba2 },
+ { 0x5a3a876, 0xcd2b6e9, 0x2035a69, 0x0476501, 0xefa2ea0, 0x31d6440,
+ 0x56874d5, 0xde8f8d1, 0x0199d4a, 0xcbc71cd, 0xe7f2170, 0xc546b61,
+ 0x112c4c3, 0x4e57e4e, 0xd1622ba, 0x58955a8 }
+ },
+ {
+ { 0x04e2f6f, 0x0064cd7, 0xe0edd38, 0xe9d458d, 0x7e0a5c8, 0xeb1a597,
+ 0x01fc0a8, 0xe322ece, 0x1032a19, 0x8b9d166, 0xa89de94, 0x3e7b539,
+ 0x001c754, 0xfa30262, 0xdb588f6, 0xe33de4d },
+ { 0x954eb94, 0x4dafbdb, 0x0584c1b, 0xbb43648, 0x5dbe29b, 0x622c93e,
+ 0xf57b931, 0x968f9e3, 0x0f6453b, 0x98f03be, 0x08f696c, 0xb0ecc7f,
+ 0xa505335, 0x5af55f4, 0xfb3fa9b, 0x028533e }
+ },
+},
+{
+ {
+ { 0x27e8d86, 0x3bc8e68, 0x63f105a, 0x4e43b30, 0x4981250, 0x5301b7d,
+ 0x9f72fa8, 0x8b0a75e, 0x357348c, 0x88f59db, 0xec4208e, 0x5f0ebb1,
+ 0xc043d3b, 0x4712561, 0xc806b97, 0x9e5ded0 },
+ { 0x2121d09, 0xf9bd0a6, 0xe337cd1, 0x1759ecb, 0xe945542, 0xd1acc0e,
+ 0xbd2f63a, 0x3683feb, 0xda5dfe9, 0x44f1bcc, 0x707f22f, 0xa3606c9,
+ 0x2d96ca5, 0x45ef064, 0x9022df9, 0xfc3107d }
+ },
+ {
+ { 0x44be755, 0xe81320b, 0x5c7c761, 0xdf213d5, 0xb4e5db9, 0xf43d2d5,
+ 0x8dedcd2, 0x3bcfd82, 0xd37a9ec, 0xdf368a6, 0xf475a77, 0xfef20ae,
+ 0x162c064, 0x22f5894, 0x0142a7d, 0x956bc66 },
+ { 0x7daec78, 0xaaa10e2, 0xb6e9a78, 0x3cb9b72, 0xe383f72, 0xa740bad,
+ 0x7759007, 0xc31b401, 0xa7afc50, 0xdada964, 0xfd3d11f, 0x6bf062c,
+ 0x5db3679, 0x9470d53, 0x03abf13, 0x3394473 }
+ },
+ {
+ { 0x46e5d7f, 0x533f440, 0x49048c8, 0xd1793e3, 0x1929b94, 0x59e1150,
+ 0x8364134, 0xcddbbcb, 0x582774f, 0x795c794, 0xe03081a, 0x114dfc4,
+ 0xef54042, 0x541ef68, 0x23f18cd, 0x159295b },
+ { 0x48a2c8c, 0xfb7e2ba, 0xbb6d116, 0xe2d4572, 0xd750b53, 0x7bb0b22,
+ 0xd142ee8, 0xc58888c, 0x90c9e2d, 0xd11537a, 0xd02eb9e, 0x77d5858,
+ 0xd444a79, 0x1fa4c75, 0xd58a68d, 0xf19b2d3 }
+ },
+ {
+ { 0xeb8b90f, 0x37e5b73, 0x3f2a963, 0x3737f7a, 0x9de35e0, 0x87913fa,
+ 0x8731edd, 0xec7f992, 0x219491e, 0x6e6259e, 0x4de236c, 0xb2148a0,
+ 0xfdd309b, 0x89700e8, 0x9f0bf80, 0x9ce51e4 },
+ { 0x301f17b, 0xe7ec421, 0x3bc5f4f, 0xa4b570a, 0x1285ee2, 0xc2b1b2a,
+ 0xc53db73, 0x5e86bc8, 0xf24fa90, 0xb65fcea, 0x08ab024, 0x9e74c56,
+ 0xf9ed877, 0x5c8003d, 0x4a2cbbc, 0xa632e9e }
+ },
+ {
+ { 0xc91c8b5, 0x32a4546, 0xc969363, 0xc122b5a, 0x3648b3a, 0xbbbec5e,
+ 0x25143b0, 0xd5a365e, 0x54157ce, 0xcf3e464, 0xf9bab64, 0x9712f04,
+ 0x04b4008, 0xc12d43a, 0x2edf1c7, 0x51932d7 },
+ { 0xb2f8470, 0xaef1655, 0x6c24ace, 0xaa8e3f3, 0x6b4e761, 0x7da75da,
+ 0xb90bca2, 0xd371827, 0x0afb45c, 0x84db450, 0xef46b5d, 0xae12045,
+ 0xd962f98, 0x91639a5, 0x72f2ac0, 0x669cbe6 }
+ },
+ {
+ { 0x83a4356, 0x851bb31, 0x9a1bf15, 0x7d436bf, 0x120b378, 0x46a3f0e,
+ 0x3f5b357, 0x9302abc, 0x93fef53, 0x1e06726, 0x5fd2ee9, 0xb12f4a9,
+ 0x7de9433, 0x94a884c, 0xa6f2874, 0x2645234 },
+ { 0xcdb8dfa, 0x6fb56f5, 0x9e0ee4e, 0x4a17dfc, 0x83ab01e, 0xe269d83,
+ 0xb77c10f, 0xda932da, 0x0321243, 0x463af0c, 0x16fc8a3, 0xbe1d682,
+ 0x48b39e3, 0x2eae3ea, 0x3b03e7b, 0x9423021 }
+ },
+ {
+ { 0xb22f28a, 0xaeb507c, 0x49a6b44, 0xa77458b, 0xc03dc17, 0x232ed5a,
+ 0x9c61ac6, 0x79dfc16, 0xcd71b93, 0x7c48be9, 0xc429cd9, 0x983d68a,
+ 0x98ae2c8, 0x7709c47, 0xa5df075, 0xe4765c0 },
+ { 0x3367f33, 0x23c4deb, 0x37d72a7, 0xbdf2b7e, 0x0af2d26, 0xbaab5c7,
+ 0xfd026ab, 0xd609f7f, 0x541b039, 0x23b72b2, 0x83be852, 0x8d06bac,
+ 0xcb23d1c, 0x911d4a9, 0xfb0dbd7, 0xeae815c }
+ },
+ {
+ { 0x2c33481, 0x487c35c, 0xb6136db, 0xffab636, 0xa3d3aa4, 0xccd4dae,
+ 0xc3704e0, 0x87149bb, 0xc0e8396, 0x9de8119, 0x58e7ca6, 0xd49357a,
+ 0x1562d75, 0x6878918, 0x5ab1fad, 0xc745381 },
+ { 0x02c9b91, 0x0f15798, 0xb1ddde5, 0x7ffc3f0, 0x6aae50d, 0xa01d5e0,
+ 0xe279873, 0x6a97e65, 0xb5b1b41, 0x4bcf42f, 0x32f5982, 0x1c6410f,
+ 0x50701c8, 0xd4f7600, 0x873b90d, 0xff02663 }
+ },
+},
+{
+ {
+ { 0xe5b2de2, 0xdc53ea2, 0x38acecb, 0x94b352d, 0x0d9d5e5, 0x37d960b,
+ 0x90bd997, 0xabd868f, 0x35a7376, 0x781668f, 0x10118bf, 0x043d597,
+ 0xf57928a, 0xd4da719, 0x983e46c, 0x01942f6 },
+ { 0x728bd76, 0xab97fc8, 0x4b5c1c5, 0x825956b, 0xc82a104, 0x202809f,
+ 0xc8e3132, 0xdb63e9c, 0xc2181af, 0xa41c701, 0x43e066a, 0xd280180,
+ 0x24044ce, 0xc734e41, 0x505193c, 0x4d9ab23 }
+ },
+ {
+ { 0xf9f0c3f, 0x0bcd42a, 0xb94a218, 0xda21a46, 0x0ffc788, 0xe55243c,
+ 0x47a5551, 0x318aae6, 0x79af9cb, 0x8c2938b, 0xec1dce5, 0x5d15232,
+ 0x8ad2e5c, 0x3d310ba, 0x94f792a, 0xd3d9724 },
+ { 0x12a9553, 0xdeb4ca1, 0xeb54d9d, 0x2f1ed04, 0x69fb7a1, 0xaa9c9cf,
+ 0x54dcd3a, 0xeb73c3a, 0xf5f201f, 0xee3eddc, 0xba7d234, 0x35f9e1c,
+ 0xd2e242f, 0x1d1d04c, 0x0df7515, 0x48df9d8 }
+ },
+ {
+ { 0xa81dd9a, 0x4ecc77d, 0x03aa015, 0xa6ac4bb, 0xbbc4fed, 0x7645842,
+ 0x9d6cf52, 0x9ae34cd, 0x5917e0b, 0xf8ff033, 0xc2cc175, 0x7c9da37,
+ 0xaaacfbe, 0x1e74dcc, 0x7999af8, 0xa8f2df0 },
+ { 0x102a466, 0xd06c4ea, 0xae190dd, 0x2156e87, 0xec4a863, 0xc95db8a,
+ 0x244a6fe, 0x49edffd, 0x904f81e, 0x110fae6, 0xa1cd104, 0xbaa3e50,
+ 0x0478b65, 0x5bd38a2, 0xdaefbcc, 0x2b57d05 }
+ },
+ {
+ { 0x86f4534, 0x1ce92ba, 0x414f5e3, 0xb2a8592, 0x9979436, 0xdd7a4c6,
+ 0x3f0add7, 0x7599aff, 0xe2d4f64, 0xe0ce4d3, 0x401a29f, 0x74475cc,
+ 0xa2377d9, 0xaef6541, 0x3f917b6, 0x54048f5 },
+ { 0x05312ec, 0x1b86b22, 0x31493cb, 0x779ba22, 0xaac9320, 0xc718369,
+ 0x617fce4, 0xeab01a8, 0xf7187fa, 0x17b1f10, 0xa1aca46, 0xe68eda0,
+ 0x2586342, 0x61033fe, 0x0b6ca43, 0xfc14e79 }
+ },
+ {
+ { 0x13d2491, 0x9f22319, 0x7997202, 0x66bdb53, 0x4617f34, 0x0bafb0c,
+ 0xf3bb7b3, 0x5917831, 0xb45bddb, 0x6feb2a6, 0x0202c19, 0x08662b3,
+ 0x05852f6, 0x0bc2b57, 0x91818c2, 0x2c00fd4 },
+ { 0xda37dac, 0xca7672c, 0x5a30865, 0xfe4c04c, 0x322e92a, 0x5f1399f,
+ 0x25b1beb, 0xe7d67ea, 0xdce7f68, 0xe08b014, 0xf2f2b3c, 0x24df52a,
+ 0x750ecd1, 0x2028b23, 0xc810a45, 0x9b25d4b }
+ },
+ {
+ { 0x7a9d799, 0xa35b715, 0x01f9c99, 0x6da1eb3, 0xe363ba8, 0x33ef91c,
+ 0xce140da, 0x21c0e2e, 0x158cd84, 0xb0b11bf, 0x93da438, 0x6a87442,
+ 0x3db585b, 0x924f10d, 0x10c6159, 0xf5ddd73 },
+ { 0x6a74c21, 0xb72dcb8, 0xcc8f79f, 0x6d14198, 0x9c5a8d6, 0x99f4b6c,
+ 0x90e135c, 0x0639688, 0x83f6385, 0x330edb8, 0x9079675, 0xe1a5a6b,
+ 0xb8f5fe0, 0x6e37fa8, 0x61dca1e, 0x60e2fd9 }
+ },
+ {
+ { 0x66c395e, 0xc6cb403, 0xb51d0f1, 0x03b21a7, 0xe693181, 0xbc478a5,
+ 0xc6cff33, 0x0017c2f, 0x39d8d1e, 0x740a5b8, 0x4d9ec6d, 0x3968d66,
+ 0xb0ef1b0, 0xfd53738, 0x1ed0a04, 0x73ca8fd },
+ { 0x75ab371, 0x4ace938, 0xddad7e9, 0xd602936, 0x750bcc2, 0x1f5424a,
+ 0x68c7a17, 0xfe09b36, 0x58341ec, 0x165f7de, 0x6ce61e5, 0x95b825a,
+ 0x66c83c4, 0x9d31e19, 0xcc5887b, 0x65b3e08 }
+ },
+ {
+ { 0x21482d1, 0xd37e932, 0x08b6380, 0x9af6597, 0x7d61e4b, 0x279426a,
+ 0x80997ad, 0x80dd0ec, 0xd5b76d4, 0x7239b0d, 0xe76c098, 0x92e6c73,
+ 0xeab3e1d, 0xeeb2321, 0xeb1a910, 0xa69c4a7 },
+ { 0x833d9ae, 0x46d6aa7, 0x572b0fe, 0x3ee6957, 0xcdb3d97, 0x44ccbed,
+ 0xcbea01b, 0x342f29d, 0x8926876, 0x0d518c5, 0x5585d2c, 0xaaabae7,
+ 0xe008f58, 0xc548c77, 0x21fab2c, 0x819e2fa }
+ },
+},
+{
+ {
+ { 0xc16e981, 0x468e149, 0x9ddbb7c, 0x286c790, 0xdb7a38a, 0x2a92d47,
+ 0x8a27cb2, 0xde614e6, 0xe5b0ab6, 0x8dc8822, 0xcf48565, 0x38441ae,
+ 0x089435b, 0x11ed5c9, 0x82d0d31, 0x2389286 },
+ { 0x72f2f31, 0xc6698d4, 0x56d76af, 0x295242c, 0xeba563b, 0x4099205,
+ 0x3ab7384, 0xae7de5a, 0xd0ed86c, 0xccdf127, 0x965c3c3, 0xb9b6d5b,
+ 0x2c31ad7, 0xe351a8f, 0xac12f13, 0xa761dd8 }
+ },
+ {
+ { 0xf171ab7, 0xda115dd, 0x401f93d, 0x2de17b1, 0x40964b4, 0x95019ca,
+ 0x65ba3c3, 0x169d1f4, 0x0090d08, 0x534a007, 0x82bf410, 0x805c5e2,
+ 0x65f8d90, 0x15dfe11, 0xca72456, 0x827a416 },
+ { 0x33a36c4, 0x5af8884, 0xd8ee604, 0x8bfa54c, 0x9ce290f, 0x08fd141,
+ 0x287b3a6, 0x2db5e8c, 0x03cdad2, 0xe5be981, 0xbf810b9, 0x155b874,
+ 0x670f473, 0x2ae42de, 0x7f74657, 0x2218584 }
+ },
+ {
+ { 0x23ffa43, 0x54b2a50, 0xa24d919, 0xcf87b16, 0x63524e8, 0x1ff5402,
+ 0x56d1e54, 0x73c94e0, 0x3899fb5, 0x7651552, 0x18723bf, 0x13a7214,
+ 0x3561517, 0x39afbdd, 0x9f2862e, 0x49b790a },
+ { 0x527d2ce, 0xc8c1f4f, 0x7609bb7, 0x1997aec, 0x02a3400, 0x583ad80,
+ 0x4f79706, 0xac2374e, 0x21b7183, 0xbf1f9a8, 0x6600fe0, 0x06158ab,
+ 0xbd56751, 0xfcc9b2e, 0xddaaec7, 0xe1de5ac }
+ },
+ {
+ { 0x788fdab, 0x230baa1, 0x7d04597, 0xf30860a, 0x99f4caa, 0xa2c7ece,
+ 0x6ad065e, 0xbd39f10, 0x3bef7bd, 0xfd92f5d, 0x96d2203, 0x6069fad,
+ 0xc4d9e0d, 0xbff38ca, 0x1fda313, 0x419a017 },
+ { 0x572f035, 0x5d77fd8, 0xb282b40, 0x5af99f2, 0x23facff, 0x7257d3b,
+ 0x58c90af, 0xf2ee223, 0x9b6a52a, 0xcc2687d, 0x302430e, 0x140892c,
+ 0x3ec4f38, 0xa934d5e, 0x3bd18be, 0xc087d7c }
+ },
+ {
+ { 0xa2c5ed7, 0x7e94138, 0x53610bf, 0xbc8ceef, 0xd86f803, 0xe89356b,
+ 0x5a55330, 0x9a3a380, 0x11ad648, 0xe894aba, 0xba95918, 0x2e68fba,
+ 0xfcad344, 0x643e2ba, 0x61640aa, 0x0dd0256 },
+ { 0xe25cbdd, 0xc02e479, 0x13a1b3f, 0xd78c4d8, 0xcca9692, 0xa6dae8f,
+ 0xe5de8a0, 0x3dd91e9, 0x764ea36, 0x78ae0ce, 0x85dbc5e, 0xb4ad999,
+ 0xe82a169, 0x967ff23, 0xbaee1fc, 0xaeb26ec }
+ },
+ {
+ { 0x9a6f90c, 0x8c50255, 0x0ea374a, 0x56e7abe, 0x56413b2, 0x675c722,
+ 0x946753f, 0xd3fc17e, 0xe235f7c, 0x28c4e1f, 0xb028eb0, 0xe209bcd,
+ 0x489fe88, 0x7d0f93a, 0x063706a, 0xb966a2e },
+ { 0x4a30319, 0xb6c228c, 0xca6d674, 0x6868efe, 0x057311a, 0x0610a70,
+ 0xbad7f89, 0x0808112, 0x1dd6181, 0x2a2462c, 0xb58e88a, 0x52ed9fe,
+ 0x33821a2, 0xbbff16f, 0x17f882a, 0xda53e96 }
+ },
+ {
+ { 0x8c30e5d, 0xb6ffca3, 0x5c905f5, 0xa90f991, 0xd753e88, 0x72fb200,
+ 0x7256c6a, 0xe509d4c, 0xd866500, 0x369e552, 0x33cf8ae, 0xee4b7e0,
+ 0xefcf6eb, 0x280d954, 0xd557f0e, 0x5b275d3 },
+ { 0xb5cecf8, 0xeb17211, 0xbdb2f8d, 0xd6ad50f, 0x35e04b7, 0x2478c7b,
+ 0xac73bd3, 0x97e7143, 0x4817e24, 0x09d6ede, 0x2c405e1, 0x68fea71,
+ 0x05f67a1, 0x34adbc9, 0x73edf99, 0xd20ab70 }
+ },
+ {
+ { 0x569f191, 0xe116a96, 0x4d6e29a, 0xb3f0bce, 0xf51dbab, 0x30b9e1a,
+ 0x346d276, 0x1dd36f3, 0x0749a27, 0x8315103, 0xab47f70, 0x242f148,
+ 0x5585681, 0xe8a5bcf, 0x5ed79ba, 0x8b80184 },
+ { 0x3894ad1, 0xa4042fd, 0x2b88bc6, 0x82f781d, 0xbe4c397, 0x2d34cac,
+ 0xdd99c9f, 0x8731aea, 0xef1d382, 0x0f95498, 0xdd0bbc9, 0xcaba2e1,
+ 0x54064e8, 0x78889e9, 0x61a8ab9, 0x8cd9c97 }
+ },
+},
+{
+ {
+ { 0xfa0459e, 0xf31f53f, 0x315cd6b, 0xf8742a1, 0xae64e97, 0xabe2f50,
+ 0x9b9da48, 0xbd78741, 0x51e526e, 0x4521a33, 0xe10ba45, 0xfa05935,
+ 0xe8f903c, 0x5c947e1, 0x5a754ee, 0x0aa47d1 },
+ { 0xd814825, 0xb2849ef, 0x5c9968d, 0x9c2a5d2, 0x04e634c, 0x24dbb26,
+ 0xdb38194, 0x33f3a4c, 0xc8a2b6b, 0xe04f609, 0xabbbfdb, 0xcaefd8e,
+ 0x404498b, 0x683119a, 0x8b21cbd, 0x24ab7a9 }
+ },
+ {
+ { 0x21fa2dd, 0x6f13269, 0xc10a4bc, 0xd79e61c, 0x4bd6d46, 0xac4b3ce,
+ 0xbd3f37b, 0x52459b6, 0xa396966, 0xce0f0a3, 0xa1ed488, 0x050d1d5,
+ 0xe0b17fa, 0x1b9c403, 0x04a2e66, 0xee1abd0 },
+ { 0x5cf3e3b, 0x97065c3, 0xbe33441, 0x6513d5f, 0x79047ae, 0xcd34634,
+ 0xfd22df1, 0x45cbb1c, 0x967b17c, 0x7a173ae, 0x2223cda, 0x75f5ba7,
+ 0xefe0a73, 0xe3d12db, 0xfd7adcf, 0x3b7f94d }
+ },
+ {
+ { 0xf1e9b7d, 0xd596a13, 0x6734e0c, 0x04f5bdd, 0x8be163a, 0x18b694f,
+ 0xd959fa3, 0x15620c7, 0x53d2a3b, 0x65fc2c5, 0xc4d36f2, 0xd44a364,
+ 0x268ceab, 0xc8b421f, 0xbfe2bd4, 0x564139a },
+ { 0x19d4633, 0xb524610, 0x6346934, 0x5ab3f88, 0x9819422, 0x96691fe,
+ 0x8b39b82, 0xdfdec89, 0x97cfb27, 0x84b1c79, 0x4d6d004, 0xe59a98d,
+ 0x12c350f, 0x5e5d0c6, 0xd415774, 0xb431220 }
+ },
+ {
+ { 0x6aae0a2, 0x3d0ca73, 0x48c2d8c, 0x7b1991f, 0x5cdae72, 0x00ae856,
+ 0xbd55128, 0xdbb6ca0, 0x45c82bf, 0x3c2ab2a, 0x79545ca, 0xea5a559,
+ 0xd5927d0, 0xeba9a26, 0x83257fc, 0xb52e401 },
+ { 0xca9650a, 0x55ed517, 0xe3ebff2, 0xbdaa081, 0x9f8831b, 0x8cf7ce4,
+ 0x6e3b8d3, 0x1d0b5bd, 0xd8fc869, 0xa314a9f, 0xb892bab, 0x07f2079,
+ 0xa0cc9d9, 0xb700dbf, 0x6dc0a39, 0x7105a08 }
+ },
+ {
+ { 0x8c7d901, 0x0c7e05d, 0xaf3182b, 0xa7ff681, 0xf9a0d06, 0xb88e3ca,
+ 0xc343b7f, 0xfe20a12, 0x03251f9, 0x9f02577, 0xc40c5eb, 0xf225ded,
+ 0xb208ea7, 0x50e0cec, 0xe6eeb65, 0x5b250f0 },
+ { 0x4806b6e, 0x807a153, 0xfa94139, 0xded120a, 0x49366fb, 0x237ddc7,
+ 0x5a34bcb, 0xdd3674e, 0x9c4a61d, 0xef6cdff, 0xb2fb896, 0x036194b,
+ 0x9528cd9, 0x3865953, 0x6936a52, 0x0723c59 }
+ },
+ {
+ { 0xe17719d, 0x1f84cd5, 0xc73b394, 0x545939b, 0x83e84e7, 0xefbf3c5,
+ 0xf77fd66, 0x6cc46f1, 0x1383ab8, 0xa629f59, 0xcd35cd2, 0x9177ffa,
+ 0x9dd411b, 0x039187f, 0x7b7eea8, 0xa9cf1cf },
+ { 0xac47e5d, 0xa3b105a, 0xd0a9da4, 0xa755bea, 0x73da15e, 0x50cfbae,
+ 0x60b628c, 0x9456cbc, 0x9b7a910, 0x7ffc362, 0xcd6d6a4, 0x30b5924,
+ 0x0b04ab6, 0x198629f, 0x624dea9, 0xc74609c }
+ },
+ {
+ { 0xaf12fa6, 0x27d4d77, 0x690aeb2, 0xdd8a216, 0xfe24417, 0xe48fc02,
+ 0x720e17e, 0x1970403, 0xce37b42, 0x95013fd, 0xde4bd9b, 0x06817d2,
+ 0x63d0ba2, 0xc5863e7, 0xa556f5d, 0xa1bafc0 },
+ { 0x410a78a, 0xf28ec7b, 0x0a01a63, 0x0dcac42, 0xb5bce11, 0xfcd3fa4,
+ 0xd278b89, 0x054d7e5, 0x5ce49e3, 0x5195db8, 0x2c73d96, 0x4c0b167,
+ 0x20a1bdb, 0xd943077, 0x59c77a7, 0x66fa8b3 }
+ },
+ {
+ { 0xd7462fe, 0xb9e93ae, 0x18dde4f, 0xbfe54b2, 0x3dbb08e, 0xaabb528,
+ 0x0e5fc45, 0x8c36702, 0x8e69be3, 0x3502888, 0xc12a11d, 0x6d2efc1,
+ 0xf265e30, 0xfce5ceb, 0x5742c7e, 0x58c8bb3 },
+ { 0xccf7fa0, 0x32e89dc, 0xdd020a4, 0xa811f33, 0x5129fe5, 0xa10d620,
+ 0xe4ed29b, 0x3841c88, 0xd8b1ea6, 0xf3303a9, 0x1781f58, 0xa9a0cad,
+ 0x8f3ef0b, 0x4502b38, 0x74c6d35, 0x2b7587e }
+ },
+},
+{
+ {
+ { 0x23ae7cd, 0xc6eaea1, 0x73c0caa, 0xa1884d4, 0xef1ea88, 0x901e76f,
+ 0xa14269d, 0xdb9935c, 0x947f1de, 0xe8b2486, 0xa657588, 0x4ad56f4,
+ 0x2913fb1, 0xe768054, 0x37600da, 0x2abff5d },
+ { 0xa81a797, 0xa814813, 0x46acb69, 0x63e76a4, 0x4ab8277, 0xb103839,
+ 0x9d8e759, 0x587de34, 0xddf62df, 0xdfaeb8d, 0x9239d49, 0x24fe1cf,
+ 0xe130d1c, 0x7de7409, 0x581d070, 0x3ecfef9 }
+ },
+ {
+ { 0xf87c72d, 0x8d177a0, 0x8c6d1de, 0xae7e581, 0x8cece85, 0x0077b5f,
+ 0x32d2187, 0x3824838, 0x6db2bd2, 0x49d8b15, 0xc8d85b9, 0xe9e5513,
+ 0xe05c53f, 0x63c410c, 0xd86f752, 0xceaf2fb },
+ { 0x93806c5, 0x0b432fe, 0x3d06c75, 0x18eb15d, 0x12cfc02, 0xcaad826,
+ 0x1e2d045, 0x581e040, 0x95edcfd, 0xd573cb5, 0xdbc66e3, 0xce71948,
+ 0xacc14ea, 0xcf68721, 0x6cac4dc, 0xf68bea2 }
+ },
+ {
+ { 0xcb74da2, 0xd8576af, 0xc433f46, 0x8771c29, 0xe2f5b8e, 0x7315af6,
+ 0xba33928, 0xc195481, 0x2fb1f94, 0xb77dcc2, 0xa610f75, 0xcb3e57c,
+ 0x53907df, 0xeb2a927, 0x23eff95, 0x916f149 },
+ { 0xb6cd291, 0xbb378e4, 0x2f13ce1, 0xa2a5e2b, 0xbcd00b0, 0xa8a0e60,
+ 0x682b75a, 0x5902741, 0x3f65a77, 0xa0882c9, 0xc93cfff, 0x2069f75,
+ 0x70c0cb9, 0x1ede405, 0x0d526c4, 0x13840c9 }
+ },
+ {
+ { 0x03ced48, 0xdc2caaa, 0xa0315be, 0x2079219, 0x3b1f642, 0xca49356,
+ 0xb0665f2, 0x0202dc7, 0xb7a5238, 0xe5d6bbd, 0x26eab32, 0x36fbd5e,
+ 0xf5819b4, 0xb3988f1, 0x4aa4d69, 0x5b15dc8 },
+ { 0x54e5c24, 0xa52feed, 0xe91a797, 0x927471b, 0xd57f677, 0xd119bfd,
+ 0x78e4c4f, 0xde38f7b, 0xb150bc3, 0xa7af516, 0x26b76c2, 0x403b21e,
+ 0x92300dc, 0x589067d, 0x066802a, 0x04e406a }
+ },
+ {
+ { 0xa9ca9bb, 0x28e7d09, 0xfccf4a0, 0xaa84fd5, 0x635b7ed, 0xdbe9fb8,
+ 0xd56fc7c, 0x9ede3f5, 0xb01cb29, 0xa4b5031, 0x7f93703, 0x584299d,
+ 0xb6fe825, 0xbd28868, 0x8b9c2d9, 0x1d385d4 },
+ { 0x822be80, 0x6606f4a, 0x626d0fd, 0xb5a0165, 0x14568ad, 0x9920a20,
+ 0x1c6d174, 0x7d430f4, 0xe02e9e9, 0xc243e16, 0xa6bd649, 0x367f1d2,
+ 0x71b8c36, 0x6939100, 0x4de2984, 0x2ede131 }
+ },
+ {
+ { 0x5beec32, 0xdc78187, 0xa525ff4, 0x1fff0cc, 0x676df34, 0x6e86425,
+ 0x3f638e1, 0x2b4e8a6, 0x9b1e59f, 0xc4991d2, 0x1589717, 0x399d001,
+ 0xbe041cd, 0x406464e, 0x9e65bb0, 0x901cb3d },
+ { 0xfb42307, 0xf5f4572, 0xf1b7307, 0xf81b3b0, 0xf2094d1, 0x8fb695c,
+ 0xdb56f7b, 0x7db4792, 0x5a794e0, 0x36836d5, 0x09bc879, 0x2da477b,
+ 0x1887c40, 0x1cdfadb, 0xf2699b6, 0x65dc6c2 }
+ },
+ {
+ { 0x4737972, 0x36f9f21, 0x7a387b0, 0x48f0c8b, 0x39a1d24, 0xa156ed3,
+ 0x0fed268, 0x375293a, 0x7ff75cb, 0xf679f48, 0x1cc9e62, 0xd15a00f,
+ 0x22c3877, 0x92a7dc7, 0x6fb0ed4, 0xe987063 },
+ { 0x16f5f3c, 0xfd8e59c, 0xaeeb48e, 0x375732e, 0xca1ab42, 0x2dd9213,
+ 0x9ffccea, 0xcb06209, 0xb23edfd, 0xfc611f6, 0x99b060e, 0x2716349,
+ 0x820de8a, 0xb938b5d, 0xeb49a32, 0x138f6e7 }
+ },
+ {
+ { 0xe485f70, 0x7feda63, 0xeb27b2c, 0x646380a, 0xc4511c7, 0xcf8fe32,
+ 0xff9406a, 0x2c68e1e, 0x20b6020, 0xa9f2fd9, 0x3b3e465, 0x1c98fc6,
+ 0x93e53aa, 0xb8dac35, 0xa750e96, 0x2fb47b6 },
+ { 0x1950bb3, 0xea373ef, 0x4ac7aec, 0x8156694, 0xb55b931, 0x8d6b3c2,
+ 0xb62ef7d, 0x5d13f2d, 0xab9182b, 0x4647f2a, 0x33bf07c, 0x8f56c5a,
+ 0xb35a221, 0xc5ab284, 0x5a46a6b, 0x0747ab7 }
+ },
+},
+{
+ {
+ { 0x86b85c5, 0x5b9236c, 0xc482448, 0x5967a0d, 0x7df6ae0, 0x397c955,
+ 0x5378f2b, 0xf83ee1c, 0x6e05dd1, 0xf82df65, 0x19d7c8b, 0x4c424f6,
+ 0xa6d5f2a, 0xa612550, 0x63c3ebf, 0xfe8482a },
+ { 0x0142c82, 0xcb8d403, 0x3679e6c, 0x08b0662, 0x3eca5ee, 0x3ea5146,
+ 0x1370500, 0x089eb3b, 0x5a0d306, 0xcbfb19c, 0x42a65bb, 0x2f68588,
+ 0xe51e119, 0xe3e1db5, 0x110895e, 0x2c150e7 }
+ },
+ {
+ { 0xf6d4c4c, 0xf323488, 0x63b87e2, 0x5fc931f, 0x35c759f, 0x8867da0,
+ 0x9746d4c, 0xb6f1eff, 0x990be0a, 0x8a8172d, 0x5c407b4, 0x1113eee,
+ 0x378ed8a, 0xd80dacf, 0x3fa7fd1, 0x99b57cf },
+ { 0x5176405, 0xf5bb6d9, 0x92e83b5, 0x6b8963a, 0x8a7ef8d, 0xac55b6b,
+ 0x6c1fbf0, 0xe73fa12, 0x60148df, 0xdb37560, 0xf3f1fba, 0x72f1a98,
+ 0xea550f2, 0x1f71d0a, 0x9544a87, 0xc3ea4f0 }
+ },
+ {
+ { 0x4322bf3, 0x5b09da2, 0x61264e1, 0x2a573d5, 0x803acc4, 0x93cb2e1,
+ 0xe502fc6, 0x397b4fb, 0x39e0ebc, 0xddfb212, 0xbbcbc57, 0xeccd8f5,
+ 0x4663788, 0x49d3bed, 0x1218df9, 0x37192aa },
+ { 0x2ffa3c6, 0x8a05bc9, 0x23ebf4d, 0xc38c281, 0xfe343a8, 0xc80d547,
+ 0x6c63516, 0xa8d5a5b, 0x8d8fa6b, 0xc5d8ce1, 0x24a87c0, 0xeb5e872,
+ 0x75bfa23, 0x9806e9e, 0x689469a, 0x11f0889 }
+ },
+ {
+ { 0x8e75666, 0x81005f6, 0xd349505, 0xb84d861, 0x9f321ea, 0xe083282,
+ 0xcfa33a1, 0xb751d7a, 0x067c550, 0x793cf6f, 0x1027e56, 0x073a6b2,
+ 0x66a6012, 0x53f40ee, 0xc210fa9, 0x70bfaa8 },
+ { 0xe4b5998, 0x1518e39, 0x24b8d9c, 0x8f0b530, 0xafdf923, 0xd91c281,
+ 0x24e3f69, 0xc5cfb28, 0x870871f, 0x63a529a, 0x2128dad, 0x3d3e887,
+ 0xcb30cce, 0xed658dc, 0xafb7bae, 0xf9373b9 }
+ },
+ {
+ { 0xde58ed2, 0x22d4dbe, 0x03f8789, 0x4fefc1d, 0x344817f, 0x6b0a1fe,
+ 0xa56b0b2, 0x96bef40, 0xda249fa, 0x32684ee, 0x524a91b, 0x8298864,
+ 0x0c736a1, 0xa958baf, 0xef2f3e5, 0xd033a7d },
+ { 0x43f4d6a, 0x5be3edc, 0x9c89abb, 0x326a39d, 0x55d997a, 0x90c44f7,
+ 0x6e966c2, 0x2058106, 0x6548038, 0xdbae490, 0xd473fc1, 0xac7bc97,
+ 0x4b2603a, 0xb34488b, 0x5e9bb98, 0x27aea27 }
+ },
+ {
+ { 0x1b88773, 0xa59e728, 0x0c241f6, 0xe2f05d4, 0x4e75749, 0xa56229e,
+ 0x1b10705, 0x8f00c0b, 0x19394d3, 0x8559946, 0xaaf5e32, 0x0d7e352,
+ 0x787b8ea, 0x526c462, 0xa179d48, 0x89297d9 },
+ { 0xef43892, 0xeff17e6, 0x221f841, 0x17091eb, 0x4a4b848, 0x82f5eb3,
+ 0x8eb7b76, 0x6bea477, 0x76c536c, 0x21f2271, 0x96c81bb, 0xd9ef2c8,
+ 0x54bf4d3, 0x7c27546, 0xd7c28c8, 0x9dd4662 }
+ },
+ {
+ { 0x20e1a6b, 0xe7fff00, 0xa08d467, 0x26a35c6, 0x3248c91, 0xb3c773d,
+ 0xba7d935, 0xa646615, 0xb0d26fa, 0xa91f453, 0x60c6d32, 0xdcf9c34,
+ 0x9e3e3dc, 0x6366861, 0xf30f3e2, 0x3012813 },
+ { 0xc2fc61a, 0xac6623d, 0x2bfd2ff, 0x108dc25, 0x231d6ea, 0xd7f5c0d,
+ 0xad1107e, 0xa904f9a, 0x0d1e9c8, 0x46941c2, 0xc810cf2, 0xe5b6451,
+ 0x4f511d1, 0xaba8e67, 0x08373fe, 0x5b4b94f }
+ },
+ {
+ { 0x849c230, 0x002d4e2, 0xd8ba391, 0x9bed0ef, 0x828e319, 0x745e0c0,
+ 0xca58de2, 0xcd40907, 0x1abaa4a, 0x2c87ab1, 0xdb64391, 0x3c17a97,
+ 0x86c72d2, 0x36b184e, 0x485f7aa, 0xb03d202 },
+ { 0xde24aba, 0x2b6b79b, 0x2325fb2, 0xdcb7854, 0x66ebae2, 0xf5d1db9,
+ 0x903840a, 0x35a4d5b, 0x190e9da, 0x7afeb09, 0x35c1792, 0x1818f6a,
+ 0x3faa269, 0x90091fa, 0x2570235, 0xc4ccff6 }
+ },
+},
+{
+ {
+ { 0xec85940, 0xa177619, 0x7ef7eee, 0xfca24db, 0x7a90c11, 0xb2450f3,
+ 0xdbf4f85, 0x29d256d, 0x51316c3, 0x920c8d0, 0x04474da, 0x2f7f7ba,
+ 0x2ec9a0b, 0x308117f, 0xd0d2085, 0xd0a231a },
+ { 0x7ab641d, 0xf3288fc, 0x9f4fa32, 0xc68bade, 0xbbf8253, 0x768f014,
+ 0xc0a33f0, 0x5eff260, 0x6bb93ce, 0xc71b453, 0x680697f, 0xa71d045,
+ 0xce72bc3, 0xb62444c, 0xd1379f3, 0x11f03e8 }
+ },
+ {
+ { 0xc16df92, 0x1f54789, 0xe3ed142, 0x874c642, 0xfa2a9f1, 0x6699f60,
+ 0x3fecfc1, 0xbd1b8d3, 0x8a3d953, 0x59682d5, 0x4a36b81, 0xf17c021,
+ 0x181a666, 0xeb9621d, 0x3cf1ad8, 0x7c2c3ab },
+ { 0xe529f7c, 0xe6888c3, 0xb355315, 0x197b66a, 0x83e31ac, 0x63b558a,
+ 0x891c68e, 0x4aa7bc5, 0x592e360, 0xc17d989, 0x1363666, 0xc750a29,
+ 0x4909ac0, 0x0d53470, 0x4594a10, 0xd6d0272 }
+ },
+ {
+ { 0x3fbb635, 0x35c541b, 0x5982afa, 0x50016d0, 0x96b0ca0, 0x58ebce4,
+ 0x577ea56, 0xb940027, 0xe38480f, 0xf29d305, 0xebd6a2c, 0x43705b0,
+ 0xe90c639, 0x0e4acda, 0xf56e05e, 0xbe94a29 },
+ { 0x30659ad, 0xc61f4a0, 0xc402211, 0x39074ad, 0x51b621d, 0xfe0d8d5,
+ 0xd1d5222, 0x2d02e8d, 0x46c2683, 0x05ece3c, 0xc689d41, 0xf70705a,
+ 0x4d837bf, 0xe3caf44, 0x75ba6d0, 0xfda0584 }
+ },
+ {
+ { 0xcb7d458, 0x1098163, 0xf5ba834, 0x12b645f, 0x28af72c, 0x70a3181,
+ 0xf32e5dd, 0x5f4727e, 0x10a21b4, 0x7cbae15, 0x6785389, 0xa80bf80,
+ 0xb8f93b7, 0x9827402, 0x08349da, 0xe385f82 },
+ { 0x9589f6e, 0x2d05461, 0xe7c0191, 0x6aa5b26, 0xbd5574d, 0xe79ae12,
+ 0x4148e61, 0x5d13f91, 0x13716ff, 0x7b2be0f, 0x80bb81f, 0x82b0fe6,
+ 0x3e2569c, 0x697633c, 0x873f8b3, 0x6c1f083 }
+ },
+ {
+ { 0x0be1674, 0x6e26d85, 0xab8044f, 0xe4e47f6, 0x82fc434, 0xfdf46e8,
+ 0xc89cadc, 0x639ae2c, 0x4b85bdc, 0x2244a52, 0xb7cf4ea, 0xb1e4790,
+ 0x7e0bb8f, 0x51dce03, 0x2716cee, 0xdd14335 },
+ { 0x8e8841d, 0x1c049b4, 0xb97c621, 0x6bf26dc, 0xba01178, 0x21d6255,
+ 0x8e4f0e4, 0x477258a, 0x68f8ef1, 0xf5e437e, 0x8b03e1e, 0xd118fbc,
+ 0xe1c91b3, 0x3d6bc51, 0xd5b6907, 0xa259486 }
+ },
+ {
+ { 0x7b6f5dc, 0x4159cfc, 0x493694a, 0x05a52b3, 0x83b8883, 0xeeb511c,
+ 0x2b06400, 0x19d79e4, 0x738f37e, 0x8e503a2, 0x5a94ad9, 0xa30e579,
+ 0x262618d, 0x3981c75, 0x2dcba19, 0x06b6c69 },
+ { 0x4d1b051, 0xd7242ee, 0x3b350c4, 0x6274ccb, 0xf540019, 0x66df0bb,
+ 0x5ae12d5, 0x4d66be6, 0x1049cba, 0xcea2960, 0x8df84b3, 0x4047339,
+ 0x75a31c8, 0x7d6c96b, 0x874174c, 0xbb80159 }
+ },
+ {
+ { 0x59f1aa4, 0xf0f7be0, 0xdcff451, 0x798f39a, 0x8014e1e, 0x96763ff,
+ 0x09cc5ec, 0x03987a8, 0x893650a, 0x4919656, 0x75e24df, 0x92e8eef,
+ 0xe89d639, 0x54e97cd, 0x7682cc0, 0x8081d06 },
+ { 0xa8ceb71, 0xb9ef41a, 0xa4d7aaa, 0xb8173a4, 0xc54ee10, 0x93d81b1,
+ 0x70a445a, 0xabe1805, 0x64d569d, 0xac0ff97, 0x3e570be, 0x86946b2,
+ 0x4180641, 0x8e11dd2, 0x99f67dc, 0x3d0b33c }
+ },
+ {
+ { 0x48bf5a4, 0x2c9637e, 0xccaf112, 0x9fdec19, 0x5c42023, 0xe5cde9d,
+ 0x878f0cc, 0x9869620, 0x1fe6eba, 0xcf970a2, 0x54e678b, 0x1df5ec8,
+ 0x28d00dd, 0x4667f01, 0xb0b3fa8, 0xfa7260d },
+ { 0xb34239b, 0x6bd2895, 0x2d2a50d, 0x04c8bc5, 0x6cb23e2, 0x14e55ef,
+ 0x3a278d5, 0x6440c27, 0x2193046, 0xf4b12e3, 0x5dd4c08, 0x46adf64,
+ 0x4656e8c, 0x70e2998, 0xe4acd44, 0xe7b36ea }
+ },
+},
+{
+ {
+ { 0x16cf664, 0xea64a57, 0x26fd357, 0x8497ee4, 0x814e851, 0x44d94b4,
+ 0x5a6a2cf, 0xf4aac22, 0x80c301f, 0x947b309, 0x7865383, 0xf390ba1,
+ 0xd1773d3, 0x16c4fc6, 0x6227220, 0x61b9814 },
+ { 0x1dd0270, 0x07dd03a, 0x0f160df, 0x290ca82, 0x44ba955, 0x8f22054,
+ 0x0b6f1b3, 0x4e85e45, 0xad78089, 0xfd73ce9, 0x2f2cb0e, 0x67c1270,
+ 0xee33a61, 0xa7de0d7, 0x6553261, 0x6a811cc }
+ },
+ {
+ { 0x2d0a427, 0x5ef0574, 0x220a341, 0xe8d2e95, 0x8044886, 0xdd28cbf,
+ 0xa1aa58b, 0xdad7b4b, 0x8ec901b, 0xb28f373, 0x5bbe3db, 0x1841a93,
+ 0xa075fee, 0x8fd7cd1, 0xc0d3cdd, 0x93b603f },
+ { 0x5edd859, 0xca54fd5, 0x64ed687, 0xa4cb05f, 0xed1a3d7, 0x3138668,
+ 0xee32be5, 0x1224fda, 0xc80aeb3, 0xf1f532b, 0xe8d4d69, 0xa4f65d0,
+ 0x5905fe5, 0xc697a01, 0x6690ce4, 0x514da7a }
+ },
+ {
+ { 0x3de4a55, 0xc7b9af8, 0xb318d93, 0xc79bad7, 0xf5b1c83, 0x1808071,
+ 0xb965b16, 0x92112ef, 0x7bb740a, 0x655ab38, 0x384ff87, 0x53dbc8b,
+ 0x72dc6f2, 0xd153c28, 0x99c7819, 0x2ec20e1 },
+ { 0x3b854b5, 0x65e46ea, 0xc711db5, 0x272d5ae, 0x26e19e8, 0xfd1bb53,
+ 0x3dc0665, 0x33280b8, 0xb8f1c4a, 0x95b986e, 0xa685c4a, 0xa671fc4,
+ 0x83bdbbf, 0xa03cbd5, 0xab77544, 0xd329402 }
+ },
+ {
+ { 0x8e62b35, 0x40fa651, 0xf9e55a6, 0x3913b11, 0x5270a41, 0x4e8089b,
+ 0x80d1886, 0x565f52a, 0x512749b, 0x93b5f05, 0x141c547, 0x35c869c,
+ 0xf86717f, 0x9a44a1a, 0x9c2b2cb, 0x2b9984b },
+ { 0x4952322, 0x61fb607, 0x7af1464, 0x2d4072f, 0x600eb30, 0x9b2fa8c,
+ 0xf10668e, 0x6071fb7, 0x90634ca, 0x27cc24d, 0x471d32b, 0x3875bc2,
+ 0xa11210c, 0x678590b, 0xfcc5a9a, 0x352b447 }
+ },
+ {
+ { 0x5fa3200, 0x795d541, 0xa92949f, 0xadaa557, 0x3cc88c4, 0x42fff06,
+ 0x71b68a5, 0x26d6831, 0xe67ad8c, 0x3286549, 0x86396b2, 0x5bf6363,
+ 0xe12c8ea, 0x41229b6, 0x748952e, 0x05320c9 },
+ { 0x900b460, 0xae36b63, 0xf2b6aff, 0x9354ff2, 0x065ee0c, 0x10b810b,
+ 0xcc8bb38, 0x4d6925f, 0x7a22f14, 0x31c03fd, 0x57544e8, 0x76b7f44,
+ 0xc0eed26, 0x3a9123c, 0xe0cd1cc, 0x77acd67 }
+ },
+ {
+ { 0x07ec527, 0x2e90530, 0x62937cf, 0x32388ef, 0xe229188, 0xa445389,
+ 0x33bcebe, 0xa44b68e, 0x4c4e701, 0x5a8722e, 0xcf07e41, 0xfd066e8,
+ 0x95fab62, 0xa3c1a4f, 0xe542f24, 0xb4d6a1b },
+ { 0xaf6c9b5, 0xe6a92e4, 0xc83d61d, 0x9452484, 0x0062276, 0x422b55b,
+ 0x5279688, 0x261973a, 0x3999fb2, 0xde8be26, 0x7b029ca, 0x64e9628,
+ 0x06897d4, 0xd8edfaa, 0x6955511, 0x408319c }
+ },
+ {
+ { 0x50a5632, 0xff6baed, 0x5c5885a, 0x922b7d0, 0x1b45864, 0xdf0f3b3,
+ 0xc04340e, 0x27e49c0, 0x122c447, 0x618c566, 0xeafee7e, 0x7863a38,
+ 0xb828cb0, 0x7143aff, 0xf9d054e, 0x51fcf4c },
+ { 0x27f5e09, 0xc4a4b31, 0x90be2bd, 0x021f47a, 0x7ab956d, 0x1a06019,
+ 0x86ea86b, 0xe77fa15, 0xd550ef3, 0x9ccde87, 0x6532654, 0x7dee53a,
+ 0xe826387, 0x8b4f060, 0xad077b5, 0xda38637 }
+ },
+ {
+ { 0x0e9fac8, 0xbc901b3, 0x6fb2a2a, 0xfa08204, 0x5e04efc, 0x92f68ab,
+ 0x9ac12d0, 0x184a30a, 0xb25d479, 0x1aa11aa, 0x0f03161, 0x8bc5f4c,
+ 0xcfc8817, 0x7e3a083, 0x597f93f, 0x84d9355 },
+ { 0x239abc6, 0xc014478, 0x8d37b04, 0xb226b09, 0xf575789, 0xb056942,
+ 0xba745eb, 0x816b95a, 0xb98ddb6, 0x2a49d39, 0x291af81, 0xc41ca26,
+ 0xab26347, 0xb3afe99, 0x604b638, 0x59c31bc }
+ },
+},
+{
+ {
+ { 0xc42befd, 0xa16a8b9, 0x2052f00, 0x731c9c9, 0x1f5dfa0, 0x1ad49b4,
+ 0xbffce36, 0x7a289e3, 0x0c79cf1, 0x868fac0, 0x86721ab, 0x6d6d284,
+ 0xe726c94, 0x590f928, 0x51f3841, 0x0e802cb },
+ { 0x0b694bc, 0x6a6a57a, 0x8120fb8, 0xb9bb0cd, 0x9c05826, 0xad96ac7,
+ 0x7768df0, 0x294da8c, 0xb56c6c6, 0xfe32311, 0xae8d050, 0x291c2c6,
+ 0xe7db4c9, 0x1c765e7, 0xd65f9f7, 0xe058298 }
+ },
+ {
+ { 0x7e8d345, 0x4bfa85b, 0xde1dfc8, 0xa04ef95, 0x324ace3, 0xb5f7f21,
+ 0x574b14a, 0x4b350a1, 0xf8e5c8d, 0x11436bf, 0x7642369, 0x1c789f9,
+ 0xfb623ce, 0xeb5e335, 0x442d562, 0x9deacd2 },
+ { 0x531ee71, 0x4ff989f, 0xaacb52a, 0x43e2c49, 0x85bfadc, 0xa763198,
+ 0xd0161a0, 0x08b6d5c, 0x541f197, 0x010e3fa, 0x3279a16, 0x83a589e,
+ 0x6309f9b, 0xf099137, 0xf1cea10, 0x07c093b }
+ },
+ {
+ { 0x33d2192, 0x1ce3f0f, 0xc37ce73, 0x07b559a, 0x207be27, 0xaa2ad38,
+ 0x7ed93de, 0x84f053b, 0x3b98a4b, 0xbc5c797, 0x63aa9b9, 0xc923461,
+ 0x231a10c, 0x807cc16, 0xa061209, 0x8ffdf57 },
+ { 0x497070f, 0xa9ca741, 0xd113b3a, 0xf608ec9, 0x8d0384d, 0x5132726,
+ 0xf5ec307, 0x96686ac, 0x71c4665, 0x437bbbd, 0x7c379ca, 0xdef09d5,
+ 0x621747c, 0xf8be033, 0x8ae8047, 0x2775b37 }
+ },
+ {
+ { 0xb2c4fc2, 0x4009798, 0x203772e, 0x148d7d1, 0xf8423fb, 0x9d9392d,
+ 0xaf8cef4, 0xa5bd72e, 0x4380b53, 0x579d58d, 0x8c39d24, 0x2ff88f1,
+ 0x5706466, 0x9ca2fbc, 0x1e56af2, 0xb42987d },
+ { 0x5d94ea8, 0xcc2556e, 0x5369d76, 0x4e5c2b3, 0x2a94f9c, 0x5de3574,
+ 0x5cb4145, 0x8d068c9, 0x51bfcbf, 0x4d553ff, 0x8a23fce, 0x3ab7164,
+ 0xd0fa7f3, 0xc9cb3a9, 0xed9ced1, 0xf81209b }
+ },
+ {
+ { 0xe5b66f5, 0xde7356e, 0xe8a25e0, 0x7b2bf1a, 0x2c9b725, 0x09a444a,
+ 0x4906c55, 0xfd8a2f4, 0x82514f3, 0x409cc80, 0x28999a9, 0x47e0099,
+ 0x6a312f4, 0x0a582a6, 0xf6723de, 0xf7946f8 },
+ { 0x92d8aff, 0xa55f6ba, 0xa544b1c, 0xb62c3c8, 0x5c16a94, 0xa1d1411,
+ 0x2ad5e71, 0xc378319, 0x06b1dd6, 0x13d7847, 0xee7ff55, 0x99005f8,
+ 0x8a1e7d8, 0xfb5ea3f, 0xb4cac39, 0xdc7f53c }
+ },
+ {
+ { 0x36e3794, 0x482abaf, 0xc74684f, 0xc23e9e5, 0xf1629be, 0x4544cf6,
+ 0x2f40374, 0xd8a8ee5, 0xf433bdb, 0x2eea87f, 0xae9990e, 0x489a99c,
+ 0x54b23b6, 0xefc131e, 0x8600270, 0x25fe699 },
+ { 0xc059a7e, 0x03d2d9e, 0x6979c3c, 0xa6445b5, 0x9bfbcea, 0x491a10c,
+ 0xe937af1, 0x15b5974, 0x797c7fc, 0x4be8002, 0xfedcfee, 0xbed8a49,
+ 0xa9e0691, 0x35751ce, 0x9ef5982, 0xe9a9fa3 }
+ },
+ {
+ { 0x3065de7, 0xeffeaca, 0xac4d4e2, 0x841d544, 0xcaf199f, 0x8144679,
+ 0x443967a, 0x98cf4f9, 0xf33183c, 0x8cd57f4, 0xc1b15eb, 0x390832a,
+ 0xa53b500, 0xc4b1fea, 0xdff24b5, 0xd762a10 },
+ { 0xb0ee2a9, 0xccd3eed, 0x362d485, 0xa6dd4a9, 0xf1d047a, 0xeb4ff26,
+ 0x23860fc, 0xc0771fd, 0x4b64114, 0xdbb4e39, 0x4d29b29, 0x2ff3f24,
+ 0x387b365, 0x9cac005, 0xde5994a, 0x05b7aa6 }
+ },
+ {
+ { 0xc03dd63, 0x5e71752, 0xbc74687, 0xad10fe9, 0x54c76ab, 0x51a5b0c,
+ 0x1f586d4, 0x763fd50, 0x816048b, 0xc7bd5ce, 0x3f744dc, 0x8fc83d2,
+ 0x109df9a, 0x0561802, 0xccf0e43, 0x18fb01f },
+ { 0x038ab23, 0xe4606fc, 0xa664c98, 0x5878f1f, 0x5da7356, 0x3aedbbd,
+ 0x516746a, 0x3c578f5, 0x1a17210, 0x259477f, 0x028248f, 0xc7a869d,
+ 0x48cbf95, 0x6517a61, 0x3d04d47, 0xbc5f91d }
+ },
+},
+{
+ {
+ { 0x083ca53, 0x15fd9a9, 0x2697ca6, 0x1161da0, 0x56b676c, 0xf516af3,
+ 0x75eec13, 0x8a420d5, 0x1a9526b, 0x72d6742, 0x76b463f, 0x8d8c29e,
+ 0x8815627, 0x38a4f58, 0xe0650f9, 0xf7e528b },
+ { 0x382edca, 0x2cfa78e, 0xc4ad83c, 0x638d183, 0xe4a0119, 0x96d3b9d,
+ 0xa7c1101, 0x5769ccb, 0x2b8d04a, 0xc3b3b79, 0x4951bde, 0x96212f6,
+ 0x481161e, 0xad7905a, 0x41c5edf, 0x8fd6762 }
+ },
+ {
+ { 0x39d6cde, 0xf7b0635, 0x115a84a, 0x69d0549, 0xcbd9fe4, 0x4a976c6,
+ 0x950ff96, 0xc92953f, 0x654d127, 0x1d7f0fe, 0xda0f75d, 0x7293870,
+ 0xcf2277f, 0x7bb3652, 0x834484f, 0x64798c9 },
+ { 0xac3a76c, 0xb94d8bf, 0x7ff776b, 0xf5721a9, 0x2722e31, 0x23a6e9f,
+ 0x9a5c034, 0xe9da996, 0x456ebc3, 0xb9bbf83, 0x96956a4, 0x239f58a,
+ 0x18b7f00, 0x8b75beb, 0xa51cb97, 0x6c2b5b8 }
+ },
+ {
+ { 0x7eb41f3, 0x78b1c62, 0x17c4352, 0x0638fcf, 0x0c5709c, 0x939edd8,
+ 0xedc906c, 0x0a8dfc3, 0xefb01ed, 0x3942f47, 0x49986fe, 0x4c82757,
+ 0x4dffa57, 0x792545c, 0x6c3ff26, 0xeee6883 },
+ { 0x12b1218, 0x824d08e, 0x902457f, 0x515a478, 0xbae55b3, 0xc70cc9c,
+ 0xbcef9d4, 0x1240737, 0x2f9db7f, 0xf22e616, 0x91f8da2, 0x98c4f02,
+ 0xafaaa67, 0xa89219c, 0xe7d27e2, 0xf35fd87 }
+ },
+ {
+ { 0x01b80d0, 0x19b0cd7, 0xf9aebd1, 0x3d7e29d, 0x0477cbc, 0xd39c9ca,
+ 0x5ff0d3d, 0xac0f615, 0x520fd01, 0x8a51993, 0xb22d6fb, 0x508ff54,
+ 0x318d3ab, 0x8786c47, 0x4a683f8, 0x4312c46 },
+ { 0x95359f6, 0x73b1d39, 0x963011e, 0x0d94fa5, 0x9bfe83e, 0x5723af2,
+ 0x6841df3, 0xafa9001, 0xb7c498a, 0x791e92a, 0x7ea4253, 0xbc931ad,
+ 0xb783c06, 0x438e016, 0x2ca662b, 0x1347db2 }
+ },
+ {
+ { 0xfbaa861, 0x41df37d, 0x329e4de, 0x98ecb23, 0x507e018, 0xdaf1560,
+ 0xb088e32, 0xa902269, 0xe4cab2f, 0xad898a5, 0x02c1e1b, 0xd84e9ed,
+ 0x8488af3, 0xc20a5d5, 0x6cc77c6, 0xc7165af },
+ { 0xdeb7461, 0x8526f3a, 0x4a2d332, 0x03577b1, 0xe4760b5, 0x28e469d,
+ 0xb276266, 0x442c7f9, 0xf9c90fa, 0x90d5c77, 0x3e211bd, 0x7aa8716,
+ 0x5decfd6, 0x56d8ff0, 0xee23e6e, 0xa204b56 }
+ },
+ {
+ { 0x4aceafc, 0x2e4374e, 0x6fcd5e5, 0x978743b, 0xc4855ca, 0xa0f6345,
+ 0xe98074b, 0x9bc7e4f, 0xc33d08a, 0x3835d57, 0x6f00566, 0xeec7c8b,
+ 0x1acf55c, 0x71628a2, 0x97fb19e, 0x5da3750 },
+ { 0x01a7125, 0x6904a8e, 0xe6e3780, 0xad33c85, 0xc19f94a, 0x1702928,
+ 0x7c04b3d, 0xb424ff2, 0x19e2ba3, 0xb212e39, 0xc9af4c9, 0x4cca8e8,
+ 0xfd9bf0e, 0x98ab7ae, 0x9799db5, 0x21d245d }
+ },
+ {
+ { 0xec08806, 0x6b034dc, 0xb40f2d9, 0xfd763f2, 0x29cb906, 0x5e16de0,
+ 0x8a0e16a, 0x02b7014, 0xe071e12, 0x463c8ee, 0x25ad509, 0x6447281,
+ 0xdc0e07a, 0x9ee6f2d, 0x68d4d97, 0x188895c },
+ { 0xb27f971, 0x092fff3, 0xc9b7722, 0xb3c159f, 0x3cae42d, 0xe27d8ff,
+ 0xe87071d, 0xf8a5ed6, 0x607ebd2, 0x318388f, 0x53486f1, 0x924967b,
+ 0x7c46e1f, 0x7730494, 0xf21d196, 0xf279c60 }
+ },
+ {
+ { 0x84f3201, 0xef2bc03, 0x1f94c51, 0xf8750c7, 0x986ec65, 0xbaa4f5a,
+ 0x2732a33, 0x6f8a5de, 0x299e365, 0x0f13d80, 0xe85261f, 0x2709530,
+ 0xf527d56, 0x097d922, 0xbe1f3f8, 0x4969687 },
+ { 0x3e1708d, 0x9f3f504, 0x4aa4be4, 0xac67b87, 0x320a87e, 0x75fb042,
+ 0x6e2cad6, 0xa361ad3, 0x203e9f6, 0xcb01470, 0xc9b76c6, 0xe3807b7,
+ 0xb907c09, 0xf086833, 0x7e85a01, 0xe9bed3c }
+ },
+},
+{
+ {
+ { 0x91780c7, 0xa7ea989, 0xd2476b6, 0x04e4ecc, 0xc494b68, 0x0af9f58,
+ 0xdee64fd, 0xe0f269f, 0x021bd26, 0x85a61f6, 0xb5d284b, 0xc265c35,
+ 0x3775afd, 0x58755ea, 0x2ecf2c6, 0x617f174 },
+ { 0x5ec556a, 0x50109e2, 0xfd57e39, 0x235366b, 0x44b6b2e, 0x7b3c976,
+ 0xb2b7b9c, 0xf7f9e82, 0x0ec6409, 0xb6196ab, 0x0a20d9e, 0x88f1d16,
+ 0x586f761, 0xe3be3b4, 0xe26395d, 0x9983c26 }
+ },
+ {
+ { 0x6909ee2, 0x1d7605c, 0x995ec8a, 0xfc4d970, 0xcf2b361, 0x2d82e9d,
+ 0x1225f55, 0x07f0ef6, 0xaee9c55, 0xa240c13, 0x5627b54, 0xd449d1e,
+ 0x3a44575, 0x07164a7, 0xbd4bd71, 0x61a15fd },
+ { 0xd3a9fe4, 0x30696b9, 0x7e7e326, 0x68308c7, 0xce0b8c8, 0x3ac222b,
+ 0x304db8e, 0x83ee319, 0x5e5db0b, 0xeca503b, 0xb1c6539, 0x78a8dce,
+ 0x2d256bc, 0x4a8b05e, 0xbd9fd57, 0xa1c3cb8 }
+ },
+ {
+ { 0xd95aa96, 0x5685531, 0x6bd51ff, 0xc6f1174, 0xc9c2343, 0xb38308a,
+ 0x2921841, 0x52ee64a, 0x78f3b01, 0x60809c4, 0xae403ac, 0xe297a99,
+ 0xcb09a5b, 0x7edc18f, 0x81ac92a, 0x4808bcb },
+ { 0x34dc89a, 0x3ec1bb2, 0x4e39da5, 0x1e8b42e, 0xe526486, 0xde67d5e,
+ 0x76f0684, 0x2376548, 0x285a3dd, 0x0a583bd, 0xfe9b009, 0x3d8b87d,
+ 0x0413979, 0x45bd736, 0x38a727f, 0xb5d5f90 }
+ },
+ {
+ { 0x4bde3ee, 0x7b8820f, 0x24d5170, 0xea712ef, 0xdf6ec7b, 0x517f88c,
+ 0x983ea9a, 0xb15cecf, 0x31a4592, 0x9eeee44, 0xebb013e, 0x786c784,
+ 0x1f4e15d, 0x2f06cb3, 0x4f4fda1, 0x5603fd8 },
+ { 0x9e1321f, 0xf6790e9, 0x74a4c09, 0x274c66a, 0x9a41a4e, 0xa4b70b4,
+ 0xada5157, 0x7700bdd, 0x51be8dc, 0xe54a60d, 0x1a477e0, 0xfaf9276,
+ 0xb027eac, 0x6661c72, 0x280b917, 0x50e2340 }
+ },
+ {
+ { 0x96ec123, 0x635f40f, 0x7a766a4, 0x4a33133, 0xb935587, 0x9ce4416,
+ 0x95d97e4, 0xbb6e1f5, 0x9d4197d, 0x2614723, 0x490e896, 0xabd4478,
+ 0x8bba895, 0xf6a1b2a, 0x5e27a45, 0x401fa40 },
+ { 0x0620900, 0x7354ba5, 0x385678b, 0xc443a29, 0x53cf5fa, 0x48aba10,
+ 0xbbe152d, 0xd67e723, 0x2a63d68, 0x4b858e0, 0x72be4ee, 0x174e1ee,
+ 0x9ab8d46, 0xad0fbb3, 0xce17dd7, 0xa0fdffb }
+ },
+ {
+ { 0x9c46fd8, 0xa1ea325, 0x9fb96ef, 0xeca122e, 0x6767acd, 0xf9074a2,
+ 0x2787082, 0x9b004a2, 0x7f3ba8e, 0x389f807, 0x0d5aabe, 0x6463de9,
+ 0xb090585, 0xf30ceaa, 0x5634ab8, 0x71b31e8 },
+ { 0xaf02aed, 0x0dee65c, 0x20ac252, 0x506886e, 0x86b8a59, 0x0665f78,
+ 0xf2bb328, 0xb9b784d, 0xdc6b089, 0x46e443a, 0x66c27fd, 0x3d5de19,
+ 0xf0fde70, 0x0419265, 0x2b5c034, 0xed94612 }
+ },
+ {
+ { 0x13b0056, 0x5a52ad2, 0xb909ee3, 0x9fbeb92, 0xbdaab08, 0xb42ba18,
+ 0xffc8a77, 0xec127c4, 0xfda906a, 0xc6d2985, 0x994bbe7, 0x5355547,
+ 0x9cdfd62, 0xa7470c0, 0xd2e675a, 0x31a3971 },
+ { 0xcc8b356, 0x8d8311c, 0x01b4372, 0xabb0bf8, 0x0294566, 0x33c1cad,
+ 0xe07b672, 0xe2e649c, 0x2ae3284, 0x9084d88, 0x1835ce2, 0x7a90d4c,
+ 0x809d44c, 0xb4d1cd5, 0x9f0528f, 0x7822714 }
+ },
+ {
+ { 0xbf5844b, 0xca884cf, 0x8524cf9, 0x9dd05c4, 0x36ba889, 0xdbffa19,
+ 0x29e7666, 0xef94fdd, 0x3eaf48f, 0x358f81b, 0x1530d56, 0x96734d5,
+ 0x4adf9e5, 0x378b2d1, 0x4731f61, 0x2f85046 },
+ { 0x99dcb83, 0xd6ae905, 0x6199239, 0xa4f89e0, 0x8f0f958, 0x6405249,
+ 0xcc27707, 0x2866d99, 0xf551c0f, 0x64681a2, 0x4c37080, 0x2c7b0d0,
+ 0x00ac301, 0x218925b, 0x54df895, 0x8d57fb3 }
+ },
+},
+{
+ {
+ { 0x809c8d7, 0xdaebde0, 0x0e95ea1, 0x58c761c, 0x00ae5e2, 0xbd99650,
+ 0xcd51acd, 0x6117a85, 0x7c55d56, 0xc4424d8, 0xdfbeeaf, 0xe9b1dde,
+ 0x0db4791, 0xda98bb5, 0x3fca108, 0xff3a5a6 },
+ { 0x5ccbea1, 0x172fb8e, 0xa9f6cc9, 0x9fe12a7, 0x8967ce2, 0x1de4b0b,
+ 0x671dbc6, 0xc1ab60f, 0x5dedcda, 0x338385a, 0x3a043fe, 0x647a420,
+ 0x28ebc89, 0xe9abc64, 0x03ba3c8, 0xc357ff0 }
+ },
+ {
+ { 0xde39ebd, 0x37061e7, 0x2be567a, 0xebb9135, 0xd6bb80a, 0xa9a6f6b,
+ 0x99f0ba2, 0x039345d, 0x98bbf47, 0x215494e, 0xa2a1ccb, 0xf2cb7a4,
+ 0x37f67c9, 0xf51aa10, 0x17fff71, 0xd29c85c },
+ { 0x4d30b87, 0x8d4e4f2, 0x93a8309, 0x20fdf55, 0x757075c, 0x9b9f9cf,
+ 0xcd70101, 0x09142ad, 0x766ca55, 0x901d0ee, 0x32e418b, 0x6a5d86a,
+ 0xd7fcaec, 0x550ad92, 0xd91b26e, 0x64e8818 }
+ },
+ {
+ { 0x47e5ee5, 0x5cea0f7, 0xbe99699, 0x8ca1d31, 0x5c136c7, 0x52db846,
+ 0x90e0d74, 0x8cecb38, 0xede2ad8, 0xb8efe9d, 0xf17ade8, 0x18d6ff8,
+ 0x2d66c20, 0xd222735, 0xf2005fd, 0xc46593e },
+ { 0xf7141e1, 0xe5ebe6f, 0xe0126f2, 0xc968315, 0x1cb91b6, 0x95adc73,
+ 0x38a6003, 0x753b54c, 0x4230a61, 0xa614125, 0x559fece, 0x23ac6eb,
+ 0x3865c23, 0x9816b60, 0x543a570, 0x567014e }
+ },
+ {
+ { 0xdd2b71f, 0xd46091d, 0x97d24ff, 0x3999a5d, 0x1ecff3c, 0xce2a4f1,
+ 0x581c6f0, 0xab2687c, 0xcba70b4, 0xa9fb2eb, 0x42093e1, 0x6fde356,
+ 0xaee724a, 0x00253ec, 0x2b81bdd, 0xa08ce3c },
+ { 0x935a2b3, 0xa251238, 0x584f750, 0x8cae1d4, 0x988a219, 0x011469e,
+ 0x5a6a50e, 0x61f7ed3, 0x01fcebd, 0xe13ebaa, 0x31d8867, 0x794b976,
+ 0xcda32e7, 0xf25755c, 0x4564cd1, 0x368a97b }
+ },
+ {
+ { 0xaa3397b, 0x0d22224, 0x38066db, 0x1dbb3e6, 0x0ce8e32, 0xfe0b5ee,
+ 0x7bab4dc, 0x09c17c8, 0xf188b64, 0x5cc65dd, 0x211b5fa, 0x74c4abf,
+ 0xab0ba86, 0xdcc17b7, 0xa535501, 0xfbdf46f },
+ { 0xaca569e, 0x4775087, 0x06a1718, 0x6575f90, 0xb94de93, 0xb5c45a9,
+ 0x8497171, 0x0fc8006, 0x489f7ab, 0x775d965, 0xf5c0c89, 0x8775b58,
+ 0x1a06254, 0x05d4e20, 0xb6d73a5, 0x8cab349 }
+ },
+ {
+ { 0x39465b0, 0xca78163, 0x14498fd, 0x3ef9148, 0x6255c11, 0x9ca1f34,
+ 0xb7f38f1, 0x389fd15, 0x354b8f3, 0xdac2089, 0xa840a70, 0x82d07fc,
+ 0x1dd483a, 0xf53fd73, 0x1590578, 0xa6e4eae },
+ { 0x3c01b77, 0x7bf65af, 0xa75c982, 0x27542f3, 0x716cfce, 0xc5bd947,
+ 0x884b9e7, 0xba5fe76, 0xd55725d, 0x39bae14, 0xfae0eab, 0x982f64e,
+ 0x7a5293a, 0xcfae662, 0xd60f464, 0x22a25a1 }
+ },
+ {
+ { 0x7dd5e16, 0x74caecc, 0xce7bca3, 0x23678a2, 0x57f1ba1, 0x4673932,
+ 0xa4c1697, 0x4eb9948, 0xeaba18d, 0x5d400e8, 0x9807871, 0x128d1c8,
+ 0xbff38a6, 0x78f9627, 0xa39d4cc, 0xf80b813 },
+ { 0x31d3aad, 0x8aeefa0, 0x27db664, 0x5042199, 0x4cb6383, 0x244fc69,
+ 0x72192a3, 0x3190477, 0xbbfb57b, 0xcc86075, 0x4451511, 0xbae3a13,
+ 0xf6174f0, 0x16cf416, 0xd376813, 0xb343cc0 }
+ },
+ {
+ { 0xd1824b7, 0x31ac9b9, 0xec8f61a, 0x6282260, 0xc781765, 0xbbeb9f8,
+ 0x2d110da, 0x06ab5c0, 0x47146b8, 0xd583e22, 0x4100d05, 0x79a1608,
+ 0xf0a5c95, 0x16dbbb4, 0xe331667, 0xfe2af1d },
+ { 0xaf8710e, 0x26f0364, 0xeec08fe, 0x1cb8c91, 0x1d95e9f, 0x436bce6,
+ 0x57944a0, 0xfe9050c, 0x07b626b, 0x5f45acf, 0x9cf1276, 0x48dc93f,
+ 0xa05bfb7, 0x4491371, 0x4bcf785, 0x5106304 }
+ },
+},
+{
+ {
+ { 0xed0b3b6, 0xac2e294, 0x671637b, 0x5c5ade6, 0x1140677, 0x2f289ce,
+ 0x754eb53, 0xaf446e2, 0x20421ad, 0x70911b7, 0xe0b7556, 0x4b73836,
+ 0x2a97827, 0xcadf104, 0x8005bc6, 0x4824e49 },
+ { 0x937c28a, 0xb0eeccd, 0x0c3ee97, 0x1ce061d, 0x9f33faa, 0xcb07631,
+ 0xaea66dc, 0x9980bf4, 0xd111d98, 0x2bd0755, 0x7fe4de0, 0x43feaf6,
+ 0xb077b2f, 0xe76fb80, 0x5793b04, 0x227dc9f }
+ },
+ {
+ { 0x14f49ba, 0xea24ae5, 0x11436e7, 0xbc39ea6, 0x78485d8, 0x9d7fed2,
+ 0xdf8b131, 0xb6ef00c, 0xfdbc7af, 0x0237b4b, 0x64ccd27, 0x08745b5,
+ 0xafc5a76, 0xaf8595d, 0x29f5500, 0x43657af },
+ { 0x48470f8, 0x3007183, 0x640fd53, 0x51f91fd, 0xbe15512, 0x859c807,
+ 0xab3e9c5, 0x7d1a474, 0x81553e5, 0x5d714d9, 0x6f62310, 0x0757343,
+ 0x6b02a62, 0xedc5be0, 0xea47832, 0x5a4b9b7 }
+ },
+ {
+ { 0xe93dbb3, 0x03e0a24, 0xcadc884, 0x25841dc, 0x8d10ad5, 0xabc1a81,
+ 0x2042ddd, 0x207e38a, 0xfeba8d8, 0x7fffbdb, 0xa3ec9b5, 0x74efebb,
+ 0x0b40a9f, 0x0bc39ca, 0x0267feb, 0x69ee9c9 },
+ { 0xbc62919, 0xd402fac, 0x1cf53c6, 0xe9f8fc1, 0x7cc7d81, 0xe76fa5a,
+ 0x96bb19d, 0x4f2d876, 0xadc67c7, 0xd4fb7f9, 0x96702dc, 0x40621d5,
+ 0x438f6c5, 0x5b6a98e, 0xf1a1036, 0xa7c64de }
+ },
+ {
+ { 0x9a092c7, 0x84c5e80, 0x11c22b7, 0x9e40e0a, 0xd06c99b, 0x820a091,
+ 0xeecca8f, 0x45fdc77, 0x5794f16, 0xfe1b8a3, 0x4ce3d6d, 0x31f7e5b,
+ 0x82c74c8, 0xfd5e010, 0xc1f6f7d, 0xfdabf30 },
+ { 0xb9248a0, 0xbfa6017, 0x546b941, 0xe898d30, 0x207ff65, 0x878c492,
+ 0xb874e64, 0xbf22e8d, 0x53a547e, 0x43fdb1b, 0x5fbd464, 0xb66deda,
+ 0xc7ae1b5, 0x59127a6, 0x6a7515a, 0xa463646 }
+ },
+ {
+ { 0xde9ab2e, 0x22c4e66, 0x0203c58, 0xfaf60c2, 0x0d5c5ed, 0xed2d7bf,
+ 0x4ca0f19, 0xdbc16fe, 0x465b979, 0x54e8ef6, 0xa310ef9, 0xe2d64b1,
+ 0x3778636, 0xa0f2c95, 0x281883b, 0xf3b4aa4 },
+ { 0x9be6629, 0x4ac9af0, 0x1ca90c5, 0xba455e1, 0x856f492, 0x0147538,
+ 0xabd7840, 0xc80db7e, 0x6beb9cd, 0xb3526d9, 0x9d81503, 0x37657fb,
+ 0x193cec3, 0x8729a16, 0xd69952a, 0xd9a93fb }
+ },
+ {
+ { 0x94f47c6, 0xfce0175, 0xe366d05, 0x228da21, 0xdc8baf3, 0x27ce0b2,
+ 0x6b4a951, 0x8cc660b, 0x384bb01, 0xf678947, 0x44d980c, 0xc629d7d,
+ 0xe85e81f, 0x47980e4, 0x1cd723e, 0xa2e636a },
+ { 0x77fb207, 0x6b6ebae, 0x4c92891, 0x7017961, 0xb4d279c, 0x5569541,
+ 0x41758cb, 0xbb6b36a, 0x27a8e30, 0xecaa222, 0xb470ad9, 0x8b6746a,
+ 0x63e2d3d, 0x4c46017, 0xd3edaec, 0xe19c4ed }
+ },
+ {
+ { 0x34718c8, 0x0b43fec, 0xf33499f, 0x553c407, 0x970d1db, 0x8272efb,
+ 0xa8e8d1c, 0x008c62c, 0x63eec45, 0xe4b79d7, 0xf2d71a3, 0x1fd4230,
+ 0xa368c36, 0x090fdaf, 0xfca7baa, 0xf62c101 },
+ { 0xd2395b3, 0x1c9e6c8, 0x04c5513, 0x671ed63, 0x299a465, 0x577d933,
+ 0x63f9986, 0x286890e, 0xbfc979c, 0xd92a95d, 0x2b51019, 0xcebd79d,
+ 0x3d07251, 0xe74d88b, 0x906f9ad, 0x8b6db73 }
+ },
+ {
+ { 0x7b3d90c, 0xc0c43db, 0x4304a06, 0x85d154e, 0xaf2f38e, 0xe8aceef,
+ 0x83d9459, 0x5e04293, 0x431afd1, 0x65e5e32, 0xa900a65, 0x9e5f050,
+ 0x8a26671, 0xcbaa171, 0x9c93de7, 0x33d0b24 },
+ { 0xd5b6680, 0x3dcbf92, 0x20006f9, 0xc47e5ec, 0x9a51924, 0xc971129,
+ 0xcd0ed46, 0x665d9b8, 0xa5fcab6, 0xed2d63f, 0xcfbfc5a, 0xa817eb6,
+ 0xb76eb76, 0xb38169f, 0xf11160b, 0x8b93544 }
+ },
+},
+{
+ {
+ { 0x693bdcd, 0x02eca52, 0x2ae01d6, 0xbbf0923, 0x8b44b3e, 0x0b0a2de,
+ 0xb250dff, 0xdb82449, 0x6e1c530, 0x0c42b86, 0xa64c2c4, 0xcd226dc,
+ 0xf046b5f, 0xcfb2bb1, 0x3fccb0d, 0x97e2fae },
+ { 0x45ed156, 0xdf92907, 0xf641229, 0x224dcb9, 0x5f1f67e, 0x2126abc,
+ 0xe9c8a6b, 0xa7eed5a, 0x9857d9b, 0x40abedc, 0xde941c6, 0x3f9c7f6,
+ 0xd725ddf, 0x2158d42, 0x8c69543, 0xbdd1015 }
+ },
+ {
+ { 0x8df2fbc, 0xa7dd24e, 0x13d1aee, 0x3adbcfd, 0x13b2177, 0xf6a32d1,
+ 0x7a9a14c, 0x89a7232, 0xdc65df9, 0xe3aef43, 0xa64d74c, 0xeaec3e3,
+ 0x4fec33b, 0x4d387d8, 0x21a2128, 0xaba2a05 },
+ { 0x6b85e30, 0x2382c22, 0xcd2aad3, 0x4352d85, 0xd9772c4, 0xb0c6001,
+ 0x5f3653f, 0x7ed8263, 0x0300f47, 0x3626a6f, 0x6ca7e4e, 0x23909de,
+ 0xc154141, 0xb43dd81, 0x7e4bc68, 0x9a49fad }
+ },
+ {
+ { 0x2428f88, 0xa3661df, 0x56e0db2, 0xbe48b02, 0xce79aa9, 0x3cd1871,
+ 0x23dddac, 0x90ab871, 0x71871a6, 0x9c58fb9, 0xa34910e, 0xf031f7f,
+ 0x81060e4, 0xb501eea, 0x791224e, 0xdb668ba },
+ { 0x6a705bc, 0x240bbcb, 0x2d1865e, 0x7e76fbd, 0x2513641, 0x6e2cd02,
+ 0x46365c9, 0xe6c5225, 0xa5a01fb, 0xe46a8b8, 0xb67618b, 0x696fa7b,
+ 0x0db6792, 0x418b3b9, 0x7108b9c, 0x7204acd }
+ },
+ {
+ { 0x8456b45, 0xb5a143b, 0xf53b4d9, 0x8a3ab25, 0xe13a570, 0xb112a58,
+ 0x81487d2, 0x613ca32, 0x3b1e7c9, 0x837d823, 0xd41e9d5, 0x592bade,
+ 0x5cd02f2, 0xdc1893a, 0x8972e23, 0x0879502 },
+ { 0xcb76261, 0x7003c08, 0x332a5e0, 0x14bde9e, 0xcbbd78e, 0x14b2872,
+ 0xde238e8, 0x5594061, 0x067466c, 0xad12645, 0xf5e4952, 0xa8d0e64,
+ 0xc7f8d06, 0x5b44b82, 0xfb1b828, 0xb51bea8 }
+ },
+ {
+ { 0x3f0dacc, 0xebad685, 0x1cbebbc, 0x5c31b8b, 0xfa5a2dc, 0x6746975,
+ 0x31d9faa, 0x2d95965, 0x00fc0e4, 0x343797d, 0x55fe01b, 0x38d821c,
+ 0x7323aa0, 0x0bfdb24, 0xf962a8e, 0x42613c4 },
+ { 0xe134bc0, 0x599a211, 0x47a7084, 0x75fa4a1, 0x7f734b5, 0x6e71948,
+ 0x6dfca2b, 0xd5ced2d, 0x8aeabd2, 0x9fa0fdc, 0x12361da, 0x5e6b03f,
+ 0x5859fcf, 0xad23d31, 0x25a5fc8, 0x3120ef1 }
+ },
+ {
+ { 0x8e9f638, 0x990ef62, 0x626a60c, 0xfdaa240, 0x2abddab, 0x4a3de20,
+ 0xd8872b2, 0xd5d10b7, 0x1ea5880, 0xa01b730, 0xa81b9d8, 0x481697f,
+ 0x3471ed8, 0x2984153, 0x292d37c, 0xefd73f8 },
+ { 0x9994beb, 0xdda7626, 0x6a4f865, 0xa037703, 0xe5b47d5, 0xda992ec,
+ 0xe53edba, 0x912a427, 0x9264e45, 0x6467598, 0xaf71222, 0xd3b68c3,
+ 0x6dedc5f, 0x9d3436c, 0x076b2ad, 0x1e027af }
+ },
+ {
+ { 0x4382f4a, 0xd56fca1, 0x8966b7b, 0x83712a4, 0xa4c9ddb, 0xd6b2cf5,
+ 0xf602875, 0xa66be29, 0x894f3d0, 0x70e4266, 0xb3195ca, 0x007d220,
+ 0x82c74d4, 0xba38d8f, 0xd975cbd, 0xdccc5fc },
+ { 0xc88b38b, 0x03e1610, 0x52e0d8d, 0xeb9f9a1, 0xb646eb7, 0x6a57eca,
+ 0xc76b6c1, 0x161641f, 0xbd2e12b, 0xf9025ad, 0x5c0e26d, 0x87c74db,
+ 0xbfeca74, 0xed5cb51, 0xe34a08c, 0x603dfb6 }
+ },
+ {
+ { 0xcb03307, 0xc4be728, 0xc2741cc, 0xde34c0e, 0xa74eb17, 0xe01db05,
+ 0x8905e4b, 0x1bfce0c, 0xd1b1826, 0xb18830a, 0xe87bbfb, 0xcacbb41,
+ 0xd2f1a79, 0x8696842, 0x08c83ea, 0xa80e5fb },
+ { 0x3f1439c, 0xe48f163, 0xcd6987b, 0xc1d4108, 0xb751814, 0x05705c4,
+ 0xc1c622d, 0xa9bffd0, 0x46cd053, 0x23de4af, 0x39457c3, 0xf782f5e,
+ 0x5e5d243, 0x815276b, 0x6161ae3, 0x3132041 }
+ },
+},
+{
+ {
+ { 0x77f2542, 0x2459661, 0x8372b25, 0x203be7e, 0xee2007b, 0xc7c9426,
+ 0x0621799, 0xc564138, 0xc28c3ce, 0xda56589, 0x7afc1e3, 0x13e8a7c,
+ 0xe352082, 0xdba81e9, 0x04435c7, 0xf430549 },
+ { 0x691de4a, 0x4d26533, 0xfb777ab, 0x364408c, 0xeae7f88, 0xccdfb43,
+ 0xa525b11, 0xbc40f44, 0x3c60627, 0x8e112a5, 0xe17e696, 0x7f7c581,
+ 0x1ea774a, 0x0fd7878, 0x0b1f582, 0xd09e632 }
+ },
+ {
+ { 0x70aab15, 0x44390bd, 0x889c3f2, 0x41112bc, 0xd685349, 0x6b02894,
+ 0x5584dfe, 0x7103001, 0x1ba7887, 0x373cb1b, 0x2a017c7, 0x53d286c,
+ 0x3c81fdc, 0x2ed0388, 0xfbcc6fc, 0x3bfc5e3 },
+ { 0xfd6418d, 0xd38ac6f, 0xbfad89e, 0xc667e96, 0xeab4d66, 0x46f4f77,
+ 0x0911293, 0x194c04f, 0x68c48d5, 0x0fd09cf, 0x63cf7f4, 0x6f5b055,
+ 0xacd562f, 0x0c0a8c4, 0x36d965d, 0x94c1d83 }
+ },
+ {
+ { 0xcaa127a, 0x94fc8f0, 0xd803690, 0xc762d5d, 0x1ebf0d3, 0x8bfdfd1,
+ 0x48eac50, 0xa98cdf2, 0x8b5ff10, 0x3d7365d, 0xc65b4de, 0x20dc29b,
+ 0x8ec7c68, 0x62ac28e, 0x90372d2, 0x7f5a132 },
+ { 0x3246658, 0xf3d8a25, 0x9ac202a, 0xa4bebd3, 0x5cc1697, 0x078ede7,
+ 0xc8fc022, 0x5525800, 0x5fae77b, 0x302a802, 0x57917b6, 0x0180139,
+ 0x864bf55, 0x7c8806d, 0x12f06f1, 0x4e2d878 }
+ },
+ {
+ { 0x3d66e88, 0x8d35118, 0xa91d02a, 0xfb861a1, 0x7850e5f, 0x8c27c2a,
+ 0xa5496f6, 0x9fd6399, 0x8080049, 0x52152ae, 0xfd1c2dc, 0x600e2ff,
+ 0xffe8b2e, 0xc75902a, 0xe03b175, 0x5c4d2cc },
+ { 0x4f57e78, 0x8ad7c42, 0x1736f87, 0x77cf606, 0xf85038a, 0x2876012,
+ 0x1b97b95, 0xff32845, 0x392dfc8, 0x3cc6dd5, 0xa6f5075, 0x72f1363,
+ 0x71de894, 0x028ec44, 0x6f45a86, 0x7030f2f }
+ },
+ {
+ { 0x9695817, 0x66400f5, 0xf20ea36, 0xeda0a7d, 0xd394992, 0x855be51,
+ 0x8336f62, 0x2d082c1, 0xf28c868, 0x30944dd, 0x0dc86d0, 0xfb5f853,
+ 0x564a0bd, 0x9562ae5, 0xb6b9b51, 0x1f7ea12 },
+ { 0xd0a7148, 0x5bd74e0, 0xb91e572, 0x6c8247f, 0x47da498, 0x699aba5,
+ 0x1f7c814, 0xed82581, 0x62057b9, 0x434674b, 0x15c15b4, 0x8b4df5e,
+ 0xb110081, 0x2a97da1, 0x4c417fe, 0x2a96b0c }
+ },
+ {
+ { 0x237639d, 0x4f75dfc, 0x1db7029, 0xe5ad6bc, 0xb3d28f7, 0xd43e06e,
+ 0xe447989, 0x89f3bb5, 0x01a1a6e, 0xc426a2c, 0x315878f, 0x33ea71c,
+ 0xb1b5705, 0x8a7784a, 0x77ca811, 0xa59e86e },
+ { 0x36ae155, 0xddb133c, 0x0d51b42, 0x49f1d4c, 0x9d05519, 0x5508082,
+ 0x5291816, 0x20e23be, 0x67181ec, 0x35047ec, 0x7aad091, 0x6237dc4,
+ 0xe2e25a2, 0xa1d3ce1, 0x0d3db4c, 0x1de0522 }
+ },
+ {
+ { 0xd9fd423, 0xe9a5e19, 0x9801e43, 0x0c2c3d0, 0x28df2da, 0x043c2dd,
+ 0xe1ad12a, 0x4eecab4, 0x9615aa5, 0x97e1797, 0xca7bb5e, 0xe57b879,
+ 0xcc92619, 0xa2a903c, 0xaa56e93, 0x5cef370 },
+ { 0x7f3232c, 0xbef29fa, 0x2b7ad5c, 0x1cf35ed, 0x3b6077a, 0x35c4893,
+ 0x7a1d47d, 0xe065148, 0xce14572, 0xedb4673, 0x0b17629, 0xdc9e98c,
+ 0x9a02a5c, 0xef98ebe, 0x11d03c0, 0x1f772e3 }
+ },
+ {
+ { 0x4608f72, 0xcbdbdcd, 0x5a13c6f, 0xb435223, 0x4bb3c21, 0xa6497f6,
+ 0x12c15c9, 0x3af2383, 0x6322d11, 0xfbbf4b3, 0xc641775, 0x520a5c6,
+ 0xe81e0e1, 0x18cd967, 0x3de3871, 0x980b2c6 },
+ { 0x9ae44a2, 0xfa9db61, 0x176bc56, 0x0281dd2, 0x8a7f817, 0xfd03711,
+ 0x4129b30, 0x9c48545, 0x039626d, 0xb439648, 0xe4ada6b, 0x355050e,
+ 0x7f5d98c, 0xc9c16d6, 0x18c4d5e, 0xf53ccc3 }
+ },
+},
+{
+ {
+ { 0x3ffb20b, 0x50ae942, 0x6865eb4, 0xa6c0b42, 0x09930f1, 0x4677f7d,
+ 0x4a16427, 0x742e0b6, 0xf976f9a, 0x521d18e, 0xa454749, 0x43ac9cf,
+ 0xc51f50d, 0xda3a91d, 0xad6f954, 0xf657029 },
+ { 0x6b4f99a, 0xfe5f064, 0x63ad4ce, 0xd92a5d9, 0x2e0e081, 0xfcb5509,
+ 0x8d8a858, 0xadc85ab, 0x0632f0f, 0x8e9b966, 0x8d7216d, 0xe7a4f16,
+ 0x59c3b99, 0x00a4cc5, 0xba09dc1, 0xed6d0bd }
+ },
+ {
+ { 0x1621beb, 0x7236d14, 0xbc7ca95, 0x1751fd4, 0x2f5319c, 0xaa619d1,
+ 0x4e9316f, 0xfc2b15b, 0x9fd4d33, 0x2d1a906, 0x8ced829, 0x28c3bac,
+ 0x1dd998f, 0xf2efab5, 0x3b149ed, 0x2c13330 },
+ { 0xf601ac6, 0x65237c9, 0x07d6a45, 0xb54dd65, 0xfb1a4cf, 0xa1ce391,
+ 0x115f67e, 0x2957533, 0x465279b, 0x6456da8, 0xa993e02, 0x02890aa,
+ 0xb7175e4, 0x6891853, 0x0f3e59b, 0x3fda203 }
+ },
+ {
+ { 0xd8c6e0b, 0xe99fe12, 0x5341c56, 0x7cb07ff, 0xdf77b24, 0xc292c7b,
+ 0xca29906, 0xf52dfd0, 0x772f02c, 0x4a6aa26, 0xe1bbd09, 0x26f7684,
+ 0xee7c2a8, 0xec56b2b, 0xad4a312, 0x67709e6 },
+ { 0xc570263, 0x99c57b2, 0x2faafae, 0xeb0100b, 0xff25eca, 0x980d5d1,
+ 0x82cf936, 0xace35e6, 0x44679ed, 0x5a82ce5, 0x074b81e, 0x5c76a41,
+ 0xa00abb1, 0xf36fa43, 0x04ffb2d, 0x0642819 }
+ },
+ {
+ { 0x04bdd28, 0x68f6bc8, 0xb5dc7ad, 0xc311d96, 0xed32e45, 0xff0d646,
+ 0xe0f712d, 0xaf3cdc6, 0xd483861, 0xd4508e9, 0x0e1c277, 0xb624be5,
+ 0xc5dd841, 0xc510275, 0x298dc02, 0x451c5c3 },
+ { 0xdd34d6b, 0xf87d479, 0xdd06a38, 0xda7f293, 0xb699e9f, 0x575e129,
+ 0x215b2cc, 0x79e5fb2, 0x657e690, 0xd280028, 0xe702a71, 0x7fecd09,
+ 0xfa13677, 0x85160ab, 0xce65f64, 0x5de3427 }
+ },
+ {
+ { 0xe8fff38, 0x84e4bf6, 0xb358b1c, 0x16f3725, 0x3b472a5, 0x360371c,
+ 0x52f217a, 0xe64c061, 0x0501241, 0x8e67379, 0xab2dd96, 0x88e81d6,
+ 0x1385604, 0xf3e218a, 0xe84184d, 0x9736caf },
+ { 0xdbb93a3, 0xb55a043, 0x9301088, 0x335088f, 0xb2a4959, 0xcea7a2d,
+ 0xb882c33, 0x48e5d4a, 0xad46179, 0x114f09b, 0xb446576, 0x4416467,
+ 0x34c6c2f, 0x01cb23e, 0xa02db8a, 0xddebf04 }
+ },
+ {
+ { 0x9bde8a1, 0x36d60cc, 0x676e4ad, 0x20fd2f2, 0x8936581, 0xebdcfb7,
+ 0xdbfc2c3, 0x245d0d5, 0xa9f82e5, 0x104c62c, 0xd654d9b, 0x7387457,
+ 0xae7f10e, 0xe966777, 0x1d8e582, 0xefeb16f },
+ { 0x70364b5, 0x4faf4f1, 0xd612472, 0x0e1ab58, 0xfed6085, 0x11bbfe7,
+ 0xa59a09a, 0xb360a14, 0x722fdb6, 0x61d96e9, 0x94068bd, 0x16a12f1,
+ 0xf73c2be, 0x225bf07, 0xc8bd24e, 0x1e64665 }
+ },
+ {
+ { 0x3698c75, 0x27a478a, 0x6202aa2, 0x778ccd3, 0x8d87f1f, 0x0149c63,
+ 0x784edae, 0xa660e5f, 0x82adfa8, 0xe0d4d2f, 0x1ba1f9d, 0xf512dd6,
+ 0x6245c58, 0x90cfed9, 0x18b53dd, 0x6c3a548 },
+ { 0xbdc094f, 0x833f70c, 0xb1514e7, 0xa5f26f5, 0x1c8cf13, 0x93e7cf5,
+ 0x186ec43, 0x1436601, 0xe78170a, 0x81924ac, 0x8694368, 0xcc880a0,
+ 0x0b62cbb, 0x2dfa955, 0x96b4a2c, 0x0bc6aa4 }
+ },
+ {
+ { 0x3561aa2, 0x5157a7e, 0x8645c1e, 0x525c500, 0xce7cbb3, 0x22feb4e,
+ 0xc89a58b, 0x36d0d25, 0xc9bde9c, 0x43131f7, 0x881f731, 0x74afdda,
+ 0x7c8e36a, 0x99ab87c, 0xc1d4fb2, 0xf07a476 },
+ { 0xbebc606, 0x1b82056, 0xfcf089f, 0x95a1e5a, 0x2b55d5c, 0xc5bccfa,
+ 0x00eb0b1, 0x8fbc18e, 0x9efb483, 0x93a06fe, 0x2d74c57, 0xcafd725,
+ 0x3de4350, 0xc7518f0, 0xc6fd762, 0x9a719bf }
+ },
+},
+{
+ {
+ { 0x2362087, 0x5ee0d83, 0x0b167e8, 0x7f2c0d7, 0x5e0e865, 0xb732789,
+ 0x98c4e65, 0xef5b2e8, 0x8fe9cc1, 0x222797d, 0x82d1e15, 0xfe6d73e,
+ 0xf62dc4b, 0xc7c0e9c, 0x937ceda, 0x962acfe },
+ { 0xc1e85c7, 0xd763711, 0x2836978, 0x8f2dbbc, 0x8c44e98, 0xbadc055,
+ 0xa3e93f8, 0xed63eab, 0x41b55c7, 0x807e857, 0x6d1207b, 0xd51ae5e,
+ 0x39d541b, 0xa0ef9a6, 0xa0c56a5, 0x58855f9 }
+ },
+ {
+ { 0x213091d, 0x7d88eaa, 0x45b6a0d, 0xcbdfee7, 0x4f5e077, 0x826a012,
+ 0x90f1e4c, 0xb04fc13, 0xaea69aa, 0x1961ac3, 0xd5bb63e, 0x3afb719,
+ 0x4ac7e5c, 0x2a37837, 0xc50ca45, 0x78efcc1 },
+ { 0xb8abdef, 0x346e8f0, 0x88095d0, 0x27e3dbd, 0xffc6c22, 0x56d3379,
+ 0xfa4b291, 0x67d416c, 0x3b1b373, 0xc3baaf6, 0xdf73bae, 0x0184e1f,
+ 0x9167528, 0x38ae8f7, 0x35d6297, 0x7329d4c }
+ },
+ {
+ { 0xf568c52, 0x45d2ac9, 0x9808593, 0x5134814, 0x31b7ed8, 0x0c92d83,
+ 0x0876ecd, 0x921327a, 0x052736a, 0xf752d75, 0xbc6b837, 0x7b56487,
+ 0xa23b4cc, 0x6b1a320, 0xec0d665, 0x1983937 },
+ { 0x08554ab, 0x2c3017c, 0x366e87f, 0x40ad955, 0x8ed7f02, 0x88c4edf,
+ 0x3cc5e6d, 0x64a7db1, 0x2dc978b, 0x5ac91fa, 0x925d2a2, 0x016a20d,
+ 0xabb57b4, 0x3604dfe, 0xd7e2e85, 0xc3683ec }
+ },
+ {
+ { 0x4c0c6d0, 0xc47150a, 0xe22adcf, 0x30af45e, 0x022ea4b, 0x39b5acb,
+ 0x77203b5, 0xfbe3185, 0x6fd9b59, 0xe5aaa34, 0xdd1c8dc, 0x0062c90,
+ 0x54049ac, 0xcf113f3, 0x63a31b5, 0xd8fba4d },
+ { 0x1056a69, 0x73b5488, 0xd780bda, 0x3be6cbc, 0x30ba2b9, 0x5776ec2,
+ 0x8e8d6f7, 0xbe883cf, 0x5c2be6f, 0x64efe94, 0xf1ade8d, 0x064f704,
+ 0x743110e, 0x41cfd17, 0x4c20abe, 0xaac9411 }
+ },
+ {
+ { 0xf1c1468, 0x91f9192, 0x4563e13, 0x8176e74, 0x0bda15d, 0xa48b5f9,
+ 0xda42af6, 0x2a085ae, 0x425c018, 0xfd38ab2, 0x08abafb, 0x2884ba4,
+ 0xcbd091d, 0x356f318, 0x817871b, 0x454e450 },
+ { 0x8ada531, 0xe080e81, 0x3152ba8, 0xa40f1eb, 0x0c38eb1, 0x051049f,
+ 0xbd45003, 0x37e4bb3, 0x54a01e5, 0x6d09804, 0xeeb824a, 0x6de932f,
+ 0xdc93481, 0xccdef37, 0x93a05e8, 0x8633e07 }
+ },
+ {
+ { 0x034675c, 0xbe94256, 0x08db789, 0x376c01d, 0x9af1b6b, 0x8707ee7,
+ 0x11bfbac, 0x633b3ef, 0xd06db60, 0x694f33f, 0xbb13407, 0x2a68bfc,
+ 0xda27c3a, 0x1c860c9, 0xd701ac3, 0xbca16de },
+ { 0xc59ffd0, 0x2b76cfa, 0x54d718d, 0xf9a1165, 0x67f0878, 0xf86a1db,
+ 0xaf34e85, 0xe313e05, 0x3343159, 0xa188811, 0x0bb7ed1, 0xdbe4c3f,
+ 0x0c732bc, 0x73b67e8, 0xe74110e, 0xa4e1c87 }
+ },
+ {
+ { 0x5c6770c, 0xce1106b, 0x5c0bcb7, 0x422c70b, 0x8195e7f, 0x32a3990,
+ 0x1ccd4aa, 0xa24968d, 0x720e557, 0x8f08ecf, 0x54bcc81, 0x5da10a4,
+ 0x6cd846e, 0x9d3c73b, 0x368d065, 0xaeb12c7 },
+ { 0xcf9fd1b, 0x2110859, 0xee2bd6d, 0xd2a4801, 0xe9466ac, 0x376e556,
+ 0x3b5aa35, 0x767803b, 0xb8a89ba, 0x343f842, 0x6726bbf, 0x3263cc1,
+ 0x25871b0, 0x26caf17, 0x41b8578, 0xef66ad6 }
+ },
+ {
+ { 0x638068c, 0xc9f2249, 0x1ccf9af, 0x96d282c, 0x69b435a, 0x71df30c,
+ 0xcb9d5c9, 0x88c943a, 0x2a8f378, 0xbf98ef1, 0x114c6ff, 0xffc1824,
+ 0xd52e8c7, 0xda3ad2c, 0x1afcb59, 0xf1222bc },
+ { 0x0ee334a, 0x459e94b, 0x421933a, 0xd4477b8, 0xa1e401e, 0x60fb7b0,
+ 0x0d1e330, 0xfde6e82, 0x3233fde, 0xcecfe9b, 0x2e93523, 0x09ec466,
+ 0x30775b9, 0xa5ba649, 0xadf80f2, 0xcc397e5 }
+ },
+},
+{
+ {
+ { 0x4ddc8a8, 0x2fe182d, 0xac056bf, 0x88d6e79, 0x0e41e4e, 0xc3ff2d1,
+ 0x2c3679f, 0x32ec7f9, 0x4e61051, 0x3561f09, 0x6c6250a, 0x4553f5a,
+ 0xdd25c5b, 0x2b765ef, 0x6a1cd7f, 0xe3a40a2 },
+ { 0x5d821dd, 0xb27309b, 0xc2c17ca, 0x950fb8d, 0x8fb0d4c, 0xfeed015,
+ 0xf550179, 0x762c479, 0xe095840, 0x306cf44, 0xd379e66, 0x84b413a,
+ 0xbb2e4f1, 0xd6e5d5a, 0x94b085d, 0x8bc12b7 }
+ },
+ {
+ { 0x04b5532, 0xc0d4cb8, 0xb9940a6, 0x7a31525, 0x68c69d1, 0x010e7dd,
+ 0x2a18c35, 0xd81f29d, 0x3f11e73, 0x08ae770, 0x6e55106, 0x5358f87,
+ 0xc960ef5, 0x299e8ca, 0xacfc8dc, 0x89a6fb4 },
+ { 0x6dc7d4a, 0x5996a40, 0xe51b96e, 0x21e5112, 0x09a202b, 0x95b8c3d,
+ 0xd441f1f, 0x306ab0f, 0x98d4245, 0x2834fed, 0xd0abbde, 0xc29c387,
+ 0xb805c15, 0xf6a9bf1, 0xc4e458d, 0x602f4f8 }
+ },
+ {
+ { 0xe5a893a, 0xf041486, 0x8934327, 0x53b891d, 0x4000758, 0x11e000d,
+ 0x662bad9, 0xa4ccde8, 0xb9a1b64, 0xe34d3ed, 0x84e7a6d, 0x72d9675,
+ 0x6627be4, 0x773da2f, 0xe835ae3, 0xa11c946 },
+ { 0x650bc15, 0x02e8203, 0xe58b78d, 0x2d35936, 0xf21a3cc, 0xe9cfbe8,
+ 0x1049222, 0x55ad831, 0x38fff47, 0xbf99de4, 0x3831db5, 0xebbfd80,
+ 0xaf2af42, 0xe990636, 0xb7f5a0e, 0xc26ae52 }
+ },
+ {
+ { 0xfa8f846, 0xb5d85b1, 0xb3b1455, 0x4166489, 0xd36a305, 0x768260d,
+ 0x4ff5645, 0xc6a8235, 0xd6e93e5, 0xd241cd8, 0xa406e74, 0xeed9aa1,
+ 0x5f600d9, 0x9e96ab0, 0x6eca2a1, 0xa26b8b5 },
+ { 0xd705aef, 0x78321cf, 0xc0161ec, 0xc4fb6b3, 0x5199cf1, 0xdc32441,
+ 0xd0a5067, 0x33627d0, 0x15143ee, 0x13490cb, 0x85b4f44, 0x77e0ede,
+ 0x394b165, 0x904f12e, 0xefab32d, 0x90f50f5 }
+ },
+ {
+ { 0xbc2de96, 0x4aa0a16, 0xaa9c12b, 0x172596a, 0x60e8a29, 0xd512e1e,
+ 0xf637e83, 0x77d35c1, 0xd2aae0b, 0xbb0d141, 0x8c03738, 0x8a878a5,
+ 0xab0e525, 0x6d24c01, 0xf760887, 0xb7d3136 },
+ { 0x3f91b7c, 0xdbc3f8f, 0xa8722c0, 0xe7b4bca, 0xda0ae65, 0x3286a91,
+ 0x225b084, 0x8372274, 0xae1886c, 0x5884cd5, 0x3a23cf7, 0xb4e63ef,
+ 0xf2dd0da, 0xfe5f202, 0x653916c, 0x951fac9 }
+ },
+ {
+ { 0x854fa4e, 0x05e2e8f, 0x1edaf10, 0xf411f94, 0xa0a928d, 0x26cc562,
+ 0x4abce65, 0x78fd34e, 0x98a32e2, 0x1d87609, 0x4c37518, 0x85dc76f,
+ 0x00e8021, 0xdcaeef5, 0x4e9b2a5, 0x7fcb2f8 },
+ { 0xf382c06, 0x9eba91e, 0x24cae53, 0x2052e85, 0xf5c1519, 0x617336e,
+ 0xb4e632b, 0xf1546d5, 0xd7b8ffd, 0xa9edc81, 0x29ab68c, 0xdb2914f,
+ 0xdebbaba, 0xe805070, 0xc3b719e, 0x775e53b }
+ },
+ {
+ { 0x065256a, 0xa40e294, 0x8fb031a, 0x9f11386, 0x059667c, 0xac03af8,
+ 0x0475f58, 0x432eb3a, 0x01faad0, 0x22332bf, 0xbc57a11, 0xc8132e9,
+ 0x3bc3f8b, 0x27d5a17, 0x930bf3e, 0x5471fc6 },
+ { 0xe6bff40, 0xba28bc0, 0x555e564, 0x198d57e, 0x9c65b8f, 0x13ce831,
+ 0x5681b51, 0xb0a5c9d, 0xdeb9e11, 0x467588b, 0xbb4250b, 0xf1891a7,
+ 0xd12b433, 0x10b938b, 0x24dcda4, 0x0b8c802 }
+ },
+ {
+ { 0xcf332d3, 0xc428703, 0xf2a5b98, 0x9d0053c, 0x7838a15, 0x4e4c620,
+ 0xfbf8a43, 0x2e92919, 0x21cd9a5, 0x39ad524, 0x1561588, 0x584ed6c,
+ 0x17a95c8, 0x20af305, 0xb70e1c8, 0xa223077 },
+ { 0x2fa4871, 0x679cfea, 0xac633c7, 0x54f2a46, 0x4cdc5f1, 0x6030651,
+ 0x75a1dc7, 0xc4facda, 0x2d07d19, 0x710a288, 0x6b44992, 0xd55864e,
+ 0x454c5b2, 0x44d4b6c, 0x72f9981, 0x2855d28 }
+ },
+},
+{
+ {
+ { 0xc7b0674, 0x4071b3e, 0xf8794d5, 0x800eb14, 0xbe6783e, 0x70573af,
+ 0x7785901, 0xafaa440, 0x405f32c, 0x112d2a1, 0x169b3e2, 0x3761a52,
+ 0x842a366, 0xe168b31, 0x9bf4734, 0x5bc322f },
+ { 0x976c4a0, 0x36ef240, 0xfea4e64, 0x066f3d6, 0xa989e57, 0x0e954bd,
+ 0xf9466e4, 0xe36ef5e, 0xbeb9226, 0x6bb615a, 0x3d5a2ca, 0x5571e5f,
+ 0x4897a86, 0xa86efe2, 0x28a9f77, 0xed7e9cf }
+ },
+ {
+ { 0x1f82c68, 0xdf10c97, 0x3b597e6, 0x796ba1e, 0xe718cbf, 0x1ac77ec,
+ 0x410eac8, 0xc8175bb, 0xbc555ef, 0x0cdf9a1, 0x7524e05, 0x6b889f1,
+ 0xae26d82, 0x6bf1e61, 0xd2e97d9, 0xb3f6ad5 },
+ { 0xf226487, 0x94dcff9, 0xbe03dde, 0x60e6356, 0x6a3dd7d, 0xda1f93b,
+ 0x79ca90c, 0xf1be721, 0x1e6bce5, 0x05ed313, 0xd48af3e, 0xcf50908,
+ 0x61e554f, 0x3b0e85c, 0xa2778d3, 0xfe7e35b }
+ },
+ {
+ { 0x75ac5a9, 0x42c5032, 0xda062c2, 0xa66a66d, 0xcaa7023, 0xa4f4f82,
+ 0x64b4f86, 0x489d476, 0x97311ad, 0x10b1088, 0x177b2ec, 0x55dd637,
+ 0x9a267b1, 0xa5ccff0, 0xff327b0, 0xf07690b },
+ { 0x2250cd2, 0x39162ed, 0x8b255f1, 0x1426de0, 0x1bdd731, 0xf227afd,
+ 0xfa4c844, 0x78f8a36, 0x157379c, 0x267a211, 0xcc04acb, 0x3f05f92,
+ 0xfc69cae, 0x374496c, 0x16ebfec, 0xbf2c5d0 }
+ },
+ {
+ { 0xd0518d1, 0x605418b, 0x9e1cbc6, 0x3237f80, 0x286c019, 0x37a7005,
+ 0xb15af0b, 0xf1fb0e0, 0xaa853c0, 0xfc3b97c, 0xe6beba2, 0x1f48bd0,
+ 0xe6a72f1, 0x8e5d7c5, 0x26ebf0c, 0x575e66d },
+ { 0x62eae3d, 0x0994776, 0x96c9c65, 0x53f074f, 0xb81bade, 0x6cfbfdb,
+ 0x3fed7d1, 0x98b4efe, 0x38c3382, 0xdaa1123, 0x47b8ec6, 0xdf88b73,
+ 0x9504a4f, 0x9b0fe4b, 0xf30c1c3, 0x2e7df4c }
+ },
+ {
+ { 0x2fc1833, 0x25380cb, 0x18d62de, 0xb8e248c, 0xd82f9db, 0x91c8f59,
+ 0x2444750, 0x5ec2b20, 0x66b6f74, 0x3f3a1f7, 0xdd7d14d, 0x0180aa9,
+ 0x2956b9c, 0xd0a342d, 0x7139873, 0x26e910e },
+ { 0x139e23d, 0x2261dc4, 0xb8343dd, 0x7edb181, 0xb4038dd, 0xfcf1073,
+ 0xa3bfea3, 0x88870ef, 0x64a263e, 0x4e98ba9, 0x70811f5, 0x3c6e5dc,
+ 0xf86055d, 0x17d28f5, 0x66e4199, 0xca9c276 }
+ },
+ {
+ { 0x964ef8c, 0x0b2d8bd, 0x88e2ba6, 0x5a99b85, 0x04498ce, 0x9e927b2,
+ 0x756eb25, 0x9ff20c5, 0x3f27736, 0x97cc27b, 0x4729583, 0xf32dd6d,
+ 0x0381a94, 0xbdc2658, 0xef2c06f, 0x70fef15 },
+ { 0x49252cc, 0x50a6191, 0x236b4b9, 0x9eb4a14, 0x8e00f78, 0x9b1b215,
+ 0x6ea9c23, 0x27add36, 0xc3a8e79, 0xef61763, 0xd82ce56, 0xed4542f,
+ 0x0caed75, 0xa8737e7, 0xd452d76, 0xeca0ac2 }
+ },
+ {
+ { 0x3d082d0, 0x20c0779, 0xc9e9f3b, 0x6e3ce64, 0x75a195f, 0xb3a4dce,
+ 0xbdd9f24, 0x3a3c305, 0x8688942, 0xe2545c8, 0x080f32b, 0xa463c82,
+ 0x42686b8, 0x4429748, 0x7213866, 0xf50e20d },
+ { 0x3826e74, 0x265ac52, 0x228e8ec, 0x26fba57, 0xe6b3ed8, 0x8a1e1db,
+ 0xf0fe65a, 0x7c7b278, 0xc395234, 0x9a6df23, 0x0b0f114, 0x9956206,
+ 0xef90837, 0x440c8c4, 0x3645f65, 0x21ad22a }
+ },
+ {
+ { 0xedd31b2, 0x1e023a6, 0x9ff8668, 0xf76d145, 0x17b45c8, 0x9707056,
+ 0x1e88e37, 0x0612078, 0x922faac, 0x85c51c8, 0x22756d9, 0x4df392e,
+ 0xa03c98e, 0x8907fd0, 0x52ea51c, 0x626f46a },
+ { 0x486c8a2, 0xf8f766a, 0x88ed18c, 0x8c499a2, 0x3c4f0de, 0x44d2dc6,
+ 0x6f2a0b6, 0x47dde68, 0x4a973fd, 0x9a655f8, 0x786ac80, 0x3e7124e,
+ 0xe8a0574, 0x699e61c, 0x31cdd0d, 0xdf0ba9a }
+ },
+},
+{
+ {
+ { 0xd73e69b, 0x76270ad, 0xc67d38a, 0x991120f, 0x9469f0c, 0x7be5830,
+ 0x7db40ac, 0x93aba59, 0x822fc08, 0x2b707bc, 0x69551cd, 0x4199fc0,
+ 0xf367324, 0x38deed4, 0x2228787, 0xca518e1 },
+ { 0xd9a9277, 0x72f1bef, 0xe49ae90, 0x57d4aab, 0xdb23478, 0x13810d5,
+ 0x9b4b77f, 0x2a8b780, 0x1b4e004, 0xb542f4e, 0x3ec77f0, 0x4080fd0,
+ 0xcec6596, 0xb49e9fe, 0x3f16037, 0x20338d3 }
+ },
+ {
+ { 0x53554b0, 0x4adcdae, 0xe04c4db, 0xfea4906, 0x7748233, 0x0808bec,
+ 0x47148d7, 0xde7477c, 0x03da38c, 0xdd9124c, 0x25ee8e9, 0x6b25031,
+ 0xb0d6161, 0xae67399, 0x82203b6, 0x70c4acd },
+ { 0xd31dae8, 0x9683916, 0x1ac7f69, 0x3477503, 0x988e4ad, 0x9553153,
+ 0x53a15e1, 0xb58f411, 0x92ba2dd, 0xb65a2d4, 0xa90169c, 0x7c3efb1,
+ 0x6b1747d, 0x210f45e, 0xcff488d, 0x16e8d1b }
+ },
+ {
+ { 0x9d703db, 0x252adf8, 0xfdfeb39, 0x259ac1d, 0x115e806, 0x7faf6af,
+ 0xc1aff21, 0x7aaefd6, 0x7c0113d, 0x8054210, 0xe19b4b1, 0x481f1a5,
+ 0xfcc8c61, 0x7c17d43, 0xbb0bbbe, 0x8b04452 },
+ { 0x4cebae1, 0xe51e5f5, 0x56a414c, 0x05341ba, 0x7fb8a30, 0x0083a2c,
+ 0x77f4952, 0xb4663f2, 0x4bb0074, 0xce72eec, 0xa3584d1, 0x74fdd66,
+ 0xb02e076, 0x6b9e58e, 0x3b961f4, 0x5be45d5 }
+ },
+ {
+ { 0x1ab2e0b, 0xc7474f3, 0xf4bf454, 0x2838ccb, 0xf3c3eac, 0x634392e,
+ 0x137602b, 0x440e40a, 0xd1ae8e3, 0xeea67e9, 0x77e221e, 0xafdf93a,
+ 0x2719a10, 0x3c9f3da, 0x32c8256, 0x466ecef },
+ { 0xf9c432f, 0x1061c19, 0xb1c7d98, 0xa1332d9, 0xa425c2c, 0xbc735f2,
+ 0x4b1bccb, 0x1429cdf, 0x6bbb5f9, 0x77b42a1, 0x5955ae4, 0x30078e3,
+ 0x21cc315, 0x8acd777, 0xe86fa99, 0xaa90d5f }
+ },
+ {
+ { 0x721115a, 0xfcfd460, 0x08269b8, 0x6a7de3e, 0x96dd47e, 0xe5964a6,
+ 0x8dca975, 0x6717cd5, 0x98b149e, 0x7ea4ebe, 0xb7b8057, 0x6f894d5,
+ 0x7f30e31, 0xbd6f960, 0x23df092, 0x61ca453 },
+ { 0x9d782f3, 0x32241f9, 0x2abfae2, 0x55173b0, 0xd15bbbd, 0x0abe0ed,
+ 0xb438abb, 0xb6d3c0a, 0x9ffa20b, 0x62fb467, 0xd31560a, 0x30926b5,
+ 0x2a0aa6d, 0x44bf27c, 0x1a4cb97, 0xf747313 }
+ },
+ {
+ { 0xb0535de, 0xa2f6c0d, 0xc855166, 0xcb02ae1, 0xb3422f0, 0xc699e6b,
+ 0x281ba8a, 0x774febe, 0xffabcc7, 0x1d9d24f, 0xfe12ba5, 0x0b31ba1,
+ 0x13d0af7, 0x4c86803, 0x2f47160, 0x90640d3 },
+ { 0x5876603, 0xa0c4bf4, 0x950ab08, 0x717f6fa, 0xa710de8, 0xf12bb53,
+ 0x6a88f50, 0xc500c61, 0x2645351, 0x0070f99, 0x2446893, 0x57aab5d,
+ 0xb68f657, 0xd553fa8, 0x693c55d, 0xe8537c1 }
+ },
+ {
+ { 0x7fc7684, 0x58e86eb, 0xbfc73a9, 0xdf330f7, 0xcc11936, 0x41e337d,
+ 0x6e35759, 0x36d9200, 0x3500d8b, 0x0132703, 0x9483354, 0xfa68405,
+ 0x667851b, 0xc8f2980, 0x18296b0, 0x538ec89 },
+ { 0xcff55f9, 0xa2a2c4f, 0x60d20bd, 0xb260d4d, 0xd9cc59f, 0x3ed576f,
+ 0xd514fcc, 0x4ed8c64, 0xc22b315, 0x37ebfb2, 0x94c212c, 0xca67a36,
+ 0x3a1795e, 0x4f8e08c, 0x4e7261f, 0x498f926 }
+ },
+ {
+ { 0xc59b3d4, 0xfea7382, 0x3f2925f, 0xb9942ed, 0x8ea77e8, 0xe4b00dc,
+ 0x3cab02e, 0x74a18ec, 0xef16d0b, 0xbbbb752, 0xffab032, 0x639da4f,
+ 0x3aa30f0, 0xc371a4a, 0xcaa175b, 0x8e26b22 },
+ { 0x7e2b62e, 0x94e4156, 0x25a794c, 0x7cceea6, 0x479f015, 0x931d2f4,
+ 0x90b25b2, 0x946183d, 0x68a2807, 0x1504e97, 0xfa49ddd, 0xa7577d3,
+ 0xdd48699, 0x24fc87e, 0x3d7d99c, 0x9edefd6 }
+ },
+},
+{
+ {
+ { 0x0f0b450, 0x0508b34, 0xc36f7f4, 0xe0069a5, 0x2a5a761, 0x2655664,
+ 0x848e04d, 0x0193fd8, 0x73fe2e7, 0xc108cf5, 0xfd787d4, 0x05eb0ec,
+ 0xff28985, 0x1555ccb, 0x651b995, 0xb5af09f },
+ { 0xe1134be, 0x167d72c, 0x57c669a, 0xd6d98bf, 0x6dd76fa, 0x40fb716,
+ 0x2a41b31, 0xeabbf20, 0x09b75b0, 0x300ff0e, 0xd9a0c1e, 0x32b6fad,
+ 0x65a80e0, 0x8051883, 0x32110fe, 0x8bef693 }
+ },
+ {
+ { 0xbef47d4, 0x637802f, 0x2d16eaa, 0xfac114b, 0x0415644, 0x7b3f3ab,
+ 0x2dd895b, 0x17ab8d1, 0x87195f3, 0x271b7fe, 0xa71f65f, 0xa3f867e,
+ 0xc80583a, 0x39ba40c, 0x56e1fcc, 0x6db0672 },
+ { 0x06662a8, 0x4feab4e, 0xc74bd46, 0xc857415, 0x732b126, 0x18032ed,
+ 0x7a099ea, 0x87c8aea, 0x36fe0a8, 0xb4a7535, 0x27673f6, 0x33a98da,
+ 0x2b8e549, 0x3e40c02, 0x9a4c587, 0x2def1af }
+ },
+ {
+ { 0xa8c9ad9, 0x9618b68, 0x49defda, 0xd70b4aa, 0x5f788ef, 0xae8b138,
+ 0xdd523f4, 0x87c3542, 0x5c5b004, 0xe42c705, 0xfa7df57, 0x6303360,
+ 0x5f6d068, 0x33e27a7, 0x8ff331a, 0x9b3268e },
+ { 0x23ee0c3, 0x845cc96, 0xac80084, 0x003af70, 0x530c41d, 0x6a9f931,
+ 0xbb127f0, 0xa1d7051, 0xca36245, 0x642ce05, 0x0323ee9, 0xc34205b,
+ 0xb7b3513, 0x7cc8912, 0x076cbdb, 0x6252cc8 }
+ },
+ {
+ { 0x7089522, 0x10e68a0, 0x58fc658, 0x36c1361, 0x74723a4, 0x490397d,
+ 0x519d56c, 0x42692c0, 0xf1ff235, 0x69d251b, 0xc2cbf37, 0xe689d03,
+ 0x825b7f4, 0xf04ceba, 0x2281c2e, 0xd6b9bee },
+ { 0xe0043ab, 0xc52ef3f, 0xd1d1be8, 0x351bf28, 0x0f18a5a, 0x277615f,
+ 0x5d6800f, 0x31f717f, 0xab922e2, 0xf5fb82d, 0x2d6ae43, 0x99aee2f,
+ 0xc63b982, 0x42477fe, 0xa594a01, 0x904aeb1 }
+ },
+ {
+ { 0xeb39974, 0xaa82174, 0x95e6aa0, 0xbc38e61, 0x25c0675, 0x6a3df8a,
+ 0xffbe739, 0xf324203, 0xa3f0649, 0xfa5a0b4, 0x7a7a6b8, 0x79c8732,
+ 0x40ad3f5, 0xeb65ecd, 0xe4e45c5, 0x718d416 },
+ { 0xe2326fd, 0x029dbf4, 0xe7942f0, 0x0c63416, 0x6f4e678, 0x6d0c728,
+ 0xa138601, 0x59f0b10, 0x8d92ea9, 0x8a1d978, 0xc22eca5, 0x9f8d712,
+ 0x7b6b96b, 0x7397044, 0xe6fb955, 0xa2d49ee }
+ },
+ {
+ { 0xbf14a19, 0x249f900, 0x63a8cd2, 0xd3522da, 0x86964d2, 0x28a32f3,
+ 0xc1fa743, 0xacf712b, 0x0bb94d3, 0x98a9bfc, 0xbc06824, 0x318ece1,
+ 0x4fce7f0, 0xfc47675, 0xe4135b7, 0x19caec9 },
+ { 0xc6817bb, 0x6de68a8, 0xf3b6d89, 0x7121960, 0xf5a818e, 0xa7d4261,
+ 0x9157455, 0x0c0ba51, 0x450d5ff, 0x78b6acf, 0x4e8649a, 0x198b493,
+ 0xfd05da3, 0x0941a3c, 0xdb55951, 0x264ea4a }
+ },
+ {
+ { 0x46e5a31, 0xcfee91c, 0xfff7366, 0x47b6806, 0x5df849d, 0xdb14be4,
+ 0xac66cc7, 0x3c5e22b, 0xa5f4769, 0x7f3f284, 0x383be36, 0x4e00815,
+ 0x8072b0b, 0x39a9f0b, 0xc7eadd6, 0x9887cd5 },
+ { 0xb659511, 0x7dd8f05, 0xd2e1cb9, 0x15c796d, 0x0d31345, 0xe5edb0c,
+ 0x6939c60, 0x2025df0, 0xbf15de1, 0x6314c08, 0x04c7fb5, 0x03c1548,
+ 0xbb5d3ed, 0x413337f, 0x477e983, 0xfc20b40 }
+ },
+ {
+ { 0x5db0ef9, 0x7f96880, 0xe9c2a70, 0x05562de, 0x7dae133, 0x071e5bc,
+ 0x237fc4a, 0xa8cdd12, 0x4ea492b, 0x6d565e7, 0x381ee52, 0xa17cf94,
+ 0x9f5c546, 0x6ab8a4e, 0x40288ef, 0xbb642f3 },
+ { 0x5df5c2d, 0x64e5921, 0xbb906f4, 0x43696e3, 0x74ae46c, 0x73a841a,
+ 0xc506b8a, 0xe264883, 0xa1be548, 0x9542e1a, 0x5e81b4a, 0x8938539,
+ 0xeaca6ce, 0x5642cfa, 0x806e0f9, 0xed8077b }
+ },
+},
+{
+ {
+ { 0x7e13597, 0x1c776c4, 0x9e584fd, 0x0ec8b28, 0xb8b61e8, 0x0bb6043,
+ 0x9cd835b, 0xdcc1748, 0x39fef9a, 0x493e6ac, 0xd133e17, 0xb44eb34,
+ 0x71cb6f9, 0xfebcd00, 0xd20eff2, 0xe6cf543 },
+ { 0x0a004c7, 0xf265cad, 0xd35cc12, 0x9b06c9d, 0xcb4ea53, 0x769f985,
+ 0x0993434, 0x29160a2, 0x8d939c4, 0xdf8dd10, 0x6711e2f, 0xefa177c,
+ 0xcd7a2cd, 0x1695790, 0x77f6642, 0x38da3d7 }
+ },
+ {
+ { 0x6307b74, 0x9bfcfd9, 0xbfdabc3, 0xc26a36d, 0x4abe28e, 0x9341be0,
+ 0x73d1387, 0xdb20b52, 0x3d1949c, 0xf8d229c, 0xb8b3a41, 0xf1e0afe,
+ 0xed565d0, 0x29c60df, 0x8b43b2c, 0x6930bb5 },
+ { 0xfc0718f, 0x1d76527, 0x1f67189, 0xdb98143, 0x51f32cc, 0x0c62f64,
+ 0x8bd35e5, 0x70a6626, 0xc1cece7, 0x1725641, 0xf96f4a4, 0x7f130a8,
+ 0xf06ee98, 0x72319e9, 0x67bf9b2, 0x215b738 }
+ },
+ {
+ { 0x0aaddd7, 0x8d1bec2, 0xb8be4f9, 0xfb8b95b, 0xfde1026, 0xeac193e,
+ 0x9d5860c, 0xa5edea7, 0x44280d3, 0x4adbaea, 0x38f4798, 0xce8b670,
+ 0xec30dea, 0x914c107, 0x000776b, 0xbdc5cf7 },
+ { 0xa206a13, 0xb6fd7d1, 0xdae986e, 0x9941eba, 0x1f1caaa, 0x76c27a8,
+ 0x3f108b4, 0x6967c12, 0x4aea2d0, 0x6f11528, 0x144ddac, 0x9bb4319,
+ 0xc8ec6fc, 0x1a4d3ea, 0xbf37420, 0xfe4b0b8 }
+ },
+ {
+ { 0xec0ac6f, 0x5d9a4a1, 0xfc7c80d, 0x84b79f2, 0xc14fac3, 0x64222f7,
+ 0xc23b3f2, 0xdd9e039, 0xea956bb, 0x4a84abd, 0xebe09dc, 0x370dcba,
+ 0xe0eaf82, 0x79a9ea8, 0xaee375f, 0x4cfb60a },
+ { 0x9106827, 0x6a10dbf, 0x43f305b, 0xa3ba5cf, 0xc1bb083, 0x481b885,
+ 0xb3117b1, 0x2f52380, 0xddd6791, 0x0066122, 0x63bace3, 0x4f8923e,
+ 0xecb88d4, 0x5c5f499, 0x3bac146, 0xfdc780a }
+ },
+ {
+ { 0x7ba1f71, 0x34b70ae, 0x45bd184, 0x9091829, 0xe707313, 0x3b39778,
+ 0x6164e91, 0xdeefc5e, 0x4971f39, 0xbb55bed, 0x8dafc8b, 0x7d52339,
+ 0xa6adf0f, 0x82391bf, 0xe319522, 0xfd6f90a },
+ { 0xf29bbc9, 0x60fdf77, 0xaaa4030, 0xeff9ed8, 0xf8c0d3f, 0x978e045,
+ 0xeed65cd, 0xe0502c3, 0x3cfd4c8, 0x3104d8f, 0xa639005, 0xab1be44,
+ 0x9eeab3f, 0xe83f431, 0x451d797, 0x01970e8 }
+ },
+ {
+ { 0x3180f4b, 0xbc972f8, 0x617779d, 0xac053c0, 0x7fa149f, 0x89392c5,
+ 0xbcb6263, 0xdc4699b, 0xce12882, 0x0ae8b28, 0xaf1a4dc, 0xdca19a7,
+ 0x64e1a74, 0xd3d719f, 0xaffdd5d, 0xbb50201 },
+ { 0x7ac30e9, 0x56f7310, 0x1878900, 0x65cc9c7, 0x27338a3, 0x83f5866,
+ 0xac5bb13, 0x122adef, 0x1bcd4d5, 0x97de200, 0xb8aa3a0, 0x6ed3985,
+ 0x6821f9b, 0x8680f1d, 0xdda9f98, 0xcb42028 }
+ },
+ {
+ { 0x0ec2db3, 0xcdb0708, 0x3dad1a1, 0xe28c833, 0xde2da07, 0x2093e32,
+ 0x83b8987, 0x7317073, 0xf552b8d, 0xad17871, 0x51cf70a, 0x846da98,
+ 0x5c4f5e1, 0xf94a16e, 0x0f8348a, 0x8429996 },
+ { 0x98db78a, 0x4bf3f68, 0x3d19b52, 0xad77fa8, 0x8b972dc, 0x6976772,
+ 0x5321be0, 0x7dfa35a, 0xdd344a6, 0x9881846, 0xad4e2a8, 0xe550292,
+ 0xbc68bf1, 0x8075217, 0x893be15, 0xdd837c4 }
+ },
+ {
+ { 0xd4fab5b, 0x09c931e, 0xb77a0f1, 0xb2dcf08, 0xe0d38a6, 0x7dac5c0,
+ 0x0ae73af, 0xa5570b0, 0xf5aed28, 0xc7c19d3, 0x5251e92, 0x575fa6f,
+ 0xcdf7275, 0xb843cd6, 0x9a01287, 0xd9d3d8e },
+ { 0xb3c370b, 0xf94e356, 0xfe464b0, 0xc62b99f, 0xa986057, 0x7792650,
+ 0xc4b1874, 0xeaa67d5, 0x0b07078, 0xba1ba4d, 0x7a03699, 0xdbf636d,
+ 0xedd32a3, 0x1a16c34, 0xa45cb5d, 0x6ce2495 }
+ },
+},
+{
+ {
+ { 0xa684441, 0xd7c4d9a, 0x30cd42a, 0xce62af6, 0x43014c4, 0xcd2669b,
+ 0x6f65b24, 0xce7e711, 0x576fa19, 0x1847ce9, 0x9dd8ca6, 0x82585ac,
+ 0xb42e1db, 0x3009096, 0x384ab8b, 0x2b2c83e },
+ { 0xb4e9a6e, 0xe171ffc, 0x7374b40, 0x9de4218, 0xdb1d616, 0x5701f9f,
+ 0xa3e8cbc, 0x211e122, 0x1e400bf, 0x04e8c1a, 0x0f37159, 0x0297470,
+ 0x3df8c28, 0x41775d1, 0x61ac2db, 0xcfaad4a }
+ },
+ {
+ { 0x7dc0f49, 0x6341b4d, 0xf471a53, 0xaff6c2d, 0xfb8e91e, 0x20ec795,
+ 0xc3b7b62, 0x4c7a4df, 0xd374938, 0x9f33ff2, 0x3a60f2e, 0x38f8c65,
+ 0x2efef73, 0xc1168ac, 0xce408ee, 0x046146f },
+ { 0x308b0c3, 0x9b39ac0, 0x36b8570, 0xe032d61, 0xfc4aacf, 0xee07d8d,
+ 0xd5a41dd, 0x0a82acb, 0x7c3d726, 0xbe0ded2, 0xb926ce9, 0xce51d60,
+ 0x5806c1e, 0xfa2f7f4, 0x1dec59c, 0xe367c6d }
+ },
+ {
+ { 0xda2547b, 0x64511b6, 0x0761405, 0x76a349c, 0x01223ab, 0x37d6626,
+ 0xf4d7c48, 0x0e243c1, 0xda756a0, 0xdc9c8b4, 0xd72e7e9, 0xc7430df,
+ 0x27b4210, 0x0eb1308, 0xcf11cbd, 0x7a9c044 },
+ { 0xe8dd150, 0x2c08ff6, 0x2932fc6, 0x18b738c, 0x04513e8, 0x07d5651,
+ 0xaa40a17, 0x0ca5cff, 0x01baa8f, 0xd486341, 0xb72b79e, 0xfb20faf,
+ 0x654020f, 0x1a051e5, 0x4e17f23, 0xe3b3317 }
+ },
+ {
+ { 0x4de9428, 0x0591048, 0x5abdf97, 0x620542a, 0xa16a4d1, 0xaa0eded,
+ 0x6d65bb9, 0xa93f71c, 0xb8dfaf9, 0x88be135, 0x57ca8ee, 0x1d9f4e5,
+ 0x26781ad, 0x4c896aa, 0x6c6c49f, 0xd3fbe31 },
+ { 0x2c34c3d, 0x088d852, 0xbadff1e, 0xbb6d645, 0x385450d, 0xe3080b8,
+ 0x50ab1f3, 0x5ccc54c, 0xac0657d, 0x4e07e6e, 0xb7ef2c0, 0xa7ba596,
+ 0x73a81e9, 0xcceca8a, 0x8284c35, 0xa0b804c }
+ },
+ {
+ { 0xf17a6a2, 0x7c55956, 0x789cfa8, 0xb451d81, 0x2506eaa, 0xdf414e8,
+ 0xae96562, 0x6ef40fb, 0x0e0297e, 0x63ea283, 0x73c46fa, 0xf5df26e,
+ 0xaac8bce, 0xe00641c, 0x64371f3, 0xc89ed8f },
+ { 0x793202e, 0xd22b08e, 0x875cb50, 0x39a9033, 0xf85ddb4, 0xe64eec0,
+ 0x7acf7b5, 0xdce45a7, 0xb9b802d, 0x39d1e71, 0xbd559ac, 0xafdfe7c,
+ 0x809eeb5, 0x17ec1f8, 0x4889b8c, 0x8c0e38a }
+ },
+ {
+ { 0x17089da, 0x47eabfe, 0xec90c50, 0x2d18466, 0x5861531, 0xa511aa4,
+ 0x8c39b39, 0xebb3d34, 0xf1b5282, 0xa0ac4da, 0xa9dadba, 0xea26be7,
+ 0x554d86e, 0x8992ba8, 0xd5f2ef5, 0x7fcbdb6 },
+ { 0x56863e7, 0x320e79b, 0xa7dce2d, 0xeb9d0c0, 0x784cbc6, 0xb9f4031,
+ 0x7ac1f81, 0x68823ee, 0x9d87497, 0xa6b6f4f, 0x57f9b6e, 0x83c67b6,
+ 0x0fef2a7, 0x3735747, 0x59596e2, 0xf38028f }
+ },
+ {
+ { 0x7e82886, 0x9ea57ab, 0x48c44d5, 0x18221c5, 0x314a24f, 0xbf8e6cf,
+ 0xfd025e5, 0x70ff18e, 0x5334468, 0x08d03de, 0x7404fb7, 0x2b206d5,
+ 0x55e36b0, 0xb923271, 0xb88ddd9, 0xcc7604a },
+ { 0x4a746f0, 0x3df5152, 0x168e3fc, 0x8fdebd8, 0x7f8c32c, 0xffc550c,
+ 0x148743e, 0x1dbbc17, 0xb88e18b, 0xd48af29, 0x750027c, 0x8dca11c,
+ 0x1832be3, 0x717f9db, 0x2b06019, 0x22923e0 }
+ },
+ {
+ { 0xc1cc4d3, 0xd4e06f5, 0x2b4f03a, 0x0fa32e3, 0xc4628d0, 0x956b9af,
+ 0x939dad1, 0x95c39ce, 0x8a00416, 0x39d41e0, 0x6fb01aa, 0xfd7ff26,
+ 0x45af340, 0xc6033d5, 0x8e36584, 0x2f65542 },
+ { 0x8dff960, 0x14cfb1f, 0xda81474, 0x7236ffc, 0xd452d0f, 0xc6a6788,
+ 0x77f6094, 0x2ad4a52, 0x07eea74, 0x369d65a, 0xd6229aa, 0x27c6c38,
+ 0x8863976, 0xe590e09, 0xb38b142, 0x361ca6e }
+ },
+},
+{
+ {
+ { 0xdfeb7ef, 0x6803413, 0xd3f4fad, 0xb669d71, 0xc941606, 0x5df402a,
+ 0x8e6c5b7, 0xe5d1776, 0x92ab236, 0x131bcb3, 0xce2e0e0, 0x7f1fb31,
+ 0x9e98c35, 0xa2c020d, 0xf28657b, 0x33b23c0 },
+ { 0x9cf7879, 0xed14e73, 0xb4357b3, 0x10d4867, 0x31e4e04, 0x127cea3,
+ 0xaa5f8a7, 0xc60d25f, 0x025b987, 0xfef840a, 0x66f2a0a, 0x78081d6,
+ 0xac36198, 0x0fa0b97, 0x134dc9f, 0xe0bb919 }
+ },
+ {
+ { 0xcc32eae, 0xc1d2461, 0x0f79a37, 0x0fdbfdf, 0x1c95f02, 0x70f2bc2,
+ 0x372cddf, 0x7d68bec, 0x8439342, 0x44f7817, 0x4843a6c, 0xa3d5678,
+ 0x07f8959, 0xbadf77a, 0x73db4ca, 0xf458198 },
+ { 0xd54f805, 0xe8eaaf3, 0xb84c1e7, 0x2f529d1, 0x21e535c, 0x404e32e,
+ 0x159b5f5, 0xabac85c, 0xb00466f, 0x4e8e594, 0xc941873, 0x40fcaab,
+ 0xbe407c6, 0x3b4e370, 0x5b2e58d, 0xccd5788 }
+ },
+ {
+ { 0x88b74a8, 0x3ee615e, 0xeab4e69, 0xd7d6608, 0xe4ace36, 0x27cf9f1,
+ 0x7aebabb, 0x282359e, 0xf6d162f, 0x96e509b, 0xf1a290a, 0xad906f3,
+ 0x1314a58, 0xe7d6c4f, 0x218431d, 0xeecffe4 },
+ { 0xe2cfed9, 0xa66e0e9, 0x71f0544, 0xb0887ec, 0xa04c5d7, 0xd34e36b,
+ 0xed4392d, 0x094daa5, 0xc8aa925, 0xcda83ad, 0xb979786, 0x1adef91,
+ 0xfddc5d6, 0x3124dcb, 0x0b70c14, 0x5cc27ed }
+ },
+ {
+ { 0x0eac2d8, 0x386dbc0, 0xc50ca30, 0xa716ecb, 0x80d9f04, 0x9e3fc05,
+ 0xcfeaceb, 0x37dde44, 0xa3522d5, 0xd88d74d, 0x2cf239a, 0x6bb9e9f,
+ 0xa7cbfec, 0x9e7fb49, 0x0a5c0ef, 0xe1a75f0 },
+ { 0xfb9229d, 0x6e434e7, 0xc8a79b3, 0x0ec6df5, 0xd3fb311, 0x7046380,
+ 0x52e20fa, 0xe957ef0, 0x9ef4614, 0x0f4fe9a, 0x54d8f2b, 0x1b37d9c,
+ 0x39d84a2, 0x23b2dc1, 0x724e713, 0xf62c4f6 }
+ },
+ {
+ { 0x747e219, 0xbd6922c, 0x3869b7b, 0x34d1438, 0x96f2272, 0x8c875a5,
+ 0x3fe361e, 0xd9602c0, 0x744839f, 0x081348f, 0x61ac1f1, 0x61bd16c,
+ 0xd8da4e1, 0x993b727, 0x7741271, 0xbb40ba8 },
+ { 0x81dcfff, 0xe6dcc98, 0x93ce616, 0x9f513f5, 0x618cd8f, 0xdc09683,
+ 0x26639be, 0xc3b1d10, 0xc762ee2, 0xe8f149f, 0xb244aae, 0x59f26ef,
+ 0x693dd96, 0x3f2de27, 0x9c3a7de, 0xd8b68f7 }
+ },
+ {
+ { 0x970bd5b, 0x6fa20b9, 0x75f6179, 0x87242d7, 0x72d9308, 0xa95a6c6,
+ 0x37a8a58, 0x6eb2518, 0xc59562c, 0xfdea12a, 0x20f1fc3, 0x4419c1e,
+ 0x9d66788, 0x0c1bd99, 0x32c0547, 0x4b74288 },
+ { 0xdf479ab, 0x4f38acc, 0xc52a942, 0x01f6271, 0x02ca9a7, 0xe3298f4,
+ 0xb718fc8, 0x533daca, 0xb093ca8, 0x133602a, 0x8f98104, 0xc04da80,
+ 0xaf08620, 0xd0f2e23, 0x178b164, 0x882c817 }
+ },
+ {
+ { 0xec30a71, 0x28e6678, 0xf78aca1, 0xe646879, 0x88fa078, 0x868a64b,
+ 0xfee3433, 0x671030a, 0x87c0211, 0xb2a06bb, 0x46c406a, 0x202eca9,
+ 0xe4f0f59, 0x64d6284, 0x3c9f907, 0x56ae4a2 },
+ { 0x1dcc100, 0x5abbb56, 0x07c7784, 0x6fef6cf, 0xdb7302d, 0xb6e25cd,
+ 0x42980e8, 0xa26785b, 0xfb96801, 0xe7d4043, 0x8e4282b, 0x46df55d,
+ 0xc602d6e, 0x9c0a5f5, 0x75dfe29, 0xf065604 }
+ },
+ {
+ { 0x3dcbc90, 0x0e82a1a, 0x656feac, 0xb1ee285, 0x0d3d3b2, 0xfa4353b,
+ 0xdd5c5df, 0xc2e7a6e, 0x416ce53, 0x13707e1, 0x87ebc07, 0xc84ce07,
+ 0x8a9a834, 0xdd273ce, 0x5e8e1e7, 0x432a617 },
+ { 0xbd0064a, 0xa359670, 0x6534516, 0xc899dd5, 0xdb27169, 0x666560e,
+ 0xa19a068, 0x1537b22, 0xeac7527, 0x3420507, 0x6fc13a7, 0x479f25e,
+ 0x1bc19b3, 0xc847acc, 0x0b20d45, 0xecdecf0 }
+ },
+},
+{
+ {
+ { 0x4acea57, 0x6f24100, 0xda68597, 0xdace1c6, 0x50ce77f, 0xea7dd41,
+ 0x1585884, 0x1aecb84, 0xea4a85c, 0x92ff208, 0x88eebd2, 0xde9433c,
+ 0x3f4d289, 0x53cd318, 0x26539af, 0x3970858 },
+ { 0xb827d87, 0x4b57599, 0x3d77638, 0xdc82ac0, 0x52f6e61, 0x6943366,
+ 0xad5e8a6, 0xb8fc4b0, 0xf388642, 0x1b6f7dc, 0xa74dd57, 0x6f24533,
+ 0x41750cf, 0xc669378, 0x28a37af, 0x06757eb }
+ },
+ {
+ { 0xc133995, 0x0e70d53, 0x7c8c97d, 0x88a5e0c, 0x85f3be3, 0x4e59dbf,
+ 0x0e92698, 0x0f364ac, 0xef6940f, 0x3a1e79b, 0xd85d23a, 0xc8a3941,
+ 0x9a00e58, 0x143bb99, 0xc6f2f10, 0x61cf7d6 },
+ { 0x85150fe, 0x979c994, 0x59d773f, 0xcfd0df2, 0xaab7bcd, 0xce97b9d,
+ 0x6afd8fc, 0xc9fff8e, 0x89a4628, 0x246befd, 0x1567090, 0xf630282,
+ 0x6749c58, 0x1539342, 0xa0f3fd3, 0xff47d0e }
+ },
+ {
+ { 0x35f6706, 0x09b0bfd, 0x2c82e69, 0x7464581, 0x50d5fe9, 0xb60729f,
+ 0x95c74f1, 0xf133245, 0xbb76c89, 0x33647e3, 0x5a9afcc, 0x0126404,
+ 0x0f154ab, 0x46d57ee, 0x25680a4, 0x2efa555 },
+ { 0x5329d90, 0x12ebfc6, 0x79800af, 0xcb37ae5, 0x6f8e310, 0x5bb5349,
+ 0xf1bb936, 0x9b59c63, 0xf4610e9, 0x5b49baa, 0x4f2d6ac, 0x2bbeeef,
+ 0x0badc67, 0x87ee21e, 0xf1ddfa0, 0x12e2aad }
+ },
+ {
+ { 0xa9109ee, 0x5b4668f, 0x8a6cea2, 0xfa95133, 0x4068e16, 0xe45e6fc,
+ 0x0205ed8, 0x8ae9a0c, 0x679b79b, 0x2993b96, 0xed604d3, 0xc6b878f,
+ 0x32c77f3, 0x01d0208, 0x495a1ab, 0xd45d890 },
+ { 0x29d2030, 0x99348fa, 0x61f8f7a, 0x961f9a6, 0x674f74b, 0xfd53212,
+ 0xb3e72bc, 0x45cee23, 0xb77e2d5, 0x3fccb86, 0x4219cb7, 0xdff0310,
+ 0xc056871, 0x233771d, 0x7d2c521, 0x1214e32 }
+ },
+ {
+ { 0xff2a8e1, 0x9f51e15, 0x138bc70, 0x86571c5, 0x0c09d46, 0xbfc4caf,
+ 0xc2a0c18, 0x65e33fe, 0x426867d, 0x8214392, 0x80ae4ed, 0x51ce6c0,
+ 0xb110de6, 0x6cbe8d7, 0xfd22ea4, 0x7f6e947 },
+ { 0xcadefc4, 0x7373a75, 0xb0c682f, 0x6fca1d2, 0xf3c7c1e, 0xcd2140d,
+ 0x558b7a5, 0x8653a37, 0x55eb321, 0x653e74e, 0xc31af73, 0xbe0c6b3,
+ 0xf4fc365, 0x3376379, 0x71add4d, 0x3570b37 }
+ },
+ {
+ { 0x83c3494, 0x9061ec1, 0x677bc95, 0xaf2f28d, 0x3bf8768, 0x6fe7279,
+ 0x0fa86d8, 0xc5f50e3, 0xa3293ce, 0x6c03060, 0xe2355a6, 0x4d53357,
+ 0xe4df931, 0x43a59ea, 0x13b79c6, 0x6f48f5d },
+ { 0xddc5192, 0xa4d073d, 0xa65773f, 0x6d0e318, 0x765de9e, 0x1008792,
+ 0x39a0375, 0xa724ed2, 0x97d7c9e, 0x510ff14, 0x5baa863, 0x251f622,
+ 0x648a351, 0x86464fe, 0xd50fd91, 0xf85e98f }
+ },
+ {
+ { 0x86ee987, 0x29c9634, 0x10dcc9f, 0x93e8e52, 0xc910b1f, 0xa1fc4d1,
+ 0xfeb603e, 0x015acac, 0x0844a5f, 0xc9f25f8, 0x73f4dac, 0x50de93c,
+ 0x310a4aa, 0x1758783, 0x358f106, 0x544d570 },
+ { 0x1dc68ca, 0x4eeec7b, 0xe00fbcb, 0x6238e6f, 0xb4e83c9, 0x34d394c,
+ 0x2292656, 0x764ffa2, 0xf641f2e, 0x5614cd1, 0x9e07234, 0x4252eb6,
+ 0x68d2ba4, 0xcbaef45, 0x8a98b17, 0x8c9c550 }
+ },
+ {
+ { 0x4106140, 0xf235d9d, 0x9eb601e, 0x1bf2fc3, 0x375e0c3, 0x6fb6ca9,
+ 0xc0024d2, 0x4bf5492, 0xeb54cc6, 0x3d97093, 0x5c90cb5, 0xc60931f,
+ 0xfbe0f1a, 0xfa88808, 0xd33e7d4, 0xc22b83d },
+ { 0xc0abbf5, 0x9cfec53, 0x93723df, 0x52c3f0a, 0x39b96b6, 0x0622b7e,
+ 0x1667270, 0x300de28, 0x9ef426a, 0x50b66c7, 0xc6eb295, 0x8849189,
+ 0x8914a7e, 0xeaec3a9, 0xc4c99e0, 0x7ed56b0 }
+ },
+},
+{
+ {
+ { 0x687e557, 0x7926403, 0x5310017, 0xa349816, 0xd43a8fd, 0x1b06e91,
+ 0x6ac23cb, 0xf201db4, 0x4f48750, 0x6f172ad, 0xe74bd3e, 0x5ed8c8c,
+ 0xdaba648, 0x492a654, 0xa9b64ff, 0x123010b },
+ { 0x6e89f93, 0xa83125b, 0x398378a, 0x3a3b0b0, 0x0aebe7c, 0x9622e0b,
+ 0x49512a4, 0xb9cbfdc, 0x6aaf12a, 0x13edffd, 0x9f5eafd, 0x555dff5,
+ 0x1212efa, 0x3cba6fe, 0xd9bb0f8, 0xd07b744 }
+ },
+ {
+ { 0x9a48920, 0x45732b0, 0x13ff36d, 0xf3080fc, 0xde8f950, 0x9347395,
+ 0x382b897, 0x14d025a, 0x04d72ad, 0x60c5a74, 0x11a9c71, 0x30be7e5,
+ 0x31ac33a, 0x43ffabd, 0x35cbb14, 0x97b06f3 },
+ { 0x7740de9, 0xe4ff5c5, 0xaacf81e, 0x5fed090, 0xe8b7c9d, 0x97196ee,
+ 0x045910b, 0x316dcd1, 0x5ad8c63, 0x7a2b2f5, 0xc5b03bb, 0x674fffd,
+ 0xe65953c, 0xc1cd133, 0x0a83556, 0x3c06052 }
+ },
+ {
+ { 0x091c23d, 0x797c3f6, 0x39c9c05, 0x2ea2de3, 0xa31f67c, 0x5d958b4,
+ 0xd5f088c, 0xf97afe5, 0x0b37243, 0xbcfbd2a, 0xeca630c, 0xc43ad3e,
+ 0x42845e0, 0xb92a337, 0xa9a0f16, 0x970bff7 },
+ { 0x5970a79, 0x8635511, 0xf205928, 0xcee332e, 0xc04c208, 0x2c58d70,
+ 0x3f5e5bf, 0xdbfe19a, 0x8e51c56, 0x8f8f2c8, 0x8e2da75, 0xb61f58e,
+ 0x624d93f, 0x4046a19, 0xe1f9538, 0x7de64db }
+ },
+ {
+ { 0xc2d850e, 0xd018e1c, 0x63a723c, 0x8cdb643, 0x90a42af, 0x9a65abe,
+ 0x16f20cc, 0xfeece96, 0xd5cff56, 0xc906800, 0x3f0deed, 0x0acf23a,
+ 0x728dd3a, 0x2143061, 0xb8ce34c, 0x66276e2 },
+ { 0x73cc9c7, 0x23700dc, 0x5b1778b, 0xdb44851, 0x4aab669, 0x330f41e,
+ 0xf5282a4, 0x2f5aabc, 0x30f9e01, 0xff837a9, 0x901cc98, 0x1a1eb2f,
+ 0xe69bd7f, 0xd3f4ed9, 0x8a72a7d, 0xa6b1141 }
+ },
+ {
+ { 0x9ea3b43, 0x34bde80, 0x5ced6ae, 0x5ddcb70, 0x95a6cb8, 0x8257f5b,
+ 0xc77dcb8, 0xaac205d, 0x035b397, 0x77d740d, 0xcf7e0a6, 0xca7847f,
+ 0x085601b, 0x9404dd6, 0x457e4f9, 0x0a5046c },
+ { 0xbc11470, 0xcaee868, 0x005c5f6, 0xb118796, 0xec79173, 0xcc04976,
+ 0x21f6827, 0x7f51ba7, 0x486ff7e, 0xa8e3f0c, 0xf87838c, 0x327163a,
+ 0x6d039fd, 0xcf2883e, 0xdb8b0e2, 0x6fb7ab6 }
+ },
+ {
+ { 0x620d669, 0x8ca5bac, 0xed7caa9, 0xff707c8, 0x927909b, 0xdaefa2b,
+ 0x7029da3, 0x1d2f955, 0x6d131a0, 0x52a3ba4, 0x3ab1041, 0xe5a94fd,
+ 0x99bc0ae, 0x5089177, 0xfa1bd16, 0xf750354 },
+ { 0x6cd31fd, 0xdd4e83a, 0x92fac84, 0xd335053, 0x1691382, 0xf914cbc,
+ 0xda6ade6, 0x669683f, 0x8878513, 0x6944643, 0x4b1a72d, 0x429d3cc,
+ 0x61eec36, 0x655c46a, 0x4bc4970, 0x881eded }
+ },
+ {
+ { 0x7ca647f, 0x5b39d37, 0xe917b34, 0x41533c1, 0x7daf734, 0xea2aeb5,
+ 0x1286560, 0xf1ef1eb, 0x08e0473, 0x582f2e0, 0x5edc74a, 0x5913d7d,
+ 0x3c1e754, 0x588c7ec, 0x7146fe1, 0xbd6db05 },
+ { 0x7634907, 0x3b0d49e, 0xe43b9cc, 0x4c65ce4, 0x2d92d5b, 0xb87e958,
+ 0x7ab1519, 0x0513572, 0x8c3aed0, 0x03ec084, 0x561a641, 0x4d7aa21,
+ 0x99e92ad, 0xe5f8211, 0x48a457c, 0x379b55f }
+ },
+ {
+ { 0xd6a8442, 0x8317c34, 0xae499da, 0xb0ab4a5, 0x720e8eb, 0xebcb16e,
+ 0x9a96908, 0xfd5c563, 0xad23acf, 0xcab4d67, 0xbcdf748, 0xa600a79,
+ 0xa2a6a51, 0x18a6340, 0x3aabd69, 0xf2f415c },
+ { 0x747258a, 0xdb38a4f, 0x2e24415, 0xb6ea560, 0xf1f7655, 0xfad1ea9,
+ 0xc957684, 0x4e27eb5, 0xb2e1cfc, 0xf8283e1, 0xaa6291c, 0x8f83bd6,
+ 0x5619e84, 0x28d23b5, 0x93770a4, 0xb9f34e8 }
+ },
+},
+{
+ {
+ { 0x7515fb1, 0x1bb8437, 0x7b860a6, 0xac73f2a, 0x22b390f, 0x78afdfa,
+ 0x66048aa, 0x815502b, 0x85bf620, 0xf513b97, 0x3fc5d7c, 0x2524e65,
+ 0x178c969, 0xa10adc0, 0x5391c8d, 0xa1d5396 },
+ { 0xa8bcc45, 0x09fccc5, 0x7710e1e, 0xa1f97d6, 0x897d0a1, 0xd694442,
+ 0x5f42400, 0x7030beb, 0x7127908, 0xdebe08c, 0x2187637, 0x96b715c,
+ 0xb528129, 0xc598250, 0xa1ccb07, 0x0f62f45 }
+ },
+ {
+ { 0xb765479, 0x8404941, 0x5837dc4, 0xfdecff4, 0xadbd465, 0x1796372,
+ 0x3159806, 0x5f84c79, 0x6aaad34, 0x6d2e46b, 0x384b375, 0xd303b4a,
+ 0xb392002, 0x440acd5, 0xc475e87, 0x4f2a4a7 },
+ { 0x5606fc2, 0x038e1da, 0x9c2f050, 0x2d821c2, 0xf139db4, 0xc074cb3,
+ 0x4ec59be, 0xde2fee7, 0xa84ed59, 0x5a819ee, 0x3e98711, 0xd65c62c,
+ 0xb9723c1, 0x72eb440, 0x01be611, 0xb927754 }
+ },
+ {
+ { 0xab9e9fc, 0x929fe64, 0x0bf1e85, 0x04379fd, 0xbc28ee3, 0xb322093,
+ 0xe4555e1, 0x78ac4e2, 0xabc5588, 0xdb42b58, 0x77c8b12, 0x1c1b5e1,
+ 0x40366c4, 0xf6d78dd, 0xbdae22e, 0xc21ff75 },
+ { 0xa211df2, 0x1e3d28e, 0x3617c0a, 0xc5a65a1, 0x58140d5, 0x3fa02c0,
+ 0xb62d10c, 0x155c346, 0xe48268f, 0xc9cf142, 0x1993bc3, 0xdc14083,
+ 0x0ee69dc, 0x07c44d4, 0x5e2ac46, 0x6169950 }
+ },
+ {
+ { 0xd0fb585, 0x44e4a51, 0xf1f3ce8, 0x00846be, 0x8e2de1e, 0xedef39a,
+ 0x33b3934, 0x430afe3, 0x4337188, 0xac78b05, 0xc9a3f24, 0x0f39de4,
+ 0xc9ae6a4, 0x039eddd, 0x8eacd51, 0xf470157 },
+ { 0x9a2f31a, 0x1e39694, 0xb19a8b1, 0xc8a40f4, 0x9d239d8, 0xdddd10c,
+ 0x887e066, 0xf974245, 0x3ea28c6, 0xfdb5111, 0xe1122a9, 0xb5af0fb,
+ 0x36e0267, 0xd30c89f, 0x74f024c, 0x7b1c0f7 }
+ },
+ {
+ { 0x07a39bf, 0x1ec9956, 0x3a68d15, 0x1c3ecf2, 0x4f59fe9, 0xd8a5c4e,
+ 0x271abc3, 0xacb2032, 0x71ef239, 0xbc6bdf0, 0xb39b391, 0x660d7ab,
+ 0xb627a0e, 0x2e73bb2, 0x248fc7e, 0x3464d7e },
+ { 0x1666760, 0xaa49249, 0x8582659, 0xa257b6a, 0x5593089, 0xf572cef,
+ 0x73ca6bf, 0x2f51bde, 0x764cff5, 0x234b63f, 0xd411a35, 0x29f48ea,
+ 0xafe1db1, 0xd837840, 0xd9f4c4b, 0x58ec0b1 }
+ },
+ {
+ { 0x5e6f3dc, 0x8e1deba, 0x06a5ff7, 0xc636cf4, 0xc80ca0f, 0xe172b06,
+ 0x5ffb90a, 0x56dc098, 0x9a05e83, 0x895c218, 0x7561ac2, 0x6ddfaec,
+ 0x96283a0, 0xaa35749, 0x7e7cd43, 0x6dfb262 },
+ { 0x2c8ca27, 0x6576de5, 0x49018eb, 0x6a4a872, 0x5c34342, 0x00c275c,
+ 0xd2d90c4, 0xe34805a, 0xd8743c4, 0x651b161, 0x7312bf3, 0xb3b9d9b,
+ 0x0bf7e00, 0x5d4b8e2, 0x78d3d7e, 0x8899bdf }
+ },
+ {
+ { 0xfaa9cd1, 0x9644ad8, 0x6e0e58e, 0x34c98bf, 0x404c637, 0x6022aad,
+ 0x7ac013b, 0x2a11a73, 0x5540899, 0x5bdd103, 0x1e022a4, 0x2e67572,
+ 0xb834c33, 0xe32045d, 0x2f2d01c, 0x74a260c },
+ { 0xc48841c, 0x20d59e9, 0xe560359, 0x05045dd, 0xac998ac, 0xeba779c,
+ 0x00a6218, 0x5bed10c, 0x5327ef4, 0x25d4f8e, 0x4597794, 0xa278474,
+ 0x831d11e, 0xefd68ca, 0x934446a, 0x9ad370d }
+ },
+ {
+ { 0x73c92ac, 0x3089b3e, 0x957a75c, 0x0ff3f27, 0xd676f50, 0x843d3d9,
+ 0xd496d43, 0xe547a19, 0x8e924a4, 0x68911c9, 0x85b5522, 0xfab38f8,
+ 0x83e0ac5, 0x1048811, 0xdc788c4, 0xcaccea9 },
+ { 0xe3c6aad, 0xfbe2e95, 0xb3a6cf1, 0xa7b3992, 0x87d78b1, 0x5302ec5,
+ 0x1826100, 0xf589a0e, 0x8610632, 0x2acdb97, 0x9232b26, 0x1e4ea8f,
+ 0x9c09a15, 0xb21194e, 0x849b909, 0xab13645 }
+ },
+},
+{
+ {
+ { 0xf3a71c1, 0x92e5d6d, 0x297d661, 0x349ed29, 0x1713fc9, 0xe58bd52,
+ 0xb9ddfb5, 0xad999a7, 0x3c28ce0, 0x271c30f, 0x2a9d460, 0xf6cd7dc,
+ 0x207dec7, 0xaf728e9, 0xfcb8bf0, 0x9c2a532 },
+ { 0x68bf486, 0xd702184, 0x7ab8ea8, 0x73b45be, 0x1795c93, 0xddfc658,
+ 0x6bb8da2, 0x7941660, 0x88e07a2, 0x658f197, 0x26d3d12, 0xa9d5b08,
+ 0x9535b52, 0x4d7c95f, 0x268ef8a, 0xad55e25 }
+ },
+ {
+ { 0xa2bc326, 0x94a9b0b, 0x167e5f3, 0x485ecc5, 0xc97fc74, 0x8340bc7,
+ 0x07aaa5c, 0x06f882b, 0x849698a, 0x4b57455, 0xb36a0ba, 0xd9281eb,
+ 0x8b8108f, 0x8918c6c, 0x5b50d1d, 0xedd1eea },
+ { 0x2a25f50, 0x94d737d, 0x2446ad0, 0x0e5a823, 0x7ced3e2, 0x02a5435,
+ 0x4af8ced, 0xb09a92a, 0xeeecef2, 0x85fc498, 0xe71e3d4, 0x06a02b9,
+ 0x84bb49a, 0x00ad307, 0x64a5b4a, 0xf61585e }
+ },
+ {
+ { 0xb86a4c9, 0x915f6d8, 0xa861e1f, 0x944bc6b, 0x54465ef, 0x3091ca7,
+ 0xeb53a38, 0x11df859, 0x0144679, 0xd44dde5, 0x0994edd, 0x6c8da9a,
+ 0x91241ef, 0xeebcebf, 0xc2f6859, 0xc419354 },
+ { 0x49581b6, 0x1f49693, 0xbb26cb4, 0x5712b10, 0xb09fd59, 0x8fcaa41,
+ 0x72e22e3, 0xbd39aad, 0xb1199b0, 0xf70e794, 0xc6f863d, 0xdf63c0c,
+ 0xee9df4f, 0xd58166f, 0xc45e70b, 0xb9224ea }
+ },
+ {
+ { 0xce525f4, 0x80072fa, 0x66a5502, 0x8597bd6, 0xdbc9725, 0xf65e203,
+ 0xf2222a4, 0xeccfbe3, 0x2339834, 0x490aa42, 0x62489e8, 0x1348891,
+ 0xa735084, 0xaff3f80, 0xf3f1bd6, 0x69d53d2 },
+ { 0x813341a, 0xb123ffc, 0x1173848, 0x359084c, 0xd29b08d, 0x751425e,
+ 0x3890ad4, 0x1edda52, 0x607cf20, 0xb64974c, 0xb42ac7c, 0xa8c8cb8,
+ 0xedd42e5, 0xd5cb305, 0x44c090a, 0xf3034dc }
+ },
+ {
+ { 0xbb18e19, 0x428921d, 0xfed2127, 0x4cfd680, 0x92ac8c3, 0x671144d,
+ 0x132c894, 0x2121901, 0x7604cd9, 0x25d0e56, 0xafbc2a0, 0xa372223,
+ 0x56c16f7, 0xcf98a52, 0xb5459e1, 0x71f129a },
+ { 0xb668b2e, 0xf4afdc5, 0x0c2d410, 0xc5d937a, 0x285d54a, 0xe2cc4af,
+ 0x8c53e18, 0x1c82777, 0x69a92f6, 0x270f2c3, 0x616327a, 0x799f9ac,
+ 0xd4246f2, 0xce658d9, 0xfb12e36, 0x0fb681f }
+ },
+ {
+ { 0xe0690fe, 0xc5ab11e, 0x3f74249, 0x80261e3, 0x58c1cf2, 0x8eb4b47,
+ 0x184ae9b, 0x4895a80, 0xd3e27eb, 0x4a4bdb6, 0xbfd251c, 0xa7a1638,
+ 0x417a7e3, 0x29ec144, 0x3f1b960, 0xd073609 },
+ { 0x49c73d1, 0xcb1ed83, 0x8d1945a, 0x33fc84a, 0xe965118, 0x9f668db,
+ 0xa82811f, 0x3331743, 0x28ba540, 0xf394dec, 0x654a454, 0x44ce601,
+ 0x3623645, 0x240dbb6, 0x2e61048, 0xf07e7f2 }
+ },
+ {
+ { 0x3d45213, 0x7c9f176, 0x9c1f77f, 0x3eefa70, 0x1b48350, 0xde3c3c5,
+ 0x9d481a7, 0x4a2bc64, 0x7874f3d, 0xfd4a58a, 0x037b302, 0x96655d4,
+ 0x68bf5ab, 0x9452528, 0x75177f6, 0x1b6d46a },
+ { 0xefb8d00, 0x7de6763, 0xa741b7b, 0xb2c1ba7, 0x7bae6ed, 0xcca6af4,
+ 0x5b68b3f, 0xe4378ca, 0xaf71948, 0xfb757de, 0xbc6ac99, 0x7f07b5e,
+ 0x27d636d, 0x752a568, 0x4b8a34f, 0xc8b7d1d }
+ },
+ {
+ { 0x325331b, 0x76cb78e, 0xadd2eed, 0x41f41c9, 0x5c5f623, 0x03db238,
+ 0x7102fa2, 0xbbc1d17, 0x60182ec, 0x80f137a, 0x55adf15, 0xfdd8569,
+ 0xe3373dc, 0x4f53f5e, 0x21b669b, 0xec6faf0 },
+ { 0x0b86081, 0x7d4e983, 0xf2d979c, 0x10d3cd9, 0x24a22c8, 0x0f48f58,
+ 0x02f99ee, 0x86c540c, 0x5e6c5fc, 0xf4c6654, 0xbc404c8, 0xaf0c588,
+ 0x423118a, 0x2e6edbd, 0x0690eab, 0x86e32e9 }
+ },
+},
+{
+ {
+ { 0xdfbfa6f, 0x1d12656, 0x7646018, 0xa498095, 0xc3597d0, 0x2f1071b,
+ 0x1dda80a, 0x3df83f9, 0xf3ae449, 0x5853e28, 0x9e19aad, 0xb853d31,
+ 0xa0d8a46, 0x863f01b, 0x2fef108, 0xa84fca6 },
+ { 0xfb84de9, 0xbe4c0b7, 0xc0727bf, 0x40a03dc, 0xb18575c, 0x781f841,
+ 0x466cddb, 0x6a63045, 0x05dc7a2, 0x6be7582, 0x07ae811, 0x420f87f,
+ 0x3bf96c8, 0x2808242, 0x51c6821, 0x723998c }
+ },
+ {
+ { 0x81f5863, 0x38ab641, 0x05ff9e1, 0xd82ecbd, 0xa065856, 0x339c94e,
+ 0xa45156d, 0x143054a, 0x065628c, 0xe6d64bf, 0xa938589, 0xe530086,
+ 0x385d79b, 0x22d3a49, 0x0ab8245, 0x0b10790 },
+ { 0xca387b5, 0xb0d80fb, 0x35551d7, 0x698206e, 0xa10bb73, 0x199685d,
+ 0x9107378, 0xa8e5fa8, 0xd99dbbf, 0x36e5724, 0xd581b03, 0xd67f476,
+ 0x88dd1e6, 0x7a15be7, 0xe5baa31, 0x8dac8e4 }
+ },
+ {
+ { 0xe170ef8, 0x4d5d88f, 0x1e9e600, 0xb6ba5de, 0xedeabc5, 0x4a89d41,
+ 0x8fac936, 0x737c66b, 0x65c3125, 0x8d05b23, 0xb61b68e, 0x85a5cbc,
+ 0x20a6af9, 0x8fea626, 0xd8b50ec, 0x85115de },
+ { 0x6a6f30b, 0x5430c8d, 0x8474295, 0x8bef9cf, 0xbe77f38, 0x0648f5b,
+ 0x9e47bd7, 0xfe2b72f, 0x93106e2, 0xad6c5da, 0xfa7a6c3, 0x4fa6f3d,
+ 0xb396650, 0xdcd2ed8, 0x1157ef9, 0x7de1cce }
+ },
+ {
+ { 0x1f241d1, 0x70a5f6c, 0x798cd5c, 0x6c354d8, 0x1a729fb, 0x23c7838,
+ 0x523cbda, 0xcff8f15, 0x3493697, 0x5683ff4, 0x7534f53, 0xef7dbab,
+ 0x2243d53, 0xd7bd08e, 0xf8072a9, 0x6f644cb },
+ { 0xb22db63, 0xac960f9, 0x23af04d, 0xa97f417, 0xd9798af, 0x692b652,
+ 0xfedb156, 0x0e35967, 0xdfe6ee8, 0x14b5e50, 0xb411070, 0x7597ede,
+ 0x442b3f9, 0x116f3ce, 0x1b2b6db, 0xe9b5ae8 }
+ },
+ {
+ { 0x2315930, 0xf4385ee, 0x27a8740, 0xc8d0298, 0xd934a43, 0x7907a8d,
+ 0xc582191, 0x20bc946, 0x6a405e7, 0xa4acb3e, 0x43df2f5, 0x8c1d6c8,
+ 0x991f0b5, 0x9df1593, 0x4d9be9d, 0xbb9df98 },
+ { 0x8e4b190, 0x6362008, 0xada3a88, 0xee1421e, 0xf93b027, 0xb84f0cc,
+ 0x8e95091, 0x7a5d667, 0xf3e3704, 0x3974462, 0xc593e98, 0xfa6fb5e,
+ 0xa6477d2, 0x44b6cf7, 0xb09a562, 0xe885b57 }
+ },
+ {
+ { 0x09a0c02, 0x6e339e9, 0x0e75f29, 0x57afff0, 0xfb7db03, 0x797d8d6,
+ 0xd25a236, 0xc6e11a3, 0x0107260, 0x643ce1c, 0x62eae1c, 0xe644ec4,
+ 0x3f5a3f5, 0x821d5b8, 0xc0579d6, 0xa8ad453 },
+ { 0x17d43a4, 0x6518ed4, 0x3f87ccd, 0x46e76a5, 0xf9bef95, 0xd6cbaab,
+ 0x4f7cbcf, 0x2568832, 0x08476b4, 0x367159a, 0xbe6d324, 0x1d1b401,
+ 0xa605026, 0x348cb98, 0x43b6b1e, 0x144f3fe }
+ },
+ {
+ { 0x7b1822c, 0xbabbd78, 0x2aa51f8, 0xd34ba7e, 0x41fbea4, 0x086f1cc,
+ 0x746f3d9, 0x96f7eac, 0x281ecaf, 0xad97f26, 0xa14ee2c, 0x751a905,
+ 0x0d7335f, 0xb4e7fe9, 0x4892ff0, 0x0d97b8f },
+ { 0x5a5c40e, 0xdb8a315, 0x7ba567b, 0x64e5de7, 0x1eefe88, 0x4f155f7,
+ 0xfb6fbf4, 0xe2297e9, 0x6c16be5, 0xfe24bf9, 0xcdd83e2, 0x2251847,
+ 0x5eda444, 0x13ac2c8, 0x283275f, 0x49d1b85 }
+ },
+ {
+ { 0x423e08f, 0xca08731, 0x87d2f14, 0x7046bb0, 0x3bc846c, 0x876f10c,
+ 0x358fbe3, 0x2202b76, 0x0e26ac6, 0x0d4fc1c, 0xb986881, 0x1fc748b,
+ 0x8384a18, 0x609e61c, 0x0d88e00, 0x28a72d6 },
+ { 0x78c6e2f, 0x1332a31, 0xb3526a4, 0x0367919, 0x698fe3e, 0x53989e4,
+ 0xb16a99b, 0x14b1145, 0xddbb75f, 0xef9ec80, 0x0e53955, 0x7625624,
+ 0x8744ae1, 0x54e087a, 0x672b875, 0xce50e8a }
+ },
+},
+{
+ {
+ { 0xa29629c, 0x4c88b2b, 0x7b2642f, 0x946559c, 0xf7ebe4c, 0x933d432,
+ 0x63632c9, 0x97109b6, 0xe53184d, 0x799b3fb, 0x0f069a6, 0xd462871,
+ 0x3a68351, 0x0c182a1, 0x9a2437a, 0x974a839 },
+ { 0x2a70278, 0x29f1997, 0xd9c424b, 0x01b98b6, 0x08f4c37, 0xd85a60b,
+ 0x2b1da15, 0xcc3523f, 0xddffb0f, 0xf922115, 0xde84ae2, 0xee0fe4d,
+ 0x55365be, 0x810440c, 0x1a457e8, 0xd2f6639 }
+ },
+ {
+ { 0xe2ddd05, 0x5e6879f, 0xabdfc61, 0x92a7545, 0xa5cede8, 0x7dedd63,
+ 0x70df4bd, 0x8a03b3f, 0x91f6cbb, 0xa5d1f65, 0x10f3fb2, 0x372fde6,
+ 0xa9dee05, 0x4537f9e, 0xdf7aa50, 0x7eb85bb },
+ { 0xe8c504d, 0x963edf8, 0xe7bdb6b, 0x53c8dca, 0x6fedf2d, 0xa246e4c,
+ 0x0c55bde, 0x7553340, 0x0270a54, 0x2aa748d, 0x05860dd, 0xadb6cf0,
+ 0x9b84763, 0x8d31450, 0xeb405ef, 0x626720d }
+ },
+ {
+ { 0x6601328, 0xa3709ae, 0x2ac2478, 0x68e94fd, 0x9d5d247, 0x3879343,
+ 0x392c198, 0xfa467af, 0x15df607, 0x49e7b0d, 0x61792a8, 0x8c58122,
+ 0x1d3762f, 0x79f7658, 0x244a39d, 0xaa38895 },
+ { 0xc5cd0bc, 0xef60af9, 0xa33b3bb, 0x2b0db53, 0x251015d, 0xe3e0b1f,
+ 0xe64489e, 0xc608afc, 0x03651aa, 0xe52b057, 0x1c6f7b9, 0x1dda8b9,
+ 0xff41893, 0x833f022, 0x192818c, 0x58eb0a0 }
+ },
+ {
+ { 0xfc7b5a7, 0x6c1300c, 0xa83ab33, 0x6d2ffe1, 0x9c02eef, 0x7b3cd01,
+ 0xba60d55, 0x6c64559, 0x19e2f73, 0x2e9c16c, 0xdbe47b1, 0x11b24ae,
+ 0x1b8153b, 0xc10a2ee, 0x1e02e1a, 0x35c0e08 },
+ { 0x1dd6f16, 0xa9f470c, 0xf41a290, 0x4ea93b6, 0x25ee03f, 0xac240f8,
+ 0xb85aabd, 0x6cd88ad, 0x1be2f8f, 0x378a64a, 0x417bac1, 0xbf254da,
+ 0x9231142, 0x7e4e5a5, 0x3b8c057, 0x057aadc }
+ },
+ {
+ { 0x80af479, 0x607c77a, 0x5ccdf74, 0xd3e01ff, 0x101b4c7, 0x9680aaf,
+ 0x2fc50a6, 0xd2a7be1, 0xb72d782, 0x92a788d, 0x4640b52, 0x35daf2e,
+ 0x39e601c, 0xc170d69, 0x7b25c2f, 0x16e05f5 },
+ { 0x6fe37f8, 0x47a42a6, 0xbeca298, 0xeb74271, 0x179da16, 0x401e11e,
+ 0xaa53873, 0xfb8da82, 0x5bb4783, 0xd657d63, 0xfcea0b1, 0x6847758,
+ 0x0993154, 0x2f261fb, 0x592853a, 0x868abe3 }
+ },
+ {
+ { 0x35766ab, 0x1a4c543, 0x6f4e4ea, 0xa1c84d6, 0x60ba199, 0x5d737a6,
+ 0x98b15a2, 0x4a7b1e2, 0xfd967d3, 0x207877f, 0xc262b4d, 0xcaec82d,
+ 0x4f2a37d, 0x0b27849, 0x6ac1711, 0x3478141 },
+ { 0x8fc6856, 0x28e3df1, 0x16d003f, 0xbec03f8, 0xff39ebd, 0x2bd705b,
+ 0x2d776d3, 0x1dcb53b, 0x5c0e7ce, 0xabafa7d, 0x4a53332, 0x5b9c8c2,
+ 0x9d90214, 0xe9f90d9, 0xc129690, 0x789747e }
+ },
+ {
+ { 0x54e2dfa, 0x94d3c39, 0xafb2a8f, 0x919f406, 0x34e3927, 0x159ef05,
+ 0xa165c37, 0xcdb4d14, 0x288f337, 0xa23e5e8, 0x0f90242, 0x95867c0,
+ 0xe34e781, 0x2528150, 0x6657b95, 0x104e501 },
+ { 0xbcdda24, 0x695a6c9, 0x23eb5fa, 0x609b995, 0x16a60f8, 0xcbce4f5,
+ 0xf084a29, 0xec63f7d, 0x20c811f, 0x3075ada, 0x8c716a1, 0x129a192,
+ 0xcd4cd4a, 0xd65f4d4, 0x62188be, 0xe18fa9c }
+ },
+ {
+ { 0xbac60e3, 0x1672757, 0x577144b, 0x525b3b9, 0x887055b, 0x38fc997,
+ 0x31e4408, 0x7a77126, 0xcba2fcf, 0x884f173, 0x5962ac0, 0x783cbdc,
+ 0x22287dc, 0x4f3ed0a, 0x50e20e6, 0x8a73e34 },
+ { 0xd764583, 0xe7a1cd0, 0x0d58ee6, 0x8997d8d, 0xaa13ed6, 0x0ea08e9,
+ 0xcf363cb, 0xed478d0, 0x5b37bf4, 0x068523d, 0x783f13c, 0x8b5a9e8,
+ 0x87528a9, 0xde47bbd, 0xcaec313, 0xd6499cc }
+ },
+},
+{
+ {
+ { 0xe09859d, 0x54781bb, 0x7f5e648, 0x89b6e06, 0x7075824, 0xb006dfe,
+ 0x0717f68, 0x1731660, 0x0b4efe2, 0x9c86554, 0x5e30d8e, 0xdbdb257,
+ 0x3b4d50f, 0xa6a5db1, 0xfa47beb, 0x3b5662c },
+ { 0x89d4a59, 0x9d4091f, 0x550a7dc, 0x790517b, 0xc52965e, 0x19eae96,
+ 0xb5ed7a4, 0x1a7b3c5, 0xeb16541, 0x19e9ac6, 0xef66852, 0x5f6262f,
+ 0xc4cda27, 0x1b83091, 0x3bf742b, 0xa4adf6f }
+ },
+ {
+ { 0xa5100e7, 0x8cc2365, 0x8592422, 0x3026f50, 0x3d714d0, 0xa4de79a,
+ 0x90fcb30, 0xefa0d3f, 0x474ada0, 0x126d559, 0xc94350a, 0xd68fa77,
+ 0x0c7cb45, 0xfa80e57, 0x3985fbf, 0xe042bb8 },
+ { 0xfe13dba, 0x51c80f1, 0xcf055d7, 0xeace234, 0x73f95f7, 0x6b8197b,
+ 0xdcdbe89, 0x9ca5a89, 0xdfd9896, 0x2124d5f, 0x9e7ca37, 0x7c69556,
+ 0x8babb37, 0x58e806a, 0xbaf99ce, 0x91b4cc7 }
+ },
+ {
+ { 0x197e968, 0x874e253, 0x3160668, 0x36277f5, 0x8b95dbe, 0x0b65dda,
+ 0xf0872a1, 0x477a792, 0x314268d, 0x03a7e3a, 0x0c805c7, 0xa96c842,
+ 0xb7bc4a8, 0xb941968, 0x75db390, 0x79dce30 },
+ { 0x6f4cc14, 0x577d4ef, 0xb5d1107, 0x5b0d205, 0x9f93624, 0x64ff20f,
+ 0x5034a2f, 0x0b15e31, 0x8b6f35c, 0x3a0f6bb, 0xe0d0ec5, 0x0399a84,
+ 0x0d5d521, 0xd0e5823, 0xcb1dd54, 0xdeb3da1 }
+ },
+ {
+ { 0x182401a, 0x24684ae, 0x21a706f, 0x0b79c1c, 0xd8998af, 0xe1d81f8,
+ 0x4bb069f, 0xadf870f, 0xf3dd7aa, 0xd57f85c, 0xe4a40f8, 0x62d8e06,
+ 0x8b55aa1, 0x0c5228c, 0xa9c0a1a, 0xc34244a },
+ { 0x68f544e, 0xb5c6cf9, 0xde23ab7, 0xa560533, 0x47c690c, 0xaa55120,
+ 0x12aaaa6, 0x20eda5b, 0x751a6a0, 0xea0a49a, 0x2baa272, 0x6d6cfff,
+ 0xbf4c28a, 0x95b756e, 0xe6178a4, 0xd747074 }
+ },
+ {
+ { 0x221a94b, 0xa27b453, 0xe635f20, 0xd56ad13, 0x8c95117, 0x03574b0,
+ 0xed30b70, 0xf0ee953, 0x957796f, 0xb48d733, 0x58c336b, 0xf5d9583,
+ 0x82db529, 0x6170cd8, 0xec9d1ea, 0xcd3ef00 },
+ { 0xe4d105f, 0xd1bea0d, 0xad6a559, 0xd2d670f, 0x52f9690, 0x652d012,
+ 0xc2529b0, 0x5f51fb2, 0xe89df2a, 0x5e88bf0, 0xcd686e4, 0x9a90684,
+ 0x882c7a1, 0xf519ccd, 0xc2f4d37, 0x933a0df }
+ },
+ {
+ { 0x3f66938, 0x0720a9f, 0xd8149df, 0x99356b6, 0xa3d7f61, 0xb89c419,
+ 0x4ba6e31, 0xe658134, 0xab936c8, 0xd130561, 0x40dbef1, 0x0625f6c,
+ 0xb6bb847, 0x7b2d6a2, 0x84d506b, 0x3ca8b29 },
+ { 0xfb011b0, 0x6bf729a, 0x33448c9, 0x01c3078, 0x0837420, 0x6ae9508,
+ 0xa207fb8, 0xf781a8d, 0x57562a9, 0xcc54d58, 0x858c5ab, 0xc9b7364,
+ 0x359908f, 0xdfb5035, 0x9631138, 0x8bf77fd }
+ },
+ {
+ { 0xc13fbb1, 0xf523365, 0x9993ed5, 0x88532ea, 0x5a73492, 0x5318b02,
+ 0xe5a8f3c, 0x94bff5c, 0x306c2a0, 0x73f9e61, 0xf2668a3, 0x00abbac,
+ 0x076237d, 0x23ce332, 0x34c0f9b, 0xc867f17 },
+ { 0xcfd2136, 0x1e50995, 0xb2b70f8, 0x0026a6e, 0x5077a7d, 0x66cb184,
+ 0xa3b498e, 0xc31b2b8, 0x260ec86, 0xc12035b, 0xe1b3df0, 0x1cbee81,
+ 0x8d55a42, 0xfd7b804, 0xf47a8c8, 0x912a41c }
+ },
+ {
+ { 0x9e157e3, 0xab9ffe7, 0x44dc158, 0x9cfe46d, 0x8a4a3ef, 0x435551c,
+ 0x3b7e3a8, 0x638acc0, 0x49954a7, 0x08a4ebd, 0x13194f7, 0x295390c,
+ 0x253892a, 0x3a2b68b, 0x25d5b11, 0xc1662c2 },
+ { 0x3a5d2bb, 0xcfba072, 0xcc327c9, 0xffaf6d3, 0xc67e254, 0x6c6314b,
+ 0x2f32208, 0x6661631, 0xbea72e1, 0xf780f97, 0x002122f, 0x495af40,
+ 0x7578a99, 0x3562f24, 0x77ce51e, 0x5f479a3 }
+ },
+},
+{
+ {
+ { 0x1a82a12, 0x91a5884, 0x80f3a62, 0xa754175, 0xf73417a, 0x399009f,
+ 0x0a8c5cd, 0x2db1fb9, 0xc046d51, 0x82c8912, 0x8f18274, 0x0a3f577,
+ 0x26ccae2, 0x2ad0ede, 0x8a4e9c2, 0x7d6bd8b },
+ { 0x4b3de44, 0xaa0d797, 0x96ac9bb, 0xf8658b9, 0x5f6c334, 0x31e7be2,
+ 0x4df12c9, 0x23836ce, 0x59eb5c9, 0x029027b, 0x5b8649d, 0x2f22531,
+ 0xd907162, 0xa0fdf03, 0x9e80226, 0x101d9df }
+ },
+ {
+ { 0x9a90835, 0xf12037a, 0xf0222a7, 0xd2d0882, 0xc3814e2, 0xeaf8d40,
+ 0x8b8146b, 0xa986dc6, 0x8504653, 0x147a331, 0x2feaf67, 0x734e003,
+ 0x602bec5, 0x6f27bbf, 0x6a688f3, 0xa1e21f1 },
+ { 0x73c4ae5, 0x5a8eeab, 0xe70b412, 0x4dbaddb, 0xcfd2af1, 0x871ceba,
+ 0x7d7a286, 0x1860382, 0xb5bb401, 0x024059d, 0x3c39b73, 0x2557c09,
+ 0x6681697, 0xfc5a711, 0x891b57c, 0xf881c0f }
+ },
+ {
+ { 0x8ea191a, 0x3c443f1, 0xd700ad0, 0x76faa58, 0xbe7fcbf, 0x6fe6cfa,
+ 0x8990ef7, 0xaefc528, 0x80004cc, 0x44e30fa, 0x6d8ef85, 0xc744adc,
+ 0x912df70, 0xafcd931, 0x572a6d8, 0xf62a9d1 },
+ { 0x3219f27, 0x47158a0, 0xad73136, 0x76fb27e, 0xcc2d614, 0x41bb2ad,
+ 0xde1ec21, 0x8858cb9, 0x5f15866, 0xab402c4, 0xbc82bbf, 0x6675d5b,
+ 0xf1b28d3, 0x4ee9dd6, 0xe373c17, 0x875884f }
+ },
+ {
+ { 0x2a67d36, 0x17806dd, 0x32c9ec1, 0xaa23a86, 0xfc1ee55, 0xd914126,
+ 0x653701b, 0xbf8f7bd, 0xea71367, 0x9b0111a, 0xa98e417, 0x61fd4ab,
+ 0x561c5a5, 0xeb45298, 0xe7af394, 0x2187b0a },
+ { 0x1616dde, 0x71f12db, 0x07da7b4, 0x0617609, 0x02ddb04, 0x414d376,
+ 0x286fb58, 0x1100be7, 0x6f0d95b, 0xd7cf88d, 0x746d703, 0x8539d23,
+ 0x4e23d73, 0xdccc9d6, 0xec89680, 0xaeef1d2 }
+ },
+ {
+ { 0x336508d, 0x82ccf1a, 0x5bad150, 0xa128c1f, 0x29a188d, 0x551d8c0,
+ 0x771404f, 0xef13dd4, 0xc37b993, 0xdd67696, 0x0dddad2, 0x428c0e2,
+ 0x038c94c, 0x222278d, 0x078e3f2, 0x1a24a51 },
+ { 0xedb0db9, 0xd297fe6, 0x8251a87, 0x00988d2, 0xbfaa0d7, 0xbb946f8,
+ 0xdf45ea0, 0x380f7b9, 0xafccf5e, 0x8526415, 0xe9ec7bc, 0x909bfbf,
+ 0x124755c, 0x2ed7093, 0x89404e2, 0x4368028 }
+ },
+ {
+ { 0x36d9ef1, 0x21b9fa0, 0xe433526, 0xfd64b7c, 0x6544849, 0xd9d7eb7,
+ 0xd5b54b3, 0x201620c, 0xbb61159, 0x25fab3d, 0xc53e0d3, 0x90d4eb0,
+ 0x9e74772, 0xba09831, 0xec1681c, 0x8749658 },
+ { 0xfec316b, 0xa354349, 0xa743ea2, 0x639a9b1, 0x37c50e6, 0x2e514ca,
+ 0xdbaf6c5, 0x9f4a4fd, 0x6f511c9, 0x0df87ef, 0x0c00d95, 0xadd4cef,
+ 0xaa1433f, 0x401c0eb, 0xbb38af9, 0x3c3a59e }
+ },
+ {
+ { 0xf0e7dca, 0x8706245, 0x3fb29ca, 0xad238cd, 0x9b7d8f0, 0x0330443,
+ 0x154f495, 0xfdcd6e6, 0x7d4ad09, 0xc67e24a, 0x5438390, 0x1b209e8,
+ 0xb0c211e, 0xf893b81, 0x7e11e36, 0x1aa86f0 },
+ { 0xedea8b1, 0x2cca3ff, 0x3b306cd, 0x7eedd07, 0x12ee222, 0x78e37bc,
+ 0xbc42a1d, 0x257870b, 0x1fbd397, 0x5fb2bb9, 0x09d6c60, 0x4702470,
+ 0x20bdc36, 0x11748a3, 0x04280e8, 0x3ff24dc }
+ },
+ {
+ { 0x9839b52, 0x0eb1c67, 0xacfbd32, 0x5bcca27, 0x74898e3, 0xb506c16,
+ 0x2489e5e, 0x37d662e, 0xf694887, 0x8dc0731, 0xf43f1dc, 0x571149e,
+ 0x66d63dc, 0x6430a37, 0xb50dd70, 0x0d2640e },
+ { 0x3b2675b, 0x2b56149, 0x88c604f, 0x1b48065, 0xaafbabc, 0x55c86a8,
+ 0x608aaba, 0xa7b9447, 0x04cad8c, 0xa42f635, 0xcee7788, 0x0f72b1d,
+ 0x755d99a, 0x1d68374, 0x5be2531, 0xd7cdd8f }
+ },
+},
+{
+ {
+ { 0xbcdfee1, 0x67873bd, 0xfcd0a3f, 0xa5a0c0a, 0x3cfa3d4, 0x59389f9,
+ 0xe1c865c, 0x14e945c, 0x1d588cc, 0x62d2f8e, 0x8e228b4, 0xfd02f8a,
+ 0xb42b649, 0x208f791, 0xab397ad, 0x0e0dff1 },
+ { 0x0bc6eb1, 0x30ac3d9, 0x5f313bb, 0xf14f16a, 0xe2a0ad2, 0x70fa447,
+ 0x5a0db84, 0x6e40685, 0xe32e1e7, 0xd52282b, 0x15ca330, 0x315a02a,
+ 0x867c2fe, 0x9a57a70, 0x0054923, 0x55f0765 }
+ },
+ {
+ { 0xc0cf08f, 0x2d729f6, 0xebaf57f, 0x6b80138, 0x0200c25, 0x6285bcc,
+ 0x2cd2ac7, 0xee84519, 0x922778a, 0x28fce4d, 0xcd1011c, 0x761325c,
+ 0x5100e47, 0xd01f247, 0xc60d8e1, 0xc7a1665 },
+ { 0x7ceb064, 0x950966d, 0x78420db, 0x0a88e85, 0xe096f29, 0x44f2cfc,
+ 0x640f1d2, 0x9d9325f, 0xd2426f1, 0x6a4a81f, 0x9c905ac, 0x3ed6b18,
+ 0x008854d, 0xba3c0e2, 0xa0d321b, 0x1df0bd6 }
+ },
+ {
+ { 0x3feb1e7, 0x0117ad6, 0xf1ae02f, 0xa058ba2, 0x31b3f06, 0x5eee5aa,
+ 0xafacd4d, 0x540d9d4, 0x1571d91, 0x38992f4, 0xbf2c7de, 0xef2738e,
+ 0x92a798d, 0x28bfcab, 0x2286733, 0x37c7c5d },
+ { 0x6470df0, 0xb99936e, 0x8af6a42, 0x3d762d5, 0xc74eec5, 0xa8c357a,
+ 0xf13afbc, 0x9917beb, 0xf2dc073, 0x28f0941, 0x6ce7df7, 0x306abf3,
+ 0xd6973c8, 0xa3c5f6f, 0x3677632, 0x640209b }
+ },
+ {
+ { 0xe23aef7, 0xee872a2, 0xeb9b08e, 0xb497b6f, 0x3f33c63, 0xfb94d97,
+ 0x2b32315, 0x9ea1ff4, 0x49a4166, 0x537b492, 0xab4f8be, 0x89c7fe6,
+ 0xdad8f0f, 0xf68007f, 0x71b8474, 0xe56ef0b },
+ { 0x3f333f9, 0x478b2e8, 0xb2607f5, 0x144e718, 0xa4c7ab5, 0x13aa605,
+ 0x1d0730d, 0xfc1fc99, 0x5ab3ea1, 0xe7a0437, 0x306d8d3, 0xc59986a,
+ 0x702a8b1, 0x24f6111, 0xe040ad2, 0x7741394 }
+ },
+ {
+ { 0x60723a7, 0x34c6a25, 0xf4ea691, 0x8aabd0d, 0x5d7497f, 0x9d676a5,
+ 0x7d91fa4, 0x12c0957, 0x6479284, 0x581c7a8, 0xf4fd449, 0xa54f3da,
+ 0x4ef44cf, 0x2f89f3c, 0xc9ec97c, 0xfc266b5 },
+ { 0x88b142a, 0xfcd3fbe, 0x4bd69c1, 0x9f3109f, 0xb5f5a6a, 0x08839c0,
+ 0x2e68303, 0x63ca850, 0xbba0a74, 0x2f0628d, 0x5d56b54, 0x743cccf,
+ 0x13e09fd, 0xbd4b066, 0xde2ba3e, 0x7a8415b }
+ },
+ {
+ { 0xc076ab2, 0x2234a3b, 0x4977a98, 0xd6953e5, 0x31ebe2e, 0xc122158,
+ 0xbad78e2, 0x632145f, 0xa5c4b08, 0xd7ba78a, 0x998e32a, 0x6f4ea71,
+ 0x3485a63, 0x25900d2, 0x6a5176f, 0x97ac628 },
+ { 0x1093f7b, 0x5df9118, 0xc844563, 0x2bf9829, 0x6272449, 0x525d99d,
+ 0xb5c8a18, 0x4281cb5, 0x0544a08, 0x35df278, 0xbaeb8f4, 0xf4c3d2d,
+ 0x5230447, 0xc7ff317, 0x5d2fbff, 0x6b4d764 }
+ },
+ {
+ { 0x2b0c9cb, 0x4837f80, 0x8ce8418, 0xb65f816, 0x9fc1428, 0xdf66ea9,
+ 0x04ea7e8, 0x9788ee8, 0x8334e3c, 0x9eae900, 0xd6ba1b6, 0xbc91058,
+ 0xd7064b6, 0x634aba1, 0x397b368, 0x12d9bb3 },
+ { 0xc413aa8, 0x0645c85, 0xac6b5e3, 0xb09dea6, 0x289a50b, 0x29a620d,
+ 0xbbcceb1, 0x104db3b, 0x87b3309, 0x42e4792, 0xec97f01, 0xdfc373e,
+ 0xb93f84e, 0xe953f94, 0x052dfbf, 0x3274b7f }
+ },
+ {
+ { 0x1bd6fa9, 0x9d5670a, 0xdb6c4d4, 0xec42fc9, 0x1b42845, 0xaecd4ed,
+ 0x1b03549, 0x4eed90e, 0xbbab1fa, 0xeb3225c, 0x28a2816, 0x5345e1d,
+ 0x0b77d2a, 0x3741cfa, 0x7ea8caa, 0x712b19f },
+ { 0x661853e, 0x42e6844, 0xe4a6e5d, 0x4cf4126, 0xc3649f6, 0x196a9cf,
+ 0xf21b6b1, 0x06621bc, 0x32e29ea, 0x887021c, 0x8c5680f, 0x5703aeb,
+ 0x660f6d7, 0x974be24, 0xc71864e, 0xaf09bad }
+ },
+},
+{
+ {
+ { 0xa81b6d3, 0x3483535, 0xca037dc, 0x19e7301, 0x63ddfeb, 0x748cab7,
+ 0x6f01a38, 0xe5d87f6, 0x2795cd6, 0xbba4a5c, 0x615c36c, 0x411c5d4,
+ 0x706f412, 0xff48efc, 0x4b519df, 0x205bafc },
+ { 0x5227110, 0xfcaa5be, 0x3ad0af0, 0x7832f46, 0x2642b1b, 0x34ef2c4,
+ 0x072f822, 0x7bbef7b, 0x923a616, 0x93cb0a8, 0x6d91ba7, 0x5df0236,
+ 0x42f7d21, 0x5da94f1, 0xa14e891, 0x3478298 }
+ },
+ {
+ { 0xc831d39, 0xad79a0f, 0x4803c44, 0x24d1948, 0x86aeeb2, 0x4f8a864,
+ 0x926f6b9, 0x0ca284b, 0x1acd7cd, 0x501829c, 0x3d12c52, 0x9f6038b,
+ 0xf371ef5, 0x77223ab, 0x13bf4de, 0x2e03516 },
+ { 0xb4468cc, 0x7a5a4f2, 0x470ae46, 0xdcea921, 0x11be696, 0xf23b7e8,
+ 0x720d6fb, 0xe59ad0d, 0x2983469, 0x9eacac2, 0xc4397ee, 0x4dd4110,
+ 0xcbe2675, 0x4ef85bd, 0xaa7c74b, 0xe4999f7 }
+ },
+ {
+ { 0x8ea1e98, 0x031838c, 0x04d96a2, 0x539b383, 0x163956e, 0x5fbdef0,
+ 0xce3f52a, 0x6bd4d35, 0x55e897f, 0xe538c23, 0x472dd3f, 0x6078d3a,
+ 0xca9f452, 0x590241e, 0xfd7fc07, 0x2bc8495 },
+ { 0xead4c8c, 0x23d0c89, 0x601c66e, 0x1ea55a9, 0x4f5b833, 0x41493c9,
+ 0xaa5a978, 0xc49a300, 0x0c69594, 0xc98bdc9, 0xccbdc8c, 0x4e44ced,
+ 0x6adccbf, 0xb0d4e91, 0x32c37ae, 0xd56e36b }
+ },
+ {
+ { 0x5b93152, 0x052bd40, 0x4f1dbfa, 0x688b1d4, 0xbe5cc5f, 0xe77ba1a,
+ 0xa6ac543, 0x11f8a38, 0xe4bb988, 0x3355fd6, 0xf8dffb4, 0xdf29c5a,
+ 0x81f20ee, 0x751f589, 0xda9b7fb, 0x22a0f74 },
+ { 0x6397b49, 0xec8f2bc, 0x3639201, 0xff59fc9, 0xa048264, 0xb7f130a,
+ 0xafdc4cc, 0xe156a63, 0xb13acaf, 0x0fd7c34, 0x0cb4999, 0x87698d4,
+ 0x7f26f24, 0x6d6ecae, 0x0f296e2, 0xae51fad }
+ },
+ {
+ { 0xdd0f58d, 0xd0ad5eb, 0x5c67880, 0x6ec6a2c, 0x9af1e0f, 0xe1ce034,
+ 0x3996d32, 0x0801485, 0x5e69d20, 0x59af51e, 0xaa48ecf, 0x0ef743a,
+ 0x7dafcb0, 0x8d3d2ea, 0x89189b6, 0x4ac4fad },
+ { 0xeae97f1, 0x92d91c2, 0x62b4662, 0xef5eca2, 0xb38b10a, 0x440b213,
+ 0xfc661da, 0xec90187, 0xf64cf8d, 0x85f3f25, 0x457ad1b, 0xcee53ca,
+ 0xf517672, 0x8deed4b, 0x4761828, 0x7706fb3 }
+ },
+ {
+ { 0x17494fe, 0x1577d91, 0x2fd7239, 0x52d29be, 0x0186d37, 0x9a0eef0,
+ 0x27fe108, 0x241d0f5, 0xe6fb59f, 0x42824ba, 0x0d48c25, 0xb8d33df,
+ 0x47af4b0, 0xfffdb0a, 0x073b0b6, 0x534c601 },
+ { 0x51c033b, 0xe6df359, 0x86c0f94, 0x3e1002b, 0x48fb9b6, 0xa7cb555,
+ 0xa7bbff8, 0x999818b, 0x84d8bf2, 0xe4ba3d6, 0x6358f0a, 0x53dbb32,
+ 0xf2568e8, 0xeebc1e2, 0xb3e0f68, 0xc6917eb }
+ },
+ {
+ { 0x19f8d13, 0xbe1bbfc, 0x2d4795c, 0xc3951b6, 0xed535a9, 0x9371c49,
+ 0x68cebea, 0x77c389f, 0xa141d0e, 0xfc1a947, 0xde44f8b, 0x4b48d7a,
+ 0x8580a26, 0x3db1f05, 0x258b5fc, 0xeed1466 },
+ { 0x9854b21, 0x5daa4a1, 0x1ab1ead, 0x5bfa46f, 0x59957eb, 0xc152e35,
+ 0xea48ada, 0xdc84277, 0xfc169b5, 0x68709cf, 0x720e617, 0xde50ce3,
+ 0xdd9a832, 0xe42f262, 0x2d6ce29, 0xddffd4d }
+ },
+ {
+ { 0x8fa0a56, 0xd5ba557, 0xfafaf4c, 0x0d7d0f1, 0x38b63ed, 0x7666e41,
+ 0x5d87f02, 0x04e6513, 0xc958f32, 0xdca8866, 0x3ce2686, 0xaa8486d,
+ 0xf1cbcd3, 0xe3785ca, 0x03c8335, 0x8a9b114 },
+ { 0x2e0ef60, 0x5c1dca2, 0x7d3fb20, 0x775af5b, 0x2b373a8, 0xe690ffc,
+ 0x28330e6, 0x30fe15d, 0xdd0f393, 0x8a1022b, 0x966a828, 0x6bd7364,
+ 0x949208a, 0x8d4b154, 0xb9d9828, 0xfb38c6b }
+ },
+},
+{
+ {
+ { 0x0340ac2, 0x6d19764, 0xecab5ff, 0x969f473, 0xc458e42, 0xead46f7,
+ 0x1d00eed, 0x168646a, 0xe0ce0cf, 0xf70c878, 0x8d8d15a, 0xa7291d3,
+ 0xfdd10cc, 0x92cf916, 0x24f86d5, 0x6d36134 },
+ { 0x2d5c4b4, 0xba50d17, 0x4626f15, 0xe0af502, 0xd76098a, 0x76f3809,
+ 0xd6caaa8, 0x433dc27, 0x70d97a7, 0x72dc67a, 0xf5c7355, 0x935b360,
+ 0x179bb31, 0xdbaac93, 0x7ed1a33, 0x7673848 }
+ },
+ {
+ { 0x8f9fa0d, 0x8d1ca66, 0xa02f2bf, 0x4ed95d8, 0xf630d7b, 0xd19fc79,
+ 0xf46fa51, 0x0448ec4, 0x623bf3f, 0xb371dd8, 0xd650e94, 0xe94fabc,
+ 0xcd90a70, 0x3af3fca, 0x03ce3b7, 0x0f720c4 },
+ { 0xd636c3b, 0x590814c, 0x4469945, 0xcf6928d, 0x484a4c6, 0x5843aaf,
+ 0xf9b4722, 0xb5a4c1a, 0x6cfb2f9, 0x25116b3, 0x32c2640, 0xf248cf0,
+ 0x27412a1, 0x8cd059e, 0x862fc5d, 0x866d536 }
+ },
+ {
+ { 0x6de4a2e, 0x156e62f, 0xaafcc78, 0x0365af7, 0x19e925e, 0x65c8618,
+ 0xf8b2191, 0x4db5c01, 0xad564fa, 0x1fd26d1, 0x19c8610, 0x16bbc53,
+ 0x815f262, 0x0718eef, 0x27f83d1, 0x8684f47 },
+ { 0xb0f48db, 0xa30fd28, 0x6ab8278, 0x6fef506, 0x1a652df, 0xd164e77,
+ 0xc6ebc8c, 0x5a486f3, 0xdc3132b, 0xb68b498, 0xd73323f, 0x264b6ef,
+ 0x69b2262, 0xc261eb6, 0x2a35748, 0xd17015f }
+ },
+ {
+ { 0x7c4bb1d, 0x4241f65, 0xf5187c4, 0x5671702, 0x3973753, 0x8a9449f,
+ 0xcc0c0cd, 0x272f772, 0x58e280c, 0x1b7efee, 0x4b5ee9c, 0x7b32349,
+ 0x31142a5, 0xf23af47, 0xd62cc9e, 0x80c0e1d },
+ { 0x675ffe3, 0xcbc05bf, 0x258ce3c, 0x66215cf, 0x28c9110, 0xc5d2239,
+ 0x2a69bc2, 0x30e12a3, 0x76a9f48, 0x5ef5e80, 0x2329d5f, 0x77964ed,
+ 0x8a72cf2, 0xdf81ba5, 0x6e1b365, 0x38ea70d }
+ },
+ {
+ { 0x2f75c80, 0x1b18680, 0x698665a, 0x0c153a0, 0x522e8dd, 0x6f5a7fe,
+ 0x8ddfc27, 0x9673866, 0x0d3bdce, 0x7e421d5, 0x25001b2, 0x2d737cf,
+ 0x0e8490c, 0x568840f, 0xe30c8da, 0xea2610b },
+ { 0x9561fd4, 0xe7b1bc0, 0x26decb0, 0xeda786c, 0x6a76160, 0x2236990,
+ 0x78a3da3, 0x371c714, 0x2a2d9bf, 0x1db8fce, 0x3292f92, 0x59d7b84,
+ 0x5a665f9, 0x8097af9, 0x542b7a9, 0x7cb4662 }
+ },
+ {
+ { 0xc6b0c2f, 0xa5c53ae, 0x7312d84, 0xc4b8732, 0xc732736, 0xfc374cb,
+ 0x9310cc0, 0xa8d78fe, 0x65d1752, 0xd980e86, 0x6004727, 0xa62692d,
+ 0x0146220, 0x5d07928, 0x860fea5, 0xbd1fedb },
+ { 0xb35d111, 0xcbc4f8a, 0x3e32f77, 0x5ba8cdf, 0xb614b93, 0xd5b71ad,
+ 0x2f8808d, 0x7b3a2df, 0x6ef2721, 0x09b89c2, 0x47c3030, 0x55a5054,
+ 0x2986ae6, 0x2104431, 0x2367d4c, 0x427a011 }
+ },
+ {
+ { 0xc1942d8, 0xe9fe256, 0x96e3546, 0x9e7377d, 0xb0c1744, 0x43e734c,
+ 0x211fbca, 0x5f46821, 0x32b6203, 0x44f83dc, 0x6ad1d96, 0x8451308,
+ 0x2fbb455, 0x54dd519, 0x2f10089, 0xc2a1822 },
+ { 0x1855bfa, 0x01055a2, 0x77078b4, 0x9e6d7b4, 0x30cea0e, 0x3f8df6d,
+ 0x32973f7, 0x81c2150, 0xc0b3d40, 0x17dd761, 0x50d0abe, 0x040424c,
+ 0x783deab, 0x5599413, 0x8f3146f, 0xde9271e }
+ },
+ {
+ { 0xaf4a11d, 0x5edfd25, 0x7846783, 0x3a3c530, 0x73edd31, 0xb200868,
+ 0xfe0eef8, 0x74e00ec, 0x3dd78c7, 0xba65d2f, 0x71999f1, 0xab13643,
+ 0xde9a7e8, 0xfa9be5d, 0x87a8609, 0xeb146ce },
+ { 0x65353e9, 0x76afd65, 0xd51ba1c, 0xfa7023d, 0x37ede4f, 0x7a09f22,
+ 0x0ba7a1b, 0xca08576, 0xb99950a, 0xd973882, 0xea5057a, 0xe894266,
+ 0x7f55e49, 0xd01c421, 0x5555679, 0x69cfb9c }
+ },
+},
+{
+ {
+ { 0xc5d631a, 0x67867e7, 0x5bcf47b, 0x1de88c5, 0xafd1352, 0x8366d06,
+ 0x6e20337, 0xd7dbdef, 0x1253ec7, 0xb0f9e2f, 0x10ad240, 0x1be9845,
+ 0xf4a6118, 0x63ec533, 0x96ce633, 0xd5e4c5b },
+ { 0x4df4a25, 0x1d0b6c3, 0x5a1b554, 0xef9486a, 0x47b6ef3, 0x2f0e59e,
+ 0x2ff84d7, 0x4d8042f, 0xda359c9, 0x3e74aa3, 0xd21c160, 0x1baa16f,
+ 0x0191cba, 0xb4cff21, 0xebc6472, 0x50032d8 }
+ },
+ {
+ { 0x1fc1b13, 0xb6833e0, 0x1a5ad8f, 0x8a8b7ba, 0x622b820, 0xc0cafa2,
+ 0x738ed20, 0xc6663af, 0x8b18f97, 0xd894486, 0x774fbe4, 0xcf0c1f9,
+ 0x5be814f, 0xeedd435, 0xb57e543, 0xd81c02d },
+ { 0x310bad8, 0x5e32afc, 0x9b813d1, 0x065bc81, 0x3142795, 0x8efc5fc,
+ 0x732d59c, 0x5006514, 0x2b5a3ce, 0x91e39df, 0xfaf4204, 0x2ad4477,
+ 0x4d9bd4f, 0x1a96b18, 0xa4d9c07, 0xc3fee95 }
+ },
+ {
+ { 0x6b4ba61, 0xfac7df0, 0x061aaef, 0xa6ed551, 0x133f609, 0x35aa2d6,
+ 0x20ed13d, 0x420cfba, 0xea03d0c, 0x861c63e, 0xf936d6e, 0x75f0c56,
+ 0x3d9a3d5, 0xa25f68f, 0xcd9f66e, 0xba0b7fe },
+ { 0x4680772, 0x292e135, 0xa73f405, 0x6f6a2db, 0x24ea9e4, 0xca6add9,
+ 0x268daaa, 0x81cfd61, 0xe6f147a, 0x7a4cb6c, 0xbded8f5, 0x8ec3454,
+ 0x11d61cb, 0xc8a893b, 0x7656022, 0x2256ffc }
+ },
+ {
+ { 0x575cb78, 0x6b33271, 0xadcd23e, 0x560d305, 0xd6d834b, 0xeedbd3a,
+ 0x5a31e27, 0x614a64a, 0x47ee0c8, 0xe40b476, 0x8bd7c2c, 0x8ef4ff6,
+ 0x0b77727, 0xa5297fc, 0xbaf88ad, 0x8759208 },
+ { 0x918df68, 0x86cfe64, 0xcdd882e, 0x9d60a73, 0xb953014, 0x546b642,
+ 0x8bbef55, 0xbaceae3, 0xf1c3467, 0xdf58e43, 0xe9f9bab, 0x99a83fe,
+ 0x57a4a8b, 0xcd52cbf, 0x8ae36ec, 0xf744e96 }
+ },
+ {
+ { 0xa607124, 0xb945869, 0x440e6f6, 0x810dbe9, 0x738e381, 0x9911e60,
+ 0x343b80b, 0x51df68c, 0xf7a3f39, 0xe424336, 0x989015c, 0x2d32acb,
+ 0x31019e8, 0xa69b149, 0xec12f93, 0x8a31a38 },
+ { 0x97c916a, 0x0d0d369, 0x8885372, 0xdc95f3b, 0x3549040, 0xcf1a261,
+ 0xabe95a2, 0x60f6f5e, 0xe141325, 0xa909e9f, 0x355c865, 0x7d598f2,
+ 0x931a9c9, 0x70c6442, 0xb423850, 0x2354a85 }
+ },
+ {
+ { 0x97f9619, 0x4cdd224, 0xc22162e, 0x4776fff, 0x0cd31c2, 0xee5ec33,
+ 0xf209bb8, 0x7c04c10, 0x579e211, 0x35bbfde, 0x15cdfc2, 0x0e38325,
+ 0xe26ffa7, 0x657e6d3, 0xc65c604, 0xc66a7c3 },
+ { 0xb45e567, 0x322acd7, 0x296db9b, 0x1589cf0, 0xba1db73, 0x1fd0bd3,
+ 0x9337a40, 0xe882610, 0xb3035c7, 0xf505a50, 0x6ed08d7, 0x4d5af06,
+ 0x5eda400, 0xb3c376b, 0x1944748, 0x9c7b700 }
+ },
+ {
+ { 0x70c3716, 0xd768325, 0xdd540e0, 0xda62af0, 0x6580fea, 0x76b155d,
+ 0x32b5464, 0x4f42acc, 0x3f5b72b, 0x881bb60, 0xe68b9ba, 0x09c130e,
+ 0x5c50342, 0x37ede3b, 0xfd15e7d, 0xce61a9c },
+ { 0x72605d0, 0xfff1d85, 0x062abc2, 0x62ac2d3, 0xfbe43dd, 0xa85e02e,
+ 0xa947020, 0x859d2ba, 0x111c20b, 0x2ebc8a9, 0xa656f66, 0x7f590a7,
+ 0x16b21a6, 0x0e13843, 0x00c7db6, 0x29b30c5 }
+ },
+ {
+ { 0x906b8de, 0x61e55e2, 0x949974d, 0x6a97e96, 0x26eef67, 0x24b52b5,
+ 0x1aa595a, 0x512f536, 0x3c48fcb, 0x81cc7b8, 0x28115ad, 0xa64af23,
+ 0x3d44b8e, 0x9edf6f9, 0x1fe22e3, 0x68d7f7c },
+ { 0x520d151, 0x2b2116a, 0x6aa3efb, 0x66a0b7d, 0x9b0f791, 0x48ae70a,
+ 0x037db88, 0xcf12174, 0x317d9f3, 0x36868cd, 0x22fc344, 0xb573059,
+ 0x46a5d23, 0xbaa8526, 0x37fc10d, 0xad65691 }
+ },
+},
+{
+ {
+ { 0x12c78d5, 0xcf8e5f5, 0x805cdbd, 0xeb94d98, 0x2ab50b5, 0xad1dcdf,
+ 0xf33cd31, 0xf33c136, 0x10aeff5, 0x0d6226b, 0xf2f8fc5, 0xf7ff493,
+ 0xdf57165, 0x7e520d4, 0x05271a7, 0x41fbae5 },
+ { 0x76480ba, 0x72c8987, 0x25f4523, 0x2608359, 0x49f5f01, 0xed36b8d,
+ 0xf3d49eb, 0x3bc1dce, 0x4940322, 0x30c1c1a, 0x7e0f731, 0x78c1cda,
+ 0x6d05a31, 0x51f2dc8, 0x07f3522, 0x57b0aa8 }
+ },
+ {
+ { 0x71f88bc, 0x7ab628e, 0x8018f21, 0xcf585f3, 0x13d64f6, 0xdbbe3a4,
+ 0xec493a5, 0x0f86df1, 0x7725de9, 0x8355e6c, 0xe00fe1e, 0x3954ffe,
+ 0x9924e32, 0xbb8978f, 0x7812714, 0x1c19298 },
+ { 0xaabca8b, 0x7c4ce3e, 0x9bf7019, 0xf861eb5, 0x682e541, 0x31a84fc,
+ 0xacd1b92, 0x2307ca9, 0x4bf2842, 0x6f8b6ce, 0xcb9f9a9, 0xde252ac,
+ 0x93c46d1, 0x7f0611d, 0x751dc98, 0x8e2bd80 }
+ },
+ {
+ { 0xe27d54b, 0xf2fd8fb, 0xc248071, 0x2a1e37e, 0xab8f49a, 0x2fcc888,
+ 0xc18a9e5, 0x42c62a3, 0x70b2446, 0xe302908, 0xc5ac55d, 0x90277fa,
+ 0xd6dde41, 0x8d97d56, 0x5db04fe, 0xf4cf8a9 },
+ { 0xd30d077, 0x3e280f5, 0x3cb3293, 0x2c90307, 0x24eb0dd, 0xe0be2ac,
+ 0x8bcb4f0, 0xa2d1a49, 0xcd0cd45, 0x16db466, 0x9a80232, 0x3b28aa7,
+ 0x17b008e, 0xdd7e52f, 0x868e4da, 0x20685f2 }
+ },
+ {
+ { 0x7c7a486, 0x0a68c14, 0xc429633, 0xd8ef234, 0xffe7506, 0x470667b,
+ 0x8828d51, 0x55a13c8, 0x2e44bef, 0x5f32741, 0x5929f92, 0x537d92a,
+ 0x31c5cd5, 0x0a01d5b, 0x67eb3d7, 0xb77aa78 },
+ { 0x8b82e4d, 0x36ec45f, 0xb37b199, 0x6821da0, 0xd7fa94e, 0x8af37aa,
+ 0x1085010, 0xf020642, 0x7e56851, 0x9b88678, 0x52948ce, 0x35f3944,
+ 0xafc1361, 0x125c2ba, 0x453e332, 0x8a57d0e }
+ },
+ {
+ { 0x8043664, 0xefe9948, 0xdb1aa55, 0xb8b8509, 0x332523f, 0x1a2e5a9,
+ 0x1045c0f, 0x5e255dd, 0x7ae7180, 0xe68dd8a, 0x45bf532, 0x55f1cf3,
+ 0xe63a716, 0xe00722e, 0x6116bac, 0xd1c2138 },
+ { 0x1c6d1f4, 0x626221f, 0x3773278, 0x240b830, 0x88def16, 0xe393a0d,
+ 0xca0495c, 0x229266e, 0xd3e4608, 0x7b5c6c9, 0x7927190, 0xdc559cb,
+ 0xc7b3c57, 0x06afe42, 0xb439c9b, 0x8a2ad0b }
+ },
+ {
+ { 0xffc3e2f, 0xd7360fb, 0xfbd2e95, 0xf721317, 0x5748e69, 0x8cacbab,
+ 0x9054bb9, 0x7c89f27, 0xaa86881, 0xcbe50fa, 0x75206e4, 0x7aa05d3,
+ 0xc752c66, 0x1ea01bc, 0x1f2c2bc, 0x5968cde },
+ { 0x09a853e, 0x487c55f, 0xe09204b, 0x82cbef1, 0xabd8670, 0xad5c492,
+ 0xf12dcb3, 0x7175963, 0xbf6aa06, 0x7a85762, 0xf8d5237, 0x02e5697,
+ 0x37c6157, 0xccf7d19, 0xc2fd59c, 0x3b14ca6 }
+ },
+ {
+ { 0x1b9f77f, 0x5e610d8, 0x051b02f, 0x85876d0, 0xb8020dd, 0x5d81c63,
+ 0xd6ce614, 0xd0b4116, 0xaa8bf0c, 0x91810e5, 0xcbf8c66, 0xf27f91f,
+ 0x38480ae, 0x2e5dc5f, 0xbec7633, 0x0a13ffe },
+ { 0x2bf6af8, 0x61ff649, 0x641f827, 0xe6aef2d, 0x5de5f04, 0xad5708a,
+ 0xcdfee20, 0xe5c3a80, 0x68fcfa2, 0x88466e2, 0xd6e1d7b, 0x8e5bb3a,
+ 0xed236b8, 0xa514f06, 0xa5f5274, 0x51c9c7b }
+ },
+ {
+ { 0xf9bc3d8, 0xa19d228, 0x3381069, 0xf89c3f0, 0x5c3f379, 0xfee890e,
+ 0x32fb857, 0x3d3ef3d, 0x5b418dd, 0x3998849, 0xc46e89a, 0x6786f73,
+ 0x9e0f12f, 0x79691a5, 0x3bc022b, 0x76916bf },
+ { 0x2cd8a0a, 0xea073b6, 0x102fdbc, 0x1fbedd4, 0xcb9d015, 0x1888b14,
+ 0x76655f7, 0x98f2cfd, 0x59f0494, 0xb9b5910, 0xe6986a3, 0xa3dbbe1,
+ 0xeaf2b04, 0xef016a5, 0xcd2d876, 0xf671ba7 }
+ },
+},
+{
+ {
+ { 0x1ae05e9, 0x1dae3bf, 0x1f21fef, 0x6a02996, 0x7aec3c6, 0x95df2b9,
+ 0xd83189b, 0x9abbc5a, 0x2d13140, 0xaf994af, 0x86aa406, 0xc3f8846,
+ 0x75284c5, 0xcd77e50, 0x2a9a4d7, 0x1c1e13d },
+ { 0x744b89d, 0x7f8815d, 0x2ba673e, 0xb189133, 0xd594570, 0x55ea93c,
+ 0xd61b041, 0x19c8a18, 0x8d2c580, 0x938ebaa, 0x05ba078, 0x9b4344d,
+ 0x8eaf9b7, 0x622da43, 0x9fea368, 0x809b807 }
+ },
+ {
+ { 0xc33b7a2, 0x3780e51, 0x387b1c8, 0xd7a205c, 0x4be60e4, 0x79515f8,
+ 0x1e18277, 0xde02a8b, 0xf0d9150, 0x4645c96, 0xe0b3fd1, 0x45f8acb,
+ 0x9b53ac3, 0x5d532ba, 0xb0557c9, 0x7984dcd },
+ { 0x8a92f01, 0x5ae5ca6, 0x9d569ca, 0xd2fbb3c, 0x0c297c1, 0x668cc57,
+ 0x6295e89, 0xa482943, 0xa33ad40, 0xf646bc1, 0xc3f425d, 0x066aaa4,
+ 0xd005de2, 0x23434cd, 0xdb35af4, 0x5aca9e9 }
+ },
+ {
+ { 0x6877c56, 0x2bca35c, 0xf0ddd7d, 0xab864b4, 0x404f46c, 0x5f6aa74,
+ 0x539c279, 0x72be164, 0xe0283cf, 0x1b1d73e, 0xad583d9, 0xe550f46,
+ 0xe739ad1, 0x4ac6518, 0x8d42100, 0x6b6def7 },
+ { 0xfa8468d, 0x4d36b8c, 0x5a3d7b8, 0x2cb3773, 0x5016281, 0x577f86f,
+ 0x9124733, 0xdb6fe5f, 0xe29e039, 0xacb6d2a, 0x580b8a1, 0x2ab8330,
+ 0x643b2d0, 0x130a4ac, 0x5e6884e, 0xa7996e3 }
+ },
+ {
+ { 0x60a0aa8, 0x6fb6277, 0xcbe04f0, 0xe046843, 0xe6ad443, 0xc01d120,
+ 0xabef2fc, 0xa42a05c, 0x12ff09c, 0x6b793f1, 0xa3e5854, 0x5734ea8,
+ 0x775f0ad, 0xe482b36, 0xf864a34, 0x2f4f60d },
+ { 0x84f2449, 0xf521c58, 0x9186a71, 0x58734a9, 0xac5eacc, 0x157f5d5,
+ 0x248ee61, 0x858d9a4, 0x48149c3, 0x0727e6d, 0xac9ec50, 0xd5c3eaa,
+ 0x20ee9b5, 0xa63a64a, 0x87be9de, 0x3f0dfc4 }
+ },
+ {
+ { 0xb13e3f4, 0x836349d, 0x3e9316d, 0xebdd026, 0x324fd6c, 0x3fd61e8,
+ 0x0964f41, 0x85dddfa, 0x52add1b, 0x06e72de, 0x8c4a9e2, 0xb752cff,
+ 0xfdf09f7, 0x53b0894, 0x0bc24fd, 0xd5220ab },
+ { 0xfb1981a, 0x8442b35, 0x3edd701, 0xa733a37, 0xd0ef089, 0x42b60c3,
+ 0x46e7bca, 0xa1b16ec, 0xa09aaf4, 0xc0df179, 0x638f3a1, 0xcd4f187,
+ 0x9eab1c2, 0x9af64f7, 0xd1d78e3, 0x86fed79 }
+ },
+ {
+ { 0xfe29980, 0x42c8d86, 0x6575660, 0x6657b81, 0x80f92ca, 0x82d52c6,
+ 0x02d42be, 0x8587af1, 0x6e8bdf0, 0xb515131, 0xc333495, 0x706e2d9,
+ 0x9673064, 0xd53601a, 0x8219099, 0x27b1fbb },
+ { 0x705f7c8, 0x3f0929d, 0xf3d6e6f, 0xff40b10, 0x026af5c, 0x673c703,
+ 0xe25a422, 0x2c1dce4, 0x3dad8b6, 0x5348bd7, 0xbe2c329, 0xc39b6b6,
+ 0xb921084, 0x47854ff, 0xb391f20, 0xb347b8b }
+ },
+ {
+ { 0xeb9b774, 0x79fc841, 0xb4b6c1d, 0xf32da25, 0xfe492cb, 0xcbba76b,
+ 0xd623903, 0x76c51fc, 0xcf0705a, 0x114cf6f, 0x7815daf, 0x6b72049,
+ 0x473382e, 0x630b362, 0x9704db5, 0xbf40c3a },
+ { 0xc5456eb, 0xa8a9ddc, 0x72f2dc1, 0x2b4472a, 0xd6d6ef3, 0x9874444,
+ 0xa0ba5ed, 0x27e8d85, 0x194849f, 0x5d225b4, 0xebaa40d, 0xe852cd6,
+ 0x8d4bf3f, 0xb669c24, 0x2343991, 0xa8601eb }
+ },
+ {
+ { 0x59502d3, 0x8a04854, 0xe269a7b, 0xcab27ee, 0x4875ada, 0x4179307,
+ 0xe2405f9, 0x179e685, 0x7b28963, 0x0d7b698, 0x422a43e, 0x80c9db8,
+ 0xa0f43ee, 0xf5ff318, 0x4ba7aa7, 0x7a92805 },
+ { 0x0c0834e, 0xa5c79fe, 0x1f849ec, 0x837ca0d, 0x628ab7b, 0xfe0d7fa,
+ 0x6edd19a, 0x94bcb95, 0x2226fbf, 0xa18bc93, 0xaad54a3, 0x2795379,
+ 0x371129e, 0xceeacf8, 0xa588be5, 0x65ca57f }
+ },
+},
+{
+ {
+ { 0x2caa330, 0x7a578b5, 0xd8ca34a, 0x7c21944, 0x6447282, 0x6c0fbbb,
+ 0xf90b2e5, 0xa8a9957, 0x6586b71, 0xbbe1066, 0x49138a2, 0x716a902,
+ 0xe7ed66d, 0x2fa6034, 0x2b9916a, 0x56f77ed },
+ { 0xbddefb3, 0x69f1e26, 0x8c08420, 0xa497809, 0x09bc184, 0xc3377eb,
+ 0xbe6dade, 0x796ce0c, 0xd103bbb, 0x3be0625, 0x992685c, 0x01be27c,
+ 0x7755f9f, 0xc0e2559, 0x1c0dbfa, 0x165c40d }
+ },
+ {
+ { 0x659c761, 0xc63a397, 0x630fbad, 0x10a0e5b, 0x655ac56, 0xf21e8a6,
+ 0xc1181e2, 0xe8580fa, 0x0a84b5c, 0xbfc2d9c, 0x7afd5d1, 0x2cdbaff,
+ 0xf61e85a, 0x95f1182, 0x719eaf4, 0x1173e96 },
+ { 0xc6de8b9, 0xc06d55e, 0xafcbcaa, 0x1b4c8eb, 0xbc2bbcd, 0x52af5cb,
+ 0x77bcd10, 0x564fab8, 0xae85a6e, 0xfd53a18, 0x94c712f, 0x2257859,
+ 0x1352121, 0x29b11d7, 0xc40491a, 0xab1cb76 }
+ },
+ {
+ { 0xce32eb4, 0xb4e8ca8, 0xb250b49, 0x7e484ac, 0xa3e31a2, 0x062c6f7,
+ 0x625d1fc, 0x497fd83, 0x362dda7, 0x98f821c, 0x6be3111, 0xcae1f8f,
+ 0x5d4fa42, 0x9077e95, 0xa65855a, 0xa589971 },
+ { 0x28832a9, 0xda6321d, 0x3936e9e, 0xf9ef5dc, 0xc9797ef, 0xa37f117,
+ 0xdb581be, 0x0eb3c80, 0xbaa0002, 0x207c5c4, 0xf38faa0, 0xc0401b5,
+ 0xd0f1e6e, 0xceee523, 0xd1f0045, 0x8d27a5f }
+ },
+ {
+ { 0xcf0af29, 0x9411063, 0x89a6693, 0x3043857, 0x640145e, 0x9a9fb8f,
+ 0x54832eb, 0x7d82fe9, 0x898c520, 0xf2789e1, 0xf948dc0, 0x448b402,
+ 0x68996dd, 0xeca8fdf, 0xa149b2f, 0x22227e9 },
+ { 0x8e62d6a, 0x63509ff, 0x8c9c57f, 0xe98d81c, 0x1fe3bed, 0xd387407,
+ 0x539538f, 0xf1db013, 0x48418ce, 0xb04092e, 0xd6d9d4d, 0xbbf8e76,
+ 0x2cec5ae, 0x2ea9cda, 0x5078fa9, 0x8414b3e }
+ },
+ {
+ { 0xd68a073, 0x5ad1cdb, 0xc18b591, 0xd4cedaf, 0x8e4c1c9, 0x7826707,
+ 0x9ca302a, 0x9b8d920, 0x326115b, 0x3101bd2, 0x4c2717a, 0x6f154b5,
+ 0x263e84b, 0x618c31b, 0xbbd6942, 0x12c4138 },
+ { 0x80da426, 0xf9ead25, 0x47d9680, 0xe748e99, 0x8a4210e, 0x9b396a3,
+ 0xf4b8f72, 0xfaf03dd, 0x66159e7, 0xbd94a52, 0x1d4c7cb, 0x5e73049,
+ 0x7910f38, 0x31d1f9a, 0x08d6dd1, 0x4fd10ca }
+ },
+ {
+ { 0x9f2331e, 0x4f510ac, 0x7e3dcc2, 0xee872dc, 0xa0a0c73, 0x4a11a32,
+ 0xaa5a630, 0x27e5803, 0x7af4a8a, 0xe5ae503, 0x9fffeb0, 0x2dcdeba,
+ 0x719d91f, 0x8c27748, 0xb9cc61c, 0xd3b5b62 },
+ { 0xcca7939, 0x998ac90, 0x64514e5, 0xc22b598, 0xb35738a, 0x950aaa1,
+ 0xdab0264, 0x4b208bb, 0xa557d2e, 0x6677931, 0xf7c17d3, 0x2c696d8,
+ 0x3e15c51, 0x1672d4a, 0x3db0e82, 0x95fab66 }
+ },
+ {
+ { 0x6ff205e, 0x3d42734, 0x0ea9fbe, 0x7f187d9, 0x466b2af, 0xbd9367f,
+ 0x03daf2f, 0x188e532, 0x27b54d8, 0xefe1329, 0xef70435, 0x14faf85,
+ 0x1ec95c4, 0xa506128, 0xc22cba7, 0xad01705 },
+ { 0x6197333, 0x7d2dfa6, 0x8b4f6ed, 0xedd7f07, 0x75df105, 0xe0cb685,
+ 0x80f76bc, 0x47c9ddb, 0x9073c54, 0x49ab531, 0xe607f44, 0x845255a,
+ 0xcc74b7c, 0x0b4ed9f, 0x0f5c3a6, 0xcfb52d5 }
+ },
+ {
+ { 0xc278776, 0x545c7c6, 0x98c30f0, 0x92a39ae, 0xd2f4680, 0x8aa8c01,
+ 0x6b7f840, 0xa5409ed, 0xdcb24e7, 0x0c450ac, 0xc5770d9, 0x5da6fb2,
+ 0x8658333, 0x5b8e8be, 0x67ea4ad, 0xb26bf4a },
+ { 0xc7d91fa, 0x2e30c81, 0x0eeb69f, 0x6e50a49, 0xee4bc26, 0x9458c2b,
+ 0x33be250, 0x419acf2, 0x87881ab, 0x79d6f81, 0x403b1be, 0x694565d,
+ 0x234fe1d, 0x34b3990, 0x2132b38, 0x60997d7 }
+ },
+},
+{
+ {
+ { 0x26975dc, 0x00a9741, 0x6cf94e7, 0x42161c4, 0xc64ed99, 0xcc9fe4b,
+ 0x4680570, 0x020019a, 0x698da0d, 0x885595a, 0x77dd962, 0x008444b,
+ 0xa4fea0e, 0xbf3c22d, 0x2c81245, 0xc463048 },
+ { 0x793ab18, 0xcb248c5, 0xeb4320b, 0x4dc7a20, 0x1572b7d, 0x9a0906f,
+ 0xf9ac20f, 0xd5b3019, 0x34520a3, 0x79b1bf5, 0x69b5322, 0x788dfe8,
+ 0x455b7e2, 0x9a05298, 0x016bca9, 0x2f4aecb }
+ },
+ {
+ { 0x8745618, 0x414d379, 0xb7c983c, 0x64ba22e, 0x9f9d532, 0x9a5d19f,
+ 0x44a80c8, 0x81a00d8, 0xcae98d6, 0xb9e24f5, 0xaca965a, 0x6c3769c,
+ 0xf6e4e6d, 0x50d6081, 0x54422a6, 0x0d96980 },
+ { 0x5cdd790, 0xbd7e792, 0x6a35219, 0xcff65da, 0x8b60ebe, 0x40dc363,
+ 0x92a50dc, 0x84bee74, 0x15ad65e, 0x57d4be4, 0x1a6d1d3, 0xc54256b,
+ 0x45717cc, 0x141c649, 0xcd1c736, 0x05eb609 }
+ },
+ {
+ { 0x1e3c7ec, 0xfd52eab, 0x9f24895, 0xa4a5eca, 0x79fdb83, 0xaaa2a8d,
+ 0x72bdfda, 0xd105e60, 0x681d97e, 0x59e6ae2, 0x8e8077f, 0xfedf8e0,
+ 0x629e462, 0xb06d0ad, 0x96fa863, 0x8c7c2d0 },
+ { 0xee8fc91, 0x5eecc4c, 0x9e61174, 0x5e83ab2, 0xb28c02d, 0x1fd8925,
+ 0x2072864, 0x93be538, 0x24c984e, 0xda0c886, 0xa008286, 0xdcf9f0c,
+ 0xa58ba75, 0x1ecb5a6, 0xc2e3c83, 0x1d9b890 }
+ },
+ {
+ { 0xeeee062, 0x19e866e, 0x4f7b387, 0x31c1c7f, 0x1c06652, 0x9be6018,
+ 0x2b68bbb, 0xc00a93a, 0x9d52b2b, 0x54c65d6, 0xe8b744a, 0x4591416,
+ 0x9a64ab6, 0x641bcca, 0xab08098, 0xf22bcb1 },
+ { 0xf1f726c, 0x3c0db8f, 0x9d2e6a6, 0x4f5739e, 0x45c9530, 0x5cb669b,
+ 0x7b472d0, 0x861b04e, 0x894da77, 0x3e30515, 0xc9ac39b, 0x3344685,
+ 0x73bdd29, 0x9e17305, 0x808dc85, 0x9cac12c }
+ },
+ {
+ { 0x5e27087, 0xf152b86, 0x90a580e, 0x267bd85, 0x8baafc1, 0xba79cec,
+ 0x9442686, 0x6140ab1, 0x5b31693, 0xa67090c, 0x28b4117, 0x50a103a,
+ 0x0ddc08f, 0x7722e61, 0xe6569b2, 0x5d19d43 },
+ { 0x5962bf6, 0x70e0c52, 0xfb5fb02, 0x808e316, 0x5b667be, 0x3fb80da,
+ 0xfcfacec, 0x8aa366e, 0x134280e, 0xcb0b3e7, 0xcd7d944, 0x0bf1de4,
+ 0xd092df5, 0x0cd23be, 0xa153a0c, 0xc9a6a79 }
+ },
+ {
+ { 0x2d5a4b7, 0x1c69ad0, 0xd9e6f4a, 0x4bb28d0, 0xa984fc6, 0x815308c,
+ 0x9037ca5, 0x40929c7, 0x1bd0357, 0x0ea2b49, 0x42aad4e, 0xec17e5b,
+ 0x18e7235, 0x1f32ade, 0xa96a9d3, 0xbc60b05 },
+ { 0xe20f707, 0x3b0229a, 0x56bdfad, 0xd635050, 0xd8b2e1e, 0xac2d922,
+ 0x235c748, 0x92b2998, 0xd766f97, 0x6002c3a, 0x1a2a862, 0x9919800,
+ 0xb58b684, 0x2af7567, 0xaaafce5, 0xd8fe707 }
+ },
+ {
+ { 0x5df7a4b, 0x54487ab, 0xc57ccc2, 0x51cccde, 0x7510b53, 0x2394327,
+ 0xf555de3, 0x3a09f02, 0x1be484d, 0xa696aec, 0x37817a2, 0x56f459f,
+ 0x623dcb4, 0x8d8f61c, 0x5335656, 0xc52223c },
+ { 0xb49914a, 0xf634111, 0x8e4f9bb, 0xbf8e1ab, 0xf4dba02, 0x2f59578,
+ 0xe004319, 0x2a94199, 0x654d005, 0x87931f0, 0x6fa0814, 0x7df57d9,
+ 0xa154031, 0xc8da316, 0x41f658b, 0x2a44ac0 }
+ },
+ {
+ { 0x9e34ac6, 0xfb5f4f8, 0x97790f2, 0x0a1b10b, 0x4b8a06c, 0x58fe4e7,
+ 0x955f27c, 0x10c1710, 0xd5ebe19, 0x77b798a, 0x1f1c2dc, 0xaf1c35b,
+ 0xa1f8d69, 0xc25b8e6, 0xf76bf23, 0x49cf751 },
+ { 0x436f7b7, 0x15cb2db, 0x7e74d1a, 0x186d7c2, 0xc00a415, 0x60731de,
+ 0x15f0772, 0xea1e156, 0x714463f, 0xf02d591, 0x51adeb1, 0x26a0c64,
+ 0xcc5229e, 0x20174cd, 0xefd512a, 0xb817e50 }
+ },
+},
+};
+
/* Precomputed table of affine points used by ge448_scalarmult_base().
 * base_i[0] is the Ed448 base point B from RFC 8032 (its x words below,
 * read high-to-low, spell 0x4f1970c66bed0d...26a82bc70cc05e); the remaining
 * entries are presumably fixed multiples/shifts of B generated offline --
 * TODO confirm against the table-generation script.  Each ordinate is
 * stored as sixteen 28-bit little-endian words (ge448_precomp). */
static const ge448_precomp base_i[16] = {
    {
        { 0x70cc05e, 0x26a82bc, 0x0938e26, 0x80e18b0, 0x511433b, 0xf72ab66,
          0x412ae1a, 0xa3d3a46, 0xa6de324, 0x0f1767e, 0x4657047, 0x36da9e1,
          0x5a622bf, 0xed221d1, 0x66bed0d, 0x4f1970c },
        { 0x230fa14, 0x08795bf, 0x7c8ad98, 0x132c4ed, 0x9c4fdbd, 0x1ce67c3,
          0x73ad3ff, 0x05a0c2d, 0x7789c1e, 0xa398408, 0xa73736c, 0xc7624be,
          0x03756c9, 0x2488762, 0x16eb6bc, 0x693f467 }
    },
    {
        { 0x6ff2f8f, 0x2817328, 0xda85757, 0xb769465, 0xfd6e862, 0xf7f6271,
          0x8daa9cb, 0x4a3fcfe, 0x2ba077a, 0xda82c7e, 0x41b8b8c, 0x9433322,
          0x4316cb6, 0x6455bd6, 0xb9108af, 0x0865886 },
        { 0x88ed6fc, 0x22ac135, 0x02dafb8, 0x9a68fed, 0x7f0bffa, 0x1bdb676,
          0x8bb3a33, 0xec4e1d5, 0xce43c82, 0x56c3b9f, 0xa8d9523, 0xa6449a4,
          0xa7ad43a, 0xf706cbd, 0xbd5125c, 0xe005a8d }
    },
    {
        { 0x2030034, 0xa99d109, 0x6f950d0, 0x2d8cefc, 0xc96f07b, 0x7a920c3,
          0x08bc0d5, 0x9588128, 0x6d761e8, 0x62ada75, 0xbcf7285, 0x0def80c,
          0x01eedb5, 0x0e2ba76, 0x5a48dcb, 0x7a9f933 },
        { 0x2f435eb, 0xb473147, 0xf225443, 0x5512881, 0x33c5840, 0xee59d2b,
          0x127d7a4, 0xb698017, 0x86551f7, 0xb18fced, 0xca1823a, 0x0ade260,
          0xce4fd58, 0xd3b9109, 0xa2517ed, 0xadfd751 }
    },
    {
        { 0xeb5eaf7, 0xdf9567c, 0x78ac7d7, 0x110a6b4, 0x4706e0b, 0x2d33501,
          0x0b5a209, 0x0df9c7b, 0x568e684, 0xba4223d, 0x8c3719b, 0xd78af2d,
          0xa5291b6, 0x77467b9, 0x5c89bef, 0x079748e },
        { 0xdac377f, 0xe20d3fa, 0x72b5c09, 0x34e8669, 0xc40bbb7, 0xd8687a3,
          0xd2f84c9, 0x7b3946f, 0xa78f50e, 0xd00e40c, 0x17e7179, 0xb875944,
          0xcb23583, 0x9c7373b, 0xc90fd69, 0x7ddeda3 }
    },
    {
        { 0x6ab686b, 0x3d0def7, 0x49f7c79, 0x1a467ec, 0xc8989ed, 0x3e53f4f,
          0x430a0d9, 0x101e344, 0x8ad44ee, 0xa3ae731, 0xae1d134, 0xaefa6cd,
          0x824ad4d, 0xaa8cd7d, 0xed584fc, 0xef1650c },
        { 0x4f4754f, 0xa74df67, 0xef3fb8b, 0xf52cea8, 0x2971140, 0x47c32d4,
          0xa256fbb, 0x391c15d, 0xa605671, 0xc165fab, 0x87993b9, 0xf2518c6,
          0xbd5a84d, 0x2daf7ac, 0x98f12ae, 0x1560b62 }
    },
    {
        { 0x54dc10a, 0xef4da02, 0x5940db8, 0x6311865, 0x82f2948, 0xe20b149,
          0x5581dba, 0x67b9377, 0x04f5029, 0x422ee71, 0x5122d34, 0x5d440db,
          0x1a4c640, 0xb1e56d7, 0xc2408ee, 0xbf12abb },
        { 0x016af01, 0x0cc9f86, 0xf3d8cab, 0x88366ab, 0xa2efe12, 0x85dda13,
          0x5d00674, 0x390df60, 0x6d187f7, 0xf18f580, 0xf0c5d20, 0x28c900f,
          0x3e01733, 0xad30812, 0x54bf2fd, 0x42d35b5 }
    },
    {
        { 0x2ffb1f1, 0x009135f, 0x8f9c605, 0x099fc7e, 0x26bfa5a, 0xcc67da6,
          0x344552b, 0xc186d12, 0x1b339e1, 0xb523250, 0xc9708c5, 0x70a544f,
          0x1e928e7, 0x06baaec, 0xef0f50f, 0x0baedd2 },
        { 0xbf479e5, 0x535d6d8, 0xe4ec3e9, 0x156e536, 0xddb9be2, 0x3165741,
          0x59fd736, 0x988af71, 0x2e33ddd, 0x13d8a78, 0x4e69002, 0x5460421,
          0x804a268, 0x34d56e0, 0x0e52a4c, 0xc59b84f }
    },
    {
        { 0x24729d9, 0x525d45f, 0x8712327, 0x5768aba, 0x43035db, 0xa25e43b,
          0x927ef21, 0x15a1ee8, 0x6056112, 0xa785d21, 0xd508af9, 0x45e2fbf,
          0x37ba969, 0xb6f721a, 0x216d8d3, 0x30d6d8c },
        { 0x52074c3, 0x3065e08, 0x2a0684e, 0xfa40b4a, 0x763f955, 0x851325a,
          0x9f25900, 0xd4ef19c, 0xf665756, 0x799c869, 0x3312990, 0x7b05222,
          0x28db802, 0xc986c2b, 0x28ade0a, 0xf48fb8f }
    },
    {
        { 0x1649b68, 0x1e46173, 0x5beb9dc, 0xa96e5d6, 0x481935d, 0x765ddff,
          0x9f3bf2a, 0x6cf132c, 0x7c35658, 0x9f6c5c9, 0x4696e60, 0x99cd139,
          0x9c0d5e4, 0x99fa924, 0x8845a95, 0x1acd063 },
        { 0x3636087, 0x0b06541, 0xea17b7f, 0xea20e78, 0x6161967, 0x20afc5f,
          0xdc81028, 0xfd6c8a2, 0xe32c8fd, 0x4ef1357, 0x00e4a88, 0x8aa4004,
          0x48cb82f, 0xd6fcaef, 0xb3cd4fa, 0x7ba7c6d }
    },
    {
        { 0xd19c7ab, 0xf843473, 0xc655c4d, 0x968e76d, 0xc4b9c2f, 0x52c87d9,
          0xe4aa082, 0x65f641a, 0x33c3603, 0x491a397, 0x5810098, 0xa606ffe,
          0x8bf8ad4, 0x09920e6, 0x6db7882, 0x691a0c8 },
        { 0xa4d3ef5, 0x5205883, 0xacf2efe, 0xee839b7, 0xc00ca66, 0x4b78e2a,
          0xf9fcb91, 0xbe3f071, 0xbf6943a, 0x61e66c9, 0x061b79d, 0xe9b4e57,
          0x56c06bd, 0x8d1b01b, 0xdf76ae5, 0x0dfa315 }
    },
    {
        { 0xf1fd093, 0x803df65, 0x489b77e, 0x1cd6523, 0xc20e295, 0x2cd2e15,
          0x9b912d1, 0xcd490be, 0x2e886d2, 0xdd9a2ff, 0xfe9d72a, 0xa3c836d,
          0x298e0c1, 0xfcad5f2, 0x4bcf067, 0xed126e2 },
        { 0x3dc81bc, 0x1e33953, 0xece6a08, 0xbea4d76, 0x991b252, 0x1d15de3,
          0xe6daf97, 0x74cc5cf, 0x0826493, 0x5ad343f, 0x1064049, 0x2d38a47,
          0xffcfa4d, 0xf7f47b9, 0x418066c, 0xef14490 }
    },
    {
        { 0x9bb55ab, 0x4e7f86b, 0x3f496a3, 0x310d785, 0x0dec42c, 0xbd682fc,
          0x411d32a, 0xbde047a, 0xc5a5ea2, 0xea639b4, 0xba08fa1, 0x5052078,
          0x07729f2, 0xc968b23, 0x23d3e28, 0x567b5a6 },
        { 0x977fbf7, 0x171e825, 0xbe990aa, 0x0319c70, 0xe12cd69, 0x8f65023,
          0xf5015e6, 0x1fb9b19, 0x3568a7c, 0x0083f60, 0x1f3c5ac, 0xba3d30b,
          0x3d7a988, 0xe7b509d, 0xcd0f6b6, 0x2318b99 }
    },
    {
        { 0x93ab2cf, 0x54d3b87, 0xd2d8306, 0x366abea, 0xd7a4977, 0x66e8eb6,
          0xae0072e, 0xa61888c, 0xdbc3315, 0x9eeeef5, 0x163e7f5, 0x93f09db,
          0x59ade9a, 0xee90959, 0xce59be0, 0xaf7f578 },
        { 0x5ece59e, 0x24bfd8d, 0x3689523, 0x8aa698b, 0x2de92cf, 0xa9a65de,
          0xa6ad300, 0xec11dbc, 0x09f88ca, 0x217f3fa, 0xb4d6af7, 0xf6c33e3,
          0x1d86d2d, 0xcd3bfa2, 0x5f13f25, 0x1497f83 }
    },
    {
        { 0xcd03d1d, 0xa579568, 0xe158af6, 0xd717cda, 0x389a19f, 0x59eda97,
          0x099e99c, 0xb32c370, 0xdabb591, 0xa2dba91, 0x77c2c97, 0x6d697d5,
          0xd43fa6d, 0x5423fc2, 0x0b382bf, 0x56ea8a5 },
        { 0xd80c11a, 0x4a987ba, 0x7d590a5, 0xe4cde21, 0xf97e559, 0x3dd8860,
          0x43b593c, 0xff45e25, 0x5343cb5, 0x00eb453, 0x7bbfbdd, 0x06b9b99,
          0x16aea24, 0x4da36b7, 0x57a624e, 0x2476517 }
    },
    {
        { 0x3474e0d, 0x32207d0, 0xb41cc73, 0x3ffbf04, 0x319eb39, 0x5c4dc45,
          0x758b463, 0xfee29be, 0xc30c7a7, 0xcc8a381, 0x9fe0e53, 0x147f4e4,
          0xe35a2de, 0x05b2e26, 0x92f3666, 0x4362f02 },
        { 0x8474b85, 0x0476d0c, 0xccaf108, 0x9d8c65f, 0x1d54b6a, 0xf58d404,
          0xf38e4b0, 0x3ee6862, 0x3b44f54, 0x7c7c9d5, 0x0fb0db5, 0x36a3fd8,
          0x18a8ac8, 0xfcd94ba, 0x8f35c05, 0xc1b1d56 }
    },
    {
        { 0x1bdd30d, 0x16539fc, 0x8df4afb, 0x1356e53, 0x5a1aedb, 0xc0545d8,
          0x489396b, 0xeb2037a, 0x5660894, 0x897fcbd, 0xb7d104a, 0x02a58a9,
          0xc96b980, 0x57fa24c, 0x5bd8946, 0xf6448e3 },
        { 0x8805c83, 0xee72741, 0x992cfc6, 0x10fa274, 0x9e66b21, 0x9514193,
          0xbd08009, 0xe0ffa44, 0x20da22b, 0x1743322, 0x59e6831, 0x4891ff3,
          0xa7d687b, 0x407ed73, 0x51d99cf, 0x2fb4e07 }
    },
};
+#endif
+
+/* Set the 0 point.
+ *
+ * p [in] Point to set to 0.
+ */
+static WC_INLINE void ge448_0(ge448_p2 *p)
+{
+ fe448_0(p->X);
+ fe448_1(p->Y);
+ fe448_1(p->Z);
+}
+
+/* Set the precompute point to 0.
+ *
+ * p [in] Precompute point to set.
+ */
+static void ge448_precomp_0(ge448_precomp *p)
+{
+ fe448_0(p->x);
+ fe448_1(p->y);
+}
+
/* Double the point on the Twisted Edwards curve. r = 2.p
 * Projective doubling; the statement order is significant because r's
 * ordinates are used as working storage before their final values are
 * written, keeping scratch space to two field elements.
 *
 * r [in] Point to hold result.
 * p [in] Point to double.
 */
static WC_INLINE void ge448_dbl(ge448_p2 *r,const ge448_p2 *p)
{
    ge448 t0[GE448_WORDS];             /* scratch: holds B */
    ge448 t1[GE448_WORDS];             /* scratch: holds E */

    fe448_add(t0, p->X, p->Y);         /* t0 = B1 = X1+Y1 */
    fe448_reduce(t0);
    fe448_sqr(t0, t0);                 /* t0 = B = (X1+Y1)^2 */
    fe448_sqr(r->X, p->X);             /* r->X = C = X1^2 */
    fe448_sqr(r->Y, p->Y);             /* r->Y = D = Y1^2 */
    fe448_add(t1, r->X, r->Y);         /* t1 = E = C+D */
    fe448_reduce(t1);
    fe448_sub(r->Y, r->X, r->Y);       /* r->Y = Y31 = C-D */
    fe448_sqr(r->Z, p->Z);             /* r->Z = H = Z1^2 */
    fe448_add(r->Z, r->Z, r->Z);       /* r->Z = J1 = 2*H */
    fe448_sub(r->Z, t1, r->Z);         /* r->Z = J = E-2*H */
    fe448_reduce(r->Z);
    fe448_sub(r->X, t0, t1);           /* r->X = X31 = B-E */
    fe448_mul(r->X, r->X, r->Z);       /* r->X = X3 = (B-E)*J */
    fe448_mul(r->Y, r->Y, t1);         /* r->Y = Y3 = E*(C-D) */
    fe448_mul(r->Z, t1, r->Z);         /* r->Z = Z3 = E*J */
}
+
/* Add two points on the Twisted Edwards curve. r = p + q
 * Second point is a precomputed (affine) point: its z-ordinate is
 * implicitly 1, which saves the Z1*Z2 multiplication of the full add.
 * Ed448's curve constant is d = -39081 while fe448_mul39081() multiplies
 * by +39081, so E below is -d*C*D; this is why F and G appear with the
 * signs B-(-E) and B+(-E).
 *
 * r [in] Point to hold result.
 * p [in] Point to add.
 * q [in] Precomputed point to add.
 */
static WC_INLINE void ge448_madd(ge448_p2 *r, const ge448_p2 *p,
                                 const ge448_precomp *q)
{
    ge448 t0[GE448_WORDS];             /* B, then G */
    ge448 t1[GE448_WORDS];             /* C */
    ge448 t2[GE448_WORDS];             /* D */
    ge448 t3[GE448_WORDS];             /* E */
    ge448 t4[GE448_WORDS];             /* F */

    /* p->Z = A */
    fe448_mul(t1, p->X, q->x);         /* t1 = C = X1*X2 */
    fe448_mul(t2, p->Y, q->y);         /* t2 = D = Y1*Y2 */
    fe448_mul(t3, t1, t2);             /* t3 = E1 = C*D */
    fe448_mul39081(t3, t3);            /* t3 = E = d*C*D */
    fe448_sqr(t0, p->Z);               /* t0 = B = A^2 */
    fe448_add(t4, t0, t3);             /* t4 = F = B-(-E) */
    fe448_sub(t0, t0, t3);             /* t0 = G = B+(-E) */
    fe448_reduce(t0);
    fe448_add(r->X, p->X, p->Y);       /* r->X = H1 = X1+Y1 */
    fe448_reduce(r->X);
    fe448_add(r->Y, q->x, q->y);       /* r->Y = H2 = X2+Y2 */
    fe448_reduce(r->Y);
    fe448_mul(r->X, r->X, r->Y);       /* r->X = H = (X1+Y1)*(X2+Y2) */
    fe448_sub(r->X, r->X, t1);         /* r->X = X31 = H-C */
    fe448_sub(r->X, r->X, t2);         /* r->X = X32 = H-C-D */
    fe448_reduce(r->X);
    fe448_mul(r->X, r->X, t4);         /* r->X = X33 = F*(H-C-D) */
    fe448_mul(r->X, r->X, p->Z);       /* r->X = X3 = A*F*(H-C-D) */
    fe448_sub(r->Y, t2, t1);           /* r->Y = Y31 = D-C */
    fe448_reduce(r->Y);
    fe448_mul(r->Y, r->Y, t0);         /* r->Y = Y32 = G*(D-C) */
    fe448_mul(r->Y, r->Y, p->Z);       /* r->Y = Y3 = A*G*(D-C) */
    fe448_mul(r->Z, t4, t0);           /* r->Z = Z3 = F*G */
}
+
/* Subtract one point from another on the Twisted Edwards curve. r = p - q
 * Second point is a precomputed (affine) point with implicit z-ordinate 1.
 * Subtraction is addition of -q = (-x2, y2), so C changes sign relative to
 * ge448_madd(); the (-C) and (--E) annotations below track that flip.
 *
 * r [in] Point to hold result.
 * p [in] Point to subtract from.
 * q [in] Precomputed point to subtract.
 */
static WC_INLINE void ge448_msub(ge448_p2 *r, const ge448_p2 *p,
                                 const ge448_precomp *q)
{
    ge448 t0[GE448_WORDS];             /* B, then G */
    ge448 t1[GE448_WORDS];             /* C */
    ge448 t2[GE448_WORDS];             /* D */
    ge448 t3[GE448_WORDS];             /* E */
    ge448 t4[GE448_WORDS];             /* F */

    /* p->Z = A */
    fe448_sqr(t0, p->Z);               /* t0 = B = A^2 */
    fe448_mul(t1, p->X, q->x);         /* t1 = C = X1*X2 */
    fe448_mul(t2, p->Y, q->y);         /* t2 = D = Y1*Y2 */
    fe448_mul(t3, t1, t2);             /* t3 = E1 = C*D */
    fe448_mul39081(t3, t3);            /* t3 = E = d*C*D */
    fe448_sub(t4, t0, t3);             /* t4 = F = B-(--E) */
    fe448_add(t0, t0, t3);             /* t0 = G = B+(--E) */
    fe448_reduce(t0);
    fe448_add(r->X, p->X, p->Y);       /* r->X = H1 = X1+Y1 */
    fe448_reduce(r->X);
    fe448_sub(r->Y, q->y, q->x);       /* r->Y = H2 = Y2+(-X2) */
    fe448_reduce(r->Y);
    fe448_mul(r->X, r->X, r->Y);       /* r->X = H = (X1+Y1)*(Y2-X2) */
    fe448_add(r->X, r->X, t1);         /* r->X = X31 = H-(-C) */
    fe448_sub(r->X, r->X, t2);         /* r->X = X32 = H-(-C)-D */
    fe448_reduce(r->X);
    fe448_mul(r->X, r->X, t4);         /* r->X = X33 = F*(H-(-C)-D) */
    fe448_mul(r->X, r->X, p->Z);       /* r->X = X3 = A*F*(H-(-C)-D) */
    fe448_add(r->Y, t2, t1);           /* r->Y = Y31 = D-(-C) */
    fe448_reduce(r->Y);
    fe448_mul(r->Y, r->Y, t0);         /* r->Y = Y32 = G*(D-(-C)) */
    fe448_mul(r->Y, r->Y, p->Z);       /* r->Y = Y3 = A*G*(D-(-C)) */
    fe448_mul(r->Z, t4, t0);           /* r->Z = Z3 = F*G */
}
+
/* Add two points on the Twisted Edwards curve. r = p + q
 * General projective addition (no restriction on either z-ordinate).
 * Ed448's curve constant is d = -39081 while fe448_mul39081() multiplies
 * by +39081, so E below is -d*C*D; hence F = B-(-E) and G = B+(-E).
 *
 * r [in] Point to hold result.
 * p [in] Point to add.
 * q [in] Point to add.
 */
static WC_INLINE void ge448_add(ge448_p2* r, const ge448_p2* p,
                                const ge448_p2* q)
{
    ge448 t0[GE448_WORDS];             /* B, then G */
    ge448 t1[GE448_WORDS];             /* C */
    ge448 t2[GE448_WORDS];             /* D */
    ge448 t3[GE448_WORDS];             /* E */
    ge448 t4[GE448_WORDS];             /* F */

    fe448_mul(t1, p->X, q->X);         /* t1 = C = X1*X2 */
    fe448_mul(t2, p->Y, q->Y);         /* t2 = D = Y1*Y2 */
    fe448_mul(t3, t1, t2);             /* t3 = E1 = C*D */
    fe448_mul39081(t3, t3);            /* t3 = E = d*C*D */
    fe448_mul(r->Z, p->Z, q->Z);       /* r->Z = A = Z1*Z2 */
    fe448_sqr(t0, r->Z);               /* t0 = B = A^2 */
    fe448_add(t4, t0, t3);             /* t4 = F = B-(-E) */
    fe448_sub(t0, t0, t3);             /* t0 = G = B+(-E) */
    fe448_reduce(t0);
    fe448_add(r->X, p->X, p->Y);       /* r->X = H1 = X1+Y1 */
    fe448_reduce(r->X);
    fe448_add(r->Y, q->X, q->Y);       /* r->Y = H2 = X2+Y2 */
    fe448_reduce(r->Y);
    fe448_mul(r->X, r->X, r->Y);       /* r->X = H = (X1+Y1)*(X2+Y2) */
    fe448_sub(r->X, r->X, t1);         /* r->X = X31 = H-C */
    fe448_sub(r->X, r->X, t2);         /* r->X = X32 = H-C-D */
    fe448_reduce(r->X);
    fe448_mul(r->X, r->X, t4);         /* r->X = X33 = F*(H-C-D) */
    fe448_mul(r->X, r->X, r->Z);       /* r->X = X3 = A*F*(H-C-D) */
    fe448_sub(r->Y, t2, t1);           /* r->Y = Y31 = D-C */
    fe448_reduce(r->Y);
    fe448_mul(r->Y, r->Y, t0);         /* r->Y = Y32 = G*(D-C) */
    fe448_mul(r->Y, r->Y, r->Z);       /* r->Y = Y3 = A*G*(D-C) */
    fe448_mul(r->Z, t4, t0);           /* r->Z = Z3 = F*G */
}
+
+/* Subtract one point from another on the Twisted Edwards curve. r = p - q
+ *
+ * r [in] Point to hold result.
+ * p [in] Point to subtract from.
+ * q [in] Point to subtract.
+ */
+static WC_INLINE void ge448_sub(ge448_p2 *r, const ge448_p2 *p,
+ const ge448_p2 *q)
+{
+ ge448 t0[GE448_WORDS];
+ ge448 t1[GE448_WORDS];
+ ge448 t2[GE448_WORDS];
+ ge448 t3[GE448_WORDS];
+ ge448 t4[GE448_WORDS];
+
+ fe448_mul(t1, p->X, q->X); /* t1 = C = X1*X2 */
+ fe448_mul(t2, p->Y, q->Y); /* t2 = D = Y1*Y2 */
+ fe448_mul(t3, t1, t2); /* t3 = E1 = C*D */
+ fe448_mul39081(t3, t3); /* t3 = E = d*C*D */
+ fe448_mul(r->Z, p->Z, q->Z); /* r->Z = A = Z1*Z2 */
+ fe448_sqr(t0, p->Z); /* t0 = B = A^2 */
+ fe448_sub(t4, t0, t3); /* t4 = F = B-(--E) */
+ fe448_add(t0, t0, t3); /* t0 = G = B+(--E) */
+ fe448_reduce(t0);
+ fe448_add(r->X, p->X, p->Y); /* r->X = H1 = X1+Y1 */
+ fe448_reduce(r->X);
+ fe448_sub(r->Y, q->Y, q->X); /* r->Y = H2 = Y2+(-X2) */
+ fe448_reduce(r->Y);
+ fe448_mul(r->X, r->X, r->Y); /* r->X = H = (X1+Y1)*(X2+Y2) */
+ fe448_add(r->X, r->X, t1); /* r->X = X31 = H-(-C) */
+ fe448_sub(r->X, r->X, t2); /* r->X = X32 = H-(-C)-D */
+ fe448_reduce(r->X);
+ fe448_mul(r->X, r->X, t4); /* r->X = X33 = F*(H-C-D) */
+ fe448_mul(r->X, r->X, r->Z); /* r->X = X3 = A*F*(H-C-D) */
+ fe448_add(r->Y, t2, t1); /* r->Y = Y31 = D-C */
+ fe448_reduce(r->Y);
+ fe448_mul(r->Y, r->Y, t0); /* r->Y = Y32 = G*(D-C) */
+ fe448_mul(r->Y, r->Y, r->Z); /* r->Y = Y3 = A*F*(D-C) */
+ fe448_mul(r->Z, t4, t0); /* r->Z = Z3 = F*G */
+}
+
+/* Convert point to byte array assuming projective ordinates.
+ *
+ * b [in] Array of bytes to hold compressed point.
+ * p [in] Point to convert.
+ */
+void ge448_to_bytes(uint8_t *b, const ge448_p2 *p)
+{
+ ge448 recip[GE448_WORDS];
+ ge448 x[GE448_WORDS];
+ ge448 y[GE448_WORDS];
+
+ fe448_invert(recip, p->Z);
+ fe448_mul(x, p->X, recip);
+ fe448_mul(y, p->Y, recip);
+ fe448_to_bytes(b, y);
+ b[56] = fe448_isnegative(x) << 7;
+}
+
/* Convert point to byte array assuming z is 1.
 * With Z == 1, X and Y are already affine, so no field inversion is needed.
 *
 * b [in] Array of bytes to hold compressed point.
 * p [in] Point to convert.
 */
static void ge448_p2z1_to_bytes(uint8_t *b, const ge448_p2 *p)
{
    /* Serialize y, then fold the sign of x into the top bit of the last
     * byte (byte 56 of the 57-byte encoding). */
    fe448_to_bytes(b, p->Y);
    b[56] = fe448_isnegative(p->X) << 7;
}
+
+/* Compress the point to y-ordinate and negative bit.
+ *
+ * out [in] Array of bytes to hold compressed key.
+ * xIn [in] The x-ordinate.
+ * yIn [in] The y-ordinate.
+ */
+int ge448_compress_key(uint8_t* out, const uint8_t* xIn, const uint8_t* yIn)
+{
+ ge448_p2 g;
+ uint8_t bArray[ED448_KEY_SIZE];
+ uint32_t i;
+
+ fe448_from_bytes(g.X, xIn);
+ fe448_from_bytes(g.Y, yIn);
+ fe448_1(g.Z);
+
+ ge448_p2z1_to_bytes(bArray, &g);
+
+ for (i = 0; i < 57; i++) {
+ out[57 - 1 - i] = bArray[i];
+ }
+
+ return 0;
+}
+
/* Determine whether the value is negative.
 * Branch-free: extracts the sign bit directly, so it is safe to use in
 * constant-time selection code.
 *
 * b [in] An 8-bit signed value.
 * returns 1 when negative and 0 otherwise.
 */
static uint8_t negative(int8_t b)
{
    /* Reinterpret as unsigned, mask the sign bit and move it to bit 0. */
    return (uint8_t)(((uint8_t)b & 0x80U) >> 7);
}
+
+/* Determine whether two values are equal. a == b
+ * Constant time implementation.
+ *
+ * a [in] An 8-bit unsigned value.
+ * b [in] An 8-bit unsigned value.
+ * returns 1 when equal and 0 otherwise.
+ */
+static uint8_t equal(uint8_t a, uint8_t b)
+{
+ return (uint8_t)(((uint32_t)(a ^ b) - 1) >> 31);
+}
+
+/* Conditional move the point into result point if two values are equal.
+ * Constant time implementation.
+ *
+ * f [in] Point to conditionally overwrite.
+ * p [in] Point to conditionally copy.
+ * b [in] An 8-bit unsigned value.
+ * n [in] An 8-bit unsigned value.
+ */
+static WC_INLINE void cmov(ge448_precomp* r, const ge448_precomp* p, uint8_t b,
+ uint8_t n)
+{
+ b = equal(b, n);
+ fe448_cmov(r->x, p->x, b);
+ fe448_cmov(r->y, p->y, b);
+}
+
+/* Select one of the entries from the precomputed table and negate if required.
+ * Constant time implementation.
+ *
+ * r [in] Point to hold chosen point.
+ * pos [in] Position of array of entries to choose from.
+ * b [in] Index of point to select. -ve value means negate the point.
+ */
+static void ge448_select(ge448_precomp* r, int pos, int8_t b)
+{
+ ge448 minusx[16];
+ uint8_t bnegative = negative(b);
+ uint8_t babs = b - (((-bnegative) & b) << 1);
+
+ ge448_precomp_0(r);
+ cmov(r, &base[pos][0], babs, 1);
+ cmov(r, &base[pos][1], babs, 2);
+ cmov(r, &base[pos][2], babs, 3);
+ cmov(r, &base[pos][3], babs, 4);
+ cmov(r, &base[pos][4], babs, 5);
+ cmov(r, &base[pos][5], babs, 6);
+ cmov(r, &base[pos][6], babs, 7);
+ cmov(r, &base[pos][7], babs, 8);
+ fe448_neg(minusx, r->x);
+ fe448_cmov(r->x, minusx, bnegative);
+}
+
+/* Perform a scalar multiplication of the base point. r = a * base
+ *
+ * r [in] Point to hold result.
+ * a [in] Scalar to multiply by.
+ */
+void ge448_scalarmult_base(ge448_p2* r, const uint8_t* a)
+{
+ int8_t carry;
+ ge448_precomp t;
+ int i;
+ int8_t e[113];
+
+ carry = 0;
+ for (i = 0; i < 56; ++i) {
+ e[2 * i + 0] = ((a[i] >> 0) & 0xf) + carry;
+ carry = e[2 * i + 0] + 8;
+ carry >>= 4;
+ e[2 * i + 0] -= carry << 4;
+
+ e[2 * i + 1] = ((a[i] >> 4) & 0xf) + carry;
+ carry = e[2 * i + 1] + 8;
+ carry >>= 4;
+ e[2 * i + 1] -= carry << 4;
+ }
+ e[112] = carry;
+ /* each e[i] is between -8 and 8 */
+
+    /* Odd indices first - sum based on even index so multiply by 16 */
+ ge448_select(&t, 0, e[1]);
+ fe448_copy(r->X, t.x);
+ fe448_copy(r->Y, t.y);
+ fe448_1(r->Z);
+ for (i = 3; i < 112; i += 2) {
+ ge448_select(&t, i / 2, e[i]);
+ ge448_madd(r, r, &t);
+ }
+
+ ge448_dbl(r, r);
+ ge448_dbl(r, r);
+ ge448_dbl(r, r);
+ ge448_dbl(r, r);
+
+    /* Add even indices */
+ for (i = 0; i <= 112; i += 2) {
+ ge448_select(&t, i / 2, e[i]);
+ ge448_madd(r, r, &t);
+ }
+}
+
+/* Create a sliding window for the scalar multiplication.
+ *
+ * r  [in]  Array of indices.
+ * a [in] Scalar to break up.
+ */
+static void slide(int8_t *r, const uint8_t *a)
+{
+ int i;
+ int b;
+ int k;
+
+ for (i = 0; i < 448; ++i) {
+ r[i] = (a[i >> 3] >> (i & 7)) & 1;
+ }
+
+ for (i = 0; i < 448; ++i) {
+ if (r[i] == 0) {
+ continue;
+ }
+
+ for (b = 1; b <= 7 && i + b < 448; ++b) {
+ if (r[i + b] == 0) {
+ continue;
+ }
+
+ if (r[i] + (r[i + b] << b) <= 31) {
+ r[i] += r[i + b] << b; r[i + b] = 0;
+ }
+ else if (r[i] - (r[i + b] << b) >= -31) {
+ r[i] -= r[i + b] << b;
+ for (k = i + b; k < 448; ++k) {
+ if (!r[k]) {
+ r[k] = 1;
+ break;
+ }
+ r[k] = 0;
+ }
+ }
+ else {
+ break;
+ }
+ }
+ }
+}
+
+/* Perform a scalar multiplication of the base point and public point.
+ * r = a * p + b * base
+ * Uses a sliding window of 5 bits.
+ * Not constant time.
+ *
+ * r [in] Point to hold result.
+ * a [in] Scalar to multiply by.
+ */
+int ge448_double_scalarmult_vartime(ge448_p2 *r, const uint8_t *a,
+ const ge448_p2 *p, const uint8_t *b)
+{
+ int8_t aslide[448];
+ int8_t bslide[448];
+ ge448_p2 pi[16]; /* p,3p,..,31p */
+ ge448_p2 p2;
+ int i;
+
+ slide(aslide, a);
+ slide(bslide, b);
+
+ fe448_copy(pi[0].X, p->X);
+ fe448_copy(pi[0].Y, p->Y);
+ fe448_copy(pi[0].Z, p->Z);
+ ge448_dbl(&p2, p);
+ ge448_add(&pi[1], &p2, &pi[0]);
+ ge448_add(&pi[2], &p2, &pi[1]);
+ ge448_add(&pi[3], &p2, &pi[2]);
+ ge448_add(&pi[4], &p2, &pi[3]);
+ ge448_add(&pi[5], &p2, &pi[4]);
+ ge448_add(&pi[6], &p2, &pi[5]);
+ ge448_add(&pi[7], &p2, &pi[6]);
+ ge448_add(&pi[8], &p2, &pi[7]);
+ ge448_add(&pi[9], &p2, &pi[8]);
+ ge448_add(&pi[10], &p2, &pi[9]);
+ ge448_add(&pi[11], &p2, &pi[10]);
+ ge448_add(&pi[12], &p2, &pi[11]);
+ ge448_add(&pi[13], &p2, &pi[12]);
+ ge448_add(&pi[14], &p2, &pi[13]);
+ ge448_add(&pi[15], &p2, &pi[14]);
+
+ ge448_0(r);
+
+ /* Find first index that is not 0. */
+ for (i = 447; i >= 0; --i) {
+ if (aslide[i] || bslide[i]) {
+ break;
+ }
+ }
+
+ for (; i >= 0; --i) {
+ ge448_dbl(r, r);
+
+ if (aslide[i] > 0)
+ ge448_add(r, r, &pi[aslide[i]/2]);
+ else if (aslide[i] < 0)
+ ge448_sub(r, r ,&pi[(-aslide[i])/2]);
+
+ if (bslide[i] > 0)
+ ge448_madd(r, r, &base_i[bslide[i]/2]);
+ else if (bslide[i] < 0)
+ ge448_msub(r, r, &base_i[(-bslide[i])/2]);
+ }
+
+ return 0;
+}
+
+/* Convert compressed point to negative of affine point.
+ * Calculates x from the y and the negative bit.
+ * Not constant time.
+ *
+ * r [in] Uncompressed point.
+ * b [in] Array of bytes representing point.
+ * returns 0 on success and -1 on failure.
+ */
+int ge448_from_bytes_negate_vartime(ge448_p2 *r, const uint8_t *b)
+{
+ int ret = 0;
+ ge448 u[GE448_WORDS];
+ ge448 v[GE448_WORDS];
+ ge448 u3[GE448_WORDS];
+ ge448 vxx[GE448_WORDS];
+ ge448 check[GE448_WORDS];
+
+ fe448_from_bytes(r->Y, b);
+ fe448_1(r->Z);
+ fe448_sqr(u, r->Y); /* u = y^2 */
+ fe448_mul39081(v, u); /* v = 39081.y^2 */
+ fe448_sub(u, u, r->Z); /* u = y^2-1 */
+ fe448_reduce(u);
+    fe448_add(v, v, r->Z);           /* v = 39081.y^2+1 */
+ fe448_reduce(v);
+ fe448_neg(v, v); /* v = -39081.y^2-1 = d.y^2-1 */
+
+ fe448_sqr(r->X, v); /* x = v^2 */
+ fe448_mul(r->X, r->X, v); /* x = v^3 */
+ fe448_sqr(u3, u); /* x = u^2.v^3 */
+ fe448_mul(r->X, r->X, u3); /* x = u^2.v^3 */
+ fe448_mul(u3, u3, u); /* u3 = u^3 */
+ fe448_mul(r->X, r->X, u3); /* x = u^5.v^3 */
+
+ fe448_pow_2_446_222_1(r->X, r->X); /* x = (u^5.v^3)^((q-3)/4) */
+ fe448_mul(r->X, r->X, u3); /* x = u^3(u^5.v^3)^((q-3)/4) */
+ fe448_mul(r->X, r->X, v); /* x = u^3.v(u^5.v^3)^((q-3)/4) */
+
+ fe448_sqr(vxx, r->X);
+ fe448_mul(vxx, vxx, v);
+ fe448_sub(check, vxx, u); /* check = v.x^2-u */
+ fe448_reduce(check);
+    /* Note: v.x^2+u is NOT correct. */
+ if (fe448_isnonzero(check)) {
+ ret = -1;
+ }
+
+ /* Calculating negative of point in bytes - negate only if X is correct. */
+ if (fe448_isnegative(r->X) == (b[56] >> 7)) {
+ fe448_neg(r->X, r->X);
+ }
+
+ return ret;
+}
+
+#endif /* ED448_SMALL */
+#endif /* HAVE_CURVE448 || HAVE_ED448 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_low_mem.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_low_mem.c
index 43c533c69..3b72b96cc 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_low_mem.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_low_mem.c
@@ -1,8 +1,8 @@
/* ge_low_mem.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
/* Based from Daniel Beer's public domain work. */
#ifdef HAVE_CONFIG_H
@@ -28,12 +29,14 @@
#include <wolfssl/wolfcrypt/settings.h>
#ifdef HAVE_ED25519
+#ifdef ED25519_SMALL /* use slower code that takes less memory */
#include <wolfssl/wolfcrypt/ge_operations.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
@@ -43,10 +46,10 @@ void ed25519_double(ge_p3 *r, const ge_p3 *a);
static const byte ed25519_order[F25519_SIZE] = {
- 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58,
- 0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
+ 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58,
+ 0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
};
/*Arithmetic modulo the group order m = 2^252 +
@@ -69,16 +72,16 @@ static const word32 mu[33] = {
int ge_compress_key(byte* out, const byte* xIn, const byte* yIn,
word32 keySz)
{
- byte tmp[F25519_SIZE];
- byte parity;
+ byte tmp[F25519_SIZE];
+ byte parity;
+ byte pt[32];
int i;
- fe_copy(tmp, xIn);
- parity = (tmp[0] & 1) << 7;
+ lm_copy(tmp, xIn);
+ parity = (tmp[0] & 1) << 7;
- byte pt[32];
- fe_copy(pt, yIn);
- pt[31] |= parity;
+ lm_copy(pt, yIn);
+ pt[31] |= parity;
for(i = 0; i < 32; i++) {
out[32-i-1] = pt[i];
@@ -90,7 +93,7 @@ int ge_compress_key(byte* out, const byte* xIn, const byte* yIn,
static word32 lt(word32 a,word32 b) /* 16-bit inputs */
{
- unsigned int x = a;
+ word32 x = a;
x -= (unsigned int) b; /* 0..65535: no; 4294901761..4294967295: yes */
x >>= 31; /* 0: no; 1: yes */
return x;
@@ -187,20 +190,20 @@ void sc_reduce(unsigned char x[64])
void sc_muladd(byte* out, const byte* a, const byte* b, const byte* c)
{
- byte s[32];
+ byte s[32];
byte e[64];
XMEMSET(e, 0, sizeof(e));
XMEMCPY(e, b, 32);
- /* Obtain e */
- sc_reduce(e);
+ /* Obtain e */
+ sc_reduce(e);
- /* Compute s = ze + k */
- fprime_mul(s, a, e, ed25519_order);
- fprime_add(s, c, ed25519_order);
+ /* Compute s = ze + k */
+ fprime_mul(s, a, e, ed25519_order);
+ fprime_add(s, c, ed25519_order);
- XMEMCPY(out, s, 32);
+ XMEMCPY(out, s, 32);
}
@@ -216,315 +219,317 @@ void sc_muladd(byte* out, const byte* a, const byte* b, const byte* c)
* t is x*y.
*/
const ge_p3 ed25519_base = {
- .X = {
- 0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9,
- 0xb2, 0xa7, 0x25, 0x95, 0x60, 0xc7, 0x2c, 0x69,
- 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0,
- 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21
- },
- .Y = {
- 0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
- 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
- 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
- 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66
- },
- .T = {
- 0xa3, 0xdd, 0xb7, 0xa5, 0xb3, 0x8a, 0xde, 0x6d,
- 0xf5, 0x52, 0x51, 0x77, 0x80, 0x9f, 0xf0, 0x20,
- 0x7d, 0xe3, 0xab, 0x64, 0x8e, 0x4e, 0xea, 0x66,
- 0x65, 0x76, 0x8b, 0xd7, 0x0f, 0x5f, 0x87, 0x67
- },
- .Z = {1, 0}
+ {
+ 0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9,
+ 0xb2, 0xa7, 0x25, 0x95, 0x60, 0xc7, 0x2c, 0x69,
+ 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0,
+ 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21
+ },
+ {
+ 0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
+ 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
+ 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
+ 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66
+ },
+ {1, 0},
+ {
+ 0xa3, 0xdd, 0xb7, 0xa5, 0xb3, 0x8a, 0xde, 0x6d,
+ 0xf5, 0x52, 0x51, 0x77, 0x80, 0x9f, 0xf0, 0x20,
+ 0x7d, 0xe3, 0xab, 0x64, 0x8e, 0x4e, 0xea, 0x66,
+ 0x65, 0x76, 0x8b, 0xd7, 0x0f, 0x5f, 0x87, 0x67
+ },
+
};
const ge_p3 ed25519_neutral = {
- .X = {0},
- .Y = {1, 0},
- .T = {0},
- .Z = {1, 0}
+ {0},
+ {1, 0},
+ {1, 0},
+ {0},
+
};
static const byte ed25519_d[F25519_SIZE] = {
- 0xa3, 0x78, 0x59, 0x13, 0xca, 0x4d, 0xeb, 0x75,
- 0xab, 0xd8, 0x41, 0x41, 0x4d, 0x0a, 0x70, 0x00,
- 0x98, 0xe8, 0x79, 0x77, 0x79, 0x40, 0xc7, 0x8c,
- 0x73, 0xfe, 0x6f, 0x2b, 0xee, 0x6c, 0x03, 0x52
+ 0xa3, 0x78, 0x59, 0x13, 0xca, 0x4d, 0xeb, 0x75,
+ 0xab, 0xd8, 0x41, 0x41, 0x4d, 0x0a, 0x70, 0x00,
+ 0x98, 0xe8, 0x79, 0x77, 0x79, 0x40, 0xc7, 0x8c,
+ 0x73, 0xfe, 0x6f, 0x2b, 0xee, 0x6c, 0x03, 0x52
};
/* k = 2d */
static const byte ed25519_k[F25519_SIZE] = {
- 0x59, 0xf1, 0xb2, 0x26, 0x94, 0x9b, 0xd6, 0xeb,
- 0x56, 0xb1, 0x83, 0x82, 0x9a, 0x14, 0xe0, 0x00,
- 0x30, 0xd1, 0xf3, 0xee, 0xf2, 0x80, 0x8e, 0x19,
- 0xe7, 0xfc, 0xdf, 0x56, 0xdc, 0xd9, 0x06, 0x24
+ 0x59, 0xf1, 0xb2, 0x26, 0x94, 0x9b, 0xd6, 0xeb,
+ 0x56, 0xb1, 0x83, 0x82, 0x9a, 0x14, 0xe0, 0x00,
+ 0x30, 0xd1, 0xf3, 0xee, 0xf2, 0x80, 0x8e, 0x19,
+ 0xe7, 0xfc, 0xdf, 0x56, 0xdc, 0xd9, 0x06, 0x24
};
void ed25519_add(ge_p3 *r,
- const ge_p3 *p1, const ge_p3 *p2)
+ const ge_p3 *p1, const ge_p3 *p2)
{
- /* Explicit formulas database: add-2008-hwcd-3
- *
- * source 2008 Hisil--Wong--Carter--Dawson,
- * http://eprint.iacr.org/2008/522, Section 3.1
- * appliesto extended-1
- * parameter k
- * assume k = 2 d
- * compute A = (Y1-X1)(Y2-X2)
- * compute B = (Y1+X1)(Y2+X2)
- * compute C = T1 k T2
- * compute D = Z1 2 Z2
- * compute E = B - A
- * compute F = D - C
- * compute G = D + C
- * compute H = B + A
- * compute X3 = E F
- * compute Y3 = G H
- * compute T3 = E H
- * compute Z3 = F G
- */
- byte a[F25519_SIZE];
- byte b[F25519_SIZE];
- byte c[F25519_SIZE];
- byte d[F25519_SIZE];
- byte e[F25519_SIZE];
- byte f[F25519_SIZE];
- byte g[F25519_SIZE];
- byte h[F25519_SIZE];
-
- /* A = (Y1-X1)(Y2-X2) */
- fe_sub(c, p1->Y, p1->X);
- fe_sub(d, p2->Y, p2->X);
- fe_mul__distinct(a, c, d);
-
- /* B = (Y1+X1)(Y2+X2) */
- fe_add(c, p1->Y, p1->X);
- fe_add(d, p2->Y, p2->X);
- fe_mul__distinct(b, c, d);
-
- /* C = T1 k T2 */
- fe_mul__distinct(d, p1->T, p2->T);
- fe_mul__distinct(c, d, ed25519_k);
-
- /* D = Z1 2 Z2 */
- fe_mul__distinct(d, p1->Z, p2->Z);
- fe_add(d, d, d);
-
- /* E = B - A */
- fe_sub(e, b, a);
-
- /* F = D - C */
- fe_sub(f, d, c);
-
- /* G = D + C */
- fe_add(g, d, c);
-
- /* H = B + A */
- fe_add(h, b, a);
-
- /* X3 = E F */
- fe_mul__distinct(r->X, e, f);
-
- /* Y3 = G H */
- fe_mul__distinct(r->Y, g, h);
-
- /* T3 = E H */
- fe_mul__distinct(r->T, e, h);
-
- /* Z3 = F G */
- fe_mul__distinct(r->Z, f, g);
+ /* Explicit formulas database: add-2008-hwcd-3
+ *
+ * source 2008 Hisil--Wong--Carter--Dawson,
+ * http://eprint.iacr.org/2008/522, Section 3.1
+ * appliesto extended-1
+ * parameter k
+ * assume k = 2 d
+ * compute A = (Y1-X1)(Y2-X2)
+ * compute B = (Y1+X1)(Y2+X2)
+ * compute C = T1 k T2
+ * compute D = Z1 2 Z2
+ * compute E = B - A
+ * compute F = D - C
+ * compute G = D + C
+ * compute H = B + A
+ * compute X3 = E F
+ * compute Y3 = G H
+ * compute T3 = E H
+ * compute Z3 = F G
+ */
+ byte a[F25519_SIZE];
+ byte b[F25519_SIZE];
+ byte c[F25519_SIZE];
+ byte d[F25519_SIZE];
+ byte e[F25519_SIZE];
+ byte f[F25519_SIZE];
+ byte g[F25519_SIZE];
+ byte h[F25519_SIZE];
+
+ /* A = (Y1-X1)(Y2-X2) */
+ lm_sub(c, p1->Y, p1->X);
+ lm_sub(d, p2->Y, p2->X);
+ fe_mul__distinct(a, c, d);
+
+ /* B = (Y1+X1)(Y2+X2) */
+ lm_add(c, p1->Y, p1->X);
+ lm_add(d, p2->Y, p2->X);
+ fe_mul__distinct(b, c, d);
+
+ /* C = T1 k T2 */
+ fe_mul__distinct(d, p1->T, p2->T);
+ fe_mul__distinct(c, d, ed25519_k);
+
+ /* D = Z1 2 Z2 */
+ fe_mul__distinct(d, p1->Z, p2->Z);
+ lm_add(d, d, d);
+
+ /* E = B - A */
+ lm_sub(e, b, a);
+
+ /* F = D - C */
+ lm_sub(f, d, c);
+
+ /* G = D + C */
+ lm_add(g, d, c);
+
+ /* H = B + A */
+ lm_add(h, b, a);
+
+ /* X3 = E F */
+ fe_mul__distinct(r->X, e, f);
+
+ /* Y3 = G H */
+ fe_mul__distinct(r->Y, g, h);
+
+ /* T3 = E H */
+ fe_mul__distinct(r->T, e, h);
+
+ /* Z3 = F G */
+ fe_mul__distinct(r->Z, f, g);
}
void ed25519_double(ge_p3 *r, const ge_p3 *p)
{
- /* Explicit formulas database: dbl-2008-hwcd
- *
- * source 2008 Hisil--Wong--Carter--Dawson,
- * http://eprint.iacr.org/2008/522, Section 3.3
- * compute A = X1^2
- * compute B = Y1^2
- * compute C = 2 Z1^2
- * compute D = a A
- * compute E = (X1+Y1)^2-A-B
- * compute G = D + B
- * compute F = G - C
- * compute H = D - B
- * compute X3 = E F
- * compute Y3 = G H
- * compute T3 = E H
- * compute Z3 = F G
- */
- byte a[F25519_SIZE];
- byte b[F25519_SIZE];
- byte c[F25519_SIZE];
- byte e[F25519_SIZE];
- byte f[F25519_SIZE];
- byte g[F25519_SIZE];
- byte h[F25519_SIZE];
-
- /* A = X1^2 */
- fe_mul__distinct(a, p->X, p->X);
-
- /* B = Y1^2 */
- fe_mul__distinct(b, p->Y, p->Y);
-
- /* C = 2 Z1^2 */
- fe_mul__distinct(c, p->Z, p->Z);
- fe_add(c, c, c);
-
- /* D = a A (alter sign) */
- /* E = (X1+Y1)^2-A-B */
- fe_add(f, p->X, p->Y);
- fe_mul__distinct(e, f, f);
- fe_sub(e, e, a);
- fe_sub(e, e, b);
-
- /* G = D + B */
- fe_sub(g, b, a);
-
- /* F = G - C */
- fe_sub(f, g, c);
-
- /* H = D - B */
- fe_neg(h, b);
- fe_sub(h, h, a);
-
- /* X3 = E F */
- fe_mul__distinct(r->X, e, f);
-
- /* Y3 = G H */
- fe_mul__distinct(r->Y, g, h);
-
- /* T3 = E H */
- fe_mul__distinct(r->T, e, h);
-
- /* Z3 = F G */
- fe_mul__distinct(r->Z, f, g);
+ /* Explicit formulas database: dbl-2008-hwcd
+ *
+ * source 2008 Hisil--Wong--Carter--Dawson,
+ * http://eprint.iacr.org/2008/522, Section 3.3
+ * compute A = X1^2
+ * compute B = Y1^2
+ * compute C = 2 Z1^2
+ * compute D = a A
+ * compute E = (X1+Y1)^2-A-B
+ * compute G = D + B
+ * compute F = G - C
+ * compute H = D - B
+ * compute X3 = E F
+ * compute Y3 = G H
+ * compute T3 = E H
+ * compute Z3 = F G
+ */
+ byte a[F25519_SIZE];
+ byte b[F25519_SIZE];
+ byte c[F25519_SIZE];
+ byte e[F25519_SIZE];
+ byte f[F25519_SIZE];
+ byte g[F25519_SIZE];
+ byte h[F25519_SIZE];
+
+ /* A = X1^2 */
+ fe_mul__distinct(a, p->X, p->X);
+
+ /* B = Y1^2 */
+ fe_mul__distinct(b, p->Y, p->Y);
+
+ /* C = 2 Z1^2 */
+ fe_mul__distinct(c, p->Z, p->Z);
+ lm_add(c, c, c);
+
+ /* D = a A (alter sign) */
+ /* E = (X1+Y1)^2-A-B */
+ lm_add(f, p->X, p->Y);
+ fe_mul__distinct(e, f, f);
+ lm_sub(e, e, a);
+ lm_sub(e, e, b);
+
+ /* G = D + B */
+ lm_sub(g, b, a);
+
+ /* F = G - C */
+ lm_sub(f, g, c);
+
+ /* H = D - B */
+ lm_neg(h, b);
+ lm_sub(h, h, a);
+
+ /* X3 = E F */
+ fe_mul__distinct(r->X, e, f);
+
+ /* Y3 = G H */
+ fe_mul__distinct(r->Y, g, h);
+
+ /* T3 = E H */
+ fe_mul__distinct(r->T, e, h);
+
+ /* Z3 = F G */
+ fe_mul__distinct(r->Z, f, g);
}
void ed25519_smult(ge_p3 *r_out, const ge_p3 *p, const byte *e)
{
- ge_p3 r;
- int i;
+ ge_p3 r;
+ int i;
XMEMCPY(&r, &ed25519_neutral, sizeof(r));
- for (i = 255; i >= 0; i--) {
- const byte bit = (e[i >> 3] >> (i & 7)) & 1;
- ge_p3 s;
+ for (i = 255; i >= 0; i--) {
+ const byte bit = (e[i >> 3] >> (i & 7)) & 1;
+ ge_p3 s;
- ed25519_double(&r, &r);
- ed25519_add(&s, &r, p);
+ ed25519_double(&r, &r);
+ ed25519_add(&s, &r, p);
- fe_select(r.X, r.X, s.X, bit);
- fe_select(r.Y, r.Y, s.Y, bit);
- fe_select(r.Z, r.Z, s.Z, bit);
- fe_select(r.T, r.T, s.T, bit);
- }
+ fe_select(r.X, r.X, s.X, bit);
+ fe_select(r.Y, r.Y, s.Y, bit);
+ fe_select(r.Z, r.Z, s.Z, bit);
+ fe_select(r.T, r.T, s.T, bit);
+ }
XMEMCPY(r_out, &r, sizeof(r));
}
void ge_scalarmult_base(ge_p3 *R,const unsigned char *nonce)
{
- ed25519_smult(R, &ed25519_base, nonce);
+ ed25519_smult(R, &ed25519_base, nonce);
}
/* pack the point h into array s */
void ge_p3_tobytes(unsigned char *s,const ge_p3 *h)
{
- byte x[F25519_SIZE];
- byte y[F25519_SIZE];
- byte z1[F25519_SIZE];
- byte parity;
-
- fe_inv__distinct(z1, h->Z);
- fe_mul__distinct(x, h->X, z1);
- fe_mul__distinct(y, h->Y, z1);
-
- fe_normalize(x);
- fe_normalize(y);
-
- parity = (x[0] & 1) << 7;
- fe_copy(s, y);
- fe_normalize(s);
- s[31] |= parity;
+ byte x[F25519_SIZE];
+ byte y[F25519_SIZE];
+ byte z1[F25519_SIZE];
+ byte parity;
+
+ fe_inv__distinct(z1, h->Z);
+ fe_mul__distinct(x, h->X, z1);
+ fe_mul__distinct(y, h->Y, z1);
+
+ fe_normalize(x);
+ fe_normalize(y);
+
+ parity = (x[0] & 1) << 7;
+ lm_copy(s, y);
+ fe_normalize(s);
+ s[31] |= parity;
}
/* pack the point h into array s */
void ge_tobytes(unsigned char *s,const ge_p2 *h)
{
- byte x[F25519_SIZE];
- byte y[F25519_SIZE];
- byte z1[F25519_SIZE];
- byte parity;
-
- fe_inv__distinct(z1, h->Z);
- fe_mul__distinct(x, h->X, z1);
- fe_mul__distinct(y, h->Y, z1);
-
- fe_normalize(x);
- fe_normalize(y);
-
- parity = (x[0] & 1) << 7;
- fe_copy(s, y);
- fe_normalize(s);
- s[31] |= parity;
+ byte x[F25519_SIZE];
+ byte y[F25519_SIZE];
+ byte z1[F25519_SIZE];
+ byte parity;
+
+ fe_inv__distinct(z1, h->Z);
+ fe_mul__distinct(x, h->X, z1);
+ fe_mul__distinct(y, h->Y, z1);
+
+ fe_normalize(x);
+ fe_normalize(y);
+
+ parity = (x[0] & 1) << 7;
+ lm_copy(s, y);
+ fe_normalize(s);
+ s[31] |= parity;
}
/*
- Test if the public key can be uncommpressed and negate it (-X,Y,Z,-T)
+ Test if the public key can be uncompressed and negate it (-X,Y,Z,-T)
return 0 on success
*/
int ge_frombytes_negate_vartime(ge_p3 *p,const unsigned char *s)
{
- byte parity;
+ byte parity;
byte x[F25519_SIZE];
- byte y[F25519_SIZE];
- byte a[F25519_SIZE];
- byte b[F25519_SIZE];
- byte c[F25519_SIZE];
+ byte y[F25519_SIZE];
+ byte a[F25519_SIZE];
+ byte b[F25519_SIZE];
+ byte c[F25519_SIZE];
int ret = 0;
/* unpack the key s */
parity = s[31] >> 7;
- fe_copy(y, s);
- y[31] &= 127;
+ lm_copy(y, s);
+ y[31] &= 127;
- fe_mul__distinct(c, y, y);
+ fe_mul__distinct(c, y, y);
fe_mul__distinct(b, c, ed25519_d);
- fe_add(a, b, f25519_one);
- fe_inv__distinct(b, a);
- fe_sub(a, c, f25519_one);
- fe_mul__distinct(c, a, b);
- fe_sqrt(a, c);
- fe_neg(b, a);
- fe_select(x, a, b, (a[0] ^ parity) & 1);
+ lm_add(a, b, f25519_one);
+ fe_inv__distinct(b, a);
+ lm_sub(a, c, f25519_one);
+ fe_mul__distinct(c, a, b);
+ fe_sqrt(a, c);
+ lm_neg(b, a);
+ fe_select(x, a, b, (a[0] ^ parity) & 1);
/* test that x^2 is equal to c */
fe_mul__distinct(a, x, x);
- fe_normalize(a);
- fe_normalize(c);
- ret |= ConstantCompare(a, c, F25519_SIZE);
+ fe_normalize(a);
+ fe_normalize(c);
+ ret |= ConstantCompare(a, c, F25519_SIZE);
/* project the key s onto p */
- fe_copy(p->X, x);
- fe_copy(p->Y, y);
- fe_load(p->Z, 1);
- fe_mul__distinct(p->T, x, y);
+ lm_copy(p->X, x);
+ lm_copy(p->Y, y);
+ fe_load(p->Z, 1);
+ fe_mul__distinct(p->T, x, y);
/* negate, the point becomes (-X,Y,Z,-T) */
- fe_neg(p->X,p->X);
- fe_neg(p->T,p->T);
+ lm_neg(p->X,p->X);
+ lm_neg(p->T,p->T);
return ret;
}
@@ -542,17 +547,17 @@ int ge_double_scalarmult_vartime(ge_p2* R, const unsigned char *h,
ed25519_smult(&p, &ed25519_base, sig);
/* find H(R,A,M) * -A */
- ed25519_smult(&A, &A, h);
+ ed25519_smult(&A, &A, h);
/* SB + -H(R,A,M)A */
- ed25519_add(&A, &p, &A);
+ ed25519_add(&A, &p, &A);
- fe_copy(R->X, A.X);
- fe_copy(R->Y, A.Y);
- fe_copy(R->Z, A.Z);
+ lm_copy(R->X, A.X);
+ lm_copy(R->Y, A.Y);
+ lm_copy(R->Z, A.Z);
return ret;
}
+#endif /* ED25519_SMALL */
#endif /* HAVE_ED25519 */
-
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_operations.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_operations.c
index 665cfe89b..73fa06e35 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_operations.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ge_operations.c
@@ -1,8 +1,8 @@
/* ge_operations.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
/* Based On Daniel J Bernstein's ed25519 Public Domain ref10 work. */
@@ -29,19 +30,52 @@
#include <wolfssl/wolfcrypt/settings.h>
#ifdef HAVE_ED25519
+#ifndef ED25519_SMALL /* run when not defined to use small memory math */
#include <wolfssl/wolfcrypt/ge_operations.h>
+#include <wolfssl/wolfcrypt/ed25519.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
+#if defined(CURVED25519_X64)
+ #define CURVED25519_ASM_64BIT
+ #define CURVED25519_ASM
+#endif
+#if defined(WOLFSSL_ARMASM)
+ #if defined(__aarch64__)
+ #define CURVED25519_ASM_64BIT
+ #else
+ #define CURVED25519_ASM_32BIT
+ #endif
+ #define CURVED25519_ASM
+#endif
+
+
+static void ge_p2_0(ge_p2 *);
+#ifndef CURVED25519_ASM
+static void ge_precomp_0(ge_precomp *);
+#endif
+static void ge_p3_to_p2(ge_p2 *,const ge_p3 *);
+static void ge_p3_to_cached(ge_cached *,const ge_p3 *);
+static void ge_p1p1_to_p2(ge_p2 *,const ge_p1p1 *);
+static void ge_p1p1_to_p3(ge_p3 *,const ge_p1p1 *);
+static void ge_p2_dbl(ge_p1p1 *,const ge_p2 *);
+static void ge_p3_dbl(ge_p1p1 *,const ge_p3 *);
+
+static void ge_madd(ge_p1p1 *,const ge_p3 *,const ge_precomp *);
+static void ge_msub(ge_p1p1 *,const ge_p3 *,const ge_precomp *);
+static void ge_add(ge_p1p1 *,const ge_p3 *,const ge_cached *);
+static void ge_sub(ge_p1p1 *,const ge_p3 *,const ge_cached *);
+
/*
ge means group element.
-Here the group is the set of pairs (x,y) of field elements (see fe.h)
+Here the group is the set of pairs (x,y) of field elements (see ge_operations.h)
satisfying -x^2 + y^2 = 1 + d x^2y^2
where d = -121665/121666.
@@ -52,6 +86,36 @@ Representations:
ge_precomp (Duif): (y+x,y-x,2dxy)
*/
+#if !defined(HAVE___UINT128_T) || defined(NO_CURVED25519_128BIT)
+#define MASK_21 0x1fffff
+#define ORDER_0 0x15d3ed
+#define ORDER_1 0x18d2e7
+#define ORDER_2 0x160498
+#define ORDER_3 0xf39ac
+#define ORDER_4 0x1dea2f
+#define ORDER_5 0xa6f7c
+
+#ifdef CURVED25519_ASM_32BIT
+uint64_t load_3(const unsigned char *in)
+{
+ uint64_t result;
+ result = (uint64_t) in[0];
+ result |= ((uint64_t) in[1]) << 8;
+ result |= ((uint64_t) in[2]) << 16;
+ return result;
+}
+
+
+uint64_t load_4(const unsigned char *in)
+{
+ uint64_t result;
+ result = (uint64_t) in[0];
+ result |= ((uint64_t) in[1]) << 8;
+ result |= ((uint64_t) in[2]) << 16;
+ result |= ((uint64_t) in[3]) << 24;
+ return result;
+}
+#endif
/*
Input:
@@ -64,261 +128,197 @@ Output:
*/
void sc_reduce(byte* s)
{
- int64_t s0 = 2097151 & load_3(s);
- int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
- int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
- int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
- int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
- int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
- int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
- int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
- int64_t s8 = 2097151 & load_3(s + 21);
- int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
- int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
- int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
- int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
- int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
- int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
- int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
- int64_t s16 = 2097151 & load_3(s + 42);
- int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
- int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
- int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
- int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
- int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
- int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
- int64_t s23 = (load_4(s + 60) >> 3);
- int64_t carry0;
- int64_t carry1;
- int64_t carry2;
- int64_t carry3;
- int64_t carry4;
- int64_t carry5;
- int64_t carry6;
- int64_t carry7;
- int64_t carry8;
- int64_t carry9;
- int64_t carry10;
- int64_t carry11;
- int64_t carry12;
- int64_t carry13;
- int64_t carry14;
- int64_t carry15;
- int64_t carry16;
-
- s11 += s23 * 666643;
- s12 += s23 * 470296;
- s13 += s23 * 654183;
- s14 -= s23 * 997805;
- s15 += s23 * 136657;
- s16 -= s23 * 683901;
- s23 = 0;
-
- s10 += s22 * 666643;
- s11 += s22 * 470296;
- s12 += s22 * 654183;
- s13 -= s22 * 997805;
- s14 += s22 * 136657;
- s15 -= s22 * 683901;
- s22 = 0;
-
- s9 += s21 * 666643;
- s10 += s21 * 470296;
- s11 += s21 * 654183;
- s12 -= s21 * 997805;
- s13 += s21 * 136657;
- s14 -= s21 * 683901;
- s21 = 0;
-
- s8 += s20 * 666643;
- s9 += s20 * 470296;
- s10 += s20 * 654183;
- s11 -= s20 * 997805;
- s12 += s20 * 136657;
- s13 -= s20 * 683901;
- s20 = 0;
-
- s7 += s19 * 666643;
- s8 += s19 * 470296;
- s9 += s19 * 654183;
- s10 -= s19 * 997805;
- s11 += s19 * 136657;
- s12 -= s19 * 683901;
- s19 = 0;
-
- s6 += s18 * 666643;
- s7 += s18 * 470296;
- s8 += s18 * 654183;
- s9 -= s18 * 997805;
- s10 += s18 * 136657;
- s11 -= s18 * 683901;
- s18 = 0;
-
- carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
- carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
- carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
- carry12 = (s12 + (1<<20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
- carry14 = (s14 + (1<<20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
- carry16 = (s16 + (1<<20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
-
- carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
- carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
- carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
- carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
- carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
-
- s5 += s17 * 666643;
- s6 += s17 * 470296;
- s7 += s17 * 654183;
- s8 -= s17 * 997805;
- s9 += s17 * 136657;
- s10 -= s17 * 683901;
- s17 = 0;
-
- s4 += s16 * 666643;
- s5 += s16 * 470296;
- s6 += s16 * 654183;
- s7 -= s16 * 997805;
- s8 += s16 * 136657;
- s9 -= s16 * 683901;
- s16 = 0;
-
- s3 += s15 * 666643;
- s4 += s15 * 470296;
- s5 += s15 * 654183;
- s6 -= s15 * 997805;
- s7 += s15 * 136657;
- s8 -= s15 * 683901;
- s15 = 0;
-
- s2 += s14 * 666643;
- s3 += s14 * 470296;
- s4 += s14 * 654183;
- s5 -= s14 * 997805;
- s6 += s14 * 136657;
- s7 -= s14 * 683901;
- s14 = 0;
-
- s1 += s13 * 666643;
- s2 += s13 * 470296;
- s3 += s13 * 654183;
- s4 -= s13 * 997805;
- s5 += s13 * 136657;
- s6 -= s13 * 683901;
- s13 = 0;
-
- s0 += s12 * 666643;
- s1 += s12 * 470296;
- s2 += s12 * 654183;
- s3 -= s12 * 997805;
- s4 += s12 * 136657;
- s5 -= s12 * 683901;
- s12 = 0;
-
- carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
- carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
- carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
- carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
- carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
- carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
-
- carry1 = (s1 + (1<<20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
- carry3 = (s3 + (1<<20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
- carry5 = (s5 + (1<<20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
- carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
- carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
- carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
-
- s0 += s12 * 666643;
- s1 += s12 * 470296;
- s2 += s12 * 654183;
- s3 -= s12 * 997805;
- s4 += s12 * 136657;
- s5 -= s12 * 683901;
- s12 = 0;
-
- carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
- carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
- carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
- carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21;
- carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21;
- carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21;
- carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21;
- carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21;
- carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21;
- carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21;
- carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21;
- carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 << 21;
-
- s0 += s12 * 666643;
- s1 += s12 * 470296;
- s2 += s12 * 654183;
- s3 -= s12 * 997805;
- s4 += s12 * 136657;
- s5 -= s12 * 683901;
- s12 = 0;
-
- carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
- carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
- carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
- carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21;
- carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21;
- carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21;
- carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21;
- carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21;
- carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21;
- carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21;
- carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21;
-
- s[0] = s0 >> 0;
- s[1] = s0 >> 8;
- s[2] = (s0 >> 16) | (s1 << 5);
- s[3] = s1 >> 3;
- s[4] = s1 >> 11;
- s[5] = (s1 >> 19) | (s2 << 2);
- s[6] = s2 >> 6;
- s[7] = (s2 >> 14) | (s3 << 7);
- s[8] = s3 >> 1;
- s[9] = s3 >> 9;
- s[10] = (s3 >> 17) | (s4 << 4);
- s[11] = s4 >> 4;
- s[12] = s4 >> 12;
- s[13] = (s4 >> 20) | (s5 << 1);
- s[14] = s5 >> 7;
- s[15] = (s5 >> 15) | (s6 << 6);
- s[16] = s6 >> 2;
- s[17] = s6 >> 10;
- s[18] = (s6 >> 18) | (s7 << 3);
- s[19] = s7 >> 5;
- s[20] = s7 >> 13;
- s[21] = s8 >> 0;
- s[22] = s8 >> 8;
- s[23] = (s8 >> 16) | (s9 << 5);
- s[24] = s9 >> 3;
- s[25] = s9 >> 11;
- s[26] = (s9 >> 19) | (s10 << 2);
- s[27] = s10 >> 6;
- s[28] = (s10 >> 14) | (s11 << 7);
- s[29] = s11 >> 1;
- s[30] = s11 >> 9;
- s[31] = s11 >> 17;
-
- /* hush warnings after setting values to 0 */
- (void)s12;
- (void)s13;
- (void)s14;
- (void)s15;
- (void)s16;
- (void)s17;
- (void)s18;
- (void)s19;
- (void)s20;
- (void)s21;
- (void)s22;
- (void)s23;
+ int64_t t[24];
+ int64_t carry;
+
+ t[ 0] = MASK_21 & (load_3(s + 0) >> 0);
+ t[ 1] = MASK_21 & (load_4(s + 2) >> 5);
+ t[ 2] = MASK_21 & (load_3(s + 5) >> 2);
+ t[ 3] = MASK_21 & (load_4(s + 7) >> 7);
+ t[ 4] = MASK_21 & (load_4(s + 10) >> 4);
+ t[ 5] = MASK_21 & (load_3(s + 13) >> 1);
+ t[ 6] = MASK_21 & (load_4(s + 15) >> 6);
+ t[ 7] = MASK_21 & (load_3(s + 18) >> 3);
+ t[ 8] = MASK_21 & (load_3(s + 21) >> 0);
+ t[ 9] = MASK_21 & (load_4(s + 23) >> 5);
+ t[10] = MASK_21 & (load_3(s + 26) >> 2);
+ t[11] = MASK_21 & (load_4(s + 28) >> 7);
+ t[12] = MASK_21 & (load_4(s + 31) >> 4);
+ t[13] = MASK_21 & (load_3(s + 34) >> 1);
+ t[14] = MASK_21 & (load_4(s + 36) >> 6);
+ t[15] = MASK_21 & (load_3(s + 39) >> 3);
+ t[16] = MASK_21 & (load_3(s + 42) >> 0);
+ t[17] = MASK_21 & (load_4(s + 44) >> 5);
+ t[18] = MASK_21 & (load_3(s + 47) >> 2);
+ t[19] = MASK_21 & (load_4(s + 49) >> 7);
+ t[20] = MASK_21 & (load_4(s + 52) >> 4);
+ t[21] = MASK_21 & (load_3(s + 55) >> 1);
+ t[22] = MASK_21 & (load_4(s + 57) >> 6);
+ t[23] = (load_4(s + 60) >> 3);
+
+ t[11] -= t[23] * ORDER_0;
+ t[12] -= t[23] * ORDER_1;
+ t[13] -= t[23] * ORDER_2;
+ t[14] -= t[23] * ORDER_3;
+ t[15] -= t[23] * ORDER_4;
+ t[16] -= t[23] * ORDER_5;
+
+ t[10] -= t[22] * ORDER_0;
+ t[11] -= t[22] * ORDER_1;
+ t[12] -= t[22] * ORDER_2;
+ t[13] -= t[22] * ORDER_3;
+ t[14] -= t[22] * ORDER_4;
+ t[15] -= t[22] * ORDER_5;
+
+ t[ 9] -= t[21] * ORDER_0;
+ t[10] -= t[21] * ORDER_1;
+ t[11] -= t[21] * ORDER_2;
+ t[12] -= t[21] * ORDER_3;
+ t[13] -= t[21] * ORDER_4;
+ t[14] -= t[21] * ORDER_5;
+
+ t[ 8] -= t[20] * ORDER_0;
+ t[ 9] -= t[20] * ORDER_1;
+ t[10] -= t[20] * ORDER_2;
+ t[11] -= t[20] * ORDER_3;
+ t[12] -= t[20] * ORDER_4;
+ t[13] -= t[20] * ORDER_5;
+
+ t[ 7] -= t[19] * ORDER_0;
+ t[ 8] -= t[19] * ORDER_1;
+ t[ 9] -= t[19] * ORDER_2;
+ t[10] -= t[19] * ORDER_3;
+ t[11] -= t[19] * ORDER_4;
+ t[12] -= t[19] * ORDER_5;
+
+ t[ 6] -= t[18] * ORDER_0;
+ t[ 7] -= t[18] * ORDER_1;
+ t[ 8] -= t[18] * ORDER_2;
+ t[ 9] -= t[18] * ORDER_3;
+ t[10] -= t[18] * ORDER_4;
+ t[11] -= t[18] * ORDER_5;
+
+ carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21;
+ carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21;
+ carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21;
+ carry = t[12] >> 21; t[13] += carry; t[12] &= MASK_21;
+ carry = t[14] >> 21; t[15] += carry; t[14] &= MASK_21;
+ carry = t[16] >> 21; t[17] += carry; t[16] &= MASK_21;
+ carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21;
+ carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21;
+ carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21;
+ carry = t[13] >> 21; t[14] += carry; t[13] &= MASK_21;
+ carry = t[15] >> 21; t[16] += carry; t[15] &= MASK_21;
+
+ t[ 5] -= t[17] * ORDER_0;
+ t[ 6] -= t[17] * ORDER_1;
+ t[ 7] -= t[17] * ORDER_2;
+ t[ 8] -= t[17] * ORDER_3;
+ t[ 9] -= t[17] * ORDER_4;
+ t[10] -= t[17] * ORDER_5;
+
+ t[ 4] -= t[16] * ORDER_0;
+ t[ 5] -= t[16] * ORDER_1;
+ t[ 6] -= t[16] * ORDER_2;
+ t[ 7] -= t[16] * ORDER_3;
+ t[ 8] -= t[16] * ORDER_4;
+ t[ 9] -= t[16] * ORDER_5;
+
+ t[ 3] -= t[15] * ORDER_0;
+ t[ 4] -= t[15] * ORDER_1;
+ t[ 5] -= t[15] * ORDER_2;
+ t[ 6] -= t[15] * ORDER_3;
+ t[ 7] -= t[15] * ORDER_4;
+ t[ 8] -= t[15] * ORDER_5;
+
+ t[ 2] -= t[14] * ORDER_0;
+ t[ 3] -= t[14] * ORDER_1;
+ t[ 4] -= t[14] * ORDER_2;
+ t[ 5] -= t[14] * ORDER_3;
+ t[ 6] -= t[14] * ORDER_4;
+ t[ 7] -= t[14] * ORDER_5;
+
+ t[ 1] -= t[13] * ORDER_0;
+ t[ 2] -= t[13] * ORDER_1;
+ t[ 3] -= t[13] * ORDER_2;
+ t[ 4] -= t[13] * ORDER_3;
+ t[ 5] -= t[13] * ORDER_4;
+ t[ 6] -= t[13] * ORDER_5;
+
+ t[ 0] -= t[12] * ORDER_0;
+ t[ 1] -= t[12] * ORDER_1;
+ t[ 2] -= t[12] * ORDER_2;
+ t[ 3] -= t[12] * ORDER_3;
+ t[ 4] -= t[12] * ORDER_4;
+ t[ 5] -= t[12] * ORDER_5;
+ t[12] = 0;
+
+ carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21;
+ carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21;
+ carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21;
+ carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21;
+ carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21;
+ carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21;
+ carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21;
+ carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21;
+ carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21;
+ carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21;
+ carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21;
+ carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21;
+
+ t[ 0] -= t[12] * ORDER_0;
+ t[ 1] -= t[12] * ORDER_1;
+ t[ 2] -= t[12] * ORDER_2;
+ t[ 3] -= t[12] * ORDER_3;
+ t[ 4] -= t[12] * ORDER_4;
+ t[ 5] -= t[12] * ORDER_5;
+
+ carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21;
+ carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21;
+ carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21;
+ carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21;
+ carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21;
+ carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21;
+ carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21;
+ carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21;
+ carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21;
+ carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21;
+ carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21;
+
+ s[ 0] = (byte)(t[ 0] >> 0);
+ s[ 1] = (byte)(t[ 0] >> 8);
+ s[ 2] = (byte)((t[ 0] >> 16) | (t[ 1] << 5));
+ s[ 3] = (byte)(t[ 1] >> 3);
+ s[ 4] = (byte)(t[ 1] >> 11);
+ s[ 5] = (byte)((t[ 1] >> 19) | (t[ 2] << 2));
+ s[ 6] = (byte)(t[ 2] >> 6);
+ s[ 7] = (byte)((t[ 2] >> 14) | (t[ 3] << 7));
+ s[ 8] = (byte)(t[ 3] >> 1);
+ s[ 9] = (byte)(t[ 3] >> 9);
+ s[10] = (byte)((t[ 3] >> 17) | (t[ 4] << 4));
+ s[11] = (byte)(t[ 4] >> 4);
+ s[12] = (byte)(t[ 4] >> 12);
+ s[13] = (byte)((t[ 4] >> 20) | (t[ 5] << 1));
+ s[14] = (byte)(t[ 5] >> 7);
+ s[15] = (byte)((t[ 5] >> 15) | (t[ 6] << 6));
+ s[16] = (byte)(t[ 6] >> 2);
+ s[17] = (byte)(t[ 6] >> 10);
+ s[18] = (byte)((t[ 6] >> 18) | (t[ 7] << 3));
+ s[19] = (byte)(t[ 7] >> 5);
+ s[20] = (byte)(t[ 7] >> 13);
+ s[21] = (byte)(t[ 8] >> 0);
+ s[22] = (byte)(t[ 8] >> 8);
+ s[23] = (byte)((t[ 8] >> 16) | (t[ 9] << 5));
+ s[24] = (byte)(t[ 9] >> 3);
+ s[25] = (byte)(t[ 9] >> 11);
+ s[26] = (byte)((t[ 9] >> 19) | (t[10] << 2));
+ s[27] = (byte)(t[10] >> 6);
+ s[28] = (byte)((t[10] >> 14) | (t[11] << 7));
+ s[29] = (byte)(t[11] >> 1);
+ s[30] = (byte)(t[11] >> 9);
+ s[31] = (byte)(t[11] >> 17);
}
-
/*
Input:
a[0]+256*a[1]+...+256^31*a[31] = a
@@ -331,365 +331,611 @@ Output:
*/
void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
{
- int64_t a0 = 2097151 & load_3(a);
- int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
- int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
- int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
- int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
- int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
- int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
- int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
- int64_t a8 = 2097151 & load_3(a + 21);
- int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
- int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
- int64_t a11 = (load_4(a + 28) >> 7);
- int64_t b0 = 2097151 & load_3(b);
- int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
- int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
- int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
- int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
- int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
- int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
- int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
- int64_t b8 = 2097151 & load_3(b + 21);
- int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
- int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
- int64_t b11 = (load_4(b + 28) >> 7);
- int64_t c0 = 2097151 & load_3(c);
- int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
- int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
- int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
- int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
- int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
- int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
- int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
- int64_t c8 = 2097151 & load_3(c + 21);
- int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
- int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
- int64_t c11 = (load_4(c + 28) >> 7);
- int64_t s0;
- int64_t s1;
- int64_t s2;
- int64_t s3;
- int64_t s4;
- int64_t s5;
- int64_t s6;
- int64_t s7;
- int64_t s8;
- int64_t s9;
- int64_t s10;
- int64_t s11;
- int64_t s12;
- int64_t s13;
- int64_t s14;
- int64_t s15;
- int64_t s16;
- int64_t s17;
- int64_t s18;
- int64_t s19;
- int64_t s20;
- int64_t s21;
- int64_t s22;
- int64_t s23;
- int64_t carry0;
- int64_t carry1;
- int64_t carry2;
- int64_t carry3;
- int64_t carry4;
- int64_t carry5;
- int64_t carry6;
- int64_t carry7;
- int64_t carry8;
- int64_t carry9;
- int64_t carry10;
- int64_t carry11;
- int64_t carry12;
- int64_t carry13;
- int64_t carry14;
- int64_t carry15;
- int64_t carry16;
- int64_t carry17;
- int64_t carry18;
- int64_t carry19;
- int64_t carry20;
- int64_t carry21;
- int64_t carry22;
-
- s0 = c0 + a0*b0;
- s1 = c1 + a0*b1 + a1*b0;
- s2 = c2 + a0*b2 + a1*b1 + a2*b0;
- s3 = c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0;
- s4 = c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0;
- s5 = c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0;
- s6 = c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0;
- s7 = c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0;
- s8 = c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1
- + a8*b0;
- s9 = c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2
- + a8*b1 + a9*b0;
- s10 = c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3
- + a8*b2 + a9*b1 + a10*b0;
- s11 = c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4
- + a8*b3 + a9*b2 + a10*b1 + a11*b0;
- s12 = a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3
- + a10*b2 + a11*b1;
- s13 = a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3
- + a11*b2;
- s14 = a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4
- + a11*b3;
- s15 = a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4;
- s16 = a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5;
- s17 = a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6;
- s18 = a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7;
- s19 = a8*b11 + a9*b10 + a10*b9 + a11*b8;
- s20 = a9*b11 + a10*b10 + a11*b9;
- s21 = a10*b11 + a11*b10;
- s22 = a11*b11;
- s23 = 0;
-
- carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
- carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
- carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
- carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
- carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
- carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
- carry12 = (s12 + (1<<20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
- carry14 = (s14 + (1<<20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
- carry16 = (s16 + (1<<20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
- carry18 = (s18 + (1<<20)) >> 21; s19 += carry18; s18 -= carry18 << 21;
- carry20 = (s20 + (1<<20)) >> 21; s21 += carry20; s20 -= carry20 << 21;
- carry22 = (s22 + (1<<20)) >> 21; s23 += carry22; s22 -= carry22 << 21;
-
- carry1 = (s1 + (1<<20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
- carry3 = (s3 + (1<<20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
- carry5 = (s5 + (1<<20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
- carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
- carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
- carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
- carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
- carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
- carry17 = (s17 + (1<<20)) >> 21; s18 += carry17; s17 -= carry17 << 21;
- carry19 = (s19 + (1<<20)) >> 21; s20 += carry19; s19 -= carry19 << 21;
- carry21 = (s21 + (1<<20)) >> 21; s22 += carry21; s21 -= carry21 << 21;
-
- s11 += s23 * 666643;
- s12 += s23 * 470296;
- s13 += s23 * 654183;
- s14 -= s23 * 997805;
- s15 += s23 * 136657;
- s16 -= s23 * 683901;
- s23 = 0;
-
- s10 += s22 * 666643;
- s11 += s22 * 470296;
- s12 += s22 * 654183;
- s13 -= s22 * 997805;
- s14 += s22 * 136657;
- s15 -= s22 * 683901;
- s22 = 0;
-
- s9 += s21 * 666643;
- s10 += s21 * 470296;
- s11 += s21 * 654183;
- s12 -= s21 * 997805;
- s13 += s21 * 136657;
- s14 -= s21 * 683901;
- s21 = 0;
-
- s8 += s20 * 666643;
- s9 += s20 * 470296;
- s10 += s20 * 654183;
- s11 -= s20 * 997805;
- s12 += s20 * 136657;
- s13 -= s20 * 683901;
- s20 = 0;
-
- s7 += s19 * 666643;
- s8 += s19 * 470296;
- s9 += s19 * 654183;
- s10 -= s19 * 997805;
- s11 += s19 * 136657;
- s12 -= s19 * 683901;
- s19 = 0;
-
- s6 += s18 * 666643;
- s7 += s18 * 470296;
- s8 += s18 * 654183;
- s9 -= s18 * 997805;
- s10 += s18 * 136657;
- s11 -= s18 * 683901;
- s18 = 0;
-
- carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
- carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
- carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
- carry12 = (s12 + (1<<20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
- carry14 = (s14 + (1<<20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
- carry16 = (s16 + (1<<20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
-
- carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
- carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
- carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
- carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
- carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
-
- s5 += s17 * 666643;
- s6 += s17 * 470296;
- s7 += s17 * 654183;
- s8 -= s17 * 997805;
- s9 += s17 * 136657;
- s10 -= s17 * 683901;
- s17 = 0;
-
- s4 += s16 * 666643;
- s5 += s16 * 470296;
- s6 += s16 * 654183;
- s7 -= s16 * 997805;
- s8 += s16 * 136657;
- s9 -= s16 * 683901;
- s16 = 0;
-
- s3 += s15 * 666643;
- s4 += s15 * 470296;
- s5 += s15 * 654183;
- s6 -= s15 * 997805;
- s7 += s15 * 136657;
- s8 -= s15 * 683901;
- s15 = 0;
-
- s2 += s14 * 666643;
- s3 += s14 * 470296;
- s4 += s14 * 654183;
- s5 -= s14 * 997805;
- s6 += s14 * 136657;
- s7 -= s14 * 683901;
- s14 = 0;
-
- s1 += s13 * 666643;
- s2 += s13 * 470296;
- s3 += s13 * 654183;
- s4 -= s13 * 997805;
- s5 += s13 * 136657;
- s6 -= s13 * 683901;
- s13 = 0;
-
- s0 += s12 * 666643;
- s1 += s12 * 470296;
- s2 += s12 * 654183;
- s3 -= s12 * 997805;
- s4 += s12 * 136657;
- s5 -= s12 * 683901;
- s12 = 0;
-
- carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
- carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
- carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
- carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
- carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
- carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
-
- carry1 = (s1 + (1<<20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
- carry3 = (s3 + (1<<20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
- carry5 = (s5 + (1<<20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
- carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
- carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
- carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
-
- s0 += s12 * 666643;
- s1 += s12 * 470296;
- s2 += s12 * 654183;
- s3 -= s12 * 997805;
- s4 += s12 * 136657;
- s5 -= s12 * 683901;
- s12 = 0;
-
- carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
- carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
- carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
- carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21;
- carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21;
- carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21;
- carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21;
- carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21;
- carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21;
- carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21;
- carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21;
- carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 << 21;
-
- s0 += s12 * 666643;
- s1 += s12 * 470296;
- s2 += s12 * 654183;
- s3 -= s12 * 997805;
- s4 += s12 * 136657;
- s5 -= s12 * 683901;
- s12 = 0;
-
- carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
- carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
- carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
- carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21;
- carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21;
- carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21;
- carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21;
- carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21;
- carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21;
- carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21;
- carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21;
-
- s[0] = s0 >> 0;
- s[1] = s0 >> 8;
- s[2] = (s0 >> 16) | (s1 << 5);
- s[3] = s1 >> 3;
- s[4] = s1 >> 11;
- s[5] = (s1 >> 19) | (s2 << 2);
- s[6] = s2 >> 6;
- s[7] = (s2 >> 14) | (s3 << 7);
- s[8] = s3 >> 1;
- s[9] = s3 >> 9;
- s[10] = (s3 >> 17) | (s4 << 4);
- s[11] = s4 >> 4;
- s[12] = s4 >> 12;
- s[13] = (s4 >> 20) | (s5 << 1);
- s[14] = s5 >> 7;
- s[15] = (s5 >> 15) | (s6 << 6);
- s[16] = s6 >> 2;
- s[17] = s6 >> 10;
- s[18] = (s6 >> 18) | (s7 << 3);
- s[19] = s7 >> 5;
- s[20] = s7 >> 13;
- s[21] = s8 >> 0;
- s[22] = s8 >> 8;
- s[23] = (s8 >> 16) | (s9 << 5);
- s[24] = s9 >> 3;
- s[25] = s9 >> 11;
- s[26] = (s9 >> 19) | (s10 << 2);
- s[27] = s10 >> 6;
- s[28] = (s10 >> 14) | (s11 << 7);
- s[29] = s11 >> 1;
- s[30] = s11 >> 9;
- s[31] = s11 >> 17;
-
- /* hush warnings after setting values to 0 */
- (void)s12;
- (void)s13;
- (void)s14;
- (void)s15;
- (void)s16;
- (void)s17;
- (void)s18;
- (void)s19;
- (void)s20;
- (void)s21;
- (void)s22;
- (void)s23;
+ uint32_t ad[12], bd[12], cd[12];
+ int64_t t[24];
+ int64_t carry;
+
+ ad[ 0] = MASK_21 & (load_3(a + 0) >> 0);
+ ad[ 1] = MASK_21 & (load_4(a + 2) >> 5);
+ ad[ 2] = MASK_21 & (load_3(a + 5) >> 2);
+ ad[ 3] = MASK_21 & (load_4(a + 7) >> 7);
+ ad[ 4] = MASK_21 & (load_4(a + 10) >> 4);
+ ad[ 5] = MASK_21 & (load_3(a + 13) >> 1);
+ ad[ 6] = MASK_21 & (load_4(a + 15) >> 6);
+ ad[ 7] = MASK_21 & (load_3(a + 18) >> 3);
+ ad[ 8] = MASK_21 & (load_3(a + 21) >> 0);
+ ad[ 9] = MASK_21 & (load_4(a + 23) >> 5);
+ ad[10] = MASK_21 & (load_3(a + 26) >> 2);
+ ad[11] = (uint32_t)(load_4(a + 28) >> 7);
+ bd[ 0] = MASK_21 & (load_3(b + 0) >> 0);
+ bd[ 1] = MASK_21 & (load_4(b + 2) >> 5);
+ bd[ 2] = MASK_21 & (load_3(b + 5) >> 2);
+ bd[ 3] = MASK_21 & (load_4(b + 7) >> 7);
+ bd[ 4] = MASK_21 & (load_4(b + 10) >> 4);
+ bd[ 5] = MASK_21 & (load_3(b + 13) >> 1);
+ bd[ 6] = MASK_21 & (load_4(b + 15) >> 6);
+ bd[ 7] = MASK_21 & (load_3(b + 18) >> 3);
+ bd[ 8] = MASK_21 & (load_3(b + 21) >> 0);
+ bd[ 9] = MASK_21 & (load_4(b + 23) >> 5);
+ bd[10] = MASK_21 & (load_3(b + 26) >> 2);
+ bd[11] = (uint32_t)(load_4(b + 28) >> 7);
+ cd[ 0] = MASK_21 & (load_3(c + 0) >> 0);
+ cd[ 1] = MASK_21 & (load_4(c + 2) >> 5);
+ cd[ 2] = MASK_21 & (load_3(c + 5) >> 2);
+ cd[ 3] = MASK_21 & (load_4(c + 7) >> 7);
+ cd[ 4] = MASK_21 & (load_4(c + 10) >> 4);
+ cd[ 5] = MASK_21 & (load_3(c + 13) >> 1);
+ cd[ 6] = MASK_21 & (load_4(c + 15) >> 6);
+ cd[ 7] = MASK_21 & (load_3(c + 18) >> 3);
+ cd[ 8] = MASK_21 & (load_3(c + 21) >> 0);
+ cd[ 9] = MASK_21 & (load_4(c + 23) >> 5);
+ cd[10] = MASK_21 & (load_3(c + 26) >> 2);
+ cd[11] = (uint32_t)(load_4(c + 28) >> 7);
+
+ t[ 0] = cd[ 0] + (int64_t)ad[ 0] * bd[ 0];
+ t[ 1] = cd[ 1] + (int64_t)ad[ 0] * bd[ 1] + (int64_t)ad[ 1] * bd[ 0];
+ t[ 2] = cd[ 2] + (int64_t)ad[ 0] * bd[ 2] + (int64_t)ad[ 1] * bd[ 1] +
+ (int64_t)ad[ 2] * bd[ 0];
+ t[ 3] = cd[ 3] + (int64_t)ad[ 0] * bd[ 3] + (int64_t)ad[ 1] * bd[ 2] +
+ (int64_t)ad[ 2] * bd[ 1] + (int64_t)ad[ 3] * bd[ 0];
+ t[ 4] = cd[ 4] + (int64_t)ad[ 0] * bd[ 4] + (int64_t)ad[ 1] * bd[ 3] +
+ (int64_t)ad[ 2] * bd[ 2] + (int64_t)ad[ 3] * bd[ 1] +
+ (int64_t)ad[ 4] * bd[ 0];
+ t[ 5] = cd[ 5] + (int64_t)ad[ 0] * bd[ 5] + (int64_t)ad[ 1] * bd[ 4] +
+ (int64_t)ad[ 2] * bd[ 3] + (int64_t)ad[ 3] * bd[ 2] +
+ (int64_t)ad[ 4] * bd[ 1] + (int64_t)ad[ 5] * bd[ 0];
+ t[ 6] = cd[ 6] + (int64_t)ad[ 0] * bd[ 6] + (int64_t)ad[ 1] * bd[ 5] +
+ (int64_t)ad[ 2] * bd[ 4] + (int64_t)ad[ 3] * bd[ 3] +
+ (int64_t)ad[ 4] * bd[ 2] + (int64_t)ad[ 5] * bd[ 1] +
+ (int64_t)ad[ 6] * bd[ 0];
+ t[ 7] = cd[ 7] + (int64_t)ad[ 0] * bd[ 7] + (int64_t)ad[ 1] * bd[ 6] +
+ (int64_t)ad[ 2] * bd[ 5] + (int64_t)ad[ 3] * bd[ 4] +
+ (int64_t)ad[ 4] * bd[ 3] + (int64_t)ad[ 5] * bd[ 2] +
+ (int64_t)ad[ 6] * bd[ 1] + (int64_t)ad[ 7] * bd[ 0];
+ t[ 8] = cd[ 8] + (int64_t)ad[ 0] * bd[ 8] + (int64_t)ad[ 1] * bd[ 7] +
+ (int64_t)ad[ 2] * bd[ 6] + (int64_t)ad[ 3] * bd[ 5] +
+ (int64_t)ad[ 4] * bd[ 4] + (int64_t)ad[ 5] * bd[ 3] +
+ (int64_t)ad[ 6] * bd[ 2] + (int64_t)ad[ 7] * bd[ 1] +
+ (int64_t)ad[ 8] * bd[ 0];
+ t[ 9] = cd[ 9] + (int64_t)ad[ 0] * bd[ 9] + (int64_t)ad[ 1] * bd[ 8] +
+ (int64_t)ad[ 2] * bd[ 7] + (int64_t)ad[ 3] * bd[ 6] +
+ (int64_t)ad[ 4] * bd[ 5] + (int64_t)ad[ 5] * bd[ 4] +
+ (int64_t)ad[ 6] * bd[ 3] + (int64_t)ad[ 7] * bd[ 2] +
+ (int64_t)ad[ 8] * bd[ 1] + (int64_t)ad[ 9] * bd[ 0];
+ t[10] = cd[10] + (int64_t)ad[ 0] * bd[10] + (int64_t)ad[ 1] * bd[ 9] +
+ (int64_t)ad[ 2] * bd[ 8] + (int64_t)ad[ 3] * bd[ 7] +
+ (int64_t)ad[ 4] * bd[ 6] + (int64_t)ad[ 5] * bd[ 5] +
+ (int64_t)ad[ 6] * bd[ 4] + (int64_t)ad[ 7] * bd[ 3] +
+ (int64_t)ad[ 8] * bd[ 2] + (int64_t)ad[ 9] * bd[ 1] +
+ (int64_t)ad[10] * bd[ 0];
+ t[11] = cd[11] + (int64_t)ad[ 0] * bd[11] + (int64_t)ad[ 1] * bd[10] +
+ (int64_t)ad[ 2] * bd[ 9] + (int64_t)ad[ 3] * bd[ 8] +
+ (int64_t)ad[ 4] * bd[ 7] + (int64_t)ad[ 5] * bd[ 6] +
+ (int64_t)ad[ 6] * bd[ 5] + (int64_t)ad[ 7] * bd[ 4] +
+ (int64_t)ad[ 8] * bd[ 3] + (int64_t)ad[ 9] * bd[ 2] +
+ (int64_t)ad[10] * bd[ 1] + (int64_t)ad[11] * bd[ 0];
+ t[12] = (int64_t)ad[ 1] * bd[11] + (int64_t)ad[ 2] * bd[10] +
+ (int64_t)ad[ 3] * bd[ 9] + (int64_t)ad[ 4] * bd[ 8] +
+ (int64_t)ad[ 5] * bd[ 7] + (int64_t)ad[ 6] * bd[ 6] +
+ (int64_t)ad[ 7] * bd[ 5] + (int64_t)ad[ 8] * bd[ 4] +
+ (int64_t)ad[ 9] * bd[ 3] + (int64_t)ad[10] * bd[ 2] +
+ (int64_t)ad[11] * bd[ 1];
+ t[13] = (int64_t)ad[ 2] * bd[11] + (int64_t)ad[ 3] * bd[10] +
+ (int64_t)ad[ 4] * bd[ 9] + (int64_t)ad[ 5] * bd[ 8] +
+ (int64_t)ad[ 6] * bd[ 7] + (int64_t)ad[ 7] * bd[ 6] +
+ (int64_t)ad[ 8] * bd[ 5] + (int64_t)ad[ 9] * bd[ 4] +
+ (int64_t)ad[10] * bd[ 3] + (int64_t)ad[11] * bd[ 2];
+ t[14] = (int64_t)ad[ 3] * bd[11] + (int64_t)ad[ 4] * bd[10] +
+ (int64_t)ad[ 5] * bd[ 9] + (int64_t)ad[ 6] * bd[ 8] +
+ (int64_t)ad[ 7] * bd[ 7] + (int64_t)ad[ 8] * bd[ 6] +
+ (int64_t)ad[ 9] * bd[ 5] + (int64_t)ad[10] * bd[ 4] +
+ (int64_t)ad[11] * bd[ 3];
+ t[15] = (int64_t)ad[ 4] * bd[11] + (int64_t)ad[ 5] * bd[10] +
+ (int64_t)ad[ 6] * bd[ 9] + (int64_t)ad[ 7] * bd[ 8] +
+ (int64_t)ad[ 8] * bd[ 7] + (int64_t)ad[ 9] * bd[ 6] +
+ (int64_t)ad[10] * bd[ 5] + (int64_t)ad[11] * bd[ 4];
+ t[16] = (int64_t)ad[ 5] * bd[11] + (int64_t)ad[ 6] * bd[10] +
+ (int64_t)ad[ 7] * bd[ 9] + (int64_t)ad[ 8] * bd[ 8] +
+ (int64_t)ad[ 9] * bd[ 7] + (int64_t)ad[10] * bd[ 6] +
+ (int64_t)ad[11] * bd[ 5];
+ t[17] = (int64_t)ad[ 6] * bd[11] + (int64_t)ad[ 7] * bd[10] +
+ (int64_t)ad[ 8] * bd[ 9] + (int64_t)ad[ 9] * bd[ 8] +
+ (int64_t)ad[10] * bd[ 7] + (int64_t)ad[11] * bd[ 6];
+ t[18] = (int64_t)ad[ 7] * bd[11] + (int64_t)ad[ 8] * bd[10] +
+ (int64_t)ad[ 9] * bd[ 9] + (int64_t)ad[10] * bd[ 8] +
+ (int64_t)ad[11] * bd[ 7];
+ t[19] = (int64_t)ad[ 8] * bd[11] + (int64_t)ad[ 9] * bd[10] +
+ (int64_t)ad[10] * bd[ 9] + (int64_t)ad[11] * bd[ 8];
+ t[20] = (int64_t)ad[ 9] * bd[11] + (int64_t)ad[10] * bd[10] +
+ (int64_t)ad[11] * bd[ 9];
+ t[21] = (int64_t)ad[10] * bd[11] + (int64_t)ad[11] * bd[10];
+ t[22] = (int64_t)ad[11] * bd[11];
+ t[23] = 0;
+
+ carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21;
+ carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21;
+ carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21;
+ carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21;
+ carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21;
+ carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21;
+ carry = t[12] >> 21; t[13] += carry; t[12] &= MASK_21;
+ carry = t[14] >> 21; t[15] += carry; t[14] &= MASK_21;
+ carry = t[16] >> 21; t[17] += carry; t[16] &= MASK_21;
+ carry = t[18] >> 21; t[19] += carry; t[18] &= MASK_21;
+ carry = t[20] >> 21; t[21] += carry; t[20] &= MASK_21;
+ carry = t[22] >> 21; t[23] += carry; t[22] &= MASK_21;
+ carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21;
+ carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21;
+ carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21;
+ carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21;
+ carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21;
+ carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21;
+ carry = t[13] >> 21; t[14] += carry; t[13] &= MASK_21;
+ carry = t[15] >> 21; t[16] += carry; t[15] &= MASK_21;
+ carry = t[17] >> 21; t[18] += carry; t[17] &= MASK_21;
+ carry = t[19] >> 21; t[20] += carry; t[19] &= MASK_21;
+ carry = t[21] >> 21; t[22] += carry; t[21] &= MASK_21;
+
+ t[11] -= t[23] * ORDER_0;
+ t[12] -= t[23] * ORDER_1;
+ t[13] -= t[23] * ORDER_2;
+ t[14] -= t[23] * ORDER_3;
+ t[15] -= t[23] * ORDER_4;
+ t[16] -= t[23] * ORDER_5;
+
+ t[10] -= t[22] * ORDER_0;
+ t[11] -= t[22] * ORDER_1;
+ t[12] -= t[22] * ORDER_2;
+ t[13] -= t[22] * ORDER_3;
+ t[14] -= t[22] * ORDER_4;
+ t[15] -= t[22] * ORDER_5;
+
+ t[ 9] -= t[21] * ORDER_0;
+ t[10] -= t[21] * ORDER_1;
+ t[11] -= t[21] * ORDER_2;
+ t[12] -= t[21] * ORDER_3;
+ t[13] -= t[21] * ORDER_4;
+ t[14] -= t[21] * ORDER_5;
+
+ t[ 8] -= t[20] * ORDER_0;
+ t[ 9] -= t[20] * ORDER_1;
+ t[10] -= t[20] * ORDER_2;
+ t[11] -= t[20] * ORDER_3;
+ t[12] -= t[20] * ORDER_4;
+ t[13] -= t[20] * ORDER_5;
+
+ t[ 7] -= t[19] * ORDER_0;
+ t[ 8] -= t[19] * ORDER_1;
+ t[ 9] -= t[19] * ORDER_2;
+ t[10] -= t[19] * ORDER_3;
+ t[11] -= t[19] * ORDER_4;
+ t[12] -= t[19] * ORDER_5;
+
+ t[ 6] -= t[18] * ORDER_0;
+ t[ 7] -= t[18] * ORDER_1;
+ t[ 8] -= t[18] * ORDER_2;
+ t[ 9] -= t[18] * ORDER_3;
+ t[10] -= t[18] * ORDER_4;
+ t[11] -= t[18] * ORDER_5;
+
+ carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21;
+ carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21;
+ carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21;
+ carry = t[12] >> 21; t[13] += carry; t[12] &= MASK_21;
+ carry = t[14] >> 21; t[15] += carry; t[14] &= MASK_21;
+ carry = t[16] >> 21; t[17] += carry; t[16] &= MASK_21;
+ carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21;
+ carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21;
+ carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21;
+ carry = t[13] >> 21; t[14] += carry; t[13] &= MASK_21;
+ carry = t[15] >> 21; t[16] += carry; t[15] &= MASK_21;
+
+ t[ 5] -= t[17] * ORDER_0;
+ t[ 6] -= t[17] * ORDER_1;
+ t[ 7] -= t[17] * ORDER_2;
+ t[ 8] -= t[17] * ORDER_3;
+ t[ 9] -= t[17] * ORDER_4;
+ t[10] -= t[17] * ORDER_5;
+
+ t[ 4] -= t[16] * ORDER_0;
+ t[ 5] -= t[16] * ORDER_1;
+ t[ 6] -= t[16] * ORDER_2;
+ t[ 7] -= t[16] * ORDER_3;
+ t[ 8] -= t[16] * ORDER_4;
+ t[ 9] -= t[16] * ORDER_5;
+
+ t[ 3] -= t[15] * ORDER_0;
+ t[ 4] -= t[15] * ORDER_1;
+ t[ 5] -= t[15] * ORDER_2;
+ t[ 6] -= t[15] * ORDER_3;
+ t[ 7] -= t[15] * ORDER_4;
+ t[ 8] -= t[15] * ORDER_5;
+
+ t[ 2] -= t[14] * ORDER_0;
+ t[ 3] -= t[14] * ORDER_1;
+ t[ 4] -= t[14] * ORDER_2;
+ t[ 5] -= t[14] * ORDER_3;
+ t[ 6] -= t[14] * ORDER_4;
+ t[ 7] -= t[14] * ORDER_5;
+
+ t[ 1] -= t[13] * ORDER_0;
+ t[ 2] -= t[13] * ORDER_1;
+ t[ 3] -= t[13] * ORDER_2;
+ t[ 4] -= t[13] * ORDER_3;
+ t[ 5] -= t[13] * ORDER_4;
+ t[ 6] -= t[13] * ORDER_5;
+
+ t[ 0] -= t[12] * ORDER_0;
+ t[ 1] -= t[12] * ORDER_1;
+ t[ 2] -= t[12] * ORDER_2;
+ t[ 3] -= t[12] * ORDER_3;
+ t[ 4] -= t[12] * ORDER_4;
+ t[ 5] -= t[12] * ORDER_5;
+ t[12] = 0;
+
+ carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21;
+ carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21;
+ carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21;
+ carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21;
+ carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21;
+ carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21;
+ carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21;
+ carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21;
+ carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21;
+ carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21;
+ carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21;
+ carry = t[11] >> 21; t[12] += carry; t[11] &= MASK_21;
+
+ t[ 0] -= t[12] * ORDER_0;
+ t[ 1] -= t[12] * ORDER_1;
+ t[ 2] -= t[12] * ORDER_2;
+ t[ 3] -= t[12] * ORDER_3;
+ t[ 4] -= t[12] * ORDER_4;
+ t[ 5] -= t[12] * ORDER_5;
+
+ carry = t[ 0] >> 21; t[ 1] += carry; t[ 0] &= MASK_21;
+ carry = t[ 1] >> 21; t[ 2] += carry; t[ 1] &= MASK_21;
+ carry = t[ 2] >> 21; t[ 3] += carry; t[ 2] &= MASK_21;
+ carry = t[ 3] >> 21; t[ 4] += carry; t[ 3] &= MASK_21;
+ carry = t[ 4] >> 21; t[ 5] += carry; t[ 4] &= MASK_21;
+ carry = t[ 5] >> 21; t[ 6] += carry; t[ 5] &= MASK_21;
+ carry = t[ 6] >> 21; t[ 7] += carry; t[ 6] &= MASK_21;
+ carry = t[ 7] >> 21; t[ 8] += carry; t[ 7] &= MASK_21;
+ carry = t[ 8] >> 21; t[ 9] += carry; t[ 8] &= MASK_21;
+ carry = t[ 9] >> 21; t[10] += carry; t[ 9] &= MASK_21;
+ carry = t[10] >> 21; t[11] += carry; t[10] &= MASK_21;
+
+ s[ 0] = (byte)(t[ 0] >> 0);
+ s[ 1] = (byte)(t[ 0] >> 8);
+ s[ 2] = (byte)((t[ 0] >> 16) | (t[ 1] << 5));
+ s[ 3] = (byte)(t[ 1] >> 3);
+ s[ 4] = (byte)(t[ 1] >> 11);
+ s[ 5] = (byte)((t[ 1] >> 19) | (t[ 2] << 2));
+ s[ 6] = (byte)(t[ 2] >> 6);
+ s[ 7] = (byte)((t[ 2] >> 14) | (t[ 3] << 7));
+ s[ 8] = (byte)(t[ 3] >> 1);
+ s[ 9] = (byte)(t[ 3] >> 9);
+ s[10] = (byte)((t[ 3] >> 17) | (t[ 4] << 4));
+ s[11] = (byte)(t[ 4] >> 4);
+ s[12] = (byte)(t[ 4] >> 12);
+ s[13] = (byte)((t[ 4] >> 20) | (t[ 5] << 1));
+ s[14] = (byte)(t[ 5] >> 7);
+ s[15] = (byte)((t[ 5] >> 15) | (t[ 6] << 6));
+ s[16] = (byte)(t[ 6] >> 2);
+ s[17] = (byte)(t[ 6] >> 10);
+ s[18] = (byte)((t[ 6] >> 18) | (t[ 7] << 3));
+ s[19] = (byte)(t[ 7] >> 5);
+ s[20] = (byte)(t[ 7] >> 13);
+ s[21] = (byte)(t[ 8] >> 0);
+ s[22] = (byte)(t[ 8] >> 8);
+ s[23] = (byte)((t[ 8] >> 16) | (t[ 9] << 5));
+ s[24] = (byte)(t[ 9] >> 3);
+ s[25] = (byte)(t[ 9] >> 11);
+ s[26] = (byte)((t[ 9] >> 19) | (t[10] << 2));
+ s[27] = (byte)(t[10] >> 6);
+ s[28] = (byte)((t[10] >> 14) | (t[11] << 7));
+ s[29] = (byte)(t[11] >> 1);
+ s[30] = (byte)(t[11] >> 9);
+ s[31] = (byte)(t[11] >> 17);
+}
+#else
+/* Read 6 bytes at a as a little-endian 48-bit value. */
+static uint64_t load_6(const byte* a)
+{
+ uint64_t n;
+ n = ((uint64_t)a[0] << 0) |
+ ((uint64_t)a[1] << 8) |
+ ((uint64_t)a[2] << 16) |
+ ((uint64_t)a[3] << 24) |
+ ((uint64_t)a[4] << 32) |
+ ((uint64_t)a[5] << 40);
+ return n;
+}
+
+/* Read 7 bytes at a as a little-endian 56-bit value. */
+static uint64_t load_7(const byte* a)
+{
+ uint64_t n;
+ n = ((uint64_t)a[0] << 0) |
+ ((uint64_t)a[1] << 8) |
+ ((uint64_t)a[2] << 16) |
+ ((uint64_t)a[3] << 24) |
+ ((uint64_t)a[4] << 32) |
+ ((uint64_t)a[5] << 40) |
+ ((uint64_t)a[6] << 48);
+ return n;
}
+#define MASK_42 0x3ffffffffffl
+#define ORDER_0 0x31a5cf5d3edl
+#define ORDER_1 0x1e735960498l
+#define ORDER_2 0x14def9dea2fl
+
+/*
+Input:
+ s[0]+256*s[1]+...+256^63*s[63] = s
+
+Output:
+ s[0]+256*s[1]+...+256^31*s[31] = s mod l
+ where l = 2^252 + 27742317777372353535851937790883648493.
+ Overwrites s in place.
+*/
+void sc_reduce(byte* s)
+{
+ /* Twelve 42-bit limbs; __int128_t gives headroom for the signed
+  * intermediate values produced while folding in the order below. */
+ __int128_t t[12];
+ __int128_t carry;
+
+ /* Unpack the 64-byte input into 42-bit limbs t[0..10]; t[11] keeps
+  * all remaining top bits unmasked. */
+ t[ 0] = MASK_42 & (load_6(s + 0) >> 0);
+ t[ 1] = MASK_42 & (load_6(s + 5) >> 2);
+ t[ 2] = MASK_42 & (load_6(s + 10) >> 4);
+ t[ 3] = MASK_42 & (load_6(s + 15) >> 6);
+ t[ 4] = MASK_42 & (load_6(s + 21) >> 0);
+ t[ 5] = MASK_42 & (load_6(s + 26) >> 2);
+ t[ 6] = MASK_42 & (load_6(s + 31) >> 4);
+ t[ 7] = MASK_42 & (load_6(s + 36) >> 6);
+ t[ 8] = MASK_42 & (load_6(s + 42) >> 0);
+ t[ 9] = MASK_42 & (load_6(s + 47) >> 2);
+ t[10] = MASK_42 & (load_6(s + 52) >> 4);
+ t[11] = (load_7(s + 57) >> 6);
+
+ /* Fold high limbs down: with l = 2^252 + c (see header comment),
+  * 2^252 == -(ORDER_0 + ORDER_1*2^42 + ORDER_2*2^84) (mod l), so limb
+  * t[k] (k >= 6) times the order's low limbs is subtracted starting at
+  * limb k - 6. */
+ t[ 5] -= t[11] * ORDER_0;
+ t[ 6] -= t[11] * ORDER_1;
+ t[ 7] -= t[11] * ORDER_2;
+
+ t[ 4] -= t[10] * ORDER_0;
+ t[ 5] -= t[10] * ORDER_1;
+ t[ 6] -= t[10] * ORDER_2;
+
+ t[ 3] -= t[ 9] * ORDER_0;
+ t[ 4] -= t[ 9] * ORDER_1;
+ t[ 5] -= t[ 9] * ORDER_2;
+
+ /* Partial carry propagation so limbs stay in range for the next folds. */
+ carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+ carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+ carry = t[ 7] >> 42; t[ 8] += carry; t[ 7] &= MASK_42;
+ carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+ carry = t[ 6] >> 42; t[ 7] += carry; t[ 6] &= MASK_42;
+
+ /* Remaining folds of t[8], t[7], t[6]; t[6] is then cleared so it can
+  * collect the carry out of limb 5 below. */
+ t[ 2] -= t[ 8] * ORDER_0;
+ t[ 3] -= t[ 8] * ORDER_1;
+ t[ 4] -= t[ 8] * ORDER_2;
+
+ t[ 1] -= t[ 7] * ORDER_0;
+ t[ 2] -= t[ 7] * ORDER_1;
+ t[ 3] -= t[ 7] * ORDER_2;
+
+ t[ 0] -= t[ 6] * ORDER_0;
+ t[ 1] -= t[ 6] * ORDER_1;
+ t[ 2] -= t[ 6] * ORDER_2;
+ t[ 6] = 0;
+
+ /* Full carry chain; any overflow past limb 5 lands in t[6]. */
+ carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+ carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+ carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+ carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+ carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+ carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+
+ /* Fold any remaining overflow (t[6]) and do a last carry pass; the
+  * result now fits in six 42-bit limbs t[0..5]. */
+ t[ 0] -= t[ 6] * ORDER_0;
+ t[ 1] -= t[ 6] * ORDER_1;
+ t[ 2] -= t[ 6] * ORDER_2;
+
+ carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+ carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+ carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+ carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+ carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+
+ /* Repack the six 42-bit limbs into 32 little-endian bytes in place. */
+ s[ 0] = (t[ 0] >> 0);
+ s[ 1] = (t[ 0] >> 8);
+ s[ 2] = (t[ 0] >> 16);
+ s[ 3] = (t[ 0] >> 24);
+ s[ 4] = (t[ 0] >> 32);
+ s[ 5] = (t[ 0] >> 40) | (t[ 1] << 2);
+ s[ 6] = (t[ 1] >> 6);
+ s[ 7] = (t[ 1] >> 14);
+ s[ 8] = (t[ 1] >> 22);
+ s[ 9] = (t[ 1] >> 30);
+ s[10] = (t[ 1] >> 38) | (t[ 2] << 4);
+ s[11] = (t[ 2] >> 4);
+ s[12] = (t[ 2] >> 12);
+ s[13] = (t[ 2] >> 20);
+ s[14] = (t[ 2] >> 28);
+ s[15] = (t[ 2] >> 36) | (t[ 3] << 6);
+ s[16] = (t[ 3] >> 2);
+ s[17] = (t[ 3] >> 10);
+ s[18] = (t[ 3] >> 18);
+ s[19] = (t[ 3] >> 26);
+ s[20] = (t[ 3] >> 34);
+ s[21] = (t[ 4] >> 0);
+ s[22] = (t[ 4] >> 8);
+ s[23] = (t[ 4] >> 16);
+ s[24] = (t[ 4] >> 24);
+ s[25] = (t[ 4] >> 32);
+ s[26] = (t[ 4] >> 40) | (t[ 5] << 2);
+ s[27] = (t[ 5] >> 6);
+ s[28] = (t[ 5] >> 14);
+ s[29] = (t[ 5] >> 22);
+ s[30] = (t[ 5] >> 30);
+ s[31] = (t[ 5] >> 38);
+}
+
+/*
+Input:
+ a[0]+256*a[1]+...+256^31*a[31] = a
+ b[0]+256*b[1]+...+256^31*b[31] = b
+ c[0]+256*c[1]+...+256^31*c[31] = c
+
+Output:
+ s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
+ where l = 2^252 + 27742317777372353535851937790883648493.
+*/
+void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
+{
+ /* a, b, c each unpack into six 42-bit limbs; products are accumulated
+  * in __int128_t to hold 84-bit partial products plus carries. */
+ uint64_t ad[6], bd[6], cd[6];
+ __int128_t t[12];
+ __int128_t carry;
+
+ /* Unpack the three 32-byte inputs into 42-bit limbs; the top limb of
+  * each keeps its remaining high bits unmasked. */
+ ad[ 0] = MASK_42 & (load_6(a + 0) >> 0);
+ ad[ 1] = MASK_42 & (load_6(a + 5) >> 2);
+ ad[ 2] = MASK_42 & (load_6(a + 10) >> 4);
+ ad[ 3] = MASK_42 & (load_6(a + 15) >> 6);
+ ad[ 4] = MASK_42 & (load_6(a + 21) >> 0);
+ ad[ 5] = (load_6(a + 26) >> 2);
+ bd[ 0] = MASK_42 & (load_6(b + 0) >> 0);
+ bd[ 1] = MASK_42 & (load_6(b + 5) >> 2);
+ bd[ 2] = MASK_42 & (load_6(b + 10) >> 4);
+ bd[ 3] = MASK_42 & (load_6(b + 15) >> 6);
+ bd[ 4] = MASK_42 & (load_6(b + 21) >> 0);
+ bd[ 5] = (load_6(b + 26) >> 2);
+ cd[ 0] = MASK_42 & (load_6(c + 0) >> 0);
+ cd[ 1] = MASK_42 & (load_6(c + 5) >> 2);
+ cd[ 2] = MASK_42 & (load_6(c + 10) >> 4);
+ cd[ 3] = MASK_42 & (load_6(c + 15) >> 6);
+ cd[ 4] = MASK_42 & (load_6(c + 21) >> 0);
+ cd[ 5] = (load_6(c + 26) >> 2);
+
+ /* Schoolbook 6x6 limb product a*b, with c added into the low six
+  * limbs; t[11] is zeroed to receive the top carry below. */
+ t[ 0] = cd[ 0] + (__int128_t)ad[ 0] * bd[ 0];
+ t[ 1] = cd[ 1] + (__int128_t)ad[ 0] * bd[ 1] + (__int128_t)ad[ 1] * bd[ 0];
+ t[ 2] = cd[ 2] + (__int128_t)ad[ 0] * bd[ 2] + (__int128_t)ad[ 1] * bd[ 1] +
+ (__int128_t)ad[ 2] * bd[ 0];
+ t[ 3] = cd[ 3] + (__int128_t)ad[ 0] * bd[ 3] + (__int128_t)ad[ 1] * bd[ 2] +
+ (__int128_t)ad[ 2] * bd[ 1] + (__int128_t)ad[ 3] * bd[ 0];
+ t[ 4] = cd[ 4] + (__int128_t)ad[ 0] * bd[ 4] + (__int128_t)ad[ 1] * bd[ 3] +
+ (__int128_t)ad[ 2] * bd[ 2] + (__int128_t)ad[ 3] * bd[ 1] +
+ (__int128_t)ad[ 4] * bd[ 0];
+ t[ 5] = cd[ 5] + (__int128_t)ad[ 0] * bd[ 5] + (__int128_t)ad[ 1] * bd[ 4] +
+ (__int128_t)ad[ 2] * bd[ 3] + (__int128_t)ad[ 3] * bd[ 2] +
+ (__int128_t)ad[ 4] * bd[ 1] + (__int128_t)ad[ 5] * bd[ 0];
+ t[ 6] = (__int128_t)ad[ 1] * bd[ 5] + (__int128_t)ad[ 2] * bd[ 4] +
+ (__int128_t)ad[ 3] * bd[ 3] + (__int128_t)ad[ 4] * bd[ 2] +
+ (__int128_t)ad[ 5] * bd[ 1];
+ t[ 7] = (__int128_t)ad[ 2] * bd[ 5] + (__int128_t)ad[ 3] * bd[ 4] +
+ (__int128_t)ad[ 4] * bd[ 3] + (__int128_t)ad[ 5] * bd[ 2];
+ t[ 8] = (__int128_t)ad[ 3] * bd[ 5] + (__int128_t)ad[ 4] * bd[ 4] +
+ (__int128_t)ad[ 5] * bd[ 3];
+ t[ 9] = (__int128_t)ad[ 4] * bd[ 5] + (__int128_t)ad[ 5] * bd[ 4];
+ t[10] = (__int128_t)ad[ 5] * bd[ 5];
+ t[11] = 0;
+
+ /* Carry pass (even limbs first, then odd) to bound each limb before
+  * the reduction folds. */
+ carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+ carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+ carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+ carry = t[ 6] >> 42; t[ 7] += carry; t[ 6] &= MASK_42;
+ carry = t[ 8] >> 42; t[ 9] += carry; t[ 8] &= MASK_42;
+ carry = t[10] >> 42; t[11] += carry; t[10] &= MASK_42;
+ carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+ carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+ carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+ carry = t[ 7] >> 42; t[ 8] += carry; t[ 7] &= MASK_42;
+ carry = t[ 9] >> 42; t[10] += carry; t[ 9] &= MASK_42;
+
+ /* Reduce mod l exactly as in sc_reduce: 2^252 == -(ORDER_0 +
+  * ORDER_1*2^42 + ORDER_2*2^84) (mod l), so each high limb t[k]
+  * (k >= 6) is folded down starting at limb k - 6. */
+ t[ 5] -= t[11] * ORDER_0;
+ t[ 6] -= t[11] * ORDER_1;
+ t[ 7] -= t[11] * ORDER_2;
+
+ t[ 4] -= t[10] * ORDER_0;
+ t[ 5] -= t[10] * ORDER_1;
+ t[ 6] -= t[10] * ORDER_2;
+
+ t[ 3] -= t[ 9] * ORDER_0;
+ t[ 4] -= t[ 9] * ORDER_1;
+ t[ 5] -= t[ 9] * ORDER_2;
+
+ /* Partial carry propagation so limbs stay in range for the next folds. */
+ carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+ carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+ carry = t[ 7] >> 42; t[ 8] += carry; t[ 7] &= MASK_42;
+ carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+ carry = t[ 6] >> 42; t[ 7] += carry; t[ 6] &= MASK_42;
+
+ /* Remaining folds; t[6] is then cleared to collect the carry out of
+  * limb 5 below. */
+ t[ 2] -= t[ 8] * ORDER_0;
+ t[ 3] -= t[ 8] * ORDER_1;
+ t[ 4] -= t[ 8] * ORDER_2;
+
+ t[ 1] -= t[ 7] * ORDER_0;
+ t[ 2] -= t[ 7] * ORDER_1;
+ t[ 3] -= t[ 7] * ORDER_2;
+
+ t[ 0] -= t[ 6] * ORDER_0;
+ t[ 1] -= t[ 6] * ORDER_1;
+ t[ 2] -= t[ 6] * ORDER_2;
+ t[ 6] = 0;
+
+ /* Full carry chain; overflow past limb 5 lands in t[6]. */
+ carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+ carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+ carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+ carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+ carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+ carry = t[ 5] >> 42; t[ 6] += carry; t[ 5] &= MASK_42;
+
+ /* Fold the last overflow and carry once more; result fits in six
+  * 42-bit limbs t[0..5]. */
+ t[ 0] -= t[ 6] * ORDER_0;
+ t[ 1] -= t[ 6] * ORDER_1;
+ t[ 2] -= t[ 6] * ORDER_2;
+
+ carry = t[ 0] >> 42; t[ 1] += carry; t[ 0] &= MASK_42;
+ carry = t[ 1] >> 42; t[ 2] += carry; t[ 1] &= MASK_42;
+ carry = t[ 2] >> 42; t[ 3] += carry; t[ 2] &= MASK_42;
+ carry = t[ 3] >> 42; t[ 4] += carry; t[ 3] &= MASK_42;
+ carry = t[ 4] >> 42; t[ 5] += carry; t[ 4] &= MASK_42;
+
+ /* Repack the six 42-bit limbs into the 32-byte little-endian output. */
+ s[ 0] = (t[ 0] >> 0);
+ s[ 1] = (t[ 0] >> 8);
+ s[ 2] = (t[ 0] >> 16);
+ s[ 3] = (t[ 0] >> 24);
+ s[ 4] = (t[ 0] >> 32);
+ s[ 5] = (t[ 0] >> 40) | (t[ 1] << 2);
+ s[ 6] = (t[ 1] >> 6);
+ s[ 7] = (t[ 1] >> 14);
+ s[ 8] = (t[ 1] >> 22);
+ s[ 9] = (t[ 1] >> 30);
+ s[10] = (t[ 1] >> 38) | (t[ 2] << 4);
+ s[11] = (t[ 2] >> 4);
+ s[12] = (t[ 2] >> 12);
+ s[13] = (t[ 2] >> 20);
+ s[14] = (t[ 2] >> 28);
+ s[15] = (t[ 2] >> 36) | (t[ 3] << 6);
+ s[16] = (t[ 3] >> 2);
+ s[17] = (t[ 3] >> 10);
+ s[18] = (t[ 3] >> 18);
+ s[19] = (t[ 3] >> 26);
+ s[20] = (t[ 3] >> 34);
+ s[21] = (t[ 4] >> 0);
+ s[22] = (t[ 4] >> 8);
+ s[23] = (t[ 4] >> 16);
+ s[24] = (t[ 4] >> 24);
+ s[25] = (t[ 4] >> 32);
+ s[26] = (t[ 4] >> 40) | (t[ 5] << 2);
+ s[27] = (t[ 5] >> 6);
+ s[28] = (t[ 5] >> 14);
+ s[29] = (t[ 5] >> 22);
+ s[30] = (t[ 5] >> 30);
+ s[31] = (t[ 5] >> 38);
+}
+#endif /* !HAVE___UINT128_T || NO_CURVED25519_128BIT */
int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, word32 keySz)
{
- fe x,y,z;
+ ge x,y,z;
ge_p3 g;
- byte bArray[keySz];
+ byte bArray[ED25519_KEY_SIZE];
word32 i;
fe_0(x);
@@ -715,23 +961,29 @@ int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, word32 keySz)
/*
r = p + q
*/
-void ge_add(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
-{
- fe t0;
- fe_add(r->X,p->Y,p->X);
- fe_sub(r->Y,p->Y,p->X);
- fe_mul(r->Z,r->X,q->YplusX);
- fe_mul(r->Y,r->Y,q->YminusX);
- fe_mul(r->T,q->T2d,p->T);
- fe_mul(r->X,p->Z,q->Z);
- fe_add(t0,r->X,r->X);
- fe_sub(r->X,r->Z,r->Y);
- fe_add(r->Y,r->Z,r->Y);
- fe_add(r->Z,t0,r->T);
- fe_sub(r->T,t0,r->T);
+static WC_INLINE void ge_add(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
+{
+#ifndef CURVED25519_ASM
+ /* Portable C path: field-op sequence adding p (ge_p3) and q
+  * (ge_cached) into r (ge_p1p1), using q's precomputed YplusX,
+  * YminusX and T2d. */
+ ge t0;
+ fe_add(r->X,p->Y,p->X);
+ fe_sub(r->Y,p->Y,p->X);
+ fe_mul(r->Z,r->X,q->YplusX);
+ fe_mul(r->Y,r->Y,q->YminusX);
+ fe_mul(r->T,q->T2d,p->T);
+ fe_mul(r->X,p->Z,q->Z);
+ fe_add(t0,r->X,r->X);
+ fe_sub(r->X,r->Z,r->Y);
+ fe_add(r->Y,r->Z,r->Y);
+ fe_add(r->Z,t0,r->T);
+ fe_sub(r->T,t0,r->T);
+#else
+ /* Assembly path: one fused call computes the same result. */
+ fe_ge_add(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T, q->Z, q->T2d,
+ q->YplusX, q->YminusX);
+#endif
}
+#ifndef CURVED25519_ASM
/* ge_scalar mult base */
static unsigned char equal(signed char b,signed char c)
{
@@ -741,29 +993,6758 @@ static unsigned char equal(signed char b,signed char c)
uint32_t y = x; /* 0: yes; 1..255: no */
y -= 1; /* 4294967295: yes; 0..254: no */
y >>= 31; /* 1: yes; 0: no */
- return y;
+ return (unsigned char)y;
}
static unsigned char negative(signed char b)
{
- unsigned long long x = b; /* 18446744073709551361..18446744073709551615:
- yes; 0..255: no */
- x >>= 63; /* 1: yes; 0: no */
- return x;
+ /* Constant-time sign test: extract the sign bit; 1 if b < 0, else 0. */
+ return ((unsigned char)b) >> 7;
}
-static void cmov(ge_precomp *t,ge_precomp *u,unsigned char b)
+static WC_INLINE void cmov(ge_precomp *t,const ge_precomp *u,unsigned char b,
+ unsigned char n)
{
+ /* Constant-time select: copy u into t only when b == n (equal()
+  * yields a 0/1 mask). */
+ b = equal(b,n);
 fe_cmov(t->yplusx,u->yplusx,b);
 fe_cmov(t->yminusx,u->yminusx,b);
 fe_cmov(t->xy2d,u->xy2d,b);
}
+#endif
-
+#ifdef CURVED25519_ASM_64BIT
+static const ge_precomp base[64][8] = {
+{
+ {
+ { 0x2fbc93c6f58c3b85, -0x306cd2390473f1e7, 0x270b4898643d42c2, 0x07cf9d3a33d4ba65 },
+ { -0x62efc6fa28bf6ec2, -0x02c660fa2ebf414d, -0x5a3e7bcb977075f7, 0x44fd2f9298f81267 },
+ { -0x2442ea98b49044a7, 0x41e13f00eea2a5ea, -0x322b62e336a83906, 0x4f0ebe1faf16ecca }
+ },
+ {
+ { -0x6ddb18036cc38e29, -0x60b9626985f00a4b, 0x5aa69a65e1d60702, 0x590c063fa87d2e2e },
+ { -0x75665a9fbd4b2a58, -0x70d47ef3b19f530a, -0x1f61dc944e91c856, 0x6bb595a669c92555 },
+ { 0x6e347eaadad36802, -0x450ca66c7c11b7fb, 0x3bcabe10e6076826, 0x49314f0a165ed1b8 }
+ },
+ {
+ { -0x50da4f57b31168d0, 0x025a8430e8864b8a, -0x3ee4affd60fe98ce, 0x7a164e1b9a80f8f4 },
+ { 0x56611fe8a4fcd265, 0x3bd353fde5c1ba7d, -0x7ece0ce5deb42943, 0x2ab91587555bda62 },
+ { -0x640dee0b0e98b7cc, -0x47b194e809d2076b, -0x282190f8a48dd5b2, 0x549a04b963bb2a21 }
+ },
+ {
+ { 0x287351b98efc099f, 0x6765c6f47dfd2538, -0x35cb72c204f56d9b, 0x680e910321e58727 },
+ { -0x6a01faf5fa97e741, 0x327e89715660faa9, -0x3c171c32f95faf8d, 0x27933f4c7445a49a },
+ { -0x40e1ba131aebd950, -0x1cd439c29245f06c, -0x1bd68b2a7307ad40, 0x44f079b1b0e64c18 }
+ },
+ {
+ { -0x5ded43bbf75a44cd, -0x72afb73c38a112fe, -0x22e414f3a54013bc, 0x2945ccf146e206eb },
+ { 0x7f9182c3a447d6ba, -0x2affeb2eb4d8d649, -0x1cc30ee3479b5f79, 0x154a7e73eb1b55f3 },
+ { -0x37cd5e86182ffc4d, 0x5f729d0a00124d7e, 0x62c1d4a10e6d8ff3, 0x68b8ac5938b27a98 }
+ },
+ {
+ { 0x3a0ceeeb77157131, -0x64d8ea76ff375078, -0x7f9a499725a658ca, 0x51e57bb6a2cc38bd },
+ { 0x499806b67b7d8ca4, 0x575be28427d22739, -0x44f7a318dfbaac47, 0x38b64c41ae417884 },
+ { -0x7062526e97621c5c, 0x175f2428f8fb9137, 0x050ab5329fcfb988, 0x7865dfa21354c09f }
+ },
+ {
+ { 0x6b1a5cd0944ea3bf, 0x7470353ab39dc0d2, 0x71b2528228542e49, 0x461bea69283c927e },
+ { -0x4590d36555cdde4f, 0x6ca021533bba23a7, -0x621589b06de6d3c6, 0x1d6edd5d2e5317e0 },
+ { 0x217a8aacab0fda36, -0x5ad739abc2cab638, 0x37d05b8b13ab7568, 0x233cef623a2cbc37 }
+ },
+ {
+ { 0x59b7596604dd3e8f, 0x6cb30377e288702c, -0x4ecc6399a1263cdd, 0x0915e76061bce52f },
+ { -0x1d58a2120c6dcb27, -0x69c2897f1e4aa707, 0x2c2741ac6e3c23fb, 0x3a9024a1320e01c3 },
+ { -0x208217ca57cb5c82, -0x741e63259767a816, 0x2c1185367167b326, 0x589eb3d9dbefd5c2 }
+ },
+},
+{
+ {
+ { 0x322d04a52d9021f6, -0x463e60cc8a394064, 0x587a3a4342d20b09, 0x143b1cf8aa64fe61 },
+ { 0x7ec851ca553e2df3, -0x58ed7b3459b7874d, -0x194a1be6cd772e19, 0x4cf210ec5a9a8883 },
+ { -0x6079838269753555, 0x5f54258e27092729, -0x2f582cb415e7f68b, 0x21b546a3374126e1 }
+ },
+ {
+ { 0x490a7a45d185218f, -0x65eac887b9fb6ccb, 0x0060ea09cc31e1f6, 0x7e041577f86ee965 },
+ { -0x56b007a75d777cbd, -0x31f12ba9acec12c4, -0x0aa3c2304a40cb06, 0x0a653ca5c9eab371 },
+ { 0x66b2a496ce5b67f3, -0x00ab6d2742a9686a, 0x503cec294a592cd0, 0x566943650813acb2 }
+ },
+ {
+ { 0x5672f9eb1dabb69d, -0x458f4aca5017ac04, 0x47ac0f752796d66d, 0x32a5351794117275 },
+ { -0x47e724f3d99df868, 0x5d5c31d9606e354a, 0x0982fa4f00a8cdc7, 0x17e12bcd4653e2d4 },
+ { -0x2c59bb59209b7bc9, 0x703b6559880fbfdd, -0x347adabf52c5e55b, 0x0900b3f78e4c6468 }
+ },
+ {
+ { -0x12d7f04137e952cf, 0x52d9595bd8e6efe3, 0x0fe71772f6c623f5, 0x4314030b051e293c },
+ { 0x0a851b9f679d651b, -0x1ef7349efcccbd0e, -0x29fe0a801774cf5d, 0x371f3acaed2dd714 },
+ { -0x2a9fffa1040f4353, -0x7148f0d12e78f3a2, 0x201f9033d084e6a0, 0x4c3a5ae1ce7b6670 }
+ },
+ {
+ { -0x45078a1b36c25f23, -0x46cd7d588e46d6b3, -0x7f29c0480b393ba0, 0x6de9c73dea66c181 },
+ { 0x4138a434dcb8fa95, -0x78f3098293697bf5, -0x21c77a8bd68417d4, 0x7c814db27262a55a },
+ { 0x478904d5a04df8f2, -0x050451b54efebd2d, -0x0937539caaa2f668, 0x5aac4a412f90b104 }
+ },
+ {
+ { 0x603a0d0abd7f5134, -0x7f7636cd1e2c51ba, -0x20da6ec67867429d, 0x1c145cd274ba0235 },
+ { -0x39b0cd94c536d6f8, 0x5551b282e663e1e0, 0x476b35f54a1a4b83, 0x1b9da3fe189f68c2 },
+ { 0x32e8386475f3d743, 0x365b8baf6ae5d9ef, -0x7dadc749c7a497e2, 0x234929c1167d65e1 }
+ },
+ {
+ { 0x48145cc21d099fcf, 0x4535c192cc28d7e5, -0x7f183e1ab7db81ff, 0x4a5f28743b2973ee },
+ { -0x67b213545f885218, 0x383f77ad19eb389d, -0x38139481d6ab286c, 0x59c77b3aeb7c3a7a },
+ { -0x2c5228dadda3309e, -0x6ee5cc7e4dead3a3, -0x274c6052a4f70783, 0x6f05606b4799fe3b }
+ },
+ {
+ { 0x5b433149f91b6483, -0x524a239aa5d3409e, -0x78057bed9cd7d84d, 0x60895e91ab49f8d8 },
+ { -0x6001616de884569e, -0x675118e2f21a351f, 0x3ff4ae942d831044, 0x714de12e58533ac8 },
+ { -0x16130d12f30793e8, -0x4b92f9edf8ca202c, -0x43625f67fb469419, 0x73e2e62fd96dc26b }
+ },
+},
+{
+ {
+ { 0x2eccdd0e632f9c1d, 0x51d0b69676893115, 0x52dfb76ba8637a58, 0x6dd37d49a00eef39 },
+ { -0x12a49cabb655aea2, -0x579a3b60f4397dc6, -0x7af3e016a4bd2e3c, 0x30d76d6f03d315b9 },
+ { 0x6c4444172106e4c7, -0x04ac297f6d728097, -0x4b8c615b96b2c0da, 0x10c697112e864bb0 }
+ },
+ {
+ { 0x0ca62aa08358c805, 0x6a3d4ae37a204247, 0x7464d3a63b11eddc, 0x03bf9baf550806ef },
+ { 0x6493c4277dbe5fde, 0x265d4fad19ad7ea2, 0x0e00dfc846304590, 0x25e61cabed66fe09 },
+ { 0x3f13e128cc586604, 0x6f5873ecb459747e, -0x5f49c21233ed970b, 0x566d78634586e22c }
+ },
+ {
+ { -0x5efabd7a39a5d030, 0x6c64112af31667c3, 0x680ae240731aee58, 0x14fba5f34793b22a },
+ { 0x1637a49f9cc10834, -0x4371a92a57643baf, 0x1cb5ec0f7f7fd2db, 0x33975bca5ecc35d9 },
+ { 0x3cd746166985f7d4, 0x593e5e84c9c80057, 0x2fc3f2b67b61131e, 0x14829cea83fc526c }
+ },
+ {
+ { 0x21e70b2f4e71ecb8, -0x19a92246bf5b881d, -0x409aa93131e2b080, 0x05fc3bc4535d7b7e },
+ { -0x00bc847b68226a3e, 0x6c744e30aa4eb5a7, -0x61f3a29ec37a1775, 0x2fd9c71e5f758173 },
+ { 0x24b8b3ae52afdedd, 0x3495638ced3b30cf, 0x33a4bc83a9be8195, 0x373767475c651f04 }
+ },
+ {
+ { 0x634095cb14246590, -0x10edebbfe93eaacb, -0x61c7ebf376ef43a0, 0x6bf5905730907c8c },
+ { 0x2fba99fd40d1add9, -0x4cf8e990690b2fd9, 0x4363f05215f03bae, 0x1fbea56c3b18f999 },
+ { 0x0fa778f1e1415b8a, 0x06409ff7bac3a77e, 0x6f52d7b89aa29a50, 0x02521cf67a635a56 }
+ },
+ {
+ { -0x4eeb98df88d0a11c, -0x17076b4e69f86532, 0x4af8224d00ac824a, 0x001753d9f7cd6cc4 },
+ { 0x513fee0b0a9d5294, -0x706718a3f020a59a, -0x2b9e7977401ef832, 0x3fa00a7e71382ced },
+ { 0x3c69232d963ddb34, 0x1dde87dab4973858, -0x55282e065f6e0d7b, 0x12b5fe2fa048edb6 }
+ },
+ {
+ { -0x20d483d95290e16e, 0x4b66d323504b8913, -0x73bf623f8ae3743d, 0x6f7e93c20796c7b8 },
+ { 0x71f0fbc496fce34d, 0x73b9826badf35bed, -0x2dfb8d9e00d73a9f, 0x749b76f96fb1206f },
+ { 0x1f5af604aea6ae05, -0x3edcae0e411b6367, 0x61a808b5eeff6b66, 0x0fcec10f01e02151 }
+ },
+ {
+ { 0x3df2d29dc4244e45, 0x2b020e7493d8de0a, 0x6cc8067e820c214d, 0x413779166feab90a },
+ { 0x644d58a649fe1e44, 0x21fcaea231ad777e, 0x02441c5a887fd0d2, 0x4901aa7183c511f3 },
+ { 0x08b1b7548c1af8f0, -0x31f08583db9d664c, -0x089f4f06e1f926c7, 0x41bb887b726d1213 }
+ },
+},
+{
+ {
+ { -0x68267f1f55c6082e, 0x35d0384252c6b51c, 0x7d43f49307cd55aa, 0x56bd36cfb78ac362 },
+ { -0x6d987f93a983b628, 0x066d04ccca791e6a, -0x5960a9ba1c33c6b5, 0x5c95b686a0788cd2 },
+ { 0x2ac519c10d14a954, -0x150b8b4b6b4a0570, -0x19507c7d560785a6, 0x0dea6db1879be094 }
+ },
+ {
+ { 0x15baeb74d6a8797a, 0x7ef55cf1fac41732, 0x29001f5a3c8b05c5, 0x0ad7cc8752eaccfb },
+ { -0x559940ab8cbb1a55, -0x25eda77770e4bcf7, 0x5e87d2b3fd564b2f, 0x5b2c78885483b1dd },
+ { 0x52151362793408cf, -0x14f0e8fce669c26c, -0x57cc4d0577c26b9a, 0x093a7fa775003c78 }
+ },
+ {
+ { -0x47169fbb9f56ed7a, 0x7f3fd8047778d3de, 0x67d01e31bf8a5e2d, 0x7b038a06c27b653e },
+ { -0x1aef8219c5e92842, -0x5c880023650ccd31, 0x70d5bf18440b677f, 0x6a252b19a4a31403 },
+ { -0x6126e62a2c966f0d, 0x5213aebbdb4eb9f2, -0x38f715fab3466ecb, 0x58ded57f72260e56 }
+ },
+ {
+ { -0x2592acd9a4f02b75, -0x769f7dce6c405678, -0x287536cd9e2a81d8, 0x79f2942d3a5c8143 },
+ { 0x78e79dade9413d77, -0x0da8062a68d61983, 0x59db910ee37aa7e6, 0x6aa11b5bbb9e039c },
+ { -0x6825d0da49377217, 0x251ba7eaacf20169, 0x09b44f87ef4eb4e4, 0x7d90ab1bbc6a7da5 }
+ },
+ {
+ { 0x1a07a3f496b3c397, 0x11ceaa188f4e2532, 0x7d9498d5a7751bf0, 0x19ed161f508dd8a0 },
+ { -0x6533597c58fe9402, -0x6fafa0b20d3af493, 0x6b610d5fcce435aa, 0x19a10d446198ff96 },
+ { 0x560a2cd687dce6ca, 0x7f3568c48664cf4d, -0x78be16addd7fc5c8, 0x483bdab1595653fc }
+ },
+ {
+ { -0x2930b2f54b257f0a, -0x7db7c1ba07cf8020, 0x05005269ae6f9da4, 0x1c7052909cf7877a },
+ { -0x0587f0eb78cb05b7, 0x106f0b70360534e0, 0x2210776fe3e307bd, 0x3286c109dde6a0fe },
+ { 0x32ee7de2874e98d4, 0x14c362e9b97e0c60, 0x5781dcde6a60a38a, 0x217dd5eaaa7aa840 }
+ },
+ {
+ { -0x7420e0464173f138, 0x00bae7f8e30a0282, 0x4963991dad6c4f6c, 0x07058a6e5df6f60a },
+ { -0x62483b2fdb71e150, -0x1f89681eb28b40ae, 0x1e6a9b173c562354, 0x7fa7c21f795a4965 },
+ { -0x1614fd3b24ce0981, -0x12da0276ef4304d5, 0x46c8131f5c5cddb4, 0x33b21c13a0cb9bce }
+ },
+ {
+ { -0x6550464fa11c73a5, -0x4062d2b1f8e5ec39, -0x7111919216ccd6f6, 0x1c3bab17ae109717 },
+ { 0x360692f8087d8e31, -0x0b2339c82d8e9c09, 0x25a4e62065ea5963, 0x659bf72e5ac160d9 },
+ { 0x1c9ab216c7cab7b0, 0x7d65d37407bbc3cc, 0x52744750504a58d5, 0x09f2606b131a2990 }
+ },
+},
+{
+ {
+ { 0x7e234c597c6691ae, 0x64889d3d0a85b4c8, -0x251d36f3cab50519, 0x0a871e070c6a9e1d },
+ { 0x40e87d44744346be, 0x1d48dad415b52b25, 0x7c3a8a18a13b603e, 0x4eb728c12fcdbdf7 },
+ { 0x3301b5994bbc8989, 0x736bae3a5bdd4260, 0x0d61ade219d59e3c, 0x3ee7300f2685d464 }
+ },
+ {
+ { 0x43fa7947841e7518, -0x1a3905a69c63b929, -0x5ef9a1e21cfad48c, 0x7d47c6a2cfb89030 },
+ { -0x0a2daa1b61822949, -0x7fe9eea39ef4e154, 0x3c99975d92e187ca, 0x13815762979125c2 },
+ { 0x3fdad0148ef0d6e0, -0x62c18b656eab90c4, 0x71ec621026bb8157, 0x148cf58d34c9ec80 }
+ },
+ {
+ { -0x1da8d082651b8a93, 0x56c345bb88f3487f, -0x602ef492969f5773, 0x278febad4eaea1b9 },
+ { 0x46a492f67934f027, 0x469984bef6840aa9, 0x5ca1bc2a89611854, 0x3ff2fa1ebd5dbbd4 },
+ { -0x4e5597e0736cc69a, -0x73de6b63dfd6f368, 0x39115291219d3c52, 0x4104dd02fe9c677b }
+ },
+ {
+ { -0x7edeb1f924f69548, 0x21a8b6c90ce44f35, 0x6524c12a409e2af5, 0x0165b5a48efca481 },
+ { 0x72b2bf5e1124422a, -0x5e05f3cc675cc54b, -0x6b349efe05ad499a, 0x2c863b00afaf53d5 },
+ { -0x0e6f5b8b5f7b958a, 0x12eff984cd2f7cc0, 0x695e290658aa2b8f, 0x591b67d9bffec8b8 }
+ },
+ {
+ { -0x66464c8e60e74aa3, -0x1b9a1a055e739be2, 0x61081136c29f05ed, 0x489b4f867030128b },
+ { 0x312f0d1c80b49bfa, 0x5979515eabf3ec8a, 0x727033c09ef01c88, 0x3de02ec7ca8f7bcb },
+ { -0x2dcdefd2c5146d11, -0x1e9dac4b9ee9579f, 0x3d7eabe7190baa24, 0x49f5fbba496cbebf }
+ },
+ {
+ { 0x155d628c1e9c572e, -0x75b279533a77b8bf, -0x6e5cad09aea89c15, 0x06a1a6c28867515b },
+ { 0x30949a108a5bcfd4, -0x23bf228f439b8c15, -0x6d3d6b3ecf83f2e4, 0x5604a86dcbfa6e74 },
+ { 0x7288d1d47c1764b6, 0x72541140e0418b51, -0x60fce59fe753092f, 0x20989e89fe2742c6 }
+ },
+ {
+ { 0x1674278b85eaec2e, 0x5621dc077acb2bdf, 0x640a4c1661cbf45a, 0x730b9950f70595d3 },
+ { 0x499777fd3a2dcc7f, 0x32857c2ca54fd892, -0x5d86279b2df81c60, 0x0403ed1d0ca67e29 },
+ { -0x36b4d2ca78b13aae, -0x3a19373067db9073, -0x0834b905e93fca32, 0x5bd7454308303dcc }
+ },
+ {
+ { -0x7a3b6cdeea1886d6, -0x39b3765d42322237, -0x62e1c257525c289e, 0x5bb7db123067f82c },
+ { 0x7f9ad19528b24cc2, 0x7f6b54656335c181, 0x66b8b66e4fc07236, 0x133a78007380ad83 },
+ { 0x0961f467c6ca62be, 0x04ec21d6211952ee, 0x182360779bd54770, 0x740dca6d58f0e0d2 }
+ },
+},
+{
+ {
+ { 0x3906c72aed261ae5, -0x65497026771eff09, -0x0a16fa650cc9fe69, 0x0e53dc78bf2b6d47 },
+ { 0x50b70bf5d3f0af0b, 0x4feaf48ae32e71f7, 0x60e84ed3a55bbd34, 0x00ed489b3f50d1ed },
+ { -0x46f7d640868e7886, 0x5e4444636d17e631, 0x4d05c52e18276893, 0x27632d9a5a4a4af5 }
+ },
+ {
+ { -0x567d7a2e78150025, -0x5a4b0444272f579c, -0x49a70d80fdd99c09, 0x3bbc2b22d99ce282 },
+ { -0x2ee00faeab4d9f32, -0x27923c718d06ad90, 0x601fcd0d267cc138, 0x2b67916429e90ccd },
+ { -0x46e836ada7c3f5a8, 0x653ff9b80fe4c6f3, -0x64f258284320c3f4, 0x43a0eeb6ab54d60e }
+ },
+ {
+ { 0x3ac6322357875fe8, -0x262b0b130a043471, -0x72117b6cc7d449e0, 0x50c5eaa14c799fdc },
+ { 0x396966a46d4a5487, -0x07ee5e7553d44c46, 0x66e4685b5628b26b, 0x70a477029d929b92 },
+ { -0x22f12374290d04c4, 0x54c63aa79cc7b7a0, -0x51f4fcd4d37260e6, 0x6f9ce107602967fb }
+ },
+ {
+ { 0x139693063520e0b5, 0x437fcf7c88ea03fe, -0x082b3bf42c36a644, 0x699154d1f893ded9 },
+ { -0x52efab4e321e3dd6, -0x3b5716fdb714cd21, 0x5f3e7b33accdc0ea, 0x72364713fc79963e },
+ { 0x315d5c75b4b27526, -0x33347bd2fdc9255b, 0x22f0c8a3345fee8e, 0x73975a617d39dbed }
+ },
+ {
+ { 0x6f37f392f4433e46, 0x0e19b9a11f566b18, 0x220fb78a1fd1d662, 0x362a4258a381c94d },
+ { -0x1bfdb2069c8a25f0, 0x78d3251a1830c870, -0x6fd4e6b79a7326e4, 0x7e18b10b29b7438a },
+ { -0x6f8e26ecd49414d1, 0x0f26e9ad28418247, -0x1546e13642136da3, 0x4be65bc8f48af2de }
+ },
+ {
+ { 0x1d50fba257c26234, 0x7bd4823adeb0678b, -0x3d4f239159ac750b, 0x5665eec6351da73e },
+ { 0x78487feba36e7028, 0x5f3f13001dd8ce34, -0x6cb04ed2b4cf3b77, 0x056c244d397f0a2b },
+ { -0x24c11ff6bc404df0, 0x4972018720800ac2, 0x26ab5d6173bd8667, 0x20b209c2ab204938 }
+ },
+ {
+ { 0x1fcca94516bd3289, 0x448d65aa41420428, 0x59c3b7b216a55d62, 0x49992cc64e612cd8 },
+ { 0x549e342ac07fb34b, 0x02d8220821373d93, -0x43d9d28f532e0a99, 0x7a92c9fdfbcac784 },
+ { 0x65bd1bea70f801de, 0x1befb7c0fe49e28a, -0x579cf9324e4d51b6, 0x3b7ac0cd265c2a09 }
+ },
+ {
+ { -0x0f2ab1b0dd12c659, -0x5d5516e1a9f7eaf6, -0x0bde4d161225178b, 0x31bc531d6b7de992 },
+ { -0x7dd411bc73fe4314, 0x530cb525c0fbc73b, 0x48519034c1953fe9, 0x265cc261e09a0f5b },
+ { -0x20c2ecb2567f068f, 0x7a4fb8d1221a22a7, 0x3df7d42035aad6d8, 0x2a14edcc6a1a125e }
+ },
+},
+{
+ {
+ { 0x231a8c570478433c, -0x484ad8f13d7ebc63, -0x245566151c26f861, 0x2c03f5256c2b03d9 },
+ { -0x20b711f8ad3031b2, -0x3c00050cf913f749, 0x05710b2ab95459c4, 0x161d25fa963ea38d },
+ { 0x790f18757b53a47d, 0x307b0130cf0c5879, 0x31903d77257ef7f9, 0x699468bdbd96bbaf }
+ },
+ {
+ { -0x2722c2199556e6b8, 0x485064c22fc0d2cc, -0x64b7db99cb0215d1, 0x293e1c4e6c4a2e3a },
+ { -0x42e0d0b90b250131, 0x7cef0114a47fd6f7, -0x2ce00225b5b84c81, 0x525219a473905785 },
+ { 0x376e134b925112e1, 0x703778b5dca15da0, -0x4fba7650b9e3ceef, 0x5b605c447f032823 }
+ },
+ {
+ { 0x3be9fec6f0e7f04c, -0x7995a8618a1cb69e, 0x5542ef161e1de61a, 0x2f12fef4cc5abdd5 },
+ { -0x469a7fa6df3b8377, -0x180feff36dc47034, 0x0001256502e2ef77, 0x24a76dcea8aeb3ee },
+ { 0x0a4522b2dfc0c740, 0x10d06e7f40c9a407, -0x3930ebbe87300998, 0x5e607b2518a43790 }
+ },
+ {
+ { -0x5fd3bce35a6930ec, -0x1c3bd2bf512c1c00, -0x2dbad97fd1f0d925, 0x201f33139e457068 },
+ { 0x58b31d8f6cdf1818, 0x35cfa74fc36258a2, -0x1e4c00b09919e292, 0x5067acab6ccdd5f7 },
+ { -0x02ad8094f7fc62af, 0x18b14964017c0006, -0x2addf14fd1da5b58, 0x397cba8862460375 }
+ },
+ {
+ { 0x7815c3fbc81379e7, -0x599e6bdf221ed50f, -0x00563f077a57022b, 0x771b4022c1e1c252 },
+ { 0x30c13093f05959b2, -0x1dc55e721656868a, 0x222fd491721d5e26, 0x2339d320766e6c3a },
+ { -0x27822679aec5d059, -0x0a53648e062b30f8, -0x2f943ce4e15d7c4d, 0x331a189219971a76 }
+ },
+ {
+ { 0x26512f3a9d7572af, 0x5bcbe28868074a9e, -0x7b123e3eee7f083c, 0x1ac9619ff649a67b },
+ { -0x0ae990ba04b07f3a, -0x63c938219e388a31, -0x1c2b17e46fbe26e4, 0x31167c6b83bdfe21 },
+ { -0x0dd4c7bdadb4ef98, 0x5068343bee9ce987, -0x03628e7bb59daf38, 0x612436341f08b111 }
+ },
+ {
+ { -0x749cb61ce5d2d9c8, -0x622048ff642c02cb, 0x7f8bf1b8a3a06ba4, 0x1522aa3178d90445 },
+ { -0x2662be2478b17673, 0x09fea5f16c07dc20, 0x793d2c67d00f9bbc, 0x46ebe2309e5eff40 },
+ { 0x2c382f5369614938, -0x2501bf6548d292f0, -0x1737cc6e49b90dd9, 0x45fe70f50524306c }
+ },
+ {
+ { 0x62f24920c8951491, 0x05f007c83f630ca2, 0x6fbb45d2f5c9d4b8, 0x16619f6db57a2245 },
+ { -0x25b78a5969f3f474, 0x5b68d076ef0e2f20, 0x07fb51cf3d0b8fd4, 0x428d1623a0e392d4 },
+ { 0x084f4a4401a308fd, -0x57dde63c895a3554, -0x214721b9bc2e4383, 0x1d81592d60bd38c6 }
+ },
+},
+{
+ {
+ { 0x3a4a369a2f89c8a1, 0x63137a1d7c8de80d, -0x4353ff7587125feb, 0x2cb8b3a5b483b03f },
+ { -0x27cc284113d5b3c8, 0x2c9162830acc20ed, -0x16c5b8556d208a7f, 0x702d67a3333c4a81 },
+ { 0x36e417cbcb1b90a1, 0x33b3ddaa7f11794e, 0x3f510808885bc607, 0x24141dc0e6a8020d }
+ },
+ {
+ { -0x6e6da233427cea83, 0x3ca1205322cc8094, 0x28e57f183f90d6e4, 0x1a4714cede2e767b },
+ { 0x59f73c773fefee9d, -0x4c0e10763e306763, -0x1ca204bd1fd1aba1, 0x5766120b47a1b47c },
+ { -0x24df45f047494801, -0x48cd3c4988aee05f, -0x56d4ae3f660fd277, 0x4f3875ad489ca5f1 }
+ },
+ {
+ { 0x79ed13f6ee73eec0, -0x5a39ad9296eef44f, -0x1b76d73c79fc79f4, 0x722a1446fd7059f5 },
+ { -0x380389d0b6cd54de, 0x7ac0edf72f4c3c1b, 0x5f6b55aa9aa895e8, 0x3680274dad0a0081 },
+ { -0x2f6a6016573077e7, -0x2f566aaf7b8a5664, 0x6eac173320b09cc5, 0x628ecf04331b1095 }
+ },
+ {
+ { -0x64be5307a38b330f, -0x498cce7ef7d9adaf, -0x6636d512ee524eb9, 0x7a47d70d34ecb40f },
+ { -0x67434ee7562f2244, -0x11bb61cbf74b7fd5, -0x78f76dd947594efc, 0x685f349a45c7915d },
+ { 0x60a0c4cbcc43a4f5, 0x775c66ca3677bea9, -0x5e855e8ad0070a13, 0x11ded9020e01fdc0 }
+ },
+ {
+ { 0x471f95b03bea93b7, 0x0552d7d43313abd3, -0x426c8f1d1e81c085, 0x7b120f1db20e5bec },
+ { -0x76f187f6351018fc, -0x78d7d6921cf17394, 0x4c5cd2a392aeb1c9, 0x194263d15771531f },
+ { 0x17d2fb3d86502d7a, -0x4a9b27bbaf596cae, 0x7da962c8a60ed75d, 0x00d0f85b318736aa }
+ },
+ {
+ { -0x598ac3e10289de3f, 0x69c0b4a7445671f5, -0x68e0ad8bfa4dc3ef, 0x387bc74851a8c7cd },
+ { -0x6874ebd188837b03, -0x0bfd9bb8fa573f9e, -0x59852ae41819ed39, 0x2f7b459698dd6a33 },
+ { -0x7e76b4b2b5ad5658, -0x5226c1ed09477cd1, 0x184d8548b61bd638, 0x3f1c62dbd6c9f6cd }
+ },
+ {
+ { 0x3fad3e40148f693d, 0x052656e194eb9a72, 0x2f4dcbfd184f4e2f, 0x406f8db1c482e18b },
+ { 0x2e8f1f0091910c1f, -0x5b20b01f400d1ed4, 0x60c6560aee927438, 0x6338283facefc8fa },
+ { -0x619cf2d380e6e11c, 0x4fbf8301bc3ff670, 0x787d8e4e7afb73c4, 0x50d83d5be8f58fa5 }
+ },
+ {
+ { -0x3f53306f4b2c4993, -0x58fa621a9e8cd1a0, 0x033d1f7870c6b0ba, 0x584161cd26d946e4 },
+ { -0x7a97c6e93ee5e769, 0x2d69a4efe506d008, 0x39af1378f664bd01, 0x65942131361517c6 },
+ { -0x440d4e5f8d2d835e, -0x40c6c3a6042138fc, -0x167244311d9d47e2, 0x02eebd0b3029b589 }
+ },
+},
+{
+ {
+ { -0x789a4960847a3a18, 0x6ff0678bd168bab2, 0x3a70e77c1d330f9b, 0x3a5f6d51b0af8e7c },
+ { 0x61368756a60dac5f, 0x17e02f6aebabdc57, 0x7f193f2d4cce0f7d, 0x20234a7789ecdcf0 },
+ { 0x76d20db67178b252, 0x071c34f9d51ed160, -0x09d5b5df4c1bee90, 0x7cd682353cffe366 }
+ },
+ {
+ { -0x599a329f97530b0d, 0x42d92d183cd7e3d3, 0x5759389d336025d9, 0x3ef0253b2b2cd8ff },
+ { 0x0be1a45bd887fab6, 0x2a846a32ba403b6e, -0x266defed1691a000, 0x2838c8863bdc0943 },
+ { -0x2e944f30b5b9afd0, -0x05b694beea3a8855, -0x7d3051750b54be63, 0x21dcb8a606a82812 }
+ },
+ {
+ { -0x6572ff054188ce46, -0x7dfc9f819d61e777, -0x4d33fdc8bc0c2681, 0x5d840dbf6c6f678b },
+ { 0x5c6004468c9d9fc8, 0x2540096ed42aa3cb, 0x125b4d4c12ee2f9c, 0x0bc3d08194a31dab },
+ { 0x706e380d309fe18b, 0x6eb02da6b9e165c7, 0x57bbba997dae20ab, 0x3a4276232ac196dd }
+ },
+ {
+ { 0x3bf8c172db447ecb, 0x5fcfc41fc6282dbd, -0x7f53003f8a55ea02, 0x0770c9e824e1a9f9 },
+ { 0x4b42432c8a7084fa, -0x7675e61c20461abb, -0x4160ffde63a71ba3, 0x1ff177cea16debd1 },
+ { -0x309e2665ba4a4a03, -0x79f67b16e4c586dc, -0x18cff6e6cfc1c177, 0x39f264fd41500b1e }
+ },
+ {
+ { -0x2e64b55401f6841f, -0x5b92031e201fe6d7, -0x3c36f76bd3590e01, 0x65c621272c35f14e },
+ { -0x5852cbe824181d64, -0x426bc895d463ec64, -0x5f16e4716ca68457, 0x1712d73468889840 },
+ { -0x18d4760731ce6c23, 0x4d103356a125c0bb, 0x0419a93d2e1cfe83, 0x22f9800ab19ce272 }
+ },
+ {
+ { 0x42029fdd9a6efdac, -0x46ed3141cb5ab6bf, 0x640f64b987bdf37b, 0x4171a4d38598cab4 },
+ { 0x605a368a3e9ef8cb, -0x1c163fdd5aafb8eb, 0x553d48b05f24248f, 0x13f416cd647626e5 },
+ { -0x05d8a7556636b374, 0x23006f6fb000b807, -0x042d6e225225ac6e, 0x508214fa574bd1ab }
+ },
+ {
+ { 0x461a15bb53d003d6, -0x4defd777430c369b, 0x27c576756c683a5a, 0x3a7758a4c86cb447 },
+ { -0x3dfd96eac12901b5, -0x59a598c6aee2883c, -0x3421d9b9d3eb506c, 0x22f960ec6faba74b },
+ { 0x548111f693ae5076, 0x1dae21df1dfd54a6, 0x12248c90f3115e65, 0x5d9fd15f8de7f494 }
+ },
+ {
+ { 0x3f244d2aeed7521e, -0x71c56fd7bcd169eb, -0x1e9b4588d163e92c, 0x3bc187fa47eb98d8 },
+ { 0x031408d36d63727f, 0x6a379aefd7c7b533, -0x561e703a33511db5, 0x332f35914f8fbed3 },
+ { 0x6d470115ea86c20c, -0x6675483493b92edb, -0x2887cd4ac599fe78, 0x450d81ce906fba03 }
+ },
+},
+{
+ {
+ { 0x23264d66b2cae0b5, 0x7dbaed33ebca6576, 0x030ebed6f0d24ac8, 0x2a887f78f7635510 },
+ { -0x0751b2d527bac6fe, 0x7018058ee8db2d1d, -0x554c66a0382d3ee2, 0x53b16d2324ccca79 },
+ { 0x2a23b9e75c012d4f, 0x0c974651cae1f2ea, 0x2fb63273675d70ca, 0x0ba7250b864403f5 }
+ },
+ {
+ { -0x229ca76c79079264, 0x61699176e13a85a4, 0x2e5111954eaa7d57, 0x32c21b57fb60bdfb },
+ { -0x44f2e702fd639bdf, -0x43d2ebde76d670fe, -0x7cb8071974daf16a, 0x7b9f2fe8032d71c9 },
+ { -0x2787dc32ce61f880, -0x103b303e76888a3b, 0x4854fb129a0ab3f7, 0x12c49d417238c371 }
+ },
+ {
+ { 0x09b3a01783799542, 0x626dd08faad5ee3f, -0x45ff4311148feb61, 0x1421b246a0a444c9 },
+ { 0x0950b533ffe83769, 0x21861c1d8e1d6bd1, -0x0fdd27c7ecfd1af0, 0x2509200c6391cab4 },
+ { 0x4aa43a8e8c24a7c7, 0x04c1f540d8f05ef5, -0x5245a1f3f4c14624, 0x2ab5504448a49ce3 }
+ },
+ {
+ { -0x23f8539ce3a2c506, 0x58615171f9df8c6c, 0x72a079d89d73e2b0, 0x7301f4ceb4eae15d },
+ { 0x2ed227266f0f5dec, -0x67db11bea12af7dc, -0x7f8413836b972beb, 0x7093bae1b521e23f },
+ { 0x6409e759d6722c41, -0x598b1e308d408d65, -0x43f5db14c3de1a97, 0x390167d24ebacb23 }
+ },
+ {
+ { -0x2844fab45d0dedf5, -0x1d4631514efa7649, 0x3fe8bac8f3c0edbe, 0x4cbd40767112cb69 },
+ { 0x27f58e3bba353f1c, 0x4c47764dbf6a4361, -0x50443b1a91a9d9b0, 0x07db2ee6aae1a45d },
+ { 0x0b603cc029c58176, 0x5988e3825cb15d61, 0x2bb61413dcf0ad8d, 0x7b8eec6c74183287 }
+ },
+ {
+ { 0x32fee570fc386b73, -0x2574febe25c57339, -0x68a002f537697ca7, 0x6ee809a1b132a855 },
+ { -0x1b35bf87d32d8350, -0x25063cdc04169843, -0x4d642cb5752be162, 0x72810497626ede4d },
+ { -0x6bbb44ce030279c6, 0x2fe3690a3e4e48c5, -0x23d637982f7705db, 0x13bd1e38d173292e }
+ },
+ {
+ { 0x223fb5cf1dfac521, 0x325c25316f554450, 0x030b98d7659177ac, 0x1ed018b64f88a4bd },
+ { -0x2cd4b327969eb64b, -0x1aa6c8287e275549, 0x0bcb2127ae122b94, 0x41e86fcfb14099b0 },
+ { 0x3630dfa1b802a6b0, -0x77f078b8bd52c42b, 0x0af90d6ceec5a4d4, 0x746a247a37cdc5d9 }
+ },
+ {
+ { 0x6eccd85278d941ed, 0x2254ae83d22f7843, -0x3add2fd184403249, 0x681e3351bff0e4e2 },
+ { -0x2ace4742d484650a, 0x5005093537fc5b51, 0x232fcf25c593546d, 0x20a365142bb40f49 },
+ { -0x749b4a627cfcb0bb, 0x2f8b71f21fa20efb, 0x69249495ba6550e4, 0x539ef98e45d5472b }
+ },
+},
+{
+ {
+ { -0x2f8b2769e3518bc1, -0x0792e70a11e39c13, -0x68423aa4180b12d7, 0x4cbad279663ab108 },
+ { 0x6e7bb6a1a6205275, -0x55b0de28bec3717d, 0x6f56d155e88f5cb2, 0x2de25d4ba6345be1 },
+ { -0x7f2e6fdb5f28e033, -0x3ada3df504d77508, -0x4e5c68b4a0c59be7, 0x7d7fbcefe2007233 }
+ },
+ {
+ { -0x3283a23a0c3d6f6c, -0x387e5d65d56efa55, -0x7f39e2c9bde3cfa8, 0x4f9cd196dcd8d4d7 },
+ { -0x0510e195d994d7ff, -0x7993973b2a8c60ea, -0x0975d043e4fc89d4, 0x5975435e87b75a8d },
+ { 0x199297d86a7b3768, -0x2f2fa7dbe52e859d, -0x45fd6352a3e3f3e9, 0x7ccdd084387a0307 }
+ },
+ {
+ { -0x64f37be7989f336d, -0x3251ff85e54cd567, -0x577213799df425e8, 0x3593ca848190ca44 },
+ { -0x2359bdd392d9fbe9, -0x51eac2af6b7dbf43, -0x563f3e4b04973989, 0x428bd0ed61d0cf53 },
+ { -0x6dece765a17b6559, -0x2b273cca9a270533, -0x73adaba4ac02442f, 0x27398308da2d63e6 }
+ },
+ {
+ { -0x465ef1b3f58fdbad, 0x0fa25866d57d1bde, -0x0046264a32d82509, 0x572c2945492c33fd },
+ { 0x42c38d28435ed413, -0x42af0c9fcd873337, -0x44f854e58625fc11, 0x269597aebe8c3355 },
+ { -0x388038ba2932cf42, -0x1b20172c1c455105, -0x5dd377cf55a225f4, 0x7f985498c05bca80 }
+ },
+ {
+ { -0x2ca9eaadf0409c9d, 0x08045a45cf4dfba6, -0x113db04378c05f3e, 0x30f2653cd69b12e7 },
+ { 0x3849ce889f0be117, -0x7ffa52e484ab5d78, 0x3da3c39f23fc921c, 0x76c2ec470a31f304 },
+ { -0x75f736c7553ef37b, 0x46179b60db276bcb, -0x56df3fe1f1905390, 0x2f1273f1596473da }
+ },
+ {
+ { 0x30488bd755a70bc0, 0x06d6b5a4f1d442e7, -0x152e596143a69e9e, 0x38ac1997edc5f784 },
+ { 0x4739fc7c8ae01e11, -0x02ad8b6fb5955461, 0x41d98a8287728f2e, 0x5d9e572ad85b69f2 },
+ { 0x0666b517a751b13b, 0x747d06867e9b858c, -0x53533feebab221b7, 0x22dfcd9cbfe9e69c }
+ },
+ {
+ { 0x56ec59b4103be0a1, 0x2ee3baecd259f969, 0x797cb29413f5cd32, 0x0fe9877824cde472 },
+ { -0x72242d1f3cf2f327, -0x527199a05344bccd, -0x7094da73cdd569e1, 0x6b2916c05448c1c7 },
+ { 0x7edb34d10aba913b, 0x4ea3cd822e6dac0e, 0x66083dff6578f815, 0x4c303f307ff00a17 }
+ },
+ {
+ { 0x29fc03580dd94500, -0x132d855b9044136d, 0x130a155fc2e2a7f8, 0x416b151ab706a1d5 },
+ { -0x2cf5c429e84d737b, -0x3a2c8848c688c416, -0x39391873e195a341, 0x0d61b8f78b2ab7c4 },
+ { 0x56a8d7efe9c136b0, -0x42f81a32a71bb4e0, -0x5019d025e4a81f55, 0x191a2af74277e8d2 }
+ },
+},
+{
+ {
+ { 0x09d4b60b2fe09a14, -0x3c7b0f50244e8b82, 0x58e2ea8978b5fd6e, 0x519ef577b5e09b0a },
+ { -0x2aaff6a45490b67b, 0x04f4cd5b4fbfaf1a, -0x6271d12ed5f38ac0, 0x2bc24e04b2212286 },
+ { 0x1863d7d91124cca9, 0x7ac08145b88a708e, 0x2bcd7309857031f5, 0x62337a6e8ab8fae5 }
+ },
+ {
+ { -0x2e54cdb1e4c5ed8d, 0x18947cf181055340, 0x3b5d9567a98c196e, 0x7fa00425802e1e68 },
+ { 0x4bcef17f06ffca16, -0x21f91e2496d51e96, 0x0753702d614f42b0, 0x5f6041b45b9212d0 },
+ { 0x7d531574028c2705, -0x7fce829624f28a02, 0x30fface8ef8c8ddd, 0x7e9de97bb6c3e998 }
+ },
+ {
+ { -0x0ffb419d5db2bf23, -0x45f9a66efbad2be1, -0x7e3ba11e9d5bbdcc, 0x4cb829d8a22266ef },
+ { 0x1558967b9e6585a3, -0x6836631f6716746e, 0x10af149b6eb3adad, 0x42181fe8f4d38cfa },
+ { 0x1dbcaa8407b86681, 0x081f001e8b26753b, 0x3cd7ce6a84048e81, 0x78af11633f25f22c }
+ },
+ {
+ { 0x3241c00e7d65318c, -0x19411a232f179219, 0x118b2dc2fbc08c26, 0x680d04a7fc603dc3 },
+ { -0x7be9142bf4af4544, 0x1508722628208bee, -0x5ceb7050463e3c93, 0x0d07daacd32d7d5d },
+ { -0x063dbeb596a55c15, -0x255bd3b3fa5970df, 0x7c6c23987f93963e, 0x210e8cd30c3954e3 }
+ },
+ {
+ { 0x2b50f16137fe6c26, -0x1efd4327a91bfb28, 0x12b0f1414c561f6b, 0x51b17bc8d028ec91 },
+ { -0x53bdfe0def58e3fa, 0x6a65e0aef3bfb021, -0x43bd3ca3c6c9cd09, 0x56ea8db1865f0742 },
+ { -0x000a04b430acaee7, -0x0b67628620eef760, -0x4203159a65c45cdb, 0x18a11f1174d1a6f2 }
+ },
+ {
+ { -0x0429c3252d85a0d4, -0x0ff03b43755ef929, 0x53fb5c1a8e64a430, 0x04eaabe50c1a2e85 },
+ { 0x407375ab3f6bba29, -0x613c492766e1b7d2, -0x6637f17d1aa06d17, 0x307c13b6fb0c0ae1 },
+ { 0x24751021cb8ab5e7, -0x03dcbbb6a3afef15, 0x5f1e717b4e5610a1, 0x44da5f18c2710cd5 }
+ },
+ {
+ { -0x6ea9019476271534, -0x19486bae1dced95f, -0x428b9c26c6bb14b2, 0x726373f6767203ae },
+ { 0x033cc55ff1b82eb5, -0x4ea51c92bee351ae, -0x45bf49e67004532d, 0x768edce1532e861f },
+ { -0x1cfa358d14810976, 0x662cf31f70eadb23, 0x18f026fdb4c45b68, 0x513b5384b5d2ecbd }
+ },
+ {
+ { 0x5e2702878af34ceb, -0x6ff4fbf646b92952, 0x6512ebf7dabd8512, 0x61d9b76988258f81 },
+ { 0x46d46280c729989e, 0x4b93fbd05368a5dd, 0x63df3f81d1765a89, 0x34cebd64b9a0a223 },
+ { -0x593a58ecb64826b5, -0x5c0c2ea7dc146bba, 0x0416fbd277484834, 0x69d45e6f2c70812f }
+ },
+},
+{
+ {
+ { -0x6019d4bcb0b9f105, -0x212cfc2b59c9f82a, -0x0faddef1485f25dc, 0x237e7dbe00545b93 },
+ { -0x31e908b43ac3ebcf, 0x2b9725ce2072edde, -0x47463c904a4dc119, 0x7e2e0e450b5cc908 },
+ { 0x013575ed6701b430, 0x231094e69f0bfd10, 0x75320f1583e47f22, 0x71afa699b11155e3 }
+ },
+ {
+ { -0x15bdc3e3b8c4af2a, 0x51e87a1f3b38ef10, -0x647b40a04d36416b, 0x00731fbc78f89a1c },
+ { 0x65ce6f9b3953b61d, -0x39a7c615505ebe1a, 0x0f435ffda9f759fe, 0x021142e9c2b1c28e },
+ { -0x1bcf38e7b707e780, -0x4069f3dda1313ee7, -0x49251f7c9445ea1d, 0x4c4d6f3347e15808 }
+ },
+ {
+ { 0x2f0cddfc988f1970, 0x6b916227b0b9f51b, 0x6ec7b6c4779176be, 0x38bf9500a88f9fa8 },
+ { 0x18f7eccfc17d1fc9, 0x6c75f5a651403c14, -0x24218ed40811f321, 0x193fddaaa7e47a22 },
+ { 0x1fd2c93c37e8876f, -0x5d09e1a5e72eb9d4, 0x5080f58239241276, 0x6a6fb99ebf0d4969 }
+ },
+ {
+ { -0x114edd4a491bdc3a, -0x6c628fef0d790072, -0x6f56d57ce230a274, 0x136fda9f42c5eb10 },
+ { 0x6a46c1bb560855eb, 0x2416bb38f893f09d, -0x28e2eec8708e533f, 0x75f76914a31896ea },
+ { -0x06b3204e5cfa422f, 0x0f364b9d9ff82c08, 0x2a87d8a5c3bb588a, 0x022183510be8dcba }
+ },
+ {
+ { -0x62a58efebccf8581, -0x4f9c21613b825ba1, 0x22bbfe52be927ad3, 0x1387c441fd40426c },
+ { 0x4af766385ead2d14, -0x5f71277f3583a7d0, 0x0d13a6e610211e3d, 0x6a071ce17b806c03 },
+ { -0x4a2c3c2e78687508, 0x722b5a3d7f0e4413, 0x0d7b4848bb477ca0, 0x3171b26aaf1edc92 }
+ },
+ {
+ { -0x59f248274d75b82f, -0x5940eb29e88f5b0f, -0x2b5e076cac2242a8, 0x6c514a63344243e9 },
+ { -0x56d0ce6f68a9b358, -0x008447b3dd8a1ee7, 0x4f55fe37a4875150, 0x221fd4873cf0835a },
+ { 0x2322204f3a156341, -0x048c1f1645f5fcd3, -0x031f22b3bef0fcf2, 0x48daa596fb924aaa }
+ },
+ {
+ { 0x14f61d5dc84c9793, -0x66be061c10be7dfa, -0x320a4770cb9d8854, 0x58c837fa0e8a79a9 },
+ { 0x6eca8e665ca59cc7, -0x57b8dab4d1c75360, 0x31afc708d21e17ce, 0x676dd6fccad84af7 },
+ { 0x0cf9688596fc9058, 0x1ddcbbf37b56a01b, -0x233d1882b6ca2996, 0x1c4f73f2c6a57f0a }
+ },
+ {
+ { -0x4c918f910383cb7c, 0x73dfc9b4c3c1cf61, -0x14e2863687e3381b, 0x70459adb7daf675c },
+ { 0x0e7a4fbd305fa0bb, -0x7d62b31fab399c53, -0x0bde3c7cd01cc7b8, 0x795ac80d1bf64c42 },
+ { 0x1b91db4991b42bb3, 0x572696234b02dcca, -0x6020611ae0738724, 0x5fe162848ce21fd3 }
+ },
+},
+{
+ {
+ { 0x315c29c795115389, -0x281f1af879d08b32, 0x0c4a762185927432, 0x72de6c984a25a1e4 },
+ { -0x1d86f551b2f883bf, -0x746c7d8f248b965d, 0x6eb632dc8abd16a2, 0x720814ecaa064b72 },
+ { -0x51654aac40955cf0, 0x050a50a9806d6e1b, -0x6d448bfc5200aec7, 0x0394d27645be618b }
+ },
+ {
+ { -0x0ac69bda4dcaba5c, 0x15a7a27e98fbb296, -0x5493ad439c90227a, 0x79d995a8419334ee },
+ { 0x4d572251857eedf4, -0x1c8db1221e616c3b, -0x758ebdf1f4868fcb, 0x3b3c833687abe743 },
+ { -0x32757159ee6a228b, -0x5afb2757e22657d1, 0x540dca81a35879b6, 0x60dd16a379c86a8a }
+ },
+ {
+ { 0x3501d6f8153e47b8, -0x485698abeb5d09f4, 0x112ee8b6455d9523, 0x4e62a3c18112ea8a },
+ { 0x35a2c8487381e559, 0x596ffea6d78082cb, -0x34688e14245849ad, 0x5a08b5019b4da685 },
+ { -0x372b53fbae95487a, 0x595af3215295b23d, -0x29122dcb24fdcf3f, 0x0929efe8825b41cc }
+ },
+ {
+ { -0x74ce8d4852a99ae3, 0x01581b7a3fabd717, 0x2dc94df6424df6e4, 0x30376e5d2c29284f },
+ { 0x5f0601d1cbd0f2d3, 0x736e412f6132bb7f, -0x7c9fbbcddc722179, 0x1e3a5272f5c0753c },
+ { -0x2d6e72587ea65a64, 0x6bdc1cd93f0713f3, 0x565f7a934acd6590, 0x53daacec4cb4c128 }
+ },
+ {
+ { -0x667ad43c7ad30250, 0x2cc12e9559d6ed0b, 0x70f9e2bf9b5ac27b, 0x4f3b8c117959ae99 },
+ { 0x4ca73bd79cc8a7d6, 0x4d4a738f47e9a9b2, -0x0b340ed6bd0a0200, 0x01a13ff9bdbf0752 },
+ { 0x55b6c9c82ff26412, 0x1ac4a8c91fb667a8, -0x2ad840301488740e, 0x303337da7012a3be }
+ },
+ {
+ { -0x6892c334052d022f, -0x34777c68c859bf58, 0x2ff00c1d6734cb25, 0x269ff4dc789c2d2b },
+ { -0x6aabdddd73e36284, 0x01fac1371a9b340f, 0x7e8d9177925b48d7, 0x53f8ad5661b3e31b },
+ { 0x0c003fbdc08d678d, 0x4d982fa37ead2b17, -0x3f8194324d1a7d0f, 0x296c7291df412a44 }
+ },
+ {
+ { -0x204dcdfa25474a62, 0x465aeaa0c8092250, -0x2ecc3ee7658da2e8, 0x2327370261f117d1 },
+ { 0x7903de2b33daf397, -0x2f00f9e63659db4d, -0x75e2dad4aaa4c1e8, 0x2b6d581c52e0b7c0 },
+ { 0x3d0543d3623e7986, 0x679414c2c278a354, -0x51bc0f338d9e690a, 0x7836c41f8245eaba }
+ },
+ {
+ { -0x359ae17b7fee6c84, -0x394f3b91910be5d8, -0x48fde458a0c072ae, 0x119dff99ead7b9fd },
+ { -0x185dab24b616a57f, 0x5192d5d008b0ad73, 0x4d20e5b1d00afc07, 0x5d55f8012cf25f38 },
+ { 0x43eadfcbf4b31d4d, -0x39afc08beeeb776e, -0x0111973af9f2c4e9, 0x329293b3dd4a0ac8 }
+ },
+},
+{
+ {
+ { 0x2879852d5d7cb208, -0x4721228f97820d19, -0x23f40054de97876f, 0x2b44c043677daa35 },
+ { 0x4e59214fe194961a, 0x49be7dc70d71cd4f, -0x6cff302dc4af0dd3, 0x4789d446fc917232 },
+ { 0x1a1c87ab074eb78e, -0x05392e7166250b99, 0x3eacbbcd484f9067, 0x60c52eef2bb9a4e4 }
+ },
+ {
+ { 0x702bc5c27cae6d11, 0x44c7699b54a48cab, -0x1043bfa945b6d14e, 0x70d77248d9b6676d },
+ { 0x0b5d89bc3bfd8bf1, -0x4f946dc8360caae6, 0x0e4c16b0d53028f5, 0x10bc9c312ccfcaab },
+ { -0x557517b4c13d5fa5, -0x6796610b12e87e20, 0x794513e4708e85d1, 0x63755bd3a976f413 }
+ },
+ {
+ { 0x3dc7101897f1acb7, 0x5dda7d5ec165bbd8, 0x508e5b9c0fa1020f, 0x2763751737c52a56 },
+ { -0x4aa05fc1d52ef7ad, 0x356f75909ee63569, -0x60060e0241964770, 0x0d8cc1c48bc16f84 },
+ { 0x029402d36eb419a9, -0x0f4bb181884b9f5b, -0x30579dcf2bc3b6aa, 0x70c2dd8a7ad166e7 }
+ },
+ {
+ { -0x6e2b6982471281ed, 0x74252f0ad776817a, -0x1bf67d1ff27ada9c, 0x32b8613816a53ce5 },
+ { 0x656194509f6fec0e, -0x11d18156b939ae73, -0x68cc3e0c981f64a4, 0x2e0fac6363948495 },
+ { 0x79e7f7bee448cd64, 0x6ac83a67087886d0, -0x07602b265f1b24d2, 0x4179215c735a4f41 }
+ },
+ {
+ { -0x1b51cc46d79432cc, -0x48108149aa622924, 0x278b141fb3d38e1f, 0x31fa85662241c286 },
+ { -0x738f6b18282312d6, -0x6804753cb82c6390, -0x1ec41fcc56f926fe, 0x700344a30cd99d76 },
+ { -0x507d93bdd1c9dd0c, -0x3edfd67867ccafd3, -0x643e481ed4c76edd, 0x24bb2312a9952489 }
+ },
+ {
+ { 0x41f80c2af5f85c6b, 0x687284c304fa6794, -0x76ba20665c45e453, 0x0d1d2af9ffeb5d16 },
+ { -0x4e5712e8cd21983d, 0x3cb49418461b4948, -0x7142bcbc8930432e, 0x0fee3e871e188008 },
+ { -0x5625755ecd9de121, 0x30b822a159226579, 0x4004197ba79ac193, 0x16acd79718531d76 }
+ },
+ {
+ { -0x36a6393a87784953, -0x6b1e6152a06f0146, 0x16e24e62a342f504, 0x164ed34b18161700 },
+ { 0x72df72af2d9b1d3d, 0x63462a36a432245a, 0x3ecea07916b39637, 0x123e0ef6b9302309 },
+ { 0x487ed94c192fe69a, 0x61ae2cea3a911513, -0x7884092c465b21d9, 0x78da0fc61073f3eb }
+ },
+ {
+ { -0x5d607f0e97f3c56c, 0x71f77e151ae9e7e6, 0x1100f15848017973, 0x054aa4b316b38ddd },
+ { 0x5bf15d28e52bc66a, 0x2c47e31870f01a8e, 0x2419afbc06c28bdd, 0x2d25deeb256b173a },
+ { -0x2037b972e6d98348, 0x0b28789c66e54daf, 0x2aeb1d2a666eec17, 0x134610a6ab7da760 }
+ },
+},
+{
+ {
+ { -0x26ebcf1f23fd73c4, 0x0eb955a85217c771, 0x4b09e1ed2c99a1fa, 0x42881af2bd6a743c },
+ { -0x350aa13d83a64dc1, -0x665112c1eab2fb0e, 0x68441d72e14141f4, 0x140345133932a0a2 },
+ { 0x7bfec69aab5cad3d, -0x3dc1732cb34d3053, 0x685dd14bfb37d6a2, 0x0ad6d64415677a18 }
+ },
+ {
+ { 0x7914892847927e9f, 0x33dad6ef370aa877, 0x1f8f24fa11122703, 0x5265ac2f2adf9592 },
+ { 0x781a439e417becb5, 0x4ac5938cd10e0266, 0x5da385110692ac24, 0x11b065a2ade31233 },
+ { 0x405fdd309afcb346, -0x268dc2bbd719c0ac, -0x6b3fe20fa09a5552, 0x43e4dc3ae14c0809 }
+ },
+ {
+ { -0x1590853c523d395d, -0x2f16d709168e836c, -0x1d2c861529ba150b, 0x46dd8785c51ffbbe },
+ { -0x43ed380e56c75ae9, 0x473028ab3180b2e1, 0x3f78571efbcd254a, 0x74e534426ff6f90f },
+ { 0x709801be375c8898, 0x4b06dab5e3fd8348, 0x75880ced27230714, 0x2b09468fdd2f4c42 }
+ },
+ {
+ { 0x5b97946582ffa02a, -0x25f695ae01570ab7, -0x5f9caec8a0885065, 0x1bcfde61201d1e76 },
+ { -0x6838b61148fe346a, -0x7c0bc72b495c963d, 0x62962b8b9a402cd9, 0x6976c7509888df7b },
+ { 0x4a4a5490246a59a2, -0x29c1422117802270, -0x26bc8398f2dc8e06, 0x69e87308d30f8ed6 }
+ },
+ {
+ { 0x0f80bf028bc80303, 0x6aae16b37a18cefb, -0x22b815b828d3295d, 0x61943588f4ed39aa },
+ { 0x435a8bb15656beb0, -0x07053645b0b2a436, -0x464d873beab73f8b, 0x3eb0ef76e892b622 },
+ { -0x2d91a3c16efc607b, -0x3f161882090cc557, -0x176973aa8ff9956d, 0x3c34d1881faaaddd }
+ },
+ {
+ { -0x42a4f470d0001f27, 0x6aa254103ed24fb9, 0x2ac7d7bcb26821c4, 0x605b394b60dca36a },
+ { 0x3f9d2b5ea09f9ec0, 0x1dab3b6fb623a890, -0x5f645c158d26d93c, 0x374193513fd8b36d },
+ { -0x4b17a91ba562e12e, -0x1017b7899368565e, -0x4efb309be1a11183, 0x2f50b81c88a71c8f }
+ },
+ {
+ { 0x2b552ca0a7da522a, 0x3230b336449b0250, -0x0d3b3a435b466047, 0x7b2c674958074a22 },
+ { 0x31723c61fc6811bb, -0x634bafb79dee7ff1, 0x768933d347995753, 0x3491a53502752fcd },
+ { -0x2aae9a77c12d7321, 0x12d84fd2d362de39, 0x0a874ad3e3378e4f, 0x000d2b1f7c763e74 }
+ },
+ {
+ { -0x69db8873c16b5755, 0x0ad6f3cee9a78bec, -0x6b75387ef28bc3b1, 0x76627935aaecfccc },
+ { 0x3d420811d06d4a67, -0x4103fb7a6f1f001d, -0x078f394842b78422, 0x6e2a7316319afa28 },
+ { 0x56a8ac24d6d59a9f, -0x37248ac1cf690ffa, 0x477f41e68f4c5299, 0x588d851cf6c86114 }
+ },
+},
+{
+ {
+ { -0x32d59a18882e0aeb, 0x548991878faa60f1, -0x4e48c4432543f91b, 0x654878cba97cc9fb },
+ { 0x51138ec78df6b0fe, 0x5397da89e575f51b, 0x09207a1d717af1b9, 0x2102fdba2b20d650 },
+ { -0x69611bfafaa3195f, 0x36bca7681251ad29, 0x3a1af517aa7da415, 0x0ad725db29ecb2ba }
+ },
+ {
+ { -0x013843f364fa907b, 0x537d5268e7f5ffd7, 0x77afc6624312aefa, 0x4f675f5302399fd9 },
+ { -0x23bd984e7cb1dba9, -0x498abb4a8f31e43b, 0x1af07a0bf7d15ed7, 0x4aefcffb71a03650 },
+ { -0x3cd2c9c9fbeae8e2, -0x32d410ee7667b7c5, -0x78f591522f6baef0, 0x0bccbb72a2a86561 }
+ },
+ {
+ { 0x186d5e4c50fe1296, -0x1fc6847d01176082, 0x3bc7f6c5507031b0, 0x6678fd69108f37c2 },
+ { 0x185e962feab1a9c8, -0x791819ca9aeb8233, -0x4f6d1fce44a4920e, 0x4024f0ab59d6b73e },
+ { 0x1586fa31636863c2, 0x07f68c48572d33f2, 0x4f73cc9f789eaefc, 0x2d42e2108ead4701 }
+ },
+ {
+ { 0x21717b0d0f537593, -0x6eb196f4ece1f9b4, 0x1bb687ae752ae09f, 0x420bf3a79b423c6e },
+ { -0x680aecea6b202d65, 0x6155985d313f4c6a, -0x145ec0f8f7baaff0, 0x676b2608b8d2d322 },
+ { -0x7ec7459ae3a4d4b9, -0x798e4913cee4e480, 0x7bff0cb1bc3135b0, 0x745d2ffa9c0cf1e0 }
+ },
+ {
+ { 0x6036df5721d34e6a, -0x4e2477d866844c30, -0x2c3df63c378a9506, 0x06e15be54c1dc839 },
+ { -0x40ada5e1d4363743, -0x15a4d9f7d9b8627f, -0x2aee38f120feaa25, 0x1ae23ceb960cf5d0 },
+ { 0x5b725d871932994a, 0x32351cb5ceb1dab0, 0x7dc41549dab7ca05, 0x58ded861278ec1f7 }
+ },
+ {
+ { 0x2dfb5ba8b6c2c9a8, 0x48eeef8ef52c598c, 0x33809107f12d1573, 0x08ba696b531d5bd8 },
+ { -0x27e8c86c0d993aa4, -0x3736893a33bab1b7, 0x5ce382f8bc26c3a8, 0x2ff39de85485f6f9 },
+ { 0x77ed3eeec3efc57a, 0x04e05517d4ff4811, -0x15c285c00e598e35, 0x120633b4947cfe54 }
+ },
+ {
+ { -0x7d42ceb8b6edeff6, -0x21dc8492819041fa, -0x1ee189e6ee15863a, 0x07433be3cb393bde },
+ { 0x0b94987891610042, 0x4ee7b13cecebfae8, 0x70be739594f0a4c0, 0x35d30a99b4d59185 },
+ { -0x0086bb3fa316680c, 0x575d3de4b05c51a3, 0x583381fd5a76847c, 0x2d873ede7af6da9f }
+ },
+ {
+ { -0x559dfd1eb1a2067f, -0x5df2a6e8afea1e0b, 0x18a275d3bae21d6c, 0x0543618a01600253 },
+ { 0x157a316443373409, -0x054748110b557e27, -0x4f6c01190a59b7fa, 0x2e773654707fa7b6 },
+ { 0x0deabdf4974c23c1, -0x5590f5da6231b96d, 0x04202cb8a29aba2c, 0x4b1443362d07960d }
+ },
+},
+{
+ {
+ { 0x299b1c3f57c5715e, -0x69346d6194979270, 0x3004806447235ab3, 0x2c435c24a44d9fe1 },
+ { 0x47b837f753242cec, 0x256dc48cc04212f2, -0x1ddd04041e26d73b, 0x48ea295bad8a2c07 },
+ { 0x0607c97c80f8833f, 0x0e851578ca25ec5b, 0x54f7450b161ebb6f, 0x7bcb4792a0def80e }
+ },
+ {
+ { 0x1cecd0a0045224c2, 0x757f1b1b69e53952, 0x775b7a925289f681, 0x1b6cc62016736148 },
+ { -0x7b781c2fd438c9a7, 0x4baf8445059979df, -0x2e8368a523529041, 0x57369f0bdefc96b6 },
+ { -0x0e5666fe8a9c7968, 0x353dd1beeeaa60d3, -0x7b6b8eccb3645b78, 0x63fa6e6843ade311 }
+ },
+ {
+ { 0x2195becdd24b5eb7, 0x5e41f18cc0cd44f9, -0x20d7f8bbbe356122, 0x07073b98f35b7d67 },
+ { -0x2ea3dfac9a683e98, -0x608c8bff672d7877, 0x18aee7f13257ba1f, 0x3418bfda07346f14 },
+ { -0x2fc39893b31acf2c, 0x0b64c0473b5df9f4, 0x065cef8b19b3a31e, 0x3084d661533102c9 }
+ },
+ {
+ { -0x6593178989fcde03, 0x7fe2b5109eb63ad8, 0x00e7d4ae8ac80592, 0x73d86b7abb6f723a },
+ { -0x1e094861407b9653, 0x15801004e2663135, -0x65b67ccf508be7e5, 0x3ba2504f049b673c },
+ { 0x0b52b5606dba5ab6, -0x56ecb0f0444e1255, 0x30a9520d9b04a635, 0x6813b8f37973e5db }
+ },
+ {
+ { -0x0e6b35a90cea81d7, 0x136d35705ef528a5, -0x22b3108874fa6644, 0x7d5472af24f833ed },
+ { -0x67ab4fabccbed83f, 0x105d047882fbff25, -0x24b60806bbe790b1, 0x1768e838bed0b900 },
+ { -0x2f1078b250cc25b9, 0x00d3be5db6e339f9, 0x3f2a8a2f9c9ceece, 0x5d1aeb792352435a }
+ },
+ {
+ { 0x12c7bfaeb61ba775, -0x47b19de01d9c4003, 0x0b47a5c35c840dcf, 0x7e83be0bccaf8634 },
+ { -0x0a61944ce6329c36, 0x670c159221d06839, -0x4f92a9a4deaf354a, 0x20fb199d104f12a3 },
+ { 0x61943dee6d99c120, -0x79efe0d1b9f46020, 0x6bb2f1518ee8598d, 0x76b76289fcc475cc }
+ },
+ {
+ { 0x4245f1a1522ec0b3, 0x558785b22a75656d, 0x1d485a2548a1b3c0, 0x60959eccd58fe09f },
+ { 0x791b4cc1756286fa, -0x24312ce828b5ea84, 0x7e732421ea72bde6, 0x01fe18491131c8e9 },
+ { 0x3ebfeb7ba8ed7a09, 0x49fdc2bbe502789c, 0x44ebce5d3c119428, 0x35e1eb55be947f4a }
+ },
+ {
+ { 0x14fd6dfa726ccc74, 0x3b084cfe2f53b965, -0x0cc51b0aad5d374c, 0x59aab07a0d40166a },
+ { -0x242518fe3a8c722d, -0x063909ca4d90e412, 0x61e96a8042f15ef4, 0x3aa1d11faf60a4d8 },
+ { 0x77bcec4c925eac25, 0x1848718460137738, 0x5b374337fea9f451, 0x1865e78ec8e6aa46 }
+ },
+},
+{
+ {
+ { -0x6983ab16e3ad6335, 0x30f6269264c635fb, 0x2747aff478121965, 0x17038418eaf66f5c },
+ { -0x333b48384991e086, 0x44157e25f50c2f7e, 0x3ef06dfc713eaf1c, 0x582f446752da63f7 },
+ { -0x39ce842cdfcdb31c, -0x57efbd175bb7743c, -0x4de10e74b1a5ec9c, 0x0c2a1c4bcda28dc9 }
+ },
+ {
+ { -0x123b7eb7964296bb, 0x0d6d907dbe1c8d22, -0x39c42ded2aa33a55, 0x5a6a9b30a314dc83 },
+ { -0x2db2382f90e0fbb9, -0x4dd961c124783fa7, -0x2ea4fd8d044d2d71, 0x7c558bd1c6f64877 },
+ { -0x2f13eadb2c69b9c3, 0x12bb628ac35a24f0, -0x5af3c586e343a05c, 0x0404a5ca0afbafc3 }
+ },
+ {
+ { 0x62bc9e1b2a416fd1, -0x4a3908d71cafa675, 0x04343fd83d5d6967, 0x39527516e7f8ee98 },
+ { -0x73e0bff8f558bc2a, -0x33452f34a4d9a118, 0x574b046b668fd2de, 0x46395bfdcadd9633 },
+ { 0x117fdb2d1a5d9a9c, -0x6388ba432effa3d6, -0x102b410eab2a9016, 0x76579a29e822d016 }
+ },
+ {
+ { 0x333cb51352b434f2, -0x27cdd7b66c217f1f, -0x4aaed7788af2ca32, 0x02c514bb2a2777c1 },
+ { 0x45b68e7e49c02a17, 0x23cd51a2bca9a37f, 0x3ed65f11ec224c1b, 0x43a384dc9e05bdb1 },
+ { 0x684bd5da8bf1b645, -0x04742c81094ab4ad, 0x313916d7a9b0d253, 0x1160920961548059 }
+ },
+ {
+ { 0x7a385616369b4dcd, 0x75c02ca7655c3563, 0x7dc21bf9d4f18021, 0x2f637d7491e6e042 },
+ { -0x4bb2e996d6253056, -0x25ad60b37beca671, -0x16109c35bac2aaa7, 0x351e125bc5698e0b },
+ { -0x2b4b64b9e5098442, -0x29fcfc853754769f, 0x71dee19ff9a699fb, 0x7f182d06e7ce2a9a }
+ },
+ {
+ { 0x09454b728e217522, -0x55a7170b2b7b4728, -0x2ca7dab280b96fc4, 0x44acc043241c5217 },
+ { 0x7a7c8e64ab0168ec, -0x34a5b5aaea123abd, 0x095519d347cd0eda, 0x67d4ac8c343e93b0 },
+ { 0x1c7d6bbb4f7a5777, -0x74ca012b6e7cec1f, 0x4adca1c6c96b4684, 0x556d1c8312ad71bd }
+ },
+ {
+ { -0x7e0f98a94ee417df, 0x0faff82310a3f3dd, -0x074d2faa9566b9a3, 0x097abe38cc8c7f05 },
+ { 0x17ef40e30c8d3982, 0x31f7073e15a3fa34, 0x4f21f3cb0773646e, 0x746c6c6d1d824eff },
+ { 0x0c49c9877ea52da4, 0x4c4369559bdc1d43, 0x022c3809f7ccebd2, 0x577e14a34bee84bd }
+ },
+ {
+ { -0x6b01314142b228d5, -0x0b95b025f9f0ddef, 0x124a5977c0c8d1ff, 0x705304b8fb009295 },
+ { -0x0f1d97539e58c4f6, -0x0d0505efc86e5a0b, -0x3e1ec17d9492ff17, 0x60fa7ee96fd78f42 },
+ { -0x49c2e2cab2d6913a, -0x0c3cfac1a052ce28, 0x670b958cb4bd42ec, 0x21398e0ca16353fd }
+ },
+},
+{
+ {
+ { -0x793a03e979e48166, -0x095ccfb895d83baf, 0x01667267a1e93597, 0x05ffb9cd6082dfeb },
+ { 0x216ab2ca8da7d2ef, 0x366ad9dd99f42827, -0x519b46ffb022c38b, 0x403a395b53909e62 },
+ { -0x59e805600ac09ec7, 0x60f2b5e513e66cb6, -0x285741104cbb755c, 0x7a2932856f5ea192 }
+ },
+ {
+ { -0x4763bbb7869c6cfe, 0x4ae4f19350c67f2c, -0x0f4ca25737e5063a, 0x39d0003546871017 },
+ { 0x0b39d761b02de888, 0x5f550e7ed2414e1f, -0x59405ba7dd1e56c0, 0x050a2f7dfd447b99 },
+ { 0x437c3b33a650db77, 0x6bafe81dbac52bb2, -0x0166bfd2d2482ce8, 0x2b5b7eec372ba6ce }
+ },
+ {
+ { -0x596bbfb29ec5370c, 0x500c3c2bfa97e72c, -0x78befb2de0313df0, 0x1b205fb38604a8ee },
+ { -0x4c43b4427c0af111, 0x508f0c998c927866, 0x43e76587c8b7e66e, 0x0f7655a3a47f98d9 },
+ { 0x55ecad37d24b133c, 0x441e147d6038c90b, 0x656683a1d62c6fee, 0x0157d5dc87e0ecae }
+ },
+ {
+ { -0x6ad9aaeb28e14adc, -0x19fc277ea20eba6d, 0x147cdf410d4de6b7, 0x5293b1730437c850 },
+ { -0x0d5850aefcab3ec3, -0x285f4eba55c8d4a0, 0x2869b96a05a3d470, 0x6528e42d82460173 },
+ { 0x23d0e0814bccf226, -0x6d38ba327e69046d, -0x749e8693a6abe1a5, 0x40a44df0c021f978 }
+ },
+ {
+ { -0x793691aeb43a2f6b, -0x0df2bf6703597fb6, 0x27363d89c826ea5d, 0x39ca36565719cacf },
+ { -0x25579676b0df1596, -0x15eb5c2eb39df9e8, 0x6001fccb090bf8be, 0x35f4e822947e9cf0 },
+ { -0x68af90d0907848a4, -0x39db515ffcb51f90, 0x1ec856e3aad34dd6, 0x055b0be0e440e58f }
+ },
+ {
+ { 0x4d12a04b6ea33da2, 0x57cf4c15e36126dd, -0x6f13698a11bb2699, 0x64ca348d2a985aac },
+ { 0x6469a17d89735d12, -0x2490d82a199d460f, -0x60345cd795c6a97f, 0x363b8004d269af25 },
+ { -0x66a771e61b3b6ed3, -0x1033c4b1e35a3195, 0x4522ea60fa5b98d5, 0x7064bbab1de4a819 }
+ },
+ {
+ { -0x5d6f3f9ebdabded7, -0x0d1d3d514172a470, -0x30dba724895401e5, 0x02157ade83d626bf },
+ { -0x46e61eaea588f9bf, -0x565d1d38b1807fc7, 0x7527250b3df23109, 0x756a7330ac27b78b },
+ { 0x3e46972a1b9a038b, 0x2e4ee66a7ee03fb4, -0x7e5db78891244b36, 0x1a944ee88ecd0563 }
+ },
+ {
+ { -0x44bf57a6e7dc9d2a, -0x4660aa8875b2e545, -0x72e74bd88a7aa60a, 0x26c20fe74d26235a },
+ { -0x2a56e2eeaefc6c8e, 0x2ed377b799ca26de, -0x5e8dfd5302c99495, 0x0730291bd6901995 },
+ { 0x648d1d9fe9cc22f5, 0x66bc561928dd577c, 0x47d3ed21652439d1, 0x49d271acedaf8b49 }
+ },
+},
+{
+ {
+ { 0x2798aaf9b4b75601, 0x5eac72135c8dad72, -0x2d31559e9e485fdd, 0x1bbfb284e98f7d4e },
+ { -0x760afa75c7d4cc0d, 0x5ae2ba0bad48c0b4, -0x706c4afc5ac24c92, 0x5aa3ed9d95a232e6 },
+ { 0x656777e9c7d96561, -0x34d4edab8d387fca, 0x65053299d9506eee, 0x4a07e14e5e8957cc }
+ },
+ {
+ { 0x240b58cdc477a49b, -0x02c725219bb80fe9, 0x19928d32a7c86aad, 0x50af7aed84afa081 },
+ { 0x4ee412cb980df999, -0x5cea2890c391388f, -0x445a12216da38803, 0x3f0bac391d313402 },
+ { 0x6e4fde0115f65be5, 0x29982621216109b2, 0x780205810badd6d9, 0x1921a316baebd006 }
+ },
+ {
+ { -0x28a55265260c3e75, 0x566a0eef60b1c19c, 0x3e9a0bac255c0ed9, 0x7b049deca062c7f5 },
+ { -0x76bdd08120478f04, 0x2c296beb4f76b3bd, 0x0738f1d436c24df7, 0x6458df41e273aeb0 },
+ { -0x23341c85cabbbb7d, 0x758879330fedbe93, 0x786004c312c5dd87, 0x6093dccbc2950e64 }
+ },
+ {
+ { 0x6bdeeebe6084034b, 0x3199c2b6780fb854, -0x68cc895449d2f96b, 0x6e3180c98b647d90 },
+ { 0x1ff39a8585e0706d, 0x36d0a5d8b3e73933, 0x43b9f2e1718f453b, 0x57d1ea084827a97c },
+ { -0x118549185ed74f8f, -0x5b3ea6926c577456, -0x084b217d4dde9ed0, 0x363e999ddd97bd18 }
+ },
+ {
+ { 0x2f1848dce24baec6, 0x769b7255babcaf60, -0x6f34c391c31016cf, 0x231f979bc6f9b355 },
+ { -0x6957bc3eca11e03c, -0x68914caaf71b3731, -0x4bd097fe4a732cd0, 0x48ee9b78693a052b },
+ { 0x5c31de4bcc2af3c6, -0x4fb44fcf01df72e1, -0x48728ff63eb04b9a, 0x079bfa9b08792413 }
+ },
+ {
+ { -0x0c36127f5d2abdbb, 0x0aa08b7877f63952, -0x2892539c2ef7ab8b, 0x1ef4fb159470636b },
+ { -0x1c6fc5ae25cff20c, -0x7bc69bdcc256a550, -0x12c30ed2f4ca9b80, 0x038c77f684817194 },
+ { -0x7ab1a119a4e98414, 0x59590a4296d0cdc2, 0x72b2df3498102199, 0x575ee92a4a0bff56 }
+ },
+ {
+ { 0x5d46bc450aa4d801, -0x3c50edd85acc4628, 0x389e3b262b8906c2, 0x200a1e7e382f581b },
+ { -0x2b3f7f6f75e7d031, 0x30e170c299489dbd, 0x05babd5752f733de, 0x43d4e7112cd3fd00 },
+ { 0x518db967eaf93ac5, 0x71bc989b056652c0, -0x01d47a26a98e680b, 0x050eca52651e4e38 }
+ },
+ {
+ { -0x6853c6899f199716, -0x64e64401eac54b69, 0x4cb179b534eca79f, 0x6151c09fa131ae57 },
+ { -0x3cbce521bac0f364, -0x160afba1008fc465, -0x03268536127b84c3, 0x4b0ee6c21c58f4c6 },
+ { 0x3af55c0dfdf05d96, -0x22d9d11fd54b1186, 0x11b2bb8712171709, 0x1fef24fa800f030b }
+ },
+},
+{
+ {
+ { -0x006e59956fe99de0, -0x0ddaad51a40e1ff7, 0x7dff85d87f90df7c, 0x4f620ffe0c736fb9 },
+ { -0x4b69edc5949399f7, -0x58af017a7f54a6c8, -0x0b8e40c6483d85a1, 0x507903ce77ac193c },
+ { 0x62f90d65dfde3e34, -0x30d73a6d4605a053, -0x6637910639e9baf0, 0x25d448044a256c84 }
+ },
+ {
+ { 0x2c7c4415c9022b55, 0x56a0d241812eb1fe, -0x0fd15e362849a1f3, 0x4180512fd5323b26 },
+ { -0x4297dcf138164e91, 0x0eb1b9c1c1c5795d, 0x7943c8c495b6b1ff, 0x2f9faf620bbacf5e },
+ { -0x5b00c19675b75a25, -0x4595c7f9426abfc5, -0x60831e50b82a49a3, 0x15e087e55939d2fb }
+ },
+ {
+ { -0x776be7910469c0c8, 0x48a00e80dc639bd5, -0x5b17f6d41693e367, 0x5a097d54ca573661 },
+ { 0x12207543745c1496, -0x2500c30225c79ef4, -0x1b1868d8d38e3cb1, 0x39c07b1934bdede9 },
+ { 0x2d45892b17c9e755, -0x2fcc028d76cf7208, 0x6c2fe9d9525b8bd9, 0x2edbecf1c11cc079 }
+ },
+ {
+ { -0x11f0f0222f785da1, -0x638aceaaa3c1cb12, 0x660c572e8fab3ab5, 0x0854fc44544cd3b2 },
+ { 0x1616a4e3c715a0d2, 0x53623cb0f8341d4d, -0x6910acd638176635, 0x3d4e8dbba668baa6 },
+ { 0x61eba0c555edad19, 0x24b533fef0a83de6, 0x3b77042883baa5f8, 0x678f82b898a47e8d }
+ },
+ {
+ { 0x1e09d94057775696, -0x112ed9a3c326ae25, -0x056253d4df431e91, 0x0f7f76e0e8d089f4 },
+ { -0x4eb6e2f4296ff3ac, 0x3539722c9d132636, 0x4db928920b362bc9, 0x4d7cd1fea68b69df },
+ { 0x36d9ebc5d485b00c, -0x5da69b6d1b524c9b, -0x3e9a6b7f3dee6333, 0x45306349186e0d5f }
+ },
+ {
+ { -0x695beb13d4f8db6f, 0x1bb2218127a7b65b, 0x6d2849596e8a4af0, 0x65f3b08ccd27765f },
+ { -0x6b222f3e593200e3, 0x55f6f115e84213ae, 0x6c935f85992fcf6a, 0x067ee0f54a37f16f },
+ { -0x134d6000e667fe09, -0x62c9e2e05d5f08d1, 0x25f11d2375fd2f49, 0x124cefe80fe10fe2 }
+ },
+ {
+ { 0x1518e85b31b16489, -0x70552348248ef405, 0x39b0bdf4a14ae239, 0x05f4cbea503d20c1 },
+ { 0x4c126cf9d18df255, -0x3e2b8e16eb859c4a, 0x2c6d3c73f3c93b5f, 0x6be3a6a2e3ff86a2 },
+ { -0x31fbf1613fbeba44, -0x38e00b1df7097cb4, -0x42ab91725477b85d, 0x64666aa0a4d2aba5 }
+ },
+ {
+ { -0x4f3ac408ccc816b4, 0x7cb5697e11e14f15, 0x4b84abac1930c750, 0x28dd4abfe0640468 },
+ { 0x6841435a7c06d912, -0x35edc3de44c07cf5, -0x2b4c84d84e341d88, 0x1d753b84c76f5046 },
+ { 0x7dc0b64c44cb9f44, 0x18a3e1ace3925dbf, 0x7a3034862d0457c4, 0x4c498bf78a0c892e }
+ },
+},
+{
+ {
+ { 0x22d2aff530976b86, -0x726f47f93d2db9fc, -0x235e7693b21a451b, 0x28005fe6c8340c17 },
+ { 0x37d653fb1aa73196, 0x0f9495303fd76418, -0x52dff4f604c5e84e, 0x544d49292fc8613e },
+ { 0x6aefba9f34528688, 0x5c1bff9425107da1, -0x08a444329926b4ca, 0x72e472930f316dfa }
+ },
+ {
+ { 0x07f3f635d32a7627, 0x7aaa4d865f6566f0, 0x3c85e79728d04450, 0x1fee7f000fe06438 },
+ { 0x2695208c9781084f, -0x4eafd5f4dcbaf11f, -0x02625159fc1021fe, 0x5a9d2e8c2733a34c },
+ { 0x765305da03dbf7e5, -0x5b250db6ebcb3243, 0x7b4ad5cdd24a88ec, 0x00f94051ee040543 }
+ },
+ {
+ { -0x28106c44f85068ad, 0x583ed0cf3db766a7, -0x3196674091f4e13b, 0x47b7ffd25dd40452 },
+ { -0x72ca94dc3c2ccf4e, -0x0de374644fb8e4fa, -0x4c93ce9391bd47c4, 0x07d79c7e8beab10d },
+ { -0x7804046343f722ee, -0x75f994c51e113d65, 0x0d57242bdb1fc1bf, 0x1c3520a35ea64bb6 }
+ },
+ {
+ { -0x325790bfde943fa7, 0x1fbb231d12bcd87e, -0x4b6a9561e838f670, 0x38750c3b66d12e55 },
+ { -0x7f2dac5943345cb6, 0x3e61c3a13838219b, -0x6f3c49fe677d1c6a, 0x1c3d05775d0ee66f },
+ { 0x692ef1409422e51a, -0x343f38c3d4a2098f, 0x21014fe7744ce029, 0x0621e2c7d330487c }
+ },
+ {
+ { -0x4851e8694f240f0d, 0x54dfafb9e17ce196, 0x25923071e9aaa3b4, 0x5d8e589ca1002e9d },
+ { -0x50679f337da67c73, -0x6f15b73e39606524, 0x6526483765581e30, 0x0007d6097bd3a5bc },
+ { -0x3f40e26af7bd56b5, -0x4d2c3c9ca770d1c2, 0x0a961438bb51e2ef, 0x1583d7783c1cbf86 }
+ },
+ {
+ { -0x6ffcb8fb3362d739, 0x1d1b679ef72cc58f, 0x16e12b5fbe5b8726, 0x4958064e83c5580a },
+ { -0x13115d10a25d851f, 0x597c3a1455670174, -0x3659d5ed99f6e986, 0x252a5f2e81ed8f70 },
+ { 0x0d2894265066e80d, -0x033c087acf837395, 0x1b53da780c1112fd, 0x079c170bd843b388 }
+ },
+ {
+ { -0x322932af3f2a2faa, -0x6508979244fca8c5, 0x3ca6723ff3c3ef48, 0x6768c0d7317b8acc },
+ { 0x0506ece464fa6fff, -0x411cbce19dfa1add, 0x3579422451b8ea42, 0x6dec05e34ac9fb00 },
+ { -0x6b49da1a0eaa3e4d, 0x417bf3a7997b7b91, -0x3dd342239294da00, 0x51445e14ddcd52f4 }
+ },
+ {
+ { -0x76ceb854d4415bab, -0x73ac5db06df86ed7, 0x4b49f948be30f7a7, 0x12e990086e4fd43d },
+ { 0x57502b4b3b144951, -0x71980094bbb4434d, -0x474296d8e99c7a25, 0x13186f31e39295c8 },
+ { -0x0ef3694c802044d2, -0x60656ca1ede31507, -0x20eec93bc5a467c1, 0x77b2e3f05d3e99af }
+ },
+},
+{
+ {
+ { -0x6acd0b7033a32d65, 0x2ba851bea3ce3671, 0x32dacaa051122941, 0x478d99d9350004f2 },
+ { -0x02f28a78630ed9a9, -0x17d0106b1ac5f1d7, -0x33cb580fa444b419, 0x0b251172a50c38a2 },
+ { 0x1d5ad94890bb02c0, 0x50e208b10ec25115, -0x5d95dd76b10de8fe, 0x4dc923343b524805 }
+ },
+ {
+ { 0x3ad3e3ebf36c4975, -0x28a2da5ac879dedb, -0x178c6bc25fda5aea, 0x6bbc7cb4c411c847 },
+ { -0x1c7d73bff07f794a, 0x3f77e6f7979f0dc8, 0x7ef6de304df42cb4, 0x5265797cb6abd784 },
+ { 0x3c6f9cd1d4a50d56, -0x49dbbf8839015482, 0x6ff9bf483580972e, 0x00375883b332acfb }
+ },
+ {
+ { -0x3674137a938a3664, -0x1bbe7b3fff1cc30c, 0x0a676b9bba907634, 0x669e2cb571f379d7 },
+ { 0x0001b2cd28cb0940, 0x63fb51a06f1c24c9, -0x4a52796e232a35cf, 0x67238dbd8c450660 },
+ { -0x34ee948c5b642cf8, 0x025aad6b2392729e, -0x4b86c105c0aa264f, 0x72a1056140678bb9 }
+ },
+ {
+ { 0x0d8d2909e2e505b6, -0x673587543fd6edd0, 0x77ef5569a9b12327, 0x7c77897b81439b47 },
+ { -0x5d497ed4e336db63, 0x62866eee21211f58, 0x2cb5c5b85df10ece, 0x03a6b259e263ae00 },
+ { -0x0e3e4a1d21cce34b, 0x5a9f5d8e15fca420, -0x605bc70e8426cd4f, 0x2a381bf01c6146e7 }
+ },
+ {
+ { -0x083f41cd4acbe991, 0x27e6ca6419cf70d4, -0x6cb2082856a858a7, 0x5701461dabdec2aa },
+ { -0x536467863037ee3f, -0x7482d67ec8a91a99, 0x50da4e607c70edfc, 0x5dbca62f884400b6 },
+ { 0x2c6747402c915c25, 0x1bdcd1a80b0d340a, 0x5e5601bd07b43f5f, 0x2555b4e05539a242 }
+ },
+ {
+ { 0x78409b1d87e463d4, -0x52b256a532049c63, -0x13d788c8aada6464, 0x69c806e9c31230ab },
+ { 0x6fc09f5266ddd216, -0x231a9f58371c8fb8, -0x139a6c625d209d03, 0x7a869ae7e52ed192 },
+ { 0x7b48f57414bb3f22, 0x68c7cee4aedccc88, -0x12d06c9e86127f42, 0x25d70b885f77bc4b }
+ },
+ {
+ { -0x67ba62d644e51b2c, 0x56b9c4c739f954ec, -0x7cd8bc093d64b4c2, 0x21ea8e2798b6878a },
+ { 0x4151c3d9762bf4de, 0x083f435f2745d82b, 0x29775a2e0d23ddd5, 0x138e3a6269a5db24 },
+ { -0x78410b4b95a58464, -0x2dd662e4a03e2f9e, -0x7dbf67e722cde9b8, 0x5c5abeb1e5a2e03d }
+ },
+ {
+ { 0x02cde6de1306a233, 0x7b5a52a2116f8ec7, -0x1e397e0b3ee9c4a5, 0x241d350660d32643 },
+ { 0x14722af4b73c2ddb, -0x43b8f3a0a5faf9f3, 0x00943eac2581b02e, 0x0e434b3b1f499c8f },
+ { 0x6be4404d0ebc52c7, -0x51b9dcc44e586e0b, 0x2aec170ed25db42b, 0x1d8dfd966645d694 }
+ },
+},
+{
+ {
+ { -0x2a679c63ed224f5c, -0x5a2e60cf3fdb7995, -0x2e83d0fca7031ba0, 0x07a195152e095e8a },
+ { 0x296fa9c59c2ec4de, -0x43749e40b07b0c35, 0x1c7706d917a8f908, 0x63b795fc7ad3255d },
+ { -0x57c970fdc761a038, -0x6fbcc4fd30721bc5, -0x505e02a23abed9bd, 0x3e8fe83d032f0137 }
+ },
+ {
+ { 0x08704c8de8efd13c, -0x203ae571cc1fc8cf, -0x5a62a25aed9f321d, 0x22d60899a6258c86 },
+ { 0x2f8b15b90570a294, -0x6b0dbd8f98f7bab7, -0x21e3a51e9e44027c, 0x75ba3b797fac4007 },
+ { 0x6239dbc070cdd196, 0x60fe8a8b6c7d8a9a, -0x4c77b84314bfeda0, 0x0904d07b87779e5e }
+ },
+ {
+ { -0x0bcdd299b706bf47, 0x06952f0cbd2d0c39, 0x167697ada081f931, 0x6240aacebaf72a6c },
+ { -0x4b31e02b22456e64, -0x30ce24c138b37256, 0x2c63cc63ad86cc51, 0x43e2143fbc1dde07 },
+ { -0x07cb8b63a45d6a60, -0x296b83a435c82da6, 0x66f13ba7e7c9316a, 0x56bdaf238db40cac }
+ },
+ {
+ { 0x1310d36cc19d3bb2, 0x062a6bb7622386b9, 0x7c9b8591d7a14f5c, 0x03aa31507e1e5754 },
+ { 0x362ab9e3f53533eb, 0x338568d56eb93d40, -0x61f1ebade2a5aa8e, 0x1d24a86d83741318 },
+ { -0x0b1389b7002b31e1, -0x1fba150fab5373e4, -0x772dda7de2f6ca84, 0x43b261dc9aeb4859 }
+ },
+ {
+ { 0x19513d8b6c951364, -0x6b018ed9fff40b85, 0x028d10ddd54f9567, 0x02b4d5e242940964 },
+ { -0x1aa4e1e677448645, -0x5f612f823e85ca63, -0x4fd3d11d9fc215cd, 0x326055cf5b276bc2 },
+ { -0x4b5eaa34d72e720e, -0x1533b9b9e7931af8, -0x3b630b6c937dbc77, 0x27a6c809ae5d3410 }
+ },
+ {
+ { -0x32d3d8f53bc296ac, -0x22b5c1a89599354e, 0x79fa592469d7036c, 0x221503603d8c2599 },
+ { -0x74591432e0f24e78, 0x37d3d73a675a5be8, -0x0dd1205cea0aa7a6, 0x2cb67174ff60a17e },
+ { 0x59eecdf9390be1d0, -0x56bddfbb8d731c0f, -0x7d76e399856b0f0c, 0x7b1df4b73890f436 }
+ },
+ {
+ { 0x5f2e221807f8f58c, -0x1caaa3602b6bf62c, -0x4d555772e04959d0, 0x68698245d352e03d },
+ { -0x1b6d0d1f4c4d5ddc, 0x7c6c9e062b551160, 0x15eb8fe20d7f7b0e, 0x61fcef2658fc5992 },
+ { -0x244ea27ad5e7e786, -0x0c1b552c79225329, 0x44bae2810ff6c482, 0x46cf4c473daf01cf }
+ },
+ {
+ { 0x213c6ea7f1498140, 0x7c1e7ef8392b4854, 0x2488c38c5629ceba, 0x1065aae50d8cc5bb },
+ { 0x426525ed9ec4e5f9, 0x0e5eda0116903303, 0x72b1a7f2cbe5cadc, 0x29387bcd14eb5f40 },
+ { 0x1c2c4525df200d57, 0x5c3b2dd6bfca674a, 0x0a07e7b1e1834030, 0x69a198e64f1ce716 }
+ },
+},
+{
+ {
+ { 0x7b26e56b9e2d4734, -0x3b38ecd47e39e98b, -0x10a36ada13632181, 0x39c80b16e71743ad },
+ { 0x7afcd613efa9d697, 0x0cc45aa41c067959, -0x5a901efb3e05256a, 0x3a73b70472e40365 },
+ { 0x0f196e0d1b826c68, -0x08e00f1db69f1c25, 0x6113167023b7436c, 0x0cf0ea5877da7282 }
+ },
+ {
+ { -0x1ccd312bc4596ba6, -0x21f4ec9e177e3fa3, 0x1ad40f095e67ed3b, 0x5da8acdab8c63d5d },
+ { 0x196c80a4ddd4ccbd, 0x22e6f55d95f2dd9d, -0x38a1cc38bf2938e5, 0x7bb51279cb3c042f },
+ { -0x3b4999b5c58fea61, 0x76194f0f0a904e14, -0x5a9eb3c65bf693ed, 0x6cd0ff50979feced }
+ },
+ {
+ { 0x7fecfabdb04ba18e, -0x2f038403c4224309, -0x5be2b791fa85ece4, 0x641a4391f2223a61 },
+ { -0x3f1f981870bbd754, 0x14835ab0a61135e3, -0x0de2eb0cc7f9d6cb, 0x6390a4c8df04849c },
+ { -0x3a3946a559f95725, -0x6eb480614f97da0f, 0x2a731f6b44fc9eff, 0x30ddf38562705cfc }
+ },
+ {
+ { 0x33bef2bd68bcd52c, -0x39b6244f96b7d10e, -0x4a4911f3be34e512, 0x5c294d270212a7e5 },
+ { 0x4e3dcbdad1bff7f9, -0x36ee717ddf9ba8e9, -0x45333143f0e762aa, 0x1b4822e9d4467668 },
+ { -0x54c9f580daa9c87f, 0x2512228a480f7958, -0x38a2fad89eeb4b1d, 0x222d9625d976fe2a }
+ },
+ {
+ { 0x0f94be7e0a344f85, -0x14d05573780dd3c8, -0x631e18a1b11e90f1, 0x43e64e5418a08dea },
+ { 0x1c717f85b372ace1, -0x7e6cf196b9c740e8, 0x239cad056bc08b58, 0x0b34271c87f8fff4 },
+ { -0x7eaa1dade5ca319d, -0x41eff2b206edfd72, -0x4007f4075a822314, 0x57342dc96d6bc6e4 }
+ },
+ {
+ { -0x0c3c4348e18f840a, 0x351d9b8c7291a762, 0x00502e6edad69a33, 0x522f521f1ec8807f },
+ { -0x10110f9a3731a668, -0x40fd6aef4a34155e, -0x739b5ef9df483ba8, 0x35134fb231c24855 },
+ { 0x272c1f46f9a3902b, -0x36e45c48669a8434, -0x519eb4cfb075e3f2, 0x7afcaad70b99017b }
+ },
+ {
+ { -0x577ebe13107bd495, 0x55e7b14797abe6c5, -0x738b7068fc87b002, 0x5b50a1f7afcd00b7 },
+ { -0x3da212ab5b4741bf, -0x6fd2ec1ee44f1d23, 0x41f43233cde82ab2, 0x1085faa5c3aae7cb },
+ { -0x647bf0990ec9eceb, 0x18462242701003e9, 0x65ed45fae4a25080, 0x0a2862393fda7320 }
+ },
+ {
+ { -0x69f18c84913462e9, -0x050db6b72983151f, 0x37e7a9b4d55e1b89, 0x5cb7173cb46c59eb },
+ { 0x46ab13c8347cbc9d, 0x3849e8d499c12383, 0x4cea314087d64ac9, 0x1f354134b1a29ee7 },
+ { 0x4a89e68b82b7abf0, -0x0be326d864594847, 0x16e6c210e18d876f, 0x7cacdb0f7f1b09c6 }
+ },
+},
+{
+ {
+ { -0x1efebbcb233a3513, 0x47ed5d963c84fb33, 0x70019576ed86a0e7, 0x25b2697bd267f9e4 },
+ { -0x6f9d4d1f26e58744, 0x47c9889cc8509667, -0x620ab599bfaf8f48, 0x7369e6a92493a1bf },
+ { -0x6298c004ec67979c, 0x3ca5fbd9415dc7b8, -0x1fb133c420d8c4a2, 0x1420683db54e4cd2 }
+ },
+ {
+ { 0x34eebb6fc1cc5ad0, 0x6a1b0ce99646ac8b, -0x2c4f25b6599421ad, 0x31e83b4161d081c1 },
+ { -0x4b8742e1db622e69, 0x620c35005e58c102, -0x04fd2cd0334553a4, 0x60b63bebf508a72d },
+ { -0x681738ed61f9d4b1, 0x49e48f4f29320ad8, 0x5bece14b6f18683f, 0x55cf1eb62d550317 }
+ },
+ {
+ { 0x3076b5e37df58c52, -0x28c54622186633ca, -0x427ce31cb6ec11e0, 0x1a56fbaa62ba0133 },
+ { 0x5879101065c23d58, -0x7462f792af6b7e64, -0x1dbfd056ed3aa059, 0x669a6564570891d4 },
+ { -0x6bc194afa3623614, 0x302557bba77c371a, -0x678c51a9becb89af, 0x13c4836799c58a5c }
+ },
+ {
+ { -0x3b230495a2742f80, -0x21143b13a8e5b7be, -0x2b4d177c471aac9b, 0x50bdc87dc8e5b827 },
+ { 0x423a5d465ab3e1b9, -0x03ec3e78380ec09f, 0x19f83664ecb5b9b6, 0x66f80c93a637b607 },
+ { 0x606d37836edfe111, 0x32353e15f011abd9, 0x64b03ac325b73b96, 0x1dd56444725fd5ae }
+ },
+ {
+ { -0x3d6819fff7453766, 0x7d4cea11eae1c3e0, -0x0c1c741e60186884, 0x3a3a450f63a305cd },
+ { -0x705b8007cc9ded83, -0x4360953b8e3283eb, 0x6e71454349220c8b, 0x0e645912219f732e },
+ { 0x078f2f31d8394627, 0x389d3183de94a510, -0x2e1c9392e8669080, 0x318c8d9393a9a87b }
+ },
+ {
+ { 0x5d669e29ab1dd398, -0x036de9a7cbd261c5, 0x55851dfdf35973cd, 0x509a41c325950af6 },
+ { -0x0d8ba2fcd50001e7, 0x0c9f3c497f24db66, -0x43672c1c457a6711, 0x224c7c679a1d5314 },
+ { -0x423f91235906da17, 0x793ef3f4641b1f33, -0x7d13ed7f627cc177, 0x05bff02328a11389 }
+ },
+ {
+ { 0x6881a0dd0dc512e4, 0x4fe70dc844a5fafe, 0x1f748e6b8f4a5240, 0x576277cdee01a3ea },
+ { 0x3632137023cae00b, 0x544acf0ad1accf59, -0x698befb62de5e378, 0x780b8cc3fa2a44a7 },
+ { 0x1ef38abc234f305f, -0x65a88042ebfa21f8, 0x5e82a51434e62a0d, 0x5ff418726271b7a1 }
+ },
+ {
+ { -0x1a24b817ec496ac0, -0x0ca2d5c4bcd9ef1f, -0x53e0d916c787ed8a, 0x29d4db8ca0a0cb69 },
+ { 0x398e080c1789db9d, -0x589fdfda0c18870b, -0x056776b3f942fca3, 0x106a03dc25a966be },
+ { -0x2652f550ccccac30, 0x38669da5acd309e5, 0x3c57658ac888f7f0, 0x4ab38a51052cbefa }
+ },
+},
+{
+ {
+ { -0x09701d177f621fac, -0x1c43f695637d452f, 0x076353d40aadbf45, 0x7b9b1fb5dea1959e },
+ { -0x20253411bcdb3f17, 0x054442883f955bb7, -0x2108555715ce9f61, 0x68aee70642287cff },
+ { -0x0fe3370e8b8e33f4, -0x6adbd1c8a86f7d45, 0x27776093d3e46b5f, 0x2d13d55a28bd85fb }
+ },
+ {
+ { -0x40fe6331851185ae, -0x57212d491bab152d, 0x3c619f0b87a8bb19, 0x3619b5d7560916d8 },
+ { -0x053a2df9a4ca4726, -0x572575657a9db449, -0x332d356ec2de32f1, 0x6b8341ee8bf90d58 },
+ { 0x3579f26b0282c4b2, 0x64d592f24fafefae, -0x48321284d7373840, 0x6a927b6b7173a8d7 }
+ },
+ {
+ { -0x728fbf79c1317715, -0x0f1cf8567f113f74, -0x53ddaf9ef2877026, 0x056d92a43a0d478d },
+ { 0x1f6db24f986e4656, 0x1021c02ed1e9105b, -0x0700c000d33f5c8b, 0x1d2a6bf8c6c82592 },
+ { 0x1b05a196fc3da5a1, 0x77d7a8c243b59ed0, 0x06da3d6297d17918, 0x66fbb494f12353f7 }
+ },
+ {
+ { -0x2928f6690edcf62a, -0x2404dc7a163c2ac7, 0x46d602b0f7552411, 0x270a0b0557843e0c },
+ { 0x751a50b9d85c0fb8, -0x2e5023da7430f685, 0x2f16a6a38309a969, 0x14ddff9ee5b00659 },
+ { 0x61ff0640a7862bcc, -0x7e353f65a0ee5402, -0x6fb87cfbaa2ed545, 0x19a4bde1945ae873 }
+ },
+ {
+ { 0x40c709dec076c49f, 0x657bfaf27f3e53f6, 0x40662331eca042c4, 0x14b375487eb4df04 },
+ { -0x6460d90adf59dff6, 0x64804443cf13eaf8, -0x759c98c079ce122d, 0x72bbbce11ed39dc1 },
+ { -0x517ac36b549923b9, -0x149dcbc12089d292, -0x0f71f1e7904d082f, 0x4f0b1c02700ab37a }
+ },
+ {
+ { 0x79fd21ccc1b2e23f, 0x4ae7c281453df52a, -0x37e8d1362eaeb795, 0x68abe9443e0a7534 },
+ { -0x1e8f987827e6ae06, -0x5ef5d3714d6f3885, -0x18c7d05fc129988d, 0x0a4d84710bcc4b54 },
+ { -0x25ed393bf87ce235, 0x0da230d74d5c510d, 0x4ab1531e6bd404e1, 0x4106b166bcf440ef }
+ },
+ {
+ { -0x5b7a332ac61b130e, 0x5aa3f3ad0555bab5, 0x145e3439937df82d, 0x1238b51e1214283f },
+ { 0x02e57a421cd23668, 0x4ad9fb5d0eaef6fd, -0x6ab198d84edbbb80, 0x7f792f9d2699f331 },
+ { 0x0b886b925fd4d924, 0x60906f7a3626a80d, -0x132c984b467542ee, 0x2876beb1def344cf }
+ },
+ {
+ { -0x2a6b4cccc5757a08, 0x4ea37689e78d7d58, 0x73bf9f455e8e351f, 0x5507d7d2bc41ebb4 },
+ { -0x237b16ca9cebb96f, 0x632fe8a0d61f23f4, 0x4caa800612a9a8d5, 0x48f9dbfa0e9918d3 },
+ { 0x1ceb2903299572fc, 0x7c8ccaa29502d0ee, -0x6e405bcbee331985, 0x5784481964a831e7 }
+ },
+},
+{
+ {
+ { -0x29302e10a0223f64, -0x17d4c10208a8a232, 0x25d56b5d201634c2, 0x3041c6bb04ed2b9b },
+ { -0x2583d4da98972a6d, -0x673e3fa8bbdd35ed, -0x0e57f42a35f531e3, 0x29cdd1adc088a690 },
+ { 0x0ff2f2f9d956e148, -0x5218688a60ca94d2, 0x1a4698bb5f6c025c, 0x104bbd6814049a7b }
+ },
+ {
+ { -0x56a265a029800e9d, -0x16d41962b338a97f, -0x4807fdb321df0da9, 0x204f2a20fb072df5 },
+ { 0x51f0fd3168f1ed67, 0x2c811dcdd86f3bc2, 0x44dc5c4304d2f2de, 0x5be8cc57092a7149 },
+ { -0x37ebc4c2cf144f87, 0x7589155abd652e30, 0x653c3c318f6d5c31, 0x2570fb17c279161f }
+ },
+ {
+ { 0x192ea9550bb8245a, -0x37190457706faf2f, 0x7986ea2d88a4c935, 0x241c5f91de018668 },
+ { 0x3efa367f2cb61575, -0x0a069089e329fd94, -0x1738ebd59a4ada9e, 0x3dcb65ea53030acd },
+ { 0x28d8172940de6caa, -0x7040d30fdd268cc6, 0x16d7fcdd235b01d1, 0x08420edd5fcdf0e5 }
+ },
+ {
+ { 0x0358c34e04f410ce, -0x49eca4a5d891f97b, 0x5d9670c7ebb91521, 0x04d654f321db889c },
+ { -0x3200df547c9d05b6, 0x57e118d4e21a3e6e, -0x1ce869e803c619d5, 0x0d9a53efbc1769fd },
+ { 0x5e7dc116ddbdb5d5, 0x2954deb68da5dd2d, 0x1cb608173334a292, 0x4a7a4f2618991ad7 }
+ },
+ {
+ { 0x24c3b291af372a4b, -0x6c257d8f8e7eb80e, -0x227b7a9b7976610e, 0x4a96314223e0ee33 },
+ { -0x0b58e7fda04ea06b, 0x3df65f346b5c1b8f, -0x32030f7aff1feeee, 0x11b50c4cddd31848 },
+ { -0x5917d8bbf75b002a, 0x738e177e9c1576d9, 0x773348b63d02b3f2, 0x4f4bce4dce6bcc51 }
+ },
+ {
+ { 0x30e2616ec49d0b6f, -0x1ba98e703513dce9, 0x48eb409bf26b4fa6, 0x3042cee561595f37 },
+ { -0x58e031a51ddbda7c, 0x26ea725692f58a9e, -0x2de5f628e315c30c, 0x73fcdd14b71c01e6 },
+ { 0x427e7079449bac41, -0x7aa51c92431dcef6, 0x4cae76215f841a7c, 0x389e740c9a9ce1d6 }
+ },
+ {
+ { -0x36428709a8f153d8, -0x1aa4f4cdd86e631f, 0x65fc3eaba19b91ed, 0x25c425e5d6263690 },
+ { 0x64fcb3ae34dcb9ce, -0x68affcdc1cb72f53, 0x45b3f07d62c6381b, 0x61545379465a6788 },
+ { 0x3f3e06a6f1d7de6e, 0x3ef976278e062308, -0x73eb09d9b1759389, 0x6539a08915484759 }
+ },
+ {
+ { -0x223b242beb44b5e7, 0x19b2bc3c98424f8e, 0x48a89fd736ca7169, 0x0f65320ef019bd90 },
+ { -0x162de08b3c2d088d, -0x3eafabbeda3b97bb, 0x624e5ce8f9b99e33, 0x11c5e4aac5cd186c },
+ { -0x2b792e4e35021f3a, 0x4f3fe6e3163b5181, 0x59a8af0dfaf2939a, 0x4cabc7bdec33072a }
+ },
+},
+{
+ {
+ { -0x083f5e63e5ab5fbc, 0x4a1c5e2477bd9fbb, -0x591c35eea50dd68e, 0x1819bb953f2e9e0d },
+ { 0x16faa8fb532f7428, -0x242bd15fb95b1d8e, 0x5337653b8b9ea480, 0x4065947223973f03 },
+ { 0x498fbb795e042e84, 0x7d0dd89a7698b714, -0x7404f45bd8019d6b, 0x36ba82e721200524 }
+ },
+ {
+ { -0x372962f5a8d8b12b, 0x45ba803260804b17, -0x20c325efddaa2054, 0x77d221232709b339 },
+ { -0x29f13448bdba13bf, -0x02641761cbcb78ea, -0x36dbf5011bdd7b22, 0x4472f648d0531db4 },
+ { 0x498a6d7064ad94d8, -0x5a4a37026509dd9d, -0x735712faba3ebe0c, 0x2c63bec3662d358c }
+ },
+ {
+ { -0x65ae74c57a790741, -0x6118e509344e6910, -0x55f9da195dc7a30e, 0x1deb2176ddd7c8d1 },
+ { 0x7fe60d8bea787955, -0x4623ee814a0bfe49, -0x6e383f65e6caa332, 0x22692ef59442bedf },
+ { -0x7a9c2e65df993094, 0x401bfd8c4dcc7cd7, -0x2689594132f2709e, 0x67cfd773a278b05e }
+ },
+ {
+ { 0x2d5fa9855a4e586a, 0x65f8f7a449beab7e, -0x55f8b2220de2cc2d, 0x185cba721bcb9dee },
+ { -0x7213ce0510c11b8b, -0x6624007561dd026e, 0x512d11594e26cab1, 0x0cde561eec4310b9 },
+ { -0x6c79625c0b1c34bf, -0x40fc6d0abf086882, 0x026204fcd0463b83, 0x3ec91a769eec6eed }
+ },
+ {
+ { 0x0fad2fb7b0a3402f, 0x46615ecbfb69f4a8, -0x08ba43373a07155a, 0x7a5fa8794a94e896 },
+ { 0x1e9df75bf78166ad, 0x4dfda838eb0cd7af, -0x45ffd1273e150678, 0x13fedb3e11f33cfc },
+ { 0x52958faa13cd67a1, -0x69a11f7e74244ae9, 0x16e58daa2e8845b3, 0x357d397d5499da8f }
+ },
+ {
+ { 0x481dacb4194bfbf8, 0x4d77e3f1bae58299, 0x1ef4612e7d1372a0, 0x3a8d867e70ff69e1 },
+ { 0x1ebfa05fb0bace6c, -0x36cb9df3e35065e2, -0x3388e33be27d49e6, 0x2d94a16aa5f74fec },
+ { 0x6f58cd5d55aff958, -0x45c155a38aa988df, 0x75c123999165227d, 0x69be1343c2f2b35e }
+ },
+ {
+ { -0x7d44425397b4721d, -0x5d0b382fc035f8e8, 0x337f92fbe096aaa8, 0x200d4d8c63587376 },
+ { 0x0e091d5ee197c92a, 0x4f51019f2945119f, 0x143679b9f034e99c, 0x7d88112e4d24c696 },
+ { 0x208aed4b4893b32b, 0x3efbf23ebe59b964, -0x289d214f245a1af9, 0x69607bd681bd9d94 }
+ },
+ {
+ { 0x3b7f3bd49323a902, 0x7c21b5566b2c6e53, -0x1a45700ac587ad59, 0x28bc77a5838ece00 },
+ { -0x0941fdef9721e31f, -0x172ae718f12343e1, -0x1c10022fe4aafa5b, 0x35f63353d3ec3fd0 },
+ { 0x63ba78a8e25d8036, 0x63651e0094333490, 0x48d82f20288ce532, 0x3a31abfa36b57524 }
+ },
+},
+{
+ {
+ { -0x3f708770c0872d77, -0x01cf58d35ebfb261, -0x0d887403309a3363, 0x7ee498165acb2021 },
+ { 0x239e9624089c0a2e, -0x38b73b3fc501b8c8, 0x17dbed2a764fa12a, 0x639b93f0321c8582 },
+ { 0x7bd508e39111a1c3, 0x2b2b90d480907489, -0x182d513d518d02e7, 0x0edf493c85b602a6 }
+ },
+ {
+ { 0x6767c4d284764113, -0x5f6fbfc0080a07cb, 0x1c8fcffacae6bede, 0x04c00c54d1dfa369 },
+ { -0x51337ea7a664a598, -0x15a8b0f014521df2, 0x4fe41d7422b67f07, 0x403b92e3019d4fb4 },
+ { 0x4dc22f818b465cf8, 0x71a0f35a1480eff8, -0x51174052fb3829a9, 0x355bb12ab26176f4 }
+ },
+ {
+ { -0x5cfe2538a5738ce8, -0x126ffc624c3155ef, 0x6f077cbf3bae3f2d, 0x7518eaf8e052ad8e },
+ { -0x58e19b338b6c440c, -0x1a427b26135c4f3d, 0x0a6bc50cfa05e785, 0x0f9b8132182ec312 },
+ { -0x5b77a63be48093ce, 0x0f2d60bcf4383298, 0x1815a929c9b1d1d9, 0x47c3871bbb1755c4 }
+ },
+ {
+ { -0x0419a2af37af9950, 0x62ecc4b0b3a299b0, -0x1ac8ab15bbe51720, 0x08fea02ce8d48d5f },
+ { 0x5144539771ec4f48, -0x07fa4e823673a292, -0x089d3ee5b83c3995, 0x00b89b85764699dc },
+ { -0x7db2228997211530, -0x379bbadfb497a2dd, -0x4aeb3032a276299b, 0x473829a74f75d537 }
+ },
+ {
+ { 0x23d9533aad3902c9, 0x64c2ddceef03588f, 0x15257390cfe12fb4, 0x6c668b4d44e4d390 },
+ { -0x7d2d258ab9863be8, -0x19c428274d9e7210, 0x355eef24ac47eb0a, 0x2078684c4833c6b4 },
+ { 0x3b48cf217a78820c, -0x0895f54d7ed8c169, -0x56939a5873711285, 0x7411a6054f8a433f }
+ },
+ {
+ { 0x579ae53d18b175b4, 0x68713159f392a102, -0x7baa1345e110ca0b, 0x1ec9a872458c398f },
+ { 0x4d659d32b99dc86d, 0x044cdc75603af115, -0x4cb38ed3233d1b78, 0x7c136574fb8134ff },
+ { -0x47195b2bff5daf65, -0x647e28fdf4377d4c, 0x57e7cc9bf1957561, 0x3add88a5c7cd6460 }
+ },
+ {
+ { -0x7a3d672ba6c6cfba, -0x7081ca67a009a614, 0x1d2ca22af2f66e3a, 0x61ba1131a406a720 },
+ { -0x5476a88f49ca230e, 0x02dfef6cf66c1fbc, -0x7aacfd9741492e79, 0x249929fccc879e74 },
+ { -0x5c2f5f0ee96a6fd7, 0x023b6b6cba7ebd89, 0x7bf15a3e26783307, 0x5620310cbbd8ece7 }
+ },
+ {
+ { 0x6646b5f477e285d6, 0x40e8ff676c8f6193, -0x59138cee544a6b23, 0x7ec846f3658cec4d },
+ { 0x528993434934d643, -0x462407f95aeddd0b, -0x709278703c0be3de, 0x37676a2a4d9d9730 },
+ { -0x64a170c0e25dd139, 0x130f1d776c01cd13, 0x214c8fcfa2989fb8, 0x6daaf723399b9dd5 }
+ },
+},
+{
+ {
+ { -0x7e514422d32ecf90, -0x69d1bcda07a5f162, -0x216c6e5535200135, 0x53177fda52c230e6 },
+ { 0x591e4a5610628564, 0x2a4bb87ca8b4df34, -0x21d5da8d185c71bd, 0x3cbdabd9fee5046e },
+ { -0x584368f9af462187, 0x3d12a7fbc301b59b, 0x02652e68d36ae38c, 0x79d739835a6199dc }
+ },
+ {
+ { 0x21c9d9920d591737, -0x6415be2d164b932a, -0x1df17bdff2764036, 0x79d99f946eae5ff8 },
+ { -0x26cab209bece3e43, 0x758094a186ec5822, 0x4464ee12e459f3c2, 0x6c11fce4cb133282 },
+ { -0x0e84b7ca9798cdfb, 0x387deae83caad96c, 0x61b471fd56ffe386, 0x31741195b745a599 }
+ },
+ {
+ { 0x17f8ba683b02a047, 0x50212096feefb6c8, 0x70139be21556cbe2, 0x203e44a11d98915b },
+ { -0x172efe6f4885c9f5, -0x66467cdf666a18fe, -0x42b0200705fdb856, 0x2772e344e0d36a87 },
+ { -0x2979c145c8461c61, 0x105bc169723b5a23, 0x104f6459a65c0762, 0x567951295b4d38d4 }
+ },
+ {
+ { 0x07242eb30d4b497f, 0x1ef96306b9bccc87, 0x37950934d8116f45, 0x05468d6201405b04 },
+ { 0x535fd60613037524, -0x1def52094f043d96, -0x5372f564dc166f52, 0x47204d08d72fdbf9 },
+ { 0x00f565a9f93267de, -0x313028723f2a7176, -0x5dea1d230ce71d72, 0x4599ee919b633352 }
+ },
+ {
+ { -0x538b929479e51a87, 0x31ab0650f6aea9dc, 0x241d661140256d4c, 0x2f485e853d21a5de },
+ { -0x2c3ddf358f1f1895, -0x4ed415a71560cf6c, 0x294ddec8c3271282, 0x0c3539e1a1d1d028 },
+ { 0x329744839c0833f3, 0x6fe6257fd2abc484, 0x5327d1814b358817, 0x65712585893fe9bc }
+ },
+ {
+ { -0x7e3d60e428f711c1, -0x2234a5fa519bf830, -0x68513e282d5c1459, 0x1590521a91d50831 },
+ { -0x63efd048cd59ee9f, -0x1b71ef22cb2adf58, 0x365c63546f9a9176, 0x32f6fe4c046f6006 },
+ { 0x40a3a11ec7910acc, -0x6fec20070e92d852, 0x1a9720d8abb195d4, 0x1bb9fe452ea98463 }
+ },
+ {
+ { -0x30a1936a33c98b84, 0x294201536b0bc30d, 0x453ac67cee797af0, 0x5eae6ab32a8bb3c9 },
+ { -0x162e26af4c2ab062, 0x2d5f9cbee00d33c1, 0x51c2c656a04fc6ac, 0x65c091ee3c1cbcc9 },
+ { 0x7083661114f118ea, 0x2b37b87b94349cad, 0x7273f51cb4e99f40, 0x78a2a95823d75698 }
+ },
+ {
+ { -0x4b0dc3bda107cdf9, -0x54076b2c3656cb4b, -0x2f8f73ecc6027809, 0x1876789117166130 },
+ { -0x5d4f8d16a373d532, 0x69cffc96651e9c4b, 0x44328ef842e7b42b, 0x5dd996c122aadeb3 },
+ { -0x6da4a10f98f3af84, -0x7e6437bd46c3cc41, 0x10792e9a70dd003f, 0x59ad4b7a6e28dc74 }
+ },
+},
+{
+ {
+ { 0x583b04bfacad8ea2, 0x29b743e8148be884, 0x2b1e583b0810c5db, 0x2b5449e58eb3bbaa },
+ { 0x5f3a7562eb3dbe47, -0x0815c7ab71425f48, 0x00c3e53145747299, 0x1304e9e71627d551 },
+ { 0x789814d26adc9cfe, 0x3c1bab3f8b48dd0b, -0x25f01e00068639f6, 0x4468de2d7c2dd693 }
+ },
+ {
+ { 0x4b9ad8c6f86307ce, 0x21113531435d0c28, -0x2b57993a9a8588d4, 0x5da6427e63247352 },
+ { 0x51bb355e9419469e, 0x33e6dc4c23ddc754, -0x6c5a4929bb80669e, 0x6cce7c6ffb44bd63 },
+ { 0x1a94c688deac22ca, -0x46f991084451e008, -0x775273c772a6a7f1, 0x58f29abfe79f2ca8 }
+ },
+ {
+ { 0x4b5a64bf710ecdf6, -0x4eb31ac7b9d3d6c4, 0x3643d056d50b3ab9, 0x6af93724185b4870 },
+ { -0x16f130547218c198, 0x54036f9f377e76a5, -0x0fb6a4f441fea67e, 0x577629c4a7f41e36 },
+ { 0x3220024509c6a888, -0x2d1fc9ecb4aa768d, -0x7c1dc9dcc3ccd761, 0x701f25bb0caec18f }
+ },
+ {
+ { -0x62e7092683413eed, -0x7bb5f9198b40241c, 0x20f5b522ac4e60d6, 0x720a5bc050955e51 },
+ { -0x3c574f071b9e9313, -0x08ff99f161da5783, 0x61e3061ff4bca59c, 0x2e0c92bfbdc40be9 },
+ { 0x0c3f09439b805a35, -0x17b174c89dbd5404, 0x691417f35c229346, 0x0e9b9cbb144ef0ec }
+ },
+ {
+ { -0x7211642aa24e4112, -0x363c54c8f58dc047, 0x44a8f1bf1c68d791, 0x366d44191cfd3cde },
+ { -0x04452b7004a8df53, -0x117e6e942406f2f2, -0x2b7ecead9caabc41, 0x221104eb3f337bd8 },
+ { -0x61c3e8bc0d4373ec, 0x2eda26fcb5856c3b, -0x3347d0f197580469, 0x4167a4e6bc593244 }
+ },
+ {
+ { -0x3d41d99a07317012, -0x169800eb177f29d4, -0x0ed19181d0c9b112, 0x34b33370cb7ed2f6 },
+ { 0x643b9d2876f62700, 0x5d1d9d400e7668eb, 0x1b4b430321fc0684, 0x7938bb7e2255246a },
+ { -0x323a6e11797e2934, -0x31fdef63127a58ad, -0x128b7a3ea77f777d, 0x1176fc6e2dfe65e4 }
+ },
+ {
+ { -0x246f1d76b688f148, -0x670433d5530bbf5d, 0x21354ffeded7879b, 0x1f6a3e54f26906b6 },
+ { -0x4b50932fa4639e65, 0x2ddfc9f4b2a58480, 0x3d4fa502ebe94dc4, 0x08fc3a4c677d5f34 },
+ { 0x60a4c199d30734ea, 0x40c085b631165cd6, -0x1dccc1dc08a67d6b, 0x4f2fad0116b900d1 }
+ },
+ {
+ { -0x69d326e248c449c8, -0x19fa885503ed63f8, 0x6f619b39f3b61689, 0x3451995f2944ee81 },
+ { 0x44beb24194ae4e54, 0x5f541c511857ef6c, -0x59e194d2c972fb68, 0x445484a4972ef7ab },
+ { -0x6ead032f60158284, 0x4a816c94b0935cf6, 0x258e9aaa47285c40, 0x10b89ca6042893b7 }
+ },
+},
+{
+ {
+ { -0x29832129862cb560, -0x33f4613f33b24c61, -0x5aca5ba91ca2e6f1, 0x2e05d9eaf61f6fef },
+ { -0x64d5bd91c49b9fdb, 0x32127190385ce4cf, -0x5da3003d229215bb, 0x06409010bea8de75 },
+ { -0x3bb86fe529e414a7, 0x661f19bce5dc880a, 0x24685482b7ca6827, 0x293c778cefe07f26 }
+ },
+ {
+ { 0x16c795d6a11ff200, -0x348f2f1d4ea7ea37, -0x760d6cdf64ac6a4b, 0x50b8c2d031e47b4f },
+ { -0x797f618ff8f96f6a, -0x5528a4ea1b1afe77, 0x07f35715a21a0147, 0x0487f3f112815d5e },
+ { 0x48350c08068a4962, 0x6ffdd05351092c9a, 0x17af4f4aaf6fc8dd, 0x4b0553b53cdba58b }
+ },
+ {
+ { -0x40fadee4d83ead2c, 0x5ec26849bd1af639, 0x5e0b2caa8e6fab98, 0x054c8bdd50bd0840 },
+ { -0x639a0341e4cd0087, -0x148a1560fc4af065, -0x0312d59393f819fa, 0x35106cd551717908 },
+ { 0x38a0b12f1dcf073d, 0x4b60a8a3b7f6a276, -0x012a53da2cbfb066, 0x72e82d5e5505c229 }
+ },
+ {
+ { 0x00d9cdfd69771d02, 0x410276cd6cfbf17e, 0x4c45306c1cb12ec7, 0x2857bf1627500861 },
+ { 0x6b0b697ff0d844c8, -0x44ed07a3268634b7, -0x2d5abe393e25f0e1, 0x7b7c242958ce7211 },
+ { -0x60de6fc0fefe9762, -0x2886202c4079effb, -0x5edd11a0c214f0e5, 0x510df84b485a00d4 }
+ },
+ {
+ { 0x24b3c887c70ac15e, -0x4f0c5aa8047e48ce, -0x64d321d01a8733e5, 0x4cf7ed0703b54f8e },
+ { -0x5abecc446d885e06, 0x74ec3b6263991237, 0x1a3c54dc35d2f15a, 0x2d347144e482ba3a },
+ { 0x6bd47c6598fbee0f, -0x61b8cc1d54aa41d3, 0x1093f624127610c5, 0x4e05e26ad0a1eaa4 }
+ },
+ {
+ { 0x1833c773e18fe6c0, -0x1c3b8ee52c378d9b, 0x3bfd3c4f0116b283, 0x1955875eb4cd4db8 },
+ { -0x2564949db4ace0e0, 0x429a760e77509abb, -0x24160add17dc3480, 0x618f1856880c8f82 },
+ { 0x6da6de8f0e399799, 0x7ad61aa440fda178, -0x4cd327efa1ca9c23, 0x15f6beae2ae340ae }
+ },
+ {
+ { -0x4565f0846dba1deb, -0x0c979ed22673f245, 0x2e84e4cbf220b020, 0x6ba92fe962d90eda },
+ { -0x79d434f3ce13c59e, -0x7ef1d4baeec70c3e, 0x788ec4b839dac2a4, 0x28f76867ae2a9281 },
+ { 0x3e4df9655884e2aa, -0x429d0424242b9a5b, -0x28a69355f2161adc, 0x6e8042ccb2b1b3d7 }
+ },
+ {
+ { 0x1530653616521f7e, 0x660d06b896203dba, 0x2d3989bc545f0879, 0x4b5303af78ebd7b0 },
+ { -0x0ef2c3d631d73592, -0x452cbabf0349f6c3, -0x18bd91285d15d2c1, 0x08af9d4e4ff298b9 },
+ { 0x72f8a6c3bebcbde8, 0x4f0fca4adc3a8e89, 0x6fa9d4e8c7bfdf7a, 0x0dcf2d679b624eb7 }
+ },
+},
+{
+ {
+ { 0x753941be5a45f06e, -0x2f8351129263a09b, 0x11776b9c72ff51b6, 0x17d2d1d9ef0d4da9 },
+ { 0x3d5947499718289c, 0x12ebf8c524533f26, 0x0262bfcb14c3ef15, 0x20b878d577b7518e },
+ { 0x27f2af18073f3e6a, -0x02c01ae628adef97, 0x22e3b72c3ca60022, 0x72214f63cc65c6a7 }
+ },
+ {
+ { 0x1d9db7b9f43b29c9, -0x29fa7db5b0ae708b, -0x0d3f8d42ced0623c, 0x1f24ac855a1545b0 },
+ { -0x4b1c80bfacf8596d, -0x5458eb28d0cc986b, -0x29042f588c89ef67, 0x5fdf48c58171cbc9 },
+ { 0x24d608328e9505aa, 0x4748c1d10c1420ee, -0x38001ba3f904da5e, 0x00ba739e2ae395e6 }
+ },
+ {
+ { -0x51bbd90a157744da, 0x360679d984973bfb, 0x5c9f030c26694e50, 0x72297de7d518d226 },
+ { 0x592e98de5c8790d6, -0x1a40482cba3d5d21, 0x115a3b60f9b49922, 0x03283a3e67ad78f3 },
+ { 0x48241dc7be0cb939, 0x32f19b4d8b633080, -0x2c2036f2fdd76cf8, 0x05e1296846271945 }
+ },
+ {
+ { -0x52404437dbd3bab0, -0x4337f3132fcf7e27, -0x7bca99590a37206e, 0x78cf25d38258ce4c },
+ { -0x457d114cd263b6a6, -0x311037030ed44684, -0x4fd254516c4a2e20, 0x39c00c9c13698d9b },
+ { 0x15ae6b8e31489d68, -0x557ae35463d40f79, -0x3658a5680fb105fb, 0x006b52076b3ff832 }
+ },
+ {
+ { -0x0a3481e94631f7d3, 0x3407f14c417abc29, -0x2b4c9431d40b5855, 0x7de2e9561a9f75ce },
+ { 0x29e0cfe19d95781c, -0x497e20e7699cef1e, 0x57df39d370516b39, 0x4d57e3443bc76122 },
+ { -0x218f2b0b495aa135, 0x4801527f5d85db99, -0x24363bbf2c11657f, 0x6b2a90af1a6029ed }
+ },
+ {
+ { 0x77ebf3245bb2d80a, -0x27cfe4b8d046f865, -0x39b8190db3118ccd, 0x465812c8276c2109 },
+ { 0x6923f4fc9ae61e97, 0x5735281de03f5fd1, -0x589b51bc19122ed3, 0x5fd8f4e9d12d3e4a },
+ { 0x4d43beb22a1062d9, 0x7065fb753831dc16, 0x180d4a7bde2968d7, 0x05b32c2b1cb16790 }
+ },
+ {
+ { -0x08035bd3852a7e6b, 0x3214286e4333f3cc, -0x493d62f2cbf46863, 0x31771a48567307e1 },
+ { -0x373fa1332db25703, -0x5e30e553fa20107d, -0x2441100d8206329f, 0x3b5556a37b471e99 },
+ { 0x32b0c524e14dd482, -0x124caeabe5d45b4a, -0x5c2e9fb7d7d4a50d, 0x4fc079d27a7336eb }
+ },
+ {
+ { -0x23cb74bbf3793af3, 0x1337cbc9cc94e651, 0x6422f74d643e3cb9, 0x241170c2bae3cd08 },
+ { 0x51c938b089bf2f7f, 0x2497bd6502dfe9a7, -0x00003f63877f1bad, 0x124567cecaf98e92 },
+ { 0x3ff9ab860ac473b4, -0x0f6ee211feec1bcb, 0x4ae75060ebc6c4af, 0x3f8612966c87000d }
+ },
+},
+{
+ {
+ { 0x529fdffe638c7bf3, -0x20d4619fc774b66b, -0x1fd84cb0e452fdb7, 0x7bc92fc9b9fa74ed },
+ { 0x0c9c5303f7957be4, -0x5c3ce5df1f7a3ebb, -0x4f8de28e2f7affb0, 0x0aba390eab0bf2da },
+ { -0x606810d17fe52607, -0x7c9682ab865025c6, -0x16f94c0042a694b0, 0x02672b37dd3fb8e0 }
+ },
+ {
+ { -0x116458d6c673580b, -0x146359da85b7b625, 0x29eb29ce7ec544e1, 0x232ca21ef736e2c8 },
+ { 0x48b2ca8b260885e4, -0x5bd794137d4cb3e4, -0x6c81e5d9e80a708c, 0x741d1fcbab2ca2a5 },
+ { -0x409ebdc2dac034e9, 0x08803ceafa39eb14, -0x0e79fd2067ae3851, 0x0400f3a049e3414b }
+ },
+ {
+ { 0x2efba412a06e7b06, 0x146785452c8d2560, -0x2068ec1429856e39, 0x32830ac7157eadf3 },
+ { -0x5431fb89459e3aa5, 0x36a3d6d7c4d39716, 0x6eb259d5e8d82d09, 0x0c9176e984d756fb },
+ { 0x0e782a7ab73769e8, 0x04a05d7875b18e2c, 0x29525226ebcceae1, 0x0d794f8383eba820 }
+ },
+ {
+ { 0x7be44ce7a7a2e1ac, 0x411fd93efad1b8b7, 0x1734a1d70d5f7c9b, 0x0d6592233127db16 },
+ { -0x00ca0a3461eae90c, -0x117fa4309b7551bb, -0x0f28c3d446c5610d, 0x097b0bf22092a6c2 },
+ { -0x3b7454eade5628cd, -0x593d151529e544db, 0x625c6c1cc6cb4305, 0x7fc90fea93eb3a67 }
+ },
+ {
+ { -0x3ad8214a63834dc3, -0x6aac6e96acd7bfb2, -0x29bc6d7e8330d386, 0x6ce97dabf7d8fa11 },
+ { 0x0408f1fe1f5c5926, 0x1a8f2f5e3b258bf4, 0x40a951a2fdc71669, 0x6598ee93c98b577e },
+ { 0x25b5a8e50ef7c48f, -0x149fcbee90d31ace, -0x3a18ae8c1ac21ac9, 0x73119fa08c12bb03 }
+ },
+ {
+ { 0x7845b94d21f4774d, -0x409d0e93876848d9, 0x671857c03c56522b, 0x3cd6a85295621212 },
+ { -0x12cfed6bac0e5b35, -0x4319de36370ac879, -0x0534d4ecc7411847, 0x3025798a9ea8428c },
+ { 0x3fecde923aeca999, -0x4255a4ff9d173ed1, 0x67b99dfc96988ade, 0x3f52c02852661036 }
+ },
+ {
+ { -0x6da74066113be93a, -0x5375afe8562d098f, 0x629549ab16dea4ab, 0x05d0e85c99091569 },
+ { -0x00155b71d5ecae3a, 0x28624754fa7f53d7, 0x0b5ba9e57582ddf1, 0x60c0104ba696ac59 },
+ { 0x051de020de9cbe97, -0x05f803a94af4308c, 0x378cec9f0f11df65, 0x36853c69ab96de4d }
+ },
+ {
+ { 0x4433c0b0fac5e7be, 0x724bae854c08dcbe, -0x0e0db33bb9687065, 0x4a0aff6d62825fc8 },
+ { 0x36d9b8de78f39b2d, 0x7f42ed71a847b9ec, 0x241cd1d679bd3fde, 0x6a704fec92fbce6b },
+ { -0x16e804619ef6acff, -0x3efd206bfd5f6d08, -0x40f61d0a0599e6f5, 0x681109bee0dcfe37 }
+ },
+},
+{
+ {
+ { -0x63e70305c9fb72ed, 0x29159db373899ddd, -0x2360caf4606d2f56, 0x26f57eee878a19d4 },
+ { 0x559a0cc9782a0dde, 0x551dcdb2ea718385, 0x7f62865b31ef238c, 0x504aa7767973613d },
+ { 0x0cab2cd55687efb1, 0x5180d162247af17b, -0x7a3ea5cbb0a5db99, 0x4041943d9dba3069 }
+ },
+ {
+ { 0x4b217743a26caadd, 0x47a6b424648ab7ce, -0x34e2b085fc04361d, 0x12d931429800d019 },
+ { -0x3c3f1145bc14336a, -0x728b6363d9156351, -0x26056a11e388333a, 0x1420a1d97684340f },
+ { 0x00c67799d337594f, 0x5e3c5140b23aa47b, 0x44182854e35ff395, 0x1b4f92314359a012 }
+ },
+ {
+ { 0x33cf3030a49866b1, 0x251f73d2215f4859, -0x547d55bfae210b0a, 0x5ff191d56f9a23f6 },
+ { 0x3e5c109d89150951, 0x39cefa912de9696a, 0x20eae43f975f3020, 0x239b572a7f132dae },
+ { -0x7e612bcc53d26f98, 0x2883ab795fc98523, -0x10ba8d7faa6c14c3, 0x020c526a758f36cb }
+ },
+ {
+ { -0x16ce10a60fbd3377, 0x2c589c9d8e124bb6, -0x52371e755138a669, 0x452cfe0a5602c50c },
+ { 0x779834f89ed8dbbc, -0x370d550623835b94, -0x56adb3235c1e4f8c, 0x02aacc4615313877 },
+ { -0x795f085f9b878821, -0x443b9bd8f19f8361, -0x54e815da0e04ee37, 0x4cfb7d7b304b877b }
+ },
+ {
+ { -0x1d79663d687610ee, 0x2b6ecd71df57190d, -0x3cbc37a813368f30, 0x5b1d4cbc434d3ac5 },
+ { 0x72b43d6cb89b75fe, 0x54c694d99c6adc80, -0x473c55c8c11cb361, 0x14b4622b39075364 },
+ { -0x4904d9ea33f560da, 0x3a4f0e2bb88dcce5, 0x1301498b3369a705, 0x2f98f71258592dd1 }
+ },
+ {
+ { 0x2e12ae444f54a701, -0x0301c10f56342822, -0x314076f28a7ca220, 0x1d8062e9e7614554 },
+ { 0x0c94a74cb50f9e56, 0x5b1ff4a98e8e1320, -0x65d533de7dcff099, 0x3a6ae249d806aaf9 },
+ { 0x657ada85a9907c5a, 0x1a0ea8b591b90f62, -0x72f1e20420cb4b17, 0x298b8ce8aef25ff3 }
+ },
+ {
+ { -0x7c858d15f5de9a22, 0x3fab07b40bcf79f6, 0x521636c77738ae70, 0x6ba6271803a7d7dc },
+ { 0x2a927953eff70cb2, 0x4b89c92a79157076, -0x6be7ba85cf583096, 0x34b8a8404d5ce485 },
+ { -0x3d91134a7c96cccb, -0x2a57ec209c4a0103, -0x5d6c55655b4dda8d, 0x71d62bdd465e1c6a }
+ },
+ {
+ { -0x32d24a254e08a10b, -0x28806a30e94f9a0b, 0x14571fea3f49f085, 0x1c333621262b2b3d },
+ { 0x6533cc28d378df80, -0x0924bc86f5f05b4c, -0x1c9ba00608fe25a6, 0x74d5f317f3172ba4 },
+ { -0x57901aab9826357f, 0x398b7c752b298c37, -0x2592f76d1c539dc5, 0x4aebcc4547e9d98c }
+ },
+},
+{
+ {
+ { 0x0de9b204a059a445, -0x1ea34b55b4e852f1, -0x1e4413ade0863aa9, 0x2633f1b9d071081b },
+ { 0x53175a7205d21a77, -0x4f3fbbdd2c46cb2c, -0x52260db422a21524, 0x074f46e69f10ff8c },
+ { -0x3e04be88fe7466f0, -0x5915df2393f01ec0, -0x299e0c18bcab3901, 0x5ecb72e6f1a3407a }
+ },
+ {
+ { -0x01151ef917179669, -0x679ccc80672f6c7d, -0x6b8fb7f155f91411, 0x038b6898d4c5c2d0 },
+ { -0x5aea5ce4dda604b2, 0x0960f3972bcac52f, -0x124ad01372cbab35, 0x382e2720c476c019 },
+ { -0x0c6e3ae27531af5a, 0x3142d0b9ae2d2948, -0x24b2a5e580db3580, 0x21aeba8b59250ea8 }
+ },
+ {
+ { 0x53853600f0087f23, 0x4c461879da7d5784, 0x6af303deb41f6860, 0x0a3c16c5c27c18ed },
+ { 0x24f13b34cf405530, 0x3c44ea4a43088af7, 0x5dd5c5170006a482, 0x118eb8f8890b086d },
+ { 0x17e49c17cc947f3d, -0x33391259553e2d85, -0x209f6d314f0f71aa, 0x4909b3e22c67c36b }
+ },
+ {
+ { 0x59a16676706ff64e, 0x10b953dd0d86a53d, 0x5848e1e6ce5c0b96, 0x2d8b78e712780c68 },
+ { -0x63637a159c01d177, -0x41e4506ef16bed14, -0x7084557579040185, 0x0fb17f9fef968b6c },
+ { 0x79d5c62eafc3902b, 0x773a215289e80728, -0x3c7519bf1efedf47, 0x09ae23717b2b1a6d }
+ },
+ {
+ { 0x10ab8fa1ad32b1d0, -0x165312e41d8874dc, -0x577a943fc8c216f1, 0x66f35ddddda53996 },
+ { -0x4495e6d5b1b2f7c4, 0x34ace0630029e192, -0x67dba5a655054515, 0x6d9c8a9ada97faac },
+ { -0x2d826504db668cdd, 0x1bb7e07ef6f01d2e, 0x2ba7472df52ecc7f, 0x03019b4f646f9dc8 }
+ },
+ {
+ { -0x50f64deb194c2395, 0x3f7573b5ad7d2f65, -0x2fe62677eff5dc50, 0x392b63a58b5c35f7 },
+ { 0x04a186b5565345cd, -0x111899ef433bee96, 0x689c73b478fb2a45, 0x387dcbff65697512 },
+ { 0x4093addc9c07c205, -0x3a9a41ea0acd3c82, 0x63dbecfd1583402a, 0x61722b4aef2e032e }
+ },
+ {
+ { -0x294f85aa7e34f1c4, 0x290ff006d9444969, 0x08680b6a16dcda1f, 0x5568d2b75a06de59 },
+ { 0x0012aafeecbd47af, 0x55a266fb1cd46309, -0x0dfc1497f69838d4, 0x39633944ca3c1429 },
+ { -0x72f34773e4c8301f, 0x05b6a5a3053818f3, -0x0d1643fb487826a7, 0x6beba1249add7f64 }
+ },
+ {
+ { 0x5c3cecb943f5a53b, -0x633659e2f93f720e, -0x30459c657a76abb9, 0x5a845ae80df09fd5 },
+ { 0x1d06005ca5b1b143, 0x6d4c6bb87fd1cda2, 0x6ef5967653fcffe7, 0x097c29e8c1ce1ea5 },
+ { 0x4ce97dbe5deb94ca, 0x38d0a4388c709c48, -0x3bc1312b5e962f69, 0x0a1249fff7e587c3 }
+ },
+},
+{
+ {
+ { 0x0b408d9e7354b610, -0x7f94cdaca457a492, -0x2419c5fcb5a75df9, 0x173bd9ddc9a1df2c },
+ { 0x12f0071b276d01c9, -0x1847453a793b7390, 0x5308129b71d6fba9, 0x5d88fbf95a3db792 },
+ { 0x2b500f1efe5872df, 0x58d6582ed43918c1, -0x1912d8713698c520, 0x06e1cd13b19ea319 }
+ },
+ {
+ { 0x472baf629e5b0353, 0x3baa0b90278d0447, 0x0c785f469643bf27, 0x7f3a6a1a8d837b13 },
+ { 0x40d0ad516f166f23, 0x118e32931fab6abe, 0x3fe35e14a04d088e, 0x3080603526e16266 },
+ { -0x0819bbc6a2c27ff5, -0x6a572aaa36fe120a, 0x68cd7830592c6339, 0x30d0fded2e51307e }
+ },
+ {
+ { -0x634b68e19747b8b0, -0x5f6a8dd6999b4431, 0x5c8de72672fa412b, 0x4615084351c589d9 },
+ { -0x1fa6b2e50dedcc4d, 0x1bdbe78ef0cc4d9c, 0x6965187f8f499a77, 0x0a9214202c099868 },
+ { -0x436fe63f51465fd2, 0x55c7110d16034cae, 0x0e6df501659932ec, 0x3bca0d2895ca5dfe }
+ },
+ {
+ { -0x639771496133fe41, -0x0f437c5259bb7691, -0x35d26aa0a085601e, 0x4ea8b4038df28241 },
+ { 0x40f031bc3c5d62a4, 0x19fc8b3ecff07a60, -0x67e7c25decf04abb, 0x5631deddae8f13cd },
+ { 0x2aed460af1cad202, 0x46305305a48cee83, -0x6ede88bab60ee5a1, 0x24ce0930542ca463 }
+ },
+ {
+ { 0x3fcfa155fdf30b85, -0x2d08e971c9c8d15c, -0x4d1f9b219b6d07bc, 0x549928a7324f4280 },
+ { 0x1fe890f5fd06c106, -0x4a3b97caa277ef0e, -0x7d87f701917350c2, 0x41d4e3c28a06d74b },
+ { -0x0d91cd589c11e5d2, -0x516e1b482da00216, -0x43c42cc42e80b297, 0x491b66dec0dcff6a }
+ },
+ {
+ { 0x75f04a8ed0da64a1, -0x12ddd350981dd7b5, -0x7dcb5c86e084845c, 0x4cf6b8b0b7018b67 },
+ { -0x670a4ec23815cd59, -0x1c2a073381e92468, -0x53f540ad340726b9, 0x08f338d0c85ee4ac },
+ { -0x3c7c57de66e58c43, -0x54d843fe20cdf386, -0x3ec2cce47b888f9d, 0x530d4a82eb078a99 }
+ },
+ {
+ { 0x6d6973456c9abf9e, 0x257fb2fc4900a880, 0x2bacf412c8cfb850, 0x0db3e7e00cbfbd5b },
+ { 0x004c3630e1f94825, 0x7e2d78268cab535a, -0x38b7dcdc337b0075, 0x65ea753f101770b9 },
+ { 0x3d66fc3ee2096363, -0x7e29d3809e4a3495, 0x0fbe044213443b1a, 0x02a4ec1921e1a1db }
+ },
+ {
+ { -0x0a379e9d0e3086a1, 0x118c861926ee57f2, 0x172124851c063578, 0x36d12b5dec067fcf },
+ { 0x5ce6259a3b24b8a2, -0x47a88533ba505f48, -0x33341917745f8fc9, 0x3d143c51127809bf },
+ { 0x126d279179154557, -0x2a1b70a30387c5f6, 0x36bdb6e8df179bac, 0x2ef517885ba82859 }
+ },
+},
+{
+ {
+ { 0x1ea436837c6da1e9, -0x063e7650e0464242, 0x303001fcce5dd155, 0x28a7c99ebc57be52 },
+ { -0x7742bc732ee1f2b6, 0x30cb610d43ccf308, -0x1f65f1c86e6c8434, 0x4559135b25b1720c },
+ { -0x47026c66172e6163, -0x6f7e6e3469dbdc01, -0x4d46b728b838bd5d, 0x37f33226d7fb44c4 }
+ },
+ {
+ { 0x33912553c821b11d, 0x66ed42c241e301df, 0x066fcc11104222fd, 0x307a3b41c192168f },
+ { 0x0dae8767b55f6e08, 0x4a43b3b35b203a02, -0x1c8da5917f507387, 0x0f7a7fd1705fa7a3 },
+ { -0x7114a2f8914aa320, 0x2fc536bfaa0d925a, -0x417e7cf023493918, 0x556c7045827baf52 }
+ },
+ {
+ { -0x46b46ffdd40bbbfa, -0x542bdc81006f4acc, 0x7600a960faf86d3a, 0x2f45abdac2322ee3 },
+ { -0x71d4ae8cfd162749, -0x1c1add96db78eb18, -0x42b04288b3569f4b, 0x6f4b4199c5ecada9 },
+ { 0x61af4912c8ef8a6a, -0x1a705b01bc0491a2, -0x4a5033a2902bd831, 0x6a5393281e1e11eb }
+ },
+ {
+ { 0x0fff04fe149443cf, 0x53cac6d9865cddd7, 0x31385b03531ed1b7, 0x5846a27cacd1039d },
+ { -0x0c25aec65a2e1177, -0x7ebaba83006c9678, 0x3f622fed00e188c4, 0x0f513815db8b5a3d },
+ { 0x4ff5cdac1eb08717, 0x67e8b29590f2e9bc, 0x44093b5e237afa99, 0x0d414bed8708b8b2 }
+ },
+ {
+ { -0x7e77956dd6b53618, 0x23162b45d55547be, -0x6b3043bbfc8ea67d, 0x50eb8fdb134bc401 },
+ { -0x30497d9a02f18a0a, -0x1ba4c1d7446f18f9, 0x7242a8de9ff92c7a, 0x685b3201933202dd },
+ { -0x3f48c139294ccf33, -0x7b1bb7f8ecd0500f, 0x732b7352c4a5dee1, 0x5d7c7cf1aa7cd2d2 }
+ },
+ {
+ { 0x33d1013e9b73a562, -0x6da310a8b713d91f, -0x580319eb22b97fa8, 0x78b0fad41e9aa438 },
+ { -0x50c4b94085b5505e, -0x4878fa13b2bf2bef, 0x114f0c6aca7c15e3, 0x3f364faaa9489d4d },
+ { -0x40a95bce12fa4b78, -0x5acc199363b6a382, -0x179ad450780c9ae6, 0x0241800059d66c33 }
+ },
+ {
+ { 0x28350c7dcf38ea01, 0x7c6cdbc0b2917ab6, -0x531830417a8f7d09, 0x4d2845aba2d9a1e0 },
+ { -0x314f88015c85a41c, -0x249bd0fd1a5a1149, -0x3d192f3ab8ed8f48, 0x4771b65538e4529c },
+ { -0x44ac801fbb8f8f22, -0x3458bbbc922aa821, -0x2c4a5cb8c9ff2435, 0x4aeabbe6f9ffd7f8 }
+ },
+ {
+ { 0x6a2134bcc4a9c8f2, -0x040702e37531d1c9, 0x000ae3049911a0ba, 0x046e3a616bc89b9e },
+ { 0x4630119e40d8f78c, -0x5fe5643ac38ef1ef, 0x486d2b258910dd79, 0x1e6c47b3db0324e5 },
+ { 0x14e65442f03906be, 0x4a019d54e362be2a, 0x68ccdfec8dc230c7, 0x7cfb7e3faf6b861c }
+ },
+},
+{
+ {
+ { -0x69114004cfa4d0af, -0x2c06c752776a6948, -0x0f0ad238b92a22db, 0x57968290bb3a0095 },
+ { 0x4637974e8c58aedc, -0x4610dd04540fbe5c, -0x1e7a26a9167f8e76, 0x2f1b78fab143a8a6 },
+ { -0x08e547bcf5df1eff, -0x0c6c9a72db0f13b9, -0x308af657911d112f, 0x7dc43e35dc2aa3e1 }
+ },
+ {
+ { 0x5a782a5c273e9718, 0x3576c6995e4efd94, 0x0f2ed8051f237d3e, 0x044fb81d82d50a99 },
+ { -0x7a69999a7782263d, -0x36f064ceb44facab, -0x391f720710df864f, 0x7ef72016758cc12f },
+ { -0x3e20e73a56f81c27, 0x57b3371dce4c6359, -0x358fbacb4dfe44b7, 0x7f79823f9c30dd2e }
+ },
+ {
+ { 0x6a9c1ff068f587ba, 0x0827894e0050c8de, 0x3cbf99557ded5be7, 0x64a9b0431c06d6f0 },
+ { -0x7ccb2dc65c4aec18, -0x3ec98f2b46e05728, 0x12b54136f590bd33, 0x0a4e0373d784d9b4 },
+ { 0x2eb3d6a15b7d2919, -0x4f4b095f2ac57dcb, 0x7156ce4389a45d47, 0x071a7d0ace18346c }
+ },
+ {
+ { -0x33f3caaddf1ebbcf, 0x0d65950709b15141, -0x650a9de4df62a0ca, 0x7c69bcf7617755d3 },
+ { -0x2cf8d255377845f5, 0x01262905bfa562ee, -0x30abcffd3f108975, 0x2c3bcc7146ea7e9c },
+ { 0x07f0d7eb04e8295f, 0x10db18252f50f37d, -0x16ae565ce8e86729, 0x6f5a9a7322aca51d }
+ },
+ {
+ { -0x18d62b145c26bb42, -0x7261f6bf7f875062, 0x4525567a47869c03, 0x02ab9680ee8d3b24 },
+ { -0x745efff3d0be393b, -0x3b60863ef3010465, 0x4efa47703cc51c9f, 0x494e21a2e147afca },
+ { -0x105b757a221af266, 0x219a224e0fb9a249, -0x05f6e0e226e10927, 0x6b5d76cbea46bb34 }
+ },
+ {
+ { -0x1f06bee8e187dade, -0x0e19518bfc96c92d, 0x408b3ea2d0fcc746, 0x16fb869c03dd313e },
+ { -0x77a8aa9313f3266c, 0x6472dc6f5cd01dba, -0x50fe96eb70bd4b89, 0x0ae333f685277354 },
+ { 0x288e199733b60962, 0x24fc72b4d8abe133, 0x4811f7ed0991d03e, 0x3f81e38b8f70d075 }
+ },
+ {
+ { 0x0adb7f355f17c824, 0x74b923c3d74299a4, -0x2a83c17434071509, 0x0ad3e2d34cdedc3d },
+ { 0x7f910fcc7ed9affe, 0x545cb8a12465874b, -0x57c6812db4f3b8fc, 0x50510fc104f50993 },
+ { 0x6f0c0fc5336e249d, 0x745ede19c331cfd9, -0x0d2902fff61101e4, 0x127c158bf0fa1ebe }
+ },
+ {
+ { -0x215d703b51ae468c, 0x1d9973d3744dfe96, 0x6240680b873848a8, 0x4ed82479d167df95 },
+ { -0x09e683bdd167865e, -0x5bb5222bad35c9b9, -0x64bec03eb4b15335, 0x354ef87d07ef4f68 },
+ { -0x011c4add9f3a268b, 0x50352efceb41b0b8, -0x77f753cf56099ac4, 0x302d92d20539236d }
+ },
+},
+{
+ {
+ { -0x6a847474f20ac3d0, 0x2a1c770a8e60f098, -0x4438598fcba86922, 0x22a48f9a90c99bc9 },
+ { 0x4c59023fcb3efb7c, 0x6c2fcb99c63c2a94, -0x45be6f1d3c381f7c, 0x0e545daea51874d9 },
+ { 0x6b7dc0dc8d3fac58, 0x5497cd6ce6e42bfd, 0x542f7d1bf400d305, 0x4159f47f048d9136 }
+ },
+ {
+ { 0x748515a8bbd24839, 0x77128347afb02b55, 0x50ba2ac649a2a17f, 0x060525513ad730f1 },
+ { 0x20ad660839e31e32, -0x07e1e42a7bfa41b0, -0x07f9bfa90b254397, 0x14d23dd4ce71b975 },
+ { -0x0dc671f6755d807e, 0x6d7982bb89a1b024, -0x0596bf7bdeb22db4, 0x71ab966fa32301c3 }
+ },
+ {
+ { -0x4ef775f8fd7f66ab, 0x43b273ea0b43c391, -0x3564985101f97913, 0x605eecbf8335f4ed },
+ { 0x2dcbd8e34ded02fc, 0x1151f3ec596f22aa, -0x435daabcb1fcd726, 0x35768fbe92411b22 },
+ { -0x7cdff59a93cbfbcf, -0x60328e98711a63d1, 0x75d4613f71300f8a, 0x7a912faf60f542f9 }
+ },
+ {
+ { 0x253f4f8dfa2d5597, 0x25e49c405477130c, 0x00c052e5996b1102, 0x33cb966e33bb6c4a },
+ { -0x4dfba7a1a123e5bd, -0x60f1e911a76838c4, 0x5b82c0ae4e70483c, 0x624a170e2bddf9be },
+ { 0x597028047f116909, -0x7d753be3e1a9bb99, 0x70417dbde6217387, 0x721627aefbac4384 }
+ },
+ {
+ { -0x02cf6843bef4d0de, -0x0e5fa2584a3057bc, 0x61289a1def57ca74, 0x245ea199bb821902 },
+ { -0x682fc43c78c9522b, 0x2f1422afc532b130, 0x3aa68a057101bbc4, 0x4c946cf7e74f9fa7 },
+ { -0x51235996872b8808, 0x1898ba3c29117fe1, -0x308c067c8df342a8, 0x67da12e6b8b56351 }
+ },
+ {
+ { 0x2b7ef3d38ec8308c, -0x7d7028138e146b55, -0x7f83c4c93af9d543, 0x0cb64cb831a94141 },
+ { 0x7067e187b4bd6e07, 0x6e8f0203c7d1fe74, -0x6c3955d0c737a5d0, 0x76297d1f3d75a78a },
+ { 0x3030fc33534c6378, -0x469ca3a31abe179f, 0x15d9a9bed9b2c728, 0x49233ea3f3775dcb }
+ },
+ {
+ { 0x7b3985fe1c9f249b, 0x4fd6b2d5a1233293, -0x314cba6be520b29e, 0x6987ff6f542de50c },
+ { 0x629398fa8dbffc3a, -0x1ed01ad22ab24bab, -0x0c41ee20250dad6b, 0x628b140dce5e7b51 },
+ { 0x47e241428f83753c, 0x6317bebc866af997, -0x2544a4bcc2e567d7, 0x074d8d245287fb2d }
+ },
+ {
+ { 0x481875c6c0e31488, 0x219429b2e22034b4, 0x7223c98a31283b65, 0x3420d60b342277f9 },
+ { -0x7cc82632bbf403cf, 0x729d2ca1af318fd7, -0x5fbf5b5b88d3df90, 0x46002ef03a7349be },
+ { -0x055dc52150019a09, 0x78261ed45be0764c, 0x441c0a1e2f164403, 0x5aea8e567a87d395 }
+ },
+},
+{
+ {
+ { 0x2dbc6fb6e4e0f177, 0x04e1bf29a4bd6a93, 0x5e1966d4787af6e8, 0x0edc5f5eb426d060 },
+ { 0x7813c1a2bca4283d, -0x129d0f6e5e79c227, -0x513843473d97057a, 0x10e5d3b76f1cae4c },
+ { 0x5453bfd653da8e67, -0x1623e113db5609bf, -0x4078d9c4fca875dd, 0x45b46c51361cba72 }
+ },
+ {
+ { -0x3162b22275801c1c, -0x54ec9ba9899df1d0, 0x4b594f7bb30e9958, 0x5c1c0aef321229df },
+ { -0x56bfd540ceb0805f, -0x1da80e2371730bb0, 0x1dbbd54b23a8be84, 0x2177bfa36dcb713b },
+ { 0x37081bbcfa79db8f, 0x6048811ec25f59b3, 0x087a76659c832487, 0x4ae619387d8ab5bb }
+ },
+ {
+ { 0x61117e44985bfb83, -0x031fb9d58e69ceca, -0x7c53cbb72bda6fb5, 0x75685abe5ba43d64 },
+ { -0x72240955acbb5cd2, 0x7d88eab4b41b4078, 0x5eb0eb974a130d60, 0x1a00d91b17bf3e03 },
+ { 0x6e960933eb61f2b2, 0x543d0fa8c9ff4952, -0x208d8aef85099a97, 0x135529b623b0e6aa }
+ },
+ {
+ { -0x0a38e9431dd17c02, -0x4bd414e617f67a3f, -0x136259c8ebdab552, 0x5972ea051590a613 },
+ { 0x18f0dbd7add1d518, -0x68608777303ee0ef, -0x78cd1e0f8eeb8a65, 0x79b5b81a65ca3a01 },
+ { 0x0fd4ac20dc8f7811, -0x65652d6b53b2b058, -0x3fe4d29b4cc9fbcc, 0x4f7e9c95905f3bdb }
+ },
+ {
+ { 0x71c8443d355299fe, -0x7432c4e324141529, -0x7f6db6610e5b6b9a, 0x1942eec4a144adc8 },
+ { 0x62674bbc5781302e, -0x27adf0c6765223f1, -0x73d66651ac04263a, 0x31993ad92e638e4c },
+ { 0x7dac5319ae234992, 0x2c1b3d910cea3e92, 0x553ce494253c1122, 0x2a0a65314ef9ca75 }
+ },
+ {
+ { -0x30c9e532c3e386c6, 0x2f9ebcac5a35bc3b, 0x60e860e9a8cda6ab, 0x055dc39b6dea1a13 },
+ { 0x2db7937ff7f927c2, -0x248be0f9e82f59cb, 0x5982f3a21155af76, 0x4cf6e218647c2ded },
+ { -0x4ee6dd833d72a44a, 0x07e24ebc774dffab, -0x57c387311b5cd377, 0x121a307710aa24b6 }
+ },
+ {
+ { -0x29a68ec1388b7c37, -0x77401f8847d46951, 0x289e28231097bcd3, 0x527bb94a6ced3a9b },
+ { -0x1b24a2a160fcb569, -0x1eac03f6cfcb43d3, 0x460546919551d3b1, 0x333fc76c7a40e52d },
+ { 0x563d992a995b482e, 0x3405d07c6e383801, 0x485035de2f64d8e5, 0x6b89069b20a7a9f7 }
+ },
+ {
+ { 0x4082fa8cb5c7db77, 0x068686f8c734c155, 0x29e6c8d9f6e7a57e, 0x0473d308a7639bcf },
+ { -0x7ed55fbe9d8fddf3, -0x66a5760506dba4b2, -0x00523b31af8d10fb, 0x23bc2103aa73eb73 },
+ { -0x351186d9fca761fb, 0x2b4b421246dcc492, 0x02a1ef74e601a94f, 0x102f73bfde04341a }
+ },
+},
+{
+ {
+ { 0x358ecba293a36247, -0x5070679d4d97029b, 0x412f7e9968a01c89, 0x5786f312cd754524 },
+ { -0x4a5d2af3813df2c2, -0x39b422915f368d9d, 0x56e89052c1ff734d, 0x4929c6f72b2ffaba },
+ { 0x337788ffca14032c, -0x0c6defd7bb80e11d, -0x74ebf8e0dce43353, 0x4c817b4bf2344783 }
+ },
+ {
+ { 0x413ba057a40b4484, -0x45b3d1e5b0a095bd, 0x614ba0a5aee1d61c, 0x78a1531a8b05dc53 },
+ { 0x0ff853852871b96e, -0x1ec160549f3c0e45, -0x1102a6acdacbbbfe, 0x0a37c37075b7744b },
+ { 0x6cbdf1703ad0562b, -0x7130b7cf36dade5d, -0x25142cfc027bdb19, 0x72ad82a42e5ec56f }
+ },
+ {
+ { -0x3c976c6e98fdb43d, -0x71962e92b6afd026, -0x030d13c31ba0b4d7, 0x065f669ea3b4cbc4 },
+ { 0x3f9e8e35bafb65f6, 0x39d69ec8f27293a1, 0x6cb8cd958cf6a3d0, 0x1734778173adae6d },
+ { -0x75ff5138aacd24b3, -0x47965b1bbc1ce44f, 0x4a0f8552d3a7f515, 0x19adeb7c303d7c08 }
+ },
+ {
+ { -0x62fa4582bc3ce86c, 0x2470c8ff93322526, -0x7cdc2137e9e68bc8, 0x2852709881569b53 },
+ { -0x38df349eac15265d, 0x55b2c97f512b636e, -0x4e1ca4a02bfd6f4f, 0x2fd9ccf13b530ee2 },
+ { 0x07bd475b47f796b8, -0x2d384fecabd370ac, 0x2dbd23f43b24f87e, 0x6551afd77b0901d6 }
+ },
+ {
+ { 0x68a24ce3a1d5c9ac, -0x44885cc2ef009b9f, 0x0f86ce4425d3166e, 0x56507c0950b9623b },
+ { 0x4546baaf54aac27f, -0x090990134d5ba5d8, 0x582d1b5b562bcfe8, 0x44b123f3920f785f },
+ { 0x1206f0b7d1713e63, 0x353fe3d915bafc74, 0x194ceb970ad9d94d, 0x62fadd7cf9d03ad3 }
+ },
+ {
+ { 0x3cd7bc61e7ce4594, -0x3294ca564822d982, -0x5f7f5437bc9910d9, 0x6ec7c46f59c79711 },
+ { -0x394a6984aa675f8c, 0x5efe91ce8e493e25, -0x2b48d3bab6d7f778, 0x20ef1149a26740c2 },
+ { 0x2f07ad636f09a8a2, -0x79681931dbdfa183, -0x3f5103fa11ca5ec7, 0x15e80958b5f9d897 }
+ },
+ {
+ { 0x4dd1ed355bb061c4, 0x42dc0cef941c0700, 0x61305dc1fd86340e, 0x56b2cc930e55a443 },
+ { 0x25a5ef7d0c3e235b, 0x6c39c17fbe134ee7, -0x388b1ecbd23a3cd9, 0x021354b892021f39 },
+ { 0x1df79da6a6bfc5a2, 0x02f3a2749fde4369, -0x4cdc260d325c6f59, 0x7be0847b8774d363 }
+ },
+ {
+ { 0x1466f5af5307fa11, -0x7e8033821293f50e, 0x0a6de44ec3a4a3fb, 0x74071475bc927d0b },
+ { -0x736633a574c0aa3d, 0x0611d7253fded2a0, -0x12d66a00c948f5ca, 0x1f699a54d78a2619 },
+ { -0x188d6d0c8c181576, 0x296537d2cb045a31, 0x1bd0653ed3274fde, 0x2f9a2c4476bd2966 }
+ },
+},
+{
+ {
+ { -0x5d4b251f4aaee366, 0x7ac860292bffff06, -0x67e0c8a20aafbdcc, 0x3f6bd725da4ea12d },
+ { -0x14e7465480a8ba3a, 0x023a8aee5787c690, -0x48d8ed25d2085057, 0x36597d25ea5c013d },
+ { 0x734d8d7b106058ac, -0x26bfa86190396fa1, 0x6466f8f99202932d, 0x7b7ecc19da60d6d0 }
+ },
+ {
+ { 0x6dae4a51a77cfa9b, -0x7dd9c9ab185c79b0, 0x09bbffcd8f2d82db, 0x03bedc661bf5caba },
+ { 0x78c2373c695c690d, -0x22dad199f9bd6f92, -0x6ae2bbbbb51ed42e, 0x4235ad7601743956 },
+ { 0x6258cb0d078975f5, 0x492942549189f298, -0x5f354bdc1d1c911c, 0x0e7ce2b0cdf066a1 }
+ },
+ {
+ { -0x0159012026b48f07, -0x0ecf3fae3e0345d3, 0x4882d47e7f2fab89, 0x615256138aeceeb5 },
+ { -0x3b6b9bc53b737a5d, -0x02c9e20bc39ec653, 0x09db17dd3ae94d48, 0x666e0a5d8fb4674a },
+ { 0x2abbf64e4870cb0d, -0x329a430f55ba7495, -0x6541b1458a1767a3, 0x7f0bc810d514dee4 }
+ },
+ {
+ { -0x7c5362528c8dec60, -0x60090745d108d168, 0x311e2edd43ec6957, 0x1d3a907ddec5ab75 },
+ { -0x46ff945bd90bec91, -0x7298c961a81fcfcb, -0x34372026b0b9c3d8, 0x0d1f8dbcf8eedbf5 },
+ { -0x45e96ccec12f7e24, 0x29329fad851b3480, 0x0128013c030321cb, 0x00011b44a31bfde3 }
+ },
+ {
+ { 0x16561f696a0aa75c, -0x3e408da3a7ad4296, 0x11a8dd7f9a7966ad, 0x63d988a2d2851026 },
+ { 0x3fdfa06c3fc66c0c, 0x5d40e38e4dd60dd2, 0x7ae38b38268e4d71, 0x3ac48d916e8357e1 },
+ { 0x00120753afbd232e, -0x16d431470227097d, -0x07e9964c7b18d46f, 0x33fad52b2368a066 }
+ },
+ {
+ { -0x72d3372f3bdd3018, 0x072b4f7b05a13acb, -0x5c01491913095a91, 0x3cc355ccb90a71e2 },
+ { 0x540649c6c5e41e16, 0x0af86430333f7735, -0x4d53032d0cfa18ba, 0x16c0f429a256dca7 },
+ { -0x16496bbc6fc16ecf, -0x475b6b3485a9c832, -0x37832e5b45456dbc, 0x631eaf426bae7568 }
+ },
+ {
+ { 0x47d975b9a3700de8, 0x7280c5fbe2f80552, 0x53658f2732e45de1, 0x431f2c7f665f80b5 },
+ { -0x4c16fbef25990161, -0x7a22b4ad93e91a5a, -0x43c2689ee106407d, 0x5599648b1ea919b5 },
+ { -0x29fd9cbb7a7084e7, 0x14ab352fa1ea514a, -0x76ffbbe5df6f5629, 0x7b04715f91253b26 }
+ },
+ {
+ { -0x4c893d7f3b19453a, -0x68f12c2292e264f5, -0x4f656aa7baf406bc, 0x48d0acfa57cde223 },
+ { -0x7c1242d7530951bd, -0x79ca837482a3854c, -0x3fbfb8964814d3bc, 0x59b37bf5c2f6583f },
+ { -0x49f0d91b8254198f, -0x0e2e5e689dd0c5c9, 0x4208ce7ee9960394, 0x16234191336d3bdb }
+ },
+},
+{
+ {
+ { -0x7ad22e02c2a87442, 0x2b65ce72c3286108, 0x658c07f4eace2273, 0x0933f804ec38ab40 },
+ { -0x0e651538cc59c511, 0x2c7fba5d4442454e, 0x5da87aa04795e441, 0x413051e1a4e0b0f5 },
+ { -0x5854968672b69b8a, -0x7ede5521034a5438, -0x5a23ed1084ac6b8e, 0x07fd47065e45351a }
+ },
+ {
+ { 0x304211559ae8e7c3, -0x0d7e4dd66bb77d5b, -0x75ec53d1c87daf1c, 0x014afa0954ba48f4 },
+ { -0x37a7c3c2da72d433, 0x17029a4daf60b73f, -0x05f03629be95c87f, 0x1c1e5fba38b3fb23 },
+ { -0x34ce68ffe44c9994, 0x330060524bffecb9, 0x293711991a88233c, 0x291884363d4ed364 }
+ },
+ {
+ { -0x0462c83c43e54915, 0x02be14534d57a240, -0x0b28cbea075a1e0a, 0x5964f4300ccc8188 },
+ { 0x033c6805dc4babfa, 0x2c15bf5e5596ecc1, 0x1bc70624b59b1d3b, 0x3ede9850a19f0ec5 },
+ { -0x1bb5dcead2f69800, 0x5c08c55970866996, -0x20d249f5b9500492, 0x579155c1f856fd89 }
+ },
+ {
+ { -0x4a0e949cf7e8185a, -0x7f7396dcc3caefda, 0x324a983b54cef201, 0x53c092084a485345 },
+ { -0x69cdb122ed1f3611, 0x468b878df2420297, 0x199a3776a4f573be, 0x1e7fbcf18e91e92a },
+ { -0x2d2beb7e0e345041, 0x231d2db6716174e5, 0x0b7d7656e2a55c98, 0x3e955cd82aa495f6 }
+ },
+ {
+ { -0x54c60c109e44c5c1, -0x714bff9ad146e6c2, -0x4a219133c73ee08c, 0x654d7e9626f3c49f },
+ { -0x1b70aca1c12eabcd, -0x2f8a96d5f28d8f5d, 0x40fbd21daade6387, 0x14264887cf4495f5 },
+ { -0x1a9b3022a382d315, -0x7d11502128c83347, 0x6107db62d1f9b0ab, 0x0b6baac3b4358dbb }
+ },
+ {
+ { 0x204abad63700a93b, -0x41ffdc2c25886c8d, -0x27a0fcb99cc548f7, 0x00496dc490820412 },
+ { 0x7ae62bcb8622fe98, 0x47762256ceb891af, 0x1a5a92bcf2e406b4, 0x7d29401784e41501 },
+ { 0x1c74b88dc27e6360, 0x074854268d14850c, -0x5eba0484c1f234d0, 0x10843f1b43803b23 }
+ },
+ {
+ { -0x2a9098d21cdb9765, -0x2e2575124c6b567f, -0x2284a7016e973013, 0x7ce246cd4d56c1e8 },
+ { -0x3a06fbaac89d8923, -0x31a6ea72289ba327, -0x6d09a2aee2c994c7, 0x11574b6e526996c4 },
+ { -0x470bcf71807f41ad, 0x5f3cb8cb34a9d397, 0x18a961bd33cc2b2c, 0x710045fb3a9af671 }
+ },
+ {
+ { -0x5fc0379dfa629662, 0x2370cfa19a619e69, -0x3b01c4edd07dc215, 0x1d1b056fa7f0844e },
+ { 0x73f93d36101b95eb, -0x0510cc86b090bb7a, 0x5651735f8f15e562, 0x7fa3f19058b40da1 },
+ { 0x1bc64631e56bf61f, -0x2c8654ef91ac7d5d, 0x4d58c57e0540168d, 0x566256628442d8e4 }
+ },
+},
+{
+ {
+ { -0x22b66329e00c79c0, 0x29cd9bc3063625a0, 0x51e2d8023dd73dc3, 0x4a25707a203b9231 },
+ { -0x461b662109d9800a, 0x7772ca7b742c0843, 0x23a0153fe9a4f2b1, 0x2cdfdfecd5d05006 },
+ { 0x2ab7668a53f6ed6a, 0x304242581dd170a1, 0x4000144c3ae20161, 0x5721896d248e49fc }
+ },
+ {
+ { 0x285d5091a1d0da4e, 0x4baa6fa7b5fe3e08, 0x63e5177ce19393b3, 0x03c935afc4b030fd },
+ { 0x0b6e5517fd181bae, -0x6fdd9d60d4469c4c, 0x5509bce932064625, 0x578edd74f63c13da },
+ { -0x668d8939b6d4f3c3, 0x47ccc2c4dfe205fc, -0x232d647b229dc5c4, 0x3ec2ab590288c7a2 }
+ },
+ {
+ { -0x58dec5f651cd2e35, 0x0f2b87df40f5c2d5, 0x0baea4c6e81eab29, 0x0e1bf66c6adbac5e },
+ { -0x5e5f2d841b278447, -0x5674b2149ec6e513, -0x665f222f8c34647d, 0x2dd5c25a200fcace },
+ { -0x1d542a1686d37782, 0x1a020018cb926d5d, -0x404596324551a0e2, 0x730548b35ae88f5f }
+ },
+ {
+ { -0x7fa4f6b45e291ccc, -0x40c10e88f6cac0e7, 0x423f06cb0622702b, 0x585a2277d87845dd },
+ { -0x3bcaae5c34574712, 0x65a26f1db2115f16, 0x760f4f52ab8c3850, 0x3043443b411db8ca },
+ { -0x5e75a07dcc2b769e, 0x6698c4b5ec78257f, -0x5871905ac8c1be01, 0x7656278950ef981f }
+ },
+ {
+ { -0x1e8f8c5c15793063, 0x3a8cfbb707155fdc, 0x4853e7fc31838a8e, 0x28bbf484b613f616 },
+ { 0x38c3cf59d51fc8c0, -0x64122d02faf9490e, 0x26bf109fab570e8f, 0x3f4160a8c1b846a6 },
+ { -0x0d9ed0a390ec9384, -0x50152ef80922ee42, 0x527e9ad213de6f33, 0x1e79cb358188f75d }
+ },
+ {
+ { 0x77e953d8f5e08181, -0x7b5af3bbd6622127, -0x2393d2f379bada1b, 0x478ab52d39d1f2f4 },
+ { 0x013436c3eef7e3f1, -0x7d7495800161ef08, 0x7ff908e5bcf9defc, 0x65d7951b3a3b3831 },
+ { 0x66a6a4d39252d159, -0x1a221e4378e537f9, -0x47d394bf593e3691, 0x16d87a411a212214 }
+ },
+ {
+ { -0x045b2a1d2ab1fa7d, -0x1de05028d1426606, 0x497ac2736ee9778f, 0x1f990b577a5a6dde },
+ { -0x4c4281a5bdf99deb, -0x78641c32f3a5db3f, 0x57c05db1d6f994b7, 0x28f87c8165f38ca6 },
+ { -0x5ccbb152e417082a, 0x7d1e50ebacea798f, 0x77c6569e520de052, 0x45882fe1534d6d3e }
+ },
+ {
+ { -0x275366d66bc3901c, -0x4a060e9e5c7c6d5e, 0x2699db13bec89af3, 0x7dcf843ce405f074 },
+ { 0x6669345d757983d6, 0x62b6ed1117aa11a6, 0x7ddd1857985e128f, 0x688fe5b8f626f6dd },
+ { 0x6c90d6484a4732c0, -0x2adebc0235a9cd67, -0x4c41d73c6ea2391f, 0x6739687e7327191b }
+ },
+},
+{
+ {
+ { -0x731a552f363468e1, 0x1156aaa99fd54a29, 0x41f7247015af9b78, 0x1fe8cca8420f49aa },
+ { -0x609a3a15dff7eb31, -0x7bfac91e965ce8c0, -0x74f12ec6da374b53, 0x0080dbafe936361d },
+ { 0x72a1848f3c0cc82a, 0x38c560c2877c9e54, 0x5004e228ce554140, 0x042418a103429d71 }
+ },
+ {
+ { 0x58e84c6f20816247, -0x724d4d491c90286d, -0x688e7da9e2b7b27b, 0x0822024f8632abd7 },
+ { -0x766215ae540c00a1, -0x646c5798d03d2746, 0x2c38cb97be6ebd5c, 0x114d578497263b5d },
+ { -0x4cfe448394e4135d, 0x55393f6dc6eb1375, -0x6ef2d7ef68491b15, 0x1ad4548d9d479ea3 }
+ },
+ {
+ { -0x5f901992f016012d, -0x578cc5bfe3a786f7, 0x30d14d800df98953, 0x41ce5876c7b30258 },
+ { -0x32a5825fc765b703, -0x4c705b556587c8e2, -0x392689e4d3247194, 0x35cf51dbc97e1443 },
+ { 0x59ac3bc5d670c022, -0x151983ef64ee6bfa, -0x6867420f4c87d026, 0x651e3201fd074092 }
+ },
+ {
+ { -0x5a845b5fe1035162, 0x769f4beedc308a94, -0x2e0ef114c9fc34d2, 0x4099ce5e7e441278 },
+ { -0x29c27b7c10cf3a31, 0x4cd4b4962361cc0c, -0x116f1aff5b7bd954, 0x0af51d7d18c14eeb },
+ { 0x1ac98e4f8a5121e9, 0x7dae9544dbfa2fe0, -0x7cdf55f229bcf207, 0x667282652c4a2fb5 }
+ },
+ {
+ { -0x5257491fd6b924dd, 0x1c0ce51a7b253ab7, -0x7bb737a59922b7a5, 0x7f1fc025d0675adf },
+ { -0x78b9de0b27943655, -0x4ab38441a9019016, 0x077a24257fadc22c, 0x1ab53be419b90d39 },
+ { -0x2711e4e7ce615956, 0x004d88083a21f0da, 0x3bd6aa1d883a4f4b, 0x4db9a3a6dfd9fd14 }
+ },
+ {
+ { -0x26a4ff4434488398, -0x22437b956e0e87b7, 0x7cf700aebe28d9b3, 0x5ce1285c85d31f3e },
+ { -0x73184dc44663f8ab, 0x35c5d6edc4f50f7a, 0x7e1e2ed2ed9b50c3, 0x36305f16e8934da1 },
+ { 0x31b6972d98b0bde8, 0x7d920706aca6de5b, -0x198cef076f759a61, 0x50fac2a6efdf0235 }
+ },
+ {
+ { 0x295b1c86f6f449bc, 0x51b2e84a1f0ab4dd, -0x3ffe34cf5571aae3, 0x6a28d35944f43662 },
+ { -0x0c2c560ca477f0a6, -0x1213faf324fc183e, -0x576967e0060f4e5e, 0x49a4ae2bac5e34a4 },
+ { 0x28bb12ee04a740e0, 0x14313bbd9bce8174, 0x72f5b5e4e8c10c40, 0x7cbfb19936adcd5b }
+ },
+ {
+ { -0x7186c58533c91920, -0x0605485c82a79113, 0x3a4f9692bae1f4e4, 0x1c14b03eff5f447e },
+ { -0x5cee223d947686d3, 0x1b30b4c6da512664, 0x0ca77b4ccf150859, 0x1de443df1b009408 },
+ { 0x19647bd114a85291, 0x57b76cb21034d3af, 0x6329db440f9d6dfa, 0x5ef43e586a571493 }
+ },
+},
+{
+ {
+ { -0x5992336237f3e540, -0x685fa30be4c75bca, -0x58140c416a24283a, 0x7da0b8f68d7e7dab },
+ { -0x1087dfebc7a98a5a, -0x5d9b60cf55025618, 0x4cd1eb505cdfa8cb, 0x46115aba1d4dc0b3 },
+ { -0x2bf0e6ac3c4a258a, 0x1dac6f7321119e9b, 0x03cc6021feb25960, 0x5a5f887e83674b4b }
+ },
+ {
+ { -0x6169d72c5f59bc47, -0x4a3c34ff193cdf9c, -0x64acfd7683d213ce, 0x43e37ae2d5d1c70c },
+ { -0x709cfe308f5ec2ef, -0x303147eacaf22f3c, -0x08fd682b5b435b82, 0x3669b656e44d1434 },
+ { 0x387e3f06eda6e133, 0x67301d5199a13ac0, -0x42a52707c9d9c7ef, 0x6a21e6cd4fd5e9be }
+ },
+ {
+ { -0x10bed6ed99664d1d, 0x71d30847708d1301, 0x325432d01182b0bd, 0x45371b07001e8b36 },
+ { -0x0e39e8f5cfb919a1, 0x58712a2a00d23524, 0x69dbbd3c8c82b755, 0x586bf9f1a195ff57 },
+ { -0x5924f772a10786f5, 0x5278f0dc610937e5, -0x53fcb62d9e5e9148, 0x0eafb03790e52179 }
+ },
+ {
+ { 0x5140805e0f75ae1d, -0x13fd041cd99d33d0, 0x2cebdf1eea92396d, 0x44ae3344c5435bb3 },
+ { -0x69faaa3ec8b7fbd1, 0x219a41e6820baa11, 0x1c81f73873486d0c, 0x309acc675a02c661 },
+ { -0x630d7646445abc12, -0x0c89f162a5368ebe, 0x1d82e5c64f9360aa, 0x62d5221b7f94678f }
+ },
+ {
+ { 0x7585d4263af77a3c, -0x205184ee0116ebb3, -0x5af98f7fa608e6c3, 0x14f29a5383922037 },
+ { 0x524c299c18d0936d, -0x37944a9375f3e5f4, -0x5c8afad124b579cf, 0x5c0efde4bc754562 },
+ { -0x208e8123da4d280b, 0x21f970db99b53040, -0x256dcb483c12b39e, 0x5e72365c7bee093e }
+ },
+ {
+ { 0x7d9339062f08b33e, 0x5b9659e5df9f32be, -0x5300c252e0614203, 0x70b20555cb7349b7 },
+ { 0x575bfc074571217f, 0x3779675d0694d95b, -0x65f5c8440be6e1cd, 0x77f1104c47b4eabc },
+ { -0x41aeec3aaaeed3b4, 0x6688423a9a881fcd, 0x446677855e503b47, 0x0e34398f4a06404a }
+ },
+ {
+ { 0x18930b093e4b1928, 0x7de3e10e73f3f640, -0x0bcde8258cc6a291, 0x6f8aded6ca379c3e },
+ { -0x4982dd26c1314218, 0x09b3e84127822f07, 0x743fa61fb05b6d8d, 0x5e5405368a362372 },
+ { -0x1cbfedc202484d66, 0x487b97e1a21ab291, -0x066982fd02196b62, 0x780de72ec8d3de97 }
+ },
+ {
+ { 0x671feaf300f42772, -0x708d14d5d573be56, 0x29a17fd797373292, 0x1defc6ad32b587a6 },
+ { 0x0ae28545089ae7bc, 0x388ddecf1c7f4d06, 0x38ac15510a4811b8, 0x0eb28bf671928ce4 },
+ { -0x50a441e510ae6a59, 0x148c1277917b15ed, 0x2991f7fb7ae5da2e, 0x467d201bf8dd2867 }
+ },
+},
+{
+ {
+ { 0x745f9d56296bc318, -0x66ca7f2b27ead19b, -0x4f1a4ec0a7c61632, 0x51fc2b28d43921c0 },
+ { 0x7906ee72f7bd2e6b, 0x05d270d6109abf4e, -0x72a301ba46be575c, 0x44c218671c974287 },
+ { 0x1b8fd11795e2a98c, 0x1c4e5ee12b6b6291, 0x5b30e7107424b572, 0x6e6b9de84c4f4ac6 }
+ },
+ {
+ { 0x6b7c5f10f80cb088, 0x736b54dc56e42151, -0x3d49df5a3910663c, 0x5f4c802cc3a06f42 },
+ { -0x200da031b4e21eaf, -0x27be3f381ee3bfdb, 0x2554b3c854749c87, 0x2d292459908e0df9 },
+ { -0x649a370e82f8ad26, -0x77e31cc738811800, -0x3c4aeb0fa49d061d, 0x66ed5dd5bec10d48 }
+ },
+ {
+ { -0x0f520c363435fb83, -0x7e3c4d340baad095, -0x3025eed2bb8ca06d, 0x1f23a0c77e20048c },
+ { 0x7d38a1c20bb2089d, -0x7f7ccb1e69332bee, -0x3b58f47393682ced, 0x2eacf8bc03007f20 },
+ { -0x0dcab9841a43ea90, 0x03d2d9020dbab38c, 0x27529aa2fcf9e09e, 0x0840bef29d34bc50 }
+ },
+ {
+ { -0x32ab1f9480c81b15, -0x733ea0780a169336, -0x47db744f2ca68232, 0x246affa06074400c },
+ { 0x796dfb35dc10b287, 0x27176bcd5c7ff29d, 0x7f3d43e8c7b24905, 0x0304f5a191c54276 },
+ { 0x37d88e68fbe45321, -0x79f68ab73f28afce, 0x4e9b13ef894a0d35, 0x25a83cac5753d325 }
+ },
+ {
+ { -0x60f099d6c6ad491e, 0x33db5e0e0934267b, -0x00badad429f60124, 0x06be10f5c506e0c9 },
+ { 0x10222f48eed8165e, 0x623fc1234b8bcf3a, 0x1e145c09c221e8f0, 0x7ccfa59fca782630 },
+ { 0x1a9615a9b62a345f, 0x22050c564a52fecc, -0x585d877ad743f202, 0x5e82770a1a1ee71d }
+ },
+ {
+ { -0x17fd17f5bdcc638c, 0x34175166a7fffae5, 0x34865d1f1c408cae, 0x2cca982c605bc5ee },
+ { 0x35425183ad896a5c, -0x1798c5041872ad0a, 0x2c66f25f92a35f64, 0x09d04f3b3b86b102 },
+ { -0x02d2a2cae6824192, 0x207c2eea8be4ffa3, 0x2613d8db325ae918, 0x7a325d1727741d3e }
+ },
+ {
+ { -0x132d82fe81d5f896, -0x28779760e9c9b6a2, 0x52a61af0919233e5, 0x2a479df17bb1ae64 },
+ { -0x2fc946442e92021e, -0x5dfaa8a83b6857d7, -0x71933699580ed999, 0x4d3b1a791239c180 },
+ { -0x61a11171cc24d8f0, 0x189854ded6c43ca5, -0x5be3dd3a6d8e7ec8, 0x27ad5538a43a5e9b }
+ },
+ {
+ { -0x34a5829c71b8f884, -0x7248ac9edf5e3fa7, 0x549e1e4d8bedfdcc, 0x080153b7503b179d },
+ { 0x2746dd4b15350d61, -0x2fc03437116ade49, -0x1791c9a5ec798d36, 0x510e987f7e7d89e2 },
+ { -0x2259626cf5c12c1d, 0x3d386ef1cd60a722, -0x37e852a74255b11a, 0x23be8d554fe7372a }
+ },
+},
+{
+ {
+ { -0x43e10b42a9851857, 0x3f624cb2d64498bd, -0x1bef9b2dd3e0b138, 0x2ef9c5a5ba384001 },
+ { -0x6a016e658b10b053, 0x3a827becf6a308a2, -0x69b1fe2cf65b84ff, 0x71c43c4f5ba3c797 },
+ { -0x4902920905618b33, -0x0e7d87431b50d986, -0x7daa4c2f0e1066f2, 0x5a758ca390c5f293 }
+ },
+ {
+ { -0x731f6e74e29e236c, -0x7212c9b9657ecf9a, -0x2b1957d65017552d, 0x0a738027f639d43f },
+ { -0x5d48d8ef26b9db6b, 0x3aa8c6d2d57d5003, -0x1c2bff405f4b7836, 0x2dbae244b3eb72ec },
+ { -0x67f0b5d0a8001e34, 0x00670d0de1839843, 0x105c3f4a49fb15fd, 0x2698ca635126a69c }
+ },
+ {
+ { 0x2e3d702f5e3dd90e, -0x61c0f6e71b2dac7a, 0x5e773ef6024da96a, 0x3c004b0c4afa3332 },
+ { -0x189ace77cd4f4588, 0x381831f7925cff8b, 0x08a81b91a0291fcc, 0x1fb43dcc49caeb07 },
+ { -0x6556b953f90b47d5, 0x1ca284a5a806c4f3, 0x3ed3265fc6cd4787, 0x6b43fd01cd1fd217 }
+ },
+ {
+ { -0x4a38bda7c189f10d, 0x75dc52b9ee0ab990, -0x40ebd83df8d46dc1, 0x73420b2d6ff0d9f0 },
+ { -0x3858a2b4b9683abc, 0x15fdf848df0fffbf, 0x2868b9ebaa46785a, 0x5a68d7105b52f714 },
+ { -0x50d30934617ae1fa, -0x70a6c6ec39ddc73c, -0x2575476966040c8d, 0x3db5632fea34bc9e }
+ },
+ {
+ { 0x2e4990b1829825d5, -0x12151478c165766f, -0x110fc2c6b38fb508, 0x59197ea495df2b0e },
+ { -0x0b9111d408a22628, 0x0d17b1f6396759a5, 0x1bf2d131499e7273, 0x04321adf49d75f13 },
+ { 0x04e16019e4e55aae, -0x1884bc8581d06d17, -0x3831d23e90ea655c, 0x45eafdc1f4d70cc0 }
+ },
+ {
+ { -0x49f1b9db30334e13, 0x59dbc292bd5c0395, 0x31a09d1ddc0481c9, 0x3f73ceea5d56d940 },
+ { 0x698401858045d72b, 0x4c22faa2cf2f0651, -0x6be5c99a94ddd23a, 0x5a5eebc80362dade },
+ { -0x4858402ef5b1723a, -0x41a8ff81bb364cc7, 0x60c1207f1557aefa, 0x26058891266218db }
+ },
+ {
+ { 0x4c818e3cc676e542, 0x5e422c9303ceccad, -0x13f833354bed60f8, 0x0dedfa10b24443b8 },
+ { 0x59f704a68360ff04, -0x3c26c021899e190c, -0x7ce4d58ced78caaf, 0x54ad0c2e4e615d57 },
+ { -0x11c4982a47d4add6, 0x36f163469fa5c1eb, -0x5a4b2d0d913e602d, 0x62ecb2baa77a9408 }
+ },
+ {
+ { -0x6df8d7c95049d78c, 0x5fcd5e8579e104a5, 0x5aad01adc630a14a, 0x61913d5075663f98 },
+ { -0x1a1286ad9eead4c3, 0x4962357d0eddd7d1, 0x7482c8d0b96b4c71, 0x2e59f919a966d8be },
+ { 0x0dc62d361a3231da, -0x05b8a7cd6bdffd90, 0x02d801513f9594ce, 0x3ddbc2a131c05d5c }
+ },
+},
+{
+ {
+ { -0x048ca53dffb5ca2f, 0x31de0f433a6607c3, 0x7b8591bfc528d599, 0x55be9a25f5bb050c },
+ { 0x3f50a50a4ffb81ef, -0x4e1fcaf6c40bdf41, -0x645571e33955d330, 0x32239861fa237a40 },
+ { 0x0d005acd33db3dbf, 0x0111b37c80ac35e2, 0x4892d66c6f88ebeb, 0x770eadb16508fbcd }
+ },
+ {
+ { -0x0e2c497e5faf8e47, 0x2207659a3592ff3a, 0x5f0169297881e40e, 0x16bedd0e86ba374e },
+ { -0x7bae061fa1b17623, -0x3f9cfd004386c6c9, 0x5d22749556a6495c, 0x09a6755ca05603fb },
+ { 0x5ecccc4f2c2737b5, 0x43b79e0c2dccb703, 0x33e008bc4ec43df3, 0x06c1b840f07566c0 }
+ },
+ {
+ { 0x69ee9e7f9b02805c, -0x34007d75ab82e9c0, 0x3d93a869b2430968, 0x46b7b8cd3fe26972 },
+ { 0x7688a5c6a388f877, 0x02a96c14deb2b6ac, 0x64c9f3431b8c2af8, 0x3628435554a1eed6 },
+ { -0x167edf7901811420, 0x4cba6be72f515437, 0x1d04168b516efae9, 0x5ea1391043982cb9 }
+ },
+ {
+ { 0x6f2b3be4d5d3b002, -0x5013cc2695f63780, 0x035f73a4a8bcc4cc, 0x22c5b9284662198b },
+ { 0x49125c9cf4702ee1, 0x4520b71f8b25b32d, 0x33193026501fef7e, 0x656d8997c8d2eb2b },
+ { -0x34a73701bcc276c7, -0x765f34d1957281b0, 0x79ca955309fbbe5a, 0x0c626616cd7fc106 }
+ },
+ {
+ { -0x70203c86040bab4f, 0x45a5a970f1a4b771, -0x536de108452ca6eb, 0x42d088dca81c2192 },
+ { 0x1ffeb80a4879b61f, 0x6396726e4ada21ed, 0x33c7b093368025ba, 0x471aa0c6f3c31788 },
+ { -0x7025f0c85fe9ae67, 0x0adadb77c8a0e343, 0x20fbfdfcc875e820, 0x1cf2bea80c2206e7 }
+ },
+ {
+ { -0x67d291e5fd3fbed1, -0x6f05b37c24a71702, 0x01c2f5bcdcb18bc0, 0x686e0c90216abc66 },
+ { -0x3d220e214c9dfd54, -0x6d5a01f62d1d855b, 0x7d1648f6fc09f1d3, 0x74c2cc0513bc4959 },
+ { 0x1fadbadba54395a7, -0x4be5fd5f51f25996, -0x40e60a67445c83f9, 0x6a12b8acde48430d }
+ },
+ {
+ { 0x793bdd801aaeeb5f, 0x00a2a0aac1518871, -0x175c8c5ce0dec94c, 0x48aab888fc91ef19 },
+ { -0x072515e0c62b6a27, 0x592c190e525f1dfc, -0x247342fb3666e2e5, 0x11f7fda3d88f0cb7 },
+ { 0x041f7e925830f40e, 0x002d6ca979661c06, -0x79236006d4fb95d2, 0x760360928b0493d1 }
+ },
+ {
+ { -0x4bcef71a96a5f4fb, 0x6cb00ee8ad37a38b, 0x5edad6eea3537381, 0x3f2602d4b6dc3224 },
+ { 0x21bb41c6120cf9c6, -0x154d55ed21325a65, -0x3e58d2fdf55b74cc, 0x215d4d27e87d3b68 },
+ { -0x374db849a4350e64, 0x49779dc3b1b2c652, -0x765e7f442a131d1e, 0x13f098a3cec8e039 }
+ },
+},
+{
+ {
+ { -0x0c55a85dd86944ec, -0x77c5454864f825df, -0x1ab41de7ce5fc6e4, 0x5ee7fb38d83205f9 },
+ { -0x6523f00631a13ab5, 0x039c2a6b8c2f130d, 0x028007c7f0f89515, 0x78968314ac04b36b },
+ { 0x538dfdcb41446a8e, -0x5a530256bcb6c807, 0x46af908d263c8c78, 0x61d0633c9bca0d09 }
+ },
+ {
+ { -0x525cd74307038c21, -0x117b96a2590fc804, 0x637fb4db38c2a909, 0x5b23ac2df8067bdc },
+ { 0x63744935ffdb2566, -0x3a42947687f49745, 0x6f1b3280553eec03, 0x6e965fd847aed7f5 },
+ { -0x652d46ac117fad85, -0x1770e65505219273, 0x0e711704150e82cf, 0x79b9bbb9dd95dedc }
+ },
+ {
+ { -0x2e66825171608c8c, -0x5fcd5d073044f7ea, -0x329345ed92bba0f6, 0x1ba811460accb834 },
+ { -0x144caabf95ced93e, -0x2d9c7c5797373c6d, 0x6c0c6429e5b97a82, 0x5065f158c9fd2147 },
+ { 0x708169fb0c429954, -0x1eb9ff5328913099, 0x2eaab98a70e645ba, 0x3981f39e58a4faf2 }
+ },
+ {
+ { -0x37ba205a92199022, -0x1ead5affd3bfb7c6, -0x162d1e9c384b09ce, 0x30f4452edcbc1b65 },
+ { 0x18fb8a7559230a93, 0x1d168f6960e6f45d, 0x3a85a94514a93cb5, 0x38dc083705acd0fd },
+ { -0x7a92d87d3a8a68c0, -0x05ecba9606634134, -0x77bb038c3f15b18f, 0x632d9a1a593f2469 }
+ },
+ {
+ { -0x40f602ee12f37b59, 0x63f071810d9f693a, 0x21908c2d57cf8779, 0x3a5a7df28af64ba2 },
+ { -0x094494ea47f8345a, 0x1823c7dfbc54f0d7, -0x44e268fc91d698f5, 0x0b24f48847ed4a57 },
+ { -0x23252b41aee41539, -0x5bac7f8a12d9330e, -0x1e630060ffa0659b, 0x34fcf74475481f63 }
+ },
+ {
+ { -0x5a44e25487305568, 0x5ceda267190b72f2, -0x6cf636eef56d9f72, 0x0119a3042fb374b0 },
+ { -0x3e681fb387689836, -0x478eb234c726b983, 0x55de888283f95fa8, 0x3d3bdc164dfa63f7 },
+ { 0x67a2d89ce8c2177d, 0x669da5f66895d0c1, -0x0a9a671a4d7d5d50, 0x56c088f1ede20a73 }
+ },
+ {
+ { 0x581b5fac24f38f02, -0x56f41601451cf343, -0x65de96fd75306d10, 0x038b7ea48359038f },
+ { 0x336d3d1110a86e17, -0x280c77cdf48a4d06, -0x06eacc89daf8d678, 0x09674c6b99108b87 },
+ { -0x60b107de66ce9008, 0x2f49d282eaa78d4f, 0x0971a5ab5aef3174, 0x6e5e31025969eb65 }
+ },
+ {
+ { 0x3304fb0e63066222, -0x04caf976785345c1, -0x42e6db8873ef9e5d, 0x3058ad43d1838620 },
+ { -0x4e939d0a781a6c05, 0x4999eddeca5d3e71, -0x4b6e3e1feb33c193, 0x08f5114789a8dba8 },
+ { 0x323c0ffde57663d0, 0x05c3df38a22ea610, -0x423875425366b066, 0x26549fa4efe3dc99 }
+ },
+},
+{
+ {
+ { 0x04dbbc17f75396b9, 0x69e6a2d7d2f86746, -0x39bf62660ac1543a, 0x606175f6332e25d2 },
+ { 0x738b38d787ce8f89, -0x49d9a71dbe865773, 0x30738c9cf151316d, 0x49128c7f727275c9 },
+ { 0x4021370ef540e7dd, 0x0910d6f5a1f1d0a5, 0x4634aacd5b06b807, 0x6a39e6356944f235 }
+ },
+ {
+ { 0x1da1965774049e9d, -0x0432915e6701cad5, -0x4e3432af33adc95a, 0x1f5ec83d3f9846e2 },
+ { -0x6932a9bf206f0c19, 0x6c3a760edbfa25ea, 0x24f3ef0959e33cc4, 0x42889e7e530d2e58 },
+ { -0x7104dc3ccd73348b, -0x50bd5df822789117, 0x20fbdadc5dfae796, 0x241e246b06bf9f51 }
+ },
+ {
+ { 0x7eaafc9a6280bbb8, 0x22a70f12f403d809, 0x31ce40bb1bfc8d20, 0x2bc65635e8bd53ee },
+ { 0x29e68e57ad6e98f6, 0x4c9260c80b462065, 0x3f00862ea51ebb4b, 0x5bc2c77fb38d9097 },
+ { -0x172a23605694526d, -0x1a704e8221e6b824, 0x681532ea65185fa3, 0x1fdd6c3b034a7830 }
+ },
+ {
+ { -0x63ec595ad2270857, 0x2dbb1f8c3efdcabf, -0x69e1cdbfa1f7084b, 0x48c8a121bbe6c9e5 },
+ { 0x0a64e28c55dc18fe, -0x1c206166cc661423, 0x79ac432370e2e652, 0x35ff7fc33ae4cc0e },
+ { -0x03bea583a69b9bbb, -0x2ddb4d283ed749eb, 0x6035c9c905fbb912, 0x42d7a91274429fab }
+ },
+ {
+ { -0x565b76b86cc25a44, 0x4a58920ec2e979ec, -0x69277fffec1a53b4, 0x453692d74b48b147 },
+ { 0x4e6213e3eaf72ed3, 0x6794981a43acd4e7, -0x00ab8321914af735, 0x6fed19dd10fcb532 },
+ { -0x2288a26657aa6391, -0x0bd5debf20ffc1dc, 0x5223e229da928a66, 0x063f46ba6d38f22c }
+ },
+ {
+ { 0x39843cb737346921, -0x58b804f8c7376bb9, -0x34727fce5dbacf82, 0x67810f8e6d82f068 },
+ { -0x2d2dbd76a0ac996c, -0x35cc5d3abd6c64d4, -0x67905259382246a4, 0x5a152c042f712d5d },
+ { 0x3eeb8fbcd2287db4, 0x72c7d3a301a03e93, 0x5473e88cbd98265a, 0x7324aa515921b403 }
+ },
+ {
+ { -0x52dc092517dcab35, 0x6962502ab6571a6d, -0x649ae9c91c71c82f, 0x5cac5005d1a3312f },
+ { -0x7a86bd0b93c34172, -0x5e2c9b4eb8cf3fba, 0x1c8ed914d23c41bf, 0x0838e161eef6d5d2 },
+ { -0x733eab33161c66fc, 0x5b3a040b84de6846, -0x3b2759e34e41a292, 0x40fb897bd8861f02 }
+ },
+ {
+ { -0x1a8127b8a54ef89f, 0x71435e206fd13746, 0x342f824ecd025632, 0x4b16281ea8791e7b },
+ { -0x7b3a556f9d21c85f, 0x421da5000d1d96e1, 0x788286306a9242d9, 0x3c5e464a690d10da },
+ { -0x2e3efe2af47ecc7f, -0x2119f0ee891197d8, 0x0cb68893383f6409, 0x6183c565f6ff484a }
+ },
+},
+{
+ {
+ { -0x24b97ab650c09992, -0x288030fb0eb5f15b, 0x3df23ff7a4ba0c47, 0x3a10dfe132ce3c85 },
+ { 0x741d5a461e6bf9d6, 0x2305b3fc7777a581, -0x2baa8b5d9b8b2c27, 0x1926e1dc6401e0ff },
+ { -0x1f80b17515e83160, 0x2fd515463a1fc1fd, 0x175322fd31f2c0f1, 0x1fa1d01d861e5d15 }
+ },
+ {
+ { 0x38dcac00d1df94ab, 0x2e712bddd1080de9, 0x7f13e93efdd5e262, 0x73fced18ee9a01e5 },
+ { -0x337faa6b82a667ce, 0x1e4656da37f15520, -0x6609088bb1fa6ce0, 0x773563bc6a75cf33 },
+ { 0x06b1e90863139cb3, -0x5b6c25983a5fc133, -0x72883137529c76ce, 0x1f426b701b864f44 }
+ },
+ {
+ { -0x0e81ca376e5edaae, -0x48947eaca8a1638a, -0x057cbf90f2648dc2, 0x0b76bb1b3fa7e438 },
+ { -0x1036d9b3be6ee3ff, -0x0e5c4847e85dd3db, 0x5875da6bf30f1447, 0x4e1af5271d31b090 },
+ { 0x08b8c1f97f92939b, -0x41988e342bbb5492, 0x22e5646399bb8017, 0x7b6dd61eb772a955 }
+ },
+ {
+ { 0x5730abf9ab01d2c7, 0x16fb76dc40143b18, -0x7993419a5f344d7f, 0x53fa9b659bff6afe },
+ { -0x48523e17af0cc26e, 0x7998fa4f608cd5cf, -0x5269d2427203a425, 0x703e9bceaf1d2f4f },
+ { 0x6c14c8e994885455, -0x7bc5a2999a512b1b, 0x181bb73ebcd65af1, 0x398d93e5c4c61f50 }
+ },
+ {
+ { -0x3c78839f2d181c0e, 0x3b34aaa030828bb1, 0x283e26e7739ef138, 0x699c9c9002c30577 },
+ { 0x1c4bd16733e248f3, -0x4261ed78ea40f5a1, -0x2bc0730f5ef4fc8a, 0x53b09b5ddf191b13 },
+ { -0x0cf958dca6b90e34, -0x6de8e74a331a2683, 0x28cdd24781b4e975, 0x51caf30c6fcdd907 }
+ },
+ {
+ { 0x737af99a18ac54c7, -0x6fcc87233ae34cf1, 0x2b89bc334ce10cc7, 0x12ae29c189f8e99a },
+ { -0x59f458bd898b1ff6, 0x630e8570a17a7bf3, 0x3758563dcf3324cc, 0x5504aa292383fdaa },
+ { -0x56613f34e0f2fe31, 0x0dd1efcc3a34f7ae, 0x55ca7521d09c4e22, 0x5fd14fe958eba5ea }
+ },
+ {
+ { 0x3c42fe5ebf93cb8e, -0x412057aec92ba9a1, -0x1f0f7a6177bddf18, 0x7dd73f960725d128 },
+ { -0x4a23d220d7ba54d4, 0x069491b10a7fe993, 0x4daaf3d64002e346, 0x093ff26e586474d1 },
+ { -0x4ef2db0197fa67d7, 0x75730672dbaf23e5, 0x1367253ab457ac29, 0x2f59bcbc86b470a4 }
+ },
+ {
+ { 0x7041d560b691c301, -0x7adfe4c0522818e2, 0x16c2e16311335585, 0x2aa55e3d010828b1 },
+ { -0x7c7b82bd66e8eca1, -0x52e46ee0a982fc29, 0x7e7748d9be77aad1, 0x5458b42e2e51af4a },
+ { -0x12ae6d19f3f8bbb1, 0x42c54e2d74421d10, 0x352b4c82fdb5c864, 0x13e9004a8a768664 }
+ },
+},
+{
+ {
+ { 0x1e6284c5806b467c, -0x3a09668418a29f85, -0x749826a74c872d9e, 0x3d88d66a81cd8b70 },
+ { -0x344a4aaa93fcd401, -0x208e6e48d6d685c6, -0x3e008cd952127e45, 0x71ade8bb68be03f5 },
+ { -0x7489856cdfb12877, 0x762fcacb9fa0ae2a, 0x771febcc6dce4887, 0x343062158ff05fb3 }
+ },
+ {
+ { -0x031de6f8d584ce4c, 0x4d7adc75aa578016, 0x0ec276a687479324, 0x6d6d9d5d1fda4beb },
+ { -0x1fa25e581e0a40b7, 0x26457d6dd4736092, 0x77dcb07773cc32f6, 0x0a5d94969cdd5fcd },
+ { 0x22b1a58ae9b08183, -0x026a2f8e3ea3c775, -0x567edc897af5fae9, 0x33384cbabb7f335e }
+ },
+ {
+ { 0x33bc627a26218b8d, -0x157f4de03857f39f, -0x6ba74ed4e8c1611a, 0x076247be0e2f3059 },
+ { 0x3c6fa2680ca2c7b5, 0x1b5082046fb64fda, -0x14accb63abce2922, 0x5278b38f6b879c89 },
+ { 0x52e105f61416375a, -0x136850c97a54145c, 0x26e6b50623a67c36, 0x5cf0e856f3d4fb01 }
+ },
+ {
+ { -0x415131cec24cbd58, -0x345c9ca47bd24812, -0x177399df7e80ec11, 0x1b9438aa4e76d5c6 },
+ { -0x0936978ce517354c, 0x5e20741ecb4f92c5, 0x2da53be58ccdbc3e, 0x2dddfea269970df7 },
+ { -0x75af8881e990fce6, 0x067b39f10fb7a328, 0x1925c9a6010fbd76, 0x6df9b575cc740905 }
+ },
+ {
+ { -0x13203ca4b73521bf, 0x6a88471fb2328270, 0x740a4a2440a01b6a, 0x471e5796003b5f29 },
+ { 0x42c1192927f6bdcf, -0x706e6e85bfc29e36, -0x23e3a5997461e09f, 0x1596047804ec0f8d },
+ { -0x2569444c5312c854, 0x7a2423b5e9208cea, 0x24cc5c3038aebae2, 0x50c356afdc5dae2f }
+ },
+ {
+ { -0x30126320e4ce469c, -0x0b79567a735ae50d, 0x14897265ea8c1f84, 0x784a53dd932acc00 },
+ { 0x09dcbf4341c30318, -0x1145f9ee7ce7e232, -0x3e863f3123e1d65f, 0x1dbf7b89073f35b0 },
+ { 0x2d99f9df14fc4920, 0x76ccb60cc4499fe5, -0x5becd3441a30fffd, 0x3f93d82354f000ea }
+ },
+ {
+ { -0x1553ed2e861eb688, -0x006dc00c441400a2, 0x4af663e40663ce27, 0x0fd381a811a5f5ff },
+ { -0x7e7c189761fb317b, 0x678fb71e04465341, -0x526dfa7099771254, 0x5da350d3532b099a },
+ { -0x0da953135bc920ac, 0x108b6168ae69d6e8, 0x20d986cb6b5d036c, 0x655957b9fee2af50 }
+ },
+ {
+ { -0x423ebf642ffd2f54, 0x66660245b5ccd9a6, -0x7dce823b05217a14, 0x02fe934b6ad7df0d },
+ { -0x51574f8056fdfcf1, -0x077389950b9c2ebd, 0x15b083663c787a60, 0x08eab1148267a4a8 },
+ { -0x10a30eff3048158c, 0x22897633a1cb42ac, -0x2b31f3ab310d7a1e, 0x30408c048a146a55 }
+ },
+},
+{
+ {
+ { -0x44d1ff36e6c47881, -0x131c576f1f23af95, -0x130c483fc9219b61, 0x5f46040898de9e1a },
+ { 0x739d8845832fcedb, -0x05c729365194079d, 0x32bc0dcab74ffef7, 0x73937e8814bce45e },
+ { -0x46fc8ee9d6840b73, -0x562ec4dd2b0f97cc, -0x1e68eaa8b969423a, 0x2cf8a4e891d5e835 }
+ },
+ {
+ { 0x2cb5487e17d06ba2, 0x24d2381c3950196b, -0x289a637e7a6875d0, 0x7a6f7f2891d6a4f6 },
+ { 0x6d93fd8707110f67, -0x22b3f62c83c74ab7, 0x7cb16a4cc2736a86, 0x2049bd6e58252a09 },
+ { 0x7d09fd8d6a9aef49, -0x0f119f41a4c246f5, 0x4c21b52c519ebfd4, 0x6011aadfc545941d }
+ },
+ {
+ { 0x63ded0c802cbf890, -0x042f6735f2009556, 0x624d0afdb9b6ed99, 0x69ce18b779340b1e },
+ { 0x5f67926dcf95f83c, 0x7c7e856171289071, -0x295e180c667085a5, 0x6fc5cc1b0b62f9e0 },
+ { -0x2e10aad74d678635, -0x22e551c32b816f6e, 0x127e0442189f2352, 0x15596b3ae57101f1 }
+ },
+ {
+ { 0x09ff31167e5124ca, 0x0be4158bd9c745df, 0x292b7d227ef556e5, 0x3aa4e241afb6d138 },
+ { 0x462739d23f9179a2, -0x007cedce68292231, 0x1307deb553f2148a, 0x0d2237687b5f4dda },
+ { 0x2cc138bf2a3305f5, 0x48583f8fa2e926c3, 0x083ab1a25549d2eb, 0x32fcaa6e4687a36c }
+ },
+ {
+ { 0x3207a4732787ccdf, 0x17e31908f213e3f8, -0x2a4d132809f269b2, 0x746f6336c2600be9 },
+ { 0x7bc56e8dc57d9af5, 0x3e0bd2ed9df0bdf2, -0x553feb21dd101b5d, 0x4627e9cefebd6a5c },
+ { 0x3f4af345ab6c971c, -0x1d77148d66bc8ce1, 0x33596a8a0344186d, 0x7b4917007ed66293 }
+ },
+ {
+ { 0x54341b28dd53a2dd, -0x55e86fa420bd03c1, 0x0ff592d94dd2f8f4, 0x1d03620fe08cd37d },
+ { 0x2d85fb5cab84b064, 0x497810d289f3bc14, 0x476adc447b15ce0c, 0x122ba376f844fd7b },
+ { -0x3dfdcd325d4b1aac, -0x612f02bdeea2e781, 0x2eabb4be7dd479d9, 0x02c70bf52b68ec4c }
+ },
+ {
+ { -0x531acd40ba728d1f, 0x5be768e07cb73cb5, 0x56cf7d94ee8bbde7, 0x6b0697e3feb43a03 },
+ { -0x5d7813b4a2f4d045, 0x415c5790074882ca, -0x1fbb59e13e2f7ea4, 0x26334f0a409ef5e0 },
+ { -0x49370fb5209d5c40, 0x3ef000ef076da45d, -0x636346a7b60f2d57, 0x1cc37f43441b2fae }
+ },
+ {
+ { -0x2899a90e36315147, 0x1c5b15f818e5656a, 0x26e72832844c2334, 0x3a346f772f196838 },
+ { 0x508f565a5cc7324f, -0x2f9e3b3f1af956de, -0x04e75424a3ba53e7, 0x6c6809c10380314a },
+ { -0x2d2aaeed1d259538, -0x1642fcce4e17ae13, -0x69f8b92271398d9e, 0x05911b9f6ef7c5d0 }
+ },
+},
+{
+ {
+ { 0x01c18980c5fe9f94, -0x329a98968e902a38, -0x7e9fba3c2e6a5f7a, 0x6e2b7f3266cc7982 },
+ { -0x162328a949c800d3, -0x13b3cb7036780f3c, -0x312a6d7a0c043849, 0x3305354793e1ea87 },
+ { -0x337fdb97083ca971, -0x6216457de668b34d, -0x5448dd634a47eca0, 0x44e2017a6fbeba62 }
+ },
+ {
+ { -0x7807d30c49359133, 0x580f893e18f4a0c2, 0x058930072604e557, 0x6cab6ac256d19c1d },
+ { -0x3b3d58bcab25488c, -0x71a2b3c3b150fce6, -0x4893dc2dbd7c70e9, 0x749a098f68dce4ea },
+ { -0x23201f5fd33e21a0, 0x032665ff51c5575b, 0x2c0c32f1073abeeb, 0x6a882014cd7b8606 }
+ },
+ {
+ { -0x2eee2e8350b01492, 0x050bba42b33aa4a3, 0x17514c3ceeb46c30, 0x54bedb8b1bc27d75 },
+ { -0x5ad56d015b8b804b, -0x23ed5bb6e05a5477, -0x27d256b447b85b32, 0x4d77edce9512cc4e },
+ { 0x77c8e14577e2189c, -0x5c1b909500663bbb, 0x3144dfc86d335343, 0x3a96559e7c4216a9 }
+ },
+ {
+ { 0x4493896880baaa52, 0x4c98afc4f285940e, -0x10b558645babb74a, 0x5278c510a57aae7f },
+ { 0x12550d37f42ad2ee, -0x74871ffb675e040b, 0x5d53078233894cb2, 0x02c84e4e3e498d0c },
+ { -0x5ab22f8bd6b3f46c, -0x0aa2b94720e7004a, -0x0f90133a72517c9a, 0x588657668190d165 }
+ },
+ {
+ { -0x40a7cb0fc21da33d, -0x47783751297eab6a, 0x5105221a9481e892, 0x6760ed19f7723f93 },
+ { -0x2b88edcee5108ee9, 0x50343101229e92c7, 0x7a95e1849d159b97, 0x2449959b8b5d29c9 },
+ { 0x669ba3b7ac35e160, 0x2eccf73fba842056, 0x1aec1f17c0804f07, 0x0d96bc031856f4e7 }
+ },
+ {
+ { -0x4e2acb4f338afa1f, 0x32cd003416c35288, -0x34c95a7ff89d3d63, 0x5bfe69b9237a0bf8 },
+ { 0x3318be7775c52d82, 0x4cb764b554d0aab9, -0x5430c2d83388c26f, 0x3bf4d1848123288a },
+ { 0x183eab7e78a151ab, -0x44166f3666f6c89d, -0x008e8291b5381ccb, 0x4c5cddb325f39f88 }
+ },
+ {
+ { 0x57750967e7a9f902, 0x2c37fdfc4f5b467e, -0x4d9e99c5ce8845ba, 0x3a375e78dc2d532b },
+ { -0x3f0948b29e6f5915, 0x20ea81a42db8f4e4, -0x5742908268cea8a0, 0x33b1d60262ac7c21 },
+ { -0x7ebe18d0d2b22216, -0x191501679d39f838, 0x23c28458573cafd0, 0x46b9476f4ff97346 }
+ },
+ {
+ { 0x1215505c0d58359f, 0x2a2013c7fc28c46b, 0x24a0a1af89ea664e, 0x4400b638a1130e1f },
+ { 0x0c1ffea44f901e5c, 0x2b0b6fb72184b782, -0x1a78006efeeb2478, 0x37130f364785a142 },
+ { 0x3a01b76496ed19c3, 0x31e00ab0ed327230, 0x520a885783ca15b1, 0x06aab9875accbec7 }
+ },
+},
+{
+ {
+ { 0x5349acf3512eeaef, 0x20c141d31cc1cb49, 0x24180c07a99a688d, 0x555ef9d1c64b2d17 },
+ { -0x3ecc667c0a20f145, -0x3f0c8a70aed3b354, 0x2cf1130a0bb398e1, 0x6b3cecf9aa270c62 },
+ { 0x36a770ba3b73bd08, 0x624aef08a3afbf0c, 0x5737ff98b40946f2, 0x675f4de13381749d }
+ },
+ {
+ { -0x5ed00926c4254ce3, 0x0725d80f9d652dfe, 0x019c4ff39abe9487, 0x60f450b882cd3c43 },
+ { 0x0e2c52036b1782fc, 0x64816c816cad83b4, -0x2f234226969bf8c2, 0x13d99df70164c520 },
+ { 0x014b5ec321e5c0ca, 0x4fcb69c9d719bfa2, 0x4e5f1c18750023a0, 0x1c06de9e55edac80 }
+ },
+ {
+ { -0x002ad4bf00929656, 0x34530b18dc4049bb, 0x5e4a5c2fa34d9897, 0x78096f8e7d32ba2d },
+ { -0x66f085295cc13b1e, 0x6608f938be2ee08e, -0x635ebc3a9cd7baeb, 0x4cf38a1fec2db60d },
+ { -0x5f55559af205a319, -0x063b61d5b74ab874, 0x4f09cc7d7003725b, 0x373cad3a26091abe }
+ },
+ {
+ { -0x0e41570476224453, 0x3bcb2cbc61aeaecb, -0x70a75844e0647263, 0x21547eda5112a686 },
+ { -0x4d6b9cb27d360a84, 0x1fcbfde124934536, -0x6163b24cbe7324a6, 0x0040f3d9454419fc },
+ { -0x210216c602a6792d, -0x0bd8d376aef5c7f4, -0x48d45bf844cee647, 0x63550a334a254df4 }
+ },
+ {
+ { -0x6445a7ba8dab84b7, -0x0cfa39051d3bf720, 0x60e8fa69c734f18d, 0x39a92bafaa7d767a },
+ { 0x6507d6edb569cf37, 0x178429b00ca52ee1, -0x1583ff6f149429a3, 0x3eea62c7daf78f51 },
+ { -0x62db38ec196cd8b2, 0x5f63857768dbd375, 0x70525560eb8ab39a, 0x68436a0665c9c4cd }
+ },
+ {
+ { 0x1e56d317e820107c, -0x3ad997bb7bf5169b, -0x3e1f5e39cdf00386, 0x5373669c91611472 },
+ { -0x43fdca17dfd0c0d9, -0x38a3ff1d9b068a50, -0x6e5b162a5c73dbea, 0x17b6e7f68ab789f9 },
+ { 0x5d2814ab9a0e5257, -0x6f70df7b36354c04, -0x50350a77a4d2e136, 0x1cb4b5a678f87d11 }
+ },
+ {
+ { 0x6b74aa62a2a007e7, -0x0cee1f4f0f8e384f, 0x5707e438000be223, 0x2dc0fd2d82ef6eac },
+ { -0x499b3f94c6b50394, 0x0c88de2498da5fb1, 0x4f8d03164bcad834, 0x330bca78de7434a2 },
+ { -0x67d1007beee68bb2, -0x0696a169d4f8b8dc, -0x3a753eb04036ac05, 0x3c31be1b369f1cf5 }
+ },
+ {
+ { -0x3e97436c0634bd8e, -0x51478ee038312468, 0x7f0e52aa34ac8d7a, 0x41cec1097e7d55bb },
+ { -0x4f0b79b2f76b7512, 0x07dc19ee91ba1c6f, 0x7975cdaea6aca158, 0x330b61134262d4bb },
+ { -0x0869e6285d927f76, -0x44e02b61e261ea93, 0x73d7c36cdba1df27, 0x26b44cd91f28777d }
+ },
+},
+{
+ {
+ { -0x50bb7bd24fd7a0c9, -0x78ace76fb8103721, -0x6a8b1f6e07df6866, 0x0e378d6069615579 },
+ { 0x300a9035393aa6d8, 0x2b501131a12bb1cd, 0x7b1ff677f093c222, 0x4309c1f8cab82bad },
+ { -0x26056e8e7cf8a5ab, 0x4bdb5ad26b009fdc, 0x7829ad2cd63def0e, 0x078fc54975fd3877 }
+ },
+ {
+ { -0x1dffb4a447cc5676, 0x44775dec2d4c3330, 0x3aa244067eace913, 0x272630e3d58e00a9 },
+ { -0x782042ebd77870d3, 0x134636dd1e9421a1, 0x4f17c951257341a3, 0x5df98d4bad296cb8 },
+ { -0x0c98702f1336f4ac, -0x0ffeba64edfbca67, 0x26725fbc3758b89b, 0x4325e4aa73a719ae }
+ },
+ {
+ { -0x12db9d6530960a63, 0x2a4a1ccedd5abbf4, 0x3535ca1f56b2d67b, 0x5d8c68d043b1b42d },
+ { 0x657dc6ef433c3493, 0x65375e9f80dbf8c3, 0x47fd2d465b372dae, 0x4966ab79796e7947 },
+ { -0x11ccd2b21c4bd4f6, -0x27b1a5d4e95b9fe4, 0x78243877078ba3e4, 0x77ed1eb4184ee437 }
+ },
+ {
+ { 0x185d43f89e92ed1a, -0x4fb5e11501b8e63a, 0x499fbe88a6f03f4f, 0x5d8b0d2f3c859bdd },
+ { -0x402b1ec0dfe7c660, -0x5110001dc1c20e9f, -0x49a4fb0f94a2e01d, 0x52e085fb2b62fbc0 },
+ { 0x124079eaa54cf2ba, -0x28db9a14ffe4d919, 0x6843bcfdc97af7fd, 0x0524b42b55eacd02 }
+ },
+ {
+ { -0x43e72352647d6154, 0x23ae7d28b5f579d0, -0x3cb9edd596c7bdcd, 0x1a6110b2e7d4ac89 },
+ { -0x02f2a2411babb850, 0x6cec351a092005ee, -0x665b87bba98a8635, 0x59d242a216e7fa45 },
+ { 0x4f833f6ae66997ac, 0x6849762a361839a4, 0x6985dec1970ab525, 0x53045e89dcb1f546 }
+ },
+ {
+ { -0x7b25c32172ba01ee, -0x42bd3de71bbb1d2e, -0x57ae6987e081ca68, 0x7642c93f5616e2b2 },
+ { -0x34744cb928acac25, -0x03034db451aee1de, -0x345b72bf2af51911, 0x26e3bae5f4f7cb5d },
+ { 0x2323daa74595f8e4, -0x219773747a85414c, 0x3fc48e961c59326e, 0x0b2e73ca15c9b8ba }
+ },
+ {
+ { 0x0e3fbfaf79c03a55, 0x3077af054cbb5acf, -0x2a3aadba24c21c61, 0x015e68c1476a4af7 },
+ { -0x2944bbd73e80afda, -0x614d8ddc04a56359, -0x1c845afce6e639bc, 0x21ce380db59a6602 },
+ { -0x3e2ad7addff995c8, -0x6a9fc1adca8f510d, -0x7cd9a658dd9475b3, 0x5dd689091f8eedc9 }
+ },
+ {
+ { 0x1d022591a5313084, -0x35d2b55129d8f78e, -0x795ed47ad0f402e0, 0x56e6c439ad7da748 },
+ { -0x34537b21402c37aa, 0x1624c348b35ff244, -0x48077235a26352f9, 0x3b0e574da2c2ebe8 },
+ { -0x38fb00b6bd42451a, 0x5e21ade2b2de1f79, -0x16a24c0ca9ad0528, 0x0822b5378f08ebc1 }
+ },
+},
+{
+ {
+ { -0x1e480d6c9d8cfc7d, 0x4b5279ffebca8a2c, -0x25038875402becec, 0x7deb10149c72610f },
+ { 0x51f048478f387475, -0x4da2430b634134c4, -0x6554edbb2660dfab, 0x2c709e6c1c10a5d6 },
+ { -0x349d509578991186, 0x66cbec045553cd0e, 0x588001380f0be4b5, 0x08e68e9ff62ce2ea }
+ },
+ {
+ { 0x2f2d09d50ab8f2f9, -0x5346de723aa6dc21, 0x4a8f342673766cb9, 0x4cb13bd738f719f5 },
+ { 0x34ad500a4bc130ad, -0x72c724b6c2f42b64, -0x5da3c267aff57642, 0x2f1f3f87eeba3b09 },
+ { -0x087b738a1aea49b6, -0x5a6afe4524b56fc8, -0x3df2cec0c08ae4b0, 0x19a1e353c0ae2ee8 }
+ },
+ {
+ { -0x4bde8d322a694243, -0x6c1fbabc671103c0, -0x604eacb84bbef64b, 0x736bd3990266ae34 },
+ { 0x7d1c7560bafa05c3, -0x4c1e5f5f391aa19f, -0x1cad68e73f299b8d, 0x41546b11c20c3486 },
+ { -0x7aacd2af6ccb4c4c, 0x46fd114b60816573, -0x33a0a0cfbda37c8b, 0x412295a2b87fab5c }
+ },
+ {
+ { 0x2e655261e293eac6, -0x7ba56dfcdecc5325, 0x460975cb7900996b, 0x0760bb8d195add80 },
+ { 0x19c99b88f57ed6e9, 0x5393cb266df8c825, 0x5cee3213b30ad273, 0x14e153ebb52d2e34 },
+ { 0x413e1a17cde6818a, 0x57156da9ed69a084, 0x2cbf268f46caccb1, 0x6b34be9bc33ac5f2 }
+ },
+ {
+ { 0x11fc69656571f2d3, -0x393617baacf18c86, -0x1cc5185d2b01afcb, 0x01b9c7b62e6dd30b },
+ { -0x0c20d09bc5873f4e, 0x4c3e971ef22e027c, -0x1382e3a1b63e4a5d, 0x2012c18f0922dd2d },
+ { -0x77f4aa1aa53762d7, 0x1483241f45a0a763, 0x3d36efdfc2e76c1f, 0x08af5b784e4bade8 }
+ },
+ {
+ { -0x1d8ceb2d7633d3b5, 0x4be4bd11a287178d, 0x18d528d6fa3364ce, 0x6423c1d5afd9826e },
+ { 0x283499dc881f2533, -0x62fada25886cdc4a, -0x7685220498cbbe0c, 0x32b79d71163a168d },
+ { -0x337a072612034c96, 0x22bcc28f3746e5f9, -0x1b621cc7061a2c33, 0x480a5efbc13e2dcc }
+ },
+ {
+ { -0x499eb31bbd31dde1, 0x6e199dcc4c053928, 0x663fb4a4dc1cbe03, 0x24b31d47691c8e06 },
+ { 0x0b51e70b01622071, 0x06b505cf8b1dafc5, 0x2c6bb061ef5aabcd, 0x47aa27600cb7bf31 },
+ { 0x2a541eedc015f8c3, 0x11a4fe7e7c693f7c, -0x0f5099ecb15d872a, 0x545b585d14dda094 }
+ },
+ {
+ { 0x6204e4d0e3b321e1, 0x3baa637a28ff1e95, 0x0b0ccffd5b99bd9e, 0x4d22dc3e64c8d071 },
+ { 0x67bf275ea0d43a0f, -0x521971cbf7641142, 0x4289134cd479e72e, 0x0f62f9c332ba5454 },
+ { -0x034b9a7629c4a0c7, 0x5cae6a3f57cbcf61, -0x01453d2d6ac505fb, 0x1c0fa01a36371436 }
+ },
+},
+{
+ {
+ { -0x3ee11a17ab3ac052, 0x6a0b06c12b4f3ff4, 0x33540f80e0b67a72, 0x15f18fc3cd07e3ef },
+ { -0x18ab8bb64383296e, 0x0f9abeaae6f73ddf, 0x4af01ca700837e29, 0x63ab1b5d3f1bc183 },
+ { 0x32750763b028f48c, 0x06020740556a065f, -0x2ac427ed3cb6a4a8, 0x08706c9b865f508d }
+ },
+ {
+ { -0x3366e4bec74bedba, 0x243b9c526f9ac26b, -0x4610b6b248345443, 0x5fba433dd082ed00 },
+ { -0x0c835d54c2cbc201, 0x1a8c6a2d80abc617, -0x71b61fca2b330036, 0x48b46beebaa1d1b9 },
+ { -0x63b61caa366be530, -0x468cb5218bb6707c, 0x41c3fed066663e5c, 0x0ecfedf8e8e710b3 }
+ },
+ {
+ { 0x744f7463e9403762, -0x0865721172033637, 0x163a649655e4cde3, 0x3b61788db284f435 },
+ { 0x76430f9f9cd470d9, -0x49d533645bd09ff8, 0x1898297c59adad5e, 0x7789dd2db78c5080 },
+ { -0x4dddd7e6f291094e, -0x56b5994db931b406, 0x46c1a77a4f0b6cc7, 0x4236ccffeb7338cf }
+ },
+ {
+ { 0x3bd82dbfda777df6, 0x71b177cc0b98369e, 0x1d0e8463850c3699, 0x5a71945b48e2d1f1 },
+ { -0x7b68bfb2f2aa1d8c, 0x6c6663d9c4ad2b53, -0x13d04f265256a8cc, 0x2617e120cdb8f73c },
+ { 0x6f203dd5405b4b42, 0x327ec60410b24509, -0x63cb8dcf53d577ba, 0x77de29fc11ffeb6a }
+ },
+ {
+ { -0x7ca1ec7013312d36, -0x736150ec1569c466, -0x36a0403f4de9f15a, 0x575e66f3ad877892 },
+ { -0x4f53a8367c488758, 0x53cdcca9d7fe912c, 0x61c2b854ff1f59dc, 0x3a1a2cf0f0de7dac },
+ { -0x667fc5d8377034c6, 0x345a6789275ec0b0, 0x459789d0ff6c2be5, 0x62f882651e70a8b2 }
+ },
+ {
+ { 0x6d822986698a19e0, -0x2367de1e8b28758f, 0x41a85f31f6cb1f47, 0x352721c2bcda9c51 },
+ { 0x085ae2c759ff1be4, 0x149145c93b0e40b7, -0x3b981805800d8c87, 0x4eeecf0ad5c73a95 },
+ { 0x48329952213fc985, 0x1087cf0d368a1746, -0x71ad9e4e993ea55b, 0x2d5b2d842ed24c21 }
+ },
+ {
+ { 0x5eb7d13d196ac533, 0x377234ecdb80be2b, -0x1ebb3003830a51dc, 0x5226bcf9c441acec },
+ { 0x02cfebd9ebd3ded1, -0x2ba4de88c6fde68c, 0x7576f813fe30a1b7, 0x5691b6f9a34ef6c2 },
+ { 0x79ee6c7223e5b547, 0x6f5f50768330d679, -0x128c1e1692752317, 0x27c3da1e1d8ccc03 }
+ },
+ {
+ { 0x28302e71630ef9f6, -0x3d2b5dfcd49b3120, 0x090820304b6292be, 0x5fca747aa82adf18 },
+ { 0x7eb9efb23fe24c74, 0x3e50f49f1651be01, 0x3ea732dc21858dea, 0x17377bd75bb810f9 },
+ { 0x232a03c35c258ea5, -0x790dc5d39434f30f, 0x3dad8d0d2e442166, 0x04a8933cab76862b }
+ },
+},
+{
+ {
+ { 0x69082b0e8c936a50, -0x06365fca3e253a4a, 0x6fb73e54c4dfb634, 0x4005419b1d2bc140 },
+ { -0x2d39fb49dd6bc201, -0x43734131bb304c60, 0x5d254ff397808678, 0x0fa3614f3b1ca6bf },
+ { -0x5ffc014246417d10, 0x2089c1af3a44ac90, -0x07b6606ee6ab0572, 0x1fba218aef40ab42 }
+ },
+ {
+ { 0x4f3e57043e7b0194, -0x57e2c111f7255081, -0x37c639546623210f, 0x6c535d13ff7761d5 },
+ { -0x54ab6bb705370ac2, -0x7e0917658459c8bf, 0x74fd6c7d6c2b5e01, 0x392e3acaa8c86e42 },
+ { 0x4cbd34e93e8a35af, 0x2e0781445887e816, 0x19319c76f29ab0ab, 0x25e17fe4d50ac13b }
+ },
+ {
+ { -0x6ea0800a890ede59, -0x3cb5cdd8d032781d, -0x3345d021b2e41ada, 0x6bba828f8969899b },
+ { 0x0a289bd71e04f676, 0x208e1c52d6420f95, 0x5186d8b034691fab, 0x255751442a9fb351 },
+ { -0x1d2e43996f01c6ff, 0x4cb54a18a0997ad5, -0x68e296eb507b9f2c, 0x559d504f7f6b7be4 }
+ },
+ {
+ { -0x63b76e18092d9903, 0x0744a19b0307781b, -0x77c770e29f9e1dc5, 0x123ea6a3354bd50e },
+ { -0x588c7c874c14ab2b, 0x1d69d366a5553c7c, 0x0a26cf62f92800ba, 0x01ab12d5807e3217 },
+ { 0x118d189041e32d96, -0x46121c3d27cea7b8, 0x1eab4271d83245d9, 0x4a3961e2c918a154 }
+ },
+ {
+ { 0x0327d644f3233f1e, 0x499a260e34fcf016, -0x7c4a58e90d254687, 0x68aceead9bd4111f },
+ { 0x71dc3be0f8e6bba0, -0x293107cb81001cf6, -0x566dbda01ec5b896, 0x2cd6bce3fb1db763 },
+ { 0x38b4c90ef3d7c210, 0x308e6e24b7ad040c, 0x3860d9f1b7e73e23, 0x595760d5b508f597 }
+ },
+ {
+ { -0x77d5341402fdd870, -0x7650ccfa3beea8a0, 0x65f492e37d3473f4, 0x2cb2c5df54515a2b },
+ { 0x6129bfe104aa6397, -0x7069fff75b580335, 0x3f8bc0897d909458, 0x709fa43edcb291a9 },
+ { -0x14f5a2739c02d536, -0x2dd43e99d196b101, 0x2723f36ef8cbb03a, 0x70f029ecf0c8131f }
+ },
+ {
+ { 0x2a6aafaa5e10b0b9, 0x78f0a370ef041aa9, 0x773efb77aa3ad61f, 0x44eca5a2a74bd9e1 },
+ { 0x461307b32eed3e33, -0x51fbd0cc5baa7e19, -0x36bbb62ce6a0fc9a, 0x0b7d5d8a6c314858 },
+ { 0x25d448327b95d543, 0x70d38300a3340f1d, -0x21e3ace39f1e3ad5, 0x272224512c7de9e4 }
+ },
+ {
+ { -0x40844475bd568a04, -0x73a3c68869525ca8, -0x1d803890321255b8, 0x19735fd7f6bc20a6 },
+ { 0x1abc92af49c5342e, -0x001127ee4d190530, -0x105d73720337b1d7, 0x11b5df18a44cc543 },
+ { -0x1c546f2fbd37bd9a, -0x147b71f080e6ab82, 0x2503a1d065a497b9, 0x0fef911191df895f }
+ },
+},
+{
+ {
+ { 0x6ab5dcb85b1c16b7, -0x6b3f0317c384d85b, -0x5b4ee3e58caae842, 0x499238d0ba0eafaa },
+ { -0x4eaf835e54e39147, -0x42bb70c1e949784d, 0x3455fb7f2c7a91ab, 0x7579229e2f2adec1 },
+ { -0x130b91ad854574a9, 0x15a08c478bd1647b, 0x7af1c6a65f706fef, 0x6345fa78f03a30d5 }
+ },
+ {
+ { -0x6c2c341642270f5c, -0x24ead3e402e88cfe, 0x7dbddc6d7f17a875, 0x3e1a71cc8f426efe },
+ { -0x20fd06a0efea185f, 0x790ec41da9b40263, 0x4d3a0ea133ea1107, 0x54f70be7e33af8c9 },
+ { -0x37c35c1c6f45429e, -0x7f121c98fd6e37cd, -0x377fc7332c86ff3c, 0x2c5fc0231ec31fa1 }
+ },
+ {
+ { -0x3bdd1b2efdba919b, -0x78beb53e352b846f, 0x1592e2bba2b6ffdd, 0x75d9d2bff5c2100f },
+ { -0x01456ee8e8fc74b1, -0x1aedc8de3621107f, 0x1c97e4e75d0d8834, 0x68afae7a23dc3bc6 },
+ { 0x5bd9b4763626e81c, -0x766996c9435fd123, 0x0a41193d61f077b3, 0x3097a24200ce5471 }
+ },
+ {
+ { -0x5e9d18db996a3b7a, 0x131d633435a89607, 0x30521561a0d12a37, 0x56704bada6afb363 },
+ { 0x57427734c7f8b84c, -0x0ebe5ec1fe4d8f17, 0x02d1adfeb4e564a6, 0x4bb23d92ce83bd48 },
+ { -0x5093b558ad06ed47, 0x5e665f6cd86770c8, 0x4c35ac83a3c8cd58, 0x2b7a29c010a58a7e }
+ },
+ {
+ { 0x33810a23bf00086e, -0x50316da118c90084, 0x3d60e670e24922d4, 0x11ce9e714f96061b },
+ { -0x3bff80882f3e313d, -0x72efdf49453b6d08, 0x32ec29d57e69daaf, 0x599408759d95fce0 },
+ { 0x219ef713d815bac1, -0x0ebeb9a2b7a41da4, 0x6d5447cc4e513c51, 0x174926be5ef44393 }
+ },
+ {
+ { 0x3ef5d41593ea022e, 0x5cbcc1a20ed0eed6, -0x702db130f8c7d374, 0x6fa42ead06d8e1ad },
+ { -0x4a214d0603a42a45, -0x6d2558d51e27ef1f, -0x503b302348d5e3a7, 0x497d78813fc22a24 },
+ { -0x1d897db5e08cc8e1, 0x7f7cf01c4f5b6736, 0x7e201fe304fa46e7, 0x785a36a357808c96 }
+ },
+ {
+ { 0x070442985d517bc3, 0x6acd56c7ae653678, 0x00a27983985a7763, 0x5167effae512662b },
+ { -0x7da042029cfeb2d5, -0x37adc9639358a875, 0x5b2fcd285c0b5df0, 0x12ab214c58048c8f },
+ { -0x42b1561ef0ac3b4a, 0x1673dc5f8ac91a14, -0x5707e5b1d533e546, 0x33a92a7924332a25 }
+ },
+ {
+ { 0x7ba95ba0218f2ada, -0x300bdd78ccf04636, -0x2525b692a93926f9, 0x5380c296f4beee54 },
+ { -0x622e0b66d86693fe, 0x0cb3b058e04d1752, 0x1f7e88967fd02c3e, 0x2f964268cb8b3eb1 },
+ { -0x62b0d8fb997672f6, 0x3d0987990aff3f7a, -0x2f610c9d982545bb, 0x7761455e7b1c669c }
+ },
+},
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge_precomp base[64][8] = {
+{
+ {
+ { -0x0a73c47b, 0x2fbc93c6, -0x0473f1e7, -0x306cd23a, 0x643d42c2, 0x270b4898, 0x33d4ba65, 0x07cf9d3a },
+ { -0x28bf6ec2, -0x62efc6fb, -0x2ebf414d, -0x02c660fb, 0x688f8a09, -0x5a3e7bcc, -0x6707ed99, 0x44fd2f92 },
+ { 0x4b6fbb59, -0x2442ea99, -0x115d5a16, 0x41e13f00, -0x36a83906, -0x322b62e4, -0x50e91336, 0x4f0ebe1f }
+ },
+ {
+ { -0x6cc38e29, -0x6ddb1804, 0x7a0ff5b5, -0x60b9626a, -0x1e29f8fe, 0x5aa69a65, -0x5782d1d2, 0x590c063f },
+ { 0x42b4d5a8, -0x75665aa0, 0x4e60acf6, -0x70d47ef4, -0x4e91c856, -0x1f61dc95, 0x69c92555, 0x6bb595a6 },
+ { -0x252c97fe, 0x6e347eaa, -0x7c11b7fb, -0x450ca66d, -0x19f897da, 0x3bcabe10, 0x165ed1b8, 0x49314f0a }
+ },
+ {
+ { 0x4cee9730, -0x50da4f58, -0x1779b476, 0x025a8430, -0x60fe98ce, -0x3ee4affe, -0x657f070c, 0x7a164e1b },
+ { -0x5b032d9b, 0x56611fe8, -0x1a3e4583, 0x3bd353fd, 0x214bd6bd, -0x7ece0ce6, 0x555bda62, 0x2ab91587 },
+ { -0x0e98b7cc, -0x640dee0c, -0x09d2076b, -0x47b194e9, 0x5b722a4e, -0x282190f9, 0x63bb2a21, 0x549a04b9 }
+ },
+ {
+ { -0x7103f661, 0x287351b9, 0x7dfd2538, 0x6765c6f4, -0x04f56d9b, -0x35cb72c3, 0x21e58727, 0x680e9103 },
+ { 0x056818bf, -0x6a01faf6, 0x5660faa9, 0x327e8971, 0x06a05073, -0x3c171c33, 0x7445a49a, 0x27933f4c },
+ { -0x1aebd950, -0x40e1ba14, 0x6dba0f94, -0x1cd439c3, -0x7307ad40, -0x1bd68b2b, -0x4f19b3e8, 0x44f079b1 }
+ },
+ {
+ { 0x08a5bb33, -0x5ded43bc, -0x38a112fe, -0x72afb73d, 0x5abfec44, -0x22e414f4, 0x46e206eb, 0x2945ccf1 },
+ { -0x5bb82946, 0x7f9182c3, 0x4b2729b7, -0x2affeb2f, -0x479b5f79, -0x1cc30ee4, -0x14e4aa0d, 0x154a7e73 },
+ { -0x182ffc4d, -0x37cd5e87, 0x00124d7e, 0x5f729d0a, 0x0e6d8ff3, 0x62c1d4a1, 0x38b27a98, 0x68b8ac59 }
+ },
+ {
+ { 0x77157131, 0x3a0ceeeb, 0x00c8af88, -0x64d8ea77, -0x25a658ca, -0x7f9a4998, -0x5d33c743, 0x51e57bb6 },
+ { 0x7b7d8ca4, 0x499806b6, 0x27d22739, 0x575be284, 0x204553b9, -0x44f7a319, -0x51be877c, 0x38b64c41 },
+ { 0x689de3a4, -0x7062526f, -0x07046ec9, 0x175f2428, -0x60304678, 0x050ab532, 0x1354c09f, 0x7865dfa2 }
+ },
+ {
+ { -0x6bb15c41, 0x6b1a5cd0, -0x4c623f2e, 0x7470353a, 0x28542e49, 0x71b25282, 0x283c927e, 0x461bea69 },
+ { -0x55cdde4f, -0x4590d366, 0x3bba23a7, 0x6ca02153, -0x6de6d3c6, -0x621589b1, 0x2e5317e0, 0x1d6edd5d },
+ { -0x54f025ca, 0x217a8aac, 0x3d3549c8, -0x5ad739ac, 0x13ab7568, 0x37d05b8b, 0x3a2cbc37, 0x233cef62 }
+ },
+ {
+ { 0x04dd3e8f, 0x59b75966, -0x1d778fd4, 0x6cb30377, 0x5ed9c323, -0x4ecc639a, 0x61bce52f, 0x0915e760 },
+ { -0x0c6dcb27, -0x1d58a213, -0x1e4aa707, -0x69c28980, 0x6e3c23fb, 0x2c2741ac, 0x320e01c3, 0x3a9024a1 },
+ { -0x57cb5c82, -0x208217cb, 0x689857ea, -0x741e6326, 0x7167b326, 0x2c118536, -0x24102a3e, 0x589eb3d9 }
+ },
+},
+{
+ {
+ { 0x2d9021f6, 0x322d04a5, 0x75c6bf9c, -0x463e60cd, 0x42d20b09, 0x587a3a43, -0x559b019f, 0x143b1cf8 },
+ { 0x553e2df3, 0x7ec851ca, -0x59b7874d, -0x58ed7b35, 0x3288d1e7, -0x194a1be7, 0x5a9a8883, 0x4cf210ec },
+ { -0x69753555, -0x60798383, 0x27092729, 0x5f54258e, -0x15e7f68b, -0x2f582cb5, 0x374126e1, 0x21b546a3 }
+ },
+ {
+ { -0x2e7ade71, 0x490a7a45, 0x46049335, -0x65eac888, -0x33ce1e0a, 0x0060ea09, -0x0791169b, 0x7e041577 },
+ { -0x5d777cbd, -0x56b007a8, 0x5313ed3c, -0x31f12baa, -0x4a40cb06, -0x0aa3c231, -0x36154c8f, 0x0a653ca5 },
+ { -0x31a4980d, 0x66b2a496, -0x42a9686a, -0x00ab6d28, 0x4a592cd0, 0x503cec29, 0x0813acb2, 0x56694365 }
+ },
+ {
+ { 0x1dabb69d, 0x5672f9eb, -0x5017ac04, -0x458f4acb, 0x2796d66d, 0x47ac0f75, -0x6bee8d8b, 0x32a53517 },
+ { 0x26620798, -0x47e724f4, 0x606e354a, 0x5d5c31d9, 0x00a8cdc7, 0x0982fa4f, 0x4653e2d4, 0x17e12bcd },
+ { -0x209b7bc9, -0x2c59bb5a, -0x77f04023, 0x703b6559, -0x52c5e55b, -0x347adac0, -0x71b39b98, 0x0900b3f7 }
+ },
+ {
+ { -0x37e952cf, -0x12d7f042, -0x2719101d, 0x52d9595b, -0x0939dc0b, 0x0fe71772, 0x051e293c, 0x4314030b },
+ { 0x679d651b, 0x0a851b9f, 0x033342f2, -0x1ef7349f, -0x1774cf5d, -0x29fe0a81, -0x12d228ec, 0x371f3aca },
+ { -0x040f4353, -0x2a9fffa2, -0x2e78f3a2, -0x7148f0d2, -0x2f7b1960, 0x201f9033, -0x31849990, 0x4c3a5ae1 }
+ },
+ {
+ { -0x36c25f23, -0x45078a1c, 0x71b9294d, -0x46cd7d59, -0x0b393ba0, -0x7f29c049, -0x15993e7f, 0x6de9c73d },
+ { -0x2347056b, 0x4138a434, 0x6c96840b, -0x78f30983, 0x297be82c, -0x21c77a8c, 0x7262a55a, 0x7c814db2 },
+ { -0x5fb2070e, 0x478904d5, -0x4efebd2d, -0x050451b6, 0x555d0998, -0x0937539d, 0x2f90b104, 0x5aac4a41 }
+ },
+ {
+ { -0x4280aecc, 0x603a0d0a, -0x1e2c51ba, -0x7f7636ce, -0x7867429d, -0x20da6ec7, 0x74ba0235, 0x1c145cd2 },
+ { 0x3ac92908, -0x39b0cd95, -0x199c1e20, 0x5551b282, 0x4a1a4b83, 0x476b35f5, 0x189f68c2, 0x1b9da3fe },
+ { 0x75f3d743, 0x32e83864, 0x6ae5d9ef, 0x365b8baf, 0x385b681e, -0x7dadc74a, 0x167d65e1, 0x234929c1 }
+ },
+ {
+ { 0x1d099fcf, 0x48145cc2, -0x33d7281b, 0x4535c192, 0x48247e01, -0x7f183e1b, 0x3b2973ee, 0x4a5f2874 },
+ { -0x5f885218, -0x67b21355, 0x19eb389d, 0x383f77ad, 0x2954d794, -0x38139482, -0x1483c586, 0x59c77b3a },
+ { 0x225ccf62, -0x2c5228db, -0x4dead3a3, -0x6ee5cc7f, 0x5b08f87d, -0x274c6053, 0x4799fe3b, 0x6f05606b }
+ },
+ {
+ { -0x06e49b7d, 0x5b433149, 0x5a2cbf62, -0x524a239b, 0x632827b3, -0x78057bee, -0x54b60728, 0x60895e91 },
+ { 0x177ba962, -0x6001616e, 0x0de5cae1, -0x675118e3, 0x2d831044, 0x3ff4ae94, 0x58533ac8, 0x714de12e },
+ { 0x0cf86c18, -0x16130d13, 0x0735dfd4, -0x4b92f9ee, 0x04b96be7, -0x43625f68, -0x26923d95, 0x73e2e62f }
+ },
+},
+{
+ {
+ { 0x632f9c1d, 0x2eccdd0e, 0x76893115, 0x51d0b696, -0x579c85a8, 0x52dfb76b, -0x5ff110c7, 0x6dd37d49 },
+ { 0x49aa515e, -0x12a49cac, 0x0bc6823a, -0x579a3b61, 0x5b42d1c4, -0x7af3e017, 0x03d315b9, 0x30d76d6f },
+ { 0x2106e4c7, 0x6c444417, -0x6d728097, -0x04ac2980, 0x694d3f26, -0x4b8c615c, 0x2e864bb0, 0x10c69711 }
+ },
+ {
+ { -0x7ca737fb, 0x0ca62aa0, 0x7a204247, 0x6a3d4ae3, 0x3b11eddc, 0x7464d3a6, 0x550806ef, 0x03bf9baf },
+ { 0x7dbe5fde, 0x6493c427, 0x19ad7ea2, 0x265d4fad, 0x46304590, 0x0e00dfc8, -0x129901f7, 0x25e61cab },
+ { -0x33a799fc, 0x3f13e128, -0x4ba68b82, 0x6f5873ec, -0x33ed970b, -0x5f49c213, 0x4586e22c, 0x566d7863 }
+ },
+ {
+ { -0x39a5d030, -0x5efabd7b, -0x0ce9983d, 0x6c64112a, 0x731aee58, 0x680ae240, 0x4793b22a, 0x14fba5f3 },
+ { -0x633ef7cc, 0x1637a49f, -0x57643baf, -0x4371a92b, 0x7f7fd2db, 0x1cb5ec0f, 0x5ecc35d9, 0x33975bca },
+ { 0x6985f7d4, 0x3cd74616, -0x3637ffa9, 0x593e5e84, 0x7b61131e, 0x2fc3f2b6, -0x7c03ad94, 0x14829cea }
+ },
+ {
+ { 0x4e71ecb8, 0x21e70b2f, 0x40a477e3, -0x19a92247, -0x31e2b080, -0x409aa932, 0x535d7b7e, 0x05fc3bc4 },
+ { -0x68226a3e, -0x00bc847c, -0x55b14a59, 0x6c744e30, 0x3c85e88b, -0x61f3a29f, 0x5f758173, 0x2fd9c71e },
+ { 0x52afdedd, 0x24b8b3ae, -0x12c4cf31, 0x3495638c, -0x56417e6b, 0x33a4bc83, 0x5c651f04, 0x37376747 }
+ },
+ {
+ { 0x14246590, 0x634095cb, 0x16c15535, -0x10edebc0, -0x76ef43a0, -0x61c7ebf4, 0x30907c8c, 0x6bf59057 },
+ { 0x40d1add9, 0x2fba99fd, -0x690b2fd9, -0x4cf8e991, 0x15f03bae, 0x4363f052, 0x3b18f999, 0x1fbea56c },
+ { -0x1ebea476, 0x0fa778f1, -0x453c5882, 0x06409ff7, -0x655d65b0, 0x6f52d7b8, 0x7a635a56, 0x02521cf6 }
+ },
+ {
+ { 0x772f5ee4, -0x4eeb98e0, -0x69f86532, -0x17076b4f, 0x00ac824a, 0x4af8224d, -0x0832933c, 0x001753d9 },
+ { 0x0a9d5294, 0x513fee0b, 0x0fdf5a66, -0x706718a4, -0x401ef832, -0x2b9e7978, 0x71382ced, 0x3fa00a7e },
+ { -0x69c224cc, 0x3c69232d, -0x4b68c7a8, 0x1dde87da, -0x5f6e0d7b, -0x55282e07, -0x5fb7124a, 0x12b5fe2f }
+ },
+ {
+ { -0x5290e16e, -0x20d483da, 0x504b8913, 0x4b66d323, 0x751c8bc3, -0x73bf6240, 0x0796c7b8, 0x6f7e93c2 },
+ { -0x69031cb3, 0x71f0fbc4, -0x520ca413, 0x73b9826b, -0x00d73a9f, -0x2dfb8d9f, 0x6fb1206f, 0x749b76f9 },
+ { -0x515951fb, 0x1f5af604, -0x411b6367, -0x3edcae0f, -0x1100949a, 0x61a808b5, 0x01e02151, 0x0fcec10f }
+ },
+ {
+ { -0x3bdbb1bb, 0x3df2d29d, -0x6c2721f6, 0x2b020e74, -0x7df3deb3, 0x6cc8067e, 0x6feab90a, 0x41377916 },
+ { 0x49fe1e44, 0x644d58a6, 0x31ad777e, 0x21fcaea2, -0x77802f2e, 0x02441c5a, -0x7c3aee0d, 0x4901aa71 },
+ { -0x73e50710, 0x08b1b754, 0x246299b4, -0x31f08584, 0x1e06d939, -0x089f4f07, 0x726d1213, 0x41bb887b }
+ },
+},
+{
+ {
+ { -0x55c6082e, -0x68267f20, 0x52c6b51c, 0x35d03842, 0x07cd55aa, 0x7d43f493, -0x48753c9e, 0x56bd36cf },
+ { 0x567c49d8, -0x6d987f94, -0x3586e196, 0x066d04cc, -0x1c33c6b5, -0x5960a9bb, -0x5f87732e, 0x5c95b686 },
+ { 0x0d14a954, 0x2ac519c1, -0x6b4a0570, -0x150b8b4c, -0x560785a6, -0x19507c7e, -0x78641f6c, 0x0dea6db1 }
+ },
+ {
+ { -0x29578686, 0x15baeb74, -0x053be8ce, 0x7ef55cf1, 0x3c8b05c5, 0x29001f5a, 0x52eaccfb, 0x0ad7cc87 },
+ { 0x7344e5ab, -0x559940ac, -0x70e4bcf7, -0x25eda778, -0x02a9b4d1, 0x5e87d2b3, 0x5483b1dd, 0x5b2c7888 },
+ { 0x793408cf, 0x52151362, 0x19963d94, -0x14f0e8fd, -0x77c26b9a, -0x57cc4d06, 0x75003c78, 0x093a7fa7 }
+ },
+ {
+ { 0x60a91286, -0x47169fbc, 0x7778d3de, 0x7f3fd804, -0x4075a1d3, 0x67d01e31, -0x3d849ac2, 0x7b038a06 },
+ { 0x3a16d7be, -0x1aef821a, -0x650ccd31, -0x5c880024, 0x440b677f, 0x70d5bf18, -0x5b5cebfd, 0x6a252b19 },
+ { -0x2c966f0d, -0x6126e62b, -0x24b1460e, 0x5213aebb, 0x4cb99135, -0x38f715fb, 0x72260e56, 0x58ded57f }
+ },
+ {
+ { 0x5b0fd48b, -0x2592acda, -0x6c405678, -0x769f7dcf, 0x61d57e28, -0x287536ce, 0x3a5c8143, 0x79f2942d },
+ { -0x16bec289, 0x78e79dad, -0x68d61983, -0x0da8062b, -0x1c85581a, 0x59db910e, -0x4461fc64, 0x6aa11b5b },
+ { -0x49377217, -0x6825d0db, -0x530dfe97, 0x251ba7ea, -0x10b14b1c, 0x09b44f87, -0x4395825b, 0x7d90ab1b }
+ },
+ {
+ { -0x694c3c69, 0x1a07a3f4, -0x70b1dace, 0x11ceaa18, -0x588ae410, 0x7d9498d5, 0x508dd8a0, 0x19ed161f },
+ { -0x58fe9402, -0x6533597d, -0x0d3af493, -0x6fafa0b3, -0x331bca56, 0x6b610d5f, 0x6198ff96, 0x19a10d44 },
+ { -0x78231936, 0x560a2cd6, -0x799b30b3, 0x7f3568c4, 0x22803a38, -0x78be16ae, 0x595653fc, 0x483bdab1 }
+ },
+ {
+ { -0x4b257f0a, -0x2930b2f6, -0x07cf8020, -0x7db7c1bb, -0x5190625c, 0x05005269, -0x63087886, 0x1c705290 },
+ { -0x78cb05b7, -0x0587f0ec, 0x360534e0, 0x106f0b70, -0x1c1cf843, 0x2210776f, -0x22195f02, 0x3286c109 },
+ { -0x78b1672c, 0x32ee7de2, -0x4681f3a0, 0x14c362e9, 0x6a60a38a, 0x5781dcde, -0x558557c0, 0x217dd5ea }
+ },
+ {
+ { -0x4173f138, -0x7420e047, -0x1cf5fd7e, 0x00bae7f8, -0x5293b094, 0x4963991d, 0x5df6f60a, 0x07058a6e },
+ { 0x248e1eb0, -0x62483b30, 0x4d74bf52, -0x1f89681f, 0x3c562354, 0x1e6a9b17, 0x795a4965, 0x7fa7c21f },
+ { -0x24ce0981, -0x1614fd3c, 0x10bcfb2b, -0x12da0277, 0x5c5cddb4, 0x46c8131f, -0x5f346432, 0x33b21c13 }
+ },
+ {
+ { 0x5ee38c5b, -0x65504650, 0x071a13c7, -0x4062d2b2, -0x16ccd6f6, -0x71119193, -0x51ef68e9, 0x1c3bab17 },
+ { 0x087d8e31, 0x360692f8, -0x2d8e9c09, -0x0b2339c9, 0x65ea5963, 0x25a4e620, 0x5ac160d9, 0x659bf72e },
+ { -0x38354850, 0x1c9ab216, 0x07bbc3cc, 0x7d65d374, 0x504a58d5, 0x52744750, 0x131a2990, 0x09f2606b }
+ },
+},
+{
+ {
+ { 0x7c6691ae, 0x7e234c59, 0x0a85b4c8, 0x64889d3d, 0x354afae7, -0x251d36f4, 0x0c6a9e1d, 0x0a871e07 },
+ { 0x744346be, 0x40e87d44, 0x15b52b25, 0x1d48dad4, -0x5ec49fc2, 0x7c3a8a18, 0x2fcdbdf7, 0x4eb728c1 },
+ { 0x4bbc8989, 0x3301b599, 0x5bdd4260, 0x736bae3a, 0x19d59e3c, 0x0d61ade2, 0x2685d464, 0x3ee7300f }
+ },
+ {
+ { -0x7be18ae8, 0x43fa7947, 0x639c46d7, -0x1a3905a7, -0x1cfad48c, -0x5ef9a1e3, -0x30476fd0, 0x7d47c6a2 },
+ { -0x61822949, -0x0a2daa1c, 0x610b1eac, -0x7fe9eea4, -0x6d1e7836, 0x3c99975d, -0x686eda3e, 0x13815762 },
+ { -0x710f2920, 0x3fdad014, -0x6eab90c4, -0x62c18b66, 0x26bb8157, 0x71ec6210, 0x34c9ec80, 0x148cf58d }
+ },
+ {
+ { -0x651b8a93, -0x1da8d083, -0x770cb781, 0x56c345bb, 0x6960a88d, -0x602ef493, 0x4eaea1b9, 0x278febad },
+ { 0x7934f027, 0x46a492f6, -0x097bf557, 0x469984be, -0x769ee7ac, 0x5ca1bc2a, -0x42a2442c, 0x3ff2fa1e },
+ { -0x736cc69a, -0x4e5597e1, 0x20290c98, -0x73de6b64, 0x219d3c52, 0x39115291, -0x01639885, 0x4104dd02 }
+ },
+ {
+ { -0x24f69548, -0x7edeb1fa, 0x0ce44f35, 0x21a8b6c9, 0x409e2af5, 0x6524c12a, -0x71035b7f, 0x0165b5a4 },
+ { 0x1124422a, 0x72b2bf5e, -0x675cc54b, -0x5e05f3cd, -0x05ad499a, -0x6b349eff, -0x5050ac2b, 0x2c863b00 },
+ { -0x5f7b958a, -0x0e6f5b8c, -0x32d08340, 0x12eff984, 0x58aa2b8f, 0x695e2906, -0x40013748, 0x591b67d9 }
+ },
+ {
+ { -0x60e74aa3, -0x66464c8f, -0x5e739be2, -0x1b9a1a06, -0x3d60fa13, 0x61081136, 0x7030128b, 0x489b4f86 },
+ { -0x7f4b6406, 0x312f0d1c, -0x540c1376, 0x5979515e, -0x610fe378, 0x727033c0, -0x35708435, 0x3de02ec7 },
+ { 0x3aeb92ef, -0x2dcdefd3, 0x6116a861, -0x1e9dac4c, 0x190baa24, 0x3d7eabe7, 0x496cbebf, 0x49f5fbba }
+ },
+ {
+ { 0x1e9c572e, 0x155d628c, -0x3a77b8bf, -0x75b27954, 0x515763eb, -0x6e5cad0a, -0x7798aea5, 0x06a1a6c2 },
+ { -0x75a4302c, 0x30949a10, -0x439b8c15, -0x23bf2290, 0x307c0d1c, -0x6d3d6b3f, -0x3405918c, 0x5604a86d },
+ { 0x7c1764b6, 0x7288d1d4, -0x1fbe74af, 0x72541140, 0x18acf6d1, -0x60fce5a0, -0x01d8bd3a, 0x20989e89 }
+ },
+ {
+ { -0x7a1513d2, 0x1674278b, 0x7acb2bdf, 0x5621dc07, 0x61cbf45a, 0x640a4c16, -0x08fa6a2d, 0x730b9950 },
+ { 0x3a2dcc7f, 0x499777fd, -0x5ab0276e, 0x32857c2c, -0x2df81c60, -0x5d86279c, 0x0ca67e29, 0x0403ed1d },
+ { -0x78b13aae, -0x36b4d2cb, -0x67db9073, -0x3a193731, 0x16c035ce, -0x0834b906, 0x08303dcc, 0x5bd74543 }
+ },
+ {
+ { 0x15e7792a, -0x7a3b6cdf, -0x42322237, -0x39b3765e, -0x525c289e, -0x62e1c258, 0x3067f82c, 0x5bb7db12 },
+ { 0x28b24cc2, 0x7f9ad195, 0x6335c181, 0x7f6b5465, 0x4fc07236, 0x66b8b66e, 0x7380ad83, 0x133a7800 },
+ { -0x39359d42, 0x0961f467, 0x211952ee, 0x04ec21d6, -0x642ab890, 0x18236077, 0x58f0e0d2, 0x740dca6d }
+ },
+},
+{
+ {
+ { -0x12d9e51b, 0x3906c72a, -0x771eff09, -0x65497027, -0x0cc9fe69, -0x0a16fa66, -0x40d492b9, 0x0e53dc78 },
+ { -0x2c0f50f5, 0x50b70bf5, -0x1cd18e09, 0x4feaf48a, -0x5aa442cc, 0x60e84ed3, 0x3f50d1ed, 0x00ed489b },
+ { 0x7971877a, -0x46f7d641, 0x6d17e631, 0x5e444463, 0x18276893, 0x4d05c52e, 0x5a4a4af5, 0x27632d9a }
+ },
+ {
+ { -0x78150025, -0x567d7a2f, -0x272f579c, -0x5a4b0445, 0x022663f7, -0x49a70d81, -0x26631d7e, 0x3bbc2b22 },
+ { 0x54b260ce, -0x2ee00faf, 0x72f95270, -0x27923c72, 0x267cc138, 0x601fcd0d, 0x29e90ccd, 0x2b679164 },
+ { 0x583c0a58, -0x46e836ae, 0x0fe4c6f3, 0x653ff9b8, -0x4320c3f4, -0x64f25829, -0x54ab29f2, 0x43a0eeb6 }
+ },
+ {
+ { 0x57875fe8, 0x3ac63223, -0x0a043471, -0x262b0b14, 0x382bb620, -0x72117b6d, 0x4c799fdc, 0x50c5eaa1 },
+ { 0x6d4a5487, 0x396966a4, -0x53d44c46, -0x07ee5e76, 0x5628b26b, 0x66e4685b, -0x626d646e, 0x70a47702 },
+ { -0x290d04c4, -0x22f12375, -0x63384860, 0x54c63aa7, 0x2c8d9f1a, -0x51f4fcd5, 0x602967fb, 0x6f9ce107 }
+ },
+ {
+ { 0x3520e0b5, 0x13969306, -0x7715fc02, 0x437fcf7c, -0x2c36a644, -0x082b3bf5, -0x076c2127, 0x699154d1 },
+ { -0x321e3dd6, -0x52efab4f, 0x48eb32df, -0x3b5716fe, -0x53323f16, 0x5f3e7b33, -0x038669c2, 0x72364713 },
+ { -0x4b4d8ada, 0x315d5c75, 0x0236daa5, -0x33347bd3, 0x345fee8e, 0x22f0c8a3, 0x7d39dbed, 0x73975a61 }
+ },
+ {
+ { -0x0bbcc1ba, 0x6f37f392, 0x1f566b18, 0x0e19b9a1, 0x1fd1d662, 0x220fb78a, -0x5c7e36b3, 0x362a4258 },
+ { 0x6375da10, -0x1bfdb207, 0x1830c870, 0x78d3251a, 0x658cd91c, -0x6fd4e6b8, 0x29b7438a, 0x7e18b10b },
+ { 0x2b6beb2f, -0x6f8e26ed, 0x28418247, 0x0f26e9ad, -0x42136da3, -0x1546e137, -0x0b750d22, 0x4be65bc8 }
+ },
+ {
+ { 0x57c26234, 0x1d50fba2, -0x214f9875, 0x7bd4823a, -0x59ac750b, -0x3d4f2392, 0x351da73e, 0x5665eec6 },
+ { -0x5c918fd8, 0x78487feb, 0x1dd8ce34, 0x5f3f1300, 0x4b30c489, -0x6cb04ed3, 0x397f0a2b, 0x056c244d },
+ { 0x43bfb210, -0x24c11ff7, 0x20800ac2, 0x49720187, 0x73bd8667, 0x26ab5d61, -0x54dfb6c8, 0x20b209c2 }
+ },
+ {
+ { 0x16bd3289, 0x1fcca945, 0x41420428, 0x448d65aa, 0x16a55d62, 0x59c3b7b2, 0x4e612cd8, 0x49992cc6 },
+ { -0x3f804cb5, 0x549e342a, 0x21373d93, 0x02d82208, -0x532e0a99, -0x43d9d290, -0x0435387c, 0x7a92c9fd },
+ { 0x70f801de, 0x65bd1bea, -0x01b61d76, 0x1befb7c0, -0x4e4d51b6, -0x579cf933, 0x265c2a09, 0x3b7ac0cd }
+ },
+ {
+ { 0x22ed39a7, -0x0f2ab1b1, 0x5608150a, -0x5d5516e2, -0x1225178b, -0x0bde4d17, 0x6b7de992, 0x31bc531d },
+ { -0x73fe4314, -0x7dd411bd, -0x3f0438c5, 0x530cb525, -0x3e6ac017, 0x48519034, -0x1f65f0a5, 0x265cc261 },
+ { -0x567f068f, -0x20c2ecb3, 0x221a22a7, 0x7a4fb8d1, 0x35aad6d8, 0x3df7d420, 0x6a1a125e, 0x2a14edcc }
+ },
+},
+{
+ {
+ { 0x0478433c, 0x231a8c57, -0x3d7ebc63, -0x484ad8f2, -0x1c26f861, -0x24556616, 0x6c2b03d9, 0x2c03f525 },
+ { 0x52cfce4e, -0x20b711f9, 0x06ec08b7, -0x3c00050d, -0x46aba63c, 0x05710b2a, -0x69c15c73, 0x161d25fa },
+ { 0x7b53a47d, 0x790f1875, -0x30f3a787, 0x307b0130, 0x257ef7f9, 0x31903d77, -0x42694451, 0x699468bd }
+ },
+ {
+ { 0x6aa91948, -0x2722c21a, 0x2fc0d2cc, 0x485064c2, 0x34fdea2f, -0x64b7db9a, 0x6c4a2e3a, 0x293e1c4e },
+ { -0x0b250131, -0x42e0d0ba, -0x5b802909, 0x7cef0114, 0x4a47b37f, -0x2ce00226, 0x73905785, 0x525219a4 },
+ { -0x6daeed1f, 0x376e134b, -0x235ea260, 0x703778b5, 0x461c3111, -0x4fba7651, 0x7f032823, 0x5b605c44 }
+ },
+ {
+ { -0x0f180fb4, 0x3be9fec6, 0x75e34962, -0x7995a862, 0x1e1de61a, 0x5542ef16, -0x33a5422b, 0x2f12fef4 },
+ { 0x20c47c89, -0x469a7fa7, -0x6dc47034, -0x180feff4, 0x02e2ef77, 0x00012565, -0x57514c12, 0x24a76dce },
+ { -0x203f38c0, 0x0a4522b2, 0x40c9a407, 0x10d06e7f, 0x78cff668, -0x3930ebbf, 0x18a43790, 0x5e607b25 }
+ },
+ {
+ { -0x5a6930ec, -0x5fd3bce4, -0x512c1c00, -0x1c3bd2c0, 0x2e0f26db, -0x2dbad980, -0x61ba8f98, 0x201f3313 },
+ { 0x6cdf1818, 0x58b31d8f, -0x3c9da75e, 0x35cfa74f, 0x66e61d6e, -0x1e4c00b1, 0x6ccdd5f7, 0x5067acab },
+ { 0x08039d51, -0x02ad8095, 0x017c0006, 0x18b14964, 0x2e25a4a8, -0x2addf150, 0x62460375, 0x397cba88 }
+ },
+ {
+ { -0x37ec8619, 0x7815c3fb, -0x221ed50f, -0x599e6be0, -0x7a57022b, -0x00563f08, -0x3e1e3dae, 0x771b4022 },
+ { -0x0fa6a64e, 0x30c13093, -0x1656868a, -0x1dc55e73, 0x721d5e26, 0x222fd491, 0x766e6c3a, 0x2339d320 },
+ { 0x513a2fa7, -0x2782267a, -0x062b30f8, -0x0a53648f, 0x1ea283b3, -0x2f943ce5, 0x19971a76, 0x331a1892 }
+ },
+ {
+ { -0x628a8d51, 0x26512f3a, 0x68074a9e, 0x5bcbe288, 0x1180f7c4, -0x7b123e3f, -0x09b65985, 0x1ac9619f },
+ { -0x04b07f3a, -0x0ae990bb, 0x61c775cf, -0x63c93822, -0x6fbe26e4, -0x1c2b17e5, -0x7c4201df, 0x31167c6b },
+ { 0x524b1068, -0x0dd4c7be, -0x11631679, 0x5068343b, 0x4a6250c8, -0x03628e7c, 0x1f08b111, 0x61243634 }
+ },
+ {
+ { 0x1a2d2638, -0x749cb61d, -0x642c02cb, -0x62204900, -0x5c5f945c, 0x7f8bf1b8, 0x78d90445, 0x1522aa31 },
+ { -0x78b17673, -0x2662be25, 0x6c07dc20, 0x09fea5f1, -0x2ff06444, 0x793d2c67, -0x61a100c0, 0x46ebe230 },
+ { 0x69614938, 0x2c382f53, -0x48d292f0, -0x2501bf66, -0x49b90dd9, -0x1737cc6f, 0x0524306c, 0x45fe70f5 }
+ },
+ {
+ { -0x376aeb6f, 0x62f24920, 0x3f630ca2, 0x05f007c8, -0x0a362b48, 0x6fbb45d2, -0x4a85ddbb, 0x16619f6d },
+ { -0x69f3f474, -0x25b78a5a, -0x10f1d0e0, 0x5b68d076, 0x3d0b8fd4, 0x07fb51cf, -0x5f1c6d2c, 0x428d1623 },
+ { 0x01a308fd, 0x084f4a44, 0x76a5caac, -0x57dde63d, 0x43d1bc7d, -0x214721ba, 0x60bd38c6, 0x1d81592d }
+ },
+},
+{
+ {
+ { 0x2f89c8a1, 0x3a4a369a, 0x7c8de80d, 0x63137a1d, 0x78eda015, -0x4353ff76, -0x4b7c4fc1, 0x2cb8b3a5 },
+ { -0x13d5b3c8, -0x27cc2842, 0x0acc20ed, 0x2c916283, -0x6d208a7f, -0x16c5b856, 0x333c4a81, 0x702d67a3 },
+ { -0x34e46f5f, 0x36e417cb, 0x7f11794e, 0x33b3ddaa, -0x77a439f9, 0x3f510808, -0x1957fdf3, 0x24141dc0 }
+ },
+ {
+ { -0x427cea83, -0x6e6da234, 0x22cc8094, 0x3ca12053, 0x3f90d6e4, 0x28e57f18, -0x21d18985, 0x1a4714ce },
+ { 0x3fefee9d, 0x59f73c77, -0x3e306763, -0x4c0e1077, -0x1fd1aba1, -0x1ca204be, 0x47a1b47c, 0x5766120b },
+ { -0x47494801, -0x24df45f1, 0x77511fa1, -0x48cd3c4a, -0x660fd277, -0x56d4ae40, 0x489ca5f1, 0x4f3875ad }
+ },
+ {
+ { -0x118c1140, 0x79ed13f6, 0x69110bb1, -0x5a39ad93, -0x79fc79f4, -0x1b76d73d, -0x028fa60b, 0x722a1446 },
+ { 0x4932ab22, -0x380389d1, 0x2f4c3c1b, 0x7ac0edf7, -0x65576a18, 0x5f6b55aa, -0x52f5ff7f, 0x3680274d },
+ { -0x573077e7, -0x2f6a6017, -0x7b8a5664, -0x2f566ab0, 0x20b09cc5, 0x6eac1733, 0x331b1095, 0x628ecf04 }
+ },
+ {
+ { 0x5c74ccf1, -0x64be5308, 0x08265251, -0x498cce7f, 0x11adb147, -0x6636d513, 0x34ecb40f, 0x7a47d70d },
+ { -0x562f2244, -0x67434ee8, 0x08b4802b, -0x11bb61cc, -0x47594efc, -0x78f76dda, 0x45c7915d, 0x685f349a },
+ { -0x33bc5b0b, 0x60a0c4cb, 0x3677bea9, 0x775c66ca, 0x2ff8f5ed, -0x5e855e8b, 0x0e01fdc0, 0x11ded902 }
+ },
+ {
+ { 0x3bea93b7, 0x471f95b0, 0x3313abd3, 0x0552d7d4, -0x1e81c085, -0x426c8f1e, -0x4df1a414, 0x7b120f1d },
+ { -0x351018fc, -0x76f187f7, -0x1cf17394, -0x78d7d693, -0x6d514e37, 0x4c5cd2a3, 0x5771531f, 0x194263d1 },
+ { -0x79afd286, 0x17d2fb3d, 0x50a69352, -0x4a9b27bc, -0x59f128a3, 0x7da962c8, 0x318736aa, 0x00d0f85b }
+ },
+ {
+ { -0x0289de3f, -0x598ac3e2, 0x445671f5, 0x69c0b4a7, 0x05b23c11, -0x68e0ad8c, 0x51a8c7cd, 0x387bc748 },
+ { 0x777c84fd, -0x6874ebd2, 0x05a8c062, -0x0bfd9bb9, -0x1819ed39, -0x59852ae5, -0x672295cd, 0x2f7b4596 },
+ { 0x4a52a9a8, -0x7e76b4b3, -0x09477cd1, -0x5226c1ee, -0x49e429c8, 0x184d8548, -0x29360933, 0x3f1c62db }
+ },
+ {
+ { 0x148f693d, 0x3fad3e40, -0x6b14658e, 0x052656e1, 0x184f4e2f, 0x2f4dcbfd, -0x3b7d1e75, 0x406f8db1 },
+ { -0x6e6ef3e1, 0x2e8f1f00, -0x400d1ed4, -0x5b20b020, -0x116d8bc8, 0x60c6560a, -0x53103706, 0x6338283f },
+ { 0x7f191ee4, -0x619cf2d4, -0x43c00990, 0x4fbf8301, 0x7afb73c4, 0x787d8e4e, -0x170a705b, 0x50d83d5b }
+ },
+ {
+ { -0x4b2c4993, -0x3f533070, 0x61732e60, -0x58fa621b, 0x70c6b0ba, 0x033d1f78, 0x26d946e4, 0x584161cd },
+ { -0x3ee5e769, -0x7a97c6ea, -0x1af92ff8, 0x2d69a4ef, -0x099b42ff, 0x39af1378, 0x361517c6, 0x65942131 },
+ { 0x72d27ca2, -0x440d4e60, -0x042138fc, -0x40c6c3a7, -0x1d9d47e2, -0x16724432, 0x3029b589, 0x02eebd0b }
+ },
+},
+{
+ {
+ { 0x7b85c5e8, -0x789a4961, -0x2e97454e, 0x6ff0678b, 0x1d330f9b, 0x3a70e77c, -0x4f507184, 0x3a5f6d51 },
+ { -0x59f253a1, 0x61368756, -0x145423a9, 0x17e02f6a, 0x4cce0f7d, 0x7f193f2d, -0x76132310, 0x20234a77 },
+ { 0x7178b252, 0x76d20db6, -0x2ae12ea0, 0x071c34f9, -0x4c1bee90, -0x09d5b5e0, 0x3cffe366, 0x7cd68235 }
+ },
+ {
+ { 0x68acf4f3, -0x599a32a0, 0x3cd7e3d3, 0x42d92d18, 0x336025d9, 0x5759389d, 0x2b2cd8ff, 0x3ef0253b },
+ { -0x2778054a, 0x0be1a45b, -0x45bfc492, 0x2a846a32, -0x1691a000, -0x266defee, 0x3bdc0943, 0x2838c886 },
+ { 0x4a465030, -0x2e944f31, 0x15c577ab, -0x05b694bf, -0x0b54be63, -0x7d305176, 0x06a82812, 0x21dcb8a6 }
+ },
+ {
+ { -0x4188ce46, -0x6572ff06, 0x629e1889, -0x7dfc9f82, 0x43f3d97f, -0x4d33fdc9, 0x6c6f678b, 0x5d840dbf },
+ { -0x73626038, 0x5c600446, -0x2bd55c35, 0x2540096e, 0x12ee2f9c, 0x125b4d4c, -0x6b5ce255, 0x0bc3d081 },
+ { 0x309fe18b, 0x706e380d, -0x461e9a39, 0x6eb02da6, 0x7dae20ab, 0x57bbba99, 0x2ac196dd, 0x3a427623 }
+ },
+ {
+ { -0x24bb8135, 0x3bf8c172, -0x39d7d243, 0x5fcfc41f, 0x75aa15fe, -0x7f530040, 0x24e1a9f9, 0x0770c9e8 },
+ { -0x758f7b06, 0x4b42432c, -0x20461abb, -0x7675e61d, -0x63a71ba3, -0x4160ffdf, -0x5e92142f, 0x1ff177ce },
+ { 0x45b5b5fd, -0x309e2666, 0x1b3a7924, -0x79f67b17, 0x303e3e89, -0x18cff6e7, 0x41500b1e, 0x39f264fd }
+ },
+ {
+ { -0x01f6841f, -0x2e64b555, -0x201fe6d7, -0x5b92031f, 0x2ca6f1ff, -0x3c36f76c, 0x2c35f14e, 0x65c62127 },
+ { -0x24181d64, -0x5852cbe9, 0x2b9c139c, -0x426bc896, -0x6ca68457, -0x5f16e472, 0x68889840, 0x1712d734 },
+ { -0x31ce6c23, -0x18d47608, -0x5eda3f45, 0x4d103356, 0x2e1cfe83, 0x0419a93d, -0x4e631d8e, 0x22f9800a }
+ },
+ {
+ { -0x65910254, 0x42029fdd, 0x34a54941, -0x46ed3142, -0x78420c85, 0x640f64b9, -0x7a67354c, 0x4171a4d3 },
+ { 0x3e9ef8cb, 0x605a368a, -0x5aafb8eb, -0x1c163fde, 0x5f24248f, 0x553d48b0, 0x647626e5, 0x13f416cd },
+ { -0x6636b374, -0x05d8a756, -0x4fff47f9, 0x23006f6f, -0x5225ac6e, -0x042d6e23, 0x574bd1ab, 0x508214fa }
+ },
+ {
+ { 0x53d003d6, 0x461a15bb, -0x430c369b, -0x4defd778, 0x6c683a5a, 0x27c57675, -0x37934bb9, 0x3a7758a4 },
+ { 0x3ed6fe4b, -0x3dfd96eb, 0x511d77c4, -0x59a598c7, 0x2c14af94, -0x3421d9ba, 0x6faba74b, 0x22f960ec },
+ { -0x6c51af8a, 0x548111f6, 0x1dfd54a6, 0x1dae21df, -0x0ceea19b, 0x12248c90, -0x72180b6c, 0x5d9fd15f }
+ },
+ {
+ { -0x1128ade2, 0x3f244d2a, 0x432e9615, -0x71c56fd8, 0x2e9c16d4, -0x1e9b4589, 0x47eb98d8, 0x3bc187fa },
+ { 0x6d63727f, 0x031408d3, -0x28384acd, 0x6a379aef, -0x33511db5, -0x561e703b, 0x4f8fbed3, 0x332f3591 },
+ { -0x15793df4, 0x6d470115, 0x6c46d125, -0x66754835, 0x3a660188, -0x2887cd4b, -0x6f9045fd, 0x450d81ce }
+ },
+},
+{
+ {
+ { -0x4d351f4b, 0x23264d66, -0x14359a8a, 0x7dbaed33, -0x0f2db538, 0x030ebed6, -0x089caaf0, 0x2a887f78 },
+ { -0x27bac6fe, -0x0751b2d6, -0x1724d2e3, 0x7018058e, -0x382d3ee2, -0x554c66a1, 0x24ccca79, 0x53b16d23 },
+ { 0x5c012d4f, 0x2a23b9e7, -0x351e0d16, 0x0c974651, 0x675d70ca, 0x2fb63273, -0x79bbfc0b, 0x0ba7250b }
+ },
+ {
+ { -0x79079264, -0x229ca76d, -0x1ec57a5c, 0x61699176, 0x4eaa7d57, 0x2e511195, -0x049f4205, 0x32c21b57 },
+ { 0x029c6421, -0x44f2e703, -0x76d670fe, -0x43d2ebdf, -0x74daf16a, -0x7cb8071a, 0x032d71c9, 0x7b9f2fe8 },
+ { 0x319e0780, -0x2787dc33, -0x76888a3b, -0x103b303f, -0x65f54c09, 0x4854fb12, 0x7238c371, 0x12c49d41 }
+ },
+ {
+ { -0x7c866abe, 0x09b3a017, -0x552a11c1, 0x626dd08f, -0x148feb61, -0x45ff4312, -0x5f5bbb37, 0x1421b246 },
+ { -0x0017c897, 0x0950b533, -0x71e2942f, 0x21861c1d, 0x1302e510, -0x0fdd27c8, 0x6391cab4, 0x2509200c },
+ { -0x73db5839, 0x4aa43a8e, -0x270fa10b, 0x04c1f540, 0x0b3eb9dc, -0x5245a1f4, 0x48a49ce3, 0x2ab55044 }
+ },
+ {
+ { 0x1c5d3afa, -0x23f8539d, -0x06207394, 0x58615171, -0x628c1d50, 0x72a079d8, -0x4b151ea3, 0x7301f4ce },
+ { 0x6f0f5dec, 0x2ed22726, 0x5ed50824, -0x67db11bf, -0x6b972beb, -0x7f841384, -0x4ade1dc1, 0x7093bae1 },
+ { -0x298dd3bf, 0x6409e759, 0x72bf729b, -0x598b1e31, 0x3c21e569, -0x43f5db15, 0x4ebacb23, 0x390167d2 }
+ },
+ {
+ { -0x5d0dedf5, -0x2844fab5, -0x4efa7649, -0x1d463152, -0x0c3f1242, 0x3fe8bac8, 0x7112cb69, 0x4cbd4076 },
+ { -0x45cac0e4, 0x27f58e3b, -0x4095bc9f, 0x4c47764d, 0x6e562650, -0x50443b1b, -0x551e5ba3, 0x07db2ee6 },
+ { 0x29c58176, 0x0b603cc0, 0x5cb15d61, 0x5988e382, -0x230f5273, 0x2bb61413, 0x74183287, 0x7b8eec6c }
+ },
+ {
+ { -0x03c7948d, 0x32fee570, -0x25c57339, -0x2574febf, -0x37697ca7, -0x68a002f6, -0x4ecd57ab, 0x6ee809a1 },
+ { 0x2cd27cb0, -0x1b35bf88, -0x04169843, -0x25063cdd, -0x752be162, -0x4d642cb6, 0x626ede4d, 0x72810497 },
+ { -0x030279c6, -0x6bbb44cf, 0x3e4e48c5, 0x2fe3690a, -0x2f7705db, -0x23d63799, -0x2e8cd6d2, 0x13bd1e38 }
+ },
+ {
+ { 0x1dfac521, 0x223fb5cf, 0x6f554450, 0x325c2531, 0x659177ac, 0x030b98d7, 0x4f88a4bd, 0x1ed018b6 },
+ { 0x696149b5, -0x2cd4b328, -0x7e275549, -0x1aa6c829, -0x51edd46c, 0x0bcb2127, -0x4ebf6650, 0x41e86fcf },
+ { -0x47fd5950, 0x3630dfa1, 0x42ad3bd5, -0x77f078b9, -0x113a5b2c, 0x0af90d6c, 0x37cdc5d9, 0x746a247a }
+ },
+ {
+ { 0x78d941ed, 0x6eccd852, -0x2dd087bd, 0x2254ae83, 0x7bbfcdb7, -0x3add2fd2, -0x400f1b1e, 0x681e3351 },
+ { 0x2b7b9af6, -0x2ace4743, 0x37fc5b51, 0x50050935, -0x3a6cab93, 0x232fcf25, 0x2bb40f49, 0x20a36514 },
+ { -0x7cfcb0bb, -0x749b4a63, 0x1fa20efb, 0x2f8b71f2, -0x459aaf1c, 0x69249495, 0x45d5472b, 0x539ef98e }
+ },
+},
+{
+ {
+ { 0x1cae743f, -0x2f8b276a, -0x11e39c13, -0x0792e70b, -0x180b12d7, -0x68423aa5, 0x663ab108, 0x4cbad279 },
+ { -0x59dfad8b, 0x6e7bb6a1, 0x413c8e83, -0x55b0de29, -0x1770a34e, 0x6f56d155, -0x59cba41f, 0x2de25d4b },
+ { -0x5f28e033, -0x7f2e6fdc, -0x04d77508, -0x3ada3df6, 0x5f3a6419, -0x4e5c68b5, -0x1dff8dcd, 0x7d7fbcef }
+ },
+ {
+ { -0x0c3d6f6c, -0x3283a23b, 0x2a9105ab, -0x387e5d66, 0x421c3058, -0x7f39e2ca, -0x23272b29, 0x4f9cd196 },
+ { 0x266b2801, -0x0510e196, -0x2a8c60ea, -0x7993973c, 0x1b03762c, -0x0975d044, -0x7848a573, 0x5975435e },
+ { 0x6a7b3768, 0x199297d8, 0x1ad17a63, -0x2f2fa7dc, 0x5c1c0c17, -0x45fd6353, 0x387a0307, 0x7ccdd084 }
+ },
+ {
+ { 0x6760cc93, -0x64f37be8, 0x1ab32a99, -0x3251ff86, 0x620bda18, -0x5772137a, -0x7e6f35bc, 0x3593ca84 },
+ { 0x6d260417, -0x2359bdd4, -0x6b7dbf43, -0x51eac2b0, -0x04973989, -0x563f3e4c, 0x61d0cf53, 0x428bd0ed },
+ { 0x5e849aa7, -0x6dece766, 0x65d8facd, -0x2b273ccb, 0x53fdbbd1, -0x73adaba5, -0x25d29c1a, 0x27398308 }
+ },
+ {
+ { 0x0a702453, -0x465ef1b4, -0x2a82e422, 0x0fa25866, -0x32d82509, -0x0046264b, 0x492c33fd, 0x572c2945 },
+ { 0x435ed413, 0x42c38d28, 0x3278ccc9, -0x42af0ca0, 0x79da03ef, -0x44f854e6, -0x4173ccab, 0x269597ae },
+ { -0x2932cf42, -0x388038bb, -0x1c455105, -0x1b20172d, -0x55a225f4, -0x5dd377d0, -0x3fa43580, 0x7f985498 }
+ },
+ {
+ { 0x0fbf6363, -0x2ca9eaae, -0x30b2045a, 0x08045a45, -0x78c05f3e, -0x113db044, -0x2964ed19, 0x30f2653c },
+ { -0x60f41ee9, 0x3849ce88, 0x7b54a288, -0x7ffa52e5, 0x23fc921c, 0x3da3c39f, 0x0a31f304, 0x76c2ec47 },
+ { -0x553ef37b, -0x75f736c8, -0x24d89435, 0x46179b60, 0x0e6fac70, -0x56df3fe2, 0x596473da, 0x2f1273f1 }
+ },
+ {
+ { 0x55a70bc0, 0x30488bd7, -0x0e2bbd19, 0x06d6b5a4, -0x43a69e9e, -0x152e5962, -0x123a087c, 0x38ac1997 },
+ { -0x751fe1ef, 0x4739fc7c, 0x4a6aab9f, -0x02ad8b70, -0x788d70d2, 0x41d98a82, -0x27a4960e, 0x5d9e572a },
+ { -0x58ae4ec5, 0x0666b517, 0x7e9b858c, 0x747d0686, 0x454dde49, -0x53533fef, -0x40161964, 0x22dfcd9c }
+ },
+ {
+ { 0x103be0a1, 0x56ec59b4, -0x2da60697, 0x2ee3baec, 0x13f5cd32, 0x797cb294, 0x24cde472, 0x0fe98778 },
+ { -0x3cf2f327, -0x72242d20, -0x5344bccd, -0x527199a1, 0x322a961f, -0x7094da74, 0x5448c1c7, 0x6b2916c0 },
+ { 0x0aba913b, 0x7edb34d1, 0x2e6dac0e, 0x4ea3cd82, 0x6578f815, 0x66083dff, 0x7ff00a17, 0x4c303f30 }
+ },
+ {
+ { 0x0dd94500, 0x29fc0358, 0x6fbbec93, -0x132d855c, -0x3d1d5808, 0x130a155f, -0x48f95e2b, 0x416b151a },
+ { 0x17b28c85, -0x2cf5c42a, 0x39773bea, -0x3a2c8849, 0x1e6a5cbf, -0x39391874, -0x74d5483c, 0x0d61b8f7 },
+ { -0x163ec950, 0x56a8d7ef, 0x58e44b20, -0x42f81a33, 0x1b57e0ab, -0x5019d026, 0x4277e8d2, 0x191a2af7 }
+ },
+},
+{
+ {
+ { 0x2fe09a14, 0x09d4b60b, -0x244e8b82, -0x3c7b0f51, 0x78b5fd6e, 0x58e2ea89, -0x4a1f64f6, 0x519ef577 },
+ { -0x5490b67b, -0x2aaff6a5, 0x4fbfaf1a, 0x04f4cd5b, 0x2a0c7540, -0x6271d12f, -0x4ddedd7a, 0x2bc24e04 },
+ { 0x1124cca9, 0x1863d7d9, -0x47758f72, 0x7ac08145, -0x7a8fce0b, 0x2bcd7309, -0x7547051b, 0x62337a6e }
+ },
+ {
+ { 0x1b3a1273, -0x2e54cdb2, -0x7efaacc0, 0x18947cf1, -0x5673e692, 0x3b5d9567, -0x7fd1e198, 0x7fa00425 },
+ { 0x06ffca16, 0x4bcef17f, 0x692ae16a, -0x21f91e25, 0x614f42b0, 0x0753702d, 0x5b9212d0, 0x5f6041b4 },
+ { 0x028c2705, 0x7d531574, -0x24f28a02, -0x7fce8297, -0x10737223, 0x30fface8, -0x493c1668, 0x7e9de97b }
+ },
+ {
+ { -0x5db2bf23, -0x0ffb419e, 0x0452d41f, -0x45f9a66f, 0x62a44234, -0x7e3ba11f, -0x5ddd9911, 0x4cb829d8 },
+ { -0x619a7a5d, 0x1558967b, -0x6716746e, -0x68366320, 0x6eb3adad, 0x10af149b, -0x0b2c7306, 0x42181fe8 },
+ { 0x07b86681, 0x1dbcaa84, -0x74d98ac5, 0x081f001e, -0x7bfb717f, 0x3cd7ce6a, 0x3f25f22c, 0x78af1163 }
+ },
+ {
+ { 0x7d65318c, 0x3241c00e, -0x2f179219, -0x19411a24, -0x043f73da, 0x118b2dc2, -0x039fc23d, 0x680d04a7 },
+ { 0x0b50babc, -0x7be9142c, 0x28208bee, 0x15087226, -0x463e3c93, -0x5ceb7051, -0x2cd282a3, 0x0d07daac },
+ { 0x695aa3eb, -0x063dbeb6, 0x05a68f21, -0x255bd3b4, 0x7f93963e, 0x7c6c2398, 0x0c3954e3, 0x210e8cd3 }
+ },
+ {
+ { 0x37fe6c26, 0x2b50f161, 0x56e404d8, -0x1efd4328, 0x4c561f6b, 0x12b0f141, -0x2fd7136f, 0x51b17bc8 },
+ { 0x10a71c06, -0x53bdfe0e, -0x0c404fdf, 0x6a65e0ae, 0x393632f7, -0x43bd3ca4, -0x79a0f8be, 0x56ea8db1 },
+ { -0x30acaee7, -0x000a04b5, -0x20eef760, -0x0b676287, -0x65c45cdb, -0x4203159b, 0x74d1a6f2, 0x18a11f11 }
+ },
+ {
+ { -0x2d85a0d4, -0x0429c326, -0x755ef929, -0x0ff03b44, -0x719b5bd0, 0x53fb5c1a, 0x0c1a2e85, 0x04eaabe5 },
+ { 0x3f6bba29, 0x407375ab, -0x66e1b7d2, -0x613c4928, -0x1aa06d17, -0x6637f17e, -0x04f3f51f, 0x307c13b6 },
+ { -0x34754a19, 0x24751021, 0x5c5010eb, -0x03dcbbb7, 0x4e5610a1, 0x5f1e717b, -0x3d8ef32b, 0x44da5f18 }
+ },
+ {
+ { -0x76271534, -0x6ea90195, -0x1dced95f, -0x19486baf, 0x3944eb4e, -0x428b9c27, 0x767203ae, 0x726373f6 },
+ { -0x0e47d14b, 0x033cc55f, 0x411cae52, -0x4ea51c93, -0x7004532d, -0x45bf49e7, 0x532e861f, 0x768edce1 },
+ { -0x14810976, -0x1cfa358e, 0x70eadb23, 0x662cf31f, -0x4b3ba498, 0x18f026fd, -0x4a2d1343, 0x513b5384 }
+ },
+ {
+ { -0x750cb315, 0x5e270287, -0x46b92952, -0x6ff4fbf7, -0x25427aee, 0x6512ebf7, -0x77da707f, 0x61d9b769 },
+ { -0x38d66762, 0x46d46280, 0x5368a5dd, 0x4b93fbd0, -0x2e89a577, 0x63df3f81, -0x465f5ddd, 0x34cebd64 },
+ { 0x49b7d94b, -0x593a58ed, 0x23eb9446, -0x5c0c2ea8, 0x77484834, 0x0416fbd2, 0x2c70812f, 0x69d45e6f }
+ },
+},
+{
+ {
+ { 0x4f460efb, -0x6019d4bd, -0x59c9f82a, -0x212cfc2c, -0x485f25dc, -0x0faddef2, 0x00545b93, 0x237e7dbe },
+ { -0x3ac3ebcf, -0x31e908b5, 0x2072edde, 0x2b9725ce, -0x4a4dc119, -0x47463c91, 0x0b5cc908, 0x7e2e0e45 },
+ { 0x6701b430, 0x013575ed, -0x60f402f0, 0x231094e6, -0x7c1b80de, 0x75320f15, -0x4eeeaa1d, 0x71afa699 }
+ },
+ {
+ { 0x473b50d6, -0x15bdc3e4, 0x3b38ef10, 0x51e87a1f, -0x4d36416b, -0x647b40a1, 0x78f89a1c, 0x00731fbc },
+ { 0x3953b61d, 0x65ce6f9b, -0x505ebe1a, -0x39a7c616, -0x5608a602, 0x0f435ffd, -0x3d4e3d72, 0x021142e9 },
+ { 0x48f81880, -0x1bcf38e8, 0x5ecec119, -0x4069f3de, 0x6bba15e3, -0x49251f7d, 0x47e15808, 0x4c4d6f33 }
+ },
+ {
+ { -0x6770e690, 0x2f0cddfc, -0x4f460ae5, 0x6b916227, 0x779176be, 0x6ec7b6c4, -0x57706058, 0x38bf9500 },
+ { -0x3e82e037, 0x18f7eccf, 0x51403c14, 0x6c75f5a6, -0x0811f321, -0x24218ed5, -0x581b85de, 0x193fddaa },
+ { 0x37e8876f, 0x1fd2c93c, 0x18d1462c, -0x5d09e1a6, 0x39241276, 0x5080f582, -0x40f2b697, 0x6a6fb99e }
+ },
+ {
+ { -0x491bdc3a, -0x114edd4b, -0x0d790072, -0x6c628ff0, 0x1dcf5d8c, -0x6f56d57d, 0x42c5eb10, 0x136fda9f },
+ { 0x560855eb, 0x6a46c1bb, -0x076c0f63, 0x2416bb38, -0x708e533f, -0x28e2eec9, -0x5ce76916, 0x75f76914 },
+ { -0x5cfa422f, -0x06b3204f, -0x6007d3f8, 0x0f364b9d, -0x3c44a776, 0x2a87d8a5, 0x0be8dcba, 0x02218351 }
+ },
+ {
+ { 0x43307a7f, -0x62a58eff, -0x3b825ba1, -0x4f9c2162, -0x416d852d, 0x22bbfe52, -0x02bfbd94, 0x1387c441 },
+ { 0x5ead2d14, 0x4af76638, -0x3583a7d0, -0x5f712780, 0x10211e3d, 0x0d13a6e6, 0x7b806c03, 0x6a071ce1 },
+ { -0x78687508, -0x4a2c3c2f, 0x7f0e4413, 0x722b5a3d, -0x44b88360, 0x0d7b4848, -0x50e1236e, 0x3171b26a }
+ },
+ {
+ { -0x4d75b82f, -0x59f24828, 0x1770a4f1, -0x5940eb2a, 0x53ddbd58, -0x2b5e076d, 0x344243e9, 0x6c514a63 },
+ { -0x68a9b358, -0x56d0ce70, 0x2275e119, -0x008447b4, -0x5b78aeb0, 0x4f55fe37, 0x3cf0835a, 0x221fd487 },
+ { 0x3a156341, 0x2322204f, -0x45f5fcd3, -0x048c1f17, 0x410f030e, -0x031f22b4, -0x046db556, 0x48daa596 }
+ },
+ {
+ { -0x37b3686d, 0x14f61d5d, -0x10be7dfa, -0x66be061d, 0x346277ac, -0x320a4771, 0x0e8a79a9, 0x58c837fa },
+ { 0x5ca59cc7, 0x6eca8e66, 0x2e38aca0, -0x57b8dab5, -0x2de1e832, 0x31afc708, -0x3527b509, 0x676dd6fc },
+ { -0x69036fa8, 0x0cf96885, 0x7b56a01b, 0x1ddcbbf3, 0x4935d66a, -0x233d1883, -0x395a80f6, 0x1c4f73f2 }
+ },
+ {
+ { -0x0383cb7c, -0x4c918f92, -0x3c3e309f, 0x73dfc9b4, 0x781cc7e5, -0x14e28637, 0x7daf675c, 0x70459adb },
+ { 0x305fa0bb, 0x0e7a4fbd, 0x54c663ad, -0x7d62b320, 0x2fe33848, -0x0bde3c7d, 0x1bf64c42, 0x795ac80d },
+ { -0x6e4bd44d, 0x1b91db49, 0x4b02dcca, 0x57269623, 0x1f8c78dc, -0x6020611b, -0x731de02d, 0x5fe16284 }
+ },
+},
+{
+ {
+ { -0x6aeeac77, 0x315c29c7, -0x79d08b32, -0x281f1af9, -0x7a6d8bce, 0x0c4a7621, 0x4a25a1e4, 0x72de6c98 },
+ { 0x4d077c41, -0x1d86f552, -0x248b965d, -0x746c7d90, -0x7542e95e, 0x6eb632dc, -0x55f9b48e, 0x720814ec },
+ { -0x40955cf0, -0x51654aad, -0x7f9291e5, 0x050a50a9, -0x5200aec7, -0x6d448bfd, 0x45be618b, 0x0394d276 }
+ },
+ {
+ { -0x4dcaba5c, -0x0ac69bdb, -0x67044d6a, 0x15a7a27e, 0x636fdd86, -0x5493ad44, 0x419334ee, 0x79d995a8 },
+ { -0x7a81120c, 0x4d572251, -0x1e616c3b, -0x1c8db123, 0x0b797035, -0x758ebdf2, -0x785418bd, 0x3b3c8336 },
+ { 0x1195dd75, -0x3275715a, 0x1dd9a82f, -0x5afb2758, -0x5ca7864a, 0x540dca81, 0x79c86a8a, 0x60dd16a3 }
+ },
+ {
+ { 0x153e47b8, 0x3501d6f8, 0x14a2f60c, -0x485698ac, 0x455d9523, 0x112ee8b6, -0x7eed1576, 0x4e62a3c1 },
+ { 0x7381e559, 0x35a2c848, -0x287f7d35, 0x596ffea6, -0x245849ad, -0x34688e15, -0x64b2597b, 0x5a08b501 },
+ { 0x516ab786, -0x372b53fc, 0x5295b23d, 0x595af321, -0x24fdcf3f, -0x29122dcc, -0x7da4be34, 0x0929efe8 }
+ },
+ {
+ { -0x52a99ae3, -0x74ce8d49, 0x3fabd717, 0x01581b7a, 0x424df6e4, 0x2dc94df6, 0x2c29284f, 0x30376e5d },
+ { -0x342f0d2d, 0x5f0601d1, 0x6132bb7f, 0x736e412f, 0x238dde87, -0x7c9fbbce, -0x0a3f8ac4, 0x1e3a5272 },
+ { -0x7ea65a64, -0x2d6e7259, 0x3f0713f3, 0x6bdc1cd9, 0x4acd6590, 0x565f7a93, 0x4cb4c128, 0x53daacec }
+ },
+ {
+ { -0x7ad30250, -0x667ad43d, 0x59d6ed0b, 0x2cc12e95, -0x64a53d85, 0x70f9e2bf, 0x7959ae99, 0x4f3b8c11 },
+ { -0x6337582a, 0x4ca73bd7, 0x47e9a9b2, 0x4d4a738f, 0x42f5fe00, -0x0b340ed7, -0x4240f8ae, 0x01a13ff9 },
+ { 0x2ff26412, 0x55b6c9c8, 0x1fb667a8, 0x1ac4a8c9, -0x1488740e, -0x2ad84031, 0x7012a3be, 0x303337da }
+ },
+ {
+ { -0x052d022f, -0x6892c335, 0x37a640a8, -0x34777c69, 0x6734cb25, 0x2ff00c1d, 0x789c2d2b, 0x269ff4dc },
+ { -0x73e36284, -0x6aabddde, 0x1a9b340f, 0x01fac137, -0x6da4b729, 0x7e8d9177, 0x61b3e31b, 0x53f8ad56 },
+ { -0x3f729873, 0x0c003fbd, 0x7ead2b17, 0x4d982fa3, -0x4d1a7d0f, -0x3f819433, -0x20bed5bc, 0x296c7291 }
+ },
+ {
+ { -0x25474a62, -0x204dcdfb, -0x37f6ddb0, 0x465aeaa0, -0x658da2e8, -0x2ecc3ee8, 0x61f117d1, 0x23273702 },
+ { 0x33daf397, 0x7903de2b, -0x3659db4d, -0x2f00f9e7, 0x555b3e18, -0x75e2dad5, 0x52e0b7c0, 0x2b6d581c },
+ { 0x623e7986, 0x3d0543d3, -0x3d875cac, 0x679414c2, 0x726196f6, -0x51bc0f34, -0x7dba1546, 0x7836c41f }
+ },
+ {
+ { -0x7fee6c84, -0x359ae17c, 0x6ef41a28, -0x394f3b92, 0x5f3f8d52, -0x48fde459, -0x15284603, 0x119dff99 },
+ { 0x49e95a81, -0x185dab25, 0x08b0ad73, 0x5192d5d0, -0x2ff503f9, 0x4d20e5b1, 0x2cf25f38, 0x5d55f801 },
+ { -0x0b4ce2b3, 0x43eadfcb, 0x11148892, -0x39afc08c, 0x060d3b17, -0x0111973b, -0x22b5f538, 0x329293b3 }
+ },
+},
+{
+ {
+ { 0x5d7cb208, 0x2879852d, 0x687df2e7, -0x47212290, 0x21687891, -0x23f40055, 0x677daa35, 0x2b44c043 },
+ { -0x1e6b69e6, 0x4e59214f, 0x0d71cd4f, 0x49be7dc7, 0x3b50f22d, -0x6cff302e, -0x036e8dce, 0x4789d446 },
+ { 0x074eb78e, 0x1a1c87ab, -0x66250b99, -0x05392e72, 0x484f9067, 0x3eacbbcd, 0x2bb9a4e4, 0x60c52eef }
+ },
+ {
+ { 0x7cae6d11, 0x702bc5c2, 0x54a48cab, 0x44c7699b, -0x45b6d14e, -0x1043bfaa, -0x26499893, 0x70d77248 },
+ { 0x3bfd8bf1, 0x0b5d89bc, -0x360caae6, -0x4f946dc9, -0x2acfd70b, 0x0e4c16b0, 0x2ccfcaab, 0x10bc9c31 },
+ { 0x3ec2a05b, -0x557517b5, -0x12e87e20, -0x6796610c, 0x708e85d1, 0x794513e4, -0x56890bed, 0x63755bd3 }
+ },
+ {
+ { -0x680e5349, 0x3dc71018, -0x3e9a4428, 0x5dda7d5e, 0x0fa1020f, 0x508e5b9c, 0x37c52a56, 0x27637517 },
+ { 0x2ad10853, -0x4aa05fc2, -0x6119ca97, 0x356f7590, -0x41964770, -0x60060e03, -0x743e907c, 0x0d8cc1c4 },
+ { 0x6eb419a9, 0x029402d3, 0x77b460a5, -0x0f4bb182, -0x2bc3b6aa, -0x30579dd0, 0x7ad166e7, 0x70c2dd8a }
+ },
+ {
+ { -0x471281ed, -0x6e2b6983, -0x28897e86, 0x74252f0a, 0x0d852564, -0x1bf67d20, 0x16a53ce5, 0x32b86138 },
+ { -0x609013f2, 0x65619450, 0x46c6518d, -0x11d18157, 0x67e09b5c, -0x68cc3e0d, 0x63948495, 0x2e0fac63 },
+ { -0x1bb7329c, 0x79e7f7be, 0x087886d0, 0x6ac83a67, -0x5f1b24d2, -0x07602b27, 0x735a4f41, 0x4179215c }
+ },
+ {
+ { 0x286bcd34, -0x1b51cc47, 0x559dd6dc, -0x4810814a, -0x4c2c71e1, 0x278b141f, 0x2241c286, 0x31fa8566 },
+ { -0x282312d6, -0x738f6b19, 0x47d39c70, -0x6804753d, -0x56f926fe, -0x1ec41fcd, 0x0cd99d76, 0x700344a3 },
+ { 0x2e3622f4, -0x507d93be, -0x67ccafd3, -0x3edfd679, 0x2b389123, -0x643e481f, -0x566adb77, 0x24bb2312 }
+ },
+ {
+ { -0x0a07a395, 0x41f80c2a, 0x04fa6794, 0x687284c3, -0x5c45e453, -0x76ba2067, -0x0014a2ea, 0x0d1d2af9 },
+ { 0x32de67c3, -0x4e5712e9, 0x461b4948, 0x3cb49418, 0x76cfbcd2, -0x7142bcbd, 0x1e188008, 0x0fee3e87 },
+ { 0x32621edf, -0x5625755f, 0x59226579, 0x30b822a1, -0x58653e6d, 0x4004197b, 0x18531d76, 0x16acd797 }
+ },
+ {
+ { 0x7887b6ad, -0x36a6393b, 0x5f90feba, -0x6b1e6153, -0x5cbd0afc, 0x16e24e62, 0x18161700, 0x164ed34b },
+ { 0x2d9b1d3d, 0x72df72af, -0x5bcddba6, 0x63462a36, 0x16b39637, 0x3ecea079, -0x46cfdcf7, 0x123e0ef6 },
+ { 0x192fe69a, 0x487ed94c, 0x3a911513, 0x61ae2cea, -0x465b21d9, -0x7884092d, 0x1073f3eb, 0x78da0fc6 }
+ },
+ {
+ { 0x680c3a94, -0x5d607f0f, 0x1ae9e7e6, 0x71f77e15, 0x48017973, 0x1100f158, 0x16b38ddd, 0x054aa4b3 },
+ { -0x1ad43996, 0x5bf15d28, 0x70f01a8e, 0x2c47e318, 0x06c28bdd, 0x2419afbc, 0x256b173a, 0x2d25deeb },
+ { 0x19267cb8, -0x2037b973, 0x66e54daf, 0x0b28789c, 0x666eec17, 0x2aeb1d2a, -0x548258a0, 0x134610a6 }
+ },
+},
+{
+ {
+ { -0x23fd73c4, -0x26ebcf20, 0x5217c771, 0x0eb955a8, 0x2c99a1fa, 0x4b09e1ed, -0x42958bc4, 0x42881af2 },
+ { 0x7c59b23f, -0x350aa13e, 0x154d04f2, -0x665112c2, -0x1ebebe0c, 0x68441d72, 0x3932a0a2, 0x14034513 },
+ { -0x54a352c3, 0x7bfec69a, 0x4cb2cfad, -0x3dc1732d, -0x04c8295e, 0x685dd14b, 0x15677a18, 0x0ad6d644 }
+ },
+ {
+ { 0x47927e9f, 0x79148928, 0x370aa877, 0x33dad6ef, 0x11122703, 0x1f8f24fa, 0x2adf9592, 0x5265ac2f },
+ { 0x417becb5, 0x781a439e, -0x2ef1fd9a, 0x4ac5938c, 0x0692ac24, 0x5da38511, -0x521cedcd, 0x11b065a2 },
+ { -0x65034cba, 0x405fdd30, 0x28e63f54, -0x268dc2bc, 0x5f65aaae, -0x6b3fe210, -0x1eb3f7f7, 0x43e4dc3a }
+ },
+ {
+ { -0x523d395d, -0x1590853d, -0x168e836c, -0x2f16d70a, -0x29ba150b, -0x1d2c8616, -0x3ae00442, 0x46dd8785 },
+ { -0x56c75ae9, -0x43ed380f, 0x3180b2e1, 0x473028ab, -0x0432dab6, 0x3f78571e, 0x6ff6f90f, 0x74e53442 },
+ { 0x375c8898, 0x709801be, -0x1c027cb8, 0x4b06dab5, 0x27230714, 0x75880ced, -0x22d0b3be, 0x2b09468f }
+ },
+ {
+ { -0x7d005fd6, 0x5b979465, -0x01570ab7, -0x25f695af, 0x5f77af9b, -0x5f9caec9, 0x201d1e76, 0x1bcfde61 },
+ { -0x48fe346a, -0x6838b612, -0x495c963d, -0x7c0bc72c, -0x65bfd327, 0x62962b8b, -0x67772085, 0x6976c750 },
+ { 0x246a59a2, 0x4a4a5490, -0x17802270, -0x29c14222, 0x0d2371fa, -0x26bc8399, -0x2cf0712a, 0x69e87308 }
+ },
+ {
+ { -0x7437fcfd, 0x0f80bf02, 0x7a18cefb, 0x6aae16b3, -0x28d3295d, -0x22b815b9, -0x0b12c656, 0x61943588 },
+ { 0x5656beb0, 0x435a8bb1, 0x4f4d5bca, -0x07053646, 0x1548c075, -0x464d873c, -0x176d49de, 0x3eb0ef76 },
+ { -0x6efc607b, -0x2d91a3c2, -0x090cc557, -0x3f161883, 0x70066a93, -0x176973ab, 0x1faaaddd, 0x3c34d188 }
+ },
+ {
+ { 0x2fffe0d9, -0x42a4f471, 0x3ed24fb9, 0x6aa25410, -0x4d97de3c, 0x2ac7d7bc, 0x60dca36a, 0x605b394b },
+ { -0x5f606140, 0x3f9d2b5e, -0x49dc5770, 0x1dab3b6f, 0x72d926c4, -0x5f645c16, 0x3fd8b36d, 0x37419351 },
+ { 0x5a9d1ed2, -0x4b17a91c, 0x6c97a9a2, -0x1017b78a, 0x1e5eee7d, -0x4efb309c, -0x7758e371, 0x2f50b81c }
+ },
+ {
+ { -0x5825add6, 0x2b552ca0, 0x449b0250, 0x3230b336, -0x5b466047, -0x0d3b3a44, 0x58074a22, 0x7b2c6749 },
+ { -0x0397ee45, 0x31723c61, 0x6211800f, -0x634bafb8, 0x47995753, 0x768933d3, 0x02752fcd, 0x3491a535 },
+ { 0x3ed28cdf, -0x2aae9a78, -0x2c9d21c7, 0x12d84fd2, -0x1cc871b1, 0x0a874ad3, 0x7c763e74, 0x000d2b1f }
+ },
+ {
+ { 0x3e94a8ab, -0x69db8874, -0x16587414, 0x0ad6f3ce, 0x0d743c4f, -0x6b75387f, -0x55130334, 0x76627935 },
+ { -0x2f92b599, 0x3d420811, -0x6f1f001d, -0x4103fb7b, -0x42b78422, -0x078f3949, 0x319afa28, 0x6e2a7316 },
+ { -0x292a6561, 0x56a8ac24, 0x3096f006, -0x37248ac2, -0x70b3ad67, 0x477f41e6, -0x09379eec, 0x588d851c }
+ },
+},
+{
+ {
+ { 0x77d1f515, -0x32d59a19, -0x70559f0f, 0x54899187, -0x2543f91b, -0x4e48c444, -0x56833605, 0x654878cb },
+ { -0x72094f02, 0x51138ec7, -0x1a8a0ae5, 0x5397da89, 0x717af1b9, 0x09207a1d, 0x2b20d650, 0x2102fdba },
+ { 0x055ce6a1, -0x69611bfb, 0x1251ad29, 0x36bca768, -0x55825beb, 0x3a1af517, 0x29ecb2ba, 0x0ad725db }
+ },
+ {
+ { -0x64fa907b, -0x013843f4, -0x180a0029, 0x537d5268, 0x4312aefa, 0x77afc662, 0x02399fd9, 0x4f675f53 },
+ { -0x7cb1dba9, -0x23bd984f, 0x70ce1bc5, -0x498abb4b, -0x082ea129, 0x1af07a0b, 0x71a03650, 0x4aefcffb },
+ { 0x0415171e, -0x3cd2c9ca, -0x7667b7c5, -0x32d410ef, -0x2f6baef0, -0x78f59153, -0x5d579a9f, 0x0bccbb72 }
+ },
+ {
+ { 0x50fe1296, 0x186d5e4c, -0x01176082, -0x1fc6847e, 0x507031b0, 0x3bc7f6c5, 0x108f37c2, 0x6678fd69 },
+ { -0x154e5638, 0x185e962f, 0x65147dcd, -0x791819cb, -0x44a4920e, -0x4f6d1fcf, 0x59d6b73e, 0x4024f0ab },
+ { 0x636863c2, 0x1586fa31, 0x572d33f2, 0x07f68c48, 0x789eaefc, 0x4f73cc9f, -0x7152b8ff, 0x2d42e210 }
+ },
+ {
+ { 0x0f537593, 0x21717b0d, 0x131e064c, -0x6eb196f5, 0x752ae09f, 0x1bb687ae, -0x64bdc392, 0x420bf3a7 },
+ { -0x6b202d65, -0x680aeceb, 0x313f4c6a, 0x6155985d, 0x08455010, -0x145ec0f9, -0x472d2cde, 0x676b2608 },
+ { 0x1c5b2b47, -0x7ec7459b, 0x311b1b80, -0x798e4914, -0x43ceca50, 0x7bff0cb1, -0x63f30e20, 0x745d2ffa }
+ },
+ {
+ { 0x21d34e6a, 0x6036df57, -0x66844c30, -0x4e2477d9, -0x378a9506, -0x2c3df63d, 0x4c1dc839, 0x06e15be5 },
+ { 0x2bc9c8bd, -0x40ada5e2, 0x26479d81, -0x15a4d9f8, -0x20feaa25, -0x2aee38f2, -0x69f30a30, 0x1ae23ceb },
+ { 0x1932994a, 0x5b725d87, -0x314e2550, 0x32351cb5, -0x254835fb, 0x7dc41549, 0x278ec1f7, 0x58ded861 }
+ },
+ {
+ { -0x493d3658, 0x2dfb5ba8, -0x0ad3a674, 0x48eeef8e, -0x0ed2ea8d, 0x33809107, 0x531d5bd8, 0x08ba696b },
+ { -0x0d993aa4, -0x27e8c86d, -0x33bab1b7, -0x3736893b, -0x43d93c58, 0x5ce382f8, 0x5485f6f9, 0x2ff39de8 },
+ { -0x3c103a86, 0x77ed3eee, -0x2b00b7ef, 0x04e05517, -0x0e598e35, -0x15c285c1, -0x6b8301ac, 0x120633b4 }
+ },
+ {
+ { 0x4912100a, -0x7d42ceb9, 0x7e6fbe06, -0x21dc8493, 0x11ea79c6, -0x1ee189e7, -0x34c6c422, 0x07433be3 },
+ { -0x6e9effbe, 0x0b949878, -0x13140518, 0x4ee7b13c, -0x6b0f5b40, 0x70be7395, -0x4b2a6e7b, 0x35d30a99 },
+ { 0x5ce997f4, -0x0086bb40, -0x4fa3ae5d, 0x575d3de4, 0x5a76847c, 0x583381fd, 0x7af6da9f, 0x2d873ede }
+ },
+ {
+ { 0x4e5df981, -0x559dfd1f, 0x5015e1f5, -0x5df2a6e9, -0x451de294, 0x18a275d3, 0x01600253, 0x0543618a },
+ { 0x43373409, 0x157a3164, -0x0b557e27, -0x05474812, -0x0a59b7fa, -0x4f6c011a, 0x707fa7b6, 0x2e773654 },
+ { -0x68b3dc3f, 0x0deabdf4, -0x6231b96d, -0x5590f5db, -0x5d6545d4, 0x04202cb8, 0x2d07960d, 0x4b144336 }
+ },
+},
+{
+ {
+ { 0x57c5715e, 0x299b1c3f, 0x6b686d90, -0x69346d62, 0x47235ab3, 0x30048064, -0x5bb2601f, 0x2c435c24 },
+ { 0x53242cec, 0x47b837f7, -0x3fbded0e, 0x256dc48c, -0x1e26d73b, -0x1ddd0405, -0x5275d3f9, 0x48ea295b },
+ { -0x7f077cc1, 0x0607c97c, -0x35da13a5, 0x0e851578, 0x161ebb6f, 0x54f7450b, -0x5f2107f2, 0x7bcb4792 }
+ },
+ {
+ { 0x045224c2, 0x1cecd0a0, 0x69e53952, 0x757f1b1b, 0x5289f681, 0x775b7a92, 0x16736148, 0x1b6cc620 },
+ { 0x2bc73659, -0x7b781c30, 0x059979df, 0x4baf8445, -0x23529041, -0x2e8368a6, -0x2103694a, 0x57369f0b },
+ { 0x75638698, -0x0e5666ff, -0x11559f2d, 0x353dd1be, 0x4c9ba488, -0x7b6b8ecd, 0x43ade311, 0x63fa6e68 }
+ },
+ {
+ { -0x2db4a149, 0x2195becd, -0x3f32bb07, 0x5e41f18c, 0x41ca9ede, -0x20d7f8bc, -0x0ca48299, 0x07073b98 },
+ { 0x6597c168, -0x2ea3dfad, -0x672d7877, -0x608c8c00, 0x3257ba1f, 0x18aee7f1, 0x07346f14, 0x3418bfda },
+ { 0x4ce530d4, -0x2fc39894, 0x3b5df9f4, 0x0b64c047, 0x19b3a31e, 0x065cef8b, 0x533102c9, 0x3084d661 }
+ },
+ {
+ { 0x760321fd, -0x6593178a, -0x6149c528, 0x7fe2b510, -0x7537fa6e, 0x00e7d4ae, -0x44908dc6, 0x73d86b7a },
+ { -0x407b9653, -0x1e094862, -0x1d99cecb, 0x15801004, -0x508be7e5, -0x65b67cd0, 0x049b673c, 0x3ba2504f },
+ { 0x6dba5ab6, 0x0b52b560, -0x444e1255, -0x56ecb0f1, -0x64fb59cb, 0x30a9520d, 0x7973e5db, 0x6813b8f3 }
+ },
+ {
+ { -0x0cea81d7, -0x0e6b35aa, 0x5ef528a5, 0x136d3570, -0x74fa6644, -0x22b31089, 0x24f833ed, 0x7d5472af },
+ { 0x334127c1, -0x67ab4fac, -0x7d0400db, 0x105d0478, 0x44186f4f, -0x24b60807, -0x412f4700, 0x1768e838 },
+ { -0x50cc25b9, -0x2f1078b3, -0x491cc607, 0x00d3be5d, -0x63631132, 0x3f2a8a2f, 0x2352435a, 0x5d1aeb79 }
+ },
+ {
+ { -0x49e4588b, 0x12c7bfae, -0x1d9c4003, -0x47b19de1, 0x5c840dcf, 0x0b47a5c3, -0x335079cc, 0x7e83be0b },
+ { 0x19cd63ca, -0x0a61944d, 0x21d06839, 0x670c1592, 0x2150cab6, -0x4f92a9a5, 0x104f12a3, 0x20fb199d },
+ { 0x6d99c120, 0x61943dee, 0x460b9fe0, -0x79efe0d2, -0x7117a673, 0x6bb2f151, -0x033b8a34, 0x76b76289 }
+ },
+ {
+ { 0x522ec0b3, 0x4245f1a1, 0x2a75656d, 0x558785b2, 0x48a1b3c0, 0x1d485a25, -0x2a701f61, 0x60959ecc },
+ { 0x756286fa, 0x791b4cc1, -0x28b5ea84, -0x24312ce9, -0x158d421a, 0x7e732421, 0x1131c8e9, 0x01fe1849 },
+ { -0x571285f7, 0x3ebfeb7b, -0x1afd8764, 0x49fdc2bb, 0x3c119428, 0x44ebce5d, -0x416b80b6, 0x35e1eb55 }
+ },
+ {
+ { 0x726ccc74, 0x14fd6dfa, 0x2f53b965, 0x3b084cfe, 0x52a2c8b4, -0x0cc51b0b, 0x0d40166a, 0x59aab07a },
+ { -0x3a8c722d, -0x242518ff, -0x4d90e412, -0x063909cb, 0x42f15ef4, 0x61e96a80, -0x509f5b28, 0x3aa1d11f },
+ { -0x6da153db, 0x77bcec4c, 0x60137738, 0x18487184, -0x01560baf, 0x5b374337, -0x371955ba, 0x1865e78e }
+ },
+},
+{
+ {
+ { 0x1c529ccb, -0x6983ab17, 0x64c635fb, 0x30f62692, 0x78121965, 0x2747aff4, -0x150990a4, 0x17038418 },
+ { -0x4991e086, -0x333b4839, -0x0af3d082, 0x44157e25, 0x713eaf1c, 0x3ef06dfc, 0x52da63f7, 0x582f4467 },
+ { 0x20324ce4, -0x39ce842d, -0x5bb7743c, -0x57efbd18, 0x4e5a1364, -0x4de10e75, -0x325d7237, 0x0c2a1c4b }
+ },
+ {
+ { 0x69bd6945, -0x123b7eb8, -0x41e372de, 0x0d6d907d, -0x2aa33a55, -0x39c42dee, -0x5ceb237d, 0x5a6a9b30 },
+ { 0x6f1f0447, -0x2db23830, -0x24783fa7, -0x4dd961c2, -0x044d2d71, -0x2ea4fd8e, -0x3909b789, 0x7c558bd1 },
+ { -0x2c69b9c3, -0x2f13eadc, -0x3ca5db10, 0x12bb628a, 0x1cbc5fa4, -0x5af3c587, 0x0afbafc3, 0x0404a5ca }
+ },
+ {
+ { 0x2a416fd1, 0x62bc9e1b, -0x1cafa675, -0x4a3908d8, 0x3d5d6967, 0x04343fd8, -0x18071168, 0x39527516 },
+ { 0x0aa743d6, -0x73e0bff9, 0x5b265ee8, -0x33452f35, 0x668fd2de, 0x574b046b, -0x352269cd, 0x46395bfd },
+ { 0x1a5d9a9c, 0x117fdb2d, -0x2effa3d6, -0x6388ba44, 0x54d56fea, -0x102b410f, -0x17dd2fea, 0x76579a29 }
+ },
+ {
+ { 0x52b434f2, 0x333cb513, -0x6c217f1f, -0x27cdd7b7, 0x750d35ce, -0x4aaed779, 0x2a2777c1, 0x02c514bb },
+ { 0x49c02a17, 0x45b68e7e, -0x43565c81, 0x23cd51a2, -0x13ddb3e5, 0x3ed65f11, -0x61fa424f, 0x43a384dc },
+ { -0x740e49bb, 0x684bd5da, -0x094ab4ad, -0x04742c82, -0x564f2dad, 0x313916d7, 0x61548059, 0x11609209 }
+ },
+ {
+ { 0x369b4dcd, 0x7a385616, 0x655c3563, 0x75c02ca7, -0x2b0e7fdf, 0x7dc21bf9, -0x6e191fbe, 0x2f637d74 },
+ { 0x29dacfaa, -0x4bb2e997, -0x7beca671, -0x25ad60b4, 0x453d5559, -0x16109c36, -0x3a9671f5, 0x351e125b },
+ { 0x1af67bbe, -0x2b4b64ba, -0x3754769f, -0x29fcfc86, -0x06596605, 0x71dee19f, -0x1831d566, 0x7f182d06 }
+ },
+ {
+ { -0x71de8ade, 0x09454b72, -0x2b7b4728, -0x55a7170c, 0x7f46903c, -0x2ca7dab3, 0x241c5217, 0x44acc043 },
+ { -0x54fe9714, 0x7a7c8e64, 0x15edc543, -0x34a5b5ab, 0x47cd0eda, 0x095519d3, 0x343e93b0, 0x67d4ac8c },
+ { 0x4f7a5777, 0x1c7d6bbb, -0x6e7cec1f, -0x74ca012c, -0x3694b97c, 0x4adca1c6, 0x12ad71bd, 0x556d1c83 }
+ },
+ {
+ { -0x4ee417df, -0x7e0f98aa, 0x10a3f3dd, 0x0faff823, 0x6a99465d, -0x074d2fab, -0x337380fb, 0x097abe38 },
+ { 0x0c8d3982, 0x17ef40e3, 0x15a3fa34, 0x31f7073e, 0x0773646e, 0x4f21f3cb, 0x1d824eff, 0x746c6c6d },
+ { 0x7ea52da4, 0x0c49c987, -0x6423e2bd, 0x4c436955, -0x0833142e, 0x022c3809, 0x4bee84bd, 0x577e14a3 }
+ },
+ {
+ { -0x42b228d5, -0x6b013142, 0x060f2211, -0x0b95b026, -0x3f372e01, 0x124a5977, -0x04ff6d6b, 0x705304b8 },
+ { 0x61a73b0a, -0x0f1d9754, 0x3791a5f5, -0x0d0505f0, 0x6b6d00e9, -0x3e1ec17e, 0x6fd78f42, 0x60fa7ee9 },
+ { 0x4d296ec6, -0x49c2e2cb, 0x5fad31d8, -0x0c3cfac2, -0x4b42bd14, 0x670b958c, -0x5e9cac03, 0x21398e0c }
+ },
+},
+{
+ {
+ { -0x79e48166, -0x793a03ea, 0x6a27c451, -0x095ccfb9, -0x5e16ca69, 0x01667267, 0x6082dfeb, 0x05ffb9cd },
+ { -0x72582d11, 0x216ab2ca, -0x660bd7d9, 0x366ad9dd, 0x4fdd3c75, -0x519b4700, 0x53909e62, 0x403a395b },
+ { -0x0ac09ec7, -0x59e80561, 0x13e66cb6, 0x60f2b5e5, -0x4cbb755c, -0x28574111, 0x6f5ea192, 0x7a293285 }
+ },
+ {
+ { 0x79639302, -0x4763bbb8, 0x50c67f2c, 0x4ae4f193, -0x37e5063a, -0x0f4ca258, 0x46871017, 0x39d00035 },
+ { -0x4fd21778, 0x0b39d761, -0x2dbeb1e1, 0x5f550e7e, 0x22e1a940, -0x59405ba8, -0x02bb8467, 0x050a2f7d },
+ { -0x59af2489, 0x437c3b33, -0x453ad44e, 0x6bafe81d, 0x2db7d318, -0x0166bfd3, 0x372ba6ce, 0x2b5b7eec }
+ },
+ {
+ { 0x613ac8f4, -0x596bbfb3, -0x056818d4, 0x500c3c2b, 0x1fcec210, -0x78befb2e, -0x79fb5712, 0x1b205fb3 },
+ { -0x7c0af111, -0x4c43b443, -0x736d879a, 0x508f0c99, -0x37481992, 0x43e76587, -0x5b806727, 0x0f7655a3 },
+ { -0x2db4ecc4, 0x55ecad37, 0x6038c90b, 0x441e147d, -0x29d39012, 0x656683a1, -0x781f1352, 0x0157d5dc }
+ },
+ {
+ { -0x28e14adc, -0x6ad9aaec, 0x5df14593, -0x19fc277f, 0x0d4de6b7, 0x147cdf41, 0x0437c850, 0x5293b173 },
+ { 0x0354c13d, -0x0d5850af, -0x55c8d4a0, -0x285f4ebb, 0x05a3d470, 0x2869b96a, -0x7db9fe8d, 0x6528e42d },
+ { 0x4bccf226, 0x23d0e081, -0x7e69046d, -0x6d38ba33, 0x59541e5b, -0x749e8694, -0x3fde0688, 0x40a44df0 }
+ },
+ {
+ { 0x4bc5d095, -0x793691af, -0x03597fb6, -0x0df2bf68, -0x37d915a3, 0x27363d89, 0x5719cacf, 0x39ca3656 },
+ { 0x4f20ea6a, -0x25579677, 0x4c620618, -0x15eb5c2f, 0x090bf8be, 0x6001fccb, -0x6b816310, 0x35f4e822 },
+ { 0x6f87b75c, -0x68af90d1, 0x034ae070, -0x39db5160, -0x552cb22a, 0x1ec856e3, -0x1bbf1a71, 0x055b0be0 }
+ },
+ {
+ { 0x6ea33da2, 0x4d12a04b, -0x1c9ed923, 0x57cf4c15, -0x11bb2699, -0x6f13698b, 0x2a985aac, 0x64ca348d },
+ { -0x768ca2ee, 0x6469a17d, -0x199d460f, -0x2490d82b, 0x6a395681, -0x60345cd8, -0x2d9650db, 0x363b8004 },
+ { -0x1b3b6ed3, -0x66a771e7, 0x1ca5ce6b, -0x1033c4b2, -0x05a4672b, 0x4522ea60, 0x1de4a819, 0x7064bbab }
+ },
+ {
+ { 0x42542129, -0x5d6f3f9f, -0x4172a470, -0x0d1d3d52, 0x76abfe1b, -0x30dba725, -0x7c29d941, 0x02157ade },
+ { 0x5a770641, -0x46e61eaf, 0x4e7f8039, -0x565d1d39, 0x3df23109, 0x7527250b, -0x53d84875, 0x756a7330 },
+ { 0x1b9a038b, 0x3e46972a, 0x7ee03fb4, 0x2e4ee66a, 0x6edbb4ca, -0x7e5db789, -0x7132fa9d, 0x1a944ee8 }
+ },
+ {
+ { 0x182362d6, -0x44bf57a7, -0x75b2e545, -0x4660aa89, 0x758559f6, -0x72e74bd9, 0x4d26235a, 0x26c20fe7 },
+ { 0x51039372, -0x2a56e2ef, -0x6635d922, 0x2ed377b7, -0x02c99495, -0x5e8dfd54, -0x296fe66b, 0x0730291b },
+ { -0x1633dd0b, 0x648d1d9f, 0x28dd577c, 0x66bc5619, 0x652439d1, 0x47d3ed21, -0x125074b7, 0x49d271ac }
+ },
+},
+{
+ {
+ { -0x4b48a9ff, 0x2798aaf9, 0x5c8dad72, 0x5eac7213, 0x61b7a023, -0x2d31559f, -0x167082b2, 0x1bbfb284 },
+ { 0x382b33f3, -0x760afa76, -0x52b73f4c, 0x5ae2ba0b, -0x5ac24c92, -0x706c4afd, -0x6a5dcd1a, 0x5aa3ed9d },
+ { -0x38269a9f, 0x656777e9, 0x72c78036, -0x34d4edac, -0x26af9112, 0x65053299, 0x5e8957cc, 0x4a07e14e }
+ },
+ {
+ { -0x3b885b65, 0x240b58cd, 0x6447f017, -0x02c72522, -0x58379553, 0x19928d32, -0x7b505f7f, 0x50af7aed },
+ { -0x67f20667, 0x4ee412cb, 0x3c6ec771, -0x5cea2891, -0x6da38803, -0x445a1222, 0x1d313402, 0x3f0bac39 },
+ { 0x15f65be5, 0x6e4fde01, 0x216109b2, 0x29982621, 0x0badd6d9, 0x78020581, -0x45142ffa, 0x1921a316 }
+ },
+ {
+ { -0x260c3e75, -0x28a55266, 0x60b1c19c, 0x566a0eef, 0x255c0ed9, 0x3e9a0bac, -0x5f9d380b, 0x7b049dec },
+ { -0x20478f04, -0x76bdd082, 0x4f76b3bd, 0x2c296beb, 0x36c24df7, 0x0738f1d4, -0x1d8c5150, 0x6458df41 },
+ { 0x35444483, -0x23341c86, 0x0fedbe93, 0x75887933, 0x12c5dd87, 0x786004c3, -0x3d6af19c, 0x6093dccb }
+ },
+ {
+ { 0x6084034b, 0x6bdeeebe, 0x780fb854, 0x3199c2b6, -0x49d2f96b, -0x68cc8955, -0x749b8270, 0x6e3180c9 },
+ { -0x7a1f8f93, 0x1ff39a85, -0x4c18c6cd, 0x36d0a5d8, 0x718f453b, 0x43b9f2e1, 0x4827a97c, 0x57d1ea08 },
+ { -0x5ed74f8f, -0x11854919, -0x6c577456, -0x5b3ea693, -0x4dde9ed0, -0x084b217e, -0x226842e8, 0x363e999d }
+ },
+ {
+ { -0x1db4513a, 0x2f1848dc, -0x454350a0, 0x769b7255, 0x3cefe931, -0x6f34c392, -0x39064cab, 0x231f979b },
+ { 0x35ee1fc4, -0x6957bc3f, 0x08e4c8cf, -0x68914cab, -0x4a732cd0, -0x4bd097ff, 0x693a052b, 0x48ee9b78 },
+ { -0x33d50c3a, 0x5c31de4b, -0x01df72e1, -0x4fb44fd0, -0x3eb04b9a, -0x48728ff7, 0x08792413, 0x079bfa9b }
+ },
+ {
+ { -0x5d2abdbb, -0x0c361280, 0x77f63952, 0x0aa08b78, -0x2ef7ab8b, -0x2892539d, -0x6b8f9c95, 0x1ef4fb15 },
+ { -0x25cff20c, -0x1c6fc5af, 0x3da95ab0, -0x7bc69bdd, 0x0b356480, -0x12c30ed3, -0x7b7e8e6c, 0x038c77f6 },
+ { 0x5b167bec, -0x7ab1a11a, -0x692f323e, 0x59590a42, -0x67efde67, 0x72b2df34, 0x4a0bff56, 0x575ee92a }
+ },
+ {
+ { 0x0aa4d801, 0x5d46bc45, -0x5acc4628, -0x3c50edd9, 0x2b8906c2, 0x389e3b26, 0x382f581b, 0x200a1e7e },
+ { -0x75e7d031, -0x2b3f7f70, -0x66b76243, 0x30e170c2, 0x52f733de, 0x05babd57, 0x2cd3fd00, 0x43d4e711 },
+ { -0x1506c53b, 0x518db967, 0x056652c0, 0x71bc989b, 0x567197f5, -0x01d47a27, 0x651e4e38, 0x050eca52 }
+ },
+ {
+ { 0x60e668ea, -0x6853c68a, 0x153ab497, -0x64e64402, 0x34eca79f, 0x4cb179b5, -0x5ece51a9, 0x6151c09f },
+ { 0x453f0c9c, -0x3cbce522, -0x008fc465, -0x160afba2, -0x127b84c3, -0x03268537, 0x1c58f4c6, 0x4b0ee6c2 },
+ { -0x020fa26a, 0x3af55c0d, 0x2ab4ee7a, -0x22d9d120, 0x12171709, 0x11b2bb87, -0x7ff0fcf5, 0x1fef24fa }
+ },
+},
+{
+ {
+ { -0x6fe99de0, -0x006e5996, 0x5bf1e009, -0x0ddaad52, 0x7f90df7c, 0x7dff85d8, 0x0c736fb9, 0x4f620ffe },
+ { 0x6b6c6609, -0x4b69edc6, -0x7f54a6c8, -0x58af017b, -0x483d85a1, -0x0b8e40c7, 0x77ac193c, 0x507903ce },
+ { -0x2021c1cc, 0x62f90d65, -0x4605a053, -0x30d73a6e, -0x39e9baf0, -0x66379107, 0x4a256c84, 0x25d44804 }
+ },
+ {
+ { -0x36fdd4ab, 0x2c7c4415, -0x7ed14e02, 0x56a0d241, -0x2849a1f3, -0x0fd15e37, -0x2acdc4da, 0x4180512f },
+ { -0x38164e91, -0x4297dcf2, -0x3e3a86a3, 0x0eb1b9c1, -0x6a494e01, 0x7943c8c4, 0x0bbacf5e, 0x2f9faf62 },
+ { -0x75b75a25, -0x5b00c197, -0x426abfc5, -0x4595c7fa, 0x47d5b65d, -0x60831e51, 0x5939d2fb, 0x15e087e5 }
+ },
+ {
+ { -0x0469c0c8, -0x776be792, -0x239c642b, 0x48a00e80, -0x1693e367, -0x5b17f6d5, -0x35a8c99f, 0x5a097d54 },
+ { 0x745c1496, 0x12207543, -0x25c79ef4, -0x2500c303, 0x2c71c34f, -0x1b1868d9, 0x34bdede9, 0x39c07b19 },
+ { 0x17c9e755, 0x2d45892b, -0x76cf7208, -0x2fcc028e, 0x525b8bd9, 0x6c2fe9d9, -0x3ee33f87, 0x2edbecf1 }
+ },
+ {
+ { -0x2f785da1, -0x11f0f023, 0x5c3e34ee, -0x638aceab, -0x7054c54b, 0x660c572e, 0x544cd3b2, 0x0854fc44 },
+ { -0x38ea5f2e, 0x1616a4e3, -0x07cbe2b3, 0x53623cb0, -0x38176635, -0x6910acd7, -0x5997455a, 0x3d4e8dbb },
+ { 0x55edad19, 0x61eba0c5, -0x0f57c21a, 0x24b533fe, -0x7c455a08, 0x3b770428, -0x675b8173, 0x678f82b8 }
+ },
+ {
+ { 0x57775696, 0x1e09d940, 0x3cd951db, -0x112ed9a4, 0x20bce16f, -0x056253d5, -0x172f760c, 0x0f7f76e0 },
+ { -0x296ff3ac, -0x4eb6e2f5, -0x62ecd9ca, 0x3539722c, 0x0b362bc9, 0x4db92892, -0x59749621, 0x4d7cd1fe },
+ { -0x2b7a4ff4, 0x36d9ebc5, -0x1b524c9b, -0x5da69b6e, -0x3dee6333, -0x3e9a6b80, 0x186e0d5f, 0x45306349 }
+ },
+ {
+ { 0x2b072491, -0x695beb14, 0x27a7b65b, 0x1bb22181, 0x6e8a4af0, 0x6d284959, -0x32d889a1, 0x65f3b08c },
+ { -0x593200e3, -0x6b222f3f, -0x17bdec52, 0x55f6f115, -0x66d03096, 0x6c935f85, 0x4a37f16f, 0x067ee0f5 },
+ { 0x199801f7, -0x134d6001, -0x5d5f08d1, -0x62c9e2e1, 0x75fd2f49, 0x25f11d23, 0x0fe10fe2, 0x124cefe8 }
+ },
+ {
+ { 0x31b16489, 0x1518e85b, -0x248ef405, -0x70552349, -0x5eb51dc7, 0x39b0bdf4, 0x503d20c1, 0x05f4cbea },
+ { -0x2e720dab, 0x4c126cf9, 0x147a63b6, -0x3e2b8e17, -0x0c36c4a1, 0x2c6d3c73, -0x1c00795e, 0x6be3a6a2 },
+ { -0x3fbeba44, -0x31fbf162, 0x08f6834c, -0x38e00b1e, -0x5477b85d, -0x42ab9173, -0x5b2d545b, 0x64666aa0 }
+ },
+ {
+ { 0x3337e94c, -0x4f3ac409, 0x11e14f15, 0x7cb5697e, 0x1930c750, 0x4b84abac, -0x1f9bfb98, 0x28dd4abf },
+ { 0x7c06d912, 0x6841435a, -0x44c07cf5, -0x35edc3df, -0x4e341d88, -0x2b4c84d9, -0x3890afba, 0x1d753b84 },
+ { 0x44cb9f44, 0x7dc0b64c, -0x1c6da241, 0x18a3e1ac, 0x2d0457c4, 0x7a303486, -0x75f376d2, 0x4c498bf7 }
+ },
+},
+{
+ {
+ { 0x30976b86, 0x22d2aff5, -0x3d2db9fc, -0x726f47fa, 0x4de5bae5, -0x235e7694, -0x37cbf3e9, 0x28005fe6 },
+ { 0x1aa73196, 0x37d653fb, 0x3fd76418, 0x0f949530, -0x04c5e84e, -0x52dff4f7, 0x2fc8613e, 0x544d4929 },
+ { 0x34528688, 0x6aefba9f, 0x25107da1, 0x5c1bff94, 0x66d94b36, -0x08a44433, 0x0f316dfa, 0x72e47293 }
+ },
+ {
+ { -0x2cd589d9, 0x07f3f635, 0x5f6566f0, 0x7aaa4d86, 0x28d04450, 0x3c85e797, 0x0fe06438, 0x1fee7f00 },
+ { -0x687ef7b1, 0x2695208c, 0x23450ee1, -0x4eafd5f5, 0x03efde02, -0x0262515a, 0x2733a34c, 0x5a9d2e8c },
+ { 0x03dbf7e5, 0x765305da, 0x1434cdbd, -0x5b250db7, -0x2db57714, 0x7b4ad5cd, -0x11fbfabd, 0x00f94051 }
+ },
+ {
+ { 0x07af9753, -0x28106c45, 0x3db766a7, 0x583ed0cf, 0x6e0b1ec5, -0x31966741, 0x5dd40452, 0x47b7ffd2 },
+ { -0x3c2ccf4e, -0x72ca94dd, -0x4fb8e4fa, -0x0de37465, 0x6e42b83c, -0x4c93ce94, -0x74154ef3, 0x07d79c7e },
+ { -0x43f722ee, -0x78040464, -0x1e113d65, -0x75f994c6, -0x24e03e41, 0x0d57242b, 0x5ea64bb6, 0x1c3520a3 }
+ },
+ {
+ { 0x216bc059, -0x325790c0, 0x12bcd87e, 0x1fbb231d, 0x17c70990, -0x4b6a9562, 0x66d12e55, 0x38750c3b },
+ { -0x43345cb6, -0x7f2dac5a, 0x3838219b, 0x3e61c3a1, -0x677d1c6a, -0x6f3c49ff, 0x5d0ee66f, 0x1c3d0577 },
+ { -0x6bdd1ae6, 0x692ef140, 0x2b5df671, -0x343f38c4, 0x744ce029, 0x21014fe7, -0x2ccfb784, 0x0621e2c7 }
+ },
+ {
+ { -0x4f240f0d, -0x4851e86a, -0x1e831e6a, 0x54dfafb9, -0x16555c4c, 0x25923071, -0x5effd163, 0x5d8e589c },
+ { -0x7da67c73, -0x50679f34, -0x39606524, -0x6f15b73f, 0x65581e30, 0x65264837, 0x7bd3a5bc, 0x0007d609 },
+ { 0x0842a94b, -0x3f40e26b, 0x588f2e3e, -0x4d2c3c9d, -0x44ae1d11, 0x0a961438, 0x3c1cbf86, 0x1583d778 }
+ },
+ {
+ { -0x3362d739, -0x6ffcb8fc, -0x08d33a71, 0x1d1b679e, -0x41a478da, 0x16e12b5f, -0x7c3aa7f6, 0x4958064e },
+ { 0x5da27ae1, -0x13115d11, 0x55670174, 0x597c3a14, 0x6609167a, -0x3659d5ee, -0x7e127090, 0x252a5f2e },
+ { 0x5066e80d, 0x0d289426, 0x307c8c6b, -0x033c087b, 0x0c1112fd, 0x1b53da78, -0x27bc4c78, 0x079c170b }
+ },
+ {
+ { -0x3f2a2faa, -0x322932b0, -0x44fca8c5, -0x65089793, -0x0c3c10b8, 0x3ca6723f, 0x317b8acc, 0x6768c0d7 },
+ { 0x64fa6fff, 0x0506ece4, 0x6205e523, -0x411cbce2, 0x51b8ea42, 0x35794224, 0x4ac9fb00, 0x6dec05e3 },
+ { -0x0eaa3e4d, -0x6b49da1b, -0x6684846f, 0x417bf3a7, 0x6d6b2600, -0x3dd34224, -0x2232ad0c, 0x51445e14 }
+ },
+ {
+ { 0x2bbea455, -0x76ceb855, -0x6df86ed7, -0x73ac5db1, -0x41cf0859, 0x4b49f948, 0x6e4fd43d, 0x12e99008 },
+ { 0x3b144951, 0x57502b4b, 0x444bbcb3, -0x71980095, 0x166385db, -0x474296d9, -0x1c6d6a38, 0x13186f31 },
+ { 0x7fdfbb2e, -0x0ef3694d, 0x121ceaf9, -0x60656ca2, 0x3a5b983f, -0x20eec93c, 0x5d3e99af, 0x77b2e3f0 }
+ },
+},
+{
+ {
+ { -0x33a32d65, -0x6acd0b71, -0x5c31c98f, 0x2ba851be, 0x51122941, 0x32dacaa0, 0x350004f2, 0x478d99d9 },
+ { -0x630ed9a9, -0x02f28a79, -0x1ac5f1d7, -0x17d0106c, 0x5bbb4be7, -0x33cb5810, -0x5af3c75e, 0x0b251172 },
+ { -0x6f44fd40, 0x1d5ad948, 0x0ec25115, 0x50e208b1, 0x4ef21702, -0x5d95dd77, 0x3b524805, 0x4dc92334 }
+ },
+ {
+ { -0x0c93b68b, 0x3ad3e3eb, 0x37862125, -0x28a2da5b, -0x5fda5aea, -0x178c6bc3, -0x3bee37b9, 0x6bbc7cb4 },
+ { 0x0f8086b6, -0x1c7d73c0, -0x6860f238, 0x3f77e6f7, 0x4df42cb4, 0x7ef6de30, -0x4954287c, 0x5265797c },
+ { -0x2b5af2aa, 0x3c6f9cd1, -0x39015482, -0x49dbbf89, 0x3580972e, 0x6ff9bf48, -0x4ccd5305, 0x00375883 }
+ },
+ {
+ { 0x6c75c99c, -0x3674137b, 0x00e33cf4, -0x1bbe7b40, -0x456f89cc, 0x0a676b9b, 0x71f379d7, 0x669e2cb5 },
+ { 0x28cb0940, 0x0001b2cd, 0x6f1c24c9, 0x63fb51a0, -0x232a35cf, -0x4a52796f, -0x73baf9a0, 0x67238dbd },
+ { -0x5b642cf8, -0x34ee948d, 0x2392729e, 0x025aad6b, 0x3f55d9b1, -0x4b86c106, 0x40678bb9, 0x72a10561 }
+ },
+ {
+ { -0x1d1afa4a, 0x0d8d2909, -0x3fd6edd0, -0x67358755, -0x564edcd9, 0x77ef5569, -0x7ebc64b9, 0x7c77897b },
+ { 0x1cc9249d, -0x5d497ed5, 0x21211f58, 0x62866eee, 0x5df10ece, 0x2cb5c5b8, -0x1d9c5200, 0x03a6b259 },
+ { -0x21cce34b, -0x0e3e4a1e, 0x15fca420, 0x5a9f5d8e, 0x7bd932b1, -0x605bc70f, 0x1c6146e7, 0x2a381bf0 }
+ },
+ {
+ { -0x4acbe991, -0x083f41ce, 0x19cf70d4, 0x27e6ca64, -0x56a858a7, -0x6cb20829, -0x54213d56, 0x5701461d },
+ { -0x3037ee3f, -0x53646787, 0x3756e567, -0x7482d67f, 0x7c70edfc, 0x50da4e60, -0x77bbff4a, 0x5dbca62f },
+ { 0x2c915c25, 0x2c674740, 0x0b0d340a, 0x1bdcd1a8, 0x07b43f5f, 0x5e5601bd, 0x5539a242, 0x2555b4e0 }
+ },
+ {
+ { -0x781b9c2c, 0x78409b1d, -0x32049c63, -0x52b256a6, 0x55259b9c, -0x13d788c9, -0x3cedcf55, 0x69c806e9 },
+ { 0x66ddd216, 0x6fc09f52, -0x371c8fb8, -0x231a9f59, -0x5d209d03, -0x139a6c63, -0x1ad12e6e, 0x7a869ae7 },
+ { 0x14bb3f22, 0x7b48f574, -0x51233378, 0x68c7cee4, 0x79ed80be, -0x12d06c9f, 0x5f77bc4b, 0x25d70b88 }
+ },
+ {
+ { -0x44e51b2c, -0x67ba62d7, 0x39f954ec, 0x56b9c4c7, -0x3d64b4c2, -0x7cd8bc0a, -0x67497876, 0x21ea8e27 },
+ { 0x762bf4de, 0x4151c3d9, 0x2745d82b, 0x083f435f, 0x0d23ddd5, 0x29775a2e, 0x69a5db24, 0x138e3a62 },
+ { 0x6a5a7b9c, -0x78410b4c, 0x5fc1d062, -0x2dd662e5, -0x22cde9b8, -0x7dbf67e8, -0x1a5d1fc3, 0x5c5abeb1 }
+ },
+ {
+ { 0x1306a233, 0x02cde6de, 0x116f8ec7, 0x7b5a52a2, -0x3ee9c4a5, -0x1e397e0c, 0x60d32643, 0x241d3506 },
+ { -0x48c3d225, 0x14722af4, 0x5a05060d, -0x43b8f3a1, 0x2581b02e, 0x00943eac, 0x1f499c8f, 0x0e434b3b },
+ { 0x0ebc52c7, 0x6be4404d, -0x4e586e0b, -0x51b9dcc5, -0x2da24bd5, 0x2aec170e, 0x6645d694, 0x1d8dfd96 }
+ },
+},
+{
+ {
+ { 0x12ddb0a4, -0x2a679c64, -0x3fdb7995, -0x5a2e60d0, 0x58fce460, -0x2e83d0fd, 0x2e095e8a, 0x07a19515 },
+ { -0x63d13b22, 0x296fa9c5, 0x4f84f3cb, -0x43749e41, 0x17a8f908, 0x1c7706d9, 0x7ad3255d, 0x63b795fc },
+ { 0x389e5fc8, -0x57c970fe, -0x30721bc5, -0x6fbcc4fe, -0x3abed9bd, -0x505e02a3, 0x032f0137, 0x3e8fe83d }
+ },
+ {
+ { -0x17102ec4, 0x08704c8d, 0x33e03731, -0x203ae572, 0x1260cde3, -0x5a62a25b, -0x59da737a, 0x22d60899 },
+ { 0x0570a294, 0x2f8b15b9, 0x67084549, -0x6b0dbd90, 0x61bbfd84, -0x21e3a51f, 0x7fac4007, 0x75ba3b79 },
+ { 0x70cdd196, 0x6239dbc0, 0x6c7d8a9a, 0x60fe8a8b, -0x14bfeda0, -0x4c77b844, -0x788861a2, 0x0904d07b }
+ },
+ {
+ { 0x48f940b9, -0x0bcdd29a, -0x42d2f3c7, 0x06952f0c, -0x5f7e06cf, 0x167697ad, -0x4508d594, 0x6240aace },
+ { -0x22456e64, -0x4b31e02c, -0x38b37256, -0x30ce24c2, -0x527933af, 0x2c63cc63, -0x43e221f9, 0x43e2143f },
+ { 0x5ba295a0, -0x07cb8b64, -0x35c82da6, -0x296b83a5, -0x1836ce96, 0x66f13ba7, -0x724bf354, 0x56bdaf23 }
+ },
+ {
+ { -0x3e62c44e, 0x1310d36c, 0x622386b9, 0x062a6bb7, -0x285eb0a4, 0x7c9b8591, 0x7e1e5754, 0x03aa3150 },
+ { -0x0acacc15, 0x362ab9e3, 0x6eb93d40, 0x338568d5, 0x1d5a5572, -0x61f1ebae, -0x7c8bece8, 0x1d24a86d },
+ { -0x002b31e1, -0x0b1389b8, 0x54ac8c1c, -0x1fba1510, 0x1d09357c, -0x772dda7e, -0x6514b7a7, 0x43b261dc }
+ },
+ {
+ { 0x6c951364, 0x19513d8b, 0x000bf47b, -0x6b018eda, -0x2ab06a99, 0x028d10dd, 0x42940964, 0x02b4d5e2 },
+ { -0x77448645, -0x1aa4e1e7, -0x3e85ca63, -0x5f612f83, 0x603dea33, -0x4fd3d11e, 0x5b276bc2, 0x326055cf },
+ { 0x28d18df2, -0x4b5eaa35, 0x186ce508, -0x1533b9ba, 0x6c824389, -0x3b630b6d, -0x51a2cbf0, 0x27a6c809 }
+ },
+ {
+ { -0x3bc296ac, -0x32d3d8f6, 0x6a66cab2, -0x22b5c1a9, 0x69d7036c, 0x79fa5924, 0x3d8c2599, 0x22150360 },
+ { 0x1f0db188, -0x74591433, 0x675a5be8, 0x37d3d73a, 0x15f5585a, -0x0dd1205d, -0x009f5e82, 0x2cb67174 },
+ { 0x390be1d0, 0x59eecdf9, 0x728ce3f1, -0x56bddfbc, 0x7a94f0f4, -0x7d76e39a, 0x3890f436, 0x7b1df4b7 }
+ },
+ {
+ { 0x07f8f58c, 0x5f2e2218, -0x2b6bf62c, -0x1caaa361, 0x1fb6a630, -0x4d555773, -0x2cad1fc3, 0x68698245 },
+ { -0x4c4d5ddc, -0x1b6d0d20, 0x2b551160, 0x7c6c9e06, 0x0d7f7b0e, 0x15eb8fe2, 0x58fc5992, 0x61fcef26 },
+ { 0x2a18187a, -0x244ea27b, -0x79225329, -0x0c1b552d, 0x0ff6c482, 0x44bae281, 0x3daf01cf, 0x46cf4c47 }
+ },
+ {
+ { -0x0eb67ec0, 0x213c6ea7, 0x392b4854, 0x7c1e7ef8, 0x5629ceba, 0x2488c38c, 0x0d8cc5bb, 0x1065aae5 },
+ { -0x613b1a07, 0x426525ed, 0x16903303, 0x0e5eda01, -0x341a3524, 0x72b1a7f2, 0x14eb5f40, 0x29387bcd },
+ { -0x20dff2a9, 0x1c2c4525, -0x403598b6, 0x5c3b2dd6, -0x1e7cbfd0, 0x0a07e7b1, 0x4f1ce716, 0x69a198e6 }
+ },
+},
+{
+ {
+ { -0x61d2b8cc, 0x7b26e56b, -0x7e39e98b, -0x3b38ecd5, -0x13632181, -0x10a36adb, -0x18e8bc53, 0x39c80b16 },
+ { -0x10562969, 0x7afcd613, 0x1c067959, 0x0cc45aa4, -0x3e05256a, -0x5a901efc, 0x72e40365, 0x3a73b704 },
+ { 0x1b826c68, 0x0f196e0d, 0x4960e3db, -0x08e00f1e, 0x23b7436c, 0x61131670, 0x77da7282, 0x0cf0ea58 }
+ },
+ {
+ { 0x3ba6945a, -0x1ccd312c, -0x177e3fa3, -0x21f4ec9f, 0x5e67ed3b, 0x1ad40f09, -0x4739c2a3, 0x5da8acda },
+ { -0x222b3343, 0x196c80a4, -0x6a0d2263, 0x22e6f55d, 0x40d6c71b, -0x38a1cc39, -0x34c3fbd1, 0x7bb51279 },
+ { 0x3a70159f, -0x3b4999b6, 0x0a904e14, 0x76194f0f, -0x5bf693ed, -0x5a9eb3c7, -0x68601313, 0x6cd0ff50 }
+ },
+ {
+ { -0x4fb45e72, 0x7fecfabd, 0x3bddbcf7, -0x2f038404, 0x057a131c, -0x5be2b792, -0x0dddc59f, 0x641a4391 },
+ { -0x70bbd754, -0x3f1f9819, -0x59eeca1d, 0x14835ab0, 0x38062935, -0x0de2eb0d, -0x20fb7b64, 0x6390a4c8 },
+ { -0x59f95725, -0x3a3946a6, -0x4f97da0f, -0x6eb48062, 0x44fc9eff, 0x2a731f6b, 0x62705cfc, 0x30ddf385 }
+ },
+ {
+ { 0x68bcd52c, 0x33bef2bd, 0x69482ef2, -0x39b62450, 0x41cb1aee, -0x4a4911f4, 0x0212a7e5, 0x5c294d27 },
+ { -0x2e400807, 0x4e3dcbda, 0x20645717, -0x36ee717e, 0x0f189d56, -0x45333144, -0x2bb98998, 0x1b4822e9 },
+ { 0x25563781, -0x54c9f581, 0x480f7958, 0x2512228a, 0x6114b4e3, -0x38a2fad9, -0x268901d6, 0x222d9625 }
+ },
+ {
+ { 0x0a344f85, 0x0f94be7e, -0x780dd3c8, -0x14d05574, 0x4ee16f0f, -0x631e18a2, 0x18a08dea, 0x43e64e54 },
+ { -0x4c8d531f, 0x1c717f85, 0x4638bf18, -0x7e6cf197, 0x6bc08b58, 0x239cad05, -0x7807000c, 0x0b34271c },
+ { 0x1a35ce63, -0x7eaa1dae, -0x06edfd72, -0x41eff2b3, -0x5a822314, -0x4007f408, 0x6d6bc6e4, 0x57342dc9 }
+ },
+ {
+ { 0x1e707bf6, -0x0c3c4349, 0x7291a762, 0x351d9b8c, -0x252965cd, 0x00502e6e, 0x1ec8807f, 0x522f521f },
+ { -0x3731a668, -0x10110f9b, -0x4a34155e, -0x40fd6af0, 0x20b7c458, -0x739b5efa, 0x31c24855, 0x35134fb2 },
+ { -0x065c6fd5, 0x272c1f46, -0x669a8434, -0x36e45c49, 0x4f8a1c0e, -0x519eb4d0, 0x0b99017b, 0x7afcaad7 }
+ },
+ {
+ { -0x107bd495, -0x577ebe14, -0x6854193b, 0x55e7b147, 0x03784ffe, -0x738b7069, -0x5032ff49, 0x5b50a1f7 },
+ { -0x5b4741bf, -0x3da212ac, 0x1bb0e2dd, -0x6fd2ec1f, -0x3217d54e, 0x41f43233, -0x3c551835, 0x1085faa5 },
+ { -0x0ec9eceb, -0x647bf09a, 0x701003e9, 0x18462242, -0x1b5daf80, 0x65ed45fa, 0x3fda7320, 0x0a286239 }
+ },
+ {
+ { 0x6ecb9d17, -0x69f18c85, -0x2983151f, -0x050db6b8, -0x2aa1e477, 0x37e7a9b4, -0x4b93a615, 0x5cb7173c },
+ { 0x347cbc9d, 0x46ab13c8, -0x663edc7d, 0x3849e8d4, -0x7829b537, 0x4cea3140, -0x4e5d6119, 0x1f354134 },
+ { -0x7d485410, 0x4a89e68b, -0x64594847, -0x0be326d9, -0x1e727891, 0x16e6c210, 0x7f1b09c6, 0x7cacdb0f }
+ },
+},
+{
+ {
+ { -0x233a3513, -0x1efebbcc, 0x3c84fb33, 0x47ed5d96, -0x12795f19, 0x70019576, -0x2d98061c, 0x25b2697b },
+ { -0x26e58744, -0x6f9d4d20, -0x37af6999, 0x47c9889c, 0x405070b8, -0x620ab59a, 0x2493a1bf, 0x7369e6a9 },
+ { 0x13986864, -0x6298c005, 0x415dc7b8, 0x3ca5fbd9, -0x20d8c4a2, -0x1fb133c5, -0x4ab1b32e, 0x1420683d }
+ },
+ {
+ { -0x3e33a530, 0x34eebb6f, -0x69b95375, 0x6a1b0ce9, -0x599421ad, -0x2c4f25b7, 0x61d081c1, 0x31e83b41 },
+ { 0x249dd197, -0x4b8742e2, 0x5e58c102, 0x620c3500, -0x334553a4, -0x04fd2cd1, -0x0af758d3, 0x60b63beb },
+ { -0x61f9d4b1, -0x681738ee, 0x29320ad8, 0x49e48f4f, 0x6f18683f, 0x5bece14b, 0x2d550317, 0x55cf1eb6 }
+ },
+ {
+ { 0x7df58c52, 0x3076b5e3, -0x186633ca, -0x28c54623, 0x4913ee20, -0x427ce31d, 0x62ba0133, 0x1a56fbaa },
+ { 0x65c23d58, 0x58791010, 0x5094819c, -0x7462f793, 0x12c55fa7, -0x1dbfd057, 0x570891d4, 0x669a6564 },
+ { 0x5c9dc9ec, -0x6bc194b0, -0x5883c8e6, 0x302557bb, 0x41347651, -0x678c51aa, -0x663a75a4, 0x13c48367 }
+ },
+ {
+ { 0x5d8bd080, -0x3b230496, 0x571a4842, -0x21143b14, -0x471aac9b, -0x2b4d177d, -0x371a47d9, 0x50bdc87d },
+ { 0x5ab3e1b9, 0x423a5d46, -0x380ec09f, -0x03ec3e79, -0x134a464a, 0x19f83664, -0x59c849f9, 0x66f80c93 },
+ { 0x6edfe111, 0x606d3783, -0x0fee5427, 0x32353e15, 0x25b73b96, 0x64b03ac3, 0x725fd5ae, 0x1dd56444 }
+ },
+ {
+ { 0x08bac89a, -0x3d681a00, -0x151e3c20, 0x7d4cea11, -0x60186884, -0x0c1c741f, 0x63a305cd, 0x3a3a450f },
+ { 0x3362127d, -0x705b8008, 0x71cd7c15, -0x4360953c, 0x49220c8b, 0x6e714543, 0x219f732e, 0x0e645912 },
+ { -0x27c6b9d9, 0x078f2f31, -0x216b5af0, 0x389d3183, 0x17996f80, -0x2e1c9393, -0x6c565785, 0x318c8d93 }
+ },
+ {
+ { -0x54e22c68, 0x5d669e29, 0x342d9e3b, -0x036de9a8, -0x0ca68c33, 0x55851dfd, 0x25950af6, 0x509a41c3 },
+ { 0x2afffe19, -0x0d8ba2fd, 0x7f24db66, 0x0c9f3c49, -0x457a6711, -0x43672c1d, -0x65e2acec, 0x224c7c67 },
+ { -0x5906da17, -0x423f9124, 0x641b1f33, 0x793ef3f4, -0x627cc177, -0x7d13ed80, 0x28a11389, 0x05bff023 }
+ },
+ {
+ { 0x0dc512e4, 0x6881a0dd, 0x44a5fafe, 0x4fe70dc8, -0x70b5adc0, 0x1f748e6b, -0x11fe5c16, 0x576277cd },
+ { 0x23cae00b, 0x36321370, -0x2e5330a7, 0x544acf0a, -0x2de5e378, -0x698befb7, -0x05d5bb59, 0x780b8cc3 },
+ { 0x234f305f, 0x1ef38abc, 0x1405de08, -0x65a88043, 0x34e62a0d, 0x5e82a514, 0x6271b7a1, 0x5ff41872 }
+ },
+ {
+ { 0x13b69540, -0x1a24b818, 0x432610e1, -0x0ca2d5c5, 0x38781276, -0x53e0d917, -0x5f5f3497, 0x29d4db8c },
+ { 0x1789db9d, 0x398e080c, -0x0c18870b, -0x589fdfdb, 0x06bd035d, -0x056776b4, 0x25a966be, 0x106a03dc },
+ { 0x333353d0, -0x2652f551, -0x532cf61b, 0x38669da5, -0x37770810, 0x3c57658a, 0x052cbefa, 0x4ab38a51 }
+ },
+},
+{
+ {
+ { -0x7f621fac, -0x09701d18, -0x637d452f, -0x1c43f696, 0x0aadbf45, 0x076353d4, -0x215e6a62, 0x7b9b1fb5 },
+ { 0x4324c0e9, -0x20253412, 0x3f955bb7, 0x05444288, -0x15ce9f61, -0x21085558, 0x42287cff, 0x68aee706 },
+ { 0x7471cc0c, -0x0fe3370f, 0x579082bb, -0x6adbd1c9, -0x2c1b94a1, 0x27776093, 0x28bd85fb, 0x2d13d55a }
+ },
+ {
+ { 0x7aee7a52, -0x40fe6332, -0x1bab152d, -0x57212d4a, -0x785744e7, 0x3c619f0b, 0x560916d8, 0x3619b5d7 },
+ { 0x5b35b8da, -0x053a2dfa, -0x7a9db449, -0x57257566, 0x3d21cd0f, -0x332d356f, -0x7406f2a8, 0x6b8341ee },
+ { 0x0282c4b2, 0x3579f26b, 0x4fafefae, 0x64d592f2, 0x28c8c7c0, -0x48321285, 0x7173a8d7, 0x6a927b6b }
+ },
+ {
+ { 0x3ece88eb, -0x728fbf7a, -0x7f113f74, -0x0f1cf857, 0x0d788fda, -0x53ddaf9f, 0x3a0d478d, 0x056d92a4 },
+ { -0x6791b9aa, 0x1f6db24f, -0x2e16efa5, 0x1021c02e, 0x2cc0a375, -0x0700c001, -0x3937da6e, 0x1d2a6bf8 },
+ { -0x03c25a5f, 0x1b05a196, 0x43b59ed0, 0x77d7a8c2, -0x682e86e8, 0x06da3d62, -0x0edcac09, 0x66fbb494 }
+ },
+ {
+ { -0x0edcf62a, -0x2928f66a, -0x163c2ac7, -0x2404dc7b, -0x08aadbef, 0x46d602b0, 0x57843e0c, 0x270a0b05 },
+ { -0x27a3f048, 0x751a50b9, -0x7430f685, -0x2e5023db, -0x7cf65697, 0x2f16a6a3, -0x1a4ff9a7, 0x14ddff9e },
+ { -0x5879d434, 0x61ff0640, 0x5f11abfe, -0x7e353f66, 0x55d12abb, -0x6fb87cfc, -0x6ba5178d, 0x19a4bde1 }
+ },
+ {
+ { -0x3f893b61, 0x40c709de, 0x7f3e53f6, 0x657bfaf2, -0x135fbd3c, 0x40662331, 0x7eb4df04, 0x14b37548 },
+ { 0x20a6200a, -0x6460d90b, -0x30ec1508, 0x64804443, -0x79ce122d, -0x759c98c1, 0x1ed39dc1, 0x72bbbce1 },
+ { -0x549923b9, -0x517ac36c, -0x2089d292, -0x149dcbc2, 0x6fb2f7d1, -0x0f71f1e8, 0x700ab37a, 0x4f0b1c02 }
+ },
+ {
+ { -0x3e4d1dc1, 0x79fd21cc, 0x453df52a, 0x4ae7c281, -0x2eaeb795, -0x37e8d137, 0x3e0a7534, 0x68abe944 },
+ { -0x27e6ae06, -0x1e8f9879, -0x4d6f3885, -0x5ef5d372, 0x3ed66773, -0x18c7d060, 0x0bcc4b54, 0x0a4d8471 },
+ { 0x07831dcb, -0x25ed393c, 0x4d5c510d, 0x0da230d7, 0x6bd404e1, 0x4ab1531e, -0x430bbf11, 0x4106b166 }
+ },
+ {
+ { 0x39e4ecf2, -0x5b7a332b, 0x0555bab5, 0x5aa3f3ad, -0x6c8207d3, 0x145e3439, 0x1214283f, 0x1238b51e },
+ { 0x1cd23668, 0x02e57a42, 0x0eaef6fd, 0x4ad9fb5d, -0x4edbbb80, -0x6ab198d9, 0x2699f331, 0x7f792f9d },
+ { 0x5fd4d924, 0x0b886b92, 0x3626a80d, 0x60906f7a, -0x467542ee, -0x132c984c, -0x210cbb31, 0x2876beb1 }
+ },
+ {
+ { 0x3a8a85f8, -0x2a6b4ccd, -0x187282a8, 0x4ea37689, 0x5e8e351f, 0x73bf9f45, -0x43be144c, 0x5507d7d2 },
+ { 0x63144691, -0x237b16cb, -0x29e0dc0c, 0x632fe8a0, 0x12a9a8d5, 0x4caa8006, 0x0e9918d3, 0x48f9dbfa },
+ { 0x299572fc, 0x1ceb2903, -0x6afd2f12, 0x7c8ccaa2, 0x11cce67b, -0x6e405bcc, 0x64a831e7, 0x57844819 }
+ },
+},
+{
+ {
+ { 0x5fddc09c, -0x29302e11, -0x08a8a232, -0x17d4c103, 0x201634c2, 0x25d56b5d, 0x04ed2b9b, 0x3041c6bb },
+ { 0x6768d593, -0x2583d4db, 0x4422ca13, -0x673e3fa9, -0x35f531e3, -0x0e57f42b, -0x3f775970, 0x29cdd1ad },
+ { -0x26a91eb8, 0x0ff2f2f9, -0x60ca94d2, -0x5218688b, 0x5f6c025c, 0x1a4698bb, 0x14049a7b, 0x104bbd68 }
+ },
+ {
+ { -0x29800e9d, -0x56a265a1, 0x4cc75681, -0x16d41963, -0x21df0da9, -0x4807fdb4, -0x04f8d20b, 0x204f2a20 },
+ { 0x68f1ed67, 0x51f0fd31, -0x2790c43e, 0x2c811dcd, 0x04d2f2de, 0x44dc5c43, 0x092a7149, 0x5be8cc57 },
+ { 0x30ebb079, -0x37ebc4c3, -0x429ad1d0, 0x7589155a, -0x7092a3cf, 0x653c3c31, -0x3d86e9e1, 0x2570fb17 }
+ },
+ {
+ { 0x0bb8245a, 0x192ea955, -0x706faf2f, -0x37190458, -0x775b36cb, 0x7986ea2d, -0x21fe7998, 0x241c5f91 },
+ { 0x2cb61575, 0x3efa367f, 0x1cd6026c, -0x0a06908a, 0x65b52562, -0x1738ebd6, 0x53030acd, 0x3dcb65ea },
+ { 0x40de6caa, 0x28d81729, 0x22d9733a, -0x7040d310, 0x235b01d1, 0x16d7fcdd, 0x5fcdf0e5, 0x08420edd }
+ },
+ {
+ { 0x04f410ce, 0x0358c34e, 0x276e0685, -0x49eca4a6, -0x1446eadf, 0x5d9670c7, 0x21db889c, 0x04d654f3 },
+ { -0x7c9d05b6, -0x3200df55, -0x1de5c192, 0x57e118d4, -0x03c619d5, -0x1ce869e9, -0x43e89603, 0x0d9a53ef },
+ { -0x22424a2b, 0x5e7dc116, -0x725a22d3, 0x2954deb6, 0x3334a292, 0x1cb60817, 0x18991ad7, 0x4a7a4f26 }
+ },
+ {
+ { -0x50c8d5b5, 0x24c3b291, 0x718147f2, -0x6c257d90, -0x7976610e, -0x227b7a9c, 0x23e0ee33, 0x4a963142 },
+ { 0x5fb15f95, -0x0b58e7fe, 0x6b5c1b8f, 0x3df65f34, 0x00e01112, -0x32030f7b, -0x222ce7b8, 0x11b50c4c },
+ { 0x08a4ffd6, -0x5917d8bc, -0x63ea8927, 0x738e177e, 0x3d02b3f2, 0x773348b6, -0x319433af, 0x4f4bce4d }
+ },
+ {
+ { -0x3b62f491, 0x30e2616e, -0x3513dce9, -0x1ba98e71, -0x0d94b05a, 0x48eb409b, 0x61595f37, 0x3042cee5 },
+ { -0x1ddbda7c, -0x58e031a6, -0x6d0a7562, 0x26ea7256, 0x1cea3cf4, -0x2de5f629, -0x48e3fe1a, 0x73fcdd14 },
+ { 0x449bac41, 0x427e7079, -0x431dcef6, -0x7aa51c93, 0x5f841a7c, 0x4cae7621, -0x65631e2a, 0x389e740c }
+ },
+ {
+ { 0x570eac28, -0x3642870a, 0x27919ce1, -0x1aa4f4ce, -0x5e646e13, 0x65fc3eab, -0x29d9c970, 0x25c425e5 },
+ { 0x34dcb9ce, 0x64fcb3ae, -0x1cb72f53, -0x68affcdd, 0x62c6381b, 0x45b3f07d, 0x465a6788, 0x61545379 },
+ { -0x0e282192, 0x3f3e06a6, -0x71f9dcf8, 0x3ef97627, 0x4e8a6c77, -0x73eb09da, 0x15484759, 0x6539a089 }
+ },
+ {
+ { 0x14bb4a19, -0x223b242c, -0x67bdb072, 0x19b2bc3c, 0x36ca7169, 0x48a89fd7, -0x0fe64270, 0x0f65320e },
+ { -0x3c2d088d, -0x162de08c, 0x25c46845, -0x3eafabbf, -0x064661cd, 0x624e5ce8, -0x3a32e794, 0x11c5e4aa },
+ { -0x35021f3a, -0x2b792e4f, 0x163b5181, 0x4f3fe6e3, -0x050d6c66, 0x59a8af0d, -0x13ccf8d6, 0x4cabc7bd }
+ },
+},
+{
+ {
+ { 0x1a54a044, -0x083f5e64, 0x77bd9fbb, 0x4a1c5e24, 0x5af22972, -0x591c35ef, 0x3f2e9e0d, 0x1819bb95 },
+ { 0x532f7428, 0x16faa8fb, 0x46a4e272, -0x242bd160, -0x74615b80, 0x5337653b, 0x23973f03, 0x40659472 },
+ { 0x5e042e84, 0x498fbb79, 0x7698b714, 0x7d0dd89a, 0x27fe6295, -0x7404f45c, 0x21200524, 0x36ba82e7 }
+ },
+ {
+ { 0x57274ed5, -0x372962f6, 0x60804b17, 0x45ba8032, 0x2255dfac, -0x20c325f0, 0x2709b339, 0x77d22123 },
+ { 0x4245ec41, -0x29f13449, 0x34348716, -0x02641762, -0x1bdd7b22, -0x36dbf502, -0x2face24c, 0x4472f648 },
+ { 0x64ad94d8, 0x498a6d70, -0x6509dd9d, -0x5a4a3703, 0x45c141f4, -0x735712fb, 0x662d358c, 0x2c63bec3 }
+ },
+ {
+ { -0x7a790741, -0x65ae74c6, -0x344e6910, -0x6118e50a, -0x5dc7a30e, -0x55f9da1a, -0x2228372f, 0x1deb2176 },
+ { -0x158786ab, 0x7fe60d8b, -0x4a0bfe49, -0x4623ee82, 0x19355cce, -0x6e383f66, -0x6bbd4121, 0x22692ef5 },
+ { 0x2066cf6c, -0x7a9c2e66, 0x4dcc7cd7, 0x401bfd8c, -0x32f2709e, -0x26895942, -0x5d874fa2, 0x67cfd773 }
+ },
+ {
+ { 0x5a4e586a, 0x2d5fa985, 0x49beab7e, 0x65f8f7a4, -0x0de2cc2d, -0x55f8b223, 0x1bcb9dee, 0x185cba72 },
+ { -0x10c11b8b, -0x7213ce06, -0x61dd026e, -0x66240076, 0x4e26cab1, 0x512d1159, -0x13bcef47, 0x0cde561e },
+ { -0x0b1c34bf, -0x6c79625d, 0x40f7977e, -0x40fc6d0b, -0x2fb9c47d, 0x026204fc, -0x61139113, 0x3ec91a76 }
+ },
+ {
+ { -0x4f5cbfd1, 0x0fad2fb7, -0x04960b58, 0x46615ecb, -0x3a07155a, -0x08ba4338, 0x4a94e896, 0x7a5fa879 },
+ { -0x087e9953, 0x1e9df75b, -0x14f32851, 0x4dfda838, -0x3e150678, -0x45ffd128, 0x11f33cfc, 0x13fedb3e },
+ { 0x13cd67a1, 0x52958faa, -0x74244ae9, -0x69a11f7f, 0x2e8845b3, 0x16e58daa, 0x5499da8f, 0x357d397d }
+ },
+ {
+ { 0x194bfbf8, 0x481dacb4, -0x451a7d67, 0x4d77e3f1, 0x7d1372a0, 0x1ef4612e, 0x70ff69e1, 0x3a8d867e },
+ { -0x4f453194, 0x1ebfa05f, 0x1caf9a1e, -0x36cb9df4, 0x1d82b61a, -0x3388e33c, -0x5a08b014, 0x2d94a16a },
+ { 0x55aff958, 0x6f58cd5d, 0x75567721, -0x45c155a4, -0x6e9add83, 0x75c12399, -0x3d0d4ca2, 0x69be1343 }
+ },
+ {
+ { 0x684b8de3, -0x7d444254, 0x3fca0718, -0x5d0b3830, -0x1f695558, 0x337f92fb, 0x63587376, 0x200d4d8c },
+ { -0x1e6836d6, 0x0e091d5e, 0x2945119f, 0x4f51019f, -0x0fcb1664, 0x143679b9, 0x4d24c696, 0x7d88112e },
+ { 0x4893b32b, 0x208aed4b, -0x41a6469c, 0x3efbf23e, -0x245a1af9, -0x289d2150, -0x7e42626c, 0x69607bd6 }
+ },
+ {
+ { -0x6cdc56fe, 0x3b7f3bd4, 0x6b2c6e53, 0x7c21b556, 0x3a7852a7, -0x1a45700b, -0x7c713200, 0x28bc77a5 },
+ { 0x68de1ce1, -0x0941fdf0, 0x0edcbc1f, -0x172ae719, 0x1b5505a5, -0x1c100230, -0x2c13c030, 0x35f63353 },
+ { -0x1da27fca, 0x63ba78a8, -0x6bcccb70, 0x63651e00, 0x288ce532, 0x48d82f20, 0x36b57524, 0x3a31abfa }
+ },
+},
+{
+ {
+ { 0x3f78d289, -0x3f708771, -0x5ebfb261, -0x01cf58d4, -0x309a3363, -0x0d887404, 0x5acb2021, 0x7ee49816 },
+ { 0x089c0a2e, 0x239e9624, 0x3afe4738, -0x38b73b40, 0x764fa12a, 0x17dbed2a, 0x321c8582, 0x639b93f0 },
+ { -0x6eee5e3d, 0x7bd508e3, -0x7f6f8b77, 0x2b2b90d4, -0x518d02e7, -0x182d513e, -0x7a49fd5a, 0x0edf493c }
+ },
+ {
+ { -0x7b89beed, 0x6767c4d2, -0x080a07cb, -0x5f6fbfc1, -0x35194122, 0x1c8fcffa, -0x2e205c97, 0x04c00c54 },
+ { 0x599b5a68, -0x51337ea8, -0x14521df2, -0x15a8b0f1, 0x22b67f07, 0x4fe41d74, 0x019d4fb4, 0x403b92e3 },
+ { -0x74b9a308, 0x4dc22f81, 0x1480eff8, 0x71a0f35a, 0x04c7d657, -0x51174053, -0x4d9e890c, 0x355bb12a }
+ },
+ {
+ { 0x5a8c7318, -0x5cfe2539, -0x4c3155ef, -0x126ffc63, 0x3bae3f2d, 0x6f077cbf, -0x1fad5272, 0x7518eaf8 },
+ { 0x7493bbf4, -0x58e19b34, -0x135c4f3d, -0x1a427b27, -0x05fa187b, 0x0a6bc50c, 0x182ec312, 0x0f9b8132 },
+ { 0x1b7f6c32, -0x5b77a63c, -0x0bc7cd68, 0x0f2d60bc, -0x364e2e27, 0x1815a929, -0x44e8aa3c, 0x47c3871b }
+ },
+ {
+ { -0x37af9950, -0x0419a2b0, -0x4c5d6650, 0x62ecc4b0, 0x441ae8e0, -0x1ac8ab16, -0x172b72a1, 0x08fea02c },
+ { 0x71ec4f48, 0x51445397, -0x3673a292, -0x07fa4e83, 0x47c3c66b, -0x089d3ee6, 0x764699dc, 0x00b89b85 },
+ { 0x68deead0, -0x7db2228a, 0x4b685d23, -0x379bbae0, 0x5d89d665, -0x4aeb3033, 0x4f75d537, 0x473829a7 }
+ },
+ {
+ { -0x52c6fd37, 0x23d9533a, -0x10fca771, 0x64c2ddce, -0x301ed04c, 0x15257390, 0x44e4d390, 0x6c668b4d },
+ { 0x4679c418, -0x7d2d258b, -0x4d9e7210, -0x19c42828, -0x53b814f6, 0x355eef24, 0x4833c6b4, 0x2078684c },
+ { 0x7a78820c, 0x3b48cf21, -0x7ed8c169, -0x0895f54e, -0x73711285, -0x56939a59, 0x4f8a433f, 0x7411a605 }
+ },
+ {
+ { 0x18b175b4, 0x579ae53d, -0x0c6d5efe, 0x68713159, 0x1eef35f5, -0x7baa1346, 0x458c398f, 0x1ec9a872 },
+ { -0x46623793, 0x4d659d32, 0x603af115, 0x044cdc75, -0x233d1b78, -0x4cb38ed4, -0x047ecb01, 0x7c136574 },
+ { 0x00a2509b, -0x47195b2c, 0x0bc882b4, -0x647e28fe, -0x0e6a8a9f, 0x57e7cc9b, -0x38329ba0, 0x3add88a5 }
+ },
+ {
+ { 0x59393046, -0x7a3d672c, 0x5ff659ec, -0x7081ca68, -0x0d0991c6, 0x1d2ca22a, -0x5bf958e0, 0x61ba1131 },
+ { -0x49ca230e, -0x5476a890, -0x0993e044, 0x02dfef6c, -0x41492e79, -0x7aacfd98, -0x3378618c, 0x249929fc },
+ { 0x16959029, -0x5c2f5f0f, -0x45814277, 0x023b6b6c, 0x26783307, 0x7bf15a3e, -0x44271319, 0x5620310c }
+ },
+ {
+ { 0x77e285d6, 0x6646b5f4, 0x6c8f6193, 0x40e8ff67, -0x544a6b23, -0x59138cef, 0x658cec4d, 0x7ec846f3 },
+ { 0x4934d643, 0x52899343, -0x5aeddd0b, -0x462407fa, -0x3c0be3de, -0x70927871, 0x4d9d9730, 0x37676a2a },
+ { 0x1da22ec7, -0x64a170c1, 0x6c01cd13, 0x130f1d77, -0x5d676048, 0x214c8fcf, 0x399b9dd5, 0x6daaf723 }
+ },
+},
+{
+ {
+ { 0x2cd13070, -0x7e514423, -0x07a5f162, -0x69d1bcdb, -0x35200135, -0x216c6e56, 0x52c230e6, 0x53177fda },
+ { 0x10628564, 0x591e4a56, -0x574b20cc, 0x2a4bb87c, -0x185c71bd, -0x21d5da8e, -0x011afb92, 0x3cbdabd9 },
+ { 0x50b9de79, -0x584368fa, -0x3cfe4a65, 0x3d12a7fb, -0x2c951c74, 0x02652e68, 0x5a6199dc, 0x79d73983 }
+ },
+ {
+ { 0x0d591737, 0x21c9d992, -0x164b932a, -0x6415be2e, 0x0d89bfca, -0x1df17be0, 0x6eae5ff8, 0x79d99f94 },
+ { 0x4131c1bd, -0x26cab20a, -0x7913a7de, 0x758094a1, -0x1ba60c3e, 0x4464ee12, -0x34eccd7e, 0x6c11fce4 },
+ { 0x68673205, -0x0e84b7cb, 0x3caad96c, 0x387deae8, 0x56ffe386, 0x61b471fd, -0x48ba5a67, 0x31741195 }
+ },
+ {
+ { 0x3b02a047, 0x17f8ba68, -0x01104938, 0x50212096, 0x1556cbe2, 0x70139be2, 0x1d98915b, 0x203e44a1 },
+ { -0x4885c9f5, -0x172efe70, -0x666a18fe, -0x66467ce0, -0x05fdb856, -0x42b02008, -0x1f2c9579, 0x2772e344 },
+ { 0x37b9e39f, -0x2979c146, 0x723b5a23, 0x105bc169, -0x59a3f89e, 0x104f6459, 0x5b4d38d4, 0x56795129 }
+ },
+ {
+ { 0x0d4b497f, 0x07242eb3, -0x46433379, 0x1ef96306, -0x27ee90bb, 0x37950934, 0x01405b04, 0x05468d62 },
+ { 0x13037524, 0x535fd606, -0x4f043d96, -0x1def520a, 0x23e990ae, -0x5372f565, -0x28d02407, 0x47204d08 },
+ { -0x06cd9822, 0x00f565a9, -0x3f2a7176, -0x31302873, -0x0ce71d72, -0x5dea1d24, -0x649cccae, 0x4599ee91 }
+ },
+ {
+ { -0x79e51a87, -0x538b9295, -0x09515624, 0x31ab0650, 0x40256d4c, 0x241d6611, 0x3d21a5de, 0x2f485e85 },
+ { 0x70e0e76b, -0x2c3ddf36, -0x1560cf6c, -0x4ed415a8, -0x3cd8ed7e, 0x294ddec8, -0x5e2e2fd8, 0x0c3539e1 },
+ { -0x63f7cc0d, 0x32974483, -0x2d543b7c, 0x6fe6257f, 0x4b358817, 0x5327d181, -0x76c01644, 0x65712585 }
+ },
+ {
+ { -0x28f711c1, -0x7e3d60e5, -0x519bf830, -0x2234a5fb, -0x2d5c1459, -0x68513e29, -0x6e2af7cf, 0x1590521a },
+ { 0x32a61161, -0x63efd049, 0x34d520a8, -0x1b71ef23, 0x6f9a9176, 0x365c6354, 0x046f6006, 0x32f6fe4c },
+ { -0x386ef534, 0x40a3a11e, -0x0e92d852, -0x6fec2008, -0x544e6a2c, 0x1a9720d8, 0x2ea98463, 0x1bb9fe45 }
+ },
+ {
+ { -0x33c98b84, -0x30a1936b, 0x6b0bc30d, 0x29420153, -0x11868510, 0x453ac67c, 0x2a8bb3c9, 0x5eae6ab3 },
+ { -0x4c2ab062, -0x162e26b0, -0x1ff2cc3f, 0x2d5f9cbe, -0x5fb03954, 0x51c2c656, 0x3c1cbcc9, 0x65c091ee },
+ { 0x14f118ea, 0x70836611, -0x6bcb6353, 0x2b37b87b, -0x4b1660c0, 0x7273f51c, 0x23d75698, 0x78a2a958 }
+ },
+ {
+ { 0x5ef83207, -0x4b0dc3be, -0x3656cb4b, -0x54076b2d, 0x39fd87f7, -0x2f8f73ed, 0x17166130, 0x18767891 },
+ { 0x5c8c2ace, -0x5d4f8d17, 0x651e9c4b, 0x69cffc96, 0x42e7b42b, 0x44328ef8, 0x22aadeb3, 0x5dd996c1 },
+ { 0x670c507c, -0x6da4a110, -0x46c3cc41, -0x7e6437be, 0x70dd003f, 0x10792e9a, 0x6e28dc74, 0x59ad4b7a }
+ },
+},
+{
+ {
+ { -0x5352715e, 0x583b04bf, 0x148be884, 0x29b743e8, 0x0810c5db, 0x2b1e583b, -0x714c4456, 0x2b5449e5 },
+ { -0x14c241b9, 0x5f3a7562, -0x71425f48, -0x0815c7ac, 0x45747299, 0x00c3e531, 0x1627d551, 0x1304e9e7 },
+ { 0x6adc9cfe, 0x789814d2, -0x74b722f5, 0x3c1bab3f, -0x068639f6, -0x25f01e01, 0x7c2dd693, 0x4468de2d }
+ },
+ {
+ { -0x079cf832, 0x4b9ad8c6, 0x435d0c28, 0x21113531, 0x657a772c, -0x2b57993b, 0x63247352, 0x5da6427e },
+ { -0x6be6b962, 0x51bb355e, 0x23ddc754, 0x33e6dc4c, 0x447f9962, -0x6c5a492a, -0x04bb429d, 0x6cce7c6f },
+ { -0x2153dd36, 0x1a94c688, -0x4451e008, -0x46f99109, -0x72a6a7f1, -0x775273c8, -0x1860d358, 0x58f29abf }
+ },
+ {
+ { 0x710ecdf6, 0x4b5a64bf, 0x462c293c, -0x4eb31ac8, -0x2af4c547, 0x3643d056, 0x185b4870, 0x6af93724 },
+ { -0x7218c198, -0x16f13055, 0x377e76a5, 0x54036f9f, -0x41fea67e, -0x0fb6a4f5, -0x580be1ca, 0x577629c4 },
+ { 0x09c6a888, 0x32200245, 0x4b558973, -0x2d1fc9ed, 0x3c33289f, -0x7c1dc9dd, 0x0caec18f, 0x701f25bb }
+ },
+ {
+ { 0x7cbec113, -0x62e70927, 0x74bfdbe4, -0x7bb5f91a, -0x53b19f2a, 0x20f5b522, 0x50955e51, 0x720a5bc0 },
+ { -0x1b9e9313, -0x3c574f08, -0x61da5783, -0x08ff99f2, -0x0b435a64, 0x61e3061f, -0x423bf417, 0x2e0c92bf },
+ { -0x647fa5cb, 0x0c3f0943, 0x6242abfc, -0x17b174c9, 0x5c229346, 0x691417f3, 0x144ef0ec, 0x0e9b9cbb }
+ },
+ {
+ { 0x5db1beee, -0x7211642b, 0x0a723fb9, -0x363c54c9, 0x1c68d791, 0x44a8f1bf, 0x1cfd3cde, 0x366d4419 },
+ { -0x04a8df53, -0x04452b71, -0x2406f2f2, -0x117e6e95, 0x635543bf, -0x2b7eceae, 0x3f337bd8, 0x221104eb },
+ { -0x0d4373ec, -0x61c3e8bd, -0x4a7a93c5, 0x2eda26fc, 0x68a7fb97, -0x3347d0f2, -0x43a6cdbc, 0x4167a4e6 }
+ },
+ {
+ { -0x07317012, -0x3d41d99b, -0x177f29d4, -0x169800ec, 0x2f364eee, -0x0ed19182, -0x34812d0a, 0x34b33370 },
+ { 0x76f62700, 0x643b9d28, 0x0e7668eb, 0x5d1d9d40, 0x21fc0684, 0x1b4b4303, 0x2255246a, 0x7938bb7e },
+ { -0x797e2934, -0x323a6e12, -0x127a58ad, -0x31fdef64, 0x58808883, -0x128b7a3f, 0x2dfe65e4, 0x1176fc6e }
+ },
+ {
+ { 0x49770eb8, -0x246f1d77, -0x530bbf5d, -0x670433d6, -0x21287865, 0x21354ffe, -0x0d96f94a, 0x1f6a3e54 },
+ { 0x5b9c619b, -0x4b509330, -0x4d5a7b80, 0x2ddfc9f4, -0x1416b23c, 0x3d4fa502, 0x677d5f34, 0x08fc3a4c },
+ { -0x2cf8cb16, 0x60a4c199, 0x31165cd6, 0x40c085b6, -0x08a67d6b, -0x1dccc1dd, 0x16b900d1, 0x4f2fad01 }
+ },
+ {
+ { -0x48c449c8, -0x69d326e3, -0x03ed63f8, -0x19fa8856, -0x0c49e977, 0x6f619b39, 0x2944ee81, 0x3451995f },
+ { -0x6b51b1ac, 0x44beb241, 0x1857ef6c, 0x5f541c51, 0x368d0498, -0x59e194d3, -0x68d10855, 0x445484a4 },
+ { -0x60158284, -0x6ead0330, -0x4f6ca30a, 0x4a816c94, 0x47285c40, 0x258e9aaa, 0x042893b7, 0x10b89ca6 }
+ },
+},
+{
+ {
+ { 0x79d34aa0, -0x2983212a, -0x33b24c61, -0x33f46140, -0x1ca2e6f1, -0x5aca5baa, -0x09e09011, 0x2e05d9ea },
+ { 0x3b646025, -0x64d5bd92, 0x385ce4cf, 0x32127190, -0x229215bb, -0x5da3003e, -0x4157218b, 0x06409010 },
+ { -0x29e414a7, -0x3bb86fe6, -0x1a2377f6, 0x661f19bc, -0x483597d9, 0x24685482, -0x101f80da, 0x293c778c }
+ },
+ {
+ { -0x5ee00e00, 0x16c795d6, -0x4ea7ea37, -0x348f2f1e, -0x64ac6a4b, -0x760d6ce0, 0x31e47b4f, 0x50b8c2d0 },
+ { 0x07069096, -0x797f6190, -0x1b1afe77, -0x5528a4eb, -0x5de5feb9, 0x07f35715, 0x12815d5e, 0x0487f3f1 },
+ { 0x068a4962, 0x48350c08, 0x51092c9a, 0x6ffdd053, -0x50903723, 0x17af4f4a, 0x3cdba58b, 0x4b0553b5 }
+ },
+ {
+ { 0x27c152d4, -0x40fadee5, -0x42e509c7, 0x5ec26849, -0x71905468, 0x5e0b2caa, 0x50bd0840, 0x054c8bdd },
+ { 0x1b32ff79, -0x639a0342, 0x03b50f9b, -0x148a1561, 0x6c07e606, -0x0312d594, 0x51717908, 0x35106cd5 },
+ { 0x1dcf073d, 0x38a0b12f, -0x48095d8a, 0x4b60a8a3, -0x2cbfb066, -0x012a53db, 0x5505c229, 0x72e82d5e }
+ },
+ {
+ { 0x69771d02, 0x00d9cdfd, 0x6cfbf17e, 0x410276cd, 0x1cb12ec7, 0x4c45306c, 0x27500861, 0x2857bf16 },
+ { -0x0f27bb38, 0x6b0b697f, -0x268634b7, -0x44ed07a4, -0x3e25f0e1, -0x2d5abe3a, 0x58ce7211, 0x7b7c2429 },
+ { 0x0101689e, -0x60de6fc1, -0x4079effb, -0x2886202d, 0x3deb0f1b, -0x5edd11a1, 0x485a00d4, 0x510df84b }
+ },
+ {
+ { -0x38f53ea2, 0x24b3c887, -0x047e48ce, -0x4f0c5aa9, -0x1a8733e5, -0x64d321d1, 0x03b54f8e, 0x4cf7ed07 },
+ { -0x6d885e06, -0x5abecc45, 0x63991237, 0x74ec3b62, 0x35d2f15a, 0x1a3c54dc, -0x1b7d45c6, 0x2d347144 },
+ { -0x670411f1, 0x6bd47c65, -0x54aa41d3, -0x61b8cc1e, 0x127610c5, 0x1093f624, -0x2f5e155c, 0x4e05e26a }
+ },
+ {
+ { -0x1e701940, 0x1833c773, -0x2c378d9b, -0x1c3b8ee6, 0x0116b283, 0x3bfd3c4f, -0x4b32b248, 0x1955875e },
+ { 0x4b531f20, -0x2564949e, 0x77509abb, 0x429a760e, -0x17dc3480, -0x24160ade, -0x77f3707e, 0x618f1856 },
+ { 0x0e399799, 0x6da6de8f, 0x40fda178, 0x7ad61aa4, 0x5e3563dd, -0x4cd327f0, 0x2ae340ae, 0x15f6beae }
+ },
+ {
+ { -0x6dba1deb, -0x4565f085, -0x2673f245, -0x0c979ed3, -0x0ddf4fe0, 0x2e84e4cb, 0x62d90eda, 0x6ba92fe9 },
+ { 0x31ec3a62, -0x79d434f4, 0x1138f3c2, -0x7ef1d4bb, 0x39dac2a4, 0x788ec4b8, -0x51d56d7f, 0x28f76867 },
+ { 0x5884e2aa, 0x3e4df965, -0x242b9a5b, -0x429d0425, 0x0de9e524, -0x28a69356, -0x4d4e4c29, 0x6e8042cc }
+ },
+ {
+ { 0x16521f7e, 0x15306536, -0x69dfc246, 0x660d06b8, 0x545f0879, 0x2d3989bc, 0x78ebd7b0, 0x4b5303af },
+ { -0x31d73592, -0x0ef2c3d7, -0x0349f6c3, -0x452cbac0, -0x5d15d2c1, -0x18bd9129, 0x4ff298b9, 0x08af9d4e },
+ { -0x41434218, 0x72f8a6c3, -0x23c57177, 0x4f0fca4a, -0x38402086, 0x6fa9d4e8, -0x649db149, 0x0dcf2d67 }
+ },
+},
+{
+ {
+ { 0x5a45f06e, 0x753941be, 0x6d9c5f65, -0x2f835113, 0x72ff51b6, 0x11776b9c, -0x10f2b257, 0x17d2d1d9 },
+ { -0x68e7d764, 0x3d594749, 0x24533f26, 0x12ebf8c5, 0x14c3ef15, 0x0262bfcb, 0x77b7518e, 0x20b878d5 },
+ { 0x073f3e6a, 0x27f2af18, -0x28adef97, -0x02c01ae7, 0x3ca60022, 0x22e3b72c, -0x339a3959, 0x72214f63 }
+ },
+ {
+ { -0x0bc4d637, 0x1d9db7b9, 0x4f518f75, -0x29fa7db6, 0x312f9dc4, -0x0d3f8d43, 0x5a1545b0, 0x1f24ac85 },
+ { 0x5307a693, -0x4b1c80c0, 0x2f336795, -0x5458eb29, 0x73761099, -0x29042f59, -0x7e8e3437, 0x5fdf48c5 },
+ { -0x716afa56, 0x24d60832, 0x0c1420ee, 0x4748c1d1, 0x06fb25a2, -0x38001ba4, 0x2ae395e6, 0x00ba739e }
+ },
+ {
+ { -0x157744da, -0x51bbd90b, -0x7b68c405, 0x360679d9, 0x26694e50, 0x5c9f030c, -0x2ae72dda, 0x72297de7 },
+ { 0x5c8790d6, 0x592e98de, 0x45c2a2df, -0x1a40482d, -0x064b66de, 0x115a3b60, 0x67ad78f3, 0x03283a3e },
+ { -0x41f346c7, 0x48241dc7, -0x749ccf80, 0x32f19b4d, 0x02289308, -0x2c2036f3, 0x46271945, 0x05e12968 }
+ },
+ {
+ { 0x242c4550, -0x52404438, -0x2fcf7e27, -0x4337f314, -0x0a37206e, -0x7bca995a, -0x7da731b4, 0x78cf25d3 },
+ { 0x2d9c495a, -0x457d114d, -0x0ed44684, -0x31103704, -0x6c4a2e20, -0x4fd25452, 0x13698d9b, 0x39c00c9c },
+ { 0x31489d68, 0x15ae6b8e, -0x63d40f79, -0x557ae355, -0x0fb105fb, -0x3658a569, 0x6b3ff832, 0x006b5207 }
+ },
+ {
+ { -0x4631f7d3, -0x0a3481ea, 0x417abc29, 0x3407f14c, 0x2bf4a7ab, -0x2b4c9432, 0x1a9f75ce, 0x7de2e956 },
+ { -0x626a87e4, 0x29e0cfe1, -0x699cef1e, -0x497e20e8, 0x70516b39, 0x57df39d3, 0x3bc76122, 0x4d57e344 },
+ { -0x495aa135, -0x218f2b0c, 0x5d85db99, 0x4801527f, -0x2c11657f, -0x24363bc0, 0x1a6029ed, 0x6b2a90af }
+ },
+ {
+ { 0x5bb2d80a, 0x77ebf324, 0x2fb9079b, -0x27cfe4b9, 0x4cee7333, -0x39b8190e, 0x276c2109, 0x465812c8 },
+ { -0x6519e169, 0x6923f4fc, -0x1fc0a02f, 0x5735281d, -0x19122ed3, -0x589b51bd, -0x2ed2c1b6, 0x5fd8f4e9 },
+ { 0x2a1062d9, 0x4d43beb2, 0x3831dc16, 0x7065fb75, -0x21d69729, 0x180d4a7b, 0x1cb16790, 0x05b32c2b }
+ },
+ {
+ { 0x7ad58195, -0x08035bd4, 0x4333f3cc, 0x3214286e, 0x340b979d, -0x493d62f3, 0x567307e1, 0x31771a48 },
+ { -0x2db25703, -0x373fa134, 0x05dfef83, -0x5e30e554, 0x7df9cd61, -0x2441100e, 0x7b471e99, 0x3b5556a3 },
+ { -0x1eb22b7e, 0x32b0c524, 0x1a2ba4b6, -0x124caeac, 0x282b5af3, -0x5c2e9fb8, 0x7a7336eb, 0x4fc079d2 }
+ },
+ {
+ { 0x0c86c50d, -0x23cb74bc, -0x336b19af, 0x1337cbc9, 0x643e3cb9, 0x6422f74d, -0x451c32f8, 0x241170c2 },
+ { -0x7640d081, 0x51c938b0, 0x02dfe9a7, 0x2497bd65, 0x7880e453, -0x00003f64, -0x3506716e, 0x124567ce },
+ { 0x0ac473b4, 0x3ff9ab86, 0x0113e435, -0x0f6ee212, -0x14393b51, 0x4ae75060, 0x6c87000d, 0x3f861296 }
+ },
+},
+{
+ {
+ { 0x638c7bf3, 0x529fdffe, 0x388b4995, -0x20d461a0, 0x1bad0249, -0x1fd84cb1, -0x46058b13, 0x7bc92fc9 },
+ { -0x086a841c, 0x0c9c5303, -0x1f7a3ebb, -0x5c3ce5e0, -0x2f7affb0, -0x4f8de28f, -0x54f40d26, 0x0aba390e },
+ { -0x7fe52607, -0x606810d2, 0x79afda3a, -0x7c9682ac, -0x42a694b0, -0x16f94c01, -0x22c04720, 0x02672b37 }
+ },
+ {
+ { 0x398ca7f5, -0x116458d7, 0x7a4849db, -0x146359db, 0x7ec544e1, 0x29eb29ce, -0x08c91d38, 0x232ca21e },
+ { 0x260885e4, 0x48b2ca8b, -0x7d4cb3e4, -0x5bd79414, 0x17f58f74, -0x6c81e5da, -0x54d35d5b, 0x741d1fcb },
+ { 0x253fcb17, -0x409ebdc3, -0x05c614ec, 0x08803cea, -0x67ae3851, -0x0e79fd21, 0x49e3414b, 0x0400f3a0 }
+ },
+ {
+ { -0x5f9184fa, 0x2efba412, 0x2c8d2560, 0x14678545, -0x29856e39, -0x2068ec15, 0x157eadf3, 0x32830ac7 },
+ { -0x459e3aa5, -0x5431fb8a, -0x3b2c68ea, 0x36a3d6d7, -0x1727d2f7, 0x6eb259d5, -0x7b28a905, 0x0c9176e9 },
+ { -0x48c89618, 0x0e782a7a, 0x75b18e2c, 0x04a05d78, -0x1433151f, 0x29525226, -0x7c1457e0, 0x0d794f83 }
+ },
+ {
+ { -0x585d1e54, 0x7be44ce7, -0x052e4749, 0x411fd93e, 0x0d5f7c9b, 0x1734a1d7, 0x3127db16, 0x0d659223 },
+ { -0x61eae90c, -0x00ca0a35, 0x648aae45, -0x117fa431, -0x46c5610d, -0x0f28c3d5, 0x2092a6c2, 0x097b0bf2 },
+ { 0x21a9d733, -0x3b7454eb, -0x29e544db, -0x593d1516, -0x3934bcfb, 0x625c6c1c, -0x6c14c599, 0x7fc90fea }
+ },
+ {
+ { -0x63834dc3, -0x3ad8214b, 0x5328404e, -0x6aac6e97, 0x7ccf2c7a, -0x29bc6d7f, -0x082705ef, 0x6ce97dab },
+ { 0x1f5c5926, 0x0408f1fe, 0x3b258bf4, 0x1a8f2f5e, -0x0238e997, 0x40a951a2, -0x3674a882, 0x6598ee93 },
+ { 0x0ef7c48f, 0x25b5a8e5, 0x6f2ce532, -0x149fcbef, -0x1ac21ac9, -0x3a18ae8d, -0x73ed44fd, 0x73119fa0 }
+ },
+ {
+ { 0x21f4774d, 0x7845b94d, 0x7897b727, -0x409d0e94, 0x3c56522b, 0x671857c0, -0x6a9dedee, 0x3cd6a852 },
+ { 0x53f1a4cb, -0x12cfed6c, -0x370ac879, -0x4319de37, 0x38bee7b9, -0x0534d4ed, -0x6157bd74, 0x3025798a },
+ { 0x3aeca999, 0x3fecde92, 0x62e8c12f, -0x4255a500, -0x69677522, 0x67b99dfc, 0x52661036, 0x3f52c028 }
+ },
+ {
+ { -0x113be93a, -0x6da74067, -0x562d098f, -0x5375afe9, 0x16dea4ab, 0x629549ab, -0x66f6ea97, 0x05d0e85c },
+ { 0x2a1351c6, -0x00155b72, -0x0580ac29, 0x28624754, 0x7582ddf1, 0x0b5ba9e5, -0x596953a7, 0x60c0104b },
+ { -0x21634169, 0x051de020, -0x4af4308c, -0x05f803aa, 0x0f11df65, 0x378cec9f, -0x546921b3, 0x36853c69 }
+ },
+ {
+ { -0x053a1842, 0x4433c0b0, 0x4c08dcbe, 0x724bae85, 0x46978f9b, -0x0e0db33c, 0x62825fc8, 0x4a0aff6d },
+ { 0x78f39b2d, 0x36d9b8de, -0x57b84614, 0x7f42ed71, 0x79bd3fde, 0x241cd1d6, -0x6d043195, 0x6a704fec },
+ { 0x61095301, -0x16e80462, 0x02a092f8, -0x3efd206c, -0x0599e6f5, -0x40f61d0b, -0x1f2301c9, 0x681109be }
+ },
+},
+{
+ {
+ { 0x36048d13, -0x63e70306, 0x73899ddd, 0x29159db3, -0x606d2f56, -0x2360caf5, -0x7875e62c, 0x26f57eee },
+ { 0x782a0dde, 0x559a0cc9, -0x158e7c7b, 0x551dcdb2, 0x31ef238c, 0x7f62865b, 0x7973613d, 0x504aa776 },
+ { 0x5687efb1, 0x0cab2cd5, 0x247af17b, 0x5180d162, 0x4f5a2467, -0x7a3ea5cc, -0x6245cf97, 0x4041943d }
+ },
+ {
+ { -0x5d935523, 0x4b217743, 0x648ab7ce, 0x47a6b424, 0x03fbc9e3, -0x34e2b086, -0x67ff2fe7, 0x12d93142 },
+ { 0x43ebcc96, -0x3c3f1146, 0x26ea9caf, -0x728b6364, 0x1c77ccc6, -0x26056a12, 0x7684340f, 0x1420a1d9 },
+ { -0x2cc8a6b1, 0x00c67799, -0x4dc55b85, 0x5e3c5140, -0x1ca00c6b, 0x44182854, 0x4359a012, 0x1b4f9231 }
+ },
+ {
+ { -0x5b67994f, 0x33cf3030, 0x215f4859, 0x251f73d2, 0x51def4f6, -0x547d55c0, 0x6f9a23f6, 0x5ff191d5 },
+ { -0x76eaf6af, 0x3e5c109d, 0x2de9696a, 0x39cefa91, -0x68a0cfe0, 0x20eae43f, 0x7f132dae, 0x239b572a },
+ { -0x53d26f98, -0x7e612bcd, 0x5fc98523, 0x2883ab79, 0x5593eb3d, -0x10ba8d80, 0x758f36cb, 0x020c526a }
+ },
+ {
+ { -0x0fbd3377, -0x16ce10a7, -0x71edb44a, 0x2c589c9d, -0x5138a669, -0x52371e76, 0x5602c50c, 0x452cfe0a },
+ { -0x61272444, 0x779834f8, -0x23835b94, -0x370d5507, -0x5c1e4f8c, -0x56adb324, 0x15313877, 0x02aacc46 },
+ { 0x647877df, -0x795f0860, 0x0e607c9f, -0x443b9bd9, -0x0e04ee37, -0x54e815db, 0x304b877b, 0x4cfb7d7b }
+ },
+ {
+ { -0x687610ee, -0x1d79663e, -0x20a8e6f3, 0x2b6ecd71, -0x13368f30, -0x3cbc37a9, 0x434d3ac5, 0x5b1d4cbc },
+ { -0x47648a02, 0x72b43d6c, -0x63952380, 0x54c694d9, 0x3ee34c9f, -0x473c55c9, 0x39075364, 0x14b4622b },
+ { -0x33f560da, -0x4904d9eb, -0x4772331b, 0x3a4f0e2b, 0x3369a705, 0x1301498b, 0x58592dd1, 0x2f98f712 }
+ },
+ {
+ { 0x4f54a701, 0x2e12ae44, -0x56342822, -0x0301c110, 0x75835de0, -0x314076f3, -0x189ebaac, 0x1d8062e9 },
+ { -0x4af061aa, 0x0c94a74c, -0x7171ece0, 0x5b1ff4a9, -0x7dcff099, -0x65d533df, -0x27f95507, 0x3a6ae249 },
+ { -0x566f83a6, 0x657ada85, -0x6e46f09e, 0x1a0ea8b5, -0x20cb4b17, -0x72f1e205, -0x510da00d, 0x298b8ce8 }
+ },
+ {
+ { 0x0a2165de, -0x7c858d16, 0x0bcf79f6, 0x3fab07b4, 0x7738ae70, 0x521636c7, 0x03a7d7dc, 0x6ba62718 },
+ { -0x1008f34e, 0x2a927953, 0x79157076, 0x4b89c92a, 0x30a7cf6a, -0x6be7ba86, 0x4d5ce485, 0x34b8a840 },
+ { -0x7c96cccb, -0x3d91134b, 0x63b5fefd, -0x2a57ec21, -0x5b4dda8d, -0x5d6c5566, 0x465e1c6a, 0x71d62bdd }
+ },
+ {
+ { -0x4e08a10b, -0x32d24a26, 0x16b065f5, -0x28806a31, 0x3f49f085, 0x14571fea, 0x262b2b3d, 0x1c333621 },
+ { -0x2c872080, 0x6533cc28, 0x0a0fa4b4, -0x0924bc87, -0x08fe25a6, -0x1c9ba007, -0x0ce8d45c, 0x74d5f317 },
+ { 0x67d9ca81, -0x57901aac, 0x2b298c37, 0x398b7c75, -0x1c539dc5, -0x2592f76e, 0x47e9d98c, 0x4aebcc45 }
+ },
+},
+{
+ {
+ { -0x5fa65bbb, 0x0de9b204, 0x4b17ad0f, -0x1ea34b56, 0x1f79c557, -0x1e4413ae, -0x2f8ef7e5, 0x2633f1b9 },
+ { 0x05d21a77, 0x53175a72, -0x2c46cb2c, -0x4f3fbbde, -0x22a21524, -0x52260db5, -0x60ef0074, 0x074f46e6 },
+ { 0x018b9910, -0x3e04be89, 0x6c0fe140, -0x5915df24, 0x4354c6ff, -0x299e0c19, -0x0e5cbf86, 0x5ecb72e6 }
+ },
+ {
+ { -0x17179669, -0x01151efa, -0x672f6c7d, -0x679ccc81, -0x55f91411, -0x6b8fb7f2, -0x2b3a3d30, 0x038b6898 },
+ { 0x2259fb4e, -0x5aea5ce5, 0x2bcac52f, 0x0960f397, -0x72cbab35, -0x124ad014, -0x3b893fe7, 0x382e2720 },
+ { -0x7531af5a, -0x0c6e3ae3, -0x51d2d6b8, 0x3142d0b9, 0x7f24ca80, -0x24b2a5e6, 0x59250ea8, 0x21aeba8b }
+ },
+ {
+ { -0x0ff780dd, 0x53853600, -0x2582a87c, 0x4c461879, -0x4be097a0, 0x6af303de, -0x3d83e713, 0x0a3c16c5 },
+ { -0x30bfaad0, 0x24f13b34, 0x43088af7, 0x3c44ea4a, 0x0006a482, 0x5dd5c517, -0x76f4f793, 0x118eb8f8 },
+ { -0x336b80c3, 0x17e49c17, -0x553e2d85, -0x3339125a, -0x4f0f71aa, -0x209f6d32, 0x2c67c36b, 0x4909b3e2 }
+ },
+ {
+ { 0x706ff64e, 0x59a16676, 0x0d86a53d, 0x10b953dd, -0x31a3f46a, 0x5848e1e6, 0x12780c68, 0x2d8b78e7 },
+ { 0x63fe2e89, -0x63637a16, 0x0e9412ec, -0x41e4506f, -0x79040185, -0x70845576, -0x10697494, 0x0fb17f9f },
+ { -0x503c6fd5, 0x79d5c62e, -0x7617f8d8, 0x773a2152, -0x1efedf47, -0x3c7519c0, 0x7b2b1a6d, 0x09ae2371 }
+ },
+ {
+ { -0x52cd4e30, 0x10ab8fa1, -0x1d8874dc, -0x165312e5, 0x373de90f, -0x577a9440, -0x225ac66a, 0x66f35ddd },
+ { 0x4e4d083c, -0x4495e6d6, 0x0029e192, 0x34ace063, -0x55054515, -0x67dba5a7, -0x25680554, 0x6d9c8a9a },
+ { 0x24997323, -0x2d826505, -0x090fe2d2, 0x1bb7e07e, -0x0ad13381, 0x2ba7472d, 0x646f9dc8, 0x03019b4f }
+ },
+ {
+ { -0x194c2395, -0x50f64dec, -0x5282d09b, 0x3f7573b5, 0x100a23b0, -0x2fe62678, -0x74a3ca09, 0x392b63a5 },
+ { 0x565345cd, 0x04a186b5, -0x433bee96, -0x111899f0, 0x78fb2a45, 0x689c73b4, 0x65697512, 0x387dcbff },
+ { -0x63f83dfb, 0x4093addc, -0x0acd3c82, -0x3a9a41eb, 0x1583402a, 0x63dbecfd, -0x10d1fcd2, 0x61722b4a }
+ },
+ {
+ { -0x7e34f1c4, -0x294f85ab, -0x26bbb697, 0x290ff006, 0x16dcda1f, 0x08680b6a, 0x5a06de59, 0x5568d2b7 },
+ { -0x1342b851, 0x0012aafe, 0x1cd46309, 0x55a266fb, 0x0967c72c, -0x0dfc1498, -0x35c3ebd7, 0x39633944 },
+ { 0x1b37cfe1, -0x72f34774, 0x053818f3, 0x05b6a5a3, -0x487826a7, -0x0d1643fc, -0x6522809c, 0x6beba124 }
+ },
+ {
+ { 0x43f5a53b, 0x5c3cecb9, 0x06c08df2, -0x633659e3, -0x7a76abb9, -0x30459c66, 0x0df09fd5, 0x5a845ae8 },
+ { -0x5a4e4ebd, 0x1d06005c, 0x7fd1cda2, 0x6d4c6bb8, 0x53fcffe7, 0x6ef59676, -0x3e31e15b, 0x097c29e8 },
+ { 0x5deb94ca, 0x4ce97dbe, -0x738f63b8, 0x38d0a438, -0x5e962f69, -0x3bc1312c, -0x081a783d, 0x0a1249ff }
+ },
+},
+{
+ {
+ { 0x7354b610, 0x0b408d9e, 0x5ba85b6e, -0x7f94cdad, 0x4a58a207, -0x2419c5fd, -0x365e20d4, 0x173bd9dd },
+ { 0x276d01c9, 0x12f0071b, -0x793b7390, -0x1847453b, 0x71d6fba9, 0x5308129b, 0x5a3db792, 0x5d88fbf9 },
+ { -0x01a78d21, 0x2b500f1e, -0x2bc6e73f, 0x58d6582e, -0x3698c520, -0x1912d872, -0x4e615ce7, 0x06e1cd13 }
+ },
+ {
+ { -0x61a4fcad, 0x472baf62, 0x278d0447, 0x3baa0b90, -0x69bc40d9, 0x0c785f46, -0x727c84ed, 0x7f3a6a1a },
+ { 0x6f166f23, 0x40d0ad51, 0x1fab6abe, 0x118e3293, -0x5fb2f772, 0x3fe35e14, 0x26e16266, 0x30806035 },
+ { 0x5d3d800b, -0x0819bbc7, -0x36fe120a, -0x6a572aab, 0x592c6339, 0x68cd7830, 0x2e51307e, 0x30d0fded }
+ },
+ {
+ { 0x68b84750, -0x634b68e2, 0x6664bbcf, -0x5f6a8dd7, 0x72fa412b, 0x5c8de726, 0x51c589d9, 0x46150843 },
+ { -0x0dedcc4d, -0x1fa6b2e6, -0x0f33b264, 0x1bdbe78e, -0x70b66589, 0x6965187f, 0x2c099868, 0x0a921420 },
+ { -0x51465fd2, -0x436fe640, 0x16034cae, 0x55c7110d, 0x659932ec, 0x0e6df501, -0x6a35a202, 0x3bca0d28 }
+ },
+ {
+ { -0x6133fe41, -0x6397714a, -0x59bb7691, -0x0f437c53, 0x5f7a9fe2, -0x35d26aa1, -0x720d7dbf, 0x4ea8b403 },
+ { 0x3c5d62a4, 0x40f031bc, -0x300f85a0, 0x19fc8b3e, 0x130fb545, -0x67e7c25e, -0x5170ec33, 0x5631dedd },
+ { -0x0e352dfe, 0x2aed460a, -0x5b73117d, 0x46305305, 0x49f11a5f, -0x6ede88bb, 0x542ca463, 0x24ce0930 }
+ },
+ {
+ { -0x020cf47b, 0x3fcfa155, 0x36372ea4, -0x2d08e972, 0x6492f844, -0x4d1f9b22, 0x324f4280, 0x549928a7 },
+ { -0x02f93efa, 0x1fe890f5, 0x5d8810f2, -0x4a3b97cb, 0x6e8caf3e, -0x7d87f702, -0x75f928b5, 0x41d4e3c2 },
+ { 0x63ee1a2e, -0x0d91cd59, -0x2da00216, -0x516e1b49, -0x2e80b297, -0x43c42cc5, -0x3f230096, 0x491b66de }
+ },
+ {
+ { -0x2f259b5f, 0x75f04a8e, 0x67e2284b, -0x12ddd351, 0x1f7b7ba4, -0x7dcb5c87, -0x48fe7499, 0x4cf6b8b0 },
+ { -0x3815cd59, -0x670a4ec3, 0x7e16db98, -0x1c2a0734, -0x340726b9, -0x53f540ae, -0x37a11b54, 0x08f338d0 },
+ { -0x66e58c43, -0x3c7c57df, -0x20cdf386, -0x54d843ff, -0x7b888f9d, -0x3ec2cce5, -0x14f87567, 0x530d4a82 }
+ },
+ {
+ { 0x6c9abf9e, 0x6d697345, 0x4900a880, 0x257fb2fc, -0x373047b0, 0x2bacf412, 0x0cbfbd5b, 0x0db3e7e0 },
+ { -0x1e06b7db, 0x004c3630, -0x7354aca6, 0x7e2d7826, -0x337b0075, -0x38b7dcdd, 0x101770b9, 0x65ea753f },
+ { -0x1df69c9d, 0x3d66fc3e, 0x61b5cb6b, -0x7e29d381, 0x13443b1a, 0x0fbe0442, 0x21e1a1db, 0x02a4ec19 }
+ },
+ {
+ { -0x0e3086a1, -0x0a379e9e, 0x26ee57f2, 0x118c8619, 0x1c063578, 0x17212485, -0x13f98031, 0x36d12b5d },
+ { 0x3b24b8a2, 0x5ce6259a, 0x45afa0b8, -0x47a88534, -0x745f8fc9, -0x33341918, 0x127809bf, 0x3d143c51 },
+ { 0x79154557, 0x126d2791, -0x0387c5f6, -0x2a1b70a4, -0x20e86454, 0x36bdb6e8, 0x5ba82859, 0x2ef51788 }
+ },
+},
+{
+ {
+ { 0x7c6da1e9, 0x1ea43683, 0x1fb9bdbe, -0x063e7651, -0x31a22eab, 0x303001fc, -0x43a841ae, 0x28a7c99e },
+ { -0x2ee1f2b6, -0x7742bc74, 0x43ccf308, 0x30cb610d, -0x6e6c8434, -0x1f65f1c9, 0x25b1720c, 0x4559135b },
+ { -0x172e6163, -0x47026c67, -0x69dbdc01, -0x6f7e6e35, 0x47c742a3, -0x4d46b729, -0x2804bb3c, 0x37f33226 }
+ },
+ {
+ { -0x37de4ee3, 0x33912553, 0x41e301df, 0x66ed42c2, 0x104222fd, 0x066fcc11, -0x3e6de971, 0x307a3b41 },
+ { -0x4aa091f8, 0x0dae8767, 0x5b203a02, 0x4a43b3b3, -0x7f507387, -0x1c8da592, 0x705fa7a3, 0x0f7a7fd1 },
+ { 0x6eb55ce0, -0x7114a2f9, -0x55f26da6, 0x2fc536bf, -0x23493918, -0x417e7cf1, -0x7d8450ae, 0x556c7045 }
+ },
+ {
+ { 0x2bf44406, -0x46b46ffe, -0x006f4acc, -0x542bdc82, -0x050792c6, 0x7600a960, -0x3dcdd11d, 0x2f45abda },
+ { 0x02e9d8b7, -0x71d4ae8d, 0x248714e8, -0x1c1add97, 0x4ca960b5, -0x42b04289, -0x3a135257, 0x6f4b4199 },
+ { -0x37107596, 0x61af4912, 0x43fb6e5e, -0x1a705b02, 0x6fd427cf, -0x4a5033a3, 0x1e1e11eb, 0x6a539328 }
+ },
+ {
+ { 0x149443cf, 0x0fff04fe, -0x79a32229, 0x53cac6d9, 0x531ed1b7, 0x31385b03, -0x532efc63, 0x5846a27c },
+ { -0x5a2e1177, -0x0c25aec7, -0x006c9678, -0x7ebaba84, 0x00e188c4, 0x3f622fed, -0x2474a5c3, 0x0f513815 },
+ { 0x1eb08717, 0x4ff5cdac, -0x6f0d1644, 0x67e8b295, 0x237afa99, 0x44093b5e, -0x78f7474e, 0x0d414bed }
+ },
+ {
+ { 0x294ac9e8, -0x7e77956e, -0x2aaab842, 0x23162b45, 0x03715983, -0x6b3043bc, 0x134bc401, 0x50eb8fdb },
+ { -0x02f18a0a, -0x30497d9b, -0x446f18f9, -0x1ba4c1d8, -0x6006d386, 0x7242a8de, -0x6ccdfd23, 0x685b3201 },
+ { -0x294ccf33, -0x3f48c13a, 0x132faff1, -0x7b1bb7f9, -0x3b5a211f, 0x732b7352, -0x55832d2e, 0x5d7c7cf1 }
+ },
+ {
+ { -0x648c5a9e, 0x33d1013e, 0x48ec26e1, -0x6da310a9, -0x22b97fa8, -0x580319ec, 0x1e9aa438, 0x78b0fad4 },
+ { 0x7a4aafa2, -0x50c4b941, 0x4d40d411, -0x4878fa14, -0x3583ea1d, 0x114f0c6a, -0x56b762b3, 0x3f364faa },
+ { -0x12fa4b78, -0x40a95bcf, -0x63b6a382, -0x5acc1994, -0x780c9ae6, -0x179ad451, 0x59d66c33, 0x02418000 }
+ },
+ {
+ { -0x30c715ff, 0x28350c7d, -0x4d6e854a, 0x7c6cdbc0, -0x7a8f7d09, -0x53183042, -0x5d265e20, 0x4d2845ab },
+ { -0x5c85a41c, -0x314f8802, -0x1a5a1149, -0x249bd0fe, 0x471270b8, -0x3d192f3b, 0x38e4529c, 0x4771b655 },
+ { 0x447070de, -0x44ac8020, 0x6dd557df, -0x3458bbbd, 0x3600dbcb, -0x2c4a5cb9, -0x06002808, 0x4aeabbe6 }
+ },
+ {
+ { -0x3b56370e, 0x6a2134bc, -0x7531d1c9, -0x040702e4, -0x66ee5f46, 0x000ae304, 0x6bc89b9e, 0x046e3a61 },
+ { 0x40d8f78c, 0x4630119e, 0x3c710e11, -0x5fe5643b, -0x76ef2287, 0x486d2b25, -0x24fcdb1b, 0x1e6c47b3 },
+ { -0x0fc6f942, 0x14e65442, -0x1c9d41d6, 0x4a019d54, -0x723dcf39, 0x68ccdfec, -0x509479e4, 0x7cfb7e3f }
+ },
+},
+{
+ {
+ { 0x305b2f51, -0x69114005, -0x776a6948, -0x2c06c753, 0x46d5dd25, -0x0f0ad239, -0x44c5ff6b, 0x57968290 },
+ { -0x73a75124, 0x4637974e, -0x540fbe5c, -0x4610dd05, -0x167f8e76, -0x1e7a26aa, -0x4ebc575a, 0x2f1b78fa },
+ { 0x0a20e101, -0x08e547bd, 0x24f0ec47, -0x0c6c9a73, 0x6ee2eed1, -0x308af658, -0x23d55c1f, 0x7dc43e35 }
+ },
+ {
+ { 0x273e9718, 0x5a782a5c, 0x5e4efd94, 0x3576c699, 0x1f237d3e, 0x0f2ed805, -0x7d2af567, 0x044fb81d },
+ { -0x7782263d, -0x7a69999b, 0x4bb05355, -0x36f064cf, -0x10df864f, -0x391f7208, 0x758cc12f, 0x7ef72016 },
+ { -0x56f81c27, -0x3e20e73b, -0x31b39ca7, 0x57b3371d, -0x4dfe44b7, -0x358fbacc, -0x63cf22d2, 0x7f79823f }
+ },
+ {
+ { 0x68f587ba, 0x6a9c1ff0, 0x0050c8de, 0x0827894e, 0x7ded5be7, 0x3cbf9955, 0x1c06d6f0, 0x64a9b043 },
+ { -0x5c4aec18, -0x7ccb2dc7, -0x46e05728, -0x3ec98f2c, -0x0a6f42cd, 0x12b54136, -0x287b264c, 0x0a4e0373 },
+ { 0x5b7d2919, 0x2eb3d6a1, -0x2ac57dcb, -0x4f4b0960, -0x765ba2b9, 0x7156ce43, -0x31e7cb94, 0x071a7d0a }
+ },
+ {
+ { 0x20e14431, -0x33f3caae, 0x09b15141, 0x0d659507, 0x209d5f36, -0x650a9de5, 0x617755d3, 0x7c69bcf7 },
+ { -0x377845f5, -0x2cf8d256, -0x405a9d12, 0x01262905, -0x3f108975, -0x30abcffe, 0x46ea7e9c, 0x2c3bcc71 },
+ { 0x04e8295f, 0x07f0d7eb, 0x2f50f37d, 0x10db1825, 0x171798d7, -0x16ae565d, 0x22aca51d, 0x6f5a9a73 }
+ },
+ {
+ { -0x5c26bb42, -0x18d62b15, -0x7f875062, -0x7261f6c0, 0x47869c03, 0x4525567a, -0x1172c4dc, 0x02ab9680 },
+ { 0x2f41c6c5, -0x745efff4, 0x0cfefb9b, -0x3b60863f, 0x3cc51c9f, 0x4efa4770, -0x1eb85036, 0x494e21a2 },
+ { -0x221af266, -0x105b757b, 0x0fb9a249, 0x219a224e, -0x26e10927, -0x05f6e0e3, -0x15b944cc, 0x6b5d76cb }
+ },
+ {
+ { 0x1e782522, -0x1f06bee9, 0x036936d3, -0x0e19518c, -0x2f0338ba, 0x408b3ea2, 0x03dd313e, 0x16fb869c },
+ { -0x13f3266c, -0x77a8aa94, 0x5cd01dba, 0x6472dc6f, -0x70bd4b89, -0x50fe96ec, -0x7ad88cac, 0x0ae333f6 },
+ { 0x33b60962, 0x288e1997, -0x27541ecd, 0x24fc72b4, 0x0991d03e, 0x4811f7ed, -0x708f2f8b, 0x3f81e38b }
+ },
+ {
+ { 0x5f17c824, 0x0adb7f35, -0x28bd665c, 0x74b923c3, -0x34071509, -0x2a83c175, 0x4cdedc3d, 0x0ad3e2d3 },
+ { 0x7ed9affe, 0x7f910fcc, 0x2465874b, 0x545cb8a1, 0x4b0c4704, -0x57c6812e, 0x04f50993, 0x50510fc1 },
+ { 0x336e249d, 0x6f0c0fc5, -0x3cce3027, 0x745ede19, 0x09eefe1c, -0x0d290300, -0x0f05e142, 0x127c158b }
+ },
+ {
+ { -0x51ae468c, -0x215d703c, 0x744dfe96, 0x1d9973d3, -0x78c7b758, 0x6240680b, -0x2e98206b, 0x4ed82479 },
+ { 0x2e9879a2, -0x09e683be, 0x52ca3647, -0x5bb5222c, 0x4b4eaccb, -0x64bec03f, 0x07ef4f68, 0x354ef87d },
+ { 0x60c5d975, -0x011c4ade, -0x14be4f48, 0x50352efc, -0x56099ac4, -0x77f753d0, 0x0539236d, 0x302d92d2 }
+ },
+},
+{
+ {
+ { 0x0df53c30, -0x6a847475, -0x719f0f68, 0x2a1c770a, 0x345796de, -0x44385990, -0x6f366437, 0x22a48f9a },
+ { -0x34c10484, 0x4c59023f, -0x39c3d56c, 0x6c2fcb99, -0x3c381f7c, -0x45be6f1e, -0x5ae78b27, 0x0e545dae },
+ { -0x72c053a8, 0x6b7dc0dc, -0x191bd403, 0x5497cd6c, -0x0bff2cfb, 0x542f7d1b, 0x048d9136, 0x4159f47f }
+ },
+ {
+ { -0x442db7c7, 0x748515a8, -0x504fd4ab, 0x77128347, 0x49a2a17f, 0x50ba2ac6, 0x3ad730f1, 0x06052551 },
+ { 0x39e31e32, 0x20ad6608, -0x7bfa41b0, -0x07e1e42b, -0x0b254397, -0x07f9bfaa, -0x318e468b, 0x14d23dd4 },
+ { -0x755d807e, -0x0dc671f7, -0x765e4fdc, 0x6d7982bb, 0x214dd24c, -0x0596bf7c, -0x5cdcfe3d, 0x71ab966f }
+ },
+ {
+ { 0x02809955, -0x4ef775f9, 0x0b43c391, 0x43b273ea, -0x01f97913, -0x35649852, -0x7cca0b13, 0x605eecbf },
+ { 0x4ded02fc, 0x2dcbd8e3, 0x596f22aa, 0x1151f3ec, 0x4e0328da, -0x435daabd, -0x6dbee4de, 0x35768fbe },
+ { 0x6c340431, -0x7cdff59b, -0x711a63d1, -0x60328e99, 0x71300f8a, 0x75d4613f, 0x60f542f9, 0x7a912faf }
+ },
+ {
+ { -0x05d2aa69, 0x253f4f8d, 0x5477130c, 0x25e49c40, -0x6694eefe, 0x00c052e5, 0x33bb6c4a, 0x33cb966e },
+ { 0x5edc1a43, -0x4dfba7a2, 0x5897c73c, -0x60f1e912, 0x4e70483c, 0x5b82c0ae, 0x2bddf9be, 0x624a170e },
+ { 0x7f116909, 0x59702804, 0x1e564467, -0x7d753be4, -0x19de8c79, 0x70417dbd, -0x0453bc7c, 0x721627ae }
+ },
+ {
+ { 0x410b2f22, -0x02cf6844, -0x4a3057bc, -0x0e5fa259, -0x10a8358c, 0x61289a1d, -0x447de6fe, 0x245ea199 },
+ { -0x78c9522b, -0x682fc43d, -0x3acd4ed0, 0x2f1422af, 0x7101bbc4, 0x3aa68a05, -0x18b06059, 0x4c946cf7 },
+ { 0x78d477f8, -0x51235997, 0x29117fe1, 0x1898ba3c, 0x720cbd58, -0x308c067d, -0x474a9caf, 0x67da12e6 }
+ },
+ {
+ { -0x7137cf74, 0x2b7ef3d3, 0x71eb94ab, -0x7d702814, -0x3af9d543, -0x7f83c4ca, 0x31a94141, 0x0cb64cb8 },
+ { -0x4b4291f9, 0x7067e187, -0x382e018c, 0x6e8f0203, 0x38c85a30, -0x6c3955d1, 0x3d75a78a, 0x76297d1f },
+ { 0x534c6378, 0x3030fc33, -0x1abe179f, -0x469ca3a4, -0x264d38d8, 0x15d9a9be, -0x0c88a235, 0x49233ea3 }
+ },
+ {
+ { 0x1c9f249b, 0x7b3985fe, -0x5edccd6d, 0x4fd6b2d5, 0x1adf4d62, -0x314cba6c, 0x542de50c, 0x6987ff6f },
+ { -0x724003c6, 0x629398fa, -0x2ab24bab, -0x1ed01ad3, -0x250dad6b, -0x0c41ee21, -0x31a184af, 0x628b140d },
+ { -0x707c8ac4, 0x47e24142, -0x79950669, 0x6317bebc, 0x3d1a9829, -0x2544a4bd, 0x5287fb2d, 0x074d8d24 }
+ },
+ {
+ { -0x3f1ceb78, 0x481875c6, -0x1ddfcb4c, 0x219429b2, 0x31283b65, 0x7223c98a, 0x342277f9, 0x3420d60b },
+ { 0x440bfc31, -0x7cc82633, -0x50ce7029, 0x729d2ca1, 0x772c2070, -0x5fbf5b5c, 0x3a7349be, 0x46002ef0 },
+ { -0x50019a09, -0x055dc522, 0x5be0764c, 0x78261ed4, 0x2f164403, 0x441c0a1e, 0x7a87d395, 0x5aea8e56 }
+ },
+},
+{
+ {
+ { -0x1b1f0e89, 0x2dbc6fb6, -0x5b42956d, 0x04e1bf29, 0x787af6e8, 0x5e1966d4, -0x4bd92fa0, 0x0edc5f5e },
+ { -0x435bd7c3, 0x7813c1a2, -0x5e79c227, -0x129d0f6f, -0x3d97057a, -0x51384348, 0x6f1cae4c, 0x10e5d3b7 },
+ { 0x53da8e67, 0x5453bfd6, 0x24a9f641, -0x1623e114, 0x03578a23, -0x4078d9c5, 0x361cba72, 0x45b46c51 }
+ },
+ {
+ { -0x75801c1c, -0x3162b223, 0x76620e30, -0x54ec9baa, -0x4cf166a8, 0x4b594f7b, 0x321229df, 0x5c1c0aef },
+ { 0x314f7fa1, -0x56bfd541, -0x71730bb0, -0x1da80e24, 0x23a8be84, 0x1dbbd54b, 0x6dcb713b, 0x2177bfa3 },
+ { -0x05862471, 0x37081bbc, -0x3da0a64d, 0x6048811e, -0x637cdb79, 0x087a7665, 0x7d8ab5bb, 0x4ae61938 }
+ },
+ {
+ { -0x67a4047d, 0x61117e44, 0x71963136, -0x031fb9d6, -0x2bda6fb5, -0x7c53cbb8, 0x5ba43d64, 0x75685abe },
+ { 0x5344a32e, -0x72240956, -0x4be4bf88, 0x7d88eab4, 0x4a130d60, 0x5eb0eb97, 0x17bf3e03, 0x1a00d91b },
+ { -0x149e0d4e, 0x6e960933, -0x3600b6ae, 0x543d0fa8, 0x7af66569, -0x208d8af0, 0x23b0e6aa, 0x135529b6 }
+ },
+ {
+ { -0x1dd17c02, -0x0a38e944, -0x17f67a3f, -0x4bd414e7, 0x14254aae, -0x136259c9, 0x1590a613, 0x5972ea05 },
+ { -0x522e2ae8, 0x18f0dbd7, -0x303ee0ef, -0x68608778, 0x7114759b, -0x78cd1e10, 0x65ca3a01, 0x79b5b81a },
+ { -0x237087ef, 0x0fd4ac20, -0x53b2b058, -0x65652d6c, -0x4cc9fbcc, -0x3fe4d29c, -0x6fa0c425, 0x4f7e9c95 }
+ },
+ {
+ { 0x355299fe, 0x71c8443d, -0x24141529, -0x7432c4e4, -0x0e5b6b9a, -0x7f6db662, -0x5ebb5238, 0x1942eec4 },
+ { 0x5781302e, 0x62674bbc, -0x765223f1, -0x27adf0c7, 0x53fbd9c6, -0x73d66652, 0x2e638e4c, 0x31993ad9 },
+ { -0x51dcb66e, 0x7dac5319, 0x0cea3e92, 0x2c1b3d91, 0x253c1122, 0x553ce494, 0x4ef9ca75, 0x2a0a6531 }
+ },
+ {
+ { 0x3c1c793a, -0x30c9e533, 0x5a35bc3b, 0x2f9ebcac, -0x57325955, 0x60e860e9, 0x6dea1a13, 0x055dc39b },
+ { -0x0806d83e, 0x2db7937f, 0x17d0a635, -0x248be0fa, 0x1155af76, 0x5982f3a2, 0x647c2ded, 0x4cf6e218 },
+ { -0x3d72a44a, -0x4ee6dd84, 0x774dffab, 0x07e24ebc, -0x1b5cd377, -0x57c38732, 0x10aa24b6, 0x121a3077 }
+ },
+ {
+ { -0x388b7c37, -0x29a68ec2, -0x47d46951, -0x77401f89, 0x1097bcd3, 0x289e2823, 0x6ced3a9b, 0x527bb94a },
+ { -0x60fcb569, -0x1b24a2a2, 0x3034bc2d, -0x1eac03f7, -0x6aae2c4f, 0x46054691, 0x7a40e52d, 0x333fc76c },
+ { -0x66a4b7d2, 0x563d992a, 0x6e383801, 0x3405d07c, 0x2f64d8e5, 0x485035de, 0x20a7a9f7, 0x6b89069b }
+ },
+ {
+ { -0x4a382489, 0x4082fa8c, -0x38cb3eab, 0x068686f8, -0x09185a82, 0x29e6c8d9, -0x589c6431, 0x0473d308 },
+ { 0x6270220d, -0x7ed55fbf, -0x06dba4b2, -0x66a57606, 0x5072ef05, -0x00523b32, -0x558c148d, 0x23bc2103 },
+ { 0x03589e05, -0x351186da, 0x46dcc492, 0x2b4b4212, -0x19fe56b1, 0x02a1ef74, -0x21fbcbe6, 0x102f73bf }
+ },
+},
+{
+ {
+ { -0x6c5c9db9, 0x358ecba2, -0x4d97029b, -0x5070679e, 0x68a01c89, 0x412f7e99, -0x328abadc, 0x5786f312 },
+ { 0x7ec20d3e, -0x4a5d2af4, -0x5f368d9d, -0x39b42292, -0x3e008cb3, 0x56e89052, 0x2b2ffaba, 0x4929c6f7 },
+ { -0x35ebfcd4, 0x337788ff, 0x447f1ee3, -0x0c6defd8, 0x231bccad, -0x74ebf8e1, -0x0dcbb87d, 0x4c817b4b }
+ },
+ {
+ { -0x5bf4bb7c, 0x413ba057, 0x4f5f6a43, -0x45b3d1e6, -0x511e29e4, 0x614ba0a5, -0x74fa23ad, 0x78a1531a },
+ { 0x2871b96e, 0x0ff85385, 0x60c3f1bb, -0x1ec16055, 0x25344402, -0x1102a6ad, 0x75b7744b, 0x0a37c370 },
+ { 0x3ad0562b, 0x6cbdf170, -0x36dade5d, -0x7130b7d0, -0x027bdb19, -0x25142cfd, 0x2e5ec56f, 0x72ad82a4 }
+ },
+ {
+ { 0x67024bc3, -0x3c976c6f, 0x49502fda, -0x71962e93, -0x1ba0b4d7, -0x030d13c4, -0x5c4b343c, 0x065f669e },
+ { -0x45049a0a, 0x3f9e8e35, -0x0d8d6c5f, 0x39d69ec8, -0x73095c30, 0x6cb8cd95, 0x73adae6d, 0x17347781 },
+ { 0x5532db4d, -0x75ff5139, 0x43e31bb1, -0x47965b1c, -0x2c580aeb, 0x4a0f8552, 0x303d7c08, 0x19adeb7c }
+ },
+ {
+ { 0x43c31794, -0x62fa4583, -0x6ccddada, 0x2470c8ff, 0x16197438, -0x7cdc2138, -0x7ea964ad, 0x28527098 },
+ { 0x53ead9a3, -0x38df349f, 0x512b636e, 0x55b2c97f, -0x2bfd6f4f, -0x4e1ca4a1, 0x3b530ee2, 0x2fd9ccf1 },
+ { 0x47f796b8, 0x07bd475b, 0x542c8f54, -0x2d384fed, 0x3b24f87e, 0x2dbd23f4, 0x7b0901d6, 0x6551afd7 }
+ },
+ {
+ { -0x5e2a3654, 0x68a24ce3, 0x10ff6461, -0x44885cc3, 0x25d3166e, 0x0f86ce44, 0x50b9623b, 0x56507c09 },
+ { 0x54aac27f, 0x4546baaf, -0x4d5ba5d8, -0x09099014, 0x562bcfe8, 0x582d1b5b, -0x6df087a1, 0x44b123f3 },
+ { -0x2e8ec19d, 0x1206f0b7, 0x15bafc74, 0x353fe3d9, 0x0ad9d94d, 0x194ceb97, -0x062fc52d, 0x62fadd7c }
+ },
+ {
+ { -0x1831ba6c, 0x3cd7bc61, -0x4822d982, -0x3294ca57, 0x4366ef27, -0x5f7f5438, 0x59c79711, 0x6ec7c46f },
+ { 0x5598a074, -0x394a6985, -0x71b6c1db, 0x5efe91ce, 0x49280888, -0x2b48d3bb, -0x5d98bf3e, 0x20ef1149 },
+ { 0x6f09a8a2, 0x2f07ad63, 0x24205e7d, -0x79681932, -0x11ca5ec7, -0x3f5103fb, -0x4a062769, 0x15e80958 }
+ },
+ {
+ { 0x5bb061c4, 0x4dd1ed35, -0x6be3f900, 0x42dc0cef, -0x0279cbf2, 0x61305dc1, 0x0e55a443, 0x56b2cc93 },
+ { 0x0c3e235b, 0x25a5ef7d, -0x41ecb119, 0x6c39c17f, 0x2dc5c327, -0x388b1ecc, -0x6dfde0c7, 0x021354b8 },
+ { -0x59403a5e, 0x1df79da6, -0x6021bc97, 0x02f3a274, -0x325c6f59, -0x4cdc260e, -0x788b2c9d, 0x7be0847b }
+ },
+ {
+ { 0x5307fa11, 0x1466f5af, -0x1293f50e, -0x7e803383, -0x3c5b5c05, 0x0a6de44e, -0x436d82f5, 0x74071475 },
+ { -0x74c0aa3d, -0x736633a6, 0x3fded2a0, 0x0611d725, 0x36b70a36, -0x12d66a01, -0x2875d9e7, 0x1f699a54 },
+ { 0x73e7ea8a, -0x188d6d0d, -0x34fba5cf, 0x296537d2, -0x2cd8b022, 0x1bd0653e, 0x76bd2966, 0x2f9a2c44 }
+ },
+},
+{
+ {
+ { -0x4aaee366, -0x5d4b2520, 0x2bffff06, 0x7ac86029, -0x0aafbdcc, -0x67e0c8a3, -0x25b15ed3, 0x3f6bd725 },
+ { 0x7f5745c6, -0x14e74655, 0x5787c690, 0x023a8aee, 0x2df7afa9, -0x48d8ed26, -0x15a3fec3, 0x36597d25 },
+ { 0x106058ac, 0x734d8d7b, 0x6fc6905f, -0x26bfa862, -0x6dfd6cd3, 0x6466f8f9, -0x259f2930, 0x7b7ecc19 }
+ },
+ {
+ { -0x58830565, 0x6dae4a51, -0x185c79b0, -0x7dd9c9ac, -0x70d27d25, 0x09bbffcd, 0x1bf5caba, 0x03bedc66 },
+ { 0x695c690d, 0x78c2373c, 0x0642906e, -0x22dad19a, 0x4ae12bd2, -0x6ae2bbbc, 0x01743956, 0x4235ad76 },
+ { 0x078975f5, 0x6258cb0d, -0x6e760d68, 0x49294254, -0x1d1c911c, -0x5f354bdd, -0x320f995f, 0x0e7ce2b0 }
+ },
+ {
+ { -0x26b48f07, -0x01590121, -0x3e0345d3, -0x0ecf3faf, 0x7f2fab89, 0x4882d47e, -0x7513114b, 0x61525613 },
+ { -0x3b737a5d, -0x3b6b9bc6, 0x3c6139ad, -0x02c9e20c, 0x3ae94d48, 0x09db17dd, -0x704b98b6, 0x666e0a5d },
+ { 0x4870cb0d, 0x2abbf64e, -0x55ba7495, -0x329a4310, 0x75e8985d, -0x6541b146, -0x2aeb211c, 0x7f0bc810 }
+ },
+ {
+ { 0x737213a0, -0x7c536253, 0x2ef72e98, -0x60090746, 0x43ec6957, 0x311e2edd, -0x213a548b, 0x1d3a907d },
+ { 0x26f4136f, -0x46ff945c, 0x57e03035, -0x7298c962, 0x4f463c28, -0x34372027, -0x0711240b, 0x0d1f8dbc },
+ { 0x3ed081dc, -0x45e96ccf, -0x7ae4cb80, 0x29329fad, 0x030321cb, 0x0128013c, -0x5ce4021d, 0x00011b44 }
+ },
+ {
+ { 0x6a0aa75c, 0x16561f69, 0x5852bd6a, -0x3e408da4, -0x65869953, 0x11a8dd7f, -0x2d7aefda, 0x63d988a2 },
+ { 0x3fc66c0c, 0x3fdfa06c, 0x4dd60dd2, 0x5d40e38e, 0x268e4d71, 0x7ae38b38, 0x6e8357e1, 0x3ac48d91 },
+ { -0x5042dcd2, 0x00120753, -0x0227097d, -0x16d43148, -0x7b18d46f, -0x07e9964d, 0x2368a066, 0x33fad52b }
+ },
+ {
+ { -0x3bdd3018, -0x72d33730, 0x05a13acb, 0x072b4f7b, -0x13095a91, -0x5c01491a, -0x46f58e1e, 0x3cc355cc },
+ { -0x3a1be1ea, 0x540649c6, 0x333f7735, 0x0af86430, -0x0cfa18ba, -0x4d53032e, -0x5da92359, 0x16c0f429 },
+ { -0x6fc16ecf, -0x16496bbd, 0x7a5637ce, -0x475b6b35, -0x45456dbc, -0x37832e5c, 0x6bae7568, 0x631eaf42 }
+ },
+ {
+ { -0x5c8ff218, 0x47d975b9, -0x1d07faae, 0x7280c5fb, 0x32e45de1, 0x53658f27, 0x665f80b5, 0x431f2c7f },
+ { -0x25990161, -0x4c16fbf0, 0x6c16e5a6, -0x7a22b4ae, 0x1ef9bf83, -0x43c2689f, 0x1ea919b5, 0x5599648b },
+ { -0x7a7084e7, -0x29fd9cbc, -0x5e15aeb6, 0x14ab352f, 0x2090a9d7, -0x76ffbbe6, -0x6edac4da, 0x7b04715f }
+ },
+ {
+ { -0x3b19453a, -0x4c893d80, 0x6d1d9b0b, -0x68f12c23, 0x450bf944, -0x4f656aa8, 0x57cde223, 0x48d0acfa },
+ { -0x530951bd, -0x7c1242d8, 0x7d5c7ab4, -0x79ca8375, -0x4814d3bc, -0x3fbfb897, -0x3d09a7c1, 0x59b37bf5 },
+ { 0x7dabe671, -0x49f0d91c, 0x622f3a37, -0x0e2e5e69, -0x1669fc6c, 0x4208ce7e, 0x336d3bdb, 0x16234191 }
+ },
+},
+{
+ {
+ { 0x3d578bbe, -0x7ad22e03, -0x3cd79ef8, 0x2b65ce72, -0x1531dd8d, 0x658c07f4, -0x13c754c0, 0x0933f804 },
+ { 0x33a63aef, -0x0e651539, 0x4442454e, 0x2c7fba5d, 0x4795e441, 0x5da87aa0, -0x5b1f4f0b, 0x413051e1 },
+ { -0x72b69b8a, -0x58549687, -0x034a5438, -0x7ede5522, 0x7b539472, -0x5a23ed11, 0x5e45351a, 0x07fd4706 }
+ },
+ {
+ { -0x6517183d, 0x30421155, -0x6bb77d5b, -0x0d7e4dd7, 0x378250e4, -0x75ec53d2, 0x54ba48f4, 0x014afa09 },
+ { 0x258d2bcd, -0x37a7c3c3, -0x509f48c1, 0x17029a4d, 0x416a3781, -0x05f0362a, 0x38b3fb23, 0x1c1e5fba },
+ { 0x1bb3666c, -0x34ce6900, 0x4bffecb9, 0x33006052, 0x1a88233c, 0x29371199, 0x3d4ed364, 0x29188436 }
+ },
+ {
+ { -0x43e54915, -0x0462c83d, 0x4d57a240, 0x02be1453, -0x075a1e0a, -0x0b28cbeb, 0x0ccc8188, 0x5964f430 },
+ { -0x23b45406, 0x033c6805, 0x5596ecc1, 0x2c15bf5e, -0x4a64e2c5, 0x1bc70624, -0x5e60f13b, 0x3ede9850 },
+ { 0x2d096800, -0x1bb5dceb, 0x70866996, 0x5c08c559, 0x46affb6e, -0x20d249f6, -0x07a90277, 0x579155c1 }
+ },
+ {
+ { 0x0817e7a6, -0x4a0e949d, 0x3c351026, -0x7f7396dd, 0x54cef201, 0x324a983b, 0x4a485345, 0x53c09208 },
+ { 0x12e0c9ef, -0x69cdb123, -0x0dbdfd69, 0x468b878d, -0x5b0a8c42, 0x199a3776, -0x716e16d6, 0x1e7fbcf1 },
+ { -0x0e345041, -0x2d2beb7f, 0x716174e5, 0x231d2db6, -0x1d5aa368, 0x0b7d7656, 0x2aa495f6, 0x3e955cd8 }
+ },
+ {
+ { 0x61bb3a3f, -0x54c60c11, 0x2eb9193e, -0x714bff9b, 0x38c11f74, -0x4a219134, 0x26f3c49f, 0x654d7e96 },
+ { 0x3ed15433, -0x1b70aca2, 0x0d7270a3, -0x2f8a96d6, -0x55219c79, 0x40fbd21d, -0x30bb6a0b, 0x14264887 },
+ { 0x5c7d2ceb, -0x1a9b3023, -0x28c83347, -0x7d115022, -0x2e064f55, 0x6107db62, -0x4bca7245, 0x0b6baac3 }
+ },
+ {
+ { 0x3700a93b, 0x204abad6, -0x25886c8d, -0x41ffdc2d, 0x633ab709, -0x27a0fcba, -0x6f7dfbee, 0x00496dc4 },
+ { -0x79dd0168, 0x7ae62bcb, -0x31476e51, 0x47762256, -0x0d1bf94c, 0x1a5a92bc, -0x7b1beaff, 0x7d294017 },
+ { -0x3d819ca0, 0x1c74b88d, -0x72eb7af4, 0x07485426, 0x3e0dcb30, -0x5eba0485, 0x43803b23, 0x10843f1b }
+ },
+ {
+ { -0x1cdb9765, -0x2a9098d3, -0x4c6b567f, -0x2e257513, -0x6e973013, -0x2284a702, 0x4d56c1e8, 0x7ce246cd },
+ { 0x376276dd, -0x3a06fbab, -0x289ba327, -0x31a6ea73, 0x1d366b39, -0x6d09a2af, 0x526996c4, 0x11574b6e },
+ { 0x7f80be53, -0x470bcf72, 0x34a9d397, 0x5f3cb8cb, 0x33cc2b2c, 0x18a961bd, 0x3a9af671, 0x710045fb }
+ },
+ {
+ { 0x059d699e, -0x5fc0379e, -0x659e6197, 0x2370cfa1, 0x2f823deb, -0x3b01c4ee, -0x580f7bb2, 0x1d1b056f },
+ { 0x101b95eb, 0x73f93d36, 0x4f6f4486, -0x0510cc87, -0x70ea1a9e, 0x5651735f, 0x58b40da1, 0x7fa3f190 },
+ { -0x1a9409e1, 0x1bc64631, 0x6e5382a3, -0x2c8654f0, 0x0540168d, 0x4d58c57e, -0x7bbd271c, 0x56625662 }
+ },
+},
+{
+ {
+ { 0x1ff38640, -0x22b6632a, 0x063625a0, 0x29cd9bc3, 0x3dd73dc3, 0x51e2d802, 0x203b9231, 0x4a25707a },
+ { -0x09d9800a, -0x461b6622, 0x742c0843, 0x7772ca7b, -0x165b0d4f, 0x23a0153f, -0x2a2faffa, 0x2cdfdfec },
+ { 0x53f6ed6a, 0x2ab7668a, 0x1dd170a1, 0x30424258, 0x3ae20161, 0x4000144c, 0x248e49fc, 0x5721896d }
+ },
+ {
+ { -0x5e2f25b2, 0x285d5091, -0x4a01c1f8, 0x4baa6fa7, -0x1e6c6c4d, 0x63e5177c, -0x3b4fcf03, 0x03c935af },
+ { -0x02e7e452, 0x0b6e5517, 0x2bb963b4, -0x6fdd9d61, 0x32064625, 0x5509bce9, -0x09c3ec26, 0x578edd74 },
+ { 0x492b0c3d, -0x668d893a, -0x201dfa04, 0x47ccc2c4, -0x229dc5c4, -0x232d647c, 0x0288c7a2, 0x3ec2ab59 }
+ },
+ {
+ { -0x51cd2e35, -0x58dec5f7, 0x40f5c2d5, 0x0f2b87df, -0x17e154d7, 0x0baea4c6, 0x6adbac5e, 0x0e1bf66c },
+ { -0x1b278447, -0x5e5f2d85, 0x61391aed, -0x5674b215, 0x73cb9b83, -0x665f2230, 0x200fcace, 0x2dd5c25a },
+ { 0x792c887e, -0x1d542a17, -0x346d92a3, 0x1a020018, -0x4551a0e2, -0x40459633, 0x5ae88f5f, 0x730548b3 }
+ },
+ {
+ { -0x5e291ccc, -0x7fa4f6b5, 0x09353f19, -0x40c10e89, 0x0622702b, 0x423f06cb, -0x2787ba23, 0x585a2277 },
+ { -0x34574712, -0x3bcaae5d, -0x4deea0ea, 0x65a26f1d, -0x5473c7b0, 0x760f4f52, 0x411db8ca, 0x3043443b },
+ { 0x33d48962, -0x5e75a07e, -0x1387da81, 0x6698c4b5, 0x373e41ff, -0x5871905b, 0x50ef981f, 0x76562789 }
+ },
+ {
+ { -0x15793063, -0x1e8f8c5d, 0x07155fdc, 0x3a8cfbb7, 0x31838a8e, 0x4853e7fc, -0x49ec09ea, 0x28bbf484 },
+ { -0x2ae03740, 0x38c3cf59, 0x0506b6f2, -0x64122d03, -0x54a8f171, 0x26bf109f, -0x3e47b95a, 0x3f4160a8 },
+ { 0x6f136c7c, -0x0d9ed0a4, -0x0922ee42, -0x50152ef9, 0x13de6f33, 0x527e9ad2, -0x7e7708a3, 0x1e79cb35 }
+ },
+ {
+ { -0x0a1f7e7f, 0x77e953d8, 0x299dded9, -0x7b5af3bc, -0x79bada1b, -0x2393d2f4, 0x39d1f2f4, 0x478ab52d },
+ { -0x11081c0f, 0x013436c3, -0x0161ef08, -0x7d749581, -0x43062104, 0x7ff908e5, 0x3a3b3831, 0x65d7951b },
+ { -0x6dad2ea7, 0x66a6a4d3, -0x78e537f9, -0x1a221e44, -0x593e3691, -0x47d394c0, 0x1a212214, 0x16d87a41 }
+ },
+ {
+ { -0x2ab1fa7d, -0x045b2a1e, 0x2ebd99fa, -0x1de05029, 0x6ee9778f, 0x497ac273, 0x7a5a6dde, 0x1f990b57 },
+ { 0x42066215, -0x4c4281a6, 0x0c5a24c1, -0x78641c33, -0x29066b49, 0x57c05db1, 0x65f38ca6, 0x28f87c81 },
+ { 0x1be8f7d6, -0x5ccbb153, -0x53158671, 0x7d1e50eb, 0x520de052, 0x77c6569e, 0x534d6d3e, 0x45882fe1 }
+ },
+ {
+ { -0x6bc3901c, -0x275366d7, -0x5c7c6d5e, -0x4a060e9f, -0x4137650d, 0x2699db13, -0x1bfa0f8c, 0x7dcf843c },
+ { 0x757983d6, 0x6669345d, 0x17aa11a6, 0x62b6ed11, -0x67a1ed71, 0x7ddd1857, -0x09d90923, 0x688fe5b8 },
+ { 0x4a4732c0, 0x6c90d648, -0x35a9cd67, -0x2adebc03, -0x6ea2391f, -0x4c41d73d, 0x7327191b, 0x6739687e }
+ },
+},
+{
+ {
+ { -0x363468e1, -0x731a5530, -0x602ab5d7, 0x1156aaa9, 0x15af9b78, 0x41f72470, 0x420f49aa, 0x1fe8cca8 },
+ { 0x200814cf, -0x609a3a16, 0x69a31740, -0x7bfac91f, 0x25c8b4ad, -0x74f12ec7, -0x16c9c9e3, 0x0080dbaf },
+ { 0x3c0cc82a, 0x72a1848f, -0x788361ac, 0x38c560c2, -0x31aabec0, 0x5004e228, 0x03429d71, 0x042418a1 }
+ },
+ {
+ { 0x20816247, 0x58e84c6f, -0x1c90286d, -0x724d4d4a, 0x1d484d85, -0x688e7daa, -0x79cd5429, 0x0822024f },
+ { -0x540c00a1, -0x766215af, 0x2fc2d8ba, -0x646c5799, -0x419142a4, 0x2c38cb97, -0x68d9c4a3, 0x114d5784 },
+ { 0x6b1beca3, -0x4cfe4484, -0x3914ec8b, 0x55393f6d, -0x68491b15, -0x6ef2d7f0, -0x62b8615d, 0x1ad4548d }
+ },
+ {
+ { 0x0fe9fed3, -0x5f901993, 0x1c587909, -0x578cc5c0, 0x0df98953, 0x30d14d80, -0x384cfda8, 0x41ce5876 },
+ { 0x389a48fd, -0x32a58260, -0x6587c8e2, -0x4c705b56, 0x2cdb8e6c, -0x392689e5, -0x3681ebbd, 0x35cf51db },
+ { -0x298f3fde, 0x59ac3bc5, -0x64ee6bfa, -0x151983f0, -0x4c87d026, -0x68674210, -0x02f8bf6e, 0x651e3201 }
+ },
+ {
+ { 0x1efcae9e, -0x5a845b60, -0x23cf756c, 0x769f4bee, 0x3603cb2e, -0x2e0ef115, 0x7e441278, 0x4099ce5e },
+ { -0x10cf3a31, -0x29c27b7d, 0x2361cc0c, 0x4cd4b496, -0x5b7bd954, -0x116f1b00, 0x18c14eeb, 0x0af51d7d },
+ { -0x75aede17, 0x1ac98e4f, -0x2405d020, 0x7dae9544, -0x29bcf207, -0x7cdf55f3, 0x2c4a2fb5, 0x66728265 }
+ },
+ {
+ { 0x2946db23, -0x52574920, 0x7b253ab7, 0x1c0ce51a, 0x66dd485b, -0x7bb737a6, -0x2f98a521, 0x7f1fc025 },
+ { -0x27943655, -0x78b9de0c, 0x56fe6fea, -0x4ab38442, 0x7fadc22c, 0x077a2425, 0x19b90d39, 0x1ab53be4 },
+ { 0x319ea6aa, -0x2711e4e8, 0x3a21f0da, 0x004d8808, -0x77c5b0b5, 0x3bd6aa1d, -0x202602ec, 0x4db9a3a6 }
+ },
+ {
+ { -0x34488398, -0x26a4ff45, -0x6e0e87b7, -0x22437b96, -0x41d7264d, 0x7cf700ae, -0x7a2ce0c2, 0x5ce1285c },
+ { -0x4663f8ab, -0x73184dc5, -0x3b0af086, 0x35c5d6ed, -0x1264af3d, 0x7e1e2ed2, -0x176cb25f, 0x36305f16 },
+ { -0x674f4218, 0x31b6972d, -0x535921a5, 0x7d920706, -0x6f759a61, -0x198cef08, -0x1020fdcb, 0x50fac2a6 }
+ },
+ {
+ { -0x090bb644, 0x295b1c86, 0x1f0ab4dd, 0x51b2e84a, -0x5571aae3, -0x3ffe34d0, 0x44f43662, 0x6a28d359 },
+ { 0x5b880f5a, -0x0c2c560d, -0x24fc183e, -0x1213faf4, -0x060f4e5e, -0x576967e1, -0x53a1cb5c, 0x49a4ae2b },
+ { 0x04a740e0, 0x28bb12ee, -0x64317e8c, 0x14313bbd, -0x173ef3c0, 0x72f5b5e4, 0x36adcd5b, 0x7cbfb199 }
+ },
+ {
+ { -0x33c91920, -0x7186c586, 0x7d586eed, -0x0605485d, -0x451e0b1c, 0x3a4f9692, -0x00a0bb82, 0x1c14b03e },
+ { 0x6b89792d, -0x5cee223e, -0x25aed99c, 0x1b30b4c6, -0x30eaf7a7, 0x0ca77b4c, 0x1b009408, 0x1de443df },
+ { 0x14a85291, 0x19647bd1, 0x1034d3af, 0x57b76cb2, 0x0f9d6dfa, 0x6329db44, 0x6a571493, 0x5ef43e58 }
+ },
+},
+{
+ {
+ { -0x37f3e540, -0x59923363, 0x1b38a436, -0x685fa30c, -0x6a24283a, -0x58140c42, -0x72818255, 0x7da0b8f6 },
+ { 0x385675a6, -0x1087dfec, -0x55025618, -0x5d9b60d0, 0x5cdfa8cb, 0x4cd1eb50, 0x1d4dc0b3, 0x46115aba },
+ { -0x3c4a258a, -0x2bf0e6ad, 0x21119e9b, 0x1dac6f73, -0x014da6a0, 0x03cc6021, -0x7c98b4b5, 0x5a5f887e }
+ },
+ {
+ { -0x5f59bc47, -0x6169d72d, -0x193cdf9c, -0x4a3c3500, 0x7c2dec32, -0x64acfd77, -0x2a2e38f4, 0x43e37ae2 },
+ { 0x70a13d11, -0x709cfe31, 0x350dd0c4, -0x303147eb, -0x5b435b82, -0x08fd682c, -0x1bb2ebcc, 0x3669b656 },
+ { -0x12591ecd, 0x387e3f06, -0x665ec540, 0x67301d51, 0x36263811, -0x42a52708, 0x4fd5e9be, 0x6a21e6cd }
+ },
+ {
+ { 0x6699b2e3, -0x10bed6ee, 0x708d1301, 0x71d30847, 0x1182b0bd, 0x325432d0, 0x001e8b36, 0x45371b07 },
+ { 0x3046e65f, -0x0e39e8f6, 0x00d23524, 0x58712a2a, -0x737d48ab, 0x69dbbd3c, -0x5e6a00a9, 0x586bf9f1 },
+ { 0x5ef8790b, -0x5924f773, 0x610937e5, 0x5278f0dc, 0x61a16eb8, -0x53fcb62e, -0x6f1ade87, 0x0eafb037 }
+ },
+ {
+ { 0x0f75ae1d, 0x5140805e, 0x2662cc30, -0x13fd041d, -0x156dc693, 0x2cebdf1e, -0x3abca44d, 0x44ae3344 },
+ { 0x3748042f, -0x69faaa3f, -0x7df455ef, 0x219a41e6, 0x73486d0c, 0x1c81f738, 0x5a02c661, 0x309acc67 },
+ { -0x445abc12, -0x630d7647, 0x5ac97142, -0x0c89f163, 0x4f9360aa, 0x1d82e5c6, 0x7f94678f, 0x62d5221b }
+ },
+ {
+ { 0x3af77a3c, 0x7585d426, -0x0116ebb3, -0x205184ef, 0x59f7193d, -0x5af98f80, -0x7c6ddfc9, 0x14f29a53 },
+ { 0x18d0936d, 0x524c299c, -0x75f3e5f4, -0x37944a94, -0x24b579cf, -0x5c8afad2, -0x438aba9e, 0x5c0efde4 },
+ { 0x25b2d7f5, -0x208e8124, -0x664acfc0, 0x21f970db, -0x3c12b39e, -0x256dcb49, 0x7bee093e, 0x5e72365c }
+ },
+ {
+ { 0x2f08b33e, 0x7d933906, -0x2060cd42, 0x5b9659e5, 0x1f9ebdfd, -0x5300c253, -0x348cb649, 0x70b20555 },
+ { 0x4571217f, 0x575bfc07, 0x0694d95b, 0x3779675d, -0x0be6e1cd, -0x65f5c845, 0x47b4eabc, 0x77f1104c },
+ { 0x55112c4c, -0x41aeec3b, -0x6577e033, 0x6688423a, 0x5e503b47, 0x44667785, 0x4a06404a, 0x0e34398f }
+ },
+ {
+ { 0x3e4b1928, 0x18930b09, 0x73f3f640, 0x7de3e10e, 0x73395d6f, -0x0bcde826, -0x35c863c2, 0x6f8aded6 },
+ { 0x3ecebde8, -0x4982dd27, 0x27822f07, 0x09b3e841, -0x4fa49273, 0x743fa61f, -0x75c9dc8e, 0x5e540536 },
+ { -0x02484d66, -0x1cbfedc3, -0x5de54d6f, 0x487b97e1, -0x02196b62, -0x066982fe, -0x372c2169, 0x780de72e }
+ },
+ {
+ { 0x00f42772, 0x671feaf3, 0x2a8c41aa, -0x708d14d6, -0x68c8cd6e, 0x29a17fd7, 0x32b587a6, 0x1defc6ad },
+ { 0x089ae7bc, 0x0ae28545, 0x1c7f4d06, 0x388ddecf, 0x0a4811b8, 0x38ac1551, 0x71928ce4, 0x0eb28bf6 },
+ { -0x10ae6a59, -0x50a441e6, -0x6e84ea13, 0x148c1277, 0x7ae5da2e, 0x2991f7fb, -0x0722d799, 0x467d201b }
+ },
+},
+{
+ {
+ { 0x296bc318, 0x745f9d56, -0x27ead19b, -0x66ca7f2c, 0x5839e9ce, -0x4f1a4ec1, -0x2bc6de40, 0x51fc2b28 },
+ { -0x0842d195, 0x7906ee72, 0x109abf4e, 0x05d270d6, -0x46be575c, -0x72a301bb, 0x1c974287, 0x44c21867 },
+ { -0x6a1d5674, 0x1b8fd117, 0x2b6b6291, 0x1c4e5ee1, 0x7424b572, 0x5b30e710, 0x4c4f4ac6, 0x6e6b9de8 }
+ },
+ {
+ { -0x07f34f78, 0x6b7c5f10, 0x56e42151, 0x736b54dc, -0x3910663c, -0x3d49df5b, -0x3c5f90be, 0x5f4c802c },
+ { 0x4b1de151, -0x200da032, -0x1ee3bfdb, -0x27be3f39, 0x54749c87, 0x2554b3c8, -0x6f71f207, 0x2d292459 },
+ { 0x7d0752da, -0x649a370f, -0x38811800, -0x77e31cc8, 0x5b62f9e3, -0x3c4aeb10, -0x413ef2b8, 0x66ed5dd5 }
+ },
+ {
+ { -0x3435fb83, -0x0f520c37, -0x0baad095, -0x7e3c4d35, 0x44735f93, -0x3025eed3, 0x7e20048c, 0x1f23a0c7 },
+ { 0x0bb2089d, 0x7d38a1c2, -0x69332bee, -0x7f7ccb1f, 0x6c97d313, -0x3b58f474, 0x03007f20, 0x2eacf8bc },
+ { -0x1a43ea90, -0x0dcab985, 0x0dbab38c, 0x03d2d902, -0x03061f62, 0x27529aa2, -0x62cb43b0, 0x0840bef2 }
+ },
+ {
+ { 0x7f37e4eb, -0x32ab1f95, -0x0a169336, -0x733ea079, -0x2ca68232, -0x47db7450, 0x6074400c, 0x246affa0 },
+ { -0x23ef4d79, 0x796dfb35, 0x5c7ff29d, 0x27176bcd, -0x384db6fb, 0x7f3d43e8, -0x6e3abd8a, 0x0304f5a1 },
+ { -0x041bacdf, 0x37d88e68, -0x3f28afce, -0x79f68ab8, -0x76b5f2cb, 0x4e9b13ef, 0x5753d325, 0x25a83cac }
+ },
+ {
+ { 0x3952b6e2, -0x60f099d7, 0x0934267b, 0x33db5e0e, -0x29f60124, -0x00badad5, -0x3af91f37, 0x06be10f5 },
+ { -0x1127e9a2, 0x10222f48, 0x4b8bcf3a, 0x623fc123, -0x3dde1710, 0x1e145c09, -0x3587d9d0, 0x7ccfa59f },
+ { -0x49d5cba1, 0x1a9615a9, 0x4a52fecc, 0x22050c56, 0x28bc0dfe, -0x585d877b, 0x1a1ee71d, 0x5e82770a }
+ },
+ {
+ { 0x42339c74, -0x17fd17f6, -0x5800051b, 0x34175166, 0x1c408cae, 0x34865d1f, 0x605bc5ee, 0x2cca982c },
+ { -0x527695a4, 0x35425183, -0x1872ad0a, -0x1798c505, -0x6d5ca09c, 0x2c66f25f, 0x3b86b102, 0x09d04f3b },
+ { 0x197dbe6e, -0x02d2a2cb, -0x741b005d, 0x207c2eea, 0x325ae918, 0x2613d8db, 0x27741d3e, 0x7a325d17 }
+ },
+ {
+ { 0x7e2a076a, -0x132d82ff, 0x1636495e, -0x28779761, -0x6e6dcc1b, 0x52a61af0, 0x7bb1ae64, 0x2a479df1 },
+ { -0x2e92021e, -0x2fc94645, -0x3b6857d7, -0x5dfaa8a9, -0x580ed999, -0x7193369a, 0x1239c180, 0x4d3b1a79 },
+ { 0x33db2710, -0x61a11172, -0x293bc35b, 0x189854de, -0x6d8e7ec8, -0x5be3dd3b, -0x5bc5a165, 0x27ad5538 }
+ },
+ {
+ { -0x71b8f884, -0x34a5829d, 0x20a1c059, -0x7248ac9f, -0x74120234, 0x549e1e4d, 0x503b179d, 0x080153b7 },
+ { 0x15350d61, 0x2746dd4b, -0x116ade49, -0x2fc03438, 0x138672ca, -0x1791c9a6, 0x7e7d89e2, 0x510e987f },
+ { 0x0a3ed3e3, -0x2259626d, -0x329f58de, 0x3d386ef1, -0x4255b11a, -0x37e852a8, 0x4fe7372a, 0x23be8d55 }
+ },
+},
+{
+ {
+ { 0x567ae7a9, -0x43e10b43, -0x29bb6743, 0x3f624cb2, 0x2c1f4ec8, -0x1bef9b2e, -0x45c7bfff, 0x2ef9c5a5 },
+ { 0x74ef4fad, -0x6a016e66, -0x095cf75e, 0x3a827bec, 0x09a47b01, -0x69b1fe2d, 0x5ba3c797, 0x71c43c4f },
+ { -0x05618b33, -0x4902920a, -0x1b50d986, -0x0e7d8744, -0x0e1066f2, -0x7daa4c30, -0x6f3a0d6d, 0x5a758ca3 }
+ },
+ {
+ { 0x1d61dc94, -0x731f6e75, -0x657ecf9a, -0x7212c9ba, -0x5017552d, -0x2b1957d7, -0x09c62bc1, 0x0a738027 },
+ { -0x26b9db6b, -0x5d48d8f0, -0x2a82affd, 0x3aa8c6d2, -0x5f4b7836, -0x1c2bff41, -0x4c148d14, 0x2dbae244 },
+ { 0x57ffe1cc, -0x67f0b5d1, -0x1e7c67bd, 0x00670d0d, 0x49fb15fd, 0x105c3f4a, 0x5126a69c, 0x2698ca63 }
+ },
+ {
+ { 0x5e3dd90e, 0x2e3d702f, -0x1b2dac7a, -0x61c0f6e8, 0x024da96a, 0x5e773ef6, 0x4afa3332, 0x3c004b0c },
+ { 0x32b0ba78, -0x189ace78, -0x6da30075, 0x381831f7, -0x5fd6e034, 0x08a81b91, 0x49caeb07, 0x1fb43dcc },
+ { 0x06f4b82b, -0x6556b954, -0x57f93b0d, 0x1ca284a5, -0x3932b879, 0x3ed3265f, -0x32e02de9, 0x6b43fd01 }
+ },
+ {
+ { 0x3e760ef3, -0x4a38bda8, -0x11f54670, 0x75dc52b9, 0x072b923f, -0x40ebd83e, 0x6ff0d9f0, 0x73420b2d },
+ { 0x4697c544, -0x3858a2b5, -0x20f00041, 0x15fdf848, -0x55b987a6, 0x2868b9eb, 0x5b52f714, 0x5a68d710 },
+ { -0x617ae1fa, -0x50d30935, -0x39ddc73c, -0x70a6c6ed, -0x66040c8d, -0x2575476a, -0x15cb4362, 0x3db5632f }
+ },
+ {
+ { -0x7d67da2b, 0x2e4990b1, 0x3e9a8991, -0x12151479, 0x4c704af8, -0x110fc2c7, -0x6a20d4f2, 0x59197ea4 },
+ { -0x08a22628, -0x0b9111d5, 0x396759a5, 0x0d17b1f6, 0x499e7273, 0x1bf2d131, 0x49d75f13, 0x04321adf },
+ { -0x1b1aa552, 0x04e16019, 0x7e2f92e9, -0x1884bc86, 0x6f159aa4, -0x3831d23f, -0x0b28f340, 0x45eafdc1 }
+ },
+ {
+ { -0x30334e13, -0x49f1b9dc, -0x42a3fc6b, 0x59dbc292, -0x23fb7e37, 0x31a09d1d, 0x5d56d940, 0x3f73ceea },
+ { -0x7fba28d5, 0x69840185, -0x30d0f9af, 0x4c22faa2, 0x6b222dc6, -0x6be5c99b, 0x0362dade, 0x5a5eebc8 },
+ { 0x0a4e8dc6, -0x4858402f, 0x44c9b339, -0x41a8ff82, 0x1557aefa, 0x60c1207f, 0x266218db, 0x26058891 }
+ },
+ {
+ { -0x39891abe, 0x4c818e3c, 0x03ceccad, 0x5e422c93, -0x4bed60f8, -0x13f83336, -0x4dbbbc48, 0x0dedfa10 },
+ { -0x7c9f00fc, 0x59f704a6, 0x7661e6f4, -0x3c26c022, 0x12873551, -0x7ce4d58d, 0x4e615d57, 0x54ad0c2e },
+ { -0x47d4add6, -0x11c4982b, -0x605a3e15, 0x36f16346, 0x6ec19fd3, -0x5a4b2d0e, -0x58856bf8, 0x62ecb2ba }
+ },
+ {
+ { -0x5049d78c, -0x6df8d7ca, 0x79e104a5, 0x5fcd5e85, -0x39cf5eb6, 0x5aad01ad, 0x75663f98, 0x61913d50 },
+ { 0x61152b3d, -0x1a1286ae, 0x0eddd7d1, 0x4962357d, -0x4694b38f, 0x7482c8d0, -0x56992742, 0x2e59f919 },
+ { 0x1a3231da, 0x0dc62d36, -0x6bdffd90, -0x05b8a7ce, 0x3f9594ce, 0x02d80151, 0x31c05d5c, 0x3ddbc2a1 }
+ },
+},
+{
+ {
+ { 0x004a35d1, -0x048ca53e, 0x3a6607c3, 0x31de0f43, -0x3ad72a67, 0x7b8591bf, -0x0a44faf4, 0x55be9a25 },
+ { 0x4ffb81ef, 0x3f50a50a, 0x3bf420bf, -0x4e1fcaf7, -0x3955d330, -0x645571e4, -0x05dc85c0, 0x32239861 },
+ { 0x33db3dbf, 0x0d005acd, -0x7f53ca1e, 0x0111b37c, 0x6f88ebeb, 0x4892d66c, 0x6508fbcd, 0x770eadb1 }
+ },
+ {
+ { -0x5faf8e47, -0x0e2c497f, 0x3592ff3a, 0x2207659a, 0x7881e40e, 0x5f016929, -0x7945c8b2, 0x16bedd0e },
+ { 0x5e4e89dd, -0x7bae0620, -0x4386c6c9, -0x3f9cfd01, 0x56a6495c, 0x5d227495, -0x5fa9fc05, 0x09a6755c },
+ { 0x2c2737b5, 0x5ecccc4f, 0x2dccb703, 0x43b79e0c, 0x4ec43df3, 0x33e008bc, -0x0f8a9940, 0x06c1b840 }
+ },
+ {
+ { -0x64fd7fa4, 0x69ee9e7f, 0x547d1640, -0x34007d76, -0x4dbcf698, 0x3d93a869, 0x3fe26972, 0x46b7b8cd },
+ { -0x5c770789, 0x7688a5c6, -0x214d4954, 0x02a96c14, 0x1b8c2af8, 0x64c9f343, 0x54a1eed6, 0x36284355 },
+ { -0x01811420, -0x167edf7a, 0x2f515437, 0x4cba6be7, 0x516efae9, 0x1d04168b, 0x43982cb9, 0x5ea13910 }
+ },
+ {
+ { -0x2a2c4ffe, 0x6f2b3be4, 0x6a09c880, -0x5013cc27, -0x57433b34, 0x035f73a4, 0x4662198b, 0x22c5b928 },
+ { -0x0b8fd11f, 0x49125c9c, -0x74da4cd3, 0x4520b71f, 0x501fef7e, 0x33193026, -0x372d14d5, 0x656d8997 },
+ { 0x433d8939, -0x34a73702, 0x6a8d7e50, -0x765f34d2, 0x09fbbe5a, 0x79ca9553, -0x32803efa, 0x0c626616 }
+ },
+ {
+ { -0x040bab4f, -0x70203c87, -0x0e5b488f, 0x45a5a970, -0x452ca6eb, -0x536de109, -0x57e3de6e, 0x42d088dc },
+ { 0x4879b61f, 0x1ffeb80a, 0x4ada21ed, 0x6396726e, 0x368025ba, 0x33c7b093, -0x0c3ce878, 0x471aa0c6 },
+ { -0x5fe9ae67, -0x7025f0c9, -0x375f1cbd, 0x0adadb77, -0x378a17e0, 0x20fbfdfc, 0x0c2206e7, 0x1cf2bea8 }
+ },
+ {
+ { 0x02c0412f, -0x67d291e6, -0x24a71702, -0x6f05b37d, -0x234e7440, 0x01c2f5bc, 0x216abc66, 0x686e0c90 },
+ { -0x4c9dfd54, -0x3d220e22, -0x2d1d855b, -0x6d5a01f7, -0x03f60e2d, 0x7d1648f6, 0x13bc4959, 0x74c2cc05 },
+ { -0x5abc6a59, 0x1fadbadb, -0x51f25996, -0x4be5fd60, -0x445c83f9, -0x40e60a68, -0x21b7bcf3, 0x6a12b8ac }
+ },
+ {
+ { 0x1aaeeb5f, 0x793bdd80, -0x3eae778f, 0x00a2a0aa, 0x1f2136b4, -0x175c8c5d, -0x036e10e7, 0x48aab888 },
+ { 0x39d495d9, -0x072515e1, 0x525f1dfc, 0x592c190e, -0x3666e2e5, -0x247342fc, -0x2770f349, 0x11f7fda3 },
+ { 0x5830f40e, 0x041f7e92, 0x79661c06, 0x002d6ca9, 0x2b046a2e, -0x79236007, -0x74fb6c2f, 0x76036092 }
+ },
+ {
+ { 0x695a0b05, -0x4bcef71b, -0x52c85c75, 0x6cb00ee8, -0x5cac8c7f, 0x5edad6ee, -0x4923cddc, 0x3f2602d4 },
+ { 0x120cf9c6, 0x21bb41c6, -0x21325a65, -0x154d55ee, 0x0aa48b34, -0x3e58d2fe, -0x1782c498, 0x215d4d27 },
+ { 0x5bcaf19c, -0x374db84a, -0x4e4d39ae, 0x49779dc3, -0x2a131d1e, -0x765e7f45, -0x31371fc7, 0x13f098a3 }
+ },
+},
+{
+ {
+ { 0x2796bb14, -0x0c55a85e, -0x64f825df, -0x77c54549, 0x31a0391c, -0x1ab41de8, -0x27cdfa07, 0x5ee7fb38 },
+ { -0x31a13ab5, -0x6523f007, -0x73d0ecf3, 0x039c2a6b, -0x0f076aeb, 0x028007c7, -0x53fb4c95, 0x78968314 },
+ { 0x41446a8e, 0x538dfdcb, 0x434937f9, -0x5a530257, 0x263c8c78, 0x46af908d, -0x6435f2f7, 0x61d0633c }
+ },
+ {
+ { -0x07038c21, -0x525cd744, -0x590fc804, -0x117b96a3, 0x38c2a909, 0x637fb4db, -0x07f98424, 0x5b23ac2d },
+ { -0x0024da9a, 0x63744935, 0x780b68bb, -0x3a429477, 0x553eec03, 0x6f1b3280, 0x47aed7f5, 0x6e965fd8 },
+ { -0x117fad85, -0x652d46ad, -0x05219273, -0x1770e656, 0x150e82cf, 0x0e711704, -0x226a2124, 0x79b9bbb9 }
+ },
+ {
+ { -0x71608c8c, -0x2e668252, -0x3044f7ea, -0x5fcd5d08, 0x6d445f0a, -0x329345ee, 0x0accb834, 0x1ba81146 },
+ { 0x6a3126c2, -0x144caac0, 0x68c8c393, -0x2d9c7c58, -0x1a46857e, 0x6c0c6429, -0x3602deb9, 0x5065f158 },
+ { 0x0c429954, 0x708169fb, -0x28913099, -0x1eb9ff54, 0x70e645ba, 0x2eaab98a, 0x58a4faf2, 0x3981f39e }
+ },
+ {
+ { 0x6de66fde, -0x37ba205b, 0x2c40483a, -0x1ead5b00, -0x384b09ce, -0x162d1e9d, -0x2343e49b, 0x30f4452e },
+ { 0x59230a93, 0x18fb8a75, 0x60e6f45d, 0x1d168f69, 0x14a93cb5, 0x3a85a945, 0x05acd0fd, 0x38dc0837 },
+ { -0x3a8a68c0, -0x7a92d87e, -0x06634134, -0x05ecba97, -0x3f15b18f, -0x77bb038d, 0x593f2469, 0x632d9a1a }
+ },
+ {
+ { -0x12f37b59, -0x40f602ef, 0x0d9f693a, 0x63f07181, 0x57cf8779, 0x21908c2d, -0x7509b45e, 0x3a5a7df2 },
+ { -0x47f8345a, -0x094494eb, -0x43ab0f29, 0x1823c7df, 0x6e29670b, -0x44e268fd, 0x47ed4a57, 0x0b24f488 },
+ { 0x511beac7, -0x23252b42, -0x12d9330e, -0x5bac7f8b, 0x005f9a65, -0x1e630061, 0x75481f63, 0x34fcf744 }
+ },
+ {
+ { 0x78cfaa98, -0x5a44e255, 0x190b72f2, 0x5ceda267, 0x0a92608e, -0x6cf636ef, 0x2fb374b0, 0x0119a304 },
+ { 0x789767ca, -0x3e681fb4, 0x38d9467d, -0x478eb235, -0x7c06a058, 0x55de8882, 0x4dfa63f7, 0x3d3bdc16 },
+ { -0x173de883, 0x67a2d89c, 0x6895d0c1, 0x669da5f6, -0x4d7d5d50, -0x0a9a671b, -0x121df58d, 0x56c088f1 }
+ },
+ {
+ { 0x24f38f02, 0x581b5fac, -0x451cf343, -0x56f41602, -0x75306d10, -0x65de96fe, -0x7ca6fc71, 0x038b7ea4 },
+ { 0x10a86e17, 0x336d3d11, 0x0b75b2fa, -0x280c77ce, 0x25072988, -0x06eacc8a, -0x66ef7479, 0x09674c6b },
+ { -0x66ce9008, -0x60b107df, -0x155872b1, 0x2f49d282, 0x5aef3174, 0x0971a5ab, 0x5969eb65, 0x6e5e3102 }
+ },
+ {
+ { 0x63066222, 0x3304fb0e, -0x785345c1, -0x04caf977, -0x73ef9e5d, -0x42e6db89, -0x2e7c79e0, 0x3058ad43 },
+ { -0x781a6c05, -0x4e939d0b, -0x35a2c18f, 0x4999edde, 0x14cc3e6d, -0x4b6e3e20, -0x76572458, 0x08f51147 },
+ { -0x1a899c30, 0x323c0ffd, -0x5dd159f0, 0x05c3df38, -0x5366b066, -0x42387543, -0x101c2367, 0x26549fa4 }
+ },
+},
+{
+ {
+ { -0x08ac6947, 0x04dbbc17, -0x2d0798ba, 0x69e6a2d7, -0x0ac1543a, -0x39bf6267, 0x332e25d2, 0x606175f6 },
+ { -0x78317077, 0x738b38d7, 0x4179a88d, -0x49d9a71e, -0x0eaece93, 0x30738c9c, 0x727275c9, 0x49128c7f },
+ { -0x0abf1823, 0x4021370e, -0x5e0e2f5b, 0x0910d6f5, 0x5b06b807, 0x4634aacd, 0x6944f235, 0x6a39e635 }
+ },
+ {
+ { 0x74049e9d, 0x1da19657, -0x6701cad5, -0x0432915f, -0x33adc95a, -0x4e3432b0, 0x3f9846e2, 0x1f5ec83d },
+ { -0x206f0c19, -0x6932a9c0, -0x2405da16, 0x6c3a760e, 0x59e33cc4, 0x24f3ef09, 0x530d2e58, 0x42889e7e },
+ { 0x328ccb75, -0x7104dc3d, -0x22789117, -0x50bd5df9, 0x5dfae796, 0x20fbdadc, 0x06bf9f51, 0x241e246b }
+ },
+ {
+ { 0x6280bbb8, 0x7eaafc9a, -0x0bfc27f7, 0x22a70f12, 0x1bfc8d20, 0x31ce40bb, -0x1742ac12, 0x2bc65635 },
+ { -0x5291670a, 0x29e68e57, 0x0b462065, 0x4c9260c8, -0x5ae144b5, 0x3f00862e, -0x4c726f69, 0x5bc2c77f },
+ { -0x5694526d, -0x172a2361, -0x21e6b824, -0x1a704e83, 0x65185fa3, 0x681532ea, 0x034a7830, 0x1fdd6c3b }
+ },
+ {
+ { 0x2dd8f7a9, -0x63ec595b, 0x3efdcabf, 0x2dbb1f8c, 0x5e08f7b5, -0x69e1cdc0, -0x4419361b, 0x48c8a121 },
+ { 0x55dc18fe, 0x0a64e28c, 0x3399ebdd, -0x1c206167, 0x70e2e652, 0x79ac4323, 0x3ae4cc0e, 0x35ff7fc3 },
+ { 0x59646445, -0x03bea584, -0x3ed749eb, -0x2ddb4d29, 0x05fbb912, 0x6035c9c9, 0x74429fab, 0x42d7a912 }
+ },
+ {
+ { -0x6cc25a44, -0x565b76b9, -0x3d168614, 0x4a58920e, 0x13e5ac4c, -0x69278000, 0x4b48b147, 0x453692d7 },
+ { -0x1508d12d, 0x4e6213e3, 0x43acd4e7, 0x6794981a, 0x6eb508cb, -0x00ab8322, 0x10fcb532, 0x6fed19dd },
+ { -0x57aa6391, -0x2288a267, -0x20ffc1dc, -0x0bd5dec0, -0x256d759a, 0x5223e229, 0x6d38f22c, 0x063f46ba }
+ },
+ {
+ { 0x37346921, 0x39843cb7, 0x38c89447, -0x58b804f9, -0x5dbacf82, -0x34727fcf, 0x6d82f068, 0x67810f8e },
+ { 0x5f536694, -0x2d2dbd77, 0x42939b2c, -0x35cc5d3b, -0x382246a4, -0x6790525a, 0x2f712d5d, 0x5a152c04 },
+ { -0x2dd7824c, 0x3eeb8fbc, 0x01a03e93, 0x72c7d3a3, -0x4267d9a6, 0x5473e88c, 0x5921b403, 0x7324aa51 }
+ },
+ {
+ { -0x17dcab35, -0x52dc0926, -0x49a8e593, 0x6962502a, -0x1c71c82f, -0x649ae9ca, -0x2e5cced1, 0x5cac5005 },
+ { 0x6c3cbe8e, -0x7a86bd0c, 0x4730c046, -0x5e2c9b4f, -0x2dc3be41, 0x1c8ed914, -0x11092a2e, 0x0838e161 },
+ { -0x161c66fc, -0x733eab34, -0x7b2197ba, 0x5b3a040b, -0x4e41a292, -0x3b2759e4, -0x2779e0fe, 0x40fb897b }
+ },
+ {
+ { 0x5ab10761, -0x1a8127b9, 0x6fd13746, 0x71435e20, -0x32fda9ce, 0x342f824e, -0x5786e185, 0x4b16281e },
+ { 0x62de37a1, -0x7b3a5570, 0x0d1d96e1, 0x421da500, 0x6a9242d9, 0x78828630, 0x690d10da, 0x3c5e464a },
+ { 0x0b813381, -0x2e3efe2b, 0x76ee6828, -0x2119f0ef, 0x383f6409, 0x0cb68893, -0x0900b7b6, 0x6183c565 }
+ },
+},
+{
+ {
+ { -0x50c09992, -0x24b97ab7, -0x0eb5f15b, -0x288030fc, -0x5b45f3b9, 0x3df23ff7, 0x32ce3c85, 0x3a10dfe1 },
+ { 0x1e6bf9d6, 0x741d5a46, 0x7777a581, 0x2305b3fc, 0x6474d3d9, -0x2baa8b5e, 0x6401e0ff, 0x1926e1dc },
+ { -0x15e83160, -0x1f80b176, 0x3a1fc1fd, 0x2fd51546, 0x31f2c0f1, 0x175322fd, -0x79e1a2eb, 0x1fa1d01d }
+ },
+ {
+ { -0x2e206b55, 0x38dcac00, -0x2ef7f217, 0x2e712bdd, -0x022a1d9e, 0x7f13e93e, -0x1165fe1b, 0x73fced18 },
+ { 0x7d599832, -0x337faa6c, 0x37f15520, 0x1e4656da, 0x4e059320, -0x6609088c, 0x6a75cf33, 0x773563bc },
+ { 0x63139cb3, 0x06b1e908, -0x3a5fc133, -0x5b6c2599, -0x529c76ce, -0x72883138, 0x1b864f44, 0x1f426b70 }
+ },
+ {
+ { -0x6e5edaae, -0x0e81ca38, 0x575e9c76, -0x48947ead, 0x0d9b723e, -0x057cbf91, 0x3fa7e438, 0x0b76bb1b },
+ { 0x41911c01, -0x1036d9b4, 0x17a22c25, -0x0e5c4848, -0x0cf0ebb9, 0x5875da6b, 0x1d31b090, 0x4e1af527 },
+ { 0x7f92939b, 0x08b8c1f9, -0x2bbb5492, -0x41988e35, -0x66447fe9, 0x22e56463, -0x488d56ab, 0x7b6dd61e }
+ },
+ {
+ { -0x54fe2d39, 0x5730abf9, 0x40143b18, 0x16fb76dc, -0x5f344d7f, -0x7993419b, -0x64009502, 0x53fa9b65 },
+ { 0x50f33d92, -0x48523e18, 0x608cd5cf, 0x7998fa4f, -0x7203a425, -0x5269d243, -0x50e2d0b1, 0x703e9bce },
+ { -0x6b77abab, 0x6c14c8e9, 0x65aed4e5, -0x7bc5a29a, -0x4329a50f, 0x181bb73e, -0x3b39e0b0, 0x398d93e5 }
+ },
+ {
+ { -0x2d181c0e, -0x3c7883a0, 0x30828bb1, 0x3b34aaa0, 0x739ef138, 0x283e26e7, 0x02c30577, 0x699c9c90 },
+ { 0x33e248f3, 0x1c4bd167, 0x15bf0a5f, -0x4261ed79, -0x5ef4fc8a, -0x2bc07310, -0x20e6e4ed, 0x53b09b5d },
+ { 0x5946f1cc, -0x0cf958dd, -0x331a2683, -0x6de8e74b, -0x7e4b168b, 0x28cdd247, 0x6fcdd907, 0x51caf30c }
+ },
+ {
+ { 0x18ac54c7, 0x737af99a, -0x3ae34cf1, -0x6fcc8724, 0x4ce10cc7, 0x2b89bc33, -0x76071666, 0x12ae29c1 },
+ { 0x7674e00a, -0x59f458be, -0x5e85840d, 0x630e8570, -0x30ccdb34, 0x3758563d, 0x2383fdaa, 0x5504aa29 },
+ { 0x1f0d01cf, -0x56613f35, 0x3a34f7ae, 0x0dd1efcc, -0x2f63b1de, 0x55ca7521, 0x58eba5ea, 0x5fd14fe9 }
+ },
+ {
+ { -0x406c3472, 0x3c42fe5e, 0x36d4565f, -0x412057af, -0x77bddf18, -0x1f0f7a62, 0x0725d128, 0x7dd73f96 },
+ { 0x2845ab2c, -0x4a23d221, 0x0a7fe993, 0x069491b1, 0x4002e346, 0x4daaf3d6, 0x586474d1, 0x093ff26e },
+ { 0x68059829, -0x4ef2db02, -0x2450dc1b, 0x75730672, -0x4ba853d7, 0x1367253a, -0x794b8f5c, 0x2f59bcbc }
+ },
+ {
+ { -0x496e3cff, 0x7041d560, -0x522818e2, -0x7adfe4c1, 0x11335585, 0x16c2e163, 0x010828b1, 0x2aa55e3d },
+ { -0x66e8eca1, -0x7c7b82be, 0x567d03d7, -0x52e46ee1, -0x4188552f, 0x7e7748d9, 0x2e51af4a, 0x5458b42e },
+ { 0x0c07444f, -0x12ae6d1a, 0x74421d10, 0x42c54e2d, -0x024a379c, 0x352b4c82, -0x7589799c, 0x13e9004a }
+ },
+},
+{
+ {
+ { -0x7f94b984, 0x1e6284c5, -0x18a29f85, -0x3a096685, -0x4c872d9e, -0x749826a8, -0x7e327490, 0x3d88d66a },
+ { 0x6c032bff, -0x344a4aab, 0x29297a3a, -0x208e6e49, -0x52127e45, -0x3e008cda, 0x68be03f5, 0x71ade8bb },
+ { 0x204ed789, -0x7489856d, -0x605f51d6, 0x762fcacb, 0x6dce4887, 0x771febcc, -0x700fa04d, 0x34306215 }
+ },
+ {
+ { 0x2a7b31b4, -0x031de6f9, -0x55a87fea, 0x4d7adc75, -0x78b86cdc, 0x0ec276a6, 0x1fda4beb, 0x6d6d9d5d },
+ { -0x1e0a40b7, -0x1fa25e59, -0x2b8c9f6e, 0x26457d6d, 0x73cc32f6, 0x77dcb077, -0x6322a033, 0x0a5d9496 },
+ { -0x164f7e7d, 0x22b1a58a, -0x3ea3c775, -0x026a2f8f, -0x7af5fae9, -0x567edc8a, -0x4480cca2, 0x33384cba }
+ },
+ {
+ { 0x26218b8d, 0x33bc627a, -0x3857f39f, -0x157f4de1, 0x173e9ee6, -0x6ba74ed5, 0x0e2f3059, 0x076247be },
+ { 0x0ca2c7b5, 0x3c6fa268, 0x6fb64fda, 0x1b508204, 0x5431d6de, -0x14accb64, 0x6b879c89, 0x5278b38f },
+ { 0x1416375a, 0x52e105f6, -0x7a54145c, -0x136850ca, 0x23a67c36, 0x26e6b506, -0x0c2b04ff, 0x5cf0e856 }
+ },
+ {
+ { 0x3db342a8, -0x415131cf, -0x7bd24812, -0x345c9ca5, -0x7e80ec11, -0x177399e0, 0x4e76d5c6, 0x1b9438aa },
+ { 0x1ae8cab4, -0x0936978d, -0x34b06d3b, 0x5e20741e, -0x733243c2, 0x2da53be5, 0x69970df7, 0x2dddfea2 },
+ { 0x166f031a, -0x75af8882, 0x0fb7a328, 0x067b39f1, 0x010fbd76, 0x1925c9a6, -0x338bf6fb, 0x6df9b575 }
+ },
+ {
+ { 0x48cade41, -0x13203ca5, -0x4dcd7d90, 0x6a88471f, 0x40a01b6a, 0x740a4a24, 0x003b5f29, 0x471e5796 },
+ { 0x27f6bdcf, 0x42c11929, 0x403d61ca, -0x706e6e86, -0x7461e09f, -0x23e3a59a, 0x04ec0f8d, 0x15960478 },
+ { -0x5312c854, -0x2569444d, -0x16df7316, 0x7a2423b5, 0x38aebae2, 0x24cc5c30, -0x23a251d1, 0x50c356af }
+ },
+ {
+ { 0x1b31b964, -0x30126321, -0x735ae50d, -0x0b79567b, -0x1573e07c, 0x14897265, -0x6cd53400, 0x784a53dd },
+ { 0x41c30318, 0x09dcbf43, -0x7ce7e232, -0x1145f9ef, -0x23e1d65f, -0x3e863f32, 0x073f35b0, 0x1dbf7b89 },
+ { 0x14fc4920, 0x2d99f9df, -0x3bb6601b, 0x76ccb60c, -0x1a30fffd, -0x5becd345, 0x54f000ea, 0x3f93d823 }
+ },
+ {
+ { 0x79e14978, -0x1553ed2f, -0x441400a2, -0x006dc00d, 0x0663ce27, 0x4af663e4, 0x11a5f5ff, 0x0fd381a8 },
+ { -0x61fb317b, -0x7e7c1898, 0x04465341, 0x678fb71e, 0x6688edac, -0x526dfa71, 0x532b099a, 0x5da350d3 },
+ { -0x5bc920ac, -0x0da95314, -0x51962918, 0x108b6168, 0x6b5d036c, 0x20d986cb, -0x011d50b0, 0x655957b9 }
+ },
+ {
+ { -0x2ffd2f54, -0x423ebf65, -0x4a33265a, 0x66660245, -0x05217a14, -0x7dce823c, 0x6ad7df0d, 0x02fe934b },
+ { -0x56fdfcf1, -0x51574f81, -0x0b9c2ebd, -0x07738996, 0x3c787a60, 0x15b08366, -0x7d985b58, 0x08eab114 },
+ { -0x3048158c, -0x10a30f00, -0x5e34bd54, 0x22897633, -0x310d7a1e, -0x2b31f3ac, -0x75eb95ab, 0x30408c04 }
+ },
+},
+{
+ {
+ { 0x193b877f, -0x44d1ff37, -0x1f23af95, -0x131c5770, 0x36de649f, -0x130c4840, -0x672161e6, 0x5f460408 },
+ { -0x7cd03125, 0x739d8845, -0x5194079d, -0x05c72937, -0x48b00109, 0x32bc0dca, 0x14bce45e, 0x73937e88 },
+ { 0x297bf48d, -0x46fc8eea, -0x2b0f97cc, -0x562ec4de, 0x4696bdc6, -0x1e68eaa9, -0x6e2a17cb, 0x2cf8a4e8 }
+ },
+ {
+ { 0x17d06ba2, 0x2cb5487e, 0x3950196b, 0x24d2381c, -0x7a6875d0, -0x289a637f, -0x6e295b0a, 0x7a6f7f28 },
+ { 0x07110f67, 0x6d93fd87, 0x7c38b549, -0x22b3f62d, -0x3d8c957a, 0x7cb16a4c, 0x58252a09, 0x2049bd6e },
+ { 0x6a9aef49, 0x7d09fd8d, 0x5b3db90b, -0x0f119f42, 0x519ebfd4, 0x4c21b52c, -0x3aba6be3, 0x6011aadf }
+ },
+ {
+ { 0x02cbf890, 0x63ded0c8, 0x0dff6aaa, -0x042f6736, -0x46491267, 0x624d0afd, 0x79340b1e, 0x69ce18b7 },
+ { -0x306a07c4, 0x5f67926d, 0x71289071, 0x7c7e8561, -0x667085a5, -0x295e180d, 0x0b62f9e0, 0x6fc5cc1b },
+ { -0x4d678635, -0x2e10aad8, -0x2b816f6e, -0x22e551c4, 0x189f2352, 0x127e0442, -0x1a8efe0f, 0x15596b3a }
+ },
+ {
+ { 0x7e5124ca, 0x09ff3116, -0x2638ba21, 0x0be4158b, 0x7ef556e5, 0x292b7d22, -0x50492ec8, 0x3aa4e241 },
+ { 0x3f9179a2, 0x462739d2, -0x68292231, -0x007cedcf, 0x53f2148a, 0x1307deb5, 0x7b5f4dda, 0x0d223768 },
+ { 0x2a3305f5, 0x2cc138bf, -0x5d16d93d, 0x48583f8f, 0x5549d2eb, 0x083ab1a2, 0x4687a36c, 0x32fcaa6e }
+ },
+ {
+ { 0x2787ccdf, 0x3207a473, -0x0dec1c08, 0x17e31908, -0x09f269b2, -0x2a4d1329, -0x3d9ff417, 0x746f6336 },
+ { -0x3a82650b, 0x7bc56e8d, -0x620f420e, 0x3e0bd2ed, 0x22efe4a3, -0x553feb22, -0x014295a4, 0x4627e9ce },
+ { -0x549368e4, 0x3f4af345, -0x66bc8ce1, -0x1d77148e, 0x0344186d, 0x33596a8a, 0x7ed66293, 0x7b491700 }
+ },
+ {
+ { -0x22ac5d23, 0x54341b28, -0x20bd03c1, -0x55e86fa5, 0x4dd2f8f4, 0x0ff592d9, -0x1f732c83, 0x1d03620f },
+ { -0x547b4f9c, 0x2d85fb5c, -0x760c43ec, 0x497810d2, 0x7b15ce0c, 0x476adc44, -0x07bb0285, 0x122ba376 },
+ { -0x5d4b1aac, -0x3dfdcd33, 0x115d187f, -0x612f02be, 0x7dd479d9, 0x2eabb4be, 0x2b68ec4c, 0x02c70bf5 }
+ },
+ {
+ { 0x458d72e1, -0x531acd41, 0x7cb73cb5, 0x5be768e0, -0x11744219, 0x56cf7d94, -0x014bc5fd, 0x6b0697e3 },
+ { 0x5d0b2fbb, -0x5d7813b5, 0x074882ca, 0x415c5790, -0x3e2f7ea4, -0x1fbb59e2, 0x409ef5e0, 0x26334f0a },
+ { -0x209d5c40, -0x49370fb6, 0x076da45d, 0x3ef000ef, 0x49f0d2a9, -0x636346a8, 0x441b2fae, 0x1cc37f43 }
+ },
+ {
+ { -0x36315147, -0x2899a90f, 0x18e5656a, 0x1c5b15f8, -0x7bb3dccc, 0x26e72832, 0x2f196838, 0x3a346f77 },
+ { 0x5cc7324f, 0x508f565a, -0x1af956de, -0x2f9e3b40, 0x5c45ac19, -0x04e75425, 0x0380314a, 0x6c6809c1 },
+ { -0x1d259538, -0x2d2aaeee, -0x4e17ae13, -0x1642fccf, -0x71398d9e, -0x69f8b923, 0x6ef7c5d0, 0x05911b9f }
+ },
+},
+{
+ {
+ { -0x3a01606c, 0x01c18980, 0x716fd5c8, -0x329a9897, -0x2e6a5f7a, -0x7e9fba3d, 0x66cc7982, 0x6e2b7f32 },
+ { -0x49c800d3, -0x162328aa, -0x36780f3c, -0x13b3cb71, -0x0c043849, -0x312a6d7b, -0x6c1e1579, 0x33053547 },
+ { -0x083ca971, -0x337fdb98, 0x19974cb3, -0x6216457e, -0x4a47eca0, -0x5448dd64, 0x6fbeba62, 0x44e2017a }
+ },
+ {
+ { -0x49359133, -0x7807d30d, 0x18f4a0c2, 0x580f893e, 0x2604e557, 0x05893007, 0x56d19c1d, 0x6cab6ac2 },
+ { 0x54dab774, -0x3b3d58bd, 0x4eaf031a, -0x71a2b3c4, 0x42838f17, -0x4893dc2e, 0x68dce4ea, 0x749a098f },
+ { 0x2cc1de60, -0x23201f60, 0x51c5575b, 0x032665ff, 0x073abeeb, 0x2c0c32f1, -0x328479fa, 0x6a882014 }
+ },
+ {
+ { -0x50b01492, -0x2eee2e84, -0x4cc55b5d, 0x050bba42, -0x114b93d0, 0x17514c3c, 0x1bc27d75, 0x54bedb8b },
+ { -0x5b8b804b, -0x5ad56d02, 0x1fa5ab89, -0x23ed5bb7, -0x47b85b32, -0x27d256b5, -0x6aed33b2, 0x4d77edce },
+ { 0x77e2189c, 0x77c8e145, -0x00663bbb, -0x5c1b9096, 0x6d335343, 0x3144dfc8, 0x7c4216a9, 0x3a96559e }
+ },
+ {
+ { -0x7f4555ae, 0x44938968, -0x0d7a6bf2, 0x4c98afc4, -0x5babb74a, -0x10b55865, -0x5a855181, 0x5278c510 },
+ { -0x0bd52d12, 0x12550d37, -0x675e040b, -0x74871ffc, 0x33894cb2, 0x5d530782, 0x3e498d0c, 0x02c84e4e },
+ { 0x294c0b94, -0x5ab22f8c, -0x20e7004a, -0x0aa2b948, -0x72517c9a, -0x0f90133b, -0x7e6f2e9b, 0x58865766 }
+ },
+ {
+ { 0x3de25cc3, -0x40a7cb10, -0x297eab6a, -0x47783752, -0x6b7e176e, 0x5105221a, -0x088dc06d, 0x6760ed19 },
+ { 0x1aef7117, -0x2b88edcf, 0x229e92c7, 0x50343101, -0x62ea6469, 0x7a95e184, -0x74a2d637, 0x2449959b },
+ { -0x53ca1ea0, 0x669ba3b7, -0x457bdfaa, 0x2eccf73f, -0x3f7fb0f9, 0x1aec1f17, 0x1856f4e7, 0x0d96bc03 }
+ },
+ {
+ { -0x338afa1f, -0x4e2acb50, 0x16c35288, 0x32cd0034, 0x0762c29d, -0x34c95a80, 0x237a0bf8, 0x5bfe69b9 },
+ { 0x75c52d82, 0x3318be77, 0x54d0aab9, 0x4cb764b5, -0x3388c26f, -0x5430c2d9, -0x7edcd776, 0x3bf4d184 },
+ { 0x78a151ab, 0x183eab7e, -0x66f6c89d, -0x44166f37, 0x4ac7e335, -0x008e8292, 0x25f39f88, 0x4c5cddb3 }
+ },
+ {
+ { -0x185606fe, 0x57750967, 0x4f5b467e, 0x2c37fdfc, 0x3177ba46, -0x4d9e99c6, -0x23d2acd5, 0x3a375e78 },
+ { 0x6190a6eb, -0x3f0948b3, 0x2db8f4e4, 0x20ea81a4, -0x68cea8a0, -0x57429083, 0x62ac7c21, 0x33b1d602 },
+ { 0x2d4dddea, -0x7ebe18d1, 0x62c607c8, -0x19150168, 0x573cafd0, 0x23c28458, 0x4ff97346, 0x46b9476f }
+ },
+ {
+ { 0x0d58359f, 0x1215505c, -0x03d73b95, 0x2a2013c7, -0x761599b2, 0x24a0a1af, -0x5eecf1e1, 0x4400b638 },
+ { 0x4f901e5c, 0x0c1ffea4, 0x2184b782, 0x2b0b6fb7, 0x0114db88, -0x1a78006f, 0x4785a142, 0x37130f36 },
+ { -0x6912e63d, 0x3a01b764, -0x12cd8dd0, 0x31e00ab0, -0x7c35ea4f, 0x520a8857, 0x5accbec7, 0x06aab987 }
+ },
+},
+{
+ {
+ { 0x512eeaef, 0x5349acf3, 0x1cc1cb49, 0x20c141d3, -0x56659773, 0x24180c07, -0x39b4d2e9, 0x555ef9d1 },
+ { -0x0a20f145, -0x3ecc667d, 0x512c4cac, -0x3f0c8a71, 0x0bb398e1, 0x2cf1130a, -0x55d8f39e, 0x6b3cecf9 },
+ { 0x3b73bd08, 0x36a770ba, -0x5c5040f4, 0x624aef08, -0x4bf6b90e, 0x5737ff98, 0x3381749d, 0x675f4de1 }
+ },
+ {
+ { 0x3bdab31d, -0x5ed00927, -0x629ad202, 0x0725d80f, -0x65416b79, 0x019c4ff3, -0x7d32c3bd, 0x60f450b8 },
+ { 0x6b1782fc, 0x0e2c5203, 0x6cad83b4, 0x64816c81, 0x6964073e, -0x2f234227, 0x0164c520, 0x13d99df7 },
+ { 0x21e5c0ca, 0x014b5ec3, -0x28e6405e, 0x4fcb69c9, 0x750023a0, 0x4e5f1c18, 0x55edac80, 0x1c06de9e }
+ },
+ {
+ { -0x00929656, -0x002ad4c0, -0x23bfb645, 0x34530b18, -0x5cb26769, 0x5e4a5c2f, 0x7d32ba2d, 0x78096f8e },
+ { -0x5cc13b1e, -0x66f0852a, -0x41d11f72, 0x6608f938, 0x63284515, -0x635ebc3b, -0x13d249f3, 0x4cf38a1f },
+ { 0x0dfa5ce7, -0x5f55559b, 0x48b5478c, -0x063b61d6, 0x7003725b, 0x4f09cc7d, 0x26091abe, 0x373cad3a }
+ },
+ {
+ { -0x76224453, -0x0e415705, 0x61aeaecb, 0x3bcb2cbc, 0x1f9b8d9d, -0x70a75845, 0x5112a686, 0x21547eda },
+ { -0x7d360a84, -0x4d6b9cb3, 0x24934536, 0x1fcbfde1, 0x418cdb5a, -0x6163b24d, 0x454419fc, 0x0040f3d9 },
+ { -0x02a6792d, -0x210216c7, 0x510a380c, -0x0bd8d377, -0x44cee647, -0x48d45bf9, 0x4a254df4, 0x63550a33 }
+ },
+ {
+ { 0x72547b49, -0x6445a7bb, -0x1d3bf720, -0x0cfa3906, -0x38cb0e73, 0x60e8fa69, -0x55828986, 0x39a92baf },
+ { -0x4a9630c9, 0x6507d6ed, 0x0ca52ee1, 0x178429b0, -0x149429a3, -0x1583ff70, -0x250870af, 0x3eea62c7 },
+ { -0x196cd8b2, -0x62db38ed, 0x68dbd375, 0x5f638577, -0x14754c66, 0x70525560, 0x65c9c4cd, 0x68436a06 }
+ },
+ {
+ { -0x17dfef84, 0x1e56d317, -0x7bf5169b, -0x3ad997bc, 0x320ffc7a, -0x3e1f5e3a, -0x6e9eeb8e, 0x5373669c },
+ { 0x202f3f27, -0x43fdca18, 0x64f975b0, -0x38a3ff1e, -0x5c73dbea, -0x6e5b162b, -0x75487607, 0x17b6e7f6 },
+ { -0x65f1ada9, 0x5d2814ab, -0x36354c04, -0x6f70df7c, 0x5b2d1eca, -0x50350a78, 0x78f87d11, 0x1cb4b5a6 }
+ },
+ {
+ { -0x5d5ff819, 0x6b74aa62, -0x0f8e384f, -0x0cee1f50, 0x000be223, 0x5707e438, -0x7d109154, 0x2dc0fd2d },
+ { 0x394afc6c, -0x499b3f95, -0x6725a04f, 0x0c88de24, 0x4bcad834, 0x4f8d0316, -0x218bcb5e, 0x330bca78 },
+ { 0x1119744e, -0x67d1007c, 0x2b074724, -0x0696a16a, -0x4036ac05, -0x3a753eb1, 0x369f1cf5, 0x3c31be1b }
+ },
+ {
+ { -0x0634bd8e, -0x3e97436d, -0x38312468, -0x51478ee1, 0x34ac8d7a, 0x7f0e52aa, 0x7e7d55bb, 0x41cec109 },
+ { 0x08948aee, -0x4f0b79b3, -0x6e45e391, 0x07dc19ee, -0x59535ea8, 0x7975cdae, 0x4262d4bb, 0x330b6113 },
+ { -0x5d927f76, -0x0869e629, 0x1d9e156d, -0x44e02b62, -0x245e20d9, 0x73d7c36c, 0x1f28777d, 0x26b44cd9 }
+ },
+},
+{
+ {
+ { -0x4fd7a0c9, -0x50bb7bd3, 0x47efc8df, -0x78ace770, -0x07df6866, -0x6a8b1f6f, 0x69615579, 0x0e378d60 },
+ { 0x393aa6d8, 0x300a9035, -0x5ed44e33, 0x2b501131, -0x0f6c3dde, 0x7b1ff677, -0x3547d453, 0x4309c1f8 },
+ { -0x7cf8a5ab, -0x26056e8f, 0x6b009fdc, 0x4bdb5ad2, -0x29c210f2, 0x7829ad2c, 0x75fd3877, 0x078fc549 }
+ },
+ {
+ { -0x47cc5676, -0x1dffb4a5, 0x2d4c3330, 0x44775dec, 0x7eace913, 0x3aa24406, -0x2a71ff57, 0x272630e3 },
+ { 0x28878f2d, -0x782042ec, 0x1e9421a1, 0x134636dd, 0x257341a3, 0x4f17c951, -0x52d69348, 0x5df98d4b },
+ { -0x1336f4ac, -0x0c987030, 0x12043599, -0x0ffeba65, 0x3758b89b, 0x26725fbc, 0x73a719ae, 0x4325e4aa }
+ },
+ {
+ { -0x30960a63, -0x12db9d66, -0x22a5440c, 0x2a4a1cce, 0x56b2d67b, 0x3535ca1f, 0x43b1b42d, 0x5d8c68d0 },
+ { 0x433c3493, 0x657dc6ef, -0x7f24073d, 0x65375e9f, 0x5b372dae, 0x47fd2d46, 0x796e7947, 0x4966ab79 },
+ { -0x1c4bd4f6, -0x11ccd2b3, 0x16a4601c, -0x27b1a5d5, 0x078ba3e4, 0x78243877, 0x184ee437, 0x77ed1eb4 }
+ },
+ {
+ { -0x616d12e6, 0x185d43f8, -0x01b8e63a, -0x4fb5e116, -0x590fc0b1, 0x499fbe88, 0x3c859bdd, 0x5d8b0d2f },
+ { 0x201839a0, -0x402b1ec1, 0x3e3df161, -0x5110001e, 0x6b5d1fe3, -0x49a4fb10, 0x2b62fbc0, 0x52e085fb },
+ { -0x5ab30d46, 0x124079ea, 0x001b26e7, -0x28db9a15, -0x36850803, 0x6843bcfd, 0x55eacd02, 0x0524b42b }
+ },
+ {
+ { -0x647d6154, -0x43e72353, -0x4a0a8630, 0x23ae7d28, 0x69384233, -0x3cb9edd6, -0x182b5377, 0x1a6110b2 },
+ { -0x1babb850, -0x02f2a242, 0x092005ee, 0x6cec351a, 0x567579cb, -0x665b87bc, 0x16e7fa45, 0x59d242a2 },
+ { -0x19966854, 0x4f833f6a, 0x361839a4, 0x6849762a, -0x68f54adb, 0x6985dec1, -0x234e0aba, 0x53045e89 }
+ },
+ {
+ { -0x72ba01ee, -0x7b25c322, -0x1bbb1d2e, -0x42bd3de8, 0x1f7e3598, -0x57ae6988, 0x5616e2b2, 0x7642c93f },
+ { -0x28acac25, -0x34744cba, -0x51aee1de, -0x03034db5, -0x2af51911, -0x345b72c0, -0x0b0834a3, 0x26e3bae5 },
+ { 0x4595f8e4, 0x2323daa7, -0x7a85414c, -0x21977375, 0x1c59326e, 0x3fc48e96, 0x15c9b8ba, 0x0b2e73ca }
+ },
+ {
+ { 0x79c03a55, 0x0e3fbfaf, 0x4cbb5acf, 0x3077af05, -0x24c21c61, -0x2a3aadbb, 0x476a4af7, 0x015e68c1 },
+ { -0x3e80afda, -0x2944bbd8, -0x04a56359, -0x614d8ddd, 0x1919c644, -0x1c845afd, -0x4a6599fe, 0x21ce380d },
+ { 0x20066a38, -0x3e2ad7ae, 0x3570aef3, -0x6a9fc1ae, 0x226b8a4d, -0x7cd9a659, 0x1f8eedc9, 0x5dd68909 }
+ },
+ {
+ { -0x5acecf7c, 0x1d022591, -0x29d8f78e, -0x35d2b552, 0x2f0bfd20, -0x795ed47b, -0x528258b8, 0x56e6c439 },
+ { -0x402c37aa, -0x34537b22, -0x4ca00dbc, 0x1624c348, 0x5d9cad07, -0x48077236, -0x5d3d1418, 0x3b0e574d },
+ { 0x42bdbae6, -0x38fb00b7, -0x4d21e087, 0x5e21ade2, 0x5652fad8, -0x16a24c0d, -0x70f7143f, 0x0822b537 }
+ },
+},
+{
+ {
+ { 0x62730383, -0x1e480d6d, -0x143575d4, 0x4b5279ff, -0x402becec, -0x25038876, -0x638d9ef1, 0x7deb1014 },
+ { -0x70c78b8b, 0x51f04847, -0x634134c4, -0x4da2430c, -0x2660dfab, -0x6554edbc, 0x1c10a5d6, 0x2c709e6c },
+ { -0x78991186, -0x349d5096, 0x5553cd0e, 0x66cbec04, 0x0f0be4b5, 0x58800138, -0x09d31d16, 0x08e68e9f }
+ },
+ {
+ { 0x0ab8f2f9, 0x2f2d09d5, -0x3aa6dc21, -0x5346de73, 0x73766cb9, 0x4a8f3426, 0x38f719f5, 0x4cb13bd7 },
+ { 0x4bc130ad, 0x34ad500a, 0x3d0bd49c, -0x72c724b7, 0x500a89be, -0x5da3c268, -0x1145c4f7, 0x2f1f3f87 },
+ { -0x1aea49b6, -0x087b738b, -0x24b56fc8, -0x5a6afe46, 0x3f751b50, -0x3df2cec1, -0x3f51d118, 0x19a1e353 }
+ },
+ {
+ { -0x2a694243, -0x4bde8d33, -0x671103c0, -0x6c1fbabd, -0x4bbef64b, -0x604eacb9, 0x0266ae34, 0x736bd399 },
+ { -0x4505fa3d, 0x7d1c7560, -0x391aa19f, -0x4c1e5f60, -0x3f299b8d, -0x1cad68e8, -0x3df3cb7a, 0x41546b11 },
+ { -0x6ccb4c4c, -0x7aacd2b0, 0x60816573, 0x46fd114b, 0x425c8375, -0x33a0a0d0, -0x478054a4, 0x412295a2 }
+ },
+ {
+ { -0x1d6c153a, 0x2e655261, 0x2133acdb, -0x7ba56dfd, 0x7900996b, 0x460975cb, 0x195add80, 0x0760bb8d },
+ { -0x0a812917, 0x19c99b88, 0x6df8c825, 0x5393cb26, -0x4cf52d8d, 0x5cee3213, -0x4ad2d1cc, 0x14e153eb },
+ { -0x32197e76, 0x413e1a17, -0x12965f7c, 0x57156da9, 0x46caccb1, 0x2cbf268f, -0x3cc53a0e, 0x6b34be9b }
+ },
+ {
+ { 0x6571f2d3, 0x11fc6965, 0x530e737a, -0x393617bb, -0x2b01afcb, -0x1cc5185e, 0x2e6dd30b, 0x01b9c7b6 },
+ { 0x3a78c0b2, -0x0c20d09c, -0x0dd1fd84, 0x4c3e971e, 0x49c1b5a3, -0x1382e3a2, 0x0922dd2d, 0x2012c18f },
+ { 0x5ac89d29, -0x77f4aa1b, 0x45a0a763, 0x1483241f, -0x3d1893e1, 0x3d36efdf, 0x4e4bade8, 0x08af5b78 }
+ },
+ {
+ { -0x7633d3b5, -0x1d8ceb2e, -0x5d78e873, 0x4be4bd11, -0x05cc9b32, 0x18d528d6, -0x50267d92, 0x6423c1d5 },
+ { -0x77e0dacd, 0x283499dc, 0x779323b6, -0x62fada26, 0x673441f4, -0x76852205, 0x163a168d, 0x32b79d71 },
+ { -0x12034c96, -0x337a0727, 0x3746e5f9, 0x22bcc28f, -0x061a2c33, -0x1b621cc8, -0x3ec1d234, 0x480a5efb }
+ },
+ {
+ { 0x42ce221f, -0x499eb31c, 0x4c053928, 0x6e199dcc, -0x23e341fd, 0x663fb4a4, 0x691c8e06, 0x24b31d47 },
+ { 0x01622071, 0x0b51e70b, -0x74e2503b, 0x06b505cf, -0x10a55433, 0x2c6bb061, 0x0cb7bf31, 0x47aa2760 },
+ { -0x3fea073d, 0x2a541eed, 0x7c693f7c, 0x11a4fe7e, 0x4ea278d6, -0x0f5099ed, 0x14dda094, 0x545b585d }
+ },
+ {
+ { -0x1c4cde1f, 0x6204e4d0, 0x28ff1e95, 0x3baa637a, 0x5b99bd9e, 0x0b0ccffd, 0x64c8d071, 0x4d22dc3e },
+ { -0x5f2bc5f1, 0x67bf275e, 0x089beebe, -0x521971cc, -0x2b8618d2, 0x4289134c, 0x32ba5454, 0x0f62f9c3 },
+ { -0x29c4a0c7, -0x034b9a77, 0x57cbcf61, 0x5cae6a3f, -0x6ac505fb, -0x01453d2e, 0x36371436, 0x1c0fa01a }
+ },
+},
+{
+ {
+ { 0x54c53fae, -0x3ee11a18, 0x2b4f3ff4, 0x6a0b06c1, -0x1f49858e, 0x33540f80, -0x32f81c11, 0x15f18fc3 },
+ { -0x4383296e, -0x18ab8bb7, -0x1908c221, 0x0f9abeaa, 0x00837e29, 0x4af01ca7, 0x3f1bc183, 0x63ab1b5d },
+ { -0x4fd70b74, 0x32750763, 0x556a065f, 0x06020740, -0x3cb6a4a8, -0x2ac427ee, -0x79a0af73, 0x08706c9b }
+ },
+ {
+ { 0x38b41246, -0x3366e4bf, 0x6f9ac26b, 0x243b9c52, -0x48345443, -0x4610b6b3, -0x2f7d1300, 0x5fba433d },
+ { 0x3d343dff, -0x0c835d55, -0x7f5439e9, 0x1a8c6a2d, -0x2b330036, -0x71b61fcb, -0x455e2e47, 0x48b46bee },
+ { -0x366be530, -0x63b61cab, 0x74498f84, -0x468cb522, 0x66663e5c, 0x41c3fed0, -0x1718ef4d, 0x0ecfedf8 }
+ },
+ {
+ { -0x16bfc89e, 0x744f7463, -0x72033637, -0x08657212, 0x55e4cde3, 0x163a6496, -0x4d7b0bcb, 0x3b61788d },
+ { -0x632b8f27, 0x76430f9f, -0x5bd09ff8, -0x49d53365, 0x59adad5e, 0x1898297c, -0x4873af80, 0x7789dd2d },
+ { 0x0d6ef6b2, -0x4dddd7e7, 0x46ce4bfa, -0x56b5994e, 0x4f0b6cc7, 0x46c1a77a, -0x148cc731, 0x4236ccff }
+ },
+ {
+ { -0x2588820a, 0x3bd82dbf, 0x0b98369e, 0x71b177cc, -0x7af3c967, 0x1d0e8463, 0x48e2d1f1, 0x5a71945b },
+ { 0x0d55e274, -0x7b68bfb3, -0x3b52d4ad, 0x6c6663d9, -0x5256a8cc, -0x13d04f27, -0x324708c4, 0x2617e120 },
+ { 0x405b4b42, 0x6f203dd5, 0x10b24509, 0x327ec604, -0x53d577ba, -0x63cb8dd0, 0x11ffeb6a, 0x77de29fc }
+ },
+ {
+ { -0x13312d36, -0x7ca1ec71, -0x1569c466, -0x736150ed, -0x4de9f15a, -0x36a04040, -0x5278876e, 0x575e66f3 },
+ { -0x7c488758, -0x4f53a837, -0x28016ed4, 0x53cdcca9, -0x00e0a624, 0x61c2b854, -0x0f218254, 0x3a1a2cf0 },
+ { -0x377034c6, -0x667fc5d9, 0x275ec0b0, 0x345a6789, -0x0093d41b, 0x459789d0, 0x1e70a8b2, 0x62f88265 }
+ },
+ {
+ { 0x698a19e0, 0x6d822986, 0x74d78a71, -0x2367de1f, -0x0934e0b9, 0x41a85f31, -0x432563af, 0x352721c2 },
+ { 0x59ff1be4, 0x085ae2c7, 0x3b0e40b7, 0x149145c9, 0x7ff27379, -0x3b981806, -0x2a38c56b, 0x4eeecf0a },
+ { 0x213fc985, 0x48329952, 0x368a1746, 0x1087cf0d, 0x66c15aa5, -0x71ad9e4f, 0x2ed24c21, 0x2d5b2d84 }
+ },
+ {
+ { 0x196ac533, 0x5eb7d13d, -0x247f41d5, 0x377234ec, 0x7cf5ae24, -0x1ebb3004, -0x3bbe5314, 0x5226bcf9 },
+ { -0x142c212f, 0x02cfebd9, 0x39021974, -0x2ba4de89, -0x01cf5e49, 0x7576f813, -0x5cb1093e, 0x5691b6f9 },
+ { 0x23e5b547, 0x79ee6c72, -0x7ccf2987, 0x6f5f5076, 0x6d8adce9, -0x128c1e17, 0x1d8ccc03, 0x27c3da1e }
+ },
+ {
+ { 0x630ef9f6, 0x28302e71, 0x2b64cee0, -0x3d2b5dfd, 0x4b6292be, 0x09082030, -0x57d520e8, 0x5fca747a },
+ { 0x3fe24c74, 0x7eb9efb2, 0x1651be01, 0x3e50f49f, 0x21858dea, 0x3ea732dc, 0x5bb810f9, 0x17377bd7 },
+ { 0x5c258ea5, 0x232a03c3, 0x6bcb0cf1, -0x790dc5d4, 0x2e442166, 0x3dad8d0d, -0x548979d5, 0x04a8933c }
+ },
+},
+{
+ {
+ { -0x736c95b0, 0x69082b0e, -0x3e253a4a, -0x06365fcb, -0x3b2049cc, 0x6fb73e54, 0x1d2bc140, 0x4005419b },
+ { 0x22943dff, -0x2d39fb4a, 0x44cfb3a0, -0x43734132, -0x687f7988, 0x5d254ff3, 0x3b1ca6bf, 0x0fa3614f },
+ { -0x46417d10, -0x5ffc0143, 0x3a44ac90, 0x2089c1af, 0x1954fa8e, -0x07b6606f, -0x10bf54be, 0x1fba218a }
+ },
+ {
+ { 0x3e7b0194, 0x4f3e5704, 0x08daaf7f, -0x57e2c112, -0x6623210f, -0x37c63955, -0x00889e2b, 0x6c535d13 },
+ { -0x05370ac2, -0x54ab6bb8, 0x7ba63741, -0x7e091766, 0x6c2b5e01, 0x74fd6c7d, -0x573791be, 0x392e3aca },
+ { 0x3e8a35af, 0x4cbd34e9, 0x5887e816, 0x2e078144, -0x0d654f55, 0x19319c76, -0x2af53ec5, 0x25e17fe4 }
+ },
+ {
+ { 0x76f121a7, -0x6ea0800b, 0x2fcd87e3, -0x3cb5cdd9, 0x4d1be526, -0x3345d022, -0x76967665, 0x6bba828f },
+ { 0x1e04f676, 0x0a289bd7, -0x29bdf06b, 0x208e1c52, 0x34691fab, 0x5186d8b0, 0x2a9fb351, 0x25575144 },
+ { -0x6f01c6ff, -0x1d2e439a, -0x5f66852b, 0x4cb54a18, -0x507b9f2c, -0x68e296ec, 0x7f6b7be4, 0x559d504f }
+ },
+ {
+ { -0x092d9903, -0x63b76e19, 0x0307781b, 0x0744a19b, 0x6061e23b, -0x77c770e3, 0x354bd50e, 0x123ea6a3 },
+ { -0x4c14ab2b, -0x588c7c88, -0x5aaac384, 0x1d69d366, -0x06d7ff46, 0x0a26cf62, -0x7f81cde9, 0x01ab12d5 },
+ { 0x41e32d96, 0x118d1890, -0x27cea7b8, -0x46121c3e, -0x27cdba27, 0x1eab4271, -0x36e75eac, 0x4a3961e2 }
+ },
+ {
+ { -0x0cdcc0e2, 0x0327d644, 0x34fcf016, 0x499a260e, -0x0d254687, -0x7c4a58ea, -0x642beee1, 0x68aceead },
+ { -0x07194460, 0x71dc3be0, 0x7effe30a, -0x293107cc, -0x1ec5b896, -0x566dbda1, -0x04e2489d, 0x2cd6bce3 },
+ { -0x0c283df0, 0x38b4c90e, -0x4852fbf4, 0x308e6e24, -0x4818c1dd, 0x3860d9f1, -0x4af70a69, 0x595760d5 }
+ },
+ {
+ { -0x02fdd870, -0x77d53415, -0x3beea8a0, -0x7650ccfb, 0x7d3473f4, 0x65f492e3, 0x54515a2b, 0x2cb2c5df },
+ { 0x04aa6397, 0x6129bfe1, -0x5b580335, -0x7069fff8, 0x7d909458, 0x3f8bc089, -0x234d6e57, 0x709fa43e },
+ { 0x63fd2aca, -0x14f5a274, 0x2e694eff, -0x2dd43e9a, -0x07344fc6, 0x2723f36e, -0x0f37ece1, 0x70f029ec }
+ },
+ {
+ { 0x5e10b0b9, 0x2a6aafaa, -0x10fbe557, 0x78f0a370, -0x55c529e1, 0x773efb77, -0x58b4261f, 0x44eca5a2 },
+ { 0x2eed3e33, 0x461307b3, -0x5baa7e19, -0x51fbd0cd, 0x195f0366, -0x36bbb62d, 0x6c314858, 0x0b7d5d8a },
+ { 0x7b95d543, 0x25d44832, -0x5ccbf0e3, 0x70d38300, 0x60e1c52b, -0x21e3ace4, 0x2c7de9e4, 0x27222451 }
+ },
+ {
+ { 0x42a975fc, -0x40844476, -0x69525ca8, -0x73a3c689, -0x321255b8, -0x1d803891, -0x0943df5a, 0x19735fd7 },
+ { 0x49c5342e, 0x1abc92af, -0x4d190530, -0x001127ef, -0x0337b1d7, -0x105d7373, -0x5bb33abd, 0x11b5df18 },
+ { 0x42c84266, -0x1c546f30, 0x7f19547e, -0x147b71f1, 0x65a497b9, 0x2503a1d0, -0x6e2076a1, 0x0fef9111 }
+ },
+},
+{
+ {
+ { 0x5b1c16b7, 0x6ab5dcb8, 0x3c7b27a5, -0x6b3f0318, 0x735517be, -0x5b4ee3e6, -0x45f15056, 0x499238d0 },
+ { -0x54e39147, -0x4eaf835f, 0x16b687b3, -0x42bb70c2, 0x2c7a91ab, 0x3455fb7f, 0x2f2adec1, 0x7579229e },
+ { 0x7aba8b57, -0x130b91ae, -0x742e9b85, 0x15a08c47, 0x5f706fef, 0x7af1c6a6, -0x0fc5cf2b, 0x6345fa78 }
+ },
+ {
+ { -0x42270f5c, -0x6c2c3417, -0x02e88cfe, -0x24ead3e5, 0x7f17a875, 0x7dbddc6d, -0x70bd9102, 0x3e1a71cc },
+ { 0x1015e7a1, -0x20fd06a1, -0x564bfd9d, 0x790ec41d, 0x33ea1107, 0x4d3a0ea1, -0x1cc50737, 0x54f70be7 },
+ { -0x6f45429e, -0x37c35c1d, 0x0291c833, -0x7f121c99, -0x2c86ff3c, -0x377fc734, 0x1ec31fa1, 0x2c5fc023 }
+ },
+ {
+ { 0x02456e65, -0x3bdd1b2f, -0x352b846f, -0x78beb53f, -0x5d490023, 0x1592e2bb, -0x0a3deff1, 0x75d9d2bf },
+ { 0x17038b4f, -0x01456ee9, -0x3621107f, -0x1aedc8df, 0x5d0d8834, 0x1c97e4e7, 0x23dc3bc6, 0x68afae7a },
+ { 0x3626e81c, 0x5bd9b476, -0x435fd123, -0x766996ca, 0x61f077b3, 0x0a41193d, 0x00ce5471, 0x3097a242 }
+ },
+ {
+ { 0x6695c486, -0x5e9d18dc, 0x35a89607, 0x131d6334, -0x5f2ed5c9, 0x30521561, -0x59504c9d, 0x56704bad },
+ { -0x380747b4, 0x57427734, 0x01b270e9, -0x0ebe5ec2, -0x4b1a9b5a, 0x02d1adfe, -0x317c42b8, 0x4bb23d92 },
+ { 0x52f912b9, -0x5093b559, -0x27988f38, 0x5e665f6c, -0x5c3732a8, 0x4c35ac83, 0x10a58a7e, 0x2b7a29c0 }
+ },
+ {
+ { -0x40fff792, 0x33810a23, -0x18c90084, -0x50316da2, -0x1db6dd2c, 0x3d60e670, 0x4f96061b, 0x11ce9e71 },
+ { -0x2f3e313d, -0x3bff8089, -0x453b6d08, -0x72efdf4a, 0x7e69daaf, 0x32ec29d5, -0x626a0320, 0x59940875 },
+ { -0x27ea453f, 0x219ef713, 0x485be25c, -0x0ebeb9a3, 0x4e513c51, 0x6d5447cc, 0x5ef44393, 0x174926be }
+ },
+ {
+ { -0x6c15fdd2, 0x3ef5d415, 0x0ed0eed6, 0x5cbcc1a2, 0x07382c8c, -0x702db131, 0x06d8e1ad, 0x6fa42ead },
+ { -0x03a42a45, -0x4a214d07, -0x1e27ef1f, -0x6d2558d6, -0x48d5e3a7, -0x503b3024, 0x3fc22a24, 0x497d7881 },
+ { 0x1f73371f, -0x1d897db6, 0x4f5b6736, 0x7f7cf01c, 0x04fa46e7, 0x7e201fe3, 0x57808c96, 0x785a36a3 }
+ },
+ {
+ { 0x5d517bc3, 0x07044298, -0x519ac988, 0x6acd56c7, -0x67a5889d, 0x00a27983, -0x1aed99d5, 0x5167effa },
+ { 0x63014d2b, -0x7da04203, 0x6ca7578b, -0x37adc964, 0x5c0b5df0, 0x5b2fcd28, 0x58048c8f, 0x12ab214c },
+ { 0x0f53c4b6, -0x42b1561f, -0x7536e5ec, 0x1673dc5f, 0x2acc1aba, -0x5707e5b2, 0x24332a25, 0x33a92a79 }
+ },
+ {
+ { 0x218f2ada, 0x7ba95ba0, 0x330fb9ca, -0x300bdd79, 0x56c6d907, -0x2525b693, -0x0b4111ac, 0x5380c296 },
+ { 0x27996c02, -0x622e0b67, -0x1fb2e8ae, 0x0cb3b058, 0x7fd02c3e, 0x1f7e8896, -0x3474c14f, 0x2f964268 },
+ { 0x66898d0a, -0x62b0d8fc, 0x0aff3f7a, 0x3d098799, 0x67daba45, -0x2f610c9e, 0x7b1c669c, 0x7761455e }
+ },
+},
+};
+#elif defined(CURVED25519_128BIT)
+static const ge_precomp base[32][8] = {
+{
+ {
+ { 0x493c6f58c3b85, 0x0df7181c325f7, 0x0f50b0b3e4cb7, 0x5329385a44c32, 0x07cf9d3a33d4b },
+ { 0x03905d740913e, 0x0ba2817d673a2, 0x23e2827f4e67c, 0x133d2e0c21a34, 0x44fd2f9298f81 },
+ { 0x11205877aaa68, 0x479955893d579, 0x50d66309b67a0, 0x2d42d0dbee5ee, 0x6f117b689f0c6 },
+ },
+ {
+ { 0x4e7fc933c71d7, 0x2cf41feb6b244, 0x7581c0a7d1a76, 0x7172d534d32f0, 0x590c063fa87d2 },
+ { 0x1a56042b4d5a8, 0x189cc159ed153, 0x5b8deaa3cae04, 0x2aaf04f11b5d8, 0x6bb595a669c92 },
+ { 0x2a8b3a59b7a5f, 0x3abb359ef087f, 0x4f5a8c4db05af, 0x5b9a807d04205, 0x701af5b13ea50 },
+ },
+ {
+ { 0x5b0a84cee9730, 0x61d10c97155e4, 0x4059cc8096a10, 0x47a608da8014f, 0x7a164e1b9a80f },
+ { 0x11fe8a4fcd265, 0x7bcb8374faacc, 0x52f5af4ef4d4f, 0x5314098f98d10, 0x2ab91587555bd },
+ { 0x6933f0dd0d889, 0x44386bb4c4295, 0x3cb6d3162508c, 0x26368b872a2c6, 0x5a2826af12b9b },
+ },
+ {
+ { 0x351b98efc099f, 0x68fbfa4a7050e, 0x42a49959d971b, 0x393e51a469efd, 0x680e910321e58 },
+ { 0x6050a056818bf, 0x62acc1f5532bf, 0x28141ccc9fa25, 0x24d61f471e683, 0x27933f4c7445a },
+ { 0x3fbe9c476ff09, 0x0af6b982e4b42, 0x0ad1251ba78e5, 0x715aeedee7c88, 0x7f9d0cbf63553 },
+ },
+ {
+ { 0x2bc4408a5bb33, 0x078ebdda05442, 0x2ffb112354123, 0x375ee8df5862d, 0x2945ccf146e20 },
+ { 0x182c3a447d6ba, 0x22964e536eff2, 0x192821f540053, 0x2f9f19e788e5c, 0x154a7e73eb1b5 },
+ { 0x3dbf1812a8285, 0x0fa17ba3f9797, 0x6f69cb49c3820, 0x34d5a0db3858d, 0x43aabe696b3bb },
+ },
+ {
+ { 0x4eeeb77157131, 0x1201915f10741, 0x1669cda6c9c56, 0x45ec032db346d, 0x51e57bb6a2cc3 },
+ { 0x006b67b7d8ca4, 0x084fa44e72933, 0x1154ee55d6f8a, 0x4425d842e7390, 0x38b64c41ae417 },
+ { 0x4326702ea4b71, 0x06834376030b5, 0x0ef0512f9c380, 0x0f1a9f2512584, 0x10b8e91a9f0d6 },
+ },
+ {
+ { 0x25cd0944ea3bf, 0x75673b81a4d63, 0x150b925d1c0d4, 0x13f38d9294114, 0x461bea69283c9 },
+ { 0x72c9aaa3221b1, 0x267774474f74d, 0x064b0e9b28085, 0x3f04ef53b27c9, 0x1d6edd5d2e531 },
+ { 0x36dc801b8b3a2, 0x0e0a7d4935e30, 0x1deb7cecc0d7d, 0x053a94e20dd2c, 0x7a9fbb1c6a0f9 },
+ },
+ {
+ { 0x7596604dd3e8f, 0x6fc510e058b36, 0x3670c8db2cc0d, 0x297d899ce332f, 0x0915e76061bce },
+ { 0x75dedf39234d9, 0x01c36ab1f3c54, 0x0f08fee58f5da, 0x0e19613a0d637, 0x3a9024a1320e0 },
+ { 0x1f5d9c9a2911a, 0x7117994fafcf8, 0x2d8a8cae28dc5, 0x74ab1b2090c87, 0x26907c5c2ecc4 },
+ },
+},
+{
+ {
+ { 0x4dd0e632f9c1d, 0x2ced12622a5d9, 0x18de9614742da, 0x79ca96fdbb5d4, 0x6dd37d49a00ee },
+ { 0x3635449aa515e, 0x3e178d0475dab, 0x50b4712a19712, 0x2dcc2860ff4ad, 0x30d76d6f03d31 },
+ { 0x444172106e4c7, 0x01251afed2d88, 0x534fc9bed4f5a, 0x5d85a39cf5234, 0x10c697112e864 },
+ },
+ {
+ { 0x62aa08358c805, 0x46f440848e194, 0x447b771a8f52b, 0x377ba3269d31d, 0x03bf9baf55080 },
+ { 0x3c4277dbe5fde, 0x5a335afd44c92, 0x0c1164099753e, 0x70487006fe423, 0x25e61cabed66f },
+ { 0x3e128cc586604, 0x5968b2e8fc7e2, 0x049a3d5bd61cf, 0x116505b1ef6e6, 0x566d78634586e },
+ },
+ {
+ { 0x54285c65a2fd0, 0x55e62ccf87420, 0x46bb961b19044, 0x1153405712039, 0x14fba5f34793b },
+ { 0x7a49f9cc10834, 0x2b513788a22c6, 0x5ff4b6ef2395b, 0x2ec8e5af607bf, 0x33975bca5ecc3 },
+ { 0x746166985f7d4, 0x09939000ae79a, 0x5844c7964f97a, 0x13617e1f95b3d, 0x14829cea83fc5 },
+ },
+ {
+ { 0x70b2f4e71ecb8, 0x728148efc643c, 0x0753e03995b76, 0x5bf5fb2ab6767, 0x05fc3bc4535d7 },
+ { 0x37b8497dd95c2, 0x61549d6b4ffe8, 0x217a22db1d138, 0x0b9cf062eb09e, 0x2fd9c71e5f758 },
+ { 0x0b3ae52afdedd, 0x19da76619e497, 0x6fa0654d2558e, 0x78219d25e41d4, 0x373767475c651 },
+ },
+ {
+ { 0x095cb14246590, 0x002d82aa6ac68, 0x442f183bc4851, 0x6464f1c0a0644, 0x6bf5905730907 },
+ { 0x299fd40d1add9, 0x5f2de9a04e5f7, 0x7c0eebacc1c59, 0x4cca1b1f8290a, 0x1fbea56c3b18f },
+ { 0x778f1e1415b8a, 0x6f75874efc1f4, 0x28a694019027f, 0x52b37a96bdc4d, 0x02521cf67a635 },
+ },
+ {
+ { 0x46720772f5ee4, 0x632c0f359d622, 0x2b2092ba3e252, 0x662257c112680, 0x001753d9f7cd6 },
+ { 0x7ee0b0a9d5294, 0x381fbeb4cca27, 0x7841f3a3e639d, 0x676ea30c3445f, 0x3fa00a7e71382 },
+ { 0x1232d963ddb34, 0x35692e70b078d, 0x247ca14777a1f, 0x6db556be8fcd0, 0x12b5fe2fa048e },
+ },
+ {
+ { 0x37c26ad6f1e92, 0x46a0971227be5, 0x4722f0d2d9b4c, 0x3dc46204ee03a, 0x6f7e93c20796c },
+ { 0x0fbc496fce34d, 0x575be6b7dae3e, 0x4a31585cee609, 0x037e9023930ff, 0x749b76f96fb12 },
+ { 0x2f604aea6ae05, 0x637dc939323eb, 0x3fdad9b048d47, 0x0a8b0d4045af7, 0x0fcec10f01e02 },
+ },
+ {
+ { 0x2d29dc4244e45, 0x6927b1bc147be, 0x0308534ac0839, 0x4853664033f41, 0x413779166feab },
+ { 0x558a649fe1e44, 0x44635aeefcc89, 0x1ff434887f2ba, 0x0f981220e2d44, 0x4901aa7183c51 },
+ { 0x1b7548c1af8f0, 0x7848c53368116, 0x01b64e7383de9, 0x109fbb0587c8f, 0x41bb887b726d1 },
+ },
+},
+{
+ {
+ { 0x34c597c6691ae, 0x7a150b6990fc4, 0x52beb9d922274, 0x70eed7164861a, 0x0a871e070c6a9 },
+ { 0x07d44744346be, 0x282b6a564a81d, 0x4ed80f875236b, 0x6fbbe1d450c50, 0x4eb728c12fcdb },
+ { 0x1b5994bbc8989, 0x74b7ba84c0660, 0x75678f1cdaeb8, 0x23206b0d6f10c, 0x3ee7300f2685d },
+ },
+ {
+ { 0x27947841e7518, 0x32c7388dae87f, 0x414add3971be9, 0x01850832f0ef1, 0x7d47c6a2cfb89 },
+ { 0x255e49e7dd6b7, 0x38c2163d59eba, 0x3861f2a005845, 0x2e11e4ccbaec9, 0x1381576297912 },
+ { 0x2d0148ef0d6e0, 0x3522a8de787fb, 0x2ee055e74f9d2, 0x64038f6310813, 0x148cf58d34c9e },
+ },
+ {
+ { 0x72f7d9ae4756d, 0x7711e690ffc4a, 0x582a2355b0d16, 0x0dccfe885b6b4, 0x278febad4eaea },
+ { 0x492f67934f027, 0x7ded0815528d4, 0x58461511a6612, 0x5ea2e50de1544, 0x3ff2fa1ebd5db },
+ { 0x2681f8c933966, 0x3840521931635, 0x674f14a308652, 0x3bd9c88a94890, 0x4104dd02fe9c6 },
+ },
+ {
+ { 0x14e06db096ab8, 0x1219c89e6b024, 0x278abd486a2db, 0x240b292609520, 0x0165b5a48efca },
+ { 0x2bf5e1124422a, 0x673146756ae56, 0x14ad99a87e830, 0x1eaca65b080fd, 0x2c863b00afaf5 },
+ { 0x0a474a0846a76, 0x099a5ef981e32, 0x2a8ae3c4bbfe6, 0x45c34af14832c, 0x591b67d9bffec },
+ },
+ {
+ { 0x1b3719f18b55d, 0x754318c83d337, 0x27c17b7919797, 0x145b084089b61, 0x489b4f8670301 },
+ { 0x70d1c80b49bfa, 0x3d57e7d914625, 0x3c0722165e545, 0x5e5b93819e04f, 0x3de02ec7ca8f7 },
+ { 0x2102d3aeb92ef, 0x68c22d50c3a46, 0x42ea89385894e, 0x75f9ebf55f38c, 0x49f5fbba496cb },
+ },
+ {
+ { 0x5628c1e9c572e, 0x598b108e822ab, 0x55d8fae29361a, 0x0adc8d1a97b28, 0x06a1a6c288675 },
+ { 0x49a108a5bcfd4, 0x6178c8e7d6612, 0x1f03473710375, 0x73a49614a6098, 0x5604a86dcbfa6 },
+ { 0x0d1d47c1764b6, 0x01c08316a2e51, 0x2b3db45c95045, 0x1634f818d300c, 0x20989e89fe274 },
+ },
+ {
+ { 0x4278b85eaec2e, 0x0ef59657be2ce, 0x72fd169588770, 0x2e9b205260b30, 0x730b9950f7059 },
+ { 0x777fd3a2dcc7f, 0x594a9fb124932, 0x01f8e80ca15f0, 0x714d13cec3269, 0x0403ed1d0ca67 },
+ { 0x32d35874ec552, 0x1f3048df1b929, 0x300d73b179b23, 0x6e67be5a37d0b, 0x5bd7454308303 },
+ },
+ {
+ { 0x4932115e7792a, 0x457b9bbb930b8, 0x68f5d8b193226, 0x4164e8f1ed456, 0x5bb7db123067f },
+ { 0x2d19528b24cc2, 0x4ac66b8302ff3, 0x701c8d9fdad51, 0x6c1b35c5b3727, 0x133a78007380a },
+ { 0x1f467c6ca62be, 0x2c4232a5dc12c, 0x7551dc013b087, 0x0690c11b03bcd, 0x740dca6d58f0e },
+ },
+},
+{
+ {
+ { 0x28c570478433c, 0x1d8502873a463, 0x7641e7eded49c, 0x1ecedd54cf571, 0x2c03f5256c2b0 },
+ { 0x0ee0752cfce4e, 0x660dd8116fbe9, 0x55167130fffeb, 0x1c682b885955c, 0x161d25fa963ea },
+ { 0x718757b53a47d, 0x619e18b0f2f21, 0x5fbdfe4c1ec04, 0x5d798c81ebb92, 0x699468bdbd96b },
+ },
+ {
+ { 0x53de66aa91948, 0x045f81a599b1b, 0x3f7a8bd214193, 0x71d4da412331a, 0x293e1c4e6c4a2 },
+ { 0x72f46f4dafecf, 0x2948ffadef7a3, 0x11ecdfdf3bc04, 0x3c2e98ffeed25, 0x525219a473905 },
+ { 0x6134b925112e1, 0x6bb942bb406ed, 0x070c445c0dde2, 0x411d822c4d7a3, 0x5b605c447f032 },
+ },
+ {
+ { 0x1fec6f0e7f04c, 0x3cebc692c477d, 0x077986a19a95e, 0x6eaaaa1778b0f, 0x2f12fef4cc5ab },
+ { 0x5805920c47c89, 0x1924771f9972c, 0x38bbddf9fc040, 0x1f7000092b281, 0x24a76dcea8aeb },
+ { 0x522b2dfc0c740, 0x7e8193480e148, 0x33fd9a04341b9, 0x3c863678a20bc, 0x5e607b2518a43 },
+ },
+ {
+ { 0x4431ca596cf14, 0x015da7c801405, 0x03c9b6f8f10b5, 0x0346922934017, 0x201f33139e457 },
+ { 0x31d8f6cdf1818, 0x1f86c4b144b16, 0x39875b8d73e9d, 0x2fbf0d9ffa7b3, 0x5067acab6ccdd },
+ { 0x27f6b08039d51, 0x4802f8000dfaa, 0x09692a062c525, 0x1baea91075817, 0x397cba8862460 },
+ },
+ {
+ { 0x5c3fbc81379e7, 0x41bbc255e2f02, 0x6a3f756998650, 0x1297fd4e07c42, 0x771b4022c1e1c },
+ { 0x13093f05959b2, 0x1bd352f2ec618, 0x075789b88ea86, 0x61d1117ea48b9, 0x2339d320766e6 },
+ { 0x5d986513a2fa7, 0x63f3a99e11b0f, 0x28a0ecfd6b26d, 0x53b6835e18d8f, 0x331a189219971 },
+ },
+ {
+ { 0x12f3a9d7572af, 0x10d00e953c4ca, 0x603df116f2f8a, 0x33dc276e0e088, 0x1ac9619ff649a },
+ { 0x66f45fb4f80c6, 0x3cc38eeb9fea2, 0x107647270db1f, 0x710f1ea740dc8, 0x31167c6b83bdf },
+ { 0x33842524b1068, 0x77dd39d30fe45, 0x189432141a0d0, 0x088fe4eb8c225, 0x612436341f08b },
+ },
+ {
+ { 0x349e31a2d2638, 0x0137a7fa6b16c, 0x681ae92777edc, 0x222bfc5f8dc51, 0x1522aa3178d90 },
+ { 0x541db874e898d, 0x62d80fb841b33, 0x03e6ef027fa97, 0x7a03c9e9633e8, 0x46ebe2309e5ef },
+ { 0x02f5369614938, 0x356e5ada20587, 0x11bc89f6bf902, 0x036746419c8db, 0x45fe70f505243 },
+ },
+ {
+ { 0x24920c8951491, 0x107ec61944c5e, 0x72752e017c01f, 0x122b7dda2e97a, 0x16619f6db57a2 },
+ { 0x075a6960c0b8c, 0x6dde1c5e41b49, 0x42e3f516da341, 0x16a03fda8e79e, 0x428d1623a0e39 },
+ { 0x74a4401a308fd, 0x06ed4b9558109, 0x746f1f6a08867, 0x4636f5c6f2321, 0x1d81592d60bd3 },
+ },
+},
+{
+ {
+ { 0x5b69f7b85c5e8, 0x17a2d175650ec, 0x4cc3e6dbfc19e, 0x73e1d3873be0e, 0x3a5f6d51b0af8 },
+ { 0x68756a60dac5f, 0x55d757b8aec26, 0x3383df45f80bd, 0x6783f8c9f96a6, 0x20234a7789ecd },
+ { 0x20db67178b252, 0x73aa3da2c0eda, 0x79045c01c70d3, 0x1b37b15251059, 0x7cd682353cffe },
+ },
+ {
+ { 0x5cd6068acf4f3, 0x3079afc7a74cc, 0x58097650b64b4, 0x47fabac9c4e99, 0x3ef0253b2b2cd },
+ { 0x1a45bd887fab6, 0x65748076dc17c, 0x5b98000aa11a8, 0x4a1ecc9080974, 0x2838c8863bdc0 },
+ { 0x3b0cf4a465030, 0x022b8aef57a2d, 0x2ad0677e925ad, 0x4094167d7457a, 0x21dcb8a606a82 },
+ },
+ {
+ { 0x500fabe7731ba, 0x7cc53c3113351, 0x7cf65fe080d81, 0x3c5d966011ba1, 0x5d840dbf6c6f6 },
+ { 0x004468c9d9fc8, 0x5da8554796b8c, 0x3b8be70950025, 0x6d5892da6a609, 0x0bc3d08194a31 },
+ { 0x6380d309fe18b, 0x4d73c2cb8ee0d, 0x6b882adbac0b6, 0x36eabdddd4cbe, 0x3a4276232ac19 },
+ },
+ {
+ { 0x0c172db447ecb, 0x3f8c505b7a77f, 0x6a857f97f3f10, 0x4fcc0567fe03a, 0x0770c9e824e1a },
+ { 0x2432c8a7084fa, 0x47bf73ca8a968, 0x1639176262867, 0x5e8df4f8010ce, 0x1ff177cea16de },
+ { 0x1d99a45b5b5fd, 0x523674f2499ec, 0x0f8fa26182613, 0x58f7398048c98, 0x39f264fd41500 },
+ },
+ {
+ { 0x34aabfe097be1, 0x43bfc03253a33, 0x29bc7fe91b7f3, 0x0a761e4844a16, 0x65c621272c35f },
+ { 0x53417dbe7e29c, 0x54573827394f5, 0x565eea6f650dd, 0x42050748dc749, 0x1712d73468889 },
+ { 0x389f8ce3193dd, 0x2d424b8177ce5, 0x073fa0d3440cd, 0x139020cd49e97, 0x22f9800ab19ce },
+ },
+ {
+ { 0x29fdd9a6efdac, 0x7c694a9282840, 0x6f7cdeee44b3a, 0x55a3207b25cc3, 0x4171a4d38598c },
+ { 0x2368a3e9ef8cb, 0x454aa08e2ac0b, 0x490923f8fa700, 0x372aa9ea4582f, 0x13f416cd64762 },
+ { 0x758aa99c94c8c, 0x5f6001700ff44, 0x7694e488c01bd, 0x0d5fde948eed6, 0x508214fa574bd },
+ },
+ {
+ { 0x215bb53d003d6, 0x1179e792ca8c3, 0x1a0e96ac840a2, 0x22393e2bb3ab6, 0x3a7758a4c86cb },
+ { 0x269153ed6fe4b, 0x72a23aef89840, 0x052be5299699c, 0x3a5e5ef132316, 0x22f960ec6faba },
+ { 0x111f693ae5076, 0x3e3bfaa94ca90, 0x445799476b887, 0x24a0912464879, 0x5d9fd15f8de7f },
+ },
+ {
+ { 0x44d2aeed7521e, 0x50865d2c2a7e4, 0x2705b5238ea40, 0x46c70b25d3b97, 0x3bc187fa47eb9 },
+ { 0x408d36d63727f, 0x5faf8f6a66062, 0x2bb892da8de6b, 0x769d4f0c7e2e6, 0x332f35914f8fb },
+ { 0x70115ea86c20c, 0x16d88da24ada8, 0x1980622662adf, 0x501ebbc195a9d, 0x450d81ce906fb },
+ },
+},
+{
+ {
+ { 0x4d8961cae743f, 0x6bdc38c7dba0e, 0x7d3b4a7e1b463, 0x0844bdee2adf3, 0x4cbad279663ab },
+ { 0x3b6a1a6205275, 0x2e82791d06dcf, 0x23d72caa93c87, 0x5f0b7ab68aaf4, 0x2de25d4ba6345 },
+ { 0x19024a0d71fcd, 0x15f65115f101a, 0x4e99067149708, 0x119d8d1cba5af, 0x7d7fbcefe2007 },
+ },
+ {
+ { 0x45dc5f3c29094, 0x3455220b579af, 0x070c1631e068a, 0x26bc0630e9b21, 0x4f9cd196dcd8d },
+ { 0x71e6a266b2801, 0x09aae73e2df5d, 0x40dd8b219b1a3, 0x546fb4517de0d, 0x5975435e87b75 },
+ { 0x297d86a7b3768, 0x4835a2f4c6332, 0x070305f434160, 0x183dd014e56ae, 0x7ccdd084387a0 },
+ },
+ {
+ { 0x484186760cc93, 0x7435665533361, 0x02f686336b801, 0x5225446f64331, 0x3593ca848190c },
+ { 0x6422c6d260417, 0x212904817bb94, 0x5a319deb854f5, 0x7a9d4e060da7d, 0x428bd0ed61d0c },
+ { 0x3189a5e849aa7, 0x6acbb1f59b242, 0x7f6ef4753630c, 0x1f346292a2da9, 0x27398308da2d6 },
+ },
+ {
+ { 0x10e4c0a702453, 0x4daafa37bd734, 0x49f6bdc3e8961, 0x1feffdcecdae6, 0x572c2945492c3 },
+ { 0x38d28435ed413, 0x4064f19992858, 0x7680fbef543cd, 0x1aadd83d58d3c, 0x269597aebe8c3 },
+ { 0x7c745d6cd30be, 0x27c7755df78ef, 0x1776833937fa3, 0x5405116441855, 0x7f985498c05bc },
+ },
+ {
+ { 0x615520fbf6363, 0x0b9e9bf74da6a, 0x4fe8308201169, 0x173f76127de43, 0x30f2653cd69b1 },
+ { 0x1ce889f0be117, 0x36f6a94510709, 0x7f248720016b4, 0x1821ed1e1cf91, 0x76c2ec470a31f },
+ { 0x0c938aac10c85, 0x41b64ed797141, 0x1beb1c1185e6d, 0x1ed5490600f07, 0x2f1273f159647 },
+ },
+ {
+ { 0x08bd755a70bc0, 0x49e3a885ce609, 0x16585881b5ad6, 0x3c27568d34f5e, 0x38ac1997edc5f },
+ { 0x1fc7c8ae01e11, 0x2094d5573e8e7, 0x5ca3cbbf549d2, 0x4f920ecc54143, 0x5d9e572ad85b6 },
+ { 0x6b517a751b13b, 0x0cfd370b180cc, 0x5377925d1f41a, 0x34e56566008a2, 0x22dfcd9cbfe9e },
+ },
+ {
+ { 0x459b4103be0a1, 0x59a4b3f2d2add, 0x7d734c8bb8eeb, 0x2393cbe594a09, 0x0fe9877824cde },
+ { 0x3d2e0c30d0cd9, 0x3f597686671bb, 0x0aa587eb63999, 0x0e3c7b592c619, 0x6b2916c05448c },
+ { 0x334d10aba913b, 0x045cdb581cfdb, 0x5e3e0553a8f36, 0x50bb3041effb2, 0x4c303f307ff00 },
+ },
+ {
+ { 0x403580dd94500, 0x48df77d92653f, 0x38a9fe3b349ea, 0x0ea89850aafe1, 0x416b151ab706a },
+ { 0x23bd617b28c85, 0x6e72ee77d5a61, 0x1a972ff174dde, 0x3e2636373c60f, 0x0d61b8f78b2ab },
+ { 0x0d7efe9c136b0, 0x1ab1c89640ad5, 0x55f82aef41f97, 0x46957f317ed0d, 0x191a2af74277e },
+ },
+},
+{
+ {
+ { 0x62b434f460efb, 0x294c6c0fad3fc, 0x68368937b4c0f, 0x5c9f82910875b, 0x237e7dbe00545 },
+ { 0x6f74bc53c1431, 0x1c40e5dbbd9c2, 0x6c8fb9cae5c97, 0x4845c5ce1b7da, 0x7e2e0e450b5cc },
+ { 0x575ed6701b430, 0x4d3e17fa20026, 0x791fc888c4253, 0x2f1ba99078ac1, 0x71afa699b1115 },
+ },
+ {
+ { 0x23c1c473b50d6, 0x3e7671de21d48, 0x326fa5547a1e8, 0x50e4dc25fafd9, 0x00731fbc78f89 },
+ { 0x66f9b3953b61d, 0x555f4283cccb9, 0x7dd67fb1960e7, 0x14707a1affed4, 0x021142e9c2b1c },
+ { 0x0c71848f81880, 0x44bd9d8233c86, 0x6e8578efe5830, 0x4045b6d7041b5, 0x4c4d6f3347e15 },
+ },
+ {
+ { 0x4ddfc988f1970, 0x4f6173ea365e1, 0x645daf9ae4588, 0x7d43763db623b, 0x38bf9500a88f9 },
+ { 0x7eccfc17d1fc9, 0x4ca280782831e, 0x7b8337db1d7d6, 0x5116def3895fb, 0x193fddaaa7e47 },
+ { 0x2c93c37e8876f, 0x3431a28c583fa, 0x49049da8bd879, 0x4b4a8407ac11c, 0x6a6fb99ebf0d4 },
+ },
+ {
+ { 0x122b5b6e423c6, 0x21e50dff1ddd6, 0x73d76324e75c0, 0x588485495418e, 0x136fda9f42c5e },
+ { 0x6c1bb560855eb, 0x71f127e13ad48, 0x5c6b304905aec, 0x3756b8e889bc7, 0x75f76914a3189 },
+ { 0x4dfb1a305bdd1, 0x3b3ff05811f29, 0x6ed62283cd92e, 0x65d1543ec52e1, 0x022183510be8d },
+ },
+ {
+ { 0x2710143307a7f, 0x3d88fb48bf3ab, 0x249eb4ec18f7a, 0x136115dff295f, 0x1387c441fd404 },
+ { 0x766385ead2d14, 0x0194f8b06095e, 0x08478f6823b62, 0x6018689d37308, 0x6a071ce17b806 },
+ { 0x3c3d187978af8, 0x7afe1c88276ba, 0x51df281c8ad68, 0x64906bda4245d, 0x3171b26aaf1ed },
+ },
+ {
+ { 0x5b7d8b28a47d1, 0x2c2ee149e34c1, 0x776f5629afc53, 0x1f4ea50fc49a9, 0x6c514a6334424 },
+ { 0x7319097564ca8, 0x1844ebc233525, 0x21d4543fdeee1, 0x1ad27aaff1bd2, 0x221fd4873cf08 },
+ { 0x2204f3a156341, 0x537414065a464, 0x43c0c3bedcf83, 0x5557e706ea620, 0x48daa596fb924 },
+ },
+ {
+ { 0x61d5dc84c9793, 0x47de83040c29e, 0x189deb26507e7, 0x4d4e6fadc479a, 0x58c837fa0e8a7 },
+ { 0x28e665ca59cc7, 0x165c715940dd9, 0x0785f3aa11c95, 0x57b98d7e38469, 0x676dd6fccad84 },
+ { 0x1688596fc9058, 0x66f6ad403619f, 0x4d759a87772ef, 0x7856e6173bea4, 0x1c4f73f2c6a57 },
+ },
+ {
+ { 0x6706efc7c3484, 0x6987839ec366d, 0x0731f95cf7f26, 0x3ae758ebce4bc, 0x70459adb7daf6 },
+ { 0x24fbd305fa0bb, 0x40a98cc75a1cf, 0x78ce1220a7533, 0x6217a10e1c197, 0x795ac80d1bf64 },
+ { 0x1db4991b42bb3, 0x469605b994372, 0x631e3715c9a58, 0x7e9cfefcf728f, 0x5fe162848ce21 },
+ },
+},
+{
+ {
+ { 0x1852d5d7cb208, 0x60d0fbe5ce50f, 0x5a1e246e37b75, 0x51aee05ffd590, 0x2b44c043677da },
+ { 0x1214fe194961a, 0x0e1ae39a9e9cb, 0x543c8b526f9f7, 0x119498067e91d, 0x4789d446fc917 },
+ { 0x487ab074eb78e, 0x1d33b5e8ce343, 0x13e419feb1b46, 0x2721f565de6a4, 0x60c52eef2bb9a },
+ },
+ {
+ { 0x3c5c27cae6d11, 0x36a9491956e05, 0x124bac9131da6, 0x3b6f7de202b5d, 0x70d77248d9b66 },
+ { 0x589bc3bfd8bf1, 0x6f93e6aa3416b, 0x4c0a3d6c1ae48, 0x55587260b586a, 0x10bc9c312ccfc },
+ { 0x2e84b3ec2a05b, 0x69da2f03c1551, 0x23a174661a67b, 0x209bca289f238, 0x63755bd3a976f },
+ },
+ {
+ { 0x7101897f1acb7, 0x3d82cb77b07b8, 0x684083d7769f5, 0x52b28472dce07, 0x2763751737c52 },
+ { 0x7a03e2ad10853, 0x213dcc6ad36ab, 0x1a6e240d5bdd6, 0x7c24ffcf8fedf, 0x0d8cc1c48bc16 },
+ { 0x402d36eb419a9, 0x7cef68c14a052, 0x0f1255bc2d139, 0x373e7d431186a, 0x70c2dd8a7ad16 },
+ },
+ {
+ { 0x4967db8ed7e13, 0x15aeed02f523a, 0x6149591d094bc, 0x672f204c17006, 0x32b8613816a53 },
+ { 0x194509f6fec0e, 0x528d8ca31acac, 0x7826d73b8b9fa, 0x24acb99e0f9b3, 0x2e0fac6363948 },
+ { 0x7f7bee448cd64, 0x4e10f10da0f3c, 0x3936cb9ab20e9, 0x7a0fc4fea6cd0, 0x4179215c735a4 },
+ },
+ {
+ { 0x633b9286bcd34, 0x6cab3badb9c95, 0x74e387edfbdfa, 0x14313c58a0fd9, 0x31fa85662241c },
+ { 0x094e7d7dced2a, 0x068fa738e118e, 0x41b640a5fee2b, 0x6bb709df019d4, 0x700344a30cd99 },
+ { 0x26c422e3622f4, 0x0f3066a05b5f0, 0x4e2448f0480a6, 0x244cde0dbf095, 0x24bb2312a9952 },
+ },
+ {
+ { 0x00c2af5f85c6b, 0x0609f4cf2883f, 0x6e86eb5a1ca13, 0x68b44a2efccd1, 0x0d1d2af9ffeb5 },
+ { 0x0ed1732de67c3, 0x308c369291635, 0x33ef348f2d250, 0x004475ea1a1bb, 0x0fee3e871e188 },
+ { 0x28aa132621edf, 0x42b244caf353b, 0x66b064cc2e08a, 0x6bb20020cbdd3, 0x16acd79718531 },
+ },
+ {
+ { 0x1c6c57887b6ad, 0x5abf21fd7592b, 0x50bd41253867a, 0x3800b71273151, 0x164ed34b18161 },
+ { 0x772af2d9b1d3d, 0x6d486448b4e5b, 0x2ce58dd8d18a8, 0x1849f67503c8b, 0x123e0ef6b9302 },
+ { 0x6d94c192fe69a, 0x5475222a2690f, 0x693789d86b8b3, 0x1f5c3bdfb69dc, 0x78da0fc61073f },
+ },
+ {
+ { 0x780f1680c3a94, 0x2a35d3cfcd453, 0x005e5cdc7ddf8, 0x6ee888078ac24, 0x054aa4b316b38 },
+ { 0x15d28e52bc66a, 0x30e1e0351cb7e, 0x30a2f74b11f8c, 0x39d120cd7de03, 0x2d25deeb256b1 },
+ { 0x0468d19267cb8, 0x38cdca9b5fbf9, 0x1bbb05c2ca1e2, 0x3b015758e9533, 0x134610a6ab7da },
+ },
+},
+{
+ {
+ { 0x265e777d1f515, 0x0f1f54c1e39a5, 0x2f01b95522646, 0x4fdd8db9dde6d, 0x654878cba97cc },
+ { 0x38ec78df6b0fe, 0x13caebea36a22, 0x5ebc6e54e5f6a, 0x32804903d0eb8, 0x2102fdba2b20d },
+ { 0x6e405055ce6a1, 0x5024a35a532d3, 0x1f69054daf29d, 0x15d1d0d7a8bd5, 0x0ad725db29ecb },
+ },
+ {
+ { 0x7bc0c9b056f85, 0x51cfebffaffd8, 0x44abbe94df549, 0x7ecbbd7e33121, 0x4f675f5302399 },
+ { 0x267b1834e2457, 0x6ae19c378bb88, 0x7457b5ed9d512, 0x3280d783d05fb, 0x4aefcffb71a03 },
+ { 0x536360415171e, 0x2313309077865, 0x251444334afbc, 0x2b0c3853756e8, 0x0bccbb72a2a86 },
+ },
+ {
+ { 0x55e4c50fe1296, 0x05fdd13efc30d, 0x1c0c6c380e5ee, 0x3e11de3fb62a8, 0x6678fd69108f3 },
+ { 0x6962feab1a9c8, 0x6aca28fb9a30b, 0x56db7ca1b9f98, 0x39f58497018dd, 0x4024f0ab59d6b },
+ { 0x6fa31636863c2, 0x10ae5a67e42b0, 0x27abbf01fda31, 0x380a7b9e64fbc, 0x2d42e2108ead4 },
+ },
+ {
+ { 0x17b0d0f537593, 0x16263c0c9842e, 0x4ab827e4539a4, 0x6370ddb43d73a, 0x420bf3a79b423 },
+ { 0x5131594dfd29b, 0x3a627e98d52fe, 0x1154041855661, 0x19175d09f8384, 0x676b2608b8d2d },
+ { 0x0ba651c5b2b47, 0x5862363701027, 0x0c4d6c219c6db, 0x0f03dff8658de, 0x745d2ffa9c0cf },
+ },
+ {
+ { 0x6df5721d34e6a, 0x4f32f767a0c06, 0x1d5abeac76e20, 0x41ce9e104e1e4, 0x06e15be54c1dc },
+ { 0x25a1e2bc9c8bd, 0x104c8f3b037ea, 0x405576fa96c98, 0x2e86a88e3876f, 0x1ae23ceb960cf },
+ { 0x25d871932994a, 0x6b9d63b560b6e, 0x2df2814c8d472, 0x0fbbee20aa4ed, 0x58ded861278ec },
+ },
+ {
+ { 0x35ba8b6c2c9a8, 0x1dea58b3185bf, 0x4b455cd23bbbe, 0x5ec19c04883f8, 0x08ba696b531d5 },
+ { 0x73793f266c55c, 0x0b988a9c93b02, 0x09b0ea32325db, 0x37cae71c17c5e, 0x2ff39de85485f },
+ { 0x53eeec3efc57a, 0x2fa9fe9022efd, 0x699c72c138154, 0x72a751ebd1ff8, 0x120633b4947cf },
+ },
+ {
+ { 0x531474912100a, 0x5afcdf7c0d057, 0x7a9e71b788ded, 0x5ef708f3b0c88, 0x07433be3cb393 },
+ { 0x4987891610042, 0x79d9d7f5d0172, 0x3c293013b9ec4, 0x0c2b85f39caca, 0x35d30a99b4d59 },
+ { 0x144c05ce997f4, 0x4960b8a347fef, 0x1da11f15d74f7, 0x54fac19c0fead, 0x2d873ede7af6d },
+ },
+ {
+ { 0x202e14e5df981, 0x2ea02bc3eb54c, 0x38875b2883564, 0x1298c513ae9dd, 0x0543618a01600 },
+ { 0x2316443373409, 0x5de95503b22af, 0x699201beae2df, 0x3db5849ff737a, 0x2e773654707fa },
+ { 0x2bdf4974c23c1, 0x4b3b9c8d261bd, 0x26ae8b2a9bc28, 0x3068210165c51, 0x4b1443362d079 },
+ },
+},
+{
+ {
+ { 0x454e91c529ccb, 0x24c98c6bf72cf, 0x0486594c3d89a, 0x7ae13a3d7fa3c, 0x17038418eaf66 },
+ { 0x4b7c7b66e1f7a, 0x4bea185efd998, 0x4fabc711055f8, 0x1fb9f7836fe38, 0x582f446752da6 },
+ { 0x17bd320324ce4, 0x51489117898c6, 0x1684d92a0410b, 0x6e4d90f78c5a7, 0x0c2a1c4bcda28 },
+ },
+ {
+ { 0x4814869bd6945, 0x7b7c391a45db8, 0x57316ac35b641, 0x641e31de9096a, 0x5a6a9b30a314d },
+ { 0x5c7d06f1f0447, 0x7db70f80b3a49, 0x6cb4a3ec89a78, 0x43be8ad81397d, 0x7c558bd1c6f64 },
+ { 0x41524d396463d, 0x1586b449e1a1d, 0x2f17e904aed8a, 0x7e1d2861d3c8e, 0x0404a5ca0afba },
+ },
+ {
+ { 0x49e1b2a416fd1, 0x51c6a0b316c57, 0x575a59ed71bdc, 0x74c021a1fec1e, 0x39527516e7f8e },
+ { 0x740070aa743d6, 0x16b64cbdd1183, 0x23f4b7b32eb43, 0x319aba58235b3, 0x46395bfdcadd9 },
+ { 0x7db2d1a5d9a9c, 0x79a200b85422f, 0x355bfaa71dd16, 0x00b77ea5f78aa, 0x76579a29e822d },
+ },
+ {
+ { 0x4b51352b434f2, 0x1327bd01c2667, 0x434d73b60c8a1, 0x3e0daa89443ba, 0x02c514bb2a277 },
+ { 0x68e7e49c02a17, 0x45795346fe8b6, 0x089306c8f3546, 0x6d89f6b2f88f6, 0x43a384dc9e05b },
+ { 0x3d5da8bf1b645, 0x7ded6a96a6d09, 0x6c3494fee2f4d, 0x02c989c8b6bd4, 0x1160920961548 },
+ },
+ {
+ { 0x05616369b4dcd, 0x4ecab86ac6f47, 0x3c60085d700b2, 0x0213ee10dfcea, 0x2f637d7491e6e },
+ { 0x5166929dacfaa, 0x190826b31f689, 0x4f55567694a7d, 0x705f4f7b1e522, 0x351e125bc5698 },
+ { 0x49b461af67bbe, 0x75915712c3a96, 0x69a67ef580c0d, 0x54d38ef70cffc, 0x7f182d06e7ce2 },
+ },
+ {
+ { 0x54b728e217522, 0x69a90971b0128, 0x51a40f2a963a3, 0x10be9ac12a6bf, 0x44acc043241c5 },
+ { 0x48e64ab0168ec, 0x2a2bdb8a86f4f, 0x7343b6b2d6929, 0x1d804aa8ce9a3, 0x67d4ac8c343e9 },
+ { 0x56bbb4f7a5777, 0x29230627c238f, 0x5ad1a122cd7fb, 0x0dea56e50e364, 0x556d1c8312ad7 },
+ },
+ {
+ { 0x06756b11be821, 0x462147e7bb03e, 0x26519743ebfe0, 0x782fc59682ab5, 0x097abe38cc8c7 },
+ { 0x740e30c8d3982, 0x7c2b47f4682fd, 0x5cd91b8c7dc1c, 0x77fa790f9e583, 0x746c6c6d1d824 },
+ { 0x1c9877ea52da4, 0x2b37b83a86189, 0x733af49310da5, 0x25e81161c04fb, 0x577e14a34bee8 },
+ },
+ {
+ { 0x6cebebd4dd72b, 0x340c1e442329f, 0x32347ffd1a93f, 0x14a89252cbbe0, 0x705304b8fb009 },
+ { 0x268ac61a73b0a, 0x206f234bebe1c, 0x5b403a7cbebe8, 0x7a160f09f4135, 0x60fa7ee96fd78 },
+ { 0x51d354d296ec6, 0x7cbf5a63b16c7, 0x2f50bb3cf0c14, 0x1feb385cac65a, 0x21398e0ca1635 },
+ },
+},
+{
+ {
+ { 0x0aaf9b4b75601, 0x26b91b5ae44f3, 0x6de808d7ab1c8, 0x6a769675530b0, 0x1bbfb284e98f7 },
+ { 0x5058a382b33f3, 0x175a91816913e, 0x4f6cdb96b8ae8, 0x17347c9da81d2, 0x5aa3ed9d95a23 },
+ { 0x777e9c7d96561, 0x28e58f006ccac, 0x541bbbb2cac49, 0x3e63282994cec, 0x4a07e14e5e895 },
+ },
+ {
+ { 0x358cdc477a49b, 0x3cc88fe02e481, 0x721aab7f4e36b, 0x0408cc9469953, 0x50af7aed84afa },
+ { 0x412cb980df999, 0x5e78dd8ee29dc, 0x171dff68c575d, 0x2015dd2f6ef49, 0x3f0bac391d313 },
+ { 0x7de0115f65be5, 0x4242c21364dc9, 0x6b75b64a66098, 0x0033c0102c085, 0x1921a316baebd },
+ },
+ {
+ { 0x2ad9ad9f3c18b, 0x5ec1638339aeb, 0x5703b6559a83b, 0x3fa9f4d05d612, 0x7b049deca062c },
+ { 0x22f7edfb870fc, 0x569eed677b128, 0x30937dcb0a5af, 0x758039c78ea1b, 0x6458df41e273a },
+ { 0x3e37a35444483, 0x661fdb7d27b99, 0x317761dd621e4, 0x7323c30026189, 0x6093dccbc2950 },
+ },
+ {
+ { 0x6eebe6084034b, 0x6cf01f70a8d7b, 0x0b41a54c6670a, 0x6c84b99bb55db, 0x6e3180c98b647 },
+ { 0x39a8585e0706d, 0x3167ce72663fe, 0x63d14ecdb4297, 0x4be21dcf970b8, 0x57d1ea084827a },
+ { 0x2b6e7a128b071, 0x5b27511755dcf, 0x08584c2930565, 0x68c7bda6f4159, 0x363e999ddd97b },
+ },
+ {
+ { 0x048dce24baec6, 0x2b75795ec05e3, 0x3bfa4c5da6dc9, 0x1aac8659e371e, 0x231f979bc6f9b },
+ { 0x043c135ee1fc4, 0x2a11c9919f2d5, 0x6334cc25dbacd, 0x295da17b400da, 0x48ee9b78693a0 },
+ { 0x1de4bcc2af3c6, 0x61fc411a3eb86, 0x53ed19ac12ec0, 0x209dbc6b804e0, 0x079bfa9b08792 },
+ },
+ {
+ { 0x1ed80a2d54245, 0x70efec72a5e79, 0x42151d42a822d, 0x1b5ebb6d631e8, 0x1ef4fb1594706 },
+ { 0x03a51da300df4, 0x467b52b561c72, 0x4d5920210e590, 0x0ca769e789685, 0x038c77f684817 },
+ { 0x65ee65b167bec, 0x052da19b850a9, 0x0408665656429, 0x7ab39596f9a4c, 0x575ee92a4a0bf },
+ },
+ {
+ { 0x6bc450aa4d801, 0x4f4a6773b0ba8, 0x6241b0b0ebc48, 0x40d9c4f1d9315, 0x200a1e7e382f5 },
+ { 0x080908a182fcf, 0x0532913b7ba98, 0x3dccf78c385c3, 0x68002dd5eaba9, 0x43d4e7112cd3f },
+ { 0x5b967eaf93ac5, 0x360acca580a31, 0x1c65fd5c6f262, 0x71c7f15c2ecab, 0x050eca52651e4 },
+ },
+ {
+ { 0x4397660e668ea, 0x7c2a75692f2f5, 0x3b29e7e6c66ef, 0x72ba658bcda9a, 0x6151c09fa131a },
+ { 0x31ade453f0c9c, 0x3dfee07737868, 0x611ecf7a7d411, 0x2637e6cbd64f6, 0x4b0ee6c21c58f },
+ { 0x55c0dfdf05d96, 0x405569dcf475e, 0x05c5c277498bb, 0x18588d95dc389, 0x1fef24fa800f0 },
+ },
+},
+{
+ {
+ { 0x2aff530976b86, 0x0d85a48c0845a, 0x796eb963642e0, 0x60bee50c4b626, 0x28005fe6c8340 },
+ { 0x653fb1aa73196, 0x607faec8306fa, 0x4e85ec83e5254, 0x09f56900584fd, 0x544d49292fc86 },
+ { 0x7ba9f34528688, 0x284a20fb42d5d, 0x3652cd9706ffe, 0x6fd7baddde6b3, 0x72e472930f316 },
+ },
+ {
+ { 0x3f635d32a7627, 0x0cbecacde00fe, 0x3411141eaa936, 0x21c1e42f3cb94, 0x1fee7f000fe06 },
+ { 0x5208c9781084f, 0x16468a1dc24d2, 0x7bf780ac540a8, 0x1a67eced75301, 0x5a9d2e8c2733a },
+ { 0x305da03dbf7e5, 0x1228699b7aeca, 0x12a23b2936bc9, 0x2a1bda56ae6e9, 0x00f94051ee040 },
+ },
+ {
+ { 0x793bb07af9753, 0x1e7b6ecd4fafd, 0x02c7b1560fb43, 0x2296734cc5fb7, 0x47b7ffd25dd40 },
+ { 0x56b23c3d330b2, 0x37608e360d1a6, 0x10ae0f3c8722e, 0x086d9b618b637, 0x07d79c7e8beab },
+ { 0x3fb9cbc08dd12, 0x75c3dd85370ff, 0x47f06fe2819ac, 0x5db06ab9215ed, 0x1c3520a35ea64 },
+ },
+ {
+ { 0x06f40216bc059, 0x3a2579b0fd9b5, 0x71c26407eec8c, 0x72ada4ab54f0b, 0x38750c3b66d12 },
+ { 0x253a6bccba34a, 0x427070433701a, 0x20b8e58f9870e, 0x337c861db00cc, 0x1c3d05775d0ee },
+ { 0x6f1409422e51a, 0x7856bbece2d25, 0x13380a72f031c, 0x43e1080a7f3ba, 0x0621e2c7d3304 },
+ },
+ {
+ { 0x61796b0dbf0f3, 0x73c2f9c32d6f5, 0x6aa8ed1537ebe, 0x74e92c91838f4, 0x5d8e589ca1002 },
+ { 0x060cc8259838d, 0x038d3f35b95f3, 0x56078c243a923, 0x2de3293241bb2, 0x0007d6097bd3a },
+ { 0x71d950842a94b, 0x46b11e5c7d817, 0x5478bbecb4f0d, 0x7c3054b0a1c5d, 0x1583d7783c1cb },
+ },
+ {
+ { 0x34704cc9d28c7, 0x3dee598b1f200, 0x16e1c98746d9e, 0x4050b7095afdf, 0x4958064e83c55 },
+ { 0x6a2ef5da27ae1, 0x28aace02e9d9d, 0x02459e965f0e8, 0x7b864d3150933, 0x252a5f2e81ed8 },
+ { 0x094265066e80d, 0x0a60f918d61a5, 0x0444bf7f30fde, 0x1c40da9ed3c06, 0x079c170bd843b },
+ },
+ {
+ { 0x6cd50c0d5d056, 0x5b7606ae779ba, 0x70fbd226bdda1, 0x5661e53391ff9, 0x6768c0d7317b8 },
+ { 0x6ece464fa6fff, 0x3cc40bca460a0, 0x6e3a90afb8d0c, 0x5801abca11228, 0x6dec05e34ac9f },
+ { 0x625e5f155c1b3, 0x4f32f6f723296, 0x5ac980105efce, 0x17a61165eee36, 0x51445e14ddcd5 },
+ },
+ {
+ { 0x147ab2bbea455, 0x1f240f2253126, 0x0c3de9e314e89, 0x21ea5a4fca45f, 0x12e990086e4fd },
+ { 0x02b4b3b144951, 0x5688977966aea, 0x18e176e399ffd, 0x2e45c5eb4938b, 0x13186f31e3929 },
+ { 0x496b37fdfbb2e, 0x3c2439d5f3e21, 0x16e60fe7e6a4d, 0x4d7ef889b621d, 0x77b2e3f05d3e9 },
+ },
+},
+{
+ {
+ { 0x0639c12ddb0a4, 0x6180490cd7ab3, 0x3f3918297467c, 0x74568be1781ac, 0x07a195152e095 },
+ { 0x7a9c59c2ec4de, 0x7e9f09e79652d, 0x6a3e422f22d86, 0x2ae8e3b836c8b, 0x63b795fc7ad32 },
+ { 0x68f02389e5fc8, 0x059f1bc877506, 0x504990e410cec, 0x09bd7d0feaee2, 0x3e8fe83d032f0 },
+ },
+ {
+ { 0x04c8de8efd13c, 0x1c67c06e6210e, 0x183378f7f146a, 0x64352ceaed289, 0x22d60899a6258 },
+ { 0x315b90570a294, 0x60ce108a925f1, 0x6eff61253c909, 0x003ef0e2d70b0, 0x75ba3b797fac4 },
+ { 0x1dbc070cdd196, 0x16d8fb1534c47, 0x500498183fa2a, 0x72f59c423de75, 0x0904d07b87779 },
+ },
+ {
+ { 0x22d6648f940b9, 0x197a5a1873e86, 0x207e4c41a54bc, 0x5360b3b4bd6d0, 0x6240aacebaf72 },
+ { 0x61fd4ddba919c, 0x7d8e991b55699, 0x61b31473cc76c, 0x7039631e631d6, 0x43e2143fbc1dd },
+ { 0x4749c5ba295a0, 0x37946fa4b5f06, 0x724c5ab5a51f1, 0x65633789dd3f3, 0x56bdaf238db40 },
+ },
+ {
+ { 0x0d36cc19d3bb2, 0x6ec4470d72262, 0x6853d7018a9ae, 0x3aa3e4dc2c8eb, 0x03aa31507e1e5 },
+ { 0x2b9e3f53533eb, 0x2add727a806c5, 0x56955c8ce15a3, 0x18c4f070a290e, 0x1d24a86d83741 },
+ { 0x47648ffd4ce1f, 0x60a9591839e9d, 0x424d5f38117ab, 0x42cc46912c10e, 0x43b261dc9aeb4 },
+ },
+ {
+ { 0x13d8b6c951364, 0x4c0017e8f632a, 0x53e559e53f9c4, 0x4b20146886eea, 0x02b4d5e242940 },
+ { 0x31e1988bb79bb, 0x7b82f46b3bcab, 0x0f7a8ce827b41, 0x5e15816177130, 0x326055cf5b276 },
+ { 0x155cb28d18df2, 0x0c30d9ca11694, 0x2090e27ab3119, 0x208624e7a49b6, 0x27a6c809ae5d3 },
+ },
+ {
+ { 0x4270ac43d6954, 0x2ed4cd95659a5, 0x75c0db37528f9, 0x2ccbcfd2c9234, 0x221503603d8c2 },
+ { 0x6ebcd1f0db188, 0x74ceb4b7d1174, 0x7d56168df4f5c, 0x0bf79176fd18a, 0x2cb67174ff60a },
+ { 0x6cdf9390be1d0, 0x08e519c7e2b3d, 0x253c3d2a50881, 0x21b41448e333d, 0x7b1df4b73890f },
+ },
+ {
+ { 0x6221807f8f58c, 0x3fa92813a8be5, 0x6da98c38d5572, 0x01ed95554468f, 0x68698245d352e },
+ { 0x2f2e0b3b2a224, 0x0c56aa22c1c92, 0x5fdec39f1b278, 0x4c90af5c7f106, 0x61fcef2658fc5 },
+ { 0x15d852a18187a, 0x270dbb59afb76, 0x7db120bcf92ab, 0x0e7a25d714087, 0x46cf4c473daf0 },
+ },
+ {
+ { 0x46ea7f1498140, 0x70725690a8427, 0x0a73ae9f079fb, 0x2dd924461c62b, 0x1065aae50d8cc },
+ { 0x525ed9ec4e5f9, 0x022d20660684c, 0x7972b70397b68, 0x7a03958d3f965, 0x29387bcd14eb5 },
+ { 0x44525df200d57, 0x2d7f94ce94385, 0x60d00c170ecb7, 0x38b0503f3d8f0, 0x69a198e64f1ce },
+ },
+},
+{
+ {
+ { 0x14434dcc5caed, 0x2c7909f667c20, 0x61a839d1fb576, 0x4f23800cabb76, 0x25b2697bd267f },
+ { 0x2b2e0d91a78bc, 0x3990a12ccf20c, 0x141c2e11f2622, 0x0dfcefaa53320, 0x7369e6a92493a },
+ { 0x73ffb13986864, 0x3282bb8f713ac, 0x49ced78f297ef, 0x6697027661def, 0x1420683db54e4 },
+ },
+ {
+ { 0x6bb6fc1cc5ad0, 0x532c8d591669d, 0x1af794da86c33, 0x0e0e9d86d24d3, 0x31e83b4161d08 },
+ { 0x0bd1e249dd197, 0x00bcb1820568f, 0x2eab1718830d4, 0x396fd816997e6, 0x60b63bebf508a },
+ { 0x0c7129e062b4f, 0x1e526415b12fd, 0x461a0fd27923d, 0x18badf670a5b7, 0x55cf1eb62d550 },
+ },
+ {
+ { 0x6b5e37df58c52, 0x3bcf33986c60e, 0x44fb8835ceae7, 0x099dec18e71a4, 0x1a56fbaa62ba0 },
+ { 0x1101065c23d58, 0x5aa1290338b0f, 0x3157e9e2e7421, 0x0ea712017d489, 0x669a656457089 },
+ { 0x66b505c9dc9ec, 0x774ef86e35287, 0x4d1d944c0955e, 0x52e4c39d72b20, 0x13c4836799c58 },
+ },
+ {
+ { 0x4fb6a5d8bd080, 0x58ae34908589b, 0x3954d977baf13, 0x413ea597441dc, 0x50bdc87dc8e5b },
+ { 0x25d465ab3e1b9, 0x0f8fe27ec2847, 0x2d6e6dbf04f06, 0x3038cfc1b3276, 0x66f80c93a637b },
+ { 0x537836edfe111, 0x2be02357b2c0d, 0x6dcee58c8d4f8, 0x2d732581d6192, 0x1dd56444725fd },
+ },
+ {
+ { 0x7e60008bac89a, 0x23d5c387c1852, 0x79e5df1f533a8, 0x2e6f9f1c5f0cf, 0x3a3a450f63a30 },
+ { 0x47ff83362127d, 0x08e39af82b1f4, 0x488322ef27dab, 0x1973738a2a1a4, 0x0e645912219f7 },
+ { 0x72f31d8394627, 0x07bd294a200f1, 0x665be00e274c6, 0x43de8f1b6368b, 0x318c8d9393a9a },
+ },
+ {
+ { 0x69e29ab1dd398, 0x30685b3c76bac, 0x565cf37f24859, 0x57b2ac28efef9, 0x509a41c325950 },
+ { 0x45d032afffe19, 0x12fe49b6cde4e, 0x21663bc327cf1, 0x18a5e4c69f1dd, 0x224c7c679a1d5 },
+ { 0x06edca6f925e9, 0x68c8363e677b8, 0x60cfa25e4fbcf, 0x1c4c17609404e, 0x05bff02328a11 },
+ },
+ {
+ { 0x1a0dd0dc512e4, 0x10894bf5fcd10, 0x52949013f9c37, 0x1f50fba4735c7, 0x576277cdee01a },
+ { 0x2137023cae00b, 0x15a3599eb26c6, 0x0687221512b3c, 0x253cb3a0824e9, 0x780b8cc3fa2a4 },
+ { 0x38abc234f305f, 0x7a280bbc103de, 0x398a836695dfe, 0x3d0af41528a1a, 0x5ff418726271b },
+ },
+ {
+ { 0x347e813b69540, 0x76864c21c3cbb, 0x1e049dbcd74a8, 0x5b4d60f93749c, 0x29d4db8ca0a0c },
+ { 0x6080c1789db9d, 0x4be7cef1ea731, 0x2f40d769d8080, 0x35f7d4c44a603, 0x106a03dc25a96 },
+ { 0x50aaf333353d0, 0x4b59a613cbb35, 0x223dfc0e19a76, 0x77d1e2bb2c564, 0x4ab38a51052cb },
+ },
+},
+{
+ {
+ { 0x7d1ef5fddc09c, 0x7beeaebb9dad9, 0x058d30ba0acfb, 0x5cd92eab5ae90, 0x3041c6bb04ed2 },
+ { 0x42b256768d593, 0x2e88459427b4f, 0x02b3876630701, 0x34878d405eae5, 0x29cdd1adc088a },
+ { 0x2f2f9d956e148, 0x6b3e6ad65c1fe, 0x5b00972b79e5d, 0x53d8d234c5daf, 0x104bbd6814049 },
+ },
+ {
+ { 0x59a5fd67ff163, 0x3a998ead0352b, 0x083c95fa4af9a, 0x6fadbfc01266f, 0x204f2a20fb072 },
+ { 0x0fd3168f1ed67, 0x1bb0de7784a3e, 0x34bcb78b20477, 0x0a4a26e2e2182, 0x5be8cc57092a7 },
+ { 0x43b3d30ebb079, 0x357aca5c61902, 0x5b570c5d62455, 0x30fb29e1e18c7, 0x2570fb17c2791 },
+ },
+ {
+ { 0x6a9550bb8245a, 0x511f20a1a2325, 0x29324d7239bee, 0x3343cc37516c4, 0x241c5f91de018 },
+ { 0x2367f2cb61575, 0x6c39ac04d87df, 0x6d4958bd7e5bd, 0x566f4638a1532, 0x3dcb65ea53030 },
+ { 0x0172940de6caa, 0x6045b2e67451b, 0x56c07463efcb3, 0x0728b6bfe6e91, 0x08420edd5fcdf },
+ },
+ {
+ { 0x0c34e04f410ce, 0x344edc0d0a06b, 0x6e45486d84d6d, 0x44e2ecb3863f5, 0x04d654f321db8 },
+ { 0x720ab8362fa4a, 0x29c4347cdd9bf, 0x0e798ad5f8463, 0x4fef18bcb0bfe, 0x0d9a53efbc176 },
+ { 0x5c116ddbdb5d5, 0x6d1b4bba5abcf, 0x4d28a48a5537a, 0x56b8e5b040b99, 0x4a7a4f2618991 },
+ },
+ {
+ { 0x3b291af372a4b, 0x60e3028fe4498, 0x2267bca4f6a09, 0x719eec242b243, 0x4a96314223e0e },
+ { 0x718025fb15f95, 0x68d6b8371fe94, 0x3804448f7d97c, 0x42466fe784280, 0x11b50c4cddd31 },
+ { 0x0274408a4ffd6, 0x7d382aedb34dd, 0x40acfc9ce385d, 0x628bb99a45b1e, 0x4f4bce4dce6bc },
+ },
+ {
+ { 0x2616ec49d0b6f, 0x1f95d8462e61c, 0x1ad3e9b9159c6, 0x79ba475a04df9, 0x3042cee561595 },
+ { 0x7ce5ae2242584, 0x2d25eb153d4e3, 0x3a8f3d09ba9c9, 0x0f3690d04eb8e, 0x73fcdd14b71c0 },
+ { 0x67079449bac41, 0x5b79c4621484f, 0x61069f2156b8d, 0x0eb26573b10af, 0x389e740c9a9ce },
+ },
+ {
+ { 0x578f6570eac28, 0x644f2339c3937, 0x66e47b7956c2c, 0x34832fe1f55d0, 0x25c425e5d6263 },
+ { 0x4b3ae34dcb9ce, 0x47c691a15ac9f, 0x318e06e5d400c, 0x3c422d9f83eb1, 0x61545379465a6 },
+ { 0x606a6f1d7de6e, 0x4f1c0c46107e7, 0x229b1dcfbe5d8, 0x3acc60a7b1327, 0x6539a08915484 },
+ },
+ {
+ { 0x4dbd414bb4a19, 0x7930849f1dbb8, 0x329c5a466caf0, 0x6c824544feb9b, 0x0f65320ef019b },
+ { 0x21f74c3d2f773, 0x024b88d08bd3a, 0x6e678cf054151, 0x43631272e747c, 0x11c5e4aac5cd1 },
+ { 0x6d1b1cafde0c6, 0x462c76a303a90, 0x3ca4e693cff9b, 0x3952cd45786fd, 0x4cabc7bdec330 },
+ },
+},
+{
+ {
+ { 0x7788f3f78d289, 0x5942809b3f811, 0x5973277f8c29c, 0x010f93bc5fe67, 0x7ee498165acb2 },
+ { 0x69624089c0a2e, 0x0075fc8e70473, 0x13e84ab1d2313, 0x2c10bedf6953b, 0x639b93f0321c8 },
+ { 0x508e39111a1c3, 0x290120e912f7a, 0x1cbf464acae43, 0x15373e9576157, 0x0edf493c85b60 },
+ },
+ {
+ { 0x7c4d284764113, 0x7fefebf06acec, 0x39afb7a824100, 0x1b48e47e7fd65, 0x04c00c54d1dfa },
+ { 0x48158599b5a68, 0x1fd75bc41d5d9, 0x2d9fc1fa95d3c, 0x7da27f20eba11, 0x403b92e3019d4 },
+ { 0x22f818b465cf8, 0x342901dff09b8, 0x31f595dc683cd, 0x37a57745fd682, 0x355bb12ab2617 },
+ },
+ {
+ { 0x1dac75a8c7318, 0x3b679d5423460, 0x6b8fcb7b6400e, 0x6c73783be5f9d, 0x7518eaf8e052a },
+ { 0x664cc7493bbf4, 0x33d94761874e3, 0x0179e1796f613, 0x1890535e2867d, 0x0f9b8132182ec },
+ { 0x059c41b7f6c32, 0x79e8706531491, 0x6c747643cb582, 0x2e20c0ad494e4, 0x47c3871bbb175 },
+ },
+ {
+ { 0x65d50c85066b0, 0x6167453361f7c, 0x06ba3818bb312, 0x6aff29baa7522, 0x08fea02ce8d48 },
+ { 0x4539771ec4f48, 0x7b9318badca28, 0x70f19afe016c5, 0x4ee7bb1608d23, 0x00b89b8576469 },
+ { 0x5dd7668deead0, 0x4096d0ba47049, 0x6275997219114, 0x29bda8a67e6ae, 0x473829a74f75d },
+ },
+ {
+ { 0x1533aad3902c9, 0x1dde06b11e47b, 0x784bed1930b77, 0x1c80a92b9c867, 0x6c668b4d44e4d },
+ { 0x2da754679c418, 0x3164c31be105a, 0x11fac2b98ef5f, 0x35a1aaf779256, 0x2078684c4833c },
+ { 0x0cf217a78820c, 0x65024e7d2e769, 0x23bb5efdda82a, 0x19fd4b632d3c6, 0x7411a6054f8a4 },
+ },
+ {
+ { 0x2e53d18b175b4, 0x33e7254204af3, 0x3bcd7d5a1c4c5, 0x4c7c22af65d0f, 0x1ec9a872458c3 },
+ { 0x59d32b99dc86d, 0x6ac075e22a9ac, 0x30b9220113371, 0x27fd9a638966e, 0x7c136574fb813 },
+ { 0x6a4d400a2509b, 0x041791056971c, 0x655d5866e075c, 0x2302bf3e64df8, 0x3add88a5c7cd6 },
+ },
+ {
+ { 0x298d459393046, 0x30bfecb3d90b8, 0x3d9b8ea3df8d6, 0x3900e96511579, 0x61ba1131a406a },
+ { 0x15770b635dcf2, 0x59ecd83f79571, 0x2db461c0b7fbd, 0x73a42a981345f, 0x249929fccc879 },
+ { 0x0a0f116959029, 0x5974fd7b1347a, 0x1e0cc1c08edad, 0x673bdf8ad1f13, 0x5620310cbbd8e },
+ },
+ {
+ { 0x6b5f477e285d6, 0x4ed91ec326cc8, 0x6d6537503a3fd, 0x626d3763988d5, 0x7ec846f3658ce },
+ { 0x193434934d643, 0x0d4a2445eaa51, 0x7d0708ae76fe0, 0x39847b6c3c7e1, 0x37676a2a4d9d9 },
+ { 0x68f3f1da22ec7, 0x6ed8039a2736b, 0x2627ee04c3c75, 0x6ea90a647e7d1, 0x6daaf723399b9 },
+ },
+},
+{
+ {
+ { 0x304bfacad8ea2, 0x502917d108b07, 0x043176ca6dd0f, 0x5d5158f2c1d84, 0x2b5449e58eb3b },
+ { 0x27562eb3dbe47, 0x291d7b4170be7, 0x5d1ca67dfa8e1, 0x2a88061f298a2, 0x1304e9e71627d },
+ { 0x014d26adc9cfe, 0x7f1691ba16f13, 0x5e71828f06eac, 0x349ed07f0fffc, 0x4468de2d7c2dd },
+ },
+ {
+ { 0x2d8c6f86307ce, 0x6286ba1850973, 0x5e9dcb08444d4, 0x1a96a543362b2, 0x5da6427e63247 },
+ { 0x3355e9419469e, 0x1847bb8ea8a37, 0x1fe6588cf9b71, 0x6b1c9d2db6b22, 0x6cce7c6ffb44b },
+ { 0x4c688deac22ca, 0x6f775c3ff0352, 0x565603ee419bb, 0x6544456c61c46, 0x58f29abfe79f2 },
+ },
+ {
+ { 0x264bf710ecdf6, 0x708c58527896b, 0x42ceae6c53394, 0x4381b21e82b6a, 0x6af93724185b4 },
+ { 0x6cfab8de73e68, 0x3e6efced4bd21, 0x0056609500dbe, 0x71b7824ad85df, 0x577629c4a7f41 },
+ { 0x0024509c6a888, 0x2696ab12e6644, 0x0cca27f4b80d8, 0x0c7c1f11b119e, 0x701f25bb0caec },
+ },
+ {
+ { 0x0f6d97cbec113, 0x4ce97fb7c93a3, 0x139835a11281b, 0x728907ada9156, 0x720a5bc050955 },
+ { 0x0b0f8e4616ced, 0x1d3c4b50fb875, 0x2f29673dc0198, 0x5f4b0f1830ffa, 0x2e0c92bfbdc40 },
+ { 0x709439b805a35, 0x6ec48557f8187, 0x08a4d1ba13a2c, 0x076348a0bf9ae, 0x0e9b9cbb144ef },
+ },
+ {
+ { 0x69bd55db1beee, 0x6e14e47f731bd, 0x1a35e47270eac, 0x66f225478df8e, 0x366d44191cfd3 },
+ { 0x2d48ffb5720ad, 0x57b7f21a1df77, 0x5550effba0645, 0x5ec6a4098a931, 0x221104eb3f337 },
+ { 0x41743f2bc8c14, 0x796b0ad8773c7, 0x29fee5cbb689b, 0x122665c178734, 0x4167a4e6bc593 },
+ },
+ {
+ { 0x62665f8ce8fee, 0x29d101ac59857, 0x4d93bbba59ffc, 0x17b7897373f17, 0x34b33370cb7ed },
+ { 0x39d2876f62700, 0x001cecd1d6c87, 0x7f01a11747675, 0x2350da5a18190, 0x7938bb7e22552 },
+ { 0x591ee8681d6cc, 0x39db0b4ea79b8, 0x202220f380842, 0x2f276ba42e0ac, 0x1176fc6e2dfe6 },
+ },
+ {
+ { 0x0e28949770eb8, 0x5559e88147b72, 0x35e1e6e63ef30, 0x35b109aa7ff6f, 0x1f6a3e54f2690 },
+ { 0x76cd05b9c619b, 0x69654b0901695, 0x7a53710b77f27, 0x79a1ea7d28175, 0x08fc3a4c677d5 },
+ { 0x4c199d30734ea, 0x6c622cb9acc14, 0x5660a55030216, 0x068f1199f11fb, 0x4f2fad0116b90 },
+ },
+ {
+ { 0x4d91db73bb638, 0x55f82538112c5, 0x6d85a279815de, 0x740b7b0cd9cf9, 0x3451995f2944e },
+ { 0x6b24194ae4e54, 0x2230afded8897, 0x23412617d5071, 0x3d5d30f35969b, 0x445484a4972ef },
+ { 0x2fcd09fea7d7c, 0x296126b9ed22a, 0x4a171012a05b2, 0x1db92c74d5523, 0x10b89ca604289 },
+ },
+},
+{
+ {
+ { 0x141be5a45f06e, 0x5adb38becaea7, 0x3fd46db41f2bb, 0x6d488bbb5ce39, 0x17d2d1d9ef0d4 },
+ { 0x147499718289c, 0x0a48a67e4c7ab, 0x30fbc544bafe3, 0x0c701315fe58a, 0x20b878d577b75 },
+ { 0x2af18073f3e6a, 0x33aea420d24fe, 0x298008bf4ff94, 0x3539171db961e, 0x72214f63cc65c },
+ },
+ {
+ { 0x5b7b9f43b29c9, 0x149ea31eea3b3, 0x4be7713581609, 0x2d87960395e98, 0x1f24ac855a154 },
+ { 0x37f405307a693, 0x2e5e66cf2b69c, 0x5d84266ae9c53, 0x5e4eb7de853b9, 0x5fdf48c58171c },
+ { 0x608328e9505aa, 0x22182841dc49a, 0x3ec96891d2307, 0x2f363fff22e03, 0x00ba739e2ae39 },
+ },
+ {
+ { 0x426f5ea88bb26, 0x33092e77f75c8, 0x1a53940d819e7, 0x1132e4f818613, 0x72297de7d518d },
+ { 0x698de5c8790d6, 0x268b8545beb25, 0x6d2648b96fedf, 0x47988ad1db07c, 0x03283a3e67ad7 },
+ { 0x41dc7be0cb939, 0x1b16c66100904, 0x0a24c20cbc66d, 0x4a2e9efe48681, 0x05e1296846271 },
+ },
+ {
+ { 0x7bbc8242c4550, 0x59a06103b35b7, 0x7237e4af32033, 0x726421ab3537a, 0x78cf25d38258c },
+ { 0x2eeb32d9c495a, 0x79e25772f9750, 0x6d747833bbf23, 0x6cdd816d5d749, 0x39c00c9c13698 },
+ { 0x66b8e31489d68, 0x573857e10e2b5, 0x13be816aa1472, 0x41964d3ad4bf8, 0x006b52076b3ff },
+ },
+ {
+ { 0x37e16b9ce082d, 0x1882f57853eb9, 0x7d29eacd01fc5, 0x2e76a59b5e715, 0x7de2e9561a9f7 },
+ { 0x0cfe19d95781c, 0x312cc621c453c, 0x145ace6da077c, 0x0912bef9ce9b8, 0x4d57e3443bc76 },
+ { 0x0d4f4b6a55ecb, 0x7ebb0bb733bce, 0x7ba6a05200549, 0x4f6ede4e22069, 0x6b2a90af1a602 },
+ },
+ {
+ { 0x3f3245bb2d80a, 0x0e5f720f36efd, 0x3b9cccf60c06d, 0x084e323f37926, 0x465812c8276c2 },
+ { 0x3f4fc9ae61e97, 0x3bc07ebfa2d24, 0x3b744b55cd4a0, 0x72553b25721f3, 0x5fd8f4e9d12d3 },
+ { 0x3beb22a1062d9, 0x6a7063b82c9a8, 0x0a5a35dc197ed, 0x3c80c06a53def, 0x05b32c2b1cb16 },
+ },
+ {
+ { 0x4a42c7ad58195, 0x5c8667e799eff, 0x02e5e74c850a1, 0x3f0db614e869a, 0x31771a4856730 },
+ { 0x05eccd24da8fd, 0x580bbfdf07918, 0x7e73586873c6a, 0x74ceddf77f93e, 0x3b5556a37b471 },
+ { 0x0c524e14dd482, 0x283457496c656, 0x0ad6bcfb6cd45, 0x375d1e8b02414, 0x4fc079d27a733 },
+ },
+ {
+ { 0x48b440c86c50d, 0x139929cca3b86, 0x0f8f2e44cdf2f, 0x68432117ba6b2, 0x241170c2bae3c },
+ { 0x138b089bf2f7f, 0x4a05bfd34ea39, 0x203914c925ef5, 0x7497fffe04e3c, 0x124567cecaf98 },
+ { 0x1ab860ac473b4, 0x5c0227c86a7ff, 0x71b12bfc24477, 0x006a573a83075, 0x3f8612966c870 },
+ },
+},
+{
+ {
+ { 0x0fcfa36048d13, 0x66e7133bbb383, 0x64b42a8a45676, 0x4ea6e4f9a85cf, 0x26f57eee878a1 },
+ { 0x20cc9782a0dde, 0x65d4e3070aab3, 0x7bc8e31547736, 0x09ebfb1432d98, 0x504aa77679736 },
+ { 0x32cd55687efb1, 0x4448f5e2f6195, 0x568919d460345, 0x034c2e0ad1a27, 0x4041943d9dba3 },
+ },
+ {
+ { 0x17743a26caadd, 0x48c9156f9c964, 0x7ef278d1e9ad0, 0x00ce58ea7bd01, 0x12d931429800d },
+ { 0x0eeba43ebcc96, 0x384dd5395f878, 0x1df331a35d272, 0x207ecfd4af70e, 0x1420a1d976843 },
+ { 0x67799d337594f, 0x01647548f6018, 0x57fce5578f145, 0x009220c142a71, 0x1b4f92314359a },
+ },
+ {
+ { 0x73030a49866b1, 0x2442be90b2679, 0x77bd3d8947dcf, 0x1fb55c1552028, 0x5ff191d56f9a2 },
+ { 0x4109d89150951, 0x225bd2d2d47cb, 0x57cc080e73bea, 0x6d71075721fcb, 0x239b572a7f132 },
+ { 0x6d433ac2d9068, 0x72bf930a47033, 0x64facf4a20ead, 0x365f7a2b9402a, 0x020c526a758f3 },
+ },
+ {
+ { 0x1ef59f042cc89, 0x3b1c24976dd26, 0x31d665cb16272, 0x28656e470c557, 0x452cfe0a5602c },
+ { 0x034f89ed8dbbc, 0x73b8f948d8ef3, 0x786c1d323caab, 0x43bd4a9266e51, 0x02aacc4615313 },
+ { 0x0f7a0647877df, 0x4e1cc0f93f0d4, 0x7ec4726ef1190, 0x3bdd58bf512f8, 0x4cfb7d7b304b8 },
+ },
+ {
+ { 0x699c29789ef12, 0x63beae321bc50, 0x325c340adbb35, 0x562e1a1e42bf6, 0x5b1d4cbc434d3 },
+ { 0x43d6cb89b75fe, 0x3338d5b900e56, 0x38d327d531a53, 0x1b25c61d51b9f, 0x14b4622b39075 },
+ { 0x32615cc0a9f26, 0x57711b99cb6df, 0x5a69c14e93c38, 0x6e88980a4c599, 0x2f98f71258592 },
+ },
+ {
+ { 0x2ae444f54a701, 0x615397afbc5c2, 0x60d7783f3f8fb, 0x2aa675fc486ba, 0x1d8062e9e7614 },
+ { 0x4a74cb50f9e56, 0x531d1c2640192, 0x0c03d9d6c7fd2, 0x57ccd156610c1, 0x3a6ae249d806a },
+ { 0x2da85a9907c5a, 0x6b23721ec4caf, 0x4d2d3a4683aa2, 0x7f9c6870efdef, 0x298b8ce8aef25 },
+ },
+ {
+ { 0x272ea0a2165de, 0x68179ef3ed06f, 0x4e2b9c0feac1e, 0x3ee290b1b63bb, 0x6ba6271803a7d },
+ { 0x27953eff70cb2, 0x54f22ae0ec552, 0x29f3da92e2724, 0x242ca0c22bd18, 0x34b8a8404d5ce },
+ { 0x6ecb583693335, 0x3ec76bfdfb84d, 0x2c895cf56a04f, 0x6355149d54d52, 0x71d62bdd465e1 },
+ },
+ {
+ { 0x5b5dab1f75ef5, 0x1e2d60cbeb9a5, 0x527c2175dfe57, 0x59e8a2b8ff51f, 0x1c333621262b2 },
+ { 0x3cc28d378df80, 0x72141f4968ca6, 0x407696bdb6d0d, 0x5d271b22ffcfb, 0x74d5f317f3172 },
+ { 0x7e55467d9ca81, 0x6a5653186f50d, 0x6b188ece62df1, 0x4c66d36844971, 0x4aebcc4547e9d },
+ },
+},
+{
+ {
+ { 0x08d9e7354b610, 0x26b750b6dc168, 0x162881e01acc9, 0x7966df31d01a5, 0x173bd9ddc9a1d },
+ { 0x0071b276d01c9, 0x0b0d8918e025e, 0x75beea79ee2eb, 0x3c92984094db8, 0x5d88fbf95a3db },
+ { 0x00f1efe5872df, 0x5da872318256a, 0x59ceb81635960, 0x18cf37693c764, 0x06e1cd13b19ea },
+ },
+ {
+ { 0x3af629e5b0353, 0x204f1a088e8e5, 0x10efc9ceea82e, 0x589863c2fa34b, 0x7f3a6a1a8d837 },
+ { 0x0ad516f166f23, 0x263f56d57c81a, 0x13422384638ca, 0x1331ff1af0a50, 0x3080603526e16 },
+ { 0x644395d3d800b, 0x2b9203dbedefc, 0x4b18ce656a355, 0x03f3466bc182c, 0x30d0fded2e513 },
+ },
+ {
+ { 0x4971e68b84750, 0x52ccc9779f396, 0x3e904ae8255c8, 0x4ecae46f39339, 0x4615084351c58 },
+ { 0x14d1af21233b3, 0x1de1989b39c0b, 0x52669dc6f6f9e, 0x43434b28c3fc7, 0x0a9214202c099 },
+ { 0x019c0aeb9a02e, 0x1a2c06995d792, 0x664cbb1571c44, 0x6ff0736fa80b2, 0x3bca0d2895ca5 },
+ },
+ {
+ { 0x08eb69ecc01bf, 0x5b4c8912df38d, 0x5ea7f8bc2f20e, 0x120e516caafaf, 0x4ea8b4038df28 },
+ { 0x031bc3c5d62a4, 0x7d9fe0f4c081e, 0x43ed51467f22c, 0x1e6cc0c1ed109, 0x5631deddae8f1 },
+ { 0x5460af1cad202, 0x0b4919dd0655d, 0x7c4697d18c14c, 0x231c890bba2a4, 0x24ce0930542ca },
+ },
+ {
+ { 0x7a155fdf30b85, 0x1c6c6e5d487f9, 0x24be1134bdc5a, 0x1405970326f32, 0x549928a7324f4 },
+ { 0x090f5fd06c106, 0x6abb1021e43fd, 0x232bcfad711a0, 0x3a5c13c047f37, 0x41d4e3c28a06d },
+ { 0x632a763ee1a2e, 0x6fa4bffbd5e4d, 0x5fd35a6ba4792, 0x7b55e1de99de8, 0x491b66dec0dcf },
+ },
+ {
+ { 0x04a8ed0da64a1, 0x5ecfc45096ebe, 0x5edee93b488b2, 0x5b3c11a51bc8f, 0x4cf6b8b0b7018 },
+ { 0x5b13dc7ea32a7, 0x18fc2db73131e, 0x7e3651f8f57e3, 0x25656055fa965, 0x08f338d0c85ee },
+ { 0x3a821991a73bd, 0x03be6418f5870, 0x1ddc18eac9ef0, 0x54ce09e998dc2, 0x530d4a82eb078 },
+ },
+ {
+ { 0x173456c9abf9e, 0x7892015100dad, 0x33ee14095fecb, 0x6ad95d67a0964, 0x0db3e7e00cbfb },
+ { 0x43630e1f94825, 0x4d1956a6b4009, 0x213fe2df8b5e0, 0x05ce3a41191e6, 0x65ea753f10177 },
+ { 0x6fc3ee2096363, 0x7ec36b96d67ac, 0x510ec6a0758b1, 0x0ed87df022109, 0x02a4ec1921e1a },
+ },
+ {
+ { 0x06162f1cf795f, 0x324ddcafe5eb9, 0x018d5e0463218, 0x7e78b9092428e, 0x36d12b5dec067 },
+ { 0x6259a3b24b8a2, 0x188b5f4170b9c, 0x681c0dee15deb, 0x4dfe665f37445, 0x3d143c5112780 },
+ { 0x5279179154557, 0x39f8f0741424d, 0x45e6eb357923d, 0x42c9b5edb746f, 0x2ef517885ba82 },
+ },
+},
+{
+ {
+ { 0x6bffb305b2f51, 0x5b112b2d712dd, 0x35774974fe4e2, 0x04af87a96e3a3, 0x57968290bb3a0 },
+ { 0x7974e8c58aedc, 0x7757e083488c6, 0x601c62ae7bc8b, 0x45370c2ecab74, 0x2f1b78fab143a },
+ { 0x2b8430a20e101, 0x1a49e1d88fee3, 0x38bbb47ce4d96, 0x1f0e7ba84d437, 0x7dc43e35dc2aa },
+ },
+ {
+ { 0x02a5c273e9718, 0x32bc9dfb28b4f, 0x48df4f8d5db1a, 0x54c87976c028f, 0x044fb81d82d50 },
+ { 0x66665887dd9c3, 0x629760a6ab0b2, 0x481e6c7243e6c, 0x097e37046fc77, 0x7ef72016758cc },
+ { 0x718c5a907e3d9, 0x3b9c98c6b383b, 0x006ed255eccdc, 0x6976538229a59, 0x7f79823f9c30d },
+ },
+ {
+ { 0x41ff068f587ba, 0x1c00a191bcd53, 0x7b56f9c209e25, 0x3781e5fccaabe, 0x64a9b0431c06d },
+ { 0x4d239a3b513e8, 0x29723f51b1066, 0x642f4cf04d9c3, 0x4da095aa09b7a, 0x0a4e0373d784d },
+ { 0x3d6a15b7d2919, 0x41aa75046a5d6, 0x691751ec2d3da, 0x23638ab6721c4, 0x071a7d0ace183 },
+ },
+ {
+ { 0x4355220e14431, 0x0e1362a283981, 0x2757cd8359654, 0x2e9cd7ab10d90, 0x7c69bcf761775 },
+ { 0x72daac887ba0b, 0x0b7f4ac5dda60, 0x3bdda2c0498a4, 0x74e67aa180160, 0x2c3bcc7146ea7 },
+ { 0x0d7eb04e8295f, 0x4a5ea1e6fa0fe, 0x45e635c436c60, 0x28ef4a8d4d18b, 0x6f5a9a7322aca },
+ },
+ {
+ { 0x1d4eba3d944be, 0x0100f15f3dce5, 0x61a700e367825, 0x5922292ab3d23, 0x02ab9680ee8d3 },
+ { 0x1000c2f41c6c5, 0x0219fdf737174, 0x314727f127de7, 0x7e5277d23b81e, 0x494e21a2e147a },
+ { 0x48a85dde50d9a, 0x1c1f734493df4, 0x47bdb64866889, 0x59a7d048f8eec, 0x6b5d76cbea46b },
+ },
+ {
+ { 0x141171e782522, 0x6806d26da7c1f, 0x3f31d1bc79ab9, 0x09f20459f5168, 0x16fb869c03dd3 },
+ { 0x7556cec0cd994, 0x5eb9a03b7510a, 0x50ad1dd91cb71, 0x1aa5780b48a47, 0x0ae333f685277 },
+ { 0x6199733b60962, 0x69b157c266511, 0x64740f893f1ca, 0x03aa408fbf684, 0x3f81e38b8f70d },
+ },
+ {
+ { 0x37f355f17c824, 0x07ae85334815b, 0x7e3abddd2e48f, 0x61eeabe1f45e5, 0x0ad3e2d34cded },
+ { 0x10fcc7ed9affe, 0x4248cb0e96ff2, 0x4311c115172e2, 0x4c9d41cbf6925, 0x50510fc104f50 },
+ { 0x40fc5336e249d, 0x3386639fb2de1, 0x7bbf871d17b78, 0x75f796b7e8004, 0x127c158bf0fa1 },
+ },
+ {
+ { 0x28fc4ae51b974, 0x26e89bfd2dbd4, 0x4e122a07665cf, 0x7cab1203405c3, 0x4ed82479d167d },
+ { 0x17c422e9879a2, 0x28a5946c8fec3, 0x53ab32e912b77, 0x7b44da09fe0a5, 0x354ef87d07ef4 },
+ { 0x3b52260c5d975, 0x79d6836171fdc, 0x7d994f140d4bb, 0x1b6c404561854, 0x302d92d205392 },
+ },
+},
+{
+ {
+ { 0x46fb6e4e0f177, 0x53497ad5265b7, 0x1ebdba01386fc, 0x0302f0cb36a3c, 0x0edc5f5eb426d },
+ { 0x3c1a2bca4283d, 0x23430c7bb2f02, 0x1a3ea1bb58bc2, 0x7265763de5c61, 0x10e5d3b76f1ca },
+ { 0x3bfd653da8e67, 0x584953ec82a8a, 0x55e288fa7707b, 0x5395fc3931d81, 0x45b46c51361cb },
+ },
+ {
+ { 0x54ddd8a7fe3e4, 0x2cecc41c619d3, 0x43a6562ac4d91, 0x4efa5aca7bdd9, 0x5c1c0aef32122 },
+ { 0x02abf314f7fa1, 0x391d19e8a1528, 0x6a2fa13895fc7, 0x09d8eddeaa591, 0x2177bfa36dcb7 },
+ { 0x01bbcfa79db8f, 0x3d84beb3666e1, 0x20c921d812204, 0x2dd843d3b32ce, 0x4ae619387d8ab },
+ },
+ {
+ { 0x17e44985bfb83, 0x54e32c626cc22, 0x096412ff38118, 0x6b241d61a246a, 0x75685abe5ba43 },
+ { 0x3f6aa5344a32e, 0x69683680f11bb, 0x04c3581f623aa, 0x701af5875cba5, 0x1a00d91b17bf3 },
+ { 0x60933eb61f2b2, 0x5193fe92a4dd2, 0x3d995a550f43e, 0x3556fb93a883d, 0x135529b623b0e },
+ },
+ {
+ { 0x716bce22e83fe, 0x33d0130b83eb8, 0x0952abad0afac, 0x309f64ed31b8a, 0x5972ea051590a },
+ { 0x0dbd7add1d518, 0x119f823e2231e, 0x451d66e5e7de2, 0x500c39970f838, 0x79b5b81a65ca3 },
+ { 0x4ac20dc8f7811, 0x29589a9f501fa, 0x4d810d26a6b4a, 0x5ede00d96b259, 0x4f7e9c95905f3 },
+ },
+ {
+ { 0x0443d355299fe, 0x39b7d7d5aee39, 0x692519a2f34ec, 0x6e4404924cf78, 0x1942eec4a144a },
+ { 0x74bbc5781302e, 0x73135bb81ec4c, 0x7ef671b61483c, 0x7264614ccd729, 0x31993ad92e638 },
+ { 0x45319ae234992, 0x2219d47d24fb5, 0x4f04488b06cf6, 0x53aaa9e724a12, 0x2a0a65314ef9c },
+ },
+ {
+ { 0x61acd3c1c793a, 0x58b46b78779e6, 0x3369aacbe7af2, 0x509b0743074d4, 0x055dc39b6dea1 },
+ { 0x7937ff7f927c2, 0x0c2fa14c6a5b6, 0x556bddb6dd07c, 0x6f6acc179d108, 0x4cf6e218647c2 },
+ { 0x1227cc28d5bb6, 0x78ee9bff57623, 0x28cb2241f893a, 0x25b541e3c6772, 0x121a307710aa2 },
+ },
+ {
+ { 0x1713ec77483c9, 0x6f70572d5facb, 0x25ef34e22ff81, 0x54d944f141188, 0x527bb94a6ced3 },
+ { 0x35d5e9f034a97, 0x126069785bc9b, 0x5474ec7854ff0, 0x296a302a348ca, 0x333fc76c7a40e },
+ { 0x5992a995b482e, 0x78dc707002ac7, 0x5936394d01741, 0x4fba4281aef17, 0x6b89069b20a7a },
+ },
+ {
+ { 0x2fa8cb5c7db77, 0x718e6982aa810, 0x39e95f81a1a1b, 0x5e794f3646cfb, 0x0473d308a7639 },
+ { 0x2a0416270220d, 0x75f248b69d025, 0x1cbbc16656a27, 0x5b9ffd6e26728, 0x23bc2103aa73e },
+ { 0x6792603589e05, 0x248db9892595d, 0x006a53cad2d08, 0x20d0150f7ba73, 0x102f73bfde043 },
+ },
+},
+{
+ {
+ { 0x4dae0b5511c9a, 0x5257fffe0d456, 0x54108d1eb2180, 0x096cc0f9baefa, 0x3f6bd725da4ea },
+ { 0x0b9ab7f5745c6, 0x5caf0f8d21d63, 0x7debea408ea2b, 0x09edb93896d16, 0x36597d25ea5c0 },
+ { 0x58d7b106058ac, 0x3cdf8d20bee69, 0x00a4cb765015e, 0x36832337c7cc9, 0x7b7ecc19da60d },
+ },
+ {
+ { 0x64a51a77cfa9b, 0x29cf470ca0db5, 0x4b60b6e0898d9, 0x55d04ddffe6c7, 0x03bedc661bf5c },
+ { 0x2373c695c690d, 0x4c0c8520dcf18, 0x384af4b7494b9, 0x4ab4a8ea22225, 0x4235ad7601743 },
+ { 0x0cb0d078975f5, 0x292313e530c4b, 0x38dbb9124a509, 0x350d0655a11f1, 0x0e7ce2b0cdf06 },
+ },
+ {
+ { 0x6fedfd94b70f9, 0x2383f9745bfd4, 0x4beae27c4c301, 0x75aa4416a3f3f, 0x615256138aece },
+ { 0x4643ac48c85a3, 0x6878c2735b892, 0x3a53523f4d877, 0x3a504ed8bee9d, 0x666e0a5d8fb46 },
+ { 0x3f64e4870cb0d, 0x61548b16d6557, 0x7a261773596f3, 0x7724d5f275d3a, 0x7f0bc810d514d },
+ },
+ {
+ { 0x49dad737213a0, 0x745dee5d31075, 0x7b1a55e7fdbe2, 0x5ba988f176ea1, 0x1d3a907ddec5a },
+ { 0x06ba426f4136f, 0x3cafc0606b720, 0x518f0a2359cda, 0x5fae5e46feca7, 0x0d1f8dbcf8eed },
+ { 0x693313ed081dc, 0x5b0a366901742, 0x40c872ca4ca7e, 0x6f18094009e01, 0x00011b44a31bf },
+ },
+ {
+ { 0x61f696a0aa75c, 0x38b0a57ad42ca, 0x1e59ab706fdc9, 0x01308d46ebfcd, 0x63d988a2d2851 },
+ { 0x7a06c3fc66c0c, 0x1c9bac1ba47fb, 0x23935c575038e, 0x3f0bd71c59c13, 0x3ac48d916e835 },
+ { 0x20753afbd232e, 0x71fbb1ed06002, 0x39cae47a4af3a, 0x0337c0b34d9c2, 0x33fad52b2368a },
+ },
+ {
+ { 0x4c8d0c422cfe8, 0x760b4275971a5, 0x3da95bc1cad3d, 0x0f151ff5b7376, 0x3cc355ccb90a7 },
+ { 0x649c6c5e41e16, 0x60667eee6aa80, 0x4179d182be190, 0x653d9567e6979, 0x16c0f429a256d },
+ { 0x69443903e9131, 0x16f4ac6f9dd36, 0x2ea4912e29253, 0x2b4643e68d25d, 0x631eaf426bae7 },
+ },
+ {
+ { 0x175b9a3700de8, 0x77c5f00aa48fb, 0x3917785ca0317, 0x05aa9b2c79399, 0x431f2c7f665f8 },
+ { 0x10410da66fe9f, 0x24d82dcb4d67d, 0x3e6fe0e17752d, 0x4dade1ecbb08f, 0x5599648b1ea91 },
+ { 0x26344858f7b19, 0x5f43d4a295ac0, 0x242a75c52acd4, 0x5934480220d10, 0x7b04715f91253 },
+ },
+ {
+ { 0x6c280c4e6bac6, 0x3ada3b361766e, 0x42fe5125c3b4f, 0x111d84d4aac22, 0x48d0acfa57cde },
+ { 0x5bd28acf6ae43, 0x16fab8f56907d, 0x7acb11218d5f2, 0x41fe02023b4db, 0x59b37bf5c2f65 },
+ { 0x726e47dabe671, 0x2ec45e746f6c1, 0x6580e53c74686, 0x5eda104673f74, 0x16234191336d3 },
+ },
+},
+{
+ {
+ { 0x19cd61ff38640, 0x060c6c4b41ba9, 0x75cf70ca7366f, 0x118a8f16c011e, 0x4a25707a203b9 },
+ { 0x499def6267ff6, 0x76e858108773c, 0x693cac5ddcb29, 0x00311d00a9ff4, 0x2cdfdfecd5d05 },
+ { 0x7668a53f6ed6a, 0x303ba2e142556, 0x3880584c10909, 0x4fe20000a261d, 0x5721896d248e4 },
+ },
+ {
+ { 0x55091a1d0da4e, 0x4f6bfc7c1050b, 0x64e4ecd2ea9be, 0x07eb1f28bbe70, 0x03c935afc4b03 },
+ { 0x65517fd181bae, 0x3e5772c76816d, 0x019189640898a, 0x1ed2a84de7499, 0x578edd74f63c1 },
+ { 0x276c6492b0c3d, 0x09bfc40bf932e, 0x588e8f11f330b, 0x3d16e694dc26e, 0x3ec2ab590288c },
+ },
+ {
+ { 0x13a09ae32d1cb, 0x3e81eb85ab4e4, 0x07aaca43cae1f, 0x62f05d7526374, 0x0e1bf66c6adba },
+ { 0x0d27be4d87bb9, 0x56c27235db434, 0x72e6e0ea62d37, 0x5674cd06ee839, 0x2dd5c25a200fc },
+ { 0x3d5e9792c887e, 0x319724dabbc55, 0x2b97c78680800, 0x7afdfdd34e6dd, 0x730548b35ae88 },
+ },
+ {
+ { 0x3094ba1d6e334, 0x6e126a7e3300b, 0x089c0aefcfbc5, 0x2eea11f836583, 0x585a2277d8784 },
+ { 0x551a3cba8b8ee, 0x3b6422be2d886, 0x630e1419689bc, 0x4653b07a7a955, 0x3043443b411db },
+ { 0x25f8233d48962, 0x6bd8f04aff431, 0x4f907fd9a6312, 0x40fd3c737d29b, 0x7656278950ef9 },
+ },
+ {
+ { 0x073a3ea86cf9d, 0x6e0e2abfb9c2e, 0x60e2a38ea33ee, 0x30b2429f3fe18, 0x28bbf484b613f },
+ { 0x3cf59d51fc8c0, 0x7a0a0d6de4718, 0x55c3a3e6fb74b, 0x353135f884fd5, 0x3f4160a8c1b84 },
+ { 0x12f5c6f136c7c, 0x0fedba237de4c, 0x779bccebfab44, 0x3aea93f4d6909, 0x1e79cb358188f },
+ },
+ {
+ { 0x153d8f5e08181, 0x08533bbdb2efd, 0x1149796129431, 0x17a6e36168643, 0x478ab52d39d1f },
+ { 0x436c3eef7e3f1, 0x7ffd3c21f0026, 0x3e77bf20a2da9, 0x418bffc8472de, 0x65d7951b3a3b3 },
+ { 0x6a4d39252d159, 0x790e35900ecd4, 0x30725bf977786, 0x10a5c1635a053, 0x16d87a411a212 },
+ },
+ {
+ { 0x4d5e2d54e0583, 0x2e5d7b33f5f74, 0x3a5de3f887ebf, 0x6ef24bd6139b7, 0x1f990b577a5a6 },
+ { 0x57e5a42066215, 0x1a18b44983677, 0x3e652de1e6f8f, 0x6532be02ed8eb, 0x28f87c8165f38 },
+ { 0x44ead1be8f7d6, 0x5759d4f31f466, 0x0378149f47943, 0x69f3be32b4f29, 0x45882fe1534d6 },
+ },
+ {
+ { 0x49929943c6fe4, 0x4347072545b15, 0x3226bced7e7c5, 0x03a134ced89df, 0x7dcf843ce405f },
+ { 0x1345d757983d6, 0x222f54234cccd, 0x1784a3d8adbb4, 0x36ebeee8c2bcc, 0x688fe5b8f626f },
+ { 0x0d6484a4732c0, 0x7b94ac6532d92, 0x5771b8754850f, 0x48dd9df1461c8, 0x6739687e73271 },
+ },
+},
+{
+ {
+ { 0x5cc9dc80c1ac0, 0x683671486d4cd, 0x76f5f1a5e8173, 0x6d5d3f5f9df4a, 0x7da0b8f68d7e7 },
+ { 0x02014385675a6, 0x6155fb53d1def, 0x37ea32e89927c, 0x059a668f5a82e, 0x46115aba1d4dc },
+ { 0x71953c3b5da76, 0x6642233d37a81, 0x2c9658076b1bd, 0x5a581e63010ff, 0x5a5f887e83674 },
+ },
+ {
+ { 0x628d3a0a643b9, 0x01cd8640c93d2, 0x0b7b0cad70f2c, 0x3864da98144be, 0x43e37ae2d5d1c },
+ { 0x301cf70a13d11, 0x2a6a1ba1891ec, 0x2f291fb3f3ae0, 0x21a7b814bea52, 0x3669b656e44d1 },
+ { 0x63f06eda6e133, 0x233342758070f, 0x098e0459cc075, 0x4df5ead6c7c1b, 0x6a21e6cd4fd5e },
+ },
+ {
+ { 0x129126699b2e3, 0x0ee11a2603de8, 0x60ac2f5c74c21, 0x59b192a196808, 0x45371b07001e8 },
+ { 0x6170a3046e65f, 0x5401a46a49e38, 0x20add5561c4a8, 0x7abb4edde9e46, 0x586bf9f1a195f },
+ { 0x3088d5ef8790b, 0x38c2126fcb4db, 0x685bae149e3c3, 0x0bcd601a4e930, 0x0eafb03790e52 },
+ },
+ {
+ { 0x0805e0f75ae1d, 0x464cc59860a28, 0x248e5b7b00bef, 0x5d99675ef8f75, 0x44ae3344c5435 },
+ { 0x555c13748042f, 0x4d041754232c0, 0x521b430866907, 0x3308e40fb9c39, 0x309acc675a02c },
+ { 0x289b9bba543ee, 0x3ab592e28539e, 0x64d82abcdd83a, 0x3c78ec172e327, 0x62d5221b7f946 },
+ },
+ {
+ { 0x5d4263af77a3c, 0x23fdd2289aeb0, 0x7dc64f77eb9ec, 0x01bd28338402c, 0x14f29a5383922 },
+ { 0x4299c18d0936d, 0x5914183418a49, 0x52a18c721aed5, 0x2b151ba82976d, 0x5c0efde4bc754 },
+ { 0x17edc25b2d7f5, 0x37336a6081bee, 0x7b5318887e5c3, 0x49f6d491a5be1, 0x5e72365c7bee0 },
+ },
+ {
+ { 0x339062f08b33e, 0x4bbf3e657cfb2, 0x67af7f56e5967, 0x4dbd67f9ed68f, 0x70b20555cb734 },
+ { 0x3fc074571217f, 0x3a0d29b2b6aeb, 0x06478ccdde59d, 0x55e4d051bddfa, 0x77f1104c47b4e },
+ { 0x113c555112c4c, 0x7535103f9b7ca, 0x140ed1d9a2108, 0x02522333bc2af, 0x0e34398f4a064 },
+ },
+ {
+ { 0x30b093e4b1928, 0x1ce7e7ec80312, 0x4e575bdf78f84, 0x61f7a190bed39, 0x6f8aded6ca379 },
+ { 0x522d93ecebde8, 0x024f045e0f6cf, 0x16db63426cfa1, 0x1b93a1fd30fd8, 0x5e5405368a362 },
+ { 0x0123dfdb7b29a, 0x4344356523c68, 0x79a527921ee5f, 0x74bfccb3e817e, 0x780de72ec8d3d },
+ },
+ {
+ { 0x7eaf300f42772, 0x5455188354ce3, 0x4dcca4a3dcbac, 0x3d314d0bfebcb, 0x1defc6ad32b58 },
+ { 0x28545089ae7bc, 0x1e38fe9a0c15c, 0x12046e0e2377b, 0x6721c560aa885, 0x0eb28bf671928 },
+ { 0x3be1aef5195a7, 0x6f22f62bdb5eb, 0x39768b8523049, 0x43394c8fbfdbd, 0x467d201bf8dd2 },
+ },
+},
+{
+ {
+ { 0x6f4bd567ae7a9, 0x65ac89317b783, 0x07d3b20fd8932, 0x000f208326916, 0x2ef9c5a5ba384 },
+ { 0x6919a74ef4fad, 0x59ed4611452bf, 0x691ec04ea09ef, 0x3cbcb2700e984, 0x71c43c4f5ba3c },
+ { 0x56df6fa9e74cd, 0x79c95e4cf56df, 0x7be643bc609e2, 0x149c12ad9e878, 0x5a758ca390c5f },
+ },
+ {
+ { 0x0918b1d61dc94, 0x0d350260cd19c, 0x7a2ab4e37b4d9, 0x21fea735414d7, 0x0a738027f639d },
+ { 0x72710d9462495, 0x25aafaa007456, 0x2d21f28eaa31b, 0x17671ea005fd0, 0x2dbae244b3eb7 },
+ { 0x74a2f57ffe1cc, 0x1bc3073087301, 0x7ec57f4019c34, 0x34e082e1fa524, 0x2698ca635126a },
+ },
+ {
+ { 0x5702f5e3dd90e, 0x31c9a4a70c5c7, 0x136a5aa78fc24, 0x1992f3b9f7b01, 0x3c004b0c4afa3 },
+ { 0x5318832b0ba78, 0x6f24b9ff17cec, 0x0a47f30e060c7, 0x58384540dc8d0, 0x1fb43dcc49cae },
+ { 0x146ac06f4b82b, 0x4b500d89e7355, 0x3351e1c728a12, 0x10b9f69932fe3, 0x6b43fd01cd1fd },
+ },
+ {
+ { 0x742583e760ef3, 0x73dc1573216b8, 0x4ae48fdd7714a, 0x4f85f8a13e103, 0x73420b2d6ff0d },
+ { 0x75d4b4697c544, 0x11be1fff7f8f4, 0x119e16857f7e1, 0x38a14345cf5d5, 0x5a68d7105b52f },
+ { 0x4f6cb9e851e06, 0x278c4471895e5, 0x7efcdce3d64e4, 0x64f6d455c4b4c, 0x3db5632fea34b },
+ },
+ {
+ { 0x190b1829825d5, 0x0e7d3513225c9, 0x1c12be3b7abae, 0x58777781e9ca6, 0x59197ea495df2 },
+ { 0x6ee2bf75dd9d8, 0x6c72ceb34be8d, 0x679c9cc345ec7, 0x7898df96898a4, 0x04321adf49d75 },
+ { 0x16019e4e55aae, 0x74fc5f25d209c, 0x4566a939ded0d, 0x66063e716e0b7, 0x45eafdc1f4d70 },
+ },
+ {
+ { 0x64624cfccb1ed, 0x257ab8072b6c1, 0x0120725676f0a, 0x4a018d04e8eee, 0x3f73ceea5d56d },
+ { 0x401858045d72b, 0x459e5e0ca2d30, 0x488b719308bea, 0x56f4a0d1b32b5, 0x5a5eebc80362d },
+ { 0x7bfd10a4e8dc6, 0x7c899366736f4, 0x55ebbeaf95c01, 0x46db060903f8a, 0x2605889126621 },
+ },
+ {
+ { 0x18e3cc676e542, 0x26079d995a990, 0x04a7c217908b2, 0x1dc7603e6655a, 0x0dedfa10b2444 },
+ { 0x704a68360ff04, 0x3cecc3cde8b3e, 0x21cd5470f64ff, 0x6abc18d953989, 0x54ad0c2e4e615 },
+ { 0x367d5b82b522a, 0x0d3f4b83d7dc7, 0x3067f4cdbc58d, 0x20452da697937, 0x62ecb2baa77a9 },
+ },
+ {
+ { 0x72836afb62874, 0x0af3c2094b240, 0x0c285297f357a, 0x7cc2d5680d6e3, 0x61913d5075663 },
+ { 0x5795261152b3d, 0x7a1dbbafa3cbd, 0x5ad31c52588d5, 0x45f3a4164685c, 0x2e59f919a966d },
+ { 0x62d361a3231da, 0x65284004e01b8, 0x656533be91d60, 0x6ae016c00a89f, 0x3ddbc2a131c05 },
+ },
+},
+{
+ {
+ { 0x257a22796bb14, 0x6f360fb443e75, 0x680e47220eaea, 0x2fcf2a5f10c18, 0x5ee7fb38d8320 },
+ { 0x40ff9ce5ec54b, 0x57185e261b35b, 0x3e254540e70a9, 0x1b5814003e3f8, 0x78968314ac04b },
+ { 0x5fdcb41446a8e, 0x5286926ff2a71, 0x0f231e296b3f6, 0x684a357c84693, 0x61d0633c9bca0 },
+ },
+ {
+ { 0x328bcf8fc73df, 0x3b4de06ff95b4, 0x30aa427ba11a5, 0x5ee31bfda6d9c, 0x5b23ac2df8067 },
+ { 0x44935ffdb2566, 0x12f016d176c6e, 0x4fbb00f16f5ae, 0x3fab78d99402a, 0x6e965fd847aed },
+ { 0x2b953ee80527b, 0x55f5bcdb1b35a, 0x43a0b3fa23c66, 0x76e07388b820a, 0x79b9bbb9dd95d },
+ },
+ {
+ { 0x17dae8e9f7374, 0x719f76102da33, 0x5117c2a80ca8b, 0x41a66b65d0936, 0x1ba811460accb },
+ { 0x355406a3126c2, 0x50d1918727d76, 0x6e5ea0b498e0e, 0x0a3b6063214f2, 0x5065f158c9fd2 },
+ { 0x169fb0c429954, 0x59aedd9ecee10, 0x39916eb851802, 0x57917555cc538, 0x3981f39e58a4f },
+ },
+ {
+ { 0x5dfa56de66fde, 0x0058809075908, 0x6d3d8cb854a94, 0x5b2f4e970b1e3, 0x30f4452edcbc1 },
+ { 0x38a7559230a93, 0x52c1cde8ba31f, 0x2a4f2d4745a3d, 0x07e9d42d4a28a, 0x38dc083705acd },
+ { 0x52782c5759740, 0x53f3397d990ad, 0x3a939c7e84d15, 0x234c4227e39e0, 0x632d9a1a593f2 },
+ },
+ {
+ { 0x1fd11ed0c84a7, 0x021b3ed2757e1, 0x73e1de58fc1c6, 0x5d110c84616ab, 0x3a5a7df28af64 },
+ { 0x36b15b807cba6, 0x3f78a9e1afed7, 0x0a59c2c608f1f, 0x52bdd8ecb81b7, 0x0b24f48847ed4 },
+ { 0x2d4be511beac7, 0x6bda4d99e5b9b, 0x17e6996914e01, 0x7b1f0ce7fcf80, 0x34fcf74475481 },
+ },
+ {
+ { 0x31dab78cfaa98, 0x4e3216e5e54b7, 0x249823973b689, 0x2584984e48885, 0x0119a3042fb37 },
+ { 0x7e04c789767ca, 0x1671b28cfb832, 0x7e57ea2e1c537, 0x1fbaaef444141, 0x3d3bdc164dfa6 },
+ { 0x2d89ce8c2177d, 0x6cd12ba182cf4, 0x20a8ac19a7697, 0x539fab2cc72d9, 0x56c088f1ede20 },
+ },
+ {
+ { 0x35fac24f38f02, 0x7d75c6197ab03, 0x33e4bc2a42fa7, 0x1c7cd10b48145, 0x038b7ea483590 },
+ { 0x53d1110a86e17, 0x6416eb65f466d, 0x41ca6235fce20, 0x5c3fc8a99bb12, 0x09674c6b99108 },
+ { 0x6f82199316ff8, 0x05d54f1a9f3e9, 0x3bcc5d0bd274a, 0x5b284b8d2d5ad, 0x6e5e31025969e },
+ },
+ {
+ { 0x4fb0e63066222, 0x130f59747e660, 0x041868fecd41a, 0x3105e8c923bc6, 0x3058ad43d1838 },
+ { 0x462f587e593fb, 0x3d94ba7ce362d, 0x330f9b52667b7, 0x5d45a48e0f00a, 0x08f5114789a8d },
+ { 0x40ffde57663d0, 0x71445d4c20647, 0x2653e68170f7c, 0x64cdee3c55ed6, 0x26549fa4efe3d },
+ },
+},
+{
+ {
+ { 0x68549af3f666e, 0x09e2941d4bb68, 0x2e8311f5dff3c, 0x6429ef91ffbd2, 0x3a10dfe132ce3 },
+ { 0x55a461e6bf9d6, 0x78eeef4b02e83, 0x1d34f648c16cf, 0x07fea2aba5132, 0x1926e1dc6401e },
+ { 0x74e8aea17cea0, 0x0c743f83fbc0f, 0x7cb03c4bf5455, 0x68a8ba9917e98, 0x1fa1d01d861e5 },
+ },
+ {
+ { 0x4ac00d1df94ab, 0x3ba2101bd271b, 0x7578988b9c4af, 0x0f2bf89f49f7e, 0x73fced18ee9a0 },
+ { 0x055947d599832, 0x346fe2aa41990, 0x0164c8079195b, 0x799ccfb7bba27, 0x773563bc6a75c },
+ { 0x1e90863139cb3, 0x4f8b407d9a0d6, 0x58e24ca924f69, 0x7a246bbe76456, 0x1f426b701b864 },
+ },
+ {
+ { 0x635c891a12552, 0x26aebd38ede2f, 0x66dc8faddae05, 0x21c7d41a03786, 0x0b76bb1b3fa7e },
+ { 0x1264c41911c01, 0x702f44584bdf9, 0x43c511fc68ede, 0x0482c3aed35f9, 0x4e1af5271d31b },
+ { 0x0c1f97f92939b, 0x17a88956dc117, 0x6ee005ef99dc7, 0x4aa9172b231cc, 0x7b6dd61eb772a },
+ },
+ {
+ { 0x0abf9ab01d2c7, 0x3880287630ae6, 0x32eca045beddb, 0x57f43365f32d0, 0x53fa9b659bff6 },
+ { 0x5c1e850f33d92, 0x1ec119ab9f6f5, 0x7f16f6de663e9, 0x7a7d6cb16dec6, 0x703e9bceaf1d2 },
+ { 0x4c8e994885455, 0x4ccb5da9cad82, 0x3596bc610e975, 0x7a80c0ddb9f5e, 0x398d93e5c4c61 },
+ },
+ {
+ { 0x77c60d2e7e3f2, 0x4061051763870, 0x67bc4e0ecd2aa, 0x2bb941f1373b9, 0x699c9c9002c30 },
+ { 0x3d16733e248f3, 0x0e2b7e14be389, 0x42c0ddaf6784a, 0x589ea1fc67850, 0x53b09b5ddf191 },
+ { 0x6a7235946f1cc, 0x6b99cbb2fbe60, 0x6d3a5d6485c62, 0x4839466e923c0, 0x51caf30c6fcdd },
+ },
+ {
+ { 0x2f99a18ac54c7, 0x398a39661ee6f, 0x384331e40cde3, 0x4cd15c4de19a6, 0x12ae29c189f8e },
+ { 0x3a7427674e00a, 0x6142f4f7e74c1, 0x4cc93318c3a15, 0x6d51bac2b1ee7, 0x5504aa292383f },
+ { 0x6c0cb1f0d01cf, 0x187469ef5d533, 0x27138883747bf, 0x2f52ae53a90e8, 0x5fd14fe958eba },
+ },
+ {
+ { 0x2fe5ebf93cb8e, 0x226da8acbe788, 0x10883a2fb7ea1, 0x094707842cf44, 0x7dd73f960725d },
+ { 0x42ddf2845ab2c, 0x6214ffd3276bb, 0x00b8d181a5246, 0x268a6d579eb20, 0x093ff26e58647 },
+ { 0x524fe68059829, 0x65b75e47cb621, 0x15eb0a5d5cc19, 0x05209b3929d5a, 0x2f59bcbc86b47 },
+ },
+ {
+ { 0x1d560b691c301, 0x7f5bafce3ce08, 0x4cd561614806c, 0x4588b6170b188, 0x2aa55e3d01082 },
+ { 0x47d429917135f, 0x3eacfa07af070, 0x1deab46b46e44, 0x7a53f3ba46cdf, 0x5458b42e2e51a },
+ { 0x192e60c07444f, 0x5ae8843a21daa, 0x6d721910b1538, 0x3321a95a6417e, 0x13e9004a8a768 },
+ },
+},
+{
+ {
+ { 0x600c9193b877f, 0x21c1b8a0d7765, 0x379927fb38ea2, 0x70d7679dbe01b, 0x5f46040898de9 },
+ { 0x58845832fcedb, 0x135cd7f0c6e73, 0x53ffbdfe8e35b, 0x22f195e06e55b, 0x73937e8814bce },
+ { 0x37116297bf48d, 0x45a9e0d069720, 0x25af71aa744ec, 0x41af0cb8aaba3, 0x2cf8a4e891d5e },
+ },
+ {
+ { 0x5487e17d06ba2, 0x3872a032d6596, 0x65e28c09348e0, 0x27b6bb2ce40c2, 0x7a6f7f2891d6a },
+ { 0x3fd8707110f67, 0x26f8716a92db2, 0x1cdaa1b753027, 0x504be58b52661, 0x2049bd6e58252 },
+ { 0x1fd8d6a9aef49, 0x7cb67b7216fa1, 0x67aff53c3b982, 0x20ea610da9628, 0x6011aadfc5459 },
+ },
+ {
+ { 0x6d0c802cbf890, 0x141bfed554c7b, 0x6dbb667ef4263, 0x58f3126857edc, 0x69ce18b779340 },
+ { 0x7926dcf95f83c, 0x42e25120e2bec, 0x63de96df1fa15, 0x4f06b50f3f9cc, 0x6fc5cc1b0b62f },
+ { 0x75528b29879cb, 0x79a8fd2125a3d, 0x27c8d4b746ab8, 0x0f8893f02210c, 0x15596b3ae5710 },
+ },
+ {
+ { 0x731167e5124ca, 0x17b38e8bbe13f, 0x3d55b942f9056, 0x09c1495be913f, 0x3aa4e241afb6d },
+ { 0x739d23f9179a2, 0x632fadbb9e8c4, 0x7c8522bfe0c48, 0x6ed0983ef5aa9, 0x0d2237687b5f4 },
+ { 0x138bf2a3305f5, 0x1f45d24d86598, 0x5274bad2160fe, 0x1b6041d58d12a, 0x32fcaa6e4687a },
+ },
+ {
+ { 0x7a4732787ccdf, 0x11e427c7f0640, 0x03659385f8c64, 0x5f4ead9766bfb, 0x746f6336c2600 },
+ { 0x56e8dc57d9af5, 0x5b3be17be4f78, 0x3bf928cf82f4b, 0x52e55600a6f11, 0x4627e9cefebd6 },
+ { 0x2f345ab6c971c, 0x653286e63e7e9, 0x51061b78a23ad, 0x14999acb54501, 0x7b4917007ed66 },
+ },
+ {
+ { 0x41b28dd53a2dd, 0x37be85f87ea86, 0x74be3d2a85e41, 0x1be87fac96ca6, 0x1d03620fe08cd },
+ { 0x5fb5cab84b064, 0x2513e778285b0, 0x457383125e043, 0x6bda3b56e223d, 0x122ba376f844f },
+ { 0x232cda2b4e554, 0x0422ba30ff840, 0x751e7667b43f5, 0x6261755da5f3e, 0x02c70bf52b68e },
+ },
+ {
+ { 0x532bf458d72e1, 0x40f96e796b59c, 0x22ef79d6f9da3, 0x501ab67beca77, 0x6b0697e3feb43 },
+ { 0x7ec4b5d0b2fbb, 0x200e910595450, 0x742057105715e, 0x2f07022530f60, 0x26334f0a409ef },
+ { 0x0f04adf62a3c0, 0x5e0edb48bb6d9, 0x7c34aa4fbc003, 0x7d74e4e5cac24, 0x1cc37f43441b2 },
+ },
+ {
+ { 0x656f1c9ceaeb9, 0x7031cacad5aec, 0x1308cd0716c57, 0x41c1373941942, 0x3a346f772f196 },
+ { 0x7565a5cc7324f, 0x01ca0d5244a11, 0x116b067418713, 0x0a57d8c55edae, 0x6c6809c103803 },
+ { 0x55112e2da6ac8, 0x6363d0a3dba5a, 0x319c98ba6f40c, 0x2e84b03a36ec7, 0x05911b9f6ef7c },
+ },
+},
+{
+ {
+ { 0x1acf3512eeaef, 0x2639839692a69, 0x669a234830507, 0x68b920c0603d4, 0x555ef9d1c64b2 },
+ { 0x39983f5df0ebb, 0x1ea2589959826, 0x6ce638703cdd6, 0x6311678898505, 0x6b3cecf9aa270 },
+ { 0x770ba3b73bd08, 0x11475f7e186d4, 0x0251bc9892bbc, 0x24eab9bffcc5a, 0x675f4de133817 },
+ },
+ {
+ { 0x7f6d93bdab31d, 0x1f3aca5bfd425, 0x2fa521c1c9760, 0x62180ce27f9cd, 0x60f450b882cd3 },
+ { 0x452036b1782fc, 0x02d95b07681c5, 0x5901cf99205b2, 0x290686e5eecb4, 0x13d99df70164c },
+ { 0x35ec321e5c0ca, 0x13ae337f44029, 0x4008e813f2da7, 0x640272f8e0c3a, 0x1c06de9e55eda },
+ },
+ {
+ { 0x52b40ff6d69aa, 0x31b8809377ffa, 0x536625cd14c2c, 0x516af252e17d1, 0x78096f8e7d32b },
+ { 0x77ad6a33ec4e2, 0x717c5dc11d321, 0x4a114559823e4, 0x306ce50a1e2b1, 0x4cf38a1fec2db },
+ { 0x2aa650dfa5ce7, 0x54916a8f19415, 0x00dc96fe71278, 0x55f2784e63eb8, 0x373cad3a26091 },
+ },
+ {
+ { 0x6a8fb89ddbbad, 0x78c35d5d97e37, 0x66e3674ef2cb2, 0x34347ac53dd8f, 0x21547eda5112a },
+ { 0x4634d82c9f57c, 0x4249268a6d652, 0x6336d687f2ff7, 0x4fe4f4e26d9a0, 0x0040f3d945441 },
+ { 0x5e939fd5986d3, 0x12a2147019bdf, 0x4c466e7d09cb2, 0x6fa5b95d203dd, 0x63550a334a254 },
+ },
+ {
+ { 0x2584572547b49, 0x75c58811c1377, 0x4d3c637cc171b, 0x33d30747d34e3, 0x39a92bafaa7d7 },
+ { 0x7d6edb569cf37, 0x60194a5dc2ca0, 0x5af59745e10a6, 0x7a8f53e004875, 0x3eea62c7daf78 },
+ { 0x4c713e693274e, 0x6ed1b7a6eb3a4, 0x62ace697d8e15, 0x266b8292ab075, 0x68436a0665c9c },
+ },
+ {
+ { 0x6d317e820107c, 0x090815d2ca3ca, 0x03ff1eb1499a1, 0x23960f050e319, 0x5373669c91611 },
+ { 0x235e8202f3f27, 0x44c9f2eb61780, 0x630905b1d7003, 0x4fcc8d274ead1, 0x17b6e7f68ab78 },
+ { 0x014ab9a0e5257, 0x09939567f8ba5, 0x4b47b2a423c82, 0x688d7e57ac42d, 0x1cb4b5a678f87 },
+ },
+ {
+ { 0x4aa62a2a007e7, 0x61e0e38f62d6e, 0x02f888fcc4782, 0x7562b83f21c00, 0x2dc0fd2d82ef6 },
+ { 0x4c06b394afc6c, 0x4931b4bf636cc, 0x72b60d0322378, 0x25127c6818b25, 0x330bca78de743 },
+ { 0x6ff841119744e, 0x2c560e8e49305, 0x7254fefe5a57a, 0x67ae2c560a7df, 0x3c31be1b369f1 },
+ },
+ {
+ { 0x0bc93f9cb4272, 0x3f8f9db73182d, 0x2b235eabae1c4, 0x2ddbf8729551a, 0x41cec1097e7d5 },
+ { 0x4864d08948aee, 0x5d237438df61e, 0x2b285601f7067, 0x25dbcbae6d753, 0x330b61134262d },
+ { 0x619d7a26d808a, 0x3c3b3c2adbef2, 0x6877c9eec7f52, 0x3beb9ebe1b66d, 0x26b44cd91f287 },
+ },
+},
+{
+ {
+ { 0x7f29362730383, 0x7fd7951459c36, 0x7504c512d49e7, 0x087ed7e3bc55f, 0x7deb10149c726 },
+ { 0x048478f387475, 0x69397d9678a3e, 0x67c8156c976f3, 0x2eb4d5589226c, 0x2c709e6c1c10a },
+ { 0x2af6a8766ee7a, 0x08aaa79a1d96c, 0x42f92d59b2fb0, 0x1752c40009c07, 0x08e68e9ff62ce },
+ },
+ {
+ { 0x509d50ab8f2f9, 0x1b8ab247be5e5, 0x5d9b2e6b2e486, 0x4faa5479a1339, 0x4cb13bd738f71 },
+ { 0x5500a4bc130ad, 0x127a17a938695, 0x02a26fa34e36d, 0x584d12e1ecc28, 0x2f1f3f87eeba3 },
+ { 0x48c75e515b64a, 0x75b6952071ef0, 0x5d46d42965406, 0x7746106989f9f, 0x19a1e353c0ae2 },
+ },
+ {
+ { 0x172cdd596bdbd, 0x0731ddf881684, 0x10426d64f8115, 0x71a4fd8a9a3da, 0x736bd3990266a },
+ { 0x47560bafa05c3, 0x418dcabcc2fa3, 0x35991cecf8682, 0x24371a94b8c60, 0x41546b11c20c3 },
+ { 0x32d509334b3b4, 0x16c102cae70aa, 0x1720dd51bf445, 0x5ae662faf9821, 0x412295a2b87fa },
+ },
+ {
+ { 0x55261e293eac6, 0x06426759b65cc, 0x40265ae116a48, 0x6c02304bae5bc, 0x0760bb8d195ad },
+ { 0x19b88f57ed6e9, 0x4cdbf1904a339, 0x42b49cd4e4f2c, 0x71a2e771909d9, 0x14e153ebb52d2 },
+ { 0x61a17cde6818a, 0x53dad34108827, 0x32b32c55c55b6, 0x2f9165f9347a3, 0x6b34be9bc33ac },
+ },
+ {
+ { 0x469656571f2d3, 0x0aa61ce6f423f, 0x3f940d71b27a1, 0x185f19d73d16a, 0x01b9c7b62e6dd },
+ { 0x72f643a78c0b2, 0x3de45c04f9e7b, 0x706d68d30fa5c, 0x696f63e8e2f24, 0x2012c18f0922d },
+ { 0x355e55ac89d29, 0x3e8b414ec7101, 0x39db07c520c90, 0x6f41e9b77efe1, 0x08af5b784e4ba },
+ },
+ {
+ { 0x314d289cc2c4b, 0x23450e2f1bc4e, 0x0cd93392f92f4, 0x1370c6a946b7d, 0x6423c1d5afd98 },
+ { 0x499dc881f2533, 0x34ef26476c506, 0x4d107d2741497, 0x346c4bd6efdb3, 0x32b79d71163a1 },
+ { 0x5f8d9edfcb36a, 0x1e6e8dcbf3990, 0x7974f348af30a, 0x6e6724ef19c7c, 0x480a5efbc13e2 },
+ },
+ {
+ { 0x14ce442ce221f, 0x18980a72516cc, 0x072f80db86677, 0x703331fda526e, 0x24b31d47691c8 },
+ { 0x1e70b01622071, 0x1f163b5f8a16a, 0x56aaf341ad417, 0x7989635d830f7, 0x47aa27600cb7b },
+ { 0x41eedc015f8c3, 0x7cf8d27ef854a, 0x289e3584693f9, 0x04a7857b309a7, 0x545b585d14dda },
+ },
+ {
+ { 0x4e4d0e3b321e1, 0x7451fe3d2ac40, 0x666f678eea98d, 0x038858667fead, 0x4d22dc3e64c8d },
+ { 0x7275ea0d43a0f, 0x681137dd7ccf7, 0x1e79cbab79a38, 0x22a214489a66a, 0x0f62f9c332ba5 },
+ { 0x46589d63b5f39, 0x7eaf979ec3f96, 0x4ebe81572b9a8, 0x21b7f5d61694a, 0x1c0fa01a36371 },
+ },
+},
+{
+ {
+ { 0x02b0e8c936a50, 0x6b83b58b6cd21, 0x37ed8d3e72680, 0x0a037db9f2a62, 0x4005419b1d2bc },
+ { 0x604b622943dff, 0x1c899f6741a58, 0x60219e2f232fb, 0x35fae92a7f9cb, 0x0fa3614f3b1ca },
+ { 0x3febdb9be82f0, 0x5e74895921400, 0x553ea38822706, 0x5a17c24cfc88c, 0x1fba218aef40a },
+ },
+ {
+ { 0x657043e7b0194, 0x5c11b55efe9e7, 0x7737bc6a074fb, 0x0eae41ce355cc, 0x6c535d13ff776 },
+ { 0x49448fac8f53e, 0x34f74c6e8356a, 0x0ad780607dba2, 0x7213a7eb63eb6, 0x392e3acaa8c86 },
+ { 0x534e93e8a35af, 0x08b10fd02c997, 0x26ac2acb81e05, 0x09d8c98ce3b79, 0x25e17fe4d50ac },
+ },
+ {
+ { 0x77ff576f121a7, 0x4e5f9b0fc722b, 0x46f949b0d28c8, 0x4cde65d17ef26, 0x6bba828f89698 },
+ { 0x09bd71e04f676, 0x25ac841f2a145, 0x1a47eac823871, 0x1a8a8c36c581a, 0x255751442a9fb },
+ { 0x1bc6690fe3901, 0x314132f5abc5a, 0x611835132d528, 0x5f24b8eb48a57, 0x559d504f7f6b7 },
+ },
+ {
+ { 0x091e7f6d266fd, 0x36060ef037389, 0x18788ec1d1286, 0x287441c478eb0, 0x123ea6a3354bd },
+ { 0x38378b3eb54d5, 0x4d4aaa78f94ee, 0x4a002e875a74d, 0x10b851367b17c, 0x01ab12d5807e3 },
+ { 0x5189041e32d96, 0x05b062b090231, 0x0c91766e7b78f, 0x0aa0f55a138ec, 0x4a3961e2c918a },
+ },
+ {
+ { 0x7d644f3233f1e, 0x1c69f9e02c064, 0x36ae5e5266898, 0x08fc1dad38b79, 0x68aceead9bd41 },
+ { 0x43be0f8e6bba0, 0x68fdffc614e3b, 0x4e91dab5b3be0, 0x3b1d4c9212ff0, 0x2cd6bce3fb1db },
+ { 0x4c90ef3d7c210, 0x496f5a0818716, 0x79cf88cc239b8, 0x2cb9c306cf8db, 0x595760d5b508f },
+ },
+ {
+ { 0x2cbebfd022790, 0x0b8822aec1105, 0x4d1cfd226bccc, 0x515b2fa4971be, 0x2cb2c5df54515 },
+ { 0x1bfe104aa6397, 0x11494ff996c25, 0x64251623e5800, 0x0d49fc5e044be, 0x709fa43edcb29 },
+ { 0x25d8c63fd2aca, 0x4c5cd29dffd61, 0x32ec0eb48af05, 0x18f9391f9b77c, 0x70f029ecf0c81 },
+ },
+ {
+ { 0x2afaa5e10b0b9, 0x61de08355254d, 0x0eb587de3c28d, 0x4f0bb9f7dbbd5, 0x44eca5a2a74bd },
+ { 0x307b32eed3e33, 0x6748ab03ce8c2, 0x57c0d9ab810bc, 0x42c64a224e98c, 0x0b7d5d8a6c314 },
+ { 0x448327b95d543, 0x0146681e3a4ba, 0x38714adc34e0c, 0x4f26f0e298e30, 0x272224512c7de },
+ },
+ {
+ { 0x3bb8a42a975fc, 0x6f2d5b46b17ef, 0x7b6a9223170e5, 0x053713fe3b7e6, 0x19735fd7f6bc2 },
+ { 0x492af49c5342e, 0x2365cdf5a0357, 0x32138a7ffbb60, 0x2a1f7d14646fe, 0x11b5df18a44cc },
+ { 0x390d042c84266, 0x1efe32a8fdc75, 0x6925ee7ae1238, 0x4af9281d0e832, 0x0fef911191df8 },
+ },
+},
+};
+#else
/* base[i][j] = (j+1)*256^i*B */
-static ge_precomp base[32][8] = {
+static const ge_precomp base[32][8] = {
{
{
{ 25967493,-14356035,29566456,3660896,-12694345,4014787,27544626,-11754271,-6079156,2047605 },
@@ -2109,30 +9090,33 @@ static ge_precomp base[32][8] = {
},
},
} ;
+#endif
static void ge_select(ge_precomp *t,int pos,signed char b)
{
+#ifndef CURVED25519_ASM
ge_precomp minust;
unsigned char bnegative = negative(b);
unsigned char babs = b - (((-bnegative) & b) << 1);
ge_precomp_0(t);
- cmov(t,&base[pos][0],equal(babs,1));
- cmov(t,&base[pos][1],equal(babs,2));
- cmov(t,&base[pos][2],equal(babs,3));
- cmov(t,&base[pos][3],equal(babs,4));
- cmov(t,&base[pos][4],equal(babs,5));
- cmov(t,&base[pos][5],equal(babs,6));
- cmov(t,&base[pos][6],equal(babs,7));
- cmov(t,&base[pos][7],equal(babs,8));
- fe_copy(minust.yplusx,t->yminusx);
- fe_copy(minust.yminusx,t->yplusx);
+ cmov(t,&base[pos][0],babs,1);
+ cmov(t,&base[pos][1],babs,2);
+ cmov(t,&base[pos][2],babs,3);
+ cmov(t,&base[pos][3],babs,4);
+ cmov(t,&base[pos][4],babs,5);
+ cmov(t,&base[pos][5],babs,6);
+ cmov(t,&base[pos][6],babs,7);
+ cmov(t,&base[pos][7],babs,8);
+ fe_cswap(t->yminusx, t->yplusx, bnegative);
fe_neg(minust.xy2d,t->xy2d);
- cmov(t,&minust,bnegative);
+ fe_cmov(t->xy2d,minust.xy2d,bnegative);
+#else
+ fe_cmov_table((fe*)t, (fe*)base[pos], b);
+#endif
}
-
/*
h = a * B
where a = a[0]+256*a[1]+...+256^31 a[31]
@@ -2146,7 +9130,9 @@ void ge_scalarmult_base(ge_p3 *h,const unsigned char *a)
signed char e[64];
signed char carry;
ge_p1p1 r;
+#ifndef CURVED25519_ASM
ge_p2 s;
+#endif
ge_precomp t;
int i;
@@ -2167,8 +9153,17 @@ void ge_scalarmult_base(ge_p3 *h,const unsigned char *a)
e[63] += carry;
/* each e[i] is between -8 and 8 */
- ge_p3_0(h);
- for (i = 1;i < 64;i += 2) {
+#ifndef CURVED25519_ASM
+ ge_select(&t,0,e[1]);
+ fe_sub(h->X, t.yplusx, t.yminusx);
+ fe_add(h->Y, t.yplusx, t.yminusx);
+ fe_0(h->Z);
+ h->Z[0] = 4;
+ fe_mul(h->T,h->X,h->Y);
+ fe_add(h->X, h->X, h->X);
+ fe_add(h->Y, h->Y, h->Y);
+
+ for (i = 3;i < 64;i += 2) {
ge_select(&t,i / 2,e[i]);
ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
}
@@ -2182,6 +9177,18 @@ void ge_scalarmult_base(ge_p3 *h,const unsigned char *a)
ge_select(&t,i / 2,e[i]);
ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
}
+#else
+ ge_select(&t, 0, e[0]);
+ fe_sub(h->X, t.yplusx, t.yminusx);
+ fe_add(h->Y, t.yplusx, t.yminusx);
+ fe_0(h->Z);
+ h->Z[0] = 2;
+ fe_copy(h->T, t.xy2d);
+ for (i = 1; i < 64; i++) {
+ ge_select(&t, i, e[i]);
+ ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
+ }
+#endif
}
@@ -2217,8 +9224,137 @@ static void slide(signed char *r,const unsigned char *a)
}
}
-
-static ge_precomp Bi[8] = {
+#ifdef CURVED25519_ASM_64BIT
+static const ge_precomp Bi[8] = {
+ {
+ { 0x2fbc93c6f58c3b85, -0x306cd2390473f1e7, 0x270b4898643d42c2, 0x07cf9d3a33d4ba65, },
+ { -0x62efc6fa28bf6ec2, -0x02c660fa2ebf414d, -0x5a3e7bcb977075f7, 0x44fd2f9298f81267, },
+ { -0x5436edfa78855598, 0x26d9e823ccaac49e, 0x5a1b7dcbdd43598c, 0x6f117b689f0c65a8, },
+ },
+ {
+ { -0x50da4f57b31168d0, 0x025a8430e8864b8a, -0x3ee4affd60fe98ce, 0x7a164e1b9a80f8f4, },
+ { 0x56611fe8a4fcd265, 0x3bd353fde5c1ba7d, -0x7ece0ce5deb42943, 0x2ab91587555bda62, },
+ { 0x14ae933f0dd0d889, 0x589423221c35da62, -0x2e8f1aba730d24b4, 0x5a2826af12b9b4c6, },
+ },
+ {
+ { -0x5ded43bbf75a44cd, -0x72afb73c38a112fe, -0x22e414f3a54013bc, 0x2945ccf146e206eb, },
+ { 0x7f9182c3a447d6ba, -0x2affeb2eb4d8d649, -0x1cc30ee3479b5f79, 0x154a7e73eb1b55f3, },
+ { -0x4344240e7ed57d7b, 0x270e0807d0bdd1fc, -0x4be498f4e44258d3, 0x43aabe696b3bb69a, },
+ },
+ {
+ { 0x6b1a5cd0944ea3bf, 0x7470353ab39dc0d2, 0x71b2528228542e49, 0x461bea69283c927e, },
+ { -0x4590d36555cdde4f, 0x6ca021533bba23a7, -0x621589b06de6d3c6, 0x1d6edd5d2e5317e0, },
+ { -0x0e7c9237fe474c5e, -0x4cfca0b8fac15b66, 0x529c41ba5877adf3, 0x7a9fbb1c6a0f90a7, },
+ },
+ {
+ { -0x64d1987559579cd1, -0x59af6190ae43b93b, -0x314dcc3639790a4b, 0x34b9ed338add7f59, },
+ { -0x0c91de81fc627f9c, -0x675f7e490adfbe65, -0x693439f718a14fbc, 0x49c05a51fadc9c8f, },
+ { 0x06b4e8bf9045af1b, -0x1d007c1758e62dd1, -0x550903d66c2b30ea, 0x73c172021b008b06, },
+ },
+ {
+ { 0x2fbf00848a802ade, -0x1a260130fdcfd1d9, 0x113e847117703406, 0x4275aae2546d8faf, },
+ { 0x315f5b0249864348, 0x3ed6b36977088381, -0x5c5f8aaa9572146b, 0x18ab598029d5c77f, },
+ { -0x27d4d33a029f7617, 0x031eb4a13282e4a4, 0x44311199b51a8622, 0x3dc65522b53df948, },
+ },
+ {
+ { -0x408f3ddd5dff8093, -0x407b4c654a432125, 0x537a0e12fb07ba07, 0x234fd7eec346f241, },
+ { 0x506f013b327fbf93, -0x5103143664889095, -0x62ed4dcd5552a698, 0x0267882d176024a7, },
+ { 0x5360a119732ea378, 0x2437e6b1df8dd471, -0x5d10c8076e581acd, 0x497ba6fdaa097863, },
+ },
+ {
+ { 0x24cecc0313cfeaa0, -0x79b73d72e763db93, 0x2dbdbdfac1f2d4d0, 0x61e22917f12de72b, },
+ { 0x040bcd86468ccf0b, -0x2c7d645bd566ef2a, 0x7508300807b25192, 0x43b5cd4218d05ebf, },
+ { 0x5d9a762f9bd0b516, -0x14c750b1c8c02112, 0x032e5a7d93d64270, 0x511d61210ae4d842, },
+ },
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge_precomp Bi[8] = {
+ {
+ { -0x0a73c47b, 0x2fbc93c6, -0x0473f1e7, -0x306cd23a, 0x643d42c2, 0x270b4898, 0x33d4ba65, 0x07cf9d3a, },
+ { -0x28bf6ec2, -0x62efc6fb, -0x2ebf414d, -0x02c660fb, 0x688f8a09, -0x5a3e7bcc, -0x6707ed99, 0x44fd2f92, },
+ { -0x78855598, -0x5436edfb, -0x33553b62, 0x26d9e823, -0x22bca674, 0x5a1b7dcb, -0x60f39a58, 0x6f117b68, },
+ },
+ {
+ { 0x4cee9730, -0x50da4f58, -0x1779b476, 0x025a8430, -0x60fe98ce, -0x3ee4affe, -0x657f070c, 0x7a164e1b, },
+ { -0x5b032d9b, 0x56611fe8, -0x1a3e4583, 0x3bd353fd, 0x214bd6bd, -0x7ece0ce6, 0x555bda62, 0x2ab91587, },
+ { 0x0dd0d889, 0x14ae933f, 0x1c35da62, 0x58942322, -0x730d24b4, -0x2e8f1abb, 0x12b9b4c6, 0x5a2826af, },
+ },
+ {
+ { 0x08a5bb33, -0x5ded43bc, -0x38a112fe, -0x72afb73d, 0x5abfec44, -0x22e414f4, 0x46e206eb, 0x2945ccf1, },
+ { -0x5bb82946, 0x7f9182c3, 0x4b2729b7, -0x2affeb2f, -0x479b5f79, -0x1cc30ee4, -0x14e4aa0d, 0x154a7e73, },
+ { -0x7ed57d7b, -0x4344240f, -0x2f422e04, 0x270e0807, 0x1bbda72d, -0x4be498f5, 0x6b3bb69a, 0x43aabe69, },
+ },
+ {
+ { -0x6bb15c41, 0x6b1a5cd0, -0x4c623f2e, 0x7470353a, 0x28542e49, 0x71b25282, 0x283c927e, 0x461bea69, },
+ { -0x55cdde4f, -0x4590d366, 0x3bba23a7, 0x6ca02153, -0x6de6d3c6, -0x621589b1, 0x2e5317e0, 0x1d6edd5d, },
+ { 0x01b8b3a2, -0x0e7c9238, 0x053ea49a, -0x4cfca0b9, 0x5877adf3, 0x529c41ba, 0x6a0f90a7, 0x7a9fbb1c, },
+ },
+ {
+ { -0x59579cd1, -0x64d19876, 0x51bc46c5, -0x59af6191, -0x39790a4b, -0x314dcc37, -0x752280a7, 0x34b9ed33, },
+ { 0x039d8064, -0x0c91de82, -0x0adfbe65, -0x675f7e4a, -0x18a14fbc, -0x693439f8, -0x05236371, 0x49c05a51, },
+ { -0x6fba50e5, 0x06b4e8bf, -0x58e62dd1, -0x1d007c18, -0x6c2b30ea, -0x550903d7, 0x1b008b06, 0x73c17202, },
+ },
+ {
+ { -0x757fd522, 0x2fbf0084, 0x02302e27, -0x1a260131, 0x17703406, 0x113e8471, 0x546d8faf, 0x4275aae2, },
+ { 0x49864348, 0x315f5b02, 0x77088381, 0x3ed6b369, 0x6a8deb95, -0x5c5f8aab, 0x29d5c77f, 0x18ab5980, },
+ { -0x029f7617, -0x27d4d33b, 0x3282e4a4, 0x031eb4a1, -0x4ae579de, 0x44311199, -0x4ac206b8, 0x3dc65522, },
+ },
+ {
+ { -0x5dff8093, -0x408f3dde, -0x4a432125, -0x407b4c66, -0x04f845f9, 0x537a0e12, -0x3cb90dbf, 0x234fd7ee, },
+ { 0x327fbf93, 0x506f013b, -0x64889095, -0x51031437, -0x5552a698, -0x62ed4dce, 0x176024a7, 0x0267882d, },
+ { 0x732ea378, 0x5360a119, -0x20722b8f, 0x2437e6b1, -0x6e581acd, -0x5d10c808, -0x55f6879d, 0x497ba6fd, },
+ },
+ {
+ { 0x13cfeaa0, 0x24cecc03, 0x189c246d, -0x79b73d73, -0x3e0d2b30, 0x2dbdbdfa, -0x0ed218d5, 0x61e22917, },
+ { 0x468ccf0b, 0x040bcd86, 0x2a9910d6, -0x2c7d645c, 0x07b25192, 0x75083008, 0x18d05ebf, 0x43b5cd42, },
+ { -0x642f4aea, 0x5d9a762f, 0x373fdeee, -0x14c750b2, -0x6c29bd90, 0x032e5a7d, 0x0ae4d842, 0x511d6121, },
+ },
+};
+#elif defined(CURVED25519_128BIT)
+static const ge_precomp Bi[8] = {
+ {
+ { 0x493c6f58c3b85, 0x0df7181c325f7, 0x0f50b0b3e4cb7, 0x5329385a44c32, 0x07cf9d3a33d4b },
+ { 0x03905d740913e, 0x0ba2817d673a2, 0x23e2827f4e67c, 0x133d2e0c21a34, 0x44fd2f9298f81 },
+ { 0x11205877aaa68, 0x479955893d579, 0x50d66309b67a0, 0x2d42d0dbee5ee, 0x6f117b689f0c6 },
+ },
+ {
+ { 0x5b0a84cee9730, 0x61d10c97155e4, 0x4059cc8096a10, 0x47a608da8014f, 0x7a164e1b9a80f },
+ { 0x11fe8a4fcd265, 0x7bcb8374faacc, 0x52f5af4ef4d4f, 0x5314098f98d10, 0x2ab91587555bd },
+ { 0x6933f0dd0d889, 0x44386bb4c4295, 0x3cb6d3162508c, 0x26368b872a2c6, 0x5a2826af12b9b },
+ },
+ {
+ { 0x2bc4408a5bb33, 0x078ebdda05442, 0x2ffb112354123, 0x375ee8df5862d, 0x2945ccf146e20 },
+ { 0x182c3a447d6ba, 0x22964e536eff2, 0x192821f540053, 0x2f9f19e788e5c, 0x154a7e73eb1b5 },
+ { 0x3dbf1812a8285, 0x0fa17ba3f9797, 0x6f69cb49c3820, 0x34d5a0db3858d, 0x43aabe696b3bb },
+ },
+ {
+ { 0x25cd0944ea3bf, 0x75673b81a4d63, 0x150b925d1c0d4, 0x13f38d9294114, 0x461bea69283c9 },
+ { 0x72c9aaa3221b1, 0x267774474f74d, 0x064b0e9b28085, 0x3f04ef53b27c9, 0x1d6edd5d2e531 },
+ { 0x36dc801b8b3a2, 0x0e0a7d4935e30, 0x1deb7cecc0d7d, 0x053a94e20dd2c, 0x7a9fbb1c6a0f9 },
+ },
+ {
+ { 0x6678aa6a8632f, 0x5ea3788d8b365, 0x21bd6d6994279, 0x7ace75919e4e3, 0x34b9ed338add7 },
+ { 0x6217e039d8064, 0x6dea408337e6d, 0x57ac112628206, 0x647cb65e30473, 0x49c05a51fadc9 },
+ { 0x4e8bf9045af1b, 0x514e33a45e0d6, 0x7533c5b8bfe0f, 0x583557b7e14c9, 0x73c172021b008 },
+ },
+ {
+ { 0x700848a802ade, 0x1e04605c4e5f7, 0x5c0d01b9767fb, 0x7d7889f42388b, 0x4275aae2546d8 },
+ { 0x75b0249864348, 0x52ee11070262b, 0x237ae54fb5acd, 0x3bfd1d03aaab5, 0x18ab598029d5c },
+ { 0x32cc5fd6089e9, 0x426505c949b05, 0x46a18880c7ad2, 0x4a4221888ccda, 0x3dc65522b53df },
+ },
+ {
+ { 0x0c222a2007f6d, 0x356b79bdb77ee, 0x41ee81efe12ce, 0x120a9bd07097d, 0x234fd7eec346f },
+ { 0x7013b327fbf93, 0x1336eeded6a0d, 0x2b565a2bbf3af, 0x253ce89591955, 0x0267882d17602 },
+ { 0x0a119732ea378, 0x63bf1ba8e2a6c, 0x69f94cc90df9a, 0x431d1779bfc48, 0x497ba6fdaa097 },
+ },
+ {
+ { 0x6cc0313cfeaa0, 0x1a313848da499, 0x7cb534219230a, 0x39596dedefd60, 0x61e22917f12de },
+ { 0x3cd86468ccf0b, 0x48553221ac081, 0x6c9464b4e0a6e, 0x75fba84180403, 0x43b5cd4218d05 },
+ { 0x2762f9bd0b516, 0x1c6e7fbddcbb3, 0x75909c3ace2bd, 0x42101972d3ec9, 0x511d61210ae4d },
+ },
+};
+#else
+static const ge_precomp Bi[8] = {
{
{ 25967493,-14356035,29566456,3660896,-12694345,4014787,27544626,-11754271,-6079156,2047605 },
{ -12545711,934262,-2722910,3049990,-727428,9406986,12720692,5043384,19500929,-15469378 },
@@ -2260,6 +9396,7 @@ static ge_precomp Bi[8] = {
{ -3099351,10324967,-2241613,7453183,-5446979,-2735503,-13812022,-16236442,-32461234,-12290683 },
},
} ;
+#endif
/*
@@ -2268,7 +9405,7 @@ where a = a[0]+256*a[1]+...+256^31 a[31].
and b = b[0]+256*b[1]+...+256^31 b[31].
B is the Ed25519 base point (x,4/5) with x positive.
*/
-int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
+int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
const ge_p3 *A, const unsigned char *b)
{
signed char aslide[256];
@@ -2323,26 +9460,55 @@ int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
return 0;
}
-
-static const fe d = {
+#ifdef CURVED25519_ASM_64BIT
+static const ge d = {
+ 0x75eb4dca135978a3, 0x00700a4d4141d8ab, -0x7338bf8688861768, 0x52036cee2b6ffe73,
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge d = {
+ 0x135978a3, 0x75eb4dca, 0x4141d8ab, 0x00700a4d, 0x7779e898, -0x7338bf87, 0x2b6ffe73, 0x52036cee,
+};
+#elif defined(CURVED25519_128BIT)
+static const ge d = {
+ 0x34dca135978a3, 0x1a8283b156ebd, 0x5e7a26001c029, 0x739c663a03cbb,
+ 0x52036cee2b6ff
+};
+#else
+static const ge d = {
-10913610,13857413,-15372611,6949391,114729,
-8787816,-6275908,-3247719,-18696448,-12055116
-} ;
+};
+#endif
-static const fe sqrtm1 = {
+#ifdef CURVED25519_ASM_64BIT
+static const ge sqrtm1 = {
+ -0x3b11e4d8b5f15f50, 0x2f431806ad2fe478, 0x2b4d00993dfbd7a7, 0x2b8324804fc1df0b,
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge sqrtm1 = {
+ 0x4a0ea0b0, -0x3b11e4d9, -0x52d01b88, 0x2f431806, 0x3dfbd7a7, 0x2b4d0099, 0x4fc1df0b, 0x2b832480,
+};
+#elif defined(CURVED25519_128BIT)
+static const ge sqrtm1 = {
+ 0x61b274a0ea0b0, 0x0d5a5fc8f189d, 0x7ef5e9cbd0c60, 0x78595a6804c9e,
+ 0x2b8324804fc1d
+};
+#else
+static const ge sqrtm1 = {
-32595792,-7943725,9377950,3500415,12389472,
-272473,-25146209,-2005654,326686,11406482
-} ;
+};
+#endif
int ge_frombytes_negate_vartime(ge_p3 *h,const unsigned char *s)
{
- fe u;
- fe v;
- fe v3;
- fe vxx;
- fe check;
+ ge u;
+ ge v;
+ ge v3;
+ ge vxx;
+ ge check;
fe_frombytes(h->Y,s);
fe_1(h->Z);
@@ -2384,19 +9550,24 @@ int ge_frombytes_negate_vartime(ge_p3 *h,const unsigned char *s)
r = p + q
*/
-void ge_madd(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
-{
- fe t0;
- fe_add(r->X,p->Y,p->X);
- fe_sub(r->Y,p->Y,p->X);
- fe_mul(r->Z,r->X,q->yplusx);
- fe_mul(r->Y,r->Y,q->yminusx);
- fe_mul(r->T,q->xy2d,p->T);
- fe_add(t0,p->Z,p->Z);
- fe_sub(r->X,r->Z,r->Y);
- fe_add(r->Y,r->Z,r->Y);
- fe_add(r->Z,t0,r->T);
- fe_sub(r->T,t0,r->T);
+static WC_INLINE void ge_madd(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
+{
+#ifndef CURVED25519_ASM
+ ge t0;
+ fe_add(r->X,p->Y,p->X);
+ fe_sub(r->Y,p->Y,p->X);
+ fe_mul(r->Z,r->X,q->yplusx);
+ fe_mul(r->Y,r->Y,q->yminusx);
+ fe_mul(r->T,q->xy2d,p->T);
+ fe_add(t0,p->Z,p->Z);
+ fe_sub(r->X,r->Z,r->Y);
+ fe_add(r->Y,r->Z,r->Y);
+ fe_add(r->Z,t0,r->T);
+ fe_sub(r->T,t0,r->T);
+#else
+ fe_ge_madd(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T, q->xy2d,
+ q->yplusx, q->yminusx);
+#endif
}
@@ -2406,19 +9577,24 @@ void ge_madd(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
r = p - q
*/
-void ge_msub(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
-{
- fe t0;
- fe_add(r->X,p->Y,p->X);
- fe_sub(r->Y,p->Y,p->X);
- fe_mul(r->Z,r->X,q->yminusx);
- fe_mul(r->Y,r->Y,q->yplusx);
- fe_mul(r->T,q->xy2d,p->T);
- fe_add(t0,p->Z,p->Z);
- fe_sub(r->X,r->Z,r->Y);
- fe_add(r->Y,r->Z,r->Y);
- fe_sub(r->Z,t0,r->T);
- fe_add(r->T,t0,r->T);
+static WC_INLINE void ge_msub(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
+{
+#ifndef CURVED25519_ASM
+ ge t0;
+ fe_add(r->X,p->Y,p->X);
+ fe_sub(r->Y,p->Y,p->X);
+ fe_mul(r->Z,r->X,q->yminusx);
+ fe_mul(r->Y,r->Y,q->yplusx);
+ fe_mul(r->T,q->xy2d,p->T);
+ fe_add(t0,p->Z,p->Z);
+ fe_sub(r->X,r->Z,r->Y);
+ fe_add(r->Y,r->Z,r->Y);
+ fe_sub(r->Z,t0,r->T);
+ fe_add(r->T,t0,r->T);
+#else
+ fe_ge_msub(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T, q->xy2d,
+ q->yplusx, q->yminusx);
+#endif
}
@@ -2427,11 +9603,15 @@ void ge_msub(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
r = p
*/
-extern void ge_p1p1_to_p2(ge_p2 *r,const ge_p1p1 *p)
+static void ge_p1p1_to_p2(ge_p2 *r,const ge_p1p1 *p)
{
+#ifndef CURVED25519_ASM
fe_mul(r->X,p->X,p->T);
fe_mul(r->Y,p->Y,p->Z);
fe_mul(r->Z,p->Z,p->T);
+#else
+ fe_ge_to_p2(r->X, r->Y, r->Z, p->X, p->Y, p->Z, p->T);
+#endif
}
@@ -2441,18 +9621,22 @@ extern void ge_p1p1_to_p2(ge_p2 *r,const ge_p1p1 *p)
r = p
*/
-extern void ge_p1p1_to_p3(ge_p3 *r,const ge_p1p1 *p)
+static WC_INLINE void ge_p1p1_to_p3(ge_p3 *r,const ge_p1p1 *p)
{
+#ifndef CURVED25519_ASM
fe_mul(r->X,p->X,p->T);
fe_mul(r->Y,p->Y,p->Z);
fe_mul(r->Z,p->Z,p->T);
fe_mul(r->T,p->X,p->Y);
+#else
+ fe_ge_to_p3(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T);
+#endif
}
/* ge p2 0 */
-void ge_p2_0(ge_p2 *h)
+static void ge_p2_0(ge_p2 *h)
{
fe_0(h->X);
fe_1(h->Y);
@@ -2466,29 +9650,22 @@ void ge_p2_0(ge_p2 *h)
r = 2 * p
*/
-void ge_p2_dbl(ge_p1p1 *r,const ge_p2 *p)
-{
- fe t0;
- fe_sq(r->X,p->X);
- fe_sq(r->Z,p->Y);
- fe_sq2(r->T,p->Z);
- fe_add(r->Y,p->X,p->Y);
- fe_sq(t0,r->Y);
- fe_add(r->Y,r->Z,r->X);
- fe_sub(r->Z,r->Z,r->X);
- fe_sub(r->X,t0,r->Y);
- fe_sub(r->T,r->T,r->Z);
-}
-
-
-/* ge p3 0 */
-
-void ge_p3_0(ge_p3 *h)
+static WC_INLINE void ge_p2_dbl(ge_p1p1 *r,const ge_p2 *p)
{
- fe_0(h->X);
- fe_1(h->Y);
- fe_1(h->Z);
- fe_0(h->T);
+#ifndef CURVED25519_ASM
+ ge t0;
+ fe_sq(r->X,p->X);
+ fe_sq(r->Z,p->Y);
+ fe_sq2(r->T,p->Z);
+ fe_add(r->Y,p->X,p->Y);
+ fe_sq(t0,r->Y);
+ fe_add(r->Y,r->Z,r->X);
+ fe_sub(r->Z,r->Z,r->X);
+ fe_sub(r->X,t0,r->Y);
+ fe_sub(r->T,r->T,r->Z);
+#else
+ fe_ge_dbl(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z);
+#endif
}
@@ -2498,7 +9675,7 @@ void ge_p3_0(ge_p3 *h)
r = 2 * p
*/
-void ge_p3_dbl(ge_p1p1 *r,const ge_p3 *p)
+static void ge_p3_dbl(ge_p1p1 *r,const ge_p3 *p)
{
ge_p2 q;
ge_p3_to_p2(&q,p);
@@ -2512,13 +9689,28 @@ void ge_p3_dbl(ge_p1p1 *r,const ge_p3 *p)
r = p
*/
-static const fe d2 = {
+#ifdef CURVED25519_ASM_64BIT
+static const ge d2 = {
+ -0x1429646bd94d0ea7, 0x00e0149a8283b156, 0x198e80f2eef3d130, 0x2406d9dc56dffce7,
+};
+#elif defined(CURVED25519_ASM_32BIT)
+static const ge d2 = {
+ 0x26b2f159, -0x1429646c, -0x7d7c4eaa, 0x00e0149a, -0x110c2ed0, 0x198e80f2, 0x56dffce7, 0x2406d9dc,
+};
+#elif defined(CURVED25519_128BIT)
+static const ge d2 = {
+ 0x69b9426b2f159, 0x35050762add7a, 0x3cf44c0038052, 0x6738cc7407977,
+ 0x2406d9dc56dff
+};
+#else
+static const ge d2 = {
-21827239,-5839606,-30745221,13898782,229458,
15978800,-12551817,-6495438,29715968,9444199
} ;
+#endif
-extern void ge_p3_to_cached(ge_cached *r,const ge_p3 *p)
+static WC_INLINE void ge_p3_to_cached(ge_cached *r,const ge_p3 *p)
{
fe_add(r->YplusX,p->Y,p->X);
fe_sub(r->YminusX,p->Y,p->X);
@@ -2532,7 +9724,7 @@ extern void ge_p3_to_cached(ge_cached *r,const ge_p3 *p)
r = p
*/
-extern void ge_p3_to_p2(ge_p2 *r,const ge_p3 *p)
+static void ge_p3_to_p2(ge_p2 *r,const ge_p3 *p)
{
fe_copy(r->X,p->X);
fe_copy(r->Y,p->Y);
@@ -2543,9 +9735,9 @@ extern void ge_p3_to_p2(ge_p2 *r,const ge_p3 *p)
/* ge p3 tobytes */
void ge_p3_tobytes(unsigned char *s,const ge_p3 *h)
{
- fe recip;
- fe x;
- fe y;
+ ge recip;
+ ge x;
+ ge y;
fe_invert(recip,h->Z);
fe_mul(x,h->X,recip);
@@ -2555,13 +9747,15 @@ void ge_p3_tobytes(unsigned char *s,const ge_p3 *h)
}
+#ifndef CURVED25519_ASM
/* ge_precomp_0 */
-void ge_precomp_0(ge_precomp *h)
+static void ge_precomp_0(ge_precomp *h)
{
fe_1(h->yplusx);
fe_1(h->yminusx);
fe_0(h->xy2d);
}
+#endif
/* ge_sub */
@@ -2569,29 +9763,34 @@ void ge_precomp_0(ge_precomp *h)
r = p - q
*/
-void ge_sub(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
-{
- fe t0;
- fe_add(r->X,p->Y,p->X);
- fe_sub(r->Y,p->Y,p->X);
- fe_mul(r->Z,r->X,q->YminusX);
- fe_mul(r->Y,r->Y,q->YplusX);
- fe_mul(r->T,q->T2d,p->T);
- fe_mul(r->X,p->Z,q->Z);
- fe_add(t0,r->X,r->X);
- fe_sub(r->X,r->Z,r->Y);
- fe_add(r->Y,r->Z,r->Y);
- fe_sub(r->Z,t0,r->T);
- fe_add(r->T,t0,r->T);
+static WC_INLINE void ge_sub(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
+{
+#ifndef CURVED25519_ASM
+ ge t0;
+ fe_add(r->X,p->Y,p->X);
+ fe_sub(r->Y,p->Y,p->X);
+ fe_mul(r->Z,r->X,q->YminusX);
+ fe_mul(r->Y,r->Y,q->YplusX);
+ fe_mul(r->T,q->T2d,p->T);
+ fe_mul(r->X,p->Z,q->Z);
+ fe_add(t0,r->X,r->X);
+ fe_sub(r->X,r->Z,r->Y);
+ fe_add(r->Y,r->Z,r->Y);
+ fe_sub(r->Z,t0,r->T);
+ fe_add(r->T,t0,r->T);
+#else
+ fe_ge_sub(r->X, r->Y, r->Z, r->T, p->X, p->Y, p->Z, p->T, q->Z, q->T2d,
+ q->YplusX, q->YminusX);
+#endif
}
/* ge tobytes */
void ge_tobytes(unsigned char *s,const ge_p2 *h)
{
- fe recip;
- fe x;
- fe y;
+ ge recip;
+ ge x;
+ ge y;
fe_invert(recip,h->Z);
fe_mul(x,h->X,recip);
@@ -2599,5 +9798,6 @@ void ge_tobytes(unsigned char *s,const ge_p2 *h)
fe_tobytes(s,y);
s[31] ^= fe_isnegative(x) << 7;
}
-#endif /* HAVE_ED25519 */
+#endif /* !ED25519_SMALL */
+#endif /* HAVE_ED25519 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hash.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hash.c
index 55a1f6a1d..c53f5e67a 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hash.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hash.c
@@ -1,8 +1,8 @@
/* hash.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,61 +16,1662 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
-
-#if !defined(WOLFSSL_TI_HASH)
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#ifndef NO_ASN
+#include <wolfssl/wolfcrypt/asn.h>
+#endif
#include <wolfssl/wolfcrypt/hash.h>
+#include <wolfssl/wolfcrypt/hmac.h>
-#if !defined(NO_MD5)
-void wc_Md5GetHash(Md5* md5, byte* hash)
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+
+#ifdef NO_ASN
+enum Hash_Sum {
+ MD2h = 646,
+ MD5h = 649,
+ SHAh = 88,
+ SHA224h = 417,
+ SHA256h = 414,
+ SHA384h = 415,
+ SHA512h = 416,
+ SHA3_224h = 420,
+ SHA3_256h = 421,
+ SHA3_384h = 422,
+ SHA3_512h = 423
+};
+#endif /* NO_ASN */
+
+#if !defined(NO_PWDBASED) || !defined(NO_ASN)
+/* function converts int hash type to enum */
+enum wc_HashType wc_HashTypeConvert(int hashType)
{
- Md5 save = *md5 ;
- wc_Md5Final(md5, hash) ;
- *md5 = save ;
+ /* Default to hash type none as error */
+ enum wc_HashType eHashType = WC_HASH_TYPE_NONE;
+#if defined(HAVE_FIPS) || defined(HAVE_SELFTEST)
+ /* original FIPSv1 and CAVP selftest require a mapping for unique hash
+ type to wc_HashType */
+ switch (hashType) {
+ #ifndef NO_MD5
+ case WC_MD5:
+ eHashType = WC_HASH_TYPE_MD5;
+ break;
+ #endif /* !NO_MD5 */
+ #ifndef NO_SHA
+ case WC_SHA:
+ eHashType = WC_HASH_TYPE_SHA;
+ break;
+ #endif /* !NO_SHA */
+
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ eHashType = WC_HASH_TYPE_SHA224;
+ break;
+ #endif /* WOLFSSL_SHA224 */
+
+ #ifndef NO_SHA256
+ case WC_SHA256:
+ eHashType = WC_HASH_TYPE_SHA256;
+ break;
+ #endif /* !NO_SHA256 */
+
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
+ eHashType = WC_HASH_TYPE_SHA384;
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ #ifdef WOLFSSL_SHA512
+ case WC_SHA512:
+ eHashType = WC_HASH_TYPE_SHA512;
+ break;
+ #endif /* WOLFSSL_SHA512 */
+ #ifdef WOLFSSL_SHA3
+ case WC_SHA3_224:
+ eHashType = WC_HASH_TYPE_SHA3_224;
+ break;
+ case WC_SHA3_256:
+ eHashType = WC_HASH_TYPE_SHA3_256;
+ break;
+ case WC_SHA3_384:
+ eHashType = WC_HASH_TYPE_SHA3_384;
+ break;
+ case WC_SHA3_512:
+ eHashType = WC_HASH_TYPE_SHA3_512;
+ break;
+ #endif /* WOLFSSL_SHA3 */
+ default:
+ eHashType = WC_HASH_TYPE_NONE;
+ break;
+ }
+#else
+ /* current master uses same unique types as wc_HashType */
+ if (hashType > 0 && hashType <= WC_HASH_TYPE_MAX) {
+ eHashType = (enum wc_HashType)hashType;
+ }
+#endif
+ return eHashType;
}
+#endif /* !NO_PWDBASED || !NO_ASN */
-WOLFSSL_API void wc_Md5RestorePos(Md5* m1, Md5* m2) {
- *m1 = *m2 ;
+#if !defined(NO_ASN) || !defined(NO_DH) || defined(HAVE_ECC)
+
+int wc_HashGetOID(enum wc_HashType hash_type)
+{
+ int oid = HASH_TYPE_E; /* Default to hash type error */
+ switch(hash_type)
+ {
+ case WC_HASH_TYPE_MD2:
+ #ifdef WOLFSSL_MD2
+ oid = MD2h;
+ #endif
+ break;
+ case WC_HASH_TYPE_MD5_SHA:
+ case WC_HASH_TYPE_MD5:
+ #ifndef NO_MD5
+ oid = MD5h;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA:
+ #ifndef NO_SHA
+ oid = SHAh;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+ #ifdef WOLFSSL_SHA224
+ oid = SHA224h;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+ #ifndef NO_SHA256
+ oid = SHA256h;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+ #ifdef WOLFSSL_SHA384
+ oid = SHA384h;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+ #ifdef WOLFSSL_SHA512
+ oid = SHA512h;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_224:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+ oid = SHA3_224h;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_256:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+ oid = SHA3_256h;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_384:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+ oid = SHA3_384h;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_512:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+ oid = SHA3_512h;
+ #endif
+ break;
+
+ /* Not Supported */
+ case WC_HASH_TYPE_MD4:
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ case WC_HASH_TYPE_NONE:
+ default:
+ oid = BAD_FUNC_ARG;
+ break;
+ }
+ return oid;
}
+
+enum wc_HashType wc_OidGetHash(int oid)
+{
+ enum wc_HashType hash_type = WC_HASH_TYPE_NONE;
+ switch (oid)
+ {
+ #ifdef WOLFSSL_MD2
+ case MD2h:
+ hash_type = WC_HASH_TYPE_MD2;
+ break;
+ #endif
+ case MD5h:
+ #ifndef NO_MD5
+ hash_type = WC_HASH_TYPE_MD5;
+ #endif
+ break;
+ case SHAh:
+ #ifndef NO_SHA
+ hash_type = WC_HASH_TYPE_SHA;
+ #endif
+ break;
+ case SHA224h:
+ #ifdef WOLFSSL_SHA224
+ hash_type = WC_HASH_TYPE_SHA224;
+ #endif
+ break;
+ case SHA256h:
+ #ifndef NO_SHA256
+ hash_type = WC_HASH_TYPE_SHA256;
+ #endif
+ break;
+ case SHA384h:
+ #ifdef WOLFSSL_SHA384
+ hash_type = WC_HASH_TYPE_SHA384;
+ #endif
+ break;
+ case SHA512h:
+ #ifdef WOLFSSL_SHA512
+ hash_type = WC_HASH_TYPE_SHA512;
+ #endif
+ break;
+ #ifdef WOLFSSL_SHA3
+ case SHA3_224h:
+ hash_type = WC_HASH_TYPE_SHA3_224;
+ break;
+ case SHA3_256h:
+ hash_type = WC_HASH_TYPE_SHA3_256;
+ break;
+ case SHA3_384h:
+ hash_type = WC_HASH_TYPE_SHA3_384;
+ break;
+ case SHA3_512h:
+ hash_type = WC_HASH_TYPE_SHA3_512;
+ break;
+ #endif /* WOLFSSL_SHA3 */
+ default:
+ break;
+ }
+ return hash_type;
+}
+#endif /* !NO_ASN || !NO_DH || HAVE_ECC */
+
+#ifndef NO_HASH_WRAPPER
+
+/* Get Hash digest size */
+int wc_HashGetDigestSize(enum wc_HashType hash_type)
+{
+ int dig_size = HASH_TYPE_E; /* Default to hash type error */
+ switch(hash_type)
+ {
+ case WC_HASH_TYPE_MD2:
+ #ifdef WOLFSSL_MD2
+ dig_size = MD2_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_MD4:
+ #ifndef NO_MD4
+ dig_size = MD4_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_MD5:
+ #ifndef NO_MD5
+ dig_size = WC_MD5_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA:
+ #ifndef NO_SHA
+ dig_size = WC_SHA_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+ #ifdef WOLFSSL_SHA224
+ dig_size = WC_SHA224_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+ #ifndef NO_SHA256
+ dig_size = WC_SHA256_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+ #ifdef WOLFSSL_SHA384
+ dig_size = WC_SHA384_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+ #ifdef WOLFSSL_SHA512
+ dig_size = WC_SHA512_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_MD5_SHA: /* Old TLS Specific */
+ #if !defined(NO_MD5) && !defined(NO_SHA)
+ dig_size = (int)WC_MD5_DIGEST_SIZE + (int)WC_SHA_DIGEST_SIZE;
+ #endif
+ break;
+
+ case WC_HASH_TYPE_SHA3_224:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+ dig_size = WC_SHA3_224_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_256:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+ dig_size = WC_SHA3_256_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_384:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+ dig_size = WC_SHA3_384_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_512:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+ dig_size = WC_SHA3_512_DIGEST_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ #if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
+ dig_size = BLAKE2S_OUTBYTES;
+ #endif
+ break;
+
+ /* Not Supported */
+ case WC_HASH_TYPE_NONE:
+ default:
+ dig_size = BAD_FUNC_ARG;
+ break;
+ }
+ return dig_size;
+}
+
+
+/* Get Hash block size */
+int wc_HashGetBlockSize(enum wc_HashType hash_type)
+{
+ int block_size = HASH_TYPE_E; /* Default to hash type error */
+ switch (hash_type)
+ {
+ case WC_HASH_TYPE_MD2:
+ #ifdef WOLFSSL_MD2
+ block_size = MD2_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_MD4:
+ #ifndef NO_MD4
+ block_size = MD4_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_MD5:
+ #ifndef NO_MD5
+ block_size = WC_MD5_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA:
+ #ifndef NO_SHA
+ block_size = WC_SHA_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+ #ifdef WOLFSSL_SHA224
+ block_size = WC_SHA224_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+ #ifndef NO_SHA256
+ block_size = WC_SHA256_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+ #ifdef WOLFSSL_SHA384
+ block_size = WC_SHA384_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+ #ifdef WOLFSSL_SHA512
+ block_size = WC_SHA512_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_MD5_SHA: /* Old TLS Specific */
+ #if !defined(NO_MD5) && !defined(NO_SHA)
+ block_size = (int)WC_MD5_BLOCK_SIZE + (int)WC_SHA_BLOCK_SIZE;
+ #endif
+ break;
+
+ case WC_HASH_TYPE_SHA3_224:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+ block_size = WC_SHA3_224_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_256:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+ block_size = WC_SHA3_256_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_384:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+ block_size = WC_SHA3_384_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_SHA3_512:
+ #if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+ block_size = WC_SHA3_512_BLOCK_SIZE;
+ #endif
+ break;
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ #if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
+ block_size = BLAKE2S_BLOCKBYTES;
+ #endif
+ break;
+
+ /* Not Supported */
+ case WC_HASH_TYPE_NONE:
+ default:
+ block_size = BAD_FUNC_ARG;
+ break;
+ }
+ return block_size;
+}
+
+/* Generic Hashing Wrapper */
+int wc_Hash(enum wc_HashType hash_type, const byte* data,
+ word32 data_len, byte* hash, word32 hash_len)
+{
+ int ret = HASH_TYPE_E; /* Default to hash type error */
+ word32 dig_size;
+
+ /* Validate hash buffer size */
+ dig_size = wc_HashGetDigestSize(hash_type);
+ if (hash_len < dig_size) {
+ return BUFFER_E;
+ }
+
+ /* Suppress possible unused arg if all hashing is disabled */
+ (void)data;
+ (void)data_len;
+ (void)hash;
+ (void)hash_len;
+
+ switch(hash_type)
+ {
+ case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+ ret = wc_Md5Hash(data, data_len, hash);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+ ret = wc_ShaHash(data, data_len, hash);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+ ret = wc_Sha224Hash(data, data_len, hash);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+ ret = wc_Sha256Hash(data, data_len, hash);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+ ret = wc_Sha384Hash(data, data_len, hash);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+ ret = wc_Sha512Hash(data, data_len, hash);
+#endif
+ break;
+ case WC_HASH_TYPE_MD5_SHA:
+#if !defined(NO_MD5) && !defined(NO_SHA)
+ ret = wc_Md5Hash(data, data_len, hash);
+ if (ret == 0) {
+ ret = wc_ShaHash(data, data_len, &hash[WC_MD5_DIGEST_SIZE]);
+ }
#endif
+ break;
-#if !defined(NO_SHA)
-int wc_ShaGetHash(Sha* sha, byte* hash)
+ case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+ ret = wc_Sha3_224Hash(data, data_len, hash);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+ ret = wc_Sha3_256Hash(data, data_len, hash);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+ ret = wc_Sha3_384Hash(data, data_len, hash);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+ ret = wc_Sha3_512Hash(data, data_len, hash);
+#endif
+ break;
+
+ /* Not Supported */
+ case WC_HASH_TYPE_MD2:
+ case WC_HASH_TYPE_MD4:
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ case WC_HASH_TYPE_NONE:
+ default:
+ ret = BAD_FUNC_ARG;
+ break;
+ }
+ return ret;
+}
+
+int wc_HashInit_ex(wc_HashAlg* hash, enum wc_HashType type, void* heap,
+ int devId)
{
- int ret ;
- Sha save = *sha ;
- ret = wc_ShaFinal(sha, hash) ;
- *sha = save ;
- return ret ;
+ int ret = HASH_TYPE_E; /* Default to hash type error */
+
+ if (hash == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (type) {
+ case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+ ret = wc_InitMd5_ex(&hash->md5, heap, devId);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+ ret = wc_InitSha_ex(&hash->sha, heap, devId);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+ ret = wc_InitSha224_ex(&hash->sha224, heap, devId);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+ ret = wc_InitSha256_ex(&hash->sha256, heap, devId);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+ ret = wc_InitSha384_ex(&hash->sha384, heap, devId);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+ ret = wc_InitSha512_ex(&hash->sha512, heap, devId);
+#endif
+ break;
+
+ case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+ ret = wc_InitSha3_224(&hash->sha3, heap, devId);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+ ret = wc_InitSha3_256(&hash->sha3, heap, devId);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+ ret = wc_InitSha3_384(&hash->sha3, heap, devId);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+ ret = wc_InitSha3_512(&hash->sha3, heap, devId);
+#endif
+ break;
+
+ /* not supported */
+ case WC_HASH_TYPE_MD5_SHA:
+ case WC_HASH_TYPE_MD2:
+ case WC_HASH_TYPE_MD4:
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ case WC_HASH_TYPE_NONE:
+ default:
+ ret = BAD_FUNC_ARG;
+ };
+
+ return ret;
}
-WOLFSSL_API void wc_ShaRestorePos(Sha* s1, Sha* s2) {
- *s1 = *s2 ;
+int wc_HashInit(wc_HashAlg* hash, enum wc_HashType type)
+{
+ return wc_HashInit_ex(hash, type, NULL, INVALID_DEVID);
}
+
+int wc_HashUpdate(wc_HashAlg* hash, enum wc_HashType type, const byte* data,
+ word32 dataSz)
+{
+ int ret = HASH_TYPE_E; /* Default to hash type error */
+
+ if (hash == NULL || data == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (type) {
+ case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+ ret = wc_Md5Update(&hash->md5, data, dataSz);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+ ret = wc_ShaUpdate(&hash->sha, data, dataSz);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+ ret = wc_Sha224Update(&hash->sha224, data, dataSz);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+ ret = wc_Sha256Update(&hash->sha256, data, dataSz);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+ ret = wc_Sha384Update(&hash->sha384, data, dataSz);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+ ret = wc_Sha512Update(&hash->sha512, data, dataSz);
#endif
+ break;
-#if !defined(NO_SHA256)
-int wc_Sha256GetHash(Sha256* sha256, byte* hash)
+ case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+ ret = wc_Sha3_224_Update(&hash->sha3, data, dataSz);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+ ret = wc_Sha3_256_Update(&hash->sha3, data, dataSz);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+ ret = wc_Sha3_384_Update(&hash->sha3, data, dataSz);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+ ret = wc_Sha3_512_Update(&hash->sha3, data, dataSz);
+#endif
+ break;
+
+ /* not supported */
+ case WC_HASH_TYPE_MD5_SHA:
+ case WC_HASH_TYPE_MD2:
+ case WC_HASH_TYPE_MD4:
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ case WC_HASH_TYPE_NONE:
+ default:
+ ret = BAD_FUNC_ARG;
+ };
+
+ return ret;
+}
+
+int wc_HashFinal(wc_HashAlg* hash, enum wc_HashType type, byte* out)
{
- int ret ;
- Sha256 save = *sha256 ;
- ret = wc_Sha256Final(sha256, hash) ;
- *sha256 = save ;
- return ret ;
+ int ret = HASH_TYPE_E; /* Default to hash type error */
+
+ if (hash == NULL || out == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (type) {
+ case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+ ret = wc_Md5Final(&hash->md5, out);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+ ret = wc_ShaFinal(&hash->sha, out);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+ ret = wc_Sha224Final(&hash->sha224, out);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+ ret = wc_Sha256Final(&hash->sha256, out);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+ ret = wc_Sha384Final(&hash->sha384, out);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+ ret = wc_Sha512Final(&hash->sha512, out);
+#endif
+ break;
+
+ case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+ ret = wc_Sha3_224_Final(&hash->sha3, out);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+ ret = wc_Sha3_256_Final(&hash->sha3, out);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+ ret = wc_Sha3_384_Final(&hash->sha3, out);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+ ret = wc_Sha3_512_Final(&hash->sha3, out);
+#endif
+ break;
+
+ /* not supported */
+ case WC_HASH_TYPE_MD5_SHA:
+ case WC_HASH_TYPE_MD2:
+ case WC_HASH_TYPE_MD4:
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ case WC_HASH_TYPE_NONE:
+ default:
+ ret = BAD_FUNC_ARG;
+ };
+
+ return ret;
}
-WOLFSSL_API void wc_Sha256RestorePos(Sha256* s1, Sha256* s2) {
- *s1 = *s2 ;
+int wc_HashFree(wc_HashAlg* hash, enum wc_HashType type)
+{
+ int ret = HASH_TYPE_E; /* Default to hash type error */
+
+ if (hash == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (type) {
+ case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+ wc_Md5Free(&hash->md5);
+ ret = 0;
+#endif
+ break;
+ case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+ wc_ShaFree(&hash->sha);
+ ret = 0;
+#endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+ wc_Sha224Free(&hash->sha224);
+ ret = 0;
+#endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+ wc_Sha256Free(&hash->sha256);
+ ret = 0;
+#endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+ wc_Sha384Free(&hash->sha384);
+ ret = 0;
+#endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+ wc_Sha512Free(&hash->sha512);
+ ret = 0;
+#endif
+ break;
+
+ case WC_HASH_TYPE_SHA3_224:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_224)
+ wc_Sha3_224_Free(&hash->sha3);
+ ret = 0;
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_256:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_256)
+ wc_Sha3_256_Free(&hash->sha3);
+ ret = 0;
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_384:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_384)
+ wc_Sha3_384_Free(&hash->sha3);
+ ret = 0;
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_512:
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_NOSHA3_512)
+ wc_Sha3_512_Free(&hash->sha3);
+ ret = 0;
+#endif
+ break;
+
+ /* not supported */
+ case WC_HASH_TYPE_MD5_SHA:
+ case WC_HASH_TYPE_MD2:
+ case WC_HASH_TYPE_MD4:
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ case WC_HASH_TYPE_NONE:
+ default:
+ ret = BAD_FUNC_ARG;
+ };
+
+ return ret;
}
+
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_HashSetFlags(wc_HashAlg* hash, enum wc_HashType type, word32 flags)
+{
+ int ret = HASH_TYPE_E; /* Default to hash type error */
+
+ if (hash == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (type) {
+ case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+ ret = wc_Md5SetFlags(&hash->md5, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+ ret = wc_ShaSetFlags(&hash->sha, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+ ret = wc_Sha224SetFlags(&hash->sha224, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+ ret = wc_Sha256SetFlags(&hash->sha256, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+ ret = wc_Sha384SetFlags(&hash->sha384, flags);
#endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+ ret = wc_Sha512SetFlags(&hash->sha512, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA3_224:
+ case WC_HASH_TYPE_SHA3_256:
+ case WC_HASH_TYPE_SHA3_384:
+ case WC_HASH_TYPE_SHA3_512:
+#ifdef WOLFSSL_SHA3
+ ret = wc_Sha3_SetFlags(&hash->sha3, flags);
#endif
+ break;
+
+ /* not supported */
+ case WC_HASH_TYPE_MD5_SHA:
+ case WC_HASH_TYPE_MD2:
+ case WC_HASH_TYPE_MD4:
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ case WC_HASH_TYPE_NONE:
+ default:
+ ret = BAD_FUNC_ARG;
+ };
+
+ return ret;
+}
+int wc_HashGetFlags(wc_HashAlg* hash, enum wc_HashType type, word32* flags)
+{
+ int ret = HASH_TYPE_E; /* Default to hash type error */
+
+ if (hash == NULL)
+ return BAD_FUNC_ARG;
+ switch (type) {
+ case WC_HASH_TYPE_MD5:
+#ifndef NO_MD5
+ ret = wc_Md5GetFlags(&hash->md5, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA:
+#ifndef NO_SHA
+ ret = wc_ShaGetFlags(&hash->sha, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA224:
+#ifdef WOLFSSL_SHA224
+ ret = wc_Sha224GetFlags(&hash->sha224, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA256:
+#ifndef NO_SHA256
+ ret = wc_Sha256GetFlags(&hash->sha256, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA384:
+#ifdef WOLFSSL_SHA384
+ ret = wc_Sha384GetFlags(&hash->sha384, flags);
+#endif
+ break;
+ case WC_HASH_TYPE_SHA512:
+#ifdef WOLFSSL_SHA512
+ ret = wc_Sha512GetFlags(&hash->sha512, flags);
+#endif
+ break;
+
+ case WC_HASH_TYPE_SHA3_224:
+ case WC_HASH_TYPE_SHA3_256:
+ case WC_HASH_TYPE_SHA3_384:
+ case WC_HASH_TYPE_SHA3_512:
+#ifdef WOLFSSL_SHA3
+ ret = wc_Sha3_GetFlags(&hash->sha3, flags);
+#endif
+ break;
+
+ /* not supported */
+ case WC_HASH_TYPE_MD5_SHA:
+ case WC_HASH_TYPE_MD2:
+ case WC_HASH_TYPE_MD4:
+ case WC_HASH_TYPE_BLAKE2B:
+ case WC_HASH_TYPE_BLAKE2S:
+ case WC_HASH_TYPE_NONE:
+ default:
+ ret = BAD_FUNC_ARG;
+ };
+
+ return ret;
+}
+#endif
+
+
+#if !defined(WOLFSSL_TI_HASH)
+
+#if !defined(NO_MD5)
+ int wc_Md5Hash(const byte* data, word32 len, byte* hash)
+ {
+ int ret;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Md5* md5;
+ #else
+ wc_Md5 md5[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ md5 = (wc_Md5*)XMALLOC(sizeof(wc_Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (md5 == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitMd5(md5)) != 0) {
+ WOLFSSL_MSG("InitMd5 failed");
+ }
+ else {
+ if ((ret = wc_Md5Update(md5, data, len)) != 0) {
+ WOLFSSL_MSG("Md5Update failed");
+ }
+ else if ((ret = wc_Md5Final(md5, hash)) != 0) {
+ WOLFSSL_MSG("Md5Final failed");
+ }
+ wc_Md5Free(md5);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* !NO_MD5 */
+
+#if !defined(NO_SHA)
+ int wc_ShaHash(const byte* data, word32 len, byte* hash)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Sha* sha;
+ #else
+ wc_Sha sha[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ sha = (wc_Sha*)XMALLOC(sizeof(wc_Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (sha == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitSha(sha)) != 0) {
+ WOLFSSL_MSG("InitSha failed");
+ }
+ else {
+ if ((ret = wc_ShaUpdate(sha, data, len)) != 0) {
+ WOLFSSL_MSG("ShaUpdate failed");
+ }
+ else if ((ret = wc_ShaFinal(sha, hash)) != 0) {
+ WOLFSSL_MSG("ShaFinal failed");
+ }
+ wc_ShaFree(sha);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* !NO_SHA */
+
+#if defined(WOLFSSL_SHA224)
+ int wc_Sha224Hash(const byte* data, word32 len, byte* hash)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Sha224* sha224;
+ #else
+ wc_Sha224 sha224[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ sha224 = (wc_Sha224*)XMALLOC(sizeof(wc_Sha224), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (sha224 == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitSha224(sha224)) != 0) {
+ WOLFSSL_MSG("InitSha224 failed");
+ }
+ else {
+ if ((ret = wc_Sha224Update(sha224, data, len)) != 0) {
+ WOLFSSL_MSG("Sha224Update failed");
+ }
+ else if ((ret = wc_Sha224Final(sha224, hash)) != 0) {
+ WOLFSSL_MSG("Sha224Final failed");
+ }
+ wc_Sha224Free(sha224);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(sha224, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+}
+#endif /* WOLFSSL_SHA224 */
+
+#if !defined(NO_SHA256)
+ int wc_Sha256Hash(const byte* data, word32 len, byte* hash)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Sha256* sha256;
+ #else
+ wc_Sha256 sha256[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ sha256 = (wc_Sha256*)XMALLOC(sizeof(wc_Sha256), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (sha256 == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitSha256(sha256)) != 0) {
+ WOLFSSL_MSG("InitSha256 failed");
+ }
+ else {
+ if ((ret = wc_Sha256Update(sha256, data, len)) != 0) {
+ WOLFSSL_MSG("Sha256Update failed");
+ }
+ else if ((ret = wc_Sha256Final(sha256, hash)) != 0) {
+ WOLFSSL_MSG("Sha256Final failed");
+ }
+ wc_Sha256Free(sha256);
+ }
+
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(sha256, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* !NO_SHA256 */
+
+#endif /* !defined(WOLFSSL_TI_HASH) */
+
+
+#if defined(WOLFSSL_SHA512)
+ int wc_Sha512Hash(const byte* data, word32 len, byte* hash)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Sha512* sha512;
+ #else
+ wc_Sha512 sha512[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ sha512 = (wc_Sha512*)XMALLOC(sizeof(wc_Sha512), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (sha512 == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitSha512(sha512)) != 0) {
+ WOLFSSL_MSG("InitSha512 failed");
+ }
+ else {
+ if ((ret = wc_Sha512Update(sha512, data, len)) != 0) {
+ WOLFSSL_MSG("Sha512Update failed");
+ }
+ else if ((ret = wc_Sha512Final(sha512, hash)) != 0) {
+ WOLFSSL_MSG("Sha512Final failed");
+ }
+ wc_Sha512Free(sha512);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(sha512, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* WOLFSSL_SHA512 */
+
+#if defined(WOLFSSL_SHA384)
+ int wc_Sha384Hash(const byte* data, word32 len, byte* hash)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Sha384* sha384;
+ #else
+ wc_Sha384 sha384[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ sha384 = (wc_Sha384*)XMALLOC(sizeof(wc_Sha384), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (sha384 == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitSha384(sha384)) != 0) {
+ WOLFSSL_MSG("InitSha384 failed");
+ }
+ else {
+ if ((ret = wc_Sha384Update(sha384, data, len)) != 0) {
+ WOLFSSL_MSG("Sha384Update failed");
+ }
+ else if ((ret = wc_Sha384Final(sha384, hash)) != 0) {
+ WOLFSSL_MSG("Sha384Final failed");
+ }
+ wc_Sha384Free(sha384);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* WOLFSSL_SHA384 */
+
+#if defined(WOLFSSL_SHA3)
+#if !defined(WOLFSSL_NOSHA3_224)
+ int wc_Sha3_224Hash(const byte* data, word32 len, byte* hash)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Sha3* sha3;
+ #else
+ wc_Sha3 sha3[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (sha3 == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitSha3_224(sha3, NULL, INVALID_DEVID)) != 0) {
+ WOLFSSL_MSG("InitSha3_224 failed");
+ }
+ else {
+ if ((ret = wc_Sha3_224_Update(sha3, data, len)) != 0) {
+ WOLFSSL_MSG("Sha3_224_Update failed");
+ }
+ else if ((ret = wc_Sha3_224_Final(sha3, hash)) != 0) {
+ WOLFSSL_MSG("Sha3_224_Final failed");
+ }
+ wc_Sha3_224_Free(sha3);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* !WOLFSSL_NOSHA3_224 */
+
+#if !defined(WOLFSSL_NOSHA3_256)
+ int wc_Sha3_256Hash(const byte* data, word32 len, byte* hash)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Sha3* sha3;
+ #else
+ wc_Sha3 sha3[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (sha3 == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitSha3_256(sha3, NULL, INVALID_DEVID)) != 0) {
+ WOLFSSL_MSG("InitSha3_256 failed");
+ }
+ else {
+ if ((ret = wc_Sha3_256_Update(sha3, data, len)) != 0) {
+ WOLFSSL_MSG("Sha3_256_Update failed");
+ }
+ else if ((ret = wc_Sha3_256_Final(sha3, hash)) != 0) {
+ WOLFSSL_MSG("Sha3_256_Final failed");
+ }
+ wc_Sha3_256_Free(sha3);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* !WOLFSSL_NOSHA3_256 */
+
+#if !defined(WOLFSSL_NOSHA3_384)
+ int wc_Sha3_384Hash(const byte* data, word32 len, byte* hash)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Sha3* sha3;
+ #else
+ wc_Sha3 sha3[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (sha3 == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitSha3_384(sha3, NULL, INVALID_DEVID)) != 0) {
+ WOLFSSL_MSG("InitSha3_384 failed");
+ }
+ else {
+ if ((ret = wc_Sha3_384_Update(sha3, data, len)) != 0) {
+ WOLFSSL_MSG("Sha3_384_Update failed");
+ }
+ else if ((ret = wc_Sha3_384_Final(sha3, hash)) != 0) {
+ WOLFSSL_MSG("Sha3_384_Final failed");
+ }
+ wc_Sha3_384_Free(sha3);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* !WOLFSSL_NOSHA3_384 */
+
+#if !defined(WOLFSSL_NOSHA3_512)
+ int wc_Sha3_512Hash(const byte* data, word32 len, byte* hash)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Sha3* sha3;
+ #else
+ wc_Sha3 sha3[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ sha3 = (wc_Sha3*)XMALLOC(sizeof(wc_Sha3), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (sha3 == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitSha3_512(sha3, NULL, INVALID_DEVID)) != 0) {
+ WOLFSSL_MSG("InitSha3_512 failed");
+ }
+ else {
+ if ((ret = wc_Sha3_512_Update(sha3, data, len)) != 0) {
+ WOLFSSL_MSG("Sha3_512_Update failed");
+ }
+ else if ((ret = wc_Sha3_512_Final(sha3, hash)) != 0) {
+ WOLFSSL_MSG("Sha3_512_Final failed");
+ }
+ wc_Sha3_512_Free(sha3);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(sha3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* !WOLFSSL_NOSHA3_512 */
+
+#if defined(WOLFSSL_SHAKE256) && !defined(WOLFSSL_NO_SHAKE256)
+ int wc_Shake256Hash(const byte* data, word32 len, byte* hash,
+ word32 hashLen)
+ {
+ int ret = 0;
+ #ifdef WOLFSSL_SMALL_STACK
+ wc_Shake* shake;
+ #else
+ wc_Shake shake[1];
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ shake = (wc_Shake*)XMALLOC(sizeof(wc_Shake), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (shake == NULL)
+ return MEMORY_E;
+ #endif
+
+ if ((ret = wc_InitShake256(shake, NULL, INVALID_DEVID)) != 0) {
+ WOLFSSL_MSG("InitShake256 failed");
+ }
+ else {
+ if ((ret = wc_Shake256_Update(shake, data, len)) != 0) {
+ WOLFSSL_MSG("Shake256_Update failed");
+ }
+ else if ((ret = wc_Shake256_Final(shake, hash, hashLen)) != 0) {
+ WOLFSSL_MSG("Shake256_Final failed");
+ }
+ wc_Shake256_Free(shake);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(shake, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ return ret;
+ }
+#endif /* WOLFSSL_SHAKE256 && !WOLFSSL_NO_SHAKE256 */
+#endif /* WOLFSSL_SHA3 */
+
+#endif /* !NO_HASH_WRAPPER */
+
+#ifdef WOLFSSL_HAVE_PRF
+
+#ifdef WOLFSSL_SHA384
+ #define P_HASH_MAX_SIZE WC_SHA384_DIGEST_SIZE
+#else
+ #define P_HASH_MAX_SIZE WC_SHA256_DIGEST_SIZE
+#endif
+
+/* Pseudo Random Function for MD5, SHA-1, SHA-256, or SHA-384 */
+int wc_PRF(byte* result, word32 resLen, const byte* secret,
+ word32 secLen, const byte* seed, word32 seedLen, int hash,
+ void* heap, int devId)
+{
+ word32 len = P_HASH_MAX_SIZE;
+ word32 times;
+ word32 lastLen;
+ word32 lastTime;
+ word32 i;
+ word32 idx = 0;
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* previous;
+ byte* current;
+ Hmac* hmac;
+#else
+ byte previous[P_HASH_MAX_SIZE]; /* max size */
+ byte current[P_HASH_MAX_SIZE]; /* max size */
+ Hmac hmac[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ previous = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST);
+ current = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST);
+ hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap, DYNAMIC_TYPE_HMAC);
+
+ if (previous == NULL || current == NULL || hmac == NULL) {
+ if (previous) XFREE(previous, heap, DYNAMIC_TYPE_DIGEST);
+ if (current) XFREE(current, heap, DYNAMIC_TYPE_DIGEST);
+ if (hmac) XFREE(hmac, heap, DYNAMIC_TYPE_HMAC);
+
+ return MEMORY_E;
+ }
+#endif
+
+ switch (hash) {
+ #ifndef NO_MD5
+ case md5_mac:
+ hash = WC_MD5;
+ len = WC_MD5_DIGEST_SIZE;
+ break;
+ #endif
+
+ #ifndef NO_SHA256
+ case sha256_mac:
+ hash = WC_SHA256;
+ len = WC_SHA256_DIGEST_SIZE;
+ break;
+ #endif
+
+ #ifdef WOLFSSL_SHA384
+ case sha384_mac:
+ hash = WC_SHA384;
+ len = WC_SHA384_DIGEST_SIZE;
+ break;
+ #endif
+
+ #ifndef NO_SHA
+ case sha_mac:
+ default:
+ hash = WC_SHA;
+ len = WC_SHA_DIGEST_SIZE;
+ break;
+ #endif
+ }
+
+ times = resLen / len;
+ lastLen = resLen % len;
+
+ if (lastLen)
+ times += 1;
+
+ lastTime = times - 1;
+
+ ret = wc_HmacInit(hmac, heap, devId);
+ if (ret == 0) {
+ ret = wc_HmacSetKey(hmac, hash, secret, secLen);
+ if (ret == 0)
+ ret = wc_HmacUpdate(hmac, seed, seedLen); /* A0 = seed */
+ if (ret == 0)
+ ret = wc_HmacFinal(hmac, previous); /* A1 */
+ if (ret == 0) {
+ for (i = 0; i < times; i++) {
+ ret = wc_HmacUpdate(hmac, previous, len);
+ if (ret != 0)
+ break;
+ ret = wc_HmacUpdate(hmac, seed, seedLen);
+ if (ret != 0)
+ break;
+ ret = wc_HmacFinal(hmac, current);
+ if (ret != 0)
+ break;
+
+ if ((i == lastTime) && lastLen)
+ XMEMCPY(&result[idx], current,
+ min(lastLen, P_HASH_MAX_SIZE));
+ else {
+ XMEMCPY(&result[idx], current, len);
+ idx += len;
+ ret = wc_HmacUpdate(hmac, previous, len);
+ if (ret != 0)
+ break;
+ ret = wc_HmacFinal(hmac, previous);
+ if (ret != 0)
+ break;
+ }
+ }
+ }
+ wc_HmacFree(hmac);
+ }
+
+ ForceZero(previous, P_HASH_MAX_SIZE);
+ ForceZero(current, P_HASH_MAX_SIZE);
+ ForceZero(hmac, sizeof(Hmac));
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(previous, heap, DYNAMIC_TYPE_DIGEST);
+ XFREE(current, heap, DYNAMIC_TYPE_DIGEST);
+ XFREE(hmac, heap, DYNAMIC_TYPE_HMAC);
+#endif
+
+ return ret;
+}
+#undef P_HASH_MAX_SIZE
+
+/* compute PRF (pseudo random function) using SHA1 and MD5 for TLSv1 */
+int wc_PRF_TLSv1(byte* digest, word32 digLen, const byte* secret,
+ word32 secLen, const byte* label, word32 labLen,
+ const byte* seed, word32 seedLen, void* heap, int devId)
+{
+ int ret = 0;
+ word32 half = (secLen + 1) / 2;
+
+#ifdef WOLFSSL_SMALL_STACK
+ byte* md5_half;
+ byte* sha_half;
+ byte* md5_result;
+ byte* sha_result;
+#else
+ byte md5_half[MAX_PRF_HALF]; /* half is real size */
+ byte sha_half[MAX_PRF_HALF]; /* half is real size */
+ byte md5_result[MAX_PRF_DIG]; /* digLen is real size */
+ byte sha_result[MAX_PRF_DIG]; /* digLen is real size */
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+ DECLARE_VAR(labelSeed, byte, MAX_PRF_LABSEED, heap);
+ if (labelSeed == NULL)
+ return MEMORY_E;
+#else
+ byte labelSeed[MAX_PRF_LABSEED];
+#endif
+
+ if (half > MAX_PRF_HALF ||
+ labLen + seedLen > MAX_PRF_LABSEED ||
+ digLen > MAX_PRF_DIG)
+ {
+ #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+ FREE_VAR(labelSeed, heap);
+ #endif
+ return BUFFER_E;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ md5_half = (byte*)XMALLOC(MAX_PRF_HALF, heap, DYNAMIC_TYPE_DIGEST);
+ sha_half = (byte*)XMALLOC(MAX_PRF_HALF, heap, DYNAMIC_TYPE_DIGEST);
+ md5_result = (byte*)XMALLOC(MAX_PRF_DIG, heap, DYNAMIC_TYPE_DIGEST);
+ sha_result = (byte*)XMALLOC(MAX_PRF_DIG, heap, DYNAMIC_TYPE_DIGEST);
+
+ if (md5_half == NULL || sha_half == NULL || md5_result == NULL ||
+ sha_result == NULL) {
+ if (md5_half) XFREE(md5_half, heap, DYNAMIC_TYPE_DIGEST);
+ if (sha_half) XFREE(sha_half, heap, DYNAMIC_TYPE_DIGEST);
+ if (md5_result) XFREE(md5_result, heap, DYNAMIC_TYPE_DIGEST);
+ if (sha_result) XFREE(sha_result, heap, DYNAMIC_TYPE_DIGEST);
+ #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+ FREE_VAR(labelSeed, heap);
+ #endif
+
+ return MEMORY_E;
+ }
+#endif
+
+ XMEMSET(md5_result, 0, digLen);
+ XMEMSET(sha_result, 0, digLen);
+
+ XMEMCPY(md5_half, secret, half);
+ XMEMCPY(sha_half, secret + half - secLen % 2, half);
+
+ XMEMCPY(labelSeed, label, labLen);
+ XMEMCPY(labelSeed + labLen, seed, seedLen);
+
+ if ((ret = wc_PRF(md5_result, digLen, md5_half, half, labelSeed,
+ labLen + seedLen, md5_mac, heap, devId)) == 0) {
+ if ((ret = wc_PRF(sha_result, digLen, sha_half, half, labelSeed,
+ labLen + seedLen, sha_mac, heap, devId)) == 0) {
+ /* calculate XOR for TLSv1 PRF */
+ XMEMCPY(digest, md5_result, digLen);
+ xorbuf(digest, sha_result, digLen);
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(md5_half, heap, DYNAMIC_TYPE_DIGEST);
+ XFREE(sha_half, heap, DYNAMIC_TYPE_DIGEST);
+ XFREE(md5_result, heap, DYNAMIC_TYPE_DIGEST);
+ XFREE(sha_result, heap, DYNAMIC_TYPE_DIGEST);
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+ FREE_VAR(labelSeed, heap);
+#endif
+
+ return ret;
+}
+
+/* Wrapper for TLS 1.2 and TLSv1 cases to calculate PRF */
+/* In TLS 1.2 case call straight thru to wc_PRF */
+int wc_PRF_TLS(byte* digest, word32 digLen, const byte* secret, word32 secLen,
+ const byte* label, word32 labLen, const byte* seed, word32 seedLen,
+ int useAtLeastSha256, int hash_type, void* heap, int devId)
+{
+ int ret = 0;
+
+ if (useAtLeastSha256) {
+ #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+ DECLARE_VAR(labelSeed, byte, MAX_PRF_LABSEED, heap);
+ if (labelSeed == NULL)
+ return MEMORY_E;
+ #else
+ byte labelSeed[MAX_PRF_LABSEED];
+ #endif
+
+ if (labLen + seedLen > MAX_PRF_LABSEED)
+ return BUFFER_E;
+
+ XMEMCPY(labelSeed, label, labLen);
+ XMEMCPY(labelSeed + labLen, seed, seedLen);
+
+ /* If a cipher suite wants an algorithm better than sha256, it
+ * should use better. */
+ if (hash_type < sha256_mac || hash_type == blake2b_mac)
+ hash_type = sha256_mac;
+ /* compute PRF for MD5, SHA-1, SHA-256, or SHA-384 for TLSv1.2 PRF */
+ ret = wc_PRF(digest, digLen, secret, secLen, labelSeed,
+ labLen + seedLen, hash_type, heap, devId);
+
+ #if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_ASYNC_NO_HASH)
+ FREE_VAR(labelSeed, heap);
+ #endif
+ }
+#ifndef NO_OLD_TLS
+ else {
+ /* compute TLSv1 PRF (pseudo random function using HMAC) */
+ ret = wc_PRF_TLSv1(digest, digLen, secret, secLen, label, labLen, seed,
+ seedLen, heap, devId);
+ }
+#endif
+
+ return ret;
+}
+#endif /* WOLFSSL_HAVE_PRF */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hc128.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hc128.c
index bcfa148e0..96f02d16d 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hc128.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hc128.c
@@ -1,8 +1,8 @@
/* hc128.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -34,6 +35,7 @@
#include <wolfssl/wolfcrypt/hc128.h>
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
@@ -71,7 +73,7 @@
(ctx->T[(u)]) += tem2+(tem0 ^ tem1); \
(ctx->X[(a)]) = (ctx->T[(u)]); \
(n) = tem3 ^ (ctx->T[(u)]) ; \
-}
+}
/*one step of HC-128, update Q and generate 32 bits keystream*/
#define step_Q(ctx,u,v,a,b,c,d,n){ \
@@ -83,17 +85,17 @@
(ctx->T[(u)]) += tem2 + (tem0 ^ tem1); \
(ctx->Y[(a)]) = (ctx->T[(u)]); \
(n) = tem3 ^ (ctx->T[(u)]) ; \
-}
+}
/*16 steps of HC-128, generate 512 bits keystream*/
-static void generate_keystream(HC128* ctx, word32* keystream)
+static void generate_keystream(HC128* ctx, word32* keystream)
{
word32 cc,dd;
cc = ctx->counter1024 & 0x1ff;
dd = (cc+16)&0x1ff;
- if (ctx->counter1024 < 512)
- {
+ if (ctx->counter1024 < 512)
+ {
ctx->counter1024 = (ctx->counter1024 + 16) & 0x3ff;
step_P(ctx, cc+0, cc+1, 0, 6, 13,4, keystream[0]);
step_P(ctx, cc+1, cc+2, 1, 7, 14,5, keystream[1]);
@@ -112,7 +114,7 @@ static void generate_keystream(HC128* ctx, word32* keystream)
step_P(ctx, cc+14,cc+15,14,4, 11,2, keystream[14]);
step_P(ctx, cc+15,dd+0, 15,5, 12,3, keystream[15]);
}
- else
+ else
{
ctx->counter1024 = (ctx->counter1024 + 16) & 0x3ff;
step_Q(ctx, 512+cc+0, 512+cc+1, 0, 6, 13,4, keystream[0]);
@@ -148,7 +150,7 @@ static void generate_keystream(HC128* ctx, word32* keystream)
h1((ctx),(ctx->X[(d)]),tem3); \
(ctx->T[(u)]) = ((ctx->T[(u)]) + tem2+(tem0^tem1)) ^ tem3; \
(ctx->X[(a)]) = (ctx->T[(u)]); \
-}
+}
/*update table Q*/
#define update_Q(ctx,u,v,a,b,c,d){ \
@@ -159,7 +161,7 @@ static void generate_keystream(HC128* ctx, word32* keystream)
h2((ctx),(ctx->Y[(d)]),tem3); \
(ctx->T[(u)]) = ((ctx->T[(u)]) + tem2+(tem0^tem1)) ^ tem3; \
(ctx->Y[(a)]) = (ctx->T[(u)]); \
-}
+}
/*16 steps of HC-128, without generating keystream, */
/*but use the outputs to update P and Q*/
@@ -169,8 +171,8 @@ static void setup_update(HC128* ctx) /*each time 16 steps*/
cc = ctx->counter1024 & 0x1ff;
dd = (cc+16)&0x1ff;
- if (ctx->counter1024 < 512)
- {
+ if (ctx->counter1024 < 512)
+ {
ctx->counter1024 = (ctx->counter1024 + 16) & 0x3ff;
update_P(ctx, cc+0, cc+1, 0, 6, 13, 4);
update_P(ctx, cc+1, cc+2, 1, 7, 14, 5);
@@ -187,9 +189,9 @@ static void setup_update(HC128* ctx) /*each time 16 steps*/
update_P(ctx, cc+12,cc+13,12,2, 9, 0);
update_P(ctx, cc+13,cc+14,13,3, 10, 1);
update_P(ctx, cc+14,cc+15,14,4, 11, 2);
- update_P(ctx, cc+15,dd+0, 15,5, 12, 3);
+ update_P(ctx, cc+15,dd+0, 15,5, 12, 3);
}
- else
+ else
{
ctx->counter1024 = (ctx->counter1024 + 16) & 0x3ff;
update_Q(ctx, 512+cc+0, 512+cc+1, 0, 6, 13, 4);
@@ -207,8 +209,8 @@ static void setup_update(HC128* ctx) /*each time 16 steps*/
update_Q(ctx, 512+cc+12,512+cc+13,12,2, 9, 0);
update_Q(ctx, 512+cc+13,512+cc+14,13,3, 10, 1);
update_Q(ctx, 512+cc+14,512+cc+15,14,4, 11, 2);
- update_Q(ctx, 512+cc+15,512+dd+0, 15,5, 12, 3);
- }
+ update_Q(ctx, 512+cc+15,512+dd+0, 15,5, 12, 3);
+ }
}
@@ -230,7 +232,7 @@ static void setup_update(HC128* ctx) /*each time 16 steps*/
static void Hc128_SetIV(HC128* ctx, const byte* inIv)
-{
+{
word32 i;
word32 iv[4];
@@ -238,46 +240,46 @@ static void Hc128_SetIV(HC128* ctx, const byte* inIv)
XMEMCPY(iv, inIv, sizeof(iv));
else
XMEMSET(iv, 0, sizeof(iv));
-
+
for (i = 0; i < (128 >> 5); i++)
ctx->iv[i] = LITTLE32(iv[i]);
-
+
for (; i < 8; i++) ctx->iv[i] = ctx->iv[i-4];
-
- /* expand the key and IV into the table T */
- /* (expand the key and IV into the table P and Q) */
-
+
+ /* expand the key and IV into the table T */
+ /* (expand the key and IV into the table P and Q) */
+
for (i = 0; i < 8; i++) ctx->T[i] = ctx->key[i];
for (i = 8; i < 16; i++) ctx->T[i] = ctx->iv[i-8];
- for (i = 16; i < (256+16); i++)
+ for (i = 16; i < (256+16); i++)
ctx->T[i] = f2(ctx->T[i-2]) + ctx->T[i-7] + f1(ctx->T[i-15]) +
ctx->T[i-16]+i;
-
+
for (i = 0; i < 16; i++) ctx->T[i] = ctx->T[256+i];
- for (i = 16; i < 1024; i++)
+ for (i = 16; i < 1024; i++)
ctx->T[i] = f2(ctx->T[i-2]) + ctx->T[i-7] + f1(ctx->T[i-15]) +
ctx->T[i-16]+256+i;
-
+
/* initialize counter1024, X and Y */
ctx->counter1024 = 0;
for (i = 0; i < 16; i++) ctx->X[i] = ctx->T[512-16+i];
for (i = 0; i < 16; i++) ctx->Y[i] = ctx->T[512+512-16+i];
-
+
/* run the cipher 1024 steps before generating the output */
- for (i = 0; i < 64; i++) setup_update(ctx);
+ for (i = 0; i < 64; i++) setup_update(ctx);
}
-static INLINE int DoKey(HC128* ctx, const byte* key, const byte* iv)
-{
- word32 i;
+static WC_INLINE int DoKey(HC128* ctx, const byte* key, const byte* iv)
+{
+ word32 i;
- /* Key size in bits 128 */
+ /* Key size in bits 128 */
for (i = 0; i < (128 >> 5); i++)
ctx->key[i] = LITTLE32(((word32*)key)[i]);
-
+
for ( ; i < 8 ; i++) ctx->key[i] = ctx->key[i-4];
Hc128_SetIV(ctx, iv);
@@ -286,10 +288,35 @@ static INLINE int DoKey(HC128* ctx, const byte* key, const byte* iv)
}
+int wc_Hc128_SetHeap(HC128* ctx, void* heap)
+{
+ if (ctx == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef XSTREAM_ALIGN
+ ctx->heap = heap;
+#endif
+
+ (void)heap;
+ return 0;
+}
+
/* Key setup */
int wc_Hc128_SetKey(HC128* ctx, const byte* key, const byte* iv)
{
+ if (ctx == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
#ifdef XSTREAM_ALIGN
+ /* default heap to NULL or heap test value */
+ #ifdef WOLFSSL_HEAP_TEST
+ ctx->heap = (void*)WOLFSSL_HEAP_TEST;
+ #else
+ ctx->heap = NULL;
+ #endif /* WOLFSSL_HEAP_TEST */
+
if ((wolfssl_word)key % 4) {
int alignKey[4];
@@ -308,7 +335,7 @@ int wc_Hc128_SetKey(HC128* ctx, const byte* key, const byte* iv)
/* The following defines the encryption of data stream */
-static INLINE int DoProcess(HC128* ctx, byte* output, const byte* input,
+static WC_INLINE int DoProcess(HC128* ctx, byte* output, const byte* input,
word32 msglen)
{
word32 i, keystream[16];
@@ -345,7 +372,7 @@ static INLINE int DoProcess(HC128* ctx, byte* output, const byte* input,
{
word32 wordsLeft = msglen / sizeof(word32);
if (msglen % sizeof(word32)) wordsLeft++;
-
+
ByteReverseWords(keystream, keystream, wordsLeft * sizeof(word32));
}
#endif
@@ -361,20 +388,24 @@ static INLINE int DoProcess(HC128* ctx, byte* output, const byte* input,
/* Encrypt/decrypt a message of any size */
int wc_Hc128_Process(HC128* ctx, byte* output, const byte* input, word32 msglen)
{
+ if (ctx == NULL || output == NULL || input == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
#ifdef XSTREAM_ALIGN
if ((wolfssl_word)input % 4 || (wolfssl_word)output % 4) {
#ifndef NO_WOLFSSL_ALLOC_ALIGN
byte* tmp;
WOLFSSL_MSG("Hc128Process unaligned");
- tmp = (byte*)XMALLOC(msglen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ tmp = (byte*)XMALLOC(msglen, ctx->heap, DYNAMIC_TYPE_TMP_BUFFER);
if (tmp == NULL) return MEMORY_E;
XMEMCPY(tmp, input, msglen);
DoProcess(ctx, tmp, tmp, msglen);
XMEMCPY(output, tmp, msglen);
- XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(tmp, ctx->heap, DYNAMIC_TYPE_TMP_BUFFER);
return 0;
#else
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hmac.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hmac.c
index 242adfa55..bcebc1ce2 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hmac.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/hmac.c
@@ -1,8 +1,8 @@
-/* hmac.h
+/* hmac.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,170 +16,298 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
#ifndef NO_HMAC
-#include <wolfssl/wolfcrypt/hmac.h>
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
-#ifdef HAVE_FIPS
-/* does init */
-int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 keySz)
-{
- return HmacSetKey_fips(hmac, type, key, keySz);
-}
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$b")
+ #pragma const_seg(".fipsB$b")
+ #endif
+#endif
-int wc_HmacUpdate(Hmac* hmac, const byte* in, word32 sz)
-{
- return HmacUpdate_fips(hmac, in, sz);
-}
+#include <wolfssl/wolfcrypt/hmac.h>
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
-int wc_HmacFinal(Hmac* hmac, byte* out)
-{
- return HmacFinal_fips(hmac, out);
-}
+/* fips wrapper calls, user can call direct */
+/* If building for old FIPS. */
+#if defined(HAVE_FIPS) && \
+ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
-#ifdef HAVE_CAVIUM
- int wc_HmacInitCavium(Hmac* hmac, int i)
+ /* does init */
+ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 keySz)
{
- return HmacInitCavium(hmac, i);
+ if (hmac == NULL || (key == NULL && keySz != 0) ||
+ !(type == WC_MD5 || type == WC_SHA || type == WC_SHA256 ||
+ type == WC_SHA384 || type == WC_SHA512)) {
+ return BAD_FUNC_ARG;
+ }
+
+ return HmacSetKey_fips(hmac, type, key, keySz);
}
+ int wc_HmacUpdate(Hmac* hmac, const byte* in, word32 sz)
+ {
+ if (hmac == NULL || (in == NULL && sz > 0)) {
+ return BAD_FUNC_ARG;
+ }
+ return HmacUpdate_fips(hmac, in, sz);
+ }
+ int wc_HmacFinal(Hmac* hmac, byte* out)
+ {
+ if (hmac == NULL) {
+ return BAD_FUNC_ARG;
+ }
- void wc_HmacFreeCavium(Hmac* hmac)
+ return HmacFinal_fips(hmac, out);
+ }
+ int wolfSSL_GetHmacMaxSize(void)
{
- HmacFreeCavium(hmac);
+ return CyaSSL_GetHmacMaxSize();
}
-#endif
-int wolfSSL_GetHmacMaxSize(void)
-{
- return CyaSSL_GetHmacMaxSize();
-}
-
-#ifdef HAVE_HKDF
+ int wc_HmacInit(Hmac* hmac, void* heap, int devId)
+ {
+ (void)hmac;
+ (void)heap;
+ (void)devId;
+ /* FIPS doesn't support:
+ return HmacInit(hmac, heap, devId); */
+ return 0;
+ }
+ void wc_HmacFree(Hmac* hmac)
+ {
+ (void)hmac;
+ /* FIPS doesn't support:
+ HmacFree(hmac); */
+ }
-int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
+ #ifdef HAVE_HKDF
+ int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
const byte* salt, word32 saltSz,
const byte* info, word32 infoSz,
byte* out, word32 outSz)
-{
- return HKDF(type, inKey, inKeySz, salt, saltSz, info, infoSz, out, outSz);
-}
-
-
-#endif /* HAVE_HKDF */
-#else /* else build without fips */
-#ifdef WOLFSSL_PIC32MZ_HASH
-
-#define wc_InitMd5 wc_InitMd5_sw
-#define wc_Md5Update wc_Md5Update_sw
-#define wc_Md5Final wc_Md5Final_sw
+ {
+ return HKDF(type, inKey, inKeySz, salt, saltSz,
+ info, infoSz, out, outSz);
+ }
+ #endif /* HAVE_HKDF */
-#define wc_InitSha wc_InitSha_sw
-#define wc_ShaUpdate wc_ShaUpdate_sw
-#define wc_ShaFinal wc_ShaFinal_sw
+#else /* else build without fips, or for new fips */
-#define wc_InitSha256 wc_InitSha256_sw
-#define wc_Sha256Update wc_Sha256Update_sw
-#define wc_Sha256Final wc_Sha256Final_sw
-#endif
+int wc_HmacSizeByType(int type)
+{
+ int ret;
-#ifdef HAVE_FIPS
- /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
- #define FIPS_NO_WRAPPERS
-#endif
+ if (!(type == WC_MD5 || type == WC_SHA ||
+ type == WC_SHA224 || type == WC_SHA256 ||
+ type == WC_SHA384 || type == WC_SHA512 ||
+ type == WC_SHA3_224 || type == WC_SHA3_256 ||
+ type == WC_SHA3_384 || type == WC_SHA3_512)) {
+ return BAD_FUNC_ARG;
+ }
-#include <wolfssl/wolfcrypt/error-crypt.h>
+ switch (type) {
+ #ifndef NO_MD5
+ case WC_MD5:
+ ret = WC_MD5_DIGEST_SIZE;
+ break;
+ #endif /* !NO_MD5 */
+
+ #ifndef NO_SHA
+ case WC_SHA:
+ ret = WC_SHA_DIGEST_SIZE;
+ break;
+ #endif /* !NO_SHA */
+
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ ret = WC_SHA224_DIGEST_SIZE;
+ break;
+ #endif /* WOLFSSL_SHA224 */
+
+ #ifndef NO_SHA256
+ case WC_SHA256:
+ ret = WC_SHA256_DIGEST_SIZE;
+ break;
+ #endif /* !NO_SHA256 */
+
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
+ ret = WC_SHA384_DIGEST_SIZE;
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ #ifdef WOLFSSL_SHA512
+ case WC_SHA512:
+ ret = WC_SHA512_DIGEST_SIZE;
+ break;
+ #endif /* WOLFSSL_SHA512 */
+
+ #ifdef WOLFSSL_SHA3
+ case WC_SHA3_224:
+ ret = WC_SHA3_224_DIGEST_SIZE;
+ break;
+
+ case WC_SHA3_256:
+ ret = WC_SHA3_256_DIGEST_SIZE;
+ break;
+
+ case WC_SHA3_384:
+ ret = WC_SHA3_384_DIGEST_SIZE;
+ break;
+
+ case WC_SHA3_512:
+ ret = WC_SHA3_512_DIGEST_SIZE;
+ break;
+
+ #endif
+ default:
+ ret = BAD_FUNC_ARG;
+ break;
+ }
-#ifdef HAVE_CAVIUM
- static void HmacCaviumFinal(Hmac* hmac, byte* hash);
- static void HmacCaviumUpdate(Hmac* hmac, const byte* msg, word32 length);
- static void HmacCaviumSetKey(Hmac* hmac, int type, const byte* key,
- word32 length);
-#endif
+ return ret;
+}
-static int InitHmac(Hmac* hmac, int type)
+int _InitHmac(Hmac* hmac, int type, void* heap)
{
int ret = 0;
- hmac->innerHashKeyed = 0;
- hmac->macType = (byte)type;
-
- if (!(type == MD5 || type == SHA || type == SHA256 || type == SHA384
- || type == SHA512 || type == BLAKE2B_ID))
- return BAD_FUNC_ARG;
-
switch (type) {
- #ifndef NO_MD5
- case MD5:
- wc_InitMd5(&hmac->hash.md5);
- break;
- #endif
-
- #ifndef NO_SHA
- case SHA:
+ #ifndef NO_MD5
+ case WC_MD5:
+ ret = wc_InitMd5(&hmac->hash.md5);
+ break;
+ #endif /* !NO_MD5 */
+
+ #ifndef NO_SHA
+ case WC_SHA:
ret = wc_InitSha(&hmac->hash.sha);
- break;
- #endif
-
- #ifndef NO_SHA256
- case SHA256:
+ break;
+ #endif /* !NO_SHA */
+
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ ret = wc_InitSha224(&hmac->hash.sha224);
+ break;
+ #endif /* WOLFSSL_SHA224 */
+
+ #ifndef NO_SHA256
+ case WC_SHA256:
ret = wc_InitSha256(&hmac->hash.sha256);
- break;
- #endif
-
- #ifdef WOLFSSL_SHA384
- case SHA384:
+ break;
+ #endif /* !NO_SHA256 */
+
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
ret = wc_InitSha384(&hmac->hash.sha384);
- break;
- #endif
-
- #ifdef WOLFSSL_SHA512
- case SHA512:
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ #ifdef WOLFSSL_SHA512
+ case WC_SHA512:
ret = wc_InitSha512(&hmac->hash.sha512);
- break;
- #endif
-
- #ifdef HAVE_BLAKE2
- case BLAKE2B_ID:
- ret = wc_InitBlake2b(&hmac->hash.blake2b, BLAKE2B_256);
- break;
- #endif
-
+ break;
+ #endif /* WOLFSSL_SHA512 */
+
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_224
+ case WC_SHA3_224:
+ ret = wc_InitSha3_224(&hmac->hash.sha3, heap, INVALID_DEVID);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_SHA3_256:
+ ret = wc_InitSha3_256(&hmac->hash.sha3, heap, INVALID_DEVID);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_SHA3_384:
+ ret = wc_InitSha3_384(&hmac->hash.sha3, heap, INVALID_DEVID);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_SHA3_512:
+ ret = wc_InitSha3_512(&hmac->hash.sha3, heap, INVALID_DEVID);
+ break;
+ #endif
+ #endif
+
default:
- return BAD_FUNC_ARG;
+ ret = BAD_FUNC_ARG;
+ break;
}
+ /* default to NULL heap hint or test value */
+#ifdef WOLFSSL_HEAP_TEST
+ hmac->heap = (void)WOLFSSL_HEAP_TEST;
+#else
+ hmac->heap = heap;
+#endif /* WOLFSSL_HEAP_TEST */
+
return ret;
}
int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
{
- byte* ip = (byte*) hmac->ipad;
- byte* op = (byte*) hmac->opad;
+ byte* ip;
+ byte* op;
word32 i, hmac_block_size = 0;
- int ret;
+ int ret = 0;
+ void* heap = NULL;
+
+ if (hmac == NULL || (key == NULL && length != 0) ||
+ !(type == WC_MD5 || type == WC_SHA ||
+ type == WC_SHA224 || type == WC_SHA256 ||
+ type == WC_SHA384 || type == WC_SHA512 ||
+ type == WC_SHA3_224 || type == WC_SHA3_256 ||
+ type == WC_SHA3_384 || type == WC_SHA3_512)) {
+ return BAD_FUNC_ARG;
+ }
-#ifdef HAVE_CAVIUM
- if (hmac->magic == WOLFSSL_HMAC_CAVIUM_MAGIC)
- return HmacCaviumSetKey(hmac, type, key, length);
+#ifndef HAVE_FIPS
+ /* if set key has already been run then make sure and free existing */
+ /* This is for async and PIC32MZ situations, and just normally OK,
+ provided the user calls wc_HmacInit() first. That function is not
+ available in FIPS builds. In current FIPS builds, the hashes are
+ not allocating resources. */
+ if (hmac->macType != WC_HASH_TYPE_NONE) {
+ wc_HmacFree(hmac);
+ }
#endif
- ret = InitHmac(hmac, type);
+ hmac->innerHashKeyed = 0;
+ hmac->macType = (byte)type;
+
+ ret = _InitHmac(hmac, type, heap);
if (ret != 0)
return ret;
@@ -188,138 +316,255 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
return HMAC_MIN_KEYLEN_E;
#endif
+#ifdef WOLF_CRYPTO_CB
+ hmac->keyRaw = key; /* use buffer directly */
+ hmac->keyLen = length;
+#endif
+
+ ip = (byte*)hmac->ipad;
+ op = (byte*)hmac->opad;
+
switch (hmac->macType) {
- #ifndef NO_MD5
- case MD5:
- {
- hmac_block_size = MD5_BLOCK_SIZE;
- if (length <= MD5_BLOCK_SIZE) {
- XMEMCPY(ip, key, length);
+ #ifndef NO_MD5
+ case WC_MD5:
+ hmac_block_size = WC_MD5_BLOCK_SIZE;
+ if (length <= WC_MD5_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
}
else {
- wc_Md5Update(&hmac->hash.md5, key, length);
- wc_Md5Final(&hmac->hash.md5, ip);
- length = MD5_DIGEST_SIZE;
+ ret = wc_Md5Update(&hmac->hash.md5, key, length);
+ if (ret != 0)
+ break;
+ ret = wc_Md5Final(&hmac->hash.md5, ip);
+ if (ret != 0)
+ break;
+ length = WC_MD5_DIGEST_SIZE;
}
- }
- break;
- #endif
-
- #ifndef NO_SHA
- case SHA:
- {
- hmac_block_size = SHA_BLOCK_SIZE;
- if (length <= SHA_BLOCK_SIZE) {
- XMEMCPY(ip, key, length);
+ break;
+ #endif /* !NO_MD5 */
+
+ #ifndef NO_SHA
+ case WC_SHA:
+ hmac_block_size = WC_SHA_BLOCK_SIZE;
+ if (length <= WC_SHA_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
}
else {
- wc_ShaUpdate(&hmac->hash.sha, key, length);
- wc_ShaFinal(&hmac->hash.sha, ip);
- length = SHA_DIGEST_SIZE;
+ ret = wc_ShaUpdate(&hmac->hash.sha, key, length);
+ if (ret != 0)
+ break;
+ ret = wc_ShaFinal(&hmac->hash.sha, ip);
+ if (ret != 0)
+ break;
+
+ length = WC_SHA_DIGEST_SIZE;
}
- }
- break;
- #endif
+ break;
+ #endif /* !NO_SHA */
+
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ hmac_block_size = WC_SHA224_BLOCK_SIZE;
+ if (length <= WC_SHA224_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
+ }
+ else {
+ ret = wc_Sha224Update(&hmac->hash.sha224, key, length);
+ if (ret != 0)
+ break;
+ ret = wc_Sha224Final(&hmac->hash.sha224, ip);
+ if (ret != 0)
+ break;
- #ifndef NO_SHA256
- case SHA256:
- {
- hmac_block_size = SHA256_BLOCK_SIZE;
- if (length <= SHA256_BLOCK_SIZE) {
- XMEMCPY(ip, key, length);
+ length = WC_SHA224_DIGEST_SIZE;
+ }
+ break;
+ #endif /* WOLFSSL_SHA224 */
+ #ifndef NO_SHA256
+ case WC_SHA256:
+ hmac_block_size = WC_SHA256_BLOCK_SIZE;
+ if (length <= WC_SHA256_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
}
else {
ret = wc_Sha256Update(&hmac->hash.sha256, key, length);
if (ret != 0)
- return ret;
-
+ break;
ret = wc_Sha256Final(&hmac->hash.sha256, ip);
if (ret != 0)
- return ret;
+ break;
- length = SHA256_DIGEST_SIZE;
+ length = WC_SHA256_DIGEST_SIZE;
}
- }
- break;
- #endif
-
- #ifdef WOLFSSL_SHA384
- case SHA384:
- {
- hmac_block_size = SHA384_BLOCK_SIZE;
- if (length <= SHA384_BLOCK_SIZE) {
- XMEMCPY(ip, key, length);
+ break;
+ #endif /* !NO_SHA256 */
+
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
+ hmac_block_size = WC_SHA384_BLOCK_SIZE;
+ if (length <= WC_SHA384_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
}
else {
ret = wc_Sha384Update(&hmac->hash.sha384, key, length);
if (ret != 0)
- return ret;
-
+ break;
ret = wc_Sha384Final(&hmac->hash.sha384, ip);
if (ret != 0)
- return ret;
+ break;
- length = SHA384_DIGEST_SIZE;
+ length = WC_SHA384_DIGEST_SIZE;
}
- }
- break;
- #endif
-
- #ifdef WOLFSSL_SHA512
- case SHA512:
- {
- hmac_block_size = SHA512_BLOCK_SIZE;
- if (length <= SHA512_BLOCK_SIZE) {
- XMEMCPY(ip, key, length);
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ #ifdef WOLFSSL_SHA512
+ case WC_SHA512:
+ hmac_block_size = WC_SHA512_BLOCK_SIZE;
+ if (length <= WC_SHA512_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
}
else {
ret = wc_Sha512Update(&hmac->hash.sha512, key, length);
if (ret != 0)
- return ret;
-
+ break;
ret = wc_Sha512Final(&hmac->hash.sha512, ip);
if (ret != 0)
- return ret;
+ break;
- length = SHA512_DIGEST_SIZE;
+ length = WC_SHA512_DIGEST_SIZE;
}
- }
- break;
- #endif
+ break;
+ #endif /* WOLFSSL_SHA512 */
+
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_224
+ case WC_SHA3_224:
+ hmac_block_size = WC_SHA3_224_BLOCK_SIZE;
+ if (length <= WC_SHA3_224_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
+ }
+ else {
+ ret = wc_Sha3_224_Update(&hmac->hash.sha3, key, length);
+ if (ret != 0)
+ break;
+ ret = wc_Sha3_224_Final(&hmac->hash.sha3, ip);
+ if (ret != 0)
+ break;
- #ifdef HAVE_BLAKE2
- case BLAKE2B_ID:
- {
- hmac_block_size = BLAKE2B_BLOCKBYTES;
- if (length <= BLAKE2B_BLOCKBYTES) {
- XMEMCPY(ip, key, length);
+ length = WC_SHA3_224_DIGEST_SIZE;
+ }
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_SHA3_256:
+ hmac_block_size = WC_SHA3_256_BLOCK_SIZE;
+ if (length <= WC_SHA3_256_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
+ }
+ else {
+ ret = wc_Sha3_256_Update(&hmac->hash.sha3, key, length);
+ if (ret != 0)
+ break;
+ ret = wc_Sha3_256_Final(&hmac->hash.sha3, ip);
+ if (ret != 0)
+ break;
+
+ length = WC_SHA3_256_DIGEST_SIZE;
+ }
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_SHA3_384:
+ hmac_block_size = WC_SHA3_384_BLOCK_SIZE;
+ if (length <= WC_SHA3_384_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
}
else {
- ret = wc_Blake2bUpdate(&hmac->hash.blake2b, key, length);
+ ret = wc_Sha3_384_Update(&hmac->hash.sha3, key, length);
if (ret != 0)
- return ret;
+ break;
+ ret = wc_Sha3_384_Final(&hmac->hash.sha3, ip);
+ if (ret != 0)
+ break;
- ret = wc_Blake2bFinal(&hmac->hash.blake2b, ip, BLAKE2B_256);
+ length = WC_SHA3_384_DIGEST_SIZE;
+ }
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_SHA3_512:
+ hmac_block_size = WC_SHA3_512_BLOCK_SIZE;
+ if (length <= WC_SHA3_512_BLOCK_SIZE) {
+ if (key != NULL) {
+ XMEMCPY(ip, key, length);
+ }
+ }
+ else {
+ ret = wc_Sha3_512_Update(&hmac->hash.sha3, key, length);
+ if (ret != 0)
+ break;
+ ret = wc_Sha3_512_Final(&hmac->hash.sha3, ip);
if (ret != 0)
- return ret;
+ break;
- length = BLAKE2B_256;
+ length = WC_SHA3_512_DIGEST_SIZE;
}
- }
- break;
- #endif
+ break;
+ #endif
+ #endif /* WOLFSSL_SHA3 */
default:
return BAD_FUNC_ARG;
}
- if (length < hmac_block_size)
- XMEMSET(ip + length, 0, hmac_block_size - length);
- for(i = 0; i < hmac_block_size; i++) {
- op[i] = ip[i] ^ OPAD;
- ip[i] ^= IPAD;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+ if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
+ #if defined(HAVE_INTEL_QA) || defined(HAVE_CAVIUM)
+ #ifdef HAVE_INTEL_QA
+ if (IntelQaHmacGetType(hmac->macType, NULL) == 0)
+ #endif
+ {
+ if (length > hmac_block_size)
+ length = hmac_block_size;
+ /* update key length */
+ hmac->keyLen = (word16)length;
+
+ return ret;
+ }
+ /* no need to pad below */
+ #endif
+ }
+#endif
+
+ if (ret == 0) {
+ if (length < hmac_block_size)
+ XMEMSET(ip + length, 0, hmac_block_size - length);
+
+ for(i = 0; i < hmac_block_size; i++) {
+ op[i] = ip[i] ^ OPAD;
+ ip[i] ^= IPAD;
+ }
}
- return 0;
+
+ return ret;
}
@@ -328,59 +573,79 @@ static int HmacKeyInnerHash(Hmac* hmac)
int ret = 0;
switch (hmac->macType) {
- #ifndef NO_MD5
- case MD5:
- wc_Md5Update(&hmac->hash.md5, (byte*) hmac->ipad, MD5_BLOCK_SIZE);
- break;
- #endif
-
- #ifndef NO_SHA
- case SHA:
- wc_ShaUpdate(&hmac->hash.sha, (byte*) hmac->ipad, SHA_BLOCK_SIZE);
- break;
- #endif
-
- #ifndef NO_SHA256
- case SHA256:
- ret = wc_Sha256Update(&hmac->hash.sha256,
- (byte*) hmac->ipad, SHA256_BLOCK_SIZE);
- if (ret != 0)
- return ret;
- break;
- #endif
-
- #ifdef WOLFSSL_SHA384
- case SHA384:
- ret = wc_Sha384Update(&hmac->hash.sha384,
- (byte*) hmac->ipad, SHA384_BLOCK_SIZE);
- if (ret != 0)
- return ret;
- break;
- #endif
-
- #ifdef WOLFSSL_SHA512
- case SHA512:
- ret = wc_Sha512Update(&hmac->hash.sha512,
- (byte*) hmac->ipad, SHA512_BLOCK_SIZE);
- if (ret != 0)
- return ret;
- break;
- #endif
-
- #ifdef HAVE_BLAKE2
- case BLAKE2B_ID:
- ret = wc_Blake2bUpdate(&hmac->hash.blake2b,
- (byte*) hmac->ipad,BLAKE2B_BLOCKBYTES);
- if (ret != 0)
- return ret;
- break;
- #endif
+ #ifndef NO_MD5
+ case WC_MD5:
+ ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->ipad,
+ WC_MD5_BLOCK_SIZE);
+ break;
+ #endif /* !NO_MD5 */
+
+ #ifndef NO_SHA
+ case WC_SHA:
+ ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->ipad,
+ WC_SHA_BLOCK_SIZE);
+ break;
+ #endif /* !NO_SHA */
+
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->ipad,
+ WC_SHA224_BLOCK_SIZE);
+ break;
+ #endif /* WOLFSSL_SHA224 */
+ #ifndef NO_SHA256
+ case WC_SHA256:
+ ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->ipad,
+ WC_SHA256_BLOCK_SIZE);
+ break;
+ #endif /* !NO_SHA256 */
+
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
+ ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->ipad,
+ WC_SHA384_BLOCK_SIZE);
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ #ifdef WOLFSSL_SHA512
+ case WC_SHA512:
+ ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->ipad,
+ WC_SHA512_BLOCK_SIZE);
+ break;
+ #endif /* WOLFSSL_SHA512 */
+
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_224
+ case WC_SHA3_224:
+ ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
+ WC_SHA3_224_BLOCK_SIZE);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_SHA3_256:
+ ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
+ WC_SHA3_256_BLOCK_SIZE);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_SHA3_384:
+ ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
+ WC_SHA3_384_BLOCK_SIZE);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_SHA3_512:
+ ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
+ WC_SHA3_512_BLOCK_SIZE);
+ break;
+ #endif
+ #endif /* WOLFSSL_SHA3 */
default:
- break;
+ break;
}
- hmac->innerHashKeyed = 1;
+ if (ret == 0)
+ hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;
return ret;
}
@@ -388,12 +653,33 @@ static int HmacKeyInnerHash(Hmac* hmac)
int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
{
- int ret;
+ int ret = 0;
+
+ if (hmac == NULL || (msg == NULL && length > 0)) {
+ return BAD_FUNC_ARG;
+ }
-#ifdef HAVE_CAVIUM
- if (hmac->magic == WOLFSSL_HMAC_CAVIUM_MAGIC)
- return HmacCaviumUpdate(hmac, msg, length);
+#ifdef WOLF_CRYPTO_CB
+ if (hmac->devId != INVALID_DEVID) {
+ ret = wc_CryptoCb_Hmac(hmac, hmac->macType, msg, length, NULL);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ ret = 0; /* reset error code */
+ }
#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+ if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
+ #if defined(HAVE_CAVIUM)
+ return NitroxHmacUpdate(hmac, msg, length);
+ #elif defined(HAVE_INTEL_QA)
+ if (IntelQaHmacGetType(hmac->macType, NULL) == 0) {
+ return IntelQaHmac(&hmac->asyncDev, hmac->macType,
+ (byte*)hmac->ipad, hmac->keyLen, NULL, msg, length);
+ }
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
if (!hmac->innerHashKeyed) {
ret = HmacKeyInnerHash(hmac);
@@ -402,55 +688,69 @@ int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
}
switch (hmac->macType) {
- #ifndef NO_MD5
- case MD5:
- wc_Md5Update(&hmac->hash.md5, msg, length);
- break;
- #endif
-
- #ifndef NO_SHA
- case SHA:
- wc_ShaUpdate(&hmac->hash.sha, msg, length);
- break;
- #endif
-
- #ifndef NO_SHA256
- case SHA256:
+ #ifndef NO_MD5
+ case WC_MD5:
+ ret = wc_Md5Update(&hmac->hash.md5, msg, length);
+ break;
+ #endif /* !NO_MD5 */
+
+ #ifndef NO_SHA
+ case WC_SHA:
+ ret = wc_ShaUpdate(&hmac->hash.sha, msg, length);
+ break;
+ #endif /* !NO_SHA */
+
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ ret = wc_Sha224Update(&hmac->hash.sha224, msg, length);
+ break;
+ #endif /* WOLFSSL_SHA224 */
+
+ #ifndef NO_SHA256
+ case WC_SHA256:
ret = wc_Sha256Update(&hmac->hash.sha256, msg, length);
- if (ret != 0)
- return ret;
- break;
- #endif
+ break;
+ #endif /* !NO_SHA256 */
- #ifdef WOLFSSL_SHA384
- case SHA384:
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
ret = wc_Sha384Update(&hmac->hash.sha384, msg, length);
- if (ret != 0)
- return ret;
- break;
- #endif
-
- #ifdef WOLFSSL_SHA512
- case SHA512:
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ #ifdef WOLFSSL_SHA512
+ case WC_SHA512:
ret = wc_Sha512Update(&hmac->hash.sha512, msg, length);
- if (ret != 0)
- return ret;
- break;
- #endif
-
- #ifdef HAVE_BLAKE2
- case BLAKE2B_ID:
- ret = wc_Blake2bUpdate(&hmac->hash.blake2b, msg, length);
- if (ret != 0)
- return ret;
- break;
- #endif
+ break;
+ #endif /* WOLFSSL_SHA512 */
+
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_224
+ case WC_SHA3_224:
+ ret = wc_Sha3_224_Update(&hmac->hash.sha3, msg, length);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_SHA3_256:
+ ret = wc_Sha3_256_Update(&hmac->hash.sha3, msg, length);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_SHA3_384:
+ ret = wc_Sha3_384_Update(&hmac->hash.sha3, msg, length);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_SHA3_512:
+ ret = wc_Sha3_512_Update(&hmac->hash.sha3, msg, length);
+ break;
+ #endif
+ #endif /* WOLFSSL_SHA3 */
default:
- break;
+ break;
}
- return 0;
+ return ret;
}
@@ -458,10 +758,34 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
{
int ret;
-#ifdef HAVE_CAVIUM
- if (hmac->magic == WOLFSSL_HMAC_CAVIUM_MAGIC)
- return HmacCaviumFinal(hmac, hash);
+ if (hmac == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLF_CRYPTO_CB
+ if (hmac->devId != INVALID_DEVID) {
+ ret = wc_CryptoCb_Hmac(hmac, hmac->macType, NULL, 0, hash);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+ if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
+ int hashLen = wc_HmacSizeByType(hmac->macType);
+ if (hashLen <= 0)
+ return hashLen;
+
+ #if defined(HAVE_CAVIUM)
+ return NitroxHmacFinal(hmac, hash, hashLen);
+ #elif defined(HAVE_INTEL_QA)
+ if (IntelQaHmacGetType(hmac->macType, NULL) == 0) {
+ return IntelQaHmac(&hmac->asyncDev, hmac->macType,
+ (byte*)hmac->ipad, hmac->keyLen, hash, NULL, hashLen);
+ }
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
if (!hmac->innerHashKeyed) {
ret = HmacKeyInnerHash(hmac);
@@ -470,377 +794,437 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
}
switch (hmac->macType) {
- #ifndef NO_MD5
- case MD5:
- {
- wc_Md5Final(&hmac->hash.md5, (byte*) hmac->innerHash);
-
- wc_Md5Update(&hmac->hash.md5, (byte*) hmac->opad, MD5_BLOCK_SIZE);
- wc_Md5Update(&hmac->hash.md5,
- (byte*) hmac->innerHash, MD5_DIGEST_SIZE);
-
- wc_Md5Final(&hmac->hash.md5, hash);
- }
- break;
- #endif
-
- #ifndef NO_SHA
- case SHA:
- {
- wc_ShaFinal(&hmac->hash.sha, (byte*) hmac->innerHash);
-
- wc_ShaUpdate(&hmac->hash.sha, (byte*) hmac->opad, SHA_BLOCK_SIZE);
- wc_ShaUpdate(&hmac->hash.sha,
- (byte*) hmac->innerHash, SHA_DIGEST_SIZE);
-
- wc_ShaFinal(&hmac->hash.sha, hash);
- }
- break;
- #endif
-
- #ifndef NO_SHA256
- case SHA256:
- {
- ret = wc_Sha256Final(&hmac->hash.sha256, (byte*) hmac->innerHash);
+ #ifndef NO_MD5
+ case WC_MD5:
+ ret = wc_Md5Final(&hmac->hash.md5, (byte*)hmac->innerHash);
if (ret != 0)
- return ret;
-
- ret = wc_Sha256Update(&hmac->hash.sha256,
- (byte*) hmac->opad, SHA256_BLOCK_SIZE);
+ break;
+ ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->opad,
+ WC_MD5_BLOCK_SIZE);
if (ret != 0)
- return ret;
-
- ret = wc_Sha256Update(&hmac->hash.sha256,
- (byte*) hmac->innerHash, SHA256_DIGEST_SIZE);
+ break;
+ ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->innerHash,
+ WC_MD5_DIGEST_SIZE);
if (ret != 0)
- return ret;
+ break;
+ ret = wc_Md5Final(&hmac->hash.md5, hash);
+ break;
+ #endif /* !NO_MD5 */
- ret = wc_Sha256Final(&hmac->hash.sha256, hash);
+ #ifndef NO_SHA
+ case WC_SHA:
+ ret = wc_ShaFinal(&hmac->hash.sha, (byte*)hmac->innerHash);
if (ret != 0)
- return ret;
- }
- break;
- #endif
-
- #ifdef WOLFSSL_SHA384
- case SHA384:
- {
- ret = wc_Sha384Final(&hmac->hash.sha384, (byte*) hmac->innerHash);
+ break;
+ ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->opad,
+ WC_SHA_BLOCK_SIZE);
if (ret != 0)
- return ret;
-
- ret = wc_Sha384Update(&hmac->hash.sha384,
- (byte*) hmac->opad, SHA384_BLOCK_SIZE);
+ break;
+ ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->innerHash,
+ WC_SHA_DIGEST_SIZE);
if (ret != 0)
- return ret;
+ break;
+ ret = wc_ShaFinal(&hmac->hash.sha, hash);
+ break;
+ #endif /* !NO_SHA */
- ret = wc_Sha384Update(&hmac->hash.sha384,
- (byte*) hmac->innerHash, SHA384_DIGEST_SIZE);
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ ret = wc_Sha224Final(&hmac->hash.sha224, (byte*)hmac->innerHash);
+ if (ret != 0)
+ break;
+ ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->opad,
+ WC_SHA224_BLOCK_SIZE);
+ if (ret != 0)
+ break;
+ ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->innerHash,
+ WC_SHA224_DIGEST_SIZE);
+ if (ret != 0)
+ break;
+ ret = wc_Sha224Final(&hmac->hash.sha224, hash);
+ if (ret != 0)
+ break;
+ break;
+ #endif /* WOLFSSL_SHA224 */
+ #ifndef NO_SHA256
+ case WC_SHA256:
+ ret = wc_Sha256Final(&hmac->hash.sha256, (byte*)hmac->innerHash);
if (ret != 0)
- return ret;
+ break;
+ ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->opad,
+ WC_SHA256_BLOCK_SIZE);
+ if (ret != 0)
+ break;
+ ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->innerHash,
+ WC_SHA256_DIGEST_SIZE);
+ if (ret != 0)
+ break;
+ ret = wc_Sha256Final(&hmac->hash.sha256, hash);
+ break;
+ #endif /* !NO_SHA256 */
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
+ ret = wc_Sha384Final(&hmac->hash.sha384, (byte*)hmac->innerHash);
+ if (ret != 0)
+ break;
+ ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->opad,
+ WC_SHA384_BLOCK_SIZE);
+ if (ret != 0)
+ break;
+ ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->innerHash,
+ WC_SHA384_DIGEST_SIZE);
+ if (ret != 0)
+ break;
ret = wc_Sha384Final(&hmac->hash.sha384, hash);
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ #ifdef WOLFSSL_SHA512
+ case WC_SHA512:
+ ret = wc_Sha512Final(&hmac->hash.sha512, (byte*)hmac->innerHash);
if (ret != 0)
- return ret;
- }
- break;
- #endif
-
- #ifdef WOLFSSL_SHA512
- case SHA512:
- {
- ret = wc_Sha512Final(&hmac->hash.sha512, (byte*) hmac->innerHash);
+ break;
+ ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->opad,
+ WC_SHA512_BLOCK_SIZE);
if (ret != 0)
- return ret;
-
- ret = wc_Sha512Update(&hmac->hash.sha512,
- (byte*) hmac->opad, SHA512_BLOCK_SIZE);
+ break;
+ ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->innerHash,
+ WC_SHA512_DIGEST_SIZE);
if (ret != 0)
- return ret;
+ break;
+ ret = wc_Sha512Final(&hmac->hash.sha512, hash);
+ break;
+ #endif /* WOLFSSL_SHA512 */
- ret = wc_Sha512Update(&hmac->hash.sha512,
- (byte*) hmac->innerHash, SHA512_DIGEST_SIZE);
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_224
+ case WC_SHA3_224:
+ ret = wc_Sha3_224_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
- return ret;
-
- ret = wc_Sha512Final(&hmac->hash.sha512, hash);
+ break;
+ ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->opad,
+ WC_SHA3_224_BLOCK_SIZE);
if (ret != 0)
- return ret;
- }
- break;
- #endif
-
- #ifdef HAVE_BLAKE2
- case BLAKE2B_ID:
- {
- ret = wc_Blake2bFinal(&hmac->hash.blake2b, (byte*) hmac->innerHash,
- BLAKE2B_256);
+ break;
+ ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
+ WC_SHA3_224_DIGEST_SIZE);
if (ret != 0)
- return ret;
-
- ret = wc_Blake2bUpdate(&hmac->hash.blake2b,
- (byte*) hmac->opad, BLAKE2B_BLOCKBYTES);
+ break;
+ ret = wc_Sha3_224_Final(&hmac->hash.sha3, hash);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_SHA3_256:
+ ret = wc_Sha3_256_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
- return ret;
-
- ret = wc_Blake2bUpdate(&hmac->hash.blake2b,
- (byte*) hmac->innerHash, BLAKE2B_256);
+ break;
+ ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->opad,
+ WC_SHA3_256_BLOCK_SIZE);
if (ret != 0)
- return ret;
-
- ret = wc_Blake2bFinal(&hmac->hash.blake2b, hash, BLAKE2B_256);
+ break;
+ ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
+ WC_SHA3_256_DIGEST_SIZE);
if (ret != 0)
- return ret;
- }
- break;
- #endif
+ break;
+ ret = wc_Sha3_256_Final(&hmac->hash.sha3, hash);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_SHA3_384:
+ ret = wc_Sha3_384_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
+ if (ret != 0)
+ break;
+ ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->opad,
+ WC_SHA3_384_BLOCK_SIZE);
+ if (ret != 0)
+ break;
+ ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
+ WC_SHA3_384_DIGEST_SIZE);
+ if (ret != 0)
+ break;
+ ret = wc_Sha3_384_Final(&hmac->hash.sha3, hash);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_SHA3_512:
+ ret = wc_Sha3_512_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
+ if (ret != 0)
+ break;
+ ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->opad,
+ WC_SHA3_512_BLOCK_SIZE);
+ if (ret != 0)
+ break;
+ ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
+ WC_SHA3_512_DIGEST_SIZE);
+ if (ret != 0)
+ break;
+ ret = wc_Sha3_512_Final(&hmac->hash.sha3, hash);
+ break;
+ #endif
+ #endif /* WOLFSSL_SHA3 */
default:
- break;
+ ret = BAD_FUNC_ARG;
+ break;
}
- hmac->innerHashKeyed = 0;
+ if (ret == 0) {
+ hmac->innerHashKeyed = 0;
+ }
- return 0;
+ return ret;
}
-#ifdef HAVE_CAVIUM
-
-/* Initiliaze Hmac for use with Nitrox device */
-int wc_HmacInitCavium(Hmac* hmac, int devId)
+/* Initialize Hmac for use with async device */
+int wc_HmacInit(Hmac* hmac, void* heap, int devId)
{
- if (hmac == NULL)
- return -1;
-
- if (CspAllocContext(CONTEXT_SSL, &hmac->contextHandle, devId) != 0)
- return -1;
-
- hmac->keyLen = 0;
- hmac->dataLen = 0;
- hmac->type = 0;
- hmac->devId = devId;
- hmac->magic = WOLFSSL_HMAC_CAVIUM_MAGIC;
- hmac->data = NULL; /* buffered input data */
-
- hmac->innerHashKeyed = 0;
-
- return 0;
-}
-
+ int ret = 0;
-/* Free Hmac from use with Nitrox device */
-void wc_HmacFreeCavium(Hmac* hmac)
-{
if (hmac == NULL)
- return;
+ return BAD_FUNC_ARG;
- CspFreeContext(CONTEXT_SSL, hmac->contextHandle, hmac->devId);
- hmac->magic = 0;
- XFREE(hmac->data, NULL, DYNAMIC_TYPE_CAVIUM_TMP);
- hmac->data = NULL;
-}
+ XMEMSET(hmac, 0, sizeof(Hmac));
+ hmac->macType = WC_HASH_TYPE_NONE;
+ hmac->heap = heap;
+#ifdef WOLF_CRYPTO_CB
+ hmac->devId = devId;
+ hmac->devCtx = NULL;
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+ ret = wolfAsync_DevCtxInit(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC,
+ hmac->heap, devId);
+#else
+ (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
-static void HmacCaviumFinal(Hmac* hmac, byte* hash)
-{
- word32 requestId;
-
- if (CspHmac(CAVIUM_BLOCKING, hmac->type, NULL, hmac->keyLen,
- (byte*)hmac->ipad, hmac->dataLen, hmac->data, hash, &requestId,
- hmac->devId) != 0) {
- WOLFSSL_MSG("Cavium Hmac failed");
- }
- hmac->innerHashKeyed = 0; /* tell update to start over if used again */
+ return ret;
}
-
-static void HmacCaviumUpdate(Hmac* hmac, const byte* msg, word32 length)
+#ifdef HAVE_PKCS11
+int wc_HmacInit_Id(Hmac* hmac, unsigned char* id, int len, void* heap,
+ int devId)
{
- word16 add = (word16)length;
- word32 total;
- byte* tmp;
+ int ret = 0;
- if (length > WOLFSSL_MAX_16BIT) {
- WOLFSSL_MSG("Too big msg for cavium hmac");
- return;
- }
+ if (hmac == NULL)
+ ret = BAD_FUNC_ARG;
+ if (ret == 0 && (len < 0 || len > HMAC_MAX_ID_LEN))
+ ret = BUFFER_E;
- if (hmac->innerHashKeyed == 0) { /* starting new */
- hmac->dataLen = 0;
- hmac->innerHashKeyed = 1;
+ if (ret == 0)
+ ret = wc_HmacInit(hmac, heap, devId);
+ if (ret == 0) {
+ XMEMCPY(hmac->id, id, len);
+ hmac->idLen = len;
}
- total = add + hmac->dataLen;
- if (total > WOLFSSL_MAX_16BIT) {
- WOLFSSL_MSG("Too big msg for cavium hmac");
- return;
- }
+ return ret;
+}
+#endif
- tmp = XMALLOC(hmac->dataLen + add, NULL,DYNAMIC_TYPE_CAVIUM_TMP);
- if (tmp == NULL) {
- WOLFSSL_MSG("Out of memory for cavium update");
+/* Free Hmac from use with async device */
+void wc_HmacFree(Hmac* hmac)
+{
+ if (hmac == NULL)
return;
+
+#ifdef WOLF_CRYPTO_CB
+ /* handle cleanup case where final is not called */
+ if (hmac->devId != INVALID_DEVID && hmac->devCtx != NULL) {
+ int ret;
+ byte finalHash[WC_HMAC_BLOCK_SIZE];
+ ret = wc_CryptoCb_Hmac(hmac, hmac->macType, NULL, 0, finalHash);
+ (void)ret; /* must ignore return code here */
+ (void)finalHash;
}
- if (hmac->dataLen)
- XMEMCPY(tmp, hmac->data, hmac->dataLen);
- XMEMCPY(tmp + hmac->dataLen, msg, add);
-
- hmac->dataLen += add;
- XFREE(hmac->data, NULL, DYNAMIC_TYPE_CAVIUM_TMP);
- hmac->data = tmp;
-}
+#endif
+ switch (hmac->macType) {
+ #ifndef NO_MD5
+ case WC_MD5:
+ wc_Md5Free(&hmac->hash.md5);
+ break;
+ #endif /* !NO_MD5 */
+
+ #ifndef NO_SHA
+ case WC_SHA:
+ wc_ShaFree(&hmac->hash.sha);
+ break;
+ #endif /* !NO_SHA */
+
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ wc_Sha224Free(&hmac->hash.sha224);
+ break;
+ #endif /* WOLFSSL_SHA224 */
+ #ifndef NO_SHA256
+ case WC_SHA256:
+ wc_Sha256Free(&hmac->hash.sha256);
+ break;
+ #endif /* !NO_SHA256 */
+
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
+ wc_Sha384Free(&hmac->hash.sha384);
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ #ifdef WOLFSSL_SHA512
+ case WC_SHA512:
+ wc_Sha512Free(&hmac->hash.sha512);
+ break;
+ #endif /* WOLFSSL_SHA512 */
+
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_224
+ case WC_SHA3_224:
+ wc_Sha3_224_Free(&hmac->hash.sha3);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_SHA3_256:
+ wc_Sha3_256_Free(&hmac->hash.sha3);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_SHA3_384:
+ wc_Sha3_384_Free(&hmac->hash.sha3);
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_SHA3_512:
+ wc_Sha3_512_Free(&hmac->hash.sha3);
+ break;
+ #endif
+ #endif /* WOLFSSL_SHA3 */
-static void HmacCaviumSetKey(Hmac* hmac, int type, const byte* key,
- word32 length)
-{
- hmac->macType = (byte)type;
- if (type == MD5)
- hmac->type = MD5_TYPE;
- else if (type == SHA)
- hmac->type = SHA1_TYPE;
- else if (type == SHA256)
- hmac->type = SHA256_TYPE;
- else {
- WOLFSSL_MSG("unsupported cavium hmac type");
+ default:
+ break;
}
- hmac->innerHashKeyed = 0; /* should we key Startup flag */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+ wolfAsync_DevCtxFree(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC);
+#endif /* WOLFSSL_ASYNC_CRYPT */
- hmac->keyLen = (word16)length;
- /* store key in ipad */
- XMEMCPY(hmac->ipad, key, length);
+ switch (hmac->macType) {
+ #ifndef NO_MD5
+ case WC_MD5:
+ wc_Md5Free(&hmac->hash.md5);
+ break;
+ #endif /* !NO_MD5 */
+
+ #ifndef NO_SHA
+ case WC_SHA:
+ wc_ShaFree(&hmac->hash.sha);
+ break;
+ #endif /* !NO_SHA */
+
+ #ifdef WOLFSSL_SHA224
+ case WC_SHA224:
+ wc_Sha224Free(&hmac->hash.sha224);
+ break;
+ #endif /* WOLFSSL_SHA224 */
+ #ifndef NO_SHA256
+ case WC_SHA256:
+ wc_Sha256Free(&hmac->hash.sha256);
+ break;
+ #endif /* !NO_SHA256 */
+
+ #ifdef WOLFSSL_SHA512
+ #ifdef WOLFSSL_SHA384
+ case WC_SHA384:
+ wc_Sha384Free(&hmac->hash.sha384);
+ break;
+ #endif /* WOLFSSL_SHA384 */
+ case WC_SHA512:
+ wc_Sha512Free(&hmac->hash.sha512);
+ break;
+ #endif /* WOLFSSL_SHA512 */
+ }
}
-#endif /* HAVE_CAVIUM */
-
int wolfSSL_GetHmacMaxSize(void)
{
- return MAX_DIGEST_SIZE;
+ return WC_MAX_DIGEST_SIZE;
}
#ifdef HAVE_HKDF
-
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
-
- static INLINE word32 min(word32 a, word32 b)
+ /* HMAC-KDF-Extract.
+ * RFC 5869 - HMAC-based Extract-and-Expand Key Derivation Function (HKDF).
+ *
+ * type The hash algorithm type.
+ * salt The optional salt value.
+ * saltSz The size of the salt.
+ * inKey The input keying material.
+ * inKeySz The size of the input keying material.
+ * out The pseudorandom key with the length that of the hash.
+ * returns 0 on success, otherwise failure.
+ */
+ int wc_HKDF_Extract(int type, const byte* salt, word32 saltSz,
+ const byte* inKey, word32 inKeySz, byte* out)
{
- return a > b ? b : a;
- }
-
-#endif /* WOLFSSL_HAVE_MIN */
-
-
-static INLINE int GetHashSizeByType(int type)
-{
- if (!(type == MD5 || type == SHA || type == SHA256 || type == SHA384
- || type == SHA512 || type == BLAKE2B_ID))
- return BAD_FUNC_ARG;
-
- switch (type) {
- #ifndef NO_MD5
- case MD5:
- return MD5_DIGEST_SIZE;
- break;
- #endif
-
- #ifndef NO_SHA
- case SHA:
- return SHA_DIGEST_SIZE;
- break;
- #endif
-
- #ifndef NO_SHA256
- case SHA256:
- return SHA256_DIGEST_SIZE;
- break;
- #endif
-
- #ifdef WOLFSSL_SHA384
- case SHA384:
- return SHA384_DIGEST_SIZE;
- break;
- #endif
-
- #ifdef WOLFSSL_SHA512
- case SHA512:
- return SHA512_DIGEST_SIZE;
- break;
- #endif
-
- #ifdef HAVE_BLAKE2
- case BLAKE2B_ID:
- return BLAKE2B_OUTBYTES;
- break;
- #endif
-
- default:
- return BAD_FUNC_ARG;
- break;
- }
-}
-
-
-/* HMAC-KDF with hash type, optional salt and info, return 0 on success */
-int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
- const byte* salt, word32 saltSz,
- const byte* info, word32 infoSz,
- byte* out, word32 outSz)
-{
- Hmac myHmac;
-#ifdef WOLFSSL_SMALL_STACK
- byte* tmp;
- byte* prk;
-#else
- byte tmp[MAX_DIGEST_SIZE]; /* localSalt helper and T */
- byte prk[MAX_DIGEST_SIZE];
-#endif
- const byte* localSalt; /* either points to user input or tmp */
- int hashSz = GetHashSizeByType(type);
- word32 outIdx = 0;
- byte n = 0x1;
- int ret;
+ byte tmp[WC_MAX_DIGEST_SIZE]; /* localSalt helper */
+ Hmac myHmac;
+ int ret;
+ const byte* localSalt; /* either points to user input or tmp */
+ int hashSz;
+
+ ret = wc_HmacSizeByType(type);
+ if (ret < 0)
+ return ret;
- if (hashSz < 0)
- return BAD_FUNC_ARG;
+ hashSz = ret;
+ localSalt = salt;
+ if (localSalt == NULL) {
+ XMEMSET(tmp, 0, hashSz);
+ localSalt = tmp;
+ saltSz = hashSz;
+ }
-#ifdef WOLFSSL_SMALL_STACK
- tmp = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (tmp == NULL)
- return MEMORY_E;
+ ret = wc_HmacInit(&myHmac, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_HmacSetKey(&myHmac, type, localSalt, saltSz);
+ if (ret == 0)
+ ret = wc_HmacUpdate(&myHmac, inKey, inKeySz);
+ if (ret == 0)
+ ret = wc_HmacFinal(&myHmac, out);
+ wc_HmacFree(&myHmac);
+ }
- prk = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (prk == NULL) {
- XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
+ return ret;
}
-#endif
- localSalt = salt;
- if (localSalt == NULL) {
- XMEMSET(tmp, 0, hashSz);
- localSalt = tmp;
- saltSz = hashSz;
- }
-
- do {
- ret = wc_HmacSetKey(&myHmac, type, localSalt, saltSz);
- if (ret != 0)
- break;
- ret = wc_HmacUpdate(&myHmac, inKey, inKeySz);
- if (ret != 0)
- break;
- ret = wc_HmacFinal(&myHmac, prk);
- } while (0);
+ /* HMAC-KDF-Expand.
+ * RFC 5869 - HMAC-based Extract-and-Expand Key Derivation Function (HKDF).
+ *
+ * type The hash algorithm type.
+ * inKey The input key.
+ * inKeySz The size of the input key.
+ * info The application specific information.
+ * infoSz The size of the application specific information.
+ * out The output keying material.
+ * returns 0 on success, otherwise failure.
+ */
+ int wc_HKDF_Expand(int type, const byte* inKey, word32 inKeySz,
+ const byte* info, word32 infoSz, byte* out, word32 outSz)
+ {
+ byte tmp[WC_MAX_DIGEST_SIZE];
+ Hmac myHmac;
+ int ret = 0;
+ word32 outIdx = 0;
+ word32 hashSz = wc_HmacSizeByType(type);
+ byte n = 0x1;
+
+ ret = wc_HmacInit(&myHmac, NULL, INVALID_DEVID);
+ if (ret != 0)
+ return ret;
- if (ret == 0) {
while (outIdx < outSz) {
int tmpSz = (n == 1) ? 0 : hashSz;
word32 left = outSz - outIdx;
- ret = wc_HmacSetKey(&myHmac, type, prk, hashSz);
+ ret = wc_HmacSetKey(&myHmac, type, inKey, inKeySz);
if (ret != 0)
break;
ret = wc_HmacUpdate(&myHmac, tmp, tmpSz);
@@ -856,24 +1240,51 @@ int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
if (ret != 0)
break;
- left = min(left, (word32)hashSz);
+ left = min(left, hashSz);
XMEMCPY(out+outIdx, tmp, left);
outIdx += hashSz;
n++;
}
+
+ wc_HmacFree(&myHmac);
+
+ return ret;
}
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(prk, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ /* HMAC-KDF.
+ * RFC 5869 - HMAC-based Extract-and-Expand Key Derivation Function (HKDF).
+ *
+ * type The hash algorithm type.
+ * inKey The input keying material.
+ * inKeySz The size of the input keying material.
+ * salt The optional salt value.
+ * saltSz The size of the salt.
+ * info The application specific information.
+ * infoSz The size of the application specific information.
+ * out The output keying material.
+ * returns 0 on success, otherwise failure.
+ */
+ int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
+ const byte* salt, word32 saltSz,
+ const byte* info, word32 infoSz,
+ byte* out, word32 outSz)
+ {
+ byte prk[WC_MAX_DIGEST_SIZE];
+ int hashSz = wc_HmacSizeByType(type);
+ int ret;
- return ret;
-}
+ if (hashSz < 0)
+ return BAD_FUNC_ARG;
+
+ ret = wc_HKDF_Extract(type, salt, saltSz, inKey, inKeySz, prk);
+ if (ret != 0)
+ return ret;
+
+ return wc_HKDF_Expand(type, prk, hashSz, info, infoSz, out, outSz);
+ }
#endif /* HAVE_HKDF */
#endif /* HAVE_FIPS */
#endif /* NO_HMAC */
-
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/idea.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/idea.c
new file mode 100644
index 000000000..600c90654
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/idea.c
@@ -0,0 +1,303 @@
+/* idea.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_IDEA
+
+#include <wolfssl/wolfcrypt/idea.h>
+
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+/* Multiplication of x and y modulo 2^16+1.
+ * IDEA specifies a special case: when an input value (x or y) is 0,
+ * it must be treated as 2^16.
+ */
+static WC_INLINE word16 idea_mult(word16 x, word16 y)
+{
+ long mul, res;
+
+ mul = (long)x * (long)y;
+ if (mul) {
+ res = (mul & IDEA_MASK) - ((word32)mul >> 16);
+ if (res <= 0)
+ res += IDEA_MODULO;
+
+ return (word16) (res & IDEA_MASK);
+ }
+
+ if (!x)
+ return ((IDEA_MODULO - y) & IDEA_MASK);
+
+ /* !y */
+ return ((IDEA_MODULO - x) & IDEA_MASK);
+}
+
+/* compute 1/x modulo 2^16+1 using the extended Euclidean algorithm;
+ * adapted from fp_invmod */
+static WC_INLINE word16 idea_invmod(word16 x)
+{
+ int u, v, b, d;
+
+ if (x <= 1)
+ return x;
+
+ u = IDEA_MODULO;
+ v = x;
+ d = 1;
+ b = 0;
+
+ do {
+ while (!(u & 1)) {
+ u >>= 1;
+ if (b & 1)
+ b -= IDEA_MODULO;
+ b >>= 1;
+ }
+
+ while (!(v & 1)) {
+ v >>= 1;
+ if (d & 1) {
+ d -= IDEA_MODULO;
+ }
+ d >>= 1;
+ }
+
+ if (u >= v) {
+ u -= v;
+ b -= d;
+ } else {
+ v -= u;
+ d -= b;
+ }
+ } while (u != 0);
+
+    /* d is now the inverse; make it positive if required */
+ while (d < 0)
+ d += IDEA_MODULO;
+
+ /* d must be < IDEA_MODULO */
+ while (d >= (int)IDEA_MODULO)
+ d -= IDEA_MODULO;
+
+ return (word16)(d & IDEA_MASK);
+}
+
+/* generate the 52 16-bit key sub-blocks from the 128-bit key */
+int wc_IdeaSetKey(Idea *idea, const byte* key, word16 keySz,
+ const byte *iv, int dir)
+{
+ word16 idx = 0;
+ word32 t;
+ short i;
+
+ if (idea == NULL || key == NULL || keySz != IDEA_KEY_SIZE ||
+ (dir != IDEA_ENCRYPTION && dir != IDEA_DECRYPTION))
+ return BAD_FUNC_ARG;
+
+ /* initial key schedule for 0 -> 7 */
+ for (i = 0; i < IDEA_ROUNDS; i++) {
+ idea->skey[i] = (word16)key[idx++] << 8;
+ idea->skey[i] |= (word16)key[idx++];
+ }
+
+ /* shift phase key schedule for 8 -> 51 */
+ for (i = IDEA_ROUNDS; i < IDEA_SK_NUM; i++) {
+ t = (word32)idea->skey[((i+1) & 7) ? i-7 : i-15] << 9;
+ t |= (word32)idea->skey[((i+2) & 7) < 2 ? i-14 : i-6] >> 7;
+ idea->skey[i] = (word16)(t & IDEA_MASK);
+ }
+
+ /* compute decryption key from encryption key */
+ if (dir == IDEA_DECRYPTION) {
+ word16 enckey[IDEA_SK_NUM];
+
+ /* put encryption key in tmp buffer */
+ XMEMCPY(enckey, idea->skey, sizeof(idea->skey));
+
+ idx = 0;
+
+ idea->skey[6*IDEA_ROUNDS] = idea_invmod(enckey[idx++]);
+ idea->skey[6*IDEA_ROUNDS+1] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK;
+ idea->skey[6*IDEA_ROUNDS+2] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK;
+ idea->skey[6*IDEA_ROUNDS+3] = idea_invmod(enckey[idx++]);
+
+ for (i = 6*(IDEA_ROUNDS-1); i >= 0; i -= 6) {
+ idea->skey[i+4] = enckey[idx++];
+ idea->skey[i+5] = enckey[idx++];
+
+ idea->skey[i] = idea_invmod(enckey[idx++]);
+ if (i) {
+ idea->skey[i+2] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK;
+ idea->skey[i+1] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK;
+ }
+ else {
+ idea->skey[1] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK;
+ idea->skey[2] = (IDEA_2EXP16 - enckey[idx++]) & IDEA_MASK;
+ }
+
+ idea->skey[i+3] = idea_invmod(enckey[idx++]);
+ }
+
+ /* erase temporary buffer */
+ ForceZero(enckey, sizeof(enckey));
+ }
+
+ /* set the iv */
+ return wc_IdeaSetIV(idea, iv);
+}
+
+/* set the IV in the Idea key structure */
+int wc_IdeaSetIV(Idea *idea, const byte* iv)
+{
+ if (idea == NULL)
+ return BAD_FUNC_ARG;
+
+ if (iv != NULL)
+ XMEMCPY(idea->reg, iv, IDEA_BLOCK_SIZE);
+ else
+ XMEMSET(idea->reg, 0, IDEA_BLOCK_SIZE);
+
+ return 0;
+}
+
+/* encryption/decryption of a single 64-bit block
+ */
+int wc_IdeaCipher(Idea *idea, byte* out, const byte* in)
+{
+ word32 t1, t2;
+ word16 i, skey_idx = 0, idx = 0;
+ word16 x[4];
+
+ if (idea == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* put input byte block in word16 */
+ for (i = 0; i < IDEA_BLOCK_SIZE/2; i++) {
+ x[i] = (word16)in[idx++] << 8;
+ x[i] |= (word16)in[idx++];
+ }
+
+ for (i = 0; i < IDEA_ROUNDS; i++) {
+ x[0] = idea_mult(x[0], idea->skey[skey_idx++]);
+ x[1] = ((word32)x[1] + (word32)idea->skey[skey_idx++]) & IDEA_MASK;
+ x[2] = ((word32)x[2] + (word32)idea->skey[skey_idx++]) & IDEA_MASK;
+ x[3] = idea_mult(x[3], idea->skey[skey_idx++]);
+
+ t2 = x[0] ^ x[2];
+ t2 = idea_mult((word16)t2, idea->skey[skey_idx++]);
+ t1 = (t2 + (x[1] ^ x[3])) & IDEA_MASK;
+ t1 = idea_mult((word16)t1, idea->skey[skey_idx++]);
+ t2 = (t1 + t2) & IDEA_MASK;
+
+ x[0] ^= t1;
+ x[3] ^= t2;
+
+ t2 ^= x[1];
+ x[1] = x[2] ^ (word16)t1;
+ x[2] = (word16)t2;
+ }
+
+ x[0] = idea_mult(x[0], idea->skey[skey_idx++]);
+ out[0] = (x[0] >> 8) & 0xFF;
+ out[1] = x[0] & 0xFF;
+
+ x[2] = ((word32)x[2] + (word32)idea->skey[skey_idx++]) & IDEA_MASK;
+ out[2] = (x[2] >> 8) & 0xFF;
+ out[3] = x[2] & 0xFF;
+
+ x[1] = ((word32)x[1] + (word32)idea->skey[skey_idx++]) & IDEA_MASK;
+ out[4] = (x[1] >> 8) & 0xFF;
+ out[5] = x[1] & 0xFF;
+
+ x[3] = idea_mult(x[3], idea->skey[skey_idx++]);
+ out[6] = (x[3] >> 8) & 0xFF;
+ out[7] = x[3] & 0xFF;
+
+ return 0;
+}
+
+int wc_IdeaCbcEncrypt(Idea *idea, byte* out, const byte* in, word32 len)
+{
+ int blocks;
+ int ret;
+
+ if (idea == NULL || out == NULL || in == NULL)
+ return BAD_FUNC_ARG;
+
+ blocks = len / IDEA_BLOCK_SIZE;
+ while (blocks--) {
+ xorbuf((byte*)idea->reg, in, IDEA_BLOCK_SIZE);
+ ret = wc_IdeaCipher(idea, (byte*)idea->reg, (byte*)idea->reg);
+ if (ret != 0) {
+ return ret;
+ }
+
+ XMEMCPY(out, idea->reg, IDEA_BLOCK_SIZE);
+
+ out += IDEA_BLOCK_SIZE;
+ in += IDEA_BLOCK_SIZE;
+ }
+
+ return 0;
+}
+
+int wc_IdeaCbcDecrypt(Idea *idea, byte* out, const byte* in, word32 len)
+{
+ int blocks;
+ int ret;
+
+ if (idea == NULL || out == NULL || in == NULL)
+ return BAD_FUNC_ARG;
+
+ blocks = len / IDEA_BLOCK_SIZE;
+ while (blocks--) {
+ XMEMCPY((byte*)idea->tmp, in, IDEA_BLOCK_SIZE);
+ ret = wc_IdeaCipher(idea, out, (byte*)idea->tmp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ xorbuf(out, (byte*)idea->reg, IDEA_BLOCK_SIZE);
+ XMEMCPY(idea->reg, idea->tmp, IDEA_BLOCK_SIZE);
+
+ out += IDEA_BLOCK_SIZE;
+ in += IDEA_BLOCK_SIZE;
+ }
+
+ return 0;
+}
+
+#endif /* HAVE_IDEA */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/include.am b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/include.am
index 299921579..bba761bc1 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/include.am
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/include.am
@@ -1,9 +1,13 @@
# vim:ft=automake
# All paths should be given relative to the root
+EXTRA_DIST += src/bio.c
EXTRA_DIST += wolfcrypt/src/misc.c
+EXTRA_DIST += wolfcrypt/src/evp.c
EXTRA_DIST += wolfcrypt/src/asm.c
EXTRA_DIST += wolfcrypt/src/aes_asm.asm
+EXTRA_DIST += wolfcrypt/src/wc_dsp.c
+EXTRA_DIST += wolfcrypt/src/sp_dsp32.c
EXTRA_DIST += \
wolfcrypt/src/ecc_fp.c \
@@ -37,12 +41,87 @@ EXTRA_DIST += \
wolfcrypt/src/fp_sqr_comba_7.i \
wolfcrypt/src/fp_sqr_comba_8.i \
wolfcrypt/src/fp_sqr_comba_9.i \
- wolfcrypt/src/fp_sqr_comba_small_set.i
+ wolfcrypt/src/fp_sqr_comba_small_set.i \
+ wolfcrypt/src/fe_x25519_128.i
EXTRA_DIST += wolfcrypt/src/port/ti/ti-aes.c \
wolfcrypt/src/port/ti/ti-des3.c \
wolfcrypt/src/port/ti/ti-hash.c \
wolfcrypt/src/port/ti/ti-ccm.c \
- wolfcrypt/src/port/pic32/pic32mz-hash.c
+ wolfcrypt/src/port/pic32/pic32mz-crypt.c \
+ wolfcrypt/src/port/nrf51.c \
+ wolfcrypt/src/port/arm/armv8-aes.c \
+ wolfcrypt/src/port/arm/armv8-sha256.c \
+ wolfcrypt/src/port/arm/armv8-chacha.c \
+ wolfcrypt/src/port/arm/armv8-curve25519.c \
+ wolfcrypt/src/port/arm/armv8-32-curve25519.c \
+ wolfcrypt/src/port/arm/armv8-sha512-asm.c \
+ wolfcrypt/src/port/arm/armv8-32-sha512-asm.c \
+ wolfcrypt/src/port/nxp/ksdk_port.c \
+ wolfcrypt/src/port/atmel/README.md \
+ wolfcrypt/src/port/xilinx/xil-sha3.c \
+ wolfcrypt/src/port/xilinx/xil-aesgcm.c \
+ wolfcrypt/src/port/caam/caam_aes.c \
+ wolfcrypt/src/port/caam/caam_driver.c \
+ wolfcrypt/src/port/caam/caam_init.c \
+ wolfcrypt/src/port/caam/caam_sha.c \
+ wolfcrypt/src/port/caam/caam_doc.pdf \
+ wolfcrypt/src/port/st/stm32.c \
+ wolfcrypt/src/port/st/stsafe.c \
+ wolfcrypt/src/port/st/README.md \
+ wolfcrypt/src/port/af_alg/afalg_aes.c \
+ wolfcrypt/src/port/af_alg/afalg_hash.c \
+ wolfcrypt/src/port/devcrypto/devcrypto_hash.c \
+ wolfcrypt/src/port/devcrypto/wc_devcrypto.c \
+ wolfcrypt/src/port/devcrypto/README.md \
+ wolfcrypt/src/port/mynewt/mynewt_port.c \
+ wolfcrypt/src/port/Espressif/esp32_aes.c \
+ wolfcrypt/src/port/Espressif/esp32_sha.c \
+ wolfcrypt/src/port/Espressif/esp32_util.c \
+ wolfcrypt/src/port/Espressif/esp32_mp.c \
+ wolfcrypt/src/port/Espressif/README.md \
+ wolfcrypt/src/port/arm/cryptoCell.c \
+ wolfcrypt/src/port/arm/cryptoCellHash.c \
+ wolfcrypt/src/port/Renesas/renesas_tsip_aes.c \
+ wolfcrypt/src/port/Renesas/renesas_tsip_sha.c \
+ wolfcrypt/src/port/Renesas/renesas_tsip_util.c \
+ wolfcrypt/src/port/Renesas/README.md
+if BUILD_CRYPTOCB
+src_libwolfssl_la_SOURCES += wolfcrypt/src/cryptocb.c
+endif
+
+if BUILD_PKCS11
+src_libwolfssl_la_SOURCES += wolfcrypt/src/wc_pkcs11.c
+endif
+
+if BUILD_DEVCRYPTO
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/devcrypto/devcrypto_hash.c
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/devcrypto/devcrypto_aes.c
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/devcrypto/wc_devcrypto.c
+endif
+
+if BUILD_CAVIUM
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/cavium/cavium_nitrox.c
+endif
+EXTRA_DIST += wolfcrypt/src/port/cavium/README.md
+
+if BUILD_OCTEON_SYNC
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/cavium/cavium_octeon_sync.c
+endif
+EXTRA_DIST += wolfcrypt/src/port/cavium/README_Octeon.md
+
+if BUILD_INTEL_QA
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist.c
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist_mem.c
+endif
+EXTRA_DIST += wolfcrypt/src/port/intel/README.md
+
+if BUILD_INTEL_QA_SYNC
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist_sync.c
+endif
+
+if BUILD_CRYPTOAUTHLIB
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/atmel/atmel.c
+endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/integer.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/integer.c
index b3ce4203e..56d684b46 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/integer.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/integer.c
@@ -1,8 +1,8 @@
/* integer.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,10 +16,11 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
/*
* Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
* http://math.libtomcrypt.com
@@ -33,19 +34,70 @@
/* in case user set USE_FAST_MATH there */
#include <wolfssl/wolfcrypt/settings.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
#ifndef NO_BIG_INT
#ifndef USE_FAST_MATH
+#ifndef WOLFSSL_SP_MATH
+
#include <wolfssl/wolfcrypt/integer.h>
-#ifndef NO_WOLFSSL_SMALL_STACK
- #ifndef WOLFSSL_SMALL_STACK
- #define WOLFSSL_SMALL_STACK
+#if defined(FREESCALE_LTC_TFM)
+ #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
+#ifdef WOLFSSL_DEBUG_MATH
+ #include <stdio.h>
+#endif
+
+#ifdef SHOW_GEN
+ #ifndef NO_STDIO_FILESYSTEM
+ #include <stdio.h>
#endif
#endif
-static void bn_reverse (unsigned char *s, int len);
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifdef __cplusplus
+ extern "C" {
+#endif
+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+#ifdef __cplusplus
+ } /* extern "C" */
+#endif
+#endif
+
+/* reverse an array, used for radix code */
+static void
+bn_reverse (unsigned char *s, int len)
+{
+ int ix, iy;
+ unsigned char t;
+
+ ix = 0;
+ iy = len - 1;
+ while (ix < iy) {
+ t = s[ix];
+ s[ix] = s[iy];
+ s[iy] = t;
+ ++ix;
+ --iy;
+ }
+}
/* math settings check */
word32 CheckRunTimeSettings(void)
@@ -60,6 +112,13 @@ int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
{
int res = MP_OKAY;
+ if (a) XMEMSET(a, 0, sizeof(mp_int));
+ if (b) XMEMSET(b, 0, sizeof(mp_int));
+ if (c) XMEMSET(c, 0, sizeof(mp_int));
+ if (d) XMEMSET(d, 0, sizeof(mp_int));
+ if (e) XMEMSET(e, 0, sizeof(mp_int));
+ if (f) XMEMSET(f, 0, sizeof(mp_int));
+
if (a && ((res = mp_init(a)) != MP_OKAY))
return res;
@@ -95,33 +154,28 @@ int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
/* init a new mp_int */
int mp_init (mp_int * a)
{
- int i;
+ /* Safeguard against passing in a null pointer */
+ if (a == NULL)
+ return MP_VAL;
- /* allocate memory required and clear it */
- a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC, 0,
- DYNAMIC_TYPE_BIGINT);
- if (a->dp == NULL) {
- return MP_MEM;
- }
-
- /* set the digits to zero */
- for (i = 0; i < MP_PREC; i++) {
- a->dp[i] = 0;
- }
+ /* defer allocation until mp_grow */
+ a->dp = NULL;
/* set the used to zero, allocated digits to the default precision
* and sign to positive */
a->used = 0;
- a->alloc = MP_PREC;
+ a->alloc = 0;
a->sign = MP_ZPOS;
+#ifdef HAVE_WOLF_BIGINT
+ wc_bigint_init(&a->raw);
+#endif
return MP_OKAY;
}
/* clear one (frees) */
-void
-mp_clear (mp_int * a)
+void mp_clear (mp_int * a)
{
int i;
@@ -136,15 +190,52 @@ mp_clear (mp_int * a)
}
/* free ram */
- XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
+ mp_free(a);
/* reset members to make debugging easier */
- a->dp = NULL;
a->alloc = a->used = 0;
a->sign = MP_ZPOS;
}
}
+void mp_free (mp_int * a)
+{
+ /* only do anything if a hasn't been freed previously */
+ if (a->dp != NULL) {
+ /* free ram */
+ XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
+ a->dp = NULL;
+ }
+
+#ifdef HAVE_WOLF_BIGINT
+ wc_bigint_free(&a->raw);
+#endif
+}
+
+void mp_forcezero(mp_int * a)
+{
+ if (a == NULL)
+ return;
+
+ /* only do anything if a hasn't been freed previously */
+ if (a->dp != NULL) {
+ /* force zero the used digits */
+ ForceZero(a->dp, a->used * sizeof(mp_digit));
+#ifdef HAVE_WOLF_BIGINT
+ wc_bigint_zero(&a->raw);
+#endif
+ /* free ram */
+ mp_free(a);
+
+ /* reset members to make debugging easier */
+ a->alloc = a->used = 0;
+ a->sign = MP_ZPOS;
+ }
+
+ a->sign = MP_ZPOS;
+ a->used = 0;
+}
+
/* get the size for an unsigned equivalent */
int mp_unsigned_bin_size (mp_int * a)
@@ -155,8 +246,7 @@ int mp_unsigned_bin_size (mp_int * a)
/* returns the number of bits in an int */
-int
-mp_count_bits (mp_int * a)
+int mp_count_bits (mp_int * a)
{
int r;
mp_digit q;
@@ -187,11 +277,11 @@ int mp_leading_bit (mp_int * a)
if (mp_init_copy(&t, a) != MP_OKAY)
return 0;
- while (mp_iszero(&t) == 0) {
+ while (mp_iszero(&t) == MP_NO) {
#ifndef MP_8BIT
bit = (t.dp[0] & 0x80) != 0;
#else
- bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
+ bit = ((t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80) != 0;
#endif
if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
break;
@@ -200,6 +290,22 @@ int mp_leading_bit (mp_int * a)
return bit;
}
+int mp_to_unsigned_bin_at_pos(int x, mp_int *t, unsigned char *b)
+{
+ int res = 0;
+ while (mp_iszero(t) == MP_NO) {
+#ifndef MP_8BIT
+ b[x++] = (unsigned char) (t->dp[0] & 255);
+#else
+ b[x++] = (unsigned char) (t->dp[0] | ((t->dp[1] & 0x01) << 7));
+#endif
+ if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
+ return res;
+ }
+ res = x;
+ }
+ return res;
+}
/* store in unsigned [big endian] format */
int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
@@ -211,49 +317,62 @@ int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
return res;
}
- x = 0;
- while (mp_iszero (&t) == 0) {
-#ifndef MP_8BIT
- b[x++] = (unsigned char) (t.dp[0] & 255);
-#else
- b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
-#endif
- if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
- mp_clear (&t);
- return res;
- }
+ x = mp_to_unsigned_bin_at_pos(0, &t, b);
+ if (x < 0) {
+ mp_clear(&t);
+ return x;
}
+
bn_reverse (b, x);
mp_clear (&t);
- return MP_OKAY;
+ return res;
}
+int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
+{
+ int i, len;
+
+ len = mp_unsigned_bin_size(a);
+
+ /* pad front w/ zeros to match length */
+ for (i = 0; i < c - len; i++)
+ b[i] = 0x00;
+ return mp_to_unsigned_bin(a, b + i);
+}
/* creates "a" then copies b into it */
int mp_init_copy (mp_int * a, mp_int * b)
{
int res;
- if ((res = mp_init (a)) != MP_OKAY) {
+ if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
return res;
}
- return mp_copy (b, a);
+
+ if((res = mp_copy (b, a)) != MP_OKAY) {
+ mp_clear(a);
+ }
+
+ return res;
}
/* copy, b = a */
-int
-mp_copy (mp_int * a, mp_int * b)
+int mp_copy (mp_int * a, mp_int * b)
{
int res, n;
+ /* Safeguard against passing in a null pointer */
+ if (a == NULL || b == NULL)
+ return MP_VAL;
+
/* if dst == src do nothing */
if (a == b) {
return MP_OKAY;
}
/* grow dest */
- if (b->alloc < a->used) {
+ if (b->alloc < a->used || b->alloc == 0) {
if ((res = mp_grow (b, a->used)) != MP_OKAY) {
return res;
}
@@ -261,7 +380,7 @@ mp_copy (mp_int * a, mp_int * b)
/* zero b and copy the parameters over */
{
- register mp_digit *tmpa, *tmpb;
+ mp_digit *tmpa, *tmpb;
/* pointer aliases */
@@ -277,7 +396,7 @@ mp_copy (mp_int * a, mp_int * b)
}
/* clear high digits */
- for (; n < b->used; n++) {
+ for (; n < b->used && b->dp; n++) {
*tmpb++ = 0;
}
}
@@ -296,7 +415,7 @@ int mp_grow (mp_int * a, int size)
mp_digit *tmp;
/* if the alloc size is smaller alloc more ram */
- if (a->alloc < size) {
+ if (a->alloc < size || size == 0) {
/* ensure there are always at least MP_PREC digits extra on top */
size += (MP_PREC * 2) - (size % MP_PREC);
@@ -306,8 +425,8 @@ int mp_grow (mp_int * a, int size)
* in case the operation failed we don't want
* to overwrite the dp member of a.
*/
- tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, 0,
- DYNAMIC_TYPE_BIGINT);
+ tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
+ DYNAMIC_TYPE_BIGINT);
if (tmp == NULL) {
/* reallocation failed but "a" is still valid [can be freed] */
return MP_MEM;
@@ -327,25 +446,6 @@ int mp_grow (mp_int * a, int size)
}
-/* reverse an array, used for radix code */
-void
-bn_reverse (unsigned char *s, int len)
-{
- int ix, iy;
- unsigned char t;
-
- ix = 0;
- iy = len - 1;
- while (ix < iy) {
- t = s[ix];
- s[ix] = s[iy];
- s[iy] = t;
- ++ix;
- --iy;
- }
-}
-
-
/* shift right by a certain bit count (store quotient in c, optional
remainder in d) */
int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
@@ -406,6 +506,9 @@ void mp_zero (mp_int * a)
int n;
mp_digit *tmp;
+ if (a == NULL)
+ return;
+
a->sign = MP_ZPOS;
a->used = 0;
@@ -419,12 +522,11 @@ void mp_zero (mp_int * a)
/* trim unused digits
*
* This is used to ensure that leading zero digits are
- * trimed and the leading "used" digit will be non-zero
+ * trimmed and the leading "used" digit will be non-zero
* Typically very fast. Also fixes the sign if there
* are no more leading digits
*/
-void
-mp_clamp (mp_int * a)
+void mp_clamp (mp_int * a)
{
/* decrease used while the most significant digit is
* zero.
@@ -443,8 +545,7 @@ mp_clamp (mp_int * a)
/* swap the elements of two integers, for cases where you can't simply swap the
* mp_int pointers around
*/
-void
-mp_exch (mp_int * a, mp_int * b)
+void mp_exch (mp_int * a, mp_int * b)
{
mp_int t;
@@ -457,10 +558,12 @@ mp_exch (mp_int * a, mp_int * b)
/* shift right a certain number of bits */
void mp_rshb (mp_int *c, int x)
{
- register mp_digit *tmpc, mask, shift;
+ mp_digit *tmpc, mask, shift;
mp_digit r, rr;
mp_digit D = x;
+ if (mp_iszero(c)) return;
+
/* mask */
mask = (((mp_digit)1) << D) - 1;
@@ -483,6 +586,7 @@ void mp_rshb (mp_int *c, int x)
/* set the carry to the carry bits of the current word found above */
r = rr;
}
+ mp_clamp(c);
}
@@ -503,7 +607,7 @@ void mp_rshd (mp_int * a, int b)
}
{
- register mp_digit *bottom, *top;
+ mp_digit *bottom, *top;
/* shift the digits down */
@@ -539,8 +643,7 @@ void mp_rshd (mp_int * a, int b)
/* calc a value mod 2**b */
-int
-mp_mod_2d (mp_int * a, int b, mp_int * c)
+int mp_mod_2d (mp_int * a, int b, mp_int * c)
{
int x, res;
@@ -637,8 +740,8 @@ int mp_mul_2d (mp_int * a, int b, mp_int * c)
/* shift any bit count < DIGIT_BIT */
d = (mp_digit) (b % DIGIT_BIT);
if (d != 0) {
- register mp_digit *tmpc, shift, mask, r, rr;
- register int x;
+ mp_digit *tmpc, shift, mask, r, rr;
+ int x;
/* bitmask for carries */
mask = (((mp_digit)1) << d) - 1;
@@ -656,7 +759,7 @@ int mp_mul_2d (mp_int * a, int b, mp_int * c)
rr = (*tmpc >> shift) & mask;
/* shift the current word and OR in the carry */
- *tmpc = ((*tmpc << d) | r) & MP_MASK;
+ *tmpc = (mp_digit)(((*tmpc << d) | r) & MP_MASK);
++tmpc;
/* set the carry to the carry bits of the current word */
@@ -691,7 +794,7 @@ int mp_lshd (mp_int * a, int b)
}
{
- register mp_digit *top, *bottom;
+ mp_digit *top, *bottom;
/* increment the used by the shift amount then copy upwards */
a->used += b;
@@ -703,7 +806,7 @@ int mp_lshd (mp_int * a, int b)
bottom = a->dp + a->used - 1 - b;
/* much like mp_rshd this is implemented using a sliding window
- * except the window goes the otherway around. Copying from
+ * except the window goes the other way around. Copying from
* the bottom to the top. see bn_mp_rshd.c for more info.
*/
for (x = a->used - 1; x >= b; x--) {
@@ -722,17 +825,33 @@ int mp_lshd (mp_int * a, int b)
/* this is a shell function that calls either the normal or Montgomery
* exptmod functions. Originally the call to the montgomery code was
- * embedded in the normal function but that wasted alot of stack space
+ * embedded in the normal function but that wasted a lot of stack space
* for nothing (since 99% of the time the Montgomery code would be called)
*/
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+#else
int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+#endif
{
int dr;
/* modulus P must be positive */
- if (P->sign == MP_NEG) {
+ if (mp_iszero(P) || P->sign == MP_NEG) {
return MP_VAL;
}
+ if (mp_isone(P)) {
+ mp_set(Y, 0);
+ return MP_OKAY;
+ }
+ if (mp_iszero(X)) {
+ mp_set(Y, 1);
+ return MP_OKAY;
+ }
+ if (mp_iszero(G)) {
+ mp_set(Y, 0);
+ return MP_OKAY;
+ }
/* if exponent X is negative we have to recurse */
if (X->sign == MP_NEG) {
@@ -771,6 +890,12 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
#endif
}
+#ifdef BN_MP_EXPTMOD_BASE_2
+ if (G->used == 1 && G->dp[0] == 2) {
+ return mp_exptmod_base_2(X, P, Y);
+ }
+#endif
+
/* modified diminished radix reduction */
#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
defined(BN_S_MP_EXPTMOD_C)
@@ -787,6 +912,8 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
dr = 0;
#endif
+ (void)dr;
+
#ifdef BN_MP_REDUCE_IS_2K_C
/* if not, is it a unrestricted DR modulus? */
if (dr == 0) {
@@ -796,7 +923,7 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
/* if the modulus is odd or dr != 0 use the montgomery method */
#ifdef BN_MP_EXPTMOD_FAST_C
- if (mp_isodd (P) == 1 || dr != 0) {
+ if (mp_isodd (P) == MP_YES || dr != 0) {
return mp_exptmod_fast (G, X, P, Y, dr);
} else {
#endif
@@ -812,13 +939,17 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
#endif
}
+int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y)
+{
+ (void)digits;
+ return mp_exptmod(G, X, P, Y);
+}
/* b = |a|
*
* Simple function copies the input and fixes the sign to positive
*/
-int
-mp_abs (mp_int * a, mp_int * b)
+int mp_abs (mp_int * a, mp_int * b)
{
int res;
@@ -837,22 +968,28 @@ mp_abs (mp_int * a, mp_int * b)
/* hac 14.61, pp608 */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
+#else
int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
+#endif
{
- /* b cannot be negative */
- if (b->sign == MP_NEG || mp_iszero(b) == 1) {
+ /* b cannot be negative or zero, and can not divide by 0 (1/a mod b) */
+ if (b->sign == MP_NEG || mp_iszero(b) == MP_YES || mp_iszero(a) == MP_YES) {
return MP_VAL;
}
#ifdef BN_FAST_MP_INVMOD_C
/* if the modulus is odd we can use a faster routine instead */
- if (mp_isodd (b) == 1) {
+ if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
return fast_mp_invmod (a, b, c);
}
#endif
#ifdef BN_MP_INVMOD_SLOW_C
return mp_invmod_slow(a, b, c);
+#else
+ return MP_VAL;
#endif
}
@@ -869,7 +1006,7 @@ int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
int res, neg, loop_check = 0;
/* 2. [modified] b must be odd */
- if (mp_iseven (b) == 1) {
+ if (mp_iseven (b) == MP_YES) {
return MP_VAL;
}
@@ -895,17 +1032,19 @@ int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
goto LBL_ERR;
}
- mp_set (&D, 1);
+ if ((res = mp_set (&D, 1)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
top:
/* 4. while u is even do */
- while (mp_iseven (&u) == 1) {
+ while (mp_iseven (&u) == MP_YES) {
/* 4.1 u = u/2 */
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
goto LBL_ERR;
}
/* 4.2 if B is odd then */
- if (mp_isodd (&B) == 1) {
+ if (mp_isodd (&B) == MP_YES) {
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
goto LBL_ERR;
}
@@ -917,13 +1056,13 @@ top:
}
/* 5. while v is even do */
- while (mp_iseven (&v) == 1) {
+ while (mp_iseven (&v) == MP_YES) {
/* 5.1 v = v/2 */
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
goto LBL_ERR;
}
/* 5.2 if D is odd then */
- if (mp_isodd (&D) == 1) {
+ if (mp_isodd (&D) == MP_YES) {
/* D = (D-x)/2 */
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
goto LBL_ERR;
@@ -957,8 +1096,8 @@ top:
}
/* if not zero goto step 4 */
- if (mp_iszero (&u) == 0) {
- if (++loop_check > 1024) {
+ if (mp_iszero (&u) == MP_NO) {
+ if (++loop_check > MAX_INVMOD_SZ) {
res = MP_VAL;
goto LBL_ERR;
}
@@ -980,6 +1119,12 @@ top:
goto LBL_ERR;
}
}
+ /* too big */
+ while (mp_cmp_mag(&D, b) != MP_LT) {
+ if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ }
mp_exch (&D, c);
c->sign = neg;
res = MP_OKAY;
@@ -1001,31 +1146,42 @@ int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
int res;
/* b cannot be negative */
- if (b->sign == MP_NEG || mp_iszero(b) == 1) {
+ if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
return MP_VAL;
}
/* init temps */
if ((res = mp_init_multi(&x, &y, &u, &v,
&A, &B)) != MP_OKAY) {
- return res;
+ return res;
}
/* init rest of tmps temps */
if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
- return res;
+ mp_clear(&x);
+ mp_clear(&y);
+ mp_clear(&u);
+ mp_clear(&v);
+ mp_clear(&A);
+ mp_clear(&B);
+ return res;
}
/* x = a, y = b */
if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
- goto LBL_ERR;
+ goto LBL_ERR;
+ }
+ if (mp_isone(&x)) {
+ mp_set(c, 1);
+ res = MP_OKAY;
+ goto LBL_ERR;
}
if ((res = mp_copy (b, &y)) != MP_OKAY) {
goto LBL_ERR;
}
/* 2. [modified] if x,y are both even then return an error! */
- if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
+ if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
res = MP_VAL;
goto LBL_ERR;
}
@@ -1037,24 +1193,28 @@ int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
goto LBL_ERR;
}
- mp_set (&A, 1);
- mp_set (&D, 1);
+ if ((res = mp_set (&A, 1)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
+ if ((res = mp_set (&D, 1)) != MP_OKAY) {
+ goto LBL_ERR;
+ }
top:
/* 4. while u is even do */
- while (mp_iseven (&u) == 1) {
+ while (mp_iseven (&u) == MP_YES) {
/* 4.1 u = u/2 */
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
goto LBL_ERR;
}
/* 4.2 if A or B is odd then */
- if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
+ if (mp_isodd (&A) == MP_YES || mp_isodd (&B) == MP_YES) {
/* A = (A+y)/2, B = (B-x)/2 */
if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
- goto LBL_ERR;
+ goto LBL_ERR;
}
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
- goto LBL_ERR;
+ goto LBL_ERR;
}
}
/* A = A/2, B = B/2 */
@@ -1067,19 +1227,19 @@ top:
}
/* 5. while v is even do */
- while (mp_iseven (&v) == 1) {
+ while (mp_iseven (&v) == MP_YES) {
/* 5.1 v = v/2 */
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
goto LBL_ERR;
}
/* 5.2 if C or D is odd then */
- if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
+ if (mp_isodd (&C) == MP_YES || mp_isodd (&D) == MP_YES) {
/* C = (C+y)/2, D = (D-x)/2 */
if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
- goto LBL_ERR;
+ goto LBL_ERR;
}
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
- goto LBL_ERR;
+ goto LBL_ERR;
}
}
/* C = C/2, D = D/2 */
@@ -1121,7 +1281,7 @@ top:
}
/* if not zero goto step 4 */
- if (mp_iszero (&u) == 0)
+ if (mp_iszero (&u) == MP_NO)
goto top;
/* now a = C, b = D, gcd == g*v */
@@ -1161,7 +1321,7 @@ LBL_ERR:mp_clear(&x);
}
-/* compare maginitude of two ints (unsigned) */
+/* compare magnitude of two ints (unsigned) */
int mp_cmp_mag (mp_int * a, mp_int * b)
{
int n;
@@ -1197,8 +1357,7 @@ int mp_cmp_mag (mp_int * a, mp_int * b)
/* compare two ints (signed)*/
-int
-mp_cmp (mp_int * a, mp_int * b)
+int mp_cmp (mp_int * a, mp_int * b)
{
/* compare based on sign */
if (a->sign != b->sign) {
@@ -1222,8 +1381,12 @@ mp_cmp (mp_int * a, mp_int * b)
/* compare a digit */
int mp_cmp_d(mp_int * a, mp_digit b)
{
+ /* special case for zero*/
+ if (a->used == 0 && b == 0)
+ return MP_EQ;
+
/* compare based on sign */
- if (a->sign == MP_NEG) {
+ if ((b && a->used == 0) || a->sign == MP_NEG) {
return MP_LT;
}
@@ -1244,22 +1407,38 @@ int mp_cmp_d(mp_int * a, mp_digit b)
/* set to a digit */
-void mp_set (mp_int * a, mp_digit b)
+int mp_set (mp_int * a, mp_digit b)
{
+ int res;
mp_zero (a);
- a->dp[0] = b & MP_MASK;
- a->used = (a->dp[0] != 0) ? 1 : 0;
+ res = mp_grow (a, 1);
+ if (res == MP_OKAY) {
+ a->dp[0] = (mp_digit)(b & MP_MASK);
+ a->used = (a->dp[0] != 0) ? 1 : 0;
+ }
+ return res;
}
+/* check if a bit is set */
+int mp_is_bit_set (mp_int *a, mp_digit b)
+{
+ if ((mp_digit)a->used < b/DIGIT_BIT)
+ return 0;
+
+ return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
+}
/* c = a mod b, 0 <= c < b */
-int
-mp_mod (mp_int * a, mp_int * b, mp_int * c)
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
+#else
+int mp_mod (mp_int * a, mp_int * b, mp_int * c)
+#endif
{
mp_int t;
int res;
- if ((res = mp_init (&t)) != MP_OKAY) {
+ if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
return res;
}
@@ -1268,11 +1447,11 @@ mp_mod (mp_int * a, mp_int * b, mp_int * c)
return res;
}
- if (t.sign != b->sign) {
- res = mp_add (b, &t, c);
- } else {
+ if ((mp_iszero(&t) != MP_NO) || (t.sign == b->sign)) {
res = MP_OKAY;
mp_exch (&t, c);
+ } else {
+ res = mp_add (b, &t, c);
}
mp_clear (&t);
@@ -1287,7 +1466,7 @@ int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
int res, n, n2;
/* is divisor zero ? */
- if (mp_iszero (b) == 1) {
+ if (mp_iszero (b) == MP_YES) {
return MP_VAL;
}
@@ -1309,8 +1488,9 @@ int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
return res;
}
-
- mp_set(&tq, 1);
+ if ((res = mp_set(&tq, 1)) != MP_OKAY) {
+ return res;
+ }
n = mp_count_bits(a) - mp_count_bits(b);
if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
((res = mp_abs(b, &tb)) != MP_OKAY) ||
@@ -1367,7 +1547,7 @@ int mp_div_2(mp_int * a, mp_int * b)
oldused = b->used;
b->used = a->used;
{
- register mp_digit r, rr, *tmpa, *tmpb;
+ mp_digit r, rr, *tmpa, *tmpb;
/* source alias */
tmpa = a->dp + b->used - 1;
@@ -1403,7 +1583,7 @@ int mp_div_2(mp_int * a, mp_int * b)
/* high level addition (handles signs) */
int mp_add (mp_int * a, mp_int * b, mp_int * c)
{
- int sa, sb, res;
+ int sa, sb, res;
/* get sign of both inputs */
sa = a->sign;
@@ -1433,39 +1613,38 @@ int mp_add (mp_int * a, mp_int * b, mp_int * c)
/* low level addition, based on HAC pp.594, Algorithm 14.7 */
-int
-s_mp_add (mp_int * a, mp_int * b, mp_int * c)
+int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
{
mp_int *x;
- int olduse, res, min, max;
+ int olduse, res, min_ab, max_ab;
/* find sizes, we let |a| <= |b| which means we have to sort
* them. "x" will point to the input with the most digits
*/
if (a->used > b->used) {
- min = b->used;
- max = a->used;
+ min_ab = b->used;
+ max_ab = a->used;
x = a;
} else {
- min = a->used;
- max = b->used;
+ min_ab = a->used;
+ max_ab = b->used;
x = b;
}
/* init result */
- if (c->alloc < max + 1) {
- if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
+ if (c->alloc < max_ab + 1) {
+ if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
return res;
}
}
/* get old used digit count and set new one */
olduse = c->used;
- c->used = max + 1;
+ c->used = max_ab + 1;
{
- register mp_digit u, *tmpa, *tmpb, *tmpc;
- register int i;
+ mp_digit u, *tmpa, *tmpb, *tmpc;
+ int i;
/* alias for digit pointers */
@@ -1480,7 +1659,7 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
/* zero the carry */
u = 0;
- for (i = 0; i < min; i++) {
+ for (i = 0; i < min_ab; i++) {
/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
*tmpc = *tmpa++ + *tmpb++ + u;
@@ -1494,8 +1673,8 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
/* now copy higher words if any, that is in A+B
* if A or B has more digits add those in
*/
- if (min != max) {
- for (; i < max; i++) {
+ if (min_ab != max_ab) {
+ for (; i < max_ab; i++) {
/* T[i] = X[i] + U */
*tmpc = x->dp[i] + u;
@@ -1510,7 +1689,7 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
/* add carry */
*tmpc++ = u;
- /* clear digits above oldused */
+ /* clear digits above olduse */
for (i = c->used; i < olduse; i++) {
*tmpc++ = 0;
}
@@ -1522,27 +1701,31 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
-int
-s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
+int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
{
- int olduse, res, min, max;
+ int olduse, res, min_b, max_a;
/* find sizes */
- min = b->used;
- max = a->used;
+ min_b = b->used;
+ max_a = a->used;
/* init result */
- if (c->alloc < max) {
- if ((res = mp_grow (c, max)) != MP_OKAY) {
+ if (c->alloc < max_a) {
+ if ((res = mp_grow (c, max_a)) != MP_OKAY) {
return res;
}
}
+
+ /* sanity check on destination */
+ if (c->dp == NULL)
+ return MP_VAL;
+
olduse = c->used;
- c->used = max;
+ c->used = max_a;
{
- register mp_digit u, *tmpa, *tmpb, *tmpc;
- register int i;
+ mp_digit u, *tmpa, *tmpb, *tmpc;
+ int i;
/* alias for digit pointers */
tmpa = a->dp;
@@ -1551,7 +1734,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
/* set carry to zero */
u = 0;
- for (i = 0; i < min; i++) {
+ for (i = 0; i < min_b; i++) {
/* T[i] = A[i] - B[i] - U */
*tmpc = *tmpa++ - *tmpb++ - u;
@@ -1567,7 +1750,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
}
/* now copy higher words if any, e.g. if A has more digits than B */
- for (; i < max; i++) {
+ for (; i < max_a; i++) {
/* T[i] = A[i] - U */
*tmpc = *tmpa++ - u;
@@ -1590,8 +1773,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
/* high level subtraction (handles signs) */
-int
-mp_sub (mp_int * a, mp_int * b, mp_int * c)
+int mp_sub (mp_int * a, mp_int * b, mp_int * c)
{
int sa, sb, res;
@@ -1725,7 +1907,7 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
mp_digit buf, mp;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
#ifdef WOLFSSL_SMALL_STACK
- mp_int* M = NULL;
+ mp_int* M;
#else
mp_int M[TAB_SIZE];
#endif
@@ -1733,11 +1915,11 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
* one of many reduction algorithms without modding the guts of
* the code with if statements everywhere.
*/
- int (*redux)(mp_int*,mp_int*,mp_digit);
+ int (*redux)(mp_int*,mp_int*,mp_digit) = NULL;
#ifdef WOLFSSL_SMALL_STACK
M = (mp_int*) XMALLOC(sizeof(mp_int) * TAB_SIZE, NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
+ DYNAMIC_TYPE_BIGINT);
if (M == NULL)
return MP_MEM;
#endif
@@ -1768,9 +1950,9 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
/* init M array */
/* init first cell */
- if ((err = mp_init(&M[1])) != MP_OKAY) {
+ if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
#endif
return err;
@@ -1778,14 +1960,14 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
/* now init the second half of the array */
for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
- if ((err = mp_init(&M[x])) != MP_OKAY) {
+ if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
for (y = 1<<(winsize-1); y < x; y++) {
mp_clear (&M[y]);
}
mp_clear(&M[1]);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
#endif
return err;
@@ -1807,7 +1989,7 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
/* automatically pick the comba one if available (saves quite a few
calls/ifs) */
#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
- if (((P->used * 2 + 1) < MP_WARRAY) &&
+ if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
redux = fast_mp_montgomery_reduce;
} else
@@ -1816,9 +1998,6 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
#ifdef BN_MP_MONTGOMERY_REDUCE_C
/* use slower baseline Montgomery method */
redux = mp_montgomery_reduce;
-#else
- err = MP_VAL;
- goto LBL_M;
#endif
}
} else if (redmode == 1) {
@@ -1826,9 +2005,6 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
/* setup DR reduction for moduli of the form B**k - b */
mp_dr_setup(P, &mp);
redux = mp_dr_reduce;
-#else
- err = MP_VAL;
- goto LBL_M;
#endif
} else {
#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
@@ -1837,14 +2013,16 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
goto LBL_M;
}
redux = mp_reduce_2k;
-#else
+#endif
+ }
+
+ if (redux == NULL) {
err = MP_VAL;
goto LBL_M;
-#endif
}
/* setup result */
- if ((err = mp_init (&res)) != MP_OKAY) {
+ if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
goto LBL_M;
}
@@ -1861,17 +2039,19 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
goto LBL_RES;
}
-#else
- err = MP_VAL;
- goto LBL_RES;
-#endif
/* now set M[1] to G * R mod m */
if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
goto LBL_RES;
}
+#else
+ err = MP_VAL;
+ goto LBL_RES;
+#endif
} else {
- mp_set(&res, 1);
+ if ((err = mp_set(&res, 1)) != MP_OKAY) {
+ goto LBL_RES;
+ }
if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
goto LBL_RES;
}
@@ -1883,7 +2063,8 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
}
for (x = 0; x < (winsize - 1); x++) {
- if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
+ if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
+ &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
goto LBL_RES;
}
if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
@@ -2024,16 +2205,179 @@ LBL_M:
}
#ifdef WOLFSSL_SMALL_STACK
- XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+ return err;
+}
+
+#ifdef BN_MP_EXPTMOD_BASE_2
+#if DIGIT_BIT < 16
+ #define WINSIZE 3
+#elif DIGIT_BIT < 32
+ #define WINSIZE 4
+#elif DIGIT_BIT < 64
+ #define WINSIZE 5
+#elif DIGIT_BIT < 128
+ #define WINSIZE 6
+#endif
+int mp_exptmod_base_2(mp_int * X, mp_int * P, mp_int * Y)
+{
+ mp_digit buf, mp;
+ int err = MP_OKAY, bitbuf, bitcpy, bitcnt, digidx, x, y;
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int *res = NULL;
+#else
+ mp_int res[1];
+#endif
+ int (*redux)(mp_int*,mp_int*,mp_digit) = NULL;
+
+ /* automatically pick the comba one if available (saves quite a few
+ calls/ifs) */
+#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
+ if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
+ P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+ redux = fast_mp_montgomery_reduce;
+ } else
#endif
+ {
+#ifdef BN_MP_MONTGOMERY_REDUCE_C
+ /* use slower baseline Montgomery method */
+ redux = mp_montgomery_reduce;
+#else
+ return MP_VAL;
+#endif
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ res = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (res == NULL) {
+ return MP_MEM;
+ }
+#endif
+
+ /* now setup montgomery */
+ if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
+ goto LBL_M;
+ }
+
+ /* setup result */
+ if ((err = mp_init(res)) != MP_OKAY) {
+ goto LBL_M;
+ }
+
+ /* now we need R mod m */
+ if ((err = mp_montgomery_calc_normalization(res, P)) != MP_OKAY) {
+ goto LBL_RES;
+ }
+
+ /* Get the top bits left over after taking WINSIZE bits starting at the
+ * least-significant.
+ */
+ digidx = X->used - 1;
+ bitcpy = (X->used * DIGIT_BIT) % WINSIZE;
+ if (bitcpy > 0) {
+ bitcnt = (int)DIGIT_BIT - bitcpy;
+ buf = X->dp[digidx--];
+ bitbuf = (int)(buf >> bitcnt);
+ /* Multiply montgomery representation of 1 by 2 ^ top */
+ err = mp_mul_2d(res, bitbuf, res);
+ if (err != MP_OKAY) {
+ goto LBL_RES;
+ }
+ err = mp_mod(res, P, res);
+ if (err != MP_OKAY) {
+ goto LBL_RES;
+ }
+ /* Move out bits used */
+ buf <<= bitcpy;
+ bitcnt++;
+ }
+ else {
+ bitcnt = 1;
+ buf = 0;
+ }
+
+ /* empty window and reset */
+ bitbuf = 0;
+ bitcpy = 0;
+
+ for (;;) {
+ /* grab next digit as required */
+ if (--bitcnt == 0) {
+ /* if digidx == -1 we are out of digits so break */
+ if (digidx == -1) {
+ break;
+ }
+ /* read next digit and reset bitcnt */
+ buf = X->dp[digidx--];
+ bitcnt = (int)DIGIT_BIT;
+ }
+
+ /* grab the next msb from the exponent */
+ y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
+ buf <<= (mp_digit)1;
+ /* add bit to the window */
+ bitbuf |= (y << (WINSIZE - ++bitcpy));
+
+ if (bitcpy == WINSIZE) {
+ /* ok window is filled so square as required and multiply */
+ /* square first */
+ for (x = 0; x < WINSIZE; x++) {
+ err = mp_sqr(res, res);
+ if (err != MP_OKAY) {
+ goto LBL_RES;
+ }
+ err = (*redux)(res, P, mp);
+ if (err != MP_OKAY) {
+ goto LBL_RES;
+ }
+ }
+
+ /* then multiply by 2^bitbuf */
+ err = mp_mul_2d(res, bitbuf, res);
+ if (err != MP_OKAY) {
+ goto LBL_RES;
+ }
+ err = mp_mod(res, P, res);
+ if (err != MP_OKAY) {
+ goto LBL_RES;
+ }
+
+ /* empty window and reset */
+ bitcpy = 0;
+ bitbuf = 0;
+ }
+ }
+
+ /* fixup result if Montgomery reduction is used
+ * recall that any value in a Montgomery system is
+ * actually multiplied by R mod n. So we have
+ * to reduce one more time to cancel out the factor
+ * of R.
+ */
+ err = (*redux)(res, P, mp);
+ if (err != MP_OKAY) {
+ goto LBL_RES;
+ }
+
+ /* swap res with Y */
+ mp_copy(res, Y);
+LBL_RES:mp_clear (res);
+LBL_M:
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
return err;
}
+#undef WINSIZE
+#endif /* BN_MP_EXPTMOD_BASE_2 */
+
/* setups the montgomery reduction stuff */
-int
-mp_montgomery_setup (mp_int * n, mp_digit * rho)
+int mp_montgomery_setup (mp_int * n, mp_digit * rho)
{
mp_digit x, b;
@@ -2065,7 +2409,7 @@ mp_montgomery_setup (mp_int * n, mp_digit * rho)
/* rho = -1/m mod b */
/* TAO, switched mp_word casts to mp_digit to shut up compiler */
- *rho = (((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
+ *rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
return MP_OKAY;
}
@@ -2099,7 +2443,7 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
}
#ifdef WOLFSSL_SMALL_STACK
- W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
+ W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
if (W == NULL)
return MP_MEM;
#endif
@@ -2108,8 +2452,8 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
* an array of double precision words W[...]
*/
{
- register mp_word *_W;
- register mp_digit *tmpx;
+ mp_word *_W;
+ mp_digit *tmpx;
/* alias for the W[] array */
_W = W;
@@ -2138,13 +2482,13 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
* by casting the value down to a mp_digit. Note this requires
* that W[ix-1] have the carry cleared (see after the inner loop)
*/
- register mp_digit mu;
+ mp_digit mu;
mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
/* a = a + mu * m * b**i
*
* This is computed in place and on the fly. The multiplication
- * by b**i is handled by offseting which columns the results
+ * by b**i is handled by offsetting which columns the results
* are added to.
*
* Note the comba method normally doesn't handle carries in the
@@ -2156,9 +2500,9 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
* first m->used words of W[] have the carries fixed
*/
{
- register int iy;
- register mp_digit *tmpn;
- register mp_word *_W;
+ int iy;
+ mp_digit *tmpn;
+ mp_word *_W;
/* alias for the digits of the modulus */
tmpn = n->dp;
@@ -2181,8 +2525,8 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
* significant digits we zeroed].
*/
{
- register mp_digit *tmpx;
- register mp_word *_W, *_W1;
+ mp_digit *tmpx;
+ mp_word *_W, *_W1;
/* nox fix rest of carries */
@@ -2213,7 +2557,7 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
*tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
}
- /* zero oldused digits, if the input a was larger than
+ /* zero olduse digits, if the input a was larger than
* m->used+1 we'll have to clear the digits
*/
for (; ix < olduse; ix++) {
@@ -2226,7 +2570,7 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
mp_clamp (x);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
+ XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
#endif
/* if A >= m then A = A - m */
@@ -2238,8 +2582,7 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
/* computes xR**-1 == x (mod N) via Montgomery Reduction */
-int
-mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
+int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{
int ix, res, digs;
mp_digit mu;
@@ -2251,7 +2594,7 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
* are fixed up in the inner loop.
*/
digs = n->used * 2 + 1;
- if ((digs < MP_WARRAY) &&
+ if ((digs < (int)MP_WARRAY) &&
n->used <
(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
return fast_mp_montgomery_reduce (x, n, rho);
@@ -2278,9 +2621,9 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
/* a = a + mu * m * b**i */
{
- register int iy;
- register mp_digit *tmpn, *tmpx, u;
- register mp_word r;
+ int iy;
+ mp_digit *tmpn, *tmpx, u;
+ mp_word r;
/* alias for digits of the modulus */
tmpn = n->dp;
@@ -2360,8 +2703,7 @@ void mp_dr_setup(mp_int *a, mp_digit *d)
*
* Input x must be in the range 0 <= x <= (n-1)**2
*/
-int
-mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
+int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
{
int err, i, m;
mp_word r;
@@ -2413,7 +2755,9 @@ top:
* Each successive "recursion" makes the input smaller and smaller.
*/
if (mp_cmp_mag (x, n) != MP_LT) {
- s_mp_sub(x, n, x);
+ if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
+ return err;
+ }
goto top;
}
return MP_OKAY;
@@ -2450,7 +2794,9 @@ top:
}
if (mp_cmp_mag(a, n) != MP_LT) {
- s_mp_sub(a, n, a);
+ if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
+ goto ERR;
+ }
goto top;
}
@@ -2487,37 +2833,49 @@ int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
}
-/* computes a = 2**b
- *
- * Simple algorithm which zeroes the int, grows it then just sets one bit
- * as required.
- */
-int
-mp_2expt (mp_int * a, int b)
+/* set the b bit of a */
+int mp_set_bit (mp_int * a, int b)
{
- int res;
+ int i = b / DIGIT_BIT, res;
- /* zero a as per default */
- mp_zero (a);
+ /*
+ * Require:
+ * bit index b >= 0
+ * a->alloc == a->used == 0 if a->dp == NULL
+ */
+ if (b < 0 || (a->dp == NULL && (a->alloc != 0 || a->used != 0)))
+ return MP_VAL;
- /* grow a to accomodate the single bit */
- if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
- return res;
- }
+ if (a->dp == NULL || a->used < (int)(i + 1)) {
+ /* grow a to accommodate the single bit */
+ if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
+ return res;
+ }
- /* set the used count of where the bit will go */
- a->used = b / DIGIT_BIT + 1;
+ /* set the used count of where the bit will go */
+ a->used = (int)(i + 1);
+ }
- /* put the single bit in its place */
- a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
+ /* put the single bit in its place */
+ a->dp[i] |= ((mp_digit)1) << (b % DIGIT_BIT);
- return MP_OKAY;
+ return MP_OKAY;
}
+/* computes a = 2**b
+ *
+ * Simple algorithm which zeros the int, set the required bit
+ */
+int mp_2expt (mp_int * a, int b)
+{
+ /* zero a as per default */
+ mp_zero (a);
+
+ return mp_set_bit(a, b);
+}
/* multiply by a digit */
-int
-mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
+int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
{
mp_digit u, *tmpa, *tmpc;
mp_word r;
@@ -2575,35 +2933,78 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
/* d = a * b (mod c) */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
+#else
int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+#endif
{
int res;
mp_int t;
- if ((res = mp_init (&t)) != MP_OKAY) {
+ if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
return res;
}
- if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
- mp_clear (&t);
+ res = mp_mul (a, b, &t);
+ if (res == MP_OKAY) {
+ res = mp_mod (&t, c, d);
+ }
+
+ mp_clear (&t);
+ return res;
+}
+
+
+/* d = a - b (mod c) */
+int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
+{
+ int res;
+ mp_int t;
+
+ if ((res = mp_init (&t)) != MP_OKAY) {
return res;
}
- res = mp_mod (&t, c, d);
+
+ res = mp_sub (a, b, &t);
+ if (res == MP_OKAY) {
+ res = mp_mod (&t, c, d);
+ }
+
mp_clear (&t);
+
return res;
}
+/* d = a + b (mod c) */
+int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
+{
+ int res;
+ mp_int t;
+
+ if ((res = mp_init (&t)) != MP_OKAY) {
+ return res;
+ }
+
+ res = mp_add (a, b, &t);
+ if (res == MP_OKAY) {
+ res = mp_mod (&t, c, d);
+ }
+
+ mp_clear (&t);
+
+ return res;
+}
/* computes b = a*a */
-int
-mp_sqr (mp_int * a, mp_int * b)
+int mp_sqr (mp_int * a, mp_int * b)
{
int res;
{
#ifdef BN_FAST_S_MP_SQR_C
/* can we use the fast comba multiplier? */
- if ((a->used * 2 + 1) < MP_WARRAY &&
+ if ((a->used * 2 + 1) < (int)MP_WARRAY &&
a->used <
(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
res = fast_s_mp_sqr (a, b);
@@ -2621,12 +3022,17 @@ mp_sqr (mp_int * a, mp_int * b)
/* high level multiplication (handles sign) */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_mul(mp_int *a, mp_int *b, mp_int *c)
+#else
int mp_mul (mp_int * a, mp_int * b, mp_int * c)
+#endif
{
int res, neg;
neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
{
+#ifdef BN_FAST_S_MP_MUL_DIGS_C
/* can we use the fast multiplier?
*
* The fast multiplier can be used if the output will
@@ -2635,8 +3041,7 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
*/
int digs = a->used + b->used + 1;
-#ifdef BN_FAST_S_MP_MUL_DIGS_C
- if ((digs < MP_WARRAY) &&
+ if ((digs < (int)MP_WARRAY) &&
MIN(a->used, b->used) <=
(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
res = fast_s_mp_mul_digs (a, b, c, digs);
@@ -2659,7 +3064,7 @@ int mp_mul_2(mp_int * a, mp_int * b)
{
int x, res, oldused;
- /* grow to accomodate result */
+ /* grow to accommodate result */
if (b->alloc < a->used + 1) {
if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
return res;
@@ -2670,7 +3075,7 @@ int mp_mul_2(mp_int * a, mp_int * b)
b->used = a->used;
{
- register mp_digit r, rr, *tmpa, *tmpb;
+ mp_digit r, rr, *tmpa, *tmpb;
/* alias for source */
tmpa = a->dp;
@@ -2688,7 +3093,7 @@ int mp_mul_2(mp_int * a, mp_int * b)
rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
/* now shift up this digit, add in the carry [from the previous] */
- *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
+ *tmpb++ = (mp_digit)(((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK);
/* copy the carry that would be from the source
* digit into the next iteration
@@ -2717,8 +3122,7 @@ int mp_mul_2(mp_int * a, mp_int * b)
/* divide by three (based on routine from MPI and the GMP manual) */
-int
-mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
+int mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
{
mp_int q;
mp_word w, t;
@@ -2783,7 +3187,7 @@ int mp_init_size (mp_int * a, int size)
size += (MP_PREC * 2) - (size % MP_PREC);
/* alloc mem */
- a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, 0,
+ a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
DYNAMIC_TYPE_BIGINT);
if (a->dp == NULL) {
return MP_MEM;
@@ -2793,6 +3197,9 @@ int mp_init_size (mp_int * a, int size)
a->used = 0;
a->alloc = size;
a->sign = MP_ZPOS;
+#ifdef HAVE_WOLF_BIGINT
+ wc_bigint_init(&a->raw);
+#endif
/* zero the digits */
for (x = 0; x < size; x++) {
@@ -2832,11 +3239,11 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
}
}
- if (pa > MP_WARRAY)
+ if (pa > (int)MP_WARRAY)
return MP_RANGE; /* TAO range check */
#ifdef WOLFSSL_SMALL_STACK
- W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
+ W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
if (W == NULL)
return MP_MEM;
#endif
@@ -2859,7 +3266,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
tmpx = a->dp + tx;
tmpy = a->dp + ty;
- /* this is the number of times the loop will iterrate, essentially
+ /* this is the number of times the loop will iterate, essentially
while (tx++ < a->used && ty-- >= 0) { ... }
*/
iy = MIN(a->used-tx, ty+1);
@@ -2898,7 +3305,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
mp_digit *tmpb;
tmpb = b->dp;
for (ix = 0; ix < pa; ix++) {
- *tmpb++ = W[ix] & MP_MASK;
+ *tmpb++ = (mp_digit)(W[ix] & MP_MASK);
}
/* clear unused digits [that existed in the old copy of c] */
@@ -2909,7 +3316,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
mp_clamp (b);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
+ XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
#endif
return MP_OKAY;
@@ -2940,7 +3347,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
#else
mp_digit W[MP_WARRAY];
#endif
- register mp_word _W;
+ mp_word _W;
/* grow the destination as required */
if (c->alloc < digs) {
@@ -2951,11 +3358,11 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* number of output digits to produce */
pa = MIN(digs, a->used + b->used);
- if (pa > MP_WARRAY)
+ if (pa > (int)MP_WARRAY)
return MP_RANGE; /* TAO range check */
#ifdef WOLFSSL_SMALL_STACK
- W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
+ W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
if (W == NULL)
return MP_MEM;
#endif
@@ -2975,7 +3382,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
tmpx = a->dp + tx;
tmpy = b->dp + ty;
- /* this is the number of times the loop will iterrate, essentially
+ /* this is the number of times the loop will iterate, essentially
while (tx++ < a->used && ty-- >= 0) { ... }
*/
iy = MIN(a->used-tx, ty+1);
@@ -2987,7 +3394,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
}
/* store term */
- W[ix] = ((mp_digit)_W) & MP_MASK;
+ W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
/* make next carry */
_W = _W >> ((mp_word)DIGIT_BIT);
@@ -2998,9 +3405,9 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
c->used = pa;
{
- register mp_digit *tmpc;
+ mp_digit *tmpc;
tmpc = c->dp;
- for (ix = 0; ix < pa+1; ix++) {
+ for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
/* now extract the previous digit [below the carry] */
*tmpc++ = W[ix];
}
@@ -3013,7 +3420,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
mp_clamp (c);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
+ XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
#endif
return MP_OKAY;
@@ -3084,7 +3491,7 @@ int s_mp_sqr (mp_int * a, mp_int * b)
}
-/* multiplies |a| * |b| and only computes upto digs digits of result
+/* multiplies |a| * |b| and only computes up to digs digits of result
* HAC pp. 595, Algorithm 14.12 Modified so you can control how
* many digits of output are created.
*/
@@ -3097,7 +3504,7 @@ int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
mp_digit tmpx, *tmpt, *tmpy;
/* can we use the fast multiplier? */
- if (((digs) < MP_WARRAY) &&
+ if ((digs < (int)MP_WARRAY) &&
MIN (a->used, b->used) <
(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
return fast_s_mp_mul_digs (a, b, c, digs);
@@ -3157,8 +3564,8 @@ int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/*
* shifts with subtractions when the result is greater than b.
*
- * The method is slightly modified to shift B unconditionally upto just under
- * the leading bit of b. This saves alot of multiple precision shifting.
+ * The method is slightly modified to shift B unconditionally up to just under
+ * the leading bit of b. This saves a lot of multiple precision shifting.
*/
int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
{
@@ -3168,15 +3575,17 @@ int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
bits = mp_count_bits (b) % DIGIT_BIT;
if (b->used > 1) {
- if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
+ if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
+ != MP_OKAY) {
return res;
}
} else {
- mp_set(a, 1);
+ if ((res = mp_set(a, 1)) != MP_OKAY) {
+ return res;
+ }
bits = 1;
}
-
/* now compute C = A * B mod b */
for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
@@ -3312,7 +3721,9 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
if ((err = mp_init (&res)) != MP_OKAY) {
goto LBL_MU;
}
- mp_set (&res, 1);
+ if ((err = mp_set (&res, 1)) != MP_OKAY) {
+ goto LBL_MU;
+ }
/* set initial mode and bit cnt */
mode = 0;
@@ -3427,7 +3838,7 @@ LBL_M:
/* pre-calculate the value required for Barrett reduction
- * For a given modulus "b" it calulates the value required in "a"
+ * For a given modulus "b" it calculates the value required in "a"
*/
int mp_reduce_setup (mp_int * a, mp_int * b)
{
@@ -3499,7 +3910,8 @@ int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
/* If x < 0, add b**(k+1) to it */
if (mp_cmp_d (x, 0) == MP_LT) {
- mp_set (&q, 1);
+ if ((res = mp_set (&q, 1)) != MP_OKAY)
+ goto CLEANUP;
if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
goto CLEANUP;
if ((res = mp_add (x, &q, x)) != MP_OKAY)
@@ -3551,7 +3963,9 @@ top:
}
if (mp_cmp_mag(a, n) != MP_LT) {
- s_mp_sub(a, n, a);
+ if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
+ goto ERR;
+ }
goto top;
}
@@ -3588,8 +4002,7 @@ ERR:
/* multiplies |a| * |b| and does not compute the lower digs digits
* [meant to get the higher part of the product]
*/
-int
-s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
mp_int t;
int res, pa, pb, ix, iy;
@@ -3599,8 +4012,9 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* can we use the fast multiplier? */
#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
- if (((a->used + b->used + 1) < MP_WARRAY)
- && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
+ if (((a->used + b->used + 1) < (int)MP_WARRAY)
+ && MIN (a->used, b->used) <
+ (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
return fast_s_mp_mul_high_digs (a, b, c, digs);
}
#endif
@@ -3612,7 +4026,7 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
pa = a->used;
pb = b->used;
- for (ix = 0; ix < pa; ix++) {
+ for (ix = 0; ix < pa && a->dp; ix++) {
/* clear the carry */
u = 0;
@@ -3665,6 +4079,10 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
#endif
mp_word _W;
+ if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
+ return MP_VAL;
+ }
+
/* grow the destination as required */
pa = a->used + b->used;
if (c->alloc < pa) {
@@ -3673,11 +4091,11 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
}
}
- if (pa > MP_WARRAY)
+ if (pa > (int)MP_WARRAY)
return MP_RANGE; /* TAO range check */
#ifdef WOLFSSL_SMALL_STACK
- W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
+ W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
if (W == NULL)
return MP_MEM;
#endif
@@ -3685,7 +4103,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* number of output digits to produce */
pa = a->used + b->used;
_W = 0;
- for (ix = digs; ix < pa; ix++) {
+ for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
int tx, ty, iy;
mp_digit *tmpx, *tmpy;
@@ -3697,7 +4115,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
tmpx = a->dp + tx;
tmpy = b->dp + ty;
- /* this is the number of times the loop will iterrate, essentially its
+ /* this is the number of times the loop will iterate, essentially its
while (tx++ < a->used && ty-- >= 0) { ... }
*/
iy = MIN(a->used-tx, ty+1);
@@ -3708,7 +4126,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
}
/* store term */
- W[ix] = ((mp_digit)_W) & MP_MASK;
+ W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
/* make next carry */
_W = _W >> ((mp_word)DIGIT_BIT);
@@ -3719,10 +4137,10 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
c->used = pa;
{
- register mp_digit *tmpc;
+ mp_digit *tmpc;
tmpc = c->dp + digs;
- for (ix = digs; ix <= pa; ix++) {
+ for (ix = digs; ix < pa; ix++) { /* TAO, <= could potentially overwrite */
/* now extract the previous digit [below the carry] */
*tmpc++ = W[ix];
}
@@ -3735,32 +4153,40 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
mp_clamp (c);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
+ XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
#endif
return MP_OKAY;
}
-/* set a 32-bit const */
+#ifndef MP_SET_CHUNK_BITS
+ #define MP_SET_CHUNK_BITS 4
+#endif
int mp_set_int (mp_int * a, unsigned long b)
{
- int x, res;
+ int x, res;
+
+ /* use direct mp_set if b is less than mp_digit max */
+ if (b < MP_DIGIT_MAX) {
+ return mp_set (a, (mp_digit)b);
+ }
mp_zero (a);
- /* set four bits at a time */
- for (x = 0; x < 8; x++) {
- /* shift the number up four bits */
- if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
+ /* set chunk bits at a time */
+ for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
+ /* shift the number up chunk bits */
+ if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
return res;
}
- /* OR in the top four bits of the source */
- a->dp[0] |= (b >> 28) & 15;
+ /* OR in the top bits of the source */
+ a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
+ ((1 << MP_SET_CHUNK_BITS) - 1);
- /* shift the source up to the next four bits */
- b <<= 4;
+ /* shift the source up to the next chunk bits */
+ b <<= MP_SET_CHUNK_BITS;
/* ensure that digits are not clamped off */
a->used += 1;
@@ -3770,7 +4196,8 @@ int mp_set_int (mp_int * a, unsigned long b)
}
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC)
+#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC) || !defined(NO_RSA) || \
+ !defined(NO_DSA) | !defined(NO_DH)
/* c = a * a (mod b) */
int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
@@ -3794,7 +4221,10 @@ int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
#endif
-#if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN)
+#if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || \
+ defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN) || \
+ defined(OPENSSL_EXTRA) || defined(WC_RSA_BLINDING) || \
+ (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
/* single digit addition */
int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
@@ -3854,7 +4284,7 @@ int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
*tmpc++ &= MP_MASK;
}
/* set final carry */
- if (mu != 0 && ix < c->alloc) {
+ if (ix < c->alloc) {
ix++;
*tmpc++ = mu;
}
@@ -3961,7 +4391,9 @@ int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
#endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
+#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(HAVE_ECC) || \
+ defined(DEBUG_WOLFSSL) || !defined(NO_RSA) || !defined(NO_DSA) || \
+ !defined(NO_DH)
static const int lnz[16] = {
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
@@ -3971,16 +4403,17 @@ static const int lnz[16] = {
int mp_cnt_lsb(mp_int *a)
{
int x;
- mp_digit q, qq;
+ mp_digit q = 0, qq;
/* easy out */
- if (mp_iszero(a) == 1) {
+ if (mp_iszero(a) == MP_YES) {
return 0;
}
/* scan lower digits until non-zero */
- for (x = 0; x < a->used && a->dp[x] == 0; x++);
- q = a->dp[x];
+ for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
+ if (a->dp)
+ q = a->dp[x];
x *= DIGIT_BIT;
/* now scan this digit until a 1 is found */
@@ -4021,7 +4454,7 @@ static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
mp_int q;
mp_word w;
mp_digit t;
- int res, ix;
+ int res = MP_OKAY, ix;
/* cannot divide by zero */
if (b == 0) {
@@ -4029,7 +4462,7 @@ static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
}
/* quick outs */
- if (b == 1 || mp_iszero(a) == 1) {
+ if (b == 1 || mp_iszero(a) == MP_YES) {
if (d != NULL) {
*d = 0;
}
@@ -4058,12 +4491,21 @@ static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
#endif
/* no easy answer [c'est la vie]. Just division */
- if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
- return res;
+ if (c != NULL) {
+ if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
+ return res;
+ }
+
+ q.used = a->used;
+ q.sign = a->sign;
+ }
+ else {
+ if ((res = mp_init(&q)) != MP_OKAY) {
+ return res;
+ }
}
- q.used = a->used;
- q.sign = a->sign;
+
w = 0;
for (ix = a->used - 1; ix >= 0; ix--) {
w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
@@ -4074,7 +4516,8 @@ static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
} else {
t = 0;
}
- q.dp[ix] = (mp_digit)t;
+ if (c != NULL)
+ q.dp[ix] = (mp_digit)t;
}
if (d != NULL) {
@@ -4096,11 +4539,11 @@ int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
return mp_div_d(a, b, NULL, c);
}
-#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */
+#endif /* WOLFSSL_KEY_GEN || HAVE_COMP_KEY || HAVE_ECC || DEBUG_WOLFSSL */
-#ifdef WOLFSSL_KEY_GEN
+#if defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA)
-const mp_digit ltm_prime_tab[] = {
+const mp_digit ltm_prime_tab[PRIME_SIZE] = {
0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
@@ -4189,9 +4632,30 @@ static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
if ((err = mp_init (&y)) != MP_OKAY) {
goto LBL_R;
}
- if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
- goto LBL_Y;
- }
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+ if (mp_count_bits(a) == 1024)
+ err = sp_ModExp_1024(b, &r, a, &y);
+ else if (mp_count_bits(a) == 2048)
+ err = sp_ModExp_2048(b, &r, a, &y);
+ else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+ if (mp_count_bits(a) == 1536)
+ err = sp_ModExp_1536(b, &r, a, &y);
+ else if (mp_count_bits(a) == 3072)
+ err = sp_ModExp_3072(b, &r, a, &y);
+ else
+#endif
+#ifdef WOLFSSL_SP_4096
+ if (mp_count_bits(a) == 4096)
+ err = sp_ModExp_4096(b, &r, a, &y);
+ else
+#endif
+#endif
+ err = mp_exptmod (b, &r, a, &y);
+ if (err != MP_OKAY)
+ goto LBL_Y;
/* if y != 1 and y != n1 do */
if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
@@ -4254,7 +4718,6 @@ static int mp_prime_is_divisible (mp_int * a, int *result)
return MP_OKAY;
}
-
/*
* Sets result to 1 if probably prime, 0 otherwise
*/
@@ -4271,10 +4734,15 @@ int mp_prime_is_prime (mp_int * a, int t, int *result)
return MP_VAL;
}
+ if (mp_isone(a)) {
+ *result = MP_NO;
+ return MP_OKAY;
+ }
+
/* is the input equal to one of the primes in the table? */
for (ix = 0; ix < PRIME_SIZE; ix++) {
if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
- *result = 1;
+ *result = MP_YES;
return MP_OKAY;
}
}
@@ -4296,7 +4764,9 @@ int mp_prime_is_prime (mp_int * a, int t, int *result)
for (ix = 0; ix < t; ix++) {
/* set the prime */
- mp_set (&b, ltm_prime_tab[ix]);
+ if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
+ goto LBL_B;
+ }
if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
goto LBL_B;
@@ -4314,6 +4784,177 @@ LBL_B:mp_clear (&b);
}
+/*
+ * Sets result to 1 if probably prime, 0 otherwise
+ */
+int mp_prime_is_prime_ex (mp_int * a, int t, int *result, WC_RNG *rng)
+{
+ mp_int b, c;
+ int ix, err, res;
+ byte* base = NULL;
+ word32 baseSz = 0;
+
+ /* default to no */
+ *result = MP_NO;
+
+ /* valid value of t? */
+ if (t <= 0 || t > PRIME_SIZE) {
+ return MP_VAL;
+ }
+
+ if (mp_isone(a)) {
+ *result = MP_NO;
+ return MP_OKAY;
+ }
+
+ /* is the input equal to one of the primes in the table? */
+ for (ix = 0; ix < PRIME_SIZE; ix++) {
+ if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
+ *result = MP_YES;
+ return MP_OKAY;
+ }
+ }
+
+ /* first perform trial division */
+ if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
+ return err;
+ }
+
+ /* return if it was trivially divisible */
+ if (res == MP_YES) {
+ return MP_OKAY;
+ }
+
+ /* now perform the miller-rabin rounds */
+ if ((err = mp_init (&b)) != MP_OKAY) {
+ return err;
+ }
+ if ((err = mp_init (&c)) != MP_OKAY) {
+ mp_clear(&b);
+ return err;
+ }
+
+ baseSz = mp_count_bits(a);
+ baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
+
+ base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (base == NULL) {
+ err = MP_MEM;
+ goto LBL_B;
+ }
+
+ if ((err = mp_sub_d(a, 2, &c)) != MP_OKAY) {
+ goto LBL_B;
+ }
+
+ /* now do a miller rabin with up to t random numbers, this should
+ * give a (1/4)^t chance of a false prime. */
+ for (ix = 0; ix < t; ix++) {
+ /* Set a test candidate. */
+ if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
+ goto LBL_B;
+ }
+
+ if ((err = mp_read_unsigned_bin(&b, base, baseSz)) != MP_OKAY) {
+ goto LBL_B;
+ }
+
+ if (mp_cmp_d(&b, 2) != MP_GT || mp_cmp(&b, &c) != MP_LT) {
+ ix--;
+ continue;
+ }
+
+ if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
+ goto LBL_B;
+ }
+
+ if (res == MP_NO) {
+ goto LBL_B;
+ }
+ }
+
+ /* passed the test */
+ *result = MP_YES;
+LBL_B:mp_clear (&b);
+ mp_clear (&c);
+ XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ return err;
+}
+
+#endif /* WOLFSSL_KEY_GEN NO_DH NO_DSA NO_RSA */
+
+#ifdef WOLFSSL_KEY_GEN
+
+static const int USE_BBS = 1;
+
+int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
+{
+ int err, res, type;
+ byte* buf;
+
+ if (N == NULL || rng == NULL)
+ return MP_VAL;
+
+ /* get type */
+ if (len < 0) {
+ type = USE_BBS;
+ len = -len;
+ } else {
+ type = 0;
+ }
+
+ /* allow sizes between 2 and 512 bytes for a prime size */
+ if (len < 2 || len > 512) {
+ return MP_VAL;
+ }
+
+ /* allocate buffer to work with */
+ buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
+ if (buf == NULL) {
+ return MP_MEM;
+ }
+ XMEMSET(buf, 0, len);
+
+ do {
+#ifdef SHOW_GEN
+ printf(".");
+ fflush(stdout);
+#endif
+ /* generate value */
+ err = wc_RNG_GenerateBlock(rng, buf, len);
+ if (err != 0) {
+ XFREE(buf, heap, DYNAMIC_TYPE_RSA);
+ return err;
+ }
+
+ /* munge bits */
+ buf[0] |= 0x80 | 0x40;
+ buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
+
+ /* load value */
+ if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
+ XFREE(buf, heap, DYNAMIC_TYPE_RSA);
+ return err;
+ }
+
+ /* test */
+ /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
+ * of a 1024-bit candidate being a false positive, when it is our
+ * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
+ * Using 8 because we've always used 8. */
+ if ((err = mp_prime_is_prime_ex(N, 8, &res, rng)) != MP_OKAY) {
+ XFREE(buf, heap, DYNAMIC_TYPE_RSA);
+ return err;
+ }
+ } while (res == MP_NO);
+
+ XMEMSET(buf, 0, len);
+ XFREE(buf, heap, DYNAMIC_TYPE_RSA);
+
+ return MP_OKAY;
+}
+
+
/* computes least common multiple as |a*b|/(a, b) */
int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
{
@@ -4411,7 +5052,7 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
}
}
- while (mp_iszero(&v) == 0) {
+ while (mp_iszero(&v) == MP_NO) {
/* make sure v is the largest */
if (mp_cmp_mag(&u, &v) == MP_GT) {
/* swap u and v to make sure v is >= u */
@@ -4435,21 +5076,24 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
}
c->sign = MP_ZPOS;
res = MP_OKAY;
-LBL_V:mp_clear (&u);
-LBL_U:mp_clear (&v);
+LBL_V:mp_clear (&v);
+LBL_U:mp_clear (&u);
return res;
}
-
-
#endif /* WOLFSSL_KEY_GEN */
-#ifdef HAVE_ECC
+#if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
+ defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
+ defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA)
/* chars used in radix conversions */
-const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                        "abcdefghijklmnopqrstuvwxyz+/";
+#endif
+#if !defined(NO_DSA) || defined(HAVE_ECC)
/* read a string [ASCII] in a given radix */
int mp_read_radix (mp_int * a, const char *str, int radix)
{
@@ -4460,7 +5104,7 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
mp_zero(a);
/* make sure the radix is ok */
- if (radix < 2 || radix > 64) {
+ if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
return MP_VAL;
}
@@ -4478,12 +5122,12 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
mp_zero (a);
/* process each digit of the string */
- while (*str) {
- /* if the radix < 36 the conversion is case insensitive
+ while (*str != '\0') {
+ /* if the radix <= 36 the conversion is case insensitive
* this allows numbers like 1AB and 1ab to represent the same value
* [e.g. in hex]
*/
- ch = (char) ((radix < 36) ? XTOUPPER((unsigned char)*str) : *str);
+ ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)*str) : *str;
for (y = 0; y < 64; y++) {
if (ch == mp_s_rmap[y]) {
break;
@@ -4507,16 +5151,170 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
++str;
}
+  /* if the string did not end at the null terminator, an invalid
+     character was found */
+ if (*str != '\0') {
+ mp_zero (a);
+ return MP_VAL;
+ }
+
/* set the sign only if a != 0 */
- if (mp_iszero(a) != 1) {
+ if (mp_iszero(a) != MP_YES) {
a->sign = neg;
}
return MP_OKAY;
}
+#endif /* !defined(NO_DSA) || defined(HAVE_ECC) */
+
+#ifdef WC_MP_TO_RADIX
+
+/* returns size of ASCII representation */
+int mp_radix_size (mp_int *a, int radix, int *size)
+{
+ int res, digs;
+ mp_int t;
+ mp_digit d;
+
+ *size = 0;
+
+ /* special case for binary */
+ if (radix == MP_RADIX_BIN) {
+ *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
+ return MP_OKAY;
+ }
+
+ /* make sure the radix is in range */
+ if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
+ return MP_VAL;
+ }
+
+ if (mp_iszero(a) == MP_YES) {
+ *size = 2;
+ return MP_OKAY;
+ }
+
+ /* digs is the digit count */
+ digs = 0;
+
+ /* if it's negative add one for the sign */
+ if (a->sign == MP_NEG) {
+ ++digs;
+ }
+
+ /* init a copy of the input */
+ if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+ return res;
+ }
+
+ /* force temp to positive */
+ t.sign = MP_ZPOS;
+
+ /* fetch out all of the digits */
+ while (mp_iszero (&t) == MP_NO) {
+ if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ ++digs;
+ }
+ mp_clear (&t);
+
+ /* return digs + 1, the 1 is for the NULL byte that would be required. */
+ *size = digs + 1;
+ return MP_OKAY;
+}
+
+/* stores a bignum as a ASCII string in a given radix (2..64) */
+int mp_toradix (mp_int *a, char *str, int radix)
+{
+ int res, digs;
+ mp_int t;
+ mp_digit d;
+ char *_s = str;
+
+ /* check range of the radix */
+ if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
+ return MP_VAL;
+ }
+
+ /* quick out if its zero */
+ if (mp_iszero(a) == MP_YES) {
+ *str++ = '0';
+ *str = '\0';
+ return MP_OKAY;
+ }
+
+ if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+ return res;
+ }
+
+ /* if it is negative output a - */
+ if (t.sign == MP_NEG) {
+ ++_s;
+ *str++ = '-';
+ t.sign = MP_ZPOS;
+ }
+
+ digs = 0;
+ while (mp_iszero (&t) == MP_NO) {
+ if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
+ mp_clear (&t);
+ return res;
+ }
+ *str++ = mp_s_rmap[d];
+ ++digs;
+ }
+#ifndef WC_DISABLE_RADIX_ZERO_PAD
+ /* For hexadecimal output, add zero padding when number of digits is odd */
+ if ((digs & 1) && (radix == 16)) {
+ *str++ = mp_s_rmap[0];
+ ++digs;
+ }
+#endif
+ /* reverse the digits of the string. In this case _s points
+   * to the first digit [excluding the sign] of the number
+ */
+ bn_reverse ((unsigned char *)_s, digs);
+
+ /* append a NULL so the string is properly terminated */
+ *str = '\0';
+
+ mp_clear (&t);
+ return MP_OKAY;
+}
+
+#ifdef WOLFSSL_DEBUG_MATH
+void mp_dump(const char* desc, mp_int* a, byte verbose)
+{
+ char *buffer;
+ int size = a->alloc;
+
+ buffer = (char*)XMALLOC(size * sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (buffer == NULL) {
+ return;
+ }
+
+ printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
+ desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
+
+ mp_tohex(a, buffer);
+ printf(" %s\n ", buffer);
+
+ if (verbose) {
+ int i;
+ for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
+ printf("%02x ", *(((byte*)a->dp) + i));
+ }
+ printf("\n");
+ }
+
+ XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+}
+#endif /* WOLFSSL_DEBUG_MATH */
-#endif /* HAVE_ECC */
+#endif /* WC_MP_TO_RADIX */
+
+#endif /* WOLFSSL_SP_MATH */
#endif /* USE_FAST_MATH */
#endif /* NO_BIG_INT */
-
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/logging.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/logging.c
index 321530616..0c818aa71 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/logging.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/logging.c
@@ -1,8 +1,8 @@
/* logging.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,62 +16,145 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
-/* submitted by eof */
-
#include <wolfssl/wolfcrypt/logging.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
-
-
-#ifdef __cplusplus
- extern "C" {
+#if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
+/* avoid adding WANT_READ and WANT_WRITE to error queue */
+#include <wolfssl/error-ssl.h>
#endif
- WOLFSSL_API int wolfSSL_Debugging_ON(void);
- WOLFSSL_API void wolfSSL_Debugging_OFF(void);
-#ifdef __cplusplus
- }
+
+#if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+static wolfSSL_Mutex debug_mutex; /* mutex for access to debug structure */
+
+/* accessing any node from the queue should be wrapped in a lock of
+ * debug_mutex */
+static void* wc_error_heap;
+struct wc_error_queue {
+ void* heap; /* the heap hint used with nodes creation */
+ struct wc_error_queue* next;
+ struct wc_error_queue* prev;
+ char error[WOLFSSL_MAX_ERROR_SZ];
+ char file[WOLFSSL_MAX_ERROR_SZ];
+ int value;
+ int line;
+};
+volatile struct wc_error_queue* wc_errors;
+static struct wc_error_queue* wc_current_node;
+static struct wc_error_queue* wc_last_node;
+/* pointer to last node in queue to make insertion O(1) */
#endif
+#ifdef WOLFSSL_FUNC_TIME
+/* WARNING: This code is only to be used for debugging performance.
+ * The code is not thread-safe.
+ * Do not use WOLFSSL_FUNC_TIME in production code.
+ */
+static double wc_func_start[WC_FUNC_COUNT];
+static double wc_func_time[WC_FUNC_COUNT] = { 0, };
+static const char* wc_func_name[WC_FUNC_COUNT] = {
+ "SendHelloRequest",
+ "DoHelloRequest",
+ "SendClientHello",
+ "DoClientHello",
+ "SendServerHello",
+ "DoServerHello",
+ "SendEncryptedExtensions",
+ "DoEncryptedExtensions",
+ "SendCertificateRequest",
+ "DoCertificateRequest",
+ "SendCertificate",
+ "DoCertificate",
+ "SendCertificateVerify",
+ "DoCertificateVerify",
+ "SendFinished",
+ "DoFinished",
+ "SendKeyUpdate",
+ "DoKeyUpdate",
+ "SendEarlyData",
+ "DoEarlyData",
+ "SendNewSessionTicket",
+ "DoNewSessionTicket",
+ "SendServerHelloDone",
+ "DoServerHelloDone",
+ "SendTicket",
+ "DoTicket",
+ "SendClientKeyExchange",
+ "DoClientKeyExchange",
+ "SendCertificateStatus",
+ "DoCertificateStatus",
+ "SendServerKeyExchange",
+ "DoServerKeyExchange",
+ "SendEarlyData",
+ "DoEarlyData",
+};
+
+#include <sys/time.h>
+
+/* WARNING: This function is not portable. */
+static WC_INLINE double current_time(int reset)
+{
+ struct timeval tv;
+ gettimeofday(&tv, 0);
+ (void)reset;
+
+ return (double)tv.tv_sec + (double)tv.tv_usec / 1000000;
+}
+#endif /* WOLFSSL_FUNC_TIME */
#ifdef DEBUG_WOLFSSL
/* Set these to default values initially. */
-static wolfSSL_Logging_cb log_function = 0;
+static wolfSSL_Logging_cb log_function = NULL;
static int loggingEnabled = 0;
+#if defined(WOLFSSL_APACHE_MYNEWT)
+#include "log/log.h"
+static struct log mynewt_log;
+#endif /* WOLFSSL_APACHE_MYNEWT */
+
#endif /* DEBUG_WOLFSSL */
+/* allow this to be set to NULL, so logs can be redirected to default output */
int wolfSSL_SetLoggingCb(wolfSSL_Logging_cb f)
{
#ifdef DEBUG_WOLFSSL
- int res = 0;
-
- if (f)
- log_function = f;
- else
- res = BAD_FUNC_ARG;
-
- return res;
+ log_function = f;
+ return 0;
#else
(void)f;
return NOT_COMPILED_IN;
#endif
}
+/* allow this to be set to NULL, so logs can be redirected to default output */
+wolfSSL_Logging_cb wolfSSL_GetLoggingCb(void)
+{
+#ifdef DEBUG_WOLFSSL
+ return log_function;
+#else
+ return NULL;
+#endif
+}
+
int wolfSSL_Debugging_ON(void)
{
#ifdef DEBUG_WOLFSSL
loggingEnabled = 1;
+#if defined(WOLFSSL_APACHE_MYNEWT)
+ log_register("wolfcrypt", &mynewt_log, &log_console_handler, NULL, LOG_SYSLEVEL);
+#endif /* WOLFSSL_APACHE_MYNEWT */
return 0;
#else
return NOT_COMPILED_IN;
@@ -86,16 +169,72 @@ void wolfSSL_Debugging_OFF(void)
#endif
}
+#ifdef WOLFSSL_FUNC_TIME
+/* WARNING: This code is only to be used for debugging performance.
+ * The code is not thread-safe.
+ * Do not use WOLFSSL_FUNC_TIME in production code.
+ */
+void WOLFSSL_START(int funcNum)
+{
+ double now = current_time(0) * 1000.0;
+#ifdef WOLFSSL_FUNC_TIME_LOG
+ fprintf(stderr, "%17.3f: START - %s\n", now, wc_func_name[funcNum]);
+#endif
+ wc_func_start[funcNum] = now;
+}
+
+void WOLFSSL_END(int funcNum)
+{
+ double now = current_time(0) * 1000.0;
+ wc_func_time[funcNum] += now - wc_func_start[funcNum];
+#ifdef WOLFSSL_FUNC_TIME_LOG
+ fprintf(stderr, "%17.3f: END - %s\n", now, wc_func_name[funcNum]);
+#endif
+}
+
+void WOLFSSL_TIME(int count)
+{
+ int i;
+ double avg, total = 0;
+
+ for (i = 0; i < WC_FUNC_COUNT; i++) {
+ if (wc_func_time[i] > 0) {
+ avg = wc_func_time[i] / count;
+ fprintf(stderr, "%8.3f ms: %s\n", avg, wc_func_name[i]);
+ total += avg;
+ }
+ }
+ fprintf(stderr, "%8.3f ms\n", total);
+}
+#endif
#ifdef DEBUG_WOLFSSL
-#ifdef FREESCALE_MQX
- #include <fio.h>
+#if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+ /* see wc_port.h for fio.h and nio.h includes */
+#elif defined(WOLFSSL_SGX)
+ /* Declare sprintf for ocall */
+ int sprintf(char* buf, const char *fmt, ...);
+#elif defined(WOLFSSL_DEOS)
+#elif defined(MICRIUM)
+ #if (BSP_SER_COMM_EN == DEF_ENABLED)
+ #include <bsp_ser.h>
+ #endif
+#elif defined(WOLFSSL_USER_LOG)
+ /* user includes their own headers */
+#elif defined(WOLFSSL_ESPIDF)
+ #include "esp_types.h"
+ #include "esp_log.h"
+#elif defined(WOLFSSL_TELIT_M2MB)
+ #include <stdio.h>
+ #include "m2m_log.h"
+#elif defined(WOLFSSL_ANDROID_DEBUG)
+ #include <android/log.h>
#else
#include <stdio.h> /* for default printf stuff */
#endif
-#ifdef THREADX
+#if defined(THREADX) && !defined(THREADX_NO_DC_PRINTF)
int dc_log_printf(char*, ...);
#endif
@@ -104,37 +243,105 @@ static void wolfssl_log(const int logLevel, const char *const logMessage)
if (log_function)
log_function(logLevel, logMessage);
else {
- if (loggingEnabled) {
-#ifdef THREADX
- dc_log_printf("%s\n", logMessage);
+#if defined(WOLFSSL_USER_LOG)
+ WOLFSSL_USER_LOG(logMessage);
+#elif defined(WOLFSSL_LOG_PRINTF)
+ printf("%s\n", logMessage);
+
+#elif defined(THREADX) && !defined(THREADX_NO_DC_PRINTF)
+ dc_log_printf("%s\n", logMessage);
+#elif defined(WOLFSSL_DEOS)
+ printf("%s\r\n", logMessage);
#elif defined(MICRIUM)
- #if (NET_SECURE_MGR_CFG_EN == DEF_ENABLED)
- NetSecure_TraceOut((CPU_CHAR *)logMessage);
- #endif
+ BSP_Ser_Printf("%s\r\n", logMessage);
#elif defined(WOLFSSL_MDK_ARM)
- fflush(stdout) ;
- printf("%s\n", logMessage);
- fflush(stdout) ;
+ fflush(stdout) ;
+ printf("%s\n", logMessage);
+ fflush(stdout) ;
+#elif defined(WOLFSSL_UTASKER)
+ fnDebugMsg((char*)logMessage);
+ fnDebugMsg("\r\n");
+#elif defined(MQX_USE_IO_OLD)
+ fprintf(_mqxio_stderr, "%s\n", logMessage);
+
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+ LOG_DEBUG(&mynewt_log, LOG_MODULE_DEFAULT, "%s\n", logMessage);
+#elif defined(WOLFSSL_ESPIDF)
+ ESP_LOGI("wolfssl", "%s", logMessage);
+#elif defined(WOLFSSL_ZEPHYR)
+ printk("%s\n", logMessage);
+#elif defined(WOLFSSL_TELIT_M2MB)
+ M2M_LOG_INFO("%s\n", logMessage);
+#elif defined(WOLFSSL_ANDROID_DEBUG)
+ __android_log_print(ANDROID_LOG_VERBOSE, "[wolfSSL]", "%s", logMessage);
#else
- fprintf(stderr, "%s\n", logMessage);
+ fprintf(stderr, "%s\n", logMessage);
#endif
- }
}
}
-
+#ifndef WOLFSSL_DEBUG_ERRORS_ONLY
void WOLFSSL_MSG(const char* msg)
{
if (loggingEnabled)
wolfssl_log(INFO_LOG , msg);
}
+#ifndef LINE_LEN
+#define LINE_LEN 16
+#endif
+void WOLFSSL_BUFFER(const byte* buffer, word32 length)
+{
+ int i, buflen = (int)length, bufidx;
+ char line[(LINE_LEN * 4) + 3]; /* \t00..0F | chars...chars\0 */
+
+ if (!loggingEnabled) {
+ return;
+ }
+
+ if (!buffer) {
+ wolfssl_log(INFO_LOG, "\tNULL");
+ return;
+ }
+
+ while (buflen > 0) {
+ bufidx = 0;
+ XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "\t");
+ bufidx++;
+
+ for (i = 0; i < LINE_LEN; i++) {
+ if (i < buflen) {
+ XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "%02x ", buffer[i]);
+ }
+ else {
+ XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, " ");
+ }
+ bufidx += 3;
+ }
+
+ XSNPRINTF(&line[bufidx], sizeof(line)-bufidx, "| ");
+ bufidx++;
+
+ for (i = 0; i < LINE_LEN; i++) {
+ if (i < buflen) {
+ XSNPRINTF(&line[bufidx], sizeof(line)-bufidx,
+ "%c", 31 < buffer[i] && buffer[i] < 127 ? buffer[i] : '.');
+ bufidx++;
+ }
+ }
+
+ wolfssl_log(INFO_LOG, line);
+ buffer += LINE_LEN;
+ buflen -= LINE_LEN;
+ }
+}
+
void WOLFSSL_ENTER(const char* msg)
{
if (loggingEnabled) {
- char buffer[80];
- sprintf(buffer, "wolfSSL Entering %s", msg);
+ char buffer[WOLFSSL_MAX_ERROR_SZ];
+ XSNPRINTF(buffer, sizeof(buffer), "wolfSSL Entering %s", msg);
wolfssl_log(ENTER_LOG , buffer);
}
}
@@ -143,20 +350,494 @@ void WOLFSSL_ENTER(const char* msg)
void WOLFSSL_LEAVE(const char* msg, int ret)
{
if (loggingEnabled) {
- char buffer[80];
- sprintf(buffer, "wolfSSL Leaving %s, return %d", msg, ret);
+ char buffer[WOLFSSL_MAX_ERROR_SZ];
+ XSNPRINTF(buffer, sizeof(buffer), "wolfSSL Leaving %s, return %d",
+ msg, ret);
wolfssl_log(LEAVE_LOG , buffer);
}
}
+WOLFSSL_API int WOLFSSL_IS_DEBUG_ON(void)
+{
+ return loggingEnabled;
+}
+#endif /* !WOLFSSL_DEBUG_ERRORS_ONLY */
+#endif /* DEBUG_WOLFSSL */
+/*
+ * When using OPENSSL_EXTRA or DEBUG_WOLFSSL_VERBOSE macro then WOLFSSL_ERROR is
+ * mapped to new function WOLFSSL_ERROR_LINE which gets the line # and function
+ * name where WOLFSSL_ERROR is called at.
+ */
+#if defined(DEBUG_WOLFSSL) || defined(OPENSSL_ALL) || \
+ defined(WOLFSSL_NGINX) || defined(WOLFSSL_HAPROXY) || \
+ defined(OPENSSL_EXTRA)
+
+#if (defined(OPENSSL_EXTRA) && !defined(_WIN32) && !defined(NO_ERROR_QUEUE)) \
+ || defined(DEBUG_WOLFSSL_VERBOSE)
+void WOLFSSL_ERROR_LINE(int error, const char* func, unsigned int line,
+ const char* file, void* usrCtx)
+#else
void WOLFSSL_ERROR(int error)
+#endif
{
- if (loggingEnabled) {
- char buffer[80];
- sprintf(buffer, "wolfSSL error occured, error = %d", error);
- wolfssl_log(ERROR_LOG , buffer);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (error != WC_PENDING_E)
+#endif
+ {
+ char buffer[WOLFSSL_MAX_ERROR_SZ];
+
+ #if (defined(OPENSSL_EXTRA) && !defined(_WIN32) && \
+ !defined(NO_ERROR_QUEUE)) || defined(DEBUG_WOLFSSL_VERBOSE)
+ (void)usrCtx; /* a user ctx for future flexibility */
+ (void)func;
+
+ if (wc_LockMutex(&debug_mutex) != 0) {
+ WOLFSSL_MSG("Lock debug mutex failed");
+ XSNPRINTF(buffer, sizeof(buffer),
+ "wolfSSL error occurred, error = %d", error);
+ }
+ else {
+ #if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
+ /* If running in compatibility mode do not add want read and
+ want right to error queue */
+ if (error != WANT_READ && error != WANT_WRITE) {
+ #endif
+ if (error < 0)
+ error = error - (2 * error); /* get absolute value */
+ XSNPRINTF(buffer, sizeof(buffer),
+ "wolfSSL error occurred, error = %d line:%d file:%s",
+ error, line, file);
+ if (wc_AddErrorNode(error, line, buffer, (char*)file) != 0) {
+ WOLFSSL_MSG("Error creating logging node");
+ /* with void function there is no return here, continue on
+ * to unlock mutex and log what buffer was created. */
+ }
+ #if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
+ }
+ else {
+ XSNPRINTF(buffer, sizeof(buffer),
+ "wolfSSL error occurred, error = %d", error);
+
+ }
+ #endif
+
+ wc_UnLockMutex(&debug_mutex);
+ }
+ #else
+ XSNPRINTF(buffer, sizeof(buffer),
+ "wolfSSL error occurred, error = %d", error);
+ #endif
+
+ #ifdef DEBUG_WOLFSSL
+ if (loggingEnabled)
+ wolfssl_log(ERROR_LOG , buffer);
+ #endif
+ }
+}
+
+void WOLFSSL_ERROR_MSG(const char* msg)
+{
+#ifdef DEBUG_WOLFSSL
+ if (loggingEnabled)
+ wolfssl_log(ERROR_LOG , msg);
+#else
+ (void)msg;
+#endif
+}
+
+#endif /* DEBUG_WOLFSSL || WOLFSSL_NGINX || WOLFSSL_HAPROXY */
+
+#if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+/* Internal function that is called by wolfCrypt_Init() */
+int wc_LoggingInit(void)
+{
+ if (wc_InitMutex(&debug_mutex) != 0) {
+ WOLFSSL_MSG("Bad Init Mutex");
+ return BAD_MUTEX_E;
+ }
+ wc_errors = NULL;
+ wc_current_node = NULL;
+ wc_last_node = NULL;
+
+ return 0;
+}
+
+
+/* internal function that is called by wolfCrypt_Cleanup */
+int wc_LoggingCleanup(void)
+{
+ /* clear logging entries */
+ wc_ClearErrorNodes();
+
+ /* free mutex */
+ if (wc_FreeMutex(&debug_mutex) != 0) {
+ WOLFSSL_MSG("Bad Mutex free");
+ return BAD_MUTEX_E;
+ }
+ return 0;
+}
+
+
+/* peek at an error node
+ *
+ * idx : if -1 then the most recent node is looked at, otherwise search
+ * through queue for node at the given index
+ * file : pointer to internal file string
+ * reason : pointer to internal error reason
+ * line : line number that error happened at
+ *
+ * Returns a negative value in error case, on success returns the nodes error
+ * value which is positive (absolute value)
+ */
+int wc_PeekErrorNode(int idx, const char **file, const char **reason,
+ int *line)
+{
+ struct wc_error_queue* err;
+
+ if (wc_LockMutex(&debug_mutex) != 0) {
+ WOLFSSL_MSG("Lock debug mutex failed");
+ return BAD_MUTEX_E;
+ }
+
+ if (idx < 0) {
+ err = wc_last_node;
+ }
+ else {
+ int i;
+
+ err = (struct wc_error_queue*)wc_errors;
+ for (i = 0; i < idx; i++) {
+ if (err == NULL) {
+ WOLFSSL_MSG("Error node not found. Bad index?");
+ wc_UnLockMutex(&debug_mutex);
+ return BAD_FUNC_ARG;
+ }
+ err = err->next;
+ }
+ }
+
+ if (err == NULL) {
+ WOLFSSL_MSG("No Errors in queue");
+ wc_UnLockMutex(&debug_mutex);
+ return BAD_STATE_E;
+ }
+
+ if (file != NULL) {
+ *file = err->file;
+ }
+
+ if (reason != NULL) {
+ *reason = err->error;
+ }
+
+ if (line != NULL) {
+ *line = err->line;
+ }
+
+ wc_UnLockMutex(&debug_mutex);
+
+ return err->value;
+}
+
+
+/* Pulls the current node from error queue and increments current state.
+ * Note: this does not delete nodes because input arguments are pointing to
+ * node buffers.
+ *
+ * file pointer to file that error was in. Can be NULL to return no file.
+ * reason error string giving reason for error. Can be NULL to return no reason.
+ * line return line number of where error happened.
+ *
+ * returns the error value on success and BAD_MUTEX_E or BAD_STATE_E on failure
+ */
+int wc_PullErrorNode(const char **file, const char **reason, int *line)
+{
+ struct wc_error_queue* err;
+ int value;
+
+ if (wc_LockMutex(&debug_mutex) != 0) {
+ WOLFSSL_MSG("Lock debug mutex failed");
+ return BAD_MUTEX_E;
+ }
+
+ err = wc_current_node;
+ if (err == NULL) {
+ WOLFSSL_MSG("No Errors in queue");
+ wc_UnLockMutex(&debug_mutex);
+ return BAD_STATE_E;
+ }
+
+ if (file != NULL) {
+ *file = err->file;
+ }
+
+ if (reason != NULL) {
+ *reason = err->error;
+ }
+
+ if (line != NULL) {
+ *line = err->line;
+ }
+
+ value = err->value;
+ wc_current_node = err->next;
+ wc_UnLockMutex(&debug_mutex);
+
+ return value;
+}
+
+
+/* create new error node and add it to the queue
+ * buffers are assumed to be of size WOLFSSL_MAX_ERROR_SZ for this internal
+ * function. debug_mutex should be locked before a call to this function. */
+int wc_AddErrorNode(int error, int line, char* buf, char* file)
+{
+#if defined(NO_ERROR_QUEUE)
+ (void)error;
+ (void)line;
+ (void)buf;
+ (void)file;
+ WOLFSSL_MSG("Error queue turned off, can not add nodes");
+#else
+ struct wc_error_queue* err;
+ err = (struct wc_error_queue*)XMALLOC(
+ sizeof(struct wc_error_queue), wc_error_heap, DYNAMIC_TYPE_LOG);
+ if (err == NULL) {
+ WOLFSSL_MSG("Unable to create error node for log");
+ return MEMORY_E;
+ }
+ else {
+ int sz;
+
+ XMEMSET(err, 0, sizeof(struct wc_error_queue));
+ err->heap = wc_error_heap;
+ sz = (int)XSTRLEN(buf);
+ if (sz > WOLFSSL_MAX_ERROR_SZ - 1) {
+ sz = WOLFSSL_MAX_ERROR_SZ - 1;
+ }
+ if (sz > 0) {
+ XMEMCPY(err->error, buf, sz);
+ }
+
+ sz = (int)XSTRLEN(file);
+ if (sz > WOLFSSL_MAX_ERROR_SZ - 1) {
+ sz = WOLFSSL_MAX_ERROR_SZ - 1;
+ }
+ if (sz > 0) {
+ XMEMCPY(err->file, file, sz);
+ }
+
+ err->value = error;
+ err->line = line;
+
+ /* make sure is terminated */
+ err->error[WOLFSSL_MAX_ERROR_SZ - 1] = '\0';
+ err->file[WOLFSSL_MAX_ERROR_SZ - 1] = '\0';
+
+
+        /* this is a queue, so append the new node at the tail of the list */
+ if (wc_last_node == NULL) {
+ /* case of first node added to queue */
+ if (wc_errors != NULL) {
+                /* check for unexpected case before overwriting wc_errors */
+ WOLFSSL_MSG("ERROR in adding new node to logging queue!!\n");
+ /* In the event both wc_last_node and wc_errors are NULL, err
+ * goes unassigned to external wc_errors, wc_last_node. Free
+ * err in this instance since wc_ClearErrorNodes will not
+ */
+ XFREE(err, wc_error_heap, DYNAMIC_TYPE_LOG);
+ }
+ else {
+ wc_errors = err;
+ wc_last_node = err;
+ wc_current_node = err;
+ }
+ }
+ else {
+ wc_last_node->next = err;
+ err->prev = wc_last_node;
+ wc_last_node = err;
+
+            /* handle the case where we have read to the end of the queue
+             * and the current node to read needs to be updated */
+ if (wc_current_node == NULL) {
+ wc_current_node = err;
+ }
+ }
+ }
+#endif
+ return 0;
+}
+
+/* Removes the error node at the specified index.
+ * idx : if -1 then the most recent node is looked at, otherwise search
+ * through queue for node at the given index
+ */
+void wc_RemoveErrorNode(int idx)
+{
+ struct wc_error_queue* current;
+
+ if (wc_LockMutex(&debug_mutex) != 0) {
+ WOLFSSL_MSG("Lock debug mutex failed");
+ return;
+ }
+
+ if (idx == -1)
+ current = wc_last_node;
+ else {
+ current = (struct wc_error_queue*)wc_errors;
+ for (; current != NULL && idx > 0; idx--)
+ current = current->next;
+ }
+ if (current != NULL) {
+ if (current->prev != NULL)
+ current->prev->next = current->next;
+ if (current->next != NULL)
+ current->next->prev = current->prev;
+ if (wc_last_node == current)
+ wc_last_node = current->prev;
+ if (wc_errors == current)
+ wc_errors = current->next;
+ if (wc_current_node == current)
+ wc_current_node = current->next;
+ XFREE(current, current->heap, DYNAMIC_TYPE_LOG);
+ }
+
+ wc_UnLockMutex(&debug_mutex);
+}
+
+
+/* Clears out the list of error nodes.
+ */
+void wc_ClearErrorNodes(void)
+{
+#if defined(DEBUG_WOLFSSL) || defined(WOLFSSL_NGINX) || \
+ defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+
+ if (wc_LockMutex(&debug_mutex) != 0) {
+ WOLFSSL_MSG("Lock debug mutex failed");
+ return;
+ }
+
+ /* free all nodes from error queue */
+ {
+ struct wc_error_queue* current;
+ struct wc_error_queue* next;
+
+ current = (struct wc_error_queue*)wc_errors;
+ while (current != NULL) {
+ next = current->next;
+ XFREE(current, current->heap, DYNAMIC_TYPE_LOG);
+ current = next;
+ }
+ }
+
+ wc_errors = NULL;
+ wc_last_node = NULL;
+ wc_current_node = NULL;
+ wc_UnLockMutex(&debug_mutex);
+#endif /* DEBUG_WOLFSSL || WOLFSSL_NGINX */
+}
+
+int wc_SetLoggingHeap(void* h)
+{
+ if (wc_LockMutex(&debug_mutex) != 0) {
+ WOLFSSL_MSG("Lock debug mutex failed");
+ return BAD_MUTEX_E;
+ }
+ wc_error_heap = h;
+ wc_UnLockMutex(&debug_mutex);
+ return 0;
+}
+
+
+/* frees all nodes in the queue
+ *
+ * Note: takes no arguments; frees every node in the error queue
+ */
+int wc_ERR_remove_state(void)
+{
+ struct wc_error_queue* current;
+ struct wc_error_queue* next;
+
+ if (wc_LockMutex(&debug_mutex) != 0) {
+ WOLFSSL_MSG("Lock debug mutex failed");
+ return BAD_MUTEX_E;
+ }
+
+ /* free all nodes from error queue */
+ current = (struct wc_error_queue*)wc_errors;
+ while (current != NULL) {
+ next = current->next;
+ XFREE(current, current->heap, DYNAMIC_TYPE_LOG);
+ current = next;
+ }
+
+ wc_errors = NULL;
+ wc_last_node = NULL;
+
+ wc_UnLockMutex(&debug_mutex);
+
+ return 0;
+}
+
+#if !defined(NO_FILESYSTEM) && !defined(NO_STDIO_FILESYSTEM)
+/* empties out the error queue into the file */
+static int wc_ERR_dump_to_file (const char *str, size_t len, void *u)
+{
+ XFILE fp = (XFILE ) u;
+ fprintf(fp, "%-*.*s\n", (int)len, (int)len, str);
+ return 0;
+}
+
+/* This callback allows the application to provide a custom error printing
+ * function. */
+void wc_ERR_print_errors_cb(int (*cb)(const char *str, size_t len, void *u),
+ void *u)
+{
+ WOLFSSL_ENTER("wc_ERR_print_errors_cb");
+
+ if (cb == NULL) {
+ /* Invalid param */
+ return;
+ }
+
+ if (wc_LockMutex(&debug_mutex) != 0)
+ {
+ WOLFSSL_MSG("Lock debug mutex failed");
}
+ else
+ {
+ /* free all nodes from error queue and print them to file */
+ struct wc_error_queue *current;
+ struct wc_error_queue *next;
+
+ current = (struct wc_error_queue *)wc_errors;
+ while (current != NULL)
+ {
+ next = current->next;
+ cb(current->error, strlen(current->error), u);
+ XFREE(current, current->heap, DYNAMIC_TYPE_LOG);
+ current = next;
+ }
+
+ /* set global pointers to match having been freed */
+ wc_errors = NULL;
+ wc_last_node = NULL;
+
+ wc_UnLockMutex(&debug_mutex);
+ }
+}
+
+void wc_ERR_print_errors_fp(XFILE fp)
+{
+ WOLFSSL_ENTER("wc_ERR_print_errors_fp");
+
+ /* Send all errors to the wc_ERR_dump_to_file function */
+ wc_ERR_print_errors_cb(wc_ERR_dump_to_file, fp);
}
-#endif /* DEBUG_WOLFSSL */
+#endif /* !defined(NO_FILESYSTEM) && !defined(NO_STDIO_FILESYSTEM) */
+
+#endif /* defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE) */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md2.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md2.c
index ce4e424b7..c2f34203d 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md2.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md2.c
@@ -1,8 +1,8 @@
/* md2.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,10 +16,11 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -34,6 +35,7 @@
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
@@ -49,7 +51,7 @@ void wc_InitMd2(Md2* md2)
void wc_Md2Update(Md2* md2, const byte* data, word32 len)
{
- static const byte S[256] =
+ static const byte S[256] =
{
41, 46, 67, 201, 162, 216, 124, 1, 61, 54, 84, 161, 236, 240, 6,
19, 98, 167, 5, 243, 192, 199, 115, 140, 152, 147, 43, 217, 188,
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md4.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md4.c
index c428610ef..f6f67454a 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md4.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md4.c
@@ -1,8 +1,8 @@
/* md4.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -31,21 +32,11 @@
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
-
- static INLINE word32 min(word32 a, word32 b)
- {
- return a > b ? b : a;
- }
-
-#endif /* WOLFSSL_HAVE_MIN */
-
-
void wc_InitMd4(Md4* md4)
{
md4->digest[0] = 0x67452301L;
@@ -89,7 +80,7 @@ static void Transform(Md4* md4)
function(C,D,A,B,14,11);
function(B,C,D,A,15,19);
-#undef function
+#undef function
#define function(a,b,c,d,k,s) \
a=rotlFixed(a+G(b,c,d)+md4->buffer[k]+0x5a827999,s);
@@ -110,7 +101,7 @@ static void Transform(Md4* md4)
function(C,D,A,B,11, 9);
function(B,C,D,A,15,13);
-#undef function
+#undef function
#define function(a,b,c,d,k,s) \
a=rotlFixed(a+H(b,c,d)+md4->buffer[k]+0x6ed9eba1,s);
@@ -130,7 +121,7 @@ static void Transform(Md4* md4)
function(D,A,B,C,11, 9);
function(C,D,A,B, 7,11);
function(B,C,D,A,15,15);
-
+
/* Add the working vars back into digest state[] */
md4->digest[0] += A;
md4->digest[1] += B;
@@ -139,7 +130,7 @@ static void Transform(Md4* md4)
}
-static INLINE void AddLength(Md4* md4, word32 len)
+static WC_INLINE void AddLength(Md4* md4, word32 len)
{
word32 tmp = md4->loLen;
if ( (md4->loLen += len) < tmp)
@@ -192,9 +183,9 @@ void wc_Md4Final(Md4* md4, byte* hash)
md4->buffLen = 0;
}
XMEMSET(&local[md4->buffLen], 0, MD4_PAD_SIZE - md4->buffLen);
-
+
/* put lengths in bits */
- md4->hiLen = (md4->loLen >> (8*sizeof(md4->loLen) - 3)) +
+ md4->hiLen = (md4->loLen >> (8*sizeof(md4->loLen) - 3)) +
(md4->hiLen << 3);
md4->loLen = md4->loLen << 3;
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md5.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md5.c
index fbf732add..7eb2a5120 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md5.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/md5.c
@@ -1,8 +1,8 @@
/* md5.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,12 +16,13 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
- #include <config.h>
+#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
@@ -29,369 +30,543 @@
#if !defined(NO_MD5)
#if defined(WOLFSSL_TI_HASH)
- /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
-#else
+/* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define wc_InitMd5 wc_InitMd5_sw
-#define wc_Md5Update wc_Md5Update_sw
-#define wc_Md5Final wc_Md5Final_sw
-#endif
+#else
#include <wolfssl/wolfcrypt/md5.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/hash.h>
#ifdef NO_INLINE
- #include <wolfssl/wolfcrypt/misc.h>
+#include <wolfssl/wolfcrypt/misc.h>
#else
- #include <wolfcrypt/src/misc.c>
+#define WOLFSSL_MISC_INCLUDED
+#include <wolfcrypt/src/misc.c>
#endif
-#ifdef FREESCALE_MMCAU
- #include "cau_api.h"
- #define XTRANSFORM(S,B) cau_md5_hash_n((B), 1, (unsigned char*)(S)->digest)
-#else
- #define XTRANSFORM(S,B) Transform((S))
-#endif
+/* Hardware Acceleration */
+#if defined(STM32_HASH)
-#ifdef STM32F2_HASH
- /*
- * STM32F2 hardware MD5 support through the STM32F2 standard peripheral
- * library. Documentation located in STM32F2xx Standard Peripheral Library
- * document (See note in README).
- */
- #include "stm32f2xx.h"
-
- void wc_InitMd5(Md5* md5)
- {
- /* STM32F2 struct notes:
- * md5->buffer = first 4 bytes used to hold partial block if needed
- * md5->buffLen = num bytes currently stored in md5->buffer
- * md5->loLen = num bytes that have been written to STM32 FIFO
- */
- XMEMSET(md5->buffer, 0, MD5_REG_SIZE);
-
- md5->buffLen = 0;
- md5->loLen = 0;
+/* Supports CubeMX HAL or Standard Peripheral Library */
+#define HAVE_MD5_CUST_API
- /* initialize HASH peripheral */
- HASH_DeInit();
+int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId)
+{
+ if (md5 == NULL) {
+ return BAD_FUNC_ARG;
+ }
- /* configure algo used, algo mode, datatype */
- HASH->CR &= ~ (HASH_CR_ALGO | HASH_CR_DATATYPE | HASH_CR_MODE);
- HASH->CR |= (HASH_AlgoSelection_MD5 | HASH_AlgoMode_HASH
- | HASH_DataType_8b);
+ (void)devId;
+ (void)heap;
- /* reset HASH processor */
- HASH->CR |= HASH_CR_INIT;
- }
+ wc_Stm32_Hash_Init(&md5->stmCtx);
- void wc_Md5Update(Md5* md5, const byte* data, word32 len)
- {
- word32 i = 0;
- word32 fill = 0;
- word32 diff = 0;
-
- /* if saved partial block is available */
- if (md5->buffLen > 0) {
- fill = 4 - md5->buffLen;
-
- /* if enough data to fill, fill and push to FIFO */
- if (fill <= len) {
- XMEMCPY((byte*)md5->buffer + md5->buffLen, data, fill);
- HASH_DataIn(*(uint32_t*)md5->buffer);
-
- data += fill;
- len -= fill;
- md5->loLen += 4;
- md5->buffLen = 0;
- } else {
- /* append partial to existing stored block */
- XMEMCPY((byte*)md5->buffer + md5->buffLen, data, len);
- md5->buffLen += len;
- return;
- }
- }
+ return 0;
+}
- /* write input block in the IN FIFO */
- for (i = 0; i < len; i += 4)
- {
- diff = len - i;
- if (diff < 4) {
- /* store incomplete last block, not yet in FIFO */
- XMEMSET(md5->buffer, 0, MD5_REG_SIZE);
- XMEMCPY((byte*)md5->buffer, data, diff);
- md5->buffLen = diff;
- } else {
- HASH_DataIn(*(uint32_t*)data);
- data+=4;
- }
- }
+int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len)
+{
+ int ret;
- /* keep track of total data length thus far */
- md5->loLen += (len - md5->buffLen);
+ if (md5 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
}
- void wc_Md5Final(Md5* md5, byte* hash)
- {
- __IO uint16_t nbvalidbitsdata = 0;
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ ret = wc_Stm32_Hash_Update(&md5->stmCtx, HASH_AlgoSelection_MD5,
+ data, len);
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
+}
- /* finish reading any trailing bytes into FIFO */
- if (md5->buffLen > 0) {
- HASH_DataIn(*(uint32_t*)md5->buffer);
- md5->loLen += md5->buffLen;
- }
+int wc_Md5Final(wc_Md5* md5, byte* hash)
+{
+ int ret;
- /* calculate number of valid bits in last word of input data */
- nbvalidbitsdata = 8 * (md5->loLen % MD5_REG_SIZE);
+ if (md5 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
- /* configure number of valid bits in last word of the data */
- HASH_SetLastWordValidBitsNbr(nbvalidbitsdata);
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ ret = wc_Stm32_Hash_Final(&md5->stmCtx, HASH_AlgoSelection_MD5,
+ hash, WC_MD5_DIGEST_SIZE);
+ wolfSSL_CryptHwMutexUnLock();
+ }
- /* start HASH processor */
- HASH_StartDigest();
+ (void)wc_InitMd5(md5); /* reset state */
- /* wait until Busy flag == RESET */
- while (HASH_GetFlagStatus(HASH_FLAG_BUSY) != RESET) {}
-
- /* read message digest */
- md5->digest[0] = HASH->HR[0];
- md5->digest[1] = HASH->HR[1];
- md5->digest[2] = HASH->HR[2];
- md5->digest[3] = HASH->HR[3];
+ return ret;
+}
- ByteReverseWords(md5->digest, md5->digest, MD5_DIGEST_SIZE);
+#elif defined(FREESCALE_MMCAU_SHA)
- XMEMCPY(hash, md5->digest, MD5_DIGEST_SIZE);
+#ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ #include "cau_api.h"
+#else
+ #include "fsl_mmcau.h"
+#endif
- wc_InitMd5(md5); /* reset state */
- }
+#define XTRANSFORM(S,B) Transform((S), (B))
+#define XTRANSFORM_LEN(S,B,L) Transform_Len((S), (B), (L))
-#else /* CTaoCrypt software implementation */
+#ifndef WC_HASH_DATA_ALIGNMENT
+ /* these hardware API's require 4 byte (word32) alignment */
+ #define WC_HASH_DATA_ALIGNMENT 4
+#endif
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
+static int Transform(wc_Md5* md5, const byte* data)
+{
+ int ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+#ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_md5_hash_n((byte*)data, 1, (unsigned char*)md5->digest);
+#else
+ MMCAU_MD5_HashN((byte*)data, 1, (uint32_t*)md5->digest);
+#endif
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
+}
- static INLINE word32 min(word32 a, word32 b)
- {
- return a > b ? b : a;
+static int Transform_Len(wc_Md5* md5, const byte* data, word32 len)
+{
+ int ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0
+ if ((size_t)data % WC_HASH_DATA_ALIGNMENT) {
+ /* data pointer is NOT aligned,
+ * so copy and perform one block at a time */
+ byte* local = (byte*)md5->buffer;
+ while (len >= WC_MD5_BLOCK_SIZE) {
+ XMEMCPY(local, data, WC_MD5_BLOCK_SIZE);
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_md5_hash_n(local, 1, (unsigned char*)md5->digest);
+ #else
+ MMCAU_MD5_HashN(local, 1, (uint32_t*)md5->digest);
+ #endif
+ data += WC_MD5_BLOCK_SIZE;
+ len -= WC_MD5_BLOCK_SIZE;
+ }
+ }
+ else
+ #endif
+ {
+#ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_md5_hash_n((byte*)data, len / WC_MD5_BLOCK_SIZE,
+ (unsigned char*)md5->digest);
+#else
+ MMCAU_MD5_HashN((byte*)data, len / WC_MD5_BLOCK_SIZE,
+ (uint32_t*)md5->digest);
+#endif
+ }
+ wolfSSL_CryptHwMutexUnLock();
}
+ return ret;
+}
-#endif /* WOLFSSL_HAVE_MIN */
+#elif defined(WOLFSSL_PIC32MZ_HASH)
+#include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+#define HAVE_MD5_CUST_API
-void wc_InitMd5(Md5* md5)
-{
- md5->digest[0] = 0x67452301L;
- md5->digest[1] = 0xefcdab89L;
- md5->digest[2] = 0x98badcfeL;
- md5->digest[3] = 0x10325476L;
+#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
+/* functions implemented in wolfcrypt/src/port/caam/caam_sha.c */
+#define HAVE_MD5_CUST_API
+#else
+#define NEED_SOFT_MD5
+#endif /* End Hardware Acceleration */
- md5->buffLen = 0;
- md5->loLen = 0;
- md5->hiLen = 0;
-}
+#ifdef NEED_SOFT_MD5
-#ifndef FREESCALE_MMCAU
+#define XTRANSFORM(S,B) Transform((S),(B))
-static void Transform(Md5* md5)
-{
#define F1(x, y, z) (z ^ (x & (y ^ z)))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) (x ^ y ^ z)
#define F4(x, y, z) (y ^ (x | ~z))
#define MD5STEP(f, w, x, y, z, data, s) \
- w = rotlFixed(w + f(x, y, z) + data, s) + x
+ w = rotlFixed(w + f(x, y, z) + data, s) + x
+static int Transform(wc_Md5* md5, const byte* data)
+{
+ word32* buffer = (word32*)data;
/* Copy context->state[] to working vars */
word32 a = md5->digest[0];
word32 b = md5->digest[1];
word32 c = md5->digest[2];
word32 d = md5->digest[3];
- MD5STEP(F1, a, b, c, d, md5->buffer[0] + 0xd76aa478, 7);
- MD5STEP(F1, d, a, b, c, md5->buffer[1] + 0xe8c7b756, 12);
- MD5STEP(F1, c, d, a, b, md5->buffer[2] + 0x242070db, 17);
- MD5STEP(F1, b, c, d, a, md5->buffer[3] + 0xc1bdceee, 22);
- MD5STEP(F1, a, b, c, d, md5->buffer[4] + 0xf57c0faf, 7);
- MD5STEP(F1, d, a, b, c, md5->buffer[5] + 0x4787c62a, 12);
- MD5STEP(F1, c, d, a, b, md5->buffer[6] + 0xa8304613, 17);
- MD5STEP(F1, b, c, d, a, md5->buffer[7] + 0xfd469501, 22);
- MD5STEP(F1, a, b, c, d, md5->buffer[8] + 0x698098d8, 7);
- MD5STEP(F1, d, a, b, c, md5->buffer[9] + 0x8b44f7af, 12);
- MD5STEP(F1, c, d, a, b, md5->buffer[10] + 0xffff5bb1, 17);
- MD5STEP(F1, b, c, d, a, md5->buffer[11] + 0x895cd7be, 22);
- MD5STEP(F1, a, b, c, d, md5->buffer[12] + 0x6b901122, 7);
- MD5STEP(F1, d, a, b, c, md5->buffer[13] + 0xfd987193, 12);
- MD5STEP(F1, c, d, a, b, md5->buffer[14] + 0xa679438e, 17);
- MD5STEP(F1, b, c, d, a, md5->buffer[15] + 0x49b40821, 22);
-
- MD5STEP(F2, a, b, c, d, md5->buffer[1] + 0xf61e2562, 5);
- MD5STEP(F2, d, a, b, c, md5->buffer[6] + 0xc040b340, 9);
- MD5STEP(F2, c, d, a, b, md5->buffer[11] + 0x265e5a51, 14);
- MD5STEP(F2, b, c, d, a, md5->buffer[0] + 0xe9b6c7aa, 20);
- MD5STEP(F2, a, b, c, d, md5->buffer[5] + 0xd62f105d, 5);
- MD5STEP(F2, d, a, b, c, md5->buffer[10] + 0x02441453, 9);
- MD5STEP(F2, c, d, a, b, md5->buffer[15] + 0xd8a1e681, 14);
- MD5STEP(F2, b, c, d, a, md5->buffer[4] + 0xe7d3fbc8, 20);
- MD5STEP(F2, a, b, c, d, md5->buffer[9] + 0x21e1cde6, 5);
- MD5STEP(F2, d, a, b, c, md5->buffer[14] + 0xc33707d6, 9);
- MD5STEP(F2, c, d, a, b, md5->buffer[3] + 0xf4d50d87, 14);
- MD5STEP(F2, b, c, d, a, md5->buffer[8] + 0x455a14ed, 20);
- MD5STEP(F2, a, b, c, d, md5->buffer[13] + 0xa9e3e905, 5);
- MD5STEP(F2, d, a, b, c, md5->buffer[2] + 0xfcefa3f8, 9);
- MD5STEP(F2, c, d, a, b, md5->buffer[7] + 0x676f02d9, 14);
- MD5STEP(F2, b, c, d, a, md5->buffer[12] + 0x8d2a4c8a, 20);
-
- MD5STEP(F3, a, b, c, d, md5->buffer[5] + 0xfffa3942, 4);
- MD5STEP(F3, d, a, b, c, md5->buffer[8] + 0x8771f681, 11);
- MD5STEP(F3, c, d, a, b, md5->buffer[11] + 0x6d9d6122, 16);
- MD5STEP(F3, b, c, d, a, md5->buffer[14] + 0xfde5380c, 23);
- MD5STEP(F3, a, b, c, d, md5->buffer[1] + 0xa4beea44, 4);
- MD5STEP(F3, d, a, b, c, md5->buffer[4] + 0x4bdecfa9, 11);
- MD5STEP(F3, c, d, a, b, md5->buffer[7] + 0xf6bb4b60, 16);
- MD5STEP(F3, b, c, d, a, md5->buffer[10] + 0xbebfbc70, 23);
- MD5STEP(F3, a, b, c, d, md5->buffer[13] + 0x289b7ec6, 4);
- MD5STEP(F3, d, a, b, c, md5->buffer[0] + 0xeaa127fa, 11);
- MD5STEP(F3, c, d, a, b, md5->buffer[3] + 0xd4ef3085, 16);
- MD5STEP(F3, b, c, d, a, md5->buffer[6] + 0x04881d05, 23);
- MD5STEP(F3, a, b, c, d, md5->buffer[9] + 0xd9d4d039, 4);
- MD5STEP(F3, d, a, b, c, md5->buffer[12] + 0xe6db99e5, 11);
- MD5STEP(F3, c, d, a, b, md5->buffer[15] + 0x1fa27cf8, 16);
- MD5STEP(F3, b, c, d, a, md5->buffer[2] + 0xc4ac5665, 23);
-
- MD5STEP(F4, a, b, c, d, md5->buffer[0] + 0xf4292244, 6);
- MD5STEP(F4, d, a, b, c, md5->buffer[7] + 0x432aff97, 10);
- MD5STEP(F4, c, d, a, b, md5->buffer[14] + 0xab9423a7, 15);
- MD5STEP(F4, b, c, d, a, md5->buffer[5] + 0xfc93a039, 21);
- MD5STEP(F4, a, b, c, d, md5->buffer[12] + 0x655b59c3, 6);
- MD5STEP(F4, d, a, b, c, md5->buffer[3] + 0x8f0ccc92, 10);
- MD5STEP(F4, c, d, a, b, md5->buffer[10] + 0xffeff47d, 15);
- MD5STEP(F4, b, c, d, a, md5->buffer[1] + 0x85845dd1, 21);
- MD5STEP(F4, a, b, c, d, md5->buffer[8] + 0x6fa87e4f, 6);
- MD5STEP(F4, d, a, b, c, md5->buffer[15] + 0xfe2ce6e0, 10);
- MD5STEP(F4, c, d, a, b, md5->buffer[6] + 0xa3014314, 15);
- MD5STEP(F4, b, c, d, a, md5->buffer[13] + 0x4e0811a1, 21);
- MD5STEP(F4, a, b, c, d, md5->buffer[4] + 0xf7537e82, 6);
- MD5STEP(F4, d, a, b, c, md5->buffer[11] + 0xbd3af235, 10);
- MD5STEP(F4, c, d, a, b, md5->buffer[2] + 0x2ad7d2bb, 15);
- MD5STEP(F4, b, c, d, a, md5->buffer[9] + 0xeb86d391, 21);
-
+ MD5STEP(F1, a, b, c, d, buffer[0] + 0xd76aa478, 7);
+ MD5STEP(F1, d, a, b, c, buffer[1] + 0xe8c7b756, 12);
+ MD5STEP(F1, c, d, a, b, buffer[2] + 0x242070db, 17);
+ MD5STEP(F1, b, c, d, a, buffer[3] + 0xc1bdceee, 22);
+ MD5STEP(F1, a, b, c, d, buffer[4] + 0xf57c0faf, 7);
+ MD5STEP(F1, d, a, b, c, buffer[5] + 0x4787c62a, 12);
+ MD5STEP(F1, c, d, a, b, buffer[6] + 0xa8304613, 17);
+ MD5STEP(F1, b, c, d, a, buffer[7] + 0xfd469501, 22);
+ MD5STEP(F1, a, b, c, d, buffer[8] + 0x698098d8, 7);
+ MD5STEP(F1, d, a, b, c, buffer[9] + 0x8b44f7af, 12);
+ MD5STEP(F1, c, d, a, b, buffer[10] + 0xffff5bb1, 17);
+ MD5STEP(F1, b, c, d, a, buffer[11] + 0x895cd7be, 22);
+ MD5STEP(F1, a, b, c, d, buffer[12] + 0x6b901122, 7);
+ MD5STEP(F1, d, a, b, c, buffer[13] + 0xfd987193, 12);
+ MD5STEP(F1, c, d, a, b, buffer[14] + 0xa679438e, 17);
+ MD5STEP(F1, b, c, d, a, buffer[15] + 0x49b40821, 22);
+
+ MD5STEP(F2, a, b, c, d, buffer[1] + 0xf61e2562, 5);
+ MD5STEP(F2, d, a, b, c, buffer[6] + 0xc040b340, 9);
+ MD5STEP(F2, c, d, a, b, buffer[11] + 0x265e5a51, 14);
+ MD5STEP(F2, b, c, d, a, buffer[0] + 0xe9b6c7aa, 20);
+ MD5STEP(F2, a, b, c, d, buffer[5] + 0xd62f105d, 5);
+ MD5STEP(F2, d, a, b, c, buffer[10] + 0x02441453, 9);
+ MD5STEP(F2, c, d, a, b, buffer[15] + 0xd8a1e681, 14);
+ MD5STEP(F2, b, c, d, a, buffer[4] + 0xe7d3fbc8, 20);
+ MD5STEP(F2, a, b, c, d, buffer[9] + 0x21e1cde6, 5);
+ MD5STEP(F2, d, a, b, c, buffer[14] + 0xc33707d6, 9);
+ MD5STEP(F2, c, d, a, b, buffer[3] + 0xf4d50d87, 14);
+ MD5STEP(F2, b, c, d, a, buffer[8] + 0x455a14ed, 20);
+ MD5STEP(F2, a, b, c, d, buffer[13] + 0xa9e3e905, 5);
+ MD5STEP(F2, d, a, b, c, buffer[2] + 0xfcefa3f8, 9);
+ MD5STEP(F2, c, d, a, b, buffer[7] + 0x676f02d9, 14);
+ MD5STEP(F2, b, c, d, a, buffer[12] + 0x8d2a4c8a, 20);
+
+ MD5STEP(F3, a, b, c, d, buffer[5] + 0xfffa3942, 4);
+ MD5STEP(F3, d, a, b, c, buffer[8] + 0x8771f681, 11);
+ MD5STEP(F3, c, d, a, b, buffer[11] + 0x6d9d6122, 16);
+ MD5STEP(F3, b, c, d, a, buffer[14] + 0xfde5380c, 23);
+ MD5STEP(F3, a, b, c, d, buffer[1] + 0xa4beea44, 4);
+ MD5STEP(F3, d, a, b, c, buffer[4] + 0x4bdecfa9, 11);
+ MD5STEP(F3, c, d, a, b, buffer[7] + 0xf6bb4b60, 16);
+ MD5STEP(F3, b, c, d, a, buffer[10] + 0xbebfbc70, 23);
+ MD5STEP(F3, a, b, c, d, buffer[13] + 0x289b7ec6, 4);
+ MD5STEP(F3, d, a, b, c, buffer[0] + 0xeaa127fa, 11);
+ MD5STEP(F3, c, d, a, b, buffer[3] + 0xd4ef3085, 16);
+ MD5STEP(F3, b, c, d, a, buffer[6] + 0x04881d05, 23);
+ MD5STEP(F3, a, b, c, d, buffer[9] + 0xd9d4d039, 4);
+ MD5STEP(F3, d, a, b, c, buffer[12] + 0xe6db99e5, 11);
+ MD5STEP(F3, c, d, a, b, buffer[15] + 0x1fa27cf8, 16);
+ MD5STEP(F3, b, c, d, a, buffer[2] + 0xc4ac5665, 23);
+
+ MD5STEP(F4, a, b, c, d, buffer[0] + 0xf4292244, 6);
+ MD5STEP(F4, d, a, b, c, buffer[7] + 0x432aff97, 10);
+ MD5STEP(F4, c, d, a, b, buffer[14] + 0xab9423a7, 15);
+ MD5STEP(F4, b, c, d, a, buffer[5] + 0xfc93a039, 21);
+ MD5STEP(F4, a, b, c, d, buffer[12] + 0x655b59c3, 6);
+ MD5STEP(F4, d, a, b, c, buffer[3] + 0x8f0ccc92, 10);
+ MD5STEP(F4, c, d, a, b, buffer[10] + 0xffeff47d, 15);
+ MD5STEP(F4, b, c, d, a, buffer[1] + 0x85845dd1, 21);
+ MD5STEP(F4, a, b, c, d, buffer[8] + 0x6fa87e4f, 6);
+ MD5STEP(F4, d, a, b, c, buffer[15] + 0xfe2ce6e0, 10);
+ MD5STEP(F4, c, d, a, b, buffer[6] + 0xa3014314, 15);
+ MD5STEP(F4, b, c, d, a, buffer[13] + 0x4e0811a1, 21);
+ MD5STEP(F4, a, b, c, d, buffer[4] + 0xf7537e82, 6);
+ MD5STEP(F4, d, a, b, c, buffer[11] + 0xbd3af235, 10);
+ MD5STEP(F4, c, d, a, b, buffer[2] + 0x2ad7d2bb, 15);
+ MD5STEP(F4, b, c, d, a, buffer[9] + 0xeb86d391, 21);
+
/* Add the working vars back into digest state[] */
md5->digest[0] += a;
md5->digest[1] += b;
md5->digest[2] += c;
md5->digest[3] += d;
-}
-#endif /* FREESCALE_MMCAU */
+ return 0;
+}
+#endif /* NEED_SOFT_MD5 */
+#ifndef HAVE_MD5_CUST_API
-static INLINE void AddLength(Md5* md5, word32 len)
+static WC_INLINE void AddLength(wc_Md5* md5, word32 len)
{
word32 tmp = md5->loLen;
- if ( (md5->loLen += len) < tmp)
+ if ((md5->loLen += len) < tmp) {
md5->hiLen++; /* carry low to high */
+ }
}
+static int _InitMd5(wc_Md5* md5)
+{
+ int ret = 0;
+
+ md5->digest[0] = 0x67452301L;
+ md5->digest[1] = 0xefcdab89L;
+ md5->digest[2] = 0x98badcfeL;
+ md5->digest[3] = 0x10325476L;
+
+ md5->buffLen = 0;
+ md5->loLen = 0;
+ md5->hiLen = 0;
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ md5->flags = 0;
+#endif
+
+ return ret;
+}
-void wc_Md5Update(Md5* md5, const byte* data, word32 len)
+int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId)
{
- /* do block size increments */
- byte* local = (byte*)md5->buffer;
+ int ret = 0;
- while (len) {
- word32 add = min(len, MD5_BLOCK_SIZE - md5->buffLen);
- XMEMCPY(&local[md5->buffLen], data, add);
+ if (md5 == NULL)
+ return BAD_FUNC_ARG;
- md5->buffLen += add;
- data += add;
- len -= add;
+ md5->heap = heap;
+
+ ret = _InitMd5(md5);
+ if (ret != 0)
+ return ret;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+ ret = wolfAsync_DevCtxInit(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5,
+ md5->heap, devId);
+#else
+ (void)devId;
+#endif
+ return ret;
+}
+
+/* do block size increments/updates */
+int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len)
+{
+ int ret = 0;
+ word32 blocksLen;
+ byte* local;
+
+ if (md5 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+ if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) {
+#if defined(HAVE_INTEL_QA)
+ return IntelQaSymMd5(&md5->asyncDev, NULL, data, len);
+#endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+ /* check that internal buffLen is valid */
+ if (md5->buffLen >= WC_MD5_BLOCK_SIZE)
+ return BUFFER_E;
+
+ if (data == NULL && len == 0) {
+ /* valid, but do nothing */
+ return 0;
+ }
+
+ /* add length for final */
+ AddLength(md5, len);
+
+ local = (byte*)md5->buffer;
+
+ /* process any remainder from previous operation */
+ if (md5->buffLen > 0) {
+ blocksLen = min(len, WC_MD5_BLOCK_SIZE - md5->buffLen);
+ XMEMCPY(&local[md5->buffLen], data, blocksLen);
+
+ md5->buffLen += blocksLen;
+ data += blocksLen;
+ len -= blocksLen;
+
+ if (md5->buffLen == WC_MD5_BLOCK_SIZE) {
+ #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+ ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE);
+ #endif
+
+ ret = XTRANSFORM(md5, (const byte*)local);
+ if (ret != 0)
+ return ret;
- if (md5->buffLen == MD5_BLOCK_SIZE) {
- #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU)
- ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
- #endif
- XTRANSFORM(md5, local);
- AddLength(md5, MD5_BLOCK_SIZE);
md5->buffLen = 0;
}
}
-}
+ /* process blocks */
+#ifdef XTRANSFORM_LEN
+ /* get number of blocks */
+ /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */
+ /* len (masked by 0xFFFFFFC0) returns block aligned length */
+ blocksLen = len & ~(WC_MD5_BLOCK_SIZE-1);
+ if (blocksLen > 0) {
+ /* Byte reversal performed in function if required. */
+ XTRANSFORM_LEN(md5, data, blocksLen);
+ data += blocksLen;
+ len -= blocksLen;
+ }
+#else
+ while (len >= WC_MD5_BLOCK_SIZE) {
+ word32* local32 = md5->buffer;
+ /* optimization to avoid memcpy if data pointer is properly aligned */
+ /* Big Endian requires byte swap, so can't use data directly */
+ #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(BIG_ENDIAN_ORDER)
+ if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) {
+ local32 = (word32*)data;
+ }
+ else
+ #endif
+ {
+ XMEMCPY(local32, data, WC_MD5_BLOCK_SIZE);
+ }
+
+ data += WC_MD5_BLOCK_SIZE;
+ len -= WC_MD5_BLOCK_SIZE;
+
+ #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+ ByteReverseWords(local32, local32, WC_MD5_BLOCK_SIZE);
+ #endif
+
+ ret = XTRANSFORM(md5, (const byte*)local32);
+ }
+#endif /* XTRANSFORM_LEN */
+
+ /* save remainder */
+ if (len > 0) {
+ XMEMCPY(local, data, len);
+ md5->buffLen = len;
+ }
+
+ return ret;
+}
-void wc_Md5Final(Md5* md5, byte* hash)
+int wc_Md5Final(wc_Md5* md5, byte* hash)
{
- byte* local = (byte*)md5->buffer;
+ byte* local;
+
+ if (md5 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+ if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) {
+#if defined(HAVE_INTEL_QA)
+ return IntelQaSymMd5(&md5->asyncDev, hash, NULL, WC_MD5_DIGEST_SIZE);
+#endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
- AddLength(md5, md5->buffLen); /* before adding pads */
+ local = (byte*)md5->buffer;
local[md5->buffLen++] = 0x80; /* add 1 */
/* pad with zeros */
- if (md5->buffLen > MD5_PAD_SIZE) {
- XMEMSET(&local[md5->buffLen], 0, MD5_BLOCK_SIZE - md5->buffLen);
- md5->buffLen += MD5_BLOCK_SIZE - md5->buffLen;
+ if (md5->buffLen > WC_MD5_PAD_SIZE) {
+ XMEMSET(&local[md5->buffLen], 0, WC_MD5_BLOCK_SIZE - md5->buffLen);
+ md5->buffLen += WC_MD5_BLOCK_SIZE - md5->buffLen;
- #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU)
- ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
- #endif
+#if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+ ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE);
+#endif
XTRANSFORM(md5, local);
md5->buffLen = 0;
}
- XMEMSET(&local[md5->buffLen], 0, MD5_PAD_SIZE - md5->buffLen);
-
+ XMEMSET(&local[md5->buffLen], 0, WC_MD5_PAD_SIZE - md5->buffLen);
+
+#if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+ ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE);
+#endif
+
/* put lengths in bits */
- md5->hiLen = (md5->loLen >> (8*sizeof(md5->loLen) - 3)) +
+ md5->hiLen = (md5->loLen >> (8 * sizeof(md5->loLen) - 3)) +
(md5->hiLen << 3);
md5->loLen = md5->loLen << 3;
/* store lengths */
- #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU)
- ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
- #endif
/* ! length ordering dependent on digest endian type ! */
- XMEMCPY(&local[MD5_PAD_SIZE], &md5->loLen, sizeof(word32));
- XMEMCPY(&local[MD5_PAD_SIZE + sizeof(word32)], &md5->hiLen, sizeof(word32));
+ XMEMCPY(&local[WC_MD5_PAD_SIZE], &md5->loLen, sizeof(word32));
+ XMEMCPY(&local[WC_MD5_PAD_SIZE + sizeof(word32)], &md5->hiLen, sizeof(word32));
+ /* final transform and result to hash */
XTRANSFORM(md5, local);
- #ifdef BIG_ENDIAN_ORDER
- ByteReverseWords(md5->digest, md5->digest, MD5_DIGEST_SIZE);
- #endif
- XMEMCPY(hash, md5->digest, MD5_DIGEST_SIZE);
+#ifdef BIG_ENDIAN_ORDER
+ ByteReverseWords(md5->digest, md5->digest, WC_MD5_DIGEST_SIZE);
+#endif
+ XMEMCPY(hash, md5->digest, WC_MD5_DIGEST_SIZE);
- wc_InitMd5(md5); /* reset state */
+ return _InitMd5(md5); /* reset state */
}
+#endif /* !HAVE_MD5_CUST_API */
-#endif /* STM32F2_HASH */
-
-int wc_Md5Hash(const byte* data, word32 len, byte* hash)
+int wc_InitMd5(wc_Md5* md5)
{
-#ifdef WOLFSSL_SMALL_STACK
- Md5* md5;
-#else
- Md5 md5[1];
-#endif
+ if (md5 == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return wc_InitMd5_ex(md5, NULL, INVALID_DEVID);
+}
-#ifdef WOLFSSL_SMALL_STACK
- md5 = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+void wc_Md5Free(wc_Md5* md5)
+{
if (md5 == NULL)
- return MEMORY_E;
+ return;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+ wolfAsync_DevCtxFree(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+ wc_Md5Pic32Free(md5);
#endif
+}
+
+int wc_Md5GetHash(wc_Md5* md5, byte* hash)
+{
+ int ret;
+ wc_Md5 tmpMd5;
+
+ if (md5 == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
- wc_InitMd5(md5);
- wc_Md5Update(md5, data, len);
- wc_Md5Final(md5, hash);
+ ret = wc_Md5Copy(md5, &tmpMd5);
+ if (ret == 0) {
+ ret = wc_Md5Final(&tmpMd5, hash);
+ }
+
+ return ret;
+}
+
+int wc_Md5Copy(wc_Md5* src, wc_Md5* dst)
+{
+ int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
+
+ XMEMCPY(dst, src, sizeof(wc_Md5));
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+#ifdef WOLFSSL_PIC32MZ_HASH
+ ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ dst->flags |= WC_HASH_FLAG_ISCOPY;
#endif
+ return ret;
+}
+
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Md5SetFlags(wc_Md5* md5, word32 flags)
+{
+ if (md5) {
+ md5->flags = flags;
+ }
+ return 0;
+}
+int wc_Md5GetFlags(wc_Md5* md5, word32* flags)
+{
+ if (md5 && flags) {
+ *flags = md5->flags;
+ }
return 0;
}
+#endif
#endif /* WOLFSSL_TI_HASH */
-
#endif /* NO_MD5 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/memory.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/memory.c
index dd9281945..3bc8e21cf 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/memory.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/memory.c
@@ -1,8 +1,8 @@
/* memory.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -33,82 +34,921 @@
#define WOLFSSL_MALLOC_CHECK
#endif
+
+/*
+Possible memory options:
+ * NO_WOLFSSL_MEMORY: Disables wolf memory callback support. When not defined settings.h defines USE_WOLFSSL_MEMORY.
+ * WOLFSSL_STATIC_MEMORY: Turns on the use of static memory buffers and functions.
+ This allows for using static memory instead of dynamic.
+ * WOLFSSL_STATIC_ALIGN: Define defaults to 16 to indicate static memory alignment.
+ * HAVE_IO_POOL: Enables use of static thread safe memory pool for input/output buffers.
+ * XMALLOC_OVERRIDE: Allows override of the XMALLOC, XFREE and XREALLOC macros.
+ * XMALLOC_USER: Allows custom XMALLOC, XFREE and XREALLOC functions to be defined.
+ * WOLFSSL_NO_MALLOC: Disables the fall-back case to use STDIO malloc/free when no callbacks are set.
+ * WOLFSSL_TRACK_MEMORY: Enables memory tracking for total stats and list of allocated memory.
+ * WOLFSSL_DEBUG_MEMORY: Enables extra function and line number args for memory callbacks.
+ * WOLFSSL_DEBUG_MEMORY_PRINT: Enables printing of each malloc/free.
+ * WOLFSSL_MALLOC_CHECK: Reports malloc or alignment failure using WOLFSSL_STATIC_ALIGN
+ * WOLFSSL_FORCE_MALLOC_FAIL_TEST: Used for internal testing to induce random malloc failures.
+ * WOLFSSL_HEAP_TEST: Used for internal testing of heap hint
+ */
+
+#ifdef WOLFSSL_ZEPHYR
+#undef realloc
+void *z_realloc(void *ptr, size_t size)
+{
+ if (ptr == NULL)
+ ptr = malloc(size);
+ else
+ ptr = realloc(ptr, size);
+
+ return ptr;
+}
+#define realloc z_realloc
+#endif
+
#ifdef USE_WOLFSSL_MEMORY
#include <wolfssl/wolfcrypt/memory.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
-#ifdef WOLFSSL_MALLOC_CHECK
+#if defined(WOLFSSL_DEBUG_MEMORY) && defined(WOLFSSL_DEBUG_MEMORY_PRINT)
+#include <stdio.h>
+#endif
+
+#ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST
+ static int gMemFailCountSeed;
+ static int gMemFailCount;
+ void wolfSSL_SetMemFailCount(int memFailCount)
+ {
+ if (gMemFailCountSeed == 0) {
+ gMemFailCountSeed = memFailCount;
+ gMemFailCount = memFailCount;
+ }
+ }
+#endif
+#if defined(WOLFSSL_MALLOC_CHECK) || defined(WOLFSSL_TRACK_MEMORY_FULL) || \
+ defined(WOLFSSL_MEMORY_LOG)
#include <stdio.h>
#endif
+
/* Set these to default values initially. */
-static wolfSSL_Malloc_cb malloc_function = 0;
-static wolfSSL_Free_cb free_function = 0;
-static wolfSSL_Realloc_cb realloc_function = 0;
+static wolfSSL_Malloc_cb malloc_function = NULL;
+static wolfSSL_Free_cb free_function = NULL;
+static wolfSSL_Realloc_cb realloc_function = NULL;
int wolfSSL_SetAllocators(wolfSSL_Malloc_cb mf,
wolfSSL_Free_cb ff,
wolfSSL_Realloc_cb rf)
{
- int res = 0;
-
- if (mf)
- malloc_function = mf;
- else
- res = BAD_FUNC_ARG;
-
- if (ff)
- free_function = ff;
- else
- res = BAD_FUNC_ARG;
-
- if (rf)
- realloc_function = rf;
- else
- res = BAD_FUNC_ARG;
-
- return res;
+ malloc_function = mf;
+ free_function = ff;
+ realloc_function = rf;
+ return 0;
}
+int wolfSSL_GetAllocators(wolfSSL_Malloc_cb* mf,
+ wolfSSL_Free_cb* ff,
+ wolfSSL_Realloc_cb* rf)
+{
+ if (mf) *mf = malloc_function;
+ if (ff) *ff = free_function;
+ if (rf) *rf = realloc_function;
+ return 0;
+}
+#ifndef WOLFSSL_STATIC_MEMORY
+#ifdef WOLFSSL_DEBUG_MEMORY
+void* wolfSSL_Malloc(size_t size, const char* func, unsigned int line)
+#else
void* wolfSSL_Malloc(size_t size)
+#endif
{
void* res = 0;
- if (malloc_function)
+ if (malloc_function) {
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ res = malloc_function(size, func, line);
+ #else
res = malloc_function(size);
- else
+ #endif
+ }
+ else {
+ #ifndef WOLFSSL_NO_MALLOC
res = malloc(size);
-
- #ifdef WOLFSSL_MALLOC_CHECK
- if (res == NULL)
- puts("wolfSSL_malloc failed");
+ #else
+ WOLFSSL_MSG("No malloc available");
#endif
-
+ }
+
+#ifdef WOLFSSL_DEBUG_MEMORY
+#if defined(WOLFSSL_DEBUG_MEMORY_PRINT) && !defined(WOLFSSL_TRACK_MEMORY)
+ printf("Alloc: %p -> %u at %s:%d\n", res, (word32)size, func, line);
+#else
+ (void)func;
+ (void)line;
+#endif
+#endif
+
+#ifdef WOLFSSL_MALLOC_CHECK
+ if (res == NULL)
+ WOLFSSL_MSG("wolfSSL_malloc failed");
+#endif
+
+#ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST
+ if (res && --gMemFailCount == 0) {
+ printf("\n---FORCED MEM FAIL TEST---\n");
+ if (free_function) {
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ free_function(res, func, line);
+ #else
+ free_function(res);
+ #endif
+ }
+ else {
+ free(res); /* clear */
+ }
+ gMemFailCount = gMemFailCountSeed; /* reset */
+ return NULL;
+ }
+#endif
+
return res;
}
+#ifdef WOLFSSL_DEBUG_MEMORY
+void wolfSSL_Free(void *ptr, const char* func, unsigned int line)
+#else
void wolfSSL_Free(void *ptr)
+#endif
{
- if (free_function)
+#ifdef WOLFSSL_DEBUG_MEMORY
+#if defined(WOLFSSL_DEBUG_MEMORY_PRINT) && !defined(WOLFSSL_TRACK_MEMORY)
+ printf("Free: %p at %s:%d\n", ptr, func, line);
+#else
+ (void)func;
+ (void)line;
+#endif
+#endif
+
+ if (free_function) {
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ free_function(ptr, func, line);
+ #else
free_function(ptr);
- else
+ #endif
+ }
+ else {
+ #ifndef WOLFSSL_NO_MALLOC
free(ptr);
+ #else
+ WOLFSSL_MSG("No free available");
+ #endif
+ }
}
+#ifdef WOLFSSL_DEBUG_MEMORY
+void* wolfSSL_Realloc(void *ptr, size_t size, const char* func, unsigned int line)
+#else
void* wolfSSL_Realloc(void *ptr, size_t size)
+#endif
{
void* res = 0;
- if (realloc_function)
+ if (realloc_function) {
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ res = realloc_function(ptr, size, func, line);
+ #else
res = realloc_function(ptr, size);
- else
+ #endif
+ }
+ else {
+ #ifndef WOLFSSL_NO_MALLOC
res = realloc(ptr, size);
+ #else
+ WOLFSSL_MSG("No realloc available");
+ #endif
+ }
+
+ return res;
+}
+#endif /* WOLFSSL_STATIC_MEMORY */
+
+#ifdef WOLFSSL_STATIC_MEMORY
+
+struct wc_Memory {
+ byte* buffer;
+ struct wc_Memory* next;
+ word32 sz;
+};
+
+
+/* returns amount of memory used on success. On error returns negative value
+ wc_Memory** list is the list that new buckets are prepended to
+ */
+static int create_memory_buckets(byte* buffer, word32 bufSz,
+ word32 buckSz, word32 buckNum, wc_Memory** list) {
+ word32 i;
+ byte* pt = buffer;
+ int ret = 0;
+ word32 memSz = (word32)sizeof(wc_Memory);
+ word32 padSz = -(int)memSz & (WOLFSSL_STATIC_ALIGN - 1);
+
+ /* if not enough space available for bucket size then do not try */
+ if (buckSz + memSz + padSz > bufSz) {
+ return ret;
+ }
+
+ for (i = 0; i < buckNum; i++) {
+ if ((buckSz + memSz + padSz) <= (bufSz - ret)) {
+ /* create a new struct and set its values */
+ wc_Memory* mem = (struct wc_Memory*)(pt);
+ mem->sz = buckSz;
+ mem->buffer = (byte*)pt + padSz + memSz;
+ mem->next = NULL;
+
+ /* add the newly created struct to front of list */
+ if (*list == NULL) {
+ *list = mem;
+ } else {
+ mem->next = *list;
+ *list = mem;
+ }
+
+ /* advance pointer and keep track of memory used */
+ ret += buckSz + padSz + memSz;
+ pt += buckSz + padSz + memSz;
+ }
+ else {
+ break; /* not enough space left for more buckets of this size */
+ }
+ }
+
+ return ret;
+}
+
+int wolfSSL_init_memory_heap(WOLFSSL_HEAP* heap)
+{
+ word32 wc_MemSz[WOLFMEM_DEF_BUCKETS] = { WOLFMEM_BUCKETS };
+ word32 wc_Dist[WOLFMEM_DEF_BUCKETS] = { WOLFMEM_DIST };
+
+ if (heap == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ XMEMSET(heap, 0, sizeof(WOLFSSL_HEAP));
+
+ XMEMCPY(heap->sizeList, wc_MemSz, sizeof(wc_MemSz));
+ XMEMCPY(heap->distList, wc_Dist, sizeof(wc_Dist));
+
+ if (wc_InitMutex(&(heap->memory_mutex)) != 0) {
+ WOLFSSL_MSG("Error creating heap memory mutex");
+ return BAD_MUTEX_E;
+ }
+
+ return 0;
+}
+
+int wc_LoadStaticMemory(WOLFSSL_HEAP_HINT** pHint,
+ unsigned char* buf, unsigned int sz, int flag, int max)
+{
+ int ret;
+ WOLFSSL_HEAP* heap;
+ WOLFSSL_HEAP_HINT* hint;
+ word32 idx = 0;
+
+ if (pHint == NULL || buf == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if ((sizeof(WOLFSSL_HEAP) + sizeof(WOLFSSL_HEAP_HINT)) > sz - idx) {
+ return BUFFER_E; /* not enough memory for structures */
+ }
+
+ /* check if hint has already been assigned */
+ if (*pHint == NULL) {
+ heap = (WOLFSSL_HEAP*)buf;
+ idx += sizeof(WOLFSSL_HEAP);
+ hint = (WOLFSSL_HEAP_HINT*)(buf + idx);
+ idx += sizeof(WOLFSSL_HEAP_HINT);
+
+ ret = wolfSSL_init_memory_heap(heap);
+ if (ret != 0) {
+ return ret;
+ }
+
+ XMEMSET(hint, 0, sizeof(WOLFSSL_HEAP_HINT));
+ hint->memory = heap;
+ }
+ else {
+ #ifdef WOLFSSL_HEAP_TEST
+ /* do not load in memory if test has been set */
+ if (heap == (void*)WOLFSSL_HEAP_TEST) {
+ return 0;
+ }
+ #endif
+
+ hint = (WOLFSSL_HEAP_HINT*)(*pHint);
+ heap = hint->memory;
+ }
+
+ ret = wolfSSL_load_static_memory(buf + idx, sz - idx, flag, heap);
+ if (ret != 1) {
+ WOLFSSL_MSG("Error partitioning memory");
+ return -1;
+ }
+
+ /* determine what max applies too */
+ if ((flag & WOLFMEM_IO_POOL) || (flag & WOLFMEM_IO_POOL_FIXED)) {
+ heap->maxIO = max;
+ }
+ else { /* general memory used in handshakes */
+ heap->maxHa = max;
+ }
+
+ heap->flag |= flag;
+ *pHint = hint;
+
+ (void)max;
+
+ return 0;
+}
+
+int wolfSSL_load_static_memory(byte* buffer, word32 sz, int flag,
+ WOLFSSL_HEAP* heap)
+{
+ word32 ava = sz;
+ byte* pt = buffer;
+ int ret = 0;
+ word32 memSz = (word32)sizeof(wc_Memory);
+ word32 padSz = -(int)memSz & (WOLFSSL_STATIC_ALIGN - 1);
+
+ WOLFSSL_ENTER("wolfSSL_load_static_memory");
+
+ if (buffer == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* align pt */
+ while ((wolfssl_word)pt % WOLFSSL_STATIC_ALIGN && pt < (buffer + sz)) {
+ *pt = 0x00;
+ pt++;
+ ava--;
+ }
+
+#ifdef WOLFSSL_DEBUG_MEMORY
+ printf("Allocated %d bytes for static memory @ %p\n", ava, pt);
+#endif
+
+ /* divide into chunks of memory and add them to available list */
+ while (ava >= (heap->sizeList[0] + padSz + memSz)) {
+ int i;
+ /* creating only IO buffers from memory passed in, max TLS is 16k */
+ if (flag & WOLFMEM_IO_POOL || flag & WOLFMEM_IO_POOL_FIXED) {
+ if ((ret = create_memory_buckets(pt, ava,
+ WOLFMEM_IO_SZ, 1, &(heap->io))) < 0) {
+ WOLFSSL_LEAVE("wolfSSL_load_static_memory", ret);
+ return ret;
+ }
+
+ /* check if no more room left for creating IO buffers */
+ if (ret == 0) {
+ break;
+ }
+
+ /* advance pointer in buffer for next buckets and keep track
+ of how much memory is left available */
+ pt += ret;
+ ava -= ret;
+ }
+ else {
+ /* start at largest and move to smaller buckets */
+ for (i = (WOLFMEM_MAX_BUCKETS - 1); i >= 0; i--) {
+ if ((heap->sizeList[i] + padSz + memSz) <= ava) {
+ if ((ret = create_memory_buckets(pt, ava, heap->sizeList[i],
+ heap->distList[i], &(heap->ava[i]))) < 0) {
+ WOLFSSL_LEAVE("wolfSSL_load_static_memory", ret);
+ return ret;
+ }
+
+ /* advance pointer in buffer for next buckets and keep track
+ of how much memory is left available */
+ pt += ret;
+ ava -= ret;
+ }
+ }
+ }
+ }
+
+ return 1;
+}
+
+
+/* returns the size of management memory needed for each bucket.
+ * This is memory that is used to keep track of and align memory buckets. */
+int wolfSSL_MemoryPaddingSz(void)
+{
+ word32 memSz = (word32)sizeof(wc_Memory);
+ word32 padSz = -(int)memSz & (WOLFSSL_STATIC_ALIGN - 1);
+ return memSz + padSz;
+}
+
+
+/* Used to calculate memory size for optimum use with buckets.
+ returns the suggested size rounded down to the nearest bucket. */
+int wolfSSL_StaticBufferSz(byte* buffer, word32 sz, int flag)
+{
+ word32 bucketSz[WOLFMEM_MAX_BUCKETS] = {WOLFMEM_BUCKETS};
+ word32 distList[WOLFMEM_MAX_BUCKETS] = {WOLFMEM_DIST};
+
+ word32 ava = sz;
+ byte* pt = buffer;
+ word32 memSz = (word32)sizeof(wc_Memory);
+ word32 padSz = -(int)memSz & (WOLFSSL_STATIC_ALIGN - 1);
+
+ WOLFSSL_ENTER("wolfSSL_static_size");
+
+ if (buffer == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* align pt */
+ while ((wolfssl_word)pt % WOLFSSL_STATIC_ALIGN && pt < (buffer + sz)) {
+ pt++;
+ ava--;
+ }
+
+ /* creating only IO buffers from memory passed in, max TLS is 16k */
+ if (flag & WOLFMEM_IO_POOL || flag & WOLFMEM_IO_POOL_FIXED) {
+ if (ava < (memSz + padSz + WOLFMEM_IO_SZ)) {
+ return 0; /* not enough room for even one bucket */
+ }
+
+ ava = ava % (memSz + padSz + WOLFMEM_IO_SZ);
+ }
+ else {
+ int i, k;
+
+ if (ava < (bucketSz[0] + padSz + memSz)) {
+ return 0; /* not enough room for even one bucket */
+ }
+
+ while ((ava >= (bucketSz[0] + padSz + memSz)) && (ava > 0)) {
+ /* start at largest and move to smaller buckets */
+ for (i = (WOLFMEM_MAX_BUCKETS - 1); i >= 0; i--) {
+ for (k = distList[i]; k > 0; k--) {
+ if ((bucketSz[i] + padSz + memSz) <= ava) {
+ ava -= bucketSz[i] + padSz + memSz;
+ }
+ }
+ }
+ }
+ }
+
+ return sz - ava; /* round down */
+}
+
+
+int FreeFixedIO(WOLFSSL_HEAP* heap, wc_Memory** io)
+{
+ WOLFSSL_MSG("Freeing fixed IO buffer");
+
+ /* check if fixed buffer was set */
+ if (*io == NULL) {
+ return 1;
+ }
+
+ if (heap == NULL) {
+ WOLFSSL_MSG("No heap to return fixed IO too");
+ }
+ else {
+ /* put IO buffer back into IO pool */
+ (*io)->next = heap->io;
+ heap->io = *io;
+ *io = NULL;
+ }
+
+ return 1;
+}
+
+
+int SetFixedIO(WOLFSSL_HEAP* heap, wc_Memory** io)
+{
+ WOLFSSL_MSG("Setting fixed IO for SSL");
+ if (heap == NULL) {
+ return MEMORY_E;
+ }
+
+ *io = heap->io;
+
+ if (*io != NULL) {
+ heap->io = (*io)->next;
+ (*io)->next = NULL;
+ }
+ else { /* failed to grab an IO buffer */
+ return 0;
+ }
+
+ return 1;
+}
+
+
+int wolfSSL_GetMemStats(WOLFSSL_HEAP* heap, WOLFSSL_MEM_STATS* stats)
+{
+ word32 i;
+ wc_Memory* pt;
+
+ XMEMSET(stats, 0, sizeof(WOLFSSL_MEM_STATS));
+
+ stats->totalAlloc = heap->alloc;
+ stats->totalFr = heap->frAlc;
+ stats->curAlloc = stats->totalAlloc - stats->totalFr;
+ stats->maxHa = heap->maxHa;
+ stats->maxIO = heap->maxIO;
+ for (i = 0; i < WOLFMEM_MAX_BUCKETS; i++) {
+ stats->blockSz[i] = heap->sizeList[i];
+ for (pt = heap->ava[i]; pt != NULL; pt = pt->next) {
+ stats->avaBlock[i] += 1;
+ }
+ }
+
+ for (pt = heap->io; pt != NULL; pt = pt->next) {
+ stats->avaIO++;
+ }
+
+ stats->flag = heap->flag; /* flag used */
+
+ return 1;
+}
+
+
+#ifdef WOLFSSL_DEBUG_MEMORY
+void* wolfSSL_Malloc(size_t size, void* heap, int type, const char* func, unsigned int line)
+#else
+void* wolfSSL_Malloc(size_t size, void* heap, int type)
+#endif
+{
+ void* res = 0;
+ wc_Memory* pt = NULL;
+ int i;
+
+ /* check for testing heap hint was set */
+#ifdef WOLFSSL_HEAP_TEST
+ if (heap == (void*)WOLFSSL_HEAP_TEST) {
+ return malloc(size);
+ }
+#endif
+
+ /* if no heap hint then use dynamic memory*/
+ if (heap == NULL) {
+ #ifdef WOLFSSL_HEAP_TEST
+ /* allow using malloc for creating ctx and method */
+ if (type == DYNAMIC_TYPE_CTX || type == DYNAMIC_TYPE_METHOD ||
+ type == DYNAMIC_TYPE_CERT_MANAGER) {
+ WOLFSSL_MSG("ERROR allowing null heap hint for ctx/method\n");
+ res = malloc(size);
+ }
+ else {
+ WOLFSSL_MSG("ERROR null heap hint passed into XMALLOC\n");
+ res = NULL;
+ }
+ #else
+ #ifndef WOLFSSL_NO_MALLOC
+ #ifdef FREERTOS
+ res = pvPortMalloc(size);
+ #else
+ res = malloc(size);
+ #endif
+ #else
+ WOLFSSL_MSG("No heap hint found to use and no malloc");
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ printf("ERROR: at %s:%d\n", func, line);
+ #endif
+ #endif /* WOLFSSL_NO_MALLOC */
+ #endif /* WOLFSSL_HEAP_TEST */
+ }
+ else {
+ WOLFSSL_HEAP_HINT* hint = (WOLFSSL_HEAP_HINT*)heap;
+ WOLFSSL_HEAP* mem = hint->memory;
+
+ if (wc_LockMutex(&(mem->memory_mutex)) != 0) {
+ WOLFSSL_MSG("Bad memory_mutex lock");
+ return NULL;
+ }
+
+ /* case of using fixed IO buffers */
+ if (mem->flag & WOLFMEM_IO_POOL_FIXED &&
+ (type == DYNAMIC_TYPE_OUT_BUFFER ||
+ type == DYNAMIC_TYPE_IN_BUFFER)) {
+ if (type == DYNAMIC_TYPE_OUT_BUFFER) {
+ pt = hint->outBuf;
+ }
+ if (type == DYNAMIC_TYPE_IN_BUFFER) {
+ pt = hint->inBuf;
+ }
+ }
+ else {
+ /* check if using IO pool flag */
+ if (mem->flag & WOLFMEM_IO_POOL &&
+ (type == DYNAMIC_TYPE_OUT_BUFFER ||
+ type == DYNAMIC_TYPE_IN_BUFFER)) {
+ if (mem->io != NULL) {
+ pt = mem->io;
+ mem->io = pt->next;
+ }
+ }
+
+ /* general static memory */
+ if (pt == NULL) {
+ for (i = 0; i < WOLFMEM_MAX_BUCKETS; i++) {
+ if ((word32)size < mem->sizeList[i]) {
+ if (mem->ava[i] != NULL) {
+ pt = mem->ava[i];
+ mem->ava[i] = pt->next;
+ break;
+ }
+ #ifdef WOLFSSL_DEBUG_STATIC_MEMORY
+ else {
+ printf("Size: %ld, Empty: %d\n", size,
+ mem->sizeList[i]);
+ }
+ #endif
+ }
+ }
+ }
+ }
+
+ if (pt != NULL) {
+ mem->inUse += pt->sz;
+ mem->alloc += 1;
+ res = pt->buffer;
+
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ printf("Alloc: %p -> %u at %s:%d\n", pt->buffer, pt->sz, func, line);
+ #endif
+
+ /* keep track of connection statistics if flag is set */
+ if (mem->flag & WOLFMEM_TRACK_STATS) {
+ WOLFSSL_MEM_CONN_STATS* stats = hint->stats;
+ if (stats != NULL) {
+ stats->curMem += pt->sz;
+ if (stats->peakMem < stats->curMem) {
+ stats->peakMem = stats->curMem;
+ }
+ stats->curAlloc++;
+ if (stats->peakAlloc < stats->curAlloc) {
+ stats->peakAlloc = stats->curAlloc;
+ }
+ stats->totalAlloc++;
+ }
+ }
+ }
+ else {
+ WOLFSSL_MSG("ERROR ran out of static memory");
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ printf("Looking for %lu bytes at %s:%d\n", size, func, line);
+ #endif
+ }
+
+ wc_UnLockMutex(&(mem->memory_mutex));
+ }
+
+ #ifdef WOLFSSL_MALLOC_CHECK
+ if ((wolfssl_word)res % WOLFSSL_STATIC_ALIGN) {
+ WOLFSSL_MSG("ERROR memory is not aligned");
+ res = NULL;
+ }
+ #endif
+
+
+ (void)i;
+ (void)pt;
+ (void)type;
return res;
}
+
+#ifdef WOLFSSL_DEBUG_MEMORY
+void wolfSSL_Free(void *ptr, void* heap, int type, const char* func, unsigned int line)
+#else
+void wolfSSL_Free(void *ptr, void* heap, int type)
+#endif
+{
+ int i;
+ wc_Memory* pt;
+
+ if (ptr) {
+ /* check for testing heap hint was set */
+ #ifdef WOLFSSL_HEAP_TEST
+ if (heap == (void*)WOLFSSL_HEAP_TEST) {
+ return free(ptr);
+ }
+ #endif
+
+ if (heap == NULL) {
+ #ifdef WOLFSSL_HEAP_TEST
+ /* allow using malloc for creating ctx and method */
+ if (type == DYNAMIC_TYPE_CTX || type == DYNAMIC_TYPE_METHOD ||
+ type == DYNAMIC_TYPE_CERT_MANAGER) {
+ WOLFSSL_MSG("ERROR allowing null heap hint for ctx/method\n");
+ }
+ else {
+ WOLFSSL_MSG("ERROR null heap hint passed into XFREE\n");
+ }
+ #endif
+ #ifndef WOLFSSL_NO_MALLOC
+ #ifdef FREERTOS
+ vPortFree(ptr);
+ #else
+ free(ptr);
+ #endif
+ #else
+ WOLFSSL_MSG("Error trying to call free when turned off");
+ #endif /* WOLFSSL_NO_MALLOC */
+ }
+ else {
+ WOLFSSL_HEAP_HINT* hint = (WOLFSSL_HEAP_HINT*)heap;
+ WOLFSSL_HEAP* mem = hint->memory;
+ word32 padSz = -(int)sizeof(wc_Memory) & (WOLFSSL_STATIC_ALIGN - 1);
+
+ /* get memory struct and add it to available list */
+ pt = (wc_Memory*)((byte*)ptr - sizeof(wc_Memory) - padSz);
+ if (wc_LockMutex(&(mem->memory_mutex)) != 0) {
+ WOLFSSL_MSG("Bad memory_mutex lock");
+ return;
+ }
+
+ /* case of using fixed IO buffers */
+ if (mem->flag & WOLFMEM_IO_POOL_FIXED &&
+ (type == DYNAMIC_TYPE_OUT_BUFFER ||
+ type == DYNAMIC_TYPE_IN_BUFFER)) {
+ /* fixed IO pools are free'd at the end of SSL lifetime
+ using FreeFixedIO(WOLFSSL_HEAP* heap, wc_Memory** io) */
+ }
+ else if (mem->flag & WOLFMEM_IO_POOL && pt->sz == WOLFMEM_IO_SZ &&
+ (type == DYNAMIC_TYPE_OUT_BUFFER ||
+ type == DYNAMIC_TYPE_IN_BUFFER)) {
+ pt->next = mem->io;
+ mem->io = pt;
+ }
+ else { /* general memory free */
+ for (i = 0; i < WOLFMEM_MAX_BUCKETS; i++) {
+ if (pt->sz == mem->sizeList[i]) {
+ pt->next = mem->ava[i];
+ mem->ava[i] = pt;
+ break;
+ }
+ }
+ }
+ mem->inUse -= pt->sz;
+ mem->frAlc += 1;
+
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ printf("Free: %p -> %u at %s:%d\n", pt->buffer, pt->sz, func, line);
+ #endif
+
+ /* keep track of connection statistics if flag is set */
+ if (mem->flag & WOLFMEM_TRACK_STATS) {
+ WOLFSSL_MEM_CONN_STATS* stats = hint->stats;
+ if (stats != NULL) {
+ /* avoid under flow */
+ if (stats->curMem > pt->sz) {
+ stats->curMem -= pt->sz;
+ }
+ else {
+ stats->curMem = 0;
+ }
+
+ if (stats->curAlloc > 0) {
+ stats->curAlloc--;
+ }
+ stats->totalFr++;
+ }
+ }
+ wc_UnLockMutex(&(mem->memory_mutex));
+ }
+ }
+
+ (void)i;
+ (void)pt;
+ (void)type;
+}
+
+#ifdef WOLFSSL_DEBUG_MEMORY
+void* wolfSSL_Realloc(void *ptr, size_t size, void* heap, int type, const char* func, unsigned int line)
+#else
+void* wolfSSL_Realloc(void *ptr, size_t size, void* heap, int type)
+#endif
+{
+ void* res = 0;
+ wc_Memory* pt = NULL;
+ word32 prvSz;
+ int i;
+
+ /* check for testing heap hint was set */
+#ifdef WOLFSSL_HEAP_TEST
+ if (heap == (void*)WOLFSSL_HEAP_TEST) {
+ return realloc(ptr, size);
+ }
+#endif
+
+ if (heap == NULL) {
+ #ifdef WOLFSSL_HEAP_TEST
+ WOLFSSL_MSG("ERROR null heap hint passed in to XREALLOC\n");
+ #endif
+ #ifndef WOLFSSL_NO_MALLOC
+ res = realloc(ptr, size);
+ #else
+ WOLFSSL_MSG("NO heap found to use for realloc");
+ #endif /* WOLFSSL_NO_MALLOC */
+ }
+ else {
+ WOLFSSL_HEAP_HINT* hint = (WOLFSSL_HEAP_HINT*)heap;
+ WOLFSSL_HEAP* mem = hint->memory;
+ word32 padSz = -(int)sizeof(wc_Memory) & (WOLFSSL_STATIC_ALIGN - 1);
+
+ if (ptr == NULL) {
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ return wolfSSL_Malloc(size, heap, type, func, line);
+ #else
+ return wolfSSL_Malloc(size, heap, type);
+ #endif
+ }
+
+ if (wc_LockMutex(&(mem->memory_mutex)) != 0) {
+ WOLFSSL_MSG("Bad memory_mutex lock");
+ return NULL;
+ }
+
+ /* case of using fixed IO buffers or IO pool */
+ if (((mem->flag & WOLFMEM_IO_POOL)||(mem->flag & WOLFMEM_IO_POOL_FIXED))
+ && (type == DYNAMIC_TYPE_OUT_BUFFER ||
+ type == DYNAMIC_TYPE_IN_BUFFER)) {
+ /* no realloc, is fixed size */
+ pt = (wc_Memory*)((byte*)ptr - padSz - sizeof(wc_Memory));
+ if (pt->sz < size) {
+ WOLFSSL_MSG("Error IO memory was not large enough");
+ res = NULL; /* return NULL in error case */
+ }
+ res = pt->buffer;
+ }
+ else {
+ /* general memory */
+ for (i = 0; i < WOLFMEM_MAX_BUCKETS; i++) {
+ if ((word32)size < mem->sizeList[i]) {
+ if (mem->ava[i] != NULL) {
+ pt = mem->ava[i];
+ mem->ava[i] = pt->next;
+ break;
+ }
+ }
+ }
+
+ if (pt != NULL && res == NULL) {
+ res = pt->buffer;
+
+ /* copy over original information and free ptr */
+ prvSz = ((wc_Memory*)((byte*)ptr - padSz -
+ sizeof(wc_Memory)))->sz;
+ prvSz = (prvSz > pt->sz)? pt->sz: prvSz;
+ XMEMCPY(pt->buffer, ptr, prvSz);
+ mem->inUse += pt->sz;
+ mem->alloc += 1;
+
+ /* free memory that was previously being used */
+ wc_UnLockMutex(&(mem->memory_mutex));
+ wolfSSL_Free(ptr, heap, type
+ #ifdef WOLFSSL_DEBUG_MEMORY
+ , func, line
+ #endif
+ );
+ if (wc_LockMutex(&(mem->memory_mutex)) != 0) {
+ WOLFSSL_MSG("Bad memory_mutex lock");
+ return NULL;
+ }
+ }
+ }
+ wc_UnLockMutex(&(mem->memory_mutex));
+ }
+
+ #ifdef WOLFSSL_MALLOC_CHECK
+ if ((wolfssl_word)res % WOLFSSL_STATIC_ALIGN) {
+ WOLFSSL_MSG("ERROR memory is not aligned");
+ res = NULL;
+ }
+ #endif
+
+ (void)i;
+ (void)pt;
+ (void)type;
+
+ return res;
+}
+#endif /* WOLFSSL_STATIC_MEMORY */
+
#endif /* USE_WOLFSSL_MEMORY */
@@ -125,7 +965,7 @@ void* wolfSSL_Realloc(void *ptr, size_t size)
/* allow simple per thread in and out pools */
-/* use 17k size sense max record size is 16k plus overhead */
+/* use 17k size since max record size is 16k plus overhead */
static THREAD_LS_T byte pool_in[17*1024];
static THREAD_LS_T byte pool_out[17*1024];
@@ -172,9 +1012,7 @@ void* XREALLOC(void *p, size_t n, void* heap, int type)
return realloc(p, n);
}
-
-/* unit api calls, let's make sure visible with WOLFSSL_API */
-WOLFSSL_API void XFREE(void *p, void* heap, int type)
+void XFREE(void *p, void* heap, int type)
{
(void)heap;
@@ -189,3 +1027,100 @@ WOLFSSL_API void XFREE(void *p, void* heap, int type)
#endif /* HAVE_IO_POOL */
+#ifdef WOLFSSL_MEMORY_LOG
+void *xmalloc(size_t n, void* heap, int type, const char* func,
+ const char* file, unsigned int line)
+{
+ void* p;
+ word32* p32;
+
+ if (malloc_function)
+ p32 = malloc_function(n + sizeof(word32) * 4);
+ else
+ p32 = malloc(n + sizeof(word32) * 4);
+
+ p32[0] = (word32)n;
+ p = (void*)(p32 + 4);
+
+ fprintf(stderr, "Alloc: %p -> %u (%d) at %s:%s:%u\n", p, (word32)n, type,
+ func, file, line);
+
+ (void)heap;
+
+ return p;
+}
+void *xrealloc(void *p, size_t n, void* heap, int type, const char* func,
+ const char* file, unsigned int line)
+{
+ void* newp = NULL;
+ word32* p32;
+ word32* oldp32 = NULL;
+ word32 oldLen;
+
+ if (p != NULL) {
+ oldp32 = (word32*)p;
+ oldp32 -= 4;
+ oldLen = oldp32[0];
+ }
+
+ if (realloc_function)
+ p32 = realloc_function(oldp32, n + sizeof(word32) * 4);
+ else
+ p32 = realloc(oldp32, n + sizeof(word32) * 4);
+
+ if (p32 != NULL) {
+ p32[0] = (word32)n;
+ newp = (void*)(p32 + 4);
+
+ fprintf(stderr, "Alloc: %p -> %u (%d) at %s:%s:%u\n", newp, (word32)n,
+ type, func, file, line);
+ if (p != NULL) {
+ fprintf(stderr, "Free: %p -> %u (%d) at %s:%s:%u\n", p, oldLen,
+ type, func, file, line);
+ }
+ }
+
+ (void)heap;
+
+ return newp;
+}
+void xfree(void *p, void* heap, int type, const char* func, const char* file,
+ unsigned int line)
+{
+ word32* p32 = (word32*)p;
+
+ if (p != NULL) {
+ p32 -= 4;
+
+ fprintf(stderr, "Free: %p -> %u (%d) at %s:%s:%u\n", p, p32[0], type,
+ func, file, line);
+
+ if (free_function)
+ free_function(p32);
+ else
+ free(p32);
+ }
+
+ (void)heap;
+}
+#endif /* WOLFSSL_MEMORY_LOG */
+
+#ifdef WOLFSSL_STACK_LOG
+/* Note: this code only works with GCC using -finstrument-functions. */
+void __attribute__((no_instrument_function))
+ __cyg_profile_func_enter(void *func, void *caller)
+{
+ register void* sp asm("sp");
+ fprintf(stderr, "ENTER: %016lx %p\n", (unsigned long)(size_t)func, sp);
+ (void)caller;
+}
+
+void __attribute__((no_instrument_function))
+ __cyg_profile_func_exit(void *func, void *caller)
+{
+ register void* sp asm("sp");
+ fprintf(stderr, "EXIT: %016lx %p\n", (unsigned long)(size_t)func, sp);
+ (void)caller;
+}
+#endif
+
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/misc.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/misc.c
index 8a79a4c29..8f2402c9c 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/misc.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/misc.c
@@ -1,8 +1,8 @@
/* misc.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -30,7 +31,7 @@
#include <wolfssl/wolfcrypt/misc.h>
-/* inlining these functions is a huge speed increase and a small size decrease,
+/* inlining these functions is a huge speed increase and a small size decrease,
because the functions are smaller than function call setup/cleanup, e.g.,
md5 benchmark is twice as fast with inline. If you don't want it, then
define NO_INLINE and compile this file into wolfssl, otherwise it's used as
@@ -38,9 +39,22 @@
*/
#ifdef NO_INLINE
- #define STATIC
+ #define WC_STATIC
+#else
+ #define WC_STATIC static
+#endif
+
+/* Check for if compiling misc.c when not needed. */
+#if !defined(WOLFSSL_MISC_INCLUDED) && !defined(NO_INLINE)
+ #ifndef WOLFSSL_IGNORE_FILE_WARN
+ // #warning misc.c does not need to be compiled when using inline (NO_INLINE not defined)
+ #endif
+
#else
- #define STATIC static
+
+
+#if defined(__ICCARM__)
+ #include <intrinsics.h>
#endif
@@ -52,25 +66,25 @@
* i.e., _rotl and _rotr */
#pragma intrinsic(_lrotl, _lrotr)
- STATIC INLINE word32 rotlFixed(word32 x, word32 y)
+ WC_STATIC WC_INLINE word32 rotlFixed(word32 x, word32 y)
{
return y ? _lrotl(x, y) : x;
}
- STATIC INLINE word32 rotrFixed(word32 x, word32 y)
+ WC_STATIC WC_INLINE word32 rotrFixed(word32 x, word32 y)
{
return y ? _lrotr(x, y) : x;
}
#else /* generic */
- STATIC INLINE word32 rotlFixed(word32 x, word32 y)
+ WC_STATIC WC_INLINE word32 rotlFixed(word32 x, word32 y)
{
return (x << y) | (x >> (sizeof(y) * 8 - y));
- }
+ }
- STATIC INLINE word32 rotrFixed(word32 x, word32 y)
+ WC_STATIC WC_INLINE word32 rotrFixed(word32 x, word32 y)
{
return (x >> y) | (x << (sizeof(y) * 8 - y));
}
@@ -78,13 +92,18 @@
#endif
-STATIC INLINE word32 ByteReverseWord32(word32 value)
+WC_STATIC WC_INLINE word32 ByteReverseWord32(word32 value)
{
#ifdef PPC_INTRINSICS
/* PPC: load reverse indexed instruction */
return (word32)__lwbrx(&value,0);
+#elif defined(__ICCARM__)
+ return (word32)__REV(value);
#elif defined(KEIL_INTRINSICS)
return (word32)__rev(value);
+#elif defined(WOLF_ALLOW_BUILTIN) && \
+ defined(__GNUC_PREREQ) && __GNUC_PREREQ(4, 3)
+ return (word32)__builtin_bswap32(value);
#elif defined(FAST_ROTATE)
/* 5 instructions with rotate instruction, 9 without */
return (rotrFixed(value, 8U) & 0xff00ff00) |
@@ -97,7 +116,7 @@ STATIC INLINE word32 ByteReverseWord32(word32 value)
}
-STATIC INLINE void ByteReverseWords(word32* out, const word32* in,
+WC_STATIC WC_INLINE void ByteReverseWords(word32* out, const word32* in,
word32 byteCount)
{
word32 count = byteCount/(word32)sizeof(word32), i;
@@ -108,26 +127,28 @@ STATIC INLINE void ByteReverseWords(word32* out, const word32* in,
}
-#ifdef WORD64_AVAILABLE
+#if defined(WORD64_AVAILABLE) && !defined(WOLFSSL_NO_WORD64_OPS)
-STATIC INLINE word64 rotlFixed64(word64 x, word64 y)
+WC_STATIC WC_INLINE word64 rotlFixed64(word64 x, word64 y)
{
return (x << y) | (x >> (sizeof(y) * 8 - y));
-}
+}
-STATIC INLINE word64 rotrFixed64(word64 x, word64 y)
+WC_STATIC WC_INLINE word64 rotrFixed64(word64 x, word64 y)
{
return (x >> y) | (x << (sizeof(y) * 8 - y));
}
-STATIC INLINE word64 ByteReverseWord64(word64 value)
+WC_STATIC WC_INLINE word64 ByteReverseWord64(word64 value)
{
-#ifdef WOLFCRYPT_SLOW_WORD64
- return (word64)(ByteReverseWord32((word32)value)) << 32 |
- ByteReverseWord32((word32)(value>>32));
+#if defined(WOLF_ALLOW_BUILTIN) && defined(__GNUC_PREREQ) && __GNUC_PREREQ(4, 3)
+ return (word64)__builtin_bswap64(value);
+#elif defined(WOLFCRYPT_SLOW_WORD64)
+ return (word64)((word64)ByteReverseWord32((word32) value)) << 32 |
+ (word64)ByteReverseWord32((word32)(value >> 32));
#else
value = ((value & W64LIT(0xFF00FF00FF00FF00)) >> 8) |
((value & W64LIT(0x00FF00FF00FF00FF)) << 8);
@@ -138,7 +159,7 @@ STATIC INLINE word64 ByteReverseWord64(word64 value)
}
-STATIC INLINE void ByteReverseWords64(word64* out, const word64* in,
+WC_STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in,
word32 byteCount)
{
word32 count = byteCount/(word32)sizeof(word64), i;
@@ -148,10 +169,10 @@ STATIC INLINE void ByteReverseWords64(word64* out, const word64* in,
}
-#endif /* WORD64_AVAILABLE */
+#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */
-
-STATIC INLINE void XorWords(wolfssl_word* r, const wolfssl_word* a, word32 n)
+#ifndef WOLFSSL_NO_XOR_OPS
+WC_STATIC WC_INLINE void XorWords(wolfssl_word* r, const wolfssl_word* a, word32 n)
{
word32 i;
@@ -159,7 +180,7 @@ STATIC INLINE void XorWords(wolfssl_word* r, const wolfssl_word* a, word32 n)
}
-STATIC INLINE void xorbuf(void* buf, const void* mask, word32 count)
+WC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
{
if (((wolfssl_word)buf | (wolfssl_word)mask | count) % WOLFSSL_WORD_SIZE == 0)
XorWords( (wolfssl_word*)buf,
@@ -172,19 +193,37 @@ STATIC INLINE void xorbuf(void* buf, const void* mask, word32 count)
for (i = 0; i < count; i++) b[i] ^= m[i];
}
}
+#endif
-
+#ifndef WOLFSSL_NO_FORCE_ZERO
/* Make sure compiler doesn't skip */
-STATIC INLINE void ForceZero(const void* mem, word32 len)
+WC_STATIC WC_INLINE void ForceZero(const void* mem, word32 len)
{
volatile byte* z = (volatile byte*)mem;
+#if defined(WOLFSSL_X86_64_BUILD) && defined(WORD64_AVAILABLE)
+ volatile word64* w;
+ #ifndef WOLFSSL_UNALIGNED_64BIT_ACCESS
+ word32 l = (sizeof(word64) - ((size_t)z & (sizeof(word64)-1))) &
+ (sizeof(word64)-1);
+
+ if (len < l) l = len;
+ len -= l;
+ while (l--) *z++ = 0;
+ #endif
+ for (w = (volatile word64*)z; len >= sizeof(*w); len -= sizeof(*w))
+ *w++ = 0;
+ z = (volatile byte*)w;
+#endif
+
while (len--) *z++ = 0;
}
+#endif
+#ifndef WOLFSSL_NO_CONST_CMP
/* check all length bytes for equality, return 0 on success */
-STATIC INLINE int ConstantCompare(const byte* a, const byte* b, int length)
+WC_STATIC WC_INLINE int ConstantCompare(const byte* a, const byte* b, int length)
{
int i;
int compareSum = 0;
@@ -195,7 +234,172 @@ STATIC INLINE int ConstantCompare(const byte* a, const byte* b, int length)
return compareSum;
}
+#endif
+
+
+#ifndef WOLFSSL_HAVE_MIN
+ #define WOLFSSL_HAVE_MIN
+ #if defined(HAVE_FIPS) && !defined(min) /* so ifdef check passes */
+ #define min min
+ #endif
+ WC_STATIC WC_INLINE word32 min(word32 a, word32 b)
+ {
+ return a > b ? b : a;
+ }
+#endif /* !WOLFSSL_HAVE_MIN */
+
+#ifndef WOLFSSL_HAVE_MAX
+ #define WOLFSSL_HAVE_MAX
+ #if defined(HAVE_FIPS) && !defined(max) /* so ifdef check passes */
+ #define max max
+ #endif
+ WC_STATIC WC_INLINE word32 max(word32 a, word32 b)
+ {
+ return a > b ? a : b;
+ }
+#endif /* !WOLFSSL_HAVE_MAX */
+
+#ifndef WOLFSSL_NO_INT_ENCODE
+/* converts a 32 bit integer to 24 bit */
+WC_STATIC WC_INLINE void c32to24(word32 in, word24 out)
+{
+ out[0] = (in >> 16) & 0xff;
+ out[1] = (in >> 8) & 0xff;
+ out[2] = in & 0xff;
+}
+
+/* convert 16 bit integer to opaque */
+WC_STATIC WC_INLINE void c16toa(word16 wc_u16, byte* c)
+{
+ c[0] = (wc_u16 >> 8) & 0xff;
+ c[1] = wc_u16 & 0xff;
+}
+
+/* convert 32 bit integer to opaque */
+WC_STATIC WC_INLINE void c32toa(word32 wc_u32, byte* c)
+{
+ c[0] = (wc_u32 >> 24) & 0xff;
+ c[1] = (wc_u32 >> 16) & 0xff;
+ c[2] = (wc_u32 >> 8) & 0xff;
+ c[3] = wc_u32 & 0xff;
+}
+#endif
+
+#ifndef WOLFSSL_NO_INT_DECODE
+/* convert a 24 bit integer into a 32 bit one */
+WC_STATIC WC_INLINE void c24to32(const word24 wc_u24, word32* wc_u32)
+{
+ *wc_u32 = ((word32)wc_u24[0] << 16) | (wc_u24[1] << 8) | wc_u24[2];
+}
+
+
+/* convert opaque to 24 bit integer */
+WC_STATIC WC_INLINE void ato24(const byte* c, word32* wc_u24)
+{
+ *wc_u24 = ((word32)c[0] << 16) | (c[1] << 8) | c[2];
+}
+
+/* convert opaque to 16 bit integer */
+WC_STATIC WC_INLINE void ato16(const byte* c, word16* wc_u16)
+{
+ *wc_u16 = (word16) ((c[0] << 8) | (c[1]));
+}
+
+/* convert opaque to 32 bit integer */
+WC_STATIC WC_INLINE void ato32(const byte* c, word32* wc_u32)
+{
+ *wc_u32 = ((word32)c[0] << 24) | ((word32)c[1] << 16) | (c[2] << 8) | c[3];
+}
+
+
+WC_STATIC WC_INLINE word32 btoi(byte b)
+{
+ return (word32)(b - 0x30);
+}
+#endif
+
+
+#ifndef WOLFSSL_NO_CT_OPS
+/* Constant time - mask set when a > b. */
+WC_STATIC WC_INLINE byte ctMaskGT(int a, int b)
+{
+ return (((word32)a - b - 1) >> 31) - 1;
+}
+
+/* Constant time - mask set when a >= b. */
+WC_STATIC WC_INLINE byte ctMaskGTE(int a, int b)
+{
+ return (((word32)a - b ) >> 31) - 1;
+}
+
+/* Constant time - mask set when a >= b. */
+WC_STATIC WC_INLINE int ctMaskIntGTE(int a, int b)
+{
+ return (((word32)a - b ) >> 31) - 1;
+}
+
+/* Constant time - mask set when a < b. */
+WC_STATIC WC_INLINE byte ctMaskLT(int a, int b)
+{
+ return (((word32)b - a - 1) >> 31) - 1;
+}
+
+/* Constant time - mask set when a <= b. */
+WC_STATIC WC_INLINE byte ctMaskLTE(int a, int b)
+{
+ return (((word32)b - a ) >> 31) - 1;
+}
+
+/* Constant time - mask set when a == b. */
+WC_STATIC WC_INLINE byte ctMaskEq(int a, int b)
+{
+ return (~ctMaskGT(a, b)) & (~ctMaskLT(a, b));
+}
+
+WC_STATIC WC_INLINE word16 ctMask16GT(int a, int b)
+{
+ return (((word32)a - b - 1) >> 31) - 1;
+}
+
+WC_STATIC WC_INLINE word16 ctMask16LT(int a, int b)
+{
+ return (((word32)a - b - 1) >> 31) - 1;
+}
+
+WC_STATIC WC_INLINE word16 ctMask16Eq(int a, int b)
+{
+ return (~ctMask16GT(a, b)) & (~ctMask16LT(a, b));
+}
+
+/* Constant time - mask set when a != b. */
+WC_STATIC WC_INLINE byte ctMaskNotEq(int a, int b)
+{
+ return ctMaskGT(a, b) | ctMaskLT(a, b);
+}
+
+/* Constant time - select a when mask is set and b otherwise. */
+WC_STATIC WC_INLINE byte ctMaskSel(byte m, byte a, byte b)
+{
+ return (b & ((byte)~(word32)m)) | (a & m);
+}
+
+/* Constant time - select integer a when mask is set and integer b otherwise. */
+WC_STATIC WC_INLINE int ctMaskSelInt(byte m, int a, int b)
+{
+ return (b & (~(signed int)(signed char)m)) |
+ (a & ( (signed int)(signed char)m));
+}
+
+/* Constant time - bit set when a <= b. */
+WC_STATIC WC_INLINE byte ctSetLTE(int a, int b)
+{
+ return ((word32)a - b - 1) >> 31;
+}
+#endif
+
+
+#undef WC_STATIC
-#undef STATIC
+#endif /* !WOLFSSL_MISC_INCLUDED && !NO_INLINE */
#endif /* WOLF_CRYPT_MISC_C */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pkcs12.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pkcs12.c
new file mode 100644
index 000000000..8ae500417
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pkcs12.c
@@ -0,0 +1,2403 @@
+/* pkcs12.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if !defined(NO_ASN) && !defined(NO_PWDBASED) && defined(HAVE_PKCS12)
+
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/wolfcrypt/asn_public.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hmac.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+#include <wolfssl/wolfcrypt/pkcs12.h>
+#include <wolfssl/wolfcrypt/pwdbased.h>
+#include <wolfssl/wolfcrypt/hash.h>
+
+
+#define ERROR_OUT(err, eLabel) { ret = (err); goto eLabel; }
+
+enum {
+ WC_PKCS12_KeyBag = 667,
+ WC_PKCS12_ShroudedKeyBag = 668,
+ WC_PKCS12_CertBag = 669,
+ WC_PKCS12_CertBag_Type1 = 675,
+ WC_PKCS12_CrlBag = 670,
+ WC_PKCS12_SecretBag = 671,
+ WC_PKCS12_SafeContentsBag = 672,
+ WC_PKCS12_DATA = 651,
+ WC_PKCS12_ENCRYPTED_DATA = 656,
+
+ WC_PKCS12_DATA_OBJ_SZ = 11,
+};
+
+static const byte WC_PKCS12_ENCRYPTED_OID[] =
+ {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x06};
+static const byte WC_PKCS12_DATA_OID[] =
+ {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x01};
+static const byte WC_PKCS12_CertBag_Type1_OID[] =
+ {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x16, 0x01};
+static const byte WC_PKCS12_CertBag_OID[] =
+ {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x0c, 0x0a, 0x01, 0x03};
+static const byte WC_PKCS12_KeyBag_OID[] =
+ {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x0c, 0x0a, 0x01, 0x01};
+static const byte WC_PKCS12_ShroudedKeyBag_OID[] =
+ {0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x0c, 0x0a, 0x01, 0x02};
+
+
+typedef struct ContentInfo {
+ byte* data;
+ struct ContentInfo* next;
+ word32 encC; /* encryptedContent */
+ word32 dataSz;
+ int type; /* DATA / encrypted / enveloped */
+} ContentInfo;
+
+
+typedef struct AuthenticatedSafe {
+ ContentInfo* CI;
+ byte* data; /* T contents.... */
+ word32 oid; /* encrypted or not */
+ word32 numCI; /* number of Content Info structs */
+ word32 dataSz;
+} AuthenticatedSafe;
+
+
+typedef struct MacData {
+ byte* digest;
+ byte* salt;
+ word32 oid;
+ word32 digestSz;
+ word32 saltSz;
+ int itt; /* number of iterations when creating HMAC key */
+} MacData;
+
+
+struct WC_PKCS12 {
+ void* heap;
+ AuthenticatedSafe* safe;
+ MacData* signData;
+ word32 oid; /* DATA / Enveloped DATA ... */
+};
+
+
+/* for friendlyName, localKeyId .... */
+typedef struct WC_PKCS12_ATTRIBUTE {
+ byte* data;
+ word32 oid;
+ word32 dataSz;
+} WC_PKCS12_ATTRIBUTE;
+
+
+WC_PKCS12* wc_PKCS12_new(void)
+{
+ WC_PKCS12* pkcs12 = (WC_PKCS12*)XMALLOC(sizeof(WC_PKCS12),
+ NULL, DYNAMIC_TYPE_PKCS);
+ if (pkcs12 == NULL) {
+ WOLFSSL_MSG("Memory issue when creating WC_PKCS12 struct");
+ return NULL;
+ }
+
+ XMEMSET(pkcs12, 0, sizeof(WC_PKCS12));
+
+ return pkcs12;
+}
+
+
+static void freeSafe(AuthenticatedSafe* safe, void* heap)
+{
+ int i;
+
+ if (safe == NULL) {
+ return;
+ }
+
+ /* free content info structs */
+ for (i = safe->numCI; i > 0; i--) {
+ ContentInfo* ci = safe->CI;
+ safe->CI = ci->next;
+ XFREE(ci, heap, DYNAMIC_TYPE_PKCS);
+ }
+ if (safe->data != NULL) {
+ XFREE(safe->data, heap, DYNAMIC_TYPE_PKCS);
+ }
+ XFREE(safe, heap, DYNAMIC_TYPE_PKCS);
+
+ (void)heap;
+}
+
+
+void wc_PKCS12_free(WC_PKCS12* pkcs12)
+{
+ void* heap;
+
+ /* if null pointer is passed in do nothing */
+ if (pkcs12 == NULL) {
+ WOLFSSL_MSG("Trying to free null WC_PKCS12 object");
+ return;
+ }
+
+ heap = pkcs12->heap;
+ if (pkcs12->safe != NULL) {
+ freeSafe(pkcs12->safe, heap);
+ }
+
+ /* free mac data */
+ if (pkcs12->signData != NULL) {
+ if (pkcs12->signData->digest != NULL) {
+ XFREE(pkcs12->signData->digest, heap, DYNAMIC_TYPE_DIGEST);
+ pkcs12->signData->digest = NULL;
+ }
+ if (pkcs12->signData->salt != NULL) {
+ XFREE(pkcs12->signData->salt, heap, DYNAMIC_TYPE_SALT);
+ pkcs12->signData->salt = NULL;
+ }
+ XFREE(pkcs12->signData, heap, DYNAMIC_TYPE_PKCS);
+ pkcs12->signData = NULL;
+ }
+
+ XFREE(pkcs12, NULL, DYNAMIC_TYPE_PKCS);
+ pkcs12 = NULL;
+}
+
+
+static int GetSafeContent(WC_PKCS12* pkcs12, const byte* input,
+ word32* idx, int maxIdx)
+{
+ AuthenticatedSafe* safe;
+ word32 oid;
+ word32 localIdx = *idx;
+ int ret;
+ int size = 0;
+ byte tag;
+
+ safe = (AuthenticatedSafe*)XMALLOC(sizeof(AuthenticatedSafe), pkcs12->heap,
+ DYNAMIC_TYPE_PKCS);
+ if (safe == NULL) {
+ return MEMORY_E;
+ }
+ XMEMSET(safe, 0, sizeof(AuthenticatedSafe));
+
+ ret = GetObjectId(input, &localIdx, &oid, oidIgnoreType, maxIdx);
+ if (ret < 0) {
+ WOLFSSL_LEAVE("Get object id failed", ret);
+ freeSafe(safe, pkcs12->heap);
+ return ASN_PARSE_E;
+ }
+
+ safe->oid = oid;
+ /* check tag, length */
+ if (GetASNTag(input, &localIdx, &tag, maxIdx) < 0) {
+ freeSafe(safe, pkcs12->heap);
+ return ASN_PARSE_E;
+ }
+
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+ WOLFSSL_MSG("Unexpected tag in PKCS12 DER");
+ freeSafe(safe, pkcs12->heap);
+ return ASN_PARSE_E;
+ }
+ if ((ret = GetLength(input, &localIdx, &size, maxIdx)) <= 0) {
+ freeSafe(safe, pkcs12->heap);
+ return ret;
+ }
+
+ switch (oid) {
+ case WC_PKCS12_ENCRYPTED_DATA:
+ WOLFSSL_MSG("Found PKCS12 OBJECT: ENCRYPTED DATA\n");
+ break;
+
+ case WC_PKCS12_DATA:
+ WOLFSSL_MSG("Found PKCS12 OBJECT: DATA");
+ /* get octets holding contents */
+ if (GetASNTag(input, &localIdx, &tag, maxIdx) < 0) {
+ freeSafe(safe, pkcs12->heap);
+ return ASN_PARSE_E;
+ }
+
+ if (tag != ASN_OCTET_STRING) {
+ WOLFSSL_MSG("Wrong tag with content PKCS12 type DATA");
+ freeSafe(safe, pkcs12->heap);
+ return ASN_PARSE_E;
+ }
+ if ((ret = GetLength(input, &localIdx, &size, maxIdx)) <= 0) {
+ freeSafe(safe, pkcs12->heap);
+ return ret;
+ }
+
+ break;
+ }
+
+ safe->dataSz = size;
+ safe->data = (byte*)XMALLOC(size, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ if (safe->data == NULL) {
+ freeSafe(safe, pkcs12->heap);
+ return MEMORY_E;
+ }
+ XMEMCPY(safe->data, input + localIdx, size);
+ *idx = localIdx;
+
+ /* an instance of AuthenticatedSafe is created from
+ * ContentInfos strung together in a SEQUENCE. Here we iterate
+ * through the ContentInfos and add them to our
+ * AuthenticatedSafe struct */
+ localIdx = 0;
+ input = safe->data;
+ {
+ int CISz;
+ ret = GetSequence(input, &localIdx, &CISz, safe->dataSz);
+ if (ret < 0) {
+ freeSafe(safe, pkcs12->heap);
+ return ASN_PARSE_E;
+ }
+ CISz += localIdx;
+ while ((int)localIdx < CISz) {
+ int curSz = 0;
+ word32 curIdx;
+ ContentInfo* ci = NULL;
+
+ #ifdef WOLFSSL_DEBUG_PKCS12
+ printf("\t\tlooking for Content Info.... ");
+ #endif
+
+ if ((ret = GetSequence(input, &localIdx, &curSz, safe->dataSz))
+ < 0) {
+ freeSafe(safe, pkcs12->heap);
+ return ret;
+ }
+
+ if (curSz > CISz) {
+ /* subset should not be larger than universe */
+ freeSafe(safe, pkcs12->heap);
+ return ASN_PARSE_E;
+ }
+
+ curIdx = localIdx;
+ if ((ret = GetObjectId(input, &localIdx, &oid, oidIgnoreType,
+ safe->dataSz)) < 0) {
+ WOLFSSL_LEAVE("Get object id failed", ret);
+ freeSafe(safe, pkcs12->heap);
+ return ret;
+ }
+
+ /* create new content info struct ... possible OID sanity check? */
+ ci = (ContentInfo*)XMALLOC(sizeof(ContentInfo), pkcs12->heap,
+ DYNAMIC_TYPE_PKCS);
+ if (ci == NULL) {
+ freeSafe(safe, pkcs12->heap);
+ return MEMORY_E;
+ }
+
+ ci->type = oid;
+ ci->dataSz = curSz - (localIdx-curIdx);
+ ci->data = (byte*)input + localIdx;
+ localIdx += ci->dataSz;
+
+ #ifdef WOLFSSL_DEBUG_PKCS12
+ switch (oid) {
+ case WC_PKCS12_ENCRYPTED_DATA:
+ printf("CONTENT INFO: ENCRYPTED DATA, size = %d\n", ci->dataSz);
+ break;
+
+ case WC_PKCS12_DATA:
+ printf("CONTENT INFO: DATA, size = %d\n", ci->dataSz);
+ break;
+ default:
+ printf("CONTENT INFO: UNKNOWN, size = %d\n", ci->dataSz);
+ }
+ #endif
+
+ /* insert to head of list */
+ ci->next = safe->CI;
+ safe->CI = ci;
+ safe->numCI += 1;
+ }
+ }
+
+ pkcs12->safe = safe;
+ *idx += localIdx;
+
+ return ret;
+}
+
+
+/* optional mac data */
+static int GetSignData(WC_PKCS12* pkcs12, const byte* mem, word32* idx,
+ word32 totalSz)
+{
+ MacData* mac;
+ word32 curIdx = *idx;
+ word32 oid = 0;
+ int size, ret;
+ byte tag;
+
+ /* Digest Info : Sequence
+ * DigestAlgorithmIdentifier
+ * Digest
+ */
+ if ((ret = GetSequence(mem, &curIdx, &size, totalSz)) <= 0) {
+ WOLFSSL_MSG("Failed to get PKCS12 sequence");
+ return ret;
+ }
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ printf("\t\tSEQUENCE: DigestInfo size = %d\n", size);
+#endif
+
+ mac = (MacData*)XMALLOC(sizeof(MacData), pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ if (mac == NULL) {
+ return MEMORY_E;
+ }
+ XMEMSET(mac, 0, sizeof(MacData));
+
+ /* DigestAlgorithmIdentifier */
+ if ((ret = GetAlgoId(mem, &curIdx, &oid, oidIgnoreType, totalSz)) < 0) {
+ WOLFSSL_MSG("Failed to get PKCS12 sequence");
+ XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ return ret;
+ }
+ mac->oid = oid;
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ printf("\t\tALGO ID = %d\n", oid);
+#endif
+
+ /* Digest: should be octet type holding digest */
+ if (GetASNTag(mem, &curIdx, &tag, totalSz) < 0) {
+ XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ return ASN_PARSE_E;
+ }
+
+ if (tag != ASN_OCTET_STRING) {
+ WOLFSSL_MSG("Failed to get digest");
+ XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ return ASN_PARSE_E;
+ }
+
+ if ((ret = GetLength(mem, &curIdx, &size, totalSz)) <= 0) {
+ XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ return ret;
+ }
+ mac->digestSz = size;
+ mac->digest = (byte*)XMALLOC(mac->digestSz, pkcs12->heap,
+ DYNAMIC_TYPE_DIGEST);
+ if (mac->digest == NULL || mac->digestSz + curIdx > totalSz) {
+ ERROR_OUT(MEMORY_E, exit_gsd);
+ }
+ XMEMCPY(mac->digest, mem + curIdx, mac->digestSz);
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ {
+ byte* p;
+ for (printf("\t\tDigest = "), p = (byte*)mem+curIdx;
+ p < (byte*)mem + curIdx + mac->digestSz;
+ printf("%02X", *p), p++);
+ printf(" : size = %d\n", mac->digestSz);
+ }
+#endif
+
+ curIdx += mac->digestSz;
+
+ /* get salt, should be octet string */
+ if (GetASNTag(mem, &curIdx, &tag, totalSz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_gsd);
+ }
+
+ if (tag != ASN_OCTET_STRING) {
+ WOLFSSL_MSG("Failed to get salt");
+ ERROR_OUT(ASN_PARSE_E, exit_gsd);
+ }
+
+ if ((ret = GetLength(mem, &curIdx, &size, totalSz)) < 0) {
+ goto exit_gsd;
+ }
+ mac->saltSz = size;
+ mac->salt = (byte*)XMALLOC(mac->saltSz, pkcs12->heap, DYNAMIC_TYPE_SALT);
+ if (mac->salt == NULL || mac->saltSz + curIdx > totalSz) {
+ ERROR_OUT(MEMORY_E, exit_gsd);
+ }
+ XMEMCPY(mac->salt, mem + curIdx, mac->saltSz);
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ {
+ byte* p;
+ for (printf("\t\tSalt = "), p = (byte*)mem + curIdx;
+ p < (byte*)mem + curIdx + mac->saltSz;
+ printf("%02X", *p), p++);
+ printf(" : size = %d\n", mac->saltSz);
+ }
+#endif
+
+ curIdx += mac->saltSz;
+
+ /* check for MAC iterations, default to 1 */
+ mac->itt = WC_PKCS12_MAC_DEFAULT;
+ if (curIdx < totalSz) {
+ int number = 0;
+ if ((ret = GetShortInt(mem, &curIdx, &number, totalSz)) >= 0) {
+ /* found an iteration value */
+ mac->itt = number;
+ }
+ }
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ printf("\t\tITERATIONS : %d\n", mac->itt);
+#endif
+
+ *idx = curIdx;
+ pkcs12->signData = mac;
+ ret = 0; /* success */
+
+exit_gsd:
+
+ /* failure cleanup */
+ if (ret != 0) {
+ if (mac) {
+ if (mac->digest)
+ XFREE(mac->digest, pkcs12->heap, DYNAMIC_TYPE_DIGEST);
+ XFREE(mac, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ }
+ }
+
+ return ret;
+}
+
+
+/* expects PKCS12 signData to be set up with OID
+ *
+ * returns the size of mac created on success. A negative value will be returned
+ * in the case that an error happened.
+ */
+static int wc_PKCS12_create_mac(WC_PKCS12* pkcs12, byte* data, word32 dataSz,
+ const byte* psw, word32 pswSz, byte* out, word32 outSz)
+{
+ Hmac hmac;
+ MacData* mac;
+ int ret, kLen;
+ enum wc_HashType hashT;
+ int idx = 0;
+ int id = 3; /* value from RFC 7292 indicating key is used for MAC */
+ word32 i;
+ byte unicodePasswd[MAX_UNICODE_SZ];
+ byte key[MAX_KEY_SIZE];
+
+ if (pkcs12 == NULL || pkcs12->signData == NULL || data == NULL ||
+ out == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ mac = pkcs12->signData;
+
+ /* unicode set up from asn.c */
+ if ((pswSz * 2 + 2) > (int)sizeof(unicodePasswd)) {
+ WOLFSSL_MSG("PKCS12 max unicode size too small");
+ return UNICODE_SIZE_E;
+ }
+
+ for (i = 0; i < pswSz; i++) {
+ unicodePasswd[idx++] = 0x00;
+ unicodePasswd[idx++] = (byte)psw[i];
+ }
+ /* add trailing NULL */
+ unicodePasswd[idx++] = 0x00;
+ unicodePasswd[idx++] = 0x00;
+
+ /* get hash type used and resulting size of HMAC key */
+ hashT = wc_OidGetHash(mac->oid);
+ if (hashT == WC_HASH_TYPE_NONE) {
+ WOLFSSL_MSG("Unsupported hash used");
+ return BAD_FUNC_ARG;
+ }
+ kLen = wc_HashGetDigestSize(hashT);
+
+ /* check out buffer is large enough */
+ if (kLen < 0 || outSz < (word32)kLen) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* idx contains size of unicodePasswd */
+ if ((ret = wc_PKCS12_PBKDF_ex(key, unicodePasswd, idx, mac->salt,
+ mac->saltSz, mac->itt, kLen, (int)hashT, id, pkcs12->heap)) < 0) {
+ return ret;
+ }
+
+ /* now that key has been created use it to get HMAC hash on data */
+ if ((ret = wc_HmacInit(&hmac, pkcs12->heap, INVALID_DEVID)) != 0) {
+ return ret;
+ }
+ ret = wc_HmacSetKey(&hmac, (int)hashT, key, kLen);
+ if (ret == 0)
+ ret = wc_HmacUpdate(&hmac, data, dataSz);
+ if (ret == 0)
+ ret = wc_HmacFinal(&hmac, out);
+ wc_HmacFree(&hmac);
+
+ if (ret != 0)
+ return ret;
+
+ return kLen; /* same as digest size */
+}
+
+
+/* check mac on pkcs12, pkcs12->signData has been sanity checked before entering *
+ * returns the result of comparison, success is 0 */
+static int wc_PKCS12_verify(WC_PKCS12* pkcs12, byte* data, word32 dataSz,
+ const byte* psw, word32 pswSz)
+{
+ MacData* mac;
+ int ret;
+ byte digest[WC_MAX_DIGEST_SIZE];
+
+ if (pkcs12 == NULL || pkcs12->signData == NULL || data == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ mac = pkcs12->signData;
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ printf("Verifying MAC with OID = %d\n", mac->oid);
+#endif
+
+ /* check if this build's digest size is too small */
+ if (mac->digestSz > WC_MAX_DIGEST_SIZE) {
+ WOLFSSL_MSG("PKCS12 max digest size too small");
+ return BAD_FUNC_ARG;
+ }
+
+ if ((ret = wc_PKCS12_create_mac(pkcs12, data, dataSz, psw, pswSz,
+ digest, WC_MAX_DIGEST_SIZE)) < 0) {
+ return ret;
+ }
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ {
+ byte* p;
+ for (printf("\t\tHash = "), p = (byte*)digest;
+ p < (byte*)digest + mac->digestSz;
+ printf("%02X", *p), p++);
+ printf(" : size = %d\n", mac->digestSz);
+ }
+#endif
+
+ return XMEMCMP(digest, mac->digest, mac->digestSz);
+}
+
+
+/* Convert DER format stored in der buffer to WC_PKCS12 struct
+ * Puts the raw contents of Content Info into structure without completely
+ * parsing or decoding.
+ * der : pointer to der buffer holding PKCS12
+ * derSz : size of der buffer
+ * pkcs12 : non-null pkcs12 pointer
+ * return 0 on success and negative on failure.
+ */
+int wc_d2i_PKCS12(const byte* der, word32 derSz, WC_PKCS12* pkcs12)
+{
+ word32 idx = 0;
+ word32 totalSz = 0;
+ int ret;
+ int size = 0;
+ int version = 0;
+
+ WOLFSSL_ENTER("wolfSSL_d2i_PKCS12_bio");
+
+ if (der == NULL || pkcs12 == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ totalSz = derSz;
+ if ((ret = GetSequence(der, &idx, &size, totalSz)) <= 0) {
+ WOLFSSL_MSG("Failed to get PKCS12 sequence");
+ return ret;
+ }
+
+ /* get version */
+ if ((ret = GetMyVersion(der, &idx, &version, totalSz)) < 0) {
+ return ret;
+ }
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ printf("\nBEGIN: PKCS12 size = %d\n", totalSz);
+ printf("version = %d\n", version);
+#endif
+
+ if (version != WC_PKCS12_VERSION_DEFAULT) {
+ WOLFSSL_MSG("PKCS12 unsupported version!");
+ return ASN_VERSION_E;
+ }
+
+ if ((ret = GetSequence(der, &idx, &size, totalSz)) < 0) {
+ return ret;
+ }
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ printf("\tSEQUENCE: AuthenticatedSafe size = %d\n", size);
+#endif
+
+ if ((ret = GetSafeContent(pkcs12, der, &idx, size + idx)) < 0) {
+ WOLFSSL_MSG("GetSafeContent error");
+ return ret;
+ }
+
+ /* if more buffer left check for MAC data */
+ if (idx < totalSz) {
+ if ((ret = GetSequence(der, &idx, &size, totalSz)) < 0) {
+ WOLFSSL_MSG("Ignoring unknown data at end of PKCS12 DER buffer");
+ }
+ else {
+ #ifdef WOLFSSL_DEBUG_PKCS12
+ printf("\tSEQUENCE: Signature size = %d\n", size);
+ #endif
+
+ if ((ret = GetSignData(pkcs12, der, &idx, totalSz)) < 0) {
+ return ASN_PARSE_E;
+ }
+ }
+ }
+
+#ifdef WOLFSSL_DEBUG_PKCS12
+ printf("END: PKCS12\n");
+#endif
+
+ return ret;
+}
+
+/* Convert WC_PKCS12 struct to allocated DER buffer.
+ * pkcs12 : non-null pkcs12 pointer
+ * der : pointer-pointer to der buffer. If NULL space will be
+ * allocated for der, which must be freed by application.
+ * derSz : size of buffer passed in when der is not NULL. NULL arg disables
+ * sanity checks on buffer read/writes. Max size gets set to derSz when
+ * the "der" buffer passed in is NULL and LENGTH_ONLY_E is returned.
+ * return size of DER on success and negative on failure.
+ */
+int wc_i2d_PKCS12(WC_PKCS12* pkcs12, byte** der, int* derSz)
+{
+ int ret = 0;
+ word32 seqSz = 0, verSz = 0, totalSz = 0, idx = 0, sdBufSz = 0;
+ byte *buf = NULL;
+ byte ver[MAX_VERSION_SZ];
+ byte seq[MAX_SEQ_SZ];
+ byte *sdBuf = NULL;
+
+ if ((pkcs12 == NULL) || (pkcs12->safe == NULL) ||
+ (der == NULL && derSz == NULL)) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* Create the MAC portion */
+ if (pkcs12->signData != NULL) {
+ MacData *mac = (MacData*)pkcs12->signData;
+ word32 innerSz = 0;
+ word32 outerSz = 0;
+
+ /* get exact size */
+ {
+ byte ASNLENGTH[MAX_LENGTH_SZ];
+ byte ASNSHORT[MAX_SHORT_SZ];
+ byte ASNALGO[MAX_ALGO_SZ];
+ word32 tmpIdx = 0;
+
+ /* algo id */
+ innerSz += SetAlgoID(mac->oid, ASNALGO, oidHashType, 0);
+
+ /* Octet string holding digest */
+ innerSz += ASN_TAG_SZ;
+ innerSz += SetLength(mac->digestSz, ASNLENGTH);
+ innerSz += mac->digestSz;
+
+ /* salt */
+ outerSz += ASN_TAG_SZ;
+ outerSz += SetLength(mac->saltSz, ASNLENGTH);
+ outerSz += mac->saltSz;
+
+ /* MAC iterations */
+ outerSz += SetShortInt(ASNSHORT, &tmpIdx, mac->itt, MAX_SHORT_SZ);
+
+ /* sequence of inner data */
+ outerSz += SetSequence(innerSz, seq);
+ outerSz += innerSz;
+ }
+ sdBufSz = outerSz + SetSequence(outerSz, seq);
+ sdBuf = (byte*)XMALLOC(sdBufSz, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ if (sdBuf == NULL) {
+ ret = MEMORY_E;
+ }
+
+ if (ret == 0) {
+ idx += SetSequence(outerSz, sdBuf);
+ idx += SetSequence(innerSz, &sdBuf[idx]);
+
+ /* Set Algorithm Identifier */
+ {
+ word32 algoIdSz;
+
+ algoIdSz = SetAlgoID(mac->oid, &sdBuf[idx], oidHashType, 0);
+ if (algoIdSz == 0) {
+ ret = ALGO_ID_E;
+ }
+ else {
+ idx += algoIdSz;
+ }
+ }
+ }
+
+ if (ret == 0) {
+
+
+ /* Octet string holding digest */
+ idx += SetOctetString(mac->digestSz, &sdBuf[idx]);
+ XMEMCPY(&sdBuf[idx], mac->digest, mac->digestSz);
+ idx += mac->digestSz;
+
+ /* Set salt */
+ idx += SetOctetString(mac->saltSz, &sdBuf[idx]);
+ XMEMCPY(&sdBuf[idx], mac->salt, mac->saltSz);
+ idx += mac->saltSz;
+
+ /* MAC iterations */
+ {
+ int tmpSz;
+ word32 tmpIdx = 0;
+ byte ar[MAX_SHORT_SZ];
+ tmpSz = SetShortInt(ar, &tmpIdx, mac->itt, MAX_SHORT_SZ);
+ if (tmpSz < 0) {
+ ret = tmpSz;
+ }
+ else {
+ XMEMCPY(&sdBuf[idx], ar, tmpSz);
+ }
+ }
+ totalSz += sdBufSz;
+ }
+ }
+
+ /* Calculate size of der */
+ if (ret == 0) {
+ totalSz += pkcs12->safe->dataSz;
+
+ totalSz += 4; /* Octet string */
+
+ totalSz += 4; /* Element */
+
+ totalSz += 2 + sizeof(WC_PKCS12_DATA_OID);
+
+ totalSz += 4; /* Seq */
+
+ ret = SetMyVersion(WC_PKCS12_VERSION_DEFAULT, ver, FALSE);
+ if (ret > 0) {
+ verSz = (word32)ret;
+ ret = 0; /* value larger than 0 is success */
+ totalSz += verSz;
+
+ seqSz = SetSequence(totalSz, seq);
+ totalSz += seqSz;
+
+ /* check if getting length only */
+ if (der == NULL && derSz != NULL) {
+ *derSz = totalSz;
+ XFREE(sdBuf, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ return LENGTH_ONLY_E;
+ }
+
+ if (*der == NULL) {
+ /* Allocate if requested */
+ buf = (byte*)XMALLOC(totalSz, NULL, DYNAMIC_TYPE_PKCS);
+ }
+ else {
+ buf = *der;
+
+ /* sanity check on buffer size if passed in */
+ if (derSz != NULL) {
+ if (*derSz < (int)totalSz) {
+ WOLFSSL_MSG("Buffer passed in is too small");
+ ret = BUFFER_E;
+ }
+ }
+ }
+ }
+ }
+
+ if (buf == NULL) {
+ ret = MEMORY_E;
+ }
+
+ if (ret == 0) {
+ idx = 0;
+
+ /* Copy parts to buf */
+ XMEMCPY(&buf[idx], seq, seqSz);
+ idx += seqSz;
+
+ XMEMCPY(&buf[idx], ver, verSz);
+ idx += verSz;
+
+ seqSz = SetSequence(totalSz - sdBufSz - idx - 4, seq);
+ XMEMCPY(&buf[idx], seq, seqSz);
+ idx += seqSz;
+
+ /* OID */
+ idx += SetObjectId(sizeof(WC_PKCS12_DATA_OID), &buf[idx]);
+ XMEMCPY(&buf[idx], WC_PKCS12_DATA_OID, sizeof(WC_PKCS12_DATA_OID));
+ idx += sizeof(WC_PKCS12_DATA_OID);
+
+ /* Element */
+ buf[idx++] = ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC;
+ idx += SetLength(totalSz - sdBufSz - idx - 3, &buf[idx]);
+
+ /* Octet string */
+ idx += SetOctetString(totalSz - sdBufSz - idx - 4, &buf[idx]);
+
+ XMEMCPY(&buf[idx], pkcs12->safe->data, pkcs12->safe->dataSz);
+ idx += pkcs12->safe->dataSz;
+
+ if (pkcs12->signData != NULL) {
+ XMEMCPY(&buf[idx], sdBuf, sdBufSz);
+ }
+
+ if (*der == NULL) {
+ /* Point to start of data allocated for DER */
+ *der = buf;
+ }
+ else {
+ /* Increment pointer to byte past DER */
+ *der = &buf[totalSz];
+ }
+
+ /* Return size of der */
+ ret = totalSz;
+ }
+
+ XFREE(sdBuf, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ /* Allocation of buf was the last time ret could be a failure,
+ * so no need to free here */
+
+ return ret;
+}
+
+
+/* helper function to free WC_DerCertList */
+void wc_FreeCertList(WC_DerCertList* list, void* heap)
+{
+ WC_DerCertList* current = list;
+ WC_DerCertList* next;
+
+ if (list == NULL) {
+ return;
+ }
+
+ while (current != NULL) {
+ next = current->next;
+ if (current->buffer != NULL) {
+ XFREE(current->buffer, heap, DYNAMIC_TYPE_PKCS);
+ }
+ XFREE(current, heap, DYNAMIC_TYPE_PKCS);
+ current = next;
+ }
+
+ (void)heap;
+}
+
+static void freeDecCertList(WC_DerCertList** list, byte** pkey, word32* pkeySz,
+ byte** cert, word32* certSz, void* heap)
+{
+ WC_DerCertList* current = *list;
+ WC_DerCertList* previous = NULL;
+ DecodedCert DeCert;
+
+ while (current != NULL) {
+
+ InitDecodedCert(&DeCert, current->buffer, current->bufferSz, heap);
+ if (ParseCertRelative(&DeCert, CERT_TYPE, NO_VERIFY, NULL) == 0) {
+ if (wc_CheckPrivateKey(*pkey, *pkeySz, &DeCert) == 1) {
+ WOLFSSL_MSG("Key Pair found");
+ *cert = current->buffer;
+ *certSz = current->bufferSz;
+
+ if (previous == NULL) {
+ *list = current->next;
+ }
+ else {
+ previous->next = current->next;
+ }
+ FreeDecodedCert(&DeCert);
+ XFREE(current, heap, DYNAMIC_TYPE_PKCS);
+ break;
+ }
+ }
+ FreeDecodedCert(&DeCert);
+
+ previous = current;
+ current = current->next;
+ }
+}
+
+
+/* return 0 on success and negative on failure.
+ * By side effect returns private key, cert, and optionally ca.
+ * Parses and decodes the parts of PKCS12
+ *
+ * NOTE: can parse with USER RSA enabled but may return cert that is not the
+ * pair for the key when using RSA key pairs.
+ *
+ * pkcs12 : non-null WC_PKCS12 struct
+ * psw : password to use for PKCS12 decode
+ * pkey : Private key returned
+ * cert : x509 cert returned
+ * ca : optional ca returned
+ */
+int wc_PKCS12_parse(WC_PKCS12* pkcs12, const char* psw,
+ byte** pkey, word32* pkeySz, byte** cert, word32* certSz,
+ WC_DerCertList** ca)
+{
+ ContentInfo* ci = NULL;
+ WC_DerCertList* certList = NULL;
+ WC_DerCertList* tailList = NULL;
+ byte* buf = NULL;
+ word32 i, oid;
+ int ret, pswSz;
+ word32 algId;
+
+ WOLFSSL_ENTER("wc_PKCS12_parse");
+
+ if (pkcs12 == NULL || psw == NULL || cert == NULL || certSz == NULL ||
+ pkey == NULL || pkeySz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ pswSz = (int)XSTRLEN(psw);
+ *cert = NULL;
+ *pkey = NULL;
+ if (ca != NULL)
+ *ca = NULL;
+
+ /* if there is sign data then verify the MAC */
+ if (pkcs12->signData != NULL ) {
+ if ((ret = wc_PKCS12_verify(pkcs12, pkcs12->safe->data,
+ pkcs12->safe->dataSz, (byte*)psw, pswSz)) != 0) {
+ WOLFSSL_MSG("PKCS12 Bad MAC on verify");
+ WOLFSSL_LEAVE("wc_PKCS12_parse verify ", ret);
+ return MAC_CMP_FAILED_E;
+ }
+ }
+
+ if (pkcs12->safe == NULL) {
+ WOLFSSL_MSG("No PKCS12 safes to parse");
+ return BAD_FUNC_ARG;
+ }
+
+ /* Decode content infos */
+ ci = pkcs12->safe->CI;
+ for (i = 0; i < pkcs12->safe->numCI; i++) {
+ byte* data;
+ word32 idx = 0;
+ int size, totalSz;
+ byte tag;
+
+ if (ci->type == WC_PKCS12_ENCRYPTED_DATA) {
+ int number;
+
+ WOLFSSL_MSG("Decrypting PKCS12 Content Info Container");
+ data = ci->data;
+ if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if ((ret = GetLength(data, &idx, &size, ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+
+ if ((ret = GetSequence(data, &idx, &size, ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+
+ if ((ret = GetShortInt(data, &idx, &number, ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+
+ if (number != 0) {
+ WOLFSSL_MSG("Expecting 0 for Integer with Encrypted PKCS12");
+ }
+
+ if ((ret = GetSequence(data, &idx, &size, ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+
+ ret = GetObjectId(data, &idx, &oid, oidIgnoreType, ci->dataSz);
+ if (ret < 0 || oid != WC_PKCS12_DATA) {
+ WOLFSSL_MSG("Not PKCS12 DATA object or get object parse error");
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+
+ /* decrypted content overwrites input buffer */
+ size = ci->dataSz - idx;
+ buf = (byte*)XMALLOC(size, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ if (buf == NULL) {
+ ERROR_OUT(MEMORY_E, exit_pk12par);
+ }
+ XMEMCPY(buf, data + idx, size);
+
+ if ((ret = DecryptContent(buf, size, psw, pswSz)) < 0) {
+ WOLFSSL_MSG("Decryption failed, algorithm not compiled in?");
+ goto exit_pk12par;
+ }
+
+ data = buf;
+ idx = 0;
+
+ #ifdef WOLFSSL_DEBUG_PKCS12
+ {
+ byte* p;
+ for (printf("\tData = "), p = (byte*)buf;
+ p < (byte*)buf + size;
+ printf("%02X", *p), p++);
+ printf("\n");
+ }
+ #endif
+ }
+ else { /* type DATA */
+ WOLFSSL_MSG("Parsing PKCS12 DATA Content Info Container");
+ data = ci->data;
+ if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if ((ret = GetLength(data, &idx, &size, ci->dataSz)) <= 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+
+ if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if (tag != ASN_OCTET_STRING) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if ((ret = GetLength(data, &idx, &size, ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+
+ }
+
+ /* parse through bags in ContentInfo */
+ if ((ret = GetSequence(data, &idx, &totalSz, ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+ totalSz += idx;
+
+ while ((int)idx < totalSz) {
+ int bagSz;
+ if ((ret = GetSequence(data, &idx, &bagSz, ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+ bagSz += idx;
+
+ if ((ret = GetObjectId(data, &idx, &oid, oidIgnoreType,
+ ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+
+ switch (oid) {
+ case WC_PKCS12_KeyBag: /* 667 */
+ WOLFSSL_MSG("PKCS12 Key Bag found");
+ if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if ((ret = GetLength(data, &idx, &size, ci->dataSz)) <= 0) {
+ if (ret == 0)
+ ret = ASN_PARSE_E;
+ goto exit_pk12par;
+ }
+ if (*pkey == NULL) {
+ *pkey = (byte*)XMALLOC(size, pkcs12->heap,
+ DYNAMIC_TYPE_PUBLIC_KEY);
+ if (*pkey == NULL) {
+ ERROR_OUT(MEMORY_E, exit_pk12par);
+ }
+ XMEMCPY(*pkey, data + idx, size);
+ *pkeySz = ToTraditional_ex(*pkey, size, &algId);
+ }
+
+ #ifdef WOLFSSL_DEBUG_PKCS12
+ {
+ byte* p;
+ for (printf("\tKey = "), p = (byte*)*pkey;
+ p < (byte*)*pkey + size;
+ printf("%02X", *p), p++);
+ printf("\n");
+ }
+ #endif
+ idx += size;
+ break;
+
+ case WC_PKCS12_ShroudedKeyBag: /* 668 */
+ {
+ byte* k;
+
+ WOLFSSL_MSG("PKCS12 Shrouded Key Bag found");
+ if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if ((ret = GetLength(data, &idx, &size,
+ ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+
+ k = (byte*)XMALLOC(size, pkcs12->heap,
+ DYNAMIC_TYPE_PUBLIC_KEY);
+ if (k == NULL) {
+ ERROR_OUT(MEMORY_E, exit_pk12par);
+ }
+ XMEMCPY(k, data + idx, size);
+
+ /* overwrites input, be warned */
+ if ((ret = ToTraditionalEnc(k, size, psw, pswSz,
+ &algId)) < 0) {
+ XFREE(k, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+ goto exit_pk12par;
+ }
+
+ if (ret < size) {
+ /* shrink key buffer */
+ byte* tmp = (byte*)XMALLOC(ret, pkcs12->heap,
+ DYNAMIC_TYPE_PUBLIC_KEY);
+ if (tmp == NULL) {
+ XFREE(k, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+ ERROR_OUT(MEMORY_E, exit_pk12par);
+ }
+ XMEMCPY(tmp, k, ret);
+ XFREE(k, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+ k = tmp;
+ }
+ size = ret;
+
+ if (*pkey == NULL) {
+ *pkey = k;
+ *pkeySz = size;
+ }
+ else { /* only expecting one key */
+ XFREE(k, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+ }
+ idx += size;
+
+ #ifdef WOLFSSL_DEBUG_PKCS12
+ {
+ byte* p;
+ for (printf("\tKey = "), p = (byte*)k;
+ p < (byte*)k + ret;
+ printf("%02X", *p), p++);
+ printf("\n");
+ }
+ #endif
+ }
+ break;
+
+ case WC_PKCS12_CertBag: /* 669 */
+ {
+ WC_DerCertList* node;
+ WOLFSSL_MSG("PKCS12 Cert Bag found");
+ if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC)) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if ((ret = GetLength(data, &idx, &size, ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+
+ /* get cert bag type */
+ if ((ret = GetSequence(data, &idx, &size, ci->dataSz)) <0) {
+ goto exit_pk12par;
+ }
+
+ if ((ret = GetObjectId(data, &idx, &oid, oidIgnoreType,
+ ci->dataSz)) < 0) {
+ goto exit_pk12par;
+ }
+
+ switch (oid) {
+ case WC_PKCS12_CertBag_Type1: /* 675 */
+ /* type 1 */
+ WOLFSSL_MSG("PKCS12 cert bag type 1");
+ if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if (tag != (ASN_CONSTRUCTED |
+ ASN_CONTEXT_SPECIFIC)) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if ((ret = GetLength(data, &idx, &size, ci->dataSz))
+ <= 0) {
+ if (ret == 0)
+ ret = ASN_PARSE_E;
+ goto exit_pk12par;
+ }
+ if (GetASNTag(data, &idx, &tag, ci->dataSz) < 0) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+ if (tag != ASN_OCTET_STRING) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+
+ }
+ if ((ret = GetLength(data, &idx, &size, ci->dataSz))
+ < 0) {
+ goto exit_pk12par;
+ }
+ break;
+ default:
+ WOLFSSL_MSG("Unknown PKCS12 cert bag type");
+ }
+
+ if (size + idx > (word32)bagSz) {
+ ERROR_OUT(ASN_PARSE_E, exit_pk12par);
+ }
+
+ /* list to hold all certs found */
+ node = (WC_DerCertList*)XMALLOC(sizeof(WC_DerCertList),
+ pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ if (node == NULL) {
+ ERROR_OUT(MEMORY_E, exit_pk12par);
+ }
+ XMEMSET(node, 0, sizeof(WC_DerCertList));
+
+ node->buffer = (byte*)XMALLOC(size, pkcs12->heap,
+ DYNAMIC_TYPE_PKCS);
+ if (node->buffer == NULL) {
+ XFREE(node, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ ERROR_OUT(MEMORY_E, exit_pk12par);
+ }
+ XMEMCPY(node->buffer, data + idx, size);
+ node->bufferSz = size;
+
+ /* put the new node into the list */
+ if (certList != NULL) {
+ WOLFSSL_MSG("Pushing new cert onto queue");
+ tailList->next = node;
+ tailList = node;
+ }
+ else {
+ certList = node;
+ tailList = node;
+ }
+
+ /* on to next */
+ idx += size;
+ }
+ break;
+
+ case WC_PKCS12_CrlBag: /* 670 */
+ WOLFSSL_MSG("PKCS12 CRL BAG not yet supported");
+ break;
+
+ case WC_PKCS12_SecretBag: /* 671 */
+ WOLFSSL_MSG("PKCS12 Secret BAG not yet supported");
+ break;
+
+ case WC_PKCS12_SafeContentsBag: /* 672 */
+ WOLFSSL_MSG("PKCS12 Safe Contents BAG not yet supported");
+ break;
+
+ default:
+ WOLFSSL_MSG("Unknown PKCS12 BAG type found");
+ }
+
+ /* Attribute, unknown bag or unsupported */
+ if ((int)idx < bagSz) {
+ idx = bagSz; /* skip for now */
+ }
+ }
+
+ /* free temporary buffer */
+ if (buf != NULL) {
+ XFREE(buf, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ buf = NULL;
+ }
+
+ ci = ci->next;
+ WOLFSSL_MSG("Done Parsing PKCS12 Content Info Container");
+ }
+
+ /* check if key pair, remove from list */
+ if (*pkey != NULL) {
+ freeDecCertList(&certList, pkey, pkeySz, cert, certSz, pkcs12->heap);
+ }
+
+ /* if ca arg provided return certList, otherwise free it */
+ if (ca != NULL) {
+ *ca = certList;
+ }
+ else {
+ /* free list, not wanted */
+ wc_FreeCertList(certList, pkcs12->heap);
+ }
+ (void)tailList; /* not used */
+
+ ret = 0; /* success */
+
+exit_pk12par:
+
+ if (ret != 0) {
+ /* failure cleanup */
+ if (*pkey) {
+ XFREE(*pkey, pkcs12->heap, DYNAMIC_TYPE_PUBLIC_KEY);
+ *pkey = NULL;
+ }
+ if (buf) {
+ XFREE(buf, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+ buf = NULL;
+ }
+
+ wc_FreeCertList(certList, pkcs12->heap);
+ }
+
+ return ret;
+}
+
+
+/* Helper function to shroud keys.
+ *
+ * pkcs12 structure to use with shrouding key
+ * rng random number generator used
+ * out buffer to hold results
+ * outSz size of out buffer
+ * key key that is going to be shrouded
+ * keySz size of key buffer
+ * vAlgo algorithm version
+ * pass password to use
+ * passSz size of pass buffer
+ * itt number of iterations
+ *
+ * returns the size of the shrouded key on success
+ */
+static int wc_PKCS12_shroud_key(WC_PKCS12* pkcs12, WC_RNG* rng,
+        byte* out, word32* outSz, byte* key, word32 keySz, int vAlgo,
+        const char* pass, int passSz, int itt)
+{
+    void* heap;
+    word32 tmpIdx = 0;
+    int vPKCS = 1; /* PKCS#12 default set to 1 */
+    word32 sz;
+    word32 totalSz = 0;
+    int ret;
+
+
+    if (outSz == NULL || pkcs12 == NULL || rng == NULL || key == NULL ||
+            pass == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    heap = wc_PKCS12_GetHeap(pkcs12);
+
+    /* check if trying to get size */
+    if (out != NULL) {
+        tmpIdx += MAX_LENGTH_SZ + 1; /* save room for length and tag (+1) */
+        sz = *outSz - tmpIdx;
+    }
+    /* NOTE(review): when out == NULL (size-only query) 'sz' is left
+     * uninitialized here and tmpIdx stays 0, so the callees below receive a
+     * NULL output pointer; confirm they treat that as a pure size request
+     * and do not read 'sz'. */
+
+    /* case of no encryption */
+    if (vAlgo < 0) {
+        const byte* curveOID = NULL;
+        word32 oidSz = 0;
+        int algoID;
+
+        WOLFSSL_MSG("creating PKCS12 Key Bag");
+
+        /* check key type and get OID if ECC */
+        if ((ret = wc_GetKeyOID(key, keySz, &curveOID, &oidSz, &algoID, heap))
+                < 0) {
+            return ret;
+        }
+
+        /* PKCS#8 wrapping around key */
+        ret = wc_CreatePKCS8Key(out + tmpIdx, &sz, key, keySz, algoID,
+                curveOID, oidSz);
+    }
+    else {
+        WOLFSSL_MSG("creating PKCS12 Shrouded Key Bag");
+
+        /* legacy PBE_SHA1_DES maps to PKCS#5 algo id 10 for the encoder */
+        if (vAlgo == PBE_SHA1_DES) {
+            vPKCS = PKCS5;
+            vAlgo = 10;
+        }
+
+        ret = UnTraditionalEnc(key, keySz, out + tmpIdx, &sz, pass, passSz,
+                vPKCS, vAlgo, NULL, 0, itt, rng, heap);
+    }
+    /* size-only query: report total size including reserved tag+length room */
+    if (ret == LENGTH_ONLY_E) {
+        *outSz = sz + MAX_LENGTH_SZ + 1;
+        return LENGTH_ONLY_E;
+    }
+    if (ret < 0) {
+        return ret;
+    }
+
+    totalSz += ret;
+
+    /* out should not be null at this point but check before writing */
+    if (out == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* rewind index and set tag and length */
+    tmpIdx -= MAX_LENGTH_SZ + 1;
+    sz = SetExplicit(0, ret, out + tmpIdx);
+    tmpIdx += sz; totalSz += sz;
+    /* slide the shrouded key down to sit right after the actual (shorter)
+     * explicit tag+length that was just written */
+    XMEMMOVE(out + tmpIdx, out + MAX_LENGTH_SZ + 1, ret);
+
+    return totalSz;
+}
+
+
+/* Helper function to create key bag.
+ *
+ * pkcs12 structure to use with key bag
+ * rng random number generator used
+ * out buffer to hold results
+ * outSz size of out buffer
+ * key key that is going into key bag
+ * keySz size of key buffer
+ * algo algorithm version
+ * iter number of iterations
+ * pass password to use
+ * passSz size of pass buffer
+ *
+ * returns the size of the key bag on success
+ */
+static int wc_PKCS12_create_key_bag(WC_PKCS12* pkcs12, WC_RNG* rng,
+        byte* out, word32* outSz, byte* key, word32 keySz, int algo, int iter,
+        char* pass, int passSz)
+{
+    void* heap;
+    byte* tmp;
+    word32 length = 0;
+    word32 idx = 0;
+    word32 totalSz = 0;
+    word32 sz;
+    word32 i;
+    word32 tmpSz;
+    int ret;
+
+    /* get max size for shrouded key */
+    ret = wc_PKCS12_shroud_key(pkcs12, rng, NULL, &length, key, keySz,
+            algo, pass, passSz, iter);
+    if (ret != LENGTH_ONLY_E && ret < 0) {
+        return ret;
+    }
+
+    /* size-only query: worst case is outer SEQUENCE + bag-type OID object
+     * (tag + length + OID bytes) + the shrouded key itself */
+    if (out == NULL) {
+        *outSz = MAX_SEQ_SZ + WC_PKCS12_DATA_OBJ_SZ + 1 + MAX_LENGTH_SZ +
+            length;
+        return LENGTH_ONLY_E;
+    }
+
+    heap = wc_PKCS12_GetHeap(pkcs12);
+
+    /* leave room for sequence */
+    idx += MAX_SEQ_SZ;
+
+    /* write the bag type OID: plain KeyBag when unencrypted, otherwise
+     * ShroudedKeyBag */
+    if (algo < 0) { /* not encrypted */
+        out[idx++] = ASN_OBJECT_ID; totalSz++;
+        sz = SetLength(sizeof(WC_PKCS12_KeyBag_OID), out + idx);
+        idx += sz; totalSz += sz;
+        for (i = 0; i < sizeof(WC_PKCS12_KeyBag_OID); i++) {
+            out[idx++] = WC_PKCS12_KeyBag_OID[i]; totalSz++;
+        }
+    }
+    else { /* encrypted */
+        out[idx++] = ASN_OBJECT_ID; totalSz++;
+        sz = SetLength(sizeof(WC_PKCS12_ShroudedKeyBag_OID), out + idx);
+        idx += sz; totalSz += sz;
+        for (i = 0; i < sizeof(WC_PKCS12_ShroudedKeyBag_OID); i++) {
+            out[idx++] = WC_PKCS12_ShroudedKeyBag_OID[i]; totalSz++;
+        }
+    }
+
+    /* shroud key */
+    tmp = (byte*)XMALLOC(length, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (tmp == NULL) {
+        return MEMORY_E;
+    }
+
+    ret = wc_PKCS12_shroud_key(pkcs12, rng, tmp, &length, key, keySz,
+            algo, pass, passSz, iter);
+    if (ret < 0) {
+        XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+    length = ret;
+    XMEMCPY(out + idx, tmp, length);
+    XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    totalSz += length;
+
+    /* set beginning sequence, then slide content down against the actual
+     * (possibly shorter than MAX_SEQ_SZ) sequence header */
+    tmpSz = SetSequence(totalSz, out);
+    XMEMMOVE(out + tmpSz, out + MAX_SEQ_SZ, totalSz);
+
+    (void)heap;
+    return totalSz + tmpSz;
+}
+
+
+/* Helper function to create cert bag.
+ *
+ * pkcs12 structure to use with cert bag
+ * out buffer to hold results
+ * outSz size of out buffer
+ * cert cert that is going into cert bag
+ * certSz size of cert buffer
+ *
+ * returns the size of the cert bag on success
+ */
+static int wc_PKCS12_create_cert_bag(WC_PKCS12* pkcs12,
+        byte* out, word32* outSz, byte* cert, word32 certSz)
+{
+    word32 length = 0;
+    word32 idx = 0;
+    word32 totalSz = 0;
+    word32 sz;
+    /* worst-case encoded sizes (tag-less) of the two OID objects below;
+     * used only for the size estimate, not as ASN.1 values */
+    int WC_CERTBAG_OBJECT_ID = 13;
+    int WC_CERTBAG1_OBJECT_ID = 12;
+    word32 i;
+    word32 tmpSz;
+
+    /* size-only query */
+    if (out == NULL) {
+        *outSz = MAX_SEQ_SZ + WC_CERTBAG_OBJECT_ID + 1 + MAX_LENGTH_SZ +
+            MAX_SEQ_SZ + WC_CERTBAG1_OBJECT_ID + 1 + MAX_LENGTH_SZ + 1 +
+            MAX_LENGTH_SZ + certSz;
+        return LENGTH_ONLY_E;
+    }
+
+    /* check buffer size able to handle max size */
+    if (*outSz < (MAX_SEQ_SZ + WC_CERTBAG_OBJECT_ID + 1 + MAX_LENGTH_SZ +
+            MAX_SEQ_SZ + WC_CERTBAG1_OBJECT_ID + 1 + MAX_LENGTH_SZ + 1 +
+            MAX_LENGTH_SZ + certSz)) {
+        return BUFFER_E;
+    }
+
+    /* save room for sequence */
+    idx += MAX_SEQ_SZ;
+
+    /* objectId WC_PKCS12_CertBag */
+    out[idx++] = ASN_OBJECT_ID; totalSz++;
+    sz = SetLength(sizeof(WC_PKCS12_CertBag_OID), out + idx);
+    idx += sz; totalSz += sz;
+    for (i = 0; i < sizeof(WC_PKCS12_CertBag_OID); i++) {
+        out[idx++] = WC_PKCS12_CertBag_OID[i]; totalSz++;
+    }
+
+    /**** Cert Bag type 1 ****/
+    out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC); totalSz++;
+
+    /* save room for length and sequence */
+    idx += MAX_LENGTH_SZ;
+    idx += MAX_SEQ_SZ;
+
+    /* object id WC_PKCS12_CertBag_Type1 */
+    out[idx++] = ASN_OBJECT_ID; length++;
+    sz = SetLength(sizeof(WC_PKCS12_CertBag_Type1_OID), out + idx);
+    idx += sz; length += sz;
+    for (i = 0; i < sizeof(WC_PKCS12_CertBag_Type1_OID); i++) {
+        out[idx++] = WC_PKCS12_CertBag_Type1_OID[i]; length++;
+    }
+
+    out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC); length++;
+    sz = 0;
+    idx += MAX_LENGTH_SZ; /* save room for length */
+
+    /* place the cert in the buffer */
+    out[idx++] = ASN_OCTET_STRING; sz++;
+    tmpSz = SetLength(certSz, out + idx);
+    idx += tmpSz; sz += tmpSz;
+    XMEMCPY(out + idx, cert, certSz);
+    idx += certSz; sz += certSz;
+
+    /* rewind idx and place length; then slide the octet-string content down
+     * against the actual length encoding */
+    idx -= (sz + MAX_LENGTH_SZ);
+    tmpSz = SetLength(sz, out + idx);
+    XMEMMOVE(out + idx + tmpSz, out + idx + MAX_LENGTH_SZ, sz);
+    idx += tmpSz + sz; length += tmpSz + sz;
+
+    /* rewind idx and set sequence */
+    idx -= (length + MAX_SEQ_SZ);
+    tmpSz = SetSequence(length, out + idx);
+    XMEMMOVE(out + idx + tmpSz, out + idx + MAX_SEQ_SZ, length);
+    length += tmpSz;
+
+    /* place final length */
+    idx -= MAX_LENGTH_SZ;
+    tmpSz = SetLength(length, out + idx);
+    XMEMMOVE(out + idx + tmpSz, out + idx + MAX_LENGTH_SZ, length);
+    length += tmpSz;
+
+    /* place final sequence */
+    totalSz += length;
+    tmpSz = SetSequence(totalSz, out);
+    XMEMMOVE(out + tmpSz, out + MAX_SEQ_SZ, totalSz);
+
+    (void)pkcs12;
+
+    return totalSz + tmpSz;
+}
+
+
+/* Helper function to encrypt content.
+ *
+ * pkcs12 structure to use with key bag
+ * rng random number generator used
+ * out buffer to hold results
+ * outSz size of out buffer
+ * content content to encrypt
+ * contentSz size of content buffer
+ * vAlgo algorithm version
+ * pass password to use
+ * passSz size of pass buffer
+ * iter number of iterations
+ * type content type i.e WC_PKCS12_ENCRYPTED_DATA or WC_PKCS12_DATA
+ *
+ * returns the size of result on success
+ */
+static int wc_PKCS12_encrypt_content(WC_PKCS12* pkcs12, WC_RNG* rng,
+        byte* out, word32* outSz, byte* content, word32 contentSz, int vAlgo,
+        const char* pass, int passSz, int iter, int type)
+{
+    void* heap;
+    int vPKCS = 1; /* PKCS#12 is always set to 1 */
+    int ret;
+    byte* tmp;
+    word32 idx = 0;
+    word32 totalSz = 0;
+    word32 length = 0;
+    word32 tmpSz;
+    word32 encSz;
+
+    byte seq[MAX_SEQ_SZ]; /* scratch buffer used only to measure encodings */
+
+    WOLFSSL_MSG("encrypting PKCS12 content");
+
+    heap = wc_PKCS12_GetHeap(pkcs12);
+
+    /* ENCRYPTED DATA
+     * ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC
+     * length
+     * sequence
+     * short int
+     * sequence
+     * get object id */
+    if (type == WC_PKCS12_ENCRYPTED_DATA) {
+        word32 outerSz = 0;
+
+        /* size-only call to learn the encrypted-content size */
+        encSz = contentSz;
+        if ((ret = EncryptContent(NULL, contentSz, NULL, &encSz,
+                pass, passSz, vPKCS, vAlgo, NULL, 0, iter, rng, heap)) < 0) {
+            if (ret != LENGTH_ONLY_E) {
+                return ret;
+            }
+        }
+
+        /* calculate size */
+        totalSz = SetObjectId(sizeof(WC_PKCS12_ENCRYPTED_OID), seq);
+        totalSz += sizeof(WC_PKCS12_ENCRYPTED_OID);
+        totalSz += ASN_TAG_SZ;
+
+        length = SetMyVersion(0, seq, 0);
+        tmpSz = SetObjectId(sizeof(WC_PKCS12_DATA_OID), seq);
+        tmpSz += sizeof(WC_PKCS12_DATA_OID);
+        tmpSz += encSz;
+        length += SetSequence(tmpSz, seq) + tmpSz;
+        outerSz = SetSequence(length, seq) + length;
+
+        totalSz += SetLength(outerSz, seq) + outerSz;
+        if (out == NULL) {
+            *outSz = totalSz + SetSequence(totalSz, seq);
+            return LENGTH_ONLY_E;
+        }
+
+        if (*outSz < totalSz + SetSequence(totalSz, seq)) {
+            return BUFFER_E;
+        }
+
+        idx = 0;
+        idx += SetSequence(totalSz, out + idx);
+        idx += SetObjectId(sizeof(WC_PKCS12_ENCRYPTED_OID), out + idx);
+        if (idx + sizeof(WC_PKCS12_ENCRYPTED_OID) > *outSz){
+            return BUFFER_E;
+        }
+        XMEMCPY(out + idx, WC_PKCS12_ENCRYPTED_OID,
+                sizeof(WC_PKCS12_ENCRYPTED_OID));
+        idx += sizeof(WC_PKCS12_ENCRYPTED_OID);
+
+        if (idx + 1 > *outSz){
+            return BUFFER_E;
+        }
+        out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC);
+        idx += SetLength(outerSz, out + idx);
+
+        idx += SetSequence(length, out + idx);
+        idx += SetMyVersion(0, out + idx, 0);
+        tmp = (byte*)XMALLOC(encSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (tmp == NULL) {
+            return MEMORY_E;
+        }
+
+        if ((ret = EncryptContent(content, contentSz, tmp, &encSz,
+                pass, passSz, vPKCS, vAlgo, NULL, 0, iter, rng, heap)) < 0) {
+            XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+            return ret;
+        }
+        encSz = ret;
+
+        #ifdef WOLFSSL_DEBUG_PKCS12
+        {
+            byte* p;
+            for (printf("(size %u) Encrypted Content = ", encSz),
+                    p = (byte*)tmp;
+                p < (byte*)tmp + encSz;
+                printf("%02X", *p), p++);
+            printf("\n");
+        }
+        #endif
+
+        idx += SetSequence(WC_PKCS12_DATA_OBJ_SZ + encSz, out + idx);
+        idx += SetObjectId(sizeof(WC_PKCS12_DATA_OID), out + idx);
+        if (idx + sizeof(WC_PKCS12_DATA_OID) > *outSz){
+            WOLFSSL_MSG("Buffer not large enough for DATA OID");
+            /* fix: free encrypted-content buffer, was leaked on this path */
+            XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+            return BUFFER_E;
+        }
+        XMEMCPY(out + idx, WC_PKCS12_DATA_OID, sizeof(WC_PKCS12_DATA_OID));
+        idx += sizeof(WC_PKCS12_DATA_OID);
+
+        /* copy over encrypted data */
+        if (idx + encSz > *outSz){
+            /* fix: free encrypted-content buffer, was leaked on this path */
+            XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+            return BUFFER_E;
+        }
+        XMEMCPY(out + idx, tmp, encSz);
+        XFREE(tmp, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        idx += encSz;
+        return idx;
+    }
+
+    /* DATA
+     * ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC
+     * length
+     * ASN_OCTET_STRING
+     * length
+     * sequence containing all bags */
+    if (type == WC_PKCS12_DATA) {
+        /* calculate size */
+        totalSz = SetObjectId(sizeof(WC_PKCS12_DATA_OID), seq);
+        totalSz += sizeof(WC_PKCS12_DATA_OID);
+        totalSz += ASN_TAG_SZ;
+
+        length = SetOctetString(contentSz, seq);
+        length += contentSz;
+        totalSz += SetLength(length, seq);
+        totalSz += length;
+
+        if (out == NULL) {
+            *outSz = totalSz + SetSequence(totalSz, seq);
+            return LENGTH_ONLY_E;
+        }
+
+        if (*outSz < (totalSz + SetSequence(totalSz, seq))) {
+            return BUFFER_E;
+        }
+
+        /* place data in output buffer */
+        idx = 0;
+        idx += SetSequence(totalSz, out);
+        idx += SetObjectId(sizeof(WC_PKCS12_DATA_OID), out + idx);
+        if (idx + sizeof(WC_PKCS12_DATA_OID) > *outSz){
+            WOLFSSL_MSG("Buffer not large enough for DATA OID");
+            return BUFFER_E;
+        }
+        XMEMCPY(out + idx, WC_PKCS12_DATA_OID, sizeof(WC_PKCS12_DATA_OID));
+        idx += sizeof(WC_PKCS12_DATA_OID);
+
+        if (idx + 1 > *outSz){
+            return BUFFER_E;
+        }
+        out[idx++] = (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC);
+        idx += SetLength(length, out + idx);
+        idx += SetOctetString(contentSz, out + idx);
+
+        if (idx + contentSz > *outSz){
+            return BUFFER_E;
+        }
+        XMEMCPY(out + idx, content, contentSz);
+        idx += contentSz;
+
+        return idx;
+    }
+
+    WOLFSSL_MSG("Unknown/Unsupported content type");
+    return BAD_FUNC_ARG;
+}
+
+
+/* helper function to create the PKCS12 key content
+ * keyCiSz is output buffer size
+ * returns a pointer to be free'd by caller on success and NULL on failure */
+static byte* PKCS12_create_key_content(WC_PKCS12* pkcs12, int nidKey,
+        word32* keyCiSz, WC_RNG* rng, char* pass, word32 passSz,
+        byte* key, word32 keySz, int iter)
+{
+    byte* keyBuf;
+    word32 keyBufSz = 0;
+    byte* keyCi = NULL;
+    word32 tmpSz;
+    int ret;
+    int algo;
+    void* heap;
+
+    heap = wc_PKCS12_GetHeap(pkcs12);
+    *keyCiSz = 0;
+    /* map the requested encryption type to the internal algo id (-1 = none) */
+    switch (nidKey) {
+        case PBE_SHA1_RC4_128:
+            algo = 1;
+            break;
+
+        case PBE_SHA1_DES:
+            algo = 2;
+            break;
+
+        case PBE_SHA1_DES3:
+            algo = 3;
+            break;
+
+        /* no encryption */
+        case -1:
+            algo = -1;
+            break;
+
+        default:
+            WOLFSSL_MSG("Unknown/Unsupported key encryption");
+            return NULL;
+    }
+
+    /* get max size for key bag */
+    ret = wc_PKCS12_create_key_bag(pkcs12, rng, NULL, &keyBufSz, key, keySz,
+            algo, iter, pass, passSz);
+    if (ret != LENGTH_ONLY_E && ret < 0) {
+        WOLFSSL_MSG("Error getting key bag size");
+        return NULL;
+    }
+
+    /* account for sequence around bag */
+    keyBufSz += MAX_SEQ_SZ;
+    keyBuf = (byte*)XMALLOC(keyBufSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (keyBuf == NULL) {
+        WOLFSSL_MSG("Memory error creating keyBuf buffer");
+        return NULL;
+    }
+
+    /* fill the bag after the reserved sequence header room */
+    ret = wc_PKCS12_create_key_bag(pkcs12, rng, keyBuf + MAX_SEQ_SZ, &keyBufSz,
+            key, keySz, algo, iter, pass, passSz);
+    if (ret < 0) {
+        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        WOLFSSL_MSG("Error creating key bag");
+        return NULL;
+    }
+    keyBufSz = ret;
+
+    /* write actual sequence header and slide bag content down against it */
+    tmpSz = SetSequence(keyBufSz, keyBuf);
+    XMEMMOVE(keyBuf + tmpSz, keyBuf + MAX_SEQ_SZ, keyBufSz);
+    keyBufSz += tmpSz;
+
+    #ifdef WOLFSSL_DEBUG_PKCS12
+    {
+        word32 i;
+        printf("(size %u) Key Bag = ", keyBufSz);
+        for (i = 0; i < keyBufSz; i++)
+            printf("%02X", keyBuf[i]);
+        printf("\n");
+    }
+    #endif
+
+    /* size-only pass for the surrounding Content Info */
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, NULL, keyCiSz,
+            NULL, keyBufSz, algo, pass, passSz, iter, WC_PKCS12_DATA);
+    if (ret != LENGTH_ONLY_E) {
+        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        WOLFSSL_MSG("Error getting key encrypt content size");
+        return NULL;
+    }
+    keyCi = (byte*)XMALLOC(*keyCiSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (keyCi == NULL) {
+        XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return NULL;
+    }
+
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, keyCi, keyCiSz,
+            keyBuf, keyBufSz, algo, pass, passSz, iter, WC_PKCS12_DATA);
+    XFREE(keyBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret < 0 ) {
+        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        WOLFSSL_MSG("Error creating key encrypt content");
+        return NULL;
+    }
+    *keyCiSz = ret;
+
+    #ifdef WOLFSSL_DEBUG_PKCS12
+    {
+        word32 i;
+        printf("(size %u) Key Content Info = ", *keyCiSz);
+        for (i = 0; i < *keyCiSz; i++)
+            printf("%02X", keyCi[i]);
+        printf("\n");
+    }
+    #endif
+
+    (void)heap;
+    return keyCi;
+}
+
+
+/* helper function to create the PKCS12 certificate content
+ * certCiSz is output buffer size
+ * returns a pointer to be free'd by caller on success and NULL on failure */
+static byte* PKCS12_create_cert_content(WC_PKCS12* pkcs12, int nidCert,
+        WC_DerCertList* ca, byte* cert, word32 certSz, word32* certCiSz,
+        WC_RNG* rng, char* pass, word32 passSz, int iter)
+{
+    int algo;
+    int ret;
+    int type;
+
+    byte* certBuf = NULL;
+    word32 certBufSz;
+    word32 idx;
+    word32 sz;
+    word32 tmpSz;
+
+    byte* certCi;
+    void* heap;
+
+    heap = wc_PKCS12_GetHeap(pkcs12);
+    /* map requested encryption type to content type + internal algo id */
+    switch (nidCert) {
+        case PBE_SHA1_RC4_128:
+            type = WC_PKCS12_ENCRYPTED_DATA;
+            algo = 1;
+            break;
+
+        case PBE_SHA1_DES:
+            type = WC_PKCS12_ENCRYPTED_DATA;
+            algo = 2;
+            break;
+
+        case PBE_SHA1_DES3:
+            type = WC_PKCS12_ENCRYPTED_DATA;
+            algo = 3;
+            break;
+
+        /* no encryption */
+        case -1:
+            type = WC_PKCS12_DATA;
+            algo = -1;
+            break;
+
+        default:
+            WOLFSSL_MSG("Unknown/Unsupported certificate encryption");
+            return NULL;
+    }
+
+    /* get max size of buffer needed */
+    ret = wc_PKCS12_create_cert_bag(pkcs12, NULL, &certBufSz, cert, certSz);
+    if (ret != LENGTH_ONLY_E) {
+        return NULL;
+    }
+
+    if (ca != NULL) {
+        WC_DerCertList* current = ca;
+        word32 curBufSz = 0;
+
+        /* get max buffer size */
+        while (current != NULL) {
+            ret = wc_PKCS12_create_cert_bag(pkcs12, NULL, &curBufSz,
+                    current->buffer, current->bufferSz);
+            if (ret != LENGTH_ONLY_E) {
+                return NULL;
+            }
+            certBufSz += curBufSz;
+            current = current->next;
+        }
+    }
+
+    /* account for Sequence that holds all certificate bags */
+    certBufSz += MAX_SEQ_SZ;
+
+    /* completed getting max size, now create buffer and start adding bags */
+    certBuf = (byte*)XMALLOC(certBufSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (certBuf == NULL) {
+        WOLFSSL_MSG("Memory error creating certificate bags");
+        return NULL;
+    }
+
+    idx = 0;
+    idx += MAX_SEQ_SZ;
+
+    /* main certificate bag first */
+    sz = certBufSz - idx;
+    if ((ret = wc_PKCS12_create_cert_bag(pkcs12, certBuf + idx, &sz,
+            cert, certSz)) < 0) {
+        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return NULL;
+    }
+    idx += ret;
+
+    /* then one bag per extra CA certificate */
+    if (ca != NULL) {
+        WC_DerCertList* current = ca;
+
+        while (current != NULL) {
+            sz = certBufSz - idx;
+            if ((ret = wc_PKCS12_create_cert_bag(pkcs12, certBuf + idx, &sz,
+                    current->buffer, current->bufferSz)) < 0) {
+                XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+                return NULL;
+            }
+            idx += ret;
+            current = current->next;
+        }
+    }
+
+    /* set sequence and create encrypted content with all certificate bags */
+    tmpSz = SetSequence(idx - MAX_SEQ_SZ, certBuf);
+    XMEMMOVE(certBuf + tmpSz, certBuf + MAX_SEQ_SZ, idx - MAX_SEQ_SZ);
+    certBufSz = tmpSz + (idx - MAX_SEQ_SZ);
+
+    /* get buffer size needed for content info */
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, NULL, certCiSz,
+            NULL, certBufSz, algo, pass, passSz, iter, type);
+    if (ret != LENGTH_ONLY_E) {
+        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        /* NOTE(review): log label names the public caller, not this helper */
+        WOLFSSL_LEAVE("wc_PKCS12_create()", ret);
+        return NULL;
+    }
+    certCi = (byte*)XMALLOC(*certCiSz, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (certCi == NULL) {
+        XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return NULL;
+    }
+
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, certCi, certCiSz,
+            certBuf, certBufSz, algo, pass, passSz, iter, type);
+    XFREE(certBuf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret < 0) {
+        WOLFSSL_LEAVE("wc_PKCS12_create()", ret);
+        return NULL;
+    }
+    *certCiSz = ret;
+
+    #ifdef WOLFSSL_DEBUG_PKCS12
+    {
+        word32 i;
+        printf("(size %u) Encrypted Certificate Content Info = ", *certCiSz);
+        for (i = 0; i < *certCiSz; i++)
+            printf("%02X", certCi[i]);
+        printf("\n");
+    }
+    #endif
+
+    (void)heap;
+    return certCi;
+}
+
+
+/* helper function to create the PKCS12 safe
+ * returns 0 on success */
+static int PKCS12_create_safe(WC_PKCS12* pkcs12, byte* certCi, word32 certCiSz,
+        byte* keyCi, word32 keyCiSz, WC_RNG* rng, char* pass, word32 passSz,
+        int iter)
+{
+    int length;
+    int ret;
+    byte seq[MAX_SEQ_SZ]; /* scratch, used only to measure encodings */
+    word32 safeDataSz;
+    word32 innerDataSz;
+    byte *innerData = NULL;
+    byte *safeData = NULL;
+    word32 idx;
+
+    /* inner data = SEQUENCE { certCi || keyCi } */
+    innerDataSz = certCiSz + keyCiSz+SetSequence(certCiSz + keyCiSz, seq);
+
+    /* add Content Info structs to safe, key first then cert */
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, NULL, &safeDataSz,
+            NULL, innerDataSz, 0, NULL, 0, 0, WC_PKCS12_DATA);
+    if (ret != LENGTH_ONLY_E) {
+        return ret;
+    }
+
+    safeData = (byte*)XMALLOC(safeDataSz, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (safeData == NULL) {
+        WOLFSSL_MSG("Error malloc'ing safe data buffer");
+        return MEMORY_E;
+    }
+
+    /* create sequence of inner data */
+    innerData = (byte*)XMALLOC(innerDataSz, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+    if (innerData == NULL) {
+        WOLFSSL_MSG("Error malloc'ing inner data buffer");
+        XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return MEMORY_E;
+    }
+    idx = 0;
+    idx += SetSequence(certCiSz + keyCiSz, innerData);
+    XMEMCPY(innerData + idx, certCi, certCiSz);
+    XMEMCPY(innerData + idx + certCiSz, keyCi, keyCiSz);
+
+    /* wrap the inner sequence in an (unencrypted) DATA Content Info */
+    ret = wc_PKCS12_encrypt_content(pkcs12, rng, safeData, &safeDataSz,
+            innerData, innerDataSz, 0, pass, passSz, iter, WC_PKCS12_DATA);
+    XFREE(innerData, pkcs12->heap, DYNAMIC_TYPE_PKCS);
+    if (ret < 0 ) {
+        WOLFSSL_MSG("Error setting data type for safe contents");
+        XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+    idx = 0;
+
+    /* strip outer sequence, then parse the result back into pkcs12->safe */
+    ret = GetSequence(safeData, &idx, &length, safeDataSz);
+    if (ret < 0) {
+        WOLFSSL_MSG("Error getting first sequence of safe");
+        XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        return ret;
+    }
+
+    ret = GetSafeContent(pkcs12, safeData, &idx, safeDataSz);
+    XFREE(safeData, pkcs12->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret < 0) {
+        WOLFSSL_MSG("Unable to create safe contents");
+        return ret;
+    }
+    return 0;
+}
+
+
+/*
+ * pass : password to use with encryption
+ * passSz : size of the password buffer
+ * name : friendlyName to use
+ * key : DER format of key
+ * keySz : size of key buffer
+ * cert : DER format of certificate
+ * certSz : size of the certificate buffer
+ * ca : a list of extra certificates
+ * nidKey : type of encryption to use on the key (-1 means no encryption)
+ * nidCert : type of encryption to use on the certificate
+ * (-1 means no encryption)
+ * iter : number of iterations with encryption
+ * macIter : number of iterations when creating MAC
+ * keyType : flag for signature and/or encryption key
+ * heap : pointer to allocate from memory
+ *
+ * returns a pointer to a new WC_PKCS12 structure on success and NULL if failed
+ */
+WC_PKCS12* wc_PKCS12_create(char* pass, word32 passSz, char* name,
+    byte* key, word32 keySz, byte* cert, word32 certSz, WC_DerCertList* ca,
+    int nidKey, int nidCert, int iter, int macIter, int keyType, void* heap)
+{
+    WC_PKCS12* pkcs12;
+    WC_RNG rng;
+    int ret;
+
+    byte* certCi = NULL;
+    byte* keyCi = NULL;
+    word32 certCiSz;
+    word32 keyCiSz;
+
+    WOLFSSL_ENTER("wc_PKCS12_create()");
+
+    if ((ret = wc_InitRng_ex(&rng, heap, INVALID_DEVID)) != 0) {
+        return NULL;
+    }
+
+    if ((pkcs12 = wc_PKCS12_new()) == NULL) {
+        wc_FreeRng(&rng);
+        WOLFSSL_LEAVE("wc_PKCS12_create", MEMORY_E);
+        return NULL;
+    }
+
+    if ((ret = wc_PKCS12_SetHeap(pkcs12, heap)) != 0) {
+        wc_PKCS12_free(pkcs12);
+        wc_FreeRng(&rng);
+        WOLFSSL_LEAVE("wc_PKCS12_create", ret);
+        return NULL;
+    }
+
+    /* non-positive iteration count falls back to the library default */
+    if (iter <= 0) {
+        iter = WC_PKCS12_ITT_DEFAULT;
+    }
+
+    /**** add private key bag ****/
+    keyCi = PKCS12_create_key_content(pkcs12, nidKey, &keyCiSz, &rng,
+            pass, passSz, key, keySz, iter);
+    if (keyCi == NULL) {
+        wc_PKCS12_free(pkcs12);
+        wc_FreeRng(&rng);
+        return NULL;
+    }
+
+    /**** add main certificate bag and extras ****/
+    certCi = PKCS12_create_cert_content(pkcs12, nidCert, ca, cert, certSz,
+            &certCiSz, &rng, pass, passSz, iter);
+    if (certCi == NULL) {
+        XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
+        wc_PKCS12_free(pkcs12);
+        wc_FreeRng(&rng);
+        return NULL;
+    }
+
+    /**** create safe and Content Info ****/
+    ret = PKCS12_create_safe(pkcs12, certCi, certCiSz, keyCi, keyCiSz, &rng,
+            pass, passSz, iter);
+    /* content-info buffers are consumed by the safe; free them either way */
+    XFREE(keyCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(certCi, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (ret != 0) {
+        WOLFSSL_MSG("Unable to create PKCS12 safe");
+        wc_PKCS12_free(pkcs12);
+        wc_FreeRng(&rng);
+        return NULL;
+    }
+
+    /* create MAC (only when a positive MAC iteration count was requested) */
+    if (macIter > 0) {
+        MacData* mac;
+        byte digest[WC_MAX_DIGEST_SIZE]; /* for MAC */
+
+        mac = (MacData*)XMALLOC(sizeof(MacData), heap, DYNAMIC_TYPE_PKCS);
+        if (mac == NULL) {
+            wc_PKCS12_free(pkcs12);
+            wc_FreeRng(&rng);
+            WOLFSSL_MSG("Error malloc'ing mac data buffer");
+            return NULL;
+        }
+        XMEMSET(mac, 0, sizeof(MacData));
+        pkcs12->signData = mac; /* now wc_PKCS12_free will free all mac too */
+
+        /* pick the strongest hash compiled into this build */
+        #ifndef NO_SHA256
+            mac->oid = SHA256h;
+        #elif !defined(NO_SHA)
+            mac->oid = SHA;
+        #elif defined(WOLFSSL_SHA384)
+            mac->oid = SHA384;
+        #elif defined(WOLFSSL_SHA512)
+            mac->oid = SHA512;
+        #else
+            WOLFSSL_MSG("No supported hash algorithm compiled in!");
+            wc_PKCS12_free(pkcs12);
+            wc_FreeRng(&rng);
+            return NULL;
+        #endif
+
+        /* store number of iterations */
+        mac->itt = macIter;
+
+        /* set mac salt */
+        mac->saltSz = 8;
+        mac->salt = (byte*)XMALLOC(mac->saltSz, heap, DYNAMIC_TYPE_PKCS);
+        if (mac->salt == NULL) {
+            wc_PKCS12_free(pkcs12);
+            wc_FreeRng(&rng);
+            WOLFSSL_MSG("Error malloc'ing salt data buffer");
+            return NULL;
+        }
+
+        if ((ret = wc_RNG_GenerateBlock(&rng, mac->salt, mac->saltSz)) != 0) {
+            WOLFSSL_MSG("Error generating random salt");
+            wc_PKCS12_free(pkcs12);
+            wc_FreeRng(&rng);
+            return NULL;
+        }
+        ret = wc_PKCS12_create_mac(pkcs12, pkcs12->safe->data,
+                pkcs12->safe->dataSz, (const byte*)pass, passSz, digest,
+                WC_MAX_DIGEST_SIZE);
+        if (ret < 0) {
+            wc_PKCS12_free(pkcs12);
+            wc_FreeRng(&rng);
+            WOLFSSL_MSG("Error creating mac");
+            WOLFSSL_LEAVE("wc_PKCS12_create", ret);
+            return NULL;
+        }
+
+        /* on success ret is the digest length */
+        mac->digestSz = ret;
+        mac->digest = (byte*)XMALLOC(ret, heap, DYNAMIC_TYPE_PKCS);
+        if (mac->digest == NULL) {
+            WOLFSSL_MSG("Error malloc'ing mac digest buffer");
+            wc_PKCS12_free(pkcs12);
+            wc_FreeRng(&rng);
+            return NULL;
+        }
+        XMEMCPY(mac->digest, digest, mac->digestSz);
+    }
+    else {
+        pkcs12->signData = NULL;
+    }
+
+    wc_FreeRng(&rng);
+    (void)name;    /* friendlyName not yet used by this implementation */
+    (void)keyType; /* key usage flag not yet used by this implementation */
+
+    return pkcs12;
+}
+
+
+/* if using a specific memory heap */
+int wc_PKCS12_SetHeap(WC_PKCS12* pkcs12, void* heap)
+{
+    if (pkcs12 == NULL)
+        return BAD_FUNC_ARG;
+
+    /* remember heap hint for allocations tied to this structure */
+    pkcs12->heap = heap;
+    return 0;
+}
+
+
+/* getter for heap */
+void* wc_PKCS12_GetHeap(WC_PKCS12* pkcs12)
+{
+    /* a NULL structure has no heap hint to report */
+    return (pkcs12 != NULL) ? pkcs12->heap : NULL;
+}
+
+#undef ERROR_OUT
+
+#endif /* !NO_ASN && !NO_PWDBASED && HAVE_PKCS12 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pkcs7.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pkcs7.c
index 2f66ea216..e420cad37 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pkcs7.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pkcs7.c
@@ -1,8 +1,8 @@
/* pkcs7.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -30,45 +31,512 @@
#include <wolfssl/wolfcrypt/pkcs7.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/hash.h>
+#ifndef NO_RSA
+ #include <wolfssl/wolfcrypt/rsa.h>
+#endif
+#ifdef HAVE_ECC
+ #include <wolfssl/wolfcrypt/ecc.h>
+#endif
+#ifdef HAVE_LIBZ
+ #include <wolfssl/wolfcrypt/compress.h>
+#endif
+#ifndef NO_PWDBASED
+ #include <wolfssl/wolfcrypt/pwdbased.h>
+#endif
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
+/* direction for processing, encoding or decoding */
+typedef enum {
+ WC_PKCS7_ENCODE,
+ WC_PKCS7_DECODE
+} pkcs7Direction;
+
+#define NO_USER_CHECK 0
+
+/* holds information about the signers */
+struct PKCS7SignerInfo {
+ int version;
+ byte *sid;
+ word32 sidSz;
+};
+
+
+#ifndef NO_PKCS7_STREAM
+
+#define MAX_PKCS7_STREAM_BUFFER 256
+struct PKCS7State {
+ byte* tmpCert;
+ byte* bufferPt;
+ byte* key;
+ byte* nonce; /* stored nonce */
+ byte* aad; /* additional data for AEAD algos */
+ byte* tag; /* tag data for AEAD algos */
+ byte* content;
+ byte* buffer; /* main internal read buffer */
+
+ /* stack variables to store for when returning */
+ word32 varOne;
+ int varTwo;
+ int varThree;
+
+ word32 vers;
+ word32 idx; /* index read into current input buffer */
+ word32 maxLen; /* sanity cap on maximum amount of data to allow
+ * needed for GetSequence and other calls */
+ word32 length; /* amount of data stored */
+ word32 bufferSz; /* size of internal buffer */
+ word32 expected; /* next amount of data expected, if needed */
+ word32 totalRd; /* total amount of bytes read */
+ word32 nonceSz; /* size of nonce stored */
+ word32 aadSz; /* size of additional AEAD data */
+ word32 tagSz; /* size of tag for AEAD */
+ word32 contentSz;
+ byte tmpIv[MAX_CONTENT_IV_SIZE]; /* store IV if needed */
+#ifdef WC_PKCS7_STREAM_DEBUG
+ word32 peakUsed; /* most bytes used for struct at any one time */
+ word32 peakRead; /* most bytes used by read buffer */
+#endif
+ byte multi:1; /* flag for if content is in multiple parts */
+ byte flagOne:1;
+ byte detached:1; /* flag to indicate detached signature is present */
+};
+
- static INLINE word32 min(word32 a, word32 b)
- {
- return a > b ? b : a;
+enum PKCS7_MaxLen {
+ PKCS7_DEFAULT_PEEK = 0,
+ PKCS7_SEQ_PEEK
+};
+
+/* creates a PKCS7State structure and returns 0 on success */
+static int wc_PKCS7_CreateStream(PKCS7* pkcs7)
+{
+ WOLFSSL_MSG("creating PKCS7 stream structure");
+ pkcs7->stream = (PKCS7State*)XMALLOC(sizeof(PKCS7State), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->stream == NULL) {
+ return MEMORY_E;
+ }
+ XMEMSET(pkcs7->stream, 0, sizeof(PKCS7State));
+#ifdef WC_PKCS7_STREAM_DEBUG
+ printf("\nCreating new PKCS#7 stream %p\n", pkcs7->stream);
+#endif
+ return 0;
+}
+
+
+static void wc_PKCS7_ResetStream(PKCS7* pkcs7)
+{
+ if (pkcs7 != NULL && pkcs7->stream != NULL) {
+#ifdef WC_PKCS7_STREAM_DEBUG
+ /* collect final data point in case more was read right before reset */
+ if (pkcs7->stream->length > pkcs7->stream->peakRead) {
+ pkcs7->stream->peakRead = pkcs7->stream->length;
+ }
+ if (pkcs7->stream->bufferSz + pkcs7->stream->aadSz +
+ pkcs7->stream->nonceSz + pkcs7->stream->tagSz >
+ pkcs7->stream->peakUsed) {
+ pkcs7->stream->peakUsed = pkcs7->stream->bufferSz +
+ pkcs7->stream->aadSz + pkcs7->stream->nonceSz +
+ pkcs7->stream->tagSz;
+ }
+
+ /* print out debugging statistics */
+ if (pkcs7->stream->peakUsed > 0 || pkcs7->stream->peakRead > 0) {
+ printf("PKCS#7 STREAM:\n\tPeak heap used by struct = %d"
+ "\n\tPeak read buffer bytes = %d"
+ "\n\tTotal bytes read = %d"
+ "\n",
+ pkcs7->stream->peakUsed, pkcs7->stream->peakRead,
+ pkcs7->stream->totalRd);
+ }
+ printf("PKCS#7 stream reset : Address [%p]\n", pkcs7->stream);
+ #endif
+
+ /* free any buffers that may be allocated */
+ XFREE(pkcs7->stream->aad, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(pkcs7->stream->tag, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(pkcs7->stream->nonce, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(pkcs7->stream->buffer, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(pkcs7->stream->key, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->stream->aad = NULL;
+ pkcs7->stream->tag = NULL;
+ pkcs7->stream->nonce = NULL;
+ pkcs7->stream->buffer = NULL;
+ pkcs7->stream->key = NULL;
+
+ /* reset values, note that content and tmpCert are saved */
+ pkcs7->stream->maxLen = 0;
+ pkcs7->stream->length = 0;
+ pkcs7->stream->idx = 0;
+ pkcs7->stream->expected = 0;
+ pkcs7->stream->totalRd = 0;
+ pkcs7->stream->bufferSz = 0;
+
+ pkcs7->stream->multi = 0;
+ pkcs7->stream->flagOne = 0;
+ pkcs7->stream->detached = 0;
+ pkcs7->stream->varOne = 0;
+ pkcs7->stream->varTwo = 0;
+ pkcs7->stream->varThree = 0;
+ }
+}
+
+
+static void wc_PKCS7_FreeStream(PKCS7* pkcs7)
+{
+ if (pkcs7 != NULL && pkcs7->stream != NULL) {
+ wc_PKCS7_ResetStream(pkcs7);
+
+ XFREE(pkcs7->stream->content, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(pkcs7->stream->tmpCert, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->stream->content = NULL;
+ pkcs7->stream->tmpCert = NULL;
+
+ XFREE(pkcs7->stream, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->stream = NULL;
+ }
+}
+
+
+/* used to increase the max size for internal buffer
+ * returns 0 on success */
+static int wc_PKCS7_GrowStream(PKCS7* pkcs7, word32 newSz)
+{
+ byte* pt;
+
+ pt = (byte*)XMALLOC(newSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (pt == NULL) {
+ return MEMORY_E;
+ }
+ XMEMCPY(pt, pkcs7->stream->buffer, pkcs7->stream->bufferSz);
+
+#ifdef WC_PKCS7_STREAM_DEBUG
+ printf("PKCS7 increasing internal stream buffer %d -> %d\n",
+ pkcs7->stream->bufferSz, newSz);
+#endif
+ pkcs7->stream->bufferSz = newSz;
+ XFREE(pkcs7->stream->buffer, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->stream->buffer = pt;
+ return 0;
+}
+
+
+/* pt gets set to the buffer that is holding data in the case that stream struct
+ * is used.
+ *
+ * Sets idx to be the current offset into "pt" buffer
+ * returns 0 on success
+ */
+static int wc_PKCS7_AddDataToStream(PKCS7* pkcs7, byte* in, word32 inSz,
+ word32 expected, byte** pt, word32* idx)
+{
+ word32 rdSz = pkcs7->stream->idx;
+
+ /* If the input size minus current index into input buffer is greater than
+ * the expected size then use the input buffer. If data is already stored
+ * in stream buffer or if there is not enough input data available then use
+ * the stream buffer. */
+ if (inSz - rdSz >= expected && pkcs7->stream->length == 0) {
+ /* storing input buffer is not needed */
+ *pt = in; /* reset in case previously used internal buffer */
+ *idx = rdSz;
+ return 0;
+ }
+
+ /* is there enough stored in buffer already? */
+ if (pkcs7->stream->length >= expected) {
+ *idx = 0; /* start reading from beginning of stream buffer */
+ *pt = pkcs7->stream->buffer;
+ return 0;
+ }
+
+ /* check if all data has been read from input */
+ if (rdSz >= inSz) {
+ /* no more input to read, reset input index and request more data */
+ pkcs7->stream->idx = 0;
+ return WC_PKCS7_WANT_READ_E;
+ }
+
+ /* try to store input data into stream buffer */
+ if (inSz - rdSz > 0 && pkcs7->stream->length < expected) {
+ int len = min(inSz - rdSz, expected - pkcs7->stream->length);
+
+ /* sanity check that the input buffer is not internal buffer */
+ if (in == pkcs7->stream->buffer) {
+ return WC_PKCS7_WANT_READ_E;
+ }
+
+ /* check if internal buffer size needs to be increased */
+ if (len + pkcs7->stream->length > pkcs7->stream->bufferSz) {
+ int ret = wc_PKCS7_GrowStream(pkcs7, expected);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ XMEMCPY(pkcs7->stream->buffer + pkcs7->stream->length, in + rdSz, len);
+ pkcs7->stream->length += len;
+ pkcs7->stream->idx += len;
+ pkcs7->stream->totalRd += len;
+ }
+
+#ifdef WC_PKCS7_STREAM_DEBUG
+ /* collects memory usage for debugging */
+ if (pkcs7->stream->length > pkcs7->stream->peakRead) {
+ pkcs7->stream->peakRead = pkcs7->stream->length;
}
+ if (pkcs7->stream->bufferSz + pkcs7->stream->aadSz + pkcs7->stream->nonceSz +
+ pkcs7->stream->tagSz > pkcs7->stream->peakUsed) {
+ pkcs7->stream->peakUsed = pkcs7->stream->bufferSz +
+ pkcs7->stream->aadSz + pkcs7->stream->nonceSz + pkcs7->stream->tagSz;
+ }
+#endif
+
+ /* if not enough data was read in then request more */
+ if (pkcs7->stream->length < expected) {
+ pkcs7->stream->idx = 0;
+ return WC_PKCS7_WANT_READ_E;
+ }
+
+ /* adjust pointer to read from stored buffer */
+ *idx = 0;
+ *pt = pkcs7->stream->buffer;
+ return 0;
+}
+
-#endif /* WOLFSSL_HAVE_MIN */
+/* Does two things
+ * 1) Tries to get the length from current buffer and set it as max length
+ * 2) Retrieves the set max length
+ *
+ * if no flag value is set then the stored max length is returned.
+ * returns length found on success and defSz if no stored data is found
+ */
+static long wc_PKCS7_GetMaxStream(PKCS7* pkcs7, byte flag, byte* in,
+ word32 defSz)
+{
+ /* check there is a buffer to read from */
+ if (pkcs7) {
+ int length = 0, ret;
+ word32 idx = 0, maxIdx;
+ byte* pt;
+
+ if (flag != PKCS7_DEFAULT_PEEK) {
+ if (pkcs7->stream->length > 0) {
+ length = pkcs7->stream->length;
+ pt = pkcs7->stream->buffer;
+ }
+ else {
+ length = defSz;
+ pt = in;
+ }
+ maxIdx = (word32)length;
+
+ if (length < MAX_SEQ_SZ) {
+ WOLFSSL_MSG("PKCS7 Error not enough data for SEQ peek\n");
+ return 0;
+ }
+ if (flag == PKCS7_SEQ_PEEK) {
+ if ((ret = GetSequence_ex(pt, &idx, &length, maxIdx,
+ NO_USER_CHECK)) < 0) {
+ return ret;
+ }
+
+ #ifdef ASN_BER_TO_DER
+ if (length == 0 && ret == 0) {
+ idx = 0;
+ if ((ret = wc_BerToDer(pt, defSz, NULL,
+ (word32*)&length)) != LENGTH_ONLY_E) {
+ return ret;
+ }
+ }
+ #endif /* ASN_BER_TO_DER */
+ pkcs7->stream->maxLen = length + idx;
+ }
+ }
+
+ if (pkcs7->stream->maxLen == 0) {
+ pkcs7->stream->maxLen = defSz;
+ }
+
+ return pkcs7->stream->maxLen;
+ }
+
+ return defSz;
+}
+
+
+/* setter function for stored variables */
+static void wc_PKCS7_StreamStoreVar(PKCS7* pkcs7, word32 var1, int var2,
+ int var3)
+{
+ if (pkcs7 != NULL && pkcs7->stream != NULL) {
+ pkcs7->stream->varOne = var1;
+ pkcs7->stream->varTwo = var2;
+ pkcs7->stream->varThree = var3;
+ }
+}
+
+/* getter function for stored variables */
+static void wc_PKCS7_StreamGetVar(PKCS7* pkcs7, word32* var1, int* var2,
+ int* var3)
+{
+ if (pkcs7 != NULL && pkcs7->stream != NULL) {
+ if (var1 != NULL) *var1 = pkcs7->stream->varOne;
+ if (var2 != NULL) *var2 = pkcs7->stream->varTwo;
+ if (var3 != NULL) *var3 = pkcs7->stream->varThree;
+ }
+}
+
+
+/* common update of index and total read after section complete
+ * returns 0 on success */
+static int wc_PKCS7_StreamEndCase(PKCS7* pkcs7, word32* tmpIdx, word32* idx)
+{
+ int ret = 0;
+
+ if (pkcs7->stream->length > 0) {
+ if (pkcs7->stream->length < *idx) {
+ WOLFSSL_MSG("PKCS7 read too much data from internal buffer");
+ ret = BUFFER_E;
+ }
+ else {
+ XMEMMOVE(pkcs7->stream->buffer, pkcs7->stream->buffer + *idx,
+ pkcs7->stream->length - *idx);
+ pkcs7->stream->length -= *idx;
+ }
+ }
+ else {
+ pkcs7->stream->totalRd += *idx - *tmpIdx;
+ pkcs7->stream->idx = *idx; /* adjust index into input buffer */
+ *tmpIdx = *idx;
+ }
+
+ return ret;
+}
+#endif /* NO_PKCS7_STREAM */
+
+#ifdef WC_PKCS7_STREAM_DEBUG
+/* used to print out human readable state for debugging */
+static const char* wc_PKCS7_GetStateName(int in)
+{
+ switch (in) {
+ case WC_PKCS7_START: return "WC_PKCS7_START";
+
+ case WC_PKCS7_STAGE2: return "WC_PKCS7_STAGE2";
+ case WC_PKCS7_STAGE3: return "WC_PKCS7_STAGE3";
+ case WC_PKCS7_STAGE4: return "WC_PKCS7_STAGE4";
+ case WC_PKCS7_STAGE5: return "WC_PKCS7_STAGE5";
+ case WC_PKCS7_STAGE6: return "WC_PKCS7_STAGE6";
+
+ /* parse info set */
+ case WC_PKCS7_INFOSET_START: return "WC_PKCS7_INFOSET_START";
+ case WC_PKCS7_INFOSET_BER: return "WC_PKCS7_INFOSET_BER";
+ case WC_PKCS7_INFOSET_STAGE1: return "WC_PKCS7_INFOSET_STAGE1";
+ case WC_PKCS7_INFOSET_STAGE2: return "WC_PKCS7_INFOSET_STAGE2";
+ case WC_PKCS7_INFOSET_END: return "WC_PKCS7_INFOSET_END";
+
+ /* decode enveloped data */
+ case WC_PKCS7_ENV_2: return "WC_PKCS7_ENV_2";
+ case WC_PKCS7_ENV_3: return "WC_PKCS7_ENV_3";
+ case WC_PKCS7_ENV_4: return "WC_PKCS7_ENV_4";
+ case WC_PKCS7_ENV_5: return "WC_PKCS7_ENV_5";
+
+ /* decode auth enveloped */
+ case WC_PKCS7_AUTHENV_2: return "WC_PKCS7_AUTHENV_2";
+ case WC_PKCS7_AUTHENV_3: return "WC_PKCS7_AUTHENV_3";
+ case WC_PKCS7_AUTHENV_4: return "WC_PKCS7_AUTHENV_4";
+ case WC_PKCS7_AUTHENV_5: return "WC_PKCS7_AUTHENV_5";
+ case WC_PKCS7_AUTHENV_6: return "WC_PKCS7_AUTHENV_6";
+ case WC_PKCS7_AUTHENV_ATRB: return "WC_PKCS7_AUTHENV_ATRB";
+ case WC_PKCS7_AUTHENV_ATRBEND: return "WC_PKCS7_AUTHENV_ATRBEND";
+ case WC_PKCS7_AUTHENV_7: return "WC_PKCS7_AUTHENV_7";
+
+ /* decryption state types */
+ case WC_PKCS7_DECRYPT_KTRI: return "WC_PKCS7_DECRYPT_KTRI";
+ case WC_PKCS7_DECRYPT_KTRI_2: return "WC_PKCS7_DECRYPT_KTRI_2";
+ case WC_PKCS7_DECRYPT_KTRI_3: return "WC_PKCS7_DECRYPT_KTRI_3";
+
+ case WC_PKCS7_DECRYPT_KARI: return "WC_PKCS7_DECRYPT_KARI";
+ case WC_PKCS7_DECRYPT_KEKRI: return "WC_PKCS7_DECRYPT_KEKRI";
+ case WC_PKCS7_DECRYPT_PWRI: return "WC_PKCS7_DECRYPT_PWRI";
+ case WC_PKCS7_DECRYPT_ORI: return "WC_PKCS7_DECRYPT_ORI";
+ case WC_PKCS7_DECRYPT_DONE: return "WC_PKCS7_DECRYPT_DONE";
+
+ case WC_PKCS7_VERIFY_STAGE2: return "WC_PKCS7_VERIFY_STAGE2";
+ case WC_PKCS7_VERIFY_STAGE3: return "WC_PKCS7_VERIFY_STAGE3";
+ case WC_PKCS7_VERIFY_STAGE4: return "WC_PKCS7_VERIFY_STAGE4";
+ case WC_PKCS7_VERIFY_STAGE5: return "WC_PKCS7_VERIFY_STAGE5";
+ case WC_PKCS7_VERIFY_STAGE6: return "WC_PKCS7_VERIFY_STAGE6";
+
+ default:
+ return "Unknown state";
+ }
+}
+#endif
+
+/* Used to change the PKCS7 state. Having state change as a function allows
+ * for easier debugging */
+static void wc_PKCS7_ChangeState(PKCS7* pkcs7, int newState)
+{
+#ifdef WC_PKCS7_STREAM_DEBUG
+ printf("\tChanging from state [%02d] %s to [%02d] %s\n",
+ pkcs7->state, wc_PKCS7_GetStateName(pkcs7->state),
+ newState, wc_PKCS7_GetStateName(newState));
+#endif
+ pkcs7->state = newState;
+}
+
+#define MAX_PKCS7_DIGEST_SZ (MAX_SEQ_SZ + MAX_ALGO_SZ + \
+ MAX_OCTET_STR_SZ + WC_MAX_DIGEST_SIZE)
/* placed ASN.1 contentType OID into *output, return idx on success,
* 0 upon failure */
-WOLFSSL_LOCAL int wc_SetContentType(int pkcs7TypeOID, byte* output)
+static int wc_SetContentType(int pkcs7TypeOID, byte* output, word32 outputSz)
{
/* PKCS#7 content types, RFC 2315, section 14 */
- static const byte pkcs7[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
+ const byte pkcs7[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
0x0D, 0x01, 0x07 };
- static const byte data[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
+ const byte data[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
0x0D, 0x01, 0x07, 0x01 };
- static const byte signedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
+ const byte signedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
0x0D, 0x01, 0x07, 0x02};
- static const byte envelopedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
+ const byte envelopedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
0x0D, 0x01, 0x07, 0x03 };
- static const byte signedAndEnveloped[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
+ const byte authEnvelopedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
+ 0x0D, 0x01, 0x09, 0x10, 0x01, 0x17};
+ const byte signedAndEnveloped[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
0x0D, 0x01, 0x07, 0x04 };
- static const byte digestedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
+ const byte digestedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
0x0D, 0x01, 0x07, 0x05 };
- static const byte encryptedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
+#ifndef NO_PKCS7_ENCRYPTED_DATA
+ const byte encryptedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7,
0x0D, 0x01, 0x07, 0x06 };
+#endif
+ /* FirmwarePkgData (1.2.840.113549.1.9.16.1.16), RFC 4108 */
+ const byte firmwarePkgData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+ 0x01, 0x09, 0x10, 0x01, 0x10 };
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+ /* id-ct-compressedData (1.2.840.113549.1.9.16.1.9), RFC 3274 */
+ const byte compressedData[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+ 0x01, 0x09, 0x10, 0x01, 0x09 };
+#endif
- int idSz;
- int typeSz = 0, idx = 0;
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+ const byte pwriKek[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+ 0x01, 0x09, 0x10, 0x03, 0x09 };
+ const byte pbkdf2[] = { 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+ 0x01, 0x05, 0x0C };
+#endif
+
+ int idSz, idx = 0;
+ word32 typeSz = 0;
const byte* typeName = 0;
byte ID_Length[MAX_LENGTH_SZ];
@@ -87,12 +555,17 @@ WOLFSSL_LOCAL int wc_SetContentType(int pkcs7TypeOID, byte* output)
typeSz = sizeof(signedData);
typeName = signedData;
break;
-
+
case ENVELOPED_DATA:
typeSz = sizeof(envelopedData);
typeName = envelopedData;
break;
+ case AUTH_ENVELOPED_DATA:
+ typeSz = sizeof(authEnvelopedData);
+ typeName = authEnvelopedData;
+ break;
+
case SIGNED_AND_ENVELOPED_DATA:
typeSz = sizeof(signedAndEnveloped);
typeName = signedAndEnveloped;
@@ -103,16 +576,45 @@ WOLFSSL_LOCAL int wc_SetContentType(int pkcs7TypeOID, byte* output)
typeName = digestedData;
break;
+#ifndef NO_PKCS7_ENCRYPTED_DATA
case ENCRYPTED_DATA:
typeSz = sizeof(encryptedData);
typeName = encryptedData;
break;
+#endif
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+ case COMPRESSED_DATA:
+ typeSz = sizeof(compressedData);
+ typeName = compressedData;
+ break;
+#endif
+ case FIRMWARE_PKG_DATA:
+ typeSz = sizeof(firmwarePkgData);
+ typeName = firmwarePkgData;
+ break;
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+ case PWRI_KEK_WRAP:
+ typeSz = sizeof(pwriKek);
+ typeName = pwriKek;
+ break;
+
+ case PBKDF2_OID:
+ typeSz = sizeof(pbkdf2);
+ typeName = pbkdf2;
+ break;
+#endif
default:
WOLFSSL_MSG("Unknown PKCS#7 Type");
return 0;
};
+ if (outputSz < (MAX_LENGTH_SZ + 1 + typeSz)) {
+ WOLFSSL_MSG("CMS content type buffer too small");
+ return BAD_FUNC_ARG;
+ }
+
idSz = SetLength(typeSz, ID_Length);
output[idx++] = ASN_OBJECT_ID;
XMEMCPY(output + idx, ID_Length, idSz);
@@ -121,82 +623,384 @@ WOLFSSL_LOCAL int wc_SetContentType(int pkcs7TypeOID, byte* output)
idx += typeSz;
return idx;
-
}
/* get ASN.1 contentType OID sum, return 0 on success, <0 on failure */
-int wc_GetContentType(const byte* input, word32* inOutIdx, word32* oid,
- word32 maxIdx)
+static int wc_GetContentType(const byte* input, word32* inOutIdx, word32* oid,
+ word32 maxIdx)
{
- int length;
- word32 i = *inOutIdx;
- byte b;
- *oid = 0;
-
WOLFSSL_ENTER("wc_GetContentType");
+ if (GetObjectId(input, inOutIdx, oid, oidIgnoreType, maxIdx) < 0)
+ return ASN_PARSE_E;
- b = input[i++];
- if (b != ASN_OBJECT_ID)
- return ASN_OBJECT_ID_E;
+ return 0;
+}
- if (GetLength(input, &i, &length, maxIdx) < 0)
- return ASN_PARSE_E;
- while(length--) {
- *oid += input[i];
- i++;
+/* return block size for algorithm represented by oid, or <0 on error */
+static int wc_PKCS7_GetOIDBlockSize(int oid)
+{
+ int blockSz;
+
+ switch (oid) {
+#ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ case AES128CBCb:
+ case AES128GCMb:
+ case AES128CCMb:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CBCb:
+ case AES192GCMb:
+ case AES192CCMb:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CBCb:
+ case AES256GCMb:
+ case AES256CCMb:
+ #endif
+ blockSz = AES_BLOCK_SIZE;
+ break;
+#endif
+#ifndef NO_DES3
+ case DESb:
+ case DES3b:
+ blockSz = DES_BLOCK_SIZE;
+ break;
+#endif
+ default:
+ WOLFSSL_MSG("Unsupported content cipher type");
+ return ALGO_ID_E;
+ };
+
+ return blockSz;
+}
+
+
+/* get key size for algorithm represented by oid, or <0 on error */
+static int wc_PKCS7_GetOIDKeySize(int oid)
+{
+ int blockKeySz;
+
+ switch (oid) {
+#ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ case AES128CBCb:
+ case AES128GCMb:
+ case AES128CCMb:
+ case AES128_WRAP:
+ blockKeySz = 16;
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CBCb:
+ case AES192GCMb:
+ case AES192CCMb:
+ case AES192_WRAP:
+ blockKeySz = 24;
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CBCb:
+ case AES256GCMb:
+ case AES256CCMb:
+ case AES256_WRAP:
+ blockKeySz = 32;
+ break;
+ #endif
+#endif
+#ifndef NO_DES3
+ case DESb:
+ blockKeySz = DES_KEYLEN;
+ break;
+
+ case DES3b:
+ blockKeySz = DES3_KEYLEN;
+ break;
+#endif
+ default:
+ WOLFSSL_MSG("Unsupported content cipher type");
+ return ALGO_ID_E;
+ };
+
+ return blockKeySz;
+}
+
+
+PKCS7* wc_PKCS7_New(void* heap, int devId)
+{
+ PKCS7* pkcs7 = (PKCS7*)XMALLOC(sizeof(PKCS7), heap, DYNAMIC_TYPE_PKCS7);
+ if (pkcs7) {
+ XMEMSET(pkcs7, 0, sizeof(PKCS7));
+ if (wc_PKCS7_Init(pkcs7, heap, devId) == 0) {
+ pkcs7->isDynamic = 1;
+ }
+ else {
+ XFREE(pkcs7, heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7 = NULL;
+ }
}
+ return pkcs7;
+}
+
+/* This is to initialize a PKCS7 structure. It sets all values to 0 and can be
+ * used to set the heap hint.
+ *
+ * pkcs7 PKCS7 structure to initialize
+ * heap memory heap hint for PKCS7 structure to use
+ * devId currently not used but a place holder for async operations
+ *
+ * returns 0 on success or a negative value for failure
+ */
+int wc_PKCS7_Init(PKCS7* pkcs7, void* heap, int devId)
+{
+ word16 isDynamic;
+
+ WOLFSSL_ENTER("wc_PKCS7_Init");
+
+ if (pkcs7 == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ isDynamic = pkcs7->isDynamic;
+ XMEMSET(pkcs7, 0, sizeof(PKCS7));
+ pkcs7->isDynamic = isDynamic;
+#ifdef WOLFSSL_HEAP_TEST
+ pkcs7->heap = (void*)WOLFSSL_HEAP_TEST;
+#else
+ pkcs7->heap = heap;
+#endif
+ pkcs7->devId = devId;
+
+ return 0;
+}
+
+
+/* Certificate structure holding der pointer, size, and pointer to next
+ * Pkcs7Cert struct. Used when creating SignedData types with multiple
+ * certificates. */
+struct Pkcs7Cert {
+ byte* der;
+ word32 derSz;
+ Pkcs7Cert* next;
+};
+
+
+/* Linked list of ASN.1 encoded RecipientInfos */
+struct Pkcs7EncodedRecip {
+ byte recip[MAX_RECIP_SZ];
+ word32 recipSz;
+ int recipType;
+ int recipVersion;
+ Pkcs7EncodedRecip* next;
+};
+
+
+/* free all members of Pkcs7Cert linked list */
+static void wc_PKCS7_FreeCertSet(PKCS7* pkcs7)
+{
+ Pkcs7Cert* curr = NULL;
+ Pkcs7Cert* next = NULL;
+
+ if (pkcs7 == NULL)
+ return;
+
+ curr = pkcs7->certList;
+ pkcs7->certList = NULL;
+
+ while (curr != NULL) {
+ next = curr->next;
+ curr->next = NULL;
+ XFREE(curr, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ curr = next;
+ }
+
+ return;
+}
- *inOutIdx = i;
+
+/* Get total size of all recipients in recipient list.
+ *
+ * Returns total size of recipients, or negative upon error */
+static int wc_PKCS7_GetRecipientListSize(PKCS7* pkcs7)
+{
+ int totalSz = 0;
+ Pkcs7EncodedRecip* tmp = NULL;
+
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ tmp = pkcs7->recipList;
+
+ while (tmp != NULL) {
+ totalSz += tmp->recipSz;
+ tmp = tmp->next;
+ }
+
+ return totalSz;
+}
+
+
+/* free all members of Pkcs7EncodedRecip linked list */
+static void wc_PKCS7_FreeEncodedRecipientSet(PKCS7* pkcs7)
+{
+ Pkcs7EncodedRecip* curr = NULL;
+ Pkcs7EncodedRecip* next = NULL;
+
+ if (pkcs7 == NULL)
+ return;
+
+ curr = pkcs7->recipList;
+ pkcs7->recipList = NULL;
+
+ while (curr != NULL) {
+ next = curr->next;
+ curr->next = NULL;
+ XFREE(curr, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ curr = next;
+ }
+
+ return;
+}
+
+
+/* search through RecipientInfo list for specific type.
+ * return 1 if ANY recipient of type specified is present, otherwise
+ * return 0 */
+static int wc_PKCS7_RecipientListIncludesType(PKCS7* pkcs7, int type)
+{
+ Pkcs7EncodedRecip* tmp = NULL;
+
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ tmp = pkcs7->recipList;
+
+ while (tmp != NULL) {
+ if (tmp->recipType == type)
+ return 1;
+
+ tmp = tmp->next;
+ }
return 0;
}
-/* init PKCS7 struct with recipient cert, decode into DecodedCert */
-int wc_PKCS7_InitWithCert(PKCS7* pkcs7, byte* cert, word32 certSz)
+/* searches through RecipientInfo list, returns 1 if all structure
+ * versions are set to 0, otherwise returns 0 */
+static int wc_PKCS7_RecipientListVersionsAllZero(PKCS7* pkcs7)
+{
+ Pkcs7EncodedRecip* tmp = NULL;
+
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ tmp = pkcs7->recipList;
+
+ while (tmp != NULL) {
+ if (tmp->recipVersion != 0)
+ return 0;
+
+ tmp = tmp->next;
+ }
+
+ return 1;
+}
+
+
+/* Init PKCS7 struct with recipient cert, decode into DecodedCert
+ * NOTE: keeps previously set pkcs7 heap hint, devId and isDynamic */
+int wc_PKCS7_InitWithCert(PKCS7* pkcs7, byte* derCert, word32 derCertSz)
{
int ret = 0;
+ void* heap;
+ int devId;
+ Pkcs7Cert* cert;
+ Pkcs7Cert* lastCert;
- XMEMSET(pkcs7, 0, sizeof(PKCS7));
- if (cert != NULL && certSz > 0) {
+ if (pkcs7 == NULL || (derCert == NULL && derCertSz != 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+ heap = pkcs7->heap;
+ devId = pkcs7->devId;
+ cert = pkcs7->certList;
+ ret = wc_PKCS7_Init(pkcs7, heap, devId);
+ if (ret != 0)
+ return ret;
+ pkcs7->certList = cert;
+
+ if (derCert != NULL && derCertSz > 0) {
#ifdef WOLFSSL_SMALL_STACK
DecodedCert* dCert;
- dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
+ dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
+ DYNAMIC_TYPE_DCERT);
if (dCert == NULL)
return MEMORY_E;
#else
- DecodedCert stack_dCert;
- DecodedCert* dCert = &stack_dCert;
+ DecodedCert dCert[1];
#endif
- pkcs7->singleCert = cert;
- pkcs7->singleCertSz = certSz;
- InitDecodedCert(dCert, cert, certSz, 0);
+ pkcs7->singleCert = derCert;
+ pkcs7->singleCertSz = derCertSz;
+ pkcs7->cert[0] = derCert;
+ pkcs7->certSz[0] = derCertSz;
+
+ /* create new Pkcs7Cert for recipient, freed during cleanup */
+ cert = (Pkcs7Cert*)XMALLOC(sizeof(Pkcs7Cert), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ XMEMSET(cert, 0, sizeof(Pkcs7Cert));
+ cert->der = derCert;
+ cert->derSz = derCertSz;
+ cert->next = NULL;
+
+ /* free existing cert list if existing */
+ wc_PKCS7_FreeCertSet(pkcs7);
+
+ /* add cert to list */
+ if (pkcs7->certList == NULL) {
+ pkcs7->certList = cert;
+ } else {
+ lastCert = pkcs7->certList;
+ while (lastCert->next != NULL) {
+ lastCert = lastCert->next;
+ }
+ lastCert->next = cert;
+ }
+ InitDecodedCert(dCert, derCert, derCertSz, pkcs7->heap);
ret = ParseCert(dCert, CA_TYPE, NO_VERIFY, 0);
if (ret < 0) {
FreeDecodedCert(dCert);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT);
#endif
return ret;
}
XMEMCPY(pkcs7->publicKey, dCert->publicKey, dCert->pubKeySize);
pkcs7->publicKeySz = dCert->pubKeySize;
+ pkcs7->publicKeyOID = dCert->keyOID;
XMEMCPY(pkcs7->issuerHash, dCert->issuerHash, KEYID_SIZE);
pkcs7->issuer = dCert->issuerRaw;
pkcs7->issuerSz = dCert->issuerRawLen;
XMEMCPY(pkcs7->issuerSn, dCert->serial, dCert->serialSz);
pkcs7->issuerSnSz = dCert->serialSz;
+ XMEMCPY(pkcs7->issuerSubjKeyId, dCert->extSubjKeyId, KEYID_SIZE);
+
+ /* default to IssuerAndSerialNumber for SignerIdentifier */
+ pkcs7->sidType = CMS_ISSUER_AND_SERIAL_NUMBER;
+
+ /* free existing recipient list if existing */
+ wc_PKCS7_FreeEncodedRecipientSet(pkcs7);
+
FreeDecodedCert(dCert);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT);
#endif
}
@@ -204,10 +1008,265 @@ int wc_PKCS7_InitWithCert(PKCS7* pkcs7, byte* cert, word32 certSz)
}
+/* Adds one DER-formatted certificate to the internal PKCS7/CMS certificate
+ * list, to be added as part of the certificates CertificateSet. Currently
+ * used in SignedData content type.
+ *
+ * Must be called after wc_PKCS7_Init() or wc_PKCS7_InitWithCert().
+ *
+ * Does not represent the recipient/signer certificate, only certificates that
+ * are part of the certificate chain used to build and verify signer
+ * certificates.
+ *
+ * This API does not currently validate certificates.
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_AddCertificate(PKCS7* pkcs7, byte* derCert, word32 derCertSz)
+{
+ Pkcs7Cert* cert;
+
+ if (pkcs7 == NULL || derCert == NULL || derCertSz == 0)
+ return BAD_FUNC_ARG;
+
+ cert = (Pkcs7Cert*)XMALLOC(sizeof(Pkcs7Cert), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (cert == NULL)
+ return MEMORY_E;
+
+ cert->der = derCert;
+ cert->derSz = derCertSz;
+
+ if (pkcs7->certList == NULL) {
+ pkcs7->certList = cert;
+ } else {
+ cert->next = pkcs7->certList;
+ pkcs7->certList = cert;
+ }
+
+ return 0;
+}
+
+
+/* free linked list of PKCS7DecodedAttrib structs */
+static void wc_PKCS7_FreeDecodedAttrib(PKCS7DecodedAttrib* attrib, void* heap)
+{
+ PKCS7DecodedAttrib* current;
+
+ if (attrib == NULL) {
+ return;
+ }
+
+ current = attrib;
+ while (current != NULL) {
+ PKCS7DecodedAttrib* next = current->next;
+ if (current->oid != NULL) {
+ XFREE(current->oid, heap, DYNAMIC_TYPE_PKCS7);
+ }
+ if (current->value != NULL) {
+ XFREE(current->value, heap, DYNAMIC_TYPE_PKCS7);
+ }
+ XFREE(current, heap, DYNAMIC_TYPE_PKCS7);
+ current = next;
+ }
+
+ (void)heap;
+}
+
+
+/* return 0 on success */
+static int wc_PKCS7_SignerInfoNew(PKCS7* pkcs7)
+{
+ if (pkcs7->signerInfo != NULL) {
+ XFREE(pkcs7->signerInfo, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->signerInfo = NULL;
+ }
+
+ pkcs7->signerInfo = (PKCS7SignerInfo*)XMALLOC(sizeof(PKCS7SignerInfo),
+ pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->signerInfo == NULL) {
+ WOLFSSL_MSG("Unable to malloc memory for signer info");
+ return MEMORY_E;
+ }
+ XMEMSET(pkcs7->signerInfo, 0, sizeof(PKCS7SignerInfo));
+ return 0;
+}
+
+
+static void wc_PKCS7_SignerInfoFree(PKCS7* pkcs7)
+{
+ if (pkcs7->signerInfo != NULL) {
+ if (pkcs7->signerInfo->sid != NULL) {
+ XFREE(pkcs7->signerInfo->sid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->signerInfo->sid = NULL;
+ }
+ XFREE(pkcs7->signerInfo, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->signerInfo = NULL;
+ }
+}
+
+
+/* free's any current SID and sets it to "in"
+ * returns 0 on success
+ */
+static int wc_PKCS7_SignerInfoSetSID(PKCS7* pkcs7, byte* in, int inSz)
+{
+ if (pkcs7 == NULL || in == NULL || inSz < 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (pkcs7->signerInfo->sid != NULL) {
+ XFREE(pkcs7->signerInfo->sid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->signerInfo->sid = NULL;
+ }
+ pkcs7->signerInfo->sid = (byte*)XMALLOC(inSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->signerInfo->sid == NULL) {
+ return MEMORY_E;
+ }
+ XMEMCPY(pkcs7->signerInfo->sid, in, inSz);
+ pkcs7->signerInfo->sidSz = inSz;
+ return 0;
+}
+
+
/* releases any memory allocated by a PKCS7 initializer */
void wc_PKCS7_Free(PKCS7* pkcs7)
{
- (void)pkcs7;
+ if (pkcs7 == NULL)
+ return;
+
+#ifndef NO_PKCS7_STREAM
+ wc_PKCS7_FreeStream(pkcs7);
+#endif
+
+ wc_PKCS7_SignerInfoFree(pkcs7);
+ wc_PKCS7_FreeDecodedAttrib(pkcs7->decodedAttrib, pkcs7->heap);
+ wc_PKCS7_FreeCertSet(pkcs7);
+
+#ifdef ASN_BER_TO_DER
+ if (pkcs7->der != NULL)
+ XFREE(pkcs7->der, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+#endif
+ if (pkcs7->contentDynamic != NULL)
+ XFREE(pkcs7->contentDynamic, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+ if (pkcs7->cek != NULL) {
+ ForceZero(pkcs7->cek, pkcs7->cekSz);
+ XFREE(pkcs7->cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ }
+
+ pkcs7->contentTypeSz = 0;
+
+ if (pkcs7->signature) {
+ XFREE(pkcs7->signature, pkcs7->heap, DYNAMIC_TYPE_SIGNATURE);
+ pkcs7->signature = NULL;
+ pkcs7->signatureSz = 0;
+ }
+ if (pkcs7->plainDigest) {
+ XFREE(pkcs7->plainDigest, pkcs7->heap, DYNAMIC_TYPE_DIGEST);
+ pkcs7->plainDigest = NULL;
+ pkcs7->plainDigestSz = 0;
+ }
+ if (pkcs7->pkcs7Digest) {
+ XFREE(pkcs7->pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_DIGEST);
+ pkcs7->pkcs7Digest = NULL;
+ pkcs7->pkcs7DigestSz = 0;
+ }
+ if (pkcs7->cachedEncryptedContent != NULL) {
+ XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->cachedEncryptedContent = NULL;
+ pkcs7->cachedEncryptedContentSz = 0;
+ }
+
+ if (pkcs7->isDynamic) {
+ pkcs7->isDynamic = 0;
+ XFREE(pkcs7, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ }
+}
+
+
+/* helper function for parsing through attributes and finding a specific one.
+ * returns PKCS7DecodedAttrib pointer on success */
+static PKCS7DecodedAttrib* findAttrib(PKCS7* pkcs7, const byte* oid, word32 oidSz)
+{
+ PKCS7DecodedAttrib* list;
+
+ if (pkcs7 == NULL || oid == NULL) {
+ return NULL;
+ }
+
+ /* search attributes for pkiStatus */
+ list = pkcs7->decodedAttrib;
+ while (list != NULL) {
+ word32 sz = oidSz;
+ word32 idx = 0;
+ int length = 0;
+ byte tag;
+
+ if (GetASNTag(list->oid, &idx, &tag, list->oidSz) < 0) {
+ return NULL;
+ }
+ if (tag != ASN_OBJECT_ID) {
+ WOLFSSL_MSG("Bad attribute ASN1 syntax");
+ return NULL;
+ }
+
+ if (GetLength(list->oid, &idx, &length, list->oidSz) < 0) {
+ WOLFSSL_MSG("Bad attribute length");
+ return NULL;
+ }
+
+ sz = (sz < (word32)length)? sz : (word32)length;
+ if (XMEMCMP(oid, list->oid + idx, sz) == 0) {
+ return list;
+ }
+ list = list->next;
+ }
+ return NULL;
+}
+
+
+/* Searches through decoded attributes and returns the value for the first one
+ * matching the oid passed in. Note that this value includes the leading ASN1
+ * syntax. So for a printable string of "3" this would be something like
+ *
+ * 0x13, 0x01, 0x33
+ * ID SIZE "3"
+ *
+ * pkcs7 structure to get value from
+ * oid OID value to search for with attributes
+ * oidSz size of oid buffer
+ * out buffer to hold result
+ * outSz size of out buffer (if out is NULL this is set to needed size and
+ LENGTH_ONLY_E is returned)
+ *
+ * returns size of value on success
+ */
+int wc_PKCS7_GetAttributeValue(PKCS7* pkcs7, const byte* oid, word32 oidSz,
+ byte* out, word32* outSz)
+{
+ PKCS7DecodedAttrib* attrib;
+
+ if (pkcs7 == NULL || oid == NULL || outSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ attrib = findAttrib(pkcs7, oid, oidSz);
+ if (attrib == NULL) {
+ return ASN_PARSE_E;
+ }
+
+ if (out == NULL) {
+ *outSz = attrib->valueSz;
+ return LENGTH_ONLY_E;
+ }
+
+ if (*outSz < attrib->valueSz) {
+ return BUFFER_E;
+ }
+
+ XMEMCPY(out, attrib->value, attrib->valueSz);
+ return attrib->valueSz;
}
@@ -224,6 +1283,10 @@ int wc_PKCS7_EncodeData(PKCS7* pkcs7, byte* output, word32 outputSz)
word32 oidSz = (word32)sizeof(oid);
int idx = 0;
+ if (pkcs7 == NULL || output == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
octetStrSz = SetOctetString(pkcs7->contentSz, octetStr);
seqSz = SetSequence(pkcs7->contentSz + octetStrSz + oidSz, seq);
@@ -253,10 +1316,11 @@ typedef struct EncodedAttrib {
 typedef struct ESD {
- Sha sha;
- byte contentDigest[SHA_DIGEST_SIZE + 2]; /* content only + ASN.1 heading */
- byte contentAttribsDigest[SHA_DIGEST_SIZE];
- byte encContentDigest[512];
+ /* hash state and type used for content and signed-attribute digests */
+ wc_HashAlg hash;
+ enum wc_HashType hashType;
+ byte contentDigest[WC_MAX_DIGEST_SIZE + 2]; /* content only + ASN.1 heading */
+ byte contentAttribsDigest[WC_MAX_DIGEST_SIZE];
+ byte encContentDigest[MAX_ENCRYPTED_KEY_SZ];
 byte outerSeq[MAX_SEQ_SZ];
 byte outerContent[MAX_EXP_SZ];
@@ -274,20 +1338,24 @@ typedef struct ESD {
 byte signerInfoSet[MAX_SET_SZ];
 byte signerInfoSeq[MAX_SEQ_SZ];
 byte signerVersion[MAX_VERSION_SZ];
+ /* SignerIdentifier choice 1: issuerAndSerialNumber ...*/
 byte issuerSnSeq[MAX_SEQ_SZ];
 byte issuerName[MAX_SEQ_SZ];
 byte issuerSn[MAX_SN_SZ];
+ /* OR choice 2: subjectKeyIdentifier */
+ byte issuerSKIDSeq[MAX_SEQ_SZ];
+ byte issuerSKID[MAX_OCTET_STR_SZ];
 byte signerDigAlgoId[MAX_ALGO_SZ];
 byte digEncAlgoId[MAX_ALGO_SZ];
 byte signedAttribSet[MAX_SET_SZ];
- EncodedAttrib signedAttribs[6];
+ EncodedAttrib signedAttribs[7];
 byte signerDigest[MAX_OCTET_STR_SZ];
 word32 innerOctetsSz, innerContSeqSz, contentInfoSeqSz;
 word32 outerSeqSz, outerContentSz, innerSeqSz, versionSz, digAlgoIdSetSz,
 singleDigAlgoIdSz, certsSetSz;
 word32 signerInfoSetSz, signerInfoSeqSz, signerVersionSz,
- issuerSnSeqSz, issuerNameSz, issuerSnSz,
- signerDigAlgoIdSz, digEncAlgoIdSz, signerDigestSz;
+ issuerSnSeqSz, issuerNameSz, issuerSnSz, issuerSKIDSz,
+ issuerSKIDSeqSz, signerDigAlgoIdSz, digEncAlgoIdSz, signerDigestSz;
 word32 encContentDigestSz, signedAttribsSz, signedAttribsCount,
 signedAttribSetSz;
 } ESD;
@@ -322,12 +1390,112 @@ static int EncodeAttributes(EncodedAttrib* ea, int eaSz,
}
-static int FlattenAttributes(byte* output, EncodedAttrib* ea, int eaSz)
+/* holds one attribute's complete flattened DER encoding */
+typedef struct FlatAttrib {
+ byte* data;
+ word32 dataSz;
+} FlatAttrib;
+
+/* Returns a pointer to FlatAttrib whose members are initialized to 0.
+* Caller is expected to free.
+*/
+static FlatAttrib* NewAttrib(void* heap)
 {
- int i, idx;
+ FlatAttrib* fb = (FlatAttrib*) XMALLOC(sizeof(FlatAttrib), heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (fb != NULL) {
+ ForceZero(fb, sizeof(FlatAttrib));
+ }
+ /* quiet unused-argument warning when the XMALLOC macro ignores heap */
+ (void)heap;
+ return fb;
+}
+
+/* Free FlatAttrib array and memory allocated to internal struct members.
+ * Zeroes each buffer before freeing; safe to call with arr == NULL. */
+static void FreeAttribArray(PKCS7* pkcs7, FlatAttrib** arr, int rows)
+{
+ int i;
+
+ if (arr) {
+ for (i = 0; i < rows; i++) {
+ if (arr[i]) {
+ if (arr[i]->data) {
+ ForceZero(arr[i]->data, arr[i]->dataSz);
+ XFREE(arr[i]->data, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ ForceZero(arr[i], sizeof(FlatAttrib));
+ XFREE(arr[i], pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ }
+ /* NOTE(review): zeroes only 'rows' bytes of the pointer array, not
+ * rows * sizeof(FlatAttrib*); benign since arr is freed right after,
+ * but confirm intent */
+ ForceZero(arr, rows);
+ XFREE(arr, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ /* quiet unused-argument warning when XFREE macro ignores heap */
+ (void)pkcs7;
+}
+
+
+/* Sort FlatAttrib array in ascending order of encoded length (selection
+ * sort over dataSz).
+ * NOTE(review): orders by encoded length only; full DER SET OF ordering
+ * also compares encodings byte-wise when lengths are equal — confirm this
+ * is sufficient for the attributes generated here. */
+static int SortAttribArray(FlatAttrib** arr, int rows)
+{
+ int i, j;
+ word32 minSz, minIdx;
+ FlatAttrib* a = NULL;
+ FlatAttrib* b = NULL;
+ FlatAttrib* tmp = NULL;
+
+ if (arr == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ for (i = 0; i < rows; i++) {
+ /* find smallest remaining dataSz in arr[i..rows) */
+ a = arr[i];
+ minSz = a->dataSz;
+ minIdx = i;
+ for (j = i+1; j < rows; j++) {
+ b = arr[j];
+ if (b->dataSz < minSz) {
+ minSz = b->dataSz;
+ minIdx = j;
+ }
+ }
+ if (minSz < a->dataSz) {
+ /* swap array positions */
+ tmp = arr[i];
+ arr[i] = arr[minIdx];
+ arr[minIdx] = tmp;
+ }
+ }
+
+ return 0;
+}
+
+
+/* Build up array of FlatAttrib structs from EncodedAttrib ones. FlatAttrib
+ * holds flattened DER encoding of each attribute: valueSeq | oid |
+ * valueSet | value, in that order. derArr entries must be pre-allocated
+ * (see NewAttrib); each gets a freshly allocated data buffer. */
+static int FlattenEncodedAttribs(PKCS7* pkcs7, FlatAttrib** derArr, int rows,
+ EncodedAttrib* ea, int eaSz)
+{
+ int i, idx, sz;
+ byte* output = NULL;
+ FlatAttrib* fa = NULL;
+
+ if (pkcs7 == NULL || derArr == NULL || ea == NULL) {
+ WOLFSSL_MSG("Invalid arguments to FlattenEncodedAttribs");
+ return BAD_FUNC_ARG;
+ }
+
+ if (rows != eaSz) {
+ WOLFSSL_MSG("DER array not large enough to hold attribute count");
+ return BAD_FUNC_ARG;
+ }
- idx = 0;
 for (i = 0; i < eaSz; i++) {
+ /* total flattened size of this attribute's four DER pieces */
+ sz = ea[i].valueSeqSz + ea[i].oidSz + ea[i].valueSetSz + ea[i].valueSz;
+
+ output = (byte*)XMALLOC(sz, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (output == NULL) {
+ return MEMORY_E;
+ }
+
+ idx = 0;
 XMEMCPY(output + idx, ea[i].valueSeq, ea[i].valueSeqSz);
 idx += ea[i].valueSeqSz;
 XMEMCPY(output + idx, ea[i].oid, ea[i].oidSz);
@@ -335,293 +1503,893 @@ static int FlattenAttributes(byte* output, EncodedAttrib* ea, int eaSz)
 XMEMCPY(output + idx, ea[i].valueSet, ea[i].valueSetSz);
 idx += ea[i].valueSetSz;
 XMEMCPY(output + idx, ea[i].value, ea[i].valueSz);
- idx += ea[i].valueSz;
+
+ /* hand ownership of the flattened buffer to derArr[i] */
+ fa = derArr[i];
+ fa->data = output;
+ fa->dataSz = sz;
 }
+
 return 0;
 }
-/* build PKCS#7 signedData content type */
-int wc_PKCS7_EncodeSignedData(PKCS7* pkcs7, byte* output, word32 outputSz)
+/* Sort and Flatten EncodedAttrib attributes into output buffer.
+ * Caller must size output to hold the sum of all attribute encodings.
+ * Returns 0 on success, BAD_FUNC_ARG or MEMORY_E on error. */
+static int FlattenAttributes(PKCS7* pkcs7, byte* output, EncodedAttrib* ea,
+ int eaSz)
 {
- static const byte outerOid[] =
- { ASN_OBJECT_ID, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
- 0x07, 0x02 };
- static const byte innerOid[] =
- { ASN_OBJECT_ID, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01,
- 0x07, 0x01 };
+ int i, idx, ret;
+ FlatAttrib** derArr = NULL;
+ FlatAttrib* fa = NULL;
+
+ if (pkcs7 == NULL || output == NULL || ea == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ /* create array of FlatAttrib struct pointers to hold DER attribs */
+ derArr = (FlatAttrib**) XMALLOC(eaSz * sizeof(FlatAttrib*), pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (derArr == NULL) {
+ return MEMORY_E;
+ }
+ XMEMSET(derArr, 0, eaSz * sizeof(FlatAttrib*));
+
+ for (i = 0; i < eaSz; i++) {
+ derArr[i] = NewAttrib(pkcs7->heap);
+ if (derArr[i] == NULL) {
+ FreeAttribArray(pkcs7, derArr, eaSz);
+ return MEMORY_E;
+ }
+ /* NOTE(review): NewAttrib already zeroes the struct, so this
+ * ForceZero is redundant but harmless */
+ ForceZero(derArr[i], sizeof(FlatAttrib));
+ }
+
+ /* flatten EncodedAttrib into DER byte arrays */
+ ret = FlattenEncodedAttribs(pkcs7, derArr, eaSz, ea, eaSz);
+ if (ret != 0) {
+ FreeAttribArray(pkcs7, derArr, eaSz);
+ return ret;
+ }
+
+ /* SET OF DER signed attributes must be sorted in ascending order */
+ ret = SortAttribArray(derArr, eaSz);
+ if (ret != 0) {
+ FreeAttribArray(pkcs7, derArr, eaSz);
+ return ret;
+ }
+
+ /* copy sorted DER attribute arrays into output buffer */
+ idx = 0;
+ for (i = 0; i < eaSz; i++) {
+ fa = derArr[i];
+ XMEMCPY(output + idx, fa->data, fa->dataSz);
+ idx += fa->dataSz;
+ }
+
+ FreeAttribArray(pkcs7, derArr, eaSz);
+
+ return 0;
+}
+
+
+#ifndef NO_RSA
+
+/* RSA-sign 'in' (a DER DigestInfo) into esd->encContentDigest.
+ * Decodes pkcs7->privateKey when set; otherwise relies on pkcs7->devId
+ * (crypto callback / async device) being valid.
+ * returns size of signature put into out, negative on error */
+static int wc_PKCS7_RsaSign(PKCS7* pkcs7, byte* in, word32 inSz, ESD* esd)
+{
+ int ret;
+ word32 idx;
 #ifdef WOLFSSL_SMALL_STACK
- ESD* esd = NULL;
+ RsaKey* privKey;
 #else
- ESD stack_esd;
- ESD* esd = &stack_esd;
+ RsaKey privKey[1];
 #endif
- word32 signerInfoSz = 0;
- word32 totalSz = 0;
- int idx = 0, ret = 0;
- byte* flatSignedAttribs = NULL;
- word32 flatSignedAttribsSz = 0;
- word32 innerOidSz = sizeof(innerOid);
- word32 outerOidSz = sizeof(outerOid);
+ if (pkcs7 == NULL || pkcs7->rng == NULL || in == NULL || esd == NULL) {
+ return BAD_FUNC_ARG;
+ }
- if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0 ||
- pkcs7->encryptOID == 0 || pkcs7->hashOID == 0 || pkcs7->rng == 0 ||
- pkcs7->singleCert == NULL || pkcs7->singleCertSz == 0 ||
- pkcs7->privateKey == NULL || pkcs7->privateKeySz == 0 ||
- output == NULL || outputSz == 0)
+#ifdef WOLFSSL_SMALL_STACK
+ privKey = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (privKey == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_InitRsaKey_ex(privKey, pkcs7->heap, pkcs7->devId);
+ if (ret == 0) {
+ if (pkcs7->privateKey != NULL && pkcs7->privateKeySz > 0) {
+ idx = 0;
+ ret = wc_RsaPrivateKeyDecode(pkcs7->privateKey, &idx, privKey,
+ pkcs7->privateKeySz);
+ }
+ else if (pkcs7->devId == INVALID_DEVID) {
+ /* no raw key bytes and no device to sign with */
+ ret = BAD_FUNC_ARG;
+ }
+ }
+ if (ret == 0) {
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ do {
+ ret = wc_AsyncWait(ret, &privKey->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ if (ret >= 0)
+ #endif
+ {
+ ret = wc_RsaSSL_Sign(in, inSz, esd->encContentDigest,
+ sizeof(esd->encContentDigest),
+ privKey, pkcs7->rng);
+ }
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ } while (ret == WC_PENDING_E);
+ #endif
+ }
+
+ wc_FreeRsaKey(privKey);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+#endif /* NO_RSA */
+
+
+#ifdef HAVE_ECC
+
+/* ECDSA-sign 'in' (the raw content/attribute digest) into
+ * esd->encContentDigest. Mirrors wc_PKCS7_RsaSign key handling.
+ * returns size of signature put into out, negative on error */
+static int wc_PKCS7_EcdsaSign(PKCS7* pkcs7, byte* in, word32 inSz, ESD* esd)
+{
+ int ret;
+ word32 outSz, idx;
+#ifdef WOLFSSL_SMALL_STACK
+ ecc_key* privKey;
+#else
+ ecc_key privKey[1];
+#endif
+
+ if (pkcs7 == NULL || pkcs7->rng == NULL || in == NULL || esd == NULL) {
 return BAD_FUNC_ARG;
+ }
 #ifdef WOLFSSL_SMALL_STACK
- esd = (ESD*)XMALLOC(sizeof(ESD), NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (esd == NULL)
+ privKey = (ecc_key*)XMALLOC(sizeof(ecc_key), pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (privKey == NULL)
 return MEMORY_E;
 #endif
- XMEMSET(esd, 0, sizeof(ESD));
- ret = wc_InitSha(&esd->sha);
- if (ret != 0) {
+ ret = wc_ecc_init_ex(privKey, pkcs7->heap, pkcs7->devId);
+ if (ret == 0) {
+ if (pkcs7->privateKey != NULL && pkcs7->privateKeySz > 0) {
+ idx = 0;
+ ret = wc_EccPrivateKeyDecode(pkcs7->privateKey, &idx, privKey,
+ pkcs7->privateKeySz);
+ }
+ else if (pkcs7->devId == INVALID_DEVID) {
+ /* no raw key bytes and no device to sign with */
+ ret = BAD_FUNC_ARG;
+ }
+ }
+ if (ret == 0) {
+ outSz = sizeof(esd->encContentDigest);
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ do {
+ ret = wc_AsyncWait(ret, &privKey->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ if (ret >= 0)
+ #endif
+ {
+ ret = wc_ecc_sign_hash(in, inSz, esd->encContentDigest,
+ &outSz, pkcs7->rng, privKey);
+ }
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ } while (ret == WC_PENDING_E);
+ #endif
+ if (ret == 0)
+ ret = (int)outSz;
+ }
+
+ wc_ecc_free(privKey);
 #ifdef WOLFSSL_SMALL_STACK
- XFREE(esd, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
- return ret;
+
+ return ret;
+}
+
+#endif /* HAVE_ECC */
+
+
+/* builds up SignedData signed attributes, including default ones
+ * (contentType, signingTime unless NO_ASN_TIME, messageDigest), then
+ * appends any custom attributes from pkcs7->signedAttribs. Results are
+ * accumulated into esd->signedAttribs / signedAttribsSz / Count.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * esd - pointer to initialized ESD structure, used for output
+ *
+ * return 0 on success, negative on error */
+static int wc_PKCS7_BuildSignedAttributes(PKCS7* pkcs7, ESD* esd,
+ const byte* contentType, word32 contentTypeSz,
+ const byte* contentTypeOid, word32 contentTypeOidSz,
+ const byte* messageDigestOid, word32 messageDigestOidSz,
+ const byte* signingTimeOid, word32 signingTimeOidSz,
+ byte* signingTime, word32 signingTimeSz)
+{
+ int hashSz;
+#ifdef NO_ASN_TIME
+ PKCS7Attrib cannedAttribs[2];
+#else
+ time_t tm;
+ int timeSz;
+ PKCS7Attrib cannedAttribs[3];
+#endif
+ word32 idx = 0;
+ word32 cannedAttribsCount;
+
+ if (pkcs7 == NULL || esd == NULL || contentType == NULL ||
+ contentTypeOid == NULL || messageDigestOid == NULL ||
+ signingTimeOid == NULL) {
+ return BAD_FUNC_ARG;
 }
- if (pkcs7->contentSz != 0)
- {
- wc_ShaUpdate(&esd->sha, pkcs7->content, pkcs7->contentSz);
- esd->contentDigest[0] = ASN_OCTET_STRING;
- esd->contentDigest[1] = SHA_DIGEST_SIZE;
- wc_ShaFinal(&esd->sha, &esd->contentDigest[2]);
- }
-
- esd->innerOctetsSz = SetOctetString(pkcs7->contentSz, esd->innerOctets);
- esd->innerContSeqSz = SetExplicit(0, esd->innerOctetsSz + pkcs7->contentSz,
- esd->innerContSeq);
- esd->contentInfoSeqSz = SetSequence(pkcs7->contentSz + esd->innerOctetsSz +
- innerOidSz + esd->innerContSeqSz,
- esd->contentInfoSeq);
-
- esd->issuerSnSz = SetSerialNumber(pkcs7->issuerSn, pkcs7->issuerSnSz,
- esd->issuerSn);
- signerInfoSz += esd->issuerSnSz;
- esd->issuerNameSz = SetSequence(pkcs7->issuerSz, esd->issuerName);
- signerInfoSz += esd->issuerNameSz + pkcs7->issuerSz;
- esd->issuerSnSeqSz = SetSequence(signerInfoSz, esd->issuerSnSeq);
- signerInfoSz += esd->issuerSnSeqSz;
- esd->signerVersionSz = SetMyVersion(1, esd->signerVersion, 0);
- signerInfoSz += esd->signerVersionSz;
- esd->signerDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->signerDigAlgoId,
- hashType, 0);
- signerInfoSz += esd->signerDigAlgoIdSz;
- esd->digEncAlgoIdSz = SetAlgoID(pkcs7->encryptOID, esd->digEncAlgoId,
- keyType, 0);
- signerInfoSz += esd->digEncAlgoIdSz;
-
- if (pkcs7->signedAttribsSz != 0) {
- byte contentTypeOid[] =
- { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xF7, 0x0d, 0x01,
- 0x09, 0x03 };
- byte contentType[] =
- { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
- 0x07, 0x01 };
- byte messageDigestOid[] =
- { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
- 0x09, 0x04 };
-
- PKCS7Attrib cannedAttribs[2] =
- {
- { contentTypeOid, sizeof(contentTypeOid),
- contentType, sizeof(contentType) },
- { messageDigestOid, sizeof(messageDigestOid),
- esd->contentDigest, sizeof(esd->contentDigest) }
- };
- word32 cannedAttribsCount = sizeof(cannedAttribs)/sizeof(PKCS7Attrib);
+ /* default signed attrs unless caller opted out */
+ if (pkcs7->skipDefaultSignedAttribs == 0) {
+ hashSz = wc_HashGetDigestSize(esd->hashType);
+ if (hashSz < 0)
+ return hashSz;
+
+ #ifndef NO_ASN_TIME
+ if (signingTime == NULL || signingTimeSz == 0)
+ return BAD_FUNC_ARG;
+
+ tm = XTIME(0);
+ timeSz = GetAsnTimeString(&tm, signingTime, signingTimeSz);
+ if (timeSz < 0)
+ return timeSz;
+ #endif
+
+ cannedAttribsCount = sizeof(cannedAttribs)/sizeof(PKCS7Attrib);
+
+ cannedAttribs[idx].oid = contentTypeOid;
+ cannedAttribs[idx].oidSz = contentTypeOidSz;
+ cannedAttribs[idx].value = contentType;
+ cannedAttribs[idx].valueSz = contentTypeSz;
+ idx++;
+ #ifndef NO_ASN_TIME
+ cannedAttribs[idx].oid = signingTimeOid;
+ cannedAttribs[idx].oidSz = signingTimeOidSz;
+ cannedAttribs[idx].value = signingTime;
+ cannedAttribs[idx].valueSz = timeSz;
+ idx++;
+ #endif
+ cannedAttribs[idx].oid = messageDigestOid;
+ cannedAttribs[idx].oidSz = messageDigestOidSz;
+ cannedAttribs[idx].value = esd->contentDigest;
+ cannedAttribs[idx].valueSz = hashSz + 2; /* ASN.1 heading */
 esd->signedAttribsCount += cannedAttribsCount;
- esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[0], 2,
+ esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[0], 3,
 cannedAttribs, cannedAttribsCount);
+ } else {
+ esd->signedAttribsCount = 0;
+ esd->signedAttribsSz = 0;
+ }
+ /* add custom signed attributes if set */
+ if (pkcs7->signedAttribsSz > 0 && pkcs7->signedAttribs != NULL) {
 esd->signedAttribsCount += pkcs7->signedAttribsSz;
+ #ifdef NO_ASN_TIME
 esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[2], 4,
 pkcs7->signedAttribs, pkcs7->signedAttribsSz);
+ #else
+ esd->signedAttribsSz += EncodeAttributes(&esd->signedAttribs[3], 4,
+ pkcs7->signedAttribs, pkcs7->signedAttribsSz);
+ #endif
+ }
- flatSignedAttribs = (byte*)XMALLOC(esd->signedAttribsSz, 0, NULL);
- flatSignedAttribsSz = esd->signedAttribsSz;
- if (flatSignedAttribs == NULL) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(esd, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return MEMORY_E;
+#ifdef NO_ASN_TIME
+ /* signingTime parameters unused when ASN time support is disabled */
+ (void)signingTimeOidSz;
+ (void)signingTime;
+ (void)signingTimeSz;
+#endif
+
+ return 0;
+}
+
+
+/* gets correct encryption algo ID for SignedData, either CTC_<hash>wRSA or
+ * CTC_<hash>wECDSA, from pkcs7->publicKeyOID and pkcs7->hashOID.
+ *
+ * pkcs7 - pointer to PKCS7 structure
+ * digEncAlgoId - [OUT] output int to store correct algo ID in
+ * digEncAlgoType - [OUT] output for algo ID type
+ *
+ * return 0 on success, negative on error (unsupported key/hash combo) */
+static int wc_PKCS7_SignedDataGetEncAlgoId(PKCS7* pkcs7, int* digEncAlgoId,
+ int* digEncAlgoType)
+{
+ int algoId = 0;
+ int algoType = 0;
+
+ if (pkcs7 == NULL || digEncAlgoId == NULL || digEncAlgoType == NULL)
+ return BAD_FUNC_ARG;
+
+ if (pkcs7->publicKeyOID == RSAk) {
+
+ algoType = oidSigType;
+
+ /* map hash OID to the <hash>wRSA signature algo constant */
+ switch (pkcs7->hashOID) {
+ #ifndef NO_SHA
+ case SHAh:
+ algoId = CTC_SHAwRSA;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ case SHA224h:
+ algoId = CTC_SHA224wRSA;
+ break;
+ #endif
+ #ifndef NO_SHA256
+ case SHA256h:
+ algoId = CTC_SHA256wRSA;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case SHA384h:
+ algoId = CTC_SHA384wRSA;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case SHA512h:
+ algoId = CTC_SHA512wRSA;
+ break;
+ #endif
 }
- FlattenAttributes(flatSignedAttribs,
- esd->signedAttribs, esd->signedAttribsCount);
- esd->signedAttribSetSz = SetImplicit(ASN_SET, 0, esd->signedAttribsSz,
- esd->signedAttribSet);
+
 }
- /* Calculate the final hash and encrypt it. */
- {
- int result;
- word32 scratch = 0;
+#ifdef HAVE_ECC
+ else if (pkcs7->publicKeyOID == ECDSAk) {
+
+ algoType = oidSigType;
+
+ /* map hash OID to the <hash>wECDSA signature algo constant */
+ switch (pkcs7->hashOID) {
+ #ifndef NO_SHA
+ case SHAh:
+ algoId = CTC_SHAwECDSA;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ case SHA224h:
+ algoId = CTC_SHA224wECDSA;
+ break;
+ #endif
+ #ifndef NO_SHA256
+ case SHA256h:
+ algoId = CTC_SHA256wECDSA;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case SHA384h:
+ algoId = CTC_SHA384wECDSA;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case SHA512h:
+ algoId = CTC_SHA512wECDSA;
+ break;
+ #endif
+ }
+ }
+#endif /* HAVE_ECC */
+
+ if (algoId == 0) {
+ WOLFSSL_MSG("Invalid signature algorithm type");
+ return BAD_FUNC_ARG;
+ }
+
+ *digEncAlgoId = algoId;
+ *digEncAlgoType = algoType;
+
+ return 0;
+}
+
+
+/* build SignedData DigestInfo for use with PKCS#7/RSA
+ *
+ * pkcs7 - pointer to initialized PKCS7 struct
+ * flatSignedAttribs - flattened, signed attributes
+ * flatSignedAttribsSz - size of flatSignedAttribs, octets
+ * esd - pointer to initialized ESD struct
+ * digestInfo - [OUT] output array for DigestInfo
+ * digestInfoSz - [IN/OUT] - input size of array, size of digestInfo
+ *
+ * return 0 on success, negative on error */
+static int wc_PKCS7_BuildDigestInfo(PKCS7* pkcs7, byte* flatSignedAttribs,
+ word32 flatSignedAttribsSz, ESD* esd,
+ byte* digestInfo, word32* digestInfoSz)
+{
+ int ret, hashSz, digIdx = 0;
+ byte digestInfoSeq[MAX_SEQ_SZ];
+ byte digestStr[MAX_OCTET_STR_SZ];
+ byte attribSet[MAX_SET_SZ];
+ byte algoId[MAX_ALGO_SZ];
+ word32 digestInfoSeqSz, digestStrSz, algoIdSz;
+ word32 attribSetSz;
+
+ if (pkcs7 == NULL || esd == NULL || digestInfo == NULL ||
+ digestInfoSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ hashSz = wc_HashGetDigestSize(esd->hashType);
+ if (hashSz < 0)
+ return hashSz;
+
+ if (flatSignedAttribsSz != 0) {
+
+ if (flatSignedAttribs == NULL)
+ return BAD_FUNC_ARG;
+
+ /* digest is computed over SET OF header + flattened attributes */
+ attribSetSz = SetSet(flatSignedAttribsSz, attribSet);
+
+ ret = wc_HashInit(&esd->hash, esd->hashType);
+ if (ret < 0)
+ return ret;
+
+ ret = wc_HashUpdate(&esd->hash, esd->hashType,
+ attribSet, attribSetSz);
+ if (ret == 0)
+ ret = wc_HashUpdate(&esd->hash, esd->hashType,
+ flatSignedAttribs, flatSignedAttribsSz);
+ if (ret == 0)
+ ret = wc_HashFinal(&esd->hash, esd->hashType,
+ esd->contentAttribsDigest);
+ wc_HashFree(&esd->hash, esd->hashType);
+
+ if (ret < 0)
+ return ret;
+
+ } else {
+ /* when no attrs, digest is contentDigest without tag and length */
+ XMEMCPY(esd->contentAttribsDigest, esd->contentDigest + 2, hashSz);
+ }
+
+ /* set algoID, with NULL attributes */
+ algoIdSz = SetAlgoID(pkcs7->hashOID, algoId, oidHashType, 0);
+
+ digestStrSz = SetOctetString(hashSz, digestStr);
+ digestInfoSeqSz = SetSequence(algoIdSz + digestStrSz + hashSz,
+ digestInfoSeq);
+ if (*digestInfoSz < (digestInfoSeqSz + algoIdSz + digestStrSz + hashSz)) {
+ return BUFFER_E;
+ }
+
+ /* DigestInfo ::= SEQUENCE { algoId, OCTET STRING digest } */
+ XMEMCPY(digestInfo + digIdx, digestInfoSeq, digestInfoSeqSz);
+ digIdx += digestInfoSeqSz;
+ XMEMCPY(digestInfo + digIdx, algoId, algoIdSz);
+ digIdx += algoIdSz;
+ XMEMCPY(digestInfo + digIdx, digestStr, digestStrSz);
+ digIdx += digestStrSz;
+ XMEMCPY(digestInfo + digIdx, esd->contentAttribsDigest, hashSz);
+ digIdx += hashSz;
+
+ *digestInfoSz = digIdx;
+
+ return 0;
+}
+
+
+/* build SignedData signature over DigestInfo or content digest.
+ * RSA signs the full DER DigestInfo; ECDSA signs the raw digest only.
+ * On success stores the signature length in esd->encContentDigestSz.
+ *
+ * pkcs7 - pointer to initialized PKCS7 struct
+ * flatSignedAttribs - flattened, signed attributes
+ * flatSignedAttribsSz - size of flatSignedAttribs, octets
+ * esd - pointer to initialized ESD struct
+ *
+ * returns length of signature on success, negative on error */
+static int wc_PKCS7_SignedDataBuildSignature(PKCS7* pkcs7,
+ byte* flatSignedAttribs,
+ word32 flatSignedAttribsSz,
+ ESD* esd)
+{
+ int ret = 0;
+#if defined(HAVE_ECC) || \
+ (defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA))
+ int hashSz = 0;
+#endif
+#if defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA)
+ int hashOID;
+#endif
+ word32 digestInfoSz = MAX_PKCS7_DIGEST_SZ;
 #ifdef WOLFSSL_SMALL_STACK
- byte* digestInfo;
- RsaKey* privKey;
+ byte* digestInfo;
 #else
- RsaKey stack_privKey;
- RsaKey* privKey = &stack_privKey;
- byte digestInfo[MAX_SEQ_SZ + MAX_ALGO_SZ +
- MAX_OCTET_STR_SZ + SHA_DIGEST_SIZE];
+ byte digestInfo[MAX_PKCS7_DIGEST_SZ];
 #endif
- byte digestInfoSeq[MAX_SEQ_SZ];
- byte digestStr[MAX_OCTET_STR_SZ];
- word32 digestInfoSeqSz, digestStrSz;
- int digIdx = 0;
- if (pkcs7->signedAttribsSz != 0) {
- byte attribSet[MAX_SET_SZ];
- word32 attribSetSz;
+ if (pkcs7 == NULL || esd == NULL)
+ return BAD_FUNC_ARG;
- attribSetSz = SetSet(flatSignedAttribsSz, attribSet);
+#ifdef WOLFSSL_SMALL_STACK
+ digestInfo = (byte*)XMALLOC(digestInfoSz, pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (digestInfo == NULL) {
+ return MEMORY_E;
+ }
+#endif
+ XMEMSET(digestInfo, 0, digestInfoSz);
- ret = wc_InitSha(&esd->sha);
- if (ret < 0) {
- XFREE(flatSignedAttribs, 0, NULL);
+ /* also computes esd->contentAttribsDigest as a side effect */
+ ret = wc_PKCS7_BuildDigestInfo(pkcs7, flatSignedAttribs,
+ flatSignedAttribsSz, esd, digestInfo,
+ &digestInfoSz);
+ if (ret < 0) {
 #ifdef WOLFSSL_SMALL_STACK
- XFREE(esd, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
- return ret;
- }
- wc_ShaUpdate(&esd->sha, attribSet, attribSetSz);
- wc_ShaUpdate(&esd->sha, flatSignedAttribs, flatSignedAttribsSz);
- }
- wc_ShaFinal(&esd->sha, esd->contentAttribsDigest);
+ return ret;
+ }
- digestStrSz = SetOctetString(SHA_DIGEST_SIZE, digestStr);
- digestInfoSeqSz = SetSequence(esd->signerDigAlgoIdSz +
- digestStrSz + SHA_DIGEST_SIZE,
- digestInfoSeq);
+#if defined(HAVE_ECC) || \
+ (defined(HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK) && !defined(NO_RSA))
+ /* get digest size from hash type */
+ hashSz = wc_HashGetDigestSize(esd->hashType);
+ if (hashSz < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return hashSz;
+ }
+#endif
-#ifdef WOLFSSL_SMALL_STACK
- digestInfo = (byte*)XMALLOC(MAX_SEQ_SZ + MAX_ALGO_SZ +
- MAX_OCTET_STR_SZ + SHA_DIGEST_SIZE,
- NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (digestInfo == NULL) {
- if (pkcs7->signedAttribsSz != 0)
- XFREE(flatSignedAttribs, 0, NULL);
- XFREE(esd, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
- }
+ /* sign digestInfo */
+ switch (pkcs7->publicKeyOID) {
+
+#ifndef NO_RSA
+ case RSAk:
+ #ifdef HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK
+ if (pkcs7->rsaSignRawDigestCb != NULL) {
+ /* get hash OID */
+ hashOID = wc_HashGetOID(esd->hashType);
+
+ /* user signing plain digest, build DigestInfo themselves */
+ ret = pkcs7->rsaSignRawDigestCb(pkcs7,
+ esd->contentAttribsDigest, hashSz,
+ esd->encContentDigest, sizeof(esd->encContentDigest),
+ pkcs7->privateKey, pkcs7->privateKeySz, pkcs7->devId,
+ hashOID);
+ break;
+ }
+ #endif
+ ret = wc_PKCS7_RsaSign(pkcs7, digestInfo, digestInfoSz, esd);
+ break;
+#endif
+
+#ifdef HAVE_ECC
+ case ECDSAk:
+ /* CMS with ECDSA does not sign DigestInfo structure
+ * like PKCS#7 with RSA does */
+ ret = wc_PKCS7_EcdsaSign(pkcs7, esd->contentAttribsDigest,
+ hashSz, esd);
+ break;
 #endif
- XMEMCPY(digestInfo + digIdx, digestInfoSeq, digestInfoSeqSz);
- digIdx += digestInfoSeqSz;
- XMEMCPY(digestInfo + digIdx,
- esd->signerDigAlgoId, esd->signerDigAlgoIdSz);
- digIdx += esd->signerDigAlgoIdSz;
- XMEMCPY(digestInfo + digIdx, digestStr, digestStrSz);
- digIdx += digestStrSz;
- XMEMCPY(digestInfo + digIdx, esd->contentAttribsDigest,
- SHA_DIGEST_SIZE);
- digIdx += SHA_DIGEST_SIZE;
+ default:
+ WOLFSSL_MSG("Unsupported public key type");
+ ret = BAD_FUNC_ARG;
+ }
 #ifdef WOLFSSL_SMALL_STACK
- privKey = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- if (privKey == NULL) {
- if (pkcs7->signedAttribsSz != 0)
- XFREE(flatSignedAttribs, 0, NULL);
- XFREE(digestInfo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(esd, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
- }
+ XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
- result = wc_InitRsaKey(privKey, NULL);
- if (result == 0)
- result = wc_RsaPrivateKeyDecode(pkcs7->privateKey, &scratch, privKey,
- pkcs7->privateKeySz);
- if (result < 0) {
- if (pkcs7->signedAttribsSz != 0)
- XFREE(flatSignedAttribs, 0, NULL);
+ if (ret >= 0) {
+ esd->encContentDigestSz = (word32)ret;
+ }
+
+ return ret;
+}
+
+
+/* build PKCS#7 signedData content type */
+static int PKCS7_EncodeSigned(PKCS7* pkcs7, ESD* esd,
+ const byte* hashBuf, word32 hashSz, byte* output, word32* outputSz,
+ byte* output2, word32* output2Sz)
+{
+ /* contentType OID (1.2.840.113549.1.9.3) */
+ const byte contentTypeOid[] =
+ { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xF7, 0x0d, 0x01,
+ 0x09, 0x03 };
+
+ /* messageDigest OID (1.2.840.113549.1.9.4) */
+ const byte messageDigestOid[] =
+ { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
+ 0x09, 0x04 };
+
+ /* signingTime OID () */
+ byte signingTimeOid[] =
+ { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
+ 0x09, 0x05};
+
+ Pkcs7Cert* certPtr = NULL;
+ word32 certSetSz = 0;
+
+ word32 signerInfoSz = 0;
+ word32 totalSz, total2Sz;
+ int idx = 0, ret = 0;
+ int digEncAlgoId, digEncAlgoType;
+ byte* flatSignedAttribs = NULL;
+ word32 flatSignedAttribsSz = 0;
+
+ byte signedDataOid[MAX_OID_SZ];
+ word32 signedDataOidSz;
+
+ byte signingTime[MAX_TIME_STRING_SZ];
+
+ if (pkcs7 == NULL || pkcs7->contentSz == 0 ||
+ pkcs7->encryptOID == 0 || pkcs7->hashOID == 0 || pkcs7->rng == 0 ||
+ output == NULL || outputSz == NULL || *outputSz == 0 || hashSz == 0 ||
+ hashBuf == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* verify the hash size matches */
#ifdef WOLFSSL_SMALL_STACK
- XFREE(privKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(digestInfo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(esd, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ esd = (ESD*)XMALLOC(sizeof(ESD), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (esd == NULL)
+ return MEMORY_E;
#endif
- return PUBLIC_KEY_E;
- }
- result = wc_RsaSSL_Sign(digestInfo, digIdx,
- esd->encContentDigest,
- sizeof(esd->encContentDigest),
- privKey, pkcs7->rng);
+ XMEMSET(esd, 0, sizeof(ESD));
- wc_FreeRsaKey(privKey);
+ /* set content type based on contentOID, unless user has set custom one
+ with wc_PKCS7_SetContentType() */
+ if (pkcs7->contentTypeSz == 0) {
+ /* default to DATA content type if user has not set */
+ if (pkcs7->contentOID == 0) {
+ pkcs7->contentOID = DATA;
+ }
+
+ ret = wc_SetContentType(pkcs7->contentOID, pkcs7->contentType,
+ sizeof(pkcs7->contentType));
+ if (ret < 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(privKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(digestInfo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+ return ret;
+ }
+ pkcs7->contentTypeSz = ret;
+ }
- if (result < 0) {
- if (pkcs7->signedAttribsSz != 0)
- XFREE(flatSignedAttribs, 0, NULL);
+ /* set signedData outer content type */
+ ret = wc_SetContentType(SIGNED_DATA, signedDataOid, sizeof(signedDataOid));
+ if (ret < 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(esd, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return result;
+ return ret;
+ }
+ signedDataOidSz = ret;
+
+ if (pkcs7->sidType != DEGENERATE_SID) {
+ esd->hashType = wc_OidGetHash(pkcs7->hashOID);
+ if (wc_HashGetDigestSize(esd->hashType) != (int)hashSz) {
+ WOLFSSL_MSG("hashSz did not match hashOID");
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return BUFFER_E;
}
- esd->encContentDigestSz = (word32)result;
+
+ /* include hash */
+ esd->contentDigest[0] = ASN_OCTET_STRING;
+ esd->contentDigest[1] = (byte)hashSz;
+ XMEMCPY(&esd->contentDigest[2], hashBuf, hashSz);
}
- signerInfoSz += flatSignedAttribsSz + esd->signedAttribSetSz;
- esd->signerDigestSz = SetOctetString(esd->encContentDigestSz,
- esd->signerDigest);
- signerInfoSz += esd->signerDigestSz + esd->encContentDigestSz;
+ if (pkcs7->detached == 1) {
+ /* do not include content if generating detached signature */
+ esd->innerOctetsSz = 0;
+ esd->innerContSeqSz = 0;
+ esd->contentInfoSeqSz = SetSequence(pkcs7->contentTypeSz,
+ esd->contentInfoSeq);
+ } else {
+ esd->innerOctetsSz = SetOctetString(pkcs7->contentSz, esd->innerOctets);
+ esd->innerContSeqSz = SetExplicit(0, esd->innerOctetsSz +
+ pkcs7->contentSz, esd->innerContSeq);
+ esd->contentInfoSeqSz = SetSequence(pkcs7->contentSz +
+ esd->innerOctetsSz + pkcs7->contentTypeSz +
+ esd->innerContSeqSz, esd->contentInfoSeq);
+ }
- esd->signerInfoSeqSz = SetSequence(signerInfoSz, esd->signerInfoSeq);
- signerInfoSz += esd->signerInfoSeqSz;
+ /* SignerIdentifier */
+ if (pkcs7->sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+ /* IssuerAndSerialNumber */
+ esd->issuerSnSz = SetSerialNumber(pkcs7->issuerSn, pkcs7->issuerSnSz,
+ esd->issuerSn, MAX_SN_SZ, MAX_SN_SZ);
+ signerInfoSz += esd->issuerSnSz;
+ esd->issuerNameSz = SetSequence(pkcs7->issuerSz, esd->issuerName);
+ signerInfoSz += esd->issuerNameSz + pkcs7->issuerSz;
+ esd->issuerSnSeqSz = SetSequence(signerInfoSz, esd->issuerSnSeq);
+ signerInfoSz += esd->issuerSnSeqSz;
+
+ if (pkcs7->version == 3) {
+ /* RFC 4108 version MUST be 3 for firmware package signer */
+ esd->signerVersionSz = SetMyVersion(3, esd->signerVersion, 0);
+ }
+ else {
+ /* version MUST be 1 otherwise*/
+ esd->signerVersionSz = SetMyVersion(1, esd->signerVersion, 0);
+ }
+
+ } else if (pkcs7->sidType == CMS_SKID) {
+ /* SubjectKeyIdentifier */
+ esd->issuerSKIDSz = SetOctetString(KEYID_SIZE, esd->issuerSKID);
+ esd->issuerSKIDSeqSz = SetExplicit(0, esd->issuerSKIDSz + KEYID_SIZE,
+ esd->issuerSKIDSeq);
+ signerInfoSz += (esd->issuerSKIDSz + esd->issuerSKIDSeqSz +
+ KEYID_SIZE);
+
+ /* version MUST be 3 */
+ esd->signerVersionSz = SetMyVersion(3, esd->signerVersion, 0);
+ } else if (pkcs7->sidType == DEGENERATE_SID) {
+ /* no signer info added */
+ } else {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return SKID_E;
+ }
+
+ if (pkcs7->sidType != DEGENERATE_SID) {
+ signerInfoSz += esd->signerVersionSz;
+ esd->signerDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->signerDigAlgoId,
+ oidHashType, 0);
+ signerInfoSz += esd->signerDigAlgoIdSz;
+
+ /* set signatureAlgorithm */
+ ret = wc_PKCS7_SignedDataGetEncAlgoId(pkcs7, &digEncAlgoId,
+ &digEncAlgoType);
+ if (ret < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
+ }
+ esd->digEncAlgoIdSz = SetAlgoID(digEncAlgoId, esd->digEncAlgoId,
+ digEncAlgoType, 0);
+ signerInfoSz += esd->digEncAlgoIdSz;
+
+ /* build up signed attributes, include contentType, signingTime, and
+ messageDigest by default */
+ ret = wc_PKCS7_BuildSignedAttributes(pkcs7, esd, pkcs7->contentType,
+ pkcs7->contentTypeSz,
+ contentTypeOid, sizeof(contentTypeOid),
+ messageDigestOid, sizeof(messageDigestOid),
+ signingTimeOid, sizeof(signingTimeOid),
+ signingTime, sizeof(signingTime));
+ if (ret < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
+ }
+
+ if (esd->signedAttribsSz > 0) {
+ flatSignedAttribs = (byte*)XMALLOC(esd->signedAttribsSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ flatSignedAttribsSz = esd->signedAttribsSz;
+ if (flatSignedAttribs == NULL) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return MEMORY_E;
+ }
+
+ FlattenAttributes(pkcs7, flatSignedAttribs,
+ esd->signedAttribs, esd->signedAttribsCount);
+ esd->signedAttribSetSz = SetImplicit(ASN_SET, 0, esd->signedAttribsSz,
+ esd->signedAttribSet);
+ } else {
+ esd->signedAttribSetSz = 0;
+ }
+
+ /* Calculate the final hash and encrypt it. */
+ ret = wc_PKCS7_SignedDataBuildSignature(pkcs7, flatSignedAttribs,
+ flatSignedAttribsSz, esd);
+ if (ret < 0) {
+ if (pkcs7->signedAttribsSz != 0)
+ XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
+ }
+
+ signerInfoSz += flatSignedAttribsSz + esd->signedAttribSetSz;
+
+ esd->signerDigestSz = SetOctetString(esd->encContentDigestSz,
+ esd->signerDigest);
+ signerInfoSz += esd->signerDigestSz + esd->encContentDigestSz;
+
+ esd->signerInfoSeqSz = SetSequence(signerInfoSz, esd->signerInfoSeq);
+ signerInfoSz += esd->signerInfoSeqSz;
+ }
esd->signerInfoSetSz = SetSet(signerInfoSz, esd->signerInfoSet);
signerInfoSz += esd->signerInfoSetSz;
- esd->certsSetSz = SetImplicit(ASN_SET, 0, pkcs7->singleCertSz,
- esd->certsSet);
+ /* certificates [0] IMPLICIT CertificateSet */
+ /* get total certificates size */
+ certPtr = pkcs7->certList;
+ while (certPtr != NULL) {
+ certSetSz += certPtr->derSz;
+ certPtr = certPtr->next;
+ }
+ certPtr = NULL;
- esd->singleDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->singleDigAlgoId,
- hashType, 0);
- esd->digAlgoIdSetSz = SetSet(esd->singleDigAlgoIdSz, esd->digAlgoIdSet);
+ if (certSetSz > 0)
+ esd->certsSetSz = SetImplicit(ASN_SET, 0, certSetSz, esd->certsSet);
+ if (pkcs7->sidType != DEGENERATE_SID) {
+ esd->singleDigAlgoIdSz = SetAlgoID(pkcs7->hashOID, esd->singleDigAlgoId,
+ oidHashType, 0);
+ }
+ esd->digAlgoIdSetSz = SetSet(esd->singleDigAlgoIdSz, esd->digAlgoIdSet);
- esd->versionSz = SetMyVersion(1, esd->version, 0);
+ if (pkcs7->version == 3) {
+ /* RFC 4108 version MUST be 3 for firmware package signer */
+ esd->versionSz = SetMyVersion(3, esd->version, 0);
+ }
+ else {
+ esd->versionSz = SetMyVersion(1, esd->version, 0);
+ }
totalSz = esd->versionSz + esd->singleDigAlgoIdSz + esd->digAlgoIdSetSz +
- esd->contentInfoSeqSz + esd->certsSetSz + pkcs7->singleCertSz +
- esd->innerOctetsSz + esd->innerContSeqSz +
- innerOidSz + pkcs7->contentSz +
- signerInfoSz;
- esd->innerSeqSz = SetSequence(totalSz, esd->innerSeq);
+ esd->contentInfoSeqSz + pkcs7->contentTypeSz +
+ esd->innerContSeqSz + esd->innerOctetsSz + pkcs7->contentSz;
+ total2Sz = esd->certsSetSz + certSetSz + signerInfoSz;
+
+ if (pkcs7->detached) {
+ totalSz -= pkcs7->contentSz;
+ }
+
+ esd->innerSeqSz = SetSequence(totalSz + total2Sz, esd->innerSeq);
totalSz += esd->innerSeqSz;
- esd->outerContentSz = SetExplicit(0, totalSz, esd->outerContent);
- totalSz += esd->outerContentSz + outerOidSz;
- esd->outerSeqSz = SetSequence(totalSz, esd->outerSeq);
+ esd->outerContentSz = SetExplicit(0, totalSz + total2Sz, esd->outerContent);
+ totalSz += esd->outerContentSz + signedDataOidSz;
+ esd->outerSeqSz = SetSequence(totalSz + total2Sz, esd->outerSeq);
totalSz += esd->outerSeqSz;
- if (outputSz < totalSz) {
+ /* if using header/footer, we are not returning the content */
+ if (output2 && output2Sz) {
+ if (total2Sz > *output2Sz) {
+ if (pkcs7->signedAttribsSz != 0)
+ XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return BUFFER_E;
+ }
+
+ if (!pkcs7->detached) {
+ totalSz -= pkcs7->contentSz;
+ }
+ }
+ else {
+ /* if using single output buffer include content and footer */
+ totalSz += total2Sz;
+ }
+
+ if (totalSz > *outputSz) {
if (pkcs7->signedAttribsSz != 0)
- XFREE(flatSignedAttribs, 0, NULL);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(esd, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
return BUFFER_E;
}
idx = 0;
XMEMCPY(output + idx, esd->outerSeq, esd->outerSeqSz);
idx += esd->outerSeqSz;
- XMEMCPY(output + idx, outerOid, outerOidSz);
- idx += outerOidSz;
+ XMEMCPY(output + idx, signedDataOid, signedDataOidSz);
+ idx += signedDataOidSz;
XMEMCPY(output + idx, esd->outerContent, esd->outerContentSz);
idx += esd->outerContentSz;
XMEMCPY(output + idx, esd->innerSeq, esd->innerSeqSz);
@@ -634,326 +2402,3617 @@ int wc_PKCS7_EncodeSignedData(PKCS7* pkcs7, byte* output, word32 outputSz)
idx += esd->singleDigAlgoIdSz;
XMEMCPY(output + idx, esd->contentInfoSeq, esd->contentInfoSeqSz);
idx += esd->contentInfoSeqSz;
- XMEMCPY(output + idx, innerOid, innerOidSz);
- idx += innerOidSz;
+ XMEMCPY(output + idx, pkcs7->contentType, pkcs7->contentTypeSz);
+ idx += pkcs7->contentTypeSz;
XMEMCPY(output + idx, esd->innerContSeq, esd->innerContSeqSz);
idx += esd->innerContSeqSz;
XMEMCPY(output + idx, esd->innerOctets, esd->innerOctetsSz);
idx += esd->innerOctetsSz;
- XMEMCPY(output + idx, pkcs7->content, pkcs7->contentSz);
- idx += pkcs7->contentSz;
- XMEMCPY(output + idx, esd->certsSet, esd->certsSetSz);
+
+ /* support returning header and footer without content */
+ if (output2 && output2Sz) {
+ *outputSz = idx;
+ idx = 0;
+ }
+ else {
+ if (!pkcs7->detached) {
+ XMEMCPY(output + idx, pkcs7->content, pkcs7->contentSz);
+ idx += pkcs7->contentSz;
+ }
+ output2 = output;
+ }
+
+ /* certificates */
+ XMEMCPY(output2 + idx, esd->certsSet, esd->certsSetSz);
idx += esd->certsSetSz;
- XMEMCPY(output + idx, pkcs7->singleCert, pkcs7->singleCertSz);
- idx += pkcs7->singleCertSz;
- XMEMCPY(output + idx, esd->signerInfoSet, esd->signerInfoSetSz);
+ certPtr = pkcs7->certList;
+ while (certPtr != NULL) {
+ XMEMCPY(output2 + idx, certPtr->der, certPtr->derSz);
+ idx += certPtr->derSz;
+ certPtr = certPtr->next;
+ }
+ wc_PKCS7_FreeCertSet(pkcs7);
+
+ XMEMCPY(output2 + idx, esd->signerInfoSet, esd->signerInfoSetSz);
idx += esd->signerInfoSetSz;
- XMEMCPY(output + idx, esd->signerInfoSeq, esd->signerInfoSeqSz);
+ XMEMCPY(output2 + idx, esd->signerInfoSeq, esd->signerInfoSeqSz);
idx += esd->signerInfoSeqSz;
- XMEMCPY(output + idx, esd->signerVersion, esd->signerVersionSz);
+ XMEMCPY(output2 + idx, esd->signerVersion, esd->signerVersionSz);
idx += esd->signerVersionSz;
- XMEMCPY(output + idx, esd->issuerSnSeq, esd->issuerSnSeqSz);
- idx += esd->issuerSnSeqSz;
- XMEMCPY(output + idx, esd->issuerName, esd->issuerNameSz);
- idx += esd->issuerNameSz;
- XMEMCPY(output + idx, pkcs7->issuer, pkcs7->issuerSz);
- idx += pkcs7->issuerSz;
- XMEMCPY(output + idx, esd->issuerSn, esd->issuerSnSz);
- idx += esd->issuerSnSz;
- XMEMCPY(output + idx, esd->signerDigAlgoId, esd->signerDigAlgoIdSz);
+ /* SignerIdentifier */
+ if (pkcs7->sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+ /* IssuerAndSerialNumber */
+ XMEMCPY(output2 + idx, esd->issuerSnSeq, esd->issuerSnSeqSz);
+ idx += esd->issuerSnSeqSz;
+ XMEMCPY(output2 + idx, esd->issuerName, esd->issuerNameSz);
+ idx += esd->issuerNameSz;
+ XMEMCPY(output2 + idx, pkcs7->issuer, pkcs7->issuerSz);
+ idx += pkcs7->issuerSz;
+ XMEMCPY(output2 + idx, esd->issuerSn, esd->issuerSnSz);
+ idx += esd->issuerSnSz;
+ } else if (pkcs7->sidType == CMS_SKID) {
+ /* SubjectKeyIdentifier */
+ XMEMCPY(output2 + idx, esd->issuerSKIDSeq, esd->issuerSKIDSeqSz);
+ idx += esd->issuerSKIDSeqSz;
+ XMEMCPY(output2 + idx, esd->issuerSKID, esd->issuerSKIDSz);
+ idx += esd->issuerSKIDSz;
+ XMEMCPY(output2 + idx, pkcs7->issuerSubjKeyId, KEYID_SIZE);
+ idx += KEYID_SIZE;
+ } else if (pkcs7->sidType == DEGENERATE_SID) {
+ /* no signer infos in degenerate case */
+ } else {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return SKID_E;
+ }
+ XMEMCPY(output2 + idx, esd->signerDigAlgoId, esd->signerDigAlgoIdSz);
idx += esd->signerDigAlgoIdSz;
/* SignerInfo:Attributes */
- if (pkcs7->signedAttribsSz != 0) {
- XMEMCPY(output + idx, esd->signedAttribSet, esd->signedAttribSetSz);
+ if (flatSignedAttribsSz > 0) {
+ XMEMCPY(output2 + idx, esd->signedAttribSet, esd->signedAttribSetSz);
idx += esd->signedAttribSetSz;
- XMEMCPY(output + idx, flatSignedAttribs, flatSignedAttribsSz);
+ XMEMCPY(output2 + idx, flatSignedAttribs, flatSignedAttribsSz);
idx += flatSignedAttribsSz;
- XFREE(flatSignedAttribs, 0, NULL);
+ XFREE(flatSignedAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
}
- XMEMCPY(output + idx, esd->digEncAlgoId, esd->digEncAlgoIdSz);
+ XMEMCPY(output2 + idx, esd->digEncAlgoId, esd->digEncAlgoIdSz);
idx += esd->digEncAlgoIdSz;
- XMEMCPY(output + idx, esd->signerDigest, esd->signerDigestSz);
+ XMEMCPY(output2 + idx, esd->signerDigest, esd->signerDigestSz);
idx += esd->signerDigestSz;
- XMEMCPY(output + idx, esd->encContentDigest, esd->encContentDigestSz);
+ XMEMCPY(output2 + idx, esd->encContentDigest, esd->encContentDigestSz);
idx += esd->encContentDigestSz;
+ if (output2 && output2Sz) {
+ *output2Sz = idx;
+ idx = 0; /* success */
+ }
+ else {
+ *outputSz = idx;
+ }
+
#ifdef WOLFSSL_SMALL_STACK
- XFREE(esd, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
-
return idx;
}
+/* hashBuf: The computed digest for the pkcs7->content
+ * hashSz: The size of computed digest for the pkcs7->content based on hashOID
+ * outputHead: The PKCS7 header that goes on top of the raw data signed.
+ * outputFoot: The PKCS7 footer that goes at the end of the raw data signed.
+ * pkcs7->content: Not used
+ * pkcs7->contentSz: Must be provided as actual sign of raw data
+ * return codes: 0=success, negative=error
+ */
+int wc_PKCS7_EncodeSignedData_ex(PKCS7* pkcs7, const byte* hashBuf,
+ word32 hashSz, byte* outputHead, word32* outputHeadSz, byte* outputFoot,
+ word32* outputFootSz)
+{
+ int ret;
+#ifdef WOLFSSL_SMALL_STACK
+ ESD* esd;
+#else
+ ESD esd[1];
+#endif
-/* Finds the certificates in the message and saves it. */
-int wc_PKCS7_VerifySignedData(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz)
+ /* other args checked in wc_PKCS7_EncodeSigned_ex */
+ if (pkcs7 == NULL || outputFoot == NULL || outputFootSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ esd = (ESD*)XMALLOC(sizeof(ESD), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (esd == NULL)
+ return MEMORY_E;
+#endif
+
+ XMEMSET(esd, 0, sizeof(ESD));
+
+ ret = PKCS7_EncodeSigned(pkcs7, esd, hashBuf, hashSz,
+ outputHead, outputHeadSz, outputFoot, outputFootSz);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+/* Toggle detached signature mode on/off for PKCS#7/CMS SignedData content type.
+ * By default wolfCrypt includes the data to be signed in the SignedData
+ * bundle. This data can be omitted in the case when a detached signature is
+ * being created. To enable generation of detached signatures, set flag to "1",
+ * otherwise set to "0":
+ *
+ * flag 1 turns on support
+ * flag 0 turns off support
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * flag - turn on/off detached signature generation (1 or 0)
+ *
+ * Returns 0 on success, negative upon error. */
+int wc_PKCS7_SetDetached(PKCS7* pkcs7, word16 flag)
{
- word32 idx, contentType;
- int length, version, ret;
- byte* content = NULL;
- byte* sig = NULL;
- byte* cert = NULL;
- int contentSz = 0, sigSz = 0, certSz = 0;
+ if (pkcs7 == NULL || (flag != 0 && flag != 1))
+ return BAD_FUNC_ARG;
+
+ pkcs7->detached = flag;
+
+ return 0;
+}
- if (pkcs7 == NULL || pkiMsg == NULL || pkiMsgSz == 0)
+/* By default, SignedData bundles have the following signed attributes attached:
+ * contentType (1.2.840.113549.1.9.3)
+ * signgingTime (1.2.840.113549.1.9.5)
+ * messageDigest (1.2.840.113549.1.9.4)
+ *
+ * Calling this API before wc_PKCS7_EncodeSignedData() will disable the
+ * inclusion of those attributes.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ *
+ * Returns 0 on success, negative upon error. */
+int wc_PKCS7_NoDefaultSignedAttribs(PKCS7* pkcs7)
+{
+ if (pkcs7 == NULL)
return BAD_FUNC_ARG;
- idx = 0;
+ pkcs7->skipDefaultSignedAttribs = 1;
- /* Get the contentInfo sequence */
- if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+ return 0;
+}
- /* Get the contentInfo contentType */
- if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+/* return codes: >0: Size of signed PKCS7 output buffer, negative: error */
+int wc_PKCS7_EncodeSignedData(PKCS7* pkcs7, byte* output, word32 outputSz)
+{
+ int ret;
+ int hashSz;
+ enum wc_HashType hashType;
+ byte hashBuf[WC_MAX_DIGEST_SIZE];
+#ifdef WOLFSSL_SMALL_STACK
+ ESD* esd;
+#else
+ ESD esd[1];
+#endif
- if (contentType != SIGNED_DATA) {
- WOLFSSL_MSG("PKCS#7 input not of type SignedData");
- return PKCS7_OID_E;
+ /* other args checked in wc_PKCS7_EncodeSigned_ex */
+ if (pkcs7 == NULL || pkcs7->contentSz == 0 || pkcs7->content == NULL) {
+ return BAD_FUNC_ARG;
}
- /* get the ContentInfo content */
- if (pkiMsg[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
- return ASN_PARSE_E;
+ /* get hash type and size, validate hashOID */
+ hashType = wc_OidGetHash(pkcs7->hashOID);
+ hashSz = wc_HashGetDigestSize(hashType);
+ if (hashSz < 0)
+ return hashSz;
- if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+#ifdef WOLFSSL_SMALL_STACK
+ esd = (ESD*)XMALLOC(sizeof(ESD), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (esd == NULL)
+ return MEMORY_E;
+#endif
- /* Get the signedData sequence */
- if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+ XMEMSET(esd, 0, sizeof(ESD));
+ esd->hashType = hashType;
+
+ /* calculate hash for content */
+ ret = wc_HashInit(&esd->hash, esd->hashType);
+ if (ret == 0) {
+ ret = wc_HashUpdate(&esd->hash, esd->hashType,
+ pkcs7->content, pkcs7->contentSz);
+ if (ret == 0) {
+ ret = wc_HashFinal(&esd->hash, esd->hashType, hashBuf);
+ }
+ wc_HashFree(&esd->hash, esd->hashType);
+ }
- /* Get the version */
- if (GetMyVersion(pkiMsg, &idx, &version) < 0)
- return ASN_PARSE_E;
+ if (ret == 0) {
+ ret = PKCS7_EncodeSigned(pkcs7, esd, hashBuf, hashSz,
+ output, &outputSz, NULL, NULL);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(esd, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+
+/* Single-shot API to generate a CMS SignedData bundle that encapsulates a
+ * content of type FirmwarePkgData. Any recipient certificates should be
+ * loaded into the PKCS7 structure prior to calling this function, using
+ * wc_PKCS7_InitWithCert() and/or wc_PKCS7_AddCertificate().
+ *
+ * pkcs7 - pointer to initialized PKCS7 struct
+ * privateKey - private RSA/ECC key, used for signing SignedData
+ * privateKeySz - size of privateKey, octets
+ * signOID - public key algorithm OID, used for sign operation
+ * hashOID - hash algorithm OID, used for signature generation
+ * content - content to be encapsulated, of type FirmwarePkgData
+ * contentSz - size of content, octets
+ * signedAttribs - optional signed attributes
+ * signedAttribsSz - number of PKCS7Attrib members in signedAttribs
+ * output - output buffer for final bundle
+ * outputSz - size of output buffer, octets
+ *
+ * Returns length of generated bundle on success, negative upon error. */
+int wc_PKCS7_EncodeSignedFPD(PKCS7* pkcs7, byte* privateKey,
+ word32 privateKeySz, int signOID, int hashOID,
+ byte* content, word32 contentSz,
+ PKCS7Attrib* signedAttribs, word32 signedAttribsSz,
+ byte* output, word32 outputSz)
+{
+ int ret = 0;
+ WC_RNG rng;
+
+ if (pkcs7 == NULL || privateKey == NULL || privateKeySz == 0 ||
+ content == NULL || contentSz == 0 || output == NULL || outputSz == 0)
+ return BAD_FUNC_ARG;
- if (version != 1) {
- WOLFSSL_MSG("PKCS#7 signedData needs to be of version 1");
- return ASN_VERSION_E;
+ ret = wc_InitRng(&rng);
+ if (ret != 0)
+ return ret;
+
+ pkcs7->rng = &rng;
+ pkcs7->content = content;
+ pkcs7->contentSz = contentSz;
+ pkcs7->contentOID = FIRMWARE_PKG_DATA;
+ pkcs7->hashOID = hashOID;
+ pkcs7->encryptOID = signOID;
+ pkcs7->privateKey = privateKey;
+ pkcs7->privateKeySz = privateKeySz;
+ pkcs7->signedAttribs = signedAttribs;
+ pkcs7->signedAttribsSz = signedAttribsSz;
+ pkcs7->version = 3;
+
+ ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+ if (ret <= 0) {
+ WOLFSSL_MSG("Error encoding CMS SignedData content type");
}
- /* Get the set of DigestAlgorithmIdentifiers */
- if (GetSet(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+ pkcs7->rng = NULL;
+ wc_FreeRng(&rng);
- /* Skip the set. */
- idx += length;
+ return ret;
+}
- /* Get the inner ContentInfo sequence */
- if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+#ifndef NO_PKCS7_ENCRYPTED_DATA
- /* Get the inner ContentInfo contentType */
- if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+/* Single-shot API to generate a CMS SignedData bundle that encapsulates a
+ * CMS EncryptedData bundle. Content of inner EncryptedData is set to that
+ * of FirmwarePkgData. Any recipient certificates should be loaded into the
+ * PKCS7 structure prior to calling this function, using wc_PKCS7_InitWithCert()
+ * and/or wc_PKCS7_AddCertificate().
+ *
+ * pkcs7 - pointer to initialized PKCS7 struct
+ * encryptKey - encryption key used for encrypting EncryptedData
+ * encryptKeySz - size of encryptKey, octets
+ * privateKey - private RSA/ECC key, used for signing SignedData
+ * privateKeySz - size of privateKey, octets
+ * encryptOID - encryption algorithm OID, to be used as encryption
+ * algorithm for EncryptedData
+ * signOID - public key algorithm OID, to be used for sign
+ * operation in SignedData generation
+ * hashOID - hash algorithm OID, to be used for signature in
+ * SignedData generation
+ * content - content to be encapsulated
+ * contentSz - size of content, octets
+ * unprotectedAttribs - optional unprotected attributes, for EncryptedData
+ * unprotectedAttribsSz - number of PKCS7Attrib members in unprotectedAttribs
+ * signedAttribs - optional signed attributes, for SignedData
+ * signedAttribsSz - number of PKCS7Attrib members in signedAttribs
+ * output - output buffer for final bundle
+ * outputSz - size of output buffer, octets
+ *
+ * Returns length of generated bundle on success, negative upon error. */
+int wc_PKCS7_EncodeSignedEncryptedFPD(PKCS7* pkcs7, byte* encryptKey,
+ word32 encryptKeySz, byte* privateKey,
+ word32 privateKeySz, int encryptOID,
+ int signOID, int hashOID,
+ byte* content, word32 contentSz,
+ PKCS7Attrib* unprotectedAttribs,
+ word32 unprotectedAttribsSz,
+ PKCS7Attrib* signedAttribs,
+ word32 signedAttribsSz,
+ byte* output, word32 outputSz)
+{
+ int ret = 0, encryptedSz = 0;
+ byte* encrypted = NULL;
+ WC_RNG rng;
+
+ if (pkcs7 == NULL || encryptKey == NULL || encryptKeySz == 0 ||
+ privateKey == NULL || privateKeySz == 0 || content == NULL ||
+ contentSz == 0 || output == NULL || outputSz == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* 1: build up EncryptedData using FirmwarePkgData type, use output
+ * buffer as tmp for storage and to get size */
+
+ /* set struct elements, inner content type is FirmwarePkgData */
+ pkcs7->content = content;
+ pkcs7->contentSz = contentSz;
+ pkcs7->contentOID = FIRMWARE_PKG_DATA;
+ pkcs7->encryptOID = encryptOID;
+ pkcs7->encryptionKey = encryptKey;
+ pkcs7->encryptionKeySz = encryptKeySz;
+ pkcs7->unprotectedAttribs = unprotectedAttribs;
+ pkcs7->unprotectedAttribsSz = unprotectedAttribsSz;
+ pkcs7->version = 3;
+
+ encryptedSz = wc_PKCS7_EncodeEncryptedData(pkcs7, output, outputSz);
+ if (encryptedSz < 0) {
+ WOLFSSL_MSG("Error encoding CMS EncryptedData content type");
+ return encryptedSz;
+ }
+
+ /* save encryptedData, reset output buffer and struct */
+ encrypted = (byte*)XMALLOC(encryptedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (encrypted == NULL) {
+ ForceZero(output, outputSz);
+ return MEMORY_E;
+ }
+
+ XMEMCPY(encrypted, output, encryptedSz);
+ ForceZero(output, outputSz);
+
+ ret = wc_InitRng(&rng);
+ if (ret != 0) {
+ ForceZero(encrypted, encryptedSz);
+ XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* 2: build up SignedData, encapsulating EncryptedData */
+ pkcs7->rng = &rng;
+ pkcs7->content = encrypted;
+ pkcs7->contentSz = encryptedSz;
+ pkcs7->contentOID = ENCRYPTED_DATA;
+ pkcs7->hashOID = hashOID;
+ pkcs7->encryptOID = signOID;
+ pkcs7->privateKey = privateKey;
+ pkcs7->privateKeySz = privateKeySz;
+ pkcs7->signedAttribs = signedAttribs;
+ pkcs7->signedAttribsSz = signedAttribsSz;
+
+ ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+ if (ret <= 0) {
+ WOLFSSL_MSG("Error encoding CMS SignedData content type");
+ }
+
+ ForceZero(encrypted, encryptedSz);
+ XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->rng = NULL;
+ wc_FreeRng(&rng);
+
+ return ret;
+}
+
+#endif /* NO_PKCS7_ENCRYPTED_DATA */
+
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+/* Single-shot API to generate a CMS SignedData bundle that encapsulates a
+ * CMS CompressedData bundle. Content of inner CompressedData is set to that
+ * of FirmwarePkgData. Any recipient certificates should be loaded into the
+ * PKCS7 structure prior to calling this function, using wc_PKCS7_InitWithCert()
+ * and/or wc_PKCS7_AddCertificate().
+ *
+ * pkcs7 - pointer to initialized PKCS7 struct
+ * privateKey - private RSA/ECC key, used for signing SignedData
+ * privateKeySz - size of privateKey, octets
+ * signOID - public key algorithm OID, to be used for sign
+ * operation in SignedData generation
+ * hashOID - hash algorithm OID, to be used for signature in
+ * SignedData generation
+ * content - content to be encapsulated
+ * contentSz - size of content, octets
+ * signedAttribs - optional signed attributes, for SignedData
+ * signedAttribsSz - number of PKCS7Attrib members in signedAttribs
+ * output - output buffer for final bundle
+ * outputSz - size of output buffer, octets
+ *
+ * Returns length of generated bundle on success, negative upon error. */
+int wc_PKCS7_EncodeSignedCompressedFPD(PKCS7* pkcs7, byte* privateKey,
+ word32 privateKeySz, int signOID,
+ int hashOID, byte* content,
+ word32 contentSz,
+ PKCS7Attrib* signedAttribs,
+ word32 signedAttribsSz, byte* output,
+ word32 outputSz)
+{
+ int ret = 0, compressedSz = 0;
+ byte* compressed = NULL;
+ WC_RNG rng;
+
+ if (pkcs7 == NULL || privateKey == NULL || privateKeySz == 0 ||
+ content == NULL || contentSz == 0 || output == NULL || outputSz == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* 1: build up CompressedData using FirmwarePkgData type, use output
+ * buffer as tmp for storage and to get size */
+
+ /* set struct elements, inner content type is FirmwarePkgData */
+ pkcs7->content = content;
+ pkcs7->contentSz = contentSz;
+ pkcs7->contentOID = FIRMWARE_PKG_DATA;
+ pkcs7->version = 3;
+
+ compressedSz = wc_PKCS7_EncodeCompressedData(pkcs7, output, outputSz);
+ if (compressedSz < 0) {
+ WOLFSSL_MSG("Error encoding CMS CompressedData content type");
+ return compressedSz;
+ }
+
+ /* save compressedData, reset output buffer and struct */
+ compressed = (byte*)XMALLOC(compressedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (compressed == NULL) {
+ ForceZero(output, outputSz);
+ return MEMORY_E;
+ }
+
+ XMEMCPY(compressed, output, compressedSz);
+ ForceZero(output, outputSz);
+
+ ret = wc_InitRng(&rng);
+ if (ret != 0) {
+ ForceZero(compressed, compressedSz);
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* 2: build up SignedData, encapsulating EncryptedData */
+ pkcs7->rng = &rng;
+ pkcs7->content = compressed;
+ pkcs7->contentSz = compressedSz;
+ pkcs7->contentOID = COMPRESSED_DATA;
+ pkcs7->hashOID = hashOID;
+ pkcs7->encryptOID = signOID;
+ pkcs7->privateKey = privateKey;
+ pkcs7->privateKeySz = privateKeySz;
+ pkcs7->signedAttribs = signedAttribs;
+ pkcs7->signedAttribsSz = signedAttribsSz;
+
+ ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+ if (ret <= 0) {
+ WOLFSSL_MSG("Error encoding CMS SignedData content type");
+ }
+
+ ForceZero(compressed, compressedSz);
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->rng = NULL;
+ wc_FreeRng(&rng);
+
+ return ret;
+}
+
+#ifndef NO_PKCS7_ENCRYPTED_DATA
+
+/* Single-shot API to generate a CMS SignedData bundle that encapsulates a
+ * CMS EncryptedData bundle, which then encapsulates a CMS CompressedData
+ * bundle. Content of inner CompressedData is set to that of FirmwarePkgData.
+ * Any recipient certificates should be loaded into the PKCS7 structure prior
+ * to calling this function, using wc_PKCS7_InitWithCert() and/or
+ * wc_PKCS7_AddCertificate().
+ *
+ * pkcs7 - pointer to initialized PKCS7 struct
+ * encryptKey - encryption key used for encrypting EncryptedData
+ * encryptKeySz - size of encryptKey, octets
+ * privateKey - private RSA/ECC key, used for signing SignedData
+ * privateKeySz - size of privateKey, octets
+ * encryptOID - encryption algorithm OID, to be used as encryption
+ * algorithm for EncryptedData
+ * signOID - public key algorithm OID, to be used for sign
+ * operation in SignedData generation
+ * hashOID - hash algorithm OID, to be used for signature in
+ * SignedData generation
+ * content - content to be encapsulated
+ * contentSz - size of content, octets
+ * unprotectedAttribs - optional unprotected attributes, for EncryptedData
+ * unprotectedAttribsSz - number of PKCS7Attrib members in unprotectedAttribs
+ * signedAttribs - optional signed attributes, for SignedData
+ * signedAttribsSz - number of PKCS7Attrib members in signedAttribs
+ * output - output buffer for final bundle
+ * outputSz - size of output buffer, octets
+ *
+ * Returns length of generated bundle on success, negative upon error. */
+int wc_PKCS7_EncodeSignedEncryptedCompressedFPD(PKCS7* pkcs7, byte* encryptKey,
+ word32 encryptKeySz, byte* privateKey,
+ word32 privateKeySz, int encryptOID,
+ int signOID, int hashOID, byte* content,
+ word32 contentSz,
+ PKCS7Attrib* unprotectedAttribs,
+ word32 unprotectedAttribsSz,
+ PKCS7Attrib* signedAttribs,
+ word32 signedAttribsSz,
+ byte* output, word32 outputSz)
+{
+ int ret = 0, compressedSz = 0, encryptedSz = 0;
+ byte* compressed = NULL;
+ byte* encrypted = NULL;
+ WC_RNG rng;
+
+ if (pkcs7 == NULL || encryptKey == NULL || encryptKeySz == 0 ||
+ privateKey == NULL || privateKeySz == 0 || content == NULL ||
+ contentSz == 0 || output == NULL || outputSz == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* 1: build up CompressedData using FirmwarePkgData type, use output
+ * buffer as tmp for storage and to get size */
+ pkcs7->content = content;
+ pkcs7->contentSz = contentSz;
+ pkcs7->contentOID = FIRMWARE_PKG_DATA;
+ pkcs7->version = 3;
+
+ compressedSz = wc_PKCS7_EncodeCompressedData(pkcs7, output, outputSz);
+ if (compressedSz < 0) {
+ WOLFSSL_MSG("Error encoding CMS CompressedData content type");
+ return compressedSz;
+ }
+
+ /* save compressedData, reset output buffer and struct */
+ compressed = (byte*)XMALLOC(compressedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (compressed == NULL)
+ return MEMORY_E;
+
+ XMEMCPY(compressed, output, compressedSz);
+ ForceZero(output, outputSz);
+
+ /* 2: build up EncryptedData using CompressedData, use output
+ * buffer as tmp for storage and to get size */
+ pkcs7->content = compressed;
+ pkcs7->contentSz = compressedSz;
+ pkcs7->contentOID = COMPRESSED_DATA;
+ pkcs7->encryptOID = encryptOID;
+ pkcs7->encryptionKey = encryptKey;
+ pkcs7->encryptionKeySz = encryptKeySz;
+ pkcs7->unprotectedAttribs = unprotectedAttribs;
+ pkcs7->unprotectedAttribsSz = unprotectedAttribsSz;
+
+ encryptedSz = wc_PKCS7_EncodeEncryptedData(pkcs7, output, outputSz);
+ if (encryptedSz < 0) {
+ WOLFSSL_MSG("Error encoding CMS EncryptedData content type");
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return encryptedSz;
+ }
+
+ /* save encryptedData, reset output buffer and struct */
+ encrypted = (byte*)XMALLOC(encryptedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (encrypted == NULL) {
+ ForceZero(compressed, compressedSz);
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ XMEMCPY(encrypted, output, encryptedSz);
+ ForceZero(compressed, compressedSz);
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ ForceZero(output, outputSz);
+
+ ret = wc_InitRng(&rng);
+ if (ret != 0) {
+ ForceZero(encrypted, encryptedSz);
+ XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* 3: build up SignedData, encapsulating EncryptedData */
+ pkcs7->rng = &rng;
+ pkcs7->content = encrypted;
+ pkcs7->contentSz = encryptedSz;
+ pkcs7->contentOID = ENCRYPTED_DATA;
+ pkcs7->hashOID = hashOID;
+ pkcs7->encryptOID = signOID;
+ pkcs7->privateKey = privateKey;
+ pkcs7->privateKeySz = privateKeySz;
+ pkcs7->signedAttribs = signedAttribs;
+ pkcs7->signedAttribsSz = signedAttribsSz;
+
+ ret = wc_PKCS7_EncodeSignedData(pkcs7, output, outputSz);
+ if (ret <= 0) {
+ WOLFSSL_MSG("Error encoding CMS SignedData content type");
+ }
+
+ ForceZero(encrypted, encryptedSz);
+ XFREE(encrypted, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->rng = NULL;
+ wc_FreeRng(&rng);
+
+ return ret;
+}
+
+#endif /* !NO_PKCS7_ENCRYPTED_DATA */
+#endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
+
+
+#ifndef NO_RSA
+
+#ifdef HAVE_PKCS7_RSA_RAW_SIGN_CALLBACK
/* Register a user callback to perform the raw RSA sign operation over a
 * digest (CallbackRsaSignRawDigest). The callback is stored on the PKCS7
 * structure; presumably invoked during SignedData signing in place of a
 * direct private-key operation — confirm at call sites.
 *
 * pkcs7 - pointer to initialized PKCS7 structure
 * cb    - callback to register, must be non-NULL
 *
 * returns 0 on success, BAD_FUNC_ARG if pkcs7 or cb is NULL */
int wc_PKCS7_SetRsaSignRawDigestCb(PKCS7* pkcs7, CallbackRsaSignRawDigest cb)
{
    if (pkcs7 == NULL || cb == NULL) {
        return BAD_FUNC_ARG;
    }

    pkcs7->rsaSignRawDigestCb = cb;

    return 0;
}
+#endif
+
/* Attempt RSA verification of sig against the expected digest, trying each
 * certificate stored in pkcs7->cert[] until one validates the signature.
 * (The original header comment "returns size of signature put into out" was
 * a copy-paste error — there is no out parameter.)
 *
 * pkcs7  - initialized PKCS7 structure with parsed certificate set
 * sig    - signature value to verify
 * sigSz  - size of sig, octets
 * hash   - expected digest (PKCS#7 DigestInfo or plain CMS digest)
 * hashSz - size of hash, octets
 *
 * returns the size of the recovered digest (> 0, equal to hashSz) on
 * success, negative on error (SIG_VERIFY_E if no cert verified it) */
static int wc_PKCS7_RsaVerify(PKCS7* pkcs7, byte* sig, int sigSz,
                              byte* hash, word32 hashSz)
{
    int ret = 0, i;
    word32 scratch = 0, verified = 0;
#ifdef WOLFSSL_SMALL_STACK
    byte* digest;
    RsaKey* key;
    DecodedCert* dCert;
#else
    byte digest[MAX_PKCS7_DIGEST_SZ];
    RsaKey key[1];
    DecodedCert stack_dCert;
    DecodedCert* dCert = &stack_dCert;
#endif

    if (pkcs7 == NULL || sig == NULL || hash == NULL) {
        return BAD_FUNC_ARG;
    }

#ifdef WOLFSSL_SMALL_STACK
    digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (digest == NULL)
        return MEMORY_E;

    key = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
    if (key == NULL) {
        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
        return MEMORY_E;
    }

    dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
                                  DYNAMIC_TYPE_DCERT);
    if (dCert == NULL) {
        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
        XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
        return MEMORY_E;
    }
#endif

    XMEMSET(digest, 0, MAX_PKCS7_DIGEST_SZ);

    /* loop over certs received in certificates set, try to find one
     * that will validate signature */
    for (i = 0; i < MAX_PKCS7_CERTS; i++) {

        verified = 0;
        scratch = 0;

        /* empty slot, nothing to try */
        if (pkcs7->certSz[i] == 0)
            continue;

        ret = wc_InitRsaKey_ex(key, pkcs7->heap, pkcs7->devId);
        if (ret != 0) {
#ifdef WOLFSSL_SMALL_STACK
            XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
            XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
            XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT);
#endif
            return ret;
        }

        InitDecodedCert(dCert, pkcs7->cert[i], pkcs7->certSz[i], pkcs7->heap);
        /* not verifying, only using this to extract public key */
        ret = ParseCert(dCert, CA_TYPE, NO_VERIFY, 0);
        if (ret < 0) {
            /* unparsable cert is not fatal — try next cert slot */
            WOLFSSL_MSG("ASN RSA cert parse error");
            FreeDecodedCert(dCert);
            wc_FreeRsaKey(key);
            continue;
        }

        if (wc_RsaPublicKeyDecode(dCert->publicKey, &scratch, key,
                                  dCert->pubKeySize) < 0) {
            WOLFSSL_MSG("ASN RSA key decode error");
            FreeDecodedCert(dCert);
            wc_FreeRsaKey(key);
            continue;
        }

    #ifdef WOLFSSL_ASYNC_CRYPT
        do {
            ret = wc_AsyncWait(ret, &key->asyncDev,
                WC_ASYNC_FLAG_CALL_AGAIN);
    #endif
            if (ret >= 0) {
                /* recovers the encoded digest from sig into digest[];
                 * on success ret is the recovered digest length */
                ret = wc_RsaSSL_Verify(sig, sigSz, digest, MAX_PKCS7_DIGEST_SZ,
                    key);
            }
    #ifdef WOLFSSL_ASYNC_CRYPT
        } while (ret == WC_PENDING_E);
    #endif
        FreeDecodedCert(dCert);
        wc_FreeRsaKey(key);

        /* recovered digest must match expected length and value */
        if ((ret > 0) && (hashSz == (word32)ret)) {
            if (XMEMCMP(digest, hash, hashSz) == 0) {
                /* found signer that successfully verified signature */
                verified = 1;
                break;
            }
        }
    }

    if (verified == 0) {
        ret = SIG_VERIFY_E;
    }

#ifdef WOLFSSL_SMALL_STACK
    XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
    XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
    XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT);
#endif

    return ret;
}
+
+#endif /* NO_RSA */
+
+
+#ifdef HAVE_ECC
+
/* Attempt ECDSA verification of sig over hash, trying each certificate in
 * pkcs7->cert[] until one validates the signature. (The original header
 * comment "returns size of signature put into out" was a copy-paste error.)
 *
 * pkcs7  - initialized PKCS7 structure with parsed certificate set
 * sig    - DER-encoded ECDSA signature
 * sigSz  - size of sig, octets
 * hash   - digest the signature covers
 * hashSz - size of hash, octets
 *
 * returns 0 on success (signature verified), negative on error
 * (SIG_VERIFY_E if no certificate verified the signature) */
static int wc_PKCS7_EcdsaVerify(PKCS7* pkcs7, byte* sig, int sigSz,
                                byte* hash, word32 hashSz)
{
    int ret = 0, i;
    int res = 0;
    int verified = 0;
#ifdef WOLFSSL_SMALL_STACK
    byte* digest;
    ecc_key* key;
    DecodedCert* dCert;
#else
    byte digest[MAX_PKCS7_DIGEST_SZ];
    ecc_key key[1];
    DecodedCert stack_dCert;
    DecodedCert* dCert = &stack_dCert;
#endif
    word32 idx = 0;

    if (pkcs7 == NULL || sig == NULL)
        return BAD_FUNC_ARG;

#ifdef WOLFSSL_SMALL_STACK
    digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (digest == NULL)
        return MEMORY_E;

    key = (ecc_key*)XMALLOC(sizeof(ecc_key), pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
    if (key == NULL) {
        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
        return MEMORY_E;
    }

    dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
                                  DYNAMIC_TYPE_DCERT);
    if (dCert == NULL) {
        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
        XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
        return MEMORY_E;
    }
#endif

    XMEMSET(digest, 0, MAX_PKCS7_DIGEST_SZ);

    /* loop over certs received in certificates set, try to find one
     * that will validate signature */
    for (i = 0; i < MAX_PKCS7_CERTS; i++) {

        verified = 0;

        if (pkcs7->certSz[i] == 0)
            continue;

        ret = wc_ecc_init_ex(key, pkcs7->heap, pkcs7->devId);
        if (ret != 0) {
#ifdef WOLFSSL_SMALL_STACK
            XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
            XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
            XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT);
#endif
            return ret;
        }

        InitDecodedCert(dCert, pkcs7->cert[i], pkcs7->certSz[i], pkcs7->heap);
        /* not verifying, only using this to extract public key */
        ret = ParseCert(dCert, CA_TYPE, NO_VERIFY, 0);
        if (ret < 0) {
            WOLFSSL_MSG("ASN ECC cert parse error");
            FreeDecodedCert(dCert);
            wc_ecc_free(key);
            continue;
        }

        /* NOTE(review): decodes from pkcs7->publicKey/publicKeySz rather than
         * the just-parsed dCert->publicKey (cf. wc_PKCS7_RsaVerify above) —
         * confirm the caller populates pkcs7->publicKey from the signer cert.
         * Also note idx is not reset per loop iteration — confirm intended
         * behavior if a prior decode attempt advanced it. */
        if (wc_EccPublicKeyDecode(pkcs7->publicKey, &idx, key,
                                  pkcs7->publicKeySz) < 0) {
            WOLFSSL_MSG("ASN ECC key decode error");
            FreeDecodedCert(dCert);
            wc_ecc_free(key);
            continue;
        }

    #ifdef WOLFSSL_ASYNC_CRYPT
        do {
            ret = wc_AsyncWait(ret, &key->asyncDev,
                WC_ASYNC_FLAG_CALL_AGAIN);
    #endif
            if (ret >= 0) {
                /* res set to 1 only when the signature is valid */
                ret = wc_ecc_verify_hash(sig, sigSz, hash, hashSz, &res, key);
            }
    #ifdef WOLFSSL_ASYNC_CRYPT
        } while (ret == WC_PENDING_E);
    #endif

        FreeDecodedCert(dCert);
        wc_ecc_free(key);

        if (ret == 0 && res == 1) {
            /* found signer that successfully verified signature */
            verified = 1;
            break;
        }
    }

    if (verified == 0) {
        ret = SIG_VERIFY_E;
    }

#ifdef WOLFSSL_SMALL_STACK
    XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
    XFREE(key, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
    XFREE(dCert, pkcs7->heap, DYNAMIC_TYPE_DCERT);
#endif

    return ret;
}
+
+#endif /* HAVE_ECC */
+
+
/* build SignedData digest, both in PKCS#7 DigestInfo format and
 * as plain digest for CMS.
 *
 * pkcs7          - pointer to initialized PKCS7 struct
 * signedAttrib   - signed attributes (hashed instead of content when present)
 * signedAttribSz - size of signedAttrib, octets
 * pkcs7Digest    - [OUT] PKCS#7 DigestInfo
 * pkcs7DigestSz  - [IN/OUT] size of pkcs7Digest
 * plainDigest    - [OUT] pointer to plain digest, offset into pkcs7Digest
 * plainDigestSz  - [OUT] size of digest at plainDigest
 * hashBuf        - optional pre-computed content hash (may be NULL)
 * hashBufSz      - size of hashBuf; must equal the hash size when used
 *
 * returns 0 on success, negative on error */
static int wc_PKCS7_BuildSignedDataDigest(PKCS7* pkcs7, byte* signedAttrib,
                                      word32 signedAttribSz, byte* pkcs7Digest,
                                      word32* pkcs7DigestSz, byte** plainDigest,
                                      word32* plainDigestSz,
                                      const byte* hashBuf, word32 hashBufSz)
{
    int ret = 0, digIdx = 0;
    word32 attribSetSz = 0, hashSz = 0;
    byte attribSet[MAX_SET_SZ];
    byte digest[WC_MAX_DIGEST_SIZE];
    byte digestInfoSeq[MAX_SEQ_SZ];
    byte digestStr[MAX_OCTET_STR_SZ];
    byte algoId[MAX_ALGO_SZ];
    word32 digestInfoSeqSz, digestStrSz, algoIdSz;
#ifdef WOLFSSL_SMALL_STACK
    byte* digestInfo;
#else
    byte digestInfo[MAX_PKCS7_DIGEST_SZ];
#endif

    wc_HashAlg hash;
    enum wc_HashType hashType;

    /* check arguments */
    if (pkcs7 == NULL || pkcs7Digest == NULL ||
        pkcs7DigestSz == NULL || plainDigest == NULL) {
        return BAD_FUNC_ARG;
    }

    hashType = wc_OidGetHash(pkcs7->hashOID);
    ret = wc_HashGetDigestSize(hashType);
    if (ret < 0)
        return ret;
    hashSz = ret;

    if (signedAttribSz > 0) {
        if (signedAttrib == NULL)
            return BAD_FUNC_ARG;
    }
    else {
        /* no signed attributes: either a usable pre-computed hash or
         * content to hash must be available */
        if (hashBuf && hashBufSz > 0) {
            if (hashSz != hashBufSz)
                return BAD_FUNC_ARG;
        }
        else if (pkcs7->content == NULL)
            return BAD_FUNC_ARG;
    }

#ifdef WOLFSSL_SMALL_STACK
    digestInfo = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
        DYNAMIC_TYPE_TMP_BUFFER);
    if (digestInfo == NULL)
        return MEMORY_E;
#endif

    XMEMSET(pkcs7Digest, 0, *pkcs7DigestSz);
    XMEMSET(digest, 0, WC_MAX_DIGEST_SIZE);
    XMEMSET(digestInfo, 0, MAX_PKCS7_DIGEST_SZ);

    /* calculate digest */
    if (hashBuf && hashBufSz > 0 && signedAttribSz == 0) {
        /* caller supplied the content hash directly */
        XMEMCPY(digest, hashBuf, hashBufSz);
    }
    else {
        ret = wc_HashInit(&hash, hashType);
        if (ret < 0) {
    #ifdef WOLFSSL_SMALL_STACK
            XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
    #endif
            return ret;
        }

        if (signedAttribSz > 0) {
            /* signed attributes are hashed as an explicit SET, so the SET
             * header is hashed first, then the attribute bytes */
            attribSetSz = SetSet(signedAttribSz, attribSet);

            /* calculate digest */
            ret = wc_HashUpdate(&hash, hashType, attribSet, attribSetSz);
            if (ret == 0)
                ret = wc_HashUpdate(&hash, hashType, signedAttrib, signedAttribSz);
            if (ret == 0)
                ret = wc_HashFinal(&hash, hashType, digest);
        } else {
            ret = wc_HashUpdate(&hash, hashType, pkcs7->content, pkcs7->contentSz);
            if (ret == 0)
                ret = wc_HashFinal(&hash, hashType, digest);
        }

        wc_HashFree(&hash, hashType);
        if (ret < 0) {
    #ifdef WOLFSSL_SMALL_STACK
            XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
    #endif
            return ret;
        }
    }

    /* Set algoID, with NULL attributes */
    algoIdSz = SetAlgoID(pkcs7->hashOID, algoId, oidHashType, 0);

    digestStrSz = SetOctetString(hashSz, digestStr);
    digestInfoSeqSz = SetSequence(algoIdSz + digestStrSz + hashSz,
                                  digestInfoSeq);

    /* assemble DigestInfo: SEQUENCE { AlgorithmIdentifier,
     * digest OCTET STRING } */
    XMEMCPY(digestInfo + digIdx, digestInfoSeq, digestInfoSeqSz);
    digIdx += digestInfoSeqSz;
    XMEMCPY(digestInfo + digIdx, algoId, algoIdSz);
    digIdx += algoIdSz;
    XMEMCPY(digestInfo + digIdx, digestStr, digestStrSz);
    digIdx += digestStrSz;
    XMEMCPY(digestInfo + digIdx, digest, hashSz);
    digIdx += hashSz;

    XMEMCPY(pkcs7Digest, digestInfo, digIdx);
    *pkcs7DigestSz = digIdx;

    /* set plain digest pointer (points at raw digest tail of DigestInfo) */
    *plainDigest = pkcs7Digest + digIdx - hashSz;
    *plainDigestSz = hashSz;

#ifdef WOLFSSL_SMALL_STACK
    XFREE(digestInfo, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
    return 0;
}
- if (pkiMsg[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+
/* Verifies CMS/PKCS7 SignedData content digest matches that which is
 * included in the messageDigest signed attribute. Only called when
 * signed attributes are present, otherwise original signature verification
 * is done over content.
 *
 * pkcs7     - pointer to initialized PKCS7 struct
 * hashBuf   - pointer to user-provided hash buffer, used with
 *             wc_PKCS7_VerifySignedData_ex(); may be NULL, in which case
 *             the content in pkcs7->content is hashed here
 * hashSz    - size of hashBuf, octets
 *
 * return 0 on success, negative on error (SIG_VERIFY_E on mismatch) */
static int wc_PKCS7_VerifyContentMessageDigest(PKCS7* pkcs7,
                                               const byte* hashBuf,
                                               word32 hashSz)
{
    int ret = 0, digestSz = 0, innerAttribSz = 0;
    word32 idx = 0;
    byte* digestBuf = NULL;
#ifdef WOLFSSL_SMALL_STACK
    byte* digest = NULL;
#else
    byte digest[MAX_PKCS7_DIGEST_SZ];
#endif
    PKCS7DecodedAttrib* attrib;
    enum wc_HashType hashType;

    /* messageDigest OID (1.2.840.113549.1.9.4) */
    const byte mdOid[] =
            { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x09, 0x04 };

    if (pkcs7 == NULL)
        return BAD_FUNC_ARG;

    /* need either raw content or a pre-computed hash to compare against */
    if ((pkcs7->content == NULL || pkcs7->contentSz == 0) &&
        (hashBuf == NULL || hashSz == 0)) {
        WOLFSSL_MSG("SignedData bundle has no content or hash to verify");
        return BAD_FUNC_ARG;
    }

    /* lookup messageDigest attribute */
    attrib = findAttrib(pkcs7, mdOid, sizeof(mdOid));
    if (attrib == NULL) {
        WOLFSSL_MSG("messageDigest attribute not in bundle, must be when "
                    "signed attribs are present");
        return ASN_PARSE_E;
    }

    /* advance past attrib->value ASN.1 header and length */
    if (attrib->value == NULL || attrib->valueSz == 0)
        return ASN_PARSE_E;

    /* attribute value is an OCTET STRING wrapping the raw digest */
    if (attrib->value[idx++] != ASN_OCTET_STRING)
        return ASN_PARSE_E;

    if (GetLength(attrib->value, &idx, &innerAttribSz, attrib->valueSz) < 0)
        return ASN_PARSE_E;

    /* get hash type and size */
    hashType = wc_OidGetHash(pkcs7->hashOID);
    if (hashType == WC_HASH_TYPE_NONE) {
        WOLFSSL_MSG("Error getting hash type for PKCS7 content verification");
        return BAD_FUNC_ARG;
    }

    /* build content hash if needed, or use existing hash value */
    if (hashBuf == NULL) {

#ifdef WOLFSSL_SMALL_STACK
        digest = (byte*)XMALLOC(MAX_PKCS7_DIGEST_SZ, pkcs7->heap,
                                DYNAMIC_TYPE_TMP_BUFFER);
        if (digest == NULL)
            return MEMORY_E;
#endif
        XMEMSET(digest, 0, MAX_PKCS7_DIGEST_SZ);

        ret = wc_Hash(hashType, pkcs7->content, pkcs7->contentSz, digest,
                      MAX_PKCS7_DIGEST_SZ);
        if (ret < 0) {
            WOLFSSL_MSG("Error hashing PKCS7 content for verification");
#ifdef WOLFSSL_SMALL_STACK
            XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
            return ret;
        }

        digestBuf = digest;
        digestSz = wc_HashGetDigestSize(hashType);
        if (digestSz < 0) {
            WOLFSSL_MSG("Invalid hash type");
#ifdef WOLFSSL_SMALL_STACK
            XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
            return digestSz;
        }
    } else {

        /* user passed in pre-computed hash */
        digestBuf = (byte*)hashBuf;
        digestSz = (int)hashSz;
    }

    /* compare generated to hash in messageDigest attribute */
    if ((innerAttribSz != digestSz) ||
        (XMEMCMP(attrib->value + idx, digestBuf, (word32)digestSz) != 0)) {
        WOLFSSL_MSG("Content digest does not match messageDigest attrib value");
#ifdef WOLFSSL_SMALL_STACK
        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
        return SIG_VERIFY_E;
    }

    /* digest buffer was only allocated when we hashed locally */
    if (hashBuf == NULL) {
#ifdef WOLFSSL_SMALL_STACK
        XFREE(digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
    }

    return 0;
}
+
+
/* verifies SignedData signature, over either PKCS#7 DigestInfo or
 * content digest.
 *
 * pkcs7          - pointer to initialized PKCS7 struct
 * sig            - signature to verify
 * sigSz          - size of sig
 * signedAttrib   - signed attributes, or null if empty
 * signedAttribSz - size of signedAttributes
 * hashBuf        - optional pre-computed content hash (may be NULL)
 * hashSz         - size of hashBuf, octets
 *
 * return 0 on success, PKCS7_SIGNEEDS_CHECK when no certificates were
 * available so the signature/digests were stored for the caller to verify,
 * negative on error */
static int wc_PKCS7_SignedDataVerifySignature(PKCS7* pkcs7, byte* sig,
                                              word32 sigSz, byte* signedAttrib,
                                              word32 signedAttribSz,
                                              const byte* hashBuf, word32 hashSz)
{
    int ret = 0;
    word32 plainDigestSz = 0, pkcs7DigestSz;
    byte* plainDigest = NULL; /* offset into pkcs7Digest */
#ifdef WOLFSSL_SMALL_STACK
    byte* pkcs7Digest;
#else
    byte pkcs7Digest[MAX_PKCS7_DIGEST_SZ];
#endif

    if (pkcs7 == NULL)
        return BAD_FUNC_ARG;

    /* allocate space to build hash */
    pkcs7DigestSz = MAX_PKCS7_DIGEST_SZ;
#ifdef WOLFSSL_SMALL_STACK
    pkcs7Digest = (byte*)XMALLOC(pkcs7DigestSz, pkcs7->heap,
                                 DYNAMIC_TYPE_TMP_BUFFER);
    if (pkcs7Digest == NULL)
        return MEMORY_E;
#endif

    XMEMSET(pkcs7Digest, 0, pkcs7DigestSz);

    /* verify signed attrib digest matches that of content */
    if (signedAttrib != NULL) {
        ret = wc_PKCS7_VerifyContentMessageDigest(pkcs7, hashBuf, hashSz);
        if (ret != 0) {
#ifdef WOLFSSL_SMALL_STACK
            XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
            return ret;
        }
    }

    /* build hash to verify against */
    ret = wc_PKCS7_BuildSignedDataDigest(pkcs7, signedAttrib,
                                         signedAttribSz, pkcs7Digest,
                                         &pkcs7DigestSz, &plainDigest,
                                         &plainDigestSz, hashBuf, hashSz);
    if (ret < 0) {
#ifdef WOLFSSL_SMALL_STACK
        XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
        return ret;
    }

    /* If no certificates are available then store the signature and hash for
     * user to verify. Make sure that different return value than success is
     * returned because the signature was not verified here. */
    if (ret == 0) {
        byte haveCert = 0;
        int i;

        for (i = 0; i < MAX_PKCS7_CERTS; i++) {
            if (pkcs7->certSz[i] == 0)
                continue;
            haveCert = 1;
        }

        if (!haveCert) {
            WOLFSSL_MSG("No certificates in bundle to verify signature");

            /* store signature */
            XFREE(pkcs7->signature, pkcs7->heap, DYNAMIC_TYPE_SIGNATURE);
            pkcs7->signature = NULL;
            pkcs7->signatureSz = 0;
            pkcs7->signature = (byte*)XMALLOC(sigSz, pkcs7->heap,
                                              DYNAMIC_TYPE_SIGNATURE);
            if (pkcs7->signature == NULL) {
            #ifdef WOLFSSL_SMALL_STACK
                XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
            #endif
                return MEMORY_E;
            }
            XMEMCPY(pkcs7->signature, sig, sigSz);
            pkcs7->signatureSz = sigSz;

            /* store plain digest (CMS and ECC) */
            XFREE(pkcs7->plainDigest, pkcs7->heap, DYNAMIC_TYPE_DIGEST);
            pkcs7->plainDigest = NULL;
            pkcs7->plainDigestSz = 0;
            pkcs7->plainDigest = (byte*)XMALLOC(plainDigestSz, pkcs7->heap,
                                                DYNAMIC_TYPE_DIGEST);
            if (pkcs7->plainDigest == NULL) {
            #ifdef WOLFSSL_SMALL_STACK
                XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
            #endif
                return MEMORY_E;
            }
            XMEMCPY(pkcs7->plainDigest, plainDigest, plainDigestSz);
            pkcs7->plainDigestSz = plainDigestSz;

            /* store pkcs7 digest (default RSA) */
            XFREE(pkcs7->pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_DIGEST);
            pkcs7->pkcs7Digest = NULL;
            pkcs7->pkcs7DigestSz = 0;
            pkcs7->pkcs7Digest = (byte*)XMALLOC(pkcs7DigestSz, pkcs7->heap,
                                                DYNAMIC_TYPE_DIGEST);
            if (pkcs7->pkcs7Digest == NULL) {
            #ifdef WOLFSSL_SMALL_STACK
                XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
            #endif
                return MEMORY_E;
            }
            XMEMCPY(pkcs7->pkcs7Digest, pkcs7Digest, pkcs7DigestSz);
            pkcs7->pkcs7DigestSz = pkcs7DigestSz;

            #ifdef WOLFSSL_SMALL_STACK
            XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
            #endif
            return PKCS7_SIGNEEDS_CHECK;
        }
    }

    /* dispatch on key type set from digestEncryptionAlgorithm */
    switch (pkcs7->publicKeyOID) {

#ifndef NO_RSA
        case RSAk:
            /* try PKCS#7 DigestInfo form first, fall back to plain CMS
             * digest on failure */
            ret = wc_PKCS7_RsaVerify(pkcs7, sig, sigSz, pkcs7Digest,
                                     pkcs7DigestSz);
            if (ret < 0) {
                WOLFSSL_MSG("PKCS#7 verification failed, trying CMS");
                ret = wc_PKCS7_RsaVerify(pkcs7, sig, sigSz, plainDigest,
                                         plainDigestSz);
            }
            break;
#endif

#ifdef HAVE_ECC
        case ECDSAk:
            ret = wc_PKCS7_EcdsaVerify(pkcs7, sig, sigSz, plainDigest,
                                       plainDigestSz);
            break;
#endif

        default:
            WOLFSSL_MSG("Unsupported public key type");
            ret = BAD_FUNC_ARG;
    }

#ifdef WOLFSSL_SMALL_STACK
    XFREE(pkcs7Digest, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
    return ret;
}
- if (length > 0) {
- /* Get the sequence of the first signerInfo */
- if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
- /* Get the version */
- if (GetMyVersion(pkiMsg, &idx, &version) < 0)
/* set correct public key OID based on signature OID, stores in
 * pkcs7->publicKeyOID and returns same value
 *
 * pkcs7  - pointer to PKCS7 struct to update
 * sigOID - signature algorithm OID (or an already-plain key OID)
 *
 * returns the stored public key OID (positive) on success,
 * ASN_SIG_KEY_E for unsupported algorithms, BAD_FUNC_ARG on NULL input */
static int wc_PKCS7_SetPublicKeyOID(PKCS7* pkcs7, int sigOID)
{
    if (pkcs7 == NULL)
        return BAD_FUNC_ARG;

    pkcs7->publicKeyOID = 0;

    switch (sigOID) {

    #ifndef NO_RSA
        /* RSA signature types */
        case CTC_MD2wRSA:
        case CTC_MD5wRSA:
        case CTC_SHAwRSA:
        case CTC_SHA224wRSA:
        case CTC_SHA256wRSA:
        case CTC_SHA384wRSA:
        case CTC_SHA512wRSA:
            pkcs7->publicKeyOID = RSAk;
            break;

        /* if sigOID is already RSAk */
        case RSAk:
            pkcs7->publicKeyOID = sigOID;
            break;
    #endif

    #ifndef NO_DSA
        /* DSA signature types */
        case CTC_SHAwDSA:
            pkcs7->publicKeyOID = DSAk;
            break;

        /* if sigOID is already DSAk */
        case DSAk:
            pkcs7->publicKeyOID = sigOID;
            break;
    #endif

    #ifdef HAVE_ECC
        /* ECDSA signature types */
        case CTC_SHAwECDSA:
        case CTC_SHA224wECDSA:
        case CTC_SHA256wECDSA:
        case CTC_SHA384wECDSA:
        case CTC_SHA512wECDSA:
            pkcs7->publicKeyOID = ECDSAk;
            break;

        /* if sigOID is already ECDSAk */
        case ECDSAk:
            pkcs7->publicKeyOID = sigOID;
            break;
    #endif

        default:
            WOLFSSL_MSG("Unsupported public key algorithm");
            return ASN_SIG_KEY_E;
    }

    return pkcs7->publicKeyOID;
}
+
+
/* Parses through the attributes and adds them to the PKCS7 structure
 * Creates dynamic attribute structures that are free'd with calling
 * wc_PKCS7_Free()
 *
 * NOTE: An attribute has the ASN1 format of
 ** Sequence
 ****** Object ID
 ****** Set
 ********** {PrintableString, UTCTime, OCTET STRING ...}
 *
 * pkcs7 the PKCS7 structure to put the parsed attributes into
 * in    buffer holding all attributes
 * inSz  size of in buffer
 *
 * returns the number of attributes parsed on success, negative on error
 */
static int wc_PKCS7_ParseAttribs(PKCS7* pkcs7, byte* in, int inSz)
{
    int    found = 0;
    word32 idx   = 0;
    word32 oid;

    if (pkcs7 == NULL || in == NULL || inSz < 0) {
        return BAD_FUNC_ARG;
    }

    while (idx < (word32)inSz) {
        int length  = 0;
        int oidIdx;
        PKCS7DecodedAttrib* attrib;

        if (GetSequence(in, &idx, &length, inSz) < 0)
            return ASN_PARSE_E;

        attrib = (PKCS7DecodedAttrib*)XMALLOC(sizeof(PKCS7DecodedAttrib),
                pkcs7->heap, DYNAMIC_TYPE_PKCS7);
        if (attrib == NULL) {
            return MEMORY_E;
        }
        XMEMSET(attrib, 0, sizeof(PKCS7DecodedAttrib));

        /* raw attribute OID bytes are copied, not decoded to an enum */
        oidIdx = idx;
        if (GetObjectId(in, &idx, &oid, oidIgnoreType, inSz)
                < 0) {
            XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
            return ASN_PARSE_E;
        }
        attrib->oidSz = idx - oidIdx;
        attrib->oid = (byte*)XMALLOC(attrib->oidSz, pkcs7->heap,
                                     DYNAMIC_TYPE_PKCS7);
        if (attrib->oid == NULL) {
            XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
            return MEMORY_E;
        }
        XMEMCPY(attrib->oid, in + oidIdx, attrib->oidSz);

        /* Get Set that contains the printable string value */
        if (GetSet(in, &idx, &length, inSz) < 0) {
            XFREE(attrib->oid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
            XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
            return ASN_PARSE_E;
        }

        /* bounds check: SET contents must fit in the input buffer */
        if ((inSz - idx) < (word32)length) {
            XFREE(attrib->oid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
            XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
            return ASN_PARSE_E;
        }

        attrib->valueSz = (word32)length;
        attrib->value = (byte*)XMALLOC(attrib->valueSz, pkcs7->heap,
                                       DYNAMIC_TYPE_PKCS7);
        if (attrib->value == NULL) {
            XFREE(attrib->oid, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
            XFREE(attrib, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
            return MEMORY_E;
        }
        XMEMCPY(attrib->value, in + idx, attrib->valueSz);
        idx += length;

        /* store attribute in linked list (prepended at head) */
        if (pkcs7->decodedAttrib != NULL) {
            attrib->next = pkcs7->decodedAttrib;
            pkcs7->decodedAttrib = attrib;
        } else {
            pkcs7->decodedAttrib = attrib;
        }
        found++;
    }

    return found;
}
+
+
/* option to turn off support for degenerate cases
 * flag 0 turns off support
 * flag 1 turns on support
 *
 * by default support for SignedData degenerate cases is on
 * (a degenerate SignedData bundle carries certificates but no signers;
 * see pkcs7->noDegenerate consumers such as wc_PKCS7_ParseSignerInfo)
 */
void wc_PKCS7_AllowDegenerate(PKCS7* pkcs7, word16 flag)
{
    if (pkcs7) {
        if (flag) { /* flag of 1 turns on support for degenerate */
            pkcs7->noDegenerate = 0;
        }
        else { /* flag of 0 turns off support */
            pkcs7->noDegenerate = 1;
        }
    }
}
+
/* Parses through a signerInfo set. Reads buffer "in" from "idxIn" to "idxIn" +
 * length treating the current "idxIn" plus the length of set as max possible
 * index.
 *
 * In the case that signed attributes are found "signedAttrib" gets set to point
 * at their location in the buffer "in". Also in this case signedAttribSz gets
 * set to the size of the signedAttrib buffer.
 *
 * pkcs7          - pointer to initialized PKCS7 struct
 * in             - buffer holding the signerInfo set
 * inSz           - max readable size of in (0 signals a degenerate bundle)
 * idxIn          - [IN/OUT] parse index, advanced on success
 * degenerate     - nonzero when bundle has no signers
 * signedAttrib   - [OUT] pointer into "in" at signed attributes, if present
 * signedAttribSz - [OUT] size of signed attributes
 *
 * returns 0 on success
 */
static int wc_PKCS7_ParseSignerInfo(PKCS7* pkcs7, byte* in, word32 inSz,
    word32* idxIn, int degenerate, byte** signedAttrib, int* signedAttribSz)
{
    int ret = 0;
    int length;
    int version;
    word32 sigOID = 0, hashOID = 0;
    word32 idx = *idxIn, localIdx;
    byte tag;

    WOLFSSL_ENTER("wc_PKCS7_ParseSignerInfo");
    /* require a signer if degenerate case not allowed */
    if (inSz == 0 && pkcs7->noDegenerate == 1) {
        WOLFSSL_MSG("Set to not allow degenerate cases");
        return PKCS7_NO_SIGNER_E;
    }

    if (inSz == 0 && degenerate == 0) {
        WOLFSSL_MSG("PKCS7 signers expected");
        return PKCS7_NO_SIGNER_E;
    }

    /* not a degenerate case and there is elements in the set */
    if (inSz > 0 && degenerate == 0) {
        ret = wc_PKCS7_SignerInfoNew(pkcs7);

        /* Get the sequence of the first signerInfo */
        if (ret == 0 && GetSequence(in, &idx, &length, inSz) < 0)
            ret = ASN_PARSE_E;

        /* Get the version */
        if (ret == 0 && GetMyVersion(in, &idx, &version, inSz) < 0)
            ret = ASN_PARSE_E;

        if (ret == 0) {
            pkcs7->signerInfo->version = version;
        }

        if (ret == 0 && version == 1) {
            /* version 1 uses IssuerAndSerialNumber as SignerIdentifier */
            /* Get the sequence of IssuerAndSerialNumber */
            if (GetSequence(in, &idx, &length, inSz) < 0)
                ret = ASN_PARSE_E;

            if (ret == 0) {
                ret = wc_PKCS7_SignerInfoSetSID(pkcs7, in + idx, length);
                idx += length;
            }

        } else if (ret == 0 && version == 3) {
            /* version 3 normally uses SubjectKeyIdentifier */
            /* Get the sequence of SubjectKeyIdentifier */
            if (idx + 1 > inSz)
                ret = BUFFER_E;

            localIdx = idx;
            if (ret == 0 && GetASNTag(in, &localIdx, &tag, inSz) == 0 &&
                    tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
                idx++;

                if (GetLength(in, &idx, &length, inSz) <= 0)
                    ret = ASN_PARSE_E;

                if (ret == 0 && idx + 1 > inSz)
                    ret = BUFFER_E;

                if (ret == 0 && GetASNTag(in, &idx, &tag, inSz) < 0)
                    ret = ASN_PARSE_E;

                if (ret == 0 && tag != ASN_OCTET_STRING)
                    ret = ASN_PARSE_E;

                if (ret == 0 && GetLength(in, &idx, &length, inSz) < 0)
                    ret = ASN_PARSE_E;
            }
            else {
                /* check if SKID with ASN_CONTEXT_SPECIFIC otherwise in version
                 * 3 try to get issuerAndSerial */
                localIdx = idx;
                if (GetASNTag(in, &localIdx, &tag, inSz) == 0 &&
                        tag == ASN_CONTEXT_SPECIFIC) {
                    idx++;
                    if (ret == 0 && GetLength(in, &idx, &length, inSz) < 0)
                        ret = ASN_PARSE_E;
                }
                else {
                    if (pkcs7->version != 3) {
                        WOLFSSL_MSG("Unexpected signer info found with version");
                        ret = ASN_PARSE_E;
                    }

                    if (ret == 0 && GetSequence(in, &idx, &length, inSz) < 0)
                        ret = ASN_PARSE_E;
                }
            }

            if (ret == 0) {
                ret = wc_PKCS7_SignerInfoSetSID(pkcs7, in + idx, length);
                idx += length;
            }

        } else {
            WOLFSSL_MSG("PKCS#7 signerInfo version must be 1 or 3");
            ret = ASN_VERSION_E;
        }

        /* Get the sequence of digestAlgorithm */
        if (ret == 0 && GetAlgoId(in, &idx, &hashOID, oidHashType, inSz) < 0) {
            ret = ASN_PARSE_E;
        }
        pkcs7->hashOID = (int)hashOID;

        /* Get the IMPLICIT[0] SET OF signedAttributes */
        localIdx = idx;
        if (ret == 0 && GetASNTag(in, &localIdx, &tag, inSz) == 0 &&
                tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
            idx++;

            if (GetLength(in, &idx, &length, inSz) < 0)
                ret = ASN_PARSE_E;

            /* save pointer and length */
            *signedAttrib = &in[idx];
            *signedAttribSz = length;

            if (ret == 0 && wc_PKCS7_ParseAttribs(pkcs7, *signedAttrib,
                        *signedAttribSz) < 0) {
                WOLFSSL_MSG("Error parsing signed attributes");
                ret = ASN_PARSE_E;
            }

            idx += length;
        }

        /* Get digestEncryptionAlgorithm */
        if (ret == 0 && GetAlgoId(in, &idx, &sigOID, oidSigType, inSz) < 0) {
            ret = ASN_PARSE_E;
        }

        /* store public key type based on digestEncryptionAlgorithm */
        if (ret == 0) {
            ret = wc_PKCS7_SetPublicKeyOID(pkcs7, sigOID);
            if (ret < 0) {
                WOLFSSL_MSG("Failed to set public key OID from signature");
            }
            else {
                /* if previous return was positive then was success */
                ret = 0;
            }
        }
    }

    /* update index on success */
    if (ret == 0) {
        *idxIn = idx;
    }

    return ret;
}
- /* save pointer and length */
- sig = &pkiMsg[idx];
- sigSz = length;
+/* Finds the certificates in the message and saves it. By default allows
+ * degenerate cases which can have no signer.
+ *
+ * By default expects type SIGNED_DATA (SignedData) which can have any number of
+ * elements in signerInfos collection, including zero. (RFC2315 section 9.1)
+ * When adding support for the case of SignedAndEnvelopedData content types a
+ * signer is required. In this case the PKCS7 flag noDegenerate could be set.
+ */
+static int PKCS7_VerifySignedData(PKCS7* pkcs7, const byte* hashBuf,
+ word32 hashSz, byte* in, word32 inSz,
+ byte* in2, word32 in2Sz)
+{
+ word32 idx, maxIdx = inSz, outerContentType, contentTypeSz = 0, totalSz = 0;
+ int length = 0, version = 0, ret = 0;
+ byte* content = NULL;
+ byte* contentDynamic = NULL;
+ byte* sig = NULL;
+ byte* cert = NULL;
+ byte* signedAttrib = NULL;
+ byte* contentType = NULL;
+ int contentSz = 0, sigSz = 0, certSz = 0, signedAttribSz = 0;
+ word32 localIdx, start;
+ byte degenerate = 0;
+ byte detached = 0;
+ byte tag = 0;
+#ifdef ASN_BER_TO_DER
+ byte* der;
+#endif
+ int multiPart = 0, keepContent;
+ int contentLen = 0;
+
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+#ifndef NO_PKCS7_STREAM
+ word32 stateIdx = 0;
+ long rc;
+#endif
+
+ byte* pkiMsg2 = in2;
+ word32 pkiMsg2Sz = in2Sz;
+
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+#ifndef NO_PKCS7_STREAM
+ /* allow for 0 size inputs with stream mode */
+ if (pkiMsg == NULL && pkiMsgSz > 0)
+ return BAD_FUNC_ARG;
+
+#else
+ if (pkiMsg == NULL || pkiMsgSz == 0)
+ return BAD_FUNC_ARG;
+#endif
+
+ if ((hashSz > 0 && hashBuf == NULL) || (pkiMsg2Sz > 0 && pkiMsg2 == NULL)) {
+ return BAD_FUNC_ARG;
+ }
+ idx = 0;
+
+#ifdef ASN_BER_TO_DER
+ if (pkcs7->derSz > 0 && pkcs7->der) {
+ pkiMsg = in = pkcs7->der;
+ }
+#endif
+
+#ifndef NO_PKCS7_STREAM
+ if (pkcs7->stream == NULL) {
+ if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
+ return ret;
+ }
+ }
+#endif
+
+ switch (pkcs7->state) {
+ case WC_PKCS7_START:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+ MAX_VERSION_SZ + MAX_SEQ_SZ + MAX_LENGTH_SZ +
+ ASN_TAG_SZ + MAX_OID_SZ + MAX_SEQ_SZ,
+ &pkiMsg, &idx)) != 0) {
+ break;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, in, inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (pkcs7->stream->length > 0)? pkcs7->stream->length :inSz;
+ #endif
+
+ /* determine total message size */
+ totalSz = pkiMsgSz;
+ if (pkiMsg2 && pkiMsg2Sz > 0) {
+ totalSz += pkiMsg2Sz + pkcs7->contentSz;
+ }
+
+ /* Get the contentInfo sequence */
+ if (ret == 0 && GetSequence_ex(pkiMsg, &idx, &length, totalSz,
+ NO_USER_CHECK) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && length == 0 && pkiMsg[idx-1] == 0x80) {
+ #ifdef ASN_BER_TO_DER
+ word32 len = 0;
+
+ ret = wc_BerToDer(pkiMsg, pkiMsgSz, NULL, &len);
+ if (ret != LENGTH_ONLY_E)
+ return ret;
+ pkcs7->der = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->der == NULL)
+ return MEMORY_E;
+ ret = wc_BerToDer(pkiMsg, pkiMsgSz, pkcs7->der, &len);
+ if (ret < 0)
+ return ret;
+
+ pkiMsg = in = pkcs7->der;
+ pkiMsgSz = pkcs7->derSz = len;
+ idx = 0;
+ if (GetSequence_ex(pkiMsg, &idx, &length, pkiMsgSz,
+ NO_USER_CHECK) < 0)
+ return ASN_PARSE_E;
+
+ #ifndef NO_PKCS7_STREAM
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK,
+ pkiMsg, pkiMsgSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ #endif
+ #else
+ ret = BER_INDEF_E;
+ #endif
+ }
+
+ /* Get the contentInfo contentType */
+ if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &outerContentType,
+ pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && outerContentType != SIGNED_DATA) {
+ WOLFSSL_MSG("PKCS#7 input not of type SignedData");
+ ret = PKCS7_OID_E;
+ }
+
+ /* get the ContentInfo content */
+ if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, totalSz) != 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength_ex(pkiMsg, &idx, &length, totalSz,
+ NO_USER_CHECK) < 0)
+ ret = ASN_PARSE_E;
+
+ /* Get the signedData sequence */
+ if (ret == 0 && GetSequence_ex(pkiMsg, &idx, &length, totalSz,
+ NO_USER_CHECK) < 0)
+ ret = ASN_PARSE_E;
+
+ /* Get the version */
+ if (ret == 0 && GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+
+ /* version 1 follows RFC 2315 */
+ /* version 3 follows RFC 4108 */
+ if (ret == 0 && (version != 1 && version != 3)) {
+ WOLFSSL_MSG("PKCS#7 signedData needs to be version 1 or 3");
+ ret = ASN_VERSION_E;
+ }
+ pkcs7->version = version;
+
+ /* Get the set of DigestAlgorithmIdentifiers */
+ if (ret == 0 && GetSet(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ /* Skip the set. */
idx += length;
+ degenerate = (length == 0)? 1 : 0;
+ if (pkcs7->noDegenerate == 1 && degenerate == 1) {
+ ret = PKCS7_NO_SIGNER_E;
+ }
+
+ if (ret != 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+ break;
+ }
+ if (pkiMsg2 && pkiMsg2Sz > 0) {
+ pkcs7->stream->maxLen += pkiMsg2Sz + pkcs7->contentSz;
+ }
+ wc_PKCS7_StreamStoreVar(pkcs7, totalSz, 0, 0);
+ #endif
+
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE2);
+ FALL_THROUGH;
+
+ case WC_PKCS7_VERIFY_STAGE2:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+ MAX_SEQ_SZ + MAX_OID_SZ + ASN_TAG_SZ + MAX_LENGTH_SZ
+ + ASN_TAG_SZ + MAX_LENGTH_SZ, &pkiMsg, &idx)) != 0) {
+ break;
+ }
+
+ wc_PKCS7_StreamGetVar(pkcs7, &totalSz, 0, 0);
+ if (pkcs7->stream->length > 0)
+ pkiMsgSz = pkcs7->stream->length;
+ #ifdef ASN_BER_TO_DER
+ else if (pkcs7->der)
+ pkiMsgSz = pkcs7->derSz;
+ #endif
+ else
+ pkiMsgSz = inSz;
+
+ #endif
+ /* Get the inner ContentInfo sequence */
+ if (GetSequence_ex(pkiMsg, &idx, &length, pkiMsgSz,
+ NO_USER_CHECK) < 0)
+ ret = ASN_PARSE_E;
+
+ /* Get the inner ContentInfo contentType */
+ if (ret == 0) {
+ word32 tmpIdx = idx;
+
+ if (GetASNObjectId(pkiMsg, &idx, &length, pkiMsgSz) != 0)
+ ret = ASN_PARSE_E;
+
+ contentType = pkiMsg + tmpIdx;
+ contentTypeSz = length + (idx - tmpIdx);
+
+ idx += length;
+ }
+
+ if (ret != 0)
+ break;
+
+ /* Check for content info, it could be omitted when degenerate */
+ localIdx = idx;
+ ret = 0;
+ if (localIdx + 1 > pkiMsgSz) {
+ ret = BUFFER_E;
+ break;
+ }
+
+ if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) != 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength_ex(pkiMsg, &localIdx, &length, pkiMsgSz,
+ NO_USER_CHECK) <= 0)
+ ret = ASN_PARSE_E;
+
+ if (localIdx >= pkiMsgSz) {
+ ret = BUFFER_E;
+ }
+
+ /* get length of content in the case that there is multiple parts */
+ if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && tag == (ASN_OCTET_STRING | ASN_CONSTRUCTED)) {
+ multiPart = 1;
+
+ /* Get length of all OCTET_STRINGs. */
+ if (GetLength_ex(pkiMsg, &localIdx, &contentLen, pkiMsgSz,
+ NO_USER_CHECK) < 0)
+ ret = ASN_PARSE_E;
+
+ /* Check whether there is one OCTET_STRING inside. */
+ start = localIdx;
+ if (localIdx >= pkiMsgSz) {
+ ret = BUFFER_E;
+ }
+
+ if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz)
+ != 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && tag != ASN_OCTET_STRING)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength_ex(pkiMsg, &localIdx, &length, pkiMsgSz,
+ NO_USER_CHECK) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0) {
+ /* Use single OCTET_STRING directly. */
+ if (localIdx - start + length == (word32)contentLen)
+ multiPart = 0;
+ localIdx = start;
+ }
+ }
+
+ /* get length of content in case of single part */
+ if (ret == 0 && !multiPart) {
+ if (tag != ASN_OCTET_STRING)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength_ex(pkiMsg, &localIdx,
+ &length, pkiMsgSz, NO_USER_CHECK) < 0)
+ ret = ASN_PARSE_E;
+ }
+
+ /* update idx if successful */
+ if (ret == 0) {
+ /* support using header and footer without content */
+ if (pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0) {
+ localIdx = 0;
+ }
+ idx = localIdx;
+ }
+ else {
+
+ /* if pkcs7->content and pkcs7->contentSz are set, try to
+ process as a detached signature */
+ if (!degenerate &&
+ (pkcs7->content != NULL && pkcs7->contentSz != 0)) {
+ detached = 1;
+ }
+
+ if (!degenerate && !detached && ret != 0)
+ break;
+
+ length = 0; /* no content to read */
+ pkiMsg2 = pkiMsg;
+ pkiMsg2Sz = pkiMsgSz;
+ }
+
+ #ifndef NO_PKCS7_STREAM
+ /* save detached flag value */
+ pkcs7->stream->detached = detached;
+
+ /* save contentType */
+ pkcs7->stream->nonce = (byte*)XMALLOC(contentTypeSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->stream->nonce == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
+ else {
+ pkcs7->stream->nonceSz = contentTypeSz;
+ XMEMCPY(pkcs7->stream->nonce, contentType, contentTypeSz);
+ }
+
+ /* content expected? */
+ if ((ret == 0 && length > 0) &&
+ !(pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0)) {
+ pkcs7->stream->expected = length + ASN_TAG_SZ + MAX_LENGTH_SZ;
+ }
+ else {
+ pkcs7->stream->expected = ASN_TAG_SZ + MAX_LENGTH_SZ;
+ }
+
+ if (pkcs7->stream->expected > (pkcs7->stream->maxLen - idx)) {
+ pkcs7->stream->expected = pkcs7->stream->maxLen - idx;
+ }
+
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+ break;
+ }
+ wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, localIdx, length);
+
+ /* content length is in multiple parts */
+ if (multiPart) {
+ pkcs7->stream->expected = contentLen + ASN_TAG_SZ;
+ }
+ pkcs7->stream->multi = multiPart;
+
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE3);
+ FALL_THROUGH;
+
+ case WC_PKCS7_VERIFY_STAGE3:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+ pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+ break;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+ pkiMsg, pkiMsgSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ #ifdef ASN_BER_TO_DER
+ if (pkcs7->derSz != 0)
+ pkiMsgSz = pkcs7->derSz;
+ else
+ #endif
+ pkiMsgSz = (word32)rc;
+ wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, (int*)&localIdx, &length);
+
+ if (pkcs7->stream->length > 0) {
+ localIdx = 0;
+ }
+ multiPart = pkcs7->stream->multi;
+ detached = pkcs7->stream->detached;
+ maxIdx = idx + pkcs7->stream->expected;
+ #endif
+
+ /* Break out before content because it can be optional in degenerate
+ * cases. */
+ if (ret != 0 && !degenerate)
+ break;
+
+ /* get parts of content */
+ if (ret == 0 && multiPart) {
+ int i = 0;
+ keepContent = !(pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0);
+
+ if (keepContent) {
+ /* Create a buffer to hold content of OCTET_STRINGs. */
+ pkcs7->contentDynamic = (byte*)XMALLOC(contentLen, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->contentDynamic == NULL)
+ ret = MEMORY_E;
+ }
+
+ start = localIdx;
+ /* Use the data from each OCTET_STRING. */
+ while (ret == 0 && localIdx < start + contentLen) {
+ if (GetASNTag(pkiMsg, &localIdx, &tag, totalSz) < 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && tag != ASN_OCTET_STRING)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength(pkiMsg, &localIdx, &length, totalSz) < 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && length + localIdx > start + contentLen)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0) {
+ if (keepContent) {
+ XMEMCPY(pkcs7->contentDynamic + i, pkiMsg + localIdx,
+ length);
+ }
+ i += length;
+ localIdx += length;
+ }
+ }
+ localIdx = start; /* reset for sanity check, increment later */
+ length = i;
+ }
+
+ /* Save the inner data as the content. */
+ if (ret == 0 && length > 0) {
+ contentSz = length;
+
+ /* support using header and footer without content */
+ if (pkiMsg2 && pkiMsg2Sz > 0 && hashBuf && hashSz > 0) {
+ /* Content not provided, use provided pkiMsg2 footer */
+ content = NULL;
+ localIdx = 0;
+ if (contentSz != (int)pkcs7->contentSz) {
+ WOLFSSL_MSG("Data signed does not match contentSz provided");
+ ret = BUFFER_E;
+ }
+ }
+ else {
+ if ((word32)length > pkiMsgSz - localIdx) {
+ ret = BUFFER_E;
+ }
+
+ /* Content pointer for calculating hashes later */
+ if (ret == 0 && !multiPart) {
+ content = &pkiMsg[localIdx];
+ }
+ if (ret == 0 && multiPart) {
+ content = pkcs7->contentDynamic;
+ }
+
+ if (ret == 0) {
+ idx += length;
+
+ pkiMsg2 = pkiMsg;
+ pkiMsg2Sz = pkiMsgSz;
+ #ifndef NO_PKCS7_STREAM
+ pkcs7->stream->varOne = pkiMsg2Sz;
+ pkcs7->stream->flagOne = 1;
+ #endif
+ }
+ }
+ }
+ else {
+ pkiMsg2 = pkiMsg;
+ pkiMsg2Sz = pkiMsgSz;
+ #ifndef NO_PKCS7_STREAM
+ pkcs7->stream->varOne = pkiMsg2Sz;
+ pkcs7->stream->flagOne = 1;
+ #endif
+ }
+
+ /* If getting the content info failed and the message is not degenerate,
+ * return the error. Otherwise, with a degenerate message it is ok if
+ * the content info was omitted */
+ if (!degenerate && !detached && (ret != 0)) {
+ break;
+ }
+ else {
+ ret = 0; /* reset ret state on degenerate case */
+ }
+
+ #ifndef NO_PKCS7_STREAM
+ /* save content */
+ if (detached == 1) {
+ /* if detached, use content from user in pkcs7 struct */
+ content = pkcs7->content;
+ contentSz = pkcs7->contentSz;
+ }
+
+ if (content != NULL) {
+ XFREE(pkcs7->stream->content, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->stream->content = (byte*)XMALLOC(contentSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->stream->content == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
+ else {
+ XMEMCPY(pkcs7->stream->content, content, contentSz);
+ pkcs7->stream->contentSz = contentSz;
+ }
+ }
+ #endif /* !NO_PKCS7_STREAM */
+
+ /* Get the implicit[0] set of certificates */
+ if (ret == 0 && idx >= pkiMsg2Sz)
+ ret = BUFFER_E;
+
+ length = 0; /* set length to 0 to check if reading in any certs */
+ localIdx = idx;
+ if (ret == 0 && GetASNTag(pkiMsg2, &localIdx, &tag, pkiMsg2Sz) == 0
+ && tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+ idx++;
+ if (GetLength_ex(pkiMsg2, &idx, &length, maxIdx, NO_USER_CHECK)
+ < 0)
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret != 0) {
+ break;
+ }
+ #ifndef NO_PKCS7_STREAM
+ if (in2 && in2Sz > 0 && hashBuf && hashSz > 0) {
+ stateIdx = idx; /* case where all data was read from in2 */
+ }
+
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+ break;
+ }
+ wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, 0, length);
+ if (length > 0) {
+ pkcs7->stream->expected = length;
+ }
+ else {
+ pkcs7->stream->expected = MAX_SEQ_SZ;
+ if (pkcs7->stream->expected > (pkcs7->stream->maxLen -
+ pkcs7->stream->totalRd) + pkcs7->stream->length) {
+ pkcs7->stream->expected = (pkcs7->stream->maxLen -
+ pkcs7->stream->totalRd) + pkcs7->stream->length;
+ }
+ }
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE4);
+ FALL_THROUGH;
+
+ case WC_PKCS7_VERIFY_STAGE4:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+ pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+ break;
+ }
+
+ wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, &length);
+ if (pkcs7->stream->flagOne) {
+ pkiMsg2 = pkiMsg;
+ }
+
+ /* restore content */
+ content = pkcs7->stream->content;
+ contentSz = pkcs7->stream->contentSz;
+
+ /* restore detached flag */
+ detached = pkcs7->stream->detached;
+
+ /* store certificate if needed */
+ if (length > 0 && in2Sz == 0) {
+ /* free tmpCert if not NULL */
+ XFREE(pkcs7->stream->tmpCert, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->stream->tmpCert = (byte*)XMALLOC(length,
+ pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if ((pkiMsg2 == NULL) || (pkcs7->stream->tmpCert == NULL)) {
+ ret = MEMORY_E;
+ break;
+ }
+ XMEMCPY(pkcs7->stream->tmpCert, pkiMsg2 + idx, length);
+ pkiMsg2 = pkcs7->stream->tmpCert;
+ pkiMsg2Sz = length;
+ idx = 0;
+ }
+ #endif
+
+ if (length > 0) {
+ /* At this point, idx is at the first certificate in
+ * a set of certificates. There may be more than one,
+ * or none, or they may be a PKCS 6 extended
+ * certificate. We want to save the first cert if it
+ * is X.509. */
+
+ word32 certIdx = idx;
+
+ if (length < MAX_LENGTH_SZ + ASN_TAG_SZ)
+ ret = BUFFER_E;
+
+ if (ret == 0)
+ ret = GetASNTag(pkiMsg2, &certIdx, &tag, pkiMsg2Sz);
+
+ if (ret == 0 && tag == (ASN_CONSTRUCTED | ASN_SEQUENCE)) {
+ if (GetLength(pkiMsg2, &certIdx, &certSz, pkiMsg2Sz) < 0)
+ ret = ASN_PARSE_E;
+
+ cert = &pkiMsg2[idx];
+ certSz += (certIdx - idx);
+ if (certSz > length) {
+ ret = BUFFER_E;
+ break;
+ }
+ }
+ #ifdef ASN_BER_TO_DER
+ der = pkcs7->der;
+ #endif
+ contentDynamic = pkcs7->contentDynamic;
+ version = pkcs7->version;
+
+
+ if (ret == 0) {
+ #ifndef NO_PKCS7_STREAM
+ PKCS7State* stream = pkcs7->stream;
+ #endif
+ /* This will reset PKCS7 structure and then set the
+ * certificate */
+ ret = wc_PKCS7_InitWithCert(pkcs7, cert, certSz);
+ #ifndef NO_PKCS7_STREAM
+ pkcs7->stream = stream;
+ #endif
+ }
+ pkcs7->contentDynamic = contentDynamic;
+ pkcs7->version = version;
+ #ifdef ASN_BER_TO_DER
+ pkcs7->der = der;
+ #endif
+ if (ret != 0)
+ break;
+
+ /* iterate through any additional certificates */
+ if (ret == 0 && MAX_PKCS7_CERTS > 0) {
+ int sz = 0;
+ int i;
+
+ pkcs7->cert[0] = cert;
+ pkcs7->certSz[0] = certSz;
+ certIdx = idx + certSz;
+
+ for (i = 1; i < MAX_PKCS7_CERTS &&
+ certIdx + 1 < pkiMsg2Sz &&
+ certIdx + 1 < (word32)length; i++) {
+ localIdx = certIdx;
+
+ if (ret == 0 && GetASNTag(pkiMsg2, &certIdx, &tag,
+ pkiMsg2Sz) < 0) {
+ ret = ASN_PARSE_E;
+ break;
+ }
+
+ if (ret == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_SEQUENCE)) {
+ if (GetLength(pkiMsg2, &certIdx, &sz,
+ pkiMsg2Sz) < 0) {
+ ret = ASN_PARSE_E;
+ break;
+ }
+
+ pkcs7->cert[i] = &pkiMsg2[localIdx];
+ pkcs7->certSz[i] = sz + (certIdx - localIdx);
+ certIdx += sz;
+ }
+ }
+ }
+ }
+ idx += length;
+
+ if (!detached) {
+ /* set content and size after init of PKCS7 structure */
+ pkcs7->content = content;
+ pkcs7->contentSz = contentSz;
+ }
+ #ifndef NO_PKCS7_STREAM
+ else {
+ /* save content if detached and using streaming API */
+ if (pkcs7->content != NULL) {
+ XFREE(pkcs7->stream->content, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ pkcs7->stream->content = (byte*)XMALLOC(pkcs7->contentSz,
+ pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->stream->content == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
+ else {
+ XMEMCPY(pkcs7->stream->content, pkcs7->content,
+ contentSz);
+ pkcs7->stream->contentSz = pkcs7->contentSz;
+ }
+ }
+ }
+ #endif
+
+ if (ret != 0) {
+ break;
+ }
+ #ifndef NO_PKCS7_STREAM
+ /* factor in that recent idx was in cert buffer. If in2 buffer was
+ * used then don't advance idx. */
+ if (length > 0 && pkcs7->stream->flagOne &&
+ pkcs7->stream->length == 0) {
+ idx = stateIdx + idx;
+ if (idx > inSz) {
+ /* index is more than input size */
+ ret = BUFFER_E;
+ break;
+ }
+ }
+ else {
+ stateIdx = idx; /* didn't read any from internal buffer */
+ }
+
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+ break;
+ }
+ if (pkcs7->stream->flagOne && pkcs7->stream->length > 0) {
+ idx = stateIdx + idx;
+ }
+
+ pkcs7->stream->expected = MAX_OID_SZ + ASN_TAG_SZ + MAX_LENGTH_SZ +
+ MAX_SET_SZ;
+
+ if (pkcs7->stream->expected > (pkcs7->stream->maxLen -
+ pkcs7->stream->totalRd) + pkcs7->stream->length)
+ pkcs7->stream->expected = (pkcs7->stream->maxLen -
+ pkcs7->stream->totalRd) + pkcs7->stream->length;
+
+ wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, 0);
+ wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, 0, length);
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE5);
+ FALL_THROUGH;
+
+ case WC_PKCS7_VERIFY_STAGE5:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+ pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+ break;
+ }
+ wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, &length);
+ if (pkcs7->stream->flagOne) {
+ pkiMsg2 = pkiMsg;
+ }
+
+ /* restore content type */
+ contentType = pkcs7->stream->nonce;
+ contentTypeSz = pkcs7->stream->nonceSz;
+
+ maxIdx = idx + pkcs7->stream->expected;
+ if (maxIdx > pkiMsg2Sz) {
+ ret = BUFFER_E;
+ break;
+ }
+ stateIdx = idx;
+ #endif
+
+ /* set contentType and size after init of PKCS7 structure */
+ if (ret == 0 && wc_PKCS7_SetContentType(pkcs7, contentType,
+ contentTypeSz) < 0)
+ ret = ASN_PARSE_E;
+
+ /* Get the implicit[1] set of crls */
+ if (ret == 0 && idx >= maxIdx)
+ ret = BUFFER_E;
+
+ localIdx = idx;
+ if (ret == 0 && GetASNTag(pkiMsg2, &localIdx, &tag, pkiMsg2Sz) == 0
+ && tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
+ idx++;
+ if (GetLength(pkiMsg2, &idx, &length, pkiMsg2Sz) < 0)
+ ret = ASN_PARSE_E;
+
+ /* Skip the set */
+ idx += length;
+ }
+
+ /* Get the set of signerInfos */
+ if (ret == 0 && GetSet_ex(pkiMsg2, &idx, &length, maxIdx,
+ NO_USER_CHECK) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret != 0)
+ break;
+ #ifndef NO_PKCS7_STREAM
+ if (!pkcs7->stream->flagOne) {
+ stateIdx = idx; /* didn't read any from internal buffer */
+ }
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, &idx)) != 0) {
+ break;
+ }
+ wc_PKCS7_StreamStoreVar(pkcs7, pkiMsg2Sz, 0, length);
+
+ if (in2 && in2Sz > 0 && hashBuf && hashSz > 0) {
+ if (length > 0) {
+ pkcs7->stream->expected = length;
+ }
+ else {
+ pkcs7->stream->expected = 0;
+ }
+ }
+ else {
+ /* last state expects the rest of the buffer */
+ pkcs7->stream->expected = (pkcs7->stream->maxLen -
+ pkcs7->stream->totalRd) + pkcs7->stream->length;
+ }
+
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_VERIFY_STAGE6);
+ FALL_THROUGH;
+
+ case WC_PKCS7_VERIFY_STAGE6:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz + in2Sz,
+ pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+ break;
+ }
+
+ wc_PKCS7_StreamGetVar(pkcs7, &pkiMsg2Sz, 0, &length);
+ if (pkcs7->stream->flagOne) {
+ pkiMsg2 = pkiMsg;
+ }
+
+ /* restore content */
+ content = pkcs7->stream->content;
+ contentSz = pkcs7->stream->contentSz;
+ #endif
+
+ ret = wc_PKCS7_ParseSignerInfo(pkcs7, pkiMsg2, pkiMsg2Sz, &idx,
+ degenerate, &signedAttrib, &signedAttribSz);
+
+ /* parse out the signature if present and verify it */
+ if (ret == 0 && length > 0 && degenerate == 0) {
+ WOLFSSL_MSG("Parsing signature and verifying");
+ if (idx >= pkiMsg2Sz)
+ ret = BUFFER_E;
+
+ /* Get the signature */
+ localIdx = idx;
+ if (ret == 0 && GetASNTag(pkiMsg2, &localIdx, &tag,
+ pkiMsg2Sz) == 0 && tag == ASN_OCTET_STRING) {
+ idx++;
+
+ if (GetLength(pkiMsg2, &idx, &length, pkiMsg2Sz) < 0)
+ ret = ASN_PARSE_E;
+
+ /* save pointer and length */
+ sig = &pkiMsg2[idx];
+ sigSz = length;
+
+ idx += length;
+ }
+
+ pkcs7->content = content;
+ pkcs7->contentSz = contentSz;
+
+ if (ret == 0) {
+ ret = wc_PKCS7_SignedDataVerifySignature(pkcs7, sig, sigSz,
+ signedAttrib, signedAttribSz,
+ hashBuf, hashSz);
+ }
+ }
+
+ if (ret < 0)
+ break;
+
+ ret = 0; /* success */
+ #ifndef NO_PKCS7_STREAM
+ wc_PKCS7_ResetStream(pkcs7);
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+ break;
+
+ default:
+ WOLFSSL_MSG("PKCS7 Unknown verify state");
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret != 0 && ret != WC_PKCS7_WANT_READ_E) {
+ #ifndef NO_PKCS7_STREAM
+ wc_PKCS7_ResetStream(pkcs7);
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+ }
+ return ret;
+}
+
+
+/* Gets a copy of the SID parsed from signerInfo. This can be called after
+ * wc_PKCS7_VerifySignedData has been called. SID can be SKID in version 3 case
+ * or issuerAndSerialNumber.
+ *
+ * return 0 on success and LENGTH_ONLY_E if just setting "outSz" for buffer
+ * length needed.
+ */
+int wc_PKCS7_GetSignerSID(PKCS7* pkcs7, byte* out, word32* outSz)
+{
+ if (outSz == NULL || pkcs7 == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (pkcs7->signerInfo == NULL) {
+ WOLFSSL_MSG("Either the bundle had no signers or"
+ "wc_PKCS7_VerifySignedData needs called yet");
+ return PKCS7_NO_SIGNER_E;
+ }
+
+ if (pkcs7->signerInfo->sidSz == 0) {
+ WOLFSSL_MSG("Bundle had no signer SID set");
+ return PKCS7_NO_SIGNER_E;
+ }
+
+ if (out == NULL) {
+ *outSz = pkcs7->signerInfo->sidSz;
+ return LENGTH_ONLY_E;
+ }
+
+ if (*outSz < pkcs7->signerInfo->sidSz) {
+ WOLFSSL_MSG("Buffer being passed in is not large enough for SKID");
+ return BUFFER_E;
+ }
+ XMEMCPY(out, pkcs7->signerInfo->sid, pkcs7->signerInfo->sidSz);
+ *outSz = pkcs7->signerInfo->sidSz;
+ return 0;
+}
+
+
+/* variant that allows computed data hash and header/foot,
+ * which is useful for large data signing */
+int wc_PKCS7_VerifySignedData_ex(PKCS7* pkcs7, const byte* hashBuf,
+ word32 hashSz, byte* pkiMsgHead, word32 pkiMsgHeadSz, byte* pkiMsgFoot,
+ word32 pkiMsgFootSz)
+{
+ return PKCS7_VerifySignedData(pkcs7, hashBuf, hashSz,
+ pkiMsgHead, pkiMsgHeadSz, pkiMsgFoot, pkiMsgFootSz);
+}
+
+int wc_PKCS7_VerifySignedData(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz)
+{
+ return PKCS7_VerifySignedData(pkcs7, NULL, 0, pkiMsg, pkiMsgSz, NULL, 0);
+}
+
+
+/* Generate random content encryption key, store into pkcs7->cek and
+ * pkcs7->cekSz.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * len - length of key to be generated
+ *
+ * Returns 0 on success, negative upon error */
+static int PKCS7_GenerateContentEncryptionKey(PKCS7* pkcs7, word32 len)
+{
+ int ret;
+ WC_RNG rng;
+ byte* tmpKey;
+
+ if (pkcs7 == NULL || len == 0)
+ return BAD_FUNC_ARG;
+
+ /* if key already exists, don't need to re-generate */
+ if (pkcs7->cek != NULL && pkcs7->cekSz != 0) {
+
+ /* if key exists, but is different size, return error */
+ if (pkcs7->cekSz != len) {
+ WOLFSSL_MSG("Random content-encryption key size is inconsistent "
+ "between CMS recipients");
+ return WC_KEY_SIZE_E;
}
- pkcs7->content = content;
- pkcs7->contentSz = contentSz;
+ return 0;
+ }
- {
- word32 scratch = 0;
- int plainSz = 0;
- int digestSz = MAX_SEQ_SZ + MAX_ALGO_SZ +
- MAX_OCTET_STR_SZ + SHA_DIGEST_SIZE;
+ /* allocate space for cek */
+ tmpKey = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (tmpKey == NULL)
+ return MEMORY_E;
-#ifdef WOLFSSL_SMALL_STACK
- byte* digest;
- RsaKey* key;
+ XMEMSET(tmpKey, 0, len);
- digest = (byte*)XMALLOC(digestSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ XFREE(tmpKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
- if (digest == NULL)
- return MEMORY_E;
+ ret = wc_RNG_GenerateBlock(&rng, tmpKey, len);
+ if (ret != 0) {
+ wc_FreeRng(&rng);
+ XFREE(tmpKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
- key = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- if (key == NULL) {
- XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
+ /* store into PKCS7, memory freed during final cleanup */
+ pkcs7->cek = tmpKey;
+ pkcs7->cekSz = len;
+
+ wc_FreeRng(&rng);
+
+ return 0;
+}
+
+
+/* wrap CEK (content encryption key) with KEK, 0 on success, < 0 on error */
+static int wc_PKCS7_KeyWrap(byte* cek, word32 cekSz, byte* kek,
+ word32 kekSz, byte* out, word32 outSz,
+ int keyWrapAlgo, int direction)
+{
+ int ret = 0;
+
+ if (cek == NULL || kek == NULL || out == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (keyWrapAlgo) {
+#ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ case AES128_WRAP:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192_WRAP:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256_WRAP:
+ #endif
+
+ if (direction == AES_ENCRYPTION) {
+
+ ret = wc_AesKeyWrap(kek, kekSz, cek, cekSz,
+ out, outSz, NULL);
+
+ } else if (direction == AES_DECRYPTION) {
+
+ ret = wc_AesKeyUnWrap(kek, kekSz, cek, cekSz,
+ out, outSz, NULL);
+ } else {
+ WOLFSSL_MSG("Bad key un/wrap direction");
+ return BAD_FUNC_ARG;
+ }
+
+ if (ret <= 0)
+ return ret;
+ break;
+#endif /* NO_AES */
+
+ default:
+ WOLFSSL_MSG("Unsupported key wrap algorithm");
+ return BAD_KEYWRAP_ALG_E;
+ };
+
+ (void)cekSz;
+ (void)kekSz;
+ (void)outSz;
+ (void)direction;
+ return ret;
+}
+
+
+#ifdef HAVE_ECC
+
+/* KARI == KeyAgreeRecipientInfo (key agreement) */
+typedef struct WC_PKCS7_KARI {
+ DecodedCert* decoded; /* decoded recip cert */
+ void* heap; /* user heap, points to PKCS7->heap */
+ int devId; /* device ID for HW based private key */
+ ecc_key* recipKey; /* recip key (pub | priv) */
+ ecc_key* senderKey; /* sender key (pub | priv) */
+ byte* senderKeyExport; /* sender ephemeral key DER */
+ byte* kek; /* key encryption key */
+ byte* ukm; /* OPTIONAL user keying material */
+ byte* sharedInfo; /* ECC-CMS-SharedInfo ASN.1 encoded blob */
+ word32 senderKeyExportSz; /* size of sender ephemeral key DER */
+ word32 kekSz; /* size of key encryption key */
+ word32 ukmSz; /* size of user keying material */
+ word32 sharedInfoSz; /* size of ECC-CMS-SharedInfo encoded */
+ byte ukmOwner; /* do we own ukm buffer? 1:yes, 0:no */
+ byte direction; /* WC_PKCS7_ENCODE | WC_PKCS7_DECODE */
+ byte decodedInit : 1; /* indicates decoded was initialized */
+ byte recipKeyInit : 1; /* indicates recipKey was initialized */
+ byte senderKeyInit : 1; /* indicates senderKey was initialized */
+} WC_PKCS7_KARI;
+
+
+/* allocate and create new WC_PKCS7_KARI struct,
+ * returns struct pointer on success, NULL on failure */
+static WC_PKCS7_KARI* wc_PKCS7_KariNew(PKCS7* pkcs7, byte direction)
+{
+ WC_PKCS7_KARI* kari = NULL;
+
+ if (pkcs7 == NULL)
+ return NULL;
+
+ kari = (WC_PKCS7_KARI*)XMALLOC(sizeof(WC_PKCS7_KARI), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (kari == NULL) {
+ WOLFSSL_MSG("Failed to allocate WC_PKCS7_KARI");
+ return NULL;
+ }
+
+ kari->decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (kari->decoded == NULL) {
+ WOLFSSL_MSG("Failed to allocate DecodedCert");
+ XFREE(kari, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return NULL;
+ }
+
+ kari->recipKey = (ecc_key*)XMALLOC(sizeof(ecc_key), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (kari->recipKey == NULL) {
+ WOLFSSL_MSG("Failed to allocate recipient ecc_key");
+ XFREE(kari->decoded, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kari, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return NULL;
+ }
+
+ kari->senderKey = (ecc_key*)XMALLOC(sizeof(ecc_key), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (kari->senderKey == NULL) {
+ WOLFSSL_MSG("Failed to allocate sender ecc_key");
+ XFREE(kari->recipKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kari->decoded, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kari, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return NULL;
+ }
+
+ kari->senderKeyExport = NULL;
+ kari->senderKeyExportSz = 0;
+ kari->kek = NULL;
+ kari->kekSz = 0;
+ kari->ukm = NULL;
+ kari->ukmSz = 0;
+ kari->ukmOwner = 0;
+ kari->sharedInfo = NULL;
+ kari->sharedInfoSz = 0;
+ kari->direction = direction;
+ kari->decodedInit = 0;
+ kari->recipKeyInit = 0;
+ kari->senderKeyInit = 0;
+
+ kari->heap = pkcs7->heap;
+ kari->devId = pkcs7->devId;
+
+ return kari;
+}
+
+
+/* free WC_PKCS7_KARI struct, return 0 on success */
+static int wc_PKCS7_KariFree(WC_PKCS7_KARI* kari)
+{
+ void* heap;
+
+ if (kari) {
+ heap = kari->heap;
+
+ if (kari->decoded) {
+ if (kari->decodedInit)
+ FreeDecodedCert(kari->decoded);
+ XFREE(kari->decoded, heap, DYNAMIC_TYPE_PKCS7);
+ }
+ if (kari->senderKey) {
+ if (kari->senderKeyInit)
+ wc_ecc_free(kari->senderKey);
+ XFREE(kari->senderKey, heap, DYNAMIC_TYPE_PKCS7);
+ }
+ if (kari->recipKey) {
+ if (kari->recipKeyInit)
+ wc_ecc_free(kari->recipKey);
+ XFREE(kari->recipKey, heap, DYNAMIC_TYPE_PKCS7);
+ }
+ if (kari->senderKeyExport) {
+ ForceZero(kari->senderKeyExport, kari->senderKeyExportSz);
+ XFREE(kari->senderKeyExport, heap, DYNAMIC_TYPE_PKCS7);
+ kari->senderKeyExportSz = 0;
+ }
+ if (kari->kek) {
+ ForceZero(kari->kek, kari->kekSz);
+ XFREE(kari->kek, heap, DYNAMIC_TYPE_PKCS7);
+ kari->kekSz = 0;
+ }
+ if (kari->ukm) {
+ if (kari->ukmOwner == 1) {
+ XFREE(kari->ukm, heap, DYNAMIC_TYPE_PKCS7);
}
+ kari->ukmSz = 0;
+ }
+ if (kari->sharedInfo) {
+ ForceZero(kari->sharedInfo, kari->sharedInfoSz);
+ XFREE(kari->sharedInfo, heap, DYNAMIC_TYPE_PKCS7);
+ kari->sharedInfoSz = 0;
+ }
+ XFREE(kari, heap, DYNAMIC_TYPE_PKCS7);
+ }
+
+ (void)heap;
+
+ return 0;
+}
+
+
+/* parse recipient cert/key, return 0 on success, negative on error
+ * key/keySz only needed during decoding (WC_PKCS7_DECODE) */
+static int wc_PKCS7_KariParseRecipCert(WC_PKCS7_KARI* kari, const byte* cert,
+ word32 certSz, const byte* key,
+ word32 keySz)
+{
+ int ret;
+ word32 idx;
+
+ if (kari == NULL || kari->decoded == NULL ||
+ cert == NULL || certSz == 0)
+ return BAD_FUNC_ARG;
+
+ /* decode certificate */
+ InitDecodedCert(kari->decoded, (byte*)cert, certSz, kari->heap);
+ kari->decodedInit = 1;
+ ret = ParseCert(kari->decoded, CA_TYPE, NO_VERIFY, 0);
+ if (ret < 0)
+ return ret;
+
+ /* only supports ECDSA for now */
+ if (kari->decoded->keyOID != ECDSAk) {
+ WOLFSSL_MSG("CMS KARI only supports ECDSA key types");
+ return BAD_FUNC_ARG;
+ }
+
+ /* make sure subject key id was read from cert */
+ if (kari->decoded->extSubjKeyIdSet == 0) {
+ WOLFSSL_MSG("Failed to read subject key ID from recipient cert");
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wc_ecc_init_ex(kari->recipKey, kari->heap, kari->devId);
+ if (ret != 0)
+ return ret;
+
+ kari->recipKeyInit = 1;
+
+ /* get recip public key */
+ if (kari->direction == WC_PKCS7_ENCODE) {
+
+ idx = 0;
+ ret = wc_EccPublicKeyDecode(kari->decoded->publicKey, &idx,
+ kari->recipKey, kari->decoded->pubKeySize);
+ if (ret != 0)
+ return ret;
+ }
+ /* get recip private key */
+ else if (kari->direction == WC_PKCS7_DECODE) {
+ if (key != NULL && keySz > 0) {
+ idx = 0;
+ ret = wc_EccPrivateKeyDecode(key, &idx, kari->recipKey, keySz);
+ }
+ else if (kari->devId == INVALID_DEVID) {
+ ret = BAD_FUNC_ARG;
+ }
+ if (ret != 0)
+ return ret;
+
+ } else {
+ /* bad direction */
+ return BAD_FUNC_ARG;
+ }
+
+ (void)idx;
+
+ return 0;
+}
+
+
+/* create ephemeral ECC key, places ecc_key in kari->senderKey,
+ * DER encoded in kari->senderKeyExport. The export buffer is sized from the
+ * recipient cert's public key (same curve, same X9.63 point size).
+ * On failure the export buffer is freed AND the pointer/size are reset so a
+ * later wc_PKCS7_KariFree() cannot free it a second time.
+ * return 0 on success, negative on error */
+static int wc_PKCS7_KariGenerateEphemeralKey(WC_PKCS7_KARI* kari)
+{
+ int ret;
+ WC_RNG rng;
+
+ if (kari == NULL || kari->decoded == NULL ||
+ kari->recipKey == NULL || kari->recipKey->dp == NULL)
+ return BAD_FUNC_ARG;
+
+ kari->senderKeyExport = (byte*)XMALLOC(kari->decoded->pubKeySize,
+ kari->heap, DYNAMIC_TYPE_PKCS7);
+ if (kari->senderKeyExport == NULL)
+ return MEMORY_E;
+
+ kari->senderKeyExportSz = kari->decoded->pubKeySize;
+
+ ret = wc_ecc_init_ex(kari->senderKey, kari->heap, kari->devId);
+ if (ret != 0) {
+ /* clear pointer after free: avoids double free in wc_PKCS7_KariFree */
+ XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7);
+ kari->senderKeyExport = NULL;
+ kari->senderKeyExportSz = 0;
+ return ret;
+ }
+
+ kari->senderKeyInit = 1;
+
+ ret = wc_InitRng_ex(&rng, kari->heap, kari->devId);
+ if (ret != 0) {
+ XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7);
+ kari->senderKeyExport = NULL;
+ kari->senderKeyExportSz = 0;
+ return ret;
+ }
+
+ /* ephemeral key on the same curve as the recipient's key */
+ ret = wc_ecc_make_key_ex(&rng, kari->recipKey->dp->size,
+ kari->senderKey, kari->recipKey->dp->id);
+ if (ret != 0) {
+ XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7);
+ kari->senderKeyExport = NULL;
+ kari->senderKeyExportSz = 0;
+ wc_FreeRng(&rng);
+ return ret;
+ }
+
+ wc_FreeRng(&rng);
+
+ /* dump generated key to X.963 DER for output in CMS bundle */
+ ret = wc_ecc_export_x963(kari->senderKey, kari->senderKeyExport,
+ &kari->senderKeyExportSz);
+ if (ret != 0) {
+ XFREE(kari->senderKeyExport, kari->heap, DYNAMIC_TYPE_PKCS7);
+ kari->senderKeyExport = NULL;
+ kari->senderKeyExportSz = 0;
+ return ret;
+ }
+
+ return 0;
+}
+
+
+/* create ASN.1 encoded ECC-CMS-SharedInfo using specified key wrap algorithm,
+ * place in kari->sharedInfo. returns 0 on success, negative on error.
+ * Layout (RFC 5753): SEQ { keyInfo AlgoId, [0] EXPLICIT entityUInfo OPTIONAL,
+ * [2] suppPubInfo (KEK length in bits) }. Lengths are computed innermost-out
+ * first, then the DER is written outermost-in. kari->kekSz must already be
+ * set (wc_PKCS7_KariGenerateKEK sets it before calling here). */
+static int wc_PKCS7_KariGenerateSharedInfo(WC_PKCS7_KARI* kari, int keyWrapOID)
+{
+ int idx = 0;
+ int sharedInfoSeqSz = 0;
+ int keyInfoSz = 0;
+ int suppPubInfoSeqSz = 0;
+ int entityUInfoOctetSz = 0;
+ int entityUInfoExplicitSz = 0;
+ int kekOctetSz = 0;
+ int sharedInfoSz = 0;
+
+ word32 kekBitSz = 0;
+
+ byte sharedInfoSeq[MAX_SEQ_SZ];
+ byte keyInfo[MAX_ALGO_SZ];
+ byte suppPubInfoSeq[MAX_SEQ_SZ];
+ byte entityUInfoOctet[MAX_OCTET_STR_SZ];
+ byte entityUInfoExplicitSeq[MAX_SEQ_SZ];
+ byte kekOctet[MAX_OCTET_STR_SZ];
+
+ if (kari == NULL)
+ return BAD_FUNC_ARG;
+
+ if ((kari->ukmSz > 0) && (kari->ukm == NULL))
+ return BAD_FUNC_ARG;
+
+ /* kekOctet: 4-byte big-endian bit count of the KEK */
+ kekOctetSz = SetOctetString(sizeof(word32), kekOctet);
+ sharedInfoSz += (kekOctetSz + sizeof(word32));
+
+ /* suppPubInfo */
+ suppPubInfoSeqSz = SetImplicit(ASN_SEQUENCE, 2,
+ kekOctetSz + sizeof(word32),
+ suppPubInfoSeq);
+ sharedInfoSz += suppPubInfoSeqSz;
+
+ /* optional ukm/entityInfo */
+ if (kari->ukmSz > 0) {
+ entityUInfoOctetSz = SetOctetString(kari->ukmSz, entityUInfoOctet);
+ sharedInfoSz += (entityUInfoOctetSz + kari->ukmSz);
+
+ entityUInfoExplicitSz = SetExplicit(0, entityUInfoOctetSz +
+ kari->ukmSz,
+ entityUInfoExplicitSeq);
+ sharedInfoSz += entityUInfoExplicitSz;
+ }
+
+ /* keyInfo */
+ keyInfoSz = SetAlgoID(keyWrapOID, keyInfo, oidKeyWrapType, 0);
+ sharedInfoSz += keyInfoSz;
+
+ /* sharedInfo */
+ sharedInfoSeqSz = SetSequence(sharedInfoSz, sharedInfoSeq);
+ sharedInfoSz += sharedInfoSeqSz;
+
+ kari->sharedInfo = (byte*)XMALLOC(sharedInfoSz, kari->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (kari->sharedInfo == NULL)
+ return MEMORY_E;
+
+ kari->sharedInfoSz = sharedInfoSz;
+
+ /* write DER outermost-first: SEQ, keyInfo, [ukm], suppPubInfo, KEK bits */
+ XMEMCPY(kari->sharedInfo + idx, sharedInfoSeq, sharedInfoSeqSz);
+ idx += sharedInfoSeqSz;
+ XMEMCPY(kari->sharedInfo + idx, keyInfo, keyInfoSz);
+ idx += keyInfoSz;
+ if (kari->ukmSz > 0) {
+ XMEMCPY(kari->sharedInfo + idx, entityUInfoExplicitSeq,
+ entityUInfoExplicitSz);
+ idx += entityUInfoExplicitSz;
+ XMEMCPY(kari->sharedInfo + idx, entityUInfoOctet, entityUInfoOctetSz);
+ idx += entityUInfoOctetSz;
+ XMEMCPY(kari->sharedInfo + idx, kari->ukm, kari->ukmSz);
+ idx += kari->ukmSz;
+ }
+ XMEMCPY(kari->sharedInfo + idx, suppPubInfoSeq, suppPubInfoSeqSz);
+ idx += suppPubInfoSeqSz;
+ XMEMCPY(kari->sharedInfo + idx, kekOctet, kekOctetSz);
+ idx += kekOctetSz;
+
+ kekBitSz = (kari->kekSz) * 8; /* convert to bits */
+#ifdef LITTLE_ENDIAN_ORDER
+ kekBitSz = ByteReverseWord32(kekBitSz); /* network byte order */
+#endif
+ XMEMCPY(kari->sharedInfo + idx, &kekBitSz, sizeof(kekBitSz));
+
+ return 0;
+}
+
+
+/* create key encryption key (KEK) using key wrap algorithm and key encryption
+ * algorithm, place in kari->kek. Runs the ECDH shared secret through the
+ * X9.63 KDF (hash chosen by keyEncOID) keyed on ECC-CMS-SharedInfo.
+ * kari->kek/sharedInfo are freed by wc_PKCS7_KariFree on error paths.
+ * return 0 on success, <0 on error. */
+static int wc_PKCS7_KariGenerateKEK(WC_PKCS7_KARI* kari,
+ int keyWrapOID, int keyEncOID)
+{
+ int ret;
+ int kSz;
+ enum wc_HashType kdfType;
+ byte* secret;
+ word32 secretSz;
+
+ if (kari == NULL || kari->recipKey == NULL ||
+ kari->senderKey == NULL || kari->senderKey->dp == NULL)
+ return BAD_FUNC_ARG;
+
+ /* get KEK size, allocate buff */
+ kSz = wc_PKCS7_GetOIDKeySize(keyWrapOID);
+ if (kSz < 0)
+ return kSz;
+
+ kari->kek = (byte*)XMALLOC(kSz, kari->heap, DYNAMIC_TYPE_PKCS7);
+ if (kari->kek == NULL)
+ return MEMORY_E;
+
+ kari->kekSz = (word32)kSz;
+
+ /* generate ECC-CMS-SharedInfo (uses kari->kekSz set just above) */
+ ret = wc_PKCS7_KariGenerateSharedInfo(kari, keyWrapOID);
+ if (ret != 0)
+ return ret;
+
+ /* generate shared secret */
+ secretSz = kari->senderKey->dp->size;
+ secret = (byte*)XMALLOC(secretSz, kari->heap, DYNAMIC_TYPE_PKCS7);
+ if (secret == NULL)
+ return MEMORY_E;
+
+ /* ECDH: private key first, peer public key second */
+ if (kari->direction == WC_PKCS7_ENCODE) {
+
+ ret = wc_ecc_shared_secret(kari->senderKey, kari->recipKey,
+ secret, &secretSz);
+
+ } else if (kari->direction == WC_PKCS7_DECODE) {
+
+ ret = wc_ecc_shared_secret(kari->recipKey, kari->senderKey,
+ secret, &secretSz);
+
+ } else {
+ /* bad direction */
+ XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+ return BAD_FUNC_ARG;
+ }
+
+ if (ret != 0) {
+ XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* run through KDF; map key agreement OID to its KDF hash */
+ switch (keyEncOID) {
+
+ #ifndef NO_SHA
+ case dhSinglePass_stdDH_sha1kdf_scheme:
+ kdfType = WC_HASH_TYPE_SHA;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA224
+ /* fixed: was "#ifndef WOLFSSL_SHA224", which enabled this case only
+ * when SHA-224 support was compiled OUT (positive feature macro,
+ * like WOLFSSL_SHA384/WOLFSSL_SHA512 below) */
+ case dhSinglePass_stdDH_sha224kdf_scheme:
+ kdfType = WC_HASH_TYPE_SHA224;
+ break;
+ #endif
+ #ifndef NO_SHA256
+ case dhSinglePass_stdDH_sha256kdf_scheme:
+ kdfType = WC_HASH_TYPE_SHA256;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case dhSinglePass_stdDH_sha384kdf_scheme:
+ kdfType = WC_HASH_TYPE_SHA384;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case dhSinglePass_stdDH_sha512kdf_scheme:
+ kdfType = WC_HASH_TYPE_SHA512;
+ break;
+ #endif
+ default:
+ WOLFSSL_MSG("Unsupported key agreement algorithm");
+ XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+ return BAD_FUNC_ARG;
+ };
+
+ ret = wc_X963_KDF(kdfType, secret, secretSz, kari->sharedInfo,
+ kari->sharedInfoSz, kari->kek, kari->kekSz);
+ if (ret != 0) {
+ XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ XFREE(secret, kari->heap, DYNAMIC_TYPE_PKCS7);
+
+ return 0;
+}
+
+
+/* Encode and add CMS EnvelopedData KARI (KeyAgreeRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure. Generates an ephemeral ECC key,
+ * derives a KEK via ECDH + X9.63 KDF, wraps the CEK with it, then DER-encodes
+ * the KeyAgreeRecipientInfo and appends it to pkcs7->recipList.
+ *
+ * Returns total size of the encoded RecipientInfo on success,
+ * negative upon error */
+int wc_PKCS7_AddRecipient_KARI(PKCS7* pkcs7, const byte* cert, word32 certSz,
+ int keyWrapOID, int keyAgreeOID, byte* ukm,
+ word32 ukmSz, int options)
+{
+ Pkcs7EncodedRecip* recip;
+ Pkcs7EncodedRecip* lastRecip = NULL;
+ WC_PKCS7_KARI* kari = NULL;
+
+ word32 idx = 0;
+ word32 encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+
+ int ret = 0;
+ int keySz, direction = 0;
+ int blockKeySz = 0;
+
+ /* ASN.1 layout */
+ int totalSz = 0;
+ int kariSeqSz = 0;
+ byte kariSeq[MAX_SEQ_SZ]; /* IMPLICIT [1] */
+ int verSz = 0;
+ byte ver[MAX_VERSION_SZ];
+
+ int origIdOrKeySeqSz = 0;
+ byte origIdOrKeySeq[MAX_SEQ_SZ]; /* IMPLICIT [0] */
+ int origPubKeySeqSz = 0;
+ byte origPubKeySeq[MAX_SEQ_SZ]; /* IMPLICIT [1] */
+ int origAlgIdSz = 0;
+ byte origAlgId[MAX_ALGO_SZ];
+ int origPubKeyStrSz = 0;
+ byte origPubKeyStr[MAX_OCTET_STR_SZ];
+
+ /* optional user keying material */
+ int ukmOctetSz = 0;
+ byte ukmOctetStr[MAX_OCTET_STR_SZ];
+ int ukmExplicitSz = 0;
+ byte ukmExplicitSeq[MAX_SEQ_SZ];
+
+ int keyEncryptAlgoIdSz = 0;
+ byte keyEncryptAlgoId[MAX_ALGO_SZ];
+ int keyWrapAlgSz = 0;
+ byte keyWrapAlg[MAX_ALGO_SZ];
+
+ int recipEncKeysSeqSz = 0;
+ byte recipEncKeysSeq[MAX_SEQ_SZ];
+ int recipEncKeySeqSz = 0;
+ byte recipEncKeySeq[MAX_SEQ_SZ];
+ int recipKeyIdSeqSz = 0;
+ byte recipKeyIdSeq[MAX_SEQ_SZ]; /* IMPLICIT [0] */
+ int subjKeyIdOctetSz = 0;
+ byte subjKeyIdOctet[MAX_OCTET_STR_SZ];
+ int encryptedKeyOctetSz = 0;
+ byte encryptedKeyOctet[MAX_OCTET_STR_SZ];
+
+#ifdef WOLFSSL_SMALL_STACK
+ byte* encryptedKey;
+
+ encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (encryptedKey == NULL) {
+ return MEMORY_E;
+ }
#else
- byte digest[digestSz];
- RsaKey stack_key;
- RsaKey* key = &stack_key;
+ byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
#endif
- XMEMSET(digest, 0, digestSz);
+ /* allocate and init memory for recipient */
+ recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (recip == NULL) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return MEMORY_E;
+ }
+ XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
- ret = wc_InitRsaKey(key, NULL);
- if (ret != 0) {
+ /* get key size for content-encryption key based on algorithm */
+ blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+ if (blockKeySz < 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return ret;
- }
- if (wc_RsaPublicKeyDecode(pkcs7->publicKey, &scratch, key,
- pkcs7->publicKeySz) < 0) {
- WOLFSSL_MSG("ASN RSA key decode error");
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return blockKeySz;
+ }
+
+ /* generate random content encryption key, if needed */
+ ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+ if (ret < 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return PUBLIC_KEY_E;
- }
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
- plainSz = wc_RsaSSL_Verify(sig, sigSz, digest, digestSz, key);
- wc_FreeRsaKey(key);
+ /* set direction based on keyWrapAlgo */
+ switch (keyWrapOID) {
+#ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ case AES128_WRAP:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192_WRAP:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256_WRAP:
+ #endif
+ direction = AES_ENCRYPTION;
+ break;
+#endif
+ default:
+ WOLFSSL_MSG("Unsupported key wrap algorithm");
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BAD_KEYWRAP_ALG_E;
+ }
+ kari = wc_PKCS7_KariNew(pkcs7, WC_PKCS7_ENCODE);
+ if (kari == NULL) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
- if (plainSz < 0)
- return plainSz;
+ /* set user keying material if available; not owned (ukmOwner = 0) */
+ if (ukmSz > 0 && ukm != NULL) {
+ kari->ukm = ukm;
+ kari->ukmSz = ukmSz;
+ kari->ukmOwner = 0;
+ }
+
+ /* parse recipient cert, get public key */
+ ret = wc_PKCS7_KariParseRecipCert(kari, cert, certSz, NULL, 0);
+ if (ret != 0) {
+ wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* generate sender ephemeral ECC key */
+ ret = wc_PKCS7_KariGenerateEphemeralKey(kari);
+ if (ret != 0) {
+ wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* generate KEK (key encryption key) */
+ ret = wc_PKCS7_KariGenerateKEK(kari, keyWrapOID, keyAgreeOID);
+ if (ret != 0) {
+ wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* encrypt CEK with KEK */
+ keySz = wc_PKCS7_KeyWrap(pkcs7->cek, pkcs7->cekSz, kari->kek,
+ kari->kekSz, encryptedKey, encryptedKeySz,
+ keyWrapOID, direction);
+ if (keySz <= 0) {
+ wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return keySz;
+ }
+ encryptedKeySz = (word32)keySz;
+
+ /* lengths are computed innermost-first below, then the DER is written
+ * outermost-first into recip->recip */
+
+ /* Start of RecipientEncryptedKeys */
+
+ /* EncryptedKey */
+ encryptedKeyOctetSz = SetOctetString(encryptedKeySz, encryptedKeyOctet);
+ totalSz += (encryptedKeyOctetSz + encryptedKeySz);
+
+ /* SubjectKeyIdentifier */
+ subjKeyIdOctetSz = SetOctetString(KEYID_SIZE, subjKeyIdOctet);
+ totalSz += (subjKeyIdOctetSz + KEYID_SIZE);
+
+ /* RecipientKeyIdentifier IMPLICIT [0] */
+ recipKeyIdSeqSz = SetImplicit(ASN_SEQUENCE, 0, subjKeyIdOctetSz +
+ KEYID_SIZE, recipKeyIdSeq);
+ totalSz += recipKeyIdSeqSz;
+
+ /* RecipientEncryptedKey */
+ recipEncKeySeqSz = SetSequence(totalSz, recipEncKeySeq);
+ totalSz += recipEncKeySeqSz;
+
+ /* RecipientEncryptedKeys */
+ recipEncKeysSeqSz = SetSequence(totalSz, recipEncKeysSeq);
+ totalSz += recipEncKeysSeqSz;
+
+ /* Start of optional UserKeyingMaterial */
+
+ if (kari->ukmSz > 0) {
+ ukmOctetSz = SetOctetString(kari->ukmSz, ukmOctetStr);
+ totalSz += (ukmOctetSz + kari->ukmSz);
+
+ ukmExplicitSz = SetExplicit(1, ukmOctetSz + kari->ukmSz,
+ ukmExplicitSeq);
+ totalSz += ukmExplicitSz;
+ }
+
+ /* Start of KeyEncryptionAlgorithmIdentifier */
+
+ /* KeyWrapAlgorithm */
+ keyWrapAlgSz = SetAlgoID(keyWrapOID, keyWrapAlg, oidKeyWrapType, 0);
+ totalSz += keyWrapAlgSz;
+
+ /* KeyEncryptionAlgorithmIdentifier */
+ keyEncryptAlgoIdSz = SetAlgoID(keyAgreeOID, keyEncryptAlgoId,
+ oidCmsKeyAgreeType, keyWrapAlgSz);
+ totalSz += keyEncryptAlgoIdSz;
+
+ /* Start of OriginatorIdentifierOrKey */
+
+ /* recipient ECPoint, public key */
+ XMEMSET(origPubKeyStr, 0, sizeof(origPubKeyStr)); /* no unused bits */
+ origPubKeyStr[0] = ASN_BIT_STRING;
+ origPubKeyStrSz = SetLength(kari->senderKeyExportSz + 1,
+ origPubKeyStr + 1) + 2;
+ totalSz += (origPubKeyStrSz + kari->senderKeyExportSz);
+
+ /* Originator AlgorithmIdentifier, params set to NULL for interop
+ compatibility */
+ origAlgIdSz = SetAlgoID(ECDSAk, origAlgId, oidKeyType, 2);
+ origAlgId[origAlgIdSz++] = ASN_TAG_NULL;
+ origAlgId[origAlgIdSz++] = 0;
+ totalSz += origAlgIdSz;
+
+ /* outer OriginatorPublicKey IMPLICIT [1] */
+ origPubKeySeqSz = SetImplicit(ASN_SEQUENCE, 1,
+ origAlgIdSz + origPubKeyStrSz +
+ kari->senderKeyExportSz, origPubKeySeq);
+ totalSz += origPubKeySeqSz;
+
+ /* outer OriginatorIdentiferOrKey IMPLICIT [0] */
+ origIdOrKeySeqSz = SetImplicit(ASN_SEQUENCE, 0,
+ origPubKeySeqSz + origAlgIdSz +
+ origPubKeyStrSz + kari->senderKeyExportSz,
+ origIdOrKeySeq);
+ totalSz += origIdOrKeySeqSz;
+
+ /* version, always 3 */
+ verSz = SetMyVersion(3, ver, 0);
+ totalSz += verSz;
+ recip->recipVersion = 3;
+
+ /* outer IMPLICIT [1] kari */
+ kariSeqSz = SetImplicit(ASN_SEQUENCE, 1, totalSz, kariSeq);
+ totalSz += kariSeqSz;
+
+ if (totalSz > MAX_RECIP_SZ) {
+ WOLFSSL_MSG("KeyAgreeRecipientInfo output buffer too small");
+ wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BUFFER_E;
+ }
+
+ XMEMCPY(recip->recip + idx, kariSeq, kariSeqSz);
+ idx += kariSeqSz;
+ XMEMCPY(recip->recip + idx, ver, verSz);
+ idx += verSz;
+
+ XMEMCPY(recip->recip + idx, origIdOrKeySeq, origIdOrKeySeqSz);
+ idx += origIdOrKeySeqSz;
+ XMEMCPY(recip->recip + idx, origPubKeySeq, origPubKeySeqSz);
+ idx += origPubKeySeqSz;
+
+ /* AlgorithmIdentifier with NULL parameter */
+ XMEMCPY(recip->recip + idx, origAlgId, origAlgIdSz);
+ idx += origAlgIdSz;
+
+ XMEMCPY(recip->recip + idx, origPubKeyStr, origPubKeyStrSz);
+ idx += origPubKeyStrSz;
+ /* ephemeral public key */
+ XMEMCPY(recip->recip + idx, kari->senderKeyExport, kari->senderKeyExportSz);
+ idx += kari->senderKeyExportSz;
+
+ if (kari->ukmSz > 0) {
+ XMEMCPY(recip->recip + idx, ukmExplicitSeq, ukmExplicitSz);
+ idx += ukmExplicitSz;
+ XMEMCPY(recip->recip + idx, ukmOctetStr, ukmOctetSz);
+ idx += ukmOctetSz;
+ XMEMCPY(recip->recip + idx, kari->ukm, kari->ukmSz);
+ idx += kari->ukmSz;
+ }
+
+ XMEMCPY(recip->recip + idx, keyEncryptAlgoId, keyEncryptAlgoIdSz);
+ idx += keyEncryptAlgoIdSz;
+ XMEMCPY(recip->recip + idx, keyWrapAlg, keyWrapAlgSz);
+ idx += keyWrapAlgSz;
+
+ XMEMCPY(recip->recip + idx, recipEncKeysSeq, recipEncKeysSeqSz);
+ idx += recipEncKeysSeqSz;
+ XMEMCPY(recip->recip + idx, recipEncKeySeq, recipEncKeySeqSz);
+ idx += recipEncKeySeqSz;
+ XMEMCPY(recip->recip + idx, recipKeyIdSeq, recipKeyIdSeqSz);
+ idx += recipKeyIdSeqSz;
+ XMEMCPY(recip->recip + idx, subjKeyIdOctet, subjKeyIdOctetSz);
+ idx += subjKeyIdOctetSz;
+ /* subject key id */
+ XMEMCPY(recip->recip + idx, kari->decoded->extSubjKeyId, KEYID_SIZE);
+ idx += KEYID_SIZE;
+ XMEMCPY(recip->recip + idx, encryptedKeyOctet, encryptedKeyOctetSz);
+ idx += encryptedKeyOctetSz;
+ /* encrypted CEK */
+ XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz);
+ idx += encryptedKeySz;
+
+ wc_PKCS7_KariFree(kari);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ /* store recipient size */
+ recip->recipSz = idx;
+ recip->recipType = PKCS7_KARI;
+
+ /* add recipient to recip list */
+ if (pkcs7->recipList == NULL) {
+ pkcs7->recipList = recip;
+ } else {
+ lastRecip = pkcs7->recipList;
+ while (lastRecip->next != NULL) {
+ lastRecip = lastRecip->next;
 }
+ lastRecip->next = recip;
 }
- return 0;
+ (void)options;
+
+ return idx;
 }
+#endif /* HAVE_ECC */
+
+#ifndef NO_RSA
-/* create ASN.1 fomatted RecipientInfo structure, returns sequence size */
-WOLFSSL_LOCAL int wc_CreateRecipientInfo(const byte* cert, word32 certSz,
- int keyEncAlgo, int blockKeySz,
- RNG* rng, byte* contentKeyPlain,
- byte* contentKeyEnc,
- int* keyEncSz, byte* out, word32 outSz)
+/* Encode and add CMS EnvelopedData KTRI (KeyTransRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_KTRI(PKCS7* pkcs7, const byte* cert, word32 certSz,
+ int options)
{
+ Pkcs7EncodedRecip* recip = NULL;
+ Pkcs7EncodedRecip* lastRecip = NULL;
+
+ WC_RNG rng;
word32 idx = 0;
- int ret = 0, totalSz = 0;
- int verSz, issuerSz, snSz, keyEncAlgSz;
- int issuerSeqSz, recipSeqSz, issuerSerialSeqSz;
+ word32 encryptedKeySz = 0;
+
+ int ret = 0, blockKeySz;
+ int verSz = 0, issuerSz = 0, snSz = 0, keyEncAlgSz = 0;
+ int issuerSeqSz = 0, recipSeqSz = 0, issuerSerialSeqSz = 0;
int encKeyOctetStrSz;
+ int sidType;
byte ver[MAX_VERSION_SZ];
byte issuerSerialSeq[MAX_SEQ_SZ];
@@ -961,241 +6020,1696 @@ WOLFSSL_LOCAL int wc_CreateRecipientInfo(const byte* cert, word32 certSz,
byte issuerSeq[MAX_SEQ_SZ];
byte encKeyOctetStr[MAX_OCTET_STR_SZ];
+ byte issuerSKIDSeq[MAX_SEQ_SZ];
+ byte issuerSKID[MAX_OCTET_STR_SZ];
+ word32 issuerSKIDSeqSz = 0, issuerSKIDSz = 0;
+
#ifdef WOLFSSL_SMALL_STACK
- byte *serial;
- byte *keyAlgArray;
-
+ byte* serial;
+ byte* keyAlgArray;
+ byte* encryptedKey;
RsaKey* pubKey;
DecodedCert* decoded;
- serial = (byte*)XMALLOC(MAX_SN_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- keyAlgArray = (byte*)XMALLOC(MAX_SN_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
-
- if (decoded == NULL || serial == NULL || keyAlgArray == NULL) {
- if (serial) XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (keyAlgArray) XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (decoded) XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ serial = (byte*)XMALLOC(MAX_SN_SZ, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ keyAlgArray = (byte*)XMALLOC(MAX_SN_SZ, pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ decoded = (DecodedCert*)XMALLOC(sizeof(DecodedCert), pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+
+ if (decoded == NULL || serial == NULL ||
+ encryptedKey == NULL || keyAlgArray == NULL) {
+ if (serial)
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (keyAlgArray)
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (encryptedKey)
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (decoded)
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
return MEMORY_E;
}
-
#else
byte serial[MAX_SN_SZ];
byte keyAlgArray[MAX_ALGO_SZ];
-
- RsaKey stack_pubKey;
- RsaKey* pubKey = &stack_pubKey;
- DecodedCert stack_decoded;
- DecodedCert* decoded = &stack_decoded;
+ byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
+
+ RsaKey pubKey[1];
+ DecodedCert decoded[1];
+#endif
+
+ encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+ XMEMSET(encryptedKey, 0, encryptedKeySz);
+
+ /* default to IssuerAndSerialNumber if not set */
+ if (pkcs7->sidType != 0) {
+ sidType = pkcs7->sidType;
+ } else {
+ sidType = CMS_ISSUER_AND_SERIAL_NUMBER;
+ }
+
+ /* allow options to override SubjectIdentifier type if set */
+ if (options & CMS_SKID) {
+ sidType = CMS_SKID;
+ } else if (options & CMS_ISSUER_AND_SERIAL_NUMBER) {
+ sidType = CMS_ISSUER_AND_SERIAL_NUMBER;
+ }
+
+ /* allocate recipient struct */
+ recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (recip == NULL) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return MEMORY_E;
+ }
+ XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
+
+ /* get key size for content-encryption key based on algorithm */
+ blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+ if (blockKeySz < 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return blockKeySz;
+ }
+
+ /* generate random content encryption key, if needed */
+ ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+ if (ret < 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
- InitDecodedCert(decoded, (byte*)cert, certSz, 0);
+ InitDecodedCert(decoded, (byte*)cert, certSz, pkcs7->heap);
ret = ParseCert(decoded, CA_TYPE, NO_VERIFY, 0);
if (ret < 0) {
FreeDecodedCert(decoded);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return ret;
}
- /* version */
- verSz = SetMyVersion(0, ver, 0);
+ if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
- /* IssuerAndSerialNumber */
- if (decoded->issuerRaw == NULL || decoded->issuerRawLen == 0) {
- WOLFSSL_MSG("DecodedCert lacks raw issuer pointer and length");
- FreeDecodedCert(decoded);
+ /* version, must be 0 for IssuerAndSerialNumber */
+ verSz = SetMyVersion(0, ver, 0);
+ recip->recipVersion = 0;
+
+ /* IssuerAndSerialNumber */
+ if (decoded->issuerRaw == NULL || decoded->issuerRawLen == 0) {
+ WOLFSSL_MSG("DecodedCert lacks raw issuer pointer and length");
+ FreeDecodedCert(decoded);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return -1;
- }
- issuerSz = decoded->issuerRawLen;
- issuerSeqSz = SetSequence(issuerSz, issuerSeq);
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return -1;
+ }
+ issuerSz = decoded->issuerRawLen;
+ issuerSeqSz = SetSequence(issuerSz, issuerSeq);
- if (decoded->serialSz == 0) {
- WOLFSSL_MSG("DecodedCert missing serial number");
+ if (decoded->serialSz == 0) {
+ WOLFSSL_MSG("DecodedCert missing serial number");
+ FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return -1;
+ }
+ snSz = SetSerialNumber(decoded->serial, decoded->serialSz, serial,
+ MAX_SN_SZ, MAX_SN_SZ);
+
+ issuerSerialSeqSz = SetSequence(issuerSeqSz + issuerSz + snSz,
+ issuerSerialSeq);
+
+ } else if (sidType == CMS_SKID) {
+
+ /* version, must be 2 for SubjectKeyIdentifier */
+ verSz = SetMyVersion(2, ver, 0);
+ recip->recipVersion = 2;
+
+ issuerSKIDSz = SetOctetString(KEYID_SIZE, issuerSKID);
+ issuerSKIDSeqSz = SetExplicit(0, issuerSKIDSz + KEYID_SIZE,
+ issuerSKIDSeq);
+ } else {
FreeDecodedCert(decoded);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return -1;
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return PKCS7_RECIP_E;
}
- snSz = SetSerialNumber(decoded->serial, decoded->serialSz, serial);
- issuerSerialSeqSz = SetSequence(issuerSeqSz + issuerSz + snSz,
- issuerSerialSeq);
+ pkcs7->publicKeyOID = decoded->keyOID;
/* KeyEncryptionAlgorithmIdentifier, only support RSA now */
- if (keyEncAlgo != RSAk) {
+ if (pkcs7->publicKeyOID != RSAk) {
FreeDecodedCert(decoded);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return ALGO_ID_E;
}
- keyEncAlgSz = SetAlgoID(keyEncAlgo, keyAlgArray, keyType, 0);
+ keyEncAlgSz = SetAlgoID(pkcs7->publicKeyOID, keyAlgArray, oidKeyType, 0);
if (keyEncAlgSz == 0) {
FreeDecodedCert(decoded);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return BAD_FUNC_ARG;
}
#ifdef WOLFSSL_SMALL_STACK
- pubKey = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ pubKey = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
if (pubKey == NULL) {
FreeDecodedCert(decoded);
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return MEMORY_E;
}
#endif
/* EncryptedKey */
- ret = wc_InitRsaKey(pubKey, 0);
+ ret = wc_InitRsaKey_ex(pubKey, pkcs7->heap, INVALID_DEVID);
if (ret != 0) {
FreeDecodedCert(decoded);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pubKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return ret;
}
if (wc_RsaPublicKeyDecode(decoded->publicKey, &idx, pubKey,
- decoded->pubKeySize) < 0) {
+ decoded->pubKeySize) < 0) {
WOLFSSL_MSG("ASN RSA key decode error");
wc_FreeRsaKey(pubKey);
FreeDecodedCert(decoded);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pubKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return PUBLIC_KEY_E;
}
- *keyEncSz = wc_RsaPublicEncrypt(contentKeyPlain, blockKeySz, contentKeyEnc,
- MAX_ENCRYPTED_KEY_SZ, pubKey, rng);
+ ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ wc_FreeRsaKey(pubKey);
+ FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(pubKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+
+ ret = wc_RsaPublicEncrypt(pkcs7->cek, pkcs7->cekSz, encryptedKey,
+ encryptedKeySz, pubKey, &rng);
wc_FreeRsaKey(pubKey);
+ wc_FreeRng(&rng);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pubKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- if (*keyEncSz < 0) {
+ if (ret < 0) {
WOLFSSL_MSG("RSA Public Encrypt failed");
FreeDecodedCert(decoded);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return *keyEncSz;
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
}
+ encryptedKeySz = ret;
- encKeyOctetStrSz = SetOctetString(*keyEncSz, encKeyOctetStr);
+ encKeyOctetStrSz = SetOctetString(encryptedKeySz, encKeyOctetStr);
/* RecipientInfo */
- recipSeqSz = SetSequence(verSz + issuerSerialSeqSz + issuerSeqSz +
- issuerSz + snSz + keyEncAlgSz + encKeyOctetStrSz +
- *keyEncSz, recipSeq);
+ if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+ recipSeqSz = SetSequence(verSz + issuerSerialSeqSz + issuerSeqSz +
+ issuerSz + snSz + keyEncAlgSz +
+ encKeyOctetStrSz + encryptedKeySz, recipSeq);
+
+ if (recipSeqSz + verSz + issuerSerialSeqSz + issuerSeqSz + snSz +
+ keyEncAlgSz + encKeyOctetStrSz + encryptedKeySz > MAX_RECIP_SZ) {
+ WOLFSSL_MSG("RecipientInfo output buffer too small");
+ FreeDecodedCert(decoded);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BUFFER_E;
+ }
- if (recipSeqSz + verSz + issuerSerialSeqSz + issuerSeqSz + snSz +
- keyEncAlgSz + encKeyOctetStrSz + *keyEncSz > (int)outSz) {
- WOLFSSL_MSG("RecipientInfo output buffer too small");
- FreeDecodedCert(decoded);
+ } else {
+ recipSeqSz = SetSequence(verSz + issuerSKIDSeqSz + issuerSKIDSz +
+ KEYID_SIZE + keyEncAlgSz + encKeyOctetStrSz +
+ encryptedKeySz, recipSeq);
+
+ if (recipSeqSz + verSz + issuerSKIDSeqSz + issuerSKIDSz + KEYID_SIZE +
+ keyEncAlgSz + encKeyOctetStrSz + encryptedKeySz > MAX_RECIP_SZ) {
+ WOLFSSL_MSG("RecipientInfo output buffer too small");
+ FreeDecodedCert(decoded);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BUFFER_E;
+ }
+ }
+
+ idx = 0;
+ XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz);
+ idx += recipSeqSz;
+ XMEMCPY(recip->recip + idx, ver, verSz);
+ idx += verSz;
+ if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+ XMEMCPY(recip->recip + idx, issuerSerialSeq, issuerSerialSeqSz);
+ idx += issuerSerialSeqSz;
+ XMEMCPY(recip->recip + idx, issuerSeq, issuerSeqSz);
+ idx += issuerSeqSz;
+ XMEMCPY(recip->recip + idx, decoded->issuerRaw, issuerSz);
+ idx += issuerSz;
+ XMEMCPY(recip->recip + idx, serial, snSz);
+ idx += snSz;
+ } else {
+ XMEMCPY(recip->recip + idx, issuerSKIDSeq, issuerSKIDSeqSz);
+ idx += issuerSKIDSeqSz;
+ XMEMCPY(recip->recip + idx, issuerSKID, issuerSKIDSz);
+ idx += issuerSKIDSz;
+ XMEMCPY(recip->recip + idx, pkcs7->issuerSubjKeyId, KEYID_SIZE);
+ idx += KEYID_SIZE;
+ }
+ XMEMCPY(recip->recip + idx, keyAlgArray, keyEncAlgSz);
+ idx += keyEncAlgSz;
+ XMEMCPY(recip->recip + idx, encKeyOctetStr, encKeyOctetStrSz);
+ idx += encKeyOctetStrSz;
+ XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz);
+ idx += encryptedKeySz;
+
+ FreeDecodedCert(decoded);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(serial, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(keyAlgArray, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decoded, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ /* store recipient size */
+ recip->recipSz = idx;
+ recip->recipType = PKCS7_KTRI;
+
+ /* add recipient to recip list */
+ if (pkcs7->recipList == NULL) {
+ pkcs7->recipList = recip;
+ } else {
+ lastRecip = pkcs7->recipList;
+ while (lastRecip->next != NULL) {
+ lastRecip = lastRecip->next;
+ }
+ lastRecip->next = recip;
+ }
+
+ return idx;
+}
+
+#endif /* !NO_RSA */
+
+
+/* encrypt content using encryptOID algo */
+static int wc_PKCS7_EncryptContent(int encryptOID, byte* key, int keySz,
+ byte* iv, int ivSz, byte* aad, word32 aadSz,
+ byte* authTag, word32 authTagSz, byte* in,
+ int inSz, byte* out)
+{
+ int ret;
+#ifndef NO_AES
+ Aes aes;
+#endif
+#ifndef NO_DES3
+ Des des;
+ Des3 des3;
+#endif
+
+ if (key == NULL || iv == NULL || in == NULL || out == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (encryptOID) {
+#ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ case AES128CBCb:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CBCb:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CBCb:
+ #endif
+ if (
+ #ifdef WOLFSSL_AES_128
+ (encryptOID == AES128CBCb && keySz != 16 ) ||
+ #endif
+ #ifdef WOLFSSL_AES_192
+ (encryptOID == AES192CBCb && keySz != 24 ) ||
+ #endif
+ #ifdef WOLFSSL_AES_256
+ (encryptOID == AES256CBCb && keySz != 32 ) ||
+ #endif
+ (ivSz != AES_BLOCK_SIZE) )
+ return BAD_FUNC_ARG;
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesSetKey(&aes, key, keySz, iv, AES_ENCRYPTION);
+ if (ret == 0)
+ ret = wc_AesCbcEncrypt(&aes, out, in, inSz);
+ wc_AesFree(&aes);
+ }
+ break;
+ #ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ case AES128GCMb:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192GCMb:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256GCMb:
+ #endif
+ #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+ defined(WOLFSSL_AES_256)
+ if (authTag == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesGcmSetKey(&aes, key, keySz);
+ if (ret == 0)
+ ret = wc_AesGcmEncrypt(&aes, out, in, inSz, iv, ivSz,
+ authTag, authTagSz, aad, aadSz);
+ wc_AesFree(&aes);
+ }
+ break;
+ #endif
+ #endif /* HAVE_AESGCM */
+ #ifdef HAVE_AESCCM
+ #ifdef WOLFSSL_AES_128
+ case AES128CCMb:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CCMb:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CCMb:
+ #endif
+ #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+ defined(WOLFSSL_AES_256)
+ if (authTag == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesCcmSetKey(&aes, key, keySz);
+ if (ret == 0)
+ ret = wc_AesCcmEncrypt(&aes, out, in, inSz, iv, ivSz,
+ authTag, authTagSz, aad, aadSz);
+ wc_AesFree(&aes);
+ }
+ break;
+ #endif
+ #endif /* HAVE_AESCCM */
+#endif /* NO_AES */
+#ifndef NO_DES3
+ case DESb:
+ if (keySz != DES_KEYLEN || ivSz != DES_BLOCK_SIZE)
+ return BAD_FUNC_ARG;
+
+ ret = wc_Des_SetKey(&des, key, iv, DES_ENCRYPTION);
+ if (ret == 0)
+ ret = wc_Des_CbcEncrypt(&des, out, in, inSz);
+
+ break;
+
+ case DES3b:
+ if (keySz != DES3_KEYLEN || ivSz != DES_BLOCK_SIZE)
+ return BAD_FUNC_ARG;
+
+ ret = wc_Des3Init(&des3, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_Des3_SetKey(&des3, key, iv, DES_ENCRYPTION);
+ if (ret == 0)
+ ret = wc_Des3_CbcEncrypt(&des3, out, in, inSz);
+ wc_Des3Free(&des3);
+ }
+ break;
#endif
+ default:
+ WOLFSSL_MSG("Unsupported content cipher type");
+ return ALGO_ID_E;
+ };
+
+#if defined(NO_AES) || (!defined(HAVE_AESGCM) && !defined(HAVE_AESCCM))
+ (void)authTag;
+ (void)authTagSz;
+ (void)aad;
+ (void)aadSz;
+#endif
+ return ret;
+}
+
+
+/* decrypt content using encryptOID algo
+ * returns 0 on success */
+static int wc_PKCS7_DecryptContent(PKCS7* pkcs7, int encryptOID, byte* key,
+ int keySz, byte* iv, int ivSz, byte* aad, word32 aadSz, byte* authTag,
+ word32 authTagSz, byte* in, int inSz, byte* out)
+{
+ int ret;
+#ifndef NO_AES
+ Aes aes;
+#endif
+#ifndef NO_DES3
+ Des des;
+ Des3 des3;
+#endif
+
+ if (iv == NULL || in == NULL || out == NULL)
+ return BAD_FUNC_ARG;
+
+ if (pkcs7->decryptionCb != NULL) {
+ return pkcs7->decryptionCb(pkcs7, encryptOID, iv, ivSz,
+ aad, aadSz, authTag, authTagSz, in,
+ inSz, out, pkcs7->decryptionCtx);
+ }
+
+ if (key == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (encryptOID) {
+#ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ case AES128CBCb:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CBCb:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CBCb:
+ #endif
+ if (
+ #ifdef WOLFSSL_AES_128
+ (encryptOID == AES128CBCb && keySz != 16 ) ||
+ #endif
+ #ifdef WOLFSSL_AES_192
+ (encryptOID == AES192CBCb && keySz != 24 ) ||
+ #endif
+ #ifdef WOLFSSL_AES_256
+ (encryptOID == AES256CBCb && keySz != 32 ) ||
+ #endif
+ (ivSz != AES_BLOCK_SIZE) )
+ return BAD_FUNC_ARG;
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesSetKey(&aes, key, keySz, iv, AES_DECRYPTION);
+ if (ret == 0)
+ ret = wc_AesCbcDecrypt(&aes, out, in, inSz);
+ wc_AesFree(&aes);
+ }
+ break;
+ #ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ case AES128GCMb:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192GCMb:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256GCMb:
+ #endif
+ #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+ defined(WOLFSSL_AES_256)
+ if (authTag == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesGcmSetKey(&aes, key, keySz);
+ if (ret == 0)
+ ret = wc_AesGcmDecrypt(&aes, out, in, inSz, iv, ivSz,
+ authTag, authTagSz, aad, aadSz);
+ wc_AesFree(&aes);
+ }
+ break;
+ #endif
+ #endif /* HAVE_AESGCM */
+ #ifdef HAVE_AESCCM
+ #ifdef WOLFSSL_AES_128
+ case AES128CCMb:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CCMb:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CCMb:
+ #endif
+ #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+ defined(WOLFSSL_AES_256)
+ if (authTag == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesCcmSetKey(&aes, key, keySz);
+ if (ret == 0)
+ ret = wc_AesCcmDecrypt(&aes, out, in, inSz, iv, ivSz,
+ authTag, authTagSz, aad, aadSz);
+ wc_AesFree(&aes);
+ }
+ break;
+ #endif
+ #endif /* HAVE_AESCCM */
+#endif /* NO_AES */
+#ifndef NO_DES3
+ case DESb:
+ if (keySz != DES_KEYLEN || ivSz != DES_BLOCK_SIZE)
+ return BAD_FUNC_ARG;
+
+ ret = wc_Des_SetKey(&des, key, iv, DES_DECRYPTION);
+ if (ret == 0)
+ ret = wc_Des_CbcDecrypt(&des, out, in, inSz);
+
+ break;
+ case DES3b:
+ if (keySz != DES3_KEYLEN || ivSz != DES_BLOCK_SIZE)
+ return BAD_FUNC_ARG;
+
+ ret = wc_Des3Init(&des3, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_Des3_SetKey(&des3, key, iv, DES_DECRYPTION);
+ if (ret == 0)
+ ret = wc_Des3_CbcDecrypt(&des3, out, in, inSz);
+ wc_Des3Free(&des3);
+ }
+
+ break;
+#endif
+ default:
+ WOLFSSL_MSG("Unsupported content cipher type");
+ return ALGO_ID_E;
+ };
+
+#if defined(NO_AES) || (!defined(HAVE_AESGCM) && !defined(HAVE_AESCCM))
+ (void)authTag;
+ (void)authTagSz;
+ (void)aad;
+ (void)aadSz;
+#endif
+
+ return ret;
+}
+
+
+/* Generate random block, place in out, return 0 on success negative on error.
+ * Used for generation of IV, nonce, etc */
+static int wc_PKCS7_GenerateBlock(PKCS7* pkcs7, WC_RNG* rng, byte* out,
+ word32 outSz)
+{
+ int ret;
+ WC_RNG* rnd = NULL;
+
+ if (out == NULL || outSz == 0)
+ return BAD_FUNC_ARG;
+
+ /* input RNG is optional, init local one if input rng is NULL */
+ if (rng == NULL) {
+ rnd = (WC_RNG*)XMALLOC(sizeof(WC_RNG), pkcs7->heap, DYNAMIC_TYPE_RNG);
+ if (rnd == NULL)
+ return MEMORY_E;
+
+ ret = wc_InitRng_ex(rnd, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ XFREE(rnd, pkcs7->heap, DYNAMIC_TYPE_RNG);
+ return ret;
+ }
+
+ } else {
+ rnd = rng;
+ }
+
+ ret = wc_RNG_GenerateBlock(rnd, out, outSz);
+
+ if (rng == NULL) {
+ wc_FreeRng(rnd);
+ XFREE(rnd, pkcs7->heap, DYNAMIC_TYPE_RNG);
+ }
+
+ return ret;
+}
+
+
+/* Set default SignerIdentifier type to be used. Is either
+ * IssuerAndSerialNumber or SubjectKeyIdentifier. Encoding defaults to using
+ * IssuerAndSerialNumber unless set with this function or explicitly
+ * overridden via options when adding RecipientInfo type.
+ *
+ * Using the type DEGENERATE_SID skips over signer information. In degenerate
+ * cases there are no signers.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * type - either CMS_ISSUER_AND_SERIAL_NUMBER, CMS_SKID or DEGENERATE_SID
+ *
+ * return 0 on success, negative upon error */
+int wc_PKCS7_SetSignerIdentifierType(PKCS7* pkcs7, int type)
+{
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ if (type != CMS_ISSUER_AND_SERIAL_NUMBER &&
+ type != CMS_SKID &&
+ type != DEGENERATE_SID) {
+ return BAD_FUNC_ARG;
+ }
+
+ pkcs7->sidType = type;
+
+ return 0;
+}
+
+
+/* Set custom contentType, currently supported with SignedData type
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * contentType - pointer to array with ASN.1 encoded OID value
+ * sz - length of contentType array, octets
+ *
+ * return 0 on success, negative upon error */
+int wc_PKCS7_SetContentType(PKCS7* pkcs7, byte* contentType, word32 sz)
+{
+ if (pkcs7 == NULL || contentType == NULL || sz == 0)
+ return BAD_FUNC_ARG;
+
+ if (sz > MAX_OID_SZ) {
+ WOLFSSL_MSG("input array too large, bounded by MAX_OID_SZ");
+ return BAD_FUNC_ARG;
+ }
+
+ XMEMCPY(pkcs7->contentType, contentType, sz);
+ pkcs7->contentTypeSz = sz;
+
+ return 0;
+}
+
+
+/* return size of padded data, padded to blockSz chunks, or negative on error */
+int wc_PKCS7_GetPadSize(word32 inputSz, word32 blockSz)
+{
+ int padSz;
+
+ if (blockSz == 0)
+ return BAD_FUNC_ARG;
+
+ padSz = blockSz - (inputSz % blockSz);
+
+ return padSz;
+}
+
+
+/* pad input data to blockSz chunk, place in outSz. out must be big enough
+ * for input + pad bytes. See wc_PKCS7_GetPadSize() helper. */
+int wc_PKCS7_PadData(byte* in, word32 inSz, byte* out, word32 outSz,
+ word32 blockSz)
+{
+ int i, padSz;
+
+ if (in == NULL || inSz == 0 ||
+ out == NULL || outSz == 0)
+ return BAD_FUNC_ARG;
+
+ padSz = wc_PKCS7_GetPadSize(inSz, blockSz);
+
+ if (outSz < (inSz + padSz))
+ return BAD_FUNC_ARG;
+
+ XMEMCPY(out, in, inSz);
+
+ for (i = 0; i < padSz; i++) {
+ out[inSz + i] = (byte)padSz;
+ }
+
+ return inSz + padSz;
+}
+
+
+/* Encode and add CMS EnvelopedData ORI (OtherRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * Return 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_ORI(PKCS7* pkcs7, CallbackOriEncrypt oriEncryptCb,
+ int options)
+{
+ int oriTypeLenSz, blockKeySz, ret;
+ word32 idx, recipSeqSz;
+
+ Pkcs7EncodedRecip* recip = NULL;
+ Pkcs7EncodedRecip* lastRecip = NULL;
+
+ byte recipSeq[MAX_SEQ_SZ];
+ byte oriTypeLen[MAX_LENGTH_SZ];
+
+ byte oriType[MAX_ORI_TYPE_SZ];
+ byte oriValue[MAX_ORI_VALUE_SZ];
+ word32 oriTypeSz = MAX_ORI_TYPE_SZ;
+ word32 oriValueSz = MAX_ORI_VALUE_SZ;
+
+ if (pkcs7 == NULL || oriEncryptCb == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* allocate memory for RecipientInfo, KEK, encrypted key */
+ recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip),
+ pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (recip == NULL)
+ return MEMORY_E;
+ XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
+
+ /* get key size for content-encryption key based on algorithm */
+ blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+ if (blockKeySz < 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return blockKeySz;
+ }
+
+ /* generate random content encryption key, if needed */
+ ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+ if (ret < 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* call user callback to encrypt CEK and get oriType and oriValue
+ values back */
+ ret = oriEncryptCb(pkcs7, pkcs7->cek, pkcs7->cekSz, oriType, &oriTypeSz,
+ oriValue, &oriValueSz, pkcs7->oriEncryptCtx);
+ if (ret != 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ oriTypeLenSz = SetLength(oriTypeSz, oriTypeLen);
+
+ recipSeqSz = SetImplicit(ASN_SEQUENCE, 4, 1 + oriTypeLenSz + oriTypeSz +
+ oriValueSz, recipSeq);
+
+ idx = 0;
+ XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz);
+ idx += recipSeqSz;
+ /* oriType */
+ recip->recip[idx] = ASN_OBJECT_ID;
+ idx += 1;
+ XMEMCPY(recip->recip + idx, oriTypeLen, oriTypeLenSz);
+ idx += oriTypeLenSz;
+ XMEMCPY(recip->recip + idx, oriType, oriTypeSz);
+ idx += oriTypeSz;
+ /* oriValue, input MUST already be ASN.1 encoded */
+ XMEMCPY(recip->recip + idx, oriValue, oriValueSz);
+ idx += oriValueSz;
+
+ /* store recipient size */
+ recip->recipSz = idx;
+ recip->recipType = PKCS7_ORI;
+ recip->recipVersion = 4;
+
+ /* add recipient to recip list */
+ if (pkcs7->recipList == NULL) {
+ pkcs7->recipList = recip;
+ } else {
+ lastRecip = pkcs7->recipList;
+ while (lastRecip->next != NULL) {
+ lastRecip = lastRecip->next;
+ }
+ lastRecip->next = recip;
+ }
+
+ (void)options;
+
+ return idx;
+}
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+
+
+static int wc_PKCS7_GenerateKEK_PWRI(PKCS7* pkcs7, byte* passwd, word32 pLen,
+ byte* salt, word32 saltSz, int kdfOID,
+ int prfOID, int iterations, byte* out,
+ word32 outSz)
+{
+ int ret;
+
+ if (pkcs7 == NULL || passwd == NULL || salt == NULL || out == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (kdfOID) {
+
+ case PBKDF2_OID:
+
+ ret = wc_PBKDF2(out, passwd, pLen, salt, saltSz, iterations,
+ outSz, prfOID);
+ if (ret != 0) {
+ return ret;
+ }
+
+ break;
+
+ default:
+ WOLFSSL_MSG("Unsupported KDF OID");
+ return PKCS7_OID_E;
+ }
+
+ return 0;
+}
+
+
+/* RFC3211 (Section 2.3.1) key wrap algorithm (id-alg-PWRI-KEK).
+ *
+ * Returns output size on success, negative upon error */
+static int wc_PKCS7_PwriKek_KeyWrap(PKCS7* pkcs7, const byte* kek, word32 kekSz,
+ const byte* cek, word32 cekSz,
+ byte* out, word32 *outSz,
+ const byte* iv, word32 ivSz, int algID)
+{
+ WC_RNG rng;
+ int blockSz, outLen, ret;
+ word32 padSz;
+ byte* lastBlock;
+
+ if (kek == NULL || cek == NULL || iv == NULL || outSz == NULL)
+ return BAD_FUNC_ARG;
+
+ /* get encryption algorithm block size */
+ blockSz = wc_PKCS7_GetOIDBlockSize(algID);
+ if (blockSz < 0)
+ return blockSz;
+
+ /* get pad bytes needed to block boundary */
+ padSz = blockSz - ((4 + cekSz) % blockSz);
+ outLen = 4 + cekSz + padSz;
+
+ /* must be at least two blocks long */
+ if (outLen < 2 * blockSz)
+ padSz += blockSz;
+
+ /* if user set out to NULL, give back required length */
+ if (out == NULL) {
+ *outSz = outLen;
+ return LENGTH_ONLY_E;
+ }
+
+ /* verify output buffer is large enough */
+ if (*outSz < (word32)outLen)
return BUFFER_E;
+
+ out[0] = cekSz;
+ out[1] = ~cek[0];
+ out[2] = ~cek[1];
+ out[3] = ~cek[2];
+ XMEMCPY(out + 4, cek, cekSz);
+
+ /* random padding of size padSz */
+ ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+ if (ret != 0)
+ return ret;
+
+ ret = wc_RNG_GenerateBlock(&rng, out + 4 + cekSz, padSz);
+
+ if (ret == 0) {
+ /* encrypt, normal */
+ ret = wc_PKCS7_EncryptContent(algID, (byte*)kek, kekSz, (byte*)iv,
+ ivSz, NULL, 0, NULL, 0, out, outLen, out);
+ }
+
+ if (ret == 0) {
+ /* encrypt again, using last ciphertext block as IV */
+ lastBlock = out + (((outLen / blockSz) - 1) * blockSz);
+ ret = wc_PKCS7_EncryptContent(algID, (byte*)kek, kekSz, lastBlock,
+ blockSz, NULL, 0, NULL, 0, out,
+ outLen, out);
}
- XMEMCPY(out + totalSz, recipSeq, recipSeqSz);
+ if (ret == 0) {
+ *outSz = outLen;
+ } else {
+ outLen = ret;
+ }
+
+ wc_FreeRng(&rng);
+
+ return outLen;
+}
+
+
+/* RFC3211 (Section 2.3.2) key unwrap algorithm (id-alg-PWRI-KEK).
+ *
+ * Returns cek size on success, negative upon error */
+static int wc_PKCS7_PwriKek_KeyUnWrap(PKCS7* pkcs7, const byte* kek,
+ word32 kekSz, const byte* in, word32 inSz,
+ byte* out, word32 outSz, const byte* iv,
+ word32 ivSz, int algID)
+{
+ int blockSz, cekLen, ret;
+ byte* tmpIv = NULL;
+ byte* lastBlock = NULL;
+ byte* outTmp = NULL;
+
+ if (pkcs7 == NULL || kek == NULL || in == NULL ||
+ out == NULL || iv == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ outTmp = (byte*)XMALLOC(inSz, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (outTmp == NULL)
+ return MEMORY_E;
+
+ /* get encryption algorithm block size */
+ blockSz = wc_PKCS7_GetOIDBlockSize(algID);
+ if (blockSz < 0) {
+ XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return blockSz;
+ }
+
+ /* input needs to be blockSz multiple and at least 2 * blockSz */
+ if (((inSz % blockSz) != 0) || (inSz < (2 * (word32)blockSz))) {
+ WOLFSSL_MSG("PWRI-KEK unwrap input must of block size and >= 2 "
+ "times block size");
+ XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return BAD_FUNC_ARG;
+ }
+
+ /* use block out[n-1] as IV to decrypt block out[n] */
+ lastBlock = (byte*)in + inSz - blockSz;
+ tmpIv = lastBlock - blockSz;
+
+ /* decrypt last block */
+ ret = wc_PKCS7_DecryptContent(pkcs7, algID, (byte*)kek, kekSz, tmpIv,
+ blockSz, NULL, 0, NULL, 0, lastBlock, blockSz,
+ outTmp + inSz - blockSz);
+
+ if (ret == 0) {
+ /* using last decrypted block as IV, decrypt [0 ... n-1] blocks */
+ lastBlock = outTmp + inSz - blockSz;
+ ret = wc_PKCS7_DecryptContent(pkcs7, algID, (byte*)kek, kekSz,
+ lastBlock, blockSz, NULL, 0, NULL, 0, (byte*)in, inSz - blockSz,
+ outTmp);
+ }
+
+ if (ret == 0) {
+ /* decrypt using original kek and iv */
+ ret = wc_PKCS7_DecryptContent(pkcs7, algID, (byte*)kek, kekSz,
+ (byte*)iv, ivSz, NULL, 0, NULL, 0, outTmp, inSz, outTmp);
+ }
+
+ if (ret != 0) {
+ ForceZero(outTmp, inSz);
+ XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+ }
+
+ cekLen = outTmp[0];
+
+ /* verify length */
+ if ((word32)cekLen > inSz) {
+ ForceZero(outTmp, inSz);
+ XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return BAD_FUNC_ARG;
+ }
+
+ /* verify check bytes */
+ if ((outTmp[1] ^ outTmp[4]) != 0xFF ||
+ (outTmp[2] ^ outTmp[5]) != 0xFF ||
+ (outTmp[3] ^ outTmp[6]) != 0xFF) {
+ ForceZero(outTmp, inSz);
+ XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return BAD_FUNC_ARG;
+ }
+
+ if (outSz < (word32)cekLen) {
+ ForceZero(outTmp, inSz);
+ XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return BUFFER_E;
+ }
+
+ XMEMCPY(out, outTmp + 4, outTmp[0]);
+ ForceZero(outTmp, inSz);
+ XFREE(outTmp, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ return cekLen;
+}
+
+
+/* Encode and add CMS EnvelopedData PWRI (PasswordRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * Return 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_PWRI(PKCS7* pkcs7, byte* passwd, word32 pLen,
+ byte* salt, word32 saltSz, int kdfOID,
+ int hashOID, int iterations, int kekEncryptOID,
+ int options)
+{
+ Pkcs7EncodedRecip* recip = NULL;
+ Pkcs7EncodedRecip* lastRecip = NULL;
+
+ /* PasswordRecipientInfo */
+ byte recipSeq[MAX_SEQ_SZ];
+ byte ver[MAX_VERSION_SZ];
+ word32 recipSeqSz, verSz;
+
+ /* KeyDerivationAlgorithmIdentifier */
+ byte kdfAlgoIdSeq[MAX_SEQ_SZ];
+ byte kdfAlgoId[MAX_OID_SZ];
+ byte kdfParamsSeq[MAX_SEQ_SZ]; /* PBKDF2-params */
+ byte kdfSaltOctetStr[MAX_OCTET_STR_SZ]; /* salt OCTET STRING */
+ byte kdfIterations[MAX_VERSION_SZ];
+ word32 kdfAlgoIdSeqSz, kdfAlgoIdSz;
+ word32 kdfParamsSeqSz, kdfSaltOctetStrSz, kdfIterationsSz;
+ /* OPTIONAL: keyLength, not supported yet */
+ /* OPTIONAL: prf AlgorithIdentifier, not supported yet */
+
+ /* KeyEncryptionAlgorithmIdentifier */
+ byte keyEncAlgoIdSeq[MAX_SEQ_SZ];
+ byte keyEncAlgoId[MAX_OID_SZ]; /* id-alg-PWRI-KEK */
+ byte pwriEncAlgoId[MAX_ALGO_SZ];
+ byte ivOctetString[MAX_OCTET_STR_SZ];
+ word32 keyEncAlgoIdSeqSz, keyEncAlgoIdSz;
+ word32 pwriEncAlgoIdSz, ivOctetStringSz;
+
+ /* EncryptedKey */
+ byte encKeyOctetStr[MAX_OCTET_STR_SZ];
+ word32 encKeyOctetStrSz;
+
+ byte tmpIv[MAX_CONTENT_IV_SIZE];
+ byte* encryptedKey = NULL;
+ byte* kek = NULL;
+
+ int cekKeySz = 0, kekKeySz = 0, kekBlockSz = 0, ret = 0;
+ int encryptOID;
+ word32 idx, totalSz = 0, encryptedKeySz;
+
+ if (pkcs7 == NULL || passwd == NULL || pLen == 0 ||
+ salt == NULL || saltSz == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* allow user to use different KEK encryption algorithm than used for
+ * main content encryption algorithm, if passed in */
+ if (kekEncryptOID != 0) {
+ encryptOID = kekEncryptOID;
+ } else {
+ encryptOID = pkcs7->encryptOID;
+ }
+
+ /* get content-encryption key size, based on algorithm */
+ cekKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+ if (cekKeySz < 0)
+ return cekKeySz;
+
+ /* get KEK encryption key size, based on algorithm */
+ if (encryptOID != pkcs7->encryptOID) {
+ kekKeySz = wc_PKCS7_GetOIDKeySize(encryptOID);
+ } else {
+ kekKeySz = cekKeySz;
+ }
+
+ /* get KEK encryption block size */
+ kekBlockSz = wc_PKCS7_GetOIDBlockSize(encryptOID);
+ if (kekBlockSz < 0)
+ return kekBlockSz;
+
+ /* generate random CEK */
+ ret = PKCS7_GenerateContentEncryptionKey(pkcs7, cekKeySz);
+ if (ret < 0)
+ return ret;
+
+ /* generate random IV */
+ ret = wc_PKCS7_GenerateBlock(pkcs7, NULL, tmpIv, kekBlockSz);
+ if (ret != 0)
+ return ret;
+
+ /* allocate memory for RecipientInfo, KEK, encrypted key */
+ recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip),
+ pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (recip == NULL)
+ return MEMORY_E;
+
+ kek = (byte*)XMALLOC(kekKeySz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (kek == NULL) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ,
+ pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (encryptedKey == NULL) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+ XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
+ XMEMSET(kek, 0, kekKeySz);
+ XMEMSET(encryptedKey, 0, encryptedKeySz);
+
+ /* generate KEK: expand password into KEK */
+ ret = wc_PKCS7_GenerateKEK_PWRI(pkcs7, passwd, pLen, salt, saltSz,
+ kdfOID, hashOID, iterations, kek,
+ kekKeySz);
+ if (ret < 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* generate encrypted key: encrypt CEK with KEK */
+ ret = wc_PKCS7_PwriKek_KeyWrap(pkcs7, kek, kekKeySz, pkcs7->cek,
+ pkcs7->cekSz, encryptedKey, &encryptedKeySz,
+ tmpIv, kekBlockSz, encryptOID);
+ if (ret < 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+ encryptedKeySz = ret;
+
+ /* put together encrypted key OCTET STRING */
+ encKeyOctetStrSz = SetOctetString(encryptedKeySz, encKeyOctetStr);
+ totalSz += (encKeyOctetStrSz + encryptedKeySz);
+
+ /* put together IV OCTET STRING */
+ ivOctetStringSz = SetOctetString(kekBlockSz, ivOctetString);
+ totalSz += (ivOctetStringSz + kekBlockSz);
+
+ /* set PWRIAlgorithms AlgorithmIdentifier, adding (ivOctetStringSz +
+ blockKeySz) for IV OCTET STRING */
+ pwriEncAlgoIdSz = SetAlgoID(encryptOID, pwriEncAlgoId,
+ oidBlkType, ivOctetStringSz + kekBlockSz);
+ totalSz += pwriEncAlgoIdSz;
+
+ /* set KeyEncryptionAlgorithms OID */
+ ret = wc_SetContentType(PWRI_KEK_WRAP, keyEncAlgoId, sizeof(keyEncAlgoId));
+ if (ret <= 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+ keyEncAlgoIdSz = ret;
+ totalSz += keyEncAlgoIdSz;
+
+ /* KeyEncryptionAlgorithm SEQ */
+ keyEncAlgoIdSeqSz = SetSequence(keyEncAlgoIdSz + pwriEncAlgoIdSz +
+ ivOctetStringSz + kekBlockSz,
+ keyEncAlgoIdSeq);
+ totalSz += keyEncAlgoIdSeqSz;
+
+ /* set KDF salt */
+ kdfSaltOctetStrSz = SetOctetString(saltSz, kdfSaltOctetStr);
+ totalSz += (kdfSaltOctetStrSz + saltSz);
+
+ /* set KDF iteration count */
+ kdfIterationsSz = SetMyVersion(iterations, kdfIterations, 0);
+ totalSz += kdfIterationsSz;
+
+ /* set KDF params SEQ */
+ kdfParamsSeqSz = SetSequence(kdfSaltOctetStrSz + saltSz + kdfIterationsSz,
+ kdfParamsSeq);
+ totalSz += kdfParamsSeqSz;
+
+ /* set KDF algo OID */
+ ret = wc_SetContentType(kdfOID, kdfAlgoId, sizeof(kdfAlgoId));
+ if (ret <= 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+ kdfAlgoIdSz = ret;
+ totalSz += kdfAlgoIdSz;
+
+ /* set KeyDerivationAlgorithmIdentifier EXPLICIT [0] SEQ */
+ kdfAlgoIdSeqSz = SetExplicit(0, kdfAlgoIdSz + kdfParamsSeqSz +
+ kdfSaltOctetStrSz + saltSz + kdfIterationsSz,
+ kdfAlgoIdSeq);
+ totalSz += kdfAlgoIdSeqSz;
+
+ /* set PasswordRecipientInfo CMSVersion, MUST be 0 */
+ verSz = SetMyVersion(0, ver, 0);
+ totalSz += verSz;
+ recip->recipVersion = 0;
+
+ /* set PasswordRecipientInfo SEQ */
+ recipSeqSz = SetImplicit(ASN_SEQUENCE, 3, totalSz, recipSeq);
totalSz += recipSeqSz;
- XMEMCPY(out + totalSz, ver, verSz);
+
+ if (totalSz > MAX_RECIP_SZ) {
+ WOLFSSL_MSG("CMS Recipient output buffer too small");
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BUFFER_E;
+ }
+
+ idx = 0;
+ XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz);
+ idx += recipSeqSz;
+ XMEMCPY(recip->recip + idx, ver, verSz);
+ idx += verSz;
+ XMEMCPY(recip->recip + idx, kdfAlgoIdSeq, kdfAlgoIdSeqSz);
+ idx += kdfAlgoIdSeqSz;
+ XMEMCPY(recip->recip + idx, kdfAlgoId, kdfAlgoIdSz);
+ idx += kdfAlgoIdSz;
+ XMEMCPY(recip->recip + idx, kdfParamsSeq, kdfParamsSeqSz);
+ idx += kdfParamsSeqSz;
+ XMEMCPY(recip->recip + idx, kdfSaltOctetStr, kdfSaltOctetStrSz);
+ idx += kdfSaltOctetStrSz;
+ XMEMCPY(recip->recip + idx, salt, saltSz);
+ idx += saltSz;
+ XMEMCPY(recip->recip + idx, kdfIterations, kdfIterationsSz);
+ idx += kdfIterationsSz;
+ XMEMCPY(recip->recip + idx, keyEncAlgoIdSeq, keyEncAlgoIdSeqSz);
+ idx += keyEncAlgoIdSeqSz;
+ XMEMCPY(recip->recip + idx, keyEncAlgoId, keyEncAlgoIdSz);
+ idx += keyEncAlgoIdSz;
+ XMEMCPY(recip->recip + idx, pwriEncAlgoId, pwriEncAlgoIdSz);
+ idx += pwriEncAlgoIdSz;
+ XMEMCPY(recip->recip + idx, ivOctetString, ivOctetStringSz);
+ idx += ivOctetStringSz;
+ XMEMCPY(recip->recip + idx, tmpIv, kekBlockSz);
+ idx += kekBlockSz;
+ XMEMCPY(recip->recip + idx, encKeyOctetStr, encKeyOctetStrSz);
+ idx += encKeyOctetStrSz;
+ XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz);
+ idx += encryptedKeySz;
+
+ ForceZero(kek, kekBlockSz);
+ ForceZero(encryptedKey, encryptedKeySz);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+ /* store recipient size */
+ recip->recipSz = idx;
+ recip->recipType = PKCS7_PWRI;
+
+ /* add recipient to recip list */
+ if (pkcs7->recipList == NULL) {
+ pkcs7->recipList = recip;
+ } else {
+ lastRecip = pkcs7->recipList;
+ while (lastRecip->next != NULL) {
+ lastRecip = lastRecip->next;
+ }
+ lastRecip->next = recip;
+ }
+
+ (void)options;
+
+ return idx;
+}
+
+/* Import password and KDF settings into a PKCS7 structure. Used for setting
+ * the password info for decryption a EnvelopedData PWRI RecipientInfo.
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_SetPassword(PKCS7* pkcs7, byte* passwd, word32 pLen)
+{
+ if (pkcs7 == NULL || passwd == NULL || pLen == 0)
+ return BAD_FUNC_ARG;
+
+ pkcs7->pass = passwd;
+ pkcs7->passSz = pLen;
+
+ return 0;
+}
+
+#endif /* NO_PWDBASED */
+
+
+/* Encode and add CMS EnvelopedData KEKRI (KEKRecipientInfo) RecipientInfo
+ * to CMS/PKCS#7 EnvelopedData structure.
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * keyWrapOID - OID sum of key wrap algorithm identifier
+ * kek - key encryption key
+ * kekSz - size of kek, bytes
+ * keyID - key-encryption key identifier, pre-distributed to endpoints
+ * keyIDSz - size of keyID, bytes
+ * timePtr - pointer to "time_t", which is typically "long" (OPTIONAL)
+ * otherOID - ASN.1 encoded OID of other attribute (OPTIONAL)
+ * otherOIDSz - size of otherOID, bytes (OPTIONAL)
+ * other - other attribute (OPTIONAL)
+ * otherSz - size of other (OPTIONAL)
+ *
+ * Returns 0 on success, negative upon error */
+int wc_PKCS7_AddRecipient_KEKRI(PKCS7* pkcs7, int keyWrapOID, byte* kek,
+ word32 kekSz, byte* keyId, word32 keyIdSz,
+ void* timePtr, byte* otherOID,
+ word32 otherOIDSz, byte* other, word32 otherSz,
+ int options)
+{
+ Pkcs7EncodedRecip* recip = NULL;
+ Pkcs7EncodedRecip* lastRecip = NULL;
+
+ byte recipSeq[MAX_SEQ_SZ];
+ byte ver[MAX_VERSION_SZ];
+ byte kekIdSeq[MAX_SEQ_SZ];
+ byte kekIdOctetStr[MAX_OCTET_STR_SZ];
+ byte genTime[ASN_GENERALIZED_TIME_SIZE];
+ byte otherAttSeq[MAX_SEQ_SZ];
+ byte encAlgoId[MAX_ALGO_SZ];
+ byte encKeyOctetStr[MAX_OCTET_STR_SZ];
+#ifdef WOLFSSL_SMALL_STACK
+ byte* encryptedKey;
+#else
+ byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
+#endif
+
+ int blockKeySz = 0, ret = 0, direction;
+ word32 idx = 0;
+ word32 totalSz = 0;
+ word32 recipSeqSz = 0, verSz = 0;
+ word32 kekIdSeqSz = 0, kekIdOctetStrSz = 0;
+ word32 otherAttSeqSz = 0, encAlgoIdSz = 0, encKeyOctetStrSz = 0;
+ int encryptedKeySz;
+
+ int timeSz = 0;
+#ifndef NO_ASN_TIME
+ time_t* tm = NULL;
+#endif
+
+ if (pkcs7 == NULL || kek == NULL || keyId == NULL)
+ return BAD_FUNC_ARG;
+
+ recip = (Pkcs7EncodedRecip*)XMALLOC(sizeof(Pkcs7EncodedRecip), pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (recip == NULL)
+ return MEMORY_E;
+
+ XMEMSET(recip, 0, sizeof(Pkcs7EncodedRecip));
+
+ /* get key size for content-encryption key based on algorithm */
+ blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+ if (blockKeySz < 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return blockKeySz;
+ }
+
+ /* generate random content encryption key, if needed */
+ ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+ if (ret < 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* EncryptedKey */
+#ifdef WOLFSSL_SMALL_STACK
+ encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (encryptedKey == NULL) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+#endif
+ encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+ XMEMSET(encryptedKey, 0, encryptedKeySz);
+
+ #ifndef NO_AES
+ direction = AES_ENCRYPTION;
+ #else
+ direction = DES_ENCRYPTION;
+ #endif
+
+ encryptedKeySz = wc_PKCS7_KeyWrap(pkcs7->cek, pkcs7->cekSz, kek, kekSz,
+ encryptedKey, encryptedKeySz, keyWrapOID,
+ direction);
+ if (encryptedKeySz < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return encryptedKeySz;
+ }
+ /* handle a zero size encKey case as WC_KEY_SIZE_E */
+ if (encryptedKeySz == 0 || encryptedKeySz > MAX_ENCRYPTED_KEY_SZ) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return WC_KEY_SIZE_E;
+ }
+
+ encKeyOctetStrSz = SetOctetString(encryptedKeySz, encKeyOctetStr);
+ totalSz += (encKeyOctetStrSz + encryptedKeySz);
+
+ /* KeyEncryptionAlgorithmIdentifier */
+ encAlgoIdSz = SetAlgoID(keyWrapOID, encAlgoId, oidKeyWrapType, 0);
+ totalSz += encAlgoIdSz;
+
+ /* KEKIdentifier: keyIdentifier */
+ kekIdOctetStrSz = SetOctetString(keyIdSz, kekIdOctetStr);
+ totalSz += (kekIdOctetStrSz + keyIdSz);
+
+ /* KEKIdentifier: GeneralizedTime (OPTIONAL) */
+#ifndef NO_ASN_TIME
+ if (timePtr != NULL) {
+ tm = (time_t*)timePtr;
+ timeSz = GetAsnTimeString(tm, genTime, sizeof(genTime));
+ if (timeSz < 0) {
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return timeSz;
+ }
+ totalSz += timeSz;
+ }
+#endif
+
+ /* KEKIdentifier: OtherKeyAttribute SEQ (OPTIONAL) */
+ if (other != NULL && otherSz > 0) {
+ otherAttSeqSz = SetSequence(otherOIDSz + otherSz, otherAttSeq);
+ totalSz += otherAttSeqSz + otherOIDSz + otherSz;
+ }
+
+ /* KEKIdentifier SEQ */
+ kekIdSeqSz = SetSequence(kekIdOctetStrSz + keyIdSz + timeSz +
+ otherAttSeqSz + otherOIDSz + otherSz, kekIdSeq);
+ totalSz += kekIdSeqSz;
+
+ /* version */
+ verSz = SetMyVersion(4, ver, 0);
totalSz += verSz;
- XMEMCPY(out + totalSz, issuerSerialSeq, issuerSerialSeqSz);
- totalSz += issuerSerialSeqSz;
- XMEMCPY(out + totalSz, issuerSeq, issuerSeqSz);
- totalSz += issuerSeqSz;
- XMEMCPY(out + totalSz, decoded->issuerRaw, issuerSz);
- totalSz += issuerSz;
- XMEMCPY(out + totalSz, serial, snSz);
- totalSz += snSz;
- XMEMCPY(out + totalSz, keyAlgArray, keyEncAlgSz);
- totalSz += keyEncAlgSz;
- XMEMCPY(out + totalSz, encKeyOctetStr, encKeyOctetStrSz);
- totalSz += encKeyOctetStrSz;
- XMEMCPY(out + totalSz, contentKeyEnc, *keyEncSz);
- totalSz += *keyEncSz;
+ recip->recipVersion = 4;
- FreeDecodedCert(decoded);
+ /* KEKRecipientInfo SEQ */
+ recipSeqSz = SetImplicit(ASN_SEQUENCE, 2, totalSz, recipSeq);
+ totalSz += recipSeqSz;
+
+ if (totalSz > MAX_RECIP_SZ) {
+ WOLFSSL_MSG("CMS Recipient output buffer too small");
+ XFREE(recip, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return BUFFER_E;
+ }
+
+ XMEMCPY(recip->recip + idx, recipSeq, recipSeqSz);
+ idx += recipSeqSz;
+ XMEMCPY(recip->recip + idx, ver, verSz);
+ idx += verSz;
+ XMEMCPY(recip->recip + idx, kekIdSeq, kekIdSeqSz);
+ idx += kekIdSeqSz;
+ XMEMCPY(recip->recip + idx, kekIdOctetStr, kekIdOctetStrSz);
+ idx += kekIdOctetStrSz;
+ XMEMCPY(recip->recip + idx, keyId, keyIdSz);
+ idx += keyIdSz;
+ if (timePtr != NULL) {
+ XMEMCPY(recip->recip + idx, genTime, timeSz);
+ idx += timeSz;
+ }
+ if (other != NULL && otherSz > 0) {
+ XMEMCPY(recip->recip + idx, otherAttSeq, otherAttSeqSz);
+ idx += otherAttSeqSz;
+ XMEMCPY(recip->recip + idx, otherOID, otherOIDSz);
+ idx += otherOIDSz;
+ XMEMCPY(recip->recip + idx, other, otherSz);
+ idx += otherSz;
+ }
+ XMEMCPY(recip->recip + idx, encAlgoId, encAlgoIdSz);
+ idx += encAlgoIdSz;
+ XMEMCPY(recip->recip + idx, encKeyOctetStr, encKeyOctetStrSz);
+ idx += encKeyOctetStrSz;
+ XMEMCPY(recip->recip + idx, encryptedKey, encryptedKeySz);
+ idx += encryptedKeySz;
#ifdef WOLFSSL_SMALL_STACK
- XFREE(serial, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(keyAlgArray, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(decoded, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
#endif
- return totalSz;
+ /* store recipient size */
+ recip->recipSz = idx;
+ recip->recipType = PKCS7_KEKRI;
+
+ /* add recipient to recip list */
+ if (pkcs7->recipList == NULL) {
+ pkcs7->recipList = recip;
+ } else {
+ lastRecip = pkcs7->recipList;
+ while(lastRecip->next != NULL) {
+ lastRecip = lastRecip->next;
+ }
+ lastRecip->next = recip;
+ }
+
+ (void)options;
+
+ return idx;
+}
+
+
+static int wc_PKCS7_GetCMSVersion(PKCS7* pkcs7, int cmsContentType)
+{
+ int version = -1;
+
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ switch (cmsContentType) {
+ case ENVELOPED_DATA:
+
+ /* NOTE: EnvelopedData does not currently support
+ originatorInfo or unprotectedAttributes. When either of these
+ are added, version checking below needs to be updated to match
+ Section 6.1 of RFC 5652 */
+
+ /* if RecipientInfos include pwri or ori, version is 3 */
+ if (wc_PKCS7_RecipientListIncludesType(pkcs7, PKCS7_PWRI) ||
+ wc_PKCS7_RecipientListIncludesType(pkcs7, PKCS7_ORI)) {
+ version = 3;
+ break;
+ }
+
+ /* if unprotectedAttrs is absent AND all RecipientInfo structs
+ are version 0, version is 0 */
+ if (wc_PKCS7_RecipientListVersionsAllZero(pkcs7)) {
+ version = 0;
+ break;
+ }
+
+ /* otherwise, version is 2 */
+ version = 2;
+ break;
+
+ default:
+ break;
+ }
+
+ return version;
}
/* build PKCS#7 envelopedData content type, return enveloped size */
int wc_PKCS7_EncodeEnvelopedData(PKCS7* pkcs7, byte* output, word32 outputSz)
{
- int i, ret = 0, idx = 0;
- int totalSz = 0, padSz = 0, desOutSz = 0;
+ int ret, idx = 0;
+ int totalSz, padSz, encryptedOutSz;
- int contentInfoSeqSz, outerContentTypeSz, outerContentSz;
+ int contentInfoSeqSz = 0, outerContentTypeSz = 0, outerContentSz;
byte contentInfoSeq[MAX_SEQ_SZ];
byte outerContentType[MAX_ALGO_SZ];
byte outerContent[MAX_SEQ_SZ];
+ int kariVersion;
int envDataSeqSz, verSz;
byte envDataSeq[MAX_SEQ_SZ];
byte ver[MAX_VERSION_SZ];
- RNG rng;
- int contentKeyEncSz, blockKeySz;
- int dynamicFlag = 0;
- byte contentKeyPlain[MAX_CONTENT_KEY_LEN];
-#ifdef WOLFSSL_SMALL_STACK
- byte* contentKeyEnc;
-#else
- byte contentKeyEnc[MAX_ENCRYPTED_KEY_SZ];
-#endif
+ WC_RNG rng;
+ int blockSz, blockKeySz;
byte* plain;
byte* encryptedContent;
+ Pkcs7EncodedRecip* tmpRecip = NULL;
int recipSz, recipSetSz;
-#ifdef WOLFSSL_SMALL_STACK
- byte* recip;
-#else
- byte recip[MAX_RECIP_SZ];
-#endif
byte recipSet[MAX_SET_SZ];
int encContentOctetSz, encContentSeqSz, contentTypeSz;
@@ -1203,204 +7717,3109 @@ int wc_PKCS7_EncodeEnvelopedData(PKCS7* pkcs7, byte* output, word32 outputSz)
byte encContentSeq[MAX_SEQ_SZ];
byte contentType[MAX_ALGO_SZ];
byte contentEncAlgo[MAX_ALGO_SZ];
- byte tmpIv[DES_BLOCK_SIZE];
+ byte tmpIv[MAX_CONTENT_IV_SIZE];
byte ivOctetString[MAX_OCTET_STR_SZ];
byte encContentOctet[MAX_OCTET_STR_SZ];
- if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0 ||
- pkcs7->encryptOID == 0 || pkcs7->singleCert == NULL)
+ if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0)
return BAD_FUNC_ARG;
if (output == NULL || outputSz == 0)
return BAD_FUNC_ARG;
- /* PKCS#7 only supports DES, 3DES for now */
- switch (pkcs7->encryptOID) {
- case DESb:
- blockKeySz = DES_KEYLEN;
- break;
+ blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+ if (blockKeySz < 0)
+ return blockKeySz;
- case DES3b:
- blockKeySz = DES3_KEYLEN;
+ blockSz = wc_PKCS7_GetOIDBlockSize(pkcs7->encryptOID);
+ if (blockSz < 0)
+ return blockSz;
+
+ if (pkcs7->contentOID != FIRMWARE_PKG_DATA) {
+ /* outer content type */
+ ret = wc_SetContentType(ENVELOPED_DATA, outerContentType,
+ sizeof(outerContentType));
+ if (ret < 0)
+ return ret;
+
+ outerContentTypeSz = ret;
+ }
+
+ /* generate random content encryption key */
+ ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* build RecipientInfo, only if user manually set singleCert and size */
+ if (pkcs7->singleCert != NULL && pkcs7->singleCertSz > 0) {
+ switch (pkcs7->publicKeyOID) {
+ #ifndef NO_RSA
+ case RSAk:
+ ret = wc_PKCS7_AddRecipient_KTRI(pkcs7, pkcs7->singleCert,
+ pkcs7->singleCertSz, 0);
+ break;
+ #endif
+ #ifdef HAVE_ECC
+ case ECDSAk:
+ ret = wc_PKCS7_AddRecipient_KARI(pkcs7, pkcs7->singleCert,
+ pkcs7->singleCertSz,
+ pkcs7->keyWrapOID,
+ pkcs7->keyAgreeOID, pkcs7->ukm,
+ pkcs7->ukmSz, 0);
+ break;
+ #endif
+
+ default:
+ WOLFSSL_MSG("Unsupported RecipientInfo public key type");
+ return BAD_FUNC_ARG;
+ };
+
+ if (ret < 0) {
+ WOLFSSL_MSG("Failed to create RecipientInfo");
+ return ret;
+ }
+ }
+
+ recipSz = wc_PKCS7_GetRecipientListSize(pkcs7);
+ if (recipSz < 0) {
+ return ret;
+
+ } else if (recipSz == 0) {
+ WOLFSSL_MSG("You must add at least one CMS recipient");
+ return PKCS7_RECIP_E;
+ }
+ recipSetSz = SetSet(recipSz, recipSet);
+
+ /* version, defined in Section 6.1 of RFC 5652 */
+ kariVersion = wc_PKCS7_GetCMSVersion(pkcs7, ENVELOPED_DATA);
+ if (kariVersion < 0) {
+ WOLFSSL_MSG("Failed to set CMS EnvelopedData version");
+ return PKCS7_RECIP_E;
+ }
+
+ verSz = SetMyVersion(kariVersion, ver, 0);
+
+ ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+ if (ret != 0)
+ return ret;
+
+ /* generate IV for block cipher */
+ ret = wc_PKCS7_GenerateBlock(pkcs7, &rng, tmpIv, blockSz);
+ wc_FreeRng(&rng);
+ if (ret != 0)
+ return ret;
+
+ /* EncryptedContentInfo */
+ ret = wc_SetContentType(pkcs7->contentOID, contentType,
+ sizeof(contentType));
+ if (ret < 0)
+ return ret;
+
+ contentTypeSz = ret;
+
+ /* allocate encrypted content buffer and PKCS#7 padding */
+ padSz = wc_PKCS7_GetPadSize(pkcs7->contentSz, blockSz);
+ if (padSz < 0)
+ return padSz;
+
+ encryptedOutSz = pkcs7->contentSz + padSz;
+
+ plain = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (plain == NULL)
+ return MEMORY_E;
+
+ ret = wc_PKCS7_PadData(pkcs7->content, pkcs7->contentSz, plain,
+ encryptedOutSz, blockSz);
+ if (ret < 0) {
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ encryptedContent = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (encryptedContent == NULL) {
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ /* put together IV OCTET STRING */
+ ivOctetStringSz = SetOctetString(blockSz, ivOctetString);
+
+ /* build up our ContentEncryptionAlgorithmIdentifier sequence,
+ * adding (ivOctetStringSz + blockSz) for IV OCTET STRING */
+ contentEncAlgoSz = SetAlgoID(pkcs7->encryptOID, contentEncAlgo,
+ oidBlkType, ivOctetStringSz + blockSz);
+
+ if (contentEncAlgoSz == 0) {
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BAD_FUNC_ARG;
+ }
+
+ /* encrypt content */
+ ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, pkcs7->cek,
+ pkcs7->cekSz, tmpIv, blockSz, NULL, 0, NULL, 0, plain,
+ encryptedOutSz, encryptedContent);
+
+ if (ret != 0) {
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ encContentOctetSz = SetImplicit(ASN_OCTET_STRING, 0, encryptedOutSz,
+ encContentOctet);
+
+ encContentSeqSz = SetSequence(contentTypeSz + contentEncAlgoSz +
+ ivOctetStringSz + blockSz +
+ encContentOctetSz + encryptedOutSz,
+ encContentSeq);
+
+ /* keep track of sizes for outer wrapper layering */
+ totalSz = verSz + recipSetSz + recipSz + encContentSeqSz + contentTypeSz +
+ contentEncAlgoSz + ivOctetStringSz + blockSz +
+ encContentOctetSz + encryptedOutSz;
+
+ /* EnvelopedData */
+ envDataSeqSz = SetSequence(totalSz, envDataSeq);
+ totalSz += envDataSeqSz;
+
+ /* outer content */
+ outerContentSz = SetExplicit(0, totalSz, outerContent);
+ totalSz += outerContentTypeSz;
+ totalSz += outerContentSz;
+
+ if (pkcs7->contentOID != FIRMWARE_PKG_DATA) {
+ /* ContentInfo */
+ contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq);
+ totalSz += contentInfoSeqSz;
+ }
+
+ if (totalSz > (int)outputSz) {
+ WOLFSSL_MSG("Pkcs7_encrypt output buffer too small");
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return BUFFER_E;
+ }
+
+ if (pkcs7->contentOID != FIRMWARE_PKG_DATA) {
+ XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz);
+ idx += contentInfoSeqSz;
+ XMEMCPY(output + idx, outerContentType, outerContentTypeSz);
+ idx += outerContentTypeSz;
+ XMEMCPY(output + idx, outerContent, outerContentSz);
+ idx += outerContentSz;
+ }
+ XMEMCPY(output + idx, envDataSeq, envDataSeqSz);
+ idx += envDataSeqSz;
+ XMEMCPY(output + idx, ver, verSz);
+ idx += verSz;
+ XMEMCPY(output + idx, recipSet, recipSetSz);
+ idx += recipSetSz;
+ /* copy in recipients from list */
+ tmpRecip = pkcs7->recipList;
+ while (tmpRecip != NULL) {
+ XMEMCPY(output + idx, tmpRecip->recip, tmpRecip->recipSz);
+ idx += tmpRecip->recipSz;
+ tmpRecip = tmpRecip->next;
+ }
+ wc_PKCS7_FreeEncodedRecipientSet(pkcs7);
+ XMEMCPY(output + idx, encContentSeq, encContentSeqSz);
+ idx += encContentSeqSz;
+ XMEMCPY(output + idx, contentType, contentTypeSz);
+ idx += contentTypeSz;
+ XMEMCPY(output + idx, contentEncAlgo, contentEncAlgoSz);
+ idx += contentEncAlgoSz;
+ XMEMCPY(output + idx, ivOctetString, ivOctetStringSz);
+ idx += ivOctetStringSz;
+ XMEMCPY(output + idx, tmpIv, blockSz);
+ idx += blockSz;
+ XMEMCPY(output + idx, encContentOctet, encContentOctetSz);
+ idx += encContentOctetSz;
+ XMEMCPY(output + idx, encryptedContent, encryptedOutSz);
+ idx += encryptedOutSz;
+
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+ return idx;
+}
+
+#ifndef NO_RSA
+/* decode KeyTransRecipientInfo (ktri), return 0 on success, <0 on error */
+static int wc_PKCS7_DecryptKtri(PKCS7* pkcs7, byte* in, word32 inSz,
+ word32* idx, byte* decryptedKey,
+ word32* decryptedKeySz, int* recipFound)
+{
+ int length, encryptedKeySz = 0, ret = 0;
+ int keySz, version, sidType = 0;
+ word32 encOID;
+ word32 keyIdx;
+ byte issuerHash[KEYID_SIZE];
+ byte* outKey = NULL;
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+ byte tag;
+
+
+#ifndef NO_PKCS7_STREAM
+ word32 tmpIdx = *idx;
+ long rc;
+#endif
+#ifdef WC_RSA_BLINDING
+ WC_RNG rng;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* serialNum = NULL;
+ byte* encryptedKey = NULL;
+ RsaKey* privKey = NULL;
+#else
+ mp_int serialNum[1];
+ byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
+ RsaKey privKey[1];
+#endif
+
+ switch (pkcs7->state) {
+ case WC_PKCS7_DECRYPT_KTRI:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_VERSION_SZ,
+ &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+ in, inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+
+ #endif
+ if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (version == 0) {
+ sidType = CMS_ISSUER_AND_SERIAL_NUMBER;
+ } else if (version == 2) {
+ sidType = CMS_SKID;
+ } else {
+ return ASN_VERSION_E;
+ }
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ wc_PKCS7_StreamStoreVar(pkcs7, 0, sidType, version);
+
+ /* @TODO getting total amount left because of GetInt call later on
+ * this could be optimized to stream better */
+ pkcs7->stream->expected = (pkcs7->stream->maxLen -
+ pkcs7->stream->totalRd) + pkcs7->stream->length;
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KTRI_2);
+ FALL_THROUGH;
+
+ case WC_PKCS7_DECRYPT_KTRI_2:
+ #ifndef NO_PKCS7_STREAM
+
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, pkcs7->stream->expected,
+ &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+ in, inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+
+ wc_PKCS7_StreamGetVar(pkcs7, NULL, &sidType, &version);
+
+ /* @TODO get expected size for next part, does not account for
+ * GetInt call well */
+ if (pkcs7->stream->expected == MAX_SEQ_SZ) {
+ int sz;
+ word32 lidx;
+
+ if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+ lidx = *idx;
+ ret = GetSequence(pkiMsg, &lidx, &sz, pkiMsgSz);
+ if (ret < 0)
+ return ret;
+ }
+ else {
+ lidx = *idx + ASN_TAG_SZ;
+ ret = GetLength(pkiMsg, &lidx, &sz, pkiMsgSz);
+ if (ret < 0)
+ return ret;
+ }
+
+ pkcs7->stream->expected = sz + MAX_ALGO_SZ + ASN_TAG_SZ +
+ MAX_LENGTH_SZ;
+ if (pkcs7->stream->length > 0 &&
+ pkcs7->stream->length < pkcs7->stream->expected) {
+ return WC_PKCS7_WANT_READ_E;
+ }
+ }
+ #endif /* !NO_PKCS7_STREAM */
+
+ if (sidType == CMS_ISSUER_AND_SERIAL_NUMBER) {
+
+ /* remove IssuerAndSerialNumber */
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetNameHash(pkiMsg, idx, issuerHash, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* if we found correct recipient, issuer hashes will match */
+ if (XMEMCMP(issuerHash, pkcs7->issuerHash, KEYID_SIZE) == 0) {
+ *recipFound = 1;
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ serialNum = (mp_int*)XMALLOC(sizeof(mp_int), pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (serialNum == NULL)
+ return MEMORY_E;
+ #endif
+
+ if (GetInt(serialNum, pkiMsg, idx, pkiMsgSz) < 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(serialNum, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ASN_PARSE_E;
+ }
+
+ mp_clear(serialNum);
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(serialNum, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ } else {
+ /* remove SubjectKeyIdentifier */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC))
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_OCTET_STRING)
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* if we found correct recipient, SKID will match */
+ if (XMEMCMP(pkiMsg + (*idx), pkcs7->issuerSubjKeyId,
+ KEYID_SIZE) == 0) {
+ *recipFound = 1;
+ }
+ (*idx) += KEYID_SIZE;
+ }
+
+ if (GetAlgoId(pkiMsg, idx, &encOID, oidKeyType, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* key encryption algorithm must be RSA for now */
+ if (encOID != RSAk)
+ return ALGO_ID_E;
+
+ /* read encryptedKey */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_OCTET_STRING)
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &encryptedKeySz, pkiMsgSz) < 0) {
+ return ASN_PARSE_E;
+ }
+ if (encryptedKeySz > MAX_ENCRYPTED_KEY_SZ) {
+ return BUFFER_E;
+ }
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ wc_PKCS7_StreamStoreVar(pkcs7, encryptedKeySz, sidType, version);
+ pkcs7->stream->expected = encryptedKeySz;
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KTRI_3);
+ FALL_THROUGH;
+
+ case WC_PKCS7_DECRYPT_KTRI_3:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ pkcs7->stream->expected, &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+ encryptedKeySz = pkcs7->stream->expected;
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ encryptedKey = (byte*)XMALLOC(encryptedKeySz, pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (encryptedKey == NULL)
+ return MEMORY_E;
+ #endif
+
+ if (*recipFound == 1)
+ XMEMCPY(encryptedKey, &pkiMsg[*idx], encryptedKeySz);
+ *idx += encryptedKeySz;
+
+ /* load private key */
+ #ifdef WOLFSSL_SMALL_STACK
+ privKey = (RsaKey*)XMALLOC(sizeof(RsaKey), pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (privKey == NULL) {
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
+ #endif
+
+ ret = wc_InitRsaKey_ex(privKey, pkcs7->heap, INVALID_DEVID);
+ if (ret != 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
+ }
+
+ if (pkcs7->privateKey != NULL && pkcs7->privateKeySz > 0) {
+ keyIdx = 0;
+ ret = wc_RsaPrivateKeyDecode(pkcs7->privateKey, &keyIdx,
+ privKey, pkcs7->privateKeySz);
+ }
+ else if (pkcs7->devId == INVALID_DEVID) {
+ ret = BAD_FUNC_ARG;
+ }
+ if (ret != 0) {
+ WOLFSSL_MSG("Failed to decode RSA private key");
+ wc_FreeRsaKey(privKey);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return ret;
+ }
+
+ /* decrypt encryptedKey */
+ #ifdef WC_RSA_BLINDING
+ ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+ if (ret == 0) {
+ ret = wc_RsaSetRNG(privKey, &rng);
+ }
+ #endif
+ if (ret == 0) {
+ keySz = wc_RsaPrivateDecryptInline(encryptedKey, encryptedKeySz,
+ &outKey, privKey);
+ #ifdef WC_RSA_BLINDING
+ wc_FreeRng(&rng);
+ #endif
+ } else {
+ keySz = ret;
+ }
+ wc_FreeRsaKey(privKey);
+
+ if (keySz <= 0 || outKey == NULL) {
+ ForceZero(encryptedKey, encryptedKeySz);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return keySz;
+ } else {
+ *decryptedKeySz = keySz;
+ XMEMCPY(decryptedKey, outKey, keySz);
+ ForceZero(encryptedKey, encryptedKeySz);
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(privKey, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ ret = 0; /* success */
break;
default:
- WOLFSSL_MSG("Unsupported content cipher type");
- return ALGO_ID_E;
- };
+ WOLFSSL_MSG("PKCS7 Unknown KTRI decrypt state");
+ ret = BAD_FUNC_ARG;
+ }
- /* outer content type */
- outerContentTypeSz = wc_SetContentType(ENVELOPED_DATA, outerContentType);
+ return ret;
+}
+#endif /* !NO_RSA */
- /* version, defined as 0 in RFC 2315 */
- verSz = SetMyVersion(0, ver, 0);
+#ifdef HAVE_ECC
- /* generate random content encryption key */
- ret = wc_InitRng(&rng);
+/* remove ASN.1 OriginatorIdentifierOrKey, return 0 on success, <0 on error */
+static int wc_PKCS7_KariGetOriginatorIdentifierOrKey(WC_PKCS7_KARI* kari,
+ byte* pkiMsg, word32 pkiMsgSz, word32* idx)
+{
+ int ret, length;
+ word32 keyOID, oidSum = 0;
+ int curve_id = ECC_CURVE_DEF;
+ byte tag;
+
+ if (kari == NULL || pkiMsg == NULL || idx == NULL)
+ return BAD_FUNC_ARG;
+
+ /* remove OriginatorIdentifierOrKey */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ } else {
+ return ASN_PARSE_E;
+ }
+
+ /* remove OriginatorPublicKey */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ } else {
+ return ASN_PARSE_E;
+ }
+
+ /* remove AlgorithmIdentifier */
+ if (GetAlgoId(pkiMsg, idx, &keyOID, oidKeyType, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (keyOID != ECDSAk)
+ return ASN_PARSE_E;
+
+ /* optional algorithm parameters */
+ ret = GetObjectId(pkiMsg, idx, &oidSum, oidIgnoreType, pkiMsgSz);
+ if (ret == 0) {
+ /* get curve id */
+ curve_id = wc_ecc_get_oid(oidSum, NULL, 0);
+ if (curve_id < 0)
+ return ECC_CURVE_OID_E;
+ }
+
+ /* remove ECPoint BIT STRING */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_BIT_STRING)
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_EXPECT_0_E;
+
+ if (tag != ASN_OTHER_TYPE)
+ return ASN_EXPECT_0_E;
+
+ /* get sender ephemeral public ECDSA key */
+ ret = wc_ecc_init_ex(kari->senderKey, kari->heap, kari->devId);
if (ret != 0)
return ret;
- ret = wc_RNG_GenerateBlock(&rng, contentKeyPlain, blockKeySz);
+ kari->senderKeyInit = 1;
+
+ /* length-1 for unused bits counter */
+ ret = wc_ecc_import_x963_ex(pkiMsg + (*idx), length - 1, kari->senderKey,
+ curve_id);
if (ret != 0) {
- wc_FreeRng(&rng);
- return ret;
+ ret = wc_EccPublicKeyDecode(pkiMsg, idx, kari->senderKey, *idx + length - 1);
+ if (ret != 0)
+ return ret;
+ }
+ else {
+ (*idx) += length - 1;
+ }
+
+ return 0;
+}
+
+
+/* remove optional UserKeyingMaterial if available, return 0 on success,
+ * < 0 on error */
+static int wc_PKCS7_KariGetUserKeyingMaterial(WC_PKCS7_KARI* kari,
+ byte* pkiMsg, word32 pkiMsgSz, word32* idx)
+{
+ int length;
+ word32 savedIdx;
+ byte tag;
+
+ if (kari == NULL || pkiMsg == NULL || idx == NULL)
+ return BAD_FUNC_ARG;
+
+ savedIdx = *idx;
+
+ /* starts with EXPLICIT [1] */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+ *idx = savedIdx;
+ return 0;
+ }
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
+ *idx = savedIdx;
+ return 0;
+ }
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) {
+ *idx = savedIdx;
+ return 0;
+ }
+
+ /* get OCTET STRING */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+ *idx = savedIdx;
+ return 0;
+ }
+ if (tag != ASN_OCTET_STRING) {
+ *idx = savedIdx;
+ return 0;
}
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) {
+ *idx = savedIdx;
+ return 0;
+ }
+
+ kari->ukm = NULL;
+ if (length > 0) {
+ kari->ukm = (byte*)XMALLOC(length, kari->heap, DYNAMIC_TYPE_PKCS7);
+ if (kari->ukm == NULL)
+ return MEMORY_E;
+
+ XMEMCPY(kari->ukm, pkiMsg + (*idx), length);
+ kari->ukmOwner = 1;
+ }
+
+ (*idx) += length;
+ kari->ukmSz = length;
+
+ return 0;
+}
+
+
+/* remove ASN.1 KeyEncryptionAlgorithmIdentifier, return 0 on success,
+ * < 0 on error */
+static int wc_PKCS7_KariGetKeyEncryptionAlgorithmId(WC_PKCS7_KARI* kari,
+ byte* pkiMsg, word32 pkiMsgSz, word32* idx,
+ word32* keyAgreeOID, word32* keyWrapOID)
+{
+ int length = 0;
+ word32 localIdx;
+
+ if (kari == NULL || pkiMsg == NULL || idx == NULL ||
+ keyAgreeOID == NULL || keyWrapOID == NULL)
+ return BAD_FUNC_ARG;
+
+ localIdx = *idx;
+
+ /* remove KeyEncryptionAlgorithmIdentifier */
+ if (GetSequence(pkiMsg, &localIdx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ localIdx = *idx;
+ if (GetAlgoId(pkiMsg, &localIdx, keyAgreeOID, oidCmsKeyAgreeType,
+ pkiMsgSz) < 0) {
+ return ASN_PARSE_E;
+ }
+
+ if (localIdx < *idx + length) {
+ *idx = localIdx;
+ }
+ /* remove KeyWrapAlgorithm, stored in parameter of KeyEncAlgoId */
+ if (GetAlgoId(pkiMsg, idx, keyWrapOID, oidKeyWrapType, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ return 0;
+}
+
+
+/* remove ASN.1 SubjectKeyIdentifier, return 0 on success, < 0 on error
+ * if subject key ID matches, recipFound is set to 1 */
+static int wc_PKCS7_KariGetSubjectKeyIdentifier(WC_PKCS7_KARI* kari,
+ byte* pkiMsg, word32 pkiMsgSz, word32* idx,
+ int* recipFound, byte* rid)
+{
+ int length;
+ byte tag;
+
+ if (kari == NULL || pkiMsg == NULL || idx == NULL || recipFound == NULL ||
+ rid == NULL)
+ return BAD_FUNC_ARG;
+
+ /* remove RecipientKeyIdentifier IMPLICIT [0] */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+ return ASN_PARSE_E;
+ }
+
+ if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ } else {
+ return ASN_PARSE_E;
+ }
+
+ /* remove SubjectKeyIdentifier */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+ return ASN_PARSE_E;
+ }
+
+ if (tag != ASN_OCTET_STRING)
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (length != KEYID_SIZE)
+ return ASN_PARSE_E;
+
+ XMEMCPY(rid, pkiMsg + (*idx), KEYID_SIZE);
+ (*idx) += length;
+
+ /* subject key id should match if recipient found */
+ if (XMEMCMP(rid, kari->decoded->extSubjKeyId, KEYID_SIZE) == 0) {
+ *recipFound = 1;
+ }
+
+ return 0;
+}
+
+
+/* remove ASN.1 IssuerAndSerialNumber, return 0 on success, < 0 on error
+ * if issuer and serial number match, recipFound is set to 1 */
+static int wc_PKCS7_KariGetIssuerAndSerialNumber(WC_PKCS7_KARI* kari,
+ byte* pkiMsg, word32 pkiMsgSz, word32* idx,
+ int* recipFound, byte* rid)
+{
+ int length, ret;
#ifdef WOLFSSL_SMALL_STACK
- recip = (byte*)XMALLOC(MAX_RECIP_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- contentKeyEnc = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- if (contentKeyEnc == NULL || recip == NULL) {
- if (recip) XFREE(recip, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (contentKeyEnc) XFREE(contentKeyEnc, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- wc_FreeRng(&rng);
+ mp_int* serial;
+ mp_int* recipSerial;
+#else
+ mp_int serial[1];
+ mp_int recipSerial[1];
+#endif
+
+ if (rid == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* remove IssuerAndSerialNumber */
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetNameHash(pkiMsg, idx, rid, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* if we found correct recipient, issuer hashes will match */
+ if (XMEMCMP(rid, kari->decoded->issuerHash, KEYID_SIZE) == 0) {
+ *recipFound = 1;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ serial = (mp_int*)XMALLOC(sizeof(mp_int), kari->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (serial == NULL)
+ return MEMORY_E;
+
+ recipSerial = (mp_int*)XMALLOC(sizeof(mp_int), kari->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (recipSerial == NULL) {
+ XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER);
return MEMORY_E;
}
-
#endif
- /* build RecipientInfo, only handle 1 for now */
- recipSz = wc_CreateRecipientInfo(pkcs7->singleCert, pkcs7->singleCertSz, RSAk,
- blockKeySz, &rng, contentKeyPlain,
- contentKeyEnc, &contentKeyEncSz, recip,
- MAX_RECIP_SZ);
-
- ForceZero(contentKeyEnc, MAX_ENCRYPTED_KEY_SZ);
-
+ if (GetInt(serial, pkiMsg, idx, pkiMsgSz) < 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(contentKeyEnc, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(recipSerial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+ return ASN_PARSE_E;
+ }
- if (recipSz < 0) {
- WOLFSSL_MSG("Failed to create RecipientInfo");
- wc_FreeRng(&rng);
+ ret = mp_read_unsigned_bin(recipSerial, kari->decoded->serial,
+ kari->decoded->serialSz);
+ if (ret != MP_OKAY) {
+ mp_clear(serial);
+ WOLFSSL_MSG("Failed to parse CMS recipient serial number");
#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(recipSerial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return recipSz;
+ return ret;
}
- recipSetSz = SetSet(recipSz, recipSet);
- /* generate IV for block cipher */
- ret = wc_RNG_GenerateBlock(&rng, tmpIv, DES_BLOCK_SIZE);
- wc_FreeRng(&rng);
- if (ret != 0) {
+ if (mp_cmp(recipSerial, serial) != MP_EQ) {
+ mp_clear(serial);
+ mp_clear(recipSerial);
+ WOLFSSL_MSG("CMS serial number does not match recipient");
#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(recipSerial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return ret;
+ return PKCS7_RECIP_E;
}
- /* EncryptedContentInfo */
- contentTypeSz = wc_SetContentType(pkcs7->contentOID, contentType);
- if (contentTypeSz == 0) {
+ mp_clear(serial);
+ mp_clear(recipSerial);
+
#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
+ XFREE(serial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(recipSerial, kari->heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
+
+ return 0;
+}
+
+
+/* remove ASN.1 RecipientEncryptedKeys, return 0 on success, < 0 on error */
+static int wc_PKCS7_KariGetRecipientEncryptedKeys(WC_PKCS7_KARI* kari,
+ byte* pkiMsg, word32 pkiMsgSz, word32* idx,
+ int* recipFound, byte* encryptedKey,
+ int* encryptedKeySz, byte* rid)
+{
+ int length;
+ int ret = 0;
+ byte tag;
+ word32 localIdx;
+
+ if (kari == NULL || pkiMsg == NULL || idx == NULL ||
+ recipFound == NULL || encryptedKey == NULL)
return BAD_FUNC_ARG;
+
+ /* remove RecipientEncryptedKeys */
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+    /* remove RecipientEncryptedKey (inner SEQUENCE) */
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* KeyAgreeRecipientIdentifier is CHOICE of IssuerAndSerialNumber
+     * or [0] IMPLICIT RecipientKeyIdentifier */
+ localIdx = *idx;
+ if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0)) {
+ /* try to get RecipientKeyIdentifier */
+ ret = wc_PKCS7_KariGetSubjectKeyIdentifier(kari, pkiMsg, pkiMsgSz,
+ idx, recipFound, rid);
+ } else {
+ /* try to get IssuerAndSerialNumber */
+ ret = wc_PKCS7_KariGetIssuerAndSerialNumber(kari, pkiMsg, pkiMsgSz,
+ idx, recipFound, rid);
}
- /* allocate encrypted content buffer, pad if necessary, PKCS#7 padding */
- padSz = DES_BLOCK_SIZE - (pkcs7->contentSz % DES_BLOCK_SIZE);
- desOutSz = pkcs7->contentSz + padSz;
+ /* if we don't have either option, malformed CMS */
+ if (ret != 0)
+ return ret;
+
+ /* remove EncryptedKey */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_OCTET_STRING)
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* put encrypted CEK in decryptedKey buffer for now, decrypt later */
+ if (length > *encryptedKeySz)
+ return BUFFER_E;
+
+ XMEMCPY(encryptedKey, pkiMsg + (*idx), length);
+ *encryptedKeySz = length;
+ (*idx) += length;
+
+ return 0;
+}
+
+#endif /* HAVE_ECC */
+
+
+int wc_PKCS7_SetOriEncryptCtx(PKCS7* pkcs7, void* ctx)
+{
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ pkcs7->oriEncryptCtx = ctx;
+
+ return 0;
+}
+
+
+int wc_PKCS7_SetOriDecryptCtx(PKCS7* pkcs7, void* ctx)
+{
+
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ pkcs7->oriDecryptCtx = ctx;
+
+ return 0;
+}
+
+
+int wc_PKCS7_SetOriDecryptCb(PKCS7* pkcs7, CallbackOriDecrypt cb)
+{
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ pkcs7->oriDecryptCb = cb;
+
+ return 0;
+}
+
+
+/* return 0 on success */
+int wc_PKCS7_SetWrapCEKCb(PKCS7* pkcs7, CallbackWrapCEK cb)
+{
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ pkcs7->wrapCEKCb = cb;
+
+ return 0;
+}
+
+/* Decrypt ASN.1 OtherRecipientInfo (ori), as defined by:
+ *
+ * OtherRecipientInfo ::= SEQUENCE {
+ * oriType OBJECT IDENTIFIER,
+ * oriValue ANY DEFINED BY oriType }
+ *
+ * pkcs7 - pointer to initialized PKCS7 structure
+ * pkiMsg - pointer to encoded CMS bundle
+ * pkiMsgSz - size of pkiMsg, bytes
+ * idx - [IN/OUT] pointer to index into pkiMsg
+ * decryptedKey - [OUT] output buf for decrypted content encryption key
+ * decryptedKeySz - [IN/OUT] size of buffer, size of decrypted key
+ * recipFound - [OUT] 1 if recipient has been found, 0 if not
+ *
+ * Return 0 on success, negative upon error.
+ */
+static int wc_PKCS7_DecryptOri(PKCS7* pkcs7, byte* in, word32 inSz,
+ word32* idx, byte* decryptedKey,
+ word32* decryptedKeySz, int* recipFound)
+{
+ int ret, seqSz, oriOIDSz;
+ word32 oriValueSz, tmpIdx;
+ byte* oriValue;
+ byte oriOID[MAX_OID_SZ];
+
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+#ifndef NO_PKCS7_STREAM
+ word32 stateIdx = *idx;
+ long rc;
+#endif
+
+ if (pkcs7->oriDecryptCb == NULL) {
+ WOLFSSL_MSG("You must register an ORI Decrypt callback");
+ return BAD_FUNC_ARG;
+ }
+
+ switch (pkcs7->state) {
+
+ case WC_PKCS7_DECRYPT_ORI:
+ #ifndef NO_PKCS7_STREAM
+            /* @TODO for now just get full buffer, needs to be divided up */
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ (pkcs7->stream->maxLen - pkcs7->stream->totalRd) +
+ pkcs7->stream->length, &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #endif
+ /* get OtherRecipientInfo sequence length */
+ if (GetLength(pkiMsg, idx, &seqSz, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ tmpIdx = *idx;
+
+ /* remove and store oriType OBJECT IDENTIFIER */
+ if (GetASNObjectId(pkiMsg, idx, &oriOIDSz, pkiMsgSz) != 0)
+ return ASN_PARSE_E;
+
+ XMEMCPY(oriOID, pkiMsg + *idx, oriOIDSz);
+ *idx += oriOIDSz;
+
+ /* get oriValue, increment idx */
+ oriValue = pkiMsg + *idx;
+ oriValueSz = seqSz - (*idx - tmpIdx);
+ *idx += oriValueSz;
+
+ /* pass oriOID and oriValue to user callback, expect back
+ decryptedKey and size */
+ ret = pkcs7->oriDecryptCb(pkcs7, oriOID, (word32)oriOIDSz, oriValue,
+ oriValueSz, decryptedKey, decryptedKeySz,
+ pkcs7->oriDecryptCtx);
+
+ if (ret != 0 || decryptedKey == NULL || *decryptedKeySz == 0) {
+ /* decrypt operation failed */
+ *recipFound = 0;
+ return PKCS7_RECIP_E;
+ }
+
+ /* mark recipFound, since we only support one RecipientInfo for now */
+ *recipFound = 1;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &stateIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ ret = 0; /* success */
+ break;
+
+ default:
+ WOLFSSL_MSG("PKCS7 ORI unknown state");
+ ret = BAD_FUNC_ARG;
+
+ }
+
+ return ret;
+}
+
+#if !defined(NO_PWDBASED) && !defined(NO_SHA)
+
+/* decode ASN.1 PasswordRecipientInfo (pwri), return 0 on success,
+ * < 0 on error */
+static int wc_PKCS7_DecryptPwri(PKCS7* pkcs7, byte* in, word32 inSz,
+ word32* idx, byte* decryptedKey,
+ word32* decryptedKeySz, int* recipFound)
+{
+ byte* salt;
+ byte* cek;
+ byte* kek;
+
+ byte tmpIv[MAX_CONTENT_IV_SIZE];
+
+ int ret = 0, length, saltSz, iterations, blockSz, kekKeySz;
+ int hashOID = WC_SHA; /* default to SHA1 */
+ word32 kdfAlgoId, pwriEncAlgoId, keyEncAlgoId, cekSz;
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+ byte tag;
+#ifndef NO_PKCS7_STREAM
+ word32 tmpIdx = *idx;
+ long rc;
+#endif
+
+ switch (pkcs7->state) {
+ case WC_PKCS7_DECRYPT_PWRI:
+ #ifndef NO_PKCS7_STREAM
+            /* @TODO for now just get full buffer, needs to be divided up */
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ (pkcs7->stream->maxLen - pkcs7->stream->totalRd) +
+ pkcs7->stream->length, &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #endif
+ /* remove KeyDerivationAlgorithmIdentifier */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* get KeyDerivationAlgorithmIdentifier */
+ if (wc_GetContentType(pkiMsg, idx, &kdfAlgoId, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* get KDF params SEQ */
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* get KDF salt OCTET STRING */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_OCTET_STRING)
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &saltSz, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ salt = (byte*)XMALLOC(saltSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (salt == NULL)
+ return MEMORY_E;
+
+ XMEMCPY(salt, pkiMsg + (*idx), saltSz);
+ *idx += saltSz;
+
+ /* get KDF iterations */
+ if (GetMyVersion(pkiMsg, idx, &iterations, pkiMsgSz) < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ /* get KeyEncAlgoId SEQ */
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ /* get KeyEncAlgoId */
+ if (wc_GetContentType(pkiMsg, idx, &keyEncAlgoId, pkiMsgSz) < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ /* get pwriEncAlgoId */
+ if (GetAlgoId(pkiMsg, idx, &pwriEncAlgoId, oidBlkType, pkiMsgSz) < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ blockSz = wc_PKCS7_GetOIDBlockSize(pwriEncAlgoId);
+ if (blockSz < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return blockSz;
+ }
+
+ /* get content-encryption key size, based on algorithm */
+ kekKeySz = wc_PKCS7_GetOIDKeySize(pwriEncAlgoId);
+ if (kekKeySz < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return kekKeySz;
+ }
+
+ /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ if (tag != ASN_OCTET_STRING) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ if (length != blockSz) {
+ WOLFSSL_MSG("Incorrect IV length, must be of content alg block size");
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ XMEMCPY(tmpIv, pkiMsg + (*idx), length);
+ *idx += length;
+
+ /* get EncryptedKey */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ if (tag != ASN_OCTET_STRING) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ /* allocate temporary space for decrypted key */
+ cekSz = length;
+ cek = (byte*)XMALLOC(cekSz, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (cek == NULL) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ /* generate KEK */
+ kek = (byte*)XMALLOC(kekKeySz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (kek == NULL) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ ret = wc_PKCS7_GenerateKEK_PWRI(pkcs7, pkcs7->pass, pkcs7->passSz,
+ salt, saltSz, kdfAlgoId, hashOID,
+ iterations, kek, kekKeySz);
+ if (ret < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ASN_PARSE_E;
+ }
+
+ /* decrypt CEK with KEK */
+ ret = wc_PKCS7_PwriKek_KeyUnWrap(pkcs7, kek, kekKeySz,
+ pkiMsg + (*idx), length, cek,
+ cekSz, tmpIv, blockSz,
+ pwriEncAlgoId);
+ if (ret < 0) {
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+ cekSz = ret;
+
+ if (*decryptedKeySz < cekSz) {
+ WOLFSSL_MSG("Decrypted key buffer too small for CEK");
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BUFFER_E;
+ }
+
+ XMEMCPY(decryptedKey, cek, cekSz);
+ *decryptedKeySz = cekSz;
+
+ XFREE(salt, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(kek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(cek, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+ /* mark recipFound, since we only support one RecipientInfo for now */
+ *recipFound = 1;
+ *idx += length;
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ ret = 0; /* success */
+ break;
+
+ default:
+ WOLFSSL_MSG("PKCS7 PWRI unknown state");
+ ret = BAD_FUNC_ARG;
+ }
+
+ return ret;
+}
+
+#endif /* !NO_PWDBASED && !NO_SHA */
+
+/* decode ASN.1 KEKRecipientInfo (kekri), return 0 on success,
+ * < 0 on error */
+static int wc_PKCS7_DecryptKekri(PKCS7* pkcs7, byte* in, word32 inSz,
+ word32* idx, byte* decryptedKey,
+ word32* decryptedKeySz, int* recipFound)
+{
+ int length, keySz, dateLen, direction;
+ byte* keyId = NULL;
+ const byte* datePtr = NULL;
+ byte dateFormat, tag;
+ word32 keyIdSz, kekIdSz, keyWrapOID, localIdx;
+
+ int ret = 0;
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+#ifndef NO_PKCS7_STREAM
+ word32 tmpIdx = *idx;
+ long rc;
+#endif
+
+ WOLFSSL_ENTER("wc_PKCS7_DecryptKekri");
+ switch (pkcs7->state) {
+ case WC_PKCS7_DECRYPT_KEKRI:
+ #ifndef NO_PKCS7_STREAM
+        /* @TODO for now just get full buffer, needs to be divided up */
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ (pkcs7->stream->maxLen - pkcs7->stream->totalRd) +
+ pkcs7->stream->length, &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #endif
+ /* remove KEKIdentifier */
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ kekIdSz = length;
+
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_OCTET_STRING)
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* save keyIdentifier and length */
+ keyId = pkiMsg + *idx;
+ keyIdSz = length;
+ *idx += keyIdSz;
+
+ /* may have OPTIONAL GeneralizedTime */
+ localIdx = *idx;
+ if ((*idx < kekIdSz) && GetASNTag(pkiMsg, &localIdx, &tag,
+ pkiMsgSz) == 0 && tag == ASN_GENERALIZED_TIME) {
+ if (wc_GetDateInfo(pkiMsg + *idx, pkiMsgSz, &datePtr, &dateFormat,
+ &dateLen) != 0) {
+ return ASN_PARSE_E;
+ }
+ *idx += (dateLen + 1);
+ }
+
+ /* may have OPTIONAL OtherKeyAttribute */
+ localIdx = *idx;
+ if ((*idx < kekIdSz) && GetASNTag(pkiMsg, &localIdx, &tag,
+ pkiMsgSz) == 0 && tag == (ASN_SEQUENCE |
+ ASN_CONSTRUCTED)) {
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* skip it */
+ *idx += length;
+ }
+
+ /* get KeyEncryptionAlgorithmIdentifier */
+ if (GetAlgoId(pkiMsg, idx, &keyWrapOID, oidKeyWrapType, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* get EncryptedKey */
+ if (GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_OCTET_STRING)
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ #ifndef NO_AES
+ direction = AES_DECRYPTION;
+ #else
+ direction = DES_DECRYPTION;
+ #endif
+
+ /* decrypt CEK with KEK */
+ if (pkcs7->wrapCEKCb) {
+ keySz = pkcs7->wrapCEKCb(pkcs7, pkiMsg + *idx, length, keyId,
+ keyIdSz, NULL, 0, decryptedKey,
+ *decryptedKeySz, keyWrapOID,
+ (int)PKCS7_KEKRI, direction);
+ }
+ else {
+ keySz = wc_PKCS7_KeyWrap(pkiMsg + *idx, length, pkcs7->privateKey,
+ pkcs7->privateKeySz, decryptedKey, *decryptedKeySz,
+ keyWrapOID, direction);
+ }
+ if (keySz <= 0)
+ return keySz;
+
+ *decryptedKeySz = (word32)keySz;
+
+ /* mark recipFound, since we only support one RecipientInfo for now */
+ *recipFound = 1;
+ *idx += length;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ ret = 0; /* success */
+ break;
+
+ default:
+ WOLFSSL_MSG("PKCS7 KEKRI unknown state");
+ ret = BAD_FUNC_ARG;
+
+ }
+
+ (void)keyId;
+ return ret;
+}
+
+
+/* decode ASN.1 KeyAgreeRecipientInfo (kari), return 0 on success,
+ * < 0 on error */
+static int wc_PKCS7_DecryptKari(PKCS7* pkcs7, byte* in, word32 inSz,
+ word32* idx, byte* decryptedKey,
+ word32* decryptedKeySz, int* recipFound)
+{
+#ifdef HAVE_ECC
+ int ret, keySz;
+ int encryptedKeySz;
+ int direction = 0;
+ word32 keyAgreeOID, keyWrapOID;
+ byte rid[KEYID_SIZE];
- if (padSz != 0) {
- plain = (byte*)XMALLOC(desOutSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (plain == NULL) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
+ byte* encryptedKey;
+#else
+ byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
#endif
- return MEMORY_E;
- }
- XMEMCPY(plain, pkcs7->content, pkcs7->contentSz);
- dynamicFlag = 1;
- for (i = 0; i < padSz; i++) {
- plain[pkcs7->contentSz + i] = padSz;
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+#ifndef NO_PKCS7_STREAM
+ word32 tmpIdx = (idx) ? *idx : 0;
+ long rc;
+#endif
+
+ WOLFSSL_ENTER("wc_PKCS7_DecryptKari");
+ if (pkcs7 == NULL || pkiMsg == NULL ||
+ ((pkcs7->singleCert == NULL || pkcs7->singleCertSz == 0) &&
+ pkcs7->wrapCEKCb == NULL) ||
+ idx == NULL || decryptedKey == NULL || decryptedKeySz == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ switch (pkcs7->state) {
+ case WC_PKCS7_DECRYPT_KARI: {
+ #ifndef NO_PKCS7_STREAM
+        /* @TODO for now just get full buffer, needs to be divided up */
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ (pkcs7->stream->maxLen - pkcs7->stream->totalRd) +
+ pkcs7->stream->length, &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #endif
+ WC_PKCS7_KARI* kari;
+
+ kari = wc_PKCS7_KariNew(pkcs7, WC_PKCS7_DECODE);
+ if (kari == NULL)
+ return MEMORY_E;
+
+ #ifdef WOLFSSL_SMALL_STACK
+ encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (encryptedKey == NULL) {
+ wc_PKCS7_KariFree(kari);
+ return MEMORY_E;
+ }
+ #endif
+ encryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+
+ /* parse cert and key */
+ if (pkcs7->singleCert != NULL) {
+ ret = wc_PKCS7_KariParseRecipCert(kari, (byte*)pkcs7->singleCert,
+ pkcs7->singleCertSz, pkcs7->privateKey,
+ pkcs7->privateKeySz);
+ if (ret != 0) {
+ wc_PKCS7_KariFree(kari);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return ret;
+ }
+ }
+
+ /* remove OriginatorIdentifierOrKey */
+ ret = wc_PKCS7_KariGetOriginatorIdentifierOrKey(kari, pkiMsg,
+ pkiMsgSz, idx);
+ if (ret != 0) {
+ wc_PKCS7_KariFree(kari);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return ret;
+ }
+
+ /* try and remove optional UserKeyingMaterial */
+ ret = wc_PKCS7_KariGetUserKeyingMaterial(kari, pkiMsg, pkiMsgSz, idx);
+ if (ret != 0) {
+ wc_PKCS7_KariFree(kari);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return ret;
+ }
+
+ /* remove KeyEncryptionAlgorithmIdentifier */
+ ret = wc_PKCS7_KariGetKeyEncryptionAlgorithmId(kari, pkiMsg,
+ pkiMsgSz, idx, &keyAgreeOID, &keyWrapOID);
+ if (ret != 0) {
+ wc_PKCS7_KariFree(kari);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return ret;
+ }
+
+ /* if user has not explicitly set keyAgreeOID, set from one in bundle */
+ if (pkcs7->keyAgreeOID == 0)
+ pkcs7->keyAgreeOID = keyAgreeOID;
+
+ /* set direction based on key wrap algorithm */
+ switch (keyWrapOID) {
+ #ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ case AES128_WRAP:
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192_WRAP:
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256_WRAP:
+ #endif
+ direction = AES_DECRYPTION;
+ break;
+ #endif
+ default:
+ WOLFSSL_MSG("AES key wrap algorithm unsupported");
+ if (pkcs7->wrapCEKCb) {
+ WOLFSSL_MSG("Direction not set!");
+ break; /* if unwrapping callback is set then do not
+ * force restriction of supported wrap
+ * algorithms */
+ }
+
+ wc_PKCS7_KariFree(kari);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return BAD_KEYWRAP_ALG_E;
+ }
+
+ /* remove RecipientEncryptedKeys */
+ ret = wc_PKCS7_KariGetRecipientEncryptedKeys(kari, pkiMsg, pkiMsgSz,
+ idx, recipFound, encryptedKey, &encryptedKeySz, rid);
+ if (ret != 0) {
+ wc_PKCS7_KariFree(kari);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return ret;
+ }
+
+ /* decrypt CEK with KEK */
+ if (pkcs7->wrapCEKCb) {
+ word32 tmpKeySz = 0;
+ byte* tmpKeyDer = NULL;
+
+ ret = wc_ecc_export_x963(kari->senderKey, NULL, &tmpKeySz);
+ if (ret != LENGTH_ONLY_E) {
+ return ret;
+ }
+
+ /* buffer space for algorithm/curve */
+ tmpKeySz += MAX_SEQ_SZ;
+ tmpKeySz += 2 * MAX_ALGO_SZ;
+
+ /* buffer space for public key sequence */
+ tmpKeySz += MAX_SEQ_SZ;
+ tmpKeySz += TRAILING_ZERO;
+
+ tmpKeyDer = (byte*)XMALLOC(tmpKeySz, pkcs7->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmpKeyDer == NULL) {
+ return MEMORY_E;
+ }
+
+ ret = wc_EccPublicKeyToDer(kari->senderKey, tmpKeyDer,
+ tmpKeySz, 1);
+ if (ret < 0) {
+ XFREE(tmpKeyDer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+ }
+ tmpKeySz = (word32)ret;
+
+ keySz = pkcs7->wrapCEKCb(pkcs7, encryptedKey, encryptedKeySz,
+ rid, KEYID_SIZE, tmpKeyDer, tmpKeySz,
+ decryptedKey, *decryptedKeySz,
+ keyWrapOID, (int)PKCS7_KARI, direction);
+ XFREE(tmpKeyDer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ if (keySz > 0) {
+ /* If unwrapping was successful then consider recipient
+ * found. Checking for NULL singleCert to confirm previous
+ * SID check was not done */
+ if (pkcs7->singleCert == NULL)
+ *recipFound = 1;
+ }
+ }
+ else {
+ /* create KEK */
+ ret = wc_PKCS7_KariGenerateKEK(kari, keyWrapOID, pkcs7->keyAgreeOID);
+ if (ret != 0) {
+ wc_PKCS7_KariFree(kari);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return ret;
+ }
+
+ /* decrypt CEK with KEK */
+ keySz = wc_PKCS7_KeyWrap(encryptedKey, encryptedKeySz, kari->kek,
+ kari->kekSz, decryptedKey, *decryptedKeySz,
+ keyWrapOID, direction);
+ }
+ if (keySz <= 0) {
+ wc_PKCS7_KariFree(kari);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ return keySz;
+ }
+ *decryptedKeySz = (word32)keySz;
+
+ wc_PKCS7_KariFree(kari);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(encryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ #endif
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ ret = 0; /* success */
}
+ break;
+
+ default:
+ WOLFSSL_MSG("PKCS7 kari unknown state");
+ ret = BAD_FUNC_ARG;
- } else {
- plain = pkcs7->content;
- desOutSz = pkcs7->contentSz;
}
- encryptedContent = (byte*)XMALLOC(desOutSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (encryptedContent == NULL) {
- if (dynamicFlag)
- XFREE(plain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
+ (void)pkiMsg;
+ (void)pkiMsgSz;
+
+ return ret;
+#else
+ (void)in;
+ (void)inSz;
+ (void)pkcs7;
+ (void)idx;
+ (void)decryptedKey;
+ (void)decryptedKeySz;
+ (void)recipFound;
+
+ return NOT_COMPILED_IN;
+#endif /* HAVE_ECC */
+}
+
+
+/* decode ASN.1 RecipientInfos SET, return 0 on success, < 0 on error */
+static int wc_PKCS7_DecryptRecipientInfos(PKCS7* pkcs7, byte* in,
+ word32 inSz, word32* idx, byte* decryptedKey,
+ word32* decryptedKeySz, int* recipFound)
+{
+ word32 savedIdx;
+ int version, ret = 0, length;
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+ byte tag;
+#ifndef NO_PKCS7_STREAM
+ word32 tmpIdx;
+ long rc;
#endif
- return MEMORY_E;
+
+ if (pkcs7 == NULL || pkiMsg == NULL || idx == NULL ||
+ decryptedKey == NULL || decryptedKeySz == NULL ||
+ recipFound == NULL) {
+ return BAD_FUNC_ARG;
}
- /* put together IV OCTET STRING */
- ivOctetStringSz = SetOctetString(DES_BLOCK_SIZE, ivOctetString);
+ WOLFSSL_ENTER("wc_PKCS7_DecryptRecipientInfos");
+#ifndef NO_PKCS7_STREAM
+ tmpIdx = *idx;
+#endif
- /* build up our ContentEncryptionAlgorithmIdentifier sequence,
- * adding (ivOctetStringSz + DES_BLOCK_SIZE) for IV OCTET STRING */
- contentEncAlgoSz = SetAlgoID(pkcs7->encryptOID, contentEncAlgo,
- blkType, ivOctetStringSz + DES_BLOCK_SIZE);
+ /* check if in the process of decrypting */
+ switch (pkcs7->state) {
+ case WC_PKCS7_DECRYPT_KTRI:
+ case WC_PKCS7_DECRYPT_KTRI_2:
+ case WC_PKCS7_DECRYPT_KTRI_3:
+ #ifndef NO_RSA
+ ret = wc_PKCS7_DecryptKtri(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz, recipFound);
+ #else
+ return NOT_COMPILED_IN;
+ #endif
+ break;
- if (contentEncAlgoSz == 0) {
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (dynamicFlag)
- XFREE(plain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
+ case WC_PKCS7_DECRYPT_KARI:
+ ret = wc_PKCS7_DecryptKari(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz, recipFound);
+ break;
+
+ case WC_PKCS7_DECRYPT_KEKRI:
+ ret = wc_PKCS7_DecryptKekri(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz, recipFound);
+ break;
+
+ case WC_PKCS7_DECRYPT_PWRI:
+ #if !defined(NO_PWDBASED) && !defined(NO_SHA)
+ ret = wc_PKCS7_DecryptPwri(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz, recipFound);
+ #else
+ return NOT_COMPILED_IN;
+ #endif
+ break;
+
+ case WC_PKCS7_DECRYPT_ORI:
+ ret = wc_PKCS7_DecryptOri(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz, recipFound);
+ break;
+
+ default:
+ /* not in decrypting state */
+ break;
+ }
+
+ if (ret < 0) {
+ return ret;
+ }
+
+ savedIdx = *idx;
+#ifndef NO_PKCS7_STREAM
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in, inSz);
+ if (rc < 0) {
+ return (int)rc;
+ }
+ pkiMsgSz = (word32)rc;
+ if (pkcs7->stream->length > 0)
+ pkiMsg = pkcs7->stream->buffer;
+#endif
+
+ /* when looking for next recipient, use first sequence and version to
+ * indicate there is another, if not, move on */
+ while(*recipFound == 0) {
+
+ /* remove RecipientInfo, if we don't have a SEQUENCE, back up idx to
+ * last good saved one */
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) > 0) {
+
+ #ifndef NO_RSA
+ /* found ktri */
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KTRI);
+ ret = wc_PKCS7_DecryptKtri(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz,
+ recipFound);
+ if (ret != 0)
+ return ret;
+ #else
+ return NOT_COMPILED_IN;
+ #endif
+ }
+ else {
+ word32 localIdx;
+ /* kari is IMPLICIT[1] */
+ *idx = savedIdx;
+ localIdx = *idx;
+
+ if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) != 0) {
+ /* no room for recipient info */
+ break;
+ }
+
+ if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
+ (*idx)++;
+ if (GetLength(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) {
+ *idx = savedIdx;
+ break;
+ }
+
+ if (version != 3)
+ return ASN_VERSION_E;
+
+ /* found kari */
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KARI);
+ ret = wc_PKCS7_DecryptKari(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz,
+ recipFound);
+ if (ret != 0)
+ return ret;
+
+ /* kekri is IMPLICIT[2] */
+ } else if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 2)) {
+ (*idx)++;
+
+ if (GetLength(pkiMsg, idx, &version, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) {
+ *idx = savedIdx;
+ break;
+ }
+
+ if (version != 4)
+ return ASN_VERSION_E;
+
+ /* found kekri */
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_KEKRI);
+ ret = wc_PKCS7_DecryptKekri(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz,
+ recipFound);
+ if (ret != 0)
+ return ret;
+
+ /* pwri is IMPLICIT[3] */
+ } else if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 3)) {
+ #if !defined(NO_PWDBASED) && !defined(NO_SHA)
+ (*idx)++;
+
+ if (GetLength(pkiMsg, idx, &version, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0) {
+ *idx = savedIdx;
+ break;
+ }
+
+ if (version != 0)
+ return ASN_VERSION_E;
+
+ /* found pwri */
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_PWRI);
+ ret = wc_PKCS7_DecryptPwri(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz,
+ recipFound);
+ if (ret != 0)
+ return ret;
+ #else
+ return NOT_COMPILED_IN;
+ #endif
+
+ /* ori is IMPLICIT[4] */
+ } else if (tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 4)) {
+ (*idx)++;
+
+ /* found ori */
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_DECRYPT_ORI);
+ ret = wc_PKCS7_DecryptOri(pkcs7, in, inSz, idx,
+ decryptedKey, decryptedKeySz,
+ recipFound);
+ if (ret != 0)
+ return ret;
+
+ } else {
+ /* failed to find RecipientInfo, restore idx and continue */
+ *idx = savedIdx;
+ break;
+ }
+ }
+
+ /* update good idx */
+ savedIdx = *idx;
+ }
+
+ return ret;
+}
+
+
+/* Parse encoded EnvelopedData bundle up to RecipientInfo set.
+ *
+ * return size of RecipientInfo SET on success, negative upon error */
+static int wc_PKCS7_ParseToRecipientInfoSet(PKCS7* pkcs7, byte* in,
+ word32 inSz, word32* idx,
+ int type)
+{
+ int version = 0, length, ret = 0;
+ word32 contentType;
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+ byte tag;
+#ifndef NO_PKCS7_STREAM
+ word32 tmpIdx = 0;
+ long rc;
#endif
+
+ if (pkcs7 == NULL || pkiMsg == NULL || pkiMsgSz == 0 || idx == NULL)
+ return BAD_FUNC_ARG;
+
+ if ((type != ENVELOPED_DATA) && (type != AUTH_ENVELOPED_DATA) &&
+ pkcs7->contentOID != FIRMWARE_PKG_DATA)
return BAD_FUNC_ARG;
+
+#ifndef NO_PKCS7_STREAM
+ if (pkcs7->stream == NULL) {
+ if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
+ return ret;
+ }
}
+#endif
- /* encrypt content */
- if (pkcs7->encryptOID == DESb) {
- Des des;
+ switch (pkcs7->state) {
+ case WC_PKCS7_INFOSET_START:
+ case WC_PKCS7_INFOSET_BER:
+ case WC_PKCS7_INFOSET_STAGE1:
+ case WC_PKCS7_INFOSET_STAGE2:
+ case WC_PKCS7_INFOSET_END:
+ break;
- ret = wc_Des_SetKey(&des, contentKeyPlain, tmpIv, DES_ENCRYPTION);
+ default:
+ WOLFSSL_MSG("Warning, setting PKCS7 info state to start");
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_START);
+ }
- if (ret == 0)
- wc_Des_CbcEncrypt(&des, encryptedContent, plain, desOutSz);
+ switch (pkcs7->state) {
+ case WC_PKCS7_INFOSET_START:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+ ASN_TAG_SZ, &pkiMsg, idx)) != 0) {
+ return ret;
+ }
- if (ret != 0) {
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (dynamicFlag)
- XFREE(plain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, in, inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #endif
+ /* read past ContentInfo, verify type is envelopedData */
+ if (ret == 0 && GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && length == 0 && pkiMsg[(*idx)-1] == 0x80) {
+ #ifdef ASN_BER_TO_DER
+ word32 len;
+
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_BER);
+ FALL_THROUGH;
+
+ /* full buffer is needed for conversion */
+ case WC_PKCS7_INFOSET_BER:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ pkcs7->stream->maxLen - pkcs7->stream->length,
+ &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+ in, inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #endif
+
+ len = 0;
+
+ ret = wc_BerToDer(pkiMsg, pkiMsgSz, NULL, &len);
+ if (ret != LENGTH_ONLY_E)
+ return ret;
+ pkcs7->der = (byte*)XMALLOC(len, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->der == NULL)
+ return MEMORY_E;
+ ret = wc_BerToDer(pkiMsg, pkiMsgSz, pkcs7->der, &len);
+ if (ret < 0)
+ return ret;
+
+ pkiMsg = in = pkcs7->der;
+ pkiMsgSz = pkcs7->derSz = len;
+ *idx = 0;
+
+ if (GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+ #else
+ return BER_INDEF_E;
+ #endif
+ }
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_STAGE1);
+ FALL_THROUGH;
+
+ case WC_PKCS7_INFOSET_STAGE1:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_OID_SZ +
+ MAX_LENGTH_SZ + ASN_TAG_SZ, &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ pkiMsgSz = (pkcs7->stream->length > 0)? pkcs7->stream->length :inSz;
+ #endif
+ if (pkcs7->contentOID != FIRMWARE_PKG_DATA ||
+ type == AUTH_ENVELOPED_DATA) {
+ if (ret == 0 && wc_GetContentType(pkiMsg, idx, &contentType,
+ pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0) {
+ if (type == ENVELOPED_DATA && contentType != ENVELOPED_DATA) {
+ WOLFSSL_MSG("PKCS#7 input not of type EnvelopedData");
+ ret = PKCS7_OID_E;
+ } else if (type == AUTH_ENVELOPED_DATA &&
+ contentType != AUTH_ENVELOPED_DATA) {
+ WOLFSSL_MSG("PKCS#7 input not of type AuthEnvelopedData");
+ ret = PKCS7_OID_E;
+ }
+ }
+
+ if (ret == 0 && GetASNTag(pkiMsg, idx, &tag, pkiMsgSz) != 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC
+ | 0))
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength_ex(pkiMsg, idx, &length, pkiMsgSz,
+ NO_USER_CHECK) < 0)
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret < 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_STAGE2);
+ FALL_THROUGH;
+
+ case WC_PKCS7_INFOSET_STAGE2:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+ MAX_VERSION_SZ, &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #endif
+ /* remove EnvelopedData and version */
+ if (pkcs7->contentOID != FIRMWARE_PKG_DATA ||
+ type == AUTH_ENVELOPED_DATA) {
+ if (ret == 0 && GetSequence(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && GetMyVersion(pkiMsg, idx, &version, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret < 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+
+ pkcs7->stream->varOne = version;
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_INFOSET_END);
+ FALL_THROUGH;
+
+ case WC_PKCS7_INFOSET_END:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ MAX_SET_SZ, &pkiMsg, idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ version = pkcs7->stream->varOne;
+ #endif
+
+ if (type == ENVELOPED_DATA) {
+ /* TODO :: make this more accurate */
+ if ((pkcs7->publicKeyOID == RSAk &&
+ (version != 0 && version != 2))
+ #ifdef HAVE_ECC
+ || (pkcs7->publicKeyOID == ECDSAk &&
+ (version != 0 && version != 2 && version != 3))
+ #endif
+ ) {
+ WOLFSSL_MSG("PKCS#7 envelopedData version incorrect");
+ ret = ASN_VERSION_E;
+ }
+ } else {
+ /* AuthEnvelopedData version MUST be 0 */
+ if (version != 0) {
+ WOLFSSL_MSG("PKCS#7 AuthEnvelopedData needs to be of version 0");
+ ret = ASN_VERSION_E;
+ }
+ }
+
+ /* remove RecipientInfo set, get length of set */
+ if (ret == 0 && GetSet(pkiMsg, idx, &length, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret < 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, idx)) != 0) {
+ break;
+ }
+ #endif
+
+ if (ret == 0)
+ ret = length;
+
+ break;
+
+ default:
+ WOLFSSL_MSG("Bad PKCS7 info set state");
+ ret = BAD_FUNC_ARG;
+ break;
+ }
+
+ return ret;
+}
+
+
+/* Import secret/private key into a PKCS7 structure. Used for setting
+ * the secret key for decryption a EnvelopedData KEKRI RecipientInfo.
+ *
+ * Returns 0 on success, negative upon error */
+WOLFSSL_API int wc_PKCS7_SetKey(PKCS7* pkcs7, byte* key, word32 keySz)
+{
+ if (pkcs7 == NULL || key == NULL || keySz == 0)
+ return BAD_FUNC_ARG;
+
+ pkcs7->privateKey = key;
+ pkcs7->privateKeySz = keySz;
+
+ return 0;
+}
+
+
+/* append data to encrypted content cache in PKCS7 structure
+ * return 0 on success, negative on error */
+static int PKCS7_CacheEncryptedContent(PKCS7* pkcs7, byte* in, word32 inSz)
+{
+ byte* oldCache;
+ word32 oldCacheSz;
+
+ if (pkcs7 == NULL || in == NULL)
+ return BAD_FUNC_ARG;
+
+ /* save pointer to old cache */
+ oldCache = pkcs7->cachedEncryptedContent;
+ oldCacheSz = pkcs7->cachedEncryptedContentSz;
+
+ /* re-allocate new buffer to fit appended data */
+ pkcs7->cachedEncryptedContent = (byte*)XMALLOC(oldCacheSz + inSz,
+ pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->cachedEncryptedContent == NULL) {
+ pkcs7->cachedEncryptedContentSz = 0;
+ XFREE(oldCache, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ if (oldCache != NULL) {
+ XMEMCPY(pkcs7->cachedEncryptedContent, oldCache, oldCacheSz);
+ }
+ XMEMCPY(pkcs7->cachedEncryptedContent + oldCacheSz, in, inSz);
+ pkcs7->cachedEncryptedContentSz += inSz;
+
+ XFREE(oldCache, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+ return 0;
+}
+
+
+/* unwrap and decrypt PKCS#7 envelopedData object, return decoded size */
+WOLFSSL_API int wc_PKCS7_DecodeEnvelopedData(PKCS7* pkcs7, byte* in,
+ word32 inSz, byte* output,
+ word32 outputSz)
+{
+ int recipFound = 0;
+ int ret, length = 0;
+ word32 idx = 0;
+#ifndef NO_PKCS7_STREAM
+ word32 tmpIdx = 0;
+ long rc;
#endif
+ word32 contentType, encOID = 0;
+ word32 decryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+
+ int expBlockSz = 0, blockKeySz = 0;
+ byte tmpIvBuf[MAX_CONTENT_IV_SIZE];
+ byte* tmpIv = tmpIvBuf;
+
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+ byte* decryptedKey = NULL;
+ int encryptedContentTotalSz = 0;
+ int encryptedContentSz = 0;
+ byte padLen;
+ byte* encryptedContent = NULL;
+ int explicitOctet = 0;
+ word32 localIdx;
+ byte tag;
+
+ if (pkcs7 == NULL)
+ return BAD_FUNC_ARG;
+
+ if (pkiMsg == NULL || pkiMsgSz == 0 ||
+ output == NULL || outputSz == 0)
+ return BAD_FUNC_ARG;
+
+#ifndef NO_PKCS7_STREAM
+ (void)tmpIv; /* help out static analysis */
+ if (pkcs7->stream == NULL) {
+ if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
return ret;
}
}
- else if (pkcs7->encryptOID == DES3b) {
- Des3 des3;
+#endif
- ret = wc_Des3_SetKey(&des3, contentKeyPlain, tmpIv, DES_ENCRYPTION);
+ switch (pkcs7->state) {
+ case WC_PKCS7_START:
+ case WC_PKCS7_INFOSET_START:
+ case WC_PKCS7_INFOSET_BER:
+ case WC_PKCS7_INFOSET_STAGE1:
+ case WC_PKCS7_INFOSET_STAGE2:
+ case WC_PKCS7_INFOSET_END:
+ ret = wc_PKCS7_ParseToRecipientInfoSet(pkcs7, pkiMsg, pkiMsgSz,
+ &idx, ENVELOPED_DATA);
+ if (ret < 0) {
+ break;
+ }
- if (ret == 0)
- ret = wc_Des3_CbcEncrypt(&des3, encryptedContent, plain, desOutSz);
+ #ifdef ASN_BER_TO_DER
+ /* check if content was BER and has been converted to DER */
+ if (pkcs7->derSz > 0)
+ pkiMsg = in = pkcs7->der;
+ #endif
- if (ret != 0) {
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (dynamicFlag)
- XFREE(plain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
+ decryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (decryptedKey == NULL)
+ return MEMORY_E;
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_2);
+ #ifndef NO_PKCS7_STREAM
+ tmpIdx = idx;
+ pkcs7->stream->aad = decryptedKey;
+ #endif
+ FALL_THROUGH;
+
+ case WC_PKCS7_ENV_2:
+ #ifndef NO_PKCS7_STREAM
+ /* store up enough buffer for initial info set decode */
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+ MAX_VERSION_SZ + ASN_TAG_SZ, &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
+ #endif
+ FALL_THROUGH;
+
+ case WC_PKCS7_DECRYPT_KTRI:
+ case WC_PKCS7_DECRYPT_KTRI_2:
+ case WC_PKCS7_DECRYPT_KTRI_3:
+ case WC_PKCS7_DECRYPT_KARI:
+ case WC_PKCS7_DECRYPT_KEKRI:
+ case WC_PKCS7_DECRYPT_PWRI:
+ case WC_PKCS7_DECRYPT_ORI:
+ #ifndef NO_PKCS7_STREAM
+ decryptedKey = pkcs7->stream->aad;
+ decryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+ #endif
+
+ ret = wc_PKCS7_DecryptRecipientInfos(pkcs7, in, inSz, &idx,
+ decryptedKey, &decryptedKeySz,
+ &recipFound);
+ if (ret == 0 && recipFound == 0) {
+ WOLFSSL_MSG("No recipient found in envelopedData that matches input");
+ ret = PKCS7_RECIP_E;
+ }
+
+ if (ret != 0)
+ break;
+ #ifndef NO_PKCS7_STREAM
+ tmpIdx = idx;
+ pkcs7->stream->aadSz = decryptedKeySz;
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_3);
+ FALL_THROUGH;
+
+ case WC_PKCS7_ENV_3:
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+ MAX_VERSION_SZ + ASN_TAG_SZ +
+ MAX_LENGTH_SZ, &pkiMsg, &idx))
+ != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #else
+ ret = 0;
+ #endif
+
+ /* remove EncryptedContentInfo */
+ if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType,
+ pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType,
+ pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ blockKeySz = wc_PKCS7_GetOIDKeySize(encOID);
+ if (ret == 0 && blockKeySz < 0) {
+ ret = blockKeySz;
+ }
+
+ expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID);
+ if (ret == 0 && expBlockSz < 0) {
+ ret = expBlockSz;
+ }
+
+ /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */
+ if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) != 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && tag != ASN_OCTET_STRING) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && length != expBlockSz) {
+ WOLFSSL_MSG("Incorrect IV length, must be of content alg block size");
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret != 0)
+ break;
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+ wc_PKCS7_StreamStoreVar(pkcs7, encOID, expBlockSz, length);
+ pkcs7->stream->contentSz = blockKeySz;
+ pkcs7->stream->expected = length + MAX_LENGTH_SZ + MAX_LENGTH_SZ +
+ ASN_TAG_SZ + ASN_TAG_SZ;
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_4);
+ FALL_THROUGH;
+
+ case WC_PKCS7_ENV_4:
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+
+ wc_PKCS7_StreamGetVar(pkcs7, 0, 0, &length);
+ tmpIv = pkcs7->stream->tmpIv;
+ if (tmpIv == NULL) {
+ /* check added to help out static analysis tool */
+ ret = MEMORY_E;
+ break;
+ }
+ #else
+ ret = 0;
+ #endif
+
+ XMEMCPY(tmpIv, &pkiMsg[idx], length);
+ idx += length;
+
+ explicitOctet = 0;
+ localIdx = idx;
+ if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) == 0 &&
+ tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) {
+ explicitOctet = 1;
+ }
+
+ /* read encryptedContent, cont[0] */
+ if (tag != (ASN_CONTEXT_SPECIFIC | 0) &&
+ tag != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) {
+ ret = ASN_PARSE_E;
+ }
+ idx++;
+
+ if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentTotalSz,
+ pkiMsgSz) <= 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret != 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+ pkcs7->stream->expected = encryptedContentTotalSz;
+ wc_PKCS7_StreamGetVar(pkcs7, &encOID, &expBlockSz, 0);
+ wc_PKCS7_StreamStoreVar(pkcs7, encOID, expBlockSz, explicitOctet);
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_ENV_5);
+ FALL_THROUGH;
+
+ case WC_PKCS7_ENV_5:
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
+
+ wc_PKCS7_StreamGetVar(pkcs7, &encOID, &expBlockSz, &explicitOctet);
+ tmpIv = pkcs7->stream->tmpIv;
+ encryptedContentTotalSz = pkcs7->stream->expected;
+
+ /* restore decrypted key */
+ decryptedKey = pkcs7->stream->aad;
+ decryptedKeySz = pkcs7->stream->aadSz;
+ blockKeySz = pkcs7->stream->contentSz;
+ #else
+ ret = 0;
+ #endif
+
+ if (explicitOctet) {
+ /* encrypted content may be fragmented into multiple
+ * consecutive OCTET STRINGs, if so loop through
+ * collecting and caching encrypted content bytes */
+ localIdx = idx;
+ while (idx < (localIdx + encryptedContentTotalSz)) {
+
+ if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && (tag != ASN_OCTET_STRING)) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && GetLength(pkiMsg, &idx,
+ &encryptedContentSz, pkiMsgSz) <= 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0) {
+ ret = PKCS7_CacheEncryptedContent(pkcs7, &pkiMsg[idx],
+ encryptedContentSz);
+ }
+
+ if (ret != 0) {
+ break;
+ }
+
+ /* advance idx past encrypted content */
+ idx += encryptedContentSz;
+ }
+
+ if (ret != 0) {
+ break;
+ }
+
+ } else {
+ /* cache encrypted content, no OCTET STRING */
+ ret = PKCS7_CacheEncryptedContent(pkcs7, &pkiMsg[idx],
+ encryptedContentTotalSz);
+ if (ret != 0) {
+ break;
+ }
+ idx += encryptedContentTotalSz;
+ }
+
+ /* use cached content */
+ encryptedContent = pkcs7->cachedEncryptedContent;
+ encryptedContentSz = pkcs7->cachedEncryptedContentSz;
+
+ /* decrypt encryptedContent */
+ ret = wc_PKCS7_DecryptContent(pkcs7, encOID, decryptedKey,
+ blockKeySz, tmpIv, expBlockSz, NULL, 0, NULL, 0,
+ encryptedContent, encryptedContentSz, encryptedContent);
+ if (ret != 0) {
+ break;
+ }
+
+ padLen = encryptedContent[encryptedContentSz-1];
+
+ /* copy plaintext to output */
+ if (padLen > encryptedContentSz ||
+ (word32)(encryptedContentSz - padLen) > outputSz) {
+ ret = BUFFER_E;
+ break;
+ }
+ XMEMCPY(output, encryptedContent, encryptedContentSz - padLen);
+
+ /* free memory, zero out keys */
+ ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ);
+ XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->cachedEncryptedContent != NULL) {
+ XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ pkcs7->cachedEncryptedContent = NULL;
+ pkcs7->cachedEncryptedContentSz = 0;
+ }
+
+ ret = encryptedContentSz - padLen;
+ #ifndef NO_PKCS7_STREAM
+ pkcs7->stream->aad = NULL;
+ pkcs7->stream->aadSz = 0;
+ wc_PKCS7_ResetStream(pkcs7);
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+ break;
+
+ default:
+ WOLFSSL_MSG("PKCS#7 unknown decode enveloped state");
+ ret = BAD_FUNC_ARG;
+ }
+
+#ifndef NO_PKCS7_STREAM
+ if (ret < 0 && ret != WC_PKCS7_WANT_READ_E) {
+ wc_PKCS7_ResetStream(pkcs7);
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+ if (pkcs7->cachedEncryptedContent != NULL) {
+ XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ pkcs7->cachedEncryptedContent = NULL;
+ pkcs7->cachedEncryptedContentSz = 0;
+ }
+ }
+#else
+ if (decryptedKey != NULL && ret < 0) {
+ ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ);
+ XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ }
+ if (pkcs7->cachedEncryptedContent != NULL && ret < 0) {
+ XFREE(pkcs7->cachedEncryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ pkcs7->cachedEncryptedContent = NULL;
+ pkcs7->cachedEncryptedContentSz = 0;
+ }
#endif
+ return ret;
+}
+
+
+/* build PKCS#7 authEnvelopedData content type, return enveloped size */
+int wc_PKCS7_EncodeAuthEnvelopedData(PKCS7* pkcs7, byte* output,
+ word32 outputSz)
+{
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+ int ret, idx = 0;
+ int totalSz, encryptedOutSz;
+
+ int contentInfoSeqSz, outerContentTypeSz, outerContentSz;
+ byte contentInfoSeq[MAX_SEQ_SZ];
+ byte outerContentType[MAX_ALGO_SZ];
+ byte outerContent[MAX_SEQ_SZ];
+
+ int envDataSeqSz, verSz;
+ byte envDataSeq[MAX_SEQ_SZ];
+ byte ver[MAX_VERSION_SZ];
+
+ WC_RNG rng;
+ int blockSz, blockKeySz;
+ byte* encryptedContent;
+
+ Pkcs7EncodedRecip* tmpRecip = NULL;
+ int recipSz, recipSetSz;
+ byte recipSet[MAX_SET_SZ];
+
+ int encContentOctetSz, encContentSeqSz, contentTypeSz;
+ int contentEncAlgoSz, nonceOctetStringSz, macOctetStringSz;
+ byte encContentSeq[MAX_SEQ_SZ];
+ byte contentType[MAX_ALGO_SZ];
+ byte contentEncAlgo[MAX_ALGO_SZ];
+ byte nonceOctetString[MAX_OCTET_STR_SZ];
+ byte encContentOctet[MAX_OCTET_STR_SZ];
+ byte macOctetString[MAX_OCTET_STR_SZ];
+
+ byte authTag[AES_BLOCK_SIZE];
+ byte nonce[GCM_NONCE_MID_SZ]; /* GCM nonce is larger than CCM */
+ byte macInt[MAX_VERSION_SZ];
+ word32 nonceSz = 0, macIntSz = 0;
+
+ /* authAttribs */
+ byte* flatAuthAttribs = NULL;
+ byte authAttribSet[MAX_SET_SZ];
+ EncodedAttrib authAttribs[MAX_AUTH_ATTRIBS_SZ];
+ word32 authAttribsSz = 0, authAttribsCount = 0;
+ word32 authAttribsSetSz = 0;
+
+ byte* aadBuffer = NULL;
+ word32 aadBufferSz = 0;
+ byte authAttribAadSet[MAX_SET_SZ];
+ word32 authAttribsAadSetSz = 0;
+
+ /* unauthAttribs */
+ byte* flatUnauthAttribs = NULL;
+ byte unauthAttribSet[MAX_SET_SZ];
+ EncodedAttrib unauthAttribs[MAX_UNAUTH_ATTRIBS_SZ];
+ word32 unauthAttribsSz = 0, unauthAttribsCount = 0;
+ word32 unauthAttribsSetSz = 0;
+
+
+ PKCS7Attrib contentTypeAttrib;
+ byte contentTypeValue[MAX_OID_SZ];
+ /* contentType OID (1.2.840.113549.1.9.3) */
+ const byte contentTypeOid[] =
+ { ASN_OBJECT_ID, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xF7, 0x0d, 0x01,
+ 0x09, 0x03 };
+
+ if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0)
+ return BAD_FUNC_ARG;
+
+ if (output == NULL || outputSz == 0)
+ return BAD_FUNC_ARG;
+
+ switch (pkcs7->encryptOID) {
+#ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ case AES128GCMb:
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192GCMb:
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256GCMb:
+ break;
+ #endif
+#endif
+#ifdef HAVE_AESCCM
+ #ifdef WOLFSSL_AES_128
+ case AES128CCMb:
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CCMb:
+ break;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CCMb:
+ break;
+ #endif
+#endif
+ default:
+ WOLFSSL_MSG("CMS AuthEnvelopedData must use AES-GCM or AES-CCM");
+ return BAD_FUNC_ARG;
+ }
+
+ blockKeySz = wc_PKCS7_GetOIDKeySize(pkcs7->encryptOID);
+ if (blockKeySz < 0)
+ return blockKeySz;
+
+ blockSz = wc_PKCS7_GetOIDBlockSize(pkcs7->encryptOID);
+ if (blockSz < 0)
+ return blockSz;
+
+ /* outer content type */
+ ret = wc_SetContentType(AUTH_ENVELOPED_DATA, outerContentType,
+ sizeof(outerContentType));
+ if (ret < 0)
+ return ret;
+
+ outerContentTypeSz = ret;
+
+ /* version, defined as 0 in RFC 5083 */
+ verSz = SetMyVersion(0, ver, 0);
+
+ /* generate random content encryption key */
+ ret = PKCS7_GenerateContentEncryptionKey(pkcs7, blockKeySz);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* build RecipientInfo, only if user manually set singleCert and size */
+ if (pkcs7->singleCert != NULL && pkcs7->singleCertSz > 0) {
+ switch (pkcs7->publicKeyOID) {
+ #ifndef NO_RSA
+ case RSAk:
+ ret = wc_PKCS7_AddRecipient_KTRI(pkcs7, pkcs7->singleCert,
+ pkcs7->singleCertSz, 0);
+ break;
+ #endif
+ #ifdef HAVE_ECC
+ case ECDSAk:
+ ret = wc_PKCS7_AddRecipient_KARI(pkcs7, pkcs7->singleCert,
+ pkcs7->singleCertSz,
+ pkcs7->keyWrapOID,
+ pkcs7->keyAgreeOID, pkcs7->ukm,
+ pkcs7->ukmSz, 0);
+ break;
+ #endif
+
+ default:
+ WOLFSSL_MSG("Unsupported RecipientInfo public key type");
+ return BAD_FUNC_ARG;
+ };
+
+ if (ret < 0) {
+ WOLFSSL_MSG("Failed to create RecipientInfo");
return ret;
}
}
- encContentOctetSz = SetImplicit(ASN_OCTET_STRING, 0,
- desOutSz, encContentOctet);
+ recipSz = wc_PKCS7_GetRecipientListSize(pkcs7);
+ if (recipSz < 0) {
+ return ret;
+
+ } else if (recipSz == 0) {
+ WOLFSSL_MSG("You must add at least one CMS recipient");
+ return PKCS7_RECIP_E;
+ }
+ recipSetSz = SetSet(recipSz, recipSet);
+
+ /* generate random nonce and IV for encryption */
+ switch (pkcs7->encryptOID) {
+#ifdef HAVE_AESGCM
+ #ifdef WOLFSSL_AES_128
+ case AES128GCMb:
+ FALL_THROUGH;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192GCMb:
+ FALL_THROUGH;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256GCMb:
+ #endif
+ #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+ defined(WOLFSSL_AES_256)
+ /* GCM nonce is GCM_NONCE_MID_SZ (12) */
+ nonceSz = GCM_NONCE_MID_SZ;
+ break;
+ #endif
+#endif /* HAVE_AESGCM */
+#ifdef HAVE_AESCCM
+ #ifdef WOLFSSL_AES_128
+ case AES128CCMb:
+ FALL_THROUGH;
+ #endif
+ #ifdef WOLFSSL_AES_192
+ case AES192CCMb:
+ FALL_THROUGH;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ case AES256CCMb:
+ #endif
+ #if defined(WOLFSSL_AES_128) || defined(WOLFSSL_AES_192) || \
+ defined(WOLFSSL_AES_256)
+ /* CCM nonce is CCM_NONCE_MIN_SZ (7) */
+ nonceSz = CCM_NONCE_MIN_SZ;
+ break;
+ #endif
+#endif /* HAVE_AESCCM */
+ }
+
+ ret = wc_InitRng_ex(&rng, pkcs7->heap, pkcs7->devId);
+ if (ret != 0)
+ return ret;
+
+ ret = wc_PKCS7_GenerateBlock(pkcs7, &rng, nonce, nonceSz);
+ wc_FreeRng(&rng);
+ if (ret != 0) {
+ return ret;
+ }
+
+
+ /* authAttribs: add contentType attrib if needed */
+ if (pkcs7->contentOID != DATA) {
+
+ /* if type is not id-data, contentType attribute MUST be added */
+ contentTypeAttrib.oid = contentTypeOid;
+ contentTypeAttrib.oidSz = sizeof(contentTypeOid);
+
+ /* try to set from contentOID first, known types */
+ ret = wc_SetContentType(pkcs7->contentOID, contentTypeValue,
+ sizeof(contentTypeValue));
+ if (ret > 0) {
+ contentTypeAttrib.value = contentTypeValue;
+ contentTypeAttrib.valueSz = ret;
+
+ /* otherwise, try to set from custom content type */
+ } else {
+ if (pkcs7->contentTypeSz == 0) {
+ WOLFSSL_MSG("CMS pkcs7->contentType must be set if "
+ "contentOID is not");
+ return BAD_FUNC_ARG;
+ }
+ contentTypeAttrib.value = pkcs7->contentType;
+ contentTypeAttrib.valueSz = pkcs7->contentTypeSz;
+ }
+
+ authAttribsSz += EncodeAttributes(authAttribs, 1,
+ &contentTypeAttrib, 1);
+ authAttribsCount += 1;
+ }
+
+ /* authAttribs: add in user authenticated attributes */
+ if (pkcs7->authAttribs != NULL && pkcs7->authAttribsSz > 0) {
+ authAttribsSz += EncodeAttributes(authAttribs + authAttribsCount,
+ MAX_AUTH_ATTRIBS_SZ - authAttribsCount,
+ pkcs7->authAttribs,
+ pkcs7->authAttribsSz);
+ authAttribsCount += pkcs7->authAttribsSz;
+ }
+
+ /* authAttribs: flatten authAttribs */
+ if (authAttribsSz > 0 && authAttribsCount > 0) {
+ flatAuthAttribs = (byte*)XMALLOC(authAttribsSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (flatAuthAttribs == NULL) {
+ return MEMORY_E;
+ }
+
+ FlattenAttributes(pkcs7, flatAuthAttribs, authAttribs,
+ authAttribsCount);
+
+ authAttribsSetSz = SetImplicit(ASN_SET, 1, authAttribsSz,
+ authAttribSet);
+
+ /* From RFC5083, "For the purpose of constructing the AAD, the
+ * IMPLICIT [1] tag in the authAttrs field is not used for the
+ * DER encoding: rather a universal SET OF tag is used. */
+ authAttribsAadSetSz = SetSet(authAttribsSz, authAttribAadSet);
+
+ /* allocate temp buffer to hold alternate attrib encoding for aad */
+ aadBuffer = (byte*)XMALLOC(authAttribsSz + authAttribsAadSetSz,
+ pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (aadBuffer == NULL) {
+ XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ /* build up alternate attrib encoding for aad */
+ aadBufferSz = 0;
+ XMEMCPY(aadBuffer + aadBufferSz, authAttribAadSet, authAttribsAadSetSz);
+ aadBufferSz += authAttribsAadSetSz;
+ XMEMCPY(aadBuffer + aadBufferSz, flatAuthAttribs, authAttribsSz);
+ aadBufferSz += authAttribsSz;
+ }
+
+ /* build up unauthenticated attributes (unauthAttrs) */
+ if (pkcs7->unauthAttribsSz > 0) {
+ unauthAttribsSz = EncodeAttributes(unauthAttribs + unauthAttribsCount,
+ MAX_UNAUTH_ATTRIBS_SZ - unauthAttribsCount,
+ pkcs7->unauthAttribs,
+ pkcs7->unauthAttribsSz);
+ unauthAttribsCount = pkcs7->unauthAttribsSz;
+
+ flatUnauthAttribs = (byte*)XMALLOC(unauthAttribsSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (flatUnauthAttribs == NULL) {
+ if (aadBuffer)
+ XFREE(aadBuffer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (flatAuthAttribs)
+ XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ FlattenAttributes(pkcs7, flatUnauthAttribs, unauthAttribs,
+ unauthAttribsCount);
+ unauthAttribsSetSz = SetImplicit(ASN_SET, 2, unauthAttribsSz,
+ unauthAttribSet);
+ }
+
+ /* allocate encrypted content buffer */
+ encryptedOutSz = pkcs7->contentSz;
+ encryptedContent = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (encryptedContent == NULL) {
+ if (aadBuffer)
+ XFREE(aadBuffer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (flatUnauthAttribs)
+ XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (flatAuthAttribs)
+ XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ /* encrypt content */
+ ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, pkcs7->cek,
+ pkcs7->cekSz, nonce, nonceSz, aadBuffer, aadBufferSz, authTag,
+ sizeof(authTag), pkcs7->content, encryptedOutSz, encryptedContent);
+
+ if (aadBuffer) {
+ XFREE(aadBuffer, pkcs7->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ aadBuffer = NULL;
+ }
+
+ if (ret != 0) {
+ if (flatUnauthAttribs)
+ XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (flatAuthAttribs)
+ XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ /* EncryptedContentInfo */
+ ret = wc_SetContentType(pkcs7->contentOID, contentType,
+ sizeof(contentType));
+ if (ret < 0) {
+ if (flatUnauthAttribs)
+ XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (flatAuthAttribs)
+ XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ contentTypeSz = ret;
+
+ /* put together nonce OCTET STRING */
+ nonceOctetStringSz = SetOctetString(nonceSz, nonceOctetString);
+
+ /* put together aes-ICVlen INTEGER */
+ macIntSz = SetMyVersion(sizeof(authTag), macInt, 0);
+
+ /* build up our ContentEncryptionAlgorithmIdentifier sequence,
+ * adding (nonceOctetStringSz + blockSz + macIntSz) for nonce OCTET STRING
+ * and tag size */
+ contentEncAlgoSz = SetAlgoID(pkcs7->encryptOID, contentEncAlgo,
+ oidBlkType, nonceOctetStringSz + nonceSz +
+ macIntSz);
+
+ if (contentEncAlgoSz == 0) {
+ if (flatUnauthAttribs)
+ XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (flatAuthAttribs)
+ XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BAD_FUNC_ARG;
+ }
+
+ encContentOctetSz = SetImplicit(ASN_OCTET_STRING, 0, encryptedOutSz,
+ encContentOctet);
encContentSeqSz = SetSequence(contentTypeSz + contentEncAlgoSz +
- ivOctetStringSz + DES_BLOCK_SIZE +
- encContentOctetSz + desOutSz, encContentSeq);
+ nonceOctetStringSz + nonceSz + macIntSz +
+ encContentOctetSz + encryptedOutSz,
+ encContentSeq);
+
+ macOctetStringSz = SetOctetString(sizeof(authTag), macOctetString);
/* keep track of sizes for outer wrapper layering */
totalSz = verSz + recipSetSz + recipSz + encContentSeqSz + contentTypeSz +
- contentEncAlgoSz + ivOctetStringSz + DES_BLOCK_SIZE +
- encContentOctetSz + desOutSz;
+ contentEncAlgoSz + nonceOctetStringSz + nonceSz + macIntSz +
+ encContentOctetSz + encryptedOutSz + authAttribsSz +
+ authAttribsSetSz + macOctetStringSz + sizeof(authTag) +
+ unauthAttribsSz + unauthAttribsSetSz;
/* EnvelopedData */
envDataSeqSz = SetSequence(totalSz, envDataSeq);
@@ -1417,12 +10836,11 @@ int wc_PKCS7_EncodeEnvelopedData(PKCS7* pkcs7, byte* output, word32 outputSz)
if (totalSz > (int)outputSz) {
WOLFSSL_MSG("Pkcs7_encrypt output buffer too small");
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (dynamicFlag)
- XFREE(plain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
-#endif
+ if (flatUnauthAttribs)
+ XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (flatAuthAttribs)
+ XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return BUFFER_E;
}
@@ -1438,412 +10856,1659 @@ int wc_PKCS7_EncodeEnvelopedData(PKCS7* pkcs7, byte* output, word32 outputSz)
idx += verSz;
XMEMCPY(output + idx, recipSet, recipSetSz);
idx += recipSetSz;
- XMEMCPY(output + idx, recip, recipSz);
- idx += recipSz;
+ /* copy in recipients from list */
+ tmpRecip = pkcs7->recipList;
+ while (tmpRecip != NULL) {
+ XMEMCPY(output + idx, tmpRecip->recip, tmpRecip->recipSz);
+ idx += tmpRecip->recipSz;
+ tmpRecip = tmpRecip->next;
+ }
+ wc_PKCS7_FreeEncodedRecipientSet(pkcs7);
XMEMCPY(output + idx, encContentSeq, encContentSeqSz);
idx += encContentSeqSz;
XMEMCPY(output + idx, contentType, contentTypeSz);
idx += contentTypeSz;
XMEMCPY(output + idx, contentEncAlgo, contentEncAlgoSz);
idx += contentEncAlgoSz;
- XMEMCPY(output + idx, ivOctetString, ivOctetStringSz);
- idx += ivOctetStringSz;
- XMEMCPY(output + idx, tmpIv, DES_BLOCK_SIZE);
- idx += DES_BLOCK_SIZE;
+ XMEMCPY(output + idx, nonceOctetString, nonceOctetStringSz);
+ idx += nonceOctetStringSz;
+ XMEMCPY(output + idx, nonce, nonceSz);
+ idx += nonceSz;
+ XMEMCPY(output + idx, macInt, macIntSz);
+ idx += macIntSz;
XMEMCPY(output + idx, encContentOctet, encContentOctetSz);
idx += encContentOctetSz;
- XMEMCPY(output + idx, encryptedContent, desOutSz);
- idx += desOutSz;
+ XMEMCPY(output + idx, encryptedContent, encryptedOutSz);
+ idx += encryptedOutSz;
+
+ /* authenticated attributes */
+ if (flatAuthAttribs && authAttribsSz > 0) {
+ XMEMCPY(output + idx, authAttribSet, authAttribsSetSz);
+ idx += authAttribsSetSz;
+ XMEMCPY(output + idx, flatAuthAttribs, authAttribsSz);
+ idx += authAttribsSz;
+ XFREE(flatAuthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ }
- ForceZero(contentKeyPlain, MAX_CONTENT_KEY_LEN);
+ XMEMCPY(output + idx, macOctetString, macOctetStringSz);
+ idx += macOctetStringSz;
+ XMEMCPY(output + idx, authTag, sizeof(authTag));
+ idx += sizeof(authTag);
+
+ /* unauthenticated attributes */
+ if (unauthAttribsSz > 0) {
+ XMEMCPY(output + idx, unauthAttribSet, unauthAttribsSetSz);
+ idx += unauthAttribsSetSz;
+ XMEMCPY(output + idx, flatUnauthAttribs, unauthAttribsSz);
+ idx += unauthAttribsSz;
+ }
- if (dynamicFlag)
- XFREE(plain, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(recip, NULL, DYNAMMIC_TYPE_TMP_BUFFER);
-#endif
+ if (flatUnauthAttribs != NULL) {
+ XFREE(flatUnauthAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ }
+
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return idx;
+
+#else
+ WOLFSSL_MSG("AuthEnvelopedData requires AES-GCM or AES-CCM to be enabled");
+ (void)pkcs7;
+ (void)output;
+ (void)outputSz;
+
+ return NOT_COMPILED_IN;
+#endif /* HAVE_AESGCM | HAVE_AESCCM */
}
-/* unwrap and decrypt PKCS#7 envelopedData object, return decoded size */
-WOLFSSL_API int wc_PKCS7_DecodeEnvelopedData(PKCS7* pkcs7, byte* pkiMsg,
- word32 pkiMsgSz, byte* output,
- word32 outputSz)
+
+/* unwrap and decrypt PKCS#7 AuthEnvelopedData object, return decoded size */
+WOLFSSL_API int wc_PKCS7_DecodeAuthEnvelopedData(PKCS7* pkcs7, byte* in,
+ word32 inSz, byte* output,
+ word32 outputSz)
{
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
int recipFound = 0;
- int ret, version, length;
- word32 savedIdx = 0, idx = 0;
- word32 contentType, encOID;
- byte issuerHash[SHA_DIGEST_SIZE];
+ int ret = 0, length;
+ word32 idx = 0;
+#ifndef NO_PKCS7_STREAM
+ word32 tmpIdx = 0;
+ long rc;
+#endif
+ word32 contentType, encOID = 0;
+ word32 decryptedKeySz = 0;
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
- int encryptedKeySz, keySz;
- byte tmpIv[DES_BLOCK_SIZE];
- byte* decryptedKey = NULL;
+ int expBlockSz = 0, blockKeySz = 0;
+ byte authTag[AES_BLOCK_SIZE];
+ byte nonce[GCM_NONCE_MID_SZ]; /* GCM nonce is larger than CCM */
+ int nonceSz = 0, authTagSz = 0, macSz = 0;
#ifdef WOLFSSL_SMALL_STACK
- mp_int* serialNum;
- byte* encryptedKey;
- RsaKey* privKey;
+ byte* decryptedKey = NULL;
#else
- mp_int stack_serialNum;
- mp_int* serialNum = &stack_serialNum;
- byte encryptedKey[MAX_ENCRYPTED_KEY_SZ];
-
- RsaKey stack_privKey;
- RsaKey* privKey = &stack_privKey;
+ byte decryptedKey[MAX_ENCRYPTED_KEY_SZ];
#endif
- int encryptedContentSz;
- byte padLen;
+ int encryptedContentSz = 0;
byte* encryptedContent = NULL;
+ int explicitOctet = 0;
+
+ byte authAttribSetByte = 0;
+ byte* encodedAttribs = NULL;
+ word32 encodedAttribIdx = 0, encodedAttribSz = 0;
+ byte* authAttrib = NULL;
+ int authAttribSz = 0;
+ word32 localIdx;
+ byte tag;
- if (pkcs7 == NULL || pkcs7->singleCert == NULL ||
- pkcs7->singleCertSz == 0 || pkcs7->privateKey == NULL ||
- pkcs7->privateKeySz == 0)
+ if (pkcs7 == NULL)
return BAD_FUNC_ARG;
if (pkiMsg == NULL || pkiMsgSz == 0 ||
output == NULL || outputSz == 0)
return BAD_FUNC_ARG;
+#ifndef NO_PKCS7_STREAM
+ if (pkcs7->stream == NULL) {
+ if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
+ return ret;
+ }
+ }
+#endif
- /* read past ContentInfo, verify type is envelopedData */
- if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+ switch (pkcs7->state) {
+ case WC_PKCS7_START:
+ case WC_PKCS7_INFOSET_START:
+ case WC_PKCS7_INFOSET_STAGE1:
+ case WC_PKCS7_INFOSET_STAGE2:
+ case WC_PKCS7_INFOSET_END:
+ ret = wc_PKCS7_ParseToRecipientInfoSet(pkcs7, pkiMsg, pkiMsgSz,
+ &idx, AUTH_ENVELOPED_DATA);
+ if (ret < 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ tmpIdx = idx;
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_2);
+ FALL_THROUGH;
+
+ case WC_PKCS7_AUTHENV_2:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+ MAX_VERSION_SZ + ASN_TAG_SZ, &pkiMsg, &idx)) != 0) {
+ break;
+ }
+ #endif
+ #ifdef WOLFSSL_SMALL_STACK
+ decryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (decryptedKey == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
+ #ifndef NO_PKCS7_STREAM
+ pkcs7->stream->key = decryptedKey;
+ #endif
+ #endif
+ FALL_THROUGH;
+
+ case WC_PKCS7_DECRYPT_KTRI:
+ case WC_PKCS7_DECRYPT_KTRI_2:
+ case WC_PKCS7_DECRYPT_KTRI_3:
+ case WC_PKCS7_DECRYPT_KARI:
+ case WC_PKCS7_DECRYPT_KEKRI:
+ case WC_PKCS7_DECRYPT_PWRI:
+ case WC_PKCS7_DECRYPT_ORI:
+
+ decryptedKeySz = MAX_ENCRYPTED_KEY_SZ;
+ #ifdef WOLFSSL_SMALL_STACK
+ #ifndef NO_PKCS7_STREAM
+ decryptedKey = pkcs7->stream->key;
+ #endif
+ #endif
+
+ ret = wc_PKCS7_DecryptRecipientInfos(pkcs7, in, inSz, &idx,
+ decryptedKey, &decryptedKeySz,
+ &recipFound);
+ if (ret != 0) {
+ break;
+ }
- if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+ if (recipFound == 0) {
+ WOLFSSL_MSG("No recipient found in envelopedData that matches input");
+ ret = PKCS7_RECIP_E;
+ break;
+ }
- if (contentType != ENVELOPED_DATA) {
- WOLFSSL_MSG("PKCS#7 input not of type EnvelopedData");
- return PKCS7_OID_E;
- }
+ #ifndef NO_PKCS7_STREAM
+ tmpIdx = idx;
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_3);
+ FALL_THROUGH;
+
+ case WC_PKCS7_AUTHENV_3:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+ MAX_ALGO_SZ + MAX_ALGO_SZ + ASN_TAG_SZ,
+ &pkiMsg, &idx)) != 0) {
+ break;
+ }
- if (pkiMsg[idx++] != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
- return ASN_PARSE_E;
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+ in, inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #endif
- if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+ /* remove EncryptedContentInfo */
+ if (ret == 0 && GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
- /* remove EnvelopedData and version */
- if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
+ if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType,
+ pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
- if (GetMyVersion(pkiMsg, &idx, &version) < 0)
- return ASN_PARSE_E;
+ if (ret == 0 && GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType,
+ pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
- if (version != 0) {
- WOLFSSL_MSG("PKCS#7 envelopedData needs to be of version 0");
- return ASN_VERSION_E;
- }
+ blockKeySz = wc_PKCS7_GetOIDKeySize(encOID);
+ if (ret == 0 && blockKeySz < 0) {
+ ret = blockKeySz;
+ }
- /* walk through RecipientInfo set, find correct recipient */
- if (GetSet(pkiMsg, &idx, &length, pkiMsgSz) < 0)
- return ASN_PARSE_E;
-
-#ifdef WOLFSSL_SMALL_STACK
- encryptedKey = (byte*)XMALLOC(MAX_ENCRYPTED_KEY_SZ, NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- if (encryptedKey == NULL)
- return MEMORY_E;
-#endif
-
- savedIdx = idx;
- recipFound = 0;
+ expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID);
+ if (ret == 0 && expBlockSz < 0) {
+ ret = expBlockSz;
+ }
- /* when looking for next recipient, use first sequence and version to
- * indicate there is another, if not, move on */
- while(recipFound == 0) {
+ /* get nonce, stored in OPTIONAL parameter of AlgoID */
+ if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
- /* remove RecipientInfo, if we don't have a SEQUENCE, back up idx to
- * last good saved one */
- if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
- idx = savedIdx;
- break;
- }
+ if (ret == 0 && tag != ASN_OCTET_STRING) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret < 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+ wc_PKCS7_StreamStoreVar(pkcs7, encOID, blockKeySz, 0);
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_4);
+ FALL_THROUGH;
+
+ case WC_PKCS7_AUTHENV_4:
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+ MAX_VERSION_SZ + ASN_TAG_SZ + MAX_LENGTH_SZ,
+ &pkiMsg, &idx)) != 0) {
+ break;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+ #endif
+ if (ret == 0 && GetLength(pkiMsg, &idx, &nonceSz, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && nonceSz > (int)sizeof(nonce)) {
+ WOLFSSL_MSG("AuthEnvelopedData nonce too large for buffer");
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0) {
+ XMEMCPY(nonce, &pkiMsg[idx], nonceSz);
+ idx += nonceSz;
+ }
+
+ /* get mac size, also stored in OPTIONAL parameter of AlgoID */
+ if (ret == 0 && GetMyVersion(pkiMsg, &idx, &macSz, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0) {
+ explicitOctet = 0;
+ localIdx = idx;
+ if (GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) == 0 &&
+ tag == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0))
+ explicitOctet = 1;
+
+ /* read encryptedContent, cont[0] */
+ ret = GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz);
+ }
+
+ if (ret == 0 &&
+ tag != (ASN_CONTEXT_SPECIFIC | 0) &&
+ tag != (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | 0)) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentSz,
+ pkiMsgSz) <= 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (explicitOctet) {
+ if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0 && tag != ASN_OCTET_STRING) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentSz,
+ pkiMsgSz) <= 0) {
+ ret = ASN_PARSE_E;
+ }
+ }
+
+ if (ret < 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+
+ /* store nonce for later */
+ if (nonceSz > 0) {
+ pkcs7->stream->nonceSz = nonceSz;
+ pkcs7->stream->nonce = (byte*)XMALLOC(nonceSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->stream->nonce == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
+ else {
+ XMEMCPY(pkcs7->stream->nonce, nonce, nonceSz);
+ }
+ }
+
+ pkcs7->stream->expected = encryptedContentSz;
+ wc_PKCS7_StreamStoreVar(pkcs7, encOID, blockKeySz,
+ encryptedContentSz);
+ #endif
- if (GetMyVersion(pkiMsg, &idx, &version) < 0) {
- idx = savedIdx;
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_5);
+ FALL_THROUGH;
+
+ case WC_PKCS7_AUTHENV_5:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+ ASN_TAG_SZ + ASN_TAG_SZ + pkcs7->stream->expected,
+ &pkiMsg, &idx)) != 0) {
+ break;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+
+ encryptedContentSz = pkcs7->stream->expected;
+ #endif
+
+ encryptedContent = (byte*)XMALLOC(encryptedContentSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (ret == 0 && encryptedContent == NULL) {
+ ret = MEMORY_E;
+ }
+
+ if (ret == 0) {
+ XMEMCPY(encryptedContent, &pkiMsg[idx], encryptedContentSz);
+ idx += encryptedContentSz;
+ }
+ #ifndef NO_PKCS7_STREAM
+ pkcs7->stream->bufferPt = encryptedContent;
+ #endif
+
+ /* may have IMPLICIT [1] authenticatedAttributes */
+ localIdx = idx;
+ if (ret == 0 && GetASNTag(pkiMsg, &localIdx, &tag, pkiMsgSz) == 0 &&
+ tag == (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1)) {
+ encodedAttribIdx = idx;
+ encodedAttribs = pkiMsg + idx;
+ idx++;
+
+ if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+ #ifndef NO_PKCS7_STREAM
+ pkcs7->stream->expected = length;
+ #endif
+ encodedAttribSz = length + (idx - encodedAttribIdx);
+
+ if (ret != 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ if (encodedAttribSz > 0) {
+ pkcs7->stream->aadSz = encodedAttribSz;
+ pkcs7->stream->aad = (byte*)XMALLOC(encodedAttribSz,
+ pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->stream->aad == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
+ else {
+ XMEMCPY(pkcs7->stream->aad, encodedAttribs,
+ (idx - encodedAttribIdx));
+ }
+ }
+
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_ATRB);
+ }
+ else {
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+ #endif
+ goto authenv_atrbend; /* jump over attribute cases */
+ }
+ FALL_THROUGH;
+
+ case WC_PKCS7_AUTHENV_ATRB:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
+
+ length = pkcs7->stream->expected;
+ encodedAttribs = pkcs7->stream->aad;
+ #else
+ length = 0;
+ #endif
+
+ /* save pointer and length */
+ authAttrib = &pkiMsg[idx];
+ authAttribSz = length;
+
+ if (ret == 0 && wc_PKCS7_ParseAttribs(pkcs7, authAttrib, authAttribSz) < 0) {
+ WOLFSSL_MSG("Error parsing authenticated attributes");
+ ret = ASN_PARSE_E;
+ break;
+ }
+
+ idx += length;
+
+ #ifndef NO_PKCS7_STREAM
+ if (encodedAttribSz > 0) {
+ XMEMCPY(pkcs7->stream->aad + (encodedAttribSz - length),
+ authAttrib, authAttribSz);
+ }
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_ATRBEND);
+ FALL_THROUGH;
+
+authenv_atrbend:
+ case WC_PKCS7_AUTHENV_ATRBEND:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_LENGTH_SZ +
+ ASN_TAG_SZ, &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK,
+ in, inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+
+ if (pkcs7->stream->aadSz > 0) {
+ encodedAttribSz = pkcs7->stream->aadSz;
+ encodedAttribs = pkcs7->stream->aad;
+ }
+ #endif
+
+
+ /* get authTag OCTET STRING */
+ if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+ if (ret == 0 && tag != ASN_OCTET_STRING) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && GetLength(pkiMsg, &idx, &authTagSz, pkiMsgSz) < 0) {
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0 && authTagSz > (int)sizeof(authTag)) {
+ WOLFSSL_MSG("AuthEnvelopedData authTag too large for buffer");
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret == 0) {
+ XMEMCPY(authTag, &pkiMsg[idx], authTagSz);
+ idx += authTagSz;
+ }
+
+ if (ret == 0 && authAttrib != NULL) {
+ /* temporarily swap authAttribs byte[0] to SET OF instead of
+ * IMPLICIT [1], for aad calculation */
+ authAttribSetByte = encodedAttribs[0];
+
+ encodedAttribs[0] = ASN_SET | ASN_CONSTRUCTED;
+ }
+
+ if (ret < 0)
+ break;
+
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+ pkcs7->stream->expected = (pkcs7->stream->maxLen -
+ pkcs7->stream->totalRd) + pkcs7->stream->length;
+
+
+ /* store tag for later */
+ if (authTagSz > 0) {
+ pkcs7->stream->tagSz = authTagSz;
+ pkcs7->stream->tag = (byte*)XMALLOC(authTagSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (pkcs7->stream->tag == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
+ else {
+ XMEMCPY(pkcs7->stream->tag, authTag, authTagSz);
+ }
+ }
+
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_AUTHENV_6);
+ FALL_THROUGH;
+
+ case WC_PKCS7_AUTHENV_6:
+ #ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+ break;
+ }
+
+ /* restore all variables needed */
+ if (pkcs7->stream->nonceSz > 0) {
+ nonceSz = pkcs7->stream->nonceSz;
+ if (nonceSz > GCM_NONCE_MID_SZ) {
+ WOLFSSL_MSG("PKCS7 saved nonce is too large");
+ ret = BUFFER_E;
+ break;
+ }
+ else {
+ XMEMCPY(nonce, pkcs7->stream->nonce, nonceSz);
+ }
+ }
+
+ if (pkcs7->stream->tagSz > 0) {
+ authTagSz = pkcs7->stream->tagSz;
+ if (authTagSz > AES_BLOCK_SIZE) {
+ WOLFSSL_MSG("PKCS7 saved tag is too large");
+ ret = BUFFER_E;
+ break;
+ }
+ else {
+ XMEMCPY(authTag, pkcs7->stream->tag, authTagSz);
+ }
+ }
+
+ if (pkcs7->stream->aadSz > 0) {
+ encodedAttribSz = pkcs7->stream->aadSz;
+ encodedAttribs = pkcs7->stream->aad;
+ }
+
+ wc_PKCS7_StreamGetVar(pkcs7, &encOID, &blockKeySz,
+ &encryptedContentSz);
+ encryptedContent = pkcs7->stream->bufferPt;
+ #ifdef WOLFSSL_SMALL_STACK
+ decryptedKey = pkcs7->stream->key;
+ #endif
+ #endif
+
+ /* decrypt encryptedContent */
+ ret = wc_PKCS7_DecryptContent(pkcs7, encOID, decryptedKey,
+ blockKeySz, nonce, nonceSz, encodedAttribs, encodedAttribSz,
+ authTag, authTagSz, encryptedContent, encryptedContentSz,
+ encryptedContent);
+ if (ret != 0) {
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ if (authAttrib != NULL) {
+ /* restore authAttrib IMPLICIT [1] */
+ encodedAttribs[0] = authAttribSetByte;
+ }
+
+ /* copy plaintext to output */
+ XMEMCPY(output, encryptedContent, encryptedContentSz);
+
+ /* free memory, zero out keys */
+ ForceZero(encryptedContent, encryptedContentSz);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ decryptedKey = NULL;
+ #ifdef WOLFSSL_SMALL_STACK
+ #ifndef NO_PKCS7_STREAM
+ pkcs7->stream->key = NULL;
+ #endif
+ #endif
+ #endif
+ ret = encryptedContentSz;
+ #ifndef NO_PKCS7_STREAM
+ wc_PKCS7_ResetStream(pkcs7);
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
break;
- }
+ default:
+ WOLFSSL_MSG("Unknown PKCS7 state");
+ ret = BAD_FUNC_ARG;
+ }
- if (version != 0) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ASN_VERSION_E;
+ if (ret != 0 && ret != WC_PKCS7_WANT_READ_E) {
+ if (decryptedKey != NULL) {
+ ForceZero(decryptedKey, MAX_ENCRYPTED_KEY_SZ);
}
-
- /* remove IssuerAndSerialNumber */
- if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(decryptedKey, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ }
#endif
- return ASN_PARSE_E;
- }
-
- if (GetNameHash(pkiMsg, &idx, issuerHash, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#ifndef NO_PKCS7_STREAM
+ if (ret != 0 && ret != WC_PKCS7_WANT_READ_E) {
+ wc_PKCS7_ResetStream(pkcs7);
+ }
#endif
- return ASN_PARSE_E;
+
+ return ret;
+
+#else
+ WOLFSSL_MSG("AuthEnvelopedData requires AES-GCM or AES-CCM to be enabled");
+ (void)pkcs7;
+ (void)in;
+ (void)inSz;
+ (void)output;
+ (void)outputSz;
+
+ return NOT_COMPILED_IN;
+#endif /* HAVE_AESGCM | HAVE_AESCCM */
+}
+
+
+#ifndef NO_PKCS7_ENCRYPTED_DATA
+
+/* build PKCS#7 encryptedData content type, return encrypted size */
+int wc_PKCS7_EncodeEncryptedData(PKCS7* pkcs7, byte* output, word32 outputSz)
+{
+ int ret, idx = 0;
+ int totalSz, padSz, encryptedOutSz;
+
+ int contentInfoSeqSz, outerContentTypeSz, outerContentSz;
+ byte contentInfoSeq[MAX_SEQ_SZ];
+ byte outerContentType[MAX_ALGO_SZ];
+ byte outerContent[MAX_SEQ_SZ];
+
+ int encDataSeqSz, verSz, blockSz;
+ byte encDataSeq[MAX_SEQ_SZ];
+ byte ver[MAX_VERSION_SZ];
+
+ byte* plain = NULL;
+ byte* encryptedContent = NULL;
+
+ int encContentOctetSz, encContentSeqSz, contentTypeSz;
+ int contentEncAlgoSz, ivOctetStringSz;
+ byte encContentSeq[MAX_SEQ_SZ];
+ byte contentType[MAX_OID_SZ];
+ byte contentEncAlgo[MAX_ALGO_SZ];
+ byte tmpIv[MAX_CONTENT_IV_SIZE];
+ byte ivOctetString[MAX_OCTET_STR_SZ];
+ byte encContentOctet[MAX_OCTET_STR_SZ];
+
+ byte attribSet[MAX_SET_SZ];
+ EncodedAttrib* attribs = NULL;
+ word32 attribsSz;
+ word32 attribsCount;
+ word32 attribsSetSz;
+
+ byte* flatAttribs = NULL;
+
+ if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0 ||
+ pkcs7->encryptOID == 0 || pkcs7->encryptionKey == NULL ||
+ pkcs7->encryptionKeySz == 0)
+ return BAD_FUNC_ARG;
+
+ if (output == NULL || outputSz == 0)
+ return BAD_FUNC_ARG;
+
+ if (pkcs7->version == 3) {
+ verSz = SetMyVersion(0, ver, 0);
+ outerContentTypeSz = 0;
+ }
+ else {
+ /* outer content type */
+ ret = wc_SetContentType(ENCRYPTED_DATA, outerContentType,
+ sizeof(outerContentType));
+ if (ret < 0)
+ return ret;
+
+ outerContentTypeSz = ret;
+
+ /* version, 2 if unprotectedAttrs present, 0 if absent */
+ if (pkcs7->unprotectedAttribsSz > 0) {
+ verSz = SetMyVersion(2, ver, 0);
+ } else {
+ verSz = SetMyVersion(0, ver, 0);
}
-
- /* if we found correct recipient, issuer hashes will match */
- if (XMEMCMP(issuerHash, pkcs7->issuerHash, SHA_DIGEST_SIZE) == 0) {
- recipFound = 1;
+ }
+
+ /* EncryptedContentInfo */
+ ret = wc_SetContentType(pkcs7->contentOID, contentType,
+ sizeof(contentType));
+ if (ret < 0)
+ return ret;
+
+ contentTypeSz = ret;
+
+ /* allocate encrypted content buffer, do PKCS#7 padding */
+ blockSz = wc_PKCS7_GetOIDBlockSize(pkcs7->encryptOID);
+ if (blockSz < 0)
+ return blockSz;
+
+ padSz = wc_PKCS7_GetPadSize(pkcs7->contentSz, blockSz);
+ if (padSz < 0)
+ return padSz;
+
+ encryptedOutSz = pkcs7->contentSz + padSz;
+
+ plain = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (plain == NULL)
+ return MEMORY_E;
+
+ ret = wc_PKCS7_PadData(pkcs7->content, pkcs7->contentSz, plain,
+ encryptedOutSz, blockSz);
+ if (ret < 0) {
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ encryptedContent = (byte*)XMALLOC(encryptedOutSz, pkcs7->heap,
+ DYNAMIC_TYPE_PKCS7);
+ if (encryptedContent == NULL) {
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
+ }
+
+ /* put together IV OCTET STRING */
+ ivOctetStringSz = SetOctetString(blockSz, ivOctetString);
+
+ /* build up ContentEncryptionAlgorithmIdentifier sequence,
+ adding (ivOctetStringSz + blockSz) for IV OCTET STRING */
+ contentEncAlgoSz = SetAlgoID(pkcs7->encryptOID, contentEncAlgo,
+ oidBlkType, ivOctetStringSz + blockSz);
+ if (contentEncAlgoSz == 0) {
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BAD_FUNC_ARG;
+ }
+
+ /* encrypt content */
+ WOLFSSL_MSG("Encrypting the content");
+ ret = wc_PKCS7_GenerateBlock(pkcs7, NULL, tmpIv, blockSz);
+ if (ret != 0) {
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ ret = wc_PKCS7_EncryptContent(pkcs7->encryptOID, pkcs7->encryptionKey,
+ pkcs7->encryptionKeySz, tmpIv, blockSz, NULL, 0, NULL, 0,
+ plain, encryptedOutSz, encryptedContent);
+ if (ret != 0) {
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+
+ encContentOctetSz = SetImplicit(ASN_OCTET_STRING, 0,
+ encryptedOutSz, encContentOctet);
+
+ encContentSeqSz = SetSequence(contentTypeSz + contentEncAlgoSz +
+ ivOctetStringSz + blockSz +
+ encContentOctetSz + encryptedOutSz,
+ encContentSeq);
+
+ /* optional UnprotectedAttributes */
+ if (pkcs7->unprotectedAttribsSz != 0) {
+
+ if (pkcs7->unprotectedAttribs == NULL) {
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BAD_FUNC_ARG;
}
-
-#ifdef WOLFSSL_SMALL_STACK
- serialNum = (mp_int*)XMALLOC(sizeof(mp_int), NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- if (serialNum == NULL) {
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ attribs = (EncodedAttrib*)XMALLOC(
+ sizeof(EncodedAttrib) * pkcs7->unprotectedAttribsSz,
+ pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (attribs == NULL) {
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return MEMORY_E;
}
-#endif
-
- if (GetInt(serialNum, pkiMsg, &idx, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(serialNum, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ASN_PARSE_E;
+
+ attribsCount = pkcs7->unprotectedAttribsSz;
+ attribsSz = EncodeAttributes(attribs, pkcs7->unprotectedAttribsSz,
+ pkcs7->unprotectedAttribs,
+ pkcs7->unprotectedAttribsSz);
+
+ flatAttribs = (byte*)XMALLOC(attribsSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (flatAttribs == NULL) {
+ XFREE(attribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return MEMORY_E;
}
-
- mp_clear(serialNum);
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(serialNum, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
- if (GetAlgoId(pkiMsg, &idx, &encOID, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ASN_PARSE_E;
+
+ FlattenAttributes(pkcs7, flatAttribs, attribs, attribsCount);
+ attribsSetSz = SetImplicit(ASN_SET, 1, attribsSz, attribSet);
+
+ } else {
+ attribsSz = 0;
+ attribsSetSz = 0;
+ }
+
+ /* keep track of sizes for outer wrapper layering */
+ totalSz = verSz + encContentSeqSz + contentTypeSz + contentEncAlgoSz +
+ ivOctetStringSz + blockSz + encContentOctetSz + encryptedOutSz +
+ attribsSz + attribsSetSz;
+
+ /* EncryptedData */
+ encDataSeqSz = SetSequence(totalSz, encDataSeq);
+ totalSz += encDataSeqSz;
+
+ if (pkcs7->version != 3) {
+ /* outer content */
+ outerContentSz = SetExplicit(0, totalSz, outerContent);
+ totalSz += outerContentTypeSz;
+ totalSz += outerContentSz;
+ /* ContentInfo */
+ contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq);
+ totalSz += contentInfoSeqSz;
+ } else {
+ contentInfoSeqSz = 0;
+ outerContentSz = 0;
+ }
+
+ if (totalSz > (int)outputSz) {
+ WOLFSSL_MSG("PKCS#7 output buffer too small");
+ if (pkcs7->unprotectedAttribsSz != 0) {
+ XFREE(attribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(flatAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
}
-
- /* key encryption algorithm must be RSA for now */
- if (encOID != RSAk) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BUFFER_E;
+ }
+
+ XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz);
+ idx += contentInfoSeqSz;
+ XMEMCPY(output + idx, outerContentType, outerContentTypeSz);
+ idx += outerContentTypeSz;
+ XMEMCPY(output + idx, outerContent, outerContentSz);
+ idx += outerContentSz;
+ XMEMCPY(output + idx, encDataSeq, encDataSeqSz);
+ idx += encDataSeqSz;
+ XMEMCPY(output + idx, ver, verSz);
+ idx += verSz;
+ XMEMCPY(output + idx, encContentSeq, encContentSeqSz);
+ idx += encContentSeqSz;
+ XMEMCPY(output + idx, contentType, contentTypeSz);
+ idx += contentTypeSz;
+ XMEMCPY(output + idx, contentEncAlgo, contentEncAlgoSz);
+ idx += contentEncAlgoSz;
+ XMEMCPY(output + idx, ivOctetString, ivOctetStringSz);
+ idx += ivOctetStringSz;
+ XMEMCPY(output + idx, tmpIv, blockSz);
+ idx += blockSz;
+ XMEMCPY(output + idx, encContentOctet, encContentOctetSz);
+ idx += encContentOctetSz;
+ XMEMCPY(output + idx, encryptedContent, encryptedOutSz);
+ idx += encryptedOutSz;
+
+ if (pkcs7->unprotectedAttribsSz != 0) {
+ XMEMCPY(output + idx, attribSet, attribsSetSz);
+ idx += attribsSetSz;
+ XMEMCPY(output + idx, flatAttribs, attribsSz);
+ idx += attribsSz;
+ XFREE(attribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(flatAttribs, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ }
+
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ XFREE(plain, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+ return idx;
+}
+
+
+/* decode and store unprotected attributes in PKCS7->decodedAttrib. Return
+ * 0 on success, negative on error. User must call wc_PKCS7_Free(). */
+static int wc_PKCS7_DecodeUnprotectedAttributes(PKCS7* pkcs7, byte* pkiMsg,
+ word32 pkiMsgSz, word32* inOutIdx)
+{
+ int ret, attribLen;
+ word32 idx;
+ byte tag;
+
+ if (pkcs7 == NULL || pkiMsg == NULL ||
+ pkiMsgSz == 0 || inOutIdx == NULL)
+ return BAD_FUNC_ARG;
+
+ idx = *inOutIdx;
+
+ if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 1))
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, &idx, &attribLen, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* loop through attributes */
+ if ((ret = wc_PKCS7_ParseAttribs(pkcs7, pkiMsg + idx, attribLen)) < 0) {
+ return ret;
+ }
+
+ *inOutIdx = idx;
+
+ return 0;
+}
+
+
+/* unwrap and decrypt PKCS#7/CMS encrypted-data object, return decoded size */
+int wc_PKCS7_DecodeEncryptedData(PKCS7* pkcs7, byte* in, word32 inSz,
+ byte* output, word32 outputSz)
+{
+ int ret = 0, version, length = 0, haveAttribs = 0;
+ word32 idx = 0;
+
+#ifndef NO_PKCS7_STREAM
+ word32 tmpIdx = 0;
+ long rc;
#endif
- return ALGO_ID_E;
+ word32 contentType, encOID;
+
+ int expBlockSz = 0;
+ byte tmpIvBuf[MAX_CONTENT_IV_SIZE];
+ byte *tmpIv = tmpIvBuf;
+
+ int encryptedContentSz = 0;
+ byte padLen;
+ byte* encryptedContent = NULL;
+
+ byte* pkiMsg = in;
+ word32 pkiMsgSz = inSz;
+ byte tag;
+
+ if (pkcs7 == NULL ||
+ ((pkcs7->encryptionKey == NULL || pkcs7->encryptionKeySz == 0) &&
+ pkcs7->decryptionCb == NULL))
+ return BAD_FUNC_ARG;
+
+ if (pkiMsg == NULL || pkiMsgSz == 0 ||
+ output == NULL || outputSz == 0)
+ return BAD_FUNC_ARG;
+
+#ifndef NO_PKCS7_STREAM
+ (void)tmpIv; /* help out static analysis */
+ if (pkcs7->stream == NULL) {
+ if ((ret = wc_PKCS7_CreateStream(pkcs7)) != 0) {
+ return ret;
}
-
- /* read encryptedKey */
- if (pkiMsg[idx++] != ASN_OCTET_STRING) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
#endif
- return ASN_PARSE_E;
- }
-
- if (GetLength(pkiMsg, &idx, &encryptedKeySz, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ switch (pkcs7->state) {
+ case WC_PKCS7_START:
+#ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz, MAX_SEQ_SZ +
+ MAX_ALGO_SZ, &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_SEQ_PEEK, in, inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
#endif
- return ASN_PARSE_E;
- }
-
- if (recipFound == 1)
- XMEMCPY(encryptedKey, &pkiMsg[idx], encryptedKeySz);
- idx += encryptedKeySz;
- /* update good idx */
- savedIdx = idx;
- }
+ if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
- if (recipFound == 0) {
- WOLFSSL_MSG("No recipient found in envelopedData that matches input");
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pkcs7->version != 3) { /* ContentInfo not in firmware bundles */
+ /* read past ContentInfo, verify type is encrypted-data */
+ if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType,
+ pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && contentType != ENCRYPTED_DATA) {
+ WOLFSSL_MSG("PKCS#7 input not of type EncryptedData");
+ ret = PKCS7_OID_E;
+ }
+ }
+ if (ret != 0) break;
+#ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
#endif
- return PKCS7_RECIP_E;
- }
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE2);
+ FALL_THROUGH;
+ /* end of stage 1 */
+
+ case WC_PKCS7_STAGE2:
+#ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ MAX_LENGTH_SZ + MAX_SEQ_SZ + ASN_TAG_SZ, &pkiMsg,
+ &idx)) != 0) {
+ return ret;
+ }
- /* remove EncryptedContentInfo */
- if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
#endif
- return ASN_PARSE_E;
- }
-
- if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pkcs7->version != 3) {
+ if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && tag !=
+ (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ /* remove EncryptedData and version */
+ if (ret == 0 && GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret != 0) break;
+#ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
#endif
- return ASN_PARSE_E;
- }
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE3);
+ FALL_THROUGH;
+ /* end of stage 2 */
+
+ case WC_PKCS7_STAGE3:
+#ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ MAX_VERSION_SZ + MAX_SEQ_SZ + MAX_ALGO_SZ * 2,
+ &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
- if (GetAlgoId(pkiMsg, &idx, &encOID, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
#endif
- return ASN_PARSE_E;
- }
-
- /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */
- if (pkiMsg[idx++] != ASN_OCTET_STRING) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ /* get version, check later */
+ haveAttribs = 0;
+ if (ret == 0 && GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ /* remove EncryptedContentInfo */
+ if (ret == 0 && GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && wc_GetContentType(pkiMsg, &idx, &contentType,
+ pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && (ret = GetAlgoId(pkiMsg, &idx, &encOID, oidBlkType,
+ pkiMsgSz)) < 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && (expBlockSz = wc_PKCS7_GetOIDBlockSize(encOID)) < 0)
+ ret = expBlockSz;
+
+ if (ret != 0) break;
+#ifndef NO_PKCS7_STREAM
+ /* store expBlockSz for later */
+ pkcs7->stream->varOne = expBlockSz;
+ pkcs7->stream->varTwo = encOID;
+
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+
+ /* store version for later */
+ pkcs7->stream->vers = version;
#endif
- return ASN_PARSE_E;
- }
-
- if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE4);
+ FALL_THROUGH;
+ /* end of stage 3 */
+
+ /* get block cipher IV, stored in OPTIONAL parameter of AlgoID */
+ case WC_PKCS7_STAGE4:
+#ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ ASN_TAG_SZ + MAX_LENGTH_SZ, &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+
+ /* restore saved variables */
+ expBlockSz = pkcs7->stream->varOne;
#endif
- return ASN_PARSE_E;
- }
-
- if (length != DES_BLOCK_SIZE) {
- WOLFSSL_MSG("Incorrect IV length, must be of DES_BLOCK_SIZE");
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && tag != ASN_OCTET_STRING)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && length != expBlockSz) {
+ WOLFSSL_MSG("Incorrect IV length, must be of content alg block size");
+ ret = ASN_PARSE_E;
+ }
+
+ if (ret != 0) break;
+#ifndef NO_PKCS7_STREAM
+ /* next chunk of data expected should have the IV */
+ pkcs7->stream->expected = length;
+
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
#endif
- return ASN_PARSE_E;
- }
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE5);
+ FALL_THROUGH;
+ /* end of stage 4 */
+
+ case WC_PKCS7_STAGE5:
+#ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ pkcs7->stream->expected + ASN_TAG_SZ +
+ MAX_LENGTH_SZ, &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
- XMEMCPY(tmpIv, &pkiMsg[idx], length);
- idx += length;
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
- /* read encryptedContent, cont[0] */
- if (pkiMsg[idx++] != (ASN_CONTEXT_SPECIFIC | 0)) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ /* use IV buffer from stream structure */
+ tmpIv = pkcs7->stream->tmpIv;
+ length = pkcs7->stream->expected;
#endif
- return ASN_PARSE_E;
- }
+ XMEMCPY(tmpIv, &pkiMsg[idx], length);
+ idx += length;
+ /* read encryptedContent, cont[0] */
+ if (ret == 0 && GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && tag != (ASN_CONTEXT_SPECIFIC | 0))
+ ret = ASN_PARSE_E;
+
+ if (ret == 0 && GetLength(pkiMsg, &idx, &encryptedContentSz,
+ pkiMsgSz) <= 0)
+ ret = ASN_PARSE_E;
+
+ if (ret < 0)
+ break;
+#ifndef NO_PKCS7_STREAM
+ /* next chunk of data should contain encrypted content */
+ pkcs7->stream->varThree = encryptedContentSz;
+ if ((ret = wc_PKCS7_StreamEndCase(pkcs7, &tmpIdx, &idx)) != 0) {
+ break;
+ }
+
+ if (pkcs7->stream->totalRd + encryptedContentSz < pkiMsgSz) {
+ pkcs7->stream->flagOne = 1;
+ }
+
+ pkcs7->stream->expected = (pkcs7->stream->maxLen -
+ pkcs7->stream->totalRd) + pkcs7->stream->length;
- if (GetLength(pkiMsg, &idx, &encryptedContentSz, pkiMsgSz) < 0) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return ASN_PARSE_E;
- }
-
- encryptedContent = (byte*)XMALLOC(encryptedContentSz, NULL,
- DYNAMIC_TYPE_TMP_BUFFER);
- if (encryptedContent == NULL) {
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_STAGE6);
+ FALL_THROUGH;
+ /* end of stage 5 */
+
+ case WC_PKCS7_STAGE6:
+#ifndef NO_PKCS7_STREAM
+ if ((ret = wc_PKCS7_AddDataToStream(pkcs7, in, inSz,
+ pkcs7->stream->expected, &pkiMsg, &idx)) != 0) {
+ return ret;
+ }
+
+ rc = wc_PKCS7_GetMaxStream(pkcs7, PKCS7_DEFAULT_PEEK, in,
+ inSz);
+ if (rc < 0) {
+ ret = (int)rc;
+ break;
+ }
+ pkiMsgSz = (word32)rc;
+
+ /* restore saved variables */
+ expBlockSz = pkcs7->stream->varOne;
+ encOID = pkcs7->stream->varTwo;
+ encryptedContentSz = pkcs7->stream->varThree;
+ version = pkcs7->stream->vers;
+ tmpIv = pkcs7->stream->tmpIv;
+#else
+ encOID = 0;
#endif
- return MEMORY_E;
- }
+ if (ret == 0 && (encryptedContent = (byte*)XMALLOC(
+ encryptedContentSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7)) == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
- XMEMCPY(encryptedContent, &pkiMsg[idx], encryptedContentSz);
+ if (ret == 0) {
+ XMEMCPY(encryptedContent, &pkiMsg[idx], encryptedContentSz);
+ idx += encryptedContentSz;
+
+ /* decrypt encryptedContent */
+ ret = wc_PKCS7_DecryptContent(pkcs7, encOID,
+ pkcs7->encryptionKey, pkcs7->encryptionKeySz, tmpIv,
+ expBlockSz, NULL, 0, NULL, 0, encryptedContent,
+ encryptedContentSz, encryptedContent);
+ if (ret != 0) {
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ }
+ }
- /* load private key */
-#ifdef WOLFSSL_SMALL_STACK
- privKey = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (privKey == NULL) {
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER); return MEMORY_E;
+ if (ret == 0) {
+ padLen = encryptedContent[encryptedContentSz-1];
+
+ if (padLen > encryptedContentSz) {
+ WOLFSSL_MSG("Bad padding size found");
+ ret = BUFFER_E;
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ break;
+ }
+
+ /* copy plaintext to output */
+ XMEMCPY(output, encryptedContent, encryptedContentSz - padLen);
+
+ /* get implicit[1] unprotected attributes, optional */
+ wc_PKCS7_FreeDecodedAttrib(pkcs7->decodedAttrib, pkcs7->heap);
+ pkcs7->decodedAttrib = NULL;
+ #ifndef NO_PKCS7_STREAM
+ if (pkcs7->stream->flagOne)
+ #else
+ if (idx < pkiMsgSz)
+ #endif
+ {
+ haveAttribs = 1;
+
+ ret = wc_PKCS7_DecodeUnprotectedAttributes(pkcs7, pkiMsg,
+ pkiMsgSz, &idx);
+ if (ret != 0) {
+ ForceZero(encryptedContent, encryptedContentSz);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ ret = ASN_PARSE_E;
+ }
+ }
+ }
+
+ if (ret == 0) {
+ ForceZero(encryptedContent, encryptedContentSz);
+ XFREE(encryptedContent, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+ /* go back and check the version now that attribs have been processed */
+ if (pkcs7->version == 3 && version != 0) {
+ WOLFSSL_MSG("Wrong PKCS#7 FirmwareEncryptedData version");
+ return ASN_VERSION_E;
+ }
+
+ if (pkcs7->version != 3 &&
+ ((haveAttribs == 0 && version != 0) ||
+ (haveAttribs == 1 && version != 2))) {
+ WOLFSSL_MSG("Wrong PKCS#7 EncryptedData version");
+ return ASN_VERSION_E;
+ }
+ ret = encryptedContentSz - padLen;
+ }
+
+ if (ret != 0) break;
+ #ifndef NO_PKCS7_STREAM
+ wc_PKCS7_ResetStream(pkcs7);
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
+ break;
+
+ default:
+ WOLFSSL_MSG("Error in unknown PKCS#7 Decode Encrypted Data state");
+ return BAD_STATE_E;
}
-#endif
- ret = wc_InitRsaKey(privKey, 0);
if (ret != 0) {
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(privKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ret;
+ #ifndef NO_PKCS7_STREAM
+ /* restart in error case */
+ wc_PKCS7_ResetStream(pkcs7);
+ #endif
+ wc_PKCS7_ChangeState(pkcs7, WC_PKCS7_START);
}
+ return ret;
+}
- idx = 0;
- ret = wc_RsaPrivateKeyDecode(pkcs7->privateKey, &idx, privKey,
- pkcs7->privateKeySz);
- if (ret != 0) {
- WOLFSSL_MSG("Failed to decode RSA private key");
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(privKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+/* Function to set callback during decryption, this overrides the default
+ * decryption function and can be used for choosing a key at run time based
+ * on the parsed bundle so far.
+ * returns 0 on success
+ */
+int wc_PKCS7_SetDecodeEncryptedCb(PKCS7* pkcs7,
+ CallbackDecryptContent decryptionCb)
+{
+ if (pkcs7 != NULL) {
+ pkcs7->decryptionCb = decryptionCb;
+ }
+ return 0;
+}
+
+
+/* Set an optional user context that gets passed to callback
+ * returns 0 on success
+ */
+int wc_PKCS7_SetDecodeEncryptedCtx(PKCS7* pkcs7, void* ctx)
+{
+ if (pkcs7 != NULL) {
+ pkcs7->decryptionCtx = ctx;
+ }
+ return 0;
+}
+#endif /* NO_PKCS7_ENCRYPTED_DATA */
+
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+
+/* build PKCS#7 compressedData content type, return encrypted size */
+int wc_PKCS7_EncodeCompressedData(PKCS7* pkcs7, byte* output, word32 outputSz)
+{
+ byte contentInfoSeq[MAX_SEQ_SZ];
+ byte contentInfoTypeOid[MAX_OID_SZ];
+ byte contentInfoContentSeq[MAX_SEQ_SZ]; /* EXPLICIT [0] */
+ byte compressedDataSeq[MAX_SEQ_SZ];
+ byte cmsVersion[MAX_VERSION_SZ];
+ byte compressAlgId[MAX_ALGO_SZ];
+ byte encapContentInfoSeq[MAX_SEQ_SZ];
+ byte contentTypeOid[MAX_OID_SZ];
+ byte contentSeq[MAX_SEQ_SZ]; /* EXPLICIT [0] */
+ byte contentOctetStr[MAX_OCTET_STR_SZ];
+
+ int ret;
+ word32 totalSz, idx;
+ word32 contentInfoSeqSz, contentInfoContentSeqSz, contentInfoTypeOidSz;
+ word32 compressedDataSeqSz, cmsVersionSz, compressAlgIdSz;
+ word32 encapContentInfoSeqSz, contentTypeOidSz, contentSeqSz;
+ word32 contentOctetStrSz;
+
+ byte* compressed;
+ word32 compressedSz;
+
+ if (pkcs7 == NULL || pkcs7->content == NULL || pkcs7->contentSz == 0 ||
+ output == NULL || outputSz == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* allocate space for compressed content. The libz code says the compressed
+ * buffer should be srcSz + 0.1% + 12. */
+ compressedSz = (pkcs7->contentSz + (word32)(pkcs7->contentSz * 0.001) + 12);
+ compressed = (byte*)XMALLOC(compressedSz, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (compressed == NULL) {
+ WOLFSSL_MSG("Error allocating memory for CMS compressed content");
+ return MEMORY_E;
+ }
+
+ /* compress content */
+ ret = wc_Compress(compressed, compressedSz, pkcs7->content,
+ pkcs7->contentSz, 0);
+ if (ret < 0) {
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return ret;
}
+ compressedSz = (word32)ret;
- /* decrypt encryptedKey */
- keySz = wc_RsaPrivateDecryptInline(encryptedKey, encryptedKeySz,
- &decryptedKey, privKey);
- wc_FreeRsaKey(privKey);
+ /* eContent OCTET STRING, working backwards */
+ contentOctetStrSz = SetOctetString(compressedSz, contentOctetStr);
+ totalSz = contentOctetStrSz + compressedSz;
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(privKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ /* EXPLICIT [0] eContentType */
+ contentSeqSz = SetExplicit(0, totalSz, contentSeq);
+ totalSz += contentSeqSz;
- if (keySz <= 0) {
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return keySz;
+ /* eContentType OBJECT IDENTIFIER */
+ ret = wc_SetContentType(pkcs7->contentOID, contentTypeOid,
+ sizeof(contentTypeOid));
+ if (ret < 0) {
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
}
- /* decrypt encryptedContent */
- if (encOID == DESb) {
- Des des;
- ret = wc_Des_SetKey(&des, decryptedKey, tmpIv, DES_DECRYPTION);
+ contentTypeOidSz = ret;
+ totalSz += contentTypeOidSz;
- if (ret == 0)
- wc_Des_CbcDecrypt(&des, encryptedContent, encryptedContent,
- encryptedContentSz);
+ /* EncapsulatedContentInfo SEQUENCE */
+ encapContentInfoSeqSz = SetSequence(totalSz, encapContentInfoSeq);
+ totalSz += encapContentInfoSeqSz;
- if (ret != 0) {
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ /* compressionAlgorithm AlgorithmIdentifier */
+ /* Only supports zlib for compression currently:
+ * id-alg-zlibCompress (1.2.840.113549.1.9.16.3.8) */
+ compressAlgIdSz = SetAlgoID(ZLIBc, compressAlgId, oidCompressType, 0);
+ totalSz += compressAlgIdSz;
+
+ /* version */
+ cmsVersionSz = SetMyVersion(0, cmsVersion, 0);
+ totalSz += cmsVersionSz;
+
+ /* CompressedData SEQUENCE */
+ compressedDataSeqSz = SetSequence(totalSz, compressedDataSeq);
+ totalSz += compressedDataSeqSz;
+
+ /* ContentInfo content EXPLICIT SEQUENCE */
+ contentInfoContentSeqSz = SetExplicit(0, totalSz, contentInfoContentSeq);
+ totalSz += contentInfoContentSeqSz;
+
+ /* ContentInfo ContentType (compressedData) */
+ if (pkcs7->version == 3) {
+ contentInfoTypeOidSz = 0;
+ }
+ else {
+ ret = wc_SetContentType(COMPRESSED_DATA, contentInfoTypeOid,
+ sizeof(contentInfoTypeOid));
+ if (ret < 0) {
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
return ret;
}
+
+ contentInfoTypeOidSz = ret;
+ totalSz += contentInfoTypeOidSz;
}
- else if (encOID == DES3b) {
- Des3 des;
- ret = wc_Des3_SetKey(&des, decryptedKey, tmpIv, DES_DECRYPTION);
- if (ret == 0)
- ret = wc_Des3_CbcDecrypt(&des, encryptedContent, encryptedContent,
- encryptedContentSz);
- if (ret != 0) {
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ret;
- }
- } else {
- WOLFSSL_MSG("Unsupported content encryption OID type");
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
- return ALGO_ID_E;
+ /* ContentInfo SEQUENCE */
+ contentInfoSeqSz = SetSequence(totalSz, contentInfoSeq);
+ totalSz += contentInfoSeqSz;
+
+ if (outputSz < totalSz) {
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BUFFER_E;
}
- padLen = encryptedContent[encryptedContentSz-1];
+ idx = 0;
+ XMEMCPY(output + idx, contentInfoSeq, contentInfoSeqSz);
+ idx += contentInfoSeqSz;
+ XMEMCPY(output + idx, contentInfoTypeOid, contentInfoTypeOidSz);
+ idx += contentInfoTypeOidSz;
+ XMEMCPY(output + idx, contentInfoContentSeq, contentInfoContentSeqSz);
+ idx += contentInfoContentSeqSz;
+ XMEMCPY(output + idx, compressedDataSeq, compressedDataSeqSz);
+ idx += compressedDataSeqSz;
+ XMEMCPY(output + idx, cmsVersion, cmsVersionSz);
+ idx += cmsVersionSz;
+ XMEMCPY(output + idx, compressAlgId, compressAlgIdSz);
+ idx += compressAlgIdSz;
+ XMEMCPY(output + idx, encapContentInfoSeq, encapContentInfoSeqSz);
+ idx += encapContentInfoSeqSz;
+ XMEMCPY(output + idx, contentTypeOid, contentTypeOidSz);
+ idx += contentTypeOidSz;
+ XMEMCPY(output + idx, contentSeq, contentSeqSz);
+ idx += contentSeqSz;
+ XMEMCPY(output + idx, contentOctetStr, contentOctetStrSz);
+ idx += contentOctetStrSz;
+ XMEMCPY(output + idx, compressed, compressedSz);
+ idx += compressedSz;
+
+ XFREE(compressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
- /* copy plaintext to output */
- XMEMCPY(output, encryptedContent, encryptedContentSz - padLen);
+ return idx;
+}
- /* free memory, zero out keys */
- ForceZero(encryptedKey, MAX_ENCRYPTED_KEY_SZ);
- ForceZero(encryptedContent, encryptedContentSz);
- XFREE(encryptedContent, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(encryptedKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
- return encryptedContentSz - padLen;
+/* unwrap and decompress PKCS#7/CMS compressedData object,
+ * returned decoded size */
+int wc_PKCS7_DecodeCompressedData(PKCS7* pkcs7, byte* pkiMsg, word32 pkiMsgSz,
+ byte* output, word32 outputSz)
+{
+ int length, version, ret;
+ word32 idx = 0, algOID, contentType;
+ byte tag;
+
+ byte* decompressed;
+ word32 decompressedSz;
+
+ if (pkcs7 == NULL || pkiMsg == NULL || pkiMsgSz == 0 ||
+ output == NULL || outputSz == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* get ContentInfo SEQUENCE */
+ if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (pkcs7->version != 3) {
+ /* get ContentInfo contentType */
+ if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (contentType != COMPRESSED_DATA)
+ return ASN_PARSE_E;
+ }
+
+ /* get ContentInfo content EXPLICIT SEQUENCE */
+ if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* get CompressedData SEQUENCE */
+ if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* get version */
+ if (GetMyVersion(pkiMsg, &idx, &version, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (version != 0) {
+ WOLFSSL_MSG("CMS CompressedData version MUST be 0, but is not");
+ return ASN_PARSE_E;
+ }
+
+ /* get CompressionAlgorithmIdentifier */
+ if (GetAlgoId(pkiMsg, &idx, &algOID, oidIgnoreType, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* Only supports zlib for compression currently:
+ * id-alg-zlibCompress (1.2.840.113549.1.9.16.3.8) */
+ if (algOID != ZLIBc) {
+ WOLFSSL_MSG("CMS CompressedData only supports zlib algorithm");
+ return ASN_PARSE_E;
+ }
+
+ /* get EncapsulatedContentInfo SEQUENCE */
+ if (GetSequence(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* get ContentType OID */
+ if (wc_GetContentType(pkiMsg, &idx, &contentType, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ pkcs7->contentOID = contentType;
+
+ /* get eContent EXPLICIT SEQUENCE */
+ if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != (ASN_CONSTRUCTED | ASN_CONTEXT_SPECIFIC | 0))
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* get content OCTET STRING */
+ if (GetASNTag(pkiMsg, &idx, &tag, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ if (tag != ASN_OCTET_STRING)
+ return ASN_PARSE_E;
+
+ if (GetLength(pkiMsg, &idx, &length, pkiMsgSz) < 0)
+ return ASN_PARSE_E;
+
+ /* allocate space for decompressed data */
+ decompressed = (byte*)XMALLOC(length, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ if (decompressed == NULL) {
+ WOLFSSL_MSG("Error allocating memory for CMS decompression buffer");
+ return MEMORY_E;
+ }
+
+ /* decompress content */
+ ret = wc_DeCompress(decompressed, length, &pkiMsg[idx], length);
+ if (ret < 0) {
+ XFREE(decompressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return ret;
+ }
+ decompressedSz = (word32)ret;
+
+ /* get content */
+ if (outputSz < decompressedSz) {
+ WOLFSSL_MSG("CMS output buffer too small to hold decompressed data");
+ XFREE(decompressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+ return BUFFER_E;
+ }
+
+ XMEMCPY(output, decompressed, decompressedSz);
+ XFREE(decompressed, pkcs7->heap, DYNAMIC_TYPE_PKCS7);
+
+ return decompressedSz;
}
+#endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
#else /* HAVE_PKCS7 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/poly1305.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/poly1305.c
index 3cc86e1bb..651664884 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/poly1305.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/poly1305.c
@@ -1,8 +1,8 @@
/* poly1305.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,12 +16,15 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
- *
- * Based off the public domain implementations by Andrew Moon
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/*
+ * Based off the public domain implementations by Andrew Moon
* and Daniel J. Bernstein
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -32,9 +35,11 @@
#include <wolfssl/wolfcrypt/poly1305.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
#ifdef CHACHA_AEAD_TEST
@@ -46,222 +51,354 @@
#pragma warning(disable: 4127)
#endif
-#if defined(POLY130564)
-
- #if defined(_MSC_VER)
- #define POLY1305_NOINLINE __declspec(noinline)
- #elif defined(__GNUC__)
- #define POLY1305_NOINLINE __attribute__((noinline))
- #else
- #define POLY1305_NOINLINE
- #endif
-
- #if defined(_MSC_VER)
- #include <intrin.h>
-
- typedef struct word128 {
- word64 lo;
- word64 hi;
- } word128;
-
- #define MUL(out, x, y) out.lo = _umul128((x), (y), &out.hi)
- #define ADD(out, in) { word64 t = out.lo; out.lo += in.lo;
- out.hi += (out.lo < t) + in.hi; }
- #define ADDLO(out, in) { word64 t = out.lo; out.lo += in;
- out.hi += (out.lo < t); }
- #define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift)))
- #define LO(in) (in.lo)
-
- #elif defined(__GNUC__)
- #if defined(__SIZEOF_INT128__)
- typedef unsigned __int128 word128;
- #else
- typedef unsigned word128 __attribute__((mode(TI)));
- #endif
-
- #define MUL(out, x, y) out = ((word128)x * y)
- #define ADD(out, in) out += in
- #define ADDLO(out, in) out += in
- #define SHR(in, shift) (word64)(in >> (shift))
- #define LO(in) (word64)(in)
- #endif
-
- static word64 U8TO64(const byte* p) {
- return
- (((word64)(p[0] & 0xff) ) |
- ((word64)(p[1] & 0xff) << 8) |
- ((word64)(p[2] & 0xff) << 16) |
- ((word64)(p[3] & 0xff) << 24) |
- ((word64)(p[4] & 0xff) << 32) |
- ((word64)(p[5] & 0xff) << 40) |
- ((word64)(p[6] & 0xff) << 48) |
- ((word64)(p[7] & 0xff) << 56));
- }
-
- static void U64TO8(byte* p, word64 v) {
- p[0] = (v ) & 0xff;
- p[1] = (v >> 8) & 0xff;
- p[2] = (v >> 16) & 0xff;
- p[3] = (v >> 24) & 0xff;
- p[4] = (v >> 32) & 0xff;
- p[5] = (v >> 40) & 0xff;
- p[6] = (v >> 48) & 0xff;
- p[7] = (v >> 56) & 0xff;
- }
+#ifdef USE_INTEL_SPEEDUP
+ #include <emmintrin.h>
+ #include <immintrin.h>
+
+ #if defined(__GNUC__) && ((__GNUC__ < 4) || \
+ (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+ #undef NO_AVX2_SUPPORT
+ #define NO_AVX2_SUPPORT
+ #endif
+ #if defined(__clang__) && ((__clang_major__ < 3) || \
+ (__clang_major__ == 3 && __clang_minor__ <= 5))
+ #define NO_AVX2_SUPPORT
+ #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
+ #undef NO_AVX2_SUPPORT
+ #endif
+
+ #define HAVE_INTEL_AVX1
+ #ifndef NO_AVX2_SUPPORT
+ #define HAVE_INTEL_AVX2
+ #endif
+#endif
-#else /* if not 64 bit then use 32 bit */
-
- static word32 U8TO32(const byte *p) {
- return
- (((word32)(p[0] & 0xff) ) |
- ((word32)(p[1] & 0xff) << 8) |
- ((word32)(p[2] & 0xff) << 16) |
- ((word32)(p[3] & 0xff) << 24));
- }
-
- static void U32TO8(byte *p, word32 v) {
- p[0] = (v ) & 0xff;
- p[1] = (v >> 8) & 0xff;
- p[2] = (v >> 16) & 0xff;
- p[3] = (v >> 24) & 0xff;
- }
+#ifdef USE_INTEL_SPEEDUP
+static word32 intel_flags = 0;
+static word32 cpu_flags_set = 0;
#endif
-static void poly1305_blocks(Poly1305* ctx, const unsigned char *m,
- size_t bytes) {
+#if defined(USE_INTEL_SPEEDUP) || defined(POLY130564)
+ #if defined(_MSC_VER)
+ #define POLY1305_NOINLINE __declspec(noinline)
+ #elif defined(__GNUC__)
+ #define POLY1305_NOINLINE __attribute__((noinline))
+ #else
+ #define POLY1305_NOINLINE
+ #endif
+
+ #if defined(_MSC_VER)
+ #include <intrin.h>
+
+ typedef struct word128 {
+ word64 lo;
+ word64 hi;
+ } word128;
+
+ #define MUL(out, x, y) out.lo = _umul128((x), (y), &out.hi)
+ #define ADD(out, in) { word64 t = out.lo; out.lo += in.lo; \
+ out.hi += (out.lo < t) + in.hi; }
+ #define ADDLO(out, in) { word64 t = out.lo; out.lo += in; \
+ out.hi += (out.lo < t); }
+ #define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift)))
+ #define LO(in) (in.lo)
+
+ #elif defined(__GNUC__)
+ #if defined(__SIZEOF_INT128__)
+ typedef unsigned __int128 word128;
+ #else
+ typedef unsigned word128 __attribute__((mode(TI)));
+ #endif
+
+ #define MUL(out, x, y) out = ((word128)x * y)
+ #define ADD(out, in) out += in
+ #define ADDLO(out, in) out += in
+ #define SHR(in, shift) (word64)(in >> (shift))
+ #define LO(in) (word64)(in)
+ #endif
+#endif
-#ifdef POLY130564
+#ifdef USE_INTEL_SPEEDUP
+#ifdef __cplusplus
+ extern "C" {
+#endif
- const word64 hibit = (ctx->final) ? 0 : ((word64)1 << 40); /* 1 << 128 */
- word64 r0,r1,r2;
- word64 s1,s2;
- word64 h0,h1,h2;
- word64 c;
- word128 d0,d1,d2,d;
+#ifdef HAVE_INTEL_AVX1
+/* Process one block (16 bytes) of data.
+ *
+ * ctx Poly1305 context.
+ * m One block of message data.
+ */
+extern void poly1305_block_avx(Poly1305* ctx, const unsigned char *m);
+/* Process multiple blocks (n * 16 bytes) of data.
+ *
+ * ctx Poly1305 context.
+ * m Blocks of message data.
+ * bytes The number of bytes to process.
+ */
+extern void poly1305_blocks_avx(Poly1305* ctx, const unsigned char* m,
+ size_t bytes);
+/* Set the key to use when processing data.
+ * Initialize the context.
+ *
+ * ctx Poly1305 context.
+ * key The key data (16 bytes).
+ */
+extern void poly1305_setkey_avx(Poly1305* ctx, const byte* key);
+/* Calculate the final result - authentication data.
+ * Zeros out the private data in the context.
+ *
+ * ctx Poly1305 context.
+ * mac Buffer to hold 16 bytes.
+ */
+extern void poly1305_final_avx(Poly1305* ctx, byte* mac);
+#endif
-#else
+#ifdef HAVE_INTEL_AVX2
+/* Process multiple blocks (n * 16 bytes) of data.
+ *
+ * ctx Poly1305 context.
+ * m Blocks of message data.
+ * bytes The number of bytes to process.
+ */
+extern void poly1305_blocks_avx2(Poly1305* ctx, const unsigned char* m,
+ size_t bytes);
+/* Calculate R^1, R^2, R^3 and R^4 and store them in the context.
+ *
+ * ctx Poly1305 context.
+ */
+extern void poly1305_calc_powers_avx2(Poly1305* ctx);
+/* Set the key to use when processing data.
+ * Initialize the context.
+ * Calls AVX set key function as final function calls AVX code.
+ *
+ * ctx Poly1305 context.
+ * key The key data (16 bytes).
+ */
+extern void poly1305_setkey_avx2(Poly1305* ctx, const byte* key);
+/* Calculate the final result - authentication data.
+ * Zeros out the private data in the context.
+ * Calls AVX final function to quickly process last blocks.
+ *
+ * ctx Poly1305 context.
+ * mac Buffer to hold 16 bytes - authentication data.
+ */
+extern void poly1305_final_avx2(Poly1305* ctx, byte* mac);
+#endif
- const word32 hibit = (ctx->final) ? 0 : (1 << 24); /* 1 << 128 */
- word32 r0,r1,r2,r3,r4;
- word32 s1,s2,s3,s4;
- word32 h0,h1,h2,h3,h4;
- word64 d0,d1,d2,d3,d4;
- word32 c;
+#ifdef __cplusplus
+ } /* extern "C" */
+#endif
+
+#elif defined(POLY130564)
+#ifndef WOLFSSL_ARMASM
+ static word64 U8TO64(const byte* p)
+ {
+ return
+ (((word64)(p[0] & 0xff) ) |
+ ((word64)(p[1] & 0xff) << 8) |
+ ((word64)(p[2] & 0xff) << 16) |
+ ((word64)(p[3] & 0xff) << 24) |
+ ((word64)(p[4] & 0xff) << 32) |
+ ((word64)(p[5] & 0xff) << 40) |
+ ((word64)(p[6] & 0xff) << 48) |
+ ((word64)(p[7] & 0xff) << 56));
+ }
+
+ static void U64TO8(byte* p, word64 v) {
+ p[0] = (v ) & 0xff;
+ p[1] = (v >> 8) & 0xff;
+ p[2] = (v >> 16) & 0xff;
+ p[3] = (v >> 24) & 0xff;
+ p[4] = (v >> 32) & 0xff;
+ p[5] = (v >> 40) & 0xff;
+ p[6] = (v >> 48) & 0xff;
+ p[7] = (v >> 56) & 0xff;
+ }
+#endif/* WOLFSSL_ARMASM */
+#else /* if not 64 bit then use 32 bit */
+
+ static word32 U8TO32(const byte *p)
+ {
+ return
+ (((word32)(p[0] & 0xff) ) |
+ ((word32)(p[1] & 0xff) << 8) |
+ ((word32)(p[2] & 0xff) << 16) |
+ ((word32)(p[3] & 0xff) << 24));
+ }
+ static void U32TO8(byte *p, word32 v) {
+ p[0] = (v ) & 0xff;
+ p[1] = (v >> 8) & 0xff;
+ p[2] = (v >> 16) & 0xff;
+ p[3] = (v >> 24) & 0xff;
+ }
#endif
-#ifdef POLY130564
+/* convert 32-bit unsigned to little endian 64 bit type as byte array */
+static WC_INLINE void u32tole64(const word32 inLe32, byte outLe64[8])
+{
+#ifndef WOLFSSL_X86_64_BUILD
+ outLe64[0] = (byte)(inLe32 & 0x000000FF);
+ outLe64[1] = (byte)((inLe32 & 0x0000FF00) >> 8);
+ outLe64[2] = (byte)((inLe32 & 0x00FF0000) >> 16);
+ outLe64[3] = (byte)((inLe32 & 0xFF000000) >> 24);
+ outLe64[4] = 0;
+ outLe64[5] = 0;
+ outLe64[6] = 0;
+ outLe64[7] = 0;
+#else
+ *(word64*)outLe64 = inLe32;
+#endif
+}
+
+
+#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__)
+void poly1305_blocks(Poly1305* ctx, const unsigned char *m,
+ size_t bytes)
+{
+#ifdef USE_INTEL_SPEEDUP
+ /* AVX2 is handled in wc_Poly1305Update. */
+ poly1305_blocks_avx(ctx, m, bytes);
+#elif defined(POLY130564)
+ const word64 hibit = (ctx->finished) ? 0 : ((word64)1 << 40); /* 1 << 128 */
+ word64 r0,r1,r2;
+ word64 s1,s2;
+ word64 h0,h1,h2;
+ word64 c;
+ word128 d0,d1,d2,d;
r0 = ctx->r[0];
- r1 = ctx->r[1];
- r2 = ctx->r[2];
+ r1 = ctx->r[1];
+ r2 = ctx->r[2];
- h0 = ctx->h[0];
- h1 = ctx->h[1];
- h2 = ctx->h[2];
+ h0 = ctx->h[0];
+ h1 = ctx->h[1];
+ h2 = ctx->h[2];
- s1 = r1 * (5 << 2);
- s2 = r2 * (5 << 2);
+ s1 = r1 * (5 << 2);
+ s2 = r2 * (5 << 2);
- while (bytes >= POLY1305_BLOCK_SIZE) {
- word64 t0,t1;
+ while (bytes >= POLY1305_BLOCK_SIZE) {
+ word64 t0,t1;
- /* h += m[i] */
- t0 = U8TO64(&m[0]);
- t1 = U8TO64(&m[8]);
+ /* h += m[i] */
+ t0 = U8TO64(&m[0]);
+ t1 = U8TO64(&m[8]);
- h0 += (( t0 ) & 0xfffffffffff);
- h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
- h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit;
+ h0 += (( t0 ) & 0xfffffffffff);
+ h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
+ h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit;
- /* h *= r */
- MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d);
- MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d);
- MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d);
+ /* h *= r */
+ MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d);
+ MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d);
+ MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d);
- /* (partial) h %= p */
- c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
- ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
- ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
- h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff;
- h1 += c;
+ /* (partial) h %= p */
+ c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
+ ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
+ ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
+ h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff;
+ h1 += c;
- m += POLY1305_BLOCK_SIZE;
- bytes -= POLY1305_BLOCK_SIZE;
- }
+ m += POLY1305_BLOCK_SIZE;
+ bytes -= POLY1305_BLOCK_SIZE;
+ }
- ctx->h[0] = h0;
- ctx->h[1] = h1;
- ctx->h[2] = h2;
+ ctx->h[0] = h0;
+ ctx->h[1] = h1;
+ ctx->h[2] = h2;
#else /* if not 64 bit then use 32 bit */
-
- r0 = ctx->r[0];
- r1 = ctx->r[1];
- r2 = ctx->r[2];
- r3 = ctx->r[3];
- r4 = ctx->r[4];
-
- s1 = r1 * 5;
- s2 = r2 * 5;
- s3 = r3 * 5;
- s4 = r4 * 5;
-
- h0 = ctx->h[0];
- h1 = ctx->h[1];
- h2 = ctx->h[2];
- h3 = ctx->h[3];
- h4 = ctx->h[4];
-
- while (bytes >= POLY1305_BLOCK_SIZE) {
- /* h += m[i] */
- h0 += (U8TO32(m+ 0) ) & 0x3ffffff;
- h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff;
- h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff;
- h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff;
- h4 += (U8TO32(m+12) >> 8) | hibit;
-
- /* h *= r */
- d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) +
+ const word32 hibit = (ctx->finished) ? 0 : ((word32)1 << 24); /* 1 << 128 */
+ word32 r0,r1,r2,r3,r4;
+ word32 s1,s2,s3,s4;
+ word32 h0,h1,h2,h3,h4;
+ word64 d0,d1,d2,d3,d4;
+ word32 c;
+
+
+ r0 = ctx->r[0];
+ r1 = ctx->r[1];
+ r2 = ctx->r[2];
+ r3 = ctx->r[3];
+ r4 = ctx->r[4];
+
+ s1 = r1 * 5;
+ s2 = r2 * 5;
+ s3 = r3 * 5;
+ s4 = r4 * 5;
+
+ h0 = ctx->h[0];
+ h1 = ctx->h[1];
+ h2 = ctx->h[2];
+ h3 = ctx->h[3];
+ h4 = ctx->h[4];
+
+ while (bytes >= POLY1305_BLOCK_SIZE) {
+ /* h += m[i] */
+ h0 += (U8TO32(m+ 0) ) & 0x3ffffff;
+ h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff;
+ h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff;
+ h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff;
+ h4 += (U8TO32(m+12) >> 8) | hibit;
+
+ /* h *= r */
+ d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) +
((word64)h3 * s2) + ((word64)h4 * s1);
- d1 = ((word64)h0 * r1) + ((word64)h1 * r0) + ((word64)h2 * s4) +
+ d1 = ((word64)h0 * r1) + ((word64)h1 * r0) + ((word64)h2 * s4) +
((word64)h3 * s3) + ((word64)h4 * s2);
- d2 = ((word64)h0 * r2) + ((word64)h1 * r1) + ((word64)h2 * r0) +
+ d2 = ((word64)h0 * r2) + ((word64)h1 * r1) + ((word64)h2 * r0) +
((word64)h3 * s4) + ((word64)h4 * s3);
- d3 = ((word64)h0 * r3) + ((word64)h1 * r2) + ((word64)h2 * r1) +
+ d3 = ((word64)h0 * r3) + ((word64)h1 * r2) + ((word64)h2 * r1) +
((word64)h3 * r0) + ((word64)h4 * s4);
- d4 = ((word64)h0 * r4) + ((word64)h1 * r3) + ((word64)h2 * r2) +
+ d4 = ((word64)h0 * r4) + ((word64)h1 * r3) + ((word64)h2 * r2) +
((word64)h3 * r1) + ((word64)h4 * r0);
- /* (partial) h %= p */
- c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff;
- d1 += c; c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff;
- d2 += c; c = (word32)(d2 >> 26); h2 = (word32)d2 & 0x3ffffff;
- d3 += c; c = (word32)(d3 >> 26); h3 = (word32)d3 & 0x3ffffff;
- d4 += c; c = (word32)(d4 >> 26); h4 = (word32)d4 & 0x3ffffff;
- h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff;
- h1 += c;
-
- m += POLY1305_BLOCK_SIZE;
- bytes -= POLY1305_BLOCK_SIZE;
- }
-
- ctx->h[0] = h0;
- ctx->h[1] = h1;
- ctx->h[2] = h2;
- ctx->h[3] = h3;
- ctx->h[4] = h4;
+ /* (partial) h %= p */
+ c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff;
+ d1 += c; c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff;
+ d2 += c; c = (word32)(d2 >> 26); h2 = (word32)d2 & 0x3ffffff;
+ d3 += c; c = (word32)(d3 >> 26); h3 = (word32)d3 & 0x3ffffff;
+ d4 += c; c = (word32)(d4 >> 26); h4 = (word32)d4 & 0x3ffffff;
+ h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff;
+ h1 += c;
+
+ m += POLY1305_BLOCK_SIZE;
+ bytes -= POLY1305_BLOCK_SIZE;
+ }
+
+ ctx->h[0] = h0;
+ ctx->h[1] = h1;
+ ctx->h[2] = h2;
+ ctx->h[3] = h3;
+ ctx->h[4] = h4;
#endif /* end of 64 bit cpu blocks or 32 bit cpu */
}
+void poly1305_block(Poly1305* ctx, const unsigned char *m)
+{
+#ifdef USE_INTEL_SPEEDUP
+ /* No call to poly1305_block when AVX2, AVX2 does 4 blocks at a time. */
+ poly1305_block_avx(ctx, m);
+#else
+ poly1305_blocks(ctx, m, POLY1305_BLOCK_SIZE);
+#endif
+}
+#endif /* !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) */
-int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) {
-
-#if defined(POLY130564)
+#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__)
+int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
+{
+#if defined(POLY130564) && !defined(USE_INTEL_SPEEDUP)
word64 t0,t1;
#endif
+ if (key == NULL)
+ return BAD_FUNC_ARG;
+
#ifdef CHACHA_AEAD_TEST
word32 k;
printf("Poly key used:\n");
@@ -270,239 +407,261 @@ int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) {
if ((k+1) % 8 == 0)
printf("\n");
}
- printf("\n");
+ printf("\n");
#endif
if (keySz != 32 || ctx == NULL)
return BAD_FUNC_ARG;
-#if defined(POLY130564)
+#ifdef USE_INTEL_SPEEDUP
+ if (!cpu_flags_set) {
+ intel_flags = cpuid_get_flags();
+ cpu_flags_set = 1;
+ }
+ #ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_AVX2(intel_flags))
+ poly1305_setkey_avx2(ctx, key);
+ else
+ #endif
+ poly1305_setkey_avx(ctx, key);
+#elif defined(POLY130564)
- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- t0 = U8TO64(key + 0);
- t1 = U8TO64(key + 8);
+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+ t0 = U8TO64(key + 0);
+ t1 = U8TO64(key + 8);
+
+ ctx->r[0] = ( t0 ) & 0xffc0fffffff;
+ ctx->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
+ ctx->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f;
- ctx->r[0] = ( t0 ) & 0xffc0fffffff;
- ctx->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
- ctx->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f;
+ /* h (accumulator) = 0 */
+ ctx->h[0] = 0;
+ ctx->h[1] = 0;
+ ctx->h[2] = 0;
- /* h (accumulator) = 0 */
- ctx->h[0] = 0;
- ctx->h[1] = 0;
- ctx->h[2] = 0;
+ /* save pad for later */
+ ctx->pad[0] = U8TO64(key + 16);
+ ctx->pad[1] = U8TO64(key + 24);
- /* save pad for later */
- ctx->pad[0] = U8TO64(key + 16);
- ctx->pad[1] = U8TO64(key + 24);
+ ctx->leftover = 0;
+ ctx->finished = 0;
#else /* if not 64 bit then use 32 bit */
-
+
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- ctx->r[0] = (U8TO32(key + 0) ) & 0x3ffffff;
- ctx->r[1] = (U8TO32(key + 3) >> 2) & 0x3ffff03;
- ctx->r[2] = (U8TO32(key + 6) >> 4) & 0x3ffc0ff;
- ctx->r[3] = (U8TO32(key + 9) >> 6) & 0x3f03fff;
- ctx->r[4] = (U8TO32(key + 12) >> 8) & 0x00fffff;
-
- /* h = 0 */
- ctx->h[0] = 0;
- ctx->h[1] = 0;
- ctx->h[2] = 0;
- ctx->h[3] = 0;
- ctx->h[4] = 0;
-
- /* save pad for later */
- ctx->pad[0] = U8TO32(key + 16);
- ctx->pad[1] = U8TO32(key + 20);
- ctx->pad[2] = U8TO32(key + 24);
- ctx->pad[3] = U8TO32(key + 28);
+ ctx->r[0] = (U8TO32(key + 0) ) & 0x3ffffff;
+ ctx->r[1] = (U8TO32(key + 3) >> 2) & 0x3ffff03;
+ ctx->r[2] = (U8TO32(key + 6) >> 4) & 0x3ffc0ff;
+ ctx->r[3] = (U8TO32(key + 9) >> 6) & 0x3f03fff;
+ ctx->r[4] = (U8TO32(key + 12) >> 8) & 0x00fffff;
+
+ /* h = 0 */
+ ctx->h[0] = 0;
+ ctx->h[1] = 0;
+ ctx->h[2] = 0;
+ ctx->h[3] = 0;
+ ctx->h[4] = 0;
+
+ /* save pad for later */
+ ctx->pad[0] = U8TO32(key + 16);
+ ctx->pad[1] = U8TO32(key + 20);
+ ctx->pad[2] = U8TO32(key + 24);
+ ctx->pad[3] = U8TO32(key + 28);
+
+ ctx->leftover = 0;
+ ctx->finished = 0;
#endif
- ctx->leftover = 0;
- ctx->final = 0;
-
return 0;
}
-
-int wc_Poly1305Final(Poly1305* ctx, byte* mac) {
-
-#if defined(POLY130564)
+int wc_Poly1305Final(Poly1305* ctx, byte* mac)
+{
+#ifdef USE_INTEL_SPEEDUP
+#elif defined(POLY130564)
word64 h0,h1,h2,c;
- word64 g0,g1,g2;
- word64 t0,t1;
+ word64 g0,g1,g2;
+ word64 t0,t1;
#else
word32 h0,h1,h2,h3,h4,c;
- word32 g0,g1,g2,g3,g4;
- word64 f;
- word32 mask;
+ word32 g0,g1,g2,g3,g4;
+ word64 f;
+ word32 mask;
#endif
if (ctx == NULL)
return BAD_FUNC_ARG;
-#if defined(POLY130564)
-
- /* process the remaining block */
- if (ctx->leftover) {
- size_t i = ctx->leftover;
- ctx->buffer[i] = 1;
- for (i = i + 1; i < POLY1305_BLOCK_SIZE; i++)
- ctx->buffer[i] = 0;
- ctx->final = 1;
- poly1305_blocks(ctx, ctx->buffer, POLY1305_BLOCK_SIZE);
- }
-
- /* fully carry h */
- h0 = ctx->h[0];
- h1 = ctx->h[1];
- h2 = ctx->h[2];
-
- c = (h1 >> 44); h1 &= 0xfffffffffff;
- h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
- h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
- h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
- h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
- h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
- h1 += c;
-
- /* compute h + -p */
- g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
- g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
- g2 = h2 + c - ((word64)1 << 42);
-
- /* select h if h < p, or h + -p if h >= p */
- c = (g2 >> ((sizeof(word64) * 8) - 1)) - 1;
- g0 &= c;
- g1 &= c;
- g2 &= c;
- c = ~c;
- h0 = (h0 & c) | g0;
- h1 = (h1 & c) | g1;
- h2 = (h2 & c) | g2;
-
- /* h = (h + pad) */
- t0 = ctx->pad[0];
- t1 = ctx->pad[1];
-
- h0 += (( t0 ) & 0xfffffffffff) ;
+#ifdef USE_INTEL_SPEEDUP
+ #ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_AVX2(intel_flags))
+ poly1305_final_avx2(ctx, mac);
+ else
+ #endif
+ poly1305_final_avx(ctx, mac);
+#elif defined(POLY130564)
+
+ /* process the remaining block */
+ if (ctx->leftover) {
+ size_t i = ctx->leftover;
+ ctx->buffer[i] = 1;
+ for (i = i + 1; i < POLY1305_BLOCK_SIZE; i++)
+ ctx->buffer[i] = 0;
+ ctx->finished = 1;
+ poly1305_block(ctx, ctx->buffer);
+ }
+
+ /* fully carry h */
+ h0 = ctx->h[0];
+ h1 = ctx->h[1];
+ h2 = ctx->h[2];
+
+ c = (h1 >> 44); h1 &= 0xfffffffffff;
+ h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
+ h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
+ h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
+ h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
+ h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
+ h1 += c;
+
+ /* compute h + -p */
+ g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
+ g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
+ g2 = h2 + c - ((word64)1 << 42);
+
+ /* select h if h < p, or h + -p if h >= p */
+ c = (g2 >> ((sizeof(word64) * 8) - 1)) - 1;
+ g0 &= c;
+ g1 &= c;
+ g2 &= c;
+ c = ~c;
+ h0 = (h0 & c) | g0;
+ h1 = (h1 & c) | g1;
+ h2 = (h2 & c) | g2;
+
+ /* h = (h + pad) */
+ t0 = ctx->pad[0];
+ t1 = ctx->pad[1];
+
+ h0 += (( t0 ) & 0xfffffffffff) ;
c = (h0 >> 44); h0 &= 0xfffffffffff;
- h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c;
+ h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c;
c = (h1 >> 44); h1 &= 0xfffffffffff;
- h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c;
+ h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c;
h2 &= 0x3ffffffffff;
- /* mac = h % (2^128) */
- h0 = ((h0 ) | (h1 << 44));
- h1 = ((h1 >> 20) | (h2 << 24));
+ /* mac = h % (2^128) */
+ h0 = ((h0 ) | (h1 << 44));
+ h1 = ((h1 >> 20) | (h2 << 24));
- U64TO8(mac + 0, h0);
- U64TO8(mac + 8, h1);
+ U64TO8(mac + 0, h0);
+ U64TO8(mac + 8, h1);
- /* zero out the state */
- ctx->h[0] = 0;
- ctx->h[1] = 0;
- ctx->h[2] = 0;
- ctx->r[0] = 0;
- ctx->r[1] = 0;
- ctx->r[2] = 0;
- ctx->pad[0] = 0;
- ctx->pad[1] = 0;
+ /* zero out the state */
+ ctx->h[0] = 0;
+ ctx->h[1] = 0;
+ ctx->h[2] = 0;
+ ctx->r[0] = 0;
+ ctx->r[1] = 0;
+ ctx->r[2] = 0;
+ ctx->pad[0] = 0;
+ ctx->pad[1] = 0;
#else /* if not 64 bit then use 32 bit */
-
- /* process the remaining block */
- if (ctx->leftover) {
- size_t i = ctx->leftover;
- ctx->buffer[i++] = 1;
- for (; i < POLY1305_BLOCK_SIZE; i++)
- ctx->buffer[i] = 0;
- ctx->final = 1;
- poly1305_blocks(ctx, ctx->buffer, POLY1305_BLOCK_SIZE);
- }
-
- /* fully carry h */
- h0 = ctx->h[0];
- h1 = ctx->h[1];
- h2 = ctx->h[2];
- h3 = ctx->h[3];
- h4 = ctx->h[4];
-
- c = h1 >> 26; h1 = h1 & 0x3ffffff;
- h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
- h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
- h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
- h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
- h1 += c;
-
- /* compute h + -p */
- g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
- g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
- g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
- g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
- g4 = h4 + c - (1 << 26);
-
- /* select h if h < p, or h + -p if h >= p */
- mask = (g4 >> ((sizeof(word32) * 8) - 1)) - 1;
- g0 &= mask;
- g1 &= mask;
- g2 &= mask;
- g3 &= mask;
- g4 &= mask;
- mask = ~mask;
- h0 = (h0 & mask) | g0;
- h1 = (h1 & mask) | g1;
- h2 = (h2 & mask) | g2;
- h3 = (h3 & mask) | g3;
- h4 = (h4 & mask) | g4;
-
- /* h = h % (2^128) */
- h0 = ((h0 ) | (h1 << 26)) & 0xffffffff;
- h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
- h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
- h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
-
- /* mac = (h + pad) % (2^128) */
- f = (word64)h0 + ctx->pad[0] ; h0 = (word32)f;
- f = (word64)h1 + ctx->pad[1] + (f >> 32); h1 = (word32)f;
- f = (word64)h2 + ctx->pad[2] + (f >> 32); h2 = (word32)f;
- f = (word64)h3 + ctx->pad[3] + (f >> 32); h3 = (word32)f;
-
- U32TO8(mac + 0, h0);
- U32TO8(mac + 4, h1);
- U32TO8(mac + 8, h2);
- U32TO8(mac + 12, h3);
-
- /* zero out the state */
- ctx->h[0] = 0;
- ctx->h[1] = 0;
- ctx->h[2] = 0;
- ctx->h[3] = 0;
- ctx->h[4] = 0;
- ctx->r[0] = 0;
- ctx->r[1] = 0;
- ctx->r[2] = 0;
- ctx->r[3] = 0;
- ctx->r[4] = 0;
- ctx->pad[0] = 0;
- ctx->pad[1] = 0;
- ctx->pad[2] = 0;
- ctx->pad[3] = 0;
+
+ /* process the remaining block */
+ if (ctx->leftover) {
+ size_t i = ctx->leftover;
+ ctx->buffer[i++] = 1;
+ for (; i < POLY1305_BLOCK_SIZE; i++)
+ ctx->buffer[i] = 0;
+ ctx->finished = 1;
+ poly1305_block(ctx, ctx->buffer);
+ }
+
+ /* fully carry h */
+ h0 = ctx->h[0];
+ h1 = ctx->h[1];
+ h2 = ctx->h[2];
+ h3 = ctx->h[3];
+ h4 = ctx->h[4];
+
+ c = h1 >> 26; h1 = h1 & 0x3ffffff;
+ h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
+ h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
+ h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
+ h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
+ h1 += c;
+
+ /* compute h + -p */
+ g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
+ g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
+ g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
+ g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
+ g4 = h4 + c - ((word32)1 << 26);
+
+ /* select h if h < p, or h + -p if h >= p */
+ mask = ((word32)g4 >> ((sizeof(word32) * 8) - 1)) - 1;
+ g0 &= mask;
+ g1 &= mask;
+ g2 &= mask;
+ g3 &= mask;
+ g4 &= mask;
+ mask = ~mask;
+ h0 = (h0 & mask) | g0;
+ h1 = (h1 & mask) | g1;
+ h2 = (h2 & mask) | g2;
+ h3 = (h3 & mask) | g3;
+ h4 = (h4 & mask) | g4;
+
+ /* h = h % (2^128) */
+ h0 = ((h0 ) | (h1 << 26)) & 0xffffffff;
+ h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
+ h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
+ h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
+
+ /* mac = (h + pad) % (2^128) */
+ f = (word64)h0 + ctx->pad[0] ; h0 = (word32)f;
+ f = (word64)h1 + ctx->pad[1] + (f >> 32); h1 = (word32)f;
+ f = (word64)h2 + ctx->pad[2] + (f >> 32); h2 = (word32)f;
+ f = (word64)h3 + ctx->pad[3] + (f >> 32); h3 = (word32)f;
+
+ U32TO8(mac + 0, h0);
+ U32TO8(mac + 4, h1);
+ U32TO8(mac + 8, h2);
+ U32TO8(mac + 12, h3);
+
+ /* zero out the state */
+ ctx->h[0] = 0;
+ ctx->h[1] = 0;
+ ctx->h[2] = 0;
+ ctx->h[3] = 0;
+ ctx->h[4] = 0;
+ ctx->r[0] = 0;
+ ctx->r[1] = 0;
+ ctx->r[2] = 0;
+ ctx->r[3] = 0;
+ ctx->r[4] = 0;
+ ctx->pad[0] = 0;
+ ctx->pad[1] = 0;
+ ctx->pad[2] = 0;
+ ctx->pad[3] = 0;
#endif
return 0;
}
+#endif /* !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) */
-int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes) {
-
- size_t i;
+int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
+{
+ size_t i;
#ifdef CHACHA_AEAD_TEST
word32 k;
@@ -512,43 +671,198 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes) {
if ((k+1) % 16 == 0)
printf("\n");
}
- printf("\n");
+ printf("\n");
#endif
-
+
if (ctx == NULL)
return BAD_FUNC_ARG;
- /* handle leftover */
- if (ctx->leftover) {
- size_t want = (POLY1305_BLOCK_SIZE - ctx->leftover);
- if (want > bytes)
- want = bytes;
- for (i = 0; i < want; i++)
- ctx->buffer[ctx->leftover + i] = m[i];
- bytes -= want;
- m += want;
- ctx->leftover += want;
- if (ctx->leftover < POLY1305_BLOCK_SIZE)
- return 0;
- poly1305_blocks(ctx, ctx->buffer, POLY1305_BLOCK_SIZE);
- ctx->leftover = 0;
- }
-
- /* process full blocks */
- if (bytes >= POLY1305_BLOCK_SIZE) {
- size_t want = (bytes & ~(POLY1305_BLOCK_SIZE - 1));
- poly1305_blocks(ctx, m, want);
- m += want;
- bytes -= want;
- }
-
- /* store leftover */
- if (bytes) {
- for (i = 0; i < bytes; i++)
- ctx->buffer[ctx->leftover + i] = m[i];
- ctx->leftover += bytes;
- }
+#ifdef USE_INTEL_SPEEDUP
+ #ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_AVX2(intel_flags)) {
+ /* handle leftover */
+ if (ctx->leftover) {
+ size_t want = sizeof(ctx->buffer) - ctx->leftover;
+ if (want > bytes)
+ want = bytes;
+
+ for (i = 0; i < want; i++)
+ ctx->buffer[ctx->leftover + i] = m[i];
+ bytes -= (word32)want;
+ m += want;
+ ctx->leftover += want;
+ if (ctx->leftover < sizeof(ctx->buffer))
+ return 0;
+
+ if (!ctx->started)
+ poly1305_calc_powers_avx2(ctx);
+ poly1305_blocks_avx2(ctx, ctx->buffer, sizeof(ctx->buffer));
+ ctx->leftover = 0;
+ }
+
+ /* process full blocks */
+ if (bytes >= sizeof(ctx->buffer)) {
+ size_t want = bytes & ~(sizeof(ctx->buffer) - 1);
+
+ if (!ctx->started)
+ poly1305_calc_powers_avx2(ctx);
+ poly1305_blocks_avx2(ctx, m, want);
+ m += want;
+ bytes -= (word32)want;
+ }
+
+ /* store leftover */
+ if (bytes) {
+ for (i = 0; i < bytes; i++)
+ ctx->buffer[ctx->leftover + i] = m[i];
+ ctx->leftover += bytes;
+ }
+ }
+ else
+ #endif
+#endif
+ {
+ /* handle leftover */
+ if (ctx->leftover) {
+ size_t want = (POLY1305_BLOCK_SIZE - ctx->leftover);
+ if (want > bytes)
+ want = bytes;
+ for (i = 0; i < want; i++)
+ ctx->buffer[ctx->leftover + i] = m[i];
+ bytes -= (word32)want;
+ m += want;
+ ctx->leftover += want;
+ if (ctx->leftover < POLY1305_BLOCK_SIZE)
+ return 0;
+ poly1305_block(ctx, ctx->buffer);
+ ctx->leftover = 0;
+ }
+
+ /* process full blocks */
+ if (bytes >= POLY1305_BLOCK_SIZE) {
+ size_t want = (bytes & ~(POLY1305_BLOCK_SIZE - 1));
+ poly1305_blocks(ctx, m, want);
+ m += want;
+ bytes -= (word32)want;
+ }
+
+ /* store leftover */
+ if (bytes) {
+ for (i = 0; i < bytes; i++)
+ ctx->buffer[ctx->leftover + i] = m[i];
+ ctx->leftover += bytes;
+ }
+ }
+
return 0;
}
-#endif /* HAVE_POLY1305 */
+/* Takes a Poly1305 struct that has a key loaded and pads the provided length
+ ctx : Initialized Poly1305 struct to use
+ lenToPad : Current number of bytes updated that needs padding to 16
+ */
+int wc_Poly1305_Pad(Poly1305* ctx, word32 lenToPad)
+{
+ int ret = 0;
+ word32 paddingLen;
+ byte padding[WC_POLY1305_PAD_SZ - 1];
+
+ if (ctx == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ if (lenToPad == 0) {
+ return 0; /* nothing needs to be done */
+ }
+
+ XMEMSET(padding, 0, sizeof(padding));
+
+ /* Pad length to 16 bytes */
+ paddingLen = -(int)lenToPad & (WC_POLY1305_PAD_SZ - 1);
+ if (paddingLen > 0) {
+ ret = wc_Poly1305Update(ctx, padding, paddingLen);
+ }
+ return ret;
+}
+
+/* Takes a Poly1305 struct that has a key loaded and adds the AEAD length
+ encoding in 64-bit little endian
+ aadSz : Size of the additional authentication data
+ dataSz : Size of the plaintext or ciphertext
+ */
+int wc_Poly1305_EncodeSizes(Poly1305* ctx, word32 aadSz, word32 dataSz)
+{
+ int ret;
+ byte little64[16]; /* sizeof(word64) * 2 */
+
+ if (ctx == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ XMEMSET(little64, 0, sizeof(little64));
+
+ /* size of additional data and input data as little endian 64 bit types */
+ u32tole64(aadSz, little64);
+ u32tole64(dataSz, little64 + 8);
+ ret = wc_Poly1305Update(ctx, little64, sizeof(little64));
+
+ return ret;
+}
+
+/* Takes in an initialized Poly1305 struct that has a key loaded and creates
+ a MAC (tag) using recent TLS AEAD padding scheme.
+ ctx : Initialized Poly1305 struct to use
+ additional : Additional data to use
+ addSz : Size of additional buffer
+ input : Input buffer to create tag from
+ sz : Size of input buffer
+ tag : Buffer to hold created tag
+ tagSz : Size of input tag buffer (must be at least
+ WC_POLY1305_MAC_SZ(16))
+ */
+int wc_Poly1305_MAC(Poly1305* ctx, byte* additional, word32 addSz,
+ byte* input, word32 sz, byte* tag, word32 tagSz)
+{
+ int ret;
+
+ /* sanity check on arguments */
+ if (ctx == NULL || input == NULL || tag == NULL ||
+ tagSz < WC_POLY1305_MAC_SZ) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* additional allowed to be 0 */
+ if (addSz > 0) {
+ if (additional == NULL)
+ return BAD_FUNC_ARG;
+
+ /* additional data plus padding */
+ if ((ret = wc_Poly1305Update(ctx, additional, addSz)) != 0) {
+ return ret;
+ }
+ /* pad additional data */
+ if ((ret = wc_Poly1305_Pad(ctx, addSz)) != 0) {
+ return ret;
+ }
+ }
+
+ /* input plus padding */
+ if ((ret = wc_Poly1305Update(ctx, input, sz)) != 0) {
+ return ret;
+ }
+ /* pad input data */
+ if ((ret = wc_Poly1305_Pad(ctx, sz)) != 0) {
+ return ret;
+ }
+
+ /* encode size of AAD and input data as little endian 64 bit types */
+ if ((ret = wc_Poly1305_EncodeSizes(ctx, addSz, sz)) != 0) {
+ return ret;
+ }
+
+ /* Finalize the auth tag */
+ ret = wc_Poly1305Final(ctx, tag);
+
+ return ret;
+
+}
+#endif /* HAVE_POLY1305 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/poly1305_asm.S b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/poly1305_asm.S
new file mode 100644
index 000000000..95711075b
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/poly1305_asm.S
@@ -0,0 +1,1105 @@
+/* poly1305_asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifndef HAVE_INTEL_AVX1
+#define HAVE_INTEL_AVX1
+#endif /* HAVE_INTEL_AVX1 */
+#ifndef NO_AVX2_SUPPORT
+#define HAVE_INTEL_AVX2
+#endif /* NO_AVX2_SUPPORT */
+
+#ifdef HAVE_INTEL_AVX1
+#ifndef __APPLE__
+.text
+.globl poly1305_setkey_avx
+.type poly1305_setkey_avx,@function
+.align 4
+poly1305_setkey_avx:
+#else
+.section __TEXT,__text
+.globl _poly1305_setkey_avx
+.p2align 2
+_poly1305_setkey_avx:
+#endif /* __APPLE__ */
+ movabsq $0xffffffc0fffffff, %r10
+ movabsq $0xffffffc0ffffffc, %r11
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ andq %r10, %rdx
+ andq %r11, %rax
+ movq %rdx, %r10
+ movq %rax, %r11
+ xorq %r9, %r9
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %r9, 24(%rdi)
+ movq %r9, 32(%rdi)
+ movq %r9, 40(%rdi)
+ movq %rcx, 48(%rdi)
+ movq %r8, 56(%rdi)
+ movq %r9, 352(%rdi)
+ movq %r9, 408(%rdi)
+ movq %rdx, 360(%rdi)
+ movq %rax, 416(%rdi)
+ addq %rdx, %r10
+ addq %rax, %r11
+ movq %r10, 368(%rdi)
+ movq %r11, 424(%rdi)
+ addq %rdx, %r10
+ addq %rax, %r11
+ movq %r10, 376(%rdi)
+ movq %r11, 432(%rdi)
+ addq %rdx, %r10
+ addq %rax, %r11
+ movq %r10, 384(%rdi)
+ movq %r11, 440(%rdi)
+ addq %rdx, %r10
+ addq %rax, %r11
+ movq %r10, 392(%rdi)
+ movq %r11, 448(%rdi)
+ addq %rdx, %r10
+ addq %rax, %r11
+ movq %r10, 400(%rdi)
+ movq %r11, 456(%rdi)
+ movq %r9, 608(%rdi)
+ movb $0x01, 616(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size poly1305_setkey_avx,.-poly1305_setkey_avx
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl poly1305_block_avx
+.type poly1305_block_avx,@function
+.align 4
+poly1305_block_avx:
+#else
+.section __TEXT,__text
+.globl _poly1305_block_avx
+.p2align 2
+_poly1305_block_avx:
+#endif /* __APPLE__ */
+ pushq %r15
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ movq (%rdi), %r15
+ movq 8(%rdi), %rbx
+ movq 24(%rdi), %r8
+ movq 32(%rdi), %r9
+ movq 40(%rdi), %r10
+ xorq %r14, %r14
+ movb 616(%rdi), %r14b
+ # h += m
+ movq (%rsi), %r11
+ movq 8(%rsi), %r12
+ addq %r11, %r8
+ adcq %r12, %r9
+ movq %rbx, %rax
+ adcq %r14, %r10
+ # r[1] * h[0] => rdx, rax ==> t2, t1
+ mulq %r8
+ movq %rax, %r12
+ movq %rdx, %r13
+ # r[0] * h[1] => rdx, rax ++> t2, t1
+ movq %r15, %rax
+ mulq %r9
+ addq %rax, %r12
+ movq %r15, %rax
+ adcq %rdx, %r13
+ # r[0] * h[0] => rdx, rax ==> t4, t0
+ mulq %r8
+ movq %rax, %r11
+ movq %rdx, %r8
+ # r[1] * h[1] => rdx, rax =+> t3, t2
+ movq %rbx, %rax
+ mulq %r9
+ # r[0] * h[2] +> t2
+ addq 352(%rdi,%r10,8), %r13
+ movq %rdx, %r14
+ addq %r8, %r12
+ adcq %rax, %r13
+ # r[1] * h[2] +> t3
+ adcq 408(%rdi,%r10,8), %r14
+ # r * h in r14, r13, r12, r11
+ # h = (r * h) mod 2^130 - 5
+ movq %r13, %r10
+ andq $-4, %r13
+ andq $3, %r10
+ addq %r13, %r11
+ movq %r13, %r8
+ adcq %r14, %r12
+ adcq $0x00, %r10
+ shrdq $2, %r14, %r8
+ shrq $2, %r14
+ addq %r11, %r8
+ adcq %r14, %r12
+ movq %r12, %r9
+ adcq $0x00, %r10
+ # h in r10, r9, r8
+ # Store h to ctx
+ movq %r8, 24(%rdi)
+ movq %r9, 32(%rdi)
+ movq %r10, 40(%rdi)
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %r15
+ repz retq
+#ifndef __APPLE__
+.size poly1305_block_avx,.-poly1305_block_avx
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl poly1305_blocks_avx
+.type poly1305_blocks_avx,@function
+.align 4
+poly1305_blocks_avx:
+#else
+.section __TEXT,__text
+.globl _poly1305_blocks_avx
+.p2align 2
+_poly1305_blocks_avx:
+#endif /* __APPLE__ */
+ pushq %r15
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ movq %rdx, %rcx
+ movq (%rdi), %r15
+ movq 8(%rdi), %rbx
+ movq 24(%rdi), %r8
+ movq 32(%rdi), %r9
+ movq 40(%rdi), %r10
+L_poly1305_avx_blocks_start:
+ # h += m
+ movq (%rsi), %r11
+ movq 8(%rsi), %r12
+ addq %r11, %r8
+ adcq %r12, %r9
+ movq %rbx, %rax
+ adcq $0x00, %r10
+ # r[1] * h[0] => rdx, rax ==> t2, t1
+ mulq %r8
+ movq %rax, %r12
+ movq %rdx, %r13
+ # r[0] * h[1] => rdx, rax ++> t2, t1
+ movq %r15, %rax
+ mulq %r9
+ addq %rax, %r12
+ movq %r15, %rax
+ adcq %rdx, %r13
+ # r[0] * h[0] => rdx, rax ==> t4, t0
+ mulq %r8
+ movq %rax, %r11
+ movq %rdx, %r8
+ # r[1] * h[1] => rdx, rax =+> t3, t2
+ movq %rbx, %rax
+ mulq %r9
+ # r[0] * h[2] +> t2
+ addq 360(%rdi,%r10,8), %r13
+ movq %rdx, %r14
+ addq %r8, %r12
+ adcq %rax, %r13
+ # r[1] * h[2] +> t3
+ adcq 416(%rdi,%r10,8), %r14
+ # r * h in r14, r13, r12, r11
+ # h = (r * h) mod 2^130 - 5
+ movq %r13, %r10
+ andq $-4, %r13
+ andq $3, %r10
+ addq %r13, %r11
+ movq %r13, %r8
+ adcq %r14, %r12
+ adcq $0x00, %r10
+ shrdq $2, %r14, %r8
+ shrq $2, %r14
+ addq %r11, %r8
+ adcq %r14, %r12
+ movq %r12, %r9
+ adcq $0x00, %r10
+ # h in r10, r9, r8
+ # Next block from message
+ addq $16, %rsi
+ subq $16, %rcx
+ jg L_poly1305_avx_blocks_start
+ # Store h to ctx
+ movq %r8, 24(%rdi)
+ movq %r9, 32(%rdi)
+ movq %r10, 40(%rdi)
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %r15
+ repz retq
+#ifndef __APPLE__
+.size poly1305_blocks_avx,.-poly1305_blocks_avx
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl poly1305_final_avx
+.type poly1305_final_avx,@function
+.align 4
+poly1305_final_avx:
+#else
+.section __TEXT,__text
+.globl _poly1305_final_avx
+.p2align 2
+_poly1305_final_avx:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ movq %rsi, %rbx
+ movq 608(%rdi), %rax
+ testq %rax, %rax
+ je L_poly1305_avx_final_no_more
+ movb $0x01, 480(%rdi,%rax,1)
+ jmp L_poly1305_avx_final_cmp_rem
+L_poly1305_avx_final_zero_rem:
+ movb $0x00, 480(%rdi,%rax,1)
+L_poly1305_avx_final_cmp_rem:
+ incb %al
+ cmpq $16, %rax
+ jl L_poly1305_avx_final_zero_rem
+ movb $0x00, 616(%rdi)
+ leaq 480(%rdi), %rsi
+#ifndef __APPLE__
+ callq poly1305_block_avx@plt
+#else
+ callq _poly1305_block_avx
+#endif /* __APPLE__ */
+L_poly1305_avx_final_no_more:
+ movq 24(%rdi), %rax
+ movq 32(%rdi), %rdx
+ movq 40(%rdi), %rcx
+ movq 48(%rdi), %r11
+ movq 56(%rdi), %r12
+ # h %= p
+ # h = (h + pad)
+ # mod 2^130 - 5
+ movq %rcx, %r8
+ andq $3, %rcx
+ shrq $2, %r8
+ # Multily by 5
+ leaq 0(%r8,%r8,4), %r8
+ addq %r8, %rax
+ adcq $0x00, %rdx
+ adcq $0x00, %rcx
+ # Fixup when between (1 << 130) - 1 and (1 << 130) - 5
+ movq %rax, %r8
+ movq %rdx, %r9
+ movq %rcx, %r10
+ addq $5, %r8
+ adcq $0x00, %r9
+ adcq $0x00, %r10
+ cmpq $4, %r10
+ cmoveq %r8, %rax
+ cmoveq %r9, %rdx
+ # h += pad
+ addq %r11, %rax
+ adcq %r12, %rdx
+ movq %rax, (%rbx)
+ movq %rdx, 8(%rbx)
+ # Zero out r
+ movq $0x00, (%rdi)
+ movq $0x00, 8(%rdi)
+ # Zero out h
+ movq $0x00, 24(%rdi)
+ movq $0x00, 32(%rdi)
+ movq $0x00, 40(%rdi)
+ # Zero out pad
+ movq $0x00, 48(%rdi)
+ movq $0x00, 56(%rdi)
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size poly1305_final_avx,.-poly1305_final_avx
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX1 */
+#ifdef HAVE_INTEL_AVX2
+#ifndef __APPLE__
+.text
+.globl poly1305_calc_powers_avx2
+.type poly1305_calc_powers_avx2,@function
+.align 4
+poly1305_calc_powers_avx2:
+#else
+.section __TEXT,__text
+.globl _poly1305_calc_powers_avx2
+.p2align 2
+_poly1305_calc_powers_avx2:
+#endif /* __APPLE__ */
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbx
+ pushq %rbp
+ movq (%rdi), %rcx
+ movq 8(%rdi), %r8
+ xorq %r9, %r9
+ # Convert to 26 bits in 32
+ movq %rcx, %rax
+ movq %rcx, %rdx
+ movq %rcx, %rsi
+ movq %r8, %rbx
+ movq %r8, %rbp
+ shrq $26, %rdx
+ shrdq $52, %r8, %rsi
+ shrq $14, %rbx
+ shrdq $40, %r9, %rbp
+ andq $0x3ffffff, %rax
+ andq $0x3ffffff, %rdx
+ andq $0x3ffffff, %rsi
+ andq $0x3ffffff, %rbx
+ andq $0x3ffffff, %rbp
+ movl %eax, 224(%rdi)
+ movl %edx, 228(%rdi)
+ movl %esi, 232(%rdi)
+ movl %ebx, 236(%rdi)
+ movl %ebp, 240(%rdi)
+ movl $0x00, 244(%rdi)
+ # Square 128-bit
+ movq %r8, %rax
+ mulq %rcx
+ xorq %r13, %r13
+ movq %rax, %r11
+ movq %rdx, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0x00, %r13
+ movq %rcx, %rax
+ mulq %rax
+ movq %rax, %r10
+ movq %rdx, %r15
+ movq %r8, %rax
+ mulq %rax
+ addq %r15, %r11
+ adcq %rax, %r12
+ adcq %rdx, %r13
+ # Reduce 256-bit to 130-bit
+ movq %r12, %rax
+ movq %r13, %rdx
+ andq $-4, %rax
+ andq $3, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ shrdq $2, %rdx, %rax
+ shrq $2, %rdx
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0x00, %r12
+ movq %r12, %rax
+ shrq $2, %rax
+ leaq 0(%rax,%rax,4), %rax
+ andq $3, %r12
+ addq %rax, %r10
+ adcq $0x00, %r11
+ adcq $0x00, %r12
+ # Convert to 26 bits in 32
+ movq %r10, %rax
+ movq %r10, %rdx
+ movq %r10, %rsi
+ movq %r11, %rbx
+ movq %r11, %rbp
+ shrq $26, %rdx
+ shrdq $52, %r11, %rsi
+ shrq $14, %rbx
+ shrdq $40, %r12, %rbp
+ andq $0x3ffffff, %rax
+ andq $0x3ffffff, %rdx
+ andq $0x3ffffff, %rsi
+ andq $0x3ffffff, %rbx
+ andq $0x3ffffff, %rbp
+ movl %eax, 256(%rdi)
+ movl %edx, 260(%rdi)
+ movl %esi, 264(%rdi)
+ movl %ebx, 268(%rdi)
+ movl %ebp, 272(%rdi)
+ movl $0x00, 276(%rdi)
+ # Multiply 128-bit by 130-bit
+ # r1[0] * r2[0]
+ movq %rcx, %rax
+ mulq %r10
+ movq %rax, %r13
+ movq %rdx, %r14
+ # r1[0] * r2[1]
+ movq %rcx, %rax
+ mulq %r11
+ movq $0x00, %r15
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # r1[1] * r2[0]
+ movq %r8, %rax
+ mulq %r10
+ movq $0x00, %rsi
+ addq %rax, %r14
+ adcq %rdx, %r15
+ adcq $0x00, %rsi
+ # r1[0] * r2[2]
+ movq %rcx, %rax
+ mulq %r12
+ addq %rax, %r15
+ adcq %rdx, %rsi
+ # r1[1] * r2[1]
+ movq %r8, %rax
+ mulq %r11
+ movq $0x00, %rbx
+ addq %rax, %r15
+ adcq %rdx, %rsi
+ adcq $0x00, %rbx
+ # r1[1] * r2[2]
+ movq %r8, %rax
+ mulq %r12
+ addq %rax, %rsi
+ adcq %rdx, %rbx
+ # Reduce 260-bit to 130-bit
+ movq %r15, %rax
+ movq %rsi, %rdx
+ movq %rbx, %rbx
+ andq $-4, %rax
+ andq $3, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq %rbx, %r15
+ shrdq $2, %rdx, %rax
+ shrdq $2, %rbx, %rdx
+ shrq $2, %rbx
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq %rbx, %r15
+ movq %r15, %rax
+ andq $3, %r15
+ shrq $2, %rax
+ leaq 0(%rax,%rax,4), %rax
+ addq %rax, %r13
+ adcq $0x00, %r14
+ adcq $0x00, %r15
+ # Convert to 26 bits in 32
+ movq %r13, %rax
+ movq %r13, %rdx
+ movq %r13, %rsi
+ movq %r14, %rbx
+ movq %r14, %rbp
+ shrq $26, %rdx
+ shrdq $52, %r14, %rsi
+ shrq $14, %rbx
+ shrdq $40, %r15, %rbp
+ andq $0x3ffffff, %rax
+ andq $0x3ffffff, %rdx
+ andq $0x3ffffff, %rsi
+ andq $0x3ffffff, %rbx
+ andq $0x3ffffff, %rbp
+ movl %eax, 288(%rdi)
+ movl %edx, 292(%rdi)
+ movl %esi, 296(%rdi)
+ movl %ebx, 300(%rdi)
+ movl %ebp, 304(%rdi)
+ movl $0x00, 308(%rdi)
+ # Square 130-bit
+ movq %r11, %rax
+ mulq %r10
+ xorq %r13, %r13
+ movq %rax, %r8
+ movq %rdx, %r9
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0x00, %r13
+ movq %r10, %rax
+ mulq %rax
+ movq %rax, %rcx
+ movq %rdx, %r15
+ movq %r11, %rax
+ mulq %rax
+ addq %r15, %r8
+ adcq %rax, %r9
+ adcq %rdx, %r13
+ movq %r12, %rax
+ mulq %rax
+ movq %rax, %r14
+ movq %r12, %rax
+ mulq %r10
+ addq %rax, %r9
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ addq %rax, %r9
+ adcq %rdx, %r13
+ adcq $0x00, %r14
+ movq %r12, %rax
+ mulq %r11
+ addq %rax, %r13
+ adcq %rdx, %r14
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # Reduce 260-bit to 130-bit
+ movq %r9, %rax
+ movq %r13, %rdx
+ movq %r14, %r15
+ andq $-4, %rax
+ andq $3, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq %r15, %r9
+ shrdq $2, %rdx, %rax
+ shrdq $2, %r15, %rdx
+ shrq $2, %r15
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq %r15, %r9
+ movq %r9, %rax
+ andq $3, %r9
+ shrq $2, %rax
+ leaq 0(%rax,%rax,4), %rax
+ addq %rax, %rcx
+ adcq $0x00, %r8
+ adcq $0x00, %r9
+ # Convert to 26 bits in 32
+ movq %rcx, %rax
+ movq %rcx, %rdx
+ movq %rcx, %rsi
+ movq %r8, %rbx
+ movq %r8, %rbp
+ shrq $26, %rdx
+ shrdq $52, %r8, %rsi
+ shrq $14, %rbx
+ shrdq $40, %r9, %rbp
+ andq $0x3ffffff, %rax
+ andq $0x3ffffff, %rdx
+ andq $0x3ffffff, %rsi
+ andq $0x3ffffff, %rbx
+ andq $0x3ffffff, %rbp
+ movl %eax, 320(%rdi)
+ movl %edx, 324(%rdi)
+ movl %esi, 328(%rdi)
+ movl %ebx, 332(%rdi)
+ movl %ebp, 336(%rdi)
+ movl $0x00, 340(%rdi)
+ popq %rbp
+ popq %rbx
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size poly1305_calc_powers_avx2,.-poly1305_calc_powers_avx2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl poly1305_setkey_avx2
+.type poly1305_setkey_avx2,@function
+.align 4
+poly1305_setkey_avx2:
+#else
+.section __TEXT,__text
+.globl _poly1305_setkey_avx2
+.p2align 2
+_poly1305_setkey_avx2:
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+ callq poly1305_setkey_avx@plt
+#else
+ callq _poly1305_setkey_avx
+#endif /* __APPLE__ */
+ vpxor %ymm0, %ymm0, %ymm0
+ vmovdqu %ymm0, 64(%rdi)
+ vmovdqu %ymm0, 96(%rdi)
+ vmovdqu %ymm0, 128(%rdi)
+ vmovdqu %ymm0, 160(%rdi)
+ vmovdqu %ymm0, 192(%rdi)
+ movq $0x00, 608(%rdi)
+ movw $0x00, 616(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size poly1305_setkey_avx2,.-poly1305_setkey_avx2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+L_poly1305_avx2_blocks_mask:
+.quad 0x3ffffff, 0x3ffffff
+.quad 0x3ffffff, 0x3ffffff
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+L_poly1305_avx2_blocks_hibit:
+.quad 0x1000000, 0x1000000
+.quad 0x1000000, 0x1000000
+#ifndef __APPLE__
+.text
+.globl poly1305_blocks_avx2
+.type poly1305_blocks_avx2,@function
+.align 4
+poly1305_blocks_avx2:
+#else
+.section __TEXT,__text
+.globl _poly1305_blocks_avx2
+.p2align 2
+_poly1305_blocks_avx2:
+#endif /* __APPLE__ */
+ pushq %r12
+ pushq %rbx
+ subq $0x140, %rsp
+ movq %rsp, %rcx
+ andq $-32, %rcx
+ addq $32, %rcx
+ vpxor %ymm15, %ymm15, %ymm15
+ movq %rcx, %rbx
+ leaq 64(%rdi), %rax
+ addq $0xa0, %rbx
+ cmpw $0x00, 616(%rdi)
+ jne L_poly1305_avx2_blocks_begin_h
+ # Load the message data
+ vmovdqu (%rsi), %ymm0
+ vmovdqu 32(%rsi), %ymm1
+ vperm2i128 $32, %ymm1, %ymm0, %ymm2
+ vperm2i128 $49, %ymm1, %ymm0, %ymm0
+ vpunpckldq %ymm0, %ymm2, %ymm1
+ vpunpckhdq %ymm0, %ymm2, %ymm3
+ vpunpckldq %ymm15, %ymm1, %ymm0
+ vpunpckhdq %ymm15, %ymm1, %ymm1
+ vpunpckldq %ymm15, %ymm3, %ymm2
+ vpunpckhdq %ymm15, %ymm3, %ymm3
+ vmovdqu L_poly1305_avx2_blocks_hibit(%rip), %ymm4
+ vpsllq $6, %ymm1, %ymm1
+ vpsllq $12, %ymm2, %ymm2
+ vpsllq $18, %ymm3, %ymm3
+ vmovdqu L_poly1305_avx2_blocks_mask(%rip), %ymm14
+ # Reduce, in place, the message data
+ vpsrlq $26, %ymm0, %ymm10
+ vpsrlq $26, %ymm3, %ymm11
+ vpand %ymm14, %ymm0, %ymm0
+ vpand %ymm14, %ymm3, %ymm3
+ vpaddq %ymm1, %ymm10, %ymm1
+ vpaddq %ymm4, %ymm11, %ymm4
+ vpsrlq $26, %ymm1, %ymm10
+ vpsrlq $26, %ymm4, %ymm11
+ vpand %ymm14, %ymm1, %ymm1
+ vpand %ymm14, %ymm4, %ymm4
+ vpaddq %ymm2, %ymm10, %ymm2
+ vpslld $2, %ymm11, %ymm12
+ vpaddd %ymm12, %ymm11, %ymm12
+ vpsrlq $26, %ymm2, %ymm10
+ vpaddq %ymm0, %ymm12, %ymm0
+ vpsrlq $26, %ymm0, %ymm11
+ vpand %ymm14, %ymm2, %ymm2
+ vpand %ymm14, %ymm0, %ymm0
+ vpaddq %ymm3, %ymm10, %ymm3
+ vpaddq %ymm1, %ymm11, %ymm1
+ vpsrlq $26, %ymm3, %ymm10
+ vpand %ymm14, %ymm3, %ymm3
+ vpaddq %ymm4, %ymm10, %ymm4
+ addq $0x40, %rsi
+ subq $0x40, %rdx
+ jz L_poly1305_avx2_blocks_store
+ jmp L_poly1305_avx2_blocks_load_r4
+L_poly1305_avx2_blocks_begin_h:
+ # Load the H values.
+ vmovdqu (%rax), %ymm0
+ vmovdqu 32(%rax), %ymm1
+ vmovdqu 64(%rax), %ymm2
+ vmovdqu 96(%rax), %ymm3
+ vmovdqu 128(%rax), %ymm4
+ # Check if there is a power of r to load - otherwise use r^4.
+ cmpb $0x00, 616(%rdi)
+ je L_poly1305_avx2_blocks_load_r4
+ # Load the 4 powers of r - r^4, r^3, r^2, r^1.
+ vmovdqu 224(%rdi), %ymm8
+ vmovdqu 256(%rdi), %ymm7
+ vmovdqu 288(%rdi), %ymm6
+ vmovdqu 320(%rdi), %ymm5
+ vpermq $0xd8, %ymm5, %ymm5
+ vpermq $0xd8, %ymm6, %ymm6
+ vpermq $0xd8, %ymm7, %ymm7
+ vpermq $0xd8, %ymm8, %ymm8
+ vpunpcklqdq %ymm6, %ymm5, %ymm10
+ vpunpckhqdq %ymm6, %ymm5, %ymm11
+ vpunpcklqdq %ymm8, %ymm7, %ymm12
+ vpunpckhqdq %ymm8, %ymm7, %ymm13
+ vperm2i128 $32, %ymm12, %ymm10, %ymm5
+ vperm2i128 $49, %ymm12, %ymm10, %ymm7
+ vperm2i128 $32, %ymm13, %ymm11, %ymm9
+ vpsrlq $32, %ymm5, %ymm6
+ vpsrlq $32, %ymm7, %ymm8
+ jmp L_poly1305_avx2_blocks_mul_5
+L_poly1305_avx2_blocks_load_r4:
+ # Load r^4 into all four positions.
+ vmovdqu 320(%rdi), %ymm13
+ vpermq $0x00, %ymm13, %ymm5
+ vpsrlq $32, %ymm13, %ymm14
+ vpermq $0x55, %ymm13, %ymm7
+ vpermq $0xaa, %ymm13, %ymm9
+ vpermq $0x00, %ymm14, %ymm6
+ vpermq $0x55, %ymm14, %ymm8
+L_poly1305_avx2_blocks_mul_5:
+ # Multiply top 4 26-bit values of all four H by 5
+ vpslld $2, %ymm6, %ymm10
+ vpslld $2, %ymm7, %ymm11
+ vpslld $2, %ymm8, %ymm12
+ vpslld $2, %ymm9, %ymm13
+ vpaddq %ymm10, %ymm6, %ymm10
+ vpaddq %ymm11, %ymm7, %ymm11
+ vpaddq %ymm12, %ymm8, %ymm12
+ vpaddq %ymm13, %ymm9, %ymm13
+ # Store powers of r and multiple of 5 for use in multiply.
+ vmovdqa %ymm10, (%rbx)
+ vmovdqa %ymm11, 32(%rbx)
+ vmovdqa %ymm12, 64(%rbx)
+ vmovdqa %ymm13, 96(%rbx)
+ vmovdqa %ymm5, (%rcx)
+ vmovdqa %ymm6, 32(%rcx)
+ vmovdqa %ymm7, 64(%rcx)
+ vmovdqa %ymm8, 96(%rcx)
+ vmovdqa %ymm9, 128(%rcx)
+ vmovdqu L_poly1305_avx2_blocks_mask(%rip), %ymm14
+ # If not finished then loop over data
+ cmpb $0x01, 616(%rdi)
+ jne L_poly1305_avx2_blocks_start
+ # Do last multiply, reduce, add the four H together and move to
+ # 32-bit registers
+ vpmuludq (%rbx), %ymm4, %ymm5
+ vpmuludq 32(%rbx), %ymm3, %ymm10
+ vpmuludq 32(%rbx), %ymm4, %ymm6
+ vpmuludq 64(%rbx), %ymm3, %ymm11
+ vpmuludq 64(%rbx), %ymm4, %ymm7
+ vpaddq %ymm5, %ymm10, %ymm5
+ vpmuludq 64(%rbx), %ymm2, %ymm12
+ vpmuludq 96(%rbx), %ymm4, %ymm8
+ vpaddq %ymm6, %ymm11, %ymm6
+ vpmuludq 96(%rbx), %ymm1, %ymm13
+ vpmuludq 96(%rbx), %ymm2, %ymm10
+ vpaddq %ymm5, %ymm12, %ymm5
+ vpmuludq 96(%rbx), %ymm3, %ymm11
+ vpmuludq (%rcx), %ymm3, %ymm12
+ vpaddq %ymm5, %ymm13, %ymm5
+ vpmuludq (%rcx), %ymm4, %ymm9
+ vpaddq %ymm6, %ymm10, %ymm6
+ vpmuludq (%rcx), %ymm0, %ymm13
+ vpaddq %ymm7, %ymm11, %ymm7
+ vpmuludq (%rcx), %ymm1, %ymm10
+ vpaddq %ymm8, %ymm12, %ymm8
+ vpmuludq (%rcx), %ymm2, %ymm11
+ vpmuludq 32(%rcx), %ymm2, %ymm12
+ vpaddq %ymm5, %ymm13, %ymm5
+ vpmuludq 32(%rcx), %ymm3, %ymm13
+ vpaddq %ymm6, %ymm10, %ymm6
+ vpmuludq 32(%rcx), %ymm0, %ymm10
+ vpaddq %ymm7, %ymm11, %ymm7
+ vpmuludq 32(%rcx), %ymm1, %ymm11
+ vpaddq %ymm8, %ymm12, %ymm8
+ vpmuludq 64(%rcx), %ymm1, %ymm12
+ vpaddq %ymm9, %ymm13, %ymm9
+ vpmuludq 64(%rcx), %ymm2, %ymm13
+ vpaddq %ymm6, %ymm10, %ymm6
+ vpmuludq 64(%rcx), %ymm0, %ymm10
+ vpaddq %ymm7, %ymm11, %ymm7
+ vpmuludq 96(%rcx), %ymm0, %ymm11
+ vpaddq %ymm8, %ymm12, %ymm8
+ vpmuludq 96(%rcx), %ymm1, %ymm12
+ vpaddq %ymm9, %ymm13, %ymm9
+ vpaddq %ymm7, %ymm10, %ymm7
+ vpmuludq 128(%rcx), %ymm0, %ymm13
+ vpaddq %ymm8, %ymm11, %ymm8
+ vpaddq %ymm9, %ymm12, %ymm9
+ vpaddq %ymm9, %ymm13, %ymm9
+ vpsrlq $26, %ymm5, %ymm10
+ vpsrlq $26, %ymm8, %ymm11
+ vpand %ymm14, %ymm5, %ymm5
+ vpand %ymm14, %ymm8, %ymm8
+ vpaddq %ymm6, %ymm10, %ymm6
+ vpaddq %ymm9, %ymm11, %ymm9
+ vpsrlq $26, %ymm6, %ymm10
+ vpsrlq $26, %ymm9, %ymm11
+ vpand %ymm14, %ymm6, %ymm1
+ vpand %ymm14, %ymm9, %ymm4
+ vpaddq %ymm7, %ymm10, %ymm7
+ vpslld $2, %ymm11, %ymm12
+ vpaddd %ymm12, %ymm11, %ymm12
+ vpsrlq $26, %ymm7, %ymm10
+ vpaddq %ymm5, %ymm12, %ymm5
+ vpsrlq $26, %ymm5, %ymm11
+ vpand %ymm14, %ymm7, %ymm2
+ vpand %ymm14, %ymm5, %ymm0
+ vpaddq %ymm8, %ymm10, %ymm8
+ vpaddq %ymm1, %ymm11, %ymm1
+ vpsrlq $26, %ymm8, %ymm10
+ vpand %ymm14, %ymm8, %ymm3
+ vpaddq %ymm4, %ymm10, %ymm4
+ vpsrldq $8, %ymm0, %ymm5
+ vpsrldq $8, %ymm1, %ymm6
+ vpsrldq $8, %ymm2, %ymm7
+ vpsrldq $8, %ymm3, %ymm8
+ vpsrldq $8, %ymm4, %ymm9
+ vpaddq %ymm0, %ymm5, %ymm0
+ vpaddq %ymm1, %ymm6, %ymm1
+ vpaddq %ymm2, %ymm7, %ymm2
+ vpaddq %ymm3, %ymm8, %ymm3
+ vpaddq %ymm4, %ymm9, %ymm4
+ vpermq $2, %ymm0, %ymm5
+ vpermq $2, %ymm1, %ymm6
+ vpermq $2, %ymm2, %ymm7
+ vpermq $2, %ymm3, %ymm8
+ vpermq $2, %ymm4, %ymm9
+ vpaddq %ymm0, %ymm5, %ymm0
+ vpaddq %ymm1, %ymm6, %ymm1
+ vpaddq %ymm2, %ymm7, %ymm2
+ vpaddq %ymm3, %ymm8, %ymm3
+ vpaddq %ymm4, %ymm9, %ymm4
+ vmovd %xmm0, %r8d
+ vmovd %xmm1, %r9d
+ vmovd %xmm2, %r10d
+ vmovd %xmm3, %r11d
+ vmovd %xmm4, %r12d
+ jmp L_poly1305_avx2_blocks_end_calc
+L_poly1305_avx2_blocks_start:
+ vmovdqu (%rsi), %ymm5
+ vmovdqu 32(%rsi), %ymm6
+ vperm2i128 $32, %ymm6, %ymm5, %ymm7
+ vperm2i128 $49, %ymm6, %ymm5, %ymm5
+ vpunpckldq %ymm5, %ymm7, %ymm6
+ vpunpckhdq %ymm5, %ymm7, %ymm8
+ vpunpckldq %ymm15, %ymm6, %ymm5
+ vpunpckhdq %ymm15, %ymm6, %ymm6
+ vpunpckldq %ymm15, %ymm8, %ymm7
+ vpunpckhdq %ymm15, %ymm8, %ymm8
+ vmovdqu L_poly1305_avx2_blocks_hibit(%rip), %ymm9
+ vpsllq $6, %ymm6, %ymm6
+ vpsllq $12, %ymm7, %ymm7
+ vpsllq $18, %ymm8, %ymm8
+ vpmuludq (%rbx), %ymm4, %ymm10
+ vpaddq %ymm5, %ymm10, %ymm5
+ vpmuludq 32(%rbx), %ymm3, %ymm10
+ vpmuludq 32(%rbx), %ymm4, %ymm11
+ vpaddq %ymm6, %ymm11, %ymm6
+ vpmuludq 64(%rbx), %ymm3, %ymm11
+ vpmuludq 64(%rbx), %ymm4, %ymm12
+ vpaddq %ymm7, %ymm12, %ymm7
+ vpaddq %ymm5, %ymm10, %ymm5
+ vpmuludq 64(%rbx), %ymm2, %ymm12
+ vpmuludq 96(%rbx), %ymm4, %ymm13
+ vpaddq %ymm8, %ymm13, %ymm8
+ vpaddq %ymm6, %ymm11, %ymm6
+ vpmuludq 96(%rbx), %ymm1, %ymm13
+ vpmuludq 96(%rbx), %ymm2, %ymm10
+ vpaddq %ymm5, %ymm12, %ymm5
+ vpmuludq 96(%rbx), %ymm3, %ymm11
+ vpmuludq (%rcx), %ymm3, %ymm12
+ vpaddq %ymm5, %ymm13, %ymm5
+ vpmuludq (%rcx), %ymm4, %ymm13
+ vpaddq %ymm9, %ymm13, %ymm9
+ vpaddq %ymm6, %ymm10, %ymm6
+ vpmuludq (%rcx), %ymm0, %ymm13
+ vpaddq %ymm7, %ymm11, %ymm7
+ vpmuludq (%rcx), %ymm1, %ymm10
+ vpaddq %ymm8, %ymm12, %ymm8
+ vpmuludq (%rcx), %ymm2, %ymm11
+ vpmuludq 32(%rcx), %ymm2, %ymm12
+ vpaddq %ymm5, %ymm13, %ymm5
+ vpmuludq 32(%rcx), %ymm3, %ymm13
+ vpaddq %ymm6, %ymm10, %ymm6
+ vpmuludq 32(%rcx), %ymm0, %ymm10
+ vpaddq %ymm7, %ymm11, %ymm7
+ vpmuludq 32(%rcx), %ymm1, %ymm11
+ vpaddq %ymm8, %ymm12, %ymm8
+ vpmuludq 64(%rcx), %ymm1, %ymm12
+ vpaddq %ymm9, %ymm13, %ymm9
+ vpmuludq 64(%rcx), %ymm2, %ymm13
+ vpaddq %ymm6, %ymm10, %ymm6
+ vpmuludq 64(%rcx), %ymm0, %ymm10
+ vpaddq %ymm7, %ymm11, %ymm7
+ vpmuludq 96(%rcx), %ymm0, %ymm11
+ vpaddq %ymm8, %ymm12, %ymm8
+ vpmuludq 96(%rcx), %ymm1, %ymm12
+ vpaddq %ymm9, %ymm13, %ymm9
+ vpaddq %ymm7, %ymm10, %ymm7
+ vpmuludq 128(%rcx), %ymm0, %ymm13
+ vpaddq %ymm8, %ymm11, %ymm8
+ vpaddq %ymm9, %ymm12, %ymm9
+ vpaddq %ymm9, %ymm13, %ymm9
+ vpsrlq $26, %ymm5, %ymm10
+ vpsrlq $26, %ymm8, %ymm11
+ vpand %ymm14, %ymm5, %ymm5
+ vpand %ymm14, %ymm8, %ymm8
+ vpaddq %ymm6, %ymm10, %ymm6
+ vpaddq %ymm9, %ymm11, %ymm9
+ vpsrlq $26, %ymm6, %ymm10
+ vpsrlq $26, %ymm9, %ymm11
+ vpand %ymm14, %ymm6, %ymm1
+ vpand %ymm14, %ymm9, %ymm4
+ vpaddq %ymm7, %ymm10, %ymm7
+ vpslld $2, %ymm11, %ymm12
+ vpaddd %ymm12, %ymm11, %ymm12
+ vpsrlq $26, %ymm7, %ymm10
+ vpaddq %ymm5, %ymm12, %ymm5
+ vpsrlq $26, %ymm5, %ymm11
+ vpand %ymm14, %ymm7, %ymm2
+ vpand %ymm14, %ymm5, %ymm0
+ vpaddq %ymm8, %ymm10, %ymm8
+ vpaddq %ymm1, %ymm11, %ymm1
+ vpsrlq $26, %ymm8, %ymm10
+ vpand %ymm14, %ymm8, %ymm3
+ vpaddq %ymm4, %ymm10, %ymm4
+ addq $0x40, %rsi
+ subq $0x40, %rdx
+ jnz L_poly1305_avx2_blocks_start
+L_poly1305_avx2_blocks_store:
+ # Store four H values - state
+ vmovdqu %ymm0, (%rax)
+ vmovdqu %ymm1, 32(%rax)
+ vmovdqu %ymm2, 64(%rax)
+ vmovdqu %ymm3, 96(%rax)
+ vmovdqu %ymm4, 128(%rax)
+L_poly1305_avx2_blocks_end_calc:
+ cmpb $0x00, 616(%rdi)
+ je L_poly1305_avx2_blocks_complete
+ movq %r8, %rax
+ movq %r10, %rdx
+ movq %r12, %rcx
+ shrq $12, %rdx
+ shrq $24, %rcx
+ shlq $26, %r9
+ shlq $52, %r10
+ shlq $14, %r11
+ shlq $40, %r12
+ addq %r9, %rax
+ adcq %r10, %rax
+ adcq %r11, %rdx
+ adcq %r12, %rdx
+ adcq $0x00, %rcx
+ movq %rcx, %r8
+ andq $3, %rcx
+ shrq $2, %r8
+ leaq 0(%r8,%r8,4), %r8
+ addq %r8, %rax
+ adcq $0x00, %rdx
+ adcq $0x00, %rcx
+ movq %rax, 24(%rdi)
+ movq %rdx, 32(%rdi)
+ movq %rcx, 40(%rdi)
+L_poly1305_avx2_blocks_complete:
+ movb $0x01, 617(%rdi)
+ addq $0x140, %rsp
+ popq %rbx
+ popq %r12
+ repz retq
+#ifndef __APPLE__
+.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl poly1305_final_avx2
+.type poly1305_final_avx2,@function
+.align 4
+poly1305_final_avx2:
+#else
+.section __TEXT,__text
+.globl _poly1305_final_avx2
+.p2align 2
+_poly1305_final_avx2:
+#endif /* __APPLE__ */
+ movb $0x01, 616(%rdi)
+ movb 617(%rdi), %cl
+ cmpb $0x00, %cl
+ je L_poly1305_avx2_final_done_blocks_X4
+ pushq %rsi
+ movq $0x40, %rdx
+ xorq %rsi, %rsi
+#ifndef __APPLE__
+ callq poly1305_blocks_avx2@plt
+#else
+ callq _poly1305_blocks_avx2
+#endif /* __APPLE__ */
+ popq %rsi
+L_poly1305_avx2_final_done_blocks_X4:
+ movq 608(%rdi), %rax
+ movq %rax, %rcx
+ andq $-16, %rcx
+ cmpb $0x00, %cl
+ je L_poly1305_avx2_final_done_blocks
+ pushq %rcx
+ pushq %rax
+ pushq %rsi
+ movq %rcx, %rdx
+ leaq 480(%rdi), %rsi
+#ifndef __APPLE__
+ callq poly1305_blocks_avx@plt
+#else
+ callq _poly1305_blocks_avx
+#endif /* __APPLE__ */
+ popq %rsi
+ popq %rax
+ popq %rcx
+L_poly1305_avx2_final_done_blocks:
+ subq %rcx, 608(%rdi)
+ xorq %rdx, %rdx
+ jmp L_poly1305_avx2_final_cmp_copy
+L_poly1305_avx2_final_start_copy:
+ movb 480(%rdi,%rcx,1), %r8b
+ movb %r8b, 480(%rdi,%rdx,1)
+ incb %cl
+ incb %dl
+L_poly1305_avx2_final_cmp_copy:
+ cmp %rcx, %rax
+ jne L_poly1305_avx2_final_start_copy
+#ifndef __APPLE__
+ callq poly1305_final_avx@plt
+#else
+ callq _poly1305_final_avx
+#endif /* __APPLE__ */
+ vpxor %ymm0, %ymm0, %ymm0
+ vmovdqu %ymm0, 64(%rdi)
+ vmovdqu %ymm0, 96(%rdi)
+ vmovdqu %ymm0, 128(%rdi)
+ vmovdqu %ymm0, 160(%rdi)
+ vmovdqu %ymm0, 192(%rdi)
+ vmovdqu %ymm0, 224(%rdi)
+ vmovdqu %ymm0, 256(%rdi)
+ vmovdqu %ymm0, 288(%rdi)
+ vmovdqu %ymm0, 320(%rdi)
+ movq $0x00, 608(%rdi)
+ movw $0x00, 616(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size poly1305_final_avx2,.-poly1305_final_avx2
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/pic32/pic32mz-crypt.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/pic32/pic32mz-crypt.c
new file mode 100644
index 000000000..1e618c194
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/pic32/pic32mz-crypt.c
@@ -0,0 +1,804 @@
+/* pic32mz-crypt.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFSSL_MICROCHIP_PIC32MZ
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+
+#ifdef WOLFSSL_PIC32MZ_CRYPT
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/des3.h>
+#endif
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+#include <wolfssl/wolfcrypt/md5.h>
+#include <wolfssl/wolfcrypt/sha.h>
+#include <wolfssl/wolfcrypt/sha256.h>
+#endif
+
+
+#if defined(WOLFSSL_PIC32MZ_CRYPT) || defined(WOLFSSL_PIC32MZ_HASH)
+
+static int Pic32GetBlockSize(int algo)
+{
+ switch (algo) {
+ case PIC32_ALGO_HMAC1:
+ return PIC32_BLOCKSIZE_HMAC;
+ case PIC32_ALGO_SHA256:
+ return PIC32_BLOCKSIZE_SHA256;
+ case PIC32_ALGO_SHA1:
+ return PIC32_BLOCKSIZE_SHA1;
+ case PIC32_ALGO_MD5:
+ return PIC32_BLOCKSIZE_MD5;
+ case PIC32_ALGO_AES:
+ return PIC32_BLOCKSIZE_AES;
+ case PIC32_ALGO_TDES:
+ return PIC32_BLOCKSIZE_TDES;
+ case PIC32_ALGO_DES:
+ return PIC32_BLOCKSIZE_DES;
+ }
+ return 0;
+}
+
+static int Pic32Crypto(const byte* pIn, int inLen, word32* pOut, int outLen,
+ int dir, int algo, int cryptoalgo,
+
+ /* For DES/AES only */
+ word32* key, int keyLen, word32* iv, int ivLen)
+{
+ int ret = 0;
+ int blockSize = Pic32GetBlockSize(algo);
+ volatile bufferDescriptor bd __attribute__((aligned (8)));
+ securityAssociation sa __attribute__((aligned (8)));
+ securityAssociation *sa_p;
+ bufferDescriptor *bd_p;
+ byte *in_p;
+ byte *out_p;
+ word32* dst;
+ word32 padRemain;
+ int timeout = 0xFFFFFF;
+ word32* in = (word32*)pIn;
+ word32* out = pOut;
+ int isDynamic = 0;
+
+ /* check args */
+ if (in == NULL || inLen <= 0 || out == NULL || blockSize == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* check pointer alignment - must be word aligned */
+ if (((size_t)in % sizeof(word32)) || ((size_t)out % sizeof(word32))) {
+ /* dynamically allocate aligned pointers */
+ isDynamic = 1;
+ in = (word32*)XMALLOC(inLen, NULL, DYNAMIC_TYPE_AES_BUFFER);
+ if (in == NULL)
+ return MEMORY_E;
+ if ((word32*)pIn == pOut) /* inline */
+ out = (word32*)in;
+ else {
+ out = (word32*)XMALLOC(outLen, NULL, DYNAMIC_TYPE_AES_BUFFER);
+ if (out == NULL) {
+ XFREE(in, NULL, DYNAMIC_TYPE_AES_BUFFER);
+ return MEMORY_E;
+ }
+ }
+ XMEMCPY(in, pIn, inLen);
+ }
+
+ /* get uncached address */
+ sa_p = KVA0_TO_KVA1(&sa);
+ bd_p = KVA0_TO_KVA1(&bd);
+ out_p= KVA0_TO_KVA1(out);
+ in_p = KVA0_TO_KVA1(in);
+
+ /* Sync cache if in physical memory (not flash) */
+ if (PIC32MZ_IF_RAM(in_p)) {
+ XMEMCPY(in_p, in, inLen);
+ }
+
+ /* Set up the Security Association */
+ XMEMSET(sa_p, 0, sizeof(sa));
+ sa_p->SA_CTRL.ALGO = algo;
+ sa_p->SA_CTRL.ENCTYPE = dir;
+ sa_p->SA_CTRL.FB = 1; /* first block */
+ sa_p->SA_CTRL.LNC = 1; /* Load new set of keys */
+ if (key) {
+ /* cipher */
+ sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo;
+
+ switch (keyLen) {
+ case 32:
+ sa_p->SA_CTRL.KEYSIZE = PIC32_KEYSIZE_256;
+ break;
+ case 24:
+ case 8: /* DES */
+ sa_p->SA_CTRL.KEYSIZE = PIC32_KEYSIZE_192;
+ break;
+ case 16:
+ sa_p->SA_CTRL.KEYSIZE = PIC32_KEYSIZE_128;
+ break;
+ }
+
+ dst = (word32*)KVA0_TO_KVA1(sa.SA_ENCKEY +
+ (sizeof(sa.SA_ENCKEY)/sizeof(word32)) - (keyLen/sizeof(word32)));
+ ByteReverseWords(dst, key, keyLen);
+
+ if (iv && ivLen > 0) {
+ sa_p->SA_CTRL.LOADIV = 1;
+ dst = (word32*)KVA0_TO_KVA1(sa.SA_ENCIV +
+ (sizeof(sa.SA_ENCIV)/sizeof(word32)) - (ivLen/sizeof(word32)));
+ ByteReverseWords(dst, iv, ivLen);
+ }
+ }
+ else {
+ /* hashing */
+ sa_p->SA_CTRL.LOADIV = 1;
+ sa_p->SA_CTRL.IRFLAG = 0; /* immediate result for hashing */
+
+ dst = (word32*)KVA0_TO_KVA1(sa.SA_AUTHIV +
+ (sizeof(sa.SA_AUTHIV)/sizeof(word32)) - (outLen/sizeof(word32)));
+ ByteReverseWords(dst, out, outLen);
+ }
+
+ /* Set up the Buffer Descriptor */
+ XMEMSET(bd_p, 0, sizeof(bd));
+ bd_p->BD_CTRL.BUFLEN = inLen;
+ padRemain = (inLen % 4); /* make sure buffer is 4-byte multiple */
+ if (padRemain != 0) {
+ bd_p->BD_CTRL.BUFLEN += (4 - padRemain);
+ }
+ bd_p->BD_CTRL.SA_FETCH_EN = 1; /* Fetch the security association */
+ bd_p->BD_CTRL.PKT_INT_EN = 1; /* enable interrupt */
+ bd_p->BD_CTRL.LAST_BD = 1; /* last buffer desc in chain */
+ bd_p->BD_CTRL.LIFM = 1; /* last in frame */
+ bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa);
+ bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in);
+ if (key) {
+ /* cipher */
+ if (in != out)
+ XMEMSET(out_p, 0, outLen); /* clear output buffer */
+ bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out);
+ }
+ else {
+ /* hashing */
+ /* digest result returned in UPDPTR */
+ bd_p->UPDPTR = (unsigned int)KVA_TO_PA(out);
+ }
+ bd_p->NXTPTR = (unsigned int)KVA_TO_PA(&bd);
+ bd_p->MSGLEN = inLen; /* actual message size */
+ bd_p->BD_CTRL.DESC_EN = 1; /* enable this descriptor */
+
+ /* begin access to hardware */
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ /* Software Reset the Crypto Engine */
+ CECON = 1 << 6;
+ while (CECON);
+
+ /* Clear the interrupt flags */
+ CEINTSRC = 0xF;
+
+ /* Run the engine */
+ CEBDPADDR = (unsigned int)KVA_TO_PA(&bd);
+ CEINTEN = 0x07; /* enable DMA Packet Completion Interrupt */
+
+ /* input swap, enable BD fetch and start DMA */
+ #if PIC32_NO_OUT_SWAP
+ CECON = 0x25;
+ #else
+ CECON = 0xa5; /* bit 7 = enable out swap */
+ #endif
+
+ /* wait for operation to complete */
+ while (CEINTSRCbits.PKTIF == 0 && --timeout > 0) {};
+
+ /* Clear the interrupt flags */
+ CEINTSRC = 0xF;
+
+ /* check for errors */
+ if (CESTATbits.ERROP || timeout <= 0) {
+ #if 0
+ printf("PIC32 Crypto: ERROP %x, ERRPHASE %x, TIMEOUT %s\n",
+ CESTATbits.ERROP, CESTATbits.ERRPHASE, timeout <= 0 ? "yes" : "no");
+ #endif
+ ret = ASYNC_OP_E;
+ }
+
+ wolfSSL_CryptHwMutexUnLock();
+
+ /* copy result to output */
+ #if PIC32_NO_OUT_SWAP
+ /* swap bytes */
+ ByteReverseWords(out, (word32*)out_p, outLen);
+ #elif defined(_SYS_DEVCON_LOCAL_H)
+ /* sync cache */
+ SYS_DEVCON_DataCacheInvalidate((word32)out, outLen);
+ #else
+ XMEMCPY(out, out_p, outLen);
+ #endif
+ }
+
+ /* handle unaligned */
+ if (isDynamic) {
+ /* return result */
+ XMEMCPY(pOut, out, outLen);
+
+ /* free dynamic buffers */
+ XFREE(in, NULL, DYNAMIC_TYPE_AES_BUFFER);
+ if ((word32*)pIn != pOut)
+ XFREE(out, NULL, DYNAMIC_TYPE_AES_BUFFER);
+ }
+
+ return ret;
+}
+#endif /* WOLFSSL_PIC32MZ_CRYPT || WOLFSSL_PIC32MZ_HASH */
+
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+
+#ifdef WOLFSSL_PIC32MZ_LARGE_HASH
+
+/* tunable large hash block size */
+#ifndef PIC32_BLOCK_SIZE
+ #define PIC32_BLOCK_SIZE 256
+#endif
+
+#define PIC32MZ_MIN_BLOCK 64
+#define PIC32MZ_MAX_BLOCK (32*1024)
+
+#ifndef PIC32MZ_MAX_BD
+ #define PIC32MZ_MAX_BD 2
+#endif
+
+#if PIC32_BLOCK_SIZE < PIC32MZ_MIN_BLOCK
+ #error Encryption block size must be at least 64 bytes.
+#endif
+
+/* Crypt Engine descriptor */
+typedef struct {
+ int currBd;
+ int err;
+ unsigned int msgSize;
+ uint32_t processed;
+ uint32_t dbPtr;
+ int engine_ready;
+ volatile bufferDescriptor bd[PIC32MZ_MAX_BD] __attribute__((aligned (8)));
+ securityAssociation sa __attribute__((aligned (8)));
+} pic32mz_desc;
+
+static pic32mz_desc gLHDesc;
+static uint8_t gLHDataBuf[PIC32MZ_MAX_BD][PIC32_BLOCK_SIZE] __attribute__((aligned (4), coherent));
+
+static void reset_engine(pic32mz_desc *desc, int algo)
+{
+ int i;
+ pic32mz_desc* uc_desc = KVA0_TO_KVA1(desc);
+
+ wolfSSL_CryptHwMutexLock();
+
+ /* Software reset */
+ CECON = 1 << 6;
+ while (CECON);
+
+ /* Clear the interrupt flags */
+ CEINTSRC = 0xF;
+
+ /* Make sure everything is clear first before we setup */
+ XMEMSET(desc, 0, sizeof(pic32mz_desc));
+ XMEMSET((void *)&uc_desc->sa, 0, sizeof(uc_desc->sa));
+
+ /* Set up the Security Association */
+ uc_desc->sa.SA_CTRL.ALGO = algo;
+ uc_desc->sa.SA_CTRL.LNC = 1;
+ uc_desc->sa.SA_CTRL.FB = 1;
+ uc_desc->sa.SA_CTRL.ENCTYPE = 1;
+ uc_desc->sa.SA_CTRL.LOADIV = 1;
+
+ /* Set up the Buffer Descriptor */
+ uc_desc->err = 0;
+ for (i = 0; i < PIC32MZ_MAX_BD; i++) {
+ XMEMSET((void *)&uc_desc->bd[i], 0, sizeof(uc_desc->bd[i]));
+ uc_desc->bd[i].BD_CTRL.LAST_BD = 1;
+ uc_desc->bd[i].BD_CTRL.LIFM = 1;
+ uc_desc->bd[i].BD_CTRL.PKT_INT_EN = 1;
+ uc_desc->bd[i].SA_ADDR = KVA_TO_PA(&uc_desc->sa);
+ uc_desc->bd[i].SRCADDR = KVA_TO_PA(&gLHDataBuf[i]);
+ if (PIC32MZ_MAX_BD > i+1)
+ uc_desc->bd[i].NXTPTR = KVA_TO_PA(&uc_desc->bd[i+1]);
+ else
+ uc_desc->bd[i].NXTPTR = KVA_TO_PA(&uc_desc->bd[0]);
+ XMEMSET((void *)&gLHDataBuf[i], 0, PIC32_BLOCK_SIZE);
+ }
+ uc_desc->bd[0].BD_CTRL.SA_FETCH_EN = 1; /* Fetch the security association on the first BD */
+ desc->dbPtr = 0;
+ desc->currBd = 0;
+ desc->msgSize = 0;
+ desc->processed = 0;
+ CEBDPADDR = KVA_TO_PA(&(desc->bd[0]));
+
+ CEPOLLCON = 10;
+
+#if PIC32_NO_OUT_SWAP
+ CECON = 0x27;
+#else
+ CECON = 0xa7;
+#endif
+}
+
+static void update_engine(pic32mz_desc *desc, const byte *input, word32 len,
+ word32 *hash)
+{
+ int total;
+ pic32mz_desc *uc_desc = KVA0_TO_KVA1(desc);
+
+ uc_desc->bd[desc->currBd].UPDPTR = KVA_TO_PA(hash);
+
+ /* Add the data to the current buffer. If the buffer fills, start processing it
+ and fill the next one. */
+ while (len) {
+ /* If we've been given the message size, we can process along the
+ way.
+ Enable the current buffer descriptor if it is full. */
+ if (desc->dbPtr >= PIC32_BLOCK_SIZE) {
+ /* Wrap up the buffer descriptor and enable it so the engine can process */
+ uc_desc->bd[desc->currBd].MSGLEN = desc->msgSize;
+ uc_desc->bd[desc->currBd].BD_CTRL.BUFLEN = desc->dbPtr;
+ uc_desc->bd[desc->currBd].BD_CTRL.LAST_BD = 0;
+ uc_desc->bd[desc->currBd].BD_CTRL.LIFM = 0;
+ uc_desc->bd[desc->currBd].BD_CTRL.DESC_EN = 1;
+ /* Move to the next buffer descriptor, or wrap around. */
+ desc->currBd++;
+ if (desc->currBd >= PIC32MZ_MAX_BD)
+ desc->currBd = 0;
+ /* Wait until the engine has processed the new BD. */
+ while (uc_desc->bd[desc->currBd].BD_CTRL.DESC_EN);
+ uc_desc->bd[desc->currBd].UPDPTR = KVA_TO_PA(hash);
+ desc->dbPtr = 0;
+ }
+ if (!PIC32MZ_IF_RAM(input)) {
+ /* If we're inputting from flash, let the BD have
+ the address and max the buffer size */
+ uc_desc->bd[desc->currBd].SRCADDR = KVA_TO_PA(input);
+ total = (len > PIC32MZ_MAX_BLOCK ? PIC32MZ_MAX_BLOCK : len);
+ desc->dbPtr = total;
+ len -= total;
+ input += total;
+ }
+ else {
+ if (len > PIC32_BLOCK_SIZE - desc->dbPtr) {
+ /* We have more data than can be put in the buffer. Fill what we can.*/
+ total = PIC32_BLOCK_SIZE - desc->dbPtr;
+ XMEMCPY(&gLHDataBuf[desc->currBd][desc->dbPtr], input, total);
+ len -= total;
+ desc->dbPtr = PIC32_BLOCK_SIZE;
+ input += total;
+ }
+ else {
+ /* Fill up what we have, but don't turn on the engine.*/
+ XMEMCPY(&gLHDataBuf[desc->currBd][desc->dbPtr], input, len);
+ desc->dbPtr += len;
+ len = 0;
+ }
+ }
+ }
+}
+
+static void start_engine(pic32mz_desc *desc)
+{
+ /* Wrap up the last buffer descriptor and enable it */
+ int bufferLen;
+ pic32mz_desc *uc_desc = KVA0_TO_KVA1(desc);
+
+ bufferLen = desc->dbPtr;
+ if (bufferLen % 4)
+ bufferLen = (bufferLen + 4) - (bufferLen % 4);
+ /* initialize the MSGLEN on engine startup to avoid infinite loop when
+ * length is less than 257 (size of PIC32_BLOCK_SIZE) */
+ uc_desc->bd[desc->currBd].MSGLEN = desc->msgSize;
+ uc_desc->bd[desc->currBd].BD_CTRL.BUFLEN = bufferLen;
+ uc_desc->bd[desc->currBd].BD_CTRL.LAST_BD = 1;
+ uc_desc->bd[desc->currBd].BD_CTRL.LIFM = 1;
+ uc_desc->bd[desc->currBd].BD_CTRL.DESC_EN = 1;
+}
+
+void wait_engine(pic32mz_desc *desc, char *hash, int hash_sz)
+{
+ int i;
+ pic32mz_desc *uc_desc = KVA0_TO_KVA1(desc);
+ unsigned int engineRunning;
+
+ do {
+ engineRunning = 0;
+ for (i = 0; i < PIC32MZ_MAX_BD; i++) {
+ engineRunning = engineRunning || uc_desc->bd[i].BD_CTRL.DESC_EN;
+ }
+ } while (engineRunning);
+
+#if PIC32_NO_OUT_SWAP
+ /* swap bytes */
+ ByteReverseWords(hash, KVA0_TO_KVA1(hash), hash_sz);
+#else
+ /* copy output - hardware already swapped */
+ XMEMCPY(hash, KVA0_TO_KVA1(hash), hash_sz);
+#endif
+
+ wolfSSL_CryptHwMutexUnLock();
+}
+
+#endif /* WOLFSSL_PIC32MZ_LARGE_HASH */
+
+int wc_Pic32Hash(const byte* in, int inLen, word32* out, int outLen, int algo)
+{
+ return Pic32Crypto(in, inLen, out, outLen, PIC32_ENCRYPTION, algo, 0,
+ NULL, 0, NULL, 0);
+}
+
+int wc_Pic32HashCopy(hashUpdCache* src, hashUpdCache* dst)
+{
+ /* mark destination as copy, so cache->buf is not free'd */
+ if (dst) {
+ dst->isCopy = 1;
+ }
+ return 0;
+}
+
+static int wc_Pic32HashUpdate(hashUpdCache* cache, byte* stdBuf, int stdBufLen,
+ word32* digest, int digestSz, const byte* data, int len, int algo, void* heap)
+{
+ int ret = 0;
+ word32 newLenUpd, newLenPad, padRemain;
+ byte* newBuf;
+ int isNewBuf = 0;
+
+#ifdef WOLFSSL_PIC32MZ_LARGE_HASH
+ /* if final length is set then pass straight to hardware */
+ if (cache->finalLen) {
+ if (cache->bufLen == 0) {
+ reset_engine(&gLHDesc, algo);
+ gLHDesc.msgSize = cache->finalLen;
+ }
+ update_engine(&gLHDesc, data, len, digest);
+ cache->bufLen += len; /* track progress for blockType */
+ return 0;
+ }
+#endif
+
+ /* cache updates */
+ /* calculate new len */
+ newLenUpd = cache->updLen + len;
+
+ /* calculate padded len - pad buffer at 64-bytes for hardware */
+ newLenPad = newLenUpd;
+ padRemain = (newLenUpd % PIC32_BLOCKSIZE_HASH);
+ if (padRemain != 0) {
+ newLenPad += (PIC32_BLOCKSIZE_HASH - padRemain);
+ }
+
+ /* determine buffer source */
+ if (newLenPad <= stdBufLen) {
+ /* use standard buffer */
+ newBuf = stdBuf;
+ }
+ else if (newLenPad > cache->bufLen) {
+ /* alloc buffer */
+ newBuf = (byte*)XMALLOC(newLenPad, heap, DYNAMIC_TYPE_HASH_TMP);
+ if (newBuf == NULL) {
+ if (cache->buf != stdBuf && !cache->isCopy) {
+ XFREE(cache->buf, heap, DYNAMIC_TYPE_HASH_TMP);
+ cache->buf = NULL;
+ cache->updLen = cache->bufLen = 0;
+ }
+ return MEMORY_E;
+ }
+ isNewBuf = 1;
+ cache->isCopy = 0; /* no longer using copy buffer */
+ }
+ else {
+ /* use existing buffer */
+ newBuf = cache->buf;
+ }
+ if (cache->buf && cache->updLen > 0) {
+ XMEMCPY(newBuf, cache->buf, cache->updLen);
+ if (isNewBuf && cache->buf != stdBuf) {
+ XFREE(cache->buf, heap, DYNAMIC_TYPE_HASH_TMP);
+ cache->buf = NULL;
+ }
+ }
+ XMEMCPY(newBuf + cache->updLen, data, len);
+
+ cache->buf = newBuf;
+ cache->updLen = newLenUpd;
+ cache->bufLen = newLenPad;
+
+ return ret;
+}
+
+static int wc_Pic32HashFinal(hashUpdCache* cache, byte* stdBuf,
+ word32* digest, byte* hash, int digestSz, int algo, void* heap)
+{
+ int ret = 0;
+
+ /* if room add the pad */
+ if (cache->buf && cache->updLen < cache->bufLen) {
+ cache->buf[cache->updLen] = 0x80;
+ }
+
+#ifdef WOLFSSL_PIC32MZ_LARGE_HASH
+ if (cache->finalLen) {
+ start_engine(&gLHDesc);
+ wait_engine(&gLHDesc, (char*)digest, digestSz);
+ XMEMCPY(hash, digest, digestSz);
+ cache->finalLen = 0;
+ }
+ else
+#endif
+ {
+ if (cache->updLen == 0) {
+ /* handle empty input */
+ switch (algo) {
+ case PIC32_ALGO_SHA256: {
+ const char* sha256EmptyHash =
+ "\xe3\xb0\xc4\x42\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99\x6f\xb9"
+ "\x24\x27\xae\x41\xe4\x64\x9b\x93\x4c\xa4\x95\x99\x1b\x78\x52"
+ "\xb8\x55";
+ XMEMCPY(hash, sha256EmptyHash, digestSz);
+ break;
+ }
+ case PIC32_ALGO_SHA1: {
+ const char* shaEmptyHash =
+ "\xda\x39\xa3\xee\x5e\x6b\x4b\x0d\x32\x55\xbf\xef\x95\x60\x18"
+ "\x90\xaf\xd8\x07\x09";
+ XMEMCPY(hash, shaEmptyHash, digestSz);
+ break;
+ }
+ case PIC32_ALGO_MD5: {
+ const char* md5EmptyHash =
+ "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04\xe9\x80\x09\x98\xec\xf8\x42"
+ "\x7e";
+ XMEMCPY(hash, md5EmptyHash, digestSz);
+ break;
+ }
+ } /* switch */
+ }
+ else {
+ ret = wc_Pic32Hash(cache->buf, cache->updLen, digest, digestSz, algo);
+ if (ret == 0) {
+ XMEMCPY(hash, digest, digestSz);
+ }
+ }
+
+ if (cache->buf && cache->buf != stdBuf && !cache->isCopy) {
+ XFREE(cache->buf, heap, DYNAMIC_TYPE_HASH_TMP);
+ cache->buf = NULL;
+ }
+ }
+
+ cache->buf = NULL;
+ cache->bufLen = cache->updLen = 0;
+
+ return ret;
+}
+
+static void wc_Pic32HashFree(hashUpdCache* cache, void* heap)
+{
+ if (cache && cache->buf && !cache->isCopy) {
+ XFREE(cache->buf, heap, DYNAMIC_TYPE_HASH_TMP);
+ cache->buf = NULL;
+ }
+}
+
+/* API's for compatibility with Harmony wrappers - not used */
+#ifndef NO_MD5
+ int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId)
+ {
+ if (md5 == NULL)
+ return BAD_FUNC_ARG;
+
+ XMEMSET(md5, 0, sizeof(wc_Md5));
+ md5->heap = heap;
+ (void)devId;
+ return 0;
+ }
+ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len)
+ {
+ if (md5 == NULL || (data == NULL && len > 0))
+ return BAD_FUNC_ARG;
+ return wc_Pic32HashUpdate(&md5->cache, (byte*)md5->buffer,
+ sizeof(md5->buffer), md5->digest, MD5_DIGEST_SIZE,
+ data, len, PIC32_ALGO_MD5, md5->heap);
+ }
+ int wc_Md5Final(wc_Md5* md5, byte* hash)
+ {
+ int ret;
+
+ if (md5 == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_Pic32HashFinal(&md5->cache, (byte*)md5->buffer,
+ md5->digest, hash, MD5_DIGEST_SIZE,
+ PIC32_ALGO_MD5, md5->heap);
+
+ wc_InitMd5_ex(md5, md5->heap, INVALID_DEVID); /* reset state */
+
+ return ret;
+ }
+ void wc_Md5SizeSet(wc_Md5* md5, word32 len)
+ {
+ if (md5) {
+ #ifdef WOLFSSL_PIC32MZ_LARGE_HASH
+ md5->cache.finalLen = len;
+ #else
+ (void)len;
+ #endif
+ }
+ }
+ void wc_Md5Pic32Free(wc_Md5* md5)
+ {
+ if (md5) {
+ wc_Pic32HashFree(&md5->cache, md5->heap);
+ }
+ }
+#endif /* !NO_MD5 */
+#ifndef NO_SHA
+ int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId)
+ {
+ if (sha == NULL)
+ return BAD_FUNC_ARG;
+
+ XMEMSET(sha, 0, sizeof(wc_Sha));
+ sha->heap = heap;
+ (void)devId;
+ return 0;
+ }
+ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
+ {
+ if (sha == NULL || (data == NULL && len > 0))
+ return BAD_FUNC_ARG;
+ return wc_Pic32HashUpdate(&sha->cache, (byte*)sha->buffer,
+ sizeof(sha->buffer), sha->digest, SHA_DIGEST_SIZE,
+ data, len, PIC32_ALGO_SHA1, sha->heap);
+ }
+ int wc_ShaFinal(wc_Sha* sha, byte* hash)
+ {
+ int ret;
+
+ if (sha == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_Pic32HashFinal(&sha->cache, (byte*)sha->buffer,
+ sha->digest, hash, SHA_DIGEST_SIZE,
+ PIC32_ALGO_SHA1, sha->heap);
+
+ wc_InitSha_ex(sha, sha->heap, INVALID_DEVID); /* reset state */
+
+ return ret;
+ }
+ void wc_ShaSizeSet(wc_Sha* sha, word32 len)
+ {
+ if (sha) {
+ #ifdef WOLFSSL_PIC32MZ_LARGE_HASH
+ sha->cache.finalLen = len;
+ #else
+ (void)len;
+ #endif
+ }
+ }
+ void wc_ShaPic32Free(wc_Sha* sha)
+ {
+ if (sha) {
+ wc_Pic32HashFree(&sha->cache, sha->heap);
+ }
+ }
+#endif /* !NO_SHA */
+#ifndef NO_SHA256
+ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+ {
+ if (sha256 == NULL)
+ return BAD_FUNC_ARG;
+
+ XMEMSET(sha256, 0, sizeof(wc_Sha256));
+ sha256->heap = heap;
+ (void)devId;
+ return 0;
+ }
+ int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
+ {
+ if (sha256 == NULL || (data == NULL && len > 0))
+ return BAD_FUNC_ARG;
+ return wc_Pic32HashUpdate(&sha256->cache, (byte*)sha256->buffer,
+ sizeof(sha256->buffer), sha256->digest, SHA256_DIGEST_SIZE,
+ data, len, PIC32_ALGO_SHA256, sha256->heap);
+ }
+ int wc_Sha256Final(wc_Sha256* sha256, byte* hash)
+ {
+ int ret;
+
+ if (sha256 == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_Pic32HashFinal(&sha256->cache, (byte*)sha256->buffer,
+ sha256->digest, hash, SHA256_DIGEST_SIZE,
+ PIC32_ALGO_SHA256, sha256->heap);
+
+ wc_InitSha256_ex(sha256, sha256->heap, INVALID_DEVID); /* reset state */
+
+ return ret;
+ }
+ void wc_Sha256SizeSet(wc_Sha256* sha256, word32 len)
+ {
+ if (sha256) {
+ #ifdef WOLFSSL_PIC32MZ_LARGE_HASH
+ sha256->cache.finalLen = len;
+ #else
+ (void)len;
+ #endif
+ }
+ }
+ void wc_Sha256Pic32Free(wc_Sha256* sha256)
+ {
+ if (sha256) {
+ wc_Pic32HashFree(&sha256->cache, sha256->heap);
+ }
+ }
+#endif /* !NO_SHA256 */
+#endif /* WOLFSSL_PIC32MZ_HASH */
+
+
+#ifdef WOLFSSL_PIC32MZ_CRYPT
+#if !defined(NO_AES)
+ int wc_Pic32AesCrypt(word32 *key, int keyLen, word32 *iv, int ivLen,
+ byte* out, const byte* in, word32 sz,
+ int dir, int algo, int cryptoalgo)
+ {
+ return Pic32Crypto(in, sz, (word32*)out, sz, dir, algo, cryptoalgo,
+ key, keyLen, iv, ivLen);
+ }
+#endif /* !NO_AES */
+
+#ifndef NO_DES3
+ int wc_Pic32DesCrypt(word32 *key, int keyLen, word32 *iv, int ivLen,
+ byte* out, const byte* in, word32 sz,
+ int dir, int algo, int cryptoalgo)
+ {
+ return Pic32Crypto(in, sz, (word32*)out, sz, dir, algo, cryptoalgo,
+ key, keyLen, iv, ivLen);
+ }
+#endif /* !NO_DES3 */
+#endif /* WOLFSSL_PIC32MZ_CRYPT */
+
+#endif /* WOLFSSL_MICROCHIP_PIC32MZ */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-aes.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-aes.c
index d38e7a3cb..52f2ceb97 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-aes.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-aes.c
@@ -1,8 +1,8 @@
/* port/ti/ti-aes.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
-
+
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -65,14 +66,14 @@ WOLFSSL_API int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte*
return BAD_FUNC_ARG;
if(!((dir == AES_ENCRYPTION) || (dir == AES_DECRYPTION)))
return BAD_FUNC_ARG;
-
+
switch(len) {
case 16: aes->keylen = AES_CFG_KEY_SIZE_128BIT ; break ;
case 24: aes->keylen = AES_CFG_KEY_SIZE_192BIT ; break ;
case 32: aes->keylen = AES_CFG_KEY_SIZE_256BIT ; break ;
- default: return BAD_FUNC_ARG;
+ default: return BAD_FUNC_ARG;
}
-
+
XMEMCPY(aes->key, key, len) ;
#ifdef WOLFSSL_AES_COUNTER
aes->left = 0;
@@ -84,19 +85,19 @@ WOLFSSL_API int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte*
#define IS_ALIGN16(p) (((unsigned int)(p)&0xf) == 0)
static int AesAlign16(Aes* aes, byte* out, const byte* in, word32 sz, word32 dir, word32 mode)
-{
+{
wolfSSL_TI_lockCCM() ;
ROM_AESReset(AES_BASE);
- ROM_AESConfigSet(AES_BASE, (aes->keylen | dir |
+ ROM_AESConfigSet(AES_BASE, (aes->keylen | dir |
(mode==AES_CFG_MODE_CTR_NOCTR ? AES_CFG_MODE_CTR : mode)));
- ROM_AESIVSet(AES_BASE, aes->reg);
- ROM_AESKey1Set(AES_BASE, aes->key, aes->keylen);
+ ROM_AESIVSet(AES_BASE, (uint32_t *)aes->reg);
+ ROM_AESKey1Set(AES_BASE, (uint32_t *)aes->key, aes->keylen);
if((dir == AES_CFG_DIR_DECRYPT)&& (mode == AES_CFG_MODE_CBC))
/* if input and output same will overwrite input iv */
XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
ROM_AESDataProcess(AES_BASE, (uint32_t *)in, (uint32_t *)out, sz);
wolfSSL_TI_unlockCCM() ;
-
+
/* store iv for next call */
if(mode == AES_CFG_MODE_CBC){
if(dir == AES_CFG_DIR_ENCRYPT)
@@ -106,7 +107,7 @@ static int AesAlign16(Aes* aes, byte* out, const byte* in, word32 sz, word32 di
}
if(mode == AES_CFG_MODE_CTR) {
- do {
+ do {
int i ;
for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
if (++((byte *)aes->reg)[i])
@@ -120,12 +121,12 @@ static int AesAlign16(Aes* aes, byte* out, const byte* in, word32 sz, word32 di
}
static int AesProcess(Aes* aes, byte* out, const byte* in, word32 sz, word32 dir, word32 mode)
-{
- const byte * in_p ; byte * out_p ;
+{
+ const byte * in_p ; byte * out_p ;
word32 size ;
#define TI_BUFFSIZE 1024
byte buff[TI_BUFFSIZE] ;
-
+
if ((aes == NULL) || (in == NULL) || (out == NULL))
return BAD_FUNC_ARG;
if(sz % AES_BLOCK_SIZE)
@@ -135,16 +136,16 @@ static int AesProcess(Aes* aes, byte* out, const byte* in, word32 sz, word32 di
size = sz ; in_p = in ; out_p = out ;
if(!IS_ALIGN16(in)){
size = sz>TI_BUFFSIZE ? TI_BUFFSIZE : sz ;
- XMEMCPY(buff, in, size) ;
+ XMEMCPY(buff, in, size) ;
in_p = (const byte *)buff ;
}
if(!IS_ALIGN16(out)){
size = sz>TI_BUFFSIZE ? TI_BUFFSIZE : sz ;
out_p = buff ;
}
-
+
AesAlign16(aes, out_p, in_p, size, dir, mode) ;
-
+
if(!IS_ALIGN16(out)){
XMEMCPY(out, buff, size) ;
}
@@ -155,18 +156,18 @@ static int AesProcess(Aes* aes, byte* out, const byte* in, word32 sz, word32 di
}
WOLFSSL_API int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
+{
return AesProcess(aes, out, in, sz, AES_CFG_DIR_ENCRYPT, AES_CFG_MODE_CBC) ;
}
WOLFSSL_API int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
+{
return AesProcess(aes, out, in, sz, AES_CFG_DIR_DECRYPT, AES_CFG_MODE_CBC) ;
}
#ifdef WOLFSSL_AES_COUNTER
WOLFSSL_API void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
+{
char out_block[AES_BLOCK_SIZE] ;
int odd ;
int even ;
@@ -181,7 +182,7 @@ WOLFSSL_API void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz
}
XMEMCPY(tmp+aes->left, in, odd) ;
if((odd+aes->left) == AES_BLOCK_SIZE){
- AesProcess(aes, (byte *)out_block, (byte const *)tmp, AES_BLOCK_SIZE,
+ AesProcess(aes, (byte *)out_block, (byte const *)tmp, AES_BLOCK_SIZE,
AES_CFG_DIR_ENCRYPT, AES_CFG_MODE_CTR) ;
XMEMCPY(out, out_block+aes->left, odd) ;
aes->left = 0 ;
@@ -201,8 +202,8 @@ WOLFSSL_API void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz
if(odd) {
XMEMSET(tmp+aes->left, 0x0, AES_BLOCK_SIZE - aes->left) ;
XMEMCPY(tmp+aes->left, in, odd) ;
- AesProcess(aes, (byte *)out_block, (byte const *)tmp, AES_BLOCK_SIZE,
- AES_CFG_DIR_ENCRYPT,
+ AesProcess(aes, (byte *)out_block, (byte const *)tmp, AES_BLOCK_SIZE,
+ AES_CFG_DIR_ENCRYPT,
AES_CFG_MODE_CTR_NOCTR /* Counter mode without counting IV */
);
XMEMCPY(out, out_block+aes->left,odd) ;
@@ -250,11 +251,12 @@ static int AesAuthArgCheck(Aes* aes, byte* out, const byte* in, word32 inSz,
const byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz, word32 *M, word32 *L)
{
+ (void) authInSz ;
if((aes == NULL)||(nonce == NULL)||(authTag== NULL)||(authIn == NULL))
return BAD_FUNC_ARG;
if((inSz != 0) && ((out == NULL)||(in == NULL)))
return BAD_FUNC_ARG;
-
+
switch(authTagSz){
case 4:
*M = AES_CFG_CCM_M_4; break ;
@@ -302,24 +304,24 @@ static void AesAuthSetIv(Aes *aes, const byte *nonce, word32 len, word32 L, int
if(mode == AES_CFG_MODE_CCM){
XMEMSET(aes->reg, 0, 16) ;
switch(L){
- case AES_CFG_CCM_L_8:
+ case AES_CFG_CCM_L_8:
aes->reg[0] = 0x7; break ;
- case AES_CFG_CCM_L_7:
+ case AES_CFG_CCM_L_7:
aes->reg[0] = 0x6; break ;
- case AES_CFG_CCM_L_6:
+ case AES_CFG_CCM_L_6:
aes->reg[0] = 0x5; break ;
- case AES_CFG_CCM_L_5:
+ case AES_CFG_CCM_L_5:
aes->reg[0] = 0x4; break ;
- case AES_CFG_CCM_L_4:
+ case AES_CFG_CCM_L_4:
aes->reg[0] = 0x3; break ;
- case AES_CFG_CCM_L_3:
+ case AES_CFG_CCM_L_3:
aes->reg[0] = 0x2; break ;
- case AES_CFG_CCM_L_2:
+ case AES_CFG_CCM_L_2:
aes->reg[0] = 0x1; break ;
- case AES_CFG_CCM_L_1:
+ case AES_CFG_CCM_L_1:
aes->reg[0] = 0x0; break ;
}
- XMEMCPY(((byte *)aes->reg)+1, nonce, len) ;
+ XMEMCPY(((byte *)aes->reg)+1, nonce, len) ;
} else {
byte *b = (byte *)aes->reg ;
XMEMSET(aes->reg, 0, AES_BLOCK_SIZE);
@@ -342,7 +344,7 @@ static int AesAuthEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
const byte* nonce, word32 nonceSz,
byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz, int mode)
-{
+{
word32 M, L ;
byte *in_a, *in_save ;
byte *out_a, *out_save ;
@@ -353,26 +355,26 @@ static int AesAuthEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
if(AesAuthArgCheck(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz, authIn, authInSz, &M, &L)
== BAD_FUNC_ARG)return BAD_FUNC_ARG ;
-
+
/* 16 byte padding */
in_save = NULL ; out_save = NULL ; authIn_save = NULL ; nonce_save = NULL ;
if((inSz%16)==0){
in_save = NULL ; in_a = (byte *)in ;
out_save = NULL ; out_a = out ;
} else {
- if((in_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
+ if((in_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
FREE_ALL; return MEMORY_E ; }
in_a = in_save ; XMEMSET(in_a, 0, RoundUp16(inSz)) ; XMEMCPY(in_a, in, inSz) ;
-
- if((out_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
- FREE_ALL; return MEMORY_E ; }
- out_a = out_save ;
+
+ if((out_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
+ FREE_ALL; return MEMORY_E ; }
+ out_a = out_save ;
}
-
+
if((authInSz%16)==0){
authIn_save = NULL ; authIn_a = (byte *)authIn ;
} else {
- if((authIn_save = XMALLOC(RoundUp16(authInSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
+ if((authIn_save = XMALLOC(RoundUp16(authInSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
FREE_ALL; return MEMORY_E ; }
authIn_a = authIn_save ; XMEMSET(authIn_a, 0, RoundUp16(authInSz)) ; XMEMCPY(authIn_a, authIn, authInSz) ;
}
@@ -380,7 +382,7 @@ static int AesAuthEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
if((nonceSz%16)==0){
nonce_save = NULL ; nonce_a = (byte *)nonce ;
} else {
- if((nonce_save = XMALLOC(RoundUp16(nonceSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
+ if((nonce_save = XMALLOC(RoundUp16(nonceSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
FREE_ALL; return MEMORY_E; }
nonce_a = nonce_save ; XMEMSET(nonce_a, 0, RoundUp16(nonceSz)) ; XMEMCPY(nonce_a, nonce, nonceSz) ;
}
@@ -403,7 +405,7 @@ static int AesAuthEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
XMEMCPY(authTag, tmpTag, authTagSz) ;
}
- FREE_ALL;
+ FREE_ALL;
return 0 ;
}
@@ -411,7 +413,7 @@ static int AesAuthDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
const byte* nonce, word32 nonceSz,
const byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz, int mode)
-{
+{
word32 M, L ;
byte *in_a, *in_save ;
byte *out_a, *out_save ;
@@ -422,26 +424,26 @@ static int AesAuthDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
if(AesAuthArgCheck(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz, authIn, authInSz, &M, &L)
== BAD_FUNC_ARG)return BAD_FUNC_ARG ;
-
+
/* 16 byte padding */
in_save = NULL ; out_save = NULL ; authIn_save = NULL ; nonce_save = NULL ;
if((inSz%16)==0){
in_save = NULL ; in_a = (byte *)in ;
out_save = NULL ; out_a = out ;
} else {
- if((in_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
+ if((in_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
FREE_ALL; return MEMORY_E;}
in_a = in_save ; XMEMSET(in_a, 0, RoundUp16(inSz)) ; XMEMCPY(in_a, in, inSz) ;
-
- if((out_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
+
+ if((out_save = XMALLOC(RoundUp16(inSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
FREE_ALL; return MEMORY_E;}
- out_a = out_save ;
+ out_a = out_save ;
}
-
+
if((authInSz%16)==0){
authIn_save = NULL ; authIn_a = (byte *)authIn ;
} else {
- if((authIn_save = XMALLOC(RoundUp16(authInSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
+ if((authIn_save = XMALLOC(RoundUp16(authInSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
FREE_ALL; return MEMORY_E; }
authIn_a = authIn_save ; XMEMSET(authIn_a, 0, RoundUp16(authInSz)) ; XMEMCPY(authIn_a, authIn, authInSz) ;
}
@@ -449,7 +451,7 @@ static int AesAuthDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
if((nonceSz%16)==0){
nonce_save = NULL ; nonce_a = (byte *)nonce ;
} else {
- if((nonce_save = XMALLOC(RoundUp16(nonceSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
+ if((nonce_save = XMALLOC(RoundUp16(nonceSz), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL){
FREE_ALL; return MEMORY_E; }
nonce_a = nonce_save ; XMEMSET(nonce_a, 0, RoundUp16(nonceSz)) ; XMEMCPY(nonce_a, nonce, nonceSz) ;
}
@@ -468,7 +470,7 @@ static int AesAuthDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
XMEMSET(out, 0, inSz) ;
ret = false ;
} else {
- XMEMCPY(out, out_a, inSz) ;
+ XMEMCPY(out, out_a, inSz) ;
}
FREE_ALL ;
@@ -488,6 +490,9 @@ WOLFSSL_API int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz
byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz)
{
+ if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
+ return BAD_FUNC_ARG;
+ }
return AesAuthEncrypt(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
authIn, authInSz, AES_CFG_MODE_GCM_HY0CALC) ;
}
@@ -495,7 +500,7 @@ WOLFSSL_API int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz
const byte* iv, word32 ivSz,
const byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz)
-{
+{
return AesAuthDecrypt(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
authIn, authInSz, AES_CFG_MODE_GCM_HY0CALC) ;
}
@@ -516,17 +521,17 @@ WOLFSSL_API int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
#endif /* HAVE_AESGCM */
#ifdef HAVE_AESCCM
-WOLFSSL_API void wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
+WOLFSSL_API int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
{
- AesAuthSetKey(aes, key, keySz) ;
+ return AesAuthSetKey(aes, key, keySz) ;
}
-WOLFSSL_API void wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
+WOLFSSL_API int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
const byte* nonce, word32 nonceSz,
byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz)
-{
- AesAuthEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz,
+{
+ return AesAuthEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz,
authIn, authInSz, AES_CFG_MODE_CCM) ;
}
@@ -534,12 +539,28 @@ WOLFSSL_API int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inS
const byte* nonce, word32 nonceSz,
const byte* authTag, word32 authTagSz,
const byte* authIn, word32 authInSz)
-{
+{
return AesAuthDecrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz,
authIn, authInSz, AES_CFG_MODE_CCM) ;
}
#endif /* HAVE_AESCCM */
+WOLFSSL_API int wc_AesInit(Aes* aes, void* heap, int devId)
+{
+ if (aes == NULL)
+ return BAD_FUNC_ARG;
+
+ aes->heap = heap;
+ (void)devId;
+
+ return 0;
+}
+
+WOLFSSL_API void wc_AesFree(Aes* aes)
+{
+ (void)aes;
+}
+
#endif /* WOLFSSL_TI_CRYPT */
#endif /* NO_AES */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-ccm.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-ccm.c
index 09705cfb8..5c0051e03 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-ccm.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-ccm.c
@@ -1,8 +1,8 @@
/* port/ti/ti_ccm.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -27,56 +28,67 @@
#if defined(WOLFSSL_TI_CRYPT) || defined(WOLFSSL_TI_HASH)
-
+#include "wolfssl/wolfcrypt/port/ti/ti-ccm.h"
#include <stdbool.h>
#include <stdint.h>
+#ifndef TI_DUMMY_BUILD
#include "driverlib/sysctl.h"
#include "driverlib/rom_map.h"
#include "driverlib/rom.h"
#ifndef SINGLE_THREADED
#include <wolfssl/wolfcrypt/wc_port.h>
- static wolfSSL_Mutex TI_CCM_Mutex ;
+ static wolfSSL_Mutex TI_CCM_Mutex;
#endif
+#endif /* TI_DUMMY_BUILD */
#define TIMEOUT 500000
-#define WAIT(stat) { volatile int i ; for(i=0; i<TIMEOUT; i++)if(stat)break ; if(i==TIMEOUT)return(false) ; }
+#define WAIT(stat) { volatile int i; for(i=0; i<TIMEOUT; i++)if(stat)break; if(i==TIMEOUT)return(false); }
-static bool ccm_init = false ;
-bool wolfSSL_TI_CCMInit(void)
+static bool ccm_init = false;
+int wolfSSL_TI_CCMInit(void)
{
- if(ccm_init)return true ;
- ccm_init = true ;
+ if (ccm_init)
+ return true;
+ ccm_init = true;
+#ifndef TI_DUMMY_BUILD
SysCtlClockFreqSet((SYSCTL_XTAL_25MHZ |
SYSCTL_OSC_MAIN |
SYSCTL_USE_PLL |
SYSCTL_CFG_VCO_480), 120000000);
-
- if(!ROM_SysCtlPeripheralPresent(SYSCTL_PERIPH_CCM0))
- return false ;
-
+
+ if (!ROM_SysCtlPeripheralPresent(SYSCTL_PERIPH_CCM0))
+ return false;
+
ROM_SysCtlPeripheralEnable(SYSCTL_PERIPH_CCM0);
- WAIT(ROM_SysCtlPeripheralReady(SYSCTL_PERIPH_CCM0)) ;
+ WAIT(ROM_SysCtlPeripheralReady(SYSCTL_PERIPH_CCM0));
ROM_SysCtlPeripheralReset(SYSCTL_PERIPH_CCM0);
- WAIT(ROM_SysCtlPeripheralReady(SYSCTL_PERIPH_CCM0)) ;
-
+ WAIT(ROM_SysCtlPeripheralReady(SYSCTL_PERIPH_CCM0));
+
#ifndef SINGLE_THREADED
- InitMutex(&TI_CCM_Mutex) ;
+ if (wc_InitMutex(&TI_CCM_Mutex))
+ return false;
#endif
+#endif /* !TI_DUMMY_BUILD */
- return true ;
+ return true;
}
#ifndef SINGLE_THREADED
-void wolfSSL_TI_lockCCM() {
- LockMutex(&TI_CCM_Mutex) ;
+void wolfSSL_TI_lockCCM(void)
+{
+#ifndef TI_DUMMY_BUILD
+ wc_LockMutex(&TI_CCM_Mutex);
+#endif
}
-void wolfSSL_TI_unlockCCM() {
- UnLockMutex(&TI_CCM_Mutex) ;
-}
+void wolfSSL_TI_unlockCCM(void){
+#ifndef TI_DUMMY_BUILD
+ wc_UnLockMutex(&TI_CCM_Mutex);
#endif
+}
+#endif /* !SINGLE_THREADED */
-#endif
+#endif /* WOLFSSL_TI_CRYPT || WOLFSSL_TI_HASH */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-des3.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-des3.c
index 21c61d310..0e3c81dcd 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-des3.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-des3.c
@@ -1,8 +1,8 @@
/* port/ti/ti-des.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -63,39 +64,39 @@ static int DesSetKey(Des* des, const byte* key, const byte* iv,int dir, int tri
return BAD_FUNC_ARG;
if(!((dir == DES_ENCRYPTION) || (dir == DES_DECRYPTION)))
return BAD_FUNC_ARG;
-
+
XMEMCPY(des->key, key, tri == DES_CFG_SINGLE ? DES_KEYLEN : DES3_KEYLEN) ;
return DesSetIV(des, iv, tri);
}
static int DesCbcAlign16(Des* des, byte* out, const byte* in, word32 sz, word32 dir, word32 tri)
-{
+{
wolfSSL_TI_lockCCM() ;
ROM_DESReset(DES_BASE);
ROM_DESConfigSet(DES_BASE, (dir | DES_CFG_MODE_CBC | tri));
- ROM_DESIVSet(DES_BASE, des->reg);
- ROM_DESKeySet(DES_BASE, des->key);
+ ROM_DESIVSet(DES_BASE, (uint32_t*)des->reg);
+ ROM_DESKeySet(DES_BASE,(uint32_t*)des->key);
if(dir == DES_CFG_DIR_DECRYPT)
/* if input and output same will overwrite input iv */
XMEMCPY(des->tmp, in + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
ROM_DESDataProcess(DES_BASE, (uint32_t *)in, (uint32_t *)out, sz);
wolfSSL_TI_unlockCCM() ;
-
+
/* store iv for next call */
if(dir == DES_CFG_DIR_ENCRYPT)
XMEMCPY(des->reg, out + sz - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
else
XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
-
+
return 0 ;
}
#define IS_ALIGN16(p) (((unsigned int)(p)&0xf) == 0)
static int DesCbc(Des* des, byte* out, const byte* in, word32 sz, word32 dir, word32 tri)
-{
- const byte * in_p ; byte * out_p ;
+{
+ const byte * in_p ; byte * out_p ;
word32 size ;
#define TI_BUFFSIZE 1024
byte buff[TI_BUFFSIZE] ;
@@ -103,21 +104,21 @@ static int DesCbc(Des* des, byte* out, const byte* in, word32 sz, word32 dir, w
return BAD_FUNC_ARG;
if(sz % DES_BLOCK_SIZE)
return BAD_FUNC_ARG;
-
+
while(sz > 0) {
size = sz ; in_p = in ; out_p = out ;
if(!IS_ALIGN16(in)){
size = sz>TI_BUFFSIZE ? TI_BUFFSIZE : sz ;
- XMEMCPY(buff, in, size) ;
+ XMEMCPY(buff, in, size) ;
in_p = (const byte *)buff ;
}
if(!IS_ALIGN16(out)){
size = sz>TI_BUFFSIZE ? TI_BUFFSIZE : sz ;
out_p = (byte *)buff ;
}
-
+
DesCbcAlign16(des, out_p, in_p, size, dir, tri) ;
-
+
if(!IS_ALIGN16(out)){
XMEMCPY(out, buff, size) ;
}
@@ -148,32 +149,54 @@ WOLFSSL_API int wc_Des3_SetIV(Des3* des, const byte* iv)
WOLFSSL_API int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
+{
return DesCbc(des, out, in, sz, DES_CFG_DIR_ENCRYPT, DES_CFG_SINGLE) ;
}
WOLFSSL_API int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
+{
return DesCbc(des, out, in, sz, DES_CFG_DIR_DECRYPT, DES_CFG_SINGLE) ;
}
WOLFSSL_API int wc_Des_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
const byte* key, const byte* iv)
-{ return 0 ;}
+{
+ (void)out; (void)in; (void)sz; (void)key; (void)iv ;
+ return -1 ;
+}
WOLFSSL_API int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
+{
return DesCbc((Des *)des, out, in, sz, DES_CFG_DIR_ENCRYPT, DES_CFG_TRIPLE) ;
}
WOLFSSL_API int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
+{
return DesCbc((Des *)des, out, in, sz, DES_CFG_DIR_DECRYPT, DES_CFG_TRIPLE) ;
}
WOLFSSL_API int wc_Des3_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
const byte* key, const byte* iv)
-{ return 0 ; }
+{
+ (void)out; (void)in; (void)sz; (void)key; (void)iv ;
+ return -1 ;
+ }
+
+WOLFSSL_API int wc_Des3Init(Des3* des, void* heap, int devId)
+{
+ if (des == NULL)
+ return BAD_FUNC_ARG;
+
+ des->heap = heap;
+ (void)devId;
+
+ return 0;
+}
+
+WOLFSSL_API void wc_Des3Free(Des3* des)
+{
+ (void)des;
+}
#endif /* WOLFSSL_TI_CRYPT */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-hash.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-hash.c
index c60f86423..ab8f2cc22 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-hash.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/port/ti/ti-hash.c
@@ -1,8 +1,8 @@
/* port/ti/ti-hash.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,10 +16,11 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -38,12 +39,13 @@
#include <stdint.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
-#include <wolfssl/wolfcrypt/md5.h>
-#include <wolfssl/wolfcrypt/sha.h>
-#include <wolfssl/wolfcrypt/sha256.h>
+#include <wolfssl/wolfcrypt/md5.h>
+#include <wolfssl/wolfcrypt/sha.h>
+#include <wolfssl/wolfcrypt/sha256.h>
#include <wolfssl/wolfcrypt/port/ti/ti-hash.h>
#include <wolfssl/wolfcrypt/port/ti/ti-ccm.h>
#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/hash.h>
#ifndef TI_DUMMY_BUILD
#include "inc/hw_memmap.h"
@@ -57,66 +59,70 @@
#define SHAMD5_ALGO_MD5 1
#define SHAMD5_ALGO_SHA1 2
#define SHAMD5_ALGO_SHA256 3
-bool wolfSSL_TI_CCMInit(void) { return true ; }
+#define SHAMD5_ALGO_SHA224 4
#endif
static int hashInit(wolfssl_TI_Hash *hash) {
- hash->used = 0 ;
- hash->msg = 0 ;
- hash->len = 0 ;
- return 0 ;
+ if (!wolfSSL_TI_CCMInit())return 1;
+ hash->used = 0;
+ hash->msg = 0;
+ hash->len = 0;
+ return 0;
}
static int hashUpdate(wolfssl_TI_Hash *hash, const byte* data, word32 len)
{
- void *p ;
+ void *p;
- if((hash== NULL) || (data == NULL))return BAD_FUNC_ARG;
+ if ((hash== NULL) || (data == NULL))return BAD_FUNC_ARG;
- if(hash->len < hash->used+len) {
- if(hash->msg == NULL) {
+ if (hash->len < hash->used+len) {
+ if (hash->msg == NULL) {
p = XMALLOC(hash->used+len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
} else {
p = XREALLOC(hash->msg, hash->used+len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
}
- if(p == 0)return 1 ;
- hash->msg = p ;
- hash->len = hash->used+len ;
- }
- XMEMCPY(hash->msg+hash->used, data, len) ;
- hash->used += len ;
- return 0 ;
+ if (p == 0)return 1;
+ hash->msg = p;
+ hash->len = hash->used+len;
+ }
+ XMEMCPY(hash->msg+hash->used, data, len);
+ hash->used += len;
+ return 0;
}
static int hashGetHash(wolfssl_TI_Hash *hash, byte* result, word32 algo, word32 hsize)
-{
- uint32_t h[16] ;
+{
+ uint32_t h[16];
#ifndef TI_DUMMY_BUILD
- wolfSSL_TI_lockCCM() ;
+ wolfSSL_TI_lockCCM();
ROM_SHAMD5Reset(SHAMD5_BASE);
ROM_SHAMD5ConfigSet(SHAMD5_BASE, algo);
- ROM_SHAMD5DataProcess(SHAMD5_BASE,
+ ROM_SHAMD5DataProcess(SHAMD5_BASE,
(uint32_t *)hash->msg, hash->used, h);
- wolfSSL_TI_unlockCCM() ;
+ wolfSSL_TI_unlockCCM();
#else
- (void) hash ;
- (void) algo ;
+ (void) hash;
+ (void) algo;
+
+ XMEMSET(h, 0, sizeof(h));
#endif
- XMEMCPY(result, h, hsize) ;
+ XMEMCPY(result, h, hsize);
- return 0 ;
+ return 0;
}
-static void hashRestorePos(wolfssl_TI_Hash *h1, wolfssl_TI_Hash *h2) {
- h1->used = h2->used ;
+static int hashCopy(wolfssl_TI_Hash *src, wolfssl_TI_Hash *dst) {
+ XMEMCPY(dst, src, sizeof(wolfssl_TI_Hash));
+ return 0;
}
static int hashFinal(wolfssl_TI_Hash *hash, byte* result, word32 algo, word32 hsize)
-{
- hashGetHash(hash, result, algo, hsize) ;
+{
+ hashGetHash(hash, result, algo, hsize);
XFREE(hash->msg, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- hashInit(hash) ;
- return 0 ;
+ hashInit(hash);
+ return 0;
}
static int hashHash(const byte* data, word32 len, byte* hash, word32 algo, word32 hsize)
@@ -143,149 +149,190 @@ static int hashHash(const byte* data, word32 len, byte* hash, word32 algo, word3
}
#ifdef WOLFSSL_SMALL_STACK
- XFREE(hash, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(hash_desc, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return ret;
}
+static int hashFree(wolfssl_TI_Hash *hash)
+{
+ XFREE(hash->msg, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ hashInit(hash);
+ return 0;
+}
+
#if !defined(NO_MD5)
-WOLFSSL_API void wc_InitMd5(Md5* md5)
+WOLFSSL_API int wc_InitMd5_ex(Md5* md5, void* heap, int devId)
{
if (md5 == NULL)
- return ;
- if(!wolfSSL_TI_CCMInit())return ;
- hashInit((wolfssl_TI_Hash *)md5) ;
+ return 1;
+ (void)heap;
+ (void)devId;
+ return hashInit((wolfssl_TI_Hash *)md5);
+}
+WOLFSSL_API int wc_InitMd5(Md5* md5)
+{
+ return wc_InitMd5_ex(md5, NULL, INVALID_DEVID);
}
-WOLFSSL_API void wc_Md5Update(Md5* md5, const byte* data, word32 len)
+WOLFSSL_API int wc_Md5Update(Md5* md5, const byte* data, word32 len)
{
- hashUpdate((wolfssl_TI_Hash *)md5, data, len) ;
+ return hashUpdate((wolfssl_TI_Hash *)md5, data, len);
}
-WOLFSSL_API void wc_Md5Final(Md5* md5, byte* hash)
+WOLFSSL_API int wc_Md5Final(Md5* md5, byte* hash)
{
- hashFinal((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE) ;
+ return hashFinal((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE);
}
-WOLFSSL_API void wc_Md5GetHash(Md5* md5, byte* hash)
+WOLFSSL_API int wc_Md5GetHash(Md5* md5, byte* hash)
{
- hashGetHash((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE) ;
+ return hashGetHash((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE);
}
-WOLFSSL_API void wc_Md5RestorePos(Md5* m1, Md5* m2) {
- hashRestorePos((wolfssl_TI_Hash *)m1, (wolfssl_TI_Hash *)m2) ;
+WOLFSSL_API int wc_Md5Copy(Md5* src, Md5* dst) {
+ return hashCopy((wolfssl_TI_Hash *)src, (wolfssl_TI_Hash *)dst);
}
WOLFSSL_API int wc_Md5Hash(const byte*data, word32 len, byte*hash)
-{
- return hashHash(data, len, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE) ;
+{
+ return hashHash(data, len, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE);
}
-#endif /* NO_MD5 */
+WOLFSSL_API void wc_Md5Free(Md5* md5)
+{
+ hashFree((wolfssl_TI_Hash *)md5);
+}
+
+#endif /* !NO_MD5 */
#if !defined(NO_SHA)
-WOLFSSL_API int wc_InitSha(Sha* sha)
+WOLFSSL_API int wc_InitSha_ex(Md5* sha, void* heap, int devId)
{
if (sha == NULL)
- return 1 ;
- if(!wolfSSL_TI_CCMInit())return 1 ;
- return hashInit((wolfssl_TI_Hash *)sha) ;
+ return 1;
+ (void)heap;
+ (void)devId;
+ return hashInit((wolfssl_TI_Hash *)sha);
+}
+WOLFSSL_API int wc_InitSha(Sha* sha)
+{
+ return wc_InitSha_ex(sha, NULL, INVALID_DEVID);
}
WOLFSSL_API int wc_ShaUpdate(Sha* sha, const byte* data, word32 len)
{
- return hashUpdate((wolfssl_TI_Hash *)sha, data, len) ;
+ return hashUpdate((wolfssl_TI_Hash *)sha, data, len);
}
WOLFSSL_API int wc_ShaFinal(Sha* sha, byte* hash)
{
- return hashFinal((wolfssl_TI_Hash *)sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE) ;
+ return hashFinal((wolfssl_TI_Hash *)sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE);
}
WOLFSSL_API int wc_ShaGetHash(Sha* sha, byte* hash)
{
- return hashGetHash(sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE) ;
+ return hashGetHash(sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE);
}
-WOLFSSL_API void wc_ShaRestorePos(Sha* s1, Sha* s2) {
- hashRestorePos((wolfssl_TI_Hash *)s1, (wolfssl_TI_Hash *)s2) ;
+WOLFSSL_API int wc_ShaCopy(Sha* src, Sha* dst) {
+ return hashCopy((wolfssl_TI_Hash *)src, (wolfssl_TI_Hash *)dst);
}
WOLFSSL_API int wc_ShaHash(const byte*data, word32 len, byte*hash)
-{
- return hashHash(data, len, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE) ;
+{
+ return hashHash(data, len, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE);
}
-#endif /* NO_SHA */
+WOLFSSL_API void wc_ShaFree(Sha* sha)
+{
+ hashFree((wolfssl_TI_Hash *)sha);
+}
-#if defined(HAVE_SHA224)
-WOLFSSL_API int wc_InitSha224(Sha224* sha224)
+#endif /* !NO_SHA */
+
+#if defined(WOLFSSL_SHA224)
+WOLFSSL_API int wc_InitSha224_ex(Sha224* sha224, void* heap, int devId)
{
if (sha224 == NULL)
- return 1 ;
- if(!wolfSSL_TI_CCMInit())return 1 ;
- return hashInit((wolfssl_TI_Hash *)sha224) ;
+ return 1;
+ (void)heap;
+ (void)devId;
+ return hashInit((wolfssl_TI_Hash *)sha224);
+}
+WOLFSSL_API int wc_InitSha224(Sha224* sha224)
+{
+ return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
}
WOLFSSL_API int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len)
{
- return hashUpdate((wolfssl_TI_Hash *)sha224, data, len) ;
+ return hashUpdate((wolfssl_TI_Hash *)sha224, data, len);
}
WOLFSSL_API int wc_Sha224Final(Sha224* sha224, byte* hash)
{
- return hashFinal((wolfssl_TI_Hash *)sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE) ;
+ return hashFinal((wolfssl_TI_Hash *)sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE);
}
WOLFSSL_API int wc_Sha224GetHash(Sha224* sha224, byte* hash)
{
- return hashGetHash(sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE) ;
+ return hashGetHash(sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE);
}
-WOLFSSL_API void wc_Sha224RestorePos(Sha224* s1, Sha224* s2) {
- hashRestorePos((wolfssl_TI_Hash *)s1, (wolfssl_TI_Hash *)s2) ;
+WOLFSSL_API int wc_Sha224Hash(const byte* data, word32 len, byte*hash)
+{
+ return hashHash(data, len, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE);
}
-WOLFSSL_API int wc_Sha224Hash(const byte* data, word32 len, byte*hash)
-{
- return hashHash(data, len, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE) ;
+WOLFSSL_API void wc_Sha224Free(Sha224* sha224)
+{
+ hashFree((wolfssl_TI_Hash *)sha224);
}
-#endif /* HAVE_SHA224 */
+#endif /* WOLFSSL_SHA224 */
#if !defined(NO_SHA256)
-WOLFSSL_API int wc_InitSha256(Sha256* sha256)
+WOLFSSL_API int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
{
if (sha256 == NULL)
- return 1 ;
- if(!wolfSSL_TI_CCMInit())return 1 ;
- return hashInit((wolfssl_TI_Hash *)sha256) ;
+ return 1;
+ (void)heap;
+ (void)devId;
+ return hashInit((wolfssl_TI_Hash *)sha256);
+}
+
+WOLFSSL_API int wc_InitSha256(Sha256* sha256)
+{
+ return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
}
WOLFSSL_API int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
{
- return hashUpdate((wolfssl_TI_Hash *)sha256, data, len) ;
+ return hashUpdate((wolfssl_TI_Hash *)sha256, data, len);
}
WOLFSSL_API int wc_Sha256Final(Sha256* sha256, byte* hash)
{
- return hashFinal((wolfssl_TI_Hash *)sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE) ;
+ return hashFinal((wolfssl_TI_Hash *)sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE);
}
WOLFSSL_API int wc_Sha256GetHash(Sha256* sha256, byte* hash)
{
- return hashGetHash(sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE) ;
+ return hashGetHash(sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE);
}
-WOLFSSL_API void wc_Sha256RestorePos(Sha256* s1, Sha256* s2) {
- hashRestorePos((wolfssl_TI_Hash *)s1, (wolfssl_TI_Hash *)s2) ;
+WOLFSSL_API int wc_Sha256Hash(const byte* data, word32 len, byte*hash)
+{
+ return hashHash(data, len, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE);
}
-WOLFSSL_API int wc_Sha256Hash(const byte* data, word32 len, byte*hash)
+WOLFSSL_API void wc_Sha256Free(Sha256* sha256)
{
- return hashHash(data, len, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE) ;
+ hashFree((wolfssl_TI_Hash *)sha256);
}
-#endif
+
+#endif /* !NO_SHA256 */
#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pwdbased.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pwdbased.c
index b9764d8d0..c672c2285 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pwdbased.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/pwdbased.c
@@ -1,8 +1,8 @@
/* pwdbased.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -27,176 +28,196 @@
#ifndef NO_PWDBASED
-#ifdef WOLFSSL_PIC32MZ_HASH
- #ifndef NO_MD5
- #define wc_InitMd5 wc_InitMd5_sw
- #define wc_Md5Update wc_Md5Update_sw
- #define wc_Md5Final wc_Md5Final_sw
- #endif /* NO_MD5 */
-
- #define wc_InitSha wc_InitSha_sw
- #define wc_ShaUpdate wc_ShaUpdate_sw
- #define wc_ShaFinal wc_ShaFinal_sw
-
- #define wc_InitSha256 wc_InitSha256_sw
- #define wc_Sha256Update wc_Sha256Update_sw
- #define wc_Sha256Final wc_Sha256Final_sw
-#endif
-
#include <wolfssl/wolfcrypt/pwdbased.h>
#include <wolfssl/wolfcrypt/hmac.h>
+#include <wolfssl/wolfcrypt/hash.h>
#include <wolfssl/wolfcrypt/integer.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
-#if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)
- #include <wolfssl/wolfcrypt/sha512.h>
-#endif
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
-
- static INLINE word32 min(word32 a, word32 b)
- {
- return a > b ? b : a;
- }
-#endif /* WOLFSSL_HAVE_MIN */
+#ifdef HAVE_PBKDF1
-
-#ifndef NO_SHA
-/* PBKDF1 needs at least SHA available */
-int wc_PBKDF1(byte* output, const byte* passwd, int pLen, const byte* salt,
- int sLen, int iterations, int kLen, int hashType)
+/* PKCS#5 v1.5 with non standard extension to optionally derive the extra data (IV) */
+int wc_PBKDF1_ex(byte* key, int keyLen, byte* iv, int ivLen,
+ const byte* passwd, int passwdLen, const byte* salt, int saltLen,
+ int iterations, int hashType, void* heap)
{
- Sha sha;
-#ifndef NO_MD5
- Md5 md5;
+ int err;
+ int keyLeft, ivLeft, i;
+ int digestLeft, store;
+ int keyOutput = 0;
+ int diestLen;
+ byte digest[WC_MAX_DIGEST_SIZE];
+#ifdef WOLFSSL_SMALL_STACK
+ wc_HashAlg* hash = NULL;
+#else
+ wc_HashAlg hash[1];
#endif
- int hLen = (int)SHA_DIGEST_SIZE;
- int i, ret = 0;
- byte buffer[SHA_DIGEST_SIZE]; /* max size */
+ enum wc_HashType hashT;
- if (hashType != MD5 && hashType != SHA)
+ (void)heap;
+
+ if (key == NULL || keyLen < 0 || passwdLen < 0 || saltLen < 0 || ivLen < 0){
return BAD_FUNC_ARG;
+ }
-#ifndef NO_MD5
- if (hashType == MD5)
- hLen = (int)MD5_DIGEST_SIZE;
-#endif
+ if (iterations <= 0)
+ iterations = 1;
- if (kLen > hLen)
- return BAD_FUNC_ARG;
+ hashT = wc_HashTypeConvert(hashType);
+ err = wc_HashGetDigestSize(hashT);
+ if (err < 0)
+ return err;
+ diestLen = err;
- if (iterations < 1)
- return BAD_FUNC_ARG;
+ /* initialize hash */
+#ifdef WOLFSSL_SMALL_STACK
+ hash = (wc_HashAlg*)XMALLOC(sizeof(wc_HashAlg), heap,
+ DYNAMIC_TYPE_HASHCTX);
+ if (hash == NULL)
+ return MEMORY_E;
+#endif
- switch (hashType) {
-#ifndef NO_MD5
- case MD5:
- wc_InitMd5(&md5);
- wc_Md5Update(&md5, passwd, pLen);
- wc_Md5Update(&md5, salt, sLen);
- wc_Md5Final(&md5, buffer);
- break;
-#endif /* NO_MD5 */
- case SHA:
- default:
- ret = wc_InitSha(&sha);
- if (ret != 0)
- return ret;
- wc_ShaUpdate(&sha, passwd, pLen);
- wc_ShaUpdate(&sha, salt, sLen);
- wc_ShaFinal(&sha, buffer);
- break;
+ err = wc_HashInit_ex(hash, hashT, heap, INVALID_DEVID);
+ if (err != 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(hash, heap, DYNAMIC_TYPE_HASHCTX);
+ #endif
+ return err;
}
- for (i = 1; i < iterations; i++) {
- if (hashType == SHA) {
- wc_ShaUpdate(&sha, buffer, hLen);
- wc_ShaFinal(&sha, buffer);
+ keyLeft = keyLen;
+ ivLeft = ivLen;
+ while (keyOutput < (keyLen + ivLen)) {
+ digestLeft = diestLen;
+ /* D_(i - 1) */
+ if (keyOutput) { /* first time D_0 is empty */
+ err = wc_HashUpdate(hash, hashT, digest, diestLen);
+ if (err != 0) break;
}
-#ifndef NO_MD5
- else {
- wc_Md5Update(&md5, buffer, hLen);
- wc_Md5Final(&md5, buffer);
+
+ /* data */
+ err = wc_HashUpdate(hash, hashT, passwd, passwdLen);
+ if (err != 0) break;
+
+ /* salt */
+ if (salt) {
+ err = wc_HashUpdate(hash, hashT, salt, saltLen);
+ if (err != 0) break;
}
-#endif
- }
- XMEMCPY(output, buffer, kLen);
- return 0;
-}
-#endif /* NO_SHA */
+ err = wc_HashFinal(hash, hashT, digest);
+ if (err != 0) break;
+ /* count */
+ for (i = 1; i < iterations; i++) {
+ err = wc_HashUpdate(hash, hashT, digest, diestLen);
+ if (err != 0) break;
-int GetDigestSize(int hashType)
-{
- int hLen;
+ err = wc_HashFinal(hash, hashT, digest);
+ if (err != 0) break;
+ }
- switch (hashType) {
-#ifndef NO_MD5
- case MD5:
- hLen = MD5_DIGEST_SIZE;
- break;
-#endif
-#ifndef NO_SHA
- case SHA:
- hLen = SHA_DIGEST_SIZE;
- break;
-#endif
-#ifndef NO_SHA256
- case SHA256:
- hLen = SHA256_DIGEST_SIZE;
- break;
-#endif
-#ifdef WOLFSSL_SHA512
- case SHA512:
- hLen = SHA512_DIGEST_SIZE;
- break;
-#endif
- default:
- return BAD_FUNC_ARG;
+ if (keyLeft) {
+ store = min(keyLeft, diestLen);
+ XMEMCPY(&key[keyLen - keyLeft], digest, store);
+
+ keyOutput += store;
+ keyLeft -= store;
+ digestLeft -= store;
+ }
+
+ if (ivLeft && digestLeft) {
+ store = min(ivLeft, digestLeft);
+ if (iv != NULL)
+ XMEMCPY(&iv[ivLen - ivLeft],
+ &digest[diestLen - digestLeft], store);
+ keyOutput += store;
+ ivLeft -= store;
+ }
}
- return hLen;
-}
+ wc_HashFree(hash, hashT);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(hash, heap, DYNAMIC_TYPE_HASHCTX);
+#endif
-int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
+ if (err != 0)
+ return err;
+
+ if (keyOutput != (keyLen + ivLen))
+ return BUFFER_E;
+
+ return err;
+}
+
+/* PKCS#5 v1.5 */
+int wc_PBKDF1(byte* output, const byte* passwd, int pLen, const byte* salt,
int sLen, int iterations, int kLen, int hashType)
{
+ return wc_PBKDF1_ex(output, kLen, NULL, 0,
+ passwd, pLen, salt, sLen, iterations, hashType, NULL);
+}
+
+#endif /* HAVE_PKCS5 */
+
+#ifdef HAVE_PBKDF2
+
+int wc_PBKDF2_ex(byte* output, const byte* passwd, int pLen, const byte* salt,
+ int sLen, int iterations, int kLen, int hashType, void* heap, int devId)
+{
word32 i = 1;
int hLen;
int j, ret;
- Hmac hmac;
#ifdef WOLFSSL_SMALL_STACK
byte* buffer;
+ Hmac* hmac;
#else
- byte buffer[MAX_DIGEST_SIZE];
+ byte buffer[WC_MAX_DIGEST_SIZE];
+ Hmac hmac[1];
#endif
+ enum wc_HashType hashT;
+
+ if (output == NULL || pLen < 0 || sLen < 0 || kLen < 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (iterations <= 0)
+ iterations = 1;
- hLen = GetDigestSize(hashType);
+ hashT = wc_HashTypeConvert(hashType);
+ hLen = wc_HashGetDigestSize(hashT);
if (hLen < 0)
return BAD_FUNC_ARG;
#ifdef WOLFSSL_SMALL_STACK
- buffer = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ buffer = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER);
if (buffer == NULL)
return MEMORY_E;
+ hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap, DYNAMIC_TYPE_HMAC);
+ if (hmac == NULL) {
+ XFREE(buffer, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return MEMORY_E;
+ }
#endif
- ret = wc_HmacSetKey(&hmac, hashType, passwd, pLen);
-
+ ret = wc_HmacInit(hmac, heap, devId);
if (ret == 0) {
- while (kLen) {
+ /* use int hashType here, since HMAC FIPS uses the old unique value */
+ ret = wc_HmacSetKey(hmac, hashType, passwd, pLen);
+
+ while (ret == 0 && kLen) {
int currentLen;
- ret = wc_HmacUpdate(&hmac, salt, sLen);
+ ret = wc_HmacUpdate(hmac, salt, sLen);
if (ret != 0)
break;
@@ -204,7 +225,7 @@ int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
for (j = 0; j < 4; j++) {
byte b = (byte)(i >> ((3-j) * 8));
- ret = wc_HmacUpdate(&hmac, &b, 1);
+ ret = wc_HmacUpdate(hmac, &b, 1);
if (ret != 0)
break;
}
@@ -213,7 +234,7 @@ int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
if (ret != 0)
break;
- ret = wc_HmacFinal(&hmac, buffer);
+ ret = wc_HmacFinal(hmac, buffer);
if (ret != 0)
break;
@@ -221,10 +242,10 @@ int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
XMEMCPY(output, buffer, currentLen);
for (j = 1; j < iterations; j++) {
- ret = wc_HmacUpdate(&hmac, buffer, hLen);
+ ret = wc_HmacUpdate(hmac, buffer, hLen);
if (ret != 0)
break;
- ret = wc_HmacFinal(&hmac, buffer);
+ ret = wc_HmacFinal(hmac, buffer);
if (ret != 0)
break;
xorbuf(output, buffer, currentLen);
@@ -238,171 +259,98 @@ int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
kLen -= currentLen;
i++;
}
+ wc_HmacFree(hmac);
}
#ifdef WOLFSSL_SMALL_STACK
- XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(buffer, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(hmac, heap, DYNAMIC_TYPE_HMAC);
#endif
return ret;
}
-#ifdef WOLFSSL_SHA512
- #define PBKDF_DIGEST_SIZE SHA512_BLOCK_SIZE
-#elif !defined(NO_SHA256)
- #define PBKDF_DIGEST_SIZE SHA256_BLOCK_SIZE
-#else
- #define PBKDF_DIGEST_SIZE SHA_DIGEST_SIZE
-#endif
-
-/* helper for wc_PKCS12_PBKDF(), sets block and digest sizes */
-int GetPKCS12HashSizes(int hashType, word32* v, word32* u)
+int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
+ int sLen, int iterations, int kLen, int hashType)
{
- if (!v || !u)
- return BAD_FUNC_ARG;
+ return wc_PBKDF2_ex(output, passwd, pLen, salt, sLen, iterations, kLen,
+ hashType, NULL, INVALID_DEVID);
+}
- switch (hashType) {
-#ifndef NO_MD5
- case MD5:
- *v = MD5_BLOCK_SIZE;
- *u = MD5_DIGEST_SIZE;
- break;
-#endif
-#ifndef NO_SHA
- case SHA:
- *v = SHA_BLOCK_SIZE;
- *u = SHA_DIGEST_SIZE;
- break;
-#endif
-#ifndef NO_SHA256
- case SHA256:
- *v = SHA256_BLOCK_SIZE;
- *u = SHA256_DIGEST_SIZE;
- break;
-#endif
-#ifdef WOLFSSL_SHA512
- case SHA512:
- *v = SHA512_BLOCK_SIZE;
- *u = SHA512_DIGEST_SIZE;
- break;
-#endif
- default:
- return BAD_FUNC_ARG;
- }
+#endif /* HAVE_PBKDF2 */
- return 0;
-}
+#ifdef HAVE_PKCS12
/* helper for PKCS12_PBKDF(), does hash operation */
-int DoPKCS12Hash(int hashType, byte* buffer, word32 totalLen,
+static int DoPKCS12Hash(int hashType, byte* buffer, word32 totalLen,
byte* Ai, word32 u, int iterations)
{
int i;
int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ wc_HashAlg* hash = NULL;
+#else
+ wc_HashAlg hash[1];
+#endif
+ enum wc_HashType hashT;
- if (buffer == NULL || Ai == NULL)
+ if (buffer == NULL || Ai == NULL) {
return BAD_FUNC_ARG;
+ }
- switch (hashType) {
-#ifndef NO_MD5
- case MD5:
- {
- Md5 md5;
- wc_InitMd5(&md5);
- wc_Md5Update(&md5, buffer, totalLen);
- wc_Md5Final(&md5, Ai);
-
- for (i = 1; i < iterations; i++) {
- wc_Md5Update(&md5, Ai, u);
- wc_Md5Final(&md5, Ai);
- }
- }
- break;
-#endif /* NO_MD5 */
-#ifndef NO_SHA
- case SHA:
- {
- Sha sha;
- ret = wc_InitSha(&sha);
- if (ret != 0)
- break;
- wc_ShaUpdate(&sha, buffer, totalLen);
- wc_ShaFinal(&sha, Ai);
-
- for (i = 1; i < iterations; i++) {
- wc_ShaUpdate(&sha, Ai, u);
- wc_ShaFinal(&sha, Ai);
- }
- }
- break;
-#endif /* NO_SHA */
-#ifndef NO_SHA256
- case SHA256:
- {
- Sha256 sha256;
- ret = wc_InitSha256(&sha256);
- if (ret != 0)
- break;
+ hashT = wc_HashTypeConvert(hashType);
- ret = wc_Sha256Update(&sha256, buffer, totalLen);
- if (ret != 0)
- break;
+ /* initialize hash */
+#ifdef WOLFSSL_SMALL_STACK
+ hash = (wc_HashAlg*)XMALLOC(sizeof(wc_HashAlg), NULL,
+ DYNAMIC_TYPE_HASHCTX);
+ if (hash == NULL)
+ return MEMORY_E;
+#endif
- ret = wc_Sha256Final(&sha256, Ai);
- if (ret != 0)
- break;
+ ret = wc_HashInit(hash, hashT);
+ if (ret != 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(hash, NULL, DYNAMIC_TYPE_HASHCTX);
+ #endif
+ return ret;
+ }
- for (i = 1; i < iterations; i++) {
- ret = wc_Sha256Update(&sha256, Ai, u);
- if (ret != 0)
- break;
+ ret = wc_HashUpdate(hash, hashT, buffer, totalLen);
- ret = wc_Sha256Final(&sha256, Ai);
- if (ret != 0)
- break;
- }
- }
- break;
-#endif /* NO_SHA256 */
-#ifdef WOLFSSL_SHA512
- case SHA512:
- {
- Sha512 sha512;
- ret = wc_InitSha512(&sha512);
- if (ret != 0)
- break;
+ if (ret == 0)
+ ret = wc_HashFinal(hash, hashT, Ai);
- ret = wc_Sha512Update(&sha512, buffer, totalLen);
- if (ret != 0)
- break;
+ for (i = 1; i < iterations; i++) {
+ if (ret == 0)
+ ret = wc_HashUpdate(hash, hashT, Ai, u);
+ if (ret == 0)
+ ret = wc_HashFinal(hash, hashT, Ai);
+ }
- ret = wc_Sha512Final(&sha512, Ai);
- if (ret != 0)
- break;
+ wc_HashFree(hash, hashT);
- for (i = 1; i < iterations; i++) {
- ret = wc_Sha512Update(&sha512, Ai, u);
- if (ret != 0)
- break;
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(hash, NULL, DYNAMIC_TYPE_HASHCTX);
+#endif
- ret = wc_Sha512Final(&sha512, Ai);
- if (ret != 0)
- break;
- }
- }
- break;
-#endif /* WOLFSSL_SHA512 */
+ return ret;
+}
- default:
- ret = BAD_FUNC_ARG;
- break;
- }
- return ret;
+int wc_PKCS12_PBKDF(byte* output, const byte* passwd, int passLen,
+ const byte* salt, int saltLen, int iterations, int kLen, int hashType,
+ int id)
+{
+ return wc_PKCS12_PBKDF_ex(output, passwd, passLen, salt, saltLen,
+ iterations, kLen, hashType, id, NULL);
}
-int wc_PKCS12_PBKDF(byte* output, const byte* passwd, int passLen,const byte* salt,
- int saltLen, int iterations, int kLen, int hashType, int id)
+
+/* extended API that allows a heap hint to be used */
+int wc_PKCS12_PBKDF_ex(byte* output, const byte* passwd, int passLen,
+ const byte* salt, int saltLen, int iterations, int kLen,
+ int hashType, int id, void* heap)
{
/* all in bytes instead of bits */
word32 u, v, dLen, pLen, iLen, sLen, totalLen;
@@ -421,34 +369,48 @@ int wc_PKCS12_PBKDF(byte* output, const byte* passwd, int passLen,const byte* sa
byte* Ai;
byte* B;
#else
- byte Ai[PBKDF_DIGEST_SIZE];
- byte B[PBKDF_DIGEST_SIZE];
+ byte Ai[WC_MAX_DIGEST_SIZE];
+ byte B[WC_MAX_BLOCK_SIZE];
#endif
+ enum wc_HashType hashT;
+
+ (void)heap;
+
+ if (output == NULL || passLen < 0 || saltLen < 0 || kLen < 0) {
+ return BAD_FUNC_ARG;
+ }
- if (!iterations)
+ if (iterations <= 0)
iterations = 1;
- ret = GetPKCS12HashSizes(hashType, &v, &u);
+ hashT = wc_HashTypeConvert(hashType);
+ ret = wc_HashGetDigestSize(hashT);
if (ret < 0)
- return BAD_FUNC_ARG;
+ return ret;
+ u = ret;
+
+ ret = wc_HashGetBlockSize(hashT);
+ if (ret < 0)
+ return ret;
+ v = ret;
#ifdef WOLFSSL_SMALL_STACK
- Ai = (byte*)XMALLOC(PBKDF_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ Ai = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER);
if (Ai == NULL)
return MEMORY_E;
- B = (byte*)XMALLOC(PBKDF_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ B = (byte*)XMALLOC(WC_MAX_BLOCK_SIZE, heap, DYNAMIC_TYPE_TMP_BUFFER);
if (B == NULL) {
- XFREE(Ai, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(Ai, heap, DYNAMIC_TYPE_TMP_BUFFER);
return MEMORY_E;
}
#endif
- XMEMSET(Ai, 0, PBKDF_DIGEST_SIZE);
- XMEMSET(B, 0, PBKDF_DIGEST_SIZE);
+ XMEMSET(Ai, 0, WC_MAX_DIGEST_SIZE);
+ XMEMSET(B, 0, WC_MAX_BLOCK_SIZE);
dLen = v;
- sLen = v * ((saltLen + v - 1) / v);
+ sLen = v * ((saltLen + v - 1) / v);
if (passLen)
pLen = v * ((passLen + v - 1) / v);
else
@@ -458,11 +420,11 @@ int wc_PKCS12_PBKDF(byte* output, const byte* passwd, int passLen,const byte* sa
totalLen = dLen + sLen + pLen;
if (totalLen > sizeof(staticBuffer)) {
- buffer = (byte*)XMALLOC(totalLen, 0, DYNAMIC_TYPE_KEY);
+ buffer = (byte*)XMALLOC(totalLen, heap, DYNAMIC_TYPE_KEY);
if (buffer == NULL) {
#ifdef WOLFSSL_SMALL_STACK
- XFREE(Ai, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(B, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(Ai, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(B, heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return MEMORY_E;
}
@@ -522,7 +484,7 @@ int wc_PKCS12_PBKDF(byte* output, const byte* passwd, int passLen,const byte* sa
else {
if (outSz > (int)v) {
/* take off MSB */
- byte tmp[129];
+ byte tmp[WC_MAX_BLOCK_SIZE + 1];
ret = mp_to_unsigned_bin(&res, tmp);
XMEMCPY(I + i, tmp + 1, v);
}
@@ -546,17 +508,288 @@ int wc_PKCS12_PBKDF(byte* output, const byte* passwd, int passLen,const byte* sa
mp_clear(&B1);
}
- if (dynamic) XFREE(buffer, 0, DYNAMIC_TYPE_KEY);
+ if (dynamic) XFREE(buffer, heap, DYNAMIC_TYPE_KEY);
#ifdef WOLFSSL_SMALL_STACK
- XFREE(Ai, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(B, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(Ai, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(B, heap, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return ret;
}
-#undef PBKDF_DIGEST_SIZE
+#endif /* HAVE_PKCS12 */
-#endif /* NO_PWDBASED */
+#ifdef HAVE_SCRYPT
+/* Rotate the 32-bit value a by b bits to the left.
+ *
+ * a 32-bit value.
+ * b Number of bits to rotate.
+ * returns rotated value.
+ */
+#define R(a, b) rotlFixed(a, b)
+/* One round of Salsa20/8.
+ * Code taken from RFC 7914: scrypt PBKDF.
+ *
+ * out Output buffer.
+ * in Input data to hash.
+ */
+static void scryptSalsa(word32* out, word32* in)
+{
+ int i;
+ word32 x[16];
+
+#ifdef LITTLE_ENDIAN_ORDER
+ for (i = 0; i < 16; ++i)
+ x[i] = in[i];
+#else
+ for (i = 0; i < 16; i++)
+ x[i] = ByteReverseWord32(in[i]);
+#endif
+ for (i = 8; i > 0; i -= 2) {
+ x[ 4] ^= R(x[ 0] + x[12], 7); x[ 8] ^= R(x[ 4] + x[ 0], 9);
+ x[12] ^= R(x[ 8] + x[ 4], 13); x[ 0] ^= R(x[12] + x[ 8], 18);
+ x[ 9] ^= R(x[ 5] + x[ 1], 7); x[13] ^= R(x[ 9] + x[ 5], 9);
+ x[ 1] ^= R(x[13] + x[ 9], 13); x[ 5] ^= R(x[ 1] + x[13], 18);
+ x[14] ^= R(x[10] + x[ 6], 7); x[ 2] ^= R(x[14] + x[10], 9);
+ x[ 6] ^= R(x[ 2] + x[14], 13); x[10] ^= R(x[ 6] + x[ 2], 18);
+ x[ 3] ^= R(x[15] + x[11], 7); x[ 7] ^= R(x[ 3] + x[15], 9);
+ x[11] ^= R(x[ 7] + x[ 3], 13); x[15] ^= R(x[11] + x[ 7], 18);
+ x[ 1] ^= R(x[ 0] + x[ 3], 7); x[ 2] ^= R(x[ 1] + x[ 0], 9);
+ x[ 3] ^= R(x[ 2] + x[ 1], 13); x[ 0] ^= R(x[ 3] + x[ 2], 18);
+ x[ 6] ^= R(x[ 5] + x[ 4], 7); x[ 7] ^= R(x[ 6] + x[ 5], 9);
+ x[ 4] ^= R(x[ 7] + x[ 6], 13); x[ 5] ^= R(x[ 4] + x[ 7], 18);
+ x[11] ^= R(x[10] + x[ 9], 7); x[ 8] ^= R(x[11] + x[10], 9);
+ x[ 9] ^= R(x[ 8] + x[11], 13); x[10] ^= R(x[ 9] + x[ 8], 18);
+ x[12] ^= R(x[15] + x[14], 7); x[13] ^= R(x[12] + x[15], 9);
+ x[14] ^= R(x[13] + x[12], 13); x[15] ^= R(x[14] + x[13], 18);
+ }
+#ifdef LITTLE_ENDIAN_ORDER
+ for (i = 0; i < 16; ++i)
+ out[i] = in[i] + x[i];
+#else
+ for (i = 0; i < 16; i++)
+ out[i] = ByteReverseWord32(ByteReverseWord32(in[i]) + x[i]);
+#endif
+}
+
+/* Mix a block using Salsa20/8.
+ * Based on RFC 7914: scrypt PBKDF.
+ *
+ * b Blocks to mix.
+ * y Temporary storage.
+ * r Size of the block.
+ */
+static void scryptBlockMix(byte* b, byte* y, int r)
+{
+ byte x[64];
+#ifdef WORD64_AVAILABLE
+ word64* b64 = (word64*)b;
+ word64* y64 = (word64*)y;
+ word64* x64 = (word64*)x;
+#else
+ word32* b32 = (word32*)b;
+ word32* y32 = (word32*)y;
+ word32* x32 = (word32*)x;
+#endif
+ int i;
+ int j;
+
+ /* Step 1. */
+ XMEMCPY(x, b + (2 * r - 1) * 64, sizeof(x));
+ /* Step 2. */
+ for (i = 0; i < 2 * r; i++)
+ {
+#ifdef WORD64_AVAILABLE
+ for (j = 0; j < 8; j++)
+ x64[j] ^= b64[i * 8 + j];
+#else
+ for (j = 0; j < 16; j++)
+ x32[j] ^= b32[i * 16 + j];
+#endif
+ scryptSalsa((word32*)x, (word32*)x);
+ XMEMCPY(y + i * 64, x, sizeof(x));
+ }
+ /* Step 3. */
+ for (i = 0; i < r; i++) {
+#ifdef WORD64_AVAILABLE
+ for (j = 0; j < 8; j++) {
+ b64[i * 8 + j] = y64[2 * i * 8 + j];
+ b64[(r + i) * 8 + j] = y64[(2 * i + 1) * 8 + j];
+ }
+#else
+ for (j = 0; j < 16; j++) {
+ b32[i * 16 + j] = y32[2 * i * 16 + j];
+ b32[(r + i) * 16 + j] = y32[(2 * i + 1) * 16 + j];
+ }
+#endif
+ }
+}
+
+/* Random oracles mix.
+ * Based on RFC 7914: scrypt PBKDF.
+ *
+ * x Data to mix.
+ * v Temporary buffer.
+ * y Temporary buffer for the block mix.
+ * r Block size parameter.
+ * n CPU/Memory cost parameter.
+ */
+static void scryptROMix(byte* x, byte* v, byte* y, int r, word32 n)
+{
+ word32 i;
+ word32 j;
+ word32 k;
+ word32 bSz = 128 * r;
+#ifdef WORD64_AVAILABLE
+ word64* x64 = (word64*)x;
+ word64* v64 = (word64*)v;
+#else
+ word32* x32 = (word32*)x;
+ word32* v32 = (word32*)v;
+#endif
+
+ /* Step 1. X = B (B not needed therefore not implemented) */
+ /* Step 2. */
+ for (i = 0; i < n; i++)
+ {
+ XMEMCPY(v + i * bSz, x, bSz);
+ scryptBlockMix(x, y, r);
+ }
+
+ /* Step 3. */
+ for (i = 0; i < n; i++)
+ {
+#ifdef LITTLE_ENDIAN_ORDER
+#ifdef WORD64_AVAILABLE
+ j = *(word64*)(x + (2*r - 1) * 64) & (n-1);
+#else
+ j = *(word32*)(x + (2*r - 1) * 64) & (n-1);
+#endif
+#else
+ byte* t = x + (2*r - 1) * 64;
+ j = (t[0] | (t[1] << 8) | (t[2] << 16) | ((word32)t[3] << 24)) & (n-1);
+#endif
+#ifdef WORD64_AVAILABLE
+ for (k = 0; k < bSz / 8; k++)
+ x64[k] ^= v64[j * bSz / 8 + k];
+#else
+ for (k = 0; k < bSz / 4; k++)
+ x32[k] ^= v32[j * bSz / 4 + k];
+#endif
+ scryptBlockMix(x, y, r);
+ }
+ /* Step 4. B' = X (B = X = B' so not needed, therefore not implemented) */
+}
+
+/* Generates an key derived from a password and salt using a memory hard
+ * algorithm.
+ * Implements RFC 7914: scrypt PBKDF.
+ *
+ * output The derived key.
+ * passwd The password to derive key from.
+ * passLen The length of the password.
+ * salt The key specific data.
+ * saltLen The length of the salt data.
+ * cost The CPU/memory cost parameter. Range: 1..(128*r/8-1)
+ * (Iterations = 2^cost)
+ * blockSize The number of 128 byte octets in a working block.
+ * parallel The number of parallel mix operations to perform.
+ * (Note: this implementation does not use threads.)
+ * dkLen The length of the derived key in bytes.
+ * returns BAD_FUNC_ARG when: blockSize is too large for cost.
+ */
+int wc_scrypt(byte* output, const byte* passwd, int passLen,
+ const byte* salt, int saltLen, int cost, int blockSize,
+ int parallel, int dkLen)
+{
+ int ret = 0;
+ int i;
+ byte* v = NULL;
+ byte* y = NULL;
+ byte* blocks = NULL;
+ word32 blocksSz;
+ word32 bSz;
+
+ if (blockSize > 8)
+ return BAD_FUNC_ARG;
+
+ if (cost < 1 || cost >= 128 * blockSize / 8 || parallel < 1 || dkLen < 1)
+ return BAD_FUNC_ARG;
+
+ bSz = 128 * blockSize;
+ blocksSz = bSz * parallel;
+ blocks = (byte*)XMALLOC(blocksSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (blocks == NULL)
+ goto end;
+ /* Temporary for scryptROMix. */
+ v = (byte*)XMALLOC((1 << cost) * bSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (v == NULL)
+ goto end;
+ /* Temporary for scryptBlockMix. */
+ y = (byte*)XMALLOC(blockSize * 128, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (y == NULL)
+ goto end;
+
+ /* Step 1. */
+ ret = wc_PBKDF2(blocks, passwd, passLen, salt, saltLen, 1, blocksSz,
+ WC_SHA256);
+ if (ret != 0)
+ goto end;
+
+ /* Step 2. */
+ for (i = 0; i < parallel; i++)
+ scryptROMix(blocks + i * bSz, v, y, blockSize, 1 << cost);
+
+ /* Step 3. */
+ ret = wc_PBKDF2(output, passwd, passLen, blocks, blocksSz, 1, dkLen,
+ WC_SHA256);
+end:
+ if (blocks != NULL)
+ XFREE(blocks, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (v != NULL)
+ XFREE(v, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (y != NULL)
+ XFREE(y, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ return ret;
+}
+
+/* Generates an key derived from a password and salt using a memory hard
+ * algorithm.
+ * Implements RFC 7914: scrypt PBKDF.
+ *
+ * output Derived key.
+ * passwd Password to derive key from.
+ * passLen Length of the password.
+ * salt Key specific data.
+ * saltLen Length of the salt data.
+ * iterations Number of iterations to perform. Range: 1 << (1..(128*r/8-1))
+ * blockSize Number of 128 byte octets in a working block.
+ * parallel Number of parallel mix operations to perform.
+ * (Note: this implementation does not use threads.)
+ * dkLen Length of the derived key in bytes.
+ * returns BAD_FUNC_ARG when: iterations is not a power of 2 or blockSize is too
+ * large for iterations.
+ */
+int wc_scrypt_ex(byte* output, const byte* passwd, int passLen,
+ const byte* salt, int saltLen, word32 iterations,
+ int blockSize, int parallel, int dkLen)
+{
+ int cost;
+
+ /* Iterations must be a power of 2. */
+ if ((iterations & (iterations - 1)) != 0)
+ return BAD_FUNC_ARG;
+
+ for (cost = -1; iterations != 0; cost++) {
+ iterations >>= 1;
+ }
+
+ return wc_scrypt(output, passwd, passLen, salt, saltLen, cost, blockSize,
+ parallel, dkLen);
+}
+#endif /* HAVE_SCRYPT */
+
+#endif /* NO_PWDBASED */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rabbit.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rabbit.c
index fc7861115..820fd0ac3 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rabbit.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rabbit.c
@@ -1,8 +1,8 @@
/* rabbit.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -33,6 +34,7 @@
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
@@ -86,7 +88,7 @@ static void RABBIT_next_state(RabbitCtx* ctx)
ctx->c[6] = U32V(ctx->c[6] + 0x4D34D34D + (ctx->c[5] < c_old[5]));
ctx->c[7] = U32V(ctx->c[7] + 0xD34D34D3 + (ctx->c[6] < c_old[6]));
ctx->carry = (ctx->c[7] < c_old[7]);
-
+
/* Calculate the g-values */
for (i=0;i<8;i++)
g[i] = RABBIT_g_func(U32V(ctx->x[i] + ctx->c[i]));
@@ -114,7 +116,7 @@ static void wc_RabbitSetIV(Rabbit* ctx, const byte* inIv)
XMEMCPY(iv, inIv, sizeof(iv));
else
XMEMSET(iv, 0, sizeof(iv));
-
+
/* Generate four subvectors */
i0 = LITTLE32(iv[0]);
i2 = LITTLE32(iv[1]);
@@ -143,7 +145,7 @@ static void wc_RabbitSetIV(Rabbit* ctx, const byte* inIv)
/* Key setup */
-static INLINE int DoKey(Rabbit* ctx, const byte* key, const byte* iv)
+static WC_INLINE int DoKey(Rabbit* ctx, const byte* key, const byte* iv)
{
/* Temporary variables */
word32 k0, k1, k2, k3, i;
@@ -198,10 +200,36 @@ static INLINE int DoKey(Rabbit* ctx, const byte* key, const byte* iv)
}
+int wc_Rabbit_SetHeap(Rabbit* ctx, void* heap)
+{
+ if (ctx == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef XSTREAM_ALIGN
+ ctx->heap = heap;
+#endif
+
+ (void)heap;
+ return 0;
+}
+
+
/* Key setup */
int wc_RabbitSetKey(Rabbit* ctx, const byte* key, const byte* iv)
{
+ if (ctx == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
#ifdef XSTREAM_ALIGN
+ /* default heap to NULL or heap test value */
+ #ifdef WOLFSSL_HEAP_TEST
+ ctx->heap = (void*)WOLFSSL_HEAP_TEST;
+ #else
+ ctx->heap = NULL;
+ #endif /* WOLFSSL_HEAP_TEST */
+
if ((wolfssl_word)key % 4) {
int alignKey[4];
@@ -219,7 +247,7 @@ int wc_RabbitSetKey(Rabbit* ctx, const byte* key, const byte* iv)
/* Encrypt/decrypt a message of any size */
-static INLINE int DoProcess(Rabbit* ctx, byte* output, const byte* input,
+static WC_INLINE int DoProcess(Rabbit* ctx, byte* output, const byte* input,
word32 msglen)
{
/* Encrypt/decrypt all full blocks */
@@ -262,11 +290,11 @@ static INLINE int DoProcess(Rabbit* ctx, byte* output, const byte* input,
/* Generate 16 bytes of pseudo-random data */
tmp[0] = LITTLE32(ctx->workCtx.x[0] ^
(ctx->workCtx.x[5]>>16) ^ U32V(ctx->workCtx.x[3]<<16));
- tmp[1] = LITTLE32(ctx->workCtx.x[2] ^
+ tmp[1] = LITTLE32(ctx->workCtx.x[2] ^
(ctx->workCtx.x[7]>>16) ^ U32V(ctx->workCtx.x[5]<<16));
- tmp[2] = LITTLE32(ctx->workCtx.x[4] ^
+ tmp[2] = LITTLE32(ctx->workCtx.x[4] ^
(ctx->workCtx.x[1]>>16) ^ U32V(ctx->workCtx.x[7]<<16));
- tmp[3] = LITTLE32(ctx->workCtx.x[6] ^
+ tmp[3] = LITTLE32(ctx->workCtx.x[6] ^
(ctx->workCtx.x[3]>>16) ^ U32V(ctx->workCtx.x[1]<<16));
/* Encrypt/decrypt the data */
@@ -281,20 +309,24 @@ static INLINE int DoProcess(Rabbit* ctx, byte* output, const byte* input,
/* Encrypt/decrypt a message of any size */
int wc_RabbitProcess(Rabbit* ctx, byte* output, const byte* input, word32 msglen)
{
+ if (ctx == NULL || output == NULL || input == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
#ifdef XSTREAM_ALIGN
if ((wolfssl_word)input % 4 || (wolfssl_word)output % 4) {
#ifndef NO_WOLFSSL_ALLOC_ALIGN
byte* tmp;
WOLFSSL_MSG("wc_RabbitProcess unaligned");
- tmp = (byte*)XMALLOC(msglen, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ tmp = (byte*)XMALLOC(msglen, ctx->heap, DYNAMIC_TYPE_TMP_BUFFER);
if (tmp == NULL) return MEMORY_E;
XMEMCPY(tmp, input, msglen);
DoProcess(ctx, tmp, tmp, msglen);
XMEMCPY(output, tmp, msglen);
- XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(tmp, ctx->heap, DYNAMIC_TYPE_TMP_BUFFER);
return 0;
#else
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/random.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/random.c
index 4a1f7ea5b..53041d164 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/random.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/random.c
@@ -1,8 +1,8 @@
/* random.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,135 +16,247 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
-
+
#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
/* on HPUX 11 you may need to install /dev/random see
http://h20293.www2.hp.com/portal/swdepot/displayProductInfo.do?productNumber=KRNG11I
*/
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$c")
+ #pragma const_seg(".fipsB$c")
+ #endif
+#endif
+
+
#include <wolfssl/wolfcrypt/random.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+
+
+/* If building for old FIPS. */
+#if defined(HAVE_FIPS) && \
+ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
-#ifdef HAVE_FIPS
int wc_GenerateSeed(OS_Seed* os, byte* seed, word32 sz)
{
return GenerateSeed(os, seed, sz);
}
-#ifdef HAVE_CAVIUM
- int wc_InitRngCavium(RNG* rng, int i)
- {
- return InitRngCavium(rng, i);
- }
-#endif
-
+int wc_InitRng_ex(WC_RNG* rng, void* heap, int devId)
+{
+ (void)heap;
+ (void)devId;
+ return InitRng_fips(rng);
+}
-int wc_InitRng(RNG* rng)
+int wc_InitRng(WC_RNG* rng)
{
return InitRng_fips(rng);
}
-int wc_RNG_GenerateBlock(RNG* rng, byte* b, word32 sz)
+int wc_RNG_GenerateBlock(WC_RNG* rng, byte* b, word32 sz)
{
return RNG_GenerateBlock_fips(rng, b, sz);
}
-int wc_RNG_GenerateByte(RNG* rng, byte* b)
+int wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
{
return RNG_GenerateByte(rng, b);
}
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+#ifdef HAVE_HASHDRBG
- int wc_FreeRng(RNG* rng)
+ int wc_FreeRng(WC_RNG* rng)
{
return FreeRng_fips(rng);
}
-
- int wc_RNG_HealthTest(int reseed,
- const byte* entropyA, word32 entropyASz,
- const byte* entropyB, word32 entropyBSz,
- byte* output, word32 outputSz)
+ int wc_RNG_HealthTest(int reseed, const byte* seedA, word32 seedASz,
+ const byte* seedB, word32 seedBSz,
+ byte* output, word32 outputSz)
{
- return RNG_HealthTest_fips(reseed, entropyA, entropyASz,
- entropyB, entropyBSz, output, outputSz);
- }
-#endif /* HAVE_HASHDRBG || NO_RC4 */
-#else /* else build without fips */
-#include <wolfssl/wolfcrypt/error-crypt.h>
+ return RNG_HealthTest_fips(reseed, seedA, seedASz,
+ seedB, seedBSz, output, outputSz);
+ }
+#endif /* HAVE_HASHDRBG */
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+#else /* else build without fips, or for new fips */
- #include <wolfssl/wolfcrypt/sha256.h>
+#ifndef WC_NO_RNG /* if not FIPS and RNG is disabled then do not compile */
- #ifdef NO_INLINE
- #include <wolfssl/wolfcrypt/misc.h>
- #else
- #include <wolfcrypt/src/misc.c>
- #endif
-#endif /* HAVE_HASHDRBG || NO_RC4 */
+#include <wolfssl/wolfcrypt/sha256.h>
+
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
-#if defined(USE_WINDOWS_API)
+#if defined(WOLFSSL_SGX)
+ #include <sgx_trts.h>
+#elif defined(USE_WINDOWS_API)
#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0400
#endif
#include <windows.h>
#include <wincrypt.h>
+#elif defined(HAVE_WNR)
+ #include <wnr.h>
+ #include <wolfssl/wolfcrypt/logging.h>
+ wolfSSL_Mutex wnr_mutex; /* global netRandom mutex */
+ int wnr_timeout = 0; /* entropy timeout, mililseconds */
+ int wnr_mutex_init = 0; /* flag for mutex init */
+ wnr_context* wnr_ctx; /* global netRandom context */
+#elif defined(FREESCALE_KSDK_2_0_TRNG)
+ #include "fsl_trng.h"
+#elif defined(FREESCALE_KSDK_2_0_RNGA)
+ #include "fsl_rnga.h"
+#elif defined(WOLFSSL_WICED)
+ #include "wiced_crypto.h"
+#elif defined(WOLFSSL_NETBURNER)
+ #include <predef.h>
+ #include <basictypes.h>
+ #include <random.h>
+#elif defined(NO_DEV_RANDOM)
+#elif defined(CUSTOM_RAND_GENERATE)
+#elif defined(CUSTOM_RAND_GENERATE_BLOCK)
+#elif defined(CUSTOM_RAND_GENERATE_SEED)
+#elif defined(WOLFSSL_GENSEED_FORTEST)
+#elif defined(WOLFSSL_MDK_ARM)
+#elif defined(WOLFSSL_IAR_ARM)
+#elif defined(WOLFSSL_ROWLEY_ARM)
+#elif defined(WOLFSSL_EMBOS)
+#elif defined(WOLFSSL_DEOS)
+#elif defined(MICRIUM)
+#elif defined(WOLFSSL_NUCLEUS)
+#elif defined(WOLFSSL_PB)
+#elif defined(WOLFSSL_ZEPHYR)
+#elif defined(WOLFSSL_TELIT_M2MB)
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_TRNG)
#else
- #if !defined(NO_DEV_RANDOM) && !defined(CUSTOM_RAND_GENERATE) && \
- !defined(WOLFSSL_MDK_ARM) && !defined(WOLFSSL_IAR_ARM)
- #include <fcntl.h>
- #ifndef EBSNET
- #include <unistd.h>
- #endif
- #else
- /* include headers that may be needed to get good seed */
+ /* include headers that may be needed to get good seed */
+ #include <fcntl.h>
+ #ifndef EBSNET
+ #include <unistd.h>
+ #endif
+#endif
+
+
+#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED)
+ static word32 intel_flags = 0;
+ static void wc_InitRng_IntelRD(void)
+ {
+ intel_flags = cpuid_get_flags();
+ }
+ #ifdef HAVE_INTEL_RDSEED
+ static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz);
#endif
+ #ifdef HAVE_INTEL_RDRAND
+ static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz);
+ #endif
+
+#ifdef USE_WINDOWS_API
+ #include <immintrin.h>
#endif /* USE_WINDOWS_API */
-
-#ifdef HAVE_INTEL_RDGEN
- static int wc_InitRng_IntelRD(void) ;
- #if defined(HAVE_HASHDRBG) || defined(NO_RC4)
- static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz) ;
+#endif
+
+/* Start NIST DRBG code */
+#ifdef HAVE_HASHDRBG
+
+#define OUTPUT_BLOCK_LEN (WC_SHA256_DIGEST_SIZE)
+#define MAX_REQUEST_LEN (0x10000)
+#define RESEED_INTERVAL WC_RESEED_INTERVAL
+
+
+/* For FIPS builds, the user should not be adjusting the values. */
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+ #if defined(RNG_SECURITY_STRENGTH) \
+ || defined(ENTROPY_SCALE_FACTOR) \
+ || defined(SEED_BLOCK_SZ)
+
+ #error "Do not change the RNG parameters for FIPS builds."
+ #endif
+#endif
+
+
+/* The security strength for the RNG is the target number of bits of
+ * entropy you are looking for in a seed. */
+#ifndef RNG_SECURITY_STRENGTH
+ #if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+ /* SHA-256 requires a minimum of 256-bits of entropy. The goal
+ * of 1024 will provide 4 times that. */
+ #define RNG_SECURITY_STRENGTH (1024)
#else
- static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz) ;
+ /* If not using FIPS or using old FIPS, set the number down a bit.
+ * More is better, but more is also slower. */
+ #define RNG_SECURITY_STRENGTH (256)
#endif
- static word32 cpuid_check = 0 ;
- static word32 cpuid_flags = 0 ;
- #define CPUID_RDRAND 0x4
- #define CPUID_RDSEED 0x8
- #define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND)
- #define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED)
#endif
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+#ifndef ENTROPY_SCALE_FACTOR
+ /* The entropy scale factor should be the whole number inverse of the
+ * minimum bits of entropy per bit of NDRNG output. */
+ #if defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
+ /* The value of 2 applies to Intel's RDSEED which provides about
+ * 0.5 bits minimum of entropy per bit. */
+ #define ENTROPY_SCALE_FACTOR 2
+ #else
+ /* Setting the default to 1. */
+ #define ENTROPY_SCALE_FACTOR 1
+ #endif
+#endif
-/* Start NIST DRBG code */
+#ifndef SEED_BLOCK_SZ
+ /* The seed block size, is the size of the output of the underlying NDRNG.
+ * This value is used for testing the output of the NDRNG. */
+ #if defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
+ /* RDSEED outputs in blocks of 64-bits. */
+ #define SEED_BLOCK_SZ sizeof(word64)
+ #else
+ /* Setting the default to 4. */
+ #define SEED_BLOCK_SZ 4
+ #endif
+#endif
+
+#define SEED_SZ (RNG_SECURITY_STRENGTH*ENTROPY_SCALE_FACTOR/8)
+
+/* The maximum seed size will be the seed size plus a seed block for the
+ * test, and an additional half of the seed size. This additional half
+ * is in case the user does not supply a nonce. A nonce will be obtained
+ * from the NDRNG. */
+#define MAX_SEED_SZ (SEED_SZ + SEED_SZ/2 + SEED_BLOCK_SZ)
-#define OUTPUT_BLOCK_LEN (SHA256_DIGEST_SIZE)
-#define MAX_REQUEST_LEN (0x10000)
-#define RESEED_INTERVAL (1000000)
-#define SECURITY_STRENGTH (256)
-#define ENTROPY_SZ (SECURITY_STRENGTH/8)
-#define NONCE_SZ (ENTROPY_SZ/2)
-#define ENTROPY_NONCE_SZ (ENTROPY_SZ+NONCE_SZ)
/* Internal return codes */
#define DRBG_SUCCESS 0
-#define DRBG_ERROR 1
-#define DRBG_FAILURE 2
-#define DRBG_NEED_RESEED 3
-#define DRBG_CONT_FAILURE 4
+#define DRBG_FAILURE 1
+#define DRBG_NEED_RESEED 2
+#define DRBG_CONT_FAILURE 3
/* RNG health states */
#define DRBG_NOT_INIT 0
@@ -152,6 +264,12 @@ int wc_RNG_GenerateByte(RNG* rng, byte* b)
#define DRBG_FAILED 2
#define DRBG_CONT_FAILED 3
+#define RNG_HEALTH_TEST_CHECK_SIZE (WC_SHA256_DIGEST_SIZE * 4)
+
+/* Verify max gen block len */
+#if RNG_MAX_BLOCK_LEN > MAX_REQUEST_LEN
+ #error RNG_MAX_BLOCK_LEN is larger than NIST DBRG max request length
+#endif
enum {
drbgInitC = 0,
@@ -161,13 +279,20 @@ enum {
drbgInitV
};
-
+/* NOTE: if DRBG struct is changed please update random.h drbg_data size */
typedef struct DRBG {
word32 reseedCtr;
word32 lastBlock;
byte V[DRBG_SEED_LEN];
byte C[DRBG_SEED_LEN];
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+ void* heap;
+ int devId;
+#endif
byte matchCount;
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ wc_Sha256 sha256;
+#endif
} DRBG;
@@ -179,73 +304,104 @@ static int Hash_df(DRBG* drbg, byte* out, word32 outSz, byte type,
const byte* inA, word32 inASz,
const byte* inB, word32 inBSz)
{
+ int ret = DRBG_FAILURE;
byte ctr;
int i;
int len;
word32 bits = (outSz * 8); /* reverse byte order */
- Sha256 sha;
- byte digest[SHA256_DIGEST_SIZE];
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ wc_Sha256* sha = &drbg->sha256;
+#else
+ wc_Sha256 sha[1];
+#endif
+#ifdef WC_ASYNC_ENABLE_SHA256
+ DECLARE_VAR(digest, byte, WC_SHA256_DIGEST_SIZE, drbg->heap);
+ if (digest == NULL)
+ return MEMORY_E;
+#else
+ byte digest[WC_SHA256_DIGEST_SIZE];
+#endif
(void)drbg;
- #ifdef LITTLE_ENDIAN_ORDER
- bits = ByteReverseWord32(bits);
- #endif
+#ifdef WC_ASYNC_ENABLE_SHA256
+ if (digest == NULL)
+ return DRBG_FAILURE;
+#endif
+
+#ifdef LITTLE_ENDIAN_ORDER
+ bits = ByteReverseWord32(bits);
+#endif
len = (outSz / OUTPUT_BLOCK_LEN)
+ ((outSz % OUTPUT_BLOCK_LEN) ? 1 : 0);
- for (i = 0, ctr = 1; i < len; i++, ctr++)
- {
- if (wc_InitSha256(&sha) != 0)
- return DRBG_FAILURE;
-
- if (wc_Sha256Update(&sha, &ctr, sizeof(ctr)) != 0)
- return DRBG_FAILURE;
-
- if (wc_Sha256Update(&sha, (byte*)&bits, sizeof(bits)) != 0)
- return DRBG_FAILURE;
-
- /* churning V is the only string that doesn't have the type added */
- if (type != drbgInitV)
- if (wc_Sha256Update(&sha, &type, sizeof(type)) != 0)
- return DRBG_FAILURE;
-
- if (wc_Sha256Update(&sha, inA, inASz) != 0)
- return DRBG_FAILURE;
-
- if (inB != NULL && inBSz > 0)
- if (wc_Sha256Update(&sha, inB, inBSz) != 0)
- return DRBG_FAILURE;
+ for (i = 0, ctr = 1; i < len; i++, ctr++) {
+#ifndef WOLFSSL_SMALL_STACK_CACHE
+ #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+ ret = wc_InitSha256_ex(sha, drbg->heap, drbg->devId);
+ #else
+ ret = wc_InitSha256(sha);
+ #endif
+ if (ret != 0)
+ break;
- if (wc_Sha256Final(&sha, digest) != 0)
- return DRBG_FAILURE;
+ if (ret == 0)
+#endif
+ ret = wc_Sha256Update(sha, &ctr, sizeof(ctr));
+ if (ret == 0)
+ ret = wc_Sha256Update(sha, (byte*)&bits, sizeof(bits));
- if (outSz > OUTPUT_BLOCK_LEN) {
- XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
- outSz -= OUTPUT_BLOCK_LEN;
- out += OUTPUT_BLOCK_LEN;
+ if (ret == 0) {
+ /* churning V is the only string that doesn't have the type added */
+ if (type != drbgInitV)
+ ret = wc_Sha256Update(sha, &type, sizeof(type));
}
- else {
- XMEMCPY(out, digest, outSz);
+ if (ret == 0)
+ ret = wc_Sha256Update(sha, inA, inASz);
+ if (ret == 0) {
+ if (inB != NULL && inBSz > 0)
+ ret = wc_Sha256Update(sha, inB, inBSz);
+ }
+ if (ret == 0)
+ ret = wc_Sha256Final(sha, digest);
+
+#ifndef WOLFSSL_SMALL_STACK_CACHE
+ wc_Sha256Free(sha);
+#endif
+ if (ret == 0) {
+ if (outSz > OUTPUT_BLOCK_LEN) {
+ XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
+ outSz -= OUTPUT_BLOCK_LEN;
+ out += OUTPUT_BLOCK_LEN;
+ }
+ else {
+ XMEMCPY(out, digest, outSz);
+ }
}
}
- ForceZero(digest, sizeof(digest));
- return DRBG_SUCCESS;
-}
+ ForceZero(digest, WC_SHA256_DIGEST_SIZE);
+#ifdef WC_ASYNC_ENABLE_SHA256
+ FREE_VAR(digest, drbg->heap);
+#endif
+
+ return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
+}
/* Returns: DRBG_SUCCESS or DRBG_FAILURE */
-static int Hash_DRBG_Reseed(DRBG* drbg, const byte* entropy, word32 entropySz)
+static int Hash_DRBG_Reseed(DRBG* drbg, const byte* seed, word32 seedSz)
{
- byte seed[DRBG_SEED_LEN];
+ byte newV[DRBG_SEED_LEN];
- if (Hash_df(drbg, seed, sizeof(seed), drbgReseed, drbg->V, sizeof(drbg->V),
- entropy, entropySz) != DRBG_SUCCESS) {
+ XMEMSET(newV, 0, DRBG_SEED_LEN);
+
+ if (Hash_df(drbg, newV, sizeof(newV), drbgReseed,
+ drbg->V, sizeof(drbg->V), seed, seedSz) != DRBG_SUCCESS) {
return DRBG_FAILURE;
}
- XMEMCPY(drbg->V, seed, sizeof(drbg->V));
- ForceZero(seed, sizeof(seed));
+ XMEMCPY(drbg->V, newV, sizeof(drbg->V));
+ ForceZero(newV, sizeof(newV));
if (Hash_df(drbg, drbg->C, sizeof(drbg->C), drbgInitC, drbg->V,
sizeof(drbg->V), NULL, 0) != DRBG_SUCCESS) {
@@ -258,7 +414,17 @@ static int Hash_DRBG_Reseed(DRBG* drbg, const byte* entropy, word32 entropySz)
return DRBG_SUCCESS;
}
-static INLINE void array_add_one(byte* data, word32 dataSz)
+/* Returns: DRBG_SUCCESS and DRBG_FAILURE or BAD_FUNC_ARG on fail */
+int wc_RNG_DRBG_Reseed(WC_RNG* rng, const byte* seed, word32 seedSz)
+{
+ if (rng == NULL || seed == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ return Hash_DRBG_Reseed(rng->drbg, seed, seedSz);
+}
+
+static WC_INLINE void array_add_one(byte* data, word32 dataSz)
{
int i;
@@ -269,16 +435,26 @@ static INLINE void array_add_one(byte* data, word32 dataSz)
}
}
-
/* Returns: DRBG_SUCCESS or DRBG_FAILURE */
static int Hash_gen(DRBG* drbg, byte* out, word32 outSz, const byte* V)
{
+ int ret = DRBG_FAILURE;
byte data[DRBG_SEED_LEN];
int i;
int len;
word32 checkBlock;
- Sha256 sha;
- byte digest[SHA256_DIGEST_SIZE];
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ wc_Sha256* sha = &drbg->sha256;
+#else
+ wc_Sha256 sha[1];
+#endif
+#ifdef WC_ASYNC_ENABLE_SHA256
+ DECLARE_VAR(digest, byte, WC_SHA256_DIGEST_SIZE, drbg->heap);
+ if (digest == NULL)
+ return MEMORY_E;
+#else
+ byte digest[WC_SHA256_DIGEST_SIZE];
+#endif
/* Special case: outSz is 0 and out is NULL. wc_Generate a block to save for
* the continuous test. */
@@ -289,48 +465,63 @@ static int Hash_gen(DRBG* drbg, byte* out, word32 outSz, const byte* V)
XMEMCPY(data, V, sizeof(data));
for (i = 0; i < len; i++) {
- if (wc_InitSha256(&sha) != 0 ||
- wc_Sha256Update(&sha, data, sizeof(data)) != 0 ||
- wc_Sha256Final(&sha, digest) != 0) {
-
- return DRBG_FAILURE;
- }
+#ifndef WOLFSSL_SMALL_STACK_CACHE
+ #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+ ret = wc_InitSha256_ex(sha, drbg->heap, drbg->devId);
+ #else
+ ret = wc_InitSha256(sha);
+ #endif
+ if (ret == 0)
+#endif
+ ret = wc_Sha256Update(sha, data, sizeof(data));
+ if (ret == 0)
+ ret = wc_Sha256Final(sha, digest);
+#ifndef WOLFSSL_SMALL_STACK_CACHE
+ wc_Sha256Free(sha);
+#endif
- XMEMCPY(&checkBlock, digest, sizeof(word32));
- if (drbg->reseedCtr > 1 && checkBlock == drbg->lastBlock) {
- if (drbg->matchCount == 1) {
- return DRBG_CONT_FAILURE;
+ if (ret == 0) {
+ XMEMCPY(&checkBlock, digest, sizeof(word32));
+ if (drbg->reseedCtr > 1 && checkBlock == drbg->lastBlock) {
+ if (drbg->matchCount == 1) {
+ return DRBG_CONT_FAILURE;
+ }
+ else {
+ if (i == len) {
+ len++;
+ }
+ drbg->matchCount = 1;
+ }
}
else {
- if (i == len) {
- len++;
- }
- drbg->matchCount = 1;
+ drbg->matchCount = 0;
+ drbg->lastBlock = checkBlock;
}
- }
- else {
- drbg->matchCount = 0;
- drbg->lastBlock = checkBlock;
- }
- if (outSz >= OUTPUT_BLOCK_LEN) {
- XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
- outSz -= OUTPUT_BLOCK_LEN;
- out += OUTPUT_BLOCK_LEN;
- array_add_one(data, DRBG_SEED_LEN);
- }
- else if (out != NULL && outSz != 0) {
- XMEMCPY(out, digest, outSz);
- outSz = 0;
+ if (out != NULL && outSz != 0) {
+ if (outSz >= OUTPUT_BLOCK_LEN) {
+ XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
+ outSz -= OUTPUT_BLOCK_LEN;
+ out += OUTPUT_BLOCK_LEN;
+ array_add_one(data, DRBG_SEED_LEN);
+ }
+ else {
+ XMEMCPY(out, digest, outSz);
+ outSz = 0;
+ }
+ }
}
}
ForceZero(data, sizeof(data));
- return DRBG_SUCCESS;
-}
+#ifdef WC_ASYNC_ENABLE_SHA256
+ FREE_VAR(digest, drbg->heap);
+#endif
+ return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
+}
-static INLINE void array_add(byte* d, word32 dLen, const byte* s, word32 sLen)
+static WC_INLINE void array_add(byte* d, word32 dLen, const byte* s, word32 sLen)
{
word16 carry = 0;
@@ -352,53 +543,97 @@ static INLINE void array_add(byte* d, word32 dLen, const byte* s, word32 sLen)
}
}
-
/* Returns: DRBG_SUCCESS, DRBG_NEED_RESEED, or DRBG_FAILURE */
static int Hash_DRBG_Generate(DRBG* drbg, byte* out, word32 outSz)
{
- int ret = DRBG_NEED_RESEED;
- Sha256 sha;
- byte digest[SHA256_DIGEST_SIZE];
+ int ret;
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ wc_Sha256* sha = &drbg->sha256;
+#else
+ wc_Sha256 sha[1];
+#endif
+ byte type;
+ word32 reseedCtr;
- if (drbg->reseedCtr != RESEED_INTERVAL) {
- byte type = drbgGenerateH;
- word32 reseedCtr = drbg->reseedCtr;
+ if (drbg->reseedCtr == RESEED_INTERVAL) {
+ return DRBG_NEED_RESEED;
+ } else {
+ #ifdef WC_ASYNC_ENABLE_SHA256
+ DECLARE_VAR(digest, byte, WC_SHA256_DIGEST_SIZE, drbg->heap);
+ if (digest == NULL)
+ return MEMORY_E;
+ #else
+ byte digest[WC_SHA256_DIGEST_SIZE];
+ #endif
+ type = drbgGenerateH;
+ reseedCtr = drbg->reseedCtr;
ret = Hash_gen(drbg, out, outSz, drbg->V);
if (ret == DRBG_SUCCESS) {
- if (wc_InitSha256(&sha) != 0 ||
- wc_Sha256Update(&sha, &type, sizeof(type)) != 0 ||
- wc_Sha256Update(&sha, drbg->V, sizeof(drbg->V)) != 0 ||
- wc_Sha256Final(&sha, digest) != 0) {
+#ifndef WOLFSSL_SMALL_STACK_CACHE
+ #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+ ret = wc_InitSha256_ex(sha, drbg->heap, drbg->devId);
+ #else
+ ret = wc_InitSha256(sha);
+ #endif
+ if (ret == 0)
+#endif
+ ret = wc_Sha256Update(sha, &type, sizeof(type));
+ if (ret == 0)
+ ret = wc_Sha256Update(sha, drbg->V, sizeof(drbg->V));
+ if (ret == 0)
+ ret = wc_Sha256Final(sha, digest);
+
+#ifndef WOLFSSL_SMALL_STACK_CACHE
+ wc_Sha256Free(sha);
+#endif
- ret = DRBG_FAILURE;
- }
- else {
- array_add(drbg->V, sizeof(drbg->V), digest, sizeof(digest));
+ if (ret == 0) {
+ array_add(drbg->V, sizeof(drbg->V), digest, WC_SHA256_DIGEST_SIZE);
array_add(drbg->V, sizeof(drbg->V), drbg->C, sizeof(drbg->C));
- #ifdef LITTLE_ENDIAN_ORDER
- reseedCtr = ByteReverseWord32(reseedCtr);
- #endif
+ #ifdef LITTLE_ENDIAN_ORDER
+ reseedCtr = ByteReverseWord32(reseedCtr);
+ #endif
array_add(drbg->V, sizeof(drbg->V),
(byte*)&reseedCtr, sizeof(reseedCtr));
ret = DRBG_SUCCESS;
}
drbg->reseedCtr++;
}
+ ForceZero(digest, WC_SHA256_DIGEST_SIZE);
+ #ifdef WC_ASYNC_ENABLE_SHA256
+ FREE_VAR(digest, drbg->heap);
+ #endif
}
- ForceZero(digest, sizeof(digest));
- return ret;
+ return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
}
-
/* Returns: DRBG_SUCCESS or DRBG_FAILURE */
static int Hash_DRBG_Instantiate(DRBG* drbg, const byte* seed, word32 seedSz,
- const byte* nonce, word32 nonceSz)
+ const byte* nonce, word32 nonceSz,
+ void* heap, int devId)
{
- int ret = DRBG_FAILURE;
+ int ret;
XMEMSET(drbg, 0, sizeof(DRBG));
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+ drbg->heap = heap;
+ drbg->devId = devId;
+#else
+ (void)heap;
+ (void)devId;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ #if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+ ret = wc_InitSha256_ex(&drbg->sha256, drbg->heap, drbg->devId);
+ #else
+ ret = wc_InitSha256(&drbg->sha256);
+ #endif
+ if (ret != 0)
+ return ret;
+#endif
if (Hash_df(drbg, drbg->V, sizeof(drbg->V), drbgInitV, seed, seedSz,
nonce, nonceSz) == DRBG_SUCCESS &&
@@ -410,11 +645,13 @@ static int Hash_DRBG_Instantiate(DRBG* drbg, const byte* seed, word32 seedSz,
drbg->matchCount = 0;
ret = DRBG_SUCCESS;
}
+ else {
+ ret = DRBG_FAILURE;
+ }
return ret;
}
-
/* Returns: DRBG_SUCCESS or DRBG_FAILURE */
static int Hash_DRBG_Uninstantiate(DRBG* drbg)
{
@@ -422,6 +659,10 @@ static int Hash_DRBG_Uninstantiate(DRBG* drbg)
int compareSum = 0;
byte* compareDrbg = (byte*)drbg;
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ wc_Sha256Free(&drbg->sha256);
+#endif
+
ForceZero(drbg, sizeof(DRBG));
for (i = 0; i < sizeof(DRBG); i++)
@@ -430,91 +671,293 @@ static int Hash_DRBG_Uninstantiate(DRBG* drbg)
return (compareSum == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
}
+
+int wc_RNG_TestSeed(const byte* seed, word32 seedSz)
+{
+ int ret = DRBG_SUCCESS;
+
+ /* Check the seed for duplicate words. */
+ word32 seedIdx = 0;
+ word32 scratchSz = min(SEED_BLOCK_SZ, seedSz - SEED_BLOCK_SZ);
+
+ while (seedIdx < seedSz - SEED_BLOCK_SZ) {
+ if (ConstantCompare(seed + seedIdx,
+ seed + seedIdx + scratchSz,
+ scratchSz) == 0) {
+
+ ret = DRBG_CONT_FAILURE;
+ }
+ seedIdx += SEED_BLOCK_SZ;
+ scratchSz = min(SEED_BLOCK_SZ, (seedSz - seedIdx));
+ }
+
+ return ret;
+}
+#endif /* HAVE_HASHDRBG */
/* End NIST DRBG Code */
-/* Get seed and key cipher */
-int wc_InitRng(RNG* rng)
+static int _InitRng(WC_RNG* rng, byte* nonce, word32 nonceSz,
+ void* heap, int devId)
{
- int ret = BAD_FUNC_ARG;
+ int ret = RNG_FAILURE_E;
+#ifdef HAVE_HASHDRBG
+ word32 seedSz = SEED_SZ + SEED_BLOCK_SZ;
+#endif
- if (rng != NULL) {
- if (wc_RNG_HealthTestLocal(0) == 0) {
- byte entropy[ENTROPY_NONCE_SZ];
+ (void)nonce;
+ (void)nonceSz;
- rng->drbg =
- (struct DRBG*)XMALLOC(sizeof(DRBG), NULL, DYNAMIC_TYPE_RNG);
- if (rng->drbg == NULL) {
- ret = MEMORY_E;
- }
- /* This doesn't use a separate nonce. The entropy input will be
- * the default size plus the size of the nonce making the seed
- * size. */
- else if (wc_GenerateSeed(&rng->seed,
- entropy, ENTROPY_NONCE_SZ) == 0 &&
- Hash_DRBG_Instantiate(rng->drbg,
- entropy, ENTROPY_NONCE_SZ, NULL, 0) == DRBG_SUCCESS) {
-
- ret = Hash_DRBG_Generate(rng->drbg, NULL, 0);
- }
- else
- ret = DRBG_FAILURE;
+ if (rng == NULL)
+ return BAD_FUNC_ARG;
+ if (nonce == NULL && nonceSz != 0)
+ return BAD_FUNC_ARG;
- ForceZero(entropy, ENTROPY_NONCE_SZ);
- }
- else
- ret = DRBG_CONT_FAILURE;
+#ifdef WOLFSSL_HEAP_TEST
+ rng->heap = (void*)WOLFSSL_HEAP_TEST;
+ (void)heap;
+#else
+ rng->heap = heap;
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+ rng->devId = devId;
+ #if defined(WOLF_CRYPTO_CB)
+ rng->seed.devId = devId;
+ #endif
+#else
+ (void)devId;
+#endif
- if (ret == DRBG_SUCCESS) {
- rng->status = DRBG_OK;
- ret = 0;
- }
- else if (ret == DRBG_CONT_FAILURE) {
- rng->status = DRBG_CONT_FAILED;
- ret = DRBG_CONT_FIPS_E;
- }
- else if (ret == DRBG_FAILURE) {
- rng->status = DRBG_FAILED;
- ret = RNG_FAILURE_E;
+#ifdef HAVE_HASHDRBG
+ /* init the DBRG to known values */
+ rng->drbg = NULL;
+ rng->status = DRBG_NOT_INIT;
+#endif
+
+#if defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
+ /* init the intel RD seed and/or rand */
+ wc_InitRng_IntelRD();
+#endif
+
+ /* configure async RNG source if available */
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_DevCtxInit(&rng->asyncDev, WOLFSSL_ASYNC_MARKER_RNG,
+ rng->heap, rng->devId);
+ if (ret != 0)
+ return ret;
+#endif
+
+#ifdef HAVE_INTEL_RDRAND
+ /* if CPU supports RDRAND, use it directly and by-pass DRBG init */
+ if (IS_INTEL_RDRAND(intel_flags))
+ return 0;
+#endif
+
+#ifdef CUSTOM_RAND_GENERATE_BLOCK
+ ret = 0; /* success */
+#else
+#ifdef HAVE_HASHDRBG
+ if (nonceSz == 0)
+ seedSz = MAX_SEED_SZ;
+
+ if (wc_RNG_HealthTestLocal(0) == 0) {
+ #ifdef WC_ASYNC_ENABLE_SHA256
+ DECLARE_VAR(seed, byte, MAX_SEED_SZ, rng->heap);
+ if (seed == NULL)
+ return MEMORY_E;
+ #else
+ byte seed[MAX_SEED_SZ];
+ #endif
+
+#if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+ rng->drbg =
+ (struct DRBG*)XMALLOC(sizeof(DRBG), rng->heap,
+ DYNAMIC_TYPE_RNG);
+#else
+ /* compile-time validation of drbg_data size */
+ typedef char drbg_data_test[sizeof(rng->drbg_data) >=
+ sizeof(struct DRBG) ? 1 : -1];
+ (void)sizeof(drbg_data_test);
+ rng->drbg = (struct DRBG*)rng->drbg_data;
+#endif
+
+ if (rng->drbg == NULL) {
+ ret = MEMORY_E;
}
else {
- rng->status = DRBG_FAILED;
+ ret = wc_GenerateSeed(&rng->seed, seed, seedSz);
+ if (ret != 0)
+ ret = DRBG_FAILURE;
+ else
+ ret = wc_RNG_TestSeed(seed, seedSz);
+
+ if (ret == DRBG_SUCCESS)
+ ret = Hash_DRBG_Instantiate(rng->drbg,
+ seed + SEED_BLOCK_SZ, seedSz - SEED_BLOCK_SZ,
+ nonce, nonceSz, rng->heap, devId);
+
+ if (ret != DRBG_SUCCESS) {
+ #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+ XFREE(rng->drbg, rng->heap, DYNAMIC_TYPE_RNG);
+ #endif
+ rng->drbg = NULL;
+ }
}
+
+ ForceZero(seed, seedSz);
+ #ifdef WC_ASYNC_ENABLE_SHA256
+ FREE_VAR(seed, rng->heap);
+ #endif
+ }
+ else
+ ret = DRBG_CONT_FAILURE;
+
+ if (ret == DRBG_SUCCESS) {
+ rng->status = DRBG_OK;
+ ret = 0;
+ }
+ else if (ret == DRBG_CONT_FAILURE) {
+ rng->status = DRBG_CONT_FAILED;
+ ret = DRBG_CONT_FIPS_E;
+ }
+ else if (ret == DRBG_FAILURE) {
+ rng->status = DRBG_FAILED;
+ ret = RNG_FAILURE_E;
+ }
+ else {
+ rng->status = DRBG_FAILED;
}
+#endif /* HAVE_HASHDRBG */
+#endif /* CUSTOM_RAND_GENERATE_BLOCK */
return ret;
}
+WOLFSSL_ABI
+WC_RNG* wc_rng_new(byte* nonce, word32 nonceSz, void* heap)
+{
+ WC_RNG* rng;
+
+ rng = (WC_RNG*)XMALLOC(sizeof(WC_RNG), heap, DYNAMIC_TYPE_RNG);
+ if (rng) {
+ int error = _InitRng(rng, nonce, nonceSz, heap, INVALID_DEVID) != 0;
+ if (error) {
+ XFREE(rng, heap, DYNAMIC_TYPE_RNG);
+ rng = NULL;
+ }
+ }
+
+ return rng;
+}
+
+
+WOLFSSL_ABI
+void wc_rng_free(WC_RNG* rng)
+{
+ if (rng) {
+ void* heap = rng->heap;
+
+ wc_FreeRng(rng);
+ ForceZero(rng, sizeof(WC_RNG));
+ XFREE(rng, heap, DYNAMIC_TYPE_RNG);
+ (void)heap;
+ }
+}
+
+
+int wc_InitRng(WC_RNG* rng)
+{
+ return _InitRng(rng, NULL, 0, NULL, INVALID_DEVID);
+}
+
+
+int wc_InitRng_ex(WC_RNG* rng, void* heap, int devId)
+{
+ return _InitRng(rng, NULL, 0, heap, devId);
+}
+
+
+int wc_InitRngNonce(WC_RNG* rng, byte* nonce, word32 nonceSz)
+{
+ return _InitRng(rng, nonce, nonceSz, NULL, INVALID_DEVID);
+}
+
+
+int wc_InitRngNonce_ex(WC_RNG* rng, byte* nonce, word32 nonceSz,
+ void* heap, int devId)
+{
+ return _InitRng(rng, nonce, nonceSz, heap, devId);
+}
+
+
/* place a generated block in output */
-int wc_RNG_GenerateBlock(RNG* rng, byte* output, word32 sz)
+WOLFSSL_ABI
+int wc_RNG_GenerateBlock(WC_RNG* rng, byte* output, word32 sz)
{
int ret;
- if (rng == NULL || output == NULL || sz > MAX_REQUEST_LEN)
+ if (rng == NULL || output == NULL)
+ return BAD_FUNC_ARG;
+
+#ifdef WOLF_CRYPTO_CB
+ if (rng->devId != INVALID_DEVID) {
+ ret = wc_CryptoCb_RandomBlock(rng, output, sz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+#endif
+
+#ifdef HAVE_INTEL_RDRAND
+ if (IS_INTEL_RDRAND(intel_flags))
+ return wc_GenerateRand_IntelRD(NULL, output, sz);
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ if (rng->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RNG) {
+ /* these are blocking */
+ #ifdef HAVE_CAVIUM
+ return NitroxRngGenerateBlock(rng, output, sz);
+ #elif defined(HAVE_INTEL_QA) && defined(QAT_ENABLE_RNG)
+ return IntelQaDrbg(&rng->asyncDev, output, sz);
+ #else
+ /* simulator not supported */
+ #endif
+ }
+#endif
+
+#ifdef CUSTOM_RAND_GENERATE_BLOCK
+ XMEMSET(output, 0, sz);
+ ret = CUSTOM_RAND_GENERATE_BLOCK(output, sz);
+#else
+
+#ifdef HAVE_HASHDRBG
+ if (sz > RNG_MAX_BLOCK_LEN)
return BAD_FUNC_ARG;
if (rng->status != DRBG_OK)
return RNG_FAILURE_E;
ret = Hash_DRBG_Generate(rng->drbg, output, sz);
-
if (ret == DRBG_NEED_RESEED) {
if (wc_RNG_HealthTestLocal(1) == 0) {
- byte entropy[ENTROPY_SZ];
+ byte newSeed[SEED_SZ + SEED_BLOCK_SZ];
- if (wc_GenerateSeed(&rng->seed, entropy, ENTROPY_SZ) == 0 &&
- Hash_DRBG_Reseed(rng->drbg, entropy, ENTROPY_SZ)
- == DRBG_SUCCESS) {
-
- ret = Hash_DRBG_Generate(rng->drbg, NULL, 0);
- if (ret == DRBG_SUCCESS)
- ret = Hash_DRBG_Generate(rng->drbg, output, sz);
- }
- else
+ ret = wc_GenerateSeed(&rng->seed, newSeed,
+ SEED_SZ + SEED_BLOCK_SZ);
+ if (ret != 0)
ret = DRBG_FAILURE;
+ else
+ ret = wc_RNG_TestSeed(newSeed, SEED_SZ + SEED_BLOCK_SZ);
+
+ if (ret == DRBG_SUCCESS)
+ ret = Hash_DRBG_Reseed(rng->drbg, newSeed + SEED_BLOCK_SZ,
+ SEED_SZ);
+ if (ret == DRBG_SUCCESS)
+ ret = Hash_DRBG_Generate(rng->drbg, output, sz);
- ForceZero(entropy, ENTROPY_SZ);
+ ForceZero(newSeed, sizeof(newSeed));
}
else
ret = DRBG_CONT_FAILURE;
@@ -531,90 +974,147 @@ int wc_RNG_GenerateBlock(RNG* rng, byte* output, word32 sz)
ret = RNG_FAILURE_E;
rng->status = DRBG_FAILED;
}
+#else
+
+ /* if we get here then there is an RNG configuration error */
+ ret = RNG_FAILURE_E;
+
+#endif /* HAVE_HASHDRBG */
+#endif /* CUSTOM_RAND_GENERATE_BLOCK */
return ret;
}
-int wc_RNG_GenerateByte(RNG* rng, byte* b)
+int wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
{
return wc_RNG_GenerateBlock(rng, b, 1);
}
-int wc_FreeRng(RNG* rng)
+int wc_FreeRng(WC_RNG* rng)
{
- int ret = BAD_FUNC_ARG;
+ int ret = 0;
- if (rng != NULL) {
- if (rng->drbg != NULL) {
- if (Hash_DRBG_Uninstantiate(rng->drbg) == DRBG_SUCCESS)
- ret = 0;
- else
- ret = RNG_FAILURE_E;
+ if (rng == NULL)
+ return BAD_FUNC_ARG;
- XFREE(rng->drbg, NULL, DYNAMIC_TYPE_RNG);
- rng->drbg = NULL;
- }
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ wolfAsync_DevCtxFree(&rng->asyncDev, WOLFSSL_ASYNC_MARKER_RNG);
+#endif
- rng->status = DRBG_NOT_INIT;
+#ifdef HAVE_HASHDRBG
+ if (rng->drbg != NULL) {
+ if (Hash_DRBG_Uninstantiate(rng->drbg) != DRBG_SUCCESS)
+ ret = RNG_FAILURE_E;
+
+ #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+ XFREE(rng->drbg, rng->heap, DYNAMIC_TYPE_RNG);
+ #endif
+ rng->drbg = NULL;
}
+ rng->status = DRBG_NOT_INIT;
+#endif /* HAVE_HASHDRBG */
+
return ret;
}
-
-int wc_RNG_HealthTest(int reseed, const byte* entropyA, word32 entropyASz,
- const byte* entropyB, word32 entropyBSz,
+#ifdef HAVE_HASHDRBG
+int wc_RNG_HealthTest(int reseed, const byte* seedA, word32 seedASz,
+ const byte* seedB, word32 seedBSz,
byte* output, word32 outputSz)
{
- DRBG drbg;
+ return wc_RNG_HealthTest_ex(reseed, NULL, 0,
+ seedA, seedASz, seedB, seedBSz,
+ output, outputSz,
+ NULL, INVALID_DEVID);
+}
+
+
+int wc_RNG_HealthTest_ex(int reseed, const byte* nonce, word32 nonceSz,
+ const byte* seedA, word32 seedASz,
+ const byte* seedB, word32 seedBSz,
+ byte* output, word32 outputSz,
+ void* heap, int devId)
+{
+ int ret = -1;
+ DRBG* drbg;
+#ifndef WOLFSSL_SMALL_STACK
+ DRBG drbg_var;
+#endif
- if (entropyA == NULL || output == NULL)
+ if (seedA == NULL || output == NULL) {
return BAD_FUNC_ARG;
+ }
- if (reseed != 0 && entropyB == NULL)
+ if (reseed != 0 && seedB == NULL) {
return BAD_FUNC_ARG;
+ }
- if (outputSz != (SHA256_DIGEST_SIZE * 4))
- return -1;
+ if (outputSz != RNG_HEALTH_TEST_CHECK_SIZE) {
+ return ret;
+ }
- if (Hash_DRBG_Instantiate(&drbg, entropyA, entropyASz, NULL, 0) != 0)
- return -1;
+#ifdef WOLFSSL_SMALL_STACK
+ drbg = (DRBG*)XMALLOC(sizeof(DRBG), NULL, DYNAMIC_TYPE_RNG);
+ if (drbg == NULL) {
+ return MEMORY_E;
+ }
+#else
+ drbg = &drbg_var;
+#endif
+
+ if (Hash_DRBG_Instantiate(drbg, seedA, seedASz, nonce, nonceSz,
+ heap, devId) != 0) {
+ goto exit_rng_ht;
+ }
if (reseed) {
- if (Hash_DRBG_Reseed(&drbg, entropyB, entropyBSz) != 0) {
- Hash_DRBG_Uninstantiate(&drbg);
- return -1;
+ if (Hash_DRBG_Reseed(drbg, seedB, seedBSz) != 0) {
+ goto exit_rng_ht;
}
}
- if (Hash_DRBG_Generate(&drbg, output, outputSz) != 0) {
- Hash_DRBG_Uninstantiate(&drbg);
- return -1;
+ /* This call to generate is prescribed by the NIST DRBGVS
+ * procedure. The results are thrown away. The known
+ * answer test checks the second block of DRBG out of
+ * the generator to ensure the internal state is updated
+ * as expected. */
+ if (Hash_DRBG_Generate(drbg, output, outputSz) != 0) {
+ goto exit_rng_ht;
}
- if (Hash_DRBG_Generate(&drbg, output, outputSz) != 0) {
- Hash_DRBG_Uninstantiate(&drbg);
- return -1;
+ if (Hash_DRBG_Generate(drbg, output, outputSz) != 0) {
+ goto exit_rng_ht;
}
- if (Hash_DRBG_Uninstantiate(&drbg) != 0) {
- return -1;
+ /* Mark success */
+ ret = 0;
+
+exit_rng_ht:
+
+ /* This is safe to call even if Hash_DRBG_Instantiate fails */
+ if (Hash_DRBG_Uninstantiate(drbg) != 0) {
+ ret = -1;
}
- return 0;
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(drbg, NULL, DYNAMIC_TYPE_RNG);
+#endif
+
+ return ret;
}
-const byte entropyA[] = {
+const byte seedA[] = {
0x63, 0x36, 0x33, 0x77, 0xe4, 0x1e, 0x86, 0x46, 0x8d, 0xeb, 0x0a, 0xb4,
0xa8, 0xed, 0x68, 0x3f, 0x6a, 0x13, 0x4e, 0x47, 0xe0, 0x14, 0xc7, 0x00,
0x45, 0x4e, 0x81, 0xe9, 0x53, 0x58, 0xa5, 0x69, 0x80, 0x8a, 0xa3, 0x8f,
0x2a, 0x72, 0xa6, 0x23, 0x59, 0x91, 0x5a, 0x9f, 0x8a, 0x04, 0xca, 0x68
};
-const byte reseedEntropyA[] = {
+const byte reseedSeedA[] = {
0xe6, 0x2b, 0x8a, 0x8e, 0xe8, 0xf1, 0x41, 0xb6, 0x98, 0x05, 0x66, 0xe3,
0xbf, 0xe3, 0xc0, 0x49, 0x03, 0xda, 0xd4, 0xac, 0x2c, 0xdf, 0x9f, 0x22,
0x80, 0x01, 0x0a, 0x67, 0x39, 0xbc, 0x83, 0xd3
@@ -634,11 +1134,12 @@ const byte outputA[] = {
0xa1, 0x80, 0x18, 0x3a, 0x07, 0xdf, 0xae, 0x17
};
-const byte entropyB[] = {
+const byte seedB[] = {
0xa6, 0x5a, 0xd0, 0xf3, 0x45, 0xdb, 0x4e, 0x0e, 0xff, 0xe8, 0x75, 0xc3,
0xa2, 0xe7, 0x1f, 0x42, 0xc7, 0x12, 0x9d, 0x62, 0x0f, 0xf5, 0xc1, 0x19,
- 0xa9, 0xef, 0x55, 0xf0, 0x51, 0x85, 0xe0, 0xfb, 0x85, 0x81, 0xf9, 0x31,
- 0x75, 0x17, 0x27, 0x6e, 0x06, 0xe9, 0x60, 0x7d, 0xdb, 0xcb, 0xcc, 0x2e
+ 0xa9, 0xef, 0x55, 0xf0, 0x51, 0x85, 0xe0, 0xfb, /* nonce next */
+ 0x85, 0x81, 0xf9, 0x31, 0x75, 0x17, 0x27, 0x6e, 0x06, 0xe9, 0x60, 0x7d,
+ 0xdb, 0xcb, 0xcc, 0x2e
};
const byte outputB[] = {
@@ -659,320 +1160,423 @@ const byte outputB[] = {
static int wc_RNG_HealthTestLocal(int reseed)
{
int ret = 0;
- byte check[SHA256_DIGEST_SIZE * 4];
+#ifdef WOLFSSL_SMALL_STACK
+ byte* check;
+#else
+ byte check[RNG_HEALTH_TEST_CHECK_SIZE];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ check = (byte*)XMALLOC(RNG_HEALTH_TEST_CHECK_SIZE, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (check == NULL) {
+ return MEMORY_E;
+ }
+#endif
if (reseed) {
- ret = wc_RNG_HealthTest(1, entropyA, sizeof(entropyA),
- reseedEntropyA, sizeof(reseedEntropyA),
- check, sizeof(check));
+ ret = wc_RNG_HealthTest(1, seedA, sizeof(seedA),
+ reseedSeedA, sizeof(reseedSeedA),
+ check, RNG_HEALTH_TEST_CHECK_SIZE);
if (ret == 0) {
- if (ConstantCompare(check, outputA, sizeof(check)) != 0)
+ if (ConstantCompare(check, outputA,
+ RNG_HEALTH_TEST_CHECK_SIZE) != 0)
ret = -1;
}
}
else {
- ret = wc_RNG_HealthTest(0, entropyB, sizeof(entropyB),
+ ret = wc_RNG_HealthTest(0, seedB, sizeof(seedB),
NULL, 0,
- check, sizeof(check));
+ check, RNG_HEALTH_TEST_CHECK_SIZE);
if (ret == 0) {
- if (ConstantCompare(check, outputB, sizeof(check)) != 0)
+ if (ConstantCompare(check, outputB,
+ RNG_HEALTH_TEST_CHECK_SIZE) != 0)
ret = -1;
}
+
+ /* The previous test cases use a large seed instead of a seed and nonce.
+ * seedB is actually from a test case with a seed and nonce, and
+ * just concatenates them. The pivot point between seed and nonce is
+ * byte 32, feed them into the health test separately. */
+ if (ret == 0) {
+ ret = wc_RNG_HealthTest_ex(0,
+ seedB + 32, sizeof(seedB) - 32,
+ seedB, 32,
+ NULL, 0,
+ check, RNG_HEALTH_TEST_CHECK_SIZE,
+ NULL, INVALID_DEVID);
+ if (ret == 0) {
+ if (ConstantCompare(check, outputB, sizeof(outputB)) != 0)
+ ret = -1;
+ }
+ }
}
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(check, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
return ret;
}
+#endif /* HAVE_HASHDRBG */
-#else /* HAVE_HASHDRBG || NO_RC4 */
-/* Get seed and key cipher */
-int wc_InitRng(RNG* rng)
+#ifdef HAVE_WNR
+
+/*
+ * Init global Whitewood netRandom context
+ * Returns 0 on success, negative on error
+ */
+int wc_InitNetRandom(const char* configFile, wnr_hmac_key hmac_cb, int timeout)
{
- int ret;
-#ifdef WOLFSSL_SMALL_STACK
- byte* key;
- byte* junk;
-#else
- byte key[32];
- byte junk[256];
-#endif
+ if (configFile == NULL || timeout < 0)
+ return BAD_FUNC_ARG;
-#ifdef HAVE_INTEL_RDGEN
- wc_InitRng_IntelRD() ;
- if(IS_INTEL_RDRAND)return 0 ;
-#endif
-#ifdef HAVE_CAVIUM
- if (rng->magic == WOLFSSL_RNG_CAVIUM_MAGIC)
+ if (wnr_mutex_init > 0) {
+ WOLFSSL_MSG("netRandom context already created, skipping");
return 0;
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
- key = (byte*)XMALLOC(32, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (key == NULL)
- return MEMORY_E;
+ }
- junk = (byte*)XMALLOC(256, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (junk == NULL) {
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- return MEMORY_E;
+ if (wc_InitMutex(&wnr_mutex) != 0) {
+ WOLFSSL_MSG("Bad Init Mutex wnr_mutex");
+ return BAD_MUTEX_E;
}
-#endif
+ wnr_mutex_init = 1;
- ret = wc_GenerateSeed(&rng->seed, key, 32);
+ if (wc_LockMutex(&wnr_mutex) != 0) {
+ WOLFSSL_MSG("Bad Lock Mutex wnr_mutex");
+ return BAD_MUTEX_E;
+ }
- if (ret == 0) {
- wc_Arc4SetKey(&rng->cipher, key, sizeof(key));
+ /* store entropy timeout */
+ wnr_timeout = timeout;
- ret = wc_RNG_GenerateBlock(rng, junk, 256); /*rid initial state*/
+ /* create global wnr_context struct */
+ if (wnr_create(&wnr_ctx) != WNR_ERROR_NONE) {
+ WOLFSSL_MSG("Error creating global netRandom context");
+ return RNG_FAILURE_E;
}
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- XFREE(junk, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ /* load config file */
+ if (wnr_config_loadf(wnr_ctx, (char*)configFile) != WNR_ERROR_NONE) {
+ WOLFSSL_MSG("Error loading config file into netRandom context");
+ wnr_destroy(wnr_ctx);
+ wnr_ctx = NULL;
+ return RNG_FAILURE_E;
+ }
- return ret;
-}
+ /* create/init polling mechanism */
+ if (wnr_poll_create() != WNR_ERROR_NONE) {
+ printf("ERROR: wnr_poll_create() failed\n");
+ WOLFSSL_MSG("Error initializing netRandom polling mechanism");
+ wnr_destroy(wnr_ctx);
+ wnr_ctx = NULL;
+ return RNG_FAILURE_E;
+ }
-#ifdef HAVE_CAVIUM
- static void CaviumRNG_GenerateBlock(RNG* rng, byte* output, word32 sz);
-#endif
+ /* validate config, set HMAC callback (optional) */
+ if (wnr_setup(wnr_ctx, hmac_cb) != WNR_ERROR_NONE) {
+ WOLFSSL_MSG("Error setting up netRandom context");
+ wnr_destroy(wnr_ctx);
+ wnr_ctx = NULL;
+ wnr_poll_destroy();
+ return RNG_FAILURE_E;
+ }
-/* place a generated block in output */
-int wc_RNG_GenerateBlock(RNG* rng, byte* output, word32 sz)
-{
-#ifdef HAVE_INTEL_RDGEN
- if(IS_INTEL_RDRAND)
- return wc_GenerateRand_IntelRD(NULL, output, sz) ;
-#endif
-#ifdef HAVE_CAVIUM
- if (rng->magic == WOLFSSL_RNG_CAVIUM_MAGIC)
- return CaviumRNG_GenerateBlock(rng, output, sz);
-#endif
- XMEMSET(output, 0, sz);
- wc_Arc4Process(&rng->cipher, output, output, sz);
+ wc_UnLockMutex(&wnr_mutex);
return 0;
}
-
-int wc_RNG_GenerateByte(RNG* rng, byte* b)
+/*
+ * Free global Whitewood netRandom context
+ * Returns 0 on success, negative on error
+ */
+int wc_FreeNetRandom(void)
{
- return wc_RNG_GenerateBlock(rng, b, 1);
-}
+ if (wnr_mutex_init > 0) {
+ if (wc_LockMutex(&wnr_mutex) != 0) {
+ WOLFSSL_MSG("Bad Lock Mutex wnr_mutex");
+ return BAD_MUTEX_E;
+ }
+
+ if (wnr_ctx != NULL) {
+ wnr_destroy(wnr_ctx);
+ wnr_ctx = NULL;
+ }
+ wnr_poll_destroy();
+
+ wc_UnLockMutex(&wnr_mutex);
+
+ wc_FreeMutex(&wnr_mutex);
+ wnr_mutex_init = 0;
+ }
-int wc_FreeRng(RNG* rng)
-{
- (void)rng;
return 0;
}
+#endif /* HAVE_WNR */
-#ifdef HAVE_CAVIUM
-#include <wolfssl/ctaocrypt/logging.h>
-#include "cavium_common.h"
+#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED)
-/* Initiliaze RNG for use with Nitrox device */
-int wc_InitRngCavium(RNG* rng, int devId)
-{
- if (rng == NULL)
- return -1;
-
- rng->devId = devId;
- rng->magic = WOLFSSL_RNG_CAVIUM_MAGIC;
+#ifdef WOLFSSL_ASYNC_CRYPT
+ /* need more retries if multiple cores */
+ #define INTELRD_RETRY (32 * 8)
+#else
+ #define INTELRD_RETRY 32
+#endif
- return 0;
-}
+#ifdef HAVE_INTEL_RDSEED
+#ifndef USE_WINDOWS_API
-static void CaviumRNG_GenerateBlock(RNG* rng, byte* output, word32 sz)
-{
- wolfssl_word offset = 0;
- word32 requestId;
+ /* return 0 on success */
+ static WC_INLINE int IntelRDseed64(word64* seed)
+ {
+ unsigned char ok;
- while (sz > WOLFSSL_MAX_16BIT) {
- word16 slen = (word16)WOLFSSL_MAX_16BIT;
- if (CspRandom(CAVIUM_BLOCKING, slen, output + offset, &requestId,
- rng->devId) != 0) {
- WOLFSSL_MSG("Cavium RNG failed");
- }
- sz -= WOLFSSL_MAX_16BIT;
- offset += WOLFSSL_MAX_16BIT;
- }
- if (sz) {
- word16 slen = (word16)sz;
- if (CspRandom(CAVIUM_BLOCKING, slen, output + offset, &requestId,
- rng->devId) != 0) {
- WOLFSSL_MSG("Cavium RNG failed");
- }
+ __asm__ volatile("rdseed %0; setc %1":"=r"(*seed), "=qm"(ok));
+ return (ok) ? 0 : -1;
}
-}
-#endif /* HAVE_CAVIUM */
+#else /* USE_WINDOWS_API */
+ /* The compiler Visual Studio uses does not allow inline assembly.
+ * It does allow for Intel intrinsic functions. */
-#endif /* HAVE_HASHDRBG || NO_RC4 */
+ /* return 0 on success */
+ static WC_INLINE int IntelRDseed64(word64* seed)
+ {
+ int ok;
+ ok = _rdseed64_step(seed);
+ return (ok) ? 0 : -1;
+ }
-#if defined(HAVE_INTEL_RDGEN)
+#endif /* USE_WINDOWS_API */
-#ifndef _MSC_VER
- #define cpuid(reg, leaf, sub)\
- __asm__ __volatile__ ("cpuid":\
- "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
- "a" (leaf), "c"(sub));
+/* return 0 on success */
+static WC_INLINE int IntelRDseed64_r(word64* rnd)
+{
+ int i;
+ for (i = 0; i < INTELRD_RETRY; i++) {
+ if (IntelRDseed64(rnd) == 0)
+ return 0;
+ }
+ return -1;
+}
- #define XASM_LINK(f) asm(f)
-#else
+/* return 0 on success */
+static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz)
+{
+ int ret;
+ word64 rndTmp;
- #include <intrin.h>
- #define cpuid(a,b) __cpuid((int*)a,b)
+ (void)os;
- #define XASM_LINK(f)
+ if (!IS_INTEL_RDSEED(intel_flags))
+ return -1;
-#endif /* _MSC_VER */
+ for (; (sz / sizeof(word64)) > 0; sz -= sizeof(word64),
+ output += sizeof(word64)) {
+ ret = IntelRDseed64_r((word64*)output);
+ if (ret != 0)
+ return ret;
+ }
+ if (sz == 0)
+ return 0;
-#define EAX 0
-#define EBX 1
-#define ECX 2
-#define EDX 3
+ /* handle unaligned remainder */
+ ret = IntelRDseed64_r(&rndTmp);
+ if (ret != 0)
+ return ret;
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
- int got_intel_cpu=0;
- unsigned int reg[5];
-
- reg[4] = '\0' ;
- cpuid(reg, 0, 0);
- if(memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
- memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
- memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
- got_intel_cpu = 1;
- }
- if (got_intel_cpu) {
- cpuid(reg, leaf, sub);
- return((reg[num]>>bit)&0x1) ;
- }
- return 0 ;
-}
+ XMEMCPY(output, &rndTmp, sz);
+ ForceZero(&rndTmp, sizeof(rndTmp));
-static int wc_InitRng_IntelRD()
-{
- if(cpuid_check==0) {
- if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ;}
- if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ;}
- cpuid_check = 1 ;
- }
- return 1 ;
+ return 0;
}
-#define INTELRD_RETRY 10
+#endif /* HAVE_INTEL_RDSEED */
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+#ifdef HAVE_INTEL_RDRAND
-/* return 0 on success */
-static inline int IntelRDseed32(unsigned int *seed)
-{
- int rdseed; unsigned char ok ;
-
- __asm__ volatile("rdseed %0; setc %1":"=r"(rdseed), "=qm"(ok));
- if(ok){
- *seed = rdseed ;
- return 0 ;
- } else
- return 1;
-}
+#ifndef USE_WINDOWS_API
/* return 0 on success */
-static inline int IntelRDseed32_r(unsigned int *rnd)
-{
- int i ;
- for(i=0; i<INTELRD_RETRY;i++) {
- if(IntelRDseed32(rnd) == 0) return 0 ;
- }
- return 1 ;
+static WC_INLINE int IntelRDrand64(word64 *rnd)
+{
+ unsigned char ok;
+
+ __asm__ volatile("rdrand %0; setc %1":"=r"(*rnd), "=qm"(ok));
+
+ return (ok) ? 0 : -1;
}
+#else /* USE_WINDOWS_API */
+ /* The compiler Visual Studio uses does not allow inline assembly.
+ * It does allow for Intel intrinsic functions. */
+
/* return 0 on success */
-static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz)
+static WC_INLINE int IntelRDrand64(word64 *rnd)
{
- (void) os ;
- int ret ;
- unsigned int rndTmp ;
-
- for( ; sz/4 > 0; sz-=4, output+=4) {
- if(IS_INTEL_RDSEED)ret = IntelRDseed32_r((word32 *)output) ;
- else return 1 ;
- if(ret)
- return 1 ;
- }
- if(sz == 0)return 0 ;
-
- if(IS_INTEL_RDSEED)ret = IntelRDseed32_r(&rndTmp) ;
- else return 1 ;
- if(ret)
- return 1 ;
- XMEMCPY(output, &rndTmp, sz) ;
- return 0;
-}
+ int ok;
-#else
+ ok = _rdrand64_step(rnd);
-/* return 0 on success */
-static inline int IntelRDrand32(unsigned int *rnd)
-{
- int rdrand; unsigned char ok ;
- __asm__ volatile("rdrand %0; setc %1":"=r"(rdrand), "=qm"(ok));
- if(ok){
- *rnd = rdrand;
- return 0 ;
- } else
- return 1;
+ return (ok) ? 0 : -1;
}
+#endif /* USE_WINDOWS_API */
+
/* return 0 on success */
-static inline int IntelRDrand32_r(unsigned int *rnd)
-{
- int i ;
- for(i=0; i<INTELRD_RETRY;i++) {
- if(IntelRDrand32(rnd) == 0) return 0 ;
+static WC_INLINE int IntelRDrand64_r(word64 *rnd)
+{
+ int i;
+ for (i = 0; i < INTELRD_RETRY; i++) {
+ if (IntelRDrand64(rnd) == 0)
+ return 0;
}
- return 1 ;
+ return -1;
}
/* return 0 on success */
static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz)
{
- (void) os ;
- int ret ;
- unsigned int rndTmp;
-
- for( ; sz/4 > 0; sz-=4, output+=4) {
- if(IS_INTEL_RDRAND)ret = IntelRDrand32_r((word32 *)output);
- else return 1 ;
- if(ret)
- return 1 ;
- }
- if(sz == 0)return 0 ;
-
- if(IS_INTEL_RDRAND)ret = IntelRDrand32_r(&rndTmp);
- else return 1 ;
- if(ret)
- return 1 ;
- XMEMCPY(output, &rndTmp, sz) ;
+ int ret;
+ word64 rndTmp;
+
+ (void)os;
+
+ if (!IS_INTEL_RDRAND(intel_flags))
+ return -1;
+
+ for (; (sz / sizeof(word64)) > 0; sz -= sizeof(word64),
+ output += sizeof(word64)) {
+ ret = IntelRDrand64_r((word64 *)output);
+ if (ret != 0)
+ return ret;
+ }
+ if (sz == 0)
+ return 0;
+
+ /* handle unaligned remainder */
+ ret = IntelRDrand64_r(&rndTmp);
+ if (ret != 0)
+ return ret;
+
+ XMEMCPY(output, &rndTmp, sz);
+
return 0;
}
-#endif /* defined(HAVE_HASHDRBG) || defined(NO_RC4) */
-#endif /* HAVE_INTEL_RDGEN */
+#endif /* HAVE_INTEL_RDRAND */
+#endif /* HAVE_INTEL_RDRAND || HAVE_INTEL_RDSEED */
+
+
+/* Begin wc_GenerateSeed Implementations */
+#if defined(CUSTOM_RAND_GENERATE_SEED)
+
+ /* Implement your own random generation function
+ * Return 0 to indicate success
+ * int rand_gen_seed(byte* output, word32 sz);
+ * #define CUSTOM_RAND_GENERATE_SEED rand_gen_seed */
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ (void)os; /* Suppress unused arg warning */
+ return CUSTOM_RAND_GENERATE_SEED(output, sz);
+ }
+
+#elif defined(CUSTOM_RAND_GENERATE_SEED_OS)
+
+ /* Implement your own random generation function,
+ * which includes OS_Seed.
+ * Return 0 to indicate success
+ * int rand_gen_seed(OS_Seed* os, byte* output, word32 sz);
+ * #define CUSTOM_RAND_GENERATE_SEED_OS rand_gen_seed */
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ return CUSTOM_RAND_GENERATE_SEED_OS(os, output, sz);
+ }
+
+#elif defined(CUSTOM_RAND_GENERATE)
+
+ /* Implement your own random generation function
+ * word32 rand_gen(void);
+ * #define CUSTOM_RAND_GENERATE rand_gen */
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ word32 i = 0;
+
+ (void)os;
+
+ while (i < sz)
+ {
+ /* If not aligned or there is odd/remainder */
+ if( (i + sizeof(CUSTOM_RAND_TYPE)) > sz ||
+ ((wolfssl_word)&output[i] % sizeof(CUSTOM_RAND_TYPE)) != 0
+ ) {
+ /* Single byte at a time */
+ output[i++] = (byte)CUSTOM_RAND_GENERATE();
+ }
+ else {
+ /* Use native 8, 16, 32 or 64 copy instruction */
+ *((CUSTOM_RAND_TYPE*)&output[i]) = CUSTOM_RAND_GENERATE();
+ i += sizeof(CUSTOM_RAND_TYPE);
+ }
+ }
+ return 0;
+ }
+
+#elif defined(WOLFSSL_SGX)
+
+int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+{
+ int ret = !SGX_SUCCESS;
+ int i, read_max = 10;
+
+ for (i = 0; i < read_max && ret != SGX_SUCCESS; i++) {
+ ret = sgx_read_rand(output, sz);
+ }
-#if defined(USE_WINDOWS_API)
+ (void)os;
+ return (ret == SGX_SUCCESS) ? 0 : 1;
+}
+#elif defined(USE_WINDOWS_API)
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
+#ifdef WOLF_CRYPTO_CB
+ int ret;
+
+ if (os != NULL && os->devId != INVALID_DEVID) {
+ ret = wc_CryptoCb_RandomSeed(os, output, sz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+#endif
+
+ #ifdef HAVE_INTEL_RDSEED
+ if (IS_INTEL_RDSEED(intel_flags)) {
+ if (!wc_GenerateSeed_IntelRD(NULL, output, sz)) {
+ /* success, we're done */
+ return 0;
+ }
+ #ifdef FORCE_FAILURE_RDSEED
+ /* don't fall back to CryptoAPI */
+ return READ_RAN_E;
+ #endif
+ }
+ #endif /* HAVE_INTEL_RDSEED */
+
if(!CryptAcquireContext(&os->handle, 0, 0, PROV_RSA_FULL,
CRYPT_VERIFYCONTEXT))
return WINCRYPT_E;
@@ -991,92 +1595,85 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
#include "rtprand.h" /* rtp_rand () */
#include "rtptime.h" /* rtp_get_system_msec() */
-
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
- int i;
- rtp_srand(rtp_get_system_msec());
+ word32 i;
+ rtp_srand(rtp_get_system_msec());
for (i = 0; i < sz; i++ ) {
output[i] = rtp_rand() % 256;
- if ( (i % 8) == 7)
- rtp_srand(rtp_get_system_msec());
}
return 0;
}
-#elif defined(MICRIUM)
-
-int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-{
- #if (NET_SECURE_MGR_CFG_EN == DEF_ENABLED)
- NetSecure_InitSeed(output, sz);
- #endif
- return 0;
-}
-
-#elif defined(MBED)
-
-/* write a real one !!!, just for testing board */
-int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-{
- int i;
- for (i = 0; i < sz; i++ )
- output[i] = i;
-
- return 0;
-}
-
#elif defined(MICROCHIP_PIC32)
-#ifdef MICROCHIP_MPLAB_HARMONY
- #define PIC32_SEED_COUNT _CP0_GET_COUNT
-#else
- #if !defined(WOLFSSL_MICROCHIP_PIC32MZ)
- #include <peripheral/timer.h>
+ #ifdef MICROCHIP_MPLAB_HARMONY
+ #ifdef MICROCHIP_MPLAB_HARMONY_3
+ #include "system/time/sys_time.h"
+ #define PIC32_SEED_COUNT SYS_TIME_CounterGet
+ #else
+ #define PIC32_SEED_COUNT _CP0_GET_COUNT
+ #endif
+ #else
+ #if !defined(WOLFSSL_MICROCHIP_PIC32MZ)
+ #include <peripheral/timer.h>
+ #endif
+ extern word32 ReadCoreTimer(void);
+ #define PIC32_SEED_COUNT ReadCoreTimer
#endif
- #define PIC32_SEED_COUNT ReadCoreTimer
-#endif
- #ifdef WOLFSSL_MIC32MZ_RNG
+
+ #ifdef WOLFSSL_PIC32MZ_RNG
#include "xc.h"
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
- int i ;
- byte rnd[8] ;
- word32 *rnd32 = (word32 *)rnd ;
- word32 size = sz ;
- byte* op = output ;
-
- /* This part has to be replaced with better random seed */
- RNGNUMGEN1 = ReadCoreTimer();
- RNGPOLY1 = ReadCoreTimer();
- RNGPOLY2 = ReadCoreTimer();
- RNGNUMGEN2 = ReadCoreTimer();
-#ifdef DEBUG_WOLFSSL
- printf("GenerateSeed::Seed=%08x, %08x\n", RNGNUMGEN1, RNGNUMGEN2) ;
+ int i;
+ byte rnd[8];
+ word32 *rnd32 = (word32 *)rnd;
+ word32 size = sz;
+ byte* op = output;
+
+#if ((__PIC32_FEATURE_SET0 == 'E') && (__PIC32_FEATURE_SET1 == 'C'))
+ RNGNUMGEN1 = _CP0_GET_COUNT();
+ RNGPOLY1 = _CP0_GET_COUNT();
+ RNGPOLY2 = _CP0_GET_COUNT();
+ RNGNUMGEN2 = _CP0_GET_COUNT();
+#else
+ // All others can be seeded from the TRNG
+ RNGCONbits.TRNGMODE = 1;
+ RNGCONbits.TRNGEN = 1;
+ while (RNGCNT < 64);
+ RNGCONbits.LOAD = 1;
+ while (RNGCONbits.LOAD == 1);
+ while (RNGCNT < 64);
+ RNGPOLY2 = RNGSEED2;
+ RNGPOLY1 = RNGSEED1;
#endif
+
RNGCONbits.PLEN = 0x40;
RNGCONbits.PRNGEN = 1;
- for(i=0; i<5; i++) { /* wait for RNGNUMGEN ready */
- volatile int x ;
- x = RNGNUMGEN1 ;
- x = RNGNUMGEN2 ;
+ for (i=0; i<5; i++) { /* wait for RNGNUMGEN ready */
+ volatile int x, y;
+ x = RNGNUMGEN1;
+ y = RNGNUMGEN2;
+ (void)x;
+ (void)y;
}
do {
rnd32[0] = RNGNUMGEN1;
rnd32[1] = RNGNUMGEN2;
for(i=0; i<8; i++, op++) {
- *op = rnd[i] ;
- size -- ;
- if(size==0)break ;
+ *op = rnd[i];
+ size --;
+ if(size==0)break;
}
- } while(size) ;
+ } while(size);
return 0;
}
- #else /* WOLFSSL_MIC32MZ_RNG */
+ #else /* WOLFSSL_PIC32MZ_RNG */
/* uses the core timer, in nanoseconds to seed srand */
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
@@ -1090,11 +1687,12 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
}
return 0;
}
- #endif /* WOLFSSL_MIC32MZ_RNG */
+ #endif /* WOLFSSL_PIC32MZ_RNG */
-#elif defined(FREESCALE_MQX)
+#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX) || \
+ defined(FREESCALE_KSDK_BM) || defined(FREESCALE_FREE_RTOS)
- #ifdef FREESCALE_K70_RNGA
+ #if defined(FREESCALE_K70_RNGA) || defined(FREESCALE_RNGA)
/*
* wc_Generates a RNG seed using the Random Number Generator Accelerator
* on the Kinetis K70. Documentation located in Chapter 37 of
@@ -1102,10 +1700,16 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
*/
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
- int i;
+ word32 i;
/* turn on RNGA module */
- SIM_SCGC3 |= SIM_SCGC3_RNGA_MASK;
+ #if defined(SIM_SCGC3_RNGA_MASK)
+ SIM_SCGC3 |= SIM_SCGC3_RNGA_MASK;
+ #endif
+ #if defined(SIM_SCGC6_RNGA_MASK)
+ /* additionally needed for at least K64F */
+ SIM_SCGC6 |= SIM_SCGC6_RNGA_MASK;
+ #endif
/* set SLP bit to 0 - "RNGA is not in sleep mode" */
RNG_CR &= ~RNG_CR_SLP_MASK;
@@ -1128,7 +1732,7 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
return 0;
}
- #elif defined(FREESCALE_K53_RNGB)
+ #elif defined(FREESCALE_K53_RNGB) || defined(FREESCALE_RNGB)
/*
* wc_Generates a RNG seed using the Random Number Generator (RNGB)
* on the Kinetis K53. Documentation located in Chapter 33 of
@@ -1166,78 +1770,191 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
return 0;
}
- #else
- #warning "write a real random seed!!!!, just for testing now"
+ #elif defined(FREESCALE_KSDK_2_0_TRNG)
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
- int i;
- for (i = 0; i < sz; i++ )
- output[i] = i;
+ status_t status;
+ status = TRNG_GetRandomData(TRNG0, output, sz);
+ if (status == kStatus_Success)
+ {
+ return(0);
+ }
+ else
+ {
+ return RAN_BLOCK_E;
+ }
+ }
- return 0;
+ #elif defined(FREESCALE_KSDK_2_0_RNGA)
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ status_t status;
+ status = RNGA_GetRandomData(RNG, output, sz);
+ if (status == kStatus_Success)
+ {
+ return(0);
+ }
+ else
+ {
+ return RAN_BLOCK_E;
+ }
}
- #endif /* FREESCALE_K70_RNGA */
-#elif defined(WOLFSSL_SAFERTOS) || defined(WOLFSSL_LEANPSK) \
- || defined(WOLFSSL_IAR_ARM) || defined(WOLFSSL_MDK_ARM) \
- || defined(WOLFSSL_uITRON4) || defined(WOLFSSL_uTKERNEL2)
-#warning "write a real random seed!!!!, just for testing now"
+ #elif defined(FREESCALE_RNGA)
-int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-{
- word32 i;
- for (i = 0; i < sz; i++ )
- output[i] = i;
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ RNGA_DRV_GetRandomData(RNGA_INSTANCE, output, sz);
+ return 0;
+ }
- (void)os;
+ #else
+ #define USE_TEST_GENSEED
+ #endif /* FREESCALE_K70_RNGA */
- return 0;
-}
+#elif defined(STM32_RNG)
+ /* Generate a RNG seed using the hardware random number generator
+ * on the STM32F2/F4/F7/L4. */
-#elif defined(STM32F2_RNG)
- #undef RNG
- #include "stm32f2xx_rng.h"
- #include "stm32f2xx_rcc.h"
- /*
- * wc_Generate a RNG seed using the hardware random number generator
- * on the STM32F2. Documentation located in STM32F2xx Standard Peripheral
- * Library document (See note in README).
- */
+ #ifdef WOLFSSL_STM32_CUBEMX
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
- int i;
+ int ret;
+ RNG_HandleTypeDef hrng;
+ word32 i = 0;
+ (void)os;
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
/* enable RNG clock source */
- RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_RNG, ENABLE);
+ __HAL_RCC_RNG_CLK_ENABLE();
/* enable RNG peripheral */
- RNG_Cmd(ENABLE);
+ XMEMSET(&hrng, 0, sizeof(hrng));
+ hrng.Instance = RNG;
+ HAL_RNG_Init(&hrng);
+
+ while (i < sz) {
+ /* If not aligned or there is odd/remainder */
+ if( (i + sizeof(word32)) > sz ||
+ ((wolfssl_word)&output[i] % sizeof(word32)) != 0
+ ) {
+ /* Single byte at a time */
+ uint32_t tmpRng = 0;
+ if (HAL_RNG_GenerateRandomNumber(&hrng, &tmpRng) != HAL_OK) {
+ wolfSSL_CryptHwMutexUnLock();
+ return RAN_BLOCK_E;
+ }
+ output[i++] = (byte)tmpRng;
+ }
+ else {
+ /* Use native 32 instruction */
+ if (HAL_RNG_GenerateRandomNumber(&hrng, (uint32_t*)&output[i]) != HAL_OK) {
+ wolfSSL_CryptHwMutexUnLock();
+ return RAN_BLOCK_E;
+ }
+ i += sizeof(word32);
+ }
+ }
+
+ wolfSSL_CryptHwMutexUnLock();
+
+ return 0;
+ }
+ #elif defined(WOLFSSL_STM32F427_RNG) || defined(WOLFSSL_STM32_RNG_NOLIB)
+
+ /* Generate a RNG seed using the hardware RNG on the STM32F427
+ * directly, following steps outlined in STM32F4 Reference
+ * Manual (Chapter 24) for STM32F4xx family. */
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int ret;
+ word32 i;
+ (void)os;
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* enable RNG peripheral clock */
+ RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN;
+
+ /* enable RNG interrupt, set IE bit in RNG->CR register */
+ RNG->CR |= RNG_CR_IE;
+
+ /* enable RNG, set RNGEN bit in RNG->CR. Activates RNG,
+ * RNG_LFSR, and error detector */
+ RNG->CR |= RNG_CR_RNGEN;
+
+ /* verify no errors, make sure SEIS and CEIS bits are 0
+ * in RNG->SR register */
+ if (RNG->SR & (RNG_SR_SECS | RNG_SR_CECS)) {
+ wolfSSL_CryptHwMutexUnLock();
+ return RNG_FAILURE_E;
+ }
for (i = 0; i < sz; i++) {
/* wait until RNG number is ready */
- while(RNG_GetFlagStatus(RNG_FLAG_DRDY)== RESET) { }
+ while ((RNG->SR & RNG_SR_DRDY) == 0) { }
/* get value */
- output[i] = RNG_GetRandomNumber();
+ output[i] = RNG->DR;
}
+ wolfSSL_CryptHwMutexUnLock();
+
return 0;
}
-#elif defined(WOLFSSL_LPC43xx) || defined(WOLFSSL_STM32F2xx)
- #warning "write a real random seed!!!!, just for testing now"
+ #else
+ /* Generate a RNG seed using the STM32 Standard Peripheral Library */
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
- int i;
+ int ret;
+ word32 i;
+ (void)os;
- for (i = 0; i < sz; i++ )
- output[i] = i;
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* enable RNG clock source */
+ RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_RNG, ENABLE);
+
+ /* reset RNG */
+ RNG_DeInit();
+
+ /* enable RNG peripheral */
+ RNG_Cmd(ENABLE);
+
+ /* verify no errors with RNG_CLK or Seed */
+ if (RNG_GetFlagStatus(RNG_FLAG_SECS | RNG_FLAG_CECS) != RESET) {
+ wolfSSL_CryptHwMutexUnLock();
+ return RNG_FAILURE_E;
+ }
+
+ for (i = 0; i < sz; i++) {
+ /* wait until RNG number is ready */
+ while (RNG_GetFlagStatus(RNG_FLAG_DRDY) == RESET) { }
+
+ /* get value */
+ output[i] = RNG_GetRandomNumber();
+ }
+
+ wolfSSL_CryptHwMutexUnLock();
return 0;
}
+ #endif /* WOLFSSL_STM32_CUBEMX */
#elif defined(WOLFSSL_TIRTOS)
@@ -1258,82 +1975,578 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
return 0;
}
-#elif defined(CUSTOM_RAND_GENERATE)
-
- /* Implement your own random generation function
- * word32 rand_gen(void);
- * #define CUSTOM_RAND_GENERATE rand_gen */
+#elif defined(WOLFSSL_PB)
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
word32 i;
+ for (i = 0; i < sz; i++)
+ output[i] = UTL_Rand();
(void)os;
- for (i = 0; i < sz; i++ )
- output[i] = CUSTOM_RAND_GENERATE();
-
return 0;
}
-#elif defined(NO_DEV_RANDOM)
-
-#error "you need to write an os specific wc_GenerateSeed() here"
+#elif defined(WOLFSSL_NUCLEUS)
+#include "nucleus.h"
+#include "kernel/plus_common.h"
-/*
+#warning "potential for not enough entropy, currently being used for testing"
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
+ int i;
+ srand(NU_Get_Time_Stamp());
+
+ for (i = 0; i < sz; i++ ) {
+ output[i] = rand() % 256;
+ if ((i % 8) == 7) {
+ srand(NU_Get_Time_Stamp());
+ }
+ }
+
return 0;
}
-*/
+#elif defined(WOLFSSL_DEOS) && !defined(CUSTOM_RAND_GENERATE)
+ #include "stdlib.h"
+ #warning "potential for not enough entropy, currently being used for testing Deos"
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int i;
+ int seed = XTIME(0);
+ (void)os;
-#else /* !USE_WINDOWS_API && !HAVE_RPT_SYS && !MICRIUM && !NO_DEV_RANDOM */
+ for (i = 0; i < sz; i++ ) {
+ output[i] = rand_r(&seed) % 256;
+ if ((i % 8) == 7) {
+ seed = XTIME(0);
+ rand_r(&seed);
+ }
+ }
-/* may block */
-int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-{
- int ret = 0;
+ return 0;
+ }
+#elif defined(WOLFSSL_VXWORKS)
+ #include <randomNumGen.h>
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) {
+ STATUS status;
+
+ #ifdef VXWORKS_SIM
+ /* cannot generate true entropy with VxWorks simulator */
+ #warning "not enough entropy, simulator for testing only"
+ int i = 0;
+
+ for (i = 0; i < 1000; i++) {
+ randomAddTimeStamp();
+ }
+ #endif
+
+ status = randBytes (output, sz);
+ if (status == ERROR) {
+ return RNG_FAILURE_E;
+ }
-#if defined(HAVE_INTEL_RDGEN) && (defined(HAVE_HASHDRBG) || defined(NO_RC4))
- wc_InitRng_IntelRD() ; /* set cpuid_flags if not yet */
- if(IS_INTEL_RDSEED)
- return wc_GenerateSeed_IntelRD(NULL, output, sz) ;
+ return 0;
+ }
+
+#elif defined(WOLFSSL_NRF51)
+ #include "app_error.h"
+ #include "nrf_drv_rng.h"
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int remaining = sz, length, pos = 0;
+ uint8_t available;
+ uint32_t err_code;
+
+ (void)os;
+
+ /* Make sure RNG is running */
+ err_code = nrf_drv_rng_init(NULL);
+ if (err_code != NRF_SUCCESS && err_code != NRF_ERROR_INVALID_STATE) {
+ return -1;
+ }
+
+ while (remaining > 0) {
+ err_code = nrf_drv_rng_bytes_available(&available);
+ if (err_code == NRF_SUCCESS) {
+ length = (remaining < available) ? remaining : available;
+ if (length > 0) {
+ err_code = nrf_drv_rng_rand(&output[pos], length);
+ remaining -= length;
+ pos += length;
+ }
+ }
+
+ if (err_code != NRF_SUCCESS) {
+ break;
+ }
+ }
+
+ return (err_code == NRF_SUCCESS) ? 0 : -1;
+ }
+
+#elif defined(HAVE_WNR)
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ if (os == NULL || output == NULL || wnr_ctx == NULL ||
+ wnr_timeout < 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (wnr_mutex_init == 0) {
+ WOLFSSL_MSG("netRandom context must be created before use");
+ return RNG_FAILURE_E;
+ }
+
+ if (wc_LockMutex(&wnr_mutex) != 0) {
+ WOLFSSL_MSG("Bad Lock Mutex wnr_mutex\n");
+ return BAD_MUTEX_E;
+ }
+
+ if (wnr_get_entropy(wnr_ctx, wnr_timeout, output, sz, sz) !=
+ WNR_ERROR_NONE)
+ return RNG_FAILURE_E;
+
+ wc_UnLockMutex(&wnr_mutex);
+
+ return 0;
+ }
+
+#elif defined(WOLFSSL_ATMEL)
+ #include <wolfssl/wolfcrypt/port/atmel/atmel.h>
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int ret = 0;
+
+ (void)os;
+ if (output == NULL) {
+ return BUFFER_E;
+ }
+
+ ret = atmel_get_random_number(sz, output);
+
+ return ret;
+ }
+
+#elif defined(INTIME_RTOS)
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int ret = 0;
+
+ (void)os;
+
+ if (output == NULL) {
+ return BUFFER_E;
+ }
+
+ /* Note: Investigate better solution */
+ /* no return to check */
+ arc4random_buf(output, sz);
+
+ return ret;
+ }
+
+#elif defined(WOLFSSL_WICED)
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int ret;
+ (void)os;
+
+ if (output == NULL || UINT16_MAX < sz) {
+ return BUFFER_E;
+ }
+
+ if ((ret = wiced_crypto_get_random((void*) output, sz) )
+ != WICED_SUCCESS) {
+ return ret;
+ }
+
+ return ret;
+ }
+
+#elif defined(WOLFSSL_NETBURNER)
+ #warning using NetBurner pseudo random GetRandomByte for seed
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ word32 i;
+ (void)os;
+
+ if (output == NULL) {
+ return BUFFER_E;
+ }
+
+ for (i = 0; i < sz; i++) {
+ output[i] = GetRandomByte();
+
+ /* check if was a valid random number */
+ if (!RandomValid())
+ return RNG_FAILURE_E;
+ }
+
+ return 0;
+ }
+#elif defined(IDIRECT_DEV_RANDOM)
+
+ extern int getRandom( int sz, unsigned char *output );
+
+ int GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int num_bytes_returned = 0;
+
+ num_bytes_returned = getRandom( (int) sz, (unsigned char *) output );
+
+ return 0;
+ }
+
+#elif (defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG))
+
+ #include <wolfssl/wolfcrypt/port/caam/wolfcaam.h>
+ #include <wolfssl/wolfcrypt/port/caam/caam_driver.h>
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ Buffer buf[1];
+ int ret = 0;
+ int times = 1000, i;
+
+ (void)os;
+
+ if (output == NULL) {
+ return BUFFER_E;
+ }
+
+ buf[0].BufferType = DataBuffer | LastBuffer;
+ buf[0].TheAddress = (Address)output;
+ buf[0].Length = sz;
+
+ /* Check Waiting to make sure entropy is ready */
+ for (i = 0; i < times; i++) {
+ ret = wc_caamAddAndWait(buf, NULL, CAAM_ENTROPY);
+ if (ret == Success) {
+ break;
+ }
+
+ /* driver could be waiting for entropy */
+ if (ret != RAN_BLOCK_E) {
+ return ret;
+ }
+ usleep(100);
+ }
+
+ if (i == times && ret != Success) {
+ return RNG_FAILURE_E;
+ }
+ else { /* Success case */
+ ret = 0;
+ }
+
+ return ret;
+ }
+
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+
+ #include <stdlib.h>
+ #include "os/os_time.h"
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int i;
+ srand(os_time_get());
+
+ for (i = 0; i < sz; i++ ) {
+ output[i] = rand() % 256;
+ if ((i % 8) == 7) {
+ srand(os_time_get());
+ }
+ }
+
+ return 0;
+ }
+
+#elif defined(WOLFSSL_ESPIDF)
+ #if defined(WOLFSSL_ESPWROOM32) || defined(WOLFSSL_ESPWROOM32SE)
+ #include <esp_system.h>
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ word32 rand;
+ while (sz > 0) {
+ word32 len = sizeof(rand);
+ if (sz < len)
+ len = sz;
+ /* Get one random 32-bit word from hw RNG */
+ rand = esp_random( );
+ XMEMCPY(output, &rand, len);
+ output += len;
+ sz -= len;
+ }
+
+ return 0;
+ }
+ #endif /* end WOLFSSL_ESPWROOM32 */
+
+#elif defined(WOLFSSL_RENESAS_TSIP)
+#if defined(WOLFSSL_RENESA_TSIP_IAREWRX)
+ #include "r_bsp/mcu/all/r_rx_compiler.h"
#endif
+ #include "r_bsp/platform.h"
+ #include "r_tsip_rx_if.h"
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int ret;
+ uint32_t buffer[4];
+
+ while (sz > 0) {
+ uint32_t len = sizeof(buffer);
+
+ if (sz < len) {
+ len = sz;
+ }
+ /* return 4 words random number*/
+ ret = R_TSIP_GenerateRandomNumber(buffer);
+ if(ret == TSIP_SUCCESS) {
+ XMEMCPY(output, &buffer, len);
+ output += len;
+ sz -= len;
+ } else
+ return ret;
+ }
+ return ret;
+ }
- os->fd = open("/dev/urandom",O_RDONLY);
- if (os->fd == -1) {
- /* may still have /dev/random */
- os->fd = open("/dev/random",O_RDONLY);
- if (os->fd == -1)
- return OPEN_RAN_E;
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_TRNG)
+ #include "hal_data.h"
+
+ #ifndef WOLFSSL_SCE_TRNG_HANDLE
+ #define WOLFSSL_SCE_TRNG_HANDLE g_sce_trng
+ #endif
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ uint32_t ret;
+ uint32_t blocks;
+ word32 len = sz;
+
+ ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->open(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl,
+ WOLFSSL_SCE_TRNG_HANDLE.p_cfg);
+ if (ret != SSP_SUCCESS && ret != SSP_ERR_CRYPTO_ALREADY_OPEN) {
+ /* error opening TRNG driver */
+ return -1;
+ }
+
+ blocks = sz / sizeof(uint32_t);
+ if (blocks > 0) {
+ ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->read(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl,
+ (uint32_t*)output, blocks);
+ if (ret != SSP_SUCCESS) {
+ return -1;
+ }
+ }
+
+ len = len - (blocks * sizeof(uint32_t));
+ if (len > 0) {
+ uint32_t tmp;
+
+ if (len > sizeof(uint32_t)) {
+ return -1;
+ }
+ ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->read(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl,
+ (uint32_t*)tmp, 1);
+ if (ret != SSP_SUCCESS) {
+ return -1;
+ }
+ XMEMCPY(output + (blocks * sizeof(uint32_t)), (byte*)&tmp, len);
+ }
+
+ ret = WOLFSSL_SCE_TRNG_HANDLE.p_api->close(WOLFSSL_SCE_TRNG_HANDLE.p_ctrl);
+ if (ret != SSP_SUCCESS) {
+ /* error opening TRNG driver */
+ return -1;
+ }
+ return 0;
}
+#elif defined(CUSTOM_RAND_GENERATE_BLOCK)
+ /* #define CUSTOM_RAND_GENERATE_BLOCK myRngFunc
+ * extern int myRngFunc(byte* output, word32 sz);
+ */
- while (sz) {
- int len = (int)read(os->fd, output, sz);
- if (len == -1) {
- ret = READ_RAN_E;
- break;
+#elif defined(WOLFSSL_SAFERTOS) || defined(WOLFSSL_LEANPSK) || \
+ defined(WOLFSSL_IAR_ARM) || defined(WOLFSSL_MDK_ARM) || \
+ defined(WOLFSSL_uITRON4) || defined(WOLFSSL_uTKERNEL2) || \
+ defined(WOLFSSL_LPC43xx) || defined(WOLFSSL_STM32F2xx) || \
+ defined(MBED) || defined(WOLFSSL_EMBOS) || \
+ defined(WOLFSSL_GENSEED_FORTEST) || defined(WOLFSSL_CHIBIOS) || \
+ defined(WOLFSSL_CONTIKI) || defined(WOLFSSL_AZSPHERE)
+
+ /* these platforms do not have a default random seed and
+ you'll need to implement your own wc_GenerateSeed or define via
+ CUSTOM_RAND_GENERATE_BLOCK */
+
+ #define USE_TEST_GENSEED
+
+#elif defined(WOLFSSL_ZEPHYR)
+
+ #include <entropy.h>
+ #ifndef _POSIX_C_SOURCE
+ #include <posix/time.h>
+ #else
+ #include <sys/time.h>
+ #endif
+
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int ret = 0;
+ word32 rand;
+ while (sz > 0) {
+ word32 len = sizeof(rand);
+ if (sz < len)
+ len = sz;
+ rand = sys_rand32_get();
+ XMEMCPY(output, &rand, len);
+ output += len;
+ sz -= len;
+ }
+
+ return ret;
+ }
+
+#elif defined(WOLFSSL_TELIT_M2MB)
+
+ #include "stdlib.h"
+ static long get_timestamp(void) {
+ long myTime = 0;
+ INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+ if (fd >= 0) {
+ M2MB_RTC_TIMEVAL_T timeval;
+ m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+ myTime = timeval.msec;
+ m2mb_rtc_close(fd);
+ }
+ return myTime;
+ }
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int i;
+ srand(get_timestamp());
+ for (i = 0; i < sz; i++ ) {
+ output[i] = rand() % 256;
+ if ((i % 8) == 7) {
+ srand(get_timestamp());
+ }
+ }
+ return 0;
}
- sz -= len;
- output += len;
+#elif defined(NO_DEV_RANDOM)
+
+ #error "you need to write an os specific wc_GenerateSeed() here"
+
+ /*
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ return 0;
+ }
+ */
- if (sz) {
-#ifdef BLOCKING
- sleep(0); /* context switch */
#else
- ret = RAN_BLOCK_E;
- break;
-#endif
+
+ /* may block */
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ int ret = 0;
+
+ if (os == NULL) {
+ return BAD_FUNC_ARG;
}
+
+ #ifdef WOLF_CRYPTO_CB
+ if (os->devId != INVALID_DEVID) {
+ ret = wc_CryptoCb_RandomSeed(os, output, sz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ ret = 0; /* reset error code */
+ }
+ #endif
+
+ #ifdef HAVE_INTEL_RDSEED
+ if (IS_INTEL_RDSEED(intel_flags)) {
+ ret = wc_GenerateSeed_IntelRD(NULL, output, sz);
+ if (ret == 0) {
+ /* success, we're done */
+ return ret;
+ }
+ #ifdef FORCE_FAILURE_RDSEED
+ /* don't fallback to /dev/urandom */
+ return ret;
+ #else
+ /* reset error and fallback to using /dev/urandom */
+ ret = 0;
+ #endif
+ }
+ #endif /* HAVE_INTEL_RDSEED */
+
+ #ifndef NO_DEV_URANDOM /* way to disable use of /dev/urandom */
+ os->fd = open("/dev/urandom", O_RDONLY);
+ if (os->fd == -1)
+ #endif
+ {
+ /* may still have /dev/random */
+ os->fd = open("/dev/random", O_RDONLY);
+ if (os->fd == -1)
+ return OPEN_RAN_E;
+ }
+
+ while (sz) {
+ int len = (int)read(os->fd, output, sz);
+ if (len == -1) {
+ ret = READ_RAN_E;
+ break;
+ }
+
+ sz -= len;
+ output += len;
+
+ if (sz) {
+ #if defined(BLOCKING) || defined(WC_RNG_BLOCKING)
+ sleep(0); /* context switch */
+ #else
+ ret = RAN_BLOCK_E;
+ break;
+ #endif
+ }
+ }
+ close(os->fd);
+
+ return ret;
}
- close(os->fd);
- return ret;
-}
+#endif
-#endif /* USE_WINDOWS_API */
-#endif /* HAVE_FIPS */
+#ifdef USE_TEST_GENSEED
+ #ifndef _MSC_VER
+ #warning "write a real random seed!!!!, just for testing now"
+ #else
+ #pragma message("Warning: write a real random seed!!!!, just for testing now")
+ #endif
+ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+ {
+ word32 i;
+ for (i = 0; i < sz; i++ )
+ output[i] = i;
+
+ (void)os;
+ return 0;
+ }
+#endif
+
+
+/* End wc_GenerateSeed */
+#endif /* WC_NO_RNG */
+#endif /* HAVE_FIPS */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ripemd.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ripemd.c
index 639a42d07..484c62fe4 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ripemd.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/ripemd.c
@@ -1,8 +1,8 @@
/* ripemd.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,10 +16,11 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -32,22 +33,18 @@
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
+#include <wolfssl/wolfcrypt/error-crypt.h>
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
-
- static INLINE word32 min(word32 a, word32 b)
- {
- return a > b ? b : a;
+int wc_InitRipeMd(RipeMd* ripemd)
+{
+ if (ripemd == NULL) {
+ return BAD_FUNC_ARG;
}
-#endif /* WOLFSSL_HAVE_MIN */
-
-void wc_InitRipeMd(RipeMd* ripemd)
-{
ripemd->digest[0] = 0x67452301L;
ripemd->digest[1] = 0xEFCDAB89L;
ripemd->digest[2] = 0x98BADCFEL;
@@ -57,11 +54,13 @@ void wc_InitRipeMd(RipeMd* ripemd)
ripemd->buffLen = 0;
ripemd->loLen = 0;
ripemd->hiLen = 0;
+
+ return 0;
}
/* for all */
-#define F(x, y, z) (x ^ y ^ z)
+#define F(x, y, z) (x ^ y ^ z)
#define G(x, y, z) (z ^ (x & (y^z)))
#define H(x, y, z) (z ^ (x | ~y))
#define I(x, y, z) (y ^ (z & (x^y)))
@@ -195,7 +194,7 @@ static void Transform(RipeMd* ripemd)
Subround(J, b2, c2, d2, e2, a2, ripemd->buffer[ 3], 12, k5);
Subround(J, a2, b2, c2, d2, e2, ripemd->buffer[12], 6, k5);
- Subround(I, e2, a2, b2, c2, d2, ripemd->buffer[ 6], 9, k6);
+ Subround(I, e2, a2, b2, c2, d2, ripemd->buffer[ 6], 9, k6);
Subround(I, d2, e2, a2, b2, c2, ripemd->buffer[11], 13, k6);
Subround(I, c2, d2, e2, a2, b2, ripemd->buffer[ 3], 15, k6);
Subround(I, b2, c2, d2, e2, a2, ripemd->buffer[ 7], 7, k6);
@@ -272,7 +271,7 @@ static void Transform(RipeMd* ripemd)
}
-static INLINE void AddLength(RipeMd* ripemd, word32 len)
+static WC_INLINE void AddLength(RipeMd* ripemd, word32 len)
{
word32 tmp = ripemd->loLen;
if ( (ripemd->loLen += len) < tmp)
@@ -280,10 +279,16 @@ static INLINE void AddLength(RipeMd* ripemd, word32 len)
}
-void wc_RipeMdUpdate(RipeMd* ripemd, const byte* data, word32 len)
+int wc_RipeMdUpdate(RipeMd* ripemd, const byte* data, word32 len)
{
/* do block size increments */
- byte* local = (byte*)ripemd->buffer;
+ byte* local;
+
+ if (ripemd == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+ local = (byte*)ripemd->buffer;
while (len) {
word32 add = min(len, RIPEMD_BLOCK_SIZE - ripemd->buffLen);
@@ -303,12 +308,19 @@ void wc_RipeMdUpdate(RipeMd* ripemd, const byte* data, word32 len)
ripemd->buffLen = 0;
}
}
+ return 0;
}
-void wc_RipeMdFinal(RipeMd* ripemd, byte* hash)
+int wc_RipeMdFinal(RipeMd* ripemd, byte* hash)
{
- byte* local = (byte*)ripemd->buffer;
+ byte* local;
+
+ if (ripemd == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ local = (byte*)ripemd->buffer;
AddLength(ripemd, ripemd->buffLen); /* before adding pads */
@@ -326,10 +338,10 @@ void wc_RipeMdFinal(RipeMd* ripemd, byte* hash)
ripemd->buffLen = 0;
}
XMEMSET(&local[ripemd->buffLen], 0, RIPEMD_PAD_SIZE - ripemd->buffLen);
-
+
/* put lengths in bits */
ripemd->loLen = ripemd->loLen << 3;
- ripemd->hiLen = (ripemd->loLen >> (8*sizeof(ripemd->loLen) - 3)) +
+ ripemd->hiLen = (ripemd->loLen >> (8*sizeof(ripemd->loLen) - 3)) +
(ripemd->hiLen << 3);
/* store lengths */
@@ -338,7 +350,7 @@ void wc_RipeMdFinal(RipeMd* ripemd, byte* hash)
#endif
/* ! length ordering dependent on digest endian type ! */
XMEMCPY(&local[RIPEMD_PAD_SIZE], &ripemd->loLen, sizeof(word32));
- XMEMCPY(&local[RIPEMD_PAD_SIZE + sizeof(word32)], &ripemd->hiLen,
+ XMEMCPY(&local[RIPEMD_PAD_SIZE + sizeof(word32)], &ripemd->hiLen,
sizeof(word32));
Transform(ripemd);
@@ -347,7 +359,7 @@ void wc_RipeMdFinal(RipeMd* ripemd, byte* hash)
#endif
XMEMCPY(hash, ripemd->digest, RIPEMD_DIGEST_SIZE);
- wc_InitRipeMd(ripemd); /* reset state */
+ return wc_InitRipeMd(ripemd); /* reset state */
}
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rsa.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rsa.c
index 1a5021783..69ab7b21b 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rsa.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/rsa.c
@@ -1,8 +1,8 @@
/* rsa.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,22 +16,82 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
#ifndef NO_RSA
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$e")
+ #pragma const_seg(".fipsB$e")
+ #endif
+#endif
+
#include <wolfssl/wolfcrypt/rsa.h>
-#ifdef HAVE_FIPS
+#ifdef WOLFSSL_AFALG_XILINX_RSA
+#include <wolfssl/wolfcrypt/port/af_alg/wc_afalg.h>
+#endif
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+#include <wolfssl/wolfcrypt/sp.h>
+#endif
+
+/*
+Possible RSA enable options:
+ * NO_RSA: Overall control of RSA default: on (not defined)
+ * WC_RSA_BLINDING: Uses Blinding w/ Private Ops default: off
+ Note: slower by ~20%
+ * WOLFSSL_KEY_GEN: Allows Private Key Generation default: off
+ * RSA_LOW_MEM: NON CRT Private Operations, less memory default: off
+ * WC_NO_RSA_OAEP: Disables RSA OAEP padding default: on (not defined)
+ * WC_RSA_NONBLOCK: Enables support for RSA non-blocking default: off
+ * WC_RSA_NONBLOCK_TIME:Enables support for time based blocking default: off
+ * time calculation.
+*/
+
+/*
+RSA Key Size Configuration:
+ * FP_MAX_BITS: With USE_FAST_MATH only default: 4096
+ If USE_FAST_MATH then use this to override default.
+ Value is key size * 2. Example: RSA 3072 = 6144
+*/
+
+
+/* If building for old FIPS. */
+#if defined(HAVE_FIPS) && \
+ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
+
int wc_InitRsaKey(RsaKey* key, void* ptr)
{
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ return InitRsaKey_fips(key, ptr);
+}
+
+
+int wc_InitRsaKey_ex(RsaKey* key, void* ptr, int devId)
+{
+ (void)devId;
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
return InitRsaKey_fips(key, ptr);
}
@@ -42,16 +102,25 @@ int wc_FreeRsaKey(RsaKey* key)
}
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key, RNG* rng)
+ word32 outLen, RsaKey* key, WC_RNG* rng)
{
+ if (in == NULL || out == NULL || key == NULL || rng == NULL) {
+ return BAD_FUNC_ARG;
+ }
return RsaPublicEncrypt_fips(in, inLen, out, outLen, key, rng);
}
+#endif
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out,
RsaKey* key)
{
+ if (in == NULL || out == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
return RsaPrivateDecryptInline_fips(in, inLen, out, key);
}
@@ -59,19 +128,29 @@ int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out,
int wc_RsaPrivateDecrypt(const byte* in, word32 inLen, byte* out,
word32 outLen, RsaKey* key)
{
+ if (in == NULL || out == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
return RsaPrivateDecrypt_fips(in, inLen, out, outLen, key);
}
int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key, RNG* rng)
+ word32 outLen, RsaKey* key, WC_RNG* rng)
{
+ if (in == NULL || out == NULL || key == NULL || inLen == 0) {
+ return BAD_FUNC_ARG;
+ }
return RsaSSL_Sign_fips(in, inLen, out, outLen, key, rng);
}
+#endif
int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key)
{
+ if (in == NULL || out == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
return RsaSSL_VerifyInline_fips(in, inLen, out, key);
}
@@ -79,177 +158,1123 @@ int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key)
int wc_RsaSSL_Verify(const byte* in, word32 inLen, byte* out,
word32 outLen, RsaKey* key)
{
+ if (in == NULL || out == NULL || key == NULL || inLen == 0) {
+ return BAD_FUNC_ARG;
+ }
return RsaSSL_Verify_fips(in, inLen, out, outLen, key);
}
int wc_RsaEncryptSize(RsaKey* key)
{
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
return RsaEncryptSize_fips(key);
}
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
int wc_RsaFlattenPublicKey(RsaKey* key, byte* a, word32* aSz, byte* b,
word32* bSz)
{
+
/* not specified as fips so not needing _fips */
return RsaFlattenPublicKey(key, a, aSz, b, bSz);
}
-#ifdef WOLFSSL_KEY_GEN
- int wc_MakeRsaKey(RsaKey* key, int size, long e, RNG* rng)
- {
- return MakeRsaKey(key, size, e, rng);
- }
#endif
-#ifdef HAVE_CAVIUM
- int wc_RsaInitCavium(RsaKey* key, int i)
- {
- return RsaInitCavium(key, i);
- }
-
-
- void wc_RsaFreeCavium(RsaKey* key)
+#ifdef WOLFSSL_KEY_GEN
+ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
{
- RsaFreeCavium(key);
+ return MakeRsaKey(key, size, e, rng);
}
#endif
+
/* these are functions in asn and are routed to wolfssl/wolfcrypt/asn.c
* wc_RsaPrivateKeyDecode
* wc_RsaPublicKeyDecode
*/
-#else /* else build without fips */
+#else /* else build without fips, or for new fips */
+
#include <wolfssl/wolfcrypt/random.h>
-#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/logging.h>
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
-#ifdef SHOW_GEN
- #ifdef FREESCALE_MQX
- #include <fio.h>
- #else
- #include <stdio.h>
- #endif
+
+enum {
+ RSA_STATE_NONE = 0,
+
+ RSA_STATE_ENCRYPT_PAD,
+ RSA_STATE_ENCRYPT_EXPTMOD,
+ RSA_STATE_ENCRYPT_RES,
+
+ RSA_STATE_DECRYPT_EXPTMOD,
+ RSA_STATE_DECRYPT_UNPAD,
+ RSA_STATE_DECRYPT_RES,
+};
+
+
+static void wc_RsaCleanup(RsaKey* key)
+{
+#ifndef WOLFSSL_RSA_VERIFY_INLINE
+ if (key && key->data) {
+ /* make sure any allocated memory is free'd */
+ if (key->dataIsAlloc) {
+ #ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ if (key->type == RSA_PRIVATE_DECRYPT ||
+ key->type == RSA_PRIVATE_ENCRYPT) {
+ ForceZero(key->data, key->dataLen);
+ }
+ #endif
+ XFREE(key->data, key->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+ key->dataIsAlloc = 0;
+ }
+ key->data = NULL;
+ key->dataLen = 0;
+ }
+#else
+ (void)key;
#endif
+}
+
+int wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId)
+{
+ int ret = 0;
-#ifdef HAVE_CAVIUM
- static int InitCaviumRsaKey(RsaKey* key, void* heap);
- static int FreeCaviumRsaKey(RsaKey* key);
- static int CaviumRsaPublicEncrypt(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key);
- static int CaviumRsaPrivateDecrypt(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key);
- static int CaviumRsaSSL_Sign(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key);
- static int CaviumRsaSSL_Verify(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key);
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ XMEMSET(key, 0, sizeof(RsaKey));
+
+ key->type = RSA_TYPE_UNKNOWN;
+ key->state = RSA_STATE_NONE;
+ key->heap = heap;
+#ifndef WOLFSSL_RSA_VERIFY_INLINE
+ key->dataIsAlloc = 0;
+ key->data = NULL;
+#endif
+ key->dataLen = 0;
+#ifdef WC_RSA_BLINDING
+ key->rng = NULL;
#endif
-enum {
- RSA_PUBLIC_ENCRYPT = 0,
- RSA_PUBLIC_DECRYPT = 1,
- RSA_PRIVATE_ENCRYPT = 2,
- RSA_PRIVATE_DECRYPT = 3,
+#ifdef WOLF_CRYPTO_CB
+ key->devId = devId;
+#else
+ (void)devId;
+#endif
- RSA_BLOCK_TYPE_1 = 1,
- RSA_BLOCK_TYPE_2 = 2,
+#ifdef WOLFSSL_ASYNC_CRYPT
+ #ifdef WOLFSSL_CERT_GEN
+ XMEMSET(&key->certSignCtx, 0, sizeof(CertSignCtx));
+ #endif
- RSA_MIN_SIZE = 512,
- RSA_MAX_SIZE = 4096,
+ #ifdef WC_ASYNC_ENABLE_RSA
+ /* handle as async */
+ ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA,
+ key->heap, devId);
+ if (ret != 0)
+ return ret;
+ #endif /* WC_ASYNC_ENABLE_RSA */
+#endif /* WOLFSSL_ASYNC_CRYPT */
- RSA_MIN_PAD_SZ = 11 /* seperator + 0 + pad value + 8 pads */
-};
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ ret = mp_init_multi(&key->n, &key->e, NULL, NULL, NULL, NULL);
+ if (ret != MP_OKAY)
+ return ret;
+#if !defined(WOLFSSL_KEY_GEN) && !defined(OPENSSL_EXTRA) && defined(RSA_LOW_MEM)
+ ret = mp_init_multi(&key->d, &key->p, &key->q, NULL, NULL, NULL);
+#else
+ ret = mp_init_multi(&key->d, &key->p, &key->q, &key->dP, &key->dQ, &key->u);
+#endif
+ if (ret != MP_OKAY) {
+ mp_clear(&key->n);
+ mp_clear(&key->e);
+ return ret;
+ }
+#else
+ ret = mp_init(&key->n);
+ if (ret != MP_OKAY)
+ return ret;
+ ret = mp_init(&key->e);
+ if (ret != MP_OKAY) {
+ mp_clear(&key->n);
+ return ret;
+ }
+#endif
+
+#ifdef WOLFSSL_XILINX_CRYPT
+ key->pubExp = 0;
+ key->mod = NULL;
+#endif
+
+#ifdef WOLFSSL_AFALG_XILINX_RSA
+ key->alFd = WC_SOCK_NOTSET;
+ key->rdFd = WC_SOCK_NOTSET;
+#endif
+
+ return ret;
+}
int wc_InitRsaKey(RsaKey* key, void* heap)
{
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC)
- return InitCaviumRsaKey(key, heap);
+ return wc_InitRsaKey_ex(key, heap, INVALID_DEVID);
+}
+
+#ifdef HAVE_PKCS11
+int wc_InitRsaKey_Id(RsaKey* key, unsigned char* id, int len, void* heap,
+ int devId)
+{
+ int ret = 0;
+
+ if (key == NULL)
+ ret = BAD_FUNC_ARG;
+ if (ret == 0 && (len < 0 || len > RSA_MAX_ID_LEN))
+ ret = BUFFER_E;
+
+ if (ret == 0)
+ ret = wc_InitRsaKey_ex(key, heap, devId);
+
+ if (ret == 0 && id != NULL && len != 0) {
+ XMEMCPY(key->id, id, len);
+ key->idLen = len;
+ }
+
+ return ret;
+}
#endif
- key->type = -1; /* haven't decided yet */
- key->heap = heap;
-/* TomsFastMath doesn't use memory allocation */
-#ifndef USE_FAST_MATH
- key->n.dp = key->e.dp = 0; /* public alloc parts */
+#ifdef WOLFSSL_XILINX_CRYPT
+#define MAX_E_SIZE 4
+/* Used to setup hardware state
+ *
+ * key the RSA key to setup
+ *
+ * returns 0 on success
+ */
+int wc_InitRsaHw(RsaKey* key)
+{
+ unsigned char* m; /* RSA modulous */
+ word32 e = 0; /* RSA public exponent */
+ int mSz;
+ int eSz;
- key->d.dp = key->p.dp = 0; /* private alloc parts */
- key->q.dp = key->dP.dp = 0;
- key->u.dp = key->dQ.dp = 0;
-#else
- mp_init(&key->n);
- mp_init(&key->e);
- mp_init(&key->d);
- mp_init(&key->p);
- mp_init(&key->q);
- mp_init(&key->dP);
- mp_init(&key->dQ);
- mp_init(&key->u);
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ mSz = mp_unsigned_bin_size(&(key->n));
+ m = (unsigned char*)XMALLOC(mSz, key->heap, DYNAMIC_TYPE_KEY);
+ if (m == 0) {
+ return MEMORY_E;
+ }
+
+ if (mp_to_unsigned_bin(&(key->n), m) != MP_OKAY) {
+ WOLFSSL_MSG("Unable to get RSA key modulus");
+ XFREE(m, key->heap, DYNAMIC_TYPE_KEY);
+ return MP_READ_E;
+ }
+
+ eSz = mp_unsigned_bin_size(&(key->e));
+ if (eSz > MAX_E_SIZE) {
+ WOLFSSL_MSG("Exponent of size 4 bytes expected");
+ XFREE(m, key->heap, DYNAMIC_TYPE_KEY);
+ return BAD_FUNC_ARG;
+ }
+
+ if (mp_to_unsigned_bin(&(key->e), (byte*)&e + (MAX_E_SIZE - eSz))
+ != MP_OKAY) {
+ XFREE(m, key->heap, DYNAMIC_TYPE_KEY);
+ WOLFSSL_MSG("Unable to get RSA key exponent");
+ return MP_READ_E;
+ }
+
+ /* check for existing mod buffer to avoid memory leak */
+ if (key->mod != NULL) {
+ XFREE(key->mod, key->heap, DYNAMIC_TYPE_KEY);
+ }
+
+ key->pubExp = e;
+ key->mod = m;
+
+ if (XSecure_RsaInitialize(&(key->xRsa), key->mod, NULL,
+ (byte*)&(key->pubExp)) != XST_SUCCESS) {
+ WOLFSSL_MSG("Unable to initialize RSA on hardware");
+ XFREE(m, key->heap, DYNAMIC_TYPE_KEY);
+ return BAD_STATE_E;
+ }
+
+#ifdef WOLFSSL_XILINX_PATCH
+ /* currently a patch of xsecure_rsa.c for 2048 bit keys */
+ if (wc_RsaEncryptSize(key) == 256) {
+ if (XSecure_RsaSetSize(&(key->xRsa), 2048) != XST_SUCCESS) {
+ WOLFSSL_MSG("Unable to set RSA key size on hardware");
+ XFREE(m, key->heap, DYNAMIC_TYPE_KEY);
+ return BAD_STATE_E;
+ }
+ }
#endif
+ return 0;
+} /* WOLFSSL_XILINX_CRYPT*/
+
+#elif defined(WOLFSSL_CRYPTOCELL)
+
+int wc_InitRsaHw(RsaKey* key)
+{
+ CRYSError_t ret = 0;
+ byte e[3];
+ word32 eSz = sizeof(e);
+ byte n[256];
+ word32 nSz = sizeof(n);
+ byte d[256];
+ word32 dSz = sizeof(d);
+ byte p[128];
+ word32 pSz = sizeof(p);
+ byte q[128];
+ word32 qSz = sizeof(q);
+
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, p, &pSz, q, &qSz);
+ if (ret != 0)
+ return MP_READ_E;
+
+ ret = CRYS_RSA_Build_PubKey(&key->ctx.pubKey, e, eSz, n, nSz);
+ if (ret != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_RSA_Build_PubKey failed");
+ return ret;
+ }
+
+ ret = CRYS_RSA_Build_PrivKey(&key->ctx.privKey, d, dSz, e, eSz, n, nSz);
+ if (ret != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_RSA_Build_PrivKey failed");
+ return ret;
+ }
+ key->type = RSA_PRIVATE;
return 0;
}
+static int cc310_RSA_GenerateKeyPair(RsaKey* key, int size, long e)
+{
+ CRYSError_t ret = 0;
+ CRYS_RSAKGData_t KeyGenData;
+ CRYS_RSAKGFipsContext_t FipsCtx;
+ byte ex[3];
+ uint16_t eSz = sizeof(ex);
+ byte n[256];
+ uint16_t nSz = sizeof(n);
+
+ ret = CRYS_RSA_KG_GenerateKeyPair(&wc_rndState,
+ wc_rndGenVectFunc,
+ (byte*)&e,
+ 3*sizeof(uint8_t),
+ size,
+ &key->ctx.privKey,
+ &key->ctx.pubKey,
+ &KeyGenData,
+ &FipsCtx);
+
+ if (ret != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_RSA_KG_GenerateKeyPair failed");
+ return ret;
+ }
+ ret = CRYS_RSA_Get_PubKey(&key->ctx.pubKey, ex, &eSz, n, &nSz);
+ if (ret != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_RSA_Get_PubKey failed");
+ return ret;
+ }
+ ret = wc_RsaPublicKeyDecodeRaw(n, nSz, ex, eSz, key);
+
+ key->type = RSA_PRIVATE;
+
+ return ret;
+}
+#endif /* WOLFSSL_CRYPTOCELL */
int wc_FreeRsaKey(RsaKey* key)
{
- (void)key;
+ int ret = 0;
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC)
- return FreeCaviumRsaKey(key);
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ wc_RsaCleanup(key);
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+ wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA);
#endif
-/* TomsFastMath doesn't use memory allocation */
-#ifndef USE_FAST_MATH
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
if (key->type == RSA_PRIVATE) {
- mp_clear(&key->u);
- mp_clear(&key->dQ);
- mp_clear(&key->dP);
- mp_clear(&key->q);
- mp_clear(&key->p);
- mp_clear(&key->d);
+#if defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM)
+ mp_forcezero(&key->u);
+ mp_forcezero(&key->dQ);
+ mp_forcezero(&key->dP);
+#endif
+ mp_forcezero(&key->q);
+ mp_forcezero(&key->p);
+ mp_forcezero(&key->d);
}
+ /* private part */
+#if defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM)
+ mp_clear(&key->u);
+ mp_clear(&key->dQ);
+ mp_clear(&key->dP);
+#endif
+ mp_clear(&key->q);
+ mp_clear(&key->p);
+ mp_clear(&key->d);
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+
+ /* public part */
mp_clear(&key->e);
mp_clear(&key->n);
+
+#ifdef WOLFSSL_XILINX_CRYPT
+ XFREE(key->mod, key->heap, DYNAMIC_TYPE_KEY);
+ key->mod = NULL;
+#endif
+
+#ifdef WOLFSSL_AFALG_XILINX_RSA
+ /* make sure that sockets are closed on cleanup */
+ if (key->alFd > 0) {
+ close(key->alFd);
+ key->alFd = WC_SOCK_NOTSET;
+ }
+ if (key->rdFd > 0) {
+ close(key->rdFd);
+ key->rdFd = WC_SOCK_NOTSET;
+ }
+#endif
+
+ return ret;
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if defined(WOLFSSL_KEY_GEN) && !defined(WOLFSSL_NO_RSA_KEY_CHECK)
+/* Check the pair-wise consistency of the RSA key.
+ * From NIST SP 800-56B, section 6.4.1.1.
+ * Verify that k = (k^e)^d, for some k: 1 < k < n-1. */
+int wc_CheckRsaKey(RsaKey* key)
+{
+#if defined(WOLFSSL_CRYPTOCELL)
+ return 0;
+#endif
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int *k = NULL, *tmp = NULL;
+#else
+ mp_int k[1], tmp[1];
+#endif
+ int ret = 0;
+
+#ifdef WOLFSSL_SMALL_STACK
+ k = (mp_int*)XMALLOC(sizeof(mp_int) * 2, NULL, DYNAMIC_TYPE_RSA);
+ if (k == NULL)
+ return MEMORY_E;
+ tmp = k + 1;
+#endif
+
+ if (mp_init_multi(k, tmp, NULL, NULL, NULL, NULL) != MP_OKAY)
+ ret = MP_INIT_E;
+
+ if (ret == 0) {
+ if (key == NULL)
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ if (mp_set_int(k, 0x2342) != MP_OKAY)
+ ret = MP_READ_E;
+ }
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+#ifndef WOLFSSL_SP_NO_2048
+ if (mp_count_bits(&key->n) == 2048) {
+ ret = sp_ModExp_2048(k, &key->e, &key->n, tmp);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ ret = sp_ModExp_2048(tmp, &key->d, &key->n, tmp);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ }
+ else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+ if (mp_count_bits(&key->n) == 3072) {
+ ret = sp_ModExp_3072(k, &key->e, &key->n, tmp);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ ret = sp_ModExp_3072(tmp, &key->d, &key->n, tmp);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ }
+ else
+#endif
+#ifdef WOLFSSL_SP_4096
+ if (mp_count_bits(&key->n) == 4096) {
+ ret = sp_ModExp_4096(k, &key->e, &key->n, tmp);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ ret = sp_ModExp_4096(tmp, &key->d, &key->n, tmp);
+ if (ret != 0)
+ ret = MP_EXPTMOD_E;
+ }
+ else
+#endif
+#endif
+#ifdef WOLFSSL_SP_MATH
+ {
+ ret = WC_KEY_SIZE_E;
+ }
+#else
+ {
+ if (ret == 0) {
+ if (mp_exptmod(k, &key->e, &key->n, tmp) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+ }
+
+ if (ret == 0) {
+ if (mp_exptmod(tmp, &key->d, &key->n, tmp) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+ }
+ }
+#endif
+
+ if (ret == 0) {
+ if (mp_cmp(k, tmp) != MP_EQ)
+ ret = RSA_KEY_PAIR_E;
+ }
+
+ /* Check d is less than n. */
+ if (ret == 0 ) {
+ if (mp_cmp(&key->d, &key->n) != MP_LT) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ /* Check p*q = n. */
+ if (ret == 0 ) {
+ if (mp_mul(&key->p, &key->q, tmp) != MP_OKAY) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ if (ret == 0 ) {
+ if (mp_cmp(&key->n, tmp) != MP_EQ) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+
+ /* Check dP, dQ and u if they exist */
+ if (ret == 0 && !mp_iszero(&key->dP)) {
+ if (mp_sub_d(&key->p, 1, tmp) != MP_OKAY) {
+ ret = MP_EXPTMOD_E;
+ }
+ /* Check dP <= p-1. */
+ if (ret == 0) {
+ if (mp_cmp(&key->dP, tmp) != MP_LT) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ /* Check e*dP mod p-1 = 1. (dP = 1/e mod p-1) */
+ if (ret == 0) {
+ if (mp_mulmod(&key->dP, &key->e, tmp, tmp) != MP_OKAY) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ if (ret == 0 ) {
+ if (!mp_isone(tmp)) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+
+ if (ret == 0) {
+ if (mp_sub_d(&key->q, 1, tmp) != MP_OKAY) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ /* Check dQ <= q-1. */
+ if (ret == 0) {
+ if (mp_cmp(&key->dQ, tmp) != MP_LT) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ /* Check e*dQ mod q-1 = 1. (dQ = 1/e mod q-1) */
+ if (ret == 0) {
+ if (mp_mulmod(&key->dQ, &key->e, tmp, tmp) != MP_OKAY) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ if (ret == 0 ) {
+ if (!mp_isone(tmp)) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+
+ /* Check u <= p. */
+ if (ret == 0) {
+ if (mp_cmp(&key->u, &key->p) != MP_LT) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ /* Check u*q mod p = 1. (u = 1/q mod p) */
+ if (ret == 0) {
+ if (mp_mulmod(&key->u, &key->q, &key->p, tmp) != MP_OKAY) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ if (ret == 0 ) {
+ if (!mp_isone(tmp)) {
+ ret = MP_EXPTMOD_E;
+ }
+ }
+ }
+
+ mp_forcezero(tmp);
+ mp_clear(tmp);
+ mp_clear(k);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(k, NULL, DYNAMIC_TYPE_RSA);
+#endif
+
+ return ret;
+}
#endif
+#endif
+
+
+#if !defined(WC_NO_RSA_OAEP) || defined(WC_RSA_PSS)
+/* Uses MGF1 standard as a mask generation function
+ hType: hash type used
+ seed: seed to use for generating mask
+ seedSz: size of seed buffer
+ out: mask output after generation
+ outSz: size of output buffer
+ */
+#if !defined(NO_SHA) || !defined(NO_SHA256) || defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512)
+static int RsaMGF1(enum wc_HashType hType, byte* seed, word32 seedSz,
+ byte* out, word32 outSz, void* heap)
+{
+ byte* tmp;
+ /* needs to be large enough for seed size plus counter(4) */
+ byte tmpA[WC_MAX_DIGEST_SIZE + 4];
+ byte tmpF; /* 1 if dynamic memory needs freed */
+ word32 tmpSz;
+ int hLen;
+ int ret;
+ word32 counter;
+ word32 idx;
+ hLen = wc_HashGetDigestSize(hType);
+ counter = 0;
+ idx = 0;
+
+ (void)heap;
+
+ /* check error return of wc_HashGetDigestSize */
+ if (hLen < 0) {
+ return hLen;
+ }
+
+ /* if tmp is not large enough then use some dynamic memory */
+ if ((seedSz + 4) > sizeof(tmpA) || (word32)hLen > sizeof(tmpA)) {
+ /* find largest amount of memory needed which will be the max of
+ * hLen and (seedSz + 4) since tmp is used to store the hash digest */
+ tmpSz = ((seedSz + 4) > (word32)hLen)? seedSz + 4: (word32)hLen;
+ tmp = (byte*)XMALLOC(tmpSz, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ if (tmp == NULL) {
+ return MEMORY_E;
+ }
+ tmpF = 1; /* make sure to free memory when done */
+ }
+ else {
+ /* use array on the stack */
+ tmpSz = sizeof(tmpA);
+ tmp = tmpA;
+ tmpF = 0; /* no need to free memory at end */
+ }
+
+ do {
+ int i = 0;
+ XMEMCPY(tmp, seed, seedSz);
+
+ /* counter to byte array appended to tmp */
+ tmp[seedSz] = (counter >> 24) & 0xFF;
+ tmp[seedSz + 1] = (counter >> 16) & 0xFF;
+ tmp[seedSz + 2] = (counter >> 8) & 0xFF;
+ tmp[seedSz + 3] = (counter) & 0xFF;
+
+ /* hash and append to existing output */
+ if ((ret = wc_Hash(hType, tmp, (seedSz + 4), tmp, tmpSz)) != 0) {
+ /* check for if dynamic memory was needed, then free */
+ if (tmpF) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ }
+ return ret;
+ }
+
+ for (i = 0; i < hLen && idx < outSz; i++) {
+ out[idx++] = tmp[i];
+ }
+ counter++;
+ } while (idx < outSz);
+
+ /* check for if dynamic memory was needed, then free */
+ if (tmpF) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ }
return 0;
}
+#endif /* SHA2 Hashes */
-static int wc_RsaPad(const byte* input, word32 inputLen, byte* pkcsBlock,
- word32 pkcsBlockLen, byte padValue, RNG* rng)
+/* helper function to direct which mask generation function is used
+ switched on type input
+ */
+static int RsaMGF(int type, byte* seed, word32 seedSz, byte* out,
+ word32 outSz, void* heap)
{
- if (inputLen == 0)
- return 0;
+ int ret;
+
+ switch(type) {
+ #ifndef NO_SHA
+ case WC_MGF1SHA1:
+ ret = RsaMGF1(WC_HASH_TYPE_SHA, seed, seedSz, out, outSz, heap);
+ break;
+ #endif
+ #ifndef NO_SHA256
+ #ifdef WOLFSSL_SHA224
+ case WC_MGF1SHA224:
+ ret = RsaMGF1(WC_HASH_TYPE_SHA224, seed, seedSz, out, outSz, heap);
+ break;
+ #endif
+ case WC_MGF1SHA256:
+ ret = RsaMGF1(WC_HASH_TYPE_SHA256, seed, seedSz, out, outSz, heap);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case WC_MGF1SHA384:
+ ret = RsaMGF1(WC_HASH_TYPE_SHA384, seed, seedSz, out, outSz, heap);
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case WC_MGF1SHA512:
+ ret = RsaMGF1(WC_HASH_TYPE_SHA512, seed, seedSz, out, outSz, heap);
+ break;
+ #endif
+ default:
+ WOLFSSL_MSG("Unknown MGF type: check build options");
+ ret = BAD_FUNC_ARG;
+ }
+
+ /* in case of default avoid unused warning */
+ (void)seed;
+ (void)seedSz;
+ (void)out;
+ (void)outSz;
+ (void)heap;
+
+ return ret;
+}
+#endif /* !WC_NO_RSA_OAEP || WC_RSA_PSS */
+
+
+/* Padding */
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+#ifndef WC_NO_RNG
+#ifndef WC_NO_RSA_OAEP
+static int RsaPad_OAEP(const byte* input, word32 inputLen, byte* pkcsBlock,
+ word32 pkcsBlockLen, byte padValue, WC_RNG* rng,
+ enum wc_HashType hType, int mgf, byte* optLabel, word32 labelLen,
+ void* heap)
+{
+ int ret;
+ int hLen;
+ int psLen;
+ int i;
+ word32 idx;
+
+ byte* dbMask;
+
+ #ifdef WOLFSSL_SMALL_STACK
+ byte* lHash = NULL;
+ byte* seed = NULL;
+ #else
+ /* must be large enough to contain largest hash */
+ byte lHash[WC_MAX_DIGEST_SIZE];
+ byte seed[ WC_MAX_DIGEST_SIZE];
+ #endif
+
+ /* no label is allowed, but catch if no label provided and length > 0 */
+ if (optLabel == NULL && labelLen > 0) {
+ return BUFFER_E;
+ }
+
+ /* limit of label is the same as limit of hash function which is massive */
+ hLen = wc_HashGetDigestSize(hType);
+ if (hLen < 0) {
+ return hLen;
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ lHash = (byte*)XMALLOC(hLen, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ if (lHash == NULL) {
+ return MEMORY_E;
+ }
+ seed = (byte*)XMALLOC(hLen, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ if (seed == NULL) {
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ return MEMORY_E;
+ }
+ #else
+ /* hLen should never be larger than lHash since size is max digest size,
+ but check before blindly calling wc_Hash */
+ if ((word32)hLen > sizeof(lHash)) {
+ WOLFSSL_MSG("OAEP lHash to small for digest!!");
+ return MEMORY_E;
+ }
+ #endif
+
+ if ((ret = wc_Hash(hType, optLabel, labelLen, lHash, hLen)) != 0) {
+ WOLFSSL_MSG("OAEP hash type possibly not supported or lHash to small");
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ #endif
+ return ret;
+ }
+
+ /* handles check of location for idx as well as psLen, cast to int to check
+ for pkcsBlockLen(k) - 2 * hLen - 2 being negative
+ This check is similar to decryption where k > 2 * hLen + 2 as msg
+ size approaches 0. In decryption if k is less than or equal -- then there
+ is no possible room for msg.
+ k = RSA key size
+ hLen = hash digest size -- will always be >= 0 at this point
+ */
+ if ((word32)(2 * hLen + 2) > pkcsBlockLen) {
+ WOLFSSL_MSG("OAEP pad error hash to big for RSA key size");
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ #endif
+ return BAD_FUNC_ARG;
+ }
+
+ if (inputLen > (pkcsBlockLen - 2 * hLen - 2)) {
+ WOLFSSL_MSG("OAEP pad error message too long");
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ #endif
+ return BAD_FUNC_ARG;
+ }
+
+ /* concatenate lHash || PS || 0x01 || msg */
+ idx = pkcsBlockLen - 1 - inputLen;
+ psLen = pkcsBlockLen - inputLen - 2 * hLen - 2;
+ if (pkcsBlockLen < inputLen) { /* make sure not writing over end of buffer */
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ #endif
+ return BUFFER_E;
+ }
+ XMEMCPY(pkcsBlock + (pkcsBlockLen - inputLen), input, inputLen);
+ pkcsBlock[idx--] = 0x01; /* PS and M separator */
+ while (psLen > 0 && idx > 0) {
+ pkcsBlock[idx--] = 0x00;
+ psLen--;
+ }
+
+ idx = idx - hLen + 1;
+ XMEMCPY(pkcsBlock + idx, lHash, hLen);
+
+ /* generate random seed */
+ if ((ret = wc_RNG_GenerateBlock(rng, seed, hLen)) != 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ #endif
+ return ret;
+ }
+
+ /* create maskedDB from dbMask */
+ dbMask = (byte*)XMALLOC(pkcsBlockLen - hLen - 1, heap, DYNAMIC_TYPE_RSA);
+ if (dbMask == NULL) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ #endif
+ return MEMORY_E;
+ }
+ XMEMSET(dbMask, 0, pkcsBlockLen - hLen - 1); /* help static analyzer */
+
+ ret = RsaMGF(mgf, seed, hLen, dbMask, pkcsBlockLen - hLen - 1, heap);
+ if (ret != 0) {
+ XFREE(dbMask, heap, DYNAMIC_TYPE_RSA);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ #endif
+ return ret;
+ }
+
+ i = 0;
+ idx = hLen + 1;
+ while (idx < pkcsBlockLen && (word32)i < (pkcsBlockLen - hLen -1)) {
+ pkcsBlock[idx] = dbMask[i++] ^ pkcsBlock[idx];
+ idx++;
+ }
+ XFREE(dbMask, heap, DYNAMIC_TYPE_RSA);
+
+
+ /* create maskedSeed from seedMask */
+ idx = 0;
+ pkcsBlock[idx++] = 0x00;
+ /* create seedMask inline */
+ if ((ret = RsaMGF(mgf, pkcsBlock + hLen + 1, pkcsBlockLen - hLen - 1,
+ pkcsBlock + 1, hLen, heap)) != 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ #endif
+ return ret;
+ }
+
+ /* xor created seedMask with seed to make maskedSeed */
+ i = 0;
+ while (idx < (word32)(hLen + 1) && i < hLen) {
+ pkcsBlock[idx] = pkcsBlock[idx] ^ seed[i++];
+ idx++;
+ }
+
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(lHash, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ XFREE(seed, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ #endif
+ (void)padValue;
+
+ return 0;
+}
+#endif /* !WC_NO_RSA_OAEP */
+
+#ifdef WC_RSA_PSS
+
+/* 0x00 .. 0x00 0x01 | Salt | Gen Hash | 0xbc
+ * XOR MGF over all bytes down to end of Salt
+ * Gen Hash = HASH(8 * 0x00 | Message Hash | Salt)
+ *
+ * input Digest of the message.
+ * inputLen Length of digest.
+ * pkcsBlock Buffer to write to.
+ * pkcsBlockLen Length of buffer to write to.
+ * rng Random number generator (for salt).
+ * htype Hash function to use.
+ * mgf Mask generation function.
+ * saltLen Length of salt to put in padding.
+ * bits Length of key in bits.
+ * heap Used for dynamic memory allocation.
+ * returns 0 on success, PSS_SALTLEN_E when the salt length is invalid
+ * and other negative values on error.
+ */
+static int RsaPad_PSS(const byte* input, word32 inputLen, byte* pkcsBlock,
+ word32 pkcsBlockLen, WC_RNG* rng, enum wc_HashType hType, int mgf,
+ int saltLen, int bits, void* heap)
+{
+ int ret = 0;
+ int hLen, i, o, maskLen, hiBits;
+ byte* m;
+ byte* s;
+#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER)
+ #if defined(WOLFSSL_NO_MALLOC) && !defined(WOLFSSL_STATIC_MEMORY)
+ byte salt[RSA_MAX_SIZE/8 + RSA_PSS_PAD_SZ];
+ #else
+ byte* salt = NULL;
+ #endif
+#else
+ byte salt[WC_MAX_DIGEST_SIZE];
+#endif
+
+#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER)
+ if (pkcsBlockLen > RSA_MAX_SIZE/8) {
+ return MEMORY_E;
+ }
+#endif
+
+ hLen = wc_HashGetDigestSize(hType);
+ if (hLen < 0)
+ return hLen;
+
+ hiBits = (bits - 1) & 0x7;
+ if (hiBits == 0) {
+ *(pkcsBlock++) = 0;
+ pkcsBlockLen--;
+ }
+
+ if (saltLen == RSA_PSS_SALT_LEN_DEFAULT) {
+ saltLen = hLen;
+ #ifdef WOLFSSL_SHA512
+ /* See FIPS 186-4 section 5.5 item (e). */
+ if (bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE) {
+ saltLen = RSA_PSS_SALT_MAX_SZ;
+ }
+ #endif
+ }
+#ifndef WOLFSSL_PSS_LONG_SALT
+ else if (saltLen > hLen) {
+ return PSS_SALTLEN_E;
+ }
+#endif
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+ else if (saltLen < RSA_PSS_SALT_LEN_DEFAULT) {
+ return PSS_SALTLEN_E;
+ }
+#else
+ else if (saltLen == RSA_PSS_SALT_LEN_DISCOVER) {
+ saltLen = (int)pkcsBlockLen - hLen - 2;
+ if (saltLen < 0) {
+ return PSS_SALTLEN_E;
+ }
+ }
+ else if (saltLen < RSA_PSS_SALT_LEN_DISCOVER) {
+ return PSS_SALTLEN_E;
+ }
+#endif
+ if ((int)pkcsBlockLen - hLen < saltLen + 2) {
+ return PSS_SALTLEN_E;
+ }
+
+ maskLen = pkcsBlockLen - 1 - hLen;
+
+#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER)
+ #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+ salt = (byte*)XMALLOC(RSA_PSS_PAD_SZ + inputLen + saltLen, heap,
+ DYNAMIC_TYPE_RSA_BUFFER);
+ if (salt == NULL) {
+ return MEMORY_E;
+ }
+ #endif
+ s = m = salt;
+ XMEMSET(m, 0, RSA_PSS_PAD_SZ);
+ m += RSA_PSS_PAD_SZ;
+ XMEMCPY(m, input, inputLen);
+ m += inputLen;
+ o = (int)(m - s);
+ if (saltLen > 0) {
+ ret = wc_RNG_GenerateBlock(rng, m, saltLen);
+ if (ret == 0) {
+ m += saltLen;
+ }
+ }
+#else
+ s = m = pkcsBlock;
+ XMEMSET(m, 0, RSA_PSS_PAD_SZ);
+ m += RSA_PSS_PAD_SZ;
+ XMEMCPY(m, input, inputLen);
+ m += inputLen;
+ o = 0;
+ if (saltLen > 0) {
+ ret = wc_RNG_GenerateBlock(rng, salt, saltLen);
+ if (ret == 0) {
+ XMEMCPY(m, salt, saltLen);
+ m += saltLen;
+ }
+ }
+#endif
+ if (ret == 0) {
+ /* Put Hash at end of pkcsBlock - 1 */
+ ret = wc_Hash(hType, s, (word32)(m - s), pkcsBlock + maskLen, hLen);
+ }
+ if (ret == 0) {
+ pkcsBlock[pkcsBlockLen - 1] = RSA_PSS_PAD_TERM;
+
+ ret = RsaMGF(mgf, pkcsBlock + maskLen, hLen, pkcsBlock, maskLen, heap);
+ }
+ if (ret == 0) {
+ pkcsBlock[0] &= (1 << hiBits) - 1;
+
+ m = pkcsBlock + maskLen - saltLen - 1;
+ *(m++) ^= 0x01;
+ for (i = 0; i < saltLen; i++) {
+ m[i] ^= salt[o + i];
+ }
+ }
+
+#if defined(WOLFSSL_PSS_LONG_SALT) || defined(WOLFSSL_PSS_SALT_LEN_DISCOVER)
+ #if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+ if (salt != NULL) {
+ XFREE(salt, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ }
+ #endif
+#endif
+ return ret;
+}
+#endif /* WC_RSA_PSS */
+#endif /* !WC_NO_RNG */
+
+static int RsaPad(const byte* input, word32 inputLen, byte* pkcsBlock,
+ word32 pkcsBlockLen, byte padValue, WC_RNG* rng)
+{
+ if (input == NULL || inputLen == 0 || pkcsBlock == NULL ||
+ pkcsBlockLen == 0) {
+ return BAD_FUNC_ARG;
+ }
pkcsBlock[0] = 0x0; /* set first byte to zero and advance */
pkcsBlock++; pkcsBlockLen--;
pkcsBlock[0] = padValue; /* insert padValue */
- if (padValue == RSA_BLOCK_TYPE_1)
+ if (padValue == RSA_BLOCK_TYPE_1) {
+ if (pkcsBlockLen < inputLen + 2) {
+ WOLFSSL_MSG("RsaPad error, invalid length");
+ return RSA_PAD_E;
+ }
+
/* pad with 0xff bytes */
XMEMSET(&pkcsBlock[1], 0xFF, pkcsBlockLen - inputLen - 2);
+ }
else {
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WC_NO_RNG)
/* pad with non-zero random bytes */
- word32 padLen = pkcsBlockLen - inputLen - 1, i;
- int ret = wc_RNG_GenerateBlock(rng, &pkcsBlock[1], padLen);
+ word32 padLen, i;
+ int ret;
- if (ret != 0)
+ if (pkcsBlockLen < inputLen + 1) {
+ WOLFSSL_MSG("RsaPad error, invalid length");
+ return RSA_PAD_E;
+ }
+
+ padLen = pkcsBlockLen - inputLen - 1;
+ ret = wc_RNG_GenerateBlock(rng, &pkcsBlock[1], padLen);
+ if (ret != 0) {
return ret;
+ }
/* remove zeros */
- for (i = 1; i < padLen; i++)
+ for (i = 1; i < padLen; i++) {
if (pkcsBlock[i] == 0) pkcsBlock[i] = 0x01;
+ }
+#else
+ (void)rng;
+ return RSA_WRONG_TYPE_E;
+#endif
}
pkcsBlock[pkcsBlockLen-inputLen-1] = 0; /* separator */
@@ -258,348 +1283,2300 @@ static int wc_RsaPad(const byte* input, word32 inputLen, byte* pkcsBlock,
return 0;
}
+/* helper function to direct which padding is used */
+int wc_RsaPad_ex(const byte* input, word32 inputLen, byte* pkcsBlock,
+ word32 pkcsBlockLen, byte padValue, WC_RNG* rng, int padType,
+ enum wc_HashType hType, int mgf, byte* optLabel, word32 labelLen,
+ int saltLen, int bits, void* heap)
+{
+ int ret;
+
+ switch (padType)
+ {
+ case WC_RSA_PKCSV15_PAD:
+ /*WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 padding");*/
+ ret = RsaPad(input, inputLen, pkcsBlock, pkcsBlockLen,
+ padValue, rng);
+ break;
+
+#ifndef WC_NO_RNG
+ #ifndef WC_NO_RSA_OAEP
+ case WC_RSA_OAEP_PAD:
+ WOLFSSL_MSG("wolfSSL Using RSA OAEP padding");
+ ret = RsaPad_OAEP(input, inputLen, pkcsBlock, pkcsBlockLen,
+ padValue, rng, hType, mgf, optLabel, labelLen, heap);
+ break;
+ #endif
+
+ #ifdef WC_RSA_PSS
+ case WC_RSA_PSS_PAD:
+ WOLFSSL_MSG("wolfSSL Using RSA PSS padding");
+ ret = RsaPad_PSS(input, inputLen, pkcsBlock, pkcsBlockLen, rng,
+ hType, mgf, saltLen, bits, heap);
+ break;
+ #endif
+#endif /* !WC_NO_RNG */
+
+ #ifdef WC_RSA_NO_PADDING
+ case WC_RSA_NO_PAD:
+ WOLFSSL_MSG("wolfSSL Using NO padding");
+ /* In the case of no padding being used check that input is exactly
+ * the RSA key length */
+ if (bits <= 0 || inputLen != ((word32)bits/WOLFSSL_BIT_SIZE)) {
+ WOLFSSL_MSG("Bad input size");
+ ret = RSA_PAD_E;
+ }
+ else {
+ XMEMCPY(pkcsBlock, input, inputLen);
+ ret = 0;
+ }
+ break;
+ #endif
+
+ default:
+ WOLFSSL_MSG("Unknown RSA Pad Type");
+ ret = RSA_PAD_E;
+ }
+
+ /* silence warning if not used with padding scheme */
+ (void)input;
+ (void)inputLen;
+ (void)pkcsBlock;
+ (void)pkcsBlockLen;
+ (void)padValue;
+ (void)rng;
+ (void)padType;
+ (void)hType;
+ (void)mgf;
+ (void)optLabel;
+ (void)labelLen;
+ (void)saltLen;
+ (void)bits;
+ (void)heap;
+
+ return ret;
+}
+#endif /* WOLFSSL_RSA_VERIFY_ONLY */
+
+
+/* UnPadding */
+#ifndef WC_NO_RSA_OAEP
/* UnPad plaintext, set start to *output, return length of plaintext,
* < 0 on error */
-static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen,
- byte **output, byte padValue)
+static int RsaUnPad_OAEP(byte *pkcsBlock, unsigned int pkcsBlockLen,
+ byte **output, enum wc_HashType hType, int mgf,
+ byte* optLabel, word32 labelLen, void* heap)
{
- word32 maxOutputLen = (pkcsBlockLen > 10) ? (pkcsBlockLen - 10) : 0,
- invalid = 0,
- i = 1,
- outputLen;
+ int hLen;
+ int ret;
+ byte h[WC_MAX_DIGEST_SIZE]; /* max digest size */
+ byte* tmp;
+ word32 idx;
- if (pkcsBlock[0] != 0x0) /* skip past zero */
- invalid = 1;
- pkcsBlock++; pkcsBlockLen--;
+ /* no label is allowed, but catch if no label provided and length > 0 */
+ if (optLabel == NULL && labelLen > 0) {
+ return BUFFER_E;
+ }
+
+ hLen = wc_HashGetDigestSize(hType);
+ if ((hLen < 0) || (pkcsBlockLen < (2 * (word32)hLen + 2))) {
+ return BAD_FUNC_ARG;
+ }
+
+ tmp = (byte*)XMALLOC(pkcsBlockLen, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ if (tmp == NULL) {
+ return MEMORY_E;
+ }
+ XMEMSET(tmp, 0, pkcsBlockLen);
+
+ /* find seedMask value */
+ if ((ret = RsaMGF(mgf, (byte*)(pkcsBlock + (hLen + 1)),
+ pkcsBlockLen - hLen - 1, tmp, hLen, heap)) != 0) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ return ret;
+ }
+
+ /* xor seedMask value with maskedSeed to get seed value */
+ for (idx = 0; idx < (word32)hLen; idx++) {
+ tmp[idx] = tmp[idx] ^ pkcsBlock[1 + idx];
+ }
+
+ /* get dbMask value */
+ if ((ret = RsaMGF(mgf, tmp, hLen, tmp + hLen,
+ pkcsBlockLen - hLen - 1, heap)) != 0) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_RSA_BUFFER);
+ return ret;
+ }
+
+ /* get DB value by doing maskedDB xor dbMask */
+ for (idx = 0; idx < (pkcsBlockLen - hLen - 1); idx++) {
+ pkcsBlock[hLen + 1 + idx] = pkcsBlock[hLen + 1 + idx] ^ tmp[idx + hLen];
+ }
+
+ /* done with use of tmp buffer */
+ XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+
+ /* advance idx to index of PS and msg separator, account for PS size of 0*/
+ idx = hLen + 1 + hLen;
+ while (idx < pkcsBlockLen && pkcsBlock[idx] == 0) {idx++;}
+
+ /* create hash of label for comparison with hash sent */
+ if ((ret = wc_Hash(hType, optLabel, labelLen, h, hLen)) != 0) {
+ return ret;
+ }
+
+ /* say no to chosen ciphertext attack.
+ Comparison of lHash, Y, and separator value needs to all happen in
+ constant time.
+ Attackers should not be able to get error condition from the timing of
+ these checks.
+ */
+ ret = 0;
+ ret |= ConstantCompare(pkcsBlock + hLen + 1, h, hLen);
+ ret += pkcsBlock[idx++] ^ 0x01; /* separator value is 0x01 */
+ ret += pkcsBlock[0] ^ 0x00; /* Y, the first value, should be 0 */
+
+ /* Return 0 data length on error. */
+ idx = ctMaskSelInt(ctMaskEq(ret, 0), idx, pkcsBlockLen);
+
+ /* adjust pointer to correct location in array and return size of M */
+ *output = (byte*)(pkcsBlock + idx);
+ return pkcsBlockLen - idx;
+}
+#endif /* WC_NO_RSA_OAEP */
+
+#ifdef WC_RSA_PSS
+/* 0x00 .. 0x00 0x01 | Salt | Gen Hash | 0xbc
+ * MGF over all bytes down to end of Salt
+ *
+ * pkcsBlock Buffer holding decrypted data.
+ * pkcsBlockLen Length of buffer.
+ * htype Hash function to use.
+ * mgf Mask generation function.
+ * saltLen Length of salt to put in padding.
+ * bits Length of key in bits.
+ * heap Used for dynamic memory allocation.
+ * returns 0 on success, PSS_SALTLEN_E when the salt length is invalid,
+ * BAD_PADDING_E when the padding is not valid, MEMORY_E when allocation fails
+ * and other negative values on error.
+ */
+static int RsaUnPad_PSS(byte *pkcsBlock, unsigned int pkcsBlockLen,
+ byte **output, enum wc_HashType hType, int mgf,
+ int saltLen, int bits, void* heap)
+{
+ int ret;
+ byte* tmp;
+ int hLen, i, maskLen;
+#ifdef WOLFSSL_SHA512
+ int orig_bits = bits;
+#endif
+#if defined(WOLFSSL_NO_MALLOC) && !defined(WOLFSSL_STATIC_MEMORY)
+ byte tmp_buf[RSA_MAX_SIZE/8];
+ tmp = tmp_buf;
+
+ if (pkcsBlockLen > RSA_MAX_SIZE/8) {
+ return MEMORY_E;
+ }
+#endif
+
+ hLen = wc_HashGetDigestSize(hType);
+ if (hLen < 0)
+ return hLen;
+ bits = (bits - 1) & 0x7;
+ if ((pkcsBlock[0] & (0xff << bits)) != 0) {
+ return BAD_PADDING_E;
+ }
+ if (bits == 0) {
+ pkcsBlock++;
+ pkcsBlockLen--;
+ }
+ maskLen = (int)pkcsBlockLen - 1 - hLen;
+ if (maskLen < 0) {
+ WOLFSSL_MSG("RsaUnPad_PSS: Hash too large");
+ return WC_KEY_SIZE_E;
+ }
+
+ if (saltLen == RSA_PSS_SALT_LEN_DEFAULT) {
+ saltLen = hLen;
+ #ifdef WOLFSSL_SHA512
+ /* See FIPS 186-4 section 5.5 item (e). */
+ if (orig_bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE)
+ saltLen = RSA_PSS_SALT_MAX_SZ;
+ #endif
+ }
+#ifndef WOLFSSL_PSS_LONG_SALT
+ else if (saltLen > hLen)
+ return PSS_SALTLEN_E;
+#endif
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+ else if (saltLen < RSA_PSS_SALT_LEN_DEFAULT)
+ return PSS_SALTLEN_E;
+ if (maskLen < saltLen + 1) {
+ return PSS_SALTLEN_E;
+ }
+#else
+ else if (saltLen < RSA_PSS_SALT_LEN_DISCOVER)
+ return PSS_SALTLEN_E;
+ if (saltLen != RSA_PSS_SALT_LEN_DISCOVER && maskLen < saltLen + 1) {
+ return WC_KEY_SIZE_E;
+ }
+#endif
- /* Require block type padValue */
- invalid = (pkcsBlock[0] != padValue) || invalid;
+ if (pkcsBlock[pkcsBlockLen - 1] != RSA_PSS_PAD_TERM) {
+ WOLFSSL_MSG("RsaUnPad_PSS: Padding Term Error");
+ return BAD_PADDING_E;
+ }
+
+#if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+ tmp = (byte*)XMALLOC(maskLen, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ if (tmp == NULL) {
+ return MEMORY_E;
+ }
+#endif
+
+ if ((ret = RsaMGF(mgf, pkcsBlock + maskLen, hLen, tmp, maskLen,
+ heap)) != 0) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ return ret;
+ }
+
+ tmp[0] &= (1 << bits) - 1;
+ pkcsBlock[0] &= (1 << bits) - 1;
+#ifdef WOLFSSL_PSS_SALT_LEN_DISCOVER
+ if (saltLen == RSA_PSS_SALT_LEN_DISCOVER) {
+ for (i = 0; i < maskLen - 1; i++) {
+ if (tmp[i] != pkcsBlock[i]) {
+ break;
+ }
+ }
+ if (tmp[i] != (pkcsBlock[i] ^ 0x01)) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ WOLFSSL_MSG("RsaUnPad_PSS: Padding Error Match");
+ return PSS_SALTLEN_RECOVER_E;
+ }
+ saltLen = maskLen - (i + 1);
+ }
+ else
+#endif
+ {
+ for (i = 0; i < maskLen - 1 - saltLen; i++) {
+ if (tmp[i] != pkcsBlock[i]) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ WOLFSSL_MSG("RsaUnPad_PSS: Padding Error Match");
+ return PSS_SALTLEN_E;
+ }
+ }
+ if (tmp[i] != (pkcsBlock[i] ^ 0x01)) {
+ XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+ WOLFSSL_MSG("RsaUnPad_PSS: Padding Error End");
+ return PSS_SALTLEN_E;
+ }
+ }
+ for (i++; i < maskLen; i++)
+ pkcsBlock[i] ^= tmp[i];
+
+#if !defined(WOLFSSL_NO_MALLOC) || defined(WOLFSSL_STATIC_MEMORY)
+ XFREE(tmp, heap, DYNAMIC_TYPE_RSA_BUFFER);
+#endif
+
+ *output = pkcsBlock + maskLen - saltLen;
+ return saltLen + hLen;
+}
+#endif
+
+/* UnPad plaintext, set start to *output, return length of plaintext,
+ * < 0 on error */
+static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen,
+ byte **output, byte padValue)
+{
+ int ret = BAD_FUNC_ARG;
+ word16 i;
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ byte invalid = 0;
+#endif
+
+ if (output == NULL || pkcsBlockLen == 0 || pkcsBlockLen > 0xFFFF) {
+ return BAD_FUNC_ARG;
+ }
- /* verify the padding until we find the separator */
if (padValue == RSA_BLOCK_TYPE_1) {
- while (i<pkcsBlockLen && pkcsBlock[i++] == 0xFF) {/* Null body */}
+ /* First byte must be 0x00 and Second byte, block type, 0x01 */
+ if (pkcsBlock[0] != 0 || pkcsBlock[1] != RSA_BLOCK_TYPE_1) {
+ WOLFSSL_MSG("RsaUnPad error, invalid formatting");
+ return RSA_PAD_E;
+ }
+
+ /* check the padding until we find the separator */
+ for (i = 2; i < pkcsBlockLen && pkcsBlock[i++] == 0xFF; ) { }
+
+ /* Minimum of 11 bytes of pre-message data and must have separator. */
+ if (i < RSA_MIN_PAD_SZ || pkcsBlock[i-1] != 0) {
+ WOLFSSL_MSG("RsaUnPad error, bad formatting");
+ return RSA_PAD_E;
+ }
+
+ *output = (byte *)(pkcsBlock + i);
+ ret = pkcsBlockLen - i;
}
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
else {
- while (i<pkcsBlockLen && pkcsBlock[i++]) {/* Null body */}
- }
+ word16 j;
+ word16 pastSep = 0;
+
+ /* Decrypted with private key - unpad must be constant time. */
+ for (i = 0, j = 2; j < pkcsBlockLen; j++) {
+ /* Update i if not passed the separator and at separator. */
+ i |= (~pastSep) & ctMask16Eq(pkcsBlock[j], 0x00) & (j + 1);
+ pastSep |= ctMask16Eq(pkcsBlock[j], 0x00);
+ }
- if(!(i==pkcsBlockLen || pkcsBlock[i-1]==0)) {
- WOLFSSL_MSG("RsaUnPad error, bad formatting");
- return RSA_PAD_E;
+ /* Minimum of 11 bytes of pre-message data - including leading 0x00. */
+ invalid |= ctMaskLT(i, RSA_MIN_PAD_SZ);
+ /* Must have seen separator. */
+ invalid |= ~pastSep;
+ /* First byte must be 0x00. */
+ invalid |= ctMaskNotEq(pkcsBlock[0], 0x00);
+ /* Check against expected block type: padValue */
+ invalid |= ctMaskNotEq(pkcsBlock[1], padValue);
+
+ *output = (byte *)(pkcsBlock + i);
+ ret = ((int)~invalid) & (pkcsBlockLen - i);
}
+#endif
+
+ return ret;
+}
+
+/* helper function to direct unpadding
+ *
+ * bits is the key modulus size in bits
+ */
+int wc_RsaUnPad_ex(byte* pkcsBlock, word32 pkcsBlockLen, byte** out,
+ byte padValue, int padType, enum wc_HashType hType,
+ int mgf, byte* optLabel, word32 labelLen, int saltLen,
+ int bits, void* heap)
+{
+ int ret;
- outputLen = pkcsBlockLen - i;
- invalid = (outputLen > maxOutputLen) || invalid;
+ switch (padType) {
+ case WC_RSA_PKCSV15_PAD:
+ /*WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 un-padding");*/
+ ret = RsaUnPad(pkcsBlock, pkcsBlockLen, out, padValue);
+ break;
+
+ #ifndef WC_NO_RSA_OAEP
+ case WC_RSA_OAEP_PAD:
+ WOLFSSL_MSG("wolfSSL Using RSA OAEP un-padding");
+ ret = RsaUnPad_OAEP((byte*)pkcsBlock, pkcsBlockLen, out,
+ hType, mgf, optLabel, labelLen, heap);
+ break;
+ #endif
+
+ #ifdef WC_RSA_PSS
+ case WC_RSA_PSS_PAD:
+ WOLFSSL_MSG("wolfSSL Using RSA PSS un-padding");
+ ret = RsaUnPad_PSS((byte*)pkcsBlock, pkcsBlockLen, out, hType, mgf,
+ saltLen, bits, heap);
+ break;
+ #endif
+
+ #ifdef WC_RSA_NO_PADDING
+ case WC_RSA_NO_PAD:
+ WOLFSSL_MSG("wolfSSL Using NO un-padding");
+
+ /* In the case of no padding being used check that input is exactly
+ * the RSA key length */
+ if (bits <= 0 || pkcsBlockLen !=
+ ((word32)(bits+WOLFSSL_BIT_SIZE-1)/WOLFSSL_BIT_SIZE)) {
+ WOLFSSL_MSG("Bad input size");
+ ret = RSA_PAD_E;
+ }
+ else {
+ if (out != NULL) {
+ *out = pkcsBlock;
+ }
+ ret = pkcsBlockLen;
+ }
+ break;
+ #endif /* WC_RSA_NO_PADDING */
- if (invalid) {
- WOLFSSL_MSG("RsaUnPad error, bad formatting");
- return RSA_PAD_E;
+ default:
+ WOLFSSL_MSG("Unknown RSA UnPad Type");
+ ret = RSA_PAD_E;
}
- *output = (byte *)(pkcsBlock + i);
- return outputLen;
+ /* silence warning if not used with padding scheme */
+ (void)hType;
+ (void)mgf;
+ (void)optLabel;
+ (void)labelLen;
+ (void)saltLen;
+ (void)bits;
+ (void)heap;
+
+ return ret;
}
+#if defined(WOLFSSL_XILINX_CRYPT)
+/*
+ * Xilinx hardened crypto acceleration.
+ *
+ * Returns 0 on success and negative values on error.
+ */
+static int wc_RsaFunctionXil(const byte* in, word32 inLen, byte* out,
+ word32* outLen, int type, RsaKey* key, WC_RNG* rng)
+{
+ int ret = 0;
+ word32 keyLen;
+ (void)rng;
+
+ keyLen = wc_RsaEncryptSize(key);
+ if (keyLen > *outLen) {
+ WOLFSSL_MSG("Output buffer is not big enough");
+ return BAD_FUNC_ARG;
+ }
+
+ if (inLen != keyLen) {
+ WOLFSSL_MSG("Expected that inLen equals RSA key length");
+ return BAD_FUNC_ARG;
+ }
-static int wc_RsaFunction(const byte* in, word32 inLen, byte* out,
+ switch(type) {
+ case RSA_PRIVATE_DECRYPT:
+ case RSA_PRIVATE_ENCRYPT:
+ /* Currently public exponent is loaded by default.
+ * In SDK 2017.1 RSA exponent values are expected to be of 4 bytes
+ * leading to private key operations with Xsecure_RsaDecrypt not being
+ * supported */
+ ret = RSA_WRONG_TYPE_E;
+ break;
+ case RSA_PUBLIC_ENCRYPT:
+ case RSA_PUBLIC_DECRYPT:
+ if (XSecure_RsaDecrypt(&(key->xRsa), in, out) != XST_SUCCESS) {
+ ret = BAD_STATE_E;
+ }
+ break;
+ default:
+ ret = RSA_WRONG_TYPE_E;
+ }
+
+ *outLen = keyLen;
+
+ return ret;
+}
+#endif /* WOLFSSL_XILINX_CRYPT */
+
+#ifdef WC_RSA_NONBLOCK
+static int wc_RsaFunctionNonBlock(const byte* in, word32 inLen, byte* out,
word32* outLen, int type, RsaKey* key)
{
- #define ERROR_OUT(x) { ret = (x); goto done;}
-
- mp_int tmp;
int ret = 0;
word32 keyLen, len;
- if (mp_init(&tmp) != MP_OKAY)
- return MP_INIT_E;
+ if (key == NULL || key->nb == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (key->nb->exptmod.state == TFM_EXPTMOD_NB_INIT) {
+ if (mp_init(&key->nb->tmp) != MP_OKAY) {
+ ret = MP_INIT_E;
+ }
- if (mp_read_unsigned_bin(&tmp, (byte*)in, inLen) != MP_OKAY)
- ERROR_OUT(MP_READ_E);
+ if (ret == 0) {
+ if (mp_read_unsigned_bin(&key->nb->tmp, (byte*)in, inLen) != MP_OKAY) {
+ ret = MP_READ_E;
+ }
+ }
+ }
- if (type == RSA_PRIVATE_DECRYPT || type == RSA_PRIVATE_ENCRYPT) {
- #ifdef RSA_LOW_MEM /* half as much memory but twice as slow */
- if (mp_exptmod(&tmp, &key->d, &key->n, &tmp) != MP_OKAY)
- ERROR_OUT(MP_EXPTMOD_E);
- #else
- #define INNER_ERROR_OUT(x) { ret = (x); goto inner_done; }
+ if (ret == 0) {
+ switch(type) {
+ case RSA_PRIVATE_DECRYPT:
+ case RSA_PRIVATE_ENCRYPT:
+ ret = fp_exptmod_nb(&key->nb->exptmod, &key->nb->tmp, &key->d,
+ &key->n, &key->nb->tmp);
+ if (ret == FP_WOULDBLOCK)
+ return ret;
+ if (ret != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+ break;
+
+ case RSA_PUBLIC_ENCRYPT:
+ case RSA_PUBLIC_DECRYPT:
+ ret = fp_exptmod_nb(&key->nb->exptmod, &key->nb->tmp, &key->e,
+ &key->n, &key->nb->tmp);
+ if (ret == FP_WOULDBLOCK)
+ return ret;
+ if (ret != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+ break;
+ default:
+ ret = RSA_WRONG_TYPE_E;
+ break;
+ }
+ }
- mp_int tmpa, tmpb;
+ if (ret == 0) {
+ keyLen = wc_RsaEncryptSize(key);
+ if (keyLen > *outLen)
+ ret = RSA_BUFFER_E;
+ }
+ if (ret == 0) {
+ len = mp_unsigned_bin_size(&key->nb->tmp);
- if (mp_init(&tmpa) != MP_OKAY)
- ERROR_OUT(MP_INIT_E);
+ /* pad front w/ zeros to match key length */
+ while (len < keyLen) {
+ *out++ = 0x00;
+ len++;
+ }
- if (mp_init(&tmpb) != MP_OKAY) {
- mp_clear(&tmpa);
- ERROR_OUT(MP_INIT_E);
- }
+ *outLen = keyLen;
- /* tmpa = tmp^dP mod p */
- if (mp_exptmod(&tmp, &key->dP, &key->p, &tmpa) != MP_OKAY)
- INNER_ERROR_OUT(MP_EXPTMOD_E);
+ /* convert */
+ if (mp_to_unsigned_bin(&key->nb->tmp, out) != MP_OKAY) {
+ ret = MP_TO_E;
+ }
+ }
- /* tmpb = tmp^dQ mod q */
- if (mp_exptmod(&tmp, &key->dQ, &key->q, &tmpb) != MP_OKAY)
- INNER_ERROR_OUT(MP_EXPTMOD_E);
+ mp_clear(&key->nb->tmp);
- /* tmp = (tmpa - tmpb) * qInv (mod p) */
- if (mp_sub(&tmpa, &tmpb, &tmp) != MP_OKAY)
- INNER_ERROR_OUT(MP_SUB_E);
+ return ret;
+}
+#endif /* WC_RSA_NONBLOCK */
- if (mp_mulmod(&tmp, &key->u, &key->p, &tmp) != MP_OKAY)
- INNER_ERROR_OUT(MP_MULMOD_E);
+#ifdef WOLFSSL_AFALG_XILINX_RSA
+#ifndef ERROR_OUT
+#define ERROR_OUT(x) ret = (x); goto done
+#endif
- /* tmp = tmpb + q * tmp */
- if (mp_mul(&tmp, &key->q, &tmp) != MP_OKAY)
- INNER_ERROR_OUT(MP_MUL_E);
+static const char WC_TYPE_ASYMKEY[] = "skcipher";
+static const char WC_NAME_RSA[] = "xilinx-zynqmp-rsa";
+#ifndef MAX_XILINX_RSA_KEY
+ /* max key size of 4096 bits / 512 bytes */
+ #define MAX_XILINX_RSA_KEY 512
+#endif
+static const byte XILINX_RSA_FLAG[] = {0x1};
- if (mp_add(&tmp, &tmpb, &tmp) != MP_OKAY)
- INNER_ERROR_OUT(MP_ADD_E);
- inner_done:
- mp_clear(&tmpa);
- mp_clear(&tmpb);
+/* AF_ALG implementation of RSA */
+static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out,
+ word32* outLen, int type, RsaKey* key, WC_RNG* rng)
+{
+ struct msghdr msg;
+ struct cmsghdr* cmsg;
+ struct iovec iov;
+ byte* keyBuf = NULL;
+ word32 keyBufSz = 0;
+ char cbuf[CMSG_SPACE(4) + CMSG_SPACE(sizeof(struct af_alg_iv) + 1)] = {0};
+ int ret = 0;
+ int op = 0; /* decryption vs encryption flag */
+ word32 keyLen;
- if (ret != 0) return ret;
+ /* input and output buffer need to be aligned */
+ ALIGN64 byte outBuf[MAX_XILINX_RSA_KEY];
+ ALIGN64 byte inBuf[MAX_XILINX_RSA_KEY];
- #endif /* RSA_LOW_MEM */
+ XMEMSET(&msg, 0, sizeof(struct msghdr));
+ (void)rng;
+
+ keyLen = wc_RsaEncryptSize(key);
+ if (keyLen > *outLen) {
+ ERROR_OUT(RSA_BUFFER_E);
+ }
+
+ if (keyLen > MAX_XILINX_RSA_KEY) {
+ WOLFSSL_MSG("RSA key size larger than supported");
+ ERROR_OUT(BAD_FUNC_ARG);
}
- else if (type == RSA_PUBLIC_ENCRYPT || type == RSA_PUBLIC_DECRYPT) {
- if (mp_exptmod(&tmp, &key->e, &key->n, &tmp) != MP_OKAY)
- ERROR_OUT(MP_EXPTMOD_E);
+
+ if ((keyBuf = (byte*)XMALLOC(keyLen * 2, key->heap, DYNAMIC_TYPE_KEY))
+ == NULL) {
+ ERROR_OUT(MEMORY_E);
}
- else
- ERROR_OUT(RSA_WRONG_TYPE_E);
- keyLen = mp_unsigned_bin_size(&key->n);
- if (keyLen > *outLen)
- ERROR_OUT(RSA_BUFFER_E);
+ if ((ret = mp_to_unsigned_bin(&(key->n), keyBuf)) != MP_OKAY) {
+ ERROR_OUT(MP_TO_E);
+ }
- len = mp_unsigned_bin_size(&tmp);
+ switch(type) {
+ case RSA_PRIVATE_DECRYPT:
+ case RSA_PRIVATE_ENCRYPT:
+ op = 1; /* set as decrypt */
+ {
+ keyBufSz = mp_unsigned_bin_size(&(key->d));
+ if ((mp_to_unsigned_bin(&(key->d), keyBuf + keyLen))
+ != MP_OKAY) {
+ ERROR_OUT(MP_TO_E);
+ }
+ }
+ break;
+
+ case RSA_PUBLIC_DECRYPT:
+ case RSA_PUBLIC_ENCRYPT: {
+ word32 exp = 0;
+ word32 eSz = mp_unsigned_bin_size(&(key->e));
+ if ((mp_to_unsigned_bin(&(key->e), (byte*)&exp +
+ (sizeof(word32) - eSz))) != MP_OKAY) {
+ ERROR_OUT(MP_TO_E);
+ }
+ keyBufSz = sizeof(word32);
+ XMEMCPY(keyBuf + keyLen, (byte*)&exp, keyBufSz);
+ break;
+ }
- /* pad front w/ zeros to match key length */
- while (len < keyLen) {
- *out++ = 0x00;
- len++;
+ default:
+ ERROR_OUT(RSA_WRONG_TYPE_E);
}
+ keyBufSz += keyLen; /* add size of modulus */
- *outLen = keyLen;
+ /* check for existing sockets before creating new ones */
+ if (key->alFd > 0) {
+ close(key->alFd);
+ key->alFd = WC_SOCK_NOTSET;
+ }
+ if (key->rdFd > 0) {
+ close(key->rdFd);
+ key->rdFd = WC_SOCK_NOTSET;
+ }
- /* convert */
- if (mp_to_unsigned_bin(&tmp, out) != MP_OKAY)
- ERROR_OUT(MP_TO_E);
+ /* create new sockets and set the key to use */
+ if ((key->alFd = wc_Afalg_Socket()) < 0) {
+ WOLFSSL_MSG("Unable to create socket");
+ ERROR_OUT(key->alFd);
+ }
+ if ((key->rdFd = wc_Afalg_CreateRead(key->alFd, WC_TYPE_ASYMKEY,
+ WC_NAME_RSA)) < 0) {
+ WOLFSSL_MSG("Unable to bind and create read/send socket");
+ ERROR_OUT(key->rdFd);
+ }
+ if ((ret = setsockopt(key->alFd, SOL_ALG, ALG_SET_KEY, keyBuf,
+ keyBufSz)) < 0) {
+ WOLFSSL_MSG("Error setting RSA key");
+ ERROR_OUT(ret);
+ }
+
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if ((ret = wc_Afalg_SetOp(cmsg, op)) < 0) {
+ ERROR_OUT(ret);
+ }
+
+ /* set flag in IV spot, needed for Xilinx hardware acceleration use */
+ cmsg = CMSG_NXTHDR(&msg, cmsg);
+ if ((ret = wc_Afalg_SetIv(cmsg, (byte*)XILINX_RSA_FLAG,
+ sizeof(XILINX_RSA_FLAG))) != 0) {
+ ERROR_OUT(ret);
+ }
+
+ /* compose and send msg */
+ XMEMCPY(inBuf, (byte*)in, inLen); /* for alignment */
+ iov.iov_base = inBuf;
+ iov.iov_len = inLen;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ if ((ret = sendmsg(key->rdFd, &msg, 0)) <= 0) {
+ ERROR_OUT(WC_AFALG_SOCK_E);
+ }
+
+ if ((ret = read(key->rdFd, outBuf, inLen)) <= 0) {
+ ERROR_OUT(WC_AFALG_SOCK_E);
+ }
+ XMEMCPY(out, outBuf, ret);
+ *outLen = keyLen;
done:
- mp_clear(&tmp);
- if (ret == MP_EXPTMOD_E) {
- WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem");
+ /* clear key data and free buffer */
+ if (keyBuf != NULL) {
+ ForceZero(keyBuf, keyBufSz);
}
+ XFREE(keyBuf, key->heap, DYNAMIC_TYPE_KEY);
+
+ if (key->alFd > 0) {
+ close(key->alFd);
+ key->alFd = WC_SOCK_NOTSET;
+ }
+ if (key->rdFd > 0) {
+ close(key->rdFd);
+ key->rdFd = WC_SOCK_NOTSET;
+ }
+
return ret;
}
-
-int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, word32 outLen,
- RsaKey* key, RNG* rng)
+#else
+static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out,
+ word32* outLen, int type, RsaKey* key, WC_RNG* rng)
{
- int sz, ret;
+#ifndef WOLFSSL_SP_MATH
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* tmp;
+#ifdef WC_RSA_BLINDING
+ mp_int* rnd;
+ mp_int* rndi;
+#endif
+#else
+ mp_int tmp[1];
+#ifdef WC_RSA_BLINDING
+ mp_int rnd[1], rndi[1];
+#endif
+#endif
+ int ret = 0;
+ word32 keyLen = 0;
+#endif
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+#ifndef WOLFSSL_SP_NO_2048
+ if (mp_count_bits(&key->n) == 2048) {
+ switch(type) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ case RSA_PRIVATE_DECRYPT:
+ case RSA_PRIVATE_ENCRYPT:
+ #ifdef WC_RSA_BLINDING
+ if (rng == NULL)
+ return MISSING_RNG_E;
+ #endif
+ #ifndef RSA_LOW_MEM
+ return sp_RsaPrivate_2048(in, inLen, &key->d, &key->p, &key->q,
+ &key->dP, &key->dQ, &key->u, &key->n,
+ out, outLen);
+ #else
+ return sp_RsaPrivate_2048(in, inLen, &key->d, &key->p, &key->q,
+ NULL, NULL, NULL, &key->n, out, outLen);
+ #endif
+#endif
+ case RSA_PUBLIC_ENCRYPT:
+ case RSA_PUBLIC_DECRYPT:
+ return sp_RsaPublic_2048(in, inLen, &key->e, &key->n, out, outLen);
+ }
+ }
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+ if (mp_count_bits(&key->n) == 3072) {
+ switch(type) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ case RSA_PRIVATE_DECRYPT:
+ case RSA_PRIVATE_ENCRYPT:
+ #ifdef WC_RSA_BLINDING
+ if (rng == NULL)
+ return MISSING_RNG_E;
+ #endif
+ #ifndef RSA_LOW_MEM
+ return sp_RsaPrivate_3072(in, inLen, &key->d, &key->p, &key->q,
+ &key->dP, &key->dQ, &key->u, &key->n,
+ out, outLen);
+ #else
+ return sp_RsaPrivate_3072(in, inLen, &key->d, &key->p, &key->q,
+ NULL, NULL, NULL, &key->n, out, outLen);
+ #endif
+#endif
+ case RSA_PUBLIC_ENCRYPT:
+ case RSA_PUBLIC_DECRYPT:
+ return sp_RsaPublic_3072(in, inLen, &key->e, &key->n, out, outLen);
+ }
+ }
+#endif
+#ifdef WOLFSSL_SP_4096
+ if (mp_count_bits(&key->n) == 4096) {
+ switch(type) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ case RSA_PRIVATE_DECRYPT:
+ case RSA_PRIVATE_ENCRYPT:
+ #ifdef WC_RSA_BLINDING
+ if (rng == NULL)
+ return MISSING_RNG_E;
+ #endif
+ #ifndef RSA_LOW_MEM
+ return sp_RsaPrivate_4096(in, inLen, &key->d, &key->p, &key->q,
+ &key->dP, &key->dQ, &key->u, &key->n,
+ out, outLen);
+ #else
+ return sp_RsaPrivate_4096(in, inLen, &key->d, &key->p, &key->q,
+ NULL, NULL, NULL, &key->n, out, outLen);
+ #endif
+#endif
+ case RSA_PUBLIC_ENCRYPT:
+ case RSA_PUBLIC_DECRYPT:
+ return sp_RsaPublic_4096(in, inLen, &key->e, &key->n, out, outLen);
+ }
+ }
+#endif
+#endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC)
- return CaviumRsaPublicEncrypt(in, inLen, out, outLen, key);
+#ifdef WOLFSSL_SP_MATH
+ (void)rng;
+ WOLFSSL_MSG("SP Key Size Error");
+ return WC_KEY_SIZE_E;
+#else
+ (void)rng;
+
+#ifdef WOLFSSL_SMALL_STACK
+ tmp = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_RSA);
+ if (tmp == NULL)
+ return MEMORY_E;
+#ifdef WC_RSA_BLINDING
+ rnd = (mp_int*)XMALLOC(sizeof(mp_int) * 2, key->heap, DYNAMIC_TYPE_RSA);
+ if (rnd == NULL) {
+ XFREE(tmp, key->heap, DYNAMIC_TYPE_RSA);
+ return MEMORY_E;
+ }
+ rndi = rnd + 1;
+#endif /* WC_RSA_BLINDING */
+#endif /* WOLFSSL_SMALL_STACK */
+
+ if (mp_init(tmp) != MP_OKAY)
+ ret = MP_INIT_E;
+
+#ifdef WC_RSA_BLINDING
+ if (ret == 0) {
+ if (type == RSA_PRIVATE_DECRYPT || type == RSA_PRIVATE_ENCRYPT) {
+ if (mp_init_multi(rnd, rndi, NULL, NULL, NULL, NULL) != MP_OKAY) {
+ mp_clear(tmp);
+ ret = MP_INIT_E;
+ }
+ }
+ }
#endif
- sz = mp_unsigned_bin_size(&key->n);
- if (sz > (int)outLen)
- return RSA_BUFFER_E;
+#ifndef TEST_UNPAD_CONSTANT_TIME
+ if (ret == 0 && mp_read_unsigned_bin(tmp, (byte*)in, inLen) != MP_OKAY)
+ ret = MP_READ_E;
+
+ if (ret == 0) {
+ switch(type) {
+ #ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ case RSA_PRIVATE_DECRYPT:
+ case RSA_PRIVATE_ENCRYPT:
+ {
+ #if defined(WC_RSA_BLINDING) && !defined(WC_NO_RNG)
+ /* blind */
+ ret = mp_rand(rnd, get_digit_count(&key->n), rng);
+
+ /* rndi = 1/rnd mod n */
+ if (ret == 0 && mp_invmod(rnd, &key->n, rndi) != MP_OKAY)
+ ret = MP_INVMOD_E;
+
+ /* rnd = rnd^e */
+ if (ret == 0 && mp_exptmod(rnd, &key->e, &key->n, rnd) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+
+ /* tmp = tmp*rnd mod n */
+ if (ret == 0 && mp_mulmod(tmp, rnd, &key->n, tmp) != MP_OKAY)
+ ret = MP_MULMOD_E;
+ #endif /* WC_RSA_BLINDING && !WC_NO_RNG */
- if (inLen > (word32)(sz - RSA_MIN_PAD_SZ))
- return RSA_BUFFER_E;
+ #ifdef RSA_LOW_MEM /* half as much memory but twice as slow */
+ if (ret == 0 && mp_exptmod(tmp, &key->d, &key->n, tmp) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+ #else
+ if (ret == 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ mp_int* tmpa;
+ mp_int* tmpb = NULL;
+ #else
+ mp_int tmpa[1], tmpb[1];
+ #endif
+ int cleara = 0, clearb = 0;
+
+ #ifdef WOLFSSL_SMALL_STACK
+ tmpa = (mp_int*)XMALLOC(sizeof(mp_int) * 2,
+ key->heap, DYNAMIC_TYPE_RSA);
+ if (tmpa != NULL)
+ tmpb = tmpa + 1;
+ else
+ ret = MEMORY_E;
+ #endif
+
+ if (ret == 0) {
+ if (mp_init(tmpa) != MP_OKAY)
+ ret = MP_INIT_E;
+ else
+ cleara = 1;
+ }
+
+ if (ret == 0) {
+ if (mp_init(tmpb) != MP_OKAY)
+ ret = MP_INIT_E;
+ else
+ clearb = 1;
+ }
+
+ /* tmpa = tmp^dP mod p */
+ if (ret == 0 && mp_exptmod(tmp, &key->dP, &key->p,
+ tmpa) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+
+ /* tmpb = tmp^dQ mod q */
+ if (ret == 0 && mp_exptmod(tmp, &key->dQ, &key->q,
+ tmpb) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+
+ /* tmp = (tmpa - tmpb) * qInv (mod p) */
+ if (ret == 0 && mp_sub(tmpa, tmpb, tmp) != MP_OKAY)
+ ret = MP_SUB_E;
+
+ if (ret == 0 && mp_mulmod(tmp, &key->u, &key->p,
+ tmp) != MP_OKAY)
+ ret = MP_MULMOD_E;
+
+ /* tmp = tmpb + q * tmp */
+ if (ret == 0 && mp_mul(tmp, &key->q, tmp) != MP_OKAY)
+ ret = MP_MUL_E;
+
+ if (ret == 0 && mp_add(tmp, tmpb, tmp) != MP_OKAY)
+ ret = MP_ADD_E;
+
+ #ifdef WOLFSSL_SMALL_STACK
+ if (tmpa != NULL)
+ #endif
+ {
+ if (cleara)
+ mp_clear(tmpa);
+ if (clearb)
+ mp_clear(tmpb);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmpa, key->heap, DYNAMIC_TYPE_RSA);
+ #endif
+ }
+ } /* tmpa/b scope */
+ #endif /* RSA_LOW_MEM */
- ret = wc_RsaPad(in, inLen, out, sz, RSA_BLOCK_TYPE_2, rng);
- if (ret != 0)
- return ret;
+ #ifdef WC_RSA_BLINDING
+ /* unblind */
+ if (ret == 0 && mp_mulmod(tmp, rndi, &key->n, tmp) != MP_OKAY)
+ ret = MP_MULMOD_E;
+ #endif /* WC_RSA_BLINDING */
+
+ break;
+ }
+ #endif
+ case RSA_PUBLIC_ENCRYPT:
+ case RSA_PUBLIC_DECRYPT:
+ #ifdef WOLFSSL_XILINX_CRYPT
+ ret = wc_RsaFunctionXil(in, inLen, out, outLen, type, key, rng);
+ #else
+ if (mp_exptmod_nct(tmp, &key->e, &key->n, tmp) != MP_OKAY)
+ ret = MP_EXPTMOD_E;
+ #endif
+ break;
+ default:
+ ret = RSA_WRONG_TYPE_E;
+ break;
+ }
+ }
- if ((ret = wc_RsaFunction(out, sz, out, &outLen,
- RSA_PUBLIC_ENCRYPT, key)) < 0)
- sz = ret;
+ if (ret == 0) {
+ keyLen = wc_RsaEncryptSize(key);
+ if (keyLen > *outLen)
+ ret = RSA_BUFFER_E;
+ }
+ if (ret == 0) {
+ *outLen = keyLen;
+ if (mp_to_unsigned_bin_len(tmp, out, keyLen) != MP_OKAY)
+ ret = MP_TO_E;
+ }
+#else
+ (void)type;
+ (void)key;
+ (void)keyLen;
+ XMEMCPY(out, in, inLen);
+ *outLen = inLen;
+#endif
- return sz;
+ mp_clear(tmp);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmp, key->heap, DYNAMIC_TYPE_RSA);
+#endif
+#ifdef WC_RSA_BLINDING
+ if (type == RSA_PRIVATE_DECRYPT || type == RSA_PRIVATE_ENCRYPT) {
+ mp_clear(rndi);
+ mp_clear(rnd);
+ }
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(rnd, key->heap, DYNAMIC_TYPE_RSA);
+#endif
+#endif /* WC_RSA_BLINDING */
+ return ret;
+#endif /* WOLFSSL_SP_MATH */
}
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out,
+ word32* outLen, int type, RsaKey* key, WC_RNG* rng)
+{
+ int ret = 0;
+
+ (void)rng;
+
+#ifdef WOLFSSL_ASYNC_CRYPT_TEST
+ if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_RSA_FUNC)) {
+ WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+ testDev->rsaFunc.in = in;
+ testDev->rsaFunc.inSz = inLen;
+ testDev->rsaFunc.out = out;
+ testDev->rsaFunc.outSz = outLen;
+ testDev->rsaFunc.type = type;
+ testDev->rsaFunc.key = key;
+ testDev->rsaFunc.rng = rng;
+ return WC_PENDING_E;
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT_TEST */
+
+ switch(type) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ case RSA_PRIVATE_DECRYPT:
+ case RSA_PRIVATE_ENCRYPT:
+ #ifdef HAVE_CAVIUM
+ key->dataLen = key->n.raw.len;
+ ret = NitroxRsaExptMod(in, inLen,
+ key->d.raw.buf, key->d.raw.len,
+ key->n.raw.buf, key->n.raw.len,
+ out, outLen, key);
+ #elif defined(HAVE_INTEL_QA)
+ #ifdef RSA_LOW_MEM
+ ret = IntelQaRsaPrivate(&key->asyncDev, in, inLen,
+ &key->d.raw, &key->n.raw,
+ out, outLen);
+ #else
+ ret = IntelQaRsaCrtPrivate(&key->asyncDev, in, inLen,
+ &key->p.raw, &key->q.raw,
+ &key->dP.raw, &key->dQ.raw,
+ &key->u.raw,
+ out, outLen);
+ #endif
+ #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
+ #endif
+ break;
+#endif
+ case RSA_PUBLIC_ENCRYPT:
+ case RSA_PUBLIC_DECRYPT:
+ #ifdef HAVE_CAVIUM
+ key->dataLen = key->n.raw.len;
+ ret = NitroxRsaExptMod(in, inLen,
+ key->e.raw.buf, key->e.raw.len,
+ key->n.raw.buf, key->n.raw.len,
+ out, outLen, key);
+ #elif defined(HAVE_INTEL_QA)
+ ret = IntelQaRsaPublic(&key->asyncDev, in, inLen,
+ &key->e.raw, &key->n.raw,
+ out, outLen);
+ #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+ ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
+ #endif
+ break;
-int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out, RsaKey* key)
+ default:
+ ret = RSA_WRONG_TYPE_E;
+ }
+
+ return ret;
+}
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_RSA */
+
+#if defined(WC_RSA_DIRECT) || defined(WC_RSA_NO_PADDING)
+/* Function that does the RSA operation directly with no padding.
+ *
+ * in buffer to do operation on
+ * inLen length of input buffer
+ * out buffer to hold results
+ * outSz gets set to size of result buffer. Should be passed in as length
+ * of out buffer. If the pointer "out" is null then outSz gets set to
+ * the expected buffer size needed and LENGTH_ONLY_E gets returned.
+ * key RSA key to use for encrypt/decrypt
+ * type if using private or public key {RSA_PUBLIC_ENCRYPT,
+ * RSA_PUBLIC_DECRYPT, RSA_PRIVATE_ENCRYPT, RSA_PRIVATE_DECRYPT}
+ * rng wolfSSL RNG to use if needed
+ *
+ * returns size of result on success
+ */
+int wc_RsaDirect(byte* in, word32 inLen, byte* out, word32* outSz,
+ RsaKey* key, int type, WC_RNG* rng)
{
int ret;
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC) {
- ret = CaviumRsaPrivateDecrypt(in, inLen, in, inLen, key);
- if (ret > 0)
- *out = in;
- return ret;
+ if (in == NULL || outSz == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
}
-#endif
- if ((ret = wc_RsaFunction(in, inLen, in, &inLen, RSA_PRIVATE_DECRYPT, key))
- < 0) {
+ /* sanity check on type of RSA operation */
+ switch (type) {
+ case RSA_PUBLIC_ENCRYPT:
+ case RSA_PUBLIC_DECRYPT:
+ case RSA_PRIVATE_ENCRYPT:
+ case RSA_PRIVATE_DECRYPT:
+ break;
+ default:
+ WOLFSSL_MSG("Bad RSA type");
+ return BAD_FUNC_ARG;
+ }
+
+ if ((ret = wc_RsaEncryptSize(key)) < 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (inLen != (word32)ret) {
+ WOLFSSL_MSG("Bad input length. Should be RSA key size");
+ return BAD_FUNC_ARG;
+ }
+
+ if (out == NULL) {
+ *outSz = inLen;
+ return LENGTH_ONLY_E;
+ }
+
+ switch (key->state) {
+ case RSA_STATE_NONE:
+ case RSA_STATE_ENCRYPT_PAD:
+ case RSA_STATE_ENCRYPT_EXPTMOD:
+ case RSA_STATE_DECRYPT_EXPTMOD:
+ case RSA_STATE_DECRYPT_UNPAD:
+ key->state = (type == RSA_PRIVATE_ENCRYPT ||
+ type == RSA_PUBLIC_ENCRYPT) ? RSA_STATE_ENCRYPT_EXPTMOD:
+ RSA_STATE_DECRYPT_EXPTMOD;
+
+ key->dataLen = *outSz;
+
+ ret = wc_RsaFunction(in, inLen, out, &key->dataLen, type, key, rng);
+ if (ret >= 0 || ret == WC_PENDING_E) {
+ key->state = (type == RSA_PRIVATE_ENCRYPT ||
+ type == RSA_PUBLIC_ENCRYPT) ? RSA_STATE_ENCRYPT_RES:
+ RSA_STATE_DECRYPT_RES;
+ }
+ if (ret < 0) {
+ break;
+ }
+
+ FALL_THROUGH;
+
+ case RSA_STATE_ENCRYPT_RES:
+ case RSA_STATE_DECRYPT_RES:
+ ret = key->dataLen;
+ break;
+
+ default:
+ ret = BAD_STATE_E;
+ }
+
+ /* if async pending then skip cleanup*/
+ if (ret == WC_PENDING_E
+ #ifdef WC_RSA_NONBLOCK
+ || ret == FP_WOULDBLOCK
+ #endif
+ ) {
return ret;
}
-
- return RsaUnPad(in, inLen, out, RSA_BLOCK_TYPE_2);
+
+ key->state = RSA_STATE_NONE;
+ wc_RsaCleanup(key);
+
+ return ret;
}
+#endif /* WC_RSA_DIRECT || WC_RSA_NO_PADDING */
+#if defined(WOLFSSL_CRYPTOCELL)
+static int cc310_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out,
+ word32 outLen, RsaKey* key)
+{
+ CRYSError_t ret = 0;
+ CRYS_RSAPrimeData_t primeData;
+ int modulusSize = wc_RsaEncryptSize(key);
-int wc_RsaPrivateDecrypt(const byte* in, word32 inLen, byte* out, word32 outLen,
- RsaKey* key)
+ /* The out buffer must be at least modulus size bytes long. */
+ if (outLen < modulusSize)
+ return BAD_FUNC_ARG;
+
+ ret = CRYS_RSA_PKCS1v15_Encrypt(&wc_rndState,
+ wc_rndGenVectFunc,
+ &key->ctx.pubKey,
+ &primeData,
+ (byte*)in,
+ inLen,
+ out);
+
+ if (ret != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Encrypt failed");
+ return -1;
+ }
+
+ return modulusSize;
+}
+static int cc310_RsaPublicDecrypt(const byte* in, word32 inLen, byte* out,
+ word32 outLen, RsaKey* key)
{
- int plainLen;
- byte* tmp;
- byte* pad = 0;
+ CRYSError_t ret = 0;
+ CRYS_RSAPrimeData_t primeData;
+ uint16_t actualOutLen = outLen;
+
+ ret = CRYS_RSA_PKCS1v15_Decrypt(&key->ctx.privKey,
+ &primeData,
+ (byte*)in,
+ inLen,
+ out,
+ &actualOutLen);
+
+ if (ret != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Decrypt failed");
+ return -1;
+ }
+ return actualOutLen;
+}
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC)
- return CaviumRsaPrivateDecrypt(in, inLen, out, outLen, key);
-#endif
+int cc310_RsaSSL_Sign(const byte* in, word32 inLen, byte* out,
+ word32 outLen, RsaKey* key, CRYS_RSA_HASH_OpMode_t mode)
+{
+ CRYSError_t ret = 0;
+ uint16_t actualOutLen = outLen*sizeof(byte);
+ CRYS_RSAPrivUserContext_t contextPrivate;
+
+ ret = CRYS_RSA_PKCS1v15_Sign(&wc_rndState,
+ wc_rndGenVectFunc,
+ &contextPrivate,
+ &key->ctx.privKey,
+ mode,
+ (byte*)in,
+ inLen,
+ out,
+ &actualOutLen);
+
+ if (ret != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Sign failed");
+ return -1;
+ }
+ return actualOutLen;
+}
- tmp = (byte*)XMALLOC(inLen, key->heap, DYNAMIC_TYPE_RSA);
- if (tmp == NULL) {
- return MEMORY_E;
+int cc310_RsaSSL_Verify(const byte* in, word32 inLen, byte* sig,
+ RsaKey* key, CRYS_RSA_HASH_OpMode_t mode)
+{
+ CRYSError_t ret = 0;
+ CRYS_RSAPubUserContext_t contextPub;
+
+ /* verify the signature in the sig pointer */
+ ret = CRYS_RSA_PKCS1v15_Verify(&contextPub,
+ &key->ctx.pubKey,
+ mode,
+ (byte*)in,
+ inLen,
+ sig);
+
+ if (ret != SA_SILIB_RET_OK){
+ WOLFSSL_MSG("CRYS_RSA_PKCS1v15_Verify failed");
+ return -1;
}
- XMEMCPY(tmp, in, inLen);
+ return ret;
+}
+#endif /* WOLFSSL_CRYPTOCELL */
- if ( (plainLen = wc_RsaPrivateDecryptInline(tmp, inLen, &pad, key) ) < 0) {
- XFREE(tmp, key->heap, DYNAMIC_TYPE_RSA);
- return plainLen;
+int wc_RsaFunction(const byte* in, word32 inLen, byte* out,
+ word32* outLen, int type, RsaKey* key, WC_RNG* rng)
+{
+ int ret = 0;
+
+ if (key == NULL || in == NULL || inLen == 0 || out == NULL ||
+ outLen == NULL || *outLen == 0 || type == RSA_TYPE_UNKNOWN) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLF_CRYPTO_CB
+ if (key->devId != INVALID_DEVID) {
+ ret = wc_CryptoCb_Rsa(in, inLen, out, outLen, type, key, rng);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ ret = 0; /* reset error code and try using software */
+ }
+#endif
+
+#ifndef TEST_UNPAD_CONSTANT_TIME
+#ifndef NO_RSA_BOUNDS_CHECK
+ if (type == RSA_PRIVATE_DECRYPT &&
+ key->state == RSA_STATE_DECRYPT_EXPTMOD) {
+
+ /* Check that 1 < in < n-1. (Requirement of 800-56B.) */
+#ifdef WOLFSSL_SMALL_STACK
+ mp_int* c;
+#else
+ mp_int c[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ c = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_RSA);
+ if (c == NULL)
+ ret = MEMORY_E;
+#endif
+
+ if (mp_init(c) != MP_OKAY)
+ ret = MEMORY_E;
+ if (ret == 0) {
+ if (mp_read_unsigned_bin(c, in, inLen) != 0)
+ ret = MP_READ_E;
+ }
+ if (ret == 0) {
+ /* check c > 1 */
+ if (mp_cmp_d(c, 1) != MP_GT)
+ ret = RSA_OUT_OF_RANGE_E;
+ }
+ if (ret == 0) {
+ /* add c+1 */
+ if (mp_add_d(c, 1, c) != MP_OKAY)
+ ret = MP_ADD_E;
+ }
+ if (ret == 0) {
+ /* check c+1 < n */
+ if (mp_cmp(c, &key->n) != MP_LT)
+ ret = RSA_OUT_OF_RANGE_E;
+ }
+ mp_clear(c);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(c, key->heap, DYNAMIC_TYPE_RSA);
+#endif
+
+ if (ret != 0)
+ return ret;
+ }
+#endif /* NO_RSA_BOUNDS_CHECK */
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
+ key->n.raw.len > 0) {
+ ret = wc_RsaFunctionAsync(in, inLen, out, outLen, type, key, rng);
+ }
+ else
+#endif
+#ifdef WC_RSA_NONBLOCK
+ if (key->nb) {
+ ret = wc_RsaFunctionNonBlock(in, inLen, out, outLen, type, key);
}
- if (plainLen > (int)outLen)
- plainLen = BAD_FUNC_ARG;
else
- XMEMCPY(out, pad, plainLen);
+#endif
+ {
+ ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
+ }
- ForceZero(tmp, inLen);
- XFREE(tmp, key->heap, DYNAMIC_TYPE_RSA);
+ /* handle error */
+ if (ret < 0 && ret != WC_PENDING_E
+ #ifdef WC_RSA_NONBLOCK
+ && ret != FP_WOULDBLOCK
+ #endif
+ ) {
+ if (ret == MP_EXPTMOD_E) {
+ /* This can happen due to incorrectly set FP_MAX_BITS or missing XREALLOC */
+ WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem");
+ }
+
+ key->state = RSA_STATE_NONE;
+ wc_RsaCleanup(key);
+ }
- return plainLen;
+ return ret;
}
-/* for Rsa Verify */
-int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key)
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+/* Internal Wrappers */
+/* Gives the option of choosing padding type
+ in : input to be encrypted
+ inLen: length of input buffer
+ out: encrypted output
+ outLen: length of encrypted output buffer
+ key : wolfSSL initialized RSA key struct
+ rng : wolfSSL initialized random number struct
+ rsa_type : type of RSA: RSA_PUBLIC_ENCRYPT, RSA_PUBLIC_DECRYPT,
+ RSA_PRIVATE_ENCRYPT or RSA_PRIVATE_DECRYPT
+ pad_value: RSA_BLOCK_TYPE_1 or RSA_BLOCK_TYPE_2
+ pad_type : type of padding: WC_RSA_PKCSV15_PAD, WC_RSA_OAEP_PAD,
+ WC_RSA_NO_PAD or WC_RSA_PSS_PAD
+ hash : type of hash algorithm to use found in wolfssl/wolfcrypt/hash.h
+ mgf : type of mask generation function to use
+ label : optional label
+ labelSz : size of optional label buffer
+ saltLen : Length of salt used in PSS
+ rng : random number generator */
+static int RsaPublicEncryptEx(const byte* in, word32 inLen, byte* out,
+ word32 outLen, RsaKey* key, int rsa_type,
+ byte pad_value, int pad_type,
+ enum wc_HashType hash, int mgf,
+ byte* label, word32 labelSz, int saltLen,
+ WC_RNG* rng)
{
- int ret;
+ int ret, sz;
+
+ if (in == NULL || inLen == 0 || out == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ sz = wc_RsaEncryptSize(key);
+ if (sz > (int)outLen) {
+ return RSA_BUFFER_E;
+ }
+
+ if (sz < RSA_MIN_PAD_SZ) {
+ return WC_KEY_SIZE_E;
+ }
+
+ if (inLen > (word32)(sz - RSA_MIN_PAD_SZ)) {
+#ifdef WC_RSA_NO_PADDING
+ /* In the case that no padding is used the input length can and should
+ * be the same size as the RSA key. */
+ if (pad_type != WC_RSA_NO_PAD)
+#endif
+ return RSA_BUFFER_E;
+ }
+
+ switch (key->state) {
+ case RSA_STATE_NONE:
+ case RSA_STATE_ENCRYPT_PAD:
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+ defined(HAVE_CAVIUM)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
+ pad_type != WC_RSA_PSS_PAD && key->n.raw.buf) {
+ /* Async operations that include padding */
+ if (rsa_type == RSA_PUBLIC_ENCRYPT &&
+ pad_value == RSA_BLOCK_TYPE_2) {
+ key->state = RSA_STATE_ENCRYPT_RES;
+ key->dataLen = key->n.raw.len;
+ return NitroxRsaPublicEncrypt(in, inLen, out, outLen, key);
+ }
+ else if (rsa_type == RSA_PRIVATE_ENCRYPT &&
+ pad_value == RSA_BLOCK_TYPE_1) {
+ key->state = RSA_STATE_ENCRYPT_RES;
+ key->dataLen = key->n.raw.len;
+ return NitroxRsaSSL_Sign(in, inLen, out, outLen, key);
+ }
+ }
+ #elif defined(WOLFSSL_CRYPTOCELL)
+ if (rsa_type == RSA_PUBLIC_ENCRYPT &&
+ pad_value == RSA_BLOCK_TYPE_2) {
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC) {
- ret = CaviumRsaSSL_Verify(in, inLen, in, inLen, key);
- if (ret > 0)
- *out = in;
+ return cc310_RsaPublicEncrypt(in, inLen, out, outLen, key);
+ }
+ else if (rsa_type == RSA_PRIVATE_ENCRYPT &&
+ pad_value == RSA_BLOCK_TYPE_1) {
+ return cc310_RsaSSL_Sign(in, inLen, out, outLen, key,
+ cc310_hashModeRSA(hash, 0));
+ }
+ #endif /* WOLFSSL_CRYPTOCELL */
+
+ key->state = RSA_STATE_ENCRYPT_PAD;
+ ret = wc_RsaPad_ex(in, inLen, out, sz, pad_value, rng, pad_type, hash,
+ mgf, label, labelSz, saltLen, mp_count_bits(&key->n),
+ key->heap);
+ if (ret < 0) {
+ break;
+ }
+
+ key->state = RSA_STATE_ENCRYPT_EXPTMOD;
+ FALL_THROUGH;
+
+ case RSA_STATE_ENCRYPT_EXPTMOD:
+
+ key->dataLen = outLen;
+ ret = wc_RsaFunction(out, sz, out, &key->dataLen, rsa_type, key, rng);
+
+ if (ret >= 0 || ret == WC_PENDING_E) {
+ key->state = RSA_STATE_ENCRYPT_RES;
+ }
+ if (ret < 0) {
+ break;
+ }
+
+ FALL_THROUGH;
+
+ case RSA_STATE_ENCRYPT_RES:
+ ret = key->dataLen;
+ break;
+
+ default:
+ ret = BAD_STATE_E;
+ break;
+ }
+
+ /* if async pending then return and skip done cleanup below */
+ if (ret == WC_PENDING_E
+ #ifdef WC_RSA_NONBLOCK
+ || ret == FP_WOULDBLOCK
+ #endif
+ ) {
return ret;
}
+
+ key->state = RSA_STATE_NONE;
+ wc_RsaCleanup(key);
+
+ return ret;
+}
+
#endif
- if ((ret = wc_RsaFunction(in, inLen, in, &inLen, RSA_PUBLIC_DECRYPT, key))
- < 0) {
+/* Gives the option of choosing padding type
+ in : input to be decrypted
+ inLen: length of input buffer
+ out: decrypted message
+ outLen: length of decrypted message in bytes
+ outPtr: optional inline output pointer (if provided doing inline)
+ key : wolfSSL initialized RSA key struct
+ rsa_type : type of RSA: RSA_PUBLIC_ENCRYPT, RSA_PUBLIC_DECRYPT,
+ RSA_PRIVATE_ENCRYPT or RSA_PRIVATE_DECRYPT
+ pad_value: RSA_BLOCK_TYPE_1 or RSA_BLOCK_TYPE_2
+ pad_type : type of padding: WC_RSA_PKCSV15_PAD, WC_RSA_OAEP_PAD,
+ WC_RSA_NO_PAD, WC_RSA_PSS_PAD
+ hash : type of hash algorithm to use found in wolfssl/wolfcrypt/hash.h
+ mgf : type of mask generation function to use
+ label : optional label
+ labelSz : size of optional label buffer
+ saltLen : Length of salt used in PSS
+ rng : random number generator */
+static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out,
+ word32 outLen, byte** outPtr, RsaKey* key,
+ int rsa_type, byte pad_value, int pad_type,
+ enum wc_HashType hash, int mgf,
+ byte* label, word32 labelSz, int saltLen,
+ WC_RNG* rng)
+{
+ int ret = RSA_WRONG_TYPE_E;
+ byte* pad = NULL;
+
+ if (in == NULL || inLen == 0 || out == NULL || key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ switch (key->state) {
+ case RSA_STATE_NONE:
+ key->dataLen = inLen;
+
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+ defined(HAVE_CAVIUM)
+ /* Async operations that include padding */
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
+ pad_type != WC_RSA_PSS_PAD) {
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ if (rsa_type == RSA_PRIVATE_DECRYPT &&
+ pad_value == RSA_BLOCK_TYPE_2) {
+ key->state = RSA_STATE_DECRYPT_RES;
+ key->data = NULL;
+ return NitroxRsaPrivateDecrypt(in, inLen, out, &key->dataLen,
+ key);
+#endif
+ }
+ else if (rsa_type == RSA_PUBLIC_DECRYPT &&
+ pad_value == RSA_BLOCK_TYPE_1) {
+ key->state = RSA_STATE_DECRYPT_RES;
+ key->data = NULL;
+ return NitroxRsaSSL_Verify(in, inLen, out, &key->dataLen, key);
+ }
+ }
+ #elif defined(WOLFSSL_CRYPTOCELL)
+ if (rsa_type == RSA_PRIVATE_DECRYPT &&
+ pad_value == RSA_BLOCK_TYPE_2) {
+ ret = cc310_RsaPublicDecrypt(in, inLen, out, outLen, key);
+ if (outPtr != NULL)
+ *outPtr = out; /* for inline */
+ return ret;
+ }
+ else if (rsa_type == RSA_PUBLIC_DECRYPT &&
+ pad_value == RSA_BLOCK_TYPE_1) {
+ return cc310_RsaSSL_Verify(in, inLen, out, key,
+ cc310_hashModeRSA(hash, 0));
+ }
+ #endif /* WOLFSSL_CRYPTOCELL */
+
+
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
+ /* verify the tmp ptr is NULL, otherwise indicates bad state */
+ if (key->data != NULL) {
+ ret = BAD_STATE_E;
+ break;
+ }
+
+ /* if not doing this inline then allocate a buffer for it */
+ if (outPtr == NULL) {
+ key->data = (byte*)XMALLOC(inLen, key->heap,
+ DYNAMIC_TYPE_WOLF_BIGINT);
+ key->dataIsAlloc = 1;
+ if (key->data == NULL) {
+ ret = MEMORY_E;
+ break;
+ }
+ XMEMCPY(key->data, in, inLen);
+ }
+ else {
+ key->data = out;
+ }
+#endif
+
+ key->state = RSA_STATE_DECRYPT_EXPTMOD;
+ FALL_THROUGH;
+
+ case RSA_STATE_DECRYPT_EXPTMOD:
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
+ ret = wc_RsaFunction(key->data, inLen, key->data, &key->dataLen,
+ rsa_type, key, rng);
+#else
+ ret = wc_RsaFunction(in, inLen, out, &key->dataLen, rsa_type, key, rng);
+#endif
+
+ if (ret >= 0 || ret == WC_PENDING_E) {
+ key->state = RSA_STATE_DECRYPT_UNPAD;
+ }
+ if (ret < 0) {
+ break;
+ }
+
+ FALL_THROUGH;
+
+ case RSA_STATE_DECRYPT_UNPAD:
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
+ ret = wc_RsaUnPad_ex(key->data, key->dataLen, &pad, pad_value, pad_type,
+ hash, mgf, label, labelSz, saltLen,
+ mp_count_bits(&key->n), key->heap);
+#else
+ ret = wc_RsaUnPad_ex(out, key->dataLen, &pad, pad_value, pad_type, hash,
+ mgf, label, labelSz, saltLen,
+ mp_count_bits(&key->n), key->heap);
+#endif
+ if (rsa_type == RSA_PUBLIC_DECRYPT && ret > (int)outLen)
+ ret = RSA_BUFFER_E;
+ else if (ret >= 0 && pad != NULL) {
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
+ signed char c;
+#endif
+
+ /* only copy output if not inline */
+ if (outPtr == NULL) {
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE)
+ if (rsa_type == RSA_PRIVATE_DECRYPT) {
+ word32 i, j;
+ int start = (int)((size_t)pad - (size_t)key->data);
+
+ for (i = 0, j = 0; j < key->dataLen; j++) {
+ out[i] = key->data[j];
+ c = ctMaskGTE(j, start);
+ c &= ctMaskLT(i, outLen);
+ /* 0 - no add, -1 add */
+ i += (word32)((byte)(-c));
+ }
+ }
+ else
+#endif
+ {
+ XMEMCPY(out, pad, ret);
+ }
+ }
+ else
+ *outPtr = pad;
+
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY)
+ ret = ctMaskSelInt(ctMaskLTE(ret, outLen), ret, RSA_BUFFER_E);
+ ret = ctMaskSelInt(ctMaskNotEq(ret, 0), ret, RSA_BUFFER_E);
+#else
+ if (outLen < (word32)ret)
+ ret = RSA_BUFFER_E;
+#endif
+ }
+
+ key->state = RSA_STATE_DECRYPT_RES;
+ FALL_THROUGH;
+
+ case RSA_STATE_DECRYPT_RES:
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+ defined(HAVE_CAVIUM)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
+ pad_type != WC_RSA_PSS_PAD) {
+ if (ret > 0) {
+ /* convert result */
+ byte* dataLen = (byte*)&key->dataLen;
+ ret = (dataLen[0] << 8) | (dataLen[1]);
+
+ if (outPtr)
+ *outPtr = in;
+ }
+ }
+ #endif
+ break;
+
+ default:
+ ret = BAD_STATE_E;
+ break;
+ }
+
+ /* if async pending then return and skip done cleanup below */
+ if (ret == WC_PENDING_E
+ #ifdef WC_RSA_NONBLOCK
+ || ret == FP_WOULDBLOCK
+ #endif
+ ) {
return ret;
}
-
- return RsaUnPad(in, inLen, out, RSA_BLOCK_TYPE_1);
+
+ key->state = RSA_STATE_NONE;
+ wc_RsaCleanup(key);
+
+ return ret;
+}
+
+
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+/* Public RSA Functions */
+int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, word32 outLen,
+ RsaKey* key, WC_RNG* rng)
+{
+ return RsaPublicEncryptEx(in, inLen, out, outLen, key,
+ RSA_PUBLIC_ENCRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD,
+ WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng);
+}
+
+
+#if !defined(WC_NO_RSA_OAEP) || defined(WC_RSA_NO_PADDING)
+int wc_RsaPublicEncrypt_ex(const byte* in, word32 inLen, byte* out,
+ word32 outLen, RsaKey* key, WC_RNG* rng, int type,
+ enum wc_HashType hash, int mgf, byte* label,
+ word32 labelSz)
+{
+ return RsaPublicEncryptEx(in, inLen, out, outLen, key, RSA_PUBLIC_ENCRYPT,
+ RSA_BLOCK_TYPE_2, type, hash, mgf, label, labelSz, 0, rng);
+}
+#endif /* WC_NO_RSA_OAEP */
+#endif
+
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out, RsaKey* key)
+{
+ WC_RNG* rng;
+#ifdef WC_RSA_BLINDING
+ rng = key->rng;
+#else
+ rng = NULL;
+#endif
+ return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key,
+ RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD,
+ WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng);
}
+#ifndef WC_NO_RSA_OAEP
+int wc_RsaPrivateDecryptInline_ex(byte* in, word32 inLen, byte** out,
+ RsaKey* key, int type, enum wc_HashType hash,
+ int mgf, byte* label, word32 labelSz)
+{
+ WC_RNG* rng;
+#ifdef WC_RSA_BLINDING
+ rng = key->rng;
+#else
+ rng = NULL;
+#endif
+ return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key,
+ RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, type, hash,
+ mgf, label, labelSz, 0, rng);
+}
+#endif /* WC_NO_RSA_OAEP */
+
+
+int wc_RsaPrivateDecrypt(const byte* in, word32 inLen, byte* out,
+ word32 outLen, RsaKey* key)
+{
+ WC_RNG* rng;
+#ifdef WC_RSA_BLINDING
+ rng = key->rng;
+#else
+ rng = NULL;
+#endif
+ return RsaPrivateDecryptEx((byte*)in, inLen, out, outLen, NULL, key,
+ RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD,
+ WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng);
+}
+
+#if !defined(WC_NO_RSA_OAEP) || defined(WC_RSA_NO_PADDING)
+int wc_RsaPrivateDecrypt_ex(const byte* in, word32 inLen, byte* out,
+ word32 outLen, RsaKey* key, int type,
+ enum wc_HashType hash, int mgf, byte* label,
+ word32 labelSz)
+{
+ WC_RNG* rng;
+#ifdef WC_RSA_BLINDING
+ rng = key->rng;
+#else
+ rng = NULL;
+#endif
+ return RsaPrivateDecryptEx((byte*)in, inLen, out, outLen, NULL, key,
+ RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, type, hash, mgf, label,
+ labelSz, 0, rng);
+}
+#endif /* WC_NO_RSA_OAEP || WC_RSA_NO_PADDING */
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if !defined(WOLFSSL_CRYPTOCELL)
+int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key)
+{
+ WC_RNG* rng;
+#ifdef WC_RSA_BLINDING
+ rng = key->rng;
+#else
+ rng = NULL;
+#endif
+ return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key,
+ RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PKCSV15_PAD,
+ WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng);
+}
+#endif
+
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
int wc_RsaSSL_Verify(const byte* in, word32 inLen, byte* out, word32 outLen,
- RsaKey* key)
+ RsaKey* key)
{
- int plainLen;
- byte* tmp;
- byte* pad = 0;
+ return wc_RsaSSL_Verify_ex(in, inLen, out, outLen, key , WC_RSA_PKCSV15_PAD);
+}
+
+int wc_RsaSSL_Verify_ex(const byte* in, word32 inLen, byte* out, word32 outLen,
+ RsaKey* key, int pad_type)
+{
+ WC_RNG* rng;
+
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC)
- return CaviumRsaSSL_Verify(in, inLen, out, outLen, key);
+#ifdef WC_RSA_BLINDING
+ rng = key->rng;
+#else
+ rng = NULL;
#endif
- tmp = (byte*)XMALLOC(inLen, key->heap, DYNAMIC_TYPE_RSA);
- if (tmp == NULL) {
- return MEMORY_E;
+ return RsaPrivateDecryptEx((byte*)in, inLen, out, outLen, NULL, key,
+ RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, pad_type,
+ WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng);
+}
+#endif
+
+#ifdef WC_RSA_PSS
+/* Verify the message signed with RSA-PSS.
+ * The input buffer is reused for the output buffer.
+ * Salt length is equal to hash length.
+ *
+ * in Buffer holding encrypted data.
+ * inLen Length of data in buffer.
+ * out Pointer to address containing the PSS data.
+ * hash Hash algorithm.
+ * mgf Mask generation function.
+ * key Public RSA key.
+ * returns the length of the PSS data on success and negative indicates failure.
+ */
+int wc_RsaPSS_VerifyInline(byte* in, word32 inLen, byte** out,
+ enum wc_HashType hash, int mgf, RsaKey* key)
+{
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+ return wc_RsaPSS_VerifyInline_ex(in, inLen, out, hash, mgf,
+ RSA_PSS_SALT_LEN_DEFAULT, key);
+#else
+ return wc_RsaPSS_VerifyInline_ex(in, inLen, out, hash, mgf,
+ RSA_PSS_SALT_LEN_DISCOVER, key);
+#endif
+}
+
+/* Verify the message signed with RSA-PSS.
+ * The input buffer is reused for the output buffer.
+ *
+ * in Buffer holding encrypted data.
+ * inLen Length of data in buffer.
+ * out Pointer to address containing the PSS data.
+ * hash Hash algorithm.
+ * mgf Mask generation function.
+ * key Public RSA key.
+ * saltLen Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt
+ * length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER
+ * indicates salt length is determined from the data.
+ * returns the length of the PSS data on success and negative indicates failure.
+ */
+int wc_RsaPSS_VerifyInline_ex(byte* in, word32 inLen, byte** out,
+ enum wc_HashType hash, int mgf, int saltLen,
+ RsaKey* key)
+{
+ WC_RNG* rng;
+#ifdef WC_RSA_BLINDING
+ rng = key->rng;
+#else
+ rng = NULL;
+#endif
+ return RsaPrivateDecryptEx(in, inLen, in, inLen, out, key,
+ RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD,
+ hash, mgf, NULL, 0, saltLen, rng);
+}
+
+/* Verify the message signed with RSA-PSS.
+ * Salt length is equal to hash length.
+ *
+ * in Buffer holding encrypted data.
+ * inLen Length of data in buffer.
+ * out Pointer to address containing the PSS data.
+ * hash Hash algorithm.
+ * mgf Mask generation function.
+ * key Public RSA key.
+ * returns the length of the PSS data on success and negative indicates failure.
+ */
+int wc_RsaPSS_Verify(byte* in, word32 inLen, byte* out, word32 outLen,
+ enum wc_HashType hash, int mgf, RsaKey* key)
+{
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+ return wc_RsaPSS_Verify_ex(in, inLen, out, outLen, hash, mgf,
+ RSA_PSS_SALT_LEN_DEFAULT, key);
+#else
+ return wc_RsaPSS_Verify_ex(in, inLen, out, outLen, hash, mgf,
+ RSA_PSS_SALT_LEN_DISCOVER, key);
+#endif
+}
+
+/* Verify the message signed with RSA-PSS.
+ *
+ * in Buffer holding encrypted data.
+ * inLen Length of data in buffer.
+ * out Pointer to address containing the PSS data.
+ * hash Hash algorithm.
+ * mgf Mask generation function.
+ * key Public RSA key.
+ * saltLen Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt
+ * length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER
+ * indicates salt length is determined from the data.
+ * returns the length of the PSS data on success and negative indicates failure.
+ */
+int wc_RsaPSS_Verify_ex(byte* in, word32 inLen, byte* out, word32 outLen,
+ enum wc_HashType hash, int mgf, int saltLen,
+ RsaKey* key)
+{
+ WC_RNG* rng;
+#ifdef WC_RSA_BLINDING
+ rng = key->rng;
+#else
+ rng = NULL;
+#endif
+ return RsaPrivateDecryptEx(in, inLen, out, outLen, NULL, key,
+ RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD,
+ hash, mgf, NULL, 0, saltLen, rng);
+}
+
+
+/* Checks the PSS data to ensure that the signature matches.
+ * Salt length is equal to hash length.
+ *
+ * in Hash of the data that is being verified.
+ * inSz Length of hash.
+ * sig Buffer holding PSS data.
+ * sigSz Size of PSS data.
+ * hashType Hash algorithm.
+ * returns BAD_PADDING_E when the PSS data is invalid, BAD_FUNC_ARG when
+ * NULL is passed in to in or sig or inSz is not the same as the hash
+ * algorithm length and 0 on success.
+ */
+int wc_RsaPSS_CheckPadding(const byte* in, word32 inSz, byte* sig,
+ word32 sigSz, enum wc_HashType hashType)
+{
+ return wc_RsaPSS_CheckPadding_ex(in, inSz, sig, sigSz, hashType, inSz, 0);
+}
+
+/* Checks the PSS data to ensure that the signature matches.
+ *
+ * in Hash of the data that is being verified.
+ * inSz Length of hash.
+ * sig Buffer holding PSS data.
+ * sigSz Size of PSS data.
+ * hashType Hash algorithm.
+ * saltLen Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt
+ * length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER
+ * indicates salt length is determined from the data.
+ * returns BAD_PADDING_E when the PSS data is invalid, BAD_FUNC_ARG when
+ * NULL is passed in to in or sig or inSz is not the same as the hash
+ * algorithm length and 0 on success.
+ */
+int wc_RsaPSS_CheckPadding_ex(const byte* in, word32 inSz, byte* sig,
+ word32 sigSz, enum wc_HashType hashType,
+ int saltLen, int bits)
+{
+ int ret = 0;
+#ifndef WOLFSSL_PSS_LONG_SALT
+ byte sigCheck[WC_MAX_DIGEST_SIZE*2 + RSA_PSS_PAD_SZ];
+#else
+ byte *sigCheck = NULL;
+#endif
+
+ (void)bits;
+
+ if (in == NULL || sig == NULL ||
+ inSz != (word32)wc_HashGetDigestSize(hashType)) {
+ ret = BAD_FUNC_ARG;
}
- XMEMCPY(tmp, in, inLen);
+ if (ret == 0) {
+ if (saltLen == RSA_PSS_SALT_LEN_DEFAULT) {
+ saltLen = inSz;
+ #ifdef WOLFSSL_SHA512
+ /* See FIPS 186-4 section 5.5 item (e). */
+ if (bits == 1024 && inSz == WC_SHA512_DIGEST_SIZE) {
+ saltLen = RSA_PSS_SALT_MAX_SZ;
+ }
+ #endif
+ }
+#ifndef WOLFSSL_PSS_LONG_SALT
+ else if ((word32)saltLen > inSz) {
+ ret = PSS_SALTLEN_E;
+ }
+#endif
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+ else if (saltLen < RSA_PSS_SALT_LEN_DEFAULT) {
+ ret = PSS_SALTLEN_E;
+ }
+#else
+ else if (saltLen == RSA_PSS_SALT_LEN_DISCOVER) {
+ saltLen = sigSz - inSz;
+ if (saltLen < 0) {
+ ret = PSS_SALTLEN_E;
+ }
+ }
+ else if (saltLen < RSA_PSS_SALT_LEN_DISCOVER) {
+ ret = PSS_SALTLEN_E;
+ }
+#endif
+ }
- if ( (plainLen = wc_RsaSSL_VerifyInline(tmp, inLen, &pad, key) ) < 0) {
- XFREE(tmp, key->heap, DYNAMIC_TYPE_RSA);
- return plainLen;
+ /* Sig = Salt | Exp Hash */
+ if (ret == 0) {
+ if (sigSz != inSz + saltLen) {
+ ret = PSS_SALTLEN_E;
+ }
}
- if (plainLen > (int)outLen)
- plainLen = BAD_FUNC_ARG;
- else
- XMEMCPY(out, pad, plainLen);
+#ifdef WOLFSSL_PSS_LONG_SALT
+ if (ret == 0) {
+ sigCheck = (byte*)XMALLOC(RSA_PSS_PAD_SZ + inSz + saltLen, NULL,
+ DYNAMIC_TYPE_RSA_BUFFER);
+ if (sigCheck == NULL) {
+ ret = MEMORY_E;
+ }
+ }
+#endif
- ForceZero(tmp, inLen);
- XFREE(tmp, key->heap, DYNAMIC_TYPE_RSA);
+ /* Exp Hash = HASH(8 * 0x00 | Message Hash | Salt) */
+ if (ret == 0) {
+ XMEMSET(sigCheck, 0, RSA_PSS_PAD_SZ);
+ XMEMCPY(sigCheck + RSA_PSS_PAD_SZ, in, inSz);
+ XMEMCPY(sigCheck + RSA_PSS_PAD_SZ + inSz, sig, saltLen);
+ ret = wc_Hash(hashType, sigCheck, RSA_PSS_PAD_SZ + inSz + saltLen,
+ sigCheck, inSz);
+ }
+ if (ret == 0) {
+ if (XMEMCMP(sigCheck, sig + saltLen, inSz) != 0) {
+ WOLFSSL_MSG("RsaPSS_CheckPadding: Padding Error");
+ ret = BAD_PADDING_E;
+ }
+ }
- return plainLen;
+#ifdef WOLFSSL_PSS_LONG_SALT
+ if (sigCheck != NULL) {
+ XFREE(sigCheck, NULL, DYNAMIC_TYPE_RSA_BUFFER);
+ }
+#endif
+ return ret;
}
-/* for Rsa Sign */
-int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, word32 outLen,
- RsaKey* key, RNG* rng)
+/* Verify the message signed with RSA-PSS.
+ * The input buffer is reused for the output buffer.
+ * Salt length is equal to hash length.
+ *
+ * in Buffer holding encrypted data.
+ * inLen Length of data in buffer.
+ * out Pointer to address containing the PSS data.
+ * digest Hash of the data that is being verified.
+ * digestLen Length of hash.
+ * hash Hash algorithm.
+ * mgf Mask generation function.
+ * key Public RSA key.
+ * returns the length of the PSS data on success and negative indicates failure.
+ */
+int wc_RsaPSS_VerifyCheckInline(byte* in, word32 inLen, byte** out,
+ const byte* digest, word32 digestLen,
+ enum wc_HashType hash, int mgf, RsaKey* key)
{
- int sz, ret;
+ int ret = 0, verify, saltLen, hLen, bits = 0;
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC)
- return CaviumRsaSSL_Sign(in, inLen, out, outLen, key);
-#endif
+ hLen = wc_HashGetDigestSize(hash);
+ if (hLen < 0)
+ return hLen;
+ if ((word32)hLen != digestLen)
+ return BAD_FUNC_ARG;
- sz = mp_unsigned_bin_size(&key->n);
- if (sz > (int)outLen)
- return RSA_BUFFER_E;
+ saltLen = hLen;
+ #ifdef WOLFSSL_SHA512
+ /* See FIPS 186-4 section 5.5 item (e). */
+ bits = mp_count_bits(&key->n);
+ if (bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE)
+ saltLen = RSA_PSS_SALT_MAX_SZ;
+ #endif
- if (inLen > (word32)(sz - RSA_MIN_PAD_SZ))
- return RSA_BUFFER_E;
+ verify = wc_RsaPSS_VerifyInline_ex(in, inLen, out, hash, mgf, saltLen, key);
+ if (verify > 0)
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestLen, *out, verify,
+ hash, saltLen, bits);
+ if (ret == 0)
+ ret = verify;
- ret = wc_RsaPad(in, inLen, out, sz, RSA_BLOCK_TYPE_1, rng);
- if (ret != 0)
- return ret;
+ return ret;
+}
+
+
+/* Verify the message signed with RSA-PSS.
+ * Salt length is equal to hash length.
+ *
+ * in Buffer holding encrypted data.
+ * inLen Length of data in buffer.
+ * out Pointer to address containing the PSS data.
+ * outLen Length of the output.
+ * digest Hash of the data that is being verified.
+ * digestLen Length of hash.
+ * hash Hash algorithm.
+ * mgf Mask generation function.
+ * key Public RSA key.
+ * returns the length of the PSS data on success and negative indicates failure.
+ */
+int wc_RsaPSS_VerifyCheck(byte* in, word32 inLen, byte* out, word32 outLen,
+ const byte* digest, word32 digestLen,
+ enum wc_HashType hash, int mgf,
+ RsaKey* key)
+{
+ int ret = 0, verify, saltLen, hLen, bits = 0;
+
+ hLen = wc_HashGetDigestSize(hash);
+ if (hLen < 0)
+ return hLen;
+ if ((word32)hLen != digestLen)
+ return BAD_FUNC_ARG;
- if ((ret = wc_RsaFunction(out, sz, out, &outLen,
- RSA_PRIVATE_ENCRYPT,key)) < 0)
- sz = ret;
-
- return sz;
+ saltLen = hLen;
+ #ifdef WOLFSSL_SHA512
+ /* See FIPS 186-4 section 5.5 item (e). */
+ bits = mp_count_bits(&key->n);
+ if (bits == 1024 && hLen == WC_SHA512_DIGEST_SIZE)
+ saltLen = RSA_PSS_SALT_MAX_SZ;
+ #endif
+
+ verify = wc_RsaPSS_Verify_ex(in, inLen, out, outLen, hash,
+ mgf, saltLen, key);
+ if (verify > 0)
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestLen, out, verify,
+ hash, saltLen, bits);
+ if (ret == 0)
+ ret = verify;
+
+ return ret;
+}
+
+#endif
+
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
+int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, word32 outLen,
+ RsaKey* key, WC_RNG* rng)
+{
+ return RsaPublicEncryptEx(in, inLen, out, outLen, key,
+ RSA_PRIVATE_ENCRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PKCSV15_PAD,
+ WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng);
}
+#ifdef WC_RSA_PSS
+/* Sign the hash of a message using RSA-PSS.
+ * Salt length is equal to hash length.
+ *
+ * in Buffer holding hash of message.
+ * inLen Length of data in buffer (hash length).
+ * out Buffer to write encrypted signature into.
+ * outLen Size of buffer to write to.
+ * hash Hash algorithm.
+ * mgf Mask generation function.
+ * key Public RSA key.
+ * rng Random number generator.
+ * returns the length of the encrypted signature on success, a negative value
+ * indicates failure.
+ */
+int wc_RsaPSS_Sign(const byte* in, word32 inLen, byte* out, word32 outLen,
+ enum wc_HashType hash, int mgf, RsaKey* key, WC_RNG* rng)
+{
+ return wc_RsaPSS_Sign_ex(in, inLen, out, outLen, hash, mgf,
+ RSA_PSS_SALT_LEN_DEFAULT, key, rng);
+}
+
+/* Sign the hash of a message using RSA-PSS.
+ *
+ * in Buffer holding hash of message.
+ * inLen Length of data in buffer (hash length).
+ * out Buffer to write encrypted signature into.
+ * outLen Size of buffer to write to.
+ * hash Hash algorithm.
+ * mgf Mask generation function.
+ * saltLen Length of salt used. RSA_PSS_SALT_LEN_DEFAULT (-1) indicates salt
+ * length is the same as the hash length. RSA_PSS_SALT_LEN_DISCOVER
+ * indicates salt length is determined from the data.
+ * key Public RSA key.
+ * rng Random number generator.
+ * returns the length of the encrypted signature on success, a negative value
+ * indicates failure.
+ */
+int wc_RsaPSS_Sign_ex(const byte* in, word32 inLen, byte* out, word32 outLen,
+ enum wc_HashType hash, int mgf, int saltLen, RsaKey* key,
+ WC_RNG* rng)
+{
+ return RsaPublicEncryptEx(in, inLen, out, outLen, key,
+ RSA_PRIVATE_ENCRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD,
+ hash, mgf, NULL, 0, saltLen, rng);
+}
+#endif
+#endif
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || !defined(WOLFSSL_SP_MATH) || \
+ defined(WC_RSA_PSS)
int wc_RsaEncryptSize(RsaKey* key)
{
-#ifdef HAVE_CAVIUM
- if (key->magic == WOLFSSL_RSA_CAVIUM_MAGIC)
- return key->c_nSz;
+ int ret;
+
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = mp_unsigned_bin_size(&key->n);
+
+#ifdef WOLF_CRYPTO_CB
+ if (ret == 0 && key->devId != INVALID_DEVID) {
+ ret = 2048/8; /* hardware handles, use 2048-bit as default */
+ }
#endif
- return mp_unsigned_bin_size(&key->n);
+
+ return ret;
}
+#endif
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
/* flatten RsaKey structure into individual elements (e, n) */
int wc_RsaFlattenPublicKey(RsaKey* key, byte* e, word32* eSz, byte* n,
- word32* nSz)
+ word32* nSz)
{
int sz, ret;
- if (key == NULL || e == NULL || eSz == NULL || n == NULL || nSz == NULL)
- return BAD_FUNC_ARG;
+ if (key == NULL || e == NULL || eSz == NULL || n == NULL || nSz == NULL) {
+ return BAD_FUNC_ARG;
+ }
sz = mp_unsigned_bin_size(&key->e);
- if ((word32)sz > *nSz)
+ if ((word32)sz > *eSz)
return RSA_BUFFER_E;
ret = mp_to_unsigned_bin(&key->e, e);
if (ret != MP_OKAY)
return ret;
*eSz = (word32)sz;
- sz = mp_unsigned_bin_size(&key->n);
+ sz = wc_RsaEncryptSize(key);
if ((word32)sz > *nSz)
return RSA_BUFFER_E;
ret = mp_to_unsigned_bin(&key->n, n);
@@ -609,364 +3586,616 @@ int wc_RsaFlattenPublicKey(RsaKey* key, byte* e, word32* eSz, byte* n,
return 0;
}
+#endif
+#endif /* HAVE_FIPS */
-#ifdef WOLFSSL_KEY_GEN
-static const int USE_BBS = 1;
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+static int RsaGetValue(mp_int* in, byte* out, word32* outSz)
+{
+ word32 sz;
+ int ret = 0;
+
+ /* Parameters ensured by calling function. */
+
+ sz = (word32)mp_unsigned_bin_size(in);
+ if (sz > *outSz)
+ ret = RSA_BUFFER_E;
-static int rand_prime(mp_int* N, int len, RNG* rng, void* heap)
+ if (ret == 0)
+ ret = mp_to_unsigned_bin(in, out);
+
+ if (ret == MP_OKAY)
+ *outSz = sz;
+
+ return ret;
+}
+
+
+int wc_RsaExportKey(RsaKey* key,
+ byte* e, word32* eSz, byte* n, word32* nSz,
+ byte* d, word32* dSz, byte* p, word32* pSz,
+ byte* q, word32* qSz)
{
- int err, res, type;
- byte* buf;
+ int ret = BAD_FUNC_ARG;
+
+ if (key && e && eSz && n && nSz && d && dSz && p && pSz && q && qSz)
+ ret = 0;
+
+ if (ret == 0)
+ ret = RsaGetValue(&key->e, e, eSz);
+ if (ret == 0)
+ ret = RsaGetValue(&key->n, n, nSz);
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ if (ret == 0)
+ ret = RsaGetValue(&key->d, d, dSz);
+ if (ret == 0)
+ ret = RsaGetValue(&key->p, p, pSz);
+ if (ret == 0)
+ ret = RsaGetValue(&key->q, q, qSz);
+#else
+ /* no private parts to key */
+ if (d == NULL || p == NULL || q == NULL || dSz == NULL || pSz == NULL
+ || qSz == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ else {
+ *dSz = 0;
+ *pSz = 0;
+ *qSz = 0;
+ }
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
- (void)heap;
- if (N == NULL || rng == NULL)
- return BAD_FUNC_ARG;
+ return ret;
+}
+#endif
+
+
+#ifdef WOLFSSL_KEY_GEN
- /* get type */
- if (len < 0) {
- type = USE_BBS;
- len = -len;
- } else {
- type = 0;
+/* Check that |p-q| > 2^((size/2)-100) */
+static int wc_CompareDiffPQ(mp_int* p, mp_int* q, int size)
+{
+ mp_int c, d;
+ int ret;
+
+ if (p == NULL || q == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = mp_init_multi(&c, &d, NULL, NULL, NULL, NULL);
+
+ /* c = 2^((size/2)-100) */
+ if (ret == 0)
+ ret = mp_2expt(&c, (size/2)-100);
+
+ /* d = |p-q| */
+ if (ret == 0)
+ ret = mp_sub(p, q, &d);
+
+ if (ret == 0)
+ ret = mp_abs(&d, &d);
+
+ /* compare */
+ if (ret == 0)
+ ret = mp_cmp(&d, &c);
+
+ if (ret == MP_GT)
+ ret = MP_OKAY;
+
+ mp_clear(&d);
+ mp_clear(&c);
+
+ return ret;
+}
+
+
+/* The lower_bound value is floor(2^(0.5) * 2^((nlen/2)-1)) where nlen is 4096.
+ * This number was calculated using a small test tool written with a common
+ * large number math library. Other values of nlen may be checked with a subset
+ * of lower_bound. */
+static const byte lower_bound[] = {
+ 0xB5, 0x04, 0xF3, 0x33, 0xF9, 0xDE, 0x64, 0x84,
+ 0x59, 0x7D, 0x89, 0xB3, 0x75, 0x4A, 0xBE, 0x9F,
+ 0x1D, 0x6F, 0x60, 0xBA, 0x89, 0x3B, 0xA8, 0x4C,
+ 0xED, 0x17, 0xAC, 0x85, 0x83, 0x33, 0x99, 0x15,
+/* 512 */
+ 0x4A, 0xFC, 0x83, 0x04, 0x3A, 0xB8, 0xA2, 0xC3,
+ 0xA8, 0xB1, 0xFE, 0x6F, 0xDC, 0x83, 0xDB, 0x39,
+ 0x0F, 0x74, 0xA8, 0x5E, 0x43, 0x9C, 0x7B, 0x4A,
+ 0x78, 0x04, 0x87, 0x36, 0x3D, 0xFA, 0x27, 0x68,
+/* 1024 */
+ 0xD2, 0x20, 0x2E, 0x87, 0x42, 0xAF, 0x1F, 0x4E,
+ 0x53, 0x05, 0x9C, 0x60, 0x11, 0xBC, 0x33, 0x7B,
+ 0xCA, 0xB1, 0xBC, 0x91, 0x16, 0x88, 0x45, 0x8A,
+ 0x46, 0x0A, 0xBC, 0x72, 0x2F, 0x7C, 0x4E, 0x33,
+ 0xC6, 0xD5, 0xA8, 0xA3, 0x8B, 0xB7, 0xE9, 0xDC,
+ 0xCB, 0x2A, 0x63, 0x43, 0x31, 0xF3, 0xC8, 0x4D,
+ 0xF5, 0x2F, 0x12, 0x0F, 0x83, 0x6E, 0x58, 0x2E,
+ 0xEA, 0xA4, 0xA0, 0x89, 0x90, 0x40, 0xCA, 0x4A,
+/* 2048 */
+ 0x81, 0x39, 0x4A, 0xB6, 0xD8, 0xFD, 0x0E, 0xFD,
+ 0xF4, 0xD3, 0xA0, 0x2C, 0xEB, 0xC9, 0x3E, 0x0C,
+ 0x42, 0x64, 0xDA, 0xBC, 0xD5, 0x28, 0xB6, 0x51,
+ 0xB8, 0xCF, 0x34, 0x1B, 0x6F, 0x82, 0x36, 0xC7,
+ 0x01, 0x04, 0xDC, 0x01, 0xFE, 0x32, 0x35, 0x2F,
+ 0x33, 0x2A, 0x5E, 0x9F, 0x7B, 0xDA, 0x1E, 0xBF,
+ 0xF6, 0xA1, 0xBE, 0x3F, 0xCA, 0x22, 0x13, 0x07,
+ 0xDE, 0xA0, 0x62, 0x41, 0xF7, 0xAA, 0x81, 0xC2,
+/* 3072 */
+ 0xC1, 0xFC, 0xBD, 0xDE, 0xA2, 0xF7, 0xDC, 0x33,
+ 0x18, 0x83, 0x8A, 0x2E, 0xAF, 0xF5, 0xF3, 0xB2,
+ 0xD2, 0x4F, 0x4A, 0x76, 0x3F, 0xAC, 0xB8, 0x82,
+ 0xFD, 0xFE, 0x17, 0x0F, 0xD3, 0xB1, 0xF7, 0x80,
+ 0xF9, 0xAC, 0xCE, 0x41, 0x79, 0x7F, 0x28, 0x05,
+ 0xC2, 0x46, 0x78, 0x5E, 0x92, 0x95, 0x70, 0x23,
+ 0x5F, 0xCF, 0x8F, 0x7B, 0xCA, 0x3E, 0xA3, 0x3B,
+ 0x4D, 0x7C, 0x60, 0xA5, 0xE6, 0x33, 0xE3, 0xE1
+/* 4096 */
+};
+
+
+/* returns 1 on key size ok and 0 if not ok */
+static WC_INLINE int RsaSizeCheck(int size)
+{
+ if (size < RSA_MIN_SIZE || size > RSA_MAX_SIZE) {
+ return 0;
+ }
+
+#ifdef HAVE_FIPS
+ /* Key size requirements for CAVP */
+ switch (size) {
+ case 1024:
+ case 2048:
+ case 3072:
+ case 4096:
+ return 1;
}
- /* allow sizes between 2 and 512 bytes for a prime size */
- if (len < 2 || len > 512) {
+ return 0;
+#else
+ return 1; /* allow unusual key sizes in non FIPS mode */
+#endif /* HAVE_FIPS */
+}
+
+
+static int _CheckProbablePrime(mp_int* p, mp_int* q, mp_int* e, int nlen,
+ int* isPrime, WC_RNG* rng)
+{
+ int ret;
+ mp_int tmp1, tmp2;
+ mp_int* prime;
+
+ if (p == NULL || e == NULL || isPrime == NULL)
+ return BAD_FUNC_ARG;
+
+ if (!RsaSizeCheck(nlen))
return BAD_FUNC_ARG;
+
+ *isPrime = MP_NO;
+
+ if (q != NULL) {
+ /* 5.4 - check that |p-q| <= (2^(1/2))(2^((nlen/2)-1)) */
+ ret = wc_CompareDiffPQ(p, q, nlen);
+ if (ret != MP_OKAY) goto notOkay;
+ prime = q;
}
-
- /* allocate buffer to work with */
- buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
- if (buf == NULL) {
- return MEMORY_E;
+ else
+ prime = p;
+
+ ret = mp_init_multi(&tmp1, &tmp2, NULL, NULL, NULL, NULL);
+ if (ret != MP_OKAY) goto notOkay;
+
+ /* 4.4,5.5 - Check that prime >= (2^(1/2))(2^((nlen/2)-1))
+ * This is a comparison against lowerBound */
+ ret = mp_read_unsigned_bin(&tmp1, lower_bound, nlen/16);
+ if (ret != MP_OKAY) goto notOkay;
+ ret = mp_cmp(prime, &tmp1);
+ if (ret == MP_LT) goto exit;
+
+ /* 4.5,5.6 - Check that GCD(p-1, e) == 1 */
+ ret = mp_sub_d(prime, 1, &tmp1); /* tmp1 = prime-1 */
+ if (ret != MP_OKAY) goto notOkay;
+ ret = mp_gcd(&tmp1, e, &tmp2); /* tmp2 = gcd(prime-1, e) */
+ if (ret != MP_OKAY) goto notOkay;
+ ret = mp_cmp_d(&tmp2, 1);
+ if (ret != MP_EQ) goto exit; /* e divides p-1 */
+
+ /* 4.5.1,5.6.1 - Check primality of p with 8 rounds of M-R.
+ * mp_prime_is_prime_ex() performs test divisions against the first 256
+ * prime numbers. After that it performs 8 rounds of M-R using random
+ * bases between 2 and n-2.
+ * mp_prime_is_prime() performs the same test divisions and then does
+ * M-R with the first 8 primes. Both functions set isPrime as a
+ * side-effect. */
+ if (rng != NULL)
+ ret = mp_prime_is_prime_ex(prime, 8, isPrime, rng);
+ else
+ ret = mp_prime_is_prime(prime, 8, isPrime);
+ if (ret != MP_OKAY) goto notOkay;
+
+exit:
+ ret = MP_OKAY;
+notOkay:
+ mp_clear(&tmp1);
+ mp_clear(&tmp2);
+ return ret;
+}
+
+
+int wc_CheckProbablePrime_ex(const byte* pRaw, word32 pRawSz,
+ const byte* qRaw, word32 qRawSz,
+ const byte* eRaw, word32 eRawSz,
+ int nlen, int* isPrime, WC_RNG* rng)
+{
+ mp_int p, q, e;
+ mp_int* Q = NULL;
+ int ret;
+
+ if (pRaw == NULL || pRawSz == 0 ||
+ eRaw == NULL || eRawSz == 0 ||
+ isPrime == NULL) {
+
+ return BAD_FUNC_ARG;
}
- XMEMSET(buf, 0, len);
- do {
-#ifdef SHOW_GEN
- printf(".");
- fflush(stdout);
-#endif
- /* generate value */
- err = wc_RNG_GenerateBlock(rng, buf, len);
- if (err != 0) {
- XFREE(buf, heap, DYNAMIC_TYPE_RSA);
- return err;
- }
+ if ((qRaw != NULL && qRawSz == 0) || (qRaw == NULL && qRawSz != 0))
+ return BAD_FUNC_ARG;
- /* munge bits */
- buf[0] |= 0x80 | 0x40;
- buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
-
- /* load value */
- if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
- XFREE(buf, heap, DYNAMIC_TYPE_RSA);
- return err;
- }
+ ret = mp_init_multi(&p, &q, &e, NULL, NULL, NULL);
+
+ if (ret == MP_OKAY)
+ ret = mp_read_unsigned_bin(&p, pRaw, pRawSz);
- /* test */
- if ((err = mp_prime_is_prime(N, 8, &res)) != MP_OKAY) {
- XFREE(buf, heap, DYNAMIC_TYPE_RSA);
- return err;
+ if (ret == MP_OKAY) {
+ if (qRaw != NULL) {
+ ret = mp_read_unsigned_bin(&q, qRaw, qRawSz);
+ if (ret == MP_OKAY)
+ Q = &q;
}
- } while (res == MP_NO);
+ }
- ForceZero(buf, len);
- XFREE(buf, heap, DYNAMIC_TYPE_RSA);
+ if (ret == MP_OKAY)
+ ret = mp_read_unsigned_bin(&e, eRaw, eRawSz);
- return 0;
+ if (ret == MP_OKAY)
+ ret = _CheckProbablePrime(&p, Q, &e, nlen, isPrime, rng);
+
+ ret = (ret == MP_OKAY) ? 0 : PRIME_GEN_E;
+
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&e);
+
+ return ret;
}
+int wc_CheckProbablePrime(const byte* pRaw, word32 pRawSz,
+ const byte* qRaw, word32 qRawSz,
+ const byte* eRaw, word32 eRawSz,
+ int nlen, int* isPrime)
+{
+ return wc_CheckProbablePrime_ex(pRaw, pRawSz, qRaw, qRawSz,
+ eRaw, eRawSz, nlen, isPrime, NULL);
+}
+
+#if !defined(HAVE_FIPS) || (defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2))
/* Make an RSA key for size bits, with e specified, 65537 is a good e */
-int wc_MakeRsaKey(RsaKey* key, int size, long e, RNG* rng)
+int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
{
+#ifndef WC_NO_RNG
mp_int p, q, tmp1, tmp2, tmp3;
- int err;
+ int err, i, failCount, primeSz, isPrime = 0;
+ byte* buf = NULL;
if (key == NULL || rng == NULL)
return BAD_FUNC_ARG;
- if (size < RSA_MIN_SIZE || size > RSA_MAX_SIZE)
+ if (!RsaSizeCheck(size))
return BAD_FUNC_ARG;
if (e < 3 || (e & 1) == 0)
return BAD_FUNC_ARG;
- if ((err = mp_init_multi(&p, &q, &tmp1, &tmp2, &tmp3, NULL)) != MP_OKAY)
- return err;
+#if defined(WOLFSSL_CRYPTOCELL)
+
+ return cc310_RSA_GenerateKeyPair(key, size, e);
- err = mp_set_int(&tmp3, e);
+#endif /*WOLFSSL_CRYPTOCELL*/
+
+#ifdef WOLF_CRYPTO_CB
+ if (key->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_MakeRsaKey(key, size, e, rng);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+ defined(WC_ASYNC_ENABLE_RSA_KEYGEN)
+ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
+ #ifdef HAVE_CAVIUM
+ /* TODO: Not implemented */
+ #elif defined(HAVE_INTEL_QA)
+ return IntelQaRsaKeyGen(&key->asyncDev, key, size, e, rng);
+ #else
+ if (wc_AsyncTestInit(&key->asyncDev, ASYNC_TEST_RSA_MAKE)) {
+ WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+ testDev->rsaMake.rng = rng;
+ testDev->rsaMake.key = key;
+ testDev->rsaMake.size = size;
+ testDev->rsaMake.e = e;
+ return WC_PENDING_E;
+ }
+ #endif
+ }
+#endif
+
+ err = mp_init_multi(&p, &q, &tmp1, &tmp2, &tmp3, NULL);
+
+ if (err == MP_OKAY)
+ err = mp_set_int(&tmp3, e);
+
+ /* The failCount value comes from NIST FIPS 186-4, section B.3.3,
+ * process steps 4.7 and 5.8. */
+ failCount = 5 * (size / 2);
+ primeSz = size / 16; /* size is the size of n in bits.
+ primeSz is in bytes. */
+
+ /* allocate buffer to work with */
+ if (err == MP_OKAY) {
+ buf = (byte*)XMALLOC(primeSz, key->heap, DYNAMIC_TYPE_RSA);
+ if (buf == NULL)
+ err = MEMORY_E;
+ }
/* make p */
if (err == MP_OKAY) {
+ isPrime = 0;
+ i = 0;
do {
- err = rand_prime(&p, size/16, rng, key->heap); /* size in bytes/2 */
+#ifdef SHOW_GEN
+ printf(".");
+ fflush(stdout);
+#endif
+ /* generate value */
+ err = wc_RNG_GenerateBlock(rng, buf, primeSz);
+ if (err == 0) {
+ /* prime lower bound has the MSB set, set it in candidate */
+ buf[0] |= 0x80;
+ /* make candidate odd */
+ buf[primeSz-1] |= 0x01;
+ /* load value */
+ err = mp_read_unsigned_bin(&p, buf, primeSz);
+ }
if (err == MP_OKAY)
- err = mp_sub_d(&p, 1, &tmp1); /* tmp1 = p-1 */
+ err = _CheckProbablePrime(&p, NULL, &tmp3, size, &isPrime, rng);
- if (err == MP_OKAY)
- err = mp_gcd(&tmp1, &tmp3, &tmp2); /* tmp2 = gcd(p-1, e) */
- } while (err == MP_OKAY && mp_cmp_d(&tmp2, 1) != 0); /* e divdes p-1 */
+#ifdef HAVE_FIPS
+ i++;
+#else
+ /* Keep the old retry behavior in non-FIPS build. */
+ (void)i;
+#endif
+ } while (err == MP_OKAY && !isPrime && i < failCount);
}
+ if (err == MP_OKAY && !isPrime)
+ err = PRIME_GEN_E;
+
/* make q */
if (err == MP_OKAY) {
+ isPrime = 0;
+ i = 0;
do {
- err = rand_prime(&q, size/16, rng, key->heap); /* size in bytes/2 */
+#ifdef SHOW_GEN
+ printf(".");
+ fflush(stdout);
+#endif
+ /* generate value */
+ err = wc_RNG_GenerateBlock(rng, buf, primeSz);
+ if (err == 0) {
+ /* prime lower bound has the MSB set, set it in candidate */
+ buf[0] |= 0x80;
+ /* make candidate odd */
+ buf[primeSz-1] |= 0x01;
+ /* load value */
+ err = mp_read_unsigned_bin(&q, buf, primeSz);
+ }
if (err == MP_OKAY)
- err = mp_sub_d(&q, 1, &tmp1); /* tmp1 = q-1 */
+ err = _CheckProbablePrime(&p, &q, &tmp3, size, &isPrime, rng);
- if (err == MP_OKAY)
- err = mp_gcd(&tmp1, &tmp3, &tmp2); /* tmp2 = gcd(q-1, e) */
- } while (err == MP_OKAY && mp_cmp_d(&tmp2, 1) != 0); /* e divdes q-1 */
+#ifdef HAVE_FIPS
+ i++;
+#else
+ /* Keep the old retry behavior in non-FIPS build. */
+ (void)i;
+#endif
+ } while (err == MP_OKAY && !isPrime && i < failCount);
}
- if (err == MP_OKAY)
- err = mp_init_multi(&key->n, &key->e, &key->d, &key->p, &key->q, NULL);
+ if (err == MP_OKAY && !isPrime)
+ err = PRIME_GEN_E;
- if (err == MP_OKAY)
- err = mp_init_multi(&key->dP, &key->dQ, &key->u, NULL, NULL, NULL);
+ if (buf) {
+ ForceZero(buf, primeSz);
+ XFREE(buf, key->heap, DYNAMIC_TYPE_RSA);
+ }
- if (err == MP_OKAY)
- err = mp_sub_d(&p, 1, &tmp2); /* tmp2 = p-1 */
+ if (err == MP_OKAY && mp_cmp(&p, &q) < 0) {
+ err = mp_copy(&p, &tmp1);
+ if (err == MP_OKAY)
+ err = mp_copy(&q, &p);
+ if (err == MP_OKAY)
+ mp_copy(&tmp1, &q);
+ }
+ /* Setup RsaKey buffers */
+ if (err == MP_OKAY)
+ err = mp_init_multi(&key->n, &key->e, &key->d, &key->p, &key->q, NULL);
if (err == MP_OKAY)
- err = mp_lcm(&tmp1, &tmp2, &tmp1); /* tmp1 = lcm(p-1, q-1),last loop */
+ err = mp_init_multi(&key->dP, &key->dQ, &key->u, NULL, NULL, NULL);
+ /* Software Key Calculation */
+ if (err == MP_OKAY) /* tmp1 = p-1 */
+ err = mp_sub_d(&p, 1, &tmp1);
+ if (err == MP_OKAY) /* tmp2 = q-1 */
+ err = mp_sub_d(&q, 1, &tmp2);
+#ifdef WC_RSA_BLINDING
+ if (err == MP_OKAY) /* tmp3 = order of n */
+ err = mp_mul(&tmp1, &tmp2, &tmp3);
+#else
+ if (err == MP_OKAY) /* tmp3 = lcm(p-1, q-1), last loop */
+ err = mp_lcm(&tmp1, &tmp2, &tmp3);
+#endif
/* make key */
+ if (err == MP_OKAY) /* key->e = e */
+ err = mp_set_int(&key->e, (mp_digit)e);
+#ifdef WC_RSA_BLINDING
+ /* Blind the inverse operation with a value that is invertible */
+ if (err == MP_OKAY) {
+ do {
+ err = mp_rand(&key->p, get_digit_count(&tmp3), rng);
+ if (err == MP_OKAY)
+ err = mp_set_bit(&key->p, 0);
+ if (err == MP_OKAY)
+ err = mp_set_bit(&key->p, size - 1);
+ if (err == MP_OKAY)
+ err = mp_gcd(&key->p, &tmp3, &key->q);
+ }
+ while ((err == MP_OKAY) && !mp_isone(&key->q));
+ }
if (err == MP_OKAY)
- err = mp_set_int(&key->e, e); /* key->e = e */
-
+ err = mp_mul_d(&key->p, (mp_digit)e, &key->e);
+#endif
if (err == MP_OKAY) /* key->d = 1/e mod lcm(p-1, q-1) */
- err = mp_invmod(&key->e, &tmp1, &key->d);
-
- if (err == MP_OKAY)
- err = mp_mul(&p, &q, &key->n); /* key->n = pq */
-
- if (err == MP_OKAY)
- err = mp_sub_d(&p, 1, &tmp1);
-
+ err = mp_invmod(&key->e, &tmp3, &key->d);
+#ifdef WC_RSA_BLINDING
+ /* Take off blinding from d and reset e */
if (err == MP_OKAY)
- err = mp_sub_d(&q, 1, &tmp2);
-
+ err = mp_mulmod(&key->d, &key->p, &tmp3, &key->d);
if (err == MP_OKAY)
+ err = mp_set_int(&key->e, (mp_digit)e);
+#endif
+ if (err == MP_OKAY) /* key->n = pq */
+ err = mp_mul(&p, &q, &key->n);
+ if (err == MP_OKAY) /* key->dP = d mod(p-1) */
err = mp_mod(&key->d, &tmp1, &key->dP);
-
- if (err == MP_OKAY)
+ if (err == MP_OKAY) /* key->dQ = d mod(q-1) */
err = mp_mod(&key->d, &tmp2, &key->dQ);
-
- if (err == MP_OKAY)
+#ifdef WOLFSSL_MP_INVMOD_CONSTANT_TIME
+ if (err == MP_OKAY) /* key->u = 1/q mod p */
err = mp_invmod(&q, &p, &key->u);
-
+#else
+ if (err == MP_OKAY)
+ err = mp_sub_d(&p, 2, &tmp3);
+ if (err == MP_OKAY) /* key->u = 1/q mod p = q^(p-2) mod p */
+ err = mp_exptmod(&q, &tmp3 , &p, &key->u);
+#endif
if (err == MP_OKAY)
err = mp_copy(&p, &key->p);
-
if (err == MP_OKAY)
err = mp_copy(&q, &key->q);
+#ifdef HAVE_WOLF_BIGINT
+ /* make sure raw unsigned bin version is available */
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(&key->n, &key->n.raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(&key->e, &key->e.raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(&key->d, &key->d.raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(&key->p, &key->p.raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(&key->q, &key->q.raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(&key->dP, &key->dP.raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(&key->dQ, &key->dQ.raw);
+ if (err == MP_OKAY)
+ err = wc_mp_to_bigint(&key->u, &key->u.raw);
+#endif
+
if (err == MP_OKAY)
- key->type = RSA_PRIVATE;
+ key->type = RSA_PRIVATE;
- mp_clear(&tmp3);
- mp_clear(&tmp2);
- mp_clear(&tmp1);
- mp_clear(&q);
+ mp_clear(&tmp1);
+ mp_clear(&tmp2);
+ mp_clear(&tmp3);
mp_clear(&p);
+ mp_clear(&q);
- if (err != MP_OKAY) {
- wc_FreeRsaKey(key);
+#if defined(WOLFSSL_KEY_GEN) && !defined(WOLFSSL_NO_RSA_KEY_CHECK)
+ /* Perform the pair-wise consistency test on the new key. */
+ if (err == 0)
+ err = wc_CheckRsaKey(key);
+#endif
+
+ if (err != 0) {
+ wc_FreeRsaKey(key);
return err;
}
+#if defined(WOLFSSL_XILINX_CRYPT) || defined(WOLFSSL_CRYPTOCELL)
+ if (wc_InitRsaHw(key) != 0) {
+ return BAD_STATE_E;
+ }
+#endif
return 0;
+#else
+ return NOT_COMPILED_IN;
+#endif
}
-
-
+#endif /* !FIPS || FIPS_VER >= 2 */
#endif /* WOLFSSL_KEY_GEN */
-#ifdef HAVE_CAVIUM
-
-#include <cyassl/ctaocrypt/logging.h>
-#include "cavium_common.h"
-
-/* Initiliaze RSA for use with Nitrox device */
-int RsaInitCavium(RsaKey* rsa, int devId)
-{
- if (rsa == NULL)
- return -1;
-
- if (CspAllocContext(CONTEXT_SSL, &rsa->contextHandle, devId) != 0)
- return -1;
-
- rsa->devId = devId;
- rsa->magic = WOLFSSL_RSA_CAVIUM_MAGIC;
-
- return 0;
-}
-
-
-/* Free RSA from use with Nitrox device */
-void wc_RsaFreeCavium(RsaKey* rsa)
-{
- if (rsa == NULL)
- return;
-
- CspFreeContext(CONTEXT_SSL, rsa->contextHandle, rsa->devId);
- rsa->magic = 0;
-}
-
-
-/* Initialize cavium RSA key */
-static int InitCaviumRsaKey(RsaKey* key, void* heap)
+#ifdef WC_RSA_BLINDING
+int wc_RsaSetRNG(RsaKey* key, WC_RNG* rng)
{
if (key == NULL)
return BAD_FUNC_ARG;
- key->heap = heap;
- key->type = -1; /* don't know yet */
-
- key->c_n = NULL;
- key->c_e = NULL;
- key->c_d = NULL;
- key->c_p = NULL;
- key->c_q = NULL;
- key->c_dP = NULL;
- key->c_dQ = NULL;
- key->c_u = NULL;
-
- key->c_nSz = 0;
- key->c_eSz = 0;
- key->c_dSz = 0;
- key->c_pSz = 0;
- key->c_qSz = 0;
- key->c_dP_Sz = 0;
- key->c_dQ_Sz = 0;
- key->c_uSz = 0;
-
+ key->rng = rng;
+
return 0;
}
+#endif /* WC_RSA_BLINDING */
-
-/* Free cavium RSA key */
-static int FreeCaviumRsaKey(RsaKey* key)
+#ifdef WC_RSA_NONBLOCK
+int wc_RsaSetNonBlock(RsaKey* key, RsaNb* nb)
{
if (key == NULL)
return BAD_FUNC_ARG;
- XFREE(key->c_n, key->heap, DYNAMIC_TYPE_CAVIUM_TMP);
- XFREE(key->c_e, key->heap, DYNAMIC_TYPE_CAVIUM_TMP);
- XFREE(key->c_d, key->heap, DYNAMIC_TYPE_CAVIUM_TMP);
- XFREE(key->c_p, key->heap, DYNAMIC_TYPE_CAVIUM_TMP);
- XFREE(key->c_q, key->heap, DYNAMIC_TYPE_CAVIUM_TMP);
- XFREE(key->c_dP, key->heap, DYNAMIC_TYPE_CAVIUM_TMP);
- XFREE(key->c_dQ, key->heap, DYNAMIC_TYPE_CAVIUM_TMP);
- XFREE(key->c_u, key->heap, DYNAMIC_TYPE_CAVIUM_TMP);
-
- return InitCaviumRsaKey(key, key->heap); /* reset pointers */
-}
-
-
-static int CaviumRsaPublicEncrypt(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key)
-{
- word32 requestId;
- word32 ret;
-
- if (key == NULL || in == NULL || out == NULL || outLen < (word32)key->c_nSz)
- return -1;
-
- ret = CspPkcs1v15Enc(CAVIUM_BLOCKING, BT2, key->c_nSz, key->c_eSz,
- (word16)inLen, key->c_n, key->c_e, (byte*)in, out,
- &requestId, key->devId);
- if (ret != 0) {
- WOLFSSL_MSG("Cavium Enc BT2 failed");
- return -1;
+ if (nb) {
+ XMEMSET(nb, 0, sizeof(RsaNb));
}
- return key->c_nSz;
-}
-
-
-static INLINE void ato16(const byte* c, word16* u16)
-{
- *u16 = (c[0] << 8) | (c[1]);
-}
-
-static int CaviumRsaPrivateDecrypt(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key)
-{
- word32 requestId;
- word32 ret;
- word16 outSz = (word16)outLen;
-
- if (key == NULL || in == NULL || out == NULL || inLen != (word32)key->c_nSz)
- return -1;
-
- ret = CspPkcs1v15CrtDec(CAVIUM_BLOCKING, BT2, key->c_nSz, key->c_q,
- key->c_dQ, key->c_p, key->c_dP, key->c_u,
- (byte*)in, &outSz, out, &requestId, key->devId);
- if (ret != 0) {
- WOLFSSL_MSG("Cavium CRT Dec BT2 failed");
- return -1;
- }
- ato16((const byte*)&outSz, &outSz);
+ /* Allow nb == NULL to clear non-block mode */
+ key->nb = nb;
- return outSz;
+ return 0;
}
-
-
-static int CaviumRsaSSL_Sign(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key)
+#ifdef WC_RSA_NONBLOCK_TIME
+int wc_RsaSetNonBlockTime(RsaKey* key, word32 maxBlockUs, word32 cpuMHz)
{
- word32 requestId;
- word32 ret;
-
- if (key == NULL || in == NULL || out == NULL || inLen == 0 || outLen <
- (word32)key->c_nSz)
- return -1;
-
- ret = CspPkcs1v15CrtEnc(CAVIUM_BLOCKING, BT1, key->c_nSz, (word16)inLen,
- key->c_q, key->c_dQ, key->c_p, key->c_dP, key->c_u,
- (byte*)in, out, &requestId, key->devId);
- if (ret != 0) {
- WOLFSSL_MSG("Cavium CRT Enc BT1 failed");
- return -1;
+ if (key == NULL || key->nb == NULL) {
+ return BAD_FUNC_ARG;
}
- return key->c_nSz;
-}
-
-
-static int CaviumRsaSSL_Verify(const byte* in, word32 inLen, byte* out,
- word32 outLen, RsaKey* key)
-{
- word32 requestId;
- word32 ret;
- word16 outSz = (word16)outLen;
- if (key == NULL || in == NULL || out == NULL || inLen != (word32)key->c_nSz)
- return -1;
-
- ret = CspPkcs1v15Dec(CAVIUM_BLOCKING, BT1, key->c_nSz, key->c_eSz,
- key->c_n, key->c_e, (byte*)in, &outSz, out,
- &requestId, key->devId);
- if (ret != 0) {
- WOLFSSL_MSG("Cavium Dec BT1 failed");
- return -1;
- }
- outSz = ntohs(outSz);
+ /* calculate maximum number of instructions to block */
+ key->nb->exptmod.maxBlockInst = cpuMHz * maxBlockUs;
- return outSz;
+ return 0;
}
+#endif /* WC_RSA_NONBLOCK_TIME */
+#endif /* WC_RSA_NONBLOCK */
-
-#endif /* HAVE_CAVIUM */
-
-#endif /* HAVE_FIPS */
#endif /* NO_RSA */
-
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/selftest.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/selftest.c
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/selftest.c
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha.c
index be8cf17af..5c80563e1 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha.c
@@ -1,8 +1,8 @@
/* sha.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,7 +16,7 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
@@ -28,431 +28,855 @@
#if !defined(NO_SHA)
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$j")
+ #pragma const_seg(".fipsB$j")
+ #endif
+#endif
+
#include <wolfssl/wolfcrypt/sha.h>
-#include <wolfssl/wolfcrypt/logging.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
-#ifdef NO_INLINE
- #include <wolfssl/wolfcrypt/misc.h>
-#else
- #include <wolfcrypt/src/misc.c>
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
#endif
/* fips wrapper calls, user can call direct */
-#ifdef HAVE_FIPS
- int wc_InitSha(Sha* sha)
- {
- return InitSha_fips(sha);
- }
-
-
- int wc_ShaUpdate(Sha* sha, const byte* data, word32 len)
- {
- return ShaUpdate_fips(sha, data, len);
- }
+#if defined(HAVE_FIPS) && \
+ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
+ int wc_InitSha(wc_Sha* sha)
+ {
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return InitSha_fips(sha);
+ }
+ int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId)
+ {
+ (void)heap;
+ (void)devId;
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return InitSha_fips(sha);
+ }
- int wc_ShaFinal(Sha* sha, byte* out)
- {
- return ShaFinal_fips(sha,out);
+ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
+ {
+ if (sha == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+ return ShaUpdate_fips(sha, data, len);
}
- int wc_ShaHash(const byte* data, word32 sz, byte* out)
+ int wc_ShaFinal(wc_Sha* sha, byte* out)
+ {
+ if (sha == NULL || out == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return ShaFinal_fips(sha,out);
+ }
+ void wc_ShaFree(wc_Sha* sha)
{
- return ShaHash(data, sz, out);
+ (void)sha;
+ /* Not supported in FIPS */
}
-#else /* else build without fips */
+#else /* else build without fips, or for FIPS v2 */
+
#if defined(WOLFSSL_TI_HASH)
/* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
-#else
-
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define wc_InitSha wc_InitSha_sw
-#define wc_ShaUpdate wc_ShaUpdate_sw
-#define wc_ShaFinal wc_ShaFinal_sw
-#endif
+#else
-#ifdef FREESCALE_MMCAU
- #include "cau_api.h"
- #define XTRANSFORM(S,B) cau_sha1_hash_n((B), 1, ((S))->digest)
+#include <wolfssl/wolfcrypt/logging.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
#else
- #define XTRANSFORM(S,B) Transform((S))
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
#endif
-#ifdef STM32F2_HASH
-/*
- * STM32F2 hardware SHA1 support through the STM32F2 standard peripheral
- * library. Documentation located in STM32F2xx Standard Peripheral Library
- * document (See note in README).
- */
-#include "stm32f2xx.h"
-#include "stm32f2xx_hash.h"
-int wc_InitSha(Sha* sha)
-{
- /* STM32F2 struct notes:
- * sha->buffer = first 4 bytes used to hold partial block if needed
- * sha->buffLen = num bytes currently stored in sha->buffer
- * sha->loLen = num bytes that have been written to STM32 FIFO
- */
- XMEMSET(sha->buffer, 0, SHA_REG_SIZE);
- sha->buffLen = 0;
- sha->loLen = 0;
-
- /* initialize HASH peripheral */
- HASH_DeInit();
-
- /* configure algo used, algo mode, datatype */
- HASH->CR &= ~ (HASH_CR_ALGO | HASH_CR_DATATYPE | HASH_CR_MODE);
- HASH->CR |= (HASH_AlgoSelection_SHA1 | HASH_AlgoMode_HASH
- | HASH_DataType_8b);
-
- /* reset HASH processor */
- HASH->CR |= HASH_CR_INIT;
+/* Hardware Acceleration */
+#if defined(WOLFSSL_PIC32MZ_HASH)
+ #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
- return 0;
-}
+#elif defined(STM32_HASH)
-int wc_ShaUpdate(Sha* sha, const byte* data, word32 len)
-{
- word32 i = 0;
- word32 fill = 0;
- word32 diff = 0;
-
- /* if saved partial block is available */
- if (sha->buffLen) {
- fill = 4 - sha->buffLen;
-
- /* if enough data to fill, fill and push to FIFO */
- if (fill <= len) {
- XMEMCPY((byte*)sha->buffer + sha->buffLen, data, fill);
- HASH_DataIn(*(uint32_t*)sha->buffer);
-
- data += fill;
- len -= fill;
- sha->loLen += 4;
- sha->buffLen = 0;
- } else {
- /* append partial to existing stored block */
- XMEMCPY((byte*)sha->buffer + sha->buffLen, data, len);
- sha->buffLen += len;
- return;
+ /* Supports CubeMX HAL or Standard Peripheral Library */
+ int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId)
+ {
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
}
+
+ (void)devId;
+ (void)heap;
+
+ wc_Stm32_Hash_Init(&sha->stmCtx);
+
+ return 0;
}
- /* write input block in the IN FIFO */
- for(i = 0; i < len; i += 4)
+ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
{
- diff = len - i;
- if ( diff < 4) {
- /* store incomplete last block, not yet in FIFO */
- XMEMSET(sha->buffer, 0, SHA_REG_SIZE);
- XMEMCPY((byte*)sha->buffer, data, diff);
- sha->buffLen = diff;
- } else {
- HASH_DataIn(*(uint32_t*)data);
- data+=4;
+ int ret;
+
+ if (sha == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ ret = wc_Stm32_Hash_Update(&sha->stmCtx, HASH_AlgoSelection_SHA1,
+ data, len);
+ wolfSSL_CryptHwMutexUnLock();
}
+ return ret;
}
- /* keep track of total data length thus far */
- sha->loLen += (len - sha->buffLen);
+ int wc_ShaFinal(wc_Sha* sha, byte* hash)
+ {
+ int ret;
- return 0;
-}
+ if (sha == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
-int wc_ShaFinal(Sha* sha, byte* hash)
-{
- __IO uint16_t nbvalidbitsdata = 0;
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ ret = wc_Stm32_Hash_Final(&sha->stmCtx, HASH_AlgoSelection_SHA1,
+ hash, WC_SHA_DIGEST_SIZE);
+ wolfSSL_CryptHwMutexUnLock();
+ }
+
+ (void)wc_InitSha(sha); /* reset state */
- /* finish reading any trailing bytes into FIFO */
- if (sha->buffLen) {
- HASH_DataIn(*(uint32_t*)sha->buffer);
- sha->loLen += sha->buffLen;
+ return ret;
}
- /* calculate number of valid bits in last word of input data */
- nbvalidbitsdata = 8 * (sha->loLen % SHA_REG_SIZE);
- /* configure number of valid bits in last word of the data */
- HASH_SetLastWordValidBitsNbr(nbvalidbitsdata);
+#elif defined(FREESCALE_LTC_SHA)
- /* start HASH processor */
- HASH_StartDigest();
+ #include "fsl_ltc.h"
+ int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId)
+ {
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
+ }
- /* wait until Busy flag == RESET */
- while (HASH_GetFlagStatus(HASH_FLAG_BUSY) != RESET) {}
+ (void)devId;
+ (void)heap;
- /* read message digest */
- sha->digest[0] = HASH->HR[0];
- sha->digest[1] = HASH->HR[1];
- sha->digest[2] = HASH->HR[2];
- sha->digest[3] = HASH->HR[3];
- sha->digest[4] = HASH->HR[4];
+ LTC_HASH_Init(LTC_BASE, &sha->ctx, kLTC_Sha1, NULL, 0);
+ return 0;
+ }
- ByteReverseWords(sha->digest, sha->digest, SHA_DIGEST_SIZE);
+ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
+ {
+ LTC_HASH_Update(&sha->ctx, data, len);
+ return 0;
+ }
- XMEMCPY(hash, sha->digest, SHA_DIGEST_SIZE);
+ int wc_ShaFinal(wc_Sha* sha, byte* hash)
+ {
+ uint32_t hashlen = WC_SHA_DIGEST_SIZE;
+ LTC_HASH_Finish(&sha->ctx, hash, &hashlen);
+ return wc_InitSha(sha); /* reset state */
+ }
- return wc_InitSha(sha); /* reset state */
-}
-#else /* wc_ software implementation */
+#elif defined(FREESCALE_MMCAU_SHA)
+
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ #include "cau_api.h"
+ #else
+ #include "fsl_mmcau.h"
+ #endif
+
+ #define USE_SHA_SOFTWARE_IMPL /* Only for API's, actual transform is here */
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
+ #define XTRANSFORM(S,B) Transform((S),(B))
+ #define XTRANSFORM_LEN(S,B,L) Transform_Len((S),(B),(L))
- static INLINE word32 min(word32 a, word32 b)
+ #ifndef WC_HASH_DATA_ALIGNMENT
+ /* these hardware API's require 4 byte (word32) alignment */
+ #define WC_HASH_DATA_ALIGNMENT 4
+ #endif
+
+ static int InitSha(wc_Sha* sha)
{
- return a > b ? b : a;
+ int ret = 0;
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_sha1_initialize_output(sha->digest);
+ #else
+ MMCAU_SHA1_InitializeOutput((uint32_t*)sha->digest);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
+
+ sha->buffLen = 0;
+ sha->loLen = 0;
+ sha->hiLen = 0;
+
+ return ret;
}
-#endif /* WOLFSSL_HAVE_MIN */
+ static int Transform(wc_Sha* sha, const byte* data)
+ {
+ int ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_sha1_hash_n((byte*)data, 1, sha->digest);
+ #else
+ MMCAU_SHA1_HashN((byte*)data, 1, (uint32_t*)sha->digest);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
+ }
+ static int Transform_Len(wc_Sha* sha, const byte* data, word32 len)
+ {
+ int ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0
+ if ((size_t)data % WC_HASH_DATA_ALIGNMENT) {
+ /* data pointer is NOT aligned,
+ * so copy and perform one block at a time */
+ byte* local = (byte*)sha->buffer;
+ while (len >= WC_SHA_BLOCK_SIZE) {
+ XMEMCPY(local, data, WC_SHA_BLOCK_SIZE);
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_sha1_hash_n(local, 1, sha->digest);
+ #else
+ MMCAU_SHA1_HashN(local, 1, sha->digest);
+ #endif
+ data += WC_SHA_BLOCK_SIZE;
+ len -= WC_SHA_BLOCK_SIZE;
+ }
+ }
+ else
+ #endif
+ {
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_sha1_hash_n((byte*)data, len/WC_SHA_BLOCK_SIZE, sha->digest);
+ #else
+ MMCAU_SHA1_HashN((byte*)data, len/WC_SHA_BLOCK_SIZE,
+ (uint32_t*)sha->digest);
+ #endif
+ }
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
+ }
-int wc_InitSha(Sha* sha)
-{
-#ifdef FREESCALE_MMCAU
- cau_sha1_initialize_output(sha->digest);
-#else
- sha->digest[0] = 0x67452301L;
- sha->digest[1] = 0xEFCDAB89L;
- sha->digest[2] = 0x98BADCFEL;
- sha->digest[3] = 0x10325476L;
- sha->digest[4] = 0xC3D2E1F0L;
-#endif
+#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
+ /* wolfcrypt/src/port/caam/caam_sha.c */
- sha->buffLen = 0;
- sha->loLen = 0;
- sha->hiLen = 0;
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
- return 0;
-}
+ #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h"
-#ifndef FREESCALE_MMCAU
-
-#define blk0(i) (W[i] = sha->buffer[i])
-#define blk1(i) (W[(i)&15] = \
-rotlFixed(W[((i)+13)&15]^W[((i)+8)&15]^W[((i)+2)&15]^W[(i)&15],1))
-
-#define f1(x,y,z) ((z)^((x) &((y)^(z))))
-#define f2(x,y,z) ((x)^(y)^(z))
-#define f3(x,y,z) (((x)&(y))|((z)&((x)|(y))))
-#define f4(x,y,z) ((x)^(y)^(z))
-
-/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
-#define R0(v,w,x,y,z,i) (z)+= f1((w),(x),(y)) + blk0((i)) + 0x5A827999+ \
-rotlFixed((v),5); (w) = rotlFixed((w),30);
-#define R1(v,w,x,y,z,i) (z)+= f1((w),(x),(y)) + blk1((i)) + 0x5A827999+ \
-rotlFixed((v),5); (w) = rotlFixed((w),30);
-#define R2(v,w,x,y,z,i) (z)+= f2((w),(x),(y)) + blk1((i)) + 0x6ED9EBA1+ \
-rotlFixed((v),5); (w) = rotlFixed((w),30);
-#define R3(v,w,x,y,z,i) (z)+= f3((w),(x),(y)) + blk1((i)) + 0x8F1BBCDC+ \
-rotlFixed((v),5); (w) = rotlFixed((w),30);
-#define R4(v,w,x,y,z,i) (z)+= f4((w),(x),(y)) + blk1((i)) + 0xCA62C1D6+ \
-rotlFixed((v),5); (w) = rotlFixed((w),30);
-
-static void Transform(Sha* sha)
-{
- word32 W[SHA_BLOCK_SIZE / sizeof(word32)];
+ #define USE_SHA_SOFTWARE_IMPL
- /* Copy context->state[] to working vars */
- word32 a = sha->digest[0];
- word32 b = sha->digest[1];
- word32 c = sha->digest[2];
- word32 d = sha->digest[3];
- word32 e = sha->digest[4];
+ static int InitSha(wc_Sha* sha)
+ {
+ int ret = 0;
-#ifdef USE_SLOW_SHA
- word32 t, i;
+ sha->digest[0] = 0x67452301L;
+ sha->digest[1] = 0xEFCDAB89L;
+ sha->digest[2] = 0x98BADCFEL;
+ sha->digest[3] = 0x10325476L;
+ sha->digest[4] = 0xC3D2E1F0L;
- for (i = 0; i < 16; i++) {
- R0(a, b, c, d, e, i);
- t = e; e = d; d = c; c = b; b = a; a = t;
- }
+ sha->buffLen = 0;
+ sha->loLen = 0;
+ sha->hiLen = 0;
+
+ /* always start firstblock = 1 when using hw engine */
+ sha->ctx.isfirstblock = 1;
+ sha->ctx.sha_type = SHA1;
+ if(sha->ctx.mode == ESP32_SHA_HW){
+ /* release hw engine */
+ esp_sha_hw_unlock();
+ }
+ /* always set mode as INIT
+ * whether using HW or SW is determined at first call of update()
+ */
+ sha->ctx.mode = ESP32_SHA_INIT;
- for (; i < 20; i++) {
- R1(a, b, c, d, e, i);
- t = e; e = d; d = c; c = b; b = a; a = t;
+ return ret;
}
- for (; i < 40; i++) {
- R2(a, b, c, d, e, i);
- t = e; e = d; d = c; c = b; b = a; a = t;
- }
+#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+ !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
- for (; i < 60; i++) {
- R3(a, b, c, d, e, i);
- t = e; e = d; d = c; c = b; b = a; a = t;
- }
+ /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */
- for (; i < 80; i++) {
- R4(a, b, c, d, e, i);
- t = e; e = d; d = c; c = b; b = a; a = t;
- }
#else
- /* nearly 1 K bigger in code size but 25% faster */
- /* 4 rounds of 20 operations each. Loop unrolled. */
- R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
- R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
- R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
- R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
-
- R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
-
- R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
- R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
- R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
- R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
- R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
-
- R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
- R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
- R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
- R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
- R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
-
- R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
- R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
- R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
- R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
- R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
-#endif
+ /* Software implementation */
+ #define USE_SHA_SOFTWARE_IMPL
- /* Add the working vars back into digest state[] */
- sha->digest[0] += a;
- sha->digest[1] += b;
- sha->digest[2] += c;
- sha->digest[3] += d;
- sha->digest[4] += e;
-}
+ static int InitSha(wc_Sha* sha)
+ {
+ int ret = 0;
+
+ sha->digest[0] = 0x67452301L;
+ sha->digest[1] = 0xEFCDAB89L;
+ sha->digest[2] = 0x98BADCFEL;
+ sha->digest[3] = 0x10325476L;
+ sha->digest[4] = 0xC3D2E1F0L;
-#endif /* FREESCALE_MMCAU */
+ sha->buffLen = 0;
+ sha->loLen = 0;
+ sha->hiLen = 0;
+ #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ sha->flags = 0;
+ #endif
+
+ return ret;
+ }
+#endif /* End Hardware Acceleration */
+/* Software implementation */
+#ifdef USE_SHA_SOFTWARE_IMPL
-static INLINE void AddLength(Sha* sha, word32 len)
+static WC_INLINE void AddLength(wc_Sha* sha, word32 len)
{
word32 tmp = sha->loLen;
- if ( (sha->loLen += len) < tmp)
+ if ((sha->loLen += len) < tmp)
sha->hiLen++; /* carry low to high */
}
+/* Check if custom wc_Sha transform is used */
+#ifndef XTRANSFORM
+ #define XTRANSFORM(S,B) Transform((S),(B))
+
+ #define blk0(i) (W[i] = *((word32*)&data[i*sizeof(word32)]))
+ #define blk1(i) (W[(i)&15] = \
+ rotlFixed(W[((i)+13)&15]^W[((i)+8)&15]^W[((i)+2)&15]^W[(i)&15],1))
+
+ #define f1(x,y,z) ((z)^((x) &((y)^(z))))
+ #define f2(x,y,z) ((x)^(y)^(z))
+ #define f3(x,y,z) (((x)&(y))|((z)&((x)|(y))))
+ #define f4(x,y,z) ((x)^(y)^(z))
+
+ #ifdef WOLFSSL_NUCLEUS_1_2
+ /* nucleus.h also defines R1-R4 */
+ #undef R1
+ #undef R2
+ #undef R3
+ #undef R4
+ #endif
+
+ /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
+ #define R0(v,w,x,y,z,i) (z)+= f1((w),(x),(y)) + blk0((i)) + 0x5A827999+ \
+ rotlFixed((v),5); (w) = rotlFixed((w),30);
+ #define R1(v,w,x,y,z,i) (z)+= f1((w),(x),(y)) + blk1((i)) + 0x5A827999+ \
+ rotlFixed((v),5); (w) = rotlFixed((w),30);
+ #define R2(v,w,x,y,z,i) (z)+= f2((w),(x),(y)) + blk1((i)) + 0x6ED9EBA1+ \
+ rotlFixed((v),5); (w) = rotlFixed((w),30);
+ #define R3(v,w,x,y,z,i) (z)+= f3((w),(x),(y)) + blk1((i)) + 0x8F1BBCDC+ \
+ rotlFixed((v),5); (w) = rotlFixed((w),30);
+ #define R4(v,w,x,y,z,i) (z)+= f4((w),(x),(y)) + blk1((i)) + 0xCA62C1D6+ \
+ rotlFixed((v),5); (w) = rotlFixed((w),30);
+
+ static int Transform(wc_Sha* sha, const byte* data)
+ {
+ word32 W[WC_SHA_BLOCK_SIZE / sizeof(word32)];
+
+ /* Copy context->state[] to working vars */
+ word32 a = sha->digest[0];
+ word32 b = sha->digest[1];
+ word32 c = sha->digest[2];
+ word32 d = sha->digest[3];
+ word32 e = sha->digest[4];
+
+ #ifdef USE_SLOW_SHA
+ word32 t, i;
+
+ for (i = 0; i < 16; i++) {
+ R0(a, b, c, d, e, i);
+ t = e; e = d; d = c; c = b; b = a; a = t;
+ }
+
+ for (; i < 20; i++) {
+ R1(a, b, c, d, e, i);
+ t = e; e = d; d = c; c = b; b = a; a = t;
+ }
+
+ for (; i < 40; i++) {
+ R2(a, b, c, d, e, i);
+ t = e; e = d; d = c; c = b; b = a; a = t;
+ }
+
+ for (; i < 60; i++) {
+ R3(a, b, c, d, e, i);
+ t = e; e = d; d = c; c = b; b = a; a = t;
+ }
-int wc_ShaUpdate(Sha* sha, const byte* data, word32 len)
+ for (; i < 80; i++) {
+ R4(a, b, c, d, e, i);
+ t = e; e = d; d = c; c = b; b = a; a = t;
+ }
+ #else
+ /* nearly 1 K bigger in code size but 25% faster */
+ /* 4 rounds of 20 operations each. Loop unrolled. */
+ R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
+ R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
+ R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
+ R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
+
+ R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
+
+ R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
+ R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
+ R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
+ R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
+ R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
+
+ R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
+ R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
+ R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
+ R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
+ R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
+
+ R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
+ R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
+ R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
+ R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
+ R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+ #endif
+
+ /* Add the working vars back into digest state[] */
+ sha->digest[0] += a;
+ sha->digest[1] += b;
+ sha->digest[2] += c;
+ sha->digest[3] += d;
+ sha->digest[4] += e;
+
+ (void)data; /* Not used */
+
+ return 0;
+ }
+#endif /* !XTRANSFORM */
+
+
+int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId)
{
- /* do block size increments */
- byte* local = (byte*)sha->buffer;
+ int ret = 0;
+
+ if (sha == NULL)
+ return BAD_FUNC_ARG;
- while (len) {
- word32 add = min(len, SHA_BLOCK_SIZE - sha->buffLen);
- XMEMCPY(&local[sha->buffLen], data, add);
+ sha->heap = heap;
+#ifdef WOLF_CRYPTO_CB
+ sha->devId = devId;
+#endif
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ sha->ctx.mode = ESP32_SHA_INIT;
+ sha->ctx.isfirstblock = 1;
+#endif
+ ret = InitSha(sha);
+ if (ret != 0)
+ return ret;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+ ret = wolfAsync_DevCtxInit(&sha->asyncDev, WOLFSSL_ASYNC_MARKER_SHA,
+ sha->heap, devId);
+#else
+ (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
- sha->buffLen += add;
- data += add;
- len -= add;
+ return ret;
+}
- if (sha->buffLen == SHA_BLOCK_SIZE) {
-#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU)
- ByteReverseWords(sha->buffer, sha->buffer, SHA_BLOCK_SIZE);
+/* do block size increments/updates */
+int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len)
+{
+ int ret = 0;
+ word32 blocksLen;
+ byte* local;
+
+ if (sha == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLF_CRYPTO_CB
+ if (sha->devId != INVALID_DEVID) {
+ ret = wc_CryptoCb_ShaHash(sha, data, len, NULL);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ ret = 0; /* reset ret */
+ /* fall-through when unavailable */
+ }
#endif
- XTRANSFORM(sha, local);
- AddLength(sha, SHA_BLOCK_SIZE);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+ if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha(&sha->asyncDev, NULL, data, len);
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+ /* check that internal buffLen is valid */
+ if (sha->buffLen >= WC_SHA_BLOCK_SIZE)
+ return BUFFER_E;
+
+ if (data == NULL && len == 0) {
+ /* valid, but do nothing */
+ return 0;
+ }
+
+ /* add length for final */
+ AddLength(sha, len);
+
+ local = (byte*)sha->buffer;
+
+ /* process any remainder from previous operation */
+ if (sha->buffLen > 0) {
+ blocksLen = min(len, WC_SHA_BLOCK_SIZE - sha->buffLen);
+ XMEMCPY(&local[sha->buffLen], data, blocksLen);
+
+ sha->buffLen += blocksLen;
+ data += blocksLen;
+ len -= blocksLen;
+
+ if (sha->buffLen == WC_SHA_BLOCK_SIZE) {
+ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+ ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE);
+ #endif
+
+ #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if (sha->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha->ctx);
+ }
+ if (sha->ctx.mode == ESP32_SHA_SW) {
+ ret = XTRANSFORM(sha, (const byte*)local);
+ } else {
+ esp_sha_process(sha, (const byte*)local);
+ }
+ #else
+ ret = XTRANSFORM(sha, (const byte*)local);
+ #endif
+ if (ret != 0)
+ return ret;
+
sha->buffLen = 0;
}
}
- return 0;
+ /* process blocks */
+#ifdef XTRANSFORM_LEN
+ /* get number of blocks */
+ /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */
+ /* len (masked by 0xFFFFFFC0) returns block aligned length */
+ blocksLen = len & ~(WC_SHA_BLOCK_SIZE-1);
+ if (blocksLen > 0) {
+ /* Byte reversal performed in function if required. */
+ XTRANSFORM_LEN(sha, data, blocksLen);
+ data += blocksLen;
+ len -= blocksLen;
+ }
+#else
+ while (len >= WC_SHA_BLOCK_SIZE) {
+ word32* local32 = sha->buffer;
+ /* optimization to avoid memcpy if data pointer is properly aligned */
+ /* Little Endian requires byte swap, so can't use data directly */
+ #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER)
+ if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) {
+ local32 = (word32*)data;
+ }
+ else
+ #endif
+ {
+ XMEMCPY(local32, data, WC_SHA_BLOCK_SIZE);
+ }
+
+ data += WC_SHA_BLOCK_SIZE;
+ len -= WC_SHA_BLOCK_SIZE;
+
+ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+ ByteReverseWords(local32, local32, WC_SHA_BLOCK_SIZE);
+ #endif
+
+ #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if (sha->ctx.mode == ESP32_SHA_INIT){
+ esp_sha_try_hw_lock(&sha->ctx);
+ }
+ if (sha->ctx.mode == ESP32_SHA_SW){
+ ret = XTRANSFORM(sha, (const byte*)local32);
+ } else {
+ esp_sha_process(sha, (const byte*)local32);
+ }
+ #else
+ ret = XTRANSFORM(sha, (const byte*)local32);
+ #endif
+ }
+#endif /* XTRANSFORM_LEN */
+
+ /* save remainder */
+ if (len > 0) {
+ XMEMCPY(local, data, len);
+ sha->buffLen = len;
+ }
+
+ return ret;
}
+int wc_ShaFinalRaw(wc_Sha* sha, byte* hash)
+{
+#ifdef LITTLE_ENDIAN_ORDER
+ word32 digest[WC_SHA_DIGEST_SIZE / sizeof(word32)];
+#endif
+
+ if (sha == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef LITTLE_ENDIAN_ORDER
+ ByteReverseWords((word32*)digest, (word32*)sha->digest, WC_SHA_DIGEST_SIZE);
+ XMEMCPY(hash, digest, WC_SHA_DIGEST_SIZE);
+#else
+ XMEMCPY(hash, sha->digest, WC_SHA_DIGEST_SIZE);
+#endif
+
+ return 0;
+}
-int wc_ShaFinal(Sha* sha, byte* hash)
+int wc_ShaFinal(wc_Sha* sha, byte* hash)
{
- byte* local = (byte*)sha->buffer;
+ int ret;
+ byte* local;
+
+ if (sha == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ local = (byte*)sha->buffer;
- AddLength(sha, sha->buffLen); /* before adding pads */
+#ifdef WOLF_CRYPTO_CB
+ if (sha->devId != INVALID_DEVID) {
+ ret = wc_CryptoCb_ShaHash(sha, NULL, 0, hash);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ ret = 0; /* reset ret */
+ /* fall-through when unavailable */
+ }
+#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+ if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha(&sha->asyncDev, hash, NULL, WC_SHA_DIGEST_SIZE);
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
local[sha->buffLen++] = 0x80; /* add 1 */
/* pad with zeros */
- if (sha->buffLen > SHA_PAD_SIZE) {
- XMEMSET(&local[sha->buffLen], 0, SHA_BLOCK_SIZE - sha->buffLen);
- sha->buffLen += SHA_BLOCK_SIZE - sha->buffLen;
+ if (sha->buffLen > WC_SHA_PAD_SIZE) {
+ XMEMSET(&local[sha->buffLen], 0, WC_SHA_BLOCK_SIZE - sha->buffLen);
+ sha->buffLen += WC_SHA_BLOCK_SIZE - sha->buffLen;
+
+ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+ ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE);
+ #endif
+
+ #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if (sha->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha->ctx);
+ }
+ if (sha->ctx.mode == ESP32_SHA_SW) {
+ ret = XTRANSFORM(sha, (const byte*)local);
+ } else {
+ ret = esp_sha_process(sha, (const byte*)local);
+ }
+ #else
+ ret = XTRANSFORM(sha, (const byte*)local);
+ #endif
+ if (ret != 0)
+ return ret;
-#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU)
- ByteReverseWords(sha->buffer, sha->buffer, SHA_BLOCK_SIZE);
-#endif
- XTRANSFORM(sha, local);
sha->buffLen = 0;
}
- XMEMSET(&local[sha->buffLen], 0, SHA_PAD_SIZE - sha->buffLen);
+ XMEMSET(&local[sha->buffLen], 0, WC_SHA_PAD_SIZE - sha->buffLen);
+#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+ ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE);
+#endif
+
+ /* store lengths */
/* put lengths in bits */
- sha->hiLen = (sha->loLen >> (8*sizeof(sha->loLen) - 3)) +
- (sha->hiLen << 3);
+ sha->hiLen = (sha->loLen >> (8*sizeof(sha->loLen) - 3)) + (sha->hiLen << 3);
sha->loLen = sha->loLen << 3;
- /* store lengths */
-#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU)
- ByteReverseWords(sha->buffer, sha->buffer, SHA_BLOCK_SIZE);
-#endif
/* ! length ordering dependent on digest endian type ! */
- XMEMCPY(&local[SHA_PAD_SIZE], &sha->hiLen, sizeof(word32));
- XMEMCPY(&local[SHA_PAD_SIZE + sizeof(word32)], &sha->loLen, sizeof(word32));
+ XMEMCPY(&local[WC_SHA_PAD_SIZE], &sha->hiLen, sizeof(word32));
+ XMEMCPY(&local[WC_SHA_PAD_SIZE + sizeof(word32)], &sha->loLen, sizeof(word32));
-#ifdef FREESCALE_MMCAU
+#if defined(FREESCALE_MMCAU_SHA)
/* Kinetis requires only these bytes reversed */
- ByteReverseWords(&sha->buffer[SHA_PAD_SIZE/sizeof(word32)],
- &sha->buffer[SHA_PAD_SIZE/sizeof(word32)],
+ ByteReverseWords(&sha->buffer[WC_SHA_PAD_SIZE/sizeof(word32)],
+ &sha->buffer[WC_SHA_PAD_SIZE/sizeof(word32)],
2 * sizeof(word32));
#endif
- XTRANSFORM(sha, local);
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if (sha->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha->ctx);
+ }
+ if (sha->ctx.mode == ESP32_SHA_SW) {
+ ret = XTRANSFORM(sha, (const byte*)local);
+ } else {
+ ret = esp_sha_digest_process(sha, 1);
+ }
+#else
+ ret = XTRANSFORM(sha, (const byte*)local);
+#endif
+
#ifdef LITTLE_ENDIAN_ORDER
- ByteReverseWords(sha->digest, sha->digest, SHA_DIGEST_SIZE);
+ ByteReverseWords(sha->digest, sha->digest, WC_SHA_DIGEST_SIZE);
#endif
- XMEMCPY(hash, sha->digest, SHA_DIGEST_SIZE);
- return wc_InitSha(sha); /* reset state */
+ XMEMCPY(hash, sha->digest, WC_SHA_DIGEST_SIZE);
+
+ (void)InitSha(sha); /* reset state */
+
+ return ret;
}
-#endif /* STM32F2_HASH */
+#endif /* USE_SHA_SOFTWARE_IMPL */
-int wc_ShaHash(const byte* data, word32 len, byte* hash)
+int wc_InitSha(wc_Sha* sha)
{
- int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- Sha* sha;
-#else
- Sha sha[1];
-#endif
+ return wc_InitSha_ex(sha, NULL, INVALID_DEVID);
+}
-#ifdef WOLFSSL_SMALL_STACK
- sha = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+void wc_ShaFree(wc_Sha* sha)
+{
if (sha == NULL)
- return MEMORY_E;
-#endif
+ return;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+ wolfAsync_DevCtxFree(&sha->asyncDev, WOLFSSL_ASYNC_MARKER_SHA);
+#endif /* WOLFSSL_ASYNC_CRYPT */
- if ((ret = wc_InitSha(sha)) != 0) {
- WOLFSSL_MSG("wc_InitSha failed");
+#ifdef WOLFSSL_PIC32MZ_HASH
+ wc_ShaPic32Free(sha);
+#endif
+#if (defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+ !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH))
+ if (sha->msg != NULL) {
+ XFREE(sha->msg, sha->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ sha->msg = NULL;
}
- else {
- wc_ShaUpdate(sha, data, len);
- wc_ShaFinal(sha, hash);
+#endif
+}
+
+#endif /* !WOLFSSL_TI_HASH */
+#endif /* HAVE_FIPS */
+
+#ifndef WOLFSSL_TI_HASH
+#if !defined(WOLFSSL_RENESAS_TSIP_CRYPT) || \
+ defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
+int wc_ShaGetHash(wc_Sha* sha, byte* hash)
+{
+ int ret;
+ wc_Sha tmpSha;
+
+ if (sha == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if(sha->ctx.mode == ESP32_SHA_INIT){
+ esp_sha_try_hw_lock(&sha->ctx);
}
+ if(sha->ctx.mode != ESP32_SHA_SW)
+ esp_sha_digest_process(sha, 0);
+#endif
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ ret = wc_ShaCopy(sha, &tmpSha);
+ if (ret == 0) {
+ ret = wc_ShaFinal(&tmpSha, hash);
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ sha->ctx.mode = ESP32_SHA_SW;
#endif
+
+ }
return ret;
+}
+
+int wc_ShaCopy(wc_Sha* src, wc_Sha* dst)
+{
+ int ret = 0;
+
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
+
+ XMEMCPY(dst, src, sizeof(wc_Sha));
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+#ifdef WOLFSSL_PIC32MZ_HASH
+ ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
+#endif
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ dst->ctx.mode = src->ctx.mode;
+ dst->ctx.isfirstblock = src->ctx.isfirstblock;
+ dst->ctx.sha_type = src->ctx.sha_type;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
+ return ret;
}
+#endif /* defined(WOLFSSL_RENESAS_TSIP_CRYPT) ... */
+#endif /* !WOLFSSL_TI_HASH */
-#endif /* HAVE_FIPS */
-#endif /* WOLFSSL_TI_HASH */
-#endif /* NO_SHA */
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_ShaSetFlags(wc_Sha* sha, word32 flags)
+{
+ if (sha) {
+ sha->flags = flags;
+ }
+ return 0;
+}
+int wc_ShaGetFlags(wc_Sha* sha, word32* flags)
+{
+ if (sha && flags) {
+ *flags = sha->flags;
+ }
+ return 0;
+}
+#endif
+
+#endif /* !NO_SHA */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256.c
index f9f02b003..eb0911b01 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256.c
@@ -1,8 +1,8 @@
/* sha256.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,299 +16,604 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
-/* code submitted by raphael.huck@efixo.com */
-
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
-#include <wolfssl/wolfcrypt/sha256.h>
-#if !defined(NO_SHA256)
-#ifdef HAVE_FIPS
+/*
+ * SHA256 Build Options:
+ * USE_SLOW_SHA256: Reduces code size by not partially unrolling
+ (~2KB smaller and ~25% slower) (default OFF)
+ * WOLFSSL_SHA256_BY_SPEC: Uses the Ch/Maj based on SHA256 specification
+ (default ON)
+ * WOLFSSL_SHA256_ALT_CH_MAJ: Alternate Ch/Maj that is easier for compilers to
+ optimize and recognize as SHA256 (default OFF)
+ * SHA256_MANY_REGISTERS: A SHA256 version that keeps all data in registers
+ and partial unrolled (default OFF)
+ */
-int wc_InitSha256(Sha256* sha)
-{
- return InitSha256_fips(sha);
-}
+/* Default SHA256 to use Ch/Maj based on specification */
+#if !defined(WOLFSSL_SHA256_BY_SPEC) && !defined(WOLFSSL_SHA256_ALT_CH_MAJ)
+ #define WOLFSSL_SHA256_BY_SPEC
+#endif
-int wc_Sha256Update(Sha256* sha, const byte* data, word32 len)
-{
- return Sha256Update_fips(sha, data, len);
-}
+#if !defined(NO_SHA256) && !defined(WOLFSSL_ARMASM)
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
-int wc_Sha256Final(Sha256* sha, byte* out)
-{
- return Sha256Final_fips(sha, out);
-}
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$d")
+ #pragma const_seg(".fipsB$d")
+ #endif
+#endif
-int wc_Sha256Hash(const byte* data, word32 len, byte* out)
-{
- return Sha256Hash(data, len, out);
-}
+#include <wolfssl/wolfcrypt/sha256.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#include <wolfssl/wolfcrypt/hash.h>
+
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+/* fips wrapper calls, user can call direct */
+#if defined(HAVE_FIPS) && \
+ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
+
+ int wc_InitSha256(wc_Sha256* sha)
+ {
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return InitSha256_fips(sha);
+ }
+ int wc_InitSha256_ex(wc_Sha256* sha, void* heap, int devId)
+ {
+ (void)heap;
+ (void)devId;
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return InitSha256_fips(sha);
+ }
+ int wc_Sha256Update(wc_Sha256* sha, const byte* data, word32 len)
+ {
+ if (sha == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (data == NULL && len == 0) {
+ /* valid, but do nothing */
+ return 0;
+ }
+
+ return Sha256Update_fips(sha, data, len);
+ }
+ int wc_Sha256Final(wc_Sha256* sha, byte* out)
+ {
+ if (sha == NULL || out == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return Sha256Final_fips(sha, out);
+ }
+ void wc_Sha256Free(wc_Sha256* sha)
+ {
+ (void)sha;
+ /* Not supported in FIPS */
+ }
-#else /* else build without fips */
+#else /* else build without fips, or for FIPS v2 */
-#if !defined(NO_SHA256) && defined(WOLFSSL_TI_HASH)
+
+#if defined(WOLFSSL_TI_HASH)
/* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
+#elif defined(WOLFSSL_CRYPTOCELL)
+ /* wc_port.c includes wolfcrypt/src/port/arm/cryptoCellHash.c */
#else
-#if !defined (ALIGN32)
- #if defined (__GNUC__)
- #define ALIGN32 __attribute__ ( (aligned (32)))
- #elif defined(_MSC_VER)
- /* disable align warning, we want alignment ! */
- #pragma warning(disable: 4324)
- #define ALIGN32 __declspec (align (32))
- #else
- #define ALIGN32
- #endif
-#endif
+#include <wolfssl/wolfcrypt/logging.h>
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define wc_InitSha256 wc_InitSha256_sw
-#define wc_Sha256Update wc_Sha256Update_sw
-#define wc_Sha256Final wc_Sha256Final_sw
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
#endif
-#ifdef HAVE_FIPS
- /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
- #define FIPS_NO_WRAPPERS
+#ifdef WOLFSSL_DEVCRYPTO_HASH
+ #include <wolfssl/wolfcrypt/port/devcrypto/wc_devcrypto.h>
#endif
+
+
#if defined(USE_INTEL_SPEEDUP)
-#define HAVE_INTEL_AVX1
-#define HAVE_INTEL_AVX2
+ #if defined(__GNUC__) && ((__GNUC__ < 4) || \
+ (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+ #undef NO_AVX2_SUPPORT
+ #define NO_AVX2_SUPPORT
+ #endif
+ #if defined(__clang__) && ((__clang_major__ < 3) || \
+ (__clang_major__ == 3 && __clang_minor__ <= 5))
+ #define NO_AVX2_SUPPORT
+ #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
+ #undef NO_AVX2_SUPPORT
+ #endif
-#if defined(DEBUG_XMM)
-#include "stdio.h"
+ #define HAVE_INTEL_AVX1
+ #ifndef NO_AVX2_SUPPORT
+ #define HAVE_INTEL_AVX2
+ #endif
+#endif /* USE_INTEL_SPEEDUP */
+
+#if defined(HAVE_INTEL_AVX2)
+ #define HAVE_INTEL_RORX
#endif
+
+#if !defined(WOLFSSL_PIC32MZ_HASH) && !defined(STM32_HASH_SHA2) && \
+ (!defined(WOLFSSL_IMX6_CAAM) || defined(NO_IMX6_CAAM_HASH)) && \
+ !defined(WOLFSSL_AFALG_HASH) && !defined(WOLFSSL_DEVCRYPTO_HASH) && \
+ (!defined(WOLFSSL_ESP32WROOM32_CRYPT) || defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)) && \
+ (!defined(WOLFSSL_RENESAS_TSIP_CRYPT) || defined(NO_WOLFSSL_RENESAS_TSIP_HASH))
+
+static int InitSha256(wc_Sha256* sha256)
+{
+ int ret = 0;
+
+ if (sha256 == NULL)
+ return BAD_FUNC_ARG;
+
+ XMEMSET(sha256->digest, 0, sizeof(sha256->digest));
+ sha256->digest[0] = 0x6A09E667L;
+ sha256->digest[1] = 0xBB67AE85L;
+ sha256->digest[2] = 0x3C6EF372L;
+ sha256->digest[3] = 0xA54FF53AL;
+ sha256->digest[4] = 0x510E527FL;
+ sha256->digest[5] = 0x9B05688CL;
+ sha256->digest[6] = 0x1F83D9ABL;
+ sha256->digest[7] = 0x5BE0CD19L;
+
+ sha256->buffLen = 0;
+ sha256->loLen = 0;
+ sha256->hiLen = 0;
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ sha256->flags = 0;
#endif
-#if defined(HAVE_INTEL_AVX2)
-#define HAVE_INTEL_RORX
+ return ret;
+}
#endif
-
-/*****
-Intel AVX1/AVX2 Macro Control Structure
-#define HAVE_INTEL_AVX1
-#define HAVE_INTEL_AVX2
+/* Hardware Acceleration */
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-#define HAVE_INTEL_RORX
+ /* in case intel instructions aren't available, plus we need the K[] global */
+ #define NEED_SOFT_SHA256
+ /*****
+ Intel AVX1/AVX2 Macro Control Structure
-int InitSha256(Sha256* sha256) {
- Save/Recover XMM, YMM
- ...
-}
+ #define HAVE_INTEL_AVX1
+ #define HAVE_INTEL_AVX2
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
- Transform() ; Function prototype
-#else
- Transform() { }
- int Sha256Final() {
- Save/Recover XMM, YMM
- ...
- }
-#endif
+ #define HAVE_INTEL_RORX
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
- #if defined(HAVE_INTEL_RORX
- #define RND with rorx instuction
+
+ int InitSha256(wc_Sha256* sha256) {
+ Save/Recover XMM, YMM
+ ...
+ }
+
+ #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+ Transform_Sha256(); Function prototype
#else
- #define RND
+ Transform_Sha256() { }
+ int Sha256Final() {
+ Save/Recover XMM, YMM
+ ...
+ }
#endif
-#endif
-#if defined(HAVE_INTEL_AVX1)
-
- #define XMM Instructions/inline asm
-
- int Transform() {
- Stitched Message Sched/Round
- }
-
-#elif defined(HAVE_INTEL_AVX2)
-
- #define YMM Instructions/inline asm
-
- int Transform() {
- More granural Stitched Message Sched/Round
- }
-
-*/
+ #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+ #if defined(HAVE_INTEL_RORX
+ #define RND with rorx instruction
+ #else
+ #define RND
+ #endif
+ #endif
+ #if defined(HAVE_INTEL_AVX1)
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ #define XMM Instructions/inline asm
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
+ int Transform_Sha256() {
+ Stitched Message Sched/Round
+ }
-#ifndef _MSC_VER
- #define cpuid(reg, leaf, sub)\
- __asm__ __volatile__ ("cpuid":\
- "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
- "a" (leaf), "c"(sub));
+ #elif defined(HAVE_INTEL_AVX2)
- #define XASM_LINK(f) asm(f)
-#else
+ #define YMM Instructions/inline asm
- #include <intrin.h>
- #define cpuid(a,b) __cpuid((int*)a,b)
-
- #define XASM_LINK(f)
-
-#endif /* _MSC_VER */
-
-#define EAX 0
-#define EBX 1
-#define ECX 2
-#define EDX 3
-
-#define CPUID_AVX1 0x1
-#define CPUID_AVX2 0x2
-#define CPUID_RDRAND 0x4
-#define CPUID_RDSEED 0x8
-#define CPUID_BMI2 0x10 /* MULX, RORX */
-
-#define IS_INTEL_AVX1 (cpuid_flags&CPUID_AVX1)
-#define IS_INTEL_AVX2 (cpuid_flags&CPUID_AVX2)
-#define IS_INTEL_BMI2 (cpuid_flags&CPUID_BMI2)
-#define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND)
-#define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED)
-
-static word32 cpuid_check = 0 ;
-static word32 cpuid_flags = 0 ;
-
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
- int got_intel_cpu=0;
- unsigned int reg[5];
-
- reg[4] = '\0' ;
- cpuid(reg, 0, 0);
- if(memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
- memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
- memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
- got_intel_cpu = 1;
- }
- if (got_intel_cpu) {
- cpuid(reg, leaf, sub);
- return((reg[num]>>bit)&0x1) ;
- }
- return 0 ;
-}
+ int Transform_Sha256() {
+ More granular Stitched Message Sched/Round
+ }
-static int set_cpuid_flags(void) {
- if(cpuid_check==0) {
- if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
- if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; }
- if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
- if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ; }
- if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ; }
- cpuid_check = 1 ;
- return 0 ;
- }
- return 1 ;
-}
+ #endif
+ */
-/* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha512 */
-static int Transform(Sha256* sha256);
+ /* Each platform needs to query info type 1 from cpuid to see if aesni is
+ * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
+ */
-#if defined(HAVE_INTEL_AVX1)
-static int Transform_AVX1(Sha256 *sha256) ;
+ /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
+ static int Transform_Sha256(wc_Sha256* sha256, const byte* data);
+
+#ifdef __cplusplus
+ extern "C" {
#endif
-#if defined(HAVE_INTEL_AVX2)
-static int Transform_AVX2(Sha256 *sha256) ;
-static int Transform_AVX1_RORX(Sha256 *sha256) ;
+
+ #if defined(HAVE_INTEL_AVX1)
+ extern int Transform_Sha256_AVX1(wc_Sha256 *sha256, const byte* data);
+ extern int Transform_Sha256_AVX1_Len(wc_Sha256* sha256,
+ const byte* data, word32 len);
+ #endif
+ #if defined(HAVE_INTEL_AVX2)
+ extern int Transform_Sha256_AVX2(wc_Sha256 *sha256, const byte* data);
+ extern int Transform_Sha256_AVX2_Len(wc_Sha256* sha256,
+ const byte* data, word32 len);
+ #ifdef HAVE_INTEL_RORX
+ extern int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256, const byte* data);
+ extern int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256,
+ const byte* data, word32 len);
+ extern int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256, const byte* data);
+ extern int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256,
+ const byte* data, word32 len);
+ #endif /* HAVE_INTEL_RORX */
+ #endif /* HAVE_INTEL_AVX2 */
+
+#ifdef __cplusplus
+ } /* extern "C" */
#endif
-static int (*Transform_p)(Sha256* sha256) /* = _Transform */;
+ static int (*Transform_Sha256_p)(wc_Sha256* sha256, const byte* data);
+ /* = _Transform_Sha256 */
+ static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, const byte* data,
+ word32 len);
+ /* = NULL */
+ static int transform_check = 0;
+ static word32 intel_flags;
-#define XTRANSFORM(sha256, B) (*Transform_p)(sha256)
+ #define XTRANSFORM(S, D) (*Transform_Sha256_p)((S),(D))
+ #define XTRANSFORM_LEN(S, D, L) (*Transform_Sha256_Len_p)((S),(D),(L))
-static void set_Transform(void) {
- if(set_cpuid_flags())return ;
+ static void Sha256_SetTransform(void)
+ {
-#if defined(HAVE_INTEL_AVX2)
- if(IS_INTEL_AVX2 && IS_INTEL_BMI2){
- Transform_p = Transform_AVX1_RORX; return ;
- Transform_p = Transform_AVX2 ;
- /* for avoiding warning,"not used" */
- }
-#endif
-#if defined(HAVE_INTEL_AVX1)
- Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : Transform) ; return ;
-#endif
- Transform_p = Transform ; return ;
-}
+ if (transform_check)
+ return;
-#else
- #if defined(FREESCALE_MMCAU)
- #define XTRANSFORM(sha256, B) Transform(sha256, B)
- #else
- #define XTRANSFORM(sha256, B) Transform(sha256)
- #endif
-#endif
+ intel_flags = cpuid_get_flags();
-/* Dummy for saving MM_REGs on behalf of Transform */
-#if defined(HAVE_INTEL_AVX2)&& !defined(HAVE_INTEL_AVX1)
-#define SAVE_XMM_YMM __asm__ volatile("or %%r8d, %%r8d":::\
- "%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15")
-#elif defined(HAVE_INTEL_AVX1)
-#define SAVE_XMM_YMM __asm__ volatile("or %%r8d, %%r8d":::\
- "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10",\
- "xmm11","xmm12","xmm13","xmm14","xmm15")
-#else
-#define SAVE_XMM_YMM
-#endif
+ #ifdef HAVE_INTEL_AVX2
+ if (1 && IS_INTEL_AVX2(intel_flags)) {
+ #ifdef HAVE_INTEL_RORX
+ if (IS_INTEL_BMI2(intel_flags)) {
+ Transform_Sha256_p = Transform_Sha256_AVX2_RORX;
+ Transform_Sha256_Len_p = Transform_Sha256_AVX2_RORX_Len;
+ }
+ else
+ #endif
+ if (1)
+ {
+ Transform_Sha256_p = Transform_Sha256_AVX2;
+ Transform_Sha256_Len_p = Transform_Sha256_AVX2_Len;
+ }
+ #ifdef HAVE_INTEL_RORX
+ else {
+ Transform_Sha256_p = Transform_Sha256_AVX1_RORX;
+ Transform_Sha256_Len_p = Transform_Sha256_AVX1_RORX_Len;
+ }
+ #endif
+ }
+ else
+ #endif
+ #ifdef HAVE_INTEL_AVX1
+ if (IS_INTEL_AVX1(intel_flags)) {
+ Transform_Sha256_p = Transform_Sha256_AVX1;
+ Transform_Sha256_Len_p = Transform_Sha256_AVX1_Len;
+ }
+ else
+ #endif
+ {
+ Transform_Sha256_p = Transform_Sha256;
+ Transform_Sha256_Len_p = NULL;
+ }
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define InitSha256 InitSha256_sw
-#define Sha256Update Sha256Update_sw
-#define Sha256Final Sha256Final_sw
-#endif
+ transform_check = 1;
+ }
-#include <wolfssl/wolfcrypt/logging.h>
-#include <wolfssl/wolfcrypt/error-crypt.h>
+ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+ {
+ int ret = 0;
+ if (sha256 == NULL)
+ return BAD_FUNC_ARG;
-#ifdef NO_INLINE
- #include <wolfssl/wolfcrypt/misc.h>
-#else
- #include <wolfcrypt/src/misc.c>
-#endif
+ sha256->heap = heap;
+ #ifdef WOLF_CRYPTO_CB
+ sha256->devId = devId;
+ #endif
-#ifdef FREESCALE_MMCAU
- #include "cau_api.h"
-#endif
+ ret = InitSha256(sha256);
+ if (ret != 0)
+ return ret;
+
+ /* choose best Transform function under this runtime environment */
+ Sha256_SetTransform();
+
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+ ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
+ WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
+ #else
+ (void)devId;
+ #endif /* WOLFSSL_ASYNC_CRYPT */
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
+ return ret;
+ }
- static INLINE word32 min(word32 a, word32 b)
+#elif defined(FREESCALE_LTC_SHA)
+ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
{
- return a > b ? b : a;
+ (void)heap;
+ (void)devId;
+
+ LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0);
+
+ return 0;
}
-#endif /* WOLFSSL_HAVE_MIN */
+#elif defined(FREESCALE_MMCAU_SHA)
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ #include "cau_api.h"
+ #else
+ #include "fsl_mmcau.h"
+ #endif
-int wc_InitSha256(Sha256* sha256)
-{
- #ifdef FREESCALE_MMCAU
+ #define XTRANSFORM(S, D) Transform_Sha256((S),(D))
+ #define XTRANSFORM_LEN(S, D, L) Transform_Sha256_Len((S),(D),(L))
+
+ #ifndef WC_HASH_DATA_ALIGNMENT
+ /* these hardware API's require 4 byte (word32) alignment */
+ #define WC_HASH_DATA_ALIGNMENT 4
+ #endif
+
+ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+ {
+ int ret = 0;
+
+ (void)heap;
+ (void)devId;
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret != 0) {
+ return ret;
+ }
+
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
cau_sha256_initialize_output(sha256->digest);
#else
+ MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
+
+ sha256->buffLen = 0;
+ sha256->loLen = 0;
+ sha256->hiLen = 0;
+ #ifdef WOLFSSL_SMALL_STACK_CACHE
+ sha256->W = NULL;
+ #endif
+
+ return ret;
+ }
+
+ static int Transform_Sha256(wc_Sha256* sha256, const byte* data)
+ {
+ int ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_sha256_hash_n((byte*)data, 1, sha256->digest);
+ #else
+ MMCAU_SHA256_HashN((byte*)data, 1, sha256->digest);
+ #endif
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
+ }
+
+ static int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data,
+ word32 len)
+ {
+ int ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ #if defined(WC_HASH_DATA_ALIGNMENT) && WC_HASH_DATA_ALIGNMENT > 0
+ if ((size_t)data % WC_HASH_DATA_ALIGNMENT) {
+ /* data pointer is NOT aligned,
+ * so copy and perform one block at a time */
+ byte* local = (byte*)sha256->buffer;
+ while (len >= WC_SHA256_BLOCK_SIZE) {
+ XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE);
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_sha256_hash_n(local, 1, sha256->digest);
+ #else
+ MMCAU_SHA256_HashN(local, 1, sha256->digest);
+ #endif
+ data += WC_SHA256_BLOCK_SIZE;
+ len -= WC_SHA256_BLOCK_SIZE;
+ }
+ }
+ else
+ #endif
+ {
+ #ifdef FREESCALE_MMCAU_CLASSIC_SHA
+ cau_sha256_hash_n((byte*)data, len/WC_SHA256_BLOCK_SIZE,
+ sha256->digest);
+ #else
+ MMCAU_SHA256_HashN((byte*)data, len/WC_SHA256_BLOCK_SIZE,
+ sha256->digest);
+ #endif
+ }
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
+ }
+
+#elif defined(WOLFSSL_PIC32MZ_HASH)
+ #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
+
+#elif defined(STM32_HASH_SHA2)
+
+ /* Supports CubeMX HAL or Standard Peripheral Library */
+
+ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+ {
+ if (sha256 == NULL)
+ return BAD_FUNC_ARG;
+
+ (void)devId;
+ (void)heap;
+
+ wc_Stm32_Hash_Init(&sha256->stmCtx);
+ return 0;
+ }
+
+ int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
+ {
+ int ret = 0;
+
+ if (sha256 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ ret = wc_Stm32_Hash_Update(&sha256->stmCtx,
+ HASH_AlgoSelection_SHA256, data, len);
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
+ }
+
+ int wc_Sha256Final(wc_Sha256* sha256, byte* hash)
+ {
+ int ret = 0;
+
+ if (sha256 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ ret = wc_Stm32_Hash_Final(&sha256->stmCtx,
+ HASH_AlgoSelection_SHA256, hash, WC_SHA256_DIGEST_SIZE);
+ wolfSSL_CryptHwMutexUnLock();
+ }
+
+ (void)wc_InitSha256(sha256); /* reset state */
+
+ return ret;
+ }
+
+#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
+ /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */
+
+#elif defined(WOLFSSL_AFALG_HASH)
+ /* implemented in wolfcrypt/src/port/af_alg/afalg_hash.c */
+
+#elif defined(WOLFSSL_DEVCRYPTO_HASH)
+ /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */
+
+#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_HASH)
+ #include "hal_data.h"
+
+ #ifndef WOLFSSL_SCE_SHA256_HANDLE
+ #define WOLFSSL_SCE_SHA256_HANDLE g_sce_hash_0
+ #endif
+
+ #define WC_SHA256_DIGEST_WORD_SIZE 16
+ #define XTRANSFORM(S, D) wc_Sha256SCE_XTRANSFORM((S), (D))
+ static int wc_Sha256SCE_XTRANSFORM(wc_Sha256* sha256, const byte* data)
+ {
+ if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+ CRYPTO_WORD_ENDIAN_LITTLE)
+ {
+ ByteReverseWords((word32*)data, (word32*)data,
+ WC_SHA256_BLOCK_SIZE);
+ ByteReverseWords(sha256->digest, sha256->digest,
+ WC_SHA256_DIGEST_SIZE);
+ }
+
+ if (WOLFSSL_SCE_SHA256_HANDLE.p_api->hashUpdate(
+ WOLFSSL_SCE_SHA256_HANDLE.p_ctrl, (word32*)data,
+ WC_SHA256_DIGEST_WORD_SIZE, sha256->digest) != SSP_SUCCESS){
+ WOLFSSL_MSG("Unexpected hardware return value");
+ return WC_HW_E;
+ }
+
+ if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag ==
+ CRYPTO_WORD_ENDIAN_LITTLE)
+ {
+ ByteReverseWords((word32*)data, (word32*)data,
+ WC_SHA256_BLOCK_SIZE);
+ ByteReverseWords(sha256->digest, sha256->digest,
+ WC_SHA256_DIGEST_SIZE);
+ }
+
+ return 0;
+ }
+
+
+ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+ {
+ int ret = 0;
+ if (sha256 == NULL)
+ return BAD_FUNC_ARG;
+
+ sha256->heap = heap;
+
+ ret = InitSha256(sha256);
+ if (ret != 0)
+ return ret;
+
+ (void)devId;
+
+ return ret;
+ }
+
+#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+
+ #define NEED_SOFT_SHA256
+
+ static int InitSha256(wc_Sha256* sha256)
+ {
+ int ret = 0;
+
+ if (sha256 == NULL)
+ return BAD_FUNC_ARG;
+
+ XMEMSET(sha256->digest, 0, sizeof(sha256->digest));
sha256->digest[0] = 0x6A09E667L;
sha256->digest[1] = 0xBB67AE85L;
sha256->digest[2] = 0x3C6EF372L;
@@ -317,1450 +622,1023 @@ int wc_InitSha256(Sha256* sha256)
sha256->digest[5] = 0x9B05688CL;
sha256->digest[6] = 0x1F83D9ABL;
sha256->digest[7] = 0x5BE0CD19L;
- #endif
- sha256->buffLen = 0;
- sha256->loLen = 0;
- sha256->hiLen = 0;
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
- set_Transform() ; /* choose best Transform function under this runtime environment */
-#endif
-
- return 0;
-}
+ sha256->buffLen = 0;
+ sha256->loLen = 0;
+ sha256->hiLen = 0;
+
+ /* always start firstblock = 1 when using hw engine */
+ sha256->ctx.isfirstblock = 1;
+ sha256->ctx.sha_type = SHA2_256;
+ if(sha256->ctx.mode == ESP32_SHA_HW) {
+ /* release hw */
+ esp_sha_hw_unlock();
+ }
+ /* always set mode as INIT
+ * whether using HW or SW is determined at first call of update()
+ */
+ sha256->ctx.mode = ESP32_SHA_INIT;
+ return ret;
+ }
+ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+ {
+ int ret = 0;
-#if !defined(FREESCALE_MMCAU)
-static const ALIGN32 word32 K[64] = {
- 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
- 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
- 0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
- 0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
- 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
- 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
- 0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
- 0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
- 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
- 0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
- 0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
- 0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
- 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
-};
+ if (sha256 == NULL)
+ return BAD_FUNC_ARG;
-#endif
+ XMEMSET(sha256, 0, sizeof(wc_Sha256));
+ sha256->ctx.mode = ESP32_SHA_INIT;
+ sha256->ctx.isfirstblock = 1;
+ (void)devId;
-#if defined(FREESCALE_MMCAU)
+ ret = InitSha256(sha256);
-static int Transform(Sha256* sha256, byte* buf)
-{
- cau_sha256_hash_n(buf, 1, sha256->digest);
+ return ret;
+ }
- return 0;
-}
+#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+ !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
-#endif /* FREESCALE_MMCAU */
+ /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */
-#define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
-#define Maj(x,y,z) ((((x) | (y)) & (z)) | ((x) & (y)))
-#define R(x, n) (((x)&0xFFFFFFFFU)>>(n))
+#else
+ #define NEED_SOFT_SHA256
-#define S(x, n) rotrFixed(x, n)
-#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
-#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
-#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
-#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
+ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
+ {
+ int ret = 0;
+ if (sha256 == NULL)
+ return BAD_FUNC_ARG;
+
+ sha256->heap = heap;
+ #ifdef WOLF_CRYPTO_CB
+ sha256->devId = devId;
+ sha256->devCtx = NULL;
+ #endif
-#define RND(a,b,c,d,e,f,g,h,i) \
- t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
- t1 = Sigma0((a)) + Maj((a), (b), (c)); \
- (d) += t0; \
- (h) = t0 + t1;
+ ret = InitSha256(sha256);
+ if (ret != 0)
+ return ret;
-#if !defined(FREESCALE_MMCAU)
-static int Transform(Sha256* sha256)
-{
- word32 S[8], t0, t1;
- int i;
+ #ifdef WOLFSSL_SMALL_STACK_CACHE
+ sha256->W = NULL;
+ #endif
-#ifdef WOLFSSL_SMALL_STACK
- word32* W;
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+ ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
+ WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
+ #else
+ (void)devId;
+ #endif /* WOLFSSL_ASYNC_CRYPT */
- W = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (W == NULL)
- return MEMORY_E;
+ return ret;
+ }
+#endif /* End Hardware Acceleration */
+
+#ifdef NEED_SOFT_SHA256
+
+ static const ALIGN32 word32 K[64] = {
+ 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
+ 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
+ 0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
+ 0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
+ 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
+ 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
+ 0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
+ 0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
+ 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
+ 0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
+ 0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
+ 0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
+ 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
+ };
+
+/* Both versions of Ch and Maj are logically the same, but with the second set
+ the compilers can recognize them better for optimization */
+#ifdef WOLFSSL_SHA256_BY_SPEC
+ /* SHA256 math based on specification */
+ #define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
+ #define Maj(x,y,z) ((((x) | (y)) & (z)) | ((x) & (y)))
#else
- word32 W[64];
+ /* SHA256 math reworked for easier compiler optimization */
+ #define Ch(x,y,z) ((((y) ^ (z)) & (x)) ^ (z))
+ #define Maj(x,y,z) ((((x) ^ (y)) & ((y) ^ (z))) ^ (y))
#endif
+ #define R(x, n) (((x) & 0xFFFFFFFFU) >> (n))
+
+ #define S(x, n) rotrFixed(x, n)
+ #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
+ #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
+ #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
+ #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
+
+ #define a(i) S[(0-i) & 7]
+ #define b(i) S[(1-i) & 7]
+ #define c(i) S[(2-i) & 7]
+ #define d(i) S[(3-i) & 7]
+ #define e(i) S[(4-i) & 7]
+ #define f(i) S[(5-i) & 7]
+ #define g(i) S[(6-i) & 7]
+ #define h(i) S[(7-i) & 7]
+
+ #ifndef XTRANSFORM
+ #define XTRANSFORM(S, D) Transform_Sha256((S),(D))
+ #endif
- /* Copy context->state[] to working vars */
- for (i = 0; i < 8; i++)
- S[i] = sha256->digest[i];
-
- for (i = 0; i < 16; i++)
- W[i] = sha256->buffer[i];
-
- for (i = 16; i < 64; i++)
- W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
-
- for (i = 0; i < 64; i += 8) {
- RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
- RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
- RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
- RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
- RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
- RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
- RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
- RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
- }
+#ifndef SHA256_MANY_REGISTERS
+ #define RND(j) \
+ t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \
+ t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
+ d(j) += t0; \
+ h(j) = t0 + t1
+
+ static int Transform_Sha256(wc_Sha256* sha256, const byte* data)
+ {
+ word32 S[8], t0, t1;
+ int i;
+
+ #ifdef WOLFSSL_SMALL_STACK_CACHE
+ word32* W = sha256->W;
+ if (W == NULL) {
+ W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL,
+ DYNAMIC_TYPE_DIGEST);
+ if (W == NULL)
+ return MEMORY_E;
+ sha256->W = W;
+ }
+ #elif defined(WOLFSSL_SMALL_STACK)
+ word32* W;
+ W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (W == NULL)
+ return MEMORY_E;
+ #else
+ word32 W[WC_SHA256_BLOCK_SIZE];
+ #endif
+
+ /* Copy context->state[] to working vars */
+ for (i = 0; i < 8; i++)
+ S[i] = sha256->digest[i];
+
+ for (i = 0; i < 16; i++)
+ W[i] = *((word32*)&data[i*sizeof(word32)]);
+
+ for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++)
+ W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
+
+ #ifdef USE_SLOW_SHA256
+ /* not unrolled - ~2k smaller and ~25% slower */
+ for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
+ int j;
+ for (j = 0; j < 8; j++) { /* braces needed here for macros {} */
+ RND(j);
+ }
+ }
+ #else
+ /* partially loop unrolled */
+ for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
+ RND(0); RND(1); RND(2); RND(3);
+ RND(4); RND(5); RND(6); RND(7);
+ }
+ #endif /* USE_SLOW_SHA256 */
- /* Add the working vars back into digest state[] */
- for (i = 0; i < 8; i++) {
- sha256->digest[i] += S[i];
+ /* Add the working vars back into digest state[] */
+ for (i = 0; i < 8; i++) {
+ sha256->digest[i] += S[i];
+ }
+
+ #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SMALL_STACK_CACHE)
+ XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return 0;
}
+#else
+ /* SHA256 version that keeps all data in registers */
+ #define SCHED1(j) (W[j] = *((word32*)&data[j*sizeof(word32)]))
+ #define SCHED(j) ( \
+ W[ j & 15] += \
+ Gamma1(W[(j-2) & 15])+ \
+ W[(j-7) & 15] + \
+ Gamma0(W[(j-15) & 15]) \
+ )
+
+ #define RND1(j) \
+ t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + SCHED1(j); \
+ t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
+ d(j) += t0; \
+ h(j) = t0 + t1
+ #define RNDN(j) \
+ t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + SCHED(j); \
+ t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
+ d(j) += t0; \
+ h(j) = t0 + t1
+
+ static int Transform_Sha256(wc_Sha256* sha256, const byte* data)
+ {
+ word32 S[8], t0, t1;
+ int i;
+ word32 W[WC_SHA256_BLOCK_SIZE/sizeof(word32)];
+
+ /* Copy digest to working vars */
+ S[0] = sha256->digest[0];
+ S[1] = sha256->digest[1];
+ S[2] = sha256->digest[2];
+ S[3] = sha256->digest[3];
+ S[4] = sha256->digest[4];
+ S[5] = sha256->digest[5];
+ S[6] = sha256->digest[6];
+ S[7] = sha256->digest[7];
+
+ i = 0;
+ RND1( 0); RND1( 1); RND1( 2); RND1( 3);
+ RND1( 4); RND1( 5); RND1( 6); RND1( 7);
+ RND1( 8); RND1( 9); RND1(10); RND1(11);
+ RND1(12); RND1(13); RND1(14); RND1(15);
+ /* 64 operations, partially loop unrolled */
+ for (i = 16; i < 64; i += 16) {
+ RNDN( 0); RNDN( 1); RNDN( 2); RNDN( 3);
+ RNDN( 4); RNDN( 5); RNDN( 6); RNDN( 7);
+ RNDN( 8); RNDN( 9); RNDN(10); RNDN(11);
+ RNDN(12); RNDN(13); RNDN(14); RNDN(15);
+ }
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ /* Add the working vars back into digest */
+ sha256->digest[0] += S[0];
+ sha256->digest[1] += S[1];
+ sha256->digest[2] += S[2];
+ sha256->digest[3] += S[3];
+ sha256->digest[4] += S[4];
+ sha256->digest[5] += S[5];
+ sha256->digest[6] += S[6];
+ sha256->digest[7] += S[7];
+
+ return 0;
+ }
+#endif /* SHA256_MANY_REGISTERS */
#endif
+/* End wc_ software implementation */
- return 0;
-}
-#endif /* #if !defined(FREESCALE_MMCAU) */
+#ifdef XTRANSFORM
-static INLINE void AddLength(Sha256* sha256, word32 len)
-{
- word32 tmp = sha256->loLen;
- if ( (sha256->loLen += len) < tmp)
- sha256->hiLen++; /* carry low to high */
-}
+ static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len)
+ {
+ word32 tmp = sha256->loLen;
+ if ((sha256->loLen += len) < tmp) {
+ sha256->hiLen++; /* carry low to high */
+ }
+ }
-int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
-{
+ /* do block size increments/updates */
+ static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
+ {
+ int ret = 0;
+ word32 blocksLen;
+ byte* local;
- /* do block size increments */
- byte* local = (byte*)sha256->buffer;
+ if (sha256 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (data == NULL && len == 0) {
+ /* valid, but do nothing */
+ return 0;
+ }
+
+ /* check that internal buffLen is valid */
+ if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE) {
+ return BUFFER_E;
+ }
- SAVE_XMM_YMM ; /* for Intel AVX */
+ /* add length for final */
+ AddLength(sha256, len);
- while (len) {
- word32 add = min(len, SHA256_BLOCK_SIZE - sha256->buffLen);
- XMEMCPY(&local[sha256->buffLen], data, add);
+ local = (byte*)sha256->buffer;
- sha256->buffLen += add;
- data += add;
- len -= add;
+ /* process any remainder from previous operation */
+ if (sha256->buffLen > 0) {
+ blocksLen = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
+ XMEMCPY(&local[sha256->buffLen], data, blocksLen);
- if (sha256->buffLen == SHA256_BLOCK_SIZE) {
- int ret;
+ sha256->buffLen += blocksLen;
+ data += blocksLen;
+ len -= blocksLen;
- #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU)
+ if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) {
+ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+ if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
#endif
- ByteReverseWords(sha256->buffer, sha256->buffer,
- SHA256_BLOCK_SIZE);
+ {
+ ByteReverseWords(sha256->buffer, sha256->buffer,
+ WC_SHA256_BLOCK_SIZE);
+ }
#endif
- ret = XTRANSFORM(sha256, local);
- if (ret != 0)
- return ret;
- AddLength(sha256, SHA256_BLOCK_SIZE);
- sha256->buffLen = 0;
+ #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if (sha256->ctx.mode == ESP32_SHA_INIT){
+ esp_sha_try_hw_lock(&sha256->ctx);
+ }
+ if (sha256->ctx.mode == ESP32_SHA_SW){
+ ret = XTRANSFORM(sha256, (const byte*)local);
+ } else {
+ esp_sha256_process(sha256, (const byte*)local);
+ }
+ #else
+ ret = XTRANSFORM(sha256, (const byte*)local);
+ #endif
+
+ if (ret == 0)
+ sha256->buffLen = 0;
+ else
+ len = 0; /* error */
+ }
+ }
+
+ /* process blocks */
+ #ifdef XTRANSFORM_LEN
+ #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ if (Transform_Sha256_Len_p != NULL)
+ #endif
+ {
+ /* get number of blocks */
+ /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */
+ /* len (masked by 0xFFFFFFC0) returns block aligned length */
+ blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
+ if (blocksLen > 0) {
+ /* Byte reversal and alignment handled in function if required */
+ XTRANSFORM_LEN(sha256, data, blocksLen);
+ data += blocksLen;
+ len -= blocksLen;
+ }
}
+ #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ else
+ #endif
+ #endif /* XTRANSFORM_LEN */
+ #if !defined(XTRANSFORM_LEN) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ {
+ while (len >= WC_SHA256_BLOCK_SIZE) {
+ word32* local32 = sha256->buffer;
+ /* optimization to avoid memcpy if data pointer is properly aligned */
+ /* Intel transform function requires use of sha256->buffer */
+ /* Little Endian requires byte swap, so can't use data directly */
+ #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) && \
+ !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
+ if (((size_t)data % WC_HASH_DATA_ALIGNMENT) == 0) {
+ local32 = (word32*)data;
+ }
+ else
+ #endif
+ {
+ XMEMCPY(local32, data, WC_SHA256_BLOCK_SIZE);
+ }
+
+ data += WC_SHA256_BLOCK_SIZE;
+ len -= WC_SHA256_BLOCK_SIZE;
+
+ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+ #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+ #endif
+ {
+ ByteReverseWords(local32, local32, WC_SHA256_BLOCK_SIZE);
+ }
+ #endif
+
+ #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if (sha256->ctx.mode == ESP32_SHA_INIT){
+ esp_sha_try_hw_lock(&sha256->ctx);
+ }
+ if (sha256->ctx.mode == ESP32_SHA_SW){
+ ret = XTRANSFORM(sha256, (const byte*)local32);
+ } else {
+ esp_sha256_process(sha256, (const byte*)local32);
+ }
+ #else
+ ret = XTRANSFORM(sha256, (const byte*)local32);
+ #endif
+
+ if (ret != 0)
+ break;
+ }
+ }
+ #endif
+
+ /* save remainder */
+ if (len > 0) {
+ XMEMCPY(local, data, len);
+ sha256->buffLen = len;
+ }
+
+ return ret;
}
- return 0;
-}
+ int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
+ {
+ if (sha256 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
-int wc_Sha256Final(Sha256* sha256, byte* hash)
-{
- byte* local = (byte*)sha256->buffer;
- int ret;
-
- SAVE_XMM_YMM ; /* for Intel AVX */
+ if (data == NULL && len == 0) {
+ /* valid, but do nothing */
+ return 0;
+ }
+
+ #ifdef WOLF_CRYPTO_CB
+ if (sha256->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_Sha256Hash(sha256, data, len, NULL);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+ #endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+ if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha256(&sha256->asyncDev, NULL, data, len);
+ #endif
+ }
+ #endif /* WOLFSSL_ASYNC_CRYPT */
+
+ return Sha256Update(sha256, data, len);
+ }
+
+ static WC_INLINE int Sha256Final(wc_Sha256* sha256)
+ {
+
+ int ret;
+ byte* local;
- AddLength(sha256, sha256->buffLen); /* before adding pads */
+ if (sha256 == NULL) {
+ return BAD_FUNC_ARG;
+ }
- local[sha256->buffLen++] = 0x80; /* add 1 */
+ local = (byte*)sha256->buffer;
+ local[sha256->buffLen++] = 0x80; /* add 1 */
- /* pad with zeros */
- if (sha256->buffLen > SHA256_PAD_SIZE) {
- XMEMSET(&local[sha256->buffLen], 0, SHA256_BLOCK_SIZE - sha256->buffLen);
- sha256->buffLen += SHA256_BLOCK_SIZE - sha256->buffLen;
+ /* pad with zeros */
+ if (sha256->buffLen > WC_SHA256_PAD_SIZE) {
+ XMEMSET(&local[sha256->buffLen], 0,
+ WC_SHA256_BLOCK_SIZE - sha256->buffLen);
+ sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen;
- #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU)
+ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+ if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
#endif
- ByteReverseWords(sha256->buffer, sha256->buffer, SHA256_BLOCK_SIZE);
+ {
+ ByteReverseWords(sha256->buffer, sha256->buffer,
+ WC_SHA256_BLOCK_SIZE);
+ }
#endif
- ret = XTRANSFORM(sha256, local);
- if (ret != 0)
- return ret;
+ #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if (sha256->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha256->ctx);
+ }
+ if (sha256->ctx.mode == ESP32_SHA_SW) {
+ ret = XTRANSFORM(sha256, (const byte*)local);
+ } else {
+ ret = esp_sha256_process(sha256, (const byte*)local);
+ }
+ #else
+ ret = XTRANSFORM(sha256, (const byte*)local);
+ #endif
+ if (ret != 0)
+ return ret;
- sha256->buffLen = 0;
- }
- XMEMSET(&local[sha256->buffLen], 0, SHA256_PAD_SIZE - sha256->buffLen);
+ sha256->buffLen = 0;
+ }
+ XMEMSET(&local[sha256->buffLen], 0,
+ WC_SHA256_PAD_SIZE - sha256->buffLen);
- /* put lengths in bits */
- sha256->hiLen = (sha256->loLen >> (8*sizeof(sha256->loLen) - 3)) +
- (sha256->hiLen << 3);
- sha256->loLen = sha256->loLen << 3;
+ /* put lengths in bits */
+ sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) +
+ (sha256->hiLen << 3);
+ sha256->loLen = sha256->loLen << 3;
- /* store lengths */
- #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU)
+ /* store lengths */
+ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+ if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
#endif
- ByteReverseWords(sha256->buffer, sha256->buffer, SHA256_BLOCK_SIZE);
+ {
+ ByteReverseWords(sha256->buffer, sha256->buffer,
+ WC_SHA256_BLOCK_SIZE);
+ }
#endif
- /* ! length ordering dependent on digest endian type ! */
- XMEMCPY(&local[SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
- XMEMCPY(&local[SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
- sizeof(word32));
+ /* ! length ordering dependent on digest endian type ! */
+ XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
+ XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
+ sizeof(word32));
- #if defined(FREESCALE_MMCAU) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || \
+ defined(HAVE_INTEL_AVX2)
/* Kinetis requires only these bytes reversed */
#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(IS_INTEL_AVX1 || IS_INTEL_AVX2)
+ if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
#endif
- ByteReverseWords(&sha256->buffer[SHA256_PAD_SIZE/sizeof(word32)],
- &sha256->buffer[SHA256_PAD_SIZE/sizeof(word32)],
- 2 * sizeof(word32));
+ {
+ ByteReverseWords(
+ &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
+ &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
+ 2 * sizeof(word32));
+ }
+ #endif
+
+ #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if (sha256->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha256->ctx);
+ }
+ if (sha256->ctx.mode == ESP32_SHA_SW) {
+ ret = XTRANSFORM(sha256, (const byte*)local);
+ } else {
+ ret = esp_sha256_digest_process(sha256, 1);
+ }
+ #else
+ ret = XTRANSFORM(sha256, (const byte*)local);
#endif
- ret = XTRANSFORM(sha256, local);
- if (ret != 0)
return ret;
+ }
- #if defined(LITTLE_ENDIAN_ORDER)
- ByteReverseWords(sha256->digest, sha256->digest, SHA256_DIGEST_SIZE);
+ int wc_Sha256FinalRaw(wc_Sha256* sha256, byte* hash)
+ {
+ #ifdef LITTLE_ENDIAN_ORDER
+ word32 digest[WC_SHA256_DIGEST_SIZE / sizeof(word32)];
#endif
- XMEMCPY(hash, sha256->digest, SHA256_DIGEST_SIZE);
- return wc_InitSha256(sha256); /* reset state */
-}
+ if (sha256 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ #ifdef LITTLE_ENDIAN_ORDER
+ ByteReverseWords((word32*)digest, (word32*)sha256->digest,
+ WC_SHA256_DIGEST_SIZE);
+ XMEMCPY(hash, digest, WC_SHA256_DIGEST_SIZE);
+ #else
+ XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE);
+ #endif
+ return 0;
+ }
-int wc_Sha256Hash(const byte* data, word32 len, byte* hash)
-{
- int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- Sha256* sha256;
-#else
- Sha256 sha256[1];
-#endif
+ int wc_Sha256Final(wc_Sha256* sha256, byte* hash)
+ {
+ int ret;
-#ifdef WOLFSSL_SMALL_STACK
- sha256 = (Sha256*)XMALLOC(sizeof(Sha256), NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (sha256 == NULL)
- return MEMORY_E;
-#endif
+ if (sha256 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
- if ((ret = wc_InitSha256(sha256)) != 0) {
- WOLFSSL_MSG("InitSha256 failed");
+ #ifdef WOLF_CRYPTO_CB
+ if (sha256->devId != INVALID_DEVID) {
+ ret = wc_CryptoCb_Sha256Hash(sha256, NULL, 0, hash);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+ #endif
+
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+ if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha256(&sha256->asyncDev, hash, NULL,
+ WC_SHA256_DIGEST_SIZE);
+ #endif
+ }
+ #endif /* WOLFSSL_ASYNC_CRYPT */
+
+ ret = Sha256Final(sha256);
+ if (ret != 0)
+ return ret;
+
+ #if defined(LITTLE_ENDIAN_ORDER)
+ ByteReverseWords(sha256->digest, sha256->digest, WC_SHA256_DIGEST_SIZE);
+ #endif
+ XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE);
+
+ return InitSha256(sha256); /* reset state */
}
- else if ((ret = wc_Sha256Update(sha256, data, len)) != 0) {
- WOLFSSL_MSG("Sha256Update failed");
+
+#endif /* XTRANSFORM */
+
+#ifdef WOLFSSL_SHA224
+
+#ifdef STM32_HASH_SHA2
+
+ /* Supports CubeMX HAL or Standard Peripheral Library */
+
+ int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId)
+ {
+ if (sha224 == NULL)
+ return BAD_FUNC_ARG;
+
+ (void)devId;
+ (void)heap;
+
+ wc_Stm32_Hash_Init(&sha224->stmCtx);
+ return 0;
}
- else if ((ret = wc_Sha256Final(sha256, hash)) != 0) {
- WOLFSSL_MSG("Sha256Final failed");
+
+ int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len)
+ {
+ int ret = 0;
+
+ if (sha224 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ ret = wc_Stm32_Hash_Update(&sha224->stmCtx,
+ HASH_AlgoSelection_SHA224, data, len);
+ wolfSSL_CryptHwMutexUnLock();
+ }
+ return ret;
}
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(sha256, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+ int wc_Sha224Final(wc_Sha224* sha224, byte* hash)
+ {
+ int ret = 0;
- return ret;
-}
+ if (sha224 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ ret = wolfSSL_CryptHwMutexLock();
+ if (ret == 0) {
+ ret = wc_Stm32_Hash_Final(&sha224->stmCtx,
+ HASH_AlgoSelection_SHA224, hash, WC_SHA224_DIGEST_SIZE);
+ wolfSSL_CryptHwMutexUnLock();
+ }
-#define _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
- { word32 d ;\
- d = sha256->digest[0]; __asm__ volatile("movl %0, %"#S_0::"r"(d):SSE_REGs) ;\
- d = sha256->digest[1]; __asm__ volatile("movl %0, %"#S_1::"r"(d):SSE_REGs) ;\
- d = sha256->digest[2]; __asm__ volatile("movl %0, %"#S_2::"r"(d):SSE_REGs) ;\
- d = sha256->digest[3]; __asm__ volatile("movl %0, %"#S_3::"r"(d):SSE_REGs) ;\
- d = sha256->digest[4]; __asm__ volatile("movl %0, %"#S_4::"r"(d):SSE_REGs) ;\
- d = sha256->digest[5]; __asm__ volatile("movl %0, %"#S_5::"r"(d):SSE_REGs) ;\
- d = sha256->digest[6]; __asm__ volatile("movl %0, %"#S_6::"r"(d):SSE_REGs) ;\
- d = sha256->digest[7]; __asm__ volatile("movl %0, %"#S_7::"r"(d):SSE_REGs) ;\
-}
+ (void)wc_InitSha224(sha224); /* reset state */
-#define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
- { word32 d ; \
- __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs) ; sha256->digest[0] += d;\
- __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs) ; sha256->digest[1] += d;\
- __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs) ; sha256->digest[2] += d;\
- __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs) ; sha256->digest[3] += d;\
- __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs) ; sha256->digest[4] += d;\
- __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs) ; sha256->digest[5] += d;\
- __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs) ; sha256->digest[6] += d;\
- __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs) ; sha256->digest[7] += d;\
-}
+ return ret;
+ }
+#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
+ /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */
-#define DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
- _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
+#elif defined(WOLFSSL_AFALG_HASH)
+ #error SHA224 currently not supported with AF_ALG enabled
-#define RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
- _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
+#elif defined(WOLFSSL_DEVCRYPTO_HASH)
+ /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */
+#else
+ #define NEED_SOFT_SHA224
-#define S_0 %r15d
-#define S_1 %r10d
-#define S_2 %r11d
-#define S_3 %r12d
-#define S_4 %r13d
-#define S_5 %r14d
-#define S_6 %ebx
-#define S_7 %r9d
+ static int InitSha224(wc_Sha224* sha224)
+ {
+ int ret = 0;
-#define SSE_REGs "%edi", "%ecx", "%esi", "%edx", "%ebx","%r8","%r9","%r10","%r11","%r12","%r13","%r14","%r15"
+ if (sha224 == NULL) {
+ return BAD_FUNC_ARG;
+ }
-#if defined(HAVE_INTEL_RORX)
-#define RND_STEP_RORX_1(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("rorx $6, %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e>>6 */\
+ sha224->digest[0] = 0xc1059ed8;
+ sha224->digest[1] = 0x367cd507;
+ sha224->digest[2] = 0x3070dd17;
+ sha224->digest[3] = 0xf70e5939;
+ sha224->digest[4] = 0xffc00b31;
+ sha224->digest[5] = 0x68581511;
+ sha224->digest[6] = 0x64f98fa7;
+ sha224->digest[7] = 0xbefa4fa4;
+
+ sha224->buffLen = 0;
+ sha224->loLen = 0;
+ sha224->hiLen = 0;
+
+ #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+ /* choose best Transform function under this runtime environment */
+ Sha256_SetTransform();
+ #endif
+ #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ sha224->flags = 0;
+ #endif
-#define RND_STEP_RORX_2(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("rorx $11, %"#e",%%edi\n\t":::"%edi",SSE_REGs); /* edi = e>>11 */\
-__asm__ volatile("xorl %%edx, %%edi\n\t":::"%edx","%edi",SSE_REGs); /* edi = (e>>11) ^ (e>>6) */\
-__asm__ volatile("rorx $25, %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e>>25 */\
+ return ret;
+ }
-#define RND_STEP_RORX_3(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("movl %"#f", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f */\
-__asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f ^ g */\
-__asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs); /* edx = Sigma1(e) */\
-__asm__ volatile("andl %"#e", %%esi\n\t":::"%esi",SSE_REGs); /* esi = (f ^ g) & e */\
-__asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = Ch(e,f,g) */\
+#endif
-#define RND_STEP_RORX_4(a,b,c,d,e,f,g,h,i)\
-/*__asm__ volatile("movl %0, %%edx\n\t"::"m"(w_k):"%edx");*/\
-__asm__ volatile("addl %0, %"#h"\n\t"::"r"(W_K[i]):SSE_REGs); /* h += w_k */\
-__asm__ volatile("addl %%edx, %"#h"\n\t":::"%edx",SSE_REGs); /* h = h + w_k + Sigma1(e) */\
-__asm__ volatile("rorx $2, %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = a>>2 */\
-__asm__ volatile("rorx $13, %"#a", %%edi\n\t":::"%edi",SSE_REGs);/* edi = a>>13 */\
+#ifdef NEED_SOFT_SHA224
+ int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId)
+ {
+ int ret = 0;
-#define RND_STEP_RORX_5(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("rorx $22, %"#a", %%edx\n\t":::"%edx",SSE_REGs); /* edx = a>>22 */\
-__asm__ volatile("xorl %%r8d, %%edi\n\t":::"%edi","%r8",SSE_REGs);/* edi = (a>>2) ^ (a>>13) */\
-__asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs); /* edx = Sigma0(a) */\
-
-#define RND_STEP_RORX_6(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("movl %"#b", %%edi\n\t":::"%edi",SSE_REGs); /* edi = b */\
-__asm__ volatile("orl %"#a", %%edi\n\t":::"%edi",SSE_REGs); /* edi = a | b */\
-__asm__ volatile("andl %"#c", %%edi\n\t":::"%edi",SSE_REGs); /* edi = (a | b) & c*/\
-__asm__ volatile("movl %"#b", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b */\
+ if (sha224 == NULL)
+ return BAD_FUNC_ARG;
-#define RND_STEP_RORX_7(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("addl %%esi, %"#h"\n\t":::"%esi",SSE_REGs); /* h += Ch(e,f,g) */\
-__asm__ volatile("andl %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b & a */\
-__asm__ volatile("orl %%edi, %%r8d\n\t":::"%edi","%r8",SSE_REGs); /* r8d = Maj(a,b,c) */\
+ sha224->heap = heap;
-#define RND_STEP_RORX_8(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("addl "#h", "#d"\n\t"); /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */\
-__asm__ volatile("addl %"#h", %%r8d\n\t":::"%r8",SSE_REGs); \
-__asm__ volatile("addl %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs); \
-__asm__ volatile("movl %r8d, "#h"\n\t");
+ ret = InitSha224(sha224);
+ if (ret != 0)
+ return ret;
-#endif
+ #ifdef WOLFSSL_SMALL_STACK_CACHE
+ sha224->W = NULL;
+ #endif
-#define RND_STEP_1(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("movl %"#e", %%edx\n\t":::"%edx",SSE_REGs);\
-__asm__ volatile("roll $26, %%edx\n\t":::"%edx",SSE_REGs); /* edx = e>>6 */\
-__asm__ volatile("movl %"#e", %%edi\n\t":::"%edi",SSE_REGs);\
-
-#define RND_STEP_2(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("roll $21, %%edi\n\t":::"%edi",SSE_REGs); /* edi = e>>11 */\
-__asm__ volatile("xorl %%edx, %%edi\n\t":::"%edx","%edi",SSE_REGs); /* edi = (e>>11) ^ (e>>6) */\
-__asm__ volatile("movl %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e */\
-__asm__ volatile("roll $7, %%edx\n\t":::"%edx",SSE_REGs); /* edx = e>>25 */\
-
-#define RND_STEP_3(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("movl %"#f", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f */\
-__asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f ^ g */\
-__asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs); /* edx = Sigma1(e) */\
-__asm__ volatile("andl %"#e", %%esi\n\t":::"%esi",SSE_REGs); /* esi = (f ^ g) & e */\
-__asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = Ch(e,f,g) */\
-
-#define RND_STEP_4(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("addl %0, %"#h"\n\t"::"r"(W_K[i]):SSE_REGs); /* h += w_k */\
-__asm__ volatile("addl %%edx, %"#h"\n\t":::"%edx",SSE_REGs); /* h = h + w_k + Sigma1(e) */\
-__asm__ volatile("movl %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = a */\
-__asm__ volatile("roll $30, %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = a>>2 */\
-__asm__ volatile("movl %"#a", %%edi\n\t":::"%edi",SSE_REGs); /* edi = a */\
-__asm__ volatile("roll $19, %%edi\n\t":::"%edi",SSE_REGs); /* edi = a>>13 */\
-__asm__ volatile("movl %"#a", %%edx\n\t":::"%edx",SSE_REGs); /* edx = a */\
-
-#define RND_STEP_5(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("roll $10, %%edx\n\t":::"%edx",SSE_REGs); /* edx = a>>22 */\
-__asm__ volatile("xorl %%r8d, %%edi\n\t":::"%edi","%r8",SSE_REGs); /* edi = (a>>2) ^ (a>>13) */\
-__asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs);/* edx = Sigma0(a) */\
-
-#define RND_STEP_6(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("movl %"#b", %%edi\n\t":::"%edi",SSE_REGs); /* edi = b */\
-__asm__ volatile("orl %"#a", %%edi\n\t":::"%edi",SSE_REGs); /* edi = a | b */\
-__asm__ volatile("andl %"#c", %%edi\n\t":::"%edi",SSE_REGs); /* edi = (a | b) & c */\
-__asm__ volatile("movl %"#b", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b */\
-
-#define RND_STEP_7(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("addl %%esi, %"#h"\n\t":::"%esi",SSE_REGs); /* h += Ch(e,f,g) */\
-__asm__ volatile("andl %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b & a */\
-__asm__ volatile("orl %%edi, %%r8d\n\t":::"%edi","%r8",SSE_REGs); /* r8d = Maj(a,b,c) */\
-
-#define RND_STEP_8(a,b,c,d,e,f,g,h,i)\
-__asm__ volatile("addl "#h", "#d"\n\t"); /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */\
-__asm__ volatile("addl %"#h", %%r8d\n\t":::"%r8",SSE_REGs); \
- /* r8b = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */\
-__asm__ volatile("addl %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs);\
- /* r8b = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */\
-__asm__ volatile("movl %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \
- /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
-
-#define RND_X(a,b,c,d,e,f,g,h,i) \
- RND_STEP_1(a,b,c,d,e,f,g,h,i); \
- RND_STEP_2(a,b,c,d,e,f,g,h,i); \
- RND_STEP_3(a,b,c,d,e,f,g,h,i); \
- RND_STEP_4(a,b,c,d,e,f,g,h,i); \
- RND_STEP_5(a,b,c,d,e,f,g,h,i); \
- RND_STEP_6(a,b,c,d,e,f,g,h,i); \
- RND_STEP_7(a,b,c,d,e,f,g,h,i); \
- RND_STEP_8(a,b,c,d,e,f,g,h,i);
-
-#define RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
-#define RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
-#define RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
-#define RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
-#define RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
-#define RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
-#define RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
-#define RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
-
-
-#define RND_1_3(a,b,c,d,e,f,g,h,i) {\
- RND_STEP_1(a,b,c,d,e,f,g,h,i); \
- RND_STEP_2(a,b,c,d,e,f,g,h,i); \
- RND_STEP_3(a,b,c,d,e,f,g,h,i); \
-}
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+ ret = wolfAsync_DevCtxInit(&sha224->asyncDev,
+ WOLFSSL_ASYNC_MARKER_SHA224, sha224->heap, devId);
+ #else
+ (void)devId;
+ #endif /* WOLFSSL_ASYNC_CRYPT */
-#define RND_4_6(a,b,c,d,e,f,g,h,i) {\
- RND_STEP_4(a,b,c,d,e,f,g,h,i); \
- RND_STEP_5(a,b,c,d,e,f,g,h,i); \
- RND_STEP_6(a,b,c,d,e,f,g,h,i); \
-}
+ return ret;
+ }
-#define RND_7_8(a,b,c,d,e,f,g,h,i) {\
- RND_STEP_7(a,b,c,d,e,f,g,h,i); \
- RND_STEP_8(a,b,c,d,e,f,g,h,i); \
-}
+ int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len)
+ {
+ int ret;
-#define RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
-#define RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
-#define RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
-#define RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
-#define RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
-#define RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
-#define RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
-#define RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
-
-
-#define RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
-#define RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
-#define RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
-#define RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
-#define RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
-#define RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
-#define RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
-#define RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
-
-#define RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
-#define RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
-#define RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
-#define RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
-#define RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
-#define RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
-#define RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
-#define RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
-
-#define RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
-#define RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
-#define RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
-#define RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
-#define RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
-#define RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
-#define RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
-#define RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
-
-#define FOR(cnt, init, max, inc, loop) \
- __asm__ volatile("movl $"#init", %0\n\t"#loop":"::"m"(cnt):)
-#define END(cnt, init, max, inc, loop) \
- __asm__ volatile("addl $"#inc", %0\n\tcmpl $"#max", %0\n\tjle "#loop"\n\t":"=m"(cnt)::) ;
-
-#endif /* defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) */
-
-#if defined(HAVE_INTEL_AVX1) /* inline Assember for Intel AVX1 instructions */
-
-#define VPALIGNR(op1,op2,op3,op4) __asm__ volatile("vpalignr $"#op4", %"#op3", %"#op2", %"#op1:::XMM_REGs)
-#define VPADDD(op1,op2,op3) __asm__ volatile("vpaddd %"#op3", %"#op2", %"#op1:::XMM_REGs)
-#define VPSRLD(op1,op2,op3) __asm__ volatile("vpsrld $"#op3", %"#op2", %"#op1:::XMM_REGs)
-#define VPSRLQ(op1,op2,op3) __asm__ volatile("vpsrlq $"#op3", %"#op2", %"#op1:::XMM_REGs)
-#define VPSLLD(op1,op2,op3) __asm__ volatile("vpslld $"#op3", %"#op2", %"#op1:::XMM_REGs)
-#define VPOR(op1,op2,op3) __asm__ volatile("vpor %"#op3", %"#op2", %"#op1:::XMM_REGs)
-#define VPXOR(op1,op2,op3) __asm__ volatile("vpxor %"#op3", %"#op2", %"#op1:::XMM_REGs)
-#define VPSHUFD(op1,op2,op3) __asm__ volatile("vpshufd $"#op3", %"#op2", %"#op1:::XMM_REGs)
-#define VPSHUFB(op1,op2,op3) __asm__ volatile("vpshufb %"#op3", %"#op2", %"#op1:::XMM_REGs)
-
-#define MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, SHUF_00BA, SHUF_DC00,\
- a,b,c,d,e,f,g,h,_i)\
- RND_STEP_1(a,b,c,d,e,f,g,h,_i);\
- VPALIGNR (XTMP0, X3, X2, 4) ;\
- RND_STEP_2(a,b,c,d,e,f,g,h,_i);\
- VPADDD (XTMP0, XTMP0, X0) ;\
- RND_STEP_3(a,b,c,d,e,f,g,h,_i);\
- VPALIGNR (XTMP1, X1, X0, 4) ; /* XTMP1 = W[-15] */\
- RND_STEP_4(a,b,c,d,e,f,g,h,_i);\
- VPSRLD (XTMP2, XTMP1, 7) ;\
- RND_STEP_5(a,b,c,d,e,f,g,h,_i);\
- VPSLLD (XTMP3, XTMP1, 25) ; /* VPSLLD (XTMP3, XTMP1, (32-7)) */\
- RND_STEP_6(a,b,c,d,e,f,g,h,_i);\
- VPOR (XTMP3, XTMP3, XTMP2) ; /* XTMP1 = W[-15] MY_ROR 7 */\
- RND_STEP_7(a,b,c,d,e,f,g,h,_i);\
- VPSRLD (XTMP2, XTMP1,18) ;\
- RND_STEP_8(a,b,c,d,e,f,g,h,_i);\
-\
- RND_STEP_1(h,a,b,c,d,e,f,g,_i+1);\
- VPSRLD (XTMP4, XTMP1, 3) ; /* XTMP4 = W[-15] >> 3 */\
- RND_STEP_2(h,a,b,c,d,e,f,g,_i+1);\
- VPSLLD (XTMP1, XTMP1, 14) ; /* VPSLLD (XTMP1, XTMP1, (32-18)) */\
- RND_STEP_3(h,a,b,c,d,e,f,g,_i+1);\
- VPXOR (XTMP3, XTMP3, XTMP1) ;\
- RND_STEP_4(h,a,b,c,d,e,f,g,_i+1);\
- VPXOR (XTMP3, XTMP3, XTMP2) ; /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
- RND_STEP_5(h,a,b,c,d,e,f,g,_i+1);\
- VPXOR (XTMP1, XTMP3, XTMP4) ; /* XTMP1 = s0 */\
- RND_STEP_6(h,a,b,c,d,e,f,g,_i+1);\
- VPSHUFD(XTMP2, X3, 0b11111010) ; /* XTMP2 = W[-2] {BBAA}*/\
- RND_STEP_7(h,a,b,c,d,e,f,g,_i+1);\
- VPADDD (XTMP0, XTMP0, XTMP1) ; /* XTMP0 = W[-16] + W[-7] + s0 */\
- RND_STEP_8(h,a,b,c,d,e,f,g,_i+1);\
-\
- RND_STEP_1(g,h,a,b,c,d,e,f,_i+2);\
- VPSRLD (XTMP4, XTMP2, 10) ; /* XTMP4 = W[-2] >> 10 {BBAA} */\
- RND_STEP_2(g,h,a,b,c,d,e,f,_i+2);\
- VPSRLQ (XTMP3, XTMP2, 19) ; /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
- RND_STEP_3(g,h,a,b,c,d,e,f,_i+2);\
- VPSRLQ (XTMP2, XTMP2, 17) ; /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
- RND_STEP_4(g,h,a,b,c,d,e,f,_i+2);\
- VPXOR (XTMP2, XTMP2, XTMP3) ;\
- RND_STEP_5(g,h,a,b,c,d,e,f,_i+2);\
- VPXOR (XTMP4, XTMP4, XTMP2) ; /* XTMP4 = s1 {xBxA} */\
- RND_STEP_6(g,h,a,b,c,d,e,f,_i+2);\
- VPSHUFB (XTMP4, XTMP4, SHUF_00BA) ; /* XTMP4 = s1 {00BA} */\
- RND_STEP_7(g,h,a,b,c,d,e,f,_i+2);\
- VPADDD (XTMP0, XTMP0, XTMP4) ; /* XTMP0 = {..., ..., W[1], W[0]} */\
- RND_STEP_8(g,h,a,b,c,d,e,f,_i+2);\
-\
- RND_STEP_1(f,g,h,a,b,c,d,e,_i+3);\
- VPSHUFD (XTMP2, XTMP0, 0b01010000) ; /* XTMP2 = W[-2] {DDCC} */\
- RND_STEP_2(f,g,h,a,b,c,d,e,_i+3);\
- VPSRLD (XTMP5, XTMP2, 10); /* XTMP5 = W[-2] >> 10 {DDCC} */\
- RND_STEP_3(f,g,h,a,b,c,d,e,_i+3);\
- VPSRLQ (XTMP3, XTMP2, 19); /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
- RND_STEP_4(f,g,h,a,b,c,d,e,_i+3);\
- VPSRLQ (XTMP2, XTMP2, 17) ; /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
- RND_STEP_5(f,g,h,a,b,c,d,e,_i+3);\
- VPXOR (XTMP2, XTMP2, XTMP3) ;\
- RND_STEP_6(f,g,h,a,b,c,d,e,_i+3);\
- VPXOR (XTMP5, XTMP5, XTMP2) ; /* XTMP5 = s1 {xDxC} */\
- RND_STEP_7(f,g,h,a,b,c,d,e,_i+3);\
- VPSHUFB (XTMP5, XTMP5, SHUF_DC00) ; /* XTMP5 = s1 {DC00} */\
- RND_STEP_8(f,g,h,a,b,c,d,e,_i+3);\
- VPADDD (X0, XTMP5, XTMP0) ; /* X0 = {W[3], W[2], W[1], W[0]} */\
-
-#if defined(HAVE_INTEL_RORX)
-
-#define MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, \
- XFER, SHUF_00BA, SHUF_DC00,a,b,c,d,e,f,g,h,_i)\
- RND_STEP_RORX_1(a,b,c,d,e,f,g,h,_i);\
- VPALIGNR (XTMP0, X3, X2, 4) ;\
- RND_STEP_RORX_2(a,b,c,d,e,f,g,h,_i);\
- VPADDD (XTMP0, XTMP0, X0) ;\
- RND_STEP_RORX_3(a,b,c,d,e,f,g,h,_i);\
- VPALIGNR (XTMP1, X1, X0, 4) ; /* XTMP1 = W[-15] */\
- RND_STEP_RORX_4(a,b,c,d,e,f,g,h,_i);\
- VPSRLD (XTMP2, XTMP1, 7) ;\
- RND_STEP_RORX_5(a,b,c,d,e,f,g,h,_i);\
- VPSLLD (XTMP3, XTMP1, 25) ; /* VPSLLD (XTMP3, XTMP1, (32-7)) */\
- RND_STEP_RORX_6(a,b,c,d,e,f,g,h,_i);\
- VPOR (XTMP3, XTMP3, XTMP2) ; /* XTMP1 = W[-15] MY_ROR 7 */\
- RND_STEP_RORX_7(a,b,c,d,e,f,g,h,_i);\
- VPSRLD (XTMP2, XTMP1,18) ;\
- RND_STEP_RORX_8(a,b,c,d,e,f,g,h,_i);\
-\
- RND_STEP_RORX_1(h,a,b,c,d,e,f,g,_i+1);\
- VPSRLD (XTMP4, XTMP1, 3) ; /* XTMP4 = W[-15] >> 3 */\
- RND_STEP_RORX_2(h,a,b,c,d,e,f,g,_i+1);\
- VPSLLD (XTMP1, XTMP1, 14) ; /* VPSLLD (XTMP1, XTMP1, (32-18)) */\
- RND_STEP_RORX_3(h,a,b,c,d,e,f,g,_i+1);\
- VPXOR (XTMP3, XTMP3, XTMP1) ;\
- RND_STEP_RORX_4(h,a,b,c,d,e,f,g,_i+1);\
- VPXOR (XTMP3, XTMP3, XTMP2) ; /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
- RND_STEP_RORX_5(h,a,b,c,d,e,f,g,_i+1);\
- VPXOR (XTMP1, XTMP3, XTMP4) ; /* XTMP1 = s0 */\
- RND_STEP_RORX_6(h,a,b,c,d,e,f,g,_i+1);\
- VPSHUFD(XTMP2, X3, 0b11111010) ; /* XTMP2 = W[-2] {BBAA}*/\
- RND_STEP_RORX_7(h,a,b,c,d,e,f,g,_i+1);\
- VPADDD (XTMP0, XTMP0, XTMP1) ; /* XTMP0 = W[-16] + W[-7] + s0 */\
- RND_STEP_RORX_8(h,a,b,c,d,e,f,g,_i+1);\
-\
- RND_STEP_RORX_1(g,h,a,b,c,d,e,f,_i+2);\
- VPSRLD (XTMP4, XTMP2, 10) ; /* XTMP4 = W[-2] >> 10 {BBAA} */\
- RND_STEP_RORX_2(g,h,a,b,c,d,e,f,_i+2);\
- VPSRLQ (XTMP3, XTMP2, 19) ; /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
- RND_STEP_RORX_3(g,h,a,b,c,d,e,f,_i+2);\
- VPSRLQ (XTMP2, XTMP2, 17) ; /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
- RND_STEP_RORX_4(g,h,a,b,c,d,e,f,_i+2);\
- VPXOR (XTMP2, XTMP2, XTMP3) ;\
- RND_STEP_RORX_5(g,h,a,b,c,d,e,f,_i+2);\
- VPXOR (XTMP4, XTMP4, XTMP2) ; /* XTMP4 = s1 {xBxA} */\
- RND_STEP_RORX_6(g,h,a,b,c,d,e,f,_i+2);\
- VPSHUFB (XTMP4, XTMP4, SHUF_00BA) ; /* XTMP4 = s1 {00BA} */\
- RND_STEP_RORX_7(g,h,a,b,c,d,e,f,_i+2);\
- VPADDD (XTMP0, XTMP0, XTMP4) ; /* XTMP0 = {..., ..., W[1], W[0]} */\
- RND_STEP_RORX_8(g,h,a,b,c,d,e,f,_i+2);\
-\
- RND_STEP_RORX_1(f,g,h,a,b,c,d,e,_i+3);\
- VPSHUFD (XTMP2, XTMP0, 0b01010000) ; /* XTMP2 = W[-2] {DDCC} */\
- RND_STEP_RORX_2(f,g,h,a,b,c,d,e,_i+3);\
- VPSRLD (XTMP5, XTMP2, 10); /* XTMP5 = W[-2] >> 10 {DDCC} */\
- RND_STEP_RORX_3(f,g,h,a,b,c,d,e,_i+3);\
- VPSRLQ (XTMP3, XTMP2, 19); /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
- RND_STEP_RORX_4(f,g,h,a,b,c,d,e,_i+3);\
- VPSRLQ (XTMP2, XTMP2, 17) ; /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
- RND_STEP_RORX_5(f,g,h,a,b,c,d,e,_i+3);\
- VPXOR (XTMP2, XTMP2, XTMP3) ;\
- RND_STEP_RORX_6(f,g,h,a,b,c,d,e,_i+3);\
- VPXOR (XTMP5, XTMP5, XTMP2) ; /* XTMP5 = s1 {xDxC} */\
- RND_STEP_RORX_7(f,g,h,a,b,c,d,e,_i+3);\
- VPSHUFB (XTMP5, XTMP5, SHUF_DC00) ; /* XTMP5 = s1 {DC00} */\
- RND_STEP_RORX_8(f,g,h,a,b,c,d,e,_i+3);\
- VPADDD (X0, XTMP5, XTMP0) ; /* X0 = {W[3], W[2], W[1], W[0]} */\
+ if (sha224 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
-#endif
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+ if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha224(&sha224->asyncDev, NULL, data, len);
+ #endif
+ }
+ #endif /* WOLFSSL_ASYNC_CRYPT */
+ ret = Sha256Update((wc_Sha256*)sha224, data, len);
-#define W_K_from_buff\
- __asm__ volatile("vmovdqu %0, %%xmm4\n\t"\
- "vpshufb %%xmm13, %%xmm4, %%xmm4\n\t"\
- :: "m"(sha256->buffer[0]):"%xmm4") ;\
- __asm__ volatile("vmovdqu %0, %%xmm5\n\t"\
- "vpshufb %%xmm13, %%xmm5, %%xmm5\n\t"\
- ::"m"(sha256->buffer[4]):"%xmm5") ;\
- __asm__ volatile("vmovdqu %0, %%xmm6\n\t"\
- "vpshufb %%xmm13, %%xmm6, %%xmm6\n\t"\
- ::"m"(sha256->buffer[8]):"%xmm6") ;\
- __asm__ volatile("vmovdqu %0, %%xmm7\n\t"\
- "vpshufb %%xmm13, %%xmm7, %%xmm7\n\t"\
- ::"m"(sha256->buffer[12]):"%xmm7") ;\
-
-#define _SET_W_K_XFER(reg, i)\
- __asm__ volatile("vpaddd %0, %"#reg", %%xmm9"::"m"(K[i]):XMM_REGs) ;\
- __asm__ volatile("vmovdqa %%xmm9, %0":"=m"(W_K[i])::XMM_REGs) ;
-
-#define SET_W_K_XFER(reg, i) _SET_W_K_XFER(reg, i)
-
-static const ALIGN32 word64 mSHUF_00BA[] = { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF } ; /* shuffle xBxA -> 00BA */
-static const ALIGN32 word64 mSHUF_DC00[] = { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 } ; /* shuffle xDxC -> DC00 */
-static const ALIGN32 word64 mBYTE_FLIP_MASK[] = { 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
-
-
-#define _Init_Masks(mask1, mask2, mask3)\
-__asm__ volatile("vmovdqu %0, %"#mask1 ::"m"(mBYTE_FLIP_MASK[0])) ;\
-__asm__ volatile("vmovdqu %0, %"#mask2 ::"m"(mSHUF_00BA[0])) ;\
-__asm__ volatile("vmovdqu %0, %"#mask3 ::"m"(mSHUF_DC00[0])) ;
-
-#define Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)\
- _Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
-
-#define X0 %xmm4
-#define X1 %xmm5
-#define X2 %xmm6
-#define X3 %xmm7
-#define X_ X0
-
-#define XTMP0 %xmm0
-#define XTMP1 %xmm1
-#define XTMP2 %xmm2
-#define XTMP3 %xmm3
-#define XTMP4 %xmm8
-#define XTMP5 %xmm9
-#define XFER %xmm10
-
-#define SHUF_00BA %xmm11 /* shuffle xBxA -> 00BA */
-#define SHUF_DC00 %xmm12 /* shuffle xDxC -> DC00 */
-#define BYTE_FLIP_MASK %xmm13
-
-#define XMM_REGs /* Registers are saved in Sha256Update/Finel */
- /*"xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13" */
-
-static int Transform_AVX1(Sha256* sha256)
-{
+ return ret;
+ }
- word32 W_K[64] ; /* temp for W+K */
+ int wc_Sha224Final(wc_Sha224* sha224, byte* hash)
+ {
+ int ret;
- #if defined(DEBUG_XMM)
- int i, j ;
- word32 xmm[29][4*15] ;
+ if (sha224 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+ if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha224(&sha224->asyncDev, hash, NULL,
+ WC_SHA224_DIGEST_SIZE);
+ #endif
+ }
+ #endif /* WOLFSSL_ASYNC_CRYPT */
+
+ ret = Sha256Final((wc_Sha256*)sha224);
+ if (ret != 0)
+ return ret;
+
+ #if defined(LITTLE_ENDIAN_ORDER)
+ ByteReverseWords(sha224->digest, sha224->digest, WC_SHA224_DIGEST_SIZE);
#endif
+ XMEMCPY(hash, sha224->digest, WC_SHA224_DIGEST_SIZE);
- Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) ;
- W_K_from_buff ; /* X0, X1, X2, X3 = W[0..15] ; */
-
- DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
-
- SET_W_K_XFER(X0, 0) ;
- MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
- SET_W_K_XFER(X1, 4) ;
- MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4) ;
- SET_W_K_XFER(X2, 8) ;
- MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
- SET_W_K_XFER(X3, 12) ;
- MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12) ;
- SET_W_K_XFER(X0, 16) ;
- MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
- SET_W_K_XFER(X1, 20) ;
- MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20) ;
- SET_W_K_XFER(X2, 24) ;
- MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
- SET_W_K_XFER(X3, 28) ;
- MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28) ;
- SET_W_K_XFER(X0, 32) ;
- MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
- SET_W_K_XFER(X1, 36) ;
- MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36) ;
- SET_W_K_XFER(X2, 40) ;
- MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
- SET_W_K_XFER(X3, 44) ;
- MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
- SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44) ;
-
- SET_W_K_XFER(X0, 48) ;
- SET_W_K_XFER(X1, 52) ;
- SET_W_K_XFER(X2, 56) ;
- SET_W_K_XFER(X3, 60) ;
-
- RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
- RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
- RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
- RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
-
- RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
- RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
- RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
- RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
-
- RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
- RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
- RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
- RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
-
- RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
- RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
- RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
- RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
-
- RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
-
- #if defined(DEBUG_XMM)
- for(i=0; i<29; i++) {
- for(j=0; j<4*14; j+=4)
- printf("xmm%d[%d]=%08x,%08x,%08x,%08x\n", j/4, i,
- xmm[i][j],xmm[i][j+1],xmm[i][j+2],xmm[i][j+3]) ;
- printf("\n") ;
+ return InitSha224(sha224); /* reset state */
}
-
- for(i=0; i<64; i++)printf("W_K[%d]%08x\n", i, W_K[i]) ;
+#endif /* end of SHA224 software implementation */
+
+ int wc_InitSha224(wc_Sha224* sha224)
+ {
+ return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
+ }
+
+ void wc_Sha224Free(wc_Sha224* sha224)
+ {
+ if (sha224 == NULL)
+ return;
+
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (sha224->W != NULL) {
+ XFREE(sha224->W, NULL, DYNAMIC_TYPE_DIGEST);
+ sha224->W = NULL;
+ }
+#endif
+
+ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+ wolfAsync_DevCtxFree(&sha224->asyncDev, WOLFSSL_ASYNC_MARKER_SHA224);
+ #endif /* WOLFSSL_ASYNC_CRYPT */
+
+ #ifdef WOLFSSL_PIC32MZ_HASH
+ wc_Sha256Pic32Free(sha224);
#endif
+ }
+#endif /* WOLFSSL_SHA224 */
- return 0;
+
+int wc_InitSha256(wc_Sha256* sha256)
+{
+ return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
}
-#if defined(HAVE_INTEL_RORX)
-static int Transform_AVX1_RORX(Sha256* sha256)
+void wc_Sha256Free(wc_Sha256* sha256)
{
+ if (sha256 == NULL)
+ return;
- word32 W_K[64] ; /* temp for W+K */
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (sha256->W != NULL) {
+ XFREE(sha256->W, NULL, DYNAMIC_TYPE_DIGEST);
+ sha256->W = NULL;
+ }
+#endif
- #if defined(DEBUG_XMM)
- int i, j ;
- word32 xmm[29][4*15] ;
- #endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+ wolfAsync_DevCtxFree(&sha256->asyncDev, WOLFSSL_ASYNC_MARKER_SHA256);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+#ifdef WOLFSSL_PIC32MZ_HASH
+ wc_Sha256Pic32Free(sha256);
+#endif
+#if defined(WOLFSSL_AFALG_HASH)
+ if (sha256->alFd > 0) {
+ close(sha256->alFd);
+ sha256->alFd = -1; /* avoid possible double close on socket */
+ }
+ if (sha256->rdFd > 0) {
+ close(sha256->rdFd);
+ sha256->rdFd = -1; /* avoid possible double close on socket */
+ }
+#endif /* WOLFSSL_AFALG_HASH */
+#ifdef WOLFSSL_DEVCRYPTO_HASH
+ wc_DevCryptoFree(&sha256->ctx);
+#endif /* WOLFSSL_DEVCRYPTO */
+#if (defined(WOLFSSL_AFALG_HASH) && defined(WOLFSSL_AFALG_HASH_KEEP)) || \
+ (defined(WOLFSSL_DEVCRYPTO_HASH) && defined(WOLFSSL_DEVCRYPTO_HASH_KEEP)) || \
+ (defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+ !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH))
+ if (sha256->msg != NULL) {
+ XFREE(sha256->msg, sha256->heap, DYNAMIC_TYPE_TMP_BUFFER);
+ sha256->msg = NULL;
+ }
+#endif
+}
+
+#endif /* !WOLFSSL_TI_HASH */
+#endif /* HAVE_FIPS */
+
+
+#ifndef WOLFSSL_TI_HASH
+#ifdef WOLFSSL_SHA224
+ int wc_Sha224GetHash(wc_Sha224* sha224, byte* hash)
+ {
+ int ret;
+ wc_Sha224 tmpSha224;
- Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) ;
- W_K_from_buff ; /* X0, X1, X2, X3 = W[0..15] ; */
-
- DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
- SET_W_K_XFER(X0, 0) ;
- MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
- SET_W_K_XFER(X1, 4) ;
- MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4) ;
- SET_W_K_XFER(X2, 8) ;
- MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
- SET_W_K_XFER(X3, 12) ;
- MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12) ;
- SET_W_K_XFER(X0, 16) ;
- MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
- SET_W_K_XFER(X1, 20) ;
- MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20) ;
- SET_W_K_XFER(X2, 24) ;
- MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
- SET_W_K_XFER(X3, 28) ;
- MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28) ;
- SET_W_K_XFER(X0, 32) ;
- MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
- SET_W_K_XFER(X1, 36) ;
- MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36) ;
- SET_W_K_XFER(X2, 40) ;
- MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
- SET_W_K_XFER(X3, 44) ;
- MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
- XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44) ;
-
- SET_W_K_XFER(X0, 48) ;
- SET_W_K_XFER(X1, 52) ;
- SET_W_K_XFER(X2, 56) ;
- SET_W_K_XFER(X3, 60) ;
-
- RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
- RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
- RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
- RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
-
- RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
- RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
- RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
- RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
-
- RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
- RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
- RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
- RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
-
- RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
- RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
- RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
- RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
-
- RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
-
- #if defined(DEBUG_XMM)
- for(i=0; i<29; i++) {
- for(j=0; j<4*14; j+=4)
- printf("xmm%d[%d]=%08x,%08x,%08x,%08x\n", j/4, i,
- xmm[i][j],xmm[i][j+1],xmm[i][j+2],xmm[i][j+3]) ;
- printf("\n") ;
+ if (sha224 == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_Sha224Copy(sha224, &tmpSha224);
+ if (ret == 0) {
+ ret = wc_Sha224Final(&tmpSha224, hash);
+ wc_Sha224Free(&tmpSha224);
+ }
+ return ret;
}
-
- for(i=0; i<64; i++)printf("W_K[%d]%08x\n", i, W_K[i]) ;
+ int wc_Sha224Copy(wc_Sha224* src, wc_Sha224* dst)
+ {
+ int ret = 0;
+
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
+
+ XMEMCPY(dst, src, sizeof(wc_Sha224));
+ #ifdef WOLFSSL_SMALL_STACK_CACHE
+ dst->W = NULL;
#endif
- return 0;
-}
-#endif /* HAVE_INTEL_RORX */
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+ #endif
+ #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ dst->flags |= WC_HASH_FLAG_ISCOPY;
+ #endif
+
+ return ret;
+ }
-#endif /* HAVE_INTEL_AVX1 */
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ int wc_Sha224SetFlags(wc_Sha224* sha224, word32 flags)
+ {
+ if (sha224) {
+ sha224->flags = flags;
+ }
+ return 0;
+ }
+ int wc_Sha224GetFlags(wc_Sha224* sha224, word32* flags)
+ {
+ if (sha224 && flags) {
+ *flags = sha224->flags;
+ }
+ return 0;
+ }
+#endif
+#endif /* WOLFSSL_SHA224 */
-#if defined(HAVE_INTEL_AVX2)
+#ifdef WOLFSSL_AFALG_HASH
+ /* implemented in wolfcrypt/src/port/af_alg/afalg_hash.c */
-#define _MOVE_to_REG(ymm, mem) __asm__ volatile("vmovdqu %0, %%"#ymm" ":: "m"(mem):YMM_REGs) ;
-#define _MOVE_to_MEM(mem, ymm) __asm__ volatile("vmovdqu %%"#ymm", %0" : "=m"(mem)::YMM_REGs) ;
-#define _BYTE_SWAP(ymm, map) __asm__ volatile("vpshufb %0, %%"#ymm", %%"#ymm"\n\t"\
- :: "m"(map):YMM_REGs) ;
-#define _MOVE_128(ymm0, ymm1, ymm2, map) __asm__ volatile("vperm2i128 $"#map", %%"\
- #ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs) ;
-#define _MOVE_BYTE(ymm0, ymm1, map) __asm__ volatile("vpshufb %0, %%"#ymm1", %%"\
- #ymm0"\n\t":: "m"(map):YMM_REGs) ;
-#define _S_TEMP(dest, src, bits, temp) __asm__ volatile("vpsrld $"#bits", %%"\
- #src", %%"#dest"\n\tvpslld $32-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
- #temp",%%"#dest", %%"#dest" ":::YMM_REGs) ;
-#define _AVX2_R(dest, src, bits) __asm__ volatile("vpsrld $"#bits", %%"\
- #src", %%"#dest" ":::YMM_REGs) ;
-#define _XOR(dest, src1, src2) __asm__ volatile("vpxor %%"#src1", %%"\
- #src2", %%"#dest" ":::YMM_REGs) ;
-#define _OR(dest, src1, src2) __asm__ volatile("vpor %%"#src1", %%"\
- #src2", %%"#dest" ":::YMM_REGs) ;
-#define _ADD(dest, src1, src2) __asm__ volatile("vpaddd %%"#src1", %%"\
- #src2", %%"#dest" ":::YMM_REGs) ;
-#define _ADD_MEM(dest, src1, mem) __asm__ volatile("vpaddd %0, %%"#src1", %%"\
- #dest" "::"m"(mem):YMM_REGs) ;
-#define _BLEND(map, dest, src1, src2) __asm__ volatile("vpblendd $"#map", %%"\
- #src1", %%"#src2", %%"#dest" ":::YMM_REGs) ;
-
-#define _EXTRACT_XMM_0(xmm, mem) __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define _EXTRACT_XMM_1(xmm, mem) __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define _EXTRACT_XMM_2(xmm, mem) __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define _EXTRACT_XMM_3(xmm, mem) __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define _EXTRACT_XMM_4(ymm, xmm, mem)\
- __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs) ;\
- __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define _EXTRACT_XMM_5(xmm, mem) __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define _EXTRACT_XMM_6(xmm, mem) __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define _EXTRACT_XMM_7(xmm, mem) __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-
-#define _SWAP_YMM_HL(ymm) __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs) ;
-#define SWAP_YMM_HL(ymm) _SWAP_YMM_HL(ymm)
-
-#define MOVE_to_REG(ymm, mem) _MOVE_to_REG(ymm, mem)
-#define MOVE_to_MEM(mem, ymm) _MOVE_to_MEM(mem, ymm)
-#define BYTE_SWAP(ymm, map) _BYTE_SWAP(ymm, map)
-#define MOVE_128(ymm0, ymm1, ymm2, map) _MOVE_128(ymm0, ymm1, ymm2, map)
-#define MOVE_BYTE(ymm0, ymm1, map) _MOVE_BYTE(ymm0, ymm1, map)
-#define XOR(dest, src1, src2) _XOR(dest, src1, src2)
-#define OR(dest, src1, src2) _OR(dest, src1, src2)
-#define ADD(dest, src1, src2) _ADD(dest, src1, src2)
-#define ADD_MEM(dest, src1, mem) _ADD_MEM(dest, src1, mem)
-#define BLEND(map, dest, src1, src2) _BLEND(map, dest, src1, src2)
-
-#define S_TMP(dest, src, bits, temp) _S_TEMP(dest, src, bits, temp);
-#define AVX2_S(dest, src, bits) S_TMP(dest, src, bits, S_TEMP)
-#define AVX2_R(dest, src, bits) _AVX2_R(dest, src, bits)
-
-#define GAMMA0(dest, src) AVX2_S(dest, src, 7); AVX2_S(G_TEMP, src, 18); \
- XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 3); XOR(dest, G_TEMP, dest) ;
-#define GAMMA0_1(dest, src) AVX2_S(dest, src, 7); AVX2_S(G_TEMP, src, 18);
-#define GAMMA0_2(dest, src) XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 3); \
- XOR(dest, G_TEMP, dest) ;
-
-#define GAMMA1(dest, src) AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19); \
- XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 10); XOR(dest, G_TEMP, dest) ;
-#define GAMMA1_1(dest, src) AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19);
-#define GAMMA1_2(dest, src) XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 10); \
- XOR(dest, G_TEMP, dest) ;
-
-#define FEEDBACK1_to_W_I_2 MOVE_BYTE(YMM_TEMP0, W_I, mMAP1toW_I_2[0]) ; \
- BLEND(0x0c, W_I_2, YMM_TEMP0, W_I_2) ;
-#define FEEDBACK2_to_W_I_2 MOVE_128(YMM_TEMP0, W_I, W_I, 0x08) ; \
- MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAP2toW_I_2[0]) ; BLEND(0x30, W_I_2, YMM_TEMP0, W_I_2) ;
-#define FEEDBACK3_to_W_I_2 MOVE_BYTE(YMM_TEMP0, W_I, mMAP3toW_I_2[0]) ; \
- BLEND(0xc0, W_I_2, YMM_TEMP0, W_I_2) ;
-
-#define FEEDBACK_to_W_I_7 MOVE_128(YMM_TEMP0, W_I, W_I, 0x08) ;\
- MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAPtoW_I_7[0]) ; BLEND(0x80, W_I_7, YMM_TEMP0, W_I_7) ;
-
-#undef voitle
-
-#define W_I_16 ymm8
-#define W_I_15 ymm9
-#define W_I_7 ymm10
-#define W_I_2 ymm11
-#define W_I ymm12
-#define G_TEMP ymm13
-#define S_TEMP ymm14
-#define YMM_TEMP0 ymm15
-#define YMM_TEMP0x xmm15
-#define W_I_TEMP ymm7
-#define W_K_TEMP ymm15
-#define W_K_TEMPx xmm15
-
-#define YMM_REGs /* Registers are saved in Sha256Update/Finel */
- /* "%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15"*/
-
-
-#define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
- __asm__ volatile("vperm2i128 $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs) ;\
- __asm__ volatile("vpblendd $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs) ;\
- __asm__ volatile("vperm2i128 $0x01, %%"#w_i_7", %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
- __asm__ volatile("vpblendd $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
- __asm__ volatile("vpshufd $0x93, %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
-
-#define MOVE_7_to_15(w_i_15, w_i_7)\
- __asm__ volatile("vmovdqu %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
-
-#define MOVE_I_to_7(w_i_7, w_i)\
- __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
- __asm__ volatile("vpblendd $0x01, %%"#w_i_7", %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
- __asm__ volatile("vpshufd $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs) ;\
-
-#define MOVE_I_to_2(w_i_2, w_i)\
- __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs) ;\
- __asm__ volatile("vpshufd $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs) ;\
-
-#define ROTATE_W(w_i_16, w_i_15, w_i_7, w_i_2, w_i)\
- MOVE_15_to_16(w_i_16, w_i_15, w_i_7) ; \
- MOVE_7_to_15(w_i_15, w_i_7) ; \
- MOVE_I_to_7(w_i_7, w_i) ; \
- MOVE_I_to_2(w_i_2, w_i) ;\
-
-#define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
- { word32 d ;\
- __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs) ;\
- sha256->digest[0] += d;\
- __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs) ;\
- sha256->digest[1] += d;\
- __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs) ;\
- sha256->digest[2] += d;\
- __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs) ;\
- sha256->digest[3] += d;\
- __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs) ;\
- sha256->digest[4] += d;\
- __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs) ;\
- sha256->digest[5] += d;\
- __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs) ;\
- sha256->digest[6] += d;\
- __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs) ;\
- sha256->digest[7] += d;\
-}
+#elif defined(WOLFSSL_DEVCRYPTO_HASH)
+ /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */
-#define _DumpS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
- { word32 d[8] ;\
- __asm__ volatile("movl %"#S_0", %0":"=r"(d[0])::SSE_REGs) ;\
- __asm__ volatile("movl %"#S_1", %0":"=r"(d[1])::SSE_REGs) ;\
- __asm__ volatile("movl %"#S_2", %0":"=r"(d[2])::SSE_REGs) ;\
- __asm__ volatile("movl %"#S_3", %0":"=r"(d[3])::SSE_REGs) ;\
- __asm__ volatile("movl %"#S_4", %0":"=r"(d[4])::SSE_REGs) ;\
- __asm__ volatile("movl %"#S_5", %0":"=r"(d[5])::SSE_REGs) ;\
- __asm__ volatile("movl %"#S_6", %0":"=r"(d[6])::SSE_REGs) ;\
- __asm__ volatile("movl %"#S_7", %0":"=r"(d[7])::SSE_REGs) ;\
- printf("S[0..7]=%08x,%08x,%08x,%08x,%08x,%08x,%08x,%08x\n", d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7]);\
- __asm__ volatile("movl %0, %"#S_0::"r"(d[0]):SSE_REGs) ;\
- __asm__ volatile("movl %0, %"#S_1::"r"(d[1]):SSE_REGs) ;\
- __asm__ volatile("movl %0, %"#S_2::"r"(d[2]):SSE_REGs) ;\
- __asm__ volatile("movl %0, %"#S_3::"r"(d[3]):SSE_REGs) ;\
- __asm__ volatile("movl %0, %"#S_4::"r"(d[4]):SSE_REGs) ;\
- __asm__ volatile("movl %0, %"#S_5::"r"(d[5]):SSE_REGs) ;\
- __asm__ volatile("movl %0, %"#S_6::"r"(d[6]):SSE_REGs) ;\
- __asm__ volatile("movl %0, %"#S_7::"r"(d[7]):SSE_REGs) ;\
-}
+#elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \
+ !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH)
+ /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */
+#else
-#define DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
- _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
-
-#define RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
- _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
-
-#define DumS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
- _DumpS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
-
-
- /* Byte swap Masks to ensure that rest of the words are filled with zero's. */
- static const unsigned long mBYTE_FLIP_MASK_16[] =
- { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
- static const unsigned long mBYTE_FLIP_MASK_15[] =
- { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
- static const unsigned long mBYTE_FLIP_MASK_7 [] =
- { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x8080808008090a0b } ;
- static const unsigned long mBYTE_FLIP_MASK_2 [] =
- { 0x0405060700010203, 0x8080808080808080, 0x8080808080808080, 0x8080808080808080 } ;
-
- static const unsigned long mMAPtoW_I_7[] =
- { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0302010080808080 } ;
- static const unsigned long mMAP1toW_I_2[] =
- { 0x8080808080808080, 0x0706050403020100, 0x8080808080808080, 0x8080808080808080 } ;
- static const unsigned long mMAP2toW_I_2[] =
- { 0x8080808080808080, 0x8080808080808080, 0x0f0e0d0c0b0a0908, 0x8080808080808080 } ;
- static const unsigned long mMAP3toW_I_2[] =
- { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0706050403020100 } ;
-
-static int Transform_AVX2(Sha256* sha256)
+int wc_Sha256GetHash(wc_Sha256* sha256, byte* hash)
{
+ int ret;
+ wc_Sha256 tmpSha256;
- #ifdef WOLFSSL_SMALL_STACK
- word32* W_K;
- W_K = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (W_K == NULL)
- return MEMORY_E;
- #else
- word32 W_K[64] ;
- #endif
+ if (sha256 == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
- MOVE_to_REG(W_I_16, sha256->buffer[0]); BYTE_SWAP(W_I_16, mBYTE_FLIP_MASK_16[0]) ;
- MOVE_to_REG(W_I_15, sha256->buffer[1]); BYTE_SWAP(W_I_15, mBYTE_FLIP_MASK_15[0]) ;
- MOVE_to_REG(W_I, sha256->buffer[8]) ; BYTE_SWAP(W_I, mBYTE_FLIP_MASK_16[0]) ;
- MOVE_to_REG(W_I_7, sha256->buffer[16-7]) ; BYTE_SWAP(W_I_7, mBYTE_FLIP_MASK_7[0]) ;
- MOVE_to_REG(W_I_2, sha256->buffer[16-2]) ; BYTE_SWAP(W_I_2, mBYTE_FLIP_MASK_2[0]) ;
-
- DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
-
- ADD_MEM(W_K_TEMP, W_I_16, K[0]) ;
- MOVE_to_MEM(W_K[0], W_K_TEMP) ;
-
- RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
- RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,1) ;
- RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,2) ;
- RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,3) ;
- RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,4) ;
- RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,5) ;
- RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,6) ;
- RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,7) ;
-
- ADD_MEM(YMM_TEMP0, W_I, K[8]) ;
- MOVE_to_MEM(W_K[8], YMM_TEMP0) ;
-
- /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
- RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
- GAMMA0_1(W_I_TEMP, W_I_15) ;
- RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
- GAMMA0_2(W_I_TEMP, W_I_15) ;
- RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
- ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
- RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
- ADD(W_I, W_I_7, W_I_TEMP);
- RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
- ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
- RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
- FEEDBACK1_to_W_I_2 ;
- RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
- FEEDBACK_to_W_I_7 ;
- RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
- ADD(W_I_TEMP, W_I_7, W_I_TEMP);
- RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
- RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
- FEEDBACK2_to_W_I_2 ;
- RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
- RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
- FEEDBACK3_to_W_I_2 ;
- RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
- GAMMA1(YMM_TEMP0, W_I_2) ;
- RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
- RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
- RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
-
- MOVE_to_REG(YMM_TEMP0, K[16]) ;
- RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
- ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
- RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
- ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
- MOVE_to_MEM(W_K[16], YMM_TEMP0) ;
-
- /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
- RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
- GAMMA0_1(W_I_TEMP, W_I_15) ;
- RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
- GAMMA0_2(W_I_TEMP, W_I_15) ;
- RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
- ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
- RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
- ADD(W_I, W_I_7, W_I_TEMP);
- RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
- ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
- RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
- FEEDBACK1_to_W_I_2 ;
- RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
- FEEDBACK_to_W_I_7 ;
- RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
- ADD(W_I_TEMP, W_I_7, W_I_TEMP);
- RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
- GAMMA1(YMM_TEMP0, W_I_2) ;
- RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
- RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
- FEEDBACK2_to_W_I_2 ;
- RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
- RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
- FEEDBACK3_to_W_I_2 ;
- RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
- RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
-
- MOVE_to_REG(YMM_TEMP0, K[24]) ;
- RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
- ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
- RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
- ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
- MOVE_to_MEM(W_K[24], YMM_TEMP0) ;
-
- /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
- RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
- GAMMA0_1(W_I_TEMP, W_I_15) ;
- RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
- GAMMA0_2(W_I_TEMP, W_I_15) ;
- RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
- ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
- RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
- ADD(W_I, W_I_7, W_I_TEMP);
- RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
- ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
- RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
- FEEDBACK1_to_W_I_2 ;
- RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
- FEEDBACK_to_W_I_7 ;
- RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
- ADD(W_I_TEMP, W_I_7, W_I_TEMP);
- RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
- RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
- FEEDBACK2_to_W_I_2 ;
- RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
- RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
- FEEDBACK3_to_W_I_2 ;
- RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
- GAMMA1(YMM_TEMP0, W_I_2) ;
- RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
- RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
- RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
-
- MOVE_to_REG(YMM_TEMP0, K[32]) ;
- RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
- ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
- RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
- ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
- MOVE_to_MEM(W_K[32], YMM_TEMP0) ;
-
-
- /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
- RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
- GAMMA0_1(W_I_TEMP, W_I_15) ;
- RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
- GAMMA0_2(W_I_TEMP, W_I_15) ;
- RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
- ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
- RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
- ADD(W_I, W_I_7, W_I_TEMP);
- RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
- ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
- RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
- FEEDBACK1_to_W_I_2 ;
- RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
- FEEDBACK_to_W_I_7 ;
- RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
- ADD(W_I_TEMP, W_I_7, W_I_TEMP);
- RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
- RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
- FEEDBACK2_to_W_I_2 ;
- RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
- RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
- FEEDBACK3_to_W_I_2 ;
- RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
- RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
-
- MOVE_to_REG(YMM_TEMP0, K[40]) ;
- RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
- ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
- RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
- ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
- MOVE_to_MEM(W_K[40], YMM_TEMP0) ;
-
- /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
- RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
- GAMMA0_1(W_I_TEMP, W_I_15) ;
- RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
- GAMMA0_2(W_I_TEMP, W_I_15) ;
- RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
- ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
- RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
- ADD(W_I, W_I_7, W_I_TEMP);
- RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
- ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
- RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
- FEEDBACK1_to_W_I_2 ;
- RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
- FEEDBACK_to_W_I_7 ;
- RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
- ADD(W_I_TEMP, W_I_7, W_I_TEMP);
- RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
- RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
- FEEDBACK2_to_W_I_2 ;
- RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
- RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
- FEEDBACK3_to_W_I_2 ;
- RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
- RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
-
- MOVE_to_REG(YMM_TEMP0, K[48]) ;
- RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
- ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
- RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
- ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
- MOVE_to_MEM(W_K[48], YMM_TEMP0) ;
-
- /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
- RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
- GAMMA0_1(W_I_TEMP, W_I_15) ;
- RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
- GAMMA0_2(W_I_TEMP, W_I_15) ;
- RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
- ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
- RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
- ADD(W_I, W_I_7, W_I_TEMP);
- RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
- ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
- RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
- FEEDBACK1_to_W_I_2 ;
- RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
- FEEDBACK_to_W_I_7 ;
- RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
- ADD(W_I_TEMP, W_I_7, W_I_TEMP);
- RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
- RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
- FEEDBACK2_to_W_I_2 ;
- RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
- RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
- FEEDBACK3_to_W_I_2 ;
- RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
- GAMMA1_1(YMM_TEMP0, W_I_2) ;
- RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
- GAMMA1_2(YMM_TEMP0, W_I_2) ;
- RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
- ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
- RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
-
- MOVE_to_REG(YMM_TEMP0, K[56]) ;
- RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
- ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
- RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
- ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
- MOVE_to_MEM(W_K[56], YMM_TEMP0) ;
-
- RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
- RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
- RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
- RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
-
- RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
- RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
- RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
- RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
-
- RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
-
- #ifdef WOLFSSL_SMALL_STACK
- XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- #endif
-
- return 0;
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if(sha256->ctx.mode == ESP32_SHA_INIT){
+ esp_sha_try_hw_lock(&sha256->ctx);
+ }
+ if(sha256->ctx.mode == ESP32_SHA_HW)
+ {
+ esp_sha256_digest_process(sha256, 0);
+ }
+#endif
+ ret = wc_Sha256Copy(sha256, &tmpSha256);
+ if (ret == 0) {
+ ret = wc_Sha256Final(&tmpSha256, hash);
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ sha256->ctx.mode = ESP32_SHA_SW;
+#endif
+
+ wc_Sha256Free(&tmpSha256);
+ }
+ return ret;
}
+int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst)
+{
+ int ret = 0;
-#endif /* HAVE_INTEL_AVX2 */
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
-#endif /* HAVE_FIPS */
+ XMEMCPY(dst, src, sizeof(wc_Sha256));
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ dst->W = NULL;
+#endif
-#endif /* WOLFSSL_TI_HAHS */
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+#ifdef WOLFSSL_PIC32MZ_HASH
+ ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
+#endif
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ dst->ctx.mode = src->ctx.mode;
+ dst->ctx.isfirstblock = src->ctx.isfirstblock;
+ dst->ctx.sha_type = src->ctx.sha_type;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
-#endif /* NO_SHA256 */
+ return ret;
+}
+#endif
+
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Sha256SetFlags(wc_Sha256* sha256, word32 flags)
+{
+ if (sha256) {
+ sha256->flags = flags;
+ }
+ return 0;
+}
+int wc_Sha256GetFlags(wc_Sha256* sha256, word32* flags)
+{
+ if (sha256 && flags) {
+ *flags = sha256->flags;
+ }
+ return 0;
+}
+#endif
+#endif /* !WOLFSSL_TI_HASH */
+#endif /* NO_SHA256 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256_asm.S b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256_asm.S
new file mode 100644
index 000000000..c433d341c
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha256_asm.S
@@ -0,0 +1,22653 @@
+/* sha256_asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifndef HAVE_INTEL_AVX1
+#define HAVE_INTEL_AVX1
+#endif /* HAVE_INTEL_AVX1 */
+#ifndef NO_AVX2_SUPPORT
+#define HAVE_INTEL_AVX2
+#endif /* NO_AVX2_SUPPORT */
+
+#ifdef HAVE_INTEL_AVX1
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+L_avx1_sha256_k:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_sha256_shuf_00BA:
+.quad 0xb0a090803020100, 0xffffffffffffffff
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_sha256_shuf_DC00:
+.quad 0xffffffffffffffff, 0xb0a090803020100
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_sha256_flip_mask:
+.quad 0x405060700010203, 0xc0d0e0f08090a0b
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX1
+.type Transform_Sha256_AVX1,@function
+.align 4
+Transform_Sha256_AVX1:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX1
+.p2align 2
+_Transform_Sha256_AVX1:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x40, %rsp
+ leaq 32(%rdi), %rax
+ vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13
+ vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11
+ vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rax), %xmm0
+ vmovdqu 16(%rax), %xmm1
+ vpshufb %xmm13, %xmm0, %xmm0
+ vpshufb %xmm13, %xmm1, %xmm1
+ vmovdqu 32(%rax), %xmm2
+ vmovdqu 48(%rax), %xmm3
+ vpshufb %xmm13, %xmm2, %xmm2
+ vpshufb %xmm13, %xmm3, %xmm3
+ movl %r9d, %ebx
+ movl %r12d, %edx
+ xorl %r10d, %ebx
+ # set_w_k_xfer_4: 0
+ vpaddd 0+L_avx1_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl (%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 4(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 8(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 12(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 16(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 20(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 24(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 28(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 32(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 36(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 40(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 44(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 48(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 52(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 56(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 60(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 4
+ vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl (%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 4(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 8(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 12(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 16(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 20(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 24(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 28(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 32(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 36(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 40(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 44(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 48(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 52(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 56(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 60(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 8
+ vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl (%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 4(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 8(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 12(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 16(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 20(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 24(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 28(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 32(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 36(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 40(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 44(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 48(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 52(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 56(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 60(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 12
+ vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # rnd_all_4: 0-3
+ addl (%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 4(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 8(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 12(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 1-4
+ addl 16(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 20(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 24(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 28(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 2-5
+ addl 32(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 36(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 40(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 44(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 3-6
+ addl 48(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 52(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 56(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 60(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ addl %r8d, (%rdi)
+ addl %r9d, 4(%rdi)
+ addl %r10d, 8(%rdi)
+ addl %r11d, 12(%rdi)
+ addl %r12d, 16(%rdi)
+ addl %r13d, 20(%rdi)
+ addl %r14d, 24(%rdi)
+ addl %r15d, 28(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x40, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX1,.-Transform_Sha256_AVX1
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX1_Len
+.type Transform_Sha256_AVX1_Len,@function
+.align 4
+Transform_Sha256_AVX1_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX1_Len
+.p2align 2
+_Transform_Sha256_AVX1_Len:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rsi, %rbp
+ movq %rdx, %rsi
+ subq $0x40, %rsp
+ vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13
+ vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11
+ vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # Start of loop processing a block
+L_sha256_len_avx1_start:
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rbp), %xmm0
+ vmovdqu 16(%rbp), %xmm1
+ vpshufb %xmm13, %xmm0, %xmm0
+ vpshufb %xmm13, %xmm1, %xmm1
+ vmovdqu 32(%rbp), %xmm2
+ vmovdqu 48(%rbp), %xmm3
+ vpshufb %xmm13, %xmm2, %xmm2
+ vpshufb %xmm13, %xmm3, %xmm3
+ movl %r9d, %ebx
+ movl %r12d, %edx
+ xorl %r10d, %ebx
+ # set_w_k_xfer_4: 0
+ vpaddd 0+L_avx1_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl (%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 4(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 8(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 12(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 16(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 20(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 24(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 28(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 32(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 36(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 40(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 44(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 48(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 52(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 56(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 60(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 4
+ vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl (%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 4(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 8(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 12(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 16(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 20(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 24(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 28(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 32(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 36(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 40(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 44(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 48(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 52(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 56(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 60(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 8
+ vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl (%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 4(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 8(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 12(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 16(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 20(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 24(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 28(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 32(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 36(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 40(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 44(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 48(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %xmm5, %xmm8
+ vpslld $14, %xmm5, %xmm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %xmm6, %xmm7, %xmm6
+ vpor %xmm8, %xmm9, %xmm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 52(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %xmm5, %xmm9
+ vpxor %xmm6, %xmm8, %xmm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %xmm6, %xmm9, %xmm5
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 56(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %xmm6, %xmm7, %xmm6
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 60(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %xmm6, %xmm8
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %xmm6, %xmm9
+ vpxor %xmm8, %xmm7, %xmm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %xmm9, %xmm8, %xmm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 12
+ vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # rnd_all_4: 0-3
+ addl (%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 4(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 8(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 12(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 1-4
+ addl 16(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 20(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 24(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 28(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 2-5
+ addl 32(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 36(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 40(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 44(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 3-6
+ addl 48(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 52(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 56(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 60(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ addl (%rdi), %r8d
+ addl 4(%rdi), %r9d
+ addl 8(%rdi), %r10d
+ addl 12(%rdi), %r11d
+ addl 16(%rdi), %r12d
+ addl 20(%rdi), %r13d
+ addl 24(%rdi), %r14d
+ addl 28(%rdi), %r15d
+ addq $0x40, %rbp
+ subl $0x40, %esi
+ movl %r8d, (%rdi)
+ movl %r9d, 4(%rdi)
+ movl %r10d, 8(%rdi)
+ movl %r11d, 12(%rdi)
+ movl %r12d, 16(%rdi)
+ movl %r13d, 20(%rdi)
+ movl %r14d, 24(%rdi)
+ movl %r15d, 28(%rdi)
+ jnz L_sha256_len_avx1_start
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x40, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX1_Len,.-Transform_Sha256_AVX1_Len
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+# SHA-256 round constants K[0..63] (FIPS 180-4 section 4.2.2), one 32-bit
+# word per round.  The transform pre-adds four constants at a time to the
+# message schedule via "vpaddd NN+L_avx1_rorx_sha256_k(%rip), %xmmN, ..."
+# before the corresponding 4-round group, so the round code only adds W+K.
+# Values are the fractional parts of the cube roots of the first 64 primes;
+# entries such as 0xfc19dc6 / 0x6ca6351 are just 0x0fc19dc6 / 0x06ca6351
+# written without the leading zero.
+L_avx1_rorx_sha256_k:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# vpshufb control mask (used as %xmm11 in the message schedule below):
+# byte selectors 0x00..0x03 and 0x08..0x0b copy source dwords 0 and 2
+# into the low 64 bits of the result; the 0xff selectors zero the upper
+# 64 bits.  Gathers the two valid sigma1 lanes ("00BA" layout) into the
+# low half so they can be added into the new W values.
+L_avx1_rorx_sha256_shuf_00BA:
+.quad 0xb0a090803020100, 0xffffffffffffffff
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# vpshufb control mask (used as %xmm12 below): mirror of shuf_00BA.
+# The 0xff selectors zero the low 64 bits; byte selectors 0x00..0x03 and
+# 0x08..0x0b copy source dwords 0 and 2 into the high 64 bits ("DC00"
+# layout), completing the other half of the message-schedule update.
+L_avx1_rorx_sha256_shuf_DC00:
+.quad 0xffffffffffffffff, 0xb0a090803020100
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# vpshufb control mask (loaded into %xmm13 below): reverses the four
+# bytes within each 32-bit lane (selectors 3,2,1,0 / 7,6,5,4 / ...),
+# converting the little-endian message block loads into the big-endian
+# 32-bit words SHA-256 operates on.
+L_avx1_rorx_sha256_flip_mask:
+.quad 0x405060700010203, 0xc0d0e0f08090a0b
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX1_RORX
+.type Transform_Sha256_AVX1_RORX,@function
+.align 4
+Transform_Sha256_AVX1_RORX:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX1_RORX
+.p2align 2
+_Transform_Sha256_AVX1_RORX:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x40, %rsp
+ vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13
+ vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11
+ vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12
+ leaq 32(%rdi), %rax
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rax), %xmm0
+ vmovdqu 16(%rax), %xmm1
+ vpshufb %xmm13, %xmm0, %xmm0
+ vpshufb %xmm13, %xmm1, %xmm1
+ vmovdqu 32(%rax), %xmm2
+ vmovdqu 48(%rax), %xmm3
+ vpshufb %xmm13, %xmm2, %xmm2
+ vpshufb %xmm13, %xmm3, %xmm3
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # set_w_k_xfer_4: 0
+ vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ movl %r9d, %ebx
+ rorxl $6, %r12d, %edx
+ xorl %r10d, %ebx
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl (%rsp), %r15d
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 4(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 8(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 12(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 16(%rsp), %r11d
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 20(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 24(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 28(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 32(%rsp), %r15d
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 36(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 40(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 44(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 48(%rsp), %r11d
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 52(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 56(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 60(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 4
+ vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl (%rsp), %r15d
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 4(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 8(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 12(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 16(%rsp), %r11d
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 20(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 24(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 28(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 32(%rsp), %r15d
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 36(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 40(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 44(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 48(%rsp), %r11d
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 52(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 56(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 60(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 8
+ vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl (%rsp), %r15d
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 4(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 8(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 12(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 16(%rsp), %r11d
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 20(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 24(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 28(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 32(%rsp), %r15d
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 36(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 40(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 44(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 48(%rsp), %r11d
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 52(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 56(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 60(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 12
+ vpaddd 192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ xorl %eax, %eax
+ # rnd_all_4: 0-3
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ addl %eax, %r8d
+ addl (%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 4(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ addl %eax, %r14d
+ addl 8(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 12(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ # rnd_all_4: 1-4
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ addl %eax, %r12d
+ addl 16(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 20(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ addl %eax, %r10d
+ addl 24(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 28(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ # rnd_all_4: 2-5
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ addl %eax, %r8d
+ addl 32(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 36(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ addl %eax, %r14d
+ addl 40(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 44(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ # rnd_all_4: 3-6
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ addl %eax, %r12d
+ addl 48(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 52(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ addl %eax, %r10d
+ addl 56(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 60(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ addl %eax, %r8d
+ addl %r8d, (%rdi)
+ addl %r9d, 4(%rdi)
+ addl %r10d, 8(%rdi)
+ addl %r11d, 12(%rdi)
+ addl %r12d, 16(%rdi)
+ addl %r13d, 20(%rdi)
+ addl %r14d, 24(%rdi)
+ addl %r15d, 28(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x40, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX1_RORX,.-Transform_Sha256_AVX1_RORX
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX1_RORX_Len
+.type Transform_Sha256_AVX1_RORX_Len,@function
+.align 4
+Transform_Sha256_AVX1_RORX_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX1_RORX_Len
+.p2align 2
+_Transform_Sha256_AVX1_RORX_Len:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rsi, %rbp
+ movq %rdx, %rsi
+ subq $0x40, %rsp
+ vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13
+ vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11
+ vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # Start of loop processing a block
+L_sha256_len_avx1_len_rorx_start:
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rbp), %xmm0
+ vmovdqu 16(%rbp), %xmm1
+ vpshufb %xmm13, %xmm0, %xmm0
+ vpshufb %xmm13, %xmm1, %xmm1
+ vmovdqu 32(%rbp), %xmm2
+ vmovdqu 48(%rbp), %xmm3
+ vpshufb %xmm13, %xmm2, %xmm2
+ vpshufb %xmm13, %xmm3, %xmm3
+ # set_w_k_xfer_4: 0
+ vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ movl %r9d, %ebx
+ rorxl $6, %r12d, %edx
+ xorl %r10d, %ebx
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl (%rsp), %r15d
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 4(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 8(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 12(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 16(%rsp), %r11d
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 20(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 24(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 28(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 32(%rsp), %r15d
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 36(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 40(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 44(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 48(%rsp), %r11d
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 52(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 56(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 60(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 4
+ vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl (%rsp), %r15d
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 4(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 8(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 12(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 16(%rsp), %r11d
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 20(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 24(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 28(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 32(%rsp), %r15d
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 36(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 40(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 44(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 48(%rsp), %r11d
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 52(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 56(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 60(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 8
+ vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl (%rsp), %r15d
+ vpalignr $4, %xmm2, %xmm3, %xmm4
+ vpalignr $4, %xmm0, %xmm1, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 4(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm3, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 8(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm0, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 12(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-7
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 16(%rsp), %r11d
+ vpalignr $4, %xmm3, %xmm0, %xmm4
+ vpalignr $4, %xmm1, %xmm2, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 20(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm0, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 24(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm1, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 28(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 32(%rsp), %r15d
+ vpalignr $4, %xmm0, %xmm1, %xmm4
+ vpalignr $4, %xmm2, %xmm3, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 36(%rsp), %r14d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpshufd $0xfa, %xmm1, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 40(%rsp), %r13d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm2, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 44(%rsp), %r12d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vpaddd %xmm4, %xmm9, %xmm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-15
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 48(%rsp), %r11d
+ vpalignr $4, %xmm1, %xmm2, %xmm4
+ vpalignr $4, %xmm3, %xmm0, %xmm5
+ # rnd_0: 1 - 2
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %xmm5, %xmm6
+ vpslld $25, %xmm5, %xmm7
+ # rnd_0: 3 - 4
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $3, %xmm5, %xmm8
+ vpor %xmm6, %xmm7, %xmm7
+ # rnd_0: 5 - 7
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 52(%rsp), %r10d
+ vpsrld $18, %xmm5, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpslld $14, %xmm5, %xmm5
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpxor %xmm5, %xmm7, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %xmm6, %xmm7, %xmm7
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpshufd $0xfa, %xmm2, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ vpxor %xmm8, %xmm7, %xmm5
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrld $10, %xmm6, %xmm8
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 56(%rsp), %r9d
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpsrlq $0x11, %xmm6, %xmm6
+ vpaddd %xmm3, %xmm4, %xmm4
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %xmm5, %xmm4, %xmm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpxor %xmm6, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufb %xmm11, %xmm8, %xmm8
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpaddd %xmm8, %xmm4, %xmm4
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 60(%rsp), %r8d
+ vpshufd $0x50, %xmm4, %xmm6
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpsrld $10, %xmm6, %xmm9
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpsrlq $19, %xmm6, %xmm7
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpsrlq $0x11, %xmm6, %xmm6
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpxor %xmm7, %xmm6, %xmm6
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ vpxor %xmm6, %xmm9, %xmm9
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ vpshufb %xmm12, %xmm9, %xmm9
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vpaddd %xmm4, %xmm9, %xmm3
+ # msg_sched done: 12-15
+ # set_w_k_xfer_4: 12
+ vpaddd 192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
+ vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
+ vmovdqu %xmm4, (%rsp)
+ vmovdqu %xmm5, 16(%rsp)
+ vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
+ vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
+ vmovdqu %xmm6, 32(%rsp)
+ vmovdqu %xmm7, 48(%rsp)
+ xorl %eax, %eax
+ xorl %ecx, %ecx
+ # rnd_all_4: 0-3
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ addl %eax, %r8d
+ addl (%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 4(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ addl %eax, %r14d
+ addl 8(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 12(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ # rnd_all_4: 1-4
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ addl %eax, %r12d
+ addl 16(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 20(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ addl %eax, %r10d
+ addl 24(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 28(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ # rnd_all_4: 2-5
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ addl %eax, %r8d
+ addl 32(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 36(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ addl %r14d, %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ addl %eax, %r14d
+ addl 40(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 44(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ addl %r12d, %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ # rnd_all_4: 3-6
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ addl %eax, %r12d
+ addl 48(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 52(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ addl %r10d, %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ addl %eax, %r10d
+ addl 56(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 60(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ addl %r8d, %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ addl %eax, %r8d
+ addl (%rdi), %r8d
+ addl 4(%rdi), %r9d
+ addl 8(%rdi), %r10d
+ addl 12(%rdi), %r11d
+ addl 16(%rdi), %r12d
+ addl 20(%rdi), %r13d
+ addl 24(%rdi), %r14d
+ addl 28(%rdi), %r15d
+ addq $0x40, %rbp
+ subl $0x40, %esi
+ movl %r8d, (%rdi)
+ movl %r9d, 4(%rdi)
+ movl %r10d, 8(%rdi)
+ movl %r11d, 12(%rdi)
+ movl %r12d, 16(%rdi)
+ movl %r13d, 20(%rdi)
+ movl %r14d, 24(%rdi)
+ movl %r15d, 28(%rdi)
+ jnz L_sha256_len_avx1_len_rorx_start
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x40, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX1_RORX_Len,.-Transform_Sha256_AVX1_RORX_Len
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX1 */
+#ifdef HAVE_INTEL_AVX2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+L_avx2_sha256_k:                       # SHA-256 round constants K[0..63] (FIPS 180-4); each 4-constant row stored twice so one 32-byte load fills both ymm lanes. NOTE(review): no 32-byte align here, but table is only read via VEX vpaddd memory operands, which tolerate misalignment
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5      # K[0..3]
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5      # K[0..3] (lane 2 copy)
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5      # K[4..7]
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5      # K[4..7] (lane 2 copy)
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3      # K[8..11]
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3      # K[8..11] (lane 2 copy)
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174      # K[12..15]
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174      # K[12..15] (lane 2 copy)
+.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc       # K[16..19] (0xfc19dc6 = 0x0fc19dc6)
+.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc       # K[16..19] (lane 2 copy)
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da      # K[20..23]
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da      # K[20..23] (lane 2 copy)
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7      # K[24..27]
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7      # K[24..27] (lane 2 copy)
+.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967       # K[28..31] (0x6ca6351 = 0x06ca6351)
+.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967       # K[28..31] (lane 2 copy)
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13      # K[32..35]
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13      # K[32..35] (lane 2 copy)
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85      # K[36..39]
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85      # K[36..39] (lane 2 copy)
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3      # K[40..43]
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3      # K[40..43] (lane 2 copy)
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070      # K[44..47]
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070      # K[44..47] (lane 2 copy)
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5      # K[48..51]
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5      # K[48..51] (lane 2 copy)
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3      # K[52..55]
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3      # K[52..55] (lane 2 copy)
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208      # K[56..59]
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208      # K[56..59] (lane 2 copy)
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2      # K[60..63]
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2      # K[60..63] (lane 2 copy)
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32                              # 32-byte alignment: loaded with vmovdqa into ymm11
+#else
+.p2align 5
+#endif /* __APPLE__ */
+L_avx2_sha256_shuf_00BA:               # vpshufb mask: per 128-bit lane, pack dwords 0 and 2 into the low qword; 0xff bytes write zero, clearing the high qword
+.quad 0xb0a090803020100, 0xffffffffffffffff
+.quad 0xb0a090803020100, 0xffffffffffffffff
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32                              # 32-byte alignment: loaded with vmovdqa into ymm12
+#else
+.p2align 5
+#endif /* __APPLE__ */
+L_avx2_sha256_shuf_DC00:               # vpshufb mask: mirror of shuf_00BA — zero the low qword, pack dwords 0 and 2 of each lane into the high qword
+.quad 0xffffffffffffffff, 0xb0a090803020100
+.quad 0xffffffffffffffff, 0xb0a090803020100
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32                              # 32-byte alignment: first 16 bytes loaded with vmovdqa into xmm13
+#else
+.p2align 5
+#endif /* __APPLE__ */
+L_avx2_sha256_flip_mask:               # vpshufb mask: reverse the bytes within each 32-bit word (big-endian message words -> little-endian registers)
+.quad 0x405060700010203, 0xc0d0e0f08090a0b
+.quad 0x405060700010203, 0xc0d0e0f08090a0b
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX2
+.type Transform_Sha256_AVX2,@function
+.align 4
+Transform_Sha256_AVX2:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX2
+.p2align 2
+_Transform_Sha256_AVX2:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x200, %rsp
+ leaq 32(%rdi), %rax
+ vmovdqa L_avx2_sha256_flip_mask(%rip), %xmm13
+ vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11
+ vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rax), %xmm0
+ vmovdqu 16(%rax), %xmm1
+ vpshufb %xmm13, %xmm0, %xmm0
+ vpshufb %xmm13, %xmm1, %xmm1
+ vmovdqu 32(%rax), %xmm2
+ vmovdqu 48(%rax), %xmm3
+ vpshufb %xmm13, %xmm2, %xmm2
+ vpshufb %xmm13, %xmm3, %xmm3
+ movl %r9d, %ebx
+ movl %r12d, %edx
+ xorl %r10d, %ebx
+ # set_w_k_xfer_4: 0
+ vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, (%rsp)
+ vmovdqu %ymm5, 32(%rsp)
+ vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 64(%rsp)
+ vmovdqu %ymm5, 96(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl (%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 4(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm3, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 8(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 12(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # msg_sched done: 0-3
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 32(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 36(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm0, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 40(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 44(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # msg_sched done: 8-11
+ # msg_sched: 16-19
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 64(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 68(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm1, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 72(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 76(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # msg_sched done: 16-19
+ # msg_sched: 24-27
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 96(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 100(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm2, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 104(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 108(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # msg_sched done: 24-27
+ # set_w_k_xfer_4: 4
+ vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, 128(%rsp)
+ vmovdqu %ymm5, 160(%rsp)
+ vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 192(%rsp)
+ vmovdqu %ymm5, 224(%rsp)
+ # msg_sched: 32-35
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 128(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 132(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm3, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 136(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 140(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # msg_sched done: 32-35
+ # msg_sched: 40-43
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 160(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 164(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm0, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 168(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 172(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # msg_sched done: 40-43
+ # msg_sched: 48-51
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 192(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 196(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm1, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 200(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 204(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # msg_sched done: 48-51
+ # msg_sched: 56-59
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 224(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 228(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm2, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 232(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 236(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # msg_sched done: 56-59
+ # set_w_k_xfer_4: 8
+ vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, 256(%rsp)
+ vmovdqu %ymm5, 288(%rsp)
+ vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 320(%rsp)
+ vmovdqu %ymm5, 352(%rsp)
+ # msg_sched: 64-67
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 256(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 260(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm3, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 264(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 268(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # msg_sched done: 64-67
+ # msg_sched: 72-75
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 288(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 292(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm0, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 296(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 300(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # msg_sched done: 72-75
+ # msg_sched: 80-83
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 320(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 324(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm1, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 328(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 332(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # msg_sched done: 80-83
+ # msg_sched: 88-91
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 352(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 356(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm2, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 360(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 364(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # msg_sched done: 88-91
+ # set_w_k_xfer_4: 12
+ vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, 384(%rsp)
+ vmovdqu %ymm5, 416(%rsp)
+ vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 448(%rsp)
+ vmovdqu %ymm5, 480(%rsp)
+ # rnd_all_4: 24-27
+ addl 384(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 388(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 392(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 396(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 26-29
+ addl 416(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 420(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 424(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 428(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 28-31
+ addl 448(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 452(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 456(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 460(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 30-33
+ addl 480(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 484(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 488(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 492(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ addl %r8d, (%rdi)
+ addl %r9d, 4(%rdi)
+ addl %r10d, 8(%rdi)
+ addl %r11d, 12(%rdi)
+ addl %r12d, 16(%rdi)
+ addl %r13d, 20(%rdi)
+ addl %r14d, 24(%rdi)
+ addl %r15d, 28(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x200, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX2,.-Transform_Sha256_AVX2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX2_Len
+.type Transform_Sha256_AVX2_Len,@function
+.align 4
+Transform_Sha256_AVX2_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX2_Len
+.p2align 2
+_Transform_Sha256_AVX2_Len:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rsi, %rbp
+ movq %rdx, %rsi
+ subq $0x200, %rsp
+ testb $0x40, %sil
+ je L_sha256_len_avx2_block
+ vmovdqu (%rbp), %ymm0
+ vmovdqu 32(%rbp), %ymm1
+ vmovups %ymm0, 32(%rdi)
+ vmovups %ymm1, 64(%rdi)
+#ifndef __APPLE__
+ call Transform_Sha256_AVX2@plt
+#else
+ call _Transform_Sha256_AVX2
+#endif /* __APPLE__ */
+ addq $0x40, %rbp
+ subl $0x40, %esi
+ jz L_sha256_len_avx2_done
+L_sha256_len_avx2_block:
+ vmovdqa L_avx2_sha256_flip_mask(%rip), %ymm13
+ vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11
+ vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # Start of loop processing two blocks
+L_sha256_len_avx2_start:
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rbp), %xmm0
+ vmovdqu 16(%rbp), %xmm1
+ vmovdqu 64(%rbp), %xmm4
+ vmovdqu 80(%rbp), %xmm5
+ vinserti128 $0x01, %xmm4, %ymm0, %ymm0
+ vinserti128 $0x01, %xmm5, %ymm1, %ymm1
+ vpshufb %ymm13, %ymm0, %ymm0
+ vpshufb %ymm13, %ymm1, %ymm1
+ vmovdqu 32(%rbp), %xmm2
+ vmovdqu 48(%rbp), %xmm3
+ vmovdqu 96(%rbp), %xmm6
+ vmovdqu 112(%rbp), %xmm7
+ vinserti128 $0x01, %xmm6, %ymm2, %ymm2
+ vinserti128 $0x01, %xmm7, %ymm3, %ymm3
+ vpshufb %ymm13, %ymm2, %ymm2
+ vpshufb %ymm13, %ymm3, %ymm3
+ movl %r9d, %ebx
+ movl %r12d, %edx
+ xorl %r10d, %ebx
+ # set_w_k_xfer_4: 0
+ vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, (%rsp)
+ vmovdqu %ymm5, 32(%rsp)
+ vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 64(%rsp)
+ vmovdqu %ymm5, 96(%rsp)
+ # msg_sched: 0-3
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl (%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 4(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm3, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 8(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 12(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # msg_sched done: 0-3
+ # msg_sched: 8-11
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 32(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 36(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm0, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 40(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 44(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # msg_sched done: 8-11
+ # msg_sched: 16-19
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 64(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 68(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm1, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 72(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 76(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # msg_sched done: 16-19
+ # msg_sched: 24-27
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 96(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 100(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm2, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 104(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 108(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # msg_sched done: 24-27
+ # set_w_k_xfer_4: 4
+ vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, 128(%rsp)
+ vmovdqu %ymm5, 160(%rsp)
+ vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 192(%rsp)
+ vmovdqu %ymm5, 224(%rsp)
+ # msg_sched: 32-35
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 128(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 132(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm3, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 136(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 140(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # msg_sched done: 32-35
+ # msg_sched: 40-43
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 160(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 164(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm0, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 168(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 172(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # msg_sched done: 40-43
+ # msg_sched: 48-51
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 192(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 196(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm1, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 200(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 204(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # msg_sched done: 48-51
+ # msg_sched: 56-59
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 224(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 228(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm2, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 232(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 236(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # msg_sched done: 56-59
+ # set_w_k_xfer_4: 8
+ vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, 256(%rsp)
+ vmovdqu %ymm5, 288(%rsp)
+ vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 320(%rsp)
+ vmovdqu %ymm5, 352(%rsp)
+ # msg_sched: 64-67
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 256(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 260(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm3, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 264(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 268(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # msg_sched done: 64-67
+ # msg_sched: 72-75
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 288(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 292(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm0, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 296(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 300(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # msg_sched done: 72-75
+ # msg_sched: 80-83
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 1 - 2
+ movl %r9d, %eax
+ movl %r13d, %ecx
+ addl 320(%rsp), %r15d
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ andl %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r14d, %ecx
+ xorl %r12d, %edx
+ addl %ecx, %r15d
+ rorl $6, %edx
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ movl %r8d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r8d, %ebx
+ movl %r12d, %ecx
+ addl 324(%rsp), %r14d
+ xorl %r13d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r11d, %edx
+ andl %r11d, %ecx
+ rorl $5, %edx
+ xorl %r13d, %ecx
+ xorl %r11d, %edx
+ addl %ecx, %r14d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm1, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ movl %r15d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r15d, %eax
+ movl %r11d, %ecx
+ addl 328(%rsp), %r13d
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ andl %r10d, %ecx
+ rorl $5, %edx
+ xorl %r12d, %ecx
+ xorl %r10d, %edx
+ addl %ecx, %r13d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ movl %r14d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r14d, %ebx
+ movl %r10d, %ecx
+ addl 332(%rsp), %r12d
+ xorl %r11d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r9d, %edx
+ andl %r9d, %ecx
+ rorl $5, %edx
+ xorl %r11d, %ecx
+ xorl %r9d, %edx
+ addl %ecx, %r12d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ movl %r13d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # msg_sched done: 80-83
+ # msg_sched: 88-91
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 1 - 2
+ movl %r13d, %eax
+ movl %r9d, %ecx
+ addl 352(%rsp), %r11d
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ andl %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 3 - 4
+ rorl $5, %edx
+ xorl %r10d, %ecx
+ xorl %r8d, %edx
+ addl %ecx, %r11d
+ rorl $6, %edx
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ movl %r12d, %ecx
+ vpsrld $18, %ymm5, %ymm8
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 5 - 6
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ vpor %ymm6, %ymm7, %ymm6
+ vpor %ymm8, %ymm9, %ymm8
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ # rnd_1: 0 - 1
+ rorl $14, %edx
+ movl %r12d, %ebx
+ movl %r8d, %ecx
+ addl 356(%rsp), %r10d
+ xorl %r9d, %ecx
+ vpsrld $3, %ymm5, %ymm9
+ vpxor %ymm6, %ymm8, %ymm6
+ # rnd_1: 2 - 3
+ xorl %r15d, %edx
+ andl %r15d, %ecx
+ rorl $5, %edx
+ xorl %r9d, %ecx
+ xorl %r15d, %edx
+ addl %ecx, %r10d
+ vpxor %ymm6, %ymm9, %ymm5
+ vpshufd $0xfa, %ymm2, %ymm6
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ movl %r11d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ vpsrld $10, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 6 - 7
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ # rnd_0: 0 - 0
+ rorl $14, %edx
+ vpsrlq $0x11, %ymm6, %ymm6
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 1 - 3
+ movl %r11d, %eax
+ movl %r15d, %ecx
+ addl 360(%rsp), %r9d
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ andl %r14d, %ecx
+ rorl $5, %edx
+ xorl %r8d, %ecx
+ xorl %r14d, %edx
+ addl %ecx, %r9d
+ vpxor %ymm6, %ymm7, %ymm6
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 4 - 4
+ rorl $6, %edx
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ movl %r10d, %ecx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 5 - 5
+ andl %eax, %ebx
+ rorl $9, %ecx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 6 - 6
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 7 - 7
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ # rnd_1: 0 - 0
+ rorl $14, %edx
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_1: 1 - 1
+ movl %r10d, %ebx
+ movl %r14d, %ecx
+ addl 364(%rsp), %r8d
+ xorl %r15d, %ecx
+ vpsrlq $0x11, %ymm6, %ymm8
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 2 - 3
+ xorl %r13d, %edx
+ andl %r13d, %ecx
+ rorl $5, %edx
+ xorl %r15d, %ecx
+ xorl %r13d, %edx
+ addl %ecx, %r8d
+ vpsrld $10, %ymm6, %ymm9
+ vpxor %ymm8, %ymm7, %ymm8
+ # rnd_1: 4 - 5
+ rorl $6, %edx
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ movl %r9d, %ecx
+ andl %ebx, %eax
+ rorl $9, %ecx
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ vpxor %ymm9, %ymm8, %ymm9
+ # rnd_1: 6 - 6
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 7 - 7
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # msg_sched done: 88-91
+ # set_w_k_xfer_4: 12
+ vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, 384(%rsp)
+ vmovdqu %ymm5, 416(%rsp)
+ vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 448(%rsp)
+ vmovdqu %ymm5, 480(%rsp)
+ # rnd_all_4: 24-27
+ addl 384(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 388(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 392(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 396(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 26-29
+ addl 416(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 420(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 424(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 428(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 28-31
+ addl 448(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 452(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 456(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 460(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 30-33
+ addl 480(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 484(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 488(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 492(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ addl (%rdi), %r8d
+ addl 4(%rdi), %r9d
+ addl 8(%rdi), %r10d
+ addl 12(%rdi), %r11d
+ addl 16(%rdi), %r12d
+ addl 20(%rdi), %r13d
+ addl 24(%rdi), %r14d
+ addl 28(%rdi), %r15d
+ movl %r8d, (%rdi)
+ movl %r9d, 4(%rdi)
+ movl %r10d, 8(%rdi)
+ movl %r11d, 12(%rdi)
+ movl %r12d, 16(%rdi)
+ movl %r13d, 20(%rdi)
+ movl %r14d, 24(%rdi)
+ movl %r15d, 28(%rdi)
+ movl %r9d, %ebx
+ movl %r12d, %edx
+ xorl %r10d, %ebx
+ # rnd_all_4: 1-4
+ addl 16(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 20(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 24(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 28(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 3-6
+ addl 48(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 52(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 56(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 60(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 5-8
+ addl 80(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 84(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 88(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 92(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 7-10
+ addl 112(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 116(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 120(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 124(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 9-12
+ addl 144(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 148(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 152(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 156(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 11-14
+ addl 176(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 180(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 184(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 188(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 13-16
+ addl 208(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 212(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 216(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 220(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 15-18
+ addl 240(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 244(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 248(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 252(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 17-20
+ addl 272(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 276(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 280(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 284(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 19-22
+ addl 304(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 308(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 312(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 316(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 21-24
+ addl 336(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 340(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 344(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 348(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 23-26
+ addl 368(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 372(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 376(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 380(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 25-28
+ addl 400(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 404(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 408(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 412(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 27-30
+ addl 432(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 436(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 440(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 444(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ # rnd_all_4: 29-32
+ addl 464(%rsp), %r15d
+ movl %r13d, %ecx
+ movl %r9d, %eax
+ xorl %r14d, %ecx
+ rorl $14, %edx
+ andl %r12d, %ecx
+ xorl %r12d, %edx
+ xorl %r14d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r15d
+ xorl %r12d, %edx
+ xorl %r8d, %eax
+ rorl $6, %edx
+ movl %r8d, %ecx
+ addl %edx, %r15d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r8d, %ecx
+ xorl %r9d, %ebx
+ rorl $11, %ecx
+ addl %r15d, %r11d
+ xorl %r8d, %ecx
+ addl %ebx, %r15d
+ rorl $2, %ecx
+ movl %r11d, %edx
+ addl %ecx, %r15d
+ addl 468(%rsp), %r14d
+ movl %r12d, %ecx
+ movl %r8d, %ebx
+ xorl %r13d, %ecx
+ rorl $14, %edx
+ andl %r11d, %ecx
+ xorl %r11d, %edx
+ xorl %r13d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r14d
+ xorl %r11d, %edx
+ xorl %r15d, %ebx
+ rorl $6, %edx
+ movl %r15d, %ecx
+ addl %edx, %r14d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r15d, %ecx
+ xorl %r8d, %eax
+ rorl $11, %ecx
+ addl %r14d, %r10d
+ xorl %r15d, %ecx
+ addl %eax, %r14d
+ rorl $2, %ecx
+ movl %r10d, %edx
+ addl %ecx, %r14d
+ addl 472(%rsp), %r13d
+ movl %r11d, %ecx
+ movl %r15d, %eax
+ xorl %r12d, %ecx
+ rorl $14, %edx
+ andl %r10d, %ecx
+ xorl %r10d, %edx
+ xorl %r12d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r13d
+ xorl %r10d, %edx
+ xorl %r14d, %eax
+ rorl $6, %edx
+ movl %r14d, %ecx
+ addl %edx, %r13d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r14d, %ecx
+ xorl %r15d, %ebx
+ rorl $11, %ecx
+ addl %r13d, %r9d
+ xorl %r14d, %ecx
+ addl %ebx, %r13d
+ rorl $2, %ecx
+ movl %r9d, %edx
+ addl %ecx, %r13d
+ addl 476(%rsp), %r12d
+ movl %r10d, %ecx
+ movl %r14d, %ebx
+ xorl %r11d, %ecx
+ rorl $14, %edx
+ andl %r9d, %ecx
+ xorl %r9d, %edx
+ xorl %r11d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r12d
+ xorl %r9d, %edx
+ xorl %r13d, %ebx
+ rorl $6, %edx
+ movl %r13d, %ecx
+ addl %edx, %r12d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r13d, %ecx
+ xorl %r14d, %eax
+ rorl $11, %ecx
+ addl %r12d, %r8d
+ xorl %r13d, %ecx
+ addl %eax, %r12d
+ rorl $2, %ecx
+ movl %r8d, %edx
+ addl %ecx, %r12d
+ # rnd_all_4: 31-34
+ addl 496(%rsp), %r11d
+ movl %r9d, %ecx
+ movl %r13d, %eax
+ xorl %r10d, %ecx
+ rorl $14, %edx
+ andl %r8d, %ecx
+ xorl %r8d, %edx
+ xorl %r10d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r11d
+ xorl %r8d, %edx
+ xorl %r12d, %eax
+ rorl $6, %edx
+ movl %r12d, %ecx
+ addl %edx, %r11d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r12d, %ecx
+ xorl %r13d, %ebx
+ rorl $11, %ecx
+ addl %r11d, %r15d
+ xorl %r12d, %ecx
+ addl %ebx, %r11d
+ rorl $2, %ecx
+ movl %r15d, %edx
+ addl %ecx, %r11d
+ addl 500(%rsp), %r10d
+ movl %r8d, %ecx
+ movl %r12d, %ebx
+ xorl %r9d, %ecx
+ rorl $14, %edx
+ andl %r15d, %ecx
+ xorl %r15d, %edx
+ xorl %r9d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r10d
+ xorl %r15d, %edx
+ xorl %r11d, %ebx
+ rorl $6, %edx
+ movl %r11d, %ecx
+ addl %edx, %r10d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r11d, %ecx
+ xorl %r12d, %eax
+ rorl $11, %ecx
+ addl %r10d, %r14d
+ xorl %r11d, %ecx
+ addl %eax, %r10d
+ rorl $2, %ecx
+ movl %r14d, %edx
+ addl %ecx, %r10d
+ addl 504(%rsp), %r9d
+ movl %r15d, %ecx
+ movl %r11d, %eax
+ xorl %r8d, %ecx
+ rorl $14, %edx
+ andl %r14d, %ecx
+ xorl %r14d, %edx
+ xorl %r8d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r9d
+ xorl %r14d, %edx
+ xorl %r10d, %eax
+ rorl $6, %edx
+ movl %r10d, %ecx
+ addl %edx, %r9d
+ rorl $9, %ecx
+ andl %eax, %ebx
+ xorl %r10d, %ecx
+ xorl %r11d, %ebx
+ rorl $11, %ecx
+ addl %r9d, %r13d
+ xorl %r10d, %ecx
+ addl %ebx, %r9d
+ rorl $2, %ecx
+ movl %r13d, %edx
+ addl %ecx, %r9d
+ addl 508(%rsp), %r8d
+ movl %r14d, %ecx
+ movl %r10d, %ebx
+ xorl %r15d, %ecx
+ rorl $14, %edx
+ andl %r13d, %ecx
+ xorl %r13d, %edx
+ xorl %r15d, %ecx
+ rorl $5, %edx
+ addl %ecx, %r8d
+ xorl %r13d, %edx
+ xorl %r9d, %ebx
+ rorl $6, %edx
+ movl %r9d, %ecx
+ addl %edx, %r8d
+ rorl $9, %ecx
+ andl %ebx, %eax
+ xorl %r9d, %ecx
+ xorl %r10d, %eax
+ rorl $11, %ecx
+ addl %r8d, %r12d
+ xorl %r9d, %ecx
+ addl %eax, %r8d
+ rorl $2, %ecx
+ movl %r12d, %edx
+ addl %ecx, %r8d
+ addl (%rdi), %r8d
+ addl 4(%rdi), %r9d
+ addl 8(%rdi), %r10d
+ addl 12(%rdi), %r11d
+ addl 16(%rdi), %r12d
+ addl 20(%rdi), %r13d
+ addl 24(%rdi), %r14d
+ addl 28(%rdi), %r15d
+ addq $0x80, %rbp
+ subl $0x80, %esi
+ movl %r8d, (%rdi)
+ movl %r9d, 4(%rdi)
+ movl %r10d, 8(%rdi)
+ movl %r11d, 12(%rdi)
+ movl %r12d, 16(%rdi)
+ movl %r13d, 20(%rdi)
+ movl %r14d, 24(%rdi)
+ movl %r15d, 28(%rdi)
+ jnz L_sha256_len_avx2_start
+L_sha256_len_avx2_done:
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x200, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX2_Len,.-Transform_Sha256_AVX2_Len
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+# SHA-256 round constants K[0..63] (FIPS 180-4, sect. 4.2.2).  Each group
+# of four constants is stored twice in a row so that one 32-byte ymm load
+# (e.g. vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4) presents the
+# same four constants in both 128-bit lanes of the register.
+# The alignment directive matches the sibling mask tables below and avoids
+# cache-line-split 32-byte loads; AVX loads do not require it for
+# correctness.
+# NOTE(review): table is read-only — a read-only section (.rodata /
+# __DATA,__const) would be preferable to .data; kept as-is to match the
+# generator's output.
+L_avx2_rorx_sha256_k:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
+.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
+.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+# vpshufb control mask that byte-reverses each 32-bit word (byte indices
+# 3,2,1,0 / 7,6,5,4 / ...), converting the little-endian message bytes
+# loaded from memory into the big-endian word order SHA-256 operates on.
+# The 128-bit pattern is repeated so the mask works for both xmm and the
+# full ymm width.  Applied right after the message loads (vpshufb %xmm13).
+L_avx2_rorx_sha256_flip_mask:
+.quad 0x405060700010203, 0xc0d0e0f08090a0b
+.quad 0x405060700010203, 0xc0d0e0f08090a0b
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+# vpshufb mask: packs dwords 0 and 2 of each 128-bit lane into that lane's
+# low 64 bits; the 0xff index bytes make vpshufb write zeros into the high
+# 64 bits.  Used (via %ymm11) on a sigma intermediate in the message-
+# schedule rounds to compact the per-dword results into the lower half.
+L_avx2_rorx_sha256_shuf_00BA:
+.quad 0xb0a090803020100, 0xffffffffffffffff
+.quad 0xb0a090803020100, 0xffffffffffffffff
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+# Companion to shuf_00BA with the halves swapped: the 0xff index bytes
+# zero each lane's low 64 bits, while dwords 0 and 2 of the lane are
+# packed into the high 64 bits.  Used (via %ymm12) so the two compacted
+# sigma halves can be combined without overlapping.
+L_avx2_rorx_sha256_shuf_DC00:
+.quad 0xffffffffffffffff, 0xb0a090803020100
+.quad 0xffffffffffffffff, 0xb0a090803020100
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX2_RORX
+.type Transform_Sha256_AVX2_RORX,@function
+.align 4
+Transform_Sha256_AVX2_RORX:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX2_RORX
+.p2align 2
+_Transform_Sha256_AVX2_RORX:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x200, %rsp
+ leaq 32(%rdi), %rax
+ vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %xmm13
+ vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11
+ vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rax), %xmm0
+ vmovdqu 16(%rax), %xmm1
+ vpshufb %xmm13, %xmm0, %xmm0
+ vpshufb %xmm13, %xmm1, %xmm1
+ vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, (%rsp)
+ vmovdqu %ymm5, 32(%rsp)
+ vmovdqu 32(%rax), %xmm2
+ vmovdqu 48(%rax), %xmm3
+ vpshufb %xmm13, %xmm2, %xmm2
+ vpshufb %xmm13, %xmm3, %xmm3
+ vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 64(%rsp)
+ vmovdqu %ymm5, 96(%rsp)
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ movl %r9d, %ebx
+ rorxl $6, %r12d, %edx
+ xorl %r10d, %ebx
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl (%rsp), %r15d
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 4(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm3, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 8(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 12(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 128(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 32(%rsp), %r11d
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 36(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm0, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 40(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 44(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 160(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 64(%rsp), %r15d
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 68(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm1, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 72(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 76(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 192(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 96(%rsp), %r11d
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 100(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm2, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 104(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 108(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 224(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 128(%rsp), %r15d
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 132(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm3, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 136(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 140(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 256(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 160(%rsp), %r11d
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 164(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm0, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 168(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 172(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 288(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 192(%rsp), %r15d
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 196(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm1, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 200(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 204(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 320(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 224(%rsp), %r11d
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 228(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm2, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 232(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 236(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 352(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 256(%rsp), %r15d
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 260(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm3, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 264(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 268(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 384(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 288(%rsp), %r11d
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 292(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm0, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 296(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 300(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 416(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 320(%rsp), %r15d
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 324(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm1, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 328(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 332(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 448(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 352(%rsp), %r11d
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 356(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm2, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 360(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 364(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 480(%rsp)
+ xorl %eax, %eax
+ xorl %ecx, %ecx
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 384(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 388(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 392(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 396(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 416(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 420(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 424(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 428(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 448(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 452(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 456(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 460(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 480(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 484(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 488(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 492(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ addl %eax, %r8d
+ addl %r8d, (%rdi)
+ addl %r9d, 4(%rdi)
+ addl %r10d, 8(%rdi)
+ addl %r11d, 12(%rdi)
+ addl %r12d, 16(%rdi)
+ addl %r13d, 20(%rdi)
+ addl %r14d, 24(%rdi)
+ addl %r15d, 28(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x200, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX2_RORX,.-Transform_Sha256_AVX2_RORX
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha256_AVX2_RORX_Len
+.type Transform_Sha256_AVX2_RORX_Len,@function
+.align 4
+Transform_Sha256_AVX2_RORX_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha256_AVX2_RORX_Len
+.p2align 2
+_Transform_Sha256_AVX2_RORX_Len:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rsi, %rbp
+ movq %rdx, %rsi
+ subq $0x200, %rsp
+ testb $0x40, %sil
+ je L_sha256_len_avx2_rorx_block
+ vmovdqu (%rbp), %ymm0
+ vmovdqu 32(%rbp), %ymm1
+ vmovups %ymm0, 32(%rdi)
+ vmovups %ymm1, 64(%rdi)
+#ifndef __APPLE__
+ call Transform_Sha256_AVX2_RORX@plt
+#else
+ call _Transform_Sha256_AVX2_RORX
+#endif /* __APPLE__ */
+ addq $0x40, %rbp
+ subl $0x40, %esi
+ jz L_sha256_len_avx2_rorx_done
+L_sha256_len_avx2_rorx_block:
+ vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %ymm13
+ vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11
+ vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12
+ movl (%rdi), %r8d
+ movl 4(%rdi), %r9d
+ movl 8(%rdi), %r10d
+ movl 12(%rdi), %r11d
+ movl 16(%rdi), %r12d
+ movl 20(%rdi), %r13d
+ movl 24(%rdi), %r14d
+ movl 28(%rdi), %r15d
+ # Start of loop processing two blocks
+L_sha256_len_avx2_rorx_start:
+ # X0, X1, X2, X3 = W[0..15]
+ vmovdqu (%rbp), %xmm0
+ vmovdqu 16(%rbp), %xmm1
+ vinserti128 $0x01, 64(%rbp), %ymm0, %ymm0
+ vinserti128 $0x01, 80(%rbp), %ymm1, %ymm1
+ vpshufb %ymm13, %ymm0, %ymm0
+ vpshufb %ymm13, %ymm1, %ymm1
+ vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
+ vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5
+ vmovdqu %ymm4, (%rsp)
+ vmovdqu %ymm5, 32(%rsp)
+ vmovdqu 32(%rbp), %xmm2
+ vmovdqu 48(%rbp), %xmm3
+ vinserti128 $0x01, 96(%rbp), %ymm2, %ymm2
+ vinserti128 $0x01, 112(%rbp), %ymm3, %ymm3
+ vpshufb %ymm13, %ymm2, %ymm2
+ vpshufb %ymm13, %ymm3, %ymm3
+ vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
+ vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5
+ vmovdqu %ymm4, 64(%rsp)
+ vmovdqu %ymm5, 96(%rsp)
+ movl %r9d, %ebx
+ rorxl $6, %r12d, %edx
+ xorl %r10d, %ebx
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl (%rsp), %r15d
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 4(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm3, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 8(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 12(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 128(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 32(%rsp), %r11d
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 36(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm0, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 40(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 44(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 160(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 64(%rsp), %r15d
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 68(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm1, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 72(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 76(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 192(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 96(%rsp), %r11d
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 100(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm2, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 104(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 108(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 224(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 128(%rsp), %r15d
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 132(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm3, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 136(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 140(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 256(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 160(%rsp), %r11d
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 164(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm0, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 168(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 172(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 288(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 192(%rsp), %r15d
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 196(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm1, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 200(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 204(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 320(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 224(%rsp), %r11d
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 228(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm2, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 232(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 236(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 352(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 256(%rsp), %r15d
+ vpalignr $4, %ymm0, %ymm1, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm2, %ymm3, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 260(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm3, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm0, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 264(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 268(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm0
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 384(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 288(%rsp), %r11d
+ vpalignr $4, %ymm1, %ymm2, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm3, %ymm0, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 292(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm0, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm1, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 296(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 300(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm1
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 416(%rsp)
+ # rnd_0: 0 - 0
+ movl %r13d, %eax
+ rorxl $11, %r12d, %ecx
+ addl 320(%rsp), %r15d
+ vpalignr $4, %ymm2, %ymm3, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ vpalignr $4, %ymm0, %ymm1, %ymm4
+ # rnd_0: 2 - 2
+ andl %r12d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r8d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ xorl %r14d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r8d, %eax
+ addl %edx, %r15d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ addl %ebx, %r15d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r12d, %ebx
+ rorxl $11, %r11d, %ecx
+ addl 324(%rsp), %r14d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r11d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r15d, %ecx
+ vpshufd $0xfa, %ymm1, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ xorl %r13d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r14d, %r10d
+ movl %r8d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r15d, %ebx
+ addl %edx, %r14d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ addl %eax, %r14d
+ vpaddd %ymm2, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r11d, %eax
+ rorxl $11, %r10d, %ecx
+ addl 328(%rsp), %r13d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r10d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r14d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ xorl %r12d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r14d, %eax
+ addl %edx, %r13d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ addl %ebx, %r13d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r10d, %ebx
+ rorxl $11, %r9d, %ecx
+ addl 332(%rsp), %r12d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r9d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r13d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ xorl %r11d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ vpaddd %ymm4, %ymm9, %ymm2
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r12d, %r8d
+ movl %r14d, %ebx
+ vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r13d, %ebx
+ addl %edx, %r12d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ addl %eax, %r12d
+ vmovdqu %ymm4, 448(%rsp)
+ # rnd_0: 0 - 0
+ movl %r9d, %eax
+ rorxl $11, %r8d, %ecx
+ addl 352(%rsp), %r11d
+ vpalignr $4, %ymm3, %ymm0, %ymm5
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ vpalignr $4, %ymm1, %ymm2, %ymm4
+ # rnd_0: 2 - 2
+ andl %r8d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r12d, %ecx
+ vpsrld $7, %ymm5, %ymm6
+ # rnd_0: 3 - 3
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ xorl %r10d, %eax
+ vpslld $25, %ymm5, %ymm7
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ vpsrld $18, %ymm5, %ymm8
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ vpslld $14, %ymm5, %ymm9
+ # rnd_0: 6 - 6
+ xorl %r12d, %eax
+ addl %edx, %r11d
+ andl %eax, %ebx
+ vpor %ymm7, %ymm6, %ymm6
+ # rnd_0: 7 - 7
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ addl %ebx, %r11d
+ vpor %ymm9, %ymm8, %ymm8
+ # rnd_1: 0 - 0
+ movl %r8d, %ebx
+ rorxl $11, %r15d, %ecx
+ addl 356(%rsp), %r10d
+ vpsrld $3, %ymm5, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ vpxor %ymm8, %ymm6, %ymm6
+ # rnd_1: 2 - 2
+ andl %r15d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r11d, %ecx
+ vpshufd $0xfa, %ymm2, %ymm7
+ # rnd_1: 3 - 3
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ xorl %r9d, %ebx
+ vpxor %ymm6, %ymm9, %ymm5
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ vpsrld $10, %ymm7, %ymm8
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r10d, %r14d
+ movl %r12d, %ebx
+ vpsrlq $19, %ymm7, %ymm6
+ # rnd_1: 6 - 6
+ xorl %r11d, %ebx
+ addl %edx, %r10d
+ andl %ebx, %eax
+ vpsrlq $0x11, %ymm7, %ymm7
+ # rnd_1: 7 - 7
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ addl %eax, %r10d
+ vpaddd %ymm3, %ymm4, %ymm4
+ # rnd_0: 0 - 0
+ movl %r15d, %eax
+ rorxl $11, %r14d, %ecx
+ addl 360(%rsp), %r9d
+ vpxor %ymm7, %ymm6, %ymm6
+ # rnd_0: 1 - 1
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ vpxor %ymm6, %ymm8, %ymm8
+ # rnd_0: 2 - 2
+ andl %r14d, %eax
+ xorl %ecx, %edx
+ rorxl $13, %r10d, %ecx
+ vpaddd %ymm5, %ymm4, %ymm4
+ # rnd_0: 3 - 3
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ xorl %r8d, %eax
+ vpshufb %ymm11, %ymm8, %ymm8
+ # rnd_0: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ vpaddd %ymm8, %ymm4, %ymm4
+ # rnd_0: 5 - 5
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ vpshufd $0x50, %ymm4, %ymm6
+ # rnd_0: 6 - 6
+ xorl %r10d, %eax
+ addl %edx, %r9d
+ andl %eax, %ebx
+ vpsrlq $0x11, %ymm6, %ymm8
+ # rnd_0: 7 - 7
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ addl %ebx, %r9d
+ vpsrlq $19, %ymm6, %ymm7
+ # rnd_1: 0 - 0
+ movl %r14d, %ebx
+ rorxl $11, %r13d, %ecx
+ addl 364(%rsp), %r8d
+ vpsrld $10, %ymm6, %ymm9
+ # rnd_1: 1 - 1
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ vpxor %ymm7, %ymm8, %ymm8
+ # rnd_1: 2 - 2
+ andl %r13d, %ebx
+ xorl %ecx, %edx
+ rorxl $13, %r9d, %ecx
+ vpxor %ymm8, %ymm9, %ymm9
+ # rnd_1: 3 - 3
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ xorl %r15d, %ebx
+ vpshufb %ymm12, %ymm9, %ymm9
+ # rnd_1: 4 - 4
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ vpaddd %ymm4, %ymm9, %ymm3
+ # rnd_1: 5 - 5
+ xorl %ecx, %edx
+ addl %r8d, %r12d
+ movl %r10d, %ebx
+ vpaddd 480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
+ # rnd_1: 6 - 6
+ xorl %r9d, %ebx
+ addl %edx, %r8d
+ andl %ebx, %eax
+ # rnd_1: 7 - 7
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ addl %eax, %r8d
+ vmovdqu %ymm4, 480(%rsp)
+ xorl %eax, %eax
+ xorl %ecx, %ecx
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 384(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 388(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 392(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 396(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 416(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 420(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 424(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 428(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 448(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 452(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 456(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 460(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 480(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 484(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 488(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 492(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ addl %eax, %r8d
+ xorl %ecx, %ecx
+ addl (%rdi), %r8d
+ addl 4(%rdi), %r9d
+ addl 8(%rdi), %r10d
+ addl 12(%rdi), %r11d
+ addl 16(%rdi), %r12d
+ addl 20(%rdi), %r13d
+ addl 24(%rdi), %r14d
+ addl 28(%rdi), %r15d
+ movl %r8d, (%rdi)
+ movl %r9d, 4(%rdi)
+ movl %r10d, 8(%rdi)
+ movl %r11d, 12(%rdi)
+ movl %r12d, 16(%rdi)
+ movl %r13d, 20(%rdi)
+ movl %r14d, 24(%rdi)
+ movl %r15d, 28(%rdi)
+ movl %r9d, %ebx
+ xorl %eax, %eax
+ xorl %r10d, %ebx
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 16(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 20(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 24(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 28(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 48(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 52(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 56(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 60(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 80(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 84(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 88(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 92(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 112(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 116(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 120(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 124(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 144(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 148(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 152(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 156(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 176(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 180(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 184(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 188(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 208(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 212(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 216(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 220(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 240(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 244(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 248(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 252(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 272(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 276(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 280(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 284(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 304(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 308(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 312(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 316(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 336(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 340(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 344(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 348(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 368(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 372(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 376(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 380(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 400(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 404(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 408(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 412(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 432(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 436(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 440(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 444(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ rorxl $6, %r12d, %edx
+ rorxl $11, %r12d, %ecx
+ leal (%r8,%rax,1), %r8d
+ addl 464(%rsp), %r15d
+ movl %r13d, %eax
+ xorl %edx, %ecx
+ xorl %r14d, %eax
+ rorxl $25, %r12d, %edx
+ xorl %ecx, %edx
+ andl %r12d, %eax
+ addl %edx, %r15d
+ rorxl $2, %r8d, %edx
+ rorxl $13, %r8d, %ecx
+ xorl %r14d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r8d, %edx
+ addl %eax, %r15d
+ xorl %ecx, %edx
+ movl %r9d, %eax
+ addl %r15d, %r11d
+ xorl %r8d, %eax
+ andl %eax, %ebx
+ addl %edx, %r15d
+ xorl %r9d, %ebx
+ rorxl $6, %r11d, %edx
+ rorxl $11, %r11d, %ecx
+ addl %ebx, %r15d
+ addl 468(%rsp), %r14d
+ movl %r12d, %ebx
+ xorl %edx, %ecx
+ xorl %r13d, %ebx
+ rorxl $25, %r11d, %edx
+ xorl %ecx, %edx
+ andl %r11d, %ebx
+ addl %edx, %r14d
+ rorxl $2, %r15d, %edx
+ rorxl $13, %r15d, %ecx
+ xorl %r13d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r15d, %edx
+ addl %ebx, %r14d
+ xorl %ecx, %edx
+ movl %r8d, %ebx
+ leal (%r10,%r14,1), %r10d
+ xorl %r15d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r14d
+ xorl %r8d, %eax
+ rorxl $6, %r10d, %edx
+ rorxl $11, %r10d, %ecx
+ leal (%r14,%rax,1), %r14d
+ addl 472(%rsp), %r13d
+ movl %r11d, %eax
+ xorl %edx, %ecx
+ xorl %r12d, %eax
+ rorxl $25, %r10d, %edx
+ xorl %ecx, %edx
+ andl %r10d, %eax
+ addl %edx, %r13d
+ rorxl $2, %r14d, %edx
+ rorxl $13, %r14d, %ecx
+ xorl %r12d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r14d, %edx
+ addl %eax, %r13d
+ xorl %ecx, %edx
+ movl %r15d, %eax
+ addl %r13d, %r9d
+ xorl %r14d, %eax
+ andl %eax, %ebx
+ addl %edx, %r13d
+ xorl %r15d, %ebx
+ rorxl $6, %r9d, %edx
+ rorxl $11, %r9d, %ecx
+ addl %ebx, %r13d
+ addl 476(%rsp), %r12d
+ movl %r10d, %ebx
+ xorl %edx, %ecx
+ xorl %r11d, %ebx
+ rorxl $25, %r9d, %edx
+ xorl %ecx, %edx
+ andl %r9d, %ebx
+ addl %edx, %r12d
+ rorxl $2, %r13d, %edx
+ rorxl $13, %r13d, %ecx
+ xorl %r11d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r13d, %edx
+ addl %ebx, %r12d
+ xorl %ecx, %edx
+ movl %r14d, %ebx
+ leal (%r8,%r12,1), %r8d
+ xorl %r13d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r12d
+ xorl %r14d, %eax
+ rorxl $6, %r8d, %edx
+ rorxl $11, %r8d, %ecx
+ leal (%r12,%rax,1), %r12d
+ addl 496(%rsp), %r11d
+ movl %r9d, %eax
+ xorl %edx, %ecx
+ xorl %r10d, %eax
+ rorxl $25, %r8d, %edx
+ xorl %ecx, %edx
+ andl %r8d, %eax
+ addl %edx, %r11d
+ rorxl $2, %r12d, %edx
+ rorxl $13, %r12d, %ecx
+ xorl %r10d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r12d, %edx
+ addl %eax, %r11d
+ xorl %ecx, %edx
+ movl %r13d, %eax
+ addl %r11d, %r15d
+ xorl %r12d, %eax
+ andl %eax, %ebx
+ addl %edx, %r11d
+ xorl %r13d, %ebx
+ rorxl $6, %r15d, %edx
+ rorxl $11, %r15d, %ecx
+ addl %ebx, %r11d
+ addl 500(%rsp), %r10d
+ movl %r8d, %ebx
+ xorl %edx, %ecx
+ xorl %r9d, %ebx
+ rorxl $25, %r15d, %edx
+ xorl %ecx, %edx
+ andl %r15d, %ebx
+ addl %edx, %r10d
+ rorxl $2, %r11d, %edx
+ rorxl $13, %r11d, %ecx
+ xorl %r9d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r11d, %edx
+ addl %ebx, %r10d
+ xorl %ecx, %edx
+ movl %r12d, %ebx
+ leal (%r14,%r10,1), %r14d
+ xorl %r11d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r10d
+ xorl %r12d, %eax
+ rorxl $6, %r14d, %edx
+ rorxl $11, %r14d, %ecx
+ leal (%r10,%rax,1), %r10d
+ addl 504(%rsp), %r9d
+ movl %r15d, %eax
+ xorl %edx, %ecx
+ xorl %r8d, %eax
+ rorxl $25, %r14d, %edx
+ xorl %ecx, %edx
+ andl %r14d, %eax
+ addl %edx, %r9d
+ rorxl $2, %r10d, %edx
+ rorxl $13, %r10d, %ecx
+ xorl %r8d, %eax
+ xorl %edx, %ecx
+ rorxl $22, %r10d, %edx
+ addl %eax, %r9d
+ xorl %ecx, %edx
+ movl %r11d, %eax
+ addl %r9d, %r13d
+ xorl %r10d, %eax
+ andl %eax, %ebx
+ addl %edx, %r9d
+ xorl %r11d, %ebx
+ rorxl $6, %r13d, %edx
+ rorxl $11, %r13d, %ecx
+ addl %ebx, %r9d
+ addl 508(%rsp), %r8d
+ movl %r14d, %ebx
+ xorl %edx, %ecx
+ xorl %r15d, %ebx
+ rorxl $25, %r13d, %edx
+ xorl %ecx, %edx
+ andl %r13d, %ebx
+ addl %edx, %r8d
+ rorxl $2, %r9d, %edx
+ rorxl $13, %r9d, %ecx
+ xorl %r15d, %ebx
+ xorl %edx, %ecx
+ rorxl $22, %r9d, %edx
+ addl %ebx, %r8d
+ xorl %ecx, %edx
+ movl %r10d, %ebx
+ leal (%r12,%r8,1), %r12d
+ xorl %r9d, %ebx
+ andl %ebx, %eax
+ addl %edx, %r8d
+ xorl %r10d, %eax
+ addl %eax, %r8d
+ addq $0x80, %rbp
+ addl (%rdi), %r8d
+ addl 4(%rdi), %r9d
+ addl 8(%rdi), %r10d
+ addl 12(%rdi), %r11d
+ addl 16(%rdi), %r12d
+ addl 20(%rdi), %r13d
+ addl 24(%rdi), %r14d
+ addl 28(%rdi), %r15d
+ subl $0x80, %esi
+ movl %r8d, (%rdi)
+ movl %r9d, 4(%rdi)
+ movl %r10d, 8(%rdi)
+ movl %r11d, 12(%rdi)
+ movl %r12d, 16(%rdi)
+ movl %r13d, 20(%rdi)
+ movl %r14d, 24(%rdi)
+ movl %r15d, 28(%rdi)
+ jnz L_sha256_len_avx2_rorx_start
+L_sha256_len_avx2_rorx_done:
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x200, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha256_AVX2_RORX_Len,.-Transform_Sha256_AVX2_RORX_Len
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha3.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha3.c
new file mode 100644
index 000000000..3a0c8ddbb
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha3.c
@@ -0,0 +1,1216 @@
+/* sha3.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_XILINX_CRYPT) && \
+ !defined(WOLFSSL_AFALG_XILINX_SHA3)
+
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
+
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$l")
+ #pragma const_seg(".fipsB$l")
+ #endif
+#endif
+
+#include <wolfssl/wolfcrypt/sha3.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+
+#ifdef WOLFSSL_SHA3_SMALL
+/* Rotate a 64-bit value left.
+ *
+ * a Number to rotate left.
+ * n Number of bits to rotate left.
+ * returns the rotated number.
+ */
+#define ROTL64(a, n) (((a)<<(n))|((a)>>(64-(n))))
+
+/* An array of values to XOR for block operation. */
+static const word64 hash_keccak_r[24] =
+{
+ 0x0000000000000001UL, 0x0000000000008082UL,
+ 0x800000000000808aUL, 0x8000000080008000UL,
+ 0x000000000000808bUL, 0x0000000080000001UL,
+ 0x8000000080008081UL, 0x8000000000008009UL,
+ 0x000000000000008aUL, 0x0000000000000088UL,
+ 0x0000000080008009UL, 0x000000008000000aUL,
+ 0x000000008000808bUL, 0x800000000000008bUL,
+ 0x8000000000008089UL, 0x8000000000008003UL,
+ 0x8000000000008002UL, 0x8000000000000080UL,
+ 0x000000000000800aUL, 0x800000008000000aUL,
+ 0x8000000080008081UL, 0x8000000000008080UL,
+ 0x0000000080000001UL, 0x8000000080008008UL
+};
+
+/* Indices used in swap and rotate operation. */
+#define K_I_0 10
+#define K_I_1 7
+#define K_I_2 11
+#define K_I_3 17
+#define K_I_4 18
+#define K_I_5 3
+#define K_I_6 5
+#define K_I_7 16
+#define K_I_8 8
+#define K_I_9 21
+#define K_I_10 24
+#define K_I_11 4
+#define K_I_12 15
+#define K_I_13 23
+#define K_I_14 19
+#define K_I_15 13
+#define K_I_16 12
+#define K_I_17 2
+#define K_I_18 20
+#define K_I_19 14
+#define K_I_20 22
+#define K_I_21 9
+#define K_I_22 6
+#define K_I_23 1
+
+/* Number of bits to rotate in swap and rotate operation. */
+#define K_R_0 1
+#define K_R_1 3
+#define K_R_2 6
+#define K_R_3 10
+#define K_R_4 15
+#define K_R_5 21
+#define K_R_6 28
+#define K_R_7 36
+#define K_R_8 45
+#define K_R_9 55
+#define K_R_10 2
+#define K_R_11 14
+#define K_R_12 27
+#define K_R_13 41
+#define K_R_14 56
+#define K_R_15 8
+#define K_R_16 25
+#define K_R_17 43
+#define K_R_18 62
+#define K_R_19 18
+#define K_R_20 39
+#define K_R_21 61
+#define K_R_22 20
+#define K_R_23 44
+
+/* Swap and rotate left operation.
+ *
+ * Rotates t1 (the lane displaced by the previous step) left by K_R_i bits
+ * and stores it at state index K_I_i; the lane previously held there is
+ * first saved into t2 so it can feed the next step of the chain.
+ *
+ * s The state.
+ * t1 Value rotated into position (lane displaced by the previous step).
+ * t2 Receives the lane displaced by this step.
+ * i The step index (selects K_I_i and K_R_i).
+ */
+#define SWAP_ROTL(s, t1, t2, i) \
+do \
+{ \
+ t2 = s[K_I_##i]; s[K_I_##i] = ROTL64(t1, K_R_##i); \
+} \
+while (0)
+
+/* Mix the XOR of the column's values into each number by column.
+ *
+ * s The state.
+ * b Temporary array of XORed column values.
+ * x The index of the column.
+ * t Temporary variable.
+ */
+#define COL_MIX(s, b, x, t) \
+do \
+{ \
+ for (x = 0; x < 5; x++) \
+ b[x] = s[x + 0] ^ s[x + 5] ^ s[x + 10] ^ s[x + 15] ^ s[x + 20]; \
+ for (x = 0; x < 5; x++) \
+ { \
+ t = b[(x + 4) % 5] ^ ROTL64(b[(x + 1) % 5], 1); \
+ s[x + 0] ^= t; \
+ s[x + 5] ^= t; \
+ s[x + 10] ^= t; \
+ s[x + 15] ^= t; \
+ s[x + 20] ^= t; \
+ } \
+} \
+while (0)
+
+#ifdef SHA3_BY_SPEC
+/* Mix the row values.
+ * BMI1 has ANDN instruction ((~a) & b) - Haswell and above.
+ *
+ * s The state.
+ * b Temporary array of XORed row values.
+ * y The index of the row to work on.
+ * x The index of the column.
+ * t0 Temporary variable.
+ * t1 Temporary variable.
+ */
+#define ROW_MIX(s, b, y, x, t0, t1) \
+do \
+{ \
+ for (y = 0; y < 5; y++) \
+ { \
+ for (x = 0; x < 5; x++) \
+ b[x] = s[y * 5 + x]; \
+ for (x = 0; x < 5; x++) \
+ s[y * 5 + x] = b[x] ^ (~b[(x + 1) % 5] & b[(x + 2) % 5]); \
+ } \
+} \
+while (0)
+#else
+/* Mix the row values.
+ * a ^ (~b & c) == a ^ (c & (b ^ c)) == (a ^ b) ^ (b | c)
+ *
+ * s The state.
+ * b Temporary array of XORed row values.
+ * y The index of the row to work on.
+ * x The index of the column.
+ * t12 Temporary variable (holds b[1] ^ b[2]).
+ * t34 Temporary variable (holds b[3] ^ b[4]).
+ */
+#define ROW_MIX(s, b, y, x, t12, t34) \
+do \
+{ \
+ for (y = 0; y < 5; y++) \
+ { \
+ for (x = 0; x < 5; x++) \
+ b[x] = s[y * 5 + x]; \
+ t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+ s[y * 5 + 0] = b[0] ^ (b[2] & t12); \
+ s[y * 5 + 1] = t12 ^ (b[2] | b[3]); \
+ s[y * 5 + 2] = b[2] ^ (b[4] & t34); \
+ s[y * 5 + 3] = t34 ^ (b[4] | b[0]); \
+ s[y * 5 + 4] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+ } \
+} \
+while (0)
+#endif /* SHA3_BY_SPEC */
+
+/* The block operation performed on the state (one Keccak-f[1600] permutation).
+ *
+ * Runs 24 rounds; each round applies, in place: theta (COL_MIX), rho and pi
+ * (the SWAP_ROTL chain), chi (ROW_MIX), and iota (round-constant XOR).
+ *
+ * s The state: 25 64-bit lanes (COL_MIX touches s[0]..s[24]).
+ */
+static void BlockSha3(word64 *s)
+{
+    byte i, x, y;
+    word64 t0, t1;
+    word64 b[5];                /* scratch: per-column/row XOR values */
+
+    for (i = 0; i < 24; i++)
+    {
+        /* theta: XOR each lane with the parity of two columns. */
+        COL_MIX(s, b, x, t0);
+
+        /* rho and pi: rotate each lane and move it to its new position.
+         * t0 seeds the chain with lane s[1]; each SWAP_ROTL writes the
+         * rotated previous value and saves the displaced lane for the next
+         * step, with t0/t1 alternating as source and saved value. */
+        t0 = s[1];
+        SWAP_ROTL(s, t0, t1, 0);
+        SWAP_ROTL(s, t1, t0, 1);
+        SWAP_ROTL(s, t0, t1, 2);
+        SWAP_ROTL(s, t1, t0, 3);
+        SWAP_ROTL(s, t0, t1, 4);
+        SWAP_ROTL(s, t1, t0, 5);
+        SWAP_ROTL(s, t0, t1, 6);
+        SWAP_ROTL(s, t1, t0, 7);
+        SWAP_ROTL(s, t0, t1, 8);
+        SWAP_ROTL(s, t1, t0, 9);
+        SWAP_ROTL(s, t0, t1, 10);
+        SWAP_ROTL(s, t1, t0, 11);
+        SWAP_ROTL(s, t0, t1, 12);
+        SWAP_ROTL(s, t1, t0, 13);
+        SWAP_ROTL(s, t0, t1, 14);
+        SWAP_ROTL(s, t1, t0, 15);
+        SWAP_ROTL(s, t0, t1, 16);
+        SWAP_ROTL(s, t1, t0, 17);
+        SWAP_ROTL(s, t0, t1, 18);
+        SWAP_ROTL(s, t1, t0, 19);
+        SWAP_ROTL(s, t0, t1, 20);
+        SWAP_ROTL(s, t1, t0, 21);
+        SWAP_ROTL(s, t0, t1, 22);
+        SWAP_ROTL(s, t1, t0, 23);
+
+        /* chi: non-linear mix of the lanes within each row. */
+        ROW_MIX(s, b, y, x, t0, t1);
+
+        /* iota: XOR this round's constant into lane 0. */
+        s[0] ^= hash_keccak_r[i];
+    }
+}
+#else
+/* Rotate a 64-bit value left.
+ *
+ * a Number to rotate left.
+ * n Number of bits to rotate left.
+ * returns the rotated number.
+ */
+#define ROTL64(a, n) (((a)<<(n))|((a)>>(64-(n))))
+
+/* An array of values to XOR for block operation. */
+static const word64 hash_keccak_r[24] =
+{
+ 0x0000000000000001UL, 0x0000000000008082UL,
+ 0x800000000000808aUL, 0x8000000080008000UL,
+ 0x000000000000808bUL, 0x0000000080000001UL,
+ 0x8000000080008081UL, 0x8000000000008009UL,
+ 0x000000000000008aUL, 0x0000000000000088UL,
+ 0x0000000080008009UL, 0x000000008000000aUL,
+ 0x000000008000808bUL, 0x800000000000008bUL,
+ 0x8000000000008089UL, 0x8000000000008003UL,
+ 0x8000000000008002UL, 0x8000000000000080UL,
+ 0x000000000000800aUL, 0x800000008000000aUL,
+ 0x8000000080008081UL, 0x8000000000008080UL,
+ 0x0000000080000001UL, 0x8000000080008008UL
+};
+
+/* Indices used in swap and rotate operation. */
+#define KI_0 6
+#define KI_1 12
+#define KI_2 18
+#define KI_3 24
+#define KI_4 3
+#define KI_5 9
+#define KI_6 10
+#define KI_7 16
+#define KI_8 22
+#define KI_9 1
+#define KI_10 7
+#define KI_11 13
+#define KI_12 19
+#define KI_13 20
+#define KI_14 4
+#define KI_15 5
+#define KI_16 11
+#define KI_17 17
+#define KI_18 23
+#define KI_19 2
+#define KI_20 8
+#define KI_21 14
+#define KI_22 15
+#define KI_23 21
+
+/* Number of bits to rotate in swap and rotate operation. */
+#define KR_0 44
+#define KR_1 43
+#define KR_2 21
+#define KR_3 14
+#define KR_4 28
+#define KR_5 20
+#define KR_6 3
+#define KR_7 45
+#define KR_8 61
+#define KR_9 1
+#define KR_10 6
+#define KR_11 25
+#define KR_12 8
+#define KR_13 18
+#define KR_14 27
+#define KR_15 36
+#define KR_16 10
+#define KR_17 15
+#define KR_18 56
+#define KR_19 62
+#define KR_20 55
+#define KR_21 39
+#define KR_22 41
+#define KR_23 2
+
+/* Mix the XOR of the column's values into each number by column.
+ *
+ * s The state.
+ * b Temporary array of XORed column values.
+ * x The index of the column.
+ * t Temporary variable.
+ */
+#define COL_MIX(s, b, x, t) \
+do \
+{ \
+ b[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; \
+ b[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; \
+ b[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; \
+ b[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; \
+ b[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; \
+ t = b[(0 + 4) % 5] ^ ROTL64(b[(0 + 1) % 5], 1); \
+ s[ 0] ^= t; s[ 5] ^= t; s[10] ^= t; s[15] ^= t; s[20] ^= t; \
+ t = b[(1 + 4) % 5] ^ ROTL64(b[(1 + 1) % 5], 1); \
+ s[ 1] ^= t; s[ 6] ^= t; s[11] ^= t; s[16] ^= t; s[21] ^= t; \
+ t = b[(2 + 4) % 5] ^ ROTL64(b[(2 + 1) % 5], 1); \
+ s[ 2] ^= t; s[ 7] ^= t; s[12] ^= t; s[17] ^= t; s[22] ^= t; \
+ t = b[(3 + 4) % 5] ^ ROTL64(b[(3 + 1) % 5], 1); \
+ s[ 3] ^= t; s[ 8] ^= t; s[13] ^= t; s[18] ^= t; s[23] ^= t; \
+ t = b[(4 + 4) % 5] ^ ROTL64(b[(4 + 1) % 5], 1); \
+ s[ 4] ^= t; s[ 9] ^= t; s[14] ^= t; s[19] ^= t; s[24] ^= t; \
+} \
+while (0)
+
+#define S(s1, i) ROTL64(s1[KI_##i], KR_##i)
+
+#ifdef SHA3_BY_SPEC
+/* Mix the row values.
+ * BMI1 has ANDN instruction ((~a) & b) - Haswell and above.
+ *
+ * s2 The new state.
+ * s1 The current state.
+ * b Temporary array of XORed row values.
+ * t0 Temporary variable. (Unused)
+ * t1 Temporary variable. (Unused)
+ */
+#define ROW_MIX(s2, s1, b, t0, t1) \
+do \
+{ \
+ b[0] = s1[0]; \
+ b[1] = S(s1, 0); \
+ b[2] = S(s1, 1); \
+ b[3] = S(s1, 2); \
+ b[4] = S(s1, 3); \
+ s2[0] = b[0] ^ (~b[1] & b[2]); \
+ s2[1] = b[1] ^ (~b[2] & b[3]); \
+ s2[2] = b[2] ^ (~b[3] & b[4]); \
+ s2[3] = b[3] ^ (~b[4] & b[0]); \
+ s2[4] = b[4] ^ (~b[0] & b[1]); \
+ b[0] = S(s1, 4); \
+ b[1] = S(s1, 5); \
+ b[2] = S(s1, 6); \
+ b[3] = S(s1, 7); \
+ b[4] = S(s1, 8); \
+ s2[5] = b[0] ^ (~b[1] & b[2]); \
+ s2[6] = b[1] ^ (~b[2] & b[3]); \
+ s2[7] = b[2] ^ (~b[3] & b[4]); \
+ s2[8] = b[3] ^ (~b[4] & b[0]); \
+ s2[9] = b[4] ^ (~b[0] & b[1]); \
+ b[0] = S(s1, 9); \
+ b[1] = S(s1, 10); \
+ b[2] = S(s1, 11); \
+ b[3] = S(s1, 12); \
+ b[4] = S(s1, 13); \
+ s2[10] = b[0] ^ (~b[1] & b[2]); \
+ s2[11] = b[1] ^ (~b[2] & b[3]); \
+ s2[12] = b[2] ^ (~b[3] & b[4]); \
+ s2[13] = b[3] ^ (~b[4] & b[0]); \
+ s2[14] = b[4] ^ (~b[0] & b[1]); \
+ b[0] = S(s1, 14); \
+ b[1] = S(s1, 15); \
+ b[2] = S(s1, 16); \
+ b[3] = S(s1, 17); \
+ b[4] = S(s1, 18); \
+ s2[15] = b[0] ^ (~b[1] & b[2]); \
+ s2[16] = b[1] ^ (~b[2] & b[3]); \
+ s2[17] = b[2] ^ (~b[3] & b[4]); \
+ s2[18] = b[3] ^ (~b[4] & b[0]); \
+ s2[19] = b[4] ^ (~b[0] & b[1]); \
+ b[0] = S(s1, 19); \
+ b[1] = S(s1, 20); \
+ b[2] = S(s1, 21); \
+ b[3] = S(s1, 22); \
+ b[4] = S(s1, 23); \
+ s2[20] = b[0] ^ (~b[1] & b[2]); \
+ s2[21] = b[1] ^ (~b[2] & b[3]); \
+ s2[22] = b[2] ^ (~b[3] & b[4]); \
+ s2[23] = b[3] ^ (~b[4] & b[0]); \
+ s2[24] = b[4] ^ (~b[0] & b[1]); \
+} \
+while (0)
+#else
+/* Mix the row values.
+ * a ^ (~b & c) == a ^ (c & (b ^ c)) == (a ^ b) ^ (b | c)
+ *
+ * s2 The new state.
+ * s1 The current state.
+ * b Temporary array of XORed row values.
+ * t12 Temporary variable.
+ * t34 Temporary variable.
+ */
+#define ROW_MIX(s2, s1, b, t12, t34) \
+do \
+{ \
+ b[0] = s1[0]; \
+ b[1] = S(s1, 0); \
+ b[2] = S(s1, 1); \
+ b[3] = S(s1, 2); \
+ b[4] = S(s1, 3); \
+ t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+ s2[0] = b[0] ^ (b[2] & t12); \
+ s2[1] = t12 ^ (b[2] | b[3]); \
+ s2[2] = b[2] ^ (b[4] & t34); \
+ s2[3] = t34 ^ (b[4] | b[0]); \
+ s2[4] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+ b[0] = S(s1, 4); \
+ b[1] = S(s1, 5); \
+ b[2] = S(s1, 6); \
+ b[3] = S(s1, 7); \
+ b[4] = S(s1, 8); \
+ t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+ s2[5] = b[0] ^ (b[2] & t12); \
+ s2[6] = t12 ^ (b[2] | b[3]); \
+ s2[7] = b[2] ^ (b[4] & t34); \
+ s2[8] = t34 ^ (b[4] | b[0]); \
+ s2[9] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+ b[0] = S(s1, 9); \
+ b[1] = S(s1, 10); \
+ b[2] = S(s1, 11); \
+ b[3] = S(s1, 12); \
+ b[4] = S(s1, 13); \
+ t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+ s2[10] = b[0] ^ (b[2] & t12); \
+ s2[11] = t12 ^ (b[2] | b[3]); \
+ s2[12] = b[2] ^ (b[4] & t34); \
+ s2[13] = t34 ^ (b[4] | b[0]); \
+ s2[14] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+ b[0] = S(s1, 14); \
+ b[1] = S(s1, 15); \
+ b[2] = S(s1, 16); \
+ b[3] = S(s1, 17); \
+ b[4] = S(s1, 18); \
+ t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+ s2[15] = b[0] ^ (b[2] & t12); \
+ s2[16] = t12 ^ (b[2] | b[3]); \
+ s2[17] = b[2] ^ (b[4] & t34); \
+ s2[18] = t34 ^ (b[4] | b[0]); \
+ s2[19] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+ b[0] = S(s1, 19); \
+ b[1] = S(s1, 20); \
+ b[2] = S(s1, 21); \
+ b[3] = S(s1, 22); \
+ b[4] = S(s1, 23); \
+ t12 = (b[1] ^ b[2]); t34 = (b[3] ^ b[4]); \
+ s2[20] = b[0] ^ (b[2] & t12); \
+ s2[21] = t12 ^ (b[2] | b[3]); \
+ s2[22] = b[2] ^ (b[4] & t34); \
+ s2[23] = t34 ^ (b[4] | b[0]); \
+ s2[24] = b[4] ^ (b[1] & (b[0] ^ b[1])); \
+} \
+while (0)
+#endif /* SHA3_BY_SPEC */
+
+/* The block operation performed on the state.
+ *
+ * s The state.
+ */
+static void BlockSha3(word64 *s)
+{
+ word64 n[25];
+ word64 b[5];
+ word64 t0;
+#ifndef SHA3_BY_SPEC
+ word64 t1;
+#endif
+ byte i;
+
+ for (i = 0; i < 24; i += 2)
+ {
+ COL_MIX(s, b, x, t0);
+ ROW_MIX(n, s, b, t0, t1);
+ n[0] ^= hash_keccak_r[i];
+
+ COL_MIX(n, b, x, t0);
+ ROW_MIX(s, n, b, t0, t1);
+ s[0] ^= hash_keccak_r[i+1];
+ }
+}
+#endif /* WOLFSSL_SHA3_SMALL */
+
+/* Convert the array of bytes, in little-endian order, to a 64-bit integer.
+ *
+ * a Array of bytes.
+ * returns a 64-bit integer.
+ */
+static word64 Load64BitBigEndian(const byte* a)
+{
+#ifdef BIG_ENDIAN_ORDER
+ word64 n = 0;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ n |= (word64)a[i] << (8 * i);
+
+ return n;
+#else
+ return *(word64*)a;
+#endif
+}
+
+/* Initialize the state for a SHA-3 hash operation (shared by all digest sizes).
+ *
+ * sha3 wc_Sha3 object holding state.
+ * returns 0 on success.
+ */
+static int InitSha3(wc_Sha3* sha3)
+{
+ int i;
+
+ for (i = 0; i < 25; i++)
+ sha3->s[i] = 0;
+ sha3->i = 0;
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ sha3->flags = 0;
+#endif
+
+ return 0;
+}
+
+/* Update the SHA-3 hash state with message data.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * data Message data to be hashed.
+ * len Length of the message data.
+ * p Number of 64-bit numbers in a block of data to process.
+ * returns 0 on success.
+ */
+static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
+{
+ byte i;
+ byte l;
+ byte *t;
+
+ if (sha3->i > 0)
+ {
+ l = p * 8 - sha3->i;
+ if (l > len) {
+ l = (byte)len;
+ }
+
+ t = &sha3->t[sha3->i];
+ for (i = 0; i < l; i++)
+ t[i] = data[i];
+ data += i;
+ len -= i;
+ sha3->i += i;
+
+ if (sha3->i == p * 8)
+ {
+ for (i = 0; i < p; i++)
+ sha3->s[i] ^= Load64BitBigEndian(sha3->t + 8 * i);
+ BlockSha3(sha3->s);
+ sha3->i = 0;
+ }
+ }
+ while (len >= ((word32)(p * 8)))
+ {
+ for (i = 0; i < p; i++)
+ sha3->s[i] ^= Load64BitBigEndian(data + 8 * i);
+ BlockSha3(sha3->s);
+ len -= p * 8;
+ data += p * 8;
+ }
+ for (i = 0; i < len; i++)
+ sha3->t[i] = data[i];
+ sha3->i += i;
+
+ return 0;
+}
+
+/* Calculate the SHA-3 hash based on all the message data seen.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * padChar Padding byte marking the end of the message (domain separator).
+ * hash Buffer to hold the hash result.
+ * p Number of 64-bit numbers in a block of data to process.
+ * l Number of bytes in output.
+ * returns 0 on success.
+ */
+static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, byte l)
+{
+ byte i;
+ byte *s8 = (byte *)sha3->s;
+
+ sha3->t[p * 8 - 1] = 0x00;
+#ifdef WOLFSSL_HASH_FLAGS
+ if (p == WC_SHA3_256_COUNT && sha3->flags & WC_HASH_SHA3_KECCAK256) {
+ padChar = 0x01;
+ }
+#endif
+ sha3->t[ sha3->i] = padChar;
+ sha3->t[p * 8 - 1] |= 0x80;
+ for (i=sha3->i + 1; i < p * 8 - 1; i++)
+ sha3->t[i] = 0;
+ for (i = 0; i < p; i++)
+ sha3->s[i] ^= Load64BitBigEndian(sha3->t + 8 * i);
+ BlockSha3(sha3->s);
+#if defined(BIG_ENDIAN_ORDER)
+ ByteReverseWords64(sha3->s, sha3->s, ((l+7)/8)*8);
+#endif
+ for (i = 0; i < l; i++)
+ hash[i] = s8[i];
+
+ return 0;
+}
+
+/* Initialize the state for a SHA-3 hash operation.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * heap Heap reference for dynamic memory allocation. (Used in async ops.)
+ * devId Device identifier for asynchronous operation.
+ * returns 0 on success.
+ */
+static int wc_InitSha3(wc_Sha3* sha3, void* heap, int devId)
+{
+ int ret = 0;
+
+ if (sha3 == NULL)
+ return BAD_FUNC_ARG;
+
+ sha3->heap = heap;
+ ret = InitSha3(sha3);
+ if (ret != 0)
+ return ret;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3)
+ ret = wolfAsync_DevCtxInit(&sha3->asyncDev,
+ WOLFSSL_ASYNC_MARKER_SHA3, sha3->heap, devId);
+#else
+ (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+ return ret;
+}
+
+/* Update the SHA-3 hash state with message data.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * data Message data to be hashed.
+ * len Length of the message data.
+ * p Number of 64-bit numbers in a block of data to process.
+ * returns 0 on success.
+ */
+static int wc_Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
+{
+ int ret;
+
+ if (sha3 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3)
+ if (sha3->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA3) {
+ #if defined(HAVE_INTEL_QA) && defined(QAT_V2)
+ /* QAT only supports SHA3_256 */
+ if (p == WC_SHA3_256_COUNT) {
+ ret = IntelQaSymSha3(&sha3->asyncDev, NULL, data, len);
+ if (ret != NOT_COMPILED_IN)
+ return ret;
+ /* fall-through when unavailable */
+ }
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+ ret = Sha3Update(sha3, data, len, p);
+
+ return ret;
+}
+
+/* Calculate the SHA-3 hash based on all the message data seen.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result.
+ * p Number of 64-bit numbers in a block of data to process.
+ * len Number of bytes in output.
+ * returns 0 on success.
+ */
+static int wc_Sha3Final(wc_Sha3* sha3, byte* hash, byte p, byte len)
+{
+ int ret;
+
+ if (sha3 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3)
+ if (sha3->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA3) {
+ #if defined(HAVE_INTEL_QA) && defined(QAT_V2)
+ /* QAT only supports SHA3_256 */
+ /* QAT SHA-3 only supported on v2 (8970 or later cards) */
+ if (len == WC_SHA3_256_DIGEST_SIZE) {
+ ret = IntelQaSymSha3(&sha3->asyncDev, hash, NULL, len);
+ if (ret != NOT_COMPILED_IN)
+ return ret;
+ /* fall-through when unavailable */
+ }
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+ ret = Sha3Final(sha3, 0x06, hash, p, len);
+ if (ret != 0)
+ return ret;
+
+ return InitSha3(sha3); /* reset state */
+}
+
+/* Dispose of any dynamically allocated data from the SHA-3 operation.
+ * (Required for async ops.)
+ *
+ * sha3 wc_Sha3 object holding state.
+ * returns 0 on success.
+ */
+static void wc_Sha3Free(wc_Sha3* sha3)
+{
+ (void)sha3;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3)
+ if (sha3 == NULL)
+ return;
+
+ wolfAsync_DevCtxFree(&sha3->asyncDev, WOLFSSL_ASYNC_MARKER_SHA3);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+}
+
+
+/* Copy the state of the SHA3 operation.
+ *
+ * src wc_Sha3 object holding state to copy.
+ * dst wc_Sha3 object to copy into.
+ * returns 0 on success.
+ */
+static int wc_Sha3Copy(wc_Sha3* src, wc_Sha3* dst)
+{
+ int ret = 0;
+
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
+
+ XMEMCPY(dst, src, sizeof(wc_Sha3));
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
+
+ return ret;
+}
+
+/* Calculate the SHA3-224 hash based on all the message data so far.
+ * More message data can be added, after this operation, using the current
+ * state.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result. Must be at least 28 bytes.
+ * p Number of 64-bit numbers in a block of data to process.
+ * len Number of bytes in output.
+ * returns 0 on success.
+ */
+static int wc_Sha3GetHash(wc_Sha3* sha3, byte* hash, byte p, byte len)
+{
+ int ret;
+ wc_Sha3 tmpSha3;
+
+ if (sha3 == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
+
+ ret = wc_Sha3Copy(sha3, &tmpSha3);
+ if (ret == 0) {
+ ret = wc_Sha3Final(&tmpSha3, hash, p, len);
+ }
+ return ret;
+}
+
+
+/* Initialize the state for a SHA3-224 hash operation.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * heap Heap reference for dynamic memory allocation. (Used in async ops.)
+ * devId Device identifier for asynchronous operation.
+ * returns 0 on success.
+ */
+int wc_InitSha3_224(wc_Sha3* sha3, void* heap, int devId)
+{
+ return wc_InitSha3(sha3, heap, devId);
+}
+
+/* Update the SHA3-224 hash state with message data.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * data Message data to be hashed.
+ * len Length of the message data.
+ * returns 0 on success.
+ */
+int wc_Sha3_224_Update(wc_Sha3* sha3, const byte* data, word32 len)
+{
+ return wc_Sha3Update(sha3, data, len, WC_SHA3_224_COUNT);
+}
+
+/* Calculate the SHA3-224 hash based on all the message data seen.
+ * The state is initialized ready for a new message to hash.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result. Must be at least 28 bytes.
+ * returns 0 on success.
+ */
+int wc_Sha3_224_Final(wc_Sha3* sha3, byte* hash)
+{
+ return wc_Sha3Final(sha3, hash, WC_SHA3_224_COUNT, WC_SHA3_224_DIGEST_SIZE);
+}
+
+/* Dispose of any dynamically allocated data from the SHA3-224 operation.
+ * (Required for async ops.)
+ *
+ * sha3 wc_Sha3 object holding state.
+ * returns 0 on success.
+ */
+void wc_Sha3_224_Free(wc_Sha3* sha3)
+{
+ wc_Sha3Free(sha3);
+}
+
+/* Calculate the SHA3-224 hash based on all the message data so far.
+ * More message data can be added, after this operation, using the current
+ * state.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result. Must be at least 28 bytes.
+ * returns 0 on success.
+ */
+int wc_Sha3_224_GetHash(wc_Sha3* sha3, byte* hash)
+{
+ return wc_Sha3GetHash(sha3, hash, WC_SHA3_224_COUNT, WC_SHA3_224_DIGEST_SIZE);
+}
+
+/* Copy the state of the SHA3-224 operation.
+ *
+ * src wc_Sha3 object holding state to copy.
+ * dst wc_Sha3 object to copy into.
+ * returns 0 on success.
+ */
+int wc_Sha3_224_Copy(wc_Sha3* src, wc_Sha3* dst)
+{
+ return wc_Sha3Copy(src, dst);
+}
+
+
+/* Initialize the state for a SHA3-256 hash operation.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * heap Heap reference for dynamic memory allocation. (Used in async ops.)
+ * devId Device identifier for asynchronous operation.
+ * returns 0 on success.
+ */
+int wc_InitSha3_256(wc_Sha3* sha3, void* heap, int devId)
+{
+ return wc_InitSha3(sha3, heap, devId);
+}
+
+/* Update the SHA3-256 hash state with message data.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * data Message data to be hashed.
+ * len Length of the message data.
+ * returns 0 on success.
+ */
+int wc_Sha3_256_Update(wc_Sha3* sha3, const byte* data, word32 len)
+{
+ return wc_Sha3Update(sha3, data, len, WC_SHA3_256_COUNT);
+}
+
+/* Calculate the SHA3-256 hash based on all the message data seen.
+ * The state is initialized ready for a new message to hash.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result. Must be at least 32 bytes.
+ * returns 0 on success.
+ */
+int wc_Sha3_256_Final(wc_Sha3* sha3, byte* hash)
+{
+ return wc_Sha3Final(sha3, hash, WC_SHA3_256_COUNT, WC_SHA3_256_DIGEST_SIZE);
+}
+
+/* Dispose of any dynamically allocated data from the SHA3-256 operation.
+ * (Required for async ops.)
+ *
+ * sha3 wc_Sha3 object holding state.
+ * returns 0 on success.
+ */
+void wc_Sha3_256_Free(wc_Sha3* sha3)
+{
+ wc_Sha3Free(sha3);
+}
+
+/* Calculate the SHA3-256 hash based on all the message data so far.
+ * More message data can be added, after this operation, using the current
+ * state.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result. Must be at least 32 bytes.
+ * returns 0 on success.
+ */
+int wc_Sha3_256_GetHash(wc_Sha3* sha3, byte* hash)
+{
+ return wc_Sha3GetHash(sha3, hash, WC_SHA3_256_COUNT, WC_SHA3_256_DIGEST_SIZE);
+}
+
+/* Copy the state of the SHA3-256 operation.
+ *
+ * src wc_Sha3 object holding state to copy.
+ * dst wc_Sha3 object to copy into.
+ * returns 0 on success.
+ */
+int wc_Sha3_256_Copy(wc_Sha3* src, wc_Sha3* dst)
+{
+ return wc_Sha3Copy(src, dst);
+}
+
+
+/* Initialize the state for a SHA3-384 hash operation.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * heap Heap reference for dynamic memory allocation. (Used in async ops.)
+ * devId Device identifier for asynchronous operation.
+ * returns 0 on success.
+ */
+int wc_InitSha3_384(wc_Sha3* sha3, void* heap, int devId)
+{
+ return wc_InitSha3(sha3, heap, devId);
+}
+
+/* Update the SHA3-384 hash state with message data.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * data Message data to be hashed.
+ * len Length of the message data.
+ * returns 0 on success.
+ */
+int wc_Sha3_384_Update(wc_Sha3* sha3, const byte* data, word32 len)
+{
+ return wc_Sha3Update(sha3, data, len, WC_SHA3_384_COUNT);
+}
+
+/* Calculate the SHA3-384 hash based on all the message data seen.
+ * The state is initialized ready for a new message to hash.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result. Must be at least 48 bytes.
+ * returns 0 on success.
+ */
+int wc_Sha3_384_Final(wc_Sha3* sha3, byte* hash)
+{
+ return wc_Sha3Final(sha3, hash, WC_SHA3_384_COUNT, WC_SHA3_384_DIGEST_SIZE);
+}
+
+/* Dispose of any dynamically allocated data from the SHA3-384 operation.
+ * (Required for async ops.)
+ *
+ * sha3 wc_Sha3 object holding state.
+ * returns 0 on success.
+ */
+void wc_Sha3_384_Free(wc_Sha3* sha3)
+{
+ wc_Sha3Free(sha3);
+}
+
+/* Calculate the SHA3-384 hash based on all the message data so far.
+ * More message data can be added, after this operation, using the current
+ * state.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result. Must be at least 48 bytes.
+ * returns 0 on success.
+ */
+int wc_Sha3_384_GetHash(wc_Sha3* sha3, byte* hash)
+{
+ return wc_Sha3GetHash(sha3, hash, WC_SHA3_384_COUNT, WC_SHA3_384_DIGEST_SIZE);
+}
+
+/* Copy the state of the SHA3-384 operation.
+ *
+ * src wc_Sha3 object holding state to copy.
+ * dst wc_Sha3 object to copy into.
+ * returns 0 on success.
+ */
+int wc_Sha3_384_Copy(wc_Sha3* src, wc_Sha3* dst)
+{
+ return wc_Sha3Copy(src, dst);
+}
+
+
+/* Initialize the state for a SHA3-512 hash operation.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * heap Heap reference for dynamic memory allocation. (Used in async ops.)
+ * devId Device identifier for asynchronous operation.
+ * returns 0 on success.
+ */
+int wc_InitSha3_512(wc_Sha3* sha3, void* heap, int devId)
+{
+ return wc_InitSha3(sha3, heap, devId);
+}
+
+/* Update the SHA3-512 hash state with message data.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * data Message data to be hashed.
+ * len Length of the message data.
+ * returns 0 on success.
+ */
+int wc_Sha3_512_Update(wc_Sha3* sha3, const byte* data, word32 len)
+{
+ return wc_Sha3Update(sha3, data, len, WC_SHA3_512_COUNT);
+}
+
+/* Calculate the SHA3-512 hash based on all the message data seen.
+ * The state is initialized ready for a new message to hash.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result. Must be at least 64 bytes.
+ * returns 0 on success.
+ */
+int wc_Sha3_512_Final(wc_Sha3* sha3, byte* hash)
+{
+ return wc_Sha3Final(sha3, hash, WC_SHA3_512_COUNT, WC_SHA3_512_DIGEST_SIZE);
+}
+
+/* Dispose of any dynamically allocated data from the SHA3-512 operation.
+ * (Required for async ops.)
+ *
+ * sha3 wc_Sha3 object holding state.
+ * returns 0 on success.
+ */
+void wc_Sha3_512_Free(wc_Sha3* sha3)
+{
+ wc_Sha3Free(sha3);
+}
+
+/* Calculate the SHA3-512 hash based on all the message data so far.
+ * More message data can be added, after this operation, using the current
+ * state.
+ *
+ * sha3 wc_Sha3 object holding state.
+ * hash Buffer to hold the hash result. Must be at least 64 bytes.
+ * returns 0 on success.
+ */
+int wc_Sha3_512_GetHash(wc_Sha3* sha3, byte* hash)
+{
+ return wc_Sha3GetHash(sha3, hash, WC_SHA3_512_COUNT, WC_SHA3_512_DIGEST_SIZE);
+}
+
+/* Copy the state of the SHA3-512 operation.
+ *
+ * src wc_Sha3 object holding state to copy.
+ * dst wc_Sha3 object to copy into.
+ * returns 0 on success.
+ */
+int wc_Sha3_512_Copy(wc_Sha3* src, wc_Sha3* dst)
+{
+ return wc_Sha3Copy(src, dst);
+}
+
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Sha3_SetFlags(wc_Sha3* sha3, word32 flags)
+{
+ if (sha3) {
+ sha3->flags = flags;
+ }
+ return 0;
+}
+int wc_Sha3_GetFlags(wc_Sha3* sha3, word32* flags)
+{
+ if (sha3 && flags) {
+ *flags = sha3->flags;
+ }
+ return 0;
+}
+#endif
+
+#if defined(WOLFSSL_SHAKE256)
+/* Initialize the state for a Shake256 hash operation.
+ *
+ * shake wc_Shake object holding state.
+ * heap Heap reference for dynamic memory allocation. (Used in async ops.)
+ * devId Device identifier for asynchronous operation.
+ * returns 0 on success.
+ */
+int wc_InitShake256(wc_Shake* shake, void* heap, int devId)
+{
+ return wc_InitSha3(shake, heap, devId);
+}
+
+/* Update the SHAKE256 hash state with message data.
+ *
+ * shake wc_Shake object holding state.
+ * data Message data to be hashed.
+ * len Length of the message data.
+ * returns 0 on success.
+ */
+int wc_Shake256_Update(wc_Shake* shake, const byte* data, word32 len)
+{
+ if (shake == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+
+ return Sha3Update(shake, data, len, WC_SHA3_256_COUNT);
+}
+
+/* Calculate the SHAKE256 hash based on all the message data seen.
+ * The state is initialized ready for a new message to hash.
+ *
+ * shake wc_Shake object holding state.
+ * hash Buffer to hold the hash result. Must be at least 64 bytes.
+ * returns 0 on success.
+ */
+int wc_Shake256_Final(wc_Shake* shake, byte* hash, word32 hashLen)
+{
+ int ret;
+
+ if (shake == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ ret = Sha3Final(shake, 0x1f, hash, WC_SHA3_256_COUNT, hashLen);
+ if (ret != 0)
+ return ret;
+
+ return InitSha3(shake); /* reset state */
+}
+
+/* Dispose of any dynamically allocated data from the SHAKE256 operation.
+ * (Required for async ops.)
+ *
+ * shake wc_Shake object holding state.
+ * returns 0 on success.
+ */
+void wc_Shake256_Free(wc_Shake* shake)
+{
+ wc_Sha3Free(shake);
+}
+
+/* Copy the state of the SHAKE256 operation.
+ *
+ * src wc_Shake object holding state to copy.
+ * dst wc_Shake object to copy into.
+ * returns 0 on success.
+ */
+int wc_Shake256_Copy(wc_Shake* src, wc_Shake* dst)
+{
+ return wc_Sha3Copy(src, dst);
+}
+#endif
+
+#endif /* WOLFSSL_SHA3 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha512.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha512.c
index 8e52da909..0a648bf4a 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha512.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha512.c
@@ -1,8 +1,8 @@
/* sha512.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,441 +16,504 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
-#include <wolfssl/wolfcrypt/sha512.h>
-#ifdef WOLFSSL_SHA512
+#if (defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)) && !defined(WOLFSSL_ARMASM)
-#ifdef HAVE_FIPS
-int wc_InitSha512(Sha512* sha)
-{
- return InitSha512_fips(sha);
-}
+#if defined(HAVE_FIPS) && \
+ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+ /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+ #define FIPS_NO_WRAPPERS
-int wc_Sha512Update(Sha512* sha, const byte* data, word32 len)
-{
- return Sha512Update_fips(sha, data, len);
-}
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$k")
+ #pragma const_seg(".fipsB$k")
+ #endif
+#endif
+#include <wolfssl/wolfcrypt/sha512.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#include <wolfssl/wolfcrypt/hash.h>
-int wc_Sha512Final(Sha512* sha, byte* out)
-{
- return Sha512Final_fips(sha, out);
-}
+/* deprecated USE_SLOW_SHA2 (replaced with USE_SLOW_SHA512) */
+#if defined(USE_SLOW_SHA2) && !defined(USE_SLOW_SHA512)
+ #define USE_SLOW_SHA512
+#endif
+/* fips wrapper calls, user can call direct */
+#if defined(HAVE_FIPS) && \
+ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
-int wc_Sha512Hash(const byte* data, word32 len, byte* out)
-{
- return Sha512Hash(data, len, out);
-}
+ #ifdef WOLFSSL_SHA512
-#if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM)
-
-int wc_InitSha384(Sha384* sha)
-{
- return InitSha384_fips(sha);
-}
+ int wc_InitSha512(wc_Sha512* sha)
+ {
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return InitSha512_fips(sha);
+ }
+ int wc_InitSha512_ex(wc_Sha512* sha, void* heap, int devId)
+ {
+ (void)heap;
+ (void)devId;
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return InitSha512_fips(sha);
+ }
+ int wc_Sha512Update(wc_Sha512* sha, const byte* data, word32 len)
+ {
+ if (sha == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
-int wc_Sha384Update(Sha384* sha, const byte* data, word32 len)
-{
- return Sha384Update_fips(sha, data, len);
-}
+ return Sha512Update_fips(sha, data, len);
+ }
+ int wc_Sha512Final(wc_Sha512* sha, byte* out)
+ {
+ if (sha == NULL || out == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return Sha512Final_fips(sha, out);
+ }
+ void wc_Sha512Free(wc_Sha512* sha)
+ {
+ (void)sha;
+ /* Not supported in FIPS */
+ }
+ #endif
-int wc_Sha384Final(Sha384* sha, byte* out)
-{
- return Sha384Final_fips(sha, out);
-}
+ #if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM)
+ int wc_InitSha384(wc_Sha384* sha)
+ {
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return InitSha384_fips(sha);
+ }
+ int wc_InitSha384_ex(wc_Sha384* sha, void* heap, int devId)
+ {
+ (void)heap;
+ (void)devId;
+ if (sha == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return InitSha384_fips(sha);
+ }
+ int wc_Sha384Update(wc_Sha384* sha, const byte* data, word32 len)
+ {
+ if (sha == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
+ return Sha384Update_fips(sha, data, len);
+ }
+ int wc_Sha384Final(wc_Sha384* sha, byte* out)
+ {
+ if (sha == NULL || out == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ return Sha384Final_fips(sha, out);
+ }
+ void wc_Sha384Free(wc_Sha384* sha)
+ {
+ (void)sha;
+ /* Not supported in FIPS */
+ }
+ #endif /* WOLFSSL_SHA384 || HAVE_AESGCM */
+#else /* else build without fips, or for FIPS v2 */
-int wc_Sha384Hash(const byte* data, word32 len, byte* out)
-{
- return Sha384Hash(data, len, out);
-}
-#endif /* WOLFSSL_SHA384 */
-#else /* else build without using fips */
#include <wolfssl/wolfcrypt/logging.h>
-#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
#else
+ #define WOLFSSL_MISC_INCLUDED
#include <wolfcrypt/src/misc.c>
#endif
-#ifndef WOLFSSL_HAVE_MIN
-#define WOLFSSL_HAVE_MIN
-
- static INLINE word32 min(word32 a, word32 b)
- {
- return a > b ? b : a;
- }
-
-#endif /* WOLFSSL_HAVE_MIN */
-
#if defined(USE_INTEL_SPEEDUP)
- #define HAVE_INTEL_AVX1
- #define HAVE_INTEL_AVX2
+ #if defined(__GNUC__) && ((__GNUC__ < 4) || \
+ (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+ #undef NO_AVX2_SUPPORT
+ #define NO_AVX2_SUPPORT
+ #endif
+ #if defined(__clang__) && ((__clang_major__ < 3) || \
+ (__clang_major__ == 3 && __clang_minor__ <= 5))
+ #define NO_AVX2_SUPPORT
+ #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
+ #undef NO_AVX2_SUPPORT
+ #endif
+
+ #define HAVE_INTEL_AVX1
+ #ifndef NO_AVX2_SUPPORT
+ #define HAVE_INTEL_AVX2
+ #endif
#endif
#if defined(HAVE_INTEL_AVX1)
-/* #define DEBUG_XMM */
+ /* #define DEBUG_XMM */
#endif
#if defined(HAVE_INTEL_AVX2)
-#define HAVE_INTEL_RORX
-/* #define DEBUG_YMM */
-#endif
-
-/*****
-Intel AVX1/AVX2 Macro Control Structure
-
-#if defined(HAVE_INteL_SPEEDUP)
- #define HAVE_INTEL_AVX1
- #define HAVE_INTEL_AVX2
+ #define HAVE_INTEL_RORX
+ /* #define DEBUG_YMM */
#endif
-int InitSha512(Sha512* sha512) {
- Save/Recover XMM, YMM
- ...
-
- Check Intel AVX cpuid flags
-}
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
- Transform_AVX1() ; # Function prototype
- Transform_AVX2() ; #
+#if defined(HAVE_BYTEREVERSE64) && \
+ !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
+ #define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size)
+ #define ByteReverseWords64_1(buf, size) \
+ { unsigned int i ;\
+ for(i=0; i< size/sizeof(word64); i++){\
+ __asm__ volatile("bswapq %0":"+r"(buf[i])::) ;\
+ }\
+ }
#endif
- _Transform() { # Native Transform Function body
-
- }
-
- int Sha512Update() {
- Save/Recover XMM, YMM
- ...
- }
-
- int Sha512Final() {
- Save/Recover XMM, YMM
- ...
- }
-
+#if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
+ /* functions defined in wolfcrypt/src/port/caam/caam_sha.c */
-#if defined(HAVE_INTEL_AVX1)
-
- XMM Instructions/inline asm Definitions
+#else
-#endif
+#ifdef WOLFSSL_SHA512
-#if defined(HAVE_INTEL_AVX2)
+static int InitSha512(wc_Sha512* sha512)
+{
+ if (sha512 == NULL)
+ return BAD_FUNC_ARG;
- YMM Instructions/inline asm Definitions
+ sha512->digest[0] = W64LIT(0x6a09e667f3bcc908);
+ sha512->digest[1] = W64LIT(0xbb67ae8584caa73b);
+ sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b);
+ sha512->digest[3] = W64LIT(0xa54ff53a5f1d36f1);
+ sha512->digest[4] = W64LIT(0x510e527fade682d1);
+ sha512->digest[5] = W64LIT(0x9b05688c2b3e6c1f);
+ sha512->digest[6] = W64LIT(0x1f83d9abfb41bd6b);
+ sha512->digest[7] = W64LIT(0x5be0cd19137e2179);
-#endif
+ sha512->buffLen = 0;
+ sha512->loLen = 0;
+ sha512->hiLen = 0;
-#if defnied(HAVE_INTEL_AVX1)
-
- int Transform_AVX1() {
- Stitched Message Sched/Round
- }
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ sha512->ctx.sha_type = SHA2_512;
+ /* always start firstblock = 1 when using hw engine */
+ sha512->ctx.isfirstblock = 1;
+ if(sha512->ctx.mode == ESP32_SHA_HW) {
+ /* release hw */
+ esp_sha_hw_unlock();
+ }
+ /* always set mode as INIT
+ * whether using HW or SW is determined at first call of update()
+ */
+ sha512->ctx.mode = ESP32_SHA_INIT;
#endif
-
-#if defnied(HAVE_INTEL_AVX2)
-
- int Transform_AVX2() {
- Stitched Message Sched/Round
- }
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ sha512->flags = 0;
#endif
+ return 0;
+}
+#endif /* WOLFSSL_SHA512 */
-*/
-
+/* Hardware Acceleration */
#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+#ifdef WOLFSSL_SHA512
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
+ /*****
+ Intel AVX1/AVX2 Macro Control Structure
-#ifndef _MSC_VER
- #define cpuid(reg, leaf, sub)\
- __asm__ __volatile__ ("cpuid":\
- "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
- "a" (leaf), "c"(sub));
+ #if defined(HAVE_INTEL_SPEEDUP)
+ #define HAVE_INTEL_AVX1
+ #define HAVE_INTEL_AVX2
+ #endif
- #define XASM_LINK(f) asm(f)
-#else
+ int InitSha512(wc_Sha512* sha512) {
+ Save/Recover XMM, YMM
+ ...
- #include <intrin.h>
- #define cpuid(a,b) __cpuid((int*)a,b)
-
- #define XASM_LINK(f)
-
-#endif /* _MSC_VER */
-
-#define EAX 0
-#define EBX 1
-#define ECX 2
-#define EDX 3
-
-#define CPUID_AVX1 0x1
-#define CPUID_AVX2 0x2
-#define CPUID_RDRAND 0x4
-#define CPUID_RDSEED 0x8
-#define CPUID_BMI2 0x10 /* MULX, RORX */
-
-#define IS_INTEL_AVX1 (cpuid_flags&CPUID_AVX1)
-#define IS_INTEL_AVX2 (cpuid_flags&CPUID_AVX2)
-#define IS_INTEL_BMI2 (cpuid_flags&CPUID_BMI2)
-#define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND)
-#define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED)
-
-static word32 cpuid_check = 0 ;
-static word32 cpuid_flags = 0 ;
-
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
- int got_intel_cpu=0;
- unsigned int reg[5];
-
- reg[4] = '\0' ;
- cpuid(reg, 0, 0);
- if(memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
- memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
- memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
- got_intel_cpu = 1;
- }
- if (got_intel_cpu) {
- cpuid(reg, leaf, sub);
- return((reg[num]>>bit)&0x1) ;
+ Check Intel AVX cpuid flags
}
- return 0 ;
-}
-#define CHECK_SHA512 0x1
-#define CHECK_SHA384 0x2
-
-static int set_cpuid_flags(int sha) {
- if((cpuid_check & sha) ==0) {
- if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
- if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; }
- if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
- if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ; }
- if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ; }
- cpuid_check |= sha ;
- return 0 ;
- }
- return 1 ;
-}
+ #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+ Transform_Sha512_AVX1(); # Function prototype
+ Transform_Sha512_AVX2(); #
+ #endif
+ _Transform_Sha512() { # Native Transform Function body
-/* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha512 */
+ }
-#if defined(HAVE_INTEL_AVX1)
-static int Transform_AVX1(Sha512 *sha512) ;
-#endif
+ int Sha512Update() {
+ Save/Recover XMM, YMM
+ ...
+ }
-#if defined(HAVE_INTEL_AVX2)
-static int Transform_AVX2(Sha512 *sha512) ;
+ int Sha512Final() {
+ Save/Recover XMM, YMM
+ ...
+ }
-#if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
-static int Transform_AVX1_RORX(Sha512 *sha512) ;
-#endif
-#endif
+ #if defined(HAVE_INTEL_AVX1)
-static int _Transform(Sha512 *sha512) ;
-
-static int (*Transform_p)(Sha512* sha512) = _Transform ;
+ XMM Instructions/INLINE asm Definitions
-#define Transform(sha512) (*Transform_p)(sha512)
+ #endif
-static void set_Transform(void) {
- if(set_cpuid_flags(CHECK_SHA512)) return ;
+ #if defined(HAVE_INTEL_AVX2)
-#if defined(HAVE_INTEL_AVX2)
- if(IS_INTEL_AVX2 && IS_INTEL_BMI2){
- Transform_p = Transform_AVX1_RORX; return ;
- Transform_p = Transform_AVX2 ;
- /* for avoiding warning,"not used" */
- }
-#endif
-#if defined(HAVE_INTEL_AVX1)
- Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ; return ;
-#endif
- Transform_p = _Transform ; return ;
-}
+ YMM Instructions/INLINE asm Definitions
-#else
- #define Transform(sha512) _Transform(sha512)
-#endif
+ #endif
-/* Dummy for saving MM_REGs on behalf of Transform */
-/* #if defined(HAVE_INTEL_AVX2)
- #define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\
- "%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\
- "%ymm12","%ymm13","%ymm14","%ymm15")
-*/
-#if defined(HAVE_INTEL_AVX1)
- #define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\
- "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15")
-#else
-#define SAVE_XMM_YMM
-#endif
+ #if defined(HAVE_INTEL_AVX1)
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+ int Transform_Sha512_AVX1() {
+ Stitched Message Sched/Round
+ }
-#include <string.h>
+ #endif
-#endif /* defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2) */
+ #if defined(HAVE_INTEL_AVX2)
+ int Transform_Sha512_AVX2() {
+ Stitched Message Sched/Round
+ }
+ #endif
-#if defined(HAVE_INTEL_RORX)
-#define ROTR(func, bits, x) \
-word64 func(word64 x) { word64 ret ;\
- __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x):) ;\
- return ret ;\
-}
+ */
-static INLINE ROTR(rotrFixed64_28, 28, x)
-static INLINE ROTR(rotrFixed64_34, 34, x)
-static INLINE ROTR(rotrFixed64_39, 39, x)
-static INLINE ROTR(rotrFixed64_14, 14, x)
-static INLINE ROTR(rotrFixed64_18, 18, x)
-static INLINE ROTR(rotrFixed64_41, 41, x)
-#define S0_RORX(x) (rotrFixed64_28(x)^rotrFixed64_34(x)^rotrFixed64_39(x))
-#define S1_RORX(x) (rotrFixed64_14(x)^rotrFixed64_18(x)^rotrFixed64_41(x))
+ /* Each platform needs to query info type 1 from cpuid to see if aesni is
+ * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
+ */
+
+#ifdef __cplusplus
+ extern "C" {
#endif
-#if defined(HAVE_BYTEREVERSE64) && !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
-#define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size)
-#define ByteReverseWords64_1(buf, size)\
- { unsigned int i ;\
- for(i=0; i< size/sizeof(word64); i++){\
- __asm__ volatile("bswapq %0":"+r"(buf[i])::) ;\
- }\
-}
+ #if defined(HAVE_INTEL_AVX1)
+ extern int Transform_Sha512_AVX1(wc_Sha512 *sha512);
+ extern int Transform_Sha512_AVX1_Len(wc_Sha512 *sha512, word32 len);
+ #endif
+ #if defined(HAVE_INTEL_AVX2)
+ extern int Transform_Sha512_AVX2(wc_Sha512 *sha512);
+ extern int Transform_Sha512_AVX2_Len(wc_Sha512 *sha512, word32 len);
+ #if defined(HAVE_INTEL_RORX)
+ extern int Transform_Sha512_AVX1_RORX(wc_Sha512 *sha512);
+ extern int Transform_Sha512_AVX1_RORX_Len(wc_Sha512 *sha512,
+ word32 len);
+ extern int Transform_Sha512_AVX2_RORX(wc_Sha512 *sha512);
+ extern int Transform_Sha512_AVX2_RORX_Len(wc_Sha512 *sha512,
+ word32 len);
+ #endif
+ #endif
+
+#ifdef __cplusplus
+ } /* extern "C" */
#endif
+ static int _Transform_Sha512(wc_Sha512 *sha512);
+ static int (*Transform_Sha512_p)(wc_Sha512* sha512) = _Transform_Sha512;
+ static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL;
+ static int transform_check = 0;
+ static int intel_flags;
+ #define Transform_Sha512(sha512) (*Transform_Sha512_p)(sha512)
+ #define Transform_Sha512_Len(sha512, len) \
+ (*Transform_Sha512_Len_p)(sha512, len)
-int wc_InitSha512(Sha512* sha512)
-{
- sha512->digest[0] = W64LIT(0x6a09e667f3bcc908);
- sha512->digest[1] = W64LIT(0xbb67ae8584caa73b);
- sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b);
- sha512->digest[3] = W64LIT(0xa54ff53a5f1d36f1);
- sha512->digest[4] = W64LIT(0x510e527fade682d1);
- sha512->digest[5] = W64LIT(0x9b05688c2b3e6c1f);
- sha512->digest[6] = W64LIT(0x1f83d9abfb41bd6b);
- sha512->digest[7] = W64LIT(0x5be0cd19137e2179);
+ static void Sha512_SetTransform()
+ {
+ if (transform_check)
+ return;
+
+ intel_flags = cpuid_get_flags();
+
+ #if defined(HAVE_INTEL_AVX2)
+ if (IS_INTEL_AVX2(intel_flags)) {
+ #ifdef HAVE_INTEL_RORX
+ if (IS_INTEL_BMI2(intel_flags)) {
+ Transform_Sha512_p = Transform_Sha512_AVX2_RORX;
+ Transform_Sha512_Len_p = Transform_Sha512_AVX2_RORX_Len;
+ }
+ else
+ #endif
+ if (1) {
+ Transform_Sha512_p = Transform_Sha512_AVX2;
+ Transform_Sha512_Len_p = Transform_Sha512_AVX2_Len;
+ }
+ #ifdef HAVE_INTEL_RORX
+ else {
+ Transform_Sha512_p = Transform_Sha512_AVX1_RORX;
+ Transform_Sha512_Len_p = Transform_Sha512_AVX1_RORX_Len;
+ }
+ #endif
+ }
+ else
+ #endif
+ #if defined(HAVE_INTEL_AVX1)
+ if (IS_INTEL_AVX1(intel_flags)) {
+ Transform_Sha512_p = Transform_Sha512_AVX1;
+ Transform_Sha512_Len_p = Transform_Sha512_AVX1_Len;
+ }
+ else
+ #endif
+ Transform_Sha512_p = _Transform_Sha512;
+
+ transform_check = 1;
+ }
+#endif /* WOLFSSL_SHA512 */
+
+#else
+ #define Transform_Sha512(sha512) _Transform_Sha512(sha512)
- sha512->buffLen = 0;
- sha512->loLen = 0;
- sha512->hiLen = 0;
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
- set_Transform() ; /* choose best Transform function under this runtime environment */
#endif
-
- return 0 ;
-}
+#ifdef WOLFSSL_SHA512
-static const word64 K512[80] = {
- W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
- W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
- W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
- W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
- W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
- W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
- W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
- W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
- W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
- W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
- W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
- W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
- W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
- W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
- W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
- W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
- W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
- W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
- W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
- W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
- W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
- W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
- W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
- W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
- W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
- W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
- W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
- W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
- W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
- W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
- W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
- W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
- W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
- W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
- W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
- W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
- W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
- W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
- W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
- W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
-};
+int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId)
+{
+ int ret = 0;
+ if (sha512 == NULL)
+ return BAD_FUNC_ARG;
+ sha512->heap = heap;
-#define blk0(i) (W[i] = sha512->buffer[i])
+ ret = InitSha512(sha512);
+ if (ret != 0)
+ return ret;
-#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ Sha512_SetTransform();
+#endif
-#define Ch(x,y,z) (z^(x&(y^z)))
-#define Maj(x,y,z) ((x&y)|(z&(x|y)))
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ sha512->W = NULL;
+#endif
-#define a(i) T[(0-i)&7]
-#define b(i) T[(1-i)&7]
-#define c(i) T[(2-i)&7]
-#define d(i) T[(3-i)&7]
-#define e(i) T[(4-i)&7]
-#define f(i) T[(5-i)&7]
-#define g(i) T[(6-i)&7]
-#define h(i) T[(7-i)&7]
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+ ret = wolfAsync_DevCtxInit(&sha512->asyncDev,
+ WOLFSSL_ASYNC_MARKER_SHA512, sha512->heap, devId);
+#else
+ (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
-#define S0(x) (rotrFixed64(x,28)^rotrFixed64(x,34)^rotrFixed64(x,39))
-#define S1(x) (rotrFixed64(x,14)^rotrFixed64(x,18)^rotrFixed64(x,41))
-#define s0(x) (rotrFixed64(x,1)^rotrFixed64(x,8)^(x>>7))
-#define s1(x) (rotrFixed64(x,19)^rotrFixed64(x,61)^(x>>6))
+ return ret;
+}
-#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\
- d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
+#endif /* WOLFSSL_SHA512 */
-#define blk384(i) (W[i] = sha384->buffer[i])
-#define R2(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk384(i));\
- d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
+static const word64 K512[80] = {
+ W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
+ W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
+ W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
+ W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
+ W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
+ W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
+ W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
+ W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
+ W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
+ W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
+ W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
+ W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
+ W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
+ W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
+ W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
+ W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
+ W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
+ W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
+ W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
+ W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
+ W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
+ W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
+ W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
+ W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
+ W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
+ W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
+ W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
+ W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
+ W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
+ W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
+ W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
+ W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
+ W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
+ W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
+ W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
+ W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
+ W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
+ W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
+ W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
+ W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
+};
-static int _Transform(Sha512* sha512)
+#define blk0(i) (W[i] = sha512->buffer[i])
+
+#define blk2(i) (\
+ W[ i & 15] += \
+ s1(W[(i-2) & 15])+ \
+ W[(i-7) & 15] + \
+ s0(W[(i-15) & 15]) \
+ )
+
+#define Ch(x,y,z) (z ^ (x & (y ^ z)))
+#define Maj(x,y,z) ((x & y) | (z & (x | y)))
+
+#define a(i) T[(0-i) & 7]
+#define b(i) T[(1-i) & 7]
+#define c(i) T[(2-i) & 7]
+#define d(i) T[(3-i) & 7]
+#define e(i) T[(4-i) & 7]
+#define f(i) T[(5-i) & 7]
+#define g(i) T[(6-i) & 7]
+#define h(i) T[(7-i) & 7]
+
+#define S0(x) (rotrFixed64(x,28) ^ rotrFixed64(x,34) ^ rotrFixed64(x,39))
+#define S1(x) (rotrFixed64(x,14) ^ rotrFixed64(x,18) ^ rotrFixed64(x,41))
+#define s0(x) (rotrFixed64(x,1) ^ rotrFixed64(x,8) ^ (x>>7))
+#define s1(x) (rotrFixed64(x,19) ^ rotrFixed64(x,61) ^ (x>>6))
+
+#define R(i) \
+ h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j ? blk2(i) : blk0(i)); \
+ d(i) += h(i); \
+ h(i) += S0(a(i)) + Maj(a(i),b(i),c(i))
+
+static int _Transform_Sha512(wc_Sha512* sha512)
{
const word64* K = K512;
-
word32 j;
word64 T[8];
-
-#ifdef WOLFSSL_SMALL_STACK
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ word64* W = sha512->W;
+ if (W == NULL) {
+ W = (word64*) XMALLOC(sizeof(word64) * 16, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (W == NULL)
+ return MEMORY_E;
+ sha512->W = W;
+ }
+#elif defined(WOLFSSL_SMALL_STACK)
word64* W;
W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (W == NULL)
@@ -462,11 +525,11 @@ static int _Transform(Sha512* sha512)
/* Copy digest to working vars */
XMEMCPY(T, sha512->digest, sizeof(T));
-#ifdef USE_SLOW_SHA2
+#ifdef USE_SLOW_SHA512
/* over twice as small, but 50% slower */
/* 80 operations, not unrolled */
for (j = 0; j < 80; j += 16) {
- int m;
+ int m;
for (m = 0; m < 16; m++) { /* braces needed here for macros {} */
R(m);
}
@@ -479,10 +542,9 @@ static int _Transform(Sha512* sha512)
R( 8); R( 9); R(10); R(11);
R(12); R(13); R(14); R(15);
}
-#endif /* USE_SLOW_SHA2 */
+#endif /* USE_SLOW_SHA512 */
/* Add the working vars back into digest */
-
sha512->digest[0] += a(0);
sha512->digest[1] += b(0);
sha512->digest[2] += c(0);
@@ -496,7 +558,7 @@ static int _Transform(Sha512* sha512)
ForceZero(W, sizeof(word64) * 16);
ForceZero(T, sizeof(T));
-#ifdef WOLFSSL_SMALL_STACK
+#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SMALL_STACK_CACHE)
XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
@@ -504,875 +566,350 @@ static int _Transform(Sha512* sha512)
}
-static INLINE void AddLength(Sha512* sha512, word32 len)
+static WC_INLINE void AddLength(wc_Sha512* sha512, word32 len)
{
- word32 tmp = sha512->loLen;
+ word64 tmp = sha512->loLen;
if ( (sha512->loLen += len) < tmp)
sha512->hiLen++; /* carry low to high */
}
-int wc_Sha512Update(Sha512* sha512, const byte* data, word32 len)
+static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 len)
{
+ int ret = 0;
/* do block size increments */
byte* local = (byte*)sha512->buffer;
- SAVE_XMM_YMM ; /* for Intel AVX */
-
- while (len) {
- word32 add = min(len, SHA512_BLOCK_SIZE - sha512->buffLen);
- XMEMCPY(&local[sha512->buffLen], data, add);
-
- sha512->buffLen += add;
- data += add;
- len -= add;
-
- if (sha512->buffLen == SHA512_BLOCK_SIZE) {
- int ret;
- #if defined(LITTLE_ENDIAN_ORDER)
- #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
- #endif
- ByteReverseWords64(sha512->buffer, sha512->buffer,
- SHA512_BLOCK_SIZE);
- #endif
- ret = Transform(sha512);
- if (ret != 0)
- return ret;
- AddLength(sha512, SHA512_BLOCK_SIZE);
- sha512->buffLen = 0;
- }
- }
- return 0;
-}
+ /* check that internal buffLen is valid */
+ if (sha512->buffLen >= WC_SHA512_BLOCK_SIZE)
+ return BUFFER_E;
+ AddLength(sha512, len);
-int wc_Sha512Final(Sha512* sha512, byte* hash)
-{
- byte* local = (byte*)sha512->buffer;
- int ret;
+ if (sha512->buffLen > 0) {
+ word32 add = min(len, WC_SHA512_BLOCK_SIZE - sha512->buffLen);
+ if (add > 0) {
+ XMEMCPY(&local[sha512->buffLen], data, add);
- SAVE_XMM_YMM ; /* for Intel AVX */
- AddLength(sha512, sha512->buffLen); /* before adding pads */
-
- local[sha512->buffLen++] = 0x80; /* add 1 */
-
- /* pad with zeros */
- if (sha512->buffLen > SHA512_PAD_SIZE) {
- XMEMSET(&local[sha512->buffLen], 0, SHA512_BLOCK_SIZE -sha512->buffLen);
- sha512->buffLen += SHA512_BLOCK_SIZE - sha512->buffLen;
- #if defined(LITTLE_ENDIAN_ORDER)
- #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
- #endif
- ByteReverseWords64(sha512->buffer,sha512->buffer,SHA512_BLOCK_SIZE);
- #endif
- ret = Transform(sha512);
- if (ret != 0)
- return ret;
-
- sha512->buffLen = 0;
- }
- XMEMSET(&local[sha512->buffLen], 0, SHA512_PAD_SIZE - sha512->buffLen);
-
- /* put lengths in bits */
- sha512->hiLen = (sha512->loLen >> (8*sizeof(sha512->loLen) - 3)) +
- (sha512->hiLen << 3);
- sha512->loLen = sha512->loLen << 3;
+ sha512->buffLen += add;
+ data += add;
+ len -= add;
+ }
- /* store lengths */
+ if (sha512->buffLen == WC_SHA512_BLOCK_SIZE) {
#if defined(LITTLE_ENDIAN_ORDER)
#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+ if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
#endif
- ByteReverseWords64(sha512->buffer, sha512->buffer, SHA512_PAD_SIZE);
- #endif
- /* ! length ordering dependent on digest endian type ! */
-
- sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen;
- sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen;
- #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(IS_INTEL_AVX1 || IS_INTEL_AVX2)
- ByteReverseWords64(&(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
- &(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
- SHA512_BLOCK_SIZE - SHA512_PAD_SIZE);
+ {
+ #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ ByteReverseWords64(sha512->buffer, sha512->buffer,
+ WC_SHA512_BLOCK_SIZE);
+ #endif
+ }
#endif
- ret = Transform(sha512);
- if (ret != 0)
- return ret;
-
- #ifdef LITTLE_ENDIAN_ORDER
- ByteReverseWords64(sha512->digest, sha512->digest, SHA512_DIGEST_SIZE);
+ #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ ret = Transform_Sha512(sha512);
+ #else
+ if(sha512->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha512->ctx);
+ }
+ ret = esp_sha512_process(sha512);
+ if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW){
+ ret = Transform_Sha512(sha512);
+ }
#endif
- XMEMCPY(hash, sha512->digest, SHA512_DIGEST_SIZE);
+ if (ret == 0)
+ sha512->buffLen = 0;
+ else
+ len = 0;
+ }
+ }
- return wc_InitSha512(sha512); /* reset state */
-}
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ if (Transform_Sha512_Len_p != NULL) {
+ word32 blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1);
+
+ if (blocksLen > 0) {
+ sha512->data = data;
+ /* Byte reversal performed in function if required. */
+ Transform_Sha512_Len(sha512, blocksLen);
+ data += blocksLen;
+ len -= blocksLen;
+ }
+ }
+ else
+#endif
+#if !defined(LITTLE_ENDIAN_ORDER) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ {
+ while (len >= WC_SHA512_BLOCK_SIZE) {
+ XMEMCPY(local, data, WC_SHA512_BLOCK_SIZE);
+ data += WC_SHA512_BLOCK_SIZE;
+ len -= WC_SHA512_BLOCK_SIZE;
-int wc_Sha512Hash(const byte* data, word32 len, byte* hash)
-{
- int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- Sha512* sha512;
+ #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+ {
+ ByteReverseWords64(sha512->buffer, sha512->buffer,
+ WC_SHA512_BLOCK_SIZE);
+ }
+ #endif
+ /* Byte reversal performed in function if required. */
+ ret = Transform_Sha512(sha512);
+ if (ret != 0)
+ break;
+ }
+ }
#else
- Sha512 sha512[1];
+ {
+ while (len >= WC_SHA512_BLOCK_SIZE) {
+ XMEMCPY(local, data, WC_SHA512_BLOCK_SIZE);
+
+ data += WC_SHA512_BLOCK_SIZE;
+ len -= WC_SHA512_BLOCK_SIZE;
+ #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ ByteReverseWords64(sha512->buffer, sha512->buffer,
+ WC_SHA512_BLOCK_SIZE);
+ #endif
+ #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ ret = Transform_Sha512(sha512);
+ #else
+ if(sha512->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha512->ctx);
+ }
+ ret = esp_sha512_process(sha512);
+ if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW){
+ ret = Transform_Sha512(sha512);
+ }
+ #endif
+ if (ret != 0)
+ break;
+ }
+ }
#endif
-#ifdef WOLFSSL_SMALL_STACK
- sha512 = (Sha512*)XMALLOC(sizeof(Sha512), NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (sha512 == NULL)
- return MEMORY_E;
-#endif
-
- if ((ret = wc_InitSha512(sha512)) != 0) {
- WOLFSSL_MSG("InitSha512 failed");
- }
- else if ((ret = wc_Sha512Update(sha512, data, len)) != 0) {
- WOLFSSL_MSG("Sha512Update failed");
+ if (len > 0) {
+ XMEMCPY(local, data, len);
+ sha512->buffLen = len;
}
- else if ((ret = wc_Sha512Final(sha512, hash)) != 0) {
- WOLFSSL_MSG("Sha512Final failed");
- }
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(sha512, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
+
return ret;
}
-#if defined(HAVE_INTEL_AVX1)
-
-#define Rx_1(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i] ;
-#define Rx_2(i) d(i)+=h(i);
-#define Rx_3(i) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
-
-#if defined(HAVE_INTEL_RORX)
-#define Rx_RORX_1(i) h(i)+=S1_RORX(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i] ;
-#define Rx_RORX_2(i) d(i)+=h(i);
-#define Rx_RORX_3(i) h(i)+=S0_RORX(a(i))+Maj(a(i),b(i),c(i));
-#endif
-
-#endif
-
-#if defined(HAVE_INTEL_AVX2)
-#define Ry_1(i, w) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + w ;
-#define Ry_2(i, w) d(i)+=h(i);
-#define Ry_3(i, w) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
-#endif
+#ifdef WOLFSSL_SHA512
-#if defined(HAVE_INTEL_AVX1) /* inline Assember for Intel AVX1 instructions */
-#if defined(DEBUG_XMM)
-
-#define SAVE_REG(i) __asm__ volatile("vmovdqu %%xmm"#i", %0 \n\t":"=m"(reg[i][0])::XMM_REGs);
-#define RECV_REG(i) __asm__ volatile("vmovdqu %0, %%xmm"#i" \n\t"::"m"(reg[i][0]):XMM_REGs);
-
-#define _DUMP_REG(REG, name)\
- { word64 buf[16] ;word64 reg[16][2];int k ;\
- SAVE_REG(0); SAVE_REG(1); SAVE_REG(2); SAVE_REG(3); SAVE_REG(4); \
- SAVE_REG(5); SAVE_REG(6); SAVE_REG(7);SAVE_REG(8); SAVE_REG(9); SAVE_REG(10);\
- SAVE_REG(11); SAVE_REG(12); SAVE_REG(13); SAVE_REG(14); SAVE_REG(15); \
- __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::XMM_REGs);\
- printf(" "#name":\t") ; for(k=0; k<2; k++) printf("%016lx.", (word64)(buf[k])); printf("\n") ; \
- RECV_REG(0); RECV_REG(1); RECV_REG(2); RECV_REG(3); RECV_REG(4);\
- RECV_REG(5); RECV_REG(6); RECV_REG(7); RECV_REG(8); RECV_REG(9);\
- RECV_REG(10); RECV_REG(11); RECV_REG(12); RECV_REG(13); RECV_REG(14); RECV_REG(15);\
+int wc_Sha512Update(wc_Sha512* sha512, const byte* data, word32 len)
+{
+ if (sha512 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
}
-#define DUMP_REG(REG) _DUMP_REG(REG, #REG)
-#define PRINTF(fmt, ...)
-
-#else
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+ if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha512(&sha512->asyncDev, NULL, data, len);
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
-#define DUMP_REG(REG)
-#define PRINTF(fmt, ...)
+ return Sha512Update(sha512, data, len);
+}
-#endif
+#endif /* WOLFSSL_SHA512 */
-#define _MOVE_to_REG(xymm, mem) __asm__ volatile("vmovdqu %0, %%"#xymm" "\
- :: "m"(mem):XMM_REGs) ;
-#define _MOVE_to_MEM(mem,i, xymm) __asm__ volatile("vmovdqu %%"#xymm", %0" :\
- "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::XMM_REGs) ;
-#define _MOVE(dest, src) __asm__ volatile("vmovdqu %%"#src", %%"\
- #dest" ":::XMM_REGs) ;
-
-#define _S_TEMP(dest, src, bits, temp) __asm__ volatile("vpsrlq $"#bits", %%"\
- #src", %%"#dest"\n\tvpsllq $64-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
- #temp",%%"#dest", %%"#dest" ":::XMM_REGs) ;
-#define _AVX1_R(dest, src, bits) __asm__ volatile("vpsrlq $"#bits", %%"\
- #src", %%"#dest" ":::XMM_REGs) ;
-#define _XOR(dest, src1, src2) __asm__ volatile("vpxor %%"#src1", %%"\
- #src2", %%"#dest" ":::XMM_REGs) ;
-#define _OR(dest, src1, src2) __asm__ volatile("vpor %%"#src1", %%"\
- #src2", %%"#dest" ":::XMM_REGs) ;
-#define _ADD(dest, src1, src2) __asm__ volatile("vpaddq %%"#src1", %%"\
- #src2", %%"#dest" ":::XMM_REGs) ;
-#define _ADD_MEM(dest, src1, mem) __asm__ volatile("vpaddq %0, %%"#src1", %%"\
- #dest" "::"m"(mem):XMM_REGs) ;
-
-#define MOVE_to_REG(xymm, mem) _MOVE_to_REG(xymm, mem)
-#define MOVE_to_MEM(mem, i, xymm) _MOVE_to_MEM(mem, i, xymm)
-#define MOVE(dest, src) _MOVE(dest, src)
-
-#define XOR(dest, src1, src2) _XOR(dest, src1, src2)
-#define OR(dest, src1, src2) _OR(dest, src1, src2)
-#define ADD(dest, src1, src2) _ADD(dest, src1, src2)
-
-#define S_TMP(dest, src, bits, temp) _S_TEMP(dest, src, bits, temp);
-#define AVX1_S(dest, src, bits) S_TMP(dest, src, bits, S_TEMP)
-#define AVX1_R(dest, src, bits) _AVX1_R(dest, src, bits)
-
-#define Init_Mask(mask) \
- __asm__ volatile("vmovdqu %0, %%xmm1\n\t"::"m"(mask):"%xmm1") ;
-
-#define _W_from_buff1(w, buff, xmm) \
- /* X0..3(xmm4..7), W[0..15] = sha512->buffer[0.15]; */\
- __asm__ volatile("vmovdqu %1, %%"#xmm"\n\t"\
- "vpshufb %%xmm1, %%"#xmm", %%"#xmm"\n\t"\
- "vmovdqu %%"#xmm", %0"\
- :"=m"(w): "m"(buff):"%xmm0") ;
-
-#define W_from_buff1(w, buff, xmm) _W_from_buff1(w, buff, xmm)
-
-#define W_from_buff(w, buff)\
- Init_Mask(mBYTE_FLIP_MASK[0]) ;\
- W_from_buff1(w[0], buff[0], W_0);\
- W_from_buff1(w[2], buff[2], W_2);\
- W_from_buff1(w[4], buff[4], W_4);\
- W_from_buff1(w[6], buff[6], W_6);\
- W_from_buff1(w[8], buff[8], W_8);\
- W_from_buff1(w[10],buff[10],W_10);\
- W_from_buff1(w[12],buff[12],W_12);\
- W_from_buff1(w[14],buff[14],W_14);
-
-static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
-
-#define W_I_15 xmm14
-#define W_I_7 xmm11
-#define W_I_2 xmm13
-#define W_I xmm12
-#define G_TEMP xmm0
-#define S_TEMP xmm1
-#define XMM_TEMP0 xmm2
-
-#define W_0 xmm12
-#define W_2 xmm3
-#define W_4 xmm4
-#define W_6 xmm5
-#define W_8 xmm6
-#define W_10 xmm7
-#define W_12 xmm8
-#define W_14 xmm9
-
-#define XMM_REGs
-
-#define s0_1(dest, src) AVX1_S(dest, src, 1);
-#define s0_2(dest, src) AVX1_S(G_TEMP, src, 8); XOR(dest, G_TEMP, dest) ;
-#define s0_3(dest, src) AVX1_R(G_TEMP, src, 7); XOR(dest, G_TEMP, dest) ;
-
-#define s1_1(dest, src) AVX1_S(dest, src, 19);
-#define s1_2(dest, src) AVX1_S(G_TEMP, src, 61); XOR(dest, G_TEMP, dest) ;
-#define s1_3(dest, src) AVX1_R(G_TEMP, src, 6); XOR(dest, G_TEMP, dest) ;
-
-#define s0_(dest, src) s0_1(dest, src) ; s0_2(dest, src) ; s0_3(dest, src)
-#define s1_(dest, src) s1_1(dest, src) ; s1_2(dest, src) ; s1_3(dest, src)
-
-#define Block_xx_1(i) \
- MOVE_to_REG(W_I_15, W_X[(i-15)&15]) ;\
- MOVE_to_REG(W_I_7, W_X[(i- 7)&15]) ;\
-
-#define Block_xx_2(i) \
- MOVE_to_REG(W_I_2, W_X[(i- 2)&15]) ;\
- MOVE_to_REG(W_I, W_X[(i)]) ;\
-
-#define Block_xx_3(i) \
- s0_ (XMM_TEMP0, W_I_15) ;\
-
-#define Block_xx_4(i) \
- ADD(W_I, W_I, XMM_TEMP0) ;\
- ADD(W_I, W_I, W_I_7) ;\
-
-#define Block_xx_5(i) \
- s1_ (XMM_TEMP0, W_I_2) ;\
-
-#define Block_xx_6(i) \
- ADD(W_I, W_I, XMM_TEMP0) ;\
- MOVE_to_MEM(W_X,i, W_I) ;\
- if(i==0)\
- MOVE_to_MEM(W_X,16, W_I) ;\
-
-#define Block_xx_7(i) \
- MOVE_to_REG(W_I_15, W_X[(i-15)&15]) ;\
- MOVE_to_REG(W_I_7, W_X[(i- 7)&15]) ;\
-
-#define Block_xx_8(i) \
- MOVE_to_REG(W_I_2, W_X[(i- 2)&15]) ;\
- MOVE_to_REG(W_I, W_X[(i)]) ;\
-
-#define Block_xx_9(i) \
- s0_ (XMM_TEMP0, W_I_15) ;\
-
-#define Block_xx_10(i) \
- ADD(W_I, W_I, XMM_TEMP0) ;\
- ADD(W_I, W_I, W_I_7) ;\
-
-#define Block_xx_11(i) \
- s1_ (XMM_TEMP0, W_I_2) ;\
-
-#define Block_xx_12(i) \
- ADD(W_I, W_I, XMM_TEMP0) ;\
- MOVE_to_MEM(W_X,i, W_I) ;\
- if((i)==0)\
- MOVE_to_MEM(W_X,16, W_I) ;\
-
-static inline void Block_0_1(word64 *W_X) { Block_xx_1(0) ; }
-static inline void Block_0_2(word64 *W_X) { Block_xx_2(0) ; }
-static inline void Block_0_3(void) { Block_xx_3(0) ; }
-static inline void Block_0_4(void) { Block_xx_4(0) ; }
-static inline void Block_0_5(void) { Block_xx_5(0) ; }
-static inline void Block_0_6(word64 *W_X) { Block_xx_6(0) ; }
-static inline void Block_0_7(word64 *W_X) { Block_xx_7(2) ; }
-static inline void Block_0_8(word64 *W_X) { Block_xx_8(2) ; }
-static inline void Block_0_9(void) { Block_xx_9(2) ; }
-static inline void Block_0_10(void){ Block_xx_10(2) ; }
-static inline void Block_0_11(void){ Block_xx_11(2) ; }
-static inline void Block_0_12(word64 *W_X){ Block_xx_12(2) ; }
-
-static inline void Block_4_1(word64 *W_X) { Block_xx_1(4) ; }
-static inline void Block_4_2(word64 *W_X) { Block_xx_2(4) ; }
-static inline void Block_4_3(void) { Block_xx_3(4) ; }
-static inline void Block_4_4(void) { Block_xx_4(4) ; }
-static inline void Block_4_5(void) { Block_xx_5(4) ; }
-static inline void Block_4_6(word64 *W_X) { Block_xx_6(4) ; }
-static inline void Block_4_7(word64 *W_X) { Block_xx_7(6) ; }
-static inline void Block_4_8(word64 *W_X) { Block_xx_8(6) ; }
-static inline void Block_4_9(void) { Block_xx_9(6) ; }
-static inline void Block_4_10(void){ Block_xx_10(6) ; }
-static inline void Block_4_11(void){ Block_xx_11(6) ; }
-static inline void Block_4_12(word64 *W_X){ Block_xx_12(6) ; }
-
-static inline void Block_8_1(word64 *W_X) { Block_xx_1(8) ; }
-static inline void Block_8_2(word64 *W_X) { Block_xx_2(8) ; }
-static inline void Block_8_3(void) { Block_xx_3(8) ; }
-static inline void Block_8_4(void) { Block_xx_4(8) ; }
-static inline void Block_8_5(void) { Block_xx_5(8) ; }
-static inline void Block_8_6(word64 *W_X) { Block_xx_6(8) ; }
-static inline void Block_8_7(word64 *W_X) { Block_xx_7(10) ; }
-static inline void Block_8_8(word64 *W_X) { Block_xx_8(10) ; }
-static inline void Block_8_9(void) { Block_xx_9(10) ; }
-static inline void Block_8_10(void){ Block_xx_10(10) ; }
-static inline void Block_8_11(void){ Block_xx_11(10) ; }
-static inline void Block_8_12(word64 *W_X){ Block_xx_12(10) ; }
-
-static inline void Block_12_1(word64 *W_X) { Block_xx_1(12) ; }
-static inline void Block_12_2(word64 *W_X) { Block_xx_2(12) ; }
-static inline void Block_12_3(void) { Block_xx_3(12) ; }
-static inline void Block_12_4(void) { Block_xx_4(12) ; }
-static inline void Block_12_5(void) { Block_xx_5(12) ; }
-static inline void Block_12_6(word64 *W_X) { Block_xx_6(12) ; }
-static inline void Block_12_7(word64 *W_X) { Block_xx_7(14) ; }
-static inline void Block_12_8(word64 *W_X) { Block_xx_8(14) ; }
-static inline void Block_12_9(void) { Block_xx_9(14) ; }
-static inline void Block_12_10(void){ Block_xx_10(14) ; }
-static inline void Block_12_11(void){ Block_xx_11(14) ; }
-static inline void Block_12_12(word64 *W_X){ Block_xx_12(14) ; }
+#endif /* WOLFSSL_IMX6_CAAM */
-#endif
+static WC_INLINE int Sha512Final(wc_Sha512* sha512)
+{
+ byte* local = (byte*)sha512->buffer;
+ int ret;
-#if defined(HAVE_INTEL_AVX2)
-static const unsigned long mBYTE_FLIP_MASK_Y[] =
- { 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
-
-#define W_from_buff_Y(buff)\
- { /* X0..3(ymm9..12), W_X[0..15] = sha512->buffer[0.15]; */\
- __asm__ volatile("vmovdqu %0, %%ymm8\n\t"::"m"(mBYTE_FLIP_MASK_Y[0]):YMM_REGs) ;\
- __asm__ volatile("vmovdqu %0, %%ymm12\n\t"\
- "vmovdqu %1, %%ymm4\n\t"\
- "vpshufb %%ymm8, %%ymm12, %%ymm12\n\t"\
- "vpshufb %%ymm8, %%ymm4, %%ymm4\n\t"\
- :: "m"(buff[0]), "m"(buff[4]):YMM_REGs) ;\
- __asm__ volatile("vmovdqu %0, %%ymm5\n\t"\
- "vmovdqu %1, %%ymm6\n\t"\
- "vpshufb %%ymm8, %%ymm5, %%ymm5\n\t"\
- "vpshufb %%ymm8, %%ymm6, %%ymm6\n\t"\
- :: "m"(buff[8]), "m"(buff[12]):YMM_REGs) ;\
+ if (sha512 == NULL) {
+ return BAD_FUNC_ARG;
}
-#if defined(DEBUG_YMM)
-
-#define SAVE_REG_Y(i) __asm__ volatile("vmovdqu %%ymm"#i", %0 \n\t":"=m"(reg[i-4][0])::YMM_REGs);
-#define RECV_REG_Y(i) __asm__ volatile("vmovdqu %0, %%ymm"#i" \n\t"::"m"(reg[i-4][0]):YMM_REGs);
-
-#define _DUMP_REG_Y(REG, name)\
- { word64 buf[16] ;word64 reg[16][2];int k ;\
- SAVE_REG_Y(4); SAVE_REG_Y(5); SAVE_REG_Y(6); SAVE_REG_Y(7); \
- SAVE_REG_Y(8); SAVE_REG_Y(9); SAVE_REG_Y(10); SAVE_REG_Y(11); SAVE_REG_Y(12);\
- SAVE_REG_Y(13); SAVE_REG_Y(14); SAVE_REG_Y(15); \
- __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::YMM_REGs);\
- printf(" "#name":\t") ; for(k=0; k<4; k++) printf("%016lx.", (word64)buf[k]) ; printf("\n") ; \
- RECV_REG_Y(4); RECV_REG_Y(5); RECV_REG_Y(6); RECV_REG_Y(7); \
- RECV_REG_Y(8); RECV_REG_Y(9); RECV_REG_Y(10); RECV_REG_Y(11); RECV_REG_Y(12); \
- RECV_REG_Y(13); RECV_REG_Y(14); RECV_REG_Y(15);\
- }
+ local[sha512->buffLen++] = 0x80; /* add 1 */
-#define DUMP_REG_Y(REG) _DUMP_REG_Y(REG, #REG)
-#define DUMP_REG2_Y(REG) _DUMP_REG_Y(REG, #REG)
-#define PRINTF_Y(fmt, ...)
+ /* pad with zeros */
+ if (sha512->buffLen > WC_SHA512_PAD_SIZE) {
+ XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_BLOCK_SIZE - sha512->buffLen);
+ sha512->buffLen += WC_SHA512_BLOCK_SIZE - sha512->buffLen;
+#if defined(LITTLE_ENDIAN_ORDER)
+ #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+ #endif
+ {
+ #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ ByteReverseWords64(sha512->buffer,sha512->buffer,
+ WC_SHA512_BLOCK_SIZE);
+ #endif
+ }
+#endif /* LITTLE_ENDIAN_ORDER */
+#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ ret = Transform_Sha512(sha512);
#else
-
-#define DUMP_REG_Y(REG)
-#define DUMP_REG2_Y(REG)
-#define PRINTF_Y(fmt, ...)
-
+ if(sha512->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha512->ctx);
+ }
+ ret = esp_sha512_process(sha512);
+ if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW){
+ ret = Transform_Sha512(sha512);
+ }
#endif
+ if (ret != 0)
+ return ret;
-#define _MOVE_to_REGy(ymm, mem) __asm__ volatile("vmovdqu %0, %%"#ymm" "\
- :: "m"(mem):YMM_REGs) ;
-#define _MOVE_to_MEMy(mem,i, ymm) __asm__ volatile("vmovdqu %%"#ymm", %0" \
- : "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::YMM_REGs) ;
-#define _MOVE_128y(ymm0, ymm1, ymm2, map) __asm__ volatile("vperm2i128 $"\
- #map", %%"#ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs) ;
-#define _S_TEMPy(dest, src, bits, temp) \
- __asm__ volatile("vpsrlq $"#bits", %%"#src", %%"#dest"\n\tvpsllq $64-"#bits\
- ", %%"#src", %%"#temp"\n\tvpor %%"#temp",%%"#dest", %%"#dest" ":::YMM_REGs) ;
-#define _AVX2_R(dest, src, bits) __asm__ volatile("vpsrlq $"#bits", %%"\
- #src", %%"#dest" ":::YMM_REGs) ;
-#define _XORy(dest, src1, src2) __asm__ volatile("vpxor %%"#src1", %%"\
- #src2", %%"#dest" ":::YMM_REGs) ;
-#define _ADDy(dest, src1, src2) __asm__ volatile("vpaddq %%"#src1", %%"\
- #src2", %%"#dest" ":::YMM_REGs) ;
-#define _BLENDy(map, dest, src1, src2) __asm__ volatile("vpblendd $"#map", %%"\
- #src1", %%"#src2", %%"#dest" ":::YMM_REGs) ;
-#define _BLENDQy(map, dest, src1, src2) __asm__ volatile("vblendpd $"#map", %%"\
- #src1", %%"#src2", %%"#dest" ":::YMM_REGs) ;
-#define _PERMQy(map, dest, src) __asm__ volatile("vpermq $"#map", %%"\
- #src", %%"#dest" ":::YMM_REGs) ;
-
-#define MOVE_to_REGy(ymm, mem) _MOVE_to_REGy(ymm, mem)
-#define MOVE_to_MEMy(mem, i, ymm) _MOVE_to_MEMy(mem, i, ymm)
-
-#define MOVE_128y(ymm0, ymm1, ymm2, map) _MOVE_128y(ymm0, ymm1, ymm2, map)
-#define XORy(dest, src1, src2) _XORy(dest, src1, src2)
-#define ADDy(dest, src1, src2) _ADDy(dest, src1, src2)
-#define BLENDy(map, dest, src1, src2) _BLENDy(map, dest, src1, src2)
-#define BLENDQy(map, dest, src1, src2) _BLENDQy(map, dest, src1, src2)
-#define PERMQy(map, dest, src) _PERMQy(map, dest, src)
-
-
-#define S_TMPy(dest, src, bits, temp) _S_TEMPy(dest, src, bits, temp);
-#define AVX2_S(dest, src, bits) S_TMPy(dest, src, bits, S_TEMPy)
-#define AVX2_R(dest, src, bits) _AVX2_R(dest, src, bits)
-
-
-#define FEEDBACK1_to_W_I_2(w_i_2, w_i) MOVE_128y(YMM_TEMP0, w_i, w_i, 0x08) ;\
- BLENDy(0xf0, w_i_2, YMM_TEMP0, w_i_2) ;
-
-#define MOVE_W_to_W_I_15(w_i_15, w_0, w_4) BLENDQy(0x1, w_i_15, w_4, w_0) ;\
- PERMQy(0x39, w_i_15, w_i_15) ;
-#define MOVE_W_to_W_I_7(w_i_7, w_8, w_12) BLENDQy(0x1, w_i_7, w_12, w_8) ;\
- PERMQy(0x39, w_i_7, w_i_7) ;
-#define MOVE_W_to_W_I_2(w_i_2, w_12) BLENDQy(0xc, w_i_2, w_12, w_i_2) ;\
- PERMQy(0x0e, w_i_2, w_i_2) ;
-
-
-#define W_I_16y ymm8
-#define W_I_15y ymm9
-#define W_I_7y ymm10
-#define W_I_2y ymm11
-#define W_Iy ymm12
-#define G_TEMPy ymm13
-#define S_TEMPy ymm14
-#define YMM_TEMP0 ymm15
-#define YMM_TEMP0x xmm15
-#define W_I_TEMPy ymm7
-#define W_K_TEMPy ymm15
-#define W_K_TEMPx xmm15
-#define W_0y ymm12
-#define W_4y ymm4
-#define W_8y ymm5
-#define W_12y ymm6
-
-#define YMM_REGs
-/* Registers are saved in Sha512Update/Final */
- /* "%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15"*/
-
-#define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
- __asm__ volatile("vperm2i128 $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs) ;\
- __asm__ volatile("vpblendd $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs) ;\
- __asm__ volatile("vperm2i128 $0x01, %%"#w_i_7", %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
- __asm__ volatile("vpblendd $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
- __asm__ volatile("vpshufd $0x93, %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
-
-#define MOVE_7_to_15(w_i_15, w_i_7)\
- __asm__ volatile("vmovdqu %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
-
-#define MOVE_I_to_7(w_i_7, w_i)\
- __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
- __asm__ volatile("vpblendd $0x01, %%"#w_i_7", %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
- __asm__ volatile("vpshufd $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs) ;\
-
-#define MOVE_I_to_2(w_i_2, w_i)\
- __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs) ;\
- __asm__ volatile("vpshufd $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs) ;\
-
-#endif
+ sha512->buffLen = 0;
+ }
+ XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_PAD_SIZE - sha512->buffLen);
+ /* put lengths in bits */
+ sha512->hiLen = (sha512->loLen >> (8 * sizeof(sha512->loLen) - 3)) +
+ (sha512->hiLen << 3);
+ sha512->loLen = sha512->loLen << 3;
-/*** Transform Body ***/
-#if defined(HAVE_INTEL_AVX1)
+ /* store lengths */
+#if defined(LITTLE_ENDIAN_ORDER)
+ #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+ #endif
+ #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ ByteReverseWords64(sha512->buffer, sha512->buffer, WC_SHA512_PAD_SIZE);
+ #endif
+#endif
+ /* ! length ordering dependent on digest endian type ! */
-static int Transform_AVX1(Sha512* sha512)
-{
- const word64* K = K512;
- word64 W_X[16+4];
- word32 j;
- word64 T[8];
- /* Copy digest to working vars */
- XMEMCPY(T, sha512->digest, sizeof(T));
+#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen;
+ sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen;
+#endif
- W_from_buff(W_X, sha512->buffer) ;
- for (j = 0; j < 80; j += 16) {
- Rx_1( 0); Block_0_1(W_X); Rx_2( 0); Block_0_2(W_X); Rx_3( 0); Block_0_3();
- Rx_1( 1); Block_0_4(); Rx_2( 1); Block_0_5(); Rx_3( 1); Block_0_6(W_X);
- Rx_1( 2); Block_0_7(W_X); Rx_2( 2); Block_0_8(W_X); Rx_3( 2); Block_0_9();
- Rx_1( 3); Block_0_10();Rx_2( 3); Block_0_11();Rx_3( 3); Block_0_12(W_X);
-
- Rx_1( 4); Block_4_1(W_X); Rx_2( 4); Block_4_2(W_X); Rx_3( 4); Block_4_3();
- Rx_1( 5); Block_4_4(); Rx_2( 5); Block_4_5(); Rx_3( 5); Block_4_6(W_X);
- Rx_1( 6); Block_4_7(W_X); Rx_2( 6); Block_4_8(W_X); Rx_3( 6); Block_4_9();
- Rx_1( 7); Block_4_10();Rx_2( 7); Block_4_11();Rx_3( 7); Block_4_12(W_X);
-
- Rx_1( 8); Block_8_1(W_X); Rx_2( 8); Block_8_2(W_X); Rx_3( 8); Block_8_3();
- Rx_1( 9); Block_8_4(); Rx_2( 9); Block_8_5(); Rx_3( 9); Block_8_6(W_X);
- Rx_1(10); Block_8_7(W_X); Rx_2(10); Block_8_8(W_X); Rx_3(10); Block_8_9();
- Rx_1(11); Block_8_10();Rx_2(11); Block_8_11();Rx_3(11); Block_8_12(W_X);
-
- Rx_1(12); Block_12_1(W_X); Rx_2(12); Block_12_2(W_X); Rx_3(12); Block_12_3();
- Rx_1(13); Block_12_4(); Rx_2(13); Block_12_5(); Rx_3(13); Block_12_6(W_X);
- Rx_1(14); Block_12_7(W_X); Rx_2(14); Block_12_8(W_X); Rx_3(14); Block_12_9();
- Rx_1(15); Block_12_10();Rx_2(15); Block_12_11();Rx_3(15); Block_12_12(W_X);
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
+ ByteReverseWords64(&(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
+ &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
+ WC_SHA512_BLOCK_SIZE - WC_SHA512_PAD_SIZE);
+#endif
+#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \
+ defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ ret = Transform_Sha512(sha512);
+#else
+ if(sha512->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha512->ctx);
}
+ ret = esp_sha512_digest_process(sha512, 1);
+ if(ret == 0 && sha512->ctx.mode == ESP32_SHA_SW) {
+ ret = Transform_Sha512(sha512);
+ }
+#endif
+ if (ret != 0)
+ return ret;
- /* Add the working vars back into digest */
-
- sha512->digest[0] += a(0);
- sha512->digest[1] += b(0);
- sha512->digest[2] += c(0);
- sha512->digest[3] += d(0);
- sha512->digest[4] += e(0);
- sha512->digest[5] += f(0);
- sha512->digest[6] += g(0);
- sha512->digest[7] += h(0);
-
- /* Wipe variables */
- #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
- XMEMSET(W_X, 0, sizeof(word64) * 16);
+ #ifdef LITTLE_ENDIAN_ORDER
+ ByteReverseWords64(sha512->digest, sha512->digest, WC_SHA512_DIGEST_SIZE);
#endif
- XMEMSET(T, 0, sizeof(T));
return 0;
}
-#endif
-
-#if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX)
+#ifdef WOLFSSL_SHA512
-static int Transform_AVX1_RORX(Sha512* sha512)
+int wc_Sha512FinalRaw(wc_Sha512* sha512, byte* hash)
{
- const word64* K = K512;
- word64 W_X[16+4];
- word32 j;
- word64 T[8];
- /* Copy digest to working vars */
- XMEMCPY(T, sha512->digest, sizeof(T));
+#ifdef LITTLE_ENDIAN_ORDER
+ word64 digest[WC_SHA512_DIGEST_SIZE / sizeof(word64)];
+#endif
- W_from_buff(W_X, sha512->buffer) ;
- for (j = 0; j < 80; j += 16) {
- Rx_RORX_1( 0); Block_0_1(W_X); Rx_RORX_2( 0); Block_0_2(W_X);
- Rx_RORX_3( 0); Block_0_3();
- Rx_RORX_1( 1); Block_0_4(); Rx_RORX_2( 1); Block_0_5();
- Rx_RORX_3( 1); Block_0_6(W_X);
- Rx_RORX_1( 2); Block_0_7(W_X); Rx_RORX_2( 2); Block_0_8(W_X);
- Rx_RORX_3( 2); Block_0_9();
- Rx_RORX_1( 3); Block_0_10();Rx_RORX_2( 3); Block_0_11();
- Rx_RORX_3( 3); Block_0_12(W_X);
-
- Rx_RORX_1( 4); Block_4_1(W_X); Rx_RORX_2( 4); Block_4_2(W_X);
- Rx_RORX_3( 4); Block_4_3();
- Rx_RORX_1( 5); Block_4_4(); Rx_RORX_2( 5); Block_4_5();
- Rx_RORX_3( 5); Block_4_6(W_X);
- Rx_RORX_1( 6); Block_4_7(W_X); Rx_RORX_2( 6); Block_4_8(W_X);
- Rx_RORX_3( 6); Block_4_9();
- Rx_RORX_1( 7); Block_4_10();Rx_RORX_2( 7); Block_4_11();
- Rx_RORX_3( 7); Block_4_12(W_X);
-
- Rx_RORX_1( 8); Block_8_1(W_X); Rx_RORX_2( 8); Block_8_2(W_X);
- Rx_RORX_3( 8); Block_8_3();
- Rx_RORX_1( 9); Block_8_4(); Rx_RORX_2( 9); Block_8_5();
- Rx_RORX_3( 9); Block_8_6(W_X);
- Rx_RORX_1(10); Block_8_7(W_X); Rx_RORX_2(10); Block_8_8(W_X);
- Rx_RORX_3(10); Block_8_9();
- Rx_RORX_1(11); Block_8_10();Rx_RORX_2(11); Block_8_11();
- Rx_RORX_3(11); Block_8_12(W_X);
-
- Rx_RORX_1(12); Block_12_1(W_X); Rx_RORX_2(12); Block_12_2(W_X);
- Rx_RORX_3(12); Block_12_3();
- Rx_RORX_1(13); Block_12_4(); Rx_RORX_2(13); Block_12_5();
- Rx_RORX_3(13); Block_12_6(W_X);
- Rx_RORX_1(14); Block_12_7(W_X); Rx_RORX_2(14); Block_12_8(W_X);
- Rx_RORX_3(14); Block_12_9();
- Rx_RORX_1(15); Block_12_10();Rx_RORX_2(15); Block_12_11();
- Rx_RORX_3(15); Block_12_12(W_X);
+ if (sha512 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
}
- /* Add the working vars back into digest */
- sha512->digest[0] += a(0);
- sha512->digest[1] += b(0);
- sha512->digest[2] += c(0);
- sha512->digest[3] += d(0);
- sha512->digest[4] += e(0);
- sha512->digest[5] += f(0);
- sha512->digest[6] += g(0);
- sha512->digest[7] += h(0);
-
- /* Wipe variables */
- #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
- XMEMSET(W_X, 0, sizeof(word64) * 16);
- #endif
- XMEMSET(T, 0, sizeof(T));
+#ifdef LITTLE_ENDIAN_ORDER
+ ByteReverseWords64((word64*)digest, (word64*)sha512->digest,
+ WC_SHA512_DIGEST_SIZE);
+ XMEMCPY(hash, digest, WC_SHA512_DIGEST_SIZE);
+#else
+ XMEMCPY(hash, sha512->digest, WC_SHA512_DIGEST_SIZE);
+#endif
return 0;
}
-#endif
-
-#if defined(HAVE_INTEL_AVX2)
-#define s0_1y(dest, src) AVX2_S(dest, src, 1);
-#define s0_2y(dest, src) AVX2_S(G_TEMPy, src, 8); XORy(dest, G_TEMPy, dest) ;
-#define s0_3y(dest, src) AVX2_R(G_TEMPy, src, 7); XORy(dest, G_TEMPy, dest) ;
-
-#define s1_1y(dest, src) AVX2_S(dest, src, 19);
-#define s1_2y(dest, src) AVX2_S(G_TEMPy, src, 61); XORy(dest, G_TEMPy, dest) ;
-#define s1_3y(dest, src) AVX2_R(G_TEMPy, src, 6); XORy(dest, G_TEMPy, dest) ;
-
-#define s0_y(dest, src) s0_1y(dest, src) ; s0_2y(dest, src) ; s0_3y(dest, src)
-#define s1_y(dest, src) s1_1y(dest, src) ; s1_2y(dest, src) ; s1_3y(dest, src)
-
-#define blk384(i) (W[i] = sha384->buffer[i])
-
-
-#define Block_Y_xx_1(i, w_0, w_4, w_8, w_12)\
- MOVE_W_to_W_I_15(W_I_15y, w_0, w_4) ;\
- MOVE_W_to_W_I_7 (W_I_7y, w_8, w_12) ;\
- MOVE_W_to_W_I_2 (W_I_2y, w_12) ;\
-
-#define Block_Y_xx_2(i, w_0, w_4, w_8, w_12)\
- s0_1y (YMM_TEMP0, W_I_15y) ;\
-
-#define Block_Y_xx_3(i, w_0, w_4, w_8, w_12)\
- s0_2y (YMM_TEMP0, W_I_15y) ;\
-
-#define Block_Y_xx_4(i, w_0, w_4, w_8, w_12)\
- s0_3y (YMM_TEMP0, W_I_15y) ;\
-
-#define Block_Y_xx_5(i, w_0, w_4, w_8, w_12)\
- ADDy(W_I_TEMPy, w_0, YMM_TEMP0) ;\
-
-#define Block_Y_xx_6(i, w_0, w_4, w_8, w_12)\
- ADDy(W_I_TEMPy, W_I_TEMPy, W_I_7y) ;\
- s1_1y (YMM_TEMP0, W_I_2y) ;\
-
-#define Block_Y_xx_7(i, w_0, w_4, w_8, w_12)\
- s1_2y (YMM_TEMP0, W_I_2y) ;\
-
-#define Block_Y_xx_8(i, w_0, w_4, w_8, w_12)\
- s1_3y (YMM_TEMP0, W_I_2y) ;\
- ADDy(w_0, W_I_TEMPy, YMM_TEMP0) ;\
-
-#define Block_Y_xx_9(i, w_0, w_4, w_8, w_12)\
- FEEDBACK1_to_W_I_2(W_I_2y, w_0) ;\
-
-#define Block_Y_xx_10(i, w_0, w_4, w_8, w_12) \
- s1_1y (YMM_TEMP0, W_I_2y) ;\
-
-#define Block_Y_xx_11(i, w_0, w_4, w_8, w_12) \
- s1_2y (YMM_TEMP0, W_I_2y) ;\
-
-#define Block_Y_xx_12(i, w_0, w_4, w_8, w_12)\
- s1_3y (YMM_TEMP0, W_I_2y) ;\
- ADDy(w_0, W_I_TEMPy, YMM_TEMP0) ;\
- MOVE_to_MEMy(w,0, w_4) ;\
-
-
-static inline void Block_Y_0_1(void) { Block_Y_xx_1(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_2(void) { Block_Y_xx_2(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_3(void) { Block_Y_xx_3(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_4(void) { Block_Y_xx_4(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_5(void) { Block_Y_xx_5(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_6(void) { Block_Y_xx_6(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_7(void) { Block_Y_xx_7(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_8(void) { Block_Y_xx_8(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_9(void) { Block_Y_xx_9(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_10(void){ Block_Y_xx_10(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_11(void){ Block_Y_xx_11(0, W_0y, W_4y, W_8y, W_12y) ; }
-static inline void Block_Y_0_12(word64 *w){ Block_Y_xx_12(0, W_0y, W_4y, W_8y, W_12y) ; }
-
-static inline void Block_Y_4_1(void) { Block_Y_xx_1(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_2(void) { Block_Y_xx_2(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_3(void) { Block_Y_xx_3(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_4(void) { Block_Y_xx_4(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_5(void) { Block_Y_xx_5(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_6(void) { Block_Y_xx_6(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_7(void) { Block_Y_xx_7(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_8(void) { Block_Y_xx_8(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_9(void) { Block_Y_xx_9(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_10(void) { Block_Y_xx_10(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_11(void) { Block_Y_xx_11(4, W_4y, W_8y, W_12y, W_0y) ; }
-static inline void Block_Y_4_12(word64 *w) { Block_Y_xx_12(4, W_4y, W_8y, W_12y, W_0y) ; }
-
-static inline void Block_Y_8_1(void) { Block_Y_xx_1(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_2(void) { Block_Y_xx_2(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_3(void) { Block_Y_xx_3(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_4(void) { Block_Y_xx_4(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_5(void) { Block_Y_xx_5(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_6(void) { Block_Y_xx_6(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_7(void) { Block_Y_xx_7(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_8(void) { Block_Y_xx_8(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_9(void) { Block_Y_xx_9(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_10(void) { Block_Y_xx_10(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_11(void) { Block_Y_xx_11(8, W_8y, W_12y, W_0y, W_4y) ; }
-static inline void Block_Y_8_12(word64 *w) { Block_Y_xx_12(8, W_8y, W_12y, W_0y, W_4y) ; }
-
-static inline void Block_Y_12_1(void) { Block_Y_xx_1(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_2(void) { Block_Y_xx_2(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_3(void) { Block_Y_xx_3(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_4(void) { Block_Y_xx_4(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_5(void) { Block_Y_xx_5(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_6(void) { Block_Y_xx_6(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_7(void) { Block_Y_xx_7(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_8(void) { Block_Y_xx_8(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_9(void) { Block_Y_xx_9(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_10(void) { Block_Y_xx_10(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_11(void) { Block_Y_xx_11(12, W_12y, W_0y, W_4y, W_8y) ; }
-static inline void Block_Y_12_12(word64 *w) { Block_Y_xx_12(12, W_12y, W_0y, W_4y, W_8y) ; }
-
-
-static int Transform_AVX2(Sha512* sha512)
+int wc_Sha512Final(wc_Sha512* sha512, byte* hash)
{
- const word64* K = K512;
- word64 w[4] ;
- word32 j /*, k*/;
- word64 T[8];
- /* Copy digest to working vars */
- XMEMCPY(T, sha512->digest, sizeof(T));
+ int ret;
- W_from_buff_Y(sha512->buffer) ;
- MOVE_to_MEMy(w,0, W_0y) ;
- for (j = 0; j < 80; j += 16) {
- Ry_1( 0, w[0]); Block_Y_0_1(); Ry_2( 0, w[0]); Block_Y_0_2();
- Ry_3( 0, w[0]); Block_Y_0_3();
- Ry_1( 1, w[1]); Block_Y_0_4(); Ry_2( 1, w[1]); Block_Y_0_5();
- Ry_3( 1, w[1]); Block_Y_0_6();
- Ry_1( 2, w[2]); Block_Y_0_7(); Ry_2( 2, w[2]); Block_Y_0_8();
- Ry_3( 2, w[2]); Block_Y_0_9();
- Ry_1( 3, w[3]); Block_Y_0_10();Ry_2( 3, w[3]); Block_Y_0_11();
- Ry_3( 3, w[3]); Block_Y_0_12(w);
-
- Ry_1( 4, w[0]); Block_Y_4_1(); Ry_2( 4, w[0]); Block_Y_4_2();
- Ry_3( 4, w[0]); Block_Y_4_3();
- Ry_1( 5, w[1]); Block_Y_4_4(); Ry_2( 5, w[1]); Block_Y_4_5();
- Ry_3( 5, w[1]); Block_Y_4_6();
- Ry_1( 6, w[2]); Block_Y_4_7(); Ry_2( 6, w[2]); Block_Y_4_8();
- Ry_3( 6, w[2]); Block_Y_4_9();
- Ry_1( 7, w[3]); Block_Y_4_10(); Ry_2( 7, w[3]);Block_Y_4_11();
- Ry_3( 7, w[3]);Block_Y_4_12(w);
-
- Ry_1( 8, w[0]); Block_Y_8_1(); Ry_2( 8, w[0]); Block_Y_8_2();
- Ry_3( 8, w[0]); Block_Y_8_3();
- Ry_1( 9, w[1]); Block_Y_8_4(); Ry_2( 9, w[1]); Block_Y_8_5();
- Ry_3( 9, w[1]); Block_Y_8_6();
- Ry_1(10, w[2]); Block_Y_8_7(); Ry_2(10, w[2]); Block_Y_8_8();
- Ry_3(10, w[2]); Block_Y_8_9();
- Ry_1(11, w[3]); Block_Y_8_10();Ry_2(11, w[3]); Block_Y_8_11();
- Ry_3(11, w[3]); Block_Y_8_12(w);
-
- Ry_1(12, w[0]); Block_Y_12_1(); Ry_2(12, w[0]); Block_Y_12_2();
- Ry_3(12, w[0]); Block_Y_12_3();
- Ry_1(13, w[1]); Block_Y_12_4(); Ry_2(13, w[1]); Block_Y_12_5();
- Ry_3(13, w[1]); Block_Y_12_6();
- Ry_1(14, w[2]); Block_Y_12_7(); Ry_2(14, w[2]); Block_Y_12_8();
- Ry_3(14, w[2]); Block_Y_12_9();
- Ry_1(15, w[3]); Block_Y_12_10();Ry_2(15, w[3]); Block_Y_12_11();
- Ry_3(15, w[3]);Block_Y_12_12(w);
+ if (sha512 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
}
-
- /* Add the working vars back into digest */
-
- sha512->digest[0] += a(0);
- sha512->digest[1] += b(0);
- sha512->digest[2] += c(0);
- sha512->digest[3] += d(0);
- sha512->digest[4] += e(0);
- sha512->digest[5] += f(0);
- sha512->digest[6] += g(0);
- sha512->digest[7] += h(0);
- /* Wipe variables */
- #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
- XMEMSET(W, 0, sizeof(word64) * 16);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+ if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha512(&sha512->asyncDev, hash, NULL,
+ WC_SHA512_DIGEST_SIZE);
#endif
- XMEMSET(T, 0, sizeof(T));
-
- return 0;
-}
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
-#endif
+ ret = Sha512Final(sha512);
+ if (ret != 0)
+ return ret;
+ XMEMCPY(hash, sha512->digest, WC_SHA512_DIGEST_SIZE);
-#ifdef WOLFSSL_SHA384
+ return InitSha512(sha512); /* reset state */
+}
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+int wc_InitSha512(wc_Sha512* sha512)
+{
+ return wc_InitSha512_ex(sha512, NULL, INVALID_DEVID);
+}
-#if defined(HAVE_INTEL_AVX1)
-static int Transform384_AVX1(Sha384 *sha384) ;
-#endif
-#if defined(HAVE_INTEL_AVX2)
-static int Transform384_AVX2(Sha384 *sha384) ;
-#endif
+void wc_Sha512Free(wc_Sha512* sha512)
+{
+ if (sha512 == NULL)
+ return;
-#if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) &&defined(HAVE_INTEL_RORX)
-static int Transform384_AVX1_RORX(Sha384 *sha384) ;
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (sha512->W != NULL) {
+ XFREE(sha512->W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ sha512->W = NULL;
+ }
#endif
-static int _Transform384(Sha384 *sha384) ;
-static int (*Transform384_p)(Sha384* sha384) = _Transform384 ;
-
-#define Transform384(sha384) (*Transform384_p)(sha384)
-static void set_Transform384(void) {
- if(set_cpuid_flags(CHECK_SHA384))return ;
-
-#if defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
- Transform384_p = ((IS_INTEL_AVX1) ? Transform384_AVX1 : _Transform384) ;
-#elif defined(HAVE_INTEL_AVX2)
- #if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX)
- if(IS_INTEL_AVX2 && IS_INTEL_BMI2) { Transform384_p = Transform384_AVX1_RORX ; return ; }
- #endif
- if(IS_INTEL_AVX2) { Transform384_p = Transform384_AVX2 ; return ; }
- #if defined(HAVE_INTEL_AVX1)
- Transform384_p = ((IS_INTEL_AVX1) ? Transform384_AVX1 : _Transform384) ;
- #endif
-#else
- Transform384_p = ((IS_INTEL_AVX1) ? Transform384_AVX1 : _Transform384) ;
-#endif
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+ wolfAsync_DevCtxFree(&sha512->asyncDev, WOLFSSL_ASYNC_MARKER_SHA512);
+#endif /* WOLFSSL_ASYNC_CRYPT */
}
+#endif /* WOLFSSL_SHA512 */
+
+/* -------------------------------------------------------------------------- */
+/* SHA384 */
+/* -------------------------------------------------------------------------- */
+#ifdef WOLFSSL_SHA384
+
+#if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
+ /* functions defined in wolfcrypt/src/port/caam/caam_sha.c */
+
#else
- #define Transform384(sha512) _Transform384(sha512)
-#endif
-int wc_InitSha384(Sha384* sha384)
+static int InitSha384(wc_Sha384* sha384)
{
+ if (sha384 == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
sha384->digest[0] = W64LIT(0xcbbb9d5dc1059ed8);
sha384->digest[1] = W64LIT(0x629a292a367cd507);
sha384->digest[2] = W64LIT(0x9159015a3070dd17);
@@ -1386,420 +923,303 @@ int wc_InitSha384(Sha384* sha384)
sha384->loLen = 0;
sha384->hiLen = 0;
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
- set_Transform384() ;
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ sha384->ctx.sha_type = SHA2_384;
+ /* always start firstblock = 1 when using hw engine */
+ sha384->ctx.isfirstblock = 1;
+ if(sha384->ctx.mode == ESP32_SHA_HW) {
+ /* release hw */
+ esp_sha_hw_unlock();
+ }
+ /* always set mode as INIT
+ * whether using HW or SW is determined at first call of update()
+ */
+ sha384->ctx.mode = ESP32_SHA_INIT;
+
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ sha384->flags = 0;
#endif
-
+
return 0;
}
-static int _Transform384(Sha384* sha384)
+int wc_Sha384Update(wc_Sha384* sha384, const byte* data, word32 len)
{
- const word64* K = K512;
+ if (sha384 == NULL || (data == NULL && len > 0)) {
+ return BAD_FUNC_ARG;
+ }
- word32 j;
- word64 T[8];
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+ if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha384(&sha384->asyncDev, NULL, data, len);
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
-#ifdef WOLFSSL_SMALL_STACK
- word64* W;
+ return Sha512Update((wc_Sha512*)sha384, data, len);
+}
- W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
- if (W == NULL)
- return MEMORY_E;
-#else
- word64 W[16];
-#endif
- /* Copy digest to working vars */
- XMEMCPY(T, sha384->digest, sizeof(T));
+int wc_Sha384FinalRaw(wc_Sha384* sha384, byte* hash)
+{
+#ifdef LITTLE_ENDIAN_ORDER
+ word64 digest[WC_SHA384_DIGEST_SIZE / sizeof(word64)];
+#endif
-#ifdef USE_SLOW_SHA2
- /* over twice as small, but 50% slower */
- /* 80 operations, not unrolled */
- for (j = 0; j < 80; j += 16) {
- int m;
- for (m = 0; m < 16; m++) { /* braces needed for macros {} */
- R2(m);
- }
+ if (sha384 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
}
-#else
- /* 80 operations, partially loop unrolled */
- for (j = 0; j < 80; j += 16) {
- R2( 0); R2( 1); R2( 2); R2( 3);
- R2( 4); R2( 5); R2( 6); R2( 7);
- R2( 8); R2( 9); R2(10); R2(11);
- R2(12); R2(13); R2(14); R2(15);
- }
-#endif /* USE_SLOW_SHA2 */
- /* Add the working vars back into digest */
-
- sha384->digest[0] += a(0);
- sha384->digest[1] += b(0);
- sha384->digest[2] += c(0);
- sha384->digest[3] += d(0);
- sha384->digest[4] += e(0);
- sha384->digest[5] += f(0);
- sha384->digest[6] += g(0);
- sha384->digest[7] += h(0);
-
- /* Wipe variables */
- XMEMSET(W, 0, sizeof(word64) * 16);
- XMEMSET(T, 0, sizeof(T));
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef LITTLE_ENDIAN_ORDER
+ ByteReverseWords64((word64*)digest, (word64*)sha384->digest,
+ WC_SHA384_DIGEST_SIZE);
+ XMEMCPY(hash, digest, WC_SHA384_DIGEST_SIZE);
+#else
+ XMEMCPY(hash, sha384->digest, WC_SHA384_DIGEST_SIZE);
#endif
return 0;
}
-static INLINE void AddLength384(Sha384* sha384, word32 len)
-{
- word32 tmp = sha384->loLen;
- if ( (sha384->loLen += len) < tmp)
- sha384->hiLen++; /* carry low to high */
-}
-
-int wc_Sha384Update(Sha384* sha384, const byte* data, word32 len)
+int wc_Sha384Final(wc_Sha384* sha384, byte* hash)
{
- /* do block size increments */
- byte* local = (byte*)sha384->buffer;
-
- SAVE_XMM_YMM ; /* for Intel AVX */
-
- while (len) {
- word32 add = min(len, SHA384_BLOCK_SIZE - sha384->buffLen);
- XMEMCPY(&local[sha384->buffLen], data, add);
-
- sha384->buffLen += add;
- data += add;
- len -= add;
-
- if (sha384->buffLen == SHA384_BLOCK_SIZE) {
- int ret;
-
- #if defined(LITTLE_ENDIAN_ORDER)
- #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
- #endif
- ByteReverseWords64(sha384->buffer, sha384->buffer,
- SHA384_BLOCK_SIZE);
- #endif
- ret = Transform384(sha384);
- if (ret != 0)
- return ret;
+ int ret;
- AddLength384(sha384, SHA384_BLOCK_SIZE);
- sha384->buffLen = 0;
- }
+ if (sha384 == NULL || hash == NULL) {
+ return BAD_FUNC_ARG;
}
- return 0;
-}
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+ if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) {
+ #if defined(HAVE_INTEL_QA)
+ return IntelQaSymSha384(&sha384->asyncDev, hash, NULL,
+ WC_SHA384_DIGEST_SIZE);
+ #endif
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
-int wc_Sha384Final(Sha384* sha384, byte* hash)
-{
- byte* local = (byte*)sha384->buffer;
- int ret;
+ ret = Sha512Final((wc_Sha512*)sha384);
+ if (ret != 0)
+ return ret;
- SAVE_XMM_YMM ; /* for Intel AVX */
- AddLength384(sha384, sha384->buffLen); /* before adding pads */
+ XMEMCPY(hash, sha384->digest, WC_SHA384_DIGEST_SIZE);
- local[sha384->buffLen++] = 0x80; /* add 1 */
+ return InitSha384(sha384); /* reset state */
+}
- /* pad with zeros */
- if (sha384->buffLen > SHA384_PAD_SIZE) {
- XMEMSET(&local[sha384->buffLen], 0, SHA384_BLOCK_SIZE -sha384->buffLen);
- sha384->buffLen += SHA384_BLOCK_SIZE - sha384->buffLen;
-
- #if defined(LITTLE_ENDIAN_ORDER)
- #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
- #endif
- ByteReverseWords64(sha384->buffer, sha384->buffer,
- SHA384_BLOCK_SIZE);
- #endif
- ret = Transform384(sha384);
- if (ret != 0)
- return ret;
+int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
+{
+ int ret;
- sha384->buffLen = 0;
+ if (sha384 == NULL) {
+ return BAD_FUNC_ARG;
}
- XMEMSET(&local[sha384->buffLen], 0, SHA384_PAD_SIZE - sha384->buffLen);
-
- /* put lengths in bits */
- sha384->hiLen = (sha384->loLen >> (8*sizeof(sha384->loLen) - 3)) +
- (sha384->hiLen << 3);
- sha384->loLen = sha384->loLen << 3;
- /* store lengths */
- #if defined(LITTLE_ENDIAN_ORDER)
- #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
- #endif
- ByteReverseWords64(sha384->buffer, sha384->buffer,
- SHA384_BLOCK_SIZE);
- #endif
- /* ! length ordering dependent on digest endian type ! */
- sha384->buffer[SHA384_BLOCK_SIZE / sizeof(word64) - 2] = sha384->hiLen;
- sha384->buffer[SHA384_BLOCK_SIZE / sizeof(word64) - 1] = sha384->loLen;
- #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
- if(IS_INTEL_AVX1 || IS_INTEL_AVX2)
- ByteReverseWords64(&(sha384->buffer[SHA384_BLOCK_SIZE / sizeof(word64) - 2]),
- &(sha384->buffer[SHA384_BLOCK_SIZE / sizeof(word64) - 2]),
- SHA384_BLOCK_SIZE - SHA384_PAD_SIZE);
- #endif
- ret = Transform384(sha384);
+ sha384->heap = heap;
+ ret = InitSha384(sha384);
if (ret != 0)
return ret;
- #ifdef LITTLE_ENDIAN_ORDER
- ByteReverseWords64(sha384->digest, sha384->digest, SHA384_DIGEST_SIZE);
- #endif
- XMEMCPY(hash, sha384->digest, SHA384_DIGEST_SIZE);
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+ Sha512_SetTransform();
+#endif
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ sha384->W = NULL;
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+ ret = wolfAsync_DevCtxInit(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384,
+ sha384->heap, devId);
+#else
+ (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
- return wc_InitSha384(sha384); /* reset state */
+ return ret;
}
+#endif /* WOLFSSL_IMX6_CAAM */
-int wc_Sha384Hash(const byte* data, word32 len, byte* hash)
+int wc_InitSha384(wc_Sha384* sha384)
{
- int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
- Sha384* sha384;
-#else
- Sha384 sha384[1];
-#endif
+ return wc_InitSha384_ex(sha384, NULL, INVALID_DEVID);
+}
-#ifdef WOLFSSL_SMALL_STACK
- sha384 = (Sha384*)XMALLOC(sizeof(Sha384), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+void wc_Sha384Free(wc_Sha384* sha384)
+{
if (sha384 == NULL)
- return MEMORY_E;
-#endif
+ return;
- if ((ret = wc_InitSha384(sha384)) != 0) {
- WOLFSSL_MSG("InitSha384 failed");
- }
- else if ((ret = wc_Sha384Update(sha384, data, len)) != 0) {
- WOLFSSL_MSG("Sha384Update failed");
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ if (sha384->W != NULL) {
+ XFREE(sha384->W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ sha384->W = NULL;
}
- else if ((ret = wc_Sha384Final(sha384, hash)) != 0) {
- WOLFSSL_MSG("Sha384Final failed");
- }
-
-#ifdef WOLFSSL_SMALL_STACK
- XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
- return ret;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+ wolfAsync_DevCtxFree(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384);
+#endif /* WOLFSSL_ASYNC_CRYPT */
}
-#if defined(HAVE_INTEL_AVX1)
-
-static int Transform384_AVX1(Sha384* sha384)
-{
- const word64* K = K512;
- word64 W_X[16+4];
- word32 j;
- word64 T[8];
+#endif /* WOLFSSL_SHA384 */
- /* Copy digest to working vars */
- XMEMCPY(T, sha384->digest, sizeof(T));
- W_from_buff(W_X, sha384->buffer) ;
- for (j = 0; j < 80; j += 16) {
- Rx_1( 0); Block_0_1(W_X); Rx_2( 0); Block_0_2(W_X); Rx_3( 0); Block_0_3();
- Rx_1( 1); Block_0_4(); Rx_2( 1); Block_0_5(); Rx_3( 1); Block_0_6(W_X);
- Rx_1( 2); Block_0_7(W_X); Rx_2( 2); Block_0_8(W_X); Rx_3( 2); Block_0_9();
- Rx_1( 3); Block_0_10();Rx_2( 3); Block_0_11();Rx_3( 3); Block_0_12(W_X);
-
- Rx_1( 4); Block_4_1(W_X); Rx_2( 4); Block_4_2(W_X); Rx_3( 4); Block_4_3();
- Rx_1( 5); Block_4_4(); Rx_2( 5); Block_4_5(); Rx_3( 5); Block_4_6(W_X);
- Rx_1( 6); Block_4_7(W_X); Rx_2( 6); Block_4_8(W_X); Rx_3( 6); Block_4_9();
- Rx_1( 7); Block_4_10();Rx_2( 7); Block_4_11();Rx_3( 7); Block_4_12(W_X);
-
- Rx_1( 8); Block_8_1(W_X); Rx_2( 8); Block_8_2(W_X); Rx_3( 8); Block_8_3();
- Rx_1( 9); Block_8_4(); Rx_2( 9); Block_8_5(); Rx_3( 9); Block_8_6(W_X);
- Rx_1(10); Block_8_7(W_X); Rx_2(10); Block_8_8(W_X); Rx_3(10); Block_8_9();
- Rx_1(11); Block_8_10();Rx_2(11); Block_8_11();Rx_3(11); Block_8_12(W_X);
-
- Rx_1(12); Block_12_1(W_X); Rx_2(12); Block_12_2(W_X); Rx_3(12); Block_12_3();
- Rx_1(13); Block_12_4(); Rx_2(13); Block_12_5(); Rx_3(13); Block_12_6(W_X);
- Rx_1(14); Block_12_7(W_X); Rx_2(14); Block_12_8(W_X); Rx_3(14); Block_12_9();
- Rx_1(15); Block_12_10();Rx_2(15); Block_12_11();Rx_3(15); Block_12_12(W_X);
- }
+#endif /* HAVE_FIPS */
- /* Add the working vars back into digest */
+#ifdef WOLFSSL_SHA512
- sha384->digest[0] += a(0);
- sha384->digest[1] += b(0);
- sha384->digest[2] += c(0);
- sha384->digest[3] += d(0);
- sha384->digest[4] += e(0);
- sha384->digest[5] += f(0);
- sha384->digest[6] += g(0);
- sha384->digest[7] += h(0);
+int wc_Sha512GetHash(wc_Sha512* sha512, byte* hash)
+{
+ int ret;
+ wc_Sha512 tmpSha512;
- /* Wipe variables */
- #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
- XMEMSET(W, 0, sizeof(word64) * 16);
- #endif
- XMEMSET(T, 0, sizeof(T));
+ if (sha512 == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
- return 0;
-}
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if(sha512->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha512->ctx);
+ }
+ if(sha512->ctx.mode != ESP32_SHA_SW)
+ esp_sha512_digest_process(sha512, 0);
+#endif
+ ret = wc_Sha512Copy(sha512, &tmpSha512);
+ if (ret == 0) {
+ ret = wc_Sha512Final(&tmpSha512, hash);
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ sha512->ctx.mode = ESP32_SHA_SW;;
#endif
+ wc_Sha512Free(&tmpSha512);
+ }
+ return ret;
+}
-#if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
-static int Transform384_AVX1_RORX(Sha384* sha384)
+int wc_Sha512Copy(wc_Sha512* src, wc_Sha512* dst)
{
- const word64* K = K512;
- word64 W_X[16+4];
- word32 j;
- word64 T[8];
-
- /* Copy digest to working vars */
- XMEMCPY(T, sha384->digest, sizeof(T));
+ int ret = 0;
- W_from_buff(W_X, sha384->buffer) ;
- for (j = 0; j < 80; j += 16) {
- Rx_RORX_1( 0); Block_0_1(W_X); Rx_RORX_2( 0);
- Block_0_2(W_X); Rx_RORX_3( 0); Block_0_3();
- Rx_RORX_1( 1); Block_0_4(); Rx_RORX_2( 1);
- Block_0_5(); Rx_RORX_3( 1); Block_0_6(W_X);
- Rx_RORX_1( 2); Block_0_7(W_X); Rx_RORX_2( 2);
- Block_0_8(W_X); Rx_RORX_3( 2); Block_0_9();
- Rx_RORX_1( 3); Block_0_10();Rx_RORX_2( 3);
- Block_0_11();Rx_RORX_3( 3); Block_0_12(W_X);
-
- Rx_RORX_1( 4); Block_4_1(W_X); Rx_RORX_2( 4);
- Block_4_2(W_X); Rx_RORX_3( 4); Block_4_3();
- Rx_RORX_1( 5); Block_4_4(); Rx_RORX_2( 5);
- Block_4_5(); Rx_RORX_3( 5); Block_4_6(W_X);
- Rx_RORX_1( 6); Block_4_7(W_X); Rx_RORX_2( 6);
- Block_4_8(W_X); Rx_RORX_3( 6); Block_4_9();
- Rx_RORX_1( 7); Block_4_10();Rx_RORX_2( 7);
- Block_4_11();Rx_RORX_3( 7); Block_4_12(W_X);
-
- Rx_RORX_1( 8); Block_8_1(W_X); Rx_RORX_2( 8);
- Block_8_2(W_X); Rx_RORX_3( 8); Block_8_3();
- Rx_RORX_1( 9); Block_8_4(); Rx_RORX_2( 9);
- Block_8_5(); Rx_RORX_3( 9); Block_8_6(W_X);
- Rx_RORX_1(10); Block_8_7(W_X); Rx_RORX_2(10);
- Block_8_8(W_X); Rx_RORX_3(10); Block_8_9();
- Rx_RORX_1(11); Block_8_10();Rx_RORX_2(11);
- Block_8_11();Rx_RORX_3(11); Block_8_12(W_X);
-
- Rx_RORX_1(12); Block_12_1(W_X); Rx_RORX_2(12);
- Block_12_2(W_X); Rx_RORX_3(12); Block_12_3();
- Rx_RORX_1(13); Block_12_4(); Rx_RORX_2(13);
- Block_12_5(); Rx_RORX_3(13); Block_12_6(W_X);
- Rx_RORX_1(14); Block_12_7(W_X); Rx_RORX_2(14);
- Block_12_8(W_X); Rx_RORX_3(14); Block_12_9();
- Rx_RORX_1(15); Block_12_10();Rx_RORX_2(15);
- Block_12_11();Rx_RORX_3(15); Block_12_12(W_X);
- }
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
- /* Add the working vars back into digest */
+ XMEMCPY(dst, src, sizeof(wc_Sha512));
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ dst->W = NULL;
+#endif
- sha384->digest[0] += a(0);
- sha384->digest[1] += b(0);
- sha384->digest[2] += c(0);
- sha384->digest[3] += d(0);
- sha384->digest[4] += e(0);
- sha384->digest[5] += f(0);
- sha384->digest[6] += g(0);
- sha384->digest[7] += h(0);
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ dst->ctx.mode = src->ctx.mode;
+ dst->ctx.isfirstblock = src->ctx.isfirstblock;
+ dst->ctx.sha_type = src->ctx.sha_type;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
- /* Wipe variables */
- #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
- XMEMSET(W, 0, sizeof(word64) * 16);
- #endif
- XMEMSET(T, 0, sizeof(T));
+ return ret;
+}
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Sha512SetFlags(wc_Sha512* sha512, word32 flags)
+{
+ if (sha512) {
+ sha512->flags = flags;
+ }
return 0;
}
-#endif
-
-#if defined(HAVE_INTEL_AVX2)
-
-static int Transform384_AVX2(Sha384* sha384)
+int wc_Sha512GetFlags(wc_Sha512* sha512, word32* flags)
{
- const word64* K = K512;
- word64 w[4] ;
- word32 j;
- word64 T[8];
-
- /* Copy digest to working vars */
- XMEMCPY(T, sha384->digest, sizeof(T));
+ if (sha512 && flags) {
+ *flags = sha512->flags;
+ }
+ return 0;
+}
+#endif
- /* over twice as small, but 50% slower */
- /* 80 operations, not unrolled */
+#endif /* WOLFSSL_SHA512 */
- W_from_buff_Y(sha384->buffer) ;
+#ifdef WOLFSSL_SHA384
- MOVE_to_MEMy(w,0, W_0y) ;
- for (j = 0; j < 80; j += 16) {
- Ry_1( 0, w[0]); Block_Y_0_1(); Ry_2( 0, w[0]);
- Block_Y_0_2(); Ry_3( 0, w[0]); Block_Y_0_3();
- Ry_1( 1, w[1]); Block_Y_0_4(); Ry_2( 1, w[1]);
- Block_Y_0_5(); Ry_3( 1, w[1]); Block_Y_0_6();
- Ry_1( 2, w[2]); Block_Y_0_7(); Ry_2( 2, w[2]);
- Block_Y_0_8(); Ry_3( 2, w[2]); Block_Y_0_9();
- Ry_1( 3, w[3]); Block_Y_0_10();Ry_2( 3, w[3]);
- Block_Y_0_11();Ry_3( 3, w[3]); Block_Y_0_12(w);
-
- Ry_1( 4, w[0]); Block_Y_4_1(); Ry_2( 4, w[0]);
- Block_Y_4_2(); Ry_3( 4, w[0]); Block_Y_4_3();
- Ry_1( 5, w[1]); Block_Y_4_4(); Ry_2( 5, w[1]);
- Block_Y_4_5(); Ry_3( 5, w[1]); Block_Y_4_6();
- Ry_1( 6, w[2]); Block_Y_4_7(); Ry_2( 6, w[2]);
- Block_Y_4_8(); Ry_3( 6, w[2]); Block_Y_4_9();
- Ry_1( 7, w[3]); Block_Y_4_10(); Ry_2( 7, w[3]);
- Block_Y_4_11(); Ry_3( 7, w[3]);Block_Y_4_12(w);
-
- Ry_1( 8, w[0]); Block_Y_8_1(); Ry_2( 8, w[0]);
- Block_Y_8_2(); Ry_3( 8, w[0]); Block_Y_8_3();
- Ry_1( 9, w[1]); Block_Y_8_4(); Ry_2( 9, w[1]);
- Block_Y_8_5(); Ry_3( 9, w[1]); Block_Y_8_6();
- Ry_1(10, w[2]); Block_Y_8_7(); Ry_2(10, w[2]);
- Block_Y_8_8(); Ry_3(10, w[2]); Block_Y_8_9();
- Ry_1(11, w[3]); Block_Y_8_10();Ry_2(11, w[3]);
- Block_Y_8_11();Ry_3(11, w[3]); Block_Y_8_12(w);
-
- Ry_1(12, w[0]); Block_Y_12_1(); Ry_2(12, w[0]);
- Block_Y_12_2(); Ry_3(12, w[0]); Block_Y_12_3();
- Ry_1(13, w[1]); Block_Y_12_4(); Ry_2(13, w[1]);
- Block_Y_12_5(); Ry_3(13, w[1]); Block_Y_12_6();
- Ry_1(14, w[2]); Block_Y_12_7(); Ry_2(14, w[2]);
- Block_Y_12_8(); Ry_3(14, w[2]); Block_Y_12_9();
- Ry_1(15, w[3]); Block_Y_12_10();Ry_2(15, w[3]);
- Block_Y_12_11();Ry_3(15, w[3]); Block_Y_12_12(w);
+int wc_Sha384GetHash(wc_Sha384* sha384, byte* hash)
+{
+ int ret;
+ wc_Sha384 tmpSha384;
+
+ if (sha384 == NULL || hash == NULL)
+ return BAD_FUNC_ARG;
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ if(sha384->ctx.mode == ESP32_SHA_INIT) {
+ esp_sha_try_hw_lock(&sha384->ctx);
+ }
+ if(sha384->ctx.mode != ESP32_SHA_SW) {
+ esp_sha512_digest_process(sha384, 0);
}
+#endif
+ ret = wc_Sha384Copy(sha384, &tmpSha384);
+ if (ret == 0) {
+ ret = wc_Sha384Final(&tmpSha384, hash);
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ sha384->ctx.mode = ESP32_SHA_SW;
+#endif
+ wc_Sha384Free(&tmpSha384);
+ }
+ return ret;
+}
+int wc_Sha384Copy(wc_Sha384* src, wc_Sha384* dst)
+{
+ int ret = 0;
- /* Add the working vars back into digest */
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
- sha384->digest[0] += a(0);
- sha384->digest[1] += b(0);
- sha384->digest[2] += c(0);
- sha384->digest[3] += d(0);
- sha384->digest[4] += e(0);
- sha384->digest[5] += f(0);
- sha384->digest[6] += g(0);
- sha384->digest[7] += h(0);
+ XMEMCPY(dst, src, sizeof(wc_Sha384));
+#ifdef WOLFSSL_SMALL_STACK_CACHE
+ dst->W = NULL;
+#endif
- /* Wipe variables */
- XMEMSET(T, 0, sizeof(T));
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)
+ dst->ctx.mode = src->ctx.mode;
+ dst->ctx.isfirstblock = src->ctx.isfirstblock;
+ dst->ctx.sha_type = src->ctx.sha_type;
+#endif
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+ dst->flags |= WC_HASH_FLAG_ISCOPY;
+#endif
- return 0;
+ return ret;
}
+#if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB)
+int wc_Sha384SetFlags(wc_Sha384* sha384, word32 flags)
+{
+ if (sha384) {
+ sha384->flags = flags;
+ }
+ return 0;
+}
+int wc_Sha384GetFlags(wc_Sha384* sha384, word32* flags)
+{
+ if (sha384 && flags) {
+ *flags = sha384->flags;
+ }
+ return 0;
+}
#endif
#endif /* WOLFSSL_SHA384 */
-#endif /* HAVE_FIPS */
-
-#endif /* WOLFSSL_SHA512 */
-
+#endif /* WOLFSSL_SHA512 || WOLFSSL_SHA384 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha512_asm.S b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha512_asm.S
new file mode 100644
index 000000000..6a27ce42a
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sha512_asm.S
@@ -0,0 +1,10741 @@
+/* sha512_asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifndef HAVE_INTEL_AVX1
+#define HAVE_INTEL_AVX1
+#endif /* HAVE_INTEL_AVX1 */
+#ifndef NO_AVX2_SUPPORT
+#define HAVE_INTEL_AVX2
+#endif /* NO_AVX2_SUPPORT */
+
+#ifdef HAVE_INTEL_AVX1
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_sha512_k:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x6ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x6f067aa72176fba,0xa637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+L_avx1_sha512_flip_mask:
+.quad 0x1020304050607, 0x8090a0b0c0d0e0f
+#ifndef __APPLE__
+.text
+.globl Transform_Sha512_AVX1
+.type Transform_Sha512_AVX1,@function
+.align 4
+Transform_Sha512_AVX1:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha512_AVX1
+.p2align 2
+_Transform_Sha512_AVX1:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x88, %rsp
+ leaq 64(%rdi), %rax
+ vmovdqa L_avx1_sha512_flip_mask(%rip), %xmm14
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %r11
+ movq 32(%rdi), %r12
+ movq 40(%rdi), %r13
+ movq 48(%rdi), %r14
+ movq 56(%rdi), %r15
+ vmovdqu (%rax), %xmm0
+ vmovdqu 16(%rax), %xmm1
+ vpshufb %xmm14, %xmm0, %xmm0
+ vpshufb %xmm14, %xmm1, %xmm1
+ vmovdqu 32(%rax), %xmm2
+ vmovdqu 48(%rax), %xmm3
+ vpshufb %xmm14, %xmm2, %xmm2
+ vpshufb %xmm14, %xmm3, %xmm3
+ vmovdqu 64(%rax), %xmm4
+ vmovdqu 80(%rax), %xmm5
+ vpshufb %xmm14, %xmm4, %xmm4
+ vpshufb %xmm14, %xmm5, %xmm5
+ vmovdqu 96(%rax), %xmm6
+ vmovdqu 112(%rax), %xmm7
+ vpshufb %xmm14, %xmm6, %xmm6
+ vpshufb %xmm14, %xmm7, %xmm7
+ movl $4, 128(%rsp)
+ leaq L_avx1_sha512_k(%rip), %rsi
+ movq %r9, %rbx
+ movq %r12, %rax
+ xorq %r10, %rbx
+ # Start of 16 rounds
+L_sha256_len_avx1_start:
+ vpaddq (%rsi), %xmm0, %xmm8
+ vpaddq 16(%rsi), %xmm1, %xmm9
+ vmovdqu %xmm8, (%rsp)
+ vmovdqu %xmm9, 16(%rsp)
+ vpaddq 32(%rsi), %xmm2, %xmm8
+ vpaddq 48(%rsi), %xmm3, %xmm9
+ vmovdqu %xmm8, 32(%rsp)
+ vmovdqu %xmm9, 48(%rsp)
+ vpaddq 64(%rsi), %xmm4, %xmm8
+ vpaddq 80(%rsi), %xmm5, %xmm9
+ vmovdqu %xmm8, 64(%rsp)
+ vmovdqu %xmm9, 80(%rsp)
+ vpaddq 96(%rsi), %xmm6, %xmm8
+ vpaddq 112(%rsi), %xmm7, %xmm9
+ vmovdqu %xmm8, 96(%rsp)
+ vmovdqu %xmm9, 112(%rsp)
+ addq $0x80, %rsi
+ # msg_sched: 0-1
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm0, %xmm1, %xmm12
+ vpalignr $8, %xmm4, %xmm5, %xmm13
+ # rnd_0: 1 - 1
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq (%rsp), %r15
+ xorq %r14, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm0, %xmm13, %xmm0
+ # rnd_0: 10 - 11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm0, %xmm8, %xmm0
+ # rnd_1: 1 - 1
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 8(%rsp), %r14
+ xorq %r13, %rcx
+ vpsrlq $19, %xmm7, %xmm8
+ vpsllq $45, %xmm7, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ vpsrlq $61, %xmm7, %xmm10
+ vpsllq $3, %xmm7, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm7, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ vpaddq %xmm0, %xmm8, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 2-3
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm1, %xmm2, %xmm12
+ vpalignr $8, %xmm5, %xmm6, %xmm13
+ # rnd_0: 1 - 1
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 16(%rsp), %r13
+ xorq %r12, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm1, %xmm13, %xmm1
+ # rnd_0: 10 - 11
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm1, %xmm8, %xmm1
+ # rnd_1: 1 - 1
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 24(%rsp), %r12
+ xorq %r11, %rcx
+ vpsrlq $19, %xmm0, %xmm8
+ vpsllq $45, %xmm0, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ vpsrlq $61, %xmm0, %xmm10
+ vpsllq $3, %xmm0, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm0, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ vpaddq %xmm1, %xmm8, %xmm1
+ # msg_sched done: 2-5
+ # msg_sched: 4-5
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm2, %xmm3, %xmm12
+ vpalignr $8, %xmm6, %xmm7, %xmm13
+ # rnd_0: 1 - 1
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 32(%rsp), %r11
+ xorq %r10, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm2, %xmm13, %xmm2
+ # rnd_0: 10 - 11
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm2, %xmm8, %xmm2
+ # rnd_1: 1 - 1
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 40(%rsp), %r10
+ xorq %r9, %rcx
+ vpsrlq $19, %xmm1, %xmm8
+ vpsllq $45, %xmm1, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ vpsrlq $61, %xmm1, %xmm10
+ vpsllq $3, %xmm1, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm1, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ vpaddq %xmm2, %xmm8, %xmm2
+ # msg_sched done: 4-7
+ # msg_sched: 6-7
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm3, %xmm4, %xmm12
+ vpalignr $8, %xmm7, %xmm0, %xmm13
+ # rnd_0: 1 - 1
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 48(%rsp), %r9
+ xorq %r8, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm3, %xmm13, %xmm3
+ # rnd_0: 10 - 11
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm3, %xmm8, %xmm3
+ # rnd_1: 1 - 1
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 56(%rsp), %r8
+ xorq %r15, %rcx
+ vpsrlq $19, %xmm2, %xmm8
+ vpsllq $45, %xmm2, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ vpsrlq $61, %xmm2, %xmm10
+ vpsllq $3, %xmm2, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm2, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ vpaddq %xmm3, %xmm8, %xmm3
+ # msg_sched done: 6-9
+ # msg_sched: 8-9
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm4, %xmm5, %xmm12
+ vpalignr $8, %xmm0, %xmm1, %xmm13
+ # rnd_0: 1 - 1
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 64(%rsp), %r15
+ xorq %r14, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm4, %xmm13, %xmm4
+ # rnd_0: 10 - 11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm4, %xmm8, %xmm4
+ # rnd_1: 1 - 1
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 72(%rsp), %r14
+ xorq %r13, %rcx
+ vpsrlq $19, %xmm3, %xmm8
+ vpsllq $45, %xmm3, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ vpsrlq $61, %xmm3, %xmm10
+ vpsllq $3, %xmm3, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm3, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ vpaddq %xmm4, %xmm8, %xmm4
+ # msg_sched done: 8-11
+ # msg_sched: 10-11
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm5, %xmm6, %xmm12
+ vpalignr $8, %xmm1, %xmm2, %xmm13
+ # rnd_0: 1 - 1
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 80(%rsp), %r13
+ xorq %r12, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm5, %xmm13, %xmm5
+ # rnd_0: 10 - 11
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm5, %xmm8, %xmm5
+ # rnd_1: 1 - 1
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 88(%rsp), %r12
+ xorq %r11, %rcx
+ vpsrlq $19, %xmm4, %xmm8
+ vpsllq $45, %xmm4, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ vpsrlq $61, %xmm4, %xmm10
+ vpsllq $3, %xmm4, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm4, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ vpaddq %xmm5, %xmm8, %xmm5
+ # msg_sched done: 10-13
+ # msg_sched: 12-13
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm6, %xmm7, %xmm12
+ vpalignr $8, %xmm2, %xmm3, %xmm13
+ # rnd_0: 1 - 1
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 96(%rsp), %r11
+ xorq %r10, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm6, %xmm13, %xmm6
+ # rnd_0: 10 - 11
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm6, %xmm8, %xmm6
+ # rnd_1: 1 - 1
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 104(%rsp), %r10
+ xorq %r9, %rcx
+ vpsrlq $19, %xmm5, %xmm8
+ vpsllq $45, %xmm5, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ vpsrlq $61, %xmm5, %xmm10
+ vpsllq $3, %xmm5, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm5, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ vpaddq %xmm6, %xmm8, %xmm6
+ # msg_sched done: 12-15
+ # msg_sched: 14-15
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm7, %xmm0, %xmm12
+ vpalignr $8, %xmm3, %xmm4, %xmm13
+ # rnd_0: 1 - 1
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 112(%rsp), %r9
+ xorq %r8, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm7, %xmm13, %xmm7
+ # rnd_0: 10 - 11
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm7, %xmm8, %xmm7
+ # rnd_1: 1 - 1
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 120(%rsp), %r8
+ xorq %r15, %rcx
+ vpsrlq $19, %xmm6, %xmm8
+ vpsllq $45, %xmm6, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ vpsrlq $61, %xmm6, %xmm10
+ vpsllq $3, %xmm6, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm6, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ vpaddq %xmm7, %xmm8, %xmm7
+ # msg_sched done: 14-17
+ subl $0x01, 128(%rsp)
+ jne L_sha256_len_avx1_start
+ vpaddq (%rsi), %xmm0, %xmm8
+ vpaddq 16(%rsi), %xmm1, %xmm9
+ vmovdqu %xmm8, (%rsp)
+ vmovdqu %xmm9, 16(%rsp)
+ vpaddq 32(%rsi), %xmm2, %xmm8
+ vpaddq 48(%rsi), %xmm3, %xmm9
+ vmovdqu %xmm8, 32(%rsp)
+ vmovdqu %xmm9, 48(%rsp)
+ vpaddq 64(%rsi), %xmm4, %xmm8
+ vpaddq 80(%rsi), %xmm5, %xmm9
+ vmovdqu %xmm8, 64(%rsp)
+ vmovdqu %xmm9, 80(%rsp)
+ vpaddq 96(%rsi), %xmm6, %xmm8
+ vpaddq 112(%rsi), %xmm7, %xmm9
+ vmovdqu %xmm8, 96(%rsp)
+ vmovdqu %xmm9, 112(%rsp)
+ # rnd_all_2: 0-1
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq (%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 8(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ # rnd_all_2: 2-3
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 16(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 24(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ # rnd_all_2: 4-5
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 32(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 40(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ # rnd_all_2: 6-7
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 48(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 56(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ # rnd_all_2: 8-9
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 64(%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 72(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ # rnd_all_2: 10-11
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 80(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 88(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ # rnd_all_2: 12-13
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 96(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 104(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ # rnd_all_2: 14-15
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 112(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 120(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ addq %r8, (%rdi)
+ addq %r9, 8(%rdi)
+ addq %r10, 16(%rdi)
+ addq %r11, 24(%rdi)
+ addq %r12, 32(%rdi)
+ addq %r13, 40(%rdi)
+ addq %r14, 48(%rdi)
+ addq %r15, 56(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x88, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha512_AVX1,.-Transform_Sha512_AVX1
+#endif /* __APPLE__ */
+# Transform_Sha512_AVX1_Len -- SHA-512 block transform, AVX1 variant.
+# SysV AMD64: rdi = hash context (eight 64-bit state words at 0..56(%rdi);
+# the data pointer is loaded from 224(%rdi) below), rsi = byte length.
+# Length is consumed 0x80 (128) bytes per block (see the subl $0x80, %ebp
+# / jnz at the end), so len is assumed to be a multiple of 128 -- TODO
+# confirm against the C caller.  Returns 0 in rax.
+#ifndef __APPLE__
+.text
+.globl Transform_Sha512_AVX1_Len
+.type Transform_Sha512_AVX1_Len,@function
+.align 4
+Transform_Sha512_AVX1_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha512_AVX1_Len
+.p2align 2
+_Transform_Sha512_AVX1_Len:
+#endif /* __APPLE__ */
+ # Save all SysV callee-saved GPRs used below.
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ # rbp = remaining byte count (2nd argument).
+ movq %rsi, %rbp
+ # 0x90 bytes of locals: 0..127 = W[t]+K[t] schedule, 128 = loop
+ # counter, 136 = spilled K-table pointer (see stores below).
+ subq $0x90, %rsp
+ # rsi = input data pointer, fetched from the context at offset 224.
+ movq 224(%rdi), %rsi
+ # rdx = K constant table; xmm14 = byte-order flip mask for vpshufb.
+ leaq L_avx1_sha512_k(%rip), %rdx
+ vmovdqa L_avx1_sha512_flip_mask(%rip), %xmm14
+ # Load the eight state words into r8..r15.
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %r11
+ movq 32(%rdi), %r12
+ movq 40(%rdi), %r13
+ movq 48(%rdi), %r14
+ movq 56(%rdi), %r15
+ # Start of loop processing a block
+L_sha512_len_avx1_begin:
+ # Load the 128-byte block into xmm0..xmm7 and byte-swap each lane.
+ vmovdqu (%rsi), %xmm0
+ vmovdqu 16(%rsi), %xmm1
+ vpshufb %xmm14, %xmm0, %xmm0
+ vpshufb %xmm14, %xmm1, %xmm1
+ vmovdqu 32(%rsi), %xmm2
+ vmovdqu 48(%rsi), %xmm3
+ vpshufb %xmm14, %xmm2, %xmm2
+ vpshufb %xmm14, %xmm3, %xmm3
+ vmovdqu 64(%rsi), %xmm4
+ vmovdqu 80(%rsi), %xmm5
+ vpshufb %xmm14, %xmm4, %xmm4
+ vpshufb %xmm14, %xmm5, %xmm5
+ vmovdqu 96(%rsi), %xmm6
+ vmovdqu 112(%rsi), %xmm7
+ vpshufb %xmm14, %xmm6, %xmm6
+ vpshufb %xmm14, %xmm7, %xmm7
+ # 4 iterations of the 16-round scheduled loop below (rounds 0-63);
+ # the remaining 16 rounds follow after the loop.
+ movl $4, 128(%rsp)
+ movq %r9, %rbx
+ movq %r12, %rax
+ xorq %r10, %rbx
+ # Pre-add K[0..15] to W[0..15] and spill to the stack schedule area.
+ vpaddq (%rdx), %xmm0, %xmm8
+ vpaddq 16(%rdx), %xmm1, %xmm9
+ vmovdqu %xmm8, (%rsp)
+ vmovdqu %xmm9, 16(%rsp)
+ vpaddq 32(%rdx), %xmm2, %xmm8
+ vpaddq 48(%rdx), %xmm3, %xmm9
+ vmovdqu %xmm8, 32(%rsp)
+ vmovdqu %xmm9, 48(%rsp)
+ vpaddq 64(%rdx), %xmm4, %xmm8
+ vpaddq 80(%rdx), %xmm5, %xmm9
+ vmovdqu %xmm8, 64(%rsp)
+ vmovdqu %xmm9, 80(%rsp)
+ vpaddq 96(%rdx), %xmm6, %xmm8
+ vpaddq 112(%rdx), %xmm7, %xmm9
+ vmovdqu %xmm8, 96(%rsp)
+ vmovdqu %xmm9, 112(%rsp)
+ # Start of 16 rounds
+ # Each iteration performs 16 rounds while scheduling the next 16
+ # message words in xmm0..xmm7, interleaving scalar round arithmetic
+ # with the AVX shifts/rotates to hide latency.  The K pointer (rdx)
+ # advances 0x80 (16 entries) per iteration and is spilled to
+ # 136(%rsp) because rdx is reused as a round temporary.
+L_sha512_len_avx1_start:
+ addq $0x80, %rdx
+ movq %rdx, 136(%rsp)
+ # msg_sched: 0-1
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm0, %xmm1, %xmm12
+ vpalignr $8, %xmm4, %xmm5, %xmm13
+ # rnd_0: 1 - 1
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq (%rsp), %r15
+ xorq %r14, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm0, %xmm13, %xmm0
+ # rnd_0: 10 - 11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm0, %xmm8, %xmm0
+ # rnd_1: 1 - 1
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 8(%rsp), %r14
+ xorq %r13, %rcx
+ vpsrlq $19, %xmm7, %xmm8
+ vpsllq $45, %xmm7, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ vpsrlq $61, %xmm7, %xmm10
+ vpsllq $3, %xmm7, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm7, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ vpaddq %xmm0, %xmm8, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 2-3
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm1, %xmm2, %xmm12
+ vpalignr $8, %xmm5, %xmm6, %xmm13
+ # rnd_0: 1 - 1
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 16(%rsp), %r13
+ xorq %r12, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm1, %xmm13, %xmm1
+ # rnd_0: 10 - 11
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm1, %xmm8, %xmm1
+ # rnd_1: 1 - 1
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 24(%rsp), %r12
+ xorq %r11, %rcx
+ vpsrlq $19, %xmm0, %xmm8
+ vpsllq $45, %xmm0, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ vpsrlq $61, %xmm0, %xmm10
+ vpsllq $3, %xmm0, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm0, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ vpaddq %xmm1, %xmm8, %xmm1
+ # msg_sched done: 2-5
+ # msg_sched: 4-5
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm2, %xmm3, %xmm12
+ vpalignr $8, %xmm6, %xmm7, %xmm13
+ # rnd_0: 1 - 1
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 32(%rsp), %r11
+ xorq %r10, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm2, %xmm13, %xmm2
+ # rnd_0: 10 - 11
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm2, %xmm8, %xmm2
+ # rnd_1: 1 - 1
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 40(%rsp), %r10
+ xorq %r9, %rcx
+ vpsrlq $19, %xmm1, %xmm8
+ vpsllq $45, %xmm1, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ vpsrlq $61, %xmm1, %xmm10
+ vpsllq $3, %xmm1, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm1, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ vpaddq %xmm2, %xmm8, %xmm2
+ # msg_sched done: 4-7
+ # msg_sched: 6-7
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm3, %xmm4, %xmm12
+ vpalignr $8, %xmm7, %xmm0, %xmm13
+ # rnd_0: 1 - 1
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 48(%rsp), %r9
+ xorq %r8, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm3, %xmm13, %xmm3
+ # rnd_0: 10 - 11
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm3, %xmm8, %xmm3
+ # rnd_1: 1 - 1
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 56(%rsp), %r8
+ xorq %r15, %rcx
+ vpsrlq $19, %xmm2, %xmm8
+ vpsllq $45, %xmm2, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ vpsrlq $61, %xmm2, %xmm10
+ vpsllq $3, %xmm2, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm2, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ vpaddq %xmm3, %xmm8, %xmm3
+ # msg_sched done: 6-9
+ # msg_sched: 8-9
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm4, %xmm5, %xmm12
+ vpalignr $8, %xmm0, %xmm1, %xmm13
+ # rnd_0: 1 - 1
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 64(%rsp), %r15
+ xorq %r14, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm4, %xmm13, %xmm4
+ # rnd_0: 10 - 11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm4, %xmm8, %xmm4
+ # rnd_1: 1 - 1
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 72(%rsp), %r14
+ xorq %r13, %rcx
+ vpsrlq $19, %xmm3, %xmm8
+ vpsllq $45, %xmm3, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ vpsrlq $61, %xmm3, %xmm10
+ vpsllq $3, %xmm3, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm3, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ vpaddq %xmm4, %xmm8, %xmm4
+ # msg_sched done: 8-11
+ # msg_sched: 10-11
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm5, %xmm6, %xmm12
+ vpalignr $8, %xmm1, %xmm2, %xmm13
+ # rnd_0: 1 - 1
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 80(%rsp), %r13
+ xorq %r12, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm5, %xmm13, %xmm5
+ # rnd_0: 10 - 11
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm5, %xmm8, %xmm5
+ # rnd_1: 1 - 1
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 88(%rsp), %r12
+ xorq %r11, %rcx
+ vpsrlq $19, %xmm4, %xmm8
+ vpsllq $45, %xmm4, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ vpsrlq $61, %xmm4, %xmm10
+ vpsllq $3, %xmm4, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm4, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ vpaddq %xmm5, %xmm8, %xmm5
+ # msg_sched done: 10-13
+ # msg_sched: 12-13
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm6, %xmm7, %xmm12
+ vpalignr $8, %xmm2, %xmm3, %xmm13
+ # rnd_0: 1 - 1
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 96(%rsp), %r11
+ xorq %r10, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm6, %xmm13, %xmm6
+ # rnd_0: 10 - 11
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm6, %xmm8, %xmm6
+ # rnd_1: 1 - 1
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 104(%rsp), %r10
+ xorq %r9, %rcx
+ vpsrlq $19, %xmm5, %xmm8
+ vpsllq $45, %xmm5, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ vpsrlq $61, %xmm5, %xmm10
+ vpsllq $3, %xmm5, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm5, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ vpaddq %xmm6, %xmm8, %xmm6
+ # msg_sched done: 12-15
+ # msg_sched: 14-15
+ # rnd_0: 0 - 0
+ rorq $23, %rax
+ vpalignr $8, %xmm7, %xmm0, %xmm12
+ vpalignr $8, %xmm3, %xmm4, %xmm13
+ # rnd_0: 1 - 1
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 112(%rsp), %r9
+ xorq %r8, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 3
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 4 - 5
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 6 - 7
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 8 - 9
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm7, %xmm13, %xmm7
+ # rnd_0: 10 - 11
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ # rnd_1: 0 - 0
+ rorq $23, %rax
+ vpaddq %xmm7, %xmm8, %xmm7
+ # rnd_1: 1 - 1
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 120(%rsp), %r8
+ xorq %r15, %rcx
+ vpsrlq $19, %xmm6, %xmm8
+ vpsllq $45, %xmm6, %xmm9
+ # rnd_1: 2 - 3
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ vpsrlq $61, %xmm6, %xmm10
+ vpsllq $3, %xmm6, %xmm11
+ # rnd_1: 4 - 6
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 7 - 8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm6, %xmm11
+ # rnd_1: 9 - 10
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 11 - 11
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ vpaddq %xmm7, %xmm8, %xmm7
+ # msg_sched done: 14-17
+ # Restore the advanced K pointer and pre-add the next 16 constants
+ # to the freshly scheduled message words, spilling W+K to the stack
+ # for the next iteration (or the final 16 rounds).
+ movq 136(%rsp), %rdx
+ vpaddq (%rdx), %xmm0, %xmm8
+ vpaddq 16(%rdx), %xmm1, %xmm9
+ vmovdqu %xmm8, (%rsp)
+ vmovdqu %xmm9, 16(%rsp)
+ vpaddq 32(%rdx), %xmm2, %xmm8
+ vpaddq 48(%rdx), %xmm3, %xmm9
+ vmovdqu %xmm8, 32(%rsp)
+ vmovdqu %xmm9, 48(%rsp)
+ vpaddq 64(%rdx), %xmm4, %xmm8
+ vpaddq 80(%rdx), %xmm5, %xmm9
+ vmovdqu %xmm8, 64(%rsp)
+ vmovdqu %xmm9, 80(%rsp)
+ vpaddq 96(%rdx), %xmm6, %xmm8
+ vpaddq 112(%rdx), %xmm7, %xmm9
+ vmovdqu %xmm8, 96(%rsp)
+ vmovdqu %xmm9, 112(%rsp)
+ # Run the scheduled loop 4 times (rounds 0-63).
+ subl $0x01, 128(%rsp)
+ jne L_sha512_len_avx1_start
+ # Final 16 rounds (64-79): the full message schedule is already on
+ # the stack (W+K at 0..120(%rsp)), so these are rounds only, no
+ # vector scheduling interleaved.
+ # rnd_all_2: 0-1
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq (%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 8(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ # rnd_all_2: 2-3
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 16(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 24(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ # rnd_all_2: 4-5
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 32(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 40(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ # rnd_all_2: 6-7
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 48(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 56(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ # rnd_all_2: 8-9
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 64(%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 72(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ # rnd_all_2: 10-11
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 80(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 88(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ # rnd_all_2: 12-13
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 96(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 104(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ # rnd_all_2: 14-15
+ # rnd_0: 0 - 11
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 112(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ # rnd_1: 0 - 11
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 120(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ # Feed-forward: add the previous state (in memory) into the working
+ # registers, then advance the data pointer and remaining length.
+ addq (%rdi), %r8
+ addq 8(%rdi), %r9
+ addq 16(%rdi), %r10
+ addq 24(%rdi), %r11
+ addq 32(%rdi), %r12
+ addq 40(%rdi), %r13
+ addq 48(%rdi), %r14
+ addq 56(%rdi), %r15
+ # Reset rdx to the start of the K table for the next block.
+ leaq L_avx1_sha512_k(%rip), %rdx
+ addq $0x80, %rsi
+ subl $0x80, %ebp
+ # Store the updated state.  Note: the movq/leaq instructions between
+ # the subl above and the jnz below do not modify flags, so jnz tests
+ # the remaining-length decrement.
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, 32(%rdi)
+ movq %r13, 40(%rdi)
+ movq %r14, 48(%rdi)
+ movq %r15, 56(%rdi)
+ jnz L_sha512_len_avx1_begin
+ # Return 0; vzeroupper avoids AVX->SSE transition penalties in the
+ # caller.  Restore callee-saved registers in reverse push order.
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x90, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha512_AVX1_Len,.-Transform_Sha512_AVX1_Len
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# SHA-512 round constants K[0..79] (FIPS 180-4) for the AVX1/RORX
+# variant; 16-byte aligned so vpaddq can read two constants at a time.
+L_avx1_rorx_sha512_k:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x6ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x6f067aa72176fba,0xa637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# vpshufb shuffle control: reverses the bytes within each 64-bit lane
+# (indices 7..0 then 15..8), converting the big-endian message words of
+# the input block to host little-endian order during schedule load.
+L_avx1_rorx_sha512_flip_mask:
+.quad 0x1020304050607, 0x8090a0b0c0d0e0f
+# Transform_Sha512_AVX1_RORX
+# C prototype (project-local): void Transform_Sha512_AVX1_RORX(wc_Sha512* sha512)
+# ABI:   SysV AMD64; requires AVX and BMI2 (rorx).
+# In:    rdi = sha512 state: 8 x 64-bit digest words h[0..7] at offset 0,
+#        one 128-byte big-endian message block at offset 64.
+# Out:   digest words at (%rdi) updated in place; rax = 0 on return.
+# Clobbers: rax, rcx, rdx, rsi, xmm0-xmm14, flags.  Callee-saved rbx and
+#        r12-r15 are pushed/popped.  Working vars a..h live in r8..r15.
+# Stack: 0x88 bytes: W[i]+K[i] schedule (16 x 8 bytes) at (%rsp),
+#        32-bit loop counter at 128(%rsp).
+# Fix (review): internal loop label renamed from the misleading
+# L_sha256_len_avx1_rorx_start (this is the SHA-512 non-Len transform) to
+# L_sha512_avx1_rorx_start; the _Len variant keeps its own distinct label.
+#ifndef __APPLE__
+.text
+.globl Transform_Sha512_AVX1_RORX
+.type Transform_Sha512_AVX1_RORX,@function
+.align 4
+Transform_Sha512_AVX1_RORX:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha512_AVX1_RORX
+.p2align 2
+_Transform_Sha512_AVX1_RORX:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x88, %rsp
+ leaq 64(%rdi), %rax
+ vmovdqa L_avx1_rorx_sha512_flip_mask(%rip), %xmm14
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %r11
+ movq 32(%rdi), %r12
+ movq 40(%rdi), %r13
+ movq 48(%rdi), %r14
+ movq 56(%rdi), %r15
+ # Load the 128-byte block into xmm0-xmm7, byte-swapping each 64-bit word.
+ vmovdqu (%rax), %xmm0
+ vmovdqu 16(%rax), %xmm1
+ vpshufb %xmm14, %xmm0, %xmm0
+ vpshufb %xmm14, %xmm1, %xmm1
+ vmovdqu 32(%rax), %xmm2
+ vmovdqu 48(%rax), %xmm3
+ vpshufb %xmm14, %xmm2, %xmm2
+ vpshufb %xmm14, %xmm3, %xmm3
+ vmovdqu 64(%rax), %xmm4
+ vmovdqu 80(%rax), %xmm5
+ vpshufb %xmm14, %xmm4, %xmm4
+ vpshufb %xmm14, %xmm5, %xmm5
+ vmovdqu 96(%rax), %xmm6
+ vmovdqu 112(%rax), %xmm7
+ vpshufb %xmm14, %xmm6, %xmm6
+ vpshufb %xmm14, %xmm7, %xmm7
+ # 4 iterations of 16 message-scheduled rounds (rounds 0..63); the final
+ # 16 rounds (64..79) are unrolled after the loop with no scheduling.
+ movl $4, 128(%rsp)
+ leaq L_avx1_rorx_sha512_k(%rip), %rsi
+ movq %r9, %rbx
+ xorq %rdx, %rdx
+ xorq %r10, %rbx
+ # Pre-add round constants K[0..15] into the on-stack schedule W+K.
+ vpaddq (%rsi), %xmm0, %xmm8
+ vpaddq 16(%rsi), %xmm1, %xmm9
+ vmovdqu %xmm8, (%rsp)
+ vmovdqu %xmm9, 16(%rsp)
+ vpaddq 32(%rsi), %xmm2, %xmm8
+ vpaddq 48(%rsi), %xmm3, %xmm9
+ vmovdqu %xmm8, 32(%rsp)
+ vmovdqu %xmm9, 48(%rsp)
+ vpaddq 64(%rsi), %xmm4, %xmm8
+ vpaddq 80(%rsi), %xmm5, %xmm9
+ vmovdqu %xmm8, 64(%rsp)
+ vmovdqu %xmm9, 80(%rsp)
+ vpaddq 96(%rsi), %xmm6, %xmm8
+ vpaddq 112(%rsi), %xmm7, %xmm9
+ vmovdqu %xmm8, 96(%rsp)
+ vmovdqu %xmm9, 112(%rsp)
+ # Start of 16 rounds
+L_sha512_avx1_rorx_start:
+ addq $0x80, %rsi
+ # msg_sched: 0-1
+ # rnd_0: 0 - 0
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ vpalignr $8, %xmm0, %xmm1, %xmm12
+ vpalignr $8, %xmm4, %xmm5, %xmm13
+ # rnd_0: 1 - 1
+ addq (%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm0, %xmm13, %xmm0
+ # rnd_0: 6 - 7
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ vpaddq %xmm0, %xmm8, %xmm0
+ # rnd_1: 0 - 0
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ vpsrlq $19, %xmm7, %xmm8
+ vpsllq $45, %xmm7, %xmm9
+ # rnd_1: 1 - 1
+ addq 8(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm7, %xmm10
+ vpsllq $3, %xmm7, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm7, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ addq %r14, %r10
+ xorq %r15, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ vpaddq %xmm0, %xmm8, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 2-3
+ # rnd_0: 0 - 0
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ vpalignr $8, %xmm1, %xmm2, %xmm12
+ vpalignr $8, %xmm5, %xmm6, %xmm13
+ # rnd_0: 1 - 1
+ addq 16(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm1, %xmm13, %xmm1
+ # rnd_0: 6 - 7
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ vpaddq %xmm1, %xmm8, %xmm1
+ # rnd_1: 0 - 0
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ vpsrlq $19, %xmm0, %xmm8
+ vpsllq $45, %xmm0, %xmm9
+ # rnd_1: 1 - 1
+ addq 24(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm0, %xmm10
+ vpsllq $3, %xmm0, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm0, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ addq %r12, %r8
+ xorq %r13, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ vpaddq %xmm1, %xmm8, %xmm1
+ # msg_sched done: 2-5
+ # msg_sched: 4-5
+ # rnd_0: 0 - 0
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ vpalignr $8, %xmm2, %xmm3, %xmm12
+ vpalignr $8, %xmm6, %xmm7, %xmm13
+ # rnd_0: 1 - 1
+ addq 32(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm2, %xmm13, %xmm2
+ # rnd_0: 6 - 7
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ vpaddq %xmm2, %xmm8, %xmm2
+ # rnd_1: 0 - 0
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ vpsrlq $19, %xmm1, %xmm8
+ vpsllq $45, %xmm1, %xmm9
+ # rnd_1: 1 - 1
+ addq 40(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm1, %xmm10
+ vpsllq $3, %xmm1, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm1, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ addq %r10, %r14
+ xorq %r11, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ vpaddq %xmm2, %xmm8, %xmm2
+ # msg_sched done: 4-7
+ # msg_sched: 6-7
+ # rnd_0: 0 - 0
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ vpalignr $8, %xmm3, %xmm4, %xmm12
+ vpalignr $8, %xmm7, %xmm0, %xmm13
+ # rnd_0: 1 - 1
+ addq 48(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm3, %xmm13, %xmm3
+ # rnd_0: 6 - 7
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ vpaddq %xmm3, %xmm8, %xmm3
+ # rnd_1: 0 - 0
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ vpsrlq $19, %xmm2, %xmm8
+ vpsllq $45, %xmm2, %xmm9
+ # rnd_1: 1 - 1
+ addq 56(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm2, %xmm10
+ vpsllq $3, %xmm2, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm2, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ addq %r8, %r12
+ xorq %r9, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ vpaddq %xmm3, %xmm8, %xmm3
+ # msg_sched done: 6-9
+ # msg_sched: 8-9
+ # rnd_0: 0 - 0
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ vpalignr $8, %xmm4, %xmm5, %xmm12
+ vpalignr $8, %xmm0, %xmm1, %xmm13
+ # rnd_0: 1 - 1
+ addq 64(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm4, %xmm13, %xmm4
+ # rnd_0: 6 - 7
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ vpaddq %xmm4, %xmm8, %xmm4
+ # rnd_1: 0 - 0
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ vpsrlq $19, %xmm3, %xmm8
+ vpsllq $45, %xmm3, %xmm9
+ # rnd_1: 1 - 1
+ addq 72(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm3, %xmm10
+ vpsllq $3, %xmm3, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm3, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ addq %r14, %r10
+ xorq %r15, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ vpaddq %xmm4, %xmm8, %xmm4
+ # msg_sched done: 8-11
+ # msg_sched: 10-11
+ # rnd_0: 0 - 0
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ vpalignr $8, %xmm5, %xmm6, %xmm12
+ vpalignr $8, %xmm1, %xmm2, %xmm13
+ # rnd_0: 1 - 1
+ addq 80(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm5, %xmm13, %xmm5
+ # rnd_0: 6 - 7
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ vpaddq %xmm5, %xmm8, %xmm5
+ # rnd_1: 0 - 0
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ vpsrlq $19, %xmm4, %xmm8
+ vpsllq $45, %xmm4, %xmm9
+ # rnd_1: 1 - 1
+ addq 88(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm4, %xmm10
+ vpsllq $3, %xmm4, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm4, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ addq %r12, %r8
+ xorq %r13, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ vpaddq %xmm5, %xmm8, %xmm5
+ # msg_sched done: 10-13
+ # msg_sched: 12-13
+ # rnd_0: 0 - 0
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ vpalignr $8, %xmm6, %xmm7, %xmm12
+ vpalignr $8, %xmm2, %xmm3, %xmm13
+ # rnd_0: 1 - 1
+ addq 96(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm6, %xmm13, %xmm6
+ # rnd_0: 6 - 7
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ vpaddq %xmm6, %xmm8, %xmm6
+ # rnd_1: 0 - 0
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ vpsrlq $19, %xmm5, %xmm8
+ vpsllq $45, %xmm5, %xmm9
+ # rnd_1: 1 - 1
+ addq 104(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm5, %xmm10
+ vpsllq $3, %xmm5, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm5, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ addq %r10, %r14
+ xorq %r11, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ vpaddq %xmm6, %xmm8, %xmm6
+ # msg_sched done: 12-15
+ # msg_sched: 14-15
+ # rnd_0: 0 - 0
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ vpalignr $8, %xmm7, %xmm0, %xmm12
+ vpalignr $8, %xmm3, %xmm4, %xmm13
+ # rnd_0: 1 - 1
+ addq 112(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm7, %xmm13, %xmm7
+ # rnd_0: 6 - 7
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ vpaddq %xmm7, %xmm8, %xmm7
+ # rnd_1: 0 - 0
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ vpsrlq $19, %xmm6, %xmm8
+ vpsllq $45, %xmm6, %xmm9
+ # rnd_1: 1 - 1
+ addq 120(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm6, %xmm10
+ vpsllq $3, %xmm6, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm6, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ addq %r8, %r12
+ xorq %r9, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ vpaddq %xmm7, %xmm8, %xmm7
+ # msg_sched done: 14-17
+ # Pre-add K[i] for the next 16 rounds into the schedule slots.
+ vpaddq (%rsi), %xmm0, %xmm8
+ vpaddq 16(%rsi), %xmm1, %xmm9
+ vmovdqu %xmm8, (%rsp)
+ vmovdqu %xmm9, 16(%rsp)
+ vpaddq 32(%rsi), %xmm2, %xmm8
+ vpaddq 48(%rsi), %xmm3, %xmm9
+ vmovdqu %xmm8, 32(%rsp)
+ vmovdqu %xmm9, 48(%rsp)
+ vpaddq 64(%rsi), %xmm4, %xmm8
+ vpaddq 80(%rsi), %xmm5, %xmm9
+ vmovdqu %xmm8, 64(%rsp)
+ vmovdqu %xmm9, 80(%rsp)
+ vpaddq 96(%rsi), %xmm6, %xmm8
+ vpaddq 112(%rsi), %xmm7, %xmm9
+ vmovdqu %xmm8, 96(%rsp)
+ vmovdqu %xmm9, 112(%rsp)
+ subl $0x01, 128(%rsp)
+ jne L_sha512_avx1_rorx_start
+ # Final 16 rounds (64..79): schedule already in W+K slots, no vector work.
+ # rnd_all_2: 0-1
+ # rnd_0: 0 - 7
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq (%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 8(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ addq %r14, %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ # rnd_all_2: 2-3
+ # rnd_0: 0 - 7
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 16(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 24(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ addq %r12, %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_2: 4-5
+ # rnd_0: 0 - 7
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 32(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 40(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ addq %r10, %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ # rnd_all_2: 6-7
+ # rnd_0: 0 - 7
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 48(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 56(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ addq %r8, %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ # rnd_all_2: 8-9
+ # rnd_0: 0 - 7
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq 64(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 72(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ addq %r14, %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ # rnd_all_2: 10-11
+ # rnd_0: 0 - 7
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 80(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 88(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ addq %r12, %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_2: 12-13
+ # rnd_0: 0 - 7
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 96(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 104(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ addq %r10, %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ # rnd_all_2: 14-15
+ # rnd_0: 0 - 7
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 112(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 120(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ addq %r8, %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ addq %rdx, %r8
+ # Add working variables back into the digest (h[i] += a..h).
+ addq %r8, (%rdi)
+ addq %r9, 8(%rdi)
+ addq %r10, 16(%rdi)
+ addq %r11, 24(%rdi)
+ addq %r12, 32(%rdi)
+ addq %r13, 40(%rdi)
+ addq %r14, 48(%rdi)
+ addq %r15, 56(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x88, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha512_AVX1_RORX,.-Transform_Sha512_AVX1_RORX
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha512_AVX1_RORX_Len
+.type Transform_Sha512_AVX1_RORX_Len,@function
+.align 4
+Transform_Sha512_AVX1_RORX_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha512_AVX1_RORX_Len
+.p2align 2
+_Transform_Sha512_AVX1_RORX_Len:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rsi, %rbp
+ subq $0x90, %rsp
+ movq 224(%rdi), %rsi
+ leaq L_avx1_rorx_sha512_k(%rip), %rcx
+ vmovdqa L_avx1_rorx_sha512_flip_mask(%rip), %xmm14
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %r11
+ movq 32(%rdi), %r12
+ movq 40(%rdi), %r13
+ movq 48(%rdi), %r14
+ movq 56(%rdi), %r15
+ # Start of loop processing a block
+L_sha512_len_avx1_rorx_begin:
+ vmovdqu (%rsi), %xmm0
+ vmovdqu 16(%rsi), %xmm1
+ vpshufb %xmm14, %xmm0, %xmm0
+ vpshufb %xmm14, %xmm1, %xmm1
+ vmovdqu 32(%rsi), %xmm2
+ vmovdqu 48(%rsi), %xmm3
+ vpshufb %xmm14, %xmm2, %xmm2
+ vpshufb %xmm14, %xmm3, %xmm3
+ vmovdqu 64(%rsi), %xmm4
+ vmovdqu 80(%rsi), %xmm5
+ vpshufb %xmm14, %xmm4, %xmm4
+ vpshufb %xmm14, %xmm5, %xmm5
+ vmovdqu 96(%rsi), %xmm6
+ vmovdqu 112(%rsi), %xmm7
+ vpshufb %xmm14, %xmm6, %xmm6
+ vpshufb %xmm14, %xmm7, %xmm7
+ movl $4, 128(%rsp)
+ movq %r9, %rbx
+ xorq %rdx, %rdx
+ xorq %r10, %rbx
+ vpaddq (%rcx), %xmm0, %xmm8
+ vpaddq 16(%rcx), %xmm1, %xmm9
+ vmovdqu %xmm8, (%rsp)
+ vmovdqu %xmm9, 16(%rsp)
+ vpaddq 32(%rcx), %xmm2, %xmm8
+ vpaddq 48(%rcx), %xmm3, %xmm9
+ vmovdqu %xmm8, 32(%rsp)
+ vmovdqu %xmm9, 48(%rsp)
+ vpaddq 64(%rcx), %xmm4, %xmm8
+ vpaddq 80(%rcx), %xmm5, %xmm9
+ vmovdqu %xmm8, 64(%rsp)
+ vmovdqu %xmm9, 80(%rsp)
+ vpaddq 96(%rcx), %xmm6, %xmm8
+ vpaddq 112(%rcx), %xmm7, %xmm9
+ vmovdqu %xmm8, 96(%rsp)
+ vmovdqu %xmm9, 112(%rsp)
+ # Start of 16 rounds
+L_sha512_len_avx1_rorx_start:
+ addq $0x80, %rcx
+ movq %rcx, 136(%rsp)
+ # msg_sched: 0-1
+ # rnd_0: 0 - 0
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ vpalignr $8, %xmm0, %xmm1, %xmm12
+ vpalignr $8, %xmm4, %xmm5, %xmm13
+ # rnd_0: 1 - 1
+ addq (%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm0, %xmm13, %xmm0
+ # rnd_0: 6 - 7
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ vpaddq %xmm0, %xmm8, %xmm0
+ # rnd_1: 0 - 0
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ vpsrlq $19, %xmm7, %xmm8
+ vpsllq $45, %xmm7, %xmm9
+ # rnd_1: 1 - 1
+ addq 8(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm7, %xmm10
+ vpsllq $3, %xmm7, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm7, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ addq %r14, %r10
+ xorq %r15, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ vpaddq %xmm0, %xmm8, %xmm0
+ # msg_sched done: 0-3
+ # msg_sched: 2-3
+ # rnd_0: 0 - 0
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ vpalignr $8, %xmm1, %xmm2, %xmm12
+ vpalignr $8, %xmm5, %xmm6, %xmm13
+ # rnd_0: 1 - 1
+ addq 16(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm1, %xmm13, %xmm1
+ # rnd_0: 6 - 7
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ vpaddq %xmm1, %xmm8, %xmm1
+ # rnd_1: 0 - 0
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ vpsrlq $19, %xmm0, %xmm8
+ vpsllq $45, %xmm0, %xmm9
+ # rnd_1: 1 - 1
+ addq 24(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm0, %xmm10
+ vpsllq $3, %xmm0, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm0, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ addq %r12, %r8
+ xorq %r13, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ vpaddq %xmm1, %xmm8, %xmm1
+ # msg_sched done: 2-5
+ # msg_sched: 4-5
+ # rnd_0: 0 - 0
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ vpalignr $8, %xmm2, %xmm3, %xmm12
+ vpalignr $8, %xmm6, %xmm7, %xmm13
+ # rnd_0: 1 - 1
+ addq 32(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm2, %xmm13, %xmm2
+ # rnd_0: 6 - 7
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ vpaddq %xmm2, %xmm8, %xmm2
+ # rnd_1: 0 - 0
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ vpsrlq $19, %xmm1, %xmm8
+ vpsllq $45, %xmm1, %xmm9
+ # rnd_1: 1 - 1
+ addq 40(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm1, %xmm10
+ vpsllq $3, %xmm1, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm1, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ addq %r10, %r14
+ xorq %r11, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ vpaddq %xmm2, %xmm8, %xmm2
+ # msg_sched done: 4-7
+ # msg_sched: 6-7
+ # rnd_0: 0 - 0
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ vpalignr $8, %xmm3, %xmm4, %xmm12
+ vpalignr $8, %xmm7, %xmm0, %xmm13
+ # rnd_0: 1 - 1
+ addq 48(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm3, %xmm13, %xmm3
+ # rnd_0: 6 - 7
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ vpaddq %xmm3, %xmm8, %xmm3
+ # rnd_1: 0 - 0
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ vpsrlq $19, %xmm2, %xmm8
+ vpsllq $45, %xmm2, %xmm9
+ # rnd_1: 1 - 1
+ addq 56(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm2, %xmm10
+ vpsllq $3, %xmm2, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm2, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ addq %r8, %r12
+ xorq %r9, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ vpaddq %xmm3, %xmm8, %xmm3
+ # msg_sched done: 6-9
+ # msg_sched: 8-9
+ # rnd_0: 0 - 0
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ vpalignr $8, %xmm4, %xmm5, %xmm12
+ vpalignr $8, %xmm0, %xmm1, %xmm13
+ # rnd_0: 1 - 1
+ addq 64(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm4, %xmm13, %xmm4
+ # rnd_0: 6 - 7
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ vpaddq %xmm4, %xmm8, %xmm4
+ # rnd_1: 0 - 0
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ vpsrlq $19, %xmm3, %xmm8
+ vpsllq $45, %xmm3, %xmm9
+ # rnd_1: 1 - 1
+ addq 72(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm3, %xmm10
+ vpsllq $3, %xmm3, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm3, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ addq %r14, %r10
+ xorq %r15, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ vpaddq %xmm4, %xmm8, %xmm4
+ # msg_sched done: 8-11
+ # msg_sched: 10-11
+ # rnd_0: 0 - 0
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ vpalignr $8, %xmm5, %xmm6, %xmm12
+ vpalignr $8, %xmm1, %xmm2, %xmm13
+ # rnd_0: 1 - 1
+ addq 80(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm5, %xmm13, %xmm5
+ # rnd_0: 6 - 7
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ vpaddq %xmm5, %xmm8, %xmm5
+ # rnd_1: 0 - 0
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ vpsrlq $19, %xmm4, %xmm8
+ vpsllq $45, %xmm4, %xmm9
+ # rnd_1: 1 - 1
+ addq 88(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm4, %xmm10
+ vpsllq $3, %xmm4, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm4, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ addq %r12, %r8
+ xorq %r13, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ vpaddq %xmm5, %xmm8, %xmm5
+ # msg_sched done: 10-13
+ # msg_sched: 12-13
+ # rnd_0: 0 - 0
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ vpalignr $8, %xmm6, %xmm7, %xmm12
+ vpalignr $8, %xmm2, %xmm3, %xmm13
+ # rnd_0: 1 - 1
+ addq 96(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm6, %xmm13, %xmm6
+ # rnd_0: 6 - 7
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ vpaddq %xmm6, %xmm8, %xmm6
+ # rnd_1: 0 - 0
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ vpsrlq $19, %xmm5, %xmm8
+ vpsllq $45, %xmm5, %xmm9
+ # rnd_1: 1 - 1
+ addq 104(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm5, %xmm10
+ vpsllq $3, %xmm5, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm5, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ addq %r10, %r14
+ xorq %r11, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ vpaddq %xmm6, %xmm8, %xmm6
+ # msg_sched done: 12-15
+ # msg_sched: 14-15
+ # rnd_0: 0 - 0
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ vpalignr $8, %xmm7, %xmm0, %xmm12
+ vpalignr $8, %xmm3, %xmm4, %xmm13
+ # rnd_0: 1 - 1
+ addq 112(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ vpsrlq $0x01, %xmm12, %xmm8
+ vpsllq $63, %xmm12, %xmm9
+ # rnd_0: 2 - 2
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ vpsrlq $8, %xmm12, %xmm10
+ vpsllq $56, %xmm12, %xmm11
+ # rnd_0: 3 - 3
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_0: 4 - 4
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ vpsrlq $7, %xmm12, %xmm11
+ vpxor %xmm10, %xmm8, %xmm8
+ # rnd_0: 5 - 5
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ vpxor %xmm11, %xmm8, %xmm8
+ vpaddq %xmm7, %xmm13, %xmm7
+ # rnd_0: 6 - 7
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ vpaddq %xmm7, %xmm8, %xmm7
+ # rnd_1: 0 - 0
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ vpsrlq $19, %xmm6, %xmm8
+ vpsllq $45, %xmm6, %xmm9
+ # rnd_1: 1 - 1
+ addq 120(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %xmm6, %xmm10
+ vpsllq $3, %xmm6, %xmm11
+ # rnd_1: 2 - 2
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ vpor %xmm9, %xmm8, %xmm8
+ vpor %xmm11, %xmm10, %xmm10
+ # rnd_1: 3 - 4
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ vpxor %xmm10, %xmm8, %xmm8
+ vpsrlq $6, %xmm6, %xmm11
+ # rnd_1: 5 - 6
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ addq %r8, %r12
+ xorq %r9, %rbx
+ vpxor %xmm11, %xmm8, %xmm8
+ # rnd_1: 7 - 7
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ vpaddq %xmm7, %xmm8, %xmm7
+ # msg_sched done: 14-17
+ movq 136(%rsp), %rcx
+ vpaddq (%rcx), %xmm0, %xmm8
+ vpaddq 16(%rcx), %xmm1, %xmm9
+ vmovdqu %xmm8, (%rsp)
+ vmovdqu %xmm9, 16(%rsp)
+ vpaddq 32(%rcx), %xmm2, %xmm8
+ vpaddq 48(%rcx), %xmm3, %xmm9
+ vmovdqu %xmm8, 32(%rsp)
+ vmovdqu %xmm9, 48(%rsp)
+ vpaddq 64(%rcx), %xmm4, %xmm8
+ vpaddq 80(%rcx), %xmm5, %xmm9
+ vmovdqu %xmm8, 64(%rsp)
+ vmovdqu %xmm9, 80(%rsp)
+ vpaddq 96(%rcx), %xmm6, %xmm8
+ vpaddq 112(%rcx), %xmm7, %xmm9
+ vmovdqu %xmm8, 96(%rsp)
+ vmovdqu %xmm9, 112(%rsp)
+ subl $0x01, 128(%rsp)
+ jne L_sha512_len_avx1_rorx_start
+ vpaddq (%rcx), %xmm0, %xmm8
+ vpaddq 16(%rcx), %xmm1, %xmm9
+ vmovdqu %xmm8, (%rsp)
+ vmovdqu %xmm9, 16(%rsp)
+ vpaddq 32(%rcx), %xmm2, %xmm8
+ vpaddq 48(%rcx), %xmm3, %xmm9
+ vmovdqu %xmm8, 32(%rsp)
+ vmovdqu %xmm9, 48(%rsp)
+ vpaddq 64(%rcx), %xmm4, %xmm8
+ vpaddq 80(%rcx), %xmm5, %xmm9
+ vmovdqu %xmm8, 64(%rsp)
+ vmovdqu %xmm9, 80(%rsp)
+ vpaddq 96(%rcx), %xmm6, %xmm8
+ vpaddq 112(%rcx), %xmm7, %xmm9
+ vmovdqu %xmm8, 96(%rsp)
+ vmovdqu %xmm9, 112(%rsp)
+ # rnd_all_2: 0-1
+ # rnd_0: 0 - 7
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq (%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 8(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ addq %r14, %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ # rnd_all_2: 2-3
+ # rnd_0: 0 - 7
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 16(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 24(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ addq %r12, %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_2: 4-5
+ # rnd_0: 0 - 7
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 32(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 40(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ addq %r10, %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ # rnd_all_2: 6-7
+ # rnd_0: 0 - 7
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 48(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 56(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ addq %r8, %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ # rnd_all_2: 8-9
+ # rnd_0: 0 - 7
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq 64(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 72(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ addq %r14, %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ # rnd_all_2: 10-11
+ # rnd_0: 0 - 7
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 80(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 88(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ addq %r12, %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_2: 12-13
+ # rnd_0: 0 - 7
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 96(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 104(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ addq %r10, %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ # rnd_all_2: 14-15
+ # rnd_0: 0 - 7
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 112(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ # rnd_1: 0 - 7
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 120(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ addq %r8, %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ addq %rdx, %r8
+ addq (%rdi), %r8
+ addq 8(%rdi), %r9
+ addq 16(%rdi), %r10
+ addq 24(%rdi), %r11
+ addq 32(%rdi), %r12
+ addq 40(%rdi), %r13
+ addq 48(%rdi), %r14
+ addq 56(%rdi), %r15
+ leaq L_avx1_rorx_sha512_k(%rip), %rcx
+ addq $0x80, %rsi
+ subl $0x80, %ebp
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, 32(%rdi)
+ movq %r13, 40(%rdi)
+ movq %r14, 48(%rdi)
+ movq %r15, 56(%rdi)
+ jnz L_sha512_len_avx1_rorx_begin
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x90, %rsp
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha512_AVX1_RORX_Len,.-Transform_Sha512_AVX1_RORX_Len
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX1 */
+#ifdef HAVE_INTEL_AVX2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+ # SHA-512 round constants K[0..79] (FIPS 180-4 sect. 4.2.3), laid out as
+ # 40 rows of two 64-bit words; 16-byte aligned for aligned SIMD loads.
+ # Leading zeros of some constants are dropped by the code generator
+ # (e.g. 0xfc19dc68b8cd5b5 == K[18] = 0x0fc19dc68b8cd5b5).
+L_avx2_sha512_k:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x6ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x6f067aa72176fba,0xa637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+ # SHA-512 round constants K[0..79] with each 128-bit pair duplicated,
+ # so one 256-bit (ymm) load yields the same K pair in both 128-bit
+ # lanes — used by the AVX2 path that works on two blocks per lane.
+ # Total size is 80 rows * 16 bytes = 1280 bytes... NOTE(review): the
+ # L_avx2_sha512_k_2_end pointer below is 1024+L_avx2_sha512_k_2, i.e.
+ # 64 duplicated pairs in — presumably the loop bound before a tail
+ # sequence handles the remaining rounds; confirm against the (not
+ # visible here) AVX2 loop that consumes it.
+L_avx2_sha512_k_2:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x6ca6351e003826f,0x142929670a0e6e70
+.quad 0x6ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x6f067aa72176fba,0xa637dc5a2c898a6
+.quad 0x6f067aa72176fba,0xa637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 8
+#else
+.p2align 3
+#endif /* __APPLE__ */
+ # Precomputed pointer 1024 bytes into L_avx2_sha512_k_2 (64 duplicated
+ # K pairs in), stored as data so PIC code can load it with one RIP-
+ # relative mov instead of computing the bound at run time; presumably
+ # the AVX2 round loop compares its K pointer against this to terminate
+ # — confirm against the consuming loop (outside this view).
+L_avx2_sha512_k_2_end:
+.quad 1024+L_avx2_sha512_k_2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+ # vpshufb control mask that reverses the byte order of each 64-bit
+ # word (little-endian load -> big-endian SHA-512 message word).
+ # The row is duplicated so the same shuffle applies to both 128-bit
+ # lanes of a ymm register; 32-byte aligned for vmovdqa loads.
+L_avx2_sha512_flip_mask:
+.quad 0x1020304050607, 0x8090a0b0c0d0e0f
+.quad 0x1020304050607, 0x8090a0b0c0d0e0f
+#ifndef __APPLE__
+.text
+.globl Transform_Sha512_AVX2
+.type Transform_Sha512_AVX2,@function
+.align 4
+Transform_Sha512_AVX2:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha512_AVX2
+.p2align 2
+_Transform_Sha512_AVX2:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x88, %rsp
+ leaq 64(%rdi), %rax
+ vmovdqa L_avx2_sha512_flip_mask(%rip), %ymm15
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %r11
+ movq 32(%rdi), %r12
+ movq 40(%rdi), %r13
+ movq 48(%rdi), %r14
+ movq 56(%rdi), %r15
+ vmovdqu (%rax), %ymm0
+ vmovdqu 32(%rax), %ymm1
+ vpshufb %ymm15, %ymm0, %ymm0
+ vpshufb %ymm15, %ymm1, %ymm1
+ vmovdqu 64(%rax), %ymm2
+ vmovdqu 96(%rax), %ymm3
+ vpshufb %ymm15, %ymm2, %ymm2
+ vpshufb %ymm15, %ymm3, %ymm3
+ movl $4, 128(%rsp)
+ leaq L_avx2_sha512_k(%rip), %rsi
+ movq %r9, %rbx
+ movq %r12, %rax
+ xorq %r10, %rbx
+ vpaddq (%rsi), %ymm0, %ymm8
+ vpaddq 32(%rsi), %ymm1, %ymm9
+ vmovdqu %ymm8, (%rsp)
+ vmovdqu %ymm9, 32(%rsp)
+ vpaddq 64(%rsi), %ymm2, %ymm8
+ vpaddq 96(%rsi), %ymm3, %ymm9
+ vmovdqu %ymm8, 64(%rsp)
+ vmovdqu %ymm9, 96(%rsp)
+ # Start of 16 rounds
+L_sha256_avx2_start:
+ addq $0x80, %rsi
+ rorq $23, %rax
+ vpblendd $3, %ymm1, %ymm0, %ymm12
+ vpblendd $3, %ymm3, %ymm2, %ymm13
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq (%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ vpermq $57, %ymm12, %ymm12
+ rorq $4, %rax
+ xorq %r14, %rcx
+ vpermq $57, %ymm13, %ymm13
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ vpsrlq $0x01, %ymm12, %ymm8
+ addq %rax, %r15
+ movq %r8, %rcx
+ vpsllq $63, %ymm12, %ymm9
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ vpsllq $56, %ymm12, %ymm11
+ rorq $6, %rcx
+ addq %r15, %r11
+ vpor %ymm9, %ymm8, %ymm8
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ vpor %ymm11, %ymm10, %ymm10
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ movq %r15, %rbx
+ movq %r12, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ addq 8(%rsp), %r14
+ xorq %r13, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ xorq %r11, %rax
+ andq %r11, %rcx
+ vpaddq %ymm0, %ymm13, %ymm0
+ rorq $4, %rax
+ xorq %r13, %rcx
+ vpaddq %ymm0, %ymm8, %ymm0
+ xorq %r11, %rax
+ addq %rcx, %r14
+ vperm2I128 $0x81, %ymm3, %ymm3, %ymm14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ vpsrlq $19, %ymm14, %ymm8
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ vpsllq $45, %ymm14, %ymm9
+ rorq $6, %rcx
+ addq %r14, %r10
+ vpsrlq $61, %ymm14, %ymm10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ vpsllq $3, %ymm14, %ymm11
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ rorq $23, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 16(%rsp), %r13
+ xorq %r12, %rcx
+ vpor %ymm11, %ymm10, %ymm10
+ xorq %r10, %rax
+ andq %r10, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ rorq $4, %rax
+ xorq %r12, %rcx
+ vpsrlq $6, %ymm14, %ymm11
+ xorq %r10, %rax
+ addq %rcx, %r13
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $14, %rax
+ xorq %r15, %rdx
+ vpaddq %ymm0, %ymm8, %ymm0
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vperm2I128 $8, %ymm0, %ymm0, %ymm14
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ vpsrlq $19, %ymm14, %ymm8
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ vpsllq $45, %ymm14, %ymm9
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ vpsrlq $61, %ymm14, %ymm10
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 24(%rsp), %r12
+ xorq %r11, %rcx
+ vpsllq $3, %ymm14, %ymm11
+ xorq %r9, %rax
+ andq %r9, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ rorq $4, %rax
+ xorq %r11, %rcx
+ vpor %ymm11, %ymm10, %ymm10
+ xorq %r9, %rax
+ addq %rcx, %r12
+ vpxor %ymm10, %ymm8, %ymm8
+ rorq $14, %rax
+ xorq %r14, %rbx
+ vpsrlq $6, %ymm14, %ymm11
+ addq %rax, %r12
+ movq %r13, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ vpaddq %ymm0, %ymm8, %ymm0
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ rorq $23, %rax
+ vpblendd $3, %ymm2, %ymm1, %ymm12
+ vpblendd $3, %ymm0, %ymm3, %ymm13
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 32(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ vpermq $57, %ymm12, %ymm12
+ rorq $4, %rax
+ xorq %r10, %rcx
+ vpermq $57, %ymm13, %ymm13
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ vpsrlq $0x01, %ymm12, %ymm8
+ addq %rax, %r11
+ movq %r12, %rcx
+ vpsllq $63, %ymm12, %ymm9
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ vpsllq $56, %ymm12, %ymm11
+ rorq $6, %rcx
+ addq %r11, %r15
+ vpor %ymm9, %ymm8, %ymm8
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ vpor %ymm11, %ymm10, %ymm10
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ movq %r11, %rbx
+ movq %r8, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ addq 40(%rsp), %r10
+ xorq %r9, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ xorq %r15, %rax
+ andq %r15, %rcx
+ vpaddq %ymm1, %ymm13, %ymm1
+ rorq $4, %rax
+ xorq %r9, %rcx
+ vpaddq %ymm1, %ymm8, %ymm1
+ xorq %r15, %rax
+ addq %rcx, %r10
+ vperm2I128 $0x81, %ymm0, %ymm0, %ymm14
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ vpsrlq $19, %ymm14, %ymm8
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ vpsllq $45, %ymm14, %ymm9
+ rorq $6, %rcx
+ addq %r10, %r14
+ vpsrlq $61, %ymm14, %ymm10
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ vpsllq $3, %ymm14, %ymm11
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ rorq $23, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 48(%rsp), %r9
+ xorq %r8, %rcx
+ vpor %ymm11, %ymm10, %ymm10
+ xorq %r14, %rax
+ andq %r14, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ rorq $4, %rax
+ xorq %r8, %rcx
+ vpsrlq $6, %ymm14, %ymm11
+ xorq %r14, %rax
+ addq %rcx, %r9
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $14, %rax
+ xorq %r11, %rdx
+ vpaddq %ymm1, %ymm8, %ymm1
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vperm2I128 $8, %ymm1, %ymm1, %ymm14
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ vpsrlq $19, %ymm14, %ymm8
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ vpsllq $45, %ymm14, %ymm9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ vpsrlq $61, %ymm14, %ymm10
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 56(%rsp), %r8
+ xorq %r15, %rcx
+ vpsllq $3, %ymm14, %ymm11
+ xorq %r13, %rax
+ andq %r13, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ rorq $4, %rax
+ xorq %r15, %rcx
+ vpor %ymm11, %ymm10, %ymm10
+ xorq %r13, %rax
+ addq %rcx, %r8
+ vpxor %ymm10, %ymm8, %ymm8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ vpsrlq $6, %ymm14, %ymm11
+ addq %rax, %r8
+ movq %r9, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ vpaddq %ymm1, %ymm8, %ymm1
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ rorq $23, %rax
+ vpblendd $3, %ymm3, %ymm2, %ymm12
+ vpblendd $3, %ymm1, %ymm0, %ymm13
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 64(%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ vpermq $57, %ymm12, %ymm12
+ rorq $4, %rax
+ xorq %r14, %rcx
+ vpermq $57, %ymm13, %ymm13
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ vpsrlq $0x01, %ymm12, %ymm8
+ addq %rax, %r15
+ movq %r8, %rcx
+ vpsllq $63, %ymm12, %ymm9
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ vpsllq $56, %ymm12, %ymm11
+ rorq $6, %rcx
+ addq %r15, %r11
+ vpor %ymm9, %ymm8, %ymm8
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ vpor %ymm11, %ymm10, %ymm10
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ movq %r15, %rbx
+ movq %r12, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ addq 72(%rsp), %r14
+ xorq %r13, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ xorq %r11, %rax
+ andq %r11, %rcx
+ vpaddq %ymm2, %ymm13, %ymm2
+ rorq $4, %rax
+ xorq %r13, %rcx
+ vpaddq %ymm2, %ymm8, %ymm2
+ xorq %r11, %rax
+ addq %rcx, %r14
+ vperm2I128 $0x81, %ymm1, %ymm1, %ymm14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ vpsrlq $19, %ymm14, %ymm8
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ vpsllq $45, %ymm14, %ymm9
+ rorq $6, %rcx
+ addq %r14, %r10
+ vpsrlq $61, %ymm14, %ymm10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ vpsllq $3, %ymm14, %ymm11
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ rorq $23, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 80(%rsp), %r13
+ xorq %r12, %rcx
+ vpor %ymm11, %ymm10, %ymm10
+ xorq %r10, %rax
+ andq %r10, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ rorq $4, %rax
+ xorq %r12, %rcx
+ vpsrlq $6, %ymm14, %ymm11
+ xorq %r10, %rax
+ addq %rcx, %r13
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $14, %rax
+ xorq %r15, %rdx
+ vpaddq %ymm2, %ymm8, %ymm2
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vperm2I128 $8, %ymm2, %ymm2, %ymm14
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ vpsrlq $19, %ymm14, %ymm8
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ vpsllq $45, %ymm14, %ymm9
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ vpsrlq $61, %ymm14, %ymm10
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 88(%rsp), %r12
+ xorq %r11, %rcx
+ vpsllq $3, %ymm14, %ymm11
+ xorq %r9, %rax
+ andq %r9, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ rorq $4, %rax
+ xorq %r11, %rcx
+ vpor %ymm11, %ymm10, %ymm10
+ xorq %r9, %rax
+ addq %rcx, %r12
+ vpxor %ymm10, %ymm8, %ymm8
+ rorq $14, %rax
+ xorq %r14, %rbx
+ vpsrlq $6, %ymm14, %ymm11
+ addq %rax, %r12
+ movq %r13, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ vpaddq %ymm2, %ymm8, %ymm2
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ rorq $23, %rax
+ vpblendd $3, %ymm0, %ymm3, %ymm12
+ vpblendd $3, %ymm2, %ymm1, %ymm13
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 96(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ vpermq $57, %ymm12, %ymm12
+ rorq $4, %rax
+ xorq %r10, %rcx
+ vpermq $57, %ymm13, %ymm13
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ vpsrlq $0x01, %ymm12, %ymm8
+ addq %rax, %r11
+ movq %r12, %rcx
+ vpsllq $63, %ymm12, %ymm9
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ vpsllq $56, %ymm12, %ymm11
+ rorq $6, %rcx
+ addq %r11, %r15
+ vpor %ymm9, %ymm8, %ymm8
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ vpor %ymm11, %ymm10, %ymm10
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ movq %r11, %rbx
+ movq %r8, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ addq 104(%rsp), %r10
+ xorq %r9, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ xorq %r15, %rax
+ andq %r15, %rcx
+ vpaddq %ymm3, %ymm13, %ymm3
+ rorq $4, %rax
+ xorq %r9, %rcx
+ vpaddq %ymm3, %ymm8, %ymm3
+ xorq %r15, %rax
+ addq %rcx, %r10
+ vperm2I128 $0x81, %ymm2, %ymm2, %ymm14
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ vpsrlq $19, %ymm14, %ymm8
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ vpsllq $45, %ymm14, %ymm9
+ rorq $6, %rcx
+ addq %r10, %r14
+ vpsrlq $61, %ymm14, %ymm10
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ vpsllq $3, %ymm14, %ymm11
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ rorq $23, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 112(%rsp), %r9
+ xorq %r8, %rcx
+ vpor %ymm11, %ymm10, %ymm10
+ xorq %r14, %rax
+ andq %r14, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ rorq $4, %rax
+ xorq %r8, %rcx
+ vpsrlq $6, %ymm14, %ymm11
+ xorq %r14, %rax
+ addq %rcx, %r9
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $14, %rax
+ xorq %r11, %rdx
+ vpaddq %ymm3, %ymm8, %ymm3
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vperm2I128 $8, %ymm3, %ymm3, %ymm14
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ vpsrlq $19, %ymm14, %ymm8
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ vpsllq $45, %ymm14, %ymm9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ vpsrlq $61, %ymm14, %ymm10
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 120(%rsp), %r8
+ xorq %r15, %rcx
+ vpsllq $3, %ymm14, %ymm11
+ xorq %r13, %rax
+ andq %r13, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ rorq $4, %rax
+ xorq %r15, %rcx
+ vpor %ymm11, %ymm10, %ymm10
+ xorq %r13, %rax
+ addq %rcx, %r8
+ vpxor %ymm10, %ymm8, %ymm8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ vpsrlq $6, %ymm14, %ymm11
+ addq %rax, %r8
+ movq %r9, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ vpaddq %ymm3, %ymm8, %ymm3
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ vpaddq (%rsi), %ymm0, %ymm8
+ vpaddq 32(%rsi), %ymm1, %ymm9
+ vmovdqu %ymm8, (%rsp)
+ vmovdqu %ymm9, 32(%rsp)
+ vpaddq 64(%rsi), %ymm2, %ymm8
+ vpaddq 96(%rsi), %ymm3, %ymm9
+ vmovdqu %ymm8, 64(%rsp)
+ vmovdqu %ymm9, 96(%rsp)
+ subl $0x01, 128(%rsp)
+ jne L_sha256_avx2_start
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq (%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 8(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 16(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 24(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 32(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 40(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 48(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 56(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 64(%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 72(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 80(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 88(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 96(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 104(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 112(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 120(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ addq %r8, (%rdi)
+ addq %r9, 8(%rdi)
+ addq %r10, 16(%rdi)
+ addq %r11, 24(%rdi)
+ addq %r12, 32(%rdi)
+ addq %r13, 40(%rdi)
+ addq %r14, 48(%rdi)
+ addq %r15, 56(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x88, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha512_AVX2,.-Transform_Sha512_AVX2
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha512_AVX2_Len
+.type Transform_Sha512_AVX2_Len,@function
+.align 4
+Transform_Sha512_AVX2_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha512_AVX2_Len
+.p2align 2
+_Transform_Sha512_AVX2_Len:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ movq %rsi, %rbp
+ testb $0x80, %bpl
+ je L_sha512_len_avx2_block
+ movq 224(%rdi), %rcx
+ vmovdqu (%rcx), %ymm0
+ vmovdqu 32(%rcx), %ymm1
+ vmovdqu 64(%rcx), %ymm2
+ vmovdqu 96(%rcx), %ymm3
+ vmovups %ymm0, 64(%rdi)
+ vmovups %ymm1, 96(%rdi)
+ vmovups %ymm2, 128(%rdi)
+ vmovups %ymm3, 160(%rdi)
+#ifndef __APPLE__
+ call Transform_Sha512_AVX2@plt
+#else
+ call _Transform_Sha512_AVX2
+#endif /* __APPLE__ */
+ addq $0x80, 224(%rdi)
+ subl $0x80, %ebp
+ jz L_sha512_len_avx2_done
+L_sha512_len_avx2_block:
+ movq 224(%rdi), %rcx
+ vmovdqa L_avx2_sha512_flip_mask(%rip), %ymm15
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %r11
+ movq 32(%rdi), %r12
+ movq 40(%rdi), %r13
+ movq 48(%rdi), %r14
+ movq 56(%rdi), %r15
+ # Start of loop processing two blocks
+L_sha512_len_avx2_begin:
+ subq $0x540, %rsp
+ leaq L_avx2_sha512_k_2(%rip), %rsi
+ movq %r9, %rbx
+ movq %r12, %rax
+ vmovdqu (%rcx), %xmm0
+ vmovdqu 16(%rcx), %xmm1
+ vinserti128 $0x01, 128(%rcx), %ymm0, %ymm0
+ vinserti128 $0x01, 144(%rcx), %ymm1, %ymm1
+ vpshufb %ymm15, %ymm0, %ymm0
+ vpshufb %ymm15, %ymm1, %ymm1
+ vmovdqu 32(%rcx), %xmm2
+ vmovdqu 48(%rcx), %xmm3
+ vinserti128 $0x01, 160(%rcx), %ymm2, %ymm2
+ vinserti128 $0x01, 176(%rcx), %ymm3, %ymm3
+ vpshufb %ymm15, %ymm2, %ymm2
+ vpshufb %ymm15, %ymm3, %ymm3
+ vmovdqu 64(%rcx), %xmm4
+ vmovdqu 80(%rcx), %xmm5
+ vinserti128 $0x01, 192(%rcx), %ymm4, %ymm4
+ vinserti128 $0x01, 208(%rcx), %ymm5, %ymm5
+ vpshufb %ymm15, %ymm4, %ymm4
+ vpshufb %ymm15, %ymm5, %ymm5
+ vmovdqu 96(%rcx), %xmm6
+ vmovdqu 112(%rcx), %xmm7
+ vinserti128 $0x01, 224(%rcx), %ymm6, %ymm6
+ vinserti128 $0x01, 240(%rcx), %ymm7, %ymm7
+ vpshufb %ymm15, %ymm6, %ymm6
+ vpshufb %ymm15, %ymm7, %ymm7
+ xorq %r10, %rbx
+ # Start of 16 rounds
+L_sha512_len_avx2_start:
+ vpaddq (%rsi), %ymm0, %ymm8
+ vpaddq 32(%rsi), %ymm1, %ymm9
+ vmovdqu %ymm8, (%rsp)
+ vmovdqu %ymm9, 32(%rsp)
+ vpaddq 64(%rsi), %ymm2, %ymm8
+ vpaddq 96(%rsi), %ymm3, %ymm9
+ vmovdqu %ymm8, 64(%rsp)
+ vmovdqu %ymm9, 96(%rsp)
+ vpaddq 128(%rsi), %ymm4, %ymm8
+ vpaddq 160(%rsi), %ymm5, %ymm9
+ vmovdqu %ymm8, 128(%rsp)
+ vmovdqu %ymm9, 160(%rsp)
+ vpaddq 192(%rsi), %ymm6, %ymm8
+ vpaddq 224(%rsi), %ymm7, %ymm9
+ vmovdqu %ymm8, 192(%rsp)
+ vmovdqu %ymm9, 224(%rsp)
+ # msg_sched: 0-1
+ rorq $23, %rax
+ vpalignr $8, %ymm0, %ymm1, %ymm12
+ vpalignr $8, %ymm4, %ymm5, %ymm13
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq (%rsp), %r15
+ xorq %r14, %rcx
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm0, %ymm13, %ymm0
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ vpaddq %ymm0, %ymm8, %ymm0
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 8(%rsp), %r14
+ xorq %r13, %rcx
+ vpsrlq $19, %ymm7, %ymm8
+ vpsllq $45, %ymm7, %ymm9
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ vpsrlq $61, %ymm7, %ymm10
+ vpsllq $3, %ymm7, %ymm11
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm7, %ymm11
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ vpaddq %ymm0, %ymm8, %ymm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-5
+ rorq $23, %rax
+ vpalignr $8, %ymm1, %ymm2, %ymm12
+ vpalignr $8, %ymm5, %ymm6, %ymm13
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 32(%rsp), %r13
+ xorq %r12, %rcx
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm1, %ymm13, %ymm1
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ vpaddq %ymm1, %ymm8, %ymm1
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 40(%rsp), %r12
+ xorq %r11, %rcx
+ vpsrlq $19, %ymm0, %ymm8
+ vpsllq $45, %ymm0, %ymm9
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ vpsrlq $61, %ymm0, %ymm10
+ vpsllq $3, %ymm0, %ymm11
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm0, %ymm11
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ vpaddq %ymm1, %ymm8, %ymm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-9
+ rorq $23, %rax
+ vpalignr $8, %ymm2, %ymm3, %ymm12
+ vpalignr $8, %ymm6, %ymm7, %ymm13
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 64(%rsp), %r11
+ xorq %r10, %rcx
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm2, %ymm13, %ymm2
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ vpaddq %ymm2, %ymm8, %ymm2
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 72(%rsp), %r10
+ xorq %r9, %rcx
+ vpsrlq $19, %ymm1, %ymm8
+ vpsllq $45, %ymm1, %ymm9
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ vpsrlq $61, %ymm1, %ymm10
+ vpsllq $3, %ymm1, %ymm11
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm1, %ymm11
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ vpaddq %ymm2, %ymm8, %ymm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-13
+ rorq $23, %rax
+ vpalignr $8, %ymm3, %ymm4, %ymm12
+ vpalignr $8, %ymm7, %ymm0, %ymm13
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 96(%rsp), %r9
+ xorq %r8, %rcx
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm3, %ymm13, %ymm3
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ vpaddq %ymm3, %ymm8, %ymm3
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 104(%rsp), %r8
+ xorq %r15, %rcx
+ vpsrlq $19, %ymm2, %ymm8
+ vpsllq $45, %ymm2, %ymm9
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ vpsrlq $61, %ymm2, %ymm10
+ vpsllq $3, %ymm2, %ymm11
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm2, %ymm11
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ vpaddq %ymm3, %ymm8, %ymm3
+ # msg_sched done: 12-15
+ # msg_sched: 16-17
+ rorq $23, %rax
+ vpalignr $8, %ymm4, %ymm5, %ymm12
+ vpalignr $8, %ymm0, %ymm1, %ymm13
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 128(%rsp), %r15
+ xorq %r14, %rcx
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm4, %ymm13, %ymm4
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ vpaddq %ymm4, %ymm8, %ymm4
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 136(%rsp), %r14
+ xorq %r13, %rcx
+ vpsrlq $19, %ymm3, %ymm8
+ vpsllq $45, %ymm3, %ymm9
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ vpsrlq $61, %ymm3, %ymm10
+ vpsllq $3, %ymm3, %ymm11
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm3, %ymm11
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ vpaddq %ymm4, %ymm8, %ymm4
+ # msg_sched done: 16-19
+ # msg_sched: 20-21
+ rorq $23, %rax
+ vpalignr $8, %ymm5, %ymm6, %ymm12
+ vpalignr $8, %ymm1, %ymm2, %ymm13
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 160(%rsp), %r13
+ xorq %r12, %rcx
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm5, %ymm13, %ymm5
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ vpaddq %ymm5, %ymm8, %ymm5
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 168(%rsp), %r12
+ xorq %r11, %rcx
+ vpsrlq $19, %ymm4, %ymm8
+ vpsllq $45, %ymm4, %ymm9
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ vpsrlq $61, %ymm4, %ymm10
+ vpsllq $3, %ymm4, %ymm11
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm4, %ymm11
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ vpaddq %ymm5, %ymm8, %ymm5
+ # msg_sched done: 20-23
+ # msg_sched: 24-25
+ rorq $23, %rax
+ vpalignr $8, %ymm6, %ymm7, %ymm12
+ vpalignr $8, %ymm2, %ymm3, %ymm13
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 192(%rsp), %r11
+ xorq %r10, %rcx
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm6, %ymm13, %ymm6
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ vpaddq %ymm6, %ymm8, %ymm6
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 200(%rsp), %r10
+ xorq %r9, %rcx
+ vpsrlq $19, %ymm5, %ymm8
+ vpsllq $45, %ymm5, %ymm9
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ vpsrlq $61, %ymm5, %ymm10
+ vpsllq $3, %ymm5, %ymm11
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm5, %ymm11
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ vpaddq %ymm6, %ymm8, %ymm6
+ # msg_sched done: 24-27
+ # msg_sched: 28-29
+ rorq $23, %rax
+ vpalignr $8, %ymm7, %ymm0, %ymm12
+ vpalignr $8, %ymm3, %ymm4, %ymm13
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 224(%rsp), %r9
+ xorq %r8, %rcx
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm7, %ymm13, %ymm7
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ vpaddq %ymm7, %ymm8, %ymm7
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 232(%rsp), %r8
+ xorq %r15, %rcx
+ vpsrlq $19, %ymm6, %ymm8
+ vpsllq $45, %ymm6, %ymm9
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ vpsrlq $61, %ymm6, %ymm10
+ vpsllq $3, %ymm6, %ymm11
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm6, %ymm11
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ vpxor %ymm11, %ymm8, %ymm8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ vpaddq %ymm7, %ymm8, %ymm7
+ # msg_sched done: 28-31
+ addq $0x100, %rsi
+ addq $0x100, %rsp
+ cmpq L_avx2_sha512_k_2_end(%rip), %rsi
+ jne L_sha512_len_avx2_start
+ vpaddq (%rsi), %ymm0, %ymm8
+ vpaddq 32(%rsi), %ymm1, %ymm9
+ vmovdqu %ymm8, (%rsp)
+ vmovdqu %ymm9, 32(%rsp)
+ vpaddq 64(%rsi), %ymm2, %ymm8
+ vpaddq 96(%rsi), %ymm3, %ymm9
+ vmovdqu %ymm8, 64(%rsp)
+ vmovdqu %ymm9, 96(%rsp)
+ vpaddq 128(%rsi), %ymm4, %ymm8
+ vpaddq 160(%rsi), %ymm5, %ymm9
+ vmovdqu %ymm8, 128(%rsp)
+ vmovdqu %ymm9, 160(%rsp)
+ vpaddq 192(%rsi), %ymm6, %ymm8
+ vpaddq 224(%rsi), %ymm7, %ymm9
+ vmovdqu %ymm8, 192(%rsp)
+ vmovdqu %ymm9, 224(%rsp)
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq (%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 8(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 32(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 40(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 64(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 72(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 96(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 104(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 128(%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 136(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 160(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 168(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 192(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 200(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 224(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 232(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ subq $0x400, %rsp
+ addq (%rdi), %r8
+ addq 8(%rdi), %r9
+ addq 16(%rdi), %r10
+ addq 24(%rdi), %r11
+ addq 32(%rdi), %r12
+ addq 40(%rdi), %r13
+ addq 48(%rdi), %r14
+ addq 56(%rdi), %r15
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, 32(%rdi)
+ movq %r13, 40(%rdi)
+ movq %r14, 48(%rdi)
+ movq %r15, 56(%rdi)
+ movq %r9, %rbx
+ movq %r12, %rax
+ xorq %r10, %rbx
+ movq $5, %rsi
+L_sha512_len_avx2_tail:
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 16(%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 24(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 48(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 56(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 80(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 88(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 112(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 120(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ rorq $23, %rax
+ movq %r8, %rdx
+ movq %r13, %rcx
+ addq 144(%rsp), %r15
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ andq %r12, %rcx
+ rorq $4, %rax
+ xorq %r14, %rcx
+ xorq %r12, %rax
+ addq %rcx, %r15
+ rorq $14, %rax
+ xorq %r9, %rdx
+ addq %rax, %r15
+ movq %r8, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r8, %rcx
+ xorq %r9, %rbx
+ rorq $6, %rcx
+ addq %r15, %r11
+ xorq %r8, %rcx
+ addq %rbx, %r15
+ rorq $28, %rcx
+ movq %r11, %rax
+ addq %rcx, %r15
+ rorq $23, %rax
+ movq %r15, %rbx
+ movq %r12, %rcx
+ addq 152(%rsp), %r14
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ andq %r11, %rcx
+ rorq $4, %rax
+ xorq %r13, %rcx
+ xorq %r11, %rax
+ addq %rcx, %r14
+ rorq $14, %rax
+ xorq %r8, %rbx
+ addq %rax, %r14
+ movq %r15, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r15, %rcx
+ xorq %r8, %rdx
+ rorq $6, %rcx
+ addq %r14, %r10
+ xorq %r15, %rcx
+ addq %rdx, %r14
+ rorq $28, %rcx
+ movq %r10, %rax
+ addq %rcx, %r14
+ rorq $23, %rax
+ movq %r14, %rdx
+ movq %r11, %rcx
+ addq 176(%rsp), %r13
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ andq %r10, %rcx
+ rorq $4, %rax
+ xorq %r12, %rcx
+ xorq %r10, %rax
+ addq %rcx, %r13
+ rorq $14, %rax
+ xorq %r15, %rdx
+ addq %rax, %r13
+ movq %r14, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r14, %rcx
+ xorq %r15, %rbx
+ rorq $6, %rcx
+ addq %r13, %r9
+ xorq %r14, %rcx
+ addq %rbx, %r13
+ rorq $28, %rcx
+ movq %r9, %rax
+ addq %rcx, %r13
+ rorq $23, %rax
+ movq %r13, %rbx
+ movq %r10, %rcx
+ addq 184(%rsp), %r12
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ andq %r9, %rcx
+ rorq $4, %rax
+ xorq %r11, %rcx
+ xorq %r9, %rax
+ addq %rcx, %r12
+ rorq $14, %rax
+ xorq %r14, %rbx
+ addq %rax, %r12
+ movq %r13, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r13, %rcx
+ xorq %r14, %rdx
+ rorq $6, %rcx
+ addq %r12, %r8
+ xorq %r13, %rcx
+ addq %rdx, %r12
+ rorq $28, %rcx
+ movq %r8, %rax
+ addq %rcx, %r12
+ rorq $23, %rax
+ movq %r12, %rdx
+ movq %r9, %rcx
+ addq 208(%rsp), %r11
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ andq %r8, %rcx
+ rorq $4, %rax
+ xorq %r10, %rcx
+ xorq %r8, %rax
+ addq %rcx, %r11
+ rorq $14, %rax
+ xorq %r13, %rdx
+ addq %rax, %r11
+ movq %r12, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r12, %rcx
+ xorq %r13, %rbx
+ rorq $6, %rcx
+ addq %r11, %r15
+ xorq %r12, %rcx
+ addq %rbx, %r11
+ rorq $28, %rcx
+ movq %r15, %rax
+ addq %rcx, %r11
+ rorq $23, %rax
+ movq %r11, %rbx
+ movq %r8, %rcx
+ addq 216(%rsp), %r10
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ andq %r15, %rcx
+ rorq $4, %rax
+ xorq %r9, %rcx
+ xorq %r15, %rax
+ addq %rcx, %r10
+ rorq $14, %rax
+ xorq %r12, %rbx
+ addq %rax, %r10
+ movq %r11, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r11, %rcx
+ xorq %r12, %rdx
+ rorq $6, %rcx
+ addq %r10, %r14
+ xorq %r11, %rcx
+ addq %rdx, %r10
+ rorq $28, %rcx
+ movq %r14, %rax
+ addq %rcx, %r10
+ rorq $23, %rax
+ movq %r10, %rdx
+ movq %r15, %rcx
+ addq 240(%rsp), %r9
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ andq %r14, %rcx
+ rorq $4, %rax
+ xorq %r8, %rcx
+ xorq %r14, %rax
+ addq %rcx, %r9
+ rorq $14, %rax
+ xorq %r11, %rdx
+ addq %rax, %r9
+ movq %r10, %rcx
+ andq %rdx, %rbx
+ rorq $5, %rcx
+ xorq %r10, %rcx
+ xorq %r11, %rbx
+ rorq $6, %rcx
+ addq %r9, %r13
+ xorq %r10, %rcx
+ addq %rbx, %r9
+ rorq $28, %rcx
+ movq %r13, %rax
+ addq %rcx, %r9
+ rorq $23, %rax
+ movq %r9, %rbx
+ movq %r14, %rcx
+ addq 248(%rsp), %r8
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ andq %r13, %rcx
+ rorq $4, %rax
+ xorq %r15, %rcx
+ xorq %r13, %rax
+ addq %rcx, %r8
+ rorq $14, %rax
+ xorq %r10, %rbx
+ addq %rax, %r8
+ movq %r9, %rcx
+ andq %rbx, %rdx
+ rorq $5, %rcx
+ xorq %r9, %rcx
+ xorq %r10, %rdx
+ rorq $6, %rcx
+ addq %r8, %r12
+ xorq %r9, %rcx
+ addq %rdx, %r8
+ rorq $28, %rcx
+ movq %r12, %rax
+ addq %rcx, %r8
+ addq $0x100, %rsp
+ subq $0x01, %rsi
+ jnz L_sha512_len_avx2_tail
+ addq (%rdi), %r8
+ addq 8(%rdi), %r9
+ addq 16(%rdi), %r10
+ addq 24(%rdi), %r11
+ addq 32(%rdi), %r12
+ addq 40(%rdi), %r13
+ addq 48(%rdi), %r14
+ addq 56(%rdi), %r15
+ movq 224(%rdi), %rcx
+ addq $0x40, %rsp
+ addq $0x100, %rcx
+ subl $0x100, %ebp
+ movq %rcx, 224(%rdi)
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, 32(%rdi)
+ movq %r13, 40(%rdi)
+ movq %r14, 48(%rdi)
+ movq %r15, 56(%rdi)
+ jnz L_sha512_len_avx2_begin
+L_sha512_len_avx2_done:
+ xorq %rax, %rax
+ vzeroupper
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha512_AVX2_Len,.-Transform_Sha512_AVX2_Len
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# SHA-512 round constants K[0..79] (FIPS 180-4 sec. 4.2.3), one 64-bit word
+# per round.  Loaded via leaq into %rsi by Transform_Sha512_AVX2_RORX and
+# added to the message schedule 32 bytes at a time with vpaddq.
+# NOTE(review): read-only constants placed in writable .data by the
+# generator — .rodata would be preferable; kept as generated.
+L_avx2_rorx_sha512_k:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x6ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x6f067aa72176fba,0xa637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 16
+#else
+.p2align 4
+#endif /* __APPLE__ */
+# SHA-512 round constants K[0..79] with each 16-byte pair (K[2i],K[2i+1])
+# stored twice in a row, so one 256-bit vpaddq applies the same constants
+# to both 128-bit lanes of a ymm register — presumably for the two-block
+# interleaved message schedule in Transform_Sha512_AVX2_RORX (that loop is
+# outside this view; confirm against its vpaddq/cmpq usage).
+# Total size: 80 rounds * 16 bytes = 1280 bytes.
+L_avx2_rorx_sha512_k_2:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0xfc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x6ca6351e003826f,0x142929670a0e6e70
+.quad 0x6ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x6f067aa72176fba,0xa637dc5a2c898a6
+.quad 0x6f067aa72176fba,0xa637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 8
+#else
+.p2align 3
+#endif /* __APPLE__ */
+# Loop-termination sentinel: the address 1024 bytes into the duplicated K
+# table above (64 of the 80 rounds' worth of constants, at 16 bytes/round).
+# A K pointer advanced through the vectorised rounds is compared against
+# this value to exit to the tail rounds — the same pattern as the cmpq
+# against L_avx2_sha512_k_2_end earlier in the file; presumably used
+# likewise by Transform_Sha512_AVX2_RORX (its loop is outside this view;
+# confirm).
+L_avx2_rorx_sha512_k_2_end:
+.quad 1024+L_avx2_rorx_sha512_k_2
+#ifndef __APPLE__
+.data
+#else
+.section __DATA,__data
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.align 32
+#else
+.p2align 5
+#endif /* __APPLE__ */
+# vpshufb control mask that reverses the byte order within each 64-bit
+# lane (byte indices 7..0), converting the big-endian message words of a
+# SHA-512 block to host little-endian order.  Loaded into %ymm15 at the
+# top of Transform_Sha512_AVX2_RORX and applied to each 32-byte message
+# load with vpshufb.
+L_avx2_rorx_sha512_flip_mask:
+.quad 0x1020304050607, 0x8090a0b0c0d0e0f
+.quad 0x1020304050607, 0x8090a0b0c0d0e0f
+#ifndef __APPLE__
+.text
+.globl Transform_Sha512_AVX2_RORX
+.type Transform_Sha512_AVX2_RORX,@function
+.align 4
+Transform_Sha512_AVX2_RORX:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha512_AVX2_RORX
+.p2align 2
+_Transform_Sha512_AVX2_RORX:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $0x88, %rsp
+ leaq 64(%rdi), %rcx
+ vmovdqa L_avx2_rorx_sha512_flip_mask(%rip), %ymm15
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %r11
+ movq 32(%rdi), %r12
+ movq 40(%rdi), %r13
+ movq 48(%rdi), %r14
+ movq 56(%rdi), %r15
+ vmovdqu (%rcx), %ymm0
+ vmovdqu 32(%rcx), %ymm1
+ vpshufb %ymm15, %ymm0, %ymm0
+ vpshufb %ymm15, %ymm1, %ymm1
+ vmovdqu 64(%rcx), %ymm2
+ vmovdqu 96(%rcx), %ymm3
+ vpshufb %ymm15, %ymm2, %ymm2
+ vpshufb %ymm15, %ymm3, %ymm3
+ movl $4, 128(%rsp)
+ leaq L_avx2_rorx_sha512_k(%rip), %rsi
+ movq %r9, %rbx
+ xorq %rdx, %rdx
+ xorq %r10, %rbx
+ # set_w_k: 0
+ vpaddq (%rsi), %ymm0, %ymm8
+ vpaddq 32(%rsi), %ymm1, %ymm9
+ vmovdqu %ymm8, (%rsp)
+ vmovdqu %ymm9, 32(%rsp)
+ vpaddq 64(%rsi), %ymm2, %ymm8
+ vpaddq 96(%rsi), %ymm3, %ymm9
+ vmovdqu %ymm8, 64(%rsp)
+ vmovdqu %ymm9, 96(%rsp)
+ # Start of 16 rounds
+L_sha256_len_avx2_rorx_start:
+ addq $0x80, %rsi
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ vpblendd $3, %ymm1, %ymm0, %ymm12
+ vpblendd $3, %ymm3, %ymm2, %ymm13
+ addq (%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ vpermq $57, %ymm12, %ymm12
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ vpermq $57, %ymm13, %ymm13
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ vperm2I128 $0x81, %ymm3, %ymm3, %ymm14
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ vpxor %ymm10, %ymm8, %ymm8
+ addq 8(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ vpaddq %ymm0, %ymm13, %ymm0
+ vpaddq %ymm0, %ymm8, %ymm0
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ vpsrlq $19, %ymm14, %ymm8
+ vpsllq $45, %ymm14, %ymm9
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm14, %ymm10
+ vpsllq $3, %ymm14, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ vpor %ymm11, %ymm10, %ymm10
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ vpxor %ymm10, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ vpsrlq $6, %ymm14, %ymm11
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ vpxor %ymm11, %ymm8, %ymm8
+ addq 16(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ vpaddq %ymm0, %ymm8, %ymm0
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ vperm2I128 $8, %ymm0, %ymm0, %ymm14
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ vpsrlq $19, %ymm14, %ymm8
+ vpsllq $45, %ymm14, %ymm9
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ vpsrlq $61, %ymm14, %ymm10
+ vpsllq $3, %ymm14, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ vpxor %ymm10, %ymm8, %ymm8
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ vpsrlq $6, %ymm14, %ymm11
+ addq 24(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ vpaddq %ymm0, %ymm8, %ymm0
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ vpaddq (%rsi), %ymm0, %ymm8
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ vmovdqu %ymm8, (%rsp)
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ vpblendd $3, %ymm2, %ymm1, %ymm12
+ vpblendd $3, %ymm0, %ymm3, %ymm13
+ addq 32(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ vpermq $57, %ymm12, %ymm12
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ vpermq $57, %ymm13, %ymm13
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ vperm2I128 $0x81, %ymm0, %ymm0, %ymm14
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ vpxor %ymm10, %ymm8, %ymm8
+ addq 40(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ vpaddq %ymm1, %ymm13, %ymm1
+ vpaddq %ymm1, %ymm8, %ymm1
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ vpsrlq $19, %ymm14, %ymm8
+ vpsllq $45, %ymm14, %ymm9
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm14, %ymm10
+ vpsllq $3, %ymm14, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ vpor %ymm11, %ymm10, %ymm10
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ vpxor %ymm10, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ vpsrlq $6, %ymm14, %ymm11
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ vpxor %ymm11, %ymm8, %ymm8
+ addq 48(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ vpaddq %ymm1, %ymm8, %ymm1
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ vperm2I128 $8, %ymm1, %ymm1, %ymm14
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ vpsrlq $19, %ymm14, %ymm8
+ vpsllq $45, %ymm14, %ymm9
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ vpsrlq $61, %ymm14, %ymm10
+ vpsllq $3, %ymm14, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ vpxor %ymm10, %ymm8, %ymm8
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ vpsrlq $6, %ymm14, %ymm11
+ addq 56(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ vpaddq %ymm1, %ymm8, %ymm1
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ vpaddq 32(%rsi), %ymm1, %ymm8
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ vmovdqu %ymm8, 32(%rsp)
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ vpblendd $3, %ymm3, %ymm2, %ymm12
+ vpblendd $3, %ymm1, %ymm0, %ymm13
+ addq 64(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ vpermq $57, %ymm12, %ymm12
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ vpermq $57, %ymm13, %ymm13
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ vperm2I128 $0x81, %ymm1, %ymm1, %ymm14
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ vpxor %ymm10, %ymm8, %ymm8
+ addq 72(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ vpaddq %ymm2, %ymm13, %ymm2
+ vpaddq %ymm2, %ymm8, %ymm2
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ vpsrlq $19, %ymm14, %ymm8
+ vpsllq $45, %ymm14, %ymm9
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm14, %ymm10
+ vpsllq $3, %ymm14, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ vpor %ymm11, %ymm10, %ymm10
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ vpxor %ymm10, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ vpsrlq $6, %ymm14, %ymm11
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ vpxor %ymm11, %ymm8, %ymm8
+ addq 80(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ vpaddq %ymm2, %ymm8, %ymm2
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ vperm2I128 $8, %ymm2, %ymm2, %ymm14
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ vpsrlq $19, %ymm14, %ymm8
+ vpsllq $45, %ymm14, %ymm9
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ vpsrlq $61, %ymm14, %ymm10
+ vpsllq $3, %ymm14, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ vpxor %ymm10, %ymm8, %ymm8
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ vpsrlq $6, %ymm14, %ymm11
+ addq 88(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ vpaddq %ymm2, %ymm8, %ymm2
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ vpaddq 64(%rsi), %ymm2, %ymm8
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ vmovdqu %ymm8, 64(%rsp)
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ vpblendd $3, %ymm0, %ymm3, %ymm12
+ vpblendd $3, %ymm2, %ymm1, %ymm13
+ addq 96(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ vpermq $57, %ymm12, %ymm12
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ vpermq $57, %ymm13, %ymm13
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ vperm2I128 $0x81, %ymm2, %ymm2, %ymm14
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ vpxor %ymm10, %ymm8, %ymm8
+ addq 104(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm11, %ymm8, %ymm8
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ vpaddq %ymm3, %ymm13, %ymm3
+ vpaddq %ymm3, %ymm8, %ymm3
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ vpsrlq $19, %ymm14, %ymm8
+ vpsllq $45, %ymm14, %ymm9
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm14, %ymm10
+ vpsllq $3, %ymm14, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ vpor %ymm11, %ymm10, %ymm10
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ vpxor %ymm10, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ vpsrlq $6, %ymm14, %ymm11
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ vpxor %ymm11, %ymm8, %ymm8
+ addq 112(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ vpaddq %ymm3, %ymm8, %ymm3
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ vperm2I128 $8, %ymm3, %ymm3, %ymm14
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ vpsrlq $19, %ymm14, %ymm8
+ vpsllq $45, %ymm14, %ymm9
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ vpsrlq $61, %ymm14, %ymm10
+ vpsllq $3, %ymm14, %ymm11
+ vpor %ymm9, %ymm8, %ymm8
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ vpor %ymm11, %ymm10, %ymm10
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ vpxor %ymm10, %ymm8, %ymm8
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ vpsrlq $6, %ymm14, %ymm11
+ addq 120(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ vpaddq %ymm3, %ymm8, %ymm3
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ vpaddq 96(%rsi), %ymm3, %ymm8
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ vmovdqu %ymm8, 96(%rsp)
+ subl $0x01, 128(%rsp)
+ jne L_sha256_len_avx2_rorx_start
+ # rnd_all_4: 0-3
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq (%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 8(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 16(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 24(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_4: 4-7
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 32(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 40(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 48(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 56(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ # rnd_all_4: 8-11
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq 64(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 72(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 80(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 88(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_4: 12-15
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 96(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 104(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 112(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 120(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ addq %rdx, %r8
+ addq %r8, (%rdi)
+ addq %r9, 8(%rdi)
+ addq %r10, 16(%rdi)
+ addq %r11, 24(%rdi)
+ addq %r12, 32(%rdi)
+ addq %r13, 40(%rdi)
+ addq %r14, 48(%rdi)
+ addq %r15, 56(%rdi)
+ xorq %rax, %rax
+ vzeroupper
+ addq $0x88, %rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha512_AVX2_RORX,.-Transform_Sha512_AVX2_RORX
+#endif /* __APPLE__ */
+#ifndef __APPLE__
+.text
+.globl Transform_Sha512_AVX2_RORX_Len
+.type Transform_Sha512_AVX2_RORX_Len,@function
+.align 4
+Transform_Sha512_AVX2_RORX_Len:
+#else
+.section __TEXT,__text
+.globl _Transform_Sha512_AVX2_RORX_Len
+.p2align 2
+_Transform_Sha512_AVX2_RORX_Len:
+#endif /* __APPLE__ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+ testb $0x80, %sil
+ je L_sha512_len_avx2_rorx_block
+ movq 224(%rdi), %rax
+ push %rsi
+ vmovdqu (%rax), %ymm0
+ vmovdqu 32(%rax), %ymm1
+ vmovdqu 64(%rax), %ymm2
+ vmovdqu 96(%rax), %ymm3
+ vmovups %ymm0, 64(%rdi)
+ vmovups %ymm1, 96(%rdi)
+ vmovups %ymm2, 128(%rdi)
+ vmovups %ymm3, 160(%rdi)
+#ifndef __APPLE__
+ call Transform_Sha512_AVX2_RORX@plt
+#else
+ call _Transform_Sha512_AVX2_RORX
+#endif /* __APPLE__ */
+ pop %rsi
+ addq $0x80, 224(%rdi)
+ subl $0x80, %esi
+ jz L_sha512_len_avx2_rorx_done
+L_sha512_len_avx2_rorx_block:
+ movq 224(%rdi), %rax
+ vmovdqa L_avx2_rorx_sha512_flip_mask(%rip), %ymm15
+ movq (%rdi), %r8
+ movq 8(%rdi), %r9
+ movq 16(%rdi), %r10
+ movq 24(%rdi), %r11
+ movq 32(%rdi), %r12
+ movq 40(%rdi), %r13
+ movq 48(%rdi), %r14
+ movq 56(%rdi), %r15
+ # Start of loop processing two blocks
+L_sha512_len_avx2_rorx_begin:
+ subq $0x540, %rsp
+ leaq L_avx2_rorx_sha512_k_2(%rip), %rbp
+ movq %r9, %rbx
+ xorq %rdx, %rdx
+ vmovdqu (%rax), %xmm0
+ vmovdqu 16(%rax), %xmm1
+ vinserti128 $0x01, 128(%rax), %ymm0, %ymm0
+ vinserti128 $0x01, 144(%rax), %ymm1, %ymm1
+ vpshufb %ymm15, %ymm0, %ymm0
+ vpshufb %ymm15, %ymm1, %ymm1
+ vmovdqu 32(%rax), %xmm2
+ vmovdqu 48(%rax), %xmm3
+ vinserti128 $0x01, 160(%rax), %ymm2, %ymm2
+ vinserti128 $0x01, 176(%rax), %ymm3, %ymm3
+ vpshufb %ymm15, %ymm2, %ymm2
+ vpshufb %ymm15, %ymm3, %ymm3
+ vmovdqu 64(%rax), %xmm4
+ vmovdqu 80(%rax), %xmm5
+ vinserti128 $0x01, 192(%rax), %ymm4, %ymm4
+ vinserti128 $0x01, 208(%rax), %ymm5, %ymm5
+ vpshufb %ymm15, %ymm4, %ymm4
+ vpshufb %ymm15, %ymm5, %ymm5
+ vmovdqu 96(%rax), %xmm6
+ vmovdqu 112(%rax), %xmm7
+ vinserti128 $0x01, 224(%rax), %ymm6, %ymm6
+ vinserti128 $0x01, 240(%rax), %ymm7, %ymm7
+ vpshufb %ymm15, %ymm6, %ymm6
+ vpshufb %ymm15, %ymm7, %ymm7
+ xorq %r10, %rbx
+ # Start of 16 rounds
+L_sha512_len_avx2_rorx_start:
+ vpaddq (%rbp), %ymm0, %ymm8
+ vpaddq 32(%rbp), %ymm1, %ymm9
+ vmovdqu %ymm8, (%rsp)
+ vmovdqu %ymm9, 32(%rsp)
+ vpaddq 64(%rbp), %ymm2, %ymm8
+ vpaddq 96(%rbp), %ymm3, %ymm9
+ vmovdqu %ymm8, 64(%rsp)
+ vmovdqu %ymm9, 96(%rsp)
+ vpaddq 128(%rbp), %ymm4, %ymm8
+ vpaddq 160(%rbp), %ymm5, %ymm9
+ vmovdqu %ymm8, 128(%rsp)
+ vmovdqu %ymm9, 160(%rsp)
+ vpaddq 192(%rbp), %ymm6, %ymm8
+ vpaddq 224(%rbp), %ymm7, %ymm9
+ vmovdqu %ymm8, 192(%rsp)
+ vmovdqu %ymm9, 224(%rsp)
+ # msg_sched: 0-1
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ vpalignr $8, %ymm0, %ymm1, %ymm12
+ addq (%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ vpalignr $8, %ymm4, %ymm5, %ymm13
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm0, %ymm13, %ymm0
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ vpaddq %ymm0, %ymm8, %ymm0
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ vpsrlq $19, %ymm7, %ymm8
+ vpsllq $45, %ymm7, %ymm9
+ addq 8(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm7, %ymm10
+ vpsllq $3, %ymm7, %ymm11
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm7, %ymm11
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ vpaddq %ymm0, %ymm8, %ymm0
+ # msg_sched done: 0-3
+ # msg_sched: 4-5
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ vpalignr $8, %ymm1, %ymm2, %ymm12
+ addq 32(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ vpalignr $8, %ymm5, %ymm6, %ymm13
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm1, %ymm13, %ymm1
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ vpaddq %ymm1, %ymm8, %ymm1
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ vpsrlq $19, %ymm0, %ymm8
+ vpsllq $45, %ymm0, %ymm9
+ addq 40(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm0, %ymm10
+ vpsllq $3, %ymm0, %ymm11
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm0, %ymm11
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ vpaddq %ymm1, %ymm8, %ymm1
+ # msg_sched done: 4-7
+ # msg_sched: 8-9
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ vpalignr $8, %ymm2, %ymm3, %ymm12
+ addq 64(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ vpalignr $8, %ymm6, %ymm7, %ymm13
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm2, %ymm13, %ymm2
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ vpaddq %ymm2, %ymm8, %ymm2
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ vpsrlq $19, %ymm1, %ymm8
+ vpsllq $45, %ymm1, %ymm9
+ addq 72(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm1, %ymm10
+ vpsllq $3, %ymm1, %ymm11
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm1, %ymm11
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ vpaddq %ymm2, %ymm8, %ymm2
+ # msg_sched done: 8-11
+ # msg_sched: 12-13
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ vpalignr $8, %ymm3, %ymm4, %ymm12
+ addq 96(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ vpalignr $8, %ymm7, %ymm0, %ymm13
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm3, %ymm13, %ymm3
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ vpaddq %ymm3, %ymm8, %ymm3
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ vpsrlq $19, %ymm2, %ymm8
+ vpsllq $45, %ymm2, %ymm9
+ addq 104(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm2, %ymm10
+ vpsllq $3, %ymm2, %ymm11
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm2, %ymm11
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ vpaddq %ymm3, %ymm8, %ymm3
+ # msg_sched done: 12-15
+ # msg_sched: 16-17
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ vpalignr $8, %ymm4, %ymm5, %ymm12
+ addq 128(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ vpalignr $8, %ymm0, %ymm1, %ymm13
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm4, %ymm13, %ymm4
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ vpaddq %ymm4, %ymm8, %ymm4
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ vpsrlq $19, %ymm3, %ymm8
+ vpsllq $45, %ymm3, %ymm9
+ addq 136(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm3, %ymm10
+ vpsllq $3, %ymm3, %ymm11
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm3, %ymm11
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ vpaddq %ymm4, %ymm8, %ymm4
+ # msg_sched done: 16-19
+ # msg_sched: 20-21
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ vpalignr $8, %ymm5, %ymm6, %ymm12
+ addq 160(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ vpalignr $8, %ymm1, %ymm2, %ymm13
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm5, %ymm13, %ymm5
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ vpaddq %ymm5, %ymm8, %ymm5
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ vpsrlq $19, %ymm4, %ymm8
+ vpsllq $45, %ymm4, %ymm9
+ addq 168(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm4, %ymm10
+ vpsllq $3, %ymm4, %ymm11
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm4, %ymm11
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ vpaddq %ymm5, %ymm8, %ymm5
+ # msg_sched done: 20-23
+ # msg_sched: 24-25
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ vpalignr $8, %ymm6, %ymm7, %ymm12
+ addq 192(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ vpalignr $8, %ymm2, %ymm3, %ymm13
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm6, %ymm13, %ymm6
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ vpaddq %ymm6, %ymm8, %ymm6
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ vpsrlq $19, %ymm5, %ymm8
+ vpsllq $45, %ymm5, %ymm9
+ addq 200(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm5, %ymm10
+ vpsllq $3, %ymm5, %ymm11
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm5, %ymm11
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ vpaddq %ymm6, %ymm8, %ymm6
+ # msg_sched done: 24-27
+ # msg_sched: 28-29
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ vpalignr $8, %ymm7, %ymm0, %ymm12
+ addq 224(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ vpalignr $8, %ymm3, %ymm4, %ymm13
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ vpsrlq $0x01, %ymm12, %ymm8
+ vpsllq $63, %ymm12, %ymm9
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ vpsrlq $8, %ymm12, %ymm10
+ vpsllq $56, %ymm12, %ymm11
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ vpsrlq $7, %ymm12, %ymm11
+ vpxor %ymm10, %ymm8, %ymm8
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ vpxor %ymm11, %ymm8, %ymm8
+ vpaddq %ymm7, %ymm13, %ymm7
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ vpaddq %ymm7, %ymm8, %ymm7
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ vpsrlq $19, %ymm6, %ymm8
+ vpsllq $45, %ymm6, %ymm9
+ addq 232(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ vpsrlq $61, %ymm6, %ymm10
+ vpsllq $3, %ymm6, %ymm11
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ vpor %ymm9, %ymm8, %ymm8
+ vpor %ymm11, %ymm10, %ymm10
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ vpxor %ymm10, %ymm8, %ymm8
+ vpsrlq $6, %ymm6, %ymm11
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ vpxor %ymm11, %ymm8, %ymm8
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ vpaddq %ymm7, %ymm8, %ymm7
+ # msg_sched done: 28-31
+ addq $0x100, %rbp
+ addq $0x100, %rsp
+ cmpq L_avx2_rorx_sha512_k_2_end(%rip), %rbp
+ jne L_sha512_len_avx2_rorx_start
+ vpaddq (%rbp), %ymm0, %ymm8
+ vpaddq 32(%rbp), %ymm1, %ymm9
+ vmovdqu %ymm8, (%rsp)
+ vmovdqu %ymm9, 32(%rsp)
+ vpaddq 64(%rbp), %ymm2, %ymm8
+ vpaddq 96(%rbp), %ymm3, %ymm9
+ vmovdqu %ymm8, 64(%rsp)
+ vmovdqu %ymm9, 96(%rsp)
+ vpaddq 128(%rbp), %ymm4, %ymm8
+ vpaddq 160(%rbp), %ymm5, %ymm9
+ vmovdqu %ymm8, 128(%rsp)
+ vmovdqu %ymm9, 160(%rsp)
+ vpaddq 192(%rbp), %ymm6, %ymm8
+ vpaddq 224(%rbp), %ymm7, %ymm9
+ vmovdqu %ymm8, 192(%rsp)
+ vmovdqu %ymm9, 224(%rsp)
+ # rnd_all_2: 0-1
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq (%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 8(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ # rnd_all_2: 4-5
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 32(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 40(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_2: 8-9
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 64(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 72(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ # rnd_all_2: 12-13
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 96(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 104(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ # rnd_all_2: 16-17
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq 128(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 136(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ # rnd_all_2: 20-21
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 160(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 168(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_2: 24-25
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 192(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 200(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ # rnd_all_2: 28-29
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 224(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 232(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ addq %rdx, %r8
+ subq $0x400, %rsp
+ addq (%rdi), %r8
+ addq 8(%rdi), %r9
+ addq 16(%rdi), %r10
+ addq 24(%rdi), %r11
+ addq 32(%rdi), %r12
+ addq 40(%rdi), %r13
+ addq 48(%rdi), %r14
+ addq 56(%rdi), %r15
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, 32(%rdi)
+ movq %r13, 40(%rdi)
+ movq %r14, 48(%rdi)
+ movq %r15, 56(%rdi)
+ movq %r9, %rbx
+ xorq %rdx, %rdx
+ xorq %r10, %rbx
+ movq $5, %rbp
+L_sha512_len_avx2_rorx_tail:
+ # rnd_all_2: 2-3
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq 16(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 24(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ # rnd_all_2: 6-7
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 48(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 56(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_2: 10-11
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 80(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 88(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ # rnd_all_2: 14-15
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 112(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 120(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ # rnd_all_2: 18-19
+ rorxq $14, %r12, %rax
+ rorxq $18, %r12, %rcx
+ addq %rdx, %r8
+ addq 144(%rsp), %r15
+ movq %r13, %rdx
+ xorq %rax, %rcx
+ xorq %r14, %rdx
+ rorxq $41, %r12, %rax
+ xorq %rcx, %rax
+ andq %r12, %rdx
+ addq %rax, %r15
+ rorxq $28, %r8, %rax
+ rorxq $34, %r8, %rcx
+ xorq %r14, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r8, %rax
+ addq %rdx, %r15
+ xorq %rcx, %rax
+ movq %r9, %rdx
+ addq %r15, %r11
+ xorq %r8, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r15
+ xorq %r9, %rbx
+ rorxq $14, %r11, %rax
+ rorxq $18, %r11, %rcx
+ addq %rbx, %r15
+ addq 152(%rsp), %r14
+ movq %r12, %rbx
+ xorq %rax, %rcx
+ xorq %r13, %rbx
+ rorxq $41, %r11, %rax
+ xorq %rcx, %rax
+ andq %r11, %rbx
+ addq %rax, %r14
+ rorxq $28, %r15, %rax
+ rorxq $34, %r15, %rcx
+ xorq %r13, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r15, %rax
+ addq %rbx, %r14
+ xorq %rcx, %rax
+ movq %r8, %rbx
+ leaq (%r10,%r14,1), %r10
+ xorq %r15, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r14
+ xorq %r8, %rdx
+ # rnd_all_2: 22-23
+ rorxq $14, %r10, %rax
+ rorxq $18, %r10, %rcx
+ addq %rdx, %r14
+ addq 176(%rsp), %r13
+ movq %r11, %rdx
+ xorq %rax, %rcx
+ xorq %r12, %rdx
+ rorxq $41, %r10, %rax
+ xorq %rcx, %rax
+ andq %r10, %rdx
+ addq %rax, %r13
+ rorxq $28, %r14, %rax
+ rorxq $34, %r14, %rcx
+ xorq %r12, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r14, %rax
+ addq %rdx, %r13
+ xorq %rcx, %rax
+ movq %r15, %rdx
+ addq %r13, %r9
+ xorq %r14, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r13
+ xorq %r15, %rbx
+ rorxq $14, %r9, %rax
+ rorxq $18, %r9, %rcx
+ addq %rbx, %r13
+ addq 184(%rsp), %r12
+ movq %r10, %rbx
+ xorq %rax, %rcx
+ xorq %r11, %rbx
+ rorxq $41, %r9, %rax
+ xorq %rcx, %rax
+ andq %r9, %rbx
+ addq %rax, %r12
+ rorxq $28, %r13, %rax
+ rorxq $34, %r13, %rcx
+ xorq %r11, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r13, %rax
+ addq %rbx, %r12
+ xorq %rcx, %rax
+ movq %r14, %rbx
+ leaq (%r8,%r12,1), %r8
+ xorq %r13, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r12
+ xorq %r14, %rdx
+ # rnd_all_2: 26-27
+ rorxq $14, %r8, %rax
+ rorxq $18, %r8, %rcx
+ addq %rdx, %r12
+ addq 208(%rsp), %r11
+ movq %r9, %rdx
+ xorq %rax, %rcx
+ xorq %r10, %rdx
+ rorxq $41, %r8, %rax
+ xorq %rcx, %rax
+ andq %r8, %rdx
+ addq %rax, %r11
+ rorxq $28, %r12, %rax
+ rorxq $34, %r12, %rcx
+ xorq %r10, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r12, %rax
+ addq %rdx, %r11
+ xorq %rcx, %rax
+ movq %r13, %rdx
+ addq %r11, %r15
+ xorq %r12, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r11
+ xorq %r13, %rbx
+ rorxq $14, %r15, %rax
+ rorxq $18, %r15, %rcx
+ addq %rbx, %r11
+ addq 216(%rsp), %r10
+ movq %r8, %rbx
+ xorq %rax, %rcx
+ xorq %r9, %rbx
+ rorxq $41, %r15, %rax
+ xorq %rcx, %rax
+ andq %r15, %rbx
+ addq %rax, %r10
+ rorxq $28, %r11, %rax
+ rorxq $34, %r11, %rcx
+ xorq %r9, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r11, %rax
+ addq %rbx, %r10
+ xorq %rcx, %rax
+ movq %r12, %rbx
+ leaq (%r14,%r10,1), %r14
+ xorq %r11, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r10
+ xorq %r12, %rdx
+ # rnd_all_2: 30-31
+ rorxq $14, %r14, %rax
+ rorxq $18, %r14, %rcx
+ addq %rdx, %r10
+ addq 240(%rsp), %r9
+ movq %r15, %rdx
+ xorq %rax, %rcx
+ xorq %r8, %rdx
+ rorxq $41, %r14, %rax
+ xorq %rcx, %rax
+ andq %r14, %rdx
+ addq %rax, %r9
+ rorxq $28, %r10, %rax
+ rorxq $34, %r10, %rcx
+ xorq %r8, %rdx
+ xorq %rax, %rcx
+ rorxq $39, %r10, %rax
+ addq %rdx, %r9
+ xorq %rcx, %rax
+ movq %r11, %rdx
+ addq %r9, %r13
+ xorq %r10, %rdx
+ andq %rdx, %rbx
+ addq %rax, %r9
+ xorq %r11, %rbx
+ rorxq $14, %r13, %rax
+ rorxq $18, %r13, %rcx
+ addq %rbx, %r9
+ addq 248(%rsp), %r8
+ movq %r14, %rbx
+ xorq %rax, %rcx
+ xorq %r15, %rbx
+ rorxq $41, %r13, %rax
+ xorq %rcx, %rax
+ andq %r13, %rbx
+ addq %rax, %r8
+ rorxq $28, %r9, %rax
+ rorxq $34, %r9, %rcx
+ xorq %r15, %rbx
+ xorq %rax, %rcx
+ rorxq $39, %r9, %rax
+ addq %rbx, %r8
+ xorq %rcx, %rax
+ movq %r10, %rbx
+ leaq (%r12,%r8,1), %r12
+ xorq %r9, %rbx
+ andq %rbx, %rdx
+ addq %rax, %r8
+ xorq %r10, %rdx
+ addq $0x100, %rsp
+ subq $0x01, %rbp
+ jnz L_sha512_len_avx2_rorx_tail
+ addq %rdx, %r8
+ addq (%rdi), %r8
+ addq 8(%rdi), %r9
+ addq 16(%rdi), %r10
+ addq 24(%rdi), %r11
+ addq 32(%rdi), %r12
+ addq 40(%rdi), %r13
+ addq 48(%rdi), %r14
+ addq 56(%rdi), %r15
+ movq 224(%rdi), %rax
+ addq $0x40, %rsp
+ addq $0x100, %rax
+ subl $0x100, %esi
+ movq %rax, 224(%rdi)
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, 32(%rdi)
+ movq %r13, 40(%rdi)
+ movq %r14, 48(%rdi)
+ movq %r15, 56(%rdi)
+ jnz L_sha512_len_avx2_rorx_begin
+L_sha512_len_avx2_rorx_done:
+ xorq %rax, %rax
+ vzeroupper
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ repz retq
+#ifndef __APPLE__
+.size Transform_Sha512_AVX2_RORX_Len,.-Transform_Sha512_AVX2_RORX_Len
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/signature.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/signature.c
new file mode 100644
index 000000000..5d503338a
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/signature.c
@@ -0,0 +1,559 @@
+/* signature.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/signature.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifndef NO_ASN
+#include <wolfssl/wolfcrypt/asn.h>
+#endif
+#ifdef HAVE_ECC
+#include <wolfssl/wolfcrypt/ecc.h>
+#endif
+#ifndef NO_RSA
+#include <wolfssl/wolfcrypt/rsa.h>
+#endif
+
+/* If ECC and RSA are disabled then disable signature wrapper */
+#if (!defined(HAVE_ECC) || (defined(HAVE_ECC) && !defined(HAVE_ECC_SIGN) \
+ && !defined(HAVE_ECC_VERIFY))) && defined(NO_RSA)
+ #undef NO_SIG_WRAPPER
+ #define NO_SIG_WRAPPER
+#endif
+
+/* Signature wrapper disabled check */
+#ifndef NO_SIG_WRAPPER
+
+#if !defined(NO_RSA) && !defined(NO_ASN)
+static int wc_SignatureDerEncode(enum wc_HashType hash_type, byte* hash_data,
+ word32 hash_len, word32* hash_enc_len)
+{
+ int ret, oid;
+
+ ret = wc_HashGetOID(hash_type);
+ if (ret < 0) {
+ return ret;
+ }
+ oid = ret;
+
+ ret = wc_EncodeSignature(hash_data, hash_data, hash_len, oid);
+ if (ret > 0) {
+ *hash_enc_len = ret;
+ ret = 0;
+ }
+
+ return ret;
+}
+#endif /* !NO_RSA && !NO_ASN */
+
+int wc_SignatureGetSize(enum wc_SignatureType sig_type,
+ const void* key, word32 key_len)
+{
+ int sig_len = BAD_FUNC_ARG;
+
+ /* Suppress possible unused args if all signature types are disabled */
+ (void)key;
+ (void)key_len;
+
+ switch(sig_type) {
+ case WC_SIGNATURE_TYPE_ECC:
+#ifdef HAVE_ECC
+ /* Sanity check that void* key is at least ecc_key in size */
+ if (key_len >= sizeof(ecc_key)) {
+ sig_len = wc_ecc_sig_size((ecc_key*)key);
+ }
+ else {
+ WOLFSSL_MSG("wc_SignatureGetSize: Invalid ECC key size");
+ }
+#else
+ sig_len = SIG_TYPE_E;
+#endif
+ break;
+
+ case WC_SIGNATURE_TYPE_RSA_W_ENC:
+ case WC_SIGNATURE_TYPE_RSA:
+#ifndef NO_RSA
+ /* Sanity check that void* key is at least RsaKey in size */
+ if (key_len >= sizeof(RsaKey)) {
+ sig_len = wc_RsaEncryptSize((RsaKey*)key);
+ }
+ else {
+ WOLFSSL_MSG("wc_SignatureGetSize: Invalid RsaKey key size");
+ }
+#else
+ sig_len = SIG_TYPE_E;
+#endif
+ break;
+
+ case WC_SIGNATURE_TYPE_NONE:
+ default:
+ sig_len = BAD_FUNC_ARG;
+ break;
+ }
+ return sig_len;
+}
+
+int wc_SignatureVerifyHash(
+ enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+ const byte* hash_data, word32 hash_len,
+ const byte* sig, word32 sig_len,
+ const void* key, word32 key_len)
+{
+ int ret;
+
+ /* Check arguments */
+ if (hash_data == NULL || hash_len == 0 ||
+ sig == NULL || sig_len == 0 ||
+ key == NULL || key_len == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* Validate signature len (1 to max is okay) */
+ if ((int)sig_len > wc_SignatureGetSize(sig_type, key, key_len)) {
+ WOLFSSL_MSG("wc_SignatureVerify: Invalid sig type/len");
+ return BAD_FUNC_ARG;
+ }
+
+ /* Validate hash size */
+ ret = wc_HashGetDigestSize(hash_type);
+ if (ret < 0) {
+ WOLFSSL_MSG("wc_SignatureVerify: Invalid hash type/len");
+ return ret;
+ }
+ ret = 0;
+
+ /* Verify signature using hash */
+ switch (sig_type) {
+ case WC_SIGNATURE_TYPE_ECC:
+ {
+#if defined(HAVE_ECC) && defined(HAVE_ECC_VERIFY)
+ int is_valid_sig = 0;
+
+ /* Perform verification of signature using provided ECC key */
+ do {
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wc_AsyncWait(ret, &((ecc_key*)key)->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0)
+ ret = wc_ecc_verify_hash(sig, sig_len, hash_data, hash_len,
+ &is_valid_sig, (ecc_key*)key);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0 || is_valid_sig != 1) {
+ ret = SIG_VERIFY_E;
+ }
+#else
+ ret = SIG_TYPE_E;
+#endif
+ break;
+ }
+
+ case WC_SIGNATURE_TYPE_RSA_W_ENC:
+ case WC_SIGNATURE_TYPE_RSA:
+ {
+#ifndef NO_RSA
+#if defined(WOLFSSL_CRYPTOCELL)
+ /* the signature must propagate to the cryptocell to get verfied */
+ if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+ ret = cc310_RsaSSL_Verify(hash_data, hash_len,(byte*)sig, key,
+ CRYS_RSA_HASH_SHA256_mode);
+ }
+ else {
+ ret = cc310_RsaSSL_Verify(hash_data, hash_len,(byte*)sig, key,
+ CRYS_RSA_After_SHA256_mode);
+ }
+
+ if (ret != 0) {
+ WOLFSSL_MSG("RSA Signature Verify difference!");
+ ret = SIG_VERIFY_E;
+ }
+
+#else /* WOLFSSL_CRYPTOCELL */
+
+ word32 plain_len = hash_len;
+ byte *plain_data;
+
+ /* Make sure the plain text output is at least key size */
+ if (plain_len < sig_len) {
+ plain_len = sig_len;
+ }
+ plain_data = (byte*)XMALLOC(plain_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (plain_data) {
+ /* Perform verification of signature using provided RSA key */
+ do {
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wc_AsyncWait(ret, &((RsaKey*)key)->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0)
+ ret = wc_RsaSSL_Verify(sig, sig_len, plain_data,
+ plain_len, (RsaKey*)key);
+ } while (ret == WC_PENDING_E);
+ if (ret >= 0) {
+ if ((word32)ret == hash_len &&
+ XMEMCMP(plain_data, hash_data, hash_len) == 0) {
+ ret = 0; /* Success */
+ }
+ else {
+ WOLFSSL_MSG("RSA Signature Verify difference!");
+ ret = SIG_VERIFY_E;
+ }
+ }
+ XFREE(plain_data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ else {
+ ret = MEMORY_E;
+ }
+#endif /* !WOLFSSL_CRYPTOCELL */
+#else
+ ret = SIG_TYPE_E;
+#endif
+ break;
+ }
+
+ case WC_SIGNATURE_TYPE_NONE:
+ default:
+ ret = BAD_FUNC_ARG;
+ break;
+ }
+
+ return ret;
+}
+
+int wc_SignatureVerify(
+ enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+ const byte* data, word32 data_len,
+ const byte* sig, word32 sig_len,
+ const void* key, word32 key_len)
+{
+ int ret;
+ word32 hash_len, hash_enc_len;
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+ byte *hash_data;
+#else
+ byte hash_data[MAX_DER_DIGEST_SZ];
+#endif
+
+ /* Check arguments */
+ if (data == NULL || data_len == 0 ||
+ sig == NULL || sig_len == 0 ||
+ key == NULL || key_len == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* Validate signature len (1 to max is okay) */
+ if ((int)sig_len > wc_SignatureGetSize(sig_type, key, key_len)) {
+ WOLFSSL_MSG("wc_SignatureVerify: Invalid sig type/len");
+ return BAD_FUNC_ARG;
+ }
+
+ /* Validate hash size */
+ ret = wc_HashGetDigestSize(hash_type);
+ if (ret < 0) {
+ WOLFSSL_MSG("wc_SignatureVerify: Invalid hash type/len");
+ return ret;
+ }
+ hash_enc_len = hash_len = ret;
+
+#ifndef NO_RSA
+ if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+ /* For RSA with ASN.1 encoding include room */
+ hash_enc_len += MAX_DER_DIGEST_ASN_SZ;
+ }
+#endif
+
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+ /* Allocate temporary buffer for hash data */
+ hash_data = (byte*)XMALLOC(hash_enc_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (hash_data == NULL) {
+ return MEMORY_E;
+ }
+#endif
+
+ /* Perform hash of data */
+ ret = wc_Hash(hash_type, data, data_len, hash_data, hash_len);
+ if (ret == 0) {
+ /* Handle RSA with DER encoding */
+ if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+ #if defined(NO_RSA) || defined(NO_ASN)
+ ret = SIG_TYPE_E;
+ #else
+ ret = wc_SignatureDerEncode(hash_type, hash_data, hash_len,
+ &hash_enc_len);
+ #endif
+ }
+
+ if (ret == 0) {
+#if defined(WOLFSSL_CRYPTOCELL)
+ if ((sig_type == WC_SIGNATURE_TYPE_RSA)
+ || (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC)) {
+ if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+ ret = cc310_RsaSSL_Verify(hash_data, hash_len, sig, key,
+ cc310_hashModeRSA(hash_type, 0));
+ }
+ else {
+ ret = cc310_RsaSSL_Verify(hash_data, hash_len, sig, key,
+ cc310_hashModeRSA(hash_type, 1));
+ }
+ }
+#else
+ /* Verify signature using hash */
+ ret = wc_SignatureVerifyHash(hash_type, sig_type,
+ hash_data, hash_enc_len, sig, sig_len, key, key_len);
+#endif /* WOLFSSL_CRYPTOCELL */
+ }
+ }
+
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+ XFREE(hash_data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+
+int wc_SignatureGenerateHash(
+ enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+ const byte* hash_data, word32 hash_len,
+ byte* sig, word32 *sig_len,
+ const void* key, word32 key_len, WC_RNG* rng)
+{
+ return wc_SignatureGenerateHash_ex(hash_type, sig_type, hash_data, hash_len,
+ sig, sig_len, key, key_len, rng, 1);
+}
+
+int wc_SignatureGenerateHash_ex(
+ enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+ const byte* hash_data, word32 hash_len,
+ byte* sig, word32 *sig_len,
+ const void* key, word32 key_len, WC_RNG* rng, int verify)
+{
+ int ret;
+
+ /* Suppress possible unused arg if all signature types are disabled */
+ (void)rng;
+
+ /* Check arguments */
+ if (hash_data == NULL || hash_len == 0 ||
+ sig == NULL || sig_len == NULL || *sig_len == 0 ||
+ key == NULL || key_len == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* Validate signature len (needs to be at least max) */
+ if ((int)*sig_len < wc_SignatureGetSize(sig_type, key, key_len)) {
+ WOLFSSL_MSG("wc_SignatureGenerate: Invalid sig type/len");
+ return BAD_FUNC_ARG;
+ }
+
+ /* Validate hash size */
+ ret = wc_HashGetDigestSize(hash_type);
+ if (ret < 0) {
+ WOLFSSL_MSG("wc_SignatureGenerate: Invalid hash type/len");
+ return ret;
+ }
+ ret = 0;
+
+ /* Create signature using hash as data */
+ switch (sig_type) {
+ case WC_SIGNATURE_TYPE_ECC:
+#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN)
+ /* Create signature using provided ECC key */
+ do {
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wc_AsyncWait(ret, &((ecc_key*)key)->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0)
+ ret = wc_ecc_sign_hash(hash_data, hash_len, sig, sig_len,
+ rng, (ecc_key*)key);
+ } while (ret == WC_PENDING_E);
+#else
+ ret = SIG_TYPE_E;
+#endif
+ break;
+
+ case WC_SIGNATURE_TYPE_RSA_W_ENC:
+ case WC_SIGNATURE_TYPE_RSA:
+#if !defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ #if defined(WOLFSSL_CRYPTOCELL)
+ if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+ ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, key,
+ cc310_hashModeRSA(hash_type, 0));
+ }
+ else {
+ ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, key,
+ cc310_hashModeRSA(hash_type, 1));
+ }
+ #else
+ /* Create signature using provided RSA key */
+ do {
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wc_AsyncWait(ret, &((RsaKey*)key)->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0)
+ ret = wc_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len,
+ (RsaKey*)key, rng);
+ } while (ret == WC_PENDING_E);
+ #endif /* WOLFSSL_CRYPTOCELL */
+ if (ret >= 0) {
+ *sig_len = ret;
+ ret = 0; /* Success */
+ }
+#else
+ ret = SIG_TYPE_E;
+#endif
+ break;
+
+ case WC_SIGNATURE_TYPE_NONE:
+ default:
+ ret = BAD_FUNC_ARG;
+ break;
+ }
+
+ if (ret == 0 && verify) {
+ ret = wc_SignatureVerifyHash(hash_type, sig_type, hash_data, hash_len,
+ sig, *sig_len, key, key_len);
+ }
+
+ return ret;
+}
+
+int wc_SignatureGenerate(
+ enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+ const byte* data, word32 data_len,
+ byte* sig, word32 *sig_len,
+ const void* key, word32 key_len, WC_RNG* rng)
+{
+ return wc_SignatureGenerate_ex(hash_type, sig_type, data, data_len, sig,
+ sig_len, key, key_len, rng, 1);
+}
+
+int wc_SignatureGenerate_ex(
+ enum wc_HashType hash_type, enum wc_SignatureType sig_type,
+ const byte* data, word32 data_len,
+ byte* sig, word32 *sig_len,
+ const void* key, word32 key_len, WC_RNG* rng, int verify)
+{
+ int ret;
+ word32 hash_len, hash_enc_len;
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+ byte *hash_data;
+#else
+ byte hash_data[MAX_DER_DIGEST_SZ];
+#endif
+
+ /* Check arguments */
+ if (data == NULL || data_len == 0 ||
+ sig == NULL || sig_len == NULL || *sig_len == 0 ||
+ key == NULL || key_len == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* Validate signature len (needs to be at least max) */
+ if ((int)*sig_len < wc_SignatureGetSize(sig_type, key, key_len)) {
+ WOLFSSL_MSG("wc_SignatureGenerate: Invalid sig type/len");
+ return BAD_FUNC_ARG;
+ }
+
+ /* Validate hash size */
+ ret = wc_HashGetDigestSize(hash_type);
+ if (ret < 0) {
+ WOLFSSL_MSG("wc_SignatureGenerate: Invalid hash type/len");
+ return ret;
+ }
+ hash_enc_len = hash_len = ret;
+
+#if !defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+ /* For RSA with ASN.1 encoding include room */
+ hash_enc_len += MAX_DER_DIGEST_ASN_SZ;
+ }
+#endif
+
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+ /* Allocate temporary buffer for hash data */
+ hash_data = (byte*)XMALLOC(hash_enc_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (hash_data == NULL) {
+ return MEMORY_E;
+ }
+#endif
+
+ /* Perform hash of data */
+ ret = wc_Hash(hash_type, data, data_len, hash_data, hash_len);
+ if (ret == 0) {
+ /* Handle RSA with DER encoding */
+ if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+ #if defined(NO_RSA) || defined(NO_ASN) || \
+ defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ ret = SIG_TYPE_E;
+ #else
+ ret = wc_SignatureDerEncode(hash_type, hash_data, hash_len,
+ &hash_enc_len);
+ #endif
+ }
+ if (ret == 0) {
+#if defined(WOLFSSL_CRYPTOCELL)
+ if ((sig_type == WC_SIGNATURE_TYPE_RSA)
+ || (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC)) {
+ if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) {
+ ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len,
+ key, cc310_hashModeRSA(hash_type, 0));
+ }
+ else {
+ ret = cc310_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len,
+ key, cc310_hashModeRSA(hash_type, 1));
+ }
+
+ if (ret == *sig_len) {
+ ret = 0;
+ }
+ }
+ }
+ }
+#else
+ /* Generate signature using hash */
+ ret = wc_SignatureGenerateHash(hash_type, sig_type,
+ hash_data, hash_enc_len, sig, sig_len, key, key_len, rng);
+ }
+ }
+
+ if (ret == 0 && verify) {
+ ret = wc_SignatureVerifyHash(hash_type, sig_type, hash_data,
+ hash_enc_len, sig, *sig_len, key, key_len);
+ }
+#endif /* WOLFSSL_CRYPTOCELL */
+
+#if defined(WOLFSSL_SMALL_STACK) || defined(NO_ASN)
+ XFREE(hash_data, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+#endif /* NO_SIG_WRAPPER */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_arm32.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_arm32.c
new file mode 100644
index 000000000..4540dde65
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_arm32.c
@@ -0,0 +1,89057 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+ defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef WOLFSSL_SP_ARM32_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * Bytes are consumed from the end of the array (least significant byte
+ * first) and packed into 32-bit little-endian digits.  Any digits of r
+ * beyond the data are zeroed.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * (NOTE(review): r is indexed by sp_digit words here, so "size"
+ * appears to be a digit count, not a byte count - confirm.)
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        /* s = number of bits already filled in digit r[j] */
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            /* Digit full (or overflowing): mask to 32 bits and spill the
+             * high bits of this byte into the next digit. */
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero the remaining high digits. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * Three compile-time variants depending on how the mp_int digit size
+ * (DIGIT_BIT) compares to the 32-bit sp_digit: equal (straight copy),
+ * larger (each mp digit spans several sp digits), or smaller (several
+ * mp digits packed per sp digit).  Unused high digits of r are zeroed.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * (NOTE(review): used as a digit count below - confirm.)
+ * a A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    /* Same digit width: direct copy then zero-fill. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    /* mp digit wider than 32 bits: split each across sp digits. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        /* s = bit offset of this mp digit within the current sp digit */
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* mp digit narrower than 32 bits: pack several per sp digit. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            /* sp digit full: mask and carry the remainder forward. */
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * Digits are emitted least-significant first into the tail of the byte
+ * array (j counts down from 255), each 32-bit digit contributing its
+ * bytes in little-to-big order.
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_2048_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 2048 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<64 && j>=0; i++) {
+        b = 0;
+        /* s = bits of the previous digit already merged into a[j] */
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit the remaining full bytes of this digit. */
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* Bits of this digit left over for the next output byte. */
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Fully unrolled 8x8-word schoolbook multiply using UMULL, summing the
+ * 64 partial products column by column (anti-diagonals of A[i]*B[j]).
+ *
+ * Register roles inside the asm:
+ *   r3/r4/r5 - rotating (low, high, carry) column accumulators
+ *   r6/r7    - 64-bit partial product from umull
+ *   r8/r9    - current A[i] / B[j] operands
+ *   r10      - constant zero (carry collection via adc)
+ * The low 8 result words are staged on the stack and copied to r at the
+ * end; NOTE(review): presumably so r may alias a or b - confirm.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #32\n\t"
+        "mov	r10, #0\n\t"
+        "#  A[0] * B[0]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r3, r4, r8, r9\n\t"
+        "mov	r5, #0\n\t"
+        "str	r3, [sp]\n\t"
+        "#  A[0] * B[1]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[0]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #4]\n\t"
+        "#  A[0] * B[2]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[1]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[0]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #8]\n\t"
+        "#  A[0] * B[3]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[2]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[1]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[0]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #12]\n\t"
+        "#  A[0] * B[4]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[3]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[2]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[1]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[0]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #16]\n\t"
+        "#  A[0] * B[5]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[1] * B[4]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[2] * B[3]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[2]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[1]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[0]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [sp, #20]\n\t"
+        "#  A[0] * B[6]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[1] * B[5]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[2] * B[4]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[3] * B[3]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[2]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[1]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[0]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [sp, #24]\n\t"
+        "#  A[0] * B[7]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[1] * B[6]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[2] * B[5]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[3] * B[4]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[4] * B[3]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[2]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[1]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[0]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #0]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [sp, #28]\n\t"
+        "#  A[1] * B[7]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[2] * B[6]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[3] * B[5]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[4] * B[4]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[5] * B[3]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[2]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[1]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #32]\n\t"
+        "#  A[2] * B[7]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[3] * B[6]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[4] * B[5]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[5] * B[4]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[6] * B[3]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[2]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #36]\n\t"
+        "#  A[3] * B[7]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[4] * B[6]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[5] * B[5]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[6] * B[4]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "#  A[7] * B[3]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "#  A[4] * B[7]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r10, r10\n\t"
+        "#  A[5] * B[6]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[6] * B[5]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "#  A[7] * B[4]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #16]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r3, r3, r7\n\t"
+        "adc	r4, r4, r10\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "#  A[5] * B[7]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r10, r10\n\t"
+        "#  A[6] * B[6]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "#  A[7] * B[5]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adc	r5, r5, r10\n\t"
+        "str	r3, [%[r], #48]\n\t"
+        "#  A[6] * B[7]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r10, r10\n\t"
+        "#  A[7] * B[6]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adc	r3, r3, r10\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "#  A[7] * B[7]\n\t"
+        "ldr	r8, [%[a], #28]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "umull	r6, r7, r8, r9\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r5, [%[r], #56]\n\t"
+        "str	r3, [%[r], #60]\n\t"
+        "ldr	r3, [sp, #0]\n\t"
+        "ldr	r4, [sp, #4]\n\t"
+        "ldr	r5, [sp, #8]\n\t"
+        "ldr	r6, [sp, #12]\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [sp, #16]\n\t"
+        "ldr	r4, [sp, #20]\n\t"
+        "ldr	r5, [sp, #24]\n\t"
+        "ldr	r6, [sp, #28]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "add	sp, sp, #32\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+}
+
<SUGGESTED_SOLUTION>Because of length constraints, the remaining doc_update bodies (sp_2048_sqr_8 and following blocks) reproduce the original lines verbatim with comment-only additions, as specified in the analysis.</SUGGESTED_SOLUTION>
+/* Square a and put result in r. (r = a * a)
+ *
+ * Fully unrolled 8-word squaring: off-diagonal products A[i]*A[j] (i<j)
+ * are computed once and doubled (either by adding the product twice or
+ * by a shift-left-by-adds of the r5/r6/r7 partial column), and diagonal
+ * squares A[i]*A[i] are added once.
+ *
+ * Register roles inside the asm:
+ *   r2/r3/r4 - rotating column accumulators
+ *   r5/r6/r7 - per-column partial sum that gets doubled
+ *   r8/r9    - umull product (r8 also reused as an operand load)
+ *   r10      - current A[i] operand
+ *   r14      - constant zero
+ * Low 8 result words staged on the stack and copied to r at the end.
+ * NOTE(review): the clobber list below names "r8" twice - harmless to
+ * GCC but should be de-duplicated upstream.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub	sp, sp, #32\n\t"
+        "mov	r14, #0\n\t"
+        "#  A[0] * A[0]\n\t"
+        "ldr	r10, [%[a], #0]\n\t"
+        "umull	r8, r3, r10, r10\n\t"
+        "mov	r4, #0\n\t"
+        "str	r8, [sp]\n\t"
+        "#  A[0] * A[1]\n\t"
+        "ldr	r10, [%[a], #4]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [sp, #4]\n\t"
+        "#  A[0] * A[2]\n\t"
+        "ldr	r10, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r14, r14\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "#  A[1] * A[1]\n\t"
+        "ldr	r10, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "str	r4, [sp, #8]\n\t"
+        "#  A[0] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r14, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "#  A[1] * A[2]\n\t"
+        "ldr	r10, [%[a], #8]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "str	r2, [sp, #12]\n\t"
+        "#  A[0] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[1] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[2] * A[2]\n\t"
+        "ldr	r10, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [sp, #16]\n\t"
+        "#  A[0] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [sp, #20]\n\t"
+        "#  A[0] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[3]\n\t"
+        "ldr	r10, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [sp, #24]\n\t"
+        "#  A[0] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #0]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r2, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[1] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[2] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r3, r3, r5\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adc	r2, r2, r7\n\t"
+        "str	r3, [sp, #28]\n\t"
+        "#  A[1] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #4]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r3, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[2] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[3] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[4]\n\t"
+        "ldr	r10, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r4, r4, r5\n\t"
+        "adcs	r2, r2, r6\n\t"
+        "adc	r3, r3, r7\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "#  A[2] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #8]\n\t"
+        "umull	r5, r6, r10, r8\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r7, #0\n\t"
+        "#  A[3] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "#  A[4] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r5, r5, r8\n\t"
+        "adcs	r6, r6, r9\n\t"
+        "adc	r7, r7, r14\n\t"
+        "adds	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adc	r7, r7, r7\n\t"
+        "adds	r2, r2, r5\n\t"
+        "adcs	r3, r3, r6\n\t"
+        "adc	r4, r4, r7\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "#  A[3] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #12]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[4] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "#  A[5] * A[5]\n\t"
+        "ldr	r10, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "#  A[4] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #16]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r14, r14\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "#  A[5] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r2, r2, r9\n\t"
+        "adc	r3, r3, r14\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "#  A[5] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #20]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r14, r14\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "#  A[6] * A[6]\n\t"
+        "ldr	r10, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r2, r2, r8\n\t"
+        "adcs	r3, r3, r9\n\t"
+        "adc	r4, r4, r14\n\t"
+        "str	r2, [%[r], #48]\n\t"
+        "#  A[6] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "ldr	r8, [%[a], #24]\n\t"
+        "umull	r8, r9, r10, r8\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r14, r14\n\t"
+        "adds	r3, r3, r8\n\t"
+        "adcs	r4, r4, r9\n\t"
+        "adc	r2, r2, r14\n\t"
+        "str	r3, [%[r], #52]\n\t"
+        "#  A[7] * A[7]\n\t"
+        "ldr	r10, [%[a], #28]\n\t"
+        "umull	r8, r9, r10, r10\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adc	r2, r2, r9\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "str	r2, [%[r], #60]\n\t"
+        "ldr	r2, [sp, #0]\n\t"
+        "ldr	r3, [sp, #4]\n\t"
+        "ldr	r4, [sp, #8]\n\t"
+        "ldr	r8, [sp, #12]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r8, [%[r], #12]\n\t"
+        "ldr	r2, [sp, #16]\n\t"
+        "ldr	r3, [sp, #20]\n\t"
+        "ldr	r4, [sp, #24]\n\t"
+        "ldr	r8, [sp, #28]\n\t"
+        "str	r2, [%[r], #16]\n\t"
+        "str	r3, [%[r], #20]\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r8, [%[r], #28]\n\t"
+        "add	sp, sp, #32\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
+    );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * 8-word addition, four words per load/store group.  The first group
+ * uses adds/adcs and the carry flag is live across the group boundary
+ * (the second group starts with adcs), so no instruction between the
+ * groups may modify flags.  The final carry out is returned (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "ldr	r10, [%[b], #8]\n\t"
+        "ldr	r14, [%[b], #12]\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "str	r6, [%[r], #8]\n\t"
+        "str	r7, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "ldr	r10, [%[b], #24]\n\t"
+        "ldr	r14, [%[b], #28]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "str	r6, [%[r], #24]\n\t"
+        "str	r7, [%[r], #28]\n\t"
+        "adc	%[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * 16-word in-place subtraction, four words per group; the borrow
+ * (inverted carry on ARM) stays live in the flags across all groups.
+ * Returns the final borrow as an all-ones mask via "sbc c, r9, r9"
+ * (0 if no borrow, (sp_digit)-1 if a < b).
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r2, [%[a], #0]\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "ldr	r8, [%[b], #8]\n\t"
+        "ldr	r9, [%[b], #12]\n\t"
+        "subs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #0]\n\t"
+        "str	r3, [%[a], #4]\n\t"
+        "str	r4, [%[a], #8]\n\t"
+        "str	r5, [%[a], #12]\n\t"
+        "ldr	r2, [%[a], #16]\n\t"
+        "ldr	r3, [%[a], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[a], #28]\n\t"
+        "ldr	r6, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "ldr	r8, [%[b], #24]\n\t"
+        "ldr	r9, [%[b], #28]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #16]\n\t"
+        "str	r3, [%[a], #20]\n\t"
+        "str	r4, [%[a], #24]\n\t"
+        "str	r5, [%[a], #28]\n\t"
+        "ldr	r2, [%[a], #32]\n\t"
+        "ldr	r3, [%[a], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[a], #44]\n\t"
+        "ldr	r6, [%[b], #32]\n\t"
+        "ldr	r7, [%[b], #36]\n\t"
+        "ldr	r8, [%[b], #40]\n\t"
+        "ldr	r9, [%[b], #44]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #32]\n\t"
+        "str	r3, [%[a], #36]\n\t"
+        "str	r4, [%[a], #40]\n\t"
+        "str	r5, [%[a], #44]\n\t"
+        "ldr	r2, [%[a], #48]\n\t"
+        "ldr	r3, [%[a], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[a], #60]\n\t"
+        "ldr	r6, [%[b], #48]\n\t"
+        "ldr	r7, [%[b], #52]\n\t"
+        "ldr	r8, [%[b], #56]\n\t"
+        "ldr	r9, [%[b], #60]\n\t"
+        "sbcs	r2, r2, r6\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "str	r2, [%[a], #48]\n\t"
+        "str	r3, [%[a], #52]\n\t"
+        "str	r4, [%[a], #56]\n\t"
+        "str	r5, [%[a], #60]\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * 16-word addition, four words per load/store group; the carry flag is
+ * live across every group boundary (groups after the first start with
+ * adcs).  Returns the final carry out (0 or 1) via "adc c, r12, r12"
+ * with r12 pre-zeroed.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "ldr	r10, [%[b], #8]\n\t"
+        "ldr	r14, [%[b], #12]\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "str	r6, [%[r], #8]\n\t"
+        "str	r7, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "ldr	r10, [%[b], #24]\n\t"
+        "ldr	r14, [%[b], #28]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "str	r6, [%[r], #24]\n\t"
+        "str	r7, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[a], #36]\n\t"
+        "ldr	r6, [%[a], #40]\n\t"
+        "ldr	r7, [%[a], #44]\n\t"
+        "ldr	r8, [%[b], #32]\n\t"
+        "ldr	r9, [%[b], #36]\n\t"
+        "ldr	r10, [%[b], #40]\n\t"
+        "ldr	r14, [%[b], #44]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r5, [%[r], #36]\n\t"
+        "str	r6, [%[r], #40]\n\t"
+        "str	r7, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[a], #52]\n\t"
+        "ldr	r6, [%[a], #56]\n\t"
+        "ldr	r7, [%[a], #60]\n\t"
+        "ldr	r8, [%[b], #48]\n\t"
+        "ldr	r9, [%[b], #52]\n\t"
+        "ldr	r10, [%[b], #56]\n\t"
+        "ldr	r14, [%[b], #60]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "str	r5, [%[r], #52]\n\t"
+        "str	r6, [%[r], #56]\n\t"
+        "str	r7, [%[r], #60]\n\t"
+        "adc	%[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * m is expected to be an all-zeros or all-ones mask (see callers that
+ * pass 0 - carry), so this acts as a constant-time conditional copy.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Small-code variant: loop over the 8 digits. */
+    int i;
+
+    for (i=0; i<8; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    /* Unrolled variant. */
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Karatsuba step over 8-word halves:
+ *   z0 = aL*bL, z2 = aH*bH, z1 = (aL+aH)*(bL+bH) - z0 - z2
+ * The carries ca/cb from the half additions are folded in with
+ * constant-time masks (0 - carry) rather than branches.
+ *
+ * r A single precision integer.  Must hold 32 digits; low 16 are
+ * written via z0 = r.
+ * a A single precision integer (16 digits).
+ * b A single precision integer (16 digits).
+ */
+SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[16];
+    sp_digit a1[8];
+    sp_digit b1[8];
+    sp_digit z2[16];
+    sp_digit u, ca, cb;
+
+    /* a1 = aL + aH, b1 = bL + bH; ca/cb are the carries out. */
+    ca = sp_2048_add_8(a1, a, &a[8]);
+    cb = sp_2048_add_8(b1, b, &b[8]);
+    /* Both carries set contributes 2^512 to z1. */
+    u  = ca & cb;
+    sp_2048_mul_8(z1, a1, b1);
+    sp_2048_mul_8(z2, &a[8], &b[8]);
+    sp_2048_mul_8(z0, a, b);
+    /* Fold the cross carry terms: cb*a1 and ca*b1, masked in. */
+    sp_2048_mask_8(r + 16, a1, 0 - cb);
+    sp_2048_mask_8(b1, b1, 0 - ca);
+    u += sp_2048_add_8(r + 16, r + 16, b1);
+    /* z1 -= z2; z1 -= z0; then add z1 into the middle of the result. */
+    u += sp_2048_sub_in_place_16(z1, z2);
+    u += sp_2048_sub_in_place_16(z1, z0);
+    u += sp_2048_add_16(r + 8, r + 8, z1);
+    r[24] = u;
+    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring from three 8-word squares. z0 aliases r, so the
+ * order of the operations below is load-bearing.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[16];
+ sp_digit z1[16];
+ sp_digit a1[8];
+ sp_digit u;
+
+ /* a1 = aL + aH with carry u. */
+ u = sp_2048_add_8(a1, a, &a[8]);
+ sp_2048_sqr_8(z1, a1);
+ sp_2048_sqr_8(z2, &a[8]);
+ sp_2048_sqr_8(z0, a);
+ /* Constant-time fix-up for the carry: add a1 twice when u is set. */
+ sp_2048_mask_8(r + 16, a1, 0 - u);
+ u += sp_2048_add_8(r + 16, r + 16, r + 16);
+ u += sp_2048_sub_in_place_16(z1, z2);
+ u += sp_2048_sub_in_place_16(z1, z0);
+ u += sp_2048_add_16(r + 8, r + 8, z1);
+ r[24] = u;
+ XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+ (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * Fully unrolled: 32 words, four at a time, with the borrow carried
+ * between groups in the CPU carry flag (SUBS first, then SBCS).
+ * Returns 0 on no borrow out, all-ones when a < b.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* First group: SUBS starts the borrow chain. */
+ "ldr r2, [%[a], #0]\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[a], #12]\n\t"
+ "ldr r6, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "subs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #0]\n\t"
+ "str r3, [%[a], #4]\n\t"
+ "str r4, [%[a], #8]\n\t"
+ "str r5, [%[a], #12]\n\t"
+ /* Remaining groups use SBCS only, consuming the previous borrow. */
+ "ldr r2, [%[a], #16]\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[a], #28]\n\t"
+ "ldr r6, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #16]\n\t"
+ "str r3, [%[a], #20]\n\t"
+ "str r4, [%[a], #24]\n\t"
+ "str r5, [%[a], #28]\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[a], #44]\n\t"
+ "ldr r6, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "ldr r8, [%[b], #40]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #32]\n\t"
+ "str r3, [%[a], #36]\n\t"
+ "str r4, [%[a], #40]\n\t"
+ "str r5, [%[a], #44]\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[a], #60]\n\t"
+ "ldr r6, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "ldr r8, [%[b], #56]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #48]\n\t"
+ "str r3, [%[a], #52]\n\t"
+ "str r4, [%[a], #56]\n\t"
+ "str r5, [%[a], #60]\n\t"
+ "ldr r2, [%[a], #64]\n\t"
+ "ldr r3, [%[a], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[a], #76]\n\t"
+ "ldr r6, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "ldr r8, [%[b], #72]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #64]\n\t"
+ "str r3, [%[a], #68]\n\t"
+ "str r4, [%[a], #72]\n\t"
+ "str r5, [%[a], #76]\n\t"
+ "ldr r2, [%[a], #80]\n\t"
+ "ldr r3, [%[a], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[a], #92]\n\t"
+ "ldr r6, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "ldr r8, [%[b], #88]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #80]\n\t"
+ "str r3, [%[a], #84]\n\t"
+ "str r4, [%[a], #88]\n\t"
+ "str r5, [%[a], #92]\n\t"
+ "ldr r2, [%[a], #96]\n\t"
+ "ldr r3, [%[a], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[a], #108]\n\t"
+ "ldr r6, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "ldr r8, [%[b], #104]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #96]\n\t"
+ "str r3, [%[a], #100]\n\t"
+ "str r4, [%[a], #104]\n\t"
+ "str r5, [%[a], #108]\n\t"
+ "ldr r2, [%[a], #112]\n\t"
+ "ldr r3, [%[a], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[a], #124]\n\t"
+ "ldr r6, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "ldr r8, [%[b], #120]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #112]\n\t"
+ "str r3, [%[a], #116]\n\t"
+ "str r4, [%[a], #120]\n\t"
+ "str r5, [%[a], #124]\n\t"
+ /* c = r9 - r9 - !C = 0 - borrow: 0 or 0xffffffff. */
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled: 32 words, four at a time, with the carry propagated
+ * between groups in the CPU carry flag (ADDS first, then ADCS).
+ * Returns the carry out (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r12 stays 0; used to materialise the final carry. */
+ "mov r12, #0\n\t"
+ /* First group: ADDS starts the carry chain. */
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[a], #4]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "ldr r10, [%[b], #8]\n\t"
+ "ldr r14, [%[b], #12]\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r5, [%[r], #4]\n\t"
+ "str r6, [%[r], #8]\n\t"
+ "str r7, [%[r], #12]\n\t"
+ /* Remaining groups use ADCS only, consuming the previous carry. */
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[a], #20]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "ldr r10, [%[b], #24]\n\t"
+ "ldr r14, [%[b], #28]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "str r6, [%[r], #24]\n\t"
+ "str r7, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[a], #36]\n\t"
+ "ldr r6, [%[a], #40]\n\t"
+ "ldr r7, [%[a], #44]\n\t"
+ "ldr r8, [%[b], #32]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "ldr r10, [%[b], #40]\n\t"
+ "ldr r14, [%[b], #44]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r5, [%[r], #36]\n\t"
+ "str r6, [%[r], #40]\n\t"
+ "str r7, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[a], #52]\n\t"
+ "ldr r6, [%[a], #56]\n\t"
+ "ldr r7, [%[a], #60]\n\t"
+ "ldr r8, [%[b], #48]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "ldr r10, [%[b], #56]\n\t"
+ "ldr r14, [%[b], #60]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r5, [%[r], #52]\n\t"
+ "str r6, [%[r], #56]\n\t"
+ "str r7, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[a], #68]\n\t"
+ "ldr r6, [%[a], #72]\n\t"
+ "ldr r7, [%[a], #76]\n\t"
+ "ldr r8, [%[b], #64]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "ldr r10, [%[b], #72]\n\t"
+ "ldr r14, [%[b], #76]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "str r6, [%[r], #72]\n\t"
+ "str r7, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[a], #84]\n\t"
+ "ldr r6, [%[a], #88]\n\t"
+ "ldr r7, [%[a], #92]\n\t"
+ "ldr r8, [%[b], #80]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "ldr r10, [%[b], #88]\n\t"
+ "ldr r14, [%[b], #92]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r5, [%[r], #84]\n\t"
+ "str r6, [%[r], #88]\n\t"
+ "str r7, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[a], #100]\n\t"
+ "ldr r6, [%[a], #104]\n\t"
+ "ldr r7, [%[a], #108]\n\t"
+ "ldr r8, [%[b], #96]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "ldr r10, [%[b], #104]\n\t"
+ "ldr r14, [%[b], #108]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r5, [%[r], #100]\n\t"
+ "str r6, [%[r], #104]\n\t"
+ "str r7, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[a], #116]\n\t"
+ "ldr r6, [%[a], #120]\n\t"
+ "ldr r7, [%[a], #124]\n\t"
+ "ldr r8, [%[b], #112]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "ldr r10, [%[b], #120]\n\t"
+ "ldr r14, [%[b], #124]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r5, [%[r], #116]\n\t"
+ "str r6, [%[r], #120]\n\t"
+ "str r7, [%[r], #124]\n\t"
+ /* c = 0 + 0 + C: the final carry out. */
+ "adc %[c], r12, r12\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Constant-time conditional copy: m is all-ones (keep a) or all-zeros
+ * (clear), so no data-dependent branch is taken.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<16; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ int i;
+
+ /* Unrolled by 8 when code size is not constrained. */
+ for (i = 0; i < 16; i += 8) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ r[i+6] = a[i+6] & m;
+ r[i+7] = a[i+7] & m;
+ }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Karatsuba multiplication: the 32-word operands are split into 16-word
+ * halves and combined from three 16-word products. Note z0 aliases r, so
+ * the order of the operations below is load-bearing.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[32];
+ sp_digit a1[16];
+ sp_digit b1[16];
+ sp_digit z2[32];
+ sp_digit u, ca, cb;
+
+ /* a1 = aL + aH, b1 = bL + bH; ca/cb are the carries out. */
+ ca = sp_2048_add_16(a1, a, &a[16]);
+ cb = sp_2048_add_16(b1, b, &b[16]);
+ u = ca & cb;
+ sp_2048_mul_16(z1, a1, b1);
+ sp_2048_mul_16(z2, &a[16], &b[16]);
+ sp_2048_mul_16(z0, a, b);
+ /* Constant-time fix-up for the carries out of a1 and b1. */
+ sp_2048_mask_16(r + 32, a1, 0 - cb);
+ sp_2048_mask_16(b1, b1, 0 - ca);
+ u += sp_2048_add_16(r + 32, r + 32, b1);
+ /* z1 -= z2 + z0, then add the middle term at word offset 16. */
+ u += sp_2048_sub_in_place_32(z1, z2);
+ u += sp_2048_sub_in_place_32(z1, z0);
+ u += sp_2048_add_32(r + 16, r + 16, z1);
+ r[48] = u;
+ XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+ (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring from three 16-word squares. z0 aliases r, so the
+ * order of the operations below is load-bearing.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[32];
+ sp_digit z1[32];
+ sp_digit a1[16];
+ sp_digit u;
+
+ /* a1 = aL + aH with carry u. */
+ u = sp_2048_add_16(a1, a, &a[16]);
+ sp_2048_sqr_16(z1, a1);
+ sp_2048_sqr_16(z2, &a[16]);
+ sp_2048_sqr_16(z0, a);
+ /* Constant-time fix-up for the carry: add a1 twice when u is set. */
+ sp_2048_mask_16(r + 32, a1, 0 - u);
+ u += sp_2048_add_16(r + 32, r + 32, r + 32);
+ u += sp_2048_sub_in_place_32(z1, z2);
+ u += sp_2048_sub_in_place_32(z1, z0);
+ u += sp_2048_add_32(r + 16, r + 16, z1);
+ r[48] = u;
+ XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+ (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * Fully unrolled: 64 words, four at a time, with the borrow carried
+ * between groups in the CPU carry flag (SUBS first, then SBCS).
+ * Returns 0 on no borrow out, all-ones when a < b.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* First group: SUBS starts the borrow chain. */
+ "ldr r2, [%[a], #0]\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[a], #12]\n\t"
+ "ldr r6, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "subs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #0]\n\t"
+ "str r3, [%[a], #4]\n\t"
+ "str r4, [%[a], #8]\n\t"
+ "str r5, [%[a], #12]\n\t"
+ /* Remaining groups use SBCS only, consuming the previous borrow. */
+ "ldr r2, [%[a], #16]\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[a], #28]\n\t"
+ "ldr r6, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #16]\n\t"
+ "str r3, [%[a], #20]\n\t"
+ "str r4, [%[a], #24]\n\t"
+ "str r5, [%[a], #28]\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[a], #44]\n\t"
+ "ldr r6, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "ldr r8, [%[b], #40]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #32]\n\t"
+ "str r3, [%[a], #36]\n\t"
+ "str r4, [%[a], #40]\n\t"
+ "str r5, [%[a], #44]\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[a], #60]\n\t"
+ "ldr r6, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "ldr r8, [%[b], #56]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #48]\n\t"
+ "str r3, [%[a], #52]\n\t"
+ "str r4, [%[a], #56]\n\t"
+ "str r5, [%[a], #60]\n\t"
+ "ldr r2, [%[a], #64]\n\t"
+ "ldr r3, [%[a], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[a], #76]\n\t"
+ "ldr r6, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "ldr r8, [%[b], #72]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #64]\n\t"
+ "str r3, [%[a], #68]\n\t"
+ "str r4, [%[a], #72]\n\t"
+ "str r5, [%[a], #76]\n\t"
+ "ldr r2, [%[a], #80]\n\t"
+ "ldr r3, [%[a], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[a], #92]\n\t"
+ "ldr r6, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "ldr r8, [%[b], #88]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #80]\n\t"
+ "str r3, [%[a], #84]\n\t"
+ "str r4, [%[a], #88]\n\t"
+ "str r5, [%[a], #92]\n\t"
+ "ldr r2, [%[a], #96]\n\t"
+ "ldr r3, [%[a], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[a], #108]\n\t"
+ "ldr r6, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "ldr r8, [%[b], #104]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #96]\n\t"
+ "str r3, [%[a], #100]\n\t"
+ "str r4, [%[a], #104]\n\t"
+ "str r5, [%[a], #108]\n\t"
+ "ldr r2, [%[a], #112]\n\t"
+ "ldr r3, [%[a], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[a], #124]\n\t"
+ "ldr r6, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "ldr r8, [%[b], #120]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #112]\n\t"
+ "str r3, [%[a], #116]\n\t"
+ "str r4, [%[a], #120]\n\t"
+ "str r5, [%[a], #124]\n\t"
+ "ldr r2, [%[a], #128]\n\t"
+ "ldr r3, [%[a], #132]\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r5, [%[a], #140]\n\t"
+ "ldr r6, [%[b], #128]\n\t"
+ "ldr r7, [%[b], #132]\n\t"
+ "ldr r8, [%[b], #136]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #128]\n\t"
+ "str r3, [%[a], #132]\n\t"
+ "str r4, [%[a], #136]\n\t"
+ "str r5, [%[a], #140]\n\t"
+ "ldr r2, [%[a], #144]\n\t"
+ "ldr r3, [%[a], #148]\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r5, [%[a], #156]\n\t"
+ "ldr r6, [%[b], #144]\n\t"
+ "ldr r7, [%[b], #148]\n\t"
+ "ldr r8, [%[b], #152]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #144]\n\t"
+ "str r3, [%[a], #148]\n\t"
+ "str r4, [%[a], #152]\n\t"
+ "str r5, [%[a], #156]\n\t"
+ "ldr r2, [%[a], #160]\n\t"
+ "ldr r3, [%[a], #164]\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r5, [%[a], #172]\n\t"
+ "ldr r6, [%[b], #160]\n\t"
+ "ldr r7, [%[b], #164]\n\t"
+ "ldr r8, [%[b], #168]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #160]\n\t"
+ "str r3, [%[a], #164]\n\t"
+ "str r4, [%[a], #168]\n\t"
+ "str r5, [%[a], #172]\n\t"
+ "ldr r2, [%[a], #176]\n\t"
+ "ldr r3, [%[a], #180]\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r5, [%[a], #188]\n\t"
+ "ldr r6, [%[b], #176]\n\t"
+ "ldr r7, [%[b], #180]\n\t"
+ "ldr r8, [%[b], #184]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #176]\n\t"
+ "str r3, [%[a], #180]\n\t"
+ "str r4, [%[a], #184]\n\t"
+ "str r5, [%[a], #188]\n\t"
+ "ldr r2, [%[a], #192]\n\t"
+ "ldr r3, [%[a], #196]\n\t"
+ "ldr r4, [%[a], #200]\n\t"
+ "ldr r5, [%[a], #204]\n\t"
+ "ldr r6, [%[b], #192]\n\t"
+ "ldr r7, [%[b], #196]\n\t"
+ "ldr r8, [%[b], #200]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #192]\n\t"
+ "str r3, [%[a], #196]\n\t"
+ "str r4, [%[a], #200]\n\t"
+ "str r5, [%[a], #204]\n\t"
+ "ldr r2, [%[a], #208]\n\t"
+ "ldr r3, [%[a], #212]\n\t"
+ "ldr r4, [%[a], #216]\n\t"
+ "ldr r5, [%[a], #220]\n\t"
+ "ldr r6, [%[b], #208]\n\t"
+ "ldr r7, [%[b], #212]\n\t"
+ "ldr r8, [%[b], #216]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #208]\n\t"
+ "str r3, [%[a], #212]\n\t"
+ "str r4, [%[a], #216]\n\t"
+ "str r5, [%[a], #220]\n\t"
+ "ldr r2, [%[a], #224]\n\t"
+ "ldr r3, [%[a], #228]\n\t"
+ "ldr r4, [%[a], #232]\n\t"
+ "ldr r5, [%[a], #236]\n\t"
+ "ldr r6, [%[b], #224]\n\t"
+ "ldr r7, [%[b], #228]\n\t"
+ "ldr r8, [%[b], #232]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #224]\n\t"
+ "str r3, [%[a], #228]\n\t"
+ "str r4, [%[a], #232]\n\t"
+ "str r5, [%[a], #236]\n\t"
+ "ldr r2, [%[a], #240]\n\t"
+ "ldr r3, [%[a], #244]\n\t"
+ "ldr r4, [%[a], #248]\n\t"
+ "ldr r5, [%[a], #252]\n\t"
+ "ldr r6, [%[b], #240]\n\t"
+ "ldr r7, [%[b], #244]\n\t"
+ "ldr r8, [%[b], #248]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #240]\n\t"
+ "str r3, [%[a], #244]\n\t"
+ "str r4, [%[a], #248]\n\t"
+ "str r5, [%[a], #252]\n\t"
+ /* c = r9 - r9 - !C = 0 - borrow: 0 or 0xffffffff. */
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled: 64 words, four at a time, with the carry propagated
+ * between groups in the CPU carry flag (ADDS first, then ADCS).
+ * Returns the carry out (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r12 stays 0; used to materialise the final carry. */
+ "mov r12, #0\n\t"
+ /* First group: ADDS starts the carry chain. */
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[a], #4]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "ldr r10, [%[b], #8]\n\t"
+ "ldr r14, [%[b], #12]\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r5, [%[r], #4]\n\t"
+ "str r6, [%[r], #8]\n\t"
+ "str r7, [%[r], #12]\n\t"
+ /* Remaining groups use ADCS only, consuming the previous carry. */
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[a], #20]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "ldr r10, [%[b], #24]\n\t"
+ "ldr r14, [%[b], #28]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "str r6, [%[r], #24]\n\t"
+ "str r7, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[a], #36]\n\t"
+ "ldr r6, [%[a], #40]\n\t"
+ "ldr r7, [%[a], #44]\n\t"
+ "ldr r8, [%[b], #32]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "ldr r10, [%[b], #40]\n\t"
+ "ldr r14, [%[b], #44]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r5, [%[r], #36]\n\t"
+ "str r6, [%[r], #40]\n\t"
+ "str r7, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[a], #52]\n\t"
+ "ldr r6, [%[a], #56]\n\t"
+ "ldr r7, [%[a], #60]\n\t"
+ "ldr r8, [%[b], #48]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "ldr r10, [%[b], #56]\n\t"
+ "ldr r14, [%[b], #60]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r5, [%[r], #52]\n\t"
+ "str r6, [%[r], #56]\n\t"
+ "str r7, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[a], #68]\n\t"
+ "ldr r6, [%[a], #72]\n\t"
+ "ldr r7, [%[a], #76]\n\t"
+ "ldr r8, [%[b], #64]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "ldr r10, [%[b], #72]\n\t"
+ "ldr r14, [%[b], #76]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "str r6, [%[r], #72]\n\t"
+ "str r7, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[a], #84]\n\t"
+ "ldr r6, [%[a], #88]\n\t"
+ "ldr r7, [%[a], #92]\n\t"
+ "ldr r8, [%[b], #80]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "ldr r10, [%[b], #88]\n\t"
+ "ldr r14, [%[b], #92]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r5, [%[r], #84]\n\t"
+ "str r6, [%[r], #88]\n\t"
+ "str r7, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[a], #100]\n\t"
+ "ldr r6, [%[a], #104]\n\t"
+ "ldr r7, [%[a], #108]\n\t"
+ "ldr r8, [%[b], #96]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "ldr r10, [%[b], #104]\n\t"
+ "ldr r14, [%[b], #108]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r5, [%[r], #100]\n\t"
+ "str r6, [%[r], #104]\n\t"
+ "str r7, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[a], #116]\n\t"
+ "ldr r6, [%[a], #120]\n\t"
+ "ldr r7, [%[a], #124]\n\t"
+ "ldr r8, [%[b], #112]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "ldr r10, [%[b], #120]\n\t"
+ "ldr r14, [%[b], #124]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r5, [%[r], #116]\n\t"
+ "str r6, [%[r], #120]\n\t"
+ "str r7, [%[r], #124]\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r5, [%[a], #132]\n\t"
+ "ldr r6, [%[a], #136]\n\t"
+ "ldr r7, [%[a], #140]\n\t"
+ "ldr r8, [%[b], #128]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "ldr r10, [%[b], #136]\n\t"
+ "ldr r14, [%[b], #140]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #128]\n\t"
+ "str r5, [%[r], #132]\n\t"
+ "str r6, [%[r], #136]\n\t"
+ "str r7, [%[r], #140]\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r5, [%[a], #148]\n\t"
+ "ldr r6, [%[a], #152]\n\t"
+ "ldr r7, [%[a], #156]\n\t"
+ "ldr r8, [%[b], #144]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "ldr r10, [%[b], #152]\n\t"
+ "ldr r14, [%[b], #156]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #144]\n\t"
+ "str r5, [%[r], #148]\n\t"
+ "str r6, [%[r], #152]\n\t"
+ "str r7, [%[r], #156]\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r5, [%[a], #164]\n\t"
+ "ldr r6, [%[a], #168]\n\t"
+ "ldr r7, [%[a], #172]\n\t"
+ "ldr r8, [%[b], #160]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "ldr r10, [%[b], #168]\n\t"
+ "ldr r14, [%[b], #172]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "str r5, [%[r], #164]\n\t"
+ "str r6, [%[r], #168]\n\t"
+ "str r7, [%[r], #172]\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r5, [%[a], #180]\n\t"
+ "ldr r6, [%[a], #184]\n\t"
+ "ldr r7, [%[a], #188]\n\t"
+ "ldr r8, [%[b], #176]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "ldr r10, [%[b], #184]\n\t"
+ "ldr r14, [%[b], #188]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #176]\n\t"
+ "str r5, [%[r], #180]\n\t"
+ "str r6, [%[r], #184]\n\t"
+ "str r7, [%[r], #188]\n\t"
+ "ldr r4, [%[a], #192]\n\t"
+ "ldr r5, [%[a], #196]\n\t"
+ "ldr r6, [%[a], #200]\n\t"
+ "ldr r7, [%[a], #204]\n\t"
+ "ldr r8, [%[b], #192]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "ldr r10, [%[b], #200]\n\t"
+ "ldr r14, [%[b], #204]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #192]\n\t"
+ "str r5, [%[r], #196]\n\t"
+ "str r6, [%[r], #200]\n\t"
+ "str r7, [%[r], #204]\n\t"
+ "ldr r4, [%[a], #208]\n\t"
+ "ldr r5, [%[a], #212]\n\t"
+ "ldr r6, [%[a], #216]\n\t"
+ "ldr r7, [%[a], #220]\n\t"
+ "ldr r8, [%[b], #208]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "ldr r10, [%[b], #216]\n\t"
+ "ldr r14, [%[b], #220]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "str r5, [%[r], #212]\n\t"
+ "str r6, [%[r], #216]\n\t"
+ "str r7, [%[r], #220]\n\t"
+ "ldr r4, [%[a], #224]\n\t"
+ "ldr r5, [%[a], #228]\n\t"
+ "ldr r6, [%[a], #232]\n\t"
+ "ldr r7, [%[a], #236]\n\t"
+ "ldr r8, [%[b], #224]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "ldr r10, [%[b], #232]\n\t"
+ "ldr r14, [%[b], #236]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #224]\n\t"
+ "str r5, [%[r], #228]\n\t"
+ "str r6, [%[r], #232]\n\t"
+ "str r7, [%[r], #236]\n\t"
+ "ldr r4, [%[a], #240]\n\t"
+ "ldr r5, [%[a], #244]\n\t"
+ "ldr r6, [%[a], #248]\n\t"
+ "ldr r7, [%[a], #252]\n\t"
+ "ldr r8, [%[b], #240]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "ldr r10, [%[b], #248]\n\t"
+ "ldr r14, [%[b], #252]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #240]\n\t"
+ "str r5, [%[r], #244]\n\t"
+ "str r6, [%[r], #248]\n\t"
+ "str r7, [%[r], #252]\n\t"
+ /* c = 0 + 0 + C: the final carry out. */
+ "adc %[c], r12, r12\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Constant-time conditional copy: m is all-ones (keep a) or all-zeros
+ * (clear), so no data-dependent branch is taken.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<32; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ int i;
+
+ /* Unrolled by 8 when code size is not constrained. */
+ for (i = 0; i < 32; i += 8) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ r[i+6] = a[i+6] & m;
+ r[i+7] = a[i+7] & m;
+ }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Karatsuba multiplication: the 64-word operands are split into 32-word
+ * halves and combined from three 32-word products. Note z0 aliases r, so
+ * the order of the operations below is load-bearing.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[64];
+ sp_digit a1[32];
+ sp_digit b1[32];
+ sp_digit z2[64];
+ sp_digit u, ca, cb;
+
+ /* a1 = aL + aH, b1 = bL + bH; ca/cb are the carries out. */
+ ca = sp_2048_add_32(a1, a, &a[32]);
+ cb = sp_2048_add_32(b1, b, &b[32]);
+ u = ca & cb;
+ sp_2048_mul_32(z1, a1, b1);
+ sp_2048_mul_32(z2, &a[32], &b[32]);
+ sp_2048_mul_32(z0, a, b);
+ /* Constant-time fix-up for the carries out of a1 and b1. */
+ sp_2048_mask_32(r + 64, a1, 0 - cb);
+ sp_2048_mask_32(b1, b1, 0 - ca);
+ u += sp_2048_add_32(r + 64, r + 64, b1);
+ /* z1 -= z2 + z0, then add the middle term at word offset 32. */
+ u += sp_2048_sub_in_place_64(z1, z2);
+ u += sp_2048_sub_in_place_64(z1, z0);
+ u += sp_2048_add_64(r + 32, r + 32, z1);
+ r[96] = u;
+ XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+ (void)sp_2048_add_64(r + 64, r + 64, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring from three 32-word squares. z0 aliases r, so the
+ * order of the operations below is load-bearing.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[64];
+ sp_digit z1[64];
+ sp_digit a1[32];
+ sp_digit u;
+
+ /* a1 = aL + aH with carry u. */
+ u = sp_2048_add_32(a1, a, &a[32]);
+ sp_2048_sqr_32(z1, a1);
+ sp_2048_sqr_32(z2, &a[32]);
+ sp_2048_sqr_32(z0, a);
+ /* Constant-time fix-up for the carry: add a1 twice when u is set. */
+ sp_2048_mask_32(r + 64, a1, 0 - u);
+ u += sp_2048_add_32(r + 64, r + 64, r + 64);
+ u += sp_2048_sub_in_place_64(z1, z2);
+ u += sp_2048_sub_in_place_64(z1, z0);
+ u += sp_2048_add_64(r + 32, r + 32, z1);
+ r[96] = u;
+ XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+ (void)sp_2048_add_64(r + 64, r + 64, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Small-code loop variant: four words per iteration; the carry is saved
+ * into c at the end of each iteration and restored into the carry flag
+ * at the top via "adds c, c, #-1" (sets C iff c was 1).
+ * Returns the carry out (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r12 = end pointer: 64 words = 256 bytes past a. */
+ "add r12, %[a], #256\n\t"
+ "\n1:\n\t"
+ /* Restore saved carry: C is set iff c >= 1. */
+ "adds %[c], %[c], #-1\n\t"
+ "ldr r4, [%[a]], #4\n\t"
+ "ldr r5, [%[a]], #4\n\t"
+ "ldr r6, [%[a]], #4\n\t"
+ "ldr r7, [%[a]], #4\n\t"
+ "ldr r8, [%[b]], #4\n\t"
+ "ldr r9, [%[b]], #4\n\t"
+ "ldr r10, [%[b]], #4\n\t"
+ "ldr r14, [%[b]], #4\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r]], #4\n\t"
+ "str r5, [%[r]], #4\n\t"
+ "str r6, [%[r]], #4\n\t"
+ "str r7, [%[r]], #4\n\t"
+ /* Save the carry flag back into c (0 or 1). */
+ "mov r4, #0\n\t"
+ "adc %[c], r4, #0\n\t"
+ "cmp %[a], r12\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Small-code loop variant: four words per iteration; the borrow is kept
+ * in c as 0 or all-ones and moved back into the carry flag at the loop
+ * top via "subs c, r14(=0), c".
+ * Returns 0 on no borrow out, all-ones when a < b.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r14, #0\n\t"
+ /* r12 = end pointer: 64 words = 256 bytes past a. */
+ "add r12, %[a], #256\n\t"
+ "\n1:\n\t"
+ /* Restore saved borrow: clears C (borrow in) iff c was all-ones. */
+ "subs %[c], r14, %[c]\n\t"
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b]], #4\n\t"
+ "ldr r8, [%[b]], #4\n\t"
+ "ldr r9, [%[b]], #4\n\t"
+ "ldr r10, [%[b]], #4\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "sbcs r6, r6, r10\n\t"
+ "str r3, [%[a]], #4\n\t"
+ "str r4, [%[a]], #4\n\t"
+ "str r5, [%[a]], #4\n\t"
+ "str r6, [%[a]], #4\n\t"
+ /* c = 0 - borrow: 0 or 0xffffffff. */
+ "sbc %[c], r14, r14\n\t"
+ "cmp %[a], r12\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Small-code product-scanning (comba-style) multiplication. A 512-byte
+ * scratch area is taken on the stack inside the asm; the result is then
+ * copied to r 16 bytes at a time while the stack is released.
+ * Register roles: r5 = output byte offset of the column being summed,
+ * r3/r4 = byte indices into a/b, r6:r7:r8 = three-word column
+ * accumulator, r9:r10 = 64-bit partial product.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #512\n\t"
+ "mov r5, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ /* r3 = max(r5 - 252, 0): first a-index contributing to this column. */
+ "subs r3, r5, #252\n\t"
+ "it cc\n\t"
+ "movcc r3, #0\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ /* Accumulate a[r3/4] * b[r4/4] into the r6:r7:r8 column sum. */
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r12, [%[b], r4]\n\t"
+ "umull r9, r10, r14, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #256\n\t"
+ "beq 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Store the finished column and shift the accumulator down a word. */
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #504\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ /* Copy the 512-byte product from the stack to r, releasing as we go. */
+ "\n4:\n\t"
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+ /* Product-scanning square. Like sp_2048_mul_64 but each off-diagonal
+  * product a[i]*a[j] (i != j) is computed once and added twice, and
+  * each diagonal product a[i]^2 is added once. Register roles:
+  *   r5        - byte offset of the current result column
+  *   r3, r4    - byte offsets into a (r3 + r4 == r5, r3 <= r4)
+  *   r6:r7:r8  - 96-bit column accumulator (low:mid:high)
+  *   r12       - constant zero
+  * Fix: the clobber list previously named "r9" twice; the duplicate
+  * entry is removed (each clobbered register is listed once).
+  */
+ __asm__ __volatile__ (
+ "sub sp, sp, #512\n\t"
+ "mov r12, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "mov r5, #0\n\t"
+ "\n1:\n\t"
+ /* r3 = max(0, r5 - 252) keeps both offsets within 0..252. */
+ "subs r3, r5, #252\n\t"
+ "it cc\n\t"
+ "movcc r3, r12\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ /* Diagonal element (r3 == r4) is squared and added once. */
+ "cmp r4, r3\n\t"
+ "beq 4f\n\t"
+ /* Off-diagonal product added twice (covers (i,j) and (j,i)). */
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r9, [%[a], r4]\n\t"
+ "umull r9, r10, r14, r9\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ "ldr r14, [%[a], r3]\n\t"
+ "umull r9, r10, r14, r14\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "\n5:\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #256\n\t"
+ "beq 3f\n\t"
+ /* Stop once the indexes cross the diagonal. */
+ "cmp r3, r4\n\t"
+ "bgt 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Column done: store its low word, shift the accumulator down. */
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #504\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ "\n4:\n\t"
+ /* Copy the 512-byte stack buffer to r, releasing stack as we go. */
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+ int i = 0;
+
+ /* Store each of the 32 digits of a into r, ANDed with the mask m. */
+ while (i < 32) {
+ r[i] = a[i] & m;
+ i++;
+ }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0; /* Carry out of the addition: 0 or 1. */
+
+ /* Add the 32 words of a and b into r, four words per loop
+  * iteration. r12 holds the end address of a. */
+ __asm__ __volatile__ (
+ "add r12, %[a], #128\n\t"
+ "\n1:\n\t"
+ /* c + 0xffffffff: sets the carry flag exactly when c == 1. */
+ "adds %[c], %[c], #-1\n\t"
+ "ldr r4, [%[a]], #4\n\t"
+ "ldr r5, [%[a]], #4\n\t"
+ "ldr r6, [%[a]], #4\n\t"
+ "ldr r7, [%[a]], #4\n\t"
+ "ldr r8, [%[b]], #4\n\t"
+ "ldr r9, [%[b]], #4\n\t"
+ "ldr r10, [%[b]], #4\n\t"
+ "ldr r14, [%[b]], #4\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r]], #4\n\t"
+ "str r5, [%[r]], #4\n\t"
+ "str r6, [%[r]], #4\n\t"
+ "str r7, [%[r]], #4\n\t"
+ "mov r4, #0\n\t"
+ /* Capture the carry out of this chunk as 0 or 1. */
+ "adc %[c], r4, #0\n\t"
+ "cmp %[a], r12\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0; /* Running borrow: 0 = no borrow, all-ones after borrow. */
+
+ /* Subtract the 32 words of b from a in place, four words per loop
+  * iteration. r12 holds the end address of a; r14 stays zero so the
+  * borrow saved in c can be converted back into the carry flag. */
+ __asm__ __volatile__ (
+ "mov r14, #0\n\t"
+ "add r12, %[a], #128\n\t"
+ "\n1:\n\t"
+ /* 0 - c: sets the carry flag (no borrow) when c == 0, clears it
+  * (borrow pending) when c is all-ones. */
+ "subs %[c], r14, %[c]\n\t"
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b]], #4\n\t"
+ "ldr r8, [%[b]], #4\n\t"
+ "ldr r9, [%[b]], #4\n\t"
+ "ldr r10, [%[b]], #4\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "sbcs r6, r6, r10\n\t"
+ "str r3, [%[a]], #4\n\t"
+ "str r4, [%[a]], #4\n\t"
+ "str r5, [%[a]], #4\n\t"
+ "str r6, [%[a]], #4\n\t"
+ /* Save the borrow out of this chunk: c = 0 or all-ones. */
+ "sbc %[c], r14, r14\n\t"
+ "cmp %[a], r12\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ /* Product-scanning (column-wise) multiply. The 256-byte product is
+  * accumulated on the stack one 32-bit column at a time and copied
+  * out to r at the end. Register roles:
+  *   r5        - byte offset of the current result column
+  *   r3, r4    - byte offsets into a and b (invariant: r3 + r4 == r5)
+  *   r6:r7:r8  - 96-bit column accumulator (low:mid:high)
+  */
+ __asm__ __volatile__ (
+ "sub sp, sp, #256\n\t"
+ "mov r5, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ /* r3 = max(0, r5 - 124) keeps both offsets within 0..124. */
+ "subs r3, r5, #124\n\t"
+ "it cc\n\t"
+ "movcc r3, #0\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ /* Accumulate a[r3/4] * b[r4/4] into the 96-bit column sum. */
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r12, [%[b], r4]\n\t"
+ "umull r9, r10, r14, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #128\n\t"
+ "beq 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Column done: store its low word, shift the accumulator down. */
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #248\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ "\n4:\n\t"
+ /* Copy the 256-byte stack buffer to r, 16 bytes per iteration,
+  * releasing the stack space as we go. */
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+ /* Product-scanning square. Like sp_2048_mul_32 but each off-diagonal
+  * product a[i]*a[j] (i != j) is computed once and added twice, and
+  * each diagonal product a[i]^2 is added once. Register roles:
+  *   r5        - byte offset of the current result column
+  *   r3, r4    - byte offsets into a (r3 + r4 == r5, r3 <= r4)
+  *   r6:r7:r8  - 96-bit column accumulator (low:mid:high)
+  *   r12       - constant zero
+  * Fix: the clobber list previously named "r9" twice; the duplicate
+  * entry is removed (each clobbered register is listed once).
+  */
+ __asm__ __volatile__ (
+ "sub sp, sp, #256\n\t"
+ "mov r12, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "mov r5, #0\n\t"
+ "\n1:\n\t"
+ /* r3 = max(0, r5 - 124) keeps both offsets within 0..124. */
+ "subs r3, r5, #124\n\t"
+ "it cc\n\t"
+ "movcc r3, r12\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ /* Diagonal element (r3 == r4) is squared and added once. */
+ "cmp r4, r3\n\t"
+ "beq 4f\n\t"
+ /* Off-diagonal product added twice (covers (i,j) and (j,i)). */
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r9, [%[a], r4]\n\t"
+ "umull r9, r10, r14, r9\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ "ldr r14, [%[a], r3]\n\t"
+ "umull r9, r10, r14, r14\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "\n5:\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #128\n\t"
+ "beq 3f\n\t"
+ /* Stop once the indexes cross the diagonal. */
+ "cmp r3, r4\n\t"
+ "bgt 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Column done: store its low word, shift the accumulator down. */
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #248\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ "\n4:\n\t"
+ /* Copy the 256-byte stack buffer to r, releasing stack as we go. */
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit inv;
+ sp_digit m0 = a[0];
+
+ /* Seed the inverse: inv * m0 == 1 (mod 2^4). */
+ inv = (((m0 + 2) & 4) << 1) + m0;
+ /* Each Newton step doubles the number of correct low bits. */
+ inv *= 2 - m0 * inv; /* inv * m0 == 1 (mod 2^8) */
+ inv *= 2 - m0 * inv; /* inv * m0 == 1 (mod 2^16) */
+ inv *= 2 - m0 * inv; /* inv * m0 == 1 (mod 2^32) */
+
+ /* rho = -1/m mod 2^32 (negate the inverse). */
+ *rho = (sp_digit)0 - inv;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Looping variant. r3:r4:r5 is a sliding 96-bit accumulator that is
+  * shifted down one word after each stored digit; r9 is the byte
+  * offset of the current word and r10 a constant zero. */
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r5, r3, %[b], r8\n\t"
+ "mov r4, #0\n\t"
+ "str r5, [%[r]]\n\t"
+ "mov r5, #0\n\t"
+ "mov r9, #4\n\t"
+ "1:\n\t"
+ /* r3:r4:r5 += a[r9/4] * b; store low word, shift accumulator. */
+ "ldr r8, [%[a], r9]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], r9]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r9, r9, #4\n\t"
+ "cmp r9, #256\n\t"
+ "blt 1b\n\t"
+ "str r3, [%[r], #256]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#else
+ /* Fully unrolled variant: same accumulator scheme, with the three
+  * accumulator registers rotating through r3/r4/r5 at each word so
+  * no register moves are needed between words. */
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r3, r4, %[b], r8\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [%[r]]\n\t"
+ "# A[1] * B\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "# A[2] * B\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "# A[3] * B\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "# A[4] * B\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "# A[5] * B\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "# A[6] * B\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "# A[7] * B\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "# A[8] * B\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #32]\n\t"
+ "# A[9] * B\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "# A[10] * B\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "# A[11] * B\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #44]\n\t"
+ "# A[12] * B\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "# A[13] * B\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "# A[14] * B\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "# A[15] * B\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "# A[16] * B\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "# A[17] * B\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "# A[18] * B\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #72]\n\t"
+ "# A[19] * B\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "# A[20] * B\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #80]\n\t"
+ "# A[21] * B\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #84]\n\t"
+ "# A[22] * B\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "# A[23] * B\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #92]\n\t"
+ "# A[24] * B\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #96]\n\t"
+ "# A[25] * B\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "# A[26] * B\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #104]\n\t"
+ "# A[27] * B\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #108]\n\t"
+ "# A[28] * B\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "# A[29] * B\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #116]\n\t"
+ "# A[30] * B\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #120]\n\t"
+ "# A[31] * B\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #124]\n\t"
+ "# A[32] * B\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #128]\n\t"
+ "# A[33] * B\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #132]\n\t"
+ "# A[34] * B\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "# A[35] * B\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #140]\n\t"
+ "# A[36] * B\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #144]\n\t"
+ "# A[37] * B\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #148]\n\t"
+ "# A[38] * B\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #152]\n\t"
+ "# A[39] * B\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #156]\n\t"
+ "# A[40] * B\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "# A[41] * B\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #164]\n\t"
+ "# A[42] * B\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #168]\n\t"
+ "# A[43] * B\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #172]\n\t"
+ "# A[44] * B\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #176]\n\t"
+ "# A[45] * B\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #180]\n\t"
+ "# A[46] * B\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "# A[47] * B\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #188]\n\t"
+ "# A[48] * B\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #192]\n\t"
+ "# A[49] * B\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #196]\n\t"
+ "# A[50] * B\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #200]\n\t"
+ "# A[51] * B\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #204]\n\t"
+ "# A[52] * B\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "# A[53] * B\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #212]\n\t"
+ "# A[54] * B\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #216]\n\t"
+ "# A[55] * B\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #220]\n\t"
+ "# A[56] * B\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #224]\n\t"
+ "# A[57] * B\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #228]\n\t"
+ "# A[58] * B\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #232]\n\t"
+ "# A[59] * B\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #236]\n\t"
+ "# A[60] * B\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #240]\n\t"
+ "# A[61] * B\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #244]\n\t"
+ "# A[62] * B\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #248]\n\t"
+ "# A[63] * B\n\t"
+ /* Last word: no further carry word is needed. */
+ "ldr r8, [%[a], #252]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r3, [%[r], #252]\n\t"
+ "str r4, [%[r], #256]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#endif
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
+{
+ int i;
+
+ /* r = 0, standing in for 2^2048 truncated to 32 words. */
+ for (i = 0; i < 32; i++) {
+ r[i] = 0;
+ }
+
+ /* r = 2^2048 - m, i.e. 2^2048 mod m for a full-size modulus. */
+ sp_2048_sub_in_place_32(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ sp_digit c = 0; /* Borrow out: 0 when no borrow, all-ones on borrow. */
+
+#ifdef WOLFSSL_SP_SMALL
+ /* One word per iteration: r8 is the byte offset, r9 a constant zero
+  * used to turn the borrow saved in c back into the carry flag.
+  * Masking b with m (0 or all-ones) makes the subtraction
+  * conditional without branching on secret data. */
+ __asm__ __volatile__ (
+ "mov r9, #0\n\t"
+ "mov r8, #0\n\t"
+ "1:\n\t"
+ "subs %[c], r9, %[c]\n\t"
+ "ldr r4, [%[a], r8]\n\t"
+ "ldr r5, [%[b], r8]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbc %[c], r9, r9\n\t"
+ "str r4, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, #128\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#else
+ /* Unrolled variant: the borrow chain runs unbroken through all 32
+  * words (subs on the first word, sbcs thereafter), with each pair
+  * of b words masked by m before subtraction. */
+ __asm__ __volatile__ (
+
+ "mov r9, #0\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "subs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r6, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r6, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r6, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r6, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r6, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r7, [%[b], #44]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r6, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r6, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r6, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r7, [%[b], #60]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "str r6, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r6, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r6, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r6, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r7, [%[b], #76]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "str r6, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r6, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r6, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r6, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r7, [%[b], #92]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "str r6, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r6, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r6, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r6, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r7, [%[b], #108]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "str r6, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r6, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r6, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r6, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r7, [%[b], #124]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "str r6, [%[r], #124]\n\t"
+ /* Capture the final borrow as 0 or all-ones. */
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#endif /* WOLFSSL_SP_SMALL */
+
+ return c;
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ "# i = 0\n\t"
+ "mov r12, #0\n\t"
+ "ldr r10, [%[a], #0]\n\t"
+ "ldr r14, [%[a], #4]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul r8, %[mp], r10\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "ldr r7, [%[m], #0]\n\t"
+ "ldr r9, [%[a], #0]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r10, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "ldr r7, [%[m], #4]\n\t"
+ "ldr r9, [%[a], #4]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r14, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r10, r10, r5\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr r7, [%[m], #8]\n\t"
+ "ldr r14, [%[a], #8]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r14, r14, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r14, r14, r4\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr r7, [%[m], #12]\n\t"
+ "ldr r9, [%[a], #12]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #12]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr r7, [%[m], #16]\n\t"
+ "ldr r9, [%[a], #16]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #16]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr r7, [%[m], #20]\n\t"
+ "ldr r9, [%[a], #20]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #20]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr r7, [%[m], #24]\n\t"
+ "ldr r9, [%[a], #24]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #24]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr r7, [%[m], #28]\n\t"
+ "ldr r9, [%[a], #28]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #28]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr r7, [%[m], #32]\n\t"
+ "ldr r9, [%[a], #32]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #32]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr r7, [%[m], #36]\n\t"
+ "ldr r9, [%[a], #36]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #36]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr r7, [%[m], #40]\n\t"
+ "ldr r9, [%[a], #40]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #40]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr r7, [%[m], #44]\n\t"
+ "ldr r9, [%[a], #44]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #44]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+12] += m[12] * mu\n\t"
+ "ldr r7, [%[m], #48]\n\t"
+ "ldr r9, [%[a], #48]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #48]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+13] += m[13] * mu\n\t"
+ "ldr r7, [%[m], #52]\n\t"
+ "ldr r9, [%[a], #52]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #52]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+14] += m[14] * mu\n\t"
+ "ldr r7, [%[m], #56]\n\t"
+ "ldr r9, [%[a], #56]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #56]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+15] += m[15] * mu\n\t"
+ "ldr r7, [%[m], #60]\n\t"
+ "ldr r9, [%[a], #60]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #60]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+16] += m[16] * mu\n\t"
+ "ldr r7, [%[m], #64]\n\t"
+ "ldr r9, [%[a], #64]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #64]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+17] += m[17] * mu\n\t"
+ "ldr r7, [%[m], #68]\n\t"
+ "ldr r9, [%[a], #68]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #68]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+18] += m[18] * mu\n\t"
+ "ldr r7, [%[m], #72]\n\t"
+ "ldr r9, [%[a], #72]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #72]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+19] += m[19] * mu\n\t"
+ "ldr r7, [%[m], #76]\n\t"
+ "ldr r9, [%[a], #76]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #76]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+20] += m[20] * mu\n\t"
+ "ldr r7, [%[m], #80]\n\t"
+ "ldr r9, [%[a], #80]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #80]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+21] += m[21] * mu\n\t"
+ "ldr r7, [%[m], #84]\n\t"
+ "ldr r9, [%[a], #84]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #84]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+22] += m[22] * mu\n\t"
+ "ldr r7, [%[m], #88]\n\t"
+ "ldr r9, [%[a], #88]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #88]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+23] += m[23] * mu\n\t"
+ "ldr r7, [%[m], #92]\n\t"
+ "ldr r9, [%[a], #92]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #92]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+24] += m[24] * mu\n\t"
+ "ldr r7, [%[m], #96]\n\t"
+ "ldr r9, [%[a], #96]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #96]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+25] += m[25] * mu\n\t"
+ "ldr r7, [%[m], #100]\n\t"
+ "ldr r9, [%[a], #100]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #100]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+26] += m[26] * mu\n\t"
+ "ldr r7, [%[m], #104]\n\t"
+ "ldr r9, [%[a], #104]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #104]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+27] += m[27] * mu\n\t"
+ "ldr r7, [%[m], #108]\n\t"
+ "ldr r9, [%[a], #108]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #108]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+28] += m[28] * mu\n\t"
+ "ldr r7, [%[m], #112]\n\t"
+ "ldr r9, [%[a], #112]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #112]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+29] += m[29] * mu\n\t"
+ "ldr r7, [%[m], #116]\n\t"
+ "ldr r9, [%[a], #116]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #116]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+30] += m[30] * mu\n\t"
+ "ldr r7, [%[m], #120]\n\t"
+ "ldr r9, [%[a], #120]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #120]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+31] += m[31] * mu\n\t"
+ "ldr r7, [%[m], #124]\n\t"
+ "ldr r9, [%[a], #124]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r7, r7, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ "adc %[ca], %[ca], %[ca]\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #124]\n\t"
+ "ldr r9, [%[a], #128]\n\t"
+ "adcs r9, r9, r7\n\t"
+ "str r9, [%[a], #128]\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ "# i += 1\n\t"
+ "add %[a], %[a], #4\n\t"
+ "add r12, r12, #4\n\t"
+ "cmp r12, #128\n\t"
+ "blt 1b\n\t"
+ "str r10, [%[a], #0]\n\t"
+ "str r14, [%[a], #4]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Forms the full double-width product and then Montgomery-reduces it,
+ * so the result remains in Montgomery form.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    /* r = a * b, double-width (64 words). */
+    sp_2048_mul_32(r, a, b);
+    /* Reduce the double-width product back to 32 words mod m. */
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Uses the dedicated squaring routine (cheaper than a general multiply)
+ * followed by a Montgomery reduction.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    /* r = a * a, double-width (64 words). */
+    sp_2048_sqr_32(r, a);
+    /* Reduce the double-width square back to 32 words mod m. */
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Multiplies the 32-word number a by the single 32-bit digit b, writing a
+ * 33-word result to r (the top word at offset 128 holds the final carry).
+ *
+ * r A single precision integer (result, 33 words).
+ * a A single precision integer (32 words).
+ * b A single precision digit.
+ */
+static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Looped variant: r9 is the byte offset (4..124); the three-register
+     * window r3:r4:r5 carries partial sums between iterations. */
+    __asm__ __volatile__ (
+        "mov r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr r8, [%[a]]\n\t"
+        "umull r5, r3, %[b], r8\n\t"
+        "mov r4, #0\n\t"
+        "str r5, [%[r]]\n\t"
+        "mov r5, #0\n\t"
+        "mov r9, #4\n\t"
+        "1:\n\t"
+        "ldr r8, [%[a], r9]\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], r9]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r9, r9, #4\n\t"
+        "cmp r9, #128\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r], #128]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#else
+    /* Fully unrolled variant: the accumulator window rotates through
+     * r3/r4/r5 each word so no extra register moves are needed; r10 stays
+     * zero and is used only to collect the carry via adc. */
+    __asm__ __volatile__ (
+        "mov r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr r8, [%[a]]\n\t"
+        "umull r3, r4, %[b], r8\n\t"
+        "mov r5, #0\n\t"
+        "str r3, [%[r]]\n\t"
+        "# A[1] * B\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "# A[2] * B\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #8]\n\t"
+        "# A[3] * B\n\t"
+        "ldr r8, [%[a], #12]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #12]\n\t"
+        "# A[4] * B\n\t"
+        "ldr r8, [%[a], #16]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "# A[5] * B\n\t"
+        "ldr r8, [%[a], #20]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "# A[6] * B\n\t"
+        "ldr r8, [%[a], #24]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #24]\n\t"
+        "# A[7] * B\n\t"
+        "ldr r8, [%[a], #28]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "# A[8] * B\n\t"
+        "ldr r8, [%[a], #32]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #32]\n\t"
+        "# A[9] * B\n\t"
+        "ldr r8, [%[a], #36]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #36]\n\t"
+        "# A[10] * B\n\t"
+        "ldr r8, [%[a], #40]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "# A[11] * B\n\t"
+        "ldr r8, [%[a], #44]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #44]\n\t"
+        "# A[12] * B\n\t"
+        "ldr r8, [%[a], #48]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #48]\n\t"
+        "# A[13] * B\n\t"
+        "ldr r8, [%[a], #52]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #52]\n\t"
+        "# A[14] * B\n\t"
+        "ldr r8, [%[a], #56]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #56]\n\t"
+        "# A[15] * B\n\t"
+        "ldr r8, [%[a], #60]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #60]\n\t"
+        "# A[16] * B\n\t"
+        "ldr r8, [%[a], #64]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #64]\n\t"
+        "# A[17] * B\n\t"
+        "ldr r8, [%[a], #68]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #68]\n\t"
+        "# A[18] * B\n\t"
+        "ldr r8, [%[a], #72]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #72]\n\t"
+        "# A[19] * B\n\t"
+        "ldr r8, [%[a], #76]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #76]\n\t"
+        "# A[20] * B\n\t"
+        "ldr r8, [%[a], #80]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #80]\n\t"
+        "# A[21] * B\n\t"
+        "ldr r8, [%[a], #84]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #84]\n\t"
+        "# A[22] * B\n\t"
+        "ldr r8, [%[a], #88]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #88]\n\t"
+        "# A[23] * B\n\t"
+        "ldr r8, [%[a], #92]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #92]\n\t"
+        "# A[24] * B\n\t"
+        "ldr r8, [%[a], #96]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #96]\n\t"
+        "# A[25] * B\n\t"
+        "ldr r8, [%[a], #100]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #100]\n\t"
+        "# A[26] * B\n\t"
+        "ldr r8, [%[a], #104]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #104]\n\t"
+        "# A[27] * B\n\t"
+        "ldr r8, [%[a], #108]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #108]\n\t"
+        "# A[28] * B\n\t"
+        "ldr r8, [%[a], #112]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #112]\n\t"
+        "# A[29] * B\n\t"
+        "ldr r8, [%[a], #116]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #116]\n\t"
+        "# A[30] * B\n\t"
+        "ldr r8, [%[a], #120]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #120]\n\t"
+        "# A[31] * B\n\t"
+        "ldr r8, [%[a], #124]\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adc r5, r5, r7\n\t"
+        "str r4, [%[r], #124]\n\t"
+        "str r5, [%[r], #128]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * Performs a bit-serial trial division of the top bits, then refines the
+ * quotient with two multiply-and-subtract correction rounds plus a final
+ * conditional adjustment.
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r = 0;
+
+    /* r5 = (div >> 1) + 1 is the comparison threshold; each iteration
+     * shifts one more dividend bit into r7 and accumulates a quotient
+     * bit into r (branch-free: the sbc produces a 0/-1 mask in r8). */
+    __asm__ __volatile__ (
+        "lsr r5, %[div], #1\n\t"
+        "add r5, r5, #1\n\t"
+        "mov r6, %[d0]\n\t"
+        "mov r7, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "subs r8, r5, r7\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], %[r]\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "and r8, r8, r5\n\t"
+        "subs r7, r7, r8\n\t"
+        "# Next 30 bits\n\t"
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        "movs r6, r6, lsl #1\n\t"
+        "adc r7, r7, r7\n\t"
+        "subs r8, r5, r7\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], %[r]\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "and r8, r8, r5\n\t"
+        "subs r7, r7, r8\n\t"
+        "subs r4, r4, #1\n\t"
+        "bpl 1b\n\t"
+        "add %[r], %[r], %[r]\n\t"
+        "add %[r], %[r], #1\n\t"
+        "umull r4, r5, %[r], %[div]\n\t"
+        "subs r4, %[d0], r4\n\t"
+        "sbc r5, %[d1], r5\n\t"
+        "add %[r], %[r], r5\n\t"
+        "umull r4, r5, %[r], %[div]\n\t"
+        "subs r4, %[d0], r4\n\t"
+        "sbc r5, %[d1], r5\n\t"
+        "add %[r], %[r], r5\n\t"
+        "subs r8, %[div], r4\n\t"
+        "sbc r8, r8, r8\n\t"
+        "sub %[r], %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r7", "r8"
+    );
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word down.  r3 acts as a "still equal"
+ * mask (-1 while all higher words matched, 0 afterwards); masking both
+ * operands with r3 makes later words irrelevant once a difference is
+ * found, so the running time does not depend on the data.  The final
+ * "eor %[r], %[r], r3" flips the initial r = -1 to 0 when all words
+ * were equal.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = -1;
+    sp_digit one = 1;
+
+
+#ifdef WOLFSSL_SP_SMALL
+    /* Looped variant: r6 walks the byte offset from 124 down to 0. */
+    __asm__ __volatile__ (
+        "mov r7, #0\n\t"
+        "mov r3, #-1\n\t"
+        "mov r6, #124\n\t"
+        "1:\n\t"
+        "ldr r4, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "subs r6, r6, #4\n\t"
+        "bcs 1b\n\t"
+        "eor %[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#else
+    /* Unrolled variant: identical per-word stanza repeated for offsets
+     * 124 down to 0. */
+    __asm__ __volatile__ (
+        "mov r7, #0\n\t"
+        "mov r3, #-1\n\t"
+        "ldr r4, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #124]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #120]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #116]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #112]\n\t"
+        "ldr r5, [%[b], #112]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #108]\n\t"
+        "ldr r5, [%[b], #108]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #104]\n\t"
+        "ldr r5, [%[b], #104]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #100]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #96]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #92]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #88]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #84]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #80]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #76]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #72]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #68]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #64]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #60]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #56]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #52]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #48]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #44]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #36]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "eor %[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#endif
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division one 32-bit word at a time, using the
+ * approximate word divider div_2048_word_32 and correcting each estimate
+ * with mask-based conditional adds (constant time per word).
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[64], t2[33];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+
+    /* div = top word of the divisor, used for the quotient estimates. */
+    div = d[31];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
+    for (i=31; i>=0; i--) {
+        /* Estimate the next quotient word from the top two words. */
+        r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div);
+
+        /* Subtract r1 * d; the estimate may be one too large, so add d
+         * back (twice, mask-selected) while the running top word is
+         * negative. */
+        sp_2048_mul_d_32(t2, d, r1);
+        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
+        t1[32 + i] -= t2[32];
+        sp_2048_mask_32(t2, d, t1[32 + i]);
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+        sp_2048_mask_32(t2, d, t1[32 + i]);
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+    }
+
+    /* Final conditional subtract brings the remainder below d. */
+    r1 = sp_2048_cmp_32(t1, d) >= 0;
+    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_2048_div_32 that keeps only the remainder.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_32(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window exponentiation in the Montgomery domain using a
+ * 16-entry table of powers of a.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][64];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;       /* bit buffer holding unread exponent bits, MSB-first */
+    sp_digit mask;
+    int i;
+    int c, y;         /* c = bits left in n; y = current 4-bit window value */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* Convert a to Montgomery form: place a in the upper 32 words
+         * (i.e. shift up by 32 words) and reduce modulo m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[k] = a^k in Montgomery form. */
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+
+        /* Consume the top (possibly partial) window first so that all
+         * remaining windows are exactly 4 bits wide. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                /* Bit buffer empty: refill from the next exponent word. */
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                /* Window straddles a word boundary: combine leftover
+                 * bits with the top of the next word. */
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                /* Take the next 4 bits from the buffer. */
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            /* r = r^16 * a^y (4 squarings then one table multiply). */
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        /* Leave Montgomery form and do the final conditional subtract. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window exponentiation in the Montgomery domain using a
+ * 32-entry table of powers of a (faster, larger-memory variant).
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][64];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;       /* bit buffer holding unread exponent bits, MSB-first */
+    sp_digit mask;
+    int i;
+    int c, y;         /* c = bits left in n; y = current 5-bit window value */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* Convert a to Montgomery form: place a in the upper 32 words
+         * (i.e. shift up by 32 words) and reduce modulo m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[k] = a^k in Montgomery form. */
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_32(t[20], t[10], m, mp);
+        sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_32(t[22], t[11], m, mp);
+        sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_32(t[24], t[12], m, mp);
+        sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_32(t[26], t[13], m, mp);
+        sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_32(t[28], t[14], m, mp);
+        sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_32(t[30], t[15], m, mp);
+        sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
+
+        /* Consume the top (possibly partial) window first so that all
+         * remaining windows are exactly 5 bits wide. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                /* Bit buffer empty: refill from the next exponent word. */
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                /* Window straddles a word boundary: combine leftover
+                 * bits with the top of the next word. */
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                /* Take the next 5 bits from the buffer. */
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            /* r = r^32 * a^y (5 squarings then one table multiply). */
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        /* Leave Montgomery form and do the final conditional subtract. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * Since m has its top bit set, 2^n - m < m, so a single subtraction
+ * from zero (i.e. computing 0 - m over 64 words) yields 2^n mod m.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 64);
+
+    /* r = 2^n mod m */
+    sp_2048_sub_in_place_64(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not subtracting.
+ *
+ * Constant-time with respect to the mask value: both paths execute the
+ * same instruction sequence; the mask only gates the value of b.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * Returns the final borrow: 0 on no borrow, all-ones (-1) on borrow
+ * (produced by "sbc c, r9, r9" with r9 = 0).
+ */
+static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+ /* Small-code variant: one word per pass, r8 = byte offset 0..252.
+ * The borrow is carried between iterations through c:
+ * "subs c, r9, c" re-creates the carry flag from the saved borrow. */
+ __asm__ __volatile__ (
+ "mov r9, #0\n\t"
+ "mov r8, #0\n\t"
+ "1:\n\t"
+ "subs %[c], r9, %[c]\n\t"
+ "ldr r4, [%[a], r8]\n\t"
+ "ldr r5, [%[b], r8]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbc %[c], r9, r9\n\t"
+ "str r4, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, #256\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#else
+ /* Fully unrolled variant: 64 words, two per group (offsets X, X+4).
+ * The first group uses SUBS to start the borrow chain; every later
+ * group uses SBCS so the borrow propagates uninterrupted — no flag-
+ * clobbering instruction may be inserted between groups. */
+ __asm__ __volatile__ (
+
+ "mov r9, #0\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "subs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r6, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r6, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r6, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r6, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r6, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r7, [%[b], #44]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r6, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r6, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r6, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r7, [%[b], #60]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "str r6, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r6, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r6, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r6, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r7, [%[b], #76]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "str r6, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r6, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r6, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r6, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r7, [%[b], #92]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "str r6, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r6, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r6, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r6, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r7, [%[b], #108]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "str r6, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r6, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r6, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r6, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r7, [%[b], #124]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "str r6, [%[r], #124]\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r6, [%[a], #132]\n\t"
+ "ldr r5, [%[b], #128]\n\t"
+ "ldr r7, [%[b], #132]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #128]\n\t"
+ "str r6, [%[r], #132]\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r6, [%[a], #140]\n\t"
+ "ldr r5, [%[b], #136]\n\t"
+ "ldr r7, [%[b], #140]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "str r6, [%[r], #140]\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r6, [%[a], #148]\n\t"
+ "ldr r5, [%[b], #144]\n\t"
+ "ldr r7, [%[b], #148]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #144]\n\t"
+ "str r6, [%[r], #148]\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r6, [%[a], #156]\n\t"
+ "ldr r5, [%[b], #152]\n\t"
+ "ldr r7, [%[b], #156]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #152]\n\t"
+ "str r6, [%[r], #156]\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r6, [%[a], #164]\n\t"
+ "ldr r5, [%[b], #160]\n\t"
+ "ldr r7, [%[b], #164]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "str r6, [%[r], #164]\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r6, [%[a], #172]\n\t"
+ "ldr r5, [%[b], #168]\n\t"
+ "ldr r7, [%[b], #172]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #168]\n\t"
+ "str r6, [%[r], #172]\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r6, [%[a], #180]\n\t"
+ "ldr r5, [%[b], #176]\n\t"
+ "ldr r7, [%[b], #180]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #176]\n\t"
+ "str r6, [%[r], #180]\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r6, [%[a], #188]\n\t"
+ "ldr r5, [%[b], #184]\n\t"
+ "ldr r7, [%[b], #188]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "str r6, [%[r], #188]\n\t"
+ "ldr r4, [%[a], #192]\n\t"
+ "ldr r6, [%[a], #196]\n\t"
+ "ldr r5, [%[b], #192]\n\t"
+ "ldr r7, [%[b], #196]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #192]\n\t"
+ "str r6, [%[r], #196]\n\t"
+ "ldr r4, [%[a], #200]\n\t"
+ "ldr r6, [%[a], #204]\n\t"
+ "ldr r5, [%[b], #200]\n\t"
+ "ldr r7, [%[b], #204]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #200]\n\t"
+ "str r6, [%[r], #204]\n\t"
+ "ldr r4, [%[a], #208]\n\t"
+ "ldr r6, [%[a], #212]\n\t"
+ "ldr r5, [%[b], #208]\n\t"
+ "ldr r7, [%[b], #212]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "str r6, [%[r], #212]\n\t"
+ "ldr r4, [%[a], #216]\n\t"
+ "ldr r6, [%[a], #220]\n\t"
+ "ldr r5, [%[b], #216]\n\t"
+ "ldr r7, [%[b], #220]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #216]\n\t"
+ "str r6, [%[r], #220]\n\t"
+ "ldr r4, [%[a], #224]\n\t"
+ "ldr r6, [%[a], #228]\n\t"
+ "ldr r5, [%[b], #224]\n\t"
+ "ldr r7, [%[b], #228]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #224]\n\t"
+ "str r6, [%[r], #228]\n\t"
+ "ldr r4, [%[a], #232]\n\t"
+ "ldr r6, [%[a], #236]\n\t"
+ "ldr r5, [%[b], #232]\n\t"
+ "ldr r7, [%[b], #236]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #232]\n\t"
+ "str r6, [%[r], #236]\n\t"
+ "ldr r4, [%[a], #240]\n\t"
+ "ldr r6, [%[a], #244]\n\t"
+ "ldr r5, [%[b], #240]\n\t"
+ "ldr r7, [%[b], #244]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #240]\n\t"
+ "str r6, [%[r], #244]\n\t"
+ "ldr r4, [%[a], #248]\n\t"
+ "ldr r6, [%[a], #252]\n\t"
+ "ldr r5, [%[b], #248]\n\t"
+ "ldr r7, [%[b], #252]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #248]\n\t"
+ "str r6, [%[r], #252]\n\t"
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#endif /* WOLFSSL_SP_SMALL */
+
+ return c;
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ *
+ * NOTE(review): the loop also reads/writes the word 64 positions above
+ * the current base (offset #256), so a must be a double-width (128-word)
+ * buffer as produced by sp_2048_mul_64/sp_2048_sqr_64 — confirm at callers.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ /* Register roles: r12 = i (byte offset, 0..252), r8 = mu for this pass,
+ * r10/r14 cache the working a[i+0]/a[i+1] across iterations (so they are
+ * not re-loaded or stored every pass), r4/r5 alternate as the inter-word
+ * carry, r6/r7 receive each umull product low/high. ca accumulates the
+ * top-word carry across all 64 passes. */
+ __asm__ __volatile__ (
+ "# i = 0\n\t"
+ "mov r12, #0\n\t"
+ "ldr r10, [%[a], #0]\n\t"
+ "ldr r14, [%[a], #4]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul r8, %[mp], r10\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "ldr r7, [%[m], #0]\n\t"
+ "ldr r9, [%[a], #0]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r10, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "ldr r7, [%[m], #4]\n\t"
+ "ldr r9, [%[a], #4]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r14, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r10, r10, r5\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr r7, [%[m], #8]\n\t"
+ "ldr r14, [%[a], #8]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r14, r14, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r14, r14, r4\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr r7, [%[m], #12]\n\t"
+ "ldr r9, [%[a], #12]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #12]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr r7, [%[m], #16]\n\t"
+ "ldr r9, [%[a], #16]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #16]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr r7, [%[m], #20]\n\t"
+ "ldr r9, [%[a], #20]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #20]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr r7, [%[m], #24]\n\t"
+ "ldr r9, [%[a], #24]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #24]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr r7, [%[m], #28]\n\t"
+ "ldr r9, [%[a], #28]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #28]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr r7, [%[m], #32]\n\t"
+ "ldr r9, [%[a], #32]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #32]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr r7, [%[m], #36]\n\t"
+ "ldr r9, [%[a], #36]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #36]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr r7, [%[m], #40]\n\t"
+ "ldr r9, [%[a], #40]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #40]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr r7, [%[m], #44]\n\t"
+ "ldr r9, [%[a], #44]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #44]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+12] += m[12] * mu\n\t"
+ "ldr r7, [%[m], #48]\n\t"
+ "ldr r9, [%[a], #48]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #48]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+13] += m[13] * mu\n\t"
+ "ldr r7, [%[m], #52]\n\t"
+ "ldr r9, [%[a], #52]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #52]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+14] += m[14] * mu\n\t"
+ "ldr r7, [%[m], #56]\n\t"
+ "ldr r9, [%[a], #56]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #56]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+15] += m[15] * mu\n\t"
+ "ldr r7, [%[m], #60]\n\t"
+ "ldr r9, [%[a], #60]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #60]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+16] += m[16] * mu\n\t"
+ "ldr r7, [%[m], #64]\n\t"
+ "ldr r9, [%[a], #64]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #64]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+17] += m[17] * mu\n\t"
+ "ldr r7, [%[m], #68]\n\t"
+ "ldr r9, [%[a], #68]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #68]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+18] += m[18] * mu\n\t"
+ "ldr r7, [%[m], #72]\n\t"
+ "ldr r9, [%[a], #72]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #72]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+19] += m[19] * mu\n\t"
+ "ldr r7, [%[m], #76]\n\t"
+ "ldr r9, [%[a], #76]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #76]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+20] += m[20] * mu\n\t"
+ "ldr r7, [%[m], #80]\n\t"
+ "ldr r9, [%[a], #80]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #80]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+21] += m[21] * mu\n\t"
+ "ldr r7, [%[m], #84]\n\t"
+ "ldr r9, [%[a], #84]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #84]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+22] += m[22] * mu\n\t"
+ "ldr r7, [%[m], #88]\n\t"
+ "ldr r9, [%[a], #88]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #88]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+23] += m[23] * mu\n\t"
+ "ldr r7, [%[m], #92]\n\t"
+ "ldr r9, [%[a], #92]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #92]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+24] += m[24] * mu\n\t"
+ "ldr r7, [%[m], #96]\n\t"
+ "ldr r9, [%[a], #96]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #96]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+25] += m[25] * mu\n\t"
+ "ldr r7, [%[m], #100]\n\t"
+ "ldr r9, [%[a], #100]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #100]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+26] += m[26] * mu\n\t"
+ "ldr r7, [%[m], #104]\n\t"
+ "ldr r9, [%[a], #104]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #104]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+27] += m[27] * mu\n\t"
+ "ldr r7, [%[m], #108]\n\t"
+ "ldr r9, [%[a], #108]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #108]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+28] += m[28] * mu\n\t"
+ "ldr r7, [%[m], #112]\n\t"
+ "ldr r9, [%[a], #112]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #112]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+29] += m[29] * mu\n\t"
+ "ldr r7, [%[m], #116]\n\t"
+ "ldr r9, [%[a], #116]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #116]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+30] += m[30] * mu\n\t"
+ "ldr r7, [%[m], #120]\n\t"
+ "ldr r9, [%[a], #120]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #120]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+31] += m[31] * mu\n\t"
+ "ldr r7, [%[m], #124]\n\t"
+ "ldr r9, [%[a], #124]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #124]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+32] += m[32] * mu\n\t"
+ "ldr r7, [%[m], #128]\n\t"
+ "ldr r9, [%[a], #128]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #128]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+33] += m[33] * mu\n\t"
+ "ldr r7, [%[m], #132]\n\t"
+ "ldr r9, [%[a], #132]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #132]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+34] += m[34] * mu\n\t"
+ "ldr r7, [%[m], #136]\n\t"
+ "ldr r9, [%[a], #136]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #136]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+35] += m[35] * mu\n\t"
+ "ldr r7, [%[m], #140]\n\t"
+ "ldr r9, [%[a], #140]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #140]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+36] += m[36] * mu\n\t"
+ "ldr r7, [%[m], #144]\n\t"
+ "ldr r9, [%[a], #144]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #144]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+37] += m[37] * mu\n\t"
+ "ldr r7, [%[m], #148]\n\t"
+ "ldr r9, [%[a], #148]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #148]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+38] += m[38] * mu\n\t"
+ "ldr r7, [%[m], #152]\n\t"
+ "ldr r9, [%[a], #152]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #152]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+39] += m[39] * mu\n\t"
+ "ldr r7, [%[m], #156]\n\t"
+ "ldr r9, [%[a], #156]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #156]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+40] += m[40] * mu\n\t"
+ "ldr r7, [%[m], #160]\n\t"
+ "ldr r9, [%[a], #160]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #160]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+41] += m[41] * mu\n\t"
+ "ldr r7, [%[m], #164]\n\t"
+ "ldr r9, [%[a], #164]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #164]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+42] += m[42] * mu\n\t"
+ "ldr r7, [%[m], #168]\n\t"
+ "ldr r9, [%[a], #168]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #168]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+43] += m[43] * mu\n\t"
+ "ldr r7, [%[m], #172]\n\t"
+ "ldr r9, [%[a], #172]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #172]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+44] += m[44] * mu\n\t"
+ "ldr r7, [%[m], #176]\n\t"
+ "ldr r9, [%[a], #176]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #176]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+45] += m[45] * mu\n\t"
+ "ldr r7, [%[m], #180]\n\t"
+ "ldr r9, [%[a], #180]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #180]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+46] += m[46] * mu\n\t"
+ "ldr r7, [%[m], #184]\n\t"
+ "ldr r9, [%[a], #184]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #184]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+47] += m[47] * mu\n\t"
+ "ldr r7, [%[m], #188]\n\t"
+ "ldr r9, [%[a], #188]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #188]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+48] += m[48] * mu\n\t"
+ "ldr r7, [%[m], #192]\n\t"
+ "ldr r9, [%[a], #192]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #192]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+49] += m[49] * mu\n\t"
+ "ldr r7, [%[m], #196]\n\t"
+ "ldr r9, [%[a], #196]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #196]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+50] += m[50] * mu\n\t"
+ "ldr r7, [%[m], #200]\n\t"
+ "ldr r9, [%[a], #200]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #200]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+51] += m[51] * mu\n\t"
+ "ldr r7, [%[m], #204]\n\t"
+ "ldr r9, [%[a], #204]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #204]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+52] += m[52] * mu\n\t"
+ "ldr r7, [%[m], #208]\n\t"
+ "ldr r9, [%[a], #208]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #208]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+53] += m[53] * mu\n\t"
+ "ldr r7, [%[m], #212]\n\t"
+ "ldr r9, [%[a], #212]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #212]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+54] += m[54] * mu\n\t"
+ "ldr r7, [%[m], #216]\n\t"
+ "ldr r9, [%[a], #216]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #216]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+55] += m[55] * mu\n\t"
+ "ldr r7, [%[m], #220]\n\t"
+ "ldr r9, [%[a], #220]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #220]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+56] += m[56] * mu\n\t"
+ "ldr r7, [%[m], #224]\n\t"
+ "ldr r9, [%[a], #224]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #224]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+57] += m[57] * mu\n\t"
+ "ldr r7, [%[m], #228]\n\t"
+ "ldr r9, [%[a], #228]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #228]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+58] += m[58] * mu\n\t"
+ "ldr r7, [%[m], #232]\n\t"
+ "ldr r9, [%[a], #232]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #232]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+59] += m[59] * mu\n\t"
+ "ldr r7, [%[m], #236]\n\t"
+ "ldr r9, [%[a], #236]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #236]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+60] += m[60] * mu\n\t"
+ "ldr r7, [%[m], #240]\n\t"
+ "ldr r9, [%[a], #240]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #240]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+61] += m[61] * mu\n\t"
+ "ldr r7, [%[m], #244]\n\t"
+ "ldr r9, [%[a], #244]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #244]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+62] += m[62] * mu\n\t"
+ "ldr r7, [%[m], #248]\n\t"
+ "ldr r9, [%[a], #248]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #248]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+63] += m[63] * mu\n\t"
+ "ldr r7, [%[m], #252]\n\t"
+ "ldr r9, [%[a], #252]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r7, r7, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ "adc %[ca], %[ca], %[ca]\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #252]\n\t"
+ "ldr r9, [%[a], #256]\n\t"
+ "adcs r9, r9, r7\n\t"
+ "str r9, [%[a], #256]\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ "# i += 1\n\t"
+ "add %[a], %[a], #4\n\t"
+ "add r12, r12, #4\n\t"
+ "cmp r12, #256\n\t"
+ "blt 1b\n\t"
+ "str r10, [%[a], #0]\n\t"
+ "str r14, [%[a], #4]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ /* The loop advanced a by 64 words (256 bytes), so a - 64 is the original
+ * base and a now points at the reduced upper half. Conditionally subtract
+ * m once more when the final carry is set (mask = 0 - ca = 0 or -1). */
+ sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ /* Full 128-word product, then Montgomery-reduce back to 64 words in r. */
+ sp_2048_mul_64(r, a, b);
+ sp_2048_mont_reduce_64(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Full 128-word square, then Montgomery-reduce back to 64 words in r. */
+ sp_2048_sqr_64(r, a);
+ sp_2048_mont_reduce_64(r, m, mp);
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor (not the dividend).
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div)
+{
+ sp_digit r = 0;
+
+ /* Branch-free estimate: compare against r5 = div/2 + 1 to produce one
+ * quotient bit per step (top bit, then 30 more), then refine with two
+ * multiply-and-correct rounds and a final conditional +1. */
+ __asm__ __volatile__ (
+ "lsr r5, %[div], #1\n\t"
+ "add r5, r5, #1\n\t"
+ "mov r6, %[d0]\n\t"
+ "mov r7, %[d1]\n\t"
+ "# Do top 32\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "# Next 30 bits\n\t"
+ "mov r4, #29\n\t"
+ "1:\n\t"
+ "movs r6, r6, lsl #1\n\t"
+ "adc r7, r7, r7\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "subs r4, r4, #1\n\t"
+ "bpl 1b\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "add %[r], %[r], #1\n\t"
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ "subs r8, %[div], r4\n\t"
+ "sbc r8, r8, r8\n\t"
+ "sub %[r], %[r], r8\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r6", "r7", "r8"
+ );
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ * Runs in constant time with respect to the mask value.
+ *
+ * r A single precision integer (64 words), receives a & m per word.
+ * a A single precision integer (64 words).
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ /* Compact word-by-word loop for minimal code size. */
+ for (i=0; i<64; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ int i;
+
+ /* Unrolled eight words per iteration for speed. */
+ for (i = 0; i < 64; i += 8) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ r[i+6] = a[i+6] & m;
+ r[i+7] = a[i+7] & m;
+ }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = -1;
+ sp_digit one = 1;
+
+
+#ifdef WOLFSSL_SP_SMALL
+ __asm__ __volatile__ (
+ "mov r7, #0\n\t"
+ "mov r3, #-1\n\t"
+ "mov r6, #252\n\t"
+ "1:\n\t"
+ "ldr r4, [%[a], r6]\n\t"
+ "ldr r5, [%[b], r6]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "subs r6, r6, #4\n\t"
+ "bcs 1b\n\t"
+ "eor %[r], %[r], r3\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+#else
+ __asm__ __volatile__ (
+ "mov r7, #0\n\t"
+ "mov r3, #-1\n\t"
+ "ldr r4, [%[a], #252]\n\t"
+ "ldr r5, [%[b], #252]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #248]\n\t"
+ "ldr r5, [%[b], #248]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #244]\n\t"
+ "ldr r5, [%[b], #244]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #240]\n\t"
+ "ldr r5, [%[b], #240]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #236]\n\t"
+ "ldr r5, [%[b], #236]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #232]\n\t"
+ "ldr r5, [%[b], #232]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #228]\n\t"
+ "ldr r5, [%[b], #228]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #224]\n\t"
+ "ldr r5, [%[b], #224]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #220]\n\t"
+ "ldr r5, [%[b], #220]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #216]\n\t"
+ "ldr r5, [%[b], #216]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #212]\n\t"
+ "ldr r5, [%[b], #212]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #208]\n\t"
+ "ldr r5, [%[b], #208]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #204]\n\t"
+ "ldr r5, [%[b], #204]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #200]\n\t"
+ "ldr r5, [%[b], #200]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #196]\n\t"
+ "ldr r5, [%[b], #196]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #192]\n\t"
+ "ldr r5, [%[b], #192]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #188]\n\t"
+ "ldr r5, [%[b], #188]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r5, [%[b], #184]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #180]\n\t"
+ "ldr r5, [%[b], #180]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r5, [%[b], #176]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #172]\n\t"
+ "ldr r5, [%[b], #172]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r5, [%[b], #168]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #164]\n\t"
+ "ldr r5, [%[b], #164]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r5, [%[b], #160]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #156]\n\t"
+ "ldr r5, [%[b], #156]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r5, [%[b], #152]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #148]\n\t"
+ "ldr r5, [%[b], #148]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r5, [%[b], #144]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #140]\n\t"
+ "ldr r5, [%[b], #140]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r5, [%[b], #136]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #132]\n\t"
+ "ldr r5, [%[b], #132]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r5, [%[b], #128]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "eor %[r], %[r], r3\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+#endif
+
+ return r;
+}
+
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m, the quotient, is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided - 2 * 64 digits.
+ * d Number to divide with - 64 digits.
+ * m Multiplier (quotient) result - ignored, may be NULL.
+ * r Remainder from the division - 64 digits.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ /* t1: working copy of the dividend; t2: d times the estimated digit. */
+ sp_digit t1[128], t2[65];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+
+ /* Top divisor digit is used to estimate each quotient digit. */
+ div = d[63];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+ for (i=63; i>=0; i--) {
+ /* Estimate quotient digit from the two highest remaining words. */
+ r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+ /* Subtract r1 * d from the dividend at word position i. */
+ sp_2048_mul_d_64(t2, d, r1);
+ t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+ t1[64 + i] -= t2[64];
+ /* Estimate may be too large: add d back (masked by the borrow word,
+ * presumably all ones on underflow - see sp_2048_mask_64) up to
+ * twice, with no data-dependent branch. */
+ sp_2048_mask_64(t2, d, t1[64 + i]);
+ t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+ sp_2048_mask_64(t2, d, t1[64 + i]);
+ t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+ }
+
+ /* Final conditional subtract brings the remainder into [0, d). */
+ r1 = sp_2048_cmp_64(t1, d) >= 0;
+ sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result - 64 digits.
+ * a A single precision number that is to be reduced - 2 * 64 digits
+ * (sp_2048_div_64 reads the full double-width value).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Quotient output is not needed - pass NULL for the multiplier. */
+ return sp_2048_div_64(a, m, NULL, r);
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m, the quotient, is not calculated as it is not needed at this time.
+ *
+ * Unlike sp_2048_div_64, the over-estimate correction below branches on
+ * data, so this variant is NOT constant time - for use only where the
+ * operands are public.
+ *
+ * a Number to be divided - 2 * 64 digits.
+ * d Number to divide with - 64 digits.
+ * m Multiplier (quotient) result - ignored, may be NULL.
+ * r Remainder from the division - 64 digits.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ /* t1: working copy of the dividend; t2: d times the estimated digit. */
+ sp_digit t1[128], t2[65];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+
+ /* Top divisor digit is used to estimate each quotient digit. */
+ div = d[63];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+ for (i=63; i>=0; i--) {
+ /* Estimate quotient digit from the two highest remaining words. */
+ r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+ /* Subtract r1 * d from the dividend at word position i. */
+ sp_2048_mul_d_64(t2, d, r1);
+ t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+ t1[64 + i] -= t2[64];
+ /* Estimate may be too large by up to two: add d back while the
+ * carry word shows a borrow (conditional, not constant time). */
+ if (t1[64 + i] != 0) {
+ t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+ if (t1[64 + i] != 0)
+ t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+ }
+ }
+
+ /* Final conditional subtract brings the remainder into [0, d). */
+ r1 = sp_2048_cmp_64(t1, d) >= 0;
+ sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Uses the branching (non-constant-time) division variant - for use only
+ * where the operands are public.
+ *
+ * r A single precision number that is the reduced result - 64 digits.
+ * a A single precision number that is to be reduced - 2 * 64 digits.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Quotient output is not needed - pass NULL for the multiplier. */
+ return sp_2048_div_64_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Fixed 4-bit window algorithm: 16 precomputed powers of a held in
+ * Montgomery form, 4 squarings plus one table multiply per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ /* Window table: 16 entries of double-width (128 digit) working space. */
+ sp_digit t[16][128];
+#else
+ sp_digit* t[16];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* Heap-allocate the window table instead of using stack space. */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<16; i++) {
+ t[i] = td + i * 128;
+ }
+#endif
+ /* t[0] doubles as scratch for the Montgomery normalizer. */
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_64(norm, m);
+
+ /* t[1] = a * 2^2048 mod m (a in Montgomery form): place a in the
+ * upper 64 words, zero the lower 64, then reduce mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+ if (reduceA != 0) {
+ err = sp_2048_mod_64(t[1] + 64, a, m);
+ if (err == MP_OKAY) {
+ err = sp_2048_mod_64(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+ err = sp_2048_mod_64(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the table: t[j] = a^j in Montgomery form for j = 2..15. */
+ sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+ sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+ sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+ sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+ sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+ sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+ sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+ sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+ sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+ sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+
+ /* Start at the top exponent word; c counts bits left in word n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ /* Shrink the first window so the remainder is a multiple of 4. */
+ c -= bits % 4;
+ if (c == 32) {
+ c = 28;
+ }
+ /* Consume the first window to seed r. */
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+ for (; i>=0 || c>=4; ) {
+ if (c == 0) {
+ /* Current word exhausted: window is the top of the next word. */
+ n = e[i--];
+ y = n >> 28;
+ n <<= 4;
+ c = 28;
+ }
+ else if (c < 4) {
+ /* Window straddles two exponent words. */
+ y = n >> 28;
+ n = e[i--];
+ c = 4 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ /* Take the next 4 bits from the current word. */
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+ }
+
+ /* r = r^16 * a^y mod m, all in Montgomery form. */
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+
+ sp_2048_mont_mul_64(r, r, t[y], m, mp);
+ }
+
+ /* Convert back from Montgomery form and fully reduce. */
+ XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+ sp_2048_mont_reduce_64(r, m, mp);
+
+ /* Constant-time final subtraction when r >= m. */
+ mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+ sp_2048_cond_sub_64(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Fixed 5-bit window algorithm: 32 precomputed powers of a held in
+ * Montgomery form, 5 squarings plus one table multiply per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ /* Window table: 32 entries of double-width (128 digit) working space. */
+ sp_digit t[32][128];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* Heap-allocate the window table instead of using stack space. */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++) {
+ t[i] = td + i * 128;
+ }
+#endif
+ /* t[0] doubles as scratch for the Montgomery normalizer. */
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_64(norm, m);
+
+ /* t[1] = a * 2^2048 mod m (a in Montgomery form): place a in the
+ * upper 64 words, zero the lower 64, then reduce mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+ if (reduceA != 0) {
+ err = sp_2048_mod_64(t[1] + 64, a, m);
+ if (err == MP_OKAY) {
+ err = sp_2048_mod_64(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+ err = sp_2048_mod_64(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the table: t[j] = a^j in Montgomery form for j = 2..31. */
+ sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+ sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+ sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+ sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+ sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+ sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+ sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+ sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+ sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+ sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+ sp_2048_mont_sqr_64(t[16], t[ 8], m, mp);
+ sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
+ sp_2048_mont_sqr_64(t[18], t[ 9], m, mp);
+ sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp);
+ sp_2048_mont_sqr_64(t[20], t[10], m, mp);
+ sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp);
+ sp_2048_mont_sqr_64(t[22], t[11], m, mp);
+ sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp);
+ sp_2048_mont_sqr_64(t[24], t[12], m, mp);
+ sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp);
+ sp_2048_mont_sqr_64(t[26], t[13], m, mp);
+ sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp);
+ sp_2048_mont_sqr_64(t[28], t[14], m, mp);
+ sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp);
+ sp_2048_mont_sqr_64(t[30], t[15], m, mp);
+ sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp);
+
+ /* Start at the top exponent word; c counts bits left in word n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ /* Shrink the first window so the remainder is a multiple of 5. */
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ /* Consume the first window to seed r. */
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ /* Current word exhausted: window is the top of the next word. */
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles two exponent words. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ /* Take the next 5 bits from the current word. */
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^32 * a^y mod m, all in Montgomery form. */
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+
+ sp_2048_mont_mul_64(r, r, t[y], m, mp);
+ }
+
+ /* Convert back from Montgomery form and fully reduce. */
+ XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+ sp_2048_mont_reduce_64(r, m, mp);
+
+ /* Constant-time final subtraction when r >= m. */
+ mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+ sp_2048_cond_sub_64(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base - at most 256.
+ * em Public exponent - must fit in 32 bits and be non-zero.
+ * mm Modulus - must be exactly 2048 bits.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[128], m[64], r[128];
+#else
+ sp_digit* d = NULL;
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+#endif
+ sp_digit *ah;
+ sp_digit e[1];
+ int err = MP_OKAY;
+
+ /* Validate sizes before touching any buffers. */
+ if (*outLen < 256)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 ||
+ mp_count_bits(mm) != 2048))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ /* One allocation split into a (128), r (128) and m (64) digits. */
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 64 * 2;
+ m = r + 64 * 2;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ /* Load the base into the upper half of a. */
+ ah = a + 64;
+
+ sp_2048_from_bin(ah, 64, in, inLen);
+#if DIGIT_BIT >= 32
+ e[0] = em->dp[0];
+#else
+ /* mp_int digits are narrower than 32 bits: combine two of them. */
+ e[0] = em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(m, 64, mm);
+
+ if (e[0] == 0x3) {
+ /* Common exponent 3: r = in^3 mod m via one square and one
+ * multiply, reducing after each step. */
+ if (err == MP_OKAY) {
+ sp_2048_sqr_64(r, ah);
+ err = sp_2048_mod_64_cond(r, r, m);
+ }
+ if (err == MP_OKAY) {
+ sp_2048_mul_64(r, ah, r);
+ err = sp_2048_mod_64_cond(r, r, m);
+ }
+ }
+ else {
+ /* General case: left-to-right binary exponentiation in
+ * Montgomery form (operands are public, so the branching
+ * variants are acceptable here). */
+ int i;
+ sp_digit mp;
+
+ sp_2048_mont_setup(m, &mp);
+
+ /* Convert to Montgomery form: a = in * 2^2048 mod m (base in
+ * upper half, lower half zeroed, then reduced). */
+ XMEMSET(a, 0, sizeof(sp_digit) * 64);
+ err = sp_2048_mod_64_cond(a, a, m);
+
+ if (err == MP_OKAY) {
+ /* Find the exponent's highest set bit. */
+ for (i = 31; i >= 0; i--) {
+ if (e[0] >> i) {
+ break;
+ }
+ }
+
+ /* Square-and-multiply over the remaining bits. */
+ XMEMCPY(r, a, sizeof(sp_digit) * 64);
+ for (i--; i>=0; i--) {
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ if (((e[0] >> i) & 1) == 1) {
+ sp_2048_mont_mul_64(r, r, a, m, mp);
+ }
+ }
+ /* Convert out of Montgomery form. */
+ XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+ sp_2048_mont_reduce_64(r, m, mp);
+
+ /* Final reduction: subtract m when r >= m (branching compare
+ * is fine for a public-key operation). */
+ for (i = 63; i > 0; i--) {
+ if (r[i] != m[i]) {
+ break;
+ }
+ }
+ if (r[i] >= m[i]) {
+ sp_2048_sub_in_place_64(r, m);
+ }
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Result is always written as a full 256-byte big-endian value. */
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#endif
+
+ return err;
+}
+
+/* NOTE(review): this appears to be the body of the non-CRT RSA private-key
+ * operation (r = in^dm mod mm, using the private exponent directly); the
+ * enclosing function signature sits outside this hunk - confirm against the
+ * full file. */
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ /* CRT parameters are unused in this (non-CRT) build. */
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ /* Validate sizes before touching any buffers. */
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 2048) {
+ err = MP_READ_E;
+ }
+ if (inLen > 256) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ /* Single allocation layout: d[0..63] exponent, a[64..191] base and
+ * double-width work area (result r aliases a), m[192..255] modulus. */
+ a = d + 64;
+ m = a + 128;
+ r = a;
+
+ sp_2048_from_bin(a, 64, in, inLen);
+ sp_2048_from_mp(d, 64, dm);
+ sp_2048_from_mp(m, 64, mm);
+ err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+ if (d != NULL) {
+ /* Zeroize the private exponent (first 64 digits) before freeing. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 64);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not (b is ANDed with m word by word, so the
+ * addition happens unconditionally but adds zero when m is 0).
+ * Operates on 32 words (128 bytes).
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply - all ones or all zeros.
+ * returns the carry out of the 32-word addition.
+ */
+static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+ /* Word-at-a-time loop. The carry cannot be kept live in the flags
+ * across the loop-control instructions, so it is saved in c each
+ * iteration and re-seeded with "adds c, c, #-1" (sets the C flag iff
+ * c was non-zero). Clobber list is conservative (r6/r7 unused here). */
+ __asm__ __volatile__ (
+ "mov r9, #0\n\t"
+ "mov r8, #0\n\t"
+ "1:\n\t"
+ "adds %[c], %[c], #-1\n\t"
+ "ldr r4, [%[a], r8]\n\t"
+ "ldr r5, [%[b], r8]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adc %[c], r9, r9\n\t"
+ "str r4, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, #128\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#else
+ /* Fully unrolled: two words per step. The first pair uses "adds" (no
+ * carry in); every later pair chains the carry with "adcs"; the final
+ * "adc" captures the carry out into c. */
+ __asm__ __volatile__ (
+
+ "mov r9, #0\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r6, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r6, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r6, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r6, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r6, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r7, [%[b], #44]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r6, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r6, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r6, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r7, [%[b], #60]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "str r6, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r6, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r6, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r6, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r7, [%[b], #76]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "str r6, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r6, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r6, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r6, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r7, [%[b], #92]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "str r6, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r6, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r6, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r6, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r7, [%[b], #108]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "str r6, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r6, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r6, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r6, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r7, [%[b], #124]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "str r6, [%[r], #124]\n\t"
+ "adc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#endif /* WOLFSSL_SP_SMALL */
+
+ return c;
+}
+
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ *
+ * Implements RSA-CRT with Garner's recombination:
+ *   tmpa = in^dp mod p, tmpb = in^dq mod q,
+ *   h = qi * (tmpa - tmpb) mod p, result = tmpb + h * q.
+ * dm is unused; mm is only used for the bit-length sanity check.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[64 * 2];
+ sp_digit p[32], q[32], dp[32];
+ sp_digit tmpa[64], tmpb[64];
+#else
+ sp_digit* t = NULL;
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+#endif
+ sp_digit* r;
+ sp_digit* qi;
+ sp_digit* dq;
+ sp_digit c;
+ int err = MP_OKAY;
+
+ (void)dm;
+ (void)mm;
+
+ /* Output buffer must hold the full 2048-bit (256 byte) result. */
+ if (*outLen < 256)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ /* Carve the single allocation into working buffers. dp, dq and qi
+ * deliberately share one 32-word area: each value is fully consumed
+ * before the next one is loaded into it. */
+ a = t;
+ p = a + 64 * 2;
+ q = p + 32;
+ qi = dq = dp = q + 32;
+ tmpa = qi + 32;
+ tmpb = tmpa + 64;
+
+ /* NOTE(review): r aliases the upper half of a's 128-word buffer;
+ * a is no longer read by the time r is written below. */
+ r = t + 64;
+ }
+#else
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ r = a;
+ qi = dq = dp;
+#endif
+ sp_2048_from_bin(a, 64, in, inLen);
+ sp_2048_from_mp(p, 32, pm);
+ sp_2048_from_mp(q, 32, qm);
+ sp_2048_from_mp(dp, 32, dpm);
+
+ /* tmpa = in^dp mod p (final argument 1: private/constant-time path) */
+ err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
+ }
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(dq, 32, dqm);
+ /* tmpb = in^dq mod q */
+ err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ /* tmpa = (tmpa - tmpb) mod p. The two conditional adds bring the
+ * possibly-negative difference back into range without branching
+ * on secret data. */
+ c = sp_2048_sub_in_place_32(tmpa, tmpb);
+ c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
+ sp_2048_cond_add_32(tmpa, tmpa, p, c);
+
+ /* tmpa = qi * (tmpa - tmpb) mod p  -- Garner's h. */
+ sp_2048_from_mp(qi, 32, qim);
+ sp_2048_mul_32(tmpa, tmpa, qi);
+ err = sp_2048_mod_32(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* result = tmpb + h * q; tmpb is zero-extended to 64 words first. */
+ sp_2048_mul_32(tmpa, q, tmpa);
+ XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
+ sp_2048_add_64(r, tmpb, tmpa);
+
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ /* Zeroize all key-dependent material before freeing. */
+ XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+#else
+ /* Zeroize stack copies of key-dependent material. */
+ XMEMSET(tmpa, 0, sizeof(tmpa));
+ XMEMSET(tmpb, 0, sizeof(tmpb));
+ XMEMSET(p, 0, sizeof(p));
+ XMEMSET(q, 0, sizeof(q));
+ XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ /* Ensure the destination has room for all 2048 bits. */
+ err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ /* Digit sizes match: straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
+ r->used = 64;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* mp_int digits are narrower: split each 32-bit word across
+ * multiple mp digits. s is the bit offset within a[i]. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 64; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp_int digits are wider: pack several 32-bit words into each
+ * mp digit. s is the bit position within the current mp digit. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 64; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ /* b doubles as the result buffer, hence 128 words instead of 64. */
+ sp_digit b[128], e[64], m[64];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ /* Reject operands that do not fit the fixed 2048-bit implementation. */
+ if (mp_count_bits(base) > 2048) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(b, 64, base);
+ sp_2048_from_mp(e, 64, exp);
+ sp_2048_from_mp(m, 64, mod);
+
+ /* NOTE(review): trailing 0 selects the non-private path (the RSA
+ * private operation above passes 1) -- confirm against
+ * sp_2048_mod_exp_64. */
+ err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_2048_to_mp(r, res);
+ }
+
+ /* Zeroize the exponent copy (the potentially secret operand). */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+/* Shift a left by n bits into r. (r = a << n)
+ *
+ * Fully unrolled over the 64 words of a, working from the most
+ * significant word down; writes 65 words, with r[64] receiving the bits
+ * shifted out of the top word a[63].
+ * The carry bits from each lower word are formed as (x >> 1) >> (31 - n),
+ * which equals x >> (32 - n) for 0 <= n <= 31 while avoiding a register
+ * shift amount of 32 when n == 0.
+ *
+ * r Result buffer (65 words written).
+ * a Number to shift (64 words read).
+ * n Number of bits to shift; callers pass a 5-bit window value (0..31).
+ */
+static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
+{
+ __asm__ __volatile__ (
+ "mov r6, #31\n\t"
+ "sub r6, r6, %[n]\n\t"
+ /* r6 = 31 - n; used below as the second-stage right-shift amount. */
+ "ldr r3, [%[a], #252]\n\t"
+ "lsr r4, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r4, r4, r6\n\t"
+ "ldr r2, [%[a], #248]\n\t"
+ "str r4, [%[r], #256]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #244]\n\t"
+ "str r3, [%[r], #252]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #240]\n\t"
+ "str r2, [%[r], #248]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #236]\n\t"
+ "str r4, [%[r], #244]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #232]\n\t"
+ "str r3, [%[r], #240]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #228]\n\t"
+ "str r2, [%[r], #236]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #224]\n\t"
+ "str r4, [%[r], #232]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #220]\n\t"
+ "str r3, [%[r], #228]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #216]\n\t"
+ "str r2, [%[r], #224]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #212]\n\t"
+ "str r4, [%[r], #220]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #208]\n\t"
+ "str r3, [%[r], #216]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #204]\n\t"
+ "str r2, [%[r], #212]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #200]\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #196]\n\t"
+ "str r3, [%[r], #204]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #192]\n\t"
+ "str r2, [%[r], #200]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #188]\n\t"
+ "str r4, [%[r], #196]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "str r3, [%[r], #192]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #180]\n\t"
+ "str r2, [%[r], #188]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #176]\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #172]\n\t"
+ "str r3, [%[r], #180]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #168]\n\t"
+ "str r2, [%[r], #176]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #164]\n\t"
+ "str r4, [%[r], #172]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "str r3, [%[r], #168]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #156]\n\t"
+ "str r2, [%[r], #164]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #152]\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #148]\n\t"
+ "str r3, [%[r], #156]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #144]\n\t"
+ "str r2, [%[r], #152]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #140]\n\t"
+ "str r4, [%[r], #148]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "str r3, [%[r], #144]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #132]\n\t"
+ "str r2, [%[r], #140]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #128]\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "str r3, [%[r], #132]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #120]\n\t"
+ "str r2, [%[r], #128]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #116]\n\t"
+ "str r4, [%[r], #124]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "str r3, [%[r], #120]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #108]\n\t"
+ "str r2, [%[r], #116]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #104]\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "str r3, [%[r], #108]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #96]\n\t"
+ "str r2, [%[r], #104]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #92]\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "str r3, [%[r], #96]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #84]\n\t"
+ "str r2, [%[r], #92]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #80]\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "str r3, [%[r], #84]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #72]\n\t"
+ "str r2, [%[r], #80]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #68]\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "str r3, [%[r], #72]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "str r2, [%[r], #68]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "str r3, [%[r]]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+ : "memory", "r2", "r3", "r4", "r5", "r6"
+ );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ *
+ * Processes the exponent in 5-bit windows from the top. Because the base
+ * is 2, each window multiply is a plain left shift (sp_2048_lshift_64)
+ * instead of a Montgomery multiplication.
+ */
+static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
+ const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit nd[128];
+ sp_digit td[65];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* 193 = 128 words for norm + 65 for tmp (64 words + carry word). */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 128;
+#else
+ norm = nd;
+ tmp = td;
+#endif
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_64(norm, m);
+
+ /* Consume the top (bits mod 5, or a full 5) bits first so the rest
+ * of the exponent splits into whole 5-bit windows. c counts bits
+ * still available in the current word n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ /* r = 2^y in Montgomery form (norm == R mod m). */
+ sp_2048_lshift_64(r, norm, y);
+ for (; i>=0 || c>=5; ) {
+ /* Extract the next 5-bit window y, refilling n from e[] as the
+ * current word runs out of bits. */
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^32 via five Montgomery squarings. */
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+
+ /* r = r * 2^y; fold the overflow word r[64] back in using the
+ * Montgomery normalizer, then reduce conditionally on the carry. */
+ sp_2048_lshift_64(r, r, y);
+ sp_2048_mul_d_64(tmp, norm, r[64]);
+ r[64] = 0;
+ o = sp_2048_add_64(r, r, tmp);
+ sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
+ }
+
+ /* Convert out of Montgomery form and fully reduce mod m. */
+ XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+ sp_2048_mont_reduce_64(r, m, mp);
+
+ mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+ sp_2048_cond_sub_64(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* HAVE_FFDHE_2048 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+ int err = MP_OKAY;
+ /* b doubles as the result buffer, hence 128 words instead of 64. */
+ sp_digit b[128], e[64], m[64];
+ sp_digit* r = b;
+ word32 i;
+
+ /* Reject operands that do not fit the fixed 2048-bit implementation. */
+ if (mp_count_bits(base) > 2048) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 256) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(b, 64, base);
+ sp_2048_from_bin(e, 64, exp, expLen);
+ sp_2048_from_mp(m, 64, mod);
+
+ #ifdef HAVE_FFDHE_2048
+ /* Base 2 with an all-ones top modulus word indicates an FFDHE-style
+ * prime; use the specialized 2^e implementation. */
+ if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)
+ err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
+ else
+ #endif
+ err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
+
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ /* Strip leading zero bytes from the big-endian result. */
+ for (i=0; i<256 && out[i] == 0; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+
+ }
+
+ /* Zeroize the private exponent copy. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ *
+ * 1024-bit variant that reuses the 2048-bit helpers; only the lower 32
+ * words of each buffer carry data.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ sp_digit b[64], e[32], m[32];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ /* Reject operands that do not fit the fixed 1024-bit implementation. */
+ if (mp_count_bits(base) > 1024) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(b, 32, base);
+ sp_2048_from_mp(e, 32, exp);
+ sp_2048_from_mp(m, 32, mod);
+
+ err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Clear the upper half so the 64-word sp_2048_to_mp conversion
+ * sees zeros, then clamp the result to the modulus size. */
+ XMEMSET(r + 32, 0, sizeof(*r) * 32U);
+ err = sp_2048_to_mp(r, res);
+ res->used = mod->used;
+ mp_clamp(res);
+ }
+
+ /* Zeroize the exponent copy. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
+
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ word32 s = 0;
+
+ /* Walk the byte array from the least significant (last) byte, packing
+ * 8 bits at a time into 32-bit digits; s is the bit offset within the
+ * current digit r[j]. */
+ r[0] = 0;
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 24U) {
+ /* Digit boundary reached: mask the completed digit and spill
+ * any remaining high bits of a[i] into the next digit. */
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ /* Zero any remaining digits. */
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+ /* Digit sizes match: straight copy, then zero-fill the remainder. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 32
+ /* mp_int digits are wider: split each mp digit across several 32-bit
+ * sp digits. s is the bit offset already consumed from a->dp[i]. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 32U) <= (word32)DIGIT_BIT) {
+ s += 32U;
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp_int digits are narrower: accumulate several mp digits into each
+ * 32-bit sp digit. s is the bit position within r[j]. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 32) {
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 32 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_3072_to_bin(sp_digit* r, byte* a)
+{
+ int i, j, s = 0, b;
+
+ /* Emit bytes from the end of the buffer (least significant digit
+ * first). b counts the bits of r[i] already emitted; s is the number
+ * of bits of a[j] already filled by the previous digit. */
+ j = 3072 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<96 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ while (b < 32) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ /* Leftover high bits of r[i] (if any) start the next output byte. */
+ s = 8 - (b - 32);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #48\n\t"
+ "mov r10, #0\n\t"
+ "# A[0] * B[0]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r3, r4, r8, r9\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [sp]\n\t"
+ "# A[0] * B[1]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[0]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #4]\n\t"
+ "# A[0] * B[2]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[1]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[0]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #8]\n\t"
+ "# A[0] * B[3]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[2]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[1]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[0]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #12]\n\t"
+ "# A[0] * B[4]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[3]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[2]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[1]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[0]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #16]\n\t"
+ "# A[0] * B[5]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[4]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[3]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[2]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[1]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[0]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #20]\n\t"
+ "# A[0] * B[6]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[5]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[4]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[3]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[2]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[1]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[0]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #24]\n\t"
+ "# A[0] * B[7]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[6]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[5]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[4]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[3]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[2]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[1]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[0]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #28]\n\t"
+ "# A[0] * B[8]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[7]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[6]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[5]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[4]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[3]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[2]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[1]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[0]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #32]\n\t"
+ "# A[0] * B[9]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[8]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[7]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[6]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[5]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[4]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[3]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[2]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[1]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[0]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #36]\n\t"
+ "# A[0] * B[10]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[9]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[8]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[7]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[6]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[5]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[4]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[3]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[2]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[1]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[0]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #40]\n\t"
+ "# A[0] * B[11]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[10]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[9]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[8]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[7]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[6]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[5]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[4]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[3]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[2]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[1]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[0]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #44]\n\t"
+ "# A[1] * B[11]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[2] * B[10]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[9]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[8]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[7]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[6]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[5]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[4]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[3]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[2]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[1]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "# A[2] * B[11]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[3] * B[10]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[9]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[8]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[7]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[6]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[5]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[4]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[3]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[2]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "# A[3] * B[11]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[4] * B[10]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[9]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[8]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[7]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[6]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[5]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[4]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[3]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "# A[4] * B[11]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[5] * B[10]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[9]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[8]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[7]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[6]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[5]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[4]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "# A[5] * B[11]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[6] * B[10]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[9]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[8]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[7]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[6]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[5]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "# A[6] * B[11]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[7] * B[10]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[9]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[8]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[7]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[6]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "# A[7] * B[11]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[8] * B[10]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[9]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[8]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[7]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #72]\n\t"
+ "# A[8] * B[11]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[9] * B[10]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[9]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[8]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "# A[9] * B[11]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[10] * B[10]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[9]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #80]\n\t"
+ "# A[10] * B[11]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[11] * B[10]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #84]\n\t"
+ "# A[11] * B[11]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adc r5, r5, r7\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "str r5, [%[r], #92]\n\t"
+ "ldr r3, [sp, #0]\n\t"
+ "ldr r4, [sp, #4]\n\t"
+ "ldr r5, [sp, #8]\n\t"
+ "ldr r6, [sp, #12]\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r3, [sp, #16]\n\t"
+ "ldr r4, [sp, #20]\n\t"
+ "ldr r5, [sp, #24]\n\t"
+ "ldr r6, [sp, #28]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "str r5, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r3, [sp, #32]\n\t"
+ "ldr r4, [sp, #36]\n\t"
+ "ldr r5, [sp, #40]\n\t"
+ "ldr r6, [sp, #44]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "str r5, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "add sp, sp, #48\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+}
+
+/* Square a and put the result in r. (r = a * a)
+ *
+ * r A single precision integer in which the result is stored.
+ * a A single precision integer to be squared.
+ */
+static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #48\n\t"
+ "mov r14, #0\n\t"
+ "# A[0] * A[0]\n\t"
+ "ldr r10, [%[a], #0]\n\t"
+ "umull r8, r3, r10, r10\n\t"
+ "mov r4, #0\n\t"
+ "str r8, [sp]\n\t"
+ "# A[0] * A[1]\n\t"
+ "ldr r10, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r14, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "str r3, [sp, #4]\n\t"
+ "# A[0] * A[2]\n\t"
+ "ldr r10, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r14, r14\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "# A[1] * A[1]\n\t"
+ "ldr r10, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "str r4, [sp, #8]\n\t"
+ "# A[0] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r14, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "# A[1] * A[2]\n\t"
+ "ldr r10, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "str r2, [sp, #12]\n\t"
+ "# A[0] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r14, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "# A[1] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "# A[2] * A[2]\n\t"
+ "ldr r10, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "str r3, [sp, #16]\n\t"
+ "# A[0] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #20]\n\t"
+ "# A[0] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #24]\n\t"
+ "# A[0] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #28]\n\t"
+ "# A[0] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #32]\n\t"
+ "# A[0] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #36]\n\t"
+ "# A[0] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #40]\n\t"
+ "# A[0] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #44]\n\t"
+ "# A[1] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[2] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "# A[2] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[3] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "# A[3] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[4] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "# A[4] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[5] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "# A[5] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[6] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "# A[6] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[7] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "# A[7] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r14, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "# A[8] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "# A[9] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "str r2, [%[r], #72]\n\t"
+ "# A[8] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r14, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "# A[9] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "str r3, [%[r], #76]\n\t"
+ "# A[9] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r14, r14\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "# A[10] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "# A[10] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r14, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "str r2, [%[r], #84]\n\t"
+ "# A[11] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r3, r3, r8\n\t"
+ "adc r4, r4, r9\n\t"
+ "str r3, [%[r], #88]\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r2, [sp, #0]\n\t"
+ "ldr r3, [sp, #4]\n\t"
+ "ldr r4, [sp, #8]\n\t"
+ "ldr r8, [sp, #12]\n\t"
+ "str r2, [%[r], #0]\n\t"
+ "str r3, [%[r], #4]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r8, [%[r], #12]\n\t"
+ "ldr r2, [sp, #16]\n\t"
+ "ldr r3, [sp, #20]\n\t"
+ "ldr r4, [sp, #24]\n\t"
+ "ldr r8, [sp, #28]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r8, [%[r], #28]\n\t"
+ "ldr r2, [sp, #32]\n\t"
+ "ldr r3, [sp, #36]\n\t"
+ "ldr r4, [sp, #40]\n\t"
+ "ldr r8, [sp, #44]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r8, [%[r], #44]\n\t"
+ "add sp, sp, #48\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
+ );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * The 12 32-bit digits are processed in three chunks of four words.
+ * The first chunk starts the carry chain with "adds"; later chunks
+ * continue it with "adcs" (ldr/str do not affect flags, so the carry
+ * survives between chunks).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant digit (0 or 1),
+ * produced by the final "adc %[c], r12, r12" with r12 zeroed on entry.
+ */
+static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r12, #0\n\t"
+        /* Digits 0..3: "adds" starts the carry chain. */
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[a], #4]\n\t"
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r7, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "ldr r10, [%[b], #8]\n\t"
+        "ldr r14, [%[b], #12]\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r5, [%[r], #4]\n\t"
+        "str r6, [%[r], #8]\n\t"
+        "str r7, [%[r], #12]\n\t"
+        /* Digits 4..7: carry flows in via "adcs". */
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[a], #20]\n\t"
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r7, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "ldr r9, [%[b], #20]\n\t"
+        "ldr r10, [%[b], #24]\n\t"
+        "ldr r14, [%[b], #28]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "str r6, [%[r], #24]\n\t"
+        "str r7, [%[r], #28]\n\t"
+        /* Digits 8..11. */
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[a], #36]\n\t"
+        "ldr r6, [%[a], #40]\n\t"
+        "ldr r7, [%[a], #44]\n\t"
+        "ldr r8, [%[b], #32]\n\t"
+        "ldr r9, [%[b], #36]\n\t"
+        "ldr r10, [%[b], #40]\n\t"
+        "ldr r14, [%[b], #44]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "str r5, [%[r], #36]\n\t"
+        "str r6, [%[r], #40]\n\t"
+        "str r7, [%[r], #44]\n\t"
+        /* c = final carry (0 or 1) since r12 == 0. */
+        "adc %[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * The 24 32-bit digits are processed in six chunks of four words.
+ * "subs" starts the borrow chain, "sbcs" continues it; ldr/str between
+ * chunks do not disturb the carry flag.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ * Returns 0 when no borrow out of the top digit occurred, or all-ones
+ * ((sp_digit)-1) when it did, via the final "sbc %[c], r9, r9".
+ */
+static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* Digits 0..3: "subs" starts the borrow chain. */
+        "ldr r2, [%[a], #0]\n\t"
+        "ldr r3, [%[a], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[a], #12]\n\t"
+        "ldr r6, [%[b], #0]\n\t"
+        "ldr r7, [%[b], #4]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "ldr r9, [%[b], #12]\n\t"
+        "subs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #0]\n\t"
+        "str r3, [%[a], #4]\n\t"
+        "str r4, [%[a], #8]\n\t"
+        "str r5, [%[a], #12]\n\t"
+        /* Digits 4..7: borrow flows in via "sbcs". */
+        "ldr r2, [%[a], #16]\n\t"
+        "ldr r3, [%[a], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[a], #28]\n\t"
+        "ldr r6, [%[b], #16]\n\t"
+        "ldr r7, [%[b], #20]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "ldr r9, [%[b], #28]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #16]\n\t"
+        "str r3, [%[a], #20]\n\t"
+        "str r4, [%[a], #24]\n\t"
+        "str r5, [%[a], #28]\n\t"
+        /* Digits 8..11. */
+        "ldr r2, [%[a], #32]\n\t"
+        "ldr r3, [%[a], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[a], #44]\n\t"
+        "ldr r6, [%[b], #32]\n\t"
+        "ldr r7, [%[b], #36]\n\t"
+        "ldr r8, [%[b], #40]\n\t"
+        "ldr r9, [%[b], #44]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #32]\n\t"
+        "str r3, [%[a], #36]\n\t"
+        "str r4, [%[a], #40]\n\t"
+        "str r5, [%[a], #44]\n\t"
+        /* Digits 12..15. */
+        "ldr r2, [%[a], #48]\n\t"
+        "ldr r3, [%[a], #52]\n\t"
+        "ldr r4, [%[a], #56]\n\t"
+        "ldr r5, [%[a], #60]\n\t"
+        "ldr r6, [%[b], #48]\n\t"
+        "ldr r7, [%[b], #52]\n\t"
+        "ldr r8, [%[b], #56]\n\t"
+        "ldr r9, [%[b], #60]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #48]\n\t"
+        "str r3, [%[a], #52]\n\t"
+        "str r4, [%[a], #56]\n\t"
+        "str r5, [%[a], #60]\n\t"
+        /* Digits 16..19. */
+        "ldr r2, [%[a], #64]\n\t"
+        "ldr r3, [%[a], #68]\n\t"
+        "ldr r4, [%[a], #72]\n\t"
+        "ldr r5, [%[a], #76]\n\t"
+        "ldr r6, [%[b], #64]\n\t"
+        "ldr r7, [%[b], #68]\n\t"
+        "ldr r8, [%[b], #72]\n\t"
+        "ldr r9, [%[b], #76]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #64]\n\t"
+        "str r3, [%[a], #68]\n\t"
+        "str r4, [%[a], #72]\n\t"
+        "str r5, [%[a], #76]\n\t"
+        /* Digits 20..23. */
+        "ldr r2, [%[a], #80]\n\t"
+        "ldr r3, [%[a], #84]\n\t"
+        "ldr r4, [%[a], #88]\n\t"
+        "ldr r5, [%[a], #92]\n\t"
+        "ldr r6, [%[b], #80]\n\t"
+        "ldr r7, [%[b], #84]\n\t"
+        "ldr r8, [%[b], #88]\n\t"
+        "ldr r9, [%[b], #92]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #80]\n\t"
+        "str r3, [%[a], #84]\n\t"
+        "str r4, [%[a], #88]\n\t"
+        "str r5, [%[a], #92]\n\t"
+        /* c = 0 if no borrow, all-ones otherwise (sbc x, x = -(1-C)). */
+        "sbc %[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * The 24 32-bit digits are processed in six chunks of four words.
+ * "adds" starts the carry chain; every later chunk uses "adcs" so the
+ * carry propagates across the whole number (ldr/str preserve flags).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant digit (0 or 1),
+ * produced by the final "adc %[c], r12, r12" with r12 zeroed on entry.
+ */
+static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r12, #0\n\t"
+        /* Digits 0..3: "adds" starts the carry chain. */
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[a], #4]\n\t"
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r7, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "ldr r10, [%[b], #8]\n\t"
+        "ldr r14, [%[b], #12]\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r5, [%[r], #4]\n\t"
+        "str r6, [%[r], #8]\n\t"
+        "str r7, [%[r], #12]\n\t"
+        /* Digits 4..7. */
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[a], #20]\n\t"
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r7, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "ldr r9, [%[b], #20]\n\t"
+        "ldr r10, [%[b], #24]\n\t"
+        "ldr r14, [%[b], #28]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "str r6, [%[r], #24]\n\t"
+        "str r7, [%[r], #28]\n\t"
+        /* Digits 8..11. */
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[a], #36]\n\t"
+        "ldr r6, [%[a], #40]\n\t"
+        "ldr r7, [%[a], #44]\n\t"
+        "ldr r8, [%[b], #32]\n\t"
+        "ldr r9, [%[b], #36]\n\t"
+        "ldr r10, [%[b], #40]\n\t"
+        "ldr r14, [%[b], #44]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "str r5, [%[r], #36]\n\t"
+        "str r6, [%[r], #40]\n\t"
+        "str r7, [%[r], #44]\n\t"
+        /* Digits 12..15. */
+        "ldr r4, [%[a], #48]\n\t"
+        "ldr r5, [%[a], #52]\n\t"
+        "ldr r6, [%[a], #56]\n\t"
+        "ldr r7, [%[a], #60]\n\t"
+        "ldr r8, [%[b], #48]\n\t"
+        "ldr r9, [%[b], #52]\n\t"
+        "ldr r10, [%[b], #56]\n\t"
+        "ldr r14, [%[b], #60]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #48]\n\t"
+        "str r5, [%[r], #52]\n\t"
+        "str r6, [%[r], #56]\n\t"
+        "str r7, [%[r], #60]\n\t"
+        /* Digits 16..19. */
+        "ldr r4, [%[a], #64]\n\t"
+        "ldr r5, [%[a], #68]\n\t"
+        "ldr r6, [%[a], #72]\n\t"
+        "ldr r7, [%[a], #76]\n\t"
+        "ldr r8, [%[b], #64]\n\t"
+        "ldr r9, [%[b], #68]\n\t"
+        "ldr r10, [%[b], #72]\n\t"
+        "ldr r14, [%[b], #76]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #64]\n\t"
+        "str r5, [%[r], #68]\n\t"
+        "str r6, [%[r], #72]\n\t"
+        "str r7, [%[r], #76]\n\t"
+        /* Digits 20..23. */
+        "ldr r4, [%[a], #80]\n\t"
+        "ldr r5, [%[a], #84]\n\t"
+        "ldr r6, [%[a], #88]\n\t"
+        "ldr r7, [%[a], #92]\n\t"
+        "ldr r8, [%[b], #80]\n\t"
+        "ldr r9, [%[b], #84]\n\t"
+        "ldr r10, [%[b], #88]\n\t"
+        "ldr r14, [%[b], #92]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #80]\n\t"
+        "str r5, [%[r], #84]\n\t"
+        "str r6, [%[r], #88]\n\t"
+        "str r7, [%[r], #92]\n\t"
+        /* c = final carry (0 or 1) since r12 == 0. */
+        "adc %[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r. (r[i] = a[i] & m)
+ *
+ * Used to conditionally keep or zero a value: m is expected to be an
+ * all-ones or all-zeros mask.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int j;
+
+    /* Four digits per iteration: unrolled for speed. */
+    for (j = 0; j < 12; j += 4) {
+        r[j+0] = a[j+0] & m;
+        r[j+1] = a[j+1] & m;
+        r[j+2] = a[j+2] & m;
+        r[j+3] = a[j+3] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One Karatsuba level on top of the 12-digit primitives:
+ *   z0 = aL*bL, z2 = aH*bH, z1 = (aL+aH)*(bL+bH) - z0 - z2,
+ *   r  = z0 + (z1 << 12 digits) + (z2 << 24 digits).
+ * The carries out of the half-sums (ca, cb) are folded back in with
+ * the mask helpers so the middle product fits in 24 digits.
+ *
+ * r A single precision integer. Result, 48 digits.
+ * a A single precision integer, 24 digits.
+ * b A single precision integer, 24 digits.
+ */
+SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[24];
+    sp_digit a1[12];
+    sp_digit b1[12];
+    sp_digit z2[24];
+    sp_digit u, ca, cb;
+
+    /* a1 = aL + aH, b1 = bL + bH; ca/cb hold the carries out. */
+    ca = sp_3072_add_12(a1, a, &a[12]);
+    cb = sp_3072_add_12(b1, b, &b[12]);
+    /* Both half-sums overflowed => an extra 1 at digit 36 later. */
+    u  = ca & cb;
+    sp_3072_mul_12(z1, a1, b1);
+    sp_3072_mul_12(z2, &a[12], &b[12]);
+    sp_3072_mul_12(z0, a, b);
+    /* Account for the dropped carry bits: cb implies + a1 at digit 36,
+     * ca implies + b1 at digit 36 (masks are 0 or all-ones). */
+    sp_3072_mask_12(r + 24, a1, 0 - cb);
+    sp_3072_mask_12(b1, b1, 0 - ca);
+    u += sp_3072_add_12(r + 24, r + 24, b1);
+    /* z1 -= z2; z1 -= z0: leaves the middle Karatsuba term. */
+    u += sp_3072_sub_in_place_24(z1, z2);
+    u += sp_3072_sub_in_place_24(z1, z0);
+    /* Add middle term at digit 12; u accumulates the net carry. */
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+    /* Add high product at digit 24; the final carry cannot occur. */
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring: z0 = aL^2, z2 = aH^2,
+ * z1 = (aL+aH)^2 - z0 - z2 (= 2*aL*aH), r = z0 + (z1 << 12 digits)
+ * + (z2 << 24 digits). The carry u out of aL+aH contributes 2*a1 at
+ * digit 36, handled by the mask then doubling r+24 in place.
+ *
+ * r A single precision integer. Result, 48 digits.
+ * a A single precision integer, 24 digits.
+ */
+SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[24];
+    sp_digit z1[24];
+    sp_digit a1[12];
+    sp_digit u;
+
+    /* a1 = aL + aH; u is the carry out of the half-sum. */
+    u = sp_3072_add_12(a1, a, &a[12]);
+    sp_3072_sqr_12(z1, a1);
+    sp_3072_sqr_12(z2, &a[12]);
+    sp_3072_sqr_12(z0, a);
+    /* If u: place a1 at digit 24, then double it (carry bit appears
+     * squared and cross-multiplied: (2^384 + a1)^2 term). */
+    sp_3072_mask_12(r + 24, a1, 0 - u);
+    u += sp_3072_add_12(r + 24, r + 24, r + 24);
+    /* z1 -= z2; z1 -= z0: leaves the doubled cross term. */
+    u += sp_3072_sub_in_place_24(z1, z2);
+    u += sp_3072_sub_in_place_24(z1, z0);
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * The 48 32-bit digits are processed in twelve chunks of four words;
+ * the first chunk uses "subs" to start the borrow chain and every
+ * later chunk uses "sbcs" (ldr/str preserve the carry flag).
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ * Returns 0 when no borrow out of the top digit occurred, or all-ones
+ * ((sp_digit)-1) when it did, via the final "sbc %[c], r9, r9".
+ */
+static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* Digits 0..3: "subs" starts the borrow chain; each following
+         * 4-digit chunk repeats the same ldr/sbcs/str pattern at the
+         * next 16-byte offset. */
+        "ldr r2, [%[a], #0]\n\t"
+        "ldr r3, [%[a], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[a], #12]\n\t"
+        "ldr r6, [%[b], #0]\n\t"
+        "ldr r7, [%[b], #4]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "ldr r9, [%[b], #12]\n\t"
+        "subs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #0]\n\t"
+        "str r3, [%[a], #4]\n\t"
+        "str r4, [%[a], #8]\n\t"
+        "str r5, [%[a], #12]\n\t"
+        "ldr r2, [%[a], #16]\n\t"
+        "ldr r3, [%[a], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[a], #28]\n\t"
+        "ldr r6, [%[b], #16]\n\t"
+        "ldr r7, [%[b], #20]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "ldr r9, [%[b], #28]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #16]\n\t"
+        "str r3, [%[a], #20]\n\t"
+        "str r4, [%[a], #24]\n\t"
+        "str r5, [%[a], #28]\n\t"
+        "ldr r2, [%[a], #32]\n\t"
+        "ldr r3, [%[a], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[a], #44]\n\t"
+        "ldr r6, [%[b], #32]\n\t"
+        "ldr r7, [%[b], #36]\n\t"
+        "ldr r8, [%[b], #40]\n\t"
+        "ldr r9, [%[b], #44]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #32]\n\t"
+        "str r3, [%[a], #36]\n\t"
+        "str r4, [%[a], #40]\n\t"
+        "str r5, [%[a], #44]\n\t"
+        "ldr r2, [%[a], #48]\n\t"
+        "ldr r3, [%[a], #52]\n\t"
+        "ldr r4, [%[a], #56]\n\t"
+        "ldr r5, [%[a], #60]\n\t"
+        "ldr r6, [%[b], #48]\n\t"
+        "ldr r7, [%[b], #52]\n\t"
+        "ldr r8, [%[b], #56]\n\t"
+        "ldr r9, [%[b], #60]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #48]\n\t"
+        "str r3, [%[a], #52]\n\t"
+        "str r4, [%[a], #56]\n\t"
+        "str r5, [%[a], #60]\n\t"
+        "ldr r2, [%[a], #64]\n\t"
+        "ldr r3, [%[a], #68]\n\t"
+        "ldr r4, [%[a], #72]\n\t"
+        "ldr r5, [%[a], #76]\n\t"
+        "ldr r6, [%[b], #64]\n\t"
+        "ldr r7, [%[b], #68]\n\t"
+        "ldr r8, [%[b], #72]\n\t"
+        "ldr r9, [%[b], #76]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #64]\n\t"
+        "str r3, [%[a], #68]\n\t"
+        "str r4, [%[a], #72]\n\t"
+        "str r5, [%[a], #76]\n\t"
+        "ldr r2, [%[a], #80]\n\t"
+        "ldr r3, [%[a], #84]\n\t"
+        "ldr r4, [%[a], #88]\n\t"
+        "ldr r5, [%[a], #92]\n\t"
+        "ldr r6, [%[b], #80]\n\t"
+        "ldr r7, [%[b], #84]\n\t"
+        "ldr r8, [%[b], #88]\n\t"
+        "ldr r9, [%[b], #92]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #80]\n\t"
+        "str r3, [%[a], #84]\n\t"
+        "str r4, [%[a], #88]\n\t"
+        "str r5, [%[a], #92]\n\t"
+        "ldr r2, [%[a], #96]\n\t"
+        "ldr r3, [%[a], #100]\n\t"
+        "ldr r4, [%[a], #104]\n\t"
+        "ldr r5, [%[a], #108]\n\t"
+        "ldr r6, [%[b], #96]\n\t"
+        "ldr r7, [%[b], #100]\n\t"
+        "ldr r8, [%[b], #104]\n\t"
+        "ldr r9, [%[b], #108]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #96]\n\t"
+        "str r3, [%[a], #100]\n\t"
+        "str r4, [%[a], #104]\n\t"
+        "str r5, [%[a], #108]\n\t"
+        "ldr r2, [%[a], #112]\n\t"
+        "ldr r3, [%[a], #116]\n\t"
+        "ldr r4, [%[a], #120]\n\t"
+        "ldr r5, [%[a], #124]\n\t"
+        "ldr r6, [%[b], #112]\n\t"
+        "ldr r7, [%[b], #116]\n\t"
+        "ldr r8, [%[b], #120]\n\t"
+        "ldr r9, [%[b], #124]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #112]\n\t"
+        "str r3, [%[a], #116]\n\t"
+        "str r4, [%[a], #120]\n\t"
+        "str r5, [%[a], #124]\n\t"
+        "ldr r2, [%[a], #128]\n\t"
+        "ldr r3, [%[a], #132]\n\t"
+        "ldr r4, [%[a], #136]\n\t"
+        "ldr r5, [%[a], #140]\n\t"
+        "ldr r6, [%[b], #128]\n\t"
+        "ldr r7, [%[b], #132]\n\t"
+        "ldr r8, [%[b], #136]\n\t"
+        "ldr r9, [%[b], #140]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #128]\n\t"
+        "str r3, [%[a], #132]\n\t"
+        "str r4, [%[a], #136]\n\t"
+        "str r5, [%[a], #140]\n\t"
+        "ldr r2, [%[a], #144]\n\t"
+        "ldr r3, [%[a], #148]\n\t"
+        "ldr r4, [%[a], #152]\n\t"
+        "ldr r5, [%[a], #156]\n\t"
+        "ldr r6, [%[b], #144]\n\t"
+        "ldr r7, [%[b], #148]\n\t"
+        "ldr r8, [%[b], #152]\n\t"
+        "ldr r9, [%[b], #156]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #144]\n\t"
+        "str r3, [%[a], #148]\n\t"
+        "str r4, [%[a], #152]\n\t"
+        "str r5, [%[a], #156]\n\t"
+        "ldr r2, [%[a], #160]\n\t"
+        "ldr r3, [%[a], #164]\n\t"
+        "ldr r4, [%[a], #168]\n\t"
+        "ldr r5, [%[a], #172]\n\t"
+        "ldr r6, [%[b], #160]\n\t"
+        "ldr r7, [%[b], #164]\n\t"
+        "ldr r8, [%[b], #168]\n\t"
+        "ldr r9, [%[b], #172]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #160]\n\t"
+        "str r3, [%[a], #164]\n\t"
+        "str r4, [%[a], #168]\n\t"
+        "str r5, [%[a], #172]\n\t"
+        "ldr r2, [%[a], #176]\n\t"
+        "ldr r3, [%[a], #180]\n\t"
+        "ldr r4, [%[a], #184]\n\t"
+        "ldr r5, [%[a], #188]\n\t"
+        "ldr r6, [%[b], #176]\n\t"
+        "ldr r7, [%[b], #180]\n\t"
+        "ldr r8, [%[b], #184]\n\t"
+        "ldr r9, [%[b], #188]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #176]\n\t"
+        "str r3, [%[a], #180]\n\t"
+        "str r4, [%[a], #184]\n\t"
+        "str r5, [%[a], #188]\n\t"
+        /* c = 0 if no borrow, all-ones otherwise (sbc x, x = -(1-C)). */
+        "sbc %[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * The 48 32-bit digits are processed in twelve chunks of four words;
+ * "adds" starts the carry chain and every later chunk uses "adcs"
+ * (ldr/str preserve the carry flag between chunks).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant digit (0 or 1),
+ * produced by the final "adc %[c], r12, r12" with r12 zeroed on entry.
+ */
+static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r12, #0\n\t"
+        /* Digits 0..3: "adds" starts the chain; each following 4-digit
+         * chunk repeats the ldr/adcs/str pattern 16 bytes further on. */
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[a], #4]\n\t"
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r7, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "ldr r10, [%[b], #8]\n\t"
+        "ldr r14, [%[b], #12]\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r5, [%[r], #4]\n\t"
+        "str r6, [%[r], #8]\n\t"
+        "str r7, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[a], #20]\n\t"
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r7, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "ldr r9, [%[b], #20]\n\t"
+        "ldr r10, [%[b], #24]\n\t"
+        "ldr r14, [%[b], #28]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "str r6, [%[r], #24]\n\t"
+        "str r7, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[a], #36]\n\t"
+        "ldr r6, [%[a], #40]\n\t"
+        "ldr r7, [%[a], #44]\n\t"
+        "ldr r8, [%[b], #32]\n\t"
+        "ldr r9, [%[b], #36]\n\t"
+        "ldr r10, [%[b], #40]\n\t"
+        "ldr r14, [%[b], #44]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "str r5, [%[r], #36]\n\t"
+        "str r6, [%[r], #40]\n\t"
+        "str r7, [%[r], #44]\n\t"
+        "ldr r4, [%[a], #48]\n\t"
+        "ldr r5, [%[a], #52]\n\t"
+        "ldr r6, [%[a], #56]\n\t"
+        "ldr r7, [%[a], #60]\n\t"
+        "ldr r8, [%[b], #48]\n\t"
+        "ldr r9, [%[b], #52]\n\t"
+        "ldr r10, [%[b], #56]\n\t"
+        "ldr r14, [%[b], #60]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #48]\n\t"
+        "str r5, [%[r], #52]\n\t"
+        "str r6, [%[r], #56]\n\t"
+        "str r7, [%[r], #60]\n\t"
+        "ldr r4, [%[a], #64]\n\t"
+        "ldr r5, [%[a], #68]\n\t"
+        "ldr r6, [%[a], #72]\n\t"
+        "ldr r7, [%[a], #76]\n\t"
+        "ldr r8, [%[b], #64]\n\t"
+        "ldr r9, [%[b], #68]\n\t"
+        "ldr r10, [%[b], #72]\n\t"
+        "ldr r14, [%[b], #76]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #64]\n\t"
+        "str r5, [%[r], #68]\n\t"
+        "str r6, [%[r], #72]\n\t"
+        "str r7, [%[r], #76]\n\t"
+        "ldr r4, [%[a], #80]\n\t"
+        "ldr r5, [%[a], #84]\n\t"
+        "ldr r6, [%[a], #88]\n\t"
+        "ldr r7, [%[a], #92]\n\t"
+        "ldr r8, [%[b], #80]\n\t"
+        "ldr r9, [%[b], #84]\n\t"
+        "ldr r10, [%[b], #88]\n\t"
+        "ldr r14, [%[b], #92]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #80]\n\t"
+        "str r5, [%[r], #84]\n\t"
+        "str r6, [%[r], #88]\n\t"
+        "str r7, [%[r], #92]\n\t"
+        "ldr r4, [%[a], #96]\n\t"
+        "ldr r5, [%[a], #100]\n\t"
+        "ldr r6, [%[a], #104]\n\t"
+        "ldr r7, [%[a], #108]\n\t"
+        "ldr r8, [%[b], #96]\n\t"
+        "ldr r9, [%[b], #100]\n\t"
+        "ldr r10, [%[b], #104]\n\t"
+        "ldr r14, [%[b], #108]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #96]\n\t"
+        "str r5, [%[r], #100]\n\t"
+        "str r6, [%[r], #104]\n\t"
+        "str r7, [%[r], #108]\n\t"
+        "ldr r4, [%[a], #112]\n\t"
+        "ldr r5, [%[a], #116]\n\t"
+        "ldr r6, [%[a], #120]\n\t"
+        "ldr r7, [%[a], #124]\n\t"
+        "ldr r8, [%[b], #112]\n\t"
+        "ldr r9, [%[b], #116]\n\t"
+        "ldr r10, [%[b], #120]\n\t"
+        "ldr r14, [%[b], #124]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #112]\n\t"
+        "str r5, [%[r], #116]\n\t"
+        "str r6, [%[r], #120]\n\t"
+        "str r7, [%[r], #124]\n\t"
+        "ldr r4, [%[a], #128]\n\t"
+        "ldr r5, [%[a], #132]\n\t"
+        "ldr r6, [%[a], #136]\n\t"
+        "ldr r7, [%[a], #140]\n\t"
+        "ldr r8, [%[b], #128]\n\t"
+        "ldr r9, [%[b], #132]\n\t"
+        "ldr r10, [%[b], #136]\n\t"
+        "ldr r14, [%[b], #140]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #128]\n\t"
+        "str r5, [%[r], #132]\n\t"
+        "str r6, [%[r], #136]\n\t"
+        "str r7, [%[r], #140]\n\t"
+        "ldr r4, [%[a], #144]\n\t"
+        "ldr r5, [%[a], #148]\n\t"
+        "ldr r6, [%[a], #152]\n\t"
+        "ldr r7, [%[a], #156]\n\t"
+        "ldr r8, [%[b], #144]\n\t"
+        "ldr r9, [%[b], #148]\n\t"
+        "ldr r10, [%[b], #152]\n\t"
+        "ldr r14, [%[b], #156]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #144]\n\t"
+        "str r5, [%[r], #148]\n\t"
+        "str r6, [%[r], #152]\n\t"
+        "str r7, [%[r], #156]\n\t"
+        "ldr r4, [%[a], #160]\n\t"
+        "ldr r5, [%[a], #164]\n\t"
+        "ldr r6, [%[a], #168]\n\t"
+        "ldr r7, [%[a], #172]\n\t"
+        "ldr r8, [%[b], #160]\n\t"
+        "ldr r9, [%[b], #164]\n\t"
+        "ldr r10, [%[b], #168]\n\t"
+        "ldr r14, [%[b], #172]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #160]\n\t"
+        "str r5, [%[r], #164]\n\t"
+        "str r6, [%[r], #168]\n\t"
+        "str r7, [%[r], #172]\n\t"
+        "ldr r4, [%[a], #176]\n\t"
+        "ldr r5, [%[a], #180]\n\t"
+        "ldr r6, [%[a], #184]\n\t"
+        "ldr r7, [%[a], #188]\n\t"
+        "ldr r8, [%[b], #176]\n\t"
+        "ldr r9, [%[b], #180]\n\t"
+        "ldr r10, [%[b], #184]\n\t"
+        "ldr r14, [%[b], #188]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #176]\n\t"
+        "str r5, [%[r], #180]\n\t"
+        "str r6, [%[r], #184]\n\t"
+        "str r7, [%[r], #188]\n\t"
+        /* c = final carry (0 or 1) since r12 == 0. */
+        "adc %[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r. (r[i] = a[i] & m)
+ *
+ * Used to conditionally keep or zero a value: m is expected to be an
+ * all-ones or all-zeros mask.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 24; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int j;
+
+    /* Four digits per iteration: unrolled for speed. */
+    for (j = 0; j < 24; j += 4) {
+        r[j+0] = a[j+0] & m;
+        r[j+1] = a[j+1] & m;
+        r[j+2] = a[j+2] & m;
+        r[j+3] = a[j+3] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One Karatsuba level on top of the 24-digit primitives:
+ *   z0 = aL*bL, z2 = aH*bH, z1 = (aL+aH)*(bL+bH) - z0 - z2,
+ *   r  = z0 + (z1 << 24 digits) + (z2 << 48 digits).
+ * The carries out of the half-sums (ca, cb) are folded back in with
+ * the mask helpers so the middle product fits in 48 digits.
+ *
+ * r A single precision integer. Result, 96 digits.
+ * a A single precision integer, 48 digits.
+ * b A single precision integer, 48 digits.
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit b1[24];
+    sp_digit z2[48];
+    sp_digit u, ca, cb;
+
+    /* a1 = aL + aH, b1 = bL + bH; ca/cb hold the carries out. */
+    ca = sp_3072_add_24(a1, a, &a[24]);
+    cb = sp_3072_add_24(b1, b, &b[24]);
+    /* Both half-sums overflowed => an extra 1 at digit 72 later. */
+    u = ca & cb;
+    sp_3072_mul_24(z1, a1, b1);
+    sp_3072_mul_24(z2, &a[24], &b[24]);
+    sp_3072_mul_24(z0, a, b);
+    /* Account for the dropped carry bits: cb implies + a1 at digit 72,
+     * ca implies + b1 at digit 72 (masks are 0 or all-ones). */
+    sp_3072_mask_24(r + 48, a1, 0 - cb);
+    sp_3072_mask_24(b1, b1, 0 - ca);
+    u += sp_3072_add_24(r + 48, r + 48, b1);
+    /* z1 -= z2; z1 -= z0: leaves the middle Karatsuba term. */
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    /* Add middle term at digit 24; u accumulates the net carry. */
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    r[72] = u;
+    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+    /* Add high product at digit 48; the final carry cannot occur. */
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring: z0 = aL^2, z2 = aH^2,
+ * z1 = (aL+aH)^2 - z0 - z2 (= 2*aL*aH), r = z0 + (z1 << 24 digits)
+ * + (z2 << 48 digits). The carry u out of aL+aH contributes 2*a1 at
+ * digit 72, handled by the mask then doubling r+48 in place.
+ *
+ * r A single precision integer. Result, 96 digits.
+ * a A single precision integer, 48 digits.
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[48];
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit u;
+
+    /* a1 = aL + aH; u is the carry out of the half-sum. */
+    u = sp_3072_add_24(a1, a, &a[24]);
+    sp_3072_sqr_24(z1, a1);
+    sp_3072_sqr_24(z2, &a[24]);
+    sp_3072_sqr_24(z0, a);
+    /* If u: place a1 at digit 48, then double it (cross terms of the
+     * dropped carry bit). */
+    sp_3072_mask_24(r + 48, a1, 0 - u);
+    u += sp_3072_add_24(r + 48, r + 48, r + 48);
+    /* z1 -= z2; z1 -= z0: leaves the doubled cross term. */
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    r[72] = u;
+    XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "ldr r2, [%[a], #0]\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[a], #12]\n\t"
+ "ldr r6, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "subs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #0]\n\t"
+ "str r3, [%[a], #4]\n\t"
+ "str r4, [%[a], #8]\n\t"
+ "str r5, [%[a], #12]\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[a], #28]\n\t"
+ "ldr r6, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #16]\n\t"
+ "str r3, [%[a], #20]\n\t"
+ "str r4, [%[a], #24]\n\t"
+ "str r5, [%[a], #28]\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[a], #44]\n\t"
+ "ldr r6, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "ldr r8, [%[b], #40]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #32]\n\t"
+ "str r3, [%[a], #36]\n\t"
+ "str r4, [%[a], #40]\n\t"
+ "str r5, [%[a], #44]\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[a], #60]\n\t"
+ "ldr r6, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "ldr r8, [%[b], #56]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #48]\n\t"
+ "str r3, [%[a], #52]\n\t"
+ "str r4, [%[a], #56]\n\t"
+ "str r5, [%[a], #60]\n\t"
+ "ldr r2, [%[a], #64]\n\t"
+ "ldr r3, [%[a], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[a], #76]\n\t"
+ "ldr r6, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "ldr r8, [%[b], #72]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #64]\n\t"
+ "str r3, [%[a], #68]\n\t"
+ "str r4, [%[a], #72]\n\t"
+ "str r5, [%[a], #76]\n\t"
+ "ldr r2, [%[a], #80]\n\t"
+ "ldr r3, [%[a], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[a], #92]\n\t"
+ "ldr r6, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "ldr r8, [%[b], #88]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #80]\n\t"
+ "str r3, [%[a], #84]\n\t"
+ "str r4, [%[a], #88]\n\t"
+ "str r5, [%[a], #92]\n\t"
+ "ldr r2, [%[a], #96]\n\t"
+ "ldr r3, [%[a], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[a], #108]\n\t"
+ "ldr r6, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "ldr r8, [%[b], #104]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #96]\n\t"
+ "str r3, [%[a], #100]\n\t"
+ "str r4, [%[a], #104]\n\t"
+ "str r5, [%[a], #108]\n\t"
+ "ldr r2, [%[a], #112]\n\t"
+ "ldr r3, [%[a], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[a], #124]\n\t"
+ "ldr r6, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "ldr r8, [%[b], #120]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #112]\n\t"
+ "str r3, [%[a], #116]\n\t"
+ "str r4, [%[a], #120]\n\t"
+ "str r5, [%[a], #124]\n\t"
+ "ldr r2, [%[a], #128]\n\t"
+ "ldr r3, [%[a], #132]\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r5, [%[a], #140]\n\t"
+ "ldr r6, [%[b], #128]\n\t"
+ "ldr r7, [%[b], #132]\n\t"
+ "ldr r8, [%[b], #136]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #128]\n\t"
+ "str r3, [%[a], #132]\n\t"
+ "str r4, [%[a], #136]\n\t"
+ "str r5, [%[a], #140]\n\t"
+ "ldr r2, [%[a], #144]\n\t"
+ "ldr r3, [%[a], #148]\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r5, [%[a], #156]\n\t"
+ "ldr r6, [%[b], #144]\n\t"
+ "ldr r7, [%[b], #148]\n\t"
+ "ldr r8, [%[b], #152]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #144]\n\t"
+ "str r3, [%[a], #148]\n\t"
+ "str r4, [%[a], #152]\n\t"
+ "str r5, [%[a], #156]\n\t"
+ "ldr r2, [%[a], #160]\n\t"
+ "ldr r3, [%[a], #164]\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r5, [%[a], #172]\n\t"
+ "ldr r6, [%[b], #160]\n\t"
+ "ldr r7, [%[b], #164]\n\t"
+ "ldr r8, [%[b], #168]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #160]\n\t"
+ "str r3, [%[a], #164]\n\t"
+ "str r4, [%[a], #168]\n\t"
+ "str r5, [%[a], #172]\n\t"
+ "ldr r2, [%[a], #176]\n\t"
+ "ldr r3, [%[a], #180]\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r5, [%[a], #188]\n\t"
+ "ldr r6, [%[b], #176]\n\t"
+ "ldr r7, [%[b], #180]\n\t"
+ "ldr r8, [%[b], #184]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #176]\n\t"
+ "str r3, [%[a], #180]\n\t"
+ "str r4, [%[a], #184]\n\t"
+ "str r5, [%[a], #188]\n\t"
+ "ldr r2, [%[a], #192]\n\t"
+ "ldr r3, [%[a], #196]\n\t"
+ "ldr r4, [%[a], #200]\n\t"
+ "ldr r5, [%[a], #204]\n\t"
+ "ldr r6, [%[b], #192]\n\t"
+ "ldr r7, [%[b], #196]\n\t"
+ "ldr r8, [%[b], #200]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #192]\n\t"
+ "str r3, [%[a], #196]\n\t"
+ "str r4, [%[a], #200]\n\t"
+ "str r5, [%[a], #204]\n\t"
+ "ldr r2, [%[a], #208]\n\t"
+ "ldr r3, [%[a], #212]\n\t"
+ "ldr r4, [%[a], #216]\n\t"
+ "ldr r5, [%[a], #220]\n\t"
+ "ldr r6, [%[b], #208]\n\t"
+ "ldr r7, [%[b], #212]\n\t"
+ "ldr r8, [%[b], #216]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #208]\n\t"
+ "str r3, [%[a], #212]\n\t"
+ "str r4, [%[a], #216]\n\t"
+ "str r5, [%[a], #220]\n\t"
+ "ldr r2, [%[a], #224]\n\t"
+ "ldr r3, [%[a], #228]\n\t"
+ "ldr r4, [%[a], #232]\n\t"
+ "ldr r5, [%[a], #236]\n\t"
+ "ldr r6, [%[b], #224]\n\t"
+ "ldr r7, [%[b], #228]\n\t"
+ "ldr r8, [%[b], #232]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #224]\n\t"
+ "str r3, [%[a], #228]\n\t"
+ "str r4, [%[a], #232]\n\t"
+ "str r5, [%[a], #236]\n\t"
+ "ldr r2, [%[a], #240]\n\t"
+ "ldr r3, [%[a], #244]\n\t"
+ "ldr r4, [%[a], #248]\n\t"
+ "ldr r5, [%[a], #252]\n\t"
+ "ldr r6, [%[b], #240]\n\t"
+ "ldr r7, [%[b], #244]\n\t"
+ "ldr r8, [%[b], #248]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #240]\n\t"
+ "str r3, [%[a], #244]\n\t"
+ "str r4, [%[a], #248]\n\t"
+ "str r5, [%[a], #252]\n\t"
+ "ldr r2, [%[a], #256]\n\t"
+ "ldr r3, [%[a], #260]\n\t"
+ "ldr r4, [%[a], #264]\n\t"
+ "ldr r5, [%[a], #268]\n\t"
+ "ldr r6, [%[b], #256]\n\t"
+ "ldr r7, [%[b], #260]\n\t"
+ "ldr r8, [%[b], #264]\n\t"
+ "ldr r9, [%[b], #268]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #256]\n\t"
+ "str r3, [%[a], #260]\n\t"
+ "str r4, [%[a], #264]\n\t"
+ "str r5, [%[a], #268]\n\t"
+ "ldr r2, [%[a], #272]\n\t"
+ "ldr r3, [%[a], #276]\n\t"
+ "ldr r4, [%[a], #280]\n\t"
+ "ldr r5, [%[a], #284]\n\t"
+ "ldr r6, [%[b], #272]\n\t"
+ "ldr r7, [%[b], #276]\n\t"
+ "ldr r8, [%[b], #280]\n\t"
+ "ldr r9, [%[b], #284]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #272]\n\t"
+ "str r3, [%[a], #276]\n\t"
+ "str r4, [%[a], #280]\n\t"
+ "str r5, [%[a], #284]\n\t"
+ "ldr r2, [%[a], #288]\n\t"
+ "ldr r3, [%[a], #292]\n\t"
+ "ldr r4, [%[a], #296]\n\t"
+ "ldr r5, [%[a], #300]\n\t"
+ "ldr r6, [%[b], #288]\n\t"
+ "ldr r7, [%[b], #292]\n\t"
+ "ldr r8, [%[b], #296]\n\t"
+ "ldr r9, [%[b], #300]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #288]\n\t"
+ "str r3, [%[a], #292]\n\t"
+ "str r4, [%[a], #296]\n\t"
+ "str r5, [%[a], #300]\n\t"
+ "ldr r2, [%[a], #304]\n\t"
+ "ldr r3, [%[a], #308]\n\t"
+ "ldr r4, [%[a], #312]\n\t"
+ "ldr r5, [%[a], #316]\n\t"
+ "ldr r6, [%[b], #304]\n\t"
+ "ldr r7, [%[b], #308]\n\t"
+ "ldr r8, [%[b], #312]\n\t"
+ "ldr r9, [%[b], #316]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #304]\n\t"
+ "str r3, [%[a], #308]\n\t"
+ "str r4, [%[a], #312]\n\t"
+ "str r5, [%[a], #316]\n\t"
+ "ldr r2, [%[a], #320]\n\t"
+ "ldr r3, [%[a], #324]\n\t"
+ "ldr r4, [%[a], #328]\n\t"
+ "ldr r5, [%[a], #332]\n\t"
+ "ldr r6, [%[b], #320]\n\t"
+ "ldr r7, [%[b], #324]\n\t"
+ "ldr r8, [%[b], #328]\n\t"
+ "ldr r9, [%[b], #332]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #320]\n\t"
+ "str r3, [%[a], #324]\n\t"
+ "str r4, [%[a], #328]\n\t"
+ "str r5, [%[a], #332]\n\t"
+ "ldr r2, [%[a], #336]\n\t"
+ "ldr r3, [%[a], #340]\n\t"
+ "ldr r4, [%[a], #344]\n\t"
+ "ldr r5, [%[a], #348]\n\t"
+ "ldr r6, [%[b], #336]\n\t"
+ "ldr r7, [%[b], #340]\n\t"
+ "ldr r8, [%[b], #344]\n\t"
+ "ldr r9, [%[b], #348]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #336]\n\t"
+ "str r3, [%[a], #340]\n\t"
+ "str r4, [%[a], #344]\n\t"
+ "str r5, [%[a], #348]\n\t"
+ "ldr r2, [%[a], #352]\n\t"
+ "ldr r3, [%[a], #356]\n\t"
+ "ldr r4, [%[a], #360]\n\t"
+ "ldr r5, [%[a], #364]\n\t"
+ "ldr r6, [%[b], #352]\n\t"
+ "ldr r7, [%[b], #356]\n\t"
+ "ldr r8, [%[b], #360]\n\t"
+ "ldr r9, [%[b], #364]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #352]\n\t"
+ "str r3, [%[a], #356]\n\t"
+ "str r4, [%[a], #360]\n\t"
+ "str r5, [%[a], #364]\n\t"
+ "ldr r2, [%[a], #368]\n\t"
+ "ldr r3, [%[a], #372]\n\t"
+ "ldr r4, [%[a], #376]\n\t"
+ "ldr r5, [%[a], #380]\n\t"
+ "ldr r6, [%[b], #368]\n\t"
+ "ldr r7, [%[b], #372]\n\t"
+ "ldr r8, [%[b], #376]\n\t"
+ "ldr r9, [%[b], #380]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #368]\n\t"
+ "str r3, [%[a], #372]\n\t"
+ "str r4, [%[a], #376]\n\t"
+ "str r5, [%[a], #380]\n\t"
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled 96-word (3072-bit) addition for 32-bit ARM.  Four
+ * words are loaded from each operand per group; the carry is kept in
+ * the C flag across the whole chain ("adds" starts it, "adcs" extends
+ * it) and the final "adc" converts the flag into the 0/1 return value.
+ *
+ * r A single precision integer. Receives the 96-word sum.
+ * a A single precision integer. First addend (96 words).
+ * b A single precision integer. Second addend (96 words).
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r12 = 0; used by the final adc to materialise the C flag. */
+ "mov r12, #0\n\t"
+ /* words 0..3 ("adds" begins the carry chain) */
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[a], #4]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "ldr r10, [%[b], #8]\n\t"
+ "ldr r14, [%[b], #12]\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r5, [%[r], #4]\n\t"
+ "str r6, [%[r], #8]\n\t"
+ "str r7, [%[r], #12]\n\t"
+ /* words 4..7 */
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[a], #20]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "ldr r10, [%[b], #24]\n\t"
+ "ldr r14, [%[b], #28]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "str r6, [%[r], #24]\n\t"
+ "str r7, [%[r], #28]\n\t"
+ /* words 8..11 */
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[a], #36]\n\t"
+ "ldr r6, [%[a], #40]\n\t"
+ "ldr r7, [%[a], #44]\n\t"
+ "ldr r8, [%[b], #32]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "ldr r10, [%[b], #40]\n\t"
+ "ldr r14, [%[b], #44]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r5, [%[r], #36]\n\t"
+ "str r6, [%[r], #40]\n\t"
+ "str r7, [%[r], #44]\n\t"
+ /* words 12..15 */
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[a], #52]\n\t"
+ "ldr r6, [%[a], #56]\n\t"
+ "ldr r7, [%[a], #60]\n\t"
+ "ldr r8, [%[b], #48]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "ldr r10, [%[b], #56]\n\t"
+ "ldr r14, [%[b], #60]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r5, [%[r], #52]\n\t"
+ "str r6, [%[r], #56]\n\t"
+ "str r7, [%[r], #60]\n\t"
+ /* words 16..19 */
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[a], #68]\n\t"
+ "ldr r6, [%[a], #72]\n\t"
+ "ldr r7, [%[a], #76]\n\t"
+ "ldr r8, [%[b], #64]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "ldr r10, [%[b], #72]\n\t"
+ "ldr r14, [%[b], #76]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "str r6, [%[r], #72]\n\t"
+ "str r7, [%[r], #76]\n\t"
+ /* words 20..23 */
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[a], #84]\n\t"
+ "ldr r6, [%[a], #88]\n\t"
+ "ldr r7, [%[a], #92]\n\t"
+ "ldr r8, [%[b], #80]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "ldr r10, [%[b], #88]\n\t"
+ "ldr r14, [%[b], #92]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r5, [%[r], #84]\n\t"
+ "str r6, [%[r], #88]\n\t"
+ "str r7, [%[r], #92]\n\t"
+ /* words 24..27 */
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[a], #100]\n\t"
+ "ldr r6, [%[a], #104]\n\t"
+ "ldr r7, [%[a], #108]\n\t"
+ "ldr r8, [%[b], #96]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "ldr r10, [%[b], #104]\n\t"
+ "ldr r14, [%[b], #108]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r5, [%[r], #100]\n\t"
+ "str r6, [%[r], #104]\n\t"
+ "str r7, [%[r], #108]\n\t"
+ /* words 28..31 */
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[a], #116]\n\t"
+ "ldr r6, [%[a], #120]\n\t"
+ "ldr r7, [%[a], #124]\n\t"
+ "ldr r8, [%[b], #112]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "ldr r10, [%[b], #120]\n\t"
+ "ldr r14, [%[b], #124]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r5, [%[r], #116]\n\t"
+ "str r6, [%[r], #120]\n\t"
+ "str r7, [%[r], #124]\n\t"
+ /* words 32..35 */
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r5, [%[a], #132]\n\t"
+ "ldr r6, [%[a], #136]\n\t"
+ "ldr r7, [%[a], #140]\n\t"
+ "ldr r8, [%[b], #128]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "ldr r10, [%[b], #136]\n\t"
+ "ldr r14, [%[b], #140]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #128]\n\t"
+ "str r5, [%[r], #132]\n\t"
+ "str r6, [%[r], #136]\n\t"
+ "str r7, [%[r], #140]\n\t"
+ /* words 36..39 */
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r5, [%[a], #148]\n\t"
+ "ldr r6, [%[a], #152]\n\t"
+ "ldr r7, [%[a], #156]\n\t"
+ "ldr r8, [%[b], #144]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "ldr r10, [%[b], #152]\n\t"
+ "ldr r14, [%[b], #156]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #144]\n\t"
+ "str r5, [%[r], #148]\n\t"
+ "str r6, [%[r], #152]\n\t"
+ "str r7, [%[r], #156]\n\t"
+ /* words 40..43 */
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r5, [%[a], #164]\n\t"
+ "ldr r6, [%[a], #168]\n\t"
+ "ldr r7, [%[a], #172]\n\t"
+ "ldr r8, [%[b], #160]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "ldr r10, [%[b], #168]\n\t"
+ "ldr r14, [%[b], #172]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "str r5, [%[r], #164]\n\t"
+ "str r6, [%[r], #168]\n\t"
+ "str r7, [%[r], #172]\n\t"
+ /* words 44..47 */
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r5, [%[a], #180]\n\t"
+ "ldr r6, [%[a], #184]\n\t"
+ "ldr r7, [%[a], #188]\n\t"
+ "ldr r8, [%[b], #176]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "ldr r10, [%[b], #184]\n\t"
+ "ldr r14, [%[b], #188]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #176]\n\t"
+ "str r5, [%[r], #180]\n\t"
+ "str r6, [%[r], #184]\n\t"
+ "str r7, [%[r], #188]\n\t"
+ /* words 48..51 */
+ "ldr r4, [%[a], #192]\n\t"
+ "ldr r5, [%[a], #196]\n\t"
+ "ldr r6, [%[a], #200]\n\t"
+ "ldr r7, [%[a], #204]\n\t"
+ "ldr r8, [%[b], #192]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "ldr r10, [%[b], #200]\n\t"
+ "ldr r14, [%[b], #204]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #192]\n\t"
+ "str r5, [%[r], #196]\n\t"
+ "str r6, [%[r], #200]\n\t"
+ "str r7, [%[r], #204]\n\t"
+ /* words 52..55 */
+ "ldr r4, [%[a], #208]\n\t"
+ "ldr r5, [%[a], #212]\n\t"
+ "ldr r6, [%[a], #216]\n\t"
+ "ldr r7, [%[a], #220]\n\t"
+ "ldr r8, [%[b], #208]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "ldr r10, [%[b], #216]\n\t"
+ "ldr r14, [%[b], #220]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "str r5, [%[r], #212]\n\t"
+ "str r6, [%[r], #216]\n\t"
+ "str r7, [%[r], #220]\n\t"
+ /* words 56..59 */
+ "ldr r4, [%[a], #224]\n\t"
+ "ldr r5, [%[a], #228]\n\t"
+ "ldr r6, [%[a], #232]\n\t"
+ "ldr r7, [%[a], #236]\n\t"
+ "ldr r8, [%[b], #224]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "ldr r10, [%[b], #232]\n\t"
+ "ldr r14, [%[b], #236]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #224]\n\t"
+ "str r5, [%[r], #228]\n\t"
+ "str r6, [%[r], #232]\n\t"
+ "str r7, [%[r], #236]\n\t"
+ /* words 60..63 */
+ "ldr r4, [%[a], #240]\n\t"
+ "ldr r5, [%[a], #244]\n\t"
+ "ldr r6, [%[a], #248]\n\t"
+ "ldr r7, [%[a], #252]\n\t"
+ "ldr r8, [%[b], #240]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "ldr r10, [%[b], #248]\n\t"
+ "ldr r14, [%[b], #252]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #240]\n\t"
+ "str r5, [%[r], #244]\n\t"
+ "str r6, [%[r], #248]\n\t"
+ "str r7, [%[r], #252]\n\t"
+ /* words 64..67 */
+ "ldr r4, [%[a], #256]\n\t"
+ "ldr r5, [%[a], #260]\n\t"
+ "ldr r6, [%[a], #264]\n\t"
+ "ldr r7, [%[a], #268]\n\t"
+ "ldr r8, [%[b], #256]\n\t"
+ "ldr r9, [%[b], #260]\n\t"
+ "ldr r10, [%[b], #264]\n\t"
+ "ldr r14, [%[b], #268]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #256]\n\t"
+ "str r5, [%[r], #260]\n\t"
+ "str r6, [%[r], #264]\n\t"
+ "str r7, [%[r], #268]\n\t"
+ /* words 68..71 */
+ "ldr r4, [%[a], #272]\n\t"
+ "ldr r5, [%[a], #276]\n\t"
+ "ldr r6, [%[a], #280]\n\t"
+ "ldr r7, [%[a], #284]\n\t"
+ "ldr r8, [%[b], #272]\n\t"
+ "ldr r9, [%[b], #276]\n\t"
+ "ldr r10, [%[b], #280]\n\t"
+ "ldr r14, [%[b], #284]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #272]\n\t"
+ "str r5, [%[r], #276]\n\t"
+ "str r6, [%[r], #280]\n\t"
+ "str r7, [%[r], #284]\n\t"
+ /* words 72..75 */
+ "ldr r4, [%[a], #288]\n\t"
+ "ldr r5, [%[a], #292]\n\t"
+ "ldr r6, [%[a], #296]\n\t"
+ "ldr r7, [%[a], #300]\n\t"
+ "ldr r8, [%[b], #288]\n\t"
+ "ldr r9, [%[b], #292]\n\t"
+ "ldr r10, [%[b], #296]\n\t"
+ "ldr r14, [%[b], #300]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #288]\n\t"
+ "str r5, [%[r], #292]\n\t"
+ "str r6, [%[r], #296]\n\t"
+ "str r7, [%[r], #300]\n\t"
+ /* words 76..79 */
+ "ldr r4, [%[a], #304]\n\t"
+ "ldr r5, [%[a], #308]\n\t"
+ "ldr r6, [%[a], #312]\n\t"
+ "ldr r7, [%[a], #316]\n\t"
+ "ldr r8, [%[b], #304]\n\t"
+ "ldr r9, [%[b], #308]\n\t"
+ "ldr r10, [%[b], #312]\n\t"
+ "ldr r14, [%[b], #316]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #304]\n\t"
+ "str r5, [%[r], #308]\n\t"
+ "str r6, [%[r], #312]\n\t"
+ "str r7, [%[r], #316]\n\t"
+ /* words 80..83 */
+ "ldr r4, [%[a], #320]\n\t"
+ "ldr r5, [%[a], #324]\n\t"
+ "ldr r6, [%[a], #328]\n\t"
+ "ldr r7, [%[a], #332]\n\t"
+ "ldr r8, [%[b], #320]\n\t"
+ "ldr r9, [%[b], #324]\n\t"
+ "ldr r10, [%[b], #328]\n\t"
+ "ldr r14, [%[b], #332]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #320]\n\t"
+ "str r5, [%[r], #324]\n\t"
+ "str r6, [%[r], #328]\n\t"
+ "str r7, [%[r], #332]\n\t"
+ /* words 84..87 */
+ "ldr r4, [%[a], #336]\n\t"
+ "ldr r5, [%[a], #340]\n\t"
+ "ldr r6, [%[a], #344]\n\t"
+ "ldr r7, [%[a], #348]\n\t"
+ "ldr r8, [%[b], #336]\n\t"
+ "ldr r9, [%[b], #340]\n\t"
+ "ldr r10, [%[b], #344]\n\t"
+ "ldr r14, [%[b], #348]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #336]\n\t"
+ "str r5, [%[r], #340]\n\t"
+ "str r6, [%[r], #344]\n\t"
+ "str r7, [%[r], #348]\n\t"
+ /* words 88..91 */
+ "ldr r4, [%[a], #352]\n\t"
+ "ldr r5, [%[a], #356]\n\t"
+ "ldr r6, [%[a], #360]\n\t"
+ "ldr r7, [%[a], #364]\n\t"
+ "ldr r8, [%[b], #352]\n\t"
+ "ldr r9, [%[b], #356]\n\t"
+ "ldr r10, [%[b], #360]\n\t"
+ "ldr r14, [%[b], #364]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #352]\n\t"
+ "str r5, [%[r], #356]\n\t"
+ "str r6, [%[r], #360]\n\t"
+ "str r7, [%[r], #364]\n\t"
+ /* words 92..95 (most significant group) */
+ "ldr r4, [%[a], #368]\n\t"
+ "ldr r5, [%[a], #372]\n\t"
+ "ldr r6, [%[a], #376]\n\t"
+ "ldr r7, [%[a], #380]\n\t"
+ "ldr r8, [%[b], #368]\n\t"
+ "ldr r9, [%[b], #372]\n\t"
+ "ldr r10, [%[b], #376]\n\t"
+ "ldr r14, [%[b], #380]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #368]\n\t"
+ "str r5, [%[r], #372]\n\t"
+ "str r6, [%[r], #376]\n\t"
+ "str r7, [%[r], #380]\n\t"
+ /* c = C flag (r12 is 0, so adc yields 0 or 1). */
+ "adc %[c], r12, r12\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Copies 48 words of a into r with every word ANDed with m.  The mask
+ * is typically 0 or all ones, making this a branch-free conditional
+ * copy/zero of a half-size (1536-bit) value.
+ *
+ * r A single precision integer. Result (48 words).
+ * a A single precision integer. Source (48 words).
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Small-code variant: one word per iteration. */
+ int i = 0;
+
+ while (i < 48) {
+ r[i] = a[i] & m;
+ i++;
+ }
+#else
+ /* Speed variant: unrolled eight words per iteration. */
+ int j;
+
+ for (j = 0; j < 48; j += 8) {
+ r[j + 0] = a[j + 0] & m;
+ r[j + 1] = a[j + 1] & m;
+ r[j + 2] = a[j + 2] & m;
+ r[j + 3] = a[j + 3] & m;
+ r[j + 4] = a[j + 4] & m;
+ r[j + 5] = a[j + 5] & m;
+ r[j + 6] = a[j + 6] & m;
+ r[j + 7] = a[j + 7] & m;
+ }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba on top of the 48-word primitives.  With
+ * a = a0 + a1*2^1536 and b = b0 + b1*2^1536:
+ *   z0 = a0*b0, z2 = a1*b1, z1 = (a0+a1)*(b0+b1)
+ *   r  = z0 + (z1 - z0 - z2)*2^1536 + z2*2^3072
+ * The carries out of the half-size additions (ca, cb) are folded back
+ * in with masked adds since a0+a1 and b0+b1 are stored to 48 words
+ * only.  NOTE(review): the carry/borrow accumulation into u relies on
+ * sub_in_place returning 0/-1 and add returning 0/1 cancelling out
+ * exactly — preserve the statement order.
+ *
+ * r A single precision integer. Receives the 192-word product.
+ * a A single precision integer (96 words).
+ * b A single precision integer (96 words).
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[96];
+ sp_digit a1[48];
+ sp_digit b1[48];
+ sp_digit z2[96];
+ sp_digit u, ca, cb;
+
+ /* a1 = a0 + a1 (low 48 words), ca = carry out. */
+ ca = sp_3072_add_48(a1, a, &a[48]);
+ /* b1 = b0 + b1 (low 48 words), cb = carry out. */
+ cb = sp_3072_add_48(b1, b, &b[48]);
+ /* Both sums overflowed => z1 gains an extra 2^3072 term. */
+ u = ca & cb;
+ /* z1 = (a0+a1 mod 2^1536) * (b0+b1 mod 2^1536) */
+ sp_3072_mul_48(z1, a1, b1);
+ /* z2 = a1 * b1 (high halves) */
+ sp_3072_mul_48(z2, &a[48], &b[48]);
+ /* z0 = a0 * b0 (low halves); written straight into r[0..95]. */
+ sp_3072_mul_48(z0, a, b);
+ /* r[96..143] is free scratch here: accumulate the cross-carry
+ * corrections cb*(a0+a1)' and ca*(b0+b1)' for z1's high half. */
+ sp_3072_mask_48(r + 96, a1, 0 - cb);
+ sp_3072_mask_48(b1, b1, 0 - ca);
+ u += sp_3072_add_48(r + 96, r + 96, b1);
+ /* z1 -= z2; z1 -= z0 => z1 now holds the middle Karatsuba term. */
+ u += sp_3072_sub_in_place_96(z1, z2);
+ u += sp_3072_sub_in_place_96(z1, z0);
+ /* Add the middle term at word offset 48 (i.e. * 2^1536). */
+ u += sp_3072_add_96(r + 48, r + 48, z1);
+ /* Net carry lands in word 144; clear the rest of the top quarter. */
+ r[144] = u;
+ XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+ /* Finally add z2 at word offset 96 (i.e. * 2^3072). */
+ (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring over the 48-word primitives.  With
+ * a = a0 + a1*2^1536:
+ *   z0 = a0^2, z2 = a1^2, z1 = (a0+a1)^2
+ *   r  = z0 + (z1 - z0 - z2)*2^1536 + z2*2^3072
+ * The carry u out of a0+a1 is folded in by adding 2*u*(a0+a1)'
+ * (masked copy doubled in place) since a0+a1 is stored to 48 words
+ * only.  NOTE(review): statement order matters for the carry
+ * accumulation in u — keep it as generated.
+ *
+ * r A single precision integer. Receives the 192-word square.
+ * a A single precision integer (96 words).
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[96];
+ sp_digit z1[96];
+ sp_digit a1[48];
+ sp_digit u;
+
+ /* a1 = a0 + a1 (low 48 words), u = carry out. */
+ u = sp_3072_add_48(a1, a, &a[48]);
+ /* z1 = (a0+a1 mod 2^1536)^2 */
+ sp_3072_sqr_48(z1, a1);
+ /* z2 = a1^2 */
+ sp_3072_sqr_48(z2, &a[48]);
+ /* z0 = a0^2; written straight into r[0..95]. */
+ sp_3072_sqr_48(z0, a);
+ /* r[96..143] scratch: u*(a0+a1)', doubled — the cross term from the
+ * dropped carry of (a0+a1). */
+ sp_3072_mask_48(r + 96, a1, 0 - u);
+ u += sp_3072_add_48(r + 96, r + 96, r + 96);
+ /* z1 -= z2; z1 -= z0 => middle Karatsuba term. */
+ u += sp_3072_sub_in_place_96(z1, z2);
+ u += sp_3072_sub_in_place_96(z1, z0);
+ /* Add middle term at word offset 48 (* 2^1536). */
+ u += sp_3072_add_96(r + 48, r + 48, z1);
+ /* Net carry lands in word 144; clear the rest of the top quarter. */
+ r[144] = u;
+ XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+ /* Add z2 at word offset 96 (* 2^3072). */
+ (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Compact looping variant: 96 words (3072 bits) added four words per
+ * iteration.  The carry cannot live in the C flag across the loop
+ * control (cmp clobbers flags), so it is parked in c each iteration
+ * and re-created at the top with "adds c, c, #-1": c==1 gives
+ * 1 + 0xFFFFFFFF which sets C, c==0 clears it.
+ *
+ * r A single precision integer. Receives the 96-word sum.
+ * a A single precision integer (96 words).
+ * b A single precision integer (96 words).
+ * Returns the carry out (0 or 1).
+ */
+static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r12 = one-past-the-end of a (96 words = 384 bytes). */
+ "add r12, %[a], #384\n\t"
+ "\n1:\n\t"
+ /* Restore the carry flag from c (see function comment). */
+ "adds %[c], %[c], #-1\n\t"
+ /* Load four words from each operand, post-incrementing. */
+ "ldr r4, [%[a]], #4\n\t"
+ "ldr r5, [%[a]], #4\n\t"
+ "ldr r6, [%[a]], #4\n\t"
+ "ldr r7, [%[a]], #4\n\t"
+ "ldr r8, [%[b]], #4\n\t"
+ "ldr r9, [%[b]], #4\n\t"
+ "ldr r10, [%[b]], #4\n\t"
+ "ldr r14, [%[b]], #4\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r]], #4\n\t"
+ "str r5, [%[r]], #4\n\t"
+ "str r6, [%[r]], #4\n\t"
+ "str r7, [%[r]], #4\n\t"
+ /* Park the carry flag in c as 0/1 before cmp clobbers it. */
+ "mov r4, #0\n\t"
+ "adc %[c], r4, #0\n\t"
+ "cmp %[a], r12\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Compact looping variant: 96 words subtracted four per iteration.
+ * Between iterations the borrow is parked in c as 0 (no borrow) or
+ * 0xFFFFFFFF (borrow, from "sbc c, r14, r14") and re-created at the
+ * loop top by "subs c, r14, c": 0 - 0 sets C (no borrow), while
+ * 0 - 0xFFFFFFFF clears C (borrow pending).
+ *
+ * a A single precision integer (96 words). Updated in place.
+ * b A single precision integer (96 words).
+ * Returns 0 on no borrow, all-ones mask on borrow.
+ */
+static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r14 = constant 0 for the borrow save/restore idiom. */
+ "mov r14, #0\n\t"
+ /* r12 = one-past-the-end of a (96 words = 384 bytes). */
+ "add r12, %[a], #384\n\t"
+ "\n1:\n\t"
+ /* Restore the borrow into the C flag (see function comment). */
+ "subs %[c], r14, %[c]\n\t"
+ /* a words via fixed offsets (a advances at the stores below);
+ * b words via post-increment. */
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b]], #4\n\t"
+ "ldr r8, [%[b]], #4\n\t"
+ "ldr r9, [%[b]], #4\n\t"
+ "ldr r10, [%[b]], #4\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "sbcs r6, r6, r10\n\t"
+ "str r3, [%[a]], #4\n\t"
+ "str r4, [%[a]], #4\n\t"
+ "str r5, [%[a]], #4\n\t"
+ "str r6, [%[a]], #4\n\t"
+ /* c = 0 - 0 - !C: 0 if no borrow, 0xFFFFFFFF if borrow. */
+ "sbc %[c], r14, r14\n\t"
+ "cmp %[a], r12\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Compact product-scanning (column-wise) 96x96-word multiply.  Each
+ * output word k is the sum of a[i]*b[k-i]; the 96-bit column
+ * accumulator lives in r6:r7:r8 (low..high).  The 192-word result is
+ * built in a 768-byte stack buffer and copied to r at the end, so r
+ * may alias a or b.
+ *
+ * r A single precision integer. Receives the 192-word product.
+ * a A single precision integer (96 words).
+ * b A single precision integer (96 words).
+ */
+static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ /* 768-byte (192-word) result buffer on the stack. */
+ "sub sp, sp, #768\n\t"
+ /* r5 = output byte offset k*4; r6:r7:r8 = column accumulator. */
+ "mov r5, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ /* r3 = byte index into a = max(0, k*4 - 380); r4 = k*4 - r3
+ * (byte index into b) — clamps both indices to [0, 380]. */
+ "subs r3, r5, #380\n\t"
+ "it cc\n\t" /* Thumb-2: predicate the next instruction */
+ "movcc r3, #0\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ /* Accumulate a[r3/4] * b[r4/4] into the 96-bit column sum. */
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r12, [%[b], r4]\n\t"
+ "umull r9, r10, r14, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ /* Walk the anti-diagonal: i++, j--. */
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #384\n\t" /* ran off the end of a */
+ "beq 3f\n\t"
+ "cmp r3, r5\n\t" /* crossed the diagonal for column k */
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Emit column word; shift accumulator down one word. */
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #760\n\t" /* columns 0..190 via the loop */
+ "ble 1b\n\t"
+ /* Top word (column 191). */
+ "str r6, [sp, r5]\n\t"
+ "\n4:\n\t"
+ /* Copy the stack buffer to r, 16 bytes per iteration. */
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Compact product-scanning 96-word squaring.  Only the lower triangle
+ * of products is visited: off-diagonal products a[i]*a[j] (i != j) are
+ * added twice, the diagonal square a[i]^2 once.  The 96-bit column
+ * accumulator lives in r6:r7:r8; the 192-word result is built in a
+ * 768-byte stack buffer and copied to r at the end, so r may alias a.
+ *
+ * Fix: the clobber list named "r9" twice; the redundant duplicate is
+ * removed (no behavior change — GCC accepts but ignores duplicates).
+ *
+ * r A single precision integer. Receives the 192-word square.
+ * a A single precision integer (96 words).
+ */
+static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ /* 768-byte (192-word) result buffer on the stack. */
+ "sub sp, sp, #768\n\t"
+ "mov r12, #0\n\t" /* constant zero for adc */
+ "mov r6, #0\n\t" /* r6:r7:r8 = column accumulator */
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "mov r5, #0\n\t" /* r5 = output byte offset */
+ "\n1:\n\t"
+ /* r3 = max(0, k*4 - 380) (index into a); r4 = k*4 - r3. */
+ "subs r3, r5, #380\n\t"
+ "it cc\n\t" /* Thumb-2: predicate the next instruction */
+ "movcc r3, r12\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ /* Diagonal element? square once instead of doubling. */
+ "cmp r4, r3\n\t"
+ "beq 4f\n\t"
+ /* Off-diagonal: add a[r3/4]*a[r4/4] twice. */
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r9, [%[a], r4]\n\t"
+ "umull r9, r10, r14, r9\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ /* Diagonal: add a[r3/4]^2 once. */
+ "ldr r14, [%[a], r3]\n\t"
+ "umull r9, r10, r14, r14\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "\n5:\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #384\n\t" /* ran off the end of a */
+ "beq 3f\n\t"
+ "cmp r3, r4\n\t" /* crossed the diagonal */
+ "bgt 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Emit column word; shift accumulator down one word. */
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #760\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ "\n4:\n\t"
+ /* Copy the stack buffer to r, 16 bytes per iteration. */
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * Branch-free conditional copy: every one of the 48 words of a is
+ * ANDed with m (typically 0 or all ones) and written to r.
+ *
+ * r A single precision integer. Result (48 words).
+ * a A single precision integer. Source (48 words).
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+ int i = 0;
+
+ while (i < 48) {
+ r[i] = a[i] & m;
+ i++;
+ }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Compact looping 48-word (1536-bit) addition, four words per
+ * iteration.  The carry is parked in c across the loop control and
+ * re-created at the top with "adds c, c, #-1": c==1 gives
+ * 1 + 0xFFFFFFFF which sets C, c==0 clears it.
+ *
+ * r A single precision integer. Receives the 48-word sum.
+ * a A single precision integer (48 words).
+ * b A single precision integer (48 words).
+ * Returns the carry out (0 or 1).
+ */
+static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r12 = one-past-the-end of a (48 words = 192 bytes). */
+ "add r12, %[a], #192\n\t"
+ "\n1:\n\t"
+ /* Restore the carry flag from c (see function comment). */
+ "adds %[c], %[c], #-1\n\t"
+ /* Load four words from each operand, post-incrementing. */
+ "ldr r4, [%[a]], #4\n\t"
+ "ldr r5, [%[a]], #4\n\t"
+ "ldr r6, [%[a]], #4\n\t"
+ "ldr r7, [%[a]], #4\n\t"
+ "ldr r8, [%[b]], #4\n\t"
+ "ldr r9, [%[b]], #4\n\t"
+ "ldr r10, [%[b]], #4\n\t"
+ "ldr r14, [%[b]], #4\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r]], #4\n\t"
+ "str r5, [%[r]], #4\n\t"
+ "str r6, [%[r]], #4\n\t"
+ "str r7, [%[r]], #4\n\t"
+ /* Park the carry flag in c as 0/1 before cmp clobbers it. */
+ "mov r4, #0\n\t"
+ "adc %[c], r4, #0\n\t"
+ "cmp %[a], r12\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Compact looping 48-word subtraction, four words per iteration.
+ * Between iterations the borrow is parked in c as 0 (no borrow) or
+ * 0xFFFFFFFF (borrow, via "sbc c, r14, r14") and re-created at the
+ * loop top by "subs c, r14, c": 0 - 0 sets C (no borrow), while
+ * 0 - 0xFFFFFFFF clears C (borrow pending).
+ *
+ * a A single precision integer (48 words). Updated in place.
+ * b A single precision integer (48 words).
+ * Returns 0 on no borrow, all-ones mask on borrow.
+ */
+static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r14 = constant 0 for the borrow save/restore idiom. */
+ "mov r14, #0\n\t"
+ /* r12 = one-past-the-end of a (48 words = 192 bytes). */
+ "add r12, %[a], #192\n\t"
+ "\n1:\n\t"
+ /* Restore the borrow into the C flag (see function comment). */
+ "subs %[c], r14, %[c]\n\t"
+ /* a words via fixed offsets (a advances at the stores below);
+ * b words via post-increment. */
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b]], #4\n\t"
+ "ldr r8, [%[b]], #4\n\t"
+ "ldr r9, [%[b]], #4\n\t"
+ "ldr r10, [%[b]], #4\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "sbcs r6, r6, r10\n\t"
+ "str r3, [%[a]], #4\n\t"
+ "str r4, [%[a]], #4\n\t"
+ "str r5, [%[a]], #4\n\t"
+ "str r6, [%[a]], #4\n\t"
+ /* c = 0 - 0 - !C: 0 if no borrow, 0xFFFFFFFF if borrow. */
+ "sbc %[c], r14, r14\n\t"
+ "cmp %[a], r12\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Compact product-scanning (column-wise) 48x48-word multiply.  Each
+ * output word k is the sum of a[i]*b[k-i]; the 96-bit column
+ * accumulator lives in r6:r7:r8 (low..high).  The 96-word result is
+ * built in a 384-byte stack buffer and copied to r at the end, so r
+ * may alias a or b.
+ *
+ * r A single precision integer. Receives the 96-word product.
+ * a A single precision integer (48 words).
+ * b A single precision integer (48 words).
+ */
+static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ /* 384-byte (96-word) result buffer on the stack. */
+ "sub sp, sp, #384\n\t"
+ /* r5 = output byte offset k*4; r6:r7:r8 = column accumulator. */
+ "mov r5, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ /* r3 = byte index into a = max(0, k*4 - 188); r4 = k*4 - r3
+ * (byte index into b) — clamps both indices to [0, 188]. */
+ "subs r3, r5, #188\n\t"
+ "it cc\n\t" /* Thumb-2: predicate the next instruction */
+ "movcc r3, #0\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ /* Accumulate a[r3/4] * b[r4/4] into the 96-bit column sum. */
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r12, [%[b], r4]\n\t"
+ "umull r9, r10, r14, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ /* Walk the anti-diagonal: i++, j--. */
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #192\n\t" /* ran off the end of a */
+ "beq 3f\n\t"
+ "cmp r3, r5\n\t" /* crossed the diagonal for column k */
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Emit column word; shift accumulator down one word. */
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #376\n\t" /* columns 0..94 via the loop */
+ "ble 1b\n\t"
+ /* Top word (column 95). */
+ "str r6, [sp, r5]\n\t"
+ "\n4:\n\t"
+ /* Copy the stack buffer to r, 16 bytes per iteration. */
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Compact product-scanning 48-word squaring.  Only the lower triangle
+ * of products is visited: off-diagonal products a[i]*a[j] (i != j) are
+ * added twice, the diagonal square a[i]^2 once.  The 96-bit column
+ * accumulator lives in r6:r7:r8; the 96-word result is built in a
+ * 384-byte stack buffer and copied to r at the end, so r may alias a.
+ *
+ * Fix: the clobber list named "r9" twice; the redundant duplicate is
+ * removed (no behavior change — GCC accepts but ignores duplicates).
+ *
+ * r A single precision integer. Receives the 96-word square.
+ * a A single precision integer (48 words).
+ */
+static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ /* 384-byte (96-word) result buffer on the stack. */
+ "sub sp, sp, #384\n\t"
+ "mov r12, #0\n\t" /* constant zero for adc */
+ "mov r6, #0\n\t" /* r6:r7:r8 = column accumulator */
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "mov r5, #0\n\t" /* r5 = output byte offset */
+ "\n1:\n\t"
+ /* r3 = max(0, k*4 - 188) (index into a); r4 = k*4 - r3. */
+ "subs r3, r5, #188\n\t"
+ "it cc\n\t" /* Thumb-2: predicate the next instruction */
+ "movcc r3, r12\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ /* Diagonal element? square once instead of doubling. */
+ "cmp r4, r3\n\t"
+ "beq 4f\n\t"
+ /* Off-diagonal: add a[r3/4]*a[r4/4] twice. */
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r9, [%[a], r4]\n\t"
+ "umull r9, r10, r14, r9\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ /* Diagonal: add a[r3/4]^2 once. */
+ "ldr r14, [%[a], r3]\n\t"
+ "umull r9, r10, r14, r14\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "\n5:\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #192\n\t" /* ran off the end of a */
+ "beq 3f\n\t"
+ "cmp r3, r4\n\t" /* crossed the diagonal */
+ "bgt 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Emit column word; shift accumulator down one word. */
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #376\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ "\n4:\n\t"
+ /* Copy the stack buffer to r, 16 bytes per iteration. */
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * Computes rho = -a[0]^-1 mod 2^32 for Montgomery reduction.  A
+ * 4-bit seed inverse is refined by Newton-Raphson iteration, each
+ * step doubling the number of correct low bits (4 -> 8 -> 16 -> 32).
+ * Requires a[0] to be odd (always true for an RSA/DH modulus).
+ *
+ * a   A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit inv;
+ sp_digit m0 = a[0];
+ int iter;
+
+ /* Seed: inv * m0 == 1 mod 2^4. */
+ inv = (((m0 + 2) & 4) << 1) + m0;
+ /* Three Newton steps: correct mod 2^8, 2^16, then 2^32. */
+ for (iter = 0; iter < 3; iter++) {
+ inv *= 2 - m0 * inv;
+ }
+
+ /* rho = -1/m mod 2^32 (negate the inverse). */
+ *rho = (sp_digit)0 - inv;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Small variant: loop over the 96 words of a with a three-word carry
+  * pipeline in r3:r4:r5. r9 is the running byte offset (4..380), r10 is a
+  * constant zero used to capture the carry flag, and the final carry word
+  * is written to r[96] at byte offset 384. */
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r5, r3, %[b], r8\n\t"
+ "mov r4, #0\n\t"
+ "str r5, [%[r]]\n\t"
+ "mov r5, #0\n\t"
+ "mov r9, #4\n\t"
+ "1:\n\t"
+ "ldr r8, [%[a], r9]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], r9]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r9, r9, #4\n\t"
+ "cmp r9, #384\n\t"
+ "blt 1b\n\t"
+ "str r3, [%[r], #384]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#else
+ /* Fully unrolled variant: same computation with explicit byte offsets
+  * 0..380. The carry window rotates through r3/r4/r5 so each 8-line stanza
+  * zeroes the incoming top word, adds the umull product into the lower two
+  * words, and stores the completed output word. The 97th word (pure carry)
+  * goes to r[96] at offset 384. */
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r3, r4, %[b], r8\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [%[r]]\n\t"
+ "# A[1] * B\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "# A[2] * B\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "# A[3] * B\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "# A[4] * B\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "# A[5] * B\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "# A[6] * B\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "# A[7] * B\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "# A[8] * B\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #32]\n\t"
+ "# A[9] * B\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "# A[10] * B\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "# A[11] * B\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #44]\n\t"
+ "# A[12] * B\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "# A[13] * B\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "# A[14] * B\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "# A[15] * B\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "# A[16] * B\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "# A[17] * B\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "# A[18] * B\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #72]\n\t"
+ "# A[19] * B\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "# A[20] * B\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #80]\n\t"
+ "# A[21] * B\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #84]\n\t"
+ "# A[22] * B\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "# A[23] * B\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #92]\n\t"
+ "# A[24] * B\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #96]\n\t"
+ "# A[25] * B\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "# A[26] * B\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #104]\n\t"
+ "# A[27] * B\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #108]\n\t"
+ "# A[28] * B\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "# A[29] * B\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #116]\n\t"
+ "# A[30] * B\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #120]\n\t"
+ "# A[31] * B\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #124]\n\t"
+ "# A[32] * B\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #128]\n\t"
+ "# A[33] * B\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #132]\n\t"
+ "# A[34] * B\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "# A[35] * B\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #140]\n\t"
+ "# A[36] * B\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #144]\n\t"
+ "# A[37] * B\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #148]\n\t"
+ "# A[38] * B\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #152]\n\t"
+ "# A[39] * B\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #156]\n\t"
+ "# A[40] * B\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "# A[41] * B\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #164]\n\t"
+ "# A[42] * B\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #168]\n\t"
+ "# A[43] * B\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #172]\n\t"
+ "# A[44] * B\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #176]\n\t"
+ "# A[45] * B\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #180]\n\t"
+ "# A[46] * B\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "# A[47] * B\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #188]\n\t"
+ "# A[48] * B\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #192]\n\t"
+ "# A[49] * B\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #196]\n\t"
+ "# A[50] * B\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #200]\n\t"
+ "# A[51] * B\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #204]\n\t"
+ "# A[52] * B\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "# A[53] * B\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #212]\n\t"
+ "# A[54] * B\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #216]\n\t"
+ "# A[55] * B\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #220]\n\t"
+ "# A[56] * B\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #224]\n\t"
+ "# A[57] * B\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #228]\n\t"
+ "# A[58] * B\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #232]\n\t"
+ "# A[59] * B\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #236]\n\t"
+ "# A[60] * B\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #240]\n\t"
+ "# A[61] * B\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #244]\n\t"
+ "# A[62] * B\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #248]\n\t"
+ "# A[63] * B\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #252]\n\t"
+ "# A[64] * B\n\t"
+ "ldr r8, [%[a], #256]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #256]\n\t"
+ "# A[65] * B\n\t"
+ "ldr r8, [%[a], #260]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #260]\n\t"
+ "# A[66] * B\n\t"
+ "ldr r8, [%[a], #264]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #264]\n\t"
+ "# A[67] * B\n\t"
+ "ldr r8, [%[a], #268]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #268]\n\t"
+ "# A[68] * B\n\t"
+ "ldr r8, [%[a], #272]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #272]\n\t"
+ "# A[69] * B\n\t"
+ "ldr r8, [%[a], #276]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #276]\n\t"
+ "# A[70] * B\n\t"
+ "ldr r8, [%[a], #280]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #280]\n\t"
+ "# A[71] * B\n\t"
+ "ldr r8, [%[a], #284]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #284]\n\t"
+ "# A[72] * B\n\t"
+ "ldr r8, [%[a], #288]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #288]\n\t"
+ "# A[73] * B\n\t"
+ "ldr r8, [%[a], #292]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #292]\n\t"
+ "# A[74] * B\n\t"
+ "ldr r8, [%[a], #296]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #296]\n\t"
+ "# A[75] * B\n\t"
+ "ldr r8, [%[a], #300]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #300]\n\t"
+ "# A[76] * B\n\t"
+ "ldr r8, [%[a], #304]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #304]\n\t"
+ "# A[77] * B\n\t"
+ "ldr r8, [%[a], #308]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #308]\n\t"
+ "# A[78] * B\n\t"
+ "ldr r8, [%[a], #312]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #312]\n\t"
+ "# A[79] * B\n\t"
+ "ldr r8, [%[a], #316]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #316]\n\t"
+ "# A[80] * B\n\t"
+ "ldr r8, [%[a], #320]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #320]\n\t"
+ "# A[81] * B\n\t"
+ "ldr r8, [%[a], #324]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #324]\n\t"
+ "# A[82] * B\n\t"
+ "ldr r8, [%[a], #328]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #328]\n\t"
+ "# A[83] * B\n\t"
+ "ldr r8, [%[a], #332]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #332]\n\t"
+ "# A[84] * B\n\t"
+ "ldr r8, [%[a], #336]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #336]\n\t"
+ "# A[85] * B\n\t"
+ "ldr r8, [%[a], #340]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #340]\n\t"
+ "# A[86] * B\n\t"
+ "ldr r8, [%[a], #344]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #344]\n\t"
+ "# A[87] * B\n\t"
+ "ldr r8, [%[a], #348]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #348]\n\t"
+ "# A[88] * B\n\t"
+ "ldr r8, [%[a], #352]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #352]\n\t"
+ "# A[89] * B\n\t"
+ "ldr r8, [%[a], #356]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #356]\n\t"
+ "# A[90] * B\n\t"
+ "ldr r8, [%[a], #360]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #360]\n\t"
+ "# A[91] * B\n\t"
+ "ldr r8, [%[a], #364]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #364]\n\t"
+ "# A[92] * B\n\t"
+ "ldr r8, [%[a], #368]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #368]\n\t"
+ "# A[93] * B\n\t"
+ "ldr r8, [%[a], #372]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #372]\n\t"
+ "# A[94] * B\n\t"
+ "ldr r8, [%[a], #376]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #376]\n\t"
+ "# A[95] * B\n\t"
+ "ldr r8, [%[a], #380]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r5, [%[r], #380]\n\t"
+ "str r3, [%[r], #384]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#endif
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
+{
+ int i;
+
+ /* Set r to zero, i.e. 2^1536 truncated to 48 words. */
+ for (i = 0; i < 48; i++) {
+ r[i] = 0;
+ }
+
+ /* m occupies the full 3072 bits, so one subtraction gives 2^n mod m. */
+ sp_3072_sub_in_place_48(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not subtracting.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ /* Borrow out of the 48-word subtraction: 0 or (sp_digit)-1. */
+ sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+ /* Small variant: loop over byte offsets 0..188 in r8. Each b word is
+  * masked with m before subtracting, so m == 0 makes this a copy of a and
+  * m == -1 a full subtraction. The borrow is carried between iterations
+  * through %[c] ("subs %[c], r9, %[c]" re-arms the carry flag from it). */
+ __asm__ __volatile__ (
+ "mov r9, #0\n\t"
+ "mov r8, #0\n\t"
+ "1:\n\t"
+ "subs %[c], r9, %[c]\n\t"
+ "ldr r4, [%[a], r8]\n\t"
+ "ldr r5, [%[b], r8]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbc %[c], r9, r9\n\t"
+ "str r4, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, #192\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#else
+ /* Unrolled variant: two words per stanza (r4/r6 from a, r5/r7 masked from
+  * b). The first pair uses subs, every later pair sbcs, forming one long
+  * borrow chain across all 48 words; the final sbc extracts the borrow
+  * into c as 0 or -1. */
+ __asm__ __volatile__ (
+
+ "mov r9, #0\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "subs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r6, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r6, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r6, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r6, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r6, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r7, [%[b], #44]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r6, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r6, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r6, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r7, [%[b], #60]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "str r6, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r6, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r6, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r6, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r7, [%[b], #76]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "str r6, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r6, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r6, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r6, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r7, [%[b], #92]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "str r6, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r6, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r6, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r6, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r7, [%[b], #108]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "str r6, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r6, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r6, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r6, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r7, [%[b], #124]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "str r6, [%[r], #124]\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r6, [%[a], #132]\n\t"
+ "ldr r5, [%[b], #128]\n\t"
+ "ldr r7, [%[b], #132]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #128]\n\t"
+ "str r6, [%[r], #132]\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r6, [%[a], #140]\n\t"
+ "ldr r5, [%[b], #136]\n\t"
+ "ldr r7, [%[b], #140]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "str r6, [%[r], #140]\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r6, [%[a], #148]\n\t"
+ "ldr r5, [%[b], #144]\n\t"
+ "ldr r7, [%[b], #148]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #144]\n\t"
+ "str r6, [%[r], #148]\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r6, [%[a], #156]\n\t"
+ "ldr r5, [%[b], #152]\n\t"
+ "ldr r7, [%[b], #156]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #152]\n\t"
+ "str r6, [%[r], #156]\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r6, [%[a], #164]\n\t"
+ "ldr r5, [%[b], #160]\n\t"
+ "ldr r7, [%[b], #164]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "str r6, [%[r], #164]\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r6, [%[a], #172]\n\t"
+ "ldr r5, [%[b], #168]\n\t"
+ "ldr r7, [%[b], #172]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #168]\n\t"
+ "str r6, [%[r], #172]\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r6, [%[a], #180]\n\t"
+ "ldr r5, [%[b], #176]\n\t"
+ "ldr r7, [%[b], #180]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #176]\n\t"
+ "str r6, [%[r], #180]\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r6, [%[a], #188]\n\t"
+ "ldr r5, [%[b], #184]\n\t"
+ "ldr r7, [%[b], #188]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "str r6, [%[r], #188]\n\t"
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#endif /* WOLFSSL_SP_SMALL */
+
+ return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ "# i = 0\n\t"
+ "mov r12, #0\n\t"
+ "ldr r10, [%[a], #0]\n\t"
+ "ldr r14, [%[a], #4]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul r8, %[mp], r10\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "ldr r7, [%[m], #0]\n\t"
+ "ldr r9, [%[a], #0]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r10, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "ldr r7, [%[m], #4]\n\t"
+ "ldr r9, [%[a], #4]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r14, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r10, r10, r5\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr r7, [%[m], #8]\n\t"
+ "ldr r14, [%[a], #8]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r14, r14, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r14, r14, r4\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr r7, [%[m], #12]\n\t"
+ "ldr r9, [%[a], #12]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #12]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr r7, [%[m], #16]\n\t"
+ "ldr r9, [%[a], #16]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #16]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr r7, [%[m], #20]\n\t"
+ "ldr r9, [%[a], #20]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #20]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr r7, [%[m], #24]\n\t"
+ "ldr r9, [%[a], #24]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #24]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr r7, [%[m], #28]\n\t"
+ "ldr r9, [%[a], #28]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #28]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr r7, [%[m], #32]\n\t"
+ "ldr r9, [%[a], #32]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #32]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr r7, [%[m], #36]\n\t"
+ "ldr r9, [%[a], #36]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #36]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr r7, [%[m], #40]\n\t"
+ "ldr r9, [%[a], #40]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #40]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr r7, [%[m], #44]\n\t"
+ "ldr r9, [%[a], #44]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #44]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+12] += m[12] * mu\n\t"
+ "ldr r7, [%[m], #48]\n\t"
+ "ldr r9, [%[a], #48]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #48]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+13] += m[13] * mu\n\t"
+ "ldr r7, [%[m], #52]\n\t"
+ "ldr r9, [%[a], #52]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #52]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+14] += m[14] * mu\n\t"
+ "ldr r7, [%[m], #56]\n\t"
+ "ldr r9, [%[a], #56]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #56]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+15] += m[15] * mu\n\t"
+ "ldr r7, [%[m], #60]\n\t"
+ "ldr r9, [%[a], #60]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #60]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+16] += m[16] * mu\n\t"
+ "ldr r7, [%[m], #64]\n\t"
+ "ldr r9, [%[a], #64]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #64]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+17] += m[17] * mu\n\t"
+ "ldr r7, [%[m], #68]\n\t"
+ "ldr r9, [%[a], #68]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #68]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+18] += m[18] * mu\n\t"
+ "ldr r7, [%[m], #72]\n\t"
+ "ldr r9, [%[a], #72]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #72]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+19] += m[19] * mu\n\t"
+ "ldr r7, [%[m], #76]\n\t"
+ "ldr r9, [%[a], #76]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #76]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+20] += m[20] * mu\n\t"
+ "ldr r7, [%[m], #80]\n\t"
+ "ldr r9, [%[a], #80]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #80]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+21] += m[21] * mu\n\t"
+ "ldr r7, [%[m], #84]\n\t"
+ "ldr r9, [%[a], #84]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #84]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+22] += m[22] * mu\n\t"
+ "ldr r7, [%[m], #88]\n\t"
+ "ldr r9, [%[a], #88]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #88]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+23] += m[23] * mu\n\t"
+ "ldr r7, [%[m], #92]\n\t"
+ "ldr r9, [%[a], #92]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #92]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+24] += m[24] * mu\n\t"
+ "ldr r7, [%[m], #96]\n\t"
+ "ldr r9, [%[a], #96]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #96]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+25] += m[25] * mu\n\t"
+ "ldr r7, [%[m], #100]\n\t"
+ "ldr r9, [%[a], #100]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #100]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+26] += m[26] * mu\n\t"
+ "ldr r7, [%[m], #104]\n\t"
+ "ldr r9, [%[a], #104]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #104]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+27] += m[27] * mu\n\t"
+ "ldr r7, [%[m], #108]\n\t"
+ "ldr r9, [%[a], #108]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #108]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+28] += m[28] * mu\n\t"
+ "ldr r7, [%[m], #112]\n\t"
+ "ldr r9, [%[a], #112]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #112]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+29] += m[29] * mu\n\t"
+ "ldr r7, [%[m], #116]\n\t"
+ "ldr r9, [%[a], #116]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #116]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+30] += m[30] * mu\n\t"
+ "ldr r7, [%[m], #120]\n\t"
+ "ldr r9, [%[a], #120]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #120]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+31] += m[31] * mu\n\t"
+ "ldr r7, [%[m], #124]\n\t"
+ "ldr r9, [%[a], #124]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #124]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+32] += m[32] * mu\n\t"
+ "ldr r7, [%[m], #128]\n\t"
+ "ldr r9, [%[a], #128]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #128]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+33] += m[33] * mu\n\t"
+ "ldr r7, [%[m], #132]\n\t"
+ "ldr r9, [%[a], #132]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #132]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+34] += m[34] * mu\n\t"
+ "ldr r7, [%[m], #136]\n\t"
+ "ldr r9, [%[a], #136]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #136]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+35] += m[35] * mu\n\t"
+ "ldr r7, [%[m], #140]\n\t"
+ "ldr r9, [%[a], #140]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #140]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+36] += m[36] * mu\n\t"
+ "ldr r7, [%[m], #144]\n\t"
+ "ldr r9, [%[a], #144]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #144]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+37] += m[37] * mu\n\t"
+ "ldr r7, [%[m], #148]\n\t"
+ "ldr r9, [%[a], #148]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #148]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+38] += m[38] * mu\n\t"
+ "ldr r7, [%[m], #152]\n\t"
+ "ldr r9, [%[a], #152]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #152]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+39] += m[39] * mu\n\t"
+ "ldr r7, [%[m], #156]\n\t"
+ "ldr r9, [%[a], #156]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #156]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+40] += m[40] * mu\n\t"
+ "ldr r7, [%[m], #160]\n\t"
+ "ldr r9, [%[a], #160]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #160]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+41] += m[41] * mu\n\t"
+ "ldr r7, [%[m], #164]\n\t"
+ "ldr r9, [%[a], #164]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #164]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+42] += m[42] * mu\n\t"
+ "ldr r7, [%[m], #168]\n\t"
+ "ldr r9, [%[a], #168]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #168]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+43] += m[43] * mu\n\t"
+ "ldr r7, [%[m], #172]\n\t"
+ "ldr r9, [%[a], #172]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #172]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+44] += m[44] * mu\n\t"
+ "ldr r7, [%[m], #176]\n\t"
+ "ldr r9, [%[a], #176]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #176]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+45] += m[45] * mu\n\t"
+ "ldr r7, [%[m], #180]\n\t"
+ "ldr r9, [%[a], #180]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #180]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+46] += m[46] * mu\n\t"
+ "ldr r7, [%[m], #184]\n\t"
+ "ldr r9, [%[a], #184]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #184]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+47] += m[47] * mu\n\t"
+ "ldr r7, [%[m], #188]\n\t"
+ "ldr r9, [%[a], #188]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r7, r7, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ "adc %[ca], %[ca], %[ca]\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #188]\n\t"
+ "ldr r9, [%[a], #192]\n\t"
+ "adcs r9, r9, r7\n\t"
+ "str r9, [%[a], #192]\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ "# i += 1\n\t"
+ "add %[a], %[a], #4\n\t"
+ "add r12, r12, #4\n\t"
+ "cmp r12, #192\n\t"
+ "blt 1b\n\t"
+ "str r10, [%[a], #0]\n\t"
+ "str r14, [%[a], #4]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Computes the full 48-digit product and then Montgomery-reduces it,
+ * so the result stays in Montgomery form.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_3072_mul_48(r, a, b);
+ sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Computes the full 48-digit square and then Montgomery-reduces it,
+ * so the result stays in Montgomery form.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_3072_sqr_48(r, a);
+ sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Both variants write 49 result words: the final carry word is stored
+ * at byte offset 192 (word 48).
+ *
+ * r A single precision integer (49 words).
+ * a A single precision integer (48 words).
+ * b A single precision digit.
+ */
+static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Small code size: one loop iteration per digit. r3/r4/r5 hold the
+ * rolling low/mid/carry accumulator words; r9 is the byte offset. */
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r5, r3, %[b], r8\n\t"
+ "mov r4, #0\n\t"
+ "str r5, [%[r]]\n\t"
+ "mov r5, #0\n\t"
+ "mov r9, #4\n\t"
+ "1:\n\t"
+ "ldr r8, [%[a], r9]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], r9]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r9, r9, #4\n\t"
+ "cmp r9, #192\n\t"
+ "blt 1b\n\t"
+ "str r3, [%[r], #192]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#else
+ /* Fully unrolled: r3/r4/r5 rotate as the three-word accumulator
+ * (low word stored, mid word carries up, third word freshly zeroed
+ * each step to collect the adc carry via r10 == 0). */
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r3, r4, %[b], r8\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [%[r]]\n\t"
+ "# A[1] * B\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "# A[2] * B\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "# A[3] * B\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "# A[4] * B\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "# A[5] * B\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "# A[6] * B\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "# A[7] * B\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "# A[8] * B\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #32]\n\t"
+ "# A[9] * B\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "# A[10] * B\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "# A[11] * B\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #44]\n\t"
+ "# A[12] * B\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "# A[13] * B\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "# A[14] * B\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "# A[15] * B\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "# A[16] * B\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "# A[17] * B\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "# A[18] * B\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #72]\n\t"
+ "# A[19] * B\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "# A[20] * B\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #80]\n\t"
+ "# A[21] * B\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #84]\n\t"
+ "# A[22] * B\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "# A[23] * B\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #92]\n\t"
+ "# A[24] * B\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #96]\n\t"
+ "# A[25] * B\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "# A[26] * B\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #104]\n\t"
+ "# A[27] * B\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #108]\n\t"
+ "# A[28] * B\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "# A[29] * B\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #116]\n\t"
+ "# A[30] * B\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #120]\n\t"
+ "# A[31] * B\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #124]\n\t"
+ "# A[32] * B\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #128]\n\t"
+ "# A[33] * B\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #132]\n\t"
+ "# A[34] * B\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "# A[35] * B\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #140]\n\t"
+ "# A[36] * B\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #144]\n\t"
+ "# A[37] * B\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #148]\n\t"
+ "# A[38] * B\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #152]\n\t"
+ "# A[39] * B\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #156]\n\t"
+ "# A[40] * B\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "# A[41] * B\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #164]\n\t"
+ "# A[42] * B\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #168]\n\t"
+ "# A[43] * B\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #172]\n\t"
+ "# A[44] * B\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #176]\n\t"
+ "# A[45] * B\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #180]\n\t"
+ "# A[46] * B\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "# A[47] * B\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r5, [%[r], #188]\n\t"
+ "str r3, [%[r], #192]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div)
+{
+ sp_digit r = 0;
+
+ /* Branch-free shift-and-subtract estimate of the top quotient bits
+ * (the sbc r8,r8,r8 idiom turns the compare into an all-ones/zero
+ * mask), followed by two umull multiply-and-correct passes and a
+ * final conditional increment.
+ * NOTE(review): appears data-independent in control flow, i.e.
+ * constant-time with respect to the operand values — confirm. */
+ __asm__ __volatile__ (
+ "lsr r5, %[div], #1\n\t"
+ "add r5, r5, #1\n\t"
+ "mov r6, %[d0]\n\t"
+ "mov r7, %[d1]\n\t"
+ "# Do top 32\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "# Next 30 bits\n\t"
+ "mov r4, #29\n\t"
+ "1:\n\t"
+ "movs r6, r6, lsl #1\n\t"
+ "adc r7, r7, r7\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "subs r4, r4, #1\n\t"
+ "bpl 1b\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "add %[r], %[r], #1\n\t"
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ "subs r8, %[div], r4\n\t"
+ "sbc r8, r8, r8\n\t"
+ "sub %[r], %[r], r8\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r6", "r7", "r8"
+ );
+ return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = -1;
+ sp_digit one = 1;
+
+
+#ifdef WOLFSSL_SP_SMALL
+ __asm__ __volatile__ (
+ "mov r7, #0\n\t"
+ "mov r3, #-1\n\t"
+ "mov r6, #188\n\t"
+ "1:\n\t"
+ "ldr r4, [%[a], r6]\n\t"
+ "ldr r5, [%[b], r6]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "subs r6, r6, #4\n\t"
+ "bcs 1b\n\t"
+ "eor %[r], %[r], r3\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+#else
+ __asm__ __volatile__ (
+ "mov r7, #0\n\t"
+ "mov r3, #-1\n\t"
+ "ldr r4, [%[a], #188]\n\t"
+ "ldr r5, [%[b], #188]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r5, [%[b], #184]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #180]\n\t"
+ "ldr r5, [%[b], #180]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r5, [%[b], #176]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #172]\n\t"
+ "ldr r5, [%[b], #172]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r5, [%[b], #168]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #164]\n\t"
+ "ldr r5, [%[b], #164]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r5, [%[b], #160]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #156]\n\t"
+ "ldr r5, [%[b], #156]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r5, [%[b], #152]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #148]\n\t"
+ "ldr r5, [%[b], #148]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r5, [%[b], #144]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #140]\n\t"
+ "ldr r5, [%[b], #140]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r5, [%[b], #136]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #132]\n\t"
+ "ldr r5, [%[b], #132]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r5, [%[b], #128]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "eor %[r], %[r], r3\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+#endif
+
+ return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook division, one estimated quotient digit per iteration.
+ * The estimate may be too large by up to 2, so each iteration applies
+ * two conditional (masked) add-backs instead of branching, keeping the
+ * operation sequence independent of the data.
+ *
+ * a Number to be divided (2 * 48 digits).
+ * d Number to divide with (48 digits).
+ * m Multiplier result (unused - may be NULL).
+ * r Remainder from the division (48 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[96], t2[49];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+
+    /* Top divisor digit used for quotient-digit estimation. */
+    div = d[47];
+    /* Working copy of the dividend; reduced in place. */
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+    for (i=47; i>=0; i--) {
+        /* Estimate quotient digit from the top two remainder words. */
+        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+        /* Subtract r1 * d from the current remainder window. */
+        sp_3072_mul_d_48(t2, d, r1);
+        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+        t1[48 + i] -= t2[48];
+        /* If the estimate was high the top word went negative (all-ones
+         * mask): add d back.  Done twice as the estimate can be off by 2. */
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+    }
+
+    /* Final constant-time correction: subtract d once when t1 >= d. */
+    r1 = sp_3072_cmp_48(t1, d) >= 0;
+    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_3072_div_48 that keeps only the remainder.
+ *
+ * r A single precision number that is the reduced result (48 digits).
+ * a A single precision number that is to be reduced (2 * 48 digits).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_48(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window exponentiation in Montgomery form: precompute
+ * t[j] = a^j (Montgomery form), then for each 4-bit window of the
+ * exponent perform 4 squarings followed by one table multiplication.
+ * The table multiply runs for every window - including zero-valued
+ * ones (t[0] holds the Montgomery norm, i.e. Montgomery 1) - so the
+ * operation sequence does not depend on the exponent bits.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero to reduce a modulo m first (a may be 2*48 digits).
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][96];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* One allocation carved into the 16 window-table entries below. */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 96;
+        }
+#endif
+        norm = t[0];
+
+        /* mp = negative inverse of m mod 2^32; norm = 2^n mod m. */
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        /* Convert a to Montgomery form: place (reduced) a in the upper
+         * 48 digits (i.e. multiply by R) and reduce modulo m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_48(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build window table: t[j] = a^j in Montgomery form. */
+        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+
+        /* i: index of the exponent word being consumed; n: that word,
+         * left-aligned; c: number of unconsumed bits left in n. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        /* Size the first (possibly partial) window so the remaining
+         * bit count is a multiple of 4. */
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        /* Seed the result with the leading window's table entry. */
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        for (; i>=0 || c>=4; ) {
+            /* Extract the next 4-bit window y, refilling n from the
+             * next exponent word when fewer than 4 bits remain. */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            /* r = r^16 * t[y] (4 squarings + 1 multiply per window). */
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+
+            sp_3072_mont_mul_48(r, r, t[y], m, mp);
+        }
+
+        /* Convert back from Montgomery form and do a constant-time
+         * final reduction (subtract m when r >= m). */
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window exponentiation in Montgomery form: precompute
+ * t[j] = a^j (Montgomery form), then for each 5-bit window of the
+ * exponent perform 5 squarings followed by one table multiplication.
+ * The table multiply runs for every window - including zero-valued
+ * ones (t[0] holds the Montgomery norm, i.e. Montgomery 1) - so the
+ * operation sequence does not depend on the exponent bits.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero to reduce a modulo m first (a may be 2*48 digits).
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][96];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* One allocation carved into the 32 window-table entries below. */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 96;
+        }
+#endif
+        norm = t[0];
+
+        /* mp = negative inverse of m mod 2^32; norm = 2^n mod m. */
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        /* Convert a to Montgomery form: place (reduced) a in the upper
+         * 48 digits (i.e. multiply by R) and reduce modulo m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_48(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build window table: t[j] = a^j in Montgomery form. */
+        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_48(t[20], t[10], m, mp);
+        sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_48(t[22], t[11], m, mp);
+        sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_48(t[24], t[12], m, mp);
+        sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_48(t[26], t[13], m, mp);
+        sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_48(t[28], t[14], m, mp);
+        sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_48(t[30], t[15], m, mp);
+        sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
+
+        /* i: index of the exponent word being consumed; n: that word,
+         * left-aligned; c: number of unconsumed bits left in n. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        /* Size the first (possibly partial) window so the remaining
+         * bit count is a multiple of 5. */
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        /* Seed the result with the leading window's table entry. */
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        for (; i>=0 || c>=5; ) {
+            /* Extract the next 5-bit window y, refilling n from the
+             * next exponent word when fewer than 5 bits remain. */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            /* r = r^32 * t[y] (5 squarings + 1 multiply per window). */
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+
+            sp_3072_mont_mul_48(r, r, t[y], m, mp);
+        }
+
+        /* Convert back from Montgomery form and do a constant-time
+         * final reduction (subtract m when r >= m). */
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * Computes 0 - m over 96 digits: with m a full 3072-bit value the
+ * result is 2^3072 - m, which equals 2^3072 mod m (the Montgomery
+ * norm, i.e. the Montgomery form of 1).
+ *
+ * r A single precision number (96 digits, output).
+ * m A single precision number (the 3072-bit modulus).
+ */
+static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 96);
+
+    /* r = 2^n mod m */
+    sp_3072_sub_in_place_96(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant time with respect to m: b is masked with m (so either b or
+ * zero is subtracted) and the same instruction sequence executes in
+ * both cases, over all 96 words (384 bytes).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply (all ones or zero).
+ * Returns 0 when there is no borrow out, all-ones when there is
+ * (from the final "sbc c, r9, r9").
+ */
+static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    /* Looped form: r8 is the byte offset; each iteration re-seeds the
+     * carry flag from c, subtracts one masked word with borrow, then
+     * saves the new borrow back into c (0 or all-ones). */
+    __asm__ __volatile__ (
+        "mov r9, #0\n\t"
+        "mov r8, #0\n\t"
+        "1:\n\t"
+        "subs %[c], r9, %[c]\n\t"
+        "ldr r4, [%[a], r8]\n\t"
+        "ldr r5, [%[b], r8]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbc %[c], r9, r9\n\t"
+        "str r4, [%[r], r8]\n\t"
+        "add r8, r8, #4\n\t"
+        "cmp r8, #384\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    /* Fully unrolled form: two words per group; the first group uses
+     * "subs" to start the borrow chain, every later group continues it
+     * with "sbcs", and no intervening instruction touches the flags. */
+    __asm__ __volatile__ (
+
+        "mov r9, #0\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r7, [%[b], #4]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "subs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r6, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r7, [%[b], #12]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "str r6, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r7, [%[b], #20]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r6, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r7, [%[b], #28]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "str r6, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r6, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r7, [%[b], #36]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "str r6, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r6, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r7, [%[b], #44]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "str r6, [%[r], #44]\n\t"
+        "ldr r4, [%[a], #48]\n\t"
+        "ldr r6, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "ldr r7, [%[b], #52]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #48]\n\t"
+        "str r6, [%[r], #52]\n\t"
+        "ldr r4, [%[a], #56]\n\t"
+        "ldr r6, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "ldr r7, [%[b], #60]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #56]\n\t"
+        "str r6, [%[r], #60]\n\t"
+        "ldr r4, [%[a], #64]\n\t"
+        "ldr r6, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "ldr r7, [%[b], #68]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #64]\n\t"
+        "str r6, [%[r], #68]\n\t"
+        "ldr r4, [%[a], #72]\n\t"
+        "ldr r6, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "ldr r7, [%[b], #76]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #72]\n\t"
+        "str r6, [%[r], #76]\n\t"
+        "ldr r4, [%[a], #80]\n\t"
+        "ldr r6, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "ldr r7, [%[b], #84]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #80]\n\t"
+        "str r6, [%[r], #84]\n\t"
+        "ldr r4, [%[a], #88]\n\t"
+        "ldr r6, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "ldr r7, [%[b], #92]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #88]\n\t"
+        "str r6, [%[r], #92]\n\t"
+        "ldr r4, [%[a], #96]\n\t"
+        "ldr r6, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "ldr r7, [%[b], #100]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #96]\n\t"
+        "str r6, [%[r], #100]\n\t"
+        "ldr r4, [%[a], #104]\n\t"
+        "ldr r6, [%[a], #108]\n\t"
+        "ldr r5, [%[b], #104]\n\t"
+        "ldr r7, [%[b], #108]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #104]\n\t"
+        "str r6, [%[r], #108]\n\t"
+        "ldr r4, [%[a], #112]\n\t"
+        "ldr r6, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #112]\n\t"
+        "ldr r7, [%[b], #116]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #112]\n\t"
+        "str r6, [%[r], #116]\n\t"
+        "ldr r4, [%[a], #120]\n\t"
+        "ldr r6, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "ldr r7, [%[b], #124]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #120]\n\t"
+        "str r6, [%[r], #124]\n\t"
+        "ldr r4, [%[a], #128]\n\t"
+        "ldr r6, [%[a], #132]\n\t"
+        "ldr r5, [%[b], #128]\n\t"
+        "ldr r7, [%[b], #132]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #128]\n\t"
+        "str r6, [%[r], #132]\n\t"
+        "ldr r4, [%[a], #136]\n\t"
+        "ldr r6, [%[a], #140]\n\t"
+        "ldr r5, [%[b], #136]\n\t"
+        "ldr r7, [%[b], #140]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #136]\n\t"
+        "str r6, [%[r], #140]\n\t"
+        "ldr r4, [%[a], #144]\n\t"
+        "ldr r6, [%[a], #148]\n\t"
+        "ldr r5, [%[b], #144]\n\t"
+        "ldr r7, [%[b], #148]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #144]\n\t"
+        "str r6, [%[r], #148]\n\t"
+        "ldr r4, [%[a], #152]\n\t"
+        "ldr r6, [%[a], #156]\n\t"
+        "ldr r5, [%[b], #152]\n\t"
+        "ldr r7, [%[b], #156]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #152]\n\t"
+        "str r6, [%[r], #156]\n\t"
+        "ldr r4, [%[a], #160]\n\t"
+        "ldr r6, [%[a], #164]\n\t"
+        "ldr r5, [%[b], #160]\n\t"
+        "ldr r7, [%[b], #164]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #160]\n\t"
+        "str r6, [%[r], #164]\n\t"
+        "ldr r4, [%[a], #168]\n\t"
+        "ldr r6, [%[a], #172]\n\t"
+        "ldr r5, [%[b], #168]\n\t"
+        "ldr r7, [%[b], #172]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #168]\n\t"
+        "str r6, [%[r], #172]\n\t"
+        "ldr r4, [%[a], #176]\n\t"
+        "ldr r6, [%[a], #180]\n\t"
+        "ldr r5, [%[b], #176]\n\t"
+        "ldr r7, [%[b], #180]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #176]\n\t"
+        "str r6, [%[r], #180]\n\t"
+        "ldr r4, [%[a], #184]\n\t"
+        "ldr r6, [%[a], #188]\n\t"
+        "ldr r5, [%[b], #184]\n\t"
+        "ldr r7, [%[b], #188]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #184]\n\t"
+        "str r6, [%[r], #188]\n\t"
+        "ldr r4, [%[a], #192]\n\t"
+        "ldr r6, [%[a], #196]\n\t"
+        "ldr r5, [%[b], #192]\n\t"
+        "ldr r7, [%[b], #196]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #192]\n\t"
+        "str r6, [%[r], #196]\n\t"
+        "ldr r4, [%[a], #200]\n\t"
+        "ldr r6, [%[a], #204]\n\t"
+        "ldr r5, [%[b], #200]\n\t"
+        "ldr r7, [%[b], #204]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #200]\n\t"
+        "str r6, [%[r], #204]\n\t"
+        "ldr r4, [%[a], #208]\n\t"
+        "ldr r6, [%[a], #212]\n\t"
+        "ldr r5, [%[b], #208]\n\t"
+        "ldr r7, [%[b], #212]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #208]\n\t"
+        "str r6, [%[r], #212]\n\t"
+        "ldr r4, [%[a], #216]\n\t"
+        "ldr r6, [%[a], #220]\n\t"
+        "ldr r5, [%[b], #216]\n\t"
+        "ldr r7, [%[b], #220]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #216]\n\t"
+        "str r6, [%[r], #220]\n\t"
+        "ldr r4, [%[a], #224]\n\t"
+        "ldr r6, [%[a], #228]\n\t"
+        "ldr r5, [%[b], #224]\n\t"
+        "ldr r7, [%[b], #228]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #224]\n\t"
+        "str r6, [%[r], #228]\n\t"
+        "ldr r4, [%[a], #232]\n\t"
+        "ldr r6, [%[a], #236]\n\t"
+        "ldr r5, [%[b], #232]\n\t"
+        "ldr r7, [%[b], #236]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #232]\n\t"
+        "str r6, [%[r], #236]\n\t"
+        "ldr r4, [%[a], #240]\n\t"
+        "ldr r6, [%[a], #244]\n\t"
+        "ldr r5, [%[b], #240]\n\t"
+        "ldr r7, [%[b], #244]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #240]\n\t"
+        "str r6, [%[r], #244]\n\t"
+        "ldr r4, [%[a], #248]\n\t"
+        "ldr r6, [%[a], #252]\n\t"
+        "ldr r5, [%[b], #248]\n\t"
+        "ldr r7, [%[b], #252]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #248]\n\t"
+        "str r6, [%[r], #252]\n\t"
+        "ldr r4, [%[a], #256]\n\t"
+        "ldr r6, [%[a], #260]\n\t"
+        "ldr r5, [%[b], #256]\n\t"
+        "ldr r7, [%[b], #260]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #256]\n\t"
+        "str r6, [%[r], #260]\n\t"
+        "ldr r4, [%[a], #264]\n\t"
+        "ldr r6, [%[a], #268]\n\t"
+        "ldr r5, [%[b], #264]\n\t"
+        "ldr r7, [%[b], #268]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #264]\n\t"
+        "str r6, [%[r], #268]\n\t"
+        "ldr r4, [%[a], #272]\n\t"
+        "ldr r6, [%[a], #276]\n\t"
+        "ldr r5, [%[b], #272]\n\t"
+        "ldr r7, [%[b], #276]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #272]\n\t"
+        "str r6, [%[r], #276]\n\t"
+        "ldr r4, [%[a], #280]\n\t"
+        "ldr r6, [%[a], #284]\n\t"
+        "ldr r5, [%[b], #280]\n\t"
+        "ldr r7, [%[b], #284]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #280]\n\t"
+        "str r6, [%[r], #284]\n\t"
+        "ldr r4, [%[a], #288]\n\t"
+        "ldr r6, [%[a], #292]\n\t"
+        "ldr r5, [%[b], #288]\n\t"
+        "ldr r7, [%[b], #292]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #288]\n\t"
+        "str r6, [%[r], #292]\n\t"
+        "ldr r4, [%[a], #296]\n\t"
+        "ldr r6, [%[a], #300]\n\t"
+        "ldr r5, [%[b], #296]\n\t"
+        "ldr r7, [%[b], #300]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #296]\n\t"
+        "str r6, [%[r], #300]\n\t"
+        "ldr r4, [%[a], #304]\n\t"
+        "ldr r6, [%[a], #308]\n\t"
+        "ldr r5, [%[b], #304]\n\t"
+        "ldr r7, [%[b], #308]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #304]\n\t"
+        "str r6, [%[r], #308]\n\t"
+        "ldr r4, [%[a], #312]\n\t"
+        "ldr r6, [%[a], #316]\n\t"
+        "ldr r5, [%[b], #312]\n\t"
+        "ldr r7, [%[b], #316]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #312]\n\t"
+        "str r6, [%[r], #316]\n\t"
+        "ldr r4, [%[a], #320]\n\t"
+        "ldr r6, [%[a], #324]\n\t"
+        "ldr r5, [%[b], #320]\n\t"
+        "ldr r7, [%[b], #324]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #320]\n\t"
+        "str r6, [%[r], #324]\n\t"
+        "ldr r4, [%[a], #328]\n\t"
+        "ldr r6, [%[a], #332]\n\t"
+        "ldr r5, [%[b], #328]\n\t"
+        "ldr r7, [%[b], #332]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #328]\n\t"
+        "str r6, [%[r], #332]\n\t"
+        "ldr r4, [%[a], #336]\n\t"
+        "ldr r6, [%[a], #340]\n\t"
+        "ldr r5, [%[b], #336]\n\t"
+        "ldr r7, [%[b], #340]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #336]\n\t"
+        "str r6, [%[r], #340]\n\t"
+        "ldr r4, [%[a], #344]\n\t"
+        "ldr r6, [%[a], #348]\n\t"
+        "ldr r5, [%[b], #344]\n\t"
+        "ldr r7, [%[b], #348]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #344]\n\t"
+        "str r6, [%[r], #348]\n\t"
+        "ldr r4, [%[a], #352]\n\t"
+        "ldr r6, [%[a], #356]\n\t"
+        "ldr r5, [%[b], #352]\n\t"
+        "ldr r7, [%[b], #356]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #352]\n\t"
+        "str r6, [%[r], #356]\n\t"
+        "ldr r4, [%[a], #360]\n\t"
+        "ldr r6, [%[a], #364]\n\t"
+        "ldr r5, [%[b], #360]\n\t"
+        "ldr r7, [%[b], #364]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #360]\n\t"
+        "str r6, [%[r], #364]\n\t"
+        "ldr r4, [%[a], #368]\n\t"
+        "ldr r6, [%[a], #372]\n\t"
+        "ldr r5, [%[b], #368]\n\t"
+        "ldr r7, [%[b], #372]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #368]\n\t"
+        "str r6, [%[r], #372]\n\t"
+        "ldr r4, [%[a], #376]\n\t"
+        "ldr r6, [%[a], #380]\n\t"
+        "ldr r5, [%[b], #376]\n\t"
+        "ldr r7, [%[b], #380]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "sbcs r6, r6, r7\n\t"
+        "str r4, [%[r], #376]\n\t"
+        "str r6, [%[r], #380]\n\t"
+        "sbc %[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ "# i = 0\n\t"
+ "mov r12, #0\n\t"
+ "ldr r10, [%[a], #0]\n\t"
+ "ldr r14, [%[a], #4]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul r8, %[mp], r10\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "ldr r7, [%[m], #0]\n\t"
+ "ldr r9, [%[a], #0]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r10, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "ldr r7, [%[m], #4]\n\t"
+ "ldr r9, [%[a], #4]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r14, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r10, r10, r5\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr r7, [%[m], #8]\n\t"
+ "ldr r14, [%[a], #8]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r14, r14, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r14, r14, r4\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr r7, [%[m], #12]\n\t"
+ "ldr r9, [%[a], #12]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #12]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr r7, [%[m], #16]\n\t"
+ "ldr r9, [%[a], #16]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #16]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr r7, [%[m], #20]\n\t"
+ "ldr r9, [%[a], #20]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #20]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr r7, [%[m], #24]\n\t"
+ "ldr r9, [%[a], #24]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #24]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr r7, [%[m], #28]\n\t"
+ "ldr r9, [%[a], #28]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #28]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr r7, [%[m], #32]\n\t"
+ "ldr r9, [%[a], #32]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #32]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr r7, [%[m], #36]\n\t"
+ "ldr r9, [%[a], #36]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #36]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr r7, [%[m], #40]\n\t"
+ "ldr r9, [%[a], #40]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #40]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr r7, [%[m], #44]\n\t"
+ "ldr r9, [%[a], #44]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #44]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+12] += m[12] * mu\n\t"
+ "ldr r7, [%[m], #48]\n\t"
+ "ldr r9, [%[a], #48]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #48]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+13] += m[13] * mu\n\t"
+ "ldr r7, [%[m], #52]\n\t"
+ "ldr r9, [%[a], #52]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #52]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+14] += m[14] * mu\n\t"
+ "ldr r7, [%[m], #56]\n\t"
+ "ldr r9, [%[a], #56]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #56]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+15] += m[15] * mu\n\t"
+ "ldr r7, [%[m], #60]\n\t"
+ "ldr r9, [%[a], #60]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #60]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+16] += m[16] * mu\n\t"
+ "ldr r7, [%[m], #64]\n\t"
+ "ldr r9, [%[a], #64]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #64]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+17] += m[17] * mu\n\t"
+ "ldr r7, [%[m], #68]\n\t"
+ "ldr r9, [%[a], #68]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #68]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+18] += m[18] * mu\n\t"
+ "ldr r7, [%[m], #72]\n\t"
+ "ldr r9, [%[a], #72]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #72]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+19] += m[19] * mu\n\t"
+ "ldr r7, [%[m], #76]\n\t"
+ "ldr r9, [%[a], #76]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #76]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+20] += m[20] * mu\n\t"
+ "ldr r7, [%[m], #80]\n\t"
+ "ldr r9, [%[a], #80]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #80]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+21] += m[21] * mu\n\t"
+ "ldr r7, [%[m], #84]\n\t"
+ "ldr r9, [%[a], #84]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #84]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+22] += m[22] * mu\n\t"
+ "ldr r7, [%[m], #88]\n\t"
+ "ldr r9, [%[a], #88]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #88]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+23] += m[23] * mu\n\t"
+ "ldr r7, [%[m], #92]\n\t"
+ "ldr r9, [%[a], #92]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #92]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+24] += m[24] * mu\n\t"
+ "ldr r7, [%[m], #96]\n\t"
+ "ldr r9, [%[a], #96]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #96]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+25] += m[25] * mu\n\t"
+ "ldr r7, [%[m], #100]\n\t"
+ "ldr r9, [%[a], #100]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #100]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+26] += m[26] * mu\n\t"
+ "ldr r7, [%[m], #104]\n\t"
+ "ldr r9, [%[a], #104]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #104]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+27] += m[27] * mu\n\t"
+ "ldr r7, [%[m], #108]\n\t"
+ "ldr r9, [%[a], #108]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #108]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+28] += m[28] * mu\n\t"
+ "ldr r7, [%[m], #112]\n\t"
+ "ldr r9, [%[a], #112]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #112]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+29] += m[29] * mu\n\t"
+ "ldr r7, [%[m], #116]\n\t"
+ "ldr r9, [%[a], #116]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #116]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+30] += m[30] * mu\n\t"
+ "ldr r7, [%[m], #120]\n\t"
+ "ldr r9, [%[a], #120]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #120]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+31] += m[31] * mu\n\t"
+ "ldr r7, [%[m], #124]\n\t"
+ "ldr r9, [%[a], #124]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #124]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+32] += m[32] * mu\n\t"
+ "ldr r7, [%[m], #128]\n\t"
+ "ldr r9, [%[a], #128]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #128]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+33] += m[33] * mu\n\t"
+ "ldr r7, [%[m], #132]\n\t"
+ "ldr r9, [%[a], #132]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #132]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+34] += m[34] * mu\n\t"
+ "ldr r7, [%[m], #136]\n\t"
+ "ldr r9, [%[a], #136]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #136]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+35] += m[35] * mu\n\t"
+ "ldr r7, [%[m], #140]\n\t"
+ "ldr r9, [%[a], #140]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #140]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+36] += m[36] * mu\n\t"
+ "ldr r7, [%[m], #144]\n\t"
+ "ldr r9, [%[a], #144]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #144]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+37] += m[37] * mu\n\t"
+ "ldr r7, [%[m], #148]\n\t"
+ "ldr r9, [%[a], #148]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #148]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+38] += m[38] * mu\n\t"
+ "ldr r7, [%[m], #152]\n\t"
+ "ldr r9, [%[a], #152]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #152]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+39] += m[39] * mu\n\t"
+ "ldr r7, [%[m], #156]\n\t"
+ "ldr r9, [%[a], #156]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #156]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+40] += m[40] * mu\n\t"
+ "ldr r7, [%[m], #160]\n\t"
+ "ldr r9, [%[a], #160]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #160]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+41] += m[41] * mu\n\t"
+ "ldr r7, [%[m], #164]\n\t"
+ "ldr r9, [%[a], #164]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #164]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+42] += m[42] * mu\n\t"
+ "ldr r7, [%[m], #168]\n\t"
+ "ldr r9, [%[a], #168]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #168]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+43] += m[43] * mu\n\t"
+ "ldr r7, [%[m], #172]\n\t"
+ "ldr r9, [%[a], #172]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #172]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+44] += m[44] * mu\n\t"
+ "ldr r7, [%[m], #176]\n\t"
+ "ldr r9, [%[a], #176]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #176]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+45] += m[45] * mu\n\t"
+ "ldr r7, [%[m], #180]\n\t"
+ "ldr r9, [%[a], #180]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #180]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+46] += m[46] * mu\n\t"
+ "ldr r7, [%[m], #184]\n\t"
+ "ldr r9, [%[a], #184]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #184]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+47] += m[47] * mu\n\t"
+ "ldr r7, [%[m], #188]\n\t"
+ "ldr r9, [%[a], #188]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #188]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+48] += m[48] * mu\n\t"
+ "ldr r7, [%[m], #192]\n\t"
+ "ldr r9, [%[a], #192]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #192]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+49] += m[49] * mu\n\t"
+ "ldr r7, [%[m], #196]\n\t"
+ "ldr r9, [%[a], #196]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #196]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+50] += m[50] * mu\n\t"
+ "ldr r7, [%[m], #200]\n\t"
+ "ldr r9, [%[a], #200]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #200]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+51] += m[51] * mu\n\t"
+ "ldr r7, [%[m], #204]\n\t"
+ "ldr r9, [%[a], #204]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #204]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+52] += m[52] * mu\n\t"
+ "ldr r7, [%[m], #208]\n\t"
+ "ldr r9, [%[a], #208]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #208]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+53] += m[53] * mu\n\t"
+ "ldr r7, [%[m], #212]\n\t"
+ "ldr r9, [%[a], #212]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #212]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+54] += m[54] * mu\n\t"
+ "ldr r7, [%[m], #216]\n\t"
+ "ldr r9, [%[a], #216]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #216]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+55] += m[55] * mu\n\t"
+ "ldr r7, [%[m], #220]\n\t"
+ "ldr r9, [%[a], #220]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #220]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+56] += m[56] * mu\n\t"
+ "ldr r7, [%[m], #224]\n\t"
+ "ldr r9, [%[a], #224]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #224]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+57] += m[57] * mu\n\t"
+ "ldr r7, [%[m], #228]\n\t"
+ "ldr r9, [%[a], #228]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #228]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+58] += m[58] * mu\n\t"
+ "ldr r7, [%[m], #232]\n\t"
+ "ldr r9, [%[a], #232]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #232]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+59] += m[59] * mu\n\t"
+ "ldr r7, [%[m], #236]\n\t"
+ "ldr r9, [%[a], #236]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #236]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+60] += m[60] * mu\n\t"
+ "ldr r7, [%[m], #240]\n\t"
+ "ldr r9, [%[a], #240]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #240]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+61] += m[61] * mu\n\t"
+ "ldr r7, [%[m], #244]\n\t"
+ "ldr r9, [%[a], #244]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #244]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+62] += m[62] * mu\n\t"
+ "ldr r7, [%[m], #248]\n\t"
+ "ldr r9, [%[a], #248]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #248]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+63] += m[63] * mu\n\t"
+ "ldr r7, [%[m], #252]\n\t"
+ "ldr r9, [%[a], #252]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #252]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+64] += m[64] * mu\n\t"
+ "ldr r7, [%[m], #256]\n\t"
+ "ldr r9, [%[a], #256]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #256]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+65] += m[65] * mu\n\t"
+ "ldr r7, [%[m], #260]\n\t"
+ "ldr r9, [%[a], #260]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #260]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+66] += m[66] * mu\n\t"
+ "ldr r7, [%[m], #264]\n\t"
+ "ldr r9, [%[a], #264]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #264]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+67] += m[67] * mu\n\t"
+ "ldr r7, [%[m], #268]\n\t"
+ "ldr r9, [%[a], #268]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #268]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+68] += m[68] * mu\n\t"
+ "ldr r7, [%[m], #272]\n\t"
+ "ldr r9, [%[a], #272]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #272]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+69] += m[69] * mu\n\t"
+ "ldr r7, [%[m], #276]\n\t"
+ "ldr r9, [%[a], #276]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #276]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+70] += m[70] * mu\n\t"
+ "ldr r7, [%[m], #280]\n\t"
+ "ldr r9, [%[a], #280]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #280]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+71] += m[71] * mu\n\t"
+ "ldr r7, [%[m], #284]\n\t"
+ "ldr r9, [%[a], #284]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #284]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+72] += m[72] * mu\n\t"
+ "ldr r7, [%[m], #288]\n\t"
+ "ldr r9, [%[a], #288]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #288]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+73] += m[73] * mu\n\t"
+ "ldr r7, [%[m], #292]\n\t"
+ "ldr r9, [%[a], #292]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #292]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+74] += m[74] * mu\n\t"
+ "ldr r7, [%[m], #296]\n\t"
+ "ldr r9, [%[a], #296]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #296]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+75] += m[75] * mu\n\t"
+ "ldr r7, [%[m], #300]\n\t"
+ "ldr r9, [%[a], #300]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #300]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+76] += m[76] * mu\n\t"
+ "ldr r7, [%[m], #304]\n\t"
+ "ldr r9, [%[a], #304]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #304]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+77] += m[77] * mu\n\t"
+ "ldr r7, [%[m], #308]\n\t"
+ "ldr r9, [%[a], #308]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #308]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+78] += m[78] * mu\n\t"
+ "ldr r7, [%[m], #312]\n\t"
+ "ldr r9, [%[a], #312]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #312]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+79] += m[79] * mu\n\t"
+ "ldr r7, [%[m], #316]\n\t"
+ "ldr r9, [%[a], #316]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #316]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+80] += m[80] * mu\n\t"
+ "ldr r7, [%[m], #320]\n\t"
+ "ldr r9, [%[a], #320]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #320]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+81] += m[81] * mu\n\t"
+ "ldr r7, [%[m], #324]\n\t"
+ "ldr r9, [%[a], #324]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #324]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+82] += m[82] * mu\n\t"
+ "ldr r7, [%[m], #328]\n\t"
+ "ldr r9, [%[a], #328]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #328]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+83] += m[83] * mu\n\t"
+ "ldr r7, [%[m], #332]\n\t"
+ "ldr r9, [%[a], #332]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #332]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+84] += m[84] * mu\n\t"
+ "ldr r7, [%[m], #336]\n\t"
+ "ldr r9, [%[a], #336]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #336]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+85] += m[85] * mu\n\t"
+ "ldr r7, [%[m], #340]\n\t"
+ "ldr r9, [%[a], #340]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #340]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+86] += m[86] * mu\n\t"
+ "ldr r7, [%[m], #344]\n\t"
+ "ldr r9, [%[a], #344]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #344]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+87] += m[87] * mu\n\t"
+ "ldr r7, [%[m], #348]\n\t"
+ "ldr r9, [%[a], #348]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #348]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+88] += m[88] * mu\n\t"
+ "ldr r7, [%[m], #352]\n\t"
+ "ldr r9, [%[a], #352]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #352]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+89] += m[89] * mu\n\t"
+ "ldr r7, [%[m], #356]\n\t"
+ "ldr r9, [%[a], #356]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #356]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+90] += m[90] * mu\n\t"
+ "ldr r7, [%[m], #360]\n\t"
+ "ldr r9, [%[a], #360]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #360]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+91] += m[91] * mu\n\t"
+ "ldr r7, [%[m], #364]\n\t"
+ "ldr r9, [%[a], #364]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #364]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+92] += m[92] * mu\n\t"
+ "ldr r7, [%[m], #368]\n\t"
+ "ldr r9, [%[a], #368]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #368]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+93] += m[93] * mu\n\t"
+ "ldr r7, [%[m], #372]\n\t"
+ "ldr r9, [%[a], #372]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #372]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+94] += m[94] * mu\n\t"
+ "ldr r7, [%[m], #376]\n\t"
+ "ldr r9, [%[a], #376]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #376]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+95] += m[95] * mu\n\t"
+ "ldr r7, [%[m], #380]\n\t"
+ "ldr r9, [%[a], #380]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r7, r7, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ "adc %[ca], %[ca], %[ca]\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #380]\n\t"
+ "ldr r9, [%[a], #384]\n\t"
+ "adcs r9, r9, r7\n\t"
+ "str r9, [%[a], #384]\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ "# i += 1\n\t"
+ "add %[a], %[a], #4\n\t"
+ "add r12, r12, #4\n\t"
+ "cmp r12, #384\n\t"
+ "blt 1b\n\t"
+ "str r10, [%[a], #0]\n\t"
+ "str r14, [%[a], #4]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Computes the 192-digit product with sp_3072_mul_96() and then applies
+ * sp_3072_mont_reduce_96() so the result stays in Montgomery form.
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_3072_mul_96(r, a, b);
+ sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Computes the square with sp_3072_sqr_96() (cheaper than a general
+ * multiply) and then reduces back into Montgomery form.
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_3072_sqr_96(r, a);
+ sp_3072_mont_reduce_96(r, m, mp);
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * Uses a bitwise restoring division for the top 31 quotient bits, then
+ * two multiply-and-correct rounds to refine the estimate.
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div)
+{
+ sp_digit r = 0;
+
+ __asm__ __volatile__ (
+ /* r5 = (div >> 1) + 1; r6:r7 = working copy of d0:d1. */
+ "lsr r5, %[div], #1\n\t"
+ "add r5, r5, #1\n\t"
+ "mov r6, %[d0]\n\t"
+ "mov r7, %[d1]\n\t"
+ "# Do top 32\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "# Next 30 bits\n\t"
+ /* Restoring division loop: one quotient bit per iteration. */
+ "mov r4, #29\n\t"
+ "1:\n\t"
+ "movs r6, r6, lsl #1\n\t"
+ "adc r7, r7, r7\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "subs r4, r4, #1\n\t"
+ "bpl 1b\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "add %[r], %[r], #1\n\t"
+ /* Correction: r5 = high word of (d1|d0 - r*div); fold it back into r.
+  * Done twice to tighten the estimate. */
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ /* If the remaining low word >= div, bump the quotient by one. */
+ "subs r8, %[div], r4\n\t"
+ "sbc r8, r8, r8\n\t"
+ "sub %[r], %[r], r8\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r6", "r7", "r8"
+ );
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Applies the mask m to all 96 digits: r[i] = a[i] & m. Used for
+ * constant-time conditional selection of a value.
+ *
+ * r  A single precision integer (96 digits).
+ * a  A single precision integer (96 digits).
+ * m  Mask to AND against each digit.
+ */
+static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int j;
+
+    for (j = 0; j < 96; j++) {
+        r[j] = a[j] & m;
+    }
+#else
+    int j;
+
+    /* 96 digits masked eight at a time (12 iterations). */
+    for (j = 0; j < 96; j += 8) {
+        sp_digit* rp = r + j;
+        const sp_digit* ap = a + j;
+
+        rp[0] = ap[0] & m;
+        rp[1] = ap[1] & m;
+        rp[2] = ap[2] & m;
+        rp[3] = ap[3] & m;
+        rp[4] = ap[4] & m;
+        rp[5] = ap[5] & m;
+        rp[6] = ap[6] & m;
+        rp[7] = ap[7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = -1;
+ sp_digit one = 1;
+
+
+#ifdef WOLFSSL_SP_SMALL
+ __asm__ __volatile__ (
+ "mov r7, #0\n\t"
+ "mov r3, #-1\n\t"
+ "mov r6, #380\n\t"
+ "1:\n\t"
+ "ldr r4, [%[a], r6]\n\t"
+ "ldr r5, [%[b], r6]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "subs r6, r6, #4\n\t"
+ "bcs 1b\n\t"
+ "eor %[r], %[r], r3\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+#else
+ __asm__ __volatile__ (
+ "mov r7, #0\n\t"
+ "mov r3, #-1\n\t"
+ "ldr r4, [%[a], #380]\n\t"
+ "ldr r5, [%[b], #380]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #376]\n\t"
+ "ldr r5, [%[b], #376]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #372]\n\t"
+ "ldr r5, [%[b], #372]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #368]\n\t"
+ "ldr r5, [%[b], #368]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #364]\n\t"
+ "ldr r5, [%[b], #364]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #360]\n\t"
+ "ldr r5, [%[b], #360]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #356]\n\t"
+ "ldr r5, [%[b], #356]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #352]\n\t"
+ "ldr r5, [%[b], #352]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #348]\n\t"
+ "ldr r5, [%[b], #348]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #344]\n\t"
+ "ldr r5, [%[b], #344]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #340]\n\t"
+ "ldr r5, [%[b], #340]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #336]\n\t"
+ "ldr r5, [%[b], #336]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #332]\n\t"
+ "ldr r5, [%[b], #332]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #328]\n\t"
+ "ldr r5, [%[b], #328]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #324]\n\t"
+ "ldr r5, [%[b], #324]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #320]\n\t"
+ "ldr r5, [%[b], #320]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #316]\n\t"
+ "ldr r5, [%[b], #316]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #312]\n\t"
+ "ldr r5, [%[b], #312]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #308]\n\t"
+ "ldr r5, [%[b], #308]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #304]\n\t"
+ "ldr r5, [%[b], #304]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #300]\n\t"
+ "ldr r5, [%[b], #300]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #296]\n\t"
+ "ldr r5, [%[b], #296]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #292]\n\t"
+ "ldr r5, [%[b], #292]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #288]\n\t"
+ "ldr r5, [%[b], #288]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #284]\n\t"
+ "ldr r5, [%[b], #284]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #280]\n\t"
+ "ldr r5, [%[b], #280]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #276]\n\t"
+ "ldr r5, [%[b], #276]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #272]\n\t"
+ "ldr r5, [%[b], #272]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #268]\n\t"
+ "ldr r5, [%[b], #268]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #264]\n\t"
+ "ldr r5, [%[b], #264]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #260]\n\t"
+ "ldr r5, [%[b], #260]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #256]\n\t"
+ "ldr r5, [%[b], #256]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #252]\n\t"
+ "ldr r5, [%[b], #252]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #248]\n\t"
+ "ldr r5, [%[b], #248]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #244]\n\t"
+ "ldr r5, [%[b], #244]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #240]\n\t"
+ "ldr r5, [%[b], #240]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #236]\n\t"
+ "ldr r5, [%[b], #236]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #232]\n\t"
+ "ldr r5, [%[b], #232]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #228]\n\t"
+ "ldr r5, [%[b], #228]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #224]\n\t"
+ "ldr r5, [%[b], #224]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #220]\n\t"
+ "ldr r5, [%[b], #220]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #216]\n\t"
+ "ldr r5, [%[b], #216]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #212]\n\t"
+ "ldr r5, [%[b], #212]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #208]\n\t"
+ "ldr r5, [%[b], #208]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #204]\n\t"
+ "ldr r5, [%[b], #204]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #200]\n\t"
+ "ldr r5, [%[b], #200]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #196]\n\t"
+ "ldr r5, [%[b], #196]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #192]\n\t"
+ "ldr r5, [%[b], #192]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #188]\n\t"
+ "ldr r5, [%[b], #188]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r5, [%[b], #184]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #180]\n\t"
+ "ldr r5, [%[b], #180]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r5, [%[b], #176]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #172]\n\t"
+ "ldr r5, [%[b], #172]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r5, [%[b], #168]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #164]\n\t"
+ "ldr r5, [%[b], #164]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r5, [%[b], #160]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #156]\n\t"
+ "ldr r5, [%[b], #156]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r5, [%[b], #152]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #148]\n\t"
+ "ldr r5, [%[b], #148]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r5, [%[b], #144]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #140]\n\t"
+ "ldr r5, [%[b], #140]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r5, [%[b], #136]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #132]\n\t"
+ "ldr r5, [%[b], #132]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r5, [%[b], #128]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "eor %[r], %[r], r3\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+#endif
+
+ return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided (2 * 96 digits).
+ * d Number to divide with (96 digits).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (96 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[192], t2[97];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+
+ /* Schoolbook long division producing one 32-bit quotient digit per
+ * iteration; only the remainder is retained. */
+ div = d[95];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+ for (i=95; i>=0; i--) {
+ /* Estimate the quotient digit from the top two words of t1. */
+ r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+ sp_3072_mul_d_96(t2, d, r1);
+ t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+ t1[96 + i] -= t2[96];
+ /* The estimate can overshoot; correct with masked add-backs
+ * (branch-free, unlike the _cond variant below). */
+ sp_3072_mask_96(t2, d, t1[96 + i]);
+ t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+ sp_3072_mask_96(t2, d, t1[96 + i]);
+ t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+ }
+
+ /* Final conditional subtraction brings the remainder into [0, d). */
+ r1 = sp_3072_cmp_96(t1, d) >= 0;
+ sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result (96 digits).
+ * a A single precision number that is to be reduced (2 * 96 digits).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Divide, discarding the quotient (NULL) and keeping the remainder. */
+ return sp_3072_div_96(a, m, NULL, r);
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Unlike sp_3072_div_96, the add-back correction branches on the data,
+ * so this variant is intended for operands that are public values.
+ *
+ * a Number to be divided (2 * 96 digits).
+ * d Number to divide with (96 digits).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (96 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[192], t2[97];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+
+ /* Schoolbook long division; only the remainder is retained. */
+ div = d[95];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+ for (i=95; i>=0; i--) {
+ /* Estimate the quotient digit from the top two words of t1. */
+ r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+ sp_3072_mul_d_96(t2, d, r1);
+ t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+ t1[96 + i] -= t2[96];
+ /* Overshoot correction: add d back while the carry word is
+ * non-zero (at most twice). Data-dependent branches. */
+ if (t1[96 + i] != 0) {
+ t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+ if (t1[96 + i] != 0)
+ t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+ }
+ }
+
+ /* Final conditional subtraction brings the remainder into [0, d). */
+ r1 = sp_3072_cmp_96(t1, d) >= 0;
+ sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Uses the branching division, so only suitable for public values.
+ *
+ * r A single precision number that is the reduced result (96 digits).
+ * a A single precision number that is to be reduced (2 * 96 digits).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Divide, discarding the quotient (NULL) and keeping the remainder. */
+ return sp_3072_div_96_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window Montgomery exponentiation: precomputes a^0..a^15 in
+ * Montgomery form, then for each window does 4 squarings and one multiply.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[16][192];
+#else
+ sp_digit* t[16];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<16; i++) {
+ t[i] = td + i * 192;
+ }
+#endif
+ /* t[0] doubles as the Montgomery normalizer (never used as a
+ * table entry since window value 0 cannot select it here). */
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_96(norm, m);
+
+ /* t[1] = a * R mod m: put a in the top half (i.e. a * 2^3072)
+ * and reduce, yielding a in Montgomery form. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+ if (reduceA != 0) {
+ err = sp_3072_mod_96(t[1] + 96, a, m);
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_96(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+ err = sp_3072_mod_96(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Precompute the window table: t[i] = a^i (Montgomery form). */
+ sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+
+ /* Consume the top (possibly short) window first so the rest of
+ * the exponent splits into exact 4-bit windows.
+ * i: index of current exponent word; n: bits of that word, kept
+ * left-aligned; c: number of unconsumed bits in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 4;
+ if (c == 32) {
+ c = 28;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+ for (; i>=0 || c>=4; ) {
+ if (c == 0) {
+ /* Current word exhausted exactly: refill from the next. */
+ n = e[i--];
+ y = n >> 28;
+ n <<= 4;
+ c = 28;
+ }
+ else if (c < 4) {
+ /* Window straddles a word boundary: combine the leftover
+ * bits with the top bits of the next word. */
+ y = n >> 28;
+ n = e[i--];
+ c = 4 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ /* Whole window available in the current word. */
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+ }
+
+ /* r = r^16 * a^y (all in Montgomery form). */
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+
+ sp_3072_mont_mul_96(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form and normalize into [0, m). */
+ XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+ sp_3072_mont_reduce_96(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+ sp_3072_cond_sub_96(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window Montgomery exponentiation: precomputes a^0..a^31 in
+ * Montgomery form, then for each window does 5 squarings and one multiply.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][192];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++) {
+ t[i] = td + i * 192;
+ }
+#endif
+ /* t[0] doubles as the Montgomery normalizer (never used as a
+ * table entry since window value 0 cannot select it here). */
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_96(norm, m);
+
+ /* t[1] = a * R mod m: put a in the top half (i.e. a * 2^3072)
+ * and reduce, yielding a in Montgomery form. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+ if (reduceA != 0) {
+ err = sp_3072_mod_96(t[1] + 96, a, m);
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_96(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+ err = sp_3072_mod_96(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Precompute the window table: t[i] = a^i (Montgomery form). */
+ sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+ sp_3072_mont_sqr_96(t[16], t[ 8], m, mp);
+ sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp);
+ sp_3072_mont_sqr_96(t[18], t[ 9], m, mp);
+ sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp);
+ sp_3072_mont_sqr_96(t[20], t[10], m, mp);
+ sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp);
+ sp_3072_mont_sqr_96(t[22], t[11], m, mp);
+ sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp);
+ sp_3072_mont_sqr_96(t[24], t[12], m, mp);
+ sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp);
+ sp_3072_mont_sqr_96(t[26], t[13], m, mp);
+ sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp);
+ sp_3072_mont_sqr_96(t[28], t[14], m, mp);
+ sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp);
+ sp_3072_mont_sqr_96(t[30], t[15], m, mp);
+ sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp);
+
+ /* Consume the top (possibly short) window first so the rest of
+ * the exponent splits into exact 5-bit windows.
+ * i: index of current exponent word; n: bits of that word, kept
+ * left-aligned; c: number of unconsumed bits in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ /* Current word exhausted exactly: refill from the next. */
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles a word boundary: combine the leftover
+ * bits with the top bits of the next word. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ /* Whole window available in the current word. */
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^32 * a^y (all in Montgomery form). */
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+
+ sp_3072_mont_mul_96(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form and normalize into [0, m). */
+ XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+ sp_3072_mont_reduce_96(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+ sp_3072_cond_sub_96(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * The public exponent must fit in a single 32-bit digit. The common
+ * exponent 3 gets a dedicated square-then-multiply path; other exponents
+ * use left-to-right binary exponentiation in Montgomery form. Exponent
+ * and modulus are public, so the data-dependent _cond helpers are used.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[192], m[96], r[192];
+#else
+ sp_digit* d = NULL;
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+#endif
+ sp_digit *ah;
+ sp_digit e[1];
+ int err = MP_OKAY;
+
+ /* Parameter validation: output must hold 3072 bits; exponent must fit
+ * one digit; modulus must be exactly 3072 bits. */
+ if (*outLen < 384)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 ||
+ mp_count_bits(mm) != 3072))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ /* One allocation carved into a (192), r (192) and m (96) digits. */
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 96 * 2;
+ m = r + 96 * 2;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ /* Load the base into the top half of a. */
+ ah = a + 96;
+
+ sp_3072_from_bin(ah, 96, in, inLen);
+#if DIGIT_BIT >= 32
+ e[0] = em->dp[0];
+#else
+ /* mp_int digits are narrower than 32 bits: combine two of them. */
+ e[0] = em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(m, 96, mm);
+
+ if (e[0] == 0x3) {
+ /* Fast path for e = 3: r = (a^2 mod m) * a mod m. */
+ if (err == MP_OKAY) {
+ sp_3072_sqr_96(r, ah);
+ err = sp_3072_mod_96_cond(r, r, m);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_mul_96(r, ah, r);
+ err = sp_3072_mod_96_cond(r, r, m);
+ }
+ }
+ else {
+ int i;
+ sp_digit mp;
+
+ sp_3072_mont_setup(m, &mp);
+
+ /* Convert to Montgomery form: low half zeroed leaves the base
+ * in the top half (base * 2^3072), which the reduction turns
+ * into base * R mod m. */
+ XMEMSET(a, 0, sizeof(sp_digit) * 96);
+ err = sp_3072_mod_96_cond(a, a, m);
+
+ if (err == MP_OKAY) {
+ /* Skip leading zero bits of the exponent. */
+ for (i = 31; i >= 0; i--) {
+ if (e[0] >> i) {
+ break;
+ }
+ }
+
+ /* Left-to-right binary exponentiation: square always,
+ * multiply when the exponent bit is set. */
+ XMEMCPY(r, a, sizeof(sp_digit) * 96);
+ for (i--; i>=0; i--) {
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ if (((e[0] >> i) & 1) == 1) {
+ sp_3072_mont_mul_96(r, r, a, m, mp);
+ }
+ }
+ /* Convert out of Montgomery form. */
+ XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
+ sp_3072_mont_reduce_96(r, m, mp);
+
+ /* Normalize: subtract m once when r >= m (compare from the
+ * most significant differing digit downwards). */
+ for (i = 95; i > 0; i--) {
+ if (r[i] != m[i]) {
+ break;
+ }
+ }
+ if (r[i] >= m[i]) {
+ sp_3072_sub_in_place_96(r, m);
+ }
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#endif
+
+ return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 384U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 3072) {
+ err = MP_READ_E;
+ }
+ if (inLen > 384) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 96;
+ m = a + 192;
+ r = a;
+
+ sp_3072_from_bin(a, 96, in, inLen);
+ sp_3072_from_mp(d, 96, dm);
+ sp_3072_from_mp(m, 96, mm);
+ err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 96);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Operates on 48 words (1536 bits, 192 bytes) — half of a 3072-bit value.
+ * Runs in constant time with respect to m: the addition is always
+ * performed, with b masked to zero when m is 0.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ * returns the carry out of the 48-word addition.
+ */
+static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+ /* Compact loop: one word per iteration. c carries the chain between
+ * iterations — "adds c, c, #-1" restores the carry flag from c
+ * (c==1 sets C, c==0 clears it), and "adc c, r9, r9" (r9 = 0)
+ * captures it again after each word. */
+ __asm__ __volatile__ (
+ "mov r9, #0\n\t"
+ "mov r8, #0\n\t"
+ "1:\n\t"
+ "adds %[c], %[c], #-1\n\t"
+ "ldr r4, [%[a], r8]\n\t"
+ "ldr r5, [%[b], r8]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adc %[c], r9, r9\n\t"
+ "str r4, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, #192\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#else
+ /* Fully unrolled: two words per step, carry chained through the flags
+ * across the whole sequence ("adds" first, then "adcs" throughout);
+ * the final "adc" (with r9 = 0) extracts the carry into c. */
+ __asm__ __volatile__ (
+
+ "mov r9, #0\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r6, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r6, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r6, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r6, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r6, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r7, [%[b], #44]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r6, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r6, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r6, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r7, [%[b], #60]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "str r6, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r6, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r6, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r6, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r7, [%[b], #76]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "str r6, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r6, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r6, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r6, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r7, [%[b], #92]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "str r6, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r6, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r6, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r6, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r7, [%[b], #108]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "str r6, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r6, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r6, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r6, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r7, [%[b], #124]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "str r6, [%[r], #124]\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r6, [%[a], #132]\n\t"
+ "ldr r5, [%[b], #128]\n\t"
+ "ldr r7, [%[b], #132]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #128]\n\t"
+ "str r6, [%[r], #132]\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r6, [%[a], #140]\n\t"
+ "ldr r5, [%[b], #136]\n\t"
+ "ldr r7, [%[b], #140]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "str r6, [%[r], #140]\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r6, [%[a], #148]\n\t"
+ "ldr r5, [%[b], #144]\n\t"
+ "ldr r7, [%[b], #148]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #144]\n\t"
+ "str r6, [%[r], #148]\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r6, [%[a], #156]\n\t"
+ "ldr r5, [%[b], #152]\n\t"
+ "ldr r7, [%[b], #156]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #152]\n\t"
+ "str r6, [%[r], #156]\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r6, [%[a], #164]\n\t"
+ "ldr r5, [%[b], #160]\n\t"
+ "ldr r7, [%[b], #164]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "str r6, [%[r], #164]\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r6, [%[a], #172]\n\t"
+ "ldr r5, [%[b], #168]\n\t"
+ "ldr r7, [%[b], #172]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #168]\n\t"
+ "str r6, [%[r], #172]\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r6, [%[a], #180]\n\t"
+ "ldr r5, [%[b], #176]\n\t"
+ "ldr r7, [%[b], #180]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #176]\n\t"
+ "str r6, [%[r], #180]\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r6, [%[a], #188]\n\t"
+ "ldr r5, [%[b], #184]\n\t"
+ "ldr r7, [%[b], #188]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "adcs r6, r6, r7\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "str r6, [%[r], #188]\n\t"
+ "adc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#endif /* WOLFSSL_SP_SMALL */
+
+ return c;
+}
+
+/* RSA private key operation using the Chinese Remainder Theorem (CRT).
+ *
+ * Computes in^d mod m via the prime factors:
+ *   tmpa = in^dp mod p, tmpb = in^dq mod q,
+ *   tmpa = ((tmpa - tmpb) * qInv) mod p, result = tmpb + q * tmpa.
+ * The plain private exponent (dm) is unused; the modulus (mm) is only
+ * used for parameter validation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Stack buffers: 48 32-bit digits hold a 1536-bit half, 96 a full value. */
+ sp_digit a[96 * 2];
+ sp_digit p[48], q[48], dp[48];
+ sp_digit tmpa[96], tmpb[96];
+#else
+ /* Heap variant: one allocation (t) is partitioned below. */
+ sp_digit* t = NULL;
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+#endif
+ sp_digit* r;
+ sp_digit* qi;
+ sp_digit* dq;
+ sp_digit c;
+ int err = MP_OKAY;
+
+ (void)dm;
+ (void)mm;
+
+ /* Validate output buffer size and input/modulus lengths. */
+ if (*outLen < 384)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ /* Partition t; qi/dq/dp deliberately share one 48-digit slot as
+  * they are used at disjoint times.  r aliases the top of a. */
+ a = t;
+ p = a + 96 * 2;
+ q = p + 48;
+ qi = dq = dp = q + 48;
+ tmpa = qi + 48;
+ tmpb = tmpa + 96;
+
+ r = t + 96;
+ }
+#else
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Stack variant: result shares storage with a; qi/dq alias dp. */
+ r = a;
+ qi = dq = dp;
+#endif
+ /* Load base and CRT parameters into SP representation. */
+ sp_3072_from_bin(a, 96, in, inLen);
+ sp_3072_from_mp(p, 48, pm);
+ sp_3072_from_mp(q, 48, qm);
+ sp_3072_from_mp(dp, 48, dpm);
+
+ /* tmpa = in^dp mod p */
+ err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
+ }
+ if (err == MP_OKAY) {
+ /* tmpb = in^dq mod q */
+ sp_3072_from_mp(dq, 48, dqm);
+ err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ /* tmpa = (tmpa - tmpb) mod p; two conditional adds of p fix any
+  * borrow from the subtraction without branching on secret data. */
+ c = sp_3072_sub_in_place_48(tmpa, tmpb);
+ c += sp_3072_cond_add_48(tmpa, tmpa, p, c);
+ sp_3072_cond_add_48(tmpa, tmpa, p, c);
+
+ /* tmpa = (tmpa * qInv) mod p */
+ sp_3072_from_mp(qi, 48, qim);
+ sp_3072_mul_48(tmpa, tmpa, qi);
+ err = sp_3072_mod_48(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* result = tmpb + q * tmpa */
+ sp_3072_mul_48(tmpa, q, tmpa);
+ XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
+ sp_3072_add_96(r, tmpb, tmpa);
+
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ /* Zeroize private-key material before freeing. */
+ XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+#else
+ /* Zeroize private-key material on the stack. */
+ XMEMSET(tmpa, 0, sizeof(tmpa));
+ XMEMSET(tmpb, 0, sizeof(tmpb));
+ XMEMSET(p, 0, sizeof(p));
+ XMEMSET(q, 0, sizeof(q));
+ XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer (96 32-bit digits).
+ * r A multi-precision integer.
+ * returns MP_OKAY on success and any error returned by mp_grow.
+ */
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ /* Ensure r has room for 3072 bits worth of mp_digits. */
+ err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ /* Digit sizes match: straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
+ r->used = 96;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* mp_digit is narrower than 32 bits: split each sp_digit across
+  * several mp_digits, masking to DIGIT_BIT bits each time. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 96; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp_digit is wider than 32 bits: pack several sp_digits into each
+  * mp_digit, carrying the split value across the boundary. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 96; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ sp_digit bd[192];
+ sp_digit ed[96];
+ sp_digit md[96];
+ sp_digit* out = bd;
+ int expBits = mp_count_bits(exp);
+ int err = MP_OKAY;
+
+ /* Base and exponent must fit in 3072 bits; the modulus must use
+  * exactly 3072 bits. */
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+ else if (expBits > 3072) {
+ err = MP_READ_E;
+ }
+ else if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ /* Convert operands to the single-precision representation. */
+ sp_3072_from_mp(bd, 96, base);
+ sp_3072_from_mp(ed, 96, exp);
+ sp_3072_from_mp(md, 96, mod);
+
+ err = sp_3072_mod_exp_96(out, bd, ed, expBits, md, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_3072_to_mp(out, res);
+ }
+
+ /* Clear the copy of the (potentially secret) exponent. */
+ XMEMSET(ed, 0, sizeof(ed));
+
+ return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+/* Shift number left by n bits. (r = a << n)
+ *
+ * r  Result buffer of 97 32-bit words; the bits shifted out of the top
+ *    word land in r[96] (byte offset 384).
+ * a  Number to shift, 96 32-bit words.
+ * n  Number of bits to shift; assumed 0 <= n < 32 -- TODO confirm with
+ *    callers (sp_3072_mod_exp_2_96 passes a 5-bit window value).
+ */
+static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
+{
+ __asm__ __volatile__ (
+ /* r6 = 31 - n.  Each word becomes (a[i] << n) | (a[i-1] >> (32 - n)),
+  * with the right shift computed as ((a[i-1] >> 1) >> (31 - n)) so the
+  * shift amount stays in range even when n == 0.  Words are processed
+  * from most significant (offset 380) down to least significant. */
+ "mov r6, #31\n\t"
+ "sub r6, r6, %[n]\n\t"
+ "ldr r3, [%[a], #380]\n\t"
+ "lsr r4, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r4, r4, r6\n\t"
+ "ldr r2, [%[a], #376]\n\t"
+ "str r4, [%[r], #384]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #372]\n\t"
+ "str r3, [%[r], #380]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #368]\n\t"
+ "str r2, [%[r], #376]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #364]\n\t"
+ "str r4, [%[r], #372]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #360]\n\t"
+ "str r3, [%[r], #368]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #356]\n\t"
+ "str r2, [%[r], #364]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #352]\n\t"
+ "str r4, [%[r], #360]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #348]\n\t"
+ "str r3, [%[r], #356]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #344]\n\t"
+ "str r2, [%[r], #352]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #340]\n\t"
+ "str r4, [%[r], #348]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #336]\n\t"
+ "str r3, [%[r], #344]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #332]\n\t"
+ "str r2, [%[r], #340]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #328]\n\t"
+ "str r4, [%[r], #336]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #324]\n\t"
+ "str r3, [%[r], #332]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #320]\n\t"
+ "str r2, [%[r], #328]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #316]\n\t"
+ "str r4, [%[r], #324]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #312]\n\t"
+ "str r3, [%[r], #320]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #308]\n\t"
+ "str r2, [%[r], #316]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #304]\n\t"
+ "str r4, [%[r], #312]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #300]\n\t"
+ "str r3, [%[r], #308]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #296]\n\t"
+ "str r2, [%[r], #304]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #292]\n\t"
+ "str r4, [%[r], #300]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #288]\n\t"
+ "str r3, [%[r], #296]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #284]\n\t"
+ "str r2, [%[r], #292]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #280]\n\t"
+ "str r4, [%[r], #288]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #276]\n\t"
+ "str r3, [%[r], #284]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #272]\n\t"
+ "str r2, [%[r], #280]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #268]\n\t"
+ "str r4, [%[r], #276]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #264]\n\t"
+ "str r3, [%[r], #272]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #260]\n\t"
+ "str r2, [%[r], #268]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #256]\n\t"
+ "str r4, [%[r], #264]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #252]\n\t"
+ "str r3, [%[r], #260]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #248]\n\t"
+ "str r2, [%[r], #256]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #244]\n\t"
+ "str r4, [%[r], #252]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #240]\n\t"
+ "str r3, [%[r], #248]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #236]\n\t"
+ "str r2, [%[r], #244]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #232]\n\t"
+ "str r4, [%[r], #240]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #228]\n\t"
+ "str r3, [%[r], #236]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #224]\n\t"
+ "str r2, [%[r], #232]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #220]\n\t"
+ "str r4, [%[r], #228]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #216]\n\t"
+ "str r3, [%[r], #224]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #212]\n\t"
+ "str r2, [%[r], #220]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #208]\n\t"
+ "str r4, [%[r], #216]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #204]\n\t"
+ "str r3, [%[r], #212]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #200]\n\t"
+ "str r2, [%[r], #208]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #196]\n\t"
+ "str r4, [%[r], #204]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #192]\n\t"
+ "str r3, [%[r], #200]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #188]\n\t"
+ "str r2, [%[r], #196]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #184]\n\t"
+ "str r4, [%[r], #192]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #180]\n\t"
+ "str r3, [%[r], #188]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #176]\n\t"
+ "str r2, [%[r], #184]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #172]\n\t"
+ "str r4, [%[r], #180]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "str r3, [%[r], #176]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #164]\n\t"
+ "str r2, [%[r], #172]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #160]\n\t"
+ "str r4, [%[r], #168]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #156]\n\t"
+ "str r3, [%[r], #164]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #152]\n\t"
+ "str r2, [%[r], #160]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #148]\n\t"
+ "str r4, [%[r], #156]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "str r3, [%[r], #152]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #140]\n\t"
+ "str r2, [%[r], #148]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #136]\n\t"
+ "str r4, [%[r], #144]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #132]\n\t"
+ "str r3, [%[r], #140]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #128]\n\t"
+ "str r2, [%[r], #136]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #124]\n\t"
+ "str r4, [%[r], #132]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "str r3, [%[r], #128]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #116]\n\t"
+ "str r2, [%[r], #124]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #112]\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "str r3, [%[r], #116]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #104]\n\t"
+ "str r2, [%[r], #112]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #100]\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "str r3, [%[r], #104]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #92]\n\t"
+ "str r2, [%[r], #100]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #88]\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "str r3, [%[r], #92]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #80]\n\t"
+ "str r2, [%[r], #88]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #76]\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "str r3, [%[r], #80]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #68]\n\t"
+ "str r2, [%[r], #76]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #64]\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #52]\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "str r3, [%[r], #56]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #44]\n\t"
+ "str r2, [%[r], #52]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #40]\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "str r3, [%[r], #44]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "str r2, [%[r], #40]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #28]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "str r4, [%[r]]\n\t"
+ "str r2, [%[r], #4]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+ : "memory", "r2", "r3", "r4", "r5", "r6"
+ );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * Uses a fixed 5-bit window: because the base is 2, each window step is
+ * five Montgomery squarings followed by a left shift of the window value
+ * and a single-word reduction, instead of a multiplication by a
+ * precomputed power.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
+ const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit nd[192];
+ sp_digit td[97];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* 289 = 192 (norm) + 97 (tmp) digits in one allocation. */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 192;
+#else
+ norm = nd;
+ tmp = td;
+#endif
+
+ /* Montgomery setup: mp = -1/m mod 2^32, norm = R mod m. */
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_96(norm, m);
+
+ /* Consume the top (bits mod 5) bits first so the remaining bits
+  * divide evenly into 5-bit windows. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ /* r = norm * 2^y = R * 2^y mod m (Montgomery form of 2^y). */
+ sp_3072_lshift_96(r, norm, y);
+ for (; i>=0 || c>=5; ) {
+ /* Extract the next 5-bit window y from the exponent stream;
+  * c counts unread bits remaining in n. */
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles a word boundary. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^(2^5) mod m (five Montgomery squarings). */
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+
+ /* Multiply by 2^y: shift, then fold the overflow word r[96]
+  * back in via norm and a conditional subtract. */
+ sp_3072_lshift_96(r, r, y);
+ sp_3072_mul_d_96(tmp, norm, r[96]);
+ r[96] = 0;
+ o = sp_3072_add_96(r, r, tmp);
+ sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
+ }
+
+ /* Convert out of Montgomery form and fully reduce. */
+ XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+ sp_3072_mont_reduce_96(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+ sp_3072_cond_sub_96(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* HAVE_FFDHE_3072 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Length, in bytes, of exponentiation result (leading zero
+ * bytes are stripped).
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+ int err = MP_OKAY;
+ sp_digit b[192], e[96], m[96];
+ sp_digit* r = b;
+ word32 i;
+
+ /* Validate operand sizes: 3072-bit base/modulus, 384-byte exponent. */
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 384) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 96, base);
+ sp_3072_from_bin(e, 96, exp, expLen);
+ sp_3072_from_mp(m, 96, mod);
+
+ #ifdef HAVE_FFDHE_3072
+ /* Fast path for base 2 when the modulus top word is all ones
+  * (true of the FFDHE 3072 prime). */
+ if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1)
+ err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
+ else
+ #endif
+ err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
+
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ /* Strip leading zero bytes from the big-endian result. */
+ for (i=0; i<384 && out[i] == 0; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+
+ }
+
+ /* Clear the copy of the secret exponent. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ sp_digit bd[96];
+ sp_digit ed[48];
+ sp_digit md[48];
+ sp_digit* out = bd;
+ int expBits = mp_count_bits(exp);
+ int err = MP_OKAY;
+
+ /* Base and exponent must fit in 1536 bits; the modulus must use
+  * exactly 1536 bits. */
+ if (mp_count_bits(base) > 1536) {
+ err = MP_READ_E;
+ }
+ else if (expBits > 1536) {
+ err = MP_READ_E;
+ }
+ else if (mp_count_bits(mod) != 1536) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ /* Convert operands to the single-precision representation. */
+ sp_3072_from_mp(bd, 48, base);
+ sp_3072_from_mp(ed, 48, exp);
+ sp_3072_from_mp(md, 48, mod);
+
+ err = sp_3072_mod_exp_48(out, bd, ed, expBits, md, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* The 1536-bit operation leaves the top half of the 3072-bit
+  * result buffer unwritten; clear it before converting back. */
+ XMEMSET(out + 48, 0, sizeof(*out) * 48U);
+ err = sp_3072_to_mp(out, res);
+ res->used = mod->used;
+ mp_clamp(res);
+ }
+
+ /* Clear the copy of the (potentially secret) exponent. */
+ XMEMSET(ed, 0, sizeof(ed));
+
+ return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * Bytes are consumed from the end of the array (least significant byte
+ * first) and packed into 32-bit digits; digits beyond the input are
+ * zeroed.
+ *
+ * r A single precision integer.
+ * size Maximum number of digits to write.
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int idx;
+ int word = 0;
+ word32 shift = 0;
+
+ r[0] = 0;
+ for (idx = n - 1; idx >= 0; idx--) {
+ /* Fold the next byte into the current digit. */
+ r[word] |= (((sp_digit)a[idx]) << shift);
+ if (shift >= 24U) {
+ /* Digit is full: mask to 32 bits and spill the byte's top
+  * bits into the next digit. */
+ r[word] &= 0xffffffff;
+ shift = 32U - shift;
+ if (word + 1 >= size) {
+ break;
+ }
+ r[++word] = (sp_digit)a[idx] >> shift;
+ shift = 8U - shift;
+ }
+ else {
+ shift += 8U;
+ }
+ }
+
+ /* Zero any remaining digits. */
+ for (word++; word < size; word++) {
+ r[word] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of 32-bit digits to write.
+ * a A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+ /* Digit sizes match: straight copy then zero-fill. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 32
+ /* mp_digit is wider than 32 bits: split each mp_digit across
+  * several 32-bit sp_digits. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 32U) <= (word32)DIGIT_BIT) {
+ s += 32U;
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp_digit is narrower than 32 bits: pack several mp_digits into
+  * each 32-bit sp_digit, carrying split values across boundaries. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 32) {
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 32 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * Digits are emitted least significant first into the tail of the
+ * buffer; s/b track the bit position split across byte boundaries.
+ *
+ * r A single precision integer (128 32-bit digits).
+ * a Byte array, at least 512 bytes.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+ int i, j, s = 0, b;
+
+ /* Start at the last byte (least significant). */
+ j = 4096 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<128 && j>=0; i++) {
+ b = 0;
+ /* Merge the low bits of this digit with bits left over from the
+  * previous digit. */
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ /* Emit the remaining whole bytes of this 32-bit digit. */
+ while (b < 32) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ /* Carry the leftover bit count into the next digit's first byte. */
+ s = 8 - (b - 32);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Adds two 64-word (64 x 32-bit = 2048-bit) single precision integers,
+ * four words per unrolled step, keeping the ARM carry flag live across the
+ * whole ADDS/ADCS chain.  No instruction between the arithmetic ops writes
+ * the flags, so the carry propagates correctly through all 64 words.
+ *
+ * r A single precision integer. Receives the 64-word sum.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the final carry out of the addition: 0 or 1.
+ */
+static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r12 is held at zero so the final ADC can materialise just the
+ * carry flag into c (c = 0 + 0 + C). MOV does not touch the flags. */
+ "mov r12, #0\n\t"
+ /* Words 0-3: ADDS (not ADCS) starts the carry chain from a clean
+ * carry. Each step loads 4 words of a and 4 of b, adds with carry,
+ * and stores 4 words of the result. */
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[a], #4]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "ldr r10, [%[b], #8]\n\t"
+ "ldr r14, [%[b], #12]\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r5, [%[r], #4]\n\t"
+ "str r6, [%[r], #8]\n\t"
+ "str r7, [%[r], #12]\n\t"
+ /* Words 4-63: ADCS continues the chain; LDR/STR preserve flags. */
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[a], #20]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "ldr r10, [%[b], #24]\n\t"
+ "ldr r14, [%[b], #28]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "str r6, [%[r], #24]\n\t"
+ "str r7, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[a], #36]\n\t"
+ "ldr r6, [%[a], #40]\n\t"
+ "ldr r7, [%[a], #44]\n\t"
+ "ldr r8, [%[b], #32]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "ldr r10, [%[b], #40]\n\t"
+ "ldr r14, [%[b], #44]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r5, [%[r], #36]\n\t"
+ "str r6, [%[r], #40]\n\t"
+ "str r7, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[a], #52]\n\t"
+ "ldr r6, [%[a], #56]\n\t"
+ "ldr r7, [%[a], #60]\n\t"
+ "ldr r8, [%[b], #48]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "ldr r10, [%[b], #56]\n\t"
+ "ldr r14, [%[b], #60]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r5, [%[r], #52]\n\t"
+ "str r6, [%[r], #56]\n\t"
+ "str r7, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[a], #68]\n\t"
+ "ldr r6, [%[a], #72]\n\t"
+ "ldr r7, [%[a], #76]\n\t"
+ "ldr r8, [%[b], #64]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "ldr r10, [%[b], #72]\n\t"
+ "ldr r14, [%[b], #76]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "str r6, [%[r], #72]\n\t"
+ "str r7, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[a], #84]\n\t"
+ "ldr r6, [%[a], #88]\n\t"
+ "ldr r7, [%[a], #92]\n\t"
+ "ldr r8, [%[b], #80]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "ldr r10, [%[b], #88]\n\t"
+ "ldr r14, [%[b], #92]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r5, [%[r], #84]\n\t"
+ "str r6, [%[r], #88]\n\t"
+ "str r7, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[a], #100]\n\t"
+ "ldr r6, [%[a], #104]\n\t"
+ "ldr r7, [%[a], #108]\n\t"
+ "ldr r8, [%[b], #96]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "ldr r10, [%[b], #104]\n\t"
+ "ldr r14, [%[b], #108]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r5, [%[r], #100]\n\t"
+ "str r6, [%[r], #104]\n\t"
+ "str r7, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[a], #116]\n\t"
+ "ldr r6, [%[a], #120]\n\t"
+ "ldr r7, [%[a], #124]\n\t"
+ "ldr r8, [%[b], #112]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "ldr r10, [%[b], #120]\n\t"
+ "ldr r14, [%[b], #124]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r5, [%[r], #116]\n\t"
+ "str r6, [%[r], #120]\n\t"
+ "str r7, [%[r], #124]\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r5, [%[a], #132]\n\t"
+ "ldr r6, [%[a], #136]\n\t"
+ "ldr r7, [%[a], #140]\n\t"
+ "ldr r8, [%[b], #128]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "ldr r10, [%[b], #136]\n\t"
+ "ldr r14, [%[b], #140]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #128]\n\t"
+ "str r5, [%[r], #132]\n\t"
+ "str r6, [%[r], #136]\n\t"
+ "str r7, [%[r], #140]\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r5, [%[a], #148]\n\t"
+ "ldr r6, [%[a], #152]\n\t"
+ "ldr r7, [%[a], #156]\n\t"
+ "ldr r8, [%[b], #144]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "ldr r10, [%[b], #152]\n\t"
+ "ldr r14, [%[b], #156]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #144]\n\t"
+ "str r5, [%[r], #148]\n\t"
+ "str r6, [%[r], #152]\n\t"
+ "str r7, [%[r], #156]\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r5, [%[a], #164]\n\t"
+ "ldr r6, [%[a], #168]\n\t"
+ "ldr r7, [%[a], #172]\n\t"
+ "ldr r8, [%[b], #160]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "ldr r10, [%[b], #168]\n\t"
+ "ldr r14, [%[b], #172]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "str r5, [%[r], #164]\n\t"
+ "str r6, [%[r], #168]\n\t"
+ "str r7, [%[r], #172]\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r5, [%[a], #180]\n\t"
+ "ldr r6, [%[a], #184]\n\t"
+ "ldr r7, [%[a], #188]\n\t"
+ "ldr r8, [%[b], #176]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "ldr r10, [%[b], #184]\n\t"
+ "ldr r14, [%[b], #188]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #176]\n\t"
+ "str r5, [%[r], #180]\n\t"
+ "str r6, [%[r], #184]\n\t"
+ "str r7, [%[r], #188]\n\t"
+ "ldr r4, [%[a], #192]\n\t"
+ "ldr r5, [%[a], #196]\n\t"
+ "ldr r6, [%[a], #200]\n\t"
+ "ldr r7, [%[a], #204]\n\t"
+ "ldr r8, [%[b], #192]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "ldr r10, [%[b], #200]\n\t"
+ "ldr r14, [%[b], #204]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #192]\n\t"
+ "str r5, [%[r], #196]\n\t"
+ "str r6, [%[r], #200]\n\t"
+ "str r7, [%[r], #204]\n\t"
+ "ldr r4, [%[a], #208]\n\t"
+ "ldr r5, [%[a], #212]\n\t"
+ "ldr r6, [%[a], #216]\n\t"
+ "ldr r7, [%[a], #220]\n\t"
+ "ldr r8, [%[b], #208]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "ldr r10, [%[b], #216]\n\t"
+ "ldr r14, [%[b], #220]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "str r5, [%[r], #212]\n\t"
+ "str r6, [%[r], #216]\n\t"
+ "str r7, [%[r], #220]\n\t"
+ "ldr r4, [%[a], #224]\n\t"
+ "ldr r5, [%[a], #228]\n\t"
+ "ldr r6, [%[a], #232]\n\t"
+ "ldr r7, [%[a], #236]\n\t"
+ "ldr r8, [%[b], #224]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "ldr r10, [%[b], #232]\n\t"
+ "ldr r14, [%[b], #236]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #224]\n\t"
+ "str r5, [%[r], #228]\n\t"
+ "str r6, [%[r], #232]\n\t"
+ "str r7, [%[r], #236]\n\t"
+ "ldr r4, [%[a], #240]\n\t"
+ "ldr r5, [%[a], #244]\n\t"
+ "ldr r6, [%[a], #248]\n\t"
+ "ldr r7, [%[a], #252]\n\t"
+ "ldr r8, [%[b], #240]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "ldr r10, [%[b], #248]\n\t"
+ "ldr r14, [%[b], #252]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #240]\n\t"
+ "str r5, [%[r], #244]\n\t"
+ "str r6, [%[r], #248]\n\t"
+ "str r7, [%[r], #252]\n\t"
+ /* c = r12 + r12 + C = 0 + 0 + C: the carry out of word 63. */
+ "adc %[c], r12, r12\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ /* "memory" is required: r is written through an input-only operand.
+ * r14 (lr) is used as a scratch register, hence listed as clobbered. */
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ return c;
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * Subtracts a 128-word (128 x 32-bit = 4096-bit) single precision integer
+ * from another in place, four words per unrolled step, keeping the ARM
+ * carry (borrow) flag live across the whole SUBS/SBCS chain. No
+ * instruction between the arithmetic ops writes the flags.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ * Returns 0 when no borrow occurred, or 0xFFFFFFFF (an all-ones mask,
+ * see the final SBC below) when a < b.
+ */
+static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* Words 0-3: SUBS (not SBCS) starts the borrow chain from a clean
+ * carry. Each step loads 4 words of a (r2-r5) and 4 of b (r6-r9),
+ * subtracts with borrow, and stores the result back into a. */
+ "ldr r2, [%[a], #0]\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[a], #12]\n\t"
+ "ldr r6, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "subs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #0]\n\t"
+ "str r3, [%[a], #4]\n\t"
+ "str r4, [%[a], #8]\n\t"
+ "str r5, [%[a], #12]\n\t"
+ /* Words 4-127: SBCS continues the chain; LDR/STR preserve flags. */
+ "ldr r2, [%[a], #16]\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[a], #28]\n\t"
+ "ldr r6, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #16]\n\t"
+ "str r3, [%[a], #20]\n\t"
+ "str r4, [%[a], #24]\n\t"
+ "str r5, [%[a], #28]\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[a], #44]\n\t"
+ "ldr r6, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "ldr r8, [%[b], #40]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #32]\n\t"
+ "str r3, [%[a], #36]\n\t"
+ "str r4, [%[a], #40]\n\t"
+ "str r5, [%[a], #44]\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[a], #60]\n\t"
+ "ldr r6, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "ldr r8, [%[b], #56]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #48]\n\t"
+ "str r3, [%[a], #52]\n\t"
+ "str r4, [%[a], #56]\n\t"
+ "str r5, [%[a], #60]\n\t"
+ "ldr r2, [%[a], #64]\n\t"
+ "ldr r3, [%[a], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[a], #76]\n\t"
+ "ldr r6, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "ldr r8, [%[b], #72]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #64]\n\t"
+ "str r3, [%[a], #68]\n\t"
+ "str r4, [%[a], #72]\n\t"
+ "str r5, [%[a], #76]\n\t"
+ "ldr r2, [%[a], #80]\n\t"
+ "ldr r3, [%[a], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[a], #92]\n\t"
+ "ldr r6, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "ldr r8, [%[b], #88]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #80]\n\t"
+ "str r3, [%[a], #84]\n\t"
+ "str r4, [%[a], #88]\n\t"
+ "str r5, [%[a], #92]\n\t"
+ "ldr r2, [%[a], #96]\n\t"
+ "ldr r3, [%[a], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[a], #108]\n\t"
+ "ldr r6, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "ldr r8, [%[b], #104]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #96]\n\t"
+ "str r3, [%[a], #100]\n\t"
+ "str r4, [%[a], #104]\n\t"
+ "str r5, [%[a], #108]\n\t"
+ "ldr r2, [%[a], #112]\n\t"
+ "ldr r3, [%[a], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[a], #124]\n\t"
+ "ldr r6, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "ldr r8, [%[b], #120]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #112]\n\t"
+ "str r3, [%[a], #116]\n\t"
+ "str r4, [%[a], #120]\n\t"
+ "str r5, [%[a], #124]\n\t"
+ "ldr r2, [%[a], #128]\n\t"
+ "ldr r3, [%[a], #132]\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r5, [%[a], #140]\n\t"
+ "ldr r6, [%[b], #128]\n\t"
+ "ldr r7, [%[b], #132]\n\t"
+ "ldr r8, [%[b], #136]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #128]\n\t"
+ "str r3, [%[a], #132]\n\t"
+ "str r4, [%[a], #136]\n\t"
+ "str r5, [%[a], #140]\n\t"
+ "ldr r2, [%[a], #144]\n\t"
+ "ldr r3, [%[a], #148]\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r5, [%[a], #156]\n\t"
+ "ldr r6, [%[b], #144]\n\t"
+ "ldr r7, [%[b], #148]\n\t"
+ "ldr r8, [%[b], #152]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #144]\n\t"
+ "str r3, [%[a], #148]\n\t"
+ "str r4, [%[a], #152]\n\t"
+ "str r5, [%[a], #156]\n\t"
+ "ldr r2, [%[a], #160]\n\t"
+ "ldr r3, [%[a], #164]\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r5, [%[a], #172]\n\t"
+ "ldr r6, [%[b], #160]\n\t"
+ "ldr r7, [%[b], #164]\n\t"
+ "ldr r8, [%[b], #168]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #160]\n\t"
+ "str r3, [%[a], #164]\n\t"
+ "str r4, [%[a], #168]\n\t"
+ "str r5, [%[a], #172]\n\t"
+ "ldr r2, [%[a], #176]\n\t"
+ "ldr r3, [%[a], #180]\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r5, [%[a], #188]\n\t"
+ "ldr r6, [%[b], #176]\n\t"
+ "ldr r7, [%[b], #180]\n\t"
+ "ldr r8, [%[b], #184]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #176]\n\t"
+ "str r3, [%[a], #180]\n\t"
+ "str r4, [%[a], #184]\n\t"
+ "str r5, [%[a], #188]\n\t"
+ "ldr r2, [%[a], #192]\n\t"
+ "ldr r3, [%[a], #196]\n\t"
+ "ldr r4, [%[a], #200]\n\t"
+ "ldr r5, [%[a], #204]\n\t"
+ "ldr r6, [%[b], #192]\n\t"
+ "ldr r7, [%[b], #196]\n\t"
+ "ldr r8, [%[b], #200]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #192]\n\t"
+ "str r3, [%[a], #196]\n\t"
+ "str r4, [%[a], #200]\n\t"
+ "str r5, [%[a], #204]\n\t"
+ "ldr r2, [%[a], #208]\n\t"
+ "ldr r3, [%[a], #212]\n\t"
+ "ldr r4, [%[a], #216]\n\t"
+ "ldr r5, [%[a], #220]\n\t"
+ "ldr r6, [%[b], #208]\n\t"
+ "ldr r7, [%[b], #212]\n\t"
+ "ldr r8, [%[b], #216]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #208]\n\t"
+ "str r3, [%[a], #212]\n\t"
+ "str r4, [%[a], #216]\n\t"
+ "str r5, [%[a], #220]\n\t"
+ "ldr r2, [%[a], #224]\n\t"
+ "ldr r3, [%[a], #228]\n\t"
+ "ldr r4, [%[a], #232]\n\t"
+ "ldr r5, [%[a], #236]\n\t"
+ "ldr r6, [%[b], #224]\n\t"
+ "ldr r7, [%[b], #228]\n\t"
+ "ldr r8, [%[b], #232]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #224]\n\t"
+ "str r3, [%[a], #228]\n\t"
+ "str r4, [%[a], #232]\n\t"
+ "str r5, [%[a], #236]\n\t"
+ "ldr r2, [%[a], #240]\n\t"
+ "ldr r3, [%[a], #244]\n\t"
+ "ldr r4, [%[a], #248]\n\t"
+ "ldr r5, [%[a], #252]\n\t"
+ "ldr r6, [%[b], #240]\n\t"
+ "ldr r7, [%[b], #244]\n\t"
+ "ldr r8, [%[b], #248]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #240]\n\t"
+ "str r3, [%[a], #244]\n\t"
+ "str r4, [%[a], #248]\n\t"
+ "str r5, [%[a], #252]\n\t"
+ "ldr r2, [%[a], #256]\n\t"
+ "ldr r3, [%[a], #260]\n\t"
+ "ldr r4, [%[a], #264]\n\t"
+ "ldr r5, [%[a], #268]\n\t"
+ "ldr r6, [%[b], #256]\n\t"
+ "ldr r7, [%[b], #260]\n\t"
+ "ldr r8, [%[b], #264]\n\t"
+ "ldr r9, [%[b], #268]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #256]\n\t"
+ "str r3, [%[a], #260]\n\t"
+ "str r4, [%[a], #264]\n\t"
+ "str r5, [%[a], #268]\n\t"
+ "ldr r2, [%[a], #272]\n\t"
+ "ldr r3, [%[a], #276]\n\t"
+ "ldr r4, [%[a], #280]\n\t"
+ "ldr r5, [%[a], #284]\n\t"
+ "ldr r6, [%[b], #272]\n\t"
+ "ldr r7, [%[b], #276]\n\t"
+ "ldr r8, [%[b], #280]\n\t"
+ "ldr r9, [%[b], #284]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #272]\n\t"
+ "str r3, [%[a], #276]\n\t"
+ "str r4, [%[a], #280]\n\t"
+ "str r5, [%[a], #284]\n\t"
+ "ldr r2, [%[a], #288]\n\t"
+ "ldr r3, [%[a], #292]\n\t"
+ "ldr r4, [%[a], #296]\n\t"
+ "ldr r5, [%[a], #300]\n\t"
+ "ldr r6, [%[b], #288]\n\t"
+ "ldr r7, [%[b], #292]\n\t"
+ "ldr r8, [%[b], #296]\n\t"
+ "ldr r9, [%[b], #300]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #288]\n\t"
+ "str r3, [%[a], #292]\n\t"
+ "str r4, [%[a], #296]\n\t"
+ "str r5, [%[a], #300]\n\t"
+ "ldr r2, [%[a], #304]\n\t"
+ "ldr r3, [%[a], #308]\n\t"
+ "ldr r4, [%[a], #312]\n\t"
+ "ldr r5, [%[a], #316]\n\t"
+ "ldr r6, [%[b], #304]\n\t"
+ "ldr r7, [%[b], #308]\n\t"
+ "ldr r8, [%[b], #312]\n\t"
+ "ldr r9, [%[b], #316]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #304]\n\t"
+ "str r3, [%[a], #308]\n\t"
+ "str r4, [%[a], #312]\n\t"
+ "str r5, [%[a], #316]\n\t"
+ "ldr r2, [%[a], #320]\n\t"
+ "ldr r3, [%[a], #324]\n\t"
+ "ldr r4, [%[a], #328]\n\t"
+ "ldr r5, [%[a], #332]\n\t"
+ "ldr r6, [%[b], #320]\n\t"
+ "ldr r7, [%[b], #324]\n\t"
+ "ldr r8, [%[b], #328]\n\t"
+ "ldr r9, [%[b], #332]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #320]\n\t"
+ "str r3, [%[a], #324]\n\t"
+ "str r4, [%[a], #328]\n\t"
+ "str r5, [%[a], #332]\n\t"
+ "ldr r2, [%[a], #336]\n\t"
+ "ldr r3, [%[a], #340]\n\t"
+ "ldr r4, [%[a], #344]\n\t"
+ "ldr r5, [%[a], #348]\n\t"
+ "ldr r6, [%[b], #336]\n\t"
+ "ldr r7, [%[b], #340]\n\t"
+ "ldr r8, [%[b], #344]\n\t"
+ "ldr r9, [%[b], #348]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #336]\n\t"
+ "str r3, [%[a], #340]\n\t"
+ "str r4, [%[a], #344]\n\t"
+ "str r5, [%[a], #348]\n\t"
+ "ldr r2, [%[a], #352]\n\t"
+ "ldr r3, [%[a], #356]\n\t"
+ "ldr r4, [%[a], #360]\n\t"
+ "ldr r5, [%[a], #364]\n\t"
+ "ldr r6, [%[b], #352]\n\t"
+ "ldr r7, [%[b], #356]\n\t"
+ "ldr r8, [%[b], #360]\n\t"
+ "ldr r9, [%[b], #364]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #352]\n\t"
+ "str r3, [%[a], #356]\n\t"
+ "str r4, [%[a], #360]\n\t"
+ "str r5, [%[a], #364]\n\t"
+ "ldr r2, [%[a], #368]\n\t"
+ "ldr r3, [%[a], #372]\n\t"
+ "ldr r4, [%[a], #376]\n\t"
+ "ldr r5, [%[a], #380]\n\t"
+ "ldr r6, [%[b], #368]\n\t"
+ "ldr r7, [%[b], #372]\n\t"
+ "ldr r8, [%[b], #376]\n\t"
+ "ldr r9, [%[b], #380]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #368]\n\t"
+ "str r3, [%[a], #372]\n\t"
+ "str r4, [%[a], #376]\n\t"
+ "str r5, [%[a], #380]\n\t"
+ "ldr r2, [%[a], #384]\n\t"
+ "ldr r3, [%[a], #388]\n\t"
+ "ldr r4, [%[a], #392]\n\t"
+ "ldr r5, [%[a], #396]\n\t"
+ "ldr r6, [%[b], #384]\n\t"
+ "ldr r7, [%[b], #388]\n\t"
+ "ldr r8, [%[b], #392]\n\t"
+ "ldr r9, [%[b], #396]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #384]\n\t"
+ "str r3, [%[a], #388]\n\t"
+ "str r4, [%[a], #392]\n\t"
+ "str r5, [%[a], #396]\n\t"
+ "ldr r2, [%[a], #400]\n\t"
+ "ldr r3, [%[a], #404]\n\t"
+ "ldr r4, [%[a], #408]\n\t"
+ "ldr r5, [%[a], #412]\n\t"
+ "ldr r6, [%[b], #400]\n\t"
+ "ldr r7, [%[b], #404]\n\t"
+ "ldr r8, [%[b], #408]\n\t"
+ "ldr r9, [%[b], #412]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #400]\n\t"
+ "str r3, [%[a], #404]\n\t"
+ "str r4, [%[a], #408]\n\t"
+ "str r5, [%[a], #412]\n\t"
+ "ldr r2, [%[a], #416]\n\t"
+ "ldr r3, [%[a], #420]\n\t"
+ "ldr r4, [%[a], #424]\n\t"
+ "ldr r5, [%[a], #428]\n\t"
+ "ldr r6, [%[b], #416]\n\t"
+ "ldr r7, [%[b], #420]\n\t"
+ "ldr r8, [%[b], #424]\n\t"
+ "ldr r9, [%[b], #428]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #416]\n\t"
+ "str r3, [%[a], #420]\n\t"
+ "str r4, [%[a], #424]\n\t"
+ "str r5, [%[a], #428]\n\t"
+ "ldr r2, [%[a], #432]\n\t"
+ "ldr r3, [%[a], #436]\n\t"
+ "ldr r4, [%[a], #440]\n\t"
+ "ldr r5, [%[a], #444]\n\t"
+ "ldr r6, [%[b], #432]\n\t"
+ "ldr r7, [%[b], #436]\n\t"
+ "ldr r8, [%[b], #440]\n\t"
+ "ldr r9, [%[b], #444]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #432]\n\t"
+ "str r3, [%[a], #436]\n\t"
+ "str r4, [%[a], #440]\n\t"
+ "str r5, [%[a], #444]\n\t"
+ "ldr r2, [%[a], #448]\n\t"
+ "ldr r3, [%[a], #452]\n\t"
+ "ldr r4, [%[a], #456]\n\t"
+ "ldr r5, [%[a], #460]\n\t"
+ "ldr r6, [%[b], #448]\n\t"
+ "ldr r7, [%[b], #452]\n\t"
+ "ldr r8, [%[b], #456]\n\t"
+ "ldr r9, [%[b], #460]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #448]\n\t"
+ "str r3, [%[a], #452]\n\t"
+ "str r4, [%[a], #456]\n\t"
+ "str r5, [%[a], #460]\n\t"
+ "ldr r2, [%[a], #464]\n\t"
+ "ldr r3, [%[a], #468]\n\t"
+ "ldr r4, [%[a], #472]\n\t"
+ "ldr r5, [%[a], #476]\n\t"
+ "ldr r6, [%[b], #464]\n\t"
+ "ldr r7, [%[b], #468]\n\t"
+ "ldr r8, [%[b], #472]\n\t"
+ "ldr r9, [%[b], #476]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #464]\n\t"
+ "str r3, [%[a], #468]\n\t"
+ "str r4, [%[a], #472]\n\t"
+ "str r5, [%[a], #476]\n\t"
+ "ldr r2, [%[a], #480]\n\t"
+ "ldr r3, [%[a], #484]\n\t"
+ "ldr r4, [%[a], #488]\n\t"
+ "ldr r5, [%[a], #492]\n\t"
+ "ldr r6, [%[b], #480]\n\t"
+ "ldr r7, [%[b], #484]\n\t"
+ "ldr r8, [%[b], #488]\n\t"
+ "ldr r9, [%[b], #492]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #480]\n\t"
+ "str r3, [%[a], #484]\n\t"
+ "str r4, [%[a], #488]\n\t"
+ "str r5, [%[a], #492]\n\t"
+ "ldr r2, [%[a], #496]\n\t"
+ "ldr r3, [%[a], #500]\n\t"
+ "ldr r4, [%[a], #504]\n\t"
+ "ldr r5, [%[a], #508]\n\t"
+ "ldr r6, [%[b], #496]\n\t"
+ "ldr r7, [%[b], #500]\n\t"
+ "ldr r8, [%[b], #504]\n\t"
+ "ldr r9, [%[b], #508]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #496]\n\t"
+ "str r3, [%[a], #500]\n\t"
+ "str r4, [%[a], #504]\n\t"
+ "str r5, [%[a], #508]\n\t"
+ /* c = r9 - r9 - NOT(C) = 0 when carry set (no borrow), or -1
+ * (0xFFFFFFFF) when a borrow occurred. Unlike the add routines,
+ * which return 0/1, this returns the borrow as an all-ones mask. */
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [a] "r" (a), [b] "r" (b)
+ /* "memory" is required: a is modified through an input-only operand. */
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r12, #0\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[a], #4]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "ldr r10, [%[b], #8]\n\t"
+ "ldr r14, [%[b], #12]\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r5, [%[r], #4]\n\t"
+ "str r6, [%[r], #8]\n\t"
+ "str r7, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[a], #20]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "ldr r10, [%[b], #24]\n\t"
+ "ldr r14, [%[b], #28]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "str r6, [%[r], #24]\n\t"
+ "str r7, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[a], #36]\n\t"
+ "ldr r6, [%[a], #40]\n\t"
+ "ldr r7, [%[a], #44]\n\t"
+ "ldr r8, [%[b], #32]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "ldr r10, [%[b], #40]\n\t"
+ "ldr r14, [%[b], #44]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r5, [%[r], #36]\n\t"
+ "str r6, [%[r], #40]\n\t"
+ "str r7, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[a], #52]\n\t"
+ "ldr r6, [%[a], #56]\n\t"
+ "ldr r7, [%[a], #60]\n\t"
+ "ldr r8, [%[b], #48]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "ldr r10, [%[b], #56]\n\t"
+ "ldr r14, [%[b], #60]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r5, [%[r], #52]\n\t"
+ "str r6, [%[r], #56]\n\t"
+ "str r7, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[a], #68]\n\t"
+ "ldr r6, [%[a], #72]\n\t"
+ "ldr r7, [%[a], #76]\n\t"
+ "ldr r8, [%[b], #64]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "ldr r10, [%[b], #72]\n\t"
+ "ldr r14, [%[b], #76]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "str r6, [%[r], #72]\n\t"
+ "str r7, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[a], #84]\n\t"
+ "ldr r6, [%[a], #88]\n\t"
+ "ldr r7, [%[a], #92]\n\t"
+ "ldr r8, [%[b], #80]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "ldr r10, [%[b], #88]\n\t"
+ "ldr r14, [%[b], #92]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r5, [%[r], #84]\n\t"
+ "str r6, [%[r], #88]\n\t"
+ "str r7, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[a], #100]\n\t"
+ "ldr r6, [%[a], #104]\n\t"
+ "ldr r7, [%[a], #108]\n\t"
+ "ldr r8, [%[b], #96]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "ldr r10, [%[b], #104]\n\t"
+ "ldr r14, [%[b], #108]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r5, [%[r], #100]\n\t"
+ "str r6, [%[r], #104]\n\t"
+ "str r7, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[a], #116]\n\t"
+ "ldr r6, [%[a], #120]\n\t"
+ "ldr r7, [%[a], #124]\n\t"
+ "ldr r8, [%[b], #112]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "ldr r10, [%[b], #120]\n\t"
+ "ldr r14, [%[b], #124]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r5, [%[r], #116]\n\t"
+ "str r6, [%[r], #120]\n\t"
+ "str r7, [%[r], #124]\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r5, [%[a], #132]\n\t"
+ "ldr r6, [%[a], #136]\n\t"
+ "ldr r7, [%[a], #140]\n\t"
+ "ldr r8, [%[b], #128]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "ldr r10, [%[b], #136]\n\t"
+ "ldr r14, [%[b], #140]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #128]\n\t"
+ "str r5, [%[r], #132]\n\t"
+ "str r6, [%[r], #136]\n\t"
+ "str r7, [%[r], #140]\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r5, [%[a], #148]\n\t"
+ "ldr r6, [%[a], #152]\n\t"
+ "ldr r7, [%[a], #156]\n\t"
+ "ldr r8, [%[b], #144]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "ldr r10, [%[b], #152]\n\t"
+ "ldr r14, [%[b], #156]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #144]\n\t"
+ "str r5, [%[r], #148]\n\t"
+ "str r6, [%[r], #152]\n\t"
+ "str r7, [%[r], #156]\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r5, [%[a], #164]\n\t"
+ "ldr r6, [%[a], #168]\n\t"
+ "ldr r7, [%[a], #172]\n\t"
+ "ldr r8, [%[b], #160]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "ldr r10, [%[b], #168]\n\t"
+ "ldr r14, [%[b], #172]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "str r5, [%[r], #164]\n\t"
+ "str r6, [%[r], #168]\n\t"
+ "str r7, [%[r], #172]\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r5, [%[a], #180]\n\t"
+ "ldr r6, [%[a], #184]\n\t"
+ "ldr r7, [%[a], #188]\n\t"
+ "ldr r8, [%[b], #176]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "ldr r10, [%[b], #184]\n\t"
+ "ldr r14, [%[b], #188]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #176]\n\t"
+ "str r5, [%[r], #180]\n\t"
+ "str r6, [%[r], #184]\n\t"
+ "str r7, [%[r], #188]\n\t"
+ "ldr r4, [%[a], #192]\n\t"
+ "ldr r5, [%[a], #196]\n\t"
+ "ldr r6, [%[a], #200]\n\t"
+ "ldr r7, [%[a], #204]\n\t"
+ "ldr r8, [%[b], #192]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "ldr r10, [%[b], #200]\n\t"
+ "ldr r14, [%[b], #204]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #192]\n\t"
+ "str r5, [%[r], #196]\n\t"
+ "str r6, [%[r], #200]\n\t"
+ "str r7, [%[r], #204]\n\t"
+ "ldr r4, [%[a], #208]\n\t"
+ "ldr r5, [%[a], #212]\n\t"
+ "ldr r6, [%[a], #216]\n\t"
+ "ldr r7, [%[a], #220]\n\t"
+ "ldr r8, [%[b], #208]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "ldr r10, [%[b], #216]\n\t"
+ "ldr r14, [%[b], #220]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "str r5, [%[r], #212]\n\t"
+ "str r6, [%[r], #216]\n\t"
+ "str r7, [%[r], #220]\n\t"
+ "ldr r4, [%[a], #224]\n\t"
+ "ldr r5, [%[a], #228]\n\t"
+ "ldr r6, [%[a], #232]\n\t"
+ "ldr r7, [%[a], #236]\n\t"
+ "ldr r8, [%[b], #224]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "ldr r10, [%[b], #232]\n\t"
+ "ldr r14, [%[b], #236]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #224]\n\t"
+ "str r5, [%[r], #228]\n\t"
+ "str r6, [%[r], #232]\n\t"
+ "str r7, [%[r], #236]\n\t"
+ "ldr r4, [%[a], #240]\n\t"
+ "ldr r5, [%[a], #244]\n\t"
+ "ldr r6, [%[a], #248]\n\t"
+ "ldr r7, [%[a], #252]\n\t"
+ "ldr r8, [%[b], #240]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "ldr r10, [%[b], #248]\n\t"
+ "ldr r14, [%[b], #252]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #240]\n\t"
+ "str r5, [%[r], #244]\n\t"
+ "str r6, [%[r], #248]\n\t"
+ "str r7, [%[r], #252]\n\t"
+ "ldr r4, [%[a], #256]\n\t"
+ "ldr r5, [%[a], #260]\n\t"
+ "ldr r6, [%[a], #264]\n\t"
+ "ldr r7, [%[a], #268]\n\t"
+ "ldr r8, [%[b], #256]\n\t"
+ "ldr r9, [%[b], #260]\n\t"
+ "ldr r10, [%[b], #264]\n\t"
+ "ldr r14, [%[b], #268]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #256]\n\t"
+ "str r5, [%[r], #260]\n\t"
+ "str r6, [%[r], #264]\n\t"
+ "str r7, [%[r], #268]\n\t"
+ "ldr r4, [%[a], #272]\n\t"
+ "ldr r5, [%[a], #276]\n\t"
+ "ldr r6, [%[a], #280]\n\t"
+ "ldr r7, [%[a], #284]\n\t"
+ "ldr r8, [%[b], #272]\n\t"
+ "ldr r9, [%[b], #276]\n\t"
+ "ldr r10, [%[b], #280]\n\t"
+ "ldr r14, [%[b], #284]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #272]\n\t"
+ "str r5, [%[r], #276]\n\t"
+ "str r6, [%[r], #280]\n\t"
+ "str r7, [%[r], #284]\n\t"
+ "ldr r4, [%[a], #288]\n\t"
+ "ldr r5, [%[a], #292]\n\t"
+ "ldr r6, [%[a], #296]\n\t"
+ "ldr r7, [%[a], #300]\n\t"
+ "ldr r8, [%[b], #288]\n\t"
+ "ldr r9, [%[b], #292]\n\t"
+ "ldr r10, [%[b], #296]\n\t"
+ "ldr r14, [%[b], #300]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #288]\n\t"
+ "str r5, [%[r], #292]\n\t"
+ "str r6, [%[r], #296]\n\t"
+ "str r7, [%[r], #300]\n\t"
+ "ldr r4, [%[a], #304]\n\t"
+ "ldr r5, [%[a], #308]\n\t"
+ "ldr r6, [%[a], #312]\n\t"
+ "ldr r7, [%[a], #316]\n\t"
+ "ldr r8, [%[b], #304]\n\t"
+ "ldr r9, [%[b], #308]\n\t"
+ "ldr r10, [%[b], #312]\n\t"
+ "ldr r14, [%[b], #316]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #304]\n\t"
+ "str r5, [%[r], #308]\n\t"
+ "str r6, [%[r], #312]\n\t"
+ "str r7, [%[r], #316]\n\t"
+ "ldr r4, [%[a], #320]\n\t"
+ "ldr r5, [%[a], #324]\n\t"
+ "ldr r6, [%[a], #328]\n\t"
+ "ldr r7, [%[a], #332]\n\t"
+ "ldr r8, [%[b], #320]\n\t"
+ "ldr r9, [%[b], #324]\n\t"
+ "ldr r10, [%[b], #328]\n\t"
+ "ldr r14, [%[b], #332]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #320]\n\t"
+ "str r5, [%[r], #324]\n\t"
+ "str r6, [%[r], #328]\n\t"
+ "str r7, [%[r], #332]\n\t"
+ "ldr r4, [%[a], #336]\n\t"
+ "ldr r5, [%[a], #340]\n\t"
+ "ldr r6, [%[a], #344]\n\t"
+ "ldr r7, [%[a], #348]\n\t"
+ "ldr r8, [%[b], #336]\n\t"
+ "ldr r9, [%[b], #340]\n\t"
+ "ldr r10, [%[b], #344]\n\t"
+ "ldr r14, [%[b], #348]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #336]\n\t"
+ "str r5, [%[r], #340]\n\t"
+ "str r6, [%[r], #344]\n\t"
+ "str r7, [%[r], #348]\n\t"
+ "ldr r4, [%[a], #352]\n\t"
+ "ldr r5, [%[a], #356]\n\t"
+ "ldr r6, [%[a], #360]\n\t"
+ "ldr r7, [%[a], #364]\n\t"
+ "ldr r8, [%[b], #352]\n\t"
+ "ldr r9, [%[b], #356]\n\t"
+ "ldr r10, [%[b], #360]\n\t"
+ "ldr r14, [%[b], #364]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #352]\n\t"
+ "str r5, [%[r], #356]\n\t"
+ "str r6, [%[r], #360]\n\t"
+ "str r7, [%[r], #364]\n\t"
+ "ldr r4, [%[a], #368]\n\t"
+ "ldr r5, [%[a], #372]\n\t"
+ "ldr r6, [%[a], #376]\n\t"
+ "ldr r7, [%[a], #380]\n\t"
+ "ldr r8, [%[b], #368]\n\t"
+ "ldr r9, [%[b], #372]\n\t"
+ "ldr r10, [%[b], #376]\n\t"
+ "ldr r14, [%[b], #380]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #368]\n\t"
+ "str r5, [%[r], #372]\n\t"
+ "str r6, [%[r], #376]\n\t"
+ "str r7, [%[r], #380]\n\t"
+ "ldr r4, [%[a], #384]\n\t"
+ "ldr r5, [%[a], #388]\n\t"
+ "ldr r6, [%[a], #392]\n\t"
+ "ldr r7, [%[a], #396]\n\t"
+ "ldr r8, [%[b], #384]\n\t"
+ "ldr r9, [%[b], #388]\n\t"
+ "ldr r10, [%[b], #392]\n\t"
+ "ldr r14, [%[b], #396]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #384]\n\t"
+ "str r5, [%[r], #388]\n\t"
+ "str r6, [%[r], #392]\n\t"
+ "str r7, [%[r], #396]\n\t"
+ "ldr r4, [%[a], #400]\n\t"
+ "ldr r5, [%[a], #404]\n\t"
+ "ldr r6, [%[a], #408]\n\t"
+ "ldr r7, [%[a], #412]\n\t"
+ "ldr r8, [%[b], #400]\n\t"
+ "ldr r9, [%[b], #404]\n\t"
+ "ldr r10, [%[b], #408]\n\t"
+ "ldr r14, [%[b], #412]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #400]\n\t"
+ "str r5, [%[r], #404]\n\t"
+ "str r6, [%[r], #408]\n\t"
+ "str r7, [%[r], #412]\n\t"
+ "ldr r4, [%[a], #416]\n\t"
+ "ldr r5, [%[a], #420]\n\t"
+ "ldr r6, [%[a], #424]\n\t"
+ "ldr r7, [%[a], #428]\n\t"
+ "ldr r8, [%[b], #416]\n\t"
+ "ldr r9, [%[b], #420]\n\t"
+ "ldr r10, [%[b], #424]\n\t"
+ "ldr r14, [%[b], #428]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #416]\n\t"
+ "str r5, [%[r], #420]\n\t"
+ "str r6, [%[r], #424]\n\t"
+ "str r7, [%[r], #428]\n\t"
+ "ldr r4, [%[a], #432]\n\t"
+ "ldr r5, [%[a], #436]\n\t"
+ "ldr r6, [%[a], #440]\n\t"
+ "ldr r7, [%[a], #444]\n\t"
+ "ldr r8, [%[b], #432]\n\t"
+ "ldr r9, [%[b], #436]\n\t"
+ "ldr r10, [%[b], #440]\n\t"
+ "ldr r14, [%[b], #444]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #432]\n\t"
+ "str r5, [%[r], #436]\n\t"
+ "str r6, [%[r], #440]\n\t"
+ "str r7, [%[r], #444]\n\t"
+ "ldr r4, [%[a], #448]\n\t"
+ "ldr r5, [%[a], #452]\n\t"
+ "ldr r6, [%[a], #456]\n\t"
+ "ldr r7, [%[a], #460]\n\t"
+ "ldr r8, [%[b], #448]\n\t"
+ "ldr r9, [%[b], #452]\n\t"
+ "ldr r10, [%[b], #456]\n\t"
+ "ldr r14, [%[b], #460]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #448]\n\t"
+ "str r5, [%[r], #452]\n\t"
+ "str r6, [%[r], #456]\n\t"
+ "str r7, [%[r], #460]\n\t"
+ "ldr r4, [%[a], #464]\n\t"
+ "ldr r5, [%[a], #468]\n\t"
+ "ldr r6, [%[a], #472]\n\t"
+ "ldr r7, [%[a], #476]\n\t"
+ "ldr r8, [%[b], #464]\n\t"
+ "ldr r9, [%[b], #468]\n\t"
+ "ldr r10, [%[b], #472]\n\t"
+ "ldr r14, [%[b], #476]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #464]\n\t"
+ "str r5, [%[r], #468]\n\t"
+ "str r6, [%[r], #472]\n\t"
+ "str r7, [%[r], #476]\n\t"
+ "ldr r4, [%[a], #480]\n\t"
+ "ldr r5, [%[a], #484]\n\t"
+ "ldr r6, [%[a], #488]\n\t"
+ "ldr r7, [%[a], #492]\n\t"
+ "ldr r8, [%[b], #480]\n\t"
+ "ldr r9, [%[b], #484]\n\t"
+ "ldr r10, [%[b], #488]\n\t"
+ "ldr r14, [%[b], #492]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #480]\n\t"
+ "str r5, [%[r], #484]\n\t"
+ "str r6, [%[r], #488]\n\t"
+ "str r7, [%[r], #492]\n\t"
+ "ldr r4, [%[a], #496]\n\t"
+ "ldr r5, [%[a], #500]\n\t"
+ "ldr r6, [%[a], #504]\n\t"
+ "ldr r7, [%[a], #508]\n\t"
+ "ldr r8, [%[b], #496]\n\t"
+ "ldr r9, [%[b], #500]\n\t"
+ "ldr r10, [%[b], #504]\n\t"
+ "ldr r14, [%[b], #508]\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adcs r7, r7, r14\n\t"
+ "str r4, [%[r], #496]\n\t"
+ "str r5, [%[r], #500]\n\t"
+ "str r6, [%[r], #504]\n\t"
+ "str r7, [%[r], #508]\n\t"
+ "adc %[c], r12, r12\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ return c;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r Output: a single precision integer receiving the product (128 words).
+ * a Input:  a single precision integer multiplicand (64 words).
+ * b Input:  a single precision integer multiplier (64 words).
+ */
+static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #256\n\t"
+ "mov r10, #0\n\t"
+ "# A[0] * B[0]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r3, r4, r8, r9\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [sp]\n\t"
+ "# A[0] * B[1]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[0]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #4]\n\t"
+ "# A[0] * B[2]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[1]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[0]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #8]\n\t"
+ "# A[0] * B[3]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[2]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[1]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[0]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #12]\n\t"
+ "# A[0] * B[4]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[3]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[2]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[1]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[0]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #16]\n\t"
+ "# A[0] * B[5]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[4]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[3]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[2]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[1]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[0]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #20]\n\t"
+ "# A[0] * B[6]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[5]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[4]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[3]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[2]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[1]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[0]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #24]\n\t"
+ "# A[0] * B[7]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[6]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[5]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[4]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[3]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[2]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[1]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[0]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #28]\n\t"
+ "# A[0] * B[8]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[7]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[6]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[5]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[4]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[3]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[2]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[1]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[0]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #32]\n\t"
+ "# A[0] * B[9]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[8]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[7]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[6]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[5]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[4]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[3]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[2]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[1]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[0]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #36]\n\t"
+ "# A[0] * B[10]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[9]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[8]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[7]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[6]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[5]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[4]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[3]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[2]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[1]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[0]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #40]\n\t"
+ "# A[0] * B[11]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[10]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[9]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[8]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[7]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[6]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[5]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[4]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[3]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[2]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[1]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[0]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #44]\n\t"
+ "# A[0] * B[12]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[11]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[10]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[9]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[8]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[7]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[6]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[5]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[4]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[3]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[2]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[1]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[0]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #48]\n\t"
+ "# A[0] * B[13]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[12]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[11]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[10]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[9]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[8]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[7]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[6]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[5]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[4]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[3]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[2]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[1]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[0]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #52]\n\t"
+ "# A[0] * B[14]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[13]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[12]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[11]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[10]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[9]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[8]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[7]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[6]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[5]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[4]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[3]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[2]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[1]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[0]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #56]\n\t"
+ "# A[0] * B[15]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[14]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[13]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[12]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[11]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[10]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[9]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[8]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[7]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[6]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[5]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[4]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[3]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[2]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[1]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[0]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #60]\n\t"
+ "# A[0] * B[16]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[15]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[14]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[13]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[12]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[11]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[10]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[9]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[8]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[7]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[6]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[5]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[4]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[3]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[2]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[1]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[0]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #64]\n\t"
+ "# A[0] * B[17]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[16]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[15]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[14]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[13]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[12]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[11]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[10]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[9]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[8]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[7]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[6]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[5]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[4]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[3]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[2]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[1]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[0]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #68]\n\t"
+ "# A[0] * B[18]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[17]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[16]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[15]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[14]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[13]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[12]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[11]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[10]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[9]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[8]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[7]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[6]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[5]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[4]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[3]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[2]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[1]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[0]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #72]\n\t"
+ "# A[0] * B[19]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[18]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[17]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[16]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[15]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[14]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[13]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[12]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[11]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[10]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[9]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[8]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[7]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[6]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[5]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[4]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[3]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[2]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[1]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[0]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #76]\n\t"
+ "# A[0] * B[20]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[19]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[18]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[17]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[16]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[15]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[14]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[13]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[12]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[11]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[10]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[9]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[8]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[7]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[6]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[5]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[4]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[3]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[2]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[1]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[0]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #80]\n\t"
+ "# A[0] * B[21]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[20]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[19]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[18]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[17]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[16]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[15]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[14]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[13]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[12]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[11]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[10]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[9]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[8]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[7]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[6]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[5]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[4]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[3]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[2]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[1]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[0]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #84]\n\t"
+ "# A[0] * B[22]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[21]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[20]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[19]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[18]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[17]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[16]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[15]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[14]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[13]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[12]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[11]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[10]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[9]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[8]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[7]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[6]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[5]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[4]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[3]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[2]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[1]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[0]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #88]\n\t"
+ "# A[0] * B[23]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[22]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[21]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[20]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[19]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[18]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[17]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[16]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[15]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[14]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[13]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[12]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[11]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[10]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[9]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[8]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[7]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[6]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[5]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[4]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[3]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[2]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[1]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[0]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #92]\n\t"
+ "# A[0] * B[24]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[23]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[22]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[21]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[20]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[19]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[18]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[17]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[16]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[15]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[14]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[13]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[12]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[11]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[10]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[9]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[8]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[7]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[6]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[5]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[4]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[3]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[2]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[1]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[0]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #96]\n\t"
+ "# A[0] * B[25]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[24]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[23]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[22]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[21]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[20]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[19]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[18]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[17]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[16]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[15]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[14]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[13]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[12]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[11]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[10]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[9]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[8]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[7]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[6]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[5]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[4]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[3]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[2]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[1]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[0]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #100]\n\t"
+ "# A[0] * B[26]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[25]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[24]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[23]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[22]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[21]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[20]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[19]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[18]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[17]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[16]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[15]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[14]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[13]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[12]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[11]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[10]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[9]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[8]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[7]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[6]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[5]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[4]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[3]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[2]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[1]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[0]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #104]\n\t"
+ "# A[0] * B[27]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[26]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[25]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[24]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[23]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[22]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[21]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[20]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[19]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[18]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[17]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[16]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[15]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[14]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[13]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[12]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[11]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[10]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[9]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[8]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[7]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[6]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[5]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[4]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[3]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[2]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[1]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[0]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #108]\n\t"
+ "# A[0] * B[28]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[27]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[26]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[25]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[24]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[23]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[22]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[21]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[20]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[19]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[18]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[17]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[16]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[15]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[14]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[13]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[12]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[11]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[10]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[9]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[8]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[7]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[6]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[5]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[4]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[3]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[2]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[1]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[0]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #112]\n\t"
+ "# A[0] * B[29]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[28]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[27]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[26]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[25]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[24]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[23]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[22]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[21]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[20]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[19]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[18]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[17]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[16]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[15]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[14]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[13]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[12]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[11]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[10]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[9]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[8]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[7]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[6]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[5]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[4]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[3]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[2]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[1]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[0]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #116]\n\t"
+ "# A[0] * B[30]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[29]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[28]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[27]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[26]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[25]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[24]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[23]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[22]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[21]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[20]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[19]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[18]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[17]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[16]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[15]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[14]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[13]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[12]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[11]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[10]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[9]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[8]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[7]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[6]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[5]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[4]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[3]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[2]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[1]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[0]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #120]\n\t"
+ "# A[0] * B[31]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[30]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[29]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[28]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[27]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[26]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[25]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[24]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[23]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[22]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[21]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[20]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[19]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[18]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[17]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[16]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[15]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[14]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[13]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[12]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[11]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[10]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[9]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[8]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[7]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[6]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[5]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[4]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[3]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[2]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[1]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[0]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #124]\n\t"
+ "# A[0] * B[32]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[31]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[30]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[29]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[28]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[27]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[26]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[25]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[24]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[23]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[22]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[21]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[20]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[19]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[18]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[17]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[16]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[15]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[14]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[13]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[12]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[11]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[10]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[9]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[8]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[7]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[6]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[5]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[4]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[3]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[2]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[1]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[0]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #128]\n\t"
+ "# A[0] * B[33]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[32]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[31]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[30]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[29]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[28]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[27]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[26]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[25]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[24]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[23]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[22]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[21]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[20]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[19]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[18]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[17]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[16]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[15]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[14]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[13]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[12]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[11]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[10]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[9]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[8]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[7]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[6]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[5]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[4]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[3]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[2]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[1]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[0]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #132]\n\t"
+ "# A[0] * B[34]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[33]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[32]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[31]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[30]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[29]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[28]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[27]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[26]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[25]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[24]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[23]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[22]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[21]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[20]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[19]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[18]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[17]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[16]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[15]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[14]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[13]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[12]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[11]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[10]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[9]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[8]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[7]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[6]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[5]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[4]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[3]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[2]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[1]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[0]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #136]\n\t"
+ "# A[0] * B[35]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[34]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[33]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[32]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[31]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[30]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[29]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[28]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[27]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[26]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[25]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[24]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[23]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[22]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[21]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[20]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[19]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[18]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[17]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[16]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[15]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[14]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[13]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[12]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[11]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[10]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[9]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[8]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[7]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[6]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[5]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[4]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[3]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[2]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[1]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[0]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #140]\n\t"
+ "# A[0] * B[36]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[35]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[34]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[33]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[32]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[31]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[30]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[29]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[28]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[27]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[26]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[25]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[24]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[23]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[22]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[21]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[20]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[19]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[18]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[17]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[16]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[15]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[14]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[13]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[12]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[11]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[10]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[9]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[8]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[7]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[6]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[5]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[4]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[3]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[2]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[1]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[0]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #144]\n\t"
+ "# A[0] * B[37]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[36]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[35]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[34]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[33]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[32]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[31]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[30]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[29]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[28]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[27]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[26]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[25]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[24]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[23]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[22]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[21]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[20]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[19]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[18]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[17]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[16]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[15]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[14]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[13]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[12]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[11]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[10]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[9]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[8]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[7]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[6]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[5]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[4]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[3]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[2]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[1]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[0]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #148]\n\t"
+ "# A[0] * B[38]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[37]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[36]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[35]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[34]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[33]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[32]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[31]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[30]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[29]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[28]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[27]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[26]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[25]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[24]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[23]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[22]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[21]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[20]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[19]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[18]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[17]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[16]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[15]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[14]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[13]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[12]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[11]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[10]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[9]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[8]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[7]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[6]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[5]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[4]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[3]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[2]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[1]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[0]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #152]\n\t"
+ "# A[0] * B[39]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[38]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[37]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[36]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[35]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[34]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[33]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[32]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[31]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[30]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[29]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[28]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[27]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[26]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[25]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[24]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[23]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[22]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[21]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[20]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[19]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[18]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[17]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[16]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[15]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[14]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[13]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[12]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[11]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[10]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[9]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[8]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[7]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[6]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[5]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[4]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[3]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[2]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[1]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[0]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #156]\n\t"
+ "# A[0] * B[40]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[39]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[38]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[37]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[36]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[35]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[34]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[33]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[32]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[31]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[30]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[29]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[28]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[27]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[26]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[25]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[24]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[23]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[22]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[21]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[20]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[19]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[18]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[17]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[16]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[15]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[14]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[13]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[12]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[11]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[10]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[9]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[8]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[7]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[6]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[5]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[4]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[3]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[2]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[1]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[0]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #160]\n\t"
+ "# A[0] * B[41]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[40]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[39]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[38]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[37]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[36]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[35]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[34]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[33]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[32]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[31]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[30]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[29]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[28]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[27]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[26]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[25]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[24]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[23]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[22]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[21]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[20]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[19]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[18]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[17]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[16]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[15]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[14]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[13]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[12]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[11]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[10]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[9]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[8]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[7]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[6]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[5]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[4]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[3]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[2]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[1]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[0]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #164]\n\t"
+ "# A[0] * B[42]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[41]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[40]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[39]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[38]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[37]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[36]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[35]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[34]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[33]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[32]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[31]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[30]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[29]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[28]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[27]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[26]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[25]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[24]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[23]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[22]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[21]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[20]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[19]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[18]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[17]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[16]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[15]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[14]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[13]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[12]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[11]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[10]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[9]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[8]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[7]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[6]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[5]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[4]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[3]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[2]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[1]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[0]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #168]\n\t"
+ "# A[0] * B[43]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[42]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[41]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[40]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[39]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[38]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[37]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[36]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[35]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[34]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[33]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[32]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[31]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[30]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[29]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[28]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[27]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[26]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[25]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[24]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[23]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[22]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[21]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[20]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[19]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[18]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[17]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[16]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[15]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[14]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[13]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[12]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[11]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[10]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[9]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[8]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[7]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[6]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[5]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[4]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[3]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[2]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[1]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[0]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #172]\n\t"
+ "# A[0] * B[44]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[43]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[42]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[41]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[40]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[39]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[38]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[37]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[36]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[35]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[34]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[33]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[32]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[31]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[30]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[29]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[28]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[27]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[26]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[25]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[24]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[23]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[22]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[21]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[20]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[19]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[18]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[17]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[16]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[15]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[14]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[13]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[12]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[11]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[10]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[9]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[8]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[7]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[6]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[5]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[4]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[3]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[2]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[1]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[0]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #176]\n\t"
+ "# A[0] * B[45]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[44]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[43]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[42]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[41]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[40]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[39]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[38]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[37]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[36]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[35]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[34]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[33]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[32]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[31]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[30]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[29]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[28]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[27]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[26]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[25]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[24]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[23]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[22]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[21]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[20]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[19]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[18]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[17]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[16]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[15]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[14]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[13]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[12]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[11]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[10]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[9]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[8]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[7]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[6]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[5]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[4]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[3]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[2]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[1]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[0]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #180]\n\t"
+ "# A[0] * B[46]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[45]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[44]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[43]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[42]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[41]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[40]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[39]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[38]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[37]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[36]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[35]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[34]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[33]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[32]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[31]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[30]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[29]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[28]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[27]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[26]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[25]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[24]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[23]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[22]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[21]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[20]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[19]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[18]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[17]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[16]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[15]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[14]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[13]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[12]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[11]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[10]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[9]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[8]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[7]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[6]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[5]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[4]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[3]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[2]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[1]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[0]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #184]\n\t"
+ "# A[0] * B[47]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[46]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[45]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[44]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[43]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[42]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[41]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[40]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[39]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[38]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[37]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[36]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[35]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[34]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[33]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[32]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[31]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[30]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[29]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[28]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[27]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[26]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[25]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[24]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[23]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[22]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[21]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[20]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[19]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[18]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[17]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[16]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[15]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[14]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[13]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[12]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[11]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[10]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[9]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[8]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[7]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[6]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[5]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[4]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[3]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[2]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[1]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[0]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #188]\n\t"
+ "# A[0] * B[48]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[47]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[46]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[45]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[44]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[43]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[42]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[41]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[40]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[39]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[38]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[37]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[36]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[35]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[34]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[33]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[32]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[31]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[30]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[29]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[28]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[27]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[26]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[25]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[24]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[23]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[22]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[21]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[20]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[19]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[18]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[17]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[16]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[15]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[14]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[13]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[12]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[11]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[10]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[9]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[8]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[7]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[6]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[5]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[4]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[3]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[2]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[1]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[0]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #192]\n\t"
+ "# A[0] * B[49]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[48]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[47]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[46]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[45]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[44]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[43]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[42]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[41]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[40]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[39]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[38]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[37]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[36]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[35]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[34]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[33]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[32]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[31]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[30]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[29]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[28]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[27]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[26]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[25]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[24]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[23]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[22]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[21]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[20]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[19]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[18]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[17]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[16]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[15]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[14]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[13]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[12]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[11]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[10]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[9]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[8]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[7]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[6]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[5]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[4]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[3]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[2]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[1]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[0]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #196]\n\t"
+ "# A[0] * B[50]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[49]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[48]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[47]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[46]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[45]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[44]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[43]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[42]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[41]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[40]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[39]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[38]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[37]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[36]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[35]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[34]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[33]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[32]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[31]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[30]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[29]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[28]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[27]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[26]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[25]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[24]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[23]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[22]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[21]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[20]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[19]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[18]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[17]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[16]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[15]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[14]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[13]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[12]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[11]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[10]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[9]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[8]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[7]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[6]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[5]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[4]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[3]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[2]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[1]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[0]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #200]\n\t"
+ "# A[0] * B[51]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[50]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[49]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[48]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[47]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[46]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[45]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[44]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[43]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[42]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[41]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[40]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[39]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[38]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[37]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[36]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[35]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[34]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[33]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[32]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[31]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[30]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[29]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[28]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[27]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[26]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[25]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[24]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[23]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[22]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[21]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[20]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[19]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[18]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[17]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[16]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[15]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[14]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[13]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[12]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[11]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[10]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[9]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[8]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[7]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[6]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[5]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[4]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[3]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[2]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[1]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[0]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #204]\n\t"
+ "# A[0] * B[52]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[51]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[50]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[49]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[48]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[47]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[46]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[45]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[44]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[43]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[42]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[41]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[40]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[39]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[38]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[37]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[36]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[35]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[34]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[33]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[32]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[31]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[30]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[29]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[28]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[27]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[26]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[25]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[24]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[23]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[22]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[21]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[20]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[19]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[18]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[17]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[16]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[15]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[14]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[13]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[12]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[11]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[10]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[9]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[8]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[7]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[6]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[5]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[4]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[3]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[2]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[1]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[0]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #208]\n\t"
+ "# A[0] * B[53]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[52]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[51]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[50]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[49]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[48]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[47]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[46]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[45]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[44]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[43]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[42]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[41]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[40]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[39]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[38]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[37]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[36]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[35]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[34]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[33]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[32]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[31]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[30]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[29]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[28]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[27]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[26]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[25]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[24]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[23]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[22]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[21]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[20]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[19]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[18]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[17]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[16]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[15]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[14]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[13]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[12]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[11]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[10]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[9]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[8]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[7]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[6]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[5]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[4]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[3]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[2]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[1]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[0]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #212]\n\t"
+ "# A[0] * B[54]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[53]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[52]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[51]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[50]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[49]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[48]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[47]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[46]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[45]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[44]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[43]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[42]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[41]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[40]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[39]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[38]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[37]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[36]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[35]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[34]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[33]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[32]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[31]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[30]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[29]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[28]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[27]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[26]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[25]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[24]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[23]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[22]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[21]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[20]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[19]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[18]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[17]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[16]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[15]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[14]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[13]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[12]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[11]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[10]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[9]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[8]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[7]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[6]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[5]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[4]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[3]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[2]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[1]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[0]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #216]\n\t"
+ "# A[0] * B[55]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[54]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[53]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[52]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[51]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[50]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[49]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[48]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[47]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[46]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[45]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[44]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[43]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[42]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[41]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[40]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[39]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[38]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[37]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[36]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[35]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[34]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[33]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[32]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[31]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[30]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[29]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[28]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[27]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[26]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[25]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[24]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[23]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[22]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[21]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[20]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[19]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[18]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[17]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[16]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[15]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[14]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[13]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[12]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[11]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[10]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[9]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[8]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[7]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[6]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[5]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[4]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[3]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[2]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[1]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[0]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #220]\n\t"
+ "# A[0] * B[56]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[55]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[54]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[53]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[52]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[51]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[50]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[49]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[48]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[47]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[46]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[45]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[44]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[43]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[42]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[41]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[40]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[39]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[38]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[37]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[36]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[35]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[34]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[33]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[32]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[31]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[30]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[29]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[28]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[27]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[26]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[25]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[24]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[23]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[22]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[21]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[20]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[19]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[18]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[17]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[16]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[15]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[14]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[13]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[12]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[11]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[10]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[9]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[8]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[7]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[6]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[5]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[4]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[3]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[2]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[1]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[0]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #224]\n\t"
+ "# A[0] * B[57]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[56]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[55]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[54]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[53]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[52]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[51]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[50]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[49]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[48]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[47]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[46]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[45]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[44]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[43]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[42]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[41]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[40]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[39]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[38]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[37]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[36]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[35]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[34]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[33]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[32]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[31]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[30]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[29]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[28]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[27]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[26]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[25]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[24]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[23]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[22]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[21]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[20]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[19]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[18]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[17]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[16]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[15]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[14]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[13]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[12]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[11]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[10]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[9]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[8]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[7]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[6]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[5]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[4]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[3]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[2]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[1]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[0]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #228]\n\t"
+ "# A[0] * B[58]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[57]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[56]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[55]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[54]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[53]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[52]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[51]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[50]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[49]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[48]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[47]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[46]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[45]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[44]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[43]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[42]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[41]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[40]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[39]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[38]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[37]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[36]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[35]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[34]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[33]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[32]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[31]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[30]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[29]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[28]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[27]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[26]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[25]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[24]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[23]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[22]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[21]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[20]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[19]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[18]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[17]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[16]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[15]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[14]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[13]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[12]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[11]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[10]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[9]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[8]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[7]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[6]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[5]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[4]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[3]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[2]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[1]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[0]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #232]\n\t"
+ "# A[0] * B[59]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[58]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[57]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[56]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[55]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[54]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[53]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[52]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[51]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[50]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[49]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[48]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[47]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[46]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[45]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[44]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[43]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[42]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[41]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[40]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[39]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[38]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[37]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[36]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[35]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[34]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[33]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[32]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[31]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[30]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[29]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[28]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[27]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[26]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[25]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[24]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[23]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[22]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[21]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[20]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[19]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[18]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[17]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[16]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[15]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[14]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[13]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[12]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[11]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[10]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[9]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[8]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[7]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[6]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[5]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[4]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[3]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[2]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[1]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[0]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #236]\n\t"
+ "# A[0] * B[60]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[59]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[58]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[57]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[56]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[55]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[54]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[53]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[52]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[51]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[50]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[49]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[48]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[47]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[46]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[45]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[44]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[43]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[42]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[41]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[40]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[39]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[38]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[37]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[36]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[35]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[34]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[33]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[32]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[31]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[30]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[29]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[28]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[27]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[26]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[25]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[24]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[23]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[22]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[21]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[20]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[19]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[18]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[17]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[16]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[15]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[14]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[13]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[12]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[11]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[10]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[9]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[8]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[7]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[6]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[5]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[4]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[3]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[2]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[1]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[0]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #240]\n\t"
+ "# A[0] * B[61]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[60]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[59]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[58]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[57]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[56]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[55]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[54]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[53]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[52]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[51]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[50]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[49]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[48]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[47]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[46]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[45]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[44]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[43]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[42]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[41]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[40]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[39]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[38]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[37]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[36]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[35]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[34]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[33]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[32]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[31]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[30]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[29]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[28]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[27]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[26]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[25]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[24]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[23]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[22]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[21]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[20]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[19]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[18]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[17]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[16]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[15]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[14]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[13]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[12]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[11]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[10]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[9]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[8]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[7]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[6]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[5]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[4]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[3]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[2]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[1]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[0]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #244]\n\t"
+ "# A[0] * B[62]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[61]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[60]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[59]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[58]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[57]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[56]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[55]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[54]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[53]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[52]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[51]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[50]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[49]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[48]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[47]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[46]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[45]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[44]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[43]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[42]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[41]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[40]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[39]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[38]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[37]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[36]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[35]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[34]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[33]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[32]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[31]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[30]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[29]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[28]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[27]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[26]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[25]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[24]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[23]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[22]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[21]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[20]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[19]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[18]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[17]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[16]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[15]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[14]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[13]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[12]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[11]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[10]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[9]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[8]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[7]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[6]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[5]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[4]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[3]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[2]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[1]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[0]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #248]\n\t"
+ "# A[0] * B[63]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[62]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[61]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[60]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[59]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[58]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[57]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[56]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[55]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[54]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[53]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[52]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[51]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[50]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[49]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[48]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[47]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[46]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[45]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[44]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[43]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[42]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[41]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[40]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[39]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[38]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[37]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[36]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[35]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[34]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[33]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[32]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[31]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[30]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[29]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[28]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[27]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[26]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[25]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[24]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[23]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[22]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[21]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[20]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[19]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[18]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[17]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[16]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[15]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[14]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[13]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[12]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[11]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[10]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[9]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[8]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[7]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[6]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[5]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[4]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[3]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[2]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[1]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[0]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #252]\n\t"
+ "# A[1] * B[63]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[2] * B[62]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[61]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[60]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[59]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[58]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[57]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[56]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[55]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[54]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[53]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[52]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[51]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[50]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[49]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[48]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[47]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[46]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[45]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[44]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[43]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[42]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[41]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[40]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[39]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[38]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[37]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[36]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[35]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[34]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[33]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[32]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[31]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[30]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[29]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[28]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[27]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[26]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[25]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[24]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[23]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[22]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[21]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[20]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[19]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[18]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[17]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[16]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[15]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[14]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[13]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[12]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[11]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[10]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[9]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[8]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[7]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[6]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[5]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[4]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[3]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[2]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[1]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #256]\n\t"
+ "# A[2] * B[63]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[3] * B[62]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[61]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[60]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[59]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[58]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[57]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[56]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[55]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[54]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[53]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[52]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[51]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[50]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[49]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[48]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[47]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[46]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[45]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[44]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[43]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[42]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[41]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[40]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[39]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[38]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[37]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[36]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[35]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[34]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[33]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[32]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[31]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[30]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[29]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[28]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[27]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[26]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[25]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[24]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[23]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[22]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[21]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[20]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[19]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[18]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[17]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[16]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[15]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[14]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[13]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[12]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[11]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[10]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[9]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[8]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[7]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[6]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[5]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[4]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[3]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[2]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #260]\n\t"
+ "# A[3] * B[63]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[4] * B[62]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[61]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[60]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[59]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[58]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[57]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[56]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[55]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[54]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[53]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[52]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[51]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[50]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[49]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[48]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[47]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[46]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[45]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[44]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[43]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[42]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[41]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[40]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[39]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[38]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[37]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[36]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[35]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[34]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[33]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[32]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[31]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[30]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[29]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[28]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[27]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[26]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[25]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[24]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[23]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[22]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[21]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[20]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[19]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[18]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[17]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[16]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[15]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[14]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[13]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[12]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[11]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[10]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[9]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[8]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[7]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[6]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[5]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[4]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[3]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #264]\n\t"
+ "# A[4] * B[63]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[5] * B[62]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[61]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[60]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[59]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[58]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[57]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[56]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[55]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[54]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[53]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[52]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[51]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[50]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[49]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[48]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[47]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[46]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[45]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[44]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[43]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[42]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[41]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[40]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[39]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[38]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[37]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[36]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[35]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[34]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[33]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[32]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[31]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[30]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[29]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[28]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[27]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[26]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[25]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[24]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[23]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[22]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[21]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[20]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[19]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[18]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[17]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[16]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[15]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[14]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[13]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[12]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[11]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[10]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[9]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[8]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[7]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[6]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[5]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[4]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #268]\n\t"
+ "# A[5] * B[63]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[6] * B[62]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[61]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[60]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[59]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[58]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[57]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[56]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[55]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[54]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[53]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[52]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[51]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[50]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[49]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[48]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[47]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[46]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[45]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[44]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[43]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[42]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[41]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[40]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[39]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[38]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[37]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[36]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[35]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[34]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[33]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[32]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[31]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[30]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[29]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[28]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[27]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[26]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[25]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[24]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[23]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[22]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[21]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[20]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[19]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[18]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[17]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[16]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[15]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[14]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[13]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[12]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[11]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[10]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[9]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[8]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[7]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[6]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[5]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #272]\n\t"
+ "# A[6] * B[63]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[7] * B[62]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[61]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[60]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[59]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[58]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[57]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[56]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[55]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[54]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[53]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[52]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[51]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[50]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[49]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[48]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[47]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[46]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[45]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[44]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[43]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[42]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[41]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[40]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[39]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[38]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[37]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[36]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[35]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[34]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[33]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[32]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[31]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[30]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[29]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[28]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[27]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[26]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[25]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[24]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[23]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[22]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[21]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[20]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[19]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[18]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[17]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[16]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[15]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[14]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[13]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[12]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[11]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[10]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[9]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[8]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[7]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[6]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #276]\n\t"
+ "# A[7] * B[63]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[8] * B[62]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[61]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[60]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[59]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[58]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[57]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[56]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[55]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[54]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[53]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[52]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[51]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[50]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[49]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[48]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[47]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[46]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[45]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[44]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[43]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[42]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[41]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[40]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[39]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[38]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[37]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[36]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[35]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[34]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[33]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[32]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[31]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[30]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[29]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[28]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[27]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[26]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[25]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[24]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[23]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[22]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[21]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[20]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[19]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[18]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[17]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[16]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[15]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[14]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[13]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[12]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[11]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[10]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[9]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[8]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[7]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #280]\n\t"
+ "# A[8] * B[63]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[9] * B[62]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[61]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[60]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[12] * B[59]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[58]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[57]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[56]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[55]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[54]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[53]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[52]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[51]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[50]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[49]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[48]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[47]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[46]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[45]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[44]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[43]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[42]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[41]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[40]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[39]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[38]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[37]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[36]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[35]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[34]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[33]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[32]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[31]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[30]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[29]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[28]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[27]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[26]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[25]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[24]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[23]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[22]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[21]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[20]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[19]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[18]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[17]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[16]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[15]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[14]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[13]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[12]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[11]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[10]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[9]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[8]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #284]\n\t"
+ "# A[9] * B[63]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[10] * B[62]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[61]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[12] * B[60]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[13] * B[59]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[58]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[57]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[56]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[55]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[54]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[53]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[52]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[51]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[50]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[49]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[48]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[47]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[46]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[45]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[44]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[43]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[42]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[41]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[40]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[39]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[38]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[37]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[36]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[35]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[34]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[33]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[32]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[31]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[30]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[29]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[28]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[27]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[26]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[25]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[24]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[23]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[22]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[21]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[20]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[19]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[18]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[17]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[16]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[15]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[14]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[13]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[12]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[11]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[10]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[9]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #288]\n\t"
+ "# A[10] * B[63]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[11] * B[62]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[12] * B[61]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[13] * B[60]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[14] * B[59]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[58]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[57]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[56]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[55]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[54]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[53]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[52]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[51]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[50]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[49]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[48]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[47]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[46]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[45]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[44]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[43]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[42]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[41]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[40]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[39]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[38]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[37]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[36]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[35]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[34]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[33]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[32]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[31]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[30]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[29]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[28]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[27]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[26]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[25]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[24]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[23]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[22]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[21]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[20]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[19]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[18]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[17]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[16]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[15]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[14]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[13]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[12]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[11]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[10]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #292]\n\t"
+ "# A[11] * B[63]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[12] * B[62]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[13] * B[61]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[14] * B[60]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[15] * B[59]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[58]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[57]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[56]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[55]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[54]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[53]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[52]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[51]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[50]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[49]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[48]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[47]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[46]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[45]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[44]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[43]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[42]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[41]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[40]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[39]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[38]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[37]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[36]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[35]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[34]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[33]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[32]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[31]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[30]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[29]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[28]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[27]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[26]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[25]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[24]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[23]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[22]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[21]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[20]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[19]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[18]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[17]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[16]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[15]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[14]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[13]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[12]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[11]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #296]\n\t"
+ "# A[12] * B[63]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[13] * B[62]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[14] * B[61]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[15] * B[60]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[16] * B[59]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[58]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[57]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[56]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[55]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[54]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[53]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[52]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[51]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[50]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[49]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[48]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[47]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[46]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[45]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[44]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[43]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[42]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[41]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[40]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[39]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[38]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[37]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[36]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[35]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[34]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[33]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[32]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[31]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[30]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[29]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[28]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[27]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[26]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[25]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[24]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[23]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[22]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[21]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[20]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[19]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[18]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[17]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[16]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[15]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[14]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[13]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[12]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #48]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #300]\n\t"
+ "# A[13] * B[63]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[14] * B[62]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[15] * B[61]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[16] * B[60]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[17] * B[59]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[58]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[57]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[56]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[55]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[54]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[53]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[52]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[51]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[50]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[49]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[48]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[47]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[46]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[45]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[44]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[43]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[42]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[41]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[40]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[39]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[38]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[37]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[36]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[35]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[34]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[33]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[32]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[31]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[30]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[29]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[28]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[27]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[26]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[25]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[24]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[23]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[22]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[21]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[20]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[19]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[18]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[17]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[16]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[15]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[14]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[13]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #52]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #304]\n\t"
+ "# A[14] * B[63]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[15] * B[62]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[16] * B[61]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[17] * B[60]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[18] * B[59]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[58]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[57]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[56]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[55]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[54]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[53]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[52]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[51]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[50]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[49]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[48]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[47]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[46]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[45]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[44]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[43]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[42]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[41]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[40]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[39]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[38]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[37]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[36]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[35]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[34]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[33]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[32]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[31]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[30]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[29]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[28]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[27]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[26]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[25]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[24]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[23]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[22]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[21]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[20]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[19]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[18]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[17]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[16]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[15]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[14]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #56]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #308]\n\t"
+ "# A[15] * B[63]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[16] * B[62]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[17] * B[61]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[18] * B[60]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[19] * B[59]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[58]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[57]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[56]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[55]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[54]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[53]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[52]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[51]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[50]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[49]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[48]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[47]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[46]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[45]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[44]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[43]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[42]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[41]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[40]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[39]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[38]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[37]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[36]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[35]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[34]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[33]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[32]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[31]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[30]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[29]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[28]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[27]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[26]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[25]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[24]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[23]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[22]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[21]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[20]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[19]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[18]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[17]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[16]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[15]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #60]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #312]\n\t"
+ "# A[16] * B[63]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[17] * B[62]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[18] * B[61]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[19] * B[60]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[20] * B[59]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[58]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[57]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[56]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[55]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[54]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[53]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[52]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[51]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[50]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[49]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[48]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[47]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[46]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[45]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[44]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[43]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[42]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[41]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[40]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[39]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[38]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[37]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[36]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[35]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[34]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[33]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[32]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[31]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[30]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[29]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[28]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[27]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[26]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[25]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[24]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[23]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[22]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[21]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[20]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[19]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[18]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[17]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[16]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #64]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #316]\n\t"
+ "# A[17] * B[63]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[18] * B[62]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[19] * B[61]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[20] * B[60]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[21] * B[59]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[58]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[57]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[56]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[55]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[54]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[53]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[52]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[51]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[50]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[49]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[48]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[47]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[46]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[45]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[44]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[43]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[42]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[41]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[40]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[39]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[38]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[37]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[36]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[35]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[34]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[33]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[32]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[31]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[30]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[29]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[28]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[27]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[26]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[25]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[24]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[23]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[22]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[21]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[20]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[19]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[18]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[17]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #68]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #320]\n\t"
+ "# A[18] * B[63]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[19] * B[62]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[20] * B[61]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[21] * B[60]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[22] * B[59]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[58]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[57]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[56]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[55]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[54]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[53]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[52]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[51]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[50]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[49]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[48]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[47]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[46]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[45]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[44]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[43]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[42]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[41]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[40]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[39]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[38]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[37]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[36]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[35]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[34]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[33]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[32]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[31]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[30]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[29]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[28]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[27]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[26]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[25]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[24]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[23]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[22]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[21]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[20]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[19]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[18]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #72]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #324]\n\t"
+ "# A[19] * B[63]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[20] * B[62]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[21] * B[61]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[22] * B[60]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[23] * B[59]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[58]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[57]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[56]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[55]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[54]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[53]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[52]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[51]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[50]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[49]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[48]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[47]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[46]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[45]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[44]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[43]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[42]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[41]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[40]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[39]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[38]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[37]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[36]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[35]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[34]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[33]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[32]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[31]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[30]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[29]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[28]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[27]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[26]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[25]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[24]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[23]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[22]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[21]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[20]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[19]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #76]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #328]\n\t"
+ "# A[20] * B[63]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[21] * B[62]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[22] * B[61]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[23] * B[60]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[24] * B[59]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[58]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[57]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[56]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[55]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[54]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[53]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[52]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[51]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[50]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[49]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[48]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[47]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[46]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[45]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[44]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[43]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[42]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[41]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[40]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[39]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[38]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[37]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[36]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[35]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[34]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[33]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[32]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[31]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[30]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[29]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[28]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[27]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[26]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[25]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[24]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[23]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[22]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[21]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[20]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #80]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #332]\n\t"
+ "# A[21] * B[63]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[22] * B[62]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[23] * B[61]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[24] * B[60]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[25] * B[59]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[58]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[57]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[56]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[55]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[54]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[53]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[52]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[51]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[50]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[49]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[48]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[47]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[46]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[45]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[44]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[43]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[42]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[41]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[40]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[39]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[38]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[37]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[36]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[35]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[34]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[33]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[32]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[31]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[30]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[29]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[28]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[27]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[26]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[25]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[24]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[23]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[22]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[21]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #84]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #336]\n\t"
+ "# A[22] * B[63]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[23] * B[62]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[24] * B[61]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[25] * B[60]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[26] * B[59]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[58]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[57]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[56]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[55]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[54]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[53]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[52]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[51]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[50]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[49]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[48]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[47]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[46]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[45]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[44]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[43]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[42]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[41]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[40]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[39]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[38]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[37]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[36]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[35]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[34]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[33]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[32]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[31]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[30]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[29]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[28]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[27]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[26]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[25]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[24]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[23]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[22]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #88]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #340]\n\t"
+ "# A[23] * B[63]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[24] * B[62]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[25] * B[61]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[26] * B[60]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[27] * B[59]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[58]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[57]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[56]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[55]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[54]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[53]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[52]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[51]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[50]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[49]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[48]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[47]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[46]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[45]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[44]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[43]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[42]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[41]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[40]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[39]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[38]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[37]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[36]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[35]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[34]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[33]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[32]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[31]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[30]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[29]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[28]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[27]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[26]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[25]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[24]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[23]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #92]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #344]\n\t"
+ "# A[24] * B[63]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[25] * B[62]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[26] * B[61]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[27] * B[60]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[28] * B[59]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[58]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[57]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[56]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[55]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[54]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[53]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[52]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[51]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[50]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[49]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[48]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[47]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[46]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[45]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[44]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[43]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[42]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[41]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[40]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[39]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[38]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[37]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[36]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[35]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[34]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[33]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[32]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[31]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[30]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[29]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[28]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[27]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[26]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[25]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[24]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #96]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #348]\n\t"
+ "# A[25] * B[63]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[26] * B[62]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[27] * B[61]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[28] * B[60]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[29] * B[59]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[58]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[57]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[56]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[55]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[54]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[53]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[52]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[51]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[50]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[49]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[48]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[47]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[46]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[45]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[44]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[43]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[42]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[41]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[40]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[39]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[38]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[37]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[36]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[35]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[34]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[33]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[32]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[31]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[30]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[29]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[28]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[27]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[26]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[25]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #100]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #352]\n\t"
+ "# A[26] * B[63]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[27] * B[62]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[28] * B[61]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[29] * B[60]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[30] * B[59]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[58]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[57]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[56]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[55]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[54]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[53]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[52]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[51]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[50]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[49]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[48]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[47]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[46]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[45]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[44]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[43]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[42]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[41]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[40]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[39]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[38]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[37]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[36]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[35]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[34]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[33]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[32]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[31]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[30]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[29]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[28]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[27]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[26]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #104]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #356]\n\t"
+ "# A[27] * B[63]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[28] * B[62]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[29] * B[61]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[30] * B[60]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[31] * B[59]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[58]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[57]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[56]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[55]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[54]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[53]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[52]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[51]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[50]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[49]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[48]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[47]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[46]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[45]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[44]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[43]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[42]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[41]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[40]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[39]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[38]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[37]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[36]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[35]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[34]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[33]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[32]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[31]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[30]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[29]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[28]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[27]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #108]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #360]\n\t"
+ "# A[28] * B[63]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[29] * B[62]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[30] * B[61]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[31] * B[60]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[32] * B[59]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[58]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[57]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[56]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[55]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[54]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[53]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[52]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[51]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[50]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[49]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[48]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[47]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[46]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[45]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[44]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[43]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[42]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[41]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[40]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[39]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[38]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[37]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[36]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[35]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[34]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[33]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[32]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[31]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[30]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[29]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[28]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #112]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #364]\n\t"
+ "# A[29] * B[63]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[30] * B[62]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[31] * B[61]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[32] * B[60]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[33] * B[59]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[58]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[57]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[56]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[55]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[54]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[53]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[52]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[51]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[50]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[49]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[48]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[47]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[46]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[45]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[44]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[43]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[42]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[41]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[40]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[39]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[38]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[37]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[36]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[35]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[34]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[33]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[32]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[31]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[30]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[29]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #116]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #368]\n\t"
+ "# A[30] * B[63]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[31] * B[62]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[32] * B[61]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[33] * B[60]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[34] * B[59]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[58]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[57]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[56]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[55]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[54]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[53]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[52]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[51]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[50]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[49]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[48]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[47]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[46]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[45]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[44]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[43]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[42]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[41]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[40]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[39]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[38]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[37]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[36]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[35]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[34]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[33]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[32]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[31]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[30]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #120]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #372]\n\t"
+ "# A[31] * B[63]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[32] * B[62]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[33] * B[61]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[34] * B[60]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[35] * B[59]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[58]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[57]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[56]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[55]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[54]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[53]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[52]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[51]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[50]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[49]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[48]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[47]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[46]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[45]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[44]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[43]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[42]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[41]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[40]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[39]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[38]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[37]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[36]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[35]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[34]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[33]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[32]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[31]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #124]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #376]\n\t"
+ "# A[32] * B[63]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[33] * B[62]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[34] * B[61]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[35] * B[60]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[36] * B[59]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[58]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[57]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[56]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[55]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[54]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[53]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[52]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[51]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[50]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[49]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[48]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[47]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[46]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[45]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[44]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[43]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[42]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[41]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[40]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[39]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[38]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[37]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[36]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[35]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[34]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[33]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[32]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #128]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #380]\n\t"
+ "# A[33] * B[63]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[34] * B[62]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[35] * B[61]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[36] * B[60]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[37] * B[59]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[58]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[57]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[56]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[55]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[54]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[53]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[52]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[51]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[50]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[49]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[48]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[47]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[46]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[45]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[44]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[43]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[42]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[41]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[40]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[39]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[38]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[37]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[36]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[35]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[34]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[33]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #132]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #384]\n\t"
+ "# A[34] * B[63]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[35] * B[62]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[36] * B[61]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[37] * B[60]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[38] * B[59]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[58]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[57]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[56]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[55]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[54]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[53]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[52]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[51]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[50]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[49]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[48]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[47]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[46]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[45]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[44]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[43]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[42]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[41]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[40]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[39]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[38]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[37]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[36]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[35]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[34]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #136]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #388]\n\t"
+ "# A[35] * B[63]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[36] * B[62]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[37] * B[61]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[38] * B[60]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[39] * B[59]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[58]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[57]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[56]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[55]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[54]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[53]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[52]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[51]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[50]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[49]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[48]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[47]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[46]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[45]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[44]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[43]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[42]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[41]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[40]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[39]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[38]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[37]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[36]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[35]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #140]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #392]\n\t"
+ "# A[36] * B[63]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[37] * B[62]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[38] * B[61]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[39] * B[60]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[40] * B[59]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[58]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[57]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[56]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[55]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[54]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[53]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[52]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[51]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[50]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[49]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[48]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[47]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[46]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[45]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[44]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[43]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[42]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[41]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[40]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[39]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[38]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[37]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[36]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #144]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #396]\n\t"
+ "# A[37] * B[63]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[38] * B[62]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[39] * B[61]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[40] * B[60]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[41] * B[59]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[58]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[57]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[56]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[55]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[54]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[53]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[52]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[51]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[50]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[49]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[48]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[47]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[46]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[45]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[44]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[43]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[42]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[41]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[40]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[39]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[38]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[37]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #148]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #400]\n\t"
+ "# A[38] * B[63]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[39] * B[62]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[40] * B[61]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[41] * B[60]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[42] * B[59]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[58]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[57]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[56]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[55]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[54]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[53]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[52]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[51]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[50]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[49]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[48]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[47]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[46]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[45]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[44]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[43]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[42]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[41]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[40]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[39]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[38]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #152]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #404]\n\t"
+ "# A[39] * B[63]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[40] * B[62]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[41] * B[61]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[42] * B[60]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[43] * B[59]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[58]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[57]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[56]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[55]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[54]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[53]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[52]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[51]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[50]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[49]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[48]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[47]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[46]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[45]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[44]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[43]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[42]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[41]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[40]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[39]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #156]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #408]\n\t"
+ "# A[40] * B[63]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[41] * B[62]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[42] * B[61]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[43] * B[60]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[44] * B[59]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[58]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[57]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[56]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[55]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[54]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[53]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[52]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[51]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[50]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[49]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[48]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[47]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[46]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[45]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[44]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[43]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[42]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[41]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[40]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #160]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #412]\n\t"
+ "# A[41] * B[63]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[42] * B[62]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[43] * B[61]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[44] * B[60]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[45] * B[59]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[58]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[57]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[56]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[55]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[54]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[53]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[52]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[51]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[50]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[49]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[48]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[47]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[46]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[45]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[44]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[43]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[42]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[41]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #164]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #416]\n\t"
+ "# A[42] * B[63]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[43] * B[62]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[44] * B[61]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[45] * B[60]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[46] * B[59]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[58]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[57]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[56]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[55]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[54]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[53]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[52]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[51]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[50]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[49]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[48]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[47]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[46]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[45]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[44]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[43]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[42]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #168]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #420]\n\t"
+ "# A[43] * B[63]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[44] * B[62]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[45] * B[61]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[46] * B[60]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[47] * B[59]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[58]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[57]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[56]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[55]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[54]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[53]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[52]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[51]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[50]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[49]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[48]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[47]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[46]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[45]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[44]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[43]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #172]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #424]\n\t"
+ "# A[44] * B[63]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[45] * B[62]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[46] * B[61]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[47] * B[60]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[48] * B[59]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[58]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[57]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[56]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[55]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[54]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[53]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[52]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[51]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[50]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[49]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[48]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[47]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[46]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[45]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[44]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #176]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #428]\n\t"
+ "# A[45] * B[63]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[46] * B[62]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[47] * B[61]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[48] * B[60]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[49] * B[59]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[58]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[57]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[56]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[55]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[54]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[53]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[52]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[51]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[50]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[49]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[48]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[47]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[46]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[45]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #180]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #432]\n\t"
+ "# A[46] * B[63]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[47] * B[62]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[48] * B[61]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[49] * B[60]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[50] * B[59]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[58]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[57]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[56]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[55]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[54]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[53]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[52]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[51]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[50]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[49]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[48]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[47]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[46]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #184]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #436]\n\t"
+ "# A[47] * B[63]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[48] * B[62]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[49] * B[61]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[50] * B[60]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[51] * B[59]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[58]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[57]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[56]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[55]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[54]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[53]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[52]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[51]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[50]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[49]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[48]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[47]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #188]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #440]\n\t"
+ "# A[48] * B[63]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[49] * B[62]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[50] * B[61]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[51] * B[60]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[52] * B[59]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[58]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[57]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[56]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[55]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[54]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[53]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[52]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[51]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[50]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[49]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[48]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #192]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #444]\n\t"
+ "# A[49] * B[63]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[50] * B[62]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[51] * B[61]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[52] * B[60]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[53] * B[59]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[58]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[57]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[56]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[55]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[54]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[53]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[52]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[51]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[50]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[49]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #196]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #448]\n\t"
+ "# A[50] * B[63]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[51] * B[62]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[52] * B[61]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[53] * B[60]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[54] * B[59]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[58]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[57]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[56]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[55]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[54]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[53]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[52]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[51]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[50]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #200]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #452]\n\t"
+ "# A[51] * B[63]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[52] * B[62]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[53] * B[61]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[54] * B[60]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[55] * B[59]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[58]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[57]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[56]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[55]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[54]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[53]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[52]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[51]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #204]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #456]\n\t"
+ "# A[52] * B[63]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[53] * B[62]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[54] * B[61]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[55] * B[60]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[56] * B[59]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[58]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[57]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[56]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[55]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[54]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[53]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[52]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #208]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #460]\n\t"
+ "# A[53] * B[63]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[54] * B[62]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[55] * B[61]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[56] * B[60]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[57] * B[59]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[58]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[57]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[56]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[55]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[54]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[53]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #212]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #464]\n\t"
+ "# A[54] * B[63]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[55] * B[62]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[56] * B[61]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[57] * B[60]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[58] * B[59]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[58]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[57]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[56]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[55]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[54]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #216]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #468]\n\t"
+ "# A[55] * B[63]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[56] * B[62]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[57] * B[61]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[58] * B[60]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[59] * B[59]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[58]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[57]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[56]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[55]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #220]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #472]\n\t"
+ "# A[56] * B[63]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[57] * B[62]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[58] * B[61]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[59] * B[60]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[60] * B[59]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[58]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[57]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[56]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #224]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #476]\n\t"
+ "# A[57] * B[63]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[58] * B[62]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[59] * B[61]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[60] * B[60]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[61] * B[59]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[58]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[57]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #228]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #480]\n\t"
+ "# A[58] * B[63]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[59] * B[62]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[60] * B[61]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[61] * B[60]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[62] * B[59]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[58]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #232]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #484]\n\t"
+ "# A[59] * B[63]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[60] * B[62]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[61] * B[61]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[62] * B[60]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[63] * B[59]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #236]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #488]\n\t"
+ "# A[60] * B[63]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[61] * B[62]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[62] * B[61]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[63] * B[60]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #240]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #492]\n\t"
+ "# A[61] * B[63]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[62] * B[62]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[63] * B[61]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #244]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #496]\n\t"
+ "# A[62] * B[63]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[63] * B[62]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #248]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #500]\n\t"
+ "# A[63] * B[63]\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "ldr r9, [%[b], #252]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r3, [%[r], #504]\n\t"
+ "str r4, [%[r], #508]\n\t"
+ "ldr r3, [sp, #0]\n\t"
+ "ldr r4, [sp, #4]\n\t"
+ "ldr r5, [sp, #8]\n\t"
+ "ldr r6, [sp, #12]\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r3, [sp, #16]\n\t"
+ "ldr r4, [sp, #20]\n\t"
+ "ldr r5, [sp, #24]\n\t"
+ "ldr r6, [sp, #28]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "str r5, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r3, [sp, #32]\n\t"
+ "ldr r4, [sp, #36]\n\t"
+ "ldr r5, [sp, #40]\n\t"
+ "ldr r6, [sp, #44]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "str r5, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "ldr r3, [sp, #48]\n\t"
+ "ldr r4, [sp, #52]\n\t"
+ "ldr r5, [sp, #56]\n\t"
+ "ldr r6, [sp, #60]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "str r6, [%[r], #60]\n\t"
+ "ldr r3, [sp, #64]\n\t"
+ "ldr r4, [sp, #68]\n\t"
+ "ldr r5, [sp, #72]\n\t"
+ "ldr r6, [sp, #76]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "str r5, [%[r], #72]\n\t"
+ "str r6, [%[r], #76]\n\t"
+ "ldr r3, [sp, #80]\n\t"
+ "ldr r4, [sp, #84]\n\t"
+ "ldr r5, [sp, #88]\n\t"
+ "ldr r6, [sp, #92]\n\t"
+ "str r3, [%[r], #80]\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "str r5, [%[r], #88]\n\t"
+ "str r6, [%[r], #92]\n\t"
+ "ldr r3, [sp, #96]\n\t"
+ "ldr r4, [sp, #100]\n\t"
+ "ldr r5, [sp, #104]\n\t"
+ "ldr r6, [sp, #108]\n\t"
+ "str r3, [%[r], #96]\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "str r5, [%[r], #104]\n\t"
+ "str r6, [%[r], #108]\n\t"
+ "ldr r3, [sp, #112]\n\t"
+ "ldr r4, [sp, #116]\n\t"
+ "ldr r5, [sp, #120]\n\t"
+ "ldr r6, [sp, #124]\n\t"
+ "str r3, [%[r], #112]\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "str r5, [%[r], #120]\n\t"
+ "str r6, [%[r], #124]\n\t"
+ "ldr r3, [sp, #128]\n\t"
+ "ldr r4, [sp, #132]\n\t"
+ "ldr r5, [sp, #136]\n\t"
+ "ldr r6, [sp, #140]\n\t"
+ "str r3, [%[r], #128]\n\t"
+ "str r4, [%[r], #132]\n\t"
+ "str r5, [%[r], #136]\n\t"
+ "str r6, [%[r], #140]\n\t"
+ "ldr r3, [sp, #144]\n\t"
+ "ldr r4, [sp, #148]\n\t"
+ "ldr r5, [sp, #152]\n\t"
+ "ldr r6, [sp, #156]\n\t"
+ "str r3, [%[r], #144]\n\t"
+ "str r4, [%[r], #148]\n\t"
+ "str r5, [%[r], #152]\n\t"
+ "str r6, [%[r], #156]\n\t"
+ "ldr r3, [sp, #160]\n\t"
+ "ldr r4, [sp, #164]\n\t"
+ "ldr r5, [sp, #168]\n\t"
+ "ldr r6, [sp, #172]\n\t"
+ "str r3, [%[r], #160]\n\t"
+ "str r4, [%[r], #164]\n\t"
+ "str r5, [%[r], #168]\n\t"
+ "str r6, [%[r], #172]\n\t"
+ "ldr r3, [sp, #176]\n\t"
+ "ldr r4, [sp, #180]\n\t"
+ "ldr r5, [sp, #184]\n\t"
+ "ldr r6, [sp, #188]\n\t"
+ "str r3, [%[r], #176]\n\t"
+ "str r4, [%[r], #180]\n\t"
+ "str r5, [%[r], #184]\n\t"
+ "str r6, [%[r], #188]\n\t"
+ "ldr r3, [sp, #192]\n\t"
+ "ldr r4, [sp, #196]\n\t"
+ "ldr r5, [sp, #200]\n\t"
+ "ldr r6, [sp, #204]\n\t"
+ "str r3, [%[r], #192]\n\t"
+ "str r4, [%[r], #196]\n\t"
+ "str r5, [%[r], #200]\n\t"
+ "str r6, [%[r], #204]\n\t"
+ "ldr r3, [sp, #208]\n\t"
+ "ldr r4, [sp, #212]\n\t"
+ "ldr r5, [sp, #216]\n\t"
+ "ldr r6, [sp, #220]\n\t"
+ "str r3, [%[r], #208]\n\t"
+ "str r4, [%[r], #212]\n\t"
+ "str r5, [%[r], #216]\n\t"
+ "str r6, [%[r], #220]\n\t"
+ "ldr r3, [sp, #224]\n\t"
+ "ldr r4, [sp, #228]\n\t"
+ "ldr r5, [sp, #232]\n\t"
+ "ldr r6, [sp, #236]\n\t"
+ "str r3, [%[r], #224]\n\t"
+ "str r4, [%[r], #228]\n\t"
+ "str r5, [%[r], #232]\n\t"
+ "str r6, [%[r], #236]\n\t"
+ "ldr r3, [sp, #240]\n\t"
+ "ldr r4, [sp, #244]\n\t"
+ "ldr r5, [sp, #248]\n\t"
+ "ldr r6, [sp, #252]\n\t"
+ "str r3, [%[r], #240]\n\t"
+ "str r4, [%[r], #244]\n\t"
+ "str r5, [%[r], #248]\n\t"
+ "str r6, [%[r], #252]\n\t"
+ "add sp, sp, #256\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Compact form: one pass over all 64 digits. */
+    int idx;
+
+    for (idx = 0; idx < 64; idx++) {
+        r[idx] = a[idx] & m;
+    }
+#else
+    /* Size-for-speed form: process the 64 digits in groups of eight.
+     * The inner loop has a fixed trip count of 8 so the compiler can
+     * unroll it; the result is identical to the compact form. */
+    int idx;
+    int k;
+
+    for (idx = 0; idx < 64; idx += 8) {
+        for (k = 0; k < 8; k++) {
+            r[idx + k] = a[idx + k] & m;
+        }
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r; /* low half-product a_lo*b_lo goes straight into r */
+ sp_digit z1[128]; /* middle product (a_lo+a_hi)*(b_lo+b_hi) */
+ sp_digit a1[64]; /* a_lo + a_hi, truncated to 64 digits */
+ sp_digit b1[64]; /* b_lo + b_hi, truncated to 64 digits */
+ sp_digit z2[128]; /* high half-product a_hi*b_hi */
+ sp_digit u, ca, cb;
+
+ /* Karatsuba: split the 128-digit inputs into 64-digit halves and form
+ * the half sums. ca/cb hold the carry out of each sum (used below as
+ * 0/1 values to build all-ones or all-zero masks — assumed from that
+ * usage; sp_2048_add_64 is defined elsewhere in this file). */
+ ca = sp_2048_add_64(a1, a, &a[64]);
+ cb = sp_2048_add_64(b1, b, &b[64]);
+ /* The true middle product is (ca*2^2048 + a1)*(cb*2^2048 + b1);
+ * u starts with the ca*cb high term and accumulates further carries. */
+ u = ca & cb;
+ sp_2048_mul_64(z1, a1, b1);
+ sp_2048_mul_64(z2, &a[64], &b[64]);
+ sp_2048_mul_64(z0, a, b);
+ /* Add the cross terms cb*a1 and ca*b1 into the third quarter of r,
+ * selecting each via a mask of 0 - carry (all ones when carry == 1). */
+ sp_2048_mask_64(r + 128, a1, 0 - cb);
+ sp_2048_mask_64(b1, b1, 0 - ca);
+ u += sp_2048_add_64(r + 128, r + 128, b1);
+ /* z1 -= z2 + z0, leaving the pure middle Karatsuba term in z1.
+ * Order matters: z0 aliases r and has already been consumed above. */
+ u += sp_4096_sub_in_place_128(z1, z2);
+ u += sp_4096_sub_in_place_128(z1, z0);
+ /* Fold the middle term in at digit offset 64; record its carry, then
+ * zero the remaining top digits before the final addition. */
+ u += sp_4096_add_128(r + 64, r + 64, z1);
+ r[192] = u;
+ XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+ /* Add the high product at digit offset 128; the final carry must be
+ * zero for in-range inputs, hence the discarded return value. */
+ (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #256\n\t"
+ "mov r14, #0\n\t"
+ "# A[0] * A[0]\n\t"
+ "ldr r10, [%[a], #0]\n\t"
+ "umull r8, r3, r10, r10\n\t"
+ "mov r4, #0\n\t"
+ "str r8, [sp]\n\t"
+ "# A[0] * A[1]\n\t"
+ "ldr r10, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r14, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "str r3, [sp, #4]\n\t"
+ "# A[0] * A[2]\n\t"
+ "ldr r10, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r14, r14\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "# A[1] * A[1]\n\t"
+ "ldr r10, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "str r4, [sp, #8]\n\t"
+ "# A[0] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r14, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "# A[1] * A[2]\n\t"
+ "ldr r10, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "str r2, [sp, #12]\n\t"
+ "# A[0] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r14, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "# A[1] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "# A[2] * A[2]\n\t"
+ "ldr r10, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "str r3, [sp, #16]\n\t"
+ "# A[0] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #20]\n\t"
+ "# A[0] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #24]\n\t"
+ "# A[0] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #28]\n\t"
+ "# A[0] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #32]\n\t"
+ "# A[0] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #36]\n\t"
+ "# A[0] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #40]\n\t"
+ "# A[0] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #44]\n\t"
+ "# A[0] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #48]\n\t"
+ "# A[0] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #52]\n\t"
+ "# A[0] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #56]\n\t"
+ "# A[0] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #60]\n\t"
+ "# A[0] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #64]\n\t"
+ "# A[0] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #68]\n\t"
+ "# A[0] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #72]\n\t"
+ "# A[0] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #76]\n\t"
+ "# A[0] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #80]\n\t"
+ "# A[0] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #84]\n\t"
+ "# A[0] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #88]\n\t"
+ "# A[0] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #92]\n\t"
+ "# A[0] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[12]\n\t"
+ "ldr r10, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #96]\n\t"
+ "# A[0] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #100]\n\t"
+ "# A[0] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[13]\n\t"
+ "ldr r10, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #104]\n\t"
+ "# A[0] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #108]\n\t"
+ "# A[0] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[14]\n\t"
+ "ldr r10, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #112]\n\t"
+ "# A[0] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #116]\n\t"
+ "# A[0] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[15]\n\t"
+ "ldr r10, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #120]\n\t"
+ "# A[0] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #124]\n\t"
+ "# A[0] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[16]\n\t"
+ "ldr r10, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #128]\n\t"
+ "# A[0] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #132]\n\t"
+ "# A[0] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[17]\n\t"
+ "ldr r10, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #136]\n\t"
+ "# A[0] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #140]\n\t"
+ "# A[0] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[18]\n\t"
+ "ldr r10, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #144]\n\t"
+ "# A[0] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #148]\n\t"
+ "# A[0] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[19]\n\t"
+ "ldr r10, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #152]\n\t"
+ "# A[0] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #156]\n\t"
+ "# A[0] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[20]\n\t"
+ "ldr r10, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #160]\n\t"
+ "# A[0] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #164]\n\t"
+ "# A[0] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[21]\n\t"
+ "ldr r10, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #168]\n\t"
+ "# A[0] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #172]\n\t"
+ "# A[0] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[22]\n\t"
+ "ldr r10, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #176]\n\t"
+ "# A[0] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #180]\n\t"
+ "# A[0] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[23]\n\t"
+ "ldr r10, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #184]\n\t"
+ "# A[0] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #188]\n\t"
+ "# A[0] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[24]\n\t"
+ "ldr r10, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #192]\n\t"
+ "# A[0] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #196]\n\t"
+ "# A[0] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[25]\n\t"
+ "ldr r10, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #200]\n\t"
+ "# A[0] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #204]\n\t"
+ "# A[0] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[26]\n\t"
+ "ldr r10, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #208]\n\t"
+ "# A[0] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #212]\n\t"
+ "# A[0] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[27]\n\t"
+ "ldr r10, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #216]\n\t"
+ "# A[0] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #220]\n\t"
+ "# A[0] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[28]\n\t"
+ "ldr r10, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #224]\n\t"
+ "# A[0] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #228]\n\t"
+ "# A[0] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[29]\n\t"
+ "ldr r10, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #232]\n\t"
+ "# A[0] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #236]\n\t"
+ "# A[0] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[30]\n\t"
+ "ldr r10, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #240]\n\t"
+ "# A[0] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #244]\n\t"
+ "# A[0] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[31]\n\t"
+ "ldr r10, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #248]\n\t"
+ "# A[0] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #252]\n\t"
+ "# A[1] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[2] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[32]\n\t"
+ "ldr r10, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #256]\n\t"
+ "# A[2] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[3] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #260]\n\t"
+ "# A[3] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[4] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[33]\n\t"
+ "ldr r10, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #264]\n\t"
+ "# A[4] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[5] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #268]\n\t"
+ "# A[5] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[6] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[34]\n\t"
+ "ldr r10, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #272]\n\t"
+ "# A[6] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[7] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #276]\n\t"
+ "# A[7] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[8] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[9] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[35]\n\t"
+ "ldr r10, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #280]\n\t"
+ "# A[8] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[9] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[10] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #284]\n\t"
+ "# A[9] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[10] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[11] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[36]\n\t"
+ "ldr r10, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #288]\n\t"
+ "# A[10] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[11] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[12] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #292]\n\t"
+ "# A[11] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[12] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[13] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[37]\n\t"
+ "ldr r10, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #296]\n\t"
+ "# A[12] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[13] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[14] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #300]\n\t"
+ "# A[13] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[14] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[15] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[38]\n\t"
+ "ldr r10, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #304]\n\t"
+ "# A[14] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[15] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[16] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #308]\n\t"
+ "# A[15] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[16] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[17] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[39]\n\t"
+ "ldr r10, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #312]\n\t"
+ "# A[16] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[17] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[18] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #316]\n\t"
+ "# A[17] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[18] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[19] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[40]\n\t"
+ "ldr r10, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #320]\n\t"
+ "# A[18] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[19] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[20] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #324]\n\t"
+ "# A[19] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[20] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[21] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[41]\n\t"
+ "ldr r10, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #328]\n\t"
+ "# A[20] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[21] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[22] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #332]\n\t"
+ "# A[21] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[22] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[23] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[42]\n\t"
+ "ldr r10, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #336]\n\t"
+ "# A[22] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[23] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[24] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #340]\n\t"
+ "# A[23] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[24] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[25] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[43]\n\t"
+ "ldr r10, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #344]\n\t"
+ "# A[24] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[25] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[26] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #348]\n\t"
+ "# A[25] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[26] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[27] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[44]\n\t"
+ "ldr r10, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #352]\n\t"
+ "# A[26] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[27] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[28] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #356]\n\t"
+ "# A[27] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[28] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[29] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[45]\n\t"
+ "ldr r10, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #360]\n\t"
+ "# A[28] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[29] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[30] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #364]\n\t"
+ "# A[29] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[30] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[31] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[46]\n\t"
+ "ldr r10, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #368]\n\t"
+ "# A[30] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[31] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[32] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #372]\n\t"
+ "# A[31] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[32] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[33] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[47]\n\t"
+ "ldr r10, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #376]\n\t"
+ "# A[32] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[33] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[34] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #380]\n\t"
+ "# A[33] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[34] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[35] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[48]\n\t"
+ "ldr r10, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #384]\n\t"
+ "# A[34] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[35] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[36] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #388]\n\t"
+ "# A[35] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[36] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[37] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[49]\n\t"
+ "ldr r10, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #392]\n\t"
+ "# A[36] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[37] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[38] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #396]\n\t"
+ "# A[37] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[38] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[39] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[50]\n\t"
+ "ldr r10, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #400]\n\t"
+ "# A[38] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[39] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[40] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #404]\n\t"
+ "# A[39] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[40] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[41] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[51]\n\t"
+ "ldr r10, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #408]\n\t"
+ "# A[40] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[41] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[42] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #412]\n\t"
+ "# A[41] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[42] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[43] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[52]\n\t"
+ "ldr r10, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #416]\n\t"
+ "# A[42] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[43] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[44] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #420]\n\t"
+ "# A[43] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[44] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[45] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[53] * A[53]\n\t"
+ "ldr r10, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #424]\n\t"
+ "# A[44] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[45] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[46] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[53] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #428]\n\t"
+ "# A[45] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[46] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[47] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[53] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[54] * A[54]\n\t"
+ "ldr r10, [%[a], #216]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #432]\n\t"
+ "# A[46] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[47] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[48] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[53] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[54] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #436]\n\t"
+ "# A[47] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[48] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[49] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[53] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[54] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[55] * A[55]\n\t"
+ "ldr r10, [%[a], #220]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #440]\n\t"
+ "# A[48] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[49] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[50] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[53] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[54] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[55] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #444]\n\t"
+ "# A[49] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[50] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[51] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[53] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[54] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[55] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[56] * A[56]\n\t"
+ "ldr r10, [%[a], #224]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #448]\n\t"
+ "# A[50] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[51] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[52] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[53] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[54] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[55] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[56] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #452]\n\t"
+ "# A[51] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[52] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[53] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[54] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[55] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[56] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[57] * A[57]\n\t"
+ "ldr r10, [%[a], #228]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #456]\n\t"
+ "# A[52] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[53] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[54] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[55] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[56] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[57] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #460]\n\t"
+ "# A[53] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[54] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[55] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[56] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[57] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[58] * A[58]\n\t"
+ "ldr r10, [%[a], #232]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #464]\n\t"
+ "# A[54] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[55] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[56] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[57] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[58] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #468]\n\t"
+ "# A[55] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[56] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[57] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[58] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[59] * A[59]\n\t"
+ "ldr r10, [%[a], #236]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #472]\n\t"
+ "# A[56] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[57] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[58] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[59] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #476]\n\t"
+ "# A[57] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[58] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[59] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[60] * A[60]\n\t"
+ "ldr r10, [%[a], #240]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #480]\n\t"
+ "# A[58] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[59] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[60] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #484]\n\t"
+ "# A[59] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r14, r14\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "# A[60] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "# A[61] * A[61]\n\t"
+ "ldr r10, [%[a], #244]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "str r4, [%[r], #488]\n\t"
+ "# A[60] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r14, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "# A[61] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "str r2, [%[r], #492]\n\t"
+ "# A[61] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r14, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "# A[62] * A[62]\n\t"
+ "ldr r10, [%[a], #248]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "str r3, [%[r], #496]\n\t"
+ "# A[62] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r14, r14\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "str r4, [%[r], #500]\n\t"
+ "# A[63] * A[63]\n\t"
+ "ldr r10, [%[a], #252]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r2, r2, r8\n\t"
+ "adc r3, r3, r9\n\t"
+ "str r2, [%[r], #504]\n\t"
+ "str r3, [%[r], #508]\n\t"
+ "ldr r2, [sp, #0]\n\t"
+ "ldr r3, [sp, #4]\n\t"
+ "ldr r4, [sp, #8]\n\t"
+ "ldr r8, [sp, #12]\n\t"
+ "str r2, [%[r], #0]\n\t"
+ "str r3, [%[r], #4]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r8, [%[r], #12]\n\t"
+ "ldr r2, [sp, #16]\n\t"
+ "ldr r3, [sp, #20]\n\t"
+ "ldr r4, [sp, #24]\n\t"
+ "ldr r8, [sp, #28]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r8, [%[r], #28]\n\t"
+ "ldr r2, [sp, #32]\n\t"
+ "ldr r3, [sp, #36]\n\t"
+ "ldr r4, [sp, #40]\n\t"
+ "ldr r8, [sp, #44]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r8, [%[r], #44]\n\t"
+ "ldr r2, [sp, #48]\n\t"
+ "ldr r3, [sp, #52]\n\t"
+ "ldr r4, [sp, #56]\n\t"
+ "ldr r8, [sp, #60]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "str r8, [%[r], #60]\n\t"
+ "ldr r2, [sp, #64]\n\t"
+ "ldr r3, [sp, #68]\n\t"
+ "ldr r4, [sp, #72]\n\t"
+ "ldr r8, [sp, #76]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "str r8, [%[r], #76]\n\t"
+ "ldr r2, [sp, #80]\n\t"
+ "ldr r3, [sp, #84]\n\t"
+ "ldr r4, [sp, #88]\n\t"
+ "ldr r8, [sp, #92]\n\t"
+ "str r2, [%[r], #80]\n\t"
+ "str r3, [%[r], #84]\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "str r8, [%[r], #92]\n\t"
+ "ldr r2, [sp, #96]\n\t"
+ "ldr r3, [sp, #100]\n\t"
+ "ldr r4, [sp, #104]\n\t"
+ "ldr r8, [sp, #108]\n\t"
+ "str r2, [%[r], #96]\n\t"
+ "str r3, [%[r], #100]\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "str r8, [%[r], #108]\n\t"
+ "ldr r2, [sp, #112]\n\t"
+ "ldr r3, [sp, #116]\n\t"
+ "ldr r4, [sp, #120]\n\t"
+ "ldr r8, [sp, #124]\n\t"
+ "str r2, [%[r], #112]\n\t"
+ "str r3, [%[r], #116]\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "str r8, [%[r], #124]\n\t"
+ "ldr r2, [sp, #128]\n\t"
+ "ldr r3, [sp, #132]\n\t"
+ "ldr r4, [sp, #136]\n\t"
+ "ldr r8, [sp, #140]\n\t"
+ "str r2, [%[r], #128]\n\t"
+ "str r3, [%[r], #132]\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "str r8, [%[r], #140]\n\t"
+ "ldr r2, [sp, #144]\n\t"
+ "ldr r3, [sp, #148]\n\t"
+ "ldr r4, [sp, #152]\n\t"
+ "ldr r8, [sp, #156]\n\t"
+ "str r2, [%[r], #144]\n\t"
+ "str r3, [%[r], #148]\n\t"
+ "str r4, [%[r], #152]\n\t"
+ "str r8, [%[r], #156]\n\t"
+ "ldr r2, [sp, #160]\n\t"
+ "ldr r3, [sp, #164]\n\t"
+ "ldr r4, [sp, #168]\n\t"
+ "ldr r8, [sp, #172]\n\t"
+ "str r2, [%[r], #160]\n\t"
+ "str r3, [%[r], #164]\n\t"
+ "str r4, [%[r], #168]\n\t"
+ "str r8, [%[r], #172]\n\t"
+ "ldr r2, [sp, #176]\n\t"
+ "ldr r3, [sp, #180]\n\t"
+ "ldr r4, [sp, #184]\n\t"
+ "ldr r8, [sp, #188]\n\t"
+ "str r2, [%[r], #176]\n\t"
+ "str r3, [%[r], #180]\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "str r8, [%[r], #188]\n\t"
+ "ldr r2, [sp, #192]\n\t"
+ "ldr r3, [sp, #196]\n\t"
+ "ldr r4, [sp, #200]\n\t"
+ "ldr r8, [sp, #204]\n\t"
+ "str r2, [%[r], #192]\n\t"
+ "str r3, [%[r], #196]\n\t"
+ "str r4, [%[r], #200]\n\t"
+ "str r8, [%[r], #204]\n\t"
+ "ldr r2, [sp, #208]\n\t"
+ "ldr r3, [sp, #212]\n\t"
+ "ldr r4, [sp, #216]\n\t"
+ "ldr r8, [sp, #220]\n\t"
+ "str r2, [%[r], #208]\n\t"
+ "str r3, [%[r], #212]\n\t"
+ "str r4, [%[r], #216]\n\t"
+ "str r8, [%[r], #220]\n\t"
+ "ldr r2, [sp, #224]\n\t"
+ "ldr r3, [sp, #228]\n\t"
+ "ldr r4, [sp, #232]\n\t"
+ "ldr r8, [sp, #236]\n\t"
+ "str r2, [%[r], #224]\n\t"
+ "str r3, [%[r], #228]\n\t"
+ "str r4, [%[r], #232]\n\t"
+ "str r8, [%[r], #236]\n\t"
+ "ldr r2, [sp, #240]\n\t"
+ "ldr r3, [sp, #244]\n\t"
+ "ldr r4, [sp, #248]\n\t"
+ "ldr r8, [sp, #252]\n\t"
+ "str r2, [%[r], #240]\n\t"
+ "str r3, [%[r], #244]\n\t"
+ "str r4, [%[r], #248]\n\t"
+ "str r8, [%[r], #252]\n\t"
+ "add sp, sp, #256\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
+ );
+}
+
/* Square a and put result in r. (r = a * a)
 *
 * Karatsuba squaring of a 128-digit (4096-bit) number: split
 * a = a1*2^2048 + a0 so that
 *   a^2 = a1^2*2^4096 + ((a0 + a1)^2 - a1^2 - a0^2)*2^2048 + a0^2.
 *
 * r A single precision integer. Receives the 256-digit result.
 * a A single precision integer. 128 digits.
 */
SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
{
    sp_digit* z0 = r;      /* a0^2 is computed directly into the low half */
    sp_digit z2[128];      /* a1^2 */
    sp_digit z1[128];      /* (a0 + a1)^2, from the truncated sum */
    sp_digit a1[64];       /* a0 + a1 mod 2^2048; carry kept in u */
    sp_digit u;

    u = sp_2048_add_64(a1, a, &a[64]);
    sp_2048_sqr_64(z1, a1);             /* z1 = (a0 + a1 mod 2^2048)^2 */
    sp_2048_sqr_64(z2, &a[64]);         /* z2 = a1^2 */
    sp_2048_sqr_64(z0, a);              /* z0 = a0^2 (low half of r) */
    /* If the a0+a1 addition carried (u == 1), the full square of the sum
     * also contains 2*u*(a0+a1 mod 2^2048) at digit 128 (weight 2^4096):
     * select a1 when u is set, then double it in place. */
    sp_2048_mask_64(r + 128, a1, 0 - u);
    u += sp_2048_add_64(r + 128, r + 128, r + 128);
    /* z1 -= a1^2; z1 -= a0^2 — leaves the middle Karatsuba term
     * (carry/borrow corrections are accumulated in u). */
    u += sp_4096_sub_in_place_128(z1, z2);
    u += sp_4096_sub_in_place_128(z1, z0);
    /* Add the middle term in at digit 64 (weight 2^2048). */
    u += sp_4096_add_128(r + 64, r + 64, z1);
    r[192] = u;                         /* final carry lands at digit 192 */
    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
    /* Add a1^2 in at digit 128 (weight 2^4096). */
    (void)sp_4096_add_128(r + 128, r + 128, z2);
}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * 128 32-bit digits are added, four per loop iteration. The carry is
 * kept in c (0 or 1) across iterations, since the loop-control
 * instructions destroy the flags.
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 = one past the end of a (128 digits * 4 bytes). */
        "add r12, %[a], #512\n\t"
        "\n1:\n\t"
        /* Reload the carry flag from c: c + 0xffffffff produces a
         * carry-out exactly when c == 1. */
        "adds %[c], %[c], #-1\n\t"
        "ldr r4, [%[a]], #4\n\t"
        "ldr r5, [%[a]], #4\n\t"
        "ldr r6, [%[a]], #4\n\t"
        "ldr r7, [%[a]], #4\n\t"
        "ldr r8, [%[b]], #4\n\t"
        "ldr r9, [%[b]], #4\n\t"
        "ldr r10, [%[b]], #4\n\t"
        "ldr r14, [%[b]], #4\n\t"
        /* Add four digit pairs with carry chaining. */
        "adcs r4, r4, r8\n\t"
        "adcs r5, r5, r9\n\t"
        "adcs r6, r6, r10\n\t"
        "adcs r7, r7, r14\n\t"
        "str r4, [%[r]], #4\n\t"
        "str r5, [%[r]], #4\n\t"
        "str r6, [%[r]], #4\n\t"
        "str r7, [%[r]], #4\n\t"
        /* Save the carry flag back into c as 0 or 1 (r4 is zero). */
        "mov r4, #0\n\t"
        "adc %[c], r4, #0\n\t"
        "cmp %[a], r12\n\t"
        "bne 1b\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );

    return c;
}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * 128 32-bit digits, four per loop iteration. The borrow is carried in
 * c between iterations: 0 when no borrow, all-ones after a borrow.
 *
 * a A single precision integer.
 * b A single precision integer.
 */
static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        "mov r14, #0\n\t"
        /* r12 = one past the end of a (128 digits * 4 bytes). */
        "add r12, %[a], #512\n\t"
        "\n1:\n\t"
        /* Reload the borrow: 0 - c clears the ARM carry flag (i.e.
         * establishes a borrow) exactly when c is non-zero. */
        "subs %[c], r14, %[c]\n\t"
        "ldr r3, [%[a]]\n\t"
        "ldr r4, [%[a], #4]\n\t"
        "ldr r5, [%[a], #8]\n\t"
        "ldr r6, [%[a], #12]\n\t"
        "ldr r7, [%[b]], #4\n\t"
        "ldr r8, [%[b]], #4\n\t"
        "ldr r9, [%[b]], #4\n\t"
        "ldr r10, [%[b]], #4\n\t"
        /* Subtract four digit pairs with borrow chaining. */
        "sbcs r3, r3, r7\n\t"
        "sbcs r4, r4, r8\n\t"
        "sbcs r5, r5, r9\n\t"
        "sbcs r6, r6, r10\n\t"
        "str r3, [%[a]], #4\n\t"
        "str r4, [%[a]], #4\n\t"
        "str r5, [%[a]], #4\n\t"
        "str r6, [%[a]], #4\n\t"
        /* c = 0 - 0 - borrow: all-ones when the subtraction borrowed. */
        "sbc %[c], r14, r14\n\t"
        "cmp %[a], r12\n\t"
        "bne 1b\n\t"
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
    );

    return c;
}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Product-scanning multiplication: for each output digit, all the
 * partial products a[i]*b[j] with i + j fixed are accumulated into a
 * 96-bit accumulator (r6:r7:r8) and the low word is written out. The
 * 256-digit result is built in a 1024-byte stack buffer and copied to
 * r at the end (so r may alias a or b).
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        "sub sp, sp, #1024\n\t"
        /* r5 = output byte offset; r6:r7:r8 = column accumulator. */
        "mov r5, #0\n\t"
        "mov r6, #0\n\t"
        "mov r7, #0\n\t"
        "mov r8, #0\n\t"
        "\n1:\n\t"
        /* r3 = max(r5 - 508, 0): first byte index into a for this
         * column; r4 = r5 - r3: matching byte index into b. */
        "subs r3, r5, #508\n\t"
        "it cc\n\t"                /* no-op in ARM mode; Thumb-2 compat */
        "movcc r3, #0\n\t"
        "sub r4, r5, r3\n\t"
        "\n2:\n\t"
        "ldr r14, [%[a], r3]\n\t"
        "ldr r12, [%[b], r4]\n\t"
        /* Accumulate one 32x32->64 partial product. */
        "umull r9, r10, r14, r12\n\t"
        "adds r6, r6, r9\n\t"
        "adcs r7, r7, r10\n\t"
        "adc r8, r8, #0\n\t"
        "add r3, r3, #4\n\t"
        "sub r4, r4, #4\n\t"
        "cmp r3, #512\n\t"         /* ran past the last digit of a? */
        "beq 3f\n\t"
        "cmp r3, r5\n\t"           /* all pairs for this column done? */
        "ble 2b\n\t"
        "\n3:\n\t"
        /* Emit the low accumulator word, shift the accumulator down. */
        "str r6, [sp, r5]\n\t"
        "mov r6, r7\n\t"
        "mov r7, r8\n\t"
        "mov r8, #0\n\t"
        "add r5, r5, #4\n\t"
        "cmp r5, #1016\n\t"
        "ble 1b\n\t"
        "str r6, [sp, r5]\n\t"     /* top digit at offset 1020 */
        "\n4:\n\t"
        /* Copy the result from the stack buffer to r, 16 bytes at a
         * time, releasing the scratch space as we go. */
        "ldr r6, [sp, #0]\n\t"
        "ldr r7, [sp, #4]\n\t"
        "ldr r8, [sp, #8]\n\t"
        "ldr r3, [sp, #12]\n\t"
        "str r6, [%[r], #0]\n\t"
        "str r7, [%[r], #4]\n\t"
        "str r8, [%[r], #8]\n\t"
        "str r3, [%[r], #12]\n\t"
        "add sp, sp, #16\n\t"
        "add %[r], %[r], #16\n\t"
        "subs r5, r5, #16\n\t"
        "bgt 4b\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );
}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #1024\n\t"
+ "mov r12, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "mov r5, #0\n\t"
+ "\n1:\n\t"
+ "subs r3, r5, #508\n\t"
+ "it cc\n\t"
+ "movcc r3, r12\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ "cmp r4, r3\n\t"
+ "beq 4f\n\t"
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r9, [%[a], r4]\n\t"
+ "umull r9, r10, r14, r9\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ "ldr r14, [%[a], r3]\n\t"
+ "umull r9, r10, r14, r14\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "\n5:\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #512\n\t"
+ "beq 3f\n\t"
+ "cmp r3, r4\n\t"
+ "bgt 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #1016\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ "\n4:\n\t"
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Caclulate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit x, b;
+
+ b = a[0];
+ x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+
+ /* rho = -1/m mod b */
+ *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r5, r3, %[b], r8\n\t"
+ "mov r4, #0\n\t"
+ "str r5, [%[r]]\n\t"
+ "mov r5, #0\n\t"
+ "mov r9, #4\n\t"
+ "1:\n\t"
+ "ldr r8, [%[a], r9]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], r9]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r9, r9, #4\n\t"
+ "cmp r9, #512\n\t"
+ "blt 1b\n\t"
+ "str r3, [%[r], #512]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#else
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r3, r4, %[b], r8\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [%[r]]\n\t"
+ "# A[1] * B\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "# A[2] * B\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "# A[3] * B\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "# A[4] * B\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "# A[5] * B\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "# A[6] * B\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "# A[7] * B\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "# A[8] * B\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #32]\n\t"
+ "# A[9] * B\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "# A[10] * B\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "# A[11] * B\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #44]\n\t"
+ "# A[12] * B\n\t"
+ "ldr r8, [%[a], #48]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "# A[13] * B\n\t"
+ "ldr r8, [%[a], #52]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "# A[14] * B\n\t"
+ "ldr r8, [%[a], #56]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "# A[15] * B\n\t"
+ "ldr r8, [%[a], #60]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "# A[16] * B\n\t"
+ "ldr r8, [%[a], #64]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "# A[17] * B\n\t"
+ "ldr r8, [%[a], #68]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "# A[18] * B\n\t"
+ "ldr r8, [%[a], #72]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #72]\n\t"
+ "# A[19] * B\n\t"
+ "ldr r8, [%[a], #76]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "# A[20] * B\n\t"
+ "ldr r8, [%[a], #80]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #80]\n\t"
+ "# A[21] * B\n\t"
+ "ldr r8, [%[a], #84]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #84]\n\t"
+ "# A[22] * B\n\t"
+ "ldr r8, [%[a], #88]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "# A[23] * B\n\t"
+ "ldr r8, [%[a], #92]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #92]\n\t"
+ "# A[24] * B\n\t"
+ "ldr r8, [%[a], #96]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #96]\n\t"
+ "# A[25] * B\n\t"
+ "ldr r8, [%[a], #100]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "# A[26] * B\n\t"
+ "ldr r8, [%[a], #104]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #104]\n\t"
+ "# A[27] * B\n\t"
+ "ldr r8, [%[a], #108]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #108]\n\t"
+ "# A[28] * B\n\t"
+ "ldr r8, [%[a], #112]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "# A[29] * B\n\t"
+ "ldr r8, [%[a], #116]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #116]\n\t"
+ "# A[30] * B\n\t"
+ "ldr r8, [%[a], #120]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #120]\n\t"
+ "# A[31] * B\n\t"
+ "ldr r8, [%[a], #124]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #124]\n\t"
+ "# A[32] * B\n\t"
+ "ldr r8, [%[a], #128]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #128]\n\t"
+ "# A[33] * B\n\t"
+ "ldr r8, [%[a], #132]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #132]\n\t"
+ "# A[34] * B\n\t"
+ "ldr r8, [%[a], #136]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "# A[35] * B\n\t"
+ "ldr r8, [%[a], #140]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #140]\n\t"
+ "# A[36] * B\n\t"
+ "ldr r8, [%[a], #144]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #144]\n\t"
+ "# A[37] * B\n\t"
+ "ldr r8, [%[a], #148]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #148]\n\t"
+ "# A[38] * B\n\t"
+ "ldr r8, [%[a], #152]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #152]\n\t"
+ "# A[39] * B\n\t"
+ "ldr r8, [%[a], #156]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #156]\n\t"
+ "# A[40] * B\n\t"
+ "ldr r8, [%[a], #160]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "# A[41] * B\n\t"
+ "ldr r8, [%[a], #164]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #164]\n\t"
+ "# A[42] * B\n\t"
+ "ldr r8, [%[a], #168]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #168]\n\t"
+ "# A[43] * B\n\t"
+ "ldr r8, [%[a], #172]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #172]\n\t"
+ "# A[44] * B\n\t"
+ "ldr r8, [%[a], #176]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #176]\n\t"
+ "# A[45] * B\n\t"
+ "ldr r8, [%[a], #180]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #180]\n\t"
+ "# A[46] * B\n\t"
+ "ldr r8, [%[a], #184]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "# A[47] * B\n\t"
+ "ldr r8, [%[a], #188]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #188]\n\t"
+ "# A[48] * B\n\t"
+ "ldr r8, [%[a], #192]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #192]\n\t"
+ "# A[49] * B\n\t"
+ "ldr r8, [%[a], #196]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #196]\n\t"
+ "# A[50] * B\n\t"
+ "ldr r8, [%[a], #200]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #200]\n\t"
+ "# A[51] * B\n\t"
+ "ldr r8, [%[a], #204]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #204]\n\t"
+ "# A[52] * B\n\t"
+ "ldr r8, [%[a], #208]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "# A[53] * B\n\t"
+ "ldr r8, [%[a], #212]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #212]\n\t"
+ "# A[54] * B\n\t"
+ "ldr r8, [%[a], #216]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #216]\n\t"
+ "# A[55] * B\n\t"
+ "ldr r8, [%[a], #220]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #220]\n\t"
+ "# A[56] * B\n\t"
+ "ldr r8, [%[a], #224]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #224]\n\t"
+ "# A[57] * B\n\t"
+ "ldr r8, [%[a], #228]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #228]\n\t"
+ "# A[58] * B\n\t"
+ "ldr r8, [%[a], #232]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #232]\n\t"
+ "# A[59] * B\n\t"
+ "ldr r8, [%[a], #236]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #236]\n\t"
+ "# A[60] * B\n\t"
+ "ldr r8, [%[a], #240]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #240]\n\t"
+ "# A[61] * B\n\t"
+ "ldr r8, [%[a], #244]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #244]\n\t"
+ "# A[62] * B\n\t"
+ "ldr r8, [%[a], #248]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #248]\n\t"
+ "# A[63] * B\n\t"
+ "ldr r8, [%[a], #252]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #252]\n\t"
+ "# A[64] * B\n\t"
+ "ldr r8, [%[a], #256]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #256]\n\t"
+ "# A[65] * B\n\t"
+ "ldr r8, [%[a], #260]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #260]\n\t"
+ "# A[66] * B\n\t"
+ "ldr r8, [%[a], #264]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #264]\n\t"
+ "# A[67] * B\n\t"
+ "ldr r8, [%[a], #268]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #268]\n\t"
+ "# A[68] * B\n\t"
+ "ldr r8, [%[a], #272]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #272]\n\t"
+ "# A[69] * B\n\t"
+ "ldr r8, [%[a], #276]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #276]\n\t"
+ "# A[70] * B\n\t"
+ "ldr r8, [%[a], #280]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #280]\n\t"
+ "# A[71] * B\n\t"
+ "ldr r8, [%[a], #284]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #284]\n\t"
+ "# A[72] * B\n\t"
+ "ldr r8, [%[a], #288]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #288]\n\t"
+ "# A[73] * B\n\t"
+ "ldr r8, [%[a], #292]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #292]\n\t"
+ "# A[74] * B\n\t"
+ "ldr r8, [%[a], #296]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #296]\n\t"
+ "# A[75] * B\n\t"
+ "ldr r8, [%[a], #300]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #300]\n\t"
+ "# A[76] * B\n\t"
+ "ldr r8, [%[a], #304]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #304]\n\t"
+ "# A[77] * B\n\t"
+ "ldr r8, [%[a], #308]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #308]\n\t"
+ "# A[78] * B\n\t"
+ "ldr r8, [%[a], #312]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #312]\n\t"
+ "# A[79] * B\n\t"
+ "ldr r8, [%[a], #316]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #316]\n\t"
+ "# A[80] * B\n\t"
+ "ldr r8, [%[a], #320]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #320]\n\t"
+ "# A[81] * B\n\t"
+ "ldr r8, [%[a], #324]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #324]\n\t"
+ "# A[82] * B\n\t"
+ "ldr r8, [%[a], #328]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #328]\n\t"
+ "# A[83] * B\n\t"
+ "ldr r8, [%[a], #332]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #332]\n\t"
+ "# A[84] * B\n\t"
+ "ldr r8, [%[a], #336]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #336]\n\t"
+ "# A[85] * B\n\t"
+ "ldr r8, [%[a], #340]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #340]\n\t"
+ "# A[86] * B\n\t"
+ "ldr r8, [%[a], #344]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #344]\n\t"
+ "# A[87] * B\n\t"
+ "ldr r8, [%[a], #348]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #348]\n\t"
+ "# A[88] * B\n\t"
+ "ldr r8, [%[a], #352]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #352]\n\t"
+ "# A[89] * B\n\t"
+ "ldr r8, [%[a], #356]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #356]\n\t"
+ "# A[90] * B\n\t"
+ "ldr r8, [%[a], #360]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #360]\n\t"
+ "# A[91] * B\n\t"
+ "ldr r8, [%[a], #364]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #364]\n\t"
+ "# A[92] * B\n\t"
+ "ldr r8, [%[a], #368]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #368]\n\t"
+ "# A[93] * B\n\t"
+ "ldr r8, [%[a], #372]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #372]\n\t"
+ "# A[94] * B\n\t"
+ "ldr r8, [%[a], #376]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #376]\n\t"
+ "# A[95] * B\n\t"
+ "ldr r8, [%[a], #380]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #380]\n\t"
+ "# A[96] * B\n\t"
+ "ldr r8, [%[a], #384]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #384]\n\t"
+ "# A[97] * B\n\t"
+ "ldr r8, [%[a], #388]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #388]\n\t"
+ "# A[98] * B\n\t"
+ "ldr r8, [%[a], #392]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #392]\n\t"
+ "# A[99] * B\n\t"
+ "ldr r8, [%[a], #396]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #396]\n\t"
+ "# A[100] * B\n\t"
+ "ldr r8, [%[a], #400]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #400]\n\t"
+ "# A[101] * B\n\t"
+ "ldr r8, [%[a], #404]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #404]\n\t"
+ "# A[102] * B\n\t"
+ "ldr r8, [%[a], #408]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #408]\n\t"
+ "# A[103] * B\n\t"
+ "ldr r8, [%[a], #412]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #412]\n\t"
+ "# A[104] * B\n\t"
+ "ldr r8, [%[a], #416]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #416]\n\t"
+ "# A[105] * B\n\t"
+ "ldr r8, [%[a], #420]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #420]\n\t"
+ "# A[106] * B\n\t"
+ "ldr r8, [%[a], #424]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #424]\n\t"
+ "# A[107] * B\n\t"
+ "ldr r8, [%[a], #428]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #428]\n\t"
+ "# A[108] * B\n\t"
+ "ldr r8, [%[a], #432]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #432]\n\t"
+ "# A[109] * B\n\t"
+ "ldr r8, [%[a], #436]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #436]\n\t"
+ "# A[110] * B\n\t"
+ "ldr r8, [%[a], #440]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #440]\n\t"
+ "# A[111] * B\n\t"
+ "ldr r8, [%[a], #444]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #444]\n\t"
+ "# A[112] * B\n\t"
+ "ldr r8, [%[a], #448]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #448]\n\t"
+ "# A[113] * B\n\t"
+ "ldr r8, [%[a], #452]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #452]\n\t"
+ "# A[114] * B\n\t"
+ "ldr r8, [%[a], #456]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #456]\n\t"
+ "# A[115] * B\n\t"
+ "ldr r8, [%[a], #460]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #460]\n\t"
+ "# A[116] * B\n\t"
+ "ldr r8, [%[a], #464]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #464]\n\t"
+ "# A[117] * B\n\t"
+ "ldr r8, [%[a], #468]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #468]\n\t"
+ "# A[118] * B\n\t"
+ "ldr r8, [%[a], #472]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #472]\n\t"
+ "# A[119] * B\n\t"
+ "ldr r8, [%[a], #476]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #476]\n\t"
+ "# A[120] * B\n\t"
+ "ldr r8, [%[a], #480]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #480]\n\t"
+ "# A[121] * B\n\t"
+ "ldr r8, [%[a], #484]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #484]\n\t"
+ "# A[122] * B\n\t"
+ "ldr r8, [%[a], #488]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #488]\n\t"
+ "# A[123] * B\n\t"
+ "ldr r8, [%[a], #492]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #492]\n\t"
+ "# A[124] * B\n\t"
+ "ldr r8, [%[a], #496]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #496]\n\t"
+ "# A[125] * B\n\t"
+ "ldr r8, [%[a], #500]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #500]\n\t"
+ "# A[126] * B\n\t"
+ "ldr r8, [%[a], #504]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #504]\n\t"
+ "# A[127] * B\n\t"
+ "ldr r8, [%[a], #508]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adc r5, r5, r7\n\t"
+ "str r4, [%[r], #508]\n\t"
+ "str r5, [%[r], #512]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#endif
+}
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * Zeroing r and then computing r - m lets the borrow wrap around
+ * modulo 2^4096, producing 2^4096 - m.  Because m is a full 4096-bit
+ * value (per the contract above), 2^4096 - m < m is not required here;
+ * the result is 2^4096 mod m as used for Montgomery normalization.
+ *
+ * r A single precision number (output, 128 32-bit words).
+ * m A single precision number (the 4096-bit modulus).
+ */
+static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
+{
+ /* r = 0 (represents 2^4096 modulo 2^4096). */
+ XMEMSET(r, 0, sizeof(sp_digit) * 128);
+
+ /* r = 2^n mod m */
+ sp_4096_sub_in_place_128(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant-time with respect to the mask: both paths always read all
+ * 128 words of a and b and write all 128 words of r; the mask is
+ * applied by AND-ing each word of b before the subtract-with-borrow.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * Returns the final borrow: (sp_digit)-1 when the subtraction
+ * underflowed, 0 otherwise (built from the carry flag by "sbc c, r9, r9"
+ * with r9 == 0).
+ */
+static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+ /* Size-optimized variant: loop over the 128 words, 4 bytes per pass.
+ * r8 is the byte offset, r9 is a constant zero.  The borrow is carried
+ * across iterations through c: "subs c, r9, c" restores the carry flag
+ * from the saved borrow, and "sbc c, r9, r9" re-captures it afterwards.
+ * NOTE(review): r6/r7 appear in the clobber list but are not used in
+ * this path — harmless, but could be trimmed; confirm against other
+ * generated variants before changing. */
+ __asm__ __volatile__ (
+ "mov r9, #0\n\t"
+ "mov r8, #0\n\t"
+ "1:\n\t"
+ "subs %[c], r9, %[c]\n\t"
+ "ldr r4, [%[a], r8]\n\t"
+ "ldr r5, [%[b], r8]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbc %[c], r9, r9\n\t"
+ "str r4, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, #512\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#else
+ /* Fully unrolled variant: two words per step (r4/r6 from a, r5/r7 the
+ * masked words of b).  The first pair uses "subs" to start the borrow
+ * chain; every later pair continues it with "sbcs", and the trailing
+ * "sbc c, r9, r9" extracts the final borrow into c. */
+ __asm__ __volatile__ (
+
+ "mov r9, #0\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "subs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r6, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r6, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r6, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r6, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r6, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r7, [%[b], #44]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r6, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r7, [%[b], #52]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "str r6, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r6, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r7, [%[b], #60]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "str r6, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r6, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r7, [%[b], #68]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "str r6, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r6, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r7, [%[b], #76]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "str r6, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r6, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r7, [%[b], #84]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "str r6, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r6, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r7, [%[b], #92]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "str r6, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r6, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r7, [%[b], #100]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "str r6, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r6, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r7, [%[b], #108]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "str r6, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r6, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r7, [%[b], #116]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "str r6, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r6, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r7, [%[b], #124]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "str r6, [%[r], #124]\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r6, [%[a], #132]\n\t"
+ "ldr r5, [%[b], #128]\n\t"
+ "ldr r7, [%[b], #132]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #128]\n\t"
+ "str r6, [%[r], #132]\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r6, [%[a], #140]\n\t"
+ "ldr r5, [%[b], #136]\n\t"
+ "ldr r7, [%[b], #140]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #136]\n\t"
+ "str r6, [%[r], #140]\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r6, [%[a], #148]\n\t"
+ "ldr r5, [%[b], #144]\n\t"
+ "ldr r7, [%[b], #148]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #144]\n\t"
+ "str r6, [%[r], #148]\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r6, [%[a], #156]\n\t"
+ "ldr r5, [%[b], #152]\n\t"
+ "ldr r7, [%[b], #156]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #152]\n\t"
+ "str r6, [%[r], #156]\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r6, [%[a], #164]\n\t"
+ "ldr r5, [%[b], #160]\n\t"
+ "ldr r7, [%[b], #164]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #160]\n\t"
+ "str r6, [%[r], #164]\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r6, [%[a], #172]\n\t"
+ "ldr r5, [%[b], #168]\n\t"
+ "ldr r7, [%[b], #172]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #168]\n\t"
+ "str r6, [%[r], #172]\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r6, [%[a], #180]\n\t"
+ "ldr r5, [%[b], #176]\n\t"
+ "ldr r7, [%[b], #180]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #176]\n\t"
+ "str r6, [%[r], #180]\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r6, [%[a], #188]\n\t"
+ "ldr r5, [%[b], #184]\n\t"
+ "ldr r7, [%[b], #188]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #184]\n\t"
+ "str r6, [%[r], #188]\n\t"
+ "ldr r4, [%[a], #192]\n\t"
+ "ldr r6, [%[a], #196]\n\t"
+ "ldr r5, [%[b], #192]\n\t"
+ "ldr r7, [%[b], #196]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #192]\n\t"
+ "str r6, [%[r], #196]\n\t"
+ "ldr r4, [%[a], #200]\n\t"
+ "ldr r6, [%[a], #204]\n\t"
+ "ldr r5, [%[b], #200]\n\t"
+ "ldr r7, [%[b], #204]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #200]\n\t"
+ "str r6, [%[r], #204]\n\t"
+ "ldr r4, [%[a], #208]\n\t"
+ "ldr r6, [%[a], #212]\n\t"
+ "ldr r5, [%[b], #208]\n\t"
+ "ldr r7, [%[b], #212]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #208]\n\t"
+ "str r6, [%[r], #212]\n\t"
+ "ldr r4, [%[a], #216]\n\t"
+ "ldr r6, [%[a], #220]\n\t"
+ "ldr r5, [%[b], #216]\n\t"
+ "ldr r7, [%[b], #220]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #216]\n\t"
+ "str r6, [%[r], #220]\n\t"
+ "ldr r4, [%[a], #224]\n\t"
+ "ldr r6, [%[a], #228]\n\t"
+ "ldr r5, [%[b], #224]\n\t"
+ "ldr r7, [%[b], #228]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #224]\n\t"
+ "str r6, [%[r], #228]\n\t"
+ "ldr r4, [%[a], #232]\n\t"
+ "ldr r6, [%[a], #236]\n\t"
+ "ldr r5, [%[b], #232]\n\t"
+ "ldr r7, [%[b], #236]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #232]\n\t"
+ "str r6, [%[r], #236]\n\t"
+ "ldr r4, [%[a], #240]\n\t"
+ "ldr r6, [%[a], #244]\n\t"
+ "ldr r5, [%[b], #240]\n\t"
+ "ldr r7, [%[b], #244]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #240]\n\t"
+ "str r6, [%[r], #244]\n\t"
+ "ldr r4, [%[a], #248]\n\t"
+ "ldr r6, [%[a], #252]\n\t"
+ "ldr r5, [%[b], #248]\n\t"
+ "ldr r7, [%[b], #252]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #248]\n\t"
+ "str r6, [%[r], #252]\n\t"
+ "ldr r4, [%[a], #256]\n\t"
+ "ldr r6, [%[a], #260]\n\t"
+ "ldr r5, [%[b], #256]\n\t"
+ "ldr r7, [%[b], #260]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #256]\n\t"
+ "str r6, [%[r], #260]\n\t"
+ "ldr r4, [%[a], #264]\n\t"
+ "ldr r6, [%[a], #268]\n\t"
+ "ldr r5, [%[b], #264]\n\t"
+ "ldr r7, [%[b], #268]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #264]\n\t"
+ "str r6, [%[r], #268]\n\t"
+ "ldr r4, [%[a], #272]\n\t"
+ "ldr r6, [%[a], #276]\n\t"
+ "ldr r5, [%[b], #272]\n\t"
+ "ldr r7, [%[b], #276]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #272]\n\t"
+ "str r6, [%[r], #276]\n\t"
+ "ldr r4, [%[a], #280]\n\t"
+ "ldr r6, [%[a], #284]\n\t"
+ "ldr r5, [%[b], #280]\n\t"
+ "ldr r7, [%[b], #284]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #280]\n\t"
+ "str r6, [%[r], #284]\n\t"
+ "ldr r4, [%[a], #288]\n\t"
+ "ldr r6, [%[a], #292]\n\t"
+ "ldr r5, [%[b], #288]\n\t"
+ "ldr r7, [%[b], #292]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #288]\n\t"
+ "str r6, [%[r], #292]\n\t"
+ "ldr r4, [%[a], #296]\n\t"
+ "ldr r6, [%[a], #300]\n\t"
+ "ldr r5, [%[b], #296]\n\t"
+ "ldr r7, [%[b], #300]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #296]\n\t"
+ "str r6, [%[r], #300]\n\t"
+ "ldr r4, [%[a], #304]\n\t"
+ "ldr r6, [%[a], #308]\n\t"
+ "ldr r5, [%[b], #304]\n\t"
+ "ldr r7, [%[b], #308]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #304]\n\t"
+ "str r6, [%[r], #308]\n\t"
+ "ldr r4, [%[a], #312]\n\t"
+ "ldr r6, [%[a], #316]\n\t"
+ "ldr r5, [%[b], #312]\n\t"
+ "ldr r7, [%[b], #316]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #312]\n\t"
+ "str r6, [%[r], #316]\n\t"
+ "ldr r4, [%[a], #320]\n\t"
+ "ldr r6, [%[a], #324]\n\t"
+ "ldr r5, [%[b], #320]\n\t"
+ "ldr r7, [%[b], #324]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #320]\n\t"
+ "str r6, [%[r], #324]\n\t"
+ "ldr r4, [%[a], #328]\n\t"
+ "ldr r6, [%[a], #332]\n\t"
+ "ldr r5, [%[b], #328]\n\t"
+ "ldr r7, [%[b], #332]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #328]\n\t"
+ "str r6, [%[r], #332]\n\t"
+ "ldr r4, [%[a], #336]\n\t"
+ "ldr r6, [%[a], #340]\n\t"
+ "ldr r5, [%[b], #336]\n\t"
+ "ldr r7, [%[b], #340]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #336]\n\t"
+ "str r6, [%[r], #340]\n\t"
+ "ldr r4, [%[a], #344]\n\t"
+ "ldr r6, [%[a], #348]\n\t"
+ "ldr r5, [%[b], #344]\n\t"
+ "ldr r7, [%[b], #348]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #344]\n\t"
+ "str r6, [%[r], #348]\n\t"
+ "ldr r4, [%[a], #352]\n\t"
+ "ldr r6, [%[a], #356]\n\t"
+ "ldr r5, [%[b], #352]\n\t"
+ "ldr r7, [%[b], #356]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #352]\n\t"
+ "str r6, [%[r], #356]\n\t"
+ "ldr r4, [%[a], #360]\n\t"
+ "ldr r6, [%[a], #364]\n\t"
+ "ldr r5, [%[b], #360]\n\t"
+ "ldr r7, [%[b], #364]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #360]\n\t"
+ "str r6, [%[r], #364]\n\t"
+ "ldr r4, [%[a], #368]\n\t"
+ "ldr r6, [%[a], #372]\n\t"
+ "ldr r5, [%[b], #368]\n\t"
+ "ldr r7, [%[b], #372]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #368]\n\t"
+ "str r6, [%[r], #372]\n\t"
+ "ldr r4, [%[a], #376]\n\t"
+ "ldr r6, [%[a], #380]\n\t"
+ "ldr r5, [%[b], #376]\n\t"
+ "ldr r7, [%[b], #380]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #376]\n\t"
+ "str r6, [%[r], #380]\n\t"
+ "ldr r4, [%[a], #384]\n\t"
+ "ldr r6, [%[a], #388]\n\t"
+ "ldr r5, [%[b], #384]\n\t"
+ "ldr r7, [%[b], #388]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #384]\n\t"
+ "str r6, [%[r], #388]\n\t"
+ "ldr r4, [%[a], #392]\n\t"
+ "ldr r6, [%[a], #396]\n\t"
+ "ldr r5, [%[b], #392]\n\t"
+ "ldr r7, [%[b], #396]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #392]\n\t"
+ "str r6, [%[r], #396]\n\t"
+ "ldr r4, [%[a], #400]\n\t"
+ "ldr r6, [%[a], #404]\n\t"
+ "ldr r5, [%[b], #400]\n\t"
+ "ldr r7, [%[b], #404]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #400]\n\t"
+ "str r6, [%[r], #404]\n\t"
+ "ldr r4, [%[a], #408]\n\t"
+ "ldr r6, [%[a], #412]\n\t"
+ "ldr r5, [%[b], #408]\n\t"
+ "ldr r7, [%[b], #412]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #408]\n\t"
+ "str r6, [%[r], #412]\n\t"
+ "ldr r4, [%[a], #416]\n\t"
+ "ldr r6, [%[a], #420]\n\t"
+ "ldr r5, [%[b], #416]\n\t"
+ "ldr r7, [%[b], #420]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #416]\n\t"
+ "str r6, [%[r], #420]\n\t"
+ "ldr r4, [%[a], #424]\n\t"
+ "ldr r6, [%[a], #428]\n\t"
+ "ldr r5, [%[b], #424]\n\t"
+ "ldr r7, [%[b], #428]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #424]\n\t"
+ "str r6, [%[r], #428]\n\t"
+ "ldr r4, [%[a], #432]\n\t"
+ "ldr r6, [%[a], #436]\n\t"
+ "ldr r5, [%[b], #432]\n\t"
+ "ldr r7, [%[b], #436]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #432]\n\t"
+ "str r6, [%[r], #436]\n\t"
+ "ldr r4, [%[a], #440]\n\t"
+ "ldr r6, [%[a], #444]\n\t"
+ "ldr r5, [%[b], #440]\n\t"
+ "ldr r7, [%[b], #444]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #440]\n\t"
+ "str r6, [%[r], #444]\n\t"
+ "ldr r4, [%[a], #448]\n\t"
+ "ldr r6, [%[a], #452]\n\t"
+ "ldr r5, [%[b], #448]\n\t"
+ "ldr r7, [%[b], #452]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #448]\n\t"
+ "str r6, [%[r], #452]\n\t"
+ "ldr r4, [%[a], #456]\n\t"
+ "ldr r6, [%[a], #460]\n\t"
+ "ldr r5, [%[b], #456]\n\t"
+ "ldr r7, [%[b], #460]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #456]\n\t"
+ "str r6, [%[r], #460]\n\t"
+ "ldr r4, [%[a], #464]\n\t"
+ "ldr r6, [%[a], #468]\n\t"
+ "ldr r5, [%[b], #464]\n\t"
+ "ldr r7, [%[b], #468]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #464]\n\t"
+ "str r6, [%[r], #468]\n\t"
+ "ldr r4, [%[a], #472]\n\t"
+ "ldr r6, [%[a], #476]\n\t"
+ "ldr r5, [%[b], #472]\n\t"
+ "ldr r7, [%[b], #476]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #472]\n\t"
+ "str r6, [%[r], #476]\n\t"
+ "ldr r4, [%[a], #480]\n\t"
+ "ldr r6, [%[a], #484]\n\t"
+ "ldr r5, [%[b], #480]\n\t"
+ "ldr r7, [%[b], #484]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #480]\n\t"
+ "str r6, [%[r], #484]\n\t"
+ "ldr r4, [%[a], #488]\n\t"
+ "ldr r6, [%[a], #492]\n\t"
+ "ldr r5, [%[b], #488]\n\t"
+ "ldr r7, [%[b], #492]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #488]\n\t"
+ "str r6, [%[r], #492]\n\t"
+ "ldr r4, [%[a], #496]\n\t"
+ "ldr r6, [%[a], #500]\n\t"
+ "ldr r5, [%[b], #496]\n\t"
+ "ldr r7, [%[b], #500]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #496]\n\t"
+ "str r6, [%[r], #500]\n\t"
+ "ldr r4, [%[a], #504]\n\t"
+ "ldr r6, [%[a], #508]\n\t"
+ "ldr r5, [%[b], #504]\n\t"
+ "ldr r7, [%[b], #508]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #504]\n\t"
+ "str r6, [%[r], #508]\n\t"
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#endif /* WOLFSSL_SP_SMALL */
+
+ return c;
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ "# i = 0\n\t"
+ "mov r12, #0\n\t"
+ "ldr r10, [%[a], #0]\n\t"
+ "ldr r14, [%[a], #4]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul r8, %[mp], r10\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "ldr r7, [%[m], #0]\n\t"
+ "ldr r9, [%[a], #0]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r10, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "ldr r7, [%[m], #4]\n\t"
+ "ldr r9, [%[a], #4]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r14, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r10, r10, r5\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr r7, [%[m], #8]\n\t"
+ "ldr r14, [%[a], #8]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r14, r14, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r14, r14, r4\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr r7, [%[m], #12]\n\t"
+ "ldr r9, [%[a], #12]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #12]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr r7, [%[m], #16]\n\t"
+ "ldr r9, [%[a], #16]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #16]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr r7, [%[m], #20]\n\t"
+ "ldr r9, [%[a], #20]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #20]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr r7, [%[m], #24]\n\t"
+ "ldr r9, [%[a], #24]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #24]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr r7, [%[m], #28]\n\t"
+ "ldr r9, [%[a], #28]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #28]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr r7, [%[m], #32]\n\t"
+ "ldr r9, [%[a], #32]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #32]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr r7, [%[m], #36]\n\t"
+ "ldr r9, [%[a], #36]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #36]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr r7, [%[m], #40]\n\t"
+ "ldr r9, [%[a], #40]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #40]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr r7, [%[m], #44]\n\t"
+ "ldr r9, [%[a], #44]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #44]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+12] += m[12] * mu\n\t"
+ "ldr r7, [%[m], #48]\n\t"
+ "ldr r9, [%[a], #48]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #48]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+13] += m[13] * mu\n\t"
+ "ldr r7, [%[m], #52]\n\t"
+ "ldr r9, [%[a], #52]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #52]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+14] += m[14] * mu\n\t"
+ "ldr r7, [%[m], #56]\n\t"
+ "ldr r9, [%[a], #56]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #56]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+15] += m[15] * mu\n\t"
+ "ldr r7, [%[m], #60]\n\t"
+ "ldr r9, [%[a], #60]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #60]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+16] += m[16] * mu\n\t"
+ "ldr r7, [%[m], #64]\n\t"
+ "ldr r9, [%[a], #64]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #64]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+17] += m[17] * mu\n\t"
+ "ldr r7, [%[m], #68]\n\t"
+ "ldr r9, [%[a], #68]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #68]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+18] += m[18] * mu\n\t"
+ "ldr r7, [%[m], #72]\n\t"
+ "ldr r9, [%[a], #72]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #72]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+19] += m[19] * mu\n\t"
+ "ldr r7, [%[m], #76]\n\t"
+ "ldr r9, [%[a], #76]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #76]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+20] += m[20] * mu\n\t"
+ "ldr r7, [%[m], #80]\n\t"
+ "ldr r9, [%[a], #80]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #80]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+21] += m[21] * mu\n\t"
+ "ldr r7, [%[m], #84]\n\t"
+ "ldr r9, [%[a], #84]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #84]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+22] += m[22] * mu\n\t"
+ "ldr r7, [%[m], #88]\n\t"
+ "ldr r9, [%[a], #88]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #88]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+23] += m[23] * mu\n\t"
+ "ldr r7, [%[m], #92]\n\t"
+ "ldr r9, [%[a], #92]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #92]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+24] += m[24] * mu\n\t"
+ "ldr r7, [%[m], #96]\n\t"
+ "ldr r9, [%[a], #96]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #96]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+25] += m[25] * mu\n\t"
+ "ldr r7, [%[m], #100]\n\t"
+ "ldr r9, [%[a], #100]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #100]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+26] += m[26] * mu\n\t"
+ "ldr r7, [%[m], #104]\n\t"
+ "ldr r9, [%[a], #104]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #104]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+27] += m[27] * mu\n\t"
+ "ldr r7, [%[m], #108]\n\t"
+ "ldr r9, [%[a], #108]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #108]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+28] += m[28] * mu\n\t"
+ "ldr r7, [%[m], #112]\n\t"
+ "ldr r9, [%[a], #112]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #112]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+29] += m[29] * mu\n\t"
+ "ldr r7, [%[m], #116]\n\t"
+ "ldr r9, [%[a], #116]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #116]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+30] += m[30] * mu\n\t"
+ "ldr r7, [%[m], #120]\n\t"
+ "ldr r9, [%[a], #120]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #120]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+31] += m[31] * mu\n\t"
+ "ldr r7, [%[m], #124]\n\t"
+ "ldr r9, [%[a], #124]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #124]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+32] += m[32] * mu\n\t"
+ "ldr r7, [%[m], #128]\n\t"
+ "ldr r9, [%[a], #128]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #128]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+33] += m[33] * mu\n\t"
+ "ldr r7, [%[m], #132]\n\t"
+ "ldr r9, [%[a], #132]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #132]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+34] += m[34] * mu\n\t"
+ "ldr r7, [%[m], #136]\n\t"
+ "ldr r9, [%[a], #136]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #136]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+35] += m[35] * mu\n\t"
+ "ldr r7, [%[m], #140]\n\t"
+ "ldr r9, [%[a], #140]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #140]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+36] += m[36] * mu\n\t"
+ "ldr r7, [%[m], #144]\n\t"
+ "ldr r9, [%[a], #144]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #144]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+37] += m[37] * mu\n\t"
+ "ldr r7, [%[m], #148]\n\t"
+ "ldr r9, [%[a], #148]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #148]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+38] += m[38] * mu\n\t"
+ "ldr r7, [%[m], #152]\n\t"
+ "ldr r9, [%[a], #152]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #152]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+39] += m[39] * mu\n\t"
+ "ldr r7, [%[m], #156]\n\t"
+ "ldr r9, [%[a], #156]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #156]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+40] += m[40] * mu\n\t"
+ "ldr r7, [%[m], #160]\n\t"
+ "ldr r9, [%[a], #160]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #160]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+41] += m[41] * mu\n\t"
+ "ldr r7, [%[m], #164]\n\t"
+ "ldr r9, [%[a], #164]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #164]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+42] += m[42] * mu\n\t"
+ "ldr r7, [%[m], #168]\n\t"
+ "ldr r9, [%[a], #168]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #168]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+43] += m[43] * mu\n\t"
+ "ldr r7, [%[m], #172]\n\t"
+ "ldr r9, [%[a], #172]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #172]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+44] += m[44] * mu\n\t"
+ "ldr r7, [%[m], #176]\n\t"
+ "ldr r9, [%[a], #176]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #176]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+45] += m[45] * mu\n\t"
+ "ldr r7, [%[m], #180]\n\t"
+ "ldr r9, [%[a], #180]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #180]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+46] += m[46] * mu\n\t"
+ "ldr r7, [%[m], #184]\n\t"
+ "ldr r9, [%[a], #184]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #184]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+47] += m[47] * mu\n\t"
+ "ldr r7, [%[m], #188]\n\t"
+ "ldr r9, [%[a], #188]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #188]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+48] += m[48] * mu\n\t"
+ "ldr r7, [%[m], #192]\n\t"
+ "ldr r9, [%[a], #192]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #192]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+49] += m[49] * mu\n\t"
+ "ldr r7, [%[m], #196]\n\t"
+ "ldr r9, [%[a], #196]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #196]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+50] += m[50] * mu\n\t"
+ "ldr r7, [%[m], #200]\n\t"
+ "ldr r9, [%[a], #200]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #200]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+51] += m[51] * mu\n\t"
+ "ldr r7, [%[m], #204]\n\t"
+ "ldr r9, [%[a], #204]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #204]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+52] += m[52] * mu\n\t"
+ "ldr r7, [%[m], #208]\n\t"
+ "ldr r9, [%[a], #208]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #208]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+53] += m[53] * mu\n\t"
+ "ldr r7, [%[m], #212]\n\t"
+ "ldr r9, [%[a], #212]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #212]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+54] += m[54] * mu\n\t"
+ "ldr r7, [%[m], #216]\n\t"
+ "ldr r9, [%[a], #216]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #216]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+55] += m[55] * mu\n\t"
+ "ldr r7, [%[m], #220]\n\t"
+ "ldr r9, [%[a], #220]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #220]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+56] += m[56] * mu\n\t"
+ "ldr r7, [%[m], #224]\n\t"
+ "ldr r9, [%[a], #224]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #224]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+57] += m[57] * mu\n\t"
+ "ldr r7, [%[m], #228]\n\t"
+ "ldr r9, [%[a], #228]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #228]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+58] += m[58] * mu\n\t"
+ "ldr r7, [%[m], #232]\n\t"
+ "ldr r9, [%[a], #232]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #232]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+59] += m[59] * mu\n\t"
+ "ldr r7, [%[m], #236]\n\t"
+ "ldr r9, [%[a], #236]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #236]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+60] += m[60] * mu\n\t"
+ "ldr r7, [%[m], #240]\n\t"
+ "ldr r9, [%[a], #240]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #240]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+61] += m[61] * mu\n\t"
+ "ldr r7, [%[m], #244]\n\t"
+ "ldr r9, [%[a], #244]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #244]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+62] += m[62] * mu\n\t"
+ "ldr r7, [%[m], #248]\n\t"
+ "ldr r9, [%[a], #248]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #248]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+63] += m[63] * mu\n\t"
+ "ldr r7, [%[m], #252]\n\t"
+ "ldr r9, [%[a], #252]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #252]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+64] += m[64] * mu\n\t"
+ "ldr r7, [%[m], #256]\n\t"
+ "ldr r9, [%[a], #256]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #256]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+65] += m[65] * mu\n\t"
+ "ldr r7, [%[m], #260]\n\t"
+ "ldr r9, [%[a], #260]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #260]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+66] += m[66] * mu\n\t"
+ "ldr r7, [%[m], #264]\n\t"
+ "ldr r9, [%[a], #264]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #264]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+67] += m[67] * mu\n\t"
+ "ldr r7, [%[m], #268]\n\t"
+ "ldr r9, [%[a], #268]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #268]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+68] += m[68] * mu\n\t"
+ "ldr r7, [%[m], #272]\n\t"
+ "ldr r9, [%[a], #272]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #272]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+69] += m[69] * mu\n\t"
+ "ldr r7, [%[m], #276]\n\t"
+ "ldr r9, [%[a], #276]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #276]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+70] += m[70] * mu\n\t"
+ "ldr r7, [%[m], #280]\n\t"
+ "ldr r9, [%[a], #280]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #280]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+71] += m[71] * mu\n\t"
+ "ldr r7, [%[m], #284]\n\t"
+ "ldr r9, [%[a], #284]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #284]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+72] += m[72] * mu\n\t"
+ "ldr r7, [%[m], #288]\n\t"
+ "ldr r9, [%[a], #288]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #288]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+73] += m[73] * mu\n\t"
+ "ldr r7, [%[m], #292]\n\t"
+ "ldr r9, [%[a], #292]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #292]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+74] += m[74] * mu\n\t"
+ "ldr r7, [%[m], #296]\n\t"
+ "ldr r9, [%[a], #296]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #296]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+75] += m[75] * mu\n\t"
+ "ldr r7, [%[m], #300]\n\t"
+ "ldr r9, [%[a], #300]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #300]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+76] += m[76] * mu\n\t"
+ "ldr r7, [%[m], #304]\n\t"
+ "ldr r9, [%[a], #304]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #304]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+77] += m[77] * mu\n\t"
+ "ldr r7, [%[m], #308]\n\t"
+ "ldr r9, [%[a], #308]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #308]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+78] += m[78] * mu\n\t"
+ "ldr r7, [%[m], #312]\n\t"
+ "ldr r9, [%[a], #312]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #312]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+79] += m[79] * mu\n\t"
+ "ldr r7, [%[m], #316]\n\t"
+ "ldr r9, [%[a], #316]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #316]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+80] += m[80] * mu\n\t"
+ "ldr r7, [%[m], #320]\n\t"
+ "ldr r9, [%[a], #320]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #320]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+81] += m[81] * mu\n\t"
+ "ldr r7, [%[m], #324]\n\t"
+ "ldr r9, [%[a], #324]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #324]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+82] += m[82] * mu\n\t"
+ "ldr r7, [%[m], #328]\n\t"
+ "ldr r9, [%[a], #328]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #328]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+83] += m[83] * mu\n\t"
+ "ldr r7, [%[m], #332]\n\t"
+ "ldr r9, [%[a], #332]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #332]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+84] += m[84] * mu\n\t"
+ "ldr r7, [%[m], #336]\n\t"
+ "ldr r9, [%[a], #336]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #336]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+85] += m[85] * mu\n\t"
+ "ldr r7, [%[m], #340]\n\t"
+ "ldr r9, [%[a], #340]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #340]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+86] += m[86] * mu\n\t"
+ "ldr r7, [%[m], #344]\n\t"
+ "ldr r9, [%[a], #344]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #344]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+87] += m[87] * mu\n\t"
+ "ldr r7, [%[m], #348]\n\t"
+ "ldr r9, [%[a], #348]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #348]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+88] += m[88] * mu\n\t"
+ "ldr r7, [%[m], #352]\n\t"
+ "ldr r9, [%[a], #352]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #352]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+89] += m[89] * mu\n\t"
+ "ldr r7, [%[m], #356]\n\t"
+ "ldr r9, [%[a], #356]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #356]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+90] += m[90] * mu\n\t"
+ "ldr r7, [%[m], #360]\n\t"
+ "ldr r9, [%[a], #360]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #360]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+91] += m[91] * mu\n\t"
+ "ldr r7, [%[m], #364]\n\t"
+ "ldr r9, [%[a], #364]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #364]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+92] += m[92] * mu\n\t"
+ "ldr r7, [%[m], #368]\n\t"
+ "ldr r9, [%[a], #368]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #368]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+93] += m[93] * mu\n\t"
+ "ldr r7, [%[m], #372]\n\t"
+ "ldr r9, [%[a], #372]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #372]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+94] += m[94] * mu\n\t"
+ "ldr r7, [%[m], #376]\n\t"
+ "ldr r9, [%[a], #376]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #376]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+95] += m[95] * mu\n\t"
+ "ldr r7, [%[m], #380]\n\t"
+ "ldr r9, [%[a], #380]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #380]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+96] += m[96] * mu\n\t"
+ "ldr r7, [%[m], #384]\n\t"
+ "ldr r9, [%[a], #384]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #384]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+97] += m[97] * mu\n\t"
+ "ldr r7, [%[m], #388]\n\t"
+ "ldr r9, [%[a], #388]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #388]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+98] += m[98] * mu\n\t"
+ "ldr r7, [%[m], #392]\n\t"
+ "ldr r9, [%[a], #392]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #392]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+99] += m[99] * mu\n\t"
+ "ldr r7, [%[m], #396]\n\t"
+ "ldr r9, [%[a], #396]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #396]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+100] += m[100] * mu\n\t"
+ "ldr r7, [%[m], #400]\n\t"
+ "ldr r9, [%[a], #400]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #400]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+101] += m[101] * mu\n\t"
+ "ldr r7, [%[m], #404]\n\t"
+ "ldr r9, [%[a], #404]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #404]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+102] += m[102] * mu\n\t"
+ "ldr r7, [%[m], #408]\n\t"
+ "ldr r9, [%[a], #408]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #408]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+103] += m[103] * mu\n\t"
+ "ldr r7, [%[m], #412]\n\t"
+ "ldr r9, [%[a], #412]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #412]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+104] += m[104] * mu\n\t"
+ "ldr r7, [%[m], #416]\n\t"
+ "ldr r9, [%[a], #416]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #416]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+105] += m[105] * mu\n\t"
+ "ldr r7, [%[m], #420]\n\t"
+ "ldr r9, [%[a], #420]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #420]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+106] += m[106] * mu\n\t"
+ "ldr r7, [%[m], #424]\n\t"
+ "ldr r9, [%[a], #424]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #424]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+107] += m[107] * mu\n\t"
+ "ldr r7, [%[m], #428]\n\t"
+ "ldr r9, [%[a], #428]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #428]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+108] += m[108] * mu\n\t"
+ "ldr r7, [%[m], #432]\n\t"
+ "ldr r9, [%[a], #432]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #432]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+109] += m[109] * mu\n\t"
+ "ldr r7, [%[m], #436]\n\t"
+ "ldr r9, [%[a], #436]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #436]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+110] += m[110] * mu\n\t"
+ "ldr r7, [%[m], #440]\n\t"
+ "ldr r9, [%[a], #440]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #440]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+111] += m[111] * mu\n\t"
+ "ldr r7, [%[m], #444]\n\t"
+ "ldr r9, [%[a], #444]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #444]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+112] += m[112] * mu\n\t"
+ "ldr r7, [%[m], #448]\n\t"
+ "ldr r9, [%[a], #448]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #448]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+113] += m[113] * mu\n\t"
+ "ldr r7, [%[m], #452]\n\t"
+ "ldr r9, [%[a], #452]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #452]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+114] += m[114] * mu\n\t"
+ "ldr r7, [%[m], #456]\n\t"
+ "ldr r9, [%[a], #456]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #456]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+115] += m[115] * mu\n\t"
+ "ldr r7, [%[m], #460]\n\t"
+ "ldr r9, [%[a], #460]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #460]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+116] += m[116] * mu\n\t"
+ "ldr r7, [%[m], #464]\n\t"
+ "ldr r9, [%[a], #464]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #464]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+117] += m[117] * mu\n\t"
+ "ldr r7, [%[m], #468]\n\t"
+ "ldr r9, [%[a], #468]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #468]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+118] += m[118] * mu\n\t"
+ "ldr r7, [%[m], #472]\n\t"
+ "ldr r9, [%[a], #472]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #472]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+119] += m[119] * mu\n\t"
+ "ldr r7, [%[m], #476]\n\t"
+ "ldr r9, [%[a], #476]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #476]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+120] += m[120] * mu\n\t"
+ "ldr r7, [%[m], #480]\n\t"
+ "ldr r9, [%[a], #480]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #480]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+121] += m[121] * mu\n\t"
+ "ldr r7, [%[m], #484]\n\t"
+ "ldr r9, [%[a], #484]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #484]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+122] += m[122] * mu\n\t"
+ "ldr r7, [%[m], #488]\n\t"
+ "ldr r9, [%[a], #488]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #488]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+123] += m[123] * mu\n\t"
+ "ldr r7, [%[m], #492]\n\t"
+ "ldr r9, [%[a], #492]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #492]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+124] += m[124] * mu\n\t"
+ "ldr r7, [%[m], #496]\n\t"
+ "ldr r9, [%[a], #496]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #496]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+125] += m[125] * mu\n\t"
+ "ldr r7, [%[m], #500]\n\t"
+ "ldr r9, [%[a], #500]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #500]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+126] += m[126] * mu\n\t"
+ "ldr r7, [%[m], #504]\n\t"
+ "ldr r9, [%[a], #504]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #504]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+127] += m[127] * mu\n\t"
+ "ldr r7, [%[m], #508]\n\t"
+ "ldr r9, [%[a], #508]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r7, r7, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ "adc %[ca], %[ca], %[ca]\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #508]\n\t"
+ "ldr r9, [%[a], #512]\n\t"
+ "adcs r9, r9, r7\n\t"
+ "str r9, [%[a], #512]\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ "# i += 1\n\t"
+ "add %[a], %[a], #4\n\t"
+ "add r12, r12, #4\n\t"
+ "cmp r12, #512\n\t"
+ "blt 1b\n\t"
+ "str r10, [%[a], #0]\n\t"
+ "str r14, [%[a], #4]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Computes the full 256-word product with sp_4096_mul_128, then applies
+ * one Montgomery reduction to bring the result back to 128 words.
+ *
+ * r Result of multiplication, in Montgomery form.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_4096_mul_128(r, a, b);
+ sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Uses the dedicated 128-word squaring routine followed by one
+ * Montgomery reduction.
+ *
+ * r Result of squaring, in Montgomery form.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_4096_sqr_128(r, a);
+ sp_4096_mont_reduce_128(r, m, mp);
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * The quotient is estimated one bit at a time against (div/2 + 1)
+ * (top 32 bits, then 30 more in the loop, then a final doubling),
+ * and then corrected with two multiply-and-subtract steps.
+ * NOTE(review): the bit-by-bit estimate presumably relies on div being
+ * normalized (top bit set) — confirm at call sites.
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the quotient of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div)
+{
+ sp_digit r = 0;
+
+ /* Inline asm clobbers scratch r4-r8 only; r accumulates the quotient. */
+ __asm__ __volatile__ (
+ "lsr r5, %[div], #1\n\t"
+ "add r5, r5, #1\n\t"
+ "mov r6, %[d0]\n\t"
+ "mov r7, %[d1]\n\t"
+ "# Do top 32\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "# Next 30 bits\n\t"
+ "mov r4, #29\n\t"
+ "1:\n\t"
+ "movs r6, r6, lsl #1\n\t"
+ "adc r7, r7, r7\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "subs r4, r4, #1\n\t"
+ "bpl 1b\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "add %[r], %[r], #1\n\t"
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ "subs r8, %[div], r4\n\t"
+ "sbc r8, r8, r8\n\t"
+ "sub %[r], %[r], r8\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r6", "r7", "r8"
+ );
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * NOTE(review): m looks like an all-ones/all-zeros mask used for
+ * constant-time selection — confirm against callers.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<128; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ int i;
+
+ /* Unrolled: mask 8 words per iteration. */
+ for (i = 0; i < 128; i += 8) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ r[i+6] = a[i+6] & m;
+ r[i+7] = a[i+7] & m;
+ }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = -1;
+ sp_digit one = 1;
+
+
+#ifdef WOLFSSL_SP_SMALL
+ __asm__ __volatile__ (
+ "mov r7, #0\n\t"
+ "mov r3, #-1\n\t"
+ "mov r6, #508\n\t"
+ "1:\n\t"
+ "ldr r4, [%[a], r6]\n\t"
+ "ldr r5, [%[b], r6]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "subs r6, r6, #4\n\t"
+ "bcs 1b\n\t"
+ "eor %[r], %[r], r3\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+#else
+ __asm__ __volatile__ (
+ "mov r7, #0\n\t"
+ "mov r3, #-1\n\t"
+ "ldr r4, [%[a], #508]\n\t"
+ "ldr r5, [%[b], #508]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #504]\n\t"
+ "ldr r5, [%[b], #504]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #500]\n\t"
+ "ldr r5, [%[b], #500]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #496]\n\t"
+ "ldr r5, [%[b], #496]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #492]\n\t"
+ "ldr r5, [%[b], #492]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #488]\n\t"
+ "ldr r5, [%[b], #488]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #484]\n\t"
+ "ldr r5, [%[b], #484]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #480]\n\t"
+ "ldr r5, [%[b], #480]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #476]\n\t"
+ "ldr r5, [%[b], #476]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #472]\n\t"
+ "ldr r5, [%[b], #472]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #468]\n\t"
+ "ldr r5, [%[b], #468]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #464]\n\t"
+ "ldr r5, [%[b], #464]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #460]\n\t"
+ "ldr r5, [%[b], #460]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #456]\n\t"
+ "ldr r5, [%[b], #456]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #452]\n\t"
+ "ldr r5, [%[b], #452]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #448]\n\t"
+ "ldr r5, [%[b], #448]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #444]\n\t"
+ "ldr r5, [%[b], #444]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #440]\n\t"
+ "ldr r5, [%[b], #440]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #436]\n\t"
+ "ldr r5, [%[b], #436]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #432]\n\t"
+ "ldr r5, [%[b], #432]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #428]\n\t"
+ "ldr r5, [%[b], #428]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #424]\n\t"
+ "ldr r5, [%[b], #424]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #420]\n\t"
+ "ldr r5, [%[b], #420]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #416]\n\t"
+ "ldr r5, [%[b], #416]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #412]\n\t"
+ "ldr r5, [%[b], #412]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #408]\n\t"
+ "ldr r5, [%[b], #408]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #404]\n\t"
+ "ldr r5, [%[b], #404]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #400]\n\t"
+ "ldr r5, [%[b], #400]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #396]\n\t"
+ "ldr r5, [%[b], #396]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #392]\n\t"
+ "ldr r5, [%[b], #392]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #388]\n\t"
+ "ldr r5, [%[b], #388]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #384]\n\t"
+ "ldr r5, [%[b], #384]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #380]\n\t"
+ "ldr r5, [%[b], #380]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #376]\n\t"
+ "ldr r5, [%[b], #376]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #372]\n\t"
+ "ldr r5, [%[b], #372]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #368]\n\t"
+ "ldr r5, [%[b], #368]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #364]\n\t"
+ "ldr r5, [%[b], #364]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #360]\n\t"
+ "ldr r5, [%[b], #360]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #356]\n\t"
+ "ldr r5, [%[b], #356]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #352]\n\t"
+ "ldr r5, [%[b], #352]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #348]\n\t"
+ "ldr r5, [%[b], #348]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #344]\n\t"
+ "ldr r5, [%[b], #344]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #340]\n\t"
+ "ldr r5, [%[b], #340]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #336]\n\t"
+ "ldr r5, [%[b], #336]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #332]\n\t"
+ "ldr r5, [%[b], #332]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #328]\n\t"
+ "ldr r5, [%[b], #328]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #324]\n\t"
+ "ldr r5, [%[b], #324]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #320]\n\t"
+ "ldr r5, [%[b], #320]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #316]\n\t"
+ "ldr r5, [%[b], #316]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #312]\n\t"
+ "ldr r5, [%[b], #312]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #308]\n\t"
+ "ldr r5, [%[b], #308]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #304]\n\t"
+ "ldr r5, [%[b], #304]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #300]\n\t"
+ "ldr r5, [%[b], #300]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #296]\n\t"
+ "ldr r5, [%[b], #296]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #292]\n\t"
+ "ldr r5, [%[b], #292]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #288]\n\t"
+ "ldr r5, [%[b], #288]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #284]\n\t"
+ "ldr r5, [%[b], #284]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #280]\n\t"
+ "ldr r5, [%[b], #280]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #276]\n\t"
+ "ldr r5, [%[b], #276]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #272]\n\t"
+ "ldr r5, [%[b], #272]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #268]\n\t"
+ "ldr r5, [%[b], #268]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #264]\n\t"
+ "ldr r5, [%[b], #264]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #260]\n\t"
+ "ldr r5, [%[b], #260]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #256]\n\t"
+ "ldr r5, [%[b], #256]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #252]\n\t"
+ "ldr r5, [%[b], #252]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #248]\n\t"
+ "ldr r5, [%[b], #248]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #244]\n\t"
+ "ldr r5, [%[b], #244]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #240]\n\t"
+ "ldr r5, [%[b], #240]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #236]\n\t"
+ "ldr r5, [%[b], #236]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #232]\n\t"
+ "ldr r5, [%[b], #232]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #228]\n\t"
+ "ldr r5, [%[b], #228]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #224]\n\t"
+ "ldr r5, [%[b], #224]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #220]\n\t"
+ "ldr r5, [%[b], #220]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #216]\n\t"
+ "ldr r5, [%[b], #216]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #212]\n\t"
+ "ldr r5, [%[b], #212]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #208]\n\t"
+ "ldr r5, [%[b], #208]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #204]\n\t"
+ "ldr r5, [%[b], #204]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #200]\n\t"
+ "ldr r5, [%[b], #200]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #196]\n\t"
+ "ldr r5, [%[b], #196]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #192]\n\t"
+ "ldr r5, [%[b], #192]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #188]\n\t"
+ "ldr r5, [%[b], #188]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #184]\n\t"
+ "ldr r5, [%[b], #184]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #180]\n\t"
+ "ldr r5, [%[b], #180]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "ldr r5, [%[b], #176]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #172]\n\t"
+ "ldr r5, [%[b], #172]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #168]\n\t"
+ "ldr r5, [%[b], #168]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #164]\n\t"
+ "ldr r5, [%[b], #164]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #160]\n\t"
+ "ldr r5, [%[b], #160]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #156]\n\t"
+ "ldr r5, [%[b], #156]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "ldr r5, [%[b], #152]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #148]\n\t"
+ "ldr r5, [%[b], #148]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #144]\n\t"
+ "ldr r5, [%[b], #144]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #140]\n\t"
+ "ldr r5, [%[b], #140]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #136]\n\t"
+ "ldr r5, [%[b], #136]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #132]\n\t"
+ "ldr r5, [%[b], #132]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "ldr r5, [%[b], #128]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "and r4, r4, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "subs r4, r4, r5\n\t"
+ "it hi\n\t"
+ "movhi %[r], %[one]\n\t"
+ "it lo\n\t"
+ "movlo %[r], r3\n\t"
+ "it ne\n\t"
+ "movne r3, r7\n\t"
+ "eor %[r], %[r], r3\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+#endif
+
+ return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division on 32-bit digits: one estimated quotient word
+ * per iteration, with mask-based (branch-free) add-back correction.
+ *
+ * a Number to be divided (2 * 128 digits).
+ * d Number to divide with (128 digits).
+ * m Multiplier result (unused; quotient is not produced).
+ * r Remainder from the division (128 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ /* t1: working copy of the double-width dividend, reduced in place.
+ * t2: one extra digit to hold the carry out of the d * r1 product. */
+ sp_digit t1[256], t2[129];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+
+ /* Most significant digit of the divisor, used to estimate each
+ * quotient word. */
+ div = d[127];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+ for (i=127; i>=0; i--) {
+ /* Estimate quotient word from the top two live digits of t1. */
+ r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+ /* Subtract r1 * d from the current window of t1; the returned
+ * borrow and the product's top digit adjust the high word. */
+ sp_4096_mul_d_128(t2, d, r1);
+ t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+ t1[128 + i] -= t2[128];
+ /* The estimate can be too large by up to 2: add d back, gated by
+ * a mask derived from the high word, so no data-dependent branch
+ * is taken (NOTE(review): appears intended as the constant-time
+ * variant of the _cond version below — confirm). */
+ sp_4096_mask_128(t2, d, t1[128 + i]);
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+ sp_4096_mask_128(t2, d, t1[128 + i]);
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+ }
+
+ /* Final correction: t1 may still equal or exceed d once; subtract
+ * conditionally using an all-ones/all-zeros mask. */
+ r1 = sp_4096_cmp_128(t1, d) >= 0;
+ sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * A modular reduction is a division that keeps only the remainder, so
+ * this simply delegates to the division routine with no quotient output.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ int err;
+
+ /* Quotient pointer is NULL: only the remainder is computed. */
+ err = sp_4096_div_128(a, m, NULL, r);
+ return err;
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division on 32-bit digits: one estimated quotient word
+ * per iteration, with branch-based ("conditional") add-back correction.
+ *
+ * a Number to be divided (2 * 128 digits).
+ * d Number to divide with (128 digits).
+ * m Multiplier result (unused; quotient is not produced).
+ * r Remainder from the division (128 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ /* t1: working copy of the double-width dividend, reduced in place.
+ * t2: one extra digit to hold the carry out of the d * r1 product. */
+ sp_digit t1[256], t2[129];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+
+ /* Most significant digit of the divisor, used to estimate each
+ * quotient word. */
+ div = d[127];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+ for (i=127; i>=0; i--) {
+ /* Estimate quotient word from the top two live digits of t1. */
+ r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+ /* Subtract r1 * d from the current window of t1; the returned
+ * borrow and the product's top digit adjust the high word. */
+ sp_4096_mul_d_128(t2, d, r1);
+ t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+ t1[128 + i] -= t2[128];
+ /* The estimate can be too large by up to 2: if the subtraction
+ * underflowed (non-zero high word), add d back, up to twice.
+ * Unlike sp_4096_div_128 this branches on the data, so it is
+ * not constant-time (NOTE(review): presumably intended only for
+ * inputs that are not secret — confirm against callers). */
+ if (t1[128 + i] != 0) {
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+ if (t1[128 + i] != 0)
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+ }
+ }
+
+ /* Final correction: t1 may still equal or exceed d once; subtract
+ * conditionally using an all-ones/all-zeros mask. */
+ r1 = sp_4096_cmp_128(t1, d) >= 0;
+ sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * A modular reduction is a division that keeps only the remainder, so
+ * this simply delegates to the conditional division routine with no
+ * quotient output.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ int err;
+
+ /* Quotient pointer is NULL: only the remainder is computed. */
+ err = sp_4096_div_128_cond(a, m, NULL, r);
+ return err;
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window algorithm: a 16 entry table of the small powers of a
+ * (in Montgomery form) is built, then the exponent is consumed four bits
+ * at a time with four Montgomery squarings and one table multiplication
+ * per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may not be less than m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][256];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* One 256 digit scratch area per table entry. */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 256;
+        }
+#endif
+        norm = t[0];
+
+        /* mp = -1/m mod 2^word-size; norm = R mod m. */
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        /* t[1] = a*R mod m - convert a to Montgomery form by placing it in
+         * the top half of a 256 digit buffer (low half zero) and reducing. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_128(t[1] + 128, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_128(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[i] = (a^i)*R mod m. */
+        sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+
+        /* Size the first window so the remaining bit count is a multiple
+         * of 4; i indexes the current exponent word. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+        /* n holds the unconsumed bits of the current word (top-aligned);
+         * c counts how many of them are valid. */
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                /* Current word exhausted - take the window from the next. */
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                /* Window straddles two exponent words. */
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            /* r = r^16 * a^y (all values in Montgomery form). */
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_mont_mul_128(r, r, t[y], m, mp);
+        }
+
+        /* Convert back from Montgomery form and normalize into [0, m). */
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window algorithm: a 32 entry table of the small powers of a
+ * (in Montgomery form) is built, then the exponent is consumed five bits
+ * at a time with five Montgomery squarings and one table multiplication
+ * per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may not be less than m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][256];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* One 256 digit scratch area per table entry. */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 256;
+        }
+#endif
+        norm = t[0];
+
+        /* mp = -1/m mod 2^word-size; norm = R mod m. */
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        /* t[1] = a*R mod m - convert a to Montgomery form by placing it in
+         * the top half of a 256 digit buffer (low half zero) and reducing. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_128(t[1] + 128, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_128(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[i] = (a^i)*R mod m. */
+        sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_128(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_128(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_128(t[20], t[10], m, mp);
+        sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_128(t[22], t[11], m, mp);
+        sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_128(t[24], t[12], m, mp);
+        sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_128(t[26], t[13], m, mp);
+        sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_128(t[28], t[14], m, mp);
+        sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_128(t[30], t[15], m, mp);
+        sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp);
+
+        /* Size the first window so the remaining bit count is a multiple
+         * of 5; i indexes the current exponent word. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+        /* n holds the unconsumed bits of the current word (top-aligned);
+         * c counts how many of them are valid. */
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                /* Current word exhausted - take the window from the next. */
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                /* Window straddles two exponent words. */
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            /* r = r^32 * a^y (all values in Montgomery form). */
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_mont_mul_128(r, r, t[y], m, mp);
+        }
+
+        /* Convert back from Montgomery form and normalize into [0, m). */
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * Computes out = in^e mod m where the exponent e fits in 32 bits. The
+ * common exponent 3 is handled with one squaring and one multiplication;
+ * all other exponents use left-to-right square-and-multiply in
+ * Montgomery form.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[256], m[128], r[256];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 ||
+            mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Single heap allocation carved into a (double width), r and m. */
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL,
+                DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 128 * 2;
+        m = r + 128 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        /* Load the input into the top half of a. */
+        ah = a + 128;
+
+        sp_4096_from_bin(ah, 128, in, inLen);
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        /* mp digits narrower than 32 bits - combine the low two digits. */
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 128, mm);
+
+        if (e[0] == 0x3) {
+            /* r = ((in^2 mod m) * in) mod m. */
+            if (err == MP_OKAY) {
+                sp_4096_sqr_128(r, ah);
+                err = sp_4096_mod_128_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_4096_mul_128(r, ah, r);
+                err = sp_4096_mod_128_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            /* The low half of a is zeroed so a = in * 2^4096; reducing
+             * mod m then yields in * R mod m. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                /* Find the top set bit of the exponent. */
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                /* Left-to-right square-and-multiply over e's bits. */
+                XMEMCPY(r, a, sizeof(sp_digit) * 128);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_128(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_128(r, r, a, m, mp);
+                    }
+                }
+                /* Convert back from Montgomery form. */
+                XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
+                sp_4096_mont_reduce_128(r, m, mp);
+
+                /* Compare r with m from the top word down; subtract m
+                 * once when r >= m so the result lies in [0, m). */
+                for (i = 127; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_4096_sub_in_place_128(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    /* NOTE(review): this appears to be the non-CRT private-key path
+     * (out = in^d mod m), selected when only the private exponent d is
+     * used. The enclosing function definition is outside this hunk -
+     * confirm it is sp_RsaPrivate_4096 before relying on the parameter
+     * names referenced below (in, inLen, dm, mm, out, outLen). */
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    /* CRT parameters are unused on this path. */
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Single allocation carved into d, a (double width, also r) and m. */
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
+                DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 128;
+        m = a + 256;
+        r = a;
+
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(d, 128, dm);
+        sp_4096_from_mp(m, 128, mm);
+        err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        /* d holds the private exponent - zeroize before freeing. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 128);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Operates on 64 32-bit digits (2048 bits, 256 bytes). Used by the RSA CRT
+ * private-key computation to add the prime back when a subtraction
+ * borrowed. b is masked with m before the add, so m == 0 copies a to r
+ * with no change in value.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ * returns the carry out of the 64 digit addition.
+ */
+static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    /* Loop over the 64 digits 4 bytes at a time (r8 is the byte offset).
+     * The carry is kept in c between iterations: "adds c, c, #-1" restores
+     * the carry flag from c (c=1 sets carry, c=0 clears it) before each
+     * add-with-carry, and "adc c, r9, r9" captures it again (r9 == 0). */
+    __asm__ __volatile__ (
+        "mov r9, #0\n\t"
+        "mov r8, #0\n\t"
+        "1:\n\t"
+        "adds %[c], %[c], #-1\n\t"
+        "ldr r4, [%[a], r8]\n\t"
+        "ldr r5, [%[b], r8]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adc %[c], r9, r9\n\t"
+        "str r4, [%[r], r8]\n\t"
+        "add r8, r8, #4\n\t"
+        "cmp r8, #256\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    /* Fully unrolled: two digits per step; the first pair uses "adds" to
+     * start the carry chain, every later pair continues it with "adcs",
+     * and the final "adc" extracts the carry out into c. */
+    __asm__ __volatile__ (
+
+        "mov r9, #0\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r7, [%[b], #4]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adds r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r6, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r7, [%[b], #12]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "str r6, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r7, [%[b], #20]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r6, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r7, [%[b], #28]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "str r6, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r6, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r7, [%[b], #36]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "str r6, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r6, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r7, [%[b], #44]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "str r6, [%[r], #44]\n\t"
+        "ldr r4, [%[a], #48]\n\t"
+        "ldr r6, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "ldr r7, [%[b], #52]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #48]\n\t"
+        "str r6, [%[r], #52]\n\t"
+        "ldr r4, [%[a], #56]\n\t"
+        "ldr r6, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "ldr r7, [%[b], #60]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #56]\n\t"
+        "str r6, [%[r], #60]\n\t"
+        "ldr r4, [%[a], #64]\n\t"
+        "ldr r6, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "ldr r7, [%[b], #68]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #64]\n\t"
+        "str r6, [%[r], #68]\n\t"
+        "ldr r4, [%[a], #72]\n\t"
+        "ldr r6, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "ldr r7, [%[b], #76]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #72]\n\t"
+        "str r6, [%[r], #76]\n\t"
+        "ldr r4, [%[a], #80]\n\t"
+        "ldr r6, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "ldr r7, [%[b], #84]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #80]\n\t"
+        "str r6, [%[r], #84]\n\t"
+        "ldr r4, [%[a], #88]\n\t"
+        "ldr r6, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "ldr r7, [%[b], #92]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #88]\n\t"
+        "str r6, [%[r], #92]\n\t"
+        "ldr r4, [%[a], #96]\n\t"
+        "ldr r6, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "ldr r7, [%[b], #100]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #96]\n\t"
+        "str r6, [%[r], #100]\n\t"
+        "ldr r4, [%[a], #104]\n\t"
+        "ldr r6, [%[a], #108]\n\t"
+        "ldr r5, [%[b], #104]\n\t"
+        "ldr r7, [%[b], #108]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #104]\n\t"
+        "str r6, [%[r], #108]\n\t"
+        "ldr r4, [%[a], #112]\n\t"
+        "ldr r6, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #112]\n\t"
+        "ldr r7, [%[b], #116]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #112]\n\t"
+        "str r6, [%[r], #116]\n\t"
+        "ldr r4, [%[a], #120]\n\t"
+        "ldr r6, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "ldr r7, [%[b], #124]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #120]\n\t"
+        "str r6, [%[r], #124]\n\t"
+        "ldr r4, [%[a], #128]\n\t"
+        "ldr r6, [%[a], #132]\n\t"
+        "ldr r5, [%[b], #128]\n\t"
+        "ldr r7, [%[b], #132]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #128]\n\t"
+        "str r6, [%[r], #132]\n\t"
+        "ldr r4, [%[a], #136]\n\t"
+        "ldr r6, [%[a], #140]\n\t"
+        "ldr r5, [%[b], #136]\n\t"
+        "ldr r7, [%[b], #140]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #136]\n\t"
+        "str r6, [%[r], #140]\n\t"
+        "ldr r4, [%[a], #144]\n\t"
+        "ldr r6, [%[a], #148]\n\t"
+        "ldr r5, [%[b], #144]\n\t"
+        "ldr r7, [%[b], #148]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #144]\n\t"
+        "str r6, [%[r], #148]\n\t"
+        "ldr r4, [%[a], #152]\n\t"
+        "ldr r6, [%[a], #156]\n\t"
+        "ldr r5, [%[b], #152]\n\t"
+        "ldr r7, [%[b], #156]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #152]\n\t"
+        "str r6, [%[r], #156]\n\t"
+        "ldr r4, [%[a], #160]\n\t"
+        "ldr r6, [%[a], #164]\n\t"
+        "ldr r5, [%[b], #160]\n\t"
+        "ldr r7, [%[b], #164]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #160]\n\t"
+        "str r6, [%[r], #164]\n\t"
+        "ldr r4, [%[a], #168]\n\t"
+        "ldr r6, [%[a], #172]\n\t"
+        "ldr r5, [%[b], #168]\n\t"
+        "ldr r7, [%[b], #172]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #168]\n\t"
+        "str r6, [%[r], #172]\n\t"
+        "ldr r4, [%[a], #176]\n\t"
+        "ldr r6, [%[a], #180]\n\t"
+        "ldr r5, [%[b], #176]\n\t"
+        "ldr r7, [%[b], #180]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #176]\n\t"
+        "str r6, [%[r], #180]\n\t"
+        "ldr r4, [%[a], #184]\n\t"
+        "ldr r6, [%[a], #188]\n\t"
+        "ldr r5, [%[b], #184]\n\t"
+        "ldr r7, [%[b], #188]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #184]\n\t"
+        "str r6, [%[r], #188]\n\t"
+        "ldr r4, [%[a], #192]\n\t"
+        "ldr r6, [%[a], #196]\n\t"
+        "ldr r5, [%[b], #192]\n\t"
+        "ldr r7, [%[b], #196]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #192]\n\t"
+        "str r6, [%[r], #196]\n\t"
+        "ldr r4, [%[a], #200]\n\t"
+        "ldr r6, [%[a], #204]\n\t"
+        "ldr r5, [%[b], #200]\n\t"
+        "ldr r7, [%[b], #204]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #200]\n\t"
+        "str r6, [%[r], #204]\n\t"
+        "ldr r4, [%[a], #208]\n\t"
+        "ldr r6, [%[a], #212]\n\t"
+        "ldr r5, [%[b], #208]\n\t"
+        "ldr r7, [%[b], #212]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #208]\n\t"
+        "str r6, [%[r], #212]\n\t"
+        "ldr r4, [%[a], #216]\n\t"
+        "ldr r6, [%[a], #220]\n\t"
+        "ldr r5, [%[b], #216]\n\t"
+        "ldr r7, [%[b], #220]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #216]\n\t"
+        "str r6, [%[r], #220]\n\t"
+        "ldr r4, [%[a], #224]\n\t"
+        "ldr r6, [%[a], #228]\n\t"
+        "ldr r5, [%[b], #224]\n\t"
+        "ldr r7, [%[b], #228]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #224]\n\t"
+        "str r6, [%[r], #228]\n\t"
+        "ldr r4, [%[a], #232]\n\t"
+        "ldr r6, [%[a], #236]\n\t"
+        "ldr r5, [%[b], #232]\n\t"
+        "ldr r7, [%[b], #236]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #232]\n\t"
+        "str r6, [%[r], #236]\n\t"
+        "ldr r4, [%[a], #240]\n\t"
+        "ldr r6, [%[a], #244]\n\t"
+        "ldr r5, [%[b], #240]\n\t"
+        "ldr r7, [%[b], #244]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #240]\n\t"
+        "str r6, [%[r], #244]\n\t"
+        "ldr r4, [%[a], #248]\n\t"
+        "ldr r6, [%[a], #252]\n\t"
+        "ldr r5, [%[b], #248]\n\t"
+        "ldr r7, [%[b], #252]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #248]\n\t"
+        "str r6, [%[r], #252]\n\t"
+        "adc %[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * CRT implementation: computes in^dp mod p and in^dq mod q (two half-size
+ * 2048-bit exponentiations) and recombines them with Garner's formula
+ * using qInv. The plain exponent dm and modulus mm are only used for
+ * parameter validation on this path.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128 * 2];
+    sp_digit p[64], q[64], dp[64];
+    sp_digit tmpa[128], tmpb[128];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL,
+                DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        /* dp, dq and qi share one 64 digit buffer - each value is loaded
+         * only after the previous one is no longer needed. */
+        a = t;
+        p = a + 128 * 2;
+        q = p + 64;
+        qi = dq = dp = q + 64;
+        tmpa = qi + 64;
+        tmpb = tmpa + 128;
+
+        r = t + 128;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_4096_from_bin(a, 128, in, inLen);
+        sp_4096_from_mp(p, 64, pm);
+        sp_4096_from_mp(q, 64, qm);
+        sp_4096_from_mp(dp, 64, dpm);
+
+        /* tmpa = in^dp mod p. */
+        err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        /* tmpb = in^dq mod q. */
+        sp_4096_from_mp(dq, 64, dqm);
+        err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        /* tmpa = (tmpa - tmpb) mod p: add p back (up to twice) when the
+         * subtraction borrowed. */
+        c = sp_2048_sub_in_place_64(tmpa, tmpb);
+        c += sp_4096_cond_add_64(tmpa, tmpa, p, c);
+        sp_4096_cond_add_64(tmpa, tmpa, p, c);
+
+        /* tmpa = tmpa * qInv mod p (Garner's h). */
+        sp_2048_from_mp(qi, 64, qim);
+        sp_2048_mul_64(tmpa, tmpa, qi);
+        err = sp_2048_mod_64(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* r = tmpb + q * h - recombine to the full 4096-bit result. */
+        sp_2048_mul_64(tmpa, q, tmpa);
+        XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64);
+        sp_4096_add_128(r, tmpb, tmpa);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        /* Zeroize all key-dependent intermediates before freeing. */
+        XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    /* Zeroize key-dependent stack buffers. */
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer (128 32-bit digits, 4096 bits).
+ * r A multi-precision integer.
+ * returns the result of mp_grow (MP_OKAY on success).
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        /* Digit sizes match - straight copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 128);
+        r->used = 128;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        /* mp_int digits are narrower - split each 32-bit word across
+         * several mp digits; s tracks the bit position within a[i]. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 128; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp_int digits are wider - pack multiple 32-bit words into each
+         * mp digit; s is the bit offset within the current mp digit. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 128; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    /* Validate sizes: base and exponent must fit in 4096 bits and the
+     * modulus must be exactly 4096 bits. */
+    if ((mp_count_bits(base) > 4096) || (expBits > 4096) ||
+            (mp_count_bits(mod) != 4096)) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* Load the MP integers into fixed-size digit arrays. */
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_mp(e, 128, exp);
+        sp_4096_from_mp(m, 128, mod);
+
+        err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    /* The exponent may be private - clear the stack copy. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n)
+{
+ __asm__ __volatile__ (
+ "mov r6, #31\n\t"
+ "sub r6, r6, %[n]\n\t"
+ "ldr r3, [%[a], #508]\n\t"
+ "lsr r4, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r4, r4, r6\n\t"
+ "ldr r2, [%[a], #504]\n\t"
+ "str r4, [%[r], #512]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #500]\n\t"
+ "str r3, [%[r], #508]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #496]\n\t"
+ "str r2, [%[r], #504]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #492]\n\t"
+ "str r4, [%[r], #500]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #488]\n\t"
+ "str r3, [%[r], #496]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #484]\n\t"
+ "str r2, [%[r], #492]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #480]\n\t"
+ "str r4, [%[r], #488]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #476]\n\t"
+ "str r3, [%[r], #484]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #472]\n\t"
+ "str r2, [%[r], #480]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #468]\n\t"
+ "str r4, [%[r], #476]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #464]\n\t"
+ "str r3, [%[r], #472]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #460]\n\t"
+ "str r2, [%[r], #468]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #456]\n\t"
+ "str r4, [%[r], #464]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #452]\n\t"
+ "str r3, [%[r], #460]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #448]\n\t"
+ "str r2, [%[r], #456]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #444]\n\t"
+ "str r4, [%[r], #452]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #440]\n\t"
+ "str r3, [%[r], #448]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #436]\n\t"
+ "str r2, [%[r], #444]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #432]\n\t"
+ "str r4, [%[r], #440]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #428]\n\t"
+ "str r3, [%[r], #436]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #424]\n\t"
+ "str r2, [%[r], #432]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #420]\n\t"
+ "str r4, [%[r], #428]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #416]\n\t"
+ "str r3, [%[r], #424]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #412]\n\t"
+ "str r2, [%[r], #420]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #408]\n\t"
+ "str r4, [%[r], #416]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #404]\n\t"
+ "str r3, [%[r], #412]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #400]\n\t"
+ "str r2, [%[r], #408]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #396]\n\t"
+ "str r4, [%[r], #404]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #392]\n\t"
+ "str r3, [%[r], #400]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #388]\n\t"
+ "str r2, [%[r], #396]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #384]\n\t"
+ "str r4, [%[r], #392]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #380]\n\t"
+ "str r3, [%[r], #388]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #376]\n\t"
+ "str r2, [%[r], #384]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #372]\n\t"
+ "str r4, [%[r], #380]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #368]\n\t"
+ "str r3, [%[r], #376]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #364]\n\t"
+ "str r2, [%[r], #372]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #360]\n\t"
+ "str r4, [%[r], #368]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #356]\n\t"
+ "str r3, [%[r], #364]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #352]\n\t"
+ "str r2, [%[r], #360]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #348]\n\t"
+ "str r4, [%[r], #356]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #344]\n\t"
+ "str r3, [%[r], #352]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #340]\n\t"
+ "str r2, [%[r], #348]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #336]\n\t"
+ "str r4, [%[r], #344]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #332]\n\t"
+ "str r3, [%[r], #340]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #328]\n\t"
+ "str r2, [%[r], #336]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #324]\n\t"
+ "str r4, [%[r], #332]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #320]\n\t"
+ "str r3, [%[r], #328]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #316]\n\t"
+ "str r2, [%[r], #324]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #312]\n\t"
+ "str r4, [%[r], #320]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #308]\n\t"
+ "str r3, [%[r], #316]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #304]\n\t"
+ "str r2, [%[r], #312]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #300]\n\t"
+ "str r4, [%[r], #308]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #296]\n\t"
+ "str r3, [%[r], #304]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #292]\n\t"
+ "str r2, [%[r], #300]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #288]\n\t"
+ "str r4, [%[r], #296]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #284]\n\t"
+ "str r3, [%[r], #292]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #280]\n\t"
+ "str r2, [%[r], #288]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #276]\n\t"
+ "str r4, [%[r], #284]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #272]\n\t"
+ "str r3, [%[r], #280]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #268]\n\t"
+ "str r2, [%[r], #276]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #264]\n\t"
+ "str r4, [%[r], #272]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #260]\n\t"
+ "str r3, [%[r], #268]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #256]\n\t"
+ "str r2, [%[r], #264]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #252]\n\t"
+ "str r4, [%[r], #260]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #248]\n\t"
+ "str r3, [%[r], #256]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #244]\n\t"
+ "str r2, [%[r], #252]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #240]\n\t"
+ "str r4, [%[r], #248]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #236]\n\t"
+ "str r3, [%[r], #244]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #232]\n\t"
+ "str r2, [%[r], #240]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #228]\n\t"
+ "str r4, [%[r], #236]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #224]\n\t"
+ "str r3, [%[r], #232]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #220]\n\t"
+ "str r2, [%[r], #228]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #216]\n\t"
+ "str r4, [%[r], #224]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #212]\n\t"
+ "str r3, [%[r], #220]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #208]\n\t"
+ "str r2, [%[r], #216]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #204]\n\t"
+ "str r4, [%[r], #212]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #200]\n\t"
+ "str r3, [%[r], #208]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #196]\n\t"
+ "str r2, [%[r], #204]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #192]\n\t"
+ "str r4, [%[r], #200]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #188]\n\t"
+ "str r3, [%[r], #196]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #184]\n\t"
+ "str r2, [%[r], #192]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #180]\n\t"
+ "str r4, [%[r], #188]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #176]\n\t"
+ "str r3, [%[r], #184]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #172]\n\t"
+ "str r2, [%[r], #180]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #168]\n\t"
+ "str r4, [%[r], #176]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #164]\n\t"
+ "str r3, [%[r], #172]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #160]\n\t"
+ "str r2, [%[r], #168]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #156]\n\t"
+ "str r4, [%[r], #164]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #152]\n\t"
+ "str r3, [%[r], #160]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #148]\n\t"
+ "str r2, [%[r], #156]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #144]\n\t"
+ "str r4, [%[r], #152]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #140]\n\t"
+ "str r3, [%[r], #148]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #136]\n\t"
+ "str r2, [%[r], #144]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #132]\n\t"
+ "str r4, [%[r], #140]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #128]\n\t"
+ "str r3, [%[r], #136]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #124]\n\t"
+ "str r2, [%[r], #132]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #120]\n\t"
+ "str r4, [%[r], #128]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "str r3, [%[r], #124]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #112]\n\t"
+ "str r2, [%[r], #120]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #108]\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "str r3, [%[r], #112]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #100]\n\t"
+ "str r2, [%[r], #108]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #96]\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "str r3, [%[r], #100]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #88]\n\t"
+ "str r2, [%[r], #96]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #84]\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "str r3, [%[r], #88]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #76]\n\t"
+ "str r2, [%[r], #84]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #72]\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "str r3, [%[r], #76]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #64]\n\t"
+ "str r2, [%[r], #72]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "str r2, [%[r]]\n\t"
+ "str r3, [%[r], #4]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+ : "memory", "r2", "r3", "r4", "r5", "r6"
+ );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * Uses a fixed 5-bit window. Because the base is 2, multiplying the
+ * accumulator by 2^y is done with a left shift (sp_4096_lshift_128)
+ * instead of a Montgomery multiplication.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
+ const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit nd[256];
+ sp_digit td[129];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* Single allocation holds norm (256 words) and tmp (129 words): 385. */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 256;
+#else
+ norm = nd;
+ tmp = td;
+#endif
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_128(norm, m);
+
+ /* Start at the most significant word of the exponent. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ /* c counts the unconsumed bits left in n. */
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ /* First window takes the leftover bits so the rest are 5 bits wide. */
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ /* r = norm * 2^y: shift instead of multiply since the base is 2. */
+ sp_4096_lshift_128(r, norm, y);
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ /* Current word exhausted: window is the top of the next word. */
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles two exponent words. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ /* Take the next 5 bits from the current word. */
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* Square 5 times to shift the accumulator up one window. */
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+
+ /* Multiply in 2^y as a shift, then fold the overflow word back. */
+ sp_4096_lshift_128(r, r, y);
+ sp_4096_mul_d_128(tmp, norm, r[128]);
+ r[128] = 0;
+ o = sp_4096_add_128(r, r, tmp);
+ sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o);
+ }
+
+ XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+ sp_4096_mont_reduce_128(r, m, mp);
+
+ /* Constant-time final subtraction when r >= m. */
+ mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+ sp_4096_cond_sub_128(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+ sp_digit b[256], e[128], m[128];
+ sp_digit* r = b;
+ word32 i;
+ int err = MP_OKAY;
+
+ /* Validate operand sizes before converting into the fixed buffers. */
+ if (mp_count_bits(base) > 4096) {
+ err = MP_READ_E;
+ }
+ if ((err == MP_OKAY) && (expLen > 512)) {
+ err = MP_READ_E;
+ }
+ if ((err == MP_OKAY) && (mp_count_bits(mod) != 4096)) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_from_mp(b, 128, base);
+ sp_4096_from_bin(e, 128, exp, expLen);
+ sp_4096_from_mp(m, 128, mod);
+
+ #ifdef HAVE_FFDHE_4096
+ /* Base of 2 with an all-ones top word of the modulus: fast path. */
+ if ((base->used == 1) && (base->dp[0] == 2) &&
+ (m[127] == (sp_digit)-1)) {
+ err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+ }
+ else
+ #endif
+ {
+ err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ /* Strip leading zero bytes from the big-endian output. */
+ for (i = 0; (i < 512) && (out[i] == 0); i++) {
+ }
+ *outLen = 512 - i;
+ XMEMMOVE(out, out + i, *outLen);
+ }
+
+ /* Zeroize the local copy of the private exponent. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+/* Point structure to use. Ordinate arrays are double width (2 * 8 words)
+ * so they can also hold intermediate results. */
+typedef struct sp_point_256 {
+ /* X ordinate of the point. */
+ sp_digit x[2 * 8];
+ /* Y ordinate of the point. */
+ sp_digit y[2 * 8];
+ /* Z ordinate of the point. */
+ sp_digit z[2 * 8];
+ /* Non-zero when this is the point at infinity. */
+ int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[8] = {
+ 0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
+ 0x00000001,0xffffffff
+};
+/* The Montgomery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[8] = {
+ 0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
+ 0xfffffffe,0x00000000
+};
+/* The Montgomery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[8] = {
+ 0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+ 0x00000000,0xffffffff
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[8] = {
+ 0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+ 0x00000000,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[8] = {
+ 0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
+ 0xffffffff,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0xee00bc4f;
+#endif
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+ /* X ordinate */
+ {
+ 0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
+ 0xe12c4247,0x6b17d1f2,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Y ordinate */
+ {
+ 0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
+ 0xfe1a7f9b,0x4fe342e2,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Z ordinate */
+ {
+ 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+ 0x00000000,0x00000000,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* infinity */
+ 0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+/* The b coefficient of the curve P256. */
+static const sp_digit p256_b[8] = {
+ 0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
+ 0xaa3a93e7,0x5ac635d8
+};
+#endif
+
+/* Obtain a point to work on: heap-allocated in small-stack builds,
+ * otherwise the caller-provided object.
+ *
+ * heap Heap hint for dynamic allocation.
+ * sp Caller-provided point storage (ignored in small-stack builds).
+ * p Out parameter set to the point to use.
+ * returns MEMORY_E when allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ (void)sp;
+ /* Keep large point structures off the stack. */
+ *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+ /* Use the caller's storage directly. */
+ *p = sp;
+#endif
+ return (*p == NULL) ? MEMORY_E : MP_OKAY;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point on the heap; may return MEMORY_E. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
+#else
+/* Set pointer to the caller's stack data; never fails. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
+#endif
+
+
+/* Dispose of a point obtained from sp_256_point_new_8.
+ *
+ * p Point to dispose of (may be NULL in small-stack builds).
+ * clear Non-zero to zeroize the point data first.
+ * heap Heap hint the point was allocated with.
+ */
+static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+ if (p != NULL) {
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+/* Clear point data if requested. */
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+#endif
+ (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * The reduction is done with the fixed coefficient rows shown in the
+ * "t[i] = ..." assembler comments; per-word overflow/underflow is tracked
+ * in r14/r12 and folded back into the result at the end.
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ */
+static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ (void)m;
+
+ /* t[0..5] are kept in 24 bytes of stack; t[6] and t[7] stay in r8/r14. */
+ __asm__ __volatile__ (
+ "sub sp, sp, #24\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[a], #12]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[a], #28]\n\t"
+ "# Clear overflow and underflow\n\t"
+ "mov r14, #0\n\t"
+ "mov r12, #0\n\t"
+ "# t[0] = 1 1 0 -1 -1 -1 -1 0\n\t"
+ "adds r10, r2, r3\n\t"
+ "adc r14, r14, #0\n\t"
+ "subs r10, r10, r5\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r6\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r7\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r8\n\t"
+ "sbc r12, r12, #0\n\t"
+ "# Store t[0]\n\t"
+ "str r10, [sp, #0]\n\t"
+ "neg r12, r12\n\t"
+ "mov r10, #0\n\t"
+ "# t[1] = 0 1 1 0 -1 -1 -1 -1\n\t"
+ "adds r14, r14, r3\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r4\n\t"
+ "adc r10, r10, #0\n\t"
+ "subs r14, r14, r12\n\t"
+ "mov r12, #0\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r6\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r7\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r8\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r9\n\t"
+ "sbc r12, r12, #0\n\t"
+ "# Store t[1]\n\t"
+ "str r14, [sp, #4]\n\t"
+ "neg r12, r12\n\t"
+ "mov r14, #0\n\t"
+ "# t[2] = 0 0 1 1 0 -1 -1 -1\n\t"
+ "adds r10, r10, r4\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r5\n\t"
+ "adc r14, r14, #0\n\t"
+ "subs r10, r10, r12\n\t"
+ "mov r12, #0\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r7\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r8\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r9\n\t"
+ "sbc r12, r12, #0\n\t"
+ "# Store t[2]\n\t"
+ "str r10, [sp, #8]\n\t"
+ "neg r12, r12\n\t"
+ "mov r10, #0\n\t"
+ "# t[3] = -1 -1 0 2 2 1 0 -1\n\t"
+ "adds r14, r14, r5\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r5\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r6\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r6\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r7\n\t"
+ "adc r10, r10, #0\n\t"
+ "subs r14, r14, r12\n\t"
+ "mov r12, #0\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r2\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r3\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r9\n\t"
+ "sbc r12, r12, #0\n\t"
+ "# Store t[3]\n\t"
+ "str r14, [sp, #12]\n\t"
+ "neg r12, r12\n\t"
+ "mov r14, #0\n\t"
+ "# t[4] = 0 -1 -1 0 2 2 1 0\n\t"
+ "adds r10, r10, r6\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r6\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r7\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r7\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r8\n\t"
+ "adc r14, r14, #0\n\t"
+ "subs r10, r10, r12\n\t"
+ "mov r12, #0\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r3\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r4\n\t"
+ "sbc r12, r12, #0\n\t"
+ "# Store t[4]\n\t"
+ "str r10, [sp, #16]\n\t"
+ "neg r12, r12\n\t"
+ "mov r10, #0\n\t"
+ "# t[5] = 0 0 -1 -1 0 2 2 1\n\t"
+ "adds r14, r14, r7\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r7\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r8\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r8\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r9\n\t"
+ "adc r10, r10, #0\n\t"
+ "subs r14, r14, r12\n\t"
+ "mov r12, #0\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r4\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r5\n\t"
+ "sbc r12, r12, #0\n\t"
+ "# Store t[5]\n\t"
+ "str r14, [sp, #20]\n\t"
+ "neg r12, r12\n\t"
+ "mov r14, #0\n\t"
+ "# t[6] = -1 -1 0 0 0 1 3 2\n\t"
+ "adds r10, r10, r7\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r8\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r8\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r8\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r9\n\t"
+ "adc r14, r14, #0\n\t"
+ "adds r10, r10, r9\n\t"
+ "adc r14, r14, #0\n\t"
+ "subs r10, r10, r12\n\t"
+ "mov r12, #0\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r2\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r10, r10, r3\n\t"
+ "sbc r12, r12, #0\n\t"
+ "# Store t[6]\n\t"
+ "mov r8, r10\n\t"
+ "neg r12, r12\n\t"
+ "mov r10, #0\n\t"
+ "# t[7] = 1 0 -1 -1 -1 -1 0 3\n\t"
+ "adds r14, r14, r2\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r9\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r9\n\t"
+ "adc r10, r10, #0\n\t"
+ "adds r14, r14, r9\n\t"
+ "adc r10, r10, #0\n\t"
+ "subs r14, r14, r12\n\t"
+ "mov r12, #0\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r4\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r5\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r6\n\t"
+ "sbc r12, r12, #0\n\t"
+ "subs r14, r14, r7\n\t"
+ "sbc r12, r12, #0\n\t"
+ "# Store t[7]\n\t"
+ "# Load intermediate\n\t"
+ "ldr r2, [sp, #0]\n\t"
+ "ldr r3, [sp, #4]\n\t"
+ "ldr r4, [sp, #8]\n\t"
+ "ldr r5, [sp, #12]\n\t"
+ "ldr r6, [sp, #16]\n\t"
+ "ldr r7, [sp, #20]\n\t"
+ "neg r12, r12\n\t"
+ "# Add overflow\n\t"
+ "# Subtract underflow - add neg underflow\n\t"
+ "adds r2, r2, r10\n\t"
+ "adcs r3, r3, #0\n\t"
+ "adcs r4, r4, #0\n\t"
+ "adds r5, r5, r12\n\t"
+ "adcs r6, r6, #0\n\t"
+ "adcs r7, r7, #0\n\t"
+ "adcs r8, r8, r12\n\t"
+ "adc r14, r14, r10\n\t"
+ "# Subtract overflow\n\t"
+ "# Add underflow - subtract neg underflow\n\t"
+ "subs r2, r2, r12\n\t"
+ "sbcs r3, r3, #0\n\t"
+ "sbcs r4, r4, #0\n\t"
+ "subs r5, r5, r10\n\t"
+ "sbcs r6, r6, #0\n\t"
+ "sbcs r7, r7, #0\n\t"
+ "sbcs r8, r8, r10\n\t"
+ "sbc r14, r14, r12\n\t"
+ "# Store result\n\t"
+ "str r2, [%[r], #0]\n\t"
+ "str r3, [%[r], #4]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r5, [%[r], #12]\n\t"
+ "str r6, [%[r], #16]\n\t"
+ "str r7, [%[r], #20]\n\t"
+ "str r8, [%[r], #24]\n\t"
+ "str r14, [%[r], #28]\n\t"
+ "add sp, sp, #24\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ /* This implementation cannot fail. */
+ return MP_OKAY;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of sp_digit words to fill in r.
+ * a A multi-precision integer.
+ */
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+ /* mp_int digits are the same width: copy and zero-pad. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 32
+ /* mp_int digits are wider: split each across several 32-bit words. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 32U) <= (word32)DIGIT_BIT) {
+ s += 32U;
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp_int digits are narrower: pack several into each 32-bit word. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 32) {
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 32 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Load an ecc_point into an sp_point_256.
+ *
+ * p Point of type sp_point_256 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm)
+{
+ /* Clear then fill each ordinate so the upper words stay zero. */
+ XMEMSET(p->x, 0, sizeof(p->x));
+ sp_256_from_mp(p->x, 8, pm->x);
+ XMEMSET(p->y, 0, sizeof(p->y));
+ sp_256_from_mp(p->y, 8, pm->y);
+ XMEMSET(p->z, 0, sizeof(p->z));
+ sp_256_from_mp(p->z, 8, pm->z);
+ p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ * returns MP_OKAY on success; otherwise the error from mp_grow().
+ */
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ /* Same digit width: straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
+ r->used = 8;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* mp_int digits are narrower: split each 32-bit word across them. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 8; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp_int digits are wider: pack 32-bit words into each digit. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 8; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Copy an sp_point_256 out to an ecc_point.
+ *
+ * p Point of type sp_point_256.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm)
+{
+ int ret = sp_256_to_mp(p->x, pm->x);
+
+ if (ret != MP_OKAY) {
+ return ret;
+ }
+ ret = sp_256_to_mp(p->y, pm->y);
+ if (ret != MP_OKAY) {
+ return ret;
+ }
+ return sp_256_to_mp(p->z, pm->z);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ (void)mp;
+ (void)m;
+
+ __asm__ __volatile__ (
+ "sub sp, sp, #68\n\t"
+ "mov r5, #0\n\t"
+ "# A[0] * B[0]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[b], #0]\n\t"
+ "umull r8, r9, r6, r7\n\t"
+ "str r8, [sp, #0]\n\t"
+ "# A[0] * B[1]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adc r10, r4, #0\n\t"
+ "# A[1] * B[0]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[b], #0]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, #0\n\t"
+ "str r9, [sp, #4]\n\t"
+ "# A[0] * B[2]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[b], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adc r14, r4, r14\n\t"
+ "# A[1] * B[1]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, #0\n\t"
+ "# A[2] * B[0]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[b], #0]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "str r10, [sp, #8]\n\t"
+ "# A[0] * B[3]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, #0\n\t"
+ "# A[1] * B[2]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[b], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[2] * B[1]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[3] * B[0]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b], #0]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "str r14, [sp, #12]\n\t"
+ "# A[0] * B[4]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[b], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, #0\n\t"
+ "# A[1] * B[3]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[2] * B[2]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[b], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[3] * B[1]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[4] * B[0]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[b], #0]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "str r8, [sp, #16]\n\t"
+ "# A[0] * B[5]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, #0\n\t"
+ "# A[1] * B[4]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[b], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[2] * B[3]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[3] * B[2]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[4] * B[1]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[5] * B[0]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[b], #0]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "str r9, [sp, #20]\n\t"
+ "# A[0] * B[6]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[b], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, #0\n\t"
+ "# A[1] * B[5]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "# A[2] * B[4]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[b], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "# A[3] * B[3]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "# A[4] * B[2]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[b], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "# A[5] * B[1]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "# A[6] * B[0]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[b], #0]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "str r10, [sp, #24]\n\t"
+ "# A[0] * B[7]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, #0\n\t"
+ "# A[1] * B[6]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[b], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[2] * B[5]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[3] * B[4]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[4] * B[3]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[5] * B[2]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[b], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[6] * B[1]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[7] * B[0]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r7, [%[b], #0]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "str r14, [sp, #28]\n\t"
+ "# A[1] * B[7]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, #0\n\t"
+ "# A[2] * B[6]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[b], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[3] * B[5]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[4] * B[4]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[b], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[5] * B[3]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[6] * B[2]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[b], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[7] * B[1]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "str r8, [sp, #32]\n\t"
+ "# A[2] * B[7]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, #0\n\t"
+ "# A[3] * B[6]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[4] * B[5]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[5] * B[4]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[b], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[6] * B[3]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[7] * B[2]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r7, [%[b], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "str r9, [sp, #36]\n\t"
+ "# A[3] * B[7]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, #0\n\t"
+ "# A[4] * B[6]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[b], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "# A[5] * B[5]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "# A[6] * B[4]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[b], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "# A[7] * B[3]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "str r10, [sp, #40]\n\t"
+ "# A[4] * B[7]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, #0\n\t"
+ "# A[5] * B[6]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[b], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[6] * B[5]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[7] * B[4]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r7, [%[b], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "str r14, [sp, #44]\n\t"
+ "# A[5] * B[7]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, #0\n\t"
+ "# A[6] * B[6]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[b], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[7] * B[5]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[6] * B[7]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, #0\n\t"
+ "# A[7] * B[6]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r7, [%[b], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[7] * B[7]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adc r14, r4, r14\n\t"
+ "str r8, [sp, #48]\n\t"
+ "str r9, [sp, #52]\n\t"
+ "str r10, [sp, #56]\n\t"
+ "str r14, [sp, #60]\n\t"
+ "# Start Reduction\n\t"
+ "ldr r4, [sp, #0]\n\t"
+ "ldr r5, [sp, #4]\n\t"
+ "ldr r6, [sp, #8]\n\t"
+ "ldr r7, [sp, #12]\n\t"
+ "ldr r8, [sp, #16]\n\t"
+ "ldr r9, [sp, #20]\n\t"
+ "ldr r10, [sp, #24]\n\t"
+ "ldr r14, [sp, #28]\n\t"
+ "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t"
+ "# - a[0] << 224\n\t"
+ "# + (a[0]-a[1] * 2) << (6 * 32)\n\t"
+ "adds r10, r10, r4\n\t"
+ "adc r14, r14, r5\n\t"
+ "adds r10, r10, r4\n\t"
+ "adc r14, r14, r5\n\t"
+ "# - a[0] << (7 * 32)\n\t"
+ "sub r14, r14, r4\n\t"
+ "# + a[0]-a[4] << (3 * 32)\n\t"
+ "mov %[a], r7\n\t"
+ "mov %[b], r8\n\t"
+ "adds r7, r7, r4\n\t"
+ "adcs r8, r8, r5\n\t"
+ "adcs r9, r9, r6\n\t"
+ "adcs r10, r10, %[a]\n\t"
+ "adc r14, r14, %[b]\n\t"
+ "str r4, [sp, #0]\n\t"
+ "str r5, [sp, #4]\n\t"
+ "str r6, [sp, #8]\n\t"
+ "str r7, [sp, #12]\n\t"
+ "str r8, [sp, #16]\n\t"
+ "str r9, [sp, #20]\n\t"
+ "# a += mu * m\n\t"
+ "# += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t"
+ "mov %[a], #0\n\t"
+ "# a[6] += t[0] + t[3]\n\t"
+ "ldr r3, [sp, #24]\n\t"
+ "adds r3, r3, r4\n\t"
+ "adc %[b], %[a], #0\n\t"
+ "adds r3, r3, r7\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "str r10, [sp, #24]\n\t"
+ "# a[7] += t[1] + t[4]\n\t"
+ "ldr r3, [sp, #28]\n\t"
+ "adds r3, r3, %[b]\n\t"
+ "adc %[b], %[a], #0\n\t"
+ "adds r3, r3, r5\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "adds r3, r3, r8\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "str r14, [sp, #28]\n\t"
+ "str r3, [sp, #64]\n\t"
+ "# a[8] += t[0] + t[2] + t[5]\n\t"
+ "ldr r3, [sp, #32]\n\t"
+ "adds r3, r3, %[b]\n\t"
+ "adc %[b], %[a], #0\n\t"
+ "adds r3, r3, r4\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "adds r3, r3, r6\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "adds r3, r3, r9\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "str r3, [sp, #32]\n\t"
+ "# a[9] += t[1] + t[3] + t[6]\n\t"
+ "# a[10] += t[2] + t[4] + t[7]\n\t"
+ "ldr r3, [sp, #36]\n\t"
+ "ldr r4, [sp, #40]\n\t"
+ "adds r3, r3, %[b]\n\t"
+ "adcs r4, r4, #0\n\t"
+ "adc %[b], %[a], #0\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "adds r3, r3, r7\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "adds r3, r3, r10\n\t"
+ "adcs r4, r4, r14\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "str r3, [sp, #36]\n\t"
+ "str r4, [sp, #40]\n\t"
+ "# a[11] += t[3] + t[5]\n\t"
+ "# a[12] += t[4] + t[6]\n\t"
+ "# a[13] += t[5] + t[7]\n\t"
+ "# a[14] += t[6]\n\t"
+ "ldr r3, [sp, #44]\n\t"
+ "ldr r4, [sp, #48]\n\t"
+ "ldr r5, [sp, #52]\n\t"
+ "ldr r6, [sp, #56]\n\t"
+ "adds r3, r3, %[b]\n\t"
+ "adcs r4, r4, #0\n\t"
+ "adcs r5, r5, #0\n\t"
+ "adcs r6, r6, #0\n\t"
+ "adc %[b], %[a], #0\n\t"
+ "adds r3, r3, r7\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, r10\n\t"
+ "adcs r5, r5, r14\n\t"
+ "adcs r6, r6, #0\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "str r3, [sp, #44]\n\t"
+ "str r4, [sp, #48]\n\t"
+ "str r5, [sp, #52]\n\t"
+ "str r6, [sp, #56]\n\t"
+ "# a[15] += t[7]\n\t"
+ "ldr r3, [sp, #60]\n\t"
+ "adds r3, r3, %[b]\n\t"
+ "adc %[b], %[a], #0\n\t"
+ "adds r3, r3, r14\n\t"
+ "adc %[b], %[b], #0\n\t"
+ "str r3, [sp, #60]\n\t"
+ "ldr r3, [sp, #64]\n\t"
+ "ldr r4, [sp, #32]\n\t"
+ "ldr r5, [sp, #36]\n\t"
+ "ldr r6, [sp, #40]\n\t"
+ "ldr r8, [sp, #0]\n\t"
+ "ldr r9, [sp, #4]\n\t"
+ "ldr r10, [sp, #8]\n\t"
+ "ldr r14, [sp, #12]\n\t"
+ "subs r3, r3, r8\n\t"
+ "sbcs r4, r4, r9\n\t"
+ "sbcs r5, r5, r10\n\t"
+ "sbcs r6, r6, r14\n\t"
+ "str r4, [sp, #32]\n\t"
+ "str r5, [sp, #36]\n\t"
+ "str r6, [sp, #40]\n\t"
+ "ldr r3, [sp, #44]\n\t"
+ "ldr r4, [sp, #48]\n\t"
+ "ldr r5, [sp, #52]\n\t"
+ "ldr r6, [sp, #56]\n\t"
+ "ldr r7, [sp, #60]\n\t"
+ "ldr r8, [sp, #16]\n\t"
+ "ldr r9, [sp, #20]\n\t"
+ "ldr r10, [sp, #24]\n\t"
+ "ldr r14, [sp, #28]\n\t"
+ "sbcs r3, r3, r8\n\t"
+ "sbcs r4, r4, r9\n\t"
+ "sbcs r5, r5, r10\n\t"
+ "sbcs r6, r6, r14\n\t"
+ "sbc r7, r7, #0\n\t"
+ "str r3, [sp, #44]\n\t"
+ "str r4, [sp, #48]\n\t"
+ "str r5, [sp, #52]\n\t"
+ "str r6, [sp, #56]\n\t"
+ "str r7, [sp, #60]\n\t"
+ "# mask m and sub from result if overflow\n\t"
+ "sub %[b], %[a], %[b]\n\t"
+ "and %[a], %[b], #1\n\t"
+ "ldr r3, [sp, #32]\n\t"
+ "ldr r4, [sp, #36]\n\t"
+ "ldr r5, [sp, #40]\n\t"
+ "ldr r6, [sp, #44]\n\t"
+ "ldr r7, [sp, #48]\n\t"
+ "ldr r8, [sp, #52]\n\t"
+ "ldr r9, [sp, #56]\n\t"
+ "ldr r10, [sp, #60]\n\t"
+ "subs r3, r3, %[b]\n\t"
+ "sbcs r4, r4, %[b]\n\t"
+ "sbcs r5, r5, %[b]\n\t"
+ "sbcs r6, r6, #0\n\t"
+ "sbcs r7, r7, #0\n\t"
+ "sbcs r8, r8, #0\n\t"
+ "sbcs r9, r9, %[a]\n\t"
+ "sbc r10, r10, %[b]\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "str r7, [%[r], #16]\n\t"
+ "str r8, [%[r], #20]\n\t"
+ "str r9, [%[r], #24]\n\t"
+ "str r10, [%[r], #28]\n\t"
+ "add sp, sp, #68\n\t"
+ : [a] "+r" (a), [b] "+r" (b)
+ : [r] "r" (r)
+ : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7"
+ );
+}
+
+/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
+ *
+ * The off-diagonal partial products are accumulated once, doubled, and
+ * then the diagonal squares are added in, giving the 16-word square in a
+ * stack buffer. The same hard-coded reduction as sp_256_mont_mul_8 is
+ * then applied (constants match 2^256 - 2^224 + 2^192 + 2^96 - 1), so
+ * the m and mp arguments are unused.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime). Unused: the modulus words are hard-coded.
+ * mp Montgomery multiplier. Unused for the same reason.
+ */
+SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ (void)mp;
+ (void)m;
+
+ /* 68-byte frame: sp[0..63] holds the 16-word square, sp[64] a spill
+ * word used during reduction. r5 is kept at zero for carry capture. */
+ __asm__ __volatile__ (
+ "sub sp, sp, #68\n\t"
+ "mov r5, #0\n\t"
+ "# A[0] * A[1]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[a], #4]\n\t"
+ "umull r9, r10, r6, r7\n\t"
+ "str r9, [sp, #4]\n\t"
+ "# A[0] * A[2]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[a], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adc r14, r4, #0\n\t"
+ "str r10, [sp, #8]\n\t"
+ "# A[0] * A[3]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[a], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adc r8, r4, #0\n\t"
+ "# A[1] * A[2]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[a], #8]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, #0\n\t"
+ "str r14, [sp, #12]\n\t"
+ "# A[0] * A[4]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[a], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adc r9, r4, r9\n\t"
+ "# A[1] * A[3]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[a], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, #0\n\t"
+ "str r8, [sp, #16]\n\t"
+ "# A[0] * A[5]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[a], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adc r10, r4, r10\n\t"
+ "# A[1] * A[4]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[a], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, #0\n\t"
+ "# A[2] * A[3]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #12]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "str r9, [sp, #20]\n\t"
+ "# A[0] * A[6]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[a], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, #0\n\t"
+ "# A[1] * A[5]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[a], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "# A[2] * A[4]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "str r10, [sp, #24]\n\t"
+ "# A[0] * A[7]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, #0\n\t"
+ "# A[1] * A[6]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[a], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[2] * A[5]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "# A[3] * A[4]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[a], #16]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "str r14, [sp, #28]\n\t"
+ "# A[1] * A[7]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, #0\n\t"
+ "# A[2] * A[6]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "# A[3] * A[5]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[a], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, r10\n\t"
+ "str r8, [sp, #32]\n\t"
+ "# A[2] * A[7]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, #0\n\t"
+ "# A[3] * A[6]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[a], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "# A[4] * A[5]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[a], #20]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adcs r10, r4, r10\n\t"
+ "adc r14, r5, r14\n\t"
+ "str r9, [sp, #36]\n\t"
+ "# A[3] * A[7]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, #0\n\t"
+ "# A[4] * A[6]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[a], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r10, r3, r10\n\t"
+ "adcs r14, r4, r14\n\t"
+ "adc r8, r5, r8\n\t"
+ "str r10, [sp, #40]\n\t"
+ "# A[4] * A[7]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, #0\n\t"
+ "# A[5] * A[6]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[a], #24]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r14, r3, r14\n\t"
+ "adcs r8, r4, r8\n\t"
+ "adc r9, r5, r9\n\t"
+ "str r14, [sp, #44]\n\t"
+ "# A[5] * A[7]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r8, r3, r8\n\t"
+ "adcs r9, r4, r9\n\t"
+ "adc r10, r5, #0\n\t"
+ "str r8, [sp, #48]\n\t"
+ "# A[6] * A[7]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r7, [%[a], #28]\n\t"
+ "umull r3, r4, r6, r7\n\t"
+ "adds r9, r3, r9\n\t"
+ "adc r10, r4, r10\n\t"
+ "str r9, [sp, #52]\n\t"
+ "str r10, [sp, #56]\n\t"
+ "# Double\n\t"
+ "ldr r4, [sp, #4]\n\t"
+ "ldr r6, [sp, #8]\n\t"
+ "ldr r7, [sp, #12]\n\t"
+ "ldr r8, [sp, #16]\n\t"
+ "ldr r9, [sp, #20]\n\t"
+ "ldr r10, [sp, #24]\n\t"
+ "ldr r14, [sp, #28]\n\t"
+ "ldr r12, [sp, #32]\n\t"
+ "ldr r3, [sp, #36]\n\t"
+ "adds r4, r4, r4\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adcs r7, r7, r7\n\t"
+ "adcs r8, r8, r8\n\t"
+ "adcs r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adcs r14, r14, r14\n\t"
+ "adcs r12, r12, r12\n\t"
+ "adcs r3, r3, r3\n\t"
+ "str r4, [sp, #4]\n\t"
+ "str r6, [sp, #8]\n\t"
+ "str r7, [sp, #12]\n\t"
+ "str r8, [sp, #16]\n\t"
+ "str r9, [sp, #20]\n\t"
+ "str r10, [sp, #24]\n\t"
+ "str r14, [sp, #28]\n\t"
+ "str r12, [sp, #32]\n\t"
+ "str r3, [sp, #36]\n\t"
+ "ldr r4, [sp, #40]\n\t"
+ "ldr r6, [sp, #44]\n\t"
+ "ldr r7, [sp, #48]\n\t"
+ "ldr r8, [sp, #52]\n\t"
+ "ldr r9, [sp, #56]\n\t"
+ "adcs r4, r4, r4\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adcs r7, r7, r7\n\t"
+ "adcs r8, r8, r8\n\t"
+ "adcs r9, r9, r9\n\t"
+ "str r4, [sp, #40]\n\t"
+ "str r6, [sp, #44]\n\t"
+ "str r7, [sp, #48]\n\t"
+ "str r8, [sp, #52]\n\t"
+ "str r9, [sp, #56]\n\t"
+ "adc r10, r5, #0\n\t"
+ "str r10, [sp, #60]\n\t"
+ "ldr r4, [sp, #4]\n\t"
+ "ldr r5, [sp, #8]\n\t"
+ "ldr r12, [sp, #12]\n\t"
+ "# A[0] * A[0]\n\t"
+ "ldr r6, [%[a], #0]\n\t"
+ "umull r8, r9, r6, r6\n\t"
+ "# A[1] * A[1]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "umull r10, r14, r6, r6\n\t"
+ "adds r9, r9, r4\n\t"
+ "adcs r10, r10, r5\n\t"
+ "adcs r14, r14, r12\n\t"
+ "str r8, [sp, #0]\n\t"
+ "str r9, [sp, #4]\n\t"
+ "str r10, [sp, #8]\n\t"
+ "str r14, [sp, #12]\n\t"
+ "ldr r3, [sp, #16]\n\t"
+ "ldr r4, [sp, #20]\n\t"
+ "ldr r5, [sp, #24]\n\t"
+ "ldr r12, [sp, #28]\n\t"
+ "# A[2] * A[2]\n\t"
+ "ldr r6, [%[a], #8]\n\t"
+ "umull r8, r9, r6, r6\n\t"
+ "# A[3] * A[3]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "umull r10, r14, r6, r6\n\t"
+ "adcs r8, r8, r3\n\t"
+ "adcs r9, r9, r4\n\t"
+ "adcs r10, r10, r5\n\t"
+ "adcs r14, r14, r12\n\t"
+ "str r8, [sp, #16]\n\t"
+ "str r9, [sp, #20]\n\t"
+ "str r10, [sp, #24]\n\t"
+ "str r14, [sp, #28]\n\t"
+ "ldr r3, [sp, #32]\n\t"
+ "ldr r4, [sp, #36]\n\t"
+ "ldr r5, [sp, #40]\n\t"
+ "ldr r12, [sp, #44]\n\t"
+ "# A[4] * A[4]\n\t"
+ "ldr r6, [%[a], #16]\n\t"
+ "umull r8, r9, r6, r6\n\t"
+ "# A[5] * A[5]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "umull r10, r14, r6, r6\n\t"
+ "adcs r8, r8, r3\n\t"
+ "adcs r9, r9, r4\n\t"
+ "adcs r10, r10, r5\n\t"
+ "adcs r14, r14, r12\n\t"
+ "str r8, [sp, #32]\n\t"
+ "str r9, [sp, #36]\n\t"
+ "str r10, [sp, #40]\n\t"
+ "str r14, [sp, #44]\n\t"
+ "ldr r3, [sp, #48]\n\t"
+ "ldr r4, [sp, #52]\n\t"
+ "ldr r5, [sp, #56]\n\t"
+ "ldr r12, [sp, #60]\n\t"
+ "# A[6] * A[6]\n\t"
+ "ldr r6, [%[a], #24]\n\t"
+ "umull r8, r9, r6, r6\n\t"
+ "# A[7] * A[7]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "umull r10, r14, r6, r6\n\t"
+ "adcs r8, r8, r3\n\t"
+ "adcs r9, r9, r4\n\t"
+ "adcs r10, r10, r5\n\t"
+ "adc r14, r14, r12\n\t"
+ "str r8, [sp, #48]\n\t"
+ "str r9, [sp, #52]\n\t"
+ "str r10, [sp, #56]\n\t"
+ "str r14, [sp, #60]\n\t"
+ "# Start Reduction\n\t"
+ "ldr r4, [sp, #0]\n\t"
+ "ldr r5, [sp, #4]\n\t"
+ "ldr r6, [sp, #8]\n\t"
+ "ldr r7, [sp, #12]\n\t"
+ "ldr r8, [sp, #16]\n\t"
+ "ldr r9, [sp, #20]\n\t"
+ "ldr r10, [sp, #24]\n\t"
+ "ldr r14, [sp, #28]\n\t"
+ "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t"
+ "# - a[0] << 224\n\t"
+ "# + (a[0]-a[1] * 2) << (6 * 32)\n\t"
+ "adds r10, r10, r4\n\t"
+ "adc r14, r14, r5\n\t"
+ "adds r10, r10, r4\n\t"
+ "adc r14, r14, r5\n\t"
+ "# - a[0] << (7 * 32)\n\t"
+ "sub r14, r14, r4\n\t"
+ "# + a[0]-a[4] << (3 * 32)\n\t"
+ "mov %[a], r7\n\t"
+ "mov r12, r8\n\t"
+ "adds r7, r7, r4\n\t"
+ "adcs r8, r8, r5\n\t"
+ "adcs r9, r9, r6\n\t"
+ "adcs r10, r10, %[a]\n\t"
+ "adc r14, r14, r12\n\t"
+ "str r4, [sp, #0]\n\t"
+ "str r5, [sp, #4]\n\t"
+ "str r6, [sp, #8]\n\t"
+ "str r7, [sp, #12]\n\t"
+ "str r8, [sp, #16]\n\t"
+ "str r9, [sp, #20]\n\t"
+ "# a += mu * m\n\t"
+ "# += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t"
+ "mov %[a], #0\n\t"
+ "# a[6] += t[0] + t[3]\n\t"
+ "ldr r3, [sp, #24]\n\t"
+ "adds r3, r3, r4\n\t"
+ "adc r12, %[a], #0\n\t"
+ "adds r3, r3, r7\n\t"
+ "adc r12, r12, #0\n\t"
+ "str r10, [sp, #24]\n\t"
+ "# a[7] += t[1] + t[4]\n\t"
+ "ldr r3, [sp, #28]\n\t"
+ "adds r3, r3, r12\n\t"
+ "adc r12, %[a], #0\n\t"
+ "adds r3, r3, r5\n\t"
+ "adc r12, r12, #0\n\t"
+ "adds r3, r3, r8\n\t"
+ "adc r12, r12, #0\n\t"
+ "str r14, [sp, #28]\n\t"
+ "str r3, [sp, #64]\n\t"
+ "# a[8] += t[0] + t[2] + t[5]\n\t"
+ "ldr r3, [sp, #32]\n\t"
+ "adds r3, r3, r12\n\t"
+ "adc r12, %[a], #0\n\t"
+ "adds r3, r3, r4\n\t"
+ "adc r12, r12, #0\n\t"
+ "adds r3, r3, r6\n\t"
+ "adc r12, r12, #0\n\t"
+ "adds r3, r3, r9\n\t"
+ "adc r12, r12, #0\n\t"
+ "str r3, [sp, #32]\n\t"
+ "# a[9] += t[1] + t[3] + t[6]\n\t"
+ "# a[10] += t[2] + t[4] + t[7]\n\t"
+ "ldr r3, [sp, #36]\n\t"
+ "ldr r4, [sp, #40]\n\t"
+ "adds r3, r3, r12\n\t"
+ "adcs r4, r4, #0\n\t"
+ "adc r12, %[a], #0\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r12, r12, #0\n\t"
+ "adds r3, r3, r7\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r12, r12, #0\n\t"
+ "adds r3, r3, r10\n\t"
+ "adcs r4, r4, r14\n\t"
+ "adc r12, r12, #0\n\t"
+ "str r3, [sp, #36]\n\t"
+ "str r4, [sp, #40]\n\t"
+ "# a[11] += t[3] + t[5]\n\t"
+ "# a[12] += t[4] + t[6]\n\t"
+ "# a[13] += t[5] + t[7]\n\t"
+ "# a[14] += t[6]\n\t"
+ "ldr r3, [sp, #44]\n\t"
+ "ldr r4, [sp, #48]\n\t"
+ "ldr r5, [sp, #52]\n\t"
+ "ldr r6, [sp, #56]\n\t"
+ "adds r3, r3, r12\n\t"
+ "adcs r4, r4, #0\n\t"
+ "adcs r5, r5, #0\n\t"
+ "adcs r6, r6, #0\n\t"
+ "adc r12, %[a], #0\n\t"
+ "adds r3, r3, r7\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adcs r5, r5, r9\n\t"
+ "adcs r6, r6, r10\n\t"
+ "adc r12, r12, #0\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, r10\n\t"
+ "adcs r5, r5, r14\n\t"
+ "adcs r6, r6, #0\n\t"
+ "adc r12, r12, #0\n\t"
+ "str r3, [sp, #44]\n\t"
+ "str r4, [sp, #48]\n\t"
+ "str r5, [sp, #52]\n\t"
+ "str r6, [sp, #56]\n\t"
+ "# a[15] += t[7]\n\t"
+ "ldr r3, [sp, #60]\n\t"
+ "adds r3, r3, r12\n\t"
+ "adc r12, %[a], #0\n\t"
+ "adds r3, r3, r14\n\t"
+ "adc r12, r12, #0\n\t"
+ "str r3, [sp, #60]\n\t"
+ "ldr r3, [sp, #64]\n\t"
+ "ldr r4, [sp, #32]\n\t"
+ "ldr r5, [sp, #36]\n\t"
+ "ldr r6, [sp, #40]\n\t"
+ "ldr r8, [sp, #0]\n\t"
+ "ldr r9, [sp, #4]\n\t"
+ "ldr r10, [sp, #8]\n\t"
+ "ldr r14, [sp, #12]\n\t"
+ "subs r3, r3, r8\n\t"
+ "sbcs r4, r4, r9\n\t"
+ "sbcs r5, r5, r10\n\t"
+ "sbcs r6, r6, r14\n\t"
+ "str r4, [sp, #32]\n\t"
+ "str r5, [sp, #36]\n\t"
+ "str r6, [sp, #40]\n\t"
+ "ldr r3, [sp, #44]\n\t"
+ "ldr r4, [sp, #48]\n\t"
+ "ldr r5, [sp, #52]\n\t"
+ "ldr r6, [sp, #56]\n\t"
+ "ldr r7, [sp, #60]\n\t"
+ "ldr r8, [sp, #16]\n\t"
+ "ldr r9, [sp, #20]\n\t"
+ "ldr r10, [sp, #24]\n\t"
+ "ldr r14, [sp, #28]\n\t"
+ "sbcs r3, r3, r8\n\t"
+ "sbcs r4, r4, r9\n\t"
+ "sbcs r5, r5, r10\n\t"
+ "sbcs r6, r6, r14\n\t"
+ "sbc r7, r7, #0\n\t"
+ "str r3, [sp, #44]\n\t"
+ "str r4, [sp, #48]\n\t"
+ "str r5, [sp, #52]\n\t"
+ "str r6, [sp, #56]\n\t"
+ "str r7, [sp, #60]\n\t"
+ "# mask m and sub from result if overflow\n\t"
+ "sub r12, %[a], r12\n\t"
+ "and %[a], r12, #1\n\t"
+ "ldr r3, [sp, #32]\n\t"
+ "ldr r4, [sp, #36]\n\t"
+ "ldr r5, [sp, #40]\n\t"
+ "ldr r6, [sp, #44]\n\t"
+ "ldr r7, [sp, #48]\n\t"
+ "ldr r8, [sp, #52]\n\t"
+ "ldr r9, [sp, #56]\n\t"
+ "ldr r10, [sp, #60]\n\t"
+ "subs r3, r3, r12\n\t"
+ "sbcs r4, r4, r12\n\t"
+ "sbcs r5, r5, r12\n\t"
+ "sbcs r6, r6, #0\n\t"
+ "sbcs r7, r7, #0\n\t"
+ "sbcs r8, r8, #0\n\t"
+ "sbcs r9, r9, %[a]\n\t"
+ "sbc r10, r10, r12\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "str r7, [%[r], #16]\n\t"
+ "str r8, [%[r], #20]\n\t"
+ "str r9, [%[r], #24]\n\t"
+ "str r10, [%[r], #28]\n\t"
+ "add sp, sp, #68\n\t"
+ : [a] "+r" (a)
+ : [r] "r" (r)
+ /* r14 (lr) and r12 (ip) are scratch and declared clobbered; a is
+ * "+r" because it is overwritten during reduction. */
+ : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7", "r12"
+ );
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Repeatedly square a Montgomery form number mod the modulus (prime).
+ * Computes r = a ^ (2^n) mod m: one squaring of a into r followed by
+ * n - 1 in-place squarings of r.
+ *
+ * r Result of the repeated squaring.
+ * a Number to square in Montgomery form.
+ * n Number of squarings to perform.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_256_mont_sqr_8(r, a, m, mp);
+ while (--n > 0) {
+ sp_256_mont_sqr_8(r, r, m, mp);
+ }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* The P256 prime modulus minus 2 (p - 2) as little-endian 32-bit words.
+ * Used as the exponent when inverting via Fermat's little theorem
+ * (see sp_256_mont_inv_8). */
+static const uint32_t p256_mod_minus_2[8] = {
+ 0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+ 0x00000001U,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * The inverse is computed as a^(p-2) mod p (Fermat's little theorem).
+ * The small-code path walks the bits of p256_mod_minus_2 MSB-first with
+ * square-and-multiply; the other path uses a fixed addition chain over
+ * three temporaries, with the exponent value reached so far noted in hex
+ * before each step.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data; the addition-chain path uses 6 * 8 digits of it.
+ */
+static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ XMEMCPY(t, a, sizeof(sp_digit) * 8);
+ /* MSB-first binary exponentiation over bits 254..0 of p - 2. */
+ for (i=254; i>=0; i--) {
+ sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
+ if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+ sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 8);
+#else
+ sp_digit* t1 = td;
+ sp_digit* t2 = td + 2 * 8;
+ sp_digit* t3 = td + 4 * 8;
+ /* 0x2 */
+ sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
+ /* 0x3 */
+ sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
+ /* 0xc */
+ sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
+ /* 0xd */
+ sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
+ /* 0xf */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xf0 */
+ sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
+ /* 0xfd */
+ sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xff */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xff00 */
+ sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
+ /* 0xfffd */
+ sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xffff */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffff0000 */
+ sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
+ /* 0xfffffffd */
+ sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000000 */
+ sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
+ /* 0xffffffffffffffff */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001 */
+ sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
+ /* 0xffffffff000000010000000000000000000000000000000000000000 */
+ sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+ sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+ sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+ sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * Both builds walk the 8 words from most significant to least. Register r3
+ * is an all-ones mask that is cleared the first time a pair of words
+ * differs; masking later words with r3 makes every word contribute the
+ * same instruction sequence regardless of where the difference occurred,
+ * keeping the comparison constant time. The final eor folds the mask into
+ * the result so that r is returned unchanged (-1 initial) only when a < b.
+ *
+ * a A single precision integer (8 words).
+ * b A single precision integer (8 words).
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = -1;
+    sp_digit one = 1;
+
+
+#ifdef WOLFSSL_SP_SMALL
+    /* Loop variant: r6 is the byte offset, counting down from word 7. */
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mov	r3, #-1\n\t"
+        "1:\n\t"
+        "mov	r6, #28\n\t"
+        "ldr	r4, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "subs	r6, r6, #4\n\t"
+        "bcs	1b\n\t"
+        "eor	%[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#else
+    /* Fully unrolled variant: identical compare step repeated per word,
+     * offsets #28 down to #0. */
+    __asm__ __volatile__ (
+        "mov	r7, #0\n\t"
+        "mov	r3, #-1\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "and	r4, r4, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "subs	r4, r4, r5\n\t"
+        "it	hi\n\t"
+        "movhi	%[r], %[one]\n\t"
+        "it	lo\n\t"
+        "movlo	%[r], r3\n\t"
+        "it	ne\n\t"
+        "movne	r3, r7\n\t"
+        "eor	%[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#endif
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * With full 32-bit digits there is nothing to carry between words, so
+ * this is deliberately a no-op; it exists so code shared with reduced
+ * digit-size builds can call it unconditionally.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_256_norm_8(a)
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant time: the subtraction is always performed; b is ANDed with the
+ * mask so an m of 0 subtracts zero.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply (0 or all-ones).
+ * returns 0 when no borrow occurred, all-ones when it did.
+ */
+static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    /* Loop variant: r8 is the byte offset; the borrow is carried between
+     * iterations through %[c] (subs r9 - c re-creates the carry flag). */
+    __asm__ __volatile__ (
+        "mov	r9, #0\n\t"
+        "mov	r8, #0\n\t"
+        "1:\n\t"
+        "subs	%[c], r9, %[c]\n\t"
+        "ldr	r4, [%[a], r8]\n\t"
+        "ldr	r5, [%[b], r8]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        "str	r4, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, #32\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    /* Unrolled variant: two words per step, borrow kept in the carry flag
+     * for the whole sequence and extracted at the end. */
+    __asm__ __volatile__ (
+
+        "mov	r9, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r6, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r7, [%[b], #4]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "subs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r6, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r7, [%[b], #12]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r6, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r7, [%[b], #20]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r6, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r7, [%[b], #28]\n\t"
+        "and	r5, r5, %[m]\n\t"
+        "and	r7, r7, %[m]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "sbcs	r6, r6, r7\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "sbc	%[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
+/* Reduction modulo the P-256 order uses the same generic routine. */
+#define sp_256_mont_reduce_order_8    sp_256_mont_reduce_8
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * For each of the 8 digits: mu = a[i] * mp, then mu * m is added into
+ * a[i..i+7] so the low digit cancels. The two lowest live digits are kept
+ * in registers (r10, r14) across iterations to avoid reloading them; the
+ * overall carry accumulates in ca. %[a] is advanced by 4 bytes per
+ * iteration, so on exit it points 8 digits past the original base.
+ *
+ * a   A single precision number to reduce in place (at least 16 digits;
+ *     result ends up in the upper 8).
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "# i = 0\n\t"
+        "mov	r12, #0\n\t"
+        "ldr	r10, [%[a], #0]\n\t"
+        "ldr	r14, [%[a], #4]\n\t"
+        "\n1:\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mul	r8, %[mp], r10\n\t"
+        "# a[i+0] += m[0] * mu\n\t"
+        "ldr	r7, [%[m], #0]\n\t"
+        "ldr	r9, [%[a], #0]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r10, r10, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "# a[i+1] += m[1] * mu\n\t"
+        "ldr	r7, [%[m], #4]\n\t"
+        "ldr	r9, [%[a], #4]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r10, r14, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r10, r10, r5\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+2] += m[2] * mu\n\t"
+        "ldr	r7, [%[m], #8]\n\t"
+        "ldr	r14, [%[a], #8]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r14, r14, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r14, r14, r4\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+3] += m[3] * mu\n\t"
+        "ldr	r7, [%[m], #12]\n\t"
+        "ldr	r9, [%[a], #12]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #12]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+4] += m[4] * mu\n\t"
+        "ldr	r7, [%[m], #16]\n\t"
+        "ldr	r9, [%[a], #16]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #16]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+5] += m[5] * mu\n\t"
+        "ldr	r7, [%[m], #20]\n\t"
+        "ldr	r9, [%[a], #20]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r4, r7, #0\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #20]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "# a[i+6] += m[6] * mu\n\t"
+        "ldr	r7, [%[m], #24]\n\t"
+        "ldr	r9, [%[a], #24]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r9, r9, r6\n\t"
+        "adc	r5, r7, #0\n\t"
+        "adds	r9, r9, r4\n\t"
+        "str	r9, [%[a], #24]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "# a[i+7] += m[7] * mu\n\t"
+        "ldr	r7, [%[m], #28]\n\t"
+        "ldr	r9, [%[a], #28]\n\t"
+        "umull	r6, r7, r8, r7\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r7, r7, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        "adc	%[ca], %[ca], %[ca]\n\t"
+        "adds	r9, r9, r5\n\t"
+        "str	r9, [%[a], #28]\n\t"
+        "ldr	r9, [%[a], #32]\n\t"
+        "adcs	r9, r9, r7\n\t"
+        "str	r9, [%[a], #32]\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "# i += 1\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	r12, r12, #4\n\t"
+        "cmp	r12, #32\n\t"
+        "blt	1b\n\t"
+        "str	r10, [%[a], #0]\n\t"
+        "str	r14, [%[a], #4]\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    /* 'a' was advanced 8 digits by the loop: a - 8 is the original base
+     * (the result destination) and 'a' is the reduced upper half.
+     * Subtract the modulus once more if the final carry was set. */
+    sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes x = X / Z^2 and y = Y / Z^3, converts both out of Montgomery
+ * form and reduces them below the modulus. z is set to the constant 1.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data (multiple 2*8-digit temporaries; first 2*8
+ *    used here, remainder passed to sp_256_mont_inv_8).
+ */
+static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    int32_t n;
+
+    /* t1 = 1 / Z (Montgomery form) */
+    sp_256_mont_inv_8(t1, p->z, t + 2*8);
+
+    /* t2 = 1 / Z^2, t1 = 1 / Z^3 */
+    sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    /* Zero upper half then Montgomery-reduce to leave the plain value. */
+    XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U);
+    sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_8(r->x, p256_mod);
+    sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_8(r->y, p256_mod);
+    sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->y);
+
+    /* Affine representation: z == 1. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Loop build: processes four words per iteration with post-incremented
+ * pointers; the carry is threaded between iterations via %[c]
+ * (adds c + (-1) re-creates the carry flag from the previous pass).
+ *
+ * r A single precision integer (8 words).
+ * a A single precision integer (8 words).
+ * b A single precision integer (8 words).
+ * returns the final carry (0 or 1).
+ */
+static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	r12, %[a], #32\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], #-1\n\t"
+        "ldr	r4, [%[a]], #4\n\t"
+        "ldr	r5, [%[a]], #4\n\t"
+        "ldr	r6, [%[a]], #4\n\t"
+        "ldr	r7, [%[a]], #4\n\t"
+        "ldr	r8, [%[b]], #4\n\t"
+        "ldr	r9, [%[b]], #4\n\t"
+        "ldr	r10, [%[b]], #4\n\t"
+        "ldr	r14, [%[b]], #4\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r]], #4\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        "str	r6, [%[r]], #4\n\t"
+        "str	r7, [%[r]], #4\n\t"
+        "mov	r4, #0\n\t"
+        "adc	%[c], r4, #0\n\t"
+        "cmp	%[a], r12\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * Unrolled build: two four-word batches, carry kept in the flags for the
+ * whole sequence and extracted at the end.
+ *
+ * r A single precision integer (8 words).
+ * a A single precision integer (8 words).
+ * b A single precision integer (8 words).
+ * returns the final carry (0 or 1).
+ */
+static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[a], #8]\n\t"
+        "ldr	r7, [%[a], #12]\n\t"
+        "ldr	r8, [%[b], #0]\n\t"
+        "ldr	r9, [%[b], #4]\n\t"
+        "ldr	r10, [%[b], #8]\n\t"
+        "ldr	r14, [%[b], #12]\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "str	r6, [%[r], #8]\n\t"
+        "str	r7, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[a], #24]\n\t"
+        "ldr	r7, [%[a], #28]\n\t"
+        "ldr	r8, [%[b], #16]\n\t"
+        "ldr	r9, [%[b], #20]\n\t"
+        "ldr	r10, [%[b], #24]\n\t"
+        "ldr	r14, [%[b], #28]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "str	r6, [%[r], #24]\n\t"
+        "str	r7, [%[r], #28]\n\t"
+        "adc	%[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * The modulus is implicitly the P-256 prime: after the 256-bit add, r3 is
+ * an all-ones mask when the addition carried out, and the masked subtract
+ * removes the modulus exactly once, exploiting the special word pattern of
+ * the P-256 prime (words 0-2 and 7 all-ones, word 6 contributing only the
+ * carry bit). Constant time.
+ *
+ * r   Result of addition.
+ * a   First number to add in Montogmery form.
+ * b   Second number to add in Montogmery form.
+ * m   Modulus (prime) - unused; must be the P-256 prime.
+ */
+static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[a],#8]\n\t"
+        "ldr	r7, [%[a],#12]\n\t"
+        "ldr	r8, [%[b],#0]\n\t"
+        "ldr	r9, [%[b],#4]\n\t"
+        "ldr	r10, [%[b],#8]\n\t"
+        "ldr	r14, [%[b],#12]\n\t"
+        "adds	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "str	r6, [%[r],#8]\n\t"
+        "str	r7, [%[r],#12]\n\t"
+        "ldr	r4, [%[a],#16]\n\t"
+        "ldr	r5, [%[a],#20]\n\t"
+        "ldr	r6, [%[a],#24]\n\t"
+        "ldr	r7, [%[a],#28]\n\t"
+        "ldr	r8, [%[b],#16]\n\t"
+        "ldr	r9, [%[b],#20]\n\t"
+        "ldr	r10, [%[b],#24]\n\t"
+        "ldr	r14, [%[b],#28]\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adcs	r5, r5, r9\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "adcs	r7, r7, r14\n\t"
+        "adc	r3, r12, #0\n\t"
+        "sub	r3, r12, r3\n\t"
+        "and	r12, r3, #1\n\t"
+        "ldr	r8, [%[r],#0]\n\t"
+        "ldr	r9, [%[r],#4]\n\t"
+        "ldr	r10, [%[r],#8]\n\t"
+        "ldr	r14, [%[r],#12]\n\t"
+        "subs	r8, r8, r3\n\t"
+        "sbcs	r9, r9, r3\n\t"
+        "sbcs	r10, r10, r3\n\t"
+        "sbcs	r14, r14, #0\n\t"
+        "sbcs	r4, r4, #0\n\t"
+        "sbcs	r5, r5, #0\n\t"
+        "sbcs	r6, r6, r12\n\t"
+        "sbc	r7, r7, r3\n\t"
+        "str	r8, [%[r],#0]\n\t"
+        "str	r9, [%[r],#4]\n\t"
+        "str	r10, [%[r],#8]\n\t"
+        "str	r14, [%[r],#12]\n\t"
+        "str	r4, [%[r],#16]\n\t"
+        "str	r5, [%[r],#20]\n\t"
+        "str	r6, [%[r],#24]\n\t"
+        "str	r7, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
+    );
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * Shifts all 8 words left by one via a self-add, then conditionally
+ * subtracts the P-256 prime using the same masked pattern as
+ * sp_256_mont_add_8. Constant time.
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montogmery form.
+ * m   Modulus (prime) - unused; must be the P-256 prime.
+ */
+static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[a],#8]\n\t"
+        "ldr	r7, [%[a],#12]\n\t"
+        "ldr	r8, [%[a],#16]\n\t"
+        "ldr	r9, [%[a],#20]\n\t"
+        "ldr	r10, [%[a],#24]\n\t"
+        "ldr	r14, [%[a],#28]\n\t"
+        "adds	r4, r4, r4\n\t"
+        "adcs	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adcs	r7, r7, r7\n\t"
+        "adcs	r8, r8, r8\n\t"
+        "adcs	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adcs	r14, r14, r14\n\t"
+        "adc	r3, r12, #0\n\t"
+        "sub	r3, r12, r3\n\t"
+        "and	r12, r3, #1\n\t"
+        "subs	r4, r4, r3\n\t"
+        "sbcs	r5, r5, r3\n\t"
+        "sbcs	r6, r6, r3\n\t"
+        "sbcs	r7, r7, #0\n\t"
+        "sbcs	r8, r8, #0\n\t"
+        "sbcs	r9, r9, #0\n\t"
+        "sbcs	r10, r10, r12\n\t"
+        "sbc	r14, r14, r3\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "str	r6, [%[r],#8]\n\t"
+        "str	r7, [%[r],#12]\n\t"
+        "str	r8, [%[r],#16]\n\t"
+        "str	r9, [%[r],#20]\n\t"
+        "str	r10, [%[r],#24]\n\t"
+        "str	r14, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
+    );
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Implemented as a modular double (first half, same pattern as
+ * sp_256_mont_dbl_8) followed by a modular add of 'a' (second half).
+ * The modulus is implicitly the P-256 prime. Constant time.
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montogmery form.
+ * m   Modulus (prime) - unused; must be the P-256 prime.
+ */
+static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[a],#8]\n\t"
+        "ldr	r7, [%[a],#12]\n\t"
+        "ldr	r8, [%[a],#16]\n\t"
+        "ldr	r9, [%[a],#20]\n\t"
+        "ldr	r10, [%[a],#24]\n\t"
+        "ldr	r14, [%[a],#28]\n\t"
+        "adds	r4, r4, r4\n\t"
+        "adcs	r5, r5, r5\n\t"
+        "adcs	r6, r6, r6\n\t"
+        "adcs	r7, r7, r7\n\t"
+        "adcs	r8, r8, r8\n\t"
+        "adcs	r9, r9, r9\n\t"
+        "adcs	r10, r10, r10\n\t"
+        "adcs	r14, r14, r14\n\t"
+        "adc	r3, r12, #0\n\t"
+        "sub	r3, r12, r3\n\t"
+        "and	r12, r3, #1\n\t"
+        "subs	r4, r4, r3\n\t"
+        "sbcs	r5, r5, r3\n\t"
+        "sbcs	r6, r6, r3\n\t"
+        "sbcs	r7, r7, #0\n\t"
+        "sbcs	r8, r8, #0\n\t"
+        "sbcs	r9, r9, #0\n\t"
+        "sbcs	r10, r10, r12\n\t"
+        "sbc	r14, r14, r3\n\t"
+        "str	r8, [%[r],#16]\n\t"
+        "str	r9, [%[r],#20]\n\t"
+        "str	r10, [%[r],#24]\n\t"
+        "str	r14, [%[r],#28]\n\t"
+        "mov	r12, #0\n\t"
+        "ldr	r8, [%[a],#0]\n\t"
+        "ldr	r9, [%[a],#4]\n\t"
+        "ldr	r10, [%[a],#8]\n\t"
+        "ldr	r14, [%[a],#12]\n\t"
+        "adds	r8, r8, r4\n\t"
+        "adcs	r9, r9, r5\n\t"
+        "adcs	r10, r10, r6\n\t"
+        "adcs	r14, r14, r7\n\t"
+        "str	r8, [%[r],#0]\n\t"
+        "str	r9, [%[r],#4]\n\t"
+        "str	r10, [%[r],#8]\n\t"
+        "str	r14, [%[r],#12]\n\t"
+        "ldr	r8, [%[a],#16]\n\t"
+        "ldr	r9, [%[a],#20]\n\t"
+        "ldr	r10, [%[a],#24]\n\t"
+        "ldr	r14, [%[a],#28]\n\t"
+        "ldr	r4, [%[r],#16]\n\t"
+        "ldr	r5, [%[r],#20]\n\t"
+        "ldr	r6, [%[r],#24]\n\t"
+        "ldr	r7, [%[r],#28]\n\t"
+        "adcs	r8, r8, r4\n\t"
+        "adcs	r9, r9, r5\n\t"
+        "adcs	r10, r10, r6\n\t"
+        "adcs	r14, r14, r7\n\t"
+        "adc	r3, r12, #0\n\t"
+        "sub	r3, r12, r3\n\t"
+        "and	r12, r3, #1\n\t"
+        "ldr	r4, [%[r],#0]\n\t"
+        "ldr	r5, [%[r],#4]\n\t"
+        "ldr	r6, [%[r],#8]\n\t"
+        "ldr	r7, [%[r],#12]\n\t"
+        "subs	r4, r4, r3\n\t"
+        "sbcs	r5, r5, r3\n\t"
+        "sbcs	r6, r6, r3\n\t"
+        "sbcs	r7, r7, #0\n\t"
+        "sbcs	r8, r8, #0\n\t"
+        "sbcs	r9, r9, #0\n\t"
+        "sbcs	r10, r10, r12\n\t"
+        "sbc	r14, r14, r3\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "str	r6, [%[r],#8]\n\t"
+        "str	r7, [%[r],#12]\n\t"
+        "str	r8, [%[r],#16]\n\t"
+        "str	r9, [%[r],#20]\n\t"
+        "str	r10, [%[r],#24]\n\t"
+        "str	r14, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
+    );
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * After the 256-bit subtract, r3 is an all-ones mask when a borrow
+ * occurred, and the masked add puts the P-256 prime back exactly once
+ * (mirror image of the reduction in sp_256_mont_add_8). Constant time.
+ *
+ * r   Result of subtration.
+ * a   Number to subtract from in Montogmery form.
+ * b   Number to subtract with in Montogmery form.
+ * m   Modulus (prime) - unused; must be the P-256 prime.
+ */
+static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov	r12, #0\n\t"
+        "ldr	r4, [%[a],#0]\n\t"
+        "ldr	r5, [%[a],#4]\n\t"
+        "ldr	r6, [%[a],#8]\n\t"
+        "ldr	r7, [%[a],#12]\n\t"
+        "ldr	r8, [%[b],#0]\n\t"
+        "ldr	r9, [%[b],#4]\n\t"
+        "ldr	r10, [%[b],#8]\n\t"
+        "ldr	r14, [%[b],#12]\n\t"
+        "subs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "sbcs	r7, r7, r14\n\t"
+        "str	r4, [%[r],#0]\n\t"
+        "str	r5, [%[r],#4]\n\t"
+        "str	r6, [%[r],#8]\n\t"
+        "str	r7, [%[r],#12]\n\t"
+        "ldr	r4, [%[a],#16]\n\t"
+        "ldr	r5, [%[a],#20]\n\t"
+        "ldr	r6, [%[a],#24]\n\t"
+        "ldr	r7, [%[a],#28]\n\t"
+        "ldr	r8, [%[b],#16]\n\t"
+        "ldr	r9, [%[b],#20]\n\t"
+        "ldr	r10, [%[b],#24]\n\t"
+        "ldr	r14, [%[b],#28]\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "sbcs	r7, r7, r14\n\t"
+        "sbc	r3, r12, #0\n\t"
+        "and	r12, r3, #1\n\t"
+        "ldr	r8, [%[r],#0]\n\t"
+        "ldr	r9, [%[r],#4]\n\t"
+        "ldr	r10, [%[r],#8]\n\t"
+        "ldr	r14, [%[r],#12]\n\t"
+        "adds	r8, r8, r3\n\t"
+        "adcs	r9, r9, r3\n\t"
+        "adcs	r10, r10, r3\n\t"
+        "adcs	r14, r14, #0\n\t"
+        "adcs	r4, r4, #0\n\t"
+        "adcs	r5, r5, #0\n\t"
+        "adcs	r6, r6, r12\n\t"
+        "adc	r7, r7, r3\n\t"
+        "str	r8, [%[r],#0]\n\t"
+        "str	r9, [%[r],#4]\n\t"
+        "str	r10, [%[r],#8]\n\t"
+        "str	r14, [%[r],#12]\n\t"
+        "str	r4, [%[r],#16]\n\t"
+        "str	r5, [%[r],#20]\n\t"
+        "str	r6, [%[r],#24]\n\t"
+        "str	r7, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * If a is odd the modulus is first added (using the P-256 masked word
+ * pattern) so the value becomes even, then the 257-bit result is shifted
+ * right by one. Constant time.
+ *
+ * r   Result of division by 2.
+ * a   Number to divide.
+ * m   Modulus (prime) - unused by the asm; must be the P-256 prime.
+ */
+static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        "mov	r10, #0\n\t"
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "and	r9, r3, #1\n\t"
+        "sub	r7, r10, r9\n\t"
+        "and	r8, r7, #1\n\t"
+        "adds	r3, r3, r7\n\t"
+        "adcs	r4, r4, r7\n\t"
+        "adcs	r5, r5, r7\n\t"
+        "adcs	r6, r6, r10\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "adcs	r3, r3, r10\n\t"
+        "adcs	r4, r4, r10\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "adcs	r6, r6, r7\n\t"
+        "adc	r9, r10, r10\n\t"
+        "lsr	r7, r3, #1\n\t"
+        "and	r3, r3, #1\n\t"
+        "lsr	r8, r4, #1\n\t"
+        "lsr	r10, r5, #1\n\t"
+        "lsr	r14, r6, #1\n\t"
+        "orr	r7, r7, r4, lsl #31\n\t"
+        "orr	r8, r8, r5, lsl #31\n\t"
+        "orr	r10, r10, r6, lsl #31\n\t"
+        "orr	r14, r14, r9, lsl #31\n\t"
+        "mov	r9, r3\n\t"
+        "str	r7, [%[r], #16]\n\t"
+        "str	r8, [%[r], #20]\n\t"
+        "str	r10, [%[r], #24]\n\t"
+        "str	r14, [%[r], #28]\n\t"
+        "ldr	r3, [%[r], #0]\n\t"
+        "ldr	r4, [%[r], #4]\n\t"
+        "ldr	r5, [%[r], #8]\n\t"
+        "ldr	r6, [%[r], #12]\n\t"
+        "lsr	r7, r3, #1\n\t"
+        "lsr	r8, r4, #1\n\t"
+        "lsr	r10, r5, #1\n\t"
+        "lsr	r14, r6, #1\n\t"
+        "orr	r7, r7, r4, lsl #31\n\t"
+        "orr	r8, r8, r5, lsl #31\n\t"
+        "orr	r10, r10, r6, lsl #31\n\t"
+        "orr	r14, r14, r9, lsl #31\n\t"
+        "str	r7, [%[r], #0]\n\t"
+        "str	r8, [%[r], #4]\n\t"
+        "str	r10, [%[r], #8]\n\t"
+        "str	r14, [%[r], #12]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [m] "r" (m)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9"
+    );
+
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * Standard Jacobian-coordinate doubling specialised for a = -3 curves
+ * (P-256): uses 3*(X-Z^2)*(X+Z^2) for the slope term. All values are in
+ * Montgomery form. r may alias p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data (two 2*8-digit temporaries).
+ */
+static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod);
+    /* Z = Y * Z */
+    sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod);
+    /* Z = 2Z */
+    sp_256_mont_dbl_8(z, z, p256_mod);
+    /* T2 = X - T1 */
+    sp_256_mont_sub_8(t2, p->x, t1, p256_mod);
+    /* T1 = X + T1 */
+    sp_256_mont_add_8(t1, p->x, t1, p256_mod);
+    /* T2 = T1 * T2 */
+    sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod);
+    /* T1 = 3T2 */
+    sp_256_mont_tpl_8(t1, t2, p256_mod);
+    /* Y = 2Y */
+    sp_256_mont_dbl_8(y, p->y, p256_mod);
+    /* Y = Y * Y */
+    sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod);
+    /* T2 = Y * Y */
+    sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
+    /* T2 = T2/2 */
+    sp_256_div2_8(t2, t2, p256_mod);
+    /* Y = Y * X */
+    sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod);
+    /* X = T1 * T1 */
+    sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_8(x, x, y, p256_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_8(x, x, y, p256_mod);
+    /* Y = Y - X */
+    sp_256_mont_sub_8(y, y, x, p256_mod);
+    /* Y = Y * T1 */
+    sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod);
+    /* Y = Y - T2 */
+    sp_256_mont_sub_8(y, y, t2, p256_mod);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Loop build: four words per iteration with post-incremented pointers;
+ * the borrow is threaded between iterations through %[c] (rsbs c re-creates
+ * the carry flag from the previous pass).
+ *
+ * r A single precision integer (8 words).
+ * a A single precision integer (8 words).
+ * b A single precision integer (8 words).
+ * returns 0 when no borrow occurred, all-ones when it did.
+ */
+static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "add	r12, %[a], #32\n\t"
+        "\n1:\n\t"
+        "rsbs	%[c], %[c], #0\n\t"
+        "ldr	r4, [%[a]], #4\n\t"
+        "ldr	r5, [%[a]], #4\n\t"
+        "ldr	r6, [%[a]], #4\n\t"
+        "ldr	r7, [%[a]], #4\n\t"
+        "ldr	r8, [%[b]], #4\n\t"
+        "ldr	r9, [%[b]], #4\n\t"
+        "ldr	r10, [%[b]], #4\n\t"
+        "ldr	r14, [%[b]], #4\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "sbcs	r7, r7, r14\n\t"
+        "str	r4, [%[r]], #4\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        "str	r6, [%[r]], #4\n\t"
+        "str	r7, [%[r]], #4\n\t"
+        "sbc	%[c], r4, r4\n\t"
+        "cmp	%[a], r12\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Unrolled build: two four-word batches, borrow kept in the flags for the
+ * whole sequence and extracted at the end.
+ *
+ * r A single precision integer (8 words).
+ * a A single precision integer (8 words).
+ * b A single precision integer (8 words).
+ * returns 0 when no borrow occurred, all-ones when it did.
+ */
+static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[a], #8]\n\t"
+        "ldr	r6, [%[a], #12]\n\t"
+        "ldr	r7, [%[b], #0]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "ldr	r9, [%[b], #8]\n\t"
+        "ldr	r10, [%[b], #12]\n\t"
+        "subs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "str	r3, [%[r], #0]\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "str	r5, [%[r], #8]\n\t"
+        "str	r6, [%[r], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[a], #24]\n\t"
+        "ldr	r6, [%[a], #28]\n\t"
+        "ldr	r7, [%[b], #16]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "ldr	r9, [%[b], #24]\n\t"
+        "ldr	r10, [%[b], #28]\n\t"
+        "sbcs	r3, r3, r7\n\t"
+        "sbcs	r4, r4, r8\n\t"
+        "sbcs	r5, r5, r9\n\t"
+        "sbcs	r6, r6, r10\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "str	r5, [%[r], #24]\n\t"
+        "str	r6, [%[r], #28]\n\t"
+        "sbc	%[c], %[c], #0\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit d;
+
+    /* OR together the XOR of every word pair: zero iff all words match.
+     * Every word is always processed, keeping the compare constant time. */
+    d  = a[0] ^ b[0];
+    d |= a[1] ^ b[1];
+    d |= a[2] ^ b[2];
+    d |= a[3] ^ b[3];
+    d |= a[4] ^ b[4];
+    d |= a[5] ^ b[5];
+    d |= a[6] ^ b[6];
+    d |= a[7] ^ b[7];
+    return d == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Standard Jacobian-coordinate mixed addition. If the points are equal
+ * (or negatives with equal x/z) the doubling routine is used instead,
+ * since the addition formulas degenerate in that case. Infinity inputs
+ * are handled by selecting which operand's ordinates are used via the
+ * rp[]/ap[] index tables.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data (five 2*8-digit temporaries).
+ */
+static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+        sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* t3 = t + 4*8;
+    sp_digit* t4 = t + 6*8;
+    sp_digit* t5 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_256* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: t1 = -q->y; double when x and z match and y is
+     * either q->y or its negation. */
+    (void)sp_256_sub_8(t1, p256_mod, q->y);
+    sp_256_norm_8(t1);
+    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_8(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        /* When either input is infinity, computed ordinates go to scratch
+         * and r keeps a copy of the appropriate operand instead. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<8; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<8; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<8; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_8(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_8(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(x, x, t5, p256_mod);
+        sp_256_mont_dbl_8(t1, y, p256_mod);
+        sp_256_mont_sub_8(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(y, y, t5, p256_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Fixed-window method with a 4-bit window: a table of the 16 multiples
+ * 0*G..15*G is precomputed, then the 256-bit scalar is consumed four bits
+ * at a time from the top (4 doublings + 1 table add per window).
+ *
+ * r    Resulting point.
+ * g    Point to multiply.
+ * k    Scalar to multiply by (8 words, little-endian word order).
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[16];
+    sp_point_256 rtd;
+    sp_digit tmpd[2 * 8 * 5];
+#endif
+    sp_point_256* t;
+    sp_point_256* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Build table: t[i] = i * g, via doubles of t[i/2] and adds. */
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
+        t[1].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        /* Start with the top 4 bits of the most significant word; n holds
+         * remaining bits of the current word, c counts them. */
+        i = 6;
+        n = k[i+1] << 0;
+        c = 28;
+        y = n >> 28;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_256));
+        n <<= 4;
+        /* Consume the scalar 4 bits at a time, refilling n per word. */
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--];
+                c += 32;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+
+            sp_256_proj_point_add_8(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Clear secret-dependent intermediates before freeing. */
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_256_point_free_8(rt, 1, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points.
+ * Stores the affine X and Y ordinates of a point in Montgomery form;
+ * the Z ordinate is implicitly one (see sp_256_proj_point_add_qz1_8). */
+typedef struct sp_table_entry_256 {
+ sp_digit x[8];
+ sp_digit y[8];
+} sp_table_entry_256;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ * The doubling is performed in place: p is both input and result
+ * (there is no separate result parameter).
+ *
+ * p Point to double; receives the result.
+ * n Number of times to double
+ * t Temporary ordinate data; space for 5 field elements of 2*8 digits.
+ */
+static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*8;
+ sp_digit* b = t + 4*8;
+ sp_digit* t1 = t + 6*8;
+ sp_digit* t2 = t + 8*8;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = p->x;
+ y = p->y;
+ z = p->z;
+
+ /* Y = 2*Y */
+ sp_256_mont_dbl_8(y, y, p256_mod);
+ /* W = Z^4 */
+ sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);
+
+ /* Small build: all n iterations run inside the loop and the last one
+ * skips the W update. Non-small build: the final iteration is unrolled
+ * after the loop (without the W update). */
+#ifndef WOLFSSL_SP_SMALL
+ while (--n > 0)
+#else
+ while (--n >= 0)
+#endif
+ {
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_8(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_8(t2, b, p256_mod);
+ sp_256_mont_sub_8(x, x, t2, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+ /* t1 = Y^4 (note: result is in t1, not t2) */
+ sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+ /* Skip on the last iteration: W is not needed again. */
+ if (n != 0)
+#endif
+ {
+ /* W = W*Y^4 */
+ sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_8(y, b, x, p256_mod);
+ sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_8(y, y, p256_mod);
+ sp_256_mont_sub_8(y, y, t1, p256_mod);
+ }
+#ifndef WOLFSSL_SP_SMALL
+ /* Final (unrolled) iteration: as above but without updating W. */
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_8(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_8(t2, b, p256_mod);
+ sp_256_mont_sub_8(x, x, t2, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+ /* t1 = Y^4 (note: result is in t1, not t2) */
+ sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_8(y, b, x, p256_mod);
+ sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_8(y, y, p256_mod);
+ sp_256_mont_sub_8(y, y, t1, p256_mod);
+#endif
+ /* Y = Y/2 */
+ sp_256_div2_8(y, y, p256_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point (q) has a
+ * Z ordinate of one, i.e. it is in affine form (Montgomery representation),
+ * which allows the U1/S1 computations to be skipped.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add (Z == 1).
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p,
+ const sp_point_256* q, sp_digit* t)
+{
+ const sp_point_256* ap[2];
+ sp_point_256* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*8;
+ sp_digit* t3 = t + 4*8;
+ sp_digit* t4 = t + 6*8;
+ sp_digit* t5 = t + 8*8;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Check double: p equals q or -q when X and Z match and Y is equal or
+ * negated (t1 = p256_mod - q->y = -q->y mod p). */
+ (void)sp_256_sub_8(t1, p256_mod, q->y);
+ sp_256_norm_8(t1);
+ if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+ (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+ sp_256_proj_point_dbl_8(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_256));
+ /* Work on the zeroed temporary when either input is infinity so the
+ * result in r is taken from the non-infinity operand below. */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<8; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<8; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<8; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U2 = X2*Z1^2 */
+ sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+ /* H = U2 - X1 */
+ sp_256_mont_sub_8(t2, t2, x, p256_mod);
+ /* R = S2 - Y1 */
+ sp_256_mont_sub_8(t4, t4, y, p256_mod);
+ /* Z3 = H*Z1 */
+ sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+ /* X3 = R^2 - H^3 - 2*X1*H^2 */
+ sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(x, t1, t5, p256_mod);
+ sp_256_mont_dbl_8(t1, t3, p256_mod);
+ sp_256_mont_sub_8(x, x, t1, p256_mod);
+ /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+ sp_256_mont_sub_8(t3, t3, x, p256_mod);
+ sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(y, t3, t5, p256_mod);
+ }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form. Conversion is in place: a->x and a->y
+ * are rescaled and a->z is set to one (Montgomery form).
+ *
+ * a Point to convert.
+ * t Temporary data; first 4*8 digits for t1/t2, remainder used by
+ * sp_256_mont_inv_8.
+ */
+static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 8;
+ sp_digit* tmp = t + 4 * 8;
+
+ /* t1 = 1/Z, t2 = 1/Z^2, t1 = 1/Z^3 */
+ sp_256_mont_inv_8(t1, a->z, tmp);
+
+ sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
+
+ /* X = X/Z^2, Y = Y/Z^3, Z = 1 (Montgomery form). */
+ sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
+ XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ * Produces 16 entries for the 4-bit stripe method: table[2^i] holds
+ * 2^(64*i) * a and the remaining entries are sums of those powers.
+ * All entries are stored in affine form (Montgomery representation).
+ *
+ * a The base point.
+ * table Place to store generated point data (16 entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+ sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td, s1d, s2d;
+#endif
+ sp_point_256* t;
+ sp_point_256* s1 = NULL;
+ sp_point_256* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s2d, s2);
+ }
+
+ /* Convert the base point's ordinates to Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_256_proj_to_affine_8(t, tmp);
+
+ XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(64*i) * a, for i = 1..3. */
+ for (i=1; i<4; i++) {
+ sp_256_proj_point_dbl_n_8(t, 64, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Fill remaining entries: table[j] = table[2^i] + table[j - 2^i]. */
+ for (i=1; i<4; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_256_point_free_8(s2, 0, heap);
+ sp_256_point_free_8(s1, 0, heap);
+ sp_256_point_free_8( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Uses the 4-bit stripe method: each digit is built from one bit of each
+ * of the four 64-bit stripes of the scalar.
+ *
+ * r Resulting point.
+ * g Point to multiply (unused: the table holds its multiples).
+ * table Pre-computed table of multiples of g (16 entries).
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 8 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_8(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Table entries are affine: set Z ordinates to one (Montgomery). */
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ /* Start from the top bit (63) of each 64-bit stripe. */
+ y = 0;
+ for (j=0,x=63; j<4; j++,x+=64) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ for (i=62; i>=0; i--) {
+ /* Gather bit i from each stripe into a 4-bit table index. */
+ y = 0;
+ for (j=0,x=i; j<4; j++,x+=64) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ /* Double accumulator, then add the selected table entry. */
+ sp_256_proj_point_dbl_8(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_256_map_8(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry for a fixed point and its pre-computed stripe table. */
+typedef struct sp_cache_256_t {
+ sp_digit x[8]; /* X ordinate of cached point. */
+ sp_digit y[8]; /* Y ordinate of cached point. */
+ sp_table_entry_256 table[16]; /* Pre-computed stripe table for point. */
+ uint32_t cnt; /* Number of times the point has been used. */
+ int set; /* Non-zero when the entry is in use. */
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_256 = 0;
+ static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+/* Find the cache entry holding point g, or claim one for it.
+ * On a hit the entry's use count is incremented. On a miss an unused
+ * entry is claimed, or the least-used entry is evicted.
+ * NOTE(review): caller is expected to hold sp_cache_256_lock when
+ * HAVE_THREAD_LS is not defined — see sp_256_ecc_mulmod_8.
+ *
+ * g Point to look up.
+ * cache Receives a pointer to the entry for g.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazily clear all entries on first use. */
+ if (sp_cache_256_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_256[i].set = 0;
+ }
+ sp_cache_256_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_256[i].set)
+ continue;
+
+ if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+ sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+ sp_cache_256[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry, scanning from just after the last one used. */
+ i = (sp_cache_256_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_256[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_256_last) {
+ least = sp_cache_256[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_256[j].cnt < least) {
+ i = j;
+ least = sp_cache_256[i].cnt;
+ }
+ }
+ }
+
+ /* Claim entry for this point; table is regenerated on demand. */
+ XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+ XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+ sp_cache_256[i].set = 1;
+ sp_cache_256[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_256[i];
+ sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * (Not restricted to the base point: g is any point on P256.)
+ * If map is true then convert result to affine coordinates.
+ * With FP_ECC, frequently-used points get a cached stripe table after the
+ * second use; otherwise the plain fast multiply is used.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 8 * 5];
+ sp_cache_256_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Cache is shared between threads: serialize access. */
+ if (initCacheMutex_256 == 0) {
+ wc_InitMutex(&sp_cache_256_lock);
+ initCacheMutex_256 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_256_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_256(g, &cache);
+ /* Generate the stripe table on the point's second use. */
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+ }
+ else {
+ err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ * Produces 256 entries for the 8-bit stripe method: table[2^i] holds
+ * 2^(32*i) * a and the remaining entries are sums of those powers.
+ * All entries are stored in affine form (Montgomery representation).
+ *
+ * a The base point.
+ * table Place to store generated point data (256 entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+ sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td, s1d, s2d;
+#endif
+ sp_point_256* t;
+ sp_point_256* s1 = NULL;
+ sp_point_256* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s2d, s2);
+ }
+
+ /* Convert the base point's ordinates to Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_256_proj_to_affine_8(t, tmp);
+
+ XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(32*i) * a, for i = 1..7. */
+ for (i=1; i<8; i++) {
+ sp_256_proj_point_dbl_n_8(t, 32, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Fill remaining entries: table[j] = table[2^i] + table[j - 2^i]. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_256_point_free_8(s2, 0, heap);
+ sp_256_point_free_8(s1, 0, heap);
+ sp_256_point_free_8( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Uses the 8-bit stripe method: each digit is built from one bit of each
+ * of the eight 32-bit stripes of the scalar.
+ *
+ * r Resulting point.
+ * g Point to multiply (unused: the table holds its multiples).
+ * table Pre-computed table of multiples of g (256 entries).
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 8 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_8(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Table entries are affine: set Z ordinates to one (Montgomery). */
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ /* Start from the top bit (31) of each 32-bit stripe. */
+ y = 0;
+ for (j=0,x=31; j<8; j++,x+=32) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ for (i=30; i>=0; i--) {
+ /* Gather bit i from each stripe into an 8-bit table index. */
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=32) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ /* Double accumulator, then add the selected table entry. */
+ sp_256_proj_point_dbl_8(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_256_map_8(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry for a fixed point and its pre-computed stripe table. */
+typedef struct sp_cache_256_t {
+ sp_digit x[8]; /* X ordinate of cached point. */
+ sp_digit y[8]; /* Y ordinate of cached point. */
+ sp_table_entry_256 table[256]; /* Pre-computed stripe table for point. */
+ uint32_t cnt; /* Number of times the point has been used. */
+ int set; /* Non-zero when the entry is in use. */
+} sp_cache_256_t;
+
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_256 = 0;
+ static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+/* Find the cache entry holding point g, or claim one for it.
+ * On a hit the entry's use count is incremented. On a miss an unused
+ * entry is claimed, or the least-used entry is evicted.
+ * NOTE(review): caller is expected to hold sp_cache_256_lock when
+ * HAVE_THREAD_LS is not defined — see sp_256_ecc_mulmod_8.
+ *
+ * g Point to look up.
+ * cache Receives a pointer to the entry for g.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazily clear all entries on first use. */
+ if (sp_cache_256_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_256[i].set = 0;
+ }
+ sp_cache_256_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_256[i].set)
+ continue;
+
+ if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+ sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+ sp_cache_256[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry, scanning from just after the last one used. */
+ i = (sp_cache_256_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_256[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_256_last) {
+ least = sp_cache_256[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_256[j].cnt < least) {
+ i = j;
+ least = sp_cache_256[i].cnt;
+ }
+ }
+ }
+
+ /* Claim entry for this point; table is regenerated on demand. */
+ XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+ XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+ sp_cache_256[i].set = 1;
+ sp_cache_256[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_256[i];
+ sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * (Not restricted to the base point: g is any point on P256.)
+ * If map is true then convert result to affine coordinates.
+ * With FP_ECC, frequently-used points get a cached stripe table after the
+ * second use; otherwise the plain fast multiply is used.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 8 * 5];
+ sp_cache_256_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Cache is shared between threads: serialize access. */
+ if (initCacheMutex_256 == 0) {
+ wc_InitMutex(&sp_cache_256_lock);
+ initCacheMutex_256 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_256_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_256(g, &cache);
+ /* Generate the stripe table on the point's second use. */
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+ }
+ else {
+ err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Public API: converts between mp_int/ecc_point and internal SP types.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[8];
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert inputs to the internal representation and multiply. */
+ sp_256_from_mp(k, 8, km);
+ sp_256_point_from_ecc_point_8(point, gm);
+
+ err = sp_256_ecc_mulmod_8(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_to_ecc_point_8(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(point, 0, heap);
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Pre-computed multiples of the P256 base point for the 4-bit stripe
+ * method (see sp_256_ecc_mulmod_base_8); affine, Montgomery form. */
+static const sp_table_entry_256 p256_table[16] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+ 0xa53755c6,0x18905f76 },
+ { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+ 0x25885d85,0x8571ff18 } },
+ /* 2 */
+ { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+ 0xfd1b667f,0x2f5e6961 },
+ { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+ 0x8d6f0f7b,0xf648f916 } },
+ /* 3 */
+ { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+ 0x133d0015,0x5abe0285 },
+ { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+ 0x6b6f7383,0x94bb725b } },
+ /* 4 */
+ { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+ 0x21d324f6,0x61d587d4 },
+ { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+ 0x4621efbe,0xfa11fe12 } },
+ /* 5 */
+ { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+ 0x1f13bedc,0x586eb04c },
+ { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+ 0x70864f11,0x19d5ac08 } },
+ /* 6 */
+ { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+ 0xc3b266b1,0xbb6de651 },
+ { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+ 0x5d18b99b,0x60b4619a } },
+ /* 7 */
+ { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+ 0xaeebffcd,0x9d0f27b2 },
+ { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+ 0x356ec48d,0x244a566d } },
+ /* 8 */
+ { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+ 0xcd42ab1b,0x803f3e02 },
+ { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+ 0x5067adc1,0xc097440e } },
+ /* 9 */
+ { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+ 0x915f1f30,0xf1af32d5 },
+ { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+ 0xe2d41c8b,0x23d0f130 } },
+ /* 10 */
+ { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+ 0x7990216a,0x50bbb4d9 },
+ { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+ 0x01fe49c3,0x2b100118 } },
+ /* 11 */
+ { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+ 0x83fbae0c,0xdd558999 },
+ { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+ 0x149d6041,0xe6e4c551 } },
+ /* 12 */
+ { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+ 0xdb7e63af,0xfad27148 },
+ { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+ 0x9f0e1a84,0x77387de3 } },
+ /* 13 */
+ { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+ 0xbef0c47e,0xb37b85c0 },
+ { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+ 0xf9f628d5,0x9c135ac8 } },
+ /* 14 */
+ { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+ 0x91ece900,0xc109f9cb },
+ { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+ 0x2eee1ee1,0x9bc3344f } },
+ /* 15 */
+ { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+ 0x5f1a4cc1,0x29591d52 },
+ { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+ 0x18ef332c,0x6376551f } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Thin wrapper over the stripe multiply using the static p256_table.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+ k, map, heap);
+}
+
+#else
+static const sp_table_entry_256 p256_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+ 0xa53755c6,0x18905f76 },
+ { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+ 0x25885d85,0x8571ff18 } },
+ /* 2 */
+ { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
+ 0xdbdf58e9,0xd953c50d },
+ { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
+ 0x9eb288f3,0x863ebb7e } },
+ /* 3 */
+ { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
+ 0xb5ff80a0,0x00076055 },
+ { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
+ 0x34373ee0,0x83087761 } },
+ /* 4 */
+ { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+ 0xfd1b667f,0x2f5e6961 },
+ { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+ 0x8d6f0f7b,0xf648f916 } },
+ /* 5 */
+ { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+ 0x133d0015,0x5abe0285 },
+ { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+ 0x6b6f7383,0x94bb725b } },
+ /* 6 */
+ { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
+ 0x2f7dc4ef,0xcdd6bbcb },
+ { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
+ 0x4bdae5f6,0xa361bebd } },
+ /* 7 */
+ { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
+ 0xc4b5292c,0xba12ca09 },
+ { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
+ 0x701fef4b,0x53ebb99d } },
+ /* 8 */
+ { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
+ 0x06d54831,0x8589fb92 },
+ { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
+ 0x02541c4f,0xebb0696d } },
+ /* 9 */
+ { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
+ 0xd1b27da3,0xeb2820cb },
+ { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
+ 0x55a7da1d,0x1f28289b } },
+ /* 10 */
+ { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
+ 0x05e54d63,0x337a4b59 },
+ { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
+ 0xf4c2fbd6,0x0d65e0d5 } },
+ /* 11 */
+ { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
+ 0x52f4a232,0xc23da242 },
+ { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
+ 0xc790cff1,0x19de3b8c } },
+ /* 12 */
+ { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
+ 0x91fccbfd,0xe34dcbd4 },
+ { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
+ 0x7b4e0f7f,0xe7641f44 } },
+ /* 13 */
+ { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
+ 0x052a57bf,0x4a12df57 },
+ { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
+ 0xbb5bea46,0x6af5aa93 } },
+ /* 14 */
+ { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
+ 0x66a44013,0x5fe3475a },
+ { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
+ 0xecfea916,0xb544e308 } },
+ /* 15 */
+ { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
+ 0xa6b0c20b,0xe0b6b2bd },
+ { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
+ 0x25a63774,0x71c023de } },
+ /* 16 */
+ { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+ 0x21d324f6,0x61d587d4 },
+ { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+ 0x4621efbe,0xfa11fe12 } },
+ /* 17 */
+ { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+ 0x1f13bedc,0x586eb04c },
+ { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+ 0x70864f11,0x19d5ac08 } },
+ /* 18 */
+ { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
+ 0x7f9c563f,0xe7c0073f },
+ { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
+ 0xc65b3c0a,0xe08504fe } },
+ /* 19 */
+ { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
+ 0x5b0996b4,0x78f01882 },
+ { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
+ 0x7e94747a,0x43a773b8 } },
+ /* 20 */
+ { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+ 0xc3b266b1,0xbb6de651 },
+ { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+ 0x5d18b99b,0x60b4619a } },
+ /* 21 */
+ { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+ 0xaeebffcd,0x9d0f27b2 },
+ { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+ 0x356ec48d,0x244a566d } },
+ /* 22 */
+ { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
+ 0x3581ef69,0x45e58c87 },
+ { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
+ 0xc1e4b7a4,0xc040e21c } },
+ /* 23 */
+ { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
+ 0x682c6ec7,0x1cdf5c97 },
+ { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
+ 0xa92dff3d,0x046755f8 } },
+ /* 24 */
+ { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
+ 0x3b83a5f3,0x046e5e11 },
+ { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
+ 0x303d005b,0x6e0106c3 } },
+ /* 25 */
+ { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
+ 0xe901cf1f,0x442594ed },
+ { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
+ 0x4c2ee68e,0xa796fa51 } },
+ /* 26 */
+ { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
+ 0xc69766e9,0xe4ad2da9 },
+ { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
+ 0xc37b5143,0xc5e94046 } },
+ /* 27 */
+ { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
+ 0xdb464747,0x63283daf },
+ { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
+ 0x1981a938,0x68bd19ab } },
+ /* 28 */
+ { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
+ 0x3c6fdfd6,0x495292f5 },
+ { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
+ 0x26036837,0x0ec7530d } },
+ /* 29 */
+ { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
+ 0x64863f0b,0x0f6207a6 },
+ { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
+ 0x08ed6dcf,0xff0db072 } },
+ /* 30 */
+ { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
+ 0x88740ea3,0x313b513c },
+ { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
+ 0x86f19f81,0x2d3abcf9 } },
+ /* 31 */
+ { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
+ 0xded98cdf,0xc036fa10 },
+ { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
+ 0xb6d40194,0xa6b2a2c4 } },
+ /* 32 */
+ { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
+ 0xaf7c9860,0x810ee252 },
+ { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
+ 0x92731745,0xd485717a } },
+ /* 33 */
+ { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
+ 0x2f9a604e,0x6a6045a7 },
+ { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
+ 0xf9e15790,0xd3e45cfa } },
+ /* 34 */
+ { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
+ 0xe3c2c19c,0x207755de },
+ { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
+ 0x7154b00d,0x48dc5ee5 } },
+ /* 35 */
+ { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
+ 0xdff6f445,0xf2fb0aed },
+ { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
+ 0xdb28d525,0xa13e9015 } },
+ /* 36 */
+ { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
+ 0x1497526f,0x2bf0d6b0 },
+ { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
+ 0x162fe89f,0x42a94a5a } },
+ /* 37 */
+ { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
+ 0xc65ede3d,0x2c2dd969 },
+ { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
+ 0x42c56dbc,0xf437fa1f } },
+ /* 38 */
+ { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
+ 0x54707aa8,0xaaf45b33 },
+ { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
+ 0xf4f272bc,0xcdf6310d } },
+ /* 39 */
+ { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
+ 0xda9e2ff2,0xf0d008ba },
+ { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
+ 0xca887b8b,0x5bd5c2f5 } },
+ /* 40 */
+ { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
+ 0xa09e4719,0xaa12dfc8 },
+ { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
+ 0xe48ca901,0x6c036e73 } },
+ /* 41 */
+ { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
+ 0x96afbe24,0x292ff658 },
+ { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
+ 0x311b7276,0x644e0c90 } },
+ /* 42 */
+ { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
+ 0xcab79a77,0xf25ae793 },
+ { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
+ 0x13db0a3e,0x39b8e653 } },
+ /* 43 */
+ { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
+ 0x0f19db06,0x39122f2f },
+ { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
+ 0xce80ff8d,0x8de80af8 } },
+ /* 44 */
+ { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
+ 0x2e368c04,0x87194906 },
+ { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
+ 0x5b74fde1,0xfc315e6a } },
+ /* 45 */
+ { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
+ 0xee389088,0xe6d4a7ad },
+ { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
+ 0x9be2ae57,0x35dfaf9a } },
+ /* 46 */
+ { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
+ 0x1c830d2b,0x1da5c7d7 },
+ { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
+ 0xdbf4b9d6,0x7077c0fd } },
+ /* 47 */
+ { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
+ 0xe50efe44,0x53a8632e },
+ { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
+ 0x34e1fcc1,0x028ca76d } },
+ /* 48 */
+ { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
+ 0x6962f046,0x04c17cd8 },
+ { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
+ 0xfed97474,0xf7ba4de9 } },
+ /* 49 */
+ { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
+ 0x52131c41,0xe31f9600 },
+ { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
+ 0xce34d47b,0xaa3a6259 } },
+ /* 50 */
+ { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
+ 0x7e79daee,0x2398dd62 },
+ { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
+ 0x1c046210,0x5717f5b2 } },
+ /* 51 */
+ { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
+ 0x0e3c28de,0x660a2c56 },
+ { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
+ 0x4f522453,0x624ee54c } },
+ /* 52 */
+ { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
+ 0x92bdfbc0,0x4f392afb },
+ { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
+ 0xccdb399c,0x8a3e7977 } },
+ /* 53 */
+ { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
+ 0x70c24404,0x3888d023 },
+ { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
+ 0x18102336,0xa5e62e47 } },
+ /* 54 */
+ { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
+ 0x466a5adc,0x2c4768e6 },
+ { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
+ 0xf9e652a0,0x7b5e6441 } },
+ /* 55 */
+ { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
+ 0x0c8d744a,0xb8af73cb },
+ { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
+ 0x7f3f0895,0xa036395f } },
+ /* 56 */
+ { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
+ 0x875fb533,0x4be36b01 },
+ { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
+ 0x1bdc00c0,0x8cbc9a87 } },
+ /* 57 */
+ { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
+ 0x0c0835f8,0x44e7553e },
+ { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
+ 0x5eb8fc18,0x470a683a } },
+ /* 58 */
+ { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
+ 0xc63dc6ef,0x16410690 },
+ { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
+ 0x7abcbb4f,0xd73479fd } },
+ /* 59 */
+ { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
+ 0x0771666b,0x816469e3 },
+ { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
+ 0xf0dd3f9c,0x0a36dd23 } },
+ /* 60 */
+ { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
+ 0xfdbab118,0xe331dfd6 },
+ { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
+ 0x492e3389,0xd3b4782a } },
+ /* 61 */
+ { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
+ 0x4c86a5bd,0x7281275a },
+ { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
+ 0xce145059,0x2c062e7e } },
+ /* 62 */
+ { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
+ 0x2c4e7ef1,0x282a35f9 },
+ { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
+ 0x554d2abd,0xc71cd513 } },
+ /* 63 */
+ { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
+ 0xcf47f3a3,0xc50f6740 },
+ { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
+ 0x212958dc,0xb9ecb3a7 } },
+ /* 64 */
+ { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+ 0xcd42ab1b,0x803f3e02 },
+ { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+ 0x5067adc1,0xc097440e } },
+ /* 65 */
+ { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+ 0x915f1f30,0xf1af32d5 },
+ { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+ 0xe2d41c8b,0x23d0f130 } },
+ /* 66 */
+ { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
+ 0xc0a3fadd,0xb0288dd6 },
+ { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
+ 0xf408c8d2,0xffd3724f } },
+ /* 67 */
+ { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
+ 0xd78c26df,0xf5590f4a },
+ { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
+ 0xf6f74a20,0x18d6da54 } },
+ /* 68 */
+ { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+ 0x7990216a,0x50bbb4d9 },
+ { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+ 0x01fe49c3,0x2b100118 } },
+ /* 69 */
+ { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+ 0x83fbae0c,0xdd558999 },
+ { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+ 0x149d6041,0xe6e4c551 } },
+ /* 70 */
+ { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
+ 0x07ed56ff,0x51e00db1 },
+ { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
+ 0x49829177,0xe22f4241 } },
+ /* 71 */
+ { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
+ 0x52dc48c9,0xf709373d },
+ { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
+ 0xe7275b11,0xbd52d288 } },
+ /* 72 */
+ { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
+ 0xc8aa77a6,0xa0d0f8e4 },
+ { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
+ 0x946d6a00,0xa56c78c7 } },
+ /* 73 */
+ { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
+ 0x731a367a,0xd8befdf8 },
+ { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
+ 0xce9f6478,0x854a68a5 } },
+ /* 74 */
+ { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
+ 0x98846a95,0x5cacea0b },
+ { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
+ 0x35e4efa9,0xe4982d12 } },
+ /* 75 */
+ { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
+ 0x16b20499,0x8046b7f6 },
+ { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
+ 0x9082af55,0xeb17ca7b } },
+ /* 76 */
+ { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
+ 0xfab5e131,0x097b00ba },
+ { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
+ 0xafdbcc9e,0xf95c747b } },
+ /* 77 */
+ { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
+ 0x566ed837,0x3512601e },
+ { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
+ 0x6068ab6b,0x0ef97123 } },
+ /* 78 */
+ { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
+ 0x3b4fbc95,0xfc16d933 },
+ { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
+ 0xb95d7a17,0x14ca4af1 } },
+ /* 79 */
+ { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
+ 0xf59c231d,0x4057b063 },
+ { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
+ 0xf1330b13,0x1c3b5d64 } },
+ /* 80 */
+ { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+ 0xdb7e63af,0xfad27148 },
+ { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+ 0x9f0e1a84,0x77387de3 } },
+ /* 81 */
+ { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+ 0xbef0c47e,0xb37b85c0 },
+ { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+ 0xf9f628d5,0x9c135ac8 } },
+ /* 82 */
+ { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
+ 0xc433851f,0x5721361f },
+ { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
+ 0xe6bb11bd,0xdcbac3c9 } },
+ /* 83 */
+ { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
+ 0x2d626862,0xb8c1c89e },
+ { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
+ 0x2f9422d4,0x5d23bbda } },
+ /* 84 */
+ { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+ 0x91ece900,0xc109f9cb },
+ { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+ 0x2eee1ee1,0x9bc3344f } },
+ /* 85 */
+ { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+ 0x5f1a4cc1,0x29591d52 },
+ { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+ 0x18ef332c,0x6376551f } },
+ /* 86 */
+ { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
+ 0x08e2987a,0xbdb79dc8 },
+ { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
+ 0xadd3c14a,0x8ee86001 } },
+ /* 87 */
+ { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
+ 0x6f77aa4b,0x92e51d7a },
+ { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
+ 0x0a56aaaa,0x5182f86f } },
+ /* 88 */
+ { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
+ 0x4073a6f2,0x91dcab5d },
+ { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
+ 0x97974f2b,0x17a0cedb } },
+ /* 89 */
+ { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
+ 0x7f4cdf41,0x2e8ce36c },
+ { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
+ 0x34f668f3,0xf4ccc6cb } },
+ /* 90 */
+ { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
+ 0x9a0df3c9,0xac0db488 },
+ { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
+ 0x94c974a2,0x95a64a61 } },
+ /* 91 */
+ { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
+ 0x29210677,0x231e54ba },
+ { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
+ 0xd8a731e1,0xab0be032 } },
+ /* 92 */
+ { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
+ 0x2cf6a679,0xf1bcc880 },
+ { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
+ 0x5aebb271,0x85169469 } },
+ /* 93 */
+ { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
+ 0xdaad55d8,0x8f67d9d2 },
+ { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
+ 0xc0728b5d,0xf84572b9 } },
+ /* 94 */
+ { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
+ 0x616b2c19,0xedee2710 },
+ { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
+ 0x44ebd7f4,0x9fd27e9b } },
+ /* 95 */
+ { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
+ 0x958ff387,0xa40c2fb6 },
+ { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
+ 0x7dc6decf,0x99bc9bb8 } },
+ /* 96 */
+ { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
+ 0xa16d7e64,0x9abe210b },
+ { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
+ 0x87f344b0,0x7881c257 } },
+ /* 97 */
+ { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
+ 0xa30e8940,0x15e6e319 },
+ { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
+ 0x191172ce,0x0e55facf } },
+ /* 98 */
+ { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
+ 0x6fe96577,0xd73d0976 },
+ { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
+ 0x8f15a50b,0x9250a374 } },
+ /* 99 */
+ { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
+ 0xc1cc8c0b,0x77414082 },
+ { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
+ 0x12eb20b9,0x8cb04f4d } },
+ /* 100 */
+ { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
+ 0x47123b51,0xe4e429ef },
+ { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
+ 0x3c6e6552,0x37bca2ff } },
+ /* 101 */
+ { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
+ 0x3002b22a,0x59913edc },
+ { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
+ 0xb013e226,0x43786e4a } },
+ /* 102 */
+ { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
+ 0xb7e79e7a,0x8638ca98 },
+ { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
+ 0x7b3aa6f0,0x1ecdd36a } },
+ /* 103 */
+ { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
+ 0xd459f32d,0xd85d0f85 },
+ { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
+ 0xb4ed3c62,0xa04f19c3 } },
+ /* 104 */
+ { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
+ 0x5c0950b0,0x92b2eeea },
+ { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
+ 0x5834276c,0x1ee78221 } },
+ /* 105 */
+ { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
+ 0x57a6e150,0xf3f2ced8 },
+ { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
+ 0x3da3e210,0x0f56a454 } },
+ /* 106 */
+ { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
+ 0x1969e263,0xbd8f1741 },
+ { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
+ 0x30ccfa09,0x2d1a1c35 } },
+ /* 107 */
+ { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
+ 0xb91fba46,0xa107a65e },
+ { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
+ 0xf87a9af2,0x183d760a } },
+ /* 108 */
+ { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
+ 0xc269d754,0x1d44179d },
+ { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
+ 0x9606d262,0x771f9cc2 } },
+ /* 109 */
+ { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
+ 0x0362718e,0x64427a31 },
+ { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
+ 0x6ae90d6d,0x49d9b749 } },
+ /* 110 */
+ { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
+ 0x3f605445,0x9037d81b },
+ { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
+ 0x7cc0639c,0x08c3de6a } },
+ /* 111 */
+ { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
+ 0x45796b2f,0xc6909442 },
+ { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
+ 0xcafe3ac0,0x3fa3db02 } },
+ /* 112 */
+ { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
+ 0xfdb808ff,0xc5c4bdb0 },
+ { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
+ 0x46c2b6b5,0x2d56db94 } },
+ /* 113 */
+ { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
+ 0xe503ba42,0x0f56bd9d },
+ { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
+ 0x1173b5f1,0x4003bb9d } },
+ /* 114 */
+ { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
+ 0xa07f2f9e,0x53765522 },
+ { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
+ 0x6c5d4549,0x7a056f58 } },
+ /* 115 */
+ { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
+ 0x7a1a2675,0x77d482f1 },
+ { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
+ 0x2b38b0e4,0x4115012b } },
+ /* 116 */
+ { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
+ 0xfbea0946,0xcdf04572 },
+ { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
+ 0x97383109,0xee703dda } },
+ /* 117 */
+ { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
+ 0xa162ce21,0x2a0ad89d },
+ { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
+ 0xac2b4659,0xd62d0b67 } },
+ /* 118 */
+ { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
+ 0x991c2426,0xb39a23f2 },
+ { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
+ 0xc0674cc5,0x04ed0092 } },
+ /* 119 */
+ { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
+ 0x0177c387,0xa0a91fc1 },
+ { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
+ 0x9ed20c41,0x084cf988 } },
+ /* 120 */
+ { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
+ 0x73abf77e,0xd57955b2 },
+ { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
+ 0x02d141f1,0x8e14ea42 } },
+ /* 121 */
+ { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
+ 0x2aa4d158,0x597e1a37 },
+ { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
+ 0x199b4dea,0xca3f0236 } },
+ /* 122 */
+ { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
+ 0x309c07e4,0xbde7fd7e },
+ { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
+ 0x0a7dd198,0xb623ad0e } },
+ /* 123 */
+ { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
+ 0x58ec137b,0xd6aa2e46 },
+ { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
+ 0x2dcc513a,0x111662e0 } },
+ /* 124 */
+ { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
+ 0x94b750f8,0xdb3ee1cb },
+ { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
+ 0x52206a59,0x886a6442 } },
+ /* 125 */
+ { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
+ 0x018a17bc,0xa70cf4eb },
+ { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
+ 0xd1747b77,0xaa4772ab } },
+ /* 126 */
+ { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
+ 0x30faf974,0x611a6ddc },
+ { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
+ 0x16429c88,0x5cfffaf8 } },
+ /* 127 */
+ { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
+ 0x7dc1994c,0x6e5a6b23 },
+ { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
+ 0x242dabcc,0x481a238d } },
+ /* 128 */
+ { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
+ 0xe0cdf943,0x2c41114c },
+ { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
+ 0x42ff9297,0x20477abf } },
+ /* 129 */
+ { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
+ 0xc77396b6,0xac66409a },
+ { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
+ 0xcc122f85,0xce8e6975 } },
+ /* 130 */
+ { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
+ 0x250bb4a8,0x08fde365 },
+ { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
+ 0x565d6cd7,0x2f7e2fd2 } },
+ /* 131 */
+ { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
+ 0x907702ae,0xc65be92e },
+ { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
+ 0xd1193b3a,0x4bff8e47 } },
+ /* 132 */
+ { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
+ 0x5772967d,0x3e4e4ae6 },
+ { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
+ 0x58ec6028,0x5388aefd } },
+ /* 133 */
+ { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
+ 0x4f75be0e,0x5cf908d1 },
+ { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
+ 0x60f00ce2,0xa698ba40 } },
+ /* 134 */
+ { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
+ 0x7aebad8d,0xb142ef8a },
+ { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
+ 0x58515075,0xd1896a96 } },
+ /* 135 */
+ { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
+ 0x7981da39,0x267b0e0b },
+ { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
+ 0xa1119393,0xb54e287a } },
+ /* 136 */
+ { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
+ 0x5f87d4e6,0x84abb28b },
+ { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
+ 0x17655640,0xe5436f67 } },
+ /* 137 */
+ { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
+ 0x5b9ce99e,0x0404f68b },
+ { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
+ 0x0ac1c701,0x3a4263df } },
+ /* 138 */
+ { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
+ 0x905ea367,0x0ca8fd3f },
+ { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
+ 0x4ddb0c33,0x96dca264 } },
+ /* 139 */
+ { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
+ 0x3aad59dc,0x4363e212 },
+ { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
+ 0xd8bb98c4,0x840e115c } },
+ /* 140 */
+ { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
+ 0x30ded6d4,0x5e0d6abd },
+ { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
+ 0x2945a25a,0x7dea48f4 } },
+ /* 141 */
+ { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
+ 0xebfd16d1,0xabc2a2be },
+ { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
+ 0x6c7eefc1,0x4ea35394 } },
+ /* 142 */
+ { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
+ 0x1c94ffc3,0x3a76e689 },
+ { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
+ 0x465e6464,0x8212a10a } },
+ /* 143 */
+ { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
+ 0x599cb164,0xaa7cab71 },
+ { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
+ 0xfe0617c3,0x40e38073 } },
+ /* 144 */
+ { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
+ 0xb3055526,0xe3604700 },
+ { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
+ 0xa3dee15f,0x6542d677 } },
+ /* 145 */
+ { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
+ 0x09bb6f21,0xa6534aee },
+ { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
+ 0xdc9aef22,0xf3cb672f } },
+ /* 146 */
+ { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
+ 0xaae870e7,0x7cafaa2e },
+ { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
+ 0xb9bd522e,0x0aab13c1 } },
+ /* 147 */
+ { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
+ 0x847012e9,0x4b91a602 },
+ { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
+ 0x72321cab,0x49534c53 } },
+ /* 148 */
+ { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
+ 0xd65ac5ee,0xcaf46c4f },
+ { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
+ 0x04c6770f,0x14ce9e57 } },
+ /* 149 */
+ { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
+ 0x3e4c9a71,0x1bb708a5 },
+ { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
+ 0xda300102,0xf9d126f2 } },
+ /* 150 */
+ { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
+ 0x729ecc69,0x807afcb9 },
+ { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
+ 0x6568cd8c,0x751adcd1 } },
+ /* 151 */
+ { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
+ 0x2537743f,0x29ec4468 },
+ { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
+ 0x92a4077d,0xff9370e3 } },
+ /* 152 */
+ { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
+ 0xa2a9d01a,0x9776478b },
+ { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
+ 0xac2f82fa,0x74a6313f } },
+ /* 153 */
+ { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
+ 0x0ff4863d,0xab75be15 },
+ { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
+ 0x0b4459f6,0x4ebeac2e } },
+ /* 154 */
+ { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
+ 0x2c1baffc,0xdf99887b },
+ { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
+ 0x779f4058,0x27b040a7 } },
+ /* 155 */
+ { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
+ 0xe4cfa3f5,0xb393dd37 },
+ { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
+ 0xd0463419,0x09588c12 } },
+ /* 156 */
+ { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
+ 0xdb9f648b,0x81c879a9 },
+ { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
+ 0x5fc11bc4,0xfa0d48f5 } },
+ /* 157 */
+ { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
+ 0xb6a367d6,0x8ea0e156 },
+ { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
+ 0xfa00b5ac,0x3f5ab924 } },
+ /* 158 */
+ { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
+ 0x2b74256e,0x8bc76887 },
+ { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
+ 0x60fcf34f,0xb386f190 } },
+ /* 159 */
+ { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
+ 0x1b069c4d,0x4cb460f7 },
+ { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
+ 0x95ef5223,0x52c0d508 } },
+ /* 160 */
+ { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
+ 0x2bb09c0b,0x4ac3c938 },
+ { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
+ 0xe39705f4,0x380d94c7 } },
+ /* 161 */
+ { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
+ 0xde2637af,0x2ce3e171 },
+ { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
+ 0x0b624e4d,0x2e6cd852 } },
+ /* 162 */
+ { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
+ 0x42c69d54,0xca177547 },
+ { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
+ 0x9cab2ce6,0xa976a713 } },
+ /* 163 */
+ { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
+ 0x0a1f4999,0x8720a717 },
+ { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
+ 0xc769893c,0x9719ef29 } },
+ /* 164 */
+ { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
+ 0xe15704c1,0xa5072976 },
+ { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
+ 0xf7b77725,0x99389c9d } },
+ /* 165 */
+ { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
+ 0x202c82e4,0xa88806aa },
+ { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
+ 0x4738dcfe,0x0043bffb } },
+ /* 166 */
+ { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
+ 0xba6c4866,0x52f3ef01 },
+ { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
+ 0x9ef27e75,0x3296bd89 } },
+ /* 167 */
+ { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
+ 0xaee571e9,0x3b90febf },
+ { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
+ 0x9f810b18,0x6e88069d } },
+ /* 168 */
+ { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
+ 0xdefaad13,0xa7222bea },
+ { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
+ 0xbc2ac690,0xbe94d523 } },
+ /* 169 */
+ { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
+ 0x9be8c766,0x7782defe },
+ { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
+ 0xa2892e4b,0x03838567 } },
+ /* 170 */
+ { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
+ 0xadf7b420,0xdbd986c4 },
+ { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
+ 0x6860bbd0,0x8e24d3c4 } },
+ /* 171 */
+ { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
+ 0x407bafc8,0x541a99c4 },
+ { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
+ 0xf57d35d1,0xc0092c49 } },
+ /* 172 */
+ { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
+ 0x7286944d,0x75e40634 },
+ { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
+ 0xc7848586,0x5b7cb658 } },
+ /* 173 */
+ { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
+ 0x8df097a1,0x7ae13eba },
+ { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
+ 0xe2a8e3fd,0x787d8074 } },
+ /* 174 */
+ { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
+ 0x9ef28484,0x5c222819 },
+ { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
+ 0xbaf0f2b0,0xe45d37ab } },
+ /* 175 */
+ { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
+ 0x84dfb9d3,0xed7bc122 },
+ { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
+ 0x45ca6d27,0xaac97cc9 } },
+ /* 176 */
+ { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
+ 0x1163dc4e,0x318f97b3 },
+ { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
+ 0x9a84ff4d,0xfa41faa1 } },
+ /* 177 */
+ { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
+ 0x1d26e9e2,0x38bb6b2c },
+ { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
+ 0xce7601a5,0x94dd0905 } },
+ /* 178 */
+ { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
+ 0xd25c2ae9,0x92077867 },
+ { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
+ 0xd29beb51,0x81e8428b } },
+ /* 179 */
+ { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
+ 0xdbbfa4b1,0x1b94ab62 },
+ { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
+ 0x055590ee,0x06a38e28 } },
+ /* 180 */
+ { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
+ 0x83d9d4f8,0xa7b36c20 },
+ { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
+ 0xa2822a20,0xbe54c6b4 } },
+ /* 181 */
+ { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
+ 0xeae022bb,0xbf30a5ab },
+ { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
+ 0x2732d13a,0xd1c820de } },
+ /* 182 */
+ { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
+ 0x68a18da3,0xb7d17bed },
+ { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
+ 0x6412cc64,0x3997fd5e } },
+ /* 183 */
+ { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
+ 0x3c6c13e8,0x0eeb8929 },
+ { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
+ 0xc922b6ef,0x228916f8 } },
+ /* 184 */
+ { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
+ 0x6e93097e,0xec05ad1d },
+ { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
+ 0x7ff11b37,0x7d314156 } },
+ /* 185 */
+ { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
+ 0x9bc1d7a3,0xe9ce66fc },
+ { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
+ 0x72280651,0xd9650b01 } },
+ /* 186 */
+ { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
+ 0x804eb7a2,0x14d6699a },
+ { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
+ 0x0d43598a,0x6f4c6841 } },
+ /* 187 */
+ { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
+ 0x61189abb,0x4c4350fd },
+ { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
+ 0x5a3118b5,0xa726d242 } },
+ /* 188 */
+ { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
+ 0xcc6cf392,0x13639e82 },
+ { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
+ 0xc1a335a3,0xca9365e1 } },
+ /* 189 */
+ { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
+ 0x970b72a5,0x9ce29c34 },
+ { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
+ 0xab42af98,0x48c4abd7 } },
+ /* 190 */
+ { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
+ 0xf67b33cb,0x78017c32 },
+ { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
+ 0xde5c1c04,0x53cd0454 } },
+ /* 191 */
+ { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
+ 0xd3d7fa8f,0xeea465c1 },
+ { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
+ 0x7ae69193,0x1b6e42a4 } },
+ /* 192 */
+ { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
+ 0x187fbd3d,0x0224da14 },
+ { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
+ 0x42bfff33,0x60838ef0 } },
+ /* 193 */
+ { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
+ 0x2d331643,0x636eb202 },
+ { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
+ 0x39218bac,0x8844eeb6 } },
+ /* 194 */
+ { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
+ 0x51fb789e,0x27ba83dc },
+ { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
+ 0x87f3a4ab,0xadb62d34 } },
+ /* 195 */
+ { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
+ 0x75e7c8b2,0xb990fd76 },
+ { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
+ 0x4d10d18d,0x81707ef9 } },
+ /* 196 */
+ { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
+ 0xd5a8aa5c,0x3792daea },
+ { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
+ 0x94b001ba,0x5abd635e } },
+ /* 197 */
+ { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
+ 0x846ab610,0x5995bf21 },
+ { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
+ 0xd483411e,0x44c32ca2 } },
+ /* 198 */
+ { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
+ 0x8082a54c,0x1f2162fb },
+ { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
+ 0xc3e907c9,0x8f1d402b } },
+ /* 199 */
+ { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
+ 0x926edbf9,0xb1980f43 },
+ { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
+ 0x37448e45,0x2828ad9b } },
+ /* 200 */
+ { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
+ 0x5a14b390,0x4973f127 },
+ { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
+ 0xdb168ac7,0x6dac8ed0 } },
+ /* 201 */
+ { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
+ 0x20b9de4c,0x4b23ef59 },
+ { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
+ 0xddf49a4e,0x4dd71534 } },
+ /* 202 */
+ { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
+ 0x2f4a4dbb,0xfd317000 },
+ { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
+ 0x9569f365,0x14fac58c } },
+ /* 203 */
+ { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
+ 0x36abda50,0xed7c7651 },
+ { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
+ 0x4d2e9f53,0xfefcb7f7 } },
+ /* 204 */
+ { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
+ 0x87e0d80b,0x1801a57e },
+ { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
+ 0x1ead1064,0x9f8fc11e } },
+ /* 205 */
+ { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
+ 0x3d3a69a9,0xa9d3809d },
+ { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
+ 0xe1178ef7,0x3006b9ae } },
+ /* 206 */
+ { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
+ 0x45f8f761,0x0ab85fd7 },
+ { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
+ 0x11e942c2,0xb122d675 } },
+ /* 207 */
+ { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
+ 0x097dbaec,0x9f599dc1 },
+ { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
+ 0x8a294b78,0x7d5528e0 } },
+ /* 208 */
+ { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
+ 0x303f1730,0x28ccea01 },
+ { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
+ 0xa1d013bf,0xc18baf48 } },
+ /* 209 */
+ { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
+ 0xb7a9596b,0x9def809d },
+ { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
+ 0x68808ce5,0x0357f8b0 } },
+ /* 210 */
+ { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
+ 0x1b489887,0xe4a01add },
+ { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
+ 0xce10cc30,0x466d7d79 } },
+ /* 211 */
+ { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
+ 0x451ead1a,0xc672a522 },
+ { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
+ 0xf2a67513,0x5e3d64fa } },
+ /* 212 */
+ { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
+ 0xeb8e42fc,0x6c8a7a95 },
+ { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
+ 0xad82ca91,0x348ae422 } },
+ /* 213 */
+ { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
+ 0xd9ef2d2e,0xc1074de0 },
+ { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
+ 0xc9e54ffc,0xfbadfbdb } },
+ /* 214 */
+ { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
+ 0x83716fcd,0xb7f976b4 },
+ { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
+ 0xcafcc805,0xf4d41b2e } },
+ /* 215 */
+ { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
+ 0xe0160f10,0x180824ea },
+ { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
+ 0x83cf6d25,0x67e5f639 } },
+ /* 216 */
+ { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
+ 0x04c11fc6,0x9fef789a },
+ { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
+ 0xa99c4e20,0xbc80c181 } },
+ /* 217 */
+ { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
+ 0x9f8cdf10,0x49270e62 },
+ { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
+ 0x61372f7f,0xd2ee52f9 } },
+ /* 218 */
+ { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
+ 0xe5abb733,0xdfb478be },
+ { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
+ 0x08df473a,0xd9a140b4 } },
+ /* 219 */
+ { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
+ 0x623f4b1a,0x760c058d },
+ { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
+ 0x8f190409,0x7141982d } },
+ /* 220 */
+ { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
+ 0x89d54e47,0x3af9d1ce },
+ { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
+ 0x73957dd6,0xb1f815c3 } },
+ /* 221 */
+ { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
+ 0x1543f052,0xa41aed14 },
+ { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
+ 0x86fb60ef,0xd6e9c1dd } },
+ /* 222 */
+ { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
+ 0xae9bf8c2,0x9c9c6e10 },
+ { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
+ 0x40fa61b6,0x566bd596 } },
+ /* 223 */
+ { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
+ 0xf525345e,0xcf2c7390 },
+ { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
+ 0x8aa20979,0x02f51755 } },
+ /* 224 */
+ { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
+ 0xe8d4d97d,0x14e9ada5 },
+ { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
+ 0x8e9d9ae8,0xa0ad4fab } },
+ /* 225 */
+ { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
+ 0x6e56ed1e,0xbcd530b8 },
+ { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
+ 0x6979341d,0x909283cf } },
+ /* 226 */
+ { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
+ 0xace1549a,0x35eeb7c9 },
+ { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
+ 0x448ae864,0x9a8b2cf4 } },
+ /* 227 */
+ { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
+ 0xd4491379,0x6bdb60f4 },
+ { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
+ 0x94ba08a9,0x01ec3cfd } },
+ /* 228 */
+ { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
+ 0x475464f6,0xd1acb1c0 },
+ { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
+ 0x405626c2,0x7dcd079d } },
+ /* 229 */
+ { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
+ 0x377d19b8,0x0bf53589 },
+ { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
+ 0xe16686fc,0xd28be4d9 } },
+ /* 230 */
+ { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
+ 0x510f88ce,0xd76007aa },
+ { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
+ 0xb303bb01,0xf2b52f68 } },
+ /* 231 */
+ { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
+ 0xcc5aed3a,0xd8dbe98e },
+ { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
+ 0xee559705,0xe01593a3 } },
+ /* 232 */
+ { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
+ 0xaeb8ef06,0xafec07b1 },
+ { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
+ 0x6e2dbfdd,0xa71b9354 } },
+ /* 233 */
+ { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
+ 0x628523d9,0x53a2005c },
+ { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
+ 0x3d588e3d,0xbf47d19b } },
+ /* 234 */
+ { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
+ 0x39c9a1b6,0x001c2c7f },
+ { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
+ 0x86ffb99b,0xfdadf8e7 } },
+ /* 235 */
+ { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
+ 0x5aa43c94,0x3a838e4d },
+ { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
+ 0x873e1da3,0x3cdb8257 } },
+ /* 236 */
+ { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
+ 0xf1f57fba,0x5a60cc89 },
+ { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
+ 0xdbfd8fc0,0x922ff56f } },
+ /* 237 */
+ { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
+ 0xf6c5cd62,0x72919a7d },
+ { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
+ 0x3624089a,0x5e791780 } },
+ /* 238 */
+ { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
+ 0xe24c2fab,0x4e0a5371 },
+ { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
+ 0xd56604ee,0xf5ff7818 } },
+ /* 239 */
+ { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
+ 0x533f5e64,0xe41df0e9 },
+ { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
+ 0xac4f155f,0x8edd7d6e } },
+ /* 240 */
+ { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
+ 0xed8aee96,0x1432c1ca },
+ { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
+ 0x5ac8d2c6,0xcaef480b } },
+ /* 241 */
+ { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
+ 0x8efae236,0xd0ba177e },
+ { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
+ 0x1c54ae16,0xf31c957c } },
+ /* 242 */
+ { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
+ 0x96e17c3a,0x013404cb },
+ { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
+ 0x91933e6c,0x6f377c4b } },
+ /* 243 */
+ { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
+ 0xd2d09506,0x6dba3e4e },
+ { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
+ 0x3becf4a7,0xf13cf342 } },
+ /* 244 */
+ { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
+ 0x274bbad3,0xc83fa9a9 },
+ { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
+ 0x5d702683,0xb49d70f4 } },
+ /* 245 */
+ { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
+ 0x0c30f1cf,0x59cfadbb },
+ { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
+ 0x354a4b67,0x5babf362 } },
+ /* 246 */
+ { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
+ 0x9026c8f0,0x6188c6a7 },
+ { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
+ 0xdf50b9d9,0x993fe475 } },
+ /* 247 */
+ { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
+ 0x4c80616b,0x81f76466 },
+ { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
+ 0x5fe9060d,0x564a812a } },
+ /* 248 */
+ { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
+ 0x00e51d6c,0x226bf3cf },
+ { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
+ 0xff257836,0x68779f47 } },
+ /* 249 */
+ { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
+ 0xeb092e0b,0x97bcb0d1 },
+ { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
+ 0x0a784655,0xa872ffe8 } },
+ /* 250 */
+ { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
+ 0xb732a36a,0x02812bfc },
+ { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
+ 0xfe5396af,0x07391cc9 } },
+ /* 251 */
+ { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
+ 0x7e6d2a08,0x355d2adc },
+ { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
+ 0x7c2a3a79,0x3dc2b1e3 } },
+ /* 252 */
+ { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
+ 0x3ccd846b,0xc4786910 },
+ { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
+ 0xd5bb4d32,0xccc42968 } },
+ /* 253 */
+ { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
+ 0xaa4871cf,0xe147eb42 },
+ { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
+ 0x080e96e3,0x239ac047 } },
+ /* 254 */
+ { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
+ 0xf5f7e59d,0xc55fa1a3 },
+ { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
+ 0xd4f4b699,0x094cd99c } },
+ /* 255 */
+ { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
+ 0x42abad33,0xb90a30b6 },
+ { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
+ 0x1b7924f7,0x019f8b9a } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Uses the stripe method with the precomputed table of base-point
+ * multiples (p256_table) defined above.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+ k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+/* Stack-allocated working storage; only used when small/dynamic builds
+ * are disabled (or malloc is unavailable). */
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[8];
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ /* sp_256_point_new_8 either aliases the stack point or heap-allocates,
+ * depending on the same build configuration as above. */
+ err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Import the scalar from mp_int into 8 32-bit digits. */
+ sp_256_from_mp(k, 8, km);
+
+ err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Export the result back into the caller's ecc_point. */
+ err = sp_256_point_to_ecc_point_8(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(point, 0, heap);
+
+ return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_8(const sp_digit* a)
+{
+ /* OR all eight digits together: the result is 0 iff every digit is 0.
+ * No data-dependent branching on the digit values. */
+ return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * The 256-bit value is processed as eight 32-bit words in two batches of
+ * four registers. Any carry out of the top word is discarded (wrap-around).
+ *
+ * a A single precision integer (8 x 32-bit little-endian digits).
+ */
+static void sp_256_add_one_8(sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "ldr r1, [%[a], #0]\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ /* adds sets the carry flag; the adcs chain propagates it upward. */
+ "adds r1, r1, #1\n\t"
+ "adcs r2, r2, #0\n\t"
+ "adcs r3, r3, #0\n\t"
+ "adcs r4, r4, #0\n\t"
+ "str r1, [%[a], #0]\n\t"
+ "str r2, [%[a], #4]\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ /* ldr/str do not modify the APSR flags, so the carry from the low
+ * half is still valid for the adcs instructions below. */
+ "ldr r1, [%[a], #16]\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "adcs r1, r1, #0\n\t"
+ "adcs r2, r2, #0\n\t"
+ "adcs r3, r3, #0\n\t"
+ "adcs r4, r4, #0\n\t"
+ "str r1, [%[a], #16]\n\t"
+ "str r2, [%[a], #20]\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ :
+ : [a] "r" (a)
+ : "memory", "r1", "r2", "r3", "r4"
+ );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * Bytes are consumed from the end of the array (least significant first)
+ * and packed 8 bits at a time into 32-bit digits.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0; /* j = current output digit index */
+ word32 s = 0; /* s = bit position within the current digit */
+
+ r[0] = 0;
+ for (i = n-1; i >= 0; i--) {
+ /* Place the low bits of a[i] into the current digit. */
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 24U) {
+ /* Digit is full: mask to 32 bits and spill the high bits of
+ * a[i] into the start of the next digit. */
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ /* Zero any remaining digits not covered by the input. */
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * Uses rejection sampling: draw 256 random bits and retry until the value
+ * is below p256_order2 (presumably the curve order minus 2 — declared
+ * elsewhere; TODO confirm), then add one so the result is never zero.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
+{
+ int err;
+ byte buf[32];
+
+ do {
+ err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+ if (err == 0) {
+ sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
+ if (sp_256_cmp_8(k, p256_order2) < 0) {
+ /* Shift into range starting at 1 (avoids a zero scalar). */
+ sp_256_add_one_8(k);
+ break;
+ }
+ }
+ }
+ while (err == 0);
+
+ return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * Generates a private scalar in [1, order-1] and computes the public
+ * point as priv * G. With WOLFSSL_VALIDATE_ECC_KEYGEN the public point
+ * is additionally checked to have the correct order.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+/* Stack-allocated working storage for non-small / no-malloc builds. */
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[8];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_256 inf;
+#endif
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_256* infinity;
+#endif
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, inf, infinity);
+ }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Draw the private scalar. */
+ err = sp_256_ecc_gen_k_8(rng, k);
+ }
+ if (err == MP_OKAY) {
+ /* Public key: point = k * G (mapped to affine). */
+ err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+ }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ /* order * point must be the point at infinity; otherwise the
+ * generated point does not have the correct order. */
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
+ }
+ if (err == MP_OKAY) {
+ if ((sp_256_iszero_8(point->x) == 0) || (sp_256_iszero_8(point->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(k, priv);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_to_ecc_point_8(point, pub);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_256_point_free_8(infinity, 1, heap);
+#endif
+ sp_256_point_free_8(point, 1, heap);
+
+ return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * Digits are emitted least significant first into the end of the array,
+ * 8 bits at a time, handling digit boundaries that fall mid-byte.
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+ /* j = output byte index (walks backward from the end)
+ * s = bits of the previous digit already merged into a[j]
+ * b = bit offset consumed within the current digit */
+ int i, j, s = 0, b;
+
+ j = 256 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<8 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ /* Emit the remaining whole bytes of digit i. */
+ while (b < 32) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ /* Carry the leftover bits (if the digit boundary fell mid-byte)
+ * into the next output byte. */
+ s = 8 - (b - 32);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * This is the ECDH shared-secret computation: out = (priv * pub).x.
+ *
+ * priv Scalar to multiply the point by.
+ * pub Point to multiply.
+ * out Buffer to hold X ordinate.
+ * outLen On entry, size of the buffer in bytes.
+ * On exit, length of data in buffer in bytes.
+ * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+ word32* outLen, void* heap)
+{
+/* Stack-allocated working storage for non-small / no-malloc builds. */
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[8];
+#endif
+ sp_point_256* point = NULL;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ /* Output is always the fixed 32-byte field-element size. */
+ if (*outLen < 32U) {
+ err = BUFFER_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, p, point);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(k, 8, priv);
+ sp_256_point_from_ecc_point_8(point, pub);
+ /* point = k * pub, mapped to affine so point->x is usable. */
+ err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ sp_256_to_bin(point->x, out);
+ *outLen = 32;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(point, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Size-optimized 8x32-bit multiply producing a 512-bit result. Uses a
+ * product-scanning (column-wise) loop: for each 32-bit output column,
+ * all a[i]*b[j] partial products with i+j equal to the column index are
+ * accumulated in a 96-bit accumulator (r6:r7:r8). The 64-byte result is
+ * built on the stack and copied out 16 bytes at a time, so r may alias
+ * a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #64\n\t"
+ /* r5 = column byte-offset; r6:r7:r8 = running 96-bit accumulator. */
+ "mov r5, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ /* r3 = max(column - 28, 0): first a-offset contributing here. */
+ "subs r3, r5, #28\n\t"
+ "it cc\n\t"
+ "movcc r3, #0\n\t"
+ /* r4 = matching b-offset so that a-offset + b-offset == column. */
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ /* Accumulate a[r3/4] * b[r4/4] into the 96-bit accumulator. */
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r12, [%[b], r4]\n\t"
+ "umull r9, r10, r14, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #32\n\t"
+ "beq 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Store the finished column and shift the accumulator down. */
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #56\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ /* Copy the 64-byte stack result to r, 16 bytes per iteration. */
+ "\n4:\n\t"
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #32\n\t"
+ "mov r10, #0\n\t"
+ "# A[0] * B[0]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r3, r4, r8, r9\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [sp]\n\t"
+ "# A[0] * B[1]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[0]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #4]\n\t"
+ "# A[0] * B[2]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[1]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[0]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #8]\n\t"
+ "# A[0] * B[3]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[2]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[1]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[0]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #12]\n\t"
+ "# A[0] * B[4]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[3]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[2]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[1]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[0]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #16]\n\t"
+ "# A[0] * B[5]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[4]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[3]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[2]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[1]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[0]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #20]\n\t"
+ "# A[0] * B[6]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[5]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[4]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[3]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[2]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[1]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[0]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #24]\n\t"
+ "# A[0] * B[7]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[6]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[5]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[4]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[3]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[2]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[1]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[0]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #28]\n\t"
+ "# A[1] * B[7]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[2] * B[6]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[5]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[4]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[3]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[2]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[1]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #32]\n\t"
+ "# A[2] * B[7]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[3] * B[6]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[5]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[4]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[3]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[2]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "# A[3] * B[7]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[4] * B[6]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[5]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[4]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[3]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "# A[4] * B[7]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[5] * B[6]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[5]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[4]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #44]\n\t"
+ "# A[5] * B[7]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[6] * B[6]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[5]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "# A[6] * B[7]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[7] * B[6]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "# A[7] * B[7]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "ldr r3, [sp, #0]\n\t"
+ "ldr r4, [sp, #4]\n\t"
+ "ldr r5, [sp, #8]\n\t"
+ "ldr r6, [sp, #12]\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r3, [sp, #16]\n\t"
+ "ldr r4, [sp, #20]\n\t"
+ "ldr r5, [sp, #24]\n\t"
+ "ldr r6, [sp, #28]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "str r5, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "add sp, sp, #32\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Loop version: subtracts four 32-bit words per iteration until the
+ * write cursor reaches a + 32 bytes (8 words total).  The borrow is
+ * carried across iterations through c.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ * Returns the final borrow: 0 or (sp_digit)-1.
+ */
+static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r14 = constant 0; r12 = end pointer (a + 32). */
+        "mov r14, #0\n\t"
+        "add r12, %[a], #32\n\t"
+        "\n1:\n\t"
+        /* 0 - c re-creates the carry flag from the previous borrow. */
+        "subs %[c], r14, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[a], #8]\n\t"
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r7, [%[b]], #4\n\t"
+        "ldr r8, [%[b]], #4\n\t"
+        "ldr r9, [%[b]], #4\n\t"
+        "ldr r10, [%[b]], #4\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "sbcs r6, r6, r10\n\t"
+        "str r3, [%[a]], #4\n\t"
+        "str r4, [%[a]], #4\n\t"
+        "str r5, [%[a]], #4\n\t"
+        "str r6, [%[a]], #4\n\t"
+        /* Capture the borrow: 0 - 0 - !C -> 0 or -1. */
+        "sbc %[c], r14, r14\n\t"
+        "cmp %[a], r12\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * Unrolled version: two blocks of four word subtractions with the
+ * borrow threaded between them by sbcs.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ * Returns the final borrow: 0 or (sp_digit)-1.
+ */
+static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r2, [%[a], #0]\n\t"
+        "ldr r3, [%[a], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[a], #12]\n\t"
+        "ldr r6, [%[b], #0]\n\t"
+        "ldr r7, [%[b], #4]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "ldr r9, [%[b], #12]\n\t"
+        /* subs (not sbcs) starts the chain with no incoming borrow. */
+        "subs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #0]\n\t"
+        "str r3, [%[a], #4]\n\t"
+        "str r4, [%[a], #8]\n\t"
+        "str r5, [%[a], #12]\n\t"
+        "ldr r2, [%[a], #16]\n\t"
+        "ldr r3, [%[a], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[a], #28]\n\t"
+        "ldr r6, [%[b], #16]\n\t"
+        "ldr r7, [%[b], #20]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "ldr r9, [%[b], #28]\n\t"
+        "sbcs r2, r2, r6\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "str r2, [%[a], #16]\n\t"
+        "str r3, [%[a], #20]\n\t"
+        "str r4, [%[a], #24]\n\t"
+        "str r5, [%[a], #28]\n\t"
+        /* r9 - r9 - !C -> 0 or -1: the returned borrow. */
+        "sbc %[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * The result is 9 words: 8 product words plus the final carry word at
+ * r[8], so r must have room for 9 digits.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        /* r3:r4:r5 form a rolling carry window; r9 is the byte index. */
+        "mov r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr r8, [%[a]]\n\t"
+        "umull r5, r3, %[b], r8\n\t"
+        "mov r4, #0\n\t"
+        "str r5, [%[r]]\n\t"
+        "mov r5, #0\n\t"
+        "mov r9, #4\n\t"
+        "1:\n\t"
+        "ldr r8, [%[a], r9]\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], r9]\n\t"
+        /* Shift the window down one word for the next iteration. */
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r9, r9, #4\n\t"
+        "cmp r9, #32\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r], #32]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#else
+    __asm__ __volatile__ (
+        /* Fully unrolled: the accumulator rotates through r3/r4/r5. */
+        "mov r10, #0\n\t"
+        "# A[0] * B\n\t"
+        "ldr r8, [%[a]]\n\t"
+        "umull r3, r4, %[b], r8\n\t"
+        "mov r5, #0\n\t"
+        "str r3, [%[r]]\n\t"
+        "# A[1] * B\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "# A[2] * B\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #8]\n\t"
+        "# A[3] * B\n\t"
+        "ldr r8, [%[a], #12]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #12]\n\t"
+        "# A[4] * B\n\t"
+        "ldr r8, [%[a], #16]\n\t"
+        "mov r3, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r7\n\t"
+        "adc r3, r3, r10\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "# A[5] * B\n\t"
+        "ldr r8, [%[a], #20]\n\t"
+        "mov r4, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r7\n\t"
+        "adc r4, r4, r10\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "# A[6] * B\n\t"
+        "ldr r8, [%[a], #24]\n\t"
+        "mov r5, #0\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, r10\n\t"
+        "str r3, [%[r], #24]\n\t"
+        "# A[7] * B\n\t"
+        "ldr r8, [%[a], #28]\n\t"
+        "umull r6, r7, %[b], r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adc r5, r5, r7\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "str r5, [%[r], #32]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor (NOTE(review): original comment called this the
+ *     "dividend"; the code divides (d1|d0) by it).
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        /* r5 = (div >> 1) + 1 is the trial value for the bitwise
+         * restoring-division loop below. */
+        "lsr r5, %[div], #1\n\t"
+        "add r5, r5, #1\n\t"
+        "mov r6, %[d0]\n\t"
+        "mov r7, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "subs r8, r5, r7\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], %[r]\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "and r8, r8, r5\n\t"
+        "subs r7, r7, r8\n\t"
+        "# Next 30 bits\n\t"
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        /* Shift (r7:r6) left one bit and do one trial subtract of r5. */
+        "movs r6, r6, lsl #1\n\t"
+        "adc r7, r7, r7\n\t"
+        "subs r8, r5, r7\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], %[r]\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "and r8, r8, r5\n\t"
+        "subs r7, r7, r8\n\t"
+        "subs r4, r4, #1\n\t"
+        "bpl 1b\n\t"
+        "add %[r], %[r], %[r]\n\t"
+        "add %[r], %[r], #1\n\t"
+        /* Two refinement rounds: r += high word of (d1|d0 - r * div). */
+        "umull r4, r5, %[r], %[div]\n\t"
+        "subs r4, %[d0], r4\n\t"
+        "sbc r5, %[d1], r5\n\t"
+        "add %[r], %[r], r5\n\t"
+        "umull r4, r5, %[r], %[div]\n\t"
+        "subs r4, %[d0], r4\n\t"
+        "sbc r5, %[d1], r5\n\t"
+        "add %[r], %[r], r5\n\t"
+        /* Final fix-up: if the remainder r4 >= div then r += 1. */
+        "subs r8, %[div], r4\n\t"
+        "sbc r8, r8, r8\n\t"
+        "sub %[r], %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r7", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r Destination single precision integer.
+ * a Source single precision integer.
+ * m Mask that is ANDed against every digit.
+ */
+static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int j = 0;
+
+    while (j < 8) {
+        r[j] = a[j] & m;
+        j++;
+    }
+#else
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided (double width: 16 digits).
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[16], t2[9];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+
+    div = d[7];
+    /* Work on a copy of the double-width dividend. */
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
+    for (i=7; i>=0; i--) {
+        /* Estimate the quotient digit from the top two words
+         * (div_256_word_8 may overshoot by 1). */
+        r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
+
+        sp_256_mul_d_8(t2, d, r1);
+        /* t1 -= r1 * d at word offset i; t2[8] is the product's top word. */
+        t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
+        t1[8 + i] -= t2[8];
+        /* If the subtraction went negative, t1[8 + i] is all-ones: add d
+         * back.  Done twice to cover the worst-case overshoot. */
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+    }
+
+    /* Constant-time final reduction: subtract d once if t1 >= d. */
+    r1 = sp_256_cmp_8(t1, d) >= 0;
+    sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_256_div_8 that keeps only the remainder.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced (double width).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_8(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * Word-serial schoolbook squaring: for each output byte offset r5 the
+ * products a[r3/4] * a[r4/4] with r3 < r4 are summed into the 96-bit
+ * accumulator r6:r7:r8 (off-diagonal terms added twice, the diagonal
+ * once).  The 64-byte result is built on the stack and copied out.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub sp, sp, #64\n\t"
+        "mov r12, #0\n\t"
+        "mov r6, #0\n\t"
+        "mov r7, #0\n\t"
+        "mov r8, #0\n\t"
+        "mov r5, #0\n\t"
+        "\n1:\n\t"
+        "subs r3, r5, #28\n\t"
+        "it cc\n\t"
+        "movcc r3, r12\n\t"
+        "sub r4, r5, r3\n\t"
+        "\n2:\n\t"
+        "cmp r4, r3\n\t"
+        "beq 4f\n\t"
+        "ldr r14, [%[a], r3]\n\t"
+        "ldr r9, [%[a], r4]\n\t"
+        "umull r9, r10, r14, r9\n\t"
+        "adds r6, r6, r9\n\t"
+        "adcs r7, r7, r10\n\t"
+        "adc r8, r8, r12\n\t"
+        "adds r6, r6, r9\n\t"
+        "adcs r7, r7, r10\n\t"
+        "adc r8, r8, r12\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "ldr r14, [%[a], r3]\n\t"
+        "umull r9, r10, r14, r14\n\t"
+        "adds r6, r6, r9\n\t"
+        "adcs r7, r7, r10\n\t"
+        "adc r8, r8, r12\n\t"
+        "\n5:\n\t"
+        "add r3, r3, #4\n\t"
+        "sub r4, r4, #4\n\t"
+        "cmp r3, #32\n\t"
+        "beq 3f\n\t"
+        "cmp r3, r4\n\t"
+        "bgt 3f\n\t"
+        "cmp r3, r5\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "str r6, [sp, r5]\n\t"
+        "mov r6, r7\n\t"
+        "mov r7, r8\n\t"
+        "mov r8, #0\n\t"
+        "add r5, r5, #4\n\t"
+        "cmp r5, #56\n\t"
+        "ble 1b\n\t"
+        "str r6, [sp, r5]\n\t"
+        "\n4:\n\t"
+        "ldr r6, [sp, #0]\n\t"
+        "ldr r7, [sp, #4]\n\t"
+        "ldr r8, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "str r6, [%[r], #0]\n\t"
+        "str r7, [%[r], #4]\n\t"
+        "str r8, [%[r], #8]\n\t"
+        "str r3, [%[r], #12]\n\t"
+        "add sp, sp, #16\n\t"
+        "add %[r], %[r], #16\n\t"
+        "subs r5, r5, #16\n\t"
+        "bgt 4b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * Fully unrolled schoolbook squaring.  Off-diagonal products are added
+ * twice (or pre-doubled in the r5:r6:r7 batches) and the diagonal terms
+ * once.  The low eight result words are staged on the stack and copied
+ * to r at the end, so r may alias a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "sub sp, sp, #32\n\t"
+        "mov r14, #0\n\t"
+        "# A[0] * A[0]\n\t"
+        "ldr r10, [%[a], #0]\n\t"
+        "umull r8, r3, r10, r10\n\t"
+        "mov r4, #0\n\t"
+        "str r8, [sp]\n\t"
+        "# A[0] * A[1]\n\t"
+        "ldr r10, [%[a], #4]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r14, r14\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "str r3, [sp, #4]\n\t"
+        "# A[0] * A[2]\n\t"
+        "ldr r10, [%[a], #8]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r2, r2, r9\n\t"
+        "adc r3, r14, r14\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r2, r2, r9\n\t"
+        "adc r3, r3, r14\n\t"
+        "# A[1] * A[1]\n\t"
+        "ldr r10, [%[a], #4]\n\t"
+        "umull r8, r9, r10, r10\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r2, r2, r9\n\t"
+        "adc r3, r3, r14\n\t"
+        "str r4, [sp, #8]\n\t"
+        "# A[0] * A[3]\n\t"
+        "ldr r10, [%[a], #12]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r2, r2, r8\n\t"
+        "adcs r3, r3, r9\n\t"
+        "adc r4, r14, r14\n\t"
+        "adds r2, r2, r8\n\t"
+        "adcs r3, r3, r9\n\t"
+        "adc r4, r4, r14\n\t"
+        "# A[1] * A[2]\n\t"
+        "ldr r10, [%[a], #8]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r2, r2, r8\n\t"
+        "adcs r3, r3, r9\n\t"
+        "adc r4, r4, r14\n\t"
+        "adds r2, r2, r8\n\t"
+        "adcs r3, r3, r9\n\t"
+        "adc r4, r4, r14\n\t"
+        "str r2, [sp, #12]\n\t"
+        "# A[0] * A[4]\n\t"
+        "ldr r10, [%[a], #16]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r14, r14\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "# A[1] * A[3]\n\t"
+        "ldr r10, [%[a], #12]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "# A[2] * A[2]\n\t"
+        "ldr r10, [%[a], #8]\n\t"
+        "umull r8, r9, r10, r10\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "str r3, [sp, #16]\n\t"
+        "# A[0] * A[5]\n\t"
+        "ldr r10, [%[a], #20]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "umull r5, r6, r10, r8\n\t"
+        "mov r3, #0\n\t"
+        "mov r7, #0\n\t"
+        "# A[1] * A[4]\n\t"
+        "ldr r10, [%[a], #16]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "# A[2] * A[3]\n\t"
+        "ldr r10, [%[a], #12]\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "adds r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adc r7, r7, r7\n\t"
+        "adds r4, r4, r5\n\t"
+        "adcs r2, r2, r6\n\t"
+        "adc r3, r3, r7\n\t"
+        "str r4, [sp, #20]\n\t"
+        "# A[0] * A[6]\n\t"
+        "ldr r10, [%[a], #24]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "umull r5, r6, r10, r8\n\t"
+        "mov r4, #0\n\t"
+        "mov r7, #0\n\t"
+        "# A[1] * A[5]\n\t"
+        "ldr r10, [%[a], #20]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "# A[2] * A[4]\n\t"
+        "ldr r10, [%[a], #16]\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "# A[3] * A[3]\n\t"
+        "ldr r10, [%[a], #12]\n\t"
+        "umull r8, r9, r10, r10\n\t"
+        "adds r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adc r7, r7, r7\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "adds r2, r2, r5\n\t"
+        "adcs r3, r3, r6\n\t"
+        "adc r4, r4, r7\n\t"
+        "str r2, [sp, #24]\n\t"
+        "# A[0] * A[7]\n\t"
+        "ldr r10, [%[a], #28]\n\t"
+        "ldr r8, [%[a], #0]\n\t"
+        "umull r5, r6, r10, r8\n\t"
+        "mov r2, #0\n\t"
+        "mov r7, #0\n\t"
+        "# A[1] * A[6]\n\t"
+        "ldr r10, [%[a], #24]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "# A[2] * A[5]\n\t"
+        "ldr r10, [%[a], #20]\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "# A[3] * A[4]\n\t"
+        "ldr r10, [%[a], #16]\n\t"
+        "ldr r8, [%[a], #12]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "adds r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adc r7, r7, r7\n\t"
+        "adds r3, r3, r5\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adc r2, r2, r7\n\t"
+        "str r3, [sp, #28]\n\t"
+        "# A[1] * A[7]\n\t"
+        "ldr r10, [%[a], #28]\n\t"
+        "ldr r8, [%[a], #4]\n\t"
+        "umull r5, r6, r10, r8\n\t"
+        "mov r3, #0\n\t"
+        "mov r7, #0\n\t"
+        "# A[2] * A[6]\n\t"
+        "ldr r10, [%[a], #24]\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "# A[3] * A[5]\n\t"
+        "ldr r10, [%[a], #20]\n\t"
+        "ldr r8, [%[a], #12]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "# A[4] * A[4]\n\t"
+        "ldr r10, [%[a], #16]\n\t"
+        "umull r8, r9, r10, r10\n\t"
+        "adds r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adc r7, r7, r7\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "adds r4, r4, r5\n\t"
+        "adcs r2, r2, r6\n\t"
+        "adc r3, r3, r7\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "# A[2] * A[7]\n\t"
+        "ldr r10, [%[a], #28]\n\t"
+        "ldr r8, [%[a], #8]\n\t"
+        "umull r5, r6, r10, r8\n\t"
+        "mov r4, #0\n\t"
+        "mov r7, #0\n\t"
+        "# A[3] * A[6]\n\t"
+        "ldr r10, [%[a], #24]\n\t"
+        "ldr r8, [%[a], #12]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "# A[4] * A[5]\n\t"
+        "ldr r10, [%[a], #20]\n\t"
+        "ldr r8, [%[a], #16]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r5, r5, r8\n\t"
+        "adcs r6, r6, r9\n\t"
+        "adc r7, r7, r14\n\t"
+        "adds r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adc r7, r7, r7\n\t"
+        "adds r2, r2, r5\n\t"
+        "adcs r3, r3, r6\n\t"
+        "adc r4, r4, r7\n\t"
+        "str r2, [%[r], #36]\n\t"
+        "# A[3] * A[7]\n\t"
+        "ldr r10, [%[a], #28]\n\t"
+        "ldr r8, [%[a], #12]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r14, r14\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "# A[4] * A[6]\n\t"
+        "ldr r10, [%[a], #24]\n\t"
+        "ldr r8, [%[a], #16]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "# A[5] * A[5]\n\t"
+        "ldr r10, [%[a], #20]\n\t"
+        "umull r8, r9, r10, r10\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "str r3, [%[r], #40]\n\t"
+        "# A[4] * A[7]\n\t"
+        "ldr r10, [%[a], #28]\n\t"
+        "ldr r8, [%[a], #16]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r2, r2, r9\n\t"
+        "adc r3, r14, r14\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r2, r2, r9\n\t"
+        "adc r3, r3, r14\n\t"
+        "# A[5] * A[6]\n\t"
+        "ldr r10, [%[a], #24]\n\t"
+        "ldr r8, [%[a], #20]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r2, r2, r9\n\t"
+        "adc r3, r3, r14\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r2, r2, r9\n\t"
+        "adc r3, r3, r14\n\t"
+        "str r4, [%[r], #44]\n\t"
+        "# A[5] * A[7]\n\t"
+        "ldr r10, [%[a], #28]\n\t"
+        "ldr r8, [%[a], #20]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r2, r2, r8\n\t"
+        "adcs r3, r3, r9\n\t"
+        "adc r4, r14, r14\n\t"
+        "adds r2, r2, r8\n\t"
+        "adcs r3, r3, r9\n\t"
+        "adc r4, r4, r14\n\t"
+        "# A[6] * A[6]\n\t"
+        "ldr r10, [%[a], #24]\n\t"
+        "umull r8, r9, r10, r10\n\t"
+        "adds r2, r2, r8\n\t"
+        "adcs r3, r3, r9\n\t"
+        "adc r4, r4, r14\n\t"
+        "str r2, [%[r], #48]\n\t"
+        "# A[6] * A[7]\n\t"
+        "ldr r10, [%[a], #28]\n\t"
+        "ldr r8, [%[a], #24]\n\t"
+        "umull r8, r9, r10, r8\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r14, r14\n\t"
+        "adds r3, r3, r8\n\t"
+        "adcs r4, r4, r9\n\t"
+        "adc r2, r2, r14\n\t"
+        "str r3, [%[r], #52]\n\t"
+        "# A[7] * A[7]\n\t"
+        "ldr r10, [%[a], #28]\n\t"
+        "umull r8, r9, r10, r10\n\t"
+        "adds r4, r4, r8\n\t"
+        "adc r2, r2, r9\n\t"
+        "str r4, [%[r], #56]\n\t"
+        "str r2, [%[r], #60]\n\t"
+        "ldr r2, [sp, #0]\n\t"
+        "ldr r3, [sp, #4]\n\t"
+        "ldr r4, [sp, #8]\n\t"
+        "ldr r8, [sp, #12]\n\t"
+        "str r2, [%[r], #0]\n\t"
+        "str r3, [%[r], #4]\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "str r8, [%[r], #12]\n\t"
+        "ldr r2, [sp, #16]\n\t"
+        "ldr r3, [sp, #20]\n\t"
+        "ldr r4, [sp, #24]\n\t"
+        "ldr r8, [sp, #28]\n\t"
+        "str r2, [%[r], #16]\n\t"
+        "str r3, [%[r], #20]\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "str r8, [%[r], #28]\n\t"
+        "add sp, sp, #32\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r5", "r6", "r7", "r14"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve. */
+/* Little-endian 32-bit words of n - 2, where n is the group order. */
+static const uint32_t p256_order_minus_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve. */
+/* Only the low 128 bits; the non-small inversion hard-codes the fixed
+ * top half in its addition chain and scans just these words. */
+static const uint32_t p256_order_low[4] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
+ *
+ * Montgomery multiply: full 16-word product then Montgomery reduction
+ * with p256_mp_order, so operands are expected in Montgomery form.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * Montgomery square: full 16-word square then Montgomery reduction
+ * with p256_mp_order, so the operand is expected in Montgomery form.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Repeatedly square a number modulo the order of the P256 curve.
+ * (r = a ^ (2^n) mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of squarings to perform (n >= 1).
+ */
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
+{
+    int i = 1;
+
+    sp_256_mont_sqr_order_8(r, a);
+    while (i < n) {
+        sp_256_mont_sqr_order_8(r, r);
+        i++;
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * Uses Fermat's little theorem: a^(order-2) mod order.  The small build
+ * does a plain square-and-multiply over all 255 bits; the fast build
+ * uses a fixed addition chain exploiting the order's repeated 0xff /
+ * 0xffffffff runs, scanning only p256_order_low for the variable half.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data (at least 6*8 digits in the fast build).
+ */
+static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t, t);
+        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_8(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_8(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_8(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_8(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_8(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_8(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8   (original comment said t3; code uses t2) */
+    sp_256_mont_sqr_n_order_8(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_8(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_8(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_8(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+ mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit ed[2*8];
+ sp_digit xd[2*8];
+ sp_digit kd[2*8];
+ sp_digit rd[2*8];
+ sp_digit td[3 * 2*8];
+ sp_point_256 p;
+#endif
+ sp_digit* e = NULL;
+ sp_digit* x = NULL;
+ sp_digit* k = NULL;
+ sp_digit* r = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* point = NULL;
+ sp_digit carry;
+ sp_digit* s = NULL;
+ sp_digit* kInv = NULL;
+ int err = MP_OKAY;
+ int32_t c;
+ int i;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ e = d + 0 * 8;
+ x = d + 2 * 8;
+ k = d + 4 * 8;
+ r = d + 6 * 8;
+ tmp = d + 8 * 8;
+#else
+ e = ed;
+ x = xd;
+ k = kd;
+ r = rd;
+ tmp = td;
+#endif
+ s = e;
+ kInv = k;
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(e, 8, hash, (int)hashLen);
+ }
+
+ for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+ sp_256_from_mp(x, 8, priv);
+
+ /* New random point. */
+ if (km == NULL || mp_iszero(km)) {
+ err = sp_256_ecc_gen_k_8(rng, k);
+ }
+ else {
+ sp_256_from_mp(k, 8, km);
+ mp_zero(km);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = point->x mod order */
+ XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
+ sp_256_norm_8(r);
+ c = sp_256_cmp_8(r, p256_order);
+ sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_8(r);
+
+ /* Conv k to Montgomery form (mod order) */
+ sp_256_mul_8(k, k, p256_norm_order);
+ err = sp_256_mod_8(k, k, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_8(k);
+ /* kInv = 1/k mod order */
+ sp_256_mont_inv_order_8(kInv, k, tmp);
+ sp_256_norm_8(kInv);
+
+ /* s = r * x + e */
+ sp_256_mul_8(x, x, r);
+ err = sp_256_mod_8(x, x, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_8(x);
+ carry = sp_256_add_8(s, e, x);
+ sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
+ sp_256_norm_8(s);
+ c = sp_256_cmp_8(s, p256_order);
+ sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_8(s);
+
+ /* s = s * k^-1 mod order */
+ sp_256_mont_mul_order_8(s, s, kInv);
+ sp_256_norm_8(s);
+
+ /* Check that signature is usable. */
+ if (sp_256_iszero_8(s) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (i == 0) {
+ err = RNG_FAILURE_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(r, rm);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(s, sm);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
+#endif
+ sp_256_point_free_8(point, 1, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash of the data to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of the public key point Q as an mp_int.
+ * pY Y ordinate of the public key point Q as an mp_int.
+ * pZ Z ordinate of the public key point Q as an mp_int.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Verification result: 1 when the signature matches, 0 otherwise.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit u1d[2*8];
+ sp_digit u2d[2*8];
+ sp_digit sd[2*8];
+ sp_digit tmpd[2*8 * 5];
+ sp_point_256 p1d;
+ sp_point_256 p2d;
+#endif
+ sp_digit* u1 = NULL;
+ sp_digit* u2 = NULL;
+ sp_digit* s = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* p1;
+ sp_point_256* p2 = NULL;
+ sp_digit carry;
+ int32_t c;
+ int err;
+
+ err = sp_256_point_new_8(heap, p1d, p1);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, p2d, p2);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ /* u1, u2, s: 2*8 digits each; tmp: 2*8*5 digits => 16*8 total. */
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ u1 = d + 0 * 8;
+ u2 = d + 2 * 8;
+ s = d + 4 * 8;
+ tmp = d + 6 * 8;
+#else
+ u1 = u1d;
+ u2 = u2d;
+ s = sd;
+ tmp = tmpd;
+#endif
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ /* Load e, r, s and the public key point Q. */
+ sp_256_from_bin(u1, 8, hash, (int)hashLen);
+ sp_256_from_mp(u2, 8, r);
+ sp_256_from_mp(s, 8, sm);
+ sp_256_from_mp(p2->x, 8, pX);
+ sp_256_from_mp(p2->y, 8, pY);
+ sp_256_from_mp(p2->z, 8, pZ);
+
+ /* Convert s to Montgomery form (mod order). */
+ {
+ sp_256_mul_8(s, s, p256_norm_order);
+ }
+ err = sp_256_mod_8(s, s, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_8(s);
+ /* u1 = e/s mod order, u2 = r/s mod order. */
+ {
+ sp_256_mont_inv_order_8(s, s, tmp);
+ sp_256_mont_mul_order_8(u1, u1, s);
+ sp_256_mont_mul_order_8(u2, u2, s);
+ }
+
+ /* p1 = u1.G */
+ err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
+ }
+ if (err == MP_OKAY) {
+ /* p2 = u2.Q */
+ err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
+ }
+
+ if (err == MP_OKAY) {
+ {
+ /* p1 = u1.G + u2.Q */
+ sp_256_proj_point_add_8(p1, p1, p2, tmp);
+ if (sp_256_iszero_8(p1->z)) {
+ /* Addition produced Z == 0: either the inputs were equal
+ * (all ordinates zeroed - double instead) or the result is
+ * the point at infinity. */
+ if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
+ sp_256_proj_point_dbl_8(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ p1->x[4] = 0;
+ p1->x[5] = 0;
+ p1->x[6] = 0;
+ p1->x[7] = 0;
+ XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ }
+ }
+ }
+
+ /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+ /* Reload r and convert to Montgomery form. */
+ sp_256_from_mp(u2, 8, r);
+ err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+ }
+
+ if (err == MP_OKAY) {
+ /* u1 = r.z'.z' mod prime */
+ sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
+ *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+ if (*res == 0) {
+ /* Reload r and add order. */
+ sp_256_from_mp(u2, 8, r);
+ carry = sp_256_add_8(u2, u2, p256_order);
+ /* Carry means result is greater than mod and is not valid. */
+ if (carry == 0) {
+ sp_256_norm_8(u2);
+
+ /* Compare with mod and if greater or equal then not valid. */
+ c = sp_256_cmp_8(u2, p256_mod);
+ if (c < 0) {
+ /* Convert to Montgomery form */
+ err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+ if (err == MP_OKAY) {
+ /* u1 = (r + 1*order).z'.z' mod prime */
+ sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
+ p256_mp_mod);
+ *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+ }
+ }
+ }
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_256_point_free_8(p1, 0, heap);
+ sp_256_point_free_8(p2, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * Verifies the short-Weierstrass equation y^2 = x^3 - 3.x + b (mod p)
+ * by computing y^2 - x^3 + 3.x and comparing against b.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit t1d[2*8];
+ sp_digit t2d[2*8];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 8;
+ t2 = d + 2 * 8;
+#else
+ (void)heap;
+
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ /* t1 = y^2 mod p */
+ sp_256_sqr_8(t1, point->y);
+ (void)sp_256_mod_8(t1, t1, p256_mod);
+ /* t2 = x^3 mod p */
+ sp_256_sqr_8(t2, point->x);
+ (void)sp_256_mod_8(t2, t2, p256_mod);
+ sp_256_mul_8(t2, t2, point->x);
+ (void)sp_256_mod_8(t2, t2, p256_mod);
+ /* t1 = y^2 - x^3 mod p (negate t2, then add). */
+ (void)sp_256_sub_8(t2, p256_mod, t2);
+ sp_256_mont_add_8(t1, t1, t2, p256_mod);
+
+ /* t1 = y^2 - x^3 + 3.x mod p */
+ sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+
+ /* On the curve exactly when the left-hand side equals b. */
+ if (sp_256_cmp_8(t1, p256_b) != 0) {
+ err = MP_VAL;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 ptd;
+#endif
+    sp_point_256* pt;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_8(NULL, ptd, pt);
+    if (err == MP_OKAY) {
+        /* Load the affine ordinates and set Z to one. */
+        sp_256_from_mp(pt->x, 8, pX);
+        sp_256_from_mp(pt->y, 8, pY);
+        sp_256_from_bin(pt->z, 8, one, (int)sizeof(one));
+
+        /* Delegate the curve-equation check. */
+        err = sp_256_ecc_is_point_8(pt, NULL);
+    }
+
+    sp_256_point_free_8(pt, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_OUT_OF_RANGE_E if an ordinate is not below the prime,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[8];
+ sp_point_256 pubd;
+ sp_point_256 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_256* pub;
+ sp_point_256* p = NULL;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_8(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Static build: priv points at the stack buffer. */
+ priv = privd;
+#endif
+
+ /* Load the public point (affine, Z = 1) and the private scalar. */
+ sp_256_from_mp(pub->x, 8, pX);
+ sp_256_from_mp(pub->y, 8, pY);
+ sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+ sp_256_from_mp(priv, 8, privm);
+
+ /* Check point at infinity. */
+ if ((sp_256_iszero_8(pub->x) != 0) &&
+ (sp_256_iszero_8(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y */
+ if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
+ sp_256_cmp_8(pub->y, p256_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_256_ecc_is_point_8(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order = infinity */
+ err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_256_iszero_8(p->x) == 0) ||
+ (sp_256_iszero_8(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private = point */
+ err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_256_cmp_8(p->x, pub->x) != 0 ||
+ sp_256_cmp_8(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+    mp_int* qX, mp_int* qY, mp_int* qZ,
+    mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 8 * 5];
+    sp_point_256 pd;
+    sp_point_256 qd;
+#endif
+    /* NULL-initialized so the cleanup XFREE is safe when point allocation
+     * fails before tmp is assigned (dynamic build). */
+    sp_digit* tmp = NULL;
+    sp_point_256* p;
+    sp_point_256* q = NULL;
+    int err;
+
+    err = sp_256_point_new_8(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL,
+            DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Load both operand points. */
+        sp_256_from_mp(p->x, 8, pX);
+        sp_256_from_mp(p->y, 8, pY);
+        sp_256_from_mp(p->z, 8, pZ);
+        sp_256_from_mp(q->x, 8, qX);
+        sp_256_from_mp(q->y, 8, qY);
+        sp_256_from_mp(q->z, 8, qZ);
+
+        /* p = p + q */
+        sp_256_proj_point_add_8(p, p, q, tmp);
+    }
+
+    /* Export the (still projective) result. */
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(q, 0, NULL);
+    sp_256_point_free_8(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+    mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 8 * 2];
+    sp_point_256 pd;
+#endif
+    /* NULL-initialized so the cleanup XFREE is safe when point allocation
+     * fails before tmp is assigned (dynamic build). */
+    sp_digit* tmp = NULL;
+    sp_point_256* p;
+    int err;
+
+    err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL,
+            DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Load the operand point. */
+        sp_256_from_mp(p->x, 8, pX);
+        sp_256_from_mp(p->y, 8, pY);
+        sp_256_from_mp(p->z, 8, pZ);
+
+        /* p = 2.p */
+        sp_256_proj_point_dbl_8(p, p, tmp);
+    }
+
+    /* Export the (still projective) result. */
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 8 * 4];
+    sp_point_256 pd;
+#endif
+    /* NULL-initialized so the cleanup XFREE is safe when point allocation
+     * fails before tmp is assigned (dynamic build). */
+    sp_digit* tmp = NULL;
+    sp_point_256* p;
+    int err;
+
+    err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL,
+            DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        /* Load the projective point. */
+        sp_256_from_mp(p->x, 8, pX);
+        sp_256_from_mp(p->y, 8, pY);
+        sp_256_from_mp(p->z, 8, pZ);
+
+        /* Map to affine: divides out Z. */
+        sp_256_map_8(p, p, tmp);
+    }
+
+    /* Write the affine ordinates back in place. */
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * Computes y = y ^ ((p + 1) / 4) (mod p) via a fixed addition chain -
+ * the exponent built by the steps below, times 2^94, is (p256_mod + 1) / 4.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mont_sqrt_8(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit t1d[2 * 8];
+ sp_digit t2d[2 * 8];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 8;
+ t2 = d + 2 * 8;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ {
+ /* t2 = y ^ 0x2 */
+ sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0x3 */
+ sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xc */
+ sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xf */
+ sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xf0 */
+ sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xff */
+ sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xff00 */
+ sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffff */
+ sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xffff0000 */
+ sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff */
+ sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000000 */
+ sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001 */
+ sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+ sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+ sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
+ /* y = t1 ^ (2^94) = y ^ ((p256_mod + 1) / 4) */
+ sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod);
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * Computes y = sqrt(x^3 - 3.x + b) mod p and selects the root whose low
+ * bit matches the requested parity.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit xd[2 * 8];
+ sp_digit yd[2 * 8];
+#endif
+ sp_digit* x = NULL;
+ sp_digit* y = NULL;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ x = d + 0 * 8;
+ y = d + 2 * 8;
+#else
+ x = xd;
+ y = yd;
+#endif
+
+ /* Load x and convert to Montgomery form. */
+ sp_256_from_mp(x, 8, xm);
+ err = sp_256_mod_mul_norm_8(x, x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ /* y = x^3 */
+ {
+ sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
+ }
+ /* y = x^3 - 3x */
+ sp_256_mont_sub_8(y, y, x, p256_mod);
+ sp_256_mont_sub_8(y, y, x, p256_mod);
+ sp_256_mont_sub_8(y, y, x, p256_mod);
+ /* y = x^3 - 3x + b */
+ err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ sp_256_mont_add_8(y, y, x, p256_mod);
+ /* y = sqrt(x^3 - 3x + b) */
+ err = sp_256_mont_sqrt_8(y);
+ }
+ if (err == MP_OKAY) {
+ /* Convert out of Montgomery form. */
+ XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
+ sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
+ /* Pick the root with the requested parity of the low bit. */
+ if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+ sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
+ }
+
+ err = sp_256_to_mp(y, ym);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+ sp_digit x[2 * 12];
+ sp_digit y[2 * 12];
+ sp_digit z[2 * 12];
+ int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[12] = {
+ 0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+/* The Montgomery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[12] = {
+ 0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+/* The Montgomery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[12] = {
+ 0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[12] = {
+ 0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[12] = {
+ 0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0xe88fdc45;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+ /* X ordinate */
+ {
+ 0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
+ 0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Y ordinate */
+ {
+ 0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
+ 0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Z ordinate */
+ {
+ 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* infinity */
+ 0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+/* The b coefficient of the curve equation for P384. */
+static const sp_digit p384_b[12] = {
+ 0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
+ 0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
+};
+#endif
+
+/* Provide an sp_point_384 either from the heap or from caller storage.
+ *
+ * heap Heap to allocate from (dynamic build only).
+ * sp Caller-provided stack object (static build only).
+ * p Receives the usable point pointer.
+ * returns MEMORY_E when the pointer could not be provided and MP_OKAY
+ * otherwise.
+ */
+static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    int err = MP_OKAY;
+
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Dynamic build: ignore the caller's object and allocate from the heap. */
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    /* Static build: hand back the caller-provided object. */
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        err = MEMORY_E;
+    }
+    return err;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
+#endif
+
+
+/* Release an sp_point_384, optionally zeroizing its data first.
+ *
+ * p Point to release (may be NULL in the dynamic build).
+ * clear Non-zero to wipe the point data before release.
+ * heap Heap the point came from (dynamic build only).
+ */
+static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Heap-allocated: wipe when requested, then free. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    /* Caller-owned storage: nothing to free, only wipe when requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * The multiplication is folded into a fixed per-digit linear combination
+ * (the coefficient row for each output digit is shown in the comments),
+ * followed by carry normalization to 32-bit digits.
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime) - unused; p384 is implied.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ int64_t* t;
+#else
+ int64_t t[12];
+#endif
+ int64_t o;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ /* 1 0 0 0 0 0 0 0 1 1 0 -1 */
+ t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11];
+ /* -1 1 0 0 0 0 0 0 -1 0 1 1 */
+ t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11];
+ /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */
+ t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11];
+ /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */
+ t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11];
+ /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */
+ t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] - 2 * (uint64_t)a[11];
+ /* 0 1 1 0 -1 1 0 0 0 1 2 1 */
+ t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11];
+ /* 0 0 1 1 0 -1 1 0 0 0 1 2 */
+ t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11];
+ /* 0 0 0 1 1 0 -1 1 0 0 0 1 */
+ t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11];
+ /* 0 0 0 0 1 1 0 -1 1 0 0 0 */
+ t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8];
+ /* 0 0 0 0 0 1 1 0 -1 1 0 0 */
+ t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9];
+ /* 0 0 0 0 0 0 1 1 0 -1 1 0 */
+ t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10];
+ /* 0 0 0 0 0 0 0 1 1 0 -1 1 */
+ t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11];
+
+ /* Normalize each digit to 32 bits, propagating (signed) carries up. */
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+ t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+ t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+ t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+ /* Fold the top carry o back in using
+ * 2^384 = 2^128 + 2^96 - 2^32 + 1 (mod p384). */
+ o = t[11] >> 32; t[11] &= 0xffffffff;
+ t[0] += o;
+ t[1] -= o;
+ t[3] += o;
+ t[4] += o;
+ /* Second normalization pass after the fold. */
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+ t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+ t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+ t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+ r[0] = t[0];
+ r[1] = t[1];
+ r[2] = t[2];
+ r[3] = t[3];
+ r[4] = t[4];
+ r[5] = t[5];
+ r[6] = t[6];
+ r[7] = t[7];
+ r[8] = t[8];
+ r[9] = t[9];
+ r[10] = t[10];
+ r[11] = t[11];
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL)
+ XFREE(t, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+ return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of 32-bit digits to write to r.
+ * a A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+ /* mp_digit and sp_digit widths match - straight copy plus zero pad. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 32
+ /* Each mp_digit spans one or more 32-bit sp_digits - split it up. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 32U) <= (word32)DIGIT_BIT) {
+ s += 32U;
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* Each sp_digit is assembled from several smaller mp_digits. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 32) {
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 32 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * Each coordinate is cleared and then filled from the corresponding
+ * mp_int of the source point; the result is always marked finite.
+ *
+ * p Point of type sp_point_384 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm)
+{
+ p->infinity = 0;
+ XMEMSET(p->x, 0, sizeof(p->x));
+ sp_384_from_mp(p->x, 12, pm->x);
+ XMEMSET(p->y, 0, sizeof(p->y));
+ sp_384_from_mp(p->y, 12, pm->y);
+ XMEMSET(p->z, 0, sizeof(p->z));
+ sp_384_from_mp(p->z, 12, pm->z);
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Repacks 12 x 32-bit sp words (384 bits) into mp_digit words after
+ * growing r to the required size.  Three compile-time variants cover
+ * mp_digit words equal to, narrower than, and wider than 32 bits.
+ *
+ * a A single precision integer (12 words).
+ * r A multi-precision integer (result).
+ * returns MP_OKAY on success, else the error from mp_grow.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ /* Same word width: direct copy of all 12 words. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 12);
+ r->used = 12;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* mp_digit narrower than 32 bits: each 32-bit sp word is split
+ * across several mp words; 's' is the bit offset already consumed
+ * from the current sp word. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 12; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ /* Emit further DIGIT_BIT-sized chunks from the same sp word. */
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp_digit wider than 32 bits: several sp words are packed into
+ * each mp word; 's' is the bit position within the current mp word
+ * where the next sp word lands. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 12; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ /* Current mp word is full: mask (unless the mask would be a
+ * no-op at 32/64 bits), then carry the remainder over. */
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * Converts x, y and z in turn, stopping at the first failure.
+ *
+ * p Point of type sp_point_384.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm)
+{
+ int ret;
+
+ if ((ret = sp_384_to_mp(p->x, pm->x)) != MP_OKAY) {
+ return ret;
+ }
+ if ((ret = sp_384_to_mp(p->y, pm->y)) != MP_OKAY) {
+ return ret;
+ }
+ return sp_384_to_mp(p->z, pm->z);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Compact looped ARM32 implementation (WOLFSSL_SP_SMALL) using product
+ * scanning over byte offsets.  r5 walks the output column (0..88 in
+ * steps of 4); for each column the inner loop sums every partial
+ * product a[r3/4] * b[r4/4] with r3 + r4 == r5 into the three-word
+ * accumulator r6/r7/r8 (lo/mid/carry).  The 96-byte product is built
+ * on the stack and copied out 16 bytes at a time at the end, so r may
+ * alias a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #96\n\t"
+ "mov r5, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ /* Column loop: r3 = first a byte-index = max(0, r5 - 44),
+ * r4 = matching b byte-index = r5 - r3. */
+ "\n1:\n\t"
+ "subs r3, r5, #44\n\t"
+ "it cc\n\t"
+ "movcc r3, #0\n\t"
+ "sub r4, r5, r3\n\t"
+ /* Inner loop: accumulate a[r3/4]*b[r4/4] into r6/r7/r8 while
+ * r3 <= r5 and r3 < 48. */
+ "\n2:\n\t"
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r12, [%[b], r4]\n\t"
+ "umull r9, r10, r14, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, #0\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #48\n\t"
+ "beq 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ /* Column done: store the low word and rotate the accumulator. */
+ "\n3:\n\t"
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #88\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ /* Copy the stacked 96-byte result to r, 16 bytes per iteration;
+ * sp is advanced as we go so it ends restored (6 * 16 == 96). */
+ "\n4:\n\t"
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Fully unrolled 12x12 schoolbook multiply using product scanning:
+ * output column k sums every partial product A[i]*B[j] with i+j == k
+ * into a rotating three-register accumulator (lo/mid/carry cycling
+ * through r3/r4/r5).  r10 is kept at zero so "adc rX, r10, r10"
+ * captures a carry into a fresh register and "adc rX, rX, r10"
+ * propagates one.  The low 12 result words are staged on the stack
+ * and only copied to r after every word of a and b has been read, so
+ * r may alias a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #48\n\t"
+ "mov r10, #0\n\t"
+ "# A[0] * B[0]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r3, r4, r8, r9\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [sp]\n\t"
+ "# A[0] * B[1]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[0]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #4]\n\t"
+ "# A[0] * B[2]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[1]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[0]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #8]\n\t"
+ "# A[0] * B[3]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[2]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[1]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[0]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #12]\n\t"
+ "# A[0] * B[4]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[3]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[2]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[1]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[0]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #16]\n\t"
+ "# A[0] * B[5]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[4]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[3]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[2]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[1]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[0]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #20]\n\t"
+ "# A[0] * B[6]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[5]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[4]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[3]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[2]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[1]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[0]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #24]\n\t"
+ "# A[0] * B[7]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[6]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[5]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[4]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[3]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[2]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[1]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[0]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #28]\n\t"
+ "# A[0] * B[8]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[7]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[6]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[5]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[4]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[3]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[2]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[1]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[0]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #32]\n\t"
+ "# A[0] * B[9]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[1] * B[8]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[2] * B[7]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[6]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[5]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[4]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[3]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[2]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[1]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[0]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [sp, #36]\n\t"
+ "# A[0] * B[10]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[1] * B[9]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[2] * B[8]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[3] * B[7]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[6]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[5]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[4]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[3]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[2]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[1]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[0]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [sp, #40]\n\t"
+ "# A[0] * B[11]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[1] * B[10]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[2] * B[9]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[3] * B[8]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[4] * B[7]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[6]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[5]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[4]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[3]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[2]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[1]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[0]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #0]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [sp, #44]\n\t"
+ /* Columns 12..23 go straight to the output buffer r. */
+ "# A[1] * B[11]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[2] * B[10]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[3] * B[9]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[4] * B[8]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[5] * B[7]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[6]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[5]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[4]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[3]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[2]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[1]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #4]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "# A[2] * B[11]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[3] * B[10]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[4] * B[9]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[5] * B[8]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[6] * B[7]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[6]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[5]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[4]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[3]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[2]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #8]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "# A[3] * B[11]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[4] * B[10]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[5] * B[9]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[6] * B[8]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[7] * B[7]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[6]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[5]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[4]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[3]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "# A[4] * B[11]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[5] * B[10]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[6] * B[9]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[7] * B[8]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[8] * B[7]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[6]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[5]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[4]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #16]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "# A[5] * B[11]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[6] * B[10]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[7] * B[9]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[8] * B[8]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[9] * B[7]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[6]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[5]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #20]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "# A[6] * B[11]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[7] * B[10]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[8] * B[9]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[9] * B[8]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[10] * B[7]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[6]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #24]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #68]\n\t"
+ "# A[7] * B[11]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[8] * B[10]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[9] * B[9]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[10] * B[8]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "# A[11] * B[7]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #72]\n\t"
+ "# A[8] * B[11]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r10, r10\n\t"
+ "# A[9] * B[10]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[10] * B[9]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "# A[11] * B[8]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #32]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "# A[9] * B[11]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r10, r10\n\t"
+ "# A[10] * B[10]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "# A[11] * B[9]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #36]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #80]\n\t"
+ "# A[10] * B[11]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r10, r10\n\t"
+ "# A[11] * B[10]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #40]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #84]\n\t"
+ "# A[11] * B[11]\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "umull r6, r7, r8, r9\n\t"
+ "adds r4, r4, r6\n\t"
+ "adc r5, r5, r7\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "str r5, [%[r], #92]\n\t"
+ /* All of a and b has been read: copy the stacked low 12 words
+ * (columns 0..11) out to r[0..11]. */
+ "ldr r3, [sp, #0]\n\t"
+ "ldr r4, [sp, #4]\n\t"
+ "ldr r5, [sp, #8]\n\t"
+ "ldr r6, [sp, #12]\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r3, [sp, #16]\n\t"
+ "ldr r4, [sp, #20]\n\t"
+ "ldr r5, [sp, #24]\n\t"
+ "ldr r6, [sp, #28]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "str r5, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r3, [sp, #32]\n\t"
+ "ldr r4, [sp, #36]\n\t"
+ "ldr r5, [sp, #40]\n\t"
+ "ldr r6, [sp, #44]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "str r5, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "add sp, sp, #48\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not subtracting.
+ * Constant time with respect to the value of m: both paths always read and
+ * write all 12 limbs.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * Returns 0 when no borrow occurred and all ones (-1) when it did
+ * (final "sbc %[c], r9, r9" materialises the borrow as a mask).
+ */
+static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+ /* Word-by-word loop over all 12 limbs (byte offsets 0..44); the borrow
+  * is carried between iterations through %[c]. */
+ __asm__ __volatile__ (
+ "mov r9, #0\n\t"
+ "mov r8, #0\n\t"
+ "1:\n\t"
+ "subs %[c], r9, %[c]\n\t"
+ "ldr r4, [%[a], r8]\n\t"
+ "ldr r5, [%[b], r8]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbc %[c], r9, r9\n\t"
+ "str r4, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, #48\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#else
+ /* Fully unrolled: two limbs per group, borrow chained through the flags
+  * (first group uses subs, the rest sbcs). */
+ __asm__ __volatile__ (
+
+ "mov r9, #0\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "subs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r6, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r6, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r6, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "str r6, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r6, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r7, [%[b], #44]\n\t"
+ "and r5, r5, %[m]\n\t"
+ "and r7, r7, %[m]\n\t"
+ "sbcs r4, r4, r5\n\t"
+ "sbcs r6, r6, r7\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r6, [%[r], #44]\n\t"
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+ );
+#endif /* WOLFSSL_SP_SMALL */
+
+ return c;
+}
+
+#define sp_384_mont_reduce_order_12 sp_384_mont_reduce_12
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.  a is double width:
+ *   each iteration also carries into word i+12, so 24 words are accessed.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ /* r10 and r14 (LR) cache the two lowest live limbs across iterations;
+  * r12 is the loop counter i (in bytes). */
+ __asm__ __volatile__ (
+ "# i = 0\n\t"
+ "mov r12, #0\n\t"
+ "ldr r10, [%[a], #0]\n\t"
+ "ldr r14, [%[a], #4]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul r8, %[mp], r10\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "ldr r7, [%[m], #0]\n\t"
+ "ldr r9, [%[a], #0]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r10, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "ldr r7, [%[m], #4]\n\t"
+ "ldr r9, [%[a], #4]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r10, r14, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r10, r10, r5\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr r7, [%[m], #8]\n\t"
+ "ldr r14, [%[a], #8]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r14, r14, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r14, r14, r4\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr r7, [%[m], #12]\n\t"
+ "ldr r9, [%[a], #12]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #12]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr r7, [%[m], #16]\n\t"
+ "ldr r9, [%[a], #16]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #16]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr r7, [%[m], #20]\n\t"
+ "ldr r9, [%[a], #20]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #20]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr r7, [%[m], #24]\n\t"
+ "ldr r9, [%[a], #24]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #24]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr r7, [%[m], #28]\n\t"
+ "ldr r9, [%[a], #28]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #28]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr r7, [%[m], #32]\n\t"
+ "ldr r9, [%[a], #32]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #32]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr r7, [%[m], #36]\n\t"
+ "ldr r9, [%[a], #36]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r4, r7, #0\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #36]\n\t"
+ "adc r4, r4, #0\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr r7, [%[m], #40]\n\t"
+ "ldr r9, [%[a], #40]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r9, r9, r6\n\t"
+ "adc r5, r7, #0\n\t"
+ "adds r9, r9, r4\n\t"
+ "str r9, [%[a], #40]\n\t"
+ "adc r5, r5, #0\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr r7, [%[m], #44]\n\t"
+ "ldr r9, [%[a], #44]\n\t"
+ "umull r6, r7, r8, r7\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r7, r7, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ "adc %[ca], %[ca], %[ca]\n\t"
+ "adds r9, r9, r5\n\t"
+ "str r9, [%[a], #44]\n\t"
+ "ldr r9, [%[a], #48]\n\t"
+ "adcs r9, r9, r7\n\t"
+ "str r9, [%[a], #48]\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ "# i += 1\n\t"
+ "add %[a], %[a], #4\n\t"
+ "add r12, r12, #4\n\t"
+ "cmp r12, #48\n\t"
+ "blt 1b\n\t"
+ "str r10, [%[a], #0]\n\t"
+ "str r14, [%[a], #4]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+ );
+
+ /* The loop advanced a by 12 words, so a - 12 is the original base and */
+ /* a points at the upper half; subtract m when a final carry remains.  */
+ sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_384_mul_12(r, a, b);
+ sp_384_mont_reduce_12(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ * Loop-based product-scanning square: r5 walks the output column (as a
+ * byte offset), r3/r4 walk the A[i]/A[j] pair offsets.  Cross products
+ * A[i]*A[j] (i != j) are accumulated twice and the diagonal square once
+ * per column.  Results are staged in a 96-byte stack area and copied to
+ * r at the end.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #96\n\t"
+ "mov r12, #0\n\t"
+ "mov r6, #0\n\t"
+ "mov r7, #0\n\t"
+ "mov r8, #0\n\t"
+ "mov r5, #0\n\t"
+ "\n1:\n\t"
+ "subs r3, r5, #44\n\t"
+ "it cc\n\t"
+ "movcc r3, r12\n\t"
+ "sub r4, r5, r3\n\t"
+ "\n2:\n\t"
+ "cmp r4, r3\n\t"
+ "beq 4f\n\t"
+ "ldr r14, [%[a], r3]\n\t"
+ "ldr r9, [%[a], r4]\n\t"
+ "umull r9, r10, r14, r9\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ "ldr r14, [%[a], r3]\n\t"
+ "umull r9, r10, r14, r14\n\t"
+ "adds r6, r6, r9\n\t"
+ "adcs r7, r7, r10\n\t"
+ "adc r8, r8, r12\n\t"
+ "\n5:\n\t"
+ "add r3, r3, #4\n\t"
+ "sub r4, r4, #4\n\t"
+ "cmp r3, #48\n\t"
+ "beq 3f\n\t"
+ "cmp r3, r4\n\t"
+ "bgt 3f\n\t"
+ "cmp r3, r5\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "str r6, [sp, r5]\n\t"
+ "mov r6, r7\n\t"
+ "mov r7, r8\n\t"
+ "mov r8, #0\n\t"
+ "add r5, r5, #4\n\t"
+ "cmp r5, #88\n\t"
+ "ble 1b\n\t"
+ "str r6, [sp, r5]\n\t"
+ "\n4:\n\t"
+ "ldr r6, [sp, #0]\n\t"
+ "ldr r7, [sp, #4]\n\t"
+ "ldr r8, [sp, #8]\n\t"
+ "ldr r3, [sp, #12]\n\t"
+ "str r6, [%[r], #0]\n\t"
+ "str r7, [%[r], #4]\n\t"
+ "str r8, [%[r], #8]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "add sp, sp, #16\n\t"
+ "add %[r], %[r], #16\n\t"
+ "subs r5, r5, #16\n\t"
+ "bgt 4b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" /* NOTE(review): "r9" listed twice -- redundant duplicate */
+ );
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ * Fully unrolled product-scanning square: each cross product A[i]*A[j]
+ * (i != j) is added twice and each diagonal A[i]*A[i] once.  The low 12
+ * result words are staged on a 48-byte stack area and copied to r at the
+ * end (presumably so that r may alias a -- TODO confirm); the high words
+ * are written directly to r[48..92].
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "sub sp, sp, #48\n\t"
+ "mov r14, #0\n\t"
+ "# A[0] * A[0]\n\t"
+ "ldr r10, [%[a], #0]\n\t"
+ "umull r8, r3, r10, r10\n\t"
+ "mov r4, #0\n\t"
+ "str r8, [sp]\n\t"
+ "# A[0] * A[1]\n\t"
+ "ldr r10, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r14, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "str r3, [sp, #4]\n\t"
+ "# A[0] * A[2]\n\t"
+ "ldr r10, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r14, r14\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "# A[1] * A[1]\n\t"
+ "ldr r10, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "str r4, [sp, #8]\n\t"
+ "# A[0] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r14, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "# A[1] * A[2]\n\t"
+ "ldr r10, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "str r2, [sp, #12]\n\t"
+ "# A[0] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r14, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "# A[1] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "# A[2] * A[2]\n\t"
+ "ldr r10, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "str r3, [sp, #16]\n\t"
+ "# A[0] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #20]\n\t"
+ "# A[0] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[3]\n\t"
+ "ldr r10, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #24]\n\t"
+ "# A[0] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #28]\n\t"
+ "# A[0] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[4]\n\t"
+ "ldr r10, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #32]\n\t"
+ "# A[0] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [sp, #36]\n\t"
+ "# A[0] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[5]\n\t"
+ "ldr r10, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [sp, #40]\n\t"
+ "# A[0] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #0]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[1] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[2] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [sp, #44]\n\t"
+ "# A[1] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[2] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[3] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[6]\n\t"
+ "ldr r10, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "# A[2] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[3] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[4] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "# A[3] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[4] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[5] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[7]\n\t"
+ "ldr r10, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "# A[4] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r4, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[5] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[6] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r2, r2, r5\n\t"
+ "adcs r3, r3, r6\n\t"
+ "adc r4, r4, r7\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "# A[5] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r2, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[6] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[7] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[8]\n\t"
+ "ldr r10, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r3, r3, r5\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adc r2, r2, r7\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "# A[6] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r5, r6, r10, r8\n\t"
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "# A[7] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "# A[8] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r5, r5, r8\n\t"
+ "adcs r6, r6, r9\n\t"
+ "adc r7, r7, r14\n\t"
+ "adds r5, r5, r5\n\t"
+ "adcs r6, r6, r6\n\t"
+ "adc r7, r7, r7\n\t"
+ "adds r4, r4, r5\n\t"
+ "adcs r2, r2, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "# A[7] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r14, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "# A[8] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "# A[9] * A[9]\n\t"
+ "ldr r10, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "str r2, [%[r], #72]\n\t"
+ "# A[8] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r14, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "# A[9] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "adds r3, r3, r8\n\t"
+ "adcs r4, r4, r9\n\t"
+ "adc r2, r2, r14\n\t"
+ "str r3, [%[r], #76]\n\t"
+ "# A[9] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r14, r14\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "# A[10] * A[10]\n\t"
+ "ldr r10, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r4, r4, r8\n\t"
+ "adcs r2, r2, r9\n\t"
+ "adc r3, r3, r14\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "# A[10] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "umull r8, r9, r10, r8\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r14, r14\n\t"
+ "adds r2, r2, r8\n\t"
+ "adcs r3, r3, r9\n\t"
+ "adc r4, r4, r14\n\t"
+ "str r2, [%[r], #84]\n\t"
+ "# A[11] * A[11]\n\t"
+ "ldr r10, [%[a], #44]\n\t"
+ "umull r8, r9, r10, r10\n\t"
+ "adds r3, r3, r8\n\t"
+ "adc r4, r4, r9\n\t"
+ "str r3, [%[r], #88]\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r2, [sp, #0]\n\t"
+ "ldr r3, [sp, #4]\n\t"
+ "ldr r4, [sp, #8]\n\t"
+ "ldr r8, [sp, #12]\n\t"
+ "str r2, [%[r], #0]\n\t"
+ "str r3, [%[r], #4]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r8, [%[r], #12]\n\t"
+ "ldr r2, [sp, #16]\n\t"
+ "ldr r3, [sp, #20]\n\t"
+ "ldr r4, [sp, #24]\n\t"
+ "ldr r8, [sp, #28]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r8, [%[r], #28]\n\t"
+ "ldr r2, [sp, #32]\n\t"
+ "ldr r3, [sp, #36]\n\t"
+ "ldr r4, [sp, #40]\n\t"
+ "ldr r8, [sp, #44]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "str r8, [%[r], #44]\n\t"
+ "add sp, sp, #48\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14" /* NOTE(review): "r8" listed twice -- redundant duplicate */
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_384_sqr_12(r, a);
+ sp_384_mont_reduce_12(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times.
+ * (r = a ^ (2^n) mod m)
+ *
+ * r Result of the repeated squaring.
+ * a Number to square in Montgomery form.
+ * n Number of squarings to perform (n >= 1).
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    /* First squaring reads from a; the remaining n-1 square r in place. */
+    sp_384_mont_sqr_12(r, a, m, mp);
+    while (--n > 0) {
+        sp_384_mont_sqr_12(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve: the modulus minus 2, used as the exponent
+ * when inverting via Fermat's little theorem (a^(p-2) = a^-1 mod p). */
+static const uint32_t p384_mod_minus_2[12] = {
+ 0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
+ 0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ /* Square-and-multiply exponentiation by p - 2 (Fermat inversion),
+  * scanning the bits of p384_mod_minus_2 from the top down. */
+ XMEMCPY(t, a, sizeof(sp_digit) * 12);
+ for (i=382; i>=0; i--) {
+ sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
+ if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+ sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 12);
+#else
+ /* Fixed addition chain for the exponent p - 2; each comment gives the
+  * exponent accumulated so far (in hex). */
+ sp_digit* t1 = td;
+ sp_digit* t2 = td + 2 * 12;
+ sp_digit* t3 = td + 4 * 12;
+ sp_digit* t4 = td + 6 * 12;
+ sp_digit* t5 = td + 8 * 12;
+
+ /* 0x2 */
+ sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
+ /* 0x3 */
+ sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
+ /* 0xc */
+ sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
+ /* 0xf */
+ sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
+ /* 0x1e */
+ sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
+ /* 0x1f */
+ sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
+ /* 0x3e0 */
+ sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
+ /* 0x3ff */
+ sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+ /* 0x7fe0 */
+ sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
+ /* 0x7fff */
+ sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
+ /* 0x3fff8000 */
+ sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
+ /* 0x3fffffff */
+ sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+ /* 0xfffffffc */
+ sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
+ /* 0xfffffffd */
+ sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
+ /* 0xffffffff */
+ sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
+ /* 0xfffffffc0000000 */
+ sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffff */
+ sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffff000000000000000 */
+ sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
+ /* 0xffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+ /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+ sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
+ /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+ /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+ sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod);
+ /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+ sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+ sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+ sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+ sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b)
+{
+    /* Result accumulator; fixed up by the final XOR with the mask register
+     * so that equal inputs yield 0. */
+    sp_digit r = -1;
+    sp_digit one = 1;
+
+
+#ifdef WOLFSSL_SP_SMALL
+    /* Walk the 12 words from most significant (byte offset 44) down to 0.
+     * r3 is an all-ones mask while every word seen so far was equal; after
+     * the first difference r3 becomes 0, masking out all later words so
+     * they cannot change the verdict.  Every word is always visited, which
+     * keeps the comparison constant time. */
+    __asm__ __volatile__ (
+        "mov r7, #0\n\t"
+        "mov r3, #-1\n\t"
+        "mov r6, #44\n\t"
+        "1:\n\t"
+        "ldr r4, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "subs r6, r6, #4\n\t"
+        "bcs 1b\n\t"
+        "eor %[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#else
+    /* Fully unrolled version of the loop above: one identical stanza per
+     * word, most significant first.  Same masking scheme, same constant
+     * running time. */
+    __asm__ __volatile__ (
+        "mov r7, #0\n\t"
+        "mov r3, #-1\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #44]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #36]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "and r4, r4, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "subs r4, r4, r5\n\t"
+        "it hi\n\t"
+        "movhi %[r], %[one]\n\t"
+        "it lo\n\t"
+        "movlo %[r], r3\n\t"
+        "it ne\n\t"
+        "movne r3, r7\n\t"
+        "eor %[r], %[r], r3\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+#endif
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * a  Array of sp_digit to normalize.
+ *
+ * Expands to nothing: with full 32-bit digits there are no excess bits to
+ * fold over, but the macro keeps call sites uniform with builds that use
+ * smaller digit sizes.
+ */
+#define sp_384_norm_12(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    /* t must hold at least the scratch needed by sp_384_mont_inv_12 plus
+     * the two 2*12-digit temporaries carved out below. */
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    int32_t n;
+
+    /* t1 = 1/Z (Montgomery form). */
+    sp_384_mont_inv_12(t1, p->z, t + 2*12);
+
+    /* t2 = 1/Z^2, t1 = 1/Z^3. */
+    sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U);
+    /* Montgomery reduce to take the result out of Montgomery form. */
+    sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_12(r->x, p384_mod);
+    sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_12(r->y, p384_mod);
+    sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->y);
+
+    /* Affine result: Z = 1. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Carry out of the 384-bit addition: 0 or 1. */
+    sp_digit c = 0;
+
+    /* Loop three times over four words each.  r12 marks one past the end
+     * of a; "adds c, c, #-1" re-loads the saved carry into the C flag at
+     * the top of each iteration, and "adc c, r4, #0" (r4 forced to 0)
+     * saves it again at the bottom. */
+    __asm__ __volatile__ (
+        "add r12, %[a], #48\n\t"
+        "\n1:\n\t"
+        "adds %[c], %[c], #-1\n\t"
+        "ldr r4, [%[a]], #4\n\t"
+        "ldr r5, [%[a]], #4\n\t"
+        "ldr r6, [%[a]], #4\n\t"
+        "ldr r7, [%[a]], #4\n\t"
+        "ldr r8, [%[b]], #4\n\t"
+        "ldr r9, [%[b]], #4\n\t"
+        "ldr r10, [%[b]], #4\n\t"
+        "ldr r14, [%[b]], #4\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r]], #4\n\t"
+        "str r5, [%[r]], #4\n\t"
+        "str r6, [%[r]], #4\n\t"
+        "str r7, [%[r]], #4\n\t"
+        "mov r4, #0\n\t"
+        "adc %[c], r4, #0\n\t"
+        "cmp %[a], r12\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Carry out of the 384-bit addition: 0 or 1. */
+    sp_digit c = 0;
+
+    /* Fully unrolled: three groups of four words; carry chains through the
+     * adcs instructions and is captured at the end via "adc c, r12, r12"
+     * with r12 pre-zeroed. */
+    __asm__ __volatile__ (
+        "mov r12, #0\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[a], #4]\n\t"
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r7, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "ldr r9, [%[b], #4]\n\t"
+        "ldr r10, [%[b], #8]\n\t"
+        "ldr r14, [%[b], #12]\n\t"
+        "adds r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r5, [%[r], #4]\n\t"
+        "str r6, [%[r], #8]\n\t"
+        "str r7, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[a], #20]\n\t"
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r7, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "ldr r9, [%[b], #20]\n\t"
+        "ldr r10, [%[b], #24]\n\t"
+        "ldr r14, [%[b], #28]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "str r6, [%[r], #24]\n\t"
+        "str r7, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[a], #36]\n\t"
+        "ldr r6, [%[a], #40]\n\t"
+        "ldr r7, [%[a], #44]\n\t"
+        "ldr r8, [%[b], #32]\n\t"
+        "ldr r9, [%[b], #36]\n\t"
+        "ldr r10, [%[b], #40]\n\t"
+        "ldr r14, [%[b], #44]\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r14\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "str r5, [%[r], #36]\n\t"
+        "str r6, [%[r], #40]\n\t"
+        "str r7, [%[r], #44]\n\t"
+        "adc %[c], r12, r12\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r Result of addition.
+ * a First number to add in Montogmery form.
+ * b Second number to add in Montogmery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    /* Plain 384-bit add first; sp_384_add_12 returns the carry (0 or 1).
+     * Then conditionally subtract the modulus, using 0 - carry as an
+     * all-ones/all-zeros mask, to bring the result back into range. */
+    sp_384_cond_sub_12(r, r, m, 0 - sp_384_add_12(r, a, b));
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r Result of doubling.
+ * a Number to double in Montogmery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    /* Carry out of the doubling (0 or 1). */
+    sp_digit over = sp_384_add_12(r, a, a);
+
+    /* Fold the modulus back out when the doubling overflowed 384 bits;
+     * 0 - over forms the conditional mask. */
+    sp_384_cond_sub_12(r, r, m, 0 - over);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r Result of Tripling.
+ * a Number to triple in Montogmery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    /* r = 2a, reduced on carry out of 384 bits... */
+    sp_384_cond_sub_12(r, r, m, 0 - sp_384_add_12(r, a, a));
+    /* ...then r = r + a, again reduced on carry. */
+    sp_384_cond_sub_12(r, r, m, 0 - sp_384_add_12(r, r, a));
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Borrow out of the 384-bit subtraction: 0 when none, all-ones (-1)
+     * when a < b, so the return value doubles as a conditional mask. */
+    sp_digit c = 0;
+
+    /* Loop three times over four words each.  "rsbs c, c, #0" turns the
+     * saved borrow back into the C flag; "sbc c, r4, r4" captures the new
+     * borrow as 0 or -1. */
+    __asm__ __volatile__ (
+        "add r12, %[a], #48\n\t"
+        "\n1:\n\t"
+        "rsbs %[c], %[c], #0\n\t"
+        "ldr r4, [%[a]], #4\n\t"
+        "ldr r5, [%[a]], #4\n\t"
+        "ldr r6, [%[a]], #4\n\t"
+        "ldr r7, [%[a]], #4\n\t"
+        "ldr r8, [%[b]], #4\n\t"
+        "ldr r9, [%[b]], #4\n\t"
+        "ldr r10, [%[b]], #4\n\t"
+        "ldr r14, [%[b]], #4\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "sbcs r6, r6, r10\n\t"
+        "sbcs r7, r7, r14\n\t"
+        "str r4, [%[r]], #4\n\t"
+        "str r5, [%[r]], #4\n\t"
+        "str r6, [%[r]], #4\n\t"
+        "str r7, [%[r]], #4\n\t"
+        "sbc %[c], r4, r4\n\t"
+        "cmp %[a], r12\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Borrow out of the 384-bit subtraction: 0 when none, all-ones (-1)
+     * when a < b, so the return value doubles as a conditional mask. */
+    sp_digit c = 0;
+
+    /* Fully unrolled: three groups of four words; the borrow chains
+     * through the sbcs instructions and "sbc c, c, #0" converts the final
+     * borrow into 0 or -1 (c starts at 0). */
+    __asm__ __volatile__ (
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[a], #8]\n\t"
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r7, [%[b], #0]\n\t"
+        "ldr r8, [%[b], #4]\n\t"
+        "ldr r9, [%[b], #8]\n\t"
+        "ldr r10, [%[b], #12]\n\t"
+        "subs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "sbcs r6, r6, r10\n\t"
+        "str r3, [%[r], #0]\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "str r5, [%[r], #8]\n\t"
+        "str r6, [%[r], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[a], #24]\n\t"
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r7, [%[b], #16]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "ldr r9, [%[b], #24]\n\t"
+        "ldr r10, [%[b], #28]\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "sbcs r6, r6, r10\n\t"
+        "str r3, [%[r], #16]\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "str r5, [%[r], #24]\n\t"
+        "str r6, [%[r], #28]\n\t"
+        "ldr r3, [%[a], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[a], #40]\n\t"
+        "ldr r6, [%[a], #44]\n\t"
+        "ldr r7, [%[b], #32]\n\t"
+        "ldr r8, [%[b], #36]\n\t"
+        "ldr r9, [%[b], #40]\n\t"
+        "ldr r10, [%[b], #44]\n\t"
+        "sbcs r3, r3, r7\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r9\n\t"
+        "sbcs r6, r6, r10\n\t"
+        "str r3, [%[r], #32]\n\t"
+        "str r4, [%[r], #36]\n\t"
+        "str r5, [%[r], #40]\n\t"
+        "str r6, [%[r], #44]\n\t"
+        "sbc %[c], %[c], #0\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    /* Carry out of the (possibly masked-to-zero) addition: 0 or 1. */
+    sp_digit c = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    /* One word per iteration; each b word is ANDed with the mask m before
+     * the add, so m == 0 makes this a plain copy-with-carry of a.  The add
+     * is always executed regardless of m — constant time. */
+    __asm__ __volatile__ (
+        "mov r9, #0\n\t"
+        "mov r8, #0\n\t"
+        "1:\n\t"
+        "adds %[c], %[c], #-1\n\t"
+        "ldr r4, [%[a], r8]\n\t"
+        "ldr r5, [%[b], r8]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adc %[c], r9, r9\n\t"
+        "str r4, [%[r], r8]\n\t"
+        "add r8, r8, #4\n\t"
+        "cmp r8, #48\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#else
+    /* Fully unrolled, two words per stanza; same masking scheme as the
+     * small version, carry chained through adcs and captured at the end. */
+    __asm__ __volatile__ (
+
+        "mov r9, #0\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r7, [%[b], #4]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adds r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r6, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r7, [%[b], #12]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "str r6, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r7, [%[b], #20]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r6, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r7, [%[b], #28]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "str r6, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r6, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r7, [%[b], #36]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "str r6, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r6, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r7, [%[b], #44]\n\t"
+        "and r5, r5, %[m]\n\t"
+        "and r7, r7, %[m]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "adcs r6, r6, r7\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "str r6, [%[r], #44]\n\t"
+        "adc %[c], r9, r9\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
+    );
+#endif /* WOLFSSL_SP_SMALL */
+
+    return c;
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r Result of subtration.
+ * a Number to subtract from in Montogmery form.
+ * b Number to subtract with in Montogmery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    /* Subtract, then add the modulus back when a borrow occurred;
+     * sp_384_sub_12 already returns an all-ones mask (-1) on borrow and 0
+     * otherwise, so it feeds sp_384_cond_add_12's mask directly. */
+    sp_384_cond_add_12(r, r, m, sp_384_sub_12(r, a, b));
+}
+
+/* Shift the 384-bit number right by one bit: r = a >> 1.
+ *
+ * r  Result of the shift (may alias a).
+ * a  Number to shift.
+ *
+ * Each word is shifted right and the low bit of the next word is ORed in
+ * as its new top bit; the most significant word gets a zero top bit.
+ */
+static void sp_384_rshift1_12(sp_digit* r, sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldr r2, [%[a]]\n\t"
+        "ldr r3, [%[a], #4]\n\t"
+        "lsr r2, r2, #1\n\t"
+        "orr r2, r2, r3, lsl #31\n\t"
+        "lsr r3, r3, #1\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "str r2, [%[r], #0]\n\t"
+        "orr r3, r3, r4, lsl #31\n\t"
+        "lsr r4, r4, #1\n\t"
+        "ldr r2, [%[a], #12]\n\t"
+        "str r3, [%[r], #4]\n\t"
+        "orr r4, r4, r2, lsl #31\n\t"
+        "lsr r2, r2, #1\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "orr r2, r2, r3, lsl #31\n\t"
+        "lsr r3, r3, #1\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "str r2, [%[r], #12]\n\t"
+        "orr r3, r3, r4, lsl #31\n\t"
+        "lsr r4, r4, #1\n\t"
+        "ldr r2, [%[a], #24]\n\t"
+        "str r3, [%[r], #16]\n\t"
+        "orr r4, r4, r2, lsl #31\n\t"
+        "lsr r2, r2, #1\n\t"
+        "ldr r3, [%[a], #28]\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "orr r2, r2, r3, lsl #31\n\t"
+        "lsr r3, r3, #1\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "str r2, [%[r], #24]\n\t"
+        "orr r3, r3, r4, lsl #31\n\t"
+        "lsr r4, r4, #1\n\t"
+        "ldr r2, [%[a], #36]\n\t"
+        "str r3, [%[r], #28]\n\t"
+        "orr r4, r4, r2, lsl #31\n\t"
+        "lsr r2, r2, #1\n\t"
+        "ldr r3, [%[a], #40]\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "orr r2, r2, r3, lsl #31\n\t"
+        "lsr r3, r3, #1\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "str r2, [%[r], #36]\n\t"
+        "orr r3, r3, r4, lsl #31\n\t"
+        "lsr r4, r4, #1\n\t"
+        "str r3, [%[r], #40]\n\t"
+        "str r4, [%[r], #44]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit lost;
+
+    /* If a is odd, add the (odd) modulus first so the value becomes even;
+     * the carry out of that addition is the bit pushed past word 11. */
+    lost = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1));
+    /* Halve the value and restore the carried-out bit as the new top bit. */
+    sp_384_rshift1_12(r, r);
+    r[11] |= lost << 31;
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    /* t supplies two 2*12-digit temporaries. */
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    /* x/y/z alias the result point's ordinates; p may equal r, so the
+     * statement order below is chosen to read p->x/p->y/p->z before the
+     * aliased output word is overwritten. */
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_12(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_12(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_12(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_12(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_12(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_12(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_12(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_12(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit diff = 0;
+    int i;
+
+    /* OR together the XOR of every word pair; only all-equal inputs leave
+     * diff at zero.  The loop has a fixed trip count and no early exit, so
+     * the comparison remains constant time. */
+    for (i = 0; i < 12; i++) {
+        diff |= a[i] ^ b[i];
+    }
+
+    return diff == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    /* t supplies five 2*12-digit temporaries. */
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: the addition formulas below break down when p == q
+     * (or p == -q), so detect that case and double instead. */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        /* If either input is infinity, redirect the ordinate writes into
+         * scratch so the copy below provides the correct result. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* When p is infinity the result is q, otherwise p (adjusted by the
+         * formulas below when both are finite). */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_12(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_12(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, x, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, y, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, y, t5, p384_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack storage when dynamic allocation is not used. */
+    sp_point_384 td[16];
+    sp_point_384 rtd;
+    sp_digit tmpd[2 * 12 * 6];
+#endif
+    sp_point_384* t;        /* 16-entry table: t[j] = j*g (fixed window). */
+    sp_point_384* rt;       /* Running result point. */
+    sp_digit* tmp;          /* Scratch for the point operations. */
+    sp_digit n;             /* Current word of the scalar being consumed. */
+    int i;
+    int c, y;               /* c = bits left in n; y = current 4-bit window. */
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Build the window table t[j] = j*g for j = 0..15, alternating
+         * doublings (even entries) and additions (odd entries). */
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod);
+        t[1].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        /* Consume the 384-bit scalar in 4-bit windows, most significant
+         * first: seed the result from the top window, then for each
+         * following window do four doublings and one table addition. */
+        i = 10;
+        n = k[i+1] << 0;
+        c = 28;
+        y = n >> 28;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+        n <<= 4;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--];
+                c += 32;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+
+            sp_384_proj_point_add_12(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+    /* Zeroize all intermediate data before release — it depends on the
+     * secret scalar. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_384_point_free_12(rt, 1, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points.
+ * Holds only the affine X and Y ordinates; Z is implicitly the
+ * normalization value (see uses with p384_norm_mod). */
+typedef struct sp_table_entry_384 {
+    sp_digit x[12];     /* X ordinate. */
+    sp_digit y[12];     /* Y ordinate. */
+} sp_table_entry_384;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r Result of repeated doubling of point.
+ * p Point to double.
+ * n Number of times to double
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t)
+{
+    /* t supplies five 2*12-digit temporaries. */
+    sp_digit* w = t;
+    sp_digit* a = t + 2*12;
+    sp_digit* b = t + 4*12;
+    sp_digit* t1 = t + 6*12;
+    sp_digit* t2 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    /* Doubling is done in place on p's ordinates. */
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Repeated-doubling formulas keep Y doubled across iterations and
+     * carry W = Z^4 forward, saving work versus n separate doublings. */
+    /* Y = 2*Y */
+    sp_384_mont_dbl_12(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod);
+
+    /* Non-small builds peel the last iteration out of the loop (below) so
+     * the final, unneeded W update can be skipped without a branch. */
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_12(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_12(t2, b, p384_mod);
+        sp_384_mont_sub_12(x, x, t2, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_12(y, b, x, p384_mod);
+        sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_12(y, y, p384_mod);
+        sp_384_mont_sub_12(y, y, t1, p384_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* Final (peeled) iteration: identical to the loop body but with no
+     * W update, since W is not needed after the last doubling. */
+    /* A = 3*(X^2 - W) */
+    sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_12(t1, t1, w, p384_mod);
+    sp_384_mont_tpl_12(a, t1, p384_mod);
+    /* B = X*Y^2 */
+    sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+    /* X = A^2 - 2B */
+    sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_12(t2, b, p384_mod);
+    sp_384_mont_sub_12(x, x, t2, p384_mod);
+    /* Z = Z*Y */
+    sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+    /* t2 = Y^4 */
+    sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_384_mont_sub_12(y, b, x, p384_mod);
+    sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_12(y, y, p384_mod);
+    sp_384_mont_sub_12(y, y, t1, p384_mod);
+#endif
+    /* Y = Y/2: undo the doubled-Y representation used by the loop. */
+    sp_384_div2_12(y, y, p384_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
+        const sp_point_384* q, sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    /* t supplies five 2*12-digit temporaries. */
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double: the mixed-addition formulas break down when p == q
+     * (or p == -q), so detect that case and double instead. */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        /* If either input is infinity, redirect the ordinate writes into
+         * scratch so the copy below provides the correct result. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* Mixed addition: Z2 == 1, so the Z2^2/Z2^3 terms drop out. */
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - X1 */
+        sp_384_mont_sub_12(t2, t2, x, p384_mod);
+        /* R = S2 - Y1 */
+        sp_384_mont_sub_12(t4, t4, y, p384_mod);
+        /* Z3 = H*Z1 */
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, t1, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, t3, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_384_mont_sub_12(t3, t3, x, p384_mod);
+        sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, t3, t5, p384_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert.
+ * t Temporary data.
+ */
+static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t)
+{
+    sp_digit* zinv  = t;            /* Holds 1/Z, then 1/Z^3. */
+    sp_digit* zinv2 = t + 2 * 12;   /* Holds 1/Z^2. */
+    sp_digit* work  = t + 4 * 12;   /* Scratch for the inversion. */
+
+    /* zinv = 1/Z (Montgomery form). */
+    sp_384_mont_inv_12(zinv, a->z, work);
+
+    /* zinv2 = 1/Z^2, zinv = 1/Z^3. */
+    sp_384_mont_sqr_12(zinv2, zinv, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(zinv, zinv2, zinv, p384_mod, p384_mp_mod);
+
+    /* X' = X/Z^2, Y' = Y/Z^3, Z' = normalized one. */
+    sp_384_mont_mul_12(a->x, a->x, zinv2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(a->y, a->y, zinv, p384_mod, p384_mp_mod);
+    XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    /* t = a in Montgomery form, then converted to affine. */
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* 16-entry stripe table: entry index bit i selects the point
+         * 2^(96*i) * a, and entries are sums of those base points. */
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        /* table[2^i] = 2^(96*i) * a for i = 1..3. */
+        for (i=1; i<4; i++) {
+            sp_384_proj_point_dbl_n_12(t, 96, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        /* Fill in the remaining entries as sums of the power-of-two
+         * entries: table[j] = table[2^i] + table[j - 2^i]. */
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r      Resulting point.
+ * g      Point to multiply (unused here - the pre-computed table is used).
+ * table  Pre-computed table of multiples of the point.
+ * k      Scalar to multiply by.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                                        DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        /* First window: bit 95 of each of the four 96-bit strips of k
+         * forms the 4-bit table index (k is in 32-bit digits). */
+        y = 0;
+        for (j=0,x=95; j<4; j++,x+=96) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        /* Remaining bit positions 94..0: double once, then add the
+         * table entry selected by bit i of each strip. */
+        for (i=94; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<4; j++,x+=96) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        /* Reduce result to affine co-ordinates if requested. */
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    /* X ordinate of the cached point. */
+    sp_digit x[12];
+    /* Y ordinate of the cached point. */
+    sp_digit y[12];
+    /* Pre-computed stripe table for the point (4-bit window, 16 entries). */
+    sp_table_entry_384 table[16];
+    /* Usage count - used to pick the least-used entry for eviction. */
+    uint32_t cnt;
+    /* Non-zero when the entry holds a point. */
+    int set;
+} sp_cache_384_t;
+
+/* Cache of recently used points and their tables (per-thread when
+ * HAVE_THREAD_LS is defined). */
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    /* Mutex protecting the shared cache when it is not thread-local. */
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find the cache entry for the given point, creating one (possibly by
+ * evicting the least used entry) when the point is not yet cached.
+ *
+ * NOTE(review): caller is expected to hold sp_cache_384_lock when
+ * HAVE_THREAD_LS is not defined - confirm against callers.
+ *
+ * g      Point to look up.
+ * cache  Returns the cache entry for the point.
+ */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    /* Lazily clear all entries on first use. */
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            /* NOTE(review): if entry 0 has the smallest count, i stays at
+             * sp_cache_384_last and that entry is evicted instead of 0 -
+             * confirm this is the intended LRU approximation. */
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        /* Store the new point; its table is generated later by the caller. */
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result, using a cached
+ * pre-computed stripe table once the point has been seen often enough.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    /* Lazily create and then take the mutex guarding the shared cache. */
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        /* Second use of the point: build its stripe table.
+         * NOTE(review): the return value of sp_384_gen_stripe_table_12 is
+         * ignored here - confirm a failed generation is acceptable. */
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        /* First use: plain multiply; subsequent uses: stripe table. */
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    /* Allocate the three working points (stack or heap depending on build). */
+    err = sp_384_point_new_12(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, s2d, s2);
+    }
+
+    /* Convert the base point's co-ordinates into Montgomery form. */
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        /* Normalise to affine co-ordinates (still in Montgomery form). */
+        sp_384_proj_to_affine_12(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        /* table[2^i] = 2^(48*i) * a: one power-of-two entry per 48-bit
+         * strip (8 strips cover the 384-bit scalar). */
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_12(t, 48, tmp);
+            sp_384_proj_to_affine_12(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        /* Remaining entries: table[j] = table[2^i] + table[j - 2^i]. */
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+                sp_384_proj_to_affine_12(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_12(s2, 0, heap);
+    sp_384_point_free_12(s1, 0, heap);
+    sp_384_point_free_12( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r      Resulting point.
+ * g      Point to multiply (unused here - the pre-computed table is used).
+ * table  Pre-computed table of multiples of the point.
+ * k      Scalar to multiply by.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 12 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                                        DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        /* First window: bit 47 of each of the eight 48-bit strips of k
+         * forms the 8-bit table index (k is in 32-bit digits). */
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        /* Remaining bit positions 46..0: double once, then add the
+         * table entry selected by bit i of each strip. */
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_12(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+        }
+
+        /* Reduce result to affine co-ordinates if requested. */
+        if (map != 0) {
+            sp_384_map_12(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+    /* X ordinate of the cached point. */
+    sp_digit x[12];
+    /* Y ordinate of the cached point. */
+    sp_digit y[12];
+    /* Pre-computed stripe table for the point (8-bit window, 256 entries). */
+    sp_table_entry_384 table[256];
+    /* Usage count - used to pick the least-used entry for eviction. */
+    uint32_t cnt;
+    /* Non-zero when the entry holds a point. */
+    int set;
+} sp_cache_384_t;
+
+/* Cache of recently used points and their tables (per-thread when
+ * HAVE_THREAD_LS is defined). */
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    /* Mutex protecting the shared cache when it is not thread-local. */
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find the cache entry for the given point, creating one (possibly by
+ * evicting the least used entry) when the point is not yet cached.
+ *
+ * NOTE(review): caller is expected to hold sp_cache_384_lock when
+ * HAVE_THREAD_LS is not defined - confirm against callers.
+ *
+ * g      Point to look up.
+ * cache  Returns the cache entry for the point.
+ */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    /* Lazily clear all entries on first use. */
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+                           sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            /* NOTE(review): if entry 0 has the smallest count, i stays at
+             * sp_cache_384_last and that entry is evicted instead of 0 -
+             * confirm this is the intended LRU approximation. */
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        /* Store the new point; its table is generated later by the caller. */
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result, using a cached
+ * pre-computed stripe table once the point has been seen often enough.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 12 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    /* Lazily create and then take the mutex guarding the shared cache. */
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        /* Second use of the point: build its stripe table.
+         * NOTE(review): the return value of sp_384_gen_stripe_table_12 is
+         * ignored here - confirm a failed generation is acceptable. */
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        /* First use: plain multiply; subsequent uses: stripe table. */
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * gm    Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Convert the scalar and point into SP internal representation. */
+        sp_384_from_mp(k, 12, km);
+        sp_384_point_from_ecc_point_12(point, gm);
+
+            err = sp_384_ecc_mulmod_12(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Convert the result back to an ecc_point. */
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Pre-computed stripe table for the P384 base point (small build):
+ * 16 entries indexed by a 4-bit window over four 96-bit strips of the
+ * scalar; entry 0 is the point at infinity and the power-of-two entries
+ * hold 2^(96*i) * G. Co-ordinates are in Montgomery form. */
+static const sp_table_entry_384 p384_table[16] = {
+    /* 0 */
+    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+    /* 1 */
+    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+    /* 2 */
+    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+    /* 3 */
+    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+      { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+        0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+    /* 4 */
+    { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+        0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+      { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+        0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+    /* 5 */
+    { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+        0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+      { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+        0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+    /* 6 */
+    { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+        0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+      { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+        0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+    /* 7 */
+    { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+        0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+      { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+        0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+    /* 8 */
+    { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+        0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+      { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+        0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+    /* 9 */
+    { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+        0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+      { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+        0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+    /* 10 */
+    { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+        0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+      { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+        0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+    /* 11 */
+    { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+        0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+      { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+        0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+    /* 12 */
+    { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+        0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+      { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+        0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+    /* 13 */
+    { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+        0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+      { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+        0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+    /* 14 */
+    { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+        0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+      { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+        0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+    /* 15 */
+    { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+        0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+      { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+        0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* The base point's stripe table is pre-computed (p384_table). */
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+ 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+ { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+ 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+ /* 2 */
+ { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4,
+ 0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c },
+ { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a,
+ 0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } },
+ /* 3 */
+ { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d,
+ 0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 },
+ { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a,
+ 0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } },
+ /* 4 */
+ { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+ 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+ { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+ 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+ /* 5 */
+ { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+ 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+ { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+ 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+ /* 6 */
+ { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18,
+ 0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 },
+ { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f,
+ 0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } },
+ /* 7 */
+ { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631,
+ 0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 },
+ { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6,
+ 0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } },
+ /* 8 */
+ { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826,
+ 0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 },
+ { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751,
+ 0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } },
+ /* 9 */
+ { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb,
+ 0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 },
+ { 0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed,
+ 0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } },
+ /* 10 */
+ { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf,
+ 0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 },
+ { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce,
+ 0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } },
+ /* 11 */
+ { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75,
+ 0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 },
+ { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498,
+ 0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } },
+ /* 12 */
+ { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0,
+ 0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 },
+ { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63,
+ 0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } },
+ /* 13 */
+ { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556,
+ 0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c },
+ { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc,
+ 0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } },
+ /* 14 */
+ { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161,
+ 0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 },
+ { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076,
+ 0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } },
+ /* 15 */
+ { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4,
+ 0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a },
+ { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8,
+ 0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } },
+ /* 16 */
+ { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+ 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+ { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+ 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+ /* 17 */
+ { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+ 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+ { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+ 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+ /* 18 */
+ { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3,
+ 0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf },
+ { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660,
+ 0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } },
+ /* 19 */
+ { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b,
+ 0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc },
+ { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4,
+ 0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } },
+ /* 20 */
+ { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+ 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+ { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+ 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+ /* 21 */
+ { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+ 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+ { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+ 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+ /* 22 */
+ { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928,
+ 0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 },
+ { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e,
+ 0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } },
+ /* 23 */
+ { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865,
+ 0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a },
+ { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35,
+ 0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } },
+ /* 24 */
+ { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521,
+ 0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc },
+ { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8,
+ 0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } },
+ /* 25 */
+ { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d,
+ 0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 },
+ { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278,
+ 0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } },
+ /* 26 */
+ { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0,
+ 0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 },
+ { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705,
+ 0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } },
+ /* 27 */
+ { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b,
+ 0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 },
+ { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac,
+ 0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } },
+ /* 28 */
+ { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2,
+ 0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 },
+ { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5,
+ 0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } },
+ /* 29 */
+ { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7,
+ 0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 },
+ { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0,
+ 0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } },
+ /* 30 */
+ { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56,
+ 0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 },
+ { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2,
+ 0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } },
+ /* 31 */
+ { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc,
+ 0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 },
+ { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f,
+ 0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } },
+ /* 32 */
+ { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3,
+ 0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a },
+ { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07,
+ 0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } },
+ /* 33 */
+ { { 0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4,
+ 0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 },
+ { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e,
+ 0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } },
+ /* 34 */
+ { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228,
+ 0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 },
+ { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8,
+ 0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } },
+ /* 35 */
+ { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f,
+ 0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 },
+ { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe,
+ 0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } },
+ /* 36 */
+ { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80,
+ 0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 },
+ { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924,
+ 0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } },
+ /* 37 */
+ { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645,
+ 0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 },
+ { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea,
+ 0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } },
+ /* 38 */
+ { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c,
+ 0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 },
+ { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd,
+ 0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } },
+ /* 39 */
+ { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a,
+ 0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 },
+ { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49,
+ 0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } },
+ /* 40 */
+ { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764,
+ 0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 },
+ { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c,
+ 0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } },
+ /* 41 */
+ { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472,
+ 0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b },
+ { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b,
+ 0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } },
+ /* 42 */
+ { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc,
+ 0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f },
+ { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d,
+ 0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } },
+ /* 43 */
+ { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790,
+ 0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 },
+ { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2,
+ 0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } },
+ /* 44 */
+ { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b,
+ 0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef },
+ { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd,
+ 0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } },
+ /* 45 */
+ { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9,
+ 0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 },
+ { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b,
+ 0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } },
+ /* 46 */
+ { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9,
+ 0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 },
+ { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967,
+ 0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } },
+ /* 47 */
+ { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12,
+ 0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc },
+ { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543,
+ 0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } },
+ /* 48 */
+ { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3,
+ 0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 },
+ { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963,
+ 0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } },
+ /* 49 */
+ { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f,
+ 0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb },
+ { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358,
+ 0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } },
+ /* 50 */
+ { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87,
+ 0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 },
+ { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1,
+ 0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } },
+ /* 51 */
+ { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18,
+ 0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 },
+ { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552,
+ 0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } },
+ /* 52 */
+ { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff,
+ 0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 },
+ { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e,
+ 0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } },
+ /* 53 */
+ { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f,
+ 0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 },
+ { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3,
+ 0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } },
+ /* 54 */
+ { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348,
+ 0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 },
+ { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419,
+ 0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } },
+ /* 55 */
+ { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485,
+ 0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 },
+ { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc,
+ 0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } },
+ /* 56 */
+ { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1,
+ 0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a },
+ { 0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528,
+ 0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } },
+ /* 57 */
+ { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405,
+ 0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 },
+ { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856,
+ 0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } },
+ /* 58 */
+ { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102,
+ 0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 },
+ { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967,
+ 0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } },
+ /* 59 */
+ { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c,
+ 0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 },
+ { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a,
+ 0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } },
+ /* 60 */
+ { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a,
+ 0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 },
+ { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41,
+ 0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } },
+ /* 61 */
+ { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b,
+ 0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 },
+ { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745,
+ 0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } },
+ /* 62 */
+ { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2,
+ 0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 },
+ { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb,
+ 0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } },
+ /* 63 */
+ { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2,
+ 0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 },
+ { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f,
+ 0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } },
+ /* 64 */
+ { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+ 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+ { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+ 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+ /* 65 */
+ { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+ 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+ { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+ 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+ /* 66 */
+ { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753,
+ 0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e },
+ { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac,
+ 0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } },
+ /* 67 */
+ { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c,
+ 0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb },
+ { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe,
+ 0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } },
+ /* 68 */
+ { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+ 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+ { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+ 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+ /* 69 */
+ { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+ 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+ { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+ 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+ /* 70 */
+ { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311,
+ 0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 },
+ { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5,
+ 0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } },
+ /* 71 */
+ { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43,
+ 0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 },
+ { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9,
+ 0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } },
+ /* 72 */
+ { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a,
+ 0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 },
+ { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601,
+ 0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } },
+ /* 73 */
+ { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806,
+ 0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc },
+ { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37,
+ 0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } },
+ /* 74 */
+ { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460,
+ 0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d },
+ { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b,
+ 0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } },
+ /* 75 */
+ { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a,
+ 0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 },
+ { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02,
+ 0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } },
+ /* 76 */
+ { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94,
+ 0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 },
+ { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101,
+ 0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } },
+ /* 77 */
+ { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2,
+ 0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 },
+ { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45,
+ 0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } },
+ /* 78 */
+ { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50,
+ 0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 },
+ { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e,
+ 0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } },
+ /* 79 */
+ { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d,
+ 0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 },
+ { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5,
+ 0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } },
+ /* 80 */
+ { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+ 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+ { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+ 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+ /* 81 */
+ { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+ 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+ { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+ 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+ /* 82 */
+ { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8,
+ 0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df },
+ { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d,
+ 0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } },
+ /* 83 */
+ { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b,
+ 0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 },
+ { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc,
+ 0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } },
+ /* 84 */
+ { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+ 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+ { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+ 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+ /* 85 */
+ { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+ 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+ { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+ 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+ /* 86 */
+ { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b,
+ 0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c },
+ { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930,
+ 0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } },
+ /* 87 */
+ { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd,
+ 0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 },
+ { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd,
+ 0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } },
+ /* 88 */
+ { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d,
+ 0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 },
+ { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378,
+ 0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } },
+ /* 89 */
+ { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8,
+ 0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 },
+ { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14,
+ 0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } },
+ /* 90 */
+ { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338,
+ 0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a },
+ { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8,
+ 0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } },
+ /* 91 */
+ { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3,
+ 0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c },
+ { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a,
+ 0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } },
+ /* 92 */
+ { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45,
+ 0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d },
+ { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967,
+ 0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } },
+ /* 93 */
+ { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f,
+ 0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 },
+ { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4,
+ 0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } },
+ /* 94 */
+ { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7,
+ 0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 },
+ { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5,
+ 0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } },
+ /* 95 */
+ { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41,
+ 0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 },
+ { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f,
+ 0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } },
+ /* 96 */
+ { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9,
+ 0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb },
+ { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1,
+ 0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } },
+ /* 97 */
+ { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3,
+ 0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 },
+ { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25,
+ 0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } },
+ /* 98 */
+ { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247,
+ 0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 },
+ { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7,
+ 0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } },
+ /* 99 */
+ { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283,
+ 0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e },
+ { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39,
+ 0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } },
+ /* 100 */
+ { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06,
+ 0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a },
+ { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062,
+ 0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } },
+ /* 101 */
+ { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642,
+ 0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f },
+ { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175,
+ 0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } },
+ /* 102 */
+ { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e,
+ 0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 },
+ { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02,
+ 0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } },
+ /* 103 */
+ { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414,
+ 0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 },
+ { 0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c,
+ 0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } },
+ /* 104 */
+ { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46,
+ 0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 },
+ { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d,
+ 0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } },
+ /* 105 */
+ { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b,
+ 0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 },
+ { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd,
+ 0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } },
+ /* 106 */
+ { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20,
+ 0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be },
+ { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d,
+ 0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } },
+ /* 107 */
+ { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4,
+ 0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 },
+ { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e,
+ 0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } },
+ /* 108 */
+ { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9,
+ 0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 },
+ { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570,
+ 0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } },
+ /* 109 */
+ { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2,
+ 0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 },
+ { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626,
+ 0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } },
+ /* 110 */
+ { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d,
+ 0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc },
+ { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12,
+ 0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } },
+ /* 111 */
+ { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965,
+ 0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 },
+ { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b,
+ 0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } },
+ /* 112 */
+ { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f,
+ 0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 },
+ { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749,
+ 0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } },
+ /* 113 */
+ { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70,
+ 0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea },
+ { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd,
+ 0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } },
+ /* 114 */
+ { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084,
+ 0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 },
+ { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58,
+ 0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } },
+ /* 115 */
+ { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f,
+ 0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e },
+ { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f,
+ 0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } },
+ /* 116 */
+ { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b,
+ 0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d },
+ { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659,
+ 0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } },
+ /* 117 */
+ { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907,
+ 0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb },
+ { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec,
+ 0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } },
+ /* 118 */
+ { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2,
+ 0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 },
+ { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347,
+ 0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } },
+ /* 119 */
+ { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a,
+ 0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 },
+ { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257,
+ 0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } },
+ /* 120 */
+ { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a,
+ 0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce },
+ { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc,
+ 0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } },
+ /* 121 */
+ { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d,
+ 0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e },
+ { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736,
+ 0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } },
+ /* 122 */
+ { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370,
+ 0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e },
+ { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262,
+ 0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } },
+ /* 123 */
+ { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7,
+ 0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 },
+ { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241,
+ 0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } },
+ /* 124 */
+ { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627,
+ 0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 },
+ { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f,
+ 0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } },
+ /* 125 */
+ { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397,
+ 0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 },
+ { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972,
+ 0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } },
+ /* 126 */
+ { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b,
+ 0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 },
+ { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454,
+ 0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } },
+ /* 127 */
+ { { 0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5,
+ 0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 },
+ { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4,
+ 0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } },
+ /* 128 */
+ { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878,
+ 0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 },
+ { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7,
+ 0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } },
+ /* 129 */
+ { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5,
+ 0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 },
+ { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02,
+ 0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } },
+ /* 130 */
+ { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187,
+ 0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b },
+ { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078,
+ 0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } },
+ /* 131 */
+ { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf,
+ 0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf },
+ { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be,
+ 0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } },
+ /* 132 */
+ { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8,
+ 0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 },
+ { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149,
+ 0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } },
+ /* 133 */
+ { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb,
+ 0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c },
+ { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64,
+ 0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } },
+ /* 134 */
+ { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259,
+ 0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 },
+ { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41,
+ 0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } },
+ /* 135 */
+ { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533,
+ 0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b },
+ { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40,
+ 0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } },
+ /* 136 */
+ { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6,
+ 0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d },
+ { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2,
+ 0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } },
+ /* 137 */
+ { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b,
+ 0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e },
+ { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc,
+ 0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } },
+ /* 138 */
+ { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090,
+ 0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 },
+ { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6,
+ 0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } },
+ /* 139 */
+ { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac,
+ 0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc },
+ { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7,
+ 0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } },
+ /* 140 */
+ { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40,
+ 0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 },
+ { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db,
+ 0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } },
+ /* 141 */
+ { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d,
+ 0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b },
+ { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13,
+ 0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } },
+ /* 142 */
+ { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb,
+ 0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 },
+ { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348,
+ 0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } },
+ /* 143 */
+ { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761,
+ 0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b },
+ { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04,
+ 0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } },
+ /* 144 */
+ { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7,
+ 0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a },
+ { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3,
+ 0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } },
+ /* 145 */
+ { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e,
+ 0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 },
+ { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276,
+ 0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } },
+ /* 146 */
+ { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2,
+ 0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 },
+ { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66,
+ 0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } },
+ /* 147 */
+ { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979,
+ 0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb },
+ { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918,
+ 0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } },
+ /* 148 */
+ { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df,
+ 0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e },
+ { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5,
+ 0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } },
+ /* 149 */
+ { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a,
+ 0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 },
+ { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2,
+ 0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } },
+ /* 150 */
+ { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82,
+ 0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba },
+ { 0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048,
+ 0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } },
+ /* 151 */
+ { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407,
+ 0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a },
+ { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44,
+ 0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } },
+ /* 152 */
+ { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc,
+ 0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 },
+ { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec,
+ 0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } },
+ /* 153 */
+ { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6,
+ 0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 },
+ { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada,
+ 0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } },
+ /* 154 */
+ { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0,
+ 0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c },
+ { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2,
+ 0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } },
+ /* 155 */
+ { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd,
+ 0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e },
+ { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32,
+ 0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } },
+ /* 156 */
+ { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f,
+ 0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 },
+ { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1,
+ 0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } },
+ /* 157 */
+ { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729,
+ 0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 },
+ { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508,
+ 0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } },
+ /* 158 */
+ { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b,
+ 0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 },
+ { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646,
+ 0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } },
+ /* 159 */
+ { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102,
+ 0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 },
+ { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39,
+ 0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } },
+ /* 160 */
+ { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64,
+ 0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 },
+ { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1,
+ 0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } },
+ /* 161 */
+ { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b,
+ 0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d },
+ { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954,
+ 0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } },
+ /* 162 */
+ { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5,
+ 0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 },
+ { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe,
+ 0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } },
+ /* 163 */
+ { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288,
+ 0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd },
+ { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792,
+ 0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } },
+ /* 164 */
+ { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce,
+ 0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e },
+ { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34,
+ 0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } },
+ /* 165 */
+ { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013,
+ 0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 },
+ { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c,
+ 0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } },
+ /* 166 */
+ { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a,
+ 0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f },
+ { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396,
+ 0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } },
+ /* 167 */
+ { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a,
+ 0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 },
+ { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc,
+ 0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } },
+ /* 168 */
+ { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e,
+ 0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 },
+ { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6,
+ 0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } },
+ /* 169 */
+ { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630,
+ 0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad },
+ { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246,
+ 0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } },
+ /* 170 */
+ { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5,
+ 0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d },
+ { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b,
+ 0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } },
+ /* 171 */
+ { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d,
+ 0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 },
+ { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8,
+ 0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } },
+ /* 172 */
+ { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba,
+ 0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 },
+ { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0,
+ 0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } },
+ /* 173 */
+ { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9,
+ 0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 },
+ { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9,
+ 0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } },
+ /* 174 */
+ { { 0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c,
+ 0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 },
+ { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d,
+ 0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } },
+ /* 175 */
+ { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc,
+ 0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 },
+ { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8,
+ 0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } },
+ /* 176 */
+ { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d,
+ 0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 },
+ { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7,
+ 0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } },
+ /* 177 */
+ { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d,
+ 0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 },
+ { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef,
+ 0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } },
+ /* 178 */
+ { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960,
+ 0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 },
+ { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596,
+ 0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } },
+ /* 179 */
+ { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c,
+ 0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef },
+ { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d,
+ 0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } },
+ /* 180 */
+ { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2,
+ 0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 },
+ { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8,
+ 0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } },
+ /* 181 */
+ { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a,
+ 0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c },
+ { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683,
+ 0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } },
+ /* 182 */
+ { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f,
+ 0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf },
+ { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4,
+ 0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } },
+ /* 183 */
+ { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63,
+ 0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f },
+ { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e,
+ 0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } },
+ /* 184 */
+ { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670,
+ 0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e },
+ { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1,
+ 0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } },
+ /* 185 */
+ { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45,
+ 0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba },
+ { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5,
+ 0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } },
+ /* 186 */
+ { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7,
+ 0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a },
+ { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21,
+ 0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } },
+ /* 187 */
+ { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2,
+ 0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 },
+ { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2,
+ 0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } },
+ /* 188 */
+ { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319,
+ 0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f },
+ { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860,
+ 0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } },
+ /* 189 */
+ { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de,
+ 0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 },
+ { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6,
+ 0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } },
+ /* 190 */
+ { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e,
+ 0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 },
+ { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd,
+ 0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } },
+ /* 191 */
+ { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13,
+ 0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e },
+ { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51,
+ 0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } },
+ /* 192 */
+ { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d,
+ 0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 },
+ { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb,
+ 0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } },
+ /* 193 */
+ { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a,
+ 0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd },
+ { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184,
+ 0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } },
+ /* 194 */
+ { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db,
+ 0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 },
+ { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145,
+ 0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } },
+ /* 195 */
+ { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1,
+ 0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d },
+ { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f,
+ 0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } },
+ /* 196 */
+ { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a,
+ 0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 },
+ { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568,
+ 0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } },
+ /* 197 */
+ { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612,
+ 0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f },
+ { 0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0,
+ 0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } },
+ /* 198 */
+ { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57,
+ 0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f },
+ { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6,
+ 0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } },
+ /* 199 */
+ { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d,
+ 0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 },
+ { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5,
+ 0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } },
+ /* 200 */
+ { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e,
+ 0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 },
+ { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9,
+ 0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } },
+ /* 201 */
+ { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800,
+ 0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e },
+ { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c,
+ 0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } },
+ /* 202 */
+ { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39,
+ 0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 },
+ { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c,
+ 0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } },
+ /* 203 */
+ { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139,
+ 0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 },
+ { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7,
+ 0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } },
+ /* 204 */
+ { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92,
+ 0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db },
+ { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2,
+ 0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } },
+ /* 205 */
+ { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc,
+ 0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 },
+ { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc,
+ 0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } },
+ /* 206 */
+ { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0,
+ 0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 },
+ { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a,
+ 0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } },
+ /* 207 */
+ { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73,
+ 0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf },
+ { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1,
+ 0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } },
+ /* 208 */
+ { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0,
+ 0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 },
+ { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d,
+ 0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } },
+ /* 209 */
+ { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6,
+ 0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 },
+ { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370,
+ 0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } },
+ /* 210 */
+ { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553,
+ 0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 },
+ { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806,
+ 0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } },
+ /* 211 */
+ { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6,
+ 0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e },
+ { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b,
+ 0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } },
+ /* 212 */
+ { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b,
+ 0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 },
+ { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314,
+ 0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } },
+ /* 213 */
+ { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c,
+ 0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 },
+ { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08,
+ 0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } },
+ /* 214 */
+ { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0,
+ 0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 },
+ { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180,
+ 0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } },
+ /* 215 */
+ { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d,
+ 0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f },
+ { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277,
+ 0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } },
+ /* 216 */
+ { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4,
+ 0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b },
+ { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072,
+ 0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } },
+ /* 217 */
+ { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44,
+ 0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 },
+ { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1,
+ 0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } },
+ /* 218 */
+ { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247,
+ 0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 },
+ { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a,
+ 0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } },
+ /* 219 */
+ { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361,
+ 0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 },
+ { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd,
+ 0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } },
+ /* 220 */
+ { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc,
+ 0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 },
+ { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5,
+ 0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } },
+ /* 221 */
+ { { 0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094,
+ 0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 },
+ { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf,
+ 0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } },
+ /* 222 */
+ { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b,
+ 0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 },
+ { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b,
+ 0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } },
+ /* 223 */
+ { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094,
+ 0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b },
+ { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d,
+ 0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } },
+ /* 224 */
+ { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61,
+ 0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a },
+ { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95,
+ 0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } },
+ /* 225 */
+ { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947,
+ 0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 },
+ { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24,
+ 0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } },
+ /* 226 */
+ { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e,
+ 0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 },
+ { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0,
+ 0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } },
+ /* 227 */
+ { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404,
+ 0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc },
+ { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73,
+ 0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } },
+ /* 228 */
+ { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d,
+ 0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f },
+ { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5,
+ 0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } },
+ /* 229 */
+ { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee,
+ 0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b },
+ { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb,
+ 0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } },
+ /* 230 */
+ { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf,
+ 0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd },
+ { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8,
+ 0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } },
+ /* 231 */
+ { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d,
+ 0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 },
+ { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052,
+ 0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } },
+ /* 232 */
+ { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d,
+ 0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 },
+ { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e,
+ 0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } },
+ /* 233 */
+ { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1,
+ 0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b },
+ { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea,
+ 0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } },
+ /* 234 */
+ { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e,
+ 0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 },
+ { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787,
+ 0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } },
+ /* 235 */
+ { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365,
+ 0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 },
+ { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73,
+ 0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } },
+ /* 236 */
+ { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54,
+ 0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 },
+ { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef,
+ 0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } },
+ /* 237 */
+ { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd,
+ 0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 },
+ { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc,
+ 0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } },
+ /* 238 */
+ { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175,
+ 0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 },
+ { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf,
+ 0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } },
+ /* 239 */
+ { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391,
+ 0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 },
+ { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55,
+ 0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } },
+ /* 240 */
+ { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190,
+ 0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 },
+ { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d,
+ 0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } },
+ /* 241 */
+ { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7,
+ 0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 },
+ { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1,
+ 0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } },
+ /* 242 */
+ { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d,
+ 0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de },
+ { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1,
+ 0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } },
+ /* 243 */
+ { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031,
+ 0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 },
+ { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10,
+ 0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } },
+ /* 244 */
+ { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a,
+ 0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 },
+ { 0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a,
+ 0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } },
+ /* 245 */
+ { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13,
+ 0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 },
+ { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc,
+ 0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } },
+ /* 246 */
+ { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996,
+ 0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 },
+ { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6,
+ 0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } },
+ /* 247 */
+ { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e,
+ 0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 },
+ { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2,
+ 0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } },
+ /* 248 */
+ { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b,
+ 0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d },
+ { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f,
+ 0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } },
+ /* 249 */
+ { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7,
+ 0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 },
+ { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190,
+ 0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } },
+ /* 250 */
+ { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e,
+ 0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f },
+ { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d,
+ 0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } },
+ /* 251 */
+ { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4,
+ 0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df },
+ { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8,
+ 0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } },
+ /* 252 */
+ { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6,
+ 0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 },
+ { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6,
+ 0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } },
+ /* 253 */
+ { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7,
+ 0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 },
+ { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb,
+ 0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } },
+ /* 254 */
+ { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80,
+ 0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba },
+ { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6,
+ 0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } },
+ /* 255 */
+ { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29,
+ 0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 },
+ { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6,
+ 0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* Fixed-base multiply using the precomputed stripe table p384_table. */
+ return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+ k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km   Scalar to multiply by (mp_int form).
+ * r    Resulting point (ecc_point form).
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Stack-allocated point and scalar when not using dynamic memory. */
+ sp_point_384 p;
+ sp_digit kd[12];
+#endif
+ sp_point_384* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ /* 12 32-bit digits hold the 384-bit scalar. */
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert mp_int scalar to SP digit array, then multiply. */
+ sp_384_from_mp(k, 12, km);
+
+ err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Copy the SP point result back into the caller's ecc_point. */
+ err = sp_384_point_to_ecc_point_12(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(point, 0, heap);
+
+ return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a Number to check (12 digits).
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_12(const sp_digit* a)
+{
+ /* OR all digits together so the amount of work is independent of
+ * where (or whether) non-zero digits occur. */
+ return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+ a[8] | a[9] | a[10] | a[11]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * Operates in place on all 12 32-bit words. The first word uses ADDS and
+ * the rest ADCS so the carry ripples through the whole 384-bit value.
+ * LDR/STR do not modify the APSR flags, so the carry survives across the
+ * load/store groups between ADCS runs.
+ *
+ * a A single precision integer (12 words).
+ */
+static void sp_384_add_one_12(sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "ldr r1, [%[a], #0]\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "adds r1, r1, #1\n\t"
+ "adcs r2, r2, #0\n\t"
+ "adcs r3, r3, #0\n\t"
+ "adcs r4, r4, #0\n\t"
+ "str r1, [%[a], #0]\n\t"
+ "str r2, [%[a], #4]\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ "ldr r1, [%[a], #16]\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "adcs r1, r1, #0\n\t"
+ "adcs r2, r2, #0\n\t"
+ "adcs r3, r3, #0\n\t"
+ "adcs r4, r4, #0\n\t"
+ "str r1, [%[a], #16]\n\t"
+ "str r2, [%[a], #20]\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ "ldr r1, [%[a], #32]\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "adcs r1, r1, #0\n\t"
+ "adcs r2, r2, #0\n\t"
+ "adcs r3, r3, #0\n\t"
+ "adcs r4, r4, #0\n\t"
+ "str r1, [%[a], #32]\n\t"
+ "str r2, [%[a], #36]\n\t"
+ "str r3, [%[a], #40]\n\t"
+ "str r4, [%[a], #44]\n\t"
+ :
+ : [a] "r" (a)
+ : "memory", "r1", "r2", "r3", "r4"
+ );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * Bytes are consumed from the end of the array (least significant first)
+ * and packed into little-endian sp_digit words, 8 bits at a time. This is
+ * generic generated code: the masking/shift-split path handles a byte
+ * straddling a word boundary.
+ *
+ * r    A single precision integer (output, little-endian digits).
+ * size Maximum number of digits to fill.
+ * a    Byte array (big endian).
+ * n    Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 24U) {
+ /* Word full: mask to 32 bits and carry remaining byte bits
+ * (if any) into the next digit. */
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ /* Zero any remaining high digits. */
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * Rejection sampling: draw 48 random bytes (384 bits); accept only when
+ * the value is below p384_order2 (presumably order-2 - defined elsewhere;
+ * TODO confirm), then add 1 so the result lies in [1, order-1].
+ *
+ * rng Random number generator.
+ * k   Scalar value (output, 12 digits).
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
+{
+ int err;
+ byte buf[48];
+
+ do {
+ err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+ if (err == 0) {
+ sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
+ if (sp_384_cmp_12(k, p384_order2) < 0) {
+ sp_384_add_one_12(k);
+ break;
+ }
+ }
+ }
+ while (err == 0);
+
+ return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * Generates private scalar k in [1, order-1] and computes pub = k.G.
+ * With WOLFSSL_VALIDATE_ECC_KEYGEN the public point is additionally
+ * multiplied by the order and checked to be the point at infinity.
+ *
+ * rng  Random number generator.
+ * priv Generated private value.
+ * pub  Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[12];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_384 inf;
+#endif
+#endif
+ sp_point_384* point;
+ sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_384* infinity;
+#endif
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_12(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, inf, infinity);
+ }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Private scalar in [1, order-1]. */
+ err = sp_384_ecc_gen_k_12(rng, k);
+ }
+ if (err == MP_OKAY) {
+ /* Public point = k.G, mapped to affine (map = 1). */
+ err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+ }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ if (err == MP_OKAY) {
+ /* order.pub must be the point at infinity (x == 0 and y == 0 here). */
+ err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
+ }
+ if (err == MP_OKAY) {
+ if ((sp_384_iszero_12(point->x) == 0) || (sp_384_iszero_12(point->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(k, priv);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_to_ecc_point_12(point, pub);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_384_point_free_12(infinity, 1, heap);
+#endif
+ sp_384_point_free_12(point, 1, heap);
+
+ return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * Generic generated code: walks the little-endian digits and emits bytes
+ * from the end of the buffer backwards, tracking a bit offset (s) for
+ * digits that straddle byte boundaries.
+ *
+ * r A single precision integer (12 little-endian digits).
+ * a Byte array (output, 48 bytes, big endian).
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+ int i, j, s = 0, b;
+
+ j = 384 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<12 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ while (b < 32) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ s = 8 - (b - 32);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv   Scalar to multiply the point by.
+ * pub    Point to multiply.
+ * out    Buffer to hold X ordinate (48 bytes written).
+ * outLen On entry, size of the buffer in bytes.
+ *        On exit, length of data in buffer in bytes.
+ * heap   Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+ word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[12];
+#endif
+ sp_point_384* point = NULL;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ /* ECDH shared secret is always the full 48-byte X ordinate. */
+ if (*outLen < 48U) {
+ err = BUFFER_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, p, point);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_384_from_mp(k, 12, priv);
+ sp_384_point_from_ecc_point_12(point, pub);
+ /* point = priv.pub, mapped to affine (map = 1). */
+ err = sp_384_ecc_mulmod_12(point, point, k, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ sp_384_to_bin(point->x, out);
+ *outLen = 48;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(point, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Loop form (WOLFSSL_SP_SMALL): four words per iteration until a reaches
+ * a + 48 bytes. The borrow is re-injected at the top of each iteration via
+ * "subs c, 0, c" which sets the carry flag for the SBCS chain.
+ *
+ * a A single precision integer and result (12 words).
+ * b A single precision integer.
+ * returns the borrow out of the subtraction (0 or all-ones).
+ */
+static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r14, #0\n\t"
+ "add r12, %[a], #48\n\t"
+ "\n1:\n\t"
+ "subs %[c], r14, %[c]\n\t"
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[a], #8]\n\t"
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r7, [%[b]], #4\n\t"
+ "ldr r8, [%[b]], #4\n\t"
+ "ldr r9, [%[b]], #4\n\t"
+ "ldr r10, [%[b]], #4\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "sbcs r6, r6, r10\n\t"
+ "str r3, [%[a]], #4\n\t"
+ "str r4, [%[a]], #4\n\t"
+ "str r5, [%[a]], #4\n\t"
+ "str r6, [%[a]], #4\n\t"
+ "sbc %[c], r14, r14\n\t"
+ "cmp %[a], r12\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
+ );
+
+ return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * Fully unrolled form: SUBS on the first word, SBCS on the remaining 11,
+ * so the borrow ripples through the whole 384-bit value. LDR/STR between
+ * groups do not disturb the flags.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ * returns the borrow out of the subtraction (0 or all-ones via SBC r9,r9).
+ */
+static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "ldr r2, [%[a], #0]\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[a], #12]\n\t"
+ "ldr r6, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "ldr r9, [%[b], #12]\n\t"
+ "subs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #0]\n\t"
+ "str r3, [%[a], #4]\n\t"
+ "str r4, [%[a], #8]\n\t"
+ "str r5, [%[a], #12]\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[a], #28]\n\t"
+ "ldr r6, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "ldr r9, [%[b], #28]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #16]\n\t"
+ "str r3, [%[a], #20]\n\t"
+ "str r4, [%[a], #24]\n\t"
+ "str r5, [%[a], #28]\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[a], #44]\n\t"
+ "ldr r6, [%[b], #32]\n\t"
+ "ldr r7, [%[b], #36]\n\t"
+ "ldr r8, [%[b], #40]\n\t"
+ "ldr r9, [%[b], #44]\n\t"
+ "sbcs r2, r2, r6\n\t"
+ "sbcs r3, r3, r7\n\t"
+ "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #32]\n\t"
+ "str r3, [%[a], #36]\n\t"
+ "str r4, [%[a], #40]\n\t"
+ "str r5, [%[a], #44]\n\t"
+ "sbc %[c], r9, r9\n\t"
+ : [c] "+r" (c)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Schoolbook multiply of a 12-word number by one 32-bit digit using
+ * UMULL (32x32 -> 64). The 13-word result is written to r[0..12]; r must
+ * have room for 13 digits.
+ *
+ * r A single precision integer (13 words of output space).
+ * a A single precision integer (12 words).
+ * b A single precision digit.
+ */
+static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Loop form: r9 is the byte offset, three-register rotating
+ * accumulator (r3, r4, r5) carries partial sums forward. */
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r5, r3, %[b], r8\n\t"
+ "mov r4, #0\n\t"
+ "str r5, [%[r]]\n\t"
+ "mov r5, #0\n\t"
+ "mov r9, #4\n\t"
+ "1:\n\t"
+ "ldr r8, [%[a], r9]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], r9]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r9, r9, #4\n\t"
+ "cmp r9, #48\n\t"
+ "blt 1b\n\t"
+ "str r3, [%[r], #48]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#else
+ /* Fully unrolled form: same rotating-accumulator scheme with one
+ * UMULL/ADDS/ADCS/ADC group per input digit. */
+ __asm__ __volatile__ (
+ "mov r10, #0\n\t"
+ "# A[0] * B\n\t"
+ "ldr r8, [%[a]]\n\t"
+ "umull r3, r4, %[b], r8\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [%[r]]\n\t"
+ "# A[1] * B\n\t"
+ "ldr r8, [%[a], #4]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "# A[2] * B\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "# A[3] * B\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "# A[4] * B\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "# A[5] * B\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "# A[6] * B\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "# A[7] * B\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "# A[8] * B\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "mov r4, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r7\n\t"
+ "adc r4, r4, r10\n\t"
+ "str r5, [%[r], #32]\n\t"
+ "# A[9] * B\n\t"
+ "ldr r8, [%[a], #36]\n\t"
+ "mov r5, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r7\n\t"
+ "adc r5, r5, r10\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "# A[10] * B\n\t"
+ "ldr r8, [%[a], #40]\n\t"
+ "mov r3, #0\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r7\n\t"
+ "adc r3, r3, r10\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "# A[11] * B\n\t"
+ "ldr r8, [%[a], #44]\n\t"
+ "umull r6, r7, %[b], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r7\n\t"
+ "str r5, [%[r], #44]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+ );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * Branch-free shift-and-subtract estimate against div/2+1 (top bit plus
+ * next 30 bits), followed by two multiply-back correction rounds and a
+ * final conditional adjustment. Data-independent control flow keeps this
+ * constant time.
+ *
+ * d1  The high order half of the number to divide.
+ * d0  The low order half of the number to divide.
+ * div The dividend (divisor; assumed to have its top bit set).
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div)
+{
+ sp_digit r = 0;
+
+ __asm__ __volatile__ (
+ "lsr r5, %[div], #1\n\t"
+ "add r5, r5, #1\n\t"
+ "mov r6, %[d0]\n\t"
+ "mov r7, %[d1]\n\t"
+ "# Do top 32\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "# Next 30 bits\n\t"
+ "mov r4, #29\n\t"
+ "1:\n\t"
+ "movs r6, r6, lsl #1\n\t"
+ "adc r7, r7, r7\n\t"
+ "subs r8, r5, r7\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "and r8, r8, r5\n\t"
+ "subs r7, r7, r8\n\t"
+ "subs r4, r4, #1\n\t"
+ "bpl 1b\n\t"
+ "add %[r], %[r], %[r]\n\t"
+ "add %[r], %[r], #1\n\t"
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ "umull r4, r5, %[r], %[div]\n\t"
+ "subs r4, %[d0], r4\n\t"
+ "sbc r5, %[d1], r5\n\t"
+ "add %[r], %[r], r5\n\t"
+ "subs r8, %[div], r4\n\t"
+ "sbc r8, r8, r8\n\t"
+ "sub %[r], %[r], r8\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r6", "r7", "r8"
+ );
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Used with an all-ones/all-zeros mask to conditionally select a value
+ * without branching (constant time).
+ *
+ * r A single precision integer (output).
+ * a A single precision integer.
+ * m Mask to AND against each digit (0 or all-ones).
+ */
+static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<12; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ r[0] = a[0] & m;
+ r[1] = a[1] & m;
+ r[2] = a[2] & m;
+ r[3] = a[3] & m;
+ r[4] = a[4] & m;
+ r[5] = a[5] & m;
+ r[6] = a[6] & m;
+ r[7] = a[7] & m;
+ r[8] = a[8] & m;
+ r[9] = a[9] & m;
+ r[10] = a[10] & m;
+ r[11] = a[11] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Word-by-word long division: each quotient digit is estimated with
+ * div_384_word_12 (may be 1 too large), then up to two conditional
+ * masked add-backs correct the partial remainder without branching.
+ *
+ * a Number to be divided (2 * 12 digits).
+ * d Number to divide with (12 digits; top digit used as the divisor).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (12 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[24], t2[13];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+
+ div = d[11];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
+ for (i=11; i>=0; i--) {
+ r1 = div_384_word_12(t1[12 + i], t1[12 + i - 1], div);
+
+ sp_384_mul_d_12(t2, d, r1);
+ t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
+ t1[12 + i] -= t2[12];
+ sp_384_mask_12(t2, d, t1[12 + i]);
+ t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+ sp_384_mask_12(t2, d, t1[12 + i]);
+ t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+ }
+
+ /* Final conditional subtract if the remainder still >= d. */
+ r1 = sp_384_cmp_12(t1, d) >= 0;
+ sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_384_div_12 keeping only the remainder.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced (2 * 12 digits).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_384_div_12(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve (exponent for Fermat inversion mod order). */
+static const uint32_t p384_order_minus_2[12] = {
+ 0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+ 0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve (the high half is all
+ * ones and is handled by the fixed addition-chain prefix). */
+static const uint32_t p384_order_low[6] = {
+ 0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * Plain multiply followed by Montgomery reduction with the order modulus;
+ * operands are expected in Montgomery form.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ sp_384_mul_12(r, a, b);
+ sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * Dedicated squaring followed by Montgomery reduction with the order
+ * modulus; operand is expected in Montgomery form.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
+{
+ sp_384_sqr_12(r, a);
+ sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ (2^n) mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of times to square (n >= 1).
+ */
+static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
+{
+ int i;
+
+ sp_384_mont_sqr_order_12(r, a);
+ for (i=1; i<n; i++) {
+ sp_384_mont_sqr_order_12(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * Fermat inversion: a^(order-2) mod order. The small build walks the
+ * exponent bits directly; the fast build uses a fixed addition chain to
+ * build a^(2^192 - 1) then processes the low 192 exponent bits.
+ *
+ * r  Inverse result.
+ * a  Number to invert (Montgomery form).
+ * td Temporary data (at least 6 * 12 digits in the fast build).
+ */
+static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
+ sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ XMEMCPY(t, a, sizeof(sp_digit) * 12);
+ /* Square-and-multiply over bits 382..0 of order-2. */
+ for (i=382; i>=0; i--) {
+ sp_384_mont_sqr_order_12(t, t);
+ if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_384_mont_mul_order_12(t, t, a);
+ }
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 12U);
+#else
+ sp_digit* t = td;
+ sp_digit* t2 = td + 2 * 12;
+ sp_digit* t3 = td + 4 * 12;
+ int i;
+
+ /* t = a^2 */
+ sp_384_mont_sqr_order_12(t, a);
+ /* t = a^3 = t * a */
+ sp_384_mont_mul_order_12(t, t, a);
+ /* t2= a^c = t ^ 2 ^ 2 */
+ sp_384_mont_sqr_n_order_12(t2, t, 2);
+ /* t = a^f = t2 * t */
+ sp_384_mont_mul_order_12(t, t2, t);
+ /* t2= a^f0 = t ^ 2 ^ 4 */
+ sp_384_mont_sqr_n_order_12(t2, t, 4);
+ /* t = a^ff = t2 * t */
+ sp_384_mont_mul_order_12(t, t2, t);
+ /* t2= a^ff00 = t ^ 2 ^ 8 */
+ sp_384_mont_sqr_n_order_12(t2, t, 8);
+ /* t3= a^ffff = t2 * t */
+ sp_384_mont_mul_order_12(t3, t2, t);
+ /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+ sp_384_mont_sqr_n_order_12(t2, t3, 16);
+ /* t = a^ffffffff = t2 * t3 */
+ sp_384_mont_mul_order_12(t, t2, t3);
+ /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+ sp_384_mont_sqr_n_order_12(t2, t, 16);
+ /* t = a^ffffffffffff = t2 * t3 */
+ sp_384_mont_mul_order_12(t, t2, t3);
+ /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+ sp_384_mont_sqr_n_order_12(t2, t, 48);
+ /* t= a^fffffffffffffffffffffffff = t2 * t */
+ sp_384_mont_mul_order_12(t, t2, t);
+ /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+ sp_384_mont_sqr_n_order_12(t2, t, 96);
+ /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+ sp_384_mont_mul_order_12(t2, t2, t);
+ /* Square-and-multiply over the low 192 bits of order-2
+ * (bit 0 is handled separately below). */
+ for (i=191; i>=1; i--) {
+ sp_384_mont_sqr_order_12(t2, t2);
+ if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_384_mont_mul_order_12(t2, t2, a);
+ }
+ }
+ sp_384_mont_sqr_order_12(t2, t2);
+ sp_384_mont_mul_order_12(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 384 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash    Hash to sign.
+ * hashLen Length of the hash data.
+ * rng     Random number generator.
+ * priv    Private part of key - scalar.
+ * rm      First part of result (r) as an mp_int.
+ * sm      Second part of result (s) as an mp_int.
+ * km      Optional caller-supplied ephemeral scalar; zeroed after use.
+ *         When NULL or zero a fresh random k is generated each attempt.
+ * heap    Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+ mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit ed[2*12];
+ sp_digit xd[2*12];
+ sp_digit kd[2*12];
+ sp_digit rd[2*12];
+ sp_digit td[3 * 2*12];
+ sp_point_384 p;
+#endif
+ sp_digit* e = NULL;
+ sp_digit* x = NULL;
+ sp_digit* k = NULL;
+ sp_digit* r = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_384* point = NULL;
+ sp_digit carry;
+ sp_digit* s = NULL;
+ sp_digit* kInv = NULL;
+ int err = MP_OKAY;
+ int32_t c;
+ int i;
+
+ (void)heap;
+
+ err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ e = d + 0 * 12;
+ x = d + 2 * 12;
+ k = d + 4 * 12;
+ r = d + 6 * 12;
+ tmp = d + 8 * 12;
+#else
+ e = ed;
+ x = xd;
+ k = kd;
+ r = rd;
+ tmp = td;
+#endif
+ /* s reuses e's buffer and kInv reuses k's: e and k are no longer
+ * needed once s and kInv are computed. */
+ s = e;
+ kInv = k;
+
+ if (hashLen > 48U) {
+ hashLen = 48U;
+ }
+
+ sp_384_from_bin(e, 12, hash, (int)hashLen);
+ }
+
+ /* Retry until a usable (non-zero) signature is produced. */
+ for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+ sp_384_from_mp(x, 12, priv);
+
+ /* New random point. */
+ if (km == NULL || mp_iszero(km)) {
+ err = sp_384_ecc_gen_k_12(rng, k);
+ }
+ else {
+ sp_384_from_mp(k, 12, km);
+ mp_zero(km);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = point->x mod order */
+ XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
+ sp_384_norm_12(r);
+ c = sp_384_cmp_12(r, p384_order);
+ sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+ sp_384_norm_12(r);
+
+ /* Conv k to Montgomery form (mod order) */
+ sp_384_mul_12(k, k, p384_norm_order);
+ err = sp_384_mod_12(k, k, p384_order);
+ }
+ if (err == MP_OKAY) {
+ sp_384_norm_12(k);
+ /* kInv = 1/k mod order */
+ sp_384_mont_inv_order_12(kInv, k, tmp);
+ sp_384_norm_12(kInv);
+
+ /* s = r * x + e */
+ sp_384_mul_12(x, x, r);
+ err = sp_384_mod_12(x, x, p384_order);
+ }
+ if (err == MP_OKAY) {
+ sp_384_norm_12(x);
+ carry = sp_384_add_12(s, e, x);
+ sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
+ sp_384_norm_12(s);
+ c = sp_384_cmp_12(s, p384_order);
+ sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+ sp_384_norm_12(s);
+
+ /* s = s * k^-1 mod order */
+ sp_384_mont_mul_order_12(s, s, kInv);
+ sp_384_norm_12(s);
+
+ /* Check that signature is usable. */
+ if (sp_384_iszero_12(s) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (i == 0) {
+ err = RNG_FAILURE_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(r, rm);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(s, sm);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ /* NOTE(review): wipes e/x/k/r (8 * 12 digits) but not the tmp
+ * region at d + 8*12 - confirm whether temporaries from the
+ * inversion should also be cleared before free. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 8 * 12);
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
+ XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
+ XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+ /* NOTE(review): duplicate wipe of r below (appears in upstream
+ * generated code); harmless but likely intended only once. */
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
+#endif
+ sp_384_point_free_12(point, 1, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 384)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash    Hash that was signed.
+ * hashLen Length of the hash data.
+ * pX      X ordinate of the public key point Q.
+ * pY      Y ordinate of the public key point Q.
+ * pZ      Z ordinate of the public key point Q.
+ * r       First part of the signature as an mp_int.
+ * sm      Second part of the signature as an mp_int.
+ * res     Set to 1 when the signature verifies, 0 otherwise.
+ * heap    Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success
+ * (the verification outcome is reported through res).
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit u1d[2*12];
+ sp_digit u2d[2*12];
+ sp_digit sd[2*12];
+ sp_digit tmpd[2*12 * 5];
+ sp_point_384 p1d;
+ sp_point_384 p2d;
+#endif
+ sp_digit* u1 = NULL;
+ sp_digit* u2 = NULL;
+ sp_digit* s = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_384* p1;
+ sp_point_384* p2 = NULL;
+ sp_digit carry;
+ int32_t c;
+ int err;
+
+ err = sp_384_point_new_12(heap, p1d, p1);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, p2d, p2);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ u1 = d + 0 * 12;
+ u2 = d + 2 * 12;
+ s = d + 4 * 12;
+ tmp = d + 6 * 12;
+#else
+ u1 = u1d;
+ u2 = u2d;
+ s = sd;
+ tmp = tmpd;
+#endif
+
+ if (hashLen > 48U) {
+ hashLen = 48U;
+ }
+
+ sp_384_from_bin(u1, 12, hash, (int)hashLen);
+ sp_384_from_mp(u2, 12, r);
+ sp_384_from_mp(s, 12, sm);
+ sp_384_from_mp(p2->x, 12, pX);
+ sp_384_from_mp(p2->y, 12, pY);
+ sp_384_from_mp(p2->z, 12, pZ);
+
+ /* Convert s to Montgomery form (mod order). */
+ {
+ sp_384_mul_12(s, s, p384_norm_order);
+ }
+ err = sp_384_mod_12(s, s, p384_order);
+ }
+ if (err == MP_OKAY) {
+ sp_384_norm_12(s);
+ {
+ /* u1 = e/s, u2 = r/s (mod order). */
+ sp_384_mont_inv_order_12(s, s, tmp);
+ sp_384_mont_mul_order_12(u1, u1, s);
+ sp_384_mont_mul_order_12(u2, u2, s);
+ }
+
+ err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
+ }
+
+ if (err == MP_OKAY) {
+ {
+ /* p1 = u1.G + u2.Q; handle the doubling / infinity corner
+ * cases of the projective addition. */
+ sp_384_proj_point_add_12(p1, p1, p2, tmp);
+ if (sp_384_iszero_12(p1->z)) {
+ if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
+ sp_384_proj_point_dbl_12(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ p1->x[4] = 0;
+ p1->x[5] = 0;
+ p1->x[6] = 0;
+ p1->x[7] = 0;
+ p1->x[8] = 0;
+ p1->x[9] = 0;
+ p1->x[10] = 0;
+ p1->x[11] = 0;
+ XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ }
+ }
+ }
+
+ /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+ /* Reload r and convert to Montgomery form. */
+ sp_384_from_mp(u2, 12, r);
+ err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+ }
+
+ if (err == MP_OKAY) {
+ /* u1 = r.z'.z' mod prime */
+ sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
+ *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+ if (*res == 0) {
+ /* Reload r and add order. */
+ sp_384_from_mp(u2, 12, r);
+ carry = sp_384_add_12(u2, u2, p384_order);
+ /* Carry means result is greater than mod and is not valid. */
+ if (carry == 0) {
+ sp_384_norm_12(u2);
+
+ /* Compare with mod and if greater or equal then not valid. */
+ c = sp_384_cmp_12(u2, p384_mod);
+ if (c < 0) {
+ /* Convert to Montogomery form */
+ err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+ if (err == MP_OKAY) {
+ /* u1 = (r + 1*order).z'.z' mod prime */
+ sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
+ p384_mp_mod);
+ *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+ }
+ }
+ }
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_384_point_free_12(p1, 0, heap);
+ sp_384_point_free_12(p2, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
+{
+/* Temporaries are either one heap allocation split in two (small/malloc
+ * builds) or two stack arrays. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit t1d[2*12];
+ sp_digit t2d[2*12];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 12;
+ t2 = d + 2 * 12;
+#else
+ (void)heap;
+
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ /* Verify the curve equation y^2 == x^3 - 3.x + b (mod p) by
+ * computing t1 = y^2 - x^3 + 3.x (mod p) and comparing with b.
+ * NOTE(review): mont_add here acts as a plain modular add on
+ * non-Montgomery values - confirm against the add implementation. */
+ /* t1 = y^2 mod p */
+ sp_384_sqr_12(t1, point->y);
+ (void)sp_384_mod_12(t1, t1, p384_mod);
+ /* t2 = x^3 mod p */
+ sp_384_sqr_12(t2, point->x);
+ (void)sp_384_mod_12(t2, t2, p384_mod);
+ sp_384_mul_12(t2, t2, point->x);
+ (void)sp_384_mod_12(t2, t2, p384_mod);
+ /* t2 = p - x^3 == -x^3 mod p */
+ (void)sp_384_sub_12(t2, p384_mod, t2);
+ /* t1 = y^2 - x^3 mod p */
+ sp_384_mont_add_12(t1, t1, t2, p384_mod);
+
+ /* t1 += 3.x mod p (three separate modular additions) */
+ sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+ sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+ sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+
+ /* On the curve only when the result equals the curve constant b. */
+ if (sp_384_cmp_12(t1, p384_b) != 0) {
+ err = MP_VAL;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 pubd;
+#endif
+ sp_point_384* pub;
+ byte one[1] = { 1 };
+ int err;
+
+ /* Obtain point storage: the macro points 'pub' at stack storage (pubd)
+ * or a heap allocation depending on the build configuration. */
+ err = sp_384_point_new_12(NULL, pubd, pub);
+ if (err == MP_OKAY) {
+ /* Load the ordinates and set z = 1 (affine representation). */
+ sp_384_from_mp(pub->x, 12, pX);
+ sp_384_from_mp(pub->y, 12, pY);
+ sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+
+ /* Delegate the curve-equation check. */
+ err = sp_384_ecc_is_point_12(pub, NULL);
+ }
+
+ sp_384_point_free_12(pub, 0, NULL);
+
+ return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[12];
+ sp_point_384 pubd;
+ sp_point_384 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_384* pub;
+ sp_point_384* p = NULL;
+ byte one[1] = { 1 };
+ int err;
+
+ /* Obtain point storage (stack or heap depending on build options). */
+ err = sp_384_point_new_12(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ priv = privd;
+#endif
+
+ /* Load public point (affine, z = 1) and the private scalar. */
+ sp_384_from_mp(pub->x, 12, pX);
+ sp_384_from_mp(pub->y, 12, pY);
+ sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+ sp_384_from_mp(priv, 12, privm);
+
+ /* Reject the point at infinity (x == 0 and y == 0). */
+ if ((sp_384_iszero_12(pub->x) != 0) &&
+ (sp_384_iszero_12(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y: both must be less than the prime. */
+ if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
+ sp_384_cmp_12(pub->y, p384_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_384_ecc_is_point_12(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order must be infinity - the point has correct order. */
+ err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_384_iszero_12(p->x) == 0) ||
+ (sp_384_iszero_12(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private must equal the point - scalar generates the key. */
+ err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_384_cmp_12(p->x, pub->x) != 0 ||
+ sp_384_cmp_12(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(p, 0, heap);
+ sp_384_point_free_12(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* qX, mp_int* qY, mp_int* qZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 12 * 5];
+ sp_point_384 pd;
+ sp_point_384 qd;
+#endif
+ /* Initialize tmp to NULL: in the malloc build, if point allocation
+ * fails before tmp is assigned, the cleanup below tests tmp - without
+ * this it would read an indeterminate pointer and could free garbage. */
+ sp_digit* tmp = NULL;
+ sp_point_384* p;
+ sp_point_384* q = NULL;
+ int err;
+
+ /* Obtain point storage (stack or heap depending on build options). */
+ err = sp_384_point_new_12(NULL, pd, p);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(NULL, qd, q);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Load both points and add in projective coordinates (in place). */
+ sp_384_from_mp(p->x, 12, pX);
+ sp_384_from_mp(p->y, 12, pY);
+ sp_384_from_mp(p->z, 12, pZ);
+ sp_384_from_mp(q->x, 12, qX);
+ sp_384_from_mp(q->y, 12, qY);
+ sp_384_from_mp(q->z, 12, qZ);
+
+ sp_384_proj_point_add_12(p, p, q, tmp);
+ }
+
+ /* Write the resulting ordinates back as mp_ints. */
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(q, 0, NULL);
+ sp_384_point_free_12(p, 0, NULL);
+
+ return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 12 * 2];
+ sp_point_384 pd;
+#endif
+ /* Initialize tmp to NULL: in the malloc build, if point allocation
+ * fails before tmp is assigned, the cleanup below tests tmp - without
+ * this it would read an indeterminate pointer and could free garbage. */
+ sp_digit* tmp = NULL;
+ sp_point_384* p;
+ int err;
+
+ /* Obtain point storage (stack or heap depending on build options). */
+ err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Load the point and double in projective coordinates (in place). */
+ sp_384_from_mp(p->x, 12, pX);
+ sp_384_from_mp(p->y, 12, pY);
+ sp_384_from_mp(p->z, 12, pZ);
+
+ sp_384_proj_point_dbl_12(p, p, tmp);
+ }
+
+ /* Write the resulting ordinates back as mp_ints. */
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(p, 0, NULL);
+
+ return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 12 * 6];
+ sp_point_384 pd;
+#endif
+ /* Initialize tmp to NULL: in the malloc build, if point allocation
+ * fails before tmp is assigned, the cleanup below tests tmp - without
+ * this it would read an indeterminate pointer and could free garbage. */
+ sp_digit* tmp = NULL;
+ sp_point_384* p;
+ int err;
+
+ /* Obtain point storage (stack or heap depending on build options). */
+ err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+ if (err == MP_OKAY) {
+ /* Load the projective point and map to affine (z becomes 1). */
+ sp_384_from_mp(p->x, 12, pX);
+ sp_384_from_mp(p->y, 12, pY);
+ sp_384_from_mp(p->z, 12, pZ);
+
+ sp_384_map_12(p, p, tmp);
+ }
+
+ /* Write the affine ordinates back over the inputs. */
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->x, pX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->y, pY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->z, pZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(p, 0, NULL);
+
+ return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_12(sp_digit* y)
+{
+/* The square root is computed by a fixed addition-chain exponentiation;
+ * the per-step comments give the running exponent. NOTE(review): the
+ * final exponent is presumably (p384 + 1) / 4 - confirm. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit t1d[2 * 12];
+ sp_digit t2d[2 * 12];
+ sp_digit t3d[2 * 12];
+ sp_digit t4d[2 * 12];
+ sp_digit t5d[2 * 12];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ sp_digit* t3;
+ sp_digit* t4;
+ sp_digit* t5;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 12;
+ t2 = d + 2 * 12;
+ t3 = d + 4 * 12;
+ t4 = d + 6 * 12;
+ t5 = d + 8 * 12;
+#else
+ t1 = t1d;
+ t2 = t2d;
+ t3 = t3d;
+ t4 = t4d;
+ t5 = t5d;
+#endif
+
+ {
+ /* t2 = y ^ 0x2 */
+ sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x3 */
+ sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod);
+ /* t5 = y ^ 0xc */
+ sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xf */
+ sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x1e */
+ sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+ /* t3 = y ^ 0x1f */
+ sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x3e0 */
+ sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x3ff */
+ sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x7fe0 */
+ sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod);
+ /* t3 = y ^ 0x7fff */
+ sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x3fff800 */
+ sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod);
+ /* t4 = y ^ 0x3ffffff */
+ sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0xffffffc000000 */
+ sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xfffffffffffff */
+ sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+ sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+ sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+ sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+ sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+ sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+ sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+ sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+ sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+ sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+ sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod);
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit xd[2 * 12];
+ sp_digit yd[2 * 12];
+#endif
+ sp_digit* x = NULL;
+ sp_digit* y = NULL;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ x = d + 0 * 12;
+ y = d + 2 * 12;
+#else
+ x = xd;
+ y = yd;
+#endif
+
+ /* Load x and convert to Montgomery form. */
+ sp_384_from_mp(x, 12, xm);
+ err = sp_384_mod_mul_norm_12(x, x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ /* y = x^3 */
+ {
+ sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod);
+ }
+ /* y = x^3 - 3x */
+ sp_384_mont_sub_12(y, y, x, p384_mod);
+ sp_384_mont_sub_12(y, y, x, p384_mod);
+ sp_384_mont_sub_12(y, y, x, p384_mod);
+ /* y = x^3 - 3x + b (x is reused to hold b in Montgomery form) */
+ err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ sp_384_mont_add_12(y, y, x, p384_mod);
+ /* y = sqrt(x^3 - 3x + b) */
+ err = sp_384_mont_sqrt_12(y);
+ }
+ if (err == MP_OKAY) {
+ /* Convert back out of Montgomery form. */
+ XMEMSET(y + 12, 0, 12U * sizeof(sp_digit));
+ sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod);
+ /* Pick the root whose parity matches 'odd': negate if they differ. */
+ if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+ sp_384_mont_sub_12(y, p384_mod, y, p384_mod);
+ }
+
+ err = sp_384_to_mp(y, ym);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_SP_ARM32_ASM */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_arm64.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_arm64.c
new file mode 100644
index 000000000..ebebe2a55
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_arm64.c
@@ -0,0 +1,42082 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+ defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef WOLFSSL_SP_ARM64_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j;
+ byte* d;
+
+ /* Consume 8 bytes per iteration from the end of the big-endian input,
+ * packing each group into one 64-bit digit (least-significant first). */
+ for (i = n - 1,j = 0; i >= 7; i -= 8) {
+ r[j] = ((sp_digit)a[i - 0] << 0) |
+ ((sp_digit)a[i - 1] << 8) |
+ ((sp_digit)a[i - 2] << 16) |
+ ((sp_digit)a[i - 3] << 24) |
+ ((sp_digit)a[i - 4] << 32) |
+ ((sp_digit)a[i - 5] << 40) |
+ ((sp_digit)a[i - 6] << 48) |
+ ((sp_digit)a[i - 7] << 56);
+ j++;
+ }
+
+ /* Handle the i+1 leftover bytes (n not a multiple of 8) by writing
+ * them through a byte view of r. NOTE(review): this indexing assumes
+ * a little-endian sp_digit layout (true on the AArch64 targets this
+ * file serves) - confirm if reused elsewhere. */
+ if (i >= 0) {
+ r[j] = 0;
+
+ d = (byte*)r;
+ switch (i) {
+ case 6: d[n - 1 - 6] = a[6]; /* fall through */
+ case 5: d[n - 1 - 5] = a[5]; /* fall through */
+ case 4: d[n - 1 - 4] = a[4]; /* fall through */
+ case 3: d[n - 1 - 3] = a[3]; /* fall through */
+ case 2: d[n - 1 - 2] = a[2]; /* fall through */
+ case 1: d[n - 1 - 1] = a[1]; /* fall through */
+ case 0: d[n - 1 - 0] = a[0]; /* fall through */
+ }
+ j++;
+ }
+
+ /* Zero-fill the remaining digits up to 'size'. */
+ for (; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+ /* mp_digit and sp_digit are the same width: straight copy. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 64
+ /* mp_digit wider than 64 bits: split each source digit across one or
+ * more 64-bit output digits. s tracks the bit offset consumed. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffffffffffffl;
+ s = 64U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 64U) <= (word32)DIGIT_BIT) {
+ s += 64U;
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp_digit narrower than 64 bits: accumulate several source digits
+ * into each 64-bit output digit. s is the bit position within r[j]. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 64) {
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 64 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ /* Carry the bits that did not fit into the next digit. */
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_2048_to_bin(sp_digit* r, byte* a)
+{
+ int i;
+ int shift;
+ int o = 0;
+
+ /* Serialize 32 64-bit digits, most-significant digit first; each
+ * digit is emitted as eight big-endian bytes, giving 256 bytes. */
+ for (i = 31; i >= 0; i--) {
+ for (shift = 56; shift >= 0; shift -= 8) {
+ a[o++] = (byte)(r[i] >> shift);
+ }
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ /* Low half of the product is accumulated in 'tmp' and copied to r at
+ * the end so that r may alias a or b. Register roles: x9-x16 = a[0..7],
+ * x17,x19-x25 = b[0..7], x4-x6 rotate as the 3-word accumulator,
+ * x7/x8 hold each partial product. Only caller-saved registers are
+ * clobbered (x19-x25 are callee-saved under AAPCS64 and are listed in
+ * the clobber list so the compiler saves them). Fix: the clobber list
+ * previously named "x9" twice; the duplicate entry is removed. */
+ sp_digit tmp[8];
+
+ __asm__ __volatile__ (
+ "ldp x9, x10, [%[a], 0]\n\t"
+ "ldp x11, x12, [%[a], 16]\n\t"
+ "ldp x13, x14, [%[a], 32]\n\t"
+ "ldp x15, x16, [%[a], 48]\n\t"
+ "ldp x17, x19, [%[b], 0]\n\t"
+ "ldp x20, x21, [%[b], 16]\n\t"
+ "ldp x22, x23, [%[b], 32]\n\t"
+ "ldp x24, x25, [%[b], 48]\n\t"
+ "# A[0] * B[0]\n\t"
+ "mul x4, x9, x17\n\t"
+ "umulh x5, x9, x17\n\t"
+ "str x4, [%[tmp]]\n\t"
+ "# A[0] * B[1]\n\t"
+ "mul x7, x9, x19\n\t"
+ "umulh x8, x9, x19\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[1] * B[0]\n\t"
+ "mul x7, x10, x17\n\t"
+ "adc x6, xzr, x8\n\t"
+ "umulh x8, x10, x17\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[tmp], 8]\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "# A[0] * B[2]\n\t"
+ "mul x7, x9, x20\n\t"
+ "umulh x8, x9, x20\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[1] * B[1]\n\t"
+ "mul x7, x10, x19\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x10, x19\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[2] * B[0]\n\t"
+ "mul x7, x11, x17\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x11, x17\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[tmp], 16]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[0] * B[3]\n\t"
+ "mul x7, x9, x21\n\t"
+ "umulh x8, x9, x21\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[1] * B[2]\n\t"
+ "mul x7, x10, x20\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x10, x20\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[2] * B[1]\n\t"
+ "mul x7, x11, x19\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x11, x19\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[3] * B[0]\n\t"
+ "mul x7, x12, x17\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x12, x17\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[tmp], 24]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[0] * B[4]\n\t"
+ "mul x7, x9, x22\n\t"
+ "umulh x8, x9, x22\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[1] * B[3]\n\t"
+ "mul x7, x10, x21\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x10, x21\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[2] * B[2]\n\t"
+ "mul x7, x11, x20\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x11, x20\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[3] * B[1]\n\t"
+ "mul x7, x12, x19\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x12, x19\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[4] * B[0]\n\t"
+ "mul x7, x13, x17\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x13, x17\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[tmp], 32]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[0] * B[5]\n\t"
+ "mul x7, x9, x23\n\t"
+ "umulh x8, x9, x23\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[1] * B[4]\n\t"
+ "mul x7, x10, x22\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x10, x22\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[2] * B[3]\n\t"
+ "mul x7, x11, x21\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x11, x21\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[3] * B[2]\n\t"
+ "mul x7, x12, x20\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x12, x20\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[4] * B[1]\n\t"
+ "mul x7, x13, x19\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x13, x19\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[5] * B[0]\n\t"
+ "mul x7, x14, x17\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x14, x17\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[tmp], 40]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[0] * B[6]\n\t"
+ "mul x7, x9, x24\n\t"
+ "umulh x8, x9, x24\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[1] * B[5]\n\t"
+ "mul x7, x10, x23\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x10, x23\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[2] * B[4]\n\t"
+ "mul x7, x11, x22\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x11, x22\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[3] * B[3]\n\t"
+ "mul x7, x12, x21\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x12, x21\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[4] * B[2]\n\t"
+ "mul x7, x13, x20\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x13, x20\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[5] * B[1]\n\t"
+ "mul x7, x14, x19\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x14, x19\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[6] * B[0]\n\t"
+ "mul x7, x15, x17\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x15, x17\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[tmp], 48]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[0] * B[7]\n\t"
+ "mul x7, x9, x25\n\t"
+ "umulh x8, x9, x25\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[1] * B[6]\n\t"
+ "mul x7, x10, x24\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x10, x24\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[2] * B[5]\n\t"
+ "mul x7, x11, x23\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x11, x23\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[3] * B[4]\n\t"
+ "mul x7, x12, x22\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x12, x22\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[4] * B[3]\n\t"
+ "mul x7, x13, x21\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x13, x21\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[5] * B[2]\n\t"
+ "mul x7, x14, x20\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x14, x20\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[6] * B[1]\n\t"
+ "mul x7, x15, x19\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x15, x19\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[7] * B[0]\n\t"
+ "mul x7, x16, x17\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x16, x17\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[tmp], 56]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[1] * B[7]\n\t"
+ "mul x7, x10, x25\n\t"
+ "umulh x8, x10, x25\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[2] * B[6]\n\t"
+ "mul x7, x11, x24\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x11, x24\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[3] * B[5]\n\t"
+ "mul x7, x12, x23\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x12, x23\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[4] * B[4]\n\t"
+ "mul x7, x13, x22\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x13, x22\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[5] * B[3]\n\t"
+ "mul x7, x14, x21\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x14, x21\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[6] * B[2]\n\t"
+ "mul x7, x15, x20\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x15, x20\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[7] * B[1]\n\t"
+ "mul x7, x16, x19\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x16, x19\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[r], 64]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[2] * B[7]\n\t"
+ "mul x7, x11, x25\n\t"
+ "umulh x8, x11, x25\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[3] * B[6]\n\t"
+ "mul x7, x12, x24\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x12, x24\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[4] * B[5]\n\t"
+ "mul x7, x13, x23\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x13, x23\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[5] * B[4]\n\t"
+ "mul x7, x14, x22\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x14, x22\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[6] * B[3]\n\t"
+ "mul x7, x15, x21\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x15, x21\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[7] * B[2]\n\t"
+ "mul x7, x16, x20\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x16, x20\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[r], 72]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[3] * B[7]\n\t"
+ "mul x7, x12, x25\n\t"
+ "umulh x8, x12, x25\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[4] * B[6]\n\t"
+ "mul x7, x13, x24\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x13, x24\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[5] * B[5]\n\t"
+ "mul x7, x14, x23\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x14, x23\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[6] * B[4]\n\t"
+ "mul x7, x15, x22\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x15, x22\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[7] * B[3]\n\t"
+ "mul x7, x16, x21\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x16, x21\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[r], 80]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[4] * B[7]\n\t"
+ "mul x7, x13, x25\n\t"
+ "umulh x8, x13, x25\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[5] * B[6]\n\t"
+ "mul x7, x14, x24\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x14, x24\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[6] * B[5]\n\t"
+ "mul x7, x15, x23\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x15, x23\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[7] * B[4]\n\t"
+ "mul x7, x16, x22\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x16, x22\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[r], 88]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[5] * B[7]\n\t"
+ "mul x7, x14, x25\n\t"
+ "umulh x8, x14, x25\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[6] * B[6]\n\t"
+ "mul x7, x15, x24\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x15, x24\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[7] * B[5]\n\t"
+ "mul x7, x16, x23\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x16, x23\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[r], 96]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[6] * B[7]\n\t"
+ "mul x7, x15, x25\n\t"
+ "umulh x8, x15, x25\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[7] * B[6]\n\t"
+ "mul x7, x16, x24\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x16, x24\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[r], 104]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[7] * B[7]\n\t"
+ "mul x7, x16, x25\n\t"
+ "umulh x8, x16, x25\n\t"
+ "adds x6, x6, x7\n\t"
+ "adc x4, x4, x8\n\t"
+ "stp x6, x4, [%[r], 112]\n\t"
+ "ldp x9, x10, [%[tmp], 0]\n\t"
+ "ldp x11, x12, [%[tmp], 16]\n\t"
+ "ldp x13, x14, [%[tmp], 32]\n\t"
+ "ldp x15, x16, [%[tmp], 48]\n\t"
+ "stp x9, x10, [%[r], 0]\n\t"
+ "stp x11, x12, [%[r], 16]\n\t"
+ "stp x13, x14, [%[r], 32]\n\t"
+ "stp x15, x16, [%[r], 48]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25"
+ );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer. Receives the 16 digit result.
+ * a A single precision integer. The 8 digit value to square.
+ *
+ * Each off-diagonal product A[i]*A[j] (i < j) is accumulated once, the
+ * whole running sum is doubled, and then the diagonal squares A[i]*A[i]
+ * are added in. Callee-saved registers x19-x29 appear in the clobber
+ * list so the compiler preserves them around the asm block.
+ */
+static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "ldp x22, x23, [%[a], 0]\n\t"
+ "ldp x24, x25, [%[a], 16]\n\t"
+ "ldp x26, x27, [%[a], 32]\n\t"
+ "ldp x28, x29, [%[a], 48]\n\t"
+ "# A[0] * A[1]\n\t"
+ "mul x3, x22, x23\n\t"
+ "umulh x7, x22, x23\n\t"
+ "# A[0] * A[2]\n\t"
+ "mul x4, x22, x24\n\t"
+ "umulh x5, x22, x24\n\t"
+ "adds x7, x7, x4\n\t"
+ "# A[0] * A[3]\n\t"
+ "mul x4, x22, x25\n\t"
+ "adc x8, xzr, x5\n\t"
+ "umulh x5, x22, x25\n\t"
+ "adds x8, x8, x4\n\t"
+ "# A[1] * A[2]\n\t"
+ "mul x4, x23, x24\n\t"
+ "adc x9, xzr, x5\n\t"
+ "umulh x5, x23, x24\n\t"
+ "adds x8, x8, x4\n\t"
+ "# A[0] * A[4]\n\t"
+ "mul x4, x22, x26\n\t"
+ "adcs x9, x9, x5\n\t"
+ "umulh x5, x22, x26\n\t"
+ "adc x10, xzr, xzr\n\t"
+ "adds x9, x9, x4\n\t"
+ "# A[1] * A[3]\n\t"
+ "mul x4, x23, x25\n\t"
+ "adc x10, x10, x5\n\t"
+ "umulh x5, x23, x25\n\t"
+ "adds x9, x9, x4\n\t"
+ "# A[0] * A[5]\n\t"
+ "mul x4, x22, x27\n\t"
+ "adcs x10, x10, x5\n\t"
+ "umulh x5, x22, x27\n\t"
+ "adc x11, xzr, xzr\n\t"
+ "adds x10, x10, x4\n\t"
+ "# A[1] * A[4]\n\t"
+ "mul x4, x23, x26\n\t"
+ "adc x11, x11, x5\n\t"
+ "umulh x5, x23, x26\n\t"
+ "adds x10, x10, x4\n\t"
+ "# A[2] * A[3]\n\t"
+ "mul x4, x24, x25\n\t"
+ "adcs x11, x11, x5\n\t"
+ "umulh x5, x24, x25\n\t"
+ "adc x12, xzr, xzr\n\t"
+ "adds x10, x10, x4\n\t"
+ "# A[0] * A[6]\n\t"
+ "mul x4, x22, x28\n\t"
+ "adcs x11, x11, x5\n\t"
+ "umulh x5, x22, x28\n\t"
+ "adc x12, x12, xzr\n\t"
+ "adds x11, x11, x4\n\t"
+ "# A[1] * A[5]\n\t"
+ "mul x4, x23, x27\n\t"
+ "adcs x12, x12, x5\n\t"
+ "umulh x5, x23, x27\n\t"
+ "adc x13, xzr, xzr\n\t"
+ "adds x11, x11, x4\n\t"
+ "# A[2] * A[4]\n\t"
+ "mul x4, x24, x26\n\t"
+ "adcs x12, x12, x5\n\t"
+ "umulh x5, x24, x26\n\t"
+ "adc x13, x13, xzr\n\t"
+ "adds x11, x11, x4\n\t"
+ "# A[0] * A[7]\n\t"
+ "mul x4, x22, x29\n\t"
+ "adcs x12, x12, x5\n\t"
+ "umulh x5, x22, x29\n\t"
+ "adc x13, x13, xzr\n\t"
+ "adds x12, x12, x4\n\t"
+ "# A[1] * A[6]\n\t"
+ "mul x4, x23, x28\n\t"
+ "adcs x13, x13, x5\n\t"
+ "umulh x5, x23, x28\n\t"
+ "adc x14, xzr, xzr\n\t"
+ "adds x12, x12, x4\n\t"
+ "# A[2] * A[5]\n\t"
+ "mul x4, x24, x27\n\t"
+ "adcs x13, x13, x5\n\t"
+ "umulh x5, x24, x27\n\t"
+ "adc x14, x14, xzr\n\t"
+ "adds x12, x12, x4\n\t"
+ "# A[3] * A[4]\n\t"
+ "mul x4, x25, x26\n\t"
+ "adcs x13, x13, x5\n\t"
+ "umulh x5, x25, x26\n\t"
+ "adc x14, x14, xzr\n\t"
+ "adds x12, x12, x4\n\t"
+ "# A[1] * A[7]\n\t"
+ "mul x4, x23, x29\n\t"
+ "adcs x13, x13, x5\n\t"
+ "umulh x5, x23, x29\n\t"
+ "adc x14, x14, xzr\n\t"
+ "adds x13, x13, x4\n\t"
+ "# A[2] * A[6]\n\t"
+ "mul x4, x24, x28\n\t"
+ "adcs x14, x14, x5\n\t"
+ "umulh x5, x24, x28\n\t"
+ "adc x15, xzr, xzr\n\t"
+ "adds x13, x13, x4\n\t"
+ "# A[3] * A[5]\n\t"
+ "mul x4, x25, x27\n\t"
+ "adcs x14, x14, x5\n\t"
+ "umulh x5, x25, x27\n\t"
+ "adc x15, x15, xzr\n\t"
+ "adds x13, x13, x4\n\t"
+ "# A[2] * A[7]\n\t"
+ "mul x4, x24, x29\n\t"
+ "adcs x14, x14, x5\n\t"
+ "umulh x5, x24, x29\n\t"
+ "adc x15, x15, xzr\n\t"
+ "adds x14, x14, x4\n\t"
+ "# A[3] * A[6]\n\t"
+ "mul x4, x25, x28\n\t"
+ "adcs x15, x15, x5\n\t"
+ "umulh x5, x25, x28\n\t"
+ "adc x16, xzr, xzr\n\t"
+ "adds x14, x14, x4\n\t"
+ "# A[4] * A[5]\n\t"
+ "mul x4, x26, x27\n\t"
+ "adcs x15, x15, x5\n\t"
+ "umulh x5, x26, x27\n\t"
+ "adc x16, x16, xzr\n\t"
+ "adds x14, x14, x4\n\t"
+ "# A[3] * A[7]\n\t"
+ "mul x4, x25, x29\n\t"
+ "adcs x15, x15, x5\n\t"
+ "umulh x5, x25, x29\n\t"
+ "adc x16, x16, xzr\n\t"
+ "adds x15, x15, x4\n\t"
+ "# A[4] * A[6]\n\t"
+ "mul x4, x26, x28\n\t"
+ "adcs x16, x16, x5\n\t"
+ "umulh x5, x26, x28\n\t"
+ "adc x17, xzr, xzr\n\t"
+ "adds x15, x15, x4\n\t"
+ "# A[4] * A[7]\n\t"
+ "mul x4, x26, x29\n\t"
+ "adcs x16, x16, x5\n\t"
+ "umulh x5, x26, x29\n\t"
+ "adc x17, x17, xzr\n\t"
+ "adds x16, x16, x4\n\t"
+ "# A[5] * A[6]\n\t"
+ "mul x4, x27, x28\n\t"
+ "adcs x17, x17, x5\n\t"
+ "umulh x5, x27, x28\n\t"
+ "adc x19, xzr, xzr\n\t"
+ "adds x16, x16, x4\n\t"
+ "# A[5] * A[7]\n\t"
+ "mul x4, x27, x29\n\t"
+ "adcs x17, x17, x5\n\t"
+ "umulh x5, x27, x29\n\t"
+ "adc x19, x19, xzr\n\t"
+ "adds x17, x17, x4\n\t"
+ "# A[6] * A[7]\n\t"
+ "mul x4, x28, x29\n\t"
+ "adcs x19, x19, x5\n\t"
+ "umulh x5, x28, x29\n\t"
+ "adc x20, xzr, xzr\n\t"
+ "adds x19, x19, x4\n\t"
+ "adc x20, x20, x5\n\t"
+ /* Off-diagonal sum complete in x3,x7..x17,x19,x20: each A[i]*A[j]
+  * (i != j) must appear twice, so double the whole chain. */
+ "# Double\n\t"
+ "adds x3, x3, x3\n\t"
+ "adcs x7, x7, x7\n\t"
+ "adcs x8, x8, x8\n\t"
+ "adcs x9, x9, x9\n\t"
+ "adcs x10, x10, x10\n\t"
+ "adcs x11, x11, x11\n\t"
+ "adcs x12, x12, x12\n\t"
+ "adcs x13, x13, x13\n\t"
+ "adcs x14, x14, x14\n\t"
+ "adcs x15, x15, x15\n\t"
+ "adcs x16, x16, x16\n\t"
+ "adcs x17, x17, x17\n\t"
+ "adcs x19, x19, x19\n\t"
+ "# A[0] * A[0]\n\t"
+ "mul x2, x22, x22\n\t"
+ "adcs x20, x20, x20\n\t"
+ "umulh x4, x22, x22\n\t"
+ /* x21 captures the carry out of the doubling (cset on carry set). */
+ "cset x21, cs\n\t"
+ "# A[1] * A[1]\n\t"
+ "mul x5, x23, x23\n\t"
+ "adds x3, x3, x4\n\t"
+ "umulh x6, x23, x23\n\t"
+ "adcs x7, x7, x5\n\t"
+ "# A[2] * A[2]\n\t"
+ "mul x4, x24, x24\n\t"
+ "adcs x8, x8, x6\n\t"
+ "umulh x5, x24, x24\n\t"
+ "adcs x9, x9, x4\n\t"
+ "# A[3] * A[3]\n\t"
+ "mul x6, x25, x25\n\t"
+ "adcs x10, x10, x5\n\t"
+ "umulh x4, x25, x25\n\t"
+ "adcs x11, x11, x6\n\t"
+ "# A[4] * A[4]\n\t"
+ "mul x5, x26, x26\n\t"
+ "adcs x12, x12, x4\n\t"
+ "umulh x6, x26, x26\n\t"
+ "adcs x13, x13, x5\n\t"
+ "# A[5] * A[5]\n\t"
+ "mul x4, x27, x27\n\t"
+ "adcs x14, x14, x6\n\t"
+ "umulh x5, x27, x27\n\t"
+ "adcs x15, x15, x4\n\t"
+ "# A[6] * A[6]\n\t"
+ "mul x6, x28, x28\n\t"
+ "adcs x16, x16, x5\n\t"
+ "umulh x4, x28, x28\n\t"
+ "adcs x17, x17, x6\n\t"
+ "# A[7] * A[7]\n\t"
+ "mul x5, x29, x29\n\t"
+ "adcs x19, x19, x4\n\t"
+ "umulh x6, x29, x29\n\t"
+ "adcs x20, x20, x5\n\t"
+ "stp x2, x3, [%[r], 0]\n\t"
+ "adc x21, x21, x6\n\t"
+ "stp x7, x8, [%[r], 16]\n\t"
+ "stp x9, x10, [%[r], 32]\n\t"
+ "stp x11, x12, [%[r], 48]\n\t"
+ "stp x13, x14, [%[r], 64]\n\t"
+ "stp x15, x16, [%[r], 80]\n\t"
+ "stp x17, x19, [%[r], 96]\n\t"
+ "stp x20, x21, [%[r], 112]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "x4", "x5", "x6", "x2", "x3", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29"
+ );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer. Receives the low 8 digits of the sum.
+ * a A single precision integer. 8 digits.
+ * b A single precision integer. 8 digits.
+ *
+ * Returns the carry out of the most significant digit (0 or 1).
+ */
+static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "adds x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 48]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ /* Carry flag -> 0/1 return value. */
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return (sp_digit)r;
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result. 16 digits.
+ * b A single precision integer. 16 digits.
+ *
+ * Returns 0 when there is no borrow and all ones ((sp_digit)-1) when
+ * a < b (borrow out of the top digit).
+ */
+static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "ldp x2, x3, [%[a], 0]\n\t"
+ "ldp x6, x7, [%[b], 0]\n\t"
+ "subs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 16]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 16]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 0]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 16]\n\t"
+ "ldp x2, x3, [%[a], 32]\n\t"
+ "ldp x6, x7, [%[b], 32]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 48]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 48]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 32]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 48]\n\t"
+ "ldp x2, x3, [%[a], 64]\n\t"
+ "ldp x6, x7, [%[b], 64]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 80]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 80]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 64]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 80]\n\t"
+ "ldp x2, x3, [%[a], 96]\n\t"
+ "ldp x6, x7, [%[b], 96]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 112]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 112]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 96]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 112]\n\t"
+ /* Borrow (carry clear on AArch64 subtract) -> all-ones mask; else 0. */
+ "csetm %[a], cc\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+
+ return (sp_digit)a;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer. Receives the low 16 digits of the sum.
+ * a A single precision integer. 16 digits.
+ * b A single precision integer. 16 digits.
+ *
+ * Returns the carry out of the most significant digit (0 or 1).
+ */
+static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "adds x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 48]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 80]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 112]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ /* Carry flag -> 0/1 return value. */
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return (sp_digit)r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer. Receives a masked by m.
+ * a A single precision integer. 8 digits.
+ * m Mask to AND against each digit (0 or all ones in the callers here).
+ */
+static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i = 0;
+
+    while (i < 8) {
+        r[i] = a[i] & m;
+        i++;
+    }
+#else
+    /* Fully unrolled for speed. */
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Add digit to a into r. (r = a + d)
+ *
+ * r A single precision integer. Receives the 8 digit result.
+ * a A single precision integer. 8 digits.
+ * d Single digit added at position 0; the carry ripples up through
+ *   the remaining digits (added as zero plus carry).
+ */
+static void sp_2048_add_zero_8(sp_digit* r, const sp_digit* a,
+ const sp_digit d)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adds x3, x3, %[d]\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+ : "memory", "x3", "x4", "x5", "x6"
+ );
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: with a = a1*2^512 + a0 and b = b1*2^512 + b0,
+ * r = a1*b1*2^1024 + ((a0+a1)*(b0+b1) - a1*b1 - a0*b0)*2^512 + a0*b0.
+ *
+ * r A single precision integer. Receives the 32 digit product.
+ * a A single precision integer. 16 digits.
+ * b A single precision integer. 16 digits.
+ */
+SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* pl = r;      /* Low product a0*b0, built directly in r. */
+    sp_digit pm[16];       /* Middle product (a0+a1)*(b0+b1). */
+    sp_digit sa[8];        /* Digit sum a0+a1 (low 8 digits). */
+    sp_digit sb[8];        /* Digit sum b0+b1 (low 8 digits). */
+    sp_digit ph[16];       /* High product a1*b1. */
+    sp_digit u;            /* Carry accumulated into the top quarter. */
+    sp_digit ca;           /* Carry out of a0+a1. */
+    sp_digit cb;           /* Carry out of b0+b1. */
+
+    ca = sp_2048_add_8(sa, a, &a[8]);
+    cb = sp_2048_add_8(sb, b, &b[8]);
+    u  = ca & cb;
+    sp_2048_mul_8(pm, sa, sb);
+    sp_2048_mul_8(ph, &a[8], &b[8]);
+    sp_2048_mul_8(pl, a, b);
+    /* Fold the digit-sum carries into the result at digit 16. */
+    sp_2048_mask_8(r + 16, sa, 0 - cb);
+    sp_2048_mask_8(sb, sb, 0 - ca);
+    u += sp_2048_add_8(r + 16, r + 16, sb);
+    /* pm -= ph; pm -= pl; then combine the three partial products. */
+    u += sp_2048_sub_in_place_16(pm, ph);
+    u += sp_2048_sub_in_place_16(pm, pl);
+    u += sp_2048_add_16(r + 8, r + 8, pm);
+    u += sp_2048_add_8(r + 16, r + 16, ph);
+    sp_2048_add_zero_8(r + 24, ph + 8, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r A single precision integer. Receives the low 8 digits of the result.
+ * a A single precision integer. 8 digits.
+ *
+ * Returns the carry out of the top digit (0 or 1).
+ */
+static sp_digit sp_2048_dbl_8(sp_digit* r, const sp_digit* a)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "add x11, %[a], 64\n\t"
+ "\n1:\n\t"
+ /* c is 0 or 1: c + (-1) re-creates the previous iteration's carry
+  * flag (clear when c == 0, set when c == 1). */
+ "adds %[c], %[c], #-1\n\t"
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "adcs x3, x3, x3\n\t"
+ "adcs x4, x4, x4\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ "cset %[c], cs\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x11"
+ );
+
+ return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * r A single precision integer. Receives the low 8 digits of the result.
+ * a A single precision integer. 8 digits.
+ *
+ * Returns the carry out of the top digit (0 or 1).
+ */
+static sp_digit sp_2048_dbl_8(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "adds x3, x3, x3\n\t"
+ "ldr x5, [%[a], 16]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 24]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 48]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 56]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ /* Carry flag -> 0/1 return value. */
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "x3", "x4", "x5", "x6"
+ );
+
+ return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring: with a = a1*2^512 + a0,
+ * r = a1^2*2^1024 + ((a0+a1)^2 - a1^2 - a0^2)*2^512 + a0^2.
+ *
+ * r A single precision integer. Receives the 32 digit result.
+ * a A single precision integer. 16 digits.
+ */
+SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* pl = r;      /* Low square a0^2, built directly in r. */
+    sp_digit ph[16];       /* High square a1^2. */
+    sp_digit pm[16];       /* Middle square (a0+a1)^2. */
+    sp_digit sa[8];        /* Digit sum a0+a1 (low 8 digits). */
+    sp_digit u;            /* Carry accumulated into the top quarter. */
+
+    u = sp_2048_add_8(sa, a, &a[8]);
+    sp_2048_sqr_8(pm, sa);
+    sp_2048_sqr_8(ph, &a[8]);
+    sp_2048_sqr_8(pl, a);
+    /* Fold the digit-sum carry into the result at digit 16 (doubled,
+     * since the cross term appears twice in the square). */
+    sp_2048_mask_8(r + 16, sa, 0 - u);
+    u += sp_2048_dbl_8(r + 16, r + 16);
+    /* pm -= ph; pm -= pl; then combine the three partial squares. */
+    u += sp_2048_sub_in_place_16(pm, ph);
+    u += sp_2048_sub_in_place_16(pm, pl);
+    u += sp_2048_add_16(r + 8, r + 8, pm);
+    u += sp_2048_add_8(r + 16, r + 16, ph);
+    sp_2048_add_zero_8(r + 24, ph + 8, u);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result. 32 digits.
+ * b A single precision integer. 32 digits.
+ *
+ * Returns 0 when there is no borrow and all ones ((sp_digit)-1) when
+ * a < b (borrow out of the top digit).
+ */
+static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "ldp x2, x3, [%[a], 0]\n\t"
+ "ldp x6, x7, [%[b], 0]\n\t"
+ "subs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 16]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 16]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 0]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 16]\n\t"
+ "ldp x2, x3, [%[a], 32]\n\t"
+ "ldp x6, x7, [%[b], 32]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 48]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 48]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 32]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 48]\n\t"
+ "ldp x2, x3, [%[a], 64]\n\t"
+ "ldp x6, x7, [%[b], 64]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 80]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 80]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 64]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 80]\n\t"
+ "ldp x2, x3, [%[a], 96]\n\t"
+ "ldp x6, x7, [%[b], 96]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 112]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 112]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 96]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 112]\n\t"
+ "ldp x2, x3, [%[a], 128]\n\t"
+ "ldp x6, x7, [%[b], 128]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 144]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 144]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 128]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 144]\n\t"
+ "ldp x2, x3, [%[a], 160]\n\t"
+ "ldp x6, x7, [%[b], 160]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 176]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 176]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 160]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 176]\n\t"
+ "ldp x2, x3, [%[a], 192]\n\t"
+ "ldp x6, x7, [%[b], 192]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 208]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 208]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 192]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 208]\n\t"
+ "ldp x2, x3, [%[a], 224]\n\t"
+ "ldp x6, x7, [%[b], 224]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 240]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 240]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 224]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 240]\n\t"
+ /* Borrow (carry clear on AArch64 subtract) -> all-ones mask; else 0. */
+ "csetm %[a], cc\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+
+ return (sp_digit)a;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer. Receives the low 32 digits of the sum.
+ * a A single precision integer. 32 digits.
+ * b A single precision integer. 32 digits.
+ *
+ * Returns the carry out of the most significant digit (0 or 1).
+ */
+static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "adds x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 48]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 80]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 112]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ "ldp x3, x4, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 144]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 128]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 144]\n\t"
+ "ldp x3, x4, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 176]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 160]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 176]\n\t"
+ "ldp x3, x4, [%[a], 192]\n\t"
+ "ldp x7, x8, [%[b], 192]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 208]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 208]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 192]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 208]\n\t"
+ "ldp x3, x4, [%[a], 224]\n\t"
+ "ldp x7, x8, [%[b], 224]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 240]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 240]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 224]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 240]\n\t"
+ /* Carry flag -> 0/1 return value. */
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return (sp_digit)r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer. Receives a masked by m.
+ * a A single precision integer. 16 digits.
+ * m Mask to AND against each digit (0 or all ones in the callers here).
+ */
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i = 0;
+
+    while (i < 16) {
+        r[i] = a[i] & m;
+        i++;
+    }
+#else
+    int j;
+
+    /* Eight digits per iteration, partially unrolled. */
+    for (j = 0; j < 16; j += 8) {
+        r[j+0] = a[j+0] & m;
+        r[j+1] = a[j+1] & m;
+        r[j+2] = a[j+2] & m;
+        r[j+3] = a[j+3] & m;
+        r[j+4] = a[j+4] & m;
+        r[j+5] = a[j+5] & m;
+        r[j+6] = a[j+6] & m;
+        r[j+7] = a[j+7] & m;
+    }
+#endif
+}
+
+/* Add digit to a into r. (r = a + d)
+ *
+ * r A single precision integer. Receives the 16 digit result.
+ * a A single precision integer. 16 digits.
+ * d Single digit added at position 0; the carry ripples up through
+ *   the remaining digits (added as zero plus carry).
+ */
+static void sp_2048_add_zero_16(sp_digit* r, const sp_digit* a,
+ const sp_digit d)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adds x3, x3, %[d]\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+ : "memory", "x3", "x4", "x5", "x6"
+ );
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: with a = a1*2^1024 + a0 and b = b1*2^1024 + b0,
+ * r = a1*b1*2^2048 + ((a0+a1)*(b0+b1) - a1*b1 - a0*b0)*2^1024 + a0*b0.
+ *
+ * r A single precision integer. Receives the 64 digit product.
+ * a A single precision integer. 32 digits.
+ * b A single precision integer. 32 digits.
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* pl = r;      /* Low product a0*b0, built directly in r. */
+    sp_digit pm[32];       /* Middle product (a0+a1)*(b0+b1). */
+    sp_digit sa[16];       /* Digit sum a0+a1 (low 16 digits). */
+    sp_digit sb[16];       /* Digit sum b0+b1 (low 16 digits). */
+    sp_digit ph[32];       /* High product a1*b1. */
+    sp_digit u;            /* Carry accumulated into the top quarter. */
+    sp_digit ca;           /* Carry out of a0+a1. */
+    sp_digit cb;           /* Carry out of b0+b1. */
+
+    ca = sp_2048_add_16(sa, a, &a[16]);
+    cb = sp_2048_add_16(sb, b, &b[16]);
+    u  = ca & cb;
+    sp_2048_mul_16(pm, sa, sb);
+    sp_2048_mul_16(ph, &a[16], &b[16]);
+    sp_2048_mul_16(pl, a, b);
+    /* Fold the digit-sum carries into the result at digit 32. */
+    sp_2048_mask_16(r + 32, sa, 0 - cb);
+    sp_2048_mask_16(sb, sb, 0 - ca);
+    u += sp_2048_add_16(r + 32, r + 32, sb);
+    /* pm -= ph; pm -= pl; then combine the three partial products. */
+    u += sp_2048_sub_in_place_32(pm, ph);
+    u += sp_2048_sub_in_place_32(pm, pl);
+    u += sp_2048_add_32(r + 16, r + 16, pm);
+    u += sp_2048_add_16(r + 32, r + 32, ph);
+    sp_2048_add_zero_16(r + 48, ph + 16, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r A single precision integer. Receives the low 16 digits of the result.
+ * a A single precision integer. 16 digits.
+ *
+ * Returns the carry out of the top digit (0 or 1).
+ */
+static sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "add x11, %[a], 128\n\t"
+ "\n1:\n\t"
+ /* c is 0 or 1: c + (-1) re-creates the previous iteration's carry
+  * flag (clear when c == 0, set when c == 1). */
+ "adds %[c], %[c], #-1\n\t"
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "adcs x3, x3, x3\n\t"
+ "adcs x4, x4, x4\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ "cset %[c], cs\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x11"
+ );
+
+ return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * r A single precision integer. Receives the low 16 digits of the result.
+ * a A single precision integer. 16 digits.
+ *
+ * Returns the carry out of the top digit (0 or 1).
+ */
+static sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "adds x3, x3, x3\n\t"
+ "ldr x5, [%[a], 16]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 24]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 48]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 56]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 80]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 88]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 112]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 120]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ /* Carry flag -> 0/1 return value. */
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "x3", "x4", "x5", "x6"
+ );
+
+ return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring: with a = a1*2^1024 + a0,
+ * r = a1^2*2^2048 + ((a0+a1)^2 - a1^2 - a0^2)*2^1024 + a0^2.
+ *
+ * r A single precision integer. Receives the 64 digit result.
+ * a A single precision integer. 32 digits.
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* pl = r;      /* Low square a0^2, built directly in r. */
+    sp_digit ph[32];       /* High square a1^2. */
+    sp_digit pm[32];       /* Middle square (a0+a1)^2. */
+    sp_digit sa[16];       /* Digit sum a0+a1 (low 16 digits). */
+    sp_digit u;            /* Carry accumulated into the top quarter. */
+
+    u = sp_2048_add_16(sa, a, &a[16]);
+    sp_2048_sqr_16(pm, sa);
+    sp_2048_sqr_16(ph, &a[16]);
+    sp_2048_sqr_16(pl, a);
+    /* Fold the digit-sum carry into the result at digit 32 (doubled,
+     * since the cross term appears twice in the square). */
+    sp_2048_mask_16(r + 32, sa, 0 - u);
+    u += sp_2048_dbl_16(r + 32, r + 32);
+    /* pm -= ph; pm -= pl; then combine the three partial squares. */
+    u += sp_2048_sub_in_place_32(pm, ph);
+    u += sp_2048_sub_in_place_32(pm, pl);
+    u += sp_2048_add_32(r + 16, r + 16, pm);
+    u += sp_2048_add_16(r + 32, r + 32, ph);
+    sp_2048_add_zero_16(r + 48, ph + 16, u);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer. Receives the low 32 digits of the sum.
+ * a A single precision integer. 32 digits.
+ * b A single precision integer. 32 digits.
+ *
+ * Returns the carry out of the most significant digit (0 or 1).
+ */
+static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "add x11, %[a], 256\n\t"
+ "\n1:\n\t"
+ /* c is 0 or 1: c + (-1) re-creates the previous iteration's carry
+  * flag (clear when c == 0, set when c == 1). */
+ "adds %[c], %[c], #-1\n\t"
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "ldp x7, x8, [%[b]], #16\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x9, x10, [%[b]], #16\n\t"
+ "adcs x4, x4, x8\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ "cset %[c], cs\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result. 32 digits.
+ * b A single precision integer. 32 digits.
+ *
+ * Returns 0 when there is no borrow and all ones ((sp_digit)-1) when
+ * a < b (borrow out of the top digit).
+ */
+static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "add x10, %[a], 256\n\t"
+ "\n1:\n\t"
+ /* c is 0 or all ones: xzr - c restores the previous iteration's
+  * borrow flag (carry set when c == 0, clear when c == -1). */
+ "subs %[c], xzr, %[c]\n\t"
+ "ldp x2, x3, [%[a]]\n\t"
+ "ldp x4, x5, [%[a], #16]\n\t"
+ "ldp x6, x7, [%[b]], #16\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x8, x9, [%[b]], #16\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a]], #16\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a]], #16\n\t"
+ "csetm %[c], cc\n\t"
+ "cmp %[a], x10\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product scanning (column-wise schoolbook): x5 is the byte offset of
+ * the output digit being produced, x3/x4 are byte offsets into a and b,
+ * and (x8:x7:x6) is a three-word column accumulator. The result is
+ * built in tmp and copied to r at the end, so r may overlap a or b.
+ *
+ * r A single precision integer. Receives the 64 digit product.
+ * a A single precision integer. 32 digits.
+ * b A single precision integer. 32 digits.
+ */
+static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ sp_digit tmp[64];
+
+ __asm__ __volatile__ (
+ "mov x5, 0\n\t"
+ "mov x6, 0\n\t"
+ "mov x7, 0\n\t"
+ "mov x8, 0\n\t"
+ "\n1:\n\t"
+ /* Start of column: clamp x3 so a[x3] stays within the 32 digits. */
+ "subs x3, x5, 248\n\t"
+ "csel x3, xzr, x3, cc\n\t"
+ "sub x4, x5, x3\n\t"
+ "\n2:\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "ldr x11, [%[b], x4]\n\t"
+ /* Accumulate a[x3/8] * b[x4/8] into (x8:x7:x6). */
+ "mul x9, x10, x11\n\t"
+ "umulh x10, x10, x11\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "add x3, x3, #8\n\t"
+ "sub x4, x4, #8\n\t"
+ "cmp x3, 256\n\t"
+ "b.eq 3f\n\t"
+ "cmp x3, x5\n\t"
+ "b.le 2b\n\t"
+ "\n3:\n\t"
+ /* Column done: emit low word, shift the accumulator down. */
+ "str x6, [%[r], x5]\n\t"
+ "mov x6, x7\n\t"
+ "mov x7, x8\n\t"
+ "mov x8, #0\n\t"
+ "add x5, x5, #8\n\t"
+ "cmp x5, 496\n\t"
+ "b.le 1b\n\t"
+ "str x6, [%[r], x5]\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product scanning: off-diagonal products a[i]*a[j] (i != j) are added
+ * into the column accumulator (x8:x7:x6) twice; the diagonal square
+ * (branch 4, when the two offsets meet) is added once. The result is
+ * built in tmp and copied to r at the end, so r may overlap a.
+ *
+ * r A single precision integer. Receives the 64 digit result.
+ * a A single precision integer. 32 digits.
+ */
+static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+ sp_digit tmp[64];
+
+ __asm__ __volatile__ (
+ "mov x6, 0\n\t"
+ "mov x7, 0\n\t"
+ "mov x8, 0\n\t"
+ "mov x5, 0\n\t"
+ "\n1:\n\t"
+ /* Start of column: clamp x3 so a[x3] stays within the 32 digits. */
+ "subs x3, x5, 248\n\t"
+ "csel x3, xzr, x3, cc\n\t"
+ "sub x4, x5, x3\n\t"
+ "\n2:\n\t"
+ "cmp x4, x3\n\t"
+ "b.eq 4f\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "ldr x11, [%[a], x4]\n\t"
+ /* Off-diagonal product: add twice. */
+ "mul x9, x10, x11\n\t"
+ "umulh x10, x10, x11\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "b.al 5f\n\t"
+ "\n4:\n\t"
+ /* Diagonal square: add once. */
+ "ldr x10, [%[a], x3]\n\t"
+ "mul x9, x10, x10\n\t"
+ "umulh x10, x10, x10\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "\n5:\n\t"
+ "add x3, x3, #8\n\t"
+ "sub x4, x4, #8\n\t"
+ "cmp x3, 256\n\t"
+ "b.eq 3f\n\t"
+ "cmp x3, x4\n\t"
+ "b.gt 3f\n\t"
+ "cmp x3, x5\n\t"
+ "b.le 2b\n\t"
+ "\n3:\n\t"
+ /* Column done: emit low word, shift the accumulator down. */
+ "str x6, [%[r], x5]\n\t"
+ "mov x6, x7\n\t"
+ "mov x7, x8\n\t"
+ "mov x8, #0\n\t"
+ "add x5, x5, #8\n\t"
+ "cmp x5, 496\n\t"
+ "b.le 1b\n\t"
+ "str x6, [%[r], x5]\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer. Receives a masked by m.
+ * a A single precision integer. 16 digits.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int i = 0;
+
+    while (i < 16) {
+        r[i] = a[i] & m;
+        i++;
+    }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer. Receives the low 16 digits of the sum.
+ * a A single precision integer. 16 digits.
+ * b A single precision integer. 16 digits.
+ *
+ * Returns the carry out of the most significant digit (0 or 1).
+ */
+static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "add x11, %[a], 128\n\t"
+ "\n1:\n\t"
+ /* c is 0 or 1: c + (-1) re-creates the previous iteration's carry
+  * flag (clear when c == 0, set when c == 1). */
+ "adds %[c], %[c], #-1\n\t"
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "ldp x7, x8, [%[b]], #16\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x9, x10, [%[b]], #16\n\t"
+ "adcs x4, x4, x8\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ "cset %[c], cs\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result. 16 digits.
+ * b A single precision integer. 16 digits.
+ *
+ * Returns 0 when there is no borrow and all ones ((sp_digit)-1) when
+ * a < b (borrow out of the top digit).
+ */
+static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "add x10, %[a], 128\n\t"
+ "\n1:\n\t"
+ /* c is 0 or all ones: xzr - c restores the previous iteration's
+  * borrow flag (carry set when c == 0, clear when c == -1). */
+ "subs %[c], xzr, %[c]\n\t"
+ "ldp x2, x3, [%[a]]\n\t"
+ "ldp x4, x5, [%[a], #16]\n\t"
+ "ldp x6, x7, [%[b]], #16\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x8, x9, [%[b]], #16\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a]], #16\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a]], #16\n\t"
+ "csetm %[c], cc\n\t"
+ "cmp %[a], x10\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (column-wise) multiply. x5 is the byte offset of the
+ * output column (0..240 step 8); for each column the inner loop sums
+ * a[x3/8] * b[x4/8] over all pairs with x3 + x4 == x5, accumulating into
+ * the three-word accumulator x6 (low), x7 (mid), x8 (high). The result is
+ * built in a local buffer so r may alias a or b.
+ *
+ * r A single precision integer (32 digits).
+ * a A single precision integer (16 digits).
+ * b A single precision integer (16 digits).
+ */
+static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_digit tmp[32];
+
+    __asm__ __volatile__ (
+        "mov x5, 0\n\t"
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "\n1:\n\t"
+        /* x3 = max(0, x5 - 120): first valid offset into a (120 = 15*8). */
+        "subs x3, x5, 120\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[b], x4]\n\t"
+        /* 64x64 -> 128 bit product added into the accumulator. */
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 128\n\t"
+        "b.eq 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        /* Emit the finished column and shift the accumulator down. */
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        "cmp x5, 240\n\t"
+        "b.le 1b\n\t"
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product-scanning square. Off-diagonal products a[i]*a[j] (i != j) are
+ * added into the accumulator twice (label 2); diagonal squares a[i]*a[i]
+ * once (label 4). x5 is the output column byte offset, x6/x7/x8 the
+ * three-word accumulator. Result built in a local buffer so r may alias a.
+ *
+ * r A single precision integer (32 digits).
+ * a A single precision integer (16 digits).
+ */
+static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+{
+    sp_digit tmp[32];
+
+    __asm__ __volatile__ (
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "mov x5, 0\n\t"
+        "\n1:\n\t"
+        /* x3 = max(0, x5 - 120); x4 = x5 - x3 (mirror offset). */
+        "subs x3, x5, 120\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "cmp x4, x3\n\t"
+        "b.eq 4f\n\t"
+        /* Off-diagonal term: add the product twice. */
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[a], x4]\n\t"
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "b.al 5f\n\t"
+        "\n4:\n\t"
+        /* Diagonal term: the square is added once. */
+        "ldr x10, [%[a], x3]\n\t"
+        "mul x9, x10, x10\n\t"
+        "umulh x10, x10, x10\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "\n5:\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 128\n\t"
+        "b.eq 3f\n\t"
+        /* Stop once the offsets cross: each pair is visited once. */
+        "cmp x3, x4\n\t"
+        "b.gt 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        "cmp x5, 240\n\t"
+        "b.le 1b\n\t"
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n (n = 64 here).
+ *
+ * Builds the inverse of a[0] mod 2^64 by Newton-Raphson iteration, then
+ * negates it. a[0] must be odd for the inverse to exist.
+ *
+ * a   A single precision number.
+ * rho Receives the bottom word of -1/a mod 2^64.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+    int i;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* seed: x*b == 1 mod 2^4 */
+    /* Each step doubles the number of correct low bits: 4->8->16->32->64. */
+    for (i = 0; i < 4; i++) {
+        x *= 2 - b * x;
+    }
+
+    /* rho = -1/m mod 2^64 */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * 32-digit by single-digit multiply. Both variants rotate three
+ * accumulator registers (x3/x4/x5): one holds the word being completed
+ * while the other two collect the low/high halves (mul/umulh) of the next
+ * 64x64->128 bit products. The final store writes r[32], so r needs room
+ * for 33 digits.
+ *
+ * r A single precision integer (33 digits of space).
+ * a A single precision integer (32 digits).
+ * b A single precision digit.
+ */
+static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Compact loop: x9 walks byte offsets 8..248 over a. */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr x8, [%[a]]\n\t"
+        "mul x5, %[b], x8\n\t"
+        "umulh x3, %[b], x8\n\t"
+        "mov x4, 0\n\t"
+        "str x5, [%[r]]\n\t"
+        "mov x5, 0\n\t"
+        "mov x9, #8\n\t"
+        "1:\n\t"
+        "ldr x8, [%[a], x9]\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], x9]\n\t"
+        "mov x3, x4\n\t"
+        "mov x4, x5\n\t"
+        "mov x5, #0\n\t"
+        "add x9, x9, #8\n\t"
+        "cmp x9, 256\n\t"
+        "b.lt 1b\n\t"
+        "str x3, [%[r], 256]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#else
+    /* Fully unrolled: loads are paired (ldp) and interleaved with the
+     * carry chain to hide latency. */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp x8, x9, [%[a]]\n\t"
+        "mul x3, %[b], x8\n\t"
+        "umulh x4, %[b], x8\n\t"
+        "mov x5, 0\n\t"
+        "# A[1] * B\n\t"
+        "str x3, [%[r]]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[2] * B\n\t"
+        "ldp x8, x9, [%[a], 16]\n\t"
+        "str x4, [%[r], 8]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[3] * B\n\t"
+        "str x5, [%[r], 16]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[4] * B\n\t"
+        "ldp x8, x9, [%[a], 32]\n\t"
+        "str x3, [%[r], 24]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[5] * B\n\t"
+        "str x4, [%[r], 32]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[6] * B\n\t"
+        "ldp x8, x9, [%[a], 48]\n\t"
+        "str x5, [%[r], 40]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[7] * B\n\t"
+        "str x3, [%[r], 48]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[8] * B\n\t"
+        "ldp x8, x9, [%[a], 64]\n\t"
+        "str x4, [%[r], 56]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[9] * B\n\t"
+        "str x5, [%[r], 64]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[10] * B\n\t"
+        "ldp x8, x9, [%[a], 80]\n\t"
+        "str x3, [%[r], 72]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[11] * B\n\t"
+        "str x4, [%[r], 80]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[12] * B\n\t"
+        "ldp x8, x9, [%[a], 96]\n\t"
+        "str x5, [%[r], 88]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[13] * B\n\t"
+        "str x3, [%[r], 96]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[14] * B\n\t"
+        "ldp x8, x9, [%[a], 112]\n\t"
+        "str x4, [%[r], 104]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[15] * B\n\t"
+        "str x5, [%[r], 112]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[16] * B\n\t"
+        "ldp x8, x9, [%[a], 128]\n\t"
+        "str x3, [%[r], 120]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[17] * B\n\t"
+        "str x4, [%[r], 128]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[18] * B\n\t"
+        "ldp x8, x9, [%[a], 144]\n\t"
+        "str x5, [%[r], 136]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[19] * B\n\t"
+        "str x3, [%[r], 144]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[20] * B\n\t"
+        "ldp x8, x9, [%[a], 160]\n\t"
+        "str x4, [%[r], 152]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[21] * B\n\t"
+        "str x5, [%[r], 160]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[22] * B\n\t"
+        "ldp x8, x9, [%[a], 176]\n\t"
+        "str x3, [%[r], 168]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[23] * B\n\t"
+        "str x4, [%[r], 176]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[24] * B\n\t"
+        "ldp x8, x9, [%[a], 192]\n\t"
+        "str x5, [%[r], 184]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[25] * B\n\t"
+        "str x3, [%[r], 192]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[26] * B\n\t"
+        "ldp x8, x9, [%[a], 208]\n\t"
+        "str x4, [%[r], 200]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[27] * B\n\t"
+        "str x5, [%[r], 208]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[28] * B\n\t"
+        "ldp x8, x9, [%[a], 224]\n\t"
+        "str x3, [%[r], 216]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[29] * B\n\t"
+        "str x4, [%[r], 224]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[30] * B\n\t"
+        "ldp x8, x9, [%[a], 240]\n\t"
+        "str x5, [%[r], 232]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[31] * B\n\t"
+        "str x3, [%[r], 240]\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "adc x5, x5, x7\n\t"
+        "stp x4, x5, [%[r], 248]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m is 16 64-bit digits (1024 bits) with its top bit set, a single
+ * subtraction suffices: r = 0 - m == 2^1024 - m (mod 2^1024).
+ *
+ * r A single precision number (16 digits, output).
+ * m A single precision number (the modulus).
+ */
+static void sp_2048_mont_norm_16(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 16);
+
+    /* r = 2^n mod m */
+    sp_2048_sub_in_place_16(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant-time: b is always read and ANDed with the mask, so the same
+ * instruction sequence runs regardless of m.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * Returns 0 or all-ones (-1) when a borrow out of the top digit occurred.
+ */
+static sp_digit sp_2048_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit c = 0;
+
+    /* One digit per iteration; x8 walks byte offsets 0..120. */
+    __asm__ __volatile__ (
+        "mov x8, #0\n\t"
+        "1:\n\t"
+        "subs %[c], xzr, %[c]\n\t"
+        "ldr x4, [%[a], x8]\n\t"
+        "ldr x5, [%[b], x8]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "csetm %[c], cc\n\t"
+        "str x4, [%[r], x8]\n\t"
+        "add x8, x8, #8\n\t"
+        "cmp x8, 128\n\t"
+        "b.lt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return c;
+#else
+    /* Unrolled: four digits per group; the sbcs chain carries the borrow
+     * across all 16 digits and csetm turns the final borrow into a mask. */
+    __asm__ __volatile__ (
+
+        "ldp x5, x7, [%[b], 0]\n\t"
+        "ldp x11, x12, [%[b], 16]\n\t"
+        "ldp x4, x6, [%[a], 0]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 16]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "subs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 0]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 16]\n\t"
+        "ldp x5, x7, [%[b], 32]\n\t"
+        "ldp x11, x12, [%[b], 48]\n\t"
+        "ldp x4, x6, [%[a], 32]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 48]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 32]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 48]\n\t"
+        "ldp x5, x7, [%[b], 64]\n\t"
+        "ldp x11, x12, [%[b], 80]\n\t"
+        "ldp x4, x6, [%[a], 64]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 80]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 64]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 80]\n\t"
+        "ldp x5, x7, [%[b], 96]\n\t"
+        "ldp x11, x12, [%[b], 112]\n\t"
+        "ldp x4, x6, [%[a], 96]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 112]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 96]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 112]\n\t"
+        /* Reuse the r operand register for the returned borrow mask. */
+        "csetm %[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * Runs 16 iterations (x4 counts down from 16). m[0..13] are cached in
+ * x14..x28; m[14] and m[15] are reloaded each pass (no registers left).
+ * a[i] and a[i+1] are kept in x12/x13 across iterations; the a pointer
+ * advances one digit per pass. The accumulated top carry is held in ca
+ * and resolved by a final constant-time conditional subtract of m.
+ *
+ * a A single precision number to reduce in place (32 digits).
+ * m The single precision number representing the modulus (16 digits).
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "ldp x14, x15, [%[m], 0]\n\t"
+        "ldp x16, x17, [%[m], 16]\n\t"
+        "ldp x19, x20, [%[m], 32]\n\t"
+        "ldp x21, x22, [%[m], 48]\n\t"
+        "ldp x23, x24, [%[m], 64]\n\t"
+        "ldp x25, x26, [%[m], 80]\n\t"
+        "ldp x27, x28, [%[m], 96]\n\t"
+        "# i = 16\n\t"
+        "mov x4, 16\n\t"
+        "ldp x12, x13, [%[a], 0]\n\t"
+        "\n1:\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mul x9, %[mp], x12\n\t"
+        "# a[i+0] += m[0] * mu\n\t"
+        "mul x7, x14, x9\n\t"
+        "umulh x8, x14, x9\n\t"
+        "adds x12, x12, x7\n\t"
+        "# a[i+1] += m[1] * mu\n\t"
+        "mul x7, x15, x9\n\t"
+        "adc x6, x8, xzr\n\t"
+        "umulh x8, x15, x9\n\t"
+        "adds x12, x13, x7\n\t"
+        "# a[i+2] += m[2] * mu\n\t"
+        "ldr x13, [%[a], 16]\n\t"
+        "adc x5, x8, xzr\n\t"
+        "mul x7, x16, x9\n\t"
+        "adds x12, x12, x6\n\t"
+        "umulh x8, x16, x9\n\t"
+        "adc x5, x5, xzr\n\t"
+        "adds x13, x13, x7\n\t"
+        "# a[i+3] += m[3] * mu\n\t"
+        "ldr x10, [%[a], 24]\n\t"
+        "adc x6, x8, xzr\n\t"
+        "mul x7, x17, x9\n\t"
+        "adds x13, x13, x5\n\t"
+        "umulh x8, x17, x9\n\t"
+        "adc x6, x6, xzr\n\t"
+        "adds x10, x10, x7\n\t"
+        "# a[i+4] += m[4] * mu\n\t"
+        "ldr x11, [%[a], 32]\n\t"
+        "adc x5, x8, xzr\n\t"
+        "adds x10, x10, x6\n\t"
+        "mul x7, x19, x9\n\t"
+        "adc x5, x5, xzr\n\t"
+        "umulh x8, x19, x9\n\t"
+        "str x10, [%[a], 24]\n\t"
+        "adds x11, x11, x7\n\t"
+        "# a[i+5] += m[5] * mu\n\t"
+        "ldr x10, [%[a], 40]\n\t"
+        "adc x6, x8, xzr\n\t"
+        "adds x11, x11, x5\n\t"
+        "mul x7, x20, x9\n\t"
+        "adc x6, x6, xzr\n\t"
+        "umulh x8, x20, x9\n\t"
+        "str x11, [%[a], 32]\n\t"
+        "adds x10, x10, x7\n\t"
+        "# a[i+6] += m[6] * mu\n\t"
+        "ldr x11, [%[a], 48]\n\t"
+        "adc x5, x8, xzr\n\t"
+        "adds x10, x10, x6\n\t"
+        "mul x7, x21, x9\n\t"
+        "adc x5, x5, xzr\n\t"
+        "umulh x8, x21, x9\n\t"
+        "str x10, [%[a], 40]\n\t"
+        "adds x11, x11, x7\n\t"
+        "# a[i+7] += m[7] * mu\n\t"
+        "ldr x10, [%[a], 56]\n\t"
+        "adc x6, x8, xzr\n\t"
+        "adds x11, x11, x5\n\t"
+        "mul x7, x22, x9\n\t"
+        "adc x6, x6, xzr\n\t"
+        "umulh x8, x22, x9\n\t"
+        "str x11, [%[a], 48]\n\t"
+        "adds x10, x10, x7\n\t"
+        "# a[i+8] += m[8] * mu\n\t"
+        "ldr x11, [%[a], 64]\n\t"
+        "adc x5, x8, xzr\n\t"
+        "adds x10, x10, x6\n\t"
+        "mul x7, x23, x9\n\t"
+        "adc x5, x5, xzr\n\t"
+        "umulh x8, x23, x9\n\t"
+        "str x10, [%[a], 56]\n\t"
+        "adds x11, x11, x7\n\t"
+        "# a[i+9] += m[9] * mu\n\t"
+        "ldr x10, [%[a], 72]\n\t"
+        "adc x6, x8, xzr\n\t"
+        "adds x11, x11, x5\n\t"
+        "mul x7, x24, x9\n\t"
+        "adc x6, x6, xzr\n\t"
+        "umulh x8, x24, x9\n\t"
+        "str x11, [%[a], 64]\n\t"
+        "adds x10, x10, x7\n\t"
+        "# a[i+10] += m[10] * mu\n\t"
+        "ldr x11, [%[a], 80]\n\t"
+        "adc x5, x8, xzr\n\t"
+        "adds x10, x10, x6\n\t"
+        "mul x7, x25, x9\n\t"
+        "adc x5, x5, xzr\n\t"
+        "umulh x8, x25, x9\n\t"
+        "str x10, [%[a], 72]\n\t"
+        "adds x11, x11, x7\n\t"
+        "# a[i+11] += m[11] * mu\n\t"
+        "ldr x10, [%[a], 88]\n\t"
+        "adc x6, x8, xzr\n\t"
+        "adds x11, x11, x5\n\t"
+        "mul x7, x26, x9\n\t"
+        "adc x6, x6, xzr\n\t"
+        "umulh x8, x26, x9\n\t"
+        "str x11, [%[a], 80]\n\t"
+        "adds x10, x10, x7\n\t"
+        "# a[i+12] += m[12] * mu\n\t"
+        "ldr x11, [%[a], 96]\n\t"
+        "adc x5, x8, xzr\n\t"
+        "adds x10, x10, x6\n\t"
+        "mul x7, x27, x9\n\t"
+        "adc x5, x5, xzr\n\t"
+        "umulh x8, x27, x9\n\t"
+        "str x10, [%[a], 88]\n\t"
+        "adds x11, x11, x7\n\t"
+        "# a[i+13] += m[13] * mu\n\t"
+        "ldr x10, [%[a], 104]\n\t"
+        "adc x6, x8, xzr\n\t"
+        "adds x11, x11, x5\n\t"
+        "mul x7, x28, x9\n\t"
+        "adc x6, x6, xzr\n\t"
+        "umulh x8, x28, x9\n\t"
+        "str x11, [%[a], 96]\n\t"
+        "adds x10, x10, x7\n\t"
+        "# a[i+14] += m[14] * mu\n\t"
+        "ldr x11, [%[a], 112]\n\t"
+        "adc x5, x8, xzr\n\t"
+        "ldr x8, [%[m], 112]\n\t"
+        "adds x10, x10, x6\n\t"
+        "mul x7, x8, x9\n\t"
+        "adc x5, x5, xzr\n\t"
+        "umulh x8, x8, x9\n\t"
+        "str x10, [%[a], 104]\n\t"
+        "adds x11, x11, x7\n\t"
+        "# a[i+15] += m[15] * mu\n\t"
+        "ldr x10, [%[a], 120]\n\t"
+        "adc x6, x8, xzr\n\t"
+        "ldr x8, [%[m], 120]\n\t"
+        "adds x11, x11, x5\n\t"
+        "mul x7, x8, x9\n\t"
+        "adc x6, x6, xzr\n\t"
+        "umulh x8, x8, x9\n\t"
+        "adds x6, x6, x7\n\t"
+        "adcs x8, x8, %[ca]\n\t"
+        "str x11, [%[a], 112]\n\t"
+        "cset %[ca], cs\n\t"
+        "adds x10, x10, x6\n\t"
+        "ldr x11, [%[a], 128]\n\t"
+        "str x10, [%[a], 120]\n\t"
+        "adcs x11, x11, x8\n\t"
+        "str x11, [%[a], 128]\n\t"
+        "adc %[ca], %[ca], xzr\n\t"
+        "subs x4, x4, 1\n\t"
+        "add %[a], %[a], 8\n\t"
+        "bne 1b\n\t"
+        "stp x12, x13, [%[a], 0]\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
+    );
+
+    /* a advanced 16 digits in the loop: a - 16 is the original base and
+     * a points at the reduced upper half. Subtract m when ca is set. */
+    sp_2048_cond_sub_16(a - 16, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full 32-digit product followed by Montgomery reduction back to 16.
+ *
+ * r Result of multiplication (needs 32 digits of space).
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_2048_mont_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_16(r, a, b);
+    sp_2048_mont_reduce_16(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full 32-digit square followed by Montgomery reduction back to 16.
+ *
+ * r Result of squaring (needs 32 digits of space).
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_2048_mont_sqr_16(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_16(r, a);
+    sp_2048_mont_reduce_16(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * 16-digit by single-digit multiply; same rotating three-accumulator
+ * scheme (x3/x4/x5) as sp_2048_mul_d_32. The final store writes r[16],
+ * so r needs room for 17 digits.
+ *
+ * r A single precision integer (17 digits of space).
+ * a A single precision integer (16 digits).
+ * b A single precision digit.
+ */
+static void sp_2048_mul_d_16(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Compact loop: x9 walks byte offsets 8..120 over a. */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr x8, [%[a]]\n\t"
+        "mul x5, %[b], x8\n\t"
+        "umulh x3, %[b], x8\n\t"
+        "mov x4, 0\n\t"
+        "str x5, [%[r]]\n\t"
+        "mov x5, 0\n\t"
+        "mov x9, #8\n\t"
+        "1:\n\t"
+        "ldr x8, [%[a], x9]\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], x9]\n\t"
+        "mov x3, x4\n\t"
+        "mov x4, x5\n\t"
+        "mov x5, #0\n\t"
+        "add x9, x9, #8\n\t"
+        "cmp x9, 128\n\t"
+        "b.lt 1b\n\t"
+        "str x3, [%[r], 128]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#else
+    /* Fully unrolled variant with paired loads. */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp x8, x9, [%[a]]\n\t"
+        "mul x3, %[b], x8\n\t"
+        "umulh x4, %[b], x8\n\t"
+        "mov x5, 0\n\t"
+        "# A[1] * B\n\t"
+        "str x3, [%[r]]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[2] * B\n\t"
+        "ldp x8, x9, [%[a], 16]\n\t"
+        "str x4, [%[r], 8]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[3] * B\n\t"
+        "str x5, [%[r], 16]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[4] * B\n\t"
+        "ldp x8, x9, [%[a], 32]\n\t"
+        "str x3, [%[r], 24]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[5] * B\n\t"
+        "str x4, [%[r], 32]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[6] * B\n\t"
+        "ldp x8, x9, [%[a], 48]\n\t"
+        "str x5, [%[r], 40]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[7] * B\n\t"
+        "str x3, [%[r], 48]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[8] * B\n\t"
+        "ldp x8, x9, [%[a], 64]\n\t"
+        "str x4, [%[r], 56]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[9] * B\n\t"
+        "str x5, [%[r], 64]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[10] * B\n\t"
+        "ldp x8, x9, [%[a], 80]\n\t"
+        "str x3, [%[r], 72]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[11] * B\n\t"
+        "str x4, [%[r], 80]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[12] * B\n\t"
+        "ldp x8, x9, [%[a], 96]\n\t"
+        "str x5, [%[r], 88]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[13] * B\n\t"
+        "str x3, [%[r], 96]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[14] * B\n\t"
+        "ldp x8, x9, [%[a], 112]\n\t"
+        "str x4, [%[r], 104]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[15] * B\n\t"
+        "str x5, [%[r], 112]\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "adc x4, x4, x7\n\t"
+        "stp x3, x4, [%[r], 120]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * Approximates the quotient in 32-bit chunks using x5 = (div >> 32) + 1
+ * as a safe over-estimate of the divisor's top half, subtracting each
+ * partial quotient times div from the remainder, then finishes with one
+ * exact 64-bit udiv on the remaining d0.
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ */
+static sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        "lsr x5, %[div], 32\n\t"
+        "add x5, x5, 1\n\t"
+
+        /* First estimate: top 32 quotient bits (scaled by 2^32). */
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x6, x3, 32\n\t"
+        "mul x4, %[div], x6\n\t"
+        "umulh x3, %[div], x6\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x3, x3, 32\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        /* Shift the 128-bit remainder down 32 bits for the next chunk. */
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "sub %[d0], %[d0], x4\n\t"
+
+        /* Exact final correction on the 64-bit remainder. */
+        "udiv x3, %[d0], %[div]\n\t"
+        "add %[r], x6, x3\n\t"
+
+        : [r] "=r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "x3", "x4", "x5", "x6"
+    );
+
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant digit down. x2 holds the running
+ * result, x3 = +1, and x4 is an all-ones "still equal" mask that is
+ * cleared once a differing digit is seen; later digits are ANDed with it
+ * so they cannot change the result. Every digit is always examined.
+ *
+ * a A single precision integer (16 digits).
+ * b A single precision integer (16 digits).
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int64_t sp_2048_cmp_16(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov x2, -1\n\t"
+        "mov x3, 1\n\t"
+        "mov x4, -1\n\t"
+        "mov x5, 120\n\t"
+        "1:\n\t"
+        "ldr x6, [%[a], x5]\n\t"
+        "ldr x7, [%[b], x5]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x6, x6, x7\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "subs x5, x5, #8\n\t"
+        "b.cs 1b\n\t"
+        /* Equal inputs: x4 still -1 and x2 still -1, so the eor gives 0. */
+        "eor %[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov x2, -1\n\t"
+        "mov x3, 1\n\t"
+        "mov x4, -1\n\t"
+        "ldp x5, x6, [%[a], 112]\n\t"
+        "ldp x7, x8, [%[b], 112]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 96]\n\t"
+        "ldp x7, x8, [%[b], 96]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 80]\n\t"
+        "ldp x7, x8, [%[b], 80]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 64]\n\t"
+        "ldp x7, x8, [%[b], 64]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 48]\n\t"
+        "ldp x7, x8, [%[b], 48]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 32]\n\t"
+        "ldp x7, x8, [%[b], 32]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 16]\n\t"
+        "ldp x7, x8, [%[b], 16]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "ldp x5, x6, [%[a], 0]\n\t"
+        "ldp x7, x8, [%[b], 0]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x8, x8, x4\n\t"
+        "subs x6, x6, x8\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x5, x5, x7\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "eor %[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+    );
+#endif
+
+    return (int64_t)a;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division one digit at a time: each pass estimates a
+ * quotient digit from the top two remainder words, subtracts r1*d, then
+ * applies up to two masked add-backs to fix over-estimation without
+ * branching on secret data.
+ *
+ * a Number to be divided (32 digits).
+ * d Number to divide with (16 digits; top word used as the divisor).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (16 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[32], t2[17];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[15];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 16);
+    for (i=15; i>=0; i--) {
+        /* Estimate quotient digit from the two top remainder words. */
+        r1 = div_2048_word_16(t1[16 + i], t1[16 + i - 1], div);
+
+        sp_2048_mul_d_16(t2, d, r1);
+        t1[16 + i] += sp_2048_sub_in_place_16(&t1[i], t2);
+        t1[16 + i] -= t2[16];
+        /* Add d back while the top word is negative (all-ones mask). */
+        sp_2048_mask_16(t2, d, t1[16 + i]);
+        t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2);
+        sp_2048_mask_16(t2, d, t1[16 + i]);
+        t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2);
+    }
+
+    /* Final constant-time subtraction when the remainder still >= d. */
+    r1 = sp_2048_cmp_16(t1, d) >= 0;
+    sp_2048_cond_sub_16(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_2048_div_16 that discards the quotient.
+ *
+ * r A single precision number that is the reduced result (16 digits).
+ * a A single precision number that is to be reduced (32 digits).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_16(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_16(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window exponentiation in Montgomery form. t[1..15] hold
+ * a^1..a^15 (Montgomery form); each window step is four Montgomery
+ * squarings followed by one multiply by t[y].
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced mod m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][32];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 32, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 32;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_16(norm, m);
+
+        /* Convert a to Montgomery form: t[1] = a * 2^1024 mod m, done by
+         * placing a in the upper half and reducing. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 16U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_16(t[1] + 16, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_16(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16);
+            err = sp_2048_mod_16(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute window table: t[k] = a^k in Montgomery form. */
+        sp_2048_mont_sqr_16(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_16(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_16(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_16(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_16(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_16(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_16(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_16(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_16(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_16(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_16(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_16(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_16(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_16(t[15], t[ 8], t[ 7], m, mp);
+
+        /* Load the top (possibly partial) window from the exponent. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 4;
+        if (c == 64) {
+            c = 60;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
+        /* Main loop: c = bits left in n; refill from e[] as needed. */
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 60;
+                n <<= 4;
+                c = 60;
+            }
+            else if (c < 4) {
+                /* Window straddles a word boundary: combine both parts. */
+                y = n >> 60;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 60) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_2048_mont_sqr_16(r, r, m, mp);
+            sp_2048_mont_sqr_16(r, r, m, mp);
+            sp_2048_mont_sqr_16(r, r, m, mp);
+            sp_2048_mont_sqr_16(r, r, m, mp);
+
+            sp_2048_mont_mul_16(r, r, t[y], m, mp);
+        }
+
+        /* Convert back out of Montgomery form and fully reduce. */
+        XMEMSET(&r[16], 0, sizeof(sp_digit) * 16U);
+        sp_2048_mont_reduce_16(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_16(r, m) >= 0);
+        sp_2048_cond_sub_16(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window exponentiation: a 32-entry table of Montgomery-form
+ * powers t[j] = a^j (j = 0..31) is built, then the exponent is consumed in
+ * 5-bit windows, five Montgomery squarings and one table multiplication
+ * per window. Faster than the 4-bit WOLFSSL_SP_SMALL variant at the cost
+ * of twice the table memory.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may exceed m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][32];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 32, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ /* Carve the single heap allocation into 32 rows of 32 digits. */
+ for (i=0; i<32; i++) {
+ t[i] = td + i * 32;
+ }
+#endif
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_16(norm, m);
+
+ /* Convert a into Montgomery form in t[1]: place a (reduced if
+ * requested) in the upper 16 digits, then reduce the 32-digit
+ * value modulo m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 16U);
+ if (reduceA != 0) {
+ err = sp_2048_mod_16(t[1] + 16, a, m);
+ if (err == MP_OKAY) {
+ err = sp_2048_mod_16(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16);
+ err = sp_2048_mod_16(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the window table: t[j] holds the j-th power (Montgomery
+ * form), computed by alternating squarings and multiplications. */
+ sp_2048_mont_sqr_16(t[ 2], t[ 1], m, mp);
+ sp_2048_mont_mul_16(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_2048_mont_sqr_16(t[ 4], t[ 2], m, mp);
+ sp_2048_mont_mul_16(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_2048_mont_sqr_16(t[ 6], t[ 3], m, mp);
+ sp_2048_mont_mul_16(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_2048_mont_sqr_16(t[ 8], t[ 4], m, mp);
+ sp_2048_mont_mul_16(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_2048_mont_sqr_16(t[10], t[ 5], m, mp);
+ sp_2048_mont_mul_16(t[11], t[ 6], t[ 5], m, mp);
+ sp_2048_mont_sqr_16(t[12], t[ 6], m, mp);
+ sp_2048_mont_mul_16(t[13], t[ 7], t[ 6], m, mp);
+ sp_2048_mont_sqr_16(t[14], t[ 7], m, mp);
+ sp_2048_mont_mul_16(t[15], t[ 8], t[ 7], m, mp);
+ sp_2048_mont_sqr_16(t[16], t[ 8], m, mp);
+ sp_2048_mont_mul_16(t[17], t[ 9], t[ 8], m, mp);
+ sp_2048_mont_sqr_16(t[18], t[ 9], m, mp);
+ sp_2048_mont_mul_16(t[19], t[10], t[ 9], m, mp);
+ sp_2048_mont_sqr_16(t[20], t[10], m, mp);
+ sp_2048_mont_mul_16(t[21], t[11], t[10], m, mp);
+ sp_2048_mont_sqr_16(t[22], t[11], m, mp);
+ sp_2048_mont_mul_16(t[23], t[12], t[11], m, mp);
+ sp_2048_mont_sqr_16(t[24], t[12], m, mp);
+ sp_2048_mont_mul_16(t[25], t[13], t[12], m, mp);
+ sp_2048_mont_sqr_16(t[26], t[13], m, mp);
+ sp_2048_mont_mul_16(t[27], t[14], t[13], m, mp);
+ sp_2048_mont_sqr_16(t[28], t[14], m, mp);
+ sp_2048_mont_mul_16(t[29], t[15], t[14], m, mp);
+ sp_2048_mont_sqr_16(t[30], t[15], m, mp);
+ sp_2048_mont_mul_16(t[31], t[16], t[15], m, mp);
+
+ /* Consume the top (possibly partial) window so the remaining
+ * exponent bits form whole 5-bit windows. n holds the current
+ * exponent word left-aligned; c counts its remaining bits. */
+ i = (bits - 1) / 64;
+ n = e[i--];
+ c = bits & 63;
+ if (c == 0) {
+ c = 64;
+ }
+ c -= bits % 5;
+ if (c == 64) {
+ c = 59;
+ }
+ y = (int)(n >> c);
+ n <<= 64 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ /* Current word exhausted: window comes from the next word. */
+ n = e[i--];
+ y = n >> 59;
+ n <<= 5;
+ c = 59;
+ }
+ else if (c < 5) {
+ /* Window straddles two exponent words. */
+ y = n >> 59;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (64 - c);
+ n <<= c;
+ c = 64 - c;
+ }
+ else {
+ /* Window fits in the current word. */
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* Five squarings shift the accumulator up one window... */
+ sp_2048_mont_sqr_16(r, r, m, mp);
+ sp_2048_mont_sqr_16(r, r, m, mp);
+ sp_2048_mont_sqr_16(r, r, m, mp);
+ sp_2048_mont_sqr_16(r, r, m, mp);
+ sp_2048_mont_sqr_16(r, r, m, mp);
+
+ /* ...then fold in the table entry for window value y. */
+ sp_2048_mont_mul_16(r, r, t[y], m, mp);
+ }
+
+ /* Leave Montgomery form and normalise the result into [0, m). */
+ XMEMSET(&r[16], 0, sizeof(sp_digit) * 16U);
+ sp_2048_mont_reduce_16(r, m, mp);
+
+ mask = 0 - (sp_2048_cmp_16(r, m) >= 0);
+ sp_2048_cond_sub_16(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * Computes 0 - m over 32 digits, i.e. 2^2048 - m, which is the
+ * representative of 2^2048 mod m when m is a full 2048-bit modulus.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
+{
+ XMEMSET(r, 0, sizeof(sp_digit) * 32);
+
+ /* r = 2^n mod m */
+ sp_2048_sub_in_place_32(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 (all ones) to subtract b and 0 to leave a unchanged.
+ *
+ * Computes r = a - (b & m) over 32 digits and returns the resulting
+ * borrow as a mask: all ones when the subtraction underflowed, 0
+ * otherwise (set via csetm on the carry-clear condition).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit c = 0;
+
+ /* One digit per iteration; x8 is the byte offset, and the running
+ * borrow in c is re-materialised into the carry flag each pass with
+ * "subs xzr - c". */
+ __asm__ __volatile__ (
+ "mov x8, #0\n\t"
+ "1:\n\t"
+ "subs %[c], xzr, %[c]\n\t"
+ "ldr x4, [%[a], x8]\n\t"
+ "ldr x5, [%[b], x8]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "csetm %[c], cc\n\t"
+ "str x4, [%[r], x8]\n\t"
+ "add x8, x8, #8\n\t"
+ "cmp x8, 256\n\t"
+ "b.lt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+ );
+
+ return c;
+#else
+ /* Fully unrolled: 4 digits per group, borrow carried through sbcs.
+ * The register holding r is overwritten with the borrow mask at the
+ * end (csetm) and returned. */
+ __asm__ __volatile__ (
+
+ "ldp x5, x7, [%[b], 0]\n\t"
+ "ldp x11, x12, [%[b], 16]\n\t"
+ "ldp x4, x6, [%[a], 0]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 16]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "subs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 0]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 16]\n\t"
+ "ldp x5, x7, [%[b], 32]\n\t"
+ "ldp x11, x12, [%[b], 48]\n\t"
+ "ldp x4, x6, [%[a], 32]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 48]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 32]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 48]\n\t"
+ "ldp x5, x7, [%[b], 64]\n\t"
+ "ldp x11, x12, [%[b], 80]\n\t"
+ "ldp x4, x6, [%[a], 64]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 80]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 64]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 80]\n\t"
+ "ldp x5, x7, [%[b], 96]\n\t"
+ "ldp x11, x12, [%[b], 112]\n\t"
+ "ldp x4, x6, [%[a], 96]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 112]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 96]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 112]\n\t"
+ "ldp x5, x7, [%[b], 128]\n\t"
+ "ldp x11, x12, [%[b], 144]\n\t"
+ "ldp x4, x6, [%[a], 128]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 144]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 128]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 144]\n\t"
+ "ldp x5, x7, [%[b], 160]\n\t"
+ "ldp x11, x12, [%[b], 176]\n\t"
+ "ldp x4, x6, [%[a], 160]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 176]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 160]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 176]\n\t"
+ "ldp x5, x7, [%[b], 192]\n\t"
+ "ldp x11, x12, [%[b], 208]\n\t"
+ "ldp x4, x6, [%[a], 192]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 208]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 192]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 208]\n\t"
+ "ldp x5, x7, [%[b], 224]\n\t"
+ "ldp x11, x12, [%[b], 240]\n\t"
+ "ldp x4, x6, [%[a], 224]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 240]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 224]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 240]\n\t"
+ "csetm %[r], cc\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+ );
+
+ /* r's register now holds the borrow mask, not the pointer. */
+ return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * Word-by-word Montgomery reduction: 32 iterations, each computing
+ * mu = a[i] * mp and adding m * mu onto a starting at word i, which
+ * zeroes a[i]. The reduced result ends up in the upper 32 words of a
+ * and is copied back down by the conditional subtract at the end.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Carry out of the top word, propagated across iterations. */
+ sp_digit ca = 0;
+
+ /* Register roles: x14..x28 cache m[0..13]; the remaining modulus
+ * words are reloaded through x8 each pass. x4 is the loop counter,
+ * x9 holds mu, and x12/x13 keep a[i]/a[i+1] live across iterations.
+ * %[a] is advanced by one word (8 bytes) per iteration. */
+ __asm__ __volatile__ (
+ "ldp x14, x15, [%[m], 0]\n\t"
+ "ldp x16, x17, [%[m], 16]\n\t"
+ "ldp x19, x20, [%[m], 32]\n\t"
+ "ldp x21, x22, [%[m], 48]\n\t"
+ "ldp x23, x24, [%[m], 64]\n\t"
+ "ldp x25, x26, [%[m], 80]\n\t"
+ "ldp x27, x28, [%[m], 96]\n\t"
+ "# i = 32\n\t"
+ "mov x4, 32\n\t"
+ "ldp x12, x13, [%[a], 0]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul x9, %[mp], x12\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "mul x7, x14, x9\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x12, x12, x7\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x6, x8, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x12, x13, x7\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr x13, [%[a], 16]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "mul x7, x16, x9\n\t"
+ "adds x12, x12, x6\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x13, x13, x7\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr x10, [%[a], 24]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "mul x7, x17, x9\n\t"
+ "adds x13, x13, x5\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr x11, [%[a], 32]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "str x10, [%[a], 24]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr x10, [%[a], 40]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "str x11, [%[a], 32]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr x11, [%[a], 48]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "str x10, [%[a], 40]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr x10, [%[a], 56]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "str x11, [%[a], 48]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr x11, [%[a], 64]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x23, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x23, x9\n\t"
+ "str x10, [%[a], 56]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr x10, [%[a], 72]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x24, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x24, x9\n\t"
+ "str x11, [%[a], 64]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr x11, [%[a], 80]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x25, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x25, x9\n\t"
+ "str x10, [%[a], 72]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr x10, [%[a], 88]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x26, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x26, x9\n\t"
+ "str x11, [%[a], 80]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+12] += m[12] * mu\n\t"
+ "ldr x11, [%[a], 96]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x27, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x27, x9\n\t"
+ "str x10, [%[a], 88]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+13] += m[13] * mu\n\t"
+ "ldr x10, [%[a], 104]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x28, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x28, x9\n\t"
+ "str x11, [%[a], 96]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+14] += m[14] * mu\n\t"
+ "ldr x11, [%[a], 112]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 112]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 104]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+15] += m[15] * mu\n\t"
+ "ldr x10, [%[a], 120]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 120]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 112]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+16] += m[16] * mu\n\t"
+ "ldr x11, [%[a], 128]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 128]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 120]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+17] += m[17] * mu\n\t"
+ "ldr x10, [%[a], 136]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 136]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 128]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+18] += m[18] * mu\n\t"
+ "ldr x11, [%[a], 144]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 144]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 136]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+19] += m[19] * mu\n\t"
+ "ldr x10, [%[a], 152]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 152]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 144]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+20] += m[20] * mu\n\t"
+ "ldr x11, [%[a], 160]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 160]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 152]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+21] += m[21] * mu\n\t"
+ "ldr x10, [%[a], 168]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 168]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 160]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+22] += m[22] * mu\n\t"
+ "ldr x11, [%[a], 176]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 176]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 168]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+23] += m[23] * mu\n\t"
+ "ldr x10, [%[a], 184]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 184]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 176]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+24] += m[24] * mu\n\t"
+ "ldr x11, [%[a], 192]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 192]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 184]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+25] += m[25] * mu\n\t"
+ "ldr x10, [%[a], 200]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 200]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 192]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+26] += m[26] * mu\n\t"
+ "ldr x11, [%[a], 208]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 208]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 200]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+27] += m[27] * mu\n\t"
+ "ldr x10, [%[a], 216]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 216]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 208]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+28] += m[28] * mu\n\t"
+ "ldr x11, [%[a], 224]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 224]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 216]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+29] += m[29] * mu\n\t"
+ "ldr x10, [%[a], 232]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 232]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 224]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+30] += m[30] * mu\n\t"
+ "ldr x11, [%[a], 240]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 240]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 232]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+31] += m[31] * mu\n\t"
+ "ldr x10, [%[a], 248]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 248]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x8, x8, %[ca]\n\t"
+ "str x11, [%[a], 240]\n\t"
+ "cset %[ca], cs\n\t"
+ "adds x10, x10, x6\n\t"
+ "ldr x11, [%[a], 256]\n\t"
+ "str x10, [%[a], 248]\n\t"
+ "adcs x11, x11, x8\n\t"
+ "str x11, [%[a], 256]\n\t"
+ "adc %[ca], %[ca], xzr\n\t"
+ "subs x4, x4, 1\n\t"
+ "add %[a], %[a], 8\n\t"
+ "bne 1b\n\t"
+ "stp x12, x13, [%[a], 0]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
+ );
+
+ /* The loop advanced a by 32 words, so the reduced value now occupies
+ * the 32 words at the advanced pointer. Copy it down to the original
+ * buffer, subtracting m when the final carry (ca) is set. */
+ sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full 64-digit product followed by a Montgomery reduction back to
+ * 32 digits.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_2048_mul_32(r, a, b);
+ sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full 64-digit square followed by a Montgomery reduction back to
+ * 32 digits.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_2048_sqr_32(r, a);
+ sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * Builds the 64-bit quotient in x6 from successive 32-bit estimates:
+ * each estimate divides the top of the remainder by (div >> 32) + 1
+ * (an over-estimate of the divisor, so the quotient estimate never
+ * overshoots), subtracts estimate * div from the remainder, and a final
+ * exact udiv by div handles what is left.
+ * NOTE(review): appears to assume d1 < div so the quotient fits in one
+ * word — confirm against the callers.
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The dividend.
+ * returns the result of the division.
+ */
+static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div)
+{
+ sp_digit r;
+
+ __asm__ __volatile__ (
+ "lsr x5, %[div], 32\n\t"
+ "add x5, x5, 1\n\t"
+
+ "udiv x3, %[d1], x5\n\t"
+ "lsl x6, x3, 32\n\t"
+ "mul x4, %[div], x6\n\t"
+ "umulh x3, %[div], x6\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "udiv x3, %[d1], x5\n\t"
+ "lsl x3, x3, 32\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "umulh x3, %[div], x3\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "lsr x3, %[d0], 32\n\t"
+ "orr x3, x3, %[d1], lsl 32\n\t"
+
+ "udiv x3, x3, x5\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "umulh x3, %[div], x3\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "lsr x3, %[d0], 32\n\t"
+ "orr x3, x3, %[d1], lsl 32\n\t"
+
+ "udiv x3, x3, x5\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "sub %[d0], %[d0], x4\n\t"
+
+ "udiv x3, %[d0], %[div]\n\t"
+ "add %[r], x6, x3\n\t"
+
+ : [r] "=r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "x3", "x4", "x5", "x6"
+ );
+
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Used as a constant-time select: m is either all ones (keep a) or 0
+ * (produce zero).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<32; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ int i;
+
+ /* Unrolled eight digits per iteration. */
+ for (i = 0; i < 32; i += 8) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ r[i+6] = a[i+6] & m;
+ r[i+7] = a[i+7] & m;
+ }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant digit down, always touching every
+ * digit. x2 accumulates the result (+1/-1 via x3/x4), and x4 doubles as
+ * a "still equal" mask: it stays all ones while digits match and is
+ * zeroed by "csel x4, x4, xzr, eq" at the first difference, so later
+ * digits cannot change the outcome. The final "eor x2, x4" yields 0
+ * when all digits were equal (both are then all ones).
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int64_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Looping variant: x5 is the byte offset, counting down from the top
+ * digit. The register holding a is overwritten with the result. */
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ "mov x5, 248\n\t"
+ "1:\n\t"
+ "ldr x6, [%[a], x5]\n\t"
+ "ldr x7, [%[b], x5]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x6, x6, x7\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "subs x5, x5, #8\n\t"
+ "b.cs 1b\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+ );
+#else
+ /* Fully unrolled variant, two digits per ldp, highest offsets first. */
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ "ldp x5, x6, [%[a], 240]\n\t"
+ "ldp x7, x8, [%[b], 240]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 224]\n\t"
+ "ldp x7, x8, [%[b], 224]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 208]\n\t"
+ "ldp x7, x8, [%[b], 208]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 192]\n\t"
+ "ldp x7, x8, [%[b], 192]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "ldp x7, x8, [%[b], 176]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "ldp x7, x8, [%[b], 144]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "ldp x7, x8, [%[b], 112]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "ldp x7, x8, [%[b], 80]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "ldp x7, x8, [%[b], 48]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "ldp x7, x8, [%[b], 16]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+ );
+#endif
+
+ /* a's register was overwritten with the comparison result. */
+ return (int64_t)a;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook division, one 64-bit quotient digit per iteration. t1 holds
+ * the 64-digit running remainder and t1[32 + i] tracks the carry word
+ * above the current window.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[64], t2[33];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ div = d[31];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
+ for (i=31; i>=0; i--) {
+ /* Estimate the quotient digit from the top two remainder words. */
+ r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div);
+
+ /* Subtract r1 * d. The estimate can overshoot, so up to two
+ * conditional (masked) add-backs of d restore a non-negative
+ * remainder; the mask is the sign-extended carry word. */
+ sp_2048_mul_d_32(t2, d, r1);
+ t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
+ t1[32 + i] -= t2[32];
+ sp_2048_mask_32(t2, d, t1[32 + i]);
+ t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+ sp_2048_mask_32(t2, d, t1[32 + i]);
+ t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+ }
+
+ /* Final normalisation: subtract d once more if the remainder >= d. */
+ r1 = sp_2048_cmp_32(t1, d) >= 0;
+ sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper around sp_2048_div_32: the quotient is not needed, so NULL
+ * is passed for the multiplier output and only the remainder is kept.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_2048_div_32(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Loops four digits per pass using post-indexed loads/stores; x11 marks
+ * the end of a. The borrow is carried between passes in c: "subs xzr - c"
+ * re-materialises it into the carry flag, and csetm captures it again
+ * after the sbcs chain. Returns all ones on final borrow, 0 otherwise.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "add x11, %[a], 256\n\t"
+ "\n1:\n\t"
+ "subs %[c], xzr, %[c]\n\t"
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "ldp x7, x8, [%[b]], #16\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x9, x10, [%[b]], #16\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ "csetm %[c], cc\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ return c;
+}
+
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the borrow out of the subtraction: 0 or (sp_digit)-1.
+ */
+static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Fully unrolled 32-digit subtract; a single subs/sbcs chain keeps the
+     * borrow in the carry flag, and csetm turns the final borrow into
+     * 0 / all-ones in the r register (returned below). */
+    __asm__ __volatile__ (
+        "ldp x3, x4, [%[a], 0]\n\t"
+        "ldp x7, x8, [%[b], 0]\n\t"
+        "subs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 16]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 16]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 0]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 16]\n\t"
+        "ldp x3, x4, [%[a], 32]\n\t"
+        "ldp x7, x8, [%[b], 32]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 48]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 48]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 32]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 48]\n\t"
+        "ldp x3, x4, [%[a], 64]\n\t"
+        "ldp x7, x8, [%[b], 64]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 80]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 80]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 64]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 80]\n\t"
+        "ldp x3, x4, [%[a], 96]\n\t"
+        "ldp x7, x8, [%[b], 96]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 112]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 112]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 96]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 112]\n\t"
+        "ldp x3, x4, [%[a], 128]\n\t"
+        "ldp x7, x8, [%[b], 128]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 144]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 144]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 128]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 144]\n\t"
+        "ldp x3, x4, [%[a], 160]\n\t"
+        "ldp x7, x8, [%[b], 160]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 176]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 176]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 160]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 176]\n\t"
+        "ldp x3, x4, [%[a], 192]\n\t"
+        "ldp x7, x8, [%[b], 192]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 208]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 208]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 192]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 208]\n\t"
+        "ldp x3, x4, [%[a], 224]\n\t"
+        "ldp x7, x8, [%[b], 224]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 240]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 240]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 224]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 240]\n\t"
+        "csetm %[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    /* The r register was overwritten with the borrow mask by csetm. */
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[64], t2[33];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    /* Estimate each quotient digit from the top divisor word only. */
+    div = d[31];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
+    for (i=31; i>=0; i--) {
+        r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div);
+
+        sp_2048_mul_d_32(t2, d, r1);
+        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
+        t1[32 + i] -= t2[32];
+        /* Conditional (data-dependent) correction of the quotient estimate,
+         * unlike the masked, branch-free correction in sp_2048_div_32. */
+        if (t1[32 + i] != 0) {
+            t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d);
+            if (t1[32 + i] != 0)
+                t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d);
+        }
+    }
+
+    /* If the remaining value is still >= d, subtract d one last time. */
+    for (i = 31; i > 0; i--) {
+        if (t1[i] != d[i])
+            break;
+    }
+    if (t1[i] >= d[i]) {
+        sp_2048_sub_32(r, t1, d);
+    }
+    else {
+        XMEMCPY(r, t1, sizeof(*t1) * 32);
+    }
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Conditional (non-masked) variant; see sp_2048_div_32_cond.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced (2 * 32 digits).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    /* Quotient is not needed, so NULL is passed for the multiplier output. */
+    return sp_2048_div_32_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[16][64];
+#else
+ sp_digit* t[16];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<16; i++) {
+ t[i] = td + i * 64;
+ }
+#endif
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_32(norm, m);
+
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+ if (reduceA != 0) {
+ err = sp_2048_mod_32(t[1] + 32, a, m);
+ if (err == MP_OKAY) {
+ err = sp_2048_mod_32(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+ err = sp_2048_mod_32(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+ sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+ sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+ sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+ sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+ sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+ sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+ sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+ sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+ sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+
+ i = (bits - 1) / 64;
+ n = e[i--];
+ c = bits & 63;
+ if (c == 0) {
+ c = 64;
+ }
+ c -= bits % 4;
+ if (c == 64) {
+ c = 60;
+ }
+ y = (int)(n >> c);
+ n <<= 64 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+ for (; i>=0 || c>=4; ) {
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 60;
+ n <<= 4;
+ c = 60;
+ }
+ else if (c < 4) {
+ y = n >> 60;
+ n = e[i--];
+ c = 4 - c;
+ y |= n >> (64 - c);
+ n <<= c;
+ c = 64 - c;
+ }
+ else {
+ y = (n >> 60) & 0xf;
+ n <<= 4;
+ c -= 4;
+ }
+
+ sp_2048_mont_sqr_32(r, r, m, mp);
+ sp_2048_mont_sqr_32(r, r, m, mp);
+ sp_2048_mont_sqr_32(r, r, m, mp);
+ sp_2048_mont_sqr_32(r, r, m, mp);
+
+ sp_2048_mont_mul_32(r, r, t[y], m, mp);
+ }
+
+ XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+ sp_2048_mont_reduce_32(r, m, mp);
+
+ mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+ sp_2048_cond_sub_32(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Montgomery form with a fixed 5-bit window (32-entry table); larger table
+ * than the WOLFSSL_SP_SMALL variant in exchange for fewer multiplies.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][64];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* Convert a into Montgomery form: place it in the top 32 digits of
+         * t[1] (i.e. a * 2^2048) and reduce modulo m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            /* a may be >= m, so reduce it first. */
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute t[k] = a^k (Montgomery form) for window values 2..31. */
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_32(t[20], t[10], m, mp);
+        sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_32(t[22], t[11], m, mp);
+        sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_32(t[24], t[12], m, mp);
+        sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_32(t[26], t[13], m, mp);
+        sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_32(t[28], t[14], m, mp);
+        sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_32(t[30], t[15], m, mp);
+        sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
+
+        /* Consume the top (possibly short) window of the exponent first. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 5;
+        if (c == 64) {
+            c = 59;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        /* Each remaining 5-bit window: 5 squarings then one table multiply.
+         * n holds the remaining bits of the current exponent word (left
+         * aligned); c counts how many bits of n are still valid. */
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                /* Current word exhausted; window comes from the next word. */
+                n = e[i--];
+                y = n >> 59;
+                n <<= 5;
+                c = 59;
+            }
+            else if (c < 5) {
+                /* Window straddles two exponent words. */
+                y = n >> 59;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form and normalise into [0, m). */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent (at most 64 bits).
+ * mm Modulus (must be exactly 2048 bits).
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[64], m[32], r[64];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 256 ||
+            mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 5, NULL,
+            DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 32 * 2;
+        m = r + 32 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        /* ah = top half of a; loading the base there means reducing a
+         * yields the base in Montgomery form (base * 2^2048 mod m). */
+        ah = a + 32;
+
+        sp_2048_from_bin(ah, 32, in, inLen);
+#if DIGIT_BIT >= 64
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(m, 32, mm);
+
+        if (e[0] == 0x3) {
+            /* Common exponent 3: r = (base^2 mod m) * base mod m, no
+             * Montgomery machinery needed. */
+            if (err == MP_OKAY) {
+                sp_2048_sqr_32(r, ah);
+                err = sp_2048_mod_32_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_2048_mul_32(r, ah, r);
+                err = sp_2048_mod_32_cond(r, r, m);
+            }
+        }
+        else {
+            /* General left-to-right square-and-multiply on the (public)
+             * exponent bits, in Montgomery form. */
+            int i;
+            sp_digit mp;
+
+            sp_2048_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                /* Find the top set bit of the exponent. */
+                for (i = 63; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 32);
+                for (i--; i>=0; i--) {
+                    sp_2048_mont_sqr_32(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_2048_mont_mul_32(r, r, a, m, mp);
+                    }
+                }
+                /* Convert out of Montgomery form and normalise into [0, m). */
+                XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+                sp_2048_mont_reduce_32(r, m, mp);
+
+                for (i = 31; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_2048_sub_in_place_32(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 2048) {
+ err = MP_READ_E;
+ }
+ if (inLen > 256) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 32;
+ m = a + 64;
+ r = a;
+
+ sp_2048_from_bin(a, 32, in, inLen);
+ sp_2048_from_mp(d, 32, dm);
+ sp_2048_from_mp(m, 32, mm);
+ err = sp_2048_mod_exp_32(r, a, d, 2048, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 32);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ * Returns the carry out of the addition: 0 or 1.
+ */
+static sp_digit sp_2048_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit c = 0;
+
+    /* One digit per iteration over 16 digits (x8 = byte offset, 128 max).
+     * Each b digit is ANDed with the mask before the add, so m == 0 makes
+     * this a plain copy-with-carry of a.  Carry is kept in c via cset. */
+    __asm__ __volatile__ (
+        "mov x8, #0\n\t"
+        "1:\n\t"
+        "adds %[c], %[c], #-1\n\t"
+        "ldr x4, [%[a], x8]\n\t"
+        "ldr x5, [%[b], x8]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "cset %[c], cs\n\t"
+        "str x4, [%[r], x8]\n\t"
+        "add x8, x8, #8\n\t"
+        "cmp x8, 128\n\t"
+        "b.lt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return c;
+#else
+    /* Fully unrolled 16-digit masked add; a single adds/adcs chain keeps
+     * the carry in the flags, captured at the end with cset. */
+    __asm__ __volatile__ (
+
+        "ldp x5, x7, [%[b], 0]\n\t"
+        "ldp x11, x12, [%[b], 16]\n\t"
+        "ldp x4, x6, [%[a], 0]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 16]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adds x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 0]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 16]\n\t"
+        "ldp x5, x7, [%[b], 32]\n\t"
+        "ldp x11, x12, [%[b], 48]\n\t"
+        "ldp x4, x6, [%[a], 32]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 48]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 32]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 48]\n\t"
+        "ldp x5, x7, [%[b], 64]\n\t"
+        "ldp x11, x12, [%[b], 80]\n\t"
+        "ldp x4, x6, [%[a], 64]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 80]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 64]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 80]\n\t"
+        "ldp x5, x7, [%[b], 96]\n\t"
+        "ldp x11, x12, [%[b], 112]\n\t"
+        "ldp x4, x6, [%[a], 96]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 112]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 96]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 112]\n\t"
+        "cset %[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    /* The r register was overwritten with the carry by cset. */
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* RSA private key operation.
+ * CRT implementation: two 1024-bit exponentiations recombined mod p and q.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent (unused in this CRT variant).
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus (only its bit length is checked here).
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[32 * 2];
+    sp_digit p[16], q[16], dp[16];
+    sp_digit tmpa[32], tmpb[32];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 11, NULL,
+            DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        /* qi/dq/dp share one buffer: each is consumed before the next is
+         * loaded (dp, then dq, then qi below). */
+        a = t;
+        p = a + 32 * 2;
+        q = p + 16;
+        qi = dq = dp = q + 16;
+        tmpa = qi + 16;
+        tmpb = tmpa + 32;
+
+        r = t + 32;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_2048_from_bin(a, 32, in, inLen);
+        sp_2048_from_mp(p, 16, pm);
+        sp_2048_from_mp(q, 16, qm);
+        sp_2048_from_mp(dp, 16, dpm);
+
+        /* tmpa = in^dp mod p */
+        err = sp_2048_mod_exp_16(tmpa, a, dp, 1024, p, 1);
+    }
+    if (err == MP_OKAY) {
+        /* tmpb = in^dq mod q */
+        sp_2048_from_mp(dq, 16, dqm);
+        err = sp_2048_mod_exp_16(tmpb, a, dq, 1024, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        /* tmpa = (tmpa - tmpb) mod p; up to two masked add-backs of p
+         * correct any borrow. */
+        c = sp_2048_sub_in_place_16(tmpa, tmpb);
+        c += sp_2048_cond_add_16(tmpa, tmpa, p, c);
+        sp_2048_cond_add_16(tmpa, tmpa, p, c);
+
+        /* tmpa = (tmpa * qinv) mod p */
+        sp_2048_from_mp(qi, 16, qim);
+        sp_2048_mul_16(tmpa, tmpa, qi);
+        err = sp_2048_mod_16(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* CRT recombination: r = tmpb + q * tmpa. */
+        sp_2048_mul_16(tmpa, q, tmpa);
+        XMEMSET(&tmpb[16], 0, sizeof(sp_digit) * 16);
+        sp_2048_add_32(r, tmpb, tmpa);
+
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    /* Zeroise private-key material before freeing / returning. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 16 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ * Repacks 32 64-bit SP digits into mp_digits of DIGIT_BIT bits each.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ * returns MP_OKAY on success (mp_grow result is propagated).
+ */
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+        /* Same digit size: straight copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 32);
+        r->used = 32;
+        mp_clamp(r);
+#elif DIGIT_BIT < 64
+        /* mp digits are narrower: spread each 64-bit word over several
+         * mp digits.  s tracks the bit position consumed within a[i]. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 32; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 64) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 64 - s;
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp digits are wider: pack multiple 64-bit words per mp digit. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 32; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 64 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 64 - s;
+            }
+            else {
+                s += 64;
+            }
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer (at most 2048 bits).
+ * exp Exponent. MP integer (at most 2048 bits).
+ * mod Modulus. MP integer (must be exactly 2048 bits).
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[64], e[32], m[32];
+    sp_digit* r = b;  /* b is double-width; result is computed in place. */
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 32, base);
+        sp_2048_from_mp(e, 32, exp);
+        sp_2048_from_mp(m, 32, mod);
+
+        err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_2048_to_mp(r, res);
+    }
+
+    /* Zeroise the (potentially secret) exponent before returning. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+/* Left shift the 32-digit number a by n bits (0 < n < 64) into r.
+ * r is 33 digits: the overflow goes into r[32].  Works from the top digit
+ * down; each digit contributes (a[i] << n) | (a[i-1] >> (64 - n)).  The
+ * (64 - n) shift is done as ">> 1 then >> (63 - n)" (x6 = 63 - n) so that
+ * n == 0 cannot produce an undefined 64-bit shift.  r may alias a.
+ */
+static void sp_2048_lshift_32(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov x6, 63\n\t"
+        "sub x6, x6, %[n]\n\t"
+        "ldr x3, [%[a], 248]\n\t"
+        "lsr x4, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x4, x4, x6\n\t"
+        "ldr x2, [%[a], 240]\n\t"
+        "str x4, [%[r], 256]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 232]\n\t"
+        "str x3, [%[r], 248]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 224]\n\t"
+        "str x2, [%[r], 240]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 216]\n\t"
+        "str x4, [%[r], 232]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 208]\n\t"
+        "str x3, [%[r], 224]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 200]\n\t"
+        "str x2, [%[r], 216]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 192]\n\t"
+        "str x4, [%[r], 208]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 184]\n\t"
+        "str x3, [%[r], 200]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 176]\n\t"
+        "str x2, [%[r], 192]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 168]\n\t"
+        "str x4, [%[r], 184]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 160]\n\t"
+        "str x3, [%[r], 176]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 152]\n\t"
+        "str x2, [%[r], 168]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 144]\n\t"
+        "str x4, [%[r], 160]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 136]\n\t"
+        "str x3, [%[r], 152]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 128]\n\t"
+        "str x2, [%[r], 144]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 120]\n\t"
+        "str x4, [%[r], 136]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 112]\n\t"
+        "str x3, [%[r], 128]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 104]\n\t"
+        "str x2, [%[r], 120]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 96]\n\t"
+        "str x4, [%[r], 112]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 88]\n\t"
+        "str x3, [%[r], 104]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 80]\n\t"
+        "str x2, [%[r], 96]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 72]\n\t"
+        "str x4, [%[r], 88]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 64]\n\t"
+        "str x3, [%[r], 80]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 56]\n\t"
+        "str x2, [%[r], 72]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 48]\n\t"
+        "str x4, [%[r], 64]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 40]\n\t"
+        "str x3, [%[r], 56]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 32]\n\t"
+        "str x2, [%[r], 48]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 24]\n\t"
+        "str x4, [%[r], 40]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 16]\n\t"
+        "str x3, [%[r], 32]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 8]\n\t"
+        "str x2, [%[r], 24]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 0]\n\t"
+        "str x4, [%[r], 16]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "str x2, [%[r]]\n\t"
+        "str x3, [%[r], 8]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "x2", "x3", "x4", "x5", "x6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ * Base-2 specialisation: instead of Montgomery multiplies by a table entry,
+ * each 6-bit window is applied with a left shift of r.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[64];
+    sp_digit td[33];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* 97 digits = 64 for norm + 33 for tmp (mul_d result is 33 digits). */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 64;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* Consume the top (possibly short) 6-bit window first:
+         * r = (Montgomery one) << y. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 6;
+        if (c == 64) {
+            c = 58;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        sp_2048_lshift_32(r, norm, y);
+        for (; i>=0 || c>=6; ) {
+            if (c == 0) {
+                /* Current word exhausted; window comes from the next word. */
+                n = e[i--];
+                y = n >> 58;
+                n <<= 6;
+                c = 58;
+            }
+            else if (c < 6) {
+                /* Window straddles two exponent words. */
+                y = n >> 58;
+                n = e[i--];
+                c = 6 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 58) & 0x3f;
+                n <<= 6;
+                c -= 6;
+            }
+
+            /* 6 squarings, then multiply by 2^y as a shift. */
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_lshift_32(r, r, y);
+            /* Fold the overflow digit r[32] back in via norm.
+             * NOTE(review): relies on norm = 2^2048 - m as produced by
+             * sp_2048_mont_norm_32 — confirm against its definition. */
+            sp_2048_mul_d_32(tmp, norm, r[32]);
+            r[32] = 0;
+            o = sp_2048_add_32(r, r, tmp);
+            sp_2048_cond_sub_32(r, r, m, (sp_digit)0 - o);
+        }
+
+        /* Convert out of Montgomery form and normalise into [0, m). */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_2048 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus (must be exactly 2048 bits).
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Length, in bytes, of exponentiation result (leading zero bytes
+ * are stripped).
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[64], e[32], m[32];
+    sp_digit* r = b;  /* b is double-width; result is computed in place. */
+    word32 i;
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 256) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 32, base);
+        sp_2048_from_bin(e, 32, exp, expLen);
+        sp_2048_from_mp(m, 32, mod);
+
+    #ifdef HAVE_FFDHE_2048
+        /* Fast path for base 2 when the modulus' top word is all ones
+         * (as for the FFDHE 2048 prime). */
+        if (base->used == 1 && base->dp[0] == 2 && m[31] == (sp_digit)-1)
+            err = sp_2048_mod_exp_2_32(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_2048_mod_exp_32(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+        /* Strip leading zero bytes from the big-endian output. */
+        for (i=0; i<256 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    /* Zeroise the secret exponent before returning. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ /* 16 digits = 1024 bits; b is double width and r aliases it so the
+ * result is produced in the same buffer. */
+ sp_digit b[32], e[16], m[16];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ /* Reject operands that exceed the fixed 1024-bit capacity. */
+ if (mp_count_bits(base) > 1024) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ /* Modulus must be exactly 1024 bits for this fixed-size implementation. */
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(b, 16, base);
+ sp_2048_from_mp(e, 16, exp);
+ sp_2048_from_mp(m, 16, mod);
+
+ err = sp_2048_mod_exp_16(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Clear the unused high half before converting the full 32-digit
+ * buffer back to an mp_int; clamp trims any leading zero digits. */
+ XMEMSET(r + 16, 0, sizeof(*r) * 16U);
+ err = sp_2048_to_mp(r, res);
+ res->used = mod->used;
+ mp_clamp(res);
+ }
+
+ /* Zeroise the exponent copy - it may be private key material. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
+
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j;
+ byte* d;
+
+ /* Pack 8 big-endian input bytes per 64-bit digit, starting from the
+ * least significant end of the array. */
+ for (i = n - 1,j = 0; i >= 7; i -= 8) {
+ r[j] = ((sp_digit)a[i - 0] << 0) |
+ ((sp_digit)a[i - 1] << 8) |
+ ((sp_digit)a[i - 2] << 16) |
+ ((sp_digit)a[i - 3] << 24) |
+ ((sp_digit)a[i - 4] << 32) |
+ ((sp_digit)a[i - 5] << 40) |
+ ((sp_digit)a[i - 6] << 48) |
+ ((sp_digit)a[i - 7] << 56);
+ j++;
+ }
+
+ /* Handle the remaining 1..7 most-significant bytes a[0..i] by writing
+ * them into the top digit through a byte pointer.
+ * NOTE(review): this depends on little-endian digit layout (true on
+ * AArch64) -- confirm if ever reused on a big-endian target. */
+ if (i >= 0) {
+ r[j] = 0;
+
+ d = (byte*)r;
+ switch (i) {
+ case 6: d[n - 1 - 6] = a[6]; //fallthrough
+ case 5: d[n - 1 - 5] = a[5]; //fallthrough
+ case 4: d[n - 1 - 4] = a[4]; //fallthrough
+ case 3: d[n - 1 - 3] = a[3]; //fallthrough
+ case 2: d[n - 1 - 2] = a[2]; //fallthrough
+ case 1: d[n - 1 - 1] = a[1]; //fallthrough
+ case 0: d[n - 1 - 0] = a[0]; //fallthrough
+ }
+ j++;
+ }
+
+ /* Zero any remaining high digits up to the requested size. */
+ for (; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+ /* mp_int digits and sp_digits are the same width: plain copy plus
+ * zero padding of the unused high digits. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 64
+ /* Each mp_int digit is wider than 64 bits and spans several output
+ * digits; s tracks the bit offset consumed from the current digit. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ /* Mask to 64 bits in case sp_digit promotion carries extra bits. */
+ r[j] &= 0xffffffffffffffffl;
+ s = 64U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 64U) <= (word32)DIGIT_BIT) {
+ s += 64U;
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* Each mp_int digit is narrower than 64 bits: accumulate successive
+ * digits into each output word, splitting at 64-bit boundaries. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 64) {
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 64 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ /* Carry the bits that did not fit into the next word. */
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_3072_to_bin(sp_digit* r, byte* a)
+{
+ int i, j;
+
+ /* Emit all 48 digits most-significant first; each shift isolates one
+ * byte and the assignment to byte truncates to the low 8 bits. */
+ for (i = 47, j = 0; i >= 0; i--) {
+ a[j++] = r[i] >> 56;
+ a[j++] = r[i] >> 48;
+ a[j++] = r[i] >> 40;
+ a[j++] = r[i] >> 32;
+ a[j++] = r[i] >> 24;
+ a[j++] = r[i] >> 16;
+ a[j++] = r[i] >> 8;
+ a[j++] = r[i] >> 0;
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Schoolbook 12x12 digit product computed column by column with
+ * mul/umulh partial products and carry-chained accumulation.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ /* tmp receives the low 12 output words so r may alias a or b. */
+ sp_digit tmp[12];
+
+ __asm__ __volatile__ (
+ /* Register map: x10-x17, x19-x22 cache A[0..11] (x18 is skipped:
+ * AAPCS64 platform register); x9 holds the current B word; x7/x8
+ * receive the mul/umulh partial product; x4-x6 rotate as the
+ * three-word column accumulator. */
+ "ldp x10, x11, [%[a], 0]\n\t"
+ "ldp x12, x13, [%[a], 16]\n\t"
+ "ldp x14, x15, [%[a], 32]\n\t"
+ "ldp x16, x17, [%[a], 48]\n\t"
+ "ldp x19, x20, [%[a], 64]\n\t"
+ "ldp x21, x22, [%[a], 80]\n\t"
+ "# A[0] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "mul x4, x10, x9\n\t"
+ "umulh x5, x10, x9\n\t"
+ "mov x6, 0\n\t"
+ "str x4, [%[tmp]]\n\t"
+ "# A[0] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[1] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[tmp], 8]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[0] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[1] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[2] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[tmp], 16]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[0] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[1] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[2] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[3] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[tmp], 24]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[0] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[1] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[2] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[3] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[4] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[tmp], 32]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[0] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[1] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[2] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[3] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[4] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[5] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[tmp], 40]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[0] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[1] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[2] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[3] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[4] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[5] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[6] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[tmp], 48]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[0] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[1] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[2] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[3] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[4] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[5] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[6] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[7] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[tmp], 56]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[0] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[1] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[2] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[3] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[4] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[5] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[6] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[7] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[8] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[tmp], 64]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[0] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[1] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[2] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[3] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[4] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[5] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[6] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[7] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[8] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[9] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[tmp], 72]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[0] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[1] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[2] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[3] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[4] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[5] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[6] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[7] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[8] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[9] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[10] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[tmp], 80]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[0] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x10, x9\n\t"
+ "umulh x8, x10, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[1] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x11, x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[2] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[3] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[4] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[5] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[6] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[7] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[8] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[9] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[10] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[11] * B[0]\n\t"
+ "ldr x9, [%[b], 0]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[tmp], 88]\n\t"
+ "adc x5, x5, xzr\n\t"
+ /* Columns 12 and up no longer alias the inputs, so they are
+ * written straight to r instead of tmp. */
+ "# A[1] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x11, x9\n\t"
+ "umulh x8, x11, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[2] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x12, x9\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[3] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[4] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[5] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[6] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[7] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[8] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[9] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[10] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[11] * B[1]\n\t"
+ "ldr x9, [%[b], 8]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[r], 96]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[2] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x12, x9\n\t"
+ "umulh x8, x12, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[3] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x13, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[4] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[5] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[6] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[7] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[8] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[9] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[10] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[11] * B[2]\n\t"
+ "ldr x9, [%[b], 16]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[r], 104]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[3] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x13, x9\n\t"
+ "umulh x8, x13, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[4] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x14, x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[5] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[6] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[7] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[8] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[9] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[10] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[11] * B[3]\n\t"
+ "ldr x9, [%[b], 24]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[r], 112]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[4] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x14, x9\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[5] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[6] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[7] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[8] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[9] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[10] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[11] * B[4]\n\t"
+ "ldr x9, [%[b], 32]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[r], 120]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[5] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x15, x9\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[6] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x16, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[7] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[8] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[9] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[10] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[11] * B[5]\n\t"
+ "ldr x9, [%[b], 40]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[r], 128]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[6] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x16, x9\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[7] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x17, x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[8] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[9] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[10] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[11] * B[6]\n\t"
+ "ldr x9, [%[b], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[r], 136]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[7] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x17, x9\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[8] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[9] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[10] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[11] * B[7]\n\t"
+ "ldr x9, [%[b], 56]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[r], 144]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[8] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x19, x9\n\t"
+ "umulh x8, x19, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[9] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[10] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[11] * B[8]\n\t"
+ "ldr x9, [%[b], 64]\n\t"
+ "adcs x6, x6, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[r], 152]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[9] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x20, x9\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[10] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[11] * B[9]\n\t"
+ "ldr x9, [%[b], 72]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[r], 160]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[10] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x21, x9\n\t"
+ "umulh x8, x21, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[11] * B[10]\n\t"
+ "ldr x9, [%[b], 80]\n\t"
+ "adcs x5, x5, x8\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[r], 168]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[11] * B[11]\n\t"
+ "ldr x9, [%[b], 88]\n\t"
+ "mul x7, x22, x9\n\t"
+ "umulh x8, x22, x9\n\t"
+ "adds x5, x5, x7\n\t"
+ "adc x6, x6, x8\n\t"
+ "stp x5, x6, [%[r], 176]\n\t"
+ /* Copy the low half from tmp into r now that a and b have been
+ * fully consumed (supports r aliasing an input). */
+ "ldp x10, x11, [%[tmp], 0]\n\t"
+ "ldp x12, x13, [%[tmp], 16]\n\t"
+ "ldp x14, x15, [%[tmp], 32]\n\t"
+ "ldp x16, x17, [%[tmp], 48]\n\t"
+ "ldp x19, x20, [%[tmp], 64]\n\t"
+ "ldp x21, x22, [%[tmp], 80]\n\t"
+ "stp x10, x11, [%[r], 0]\n\t"
+ "stp x12, x13, [%[r], 16]\n\t"
+ "stp x14, x15, [%[r], 32]\n\t"
+ "stp x16, x17, [%[r], 48]\n\t"
+ "stp x19, x20, [%[r], 64]\n\t"
+ "stp x21, x22, [%[r], 80]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22"
+ );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.  Receives the 24-digit product.
+ * a A single precision integer, 12 digits.
+ */
+static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
+{
+ /* Schoolbook squaring in column order: for each result column the
+  * off-diagonal products A[i]*A[j] (i < j) are accumulated once and
+  * then doubled, and the diagonal A[i]*A[i] term is folded in.
+  * x2-x4 rotate as the running 3-word column accumulator, x5-x9 hold
+  * partial products, and x10-x22 (x18, the platform register, is
+  * skipped) cache a[0..11] for the whole computation. */
+ __asm__ __volatile__ (
+ "ldp x10, x11, [%[a], 0]\n\t"
+ "ldp x12, x13, [%[a], 16]\n\t"
+ "ldp x14, x15, [%[a], 32]\n\t"
+ "ldp x16, x17, [%[a], 48]\n\t"
+ "ldp x19, x20, [%[a], 64]\n\t"
+ "ldp x21, x22, [%[a], 80]\n\t"
+ "# A[0] * A[0]\n\t"
+ "mul x2, x10, x10\n\t"
+ "umulh x3, x10, x10\n\t"
+ "str x2, [%[r]]\n\t"
+ "mov x4, 0\n\t"
+ "# A[0] * A[1]\n\t"
+ "mul x8, x10, x11\n\t"
+ "umulh x9, x10, x11\n\t"
+ "adds x3, x3, x8\n\t"
+ "adcs x4, x4, x9\n\t"
+ "adc x2, xzr, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "str x3, [%[r], 8]\n\t"
+ "# A[0] * A[2]\n\t"
+ "mul x8, x10, x12\n\t"
+ "adcs x4, x4, x9\n\t"
+ "umulh x9, x10, x12\n\t"
+ "adc x2, x2, xzr\n\t"
+ "adds x4, x4, x8\n\t"
+ "adcs x2, x2, x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x4, x4, x8\n\t"
+ "# A[1] * A[1]\n\t"
+ "mul x8, x11, x11\n\t"
+ "adcs x2, x2, x9\n\t"
+ "umulh x9, x11, x11\n\t"
+ "adc x3, x3, xzr\n\t"
+ "adds x4, x4, x8\n\t"
+ "str x4, [%[r], 16]\n\t"
+ "# A[0] * A[3]\n\t"
+ "mul x8, x10, x13\n\t"
+ "adcs x2, x2, x9\n\t"
+ "umulh x9, x10, x13\n\t"
+ "adc x3, x3, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "adcs x3, x3, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "# A[1] * A[2]\n\t"
+ "mul x8, x11, x12\n\t"
+ "adcs x3, x3, x9\n\t"
+ "umulh x9, x11, x12\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "adcs x3, x3, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "str x2, [%[r], 24]\n\t"
+ "# A[0] * A[4]\n\t"
+ "mul x8, x10, x14\n\t"
+ "adcs x3, x3, x9\n\t"
+ "umulh x9, x10, x14\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "adcs x4, x4, x9\n\t"
+ "adc x2, xzr, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "# A[1] * A[3]\n\t"
+ "mul x8, x11, x13\n\t"
+ "adcs x4, x4, x9\n\t"
+ "umulh x9, x11, x13\n\t"
+ "adc x2, x2, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "adcs x4, x4, x9\n\t"
+ "adc x2, x2, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "# A[2] * A[2]\n\t"
+ "mul x8, x12, x12\n\t"
+ "adcs x4, x4, x9\n\t"
+ "umulh x9, x12, x12\n\t"
+ "adc x2, x2, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "str x3, [%[r], 32]\n\t"
+ "# A[0] * A[5]\n\t"
+ "mul x5, x10, x15\n\t"
+ "adcs x4, x4, x9\n\t"
+ "umulh x6, x10, x15\n\t"
+ "adc x2, x2, xzr\n\t"
+ "mov x3, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[1] * A[4]\n\t"
+ "mul x8, x11, x14\n\t"
+ "umulh x9, x11, x14\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[2] * A[3]\n\t"
+ "mul x8, x12, x13\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x12, x13\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x4, x4, x5\n\t"
+ "adcs x2, x2, x6\n\t"
+ "adc x3, x3, x7\n\t"
+ "str x4, [%[r], 40]\n\t"
+ "# A[0] * A[6]\n\t"
+ "mul x5, x10, x16\n\t"
+ "umulh x6, x10, x16\n\t"
+ "mov x4, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[1] * A[5]\n\t"
+ "mul x8, x11, x15\n\t"
+ "umulh x9, x11, x15\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[2] * A[4]\n\t"
+ "mul x8, x12, x14\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x12, x14\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[3] * A[3]\n\t"
+ "mul x8, x13, x13\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x13, x13\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x2, x2, x5\n\t"
+ "adcs x3, x3, x6\n\t"
+ "adc x4, x4, x7\n\t"
+ "str x2, [%[r], 48]\n\t"
+ "# A[0] * A[7]\n\t"
+ "mul x5, x10, x17\n\t"
+ "umulh x6, x10, x17\n\t"
+ "mov x2, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[1] * A[6]\n\t"
+ "mul x8, x11, x16\n\t"
+ "umulh x9, x11, x16\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[2] * A[5]\n\t"
+ "mul x8, x12, x15\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x12, x15\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[3] * A[4]\n\t"
+ "mul x8, x13, x14\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x13, x14\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x3, x3, x5\n\t"
+ "adcs x4, x4, x6\n\t"
+ "adc x2, x2, x7\n\t"
+ "str x3, [%[r], 56]\n\t"
+ "# A[0] * A[8]\n\t"
+ "mul x5, x10, x19\n\t"
+ "umulh x6, x10, x19\n\t"
+ "mov x3, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[1] * A[7]\n\t"
+ "mul x8, x11, x17\n\t"
+ "umulh x9, x11, x17\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[2] * A[6]\n\t"
+ "mul x8, x12, x16\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x12, x16\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[3] * A[5]\n\t"
+ "mul x8, x13, x15\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x13, x15\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[4] * A[4]\n\t"
+ "mul x8, x14, x14\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x14, x14\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x4, x4, x5\n\t"
+ "adcs x2, x2, x6\n\t"
+ "adc x3, x3, x7\n\t"
+ "str x4, [%[r], 64]\n\t"
+ "# A[0] * A[9]\n\t"
+ "mul x5, x10, x20\n\t"
+ "umulh x6, x10, x20\n\t"
+ "mov x4, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[1] * A[8]\n\t"
+ "mul x8, x11, x19\n\t"
+ "umulh x9, x11, x19\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[2] * A[7]\n\t"
+ "mul x8, x12, x17\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x12, x17\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[3] * A[6]\n\t"
+ "mul x8, x13, x16\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x13, x16\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[4] * A[5]\n\t"
+ "mul x8, x14, x15\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x14, x15\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x2, x2, x5\n\t"
+ "adcs x3, x3, x6\n\t"
+ "adc x4, x4, x7\n\t"
+ "str x2, [%[r], 72]\n\t"
+ "# A[0] * A[10]\n\t"
+ "mul x5, x10, x21\n\t"
+ "umulh x6, x10, x21\n\t"
+ "mov x2, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[1] * A[9]\n\t"
+ "mul x8, x11, x20\n\t"
+ "umulh x9, x11, x20\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[2] * A[8]\n\t"
+ "mul x8, x12, x19\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x12, x19\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[3] * A[7]\n\t"
+ "mul x8, x13, x17\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x13, x17\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[4] * A[6]\n\t"
+ "mul x8, x14, x16\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x14, x16\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[5] * A[5]\n\t"
+ "mul x8, x15, x15\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x15, x15\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x3, x3, x5\n\t"
+ "adcs x4, x4, x6\n\t"
+ "adc x2, x2, x7\n\t"
+ "str x3, [%[r], 80]\n\t"
+ "# A[0] * A[11]\n\t"
+ "mul x5, x10, x22\n\t"
+ "umulh x6, x10, x22\n\t"
+ "mov x3, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[1] * A[10]\n\t"
+ "mul x8, x11, x21\n\t"
+ "umulh x9, x11, x21\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[2] * A[9]\n\t"
+ "mul x8, x12, x20\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x12, x20\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[3] * A[8]\n\t"
+ "mul x8, x13, x19\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x13, x19\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[4] * A[7]\n\t"
+ "mul x8, x14, x17\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x14, x17\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[5] * A[6]\n\t"
+ "mul x8, x15, x16\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x15, x16\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x4, x4, x5\n\t"
+ "adcs x2, x2, x6\n\t"
+ "adc x3, x3, x7\n\t"
+ "str x4, [%[r], 88]\n\t"
+ "# A[1] * A[11]\n\t"
+ "mul x5, x11, x22\n\t"
+ "umulh x6, x11, x22\n\t"
+ "mov x4, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[2] * A[10]\n\t"
+ "mul x8, x12, x21\n\t"
+ "umulh x9, x12, x21\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[3] * A[9]\n\t"
+ "mul x8, x13, x20\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x13, x20\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[4] * A[8]\n\t"
+ "mul x8, x14, x19\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x14, x19\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[5] * A[7]\n\t"
+ "mul x8, x15, x17\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x15, x17\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[6] * A[6]\n\t"
+ "mul x8, x16, x16\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x16, x16\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x2, x2, x5\n\t"
+ "adcs x3, x3, x6\n\t"
+ "adc x4, x4, x7\n\t"
+ "str x2, [%[r], 96]\n\t"
+ "# A[2] * A[11]\n\t"
+ "mul x5, x12, x22\n\t"
+ "umulh x6, x12, x22\n\t"
+ "mov x2, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[3] * A[10]\n\t"
+ "mul x8, x13, x21\n\t"
+ "umulh x9, x13, x21\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[4] * A[9]\n\t"
+ "mul x8, x14, x20\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x14, x20\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[5] * A[8]\n\t"
+ "mul x8, x15, x19\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x15, x19\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[6] * A[7]\n\t"
+ "mul x8, x16, x17\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x16, x17\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x3, x3, x5\n\t"
+ "adcs x4, x4, x6\n\t"
+ "adc x2, x2, x7\n\t"
+ "str x3, [%[r], 104]\n\t"
+ "# A[3] * A[11]\n\t"
+ "mul x5, x13, x22\n\t"
+ "umulh x6, x13, x22\n\t"
+ "mov x3, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[4] * A[10]\n\t"
+ "mul x8, x14, x21\n\t"
+ "umulh x9, x14, x21\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[5] * A[9]\n\t"
+ "mul x8, x15, x20\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x15, x20\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[6] * A[8]\n\t"
+ "mul x8, x16, x19\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x16, x19\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[7] * A[7]\n\t"
+ "mul x8, x17, x17\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x17, x17\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x4, x4, x5\n\t"
+ "adcs x2, x2, x6\n\t"
+ "adc x3, x3, x7\n\t"
+ "str x4, [%[r], 112]\n\t"
+ "# A[4] * A[11]\n\t"
+ "mul x5, x14, x22\n\t"
+ "umulh x6, x14, x22\n\t"
+ "mov x4, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[5] * A[10]\n\t"
+ "mul x8, x15, x21\n\t"
+ "umulh x9, x15, x21\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[6] * A[9]\n\t"
+ "mul x8, x16, x20\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x16, x20\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[7] * A[8]\n\t"
+ "mul x8, x17, x19\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x17, x19\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x2, x2, x5\n\t"
+ "adcs x3, x3, x6\n\t"
+ "adc x4, x4, x7\n\t"
+ "str x2, [%[r], 120]\n\t"
+ "# A[5] * A[11]\n\t"
+ "mul x5, x15, x22\n\t"
+ "umulh x6, x15, x22\n\t"
+ "mov x2, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[6] * A[10]\n\t"
+ "mul x8, x16, x21\n\t"
+ "umulh x9, x16, x21\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[7] * A[9]\n\t"
+ "mul x8, x17, x20\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x17, x20\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[8] * A[8]\n\t"
+ "mul x8, x19, x19\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x19, x19\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x3, x3, x5\n\t"
+ "adcs x4, x4, x6\n\t"
+ "adc x2, x2, x7\n\t"
+ "str x3, [%[r], 128]\n\t"
+ "# A[6] * A[11]\n\t"
+ "mul x5, x16, x22\n\t"
+ "umulh x6, x16, x22\n\t"
+ "mov x3, 0\n\t"
+ "mov x7, 0\n\t"
+ "# A[7] * A[10]\n\t"
+ "mul x8, x17, x21\n\t"
+ "umulh x9, x17, x21\n\t"
+ "adds x5, x5, x8\n\t"
+ "# A[8] * A[9]\n\t"
+ "mul x8, x19, x20\n\t"
+ "adcs x6, x6, x9\n\t"
+ "umulh x9, x19, x20\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x8\n\t"
+ "adcs x6, x6, x9\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adc x7, x7, x7\n\t"
+ "adds x4, x4, x5\n\t"
+ "adcs x2, x2, x6\n\t"
+ "adc x3, x3, x7\n\t"
+ "str x4, [%[r], 136]\n\t"
+ "# A[7] * A[11]\n\t"
+ "mul x8, x17, x22\n\t"
+ "umulh x9, x17, x22\n\t"
+ "adds x2, x2, x8\n\t"
+ "adcs x3, x3, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "# A[8] * A[10]\n\t"
+ "mul x8, x19, x21\n\t"
+ "adcs x3, x3, x9\n\t"
+ "umulh x9, x19, x21\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "adcs x3, x3, x9\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "# A[9] * A[9]\n\t"
+ "mul x8, x20, x20\n\t"
+ "adcs x3, x3, x9\n\t"
+ "umulh x9, x20, x20\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "str x2, [%[r], 144]\n\t"
+ "# A[8] * A[11]\n\t"
+ "mul x8, x19, x22\n\t"
+ "adcs x3, x3, x9\n\t"
+ "umulh x9, x19, x22\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "adcs x4, x4, x9\n\t"
+ "adc x2, xzr, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "# A[9] * A[10]\n\t"
+ "mul x8, x20, x21\n\t"
+ "adcs x4, x4, x9\n\t"
+ "umulh x9, x20, x21\n\t"
+ "adc x2, x2, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "adcs x4, x4, x9\n\t"
+ "adc x2, x2, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "str x3, [%[r], 152]\n\t"
+ "# A[9] * A[11]\n\t"
+ "mul x8, x20, x22\n\t"
+ "adcs x4, x4, x9\n\t"
+ "umulh x9, x20, x22\n\t"
+ "adc x2, x2, xzr\n\t"
+ "adds x4, x4, x8\n\t"
+ "adcs x2, x2, x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x4, x4, x8\n\t"
+ "# A[10] * A[10]\n\t"
+ "mul x8, x21, x21\n\t"
+ "adcs x2, x2, x9\n\t"
+ "umulh x9, x21, x21\n\t"
+ "adc x3, x3, xzr\n\t"
+ "adds x4, x4, x8\n\t"
+ "str x4, [%[r], 160]\n\t"
+ "# A[10] * A[11]\n\t"
+ "mul x8, x21, x22\n\t"
+ "adcs x2, x2, x9\n\t"
+ "umulh x9, x21, x22\n\t"
+ "adc x3, x3, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "adcs x3, x3, x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x2, x2, x8\n\t"
+ "str x2, [%[r], 168]\n\t"
+ "# A[11] * A[11]\n\t"
+ "mul x8, x22, x22\n\t"
+ "adcs x3, x3, x9\n\t"
+ "umulh x9, x22, x22\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x3, x3, x8\n\t"
+ "adc x4, x4, x9\n\t"
+ "stp x3, x4, [%[r], 176]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ /* NOTE(review): "x10" appears twice in this clobber list; the
+  * duplicate is redundant but harmless to GCC/Clang. */
+ : "memory", "x2", "x3", "x4", "x8", "x9", "x10", "x5", "x6", "x7", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22"
+ );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.  Receives the 12-digit sum.
+ * a A single precision integer, 12 digits.
+ * b A single precision integer, 12 digits.
+ *
+ * Returns the carry out of the most significant digit (0 or 1).
+ */
+static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ /* Single ripple-carry chain: adds starts it, adcs continues it.
+  * ldp/stp are interleaved freely since they do not touch flags. */
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "adds x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 48]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 80]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ /* Materialise the final carry flag into the return register. */
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return (sp_digit)r;
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result, 24 digits.
+ * b A single precision integer, 24 digits.
+ *
+ * Returns 0 when no borrow occurred, or all-ones ((sp_digit)-1) when it
+ * did (csetm on "carry clear", which signals borrow on AArch64).
+ */
+static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b)
+{
+ /* Single borrow chain: subs starts it, sbcs continues it across all
+  * 24 digits; results are stored back over a as they are produced. */
+ __asm__ __volatile__ (
+ "ldp x2, x3, [%[a], 0]\n\t"
+ "ldp x6, x7, [%[b], 0]\n\t"
+ "subs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 16]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 16]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 0]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 16]\n\t"
+ "ldp x2, x3, [%[a], 32]\n\t"
+ "ldp x6, x7, [%[b], 32]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 48]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 48]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 32]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 48]\n\t"
+ "ldp x2, x3, [%[a], 64]\n\t"
+ "ldp x6, x7, [%[b], 64]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 80]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 80]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 64]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 80]\n\t"
+ "ldp x2, x3, [%[a], 96]\n\t"
+ "ldp x6, x7, [%[b], 96]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 112]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 112]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 96]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 112]\n\t"
+ "ldp x2, x3, [%[a], 128]\n\t"
+ "ldp x6, x7, [%[b], 128]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 144]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 144]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 128]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 144]\n\t"
+ "ldp x2, x3, [%[a], 160]\n\t"
+ "ldp x6, x7, [%[b], 160]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 176]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 176]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 160]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 176]\n\t"
+ /* All-ones mask if a borrow propagated out, else zero. */
+ "csetm %[a], cc\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+
+ return (sp_digit)a;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.  Receives the 24-digit sum.
+ * a A single precision integer, 24 digits.
+ * b A single precision integer, 24 digits.
+ *
+ * Returns the carry out of the most significant digit (0 or 1).
+ */
+static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ /* One unbroken adds/adcs carry chain across 24 digits; ldp/stp are
+  * interleaved since paired loads/stores leave the flags untouched. */
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "adds x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 48]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 80]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 112]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ "ldp x3, x4, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 144]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 128]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 144]\n\t"
+ "ldp x3, x4, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 176]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 160]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 176]\n\t"
+ /* Materialise the final carry flag into the return register. */
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return (sp_digit)r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i = 0;
+
+ while (i < 12) {
+ r[i] = a[i] & m;
+ i++;
+ }
+#else
+ int i;
+
+ /* Four digits per iteration; 12 is a multiple of 4 so no tail. */
+ for (i = 0; i < 12; i += 4) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ }
+#endif
+}
+
+/* Add digit to a into r. (r = a + b)
+ *
+ * r A single precision integer.  Receives the 12-digit result.
+ * a A single precision integer, 12 digits.
+ * d A single digit added at the bottom; the carry ripples upward.
+ *
+ * The carry out of the top digit is discarded; presumably callers
+ * guarantee the sum cannot overflow 12 digits -- TODO confirm.
+ */
+static void sp_3072_add_zero_12(sp_digit* r, const sp_digit* a,
+ const sp_digit d)
+{
+ /* adds injects d into digit 0; the remaining digits only absorb the
+  * propagating carry via adcs with xzr. */
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adds x3, x3, %[d]\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+ : "memory", "x3", "x4", "x5", "x6"
+ );
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Karatsuba multiplication: with a = a1*2^768 + a0 (12 digits each
+ * half) and b split likewise, r = z2*2^1536 + (z1 - z2 - z0)*2^768 +
+ * z0, where z0 = a0*b0, z2 = a1*b1 and z1 = (a0 + a1)*(b0 + b1).
+ *
+ * r A single precision integer.  Receives the 48-digit product.
+ * a A single precision integer, 24 digits.
+ * b A single precision integer, 24 digits.
+ */
+SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[24];
+ sp_digit a1[12];
+ sp_digit b1[12];
+ sp_digit z2[24];
+ sp_digit u, ca, cb;
+
+ /* Half-sums; ca/cb are the carry (13th) digits of a0+a1 and b0+b1. */
+ ca = sp_3072_add_12(a1, a, &a[12]);
+ cb = sp_3072_add_12(b1, b, &b[12]);
+ /* ca*cb is the top cross term of z1; start accumulating it in u. */
+ u = ca & cb;
+ sp_3072_mul_12(z1, a1, b1);
+ sp_3072_mul_12(z2, &a[12], &b[12]);
+ sp_3072_mul_12(z0, a, b);
+ /* Add the carry-digit cross terms cb*a1' and ca*b1' at weight 2^1536
+  * using masks (0 - cb / 0 - ca are all-ones when the carry is 1). */
+ sp_3072_mask_12(r + 24, a1, 0 - cb);
+ sp_3072_mask_12(b1, b1, 0 - ca);
+ u += sp_3072_add_12(r + 24, r + 24, b1);
+ /* z1 -= z2 + z0, then combine; note sub_in_place returns a borrow
+  * mask (0 or -1) and the adds return 0/1, so u nets out the carry. */
+ u += sp_3072_sub_in_place_24(z1, z2);
+ u += sp_3072_sub_in_place_24(z1, z0);
+ u += sp_3072_add_24(r + 12, r + 12, z1);
+ u += sp_3072_add_12(r + 24, r + 24, z2);
+ sp_3072_add_zero_12(r + 36, z2 + 12, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r A single precision integer.  Receives the 12-digit result.
+ * a A single precision integer, 12 digits.
+ *
+ * Returns the carry out of the top digit (0 or 1).
+ */
+static sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a)
+{
+ sp_digit c = 0;
+
+ /* Loop over 4 digits per iteration until a reaches its end (a+96).
+  * "adds c, c, #-1" re-loads the carry flag from c at the top of each
+  * pass: c==1 yields carry set, c==0 yields carry clear. */
+ __asm__ __volatile__ (
+ "add x11, %[a], 96\n\t"
+ "\n1:\n\t"
+ "adds %[c], %[c], #-1\n\t"
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "adcs x3, x3, x3\n\t"
+ "adcs x4, x4, x4\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ "cset %[c], cs\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x11"
+ );
+
+ return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * r A single precision integer.  Receives the 12-digit result.
+ * a A single precision integer, 12 digits.
+ *
+ * Returns the carry out of the top digit (0 or 1).
+ */
+static sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a)
+{
+ /* Fully unrolled: each digit is added to itself on a single
+  * adds/adcs carry chain; loads/stores do not disturb the flags. */
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "adds x3, x3, x3\n\t"
+ "ldr x5, [%[a], 16]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 24]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 48]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 56]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 80]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 88]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ /* Materialise the final carry flag into the return register. */
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "x3", "x4", "x5", "x6"
+ );
+
+ return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring: with a = a1*2^768 + a0, r = z2*2^1536 +
+ * (z1 - z2 - z0)*2^768 + z0, where z0 = a0^2, z2 = a1^2 and
+ * z1 = (a0 + a1)^2.
+ *
+ * r A single precision integer.  Receives the 48-digit square.
+ * a A single precision integer, 24 digits.
+ */
+SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[24];
+ sp_digit z1[24];
+ sp_digit a1[12];
+ sp_digit u;
+
+ /* u is the carry (13th) digit of a0 + a1. */
+ u = sp_3072_add_12(a1, a, &a[12]);
+ sp_3072_sqr_12(z1, a1);
+ sp_3072_sqr_12(z2, &a[12]);
+ sp_3072_sqr_12(z0, a);
+ /* The carry digit contributes 2*u*a1' at weight 2^1536: mask then
+  * double (mask is all-ones when u is 1, zero otherwise). */
+ sp_3072_mask_12(r + 24, a1, 0 - u);
+ u += sp_3072_dbl_12(r + 24, r + 24);
+ /* z1 -= z2 + z0, then combine; sub_in_place returns a borrow mask
+  * (0 or -1) and the adds return 0/1, so u nets out the carry. */
+ u += sp_3072_sub_in_place_24(z1, z2);
+ u += sp_3072_sub_in_place_24(z1, z0);
+ u += sp_3072_add_24(r + 12, r + 12, z1);
+ u += sp_3072_add_12(r + 24, r + 24, z2);
+ sp_3072_add_zero_12(r + 36, z2 + 12, u);
+
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result, 48 digits.
+ * b A single precision integer, 48 digits.
+ *
+ * Returns 0 when no borrow occurred, or all-ones ((sp_digit)-1) when it
+ * did (csetm on "carry clear", which signals borrow on AArch64).
+ */
+static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
+{
+ /* One unbroken subs/sbcs borrow chain across all 48 digits, storing
+  * back over a as results are produced. */
+ __asm__ __volatile__ (
+ "ldp x2, x3, [%[a], 0]\n\t"
+ "ldp x6, x7, [%[b], 0]\n\t"
+ "subs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 16]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 16]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 0]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 16]\n\t"
+ "ldp x2, x3, [%[a], 32]\n\t"
+ "ldp x6, x7, [%[b], 32]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 48]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 48]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 32]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 48]\n\t"
+ "ldp x2, x3, [%[a], 64]\n\t"
+ "ldp x6, x7, [%[b], 64]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 80]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 80]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 64]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 80]\n\t"
+ "ldp x2, x3, [%[a], 96]\n\t"
+ "ldp x6, x7, [%[b], 96]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 112]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 112]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 96]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 112]\n\t"
+ "ldp x2, x3, [%[a], 128]\n\t"
+ "ldp x6, x7, [%[b], 128]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 144]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 144]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 128]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 144]\n\t"
+ "ldp x2, x3, [%[a], 160]\n\t"
+ "ldp x6, x7, [%[b], 160]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 176]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 176]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 160]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 176]\n\t"
+ "ldp x2, x3, [%[a], 192]\n\t"
+ "ldp x6, x7, [%[b], 192]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 208]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 208]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 192]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 208]\n\t"
+ "ldp x2, x3, [%[a], 224]\n\t"
+ "ldp x6, x7, [%[b], 224]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 240]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 240]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 224]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 240]\n\t"
+ "ldp x2, x3, [%[a], 256]\n\t"
+ "ldp x6, x7, [%[b], 256]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 272]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 272]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 256]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 272]\n\t"
+ "ldp x2, x3, [%[a], 288]\n\t"
+ "ldp x6, x7, [%[b], 288]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 304]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 304]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 288]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 304]\n\t"
+ "ldp x2, x3, [%[a], 320]\n\t"
+ "ldp x6, x7, [%[b], 320]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 336]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 336]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 320]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 336]\n\t"
+ "ldp x2, x3, [%[a], 352]\n\t"
+ "ldp x6, x7, [%[b], 352]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 368]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 368]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 352]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 368]\n\t"
+ /* All-ones mask if a borrow propagated out, else zero. */
+ "csetm %[a], cc\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+
+ return (sp_digit)a;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.  Receives the 48-digit sum.
+ * a A single precision integer, 48 digits.
+ * b A single precision integer, 48 digits.
+ *
+ * Returns the carry out of the most significant digit (0 or 1).
+ */
+static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ /* One unbroken adds/adcs carry chain across all 48 digits; ldp/stp
+  * are interleaved since paired loads/stores do not touch the flags. */
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "adds x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 48]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 80]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 112]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ "ldp x3, x4, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 144]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 128]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 144]\n\t"
+ "ldp x3, x4, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 176]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 160]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 176]\n\t"
+ "ldp x3, x4, [%[a], 192]\n\t"
+ "ldp x7, x8, [%[b], 192]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 208]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 208]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 192]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 208]\n\t"
+ "ldp x3, x4, [%[a], 224]\n\t"
+ "ldp x7, x8, [%[b], 224]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 240]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 240]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 224]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 240]\n\t"
+ "ldp x3, x4, [%[a], 256]\n\t"
+ "ldp x7, x8, [%[b], 256]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 272]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 272]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 256]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 272]\n\t"
+ "ldp x3, x4, [%[a], 288]\n\t"
+ "ldp x7, x8, [%[b], 288]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 304]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 304]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 288]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 304]\n\t"
+ "ldp x3, x4, [%[a], 320]\n\t"
+ "ldp x7, x8, [%[b], 320]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 336]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 336]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 320]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 336]\n\t"
+ "ldp x3, x4, [%[a], 352]\n\t"
+ "ldp x7, x8, [%[b], 352]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 368]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 368]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 352]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 368]\n\t"
+ /* Materialise the final carry flag into the return register. */
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return (sp_digit)r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i = 0;
+
+ while (i < 24) {
+ r[i] = a[i] & m;
+ i++;
+ }
+#else
+ int i;
+
+ /* Six digits per iteration; 24 is a multiple of 6 so no tail. */
+ for (i = 0; i < 24; i += 6) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ }
+#endif
+}
+
+/* Add the digit d to a into r. (r = a + d)
+ *
+ * r A single precision integer (24 digits) - result.
+ * a A single precision integer (24 digits).
+ * d A single precision digit, added into the least significant word.
+ *
+ * The carry ripples through all 24 words; a carry out of the most
+ * significant word is not returned.
+ */
+static void sp_3072_add_zero_24(sp_digit* r, const sp_digit* a,
+        const sp_digit d)
+{
+    __asm__ __volatile__ (
+        /* Words 0-3: add d to word 0, then only propagate the carry
+         * (adcs with xzr) through the remaining words. */
+        "ldp x3, x4, [%[a], 0]\n\t"
+        "ldp x5, x6, [%[a], 16]\n\t"
+        "adds x3, x3, %[d]\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "adcs x5, x5, xzr\n\t"
+        "stp x3, x4, [%[r], 0]\n\t"
+        "adcs x6, x6, xzr\n\t"
+        "stp x5, x6, [%[r], 16]\n\t"
+        /* Words 4-7. */
+        "ldp x3, x4, [%[a], 32]\n\t"
+        "ldp x5, x6, [%[a], 48]\n\t"
+        "adcs x3, x3, xzr\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "adcs x5, x5, xzr\n\t"
+        "stp x3, x4, [%[r], 32]\n\t"
+        "adcs x6, x6, xzr\n\t"
+        "stp x5, x6, [%[r], 48]\n\t"
+        /* Words 8-11. */
+        "ldp x3, x4, [%[a], 64]\n\t"
+        "ldp x5, x6, [%[a], 80]\n\t"
+        "adcs x3, x3, xzr\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "adcs x5, x5, xzr\n\t"
+        "stp x3, x4, [%[r], 64]\n\t"
+        "adcs x6, x6, xzr\n\t"
+        "stp x5, x6, [%[r], 80]\n\t"
+        /* Words 12-15. */
+        "ldp x3, x4, [%[a], 96]\n\t"
+        "ldp x5, x6, [%[a], 112]\n\t"
+        "adcs x3, x3, xzr\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "adcs x5, x5, xzr\n\t"
+        "stp x3, x4, [%[r], 96]\n\t"
+        "adcs x6, x6, xzr\n\t"
+        "stp x5, x6, [%[r], 112]\n\t"
+        /* Words 16-19. */
+        "ldp x3, x4, [%[a], 128]\n\t"
+        "ldp x5, x6, [%[a], 144]\n\t"
+        "adcs x3, x3, xzr\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "adcs x5, x5, xzr\n\t"
+        "stp x3, x4, [%[r], 128]\n\t"
+        "adcs x6, x6, xzr\n\t"
+        "stp x5, x6, [%[r], 144]\n\t"
+        /* Words 20-23; the final carry flag is discarded. */
+        "ldp x3, x4, [%[a], 160]\n\t"
+        "ldp x5, x6, [%[a], 176]\n\t"
+        "adcs x3, x3, xzr\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "adcs x5, x5, xzr\n\t"
+        "stp x3, x4, [%[r], 160]\n\t"
+        "adcs x6, x6, xzr\n\t"
+        "stp x5, x6, [%[r], 176]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: with a = aH*2^1536 + aL (and b likewise),
+ * only three 24-digit multiplies are needed instead of four.
+ * Note z0 aliases r, so the order of the operations below matters:
+ * r[0..47] must be written (z0) before the fix-up terms are
+ * accumulated into r + 48 and r + 24.
+ *
+ * r A single precision integer - 96-digit result.
+ * a A single precision integer (48 digits).
+ * b A single precision integer (48 digits).
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit b1[24];
+    sp_digit z2[48];
+    sp_digit u, ca, cb;
+
+    /* a1 = aL + aH, b1 = bL + bH; ca/cb are the carries out. */
+    ca = sp_3072_add_24(a1, a, &a[24]);
+    cb = sp_3072_add_24(b1, b, &b[24]);
+    u = ca & cb;
+    /* z1 = a1 * b1, z2 = aH * bH, z0 = aL * bL. */
+    sp_3072_mul_24(z1, a1, b1);
+    sp_3072_mul_24(z2, &a[24], &b[24]);
+    sp_3072_mul_24(z0, a, b);
+    /* Fix-up for the carry bits dropped from a1 and b1:
+     * add (cb ? a1 : 0) and (ca ? b1 : 0) at digit offset 48. */
+    sp_3072_mask_24(r + 48, a1, 0 - cb);
+    sp_3072_mask_24(b1, b1, 0 - ca);
+    u += sp_3072_add_24(r + 48, r + 48, b1);
+    /* Middle term: z1 - z2 - z0, accumulated at digit offset 24. */
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    u += sp_3072_add_24(r + 48, r + 48, z2);
+    /* Propagate the accumulated carry u into the top 24 digits. */
+    sp_3072_add_zero_24(r + 72, z2 + 24, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r A single precision integer (24 digits) - result.
+ * a A single precision integer (24 digits).
+ *
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+static sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* x11 = one-past-the-end of a (24 * 8 = 192 bytes). */
+        "add x11, %[a], 192\n\t"
+        "\n1:\n\t"
+        /* Re-materialise the saved carry: c + (-1) sets C iff c == 1.
+         * First pass: c == 0, so the carry starts clear. */
+        "adds %[c], %[c], #-1\n\t"
+        "ldp x3, x4, [%[a]], #16\n\t"
+        "ldp x5, x6, [%[a]], #16\n\t"
+        /* Each word doubled by adding it to itself with carry. */
+        "adcs x3, x3, x3\n\t"
+        "adcs x4, x4, x4\n\t"
+        "adcs x5, x5, x5\n\t"
+        "stp x3, x4, [%[r]], #16\n\t"
+        "adcs x6, x6, x6\n\t"
+        "stp x5, x6, [%[r]], #16\n\t"
+        /* Save the carry flag across the loop-control compare. */
+        "cset %[c], cs\n\t"
+        "cmp %[a], x11\n\t"
+        "b.ne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x11"
+    );
+
+    return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * Fully unrolled variant. The %[r] operand is reused to return the
+ * final carry: after all stores it is overwritten by cset.
+ *
+ * r A single precision integer (24 digits) - result.
+ * a A single precision integer (24 digits).
+ *
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+static sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        /* Words 0-3: adds starts the chain, adcs continues it. */
+        "ldp x3, x4, [%[a], 0]\n\t"
+        "adds x3, x3, x3\n\t"
+        "ldr x5, [%[a], 16]\n\t"
+        "adcs x4, x4, x4\n\t"
+        "ldr x6, [%[a], 24]\n\t"
+        "adcs x5, x5, x5\n\t"
+        "stp x3, x4, [%[r], 0]\n\t"
+        "adcs x6, x6, x6\n\t"
+        "stp x5, x6, [%[r], 16]\n\t"
+        "ldp x3, x4, [%[a], 32]\n\t"
+        "adcs x3, x3, x3\n\t"
+        "ldr x5, [%[a], 48]\n\t"
+        "adcs x4, x4, x4\n\t"
+        "ldr x6, [%[a], 56]\n\t"
+        "adcs x5, x5, x5\n\t"
+        "stp x3, x4, [%[r], 32]\n\t"
+        "adcs x6, x6, x6\n\t"
+        "stp x5, x6, [%[r], 48]\n\t"
+        "ldp x3, x4, [%[a], 64]\n\t"
+        "adcs x3, x3, x3\n\t"
+        "ldr x5, [%[a], 80]\n\t"
+        "adcs x4, x4, x4\n\t"
+        "ldr x6, [%[a], 88]\n\t"
+        "adcs x5, x5, x5\n\t"
+        "stp x3, x4, [%[r], 64]\n\t"
+        "adcs x6, x6, x6\n\t"
+        "stp x5, x6, [%[r], 80]\n\t"
+        "ldp x3, x4, [%[a], 96]\n\t"
+        "adcs x3, x3, x3\n\t"
+        "ldr x5, [%[a], 112]\n\t"
+        "adcs x4, x4, x4\n\t"
+        "ldr x6, [%[a], 120]\n\t"
+        "adcs x5, x5, x5\n\t"
+        "stp x3, x4, [%[r], 96]\n\t"
+        "adcs x6, x6, x6\n\t"
+        "stp x5, x6, [%[r], 112]\n\t"
+        "ldp x3, x4, [%[a], 128]\n\t"
+        "adcs x3, x3, x3\n\t"
+        "ldr x5, [%[a], 144]\n\t"
+        "adcs x4, x4, x4\n\t"
+        "ldr x6, [%[a], 152]\n\t"
+        "adcs x5, x5, x5\n\t"
+        "stp x3, x4, [%[r], 128]\n\t"
+        "adcs x6, x6, x6\n\t"
+        "stp x5, x6, [%[r], 144]\n\t"
+        "ldp x3, x4, [%[a], 160]\n\t"
+        "adcs x3, x3, x3\n\t"
+        "ldr x5, [%[a], 176]\n\t"
+        "adcs x4, x4, x4\n\t"
+        "ldr x6, [%[a], 184]\n\t"
+        "adcs x5, x5, x5\n\t"
+        "stp x3, x4, [%[r], 160]\n\t"
+        "adcs x6, x6, x6\n\t"
+        "stp x5, x6, [%[r], 176]\n\t"
+        /* Return the carry in the register that held r. */
+        "cset %[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba for squaring: with a = aH*2^1536 + aL only
+ * three 24-digit squarings are needed. Note z0 aliases r, so the
+ * order of the operations below matters: r[0..47] must be written
+ * (z0) before the fix-up terms are accumulated into r + 48.
+ *
+ * r A single precision integer - 96-digit result.
+ * a A single precision integer (48 digits).
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[48];
+    sp_digit z1[48];
+    sp_digit a1[24];
+    sp_digit u;
+
+    /* a1 = aL + aH; u is the carry out. */
+    u = sp_3072_add_24(a1, a, &a[24]);
+    sp_3072_sqr_24(z1, a1);
+    sp_3072_sqr_24(z2, &a[24]);
+    sp_3072_sqr_24(z0, a);
+    /* Fix-up for the carry dropped from a1: add (u ? 2*a1 : 0). */
+    sp_3072_mask_24(r + 48, a1, 0 - u);
+    u += sp_3072_dbl_24(r + 48, r + 48);
+    /* Middle term: z1 - z2 - z0, accumulated at digit offset 24. */
+    u += sp_3072_sub_in_place_48(z1, z2);
+    u += sp_3072_sub_in_place_48(z1, z0);
+    u += sp_3072_add_48(r + 24, r + 24, z1);
+    u += sp_3072_add_24(r + 48, r + 48, z2);
+    /* Propagate the accumulated carry u into the top 24 digits. */
+    sp_3072_add_zero_24(r + 72, z2 + 24, u);
+
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer (48 digits) - result.
+ * a A single precision integer (48 digits).
+ * b A single precision integer (48 digits).
+ *
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* x11 = one-past-the-end of a (48 * 8 = 384 bytes). */
+        "add x11, %[a], 384\n\t"
+        "\n1:\n\t"
+        /* Re-materialise the saved carry: c + (-1) sets C iff c == 1.
+         * First pass: c == 0, so the carry starts clear. */
+        "adds %[c], %[c], #-1\n\t"
+        "ldp x3, x4, [%[a]], #16\n\t"
+        "ldp x5, x6, [%[a]], #16\n\t"
+        "ldp x7, x8, [%[b]], #16\n\t"
+        "adcs x3, x3, x7\n\t"
+        "ldp x9, x10, [%[b]], #16\n\t"
+        "adcs x4, x4, x8\n\t"
+        "adcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r]], #16\n\t"
+        "adcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r]], #16\n\t"
+        /* Save the carry flag across the loop-control compare. */
+        "cset %[c], cs\n\t"
+        "cmp %[a], x11\n\t"
+        "b.ne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer (48 digits) - modified in place.
+ * b A single precision integer (48 digits).
+ *
+ * Returns 0 when no borrow, (sp_digit)-1 (all ones) when a borrow
+ * occurred out of the most significant word (see csetm below).
+ */
+static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* x10 = one-past-the-end of a (48 * 8 = 384 bytes). */
+        "add x10, %[a], 384\n\t"
+        "\n1:\n\t"
+        /* Re-materialise the saved borrow: 0 - c clears the carry
+         * flag (i.e. borrows) iff c == -1. First pass: c == 0. */
+        "subs %[c], xzr, %[c]\n\t"
+        "ldp x2, x3, [%[a]]\n\t"
+        "ldp x4, x5, [%[a], #16]\n\t"
+        "ldp x6, x7, [%[b]], #16\n\t"
+        "sbcs x2, x2, x6\n\t"
+        "ldp x8, x9, [%[b]], #16\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "stp x2, x3, [%[a]], #16\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x4, x5, [%[a]], #16\n\t"
+        /* c = -1 when a borrow occurred (carry clear), else 0. */
+        "csetm %[c], cc\n\t"
+        "cmp %[a], x10\n\t"
+        "b.ne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (comba) schoolbook multiply. x5 is the byte offset
+ * of the output column; for each column the inner loop sums all
+ * products a[i]*b[j] with i+j equal to the column index into the
+ * triple-word accumulator x6:x7:x8 (low, mid, overflow).
+ *
+ * r A single precision integer - 96-digit result.
+ * a A single precision integer (48 digits).
+ * b A single precision integer (48 digits).
+ */
+static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    /* Result built in tmp so r may alias a or b. */
+    sp_digit tmp[96];
+
+    __asm__ __volatile__ (
+        "mov x5, 0\n\t"
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "\n1:\n\t"
+        /* x3 = max(0, column - 376): first a-offset for this column
+         * (376 = (48 - 1) * 8, the last valid digit offset). */
+        "subs x3, x5, 376\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        /* x4 = matching b-offset so that x3 + x4 == column. */
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        /* Accumulate a[x3/8] * b[x4/8] into x6:x7:x8. */
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[b], x4]\n\t"
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 384\n\t"
+        "b.eq 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        /* Emit the column and shift the accumulator down one word. */
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        /* 752 = (96 - 2) * 8: last column computed in the loop. */
+        "cmp x5, 752\n\t"
+        "b.le 1b\n\t"
+        /* Top word of the result. */
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product-scanning square: off-diagonal products a[i]*a[j] (i != j)
+ * are added twice, diagonal squares a[i]*a[i] once, into the
+ * triple-word accumulator x6:x7:x8.
+ *
+ * r A single precision integer - 96-digit result.
+ * a A single precision integer (48 digits).
+ */
+static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+    /* Result built in tmp so r may alias a. */
+    sp_digit tmp[96];
+
+    __asm__ __volatile__ (
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "mov x5, 0\n\t"
+        "\n1:\n\t"
+        /* x3 = max(0, column - 376): first digit offset for this
+         * column; x4 is the mirrored offset (x3 + x4 == column). */
+        "subs x3, x5, 376\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        /* Diagonal element? Then square once, else add twice. */
+        "cmp x4, x3\n\t"
+        "b.eq 4f\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[a], x4]\n\t"
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "b.al 5f\n\t"
+        "\n4:\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "mul x9, x10, x10\n\t"
+        "umulh x10, x10, x10\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "\n5:\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 384\n\t"
+        "b.eq 3f\n\t"
+        /* Stop once the indices cross the diagonal - each pair is
+         * handled once and doubled above. */
+        "cmp x3, x4\n\t"
+        "b.gt 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        /* Emit the column and shift the accumulator down one word. */
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        "cmp x5, 752\n\t"
+        "b.le 1b\n\t"
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND the mask m into each word of a; store the result in r.
+ *
+ * Used for constant-time conditional selection: m is expected to be
+ * all-ones (select a) or all-zeros (select 0).
+ *
+ * r A single precision integer - result.
+ * a A single precision integer - input.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int j = 0;
+
+    while (j < 24) {
+        r[j] = a[j] & m;
+        j++;
+    }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer (24 digits) - result.
+ * a A single precision integer (24 digits).
+ * b A single precision integer (24 digits).
+ *
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* x11 = one-past-the-end of a (24 * 8 = 192 bytes). */
+        "add x11, %[a], 192\n\t"
+        "\n1:\n\t"
+        /* Re-materialise the saved carry: c + (-1) sets C iff c == 1.
+         * First pass: c == 0, so the carry starts clear. */
+        "adds %[c], %[c], #-1\n\t"
+        "ldp x3, x4, [%[a]], #16\n\t"
+        "ldp x5, x6, [%[a]], #16\n\t"
+        "ldp x7, x8, [%[b]], #16\n\t"
+        "adcs x3, x3, x7\n\t"
+        "ldp x9, x10, [%[b]], #16\n\t"
+        "adcs x4, x4, x8\n\t"
+        "adcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r]], #16\n\t"
+        "adcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r]], #16\n\t"
+        /* Save the carry flag across the loop-control compare. */
+        "cset %[c], cs\n\t"
+        "cmp %[a], x11\n\t"
+        "b.ne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer (24 digits) - modified in place.
+ * b A single precision integer (24 digits).
+ *
+ * Returns 0 when no borrow, (sp_digit)-1 (all ones) when a borrow
+ * occurred out of the most significant word (see csetm below).
+ */
+static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* x10 = one-past-the-end of a (24 * 8 = 192 bytes). */
+        "add x10, %[a], 192\n\t"
+        "\n1:\n\t"
+        /* Re-materialise the saved borrow: 0 - c clears the carry
+         * flag (i.e. borrows) iff c == -1. First pass: c == 0. */
+        "subs %[c], xzr, %[c]\n\t"
+        "ldp x2, x3, [%[a]]\n\t"
+        "ldp x4, x5, [%[a], #16]\n\t"
+        "ldp x6, x7, [%[b]], #16\n\t"
+        "sbcs x2, x2, x6\n\t"
+        "ldp x8, x9, [%[b]], #16\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "stp x2, x3, [%[a]], #16\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x4, x5, [%[a]], #16\n\t"
+        /* c = -1 when a borrow occurred (carry clear), else 0. */
+        "csetm %[c], cc\n\t"
+        "cmp %[a], x10\n\t"
+        "b.ne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+
+ * Product-scanning (comba) schoolbook multiply, 24-digit operands.
+ * x5 is the byte offset of the output column; the inner loop sums all
+ * products a[i]*b[j] with i+j equal to the column index into the
+ * triple-word accumulator x6:x7:x8.
+ *
+ * r A single precision integer - 48-digit result.
+ * a A single precision integer (24 digits).
+ * b A single precision integer (24 digits).
+ */
+static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    /* Result built in tmp so r may alias a or b. */
+    sp_digit tmp[48];
+
+    __asm__ __volatile__ (
+        "mov x5, 0\n\t"
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "\n1:\n\t"
+        /* x3 = max(0, column - 184): first a-offset for this column
+         * (184 = (24 - 1) * 8, the last valid digit offset). */
+        "subs x3, x5, 184\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        /* x4 = matching b-offset so that x3 + x4 == column. */
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[b], x4]\n\t"
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 192\n\t"
+        "b.eq 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        /* Emit the column and shift the accumulator down one word. */
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        /* 368 = (48 - 2) * 8: last column computed in the loop. */
+        "cmp x5, 368\n\t"
+        "b.le 1b\n\t"
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product-scanning square, 24-digit operand: off-diagonal products
+ * a[i]*a[j] (i != j) are added twice, diagonal squares once, into the
+ * triple-word accumulator x6:x7:x8.
+ *
+ * r A single precision integer - 48-digit result.
+ * a A single precision integer (24 digits).
+ */
+static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+{
+    /* Result built in tmp so r may alias a. */
+    sp_digit tmp[48];
+
+    __asm__ __volatile__ (
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "mov x5, 0\n\t"
+        "\n1:\n\t"
+        /* x3 = max(0, column - 184): first digit offset for this
+         * column; x4 is the mirrored offset (x3 + x4 == column). */
+        "subs x3, x5, 184\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        /* Diagonal element? Then square once, else add twice. */
+        "cmp x4, x3\n\t"
+        "b.eq 4f\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[a], x4]\n\t"
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "b.al 5f\n\t"
+        "\n4:\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "mul x9, x10, x10\n\t"
+        "umulh x10, x10, x10\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "\n5:\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 192\n\t"
+        "b.eq 3f\n\t"
+        /* Stop once the indices cross the diagonal - each pair is
+         * handled once and doubled above. */
+        "cmp x3, x4\n\t"
+        "b.gt 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        /* Emit the column and shift the accumulator down one word. */
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        "cmp x5, 368\n\t"
+        "b.le 1b\n\t"
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^64 (the Montgomery rho).
+ *
+ * a   A single precision number (a[0] must be odd for a valid modulus).
+ * rho Receives the bottom word of the negated inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit inv;
+    sp_digit a0;
+    int i;
+
+    a0 = a[0];
+    /* Seed: inv * a0 == 1 mod 2^4. */
+    inv = (((a0 + 2) & 4) << 1) + a0;
+    /* Each Newton step doubles the number of correct low bits:
+     * 2^8 -> 2^16 -> 2^32 -> 2^64. */
+    for (i = 0; i < 4; i++) {
+        inv *= 2 - a0 * inv;
+    }
+
+    /* rho = -1/m mod b */
+    *rho = -inv;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer - 49-digit result.
+ * a A single precision integer (48 digits).
+ * b A single precision digit.
+ */
+static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Loop form: x3:x4:x5 is a rotating carry pipeline; each pass
+     * folds mul/umulh of one word in and stores the completed word. */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr x8, [%[a]]\n\t"
+        "mul x5, %[b], x8\n\t"
+        "umulh x3, %[b], x8\n\t"
+        "mov x4, 0\n\t"
+        "str x5, [%[r]]\n\t"
+        "mov x5, 0\n\t"
+        "mov x9, #8\n\t"
+        "1:\n\t"
+        "ldr x8, [%[a], x9]\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], x9]\n\t"
+        "mov x3, x4\n\t"
+        "mov x4, x5\n\t"
+        "mov x5, #0\n\t"
+        "add x9, x9, #8\n\t"
+        "cmp x9, 384\n\t"
+        "b.lt 1b\n\t"
+        "str x3, [%[r], 384]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#else
+    /* Fully unrolled: x3/x4/x5 rotate as a three-register carry
+     * pipeline; words are loaded two at a time (ldp into x8/x9) and
+     * each step stores one completed result word. */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp x8, x9, [%[a]]\n\t"
+        "mul x3, %[b], x8\n\t"
+        "umulh x4, %[b], x8\n\t"
+        "mov x5, 0\n\t"
+        "# A[1] * B\n\t"
+        "str x3, [%[r]]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[2] * B\n\t"
+        "ldp x8, x9, [%[a], 16]\n\t"
+        "str x4, [%[r], 8]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[3] * B\n\t"
+        "str x5, [%[r], 16]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[4] * B\n\t"
+        "ldp x8, x9, [%[a], 32]\n\t"
+        "str x3, [%[r], 24]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[5] * B\n\t"
+        "str x4, [%[r], 32]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[6] * B\n\t"
+        "ldp x8, x9, [%[a], 48]\n\t"
+        "str x5, [%[r], 40]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[7] * B\n\t"
+        "str x3, [%[r], 48]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[8] * B\n\t"
+        "ldp x8, x9, [%[a], 64]\n\t"
+        "str x4, [%[r], 56]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[9] * B\n\t"
+        "str x5, [%[r], 64]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[10] * B\n\t"
+        "ldp x8, x9, [%[a], 80]\n\t"
+        "str x3, [%[r], 72]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[11] * B\n\t"
+        "str x4, [%[r], 80]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[12] * B\n\t"
+        "ldp x8, x9, [%[a], 96]\n\t"
+        "str x5, [%[r], 88]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[13] * B\n\t"
+        "str x3, [%[r], 96]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[14] * B\n\t"
+        "ldp x8, x9, [%[a], 112]\n\t"
+        "str x4, [%[r], 104]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[15] * B\n\t"
+        "str x5, [%[r], 112]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[16] * B\n\t"
+        "ldp x8, x9, [%[a], 128]\n\t"
+        "str x3, [%[r], 120]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[17] * B\n\t"
+        "str x4, [%[r], 128]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[18] * B\n\t"
+        "ldp x8, x9, [%[a], 144]\n\t"
+        "str x5, [%[r], 136]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[19] * B\n\t"
+        "str x3, [%[r], 144]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[20] * B\n\t"
+        "ldp x8, x9, [%[a], 160]\n\t"
+        "str x4, [%[r], 152]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[21] * B\n\t"
+        "str x5, [%[r], 160]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[22] * B\n\t"
+        "ldp x8, x9, [%[a], 176]\n\t"
+        "str x3, [%[r], 168]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[23] * B\n\t"
+        "str x4, [%[r], 176]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[24] * B\n\t"
+        "ldp x8, x9, [%[a], 192]\n\t"
+        "str x5, [%[r], 184]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[25] * B\n\t"
+        "str x3, [%[r], 192]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[26] * B\n\t"
+        "ldp x8, x9, [%[a], 208]\n\t"
+        "str x4, [%[r], 200]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[27] * B\n\t"
+        "str x5, [%[r], 208]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[28] * B\n\t"
+        "ldp x8, x9, [%[a], 224]\n\t"
+        "str x3, [%[r], 216]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[29] * B\n\t"
+        "str x4, [%[r], 224]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[30] * B\n\t"
+        "ldp x8, x9, [%[a], 240]\n\t"
+        "str x5, [%[r], 232]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[31] * B\n\t"
+        "str x3, [%[r], 240]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[32] * B\n\t"
+        "ldp x8, x9, [%[a], 256]\n\t"
+        "str x4, [%[r], 248]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[33] * B\n\t"
+        "str x5, [%[r], 256]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[34] * B\n\t"
+        "ldp x8, x9, [%[a], 272]\n\t"
+        "str x3, [%[r], 264]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[35] * B\n\t"
+        "str x4, [%[r], 272]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[36] * B\n\t"
+        "ldp x8, x9, [%[a], 288]\n\t"
+        "str x5, [%[r], 280]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[37] * B\n\t"
+        "str x3, [%[r], 288]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[38] * B\n\t"
+        "ldp x8, x9, [%[a], 304]\n\t"
+        "str x4, [%[r], 296]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[39] * B\n\t"
+        "str x5, [%[r], 304]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[40] * B\n\t"
+        "ldp x8, x9, [%[a], 320]\n\t"
+        "str x3, [%[r], 312]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[41] * B\n\t"
+        "str x4, [%[r], 320]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[42] * B\n\t"
+        "ldp x8, x9, [%[a], 336]\n\t"
+        "str x5, [%[r], 328]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[43] * B\n\t"
+        "str x3, [%[r], 336]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[44] * B\n\t"
+        "ldp x8, x9, [%[a], 352]\n\t"
+        "str x4, [%[r], 344]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "# A[45] * B\n\t"
+        "str x5, [%[r], 352]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x3, x3, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "adds x3, x3, x6\n\t"
+        "# A[46] * B\n\t"
+        "ldp x8, x9, [%[a], 368]\n\t"
+        "str x3, [%[r], 360]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "adcs x4, x4, x7\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "adds x4, x4, x6\n\t"
+        "# A[47] * B\n\t"
+        "str x4, [%[r], 368]\n\t"
+        "mul x6, %[b], x9\n\t"
+        "adcs x5, x5, x7\n\t"
+        "umulh x7, %[b], x9\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "adds x5, x5, x6\n\t"
+        "adc x3, x3, x7\n\t"
+        /* Store the last product word and the final carry word. */
+        "stp x5, x3, [%[r], 376]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Here m is 24 digits (1536 bits - the half-size modulus used for
+ * CRT-style exponentiation), so a single subtraction from 2^1536 is
+ * sufficient.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_24(sp_digit* r, const sp_digit* m)
+{
+    /* r = 0: represents 2^1536 with the top bit implicit. */
+    XMEMSET(r, 0, sizeof(sp_digit) * 24);
+
+    /* r = 2^n mod m */
+    sp_3072_sub_in_place_24(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 (all ones) to subtract and 0 to leave a unchanged;
+ * computes r = a - (b & m) in constant time.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ *
+ * Returns 0 when no borrow, (sp_digit)-1 when a borrow occurred.
+ */
+static sp_digit sp_3072_cond_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov x8, #0\n\t"
+        "1:\n\t"
+        /* Re-materialise the saved borrow: 0 - c clears the carry
+         * flag (i.e. borrows) iff c == -1. First pass: c == 0. */
+        "subs %[c], xzr, %[c]\n\t"
+        "ldr x4, [%[a], x8]\n\t"
+        "ldr x5, [%[b], x8]\n\t"
+        /* Masked subtrahend: b[i] & m is 0 when m == 0. */
+        "and x5, x5, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "csetm %[c], cc\n\t"
+        "str x4, [%[r], x8]\n\t"
+        "add x8, x8, #8\n\t"
+        "cmp x8, 192\n\t"
+        "b.lt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return c;
+#else
+    /* Unrolled variant; the %[r] operand is reused to return the
+     * borrow mask (csetm at the end overwrites the pointer value). */
+    __asm__ __volatile__ (
+
+        "ldp x5, x7, [%[b], 0]\n\t"
+        "ldp x11, x12, [%[b], 16]\n\t"
+        "ldp x4, x6, [%[a], 0]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 16]\n\t"
+        "and x7, x7, %[m]\n\t"
+        /* subs starts the borrow chain; all later groups use sbcs. */
+        "subs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 0]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 16]\n\t"
+        "ldp x5, x7, [%[b], 32]\n\t"
+        "ldp x11, x12, [%[b], 48]\n\t"
+        "ldp x4, x6, [%[a], 32]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 48]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 32]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 48]\n\t"
+        "ldp x5, x7, [%[b], 64]\n\t"
+        "ldp x11, x12, [%[b], 80]\n\t"
+        "ldp x4, x6, [%[a], 64]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 80]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 64]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 80]\n\t"
+        "ldp x5, x7, [%[b], 96]\n\t"
+        "ldp x11, x12, [%[b], 112]\n\t"
+        "ldp x4, x6, [%[a], 96]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 112]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 96]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 112]\n\t"
+        "ldp x5, x7, [%[b], 128]\n\t"
+        "ldp x11, x12, [%[b], 144]\n\t"
+        "ldp x4, x6, [%[a], 128]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 144]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 128]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 144]\n\t"
+        "ldp x5, x7, [%[b], 160]\n\t"
+        "ldp x11, x12, [%[b], 176]\n\t"
+        "ldp x4, x6, [%[a], 160]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 176]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "sbcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 160]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 176]\n\t"
+        /* Return -1 in the r register when a borrow occurred. */
+        "csetm %[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Carry out of the top word, propagated across outer-loop iterations. */
+ sp_digit ca = 0;
+
+ /* m[0..13] stay resident in x14..x17,x19..x28 for the whole loop; m[14..23]
+ * are re-loaded from memory each iteration (no spare callee registers).
+ * Each of the 24 outer iterations computes mu = a[i] * mp and adds m * mu
+ * so that word a[i] becomes zero; %[a] is advanced one word per iteration
+ * (the trailing "add %[a], %[a], 8"), so on exit %[a] points at the
+ * reduced 24-word result a[24..47]. */
+ __asm__ __volatile__ (
+ "ldp x14, x15, [%[m], 0]\n\t"
+ "ldp x16, x17, [%[m], 16]\n\t"
+ "ldp x19, x20, [%[m], 32]\n\t"
+ "ldp x21, x22, [%[m], 48]\n\t"
+ "ldp x23, x24, [%[m], 64]\n\t"
+ "ldp x25, x26, [%[m], 80]\n\t"
+ "ldp x27, x28, [%[m], 96]\n\t"
+ "# i = 24\n\t"
+ "mov x4, 24\n\t"
+ "ldp x12, x13, [%[a], 0]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul x9, %[mp], x12\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "mul x7, x14, x9\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x12, x12, x7\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x6, x8, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x12, x13, x7\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr x13, [%[a], 16]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "mul x7, x16, x9\n\t"
+ "adds x12, x12, x6\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x13, x13, x7\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr x10, [%[a], 24]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "mul x7, x17, x9\n\t"
+ "adds x13, x13, x5\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr x11, [%[a], 32]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "str x10, [%[a], 24]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr x10, [%[a], 40]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "str x11, [%[a], 32]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr x11, [%[a], 48]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "str x10, [%[a], 40]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr x10, [%[a], 56]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "str x11, [%[a], 48]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr x11, [%[a], 64]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x23, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x23, x9\n\t"
+ "str x10, [%[a], 56]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr x10, [%[a], 72]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x24, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x24, x9\n\t"
+ "str x11, [%[a], 64]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr x11, [%[a], 80]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x25, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x25, x9\n\t"
+ "str x10, [%[a], 72]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr x10, [%[a], 88]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x26, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x26, x9\n\t"
+ "str x11, [%[a], 80]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+12] += m[12] * mu\n\t"
+ "ldr x11, [%[a], 96]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x27, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x27, x9\n\t"
+ "str x10, [%[a], 88]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+13] += m[13] * mu\n\t"
+ "ldr x10, [%[a], 104]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x28, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x28, x9\n\t"
+ "str x11, [%[a], 96]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+14] += m[14] * mu\n\t"
+ "ldr x11, [%[a], 112]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 112]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 104]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+15] += m[15] * mu\n\t"
+ "ldr x10, [%[a], 120]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 120]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 112]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+16] += m[16] * mu\n\t"
+ "ldr x11, [%[a], 128]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 128]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 120]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+17] += m[17] * mu\n\t"
+ "ldr x10, [%[a], 136]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 136]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 128]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+18] += m[18] * mu\n\t"
+ "ldr x11, [%[a], 144]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 144]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 136]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+19] += m[19] * mu\n\t"
+ "ldr x10, [%[a], 152]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 152]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 144]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+20] += m[20] * mu\n\t"
+ "ldr x11, [%[a], 160]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 160]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 152]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+21] += m[21] * mu\n\t"
+ "ldr x10, [%[a], 168]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 168]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 160]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+22] += m[22] * mu\n\t"
+ "ldr x11, [%[a], 176]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 176]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 168]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+23] += m[23] * mu\n\t"
+ "ldr x10, [%[a], 184]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 184]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x8, x8, %[ca]\n\t"
+ "str x11, [%[a], 176]\n\t"
+ "cset %[ca], cs\n\t"
+ "adds x10, x10, x6\n\t"
+ "ldr x11, [%[a], 192]\n\t"
+ "str x10, [%[a], 184]\n\t"
+ "adcs x11, x11, x8\n\t"
+ "str x11, [%[a], 192]\n\t"
+ "adc %[ca], %[ca], xzr\n\t"
+ "subs x4, x4, 1\n\t"
+ "add %[a], %[a], 8\n\t"
+ "bne 1b\n\t"
+ "stp x12, x13, [%[a], 0]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
+ );
+
+ /* The result may still be >= m; subtract m under the all-ones mask when
+ * the carry out (ca) is set. 'a' was advanced 24 words inside the asm,
+ * so a - 24 is the original buffer (destination) and 'a' now addresses
+ * the reduced result. The masked subtract keeps this constant-time. */
+ sp_3072_cond_sub_24(a - 24, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ /* Full 48-word product, then Montgomery-reduce back to 24 words in r. */
+ sp_3072_mul_24(r, a, b);
+ sp_3072_mont_reduce_24(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_24(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Dedicated squaring routine, then Montgomery-reduce back to 24 words. */
+ sp_3072_sqr_24(r, a);
+ sp_3072_mont_reduce_24(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+static void sp_3072_mul_d_24(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Looped variant: x3/x4/x5 form a rotating 3-word carry accumulator;
+ * one product word is retired per iteration. The result is 25 words
+ * (final high word stored at r[24], byte offset 192). */
+ __asm__ __volatile__ (
+ "# A[0] * B\n\t"
+ "ldr x8, [%[a]]\n\t"
+ "mul x5, %[b], x8\n\t"
+ "umulh x3, %[b], x8\n\t"
+ "mov x4, 0\n\t"
+ "str x5, [%[r]]\n\t"
+ "mov x5, 0\n\t"
+ "mov x9, #8\n\t"
+ "1:\n\t"
+ "ldr x8, [%[a], x9]\n\t"
+ "mul x6, %[b], x8\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adds x3, x3, x6\n\t"
+ "adcs x4, x4, x7\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "str x3, [%[r], x9]\n\t"
+ "mov x3, x4\n\t"
+ "mov x4, x5\n\t"
+ "mov x5, #0\n\t"
+ "add x9, x9, #8\n\t"
+ "cmp x9, 192\n\t"
+ "b.lt 1b\n\t"
+ "str x3, [%[r], 192]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+#else
+ /* Fully unrolled variant: two input words per ldp, the carry rotates
+ * through x3/x4/x5. Same 25-word result as the looped version. */
+ __asm__ __volatile__ (
+ "# A[0] * B\n\t"
+ "ldp x8, x9, [%[a]]\n\t"
+ "mul x3, %[b], x8\n\t"
+ "umulh x4, %[b], x8\n\t"
+ "mov x5, 0\n\t"
+ "# A[1] * B\n\t"
+ "str x3, [%[r]]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[2] * B\n\t"
+ "ldp x8, x9, [%[a], 16]\n\t"
+ "str x4, [%[r], 8]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[3] * B\n\t"
+ "str x5, [%[r], 16]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[4] * B\n\t"
+ "ldp x8, x9, [%[a], 32]\n\t"
+ "str x3, [%[r], 24]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[5] * B\n\t"
+ "str x4, [%[r], 32]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[6] * B\n\t"
+ "ldp x8, x9, [%[a], 48]\n\t"
+ "str x5, [%[r], 40]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[7] * B\n\t"
+ "str x3, [%[r], 48]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[8] * B\n\t"
+ "ldp x8, x9, [%[a], 64]\n\t"
+ "str x4, [%[r], 56]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[9] * B\n\t"
+ "str x5, [%[r], 64]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[10] * B\n\t"
+ "ldp x8, x9, [%[a], 80]\n\t"
+ "str x3, [%[r], 72]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[11] * B\n\t"
+ "str x4, [%[r], 80]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[12] * B\n\t"
+ "ldp x8, x9, [%[a], 96]\n\t"
+ "str x5, [%[r], 88]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[13] * B\n\t"
+ "str x3, [%[r], 96]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[14] * B\n\t"
+ "ldp x8, x9, [%[a], 112]\n\t"
+ "str x4, [%[r], 104]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[15] * B\n\t"
+ "str x5, [%[r], 112]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[16] * B\n\t"
+ "ldp x8, x9, [%[a], 128]\n\t"
+ "str x3, [%[r], 120]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[17] * B\n\t"
+ "str x4, [%[r], 128]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[18] * B\n\t"
+ "ldp x8, x9, [%[a], 144]\n\t"
+ "str x5, [%[r], 136]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[19] * B\n\t"
+ "str x3, [%[r], 144]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[20] * B\n\t"
+ "ldp x8, x9, [%[a], 160]\n\t"
+ "str x4, [%[r], 152]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[21] * B\n\t"
+ "str x5, [%[r], 160]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[22] * B\n\t"
+ "ldp x8, x9, [%[a], 176]\n\t"
+ "str x3, [%[r], 168]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[23] * B\n\t"
+ "str x4, [%[r], 176]\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, x3, x7\n\t"
+ "adds x5, x5, x6\n\t"
+ "adc x3, x3, x7\n\t"
+ "stp x5, x3, [%[r], 184]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor (single precision digit).
+ * returns the result of the division.
+ */
+static sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, sp_digit div)
+{
+ sp_digit r;
+
+ /* Build the 64-bit quotient in x6 by repeated estimation: x5 is
+ * (div >> 32) + 1, an over-estimate of the divisor's high half, so each
+ * udiv by x5 under-estimates the quotient chunk; the partial product is
+ * subtracted from (d1|d0) and the final exact udiv on the remaining
+ * low half corrects the result. */
+ __asm__ __volatile__ (
+ "lsr x5, %[div], 32\n\t"
+ "add x5, x5, 1\n\t"
+
+ "udiv x3, %[d1], x5\n\t"
+ "lsl x6, x3, 32\n\t"
+ "mul x4, %[div], x6\n\t"
+ "umulh x3, %[div], x6\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "udiv x3, %[d1], x5\n\t"
+ "lsl x3, x3, 32\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "umulh x3, %[div], x3\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "lsr x3, %[d0], 32\n\t"
+ "orr x3, x3, %[d1], lsl 32\n\t"
+
+ "udiv x3, x3, x5\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "umulh x3, %[div], x3\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "lsr x3, %[d0], 32\n\t"
+ "orr x3, x3, %[d1], lsl 32\n\t"
+
+ "udiv x3, x3, x5\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "sub %[d0], %[d0], x4\n\t"
+
+ "udiv x3, %[d0], %[div]\n\t"
+ "add %[r], x6, x3\n\t"
+
+ : [r] "=r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "x3", "x4", "x5", "x6"
+ );
+
+ return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
<br>
+static int64_t sp_3072_cmp_24(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Constant-time compare, most-significant word first. x2 holds the
+ * candidate result (starts -1, becomes 1 on a>b word or -1 on a<b word);
+ * x4 is an all-ones mask cleared on the first differing pair so lower
+ * words can no longer change the outcome. The final "eor x2, x4" yields
+ * 0 when every word matched (both still -1) and x2 otherwise (x4 == 0). */
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ "mov x5, 184\n\t"
+ "1:\n\t"
+ "ldr x6, [%[a], x5]\n\t"
+ "ldr x7, [%[b], x5]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x6, x6, x7\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "subs x5, x5, #8\n\t"
+ "b.cs 1b\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+ );
+#else
+ /* Unrolled form of the same constant-time walk, two words per ldp,
+ * from offset 176 down to 0. */
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "ldp x7, x8, [%[b], 176]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "ldp x7, x8, [%[b], 144]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "ldp x7, x8, [%[b], 112]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "ldp x7, x8, [%[b], 80]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "ldp x7, x8, [%[b], 48]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "ldp x7, x8, [%[b], 16]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+ );
+#endif
+
+ /* Result was written into the 'a' register operand by the asm. */
+ return (int64_t)a;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ /* t1: 48-word working copy of a (remainder); t2: d * quotient-word. */
+ sp_digit t1[48], t2[25];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ /* Schoolbook division: one 64-bit quotient word per iteration, using the
+ * top divisor word to estimate each quotient word. */
+ div = d[23];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 24);
+ for (i=23; i>=0; i--) {
+ /* Estimate the quotient word from the top two remainder words. */
+ r1 = div_3072_word_24(t1[24 + i], t1[24 + i - 1], div);
+
+ sp_3072_mul_d_24(t2, d, r1);
+ t1[24 + i] += sp_3072_sub_in_place_24(&t1[i], t2);
+ t1[24 + i] -= t2[24];
+ /* Two masked add-backs correct an over-estimated quotient word
+ * without branching (mask derived from the top word t1[24+i]). */
+ sp_3072_mask_24(t2, d, t1[24 + i]);
+ t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2);
+ sp_3072_mask_24(t2, d, t1[24 + i]);
+ t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2);
+ }
+
+ /* Remainder may still be >= d: one final constant-time subtract. */
+ r1 = sp_3072_cmp_24(t1, d) >= 0;
+ sp_3072_cond_sub_24(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_24(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Divide and keep only the remainder (quotient output passed as NULL). */
+ return sp_3072_div_24(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ /* Pre-computed powers a^0..a^15 (Montgomery form), 48 words each. */
+ sp_digit t[16][48];
+#else
+ sp_digit* t[16];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 48, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<16; i++) {
+ t[i] = td + i * 48;
+ }
+#endif
+ /* t[0] doubles as norm = 2^1536 mod m (Montgomery form of 1). */
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_24(norm, m);
+
+ /* Convert a to Montgomery form: place a in the high 24 words (so the
+ * value is a * 2^1536) and reduce mod m. reduceA reduces a first when
+ * it may be >= m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 24U);
+ if (reduceA != 0) {
+ err = sp_3072_mod_24(t[1] + 24, a, m);
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_24(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24);
+ err = sp_3072_mod_24(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Fill the window table: t[j] = a^j in Montgomery form, j = 2..15. */
+ sp_3072_mont_sqr_24(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_24(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_24(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_24(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_24(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_24(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_24(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_24(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_24(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_24(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_24(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_24(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_24(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_24(t[15], t[ 8], t[ 7], m, mp);
+
+ /* Fixed 4-bit window exponentiation. i indexes the current exponent
+ * word, n holds its unconsumed bits (left-aligned), c counts them. */
+ i = (bits - 1) / 64;
+ n = e[i--];
+ c = bits & 63;
+ if (c == 0) {
+ c = 64;
+ }
+ c -= bits % 4;
+ if (c == 64) {
+ c = 60;
+ }
+ y = (int)(n >> c);
+ n <<= 64 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
+ for (; i>=0 || c>=4; ) {
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 60;
+ n <<= 4;
+ c = 60;
+ }
+ else if (c < 4) {
+ /* Window straddles two exponent words. */
+ y = n >> 60;
+ n = e[i--];
+ c = 4 - c;
+ y |= n >> (64 - c);
+ n <<= c;
+ c = 64 - c;
+ }
+ else {
+ y = (n >> 60) & 0xf;
+ n <<= 4;
+ c -= 4;
+ }
+
+ /* Square 4 times then multiply by the table entry for the window. */
+ sp_3072_mont_sqr_24(r, r, m, mp);
+ sp_3072_mont_sqr_24(r, r, m, mp);
+ sp_3072_mont_sqr_24(r, r, m, mp);
+ sp_3072_mont_sqr_24(r, r, m, mp);
+
+ sp_3072_mont_mul_24(r, r, t[y], m, mp);
+ }
+
+ /* Leave Montgomery form (reduce r with high words zeroed) and perform
+ * the final constant-time conditional subtract of m. */
+ XMEMSET(&r[24], 0, sizeof(sp_digit) * 24U);
+ sp_3072_mont_reduce_24(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_24(r, m) >= 0);
+ sp_3072_cond_sub_24(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ /* Pre-computed powers a^0..a^31 (Montgomery form), 48 words each. */
+ sp_digit t[32][48];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 48, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++) {
+ t[i] = td + i * 48;
+ }
+#endif
+ /* t[0] doubles as norm = 2^1536 mod m (Montgomery form of 1). */
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_24(norm, m);
+
+ /* Convert a to Montgomery form: place a in the high 24 words (so the
+ * value is a * 2^1536) and reduce mod m. reduceA reduces a first when
+ * it may be >= m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 24U);
+ if (reduceA != 0) {
+ err = sp_3072_mod_24(t[1] + 24, a, m);
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_24(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24);
+ err = sp_3072_mod_24(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Fill the window table: t[j] = a^j in Montgomery form, j = 2..31. */
+ sp_3072_mont_sqr_24(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_24(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_24(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_24(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_24(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_24(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_24(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_24(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_24(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_24(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_24(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_24(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_24(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_24(t[15], t[ 8], t[ 7], m, mp);
+ sp_3072_mont_sqr_24(t[16], t[ 8], m, mp);
+ sp_3072_mont_mul_24(t[17], t[ 9], t[ 8], m, mp);
+ sp_3072_mont_sqr_24(t[18], t[ 9], m, mp);
+ sp_3072_mont_mul_24(t[19], t[10], t[ 9], m, mp);
+ sp_3072_mont_sqr_24(t[20], t[10], m, mp);
+ sp_3072_mont_mul_24(t[21], t[11], t[10], m, mp);
+ sp_3072_mont_sqr_24(t[22], t[11], m, mp);
+ sp_3072_mont_mul_24(t[23], t[12], t[11], m, mp);
+ sp_3072_mont_sqr_24(t[24], t[12], m, mp);
+ sp_3072_mont_mul_24(t[25], t[13], t[12], m, mp);
+ sp_3072_mont_sqr_24(t[26], t[13], m, mp);
+ sp_3072_mont_mul_24(t[27], t[14], t[13], m, mp);
+ sp_3072_mont_sqr_24(t[28], t[14], m, mp);
+ sp_3072_mont_mul_24(t[29], t[15], t[14], m, mp);
+ sp_3072_mont_sqr_24(t[30], t[15], m, mp);
+ sp_3072_mont_mul_24(t[31], t[16], t[15], m, mp);
+
+ /* Fixed 5-bit window exponentiation. i indexes the current exponent
+ * word, n holds its unconsumed bits (left-aligned), c counts them. */
+ i = (bits - 1) / 64;
+ n = e[i--];
+ c = bits & 63;
+ if (c == 0) {
+ c = 64;
+ }
+ c -= bits % 5;
+ if (c == 64) {
+ c = 59;
+ }
+ y = (int)(n >> c);
+ n <<= 64 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 59;
+ n <<= 5;
+ c = 59;
+ }
+ else if (c < 5) {
+ /* Window straddles two exponent words. */
+ y = n >> 59;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (64 - c);
+ n <<= c;
+ c = 64 - c;
+ }
+ else {
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* Square 5 times then multiply by the table entry for the window. */
+ sp_3072_mont_sqr_24(r, r, m, mp);
+ sp_3072_mont_sqr_24(r, r, m, mp);
+ sp_3072_mont_sqr_24(r, r, m, mp);
+ sp_3072_mont_sqr_24(r, r, m, mp);
+ sp_3072_mont_sqr_24(r, r, m, mp);
+
+ sp_3072_mont_mul_24(r, r, t[y], m, mp);
+ }
+
+ /* Leave Montgomery form (reduce r with high words zeroed) and perform
+ * the final constant-time conditional subtract of m. */
+ XMEMSET(&r[24], 0, sizeof(sp_digit) * 24U);
+ sp_3072_mont_reduce_24(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_24(r, m) >= 0);
+ sp_3072_cond_sub_24(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
+{
+ /* Zero then subtract m in place: 0 - m == 2^3072 - m (mod 2^3072),
+ * which equals 2^3072 mod m since m is a full 3072-bit modulus. */
+ XMEMSET(r, 0, sizeof(sp_digit) * 48);
+
+ /* r = 2^n mod m */
+ sp_3072_sub_in_place_48(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov x8, #0\n\t"
+ "1:\n\t"
+ "subs %[c], xzr, %[c]\n\t"
+ "ldr x4, [%[a], x8]\n\t"
+ "ldr x5, [%[b], x8]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "csetm %[c], cc\n\t"
+ "str x4, [%[r], x8]\n\t"
+ "add x8, x8, #8\n\t"
+ "cmp x8, 384\n\t"
+ "b.lt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+ );
+
+ return c;
+#else
+ __asm__ __volatile__ (
+
+ "ldp x5, x7, [%[b], 0]\n\t"
+ "ldp x11, x12, [%[b], 16]\n\t"
+ "ldp x4, x6, [%[a], 0]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 16]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "subs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 0]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 16]\n\t"
+ "ldp x5, x7, [%[b], 32]\n\t"
+ "ldp x11, x12, [%[b], 48]\n\t"
+ "ldp x4, x6, [%[a], 32]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 48]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 32]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 48]\n\t"
+ "ldp x5, x7, [%[b], 64]\n\t"
+ "ldp x11, x12, [%[b], 80]\n\t"
+ "ldp x4, x6, [%[a], 64]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 80]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 64]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 80]\n\t"
+ "ldp x5, x7, [%[b], 96]\n\t"
+ "ldp x11, x12, [%[b], 112]\n\t"
+ "ldp x4, x6, [%[a], 96]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 112]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 96]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 112]\n\t"
+ "ldp x5, x7, [%[b], 128]\n\t"
+ "ldp x11, x12, [%[b], 144]\n\t"
+ "ldp x4, x6, [%[a], 128]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 144]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 128]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 144]\n\t"
+ "ldp x5, x7, [%[b], 160]\n\t"
+ "ldp x11, x12, [%[b], 176]\n\t"
+ "ldp x4, x6, [%[a], 160]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 176]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 160]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 176]\n\t"
+ "ldp x5, x7, [%[b], 192]\n\t"
+ "ldp x11, x12, [%[b], 208]\n\t"
+ "ldp x4, x6, [%[a], 192]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 208]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 192]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 208]\n\t"
+ "ldp x5, x7, [%[b], 224]\n\t"
+ "ldp x11, x12, [%[b], 240]\n\t"
+ "ldp x4, x6, [%[a], 224]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 240]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 224]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 240]\n\t"
+ "ldp x5, x7, [%[b], 256]\n\t"
+ "ldp x11, x12, [%[b], 272]\n\t"
+ "ldp x4, x6, [%[a], 256]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 272]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 256]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 272]\n\t"
+ "ldp x5, x7, [%[b], 288]\n\t"
+ "ldp x11, x12, [%[b], 304]\n\t"
+ "ldp x4, x6, [%[a], 288]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 304]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 288]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 304]\n\t"
+ "ldp x5, x7, [%[b], 320]\n\t"
+ "ldp x11, x12, [%[b], 336]\n\t"
+ "ldp x4, x6, [%[a], 320]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 336]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 320]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 336]\n\t"
+ "ldp x5, x7, [%[b], 352]\n\t"
+ "ldp x11, x12, [%[b], 368]\n\t"
+ "ldp x4, x6, [%[a], 352]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 368]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 352]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 368]\n\t"
+ "csetm %[r], cc\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+ );
+
+ return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * Word-serial Montgomery reduction: 48 iterations, one per output word.
+ * Each iteration computes mu = a[i] * mp and adds mu * m into a[i..],
+ * zeroing the lowest live word; the running word pointer is advanced by
+ * 8 bytes per iteration ("add %[a], %[a], 8").  m[0..13] are cached in
+ * registers x14-x17/x19-x28 up front; m[14..47] are reloaded each pass.
+ * The final carry is accumulated in ca and resolved by a constant-time
+ * conditional subtract of m.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0; /* carry out of each reduction pass */
+
+ __asm__ __volatile__ (
+ "ldp x14, x15, [%[m], 0]\n\t"
+ "ldp x16, x17, [%[m], 16]\n\t"
+ "ldp x19, x20, [%[m], 32]\n\t"
+ "ldp x21, x22, [%[m], 48]\n\t"
+ "ldp x23, x24, [%[m], 64]\n\t"
+ "ldp x25, x26, [%[m], 80]\n\t"
+ "ldp x27, x28, [%[m], 96]\n\t"
+ "# i = 48\n\t"
+ "mov x4, 48\n\t"
+ "ldp x12, x13, [%[a], 0]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul x9, %[mp], x12\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "mul x7, x14, x9\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x12, x12, x7\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x6, x8, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x12, x13, x7\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr x13, [%[a], 16]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "mul x7, x16, x9\n\t"
+ "adds x12, x12, x6\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x13, x13, x7\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr x10, [%[a], 24]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "mul x7, x17, x9\n\t"
+ "adds x13, x13, x5\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr x11, [%[a], 32]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "str x10, [%[a], 24]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr x10, [%[a], 40]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "str x11, [%[a], 32]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr x11, [%[a], 48]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "str x10, [%[a], 40]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr x10, [%[a], 56]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "str x11, [%[a], 48]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr x11, [%[a], 64]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x23, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x23, x9\n\t"
+ "str x10, [%[a], 56]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr x10, [%[a], 72]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x24, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x24, x9\n\t"
+ "str x11, [%[a], 64]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr x11, [%[a], 80]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x25, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x25, x9\n\t"
+ "str x10, [%[a], 72]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr x10, [%[a], 88]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x26, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x26, x9\n\t"
+ "str x11, [%[a], 80]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+12] += m[12] * mu\n\t"
+ "ldr x11, [%[a], 96]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x27, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x27, x9\n\t"
+ "str x10, [%[a], 88]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+13] += m[13] * mu\n\t"
+ "ldr x10, [%[a], 104]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x28, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x28, x9\n\t"
+ "str x11, [%[a], 96]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+14] += m[14] * mu\n\t"
+ "ldr x11, [%[a], 112]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 112]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 104]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+15] += m[15] * mu\n\t"
+ "ldr x10, [%[a], 120]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 120]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 112]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+16] += m[16] * mu\n\t"
+ "ldr x11, [%[a], 128]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 128]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 120]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+17] += m[17] * mu\n\t"
+ "ldr x10, [%[a], 136]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 136]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 128]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+18] += m[18] * mu\n\t"
+ "ldr x11, [%[a], 144]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 144]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 136]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+19] += m[19] * mu\n\t"
+ "ldr x10, [%[a], 152]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 152]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 144]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+20] += m[20] * mu\n\t"
+ "ldr x11, [%[a], 160]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 160]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 152]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+21] += m[21] * mu\n\t"
+ "ldr x10, [%[a], 168]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 168]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 160]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+22] += m[22] * mu\n\t"
+ "ldr x11, [%[a], 176]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 176]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 168]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+23] += m[23] * mu\n\t"
+ "ldr x10, [%[a], 184]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 184]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 176]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+24] += m[24] * mu\n\t"
+ "ldr x11, [%[a], 192]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 192]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 184]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+25] += m[25] * mu\n\t"
+ "ldr x10, [%[a], 200]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 200]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 192]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+26] += m[26] * mu\n\t"
+ "ldr x11, [%[a], 208]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 208]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 200]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+27] += m[27] * mu\n\t"
+ "ldr x10, [%[a], 216]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 216]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 208]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+28] += m[28] * mu\n\t"
+ "ldr x11, [%[a], 224]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 224]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 216]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+29] += m[29] * mu\n\t"
+ "ldr x10, [%[a], 232]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 232]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 224]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+30] += m[30] * mu\n\t"
+ "ldr x11, [%[a], 240]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 240]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 232]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+31] += m[31] * mu\n\t"
+ "ldr x10, [%[a], 248]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 248]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 240]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+32] += m[32] * mu\n\t"
+ "ldr x11, [%[a], 256]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 256]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 248]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+33] += m[33] * mu\n\t"
+ "ldr x10, [%[a], 264]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 264]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 256]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+34] += m[34] * mu\n\t"
+ "ldr x11, [%[a], 272]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 272]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 264]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+35] += m[35] * mu\n\t"
+ "ldr x10, [%[a], 280]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 280]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 272]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+36] += m[36] * mu\n\t"
+ "ldr x11, [%[a], 288]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 288]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 280]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+37] += m[37] * mu\n\t"
+ "ldr x10, [%[a], 296]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 296]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 288]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+38] += m[38] * mu\n\t"
+ "ldr x11, [%[a], 304]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 304]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 296]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+39] += m[39] * mu\n\t"
+ "ldr x10, [%[a], 312]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 312]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 304]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+40] += m[40] * mu\n\t"
+ "ldr x11, [%[a], 320]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 320]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 312]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+41] += m[41] * mu\n\t"
+ "ldr x10, [%[a], 328]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 328]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 320]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+42] += m[42] * mu\n\t"
+ "ldr x11, [%[a], 336]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 336]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 328]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+43] += m[43] * mu\n\t"
+ "ldr x10, [%[a], 344]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 344]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 336]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+44] += m[44] * mu\n\t"
+ "ldr x11, [%[a], 352]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 352]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 344]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+45] += m[45] * mu\n\t"
+ "ldr x10, [%[a], 360]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 360]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 352]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+46] += m[46] * mu\n\t"
+ "ldr x11, [%[a], 368]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 368]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 360]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+47] += m[47] * mu\n\t"
+ "ldr x10, [%[a], 376]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 376]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x8, x8, %[ca]\n\t"
+ "str x11, [%[a], 368]\n\t"
+ "cset %[ca], cs\n\t"
+ "adds x10, x10, x6\n\t"
+ "ldr x11, [%[a], 384]\n\t"
+ "str x10, [%[a], 376]\n\t"
+ "adcs x11, x11, x8\n\t"
+ "str x11, [%[a], 384]\n\t"
+ "adc %[ca], %[ca], xzr\n\t"
+ "subs x4, x4, 1\n\t"
+ "add %[a], %[a], 8\n\t"
+ "bne 1b\n\t"
+ "stp x12, x13, [%[a], 0]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
+ );
+
+ /* The loop advanced a by 48 words; subtract m from the result in
+ * constant time iff a carry (ca != 0) remains. */
+ sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * The full 96-word product is formed first and then reduced back to
+ * 48 words with Montgomery reduction.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (negative inverse of m mod 2^n).
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_3072_mul_48(r, a, b);
+ sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Uses the dedicated squaring routine (cheaper than a general multiply)
+ * followed by Montgomery reduction back to 48 words.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (negative inverse of m mod 2^n).
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_3072_sqr_48(r, a);
+ sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * The quotient is estimated roughly 32 bits at a time by dividing by
+ * (div >> 32) + 1 (which never over-estimates), subtracting the
+ * corresponding multiple of div, and accumulating the partial quotients
+ * in x6; a final single-word udiv supplies the last correction.
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * NOTE(review): the asm writes to %[d0] and %[d1] although they are
+ * declared as input-only operands; GCC inline-asm rules require "+r"
+ * output operands for that — confirm against upstream before relying
+ * on this under aggressive optimization.
+ */
+static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div)
+{
+ sp_digit r;
+
+ __asm__ __volatile__ (
+ "lsr x5, %[div], 32\n\t"
+ "add x5, x5, 1\n\t"
+
+ "udiv x3, %[d1], x5\n\t"
+ "lsl x6, x3, 32\n\t"
+ "mul x4, %[div], x6\n\t"
+ "umulh x3, %[div], x6\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "udiv x3, %[d1], x5\n\t"
+ "lsl x3, x3, 32\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "umulh x3, %[div], x3\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "lsr x3, %[d0], 32\n\t"
+ "orr x3, x3, %[d1], lsl 32\n\t"
+
+ "udiv x3, x3, x5\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "umulh x3, %[div], x3\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "lsr x3, %[d0], 32\n\t"
+ "orr x3, x3, %[d1], lsl 32\n\t"
+
+ "udiv x3, x3, x5\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "sub %[d0], %[d0], x4\n\t"
+
+ "udiv x3, %[d0], %[div]\n\t"
+ "add %[r], x6, x3\n\t"
+
+ : [r] "=r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "x3", "x4", "x5", "x6"
+ );
+
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Used with an all-ones/all-zero mask to conditionally select a value
+ * without branching.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<48; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ int i;
+
+ /* Unrolled eight words at a time (48 = 6 * 8). */
+ for (i = 0; i < 48; i += 8) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ r[i+6] = a[i+6] & m;
+ r[i+7] = a[i+7] & m;
+ }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * Words are examined from most significant (offset 376) down to least.
+ * x2 holds the running result, x3 = +1, x4 is an all-ones mask that is
+ * cleared (csel ..., xzr, eq) once the first differing word is seen, so
+ * later words are ANDed to zero and cannot change the outcome.  The
+ * same instruction sequence executes regardless of the values, so the
+ * timing does not leak where a and b differ.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int64_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ "mov x5, 376\n\t"
+ "1:\n\t"
+ "ldr x6, [%[a], x5]\n\t"
+ "ldr x7, [%[b], x5]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x6, x6, x7\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "subs x5, x5, #8\n\t"
+ "b.cs 1b\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+ );
+#else
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ "ldp x5, x6, [%[a], 368]\n\t"
+ "ldp x7, x8, [%[b], 368]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 352]\n\t"
+ "ldp x7, x8, [%[b], 352]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 336]\n\t"
+ "ldp x7, x8, [%[b], 336]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 320]\n\t"
+ "ldp x7, x8, [%[b], 320]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 304]\n\t"
+ "ldp x7, x8, [%[b], 304]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 288]\n\t"
+ "ldp x7, x8, [%[b], 288]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 272]\n\t"
+ "ldp x7, x8, [%[b], 272]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 256]\n\t"
+ "ldp x7, x8, [%[b], 256]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 240]\n\t"
+ "ldp x7, x8, [%[b], 240]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 224]\n\t"
+ "ldp x7, x8, [%[b], 224]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 208]\n\t"
+ "ldp x7, x8, [%[b], 208]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 192]\n\t"
+ "ldp x7, x8, [%[b], 192]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "ldp x7, x8, [%[b], 176]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "ldp x7, x8, [%[b], 144]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "ldp x7, x8, [%[b], 112]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "ldp x7, x8, [%[b], 80]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "ldp x7, x8, [%[b], 48]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "ldp x7, x8, [%[b], 16]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+ );
+#endif
+
+ /* The result was folded into the [a] operand register by the asm. */
+ return (int64_t)a;
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division one 64-bit digit at a time: each estimated
+ * quotient digit r1 may be off by at most two, so two constant-time
+ * masked add-backs of d follow each subtraction.
+ *
+ * a Number to be divided (2 * 48 words).
+ * d Number to divide with (48 words).
+ * m Multiplier result (unused, pass NULL).
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[96], t2[49];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ div = d[47];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+ for (i=47; i>=0; i--) {
+ /* Estimate quotient digit from the top two working words. */
+ r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+ sp_3072_mul_d_48(t2, d, r1);
+ t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+ t1[48 + i] -= t2[48];
+ /* Add d back (masked) while the working top word is non-zero,
+ * correcting an over-estimate of r1. */
+ sp_3072_mask_48(t2, d, t1[48 + i]);
+ t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+ sp_3072_mask_48(t2, d, t1[48 + i]);
+ t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+ }
+
+ /* Final constant-time reduction: subtract d once if t1 >= d. */
+ r1 = sp_3072_cmp_48(t1, d) >= 0;
+ sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_3072_div_48 that keeps only the remainder.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_3072_div_48(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Loops four 64-bit words (32 bytes) per iteration until a reaches
+ * a + 384 bytes (48 words).  The borrow is carried across iterations:
+ * "subs %[c], xzr, %[c]" re-arms the carry flag from c, and
+ * "csetm %[c], cc" captures the outgoing borrow as an all-ones mask.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "add x11, %[a], 384\n\t"
+ "\n1:\n\t"
+ "subs %[c], xzr, %[c]\n\t"
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "ldp x7, x8, [%[b]], #16\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x9, x10, [%[b]], #16\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ "csetm %[c], cc\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ return c;
+}
+
+
+#else
/* Sub b from a into r. (r = a - b)
 *
 * Fully unrolled variant: one uninterrupted subs/sbcs carry chain over all
 * 48 digits (3072 bits). Returns an all-ones mask when a < b (final borrow),
 * 0 otherwise.
 *
 * r A single precision integer.
 * a A single precision integer.
 * b A single precision integer.
 */
static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    __asm__ __volatile__ (
        /* First group establishes the borrow chain with subs; every later
         * group continues it with sbcs. Loads/stores are interleaved with
         * the arithmetic but never break the flag chain (ldp/stp do not
         * touch NZCV). */
        "ldp x3, x4, [%[a], 0]\n\t"
        "ldp x7, x8, [%[b], 0]\n\t"
        "subs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 16]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 16]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 0]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 16]\n\t"
        "ldp x3, x4, [%[a], 32]\n\t"
        "ldp x7, x8, [%[b], 32]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 48]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 48]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 32]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 48]\n\t"
        "ldp x3, x4, [%[a], 64]\n\t"
        "ldp x7, x8, [%[b], 64]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 80]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 80]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 64]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 80]\n\t"
        "ldp x3, x4, [%[a], 96]\n\t"
        "ldp x7, x8, [%[b], 96]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 112]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 112]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 96]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 112]\n\t"
        "ldp x3, x4, [%[a], 128]\n\t"
        "ldp x7, x8, [%[b], 128]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 144]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 144]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 128]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 144]\n\t"
        "ldp x3, x4, [%[a], 160]\n\t"
        "ldp x7, x8, [%[b], 160]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 176]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 176]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 160]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 176]\n\t"
        "ldp x3, x4, [%[a], 192]\n\t"
        "ldp x7, x8, [%[b], 192]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 208]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 208]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 192]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 208]\n\t"
        "ldp x3, x4, [%[a], 224]\n\t"
        "ldp x7, x8, [%[b], 224]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 240]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 240]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 224]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 240]\n\t"
        "ldp x3, x4, [%[a], 256]\n\t"
        "ldp x7, x8, [%[b], 256]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 272]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 272]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 256]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 272]\n\t"
        "ldp x3, x4, [%[a], 288]\n\t"
        "ldp x7, x8, [%[b], 288]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 304]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 304]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 288]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 304]\n\t"
        "ldp x3, x4, [%[a], 320]\n\t"
        "ldp x7, x8, [%[b], 320]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 336]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 336]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 320]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 336]\n\t"
        "ldp x3, x4, [%[a], 352]\n\t"
        "ldp x7, x8, [%[b], 352]\n\t"
        "sbcs x3, x3, x7\n\t"
        "ldp x5, x6, [%[a], 368]\n\t"
        "sbcs x4, x4, x8\n\t"
        "ldp x9, x10, [%[b], 368]\n\t"
        "sbcs x5, x5, x9\n\t"
        "stp x3, x4, [%[r], 352]\n\t"
        "sbcs x6, x6, x10\n\t"
        "stp x5, x6, [%[r], 368]\n\t"
        /* r (no longer needed as a pointer) is reused to return the borrow
         * mask: all-ones if the final subtract borrowed, else 0. */
        "csetm %[r], cc\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b)
        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
    );

    return (sp_digit)r;
}
+
+#endif /* WOLFSSL_SP_SMALL */
/* Divide d in a and put remainder into r (m*d + r = a)
 * m is not calculated as it is not needed at this time.
 *
 * Schoolbook trial division: one estimated quotient digit per iteration,
 * followed by up to two conditional corrective additions of the divisor.
 *
 * a Number to be divided.
 * d Number to divide with.
 * m Multiplier result.
 * r Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    sp_digit t1[96], t2[49];    /* t1: 96-digit working dividend; t2: r1*d */
    sp_digit div, r1;
    int i;

    (void)m;

    /* Top divisor digit used for the two-digit quotient estimate. */
    div = d[47];
    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
    for (i=47; i>=0; i--) {
        /* Estimate quotient digit from the top two dividend digits. */
        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);

        /* Subtract r1 * d at digit offset i; estimate may be off by up
         * to 2, so add d back while the top digit indicates overshoot. */
        sp_3072_mul_d_48(t2, d, r1);
        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
        t1[48 + i] -= t2[48];
        if (t1[48 + i] != 0) {
            t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
            if (t1[48 + i] != 0)
                t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
        }
    }

    /* Final conditional subtraction: remainder may still equal or exceed
     * d by less than d; compare from the top digit down. */
    for (i = 47; i > 0; i--) {
        if (t1[i] != d[i])
            break;
    }
    if (t1[i] >= d[i]) {
        sp_3072_sub_48(r, t1, d);
    }
    else {
        XMEMCPY(r, t1, sizeof(*t1) * 48);
    }

    return MP_OKAY;
}
+
/* Reduce a modulo m into r. (r = a mod m)
 *
 * Thin wrapper over conditional division; the quotient is discarded.
 * "cond" = data-dependent branches, so not constant time; only used where
 * the operands are public (e.g. RSA public-key path).
 *
 * r A single precision number that is the reduced result.
 * a A single precision number that is to be reduced.
 * m A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    return sp_3072_div_48_cond(a, m, NULL, r);
}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Fixed 4-bit window exponentiation in the Montgomery domain using a
 * 16-entry table of powers of a (WOLFSSL_SP_SMALL variant).
 *
 * r A single precision number that is the result of the operation.
 * a A single precision number being exponentiated.
 * e A single precision number that is the exponent.
 * bits The number of bits in the exponent.
 * m A single precision number that is the modulus.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[16][96];         /* t[i] = a^i in Montgomery form */
#else
    sp_digit* t[16];
    sp_digit* td;
#endif
    sp_digit* norm;             /* 2^3072 mod m, used to enter Montgomery form */
    sp_digit mp = 1;            /* Montgomery multiplier: -1/m mod 2^64 */
    sp_digit n;                 /* current exponent word being consumed */
    sp_digit mask;
    int i;
    int c, y;                   /* c: bits left in n; y: current window value */
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        for (i=0; i<16; i++) {
            t[i] = td + i * 96;
        }
#endif
        norm = t[0];

        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_48(norm, m);

        /* t[1] = a * 2^3072 mod m: place (reduced) a in the upper half of
         * a 96-digit number and reduce, converting to Montgomery form. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
        if (reduceA != 0) {
            err = sp_3072_mod_48(t[1] + 48, a, m);
            if (err == MP_OKAY) {
                err = sp_3072_mod_48(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
            err = sp_3072_mod_48(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Fill the window table: even entries by squaring, odd by multiply. */
        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);

        /* Consume the top (possibly short) window first so all remaining
         * windows are exactly 4 bits. */
        i = (bits - 1) / 64;
        n = e[i--];
        c = bits & 63;
        if (c == 0) {
            c = 64;
        }
        c -= bits % 4;
        if (c == 64) {
            c = 60;
        }
        y = (int)(n >> c);
        n <<= 64 - c;
        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
        for (; i>=0 || c>=4; ) {
            if (c == 0) {
                /* Current word exhausted exactly on a window boundary. */
                n = e[i--];
                y = n >> 60;
                n <<= 4;
                c = 60;
            }
            else if (c < 4) {
                /* Window straddles a word boundary: combine leftover high
                 * bits with the top bits of the next word. */
                y = n >> 60;
                n = e[i--];
                c = 4 - c;
                y |= n >> (64 - c);
                n <<= c;
                c = 64 - c;
            }
            else {
                y = (n >> 60) & 0xf;
                n <<= 4;
                c -= 4;
            }

            /* r = r^16 * a^y (Montgomery domain). */
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);

            sp_3072_mont_mul_48(r, r, t[y], m, mp);
        }

        /* Leave Montgomery form and fully reduce (mask-based, branch-free). */
        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
        sp_3072_mont_reduce_48(r, m, mp);

        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
        sp_3072_cond_sub_48(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
+#else
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Fixed 5-bit window exponentiation in the Montgomery domain using a
 * 32-entry table of powers of a (non-small variant: larger table, fewer
 * multiplies than the 4-bit version).
 *
 * r A single precision number that is the result of the operation.
 * a A single precision number being exponentiated.
 * e A single precision number that is the exponent.
 * bits The number of bits in the exponent.
 * m A single precision number that is the modulus.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[32][96];         /* t[i] = a^i in Montgomery form */
#else
    sp_digit* t[32];
    sp_digit* td;
#endif
    sp_digit* norm;             /* 2^3072 mod m */
    sp_digit mp = 1;            /* -1/m mod 2^64 for Montgomery reduction */
    sp_digit n;                 /* current exponent word */
    sp_digit mask;
    int i;
    int c, y;                   /* c: bits left in n; y: window value */
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        for (i=0; i<32; i++) {
            t[i] = td + i * 96;
        }
#endif
        norm = t[0];

        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_48(norm, m);

        /* Convert a to Montgomery form: a * 2^3072 mod m via 96-digit mod. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
        if (reduceA != 0) {
            err = sp_3072_mod_48(t[1] + 48, a, m);
            if (err == MP_OKAY) {
                err = sp_3072_mod_48(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
            err = sp_3072_mod_48(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Window table: even entries by squaring, odd by multiply. */
        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
        sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
        sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
        sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
        sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
        sp_3072_mont_sqr_48(t[20], t[10], m, mp);
        sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
        sp_3072_mont_sqr_48(t[22], t[11], m, mp);
        sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
        sp_3072_mont_sqr_48(t[24], t[12], m, mp);
        sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
        sp_3072_mont_sqr_48(t[26], t[13], m, mp);
        sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
        sp_3072_mont_sqr_48(t[28], t[14], m, mp);
        sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
        sp_3072_mont_sqr_48(t[30], t[15], m, mp);
        sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);

        /* Consume the (possibly short) top window first so all remaining
         * windows are exactly 5 bits. */
        i = (bits - 1) / 64;
        n = e[i--];
        c = bits & 63;
        if (c == 0) {
            c = 64;
        }
        c -= bits % 5;
        if (c == 64) {
            c = 59;
        }
        y = (int)(n >> c);
        n <<= 64 - c;
        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Word exhausted exactly on a window boundary. */
                n = e[i--];
                y = n >> 59;
                n <<= 5;
                c = 59;
            }
            else if (c < 5) {
                /* Window straddles a word boundary. */
                y = n >> 59;
                n = e[i--];
                c = 5 - c;
                y |= n >> (64 - c);
                n <<= c;
                c = 64 - c;
            }
            else {
                y = (n >> 59) & 0x1f;
                n <<= 5;
                c -= 5;
            }

            /* r = r^32 * a^y (Montgomery domain). */
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);

            sp_3072_mont_mul_48(r, r, t[y], m, mp);
        }

        /* Leave Montgomery form; final branch-free conditional subtract. */
        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
        sp_3072_mont_reduce_48(r, m, mp);

        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
        sp_3072_cond_sub_48(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
/* RSA public key operation.
 *
 * in Array of bytes representing the number to exponentiate, base.
 * inLen Number of bytes in base.
 * em Public exponent.
 * mm Modulus.
 * out Buffer to hold big-endian bytes of exponentiation result.
 * Must be at least 384 bytes long.
 * outLen Number of bytes in result.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an array is too long and MEMORY_E when dynamic memory allocation fails.
 */
int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
    byte* out, word32* outLen)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit a[96], m[48], r[96];
#else
    sp_digit* d = NULL;
    sp_digit* a;
    sp_digit* m;
    sp_digit* r;
#endif
    sp_digit *ah;               /* upper half of a: holds the input base */
    sp_digit e[1];              /* public exponent fits in one 64-bit word */
    int err = MP_OKAY;

    /* Validate sizes: output buffer, exponent <= 64 bits, modulus == 3072. */
    if (*outLen < 384)
        err = MP_TO_E;
    if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 384 ||
                                                    mp_count_bits(mm) != 3072))
        err = MP_READ_E;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 5, NULL,
                                                              DYNAMIC_TYPE_RSA);
        if (d == NULL)
            err = MEMORY_E;
    }

    if (err == MP_OKAY) {
        a = d;
        r = a + 48 * 2;
        m = r + 48 * 2;
    }
#endif

    if (err == MP_OKAY) {
        ah = a + 48;

        sp_3072_from_bin(ah, 48, in, inLen);
#if DIGIT_BIT >= 64
        e[0] = em->dp[0];
#else
        /* mp_digit is narrower than 64 bits: combine up to two digits. */
        e[0] = em->dp[0];
        if (em->used > 1) {
            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
        }
#endif
        if (e[0] == 0) {
            err = MP_EXPTMOD_E;
        }
    }
    if (err == MP_OKAY) {
        sp_3072_from_mp(m, 48, mm);

        if (e[0] == 0x3) {
            /* Common-exponent fast path: a^3 = (a^2) * a, two reductions. */
            if (err == MP_OKAY) {
                sp_3072_sqr_48(r, ah);
                err = sp_3072_mod_48_cond(r, r, m);
            }
            if (err == MP_OKAY) {
                sp_3072_mul_48(r, ah, r);
                err = sp_3072_mod_48_cond(r, r, m);
            }
        }
        else {
            /* General path: square-and-multiply over the exponent bits. */
            int i;
            sp_digit mp;

            sp_3072_mont_setup(m, &mp);

            /* Convert to Montgomery form: low half zeroed, ah (a+48) holds
             * the base, so the 96-digit value is in * 2^3072; reduce mod m. */
            XMEMSET(a, 0, sizeof(sp_digit) * 48);
            err = sp_3072_mod_48_cond(a, a, m);

            if (err == MP_OKAY) {
                /* Skip leading zero bits of the exponent. */
                for (i = 63; i >= 0; i--) {
                    if (e[0] >> i) {
                        break;
                    }
                }

                XMEMCPY(r, a, sizeof(sp_digit) * 48);
                for (i--; i>=0; i--) {
                    sp_3072_mont_sqr_48(r, r, m, mp);
                    if (((e[0] >> i) & 1) == 1) {
                        sp_3072_mont_mul_48(r, r, a, m, mp);
                    }
                }
                /* Leave Montgomery form; then conditional final subtract. */
                XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
                sp_3072_mont_reduce_48(r, m, mp);

                for (i = 47; i > 0; i--) {
                    if (r[i] != m[i]) {
                        break;
                    }
                }
                if (r[i] >= m[i]) {
                    sp_3072_sub_in_place_48(r, m);
                }
            }
        }
    }

    if (err == MP_OKAY) {
        sp_3072_to_bin(r, out);
        *outLen = 384;
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (d != NULL) {
        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
    }
#endif

    return err;
}
+
#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
    /* Non-CRT private-key path: compute in^dm mod mm directly with the full
     * 3072-bit exponent. NOTE(review): the enclosing function signature lies
     * outside this hunk — this is the function body only. */
    sp_digit* a;
    sp_digit* d = NULL;
    sp_digit* m;
    sp_digit* r;
    int err = MP_OKAY;

    /* CRT parameters are unused on this path. */
    (void)pm;
    (void)qm;
    (void)dpm;
    (void)dqm;
    (void)qim;

    if (*outLen < 384U) {
        err = MP_TO_E;
    }
    if (err == MP_OKAY) {
        if (mp_count_bits(dm) > 3072) {
            err = MP_READ_E;
        }
        if (inLen > 384) {
            err = MP_READ_E;
        }
        if (mp_count_bits(mm) != 3072) {
            err = MP_READ_E;
        }
    }

    if (err == MP_OKAY) {
        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 4, NULL,
                                                              DYNAMIC_TYPE_RSA);
        if (d == NULL) {
            err = MEMORY_E;
        }
    }
    if (err == MP_OKAY) {
        /* Layout within d: [0..47] exponent, [48..143] a (96 digits for
         * intermediate products), [144..191] modulus; result reuses a. */
        a = d + 48;
        m = a + 96;
        r = a;

        sp_3072_from_bin(a, 48, in, inLen);
        sp_3072_from_mp(d, 48, dm);
        sp_3072_from_mp(m, 48, mm);
        err = sp_3072_mod_exp_48(r, a, d, 3072, m, 0);
    }
    if (err == MP_OKAY) {
        sp_3072_to_bin(r, out);
        *outLen = 384;
    }

    if (d != NULL) {
        /* Zeroise the private exponent before freeing. */
        XMEMSET(d, 0, sizeof(sp_digit) * 48);
        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
    }

    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
/* Conditionally add a and b using the mask m.
 * m is -1 to add and 0 when not.
 *
 * Branch-free: b is ANDed with the mask before adding, so the memory access
 * pattern is identical whether or not the add takes effect. Returns the
 * carry out (0 or 1).
 *
 * r A single precision number representing conditional add result.
 * a A single precision number to add with.
 * b A single precision number to add.
 * m Mask value to apply.
 */
static sp_digit sp_3072_cond_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b,
        sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    /* Looping variant: one digit per iteration, carry kept in c. */
    sp_digit c = 0;

    __asm__ __volatile__ (
        "mov x8, #0\n\t"
        "1:\n\t"
        /* Re-materialise the saved carry into the C flag (c is 0 or 1). */
        "adds %[c], %[c], #-1\n\t"
        "ldr x4, [%[a], x8]\n\t"
        "ldr x5, [%[b], x8]\n\t"
        /* Mask the addend: no-op add when m == 0. */
        "and x5, x5, %[m]\n\t"
        "adcs x4, x4, x5\n\t"
        "cset %[c], cs\n\t"
        "str x4, [%[r], x8]\n\t"
        "add x8, x8, #8\n\t"
        /* 24 digits * 8 bytes = 192. */
        "cmp x8, 192\n\t"
        "b.lt 1b\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
    );

    return c;
#else
    /* Fully unrolled variant: single adds/adcs carry chain over 24 digits. */
    __asm__ __volatile__ (

        "ldp x5, x7, [%[b], 0]\n\t"
        "ldp x11, x12, [%[b], 16]\n\t"
        "ldp x4, x6, [%[a], 0]\n\t"
        "and x5, x5, %[m]\n\t"
        "ldp x9, x10, [%[a], 16]\n\t"
        "and x7, x7, %[m]\n\t"
        "adds x4, x4, x5\n\t"
        "and x11, x11, %[m]\n\t"
        "adcs x6, x6, x7\n\t"
        "and x12, x12, %[m]\n\t"
        "adcs x9, x9, x11\n\t"
        "stp x4, x6, [%[r], 0]\n\t"
        "adcs x10, x10, x12\n\t"
        "stp x9, x10, [%[r], 16]\n\t"
        "ldp x5, x7, [%[b], 32]\n\t"
        "ldp x11, x12, [%[b], 48]\n\t"
        "ldp x4, x6, [%[a], 32]\n\t"
        "and x5, x5, %[m]\n\t"
        "ldp x9, x10, [%[a], 48]\n\t"
        "and x7, x7, %[m]\n\t"
        "adcs x4, x4, x5\n\t"
        "and x11, x11, %[m]\n\t"
        "adcs x6, x6, x7\n\t"
        "and x12, x12, %[m]\n\t"
        "adcs x9, x9, x11\n\t"
        "stp x4, x6, [%[r], 32]\n\t"
        "adcs x10, x10, x12\n\t"
        "stp x9, x10, [%[r], 48]\n\t"
        "ldp x5, x7, [%[b], 64]\n\t"
        "ldp x11, x12, [%[b], 80]\n\t"
        "ldp x4, x6, [%[a], 64]\n\t"
        "and x5, x5, %[m]\n\t"
        "ldp x9, x10, [%[a], 80]\n\t"
        "and x7, x7, %[m]\n\t"
        "adcs x4, x4, x5\n\t"
        "and x11, x11, %[m]\n\t"
        "adcs x6, x6, x7\n\t"
        "and x12, x12, %[m]\n\t"
        "adcs x9, x9, x11\n\t"
        "stp x4, x6, [%[r], 64]\n\t"
        "adcs x10, x10, x12\n\t"
        "stp x9, x10, [%[r], 80]\n\t"
        "ldp x5, x7, [%[b], 96]\n\t"
        "ldp x11, x12, [%[b], 112]\n\t"
        "ldp x4, x6, [%[a], 96]\n\t"
        "and x5, x5, %[m]\n\t"
        "ldp x9, x10, [%[a], 112]\n\t"
        "and x7, x7, %[m]\n\t"
        "adcs x4, x4, x5\n\t"
        "and x11, x11, %[m]\n\t"
        "adcs x6, x6, x7\n\t"
        "and x12, x12, %[m]\n\t"
        "adcs x9, x9, x11\n\t"
        "stp x4, x6, [%[r], 96]\n\t"
        "adcs x10, x10, x12\n\t"
        "stp x9, x10, [%[r], 112]\n\t"
        "ldp x5, x7, [%[b], 128]\n\t"
        "ldp x11, x12, [%[b], 144]\n\t"
        "ldp x4, x6, [%[a], 128]\n\t"
        "and x5, x5, %[m]\n\t"
        "ldp x9, x10, [%[a], 144]\n\t"
        "and x7, x7, %[m]\n\t"
        "adcs x4, x4, x5\n\t"
        "and x11, x11, %[m]\n\t"
        "adcs x6, x6, x7\n\t"
        "and x12, x12, %[m]\n\t"
        "adcs x9, x9, x11\n\t"
        "stp x4, x6, [%[r], 128]\n\t"
        "adcs x10, x10, x12\n\t"
        "stp x9, x10, [%[r], 144]\n\t"
        "ldp x5, x7, [%[b], 160]\n\t"
        "ldp x11, x12, [%[b], 176]\n\t"
        "ldp x4, x6, [%[a], 160]\n\t"
        "and x5, x5, %[m]\n\t"
        "ldp x9, x10, [%[a], 176]\n\t"
        "and x7, x7, %[m]\n\t"
        "adcs x4, x4, x5\n\t"
        "and x11, x11, %[m]\n\t"
        "adcs x6, x6, x7\n\t"
        "and x12, x12, %[m]\n\t"
        "adcs x9, x9, x11\n\t"
        "stp x4, x6, [%[r], 160]\n\t"
        "adcs x10, x10, x12\n\t"
        "stp x9, x10, [%[r], 176]\n\t"
        /* Reuse r to return the final carry (0 or 1). */
        "cset %[r], cs\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
    );

    return (sp_digit)r;
#endif /* WOLFSSL_SP_SMALL */
}
+
/* RSA private key operation.
 *
 * CRT implementation: two 1536-bit exponentiations mod p and q, recombined
 * with Garner's formula r = tmpb + q * (qi * (tmpa - tmpb) mod p).
 *
 * in Array of bytes representing the number to exponentiate, base.
 * inLen Number of bytes in base.
 * dm Private exponent.
 * pm First prime.
 * qm Second prime.
 * dpm First prime's CRT exponent.
 * dqm Second prime's CRT exponent.
 * qim Inverse of second prime mod p.
 * mm Modulus.
 * out Buffer to hold big-endian bytes of exponentiation result.
 * Must be at least 384 bytes long.
 * outLen Number of bytes in result.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an array is too long and MEMORY_E when dynamic memory allocation fails.
 */
int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
    byte* out, word32* outLen)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit a[48 * 2];
    sp_digit p[24], q[24], dp[24];
    sp_digit tmpa[48], tmpb[48];
#else
    sp_digit* t = NULL;
    sp_digit* a;
    sp_digit* p;
    sp_digit* q;
    sp_digit* dp;
    sp_digit* tmpa;
    sp_digit* tmpb;
#endif
    sp_digit* r;
    sp_digit* qi;
    sp_digit* dq;
    sp_digit c;
    int err = MP_OKAY;

    /* Full exponent and modulus are not used by the CRT path. */
    (void)dm;
    (void)mm;

    if (*outLen < 384)
        err = MP_TO_E;
    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
        err = MP_READ_E;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 24 * 11, NULL,
                                                              DYNAMIC_TYPE_RSA);
        if (t == NULL)
            err = MEMORY_E;
    }
    if (err == MP_OKAY) {
        /* qi, dq and dp deliberately share storage: they are used at
         * disjoint times. */
        a = t;
        p = a + 48 * 2;
        q = p + 24;
        qi = dq = dp = q + 24;
        tmpa = qi + 24;
        tmpb = tmpa + 48;

        r = t + 48;
    }
#else
#endif

    if (err == MP_OKAY) {
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
        r = a;
        qi = dq = dp;
#endif
        sp_3072_from_bin(a, 48, in, inLen);
        sp_3072_from_mp(p, 24, pm);
        sp_3072_from_mp(q, 24, qm);
        sp_3072_from_mp(dp, 24, dpm);

        /* tmpa = in^dp mod p */
        err = sp_3072_mod_exp_24(tmpa, a, dp, 1536, p, 1);
    }
    if (err == MP_OKAY) {
        /* tmpb = in^dq mod q */
        sp_3072_from_mp(dq, 24, dqm);
        err = sp_3072_mod_exp_24(tmpb, a, dq, 1536, q, 1);
    }

    if (err == MP_OKAY) {
        /* tmpa - tmpb may underflow by up to 2*p; add p back (branch-free)
         * once or twice depending on the propagated borrow. */
        c = sp_3072_sub_in_place_24(tmpa, tmpb);
        c += sp_3072_cond_add_24(tmpa, tmpa, p, c);
        sp_3072_cond_add_24(tmpa, tmpa, p, c);

        /* tmpa = qi * (tmpa) mod p */
        sp_3072_from_mp(qi, 24, qim);
        sp_3072_mul_24(tmpa, tmpa, qi);
        err = sp_3072_mod_24(tmpa, tmpa, p);
    }

    if (err == MP_OKAY) {
        /* r = tmpb + q * tmpa (Garner recombination). */
        sp_3072_mul_24(tmpa, q, tmpa);
        XMEMSET(&tmpb[24], 0, sizeof(sp_digit) * 24);
        sp_3072_add_48(r, tmpb, tmpa);

        sp_3072_to_bin(r, out);
        *outLen = 384;
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        /* Zeroise all key-dependent intermediates before freeing. */
        XMEMSET(t, 0, sizeof(sp_digit) * 24 * 11);
        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
    }
#else
    XMEMSET(tmpa, 0, sizeof(tmpa));
    XMEMSET(tmpb, 0, sizeof(tmpb));
    XMEMSET(p, 0, sizeof(p));
    XMEMSET(q, 0, sizeof(q));
    XMEMSET(dp, 0, sizeof(dp));
#endif

    return err;
}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Convert an array of sp_digit to an mp_int.
 *
 * Repacks 48 64-bit sp_digit words into mp_digit words of DIGIT_BIT bits;
 * three compile-time paths depending on how DIGIT_BIT compares to 64.
 *
 * a A single precision integer.
 * r A multi-precision integer.
 */
static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 64
        /* Same word size: straight copy. */
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 48);
        r->used = 48;
        mp_clamp(r);
#elif DIGIT_BIT < 64
        /* mp_digit narrower than 64 bits: each sp_digit spills across
         * several mp_digits. s tracks the bit offset within a[i]. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 48; i++) {
            r->dp[j] |= (mp_digit)(a[i] << s);
            r->dp[j] &= (1L << DIGIT_BIT) - 1;
            s = DIGIT_BIT - s;
            r->dp[++j] = (mp_digit)(a[i] >> s);
            while (s + DIGIT_BIT <= 64) {
                s += DIGIT_BIT;
                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = (mp_digit)(a[i] >> s);
                }
            }
            s = 64 - s;
        }
        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        /* mp_digit wider than 64 bits: pack multiple sp_digits per word. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 48; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 64 >= DIGIT_BIT) {
    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                r->dp[j] &= (1L << DIGIT_BIT) - 1;
    #endif
                s = DIGIT_BIT - s;
                r->dp[++j] = a[i] >> s;
                s = 64 - s;
            }
            else {
                s += 64;
            }
        }
        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#endif
    }

    return err;
}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ sp_digit b[96], e[48], m[48];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 48, base);
+ sp_3072_from_mp(e, 48, exp);
+ sp_3072_from_mp(m, 48, mod);
+
+ err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_3072_to_mp(r, res);
+ }
+
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
/* Shift a left by n bits into r, where 0 <= n <= 63; writes 49 digits
 * (r[0..48], 392 bytes) so the bit shifted out of the top digit is kept.
 * Works from the most significant digit down. The "lsr x, y, 1" followed by
 * "lsr x, x, x6" (x6 = 63 - n) computes y >> (64 - n) without the undefined
 * full-width shift when n == 0. */
static void sp_3072_lshift_48(sp_digit* r, sp_digit* a, byte n)
{
    __asm__ __volatile__ (
        "mov x6, 63\n\t"
        "sub x6, x6, %[n]\n\t"
        /* Top digit: its spill-over becomes r[48]. */
        "ldr x3, [%[a], 376]\n\t"
        "lsr x4, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x4, x4, x6\n\t"
        /* Each stanza below: load the next-lower digit, OR its high bits
         * into the previously shifted digit, store. Registers x2/x3/x4
         * rotate as the running carry word. */
        "ldr x2, [%[a], 368]\n\t"
        "str x4, [%[r], 384]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 360]\n\t"
        "str x3, [%[r], 376]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 352]\n\t"
        "str x2, [%[r], 368]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 344]\n\t"
        "str x4, [%[r], 360]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 336]\n\t"
        "str x3, [%[r], 352]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 328]\n\t"
        "str x2, [%[r], 344]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 320]\n\t"
        "str x4, [%[r], 336]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 312]\n\t"
        "str x3, [%[r], 328]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 304]\n\t"
        "str x2, [%[r], 320]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 296]\n\t"
        "str x4, [%[r], 312]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 288]\n\t"
        "str x3, [%[r], 304]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 280]\n\t"
        "str x2, [%[r], 296]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 272]\n\t"
        "str x4, [%[r], 288]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 264]\n\t"
        "str x3, [%[r], 280]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 256]\n\t"
        "str x2, [%[r], 272]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 248]\n\t"
        "str x4, [%[r], 264]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 240]\n\t"
        "str x3, [%[r], 256]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 232]\n\t"
        "str x2, [%[r], 248]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 224]\n\t"
        "str x4, [%[r], 240]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 216]\n\t"
        "str x3, [%[r], 232]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 208]\n\t"
        "str x2, [%[r], 224]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 200]\n\t"
        "str x4, [%[r], 216]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 192]\n\t"
        "str x3, [%[r], 208]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 184]\n\t"
        "str x2, [%[r], 200]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 176]\n\t"
        "str x4, [%[r], 192]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 168]\n\t"
        "str x3, [%[r], 184]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 160]\n\t"
        "str x2, [%[r], 176]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 152]\n\t"
        "str x4, [%[r], 168]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 144]\n\t"
        "str x3, [%[r], 160]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 136]\n\t"
        "str x2, [%[r], 152]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 128]\n\t"
        "str x4, [%[r], 144]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 120]\n\t"
        "str x3, [%[r], 136]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 112]\n\t"
        "str x2, [%[r], 128]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 104]\n\t"
        "str x4, [%[r], 120]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 96]\n\t"
        "str x3, [%[r], 112]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 88]\n\t"
        "str x2, [%[r], 104]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 80]\n\t"
        "str x4, [%[r], 96]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 72]\n\t"
        "str x3, [%[r], 88]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 64]\n\t"
        "str x2, [%[r], 80]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 56]\n\t"
        "str x4, [%[r], 72]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 48]\n\t"
        "str x3, [%[r], 64]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 40]\n\t"
        "str x2, [%[r], 56]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 32]\n\t"
        "str x4, [%[r], 48]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 24]\n\t"
        "str x3, [%[r], 40]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        "ldr x3, [%[a], 16]\n\t"
        "str x4, [%[r], 48]\n\t"
        "lsr x5, x3, 1\n\t"
        "lsl x3, x3, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x4, x4, x5\n\t"
        "ldr x2, [%[a], 8]\n\t"
        "str x4, [%[r], 24]\n\t"
        "lsr x5, x2, 1\n\t"
        "lsl x2, x2, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x3, x3, x5\n\t"
        "ldr x4, [%[a], 0]\n\t"
        "str x3, [%[r], 16]\n\t"
        "lsr x5, x4, 1\n\t"
        "lsl x4, x4, %[n]\n\t"
        "lsr x5, x5, x6\n\t"
        "orr x2, x2, x5\n\t"
        /* Bottom two results stored last. */
        "str x4, [%[r]]\n\t"
        "str x2, [%[r], 8]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
        : "memory", "x2", "x3", "x4", "x5", "x6"
    );
}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits,
+ const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ /* norm workspace (96 words) and mul_d result (48 words + 1 carry word). */
+ sp_digit nd[96];
+ sp_digit td[49];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* Single allocation split below into norm (96 words) and tmp (49 words). */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 96;
+#else
+ norm = nd;
+ tmp = td;
+#endif
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_48(norm, m);
+
+ /* i indexes the top exponent word; c counts the valid bits left in n. */
+ i = (bits - 1) / 64;
+ n = e[i--];
+ c = bits & 63;
+ if (c == 0) {
+ c = 64;
+ }
+ /* Shrink the first window to bits % 6 bits (6 when that is zero) so
+ * the remaining exponent splits evenly into 6-bit windows. */
+ c -= bits % 6;
+ if (c == 64) {
+ c = 58;
+ }
+ /* y = value of the first window; pre-shift n so the next window is
+ * always in the top bits. */
+ y = (int)(n >> c);
+ n <<= 64 - c;
+ /* r = norm << y, i.e. 2^y in Montgomery form (norm is the Montgomery
+ * normalizer produced by sp_3072_mont_norm_48). */
+ sp_3072_lshift_48(r, norm, y);
+ for (; i>=0 || c>=6; ) {
+ if (c == 0) {
+ /* Previous word ended exactly on a window boundary. */
+ n = e[i--];
+ y = n >> 58;
+ n <<= 6;
+ c = 58;
+ }
+ else if (c < 6) {
+ /* Window straddles two exponent words: combine top bits of the
+ * old word with leading bits of the next one. */
+ y = n >> 58;
+ n = e[i--];
+ c = 6 - c;
+ y |= n >> (64 - c);
+ n <<= c;
+ c = 64 - c;
+ }
+ else {
+ /* Whole window available in the current word. */
+ y = (n >> 58) & 0x3f;
+ n <<= 6;
+ c -= 6;
+ }
+
+ /* Square six times: r = r^(2^6) for the 6-bit window. */
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+
+ /* Multiply by 2^y via a shift, then fold the overflow word r[48]
+ * back in as r += r[48] * norm and conditionally subtract m. */
+ sp_3072_lshift_48(r, r, y);
+ sp_3072_mul_d_48(tmp, norm, r[48]);
+ r[48] = 0;
+ o = sp_3072_add_48(r, r, tmp);
+ sp_3072_cond_sub_48(r, r, m, (sp_digit)0 - o);
+ }
+
+ /* Leave Montgomery form and reduce to a value below m. */
+ XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+ sp_3072_mont_reduce_48(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+ sp_3072_cond_sub_48(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* HAVE_FFDHE_3072 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+ int err = MP_OKAY;
+ /* b is double width (96 words) and doubles as the result buffer r. */
+ sp_digit b[96], e[48], m[48];
+ sp_digit* r = b;
+ word32 i;
+
+ /* Validate operand sizes: base <= 3072 bits, exp <= 384 bytes and the
+ * modulus exactly 3072 bits. */
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 384) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 48, base);
+ sp_3072_from_bin(e, 48, exp, expLen);
+ sp_3072_from_mp(m, 48, mod);
+
+ #ifdef HAVE_FFDHE_3072
+ /* Fast path: base 2 with an FFDHE-style modulus (top word all ones)
+ * uses the shift-based exponentiation. */
+ if (base->used == 1 && base->dp[0] == 2 && m[47] == (sp_digit)-1)
+ err = sp_3072_mod_exp_2_48(r, e, expLen * 8, m);
+ else
+ #endif
+ err = sp_3072_mod_exp_48(r, b, e, expLen * 8, m, 0);
+
+ }
+
+ if (err == MP_OKAY) {
+ /* Write fixed 384 bytes, then strip leading zero bytes in place. */
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ for (i=0; i<384 && out[i] == 0; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+
+ }
+
+ /* Zeroize the secret exponent. NOTE(review): b and m are left as-is;
+ * confirm whether b (which holds the result) should also be cleared. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ /* 1536-bit operands held in the 3072-bit code's half-size arrays. */
+ sp_digit b[48], e[24], m[24];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ /* Validate operand sizes: base/exp <= 1536 bits, modulus exactly 1536. */
+ if (mp_count_bits(base) > 1536) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 1536) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 1536) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 24, base);
+ sp_3072_from_mp(e, 24, exp);
+ sp_3072_from_mp(m, 24, mod);
+
+ err = sp_3072_mod_exp_24(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Clear the unused top half before converting; force the mp_int to
+ * the modulus width, then clamp away leading zero digits. */
+ XMEMSET(r + 24, 0, sizeof(*r) * 24U);
+ err = sp_3072_to_mp(r, res);
+ res->used = mod->used;
+ mp_clamp(res);
+ }
+
+ /* Zeroize the secret exponent. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j;
+ byte* d;
+
+ /* Consume the big-endian input from the end, 8 bytes per 64-bit word. */
+ for (i = n - 1,j = 0; i >= 7; i -= 8) {
+ r[j] = ((sp_digit)a[i - 0] << 0) |
+ ((sp_digit)a[i - 1] << 8) |
+ ((sp_digit)a[i - 2] << 16) |
+ ((sp_digit)a[i - 3] << 24) |
+ ((sp_digit)a[i - 4] << 32) |
+ ((sp_digit)a[i - 5] << 40) |
+ ((sp_digit)a[i - 6] << 48) |
+ ((sp_digit)a[i - 7] << 56);
+ j++;
+ }
+
+ /* i+1 leftover (most significant) bytes: place them into the top word
+ * by byte-aliasing r. d[n - 1 - k] lands inside r[j] since the k-th
+ * input byte has byte-significance i - k. Assumes little-endian sp_digit
+ * layout (consistent with this file's AArch64 target). */
+ if (i >= 0) {
+ r[j] = 0;
+
+ d = (byte*)r;
+ switch (i) {
+ case 6: d[n - 1 - 6] = a[6]; //fallthrough
+ case 5: d[n - 1 - 5] = a[5]; //fallthrough
+ case 4: d[n - 1 - 4] = a[4]; //fallthrough
+ case 3: d[n - 1 - 3] = a[3]; //fallthrough
+ case 2: d[n - 1 - 2] = a[2]; //fallthrough
+ case 1: d[n - 1 - 1] = a[1]; //fallthrough
+ case 0: d[n - 1 - 0] = a[0]; //fallthrough
+ }
+ j++;
+ }
+
+ /* Zero any remaining high words up to the requested size. */
+ for (; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+ /* mp_digit and sp_digit are the same width: straight copy + zero fill. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 64
+ /* mp digits are wider than 64 bits: split each across several words.
+ * s tracks the bit offset already consumed from the current mp digit. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffffffffffffl;
+ s = 64U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 64U) <= (word32)DIGIT_BIT) {
+ s += 64U;
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp digits are narrower than 64 bits: pack several into each word.
+ * s is the bit position within r[j] where the next digit goes. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 64) {
+ /* Current word is full (or exactly filled); spill the remaining
+ * high bits of this digit into the next word. */
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 64 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+ int i;
+ int j;
+ int s;
+
+ /* r[63] is emitted first, so the most significant word leads the
+ * output; within each word the bytes are written big-endian. Always
+ * writes exactly 512 bytes. */
+ j = 0;
+ for (i = 63; i >= 0; i--) {
+ for (s = 56; s >= 0; s -= 8) {
+ a[j] = (byte)(r[i] >> s);
+ j++;
+ }
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_4096_add_32(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ /* Fully unrolled 32-word add: four words per group via ldp/stp pairs,
+ * with a single adds/adcs carry chain threaded through all groups. */
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "adds x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 48]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 80]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 112]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ "ldp x3, x4, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 144]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 128]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 144]\n\t"
+ "ldp x3, x4, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 176]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 160]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 176]\n\t"
+ "ldp x3, x4, [%[a], 192]\n\t"
+ "ldp x7, x8, [%[b], 192]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 208]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 208]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 192]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 208]\n\t"
+ "ldp x3, x4, [%[a], 224]\n\t"
+ "ldp x7, x8, [%[b], 224]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 240]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 240]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 224]\n\t"
+ "adcs x6, x6, x10\n\t"
+ /* Final carry (CS flag) is returned in place of the r pointer. */
+ "stp x5, x6, [%[r], 240]\n\t"
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ /* Returns 1 on carry out of the top word, 0 otherwise. */
+ return (sp_digit)r;
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b)
+{
+ /* Fully unrolled 64-word in-place subtract: four words per group via
+ * ldp/stp pairs, with one subs/sbcs borrow chain across all groups. */
+ __asm__ __volatile__ (
+ "ldp x2, x3, [%[a], 0]\n\t"
+ "ldp x6, x7, [%[b], 0]\n\t"
+ "subs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 16]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 16]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 0]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 16]\n\t"
+ "ldp x2, x3, [%[a], 32]\n\t"
+ "ldp x6, x7, [%[b], 32]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 48]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 48]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 32]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 48]\n\t"
+ "ldp x2, x3, [%[a], 64]\n\t"
+ "ldp x6, x7, [%[b], 64]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 80]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 80]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 64]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 80]\n\t"
+ "ldp x2, x3, [%[a], 96]\n\t"
+ "ldp x6, x7, [%[b], 96]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 112]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 112]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 96]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 112]\n\t"
+ "ldp x2, x3, [%[a], 128]\n\t"
+ "ldp x6, x7, [%[b], 128]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 144]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 144]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 128]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 144]\n\t"
+ "ldp x2, x3, [%[a], 160]\n\t"
+ "ldp x6, x7, [%[b], 160]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 176]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 176]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 160]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 176]\n\t"
+ "ldp x2, x3, [%[a], 192]\n\t"
+ "ldp x6, x7, [%[b], 192]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 208]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 208]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 192]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 208]\n\t"
+ "ldp x2, x3, [%[a], 224]\n\t"
+ "ldp x6, x7, [%[b], 224]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 240]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 240]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 224]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 240]\n\t"
+ "ldp x2, x3, [%[a], 256]\n\t"
+ "ldp x6, x7, [%[b], 256]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 272]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 272]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 256]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 272]\n\t"
+ "ldp x2, x3, [%[a], 288]\n\t"
+ "ldp x6, x7, [%[b], 288]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 304]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 304]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 288]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 304]\n\t"
+ "ldp x2, x3, [%[a], 320]\n\t"
+ "ldp x6, x7, [%[b], 320]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 336]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 336]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 320]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 336]\n\t"
+ "ldp x2, x3, [%[a], 352]\n\t"
+ "ldp x6, x7, [%[b], 352]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 368]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 368]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 352]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 368]\n\t"
+ "ldp x2, x3, [%[a], 384]\n\t"
+ "ldp x6, x7, [%[b], 384]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 400]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 400]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 384]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 400]\n\t"
+ "ldp x2, x3, [%[a], 416]\n\t"
+ "ldp x6, x7, [%[b], 416]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 432]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 432]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 416]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 432]\n\t"
+ "ldp x2, x3, [%[a], 448]\n\t"
+ "ldp x6, x7, [%[b], 448]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 464]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 464]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 448]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 464]\n\t"
+ "ldp x2, x3, [%[a], 480]\n\t"
+ "ldp x6, x7, [%[b], 480]\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x4, x5, [%[a], 496]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x8, x9, [%[b], 496]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a], 480]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a], 496]\n\t"
+ /* csetm with CC: all-ones when a borrow occurred, zero otherwise. */
+ "csetm %[a], cc\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+
+ /* Returns 0 on no borrow, (sp_digit)-1 on borrow out of the top word. */
+ return (sp_digit)a;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ /* Fully unrolled 64-word add: four words per group via ldp/stp pairs,
+ * with a single adds/adcs carry chain threaded through all groups. */
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "adds x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 48]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 80]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 112]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ "ldp x3, x4, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 144]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 128]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 144]\n\t"
+ "ldp x3, x4, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 176]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 160]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 176]\n\t"
+ "ldp x3, x4, [%[a], 192]\n\t"
+ "ldp x7, x8, [%[b], 192]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 208]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 208]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 192]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 208]\n\t"
+ "ldp x3, x4, [%[a], 224]\n\t"
+ "ldp x7, x8, [%[b], 224]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 240]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 240]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 224]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 240]\n\t"
+ "ldp x3, x4, [%[a], 256]\n\t"
+ "ldp x7, x8, [%[b], 256]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 272]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 272]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 256]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 272]\n\t"
+ "ldp x3, x4, [%[a], 288]\n\t"
+ "ldp x7, x8, [%[b], 288]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 304]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 304]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 288]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 304]\n\t"
+ "ldp x3, x4, [%[a], 320]\n\t"
+ "ldp x7, x8, [%[b], 320]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 336]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 336]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 320]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 336]\n\t"
+ "ldp x3, x4, [%[a], 352]\n\t"
+ "ldp x7, x8, [%[b], 352]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 368]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 368]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 352]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 368]\n\t"
+ "ldp x3, x4, [%[a], 384]\n\t"
+ "ldp x7, x8, [%[b], 384]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 400]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 400]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 384]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 400]\n\t"
+ "ldp x3, x4, [%[a], 416]\n\t"
+ "ldp x7, x8, [%[b], 416]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 432]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 432]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 416]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 432]\n\t"
+ "ldp x3, x4, [%[a], 448]\n\t"
+ "ldp x7, x8, [%[b], 448]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 464]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 464]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 448]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 464]\n\t"
+ "ldp x3, x4, [%[a], 480]\n\t"
+ "ldp x7, x8, [%[b], 480]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 496]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 496]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 480]\n\t"
+ "adcs x6, x6, x10\n\t"
+ /* Final carry (CS flag) is returned in place of the r pointer. */
+ "stp x5, x6, [%[r], 496]\n\t"
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ /* Returns 1 on carry out of the top word, 0 otherwise. */
+ return (sp_digit)r;
+}
+
+/* Add digit to a into r. (r = a + d)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * d A single precision digit added to the least significant word.
+ */
+static void sp_4096_add_zero_32(sp_digit* r, const sp_digit* a,
+ const sp_digit d)
+{
+ /* r = a + d over 32 words: d is added to the lowest word, then only
+ * the carry propagates upward (adcs with xzr). */
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adds x3, x3, %[d]\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ "ldp x3, x4, [%[a], 128]\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 128]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 144]\n\t"
+ "ldp x3, x4, [%[a], 160]\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 160]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 176]\n\t"
+ "ldp x3, x4, [%[a], 192]\n\t"
+ "ldp x5, x6, [%[a], 208]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 192]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x5, x6, [%[r], 208]\n\t"
+ "ldp x3, x4, [%[a], 224]\n\t"
+ "ldp x5, x6, [%[a], 240]\n\t"
+ "adcs x3, x3, xzr\n\t"
+ "adcs x4, x4, xzr\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 224]\n\t"
+ "adcs x6, x6, xzr\n\t"
+ /* Any final carry out of word 31 is discarded (void return). */
+ "stp x5, x6, [%[r], 240]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [d] "r" (d)
+ : "memory", "x3", "x4", "x5", "x6"
+ );
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[64];
+ sp_digit a1[32];
+ sp_digit b1[32];
+ sp_digit z2[64];
+ sp_digit u, ca, cb;
+
+ /* Karatsuba: with a = aH*2^2048 + aL, b = bH*2^2048 + bL,
+ * z0 = aL*bL, z2 = aH*bH and z1 = (aL+aH)*(bL+bH) - z0 - z2.
+ * a1/b1 hold the half-sums modulo 2^2048; ca/cb are their carries. */
+ ca = sp_2048_add_32(a1, a, &a[32]);
+ cb = sp_2048_add_32(b1, b, &b[32]);
+ u = ca & cb;
+ sp_2048_mul_32(z1, a1, b1);
+ sp_2048_mul_32(z2, &a[32], &b[32]);
+ sp_2048_mul_32(z0, a, b);
+ /* Re-add the cross terms lost when the half-sum carries were dropped:
+ * cb*a1 and ca*b1 at offset 2^2048 (masks select the term or zero). */
+ sp_2048_mask_32(r + 64, a1, 0 - cb);
+ sp_2048_mask_32(b1, b1, 0 - ca);
+ u += sp_2048_add_32(r + 64, r + 64, b1);
+ u += sp_4096_sub_in_place_64(z1, z2);
+ u += sp_4096_sub_in_place_64(z1, z0);
+ u += sp_4096_add_64(r + 32, r + 32, z1);
+ u += sp_4096_add_32(r + 64, r + 64, z2);
+ /* Copy the top half of z2 into r while propagating the net carry u. */
+ sp_4096_add_zero_32(r + 96, z2 + 32, u);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Double a into r. (r = a + a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a)
+{
+ sp_digit c = 0;
+
+ /* Loop over 32 words (256 bytes), four per iteration, doubling each
+ * word by adding it to itself with carry. */
+ __asm__ __volatile__ (
+ "add x11, %[a], 256\n\t"
+ "\n1:\n\t"
+ /* c is 0 or 1: adding -1 sets the carry flag iff c was 1, restoring
+ * the carry saved by cset at the end of the previous iteration. */
+ "adds %[c], %[c], #-1\n\t"
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "adcs x3, x3, x3\n\t"
+ "adcs x4, x4, x4\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ "cset %[c], cs\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x11"
+ );
+
+ /* Returns the final carry (0 or 1). */
+ return c;
+}
+
+#else
+/* Double a into r. (r = a + a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a)
+{
+ /* Fully unrolled 32-word double (r = a + a): each word is added to
+ * itself with one adds/adcs carry chain across offsets 0..248. */
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "adds x3, x3, x3\n\t"
+ "ldr x5, [%[a], 16]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 24]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 48]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 56]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 80]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 88]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 112]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 120]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ "ldp x3, x4, [%[a], 128]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 144]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 152]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 128]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 144]\n\t"
+ "ldp x3, x4, [%[a], 160]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 176]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 184]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 160]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 176]\n\t"
+ "ldp x3, x4, [%[a], 192]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 208]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 216]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 192]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "stp x5, x6, [%[r], 208]\n\t"
+ "ldp x3, x4, [%[a], 224]\n\t"
+ "adcs x3, x3, x3\n\t"
+ "ldr x5, [%[a], 240]\n\t"
+ "adcs x4, x4, x4\n\t"
+ "ldr x6, [%[a], 248]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "stp x3, x4, [%[r], 224]\n\t"
+ "adcs x6, x6, x6\n\t"
+ /* Final carry (CS flag) is returned in place of the r pointer. */
+ "stp x5, x6, [%[r], 240]\n\t"
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a)
+ : "memory", "x3", "x4", "x5", "x6"
+ );
+
+ /* Returns 1 on carry out of the top word, 0 otherwise. */
+ return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[64];
+ sp_digit z1[64];
+ sp_digit a1[32];
+ sp_digit u;
+
+ /* Karatsuba squaring: with a = aH*2^2048 + aL,
+ * z0 = aL^2, z2 = aH^2 and z1 = (aL+aH)^2 - z0 - z2.
+ * a1 holds the half-sum modulo 2^2048; u is its carry. */
+ u = sp_2048_add_32(a1, a, &a[32]);
+ sp_2048_sqr_32(z1, a1);
+ sp_2048_sqr_32(z2, &a[32]);
+ sp_2048_sqr_32(z0, a);
+ /* Re-add the term lost to the dropped half-sum carry: 2*u*a1 at
+ * offset 2^2048 (mask selects a1 or zero, then it is doubled). */
+ sp_2048_mask_32(r + 64, a1, 0 - u);
+ u += sp_2048_dbl_32(r + 64, r + 64);
+ u += sp_4096_sub_in_place_64(z1, z2);
+ u += sp_4096_sub_in_place_64(z1, z0);
+ u += sp_4096_add_64(r + 32, r + 32, z1);
+ u += sp_4096_add_32(r + 64, r + 64, z2);
+ /* Copy the top half of z2 into r while propagating the net carry u. */
+ sp_4096_add_zero_32(r + 96, z2 + 32, u);
+
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ /* Loop over 64 words (512 bytes), four per iteration. */
+ __asm__ __volatile__ (
+ "add x11, %[a], 512\n\t"
+ "\n1:\n\t"
+ /* c is 0 or 1: adding -1 sets the carry flag iff c was 1, restoring
+ * the carry saved by cset at the end of the previous iteration. */
+ "adds %[c], %[c], #-1\n\t"
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "ldp x7, x8, [%[b]], #16\n\t"
+ "adcs x3, x3, x7\n\t"
+ "ldp x9, x10, [%[b]], #16\n\t"
+ "adcs x4, x4, x8\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ "cset %[c], cs\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ /* Returns the final carry (0 or 1). */
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ /* Loop over 64 words (512 bytes), four per iteration. */
+ __asm__ __volatile__ (
+ "add x10, %[a], 512\n\t"
+ "\n1:\n\t"
+ /* c is 0 or -1 (csetm below): 0 - c borrows iff c was -1, restoring
+ * the borrow state from the previous iteration. */
+ "subs %[c], xzr, %[c]\n\t"
+ "ldp x2, x3, [%[a]]\n\t"
+ "ldp x4, x5, [%[a], #16]\n\t"
+ "ldp x6, x7, [%[b]], #16\n\t"
+ "sbcs x2, x2, x6\n\t"
+ "ldp x8, x9, [%[b]], #16\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "stp x2, x3, [%[a]], #16\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x4, x5, [%[a]], #16\n\t"
+ "csetm %[c], cc\n\t"
+ "cmp %[a], x10\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ /* Returns 0 on no borrow, (sp_digit)-1 on borrow. */
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ /* Product scanning (comba-style): for each output word x5 (byte offset),
+ * accumulate all a[x3/8]*b[x4/8] with x3+x4 == x5 into the x6:x7:x8
+ * triple-word accumulator. Constants are byte offsets:
+ * 504 = (64-1)*8, 512 = 64*8, 1008 = (128-2)*8. */
+ sp_digit tmp[128];
+
+ __asm__ __volatile__ (
+ "mov x5, 0\n\t"
+ "mov x6, 0\n\t"
+ "mov x7, 0\n\t"
+ "mov x8, 0\n\t"
+ "\n1:\n\t"
+ "subs x3, x5, 504\n\t"
+ "csel x3, xzr, x3, cc\n\t"
+ "sub x4, x5, x3\n\t"
+ "\n2:\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "ldr x11, [%[b], x4]\n\t"
+ "mul x9, x10, x11\n\t"
+ "umulh x10, x10, x11\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "add x3, x3, #8\n\t"
+ "sub x4, x4, #8\n\t"
+ "cmp x3, 512\n\t"
+ "b.eq 3f\n\t"
+ "cmp x3, x5\n\t"
+ "b.le 2b\n\t"
+ "\n3:\n\t"
+ /* Emit one result word and shift the accumulator down. */
+ "str x6, [%[r], x5]\n\t"
+ "mov x6, x7\n\t"
+ "mov x7, x8\n\t"
+ "mov x8, #0\n\t"
+ "add x5, x5, #8\n\t"
+ "cmp x5, 1008\n\t"
+ "b.le 1b\n\t"
+ "str x6, [%[r], x5]\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ /* Work in tmp so r may alias a or b; copy out at the end. */
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+ /* Product scanning square: off-diagonal products a[x3/8]*a[x4/8]
+ * (x3 != x4) are accumulated twice; the diagonal square (label 4)
+ * once. Accumulator is the x6:x7:x8 triple word. Constants are byte
+ * offsets: 504 = (64-1)*8, 512 = 64*8, 1008 = (128-2)*8. */
+ sp_digit tmp[128];
+
+ __asm__ __volatile__ (
+ "mov x6, 0\n\t"
+ "mov x7, 0\n\t"
+ "mov x8, 0\n\t"
+ "mov x5, 0\n\t"
+ "\n1:\n\t"
+ "subs x3, x5, 504\n\t"
+ "csel x3, xzr, x3, cc\n\t"
+ "sub x4, x5, x3\n\t"
+ "\n2:\n\t"
+ "cmp x4, x3\n\t"
+ "b.eq 4f\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "ldr x11, [%[a], x4]\n\t"
+ "mul x9, x10, x11\n\t"
+ "umulh x10, x10, x11\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "b.al 5f\n\t"
+ "\n4:\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "mul x9, x10, x10\n\t"
+ "umulh x10, x10, x10\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "\n5:\n\t"
+ "add x3, x3, #8\n\t"
+ "sub x4, x4, #8\n\t"
+ "cmp x3, 512\n\t"
+ "b.eq 3f\n\t"
+ "cmp x3, x4\n\t"
+ "b.gt 3f\n\t"
+ "cmp x3, x5\n\t"
+ "b.le 2b\n\t"
+ "\n3:\n\t"
+ /* Emit one result word and shift the accumulator down. */
+ "str x6, [%[r], x5]\n\t"
+ "mov x6, x7\n\t"
+ "mov x7, x8\n\t"
+ "mov x8, #0\n\t"
+ "add x5, x5, #8\n\t"
+ "cmp x5, 1008\n\t"
+ "b.le 1b\n\t"
+ "str x6, [%[r], x5]\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ /* Work in tmp so r may alias a; copy out at the end. */
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit inv;
+ sp_digit m0;
+
+ m0 = a[0];
+ /* Seed: inv * m0 == 1 (mod 2^4) for odd m0. */
+ inv = (((m0 + 2) & 4) << 1) + m0;
+ /* Each Newton step doubles the number of correct low bits. */
+ inv *= 2 - m0 * inv; /* correct mod 2^8 */
+ inv *= 2 - m0 * inv; /* correct mod 2^16 */
+ inv *= 2 - m0 * inv; /* correct mod 2^32 */
+ inv *= 2 - m0 * inv; /* correct mod 2^64 */
+
+ /* Montgomery needs the negated inverse: rho = -1/m0 mod 2^64. */
+ *rho = -inv;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * The product of 64 digits by one digit is 65 digits long: r[0..63]
+ * receive the low digits and the final carry is stored to r[64]
+ * (byte offset 512), so r must have room for 65 digits.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+static void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Loop form: x9 = byte offset of a[i]; x3:x4:x5 hold the running
+ * carry chain for the current/next/next+1 digit; 512 = 8*64. */
+ __asm__ __volatile__ (
+ "# A[0] * B\n\t"
+ "ldr x8, [%[a]]\n\t"
+ "mul x5, %[b], x8\n\t"
+ "umulh x3, %[b], x8\n\t"
+ "mov x4, 0\n\t"
+ "str x5, [%[r]]\n\t"
+ "mov x5, 0\n\t"
+ "mov x9, #8\n\t"
+ "1:\n\t"
+ "ldr x8, [%[a], x9]\n\t"
+ "mul x6, %[b], x8\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adds x3, x3, x6\n\t"
+ "adcs x4, x4, x7\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "str x3, [%[r], x9]\n\t"
+ "mov x3, x4\n\t"
+ "mov x4, x5\n\t"
+ "mov x5, #0\n\t"
+ "add x9, x9, #8\n\t"
+ "cmp x9, 512\n\t"
+ "b.lt 1b\n\t"
+ "str x3, [%[r], 512]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+#else
+ /* Fully unrolled form: two input digits are fetched per ldp and
+ * the running carry rotates through x3/x4/x5 (one is stored, one
+ * is the new carry target, one is zeroed each step). */
+ __asm__ __volatile__ (
+ "# A[0] * B\n\t"
+ "ldp x8, x9, [%[a]]\n\t"
+ "mul x3, %[b], x8\n\t"
+ "umulh x4, %[b], x8\n\t"
+ "mov x5, 0\n\t"
+ "# A[1] * B\n\t"
+ "str x3, [%[r]]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[2] * B\n\t"
+ "ldp x8, x9, [%[a], 16]\n\t"
+ "str x4, [%[r], 8]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[3] * B\n\t"
+ "str x5, [%[r], 16]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[4] * B\n\t"
+ "ldp x8, x9, [%[a], 32]\n\t"
+ "str x3, [%[r], 24]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[5] * B\n\t"
+ "str x4, [%[r], 32]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[6] * B\n\t"
+ "ldp x8, x9, [%[a], 48]\n\t"
+ "str x5, [%[r], 40]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[7] * B\n\t"
+ "str x3, [%[r], 48]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[8] * B\n\t"
+ "ldp x8, x9, [%[a], 64]\n\t"
+ "str x4, [%[r], 56]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[9] * B\n\t"
+ "str x5, [%[r], 64]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[10] * B\n\t"
+ "ldp x8, x9, [%[a], 80]\n\t"
+ "str x3, [%[r], 72]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[11] * B\n\t"
+ "str x4, [%[r], 80]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[12] * B\n\t"
+ "ldp x8, x9, [%[a], 96]\n\t"
+ "str x5, [%[r], 88]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[13] * B\n\t"
+ "str x3, [%[r], 96]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[14] * B\n\t"
+ "ldp x8, x9, [%[a], 112]\n\t"
+ "str x4, [%[r], 104]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[15] * B\n\t"
+ "str x5, [%[r], 112]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[16] * B\n\t"
+ "ldp x8, x9, [%[a], 128]\n\t"
+ "str x3, [%[r], 120]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[17] * B\n\t"
+ "str x4, [%[r], 128]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[18] * B\n\t"
+ "ldp x8, x9, [%[a], 144]\n\t"
+ "str x5, [%[r], 136]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[19] * B\n\t"
+ "str x3, [%[r], 144]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[20] * B\n\t"
+ "ldp x8, x9, [%[a], 160]\n\t"
+ "str x4, [%[r], 152]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[21] * B\n\t"
+ "str x5, [%[r], 160]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[22] * B\n\t"
+ "ldp x8, x9, [%[a], 176]\n\t"
+ "str x3, [%[r], 168]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[23] * B\n\t"
+ "str x4, [%[r], 176]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[24] * B\n\t"
+ "ldp x8, x9, [%[a], 192]\n\t"
+ "str x5, [%[r], 184]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[25] * B\n\t"
+ "str x3, [%[r], 192]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[26] * B\n\t"
+ "ldp x8, x9, [%[a], 208]\n\t"
+ "str x4, [%[r], 200]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[27] * B\n\t"
+ "str x5, [%[r], 208]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[28] * B\n\t"
+ "ldp x8, x9, [%[a], 224]\n\t"
+ "str x3, [%[r], 216]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[29] * B\n\t"
+ "str x4, [%[r], 224]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[30] * B\n\t"
+ "ldp x8, x9, [%[a], 240]\n\t"
+ "str x5, [%[r], 232]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[31] * B\n\t"
+ "str x3, [%[r], 240]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[32] * B\n\t"
+ "ldp x8, x9, [%[a], 256]\n\t"
+ "str x4, [%[r], 248]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[33] * B\n\t"
+ "str x5, [%[r], 256]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[34] * B\n\t"
+ "ldp x8, x9, [%[a], 272]\n\t"
+ "str x3, [%[r], 264]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[35] * B\n\t"
+ "str x4, [%[r], 272]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[36] * B\n\t"
+ "ldp x8, x9, [%[a], 288]\n\t"
+ "str x5, [%[r], 280]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[37] * B\n\t"
+ "str x3, [%[r], 288]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[38] * B\n\t"
+ "ldp x8, x9, [%[a], 304]\n\t"
+ "str x4, [%[r], 296]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[39] * B\n\t"
+ "str x5, [%[r], 304]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[40] * B\n\t"
+ "ldp x8, x9, [%[a], 320]\n\t"
+ "str x3, [%[r], 312]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[41] * B\n\t"
+ "str x4, [%[r], 320]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[42] * B\n\t"
+ "ldp x8, x9, [%[a], 336]\n\t"
+ "str x5, [%[r], 328]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[43] * B\n\t"
+ "str x3, [%[r], 336]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[44] * B\n\t"
+ "ldp x8, x9, [%[a], 352]\n\t"
+ "str x4, [%[r], 344]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[45] * B\n\t"
+ "str x5, [%[r], 352]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[46] * B\n\t"
+ "ldp x8, x9, [%[a], 368]\n\t"
+ "str x3, [%[r], 360]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[47] * B\n\t"
+ "str x4, [%[r], 368]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[48] * B\n\t"
+ "ldp x8, x9, [%[a], 384]\n\t"
+ "str x5, [%[r], 376]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[49] * B\n\t"
+ "str x3, [%[r], 384]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[50] * B\n\t"
+ "ldp x8, x9, [%[a], 400]\n\t"
+ "str x4, [%[r], 392]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[51] * B\n\t"
+ "str x5, [%[r], 400]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[52] * B\n\t"
+ "ldp x8, x9, [%[a], 416]\n\t"
+ "str x3, [%[r], 408]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[53] * B\n\t"
+ "str x4, [%[r], 416]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[54] * B\n\t"
+ "ldp x8, x9, [%[a], 432]\n\t"
+ "str x5, [%[r], 424]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[55] * B\n\t"
+ "str x3, [%[r], 432]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[56] * B\n\t"
+ "ldp x8, x9, [%[a], 448]\n\t"
+ "str x4, [%[r], 440]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[57] * B\n\t"
+ "str x5, [%[r], 448]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[58] * B\n\t"
+ "ldp x8, x9, [%[a], 464]\n\t"
+ "str x3, [%[r], 456]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[59] * B\n\t"
+ "str x4, [%[r], 464]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[60] * B\n\t"
+ "ldp x8, x9, [%[a], 480]\n\t"
+ "str x5, [%[r], 472]\n\t"
+ "mov x5, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "# A[61] * B\n\t"
+ "str x3, [%[r], 480]\n\t"
+ "mov x3, 0\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x4, x4, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x4, x4, x6\n\t"
+ "# A[62] * B\n\t"
+ "ldp x8, x9, [%[a], 496]\n\t"
+ "str x4, [%[r], 488]\n\t"
+ "mov x4, 0\n\t"
+ "mul x6, %[b], x8\n\t"
+ "adcs x5, x5, x7\n\t"
+ "umulh x7, %[b], x8\n\t"
+ "adc x3, xzr, xzr\n\t"
+ "adds x5, x5, x6\n\t"
+ "# A[63] * B\n\t"
+ "str x5, [%[r], 496]\n\t"
+ "mul x6, %[b], x9\n\t"
+ "adcs x3, x3, x7\n\t"
+ "umulh x7, %[b], x9\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x3, x3, x6\n\t"
+ "adc x4, x4, x7\n\t"
+ "stp x3, x4, [%[r], 504]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+#endif
+}
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m)
+{
+    int i;
+
+    /* Start from zero, which represents 2^4096 modulo 2^4096. */
+    for (i = 0; i < 64; i++) {
+        r[i] = 0;
+    }
+    /* r = 2^n mod m: 0 - m wraps to 2^4096 - m. */
+    sp_4096_sub_in_place_64(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant-time: b is AND-masked with m instead of branching, so the
+ * same instruction sequence runs for both mask values.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ *
+ * Returns 0 when no borrow occurred and (sp_digit)-1 when it did.
+ */
+static sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit c = 0;
+
+ /* Loop over the 64 digits (512 bytes, x8 = byte offset).
+  * "subs %[c], xzr, %[c]" re-creates the previous iteration's
+  * borrow in the carry flag (c is 0 or -1), and csetm latches the
+  * new borrow back into c after each sbcs. */
+ __asm__ __volatile__ (
+ "mov x8, #0\n\t"
+ "1:\n\t"
+ "subs %[c], xzr, %[c]\n\t"
+ "ldr x4, [%[a], x8]\n\t"
+ "ldr x5, [%[b], x8]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "csetm %[c], cc\n\t"
+ "str x4, [%[r], x8]\n\t"
+ "add x8, x8, #8\n\t"
+ "cmp x8, 512\n\t"
+ "b.lt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+ );
+
+ return c;
+#else
+ /* Fully unrolled: four digits per group; the first group starts the
+  * borrow chain with subs, all later groups continue it with sbcs.
+  * csetm at the end overwrites %[r] with the borrow mask, which is
+  * why the function returns (sp_digit)r. */
+ __asm__ __volatile__ (
+
+ "ldp x5, x7, [%[b], 0]\n\t"
+ "ldp x11, x12, [%[b], 16]\n\t"
+ "ldp x4, x6, [%[a], 0]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 16]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "subs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 0]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 16]\n\t"
+ "ldp x5, x7, [%[b], 32]\n\t"
+ "ldp x11, x12, [%[b], 48]\n\t"
+ "ldp x4, x6, [%[a], 32]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 48]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 32]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 48]\n\t"
+ "ldp x5, x7, [%[b], 64]\n\t"
+ "ldp x11, x12, [%[b], 80]\n\t"
+ "ldp x4, x6, [%[a], 64]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 80]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 64]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 80]\n\t"
+ "ldp x5, x7, [%[b], 96]\n\t"
+ "ldp x11, x12, [%[b], 112]\n\t"
+ "ldp x4, x6, [%[a], 96]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 112]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 96]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 112]\n\t"
+ "ldp x5, x7, [%[b], 128]\n\t"
+ "ldp x11, x12, [%[b], 144]\n\t"
+ "ldp x4, x6, [%[a], 128]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 144]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 128]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 144]\n\t"
+ "ldp x5, x7, [%[b], 160]\n\t"
+ "ldp x11, x12, [%[b], 176]\n\t"
+ "ldp x4, x6, [%[a], 160]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 176]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 160]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 176]\n\t"
+ "ldp x5, x7, [%[b], 192]\n\t"
+ "ldp x11, x12, [%[b], 208]\n\t"
+ "ldp x4, x6, [%[a], 192]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 208]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 192]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 208]\n\t"
+ "ldp x5, x7, [%[b], 224]\n\t"
+ "ldp x11, x12, [%[b], 240]\n\t"
+ "ldp x4, x6, [%[a], 224]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 240]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 224]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 240]\n\t"
+ "ldp x5, x7, [%[b], 256]\n\t"
+ "ldp x11, x12, [%[b], 272]\n\t"
+ "ldp x4, x6, [%[a], 256]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 272]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 256]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 272]\n\t"
+ "ldp x5, x7, [%[b], 288]\n\t"
+ "ldp x11, x12, [%[b], 304]\n\t"
+ "ldp x4, x6, [%[a], 288]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 304]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 288]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 304]\n\t"
+ "ldp x5, x7, [%[b], 320]\n\t"
+ "ldp x11, x12, [%[b], 336]\n\t"
+ "ldp x4, x6, [%[a], 320]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 336]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 320]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 336]\n\t"
+ "ldp x5, x7, [%[b], 352]\n\t"
+ "ldp x11, x12, [%[b], 368]\n\t"
+ "ldp x4, x6, [%[a], 352]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 368]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 352]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 368]\n\t"
+ "ldp x5, x7, [%[b], 384]\n\t"
+ "ldp x11, x12, [%[b], 400]\n\t"
+ "ldp x4, x6, [%[a], 384]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 400]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 384]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 400]\n\t"
+ "ldp x5, x7, [%[b], 416]\n\t"
+ "ldp x11, x12, [%[b], 432]\n\t"
+ "ldp x4, x6, [%[a], 416]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 432]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 416]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 432]\n\t"
+ "ldp x5, x7, [%[b], 448]\n\t"
+ "ldp x11, x12, [%[b], 464]\n\t"
+ "ldp x4, x6, [%[a], 448]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 464]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 448]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 464]\n\t"
+ "ldp x5, x7, [%[b], 480]\n\t"
+ "ldp x11, x12, [%[b], 496]\n\t"
+ "ldp x4, x6, [%[a], 480]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 496]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 480]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 496]\n\t"
+ "csetm %[r], cc\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+ );
+
+ /* r's register was replaced by the borrow mask above. */
+ return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ "ldp x14, x15, [%[m], 0]\n\t"
+ "ldp x16, x17, [%[m], 16]\n\t"
+ "ldp x19, x20, [%[m], 32]\n\t"
+ "ldp x21, x22, [%[m], 48]\n\t"
+ "ldp x23, x24, [%[m], 64]\n\t"
+ "ldp x25, x26, [%[m], 80]\n\t"
+ "ldp x27, x28, [%[m], 96]\n\t"
+ "# i = 64\n\t"
+ "mov x4, 64\n\t"
+ "ldp x12, x13, [%[a], 0]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul x9, %[mp], x12\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "mul x7, x14, x9\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x12, x12, x7\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x6, x8, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x12, x13, x7\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr x13, [%[a], 16]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "mul x7, x16, x9\n\t"
+ "adds x12, x12, x6\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x13, x13, x7\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr x10, [%[a], 24]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "mul x7, x17, x9\n\t"
+ "adds x13, x13, x5\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr x11, [%[a], 32]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "str x10, [%[a], 24]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr x10, [%[a], 40]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "str x11, [%[a], 32]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+6] += m[6] * mu\n\t"
+ "ldr x11, [%[a], 48]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x21, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x21, x9\n\t"
+ "str x10, [%[a], 40]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+7] += m[7] * mu\n\t"
+ "ldr x10, [%[a], 56]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x22, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x22, x9\n\t"
+ "str x11, [%[a], 48]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+8] += m[8] * mu\n\t"
+ "ldr x11, [%[a], 64]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x23, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x23, x9\n\t"
+ "str x10, [%[a], 56]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+9] += m[9] * mu\n\t"
+ "ldr x10, [%[a], 72]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x24, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x24, x9\n\t"
+ "str x11, [%[a], 64]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+10] += m[10] * mu\n\t"
+ "ldr x11, [%[a], 80]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x25, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x25, x9\n\t"
+ "str x10, [%[a], 72]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+11] += m[11] * mu\n\t"
+ "ldr x10, [%[a], 88]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x26, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x26, x9\n\t"
+ "str x11, [%[a], 80]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+12] += m[12] * mu\n\t"
+ "ldr x11, [%[a], 96]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x27, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x27, x9\n\t"
+ "str x10, [%[a], 88]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+13] += m[13] * mu\n\t"
+ "ldr x10, [%[a], 104]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x28, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x28, x9\n\t"
+ "str x11, [%[a], 96]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+14] += m[14] * mu\n\t"
+ "ldr x11, [%[a], 112]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 112]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 104]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+15] += m[15] * mu\n\t"
+ "ldr x10, [%[a], 120]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 120]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 112]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+16] += m[16] * mu\n\t"
+ "ldr x11, [%[a], 128]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 128]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 120]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+17] += m[17] * mu\n\t"
+ "ldr x10, [%[a], 136]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 136]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 128]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+18] += m[18] * mu\n\t"
+ "ldr x11, [%[a], 144]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 144]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 136]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+19] += m[19] * mu\n\t"
+ "ldr x10, [%[a], 152]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 152]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 144]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+20] += m[20] * mu\n\t"
+ "ldr x11, [%[a], 160]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 160]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 152]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+21] += m[21] * mu\n\t"
+ "ldr x10, [%[a], 168]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 168]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 160]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+22] += m[22] * mu\n\t"
+ "ldr x11, [%[a], 176]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 176]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 168]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+23] += m[23] * mu\n\t"
+ "ldr x10, [%[a], 184]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 184]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 176]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+24] += m[24] * mu\n\t"
+ "ldr x11, [%[a], 192]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 192]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 184]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+25] += m[25] * mu\n\t"
+ "ldr x10, [%[a], 200]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 200]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 192]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+26] += m[26] * mu\n\t"
+ "ldr x11, [%[a], 208]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 208]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 200]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+27] += m[27] * mu\n\t"
+ "ldr x10, [%[a], 216]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 216]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 208]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+28] += m[28] * mu\n\t"
+ "ldr x11, [%[a], 224]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 224]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 216]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+29] += m[29] * mu\n\t"
+ "ldr x10, [%[a], 232]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 232]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 224]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+30] += m[30] * mu\n\t"
+ "ldr x11, [%[a], 240]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 240]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 232]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+31] += m[31] * mu\n\t"
+ "ldr x10, [%[a], 248]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 248]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 240]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+32] += m[32] * mu\n\t"
+ "ldr x11, [%[a], 256]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 256]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 248]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+33] += m[33] * mu\n\t"
+ "ldr x10, [%[a], 264]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 264]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 256]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+34] += m[34] * mu\n\t"
+ "ldr x11, [%[a], 272]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 272]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 264]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+35] += m[35] * mu\n\t"
+ "ldr x10, [%[a], 280]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 280]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 272]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+36] += m[36] * mu\n\t"
+ "ldr x11, [%[a], 288]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 288]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 280]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+37] += m[37] * mu\n\t"
+ "ldr x10, [%[a], 296]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 296]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 288]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+38] += m[38] * mu\n\t"
+ "ldr x11, [%[a], 304]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 304]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 296]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+39] += m[39] * mu\n\t"
+ "ldr x10, [%[a], 312]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 312]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 304]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+40] += m[40] * mu\n\t"
+ "ldr x11, [%[a], 320]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 320]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 312]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+41] += m[41] * mu\n\t"
+ "ldr x10, [%[a], 328]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 328]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 320]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+42] += m[42] * mu\n\t"
+ "ldr x11, [%[a], 336]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 336]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 328]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+43] += m[43] * mu\n\t"
+ "ldr x10, [%[a], 344]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 344]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 336]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+44] += m[44] * mu\n\t"
+ "ldr x11, [%[a], 352]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 352]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 344]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+45] += m[45] * mu\n\t"
+ "ldr x10, [%[a], 360]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 360]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 352]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+46] += m[46] * mu\n\t"
+ "ldr x11, [%[a], 368]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 368]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 360]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+47] += m[47] * mu\n\t"
+ "ldr x10, [%[a], 376]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 376]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 368]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+48] += m[48] * mu\n\t"
+ "ldr x11, [%[a], 384]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 384]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 376]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+49] += m[49] * mu\n\t"
+ "ldr x10, [%[a], 392]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 392]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 384]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+50] += m[50] * mu\n\t"
+ "ldr x11, [%[a], 400]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 400]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 392]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+51] += m[51] * mu\n\t"
+ "ldr x10, [%[a], 408]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 408]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 400]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+52] += m[52] * mu\n\t"
+ "ldr x11, [%[a], 416]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 416]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 408]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+53] += m[53] * mu\n\t"
+ "ldr x10, [%[a], 424]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 424]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 416]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+54] += m[54] * mu\n\t"
+ "ldr x11, [%[a], 432]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 432]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 424]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+55] += m[55] * mu\n\t"
+ "ldr x10, [%[a], 440]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 440]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 432]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+56] += m[56] * mu\n\t"
+ "ldr x11, [%[a], 448]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 448]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 440]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+57] += m[57] * mu\n\t"
+ "ldr x10, [%[a], 456]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 456]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 448]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+58] += m[58] * mu\n\t"
+ "ldr x11, [%[a], 464]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 464]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 456]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+59] += m[59] * mu\n\t"
+ "ldr x10, [%[a], 472]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 472]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 464]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+60] += m[60] * mu\n\t"
+ "ldr x11, [%[a], 480]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 480]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 472]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+61] += m[61] * mu\n\t"
+ "ldr x10, [%[a], 488]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 488]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x11, [%[a], 480]\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+62] += m[62] * mu\n\t"
+ "ldr x11, [%[a], 496]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "ldr x8, [%[m], 496]\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "str x10, [%[a], 488]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+63] += m[63] * mu\n\t"
+ "ldr x10, [%[a], 504]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "ldr x8, [%[m], 504]\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x8, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x8, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x8, x8, %[ca]\n\t"
+ "str x11, [%[a], 496]\n\t"
+ "cset %[ca], cs\n\t"
+ "adds x10, x10, x6\n\t"
+ "ldr x11, [%[a], 512]\n\t"
+ "str x10, [%[a], 504]\n\t"
+ "adcs x11, x11, x8\n\t"
+ "str x11, [%[a], 512]\n\t"
+ "adc %[ca], %[ca], xzr\n\t"
+ "subs x4, x4, 1\n\t"
+ "add %[a], %[a], 8\n\t"
+ "bne 1b\n\t"
+ "stp x12, x13, [%[a], 0]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
+ );
+
+ sp_4096_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication, in Montgomery form.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ /* Double-width (128 digit) product, then Montgomery-reduce in place. */
+ sp_4096_mul_64(r, a, b);
+ sp_4096_mont_reduce_64(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring, in Montgomery form.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Double-width (128 digit) square, then Montgomery-reduce in place. */
+ sp_4096_sqr_64(r, a);
+ sp_4096_mont_reduce_64(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Works by repeatedly estimating ~32 quotient bits at a time using
+ * x5 = (div >> 32) + 1 (an over-estimate of the divisor, so each
+ * quotient estimate is an under-estimate and never overshoots),
+ * subtracting estimate * div from (d1|d0), and accumulating the
+ * estimates in x6. A final exact udiv handles the small remainder.
+ *
+ * NOTE(review): %[d1] and %[d0] are declared as input-only operands but
+ * are written by the asm; this relies on the compiler not assuming the
+ * registers still hold d1/d0 afterwards — consider "+r" constraints.
+ */
+static sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div)
+{
+ sp_digit r;
+
+ __asm__ __volatile__ (
+ /* x5 = (div >> 32) + 1: rounded-up top half of the divisor. */
+ "lsr x5, %[div], 32\n\t"
+ "add x5, x5, 1\n\t"
+
+ /* First estimate covers quotient bits 32..63; x6 accumulates. */
+ "udiv x3, %[d1], x5\n\t"
+ "lsl x6, x3, 32\n\t"
+ "mul x4, %[div], x6\n\t"
+ "umulh x3, %[div], x6\n\t"
+ /* (d1|d0) -= estimate * div (128-bit subtract). */
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ /* Second estimate from what is left in the high word. */
+ "udiv x3, %[d1], x5\n\t"
+ "lsl x3, x3, 32\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "umulh x3, %[div], x3\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ /* x3 = middle 64 bits of the remaining (d1|d0). */
+ "lsr x3, %[d0], 32\n\t"
+ "orr x3, x3, %[d1], lsl 32\n\t"
+
+ /* Third estimate covers quotient bits 0..31. */
+ "udiv x3, x3, x5\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "umulh x3, %[div], x3\n\t"
+ "subs %[d0], %[d0], x4\n\t"
+ "sbc %[d1], %[d1], x3\n\t"
+
+ "lsr x3, %[d0], 32\n\t"
+ "orr x3, x3, %[d1], lsl 32\n\t"
+
+ /* Fourth estimate; high half is now zero so only d0 is adjusted. */
+ "udiv x3, x3, x5\n\t"
+ "add x6, x6, x3\n\t"
+ "mul x4, %[div], x3\n\t"
+ "sub %[d0], %[d0], x4\n\t"
+
+ /* Exact division of the small remainder; r = sum of estimates. */
+ "udiv x3, %[d0], %[div]\n\t"
+ "add %[r], x6, x3\n\t"
+
+ : [r] "=r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "x3", "x4", "x5", "x6"
+ );
+
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer holding the masked result.
+ * a A single precision integer to mask.
+ * m Mask to AND against each digit (typically 0 or all ones).
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int j;
+
+ /* Simple digit-by-digit mask to keep code size down. */
+ for (j = 0; j < 64; j++)
+ r[j] = a[j] & m;
+#else
+ int j;
+
+ /* Mask four digits per iteration. */
+ for (j = 0; j < 64; j += 4) {
+ r[j + 0] = a[j + 0] & m;
+ r[j + 1] = a[j + 1] & m;
+ r[j + 2] = a[j + 2] & m;
+ r[j + 3] = a[j + 3] & m;
+ }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ *
+ * Register roles in both variants:
+ *   x2 = running result (-1, 0 or +1),
+ *   x3 = constant +1,
+ *   x4 = "still equal" mask: all ones until the first differing digit
+ *        pair, zero afterwards. Both inputs are ANDed with x4 so all
+ *        digits after the first difference compare as 0 vs 0, keeping
+ *        the instruction stream identical regardless of the data.
+ * The final "eor x2, x4" yields 0 when every digit matched (both are
+ * still all ones) and x2 otherwise (x4 is zero by then).
+ */
+static int64_t sp_4096_cmp_64(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ /* x5 = byte offset of the current digit; walk 504 down to 0. */
+ "mov x5, 504\n\t"
+ "1:\n\t"
+ "ldr x6, [%[a], x5]\n\t"
+ "ldr x7, [%[b], x5]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x6, x6, x7\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "subs x5, x5, #8\n\t"
+ "b.cs 1b\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+ );
+#else
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ /* Fully unrolled: compare digit pairs from the most significant
+ * (offsets 504/496) down to the least significant (8/0). Each
+ * 16-byte block handles two digits, higher digit (x6/x8) first. */
+ "ldp x5, x6, [%[a], 496]\n\t"
+ "ldp x7, x8, [%[b], 496]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 480]\n\t"
+ "ldp x7, x8, [%[b], 480]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 464]\n\t"
+ "ldp x7, x8, [%[b], 464]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 448]\n\t"
+ "ldp x7, x8, [%[b], 448]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 432]\n\t"
+ "ldp x7, x8, [%[b], 432]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 416]\n\t"
+ "ldp x7, x8, [%[b], 416]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 400]\n\t"
+ "ldp x7, x8, [%[b], 400]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 384]\n\t"
+ "ldp x7, x8, [%[b], 384]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 368]\n\t"
+ "ldp x7, x8, [%[b], 368]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 352]\n\t"
+ "ldp x7, x8, [%[b], 352]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 336]\n\t"
+ "ldp x7, x8, [%[b], 336]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 320]\n\t"
+ "ldp x7, x8, [%[b], 320]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 304]\n\t"
+ "ldp x7, x8, [%[b], 304]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 288]\n\t"
+ "ldp x7, x8, [%[b], 288]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 272]\n\t"
+ "ldp x7, x8, [%[b], 272]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 256]\n\t"
+ "ldp x7, x8, [%[b], 256]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 240]\n\t"
+ "ldp x7, x8, [%[b], 240]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 224]\n\t"
+ "ldp x7, x8, [%[b], 224]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 208]\n\t"
+ "ldp x7, x8, [%[b], 208]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 192]\n\t"
+ "ldp x7, x8, [%[b], 192]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "ldp x7, x8, [%[b], 176]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "ldp x7, x8, [%[b], 144]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "ldp x7, x8, [%[b], 112]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "ldp x7, x8, [%[b], 80]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "ldp x7, x8, [%[b], 48]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "ldp x7, x8, [%[b], 16]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "ldp x5, x6, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x8, x8, x4\n\t"
+ "subs x6, x6, x8\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x5, x5, x7\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8"
+ );
+#endif
+
+ /* The asm wrote the comparison result into the register holding a. */
+ return (int64_t)a;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit work[128];
+ sp_digit prod[65];
+ sp_digit hi;
+ sp_digit q;
+ int i;
+
+ (void)m;
+
+ /* Each quotient word is estimated from the top word of the divisor. */
+ hi = d[63];
+ XMEMCPY(work, a, sizeof(*work) * 2 * 64);
+ for (i = 63; i >= 0; i--) {
+ q = div_4096_word_64(work[64 + i], work[64 + i - 1], hi);
+
+ sp_4096_mul_d_64(prod, d, q);
+ work[64 + i] += sp_4096_sub_in_place_64(&work[i], prod);
+ work[64 + i] -= prod[64];
+ /* Correct any over-estimate by adding d back while the top word
+ * is non-zero; masking keeps this branch-free (constant time). */
+ sp_4096_mask_64(prod, d, work[64 + i]);
+ work[64 + i] += sp_4096_add_64(&work[i], &work[i], prod);
+ sp_4096_mask_64(prod, d, work[64 + i]);
+ work[64 + i] += sp_4096_add_64(&work[i], &work[i], prod);
+ }
+
+ /* Final conditional subtraction leaves the remainder in r. */
+ q = sp_4096_cmp_64(work, d) >= 0;
+ sp_4096_cond_sub_64(r, work, d, (sp_digit)0 - q);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Division computes the remainder directly; quotient is not needed. */
+ return sp_4096_div_64(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns 0 on no borrow, all-ones mask when the subtraction borrowed.
+ */
+static sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* x11 = one past the end of a (64 digits * 8 bytes). */
+ "add x11, %[a], 512\n\t"
+ "\n1:\n\t"
+ /* Re-materialize the previous iteration's borrow (c is 0 or -1)
+ * into the carry flag for the sbcs chain below. */
+ "subs %[c], xzr, %[c]\n\t"
+ /* Subtract four digits per iteration with post-indexed loads. */
+ "ldp x3, x4, [%[a]], #16\n\t"
+ "ldp x5, x6, [%[a]], #16\n\t"
+ "ldp x7, x8, [%[b]], #16\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x9, x10, [%[b]], #16\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r]], #16\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r]], #16\n\t"
+ /* c = -1 if the block borrowed (carry clear), else 0. */
+ "csetm %[c], cc\n\t"
+ "cmp %[a], x11\n\t"
+ "b.ne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns 0 on no borrow, all-ones mask when the subtraction borrowed.
+ */
+static sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ /* Fully unrolled 64-digit subtract: `subs` starts the borrow
+ * chain, every following digit uses `sbcs`. No instruction
+ * between them may disturb the flags. */
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "subs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldp x3, x4, [%[a], 32]\n\t"
+ "ldp x7, x8, [%[b], 32]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 48]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 48]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 32]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 48]\n\t"
+ "ldp x3, x4, [%[a], 64]\n\t"
+ "ldp x7, x8, [%[b], 64]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 80]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 80]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 64]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x3, x4, [%[a], 96]\n\t"
+ "ldp x7, x8, [%[b], 96]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 112]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 112]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 96]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 112]\n\t"
+ "ldp x3, x4, [%[a], 128]\n\t"
+ "ldp x7, x8, [%[b], 128]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 144]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 144]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 128]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 144]\n\t"
+ "ldp x3, x4, [%[a], 160]\n\t"
+ "ldp x7, x8, [%[b], 160]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 176]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 176]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 160]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 176]\n\t"
+ "ldp x3, x4, [%[a], 192]\n\t"
+ "ldp x7, x8, [%[b], 192]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 208]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 208]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 192]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 208]\n\t"
+ "ldp x3, x4, [%[a], 224]\n\t"
+ "ldp x7, x8, [%[b], 224]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 240]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 240]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 224]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 240]\n\t"
+ "ldp x3, x4, [%[a], 256]\n\t"
+ "ldp x7, x8, [%[b], 256]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 272]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 272]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 256]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 272]\n\t"
+ "ldp x3, x4, [%[a], 288]\n\t"
+ "ldp x7, x8, [%[b], 288]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 304]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 304]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 288]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 304]\n\t"
+ "ldp x3, x4, [%[a], 320]\n\t"
+ "ldp x7, x8, [%[b], 320]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 336]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 336]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 320]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 336]\n\t"
+ "ldp x3, x4, [%[a], 352]\n\t"
+ "ldp x7, x8, [%[b], 352]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 368]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 368]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 352]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 368]\n\t"
+ "ldp x3, x4, [%[a], 384]\n\t"
+ "ldp x7, x8, [%[b], 384]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 400]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 400]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 384]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 400]\n\t"
+ "ldp x3, x4, [%[a], 416]\n\t"
+ "ldp x7, x8, [%[b], 416]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 432]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 432]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 416]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 432]\n\t"
+ "ldp x3, x4, [%[a], 448]\n\t"
+ "ldp x7, x8, [%[b], 448]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 464]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 464]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 448]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 464]\n\t"
+ "ldp x3, x4, [%[a], 480]\n\t"
+ "ldp x7, x8, [%[b], 480]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 496]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 496]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 480]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 496]\n\t"
+ /* r register = -1 if the full subtraction borrowed, else 0. */
+ "csetm %[r], cc\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ /* The asm overwrote the register holding r with the borrow mask. */
+ return (sp_digit)r;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit work[128];
+ sp_digit prod[65];
+ sp_digit hi;
+ sp_digit q;
+ int i;
+
+ (void)m;
+
+ /* Each quotient word is estimated from the top word of the divisor. */
+ hi = d[63];
+ XMEMCPY(work, a, sizeof(*work) * 2 * 64);
+ for (i = 63; i >= 0; i--) {
+ q = div_4096_word_64(work[64 + i], work[64 + i - 1], hi);
+
+ sp_4096_mul_d_64(prod, d, q);
+ work[64 + i] += sp_4096_sub_in_place_64(&work[i], prod);
+ work[64 + i] -= prod[64];
+ /* Correct an over-estimate by adding d back, at most twice.
+ * Branching on the data makes this the non-constant-time path. */
+ if (work[64 + i] != 0) {
+ work[64 + i] += sp_4096_add_64(&work[i], &work[i], d);
+ if (work[64 + i] != 0)
+ work[64 + i] += sp_4096_add_64(&work[i], &work[i], d);
+ }
+ }
+
+ /* work may still be >= d: compare from the top digit down and
+ * subtract the divisor once more if so. */
+ for (i = 63; i > 0; i--) {
+ if (work[i] != d[i])
+ break;
+ }
+ if (work[i] >= d[i]) {
+ sp_4096_sub_64(r, work, d);
+ }
+ else {
+ XMEMCPY(r, work, sizeof(*work) * 64);
+ }
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_4096_div_64_cond that discards the quotient.  The
+ * "_cond" variant is not constant-time (the underlying division branches
+ * on data), so it should only be used with public values.
+ *
+ * r A single precision number that is the reduced result (64 digits).
+ * a A single precision number that is to be reduced (2*64 digits).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_64_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit windowed Montgomery exponentiation: 16 precomputed powers,
+ * four squarings and one table multiply per window.  The table multiply is
+ * performed for every window (t[0] holds the Montgomery normalizer), which
+ * keeps the operation sequence independent of the exponent bits.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Reduce a modulo m first when non-zero.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][128];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_64(norm, m);
+
+        /* t[1] = a * R mod m: place a in the upper half of a 128-word value
+         * (a * 2^4096) and reduce, yielding a in Montgomery form. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_4096_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute the window table: t[i] = a^i in Montgomery form. */
+        sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+
+        /* Extract the top (possibly short) window so that the remaining
+         * bit count is a multiple of 4.  n is a shift register of upcoming
+         * exponent bits; c counts valid bits left in n. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 4;
+        if (c == 64) {
+            c = 60;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=4; ) {
+            /* Refill the bit register from the next exponent word when the
+             * current word is exhausted or a window straddles two words. */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 60;
+                n <<= 4;
+                c = 60;
+            }
+            else if (c < 4) {
+                y = n >> 60;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 60) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            /* One 4-bit window: 4 squarings then multiply by t[y]. */
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+
+            sp_4096_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form and fully reduce into [0, m). */
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_4096_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
+        sp_4096_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit windowed Montgomery exponentiation: 32 precomputed powers,
+ * five squarings and one table multiply per window.  Larger table than the
+ * WOLFSSL_SP_SMALL variant above in exchange for fewer multiplies.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Reduce a modulo m first when non-zero.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][128];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_64(norm, m);
+
+        /* t[1] = a * R mod m: place a in the upper half of a 128-word value
+         * (a * 2^4096) and reduce, yielding a in Montgomery form. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_4096_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute the window table: t[i] = a^i in Montgomery form. */
+        sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_64(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_64(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_64(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_64(t[20], t[10], m, mp);
+        sp_4096_mont_mul_64(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_64(t[22], t[11], m, mp);
+        sp_4096_mont_mul_64(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_64(t[24], t[12], m, mp);
+        sp_4096_mont_mul_64(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_64(t[26], t[13], m, mp);
+        sp_4096_mont_mul_64(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_64(t[28], t[14], m, mp);
+        sp_4096_mont_mul_64(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_64(t[30], t[15], m, mp);
+        sp_4096_mont_mul_64(t[31], t[16], t[15], m, mp);
+
+        /* Extract the top (possibly short) window so that the remaining
+         * bit count is a multiple of 5.  n is a shift register of upcoming
+         * exponent bits; c counts valid bits left in n. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 5;
+        if (c == 64) {
+            c = 59;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=5; ) {
+            /* Refill the bit register from the next exponent word when the
+             * current word is exhausted or a window straddles two words. */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 59;
+                n <<= 5;
+                c = 59;
+            }
+            else if (c < 5) {
+                y = n >> 59;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            /* One 5-bit window: 5 squarings then multiply by t[y]. */
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+
+            sp_4096_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form and fully reduce into [0, m). */
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_4096_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
+        sp_4096_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * All values involved (base, exponent, modulus) are public, so the
+ * non-constant-time "_cond" reduction helpers may be used here.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent (must fit in 64 bits).
+ * mm Modulus (must be exactly 4096 bits).
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128], m[64], r[128];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 512 ||
+                                                  mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 64 * 2;
+        m = r + 64 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        /* The base is loaded into the upper half of a (128 words). */
+        ah = a + 64;
+
+        sp_4096_from_bin(ah, 64, in, inLen);
+#if DIGIT_BIT >= 64
+        e[0] = em->dp[0];
+#else
+        /* mp_int digits are narrower than 64 bits: the exponent (checked to
+         * be <= 64 bits above) may span two digits. */
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 64, mm);
+
+        if (e[0] == 0x3) {
+            /* Common exponent 3 fast path: r = (a^2 mod m) * a mod m. */
+            if (err == MP_OKAY) {
+                sp_4096_sqr_64(r, ah);
+                err = sp_4096_mod_64_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_4096_mul_64(r, ah, r);
+                err = sp_4096_mod_64_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form: with the low half zeroed, the
+             * 128-word a equals base * 2^4096; reducing it mod m gives the
+             * base in Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 64);
+            err = sp_4096_mod_64_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                /* Find the exponent's highest set bit. */
+                for (i = 63; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                /* Left-to-right square-and-multiply (exponent is public). */
+                XMEMCPY(r, a, sizeof(sp_digit) * 64);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_64(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_64(r, r, a, m, mp);
+                    }
+                }
+                /* Convert out of Montgomery form. */
+                XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+                sp_4096_mont_reduce_64(r, m, mp);
+
+                /* Final reduction: subtract m if r >= m (compare from the
+                 * most significant word down). */
+                for (i = 63; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_4096_sub_in_place_64(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 512U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 4096) {
+ err = MP_READ_E;
+ }
+ if (inLen > 512) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 64;
+ m = a + 128;
+ r = a;
+
+ sp_4096_from_bin(a, 64, in, inLen);
+ sp_4096_from_mp(d, 64, dm);
+ sp_4096_from_mp(m, 64, mm);
+ err = sp_4096_mod_exp_64(r, a, d, 4096, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 64);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Operates on 32 digits (2048 bits) - used by the CRT private-key path
+ * below.  Branch-free: b is ANDed with the mask, so a zero mask adds zero
+ * while executing the same instruction sequence.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ * returns the carry out of the addition (0 or 1).
+ */
+static sp_digit sp_4096_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit c = 0;
+
+    /* Word-at-a-time loop; c carries the flag between iterations:
+     * "adds c, c, #-1" sets the CPU carry flag exactly when c == 1. */
+    __asm__ __volatile__ (
+        "mov x8, #0\n\t"
+        "1:\n\t"
+        "adds %[c], %[c], #-1\n\t"
+        "ldr x4, [%[a], x8]\n\t"
+        "ldr x5, [%[b], x8]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "cset %[c], cs\n\t"
+        "str x4, [%[r], x8]\n\t"
+        "add x8, x8, #8\n\t"
+        "cmp x8, 256\n\t"
+        "b.lt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return c;
+#else
+    /* Fully unrolled: 8 groups of 4 words, carry propagated via adcs. */
+    __asm__ __volatile__ (
+
+        "ldp x5, x7, [%[b], 0]\n\t"
+        "ldp x11, x12, [%[b], 16]\n\t"
+        "ldp x4, x6, [%[a], 0]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 16]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adds x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 0]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 16]\n\t"
+        "ldp x5, x7, [%[b], 32]\n\t"
+        "ldp x11, x12, [%[b], 48]\n\t"
+        "ldp x4, x6, [%[a], 32]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 48]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 32]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 48]\n\t"
+        "ldp x5, x7, [%[b], 64]\n\t"
+        "ldp x11, x12, [%[b], 80]\n\t"
+        "ldp x4, x6, [%[a], 64]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 80]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 64]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 80]\n\t"
+        "ldp x5, x7, [%[b], 96]\n\t"
+        "ldp x11, x12, [%[b], 112]\n\t"
+        "ldp x4, x6, [%[a], 96]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 112]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 96]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 112]\n\t"
+        "ldp x5, x7, [%[b], 128]\n\t"
+        "ldp x11, x12, [%[b], 144]\n\t"
+        "ldp x4, x6, [%[a], 128]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 144]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 128]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 144]\n\t"
+        "ldp x5, x7, [%[b], 160]\n\t"
+        "ldp x11, x12, [%[b], 176]\n\t"
+        "ldp x4, x6, [%[a], 160]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 176]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 160]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 176]\n\t"
+        "ldp x5, x7, [%[b], 192]\n\t"
+        "ldp x11, x12, [%[b], 208]\n\t"
+        "ldp x4, x6, [%[a], 192]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 208]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 192]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 208]\n\t"
+        "ldp x5, x7, [%[b], 224]\n\t"
+        "ldp x11, x12, [%[b], 240]\n\t"
+        "ldp x4, x6, [%[a], 224]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 240]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "adcs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "adcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "adcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 224]\n\t"
+        "adcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 240]\n\t"
+        "cset %[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* RSA private key operation.
+ *
+ * CRT implementation (Garner's method): computes m1 = in^dp mod p and
+ * m2 = in^dq mod q with 2048-bit exponentiations, then combines them as
+ * h = qInv * (m1 - m2) mod p and result = m2 + h * q.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent (unused here; CRT parameters are used instead).
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus (only its bit length is checked).
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[64 * 2];
+    sp_digit p[32], q[32], dp[32];
+    sp_digit tmpa[64], tmpb[64];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        /* Carve the single allocation into the working buffers.  qi, dq and
+         * dp share storage: they are consumed one after the other. */
+        a = t;
+        p = a + 64 * 2;
+        q = p + 32;
+        qi = dq = dp = q + 32;
+        tmpa = qi + 32;
+        tmpb = tmpa + 64;
+
+        r = t + 64;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_4096_from_bin(a, 64, in, inLen);
+        sp_4096_from_mp(p, 32, pm);
+        sp_4096_from_mp(q, 32, qm);
+        sp_4096_from_mp(dp, 32, dpm);
+
+        /* tmpa = m1 = in^dp mod p (2048-bit exponentiation). */
+        err = sp_2048_mod_exp_32(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        /* tmpb = m2 = in^dq mod q. */
+        sp_4096_from_mp(dq, 32, dqm);
+        err = sp_2048_mod_exp_32(tmpb, a, dq, 2048, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        /* tmpa = m1 - m2; add p back (masked, up to twice) to handle the
+         * borrow without branching on secret data. */
+        c = sp_2048_sub_in_place_32(tmpa, tmpb);
+        c += sp_4096_cond_add_32(tmpa, tmpa, p, c);
+        sp_4096_cond_add_32(tmpa, tmpa, p, c);
+
+        /* tmpa = h = qInv * (m1 - m2) mod p. */
+        sp_2048_from_mp(qi, 32, qim);
+        sp_2048_mul_32(tmpa, tmpa, qi);
+        err = sp_2048_mod_32(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* r = m2 + h * q (full 4096-bit result). */
+        sp_2048_mul_32(tmpa, q, tmpa);
+        XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
+        sp_4096_add_64(r, tmpb, tmpa);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        /* Zeroize the private-key material before freeing. */
+        XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Three layouts depending on how wide an mp_digit is relative to the
+ * 64-bit sp_digit: equal (straight copy), narrower (split each sp_digit
+ * across several mp_digits) or wider (pack several sp_digits per mp_digit).
+ *
+ * a A single precision integer (64 digits, 4096 bits).
+ * r A multi-precision integer.
+ * returns MP_OKAY on success or the error from mp_grow.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+        /* Same digit width - copy directly. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
+        r->used = 64;
+        mp_clamp(r);
+#elif DIGIT_BIT < 64
+        /* mp_digit narrower than 64 bits: s tracks how many bits of a[i]
+         * have already been emitted into r->dp. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 64) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 64 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp_digit wider than 64 bits: pack sp_digits at bit offset s. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 64 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 64 - s;
+            }
+            else {
+                s += 64;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit bd[128], ed[64], md[64];
+    sp_digit* r = bd;
+    int expBits = mp_count_bits(exp);
+
+    /* Base and exponent must fit in 4096 bits; the modulus must be exactly
+     * 4096 bits. */
+    if ((mp_count_bits(base) > 4096) || (expBits > 4096) ||
+            (mp_count_bits(mod) != 4096)) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* Load the mp_int operands into fixed-size digit arrays. */
+        sp_4096_from_mp(bd, 64, base);
+        sp_4096_from_mp(ed, 64, exp);
+        sp_4096_from_mp(md, 64, mod);
+
+        err = sp_4096_mod_exp_64(r, bd, ed, expBits, md, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    /* The exponent may be secret - clear its copy from the stack. */
+    XMEMSET(ed, 0, sizeof(ed));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+/* Shift a left by n bits into r. (r = a << n)
+ *
+ * Writes 65 words: r[0..63] plus r[64], which receives the bits shifted out
+ * of the top word.  Works for n in [0, 63]: the carry of each word is built
+ * as (x >> 1) >> (63 - n) so that n == 0 contributes no carry bits.
+ * Processes words from most significant to least so r may alias a.
+ *
+ * r Result buffer, at least 65 digits.
+ * a Number to shift, 64 digits.
+ * n Number of bits to shift, 0-63.
+ */
+static void sp_4096_lshift_64(sp_digit* r, sp_digit* a, byte n)
+{
+    __asm__ __volatile__ (
+        "mov x6, 63\n\t"
+        "sub x6, x6, %[n]\n\t"
+        "ldr x3, [%[a], 504]\n\t"
+        "lsr x4, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x4, x4, x6\n\t"
+        "ldr x2, [%[a], 496]\n\t"
+        "str x4, [%[r], 512]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 488]\n\t"
+        "str x3, [%[r], 504]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 480]\n\t"
+        "str x2, [%[r], 496]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 472]\n\t"
+        "str x4, [%[r], 488]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 464]\n\t"
+        "str x3, [%[r], 480]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 456]\n\t"
+        "str x2, [%[r], 472]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 448]\n\t"
+        "str x4, [%[r], 464]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 440]\n\t"
+        "str x3, [%[r], 456]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 432]\n\t"
+        "str x2, [%[r], 448]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 424]\n\t"
+        "str x4, [%[r], 440]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 416]\n\t"
+        "str x3, [%[r], 432]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 408]\n\t"
+        "str x2, [%[r], 424]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 400]\n\t"
+        "str x4, [%[r], 416]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 392]\n\t"
+        "str x3, [%[r], 408]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 384]\n\t"
+        "str x2, [%[r], 400]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 376]\n\t"
+        "str x4, [%[r], 392]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 368]\n\t"
+        "str x3, [%[r], 384]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 360]\n\t"
+        "str x2, [%[r], 376]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 352]\n\t"
+        "str x4, [%[r], 368]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 344]\n\t"
+        "str x3, [%[r], 360]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 336]\n\t"
+        "str x2, [%[r], 352]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 328]\n\t"
+        "str x4, [%[r], 344]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 320]\n\t"
+        "str x3, [%[r], 336]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 312]\n\t"
+        "str x2, [%[r], 328]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 304]\n\t"
+        "str x4, [%[r], 320]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 296]\n\t"
+        "str x3, [%[r], 312]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 288]\n\t"
+        "str x2, [%[r], 304]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 280]\n\t"
+        "str x4, [%[r], 296]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 272]\n\t"
+        "str x3, [%[r], 288]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 264]\n\t"
+        "str x2, [%[r], 280]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 256]\n\t"
+        "str x4, [%[r], 272]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 248]\n\t"
+        "str x2, [%[r], 256]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 240]\n\t"
+        "str x2, [%[r], 256]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 232]\n\t"
+        "str x4, [%[r], 248]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 224]\n\t"
+        "str x3, [%[r], 240]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 216]\n\t"
+        "str x2, [%[r], 232]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 208]\n\t"
+        "str x4, [%[r], 224]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 200]\n\t"
+        "str x3, [%[r], 216]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 192]\n\t"
+        "str x2, [%[r], 208]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 184]\n\t"
+        "str x4, [%[r], 200]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 176]\n\t"
+        "str x3, [%[r], 192]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 168]\n\t"
+        "str x2, [%[r], 184]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 160]\n\t"
+        "str x4, [%[r], 176]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 152]\n\t"
+        "str x3, [%[r], 168]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 144]\n\t"
+        "str x2, [%[r], 160]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 136]\n\t"
+        "str x4, [%[r], 152]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 128]\n\t"
+        "str x3, [%[r], 144]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 120]\n\t"
+        "str x2, [%[r], 136]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 112]\n\t"
+        "str x4, [%[r], 128]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 104]\n\t"
+        "str x3, [%[r], 120]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 96]\n\t"
+        "str x2, [%[r], 112]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 88]\n\t"
+        "str x4, [%[r], 104]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 80]\n\t"
+        "str x3, [%[r], 96]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 72]\n\t"
+        "str x2, [%[r], 88]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 64]\n\t"
+        "str x4, [%[r], 80]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 56]\n\t"
+        "str x3, [%[r], 72]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 48]\n\t"
+        "str x2, [%[r], 64]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 40]\n\t"
+        "str x4, [%[r], 56]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 32]\n\t"
+        "str x3, [%[r], 48]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 24]\n\t"
+        "str x2, [%[r], 40]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "ldr x2, [%[a], 16]\n\t"
+        "str x4, [%[r], 32]\n\t"
+        "lsr x5, x2, 1\n\t"
+        "lsl x2, x2, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x3, x3, x5\n\t"
+        "ldr x4, [%[a], 8]\n\t"
+        "str x3, [%[r], 24]\n\t"
+        "lsr x5, x4, 1\n\t"
+        "lsl x4, x4, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x2, x2, x5\n\t"
+        "ldr x3, [%[a], 0]\n\t"
+        "str x2, [%[r], 16]\n\t"
+        "lsr x5, x3, 1\n\t"
+        "lsl x3, x3, %[n]\n\t"
+        "lsr x5, x5, x6\n\t"
+        "orr x4, x4, x5\n\t"
+        "str x3, [%[r]]\n\t"
+        "str x4, [%[r], 8]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "x2", "x3", "x4", "x5", "x6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * 6-bit windowed exponentiation specialized for base 2: instead of a
+ * Montgomery multiply by a table entry, each window multiplies by 2^y via
+ * a left shift of y bits, then folds the overflow word back in modulo m.
+ * Used for FFDHE where the generator is 2.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[128];
+    sp_digit td[65];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* 193 = 128 words for norm + 65 words for the mul_d_64 result. */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 128;
+#else
+        norm = nd;
+        tmp  = td;
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_64(norm, m);
+
+        /* Extract the top (possibly short) window so that the remaining
+         * bit count is a multiple of 6. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        c -= bits % 6;
+        if (c == 64) {
+            c = 58;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        /* r = 2^y in Montgomery form: norm shifted left y bits. */
+        sp_4096_lshift_64(r, norm, y);
+        for (; i>=0 || c>=6; ) {
+            /* Refill the bit register from the next exponent word when the
+             * current word is exhausted or a window straddles two words. */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 58;
+                n <<= 6;
+                c = 58;
+            }
+            else if (c < 6) {
+                y = n >> 58;
+                n = e[i--];
+                c = 6 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 58) & 0x3f;
+                n <<= 6;
+                c -= 6;
+            }
+
+            /* One 6-bit window: 6 squarings... */
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+            sp_4096_mont_sqr_64(r, r, m, mp);
+
+            /* ...then r *= 2^y: shift left and fold the overflow word r[64]
+             * back in as r += r[64] * norm (norm comes from
+             * sp_4096_mont_norm_64 - expected to represent 2^4096 mod m;
+             * confirm there), with a conditional subtract on carry. */
+            sp_4096_lshift_64(r, r, y);
+            sp_4096_mul_d_64(tmp, norm, r[64]);
+            r[64] = 0;
+            o = sp_4096_add_64(r, r, tmp);
+            sp_4096_cond_sub_64(r, r, m, (sp_digit)0 - o);
+        }
+
+        /* Convert out of Montgomery form and fully reduce into [0, m). */
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_4096_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
+        sp_4096_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base    Base.
+ * exp     Array of bytes that is the exponent.
+ * expLen  Length of data, in bytes, in exponent.
+ * mod     Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;
+    word32 i;
+
+    /* Inputs must fit the fixed 4096-bit implementation exactly. */
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 64, base);
+        sp_4096_from_bin(e, 64, exp, expLen);
+        sp_4096_from_mp(m, 64, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        /* Fast path for base 2 with an FFDHE-style prime (top word all 1s). */
+        if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)
+            err = sp_4096_mod_exp_2_64(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_64(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        /* Serialize big-endian and strip leading zero bytes. */
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    /* Zeroise the stack copy of the secret exponent. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. Coordinates are in Jacobian projective form;
+ * each is double width to hold unreduced products. */
+typedef struct sp_point_256 {
+    /* X ordinate of point. */
+    sp_digit x[2 * 4];
+    /* Y ordinate of point. */
+    sp_digit y[2 * 4];
+    /* Z ordinate of point. */
+    sp_digit z[2 * 4];
+    /* Indicates point is at infinity when non-zero. */
+    int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[4] = {
+    0xffffffffffffffffL,0x00000000ffffffffL,0x0000000000000000L,
+    0xffffffff00000001L
+};
+/* The Montgomery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[4] = {
+    0x0000000000000001L,0xffffffff00000000L,0xffffffffffffffffL,
+    0x00000000fffffffeL
+};
+/* The Montgomery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x0000000000000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[4] = {
+    0xf3b9cac2fc632551L,0xbce6faada7179e84L,0xffffffffffffffffL,
+    0xffffffff00000000L
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[4] = {
+    0xf3b9cac2fc63254fL,0xbce6faada7179e84L,0xffffffffffffffffL,
+    0xffffffff00000000L
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[4] = {
+    0x0c46353d039cdaafL,0x4319055258e8617bL,0x0000000000000000L,
+    0x00000000ffffffffL
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0xccd1c8aaee00bc4fL;
+#endif
+#ifdef WOLFSSL_SP_SMALL
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xf4a13945d898c296L,0x77037d812deb33a0L,0xf8bce6e563a440f2L,
+        0x6b17d1f2e12c4247L,
+        0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0xcbb6406837bf51f5L,0x2bce33576b315eceL,0x8ee7eb4a7c0f9e16L,
+        0x4fe342e2fe1a7f9bL,
+        0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x0000000000000001L,0x0000000000000000L,0x0000000000000000L,
+        0x0000000000000000L,
+        0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#endif /* WOLFSSL_SP_SMALL */
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+/* The b constant of the P256 curve equation. */
+static const sp_digit p256_b[4] = {
+    0x3bce3c3e27d2604bL,0x651d06b0cc53b0f6L,0xb3ebbd55769886bcL,
+    0x5ac635d8aa3a93e7L
+};
+#endif
+
+/* Obtain a point to work with: heap-allocate it in small-stack builds,
+ * otherwise use the caller-provided stack point.
+ *
+ * heap  Heap hint for allocation (may be NULL).
+ * sp    Caller's stack point; used when not allocating.
+ * p     Out: set to the point to use.
+ * returns MP_OKAY on success and MEMORY_E when allocation fails.
+ */
+static int sp_256_point_new_ex_4(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error; sp is unused. */
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), NULL, &(p))
+#else
+/* Set pointer to caller's stack data and return no error. */
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), &(sp), &(p))
+#endif
+
+
+/* Dispose of a point obtained via sp_256_point_new_4, optionally
+ * zeroising its data first.
+ *
+ * p      Point to dispose of (may be NULL in the malloc case).
+ * clear  Non-zero to zeroise the point data.
+ * heap   Heap hint used at allocation time.
+ */
+static void sp_256_point_free_4(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * Computes r = a * 2^256 mod p256 directly via the fixed reduction
+ * matrix for the P-256 prime: the number is split into eight 32-bit
+ * limbs and each output column t[i] is the signed combination of limbs
+ * given by the coefficient comment above it. Carries are then
+ * propagated and the final top carry o is folded back using
+ * 2^256 = 2^224 - 2^192 - 2^96 + 1 (mod p256).
+ *
+ * r  The resulting Montgomery form number.
+ * a  The number to convert.
+ * m  The modulus (prime); unused, the modulus is implicitly p256.
+ */
+static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    int64_t t[8];
+    int64_t a32[8];
+    int64_t o;
+
+    (void)m;
+
+    /* Split four 64-bit words into eight 32-bit limbs. */
+    a32[0] = a[0] & 0xffffffff;
+    a32[1] = a[0] >> 32;
+    a32[2] = a[1] & 0xffffffff;
+    a32[3] = a[1] >> 32;
+    a32[4] = a[2] & 0xffffffff;
+    a32[5] = a[2] >> 32;
+    a32[6] = a[3] & 0xffffffff;
+    a32[7] = a[3] >> 32;
+
+    /*  1  1  0 -1 -1 -1 -1  0 */
+    t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
+    /*  0  1  1  0 -1 -1 -1 -1 */
+    t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
+    /*  0  0  1  1  0 -1 -1 -1 */
+    t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
+    /* -1 -1  0  2  2  1  0 -1 */
+    t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
+    /*  0 -1 -1  0  2  2  1  0 */
+    t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
+    /*  0  0 -1 -1  0  2  2  1 */
+    t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
+    /* -1 -1  0  0  0  1  3  2 */
+    t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
+    /*  1  0 -1 -1 -1 -1  0  3 */
+    t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
+
+    /* Propagate carries across the 32-bit columns. */
+    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+    /* Fold the top carry back in: o * 2^256 mod p256. */
+    o     = t[7] >> 32; t[7] &= 0xffffffff;
+    t[0] += o;
+    t[3] -= o;
+    t[6] -= o;
+    t[7] += o;
+    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+    /* Re-pack the 32-bit columns into four 64-bit words. */
+    r[0] = (t[1] << 32) | t[0];
+    r[1] = (t[3] << 32) | t[2];
+    r[2] = (t[5] << 32) | t[4];
+    r[3] = (t[7] << 32) | t[6];
+
+    return MP_OKAY;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * The packing strategy depends on the relative digit sizes: a straight
+ * copy when mp digits are also 64 bits, otherwise bits are shifted into
+ * place across word boundaries.
+ *
+ * r     A single precision integer (4 x 64-bit words for P256).
+ * size  Maximum number of words to convert into.
+ * a     A multi-precision integer.
+ */
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+    /* Same digit width: memcpy then zero-pad. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 64
+    /* mp digits are wider: each mp digit fills one or more words.
+     * s tracks the bit offset consumed from the current mp digit. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffffffffffffl;
+        s = 64U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* mp digits are narrower: accumulate several per 64-bit word.
+     * s is the bit position within the current output word. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 64) {
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 64 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * Each coordinate is zeroed (the sp arrays are double width) and then
+ * loaded from the corresponding mp_int.
+ *
+ * p   Point of type sp_point_256 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_256_point_from_ecc_point_4(sp_point_256* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    sp_256_from_mp(p->x, 4, pm->x);
+    XMEMSET(p->y, 0, sizeof(p->y));
+    sp_256_from_mp(p->y, 4, pm->y);
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_256_from_mp(p->z, 4, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Mirrors sp_256_from_mp: a straight copy when mp digits are 64 bits,
+ * otherwise bits are re-packed across the differing digit widths.
+ *
+ * a  A single precision integer (4 x 64-bit words).
+ * r  A multi-precision integer (result).
+ * returns MP_OKAY on success or an mp_grow error code.
+ */
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+        /* Same digit width: memcpy then trim leading zero digits. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 4);
+        r->used = 4;
+        mp_clamp(r);
+#elif DIGIT_BIT < 64
+        /* mp digits are narrower: spread each word over several digits.
+         * s is the bit offset consumed from the current word. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 4; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 64) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 64 - s;
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp digits are wider: accumulate words into each digit. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 4; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 64 >= DIGIT_BIT) {
+            #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 64 - s;
+            }
+            else {
+                s += 64;
+            }
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p   Point of type sp_point_256.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_4(const sp_point_256* p, ecc_point* pm)
+{
+    /* Convert each coordinate in turn, stopping at the first failure. */
+    int err = sp_256_to_mp(p->x, pm->x);
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+/* Conditionally copy a into r using the mask m.
+ * m is -1 to copy and 0 when not.
+ *
+ * Constant time: computes r ^= (a ^ r) & m, so memory access and
+ * instruction flow are identical for both mask values.
+ *
+ * r  A single precision number to copy over.
+ * a  A single precision number to copy.
+ * m  Mask value to apply (all ones or all zeros).
+ */
+static void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    __asm__ __volatile__ (
+        "ldp x3, x4, [%[r], 0]\n\t"
+        "ldp x7, x8, [%[a], 0]\n\t"
+        "eor x7, x7, x3\n\t"
+        "ldp x5, x6, [%[r], 16]\n\t"
+        "eor x8, x8, x4\n\t"
+        "ldp x9, x10, [%[a], 16]\n\t"
+        "eor x9, x9, x5\n\t"
+        "eor x10, x10, x6\n\t"
+        "and x7, x7, %[m]\n\t"
+        "and x8, x8, %[m]\n\t"
+        "and x9, x9, %[m]\n\t"
+        "and x10, x10, %[m]\n\t"
+        "eor x3, x3, x7\n\t"
+        "eor x4, x4, x8\n\t"
+        "eor x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 0]\n\t"
+        "eor x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 16]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [m] "r" (m)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Product is a schoolbook 4x4 multiply (mul/umulh with carry chains)
+ * into x8..x15, followed by a Montgomery reduction specialised for the
+ * P-256 prime: since p256_mp_mod is 1, mu is built from the low words
+ * by shifts and adds only, and the final conditional subtraction of the
+ * modulus is done branch-free with a mask (constant time).
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime); unused, implicitly p256.
+ * mp  Montgomery multiplier; unused, known to be 1 for p256.
+ */
+SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    (void)m;
+    (void)mp;
+
+    __asm__ __volatile__ (
+        "ldp x16, x17, [%[a], 0]\n\t"
+        "ldp x21, x22, [%[b], 0]\n\t"
+        "# A[0] * B[0]\n\t"
+        "mul x8, x16, x21\n\t"
+        "ldr x19, [%[a], 16]\n\t"
+        "umulh x9, x16, x21\n\t"
+        "ldr x23, [%[b], 16]\n\t"
+        "# A[0] * B[1]\n\t"
+        "mul x4, x16, x22\n\t"
+        "ldr x20, [%[a], 24]\n\t"
+        "umulh x5, x16, x22\n\t"
+        "ldr x24, [%[b], 24]\n\t"
+        "adds x9, x9, x4\n\t"
+        "# A[1] * B[0]\n\t"
+        "mul x4, x17, x21\n\t"
+        "adc x10, xzr, x5\n\t"
+        "umulh x5, x17, x21\n\t"
+        "adds x9, x9, x4\n\t"
+        "# A[0] * B[2]\n\t"
+        "mul x4, x16, x23\n\t"
+        "adcs x10, x10, x5\n\t"
+        "umulh x5, x16, x23\n\t"
+        "adc x11, xzr, xzr\n\t"
+        "adds x10, x10, x4\n\t"
+        "# A[1] * B[1]\n\t"
+        "mul x4, x17, x22\n\t"
+        "adc x11, x11, x5\n\t"
+        "umulh x5, x17, x22\n\t"
+        "adds x10, x10, x4\n\t"
+        "# A[2] * B[0]\n\t"
+        "mul x4, x19, x21\n\t"
+        "adcs x11, x11, x5\n\t"
+        "umulh x5, x19, x21\n\t"
+        "adc x12, xzr, xzr\n\t"
+        "adds x10, x10, x4\n\t"
+        "# A[0] * B[3]\n\t"
+        "mul x4, x16, x24\n\t"
+        "adcs x11, x11, x5\n\t"
+        "umulh x5, x16, x24\n\t"
+        "adc x12, x12, xzr\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[1] * B[2]\n\t"
+        "mul x4, x17, x23\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x17, x23\n\t"
+        "adc x13, xzr, xzr\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[2] * B[1]\n\t"
+        "mul x4, x19, x22\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x19, x22\n\t"
+        "adc x13, x13, xzr\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[3] * B[0]\n\t"
+        "mul x4, x20, x21\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x20, x21\n\t"
+        "adc x13, x13, xzr\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[1] * B[3]\n\t"
+        "mul x4, x17, x24\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x17, x24\n\t"
+        "adc x13, x13, xzr\n\t"
+        "adds x12, x12, x4\n\t"
+        "# A[2] * B[2]\n\t"
+        "mul x4, x19, x23\n\t"
+        "adcs x13, x13, x5\n\t"
+        "umulh x5, x19, x23\n\t"
+        "adc x14, xzr, xzr\n\t"
+        "adds x12, x12, x4\n\t"
+        "# A[3] * B[1]\n\t"
+        "mul x4, x20, x22\n\t"
+        "adcs x13, x13, x5\n\t"
+        "umulh x5, x20, x22\n\t"
+        "adc x14, x14, xzr\n\t"
+        "adds x12, x12, x4\n\t"
+        "# A[2] * B[3]\n\t"
+        "mul x4, x19, x24\n\t"
+        "adcs x13, x13, x5\n\t"
+        "umulh x5, x19, x24\n\t"
+        "adc x14, x14, xzr\n\t"
+        "adds x13, x13, x4\n\t"
+        "# A[3] * B[2]\n\t"
+        "mul x4, x20, x23\n\t"
+        "adcs x14, x14, x5\n\t"
+        "umulh x5, x20, x23\n\t"
+        "adc x15, xzr, xzr\n\t"
+        "adds x13, x13, x4\n\t"
+        "# A[3] * B[3]\n\t"
+        "mul x4, x20, x24\n\t"
+        "adcs x14, x14, x5\n\t"
+        "umulh x5, x20, x24\n\t"
+        "adc x15, x15, xzr\n\t"
+        "adds x14, x14, x4\n\t"
+        "mov x4, x8\n\t"
+        "adc x15, x15, x5\n\t"
+        "# Start Reduction\n\t"
+        "mov x5, x9\n\t"
+        "mov x6, x10\n\t"
+        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
+        "# - a[0] << 32 << 192\n\t"
+        "# + (a[0] * 2) << 192\n\t"
+        "# a[0]-a[2] << 32\n\t"
+        "lsl x10, x10, 32\n\t"
+        "add x7, x11, x8\n\t"
+        "eor x10, x10, x9, lsr #32\n\t"
+        "lsl x9, x9, 32\n\t"
+        "add x7, x7, x8\n\t"
+        "eor x9, x9, x8, lsr #32\n\t"
+        "# + a[0]-a[2] << 32 << 64\n\t"
+        "# - a[0] << 32 << 192\n\t"
+        "adds x5, x5, x8, lsl #32\n\t"
+        "sub x7, x7, x8, lsl #32\n\t"
+        "adcs x6, x6, x9\n\t"
+        "adc x7, x7, x10\n\t"
+        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
+        "# a += mu << 256\n\t"
+        "adds x12, x12, x4\n\t"
+        "adcs x13, x13, x5\n\t"
+        "adcs x14, x14, x6\n\t"
+        "adcs x15, x15, x7\n\t"
+        "cset x8, cs\n\t"
+        "# a += mu << 192\n\t"
+        "# mu <<= 32\n\t"
+        "# a += (mu << 32) << 64\n\t"
+        "adds x11, x11, x4\n\t"
+        "adcs x12, x12, x5\n\t"
+        "adcs x13, x13, x6\n\t"
+        "lsr x16, x7, 32\n\t"
+        "adcs x14, x14, x7\n\t"
+        "lsl x7, x7, 32\n\t"
+        "adcs x15, x15, xzr\n\t"
+        "eor x7, x7, x6, lsr #32\n\t"
+        "adc x8, x8, xzr\n\t"
+        "lsl x6, x6, 32\n\t"
+        "eor x6, x6, x5, lsr #32\n\t"
+        "adds x11, x11, x6\n\t"
+        "lsl x5, x5, 32\n\t"
+        "adcs x12, x12, x7\n\t"
+        "eor x5, x5, x4, lsr #32\n\t"
+        "adcs x13, x13, x16\n\t"
+        "lsl x4, x4, 32\n\t"
+        "adcs x14, x14, xzr\n\t"
+        "adcs x15, x15, xzr\n\t"
+        "adc x8, x8, xzr\n\t"
+        "# a -= (mu << 32) << 192\n\t"
+        "subs x11, x11, x4\n\t"
+        "sbcs x12, x12, x5\n\t"
+        "sbcs x13, x13, x6\n\t"
+        "sub x8, xzr, x8\n\t"
+        "sbcs x14, x14, x7\n\t"
+        "sub x8, x8, #1\n\t"
+        "sbcs x15, x15, x16\n\t"
+        "mov x19, 0xffffffff00000001\n\t"
+        "adc x8, x8, xzr\n\t"
+        "# mask m and sub from result if overflow\n\t"
+        "# m[0] = -1 & mask = mask\n\t"
+        "subs x12, x12, x8\n\t"
+        "# m[1] = 0xffffffff & mask = mask >> 32 as mask is all 1s or 0s\n\t"
+        "lsr x17, x8, 32\n\t"
+        "sbcs x13, x13, x17\n\t"
+        "and x19, x19, x8\n\t"
+        "# m[2] = 0 & mask = 0\n\t"
+        "sbcs x14, x14, xzr\n\t"
+        "stp x12, x13, [%[r], 0]\n\t"
+        "# m[3] = 0xffffffff00000001 & mask\n\t"
+        "sbc x15, x15, x19\n\t"
+        "stp x14, x15, [%[r], 16]\n\t"
+        : [a] "+r" (a), [b] "+r" (b)
+        : [r] "r" (r)
+        : "memory", "x4", "x5", "x6", "x7", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15"
+    );
+}
+
+/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
+ *
+ * Computes the off-diagonal products once, doubles them, then adds the
+ * diagonal squares; the result in x8..x15 is reduced with the same
+ * P-256-specialised, branch-free Montgomery reduction as
+ * sp_256_mont_mul_4.
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime); unused, implicitly p256.
+ * mp  Montgomery multiplier; unused, known to be 1 for p256.
+ */
+SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    (void)m;
+    (void)mp;
+
+    __asm__ __volatile__ (
+        "ldp x16, x17, [%[a], 0]\n\t"
+        "# A[0] * A[1]\n\t"
+        "mul x9, x16, x17\n\t"
+        "ldr x19, [%[a], 16]\n\t"
+        "umulh x10, x16, x17\n\t"
+        "ldr x20, [%[a], 24]\n\t"
+        "# A[0] * A[2]\n\t"
+        "mul x4, x16, x19\n\t"
+        "umulh x5, x16, x19\n\t"
+        "adds x10, x10, x4\n\t"
+        "# A[0] * A[3]\n\t"
+        "mul x4, x16, x20\n\t"
+        "adc x11, xzr, x5\n\t"
+        "umulh x5, x16, x20\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[1] * A[2]\n\t"
+        "mul x4, x17, x19\n\t"
+        "adc x12, xzr, x5\n\t"
+        "umulh x5, x17, x19\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[1] * A[3]\n\t"
+        "mul x4, x17, x20\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x17, x20\n\t"
+        "adc x13, xzr, xzr\n\t"
+        "adds x12, x12, x4\n\t"
+        "# A[2] * A[3]\n\t"
+        "mul x4, x19, x20\n\t"
+        "adc x13, x13, x5\n\t"
+        "umulh x5, x19, x20\n\t"
+        "adds x13, x13, x4\n\t"
+        "adc x14, xzr, x5\n\t"
+        "# Double\n\t"
+        "adds x9, x9, x9\n\t"
+        "adcs x10, x10, x10\n\t"
+        "adcs x11, x11, x11\n\t"
+        "adcs x12, x12, x12\n\t"
+        "adcs x13, x13, x13\n\t"
+        "# A[0] * A[0]\n\t"
+        "mul x8, x16, x16\n\t"
+        "adcs x14, x14, x14\n\t"
+        "umulh x3, x16, x16\n\t"
+        "cset x15, cs\n\t"
+        "# A[1] * A[1]\n\t"
+        "mul x4, x17, x17\n\t"
+        "adds x9, x9, x3\n\t"
+        "umulh x5, x17, x17\n\t"
+        "adcs x10, x10, x4\n\t"
+        "# A[2] * A[2]\n\t"
+        "mul x6, x19, x19\n\t"
+        "adcs x11, x11, x5\n\t"
+        "umulh x7, x19, x19\n\t"
+        "adcs x12, x12, x6\n\t"
+        "# A[3] * A[3]\n\t"
+        "mul x16, x20, x20\n\t"
+        "adcs x13, x13, x7\n\t"
+        "umulh x17, x20, x20\n\t"
+        "adcs x14, x14, x16\n\t"
+        "mov x3, x8\n\t"
+        "adc x15, x15, x17\n\t"
+        "# Start Reduction\n\t"
+        "mov x4, x9\n\t"
+        "mov x5, x10\n\t"
+        "# mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192\n\t"
+        "# - a[0] << 32 << 192\n\t"
+        "# + (a[0] * 2) << 192\n\t"
+        "# a[0]-a[2] << 32\n\t"
+        "lsl x10, x10, 32\n\t"
+        "add x6, x11, x8\n\t"
+        "eor x10, x10, x9, lsr #32\n\t"
+        "lsl x9, x9, 32\n\t"
+        "add x6, x6, x8\n\t"
+        "eor x9, x9, x8, lsr #32\n\t"
+        "# + a[0]-a[2] << 32 << 64\n\t"
+        "# - a[0] << 32 << 192\n\t"
+        "adds x4, x4, x8, lsl #32\n\t"
+        "sub x6, x6, x8, lsl #32\n\t"
+        "adcs x5, x5, x9\n\t"
+        "adc x6, x6, x10\n\t"
+        "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
+        "# a += mu << 256\n\t"
+        "adds x12, x12, x3\n\t"
+        "adcs x13, x13, x4\n\t"
+        "adcs x14, x14, x5\n\t"
+        "adcs x15, x15, x6\n\t"
+        "cset x8, cs\n\t"
+        "# a += mu << 192\n\t"
+        "# mu <<= 32\n\t"
+        "# a += (mu << 32) << 64\n\t"
+        "adds x11, x11, x3\n\t"
+        "adcs x12, x12, x4\n\t"
+        "adcs x13, x13, x5\n\t"
+        "lsr x7, x6, 32\n\t"
+        "adcs x14, x14, x6\n\t"
+        "lsl x6, x6, 32\n\t"
+        "adcs x15, x15, xzr\n\t"
+        "eor x6, x6, x5, lsr #32\n\t"
+        "adc x8, x8, xzr\n\t"
+        "lsl x5, x5, 32\n\t"
+        "eor x5, x5, x4, lsr #32\n\t"
+        "adds x11, x11, x5\n\t"
+        "lsl x4, x4, 32\n\t"
+        "adcs x12, x12, x6\n\t"
+        "eor x4, x4, x3, lsr #32\n\t"
+        "adcs x13, x13, x7\n\t"
+        "lsl x3, x3, 32\n\t"
+        "adcs x14, x14, xzr\n\t"
+        "adcs x15, x15, xzr\n\t"
+        "adc x8, x8, xzr\n\t"
+        "# a -= (mu << 32) << 192\n\t"
+        "subs x11, x11, x3\n\t"
+        "sbcs x12, x12, x4\n\t"
+        "sbcs x13, x13, x5\n\t"
+        "sub x8, xzr, x8\n\t"
+        "sbcs x14, x14, x6\n\t"
+        "sub x8, x8, #1\n\t"
+        "sbcs x15, x15, x7\n\t"
+        "mov x17, 0xffffffff00000001\n\t"
+        "adc x8, x8, xzr\n\t"
+        "# mask m and sub from result if overflow\n\t"
+        "# m[0] = -1 & mask = mask\n\t"
+        "subs x12, x12, x8\n\t"
+        "# m[1] = 0xffffffff & mask = mask >> 32 as mask is all 1s or 0s\n\t"
+        "lsr x16, x8, 32\n\t"
+        "sbcs x13, x13, x16\n\t"
+        "and x17, x17, x8\n\t"
+        "# m[2] = 0 & mask = 0\n\t"
+        "sbcs x14, x14, xzr\n\t"
+        "stp x12, x13, [%[r], 0]\n\t"
+        "# m[3] = 0xffffffff00000001 & mask\n\t"
+        "sbc x15, x15, x17\n\t"
+        "stp x14, x15, [%[r], 16]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20"
+    );
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_4(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    int i;
+
+    /* First squaring moves a into r; the remaining n-1 square in place. */
+    sp_256_mont_sqr_4(r, a, m, mp);
+    for (i = 1; i < n; i++) {
+        sp_256_mont_sqr_4(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve: the exponent p - 2 used to compute an
+ * inverse via Fermat's little theorem (a^(p-2) = a^-1 mod p). */
+static const uint64_t p256_mod_minus_2[4] = {
+    0xfffffffffffffffdU,0x00000000ffffffffU,0x0000000000000000U,
+    0xffffffff00000001U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * Computes a^(p-2) mod p (Fermat's little theorem). The small build uses
+ * a bit-by-bit square-and-multiply over p256_mod_minus_2; the fast build
+ * uses a fixed addition chain (the hex comments track the partial
+ * exponent accumulated so far).
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data (at least 6 * 4 digits in the non-small build).
+ */
+static void sp_256_mont_inv_4(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 4);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_4(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+            sp_256_mont_mul_4(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 4);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 4;
+    sp_digit* t3 = td + 4 * 4;
+    /* 0x2 */
+    sp_256_mont_sqr_4(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_4(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_4(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_4(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_4(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_4(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_4(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_4(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_4(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_4(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_4(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_4(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_4(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * Words are compared from most to least significant. The mask in x4
+ * stays all-ones until the first differing word and is then cleared, so
+ * later words cannot change the verdict; every word is always examined.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int64_t sp_256_cmp_4(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    __asm__ __volatile__ (
+        "mov x2, -1\n\t"
+        "mov x3, 1\n\t"
+        "mov x4, -1\n\t"
+        "mov x5, 24\n\t"
+        "1:\n\t"
+        "ldr x6, [%[a], x5]\n\t"
+        "ldr x7, [%[b], x5]\n\t"
+        "and x6, x6, x4\n\t"
+        "and x7, x7, x4\n\t"
+        "subs x6, x6, x7\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "subs x5, x5, #8\n\t"
+        "b.cs 1b\n\t"
+        "eor %[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+#else
+    __asm__ __volatile__ (
+        "mov x2, -1\n\t"
+        "mov x3, 1\n\t"
+        "mov x4, -1\n\t"
+        "ldp x5, x6, [%[a], 0]\n\t"
+        "ldp x7, x8, [%[a], 16]\n\t"
+        "ldp x9, x10, [%[b], 0]\n\t"
+        "ldp x11, x12, [%[b], 16]\n\t"
+        "and x8, x8, x4\n\t"
+        "and x12, x12, x4\n\t"
+        "subs x8, x8, x12\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x7, x7, x4\n\t"
+        "and x11, x11, x4\n\t"
+        "subs x7, x7, x11\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x6, x6, x4\n\t"
+        "and x10, x10, x4\n\t"
+        "subs x6, x6, x10\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "and x5, x5, x4\n\t"
+        "and x9, x9, x4\n\t"
+        "subs x5, x5, x9\n\t"
+        "csel x2, x4, x2, lo\n\t"
+        "csel x4, x4, xzr, eq\n\t"
+        "csel x2, x3, x2, hi\n\t"
+        "eor %[a], x2, x4\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+#endif
+
+    return (int64_t)a;
+}
+
+/* Normalize the values in each word to 64.
+ *
+ * A no-op in this implementation: full 64-bit words are used, so there
+ * are never pending carries to propagate between words.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_256_norm_4(a)
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant time: b is ANDed with the mask so either b or zero is
+ * subtracted with the same instruction sequence either way.
+ *
+ * r  A single precision number representing condition subtract result.
+ * a  A single precision number to subtract from.
+ * b  A single precision number to subtract.
+ * m  Mask value to apply (all ones or all zeros).
+ * returns 0 when no borrow occurred, all ones (-1) on borrow (csetm cc).
+ */
+static sp_digit sp_256_cond_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    __asm__ __volatile__ (
+
+        "ldp x5, x7, [%[b], 0]\n\t"
+        "ldp x11, x12, [%[b], 16]\n\t"
+        "ldp x4, x6, [%[a], 0]\n\t"
+        "and x5, x5, %[m]\n\t"
+        "ldp x9, x10, [%[a], 16]\n\t"
+        "and x7, x7, %[m]\n\t"
+        "subs x4, x4, x5\n\t"
+        "and x11, x11, %[m]\n\t"
+        "sbcs x6, x6, x7\n\t"
+        "and x12, x12, %[m]\n\t"
+        "sbcs x9, x9, x11\n\t"
+        "stp x4, x6, [%[r], 0]\n\t"
+        "sbcs x10, x10, x12\n\t"
+        "stp x9, x10, [%[r], 16]\n\t"
+        "csetm %[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+    );
+
+    return (sp_digit)r;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * returns 0 when no borrow occurred, all ones (-1) on borrow (csetm cc).
+ */
+static sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp x3, x4, [%[a], 0]\n\t"
+        "ldp x7, x8, [%[b], 0]\n\t"
+        "subs x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 16]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 16]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 0]\n\t"
+        "sbcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 16]\n\t"
+        "csetm %[r], cc\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+/* Reduction modulo the curve order reuses the generic Montgomery
+ * reduction below (called with the order and its multiplier). */
+#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * The input is eight 64-bit words; four rounds of word-wise reduction
+ * (one per output word) fold it down to four words, followed by a
+ * constant-time conditional subtraction of the modulus: the modulus
+ * words are masked with the final carry rather than branched on.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Register roles: x9-x16 = a[0..7], x17/x19-x21 = m[0..3] (x18 is
+  * deliberately skipped - it is the platform-reserved register on
+  * AArch64), x5 = mu for the current round, x8 = carry into the next
+  * round. */
+ __asm__ __volatile__ (
+ "ldp x9, x10, [%[a], 0]\n\t"
+ "ldp x11, x12, [%[a], 16]\n\t"
+ "ldp x17, x19, [%[m], 0]\n\t"
+ "ldp x20, x21, [%[m], 16]\n\t"
+ "mov x8, xzr\n\t"
+ "# mu = a[0] * mp\n\t"
+ "mul x5, %[mp], x9\n\t"
+ "ldr x13, [%[a], 32]\n\t"
+ "# a[0+0] += m[0] * mu\n\t"
+ "mul x3, x17, x5\n\t"
+ "ldr x14, [%[a], 40]\n\t"
+ "umulh x6, x17, x5\n\t"
+ "ldr x15, [%[a], 48]\n\t"
+ "adds x9, x9, x3\n\t"
+ "ldr x16, [%[a], 56]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# a[0+1] += m[1] * mu\n\t"
+ "mul x3, x19, x5\n\t"
+ "umulh x7, x19, x5\n\t"
+ "adds x3, x3, x6\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x10, x10, x3\n\t"
+ "adc x7, x7, xzr\n\t"
+ "# a[0+2] += m[2] * mu\n\t"
+ "mul x3, x20, x5\n\t"
+ "umulh x6, x20, x5\n\t"
+ "adds x3, x3, x7\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x11, x11, x3\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# a[0+3] += m[3] * mu\n\t"
+ "mul x3, x21, x5\n\t"
+ "umulh x4, x21, x5\n\t"
+ "adds x3, x3, x6\n\t"
+ "adcs x4, x4, x8\n\t"
+ "cset x8, cs\n\t"
+ "adds x12, x12, x3\n\t"
+ "adcs x13, x13, x4\n\t"
+ "adc x8, x8, xzr\n\t"
+ "# mu = a[1] * mp\n\t"
+ "mul x5, %[mp], x10\n\t"
+ "# a[1+0] += m[0] * mu\n\t"
+ "mul x3, x17, x5\n\t"
+ "umulh x6, x17, x5\n\t"
+ "adds x10, x10, x3\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# a[1+1] += m[1] * mu\n\t"
+ "mul x3, x19, x5\n\t"
+ "umulh x7, x19, x5\n\t"
+ "adds x3, x3, x6\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x11, x11, x3\n\t"
+ "adc x7, x7, xzr\n\t"
+ "# a[1+2] += m[2] * mu\n\t"
+ "mul x3, x20, x5\n\t"
+ "umulh x6, x20, x5\n\t"
+ "adds x3, x3, x7\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x12, x12, x3\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# a[1+3] += m[3] * mu\n\t"
+ "mul x3, x21, x5\n\t"
+ "umulh x4, x21, x5\n\t"
+ "adds x3, x3, x6\n\t"
+ "adcs x4, x4, x8\n\t"
+ "cset x8, cs\n\t"
+ "adds x13, x13, x3\n\t"
+ "adcs x14, x14, x4\n\t"
+ "adc x8, x8, xzr\n\t"
+ "# mu = a[2] * mp\n\t"
+ "mul x5, %[mp], x11\n\t"
+ "# a[2+0] += m[0] * mu\n\t"
+ "mul x3, x17, x5\n\t"
+ "umulh x6, x17, x5\n\t"
+ "adds x11, x11, x3\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# a[2+1] += m[1] * mu\n\t"
+ "mul x3, x19, x5\n\t"
+ "umulh x7, x19, x5\n\t"
+ "adds x3, x3, x6\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x12, x12, x3\n\t"
+ "adc x7, x7, xzr\n\t"
+ "# a[2+2] += m[2] * mu\n\t"
+ "mul x3, x20, x5\n\t"
+ "umulh x6, x20, x5\n\t"
+ "adds x3, x3, x7\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x13, x13, x3\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# a[2+3] += m[3] * mu\n\t"
+ "mul x3, x21, x5\n\t"
+ "umulh x4, x21, x5\n\t"
+ "adds x3, x3, x6\n\t"
+ "adcs x4, x4, x8\n\t"
+ "cset x8, cs\n\t"
+ "adds x14, x14, x3\n\t"
+ "adcs x15, x15, x4\n\t"
+ "adc x8, x8, xzr\n\t"
+ "# mu = a[3] * mp\n\t"
+ "mul x5, %[mp], x12\n\t"
+ "# a[3+0] += m[0] * mu\n\t"
+ "mul x3, x17, x5\n\t"
+ "umulh x6, x17, x5\n\t"
+ "adds x12, x12, x3\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# a[3+1] += m[1] * mu\n\t"
+ "mul x3, x19, x5\n\t"
+ "umulh x7, x19, x5\n\t"
+ "adds x3, x3, x6\n\t"
+ "adc x7, x7, xzr\n\t"
+ "adds x13, x13, x3\n\t"
+ "adc x7, x7, xzr\n\t"
+ "# a[3+2] += m[2] * mu\n\t"
+ "mul x3, x20, x5\n\t"
+ "umulh x6, x20, x5\n\t"
+ "adds x3, x3, x7\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x14, x14, x3\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# a[3+3] += m[3] * mu\n\t"
+ "mul x3, x21, x5\n\t"
+ "umulh x4, x21, x5\n\t"
+ "adds x3, x3, x6\n\t"
+ "adcs x4, x4, x8\n\t"
+ "cset x8, cs\n\t"
+ "adds x15, x15, x3\n\t"
+ "adcs x16, x16, x4\n\t"
+ "adc x8, x8, xzr\n\t"
+ /* x8 holds the overall carry; x3 = 0 - x8 is an all-ones mask when a
+  * final subtraction of m is needed (constant-time conditional
+  * subtract: m's words are ANDed with the mask, never branched on). */
+ "sub x3, xzr, x8\n\t"
+ "and x17, x17, x3\n\t"
+ "and x19, x19, x3\n\t"
+ "and x20, x20, x3\n\t"
+ "and x21, x21, x3\n\t"
+ "subs x13, x13, x17\n\t"
+ "sbcs x14, x14, x19\n\t"
+ "sbcs x15, x15, x20\n\t"
+ "stp x13, x14, [%[a], 0]\n\t"
+ "sbc x16, x16, x21\n\t"
+ "stp x15, x16, [%[a], 16]\n\t"
+ :
+ : [a] "r" (a), [m] "r" (m), [mp] "r" (mp)
+ : "memory", "x3", "x4", "x5", "x8", "x6", "x7", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21"
+ );
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes x = X / Z^2 and y = Y / Z^3 (mod p256). Running
+ * sp_256_mont_reduce_4 on a value whose upper four words have been
+ * zeroed also divides out the Montgomery factor, so the affine
+ * ordinates come back as ordinary numbers less than the modulus.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*4;
+ int64_t n;
+
+ /* t1 = 1 / Z (Montgomery form). */
+ sp_256_mont_inv_4(t1, p->z, t + 2*4);
+
+ /* t2 = 1 / Z^2, t1 = 1 / Z^3. */
+ sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod);
+
+ /* x /= z^2 */
+ sp_256_mont_mul_4(r->x, p->x, t2, p256_mod, p256_mp_mod);
+ XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U);
+ sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod);
+ /* Reduce x to less than modulus */
+ n = sp_256_cmp_4(r->x, p256_mod);
+ /* Constant-time conditional subtract: mask is all ones when x >= m. */
+ sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_256_norm_4(r->x);
+
+ /* y /= z^3 */
+ sp_256_mont_mul_4(r->y, p->y, t1, p256_mod, p256_mp_mod);
+ XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U);
+ sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod);
+ /* Reduce y to less than modulus */
+ n = sp_256_cmp_4(r->y, p256_mod);
+ sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_256_norm_4(r->y);
+
+ /* Affine Z ordinate is one. */
+ XMEMSET(r->z, 0, sizeof(r->z));
+ r->z[0] = 1;
+
+}
+
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * The reduction subtracts the P-256 prime with every prime word masked
+ * by the carry flag from the addition (csetm), so no branch is taken
+ * (constant time). The m argument is not referenced by the asm; the
+ * prime's words are hard-coded.
+ *
+ * r Result of addition.
+ * a First number to add in Montgomery form.
+ * b Second number to add in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m)
+{
+ __asm__ __volatile__ (
+ "ldp x4, x5, [%[a], 0]\n\t"
+ "ldp x8, x9, [%[b], 0]\n\t"
+ "adds x4, x4, x8\n\t"
+ "ldp x6, x7, [%[a], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "ldp x10, x11, [%[b], 16]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "adcs x7, x7, x11\n\t"
+ /* x14 = all ones iff the add carried out; the masked subtrahend
+  * words x14, x14 >> 32, 0, x13 are then exactly the P-256 prime. */
+ "mov x13, 0xffffffff00000001\n\t"
+ "csetm x14, cs\n\t"
+ "subs x4, x4, x14\n\t"
+ "lsr x12, x14, 32\n\t"
+ "sbcs x5, x5, x12\n\t"
+ "and x13, x13, x14\n\t"
+ "sbcs x6, x6, xzr\n\t"
+ "stp x4, x5, [%[r],0]\n\t"
+ "sbc x7, x7, x13\n\t"
+ "stp x6, x7, [%[r],16]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
+ );
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * Reduction is the same constant-time masked subtract of the
+ * hard-coded P-256 prime as in sp_256_mont_add_4.
+ *
+ * r Result of doubling.
+ * a Number to double in Montgomery form.
+ * m Modulus (prime). Unused: the prime's words are hard-coded.
+ */
+static void sp_256_mont_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a]]\n\t"
+ "ldp x5, x6, [%[a],16]\n\t"
+ "adds x3, x3, x3\n\t"
+ "adcs x4, x4, x4\n\t"
+ "adcs x5, x5, x5\n\t"
+ "adcs x6, x6, x6\n\t"
+ /* x9 = all ones iff the doubling carried; masked words
+  * x9, x9 >> 32, 0, x8 form the P-256 prime. */
+ "mov x8, 0xffffffff00000001\n\t"
+ "csetm x9, cs\n\t"
+ "subs x3, x3, x9\n\t"
+ "lsr x7, x9, 32\n\t"
+ "sbcs x4, x4, x7\n\t"
+ "and x8, x8, x9\n\t"
+ "sbcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r],0]\n\t"
+ "sbc x6, x6, x8\n\t"
+ "stp x5, x6, [%[r],16]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+
+ (void)m;
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Computed as a doubling with conditional reduction followed by an
+ * addition of a with a second conditional reduction; both reductions
+ * use the masked, hard-coded P-256 prime (constant time).
+ *
+ * r Result of tripling.
+ * a Number to triple in Montgomery form.
+ * m Modulus (prime). Unused: the prime's words are hard-coded.
+ */
+static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ __asm__ __volatile__ (
+ "ldp x10, x11, [%[a]]\n\t"
+ "adds x3, x10, x10\n\t"
+ "ldr x12, [%[a], 16]\n\t"
+ "adcs x4, x11, x11\n\t"
+ "ldr x13, [%[a], 24]\n\t"
+ "adcs x5, x12, x12\n\t"
+ "adcs x6, x13, x13\n\t"
+ /* Reduce 2*a: subtract the prime masked by the carry flag. */
+ "mov x8, 0xffffffff00000001\n\t"
+ "csetm x9, cs\n\t"
+ "subs x3, x3, x9\n\t"
+ "lsr x7, x9, 32\n\t"
+ "sbcs x4, x4, x7\n\t"
+ "and x8, x8, x9\n\t"
+ "sbcs x5, x5, xzr\n\t"
+ "sbc x6, x6, x8\n\t"
+ /* Add a again (x10-x13 still hold the original words). */
+ "adds x3, x3, x10\n\t"
+ "adcs x4, x4, x11\n\t"
+ "adcs x5, x5, x12\n\t"
+ "adcs x6, x6, x13\n\t"
+ "mov x8, 0xffffffff00000001\n\t"
+ "csetm x9, cs\n\t"
+ "subs x3, x3, x9\n\t"
+ "lsr x7, x9, 32\n\t"
+ "sbcs x4, x4, x7\n\t"
+ "and x8, x8, x9\n\t"
+ "sbcs x5, x5, xzr\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "sbc x6, x6, x8\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "x10", "x11", "x12", "x13", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+ );
+
+ (void)m;
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * On borrow (csetm with condition cc) the hard-coded P-256 prime is
+ * added back, each prime word masked by the borrow so no branch is
+ * taken (constant time). The m argument is not referenced by the asm.
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m)
+{
+ __asm__ __volatile__ (
+ "ldp x4, x5, [%[a], 0]\n\t"
+ "ldp x8, x9, [%[b], 0]\n\t"
+ "subs x4, x4, x8\n\t"
+ "ldp x6, x7, [%[a], 16]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "ldp x10, x11, [%[b], 16]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "sbcs x7, x7, x11\n\t"
+ /* x14 = all ones iff the subtract borrowed; masked addend words
+  * x14, x14 >> 32, 0, x13 form the P-256 prime. */
+ "mov x13, 0xffffffff00000001\n\t"
+ "csetm x14, cc\n\t"
+ "adds x4, x4, x14\n\t"
+ "lsr x12, x14, 32\n\t"
+ "adcs x5, x5, x12\n\t"
+ "and x13, x13, x14\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x4, x5, [%[r],0]\n\t"
+ "adc x7, x7, x13\n\t"
+ "stp x6, x7, [%[r],16]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
+ );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * When a is odd the modulus is first added, making the value even, and
+ * the 257-bit result is then shifted right by one bit. The modulus
+ * words are masked with -(a & 1) so the conditional add takes no
+ * branch (constant time). The m argument is not referenced by the
+ * asm; the P-256 prime's words are hard-coded.
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+static void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ /* x9 = a & 1; x10 = -(a & 1) is the mask for the modulus add. */
+ "and x9, x3, 1\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "sub x10, xzr, x9\n\t"
+ "lsr x7, x10, 32\n\t"
+ "adds x3, x3, x10\n\t"
+ "and x8, x10, 0xffffffff00000001\n\t"
+ "adcs x4, x4, x7\n\t"
+ "lsr x3, x3, 1\n\t"
+ "adcs x5, x5, xzr\n\t"
+ "lsr x7, x4, 1\n\t"
+ "adcs x6, x6, x8\n\t"
+ "lsr x8, x5, 1\n\t"
+ /* x9 = carry out of the add: bit 256 of the now-even value. */
+ "cset x9, cs\n\t"
+ "lsr x10, x6, 1\n\t"
+ /* Shift right by one across words, pulling each low bit down. */
+ "orr x3, x3, x4, lsl 63\n\t"
+ "orr x4, x7, x5, lsl 63\n\t"
+ "orr x5, x8, x6, lsl 63\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "orr x6, x10, x9, lsl 63\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [m] "r" (m)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r and p may be the same point; when they differ the infinity flag is
+ * copied across first. All arithmetic is modulo the P-256 prime in
+ * Montgomery form.
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*4;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = r->x;
+ y = r->y;
+ z = r->z;
+ /* Put infinity into result. */
+ if (r != p) {
+ r->infinity = p->infinity;
+ }
+
+ /* T1 = Z * Z */
+ sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod);
+ /* Z = Y * Z */
+ sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod);
+ /* Z = 2Z */
+ sp_256_mont_dbl_4(z, z, p256_mod);
+ /* T2 = X - T1 */
+ sp_256_mont_sub_4(t2, p->x, t1, p256_mod);
+ /* T1 = X + T1 */
+ sp_256_mont_add_4(t1, p->x, t1, p256_mod);
+ /* T2 = T1 * T2 */
+ sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod);
+ /* T1 = 3T2 */
+ sp_256_mont_tpl_4(t1, t2, p256_mod);
+ /* Y = 2Y */
+ sp_256_mont_dbl_4(y, p->y, p256_mod);
+ /* Y = Y * Y */
+ sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod);
+ /* T2 = Y * Y */
+ sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
+ /* T2 = T2/2 */
+ sp_256_div2_4(t2, t2, p256_mod);
+ /* Y = Y * X */
+ sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod);
+ /* X = T1 * T1 */
+ sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod);
+ /* X = X - Y */
+ sp_256_mont_sub_4(x, x, y, p256_mod);
+ /* X = X - Y */
+ sp_256_mont_sub_4(x, x, y, p256_mod);
+ /* Y = Y - X */
+ sp_256_mont_sub_4(y, y, x, p256_mod);
+ /* Y = Y * T1 */
+ sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod);
+ /* Y = Y - T2 */
+ sp_256_mont_sub_4(y, y, t2, p256_mod);
+}
+
+/* Subtract the double of a Montgomery form number from another
+ * Montgomery form number (r = a - 2 * b % m).
+ *
+ * Both the doubling of b and the subtraction from a are followed by a
+ * constant-time masked reduction with the hard-coded P-256 prime. The
+ * m argument is not referenced by the asm.
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number whose double is subtracted, in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_sub_dbl_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m)
+{
+ __asm__ __volatile__ (
+ "ldp x8, x9, [%[b]]\n\t"
+ "ldp x10, x11, [%[b],16]\n\t"
+ "adds x8, x8, x8\n\t"
+ "ldp x4, x5, [%[a]]\n\t"
+ "adcs x9, x9, x9\n\t"
+ "ldp x6, x7, [%[a],16]\n\t"
+ "adcs x10, x10, x10\n\t"
+ "adcs x11, x11, x11\n\t"
+ "mov x13, 0xffffffff00000001\n\t"
+ "csetm x14, cs\n\t"
+ "subs x8, x8, x14\n\t"
+ "lsr x12, x14, 32\n\t"
+ "sbcs x9, x9, x12\n\t"
+ "and x13, x13, x14\n\t"
+ "sbcs x10, x10, xzr\n\t"
+ "sbc x11, x11, x13\n\t"
+ /* x8-x11 now hold 2*b reduced; subtract from a with borrow fix-up. */
+ "subs x4, x4, x8\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "sbcs x7, x7, x11\n\t"
+ "mov x13, 0xffffffff00000001\n\t"
+ "csetm x14, cc\n\t"
+ "adds x4, x4, x14\n\t"
+ "lsr x12, x14, 32\n\t"
+ "adcs x5, x5, x12\n\t"
+ "and x13, x13, x14\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x4, x5, [%[r],0]\n\t"
+ "adc x7, x7, x13\n\t"
+ "stp x6, x7, [%[r],16]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
+ );
+}
+
+/* Subtract a Montgomery form number from the double of another
+ * Montgomery form number (r = 2 * a - b % m).
+ *
+ * Both the doubling of a and the subtraction of b are followed by a
+ * constant-time masked reduction with the hard-coded P-256 prime. The
+ * m argument is not referenced by the asm.
+ *
+ * r Result of subtraction.
+ * a Number whose double is subtracted from, in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_dbl_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m)
+{
+ __asm__ __volatile__ (
+ "ldp x4, x5, [%[a]]\n\t"
+ "ldp x6, x7, [%[a],16]\n\t"
+ "adds x4, x4, x4\n\t"
+ "ldp x8, x9, [%[b]]\n\t"
+ "adcs x5, x5, x5\n\t"
+ "ldp x10, x11, [%[b],16]\n\t"
+ "adcs x6, x6, x6\n\t"
+ "adcs x7, x7, x7\n\t"
+ "mov x13, 0xffffffff00000001\n\t"
+ "csetm x14, cs\n\t"
+ "subs x4, x4, x14\n\t"
+ "lsr x12, x14, 32\n\t"
+ "sbcs x5, x5, x12\n\t"
+ "and x13, x13, x14\n\t"
+ "sbcs x6, x6, xzr\n\t"
+ "sbc x7, x7, x13\n\t"
+ /* x4-x7 now hold 2*a reduced; subtract b with borrow fix-up. */
+ "subs x4, x4, x8\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "sbcs x7, x7, x11\n\t"
+ "mov x13, 0xffffffff00000001\n\t"
+ "csetm x14, cc\n\t"
+ "adds x4, x4, x14\n\t"
+ "lsr x12, x14, 32\n\t"
+ "adcs x5, x5, x12\n\t"
+ "and x13, x13, x14\n\t"
+ "adcs x6, x6, xzr\n\t"
+ "stp x4, x5, [%[r],0]\n\t"
+ "adc x7, x7, x13\n\t"
+ "stp x6, x7, [%[r],16]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
+ );
+}
+
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * p Point to double; the result is written back into p.
+ * n Number of times to double
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*4;
+ sp_digit* b = t + 4*4;
+ sp_digit* t1 = t + 6*4;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = p->x;
+ y = p->y;
+ z = p->z;
+
+ /* Y = 2*Y */
+ sp_256_mont_dbl_4(y, y, p256_mod);
+ /* W = Z^4 */
+ sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
+
+ /* In the non-small build the final iteration is unrolled below so
+  * that the last W update can be skipped without a branch inside the
+  * loop. */
+#ifndef WOLFSSL_SP_SMALL
+ while (--n > 0)
+#else
+ while (--n >= 0)
+#endif
+ {
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_4(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod);
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_dbl_4(x, x, b, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
+ /* t2 = Y^4 */
+ sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+ if (n != 0)
+#endif
+ {
+ /* W = W*Y^4 */
+ sp_256_mont_mul_4(w, w, t1, p256_mod, p256_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_4(y, b, x, p256_mod);
+ sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_sub_4(y, y, t1, p256_mod);
+ }
+#ifndef WOLFSSL_SP_SMALL
+ /* Last iteration: same as the loop body but without the W update. */
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_4(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod);
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_dbl_4(x, x, b, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
+ /* t2 = Y^4 */
+ sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod);
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_4(y, b, x, p256_mod);
+ sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_sub_4(y, y, t1, p256_mod);
+#endif
+ /* Y = Y/2 */
+ sp_256_div2_4(y, y, p256_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation: all four limbs are always examined and
+ * the result comes from an accumulated difference, never an early exit.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_4(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit d;
+
+ d  = a[0] ^ b[0];
+ d |= a[1] ^ b[1];
+ d |= a[2] ^ b[2];
+ d |= a[3] ^ b[3];
+ return d == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Falls back to point doubling when p and q have equal X and Z
+ * ordinates and Y2 is +/-Y1 (the addition formulas degenerate there).
+ * Infinity inputs are handled by routing the working ordinates through
+ * the rp/ap selection tables instead of branching on the flags.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_4(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+ sp_digit* t)
+{
+ const sp_point_256* ap[2];
+ sp_point_256* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*4;
+ sp_digit* t3 = t + 4*4;
+ sp_digit* t4 = t + 6*4;
+ sp_digit* t5 = t + 8*4;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Ensure only the first point is the same as the result. */
+ if (q == r) {
+ const sp_point_256* a = p;
+ p = q;
+ q = a;
+ }
+
+ /* Check double: t1 = -q->y; double when X and Z match and Y2 = +/-Y1. */
+ (void)sp_256_sub_4(t1, p256_mod, q->y);
+ sp_256_norm_4(t1);
+ if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+ (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
+ sp_256_proj_point_dbl_4(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_256));
+ /* If either input is infinity, work in the scratch point so r keeps
+  * the non-infinity operand copied below. */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<4; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<4; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<4; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U1 = X1*Z2^2 */
+ sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod);
+ /* U2 = X2*Z1^2 */
+ sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+ /* S1 = Y1*Z2^3 */
+ sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+ /* H = U2 - U1 */
+ sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+ /* R = S2 - S1 */
+ sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+ /* Z3 = H*Z1*Z2 */
+ sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+ /* X3 = R^2 - H^3 - 2*U1*H^2 */
+ sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(x, x, t5, p256_mod);
+ sp_256_mont_dbl_4(t1, y, p256_mod);
+ sp_256_mont_sub_4(x, x, t1, p256_mod);
+ /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+ sp_256_mont_sub_4(y, y, x, p256_mod);
+ sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(y, y, t5, p256_mod);
+ }
+}
+
+/* Double the Montgomery form projective point p a number of times,
+ * storing every intermediate doubling.
+ *
+ * r Array of points; the result of the i-th doubling is stored at
+ * r[(1<<i)*m].
+ * p Point to double.
+ * n Number of times to double
+ * m Stride multiplier applied to the result indices.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, const sp_point_256* p,
+ int n, int m, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*4;
+ sp_digit* b = t + 4*4;
+ sp_digit* t1 = t + 6*4;
+ sp_digit* t2 = t + 8*4;
+ sp_digit* x = r[2*m].x;
+ /* y points at the last entry's Y and is used as the running Y; each
+  * iteration stores Y/2 into its own entry below. */
+ sp_digit* y = r[(1<<n)*m].y;
+ sp_digit* z = r[2*m].z;
+ int i;
+
+ for (i=0; i<4; i++) {
+ x[i] = p->x[i];
+ }
+ for (i=0; i<4; i++) {
+ y[i] = p->y[i];
+ }
+ for (i=0; i<4; i++) {
+ z[i] = p->z[i];
+ }
+
+ /* Y = 2*Y */
+ sp_256_mont_dbl_4(y, y, p256_mod);
+ /* W = Z^4 */
+ sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
+ for (i=1; i<=n; i++) {
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_4(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(b, t2, x, p256_mod, p256_mp_mod);
+ x = r[(1<<i)*m].x;
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_4(t1, b, p256_mod);
+ sp_256_mont_sub_4(x, x, t1, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_4(r[(1<<i)*m].z, z, y, p256_mod, p256_mp_mod);
+ z = r[(1<<i)*m].z;
+ /* t2 = Y^4 */
+ sp_256_mont_sqr_4(t2, t2, p256_mod, p256_mp_mod);
+ if (i != n) {
+ /* W = W*Y^4 */
+ sp_256_mont_mul_4(w, w, t2, p256_mod, p256_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_4(y, b, x, p256_mod);
+ sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_4(y, y, p256_mod);
+ sp_256_mont_sub_4(y, y, t2, p256_mod);
+
+ /* Y = Y/2 */
+ sp_256_div2_4(r[(1<<i)*m].y, y, p256_mod);
+ r[(1<<i)*m].infinity = 0;
+ }
+}
+
+/* Add two Montgomery form projective points, producing both the sum
+ * and the difference while sharing the common intermediates.
+ *
+ * ra Result of addition (p + q).
+ * rs Result of subtraction (p - q).
+ * p First point.
+ * q Second point.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_sub_4(sp_point_256* ra, sp_point_256* rs,
+ const sp_point_256* p, const sp_point_256* q, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*4;
+ sp_digit* t3 = t + 4*4;
+ sp_digit* t4 = t + 6*4;
+ sp_digit* t5 = t + 8*4;
+ sp_digit* t6 = t + 10*4;
+ sp_digit* x = ra->x;
+ sp_digit* y = ra->y;
+ sp_digit* z = ra->z;
+ sp_digit* xs = rs->x;
+ sp_digit* ys = rs->y;
+ sp_digit* zs = rs->z;
+
+
+ XMEMCPY(x, p->x, sizeof(p->x) / 2);
+ XMEMCPY(y, p->y, sizeof(p->y) / 2);
+ XMEMCPY(z, p->z, sizeof(p->z) / 2);
+ ra->infinity = 0;
+ rs->infinity = 0;
+
+ /* U1 = X1*Z2^2 */
+ sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod);
+ /* U2 = X2*Z1^2 */
+ sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+ /* S1 = Y1*Z2^3 */
+ sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+ /* H = U2 - U1 */
+ sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+ /* RS = S2 + S1 */
+ sp_256_mont_add_4(t6, t4, t3, p256_mod);
+ /* R = S2 - S1 */
+ sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+ /* Z3 = H*Z1*Z2 */
+ /* ZS = H*Z1*Z2 */
+ sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+ XMEMCPY(zs, z, sizeof(p->z)/2);
+ /* X3 = R^2 - H^3 - 2*U1*H^2 */
+ /* XS = RS^2 - H^3 - 2*U1*H^2 */
+ sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(xs, t6, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(x, x, t5, p256_mod);
+ sp_256_mont_sub_4(xs, xs, t5, p256_mod);
+ sp_256_mont_dbl_4(t1, y, p256_mod);
+ sp_256_mont_sub_4(x, x, t1, p256_mod);
+ sp_256_mont_sub_4(xs, xs, t1, p256_mod);
+ /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+ sp_256_mont_sub_4(ys, y, xs, p256_mod);
+ sp_256_mont_sub_4(y, y, x, p256_mod);
+ sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod);
+ /* t6 = -RS */
+ sp_256_sub_4(t6, p256_mod, t6);
+ sp_256_mont_mul_4(ys, ys, t6, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(y, y, t5, p256_mod);
+ sp_256_mont_sub_4(ys, ys, t5, p256_mod);
+}
+
+/* Structure used to describe recoding of scalar multiplication.
+ * One entry per 6-bit signed window of the scalar. */
+typedef struct ecc_recode_256 {
+    /* Index into pre-computation table. */
+    uint8_t i;
+    /* Use the negative of the point. */
+    uint8_t neg;
+} ecc_recode_256;
+
+/* The index into pre-computation table to use.
+ * Maps a 6-bit window value (plus carry, hence 66 entries) to the
+ * table entry holding its absolute value: values above 32 map to
+ * 64 - value, with recode_neg_4_6 marking the negation. */
+static const uint8_t recode_index_4_6[66] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
+     0,  1,
+};
+
+/* Whether to negate y-ordinate.
+ * Set for window values 32..63, which recode_index_4_6 maps to
+ * 64 - value (a carry is then pushed into the next window). */
+static const uint8_t recode_neg_4_6[66] = {
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     0,  0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * The 256-bit scalar is split into 43 windows of 6 bits (43*6 = 258
+ * covers the whole scalar). Window values above 32 are replaced by
+ * 64 - value with the negate flag set and a carry into the next
+ * window, so only 33 pre-computed points are needed.
+ *
+ * k Scalar to multiply by.
+ * v Vector of operations to perform.
+ */
+static void sp_256_ecc_recode_6_4(const sp_digit* k, ecc_recode_256* v)
+{
+ int i, j;
+ uint8_t y;
+ int carry = 0;
+ int o;
+ sp_digit n;
+
+ /* j = current scalar word, n = its unread bits, o = bit offset. */
+ j = 0;
+ n = k[j];
+ o = 0;
+ for (i=0; i<43; i++) {
+ y = n;
+ if (o + 6 < 64) {
+ /* Window lies entirely inside the current word. */
+ y &= 0x3f;
+ n >>= 6;
+ o += 6;
+ }
+ else if (o + 6 == 64) {
+ /* Window ends exactly at a word boundary. */
+ n >>= 6;
+ if (++j < 4)
+ n = k[j];
+ o = 0;
+ }
+ else if (++j < 4) {
+ /* Window straddles two words: take the low bits of the next. */
+ n = k[j];
+ y |= (n << (64 - o)) & 0x3f;
+ o -= 58;
+ n >>= o;
+ }
+
+ y += carry;
+ v[i].i = recode_index_4_6[y];
+ v[i].neg = recode_neg_4_6[y];
+ carry = (y >> 6) + v[i].neg;
+ }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Uses a 6-bit signed window (sp_256_ecc_recode_6_4): 33 pre-computed
+ * multiples of g are built, then the 43 windows are processed from the
+ * most significant down, doubling six times per window and adding the
+ * (possibly negated) table entry.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_win_add_sub_4(sp_point_256* r, const sp_point_256* g,
+ const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td[33];
+ sp_point_256 rtd, pd;
+ sp_digit tmpd[2 * 4 * 6];
+#endif
+ sp_point_256* t;
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* tmp;
+ sp_digit* negy;
+ int i;
+ ecc_recode_256 v[43];
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_4(heap, rtd, rt);
+ if (err == MP_OKAY)
+ err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 33, heap, DYNAMIC_TYPE_ECC);
+ if (t == NULL)
+ err = MEMORY_E;
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#else
+ t = td;
+ tmp = tmpd;
+#endif
+
+
+ if (err == MP_OKAY) {
+ /* t[0] = {0, 0, 1} * norm */
+ XMEMSET(&t[0], 0, sizeof(t[0]));
+ t[0].infinity = 1;
+ /* t[1] = {g->x, g->y, g->z} * norm */
+ err = sp_256_mod_mul_norm_4(t[1].x, g->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t[1].y, g->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t[1].z, g->z, p256_mod);
+ }
+
+ if (err == MP_OKAY) {
+ t[1].infinity = 0;
+ /* t[2] ... t[32] */
+ sp_256_proj_point_dbl_n_store_4(t, &t[ 1], 5, 1, tmp);
+ sp_256_proj_point_add_4(&t[ 3], &t[ 2], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[ 6], &t[ 3], tmp);
+ sp_256_proj_point_add_sub_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[10], &t[ 5], tmp);
+ sp_256_proj_point_add_sub_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[12], &t[ 6], tmp);
+ sp_256_proj_point_dbl_4(&t[14], &t[ 7], tmp);
+ sp_256_proj_point_add_sub_4(&t[15], &t[13], &t[14], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[18], &t[ 9], tmp);
+ sp_256_proj_point_add_sub_4(&t[19], &t[17], &t[18], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[20], &t[10], tmp);
+ sp_256_proj_point_dbl_4(&t[22], &t[11], tmp);
+ sp_256_proj_point_add_sub_4(&t[23], &t[21], &t[22], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[24], &t[12], tmp);
+ sp_256_proj_point_dbl_4(&t[26], &t[13], tmp);
+ sp_256_proj_point_add_sub_4(&t[27], &t[25], &t[26], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[28], &t[14], tmp);
+ sp_256_proj_point_dbl_4(&t[30], &t[15], tmp);
+ sp_256_proj_point_add_sub_4(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+ /* t[0] is the infinity entry; its y ordinate is free scratch. */
+ negy = t[0].y;
+
+ sp_256_ecc_recode_6_4(k, v);
+
+ /* Start from the most significant window. */
+ i = 42;
+ XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_256));
+ for (--i; i>=0; i--) {
+ sp_256_proj_point_dbl_n_4(rt, 6, tmp);
+
+ XMEMCPY(p, &t[v[i].i], sizeof(sp_point_256));
+ /* Constant-time conditional negate of the table point's Y. */
+ sp_256_sub_4(negy, p256_mod, p->y);
+ sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
+ sp_256_proj_point_add_4(rt, rt, p, tmp);
+ }
+
+ if (map != 0) {
+ sp_256_map_4(r, rt, tmp);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL)
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ if (tmp != NULL)
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_256_point_free_4(p, 0, heap);
+ sp_256_point_free_4(rt, 0, heap);
+
+ return err;
+}
+
+/* A table entry for pre-computed points.
+ * Only the affine x and y ordinates are stored (z is implicitly one);
+ * see sp_256_gen_stripe_table_4. */
+typedef struct sp_table_entry_256 {
+    sp_digit x[4];
+    sp_digit y[4];
+} sp_table_entry_256;
+
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+#endif /* FP_ECC || WOLFSSL_SP_SMALL */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * With Z2 = 1 the Z2^2 and Z2^3 products drop out, saving several
+ * multiplications compared with sp_256_proj_point_add_4.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add (Z ordinate must be one).
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p,
+ const sp_point_256* q, sp_digit* t)
+{
+ const sp_point_256* ap[2];
+ sp_point_256* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*4;
+ sp_digit* t3 = t + 4*4;
+ sp_digit* t4 = t + 6*4;
+ sp_digit* t5 = t + 8*4;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Check double: t1 = -q->y; double when X and Z match and Y2 = +/-Y1. */
+ (void)sp_256_sub_4(t1, p256_mod, q->y);
+ sp_256_norm_4(t1);
+ if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+ (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
+ sp_256_proj_point_dbl_4(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_256));
+ /* If either input is infinity, work in the scratch point so r keeps
+  * the non-infinity operand copied below. */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<4; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<4; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<4; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U2 = X2*Z1^2 */
+ sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+ /* H = U2 - X1 */
+ sp_256_mont_sub_4(t2, t2, x, p256_mod);
+ /* R = S2 - Y1 */
+ sp_256_mont_sub_4(t4, t4, y, p256_mod);
+ /* Z3 = H*Z1 */
+ sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+ /* X3 = R^2 - H^3 - 2*X1*H^2 */
+ sp_256_mont_sqr_4(t1, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t3, x, t5, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(x, t1, t5, p256_mod);
+ sp_256_mont_dbl_4(t1, t3, p256_mod);
+ sp_256_mont_sub_4(x, x, t1, p256_mod);
+ /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+ sp_256_mont_sub_4(t3, t3, x, p256_mod);
+ sp_256_mont_mul_4(t3, t3, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, y, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(y, t3, t5, p256_mod);
+ }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert; x and y are updated in place and z is set to
+ * one in Montgomery form (p256_norm_mod).
+ * t Temporary data.
+ */
+static void sp_256_proj_to_affine_4(sp_point_256* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 4;
+ sp_digit* tmp = t + 4 * 4;
+
+ /* t1 = 1 / Z */
+ sp_256_mont_inv_4(t1, a->z, tmp);
+
+ /* t2 = 1 / Z^2, t1 = 1 / Z^3. */
+ sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod);
+
+ /* x = X / Z^2, y = Y / Z^3, z = 1 (all in Montgomery form). */
+ sp_256_mont_mul_4(a->x, a->x, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(a->y, a->y, t1, p256_mod, p256_mp_mod);
+ XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * Builds a stripe table: entry 2^i holds 2^(32*i) * a in affine
+ * Montgomery form, and every other entry is the sum of the
+ * power-of-two entries selected by its index bits.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_4(const sp_point_256* a,
+ sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td, s1d, s2d;
+#endif
+ sp_point_256* t;
+ sp_point_256* s1 = NULL;
+ sp_point_256* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_4(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_4(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_4(heap, s2d, s2);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t->x, a->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t->y, a->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t->z, a->z, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_256_proj_to_affine_4(t, tmp);
+
+ XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(32*i) * a (affine). */
+ for (i=1; i<8; i++) {
+ sp_256_proj_point_dbl_n_4(t, 32, tmp);
+ sp_256_proj_to_affine_4(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Remaining entries are sums of the power-of-two entries. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_256_proj_point_add_qz1_4(t, s1, s2, tmp);
+ sp_256_proj_to_affine_4(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_256_point_free_4(s2, 0, heap);
+ sp_256_point_free_4(s1, 0, heap);
+ sp_256_point_free_4( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+/* Multiply the point by the scalar, using the stripe method with a
+ * pre-computed table, and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * The 256-bit scalar is treated as 8 stripes of 32 bits; at each of
+ * the 32 bit positions, one bit from every stripe is gathered into an
+ * 8-bit index into the table (see sp_256_gen_stripe_table_4).
+ *
+ * r Resulting point.
+ * g Point to multiply. Unused here: the table already encodes its
+ *   multiples.
+ * table Pre-computed table of 256 multiples of g.
+ * k Scalar to multiply by, as an array of 64-bit digits.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 4 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_4(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_4(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Table entries are affine; give both points Z = 1 (Montgomery). */
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ /* Initial accumulator from bit 31 of every 32-bit stripe of k. */
+ y = 0;
+ for (j=0,x=31; j<8; j++,x+=32) {
+ y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ for (i=30; i>=0; i--) {
+ /* Gather bit i of each stripe into the 8-bit table index. */
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=32) {
+ y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+ }
+
+ /* Double then add the selected table entry (Z = 1 add). */
+ sp_256_proj_point_dbl_4(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_4(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_256_map_4(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_4(p, 0, heap);
+ sp_256_point_free_4(rt, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC || WOLFSSL_SP_SMALL */
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry: a point and its pre-computed stripe table. */
+typedef struct sp_cache_256_t {
+    /* X ordinate of the point the table was generated from. */
+    sp_digit x[4];
+    /* Y ordinate of the point the table was generated from. */
+    sp_digit y[4];
+    /* Pre-computed stripe table for the point. */
+    sp_table_entry_256 table[256];
+    /* Usage count - least used entry is evicted. */
+    uint32_t cnt;
+    /* Non-zero when this entry is in use. */
+    int set;
+} sp_cache_256_t;
+
+/* Cache of stripe tables, one slot per distinct point. */
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+/* Index of the most recently assigned/matched cache entry. */
+static THREAD_LS_T int sp_cache_256_last = -1;
+/* Non-zero once the cache entries have been cleared (lazy init). */
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    /* Non-zero once sp_cache_256_lock has been initialized (lazily). */
+    static volatile int initCacheMutex_256 = 0;
+    /* Mutex protecting the shared cache when it is not thread-local. */
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+/* Find or assign the cache entry for a point.
+ *
+ * Lazily clears the cache on first use. On a miss, takes an unused
+ * slot if one exists, otherwise evicts the least used entry. The
+ * caller holds sp_cache_256_lock when HAVE_THREAD_LS is not defined
+ * (see sp_256_ecc_mulmod_4).
+ *
+ * g Point to look up; x and y ordinates are compared.
+ * cache Set to the matching or newly assigned entry.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazy one-time initialization: mark all entries unused. */
+ if (sp_cache_256_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_256[i].set = 0;
+ }
+ sp_cache_256_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_256[i].set)
+ continue;
+
+ if (sp_256_cmp_equal_4(g->x, sp_cache_256[i].x) &
+ sp_256_cmp_equal_4(g->y, sp_cache_256[i].y)) {
+ sp_cache_256[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_256_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_256[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_256_last) {
+ least = sp_cache_256[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_256[j].cnt < least) {
+ i = j;
+ least = sp_cache_256[i].cnt;
+ }
+ }
+ }
+
+ /* Claim the slot for this point; table is generated by caller. */
+ XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+ XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+ sp_cache_256[i].set = 1;
+ sp_cache_256[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_256[i];
+ sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * With FP_ECC, results are cached per point: the first use of a point
+ * goes through the window add-sub method; on the second use its stripe
+ * table is generated and the stripe method is used from then on.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 4 * 5];
+ sp_cache_256_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Lazily initialize then take the cache lock (non-TLS build). */
+ if (initCacheMutex_256 == 0) {
+ wc_InitMutex(&sp_cache_256_lock);
+ initCacheMutex_256 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_256_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_256(g, &cache);
+ /* Second use of this point: build its stripe table.
+ * NOTE(review): the return value is ignored; on failure the
+ * stripe path below would still be taken - confirm intended. */
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_4(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap);
+ }
+ else {
+ err = sp_256_ecc_mulmod_stripe_4(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[4];
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert mp_int scalar and ecc_point into SP representations. */
+ sp_256_from_mp(k, 4, km);
+ sp_256_point_from_ecc_point_4(point, gm);
+
+ err = sp_256_ecc_mulmod_4(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Convert the SP result back into the caller's ecc_point. */
+ err = sp_256_point_to_ecc_point_4(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_4(point, 0, heap);
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_256 p256_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L,
+ 0x18905f76a53755c6L },
+ { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L,
+ 0x8571ff1825885d85L } },
+ /* 2 */
+ { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L,
+ 0xd953c50ddbdf58e9L },
+ { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL,
+ 0x863ebb7e9eb288f3L } },
+ /* 3 */
+ { { 0x7856b6235cdb6485L,0x808f0ea22f0a2f97L,0x3e68d9544f7e300bL,
+ 0x00076055b5ff80a0L },
+ { 0x7634eb9b838d2010L,0x54014fbb3243708aL,0xe0e47d39842a6606L,
+ 0x8308776134373ee0L } },
+ /* 4 */
+ { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL,
+ 0x2f5e6961fd1b667fL },
+ { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L,
+ 0xf648f9168d6f0f7bL } },
+ /* 5 */
+ { { 0x9e566847e137bbbcL,0xe434469e8a6a0becL,0xb1c4276179d73463L,
+ 0x5abe0285133d0015L },
+ { 0x92aa837cc04c7dabL,0x573d9f4c43260c07L,0x0c93156278e6cc37L,
+ 0x94bb725b6b6f7383L } },
+ /* 6 */
+ { { 0xbbf9b48f720f141cL,0x6199b3cd2df5bc74L,0xdc3f6129411045c4L,
+ 0xcdd6bbcb2f7dc4efL },
+ { 0xcca6700beaf436fdL,0x6f647f6db99326beL,0x0c0fa792014f2522L,
+ 0xa361bebd4bdae5f6L } },
+ /* 7 */
+ { { 0x28aa2558597c13c7L,0xc38d635f50b7c3e1L,0x07039aecf3c09d1dL,
+ 0xba12ca09c4b5292cL },
+ { 0x9e408fa459f91dfdL,0x3af43b66ceea07fbL,0x1eceb0899d780b29L,
+ 0x53ebb99d701fef4bL } },
+ /* 8 */
+ { { 0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L,
+ 0x8589fb9206d54831L },
+ { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L,
+ 0xebb0696d02541c4fL } },
+ /* 9 */
+ { { 0x4616ca15ac1647c5L,0xb8127d47c4cf5799L,0xdc666aa3764dfbacL,
+ 0xeb2820cbd1b27da3L },
+ { 0x9406f8d86a87e008L,0xd87dfa9d922378f3L,0x56ed2e4280ccecb2L,
+ 0x1f28289b55a7da1dL } },
+ /* 10 */
+ { { 0xabbaa0c03b89da99L,0xa6f2d79eb8284022L,0x27847862b81c05e8L,
+ 0x337a4b5905e54d63L },
+ { 0x3c67500d21f7794aL,0x207005b77d6d7f61L,0x0a5a378104cfd6e8L,
+ 0x0d65e0d5f4c2fbd6L } },
+ /* 11 */
+ { { 0xd9d09bbeb5275d38L,0x4268a7450be0a358L,0xf0762ff4973eb265L,
+ 0xc23da24252f4a232L },
+ { 0x5da1b84f0b94520cL,0x09666763b05bd78eL,0x3a4dcb8694d29ea1L,
+ 0x19de3b8cc790cff1L } },
+ /* 12 */
+ { { 0x183a716c26c5fe04L,0x3b28de0b3bba1bdbL,0x7432c586a4cb712cL,
+ 0xe34dcbd491fccbfdL },
+ { 0xb408d46baaa58403L,0x9a69748682e97a53L,0x9e39012736aaa8afL,
+ 0xe7641f447b4e0f7fL } },
+ /* 13 */
+ { { 0x7d753941df64ba59L,0xd33f10ec0b0242fcL,0x4f06dfc6a1581859L,
+ 0x4a12df57052a57bfL },
+ { 0xbfa6338f9439dbd0L,0xd3c24bd4bde53e1fL,0xfd5e4ffa21f1b314L,
+ 0x6af5aa93bb5bea46L } },
+ /* 14 */
+ { { 0xda10b69910c91999L,0x0a24b4402a580491L,0x3e0094b4b8cc2090L,
+ 0x5fe3475a66a44013L },
+ { 0xb0f8cabdf93e7b4bL,0x292b501a7c23f91aL,0x42e889aecd1e6263L,
+ 0xb544e308ecfea916L } },
+ /* 15 */
+ { { 0x6478c6e916ddfdceL,0x2c329166f89179e6L,0x4e8d6e764d4e67e1L,
+ 0xe0b6b2bda6b0c20bL },
+ { 0x0d312df2bb7efb57L,0x1aac0dde790c4007L,0xf90336ad679bc944L,
+ 0x71c023de25a63774L } },
+ /* 16 */
+ { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L,
+ 0x61d587d421d324f6L },
+ { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL,
+ 0xfa11fe124621efbeL } },
+ /* 17 */
+ { { 0x1c891f2b2cb19ffdL,0x01ba8d5bb1923c23L,0xb6d03d678ac5ca8eL,
+ 0x586eb04c1f13bedcL },
+ { 0x0c35c6e527e8ed09L,0x1e81a33c1819ede2L,0x278fd6c056c652faL,
+ 0x19d5ac0870864f11L } },
+ /* 18 */
+ { { 0x1e99f581309a4e1fL,0xab7de71be9270074L,0x26a5ef0befd28d20L,
+ 0xe7c0073f7f9c563fL },
+ { 0x1f6d663a0ef59f76L,0x669b3b5420fcb050L,0xc08c1f7a7a6602d4L,
+ 0xe08504fec65b3c0aL } },
+ /* 19 */
+ { { 0xf098f68da031b3caL,0x6d1cab9ee6da6d66L,0x5bfd81fa94f246e8L,
+ 0x78f018825b0996b4L },
+ { 0xb7eefde43a25787fL,0x8016f80d1dccac9bL,0x0cea4877b35bfc36L,
+ 0x43a773b87e94747aL } },
+ /* 20 */
+ { { 0x62577734d2b533d5L,0x673b8af6a1bdddc0L,0x577e7c9aa79ec293L,
+ 0xbb6de651c3b266b1L },
+ { 0xe7e9303ab65259b3L,0xd6a0afd3d03a7480L,0xc5ac83d19b3cfc27L,
+ 0x60b4619a5d18b99bL } },
+ /* 21 */
+ { { 0xbd6a38e11ae5aa1cL,0xb8b7652b49e73658L,0x0b130014ee5f87edL,
+ 0x9d0f27b2aeebffcdL },
+ { 0xca9246317a730a55L,0x9c955b2fddbbc83aL,0x07c1dfe0ac019a71L,
+ 0x244a566d356ec48dL } },
+ /* 22 */
+ { { 0x6db0394aeacf1f96L,0x9f2122a9024c271cL,0x2626ac1b82cbd3b9L,
+ 0x45e58c873581ef69L },
+ { 0xd3ff479da38f9dbcL,0xa8aaf146e888a040L,0x945adfb246e0bed7L,
+ 0xc040e21cc1e4b7a4L } },
+ /* 23 */
+ { { 0x847af0006f8117b6L,0x651969ff73a35433L,0x482b35761d9475ebL,
+ 0x1cdf5c97682c6ec7L },
+ { 0x7db775b411f04839L,0x7dbeacf448de1698L,0xb2921dd1b70b3219L,
+ 0x046755f8a92dff3dL } },
+ /* 24 */
+ { { 0xcc8ac5d2bce8ffcdL,0x0d53c48b2fe61a82L,0xf6f161727202d6c7L,
+ 0x046e5e113b83a5f3L },
+ { 0xe7b8ff64d8007f01L,0x7fb1ef125af43183L,0x045c5ea635e1a03cL,
+ 0x6e0106c3303d005bL } },
+ /* 25 */
+ { { 0x48c7358488dd73b1L,0x7670708f995ed0d9L,0x38385ea8c56a2ab7L,
+ 0x442594ede901cf1fL },
+ { 0xf8faa2c912d4b65bL,0x94c2343b96c90c37L,0xd326e4a15e978d1fL,
+ 0xa796fa514c2ee68eL } },
+ /* 26 */
+ { { 0x359fb604823addd7L,0x9e2a6183e56693b3L,0xf885b78e3cbf3c80L,
+ 0xe4ad2da9c69766e9L },
+ { 0x357f7f428e048a61L,0x082d198cc092d9a0L,0xfc3a1af4c03ed8efL,
+ 0xc5e94046c37b5143L } },
+ /* 27 */
+ { { 0x476a538c2be75f9eL,0x6fd1a9e8cb123a78L,0xd85e4df0b109c04bL,
+ 0x63283dafdb464747L },
+ { 0xce728cf7baf2df15L,0xe592c4550ad9a7f4L,0xfab226ade834bcc3L,
+ 0x68bd19ab1981a938L } },
+ /* 28 */
+ { { 0xc08ead511887d659L,0x3374d5f4b359305aL,0x96986981cfe74fe3L,
+ 0x495292f53c6fdfd6L },
+ { 0x4a878c9e1acec896L,0xd964b210ec5b4484L,0x6696f7e2664d60a7L,
+ 0x0ec7530d26036837L } },
+ /* 29 */
+ { { 0x2da13a05ad2687bbL,0xa1f83b6af32e21faL,0x390f5ef51dd4607bL,
+ 0x0f6207a664863f0bL },
+ { 0xbd67e3bb0f138233L,0xdd66b96c272aa718L,0x8ed0040726ec88aeL,
+ 0xff0db07208ed6dcfL } },
+ /* 30 */
+ { { 0x749fa1014c95d553L,0xa44052fd5d680a8aL,0x183b4317ff3b566fL,
+ 0x313b513c88740ea3L },
+ { 0xb402e2ac08d11549L,0x071ee10bb4dee21cL,0x26b987dd47f2320eL,
+ 0x2d3abcf986f19f81L } },
+ /* 31 */
+ { { 0x4c288501815581a2L,0x9a0a6d56632211afL,0x19ba7a0f0cab2e99L,
+ 0xc036fa10ded98cdfL },
+ { 0x29ae08bac1fbd009L,0x0b68b19006d15816L,0xc2eb32779b9e0d8fL,
+ 0xa6b2a2c4b6d40194L } },
+ /* 32 */
+ { { 0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L,
+ 0x810ee252af7c9860L },
+ { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L,
+ 0xd485717a92731745L } },
+ /* 33 */
+ { { 0x11741a8af0cb5a98L,0xd3da8f931f3110bfL,0x1994e2cbab382adfL,
+ 0x6a6045a72f9a604eL },
+ { 0x170c0d3fa2b2411dL,0xbe0eb83e510e96e0L,0x3bcc9f738865b3ccL,
+ 0xd3e45cfaf9e15790L } },
+ /* 34 */
+ { { 0xce1f69bbe83f7669L,0x09f8ae8272877d6bL,0x9548ae543244278dL,
+ 0x207755dee3c2c19cL },
+ { 0x87bd61d96fef1945L,0x18813cefb12d28c3L,0x9fbcd1d672df64aaL,
+ 0x48dc5ee57154b00dL } },
+ /* 35 */
+ { { 0x123790bff7e5a199L,0xe0efb8cf989ccbb7L,0xc27a2bfe0a519c79L,
+ 0xf2fb0aeddff6f445L },
+ { 0x41c09575f0b5025fL,0x550543d740fa9f22L,0x8fa3c8ad380bfbd0L,
+ 0xa13e9015db28d525L } },
+ /* 36 */
+ { { 0xf9f7a350a2b65cbcL,0x0b04b9722a464226L,0x265ce241e23f07a1L,
+ 0x2bf0d6b01497526fL },
+ { 0xd3d4dd3f4b216fb7L,0xf7d7b867fbdda26aL,0xaeb7b83f6708505cL,
+ 0x42a94a5a162fe89fL } },
+ /* 37 */
+ { { 0x5846ad0beaadf191L,0x0f8a489025a268d7L,0xe8603050494dc1f6L,
+ 0x2c2dd969c65ede3dL },
+ { 0x6d02171d93849c17L,0x460488ba1da250ddL,0x4810c7063c3a5485L,
+ 0xf437fa1f42c56dbcL } },
+ /* 38 */
+ { { 0x6aa0d7144a0f7dabL,0x0f0497931776e9acL,0x52c0a050f5f39786L,
+ 0xaaf45b3354707aa8L },
+ { 0x85e37c33c18d364aL,0xd40b9b063e497165L,0xf417168115ec5444L,
+ 0xcdf6310df4f272bcL } },
+ /* 39 */
+ { { 0x7473c6238ea8b7efL,0x08e9351885bc2287L,0x419567722bda8e34L,
+ 0xf0d008bada9e2ff2L },
+ { 0x2912671d2414d3b1L,0xb3754985b019ea76L,0x5c61b96d453bcbdbL,
+ 0x5bd5c2f5ca887b8bL } },
+ /* 40 */
+ { { 0xef0f469ef49a3154L,0x3e85a5956e2b2e9aL,0x45aaec1eaa924a9cL,
+ 0xaa12dfc8a09e4719L },
+ { 0x26f272274df69f1dL,0xe0e4c82ca2ff5e73L,0xb9d8ce73b7a9dd44L,
+ 0x6c036e73e48ca901L } },
+ /* 41 */
+ { { 0x5cfae12a0f6e3138L,0x6966ef0025ad345aL,0x8993c64b45672bc5L,
+ 0x292ff65896afbe24L },
+ { 0xd5250d445e213402L,0xf6580e274392c9feL,0x097b397fda1c72e8L,
+ 0x644e0c90311b7276L } },
+ /* 42 */
+ { { 0xe1e421e1a47153f0L,0xb86c3b79920418c9L,0x93bdce87705d7672L,
+ 0xf25ae793cab79a77L },
+ { 0x1f3194a36d869d0cL,0x9d55c8824986c264L,0x49fb5ea3096e945eL,
+ 0x39b8e65313db0a3eL } },
+ /* 43 */
+ { { 0x37754200b6fd2e59L,0x35e2c0669255c98fL,0xd9dab21a0e2a5739L,
+ 0x39122f2f0f19db06L },
+ { 0xcfbce1e003cad53cL,0x225b2c0fe65c17e3L,0x72baf1d29aa13877L,
+ 0x8de80af8ce80ff8dL } },
+ /* 44 */
+ { { 0xafbea8d9207bbb76L,0x921c7e7c21782758L,0xdfa2b74b1c0436b1L,
+ 0x871949062e368c04L },
+ { 0xb5f928bba3993df5L,0x639d75b5f3b3d26aL,0x011aa78a85b55050L,
+ 0xfc315e6a5b74fde1L } },
+ /* 45 */
+ { { 0x561fd41ae8d6ecfaL,0x5f8c44f61aec7f86L,0x98452a7b4924741dL,
+ 0xe6d4a7adee389088L },
+ { 0x60552ed14593c75dL,0x70a70da4dd271162L,0xd2aede937ba2c7dbL,
+ 0x35dfaf9a9be2ae57L } },
+ /* 46 */
+ { { 0x6b956fcdaa736636L,0x09f51d97ae2cab7eL,0xfb10bf410f349966L,
+ 0x1da5c7d71c830d2bL },
+ { 0x5c41e4833cce6825L,0x15ad118ff9573c3bL,0xa28552c7f23036b8L,
+ 0x7077c0fddbf4b9d6L } },
+ /* 47 */
+ { { 0xbf63ff8d46b9661cL,0xa1dfd36b0d2cfd71L,0x0373e140a847f8f7L,
+ 0x53a8632ee50efe44L },
+ { 0x0976ff68696d8051L,0xdaec0c95c74f468aL,0x62994dc35e4e26bdL,
+ 0x028ca76d34e1fcc1L } },
+ /* 48 */
+ { { 0xd11d47dcfc9877eeL,0xc8b36210801d0002L,0xd002c11754c260b6L,
+ 0x04c17cd86962f046L },
+ { 0x6d9bd094b0daddf5L,0xbea2357524ce55c0L,0x663356e672da03b5L,
+ 0xf7ba4de9fed97474L } },
+ /* 49 */
+ { { 0xd0dbfa34ebe1263fL,0x5576373571ae7ce6L,0xd244055382a6f523L,
+ 0xe31f960052131c41L },
+ { 0xd1bb9216ea6b6ec6L,0x37a1d12e73c2fc44L,0xc10e7eac89d0a294L,
+ 0xaa3a6259ce34d47bL } },
+ /* 50 */
+ { { 0xfbcf9df536f3dcd3L,0x6ceded50d2bf7360L,0x491710fadf504f5bL,
+ 0x2398dd627e79daeeL },
+ { 0xcf4705a36d09569eL,0xea0619bb5149f769L,0xff9c037735f6034cL,
+ 0x5717f5b21c046210L } },
+ /* 51 */
+ { { 0x9fe229c921dd895eL,0x8e51850040c28451L,0xfa13d2391d637ecdL,
+ 0x660a2c560e3c28deL },
+ { 0x9cca88aed67fcbd0L,0xc84724780ea9f096L,0x32b2f48172e92b4dL,
+ 0x624ee54c4f522453L } },
+ /* 52 */
+ { { 0x09549ce4d897ecccL,0x4d49d1d93f9880aaL,0x723c2423043a7c20L,
+ 0x4f392afb92bdfbc0L },
+ { 0x6969f8fa7de44fd9L,0xb66cfbe457b32156L,0xdb2fa803368ebc3cL,
+ 0x8a3e7977ccdb399cL } },
+ /* 53 */
+ { { 0xdde1881f06c4b125L,0xae34e300f6e3ca8cL,0xef6999de5c7a13e9L,
+ 0x3888d02370c24404L },
+ { 0x7628035644f91081L,0x3d9fcf615f015504L,0x1827edc8632cd36eL,
+ 0xa5e62e4718102336L } },
+ /* 54 */
+ { { 0x1a825ee32facd6c8L,0x699c635454bcbc66L,0x0ce3edf798df9931L,
+ 0x2c4768e6466a5adcL },
+ { 0xb346ff8c90a64bc9L,0x630a6020e4779f5cL,0xd949d064bc05e884L,
+ 0x7b5e6441f9e652a0L } },
+ /* 55 */
+ { { 0x2169422c1d28444aL,0xe996c5d8be136a39L,0x2387afe5fb0c7fceL,
+ 0xb8af73cb0c8d744aL },
+ { 0x5fde83aa338b86fdL,0xfee3f158a58a5cffL,0xc9ee8f6f20ac9433L,
+ 0xa036395f7f3f0895L } },
+ /* 56 */
+ { { 0x8c73c6bba10f7770L,0xa6f16d81a12a0e24L,0x100df68251bc2b9fL,
+ 0x4be36b01875fb533L },
+ { 0x9226086e9fb56dbbL,0x306fef8b07e7a4f8L,0xeeaccc0566d52f20L,
+ 0x8cbc9a871bdc00c0L } },
+ /* 57 */
+ { { 0xe131895cc0dac4abL,0xa874a440712ff112L,0x6332ae7c6a1cee57L,
+ 0x44e7553e0c0835f8L },
+ { 0x6d503fff7734002dL,0x9d35cb8b0b34425cL,0x95f702760e8738b5L,
+ 0x470a683a5eb8fc18L } },
+ /* 58 */
+ { { 0x81b761dc90513482L,0x0287202a01e9276aL,0xcda441ee0ce73083L,
+ 0x16410690c63dc6efL },
+ { 0xf5034a066d06a2edL,0xdd4d7745189b100bL,0xd914ae72ab8218c9L,
+ 0xd73479fd7abcbb4fL } },
+ /* 59 */
+ { { 0x7edefb165ad4c6e5L,0x262cf08f5b06d04dL,0x12ed5bb18575cb14L,
+ 0x816469e30771666bL },
+ { 0xd7ab9d79561e291eL,0xeb9daf22c1de1661L,0xf49827eb135e0513L,
+ 0x0a36dd23f0dd3f9cL } },
+ /* 60 */
+ { { 0x098d32c741d5533cL,0x7c5f5a9e8684628fL,0x39a228ade349bd11L,
+ 0xe331dfd6fdbab118L },
+ { 0x5100ab686bcc6ed8L,0x7160c3bdef7a260eL,0x9063d9a7bce850d7L,
+ 0xd3b4782a492e3389L } },
+ /* 61 */
+ { { 0xa149b6e8f3821f90L,0x92edd9ed66eb7aadL,0x0bb669531a013116L,
+ 0x7281275a4c86a5bdL },
+ { 0x503858f7d3ff47e5L,0x5e1616bc61016441L,0x62b0f11a7dfd9bb1L,
+ 0x2c062e7ece145059L } },
+ /* 62 */
+ { { 0xa76f996f0159ac2eL,0x281e7736cbdb2713L,0x2ad6d28808e46047L,
+ 0x282a35f92c4e7ef1L },
+ { 0x9c354b1ec0ce5cd2L,0xcf99efc91379c229L,0x992caf383e82c11eL,
+ 0xc71cd513554d2abdL } },
+ /* 63 */
+ { { 0x4885de9c09b578f4L,0x1884e258e3affa7aL,0x8f76b1b759182f1fL,
+ 0xc50f6740cf47f3a3L },
+ { 0xa9c4adf3374b68eaL,0xa406f32369965fe2L,0x2f86a22285a53050L,
+ 0xb9ecb3a7212958dcL } },
+ /* 64 */
+ { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L,
+ 0x803f3e02cd42ab1bL },
+ { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL,
+ 0xc097440e5067adc1L } },
+ /* 65 */
+ { { 0x846a56f2c379ab34L,0xa8ee068b841df8d1L,0x20314459176c68efL,
+ 0xf1af32d5915f1f30L },
+ { 0x99c375315d75bd50L,0x837cffbaf72f67bcL,0x0613a41848d7723fL,
+ 0x23d0f130e2d41c8bL } },
+ /* 66 */
+ { { 0x857ab6edf41500d9L,0x0d890ae5fcbeada8L,0x52fe864889725951L,
+ 0xb0288dd6c0a3faddL },
+ { 0x85320f30650bcb08L,0x71af6313695d6e16L,0x31f520a7b989aa76L,
+ 0xffd3724ff408c8d2L } },
+ /* 67 */
+ { { 0x53968e64b458e6cbL,0x992dad20317a5d28L,0x3814ae0b7aa75f56L,
+ 0xf5590f4ad78c26dfL },
+ { 0x0fc24bd3cf0ba55aL,0x0fc4724a0c778baeL,0x1ce9864f683b674aL,
+ 0x18d6da54f6f74a20L } },
+ /* 68 */
+ { { 0xed93e225d5be5a2bL,0x6fe799835934f3c6L,0x4314092622626ffcL,
+ 0x50bbb4d97990216aL },
+ { 0x378191c6e57ec63eL,0x65422c40181dcdb2L,0x41a8099b0236e0f6L,
+ 0x2b10011801fe49c3L } },
+ /* 69 */
+ { { 0xfc68b5c59b391593L,0xc385f5a2598270fcL,0x7144f3aad19adcbbL,
+ 0xdd55899983fbae0cL },
+ { 0x93b88b8e74b82ff4L,0xd2e03c4071e734c9L,0x9a7a9eaf43c0322aL,
+ 0xe6e4c551149d6041L } },
+ /* 70 */
+ { { 0x55f655bb1e9af288L,0x647e1a64f7ada931L,0x43697e4bcb2820e5L,
+ 0x51e00db107ed56ffL },
+ { 0x43d169b8771c327eL,0x29cdb20b4a96c2adL,0xc07d51f53deb4779L,
+ 0xe22f424149829177L } },
+ /* 71 */
+ { { 0xcd45e8f4635f1abbL,0x7edc0cb568538874L,0xc9472c1fb5a8034dL,
+ 0xf709373d52dc48c9L },
+ { 0x401966bba8af30d6L,0x95bf5f4af137b69cL,0x3966162a9361c47eL,
+ 0xbd52d288e7275b11L } },
+ /* 72 */
+ { { 0xab155c7a9c5fa877L,0x17dad6727d3a3d48L,0x43f43f9e73d189d8L,
+ 0xa0d0f8e4c8aa77a6L },
+ { 0x0bbeafd8cc94f92dL,0xd818c8be0c4ddb3aL,0x22cc65f8b82eba14L,
+ 0xa56c78c7946d6a00L } },
+ /* 73 */
+ { { 0x2962391b0dd09529L,0x803e0ea63daddfcfL,0x2c77351f5b5bf481L,
+ 0xd8befdf8731a367aL },
+ { 0xab919d42fc0157f4L,0xf51caed7fec8e650L,0xcdf9cb4002d48b0aL,
+ 0x854a68a5ce9f6478L } },
+ /* 74 */
+ { { 0xdc35f67b63506ea5L,0x9286c489a4fe0d66L,0x3f101d3bfe95cd4dL,
+ 0x5cacea0b98846a95L },
+ { 0xa90df60c9ceac44dL,0x3db29af4354d1c3aL,0x08dd3de8ad5dbabeL,
+ 0xe4982d1235e4efa9L } },
+ /* 75 */
+ { { 0x23104a22c34cd55eL,0x58695bb32680d132L,0xfb345afa1fa1d943L,
+ 0x8046b7f616b20499L },
+ { 0xb533581e38e7d098L,0xd7f61e8df46f0b70L,0x30dea9ea44cb78c4L,
+ 0xeb17ca7b9082af55L } },
+ /* 76 */
+ { { 0x1751b59876a145b9L,0xa5cf6b0fc1bc71ecL,0xd3e03565392715bbL,
+ 0x097b00bafab5e131L },
+ { 0xaa66c8e9565f69e1L,0x77e8f75ab5be5199L,0x6033ba11da4fd984L,
+ 0xf95c747bafdbcc9eL } },
+ /* 77 */
+ { { 0x558f01d3bebae45eL,0xa8ebe9f0c4bc6955L,0xaeb705b1dbc64fc6L,
+ 0x3512601e566ed837L },
+ { 0x9336f1e1fa1161cdL,0x328ab8d54c65ef87L,0x4757eee2724f21e5L,
+ 0x0ef971236068ab6bL } },
+ /* 78 */
+ { { 0x02598cf754ca4226L,0x5eede138f8642c8eL,0x48963f74468e1790L,
+ 0xfc16d9333b4fbc95L },
+ { 0xbe96fb31e7c800caL,0x138063312678adaaL,0x3d6244976ff3e8b5L,
+ 0x14ca4af1b95d7a17L } },
+ /* 79 */
+ { { 0x7a4771babd2f81d5L,0x1a5f9d6901f7d196L,0xd898bef7cad9c907L,
+ 0x4057b063f59c231dL },
+ { 0xbffd82fe89c05c0aL,0xe4911c6f1dc0df85L,0x3befccaea35a16dbL,
+ 0x1c3b5d64f1330b13L } },
+ /* 80 */
+ { { 0x5fe14bfe80ec21feL,0xf6ce116ac255be82L,0x98bc5a072f4a5d67L,
+ 0xfad27148db7e63afL },
+ { 0x90c0b6ac29ab05b3L,0x37a9a83c4e251ae6L,0x0a7dc875c2aade7dL,
+ 0x77387de39f0e1a84L } },
+ /* 81 */
+ { { 0x1e9ecc49a56c0dd7L,0xa5cffcd846086c74L,0x8f7a1408f505aeceL,
+ 0xb37b85c0bef0c47eL },
+ { 0x3596b6e4cc0e6a8fL,0xfd6d4bbf6b388f23L,0xaba453fac39cef4eL,
+ 0x9c135ac8f9f628d5L } },
+ /* 82 */
+ { { 0x32aa320284e35743L,0x320d6ab185a3cdefL,0xb821b1761df19819L,
+ 0x5721361fc433851fL },
+ { 0x1f0db36a71fc9168L,0x5f98ba735e5c403cL,0xf64ca87e37bcd8f5L,
+ 0xdcbac3c9e6bb11bdL } },
+ /* 83 */
+ { { 0xf01d99684518cbe2L,0xd242fc189c9eb04eL,0x727663c7e47feebfL,
+ 0xb8c1c89e2d626862L },
+ { 0x51a58bddc8e1d569L,0x563809c8b7d88cd0L,0x26c27fd9f11f31ebL,
+ 0x5d23bbda2f9422d4L } },
+ /* 84 */
+ { { 0x0a1c729495c8f8beL,0x2961c4803bf362bfL,0x9e418403df63d4acL,
+ 0xc109f9cb91ece900L },
+ { 0xc2d095d058945705L,0xb9083d96ddeb85c0L,0x84692b8d7a40449bL,
+ 0x9bc3344f2eee1ee1L } },
+ /* 85 */
+ { { 0x0d5ae35642913074L,0x55491b2748a542b1L,0x469ca665b310732aL,
+ 0x29591d525f1a4cc1L },
+ { 0xe76f5b6bb84f983fL,0xbe7eef419f5f84e1L,0x1200d49680baa189L,
+ 0x6376551f18ef332cL } },
+ /* 86 */
+ { { 0xbda5f14e562976ccL,0x22bca3e60ef12c38L,0xbbfa30646cca9852L,
+ 0xbdb79dc808e2987aL },
+ { 0xfd2cb5c9cb06a772L,0x38f475aafe536dceL,0xc2a3e0227c2b5db8L,
+ 0x8ee86001add3c14aL } },
+ /* 87 */
+ { { 0xcbe96981a4ade873L,0x7ee9aa4dc4fba48cL,0x2cee28995a054ba5L,
+ 0x92e51d7a6f77aa4bL },
+ { 0x948bafa87190a34dL,0xd698f75bf6bd1ed1L,0xd00ee6e30caf1144L,
+ 0x5182f86f0a56aaaaL } },
+ /* 88 */
+ { { 0xfba6212c7a4cc99cL,0xff609b683e6d9ca1L,0x5dbb27cb5ac98c5aL,
+ 0x91dcab5d4073a6f2L },
+ { 0x01b6cc3d5f575a70L,0x0cb361396f8d87faL,0x165d4e8c89981736L,
+ 0x17a0cedb97974f2bL } },
+ /* 89 */
+ { { 0x38861e2a076c8d3aL,0x701aad39210f924bL,0x94d0eae413a835d9L,
+ 0x2e8ce36c7f4cdf41L },
+ { 0x91273dab037a862bL,0x01ba9bb760e4c8faL,0xf964538833baf2ddL,
+ 0xf4ccc6cb34f668f3L } },
+ /* 90 */
+ { { 0x44ef525cf1f79687L,0x7c59549592efa815L,0xe1231741a5c78d29L,
+ 0xac0db4889a0df3c9L },
+ { 0x86bfc711df01747fL,0x592b9358ef17df13L,0xe5880e4f5ccb6bb5L,
+ 0x95a64a6194c974a2L } },
+ /* 91 */
+ { { 0x72c1efdac15a4c93L,0x40269b7382585141L,0x6a8dfb1c16cb0badL,
+ 0x231e54ba29210677L },
+ { 0xa70df9178ae6d2dcL,0x4d6aa63f39112918L,0xf627726b5e5b7223L,
+ 0xab0be032d8a731e1L } },
+ /* 92 */
+ { { 0x097ad0e98d131f2dL,0x637f09e33b04f101L,0x1ac86196d5e9a748L,
+ 0xf1bcc8802cf6a679L },
+ { 0x25c69140e8daacb4L,0x3c4e405560f65009L,0x591cc8fc477937a6L,
+ 0x851694695aebb271L } },
+ /* 93 */
+ { { 0xde35c143f1dcf593L,0x78202b29b018be3bL,0xe9cdadc29bdd9d3dL,
+ 0x8f67d9d2daad55d8L },
+ { 0x841116567481ea5fL,0xe7d2dde9e34c590cL,0xffdd43f405053fa8L,
+ 0xf84572b9c0728b5dL } },
+ /* 94 */
+ { { 0x5e1a7a7197af71c9L,0xa14494447a736565L,0xa1b4ae070e1d5063L,
+ 0xedee2710616b2c19L },
+ { 0xb2f034f511734121L,0x1cac6e554a25e9f0L,0x8dc148f3a40c2ecfL,
+ 0x9fd27e9b44ebd7f4L } },
+ /* 95 */
+ { { 0x3cc7658af6e2cb16L,0xe3eb7d2cfe5919b6L,0x5a8c5816168d5583L,
+ 0xa40c2fb6958ff387L },
+ { 0x8c9ec560fedcc158L,0x7ad804c655f23056L,0xd93967049a307e12L,
+ 0x99bc9bb87dc6decfL } },
+ /* 96 */
+ { { 0x84a9521d927dafc6L,0x52c1fb695c09cd19L,0x9d9581a0f9366ddeL,
+ 0x9abe210ba16d7e64L },
+ { 0x480af84a48915220L,0xfa73176a4dd816c6L,0xc7d539871681ca5aL,
+ 0x7881c25787f344b0L } },
+ /* 97 */
+ { { 0x93399b51e0bcf3ffL,0x0d02cbc5127f74f6L,0x8fb465a2dd01d968L,
+ 0x15e6e319a30e8940L },
+ { 0x646d6e0d3e0e05f4L,0xfad7bddc43588404L,0xbe61c7d1c4f850d3L,
+ 0x0e55facf191172ceL } },
+ /* 98 */
+ { { 0x7e9d9806f8787564L,0x1a33172131e85ce6L,0x6b0158cab819e8d6L,
+ 0xd73d09766fe96577L },
+ { 0x424834251eb7206eL,0xa519290fc618bb42L,0x5dcbb8595e30a520L,
+ 0x9250a3748f15a50bL } },
+ /* 99 */
+ { { 0xcaff08f8be577410L,0xfd408a035077a8c6L,0xf1f63289ec0a63a4L,
+ 0x77414082c1cc8c0bL },
+ { 0x05a40fa6eb0991cdL,0xc1ca086649fdc296L,0x3a68a3c7b324fd40L,
+ 0x8cb04f4d12eb20b9L } },
+ /* 100 */
+ { { 0xb1c2d0556906171cL,0x9073e9cdb0240c3fL,0xdb8e6b4fd8906841L,
+ 0xe4e429ef47123b51L },
+ { 0x0b8dd53c38ec36f4L,0xf9d2dc01ff4b6a27L,0x5d066e07879a9a48L,
+ 0x37bca2ff3c6e6552L } },
+ /* 101 */
+ { { 0x4cd2e3c7df562470L,0x44f272a2c0964ac9L,0x7c6d5df980c793beL,
+ 0x59913edc3002b22aL },
+ { 0x7a139a835750592aL,0x99e01d80e783de02L,0xcf8c0375ea05d64fL,
+ 0x43786e4ab013e226L } },
+ /* 102 */
+ { { 0xff32b0ed9e56b5a6L,0x0750d9a6d9fc68f9L,0xec15e845597846a7L,
+ 0x8638ca98b7e79e7aL },
+ { 0x2f5ae0960afc24b2L,0x05398eaf4dace8f2L,0x3b765dd0aecba78fL,
+ 0x1ecdd36a7b3aa6f0L } },
+ /* 103 */
+ { { 0x5d3acd626c5ff2f3L,0xa2d516c02873a978L,0xad94c9fad2110d54L,
+ 0xd85d0f85d459f32dL },
+ { 0x9f700b8d10b11da3L,0xd2c22c30a78318c4L,0x556988f49208decdL,
+ 0xa04f19c3b4ed3c62L } },
+ /* 104 */
+ { { 0x087924c8ed7f93bdL,0xcb64ac5d392f51f6L,0x7cae330a821b71afL,
+ 0x92b2eeea5c0950b0L },
+ { 0x85ac4c9485b6e235L,0xab2ca4a92936c0f0L,0x80faa6b3e0508891L,
+ 0x1ee782215834276cL } },
+ /* 105 */
+ { { 0xa60a2e00e63e79f7L,0xf590e7b2f399d906L,0x9021054a6607c09dL,
+ 0xf3f2ced857a6e150L },
+ { 0x200510f3f10d9b55L,0x9d2fcfacd8642648L,0xe5631aa7e8bd0e7cL,
+ 0x0f56a4543da3e210L } },
+ /* 106 */
+ { { 0x5b21bffa1043e0dfL,0x6c74b6cc9c007e6dL,0x1a656ec0d4a8517aL,
+ 0xbd8f17411969e263L },
+ { 0x8a9bbb86beb7494aL,0x1567d46f45f3b838L,0xdf7a12a7a4e5a79aL,
+ 0x2d1a1c3530ccfa09L } },
+ /* 107 */
+ { { 0x192e3813506508daL,0x336180c4a1d795a7L,0xcddb59497a9944b3L,
+ 0xa107a65eb91fba46L },
+ { 0xe6d1d1c50f94d639L,0x8b4af3758a58b7d7L,0x1a7c5584bd37ca1cL,
+ 0x183d760af87a9af2L } },
+ /* 108 */
+ { { 0x29d697110dde59a4L,0xf1ad8d070e8bef87L,0x229b49634f2ebe78L,
+ 0x1d44179dc269d754L },
+ { 0xb32dc0cf8390d30eL,0x0a3b27530de8110cL,0x31af1dc52bc0339aL,
+ 0x771f9cc29606d262L } },
+ /* 109 */
+ { { 0x99993e7785040739L,0x44539db98026a939L,0xcf40f6f2f5f8fc26L,
+ 0x64427a310362718eL },
+ { 0x4f4f2d8785428aa8L,0x7b7adc3febfb49a8L,0x201b2c6df23d01acL,
+ 0x49d9b7496ae90d6dL } },
+ /* 110 */
+ { { 0xcc78d8bc435d1099L,0x2adbcd4e8e8d1a08L,0x02c2e2a02cb68a41L,
+ 0x9037d81b3f605445L },
+ { 0x7cdbac27074c7b61L,0xfe2031ab57bfd72eL,0x61ccec96596d5352L,
+ 0x08c3de6a7cc0639cL } },
+ /* 111 */
+ { { 0x20fdd020f6d552abL,0x56baff9805cd81f1L,0x06fb7c3e91351291L,
+ 0xc690944245796b2fL },
+ { 0x17b3ae9c41231bd1L,0x1eac6e875cc58205L,0x208837abf9d6a122L,
+ 0x3fa3db02cafe3ac0L } },
+ /* 112 */
+ { { 0xd75a3e6505058880L,0x7da365ef643943f2L,0x4147861cfab24925L,
+ 0xc5c4bdb0fdb808ffL },
+ { 0x73513e34b272b56bL,0xc8327e9511b9043aL,0xfd8ce37df8844969L,
+ 0x2d56db9446c2b6b5L } },
+ /* 113 */
+ { { 0x2461782fff46ac6bL,0xd19f792607a2e425L,0xfafea3c409a48de1L,
+ 0x0f56bd9de503ba42L },
+ { 0x137d4ed1345cda49L,0x821158fc816f299dL,0xe7c6a54aaeb43402L,
+ 0x4003bb9d1173b5f1L } },
+ /* 114 */
+ { { 0x3b8e8189a0803387L,0xece115f539cbd404L,0x4297208dd2877f21L,
+ 0x53765522a07f2f9eL },
+ { 0xa4980a21a8a4182dL,0xa2bbd07a3219df79L,0x674d0a2e1a19a2d4L,
+ 0x7a056f586c5d4549L } },
+ /* 115 */
+ { { 0x646b25589d8a2a47L,0x5b582948c3df2773L,0x51ec000eabf0d539L,
+ 0x77d482f17a1a2675L },
+ { 0xb8a1bd9587853948L,0xa6f817bd6cfbffeeL,0xab6ec05780681e47L,
+ 0x4115012b2b38b0e4L } },
+ /* 116 */
+ { { 0x3c73f0f46de28cedL,0x1d5da7609b13ec47L,0x61b8ce9e6e5c6392L,
+ 0xcdf04572fbea0946L },
+ { 0x1cb3c58b6c53c3b0L,0x97fe3c10447b843cL,0xfb2b8ae12cb9780eL,
+ 0xee703dda97383109L } },
+ /* 117 */
+ { { 0x34515140ff57e43aL,0xd44660d3b1b811b8L,0x2b3b5dff8f42b986L,
+ 0x2a0ad89da162ce21L },
+ { 0x64e4a6946bc277baL,0xc788c954c141c276L,0x141aa64ccabf6274L,
+ 0xd62d0b67ac2b4659L } },
+ /* 118 */
+ { { 0x39c5d87b2c054ac4L,0x57005859f27df788L,0xedf7cbf3b18128d6L,
+ 0xb39a23f2991c2426L },
+ { 0x95284a15f0b16ae5L,0x0c6a05b1a136f51bL,0x1d63c137f2700783L,
+ 0x04ed0092c0674cc5L } },
+ /* 119 */
+ { { 0x1f4185d19ae90393L,0x3047b4294a3d64e6L,0xae0001a69854fc14L,
+ 0xa0a91fc10177c387L },
+ { 0xff0a3f01ae2c831eL,0xbb76ae822b727e16L,0x8f12c8a15a3075b4L,
+ 0x084cf9889ed20c41L } },
+ /* 120 */
+ { { 0xd98509defca6becfL,0x2fceae807dffb328L,0x5d8a15c44778e8b9L,
+ 0xd57955b273abf77eL },
+ { 0x210da79e31b5d4f1L,0xaa52f04b3cfa7a1cL,0xd4d12089dc27c20bL,
+ 0x8e14ea4202d141f1L } },
+ /* 121 */
+ { { 0xeed50345f2897042L,0x8d05331f43402c4aL,0xc8d9c194c8bdfb21L,
+ 0x597e1a372aa4d158L },
+ { 0x0327ec1acf0bd68cL,0x6d4be0dcab024945L,0x5b9c8d7ac9fe3e84L,
+ 0xca3f0236199b4deaL } },
+ /* 122 */
+ { { 0x592a10b56170bd20L,0x0ea897f16d3f5de7L,0xa3363ff144b2ade2L,
+ 0xbde7fd7e309c07e4L },
+ { 0x516bb6d2b8f5432cL,0x210dc1cbe043444bL,0x3db01e6ff8f95b5aL,
+ 0xb623ad0e0a7dd198L } },
+ /* 123 */
+ { { 0xa75bd67560c7b65bL,0xab8c559023a4a289L,0xf8220fd0d7b26795L,
+ 0xd6aa2e4658ec137bL },
+ { 0x10abc00b5138bb85L,0x8c31d121d833a95cL,0xb24ff00b1702a32eL,
+ 0x111662e02dcc513aL } },
+ /* 124 */
+ { { 0x78114015efb42b87L,0xbd9f5d701b6c4dffL,0x66ecccd7a7d7c129L,
+ 0xdb3ee1cb94b750f8L },
+ { 0xb26f3db0f34837cfL,0xe7eed18bb9578d4fL,0x5d2cdf937c56657dL,
+ 0x886a644252206a59L } },
+ /* 125 */
+ { { 0x3c234cfb65b569eaL,0x20011141f72119c1L,0x8badc85da15a619eL,
+ 0xa70cf4eb018a17bcL },
+ { 0x224f97ae8c4a6a65L,0x36e5cf270134378fL,0xbe3a609e4f7e0960L,
+ 0xaa4772abd1747b77L } },
+ /* 126 */
+ { { 0x676761317aa60cc0L,0xc79163610368115fL,0xded98bb4bbc1bb5aL,
+ 0x611a6ddc30faf974L },
+ { 0x30e78cbcc15ee47aL,0x2e8962824e0d96a5L,0x36f35adf3dd9ed88L,
+ 0x5cfffaf816429c88L } },
+ /* 127 */
+ { { 0xc0d54cff9b7a99cdL,0x7bf3b99d843c45a1L,0x038a908f62c739e1L,
+ 0x6e5a6b237dc1994cL },
+ { 0xef8b454e0ba5db77L,0xb7b8807facf60d63L,0xe591c0c676608378L,
+ 0x481a238d242dabccL } },
+ /* 128 */
+ { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L,
+ 0x2c41114ce0cdf943L },
+ { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L,
+ 0x20477abf42ff9297L } },
+ /* 129 */
+ { { 0x2f75173c18d65dbfL,0x77bf940e339edad8L,0x7022d26bdcf1001cL,
+ 0xac66409ac77396b6L },
+ { 0x8b0bb36fc6261cc3L,0x213f7bc9190e7e90L,0x6541cebaa45e6c10L,
+ 0xce8e6975cc122f85L } },
+ /* 130 */
+ { { 0x0f121b41bc0a67d2L,0x62d4760a444d248aL,0x0e044f1d659b4737L,
+ 0x08fde365250bb4a8L },
+ { 0xaceec3da848bf287L,0xc2a62182d3369d6eL,0x3582dfdc92449482L,
+ 0x2f7e2fd2565d6cd7L } },
+ /* 131 */
+ { { 0xae4b92dbc3770fa7L,0x095e8d5c379043f9L,0x54f34e9d17761171L,
+ 0xc65be92e907702aeL },
+ { 0x2758a303f6fd0a40L,0xe7d822e3bcce784bL,0x7ae4f5854f9767bfL,
+ 0x4bff8e47d1193b3aL } },
+ /* 132 */
+ { { 0xcd41d21f00ff1480L,0x2ab8fb7d0754db16L,0xac81d2efbbe0f3eaL,
+ 0x3e4e4ae65772967dL },
+ { 0x7e18f36d3c5303e6L,0x3bd9994b92262397L,0x9ed70e261324c3c0L,
+ 0x5388aefd58ec6028L } },
+ /* 133 */
+ { { 0xad1317eb5e5d7713L,0x09b985ee75de49daL,0x32f5bc4fc74fb261L,
+ 0x5cf908d14f75be0eL },
+ { 0x760435108e657b12L,0xbfd421a5b96ed9e6L,0x0e29f51f8970ccc2L,
+ 0xa698ba4060f00ce2L } },
+ /* 134 */
+ { { 0x73db1686ef748fecL,0xe6e755a27e9d2cf9L,0x630b6544ce265effL,
+ 0xb142ef8a7aebad8dL },
+ { 0xad31af9f17d5770aL,0x66af3b672cb3412fL,0x6bd60d1bdf3359deL,
+ 0xd1896a9658515075L } },
+ /* 135 */
+ { { 0xec5957ab33c41c08L,0x87de94ac5468e2e1L,0x18816b73ac472f6cL,
+ 0x267b0e0b7981da39L },
+ { 0x6e554e5d8e62b988L,0xd8ddc755116d21e7L,0x4610faf03d2a6f99L,
+ 0xb54e287aa1119393L } },
+ /* 136 */
+ { { 0x0a0122b5178a876bL,0x51ff96ff085104b4L,0x050b31ab14f29f76L,
+ 0x84abb28b5f87d4e6L },
+ { 0xd5ed439f8270790aL,0x2d6cb59d85e3f46bL,0x75f55c1b6c1e2212L,
+ 0xe5436f6717655640L } },
+ /* 137 */
+ { { 0x53f9025e2286e8d5L,0x353c95b4864453beL,0xd832f5bde408e3a0L,
+ 0x0404f68b5b9ce99eL },
+ { 0xcad33bdea781e8e5L,0x3cdf5018163c2f5bL,0x575769600119caa3L,
+ 0x3a4263df0ac1c701L } },
+ /* 138 */
+ { { 0xc2965ecc9aeb596dL,0x01ea03e7023c92b4L,0x4704b4b62e013961L,
+ 0x0ca8fd3f905ea367L },
+ { 0x92523a42551b2b61L,0x1eb7a89c390fcd06L,0xe7f1d2be0392a63eL,
+ 0x96dca2644ddb0c33L } },
+ /* 139 */
+ { { 0x203bb43a387510afL,0x846feaa8a9a36a01L,0xd23a57702f950378L,
+ 0x4363e2123aad59dcL },
+ { 0xca43a1c740246a47L,0xb362b8d2e55dd24dL,0xf9b086045d8faf96L,
+ 0x840e115cd8bb98c4L } },
+ /* 140 */
+ { { 0xf12205e21023e8a7L,0xc808a8cdd8dc7a0bL,0xe292a272163a5ddfL,
+ 0x5e0d6abd30ded6d4L },
+ { 0x07a721c27cfc0f64L,0x42eec01d0e55ed88L,0x26a7bef91d1f9db2L,
+ 0x7dea48f42945a25aL } },
+ /* 141 */
+ { { 0xabdf6f1ce5060a81L,0xe79f9c72f8f95615L,0xcfd36c5406ac268bL,
+ 0xabc2a2beebfd16d1L },
+ { 0x8ac66f91d3e2eac7L,0x6f10ba63d2dd0466L,0x6790e3770282d31bL,
+ 0x4ea353946c7eefc1L } },
+ /* 142 */
+ { { 0xed8a2f8d5266309dL,0x0a51c6c081945a3eL,0xcecaf45a578c5dc1L,
+ 0x3a76e6891c94ffc3L },
+ { 0x9aace8a47d7b0d0fL,0x963ace968f584a5fL,0x51a30c724e697fbeL,
+ 0x8212a10a465e6464L } },
+ /* 143 */
+ { { 0xef7c61c3cfab8caaL,0x18eb8e840e142390L,0xcd1dff677e9733caL,
+ 0xaa7cab71599cb164L },
+ { 0x02fc9273bc837bd1L,0xc06407d0c36af5d7L,0x17621292f423da49L,
+ 0x40e38073fe0617c3L } },
+ /* 144 */
+ { { 0xf4f80824a7bf9b7cL,0x365d23203fbe30d0L,0xbfbe532097cf9ce3L,
+ 0xe3604700b3055526L },
+ { 0x4dcb99116cc6c2c7L,0x72683708ba4cbee6L,0xdcded434637ad9ecL,
+ 0x6542d677a3dee15fL } },
+ /* 145 */
+ { { 0x3f32b6d07b6c377aL,0x6cb03847903448beL,0xd6fdd3a820da8af7L,
+ 0xa6534aee09bb6f21L },
+ { 0x30a1780d1035facfL,0x35e55a339dcb47e6L,0x6ea50fe1c447f393L,
+ 0xf3cb672fdc9aef22L } },
+ /* 146 */
+ { { 0xeb3719fe3b55fd83L,0xe0d7a46c875ddd10L,0x33ac9fa905cea784L,
+ 0x7cafaa2eaae870e7L },
+ { 0x9b814d041d53b338L,0xe0acc0a0ef87e6c6L,0xfb93d10811672b0fL,
+ 0x0aab13c1b9bd522eL } },
+ /* 147 */
+ { { 0xddcce278d2681297L,0xcb350eb1b509546aL,0x2dc431737661aaf2L,
+ 0x4b91a602847012e9L },
+ { 0xdcff109572f8ddcfL,0x08ebf61e9a911af4L,0x48f4360ac372430eL,
+ 0x49534c5372321cabL } },
+ /* 148 */
+ { { 0x83df7d71f07b7e9dL,0xa478efa313cd516fL,0x78ef264b6c047ee3L,
+ 0xcaf46c4fd65ac5eeL },
+ { 0xa04d0c7792aa8266L,0xedf45466913684bbL,0x56e65168ae4b16b0L,
+ 0x14ce9e5704c6770fL } },
+ /* 149 */
+ { { 0x99445e3e965e8f91L,0xd3aca1bacb0f2492L,0xd31cc70f90c8a0a0L,
+ 0x1bb708a53e4c9a71L },
+ { 0xd5ca9e69558bdd7aL,0x734a0508018a26b1L,0xb093aa714c9cf1ecL,
+ 0xf9d126f2da300102L } },
+ /* 150 */
+ { { 0x749bca7aaff9563eL,0xdd077afeb49914a0L,0xe27a0311bf5f1671L,
+ 0x807afcb9729ecc69L },
+ { 0x7f8a9337c9b08b77L,0x86c3a785443c7e38L,0x85fafa59476fd8baL,
+ 0x751adcd16568cd8cL } },
+ /* 151 */
+ { { 0x8aea38b410715c0dL,0xd113ea718f7697f7L,0x665eab1493fbf06dL,
+ 0x29ec44682537743fL },
+ { 0x3d94719cb50bebbcL,0x399ee5bfe4505422L,0x90cd5b3a8d2dedb1L,
+ 0xff9370e392a4077dL } },
+ /* 152 */
+ { { 0x59a2d69bc6b75b65L,0x4188f8d5266651c5L,0x28a9f33e3de9d7d2L,
+ 0x9776478ba2a9d01aL },
+ { 0x8852622d929af2c7L,0x334f5d6d4e690923L,0xce6cc7e5a89a51e9L,
+ 0x74a6313fac2f82faL } },
+ /* 153 */
+ { { 0xb2f4dfddb75f079cL,0x85b07c9518e36fbbL,0x1b6cfcf0e7cd36ddL,
+ 0xab75be150ff4863dL },
+ { 0x81b367c0173fc9b7L,0xb90a7420d2594fd0L,0x15fdbf03c4091236L,
+ 0x4ebeac2e0b4459f6L } },
+ /* 154 */
+ { { 0xeb6c5fe75c9f2c53L,0xd25220118eae9411L,0xc8887633f95ac5d8L,
+ 0xdf99887b2c1baffcL },
+ { 0xbb78eed2850aaecbL,0x9d49181b01d6a272L,0x978dd511b1cdbcacL,
+ 0x27b040a7779f4058L } },
+ /* 155 */
+ { { 0x90405db7f73b2eb2L,0xe0df85088e1b2118L,0x501b71525962327eL,
+ 0xb393dd37e4cfa3f5L },
+ { 0xa1230e7b3fd75165L,0xd66344c2bcd33554L,0x6c36f1be0f7b5022L,
+ 0x09588c12d0463419L } },
+ /* 156 */
+ { { 0xe086093f02601c3bL,0xfb0252f8cf5c335fL,0x955cf280894aff28L,
+ 0x81c879a9db9f648bL },
+ { 0x040e687cc6f56c51L,0xfed471693f17618cL,0x44f88a419059353bL,
+ 0xfa0d48f55fc11bc4L } },
+ /* 157 */
+ { { 0xbc6e1c9de1608e4dL,0x010dda113582822cL,0xf6b7ddc1157ec2d7L,
+ 0x8ea0e156b6a367d6L },
+ { 0xa354e02f2383b3b4L,0x69966b943f01f53cL,0x4ff6632b2de03ca5L,
+ 0x3f5ab924fa00b5acL } },
+ /* 158 */
+ { { 0x337bb0d959739efbL,0xc751b0f4e7ebec0dL,0x2da52dd6411a67d1L,
+ 0x8bc768872b74256eL },
+ { 0xa5be3b7282d3d253L,0xa9f679a1f58d779fL,0xa1cac168e16767bbL,
+ 0xb386f19060fcf34fL } },
+ /* 159 */
+ { { 0x31f3c1352fedcfc2L,0x5396bf6262f8af0dL,0x9a02b4eae57288c2L,
+ 0x4cb460f71b069c4dL },
+ { 0xae67b4d35b8095eaL,0x92bbf8596fc07603L,0xe1475f66b614a165L,
+ 0x52c0d50895ef5223L } },
+ /* 160 */
+ { { 0x231c210e15339848L,0xe87a28e870778c8dL,0x9d1de6616956e170L,
+ 0x4ac3c9382bb09c0bL },
+ { 0x19be05516998987dL,0x8b2376c4ae09f4d6L,0x1de0b7651a3f933dL,
+ 0x380d94c7e39705f4L } },
+ /* 161 */
+ { { 0x01a355aa81542e75L,0x96c724a1ee01b9b7L,0x6b3a2977624d7087L,
+ 0x2ce3e171de2637afL },
+ { 0xcfefeb49f5d5bc1aL,0xa655607e2777e2b5L,0x4feaac2f9513756cL,
+ 0x2e6cd8520b624e4dL } },
+ /* 162 */
+ { { 0x3685954b8c31c31dL,0x68533d005bf21a0cL,0x0bd7626e75c79ec9L,
+ 0xca17754742c69d54L },
+ { 0xcc6edafff6d2dbb2L,0xfd0d8cbd174a9d18L,0x875e8793aa4578e8L,
+ 0xa976a7139cab2ce6L } },
+ /* 163 */
+ { { 0x0a651f1b93fb353dL,0xd75cab8b57fcfa72L,0xaa88cfa731b15281L,
+ 0x8720a7170a1f4999L },
+ { 0x8c3e8d37693e1b90L,0xd345dc0b16f6dfc3L,0x8ea8d00ab52a8742L,
+ 0x9719ef29c769893cL } },
+ /* 164 */
+ { { 0x820eed8d58e35909L,0x9366d8dc33ddc116L,0xd7f999d06e205026L,
+ 0xa5072976e15704c1L },
+ { 0x002a37eac4e70b2eL,0x84dcf6576890aa8aL,0xcd71bf18645b2a5cL,
+ 0x99389c9df7b77725L } },
+ /* 165 */
+ { { 0x238c08f27ada7a4bL,0x3abe9d03fd389366L,0x6b672e89766f512cL,
+ 0xa88806aa202c82e4L },
+ { 0x6602044ad380184eL,0xa8cb78c4126a8b85L,0x79d670c0ad844f17L,
+ 0x0043bffb4738dcfeL } },
+ /* 166 */
+ { { 0x8d59b5dc36d5192eL,0xacf885d34590b2afL,0x83566d0a11601781L,
+ 0x52f3ef01ba6c4866L },
+ { 0x3986732a0edcb64dL,0x0a482c238068379fL,0x16cbe5fa7040f309L,
+ 0x3296bd899ef27e75L } },
+ /* 167 */
+ { { 0x476aba89454d81d7L,0x9eade7ef51eb9b3cL,0x619a21cd81c57986L,
+ 0x3b90febfaee571e9L },
+ { 0x9393023e5496f7cbL,0x55be41d87fb51bc4L,0x03f1dd4899beb5ceL,
+ 0x6e88069d9f810b18L } },
+ /* 168 */
+ { { 0xce37ab11b43ea1dbL,0x0a7ff1a95259d292L,0x851b02218f84f186L,
+ 0xa7222beadefaad13L },
+ { 0xa2ac78ec2b0a9144L,0x5a024051f2fa59c5L,0x91d1eca56147ce38L,
+ 0xbe94d523bc2ac690L } },
+ /* 169 */
+ { { 0x72f4945e0b226ce7L,0xb8afd747967e8b70L,0xedea46f185a6c63eL,
+ 0x7782defe9be8c766L },
+ { 0x760d2aa43db38626L,0x460ae78776f67ad1L,0x341b86fc54499cdbL,
+ 0x03838567a2892e4bL } },
+ /* 170 */
+ { { 0x2d8daefd79ec1a0fL,0x3bbcd6fdceb39c97L,0xf5575ffc58f61a95L,
+ 0xdbd986c4adf7b420L },
+ { 0x81aa881415f39eb7L,0x6ee2fcf5b98d976cL,0x5465475dcf2f717dL,
+ 0x8e24d3c46860bbd0L } },
+ /* 171 */
+ { { 0x749d8e549a587390L,0x12bb194f0cbec588L,0x46e07da4b25983c6L,
+ 0x541a99c4407bafc8L },
+ { 0xdb241692624c8842L,0x6044c12ad86c05ffL,0xc59d14b44f7fcf62L,
+ 0xc0092c49f57d35d1L } },
+ /* 172 */
+ { { 0xd3cc75c3df2e61efL,0x7e8841c82e1b35caL,0xc62d30d1909f29f4L,
+ 0x75e406347286944dL },
+ { 0xe7d41fc5bbc237d0L,0xc9537bf0ec4f01c9L,0x91c51a16282bd534L,
+ 0x5b7cb658c7848586L } },
+ /* 173 */
+ { { 0x964a70848a28ead1L,0x802dc508fd3b47f6L,0x9ae4bfd1767e5b39L,
+ 0x7ae13eba8df097a1L },
+ { 0xfd216ef8eadd384eL,0x0361a2d9b6b2ff06L,0x204b98784bcdb5f3L,
+ 0x787d8074e2a8e3fdL } },
+ /* 174 */
+ { { 0xc5e25d6b757fbb1cL,0xe47bddb2ca201debL,0x4a55e9a36d2233ffL,
+ 0x5c2228199ef28484L },
+ { 0x773d4a8588315250L,0x21b21a2b827097c1L,0xab7c4ea1def5d33fL,
+ 0xe45d37abbaf0f2b0L } },
+ /* 175 */
+ { { 0xd2df1e3428511c8aL,0xebb229c8bdca6cd3L,0x578a71a7627c39a7L,
+ 0xed7bc12284dfb9d3L },
+ { 0xcf22a6df93dea561L,0x5443f18dd48f0ed1L,0xd8b861405bad23e8L,
+ 0xaac97cc945ca6d27L } },
+ /* 176 */
+ { { 0xeb54ea74a16bd00aL,0xd839e9adf5c0bcc1L,0x092bb7f11f9bfc06L,
+ 0x318f97b31163dc4eL },
+ { 0xecc0c5bec30d7138L,0x44e8df23abc30220L,0x2bb7972fb0223606L,
+ 0xfa41faa19a84ff4dL } },
+ /* 177 */
+ { { 0x4402d974a6642269L,0xc81814ce9bb783bdL,0x398d38e47941e60bL,
+ 0x38bb6b2c1d26e9e2L },
+ { 0xc64e4a256a577f87L,0x8b52d253dc11fe1cL,0xff336abf62280728L,
+ 0x94dd0905ce7601a5L } },
+ /* 178 */
+ { { 0x156cf7dcde93f92aL,0xa01333cb89b5f315L,0x02404df9c995e750L,
+ 0x92077867d25c2ae9L },
+ { 0xe2471e010bf39d44L,0x5f2c902096bb53d7L,0x4c44b7b35c9c3d8fL,
+ 0x81e8428bd29beb51L } },
+ /* 179 */
+ { { 0x6dd9c2bac477199fL,0x8cb8eeee6b5ecdd9L,0x8af7db3fee40fd0eL,
+ 0x1b94ab62dbbfa4b1L },
+ { 0x44f0d8b3ce47f143L,0x51e623fc63f46163L,0xf18f270fcc599383L,
+ 0x06a38e28055590eeL } },
+ /* 180 */
+ { { 0x2e5b0139b3355b49L,0x20e26560b4ebf99bL,0xc08ffa6bd269f3dcL,
+ 0xa7b36c2083d9d4f8L },
+ { 0x64d15c3a1b3e8830L,0xd5fceae1a89f9c0bL,0xcfeee4a2e2d16930L,
+ 0xbe54c6b4a2822a20L } },
+ /* 181 */
+ { { 0xd6cdb3df8d91167cL,0x517c3f79e7a6625eL,0x7105648f346ac7f4L,
+ 0xbf30a5abeae022bbL },
+ { 0x8e7785be93828a68L,0x5161c3327f3ef036L,0xe11b5feb592146b2L,
+ 0xd1c820de2732d13aL } },
+ /* 182 */
+ { { 0x043e13479038b363L,0x58c11f546b05e519L,0x4fe57abe6026cad1L,
+ 0xb7d17bed68a18da3L },
+ { 0x44ca5891e29c2559L,0x4f7a03765bfffd84L,0x498de4af74e46948L,
+ 0x3997fd5e6412cc64L } },
+ /* 183 */
+ { { 0xf20746828bd61507L,0x29e132d534a64d2aL,0xffeddfb08a8a15e3L,
+ 0x0eeb89293c6c13e8L },
+ { 0xe9b69a3ea7e259f8L,0xce1db7e6d13e7e67L,0x277318f6ad1fa685L,
+ 0x228916f8c922b6efL } },
+ /* 184 */
+ { { 0x959ae25b0a12ab5bL,0xcc11171f957bc136L,0x8058429ed16e2b0cL,
+ 0xec05ad1d6e93097eL },
+ { 0x157ba5beac3f3708L,0x31baf93530b59d77L,0x47b55237118234e5L,
+ 0x7d3141567ff11b37L } },
+ /* 185 */
+ { { 0x7bd9c05cf6dfefabL,0xbe2f2268dcb37707L,0xe53ead973a38bb95L,
+ 0xe9ce66fc9bc1d7a3L },
+ { 0x75aa15766f6a02a1L,0x38c087df60e600edL,0xf8947f3468cdc1b9L,
+ 0xd9650b0172280651L } },
+ /* 186 */
+ { { 0x504b4c4a5a057e60L,0xcbccc3be8def25e4L,0xa635320817c1ccbdL,
+ 0x14d6699a804eb7a2L },
+ { 0x2c8a8415db1f411aL,0x09fbaf0bf80d769cL,0xb4deef901c2f77adL,
+ 0x6f4c68410d43598aL } },
+ /* 187 */
+ { { 0x8726df4e96c24a96L,0x534dbc85fcbd99a3L,0x3c466ef28b2ae30aL,
+ 0x4c4350fd61189abbL },
+ { 0x2967f716f855b8daL,0x41a42394463c38a1L,0xc37e1413eae93343L,
+ 0xa726d2425a3118b5L } },
+ /* 188 */
+ { { 0xdae6b3ee948c1086L,0xf1de503dcbd3a2e1L,0x3f35ed3f03d022f3L,
+ 0x13639e82cc6cf392L },
+ { 0x9ac938fbcdafaa86L,0xf45bc5fb2654a258L,0x1963b26e45051329L,
+ 0xca9365e1c1a335a3L } },
+ /* 189 */
+ { { 0x3615ac754c3b2d20L,0x742a5417904e241bL,0xb08521c4cc9d071dL,
+ 0x9ce29c34970b72a5L },
+ { 0x8cc81f736d3e0ad6L,0x8060da9ef2f8434cL,0x35ed1d1a6ce862d9L,
+ 0x48c4abd7ab42af98L } },
+ /* 190 */
+ { { 0xd221b0cc40c7485aL,0xead455bbe5274dbfL,0x493c76989263d2e8L,
+ 0x78017c32f67b33cbL },
+ { 0xb9d35769930cb5eeL,0xc0d14e940c408ed2L,0xf8b7bf55272f1a4dL,
+ 0x53cd0454de5c1c04L } },
+ /* 191 */
+ { { 0xbcd585fa5d28ccacL,0x5f823e56005b746eL,0x7c79f0a1cd0123aaL,
+ 0xeea465c1d3d7fa8fL },
+ { 0x7810659f0551803bL,0x6c0b599f7ce6af70L,0x4195a77029288e70L,
+ 0x1b6e42a47ae69193L } },
+ /* 192 */
+ { { 0x2e80937cf67d04c3L,0x1e312be289eeb811L,0x56b5d88792594d60L,
+ 0x0224da14187fbd3dL },
+ { 0x87abb8630c5fe36fL,0x580f3c604ef51f5fL,0x964fb1bfb3b429ecL,
+ 0x60838ef042bfff33L } },
+ /* 193 */
+ { { 0x432cb2f27e0bbe99L,0x7bda44f304aa39eeL,0x5f497c7a9fa93903L,
+ 0x636eb2022d331643L },
+ { 0xfcfd0e6193ae00aaL,0x875a00fe31ae6d2fL,0xf43658a29f93901cL,
+ 0x8844eeb639218bacL } },
+ /* 194 */
+ { { 0x114171d26b3bae58L,0x7db3df7117e39f3eL,0xcd37bc7f81a8eadaL,
+ 0x27ba83dc51fb789eL },
+ { 0xa7df439ffbf54de5L,0x7277030bb5fe1a71L,0x42ee8e35db297a48L,
+ 0xadb62d3487f3a4abL } },
+ /* 195 */
+ { { 0x9b1168a2a175df2aL,0x082aa04f618c32e9L,0xc9e4f2e7146b0916L,
+ 0xb990fd7675e7c8b2L },
+ { 0x0829d96b4df37313L,0x1c205579d0b40789L,0x66c9ae4a78087711L,
+ 0x81707ef94d10d18dL } },
+ /* 196 */
+ { { 0x97d7cab203d6ff96L,0x5b851bfc0d843360L,0x268823c4d042db4bL,
+ 0x3792daead5a8aa5cL },
+ { 0x52818865941afa0bL,0xf3e9e74142d83671L,0x17c825275be4e0a7L,
+ 0x5abd635e94b001baL } },
+ /* 197 */
+ { { 0x727fa84e0ac4927cL,0xe3886035a7c8cf23L,0xa4bcd5ea4adca0dfL,
+ 0x5995bf21846ab610L },
+ { 0xe90f860b829dfa33L,0xcaafe2ae958fc18bL,0x9b3baf4478630366L,
+ 0x44c32ca2d483411eL } },
+ /* 198 */
+ { { 0xa74a97f1e40ed80cL,0x5f938cb131d2ca82L,0x53f2124b7c2d6ad9L,
+ 0x1f2162fb8082a54cL },
+ { 0x7e467cc5720b173eL,0x40e8a666085f12f9L,0x8cebc20e4c9d65dcL,
+ 0x8f1d402bc3e907c9L } },
+ /* 199 */
+ { { 0x4f592f9cfbc4058aL,0xb15e14b6292f5670L,0xc55cfe37bc1d8c57L,
+ 0xb1980f43926edbf9L },
+ { 0x98c33e0932c76b09L,0x1df5279d33b07f78L,0x6f08ead4863bb461L,
+ 0x2828ad9b37448e45L } },
+ /* 200 */
+ { { 0x696722c4c4cf4ac5L,0xf5ac1a3fdde64afbL,0x0551baa2e0890832L,
+ 0x4973f1275a14b390L },
+ { 0xe59d8335322eac5dL,0x5e07eef50bd9b568L,0xab36720fa2588393L,
+ 0x6dac8ed0db168ac7L } },
+ /* 201 */
+ { { 0xf7b545aeeda835efL,0x4aa113d21d10ed51L,0x035a65e013741b09L,
+ 0x4b23ef5920b9de4cL },
+ { 0xe82bb6803c4c7341L,0xd457706d3f58bc37L,0x73527863a51e3ee8L,
+ 0x4dd71534ddf49a4eL } },
+ /* 202 */
+ { { 0xbf94467295476cd9L,0x648d072fe31a725bL,0x1441c8b8fc4b67e0L,
+ 0xfd3170002f4a4dbbL },
+ { 0x1cb43ff48995d0e1L,0x76e695d10ef729aaL,0xe0d5f97641798982L,
+ 0x14fac58c9569f365L } },
+ /* 203 */
+ { { 0xad9a0065f312ae18L,0x51958dc0fcc93fc9L,0xd9a142408a7d2846L,
+ 0xed7c765136abda50L },
+ { 0x46270f1a25d4abbcL,0x9b5dd8f3f1a113eaL,0xc609b0755b51952fL,
+ 0xfefcb7f74d2e9f53L } },
+ /* 204 */
+ { { 0xbd09497aba119185L,0xd54e8c30aac45ba4L,0x492479deaa521179L,
+ 0x1801a57e87e0d80bL },
+ { 0x073d3f8dfcafffb0L,0x6cf33c0bae255240L,0x781d763b5b5fdfbcL,
+ 0x9f8fc11e1ead1064L } },
+ /* 205 */
+ { { 0x1583a1715e69544cL,0x0eaf8567f04b7813L,0x1e22a8fd278a4c32L,
+ 0xa9d3809d3d3a69a9L },
+ { 0x936c2c2c59a2da3bL,0x38ccbcf61895c847L,0x5e65244e63d50869L,
+ 0x3006b9aee1178ef7L } },
+ /* 206 */
+ { { 0x0bb1f2b0c9eead28L,0x7eef635d89f4dfbcL,0x074757fdb2ce8939L,
+ 0x0ab85fd745f8f761L },
+ { 0xecda7c933e5b4549L,0x4be2bb5c97922f21L,0x261a1274b43b8040L,
+ 0xb122d67511e942c2L } },
+ /* 207 */
+ { { 0x3be607be66a5ae7aL,0x01e703fa76adcbe3L,0xaf9043014eb6e5c5L,
+ 0x9f599dc1097dbaecL },
+ { 0x6d75b7180ff250edL,0x8eb91574349a20dcL,0x425605a410b227a3L,
+ 0x7d5528e08a294b78L } },
+ /* 208 */
+ { { 0xf0f58f6620c26defL,0x025585ea582b2d1eL,0xfbe7d79b01ce3881L,
+ 0x28ccea01303f1730L },
+ { 0xd1dabcd179644ba5L,0x1fc643e806fff0b8L,0xa60a76fc66b3e17bL,
+ 0xc18baf48a1d013bfL } },
+ /* 209 */
+ { { 0x34e638c85dc4216dL,0x00c01067206142acL,0xd453a17195f5064aL,
+ 0x9def809db7a9596bL },
+ { 0x41e8642e67ab8d2cL,0xb42404336237a2b6L,0x7d506a6d64c4218bL,
+ 0x0357f8b068808ce5L } },
+ /* 210 */
+ { { 0x8e9dbe644cd2cc88L,0xcc61c28df0b8f39dL,0x4a309874cd30a0c8L,
+ 0xe4a01add1b489887L },
+ { 0x2ed1eeacf57cd8f9L,0x1b767d3ebd594c48L,0xa7295c717bd2f787L,
+ 0x466d7d79ce10cc30L } },
+ /* 211 */
+ { { 0x47d318929dada2c7L,0x4fa0a6c38f9aa27dL,0x90e4fd28820a59e1L,
+ 0xc672a522451ead1aL },
+ { 0x30607cc85d86b655L,0xf0235d3bf9ad4af1L,0x99a08680571172a6L,
+ 0x5e3d64faf2a67513L } },
+ /* 212 */
+ { { 0xaa6410c79b3b4416L,0xcd8fcf85eab26d99L,0x5ebff74adb656a74L,
+ 0x6c8a7a95eb8e42fcL },
+ { 0x10c60ba7b02a63bdL,0x6b2f23038b8f0047L,0x8c6c3738312d90b0L,
+ 0x348ae422ad82ca91L } },
+ /* 213 */
+ { { 0x7f4746635ccda2fbL,0x22accaa18e0726d2L,0x85adf782492b1f20L,
+ 0xc1074de0d9ef2d2eL },
+ { 0xfcf3ce44ae9a65b3L,0xfd71e4ac05d7151bL,0xd4711f50ce6a9788L,
+ 0xfbadfbdbc9e54ffcL } },
+ /* 214 */
+ { { 0x1713f1cd20a99363L,0xb915658f6cf22775L,0x968175cd24d359b2L,
+ 0xb7f976b483716fcdL },
+ { 0x5758e24d5d6dbf74L,0x8d23bafd71c3af36L,0x48f477600243dfe3L,
+ 0xf4d41b2ecafcc805L } },
+ /* 215 */
+ { { 0x51f1cf28fdabd48dL,0xce81be3632c078a4L,0x6ace2974117146e9L,
+ 0x180824eae0160f10L },
+ { 0x0387698b66e58358L,0x63568752ce6ca358L,0x82380e345e41e6c5L,
+ 0x67e5f63983cf6d25L } },
+ /* 216 */
+ { { 0xf89ccb8dcf4899efL,0x949015f09ebb44c0L,0x546f9276b2598ec9L,
+ 0x9fef789a04c11fc6L },
+ { 0x6d367ecf53d2a071L,0xb10e1a7fa4519b09L,0xca6b3fb0611e2eefL,
+ 0xbc80c181a99c4e20L } },
+ /* 217 */
+ { { 0x972536f8e5eb82e6L,0x1a484fc7f56cb920L,0xc78e217150b5da5eL,
+ 0x49270e629f8cdf10L },
+ { 0x1a39b7bbea6b50adL,0x9a0284c1a2388ffcL,0x5403eb178107197bL,
+ 0xd2ee52f961372f7fL } },
+ /* 218 */
+ { { 0xd37cd28588e0362aL,0x442fa8a78fa5d94dL,0xaff836e5a434a526L,
+ 0xdfb478bee5abb733L },
+ { 0xa91f1ce7673eede6L,0xa5390ad42b5b2f04L,0x5e66f7bf5530da2fL,
+ 0xd9a140b408df473aL } },
+ /* 219 */
+ { { 0x0e0221b56e8ea498L,0x623478293563ee09L,0xe06b8391335d2adeL,
+ 0x760c058d623f4b1aL },
+ { 0x0b89b58cc198aa79L,0xf74890d2f07aba7fL,0x4e204110fde2556aL,
+ 0x7141982d8f190409L } },
+ /* 220 */
+ { { 0x6f0a0e334d4b0f45L,0xd9280b38392a94e1L,0x3af324c6b3c61d5eL,
+ 0x3af9d1ce89d54e47L },
+ { 0xfd8f798120930371L,0xeda2664c21c17097L,0x0e9545dcdc42309bL,
+ 0xb1f815c373957dd6L } },
+ /* 221 */
+ { { 0x84faa78e89fec44aL,0xc8c2ae473caa4cafL,0x691c807dc1b6a624L,
+ 0xa41aed141543f052L },
+ { 0x424353997d5ffe04L,0x8bacb2df625b6e20L,0x85d660be87817775L,
+ 0xd6e9c1dd86fb60efL } },
+ /* 222 */
+ { { 0x3aa2e97ec6853264L,0x771533b7e2304a0bL,0x1b912bb7b8eae9beL,
+ 0x9c9c6e10ae9bf8c2L },
+ { 0xa2309a59e030b74cL,0x4ed7494d6a631e90L,0x89f44b23a49b79f2L,
+ 0x566bd59640fa61b6L } },
+ /* 223 */
+ { { 0x066c0118c18061f3L,0x190b25d37c83fc70L,0xf05fc8e027273245L,
+ 0xcf2c7390f525345eL },
+ { 0xa09bceb410eb30cfL,0xcfd2ebba0d77703aL,0xe842c43a150ff255L,
+ 0x02f517558aa20979L } },
+ /* 224 */
+ { { 0x396ef794addb7d07L,0x0b4fc74224455500L,0xfaff8eacc78aa3ceL,
+ 0x14e9ada5e8d4d97dL },
+ { 0xdaa480a12f7079e2L,0x45baa3cde4b0800eL,0x01765e2d7838157dL,
+ 0xa0ad4fab8e9d9ae8L } },
+ /* 225 */
+ { { 0x0bfb76214a653618L,0x1872813c31eaaa5fL,0x1553e73744949d5eL,
+ 0xbcd530b86e56ed1eL },
+ { 0x169be85332e9c47bL,0xdc2776feb50059abL,0xcdba9761192bfbb4L,
+ 0x909283cf6979341dL } },
+ /* 226 */
+ { { 0x67b0032476e81a13L,0x9bee1a9962171239L,0x08ed361bd32e19d6L,
+ 0x35eeb7c9ace1549aL },
+ { 0x1280ae5a7e4e5bdcL,0x2dcd2cd3b6ceec6eL,0x52e4224c6e266bc1L,
+ 0x9a8b2cf4448ae864L } },
+ /* 227 */
+ { { 0xf6471bf209d03b59L,0xc90e62a3b65af2abL,0xff7ff168ebd5eec9L,
+ 0x6bdb60f4d4491379L },
+ { 0xdadafebc8a55bc30L,0xc79ead1610097fe0L,0x42e197414c1e3bddL,
+ 0x01ec3cfd94ba08a9L } },
+ /* 228 */
+ { { 0xba6277ebdc9485c2L,0x48cc9a7922fb10c7L,0x4f61d60f70a28d8aL,
+ 0xd1acb1c0475464f6L },
+ { 0xd26902b126f36612L,0x59c3a44ee0618d8bL,0x4df8a813308357eeL,
+ 0x7dcd079d405626c2L } },
+ /* 229 */
+ { { 0x5ce7d4d3f05a4b48L,0xadcd295237230772L,0xd18f7971812a915aL,
+ 0x0bf53589377d19b8L },
+ { 0x35ecd95a6c68ea73L,0xc7f3bbca823a584dL,0x9fb674c6f473a723L,
+ 0xd28be4d9e16686fcL } },
+ /* 230 */
+ { { 0x5d2b990638fa8e4bL,0x559f186e893fd8fcL,0x3a6de2aa436fb6fcL,
+ 0xd76007aa510f88ceL },
+ { 0x2d10aab6523a4988L,0xb455cf4474dd0273L,0x7f467082a3407278L,
+ 0xf2b52f68b303bb01L } },
+ /* 231 */
+ { { 0x0d57eafa9835b4caL,0x2d2232fcbb669cbcL,0x8eeeb680c6643198L,
+ 0xd8dbe98ecc5aed3aL },
+ { 0xcba9be3fc5a02709L,0x30be68e5f5ba1fa8L,0xfebd43cdf10ea852L,
+ 0xe01593a3ee559705L } },
+ /* 232 */
+ { { 0xd3e5af50ea75a0a6L,0x512226ac57858033L,0x6fe6d50fd0176406L,
+ 0xafec07b1aeb8ef06L },
+ { 0x7fb9956780bb0a31L,0x6f1af3cc37309aaeL,0x9153a15a01abf389L,
+ 0xa71b93546e2dbfddL } },
+ /* 233 */
+ { { 0xbf8e12e018f593d2L,0xd1a90428a078122bL,0x150505db0ba4f2adL,
+ 0x53a2005c628523d9L },
+ { 0x07c8b639e7f2b935L,0x2bff975ac182961aL,0x86bceea77518ca2cL,
+ 0xbf47d19b3d588e3dL } },
+ /* 234 */
+ { { 0x672967a7dd7665d5L,0x4e3030572f2f4de5L,0x144005ae80d4903fL,
+ 0x001c2c7f39c9a1b6L },
+ { 0x143a801469efc6d6L,0xc810bdaa7bc7a724L,0x5f65670ba78150a4L,
+ 0xfdadf8e786ffb99bL } },
+ /* 235 */
+ { { 0xfd38cb88ffc00785L,0x77fa75913b48eb67L,0x0454d055bf368fbcL,
+ 0x3a838e4d5aa43c94L },
+ { 0x561663293e97bb9aL,0x9eb93363441d94d9L,0x515591a60adb2a83L,
+ 0x3cdb8257873e1da3L } },
+ /* 236 */
+ { { 0x137140a97de77eabL,0xf7e1c50d41648109L,0x762dcad2ceb1d0dfL,
+ 0x5a60cc89f1f57fbaL },
+ { 0x80b3638240d45673L,0x1b82be195913c655L,0x057284b8dd64b741L,
+ 0x922ff56fdbfd8fc0L } },
+ /* 237 */
+ { { 0x1b265deec9a129a1L,0xa5b1ce57cc284e04L,0x04380c46cebfbe3cL,
+ 0x72919a7df6c5cd62L },
+ { 0x298f453a8fb90f9aL,0xd719c00b88e4031bL,0xe32c0e77796f1856L,
+ 0x5e7917803624089aL } },
+ /* 238 */
+ { { 0x5c16ec557f63cdfbL,0x8e6a3571f1cae4fdL,0xfce26bea560597caL,
+ 0x4e0a5371e24c2fabL },
+ { 0x276a40d3a5765357L,0x3c89af440d73a2b4L,0xb8f370ae41d11a32L,
+ 0xf5ff7818d56604eeL } },
+ /* 239 */
+ { { 0xfbf3e3fe1a09df21L,0x26d5d28ee66e8e47L,0x2096bd0a29c89015L,
+ 0xe41df0e9533f5e64L },
+ { 0x305fda40b3ba9e3fL,0xf2340ceb2604d895L,0x0866e1927f0367c7L,
+ 0x8edd7d6eac4f155fL } },
+ /* 240 */
+ { { 0xc9a1dc0e0bfc8ff3L,0x14efd82be936f42fL,0x67016f7ccca381efL,
+ 0x1432c1caed8aee96L },
+ { 0xec68482970b23c26L,0xa64fe8730735b273L,0xe389f6e5eaef0f5aL,
+ 0xcaef480b5ac8d2c6L } },
+ /* 241 */
+ { { 0x5245c97875315922L,0xd82951713063cca5L,0xf3ce60d0b64ef2cbL,
+ 0xd0ba177e8efae236L },
+ { 0x53a9ae8fb1b3af60L,0x1a796ae53d2da20eL,0x01d63605df9eef28L,
+ 0xf31c957c1c54ae16L } },
+ /* 242 */
+ { { 0xc0f58d5249cc4597L,0xdc5015b0bae0a028L,0xefc5fc55734a814aL,
+ 0x013404cb96e17c3aL },
+ { 0xb29e2585c9a824bfL,0xd593185e001eaed7L,0x8d6ee68261ef68acL,
+ 0x6f377c4b91933e6cL } },
+ /* 243 */
+ { { 0x9f93bad1a8333fd2L,0xa89302025a2a95b8L,0x211e5037eaf75aceL,
+ 0x6dba3e4ed2d09506L },
+ { 0xa48ef98cd04399cdL,0x1811c66ee6b73adeL,0x72f60752c17ecaf3L,
+ 0xf13cf3423becf4a7L } },
+ /* 244 */
+ { { 0xceeb9ec0a919e2ebL,0x83a9a195f62c0f68L,0xcfba3bb67aba2299L,
+ 0xc83fa9a9274bbad3L },
+ { 0x0d7d1b0b62fa1ce0L,0xe58b60f53418efbfL,0xbfa8ef9e52706f04L,
+ 0xb49d70f45d702683L } },
+ /* 245 */
+ { { 0x914c7510fad5513bL,0x05f32eecb1751e2dL,0x6d850418d9fb9d59L,
+ 0x59cfadbb0c30f1cfL },
+ { 0xe167ac2355cb7fd6L,0x249367b8820426a3L,0xeaeec58c90a78864L,
+ 0x5babf362354a4b67L } },
+ /* 246 */
+ { { 0x37c981d1ee424865L,0x8b002878f2e5577fL,0x702970f1b9e0c058L,
+ 0x6188c6a79026c8f0L },
+ { 0x06f9a19bd0f244daL,0x1ecced5cfb080873L,0x35470f9b9f213637L,
+ 0x993fe475df50b9d9L } },
+ /* 247 */
+ { { 0x68e31cdf9b2c3609L,0x84eb19c02c46d4eaL,0x7ac9ec1a9a775101L,
+ 0x81f764664c80616bL },
+ { 0x1d7c2a5a75fbe978L,0x6743fed3f183b356L,0x838d1f04501dd2bfL,
+ 0x564a812a5fe9060dL } },
+ /* 248 */
+ { { 0x7a5a64f4fa817d1dL,0x55f96844bea82e0fL,0xb5ff5a0fcd57f9aaL,
+ 0x226bf3cf00e51d6cL },
+ { 0xd6d1a9f92f2833cfL,0x20a0a35a4f4f89a8L,0x11536c498f3f7f77L,
+ 0x68779f47ff257836L } },
+ /* 249 */
+ { { 0x79b0c1c173043d08L,0xa54467741fc020faL,0xd3767e289a6d26d0L,
+ 0x97bcb0d1eb092e0bL },
+ { 0x2ab6eaa8f32ed3c3L,0xc8a4f151b281bc48L,0x4d1bf4f3bfa178f3L,
+ 0xa872ffe80a784655L } },
+ /* 250 */
+ { { 0xb1ab7935a32b2086L,0xe1eb710e8160f486L,0x9bd0cd913b6ae6beL,
+ 0x02812bfcb732a36aL },
+ { 0xa63fd7cacf605318L,0x646e5d50fdfd6d1dL,0xa1d683982102d619L,
+ 0x07391cc9fe5396afL } },
+ /* 251 */
+ { { 0xc50157f08b80d02bL,0x6b8333d162877f7fL,0x7aca1af878d542aeL,
+ 0x355d2adc7e6d2a08L },
+ { 0xb41f335a287386e1L,0xfd272a94f8e43275L,0x286ca2cde79989eaL,
+ 0x3dc2b1e37c2a3a79L } },
+ /* 252 */
+ { { 0xd689d21c04581352L,0x0a00c825376782beL,0x203bd5909fed701fL,
+ 0xc47869103ccd846bL },
+ { 0x5dba770824c768edL,0x72feea026841f657L,0x73313ed56accce0eL,
+ 0xccc42968d5bb4d32L } },
+ /* 253 */
+ { { 0x94e50de13d7620b9L,0xd89a5c8a5992a56aL,0xdc007640675487c9L,
+ 0xe147eb42aa4871cfL },
+ { 0x274ab4eeacf3ae46L,0xfd4936fb50350fbeL,0xdf2afe4748c840eaL,
+ 0x239ac047080e96e3L } },
+ /* 254 */
+ { { 0x481d1f352bfee8d4L,0xce80b5cffa7b0fecL,0x105c4c9e2ce9af3cL,
+ 0xc55fa1a3f5f7e59dL },
+ { 0x3186f14e8257c227L,0xc5b1653f342be00bL,0x09afc998aa904fb2L,
+ 0x094cd99cd4f4b699L } },
+ /* 255 */
+ { { 0x8a981c84d703bebaL,0x8631d15032ceb291L,0xa445f2c9e3bd49ecL,
+ 0xb90a30b642abad33L },
+ { 0xb465404fb4a5abf9L,0x004750c375db7603L,0x6f9a42ccca35d89fL,
+ 0x019f8b9a1b7924f7L } },
+};
+
+/* Compute the scalar multiple of the P256 base point.
+ *
+ * Delegates to the stripe implementation, feeding it the pre-computed
+ * multiples of the generator held in p256_table.
+ *
+ * r     [out] Resulting point.
+ * k     [in]  Scalar to multiply the base point by.
+ * map   [in]  Non-zero to convert the result to affine coordinates.
+ * heap  [in]  Heap hint to use for dynamic allocation.
+ * returns MP_OKAY on success and MEMORY_E when memory allocation fails.
+ */
+static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_4(r, &p256_base, p256_table, k, map,
+        heap);
+}
+
+#else
+/* The index into pre-computation table to use.
+ *
+ * Maps a recoded 7-bit window value plus carry (0..129) onto the absolute
+ * table index: values 0..64 map to themselves, while values 65..128 map
+ * back down (128 - value) because those windows are handled as negated
+ * smaller multiples — see the parallel table recode_neg_4_7.
+ */
+static const uint8_t recode_index_4_7[130] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
+    48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
+     0,  1,
+};
+
+/* Whether to negate y-ordinate.
+ *
+ * Parallel to recode_index_4_7: window values 64..127 represent negative
+ * multiples, so the y-ordinate of the looked-up point is negated for
+ * those entries; all other entries (0..63, 128, 129) use the point as-is.
+ */
+static const uint8_t recode_neg_4_7[130] = {
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     0,  0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * Splits the 256-bit scalar (four 64-bit digits) into 37 signed windows
+ * of 7 bits each.  Each raw window value (plus the carry from the
+ * previous window) is translated through recode_index_4_7 /
+ * recode_neg_4_7 so that values of 64 or more become a negated smaller
+ * table index, with the overflow carried into the next window.
+ *
+ * k Scalar to multiply by.
+ * v Vector of operations to perform.
+ */
+static void sp_256_ecc_recode_7_4(const sp_digit* k, ecc_recode_256* v)
+{
+    int i, j;
+    uint8_t y;
+    int carry = 0;
+    int o;        /* Bit offset of the next window within digit k[j]. */
+    sp_digit n;   /* Current digit, shifted so its low bits are next. */
+
+    j = 0;
+    n = k[j];
+    o = 0;
+    for (i=0; i<37; i++) {
+        /* Gather the next (up to) 7 bits of the scalar into y. */
+        y = n;
+        if (o + 7 < 64) {
+            /* Window lies entirely inside the current 64-bit digit. */
+            y &= 0x7f;
+            n >>= 7;
+            o += 7;
+        }
+        else if (o + 7 == 64) {
+            /* Window ends exactly on the digit boundary: the remaining 7
+             * bits of n are the window; move to the next digit (if any)
+             * and reset the bit offset. */
+            n >>= 7;
+            if (++j < 4)
+                n = k[j];
+            o = 0;
+        }
+        else if (++j < 4) {
+            /* Window straddles two digits: y already holds the low bits
+             * left in this digit; fill the rest from the bottom of the
+             * next digit. */
+            n = k[j];
+            y |= (n << (64 - o)) & 0x7f;
+            o -= 57;   /* New offset into k[j]: (o + 7) - 64. */
+            n >>= o;
+        }
+
+        /* Apply the carry and map to a signed-window table entry; values
+         * of 64 or more are stored as a negated smaller index and
+         * propagate a carry of 1 into the next window. */
+        y += carry;
+        v[i].i = recode_index_4_7[y];
+        v[i].neg = recode_neg_4_7[y];
+        carry = (y >> 7) + v[i].neg;
+    }
+}
+
+static const sp_table_entry_256 p256_table[2405] = {
+ /* 0 << 0 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 0 */
+ { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L,
+ 0x18905f76a53755c6L },
+ { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L,
+ 0x8571ff1825885d85L } },
+ /* 2 << 0 */
+ { { 0x850046d410ddd64dL,0xaa6ae3c1a433827dL,0x732205038d1490d9L,
+ 0xf6bb32e43dcf3a3bL },
+ { 0x2f3648d361bee1a5L,0x152cd7cbeb236ff8L,0x19a8fb0e92042dbeL,
+ 0x78c577510a5b8a3bL } },
+ /* 3 << 0 */
+ { { 0xffac3f904eebc127L,0xb027f84a087d81fbL,0x66ad77dd87cbbc98L,
+ 0x26936a3fb6ff747eL },
+ { 0xb04c5c1fc983a7ebL,0x583e47ad0861fe1aL,0x788208311a2ee98eL,
+ 0xd5f06a29e587cc07L } },
+ /* 4 << 0 */
+ { { 0x74b0b50d46918dccL,0x4650a6edc623c173L,0x0cdaacace8100af2L,
+ 0x577362f541b0176bL },
+ { 0x2d96f24ce4cbaba6L,0x17628471fad6f447L,0x6b6c36dee5ddd22eL,
+ 0x84b14c394c5ab863L } },
+ /* 5 << 0 */
+ { { 0xbe1b8aaec45c61f5L,0x90ec649a94b9537dL,0x941cb5aad076c20cL,
+ 0xc9079605890523c8L },
+ { 0xeb309b4ae7ba4f10L,0x73c568efe5eb882bL,0x3540a9877e7a1f68L,
+ 0x73a076bb2dd1e916L } },
+ /* 6 << 0 */
+ { { 0x403947373e77664aL,0x55ae744f346cee3eL,0xd50a961a5b17a3adL,
+ 0x13074b5954213673L },
+ { 0x93d36220d377e44bL,0x299c2b53adff14b5L,0xf424d44cef639f11L,
+ 0xa4c9916d4a07f75fL } },
+ /* 7 << 0 */
+ { { 0x0746354ea0173b4fL,0x2bd20213d23c00f7L,0xf43eaab50c23bb08L,
+ 0x13ba5119c3123e03L },
+ { 0x2847d0303f5b9d4dL,0x6742f2f25da67bddL,0xef933bdc77c94195L,
+ 0xeaedd9156e240867L } },
+ /* 8 << 0 */
+ { { 0x27f14cd19499a78fL,0x462ab5c56f9b3455L,0x8f90f02af02cfc6bL,
+ 0xb763891eb265230dL },
+ { 0xf59da3a9532d4977L,0x21e3327dcf9eba15L,0x123c7b84be60bbf0L,
+ 0x56ec12f27706df76L } },
+ /* 9 << 0 */
+ { { 0x75c96e8f264e20e8L,0xabe6bfed59a7a841L,0x2cc09c0444c8eb00L,
+ 0xe05b3080f0c4e16bL },
+ { 0x1eb7777aa45f3314L,0x56af7bedce5d45e3L,0x2b6e019a88b12f1aL,
+ 0x086659cdfd835f9bL } },
+ /* 10 << 0 */
+ { { 0x2c18dbd19dc21ec8L,0x98f9868a0fcf8139L,0x737d2cd648250b49L,
+ 0xcc61c94724b3428fL },
+ { 0x0c2b407880dd9e76L,0xc43a8991383fbe08L,0x5f7d2d65779be5d2L,
+ 0x78719a54eb3b4ab5L } },
+ /* 11 << 0 */
+ { { 0xea7d260a6245e404L,0x9de407956e7fdfe0L,0x1ff3a4158dac1ab5L,
+ 0x3e7090f1649c9073L },
+ { 0x1a7685612b944e88L,0x250f939ee57f61c8L,0x0c0daa891ead643dL,
+ 0x68930023e125b88eL } },
+ /* 12 << 0 */
+ { { 0x04b71aa7d2697768L,0xabdedef5ca345a33L,0x2409d29dee37385eL,
+ 0x4ee1df77cb83e156L },
+ { 0x0cac12d91cbb5b43L,0x170ed2f6ca895637L,0x28228cfa8ade6d66L,
+ 0x7ff57c9553238acaL } },
+ /* 13 << 0 */
+ { { 0xccc425634b2ed709L,0x0e356769856fd30dL,0xbcbcd43f559e9811L,
+ 0x738477ac5395b759L },
+ { 0x35752b90c00ee17fL,0x68748390742ed2e3L,0x7cd06422bd1f5bc1L,
+ 0xfbc08769c9e7b797L } },
+ /* 14 << 0 */
+ { { 0xa242a35bb0cf664aL,0x126e48f77f9707e3L,0x1717bf54c6832660L,
+ 0xfaae7332fd12c72eL },
+ { 0x27b52db7995d586bL,0xbe29569e832237c2L,0xe8e4193e2a65e7dbL,
+ 0x152706dc2eaa1bbbL } },
+ /* 15 << 0 */
+ { { 0x72bcd8b7bc60055bL,0x03cc23ee56e27e4bL,0xee337424e4819370L,
+ 0xe2aa0e430ad3da09L },
+ { 0x40b8524f6383c45dL,0xd766355442a41b25L,0x64efa6de778a4797L,
+ 0x2042170a7079adf4L } },
+ /* 16 << 0 */
+ { { 0x808b0b650bc6fb80L,0x5882e0753ffe2e6bL,0xd5ef2f7c2c83f549L,
+ 0x54d63c809103b723L },
+ { 0xf2f11bd652a23f9bL,0x3670c3194b0b6587L,0x55c4623bb1580e9eL,
+ 0x64edf7b201efe220L } },
+ /* 17 << 0 */
+ { { 0x97091dcbd53c5c9dL,0xf17624b6ac0a177bL,0xb0f139752cfe2dffL,
+ 0xc1a35c0a6c7a574eL },
+ { 0x227d314693e79987L,0x0575bf30e89cb80eL,0x2f4e247f0d1883bbL,
+ 0xebd512263274c3d0L } },
+ /* 18 << 0 */
+ { { 0x5f3e51c856ada97aL,0x4afc964d8f8b403eL,0xa6f247ab412e2979L,
+ 0x675abd1b6f80ebdaL },
+ { 0x66a2bd725e485a1dL,0x4b2a5caf8f4f0b3cL,0x2626927f1b847bbaL,
+ 0x6c6fc7d90502394dL } },
+ /* 19 << 0 */
+ { { 0xfea912baa5659ae8L,0x68363aba25e1a16eL,0xb8842277752c41acL,
+ 0xfe545c282897c3fcL },
+ { 0x2d36e9e7dc4c696bL,0x5806244afba977c5L,0x85665e9be39508c1L,
+ 0xf720ee256d12597bL } },
+ /* 20 << 0 */
+ { { 0x8a979129d2337a31L,0x5916868f0f862bdcL,0x048099d95dd283baL,
+ 0xe2d1eeb6fe5bfb4eL },
+ { 0x82ef1c417884005dL,0xa2d4ec17ffffcbaeL,0x9161c53f8aa95e66L,
+ 0x5ee104e1c5fee0d0L } },
+ /* 21 << 0 */
+ { { 0x562e4cecc135b208L,0x74e1b2654783f47dL,0x6d2a506c5a3f3b30L,
+ 0xecead9f4c16762fcL },
+ { 0xf29dd4b2e286e5b9L,0x1b0fadc083bb3c61L,0x7a75023e7fac29a4L,
+ 0xc086d5f1c9477fa3L } },
+ /* 22 << 0 */
+ { { 0x0fc611352f6f3076L,0xc99ffa23e3912a9aL,0x6a0b0685d2f8ba3dL,
+ 0xfdc777e8e93358a4L },
+ { 0x94a787bb35415f04L,0x640c2d6a4d23fea4L,0x9de917da153a35b5L,
+ 0x793e8d075d5cd074L } },
+ /* 23 << 0 */
+ { { 0xf4f876532de45068L,0x37c7a7e89e2e1f6eL,0xd0825fa2a3584069L,
+ 0xaf2cea7c1727bf42L },
+ { 0x0360a4fb9e4785a9L,0xe5fda49c27299f4aL,0x48068e1371ac2f71L,
+ 0x83d0687b9077666fL } },
+ /* 24 << 0 */
+ { { 0x6d3883b215d02819L,0x6d0d755040dd9a35L,0x61d7cbf91d2b469fL,
+ 0xf97b232f2efc3115L },
+ { 0xa551d750b24bcbc7L,0x11ea494988a1e356L,0x7669f03193cb7501L,
+ 0x595dc55eca737b8aL } },
+ /* 25 << 0 */
+ { { 0xa4a319acd837879fL,0x6fc1b49eed6b67b0L,0xe395993332f1f3afL,
+ 0x966742eb65432a2eL },
+ { 0x4b8dc9feb4966228L,0x96cc631243f43950L,0x12068859c9b731eeL,
+ 0x7b948dc356f79968L } },
+ /* 26 << 0 */
+ { { 0x61e4ad32ed1f8008L,0xe6c9267ad8b17538L,0x1ac7c5eb857ff6fbL,
+ 0x994baaa855f2fb10L },
+ { 0x84cf14e11d248018L,0x5a39898b628ac508L,0x14fde97b5fa944f5L,
+ 0xed178030d12e5ac7L } },
+ /* 27 << 0 */
+ { { 0x042c2af497e2feb4L,0xd36a42d7aebf7313L,0x49d2c9eb084ffdd7L,
+ 0x9f8aa54b2ef7c76aL },
+ { 0x9200b7ba09895e70L,0x3bd0c66fddb7fb58L,0x2d97d10878eb4cbbL,
+ 0x2d431068d84bde31L } },
+ /* 28 << 0 */
+ { { 0x4b523eb7172ccd1fL,0x7323cb2830a6a892L,0x97082ec0cfe153ebL,
+ 0xe97f6b6af2aadb97L },
+ { 0x1d3d393ed1a83da1L,0xa6a7f9c7804b2a68L,0x4a688b482d0cb71eL,
+ 0xa9b4cc5f40585278L } },
+ /* 29 << 0 */
+ { { 0x5e5db46acb66e132L,0xf1be963a0d925880L,0x944a70270317b9e2L,
+ 0xe266f95948603d48L },
+ { 0x98db66735c208899L,0x90472447a2fb18a3L,0x8a966939777c619fL,
+ 0x3798142a2a3be21bL } },
+ /* 30 << 0 */
+ { { 0xb4241cb13298b343L,0xa3a14e49b44f65a1L,0xc5f4d6cd3ac77acdL,
+ 0xd0288cb552b6fc3cL },
+ { 0xd5cc8c2f1c040abcL,0xb675511e06bf9b4aL,0xd667da379b3aa441L,
+ 0x460d45ce51601f72L } },
+ /* 31 << 0 */
+ { { 0xe2f73c696755ff89L,0xdd3cf7e7473017e6L,0x8ef5689d3cf7600dL,
+ 0x948dc4f8b1fc87b4L },
+ { 0xd9e9fe814ea53299L,0x2d921ca298eb6028L,0xfaecedfd0c9803fcL,
+ 0xf38ae8914d7b4745L } },
+ /* 32 << 0 */
+ { { 0xd8c5fccfc5e3a3d8L,0xbefd904c4079dfbfL,0xbc6d6a58fead0197L,
+ 0x39227077695532a4L },
+ { 0x09e23e6ddbef42f5L,0x7e449b64480a9908L,0x7b969c1aad9a2e40L,
+ 0x6231d7929591c2a4L } },
+ /* 33 << 0 */
+ { { 0x871514560f664534L,0x85ceae7c4b68f103L,0xac09c4ae65578ab9L,
+ 0x33ec6868f044b10cL },
+ { 0x6ac4832b3a8ec1f1L,0x5509d1285847d5efL,0xf909604f763f1574L,
+ 0xb16c4303c32f63c4L } },
+ /* 34 << 0 */
+ { { 0xb6ab20147ca23cd3L,0xcaa7a5c6a391849dL,0x5b0673a375678d94L,
+ 0xc982ddd4dd303e64L },
+ { 0xfd7b000b5db6f971L,0xbba2cb1f6f876f92L,0xc77332a33c569426L,
+ 0xa159100c570d74f8L } },
+ /* 35 << 0 */
+ { { 0xfd16847fdec67ef5L,0x742ee464233e76b7L,0x0b8e4134efc2b4c8L,
+ 0xca640b8642a3e521L },
+ { 0x653a01908ceb6aa9L,0x313c300c547852d5L,0x24e4ab126b237af7L,
+ 0x2ba901628bb47af8L } },
+ /* 36 << 0 */
+ { { 0x3d5e58d6a8219bb7L,0xc691d0bd1b06c57fL,0x0ae4cb10d257576eL,
+ 0x3569656cd54a3dc3L },
+ { 0xe5ebaebd94cda03aL,0x934e82d3162bfe13L,0x450ac0bae251a0c6L,
+ 0x480b9e11dd6da526L } },
+ /* 37 << 0 */
+ { { 0x00467bc58cce08b5L,0xb636458c7f178d55L,0xc5748baea677d806L,
+ 0x2763a387dfa394ebL },
+ { 0xa12b448a7d3cebb6L,0xe7adda3e6f20d850L,0xf63ebce51558462cL,
+ 0x58b36143620088a8L } },
+ /* 38 << 0 */
+ { { 0x8a2cc3ca4d63c0eeL,0x512331170fe948ceL,0x7463fd85222ef33bL,
+ 0xadf0c7dc7c603d6cL },
+ { 0x0ec32d3bfe7765e5L,0xccaab359bf380409L,0xbdaa84d68e59319cL,
+ 0xd9a4c2809c80c34dL } },
+ /* 39 << 0 */
+ { { 0xa9d89488a059c142L,0x6f5ae714ff0b9346L,0x068f237d16fb3664L,
+ 0x5853e4c4363186acL },
+ { 0xe2d87d2363c52f98L,0x2ec4a76681828876L,0x47b864fae14e7b1cL,
+ 0x0c0bc0e569192408L } },
+ /* 40 << 0 */
+ { { 0xe4d7681db82e9f3eL,0x83200f0bdf25e13cL,0x8909984c66f27280L,
+ 0x462d7b0075f73227L },
+ { 0xd90ba188f2651798L,0x74c6e18c36ab1c34L,0xab256ea35ef54359L,
+ 0x03466612d1aa702fL } },
+ /* 41 << 0 */
+ { { 0x624d60492ed22e91L,0x6fdfe0b56f072822L,0xeeca111539ce2271L,
+ 0x98100a4fdb01614fL },
+ { 0xb6b0daa2a35c628fL,0xb6f94d2ec87e9a47L,0xc67732591d57d9ceL,
+ 0xf70bfeec03884a7bL } },
+ /* 42 << 0 */
+ { { 0x5fb35ccfed2bad01L,0xa155cbe31da6a5c7L,0xc2e2594c30a92f8fL,
+ 0x649c89ce5bfafe43L },
+ { 0xd158667de9ff257aL,0x9b359611f32c50aeL,0x4b00b20b906014cfL,
+ 0xf3a8cfe389bc7d3dL } },
+ /* 43 << 0 */
+ { { 0x4ff23ffd248a7d06L,0x80c5bfb4878873faL,0xb7d9ad9005745981L,
+ 0x179c85db3db01994L },
+ { 0xba41b06261a6966cL,0x4d82d052eadce5a8L,0x9e91cd3ba5e6a318L,
+ 0x47795f4f95b2dda0L } },
+ /* 44 << 0 */
+ { { 0xecfd7c1fd55a897cL,0x009194abb29110fbL,0x5f0e2046e381d3b0L,
+ 0x5f3425f6a98dd291L },
+ { 0xbfa06687730d50daL,0x0423446c4b083b7fL,0x397a247dd69d3417L,
+ 0xeb629f90387ba42aL } },
+ /* 45 << 0 */
+ { { 0x1ee426ccd5cd79bfL,0x0032940b946c6e18L,0x1b1e8ae057477f58L,
+ 0xe94f7d346d823278L },
+ { 0xc747cb96782ba21aL,0xc5254469f72b33a5L,0x772ef6dec7f80c81L,
+ 0xd73acbfe2cd9e6b5L } },
+ /* 46 << 0 */
+ { { 0x4075b5b149ee90d9L,0x785c339aa06e9ebaL,0xa1030d5babf825e0L,
+ 0xcec684c3a42931dcL },
+ { 0x42ab62c9c1586e63L,0x45431d665ab43f2bL,0x57c8b2c055f7835dL,
+ 0x033da338c1b7f865L } },
+ /* 47 << 0 */
+ { { 0x283c7513caa76097L,0x0a624fa936c83906L,0x6b20afec715af2c7L,
+ 0x4b969974eba78bfdL },
+ { 0x220755ccd921d60eL,0x9b944e107baeca13L,0x04819d515ded93d4L,
+ 0x9bbff86e6dddfd27L } },
+ /* 48 << 0 */
+ { { 0x6b34413077adc612L,0xa7496529bbd803a0L,0x1a1baaa76d8805bdL,
+ 0xc8403902470343adL },
+ { 0x39f59f66175adff1L,0x0b26d7fbb7d8c5b7L,0xa875f5ce529d75e3L,
+ 0x85efc7e941325cc2L } },
+ /* 49 << 0 */
+ { { 0x21950b421ff6acd3L,0xffe7048453dc6909L,0xff4cd0b228766127L,
+ 0xabdbe6084fb7db2bL },
+ { 0x837c92285e1109e8L,0x26147d27f4645b5aL,0x4d78f592f7818ed8L,
+ 0xd394077ef247fa36L } },
+ /* 50 << 0 */
+ { { 0x0fb9c2d0488c171aL,0xa78bfbaa13685278L,0xedfbe268d5b1fa6aL,
+ 0x0dceb8db2b7eaba7L },
+ { 0xbf9e80899ae2b710L,0xefde7ae6a4449c96L,0x43b7716bcc143a46L,
+ 0xd7d34194c3628c13L } },
+ /* 51 << 0 */
+ { { 0x508cec1c3b3f64c9L,0xe20bc0ba1e5edf3fL,0xda1deb852f4318d4L,
+ 0xd20ebe0d5c3fa443L },
+ { 0x370b4ea773241ea3L,0x61f1511c5e1a5f65L,0x99a5e23d82681c62L,
+ 0xd731e383a2f54c2dL } },
+ /* 52 << 0 */
+ { { 0x2692f36e83445904L,0x2e0ec469af45f9c0L,0x905a3201c67528b7L,
+ 0x88f77f34d0e5e542L },
+ { 0xf67a8d295864687cL,0x23b92eae22df3562L,0x5c27014b9bbec39eL,
+ 0x7ef2f2269c0f0f8dL } },
+ /* 53 << 0 */
+ { { 0x97359638546c4d8dL,0x5f9c3fc492f24679L,0x912e8beda8c8acd9L,
+ 0xec3a318d306634b0L },
+ { 0x80167f41c31cb264L,0x3db82f6f522113f2L,0xb155bcd2dcafe197L,
+ 0xfba1da5943465283L } },
+ /* 54 << 0 */
+ { { 0xa0425b8eb212cf53L,0x4f2e512ef8557c5fL,0xc1286ff925c4d56cL,
+ 0xbb8a0feaee26c851L },
+ { 0xc28f70d2e7d6107eL,0x7ee0c444e76265aaL,0x3df277a41d1936b1L,
+ 0x1a556e3fea9595ebL } },
+ /* 55 << 0 */
+ { { 0x258bbbf9e7305683L,0x31eea5bf07ef5be6L,0x0deb0e4a46c814c1L,
+ 0x5cee8449a7b730ddL },
+ { 0xeab495c5a0182bdeL,0xee759f879e27a6b4L,0xc2cf6a6880e518caL,
+ 0x25e8013ff14cf3f4L } },
+ /* 56 << 0 */
+ { { 0x8fc441407e8d7a14L,0xbb1ff3ca9556f36aL,0x6a84438514600044L,
+ 0xba3f0c4a7451ae63L },
+ { 0xdfcac25b1f9af32aL,0x01e0db86b1f2214bL,0x4e9a5bc2a4b596acL,
+ 0x83927681026c2c08L } },
+ /* 57 << 0 */
+ { { 0x3ec832e77acaca28L,0x1bfeea57c7385b29L,0x068212e3fd1eaf38L,
+ 0xc13298306acf8cccL },
+ { 0xb909f2db2aac9e59L,0x5748060db661782aL,0xc5ab2632c79b7a01L,
+ 0xda44c6c600017626L } },
+ /* 58 << 0 */
+ { { 0xf26c00e8a7ea82f0L,0x99cac80de4299aafL,0xd66fe3b67ed78be1L,
+ 0x305f725f648d02cdL },
+ { 0x33ed1bc4623fb21bL,0xfa70533e7a6319adL,0x17ab562dbe5ffb3eL,
+ 0x0637499456674741L } },
+ /* 59 << 0 */
+ { { 0x69d44ed65c46aa8eL,0x2100d5d3a8d063d1L,0xcb9727eaa2d17c36L,
+ 0x4c2bab1b8add53b7L },
+ { 0xa084e90c15426704L,0x778afcd3a837ebeaL,0x6651f7017ce477f8L,
+ 0xa062499846fb7a8bL } },
+ /* 60 << 0 */
+ { { 0xdc1e6828ed8a6e19L,0x33fc23364189d9c7L,0x026f8fe2671c39bcL,
+ 0xd40c4ccdbc6f9915L },
+ { 0xafa135bbf80e75caL,0x12c651a022adff2cL,0xc40a04bd4f51ad96L,
+ 0x04820109bbe4e832L } },
+ /* 61 << 0 */
+ { { 0x3667eb1a7f4c04ccL,0x59556621a9404f84L,0x71cdf6537eceb50aL,
+ 0x994a44a69b8335faL },
+ { 0xd7faf819dbeb9b69L,0x473c5680eed4350dL,0xb6658466da44bba2L,
+ 0x0d1bc780872bdbf3L } },
+ /* 62 << 0 */
+ { { 0xe535f175a1962f91L,0x6ed7e061ed58f5a7L,0x177aa4c02089a233L,
+ 0x0dbcb03ae539b413L },
+ { 0xe3dc424ebb32e38eL,0x6472e5ef6806701eL,0xdd47ff98814be9eeL,
+ 0x6b60cfff35ace009L } },
+ /* 63 << 0 */
+ { { 0xb8d3d9319ff91fe5L,0x039c4800f0518eedL,0x95c376329182cb26L,
+ 0x0763a43482fc568dL },
+ { 0x707c04d5383e76baL,0xac98b930824e8197L,0x92bf7c8f91230de0L,
+ 0x90876a0140959b70L } },
+ /* 64 << 0 */
+ { { 0xdb6d96f305968b80L,0x380a0913089f73b9L,0x7da70b83c2c61e01L,
+ 0x95fb8394569b38c7L },
+ { 0x9a3c651280edfe2fL,0x8f726bb98faeaf82L,0x8010a4a078424bf8L,
+ 0x296720440e844970L } },
+ /* 0 << 7 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 7 */
+ { { 0x63c5cb817a2ad62aL,0x7ef2b6b9ac62ff54L,0x3749bba4b3ad9db5L,
+ 0xad311f2c46d5a617L },
+ { 0xb77a8087c2ff3b6dL,0xb46feaf3367834ffL,0xf8aa266d75d6b138L,
+ 0xfa38d320ec008188L } },
+ /* 2 << 7 */
+ { { 0x486d8ffa696946fcL,0x50fbc6d8b9cba56dL,0x7e3d423e90f35a15L,
+ 0x7c3da195c0dd962cL },
+ { 0xe673fdb03cfd5d8bL,0x0704b7c2889dfca5L,0xf6ce581ff52305aaL,
+ 0x399d49eb914d5e53L } },
+ /* 3 << 7 */
+ { { 0x380a496d6ec293cdL,0x733dbda78e7051f5L,0x037e388db849140aL,
+ 0xee4b32b05946dbf6L },
+ { 0xb1c4fda9cae368d1L,0x5001a7b0fdb0b2f3L,0x6df593742e3ac46eL,
+ 0x4af675f239b3e656L } },
+ /* 4 << 7 */
+ { { 0x44e3811039949296L,0x5b63827b361db1b5L,0x3e5323ed206eaff5L,
+ 0x942370d2c21f4290L },
+ { 0xf2caaf2ee0d985a1L,0x192cc64b7239846dL,0x7c0b8f47ae6312f8L,
+ 0x7dc61f9196620108L } },
+ /* 5 << 7 */
+ { { 0xb830fb5bc2da7de9L,0xd0e643df0ff8d3beL,0x31ee77ba188a9641L,
+ 0x4e8aa3aabcf6d502L },
+ { 0xf9fb65329a49110fL,0xd18317f62dd6b220L,0x7e3ced4152c3ea5aL,
+ 0x0d296a147d579c4aL } },
+ /* 6 << 7 */
+ { { 0x35d6a53eed4c3717L,0x9f8240cf3d0ed2a3L,0x8c0d4d05e5543aa5L,
+ 0x45d5bbfbdd33b4b4L },
+ { 0xfa04cc73137fd28eL,0x862ac6efc73b3ffdL,0x403ff9f531f51ef2L,
+ 0x34d5e0fcbc73f5a2L } },
+ /* 7 << 7 */
+ { { 0xf252682008913f4fL,0xea20ed61eac93d95L,0x51ed38b46ca6b26cL,
+ 0x8662dcbcea4327b0L },
+ { 0x6daf295c725d2aaaL,0xbad2752f8e52dcdaL,0x2210e7210b17daccL,
+ 0xa37f7912d51e8232L } },
+ /* 8 << 7 */
+ { { 0x4f7081e144cc3addL,0xd5ffa1d687be82cfL,0x89890b6c0edd6472L,
+ 0xada26e1a3ed17863L },
+ { 0x276f271563483caaL,0xe6924cd92f6077fdL,0x05a7fe980a466e3cL,
+ 0xf1c794b0b1902d1fL } },
+ /* 9 << 7 */
+ { { 0xe521368882a8042cL,0xd931cfafcd278298L,0x069a0ae0f597a740L,
+ 0x0adbb3f3eb59107cL },
+ { 0x983e951e5eaa8eb8L,0xe663a8b511b48e78L,0x1631cc0d8a03f2c5L,
+ 0x7577c11e11e271e2L } },
+ /* 10 << 7 */
+ { { 0x33b2385c08369a90L,0x2990c59b190eb4f8L,0x819a6145c68eac80L,
+ 0x7a786d622ec4a014L },
+ { 0x33faadbe20ac3a8dL,0x31a217815aba2d30L,0x209d2742dba4f565L,
+ 0xdb2ce9e355aa0fbbL } },
+ /* 11 << 7 */
+ { { 0x8cef334b168984dfL,0xe81dce1733879638L,0xf6e6949c263720f0L,
+ 0x5c56feaff593cbecL },
+ { 0x8bff5601fde58c84L,0x74e241172eccb314L,0xbcf01b614c9a8a78L,
+ 0xa233e35e544c9868L } },
+ /* 12 << 7 */
+ { { 0xb3156bf38bd7aff1L,0x1b5ee4cb1d81b146L,0x7ba1ac41d628a915L,
+ 0x8f3a8f9cfd89699eL },
+ { 0x7329b9c9a0748be7L,0x1d391c95a92e621fL,0xe51e6b214d10a837L,
+ 0xd255f53a4947b435L } },
+ /* 13 << 7 */
+ { { 0x07669e04f1788ee3L,0xc14f27afa86938a2L,0x8b47a334e93a01c0L,
+ 0xff627438d9366808L },
+ { 0x7a0985d8ca2a5965L,0x3d9a5542d6e9b9b3L,0xc23eb80b4cf972e8L,
+ 0x5c1c33bb4fdf72fdL } },
+ /* 14 << 7 */
+ { { 0x0c4a58d474a86108L,0xf8048a8fee4c5d90L,0xe3c7c924e86d4c80L,
+ 0x28c889de056a1e60L },
+ { 0x57e2662eb214a040L,0xe8c48e9837e10347L,0x8774286280ac748aL,
+ 0xf1c24022186b06f2L } },
+ /* 15 << 7 */
+ { { 0xac2dd4c35f74040aL,0x409aeb71fceac957L,0x4fbad78255c4ec23L,
+ 0xb359ed618a7b76ecL },
+ { 0x12744926ed6f4a60L,0xe21e8d7f4b912de3L,0xe2575a59fc705a59L,
+ 0x72f1d4deed2dbc0eL } },
+ /* 16 << 7 */
+ { { 0x3d2b24b9eb7926b8L,0xbff88cb3cdbe5509L,0xd0f399afe4dd640bL,
+ 0x3c5fe1302f76ed45L },
+ { 0x6f3562f43764fb3dL,0x7b5af3183151b62dL,0xd5bd0bc7d79ce5f3L,
+ 0xfdaf6b20ec66890fL } },
+ /* 17 << 7 */
+ { { 0x735c67ec6063540cL,0x50b259c2e5f9cb8fL,0xb8734f9a3f99c6abL,
+ 0xf8cc13d5a3a7bc85L },
+ { 0x80c1b305c5217659L,0xfe5364d44ec12a54L,0xbd87045e681345feL,
+ 0x7f8efeb1582f897fL } },
+ /* 18 << 7 */
+ { { 0xe8cbf1e5d5923359L,0xdb0cea9d539b9fb0L,0x0c5b34cf49859b98L,
+ 0x5e583c56a4403cc6L },
+ { 0x11fc1a2dd48185b7L,0xc93fbc7e6e521787L,0x47e7a05805105b8bL,
+ 0x7b4d4d58db8260c8L } },
+ /* 19 << 7 */
+ { { 0xe33930b046eb842aL,0x8e844a9a7bdae56dL,0x34ef3a9e13f7fdfcL,
+ 0xb3768f82636ca176L },
+ { 0x2821f4e04e09e61cL,0x414dc3a1a0c7cddcL,0xd537943754945fcdL,
+ 0x151b6eefb3555ff1L } },
+ /* 20 << 7 */
+ { { 0xb31bd6136339c083L,0x39ff8155dfb64701L,0x7c3388d2e29604abL,
+ 0x1e19084ba6b10442L },
+ { 0x17cf54c0eccd47efL,0x896933854a5dfb30L,0x69d023fb47daf9f6L,
+ 0x9222840b7d91d959L } },
+ /* 21 << 7 */
+ { { 0x439108f5803bac62L,0x0b7dd91d379bd45fL,0xd651e827ca63c581L,
+ 0x5c5d75f6509c104fL },
+ { 0x7d5fc7381f2dc308L,0x20faa7bfd98454beL,0x95374beea517b031L,
+ 0xf036b9b1642692acL } },
+ /* 22 << 7 */
+ { { 0xc510610939842194L,0xb7e2353e49d05295L,0xfc8c1d5cefb42ee0L,
+ 0xe04884eb08ce811cL },
+ { 0xf1f75d817419f40eL,0x5b0ac162a995c241L,0x120921bbc4c55646L,
+ 0x713520c28d33cf97L } },
+ /* 23 << 7 */
+ { { 0xb4a65a5ce98c5100L,0x6cec871d2ddd0f5aL,0x251f0b7f9ba2e78bL,
+ 0x224a8434ce3a2a5fL },
+ { 0x26827f6125f5c46fL,0x6a22bedc48545ec0L,0x25ae5fa0b1bb5cdcL,
+ 0xd693682ffcb9b98fL } },
+ /* 24 << 7 */
+ { { 0x32027fe891e5d7d3L,0xf14b7d1773a07678L,0xf88497b3c0dfdd61L,
+ 0xf7c2eec02a8c4f48L },
+ { 0xaa5573f43756e621L,0xc013a2401825b948L,0x1c03b34563878572L,
+ 0xa0472bea653a4184L } },
+ /* 25 << 7 */
+ { { 0xf4222e270ac69a80L,0x34096d25f51e54f6L,0x00a648cb8fffa591L,
+ 0x4e87acdc69b6527fL },
+ { 0x0575e037e285ccb4L,0x188089e450ddcf52L,0xaa96c9a8870ff719L,
+ 0x74a56cd81fc7e369L } },
+ /* 26 << 7 */
+ { { 0x41d04ee21726931aL,0x0bbbb2c83660ecfdL,0xa6ef6de524818e18L,
+ 0xe421cc51e7d57887L },
+ { 0xf127d208bea87be6L,0x16a475d3b1cdd682L,0x9db1b684439b63f7L,
+ 0x5359b3dbf0f113b6L } },
+ /* 27 << 7 */
+ { { 0xdfccf1de8bf06e31L,0x1fdf8f44dd383901L,0x10775cad5017e7d2L,
+ 0xdfc3a59758d11eefL },
+ { 0x6ec9c8a0b1ecff10L,0xee6ed6cc28400549L,0xb5ad7bae1b4f8d73L,
+ 0x61b4f11de00aaab9L } },
+ /* 28 << 7 */
+ { { 0x7b32d69bd4eff2d7L,0x88ae67714288b60fL,0x159461b437a1e723L,
+ 0x1f3d4789570aae8cL },
+ { 0x869118c07f9871daL,0x35fbda78f635e278L,0x738f3641e1541dacL,
+ 0x6794b13ac0dae45fL } },
+ /* 29 << 7 */
+ { { 0x065064ac09cc0917L,0x27c53729c68540fdL,0x0d2d4c8eef227671L,
+ 0xd23a9f80a1785a04L },
+ { 0x98c5952852650359L,0xfa09ad0174a1acadL,0x082d5a290b55bf5cL,
+ 0xa40f1c67419b8084L } },
+ /* 30 << 7 */
+ { { 0x3a5c752edcc18770L,0x4baf1f2f8825c3a5L,0xebd63f7421b153edL,
+ 0xa2383e47b2f64723L },
+ { 0xe7bf620a2646d19aL,0x56cb44ec03c83ffdL,0xaf7267c94f6be9f1L,
+ 0x8b2dfd7bc06bb5e9L } },
+ /* 31 << 7 */
+ { { 0xb87072f2a672c5c7L,0xeacb11c80d53c5e2L,0x22dac29dff435932L,
+ 0x37bdb99d4408693cL },
+ { 0xf6e62fb62899c20fL,0x3535d512447ece24L,0xfbdc6b88ff577ce3L,
+ 0x726693bd190575f2L } },
+ /* 32 << 7 */
+ { { 0x6772b0e5ab4b35a2L,0x1d8b6001f5eeaacfL,0x728f7ce4795b9580L,
+ 0x4a20ed2a41fb81daL },
+ { 0x9f685cd44fec01e6L,0x3ed7ddcca7ff50adL,0x460fd2640c2d97fdL,
+ 0x3a241426eb82f4f9L } },
+ /* 33 << 7 */
+ { { 0x17d1df2c6a8ea820L,0xb2b50d3bf22cc254L,0x03856cbab7291426L,
+ 0x87fd26ae04f5ee39L },
+ { 0x9cb696cc02bee4baL,0x5312180406820fd6L,0xa5dfc2690212e985L,
+ 0x666f7ffa160f9a09L } },
+ /* 34 << 7 */
+ { { 0xc503cd33bccd9617L,0x365dede4ba7730a3L,0x798c63555ddb0786L,
+ 0xa6c3200efc9cd3bcL },
+ { 0x060ffb2ce5e35efdL,0x99a4e25b5555a1c1L,0x11d95375f70b3751L,
+ 0x0a57354a160e1bf6L } },
+ /* 35 << 7 */
+ { { 0xecb3ae4bf8e4b065L,0x07a834c42e53022bL,0x1cd300b38692ed96L,
+ 0x16a6f79261ee14ecL },
+ { 0x8f1063c66a8649edL,0xfbcdfcfe869f3e14L,0x2cfb97c100a7b3ecL,
+ 0xcea49b3c7130c2f1L } },
+ /* 36 << 7 */
+ { { 0x462d044fe9d96488L,0x4b53d52e8182a0c1L,0x84b6ddd30391e9e9L,
+ 0x80ab7b48b1741a09L },
+ { 0xec0e15d427d3317fL,0x8dfc1ddb1a64671eL,0x93cc5d5fd49c5b92L,
+ 0xc995d53d3674a331L } },
+ /* 37 << 7 */
+ { { 0x302e41ec090090aeL,0x2278a0ccedb06830L,0x1d025932fbc99690L,
+ 0x0c32fbd2b80d68daL },
+ { 0xd79146daf341a6c1L,0xae0ba1391bef68a0L,0xc6b8a5638d774b3aL,
+ 0x1cf307bd880ba4d7L } },
+ /* 38 << 7 */
+ { { 0xc033bdc719803511L,0xa9f97b3b8888c3beL,0x3d68aebc85c6d05eL,
+ 0xc3b88a9d193919ebL },
+ { 0x2d300748c48b0ee3L,0x7506bc7c07a746c1L,0xfc48437c6e6d57f3L,
+ 0x5bd71587cfeaa91aL } },
+ /* 39 << 7 */
+ { { 0xa4ed0408c1bc5225L,0xd0b946db2719226dL,0x109ecd62758d2d43L,
+ 0x75c8485a2751759bL },
+ { 0xb0b75f499ce4177aL,0x4fa61a1e79c10c3dL,0xc062d300a167fcd7L,
+ 0x4df3874c750f0fa8L } },
+ /* 40 << 7 */
+ { { 0x29ae2cf983dfedc9L,0xf84371348d87631aL,0xaf5717117429c8d2L,
+ 0x18d15867146d9272L },
+ { 0x83053ecf69769bb7L,0xc55eb856c479ab82L,0x5ef7791c21b0f4b2L,
+ 0xaa5956ba3d491525L } },
+ /* 41 << 7 */
+ { { 0x407a96c29fe20ebaL,0xf27168bbe52a5ad3L,0x43b60ab3bf1d9d89L,
+ 0xe45c51ef710e727aL },
+ { 0xdfca5276099b4221L,0x8dc6407c2557a159L,0x0ead833591035895L,
+ 0x0a9db9579c55dc32L } },
+ /* 42 << 7 */
+ { { 0xe40736d3df61bc76L,0x13a619c03f778cdbL,0x6dd921a4c56ea28fL,
+ 0x76a524332fa647b4L },
+ { 0x23591891ac5bdc5dL,0xff4a1a72bac7dc01L,0x9905e26162df8453L,
+ 0x3ac045dfe63b265fL } },
+ /* 43 << 7 */
+ { { 0x8a3f341bad53dba7L,0x8ec269cc837b625aL,0xd71a27823ae31189L,
+ 0x8fb4f9a355e96120L },
+ { 0x804af823ff9875cfL,0x23224f575d442a9bL,0x1c4d3b9eecc62679L,
+ 0x91da22fba0e7ddb1L } },
+ /* 44 << 7 */
+ { { 0xa370324d6c04a661L,0x9710d3b65e376d17L,0xed8c98f03044e357L,
+ 0xc364ebbe6422701cL },
+ { 0x347f5d517733d61cL,0xd55644b9cea826c3L,0x80c6e0ad55a25548L,
+ 0x0aa7641d844220a7L } },
+ /* 45 << 7 */
+ { { 0x1438ec8131810660L,0x9dfa6507de4b4043L,0x10b515d8cc3e0273L,
+ 0x1b6066dd28d8cfb2L },
+ { 0xd3b045919c9efebdL,0x425d4bdfa21c1ff4L,0x5fe5af19d57607d3L,
+ 0xbbf773f754481084L } },
+ /* 46 << 7 */
+ { { 0x8435bd6994b03ed1L,0xd9ad1de3634cc546L,0x2cf423fc00e420caL,
+ 0xeed26d80a03096ddL },
+ { 0xd7f60be7a4db09d2L,0xf47f569d960622f7L,0xe5925fd77296c729L,
+ 0xeff2db2626ca2715L } },
+ /* 47 << 7 */
+ { { 0xa6fcd014b913e759L,0x53da47868ff4de93L,0x14616d79c32068e1L,
+ 0xb187d664ccdf352eL },
+ { 0xf7afb6501dc90b59L,0x8170e9437daa1b26L,0xc8e3bdd8700c0a84L,
+ 0x6e8d345f6482bdfaL } },
+ /* 48 << 7 */
+ { { 0x84cfbfa1c5c5ea50L,0xd3baf14c67960681L,0x263984030dd50942L,
+ 0xe4b7839c4716a663L },
+ { 0xd5f1f794e7de6dc0L,0x5cd0f4d4622aa7ceL,0x5295f3f159acfeecL,
+ 0x8d933552953e0607L } },
+ /* 49 << 7 */
+ { { 0xc7db8ec5776c5722L,0xdc467e622b5f290cL,0xd4297e704ff425a9L,
+ 0x4be924c10cf7bb72L },
+ { 0x0d5dc5aea1892131L,0x8bf8a8e3a705c992L,0x73a0b0647a305ac5L,
+ 0x00c9ca4e9a8c77a8L } },
+ /* 50 << 7 */
+ { { 0x5dfee80f83774bddL,0x6313160285734485L,0xa1b524ae914a69a9L,
+ 0xebc2ffafd4e300d7L },
+ { 0x52c93db77cfa46a5L,0x71e6161f21653b50L,0x3574fc57a4bc580aL,
+ 0xc09015dde1bc1253L } },
+ /* 51 << 7 */
+ { { 0x4b7b47b2d174d7aaL,0x4072d8e8f3a15d04L,0xeeb7d47fd6fa07edL,
+ 0x6f2b9ff9edbdafb1L },
+ { 0x18c516153760fe8aL,0x7a96e6bff06c6c13L,0x4d7a04100ea2d071L,
+ 0xa1914e9b0be2a5ceL } },
+ /* 52 << 7 */
+ { { 0x5726e357d8a3c5cfL,0x1197ecc32abb2b13L,0x6c0d7f7f31ae88ddL,
+ 0x15b20d1afdbb3efeL },
+ { 0xcd06aa2670584039L,0x2277c969a7dc9747L,0xbca695877855d815L,
+ 0x899ea2385188b32aL } },
+ /* 53 << 7 */
+ { { 0x37d9228b760c1c9dL,0xc7efbb119b5c18daL,0x7f0d1bc819f6dbc5L,
+ 0x4875384b07e6905bL },
+ { 0xc7c50baa3ba8cd86L,0xb0ce40fbc2905de0L,0x708406737a231952L,
+ 0xa912a262cf43de26L } },
+ /* 54 << 7 */
+ { { 0x9c38ddcceb5b76c1L,0x746f528526fc0ab4L,0x52a63a50d62c269fL,
+ 0x60049c5599458621L },
+ { 0xe7f48f823c2f7c9eL,0x6bd99043917d5cf3L,0xeb1317a88701f469L,
+ 0xbd3fe2ed9a449fe0L } },
+ /* 55 << 7 */
+ { { 0x421e79ca12ef3d36L,0x9ee3c36c3e7ea5deL,0xe48198b5cdff36f7L,
+ 0xaff4f967c6b82228L },
+ { 0x15e19dd0c47adb7eL,0x45699b23032e7dfaL,0x40680c8b1fae026aL,
+ 0x5a347a48550dbf4dL } },
+ /* 56 << 7 */
+ { { 0xe652533b3cef0d7dL,0xd94f7b182bbb4381L,0x838752be0e80f500L,
+ 0x8e6e24889e9c9bfbL },
+ { 0xc975169716caca6aL,0x866c49d838531ad9L,0xc917e2397151ade1L,
+ 0x2d016ec16037c407L } },
+ /* 57 << 7 */
+ { { 0xa407ccc900eac3f9L,0x835f6280e2ed4748L,0xcc54c3471cc98e0dL,
+ 0x0e969937dcb572ebL },
+ { 0x1b16c8e88f30c9cbL,0xa606ae75373c4661L,0x47aa689b35502cabL,
+ 0xf89014ae4d9bb64fL } },
+ /* 58 << 7 */
+ { { 0x202f6a9c31c71f7bL,0x01f95aa3296ffe5cL,0x5fc0601453cec3a3L,
+ 0xeb9912375f498a45L },
+ { 0xae9a935e5d91ba87L,0xc6ac62810b564a19L,0x8a8fe81c3bd44e69L,
+ 0x7c8b467f9dd11d45L } },
+ /* 59 << 7 */
+ { { 0xf772251fea5b8e69L,0xaeecb3bdc5b75fbcL,0x1aca3331887ff0e5L,
+ 0xbe5d49ff19f0a131L },
+ { 0x582c13aae5c8646fL,0xdbaa12e820e19980L,0x8f40f31af7abbd94L,
+ 0x1f13f5a81dfc7663L } },
+ /* 60 << 7 */
+ { { 0x5d81f1eeaceb4fc0L,0x362560025e6f0f42L,0x4b67d6d7751370c8L,
+ 0x2608b69803e80589L },
+ { 0xcfc0d2fc05268301L,0xa6943d3940309212L,0x192a90c21fd0e1c2L,
+ 0xb209f11337f1dc76L } },
+ /* 61 << 7 */
+ { { 0xefcc5e0697bf1298L,0xcbdb6730219d639eL,0xd009c116b81e8c6fL,
+ 0xa3ffdde31a7ce2e5L },
+ { 0xc53fbaaaa914d3baL,0x836d500f88df85eeL,0xd98dc71b66ee0751L,
+ 0x5a3d7005714516fdL } },
+ /* 62 << 7 */
+ { { 0x21d3634d39eedbbaL,0x35cd2e680455a46dL,0xc8cafe65f9d7eb0cL,
+ 0xbda3ce9e00cefb3eL },
+ { 0xddc17a602c9cf7a4L,0x01572ee47bcb8773L,0xa92b2b018c7548dfL,
+ 0x732fd309a84600e3L } },
+ /* 63 << 7 */
+ { { 0xe22109c716543a40L,0x9acafd36fede3c6cL,0xfb2068526824e614L,
+ 0x2a4544a9da25dca0L },
+ { 0x2598526291d60b06L,0x281b7be928753545L,0xec667b1a90f13b27L,
+ 0x33a83aff940e2eb4L } },
+ /* 64 << 7 */
+ { { 0x80009862d5d721d5L,0x0c3357a35bd3a182L,0x27f3a83b7aa2cda4L,
+ 0xb58ae74ef6f83085L },
+ { 0x2a911a812e6dad6bL,0xde286051f43d6c5bL,0x4bdccc41f996c4d8L,
+ 0xe7312ec00ae1e24eL } },
+ /* 0 << 14 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 14 */
+ { { 0xf8d112e76e6485b3L,0x4d3e24db771c52f8L,0x48e3ee41684a2f6dL,
+ 0x7161957d21d95551L },
+ { 0x19631283cdb12a6cL,0xbf3fa8822e50e164L,0xf6254b633166cc73L,
+ 0x3aefa7aeaee8cc38L } },
+ /* 2 << 14 */
+ { { 0x79b0fe623b36f9fdL,0x26543b23fde19fc0L,0x136e64a0958482efL,
+ 0x23f637719b095825L },
+ { 0x14cfd596b6a1142eL,0x5ea6aac6335aac0bL,0x86a0e8bdf3081dd5L,
+ 0x5fb89d79003dc12aL } },
+ /* 3 << 14 */
+ { { 0xf615c33af72e34d4L,0x0bd9ea40110eec35L,0x1c12bc5bc1dea34eL,
+ 0x686584c949ae4699L },
+ { 0x13ad95d38c97b942L,0x4609561a4e5c7562L,0x9e94a4aef2737f89L,
+ 0xf57594c6371c78b6L } },
+ /* 4 << 14 */
+ { { 0x0f0165fce3779ee3L,0xe00e7f9dbd495d9eL,0x1fa4efa220284e7aL,
+ 0x4564bade47ac6219L },
+ { 0x90e6312ac4708e8eL,0x4f5725fba71e9adfL,0xe95f55ae3d684b9fL,
+ 0x47f7ccb11e94b415L } },
+ /* 5 << 14 */
+ { { 0x7322851b8d946581L,0xf0d13133bdf4a012L,0xa3510f696584dae0L,
+ 0x03a7c1713c9f6c6dL },
+ { 0x5be97f38e475381aL,0xca1ba42285823334L,0xf83cc5c70be17ddaL,
+ 0x158b14940b918c0fL } },
+ /* 6 << 14 */
+ { { 0xda3a77e5522e6b69L,0x69c908c3bbcd6c18L,0x1f1b9e48d924fd56L,
+ 0x37c64e36aa4bb3f7L },
+ { 0x5a4fdbdfee478d7dL,0xba75c8bc0193f7a0L,0x84bc1e8456cd16dfL,
+ 0x1fb08f0846fad151L } },
+ /* 7 << 14 */
+ { { 0x8a7cabf9842e9f30L,0xa331d4bf5eab83afL,0xd272cfba017f2a6aL,
+ 0x27560abc83aba0e3L },
+ { 0x94b833870e3a6b75L,0x25c6aea26b9f50f5L,0x803d691db5fdf6d0L,
+ 0x03b77509e6333514L } },
+ /* 8 << 14 */
+ { { 0x3617890361a341c1L,0x3604dc600cfd6142L,0x022295eb8533316cL,
+ 0x3dbde4ac44af2922L },
+ { 0x898afc5d1c7eef69L,0x58896805d14f4fa1L,0x05002160203c21caL,
+ 0x6f0d1f3040ef730bL } },
+ /* 9 << 14 */
+ { { 0x8e8c44d4196224f8L,0x75a4ab95374d079dL,0x79085ecc7d48f123L,
+ 0x56f04d311bf65ad8L },
+ { 0xe220bf1cbda602b2L,0x73ee1742f9612c69L,0x76008fc8084fd06bL,
+ 0x4000ef9ff11380d1L } },
+ /* 10 << 14 */
+ { { 0x48201b4b12cfe297L,0x3eee129c292f74e5L,0xe1fe114ec9e874e8L,
+ 0x899b055c92c5fc41L },
+ { 0x4e477a643a39c8cfL,0x82f09efe78963cc9L,0x6fd3fd8fd333f863L,
+ 0x85132b2adc949c63L } },
+ /* 11 << 14 */
+ { { 0x7e06a3ab516eb17bL,0x73bec06fd2c7372bL,0xe4f74f55ba896da6L,
+ 0xbb4afef88e9eb40fL },
+ { 0x2d75bec8e61d66b0L,0x02bda4b4ef29300bL,0x8bbaa8de026baa5aL,
+ 0xff54befda07f4440L } },
+ /* 12 << 14 */
+ { { 0xbd9b8b1dbe7a2af3L,0xec51caa94fb74a72L,0xb9937a4b63879697L,
+ 0x7c9a9d20ec2687d5L },
+ { 0x1773e44f6ef5f014L,0x8abcf412e90c6900L,0x387bd0228142161eL,
+ 0x50393755fcb6ff2aL } },
+ /* 13 << 14 */
+ { { 0x9813fd56ed6def63L,0x53cf64827d53106cL,0x991a35bd431f7ac1L,
+ 0xf1e274dd63e65fafL },
+ { 0xf63ffa3c44cc7880L,0x411a426b7c256981L,0xb698b9fd93a420e0L,
+ 0x89fdddc0ae53f8feL } },
+ /* 14 << 14 */
+ { { 0x766e072232398baaL,0x205fee425cfca031L,0xa49f53417a029cf2L,
+ 0xa88c68b84023890dL },
+ { 0xbc2750417337aaa8L,0x9ed364ad0eb384f4L,0xe0816f8529aba92fL,
+ 0x2e9e194104e38a88L } },
+ /* 15 << 14 */
+ { { 0x57eef44a3dafd2d5L,0x35d1fae597ed98d8L,0x50628c092307f9b1L,
+ 0x09d84aaed6cba5c6L },
+ { 0x67071bc788aaa691L,0x2dea57a9afe6cb03L,0xdfe11bb43d78ac01L,
+ 0x7286418c7fd7aa51L } },
+ /* 16 << 14 */
+ { { 0xfabf770977f7195aL,0x8ec86167adeb838fL,0xea1285a8bb4f012dL,
+ 0xd68835039a3eab3fL },
+ { 0xee5d24f8309004c2L,0xa96e4b7613ffe95eL,0x0cdffe12bd223ea4L,
+ 0x8f5c2ee5b6739a53L } },
+ /* 17 << 14 */
+ { { 0x5cb4aaa5dd968198L,0xfa131c5272413a6cL,0x53d46a909536d903L,
+ 0xb270f0d348606d8eL },
+ { 0x518c7564a053a3bcL,0x088254b71a86caefL,0xb3ba8cb40ab5efd0L,
+ 0x5c59900e4605945dL } },
+ /* 18 << 14 */
+ { { 0xecace1dda1887395L,0x40960f36932a65deL,0x9611ff5c3aa95529L,
+ 0xc58215b07c1e5a36L },
+ { 0xd48c9b58f0e1a524L,0xb406856bf590dfb8L,0xc7605e049cd95662L,
+ 0x0dd036eea33ecf82L } },
+ /* 19 << 14 */
+ { { 0xa50171acc33156b3L,0xf09d24ea4a80172eL,0x4e1f72c676dc8eefL,
+ 0xe60caadc5e3d44eeL },
+ { 0x006ef8a6979b1d8fL,0x60908a1c97788d26L,0x6e08f95b266feec0L,
+ 0x618427c222e8c94eL } },
+ /* 20 << 14 */
+ { { 0x3d61333959145a65L,0xcd9bc368fa406337L,0x82d11be32d8a52a0L,
+ 0xf6877b2797a1c590L },
+ { 0x837a819bf5cbdb25L,0x2a4fd1d8de090249L,0x622a7de774990e5fL,
+ 0x840fa5a07945511bL } },
+ /* 21 << 14 */
+ { { 0x30b974be6558842dL,0x70df8c6417f3d0a6L,0x7c8035207542e46dL,
+ 0x7251fe7fe4ecc823L },
+ { 0xe59134cb5e9aac9aL,0x11bb0934f0045d71L,0x53e5d9b5dbcb1d4eL,
+ 0x8d97a90592defc91L } },
+ /* 22 << 14 */
+ { { 0xfe2893277946d3f9L,0xe132bd2407472273L,0xeeeb510c1eb6ae86L,
+ 0x777708c5f0595067L },
+ { 0x18e2c8cd1297029eL,0x2c61095cbbf9305eL,0xe466c2586b85d6d9L,
+ 0x8ac06c36da1ea530L } },
+ /* 23 << 14 */
+ { { 0xa365dc39a1304668L,0xe4a9c88507f89606L,0x65a4898facc7228dL,
+ 0x3e2347ff84ca8303L },
+ { 0xa5f6fb77ea7d23a3L,0x2fac257d672a71cdL,0x6908bef87e6a44d3L,
+ 0x8ff87566891d3d7aL } },
+ /* 24 << 14 */
+ { { 0xe58e90b36b0cf82eL,0x6438d2462615b5e7L,0x07b1f8fc669c145aL,
+ 0xb0d8b2da36f1e1cbL },
+ { 0x54d5dadbd9184c4dL,0x3dbb18d5f93d9976L,0x0a3e0f56d1147d47L,
+ 0x2afa8c8da0a48609L } },
+ /* 25 << 14 */
+ { { 0x275353e8bc36742cL,0x898f427eeea0ed90L,0x26f4947e3e477b00L,
+ 0x8ad8848a308741e3L },
+ { 0x6c703c38d74a2a46L,0x5e3e05a99ba17ba2L,0xc1fa6f664ab9a9e4L,
+ 0x474a2d9a3841d6ecL } },
+ /* 26 << 14 */
+ { { 0x871239ad653ae326L,0x14bcf72aa74cbb43L,0x8737650e20d4c083L,
+ 0x3df86536110ed4afL },
+ { 0xd2d86fe7b53ca555L,0x688cb00dabd5d538L,0xcf81bda31ad38468L,
+ 0x7ccfe3ccf01167b6L } },
+ /* 27 << 14 */
+ { { 0xcf4f47e06c4c1fe6L,0x557e1f1a298bbb79L,0xf93b974f30d45a14L,
+ 0x174a1d2d0baf97c4L },
+ { 0x7a003b30c51fbf53L,0xd8940991ee68b225L,0x5b0aa7b71c0f4173L,
+ 0x975797c9a20a7153L } },
+ /* 28 << 14 */
+ { { 0x26e08c07e3533d77L,0xd7222e6a2e341c99L,0x9d60ec3d8d2dc4edL,
+ 0xbdfe0d8f7c476cf8L },
+ { 0x1fe59ab61d056605L,0xa9ea9df686a8551fL,0x8489941e47fb8d8cL,
+ 0xfeb874eb4a7f1b10L } },
+ /* 29 << 14 */
+ { { 0xfe5fea867ee0d98fL,0x201ad34bdbf61864L,0x45d8fe4737c031d4L,
+ 0xd5f49fae795f0822L },
+ { 0xdb0fb291c7f4a40cL,0x2e69d9c1730ddd92L,0x754e105449d76987L,
+ 0x8a24911d7662db87L } },
+ /* 30 << 14 */
+ { { 0x61fc181060a71676L,0xe852d1a8f66a8ad1L,0x172bbd656417231eL,
+ 0x0d6de7bd3babb11fL },
+ { 0x6fde6f88c8e347f8L,0x1c5875479bd99cc3L,0x78e54ed034076950L,
+ 0x97f0f334796e83baL } },
+ /* 31 << 14 */
+ { { 0xe4dbe1ce4924867aL,0xbd5f51b060b84917L,0x375300403cb09a79L,
+ 0xdb3fe0f8ff1743d8L },
+ { 0xed7894d8556fa9dbL,0xfa26216923412fbfL,0x563be0dbba7b9291L,
+ 0x6ca8b8c00c9fb234L } },
+ /* 32 << 14 */
+ { { 0xed406aa9bd763802L,0xc21486a065303da1L,0x61ae291ec7e62ec4L,
+ 0x622a0492df99333eL },
+ { 0x7fd80c9dbb7a8ee0L,0xdc2ed3bc6c01aedbL,0x35c35a1208be74ecL,
+ 0xd540cb1a469f671fL } },
+ /* 33 << 14 */
+ { { 0xd16ced4ecf84f6c7L,0x8561fb9c2d090f43L,0x7e693d796f239db4L,
+ 0xa736f92877bd0d94L },
+ { 0x07b4d9292c1950eeL,0xda17754356dc11b3L,0xa5dfbbaa7a6a878eL,
+ 0x1c70cb294decb08aL } },
+ /* 34 << 14 */
+ { { 0xfba28c8b6f0f7c50L,0xa8eba2b8854dcc6dL,0x5ff8e89a36b78642L,
+ 0x070c1c8ef6873adfL },
+ { 0xbbd3c3716484d2e4L,0xfb78318f0d414129L,0x2621a39c6ad93b0bL,
+ 0x979d74c2a9e917f7L } },
+ /* 35 << 14 */
+ { { 0xfc19564761fb0428L,0x4d78954abee624d4L,0xb94896e0b8ae86fdL,
+ 0x6667ac0cc91c8b13L },
+ { 0x9f18051243bcf832L,0xfbadf8b7a0010137L,0xc69b4089b3ba8aa7L,
+ 0xfac4bacde687ce85L } },
+ /* 36 << 14 */
+ { { 0x9164088d977eab40L,0x51f4c5b62760b390L,0xd238238f340dd553L,
+ 0x358566c3db1d31c9L },
+ { 0x3a5ad69e5068f5ffL,0xf31435fcdaff6b06L,0xae549a5bd6debff0L,
+ 0x59e5f0b775e01331L } },
+ /* 37 << 14 */
+ { { 0x5d492fb898559acfL,0x96018c2e4db79b50L,0x55f4a48f609f66aaL,
+ 0x1943b3af4900a14fL },
+ { 0xc22496df15a40d39L,0xb2a446844c20f7c5L,0x76a35afa3b98404cL,
+ 0xbec75725ff5d1b77L } },
+ /* 38 << 14 */
+ { { 0xb67aa163bea06444L,0x27e95bb2f724b6f2L,0x3c20e3e9d238c8abL,
+ 0x1213754eddd6ae17L },
+ { 0x8c431020716e0f74L,0x6679c82effc095c2L,0x2eb3adf4d0ac2932L,
+ 0x2cc970d301bb7a76L } },
+ /* 39 << 14 */
+ { { 0x70c71f2f740f0e66L,0x545c616b2b6b23ccL,0x4528cfcbb40a8bd7L,
+ 0xff8396332ab27722L },
+ { 0x049127d9025ac99aL,0xd314d4a02b63e33bL,0xc8c310e728d84519L,
+ 0x0fcb8983b3bc84baL } },
+ /* 40 << 14 */
+ { { 0x2cc5226138634818L,0x501814f4b44c2e0bL,0xf7e181aa54dfdba3L,
+ 0xcfd58ff0e759718cL },
+ { 0xf90cdb14d3b507a8L,0x57bd478ec50bdad8L,0x29c197e250e5f9aaL,
+ 0x4db6eef8e40bc855L } },
+ /* 41 << 14 */
+ { { 0x2cc8f21ad1fc0654L,0xc71cc96381269d73L,0xecfbb204077f49f9L,
+ 0xdde92571ca56b793L },
+ { 0x9abed6a3f97ad8f7L,0xe6c19d3f924de3bdL,0x8dce92f4a140a800L,
+ 0x85f44d1e1337af07L } },
+ /* 42 << 14 */
+ { { 0x5953c08b09d64c52L,0xa1b5e49ff5df9749L,0x336a8fb852735f7dL,
+ 0xb332b6db9add676bL },
+ { 0x558b88a0b4511aa4L,0x09788752dbd5cc55L,0x16b43b9cd8cd52bdL,
+ 0x7f0bc5a0c2a2696bL } },
+ /* 43 << 14 */
+ { { 0x146e12d4c11f61efL,0x9ce107543a83e79eL,0x08ec73d96cbfca15L,
+ 0x09ff29ad5b49653fL },
+ { 0xe31b72bde7da946eL,0xebf9eb3bee80a4f2L,0xd1aabd0817598ce4L,
+ 0x18b5fef453f37e80L } },
+ /* 44 << 14 */
+ { { 0xd5d5cdd35958cd79L,0x3580a1b51d373114L,0xa36e4c91fa935726L,
+ 0xa38c534def20d760L },
+ { 0x7088e40a2ff5845bL,0xe5bb40bdbd78177fL,0x4f06a7a8857f9920L,
+ 0xe3cc3e50e968f05dL } },
+ /* 45 << 14 */
+ { { 0x1d68b7fee5682d26L,0x5206f76faec7f87cL,0x41110530041951abL,
+ 0x58ec52c1d4b5a71aL },
+ { 0xf3488f990f75cf9aL,0xf411951fba82d0d5L,0x27ee75be618895abL,
+ 0xeae060d46d8aab14L } },
+ /* 46 << 14 */
+ { { 0x9ae1df737fb54dc2L,0x1f3e391b25963649L,0x242ec32afe055081L,
+ 0x5bd450ef8491c9bdL },
+ { 0x367efc67981eb389L,0xed7e19283a0550d5L,0x362e776bab3ce75cL,
+ 0xe890e3081f24c523L } },
+ /* 47 << 14 */
+ { { 0xb961b682feccef76L,0x8b8e11f58bba6d92L,0x8f2ccc4c2b2375c4L,
+ 0x0d7f7a52e2f86cfaL },
+ { 0xfd94d30a9efe5633L,0x2d8d246b5451f934L,0x2234c6e3244e6a00L,
+ 0xde2b5b0dddec8c50L } },
+ /* 48 << 14 */
+ { { 0x2ce53c5abf776f5bL,0x6f72407160357b05L,0xb259371771bf3f7aL,
+ 0x87d2501c440c4a9fL },
+ { 0x440552e187b05340L,0xb7bf7cc821624c32L,0x4155a6ce22facddbL,
+ 0x5a4228cb889837efL } },
+ /* 49 << 14 */
+ { { 0xef87d6d6fd4fd671L,0xa233687ec2daa10eL,0x7562224403c0eb96L,
+ 0x7632d1848bf19be6L },
+ { 0x05d0f8e940735ff4L,0x3a3e6e13c00931f1L,0x31ccde6adafe3f18L,
+ 0xf381366acfe51207L } },
+ /* 50 << 14 */
+ { { 0x24c222a960167d92L,0x62f9d6f87529f18cL,0x412397c00353b114L,
+ 0x334d89dcef808043L },
+ { 0xd9ec63ba2a4383ceL,0xcec8e9375cf92ba0L,0xfb8b4288c8be74c0L,
+ 0x67d6912f105d4391L } },
+ /* 51 << 14 */
+ { { 0x7b996c461b913149L,0x36aae2ef3a4e02daL,0xb68aa003972de594L,
+ 0x284ec70d4ec6d545L },
+ { 0xf3d2b2d061391d54L,0x69c5d5d6fe114e92L,0xbe0f00b5b4482dffL,
+ 0xe1596fa5f5bf33c5L } },
+ /* 52 << 14 */
+ { { 0x10595b5696a71cbaL,0x944938b2fdcadeb7L,0xa282da4cfccd8471L,
+ 0x98ec05f30d37bfe1L },
+ { 0xe171ce1b0698304aL,0x2d69144421bdf79bL,0xd0cd3b741b21dec1L,
+ 0x712ecd8b16a15f71L } },
+ /* 53 << 14 */
+ { { 0x8d4c00a700fd56e1L,0x02ec9692f9527c18L,0x21c449374a3e42e1L,
+ 0x9176fbab1392ae0aL },
+ { 0x8726f1ba44b7b618L,0xb4d7aae9f1de491cL,0xf91df7b907b582c0L,
+ 0x7e116c30ef60aa3aL } },
+ /* 54 << 14 */
+ { { 0x99270f81466265d7L,0xb15b6fe24df7adf0L,0xfe33b2d3f9738f7fL,
+ 0x48553ab9d6d70f95L },
+ { 0x2cc72ac8c21e94dbL,0x795ac38dbdc0bbeeL,0x0a1be4492e40478fL,
+ 0x81bd3394052bde55L } },
+ /* 55 << 14 */
+ { { 0x63c8dbe956b3c4f2L,0x017a99cf904177ccL,0x947bbddb4d010fc1L,
+ 0xacf9b00bbb2c9b21L },
+ { 0x2970bc8d47173611L,0x1a4cbe08ac7d756fL,0x06d9f4aa67d541a2L,
+ 0xa3e8b68959c2cf44L } },
+ /* 56 << 14 */
+ { { 0xaad066da4d88f1ddL,0xc604f1657ad35deaL,0x7edc07204478ca67L,
+ 0xa10dfae0ba02ce06L },
+ { 0xeceb1c76af36f4e4L,0x994b2292af3f8f48L,0xbf9ed77b77c8a68cL,
+ 0x74f544ea51744c9dL } },
+ /* 57 << 14 */
+ { { 0x82d05bb98113a757L,0x4ef2d2b48a9885e4L,0x1e332be51aa7865fL,
+ 0x22b76b18290d1a52L },
+ { 0x308a231044351683L,0x9d861896a3f22840L,0x5959ddcd841ed947L,
+ 0x0def0c94154b73bfL } },
+ /* 58 << 14 */
+ { { 0xf01054174c7c15e0L,0x539bfb023a277c32L,0xe699268ef9dccf5fL,
+ 0x9f5796a50247a3bdL },
+ { 0x8b839de84f157269L,0xc825c1e57a30196bL,0x6ef0aabcdc8a5a91L,
+ 0xf4a8ce6c498b7fe6L } },
+ /* 59 << 14 */
+ { { 0x1cce35a770cbac78L,0x83488e9bf6b23958L,0x0341a070d76cb011L,
+ 0xda6c9d06ae1b2658L },
+ { 0xb701fb30dd648c52L,0x994ca02c52fb9fd1L,0x069331176f563086L,
+ 0x3d2b810017856babL } },
+ /* 60 << 14 */
+ { { 0xe89f48c85963a46eL,0x658ab875a99e61c7L,0x6e296f874b8517b4L,
+ 0x36c4fcdcfc1bc656L },
+ { 0xde5227a1a3906defL,0x9fe95f5762418945L,0x20c91e81fdd96cdeL,
+ 0x5adbe47eda4480deL } },
+ /* 61 << 14 */
+ { { 0xa009370f396de2b6L,0x98583d4bf0ecc7bdL,0xf44f6b57e51d0672L,
+ 0x03d6b078556b1984L },
+ { 0x27dbdd93b0b64912L,0x9b3a343415687b09L,0x0dba646151ec20a9L,
+ 0xec93db7fff28187cL } },
+ /* 62 << 14 */
+ { { 0x00ff8c2466e48bddL,0x2514f2f911ccd78eL,0xeba11f4fe1250603L,
+ 0x8a22cd41243fa156L },
+ { 0xa4e58df4b283e4c6L,0x78c298598b39783fL,0x5235aee2a5259809L,
+ 0xc16284b50e0227ddL } },
+ /* 63 << 14 */
+ { { 0xa5f579161338830dL,0x6d4b8a6bd2123fcaL,0x236ea68af9c546f8L,
+ 0xc1d36873fa608d36L },
+ { 0xcd76e4958d436d13L,0xd4d9c2218fb080afL,0x665c1728e8ad3fb5L,
+ 0xcf1ebe4db3d572e0L } },
+ /* 64 << 14 */
+ { { 0xa7a8746a584c5e20L,0x267e4ea1b9dc7035L,0x593a15cfb9548c9bL,
+ 0x5e6e21354bd012f3L },
+ { 0xdf31cc6a8c8f936eL,0x8af84d04b5c241dcL,0x63990a6f345efb86L,
+ 0x6fef4e61b9b962cbL } },
+ /* 0 << 21 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 21 */
+ { { 0xf6368f0925722608L,0x131260db131cf5c6L,0x40eb353bfab4f7acL,
+ 0x85c7888037eee829L },
+ { 0x4c1581ffc3bdf24eL,0x5bff75cbf5c3c5a8L,0x35e8c83fa14e6f40L,
+ 0xb81d1c0f0295e0caL } },
+ /* 2 << 21 */
+ { { 0xfcde7cc8f43a730fL,0xe89b6f3c33ab590eL,0xc823f529ad03240bL,
+ 0x82b79afe98bea5dbL },
+ { 0x568f2856962fe5deL,0x0c590adb60c591f3L,0x1fc74a144a28a858L,
+ 0x3b662498b3203f4cL } },
+ /* 3 << 21 */
+ { { 0x91e3cf0d6c39765aL,0xa2db3acdac3cca0bL,0x288f2f08cb953b50L,
+ 0x2414582ccf43cf1aL },
+ { 0x8dec8bbc60eee9a8L,0x54c79f02729aa042L,0xd81cd5ec6532f5d5L,
+ 0xa672303acf82e15fL } },
+ /* 4 << 21 */
+ { { 0x376aafa8719c0563L,0xcd8ad2dcbc5fc79fL,0x303fdb9fcb750cd3L,
+ 0x14ff052f4418b08eL },
+ { 0xf75084cf3e2d6520L,0x7ebdf0f8144ed509L,0xf43bf0f2d3f25b98L,
+ 0x86ad71cfa354d837L } },
+ /* 5 << 21 */
+ { { 0xb827fe9226f43572L,0xdfd3ab5b5d824758L,0x315dd23a539094c1L,
+ 0x85c0e37a66623d68L },
+ { 0x575c79727be19ae0L,0x616a3396df0d36b5L,0xa1ebb3c826b1ff7eL,
+ 0x635b9485140ad453L } },
+ /* 6 << 21 */
+ { { 0x92bf3cdada430c0bL,0x4702850e3a96dac6L,0xc91cf0a515ac326aL,
+ 0x95de4f49ab8c25e4L },
+ { 0xb01bad09e265c17cL,0x24e45464087b3881L,0xd43e583ce1fac5caL,
+ 0xe17cb3186ead97a6L } },
+ /* 7 << 21 */
+ { { 0x6cc3924374dcec46L,0x33cfc02d54c2b73fL,0x82917844f26cd99cL,
+ 0x8819dd95d1773f89L },
+ { 0x09572aa60871f427L,0x8e0cf365f6f01c34L,0x7fa52988bff1f5afL,
+ 0x4eb357eae75e8e50L } },
+ /* 8 << 21 */
+ { { 0xd9d0c8c4868af75dL,0xd7325cff45c8c7eaL,0xab471996cc81ecb0L,
+ 0xff5d55f3611824edL },
+ { 0xbe3145411977a0eeL,0x5085c4c5722038c6L,0x2d5335bff94bb495L,
+ 0x894ad8a6c8e2a082L } },
+ /* 9 << 21 */
+ { { 0x5c3e2341ada35438L,0xf4a9fc89049b8c4eL,0xbeeb355a9f17cf34L,
+ 0x3f311e0e6c91fe10L },
+ { 0xc2d2003892ab9891L,0x257bdcc13e8ce9a9L,0x1b2d978988c53beeL,
+ 0x927ce89acdba143aL } },
+ /* 10 << 21 */
+ { { 0xb0a32cca523db280L,0x5c889f8a50d43783L,0x503e04b34897d16fL,
+ 0x8cdb6e7808f5f2e8L },
+ { 0x6ab91cf0179c8e74L,0xd8874e5248211d60L,0xf948d4d5ea851200L,
+ 0x4076d41ee6f9840aL } },
+ /* 11 << 21 */
+ { { 0xc20e263c47b517eaL,0x79a448fd30685e5eL,0xe55f6f78f90631a0L,
+ 0x88a790b1a79e6346L },
+ { 0x62160c7d80969fe8L,0x54f92fd441491bb9L,0xa6645c235c957526L,
+ 0xf44cc5aebea3ce7bL } },
+ /* 12 << 21 */
+ { { 0xf76283278b1e68b7L,0xc731ad7a303f29d3L,0xfe5a9ca957d03ecbL,
+ 0x96c0d50c41bc97a7L },
+ { 0xc4669fe79b4f7f24L,0xfdd781d83d9967efL,0x7892c7c35d2c208dL,
+ 0x8bf64f7cae545cb3L } },
+ /* 13 << 21 */
+ { { 0xc01f862c467be912L,0xf4c85ee9c73d30ccL,0x1fa6f4be6ab83ec7L,
+ 0xa07a3c1c4e3e3cf9L },
+ { 0x87f8ef450c00beb3L,0x30e2c2b3000d4c3eL,0x1aa00b94fe08bf5bL,
+ 0x32c133aa9224ef52L } },
+ /* 14 << 21 */
+ { { 0x38df16bb32e5685dL,0x68a9e06958e6f544L,0x495aaff7cdc5ebc6L,
+ 0xf894a645378b135fL },
+ { 0xf316350a09e27ecfL,0xeced201e58f7179dL,0x2eec273ce97861baL,
+ 0x47ec2caed693be2eL } },
+ /* 15 << 21 */
+ { { 0xfa4c97c4f68367ceL,0xe4f47d0bbe5a5755L,0x17de815db298a979L,
+ 0xd7eca659c177dc7dL },
+ { 0x20fdbb7149ded0a3L,0x4cb2aad4fb34d3c5L,0x2cf31d2860858a33L,
+ 0x3b6873efa24aa40fL } },
+ /* 16 << 21 */
+ { { 0x540234b22c11bb37L,0x2d0366dded4c74a3L,0xf9a968daeec5f25dL,
+ 0x3660106867b63142L },
+ { 0x07cd6d2c68d7b6d4L,0xa8f74f090c842942L,0xe27514047768b1eeL,
+ 0x4b5f7e89fe62aee4L } },
+ /* 17 << 21 */
+ { { 0xc6a7717789070d26L,0xa1f28e4edd1c8bc7L,0xea5f4f06469e1f17L,
+ 0x78fc242afbdb78e0L },
+ { 0xc9c7c5928b0588f1L,0xb6b7a0fd1535921eL,0xcc5bdb91bde5ae35L,
+ 0xb42c485e12ff1864L } },
+ /* 18 << 21 */
+ { { 0xa1113e13dbab98aaL,0xde9d469ba17b1024L,0x23f48b37c0462d3aL,
+ 0x3752e5377c5c078dL },
+ { 0xe3a86add15544eb9L,0xf013aea780fba279L,0x8b5bb76cf22001b5L,
+ 0xe617ba14f02891abL } },
+ /* 19 << 21 */
+ { { 0xd39182a6936219d3L,0x5ce1f194ae51cb19L,0xc78f8598bf07a74cL,
+ 0x6d7158f222cbf1bcL },
+ { 0x3b846b21e300ce18L,0x35fba6302d11275dL,0x5fe25c36a0239b9bL,
+ 0xd8beb35ddf05d940L } },
+ /* 20 << 21 */
+ { { 0x4db02bb01f7e320dL,0x0641c3646da320eaL,0x6d95fa5d821389a3L,
+ 0x926997488fcd8e3dL },
+ { 0x316fef17ceb6c143L,0x67fcb841d933762bL,0xbb837e35118b17f8L,
+ 0x4b92552f9fd24821L } },
+ /* 21 << 21 */
+ { { 0xae6bc70e46aca793L,0x1cf0b0e4e579311bL,0x8dc631be5802f716L,
+ 0x099bdc6fbddbee4dL },
+ { 0xcc352bb20caf8b05L,0xf74d505a72d63df2L,0xb9876d4b91c4f408L,
+ 0x1ce184739e229b2dL } },
+ /* 22 << 21 */
+ { { 0x4950759783abdb4aL,0x850fbcb6dee84b18L,0x6325236e609e67dcL,
+ 0x04d831d99336c6d8L },
+ { 0x8deaae3bfa12d45dL,0xe425f8ce4746e246L,0x8004c17524f5f31eL,
+ 0xaca16d8fad62c3b7L } },
+ /* 23 << 21 */
+ { { 0x0dc15a6a9152f934L,0xf1235e5ded0e12c1L,0xc33c06ecda477dacL,
+ 0x76be8732b2ea0006L },
+ { 0xcf3f78310c0cd313L,0x3c524553a614260dL,0x31a756f8cab22d15L,
+ 0x03ee10d177827a20L } },
+ /* 24 << 21 */
+ { { 0xd1e059b21994ef20L,0x2a653b69638ae318L,0x70d5eb582f699010L,
+ 0x279739f709f5f84aL },
+ { 0x5da4663c8b799336L,0xfdfdf14d203c37ebL,0x32d8a9dca1dbfb2dL,
+ 0xab40cff077d48f9bL } },
+ /* 25 << 21 */
+ { { 0xc018b383d20b42d5L,0xf9a810ef9f78845fL,0x40af3753bdba9df0L,
+ 0xb90bdcfc131dfdf9L },
+ { 0x18720591f01ab782L,0xc823f2116af12a88L,0xa51b80f30dc14401L,
+ 0xde248f77fb2dfbe3L } },
+ /* 26 << 21 */
+ { { 0xef5a44e50cafe751L,0x73997c9cd4dcd221L,0x32fd86d1de854024L,
+ 0xd5b53adca09b84bbL },
+ { 0x008d7a11dcedd8d1L,0x406bd1c874b32c84L,0x5d4472ff05dde8b1L,
+ 0x2e25f2cdfce2b32fL } },
+ /* 27 << 21 */
+ { { 0xbec0dd5e29dfc254L,0x4455fcf62b98b267L,0x0b4d43a5c72df2adL,
+ 0xea70e6be48a75397L },
+ { 0x2aad61695820f3bfL,0xf410d2dd9e37f68fL,0x70fb7dba7be5ac83L,
+ 0x636bb64536ec3eecL } },
+ /* 28 << 21 */
+ { { 0x27104ea39754e21cL,0xbc87a3e68d63c373L,0x483351d74109db9aL,
+ 0x0fa724e360134da7L },
+ { 0x9ff44c29b0720b16L,0x2dd0cf1306aceeadL,0x5942758ce26929a6L,
+ 0x96c5db92b766a92bL } },
+ /* 29 << 21 */
+ { { 0xcec7d4c05f18395eL,0xd3f227441f80d032L,0x7a68b37acb86075bL,
+ 0x074764ddafef92dbL },
+ { 0xded1e9507bc7f389L,0xc580c850b9756460L,0xaeeec2a47da48157L,
+ 0x3f0b4e7f82c587b3L } },
+ /* 30 << 21 */
+ { { 0x231c6de8a9f19c53L,0x5717bd736974e34eL,0xd9e1d216f1508fa9L,
+ 0x9f112361dadaa124L },
+ { 0x80145e31823b7348L,0x4dd8f0d5ac634069L,0xe3d82fc72297c258L,
+ 0x276fcfee9cee7431L } },
+ /* 31 << 21 */
+ { { 0x8eb61b5e2bc0aea9L,0x4f668fd5de329431L,0x03a32ab138e4b87eL,
+ 0xe137451773d0ef0bL },
+ { 0x1a46f7e6853ac983L,0xc3bdf42e68e78a57L,0xacf207852ea96dd1L,
+ 0xa10649b9f1638460L } },
+ /* 32 << 21 */
+ { { 0xf2369f0b879fbbedL,0x0ff0ae86da9d1869L,0x5251d75956766f45L,
+ 0x4984d8c02be8d0fcL },
+ { 0x7ecc95a6d21008f0L,0x29bd54a03a1a1c49L,0xab9828c5d26c50f3L,
+ 0x32c0087c51d0d251L } },
+ /* 33 << 21 */
+ { { 0x9bac3ce60c1cdb26L,0xcd94d947557ca205L,0x1b1bd5989db1fdcdL,
+ 0x0eda0108a3d8b149L },
+ { 0x9506661056152fccL,0xc2f037e6e7192b33L,0xdeffb41ac92e05a4L,
+ 0x1105f6c2c2f6c62eL } },
+ /* 34 << 21 */
+ { { 0x68e735008733913cL,0xcce861633f3adc40L,0xf407a94238a278e9L,
+ 0xd13c1b9d2ab21292L },
+ { 0x93ed7ec71c74cf5cL,0x8887dc48f1a4c1b4L,0x3830ff304b3a11f1L,
+ 0x358c5a3c58937cb6L } },
+ /* 35 << 21 */
+ { { 0x027dc40489022829L,0x40e939773b798f79L,0x90ad333738be6eadL,
+ 0x9c23f6bcf34c0a5dL },
+ { 0xd1711a35fbffd8bbL,0x60fcfb491949d3ddL,0x09c8ef4b7825d93aL,
+ 0x24233cffa0a8c968L } },
+ /* 36 << 21 */
+ { { 0x67ade46ce6d982afL,0xebb6bf3ee7544d7cL,0xd6b9ba763d8bd087L,
+ 0x46fe382d4dc61280L },
+ { 0xbd39a7e8b5bdbd75L,0xab381331b8f228feL,0x0709a77cce1c4300L,
+ 0x6a247e56f337ceacL } },
+ /* 37 << 21 */
+ { { 0x8f34f21b636288beL,0x9dfdca74c8a7c305L,0x6decfd1bea919e04L,
+ 0xcdf2688d8e1991f8L },
+ { 0xe607df44d0f8a67eL,0xd985df4b0b58d010L,0x57f834c50c24f8f4L,
+ 0xe976ef56a0bf01aeL } },
+ /* 38 << 21 */
+ { { 0x536395aca1c32373L,0x351027aa734c0a13L,0xd2f1b5d65e6bd5bcL,
+ 0x2b539e24223debedL },
+ { 0xd4994cec0eaa1d71L,0x2a83381d661dcf65L,0x5f1aed2f7b54c740L,
+ 0x0bea3fa5d6dda5eeL } },
+ /* 39 << 21 */
+ { { 0x9d4fb68436cc6134L,0x8eb9bbf3c0a443ddL,0xfc500e2e383b7d2aL,
+ 0x7aad621c5b775257L },
+ { 0x69284d740a8f7cc0L,0xe820c2ce07562d65L,0xbf9531b9499758eeL,
+ 0x73e95ca56ee0cc2dL } },
+ /* 40 << 21 */
+ { { 0xf61790abfbaf50a5L,0xdf55e76b684e0750L,0xec516da7f176b005L,
+ 0x575553bb7a2dddc7L },
+ { 0x37c87ca3553afa73L,0x315f3ffc4d55c251L,0xe846442aaf3e5d35L,
+ 0x61b911496495ff28L } },
+ /* 41 << 21 */
+ { { 0x23cc95d3fa326dc3L,0x1df4da1f18fc2ceaL,0x24bf9adcd0a37d59L,
+ 0xb6710053320d6e1eL },
+ { 0x96f9667e618344d1L,0xcc7ce042a06445afL,0xa02d8514d68dbc3aL,
+ 0x4ea109e4280b5a5bL } },
+ /* 42 << 21 */
+ { { 0x5741a7acb40961bfL,0x4ada59376aa56bfaL,0x7feb914502b765d1L,
+ 0x561e97bee6ad1582L },
+ { 0xbbc4a5b6da3982f5L,0x0c2659edb546f468L,0xb8e7e6aa59612d20L,
+ 0xd83dfe20ac19e8e0L } },
+ /* 43 << 21 */
+ { { 0x8530c45fb835398cL,0x6106a8bfb38a41c2L,0x21e8f9a635f5dcdbL,
+ 0x39707137cae498edL },
+ { 0x70c23834d8249f00L,0x9f14b58fab2537a0L,0xd043c3655f61c0c2L,
+ 0xdc5926d609a194a7L } },
+ /* 44 << 21 */
+ { { 0xddec03398e77738aL,0xd07a63effba46426L,0x2e58e79cee7f6e86L,
+ 0xe59b0459ff32d241L },
+ { 0xc5ec84e520fa0338L,0x97939ac8eaff5aceL,0x0310a4e3b4a38313L,
+ 0x9115fba28f9d9885L } },
+ /* 45 << 21 */
+ { { 0x8dd710c25fadf8c3L,0x66be38a2ce19c0e2L,0xd42a279c4cfe5022L,
+ 0x597bb5300e24e1b8L },
+ { 0x3cde86b7c153ca7fL,0xa8d30fb3707d63bdL,0xac905f92bd60d21eL,
+ 0x98e7ffb67b9a54abL } },
+ /* 46 << 21 */
+ { { 0xd7147df8e9726a30L,0xb5e216ffafce3533L,0xb550b7992ff1ec40L,
+ 0x6b613b87a1e953fdL },
+ { 0x87b88dba792d5610L,0x2ee1270aa190fbe1L,0x02f4e2dc2ef581daL,
+ 0x016530e4eff82a95L } },
+ /* 47 << 21 */
+ { { 0xcbb93dfd8fd6ee89L,0x16d3d98646848fffL,0x600eff241da47adfL,
+ 0x1b9754a00ad47a71L },
+ { 0x8f9266df70c33b98L,0xaadc87aedf34186eL,0x0d2ce8e14ad24132L,
+ 0x8a47cbfc19946ebaL } },
+ /* 48 << 21 */
+ { { 0x47feeb6662b5f3afL,0xcefab5610abb3734L,0x449de60e19f35cb1L,
+ 0x39f8db14157f0eb9L },
+ { 0xffaecc5b3c61bfd6L,0xa5a4d41d41216703L,0x7f8fabed224e1cc2L,
+ 0x0d5a8186871ad953L } },
+ /* 49 << 21 */
+ { { 0xf10774f7d22da9a9L,0x45b8a678cc8a9b0dL,0xd9c2e722bdc32cffL,
+ 0xbf71b5f5337202a5L },
+ { 0x95c57f2f69fc4db9L,0xb6dad34c765d01e1L,0x7e0bd13fcb904635L,
+ 0x61751253763a588cL } },
+ /* 50 << 21 */
+ { { 0xd85c299781af2c2dL,0xc0f7d9c481b9d7daL,0x838a34ae08533e8dL,
+ 0x15c4cb08311d8311L },
+ { 0x97f832858e121e14L,0xeea7dc1e85000a5fL,0x0c6059b65d256274L,
+ 0xec9beaceb95075c0L } },
+ /* 51 << 21 */
+ { { 0x173daad71df97828L,0xbf851cb5a8937877L,0xb083c59401646f3cL,
+ 0x3bad30cf50c6d352L },
+ { 0xfeb2b202496bbceaL,0x3cf9fd4f18a1e8baL,0xd26de7ff1c066029L,
+ 0x39c81e9e4e9ed4f8L } },
+ /* 52 << 21 */
+ { { 0xd8be0cb97b390d35L,0x01df2bbd964aab27L,0x3e8c1a65c3ef64f8L,
+ 0x567291d1716ed1ddL },
+ { 0x95499c6c5f5406d3L,0x71fdda395ba8e23fL,0xcfeb320ed5096eceL,
+ 0xbe7ba92bca66dd16L } },
+ /* 53 << 21 */
+ { { 0x4608d36bc6fb5a7dL,0xe3eea15a6d2dd0e0L,0x75b0a3eb8f97a36aL,
+ 0xf59814cc1c83de1eL },
+ { 0x56c9c5b01c33c23fL,0xa96c1da46faa4136L,0x46bf2074de316551L,
+ 0x3b866e7b1f756c8fL } },
+ /* 54 << 21 */
+ { { 0x727727d81495ed6bL,0xb2394243b682dce7L,0x8ab8454e758610f3L,
+ 0xc243ce84857d72a4L },
+ { 0x7b320d71dbbf370fL,0xff9afa3778e0f7caL,0x0119d1e0ea7b523fL,
+ 0xb997f8cb058c7d42L } },
+ /* 55 << 21 */
+ { { 0x285bcd2a37bbb184L,0x51dcec49a45d1fa6L,0x6ade3b64e29634cbL,
+ 0x080c94a726b86ef1L },
+ { 0xba583db12283fbe3L,0x902bddc85a9315edL,0x07c1ccb386964becL,
+ 0x78f4eacfb6258301L } },
+ /* 56 << 21 */
+ { { 0x4bdf3a4956f90823L,0xba0f5080741d777bL,0x091d71c3f38bf760L,
+ 0x9633d50f9b625b02L },
+ { 0x03ecb743b8c9de61L,0xb47512545de74720L,0x9f9defc974ce1cb2L,
+ 0x774a4f6a00bd32efL } },
+ /* 57 << 21 */
+ { { 0xaca385f773848f22L,0x53dad716f3f8558eL,0xab7b34b093c471f9L,
+ 0xf530e06919644bc7L },
+ { 0x3d9fb1ffdd59d31aL,0x4382e0df08daa795L,0x165c6f4bd5cc88d7L,
+ 0xeaa392d54a18c900L } },
+ /* 58 << 21 */
+ { { 0x94203c67648024eeL,0x188763f28c2fabcdL,0xa80f87acbbaec835L,
+ 0x632c96e0f29d8d54L },
+ { 0x29b0a60e4c00a95eL,0x2ef17f40e011e9faL,0xf6c0e1d115b77223L,
+ 0xaaec2c6214b04e32L } },
+ /* 59 << 21 */
+ { { 0xd35688d83d84e58cL,0x2af5094c958571dbL,0x4fff7e19760682a6L,
+ 0x4cb27077e39a407cL },
+ { 0x0f59c5474ff0e321L,0x169f34a61b34c8ffL,0x2bff109652bc1ba7L,
+ 0xa25423b783583544L } },
+ /* 60 << 21 */
+ { { 0x5d55d5d50ac8b782L,0xff6622ec2db3c892L,0x48fce7416b8bb642L,
+ 0x31d6998c69d7e3dcL },
+ { 0xdbaf8004cadcaed0L,0x801b0142d81d053cL,0x94b189fc59630ec6L,
+ 0x120e9934af762c8eL } },
+ /* 61 << 21 */
+ { { 0x53a29aa4fdc6a404L,0x19d8e01ea1909948L,0x3cfcabf1d7e89681L,
+ 0x3321a50d4e132d37L },
+ { 0xd0496863e9a86111L,0x8c0cde6106a3bc65L,0xaf866c49fc9f8eefL,
+ 0x2066350eff7f5141L } },
+ /* 62 << 21 */
+ { { 0x4f8a4689e56ddfbdL,0xea1b0c07fe32983aL,0x2b317462873cb8cbL,
+ 0x658deddc2d93229fL },
+ { 0x65efaf4d0f64ef58L,0xfe43287d730cc7a8L,0xaebc0c723d047d70L,
+ 0x92efa539d92d26c9L } },
+ /* 63 << 21 */
+ { { 0x06e7845794b56526L,0x415cb80f0961002dL,0x89e5c56576dcb10fL,
+ 0x8bbb6982ff9259feL },
+ { 0x4fe8795b9abc2668L,0xb5d4f5341e678fb1L,0x6601f3be7b7da2b9L,
+ 0x98da59e2a13d6805L } },
+ /* 64 << 21 */
+ { { 0x190d8ea601799a52L,0xa20cec41b86d2952L,0x3062ffb27fff2a7cL,
+ 0x741b32e579f19d37L },
+ { 0xf80d81814eb57d47L,0x7a2d0ed416aef06bL,0x09735fb01cecb588L,
+ 0x1641caaac6061f5bL } },
+ /* 0 << 28 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 28 */
+ { { 0x7f99824f20151427L,0x206828b692430206L,0xaa9097d7e1112357L,
+ 0xacf9a2f209e414ecL },
+ { 0xdbdac9da27915356L,0x7e0734b7001efee3L,0x54fab5bbd2b288e2L,
+ 0x4c630fc4f62dd09cL } },
+ /* 2 << 28 */
+ { { 0x8537107a1ac2703bL,0xb49258d86bc857b5L,0x57df14debcdaccd1L,
+ 0x24ab68d7c4ae8529L },
+ { 0x7ed8b5d4734e59d0L,0x5f8740c8c495cc80L,0x84aedd5a291db9b3L,
+ 0x80b360f84fb995beL } },
+ /* 3 << 28 */
+ { { 0xae915f5d5fa067d1L,0x4134b57f9668960cL,0xbd3656d6a48edaacL,
+ 0xdac1e3e4fc1d7436L },
+ { 0x674ff869d81fbb26L,0x449ed3ecb26c33d4L,0x85138705d94203e8L,
+ 0xccde538bbeeb6f4aL } },
+ /* 4 << 28 */
+ { { 0x55d5c68da61a76faL,0x598b441dca1554dcL,0xd39923b9773b279cL,
+ 0x33331d3c36bf9efcL },
+ { 0x2d4c848e298de399L,0xcfdb8e77a1a27f56L,0x94c855ea57b8ab70L,
+ 0xdcdb9dae6f7879baL } },
+ /* 5 << 28 */
+ { { 0x7bdff8c2019f2a59L,0xb3ce5bb3cb4fbc74L,0xea907f688a9173ddL,
+ 0x6cd3d0d395a75439L },
+ { 0x92ecc4d6efed021cL,0x09a9f9b06a77339aL,0x87ca6b157188c64aL,
+ 0x10c2996844899158L } },
+ /* 6 << 28 */
+ { { 0x5859a229ed6e82efL,0x16f338e365ebaf4eL,0x0cd313875ead67aeL,
+ 0x1c73d22854ef0bb4L },
+ { 0x4cb5513174a5c8c7L,0x01cd29707f69ad6aL,0xa04d00dde966f87eL,
+ 0xd96fe4470b7b0321L } },
+ /* 7 << 28 */
+ { { 0x342ac06e88fbd381L,0x02cd4a845c35a493L,0xe8fa89de54f1bbcdL,
+ 0x341d63672575ed4cL },
+ { 0xebe357fbd238202bL,0x600b4d1aa984ead9L,0xc35c9f4452436ea0L,
+ 0x96fe0a39a370751bL } },
+ /* 8 << 28 */
+ { { 0x4c4f07367f636a38L,0x9f943fb70e76d5cbL,0xb03510baa8b68b8bL,
+ 0xc246780a9ed07a1fL },
+ { 0x3c0514156d549fc2L,0xc2953f31607781caL,0x955e2c69d8d95413L,
+ 0xb300fadc7bd282e3L } },
+ /* 9 << 28 */
+ { { 0x81fe7b5087e9189fL,0xdb17375cf42dda27L,0x22f7d896cf0a5904L,
+ 0xa0e57c5aebe348e6L },
+ { 0xa61011d3f40e3c80L,0xb11893218db705c5L,0x4ed9309e50fedec3L,
+ 0xdcf14a104d6d5c1dL } },
+ /* 10 << 28 */
+ { { 0x056c265b55691342L,0xe8e0850491049dc7L,0x131329f5c9bae20aL,
+ 0x96c8b3e8d9dccdb4L },
+ { 0x8c5ff838fb4ee6b4L,0xfc5a9aeb41e8ccf0L,0x7417b764fae050c6L,
+ 0x0953c3d700452080L } },
+ /* 11 << 28 */
+ { { 0x2137268238dfe7e8L,0xea417e152bb79d4bL,0x59641f1c76e7cf2dL,
+ 0x271e3059ea0bcfccL },
+ { 0x624c7dfd7253ecbdL,0x2f552e254fca6186L,0xcbf84ecd4d866e9cL,
+ 0x73967709f68d4610L } },
+ /* 12 << 28 */
+ { { 0xa14b1163c27901b4L,0xfd9236e0899b8bf3L,0x42b091eccbc6da0aL,
+ 0xbb1dac6f5ad1d297L },
+ { 0x80e61d53a91cf76eL,0x4110a412d31f1ee7L,0x2d87c3ba13efcf77L,
+ 0x1f374bb4df450d76L } },
+ /* 13 << 28 */
+ { { 0x5e78e2f20d188dabL,0xe3968ed0f4b885efL,0x46c0568e7314570fL,
+ 0x3161633801170521L },
+ { 0x18e1e7e24f0c8afeL,0x4caa75ffdeea78daL,0x82db67f27c5d8a51L,
+ 0x36a44d866f505370L } },
+ /* 14 << 28 */
+ { { 0xd72c5bda0333974fL,0x5db516ae27a70146L,0x34705281210ef921L,
+ 0xbff17a8f0c9c38e5L },
+ { 0x78f4814e12476da1L,0xc1e1661333c16980L,0x9e5b386f424d4bcaL,
+ 0x4c274e87c85740deL } },
+ /* 15 << 28 */
+ { { 0xb6a9b88d6c2f5226L,0x14d1b944550d7ca8L,0x580c85fc1fc41709L,
+ 0xc1da368b54c6d519L },
+ { 0x2b0785ced5113cf7L,0x0670f6335a34708fL,0x46e2376715cc3f88L,
+ 0x1b480cfa50c72c8fL } },
+ /* 16 << 28 */
+ { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L,
+ 0xd953c50ddbdf58e9L },
+ { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL,
+ 0x863ebb7e9eb288f3L } },
+ /* 17 << 28 */
+ { { 0x6e6ab7616aca8ee7L,0x97d10b39d7b40358L,0x1687d3771e5feb0dL,
+ 0xc83e50e48265a27aL },
+ { 0x8f75a9fec954b313L,0xcc2e8f47310d1f61L,0xf5ba81c56557d0e0L,
+ 0x25f9680c3eaf6207L } },
+ /* 18 << 28 */
+ { { 0xf95c66094354080bL,0x5225bfa57bf2fe1cL,0xc5c004e25c7d98faL,
+ 0x3561bf1c019aaf60L },
+ { 0x5e6f9f17ba151474L,0xdec2f934b04f6ecaL,0x64e368a1269acb1eL,
+ 0x1332d9e40cdda493L } },
+ /* 19 << 28 */
+ { { 0x60d6cf69df23de05L,0x66d17da2009339a0L,0x9fcac9850a693923L,
+ 0xbcf057fced7c6a6dL },
+ { 0xc3c5c8c5f0b5662cL,0x25318dd8dcba4f24L,0x60e8cb75082b69ffL,
+ 0x7c23b3ee1e728c01L } },
+ /* 20 << 28 */
+ { { 0x15e10a0a097e4403L,0xcb3d0a8619854665L,0x88d8e211d67d4826L,
+ 0xb39af66e0b9d2839L },
+ { 0xa5f94588bd475ca8L,0xe06b7966c077b80bL,0xfedb1485da27c26cL,
+ 0xd290d33afe0fd5e0L } },
+ /* 21 << 28 */
+ { { 0xa40bcc47f34fb0faL,0xb4760cc81fb1ab09L,0x8fca0993a273bfe3L,
+ 0x13e4fe07f70b213cL },
+ { 0x3bcdb992fdb05163L,0x8c484b110c2b19b6L,0x1acb815faaf2e3e2L,
+ 0xc6905935b89ff1b4L } },
+ /* 22 << 28 */
+ { { 0xb2ad6f9d586e74e1L,0x488883ad67b80484L,0x758aa2c7369c3ddbL,
+ 0x8ab74e699f9afd31L },
+ { 0x10fc2d285e21beb1L,0x3484518a318c42f9L,0x377427dc53cf40c3L,
+ 0x9de0781a391bc1d9L } },
+ /* 23 << 28 */
+ { { 0x8faee858693807e1L,0xa38653274e81ccc7L,0x02c30ff26f835b84L,
+ 0xb604437b0d3d38d4L },
+ { 0xb3fc8a985ca1823dL,0xb82f7ec903be0324L,0xee36d761cf684a33L,
+ 0x5a01df0e9f29bf7dL } },
+ /* 24 << 28 */
+ { { 0x686202f31306583dL,0x05b10da0437c622eL,0xbf9aaa0f076a7bc8L,
+ 0x25e94efb8f8f4e43L },
+ { 0x8a35c9b7fa3dc26dL,0xe0e5fb9396ff03c5L,0xa77e3843ebc394ceL,
+ 0xcede65958361de60L } },
+ /* 25 << 28 */
+ { { 0xd27c22f6a1993545L,0xab01cc3624d671baL,0x63fa2877a169c28eL,
+ 0x925ef9042eb08376L },
+ { 0x3b2fa3cf53aa0b32L,0xb27beb5b71c49d7aL,0xb60e1834d105e27fL,
+ 0xd60897884f68570dL } },
+ /* 26 << 28 */
+ { { 0x23094ce0d6fbc2acL,0x738037a1815ff551L,0xda73b1bb6bef119cL,
+ 0xdcf6c430eef506baL },
+ { 0x00e4fe7be3ef104aL,0xebdd9a2c0a065628L,0x853a81c38792043eL,
+ 0x22ad6eceb3b59108L } },
+ /* 27 << 28 */
+ { { 0x9fb813c039cd297dL,0x8ec7e16e05bda5d9L,0x2834797c0d104b96L,
+ 0xcc11a2e77c511510L },
+ { 0x96ca5a5396ee6380L,0x054c8655cea38742L,0xb5946852d54dfa7dL,
+ 0x97c422e71f4ab207L } },
+ /* 28 << 28 */
+ { { 0xbf9075090c22b540L,0x2cde42aab7c267d4L,0xba18f9ed5ab0d693L,
+ 0x3ba62aa66e4660d9L },
+ { 0xb24bf97bab9ea96aL,0x5d039642e3b60e32L,0x4e6a45067c4d9bd5L,
+ 0x666c5b9e7ed4a6a4L } },
+ /* 29 << 28 */
+ { { 0xfa3fdcd98edbd7ccL,0x4660bb87c6ccd753L,0x9ae9082021e6b64fL,
+ 0x8a56a713b36bfb3fL },
+ { 0xabfce0965726d47fL,0x9eed01b20b1a9a7fL,0x30e9cad44eb74a37L,
+ 0x7b2524cc53e9666dL } },
+ /* 30 << 28 */
+ { { 0x6a29683b8f4b002fL,0xc2200d7a41f4fc20L,0xcf3af47a3a338accL,
+ 0x6539a4fbe7128975L },
+ { 0xcec31c14c33c7fcfL,0x7eb6799bc7be322bL,0x119ef4e96646f623L,
+ 0x7b7a26a554d7299bL } },
+ /* 31 << 28 */
+ { { 0xcb37f08d403f46f2L,0x94b8fc431a0ec0c7L,0xbb8514e3c332142fL,
+ 0xf3ed2c33e80d2a7aL },
+ { 0x8d2080afb639126cL,0xf7b6be60e3553adeL,0x3950aa9f1c7e2b09L,
+ 0x847ff9586410f02bL } },
+ /* 32 << 28 */
+ { { 0x877b7cf5678a31b0L,0xd50301ae3998b620L,0x734257c5c00fb396L,
+ 0xf9fb18a004e672a6L },
+ { 0xff8bd8ebe8758851L,0x1e64e4c65d99ba44L,0x4b8eaedf7dfd93b7L,
+ 0xba2f2a9804e76b8cL } },
+ /* 33 << 28 */
+ { { 0x7d790cbae8053433L,0xc8e725a03d2c9585L,0x58c5c476cdd8f5edL,
+ 0xd106b952efa9fe1dL },
+ { 0x3c5c775b0eff13a9L,0x242442bae057b930L,0xe9f458d4c9b70cbdL,
+ 0x69b71448a3cdb89aL } },
+ /* 34 << 28 */
+ { { 0x41ee46f60e2ed742L,0x573f104540067493L,0xb1e154ff9d54c304L,
+ 0x2ad0436a8d3a7502L },
+ { 0xee4aaa2d431a8121L,0xcd38b3ab886f11edL,0x57d49ea6034a0eb7L,
+ 0xd2b773bdf7e85e58L } },
+ /* 35 << 28 */
+ { { 0x4a559ac49b5c1f14L,0xc444be1a3e54df2bL,0x13aad704eda41891L,
+ 0xcd927bec5eb5c788L },
+ { 0xeb3c8516e48c8a34L,0x1b7ac8124b546669L,0x1815f896594df8ecL,
+ 0x87c6a79c79227865L } },
+ /* 36 << 28 */
+ { { 0xae02a2f09b56ddbdL,0x1339b5ac8a2f1cf3L,0xf2b569c7839dff0dL,
+ 0xb0b9e864fee9a43dL },
+ { 0x4ff8ca4177bb064eL,0x145a2812fd249f63L,0x3ab7beacf86f689aL,
+ 0x9bafec2701d35f5eL } },
+ /* 37 << 28 */
+ { { 0x28054c654265aa91L,0xa4b18304035efe42L,0x6887b0e69639dec7L,
+ 0xf4b8f6ad3d52aea5L },
+ { 0xfb9293cc971a8a13L,0x3f159e5d4c934d07L,0x2c50e9b109acbc29L,
+ 0x08eb65e67154d129L } },
+ /* 38 << 28 */
+ { { 0x4feff58930b75c3eL,0x0bb82fe294491c93L,0xd8ac377a89af62bbL,
+ 0xd7b514909685e49fL },
+ { 0xabca9a7b04497f19L,0x1b35ed0a1a7ad13fL,0x6b601e213ec86ed6L,
+ 0xda91fcb9ce0c76f1L } },
+ /* 39 << 28 */
+ { { 0x9e28507bd7ab27e1L,0x7c19a55563945b7bL,0x6b43f0a1aafc9827L,
+ 0x443b4fbd3aa55b91L },
+ { 0x962b2e656962c88fL,0x139da8d4ce0db0caL,0xb93f05dd1b8d6c4fL,
+ 0x779cdff7180b9824L } },
+ /* 40 << 28 */
+ { { 0xbba23fddae57c7b7L,0x345342f21b932522L,0xfd9c80fe556d4aa3L,
+ 0xa03907ba6525bb61L },
+ { 0x38b010e1ff218933L,0xc066b654aa52117bL,0x8e14192094f2e6eaL,
+ 0x66a27dca0d32f2b2L } },
+ /* 41 << 28 */
+ { { 0x69c7f993048b3717L,0xbf5a989ab178ae1cL,0x49fa9058564f1d6bL,
+ 0x27ec6e15d31fde4eL },
+ { 0x4cce03737276e7fcL,0x64086d7989d6bf02L,0x5a72f0464ccdd979L,
+ 0x909c356647775631L } },
+ /* 42 << 28 */
+ { { 0x1c07bc6b75dd7125L,0xb4c6bc9787a0428dL,0x507ece52fdeb6b9dL,
+ 0xfca56512b2c95432L },
+ { 0x15d97181d0e8bd06L,0x384dd317c6bb46eaL,0x5441ea203952b624L,
+ 0xbcf70dee4e7dc2fbL } },
+ /* 43 << 28 */
+ { { 0x372b016e6628e8c3L,0x07a0d667b60a7522L,0xcf05751b0a344ee2L,
+ 0x0ec09a48118bdeecL },
+ { 0x6e4b3d4ed83dce46L,0x43a6316d99d2fc6eL,0xa99d898956cf044cL,
+ 0x7c7f4454ae3e5fb7L } },
+ /* 44 << 28 */
+ { { 0xb2e6b121fbabbe92L,0x281850fbe1330076L,0x093581ec97890015L,
+ 0x69b1dded75ff77f5L },
+ { 0x7cf0b18fab105105L,0x953ced31a89ccfefL,0x3151f85feb914009L,
+ 0x3c9f1b8788ed48adL } },
+ /* 45 << 28 */
+ { { 0xc9aba1a14a7eadcbL,0x928e7501522e71cfL,0xeaede7273a2e4f83L,
+ 0x467e10d11ce3bbd3L },
+ { 0xf3442ac3b955dcf0L,0xba96307dd3d5e527L,0xf763a10efd77f474L,
+ 0x5d744bd06a6e1ff0L } },
+ /* 46 << 28 */
+ { { 0xd287282aa777899eL,0xe20eda8fd03f3cdeL,0x6a7e75bb50b07d31L,
+ 0x0b7e2a946f379de4L },
+ { 0x31cb64ad19f593cfL,0x7b1a9e4f1e76ef1dL,0xe18c9c9db62d609cL,
+ 0x439bad6de779a650L } },
+ /* 47 << 28 */
+ { { 0x219d9066e032f144L,0x1db632b8e8b2ec6aL,0xff0d0fd4fda12f78L,
+ 0x56fb4c2d2a25d265L },
+ { 0x5f4e2ee1255a03f1L,0x61cd6af2e96af176L,0xe0317ba8d068bc97L,
+ 0x927d6bab264b988eL } },
+ /* 48 << 28 */
+ { { 0xa18f07e0e90fb21eL,0x00fd2b80bba7fca1L,0x20387f2795cd67b5L,
+ 0x5b89a4e7d39707f7L },
+ { 0x8f83ad3f894407ceL,0xa0025b946c226132L,0xc79563c7f906c13bL,
+ 0x5f548f314e7bb025L } },
+ /* 49 << 28 */
+ { { 0x2b4c6b8feac6d113L,0xa67e3f9c0e813c76L,0x3982717c3fe1f4b9L,
+ 0x5886581926d8050eL },
+ { 0x99f3640cf7f06f20L,0xdc6102162a66ebc2L,0x52f2c175767a1e08L,
+ 0x05660e1a5999871bL } },
+ /* 50 << 28 */
+ { { 0x6b0f17626d3c4693L,0xf0e7d62737ed7beaL,0xc51758c7b75b226dL,
+ 0x40a886281f91613bL },
+ { 0x889dbaa7bbb38ce0L,0xe0404b65bddcad81L,0xfebccd3a8bc9671fL,
+ 0xfbf9a357ee1f5375L } },
+ /* 51 << 28 */
+ { { 0x5dc169b028f33398L,0xb07ec11d72e90f65L,0xae7f3b4afaab1eb1L,
+ 0xd970195e5f17538aL },
+ { 0x52b05cbe0181e640L,0xf5debd622643313dL,0x761481545df31f82L,
+ 0x23e03b333a9e13c5L } },
+ /* 52 << 28 */
+ { { 0xff7589494fde0c1fL,0xbf8a1abee5b6ec20L,0x702278fb87e1db6cL,
+ 0xc447ad7a35ed658fL },
+ { 0x48d4aa3803d0ccf2L,0x80acb338819a7c03L,0x9bc7c89e6e17ceccL,
+ 0x46736b8b03be1d82L } },
+ /* 53 << 28 */
+ { { 0xd65d7b60c0432f96L,0xddebe7a3deb5442fL,0x79a253077dff69a2L,
+ 0x37a56d9402cf3122L },
+ { 0x8bab8aedf2350d0aL,0x13c3f276037b0d9aL,0xc664957c44c65caeL,
+ 0x88b44089c2e71a88L } },
+ /* 54 << 28 */
+ { { 0xdb88e5a35cb02664L,0x5d4c0bf18686c72eL,0xea3d9b62a682d53eL,
+ 0x9b605ef40b2ad431L },
+ { 0x71bac202c69645d0L,0xa115f03a6a1b66e7L,0xfe2c563a158f4dc4L,
+ 0xf715b3a04d12a78cL } },
+ /* 55 << 28 */
+ { { 0x8f7f0a48d413213aL,0x2035806dc04becdbL,0xecd34a995d8587f5L,
+ 0x4d8c30799f6d3a71L },
+ { 0x1b2a2a678d95a8f6L,0xc58c9d7df2110d0dL,0xdeee81d5cf8fba3fL,
+ 0xa42be3c00c7cdf68L } },
+ /* 56 << 28 */
+ { { 0x2126f742d43b5eaaL,0x054a0766dfa59b85L,0x9d0d5e36126bfd45L,
+ 0xa1f8fbd7384f8a8fL },
+ { 0x317680f5d563fcccL,0x48ca5055f280a928L,0xe00b81b227b578cfL,
+ 0x10aad9182994a514L } },
+ /* 57 << 28 */
+ { { 0xd9e07b62b7bdc953L,0x9f0f6ff25bc086ddL,0x09d1ccff655eee77L,
+ 0x45475f795bef7df1L },
+ { 0x3faa28fa86f702ccL,0x92e609050f021f07L,0xe9e629687f8fa8c6L,
+ 0xbd71419af036ea2cL } },
+ /* 58 << 28 */
+ { { 0x171ee1cc6028da9aL,0x5352fe1ac251f573L,0xf8ff236e3fa997f4L,
+ 0xd831b6c9a5749d5fL },
+ { 0x7c872e1de350e2c2L,0xc56240d91e0ce403L,0xf9deb0776974f5cbL,
+ 0x7d50ba87961c3728L } },
+ /* 59 << 28 */
+ { { 0xd6f894265a3a2518L,0xcf817799c6303d43L,0x510a0471619e5696L,
+ 0xab049ff63a5e307bL },
+ { 0xe4cdf9b0feb13ec7L,0xd5e971179d8ff90cL,0xf6f64d069afa96afL,
+ 0x00d0bf5e9d2012a2L } },
+ /* 60 << 28 */
+ { { 0xe63f301f358bcdc0L,0x07689e990a9d47f8L,0x1f689e2f4f43d43aL,
+ 0x4d542a1690920904L },
+ { 0xaea293d59ca0a707L,0xd061fe458ac68065L,0x1033bf1b0090008cL,
+ 0x29749558c08a6db6L } },
+ /* 61 << 28 */
+ { { 0x74b5fc59c1d5d034L,0xf712e9f667e215e0L,0xfd520cbd860200e6L,
+ 0x0229acb43ea22588L },
+ { 0x9cd1e14cfff0c82eL,0x87684b6259c69e73L,0xda85e61c96ccb989L,
+ 0x2d5dbb02a3d06493L } },
+ /* 62 << 28 */
+ { { 0xf22ad33ae86b173cL,0xe8e41ea5a79ff0e3L,0x01d2d725dd0d0c10L,
+ 0x31f39088032d28f9L },
+ { 0x7b3f71e17829839eL,0x0cf691b44502ae58L,0xef658dbdbefc6115L,
+ 0xa5cd6ee5b3ab5314L } },
+ /* 63 << 28 */
+ { { 0x206c8d7b5f1d2347L,0x794645ba4cc2253aL,0xd517d8ff58389e08L,
+ 0x4fa20dee9f847288L },
+ { 0xeba072d8d797770aL,0x7360c91dbf429e26L,0x7200a3b380af8279L,
+ 0x6a1c915082dadce3L } },
+ /* 64 << 28 */
+ { { 0x0ee6d3a7c35d8794L,0x042e65580356bae5L,0x9f59698d643322fdL,
+ 0x9379ae1550a61967L },
+ { 0x64b9ae62fcc9981eL,0xaed3d6316d2934c6L,0x2454b3025e4e65ebL,
+ 0xab09f647f9950428L } },
+ /* 0 << 35 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 35 */
+ { { 0xb2083a1222248accL,0x1f6ec0ef3264e366L,0x5659b7045afdee28L,
+ 0x7a823a40e6430bb5L },
+ { 0x24592a04e1900a79L,0xcde09d4ac9ee6576L,0x52b6463f4b5ea54aL,
+ 0x1efe9ed3d3ca65a7L } },
+ /* 2 << 35 */
+ { { 0xe27a6dbe305406ddL,0x8eb7dc7fdd5d1957L,0xf54a6876387d4d8fL,
+ 0x9c479409c7762de4L },
+ { 0xbe4d5b5d99b30778L,0x25380c566e793682L,0x602d37f3dac740e3L,
+ 0x140deabe1566e4aeL } },
+ /* 3 << 35 */
+ { { 0x4481d067afd32acfL,0xd8f0fccae1f71ccfL,0xd208dd0cb596f2daL,
+ 0xd049d7309aad93f9L },
+ { 0xc79f263d42ab580eL,0x09411bb123f707b4L,0x8cfde1ff835e0edaL,
+ 0x7270749090f03402L } },
+ /* 4 << 35 */
+ { { 0xeaee6126c49a861eL,0x024f3b65e14f0d06L,0x51a3f1e8c69bfc17L,
+ 0xc3c3a8e9a7686381L },
+ { 0x3400752cb103d4c8L,0x02bc46139218b36bL,0xc67f75eb7651504aL,
+ 0xd6848b56d02aebfaL } },
+ /* 5 << 35 */
+ { { 0xbd9802e6c30fa92bL,0x5a70d96d9a552784L,0x9085c4ea3f83169bL,
+ 0xfa9423bb06908228L },
+ { 0x2ffebe12fe97a5b9L,0x85da604971b99118L,0x9cbc2f7f63178846L,
+ 0xfd96bc709153218eL } },
+ /* 6 << 35 */
+ { { 0x958381db1782269bL,0xae34bf792597e550L,0xbb5c60645f385153L,
+ 0x6f0e96afe3088048L },
+ { 0xbf6a021577884456L,0xb3b5688c69310ea7L,0x17c9429504fad2deL,
+ 0xe020f0e517896d4dL } },
+ /* 7 << 35 */
+ { { 0x730ba0ab0976505fL,0x567f6813095e2ec5L,0x470620106331ab71L,
+ 0x72cfa97741d22b9fL },
+ { 0x33e55ead8a2373daL,0xa8d0d5f47ba45a68L,0xba1d8f9c03029d15L,
+ 0x8f34f1ccfc55b9f3L } },
+ /* 8 << 35 */
+ { { 0xcca4428dbbe5a1a9L,0x8187fd5f3126bd67L,0x0036973a48105826L,
+ 0xa39b6663b8bd61a0L },
+ { 0x6d42deef2d65a808L,0x4969044f94636b19L,0xf611ee47dd5d564cL,
+ 0x7b2f3a49d2873077L } },
+ /* 9 << 35 */
+ { { 0x94157d45300eb294L,0x2b2a656e169c1494L,0xc000dd76d3a47aa9L,
+ 0xa2864e4fa6243ea4L },
+ { 0x82716c47db89842eL,0x12dfd7d761479fb7L,0x3b9a2c56e0b2f6dcL,
+ 0x46be862ad7f85d67L } },
+ /* 10 << 35 */
+ { { 0x03b0d8dd0f82b214L,0x460c34f9f103cbc6L,0xf32e5c0318d79e19L,
+ 0x8b8888baa84117f8L },
+ { 0x8f3c37dcc0722677L,0x10d21be91c1c0f27L,0xd47c8468e0f7a0c6L,
+ 0x9bf02213adecc0e0L } },
+ /* 11 << 35 */
+ { { 0x0baa7d1242b48b99L,0x1bcb665d48424096L,0x8b847cd6ebfb5cfbL,
+ 0x87c2ae569ad4d10dL },
+ { 0xf1cbb1220de36726L,0xe7043c683fdfbd21L,0x4bd0826a4e79d460L,
+ 0x11f5e5984bd1a2cbL } },
+ /* 12 << 35 */
+ { { 0x97554160b7fe7b6eL,0x7d16189a400a3fb2L,0xd73e9beae328ca1eL,
+ 0x0dd04b97e793d8ccL },
+ { 0xa9c83c9b506db8ccL,0x5cd47aaecf38814cL,0x26fc430db64b45e6L,
+ 0x079b5499d818ea84L } },
+ /* 13 << 35 */
+ { { 0xebb01102c1c24a3bL,0xca24e5681c161c1aL,0x103eea6936f00a4aL,
+ 0x9ad76ee876176c7bL },
+ { 0x97451fc2538e0ff7L,0x94f898096604b3b0L,0x6311436e3249cfd7L,
+ 0x27b4a7bd41224f69L } },
+ /* 14 << 35 */
+ { { 0x03b5d21ae0ac2941L,0x279b0254c2d31937L,0x3307c052cac992d0L,
+ 0x6aa7cb92efa8b1f3L },
+ { 0x5a1825800d37c7a5L,0x13380c37342d5422L,0x92ac2d66d5d2ef92L,
+ 0x035a70c9030c63c6L } },
+ /* 15 << 35 */
+ { { 0xc16025dd4ce4f152L,0x1f419a71f9df7c06L,0x6d5b221491e4bb14L,
+ 0xfc43c6cc839fb4ceL },
+ { 0x49f06591925d6b2dL,0x4b37d9d362186598L,0x8c54a971d01b1629L,
+ 0xe1a9c29f51d50e05L } },
+ /* 16 << 35 */
+ { { 0x5109b78571ba1861L,0x48b22d5cd0c8f93dL,0xe8fa84a78633bb93L,
+ 0x53fba6ba5aebbd08L },
+ { 0x7ff27df3e5eea7d8L,0x521c879668ca7158L,0xb9d5133bce6f1a05L,
+ 0x2d50cd53fd0ebee4L } },
+ /* 17 << 35 */
+ { { 0xc82115d6c5a3ef16L,0x993eff9dba079221L,0xe4da2c5e4b5da81cL,
+ 0x9a89dbdb8033fd85L },
+ { 0x60819ebf2b892891L,0x53902b215d14a4d5L,0x6ac35051d7fda421L,
+ 0xcc6ab88561c83284L } },
+ /* 18 << 35 */
+ { { 0x14eba133f74cff17L,0x240aaa03ecb813f2L,0xcfbb65406f665beeL,
+ 0x084b1fe4a425ad73L },
+ { 0x009d5d16d081f6a6L,0x35304fe8eef82c90L,0xf20346d5aa9eaa22L,
+ 0x0ada9f07ac1c91e3L } },
+ /* 19 << 35 */
+ { { 0xa6e21678968a6144L,0x54c1f77c07b31a1eL,0xd6bb787e5781fbe1L,
+ 0x61bd2ee0e31f1c4aL },
+ { 0xf25aa1e9781105fcL,0x9cf2971f7b2f8e80L,0x26d15412cdff919bL,
+ 0x01db4ebe34bc896eL } },
+ /* 20 << 35 */
+ { { 0x7d9b3e23b40df1cfL,0x5933737394e971b4L,0xbf57bd14669cf921L,
+ 0x865daedf0c1a1064L },
+ { 0x3eb70bd383279125L,0xbc3d5b9f34ecdaabL,0x91e3ed7e5f755cafL,
+ 0x49699f54d41e6f02L } },
+ /* 21 << 35 */
+ { { 0x185770e1d4a7a15bL,0x08f3587aeaac87e7L,0x352018db473133eaL,
+ 0x674ce71904fd30fcL },
+ { 0x7b8d9835088b3e0eL,0x7a0356a95d0d47a1L,0x9d9e76596474a3c4L,
+ 0x61ea48a7ff66966cL } },
+ /* 22 << 35 */
+ { { 0x304177580f3e4834L,0xfdbb21c217a9afcbL,0x756fa17f2f9a67b3L,
+ 0x2a6b2421a245c1a8L },
+ { 0x64be27944af02291L,0xade465c62a5804feL,0x8dffbd39a6f08fd7L,
+ 0xc4efa84caa14403bL } },
+ /* 23 << 35 */
+ { { 0xa1b91b2a442b0f5cL,0xb748e317cf997736L,0x8d1b62bfcee90e16L,
+ 0x907ae2710b2078c0L },
+ { 0xdf31534b0c9bcdddL,0x043fb05439adce83L,0x99031043d826846aL,
+ 0x61a9c0d6b144f393L } },
+ /* 24 << 35 */
+ { { 0xdab4804647718427L,0xdf17ff9b6e830f8bL,0x408d7ee8e49a1347L,
+ 0x6ac71e2391c1d4aeL },
+ { 0xc8cbb9fd1defd73cL,0x19840657bbbbfec5L,0x39db1cb59e7ef8eaL,
+ 0x78aa829664105f30L } },
+ /* 25 << 35 */
+ { { 0xa3d9b7f0a3738c29L,0x0a2f235abc3250a3L,0x55e506f6445e4cafL,
+ 0x0974f73d33475f7aL },
+ { 0xd37dbba35ba2f5a8L,0x542c6e636af40066L,0x26d99b53c5d73e2cL,
+ 0x06060d7d6c3ca33eL } },
+ /* 26 << 35 */
+ { { 0xcdbef1c2065fef4aL,0x77e60f7dfd5b92e3L,0xd7c549f026708350L,
+ 0x201b3ad034f121bfL },
+ { 0x5fcac2a10334fc14L,0x8a9a9e09344552f6L,0x7dd8a1d397653082L,
+ 0x5fc0738f79d4f289L } },
+ /* 27 << 35 */
+ { { 0x787d244d17d2d8c3L,0xeffc634570830684L,0x5ddb96dde4f73ae5L,
+ 0x8efb14b1172549a5L },
+ { 0x6eb73eee2245ae7aL,0xbca4061eea11f13eL,0xb577421d30b01f5dL,
+ 0xaa688b24782e152cL } },
+ /* 28 << 35 */
+ { { 0x67608e71bd3502baL,0x4ef41f24b4de75a0L,0xb08dde5efd6125e5L,
+ 0xde484825a409543fL },
+ { 0x1f198d9865cc2295L,0x428a37716e0edfa2L,0x4f9697a2adf35fc7L,
+ 0x01a43c79f7cac3c7L } },
+ /* 29 << 35 */
+ { { 0xb05d70590fd3659aL,0x8927f30cbb7f2d9aL,0x4023d1ac8cf984d3L,
+ 0x32125ed302897a45L },
+ { 0xfb572dad3d414205L,0x73000ef2e3fa82a9L,0x4c0868e9f10a5581L,
+ 0x5b61fc676b0b3ca5L } },
+ /* 30 << 35 */
+ { { 0xc1258d5b7cae440cL,0x21c08b41402b7531L,0xf61a8955de932321L,
+ 0x3568faf82d1408afL },
+ { 0x71b15e999ecf965bL,0xf14ed248e917276fL,0xc6f4caa1820cf9e2L,
+ 0x681b20b218d83c7eL } },
+ /* 31 << 35 */
+ { { 0x6cde738dc6c01120L,0x71db0813ae70e0dbL,0x95fc064474afe18cL,
+ 0x34619053129e2be7L },
+ { 0x80615ceadb2a3b15L,0x0a49a19edb4c7073L,0x0e1b84c88fd2d367L,
+ 0xd74bf462033fb8aaL } },
+ /* 32 << 35 */
+ { { 0x889f6d65533ef217L,0x7158c7e4c3ca2e87L,0xfb670dfbdc2b4167L,
+ 0x75910a01844c257fL },
+ { 0xf336bf07cf88577dL,0x22245250e45e2aceL,0x2ed92e8d7ca23d85L,
+ 0x29f8be4c2b812f58L } },
+ /* 33 << 35 */
+ { { 0xdd9ebaa7076fe12bL,0x3f2400cbae1537f9L,0x1aa9352817bdfb46L,
+ 0xc0f9843067883b41L },
+ { 0x5590ede10170911dL,0x7562f5bb34d4b17fL,0xe1fa1df21826b8d2L,
+ 0xb40b796a6bd80d59L } },
+ /* 34 << 35 */
+ { { 0xd65bf1973467ba92L,0x8c9b46dbf70954b0L,0x97c8a0f30e78f15dL,
+ 0xa8f3a69a85a4c961L },
+ { 0x4242660f61e4ce9bL,0xbf06aab36ea6790cL,0xc6706f8eec986416L,
+ 0x9e56dec19a9fc225L } },
+ /* 35 << 35 */
+ { { 0x527c46f49a9898d9L,0xd799e77b5633cdefL,0x24eacc167d9e4297L,
+ 0xabb61cea6b1cb734L },
+ { 0xbee2e8a7f778443cL,0x3bb42bf129de2fe6L,0xcbed86a13003bb6fL,
+ 0xd3918e6cd781cdf6L } },
+ /* 36 << 35 */
+ { { 0x4bee32719a5103f1L,0x5243efc6f50eac06L,0xb8e122cb6adcc119L,
+ 0x1b7faa84c0b80a08L },
+ { 0x32c3d1bd6dfcd08cL,0x129dec4e0be427deL,0x98ab679c1d263c83L,
+ 0xafc83cb7cef64effL } },
+ /* 37 << 35 */
+ { { 0x85eb60882fa6be76L,0x892585fb1328cbfeL,0xc154d3edcf618ddaL,
+ 0xc44f601b3abaf26eL },
+ { 0x7bf57d0b2be1fdfdL,0xa833bd2d21137feeL,0x9353af362db591a8L,
+ 0xc76f26dc5562a056L } },
+ /* 38 << 35 */
+ { { 0x1d87e47d3fdf5a51L,0x7afb5f9355c9cab0L,0x91bbf58f89e0586eL,
+ 0x7c72c0180d843709L },
+ { 0xa9a5aafb99b5c3dcL,0xa48a0f1d3844aeb0L,0x7178b7ddb667e482L,
+ 0x453985e96e23a59aL } },
+ /* 39 << 35 */
+ { { 0x4a54c86001b25dd8L,0x0dd37f48fb897c8aL,0x5f8aa6100ea90cd9L,
+ 0xc8892c6816d5830dL },
+ { 0xeb4befc0ef514ca5L,0x478eb679e72c9ee6L,0x9bca20dadbc40d5fL,
+ 0xf015de21dde4f64aL } },
+ /* 40 << 35 */
+ { { 0xaa6a4de0eaf4b8a5L,0x68cfd9ca4bc60e32L,0x668a4b017fd15e70L,
+ 0xd9f0694af27dc09dL },
+ { 0xf6c3cad5ba708bcdL,0x5cd2ba695bb95c2aL,0xaa28c1d333c0a58fL,
+ 0x23e274e3abc77870L } },
+ /* 41 << 35 */
+ { { 0x44c3692ddfd20a4aL,0x091c5fd381a66653L,0x6c0bb69109a0757dL,
+ 0x9072e8b9667343eaL },
+ { 0x31d40eb080848becL,0x95bd480a79fd36ccL,0x01a77c6165ed43f5L,
+ 0xafccd1272e0d40bfL } },
+ /* 42 << 35 */
+ { { 0xeccfc82d1cc1884bL,0xc85ac2015d4753b4L,0xc7a6caac658e099fL,
+ 0xcf46369e04b27390L },
+ { 0xe2e7d049506467eaL,0x481b63a237cdecccL,0x4029abd8ed80143aL,
+ 0x28bfe3c7bcb00b88L } },
+ /* 43 << 35 */
+ { { 0x3bec10090643d84aL,0x885f3668abd11041L,0xdb02432cf83a34d6L,
+ 0x32f7b360719ceebeL },
+ { 0xf06c7837dad1fe7aL,0x60a157a95441a0b0L,0x704970e9e2d47550L,
+ 0xcd2bd553271b9020L } },
+ /* 44 << 35 */
+ { { 0xff57f82f33e24a0bL,0x9cbee23ff2565079L,0x16353427eb5f5825L,
+ 0x276feec4e948d662L },
+ { 0xd1b62bc6da10032bL,0x718351ddf0e72a53L,0x934520762420e7baL,
+ 0x96368fff3a00118dL } },
+ /* 45 << 35 */
+ { { 0x00ce2d26150a49e4L,0x0c28b6363f04706bL,0xbad65a4658b196d0L,
+ 0x6c8455fcec9f8b7cL },
+ { 0xe90c895f2d71867eL,0x5c0be31bedf9f38cL,0x2a37a15ed8f6ec04L,
+ 0x239639e78cd85251L } },
+ /* 46 << 35 */
+ { { 0xd89753159c7c4c6bL,0x603aa3c0d7409af7L,0xb8d53d0c007132fbL,
+ 0x68d12af7a6849238L },
+ { 0xbe0607e7bf5d9279L,0x9aa50055aada74ceL,0xe81079cbba7e8ccbL,
+ 0x610c71d1a5f4ff5eL } },
+ /* 47 << 35 */
+ { { 0x9e2ee1a75aa07093L,0xca84004ba75da47cL,0x074d39513de75401L,
+ 0xf938f756bb311592L },
+ { 0x9619761800a43421L,0x39a2536207bc78c8L,0x278f710a0a171276L,
+ 0xb28446ea8d1a8f08L } },
+ /* 48 << 35 */
+ { { 0x184781bfe3b6a661L,0x7751cb1de6d279f7L,0xf8ff95d6c59eb662L,
+ 0x186d90b758d3dea7L },
+ { 0x0e4bb6c1dfb4f754L,0x5c5cf56b2b2801dcL,0xc561e4521f54564dL,
+ 0xb4fb8c60f0dd7f13L } },
+ /* 49 << 35 */
+ { { 0xf884963033ff98c7L,0x9619fffacf17769cL,0xf8090bf61bfdd80aL,
+ 0x14d9a149422cfe63L },
+ { 0xb354c3606f6df9eaL,0xdbcf770d218f17eaL,0x207db7c879eb3480L,
+ 0x213dbda8559b6a26L } },
+ /* 50 << 35 */
+ { { 0xac4c200b29fc81b3L,0xebc3e09f171d87c1L,0x917995301481aa9eL,
+ 0x051b92e192e114faL },
+ { 0xdf8f92e9ecb5537fL,0x44b1b2cc290c7483L,0xa711455a2adeb016L,
+ 0x964b685681a10c2cL } },
+ /* 51 << 35 */
+ { { 0x4f159d99cec03623L,0x05532225ef3271eaL,0xb231bea3c5ee4849L,
+ 0x57a54f507094f103L },
+ { 0x3e2d421d9598b352L,0xe865a49c67412ab4L,0xd2998a251cc3a912L,
+ 0x5d0928080c74d65dL } },
+ /* 52 << 35 */
+ { { 0x73f459084088567aL,0xeb6b280e1f214a61L,0x8c9adc34caf0c13dL,
+ 0x39d12938f561fb80L },
+ { 0xb2dc3a5ebc6edfb4L,0x7485b1b1fe4d210eL,0x062e0400e186ae72L,
+ 0x91e32d5c6eeb3b88L } },
+ /* 53 << 35 */
+ { { 0x6df574d74be59224L,0xebc88ccc716d55f3L,0x26c2e6d0cad6ed33L,
+ 0xc6e21e7d0d3e8b10L },
+ { 0x2cc5840e5bcc36bbL,0x9292445e7da74f69L,0x8be8d3214e5193a8L,
+ 0x3ec236298df06413L } },
+ /* 54 << 35 */
+ { { 0xc7e9ae85b134defaL,0x6073b1d01bb2d475L,0xb9ad615e2863c00dL,
+ 0x9e29493d525f4ac4L },
+ { 0xc32b1dea4e9acf4fL,0x3e1f01c8a50db88dL,0xb05d70ea04da916cL,
+ 0x714b0d0ad865803eL } },
+ /* 55 << 35 */
+ { { 0x4bd493fc9920cb5eL,0x5b44b1f792c7a3acL,0xa2a77293bcec9235L,
+ 0x5ee06e87cd378553L },
+ { 0xceff8173da621607L,0x2bb03e4c99f5d290L,0x2945106aa6f734acL,
+ 0xb5056604d25c4732L } },
+ /* 56 << 35 */
+ { { 0x5945920ce079afeeL,0x686e17a06789831fL,0x5966bee8b74a5ae5L,
+ 0x38a673a21e258d46L },
+ { 0xbd1cc1f283141c95L,0x3b2ecf4f0e96e486L,0xcd3aa89674e5fc78L,
+ 0x415ec10c2482fa7aL } },
+ /* 57 << 35 */
+ { { 0x1523441980503380L,0x513d917ad314b392L,0xb0b52f4e63caecaeL,
+ 0x07bf22ad2dc7780bL },
+ { 0xe761e8a1e4306839L,0x1b3be9625dd7feaaL,0x4fe728de74c778f1L,
+ 0xf1fa0bda5e0070f6L } },
+ /* 58 << 35 */
+ { { 0x85205a316ec3f510L,0x2c7e4a14d2980475L,0xde3c19c06f30ebfdL,
+ 0xdb1c1f38d4b7e644L },
+ { 0xfe291a755dce364aL,0xb7b22a3c058f5be3L,0x2cd2c30237fea38cL,
+ 0x2930967a2e17be17L } },
+ /* 59 << 35 */
+ { { 0x87f009de0c061c65L,0xcb014aacedc6ed44L,0x49bd1cb43bafb1ebL,
+ 0x81bd8b5c282d3688L },
+ { 0x1cdab87ef01a17afL,0x21f37ac4e710063bL,0x5a6c567642fc8193L,
+ 0xf4753e7056a6015cL } },
+ /* 60 << 35 */
+ { { 0x020f795ea15b0a44L,0x8f37c8d78958a958L,0x63b7e89ba4b675b5L,
+ 0xb4fb0c0c0fc31aeaL },
+ { 0xed95e639a7ff1f2eL,0x9880f5a3619614fbL,0xdeb6ff02947151abL,
+ 0x5bc5118ca868dcdbL } },
+ /* 61 << 35 */
+ { { 0xd8da20554c20cea5L,0xcac2776e14c4d69aL,0xcccb22c1622d599bL,
+ 0xa4ddb65368a9bb50L },
+ { 0x2c4ff1511b4941b4L,0xe1ff19b46efba588L,0x35034363c48345e0L,
+ 0x45542e3d1e29dfc4L } },
+ /* 62 << 35 */
+ { { 0xf197cb91349f7aedL,0x3b2b5a008fca8420L,0x7c175ee823aaf6d8L,
+ 0x54dcf42135af32b6L },
+ { 0x0ba1430727d6561eL,0x879d5ee4d175b1e2L,0xc7c4367399807db5L,
+ 0x77a544559cd55bcdL } },
+ /* 63 << 35 */
+ { { 0xe6c2ff130105c072L,0x18f7a99f8dda7da4L,0x4c3018200e2d35c1L,
+ 0x06a53ca0d9cc6c82L },
+ { 0xaa21cc1ef1aa1d9eL,0x324143344a75b1e8L,0x2a6d13280ebe9fdcL,
+ 0x16bd173f98a4755aL } },
+ /* 64 << 35 */
+ { { 0xfbb9b2452133ffd9L,0x39a8b2f1830f1a20L,0x484bc97dd5a1f52aL,
+ 0xd6aebf56a40eddf8L },
+ { 0x32257acb76ccdac6L,0xaf4d36ec1586ff27L,0x8eaa8863f8de7dd1L,
+ 0x0045d5cf88647c16L } },
+ /* 0 << 42 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 42 */
+ { { 0xa6f3d574c005979dL,0xc2072b426a40e350L,0xfca5c1568de2ecf9L,
+ 0xa8c8bf5ba515344eL },
+ { 0x97aee555114df14aL,0xd4374a4dfdc5ec6bL,0x754cc28f2ca85418L,
+ 0x71cb9e27d3c41f78L } },
+ /* 2 << 42 */
+ { { 0x8910507903605c39L,0xf0843d9ea142c96cL,0xf374493416923684L,
+ 0x732caa2ffa0a2893L },
+ { 0xb2e8c27061160170L,0xc32788cc437fbaa3L,0x39cd818ea6eda3acL,
+ 0xe2e942399e2b2e07L } },
+ /* 3 << 42 */
+ { { 0x6967d39b0260e52aL,0xd42585cc90653325L,0x0d9bd60521ca7954L,
+ 0x4fa2087781ed57b3L },
+ { 0x60c1eff8e34a0bbeL,0x56b0040c84f6ef64L,0x28be2b24b1af8483L,
+ 0xb2278163f5531614L } },
+ /* 4 << 42 */
+ { { 0x8df275455922ac1cL,0xa7b3ef5ca52b3f63L,0x8e77b21471de57c4L,
+ 0x31682c10834c008bL },
+ { 0xc76824f04bd55d31L,0xb6d1c08617b61c71L,0x31db0903c2a5089dL,
+ 0x9c092172184e5d3fL } },
+ /* 5 << 42 */
+ { { 0xdd7ced5bc00cc638L,0x1a2015eb61278fc2L,0x2e8e52886a37f8d6L,
+ 0xc457786fe79933adL },
+ { 0xb3fe4cce2c51211aL,0xad9b10b224c20498L,0x90d87a4fd28db5e5L,
+ 0x698cd1053aca2fc3L } },
+ /* 6 << 42 */
+ { { 0x4f112d07e91b536dL,0xceb982f29eba09d6L,0x3c157b2c197c396fL,
+ 0xe23c2d417b66eb24L },
+ { 0x480c57d93f330d37L,0xb3a4c8a179108debL,0x702388decb199ce5L,
+ 0x0b019211b944a8d4L } },
+ /* 7 << 42 */
+ { { 0x24f2a692840bb336L,0x7c353bdca669fa7bL,0xda20d6fcdec9c300L,
+ 0x625fbe2fa13a4f17L },
+ { 0xa2b1b61adbc17328L,0x008965bfa9515621L,0x49690939c620ff46L,
+ 0x182dd27d8717e91cL } },
+ /* 8 << 42 */
+ { { 0x5ace5035ea6c3997L,0x54259aaac2610befL,0xef18bb3f3c80dd39L,
+ 0x6910b95b5fc3fa39L },
+ { 0xfce2f51043e09aeeL,0xced56c9fa7675665L,0x10e265acd872db61L,
+ 0x6982812eae9fce69L } },
+ /* 9 << 42 */
+ { { 0x29be11c6ce800998L,0x72bb1752b90360d9L,0x2c1931975a4ad590L,
+ 0x2ba2f5489fc1dbc0L },
+ { 0x7fe4eebbe490ebe0L,0x12a0a4cd7fae11c0L,0x7197cf81e903ba37L,
+ 0xcf7d4aa8de1c6dd8L } },
+ /* 10 << 42 */
+ { { 0x92af6bf43fd5684cL,0x2b26eecf80360aa1L,0xbd960f3000546a82L,
+ 0x407b3c43f59ad8feL },
+ { 0x86cae5fe249c82baL,0x9e0faec72463744cL,0x87f551e894916272L,
+ 0x033f93446ceb0615L } },
+ /* 11 << 42 */
+ { { 0x1e5eb0d18be82e84L,0x89967f0e7a582fefL,0xbcf687d5a6e921faL,
+ 0xdfee4cf3d37a09baL },
+ { 0x94f06965b493c465L,0x638b9a1c7635c030L,0x7666786466f05e9fL,
+ 0xccaf6808c04da725L } },
+ /* 12 << 42 */
+ { { 0xca2eb690768fccfcL,0xf402d37db835b362L,0x0efac0d0e2fdfcceL,
+ 0xefc9cdefb638d990L },
+ { 0x2af12b72d1669a8bL,0x33c536bc5774ccbdL,0x30b21909fb34870eL,
+ 0xc38fa2f77df25acaL } },
+ /* 13 << 42 */
+ { { 0x74c5f02bbf81f3f5L,0x0525a5aeaf7e4581L,0x88d2aaba433c54aeL,
+ 0xed9775db806a56c5L },
+ { 0xd320738ac0edb37dL,0x25fdb6ee66cc1f51L,0xac661d1710600d76L,
+ 0x931ec1f3bdd1ed76L } },
+ /* 14 << 42 */
+ { { 0x65c11d6219ee43f1L,0x5cd57c3e60829d97L,0xd26c91a3984be6e8L,
+ 0xf08d93098b0c53bdL },
+ { 0x94bc9e5bc016e4eaL,0xd391683911d43d2bL,0x886c5ad773701155L,
+ 0xe037762620b00715L } },
+ /* 15 << 42 */
+ { { 0x7f01c9ecaa80ba59L,0x3083411a68538e51L,0x970370f1e88128afL,
+ 0x625cc3db91dec14bL },
+ { 0xfef9666c01ac3107L,0xb2a8d577d5057ac3L,0xb0f2629992be5df7L,
+ 0xf579c8e500353924L } },
+ /* 16 << 42 */
+ { { 0xb8fa3d931341ed7aL,0x4223272ca7b59d49L,0x3dcb194783b8c4a4L,
+ 0x4e413c01ed1302e4L },
+ { 0x6d999127e17e44ceL,0xee86bf7533b3adfbL,0xf6902fe625aa96caL,
+ 0xb73540e4e5aae47dL } },
+ /* 17 << 42 */
+ { { 0x32801d7b1b4a158cL,0xe571c99e27e2a369L,0x40cb76c010d9f197L,
+ 0xc308c2893167c0aeL },
+ { 0xa6ef9dd3eb7958f2L,0xa7226dfc300879b1L,0x6cd0b3627edf0636L,
+ 0x4efbce6c7bc37eedL } },
+ /* 18 << 42 */
+ { { 0x75f92a058d699021L,0x586d4c79772566e3L,0x378ca5f1761ad23aL,
+ 0x650d86fc1465a8acL },
+ { 0x7a4ed457842ba251L,0x6b65e3e642234933L,0xaf1543b731aad657L,
+ 0xa4cefe98cbfec369L } },
+ /* 19 << 42 */
+ { { 0xb587da909f47befbL,0x6562e9fb41312d13L,0xa691ea59eff1cefeL,
+ 0xcc30477a05fc4cf6L },
+ { 0xa16324610b0ffd3dL,0xa1f16f3b5b355956L,0x5b148d534224ec24L,
+ 0xdc834e7bf977012aL } },
+ /* 20 << 42 */
+ { { 0x7bfc5e75b2c69dbcL,0x3aa77a2903c3da6cL,0xde0df03cca910271L,
+ 0xcbd5ca4a7806dc55L },
+ { 0xe1ca58076db476cbL,0xfde15d625f37a31eL,0xf49af520f41af416L,
+ 0x96c5c5b17d342db5L } },
+ /* 21 << 42 */
+ { { 0x155c43b7eb4ceb9bL,0x2e9930104e77371aL,0x1d2987da675d43afL,
+ 0xef2bc1c08599fd72L },
+ { 0x96894b7b9342f6b2L,0x201eadf27c8e71f0L,0xf3479d9f4a1f3efcL,
+ 0xe0f8a742702a9704L } },
+ /* 22 << 42 */
+ { { 0xeafd44b6b3eba40cL,0xf9739f29c1c1e0d0L,0x0091471a619d505eL,
+ 0xc15f9c969d7c263eL },
+ { 0x5be4728583afbe33L,0xa3b6d6af04f1e092L,0xe76526b9751a9d11L,
+ 0x2ec5b26d9a4ae4d2L } },
+ /* 23 << 42 */
+ { { 0xeb66f4d902f6fb8dL,0x4063c56196912164L,0xeb7050c180ef3000L,
+ 0x288d1c33eaa5b3f0L },
+ { 0xe87c68d607806fd8L,0xb2f7f9d54bbbf50fL,0x25972f3aac8d6627L,
+ 0xf854777410e8c13bL } },
+ /* 24 << 42 */
+ { { 0xcc50ef6c872b4a60L,0xab2a34a44613521bL,0x39c5c190983e15d1L,
+ 0x61dde5df59905512L },
+ { 0xe417f6219f2275f3L,0x0750c8b6451d894bL,0x75b04ab978b0bdaaL,
+ 0x3bfd9fd4458589bdL } },
+ /* 25 << 42 */
+ { { 0xf1013e30ee9120b6L,0x2b51af9323a4743eL,0xea96ffae48d14d9eL,
+ 0x71dc0dbe698a1d32L },
+ { 0x914962d20180cca4L,0x1ae60677c3568963L,0x8cf227b1437bc444L,
+ 0xc650c83bc9962c7aL } },
+ /* 26 << 42 */
+ { { 0x23c2c7ddfe7ccfc4L,0xf925c89d1b929d48L,0x4460f74b06783c33L,
+ 0xac2c8d49a590475aL },
+ { 0xfb40b407b807bba0L,0x9d1e362d69ff8f3aL,0xa33e9681cbef64a4L,
+ 0x67ece5fa332fb4b2L } },
+ /* 27 << 42 */
+ { { 0x6900a99b739f10e3L,0xc3341ca9ff525925L,0xee18a626a9e2d041L,
+ 0xa5a8368529580dddL },
+ { 0xf3470c819d7de3cdL,0xedf025862062cf9cL,0xf43522fac010edb0L,
+ 0x3031413513a4b1aeL } },
+ /* 28 << 42 */
+ { { 0xc792e02adb22b94bL,0x993d8ae9a1eaa45bL,0x8aad6cd3cd1e1c63L,
+ 0x89529ca7c5ce688aL },
+ { 0x2ccee3aae572a253L,0xe02b643802a21efbL,0xa7091b6ec9430358L,
+ 0x06d1b1fa9d7db504L } },
+ /* 29 << 42 */
+ { { 0x58846d32c4744733L,0x40517c71379f9e34L,0x2f65655f130ef6caL,
+ 0x526e4488f1f3503fL },
+ { 0x8467bd177ee4a976L,0x1d9dc913921363d1L,0xd8d24c33b069e041L,
+ 0x5eb5da0a2cdf7f51L } },
+ /* 30 << 42 */
+ { { 0x1c0f3cb1197b994fL,0x3c95a6c52843eae9L,0x7766ffc9a6097ea5L,
+ 0x7bea4093d723b867L },
+ { 0xb48e1f734db378f9L,0x70025b00e37b77acL,0x943dc8e7af24ad46L,
+ 0xb98a15ac16d00a85L } },
+ /* 31 << 42 */
+ { { 0x3adc38ba2743b004L,0xb1c7f4f7334415eeL,0xea43df8f1e62d05aL,
+ 0x326189059d76a3b6L },
+ { 0x2fbd0bb5a23a0f46L,0x5bc971db6a01918cL,0x7801d94ab4743f94L,
+ 0xb94df65e676ae22bL } },
+ /* 32 << 42 */
+ { { 0xaafcbfabaf95894cL,0x7b9bdc07276b2241L,0xeaf983625bdda48bL,
+ 0x5977faf2a3fcb4dfL },
+ { 0xbed042ef052c4b5bL,0x9fe87f71067591f0L,0xc89c73ca22f24ec7L,
+ 0x7d37fa9ee64a9f1bL } },
+ /* 33 << 42 */
+ { { 0x2710841a15562627L,0x2c01a613c243b034L,0x1d135c562bc68609L,
+ 0xc2ca17158b03f1f6L },
+ { 0xc9966c2d3eb81d82L,0xc02abf4a8f6df13eL,0x77b34bd78f72b43bL,
+ 0xaff6218f360c82b0L } },
+ /* 34 << 42 */
+ { { 0x0aa5726c8d55b9d2L,0xdc0adbe999e9bffbL,0x9097549cefb9e72aL,
+ 0x167557129dfb3111L },
+ { 0xdd8bf984f26847f9L,0xbcb8e387dfb30cb7L,0xc1fd32a75171ef9cL,
+ 0x977f3fc7389b363fL } },
+ /* 35 << 42 */
+ { { 0x116eaf2bf4babda0L,0xfeab68bdf7113c8eL,0xd1e3f064b7def526L,
+ 0x1ac30885e0b3fa02L },
+ { 0x1c5a6e7b40142d9dL,0x839b560330921c0bL,0x48f301fa36a116a3L,
+ 0x380e1107cfd9ee6dL } },
+ /* 36 << 42 */
+ { { 0x7945ead858854be1L,0x4111c12ecbd4d49dL,0xece3b1ec3a29c2efL,
+ 0x6356d4048d3616f5L },
+ { 0x9f0d6a8f594d320eL,0x0989316df651ccd2L,0x6c32117a0f8fdde4L,
+ 0x9abe5cc5a26a9bbcL } },
+ /* 37 << 42 */
+ { { 0xcff560fb9723f671L,0x21b2a12d7f3d593cL,0xe4cb18da24ba0696L,
+ 0x186e2220c3543384L },
+ { 0x722f64e088312c29L,0x94282a9917dc7752L,0x62467bbf5a85ee89L,
+ 0xf435c650f10076a0L } },
+ /* 38 << 42 */
+ { { 0xc9ff153943b3a50bL,0x7132130c1a53efbcL,0x31bfe063f7b0c5b7L,
+ 0xb0179a7d4ea994ccL },
+ { 0x12d064b3c85f455bL,0x472593288f6e0062L,0xf64e590bb875d6d9L,
+ 0x22dd6225ad92bcc7L } },
+ /* 39 << 42 */
+ { { 0xb658038eb9c3bd6dL,0x00cdb0d6fbba27c8L,0x0c6813371062c45dL,
+ 0xd8515b8c2d33407dL },
+ { 0xcb8f699e8cbb5ecfL,0x8c4347f8c608d7d8L,0x2c11850abb3e00dbL,
+ 0x20a8dafdecb49d19L } },
+ /* 40 << 42 */
+ { { 0xbd78148045ee2f40L,0x75e354af416b60cfL,0xde0b58a18d49a8c4L,
+ 0xe40e94e2fa359536L },
+ { 0xbd4fa59f62accd76L,0x05cf466a8c762837L,0xb5abda99448c277bL,
+ 0x5a9e01bf48b13740L } },
+ /* 41 << 42 */
+ { { 0x9d457798326aad8dL,0xbdef4954c396f7e7L,0x6fb274a2c253e292L,
+ 0x2800bf0a1cfe53e7L },
+ { 0x22426d3144438fd4L,0xef2339235e259f9aL,0x4188503c03f66264L,
+ 0x9e5e7f137f9fdfabL } },
+ /* 42 << 42 */
+ { { 0x565eb76c5fcc1abaL,0xea63254859b5bff8L,0x5587c087aab6d3faL,
+ 0x92b639ea6ce39c1bL },
+ { 0x0706e782953b135cL,0x7308912e425268efL,0x599e92c7090e7469L,
+ 0x83b90f529bc35e75L } },
+ /* 43 << 42 */
+ { { 0x4750b3d0244975b3L,0xf3a4435811965d72L,0x179c67749c8dc751L,
+ 0xff18cdfed23d9ff0L },
+ { 0xc40138332028e247L,0x96e280e2f3bfbc79L,0xf60417bdd0880a84L,
+ 0x263c9f3d2a568151L } },
+ /* 44 << 42 */
+ { { 0x36be15b32d2ce811L,0x846dc0c2f8291d21L,0x5cfa0ecb789fcfdbL,
+ 0x45a0beedd7535b9aL },
+ { 0xec8e9f0796d69af1L,0x31a7c5b8599ab6dcL,0xd36d45eff9e2e09fL,
+ 0x3cf49ef1dcee954bL } },
+ /* 45 << 42 */
+ { { 0x6be34cf3086cff9bL,0x88dbd49139a3360fL,0x1e96b8cc0dbfbd1dL,
+ 0xc1e5f7bfcb7e2552L },
+ { 0x0547b21428819d98L,0xc770dd9c7aea9dcbL,0xaef0d4c7041d68c8L,
+ 0xcc2b981813cb9ba8L } },
+ /* 46 << 42 */
+ { { 0x7fc7bc76fe86c607L,0x6b7b9337502a9a95L,0x1948dc27d14dab63L,
+ 0x249dd198dae047beL },
+ { 0xe8356584a981a202L,0x3531dd183a893387L,0x1be11f90c85c7209L,
+ 0x93d2fe1ee2a52b5aL } },
+ /* 47 << 42 */
+ { { 0x8225bfe2ec6d6b97L,0x9cf6d6f4bd0aa5deL,0x911459cb54779f5fL,
+ 0x5649cddb86aeb1f3L },
+ { 0x321335793f26ce5aL,0xc289a102550f431eL,0x559dcfda73b84c6fL,
+ 0x84973819ee3ac4d7L } },
+ /* 48 << 42 */
+ { { 0xb51e55e6f2606a82L,0xe25f706190f2fb57L,0xacef6c2ab1a4e37cL,
+ 0x864e359d5dcf2706L },
+ { 0x479e6b187ce57316L,0x2cab25003a96b23dL,0xed4898628ef16df7L,
+ 0x2056538cef3758b5L } },
+ /* 49 << 42 */
+ { { 0xa7df865ef15d3101L,0x80c5533a61b553d7L,0x366e19974ed14294L,
+ 0x6620741fb3c0bcd6L },
+ { 0x21d1d9c4edc45418L,0x005b859ec1cc4a9dL,0xdf01f630a1c462f0L,
+ 0x15d06cf3f26820c7L } },
+ /* 50 << 42 */
+ { { 0x9f7f24ee3484be47L,0x2ff33e964a0c902fL,0x00bdf4575a0bc453L,
+ 0x2378dfaf1aa238dbL },
+ { 0x272420ec856720f2L,0x2ad9d95b96797291L,0xd1242cc6768a1558L,
+ 0x2e287f8b5cc86aa8L } },
+ /* 51 << 42 */
+ { { 0x796873d0990cecaaL,0xade55f81675d4080L,0x2645eea321f0cd84L,
+ 0x7a1efa0fb4e17d02L },
+ { 0xf6858420037cc061L,0x682e05f0d5d43e12L,0x59c3699427218710L,
+ 0x85cbba4d3f7cd2fcL } },
+ /* 52 << 42 */
+ { { 0x726f97297a3cd22aL,0x9f8cd5dc4a628397L,0x17b93ab9c23165edL,
+ 0xff5f5dbf122823d4L },
+ { 0xc1e4e4b5654a446dL,0xd1a9496f677257baL,0x6387ba94de766a56L,
+ 0x23608bc8521ec74aL } },
+ /* 53 << 42 */
+ { { 0x16a522d76688c4d4L,0x9d6b428207373abdL,0xa62f07acb42efaa3L,
+ 0xf73e00f7e3b90180L },
+ { 0x36175fec49421c3eL,0xc4e44f9b3dcf2678L,0x76df436b7220f09fL,
+ 0x172755fb3aa8b6cfL } },
+ /* 54 << 42 */
+ { { 0xbab89d57446139ccL,0x0a0a6e025fe0208fL,0xcdbb63e211e5d399L,
+ 0x33ecaa12a8977f0bL },
+ { 0x59598b21f7c42664L,0xb3e91b32ab65d08aL,0x035822eef4502526L,
+ 0x1dcf0176720a82a9L } },
+ /* 55 << 42 */
+ { { 0x50f8598f3d589e02L,0xdf0478ffb1d63d2cL,0x8b8068bd1571cd07L,
+ 0x30c3aa4fd79670cdL },
+ { 0x25e8fd4b941ade7fL,0x3d1debdc32790011L,0x65b6dcbd3a3f9ff0L,
+ 0x282736a4793de69cL } },
+ /* 56 << 42 */
+ { { 0xef69a0c3d41d3bd3L,0xb533b8c907a26bdeL,0xe2801d97db2edf9fL,
+ 0xdc4a8269e1877af0L },
+ { 0x6c1c58513d590dbeL,0x84632f6bee4e9357L,0xd36d36b779b33374L,
+ 0xb46833e39bbca2e6L } },
+ /* 57 << 42 */
+ { { 0x37893913f7fc0586L,0x385315f766bf4719L,0x72c56293b31855dcL,
+ 0xd1416d4e849061feL },
+ { 0xbeb3ab7851047213L,0x447f6e61f040c996L,0xd06d310d638b1d0cL,
+ 0xe28a413fbad1522eL } },
+ /* 58 << 42 */
+ { { 0x685a76cb82003f86L,0x610d07f70bcdbca3L,0x6ff660219ca4c455L,
+ 0x7df39b87cea10eecL },
+ { 0xb9255f96e22db218L,0x8cc6d9eb08a34c44L,0xcd4ffb86859f9276L,
+ 0x8fa15eb250d07335L } },
+ /* 59 << 42 */
+ { { 0xdf553845cf2c24b5L,0x89f66a9f52f9c3baL,0x8f22b5b9e4a7ceb3L,
+ 0xaffef8090e134686L },
+ { 0x3e53e1c68eb8fac2L,0x93c1e4eb28aec98eL,0xb6b91ec532a43bcbL,
+ 0x2dbfa947b2d74a51L } },
+ /* 60 << 42 */
+ { { 0xe065d190ca84bad7L,0xfb13919fad58e65cL,0x3c41718bf1cb6e31L,
+ 0x688969f006d05c3fL },
+ { 0xd4f94ce721264d45L,0xfdfb65e97367532bL,0x5b1be8b10945a39dL,
+ 0x229f789c2b8baf3bL } },
+ /* 61 << 42 */
+ { { 0xd8f41f3e6f49f15dL,0x678ce828907f0792L,0xc69ace82fca6e867L,
+ 0x106451aed01dcc89L },
+ { 0x1bb4f7f019fc32d2L,0x64633dfcb00c52d2L,0x8f13549aad9ea445L,
+ 0x99a3bf50fb323705L } },
+ /* 62 << 42 */
+ { { 0x0c9625a2534d4dbcL,0x45b8f1d1c2a2fea3L,0x76ec21a1a530fc1aL,
+ 0x4bac9c2a9e5bd734L },
+ { 0x5996d76a7b4e3587L,0x0045cdee1182d9e3L,0x1aee24b91207f13dL,
+ 0x66452e9797345a41L } },
+ /* 63 << 42 */
+ { { 0x16e5b0549f950cd0L,0x9cc72fb1d7fdd075L,0x6edd61e766249663L,
+ 0xde4caa4df043cccbL },
+ { 0x11b1f57a55c7ac17L,0x779cbd441a85e24dL,0x78030f86e46081e7L,
+ 0xfd4a60328e20f643L } },
+ /* 64 << 42 */
+ { { 0xcc7a64880a750c0fL,0x39bacfe34e548e83L,0x3d418c760c110f05L,
+ 0x3e4daa4cb1f11588L },
+ { 0x2733e7b55ffc69ffL,0x46f147bc92053127L,0x885b2434d722df94L,
+ 0x6a444f65e6fc6b7cL } },
+ /* 0 << 49 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 49 */
+ { { 0x7a1a465ac3f16ea8L,0x115a461db2f1d11cL,0x4767dd956c68a172L,
+ 0x3392f2ebd13a4698L },
+ { 0xc7a99ccde526cdc7L,0x8e537fdc22292b81L,0x76d8cf69a6d39198L,
+ 0xffc5ff432446852dL } },
+ /* 2 << 49 */
+ { { 0x97b14f7ea90567e6L,0x513257b7b6ae5cb7L,0x85454a3c9f10903dL,
+ 0xd8d2c9ad69bc3724L },
+ { 0x38da93246b29cb44L,0xb540a21d77c8cbacL,0x9bbfe43501918e42L,
+ 0xfffa707a56c3614eL } },
+ /* 3 << 49 */
+ { { 0x0ce4e3f1d4e353b7L,0x062d8a14ef46b0a0L,0x6408d5ab574b73fdL,
+ 0xbc41d1c9d3273ffdL },
+ { 0x3538e1e76be77800L,0x71fe8b37c5655031L,0x1cd916216b9b331aL,
+ 0xad825d0bbb388f73L } },
+ /* 4 << 49 */
+ { { 0x56c2e05b1cb76219L,0x0ec0bf9171567e7eL,0xe7076f8661c4c910L,
+ 0xd67b085bbabc04d9L },
+ { 0x9fb904595e93a96aL,0x7526c1eafbdc249aL,0x0d44d367ecdd0bb7L,
+ 0x953999179dc0d695L } },
+ /* 5 << 49 */
+ { { 0x61360ee99e240d18L,0x057cdcacb4b94466L,0xe7667cd12fe5325cL,
+ 0x1fa297b521974e3bL },
+ { 0xfa4081e7db083d76L,0x31993be6f206bd15L,0x8949269b14c19f8cL,
+ 0x21468d72a9d92357L } },
+ /* 6 << 49 */
+ { { 0x2ccbc583a4c506ecL,0x957ed188d1acfe97L,0x8baed83312f1aea2L,
+ 0xef2a6cb48325362dL },
+ { 0x130dde428e195c43L,0xc842025a0e6050c6L,0x2da972a708686a5dL,
+ 0xb52999a1e508b4a8L } },
+ /* 7 << 49 */
+ { { 0xd9f090b910a5a8bdL,0xca91d249096864daL,0x8e6a93be3f67dbc1L,
+ 0xacae6fbaf5f4764cL },
+ { 0x1563c6e0d21411a0L,0x28fa787fda0a4ad8L,0xd524491c908c8030L,
+ 0x1257ba0e4c795f07L } },
+ /* 8 << 49 */
+ { { 0x83f49167ceca9754L,0x426d2cf64b7939a0L,0x2555e355723fd0bfL,
+ 0xa96e6d06c4f144e2L },
+ { 0x4768a8dd87880e61L,0x15543815e508e4d5L,0x09d7e772b1b65e15L,
+ 0x63439dd6ac302fa0L } },
+ /* 9 << 49 */
+ { { 0xb93f802fc14e35c2L,0x71735b7c4341333cL,0x03a2510416d4f362L,
+ 0x3f4d069bbf433c8eL },
+ { 0x0d83ae01f78f5a7cL,0x50a8ffbe7c4eed07L,0xc74f890676e10f83L,
+ 0x7d0809669ddaf8e1L } },
+ /* 10 << 49 */
+ { { 0xb11df8e1698e04ccL,0x877be203169005c8L,0x32749e8c4f3c6179L,
+ 0x2dbc9d0a7853fc05L },
+ { 0x187d4f939454d937L,0xe682ce9db4800e1bL,0xa9129ad8165e68e8L,
+ 0x0fe29735be7f785bL } },
+ /* 11 << 49 */
+ { { 0x5303f40c5b9e02b7L,0xa37c969235ee04e8L,0x5f46cc2034d6632bL,
+ 0x55ef72b296ac545bL },
+ { 0xabec5c1f7b91b062L,0x0a79e1c7bb33e821L,0xbb04b4283a9f4117L,
+ 0x0de1f28ffd2a475aL } },
+ /* 12 << 49 */
+ { { 0x31019ccf3a4434b4L,0xa34581111a7954dcL,0xa9dac80de34972a7L,
+ 0xb043d05474f6b8ddL },
+ { 0x021c319e11137b1aL,0x00a754ceed5cc03fL,0x0aa2c794cbea5ad4L,
+ 0x093e67f470c015b6L } },
+ /* 13 << 49 */
+ { { 0x72cdfee9c97e3f6bL,0xc10bcab4b6da7461L,0x3b02d2fcb59806b9L,
+ 0x85185e89a1de6f47L },
+ { 0x39e6931f0eb6c4d4L,0x4d4440bdd4fa5b04L,0x5418786e34be7eb8L,
+ 0x6380e5219d7259bcL } },
+ /* 14 << 49 */
+ { { 0x20ac0351d598d710L,0x272c4166cb3a4da4L,0xdb82fe1aca71de1fL,
+ 0x746e79f2d8f54b0fL },
+ { 0x6e7fc7364b573e9bL,0x75d03f46fd4b5040L,0x5c1cc36d0b98d87bL,
+ 0x513ba3f11f472da1L } },
+ /* 15 << 49 */
+ { { 0x79d0af26abb177ddL,0xf82ab5687891d564L,0x2b6768a972232173L,
+ 0xefbb3bb08c1f6619L },
+ { 0xb29c11dba6d18358L,0x519e2797b0916d3aL,0xd4dc18f09188e290L,
+ 0x648e86e398b0ca7fL } },
+ /* 16 << 49 */
+ { { 0x859d3145983c38b5L,0xb14f176c637abc8bL,0x2793fb9dcaff7be6L,
+ 0xebe5a55f35a66a5aL },
+ { 0x7cec1dcd9f87dc59L,0x7c595cd3fbdbf560L,0x5b543b2226eb3257L,
+ 0x69080646c4c935fdL } },
+ /* 17 << 49 */
+ { { 0x7f2e440381e9ede3L,0x243c3894caf6df0aL,0x7c605bb11c073b11L,
+ 0xcd06a541ba6a4a62L },
+ { 0x2916894949d4e2e5L,0x33649d074af66880L,0xbfc0c885e9a85035L,
+ 0xb4e52113fc410f4bL } },
+ /* 18 << 49 */
+ { { 0xdca3b70678a6513bL,0x92ea4a2a9edb1943L,0x02642216db6e2dd8L,
+ 0x9b45d0b49fd57894L },
+ { 0x114e70dbc69d11aeL,0x1477dd194c57595fL,0xbc2208b4ec77c272L,
+ 0x95c5b4d7db68f59cL } },
+ /* 19 << 49 */
+ { { 0xb8c4fc6342e532b7L,0x386ba4229ae35290L,0xfb5dda42d201ecbcL,
+ 0x2353dc8ba0e38fd6L },
+ { 0x9a0b85ea68f7e978L,0x96ec56822ad6d11fL,0x5e279d6ce5f6886dL,
+ 0xd3fe03cd3cb1914dL } },
+ /* 20 << 49 */
+ { { 0xfe541fa47ea67c77L,0x952bd2afe3ea810cL,0x791fef568d01d374L,
+ 0xa3a1c6210f11336eL },
+ { 0x5ad0d5a9c7ec6d79L,0xff7038af3225c342L,0x003c6689bc69601bL,
+ 0x25059bc745e8747dL } },
+ /* 21 << 49 */
+ { { 0xfa4965b2f2086fbfL,0xf6840ea686916078L,0xd7ac762070081d6cL,
+ 0xe600da31b5328645L },
+ { 0x01916f63529b8a80L,0xe80e48582d7d6f3eL,0x29eb0fe8d664ca7cL,
+ 0xf017637be7b43b0cL } },
+ /* 22 << 49 */
+ { { 0x9a75c80676cb2566L,0x8f76acb1b24892d9L,0x7ae7b9cc1f08fe45L,
+ 0x19ef73296a4907d8L },
+ { 0x2db4ab715f228bf0L,0xf3cdea39817032d7L,0x0b1f482edcabe3c0L,
+ 0x3baf76b4bb86325cL } },
+ /* 23 << 49 */
+ { { 0xd49065e010089465L,0x3bab5d298e77c596L,0x7636c3a6193dbd95L,
+ 0xdef5d294b246e499L },
+ { 0xb22c58b9286b2475L,0xa0b93939cd80862bL,0x3002c83af0992388L,
+ 0x6de01f9beacbe14cL } },
+ /* 24 << 49 */
+ { { 0x6aac688eadd70482L,0x708de92a7b4a4e8aL,0x75b6dd73758a6eefL,
+ 0xea4bf352725b3c43L },
+ { 0x10041f2c87912868L,0xb1b1be95ef09297aL,0x19ae23c5a9f3860aL,
+ 0xc4f0f839515dcf4bL } },
+ /* 25 << 49 */
+ { { 0x3c7ecca397f6306aL,0x744c44ae68a3a4b0L,0x69cd13a0b3a1d8a2L,
+ 0x7cad0a1e5256b578L },
+ { 0xea653fcd33791d9eL,0x9cc2a05d74b2e05fL,0x73b391dcfd7affa2L,
+ 0xddb7091eb6b05442L } },
+ /* 26 << 49 */
+ { { 0xc71e27bf8538a5c6L,0x195c63dd89abff17L,0xfd3152851b71e3daL,
+ 0x9cbdfda7fa680fa0L },
+ { 0x9db876ca849d7eabL,0xebe2764b3c273271L,0x663357e3f208dceaL,
+ 0x8c5bd833565b1b70L } },
+ /* 27 << 49 */
+ { { 0xccc3b4f59837fc0dL,0x9b641ba8a79cf00fL,0x7428243ddfdf3990L,
+ 0x83a594c4020786b1L },
+ { 0xb712451a526c4502L,0x9d39438e6adb3f93L,0xfdb261e3e9ff0ccdL,
+ 0x80344e3ce07af4c3L } },
+ /* 28 << 49 */
+ { { 0x75900d7c2fa4f126L,0x08a3b8655c99a232L,0x2478b6bfdb25e0c3L,
+ 0x482cc2c271db2edfL },
+ { 0x37df7e645f321bb8L,0x8a93821b9a8005b4L,0x3fa2f10ccc8c1958L,
+ 0x0d3322182c269d0aL } },
+ /* 29 << 49 */
+ { { 0x20ab8119e246b0e6L,0xb39781e4d349fd17L,0xd293231eb31aa100L,
+ 0x4b779c97bb032168L },
+ { 0x4b3f19e1c8470500L,0x45b7efe90c4c869dL,0xdb84f38aa1a6bbccL,
+ 0x3b59cb15b2fddbc1L } },
+ /* 30 << 49 */
+ { { 0xba5514df3fd165e8L,0x499fd6a9061f8811L,0x72cd1fe0bfef9f00L,
+ 0x120a4bb979ad7e8aL },
+ { 0xf2ffd0955f4a5ac5L,0xcfd174f195a7a2f0L,0xd42301ba9d17baf1L,
+ 0xd2fa487a77f22089L } },
+ /* 31 << 49 */
+ { { 0x9cb09efeb1dc77e1L,0xe956693921c99682L,0x8c5469016c6067bbL,
+ 0xfd37857461c24456L },
+ { 0x2b6a6cbe81796b33L,0x62d550f658e87f8bL,0x1b763e1c7f1b01b4L,
+ 0x4b93cfea1b1b5e12L } },
+ /* 32 << 49 */
+ { { 0xb93452381d531696L,0x57201c0088cdde69L,0xdde922519a86afc7L,
+ 0xe3043895bd35cea8L },
+ { 0x7608c1e18555970dL,0x8267dfa92535935eL,0xd4c60a57322ea38bL,
+ 0xe0bf7977804ef8b5L } },
+ /* 33 << 49 */
+ { { 0x1a0dab28c06fece4L,0xd405991e94e7b49dL,0xc542b6d2706dab28L,
+ 0xcb228da3a91618fbL },
+ { 0x224e4164107d1ceaL,0xeb9fdab3d0f5d8f1L,0xc02ba3860d6e41cdL,
+ 0x676a72c59b1f7146L } },
+ /* 34 << 49 */
+ { { 0xffd6dd984d6cb00bL,0xcef9c5cade2e8d7cL,0xa1bbf5d7641c7936L,
+ 0x1b95b230ee8f772eL },
+ { 0xf765a92ee8ac25b1L,0xceb04cfc3a18b7c6L,0x27944cef0acc8966L,
+ 0xcbb3c957434c1004L } },
+ /* 35 << 49 */
+ { { 0x9c9971a1a43ff93cL,0x5bc2db17a1e358a9L,0x45b4862ea8d9bc82L,
+ 0x70ebfbfb2201e052L },
+ { 0xafdf64c792871591L,0xea5bcae6b42d0219L,0xde536c552ad8f03cL,
+ 0xcd6c3f4da76aa33cL } },
+ /* 36 << 49 */
+ { { 0xbeb5f6230bca6de3L,0xdd20dd99b1e706fdL,0x90b3ff9dac9059d4L,
+ 0x2d7b29027ccccc4eL },
+ { 0x8a090a59ce98840fL,0xa5d947e08410680aL,0x49ae346a923379a5L,
+ 0x7dbc84f9b28a3156L } },
+ /* 37 << 49 */
+ { { 0xfd40d91654a1aff2L,0xabf318ba3a78fb9bL,0x50152ed83029f95eL,
+ 0x9fc1dd77c58ad7faL },
+ { 0x5fa5791513595c17L,0xb95046688f62b3a9L,0x907b5b24ff3055b0L,
+ 0x2e995e359a84f125L } },
+ /* 38 << 49 */
+ { { 0x87dacf697e9bbcfbL,0x95d0c1d6e86d96e3L,0x65726e3c2d95a75cL,
+ 0x2c3c9001acd27f21L },
+ { 0x1deab5616c973f57L,0x108b7e2ca5221643L,0x5fee9859c4ef79d4L,
+ 0xbd62b88a40d4b8c6L } },
+ /* 39 << 49 */
+ { { 0xb4dd29c4197c75d6L,0x266a6df2b7076febL,0x9512d0ea4bf2df11L,
+ 0x1320c24f6b0cc9ecL },
+ { 0x6bb1e0e101a59596L,0x8317c5bbeff9aaacL,0x65bb405e385aa6c9L,
+ 0x613439c18f07988fL } },
+ /* 40 << 49 */
+ { { 0xd730049f16a66e91L,0xe97f2820fa1b0e0dL,0x4131e003304c28eaL,
+ 0x820ab732526bac62L },
+ { 0xb2ac9ef928714423L,0x54ecfffaadb10cb2L,0x8781476ef886a4ccL,
+ 0x4b2c87b5db2f8d49L } },
+ /* 41 << 49 */
+ { { 0xe857cd200a44295dL,0x707d7d2158c6b044L,0xae8521f9f596757cL,
+ 0x87448f0367b2b714L },
+ { 0x13a9bc455ebcd58dL,0x79bcced99122d3c1L,0x3c6442479e076642L,
+ 0x0cf227782df4767dL } },
+ /* 42 << 49 */
+ { { 0x5e61aee471d444b6L,0x211236bfc5084a1dL,0x7e15bc9a4fd3eaf6L,
+ 0x68df2c34ab622bf5L },
+ { 0x9e674f0f59bf4f36L,0xf883669bd7f34d73L,0xc48ac1b831497b1dL,
+ 0x323b925d5106703bL } },
+ /* 43 << 49 */
+ { { 0x22156f4274082008L,0xeffc521ac8482bcbL,0x5c6831bf12173479L,
+ 0xcaa2528fc4739490L },
+ { 0x84d2102a8f1b3c4dL,0xcf64dfc12d9bec0dL,0x433febad78a546efL,
+ 0x1f621ec37b73cef1L } },
+ /* 44 << 49 */
+ { { 0x6aecd62737338615L,0x162082ab01d8edf6L,0x833a811919e86b66L,
+ 0x6023a251d299b5dbL },
+ { 0xf5bb0c3abbf04b89L,0x6735eb69ae749a44L,0xd0e058c54713de3bL,
+ 0xfdf2593e2c3d4ccdL } },
+ /* 45 << 49 */
+ { { 0x1b8f414efdd23667L,0xdd52aacafa2015eeL,0x3e31b517bd9625ffL,
+ 0x5ec9322d8db5918cL },
+ { 0xbc73ac85a96f5294L,0x82aa5bf361a0666aL,0x49755810bf08ac42L,
+ 0xd21cdfd5891cedfcL } },
+ /* 46 << 49 */
+ { { 0x918cb57b67f8be10L,0x365d1a7c56ffa726L,0x2435c5046532de93L,
+ 0xc0fc5e102674cd02L },
+ { 0x6e51fcf89cbbb142L,0x1d436e5aafc50692L,0x766bffff3fbcae22L,
+ 0x3148c2fdfd55d3b8L } },
+ /* 47 << 49 */
+ { { 0x52c7fdc9233222faL,0x89ff1092e419fb6bL,0x3cd6db9925254977L,
+ 0x2e85a1611cf12ca7L },
+ { 0xadd2547cdc810bc9L,0xea3f458f9d257c22L,0x642c1fbe27d6b19bL,
+ 0xed07e6b5140481a6L } },
+ /* 48 << 49 */
+ { { 0x6ada1d4286d2e0f8L,0xe59201220e8a9fd5L,0x02c936af708c1b49L,
+ 0x60f30fee2b4bfaffL },
+ { 0x6637ad06858e6a61L,0xce4c77673fd374d0L,0x39d54b2d7188defbL,
+ 0xa8c9d250f56a6b66L } },
+ /* 49 << 49 */
+ { { 0x58fc0f5eb24fe1dcL,0x9eaf9dee6b73f24cL,0xa90d588b33650705L,
+ 0xde5b62c5af2ec729L },
+ { 0x5c72cfaed3c2b36eL,0x868c19d5034435daL,0x88605f93e17ee145L,
+ 0xaa60c4ee77a5d5b1L } },
+ /* 50 << 49 */
+ { { 0xbcf5bfd23b60c472L,0xaf4ef13ceb1d3049L,0x373f44fce13895c9L,
+ 0xf29b382f0cbc9822L },
+ { 0x1bfcb85373efaef6L,0xcf56ac9ca8c96f40L,0xd7adf1097a191e24L,
+ 0x98035f44bf8a8dc2L } },
+ /* 51 << 49 */
+ { { 0xf40a71b91e750c84L,0xc57f7b0c5dc6c469L,0x49a0e79c6fbc19c1L,
+ 0x6b0f5889a48ebdb8L },
+ { 0x5d3fd084a07c4e9fL,0xc3830111ab27de14L,0x0e4929fe33e08dccL,
+ 0xf4a5ad2440bb73a3L } },
+ /* 52 << 49 */
+ { { 0xde86c2bf490f97caL,0x288f09c667a1ce18L,0x364bb8861844478dL,
+ 0x7840fa42ceedb040L },
+ { 0x1269fdd25a631b37L,0x94761f1ea47c8b7dL,0xfc0c2e17481c6266L,
+ 0x85e16ea23daa5fa7L } },
+ /* 53 << 49 */
+ { { 0xccd8603392491048L,0x0c2f6963f4d402d7L,0x6336f7dfdf6a865cL,
+ 0x0a2a463cb5c02a87L },
+ { 0xb0e29be7bf2f12eeL,0xf0a2200266bad988L,0x27f87e039123c1d7L,
+ 0x21669c55328a8c98L } },
+ /* 54 << 49 */
+ { { 0x186b980392f14529L,0xd3d056cc63954df3L,0x2f03fd58175a46f6L,
+ 0x63e34ebe11558558L },
+ { 0xe13fedee5b80cfa5L,0xe872a120d401dbd1L,0x52657616e8a9d667L,
+ 0xbc8da4b6e08d6693L } },
+ /* 55 << 49 */
+ { { 0x370fb9bb1b703e75L,0x6773b186d4338363L,0x18dad378ecef7bffL,
+ 0xaac787ed995677daL },
+ { 0x4801ea8b0437164bL,0xf430ad2073fe795eL,0xb164154d8ee5eb73L,
+ 0x0884ecd8108f7c0eL } },
+ /* 56 << 49 */
+ { { 0x0e6ec0965f520698L,0x640631fe44f7b8d9L,0x92fd34fca35a68b9L,
+ 0x9c5a4b664d40cf4eL },
+ { 0x949454bf80b6783dL,0x80e701fe3a320a10L,0x8d1a564a1a0a39b2L,
+ 0x1436d53d320587dbL } },
+ /* 57 << 49 */
+ { { 0xf5096e6d6556c362L,0xbc23a3c0e2455d7eL,0x3a7aee54807230f9L,
+ 0x9ba1cfa622ae82fdL },
+ { 0x833a057a99c5d706L,0x8be85f4b842315c9L,0xd083179a66a72f12L,
+ 0x2fc77d5dcdcc73cdL } },
+ /* 58 << 49 */
+ { { 0x22b88a805616ee30L,0xfb09548fe7ab1083L,0x8ad6ab0d511270cdL,
+ 0x61f6c57a6924d9abL },
+ { 0xa0f7bf7290aecb08L,0x849f87c90df784a4L,0x27c79c15cfaf1d03L,
+ 0xbbf9f675c463faceL } },
+ /* 59 << 49 */
+ { { 0x91502c65765ba543L,0x18ce3cac42ea60ddL,0xe5cee6ac6e43ecb3L,
+ 0x63e4e91068f2aeebL },
+ { 0x26234fa3c85932eeL,0x96883e8b4c90c44dL,0x29b9e738a18a50f6L,
+ 0xbfc62b2a3f0420dfL } },
+ /* 60 << 49 */
+ { { 0xd22a7d906d3e1fa9L,0x17115618fe05b8a3L,0x2a0c9926bb2b9c01L,
+ 0xc739fcc6e07e76a2L },
+ { 0x540e9157165e439aL,0x06353a626a9063d8L,0x84d9559461e927a3L,
+ 0x013b9b26e2e0be7fL } },
+ /* 61 << 49 */
+ { { 0x4feaec3b973497f1L,0x15c0f94e093ebc2dL,0x6af5f22733af0583L,
+ 0x0c2af206c61f3340L },
+ { 0xd25dbdf14457397cL,0x2e8ed017cabcbae0L,0xe3010938c2815306L,
+ 0xbaa99337e8c6cd68L } },
+ /* 62 << 49 */
+ { { 0x085131823b0ec7deL,0x1e1b822b58df05dfL,0x5c14842fa5c3b683L,
+ 0x98fe977e3eba34ceL },
+ { 0xfd2316c20d5e8873L,0xe48d839abd0d427dL,0x495b2218623fc961L,
+ 0x24ee56e7b46fba5eL } },
+ /* 63 << 49 */
+ { { 0x9184a55b91e4de58L,0xa7488ca5dfdea288L,0xa723862ea8dcc943L,
+ 0x92d762b2849dc0fcL },
+ { 0x3c444a12091ff4a9L,0x581113fa0cada274L,0xb9de0a4530d8eae2L,
+ 0x5e0fcd85df6b41eaL } },
+ /* 64 << 49 */
+ { { 0x6233ea68c094dbb5L,0xb77d062ed968d410L,0x3e719bbc58b3002dL,
+ 0x68e7dd3d3dc49d58L },
+ { 0x8d825740013a5e58L,0x213117473c9e3c1bL,0x0cb0a2a77c99b6abL,
+ 0x5c48a3b3c2f888f2L } },
+ /* 0 << 56 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 56 */
+ { { 0xc7913e91991724f3L,0x5eda799c39cbd686L,0xddb595c763d4fc1eL,
+ 0x6b63b80bac4fed54L },
+ { 0x6ea0fc697e5fb516L,0x737708bad0f1c964L,0x9628745f11a92ca5L,
+ 0x61f379589a86967aL } },
+ /* 2 << 56 */
+ { { 0x9af39b2caa665072L,0x78322fa4efd324efL,0x3d153394c327bd31L,
+ 0x81d5f2713129dab0L },
+ { 0xc72e0c42f48027f5L,0xaa40cdbc8536e717L,0xf45a657a2d369d0fL,
+ 0xb03bbfc4ea7f74e6L } },
+ /* 3 << 56 */
+ { { 0x46a8c4180d738dedL,0x6f1a5bb0e0de5729L,0xf10230b98ba81675L,
+ 0x32c6f30c112b33d4L },
+ { 0x7559129dd8fffb62L,0x6a281b47b459bf05L,0x77c1bd3afa3b6776L,
+ 0x0709b3807829973aL } },
+ /* 4 << 56 */
+ { { 0x8c26b232a3326505L,0x38d69272ee1d41bfL,0x0459453effe32afaL,
+ 0xce8143ad7cb3ea87L },
+ { 0x932ec1fa7e6ab666L,0x6cd2d23022286264L,0x459a46fe6736f8edL,
+ 0x50bf0d009eca85bbL } },
+ /* 5 << 56 */
+ { { 0x0b825852877a21ecL,0x300414a70f537a94L,0x3f1cba4021a9a6a2L,
+ 0x50824eee76943c00L },
+ { 0xa0dbfcecf83cba5dL,0xf953814893b4f3c0L,0x6174416248f24dd7L,
+ 0x5322d64de4fb09ddL } },
+ /* 6 << 56 */
+ { { 0x574473843d9325f3L,0xa9bef2d0f371cb84L,0x77d2188ba61e36c5L,
+ 0xbbd6a7d7c602df72L },
+ { 0xba3aa9028f61bc0bL,0xf49085ed6ed0b6a1L,0x8bc625d6ae6e8298L,
+ 0x832b0b1da2e9c01dL } },
+ /* 7 << 56 */
+ { { 0xa337c447f1f0ced1L,0x800cc7939492dd2bL,0x4b93151dbea08efaL,
+ 0x820cf3f8de0a741eL },
+ { 0xff1982dc1c0f7d13L,0xef92196084dde6caL,0x1ad7d97245f96ee3L,
+ 0x319c8dbe29dea0c7L } },
+ /* 8 << 56 */
+ { { 0xd3ea38717b82b99bL,0x75922d4d470eb624L,0x8f66ec543b95d466L,
+ 0x66e673ccbee1e346L },
+ { 0x6afe67c4b5f2b89aL,0x3de9c1e6290e5cd3L,0x8c278bb6310a2adaL,
+ 0x420fa3840bdb323bL } },
+ /* 9 << 56 */
+ { { 0x0ae1d63b0eb919b0L,0xd74ee51da74b9620L,0x395458d0a674290cL,
+ 0x324c930f4620a510L },
+ { 0x2d1f4d19fbac27d4L,0x4086e8ca9bedeeacL,0x0cdd211b9b679ab8L,
+ 0x5970167d7090fec4L } },
+ /* 10 << 56 */
+ { { 0x3420f2c9faf1fc63L,0x616d333a328c8bb4L,0x7d65364c57f1fe4aL,
+ 0x9343e87755e5c73aL },
+ { 0x5795176be970e78cL,0xa36ccebf60533627L,0xfc7c738009cdfc1bL,
+ 0xb39a2afeb3fec326L } },
+ /* 11 << 56 */
+ { { 0xb7ff1ba16224408aL,0xcc856e92247cfc5eL,0x01f102e7c18bc493L,
+ 0x4613ab742091c727L },
+ { 0xaa25e89cc420bf2bL,0x00a5317690337ec2L,0xd2be9f437d025fc7L,
+ 0x3316fb856e6fe3dcL } },
+ /* 12 << 56 */
+ { { 0x27520af59ac50814L,0xfdf95e789a8e4223L,0xb7e7df2a56bec5a0L,
+ 0xf7022f7ddf159e5dL },
+ { 0x93eeeab1cac1fe8fL,0x8040188c37451168L,0x7ee8aa8ad967dce6L,
+ 0xfa0e79e73abc9299L } },
+ /* 13 << 56 */
+ { { 0x67332cfc2064cfd1L,0x339c31deb0651934L,0x719b28d52a3bcbeaL,
+ 0xee74c82b9d6ae5c6L },
+ { 0x0927d05ebaf28ee6L,0x82cecf2c9d719028L,0x0b0d353eddb30289L,
+ 0xfe4bb977fddb2e29L } },
+ /* 14 << 56 */
+ { { 0xbb5bb990640bfd9eL,0xd226e27782f62108L,0x4bf0098502ffdd56L,
+ 0x7756758a2ca1b1b5L },
+ { 0xc32b62a35285fe91L,0xedbc546a8c9cd140L,0x1e47a013af5cb008L,
+ 0xbca7e720073ce8f2L } },
+ /* 15 << 56 */
+ { { 0xe10b2ab817a91caeL,0xb89aab6508e27f63L,0x7b3074a7dba3ddf9L,
+ 0x1c20ce09330c2972L },
+ { 0x6b9917b45fcf7e33L,0xe6793743945ceb42L,0x18fc22155c633d19L,
+ 0xad1adb3cc7485474L } },
+ /* 16 << 56 */
+ { { 0x646f96796424c49bL,0xf888dfe867c241c9L,0xe12d4b9324f68b49L,
+ 0x9a6b62d8a571df20L },
+ { 0x81b4b26d179483cbL,0x666f96329511fae2L,0xd281b3e4d53aa51fL,
+ 0x7f96a7657f3dbd16L } },
+ /* 17 << 56 */
+ { { 0xa7f8b5bf074a30ceL,0xd7f52107005a32e6L,0x6f9e090750237ed4L,
+ 0x2f21da478096fa2bL },
+ { 0xf3e19cb4eec863a0L,0xd18f77fd9527620aL,0x9505c81c407c1cf8L,
+ 0x9998db4e1b6ec284L } },
+ /* 18 << 56 */
+ { { 0x7e3389e5c247d44dL,0x125071413f4f3d80L,0xd4ba01104a78a6c7L,
+ 0x312874a0767720beL },
+ { 0xded059a675944370L,0xd6123d903b2c0bddL,0xa56b717b51c108e3L,
+ 0x9bb7940e070623e9L } },
+ /* 19 << 56 */
+ { { 0x794e2d5984ac066cL,0xf5954a92e68c69a0L,0x28c524584fd99dccL,
+ 0x60e639fcb1012517L },
+ { 0xc2e601257de79248L,0xe9ef6404f12fc6d7L,0x4c4f28082a3b5d32L,
+ 0x865ad32ec768eb8aL } },
+ /* 20 << 56 */
+ { { 0xac02331b13fb70b6L,0x037b44c195599b27L,0x1a860fc460bd082cL,
+ 0xa2e25745c980cd01L },
+ { 0xee3387a81da0263eL,0x931bfb952d10f3d6L,0x5b687270a1f24a32L,
+ 0xf140e65dca494b86L } },
+ /* 21 << 56 */
+ { { 0x4f4ddf91b2f1ac7aL,0xf99eaabb760fee27L,0x57f4008a49c228e5L,
+ 0x090be4401cf713bbL },
+ { 0xac91fbe45004f022L,0xd838c2c2569e1af6L,0xd6c7d20b0f1daaa5L,
+ 0xaa063ac11bbb02c0L } },
+ /* 22 << 56 */
+ { { 0x0938a42259558a78L,0x5343c6698435da2fL,0x96f67b18034410dcL,
+ 0x7cc1e42484510804L },
+ { 0x86a1543f16dfbb7dL,0x921fa9425b5bd592L,0x9dcccb6eb33dd03cL,
+ 0x8581ddd9b843f51eL } },
+ /* 23 << 56 */
+ { { 0x54935fcb81d73c9eL,0x6d07e9790a5e97abL,0x4dc7b30acf3a6babL,
+ 0x147ab1f3170bee11L },
+ { 0x0aaf8e3d9fafdee4L,0xfab3dbcb538a8b95L,0x405df4b36ef13871L,
+ 0xf1f4e9cb088d5a49L } },
+ /* 24 << 56 */
+ { { 0x9bcd24d366b33f1dL,0x3b97b8205ce445c0L,0xe2926549ba93ff61L,
+ 0xd9c341ce4dafe616L },
+ { 0xfb30a76e16efb6f3L,0xdf24b8ca605b953cL,0x8bd52afec2fffb9fL,
+ 0xbbac5ff7e19d0b96L } },
+ /* 25 << 56 */
+ { { 0x43c01b87459afccdL,0x6bd45143b7432652L,0x8473453055b5d78eL,
+ 0x81088fdb1554ba7dL },
+ { 0xada0a52c1e269375L,0xf9f037c42dc5ec10L,0xc066060794bfbc11L,
+ 0xc0a630bbc9c40d2fL } },
+ /* 26 << 56 */
+ { { 0x5efc797eab64c31eL,0xffdb1dab74507144L,0xf61242871ca6790cL,
+ 0xe9609d81e69bf1bfL },
+ { 0xdb89859500d24fc9L,0x9c750333e51fb417L,0x51830a91fef7bbdeL,
+ 0x0ce67dc8945f585cL } },
+ /* 27 << 56 */
+ { { 0x9a730ed44763eb50L,0x24a0e221c1ab0d66L,0x643b6393648748f3L,
+ 0x1982daa16d3c6291L },
+ { 0x6f00a9f78bbc5549L,0x7a1783e17f36384eL,0xe8346323de977f50L,
+ 0x91ab688db245502aL } },
+ /* 28 << 56 */
+ { { 0x331ab6b56d0bdd66L,0x0a6ef32e64b71229L,0x1028150efe7c352fL,
+ 0x27e04350ce7b39d3L },
+ { 0x2a3c8acdc1070c82L,0xfb2034d380c9feefL,0x2d729621709f3729L,
+ 0x8df290bf62cb4549L } },
+ /* 29 << 56 */
+ { { 0x02f99f33fc2e4326L,0x3b30076d5eddf032L,0xbb21f8cf0c652fb5L,
+ 0x314fb49eed91cf7bL },
+ { 0xa013eca52f700750L,0x2b9e3c23712a4575L,0xe5355557af30fbb0L,
+ 0x1ada35167c77e771L } },
+ /* 30 << 56 */
+ { { 0x45f6ecb27b135670L,0xe85d19df7cfc202eL,0x0f1b50c758d1be9fL,
+ 0x5ebf2c0aead2e344L },
+ { 0x1531fe4eabc199c9L,0xc703259256bab0aeL,0x16ab2e486c1fec54L,
+ 0x0f87fda804280188L } },
+ /* 31 << 56 */
+ { { 0xdc9f46fc609e4a74L,0x2a44a143ba667f91L,0xbc3d8b95b4d83436L,
+ 0xa01e4bd0c7bd2958L },
+ { 0x7b18293273483c90L,0xa79c6aa1a7c7b598L,0xbf3983c6eaaac07eL,
+ 0x8f18181e96e0d4e6L } },
+ /* 32 << 56 */
+ { { 0x8553d37c051af62bL,0xe9a998eb0bf94496L,0xe0844f9fb0d59aa1L,
+ 0x983fd558e6afb813L },
+ { 0x9670c0ca65d69804L,0x732b22de6ea5ff2dL,0xd7640ba95fd8623bL,
+ 0x9f619163a6351782L } },
+ /* 33 << 56 */
+ { { 0x0bfc27eeacee5043L,0xae419e732eb10f02L,0x19c028d18943fb05L,
+ 0x71f01cf7ff13aa2aL },
+ { 0x7790737e8887a132L,0x6751330966318410L,0x9819e8a37ddb795eL,
+ 0xfecb8ef5dad100b2L } },
+ /* 34 << 56 */
+ { { 0x59f74a223021926aL,0xb7c28a496f9b4c1cL,0xed1a733f912ad0abL,
+ 0x42a910af01a5659cL },
+ { 0x3842c6e07bd68cabL,0x2b57fa3876d70ac8L,0x8a6707a83c53aaebL,
+ 0x62c1c51065b4db18L } },
+ /* 35 << 56 */
+ { { 0x8de2c1fbb2d09dc7L,0xc3dfed12266bd23bL,0x927d039bd5b27db6L,
+ 0x2fb2f0f1103243daL },
+ { 0xf855a07b80be7399L,0xed9327ce1f9f27a8L,0xa0bd99c7729bdef7L,
+ 0x2b67125e28250d88L } },
+ /* 36 << 56 */
+ { { 0x784b26e88670ced7L,0xe3dfe41fc31bd3b4L,0x9e353a06bcc85cbcL,
+ 0x302e290960178a9dL },
+ { 0x860abf11a6eac16eL,0x76447000aa2b3aacL,0x46ff9d19850afdabL,
+ 0x35bdd6a5fdb2d4c1L } },
+ /* 37 << 56 */
+ { { 0xe82594b07e5c9ce9L,0x0f379e5320af346eL,0x608b31e3bc65ad4aL,
+ 0x710c6b12267c4826L },
+ { 0x51c966f971954cf1L,0xb1cec7930d0aa215L,0x1f15598986bd23a8L,
+ 0xae2ff99cf9452e86L } },
+ /* 38 << 56 */
+ { { 0xd8dd953c340ceaa2L,0x263552752e2e9333L,0x15d4e5f98586f06dL,
+ 0xd6bf94a8f7cab546L },
+ { 0x33c59a0ab76a9af0L,0x52740ab3ba095af7L,0xc444de8a24389ca0L,
+ 0xcc6f9863706da0cbL } },
+ /* 39 << 56 */
+ { { 0xb5a741a76b2515cfL,0x71c416019585c749L,0x78350d4fe683de97L,
+ 0x31d6152463d0b5f5L },
+ { 0x7a0cc5e1fbce090bL,0xaac927edfbcb2a5bL,0xe920de4920d84c35L,
+ 0x8c06a0b622b4de26L } },
+ /* 40 << 56 */
+ { { 0xd34dd58bafe7ddf3L,0x55851fedc1e6e55bL,0xd1395616960696e7L,
+ 0x940304b25f22705fL },
+ { 0x6f43f861b0a2a860L,0xcf1212820e7cc981L,0x121862120ab64a96L,
+ 0x09215b9ab789383cL } },
+ /* 41 << 56 */
+ { { 0x311eb30537387c09L,0xc5832fcef03ee760L,0x30358f5832f7ea19L,
+ 0xe01d3c3491d53551L },
+ { 0x1ca5ee41da48ea80L,0x34e71e8ecf4fa4c1L,0x312abd257af1e1c7L,
+ 0xe3afcdeb2153f4a5L } },
+ /* 42 << 56 */
+ { { 0x9d5c84d700235e9aL,0x0308d3f48c4c836fL,0xc0a66b0489332de5L,
+ 0x610dd39989e566efL },
+ { 0xf8eea460d1ac1635L,0x84cbb3fb20a2c0dfL,0x40afb488e74a48c5L,
+ 0x29738198d326b150L } },
+ /* 43 << 56 */
+ { { 0x2a17747fa6d74081L,0x60ea4c0555a26214L,0x53514bb41f88c5feL,
+ 0xedd645677e83426cL },
+ { 0xd5d6cbec96460b25L,0xa12fd0ce68dc115eL,0xc5bc3ed2697840eaL,
+ 0x969876a8a6331e31L } },
+ /* 44 << 56 */
+ { { 0x60c36217472ff580L,0xf42297054ad41393L,0x4bd99ef0a03b8b92L,
+ 0x501c7317c144f4f6L },
+ { 0x159009b318464945L,0x6d5e594c74c5c6beL,0x2d587011321a3660L,
+ 0xd1e184b13898d022L } },
+ /* 45 << 56 */
+ { { 0x5ba047524c6a7e04L,0x47fa1e2b45550b65L,0x9419daf048c0a9a5L,
+ 0x663629537c243236L },
+ { 0xcd0744b15cb12a88L,0x561b6f9a2b646188L,0x599415a566c2c0c0L,
+ 0xbe3f08590f83f09aL } },
+ /* 46 << 56 */
+ { { 0x9141c5beb92041b8L,0x01ae38c726477d0dL,0xca8b71f3d12c7a94L,
+ 0xfab5b31f765c70dbL },
+ { 0x76ae7492487443e9L,0x8595a310990d1349L,0xf8dbeda87d460a37L,
+ 0x7f7ad0821e45a38fL } },
+ /* 47 << 56 */
+ { { 0xed1d4db61059705aL,0xa3dd492ae6b9c697L,0x4b92ee3a6eb38bd5L,
+ 0xbab2609d67cc0bb7L },
+ { 0x7fc4fe896e70ee82L,0xeff2c56e13e6b7e3L,0x9b18959e34d26fcaL,
+ 0x2517ab66889d6b45L } },
+ /* 48 << 56 */
+ { { 0xf167b4e0bdefdd4fL,0x69958465f366e401L,0x5aa368aba73bbec0L,
+ 0x121487097b240c21L },
+ { 0x378c323318969006L,0xcb4d73cee1fe53d1L,0x5f50a80e130c4361L,
+ 0xd67f59517ef5212bL } },
+ /* 49 << 56 */
+ { { 0xf145e21e9e70c72eL,0xb2e52e295566d2fbL,0x44eaba4a032397f5L,
+ 0x5e56937b7e31a7deL },
+ { 0x68dcf517456c61e1L,0xbc2e954aa8b0a388L,0xe3552fa760a8b755L,
+ 0x03442dae73ad0cdeL } },
+ /* 50 << 56 */
+ { { 0x37ffe747ceb26210L,0x983545e8787baef9L,0x8b8c853586a3de31L,
+ 0xc621dbcbfacd46dbL },
+ { 0x82e442e959266fbbL,0xa3514c37339d471cL,0x3a11b77162cdad96L,
+ 0xf0cb3b3cecf9bdf0L } },
+ /* 51 << 56 */
+ { { 0x3fcbdbce478e2135L,0x7547b5cfbda35342L,0xa97e81f18a677af6L,
+ 0xc8c2bf8328817987L },
+ { 0xdf07eaaf45580985L,0xc68d1f05c93b45cbL,0x106aa2fec77b4cacL,
+ 0x4c1d8afc04a7ae86L } },
+ /* 52 << 56 */
+ { { 0xdb41c3fd9eb45ab2L,0x5b234b5bd4b22e74L,0xda253decf215958aL,
+ 0x67e0606ea04edfa0L },
+ { 0xabbbf070ef751b11L,0xf352f175f6f06dceL,0xdfc4b6af6839f6b4L,
+ 0x53ddf9a89959848eL } },
+ /* 53 << 56 */
+ { { 0xda49c379c21520b0L,0x90864ff0dbd5d1b6L,0x2f055d235f49c7f7L,
+ 0xe51e4e6aa796b2d8L },
+ { 0xc361a67f5c9dc340L,0x5ad53c37bca7c620L,0xda1d658832c756d0L,
+ 0xad60d9118bb67e13L } },
+ /* 54 << 56 */
+ { { 0xd6c47bdf0eeec8c6L,0x4a27fec1078a1821L,0x081f7415c3099524L,
+ 0x8effdf0b82cd8060L },
+ { 0xdb70ec1c65842df8L,0x8821b358d319a901L,0x72ee56eede42b529L,
+ 0x5bb39592236e4286L } },
+ /* 55 << 56 */
+ { { 0xd1183316fd6f7140L,0xf9fadb5bbd8e81f7L,0x701d5e0c5a02d962L,
+ 0xfdee4dbf1b601324L },
+ { 0xbed1740735d7620eL,0x04e3c2c3f48c0012L,0x9ee29da73455449aL,
+ 0x562cdef491a836c4L } },
+ /* 56 << 56 */
+ { { 0x8f682a5f47701097L,0x617125d8ff88d0c2L,0x948fda2457bb86ddL,
+ 0x348abb8f289f7286L },
+ { 0xeb10eab599d94bbdL,0xd51ba28e4684d160L,0xabe0e51c30c8f41aL,
+ 0x66588b4513254f4aL } },
+ /* 57 << 56 */
+ { { 0x147ebf01fad097a5L,0x49883ea8610e815dL,0xe44d60ba8a11de56L,
+ 0xa970de6e827a7a6dL },
+ { 0x2be414245e17fc19L,0xd833c65701214057L,0x1375813b363e723fL,
+ 0x6820bb88e6a52e9bL } },
+ /* 58 << 56 */
+ { { 0x7e7f6970d875d56aL,0xd6a0a9ac51fbf6bfL,0x54ba8790a3083c12L,
+ 0xebaeb23d6ae7eb64L },
+ { 0xa8685c3ab99a907aL,0xf1e74550026bf40bL,0x7b73a027c802cd9eL,
+ 0x9a8a927c4fef4635L } },
+ /* 59 << 56 */
+ { { 0xe1b6f60c08191224L,0xc4126ebbde4ec091L,0xe1dff4dc4ae38d84L,
+ 0xde3f57db4f2ef985L },
+ { 0x34964337d446a1ddL,0x7bf217a0859e77f6L,0x8ff105278e1d13f5L,
+ 0xa304ef0374eeae27L } },
+ /* 60 << 56 */
+ { { 0xfc6f5e47d19dfa5aL,0xdb007de37fad982bL,0x28205ad1613715f5L,
+ 0x251e67297889529eL },
+ { 0x727051841ae98e78L,0xf818537d271cac32L,0xc8a15b7eb7f410f5L,
+ 0xc474356f81f62393L } },
+ /* 61 << 56 */
+ { { 0x92dbdc5ac242316bL,0xabe060acdbf4aff5L,0x6e8c38fe909a8ec6L,
+ 0x43e514e56116cb94L },
+ { 0x2078fa3807d784f9L,0x1161a880f4b5b357L,0x5283ce7913adea3dL,
+ 0x0756c3e6cc6a910bL } },
+ /* 62 << 56 */
+ { { 0x60bcfe01aaa79697L,0x04a73b2956391db1L,0xdd8dad47189b45a0L,
+ 0xbfac0dd048d5b8d9L },
+ { 0x34ab3af57d3d2ec2L,0x6fa2fc2d207bd3afL,0x9ff4009266550dedL,
+ 0x719b3e871fd5b913L } },
+ /* 63 << 56 */
+ { { 0xa573a4966d17fbc7L,0x0cd1a70a73d2b24eL,0x34e2c5cab2676937L,
+ 0xe7050b06bf669f21L },
+ { 0xfbe948b61ede9046L,0xa053005197662659L,0x58cbd4edf10124c5L,
+ 0xde2646e4dd6c06c8L } },
+ /* 64 << 56 */
+ { { 0x332f81088cad38c0L,0x471b7e906bd68ae2L,0x56ac3fb20d8e27a3L,
+ 0xb54660db136b4b0dL },
+ { 0x123a1e11a6fd8de4L,0x44dbffeaa37799efL,0x4540b977ce6ac17cL,
+ 0x495173a8af60acefL } },
+ /* 0 << 63 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 63 */
+ { { 0x9ebb284d391c2a82L,0xbcdd4863158308e8L,0x006f16ec83f1edcaL,
+ 0xa13e2c37695dc6c8L },
+ { 0x2ab756f04a057a87L,0xa8765500a6b48f98L,0x4252face68651c44L,
+ 0xa52b540be1765e02L } },
+ /* 2 << 63 */
+ { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL,
+ 0x2f5e6961fd1b667fL },
+ { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L,
+ 0xf648f9168d6f0f7bL } },
+ /* 3 << 63 */
+ { { 0x6dc1acafe60b7cf7L,0x25860a5084a9d869L,0x56fc6f09e7ba8ac4L,
+ 0x828c5bd06148d29eL },
+ { 0xac6b435edc55ae5fL,0xa527f56cc0117411L,0x94d5045efd24342cL,
+ 0x2c4c0a3570b67c0dL } },
+ /* 4 << 63 */
+ { { 0x027cc8b8fac61d9aL,0x7d25e062e3c6fe8aL,0xe08805bfe5bff503L,
+ 0x13271e6c6ff632f7L },
+ { 0x55dca6c0232f76a5L,0x8957c32d701ef426L,0xee728bcba10a5178L,
+ 0x5ea60411b62c5173L } },
+ /* 5 << 63 */
+ { { 0xfc4e964ed0b8892bL,0x9ea176839301bb74L,0x6265c5aefcc48626L,
+ 0xe60cf82ebb3e9102L },
+ { 0x57adf797d4df5531L,0x235b59a18deeefe2L,0x60adcf583f306eb1L,
+ 0x105c27533d09492dL } },
+ /* 6 << 63 */
+ { { 0x4090914bb5def996L,0x1cb69c83233dd1e7L,0xc1e9c1d39b3d5e76L,
+ 0x1f3338edfccf6012L },
+ { 0xb1e95d0d2f5378a8L,0xacf4c2c72f00cd21L,0x6e984240eb5fe290L,
+ 0xd66c038d248088aeL } },
+ /* 7 << 63 */
+ { { 0x804d264af94d70cfL,0xbdb802ef7314bf7eL,0x8fb54de24333ed02L,
+ 0x740461e0285635d9L },
+ { 0x4113b2c8365e9383L,0xea762c833fdef652L,0x4eec6e2e47b956c1L,
+ 0xa3d814be65620fa4L } },
+ /* 8 << 63 */
+ { { 0x9ad5462bb4d8bc50L,0x181c0b16a9195770L,0xebd4fe1c78412a68L,
+ 0xae0341bcc0dff48cL },
+ { 0xb6bc45cf7003e866L,0xf11a6dea8a24a41bL,0x5407151ad04c24c2L,
+ 0x62c9d27dda5b7b68L } },
+ /* 9 << 63 */
+ { { 0x2e96423588cceff6L,0x8594c54f8b07ed69L,0x1578e73cc84d0d0dL,
+ 0x7b4e1055ff532868L },
+ { 0xa348c0d5b5ec995aL,0xbf4b9d5514289a54L,0x9ba155a658fbd777L,
+ 0x186ed7a81a84491dL } },
+ /* 10 << 63 */
+ { { 0xd4992b30614c0900L,0xda98d121bd00c24bL,0x7f534dc87ec4bfa1L,
+ 0x4a5ff67437dc34bcL },
+ { 0x68c196b81d7ea1d7L,0x38cf289380a6d208L,0xfd56cd09e3cbbd6eL,
+ 0xec72e27e4205a5b6L } },
+ /* 11 << 63 */
+ { { 0x15ea68f5a44f77f7L,0x7aa5f9fdb43c52bcL,0x86ff676f94f0e609L,
+ 0xa4cde9632e2d432bL },
+ { 0x8cafa0c0eee470afL,0x84137d0e8a3f5ec8L,0xebb40411faa31231L,
+ 0xa239c13f6f7f7ccfL } },
+ /* 12 << 63 */
+ { { 0x32865719a8afd30bL,0x867983288a826dceL,0xdf04e891c4a8fbe0L,
+ 0xbb6b6e1bebf56ad3L },
+ { 0x0a695b11471f1ff0L,0xd76c3389be15baf0L,0x018edb95be96c43eL,
+ 0xf2beaaf490794158L } },
+ /* 13 << 63 */
+ { { 0x152db09ec3076a27L,0x5e82908ee416545dL,0xa2c41272356d6f2eL,
+ 0xdc9c964231fd74e1L },
+ { 0x66ceb88d519bf615L,0xe29ecd7605a2274eL,0x3a0473c4bf5e2fa0L,
+ 0x6b6eb67164284e67L } },
+ /* 14 << 63 */
+ { { 0xe8b97932b88756ddL,0xed4e8652f17e3e61L,0xc2dd14993ee1c4a4L,
+ 0xc0aaee17597f8c0eL },
+ { 0x15c4edb96c168af3L,0x6563c7bfb39ae875L,0xadfadb6f20adb436L,
+ 0xad55e8c99a042ac0L } },
+ /* 15 << 63 */
+ { { 0x975a1ed8b76da1f5L,0x10dfa466a58acb94L,0x8dd7f7e3ac060282L,
+ 0x6813e66a572a051eL },
+ { 0xb4ccae1e350cb901L,0xb653d65650cb7822L,0x42484710dfab3b87L,
+ 0xcd7ee5379b670fd0L } },
+ /* 16 << 63 */
+ { { 0x0a50b12e523b8bf6L,0x8009eb5b8f910c1bL,0xf535af824a167588L,
+ 0x0f835f9cfb2a2abdL },
+ { 0xf59b29312afceb62L,0xc797df2a169d383fL,0xeb3f5fb066ac02b0L,
+ 0x029d4c6fdaa2d0caL } },
+ /* 17 << 63 */
+ { { 0xd4059bc1afab4bc5L,0x833f5c6f56783247L,0xb53466308d2d3605L,
+ 0x83387891d34d8433L },
+ { 0xd973b30fadd9419aL,0xbcca1099afe3fce8L,0x081783150809aac6L,
+ 0x01b7f21a540f0f11L } },
+ /* 18 << 63 */
+ { { 0x65c29219909523c8L,0xa62f648fa3a1c741L,0x88598d4f60c9e55aL,
+ 0xbce9141b0e4f347aL },
+ { 0x9af97d8435f9b988L,0x0210da62320475b6L,0x3c076e229191476cL,
+ 0x7520dbd944fc7834L } },
+ /* 19 << 63 */
+ { { 0x6a6b2cfec1ab1bbdL,0xef8a65bedc650938L,0x72855540805d7bc4L,
+ 0xda389396ed11fdfdL },
+ { 0xa9d5bd3674660876L,0x11d67c54b45dff35L,0x6af7d148a4f5da94L,
+ 0xbb8d4c3fc0bbeb31L } },
+ /* 20 << 63 */
+ { { 0x87a7ebd1e0a1b12aL,0x1e4ef88d770ba95fL,0x8c33345cdc2ae9cbL,
+ 0xcecf127601cc8403L },
+ { 0x687c012e1b39b80fL,0xfd90d0ad35c33ba4L,0xa3ef5a675c9661c2L,
+ 0x368fc88ee017429eL } },
+ /* 21 << 63 */
+ { { 0xd30c6761196a2fa2L,0x931b9817bd5b312eL,0xba01000c72f54a31L,
+ 0xa203d2c866eaa541L },
+ { 0xf2abdee098939db3L,0xe37d6c2c3e606c02L,0xf2921574521ff643L,
+ 0x2781b3c4d7e2fca3L } },
+ /* 22 << 63 */
+ { { 0x664300b07850ec06L,0xac5a38b97d3a10cfL,0x9233188de34ab39dL,
+ 0xe77057e45072cbb9L },
+ { 0xbcf0c042b59e78dfL,0x4cfc91e81d97de52L,0x4661a26c3ee0ca4aL,
+ 0x5620a4c1fb8507bcL } },
+ /* 23 << 63 */
+ { { 0x4b44d4aa049f842cL,0xceabc5d51540e82bL,0x306710fd15c6f156L,
+ 0xbe5ae52b63db1d72L },
+ { 0x06f1e7e6334957f1L,0x57e388f031144a70L,0xfb69bb2fdf96447bL,
+ 0x0f78ebd373e38a12L } },
+ /* 24 << 63 */
+ { { 0xb82226052b7ce542L,0xe6d4ce997472bde1L,0x53e16ebe09d2f4daL,
+ 0x180ff42e53b92b2eL },
+ { 0xc59bcc022c34a1c6L,0x3803d6f9422c46c2L,0x18aff74f5c14a8a2L,
+ 0x55aebf8010a08b28L } },
+ /* 25 << 63 */
+ { { 0x66097d587135593fL,0x32e6eff72be570cdL,0x584e6a102a8c860dL,
+ 0xcd185890a2eb4163L },
+ { 0x7ceae99d6d97e134L,0xd42c6b70dd8447ceL,0x59ddbb4ab8c50273L,
+ 0x03c612df3cf34e1eL } },
+ /* 26 << 63 */
+ { { 0x84b9ca1504b6c5a0L,0x35216f3918f0e3a3L,0x3ec2d2bcbd986c00L,
+ 0x8bf546d9d19228feL },
+ { 0xd1c655a44cd623c3L,0x366ce718502b8e5aL,0x2cfc84b4eea0bfe7L,
+ 0xe01d5ceecf443e8eL } },
+ /* 27 << 63 */
+ { { 0x8ec045d9036520f8L,0xdfb3c3d192d40e98L,0x0bac4ccecc559a04L,
+ 0x35eccae5240ea6b1L },
+ { 0x180b32dbf8a5a0acL,0x547972a5eb699700L,0xa3765801ca26bca0L,
+ 0x57e09d0ea647f25aL } },
+ /* 28 << 63 */
+ { { 0xb956970e2fdd23ccL,0xb80288bc5682e971L,0xe6e6d91e9ae86ebcL,
+ 0x0564c83f8c9f1939L },
+ { 0x551932a239560368L,0xe893752b049c28e2L,0x0b03cee5a6a158c3L,
+ 0xe12d656b04964263L } },
+ /* 29 << 63 */
+ { { 0x4b47554e63e3bc1dL,0xc719b6a245044ff7L,0x4f24d30ae48daa07L,
+ 0xa3f37556c8c1edc3L },
+ { 0x9a47bf760700d360L,0xbb1a1824822ae4e2L,0x22e275a389f1fb4cL,
+ 0x72b1aa239968c5f5L } },
+ /* 30 << 63 */
+ { { 0xa75feacabe063f64L,0x9b392f43bce47a09L,0xd42415091ad07acaL,
+ 0x4b0c591b8d26cd0fL },
+ { 0x2d42ddfd92f1169aL,0x63aeb1ac4cbf2392L,0x1de9e8770691a2afL,
+ 0xebe79af7d98021daL } },
+ /* 31 << 63 */
+ { { 0xcfdf2a4e40e50acfL,0xf0a98ad7af01d665L,0xefb640bf1831be1fL,
+ 0x6fe8bd2f80e9ada0L },
+ { 0x94c103a16cafbc91L,0x170f87598308e08cL,0x5de2d2ab9780ff4fL,
+ 0x666466bc45b201f2L } },
+ /* 32 << 63 */
+ { { 0x58af2010f5b343bcL,0x0f2e400af2f142feL,0x3483bfdea85f4bdfL,
+ 0xf0b1d09303bfeaa9L },
+ { 0x2ea01b95c7081603L,0xe943e4c93dba1097L,0x47be92adb438f3a6L,
+ 0x00bb7742e5bf6636L } },
+ /* 33 << 63 */
+ { { 0x136b7083824297b4L,0x9d0e55805584455fL,0xab48cedcf1c7d69eL,
+ 0x53a9e4812a256e76L },
+ { 0x0402b0e065eb2413L,0xdadbbb848fc407a7L,0xa65cd5a48d7f5492L,
+ 0x21d4429374bae294L } },
+ /* 34 << 63 */
+ { { 0x66917ce63b5f1cc4L,0x37ae52eace872e62L,0xbb087b722905f244L,
+ 0x120770861e6af74fL },
+ { 0x4b644e491058edeaL,0x827510e3b638ca1dL,0x8cf2b7046038591cL,
+ 0xffc8b47afe635063L } },
+ /* 35 << 63 */
+ { { 0x3ae220e61b4d5e63L,0xbd8647429d961b4bL,0x610c107e9bd16bedL,
+ 0x4270352a1127147bL },
+ { 0x7d17ffe664cfc50eL,0x50dee01a1e36cb42L,0x068a762235dc5f9aL,
+ 0x9a08d536df53f62cL } },
+ /* 36 << 63 */
+ { { 0x4ed714576be5f7deL,0xd93006f8c2263c9eL,0xe073694ccacacb36L,
+ 0x2ff7a5b43ae118abL },
+ { 0x3cce53f1cd871236L,0xf156a39dc2aa6d52L,0x9cc5f271b198d76dL,
+ 0xbc615b6f81383d39L } },
+ /* 37 << 63 */
+ { { 0xa54538e8de3eee6bL,0x58c77538ab910d91L,0x31e5bdbc58d278bdL,
+ 0x3cde4adfb963acaeL },
+ { 0xb1881fd25302169cL,0x8ca60fa0a989ed8bL,0xa1999458ff96a0eeL,
+ 0xc1141f03ac6c283dL } },
+ /* 38 << 63 */
+ { { 0x7677408d6dfafed3L,0x33a0165339661588L,0x3c9c15ec0b726fa0L,
+ 0x090cfd936c9b56daL },
+ { 0xe34f4baea3c40af5L,0x3469eadbd21129f1L,0xcc51674a1e207ce8L,
+ 0x1e293b24c83b1ef9L } },
+ /* 39 << 63 */
+ { { 0x17173d131e6c0bb4L,0x1900469590776d35L,0xe7980e346de6f922L,
+ 0x873554cbf4dd9a22L },
+ { 0x0316c627cbf18a51L,0x4d93651b3032c081L,0x207f27713946834dL,
+ 0x2c08d7b430cdbf80L } },
+ /* 40 << 63 */
+ { { 0x137a4fb486df2a61L,0xa1ed9c07ecf7b4a2L,0xb2e460e27bd042ffL,
+ 0xb7f5e2fa5f62f5ecL },
+ { 0x7aa6ec6bcc2423b7L,0x75ce0a7fba63eea7L,0x67a45fb1f250a6e1L,
+ 0x93bc919ce53cdc9fL } },
+ /* 41 << 63 */
+ { { 0x9271f56f871942dfL,0x2372ff6f7859ad66L,0x5f4c2b9633cb1a78L,
+ 0xe3e291015838aa83L },
+ { 0xa7ed1611e4e8110cL,0x2a2d70d5330198ceL,0xbdf132e86720efe0L,
+ 0xe61a896266a471bfL } },
+ /* 42 << 63 */
+ { { 0x796d3a85825808bdL,0x51dc3cb73fd6e902L,0x643c768a916219d1L,
+ 0x36cd7685a2ad7d32L },
+ { 0xe3db9d05b22922a4L,0x6494c87edba29660L,0xf0ac91dfbcd2ebc7L,
+ 0x4deb57a045107f8dL } },
+ /* 43 << 63 */
+ { { 0x42271f59c3d12a73L,0x5f71687ca5c2c51dL,0xcb1f50c605797bcbL,
+ 0x29ed0ed9d6d34eb0L },
+ { 0xe5fe5b474683c2ebL,0x4956eeb597447c46L,0x5b163a4371207167L,
+ 0x93fa2fed0248c5efL } },
+ /* 44 << 63 */
+ { { 0x67930af231f63950L,0xa77797c114caa2c9L,0x526e80ee27ac7e62L,
+ 0xe1e6e62658b28aecL },
+ { 0x636178b0b3c9fef0L,0xaf7752e06d5f90beL,0x94ecaf18eece51cfL,
+ 0x2864d0edca806e1fL } },
+ /* 45 << 63 */
+ { { 0x6de2e38397c69134L,0x5a42c316eb291293L,0xc77792196a60bae0L,
+ 0xa24de3466b7599d1L },
+ { 0x49d374aab75d4941L,0x989005862d501ff0L,0x9f16d40eeb7974cfL,
+ 0x1033860bcdd8c115L } },
+ /* 46 << 63 */
+ { { 0xb6c69ac82094cec3L,0x9976fb88403b770cL,0x1dea026c4859590dL,
+ 0xb6acbb468562d1fdL },
+ { 0x7cd6c46144569d85L,0xc3190a3697f0891dL,0xc6f5319548d5a17dL,
+ 0x7d919966d749abc8L } },
+ /* 47 << 63 */
+ { { 0x65104837dd1c8a20L,0x7e5410c82f683419L,0x958c3ca8be94022eL,
+ 0x605c31976145dac2L },
+ { 0x3fc0750101683d54L,0x1d7127c5595b1234L,0x10b8f87c9481277fL,
+ 0x677db2a8e65a1adbL } },
+ /* 48 << 63 */
+ { { 0xec2fccaaddce3345L,0x2a6811b7012a4350L,0x96760ff1ac598bdcL,
+ 0x054d652ad1bf4128L },
+ { 0x0a1151d492a21005L,0xad7f397133110fdfL,0x8c95928c1960100fL,
+ 0x6c91c8257bf03362L } },
+ /* 49 << 63 */
+ { { 0xc8c8b2a2ce309f06L,0xfdb27b59ca27204bL,0xd223eaa50848e32eL,
+ 0xb93e4b2ee7bfaf1eL },
+ { 0xc5308ae644aa3dedL,0x317a666ac015d573L,0xc888ce231a979707L,
+ 0xf141c1e60d5c4958L } },
+ /* 50 << 63 */
+ { { 0xb53b7de561906373L,0x858dbadeeb999595L,0x8cbb47b2a59e5c36L,
+ 0x660318b3dcf4e842L },
+ { 0xbd161ccd12ba4b7aL,0xf399daabf8c8282aL,0x1587633aeeb2130dL,
+ 0xa465311ada38dd7dL } },
+ /* 51 << 63 */
+ { { 0x5f75eec864d3779bL,0x3c5d0476ad64c171L,0x874103712a914428L,
+ 0x8096a89190e2fc29L },
+ { 0xd3d2ae9d23b3ebc2L,0x90bdd6dba580cfd6L,0x52dbb7f3c5b01f6cL,
+ 0xe68eded4e102a2dcL } },
+ /* 52 << 63 */
+ { { 0x17785b7799eb6df0L,0x26c3cc517386b779L,0x345ed9886417a48eL,
+ 0xe990b4e407d6ef31L },
+ { 0x0f456b7e2586abbaL,0x239ca6a559c96e9aL,0xe327459ce2eb4206L,
+ 0x3a4c3313a002b90aL } },
+ /* 53 << 63 */
+ { { 0x2a114806f6a3f6fbL,0xad5cad2f85c251ddL,0x92c1f613f5a784d3L,
+ 0xec7bfacf349766d5L },
+ { 0x04b3cd333e23cb3bL,0x3979fe84c5a64b2dL,0x192e27207e589106L,
+ 0xa60c43d1a15b527fL } },
+ /* 54 << 63 */
+ { { 0x2dae9082be7cf3a6L,0xcc86ba92bc967274L,0xf28a2ce8aea0a8a9L,
+ 0x404ca6d96ee988b3L },
+ { 0xfd7e9c5d005921b8L,0xf56297f144e79bf9L,0xa163b4600d75ddc2L,
+ 0x30b23616a1f2be87L } },
+ /* 55 << 63 */
+ { { 0x4b070d21bfe50e2bL,0x7ef8cfd0e1bfede1L,0xadba00112aac4ae0L,
+ 0x2a3e7d01b9ebd033L },
+ { 0x995277ece38d9d1cL,0xb500249e9c5d2de3L,0x8912b820f13ca8c9L,
+ 0xc8798114877793afL } },
+ /* 56 << 63 */
+ { { 0x19e6125dec3f1decL,0x07b1f040911178daL,0xd93ededa904a6738L,
+ 0x55187a5a0bebedcdL },
+ { 0xf7d04722eb329d41L,0xf449099ef170b391L,0xfd317a69ca99f828L,
+ 0x50c3db2b34a4976dL } },
+ /* 57 << 63 */
+ { { 0xe9ba77843757b392L,0x326caefdaa3ca05aL,0x78e5293bf1e593d4L,
+ 0x7842a9370d98fd13L },
+ { 0xe694bf965f96b10dL,0x373a9df606a8cd05L,0x997d1e51e8f0c7fcL,
+ 0x1d01979063fd972eL } },
+ /* 58 << 63 */
+ { { 0x0064d8585499fb32L,0x7b67bad977a8aeb7L,0x1d3eb9772d08eec5L,
+ 0x5fc047a6cbabae1dL },
+ { 0x0577d159e54a64bbL,0x8862201bc43497e4L,0xad6b4e282ce0608dL,
+ 0x8b687b7d0b167aacL } },
+ /* 59 << 63 */
+ { { 0x6ed4d3678b2ecfa9L,0x24dfe62da90c3c38L,0xa1862e103fe5c42bL,
+ 0x1ca73dcad5732a9fL },
+ { 0x35f038b776bb87adL,0x674976abf242b81fL,0x4f2bde7eb0fd90cdL,
+ 0x6efc172ea7fdf092L } },
+ /* 60 << 63 */
+ { { 0x3806b69b92222f1fL,0x5a2459ca6cf7ae70L,0x6789f69ca85217eeL,
+ 0x5f232b5ee3dc85acL },
+ { 0x660e3ec548e9e516L,0x124b4e473197eb31L,0x10a0cb13aafcca23L,
+ 0x7bd63ba48213224fL } },
+ /* 61 << 63 */
+ { { 0xaffad7cc290a7f4fL,0x6b409c9e0286b461L,0x58ab809fffa407afL,
+ 0xc3122eedc68ac073L },
+ { 0x17bf9e504ef24d7eL,0x5d9297943e2a5811L,0x519bc86702902e01L,
+ 0x76bba5da39c8a851L } },
+ /* 62 << 63 */
+ { { 0xe9f9669cda94951eL,0x4b6af58d66b8d418L,0xfa32107417d426a4L,
+ 0xc78e66a99dde6027L },
+ { 0x0516c0834a53b964L,0xfc659d38ff602330L,0x0ab55e5c58c5c897L,
+ 0x985099b2838bc5dfL } },
+ /* 63 << 63 */
+ { { 0x061d9efcc52fc238L,0x712b27286ac1da3fL,0xfb6581499283fe08L,
+ 0x4954ac94b8aaa2f7L },
+ { 0x85c0ada47fb2e74fL,0xee8ba98eb89926b0L,0xe4f9d37d23d1af5bL,
+ 0x14ccdbf9ba9b015eL } },
+ /* 64 << 63 */
+ { { 0xb674481b7bfe7178L,0x4e1debae65405868L,0x061b2821c48c867dL,
+ 0x69c15b35513b30eaL },
+ { 0x3b4a166636871088L,0xe5e29f5d1220b1ffL,0x4b82bb35233d9f4dL,
+ 0x4e07633318cdc675L } },
+ /* 0 << 70 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 70 */
+ { { 0x0d53f5c7a3e6fcedL,0xe8cbbdd5f45fbdebL,0xf85c01df13339a70L,
+ 0x0ff71880142ceb81L },
+ { 0x4c4e8774bd70437aL,0x5fb32891ba0bda6aL,0x1cdbebd2f18bd26eL,
+ 0x2f9526f103a9d522L } },
+ /* 2 << 70 */
+ { { 0x40ce305192c4d684L,0x8b04d7257612efcdL,0xb9dcda366f9cae20L,
+ 0x0edc4d24f058856cL },
+ { 0x64f2e6bf85427900L,0x3de81295dc09dfeaL,0xd41b4487379bf26cL,
+ 0x50b62c6d6df135a9L } },
+ /* 3 << 70 */
+ { { 0xd4f8e3b4c72dfe67L,0xc416b0f690e19fdfL,0x18b9098d4c13bd35L,
+ 0xac11118a15b8cb9eL },
+ { 0xf598a318f0062841L,0xbfe0602f89f356f4L,0x7ae3637e30177a0cL,
+ 0x3409774761136537L } },
+ /* 4 << 70 */
+ { { 0x0db2fb5ed005832aL,0x5f5efd3b91042e4fL,0x8c4ffdc6ed70f8caL,
+ 0xe4645d0bb52da9ccL },
+ { 0x9596f58bc9001d1fL,0x52c8f0bc4e117205L,0xfd4aa0d2e398a084L,
+ 0x815bfe3a104f49deL } },
+ /* 5 << 70 */
+ { { 0x97e5443f23885e5fL,0xf72f8f99e8433aabL,0xbd00b154e4d4e604L,
+ 0xd0b35e6ae5e173ffL },
+ { 0x57b2a0489164722dL,0x3e3c665b88761ec8L,0x6bdd13973da83832L,
+ 0x3c8b1a1e73dafe3bL } },
+ /* 6 << 70 */
+ { { 0x4497ace654317cacL,0xbe600ab9521771b3L,0xb42e409eb0dfe8b8L,
+ 0x386a67d73942310fL },
+ { 0x25548d8d4431cc28L,0xa7cff142985dc524L,0x4d60f5a193c4be32L,
+ 0x83ebd5c8d071c6e1L } },
+ /* 7 << 70 */
+ { { 0xba3a80a7b1fd2b0bL,0x9b3ad3965bec33e8L,0xb3868d6179743fb3L,
+ 0xcfd169fcfdb462faL },
+ { 0xd3b499d79ce0a6afL,0x55dc1cf1e42d3ff8L,0x04fb9e6cc6c3e1b2L,
+ 0x47e6961d6f69a474L } },
+ /* 8 << 70 */
+ { { 0x54eb3acce548b37bL,0xb38e754284d40549L,0x8c3daa517b341b4fL,
+ 0x2f6928ec690bf7faL },
+ { 0x0496b32386ce6c41L,0x01be1c5510adadcdL,0xc04e67e74bb5faf9L,
+ 0x3cbaf678e15c9985L } },
+ /* 9 << 70 */
+ { { 0x8cd1214550ca4247L,0xba1aa47ae7dd30aaL,0x2f81ddf1e58fee24L,
+ 0x03452936eec9b0e8L },
+ { 0x8bdc3b81243aea96L,0x9a2919af15c3d0e5L,0x9ea640ec10948361L,
+ 0x5ac86d5b6e0bcccfL } },
+ /* 10 << 70 */
+ { { 0xf892d918c36cf440L,0xaed3e837c939719cL,0xb07b08d2c0218b64L,
+ 0x6f1bcbbace9790ddL },
+ { 0x4a84d6ed60919b8eL,0xd89007918ac1f9ebL,0xf84941aa0dd5daefL,
+ 0xb22fe40a67fd62c5L } },
+ /* 11 << 70 */
+ { { 0x97e15ba2157f2db3L,0xbda2fc8f8e28ca9cL,0x5d050da437b9f454L,
+ 0x3d57eb572379d72eL },
+ { 0xe9b5eba2fb5ee997L,0x01648ca2e11538caL,0x32bb76f6f6327974L,
+ 0x338f14b8ff3f4bb7L } },
+ /* 12 << 70 */
+ { { 0x524d226ad7ab9a2dL,0x9c00090d7dfae958L,0x0ba5f5398751d8c2L,
+ 0x8afcbcdd3ab8262dL },
+ { 0x57392729e99d043bL,0xef51263baebc943aL,0x9feace9320862935L,
+ 0x639efc03b06c817bL } },
+ /* 13 << 70 */
+ { { 0x1fe054b366b4be7aL,0x3f25a9de84a37a1eL,0xf39ef1ad78d75cd9L,
+ 0xd7b58f495062c1b5L },
+ { 0x6f74f9a9ff563436L,0xf718ff29e8af51e7L,0x5234d31315e97fecL,
+ 0xb6a8e2b1292f1c0aL } },
+ /* 14 << 70 */
+ { { 0xa7f53aa8327720c1L,0x956ca322ba092cc8L,0x8f03d64a28746c4dL,
+ 0x51fe178266d0d392L },
+ { 0xd19b34db3c832c80L,0x60dccc5c6da2e3b4L,0x245dd62e0a104cccL,
+ 0xa7ab1de1620b21fdL } },
+ /* 15 << 70 */
+ { { 0xb293ae0b3893d123L,0xf7b75783b15ee71cL,0x5aa3c61442a9468bL,
+ 0xd686123cdb15d744L },
+ { 0x8c616891a7ab4116L,0x6fcd72c8a4e6a459L,0xac21911077e5fad7L,
+ 0xfb6a20e7704fa46bL } },
+ /* 16 << 70 */
+ { { 0xe839be7d341d81dcL,0xcddb688932148379L,0xda6211a1f7026eadL,
+ 0xf3b2575ff4d1cc5eL },
+ { 0x40cfc8f6a7a73ae6L,0x83879a5e61d5b483L,0xc5acb1ed41a50ebcL,
+ 0x59a60cc83c07d8faL } },
+ /* 17 << 70 */
+ { { 0x1b73bdceb1876262L,0x2b0d79f012af4ee9L,0x8bcf3b0bd46e1d07L,
+ 0x17d6af9de45d152fL },
+ { 0x735204616d736451L,0x43cbbd9756b0bf5aL,0xb0833a5bd5999b9dL,
+ 0x702614f0eb72e398L } },
+ /* 18 << 70 */
+ { { 0x0aadf01a59c3e9f8L,0x40200e77ce6b3d16L,0xda22bdd3deddafadL,
+ 0x76dedaf4310d72e1L },
+ { 0x49ef807c4bc2e88fL,0x6ba81291146dd5a5L,0xa1a4077a7d8d59e9L,
+ 0x87b6a2e7802db349L } },
+ /* 19 << 70 */
+ { { 0xd56799971b4e598eL,0xf499ef1f06fe4b1dL,0x3978d3aefcb267c5L,
+ 0xb582b557235786d0L },
+ { 0x32b3b2ca1715cb07L,0x4c3de6a28480241dL,0x63b5ffedcb571ecdL,
+ 0xeaf53900ed2fe9a9L } },
+ /* 20 << 70 */
+ { { 0xdec98d4ac3b81990L,0x1cb837229e0cc8feL,0xfe0b0491d2b427b9L,
+ 0x0f2386ace983a66cL },
+ { 0x930c4d1eb3291213L,0xa2f82b2e59a62ae4L,0x77233853f93e89e3L,
+ 0x7f8063ac11777c7fL } },
+ /* 21 << 70 */
+ { { 0xff0eb56759ad2877L,0x6f4546429865c754L,0xe6fe701a236e9a84L,
+ 0xc586ef1606e40fc3L },
+ { 0x3f62b6e024bafad9L,0xc8b42bd264da906aL,0xc98e1eb4da3276a0L,
+ 0x30d0e5fc06cbf852L } },
+ /* 22 << 70 */
+ { { 0x1b6b2ae1e8b4dfd4L,0xd754d5c78301cbacL,0x66097629112a39acL,
+ 0xf86b599993ba4ab9L },
+ { 0x26c9dea799f9d581L,0x0473b1a8c2fafeaaL,0x1469af553b2505a5L,
+ 0x227d16d7d6a43323L } },
+ /* 23 << 70 */
+ { { 0x3316f73cad3d97f9L,0x52bf3bb51f137455L,0x953eafeb09954e7cL,
+ 0xa721dfeddd732411L },
+ { 0xb4929821141d4579L,0x3411321caa3bd435L,0xafb355aa17fa6015L,
+ 0xb4e7ef4a18e42f0eL } },
+ /* 24 << 70 */
+ { { 0x604ac97c59371000L,0xe1c48c707f759c18L,0x3f62ecc5a5db6b65L,
+ 0x0a78b17338a21495L },
+ { 0x6be1819dbcc8ad94L,0x70dc04f6d89c3400L,0x462557b4a6b4840aL,
+ 0x544c6ade60bd21c0L } },
+ /* 25 << 70 */
+ { { 0x6a00f24e907a544bL,0xa7520dcb313da210L,0xfe939b7511e4994bL,
+ 0x918b6ba6bc275d70L },
+ { 0xd3e5e0fc644be892L,0x707a9816fdaf6c42L,0x60145567f15c13feL,
+ 0x4818ebaae130a54aL } },
+ /* 26 << 70 */
+ { { 0x28aad3ad58d2f767L,0xdc5267fdd7e7c773L,0x4919cc88c3afcc98L,
+ 0xaa2e6ab02db8cd4bL },
+ { 0xd46fec04d0c63eaaL,0xa1cb92c519ffa832L,0x678dd178e43a631fL,
+ 0xfb5ae1cd3dc788b3L } },
+ /* 27 << 70 */
+ { { 0x68b4fb906e77de04L,0x7992bcf0f06dbb97L,0x896e6a13c417c01dL,
+ 0x8d96332cb956be01L },
+ { 0x902fc93a413aa2b9L,0x99a4d915fc98c8a5L,0x52c29407565f1137L,
+ 0x4072690f21e4f281L } },
+ /* 28 << 70 */
+ { { 0x36e607cf02ff6072L,0xa47d2ca98ad98cdcL,0xbf471d1ef5f56609L,
+ 0xbcf86623f264ada0L },
+ { 0xb70c0687aa9e5cb6L,0xc98124f217401c6cL,0x8189635fd4a61435L,
+ 0xd28fb8afa9d98ea6L } },
+ /* 29 << 70 */
+ { { 0xb9a67c2a40c251f8L,0x88cd5d87a2da44beL,0x437deb96e09b5423L,
+ 0x150467db64287dc1L },
+ { 0xe161debbcdabb839L,0xa79e9742f1839a3eL,0xbb8dd3c2652d202bL,
+ 0x7b3e67f7e9f97d96L } },
+ /* 30 << 70 */
+ { { 0x5aa5d78fb1cb6ac9L,0xffa13e8eca1d0d45L,0x369295dd2ba5bf95L,
+ 0xd68bd1f839aff05eL },
+ { 0xaf0d86f926d783f2L,0x543a59b3fc3aafc1L,0x3fcf81d27b7da97cL,
+ 0xc990a056d25dee46L } },
+ /* 31 << 70 */
+ { { 0x3e6775b8519cce2cL,0xfc9af71fae13d863L,0x774a4a6f47c1605cL,
+ 0x46ba42452fd205e8L },
+ { 0xa06feea4d3fd524dL,0x1e7246416de1acc2L,0xf53816f1334e2b42L,
+ 0x49e5918e922f0024L } },
+ /* 32 << 70 */
+ { { 0x439530b665c7322dL,0xcf12cc01b3c1b3fbL,0xc70b01860172f685L,
+ 0xb915ee221b58391dL },
+ { 0x9afdf03ba317db24L,0x87dec65917b8ffc4L,0x7f46597be4d3d050L,
+ 0x80a1c1ed006500e7L } },
+ /* 33 << 70 */
+ { { 0x84902a9678bf030eL,0xfb5e9c9a50560148L,0x6dae0a9263362426L,
+ 0xdcaeecf4a9e30c40L },
+ { 0xc0d887bb518d0c6bL,0x99181152cb985b9dL,0xad186898ef7bc381L,
+ 0x18168ffb9ee46201L } },
+ /* 34 << 70 */
+ { { 0x9a04cdaa2502753cL,0xbb279e2651407c41L,0xeacb03aaf23564e5L,
+ 0x1833658271e61016L },
+ { 0x8684b8c4eb809877L,0xb336e18dea0e672eL,0xefb601f034ee5867L,
+ 0x2733edbe1341cfd1L } },
+ /* 35 << 70 */
+ { { 0xb15e809a26025c3cL,0xe6e981a69350df88L,0x923762378502fd8eL,
+ 0x4791f2160c12be9bL },
+ { 0xb725678925f02425L,0xec8631947a974443L,0x7c0ce882fb41cc52L,
+ 0xc266ff7ef25c07f2L } },
+ /* 36 << 70 */
+ { { 0x3d4da8c3017025f3L,0xefcf628cfb9579b4L,0x5c4d00161f3716ecL,
+ 0x9c27ebc46801116eL },
+ { 0x5eba0ea11da1767eL,0xfe15145247004c57L,0x3ace6df68c2373b7L,
+ 0x75c3dffe5dbc37acL } },
+ /* 37 << 70 */
+ { { 0x3dc32a73ddc925fcL,0xb679c8412f65ee0bL,0x715a3295451cbfebL,
+ 0xd9889768f76e9a29L },
+ { 0xec20ce7fb28ad247L,0xe99146c400894d79L,0x71457d7c9f5e3ea7L,
+ 0x097b266238030031L } },
+ /* 38 << 70 */
+ { { 0xdb7f6ae6cf9f82a8L,0x319decb9438f473aL,0xa63ab386283856c3L,
+ 0x13e3172fb06a361bL },
+ { 0x2959f8dc7d5a006cL,0x2dbc27c675fba752L,0xc1227ab287c22c9eL,
+ 0x06f61f7571a268b2L } },
+ /* 39 << 70 */
+ { { 0x1b6bb97104779ce2L,0xaca838120aadcb1dL,0x297ae0bcaeaab2d5L,
+ 0xa5c14ee75bfb9f13L },
+ { 0xaa00c583f17a62c7L,0x39eb962c173759f6L,0x1eeba1d486c9a88fL,
+ 0x0ab6c37adf016c5eL } },
+ /* 40 << 70 */
+ { { 0xa2a147dba28a0749L,0x246c20d6ee519165L,0x5068d1b1d3810715L,
+ 0xb1e7018c748160b9L },
+ { 0x03f5b1faf380ff62L,0xef7fb1ddf3cb2c1eL,0xeab539a8fc91a7daL,
+ 0x83ddb707f3f9b561L } },
+ /* 41 << 70 */
+ { { 0xc550e211fe7df7a4L,0xa7cd07f2063f6f40L,0xb0de36352976879cL,
+ 0xb5f83f85e55741daL },
+ { 0x4ea9d25ef3d8ac3dL,0x6fe2066f62819f02L,0x4ab2b9c2cef4a564L,
+ 0x1e155d965ffa2de3L } },
+ /* 42 << 70 */
+ { { 0x0eb0a19bc3a72d00L,0x4037665b8513c31bL,0x2fb2b6bf04c64637L,
+ 0x45c34d6e08cdc639L },
+ { 0x56f1e10ff01fd796L,0x4dfb8101fe3667b8L,0xe0eda2539021d0c0L,
+ 0x7a94e9ff8a06c6abL } },
+ /* 43 << 70 */
+ { { 0x2d3bb0d9bb9aa882L,0xea20e4e5ec05fd10L,0xed7eeb5f1a1ca64eL,
+ 0x2fa6b43cc6327cbdL },
+ { 0xb577e3cf3aa91121L,0x8c6bd5ea3a34079bL,0xd7e5ba3960e02fc0L,
+ 0xf16dd2c390141bf8L } },
+ /* 44 << 70 */
+ { { 0xb57276d980101b98L,0x760883fdb82f0f66L,0x89d7de754bc3eff3L,
+ 0x03b606435dc2ab40L },
+ { 0xcd6e53dfe05beeacL,0xf2f1e862bc3325cdL,0xdd0f7921774f03c3L,
+ 0x97ca72214552cc1bL } },
+ /* 45 << 70 */
+ { { 0x5a0d6afe1cd19f72L,0xa20915dcf183fbebL,0x9fda4b40832c403cL,
+ 0x32738eddbe425442L },
+ { 0x469a1df6b5eccf1aL,0x4b5aff4228bbe1f0L,0x31359d7f570dfc93L,
+ 0xa18be235f0088628L } },
+ /* 46 << 70 */
+ { { 0xa5b30fbab00ed3a9L,0x34c6137473cdf8beL,0x2c5c5f46abc56797L,
+ 0x5cecf93db82a8ae2L },
+ { 0x7d3dbe41a968fbf0L,0xd23d45831a5c7f3dL,0xf28f69a0c087a9c7L,
+ 0xc2d75471474471caL } },
+ /* 47 << 70 */
+ { { 0x36ec9f4a4eb732ecL,0x6c943bbdb1ca6bedL,0xd64535e1f2457892L,
+ 0x8b84a8eaf7e2ac06L },
+ { 0xe0936cd32499dd5fL,0x12053d7e0ed04e57L,0x4bdd0076e4305d9dL,
+ 0x34a527b91f67f0a2L } },
+ /* 48 << 70 */
+ { { 0xe79a4af09cec46eaL,0xb15347a1658b9bc7L,0x6bd2796f35af2f75L,
+ 0xac9579904051c435L },
+ { 0x2669dda3c33a655dL,0x5d503c2e88514aa3L,0xdfa113373753dd41L,
+ 0x3f0546730b754f78L } },
+ /* 49 << 70 */
+ { { 0xbf185677496125bdL,0xfb0023c83775006cL,0xfa0f072f3a037899L,
+ 0x4222b6eb0e4aea57L },
+ { 0x3dde5e767866d25aL,0xb6eb04f84837aa6fL,0x5315591a2cf1cdb8L,
+ 0x6dfb4f412d4e683cL } },
+ /* 50 << 70 */
+ { { 0x7e923ea448ee1f3aL,0x9604d9f705a2afd5L,0xbe1d4a3340ea4948L,
+ 0x5b45f1f4b44cbd2fL },
+ { 0x5faf83764acc757eL,0xa7cf9ab863d68ff7L,0x8ad62f69df0e404bL,
+ 0xd65f33c212bdafdfL } },
+ /* 51 << 70 */
+ { { 0xc365de15a377b14eL,0x6bf5463b8e39f60cL,0x62030d2d2ce68148L,
+ 0xd95867efe6f843a8L },
+ { 0xd39a0244ef5ab017L,0x0bd2d8c14ab55d12L,0xc9503db341639169L,
+ 0x2d4e25b0f7660c8aL } },
+ /* 52 << 70 */
+ { { 0x760cb3b5e224c5d7L,0xfa3baf8c68616919L,0x9fbca1138d142552L,
+ 0x1ab18bf17669ebf5L },
+ { 0x55e6f53e9bdf25ddL,0x04cc0bf3cb6cd154L,0x595bef4995e89080L,
+ 0xfe9459a8104a9ac1L } },
+ /* 53 << 70 */
+ { { 0xad2d89cacce9bb32L,0xddea65e1f7de8285L,0x62ed8c35b351bd4bL,
+ 0x4150ff360c0e19a7L },
+ { 0x86e3c801345f4e47L,0x3bf21f71203a266cL,0x7ae110d4855b1f13L,
+ 0x5d6aaf6a07262517L } },
+ /* 54 << 70 */
+ { { 0x1e0f12e1813d28f1L,0x6000e11d7ad7a523L,0xc7d8deefc744a17bL,
+ 0x1e990b4814c05a00L },
+ { 0x68fddaee93e976d5L,0x696241d146610d63L,0xb204e7c3893dda88L,
+ 0x8bccfa656a3a6946L } },
+ /* 55 << 70 */
+ { { 0xb59425b4c5cd1411L,0x701b4042ff3658b1L,0xe3e56bca4784cf93L,
+ 0x27de5f158fe68d60L },
+ { 0x4ab9cfcef8d53f19L,0xddb10311a40a730dL,0x6fa73cd14eee0a8aL,
+ 0xfd5487485249719dL } },
+ /* 56 << 70 */
+ { { 0x49d66316a8123ef0L,0x73c32db4e7f95438L,0x2e2ed2090d9e7854L,
+ 0xf98a93299d9f0507L },
+ { 0xc5d33cf60c6aa20aL,0x9a32ba1475279bb2L,0x7e3202cb774a7307L,
+ 0x64ed4bc4e8c42dbdL } },
+ /* 57 << 70 */
+ { { 0xc20f1a06d4caed0dL,0xb8021407171d22b3L,0xd426ca04d13268d7L,
+ 0x9237700725f4d126L },
+ { 0x4204cbc371f21a85L,0x18461b7af82369baL,0xc0c07d313fc858f9L,
+ 0x5deb5a50e2bab569L } },
+ /* 58 << 70 */
+ { { 0xd5959d46d5eea89eL,0xfdff842408437f4bL,0xf21071e43cfe254fL,
+ 0x7241769695468321L },
+ { 0x5d8288b9102cae3eL,0x2d143e3df1965dffL,0x00c9a376a078d847L,
+ 0x6fc0da3126028731L } },
+ /* 59 << 70 */
+ { { 0xa2baeadfe45083a2L,0x66bc72185e5b4bcdL,0x2c826442d04b8e7fL,
+ 0xc19f54516c4b586bL },
+ { 0x60182c495b7eeed5L,0xd9954ecd7aa9dfa1L,0xa403a8ecc73884adL,
+ 0x7fb17de29bb39041L } },
+ /* 60 << 70 */
+ { { 0x694b64c5abb020e8L,0x3d18c18419c4eec7L,0x9c4673ef1c4793e5L,
+ 0xc7b8aeb5056092e6L },
+ { 0x3aa1ca43f0f8c16bL,0x224ed5ecd679b2f6L,0x0d56eeaf55a205c9L,
+ 0xbfe115ba4b8e028bL } },
+ /* 61 << 70 */
+ { { 0x97e608493927f4feL,0xf91fbf94759aa7c5L,0x985af7696be90a51L,
+ 0xc1277b7878ccb823L },
+ { 0x395b656ee7a75952L,0x00df7de0928da5f5L,0x09c231754ca4454fL,
+ 0x4ec971f47aa2d3c1L } },
+ /* 62 << 70 */
+ { { 0x45c3c507e75d9cccL,0x63b7be8a3dc90306L,0x37e09c665db44bdcL,
+ 0x50d60da16841c6a2L },
+ { 0x6f9b65ee08df1b12L,0x387348797ff089dfL,0x9c331a663fe8013dL,
+ 0x017f5de95f42fcc8L } },
+ /* 63 << 70 */
+ { { 0x43077866e8e57567L,0xc9f781cef9fcdb18L,0x38131dda9b12e174L,
+ 0x25d84aa38a03752aL },
+ { 0x45e09e094d0c0ce2L,0x1564008b92bebba5L,0xf7e8ad31a87284c7L,
+ 0xb7c4b46c97e7bbaaL } },
+ /* 64 << 70 */
+ { { 0x3e22a7b397acf4ecL,0x0426c4005ea8b640L,0x5e3295a64e969285L,
+ 0x22aabc59a6a45670L },
+ { 0xb929714c5f5942bcL,0x9a6168bdfa3182edL,0x2216a665104152baL,
+ 0x46908d03b6926368L } },
+ /* 0 << 77 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 77 */
+ { { 0xa9f5d8745a1251fbL,0x967747a8c72725c7L,0x195c33e531ffe89eL,
+ 0x609d210fe964935eL },
+ { 0xcafd6ca82fe12227L,0xaf9b5b960426469dL,0x2e9ee04c5693183cL,
+ 0x1084a333c8146fefL } },
+ /* 2 << 77 */
+ { { 0x96649933aed1d1f7L,0x566eaff350563090L,0x345057f0ad2e39cfL,
+ 0x148ff65b1f832124L },
+ { 0x042e89d4cf94cf0dL,0x319bec84520c58b3L,0x2a2676265361aa0dL,
+ 0xc86fa3028fbc87adL } },
+ /* 3 << 77 */
+ { { 0xfc83d2ab5c8b06d5L,0xb1a785a2fe4eac46L,0xb99315bc846f7779L,
+ 0xcf31d816ef9ea505L },
+ { 0x2391fe6a15d7dc85L,0x2f132b04b4016b33L,0x29547fe3181cb4c7L,
+ 0xdb66d8a6650155a1L } },
+ /* 4 << 77 */
+ { { 0x6b66d7e1adc1696fL,0x98ebe5930acd72d0L,0x65f24550cc1b7435L,
+ 0xce231393b4b9a5ecL },
+ { 0x234a22d4db067df9L,0x98dda095caff9b00L,0x1bbc75a06100c9c1L,
+ 0x1560a9c8939cf695L } },
+ /* 5 << 77 */
+ { { 0xcf006d3e99e0925fL,0x2dd74a966322375aL,0xc58b446ab56af5baL,
+ 0x50292683e0b9b4f1L },
+ { 0xe2c34cb41aeaffa3L,0x8b17203f9b9587c1L,0x6d559207ead1350cL,
+ 0x2b66a215fb7f9604L } },
+ /* 6 << 77 */
+ { { 0x0850325efe51bf74L,0x9c4f579e5e460094L,0x5c87b92a76da2f25L,
+ 0x889de4e06febef33L },
+ { 0x6900ec06646083ceL,0xbe2a0335bfe12773L,0xadd1da35c5344110L,
+ 0x757568b7b802cd20L } },
+ /* 7 << 77 */
+ { { 0x7555977900f7e6c8L,0x38e8b94f0facd2f0L,0xfea1f3af03fde375L,
+ 0x5e11a1d875881dfcL },
+ { 0xb3a6b02ec1e2f2efL,0x193d2bbbc605a6c5L,0x325ffeee339a0b2dL,
+ 0x27b6a7249e0c8846L } },
+ /* 8 << 77 */
+ { { 0xe4050f1cf1c367caL,0x9bc85a9bc90fbc7dL,0xa373c4a2e1a11032L,
+ 0xb64232b7ad0393a9L },
+ { 0xf5577eb0167dad29L,0x1604f30194b78ab2L,0x0baa94afe829348bL,
+ 0x77fbd8dd41654342L } },
+ /* 9 << 77 */
+ { { 0xdab50ea5b964e39aL,0xd4c29e3cd0d3c76eL,0x80dae67c56d11964L,
+ 0x7307a8bfe5ffcc2fL },
+ { 0x65bbc1aa91708c3bL,0xa151e62c28bf0eebL,0x6cb533816fa34db7L,
+ 0x5139e05ca29403a8L } },
+ /* 10 << 77 */
+ { { 0x6ff651b494a7cd2eL,0x5671ffd10699336cL,0x6f5fd2cc979a896aL,
+ 0x11e893a8d8148cefL },
+ { 0x988906a165cf7b10L,0x81b67178c50d8485L,0x7c0deb358a35b3deL,
+ 0x423ac855c1d29799L } },
+ /* 11 << 77 */
+ { { 0xaf580d87dac50b74L,0x28b2b89f5869734cL,0x99a3b936874e28fbL,
+ 0xbb2c919025f3f73aL },
+ { 0x199f691884a9d5b7L,0x7ebe23257e770374L,0xf442e1070738efe2L,
+ 0xcf9f3f56cf9082d2L } },
+ /* 12 << 77 */
+ { { 0x719f69e109618708L,0xcc9e8364c183f9b1L,0xec203a95366a21afL,
+ 0x6aec5d6d068b141fL },
+ { 0xee2df78a994f04e9L,0xb39ccae8271245b0L,0xb875a4a997e43f4fL,
+ 0x507dfe11db2cea98L } },
+ /* 13 << 77 */
+ { { 0x4fbf81cb489b03e9L,0xdb86ec5b6ec414faL,0xfad444f9f51b3ae5L,
+ 0xca7d33d61914e3feL },
+ { 0xa9c32f5c0ae6c4d0L,0xa9ca1d1e73969568L,0x98043c311aa7467eL,
+ 0xe832e75ce21b5ac6L } },
+ /* 14 << 77 */
+ { { 0x314b7aea5232123dL,0x08307c8c65ae86dbL,0x06e7165caa4668edL,
+ 0xb170458bb4d3ec39L },
+ { 0x4d2e3ec6c19bb986L,0xc5f34846ae0304edL,0x917695a06c9f9722L,
+ 0x6c7f73174cab1c0aL } },
+ /* 15 << 77 */
+ { { 0x6295940e9d6d2e8bL,0xd318b8c1549f7c97L,0x2245320497713885L,
+ 0x468d834ba8a440feL },
+ { 0xd81fe5b2bfba796eL,0x152364db6d71f116L,0xbb8c7c59b5b66e53L,
+ 0x0b12c61b2641a192L } },
+ /* 16 << 77 */
+ { { 0x31f14802fcf0a7fdL,0x42fd07895488b01eL,0x71d78d6d9952b498L,
+ 0x8eb572d907ac5201L },
+ { 0xe0a2a44c4d194a88L,0xd2b63fd9ba017e66L,0x78efc6c8f888aefcL,
+ 0xb76f6bda4a881a11L } },
+ /* 17 << 77 */
+ { { 0x187f314bb46c2397L,0x004cf5665ded2819L,0xa9ea570438764d34L,
+ 0xbba4521778084709L },
+ { 0x064745711171121eL,0xad7b7eb1e7c9b671L,0xdacfbc40730f7507L,
+ 0x178cd8c6c7ad7bd1L } },
+ /* 18 << 77 */
+ { { 0xbf0be101b2a67238L,0x3556d367af9c14f2L,0x104b7831a5662075L,
+ 0x58ca59bb79d9e60aL },
+ { 0x4bc45392a569a73bL,0x517a52e85698f6c9L,0x85643da5aeadd755L,
+ 0x1aed0cd52a581b84L } },
+ /* 19 << 77 */
+ { { 0xb9b4ff8480af1372L,0x244c3113f1ba5d1fL,0x2a5dacbef5f98d31L,
+ 0x2c3323e84375bc2aL },
+ { 0x17a3ab4a5594b1ddL,0xa1928bfbceb4797eL,0xe83af245e4886a19L,
+ 0x8979d54672b5a74aL } },
+ /* 20 << 77 */
+ { { 0xa0f726bc19f9e967L,0xd9d03152e8fbbf4eL,0xcfd6f51db7707d40L,
+ 0x633084d963f6e6e0L },
+ { 0xedcd9cdc55667eafL,0x73b7f92b2e44d56fL,0xfb2e39b64e962b14L,
+ 0x7d408f6ef671fcbfL } },
+ /* 21 << 77 */
+ { { 0xcc634ddc164a89bbL,0x74a42bb23ef3bd05L,0x1280dbb2428decbbL,
+ 0x6103f6bb402c8596L },
+ { 0xfa2bf581355a5752L,0x562f96a800946674L,0x4e4ca16d6da0223bL,
+ 0xfe47819f28d3aa25L } },
+ /* 22 << 77 */
+ { { 0x9eea3075f8dfcf8aL,0xa284f0aa95669825L,0xb3fca250867d3fd8L,
+ 0x20757b5f269d691eL },
+ { 0xf2c2402093b8a5deL,0xd3f93359ebc06da6L,0x1178293eb2739c33L,
+ 0xd2a3e770bcd686e5L } },
+ /* 23 << 77 */
+ { { 0xa76f49f4cd941534L,0x0d37406be3c71c0eL,0x172d93973b97f7e3L,
+ 0xec17e239bd7fd0deL },
+ { 0xe32905516f496ba2L,0x6a69317236ad50e7L,0xc4e539a283e7eff5L,
+ 0x752737e718e1b4cfL } },
+ /* 24 << 77 */
+ { { 0xa2f7932c68af43eeL,0x5502468e703d00bdL,0xe5dc978f2fb061f5L,
+ 0xc9a1904a28c815adL },
+ { 0xd3af538d470c56a4L,0x159abc5f193d8cedL,0x2a37245f20108ef3L,
+ 0xfa17081e223f7178L } },
+ /* 25 << 77 */
+ { { 0x27b0fb2b10c8c0f5L,0x2102c3ea40650547L,0x594564df8ac3bfa7L,
+ 0x98102033509dad96L },
+ { 0x6989643ff1d18a13L,0x35eebd91d7fc5af0L,0x078d096afaeaafd8L,
+ 0xb7a89341def3de98L } },
+ /* 26 << 77 */
+ { { 0x2a206e8decf2a73aL,0x066a63978e551994L,0x3a6a088ab98d53a2L,
+ 0x0ce7c67c2d1124aaL },
+ { 0x48cec671759a113cL,0xe3b373d34f6f67faL,0x5455d479fd36727bL,
+ 0xe5a428eea13c0d81L } },
+ /* 27 << 77 */
+ { { 0xb853dbc81c86682bL,0xb78d2727b8d02b2aL,0xaaf69bed8ebc329aL,
+ 0xdb6b40b3293b2148L },
+ { 0xe42ea77db8c4961fL,0xb1a12f7c20e5e0abL,0xa0ec527479e8b05eL,
+ 0x68027391fab60a80L } },
+ /* 28 << 77 */
+ { { 0x6bfeea5f16b1bd5eL,0xf957e4204de30ad3L,0xcbaf664e6a353b9eL,
+ 0x5c87331226d14febL },
+ { 0x4e87f98cb65f57cbL,0xdb60a6215e0cdd41L,0x67c16865a6881440L,
+ 0x1093ef1a46ab52aaL } },
+ /* 29 << 77 */
+ { { 0xc095afb53f4ece64L,0x6a6bb02e7604551aL,0x55d44b4e0b26b8cdL,
+ 0xe5f9a999f971268aL },
+ { 0xc08ec42511a7de84L,0x83568095fda469ddL,0x737bfba16c6c90a2L,
+ 0x1cb9c4a0be229831L } },
+ /* 30 << 77 */
+ { { 0x93bccbbabb2eec64L,0xa0c23b64da03adbeL,0x5f7aa00ae0e86ac4L,
+ 0x470b941efc1401e6L },
+ { 0x5ad8d6799df43574L,0x4ccfb8a90f65d810L,0x1bce80e3aa7fbd81L,
+ 0x273291ad9508d20aL } },
+ /* 31 << 77 */
+ { { 0xf5c4b46b42a92806L,0x810684eca86ab44aL,0x4591640bca0bc9f8L,
+ 0xb5efcdfc5c4b6054L },
+ { 0x16fc89076e9edd12L,0xe29d0b50d4d792f9L,0xa45fd01c9b03116dL,
+ 0x85035235c81765a4L } },
+ /* 32 << 77 */
+ { { 0x1fe2a9b2b4b4b67cL,0xc1d10df0e8020604L,0x9d64abfcbc8058d8L,
+ 0x8943b9b2712a0fbbL },
+ { 0x90eed9143b3def04L,0x85ab3aa24ce775ffL,0x605fd4ca7bbc9040L,
+ 0x8b34a564e2c75dfbL } },
+ /* 33 << 77 */
+ { { 0x41ffc94a10358560L,0x2d8a50729e5c28aaL,0xe915a0fc4cc7eb15L,
+ 0xe9efab058f6d0f5dL },
+ { 0xdbab47a9d19e9b91L,0x8cfed7450276154cL,0x154357ae2cfede0dL,
+ 0x520630df19f5a4efL } },
+ /* 34 << 77 */
+ { { 0x25759f7ce382360fL,0xb6db05c988bf5857L,0x2917d61d6c58d46cL,
+ 0x14f8e491fd20cb7aL },
+ { 0xb68a727a11c20340L,0x0386f86faf7ccbb6L,0x5c8bc6ccfee09a20L,
+ 0x7d76ff4abb7eea35L } },
+ /* 35 << 77 */
+ { { 0xa7bdebe7db15be7aL,0x67a08054d89f0302L,0x56bf0ea9c1193364L,
+ 0xc824446762837ebeL },
+ { 0x32bd8e8b20d841b8L,0x127a0548dbb8a54fL,0x83dd4ca663b20236L,
+ 0x87714718203491faL } },
+ /* 36 << 77 */
+ { { 0x4dabcaaaaa8a5288L,0x91cc0c8aaf23a1c9L,0x34c72c6a3f220e0cL,
+ 0xbcc20bdf1232144aL },
+ { 0x6e2f42daa20ede1bL,0xc441f00c74a00515L,0xbf46a5b6734b8c4bL,
+ 0x574095037b56c9a4L } },
+ /* 37 << 77 */
+ { { 0x9f735261e4585d45L,0x9231faed6734e642L,0x1158a176be70ee6cL,
+ 0x35f1068d7c3501bfL },
+ { 0x6beef900a2d26115L,0x649406f2ef0afee3L,0x3f43a60abc2420a1L,
+ 0x509002a7d5aee4acL } },
+ /* 38 << 77 */
+ { { 0xb46836a53ff3571bL,0x24f98b78837927c1L,0x6254256a4533c716L,
+ 0xf27abb0bd07ee196L },
+ { 0xd7cf64fc5c6d5bfdL,0x6915c751f0cd7a77L,0xd9f590128798f534L,
+ 0x772b0da8f81d8b5fL } },
+ /* 39 << 77 */
+ { { 0x1244260c2e03fa69L,0x36cf0e3a3be1a374L,0x6e7c1633ef06b960L,
+ 0xa71a4c55671f90f6L },
+ { 0x7a94125133c673dbL,0xc0bea51073e8c131L,0x61a8a699d4f6c734L,
+ 0x25e78c88341ed001L } },
+ /* 40 << 77 */
+ { { 0x5c18acf88e2f7d90L,0xfdbf33d777be32cdL,0x0a085cd7d2eb5ee9L,
+ 0x2d702cfbb3201115L },
+ { 0xb6e0ebdb85c88ce8L,0x23a3ce3c1e01d617L,0x3041618e567333acL,
+ 0x9dd0fd8f157edb6bL } },
+ /* 41 << 77 */
+ { { 0x27f74702b57872b8L,0x2ef26b4f657d5fe1L,0x95426f0a57cf3d40L,
+ 0x847e2ad165a6067aL },
+ { 0xd474d9a009996a74L,0x16a56acd2a26115cL,0x02a615c3d16f4d43L,
+ 0xcc3fc965aadb85b7L } },
+ /* 42 << 77 */
+ { { 0x386bda73ce07d1b0L,0xd82910c258ad4178L,0x124f82cfcd2617f4L,
+ 0xcc2f5e8def691770L },
+ { 0x82702550b8c30cccL,0x7b856aea1a8e575aL,0xbb822fefb1ab9459L,
+ 0x085928bcec24e38eL } },
+ /* 43 << 77 */
+ { { 0x5d0402ecba8f4b4dL,0xc07cd4ba00b4d58bL,0x5d8dffd529227e7aL,
+ 0x61d44d0c31bf386fL },
+ { 0xe486dc2b135e6f4dL,0x680962ebe79410efL,0xa61bd343f10088b5L,
+ 0x6aa76076e2e28686L } },
+ /* 44 << 77 */
+ { { 0x80463d118fb98871L,0xcb26f5c3bbc76affL,0xd4ab8eddfbe03614L,
+ 0xc8eb579bc0cf2deeL },
+ { 0xcc004c15c93bae41L,0x46fbae5d3aeca3b2L,0x671235cf0f1e9ab1L,
+ 0xadfba9349ec285c1L } },
+ /* 45 << 77 */
+ { { 0x88ded013f216c980L,0xc8ac4fb8f79e0bc1L,0xa29b89c6fb97a237L,
+ 0xb697b7809922d8e7L },
+ { 0x3142c639ddb945b5L,0x447b06c7e094c3a9L,0xcdcb364272266c90L,
+ 0x633aad08a9385046L } },
+ /* 46 << 77 */
+ { { 0xa36c936bb57c6477L,0x871f8b64e94dbcc6L,0x28d0fb62a591a67bL,
+ 0x9d40e081c1d926f5L },
+ { 0x3111eaf6f2d84b5aL,0x228993f9a565b644L,0x0ccbf5922c83188bL,
+ 0xf87b30ab3df3e197L } },
+ /* 47 << 77 */
+ { { 0xb8658b317642bca8L,0x1a032d7f52800f17L,0x051dcae579bf9445L,
+ 0xeba6b8ee54a2e253L },
+ { 0x5c8b9cadd4485692L,0x84bda40e8986e9beL,0xd16d16a42f0db448L,
+ 0x8ec80050a14d4188L } },
+ /* 48 << 77 */
+ { { 0xb2b2610798fa7aaaL,0x41209ee4f073aa4eL,0xf1570359f2d6b19bL,
+ 0xcbe6868cfc577cafL },
+ { 0x186c4bdc32c04dd3L,0xa6c35faecfeee397L,0xb4a1b312f086c0cfL,
+ 0xe0a5ccc6d9461fe2L } },
+ /* 49 << 77 */
+ { { 0xc32278aa1536189fL,0x1126c55fba6df571L,0x0f71a602b194560eL,
+ 0x8b2d7405324bd6e1L },
+ { 0x8481939e3738be71L,0xb5090b1a1a4d97a9L,0x116c65a3f05ba915L,
+ 0x21863ad3aae448aaL } },
+ /* 50 << 77 */
+ { { 0xd24e2679a7aae5d3L,0x7076013d0de5c1c4L,0x2d50f8babb05b629L,
+ 0x73c1abe26e66efbbL },
+ { 0xefd4b422f2488af7L,0xe4105d02663ba575L,0x7eb60a8b53a69457L,
+ 0x62210008c945973bL } },
+ /* 51 << 77 */
+ { { 0xfb25547877a50ec6L,0xbf0392f70a37a72cL,0xa0a7a19c4be18e7aL,
+ 0x90d8ea1625b1e0afL },
+ { 0x7582a293ef953f57L,0x90a64d05bdc5465aL,0xca79c497e2510717L,
+ 0x560dbb7c18cb641fL } },
+ /* 52 << 77 */
+ { { 0x1d8e32864b66abfbL,0xd26f52e559030900L,0x1ee3f6435584941aL,
+ 0x6d3b3730569f5958L },
+ { 0x9ff2a62f4789dba5L,0x91fcb81572b5c9b7L,0xf446cb7d6c8f9a0eL,
+ 0x48f625c139b7ecb5L } },
+ /* 53 << 77 */
+ { { 0xbabae8011c6219b8L,0xe7a562d928ac2f23L,0xe1b4873226e20588L,
+ 0x06ee1cad775af051L },
+ { 0xda29ae43faff79f7L,0xc141a412652ee9e0L,0x1e127f6f195f4bd0L,
+ 0x29c6ab4f072f34f8L } },
+ /* 54 << 77 */
+ { { 0x7b7c147730448112L,0x82b51af1e4a38656L,0x2bf2028a2f315010L,
+ 0xc9a4a01f6ea88cd4L },
+ { 0xf63e95d8257e5818L,0xdd8efa10b4519b16L,0xed8973e00da910bfL,
+ 0xed49d0775c0fe4a9L } },
+ /* 55 << 77 */
+ { { 0xac3aac5eb7caee1eL,0x1033898da7f4da57L,0x42145c0e5c6669b9L,
+ 0x42daa688c1aa2aa0L },
+ { 0x629cc15c1a1d885aL,0x25572ec0f4b76817L,0x8312e4359c8f8f28L,
+ 0x8107f8cd81965490L } },
+ /* 56 << 77 */
+ { { 0x516ff3a36fa6110cL,0x74fb1eb1fb93561fL,0x6c0c90478457522bL,
+ 0xcfd321046bb8bdc6L },
+ { 0x2d6884a2cc80ad57L,0x7c27fc3586a9b637L,0x3461baedadf4e8cdL,
+ 0x1d56251a617242f0L } },
+ /* 57 << 77 */
+ { { 0x0b80d209c955bef4L,0xdf02cad206adb047L,0xf0d7cb915ec74feeL,
+ 0xd25033751111ba44L },
+ { 0x9671755edf53cb36L,0x54dcb6123368551bL,0x66d69aacc8a025a4L,
+ 0x6be946c6e77ef445L } },
+ /* 58 << 77 */
+ { { 0x719946d1a995e094L,0x65e848f6e51e04d8L,0xe62f33006a1e3113L,
+ 0x1541c7c1501de503L },
+ { 0x4daac9faf4acfadeL,0x0e58589744cd0b71L,0x544fd8690a51cd77L,
+ 0x60fc20ed0031016dL } },
+ /* 59 << 77 */
+ { { 0x58b404eca4276867L,0x46f6c3cc34f34993L,0x477ca007c636e5bdL,
+ 0x8018f5e57c458b47L },
+ { 0xa1202270e47b668fL,0xcef48ccdee14f203L,0x23f98bae62ff9b4dL,
+ 0x55acc035c589edddL } },
+ /* 60 << 77 */
+ { { 0x3fe712af64db4444L,0x19e9d634becdd480L,0xe08bc047a930978aL,
+ 0x2dbf24eca1280733L },
+ { 0x3c0ae38c2cd706b2L,0x5b012a5b359017b9L,0x3943c38c72e0f5aeL,
+ 0x786167ea57176fa3L } },
+ /* 61 << 77 */
+ { { 0xe5f9897d594881dcL,0x6b5efad8cfb820c1L,0xb2179093d55018deL,
+ 0x39ad7d320bac56ceL },
+ { 0xb55122e02cfc0e81L,0x117c4661f6d89daaL,0x362d01e1cb64fa09L,
+ 0x6a309b4e3e9c4dddL } },
+ /* 62 << 77 */
+ { { 0xfa979fb7abea49b1L,0xb4b1d27d10e2c6c5L,0xbd61c2c423afde7aL,
+ 0xeb6614f89786d358L },
+ { 0x4a5d816b7f6f7459L,0xe431a44f09360e7bL,0x8c27a032c309914cL,
+ 0xcea5d68acaede3d8L } },
+ /* 63 << 77 */
+ { { 0x3668f6653a0a3f95L,0x893694167ceba27bL,0x89981fade4728fe9L,
+ 0x7102c8a08a093562L },
+ { 0xbb80310e235d21c8L,0x505e55d1befb7f7bL,0xa0a9081112958a67L,
+ 0xd67e106a4d851fefL } },
+ /* 64 << 77 */
+ { { 0xb84011a9431dd80eL,0xeb7c7cca73306cd9L,0x20fadd29d1b3b730L,
+ 0x83858b5bfe37b3d3L },
+ { 0xbf4cd193b6251d5cL,0x1cca1fd31352d952L,0xc66157a490fbc051L,
+ 0x7990a63889b98636L } },
+ /* 0 << 84 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 84 */
+ { { 0xe5aa692a87dec0e1L,0x010ded8df7b39d00L,0x7b1b80c854cfa0b5L,
+ 0x66beb876a0f8ea28L },
+ { 0x50d7f5313476cd0eL,0xa63d0e65b08d3949L,0x1a09eea953479fc6L,
+ 0x82ae9891f499e742L } },
+ /* 2 << 84 */
+ { { 0xab58b9105ca7d866L,0x582967e23adb3b34L,0x89ae4447cceac0bcL,
+ 0x919c667c7bf56af5L },
+ { 0x9aec17b160f5dcd7L,0xec697b9fddcaadbcL,0x0b98f341463467f5L,
+ 0xb187f1f7a967132fL } },
+ /* 3 << 84 */
+ { { 0x90fe7a1d214aeb18L,0x1506af3c741432f7L,0xbb5565f9e591a0c4L,
+ 0x10d41a77b44f1bc3L },
+ { 0xa09d65e4a84bde96L,0x42f060d8f20a6a1cL,0x652a3bfdf27f9ce7L,
+ 0xb6bdb65c3b3d739fL } },
+ /* 4 << 84 */
+ { { 0xeb5ddcb6ec7fae9fL,0x995f2714efb66e5aL,0xdee95d8e69445d52L,
+ 0x1b6c2d4609e27620L },
+ { 0x32621c318129d716L,0xb03909f10958c1aaL,0x8c468ef91af4af63L,
+ 0x162c429ffba5cdf6L } },
+ /* 5 << 84 */
+ { { 0x2f682343753b9371L,0x29cab45a5f1f9cd7L,0x571623abb245db96L,
+ 0xc507db093fd79999L },
+ { 0x4e2ef652af036c32L,0x86f0cc7805018e5cL,0xc10a73d4ab8be350L,
+ 0x6519b3977e826327L } },
+ /* 6 << 84 */
+ { { 0xe8cb5eef9c053df7L,0x8de25b37b300ea6fL,0xdb03fa92c849cffbL,
+ 0x242e43a7e84169bbL },
+ { 0xe4fa51f4dd6f958eL,0x6925a77ff4445a8dL,0xe6e72a50e90d8949L,
+ 0xc66648e32b1f6390L } },
+ /* 7 << 84 */
+ { { 0xb2ab1957173e460cL,0x1bbbce7530704590L,0xc0a90dbddb1c7162L,
+ 0x505e399e15cdd65dL },
+ { 0x68434dcb57797ab7L,0x60ad35ba6a2ca8e8L,0x4bfdb1e0de3336c1L,
+ 0xbbef99ebd8b39015L } },
+ /* 8 << 84 */
+ { { 0x6c3b96f31711ebecL,0x2da40f1fce98fdc4L,0xb99774d357b4411fL,
+ 0x87c8bdf415b65bb6L },
+ { 0xda3a89e3c2eef12dL,0xde95bb9b3c7471f3L,0x600f225bd812c594L,
+ 0x54907c5d2b75a56bL } },
+ /* 9 << 84 */
+ { { 0xa93cc5f08db60e35L,0x743e3cd6fa833319L,0x7dad5c41f81683c9L,
+ 0x70c1e7d99c34107eL },
+ { 0x0edc4a39a6be0907L,0x36d4703586d0b7d3L,0x8c76da03272bfa60L,
+ 0x0b4a07ea0f08a414L } },
+ /* 10 << 84 */
+ { { 0x699e4d2945c1dd53L,0xcadc5898231debb5L,0xdf49fcc7a77f00e0L,
+ 0x93057bbfa73e5a0eL },
+ { 0x2f8b7ecd027a4cd1L,0x114734b3c614011aL,0xe7a01db767677c68L,
+ 0x89d9be5e7e273f4fL } },
+ /* 11 << 84 */
+ { { 0xd225cb2e089808efL,0xf1f7a27dd59e4107L,0x53afc7618211b9c9L,
+ 0x0361bc67e6819159L },
+ { 0x2a865d0b7f071426L,0x6a3c1810e7072567L,0x3e3bca1e0d6bcabdL,
+ 0xa1b02bc1408591bcL } },
+ /* 12 << 84 */
+ { { 0xe0deee5931fba239L,0xf47424d398bd91d1L,0x0f8886f4071a3c1dL,
+ 0x3f7d41e8a819233bL },
+ { 0x708623c2cf6eb998L,0x86bb49af609a287fL,0x942bb24963c90762L,
+ 0x0ef6eea555a9654bL } },
+ /* 13 << 84 */
+ { { 0x5f6d2d7236f5defeL,0xfa9922dc56f99176L,0x6c8c5ecef78ce0c7L,
+ 0x7b44589dbe09b55eL },
+ { 0xe11b3bca9ea83770L,0xd7fa2c7f2ab71547L,0x2a3dd6fa2a1ddcc0L,
+ 0x09acb4305a7b7707L } },
+ /* 14 << 84 */
+ { { 0x4add4a2e649d4e57L,0xcd53a2b01917526eL,0xc526233020b44ac4L,
+ 0x4028746abaa2c31dL },
+ { 0x5131839064291d4cL,0xbf48f151ee5ad909L,0xcce57f597b185681L,
+ 0x7c3ac1b04854d442L } },
+ /* 15 << 84 */
+ { { 0x65587dc3c093c171L,0xae7acb2424f42b65L,0x5a338adb955996cbL,
+ 0xc8e656756051f91bL },
+ { 0x66711fba28b8d0b1L,0x15d74137b6c10a90L,0x70cdd7eb3a232a80L,
+ 0xc9e2f07f6191ed24L } },
+ /* 16 << 84 */
+ { { 0xa80d1db6f79588c0L,0xfa52fc69b55768ccL,0x0b4df1ae7f54438aL,
+ 0x0cadd1a7f9b46a4fL },
+ { 0xb40ea6b31803dd6fL,0x488e4fa555eaae35L,0x9f047d55382e4e16L,
+ 0xc9b5b7e02f6e0c98L } },
+ /* 17 << 84 */
+ { { 0x6b1bd2d395762649L,0xa9604ee7c7aea3f6L,0x3646ff276dc6f896L,
+ 0x9bf0e7f52860bad1L },
+ { 0x2d92c8217cb44b92L,0xa2f5ce63aea9c182L,0xd0a2afb19154a5fdL,
+ 0x482e474c95801da6L } },
+ /* 18 << 84 */
+ { { 0xc19972d0b611c24bL,0x1d468e6560a8f351L,0xeb7580697bcf6421L,
+ 0xec9dd0ee88fbc491L },
+ { 0x5b59d2bf956c2e32L,0x73dc6864dcddf94eL,0xfd5e2321bcee7665L,
+ 0xa7b4f8ef5e9a06c4L } },
+ /* 19 << 84 */
+ { { 0xfba918dd7280f855L,0xbbaac2608baec688L,0xa3b3f00f33400f42L,
+ 0x3d2dba2966f2e6e4L },
+ { 0xb6f71a9498509375L,0x8f33031fcea423ccL,0x009b8dd04807e6fbL,
+ 0x5163cfe55cdb954cL } },
+ /* 20 << 84 */
+ { { 0x03cc8f17cf41c6e8L,0xf1f03c2a037b925cL,0xc39c19cc66d2427cL,
+ 0x823d24ba7b6c18e4L },
+ { 0x32ef9013901f0b4fL,0x684360f1f8941c2eL,0x0ebaff522c28092eL,
+ 0x7891e4e3256c932fL } },
+ /* 21 << 84 */
+ { { 0x51264319ac445e3dL,0x553432e78ea74381L,0xe6eeaa6967e9c50aL,
+ 0x27ced28462e628c7L },
+ { 0x3f96d3757a4afa57L,0xde0a14c3e484c150L,0x364a24eb38bd9923L,
+ 0x1df18da0e5177422L } },
+ /* 22 << 84 */
+ { { 0x174e8f82d8d38a9bL,0x2e97c600e7de1391L,0xc5709850a1c175ddL,
+ 0x969041a032ae5035L },
+ { 0xcbfd533b76a2086bL,0xd6bba71bd7c2e8feL,0xb2d58ee6099dfb67L,
+ 0x3a8b342d064a85d9L } },
+ /* 23 << 84 */
+ { { 0x3bc07649522f9be3L,0x690c075bdf1f49a8L,0x80e1aee83854ec42L,
+ 0x2a7dbf4417689dc7L },
+ { 0xc004fc0e3faf4078L,0xb2f02e9edf11862cL,0xf10a5e0fa0a1b7b3L,
+ 0x30aca6238936ec80L } },
+ /* 24 << 84 */
+ { { 0xf83cbf0502f40d9aL,0x4681c4682c318a4dL,0x985756180e9c2674L,
+ 0xbe79d0461847092eL },
+ { 0xaf1e480a78bd01e0L,0x6dd359e472a51db9L,0x62ce3821e3afbab6L,
+ 0xc5cee5b617733199L } },
+ /* 25 << 84 */
+ { { 0xe08b30d46ffd9fbbL,0x6e5bc69936c610b7L,0xf343cff29ce262cfL,
+ 0xca2e4e3568b914c1L },
+ { 0x011d64c016de36c5L,0xe0b10fdd42e2b829L,0x789429816685aaf8L,
+ 0xe7511708230ede97L } },
+ /* 26 << 84 */
+ { { 0x671ed8fc3b922bf8L,0xe4d8c0a04c29b133L,0x87eb12393b6e99c4L,
+ 0xaff3974c8793bebaL },
+ { 0x037494052c18df9bL,0xc5c3a29391007139L,0x6a77234fe37a0b95L,
+ 0x02c29a21b661c96bL } },
+ /* 27 << 84 */
+ { { 0xc3aaf1d6141ecf61L,0x9195509e3bb22f53L,0x2959740422d51357L,
+ 0x1b083822537bed60L },
+ { 0xcd7d6e35e07289f0L,0x1f94c48c6dd86effL,0xc8bb1f82eb0f9cfaL,
+ 0x9ee0b7e61b2eb97dL } },
+ /* 28 << 84 */
+ { { 0x5a52fe2e34d74e31L,0xa352c3103bf79ab6L,0x97ff6c5aabfeeb8fL,
+ 0xbfbe8feff5c97305L },
+ { 0xd6081ce6a7904608L,0x1f812f3ac4fca249L,0x9b24bc9ab9e5e200L,
+ 0x91022c6738012ee8L } },
+ /* 29 << 84 */
+ { { 0xe83d9c5d30a713a1L,0x4876e3f084ef0f93L,0xc9777029c1fbf928L,
+ 0xef7a6bb3bce7d2a4L },
+ { 0xb8067228dfa2a659L,0xd5cd3398d877a48fL,0xbea4fd8f025d0f3fL,
+ 0xd67d2e352eae7c2bL } },
+ /* 30 << 84 */
+ { { 0x184de7d7cc5f4394L,0xb5551b5c4536e142L,0x2e89b212d34aa60aL,
+ 0x14a96feaf50051d5L },
+ { 0x4e21ef740d12bb0bL,0xc522f02060b9677eL,0x8b12e4672df7731dL,
+ 0x39f803827b326d31L } },
+ /* 31 << 84 */
+ { { 0xdfb8630c39024a94L,0xaacb96a897319452L,0xd68a3961eda3867cL,
+ 0x0c58e2b077c4ffcaL },
+ { 0x3d545d634da919faL,0xef79b69af15e2289L,0x54bc3d3d808bab10L,
+ 0xc8ab300745f82c37L } },
+ /* 32 << 84 */
+ { { 0xc12738b67c4a658aL,0xb3c4763940e72182L,0x3b77be468798e44fL,
+ 0xdc047df217a7f85fL },
+ { 0x2439d4c55e59d92dL,0xcedca475e8e64d8dL,0xa724cd0d87ca9b16L,
+ 0x35e4fd59a5540dfeL } },
+ /* 33 << 84 */
+ { { 0xf8c1ff18e4bcf6b1L,0x856d6285295018faL,0x433f665c3263c949L,
+ 0xa6a76dd6a1f21409L },
+ { 0x17d32334cc7b4f79L,0xa1d0312206720e4aL,0xadb6661d81d9bed5L,
+ 0xf0d6fb0211db15d1L } },
+ /* 34 << 84 */
+ { { 0x7fd11ad51fb747d2L,0xab50f9593033762bL,0x2a7e711bfbefaf5aL,
+ 0xc73932783fef2bbfL },
+ { 0xe29fa2440df6f9beL,0x9092757b71efd215L,0xee60e3114f3d6fd9L,
+ 0x338542d40acfb78bL } },
+ /* 35 << 84 */
+ { { 0x44a23f0838961a0fL,0x1426eade986987caL,0x36e6ee2e4a863cc6L,
+ 0x48059420628b8b79L },
+ { 0x30303ad87396e1deL,0x5c8bdc4838c5aad1L,0x3e40e11f5c8f5066L,
+ 0xabd6e7688d246bbdL } },
+ /* 36 << 84 */
+ { { 0x68aa40bb23330a01L,0xd23f5ee4c34eafa0L,0x3bbee3155de02c21L,
+ 0x18dd4397d1d8dd06L },
+ { 0x3ba1939a122d7b44L,0xe6d3b40aa33870d6L,0x8e620f701c4fe3f8L,
+ 0xf6bba1a5d3a50cbfL } },
+ /* 37 << 84 */
+ { { 0x4a78bde5cfc0aee0L,0x847edc46c08c50bdL,0xbaa2439cad63c9b2L,
+ 0xceb4a72810fc2acbL },
+ { 0xa419e40e26da033dL,0x6cc3889d03e02683L,0x1cd28559fdccf725L,
+ 0x0fd7e0f18d13d208L } },
+ /* 38 << 84 */
+ { { 0x01b9733b1f0df9d4L,0x8cc2c5f3a2b5e4f3L,0x43053bfa3a304fd4L,
+ 0x8e87665c0a9f1aa7L },
+ { 0x087f29ecd73dc965L,0x15ace4553e9023dbL,0x2370e3092bce28b4L,
+ 0xf9723442b6b1e84aL } },
+ /* 39 << 84 */
+ { { 0xbeee662eb72d9f26L,0xb19396def0e47109L,0x85b1fa73e13289d0L,
+ 0x436cf77e54e58e32L },
+ { 0x0ec833b3e990ef77L,0x7373e3ed1b11fc25L,0xbe0eda870fc332ceL,
+ 0xced049708d7ea856L } },
+ /* 40 << 84 */
+ { { 0xf85ff7857e977ca0L,0xb66ee8dadfdd5d2bL,0xf5e37950905af461L,
+ 0x587b9090966d487cL },
+ { 0x6a198a1b32ba0127L,0xa7720e07141615acL,0xa23f3499996ef2f2L,
+ 0xef5f64b4470bcb3dL } },
+ /* 41 << 84 */
+ { { 0xa526a96292b8c559L,0x0c14aac069740a0fL,0x0d41a9e3a6bdc0a5L,
+ 0x97d521069c48aef4L },
+ { 0xcf16bd303e7c253bL,0xcc834b1a47fdedc1L,0x7362c6e5373aab2eL,
+ 0x264ed85ec5f590ffL } },
+ /* 42 << 84 */
+ { { 0x7a46d9c066d41870L,0xa50c20b14787ba09L,0x185e7e51e3d44635L,
+ 0xb3b3e08031e2d8dcL },
+ { 0xbed1e558a179e9d9L,0x2daa3f7974a76781L,0x4372baf23a40864fL,
+ 0x46900c544fe75cb5L } },
+ /* 43 << 84 */
+ { { 0xb95f171ef76765d0L,0x4ad726d295c87502L,0x2ec769da4d7c99bdL,
+ 0x5e2ddd19c36cdfa8L },
+ { 0xc22117fca93e6deaL,0xe8a2583b93771123L,0xbe2f6089fa08a3a2L,
+ 0x4809d5ed8f0e1112L } },
+ /* 44 << 84 */
+ { { 0x3b414aa3da7a095eL,0x9049acf126f5aaddL,0x78d46a4d6be8b84aL,
+ 0xd66b1963b732b9b3L },
+ { 0x5c2ac2a0de6e9555L,0xcf52d098b5bd8770L,0x15a15fa60fd28921L,
+ 0x56ccb81e8b27536dL } },
+ /* 45 << 84 */
+ { { 0x0f0d8ab89f4ccbb8L,0xed5f44d2db221729L,0x4314198800bed10cL,
+ 0xc94348a41d735b8bL },
+ { 0x79f3e9c429ef8479L,0x4c13a4e3614c693fL,0x32c9af568e143a14L,
+ 0xbc517799e29ac5c4L } },
+ /* 46 << 84 */
+ { { 0x05e179922774856fL,0x6e52fb056c1bf55fL,0xaeda4225e4f19e16L,
+ 0x70f4728aaf5ccb26L },
+ { 0x5d2118d1b2947f22L,0xc827ea16281d6fb9L,0x8412328d8cf0eabdL,
+ 0x45ee9fb203ef9dcfL } },
+ /* 47 << 84 */
+ { { 0x8e700421bb937d63L,0xdf8ff2d5cc4b37a6L,0xa4c0d5b25ced7b68L,
+ 0x6537c1efc7308f59L },
+ { 0x25ce6a263b37f8e8L,0x170e9a9bdeebc6ceL,0xdd0379528728d72cL,
+ 0x445b0e55850154bcL } },
+ /* 48 << 84 */
+ { { 0x4b7d0e0683a7337bL,0x1e3416d4ffecf249L,0x24840eff66a2b71fL,
+ 0xd0d9a50ab37cc26dL },
+ { 0xe21981506fe28ef7L,0x3cc5ef1623324c7fL,0x220f3455769b5263L,
+ 0xe2ade2f1a10bf475L } },
+ /* 49 << 84 */
+ { { 0x28cd20fa458d3671L,0x1549722c2dc4847bL,0x6dd01e55591941e3L,
+ 0x0e6fbcea27128ccbL },
+ { 0xae1a1e6b3bef0262L,0xfa8c472c8f54e103L,0x7539c0a872c052ecL,
+ 0xd7b273695a3490e9L } },
+ /* 50 << 84 */
+ { { 0x143fe1f171684349L,0x36b4722e32e19b97L,0xdc05922790980affL,
+ 0x175c9c889e13d674L },
+ { 0xa7de5b226e6bfdb1L,0x5ea5b7b2bedb4b46L,0xd5570191d34a6e44L,
+ 0xfcf60d2ea24ff7e6L } },
+ /* 51 << 84 */
+ { { 0x614a392d677819e1L,0x7be74c7eaa5a29e8L,0xab50fece63c85f3fL,
+ 0xaca2e2a946cab337L },
+ { 0x7f700388122a6fe3L,0xdb69f703882a04a8L,0x9a77935dcf7aed57L,
+ 0xdf16207c8d91c86fL } },
+ /* 52 << 84 */
+ { { 0x2fca49ab63ed9998L,0xa3125c44a77ddf96L,0x05dd8a8624344072L,
+ 0xa023dda2fec3fb56L },
+ { 0x421b41fc0c743032L,0x4f2120c15e438639L,0xfb7cae51c83c1b07L,
+ 0xb2370caacac2171aL } },
+ /* 53 << 84 */
+ { { 0x2eb2d9626cc820fbL,0x59feee5cb85a44bfL,0x94620fca5b6598f0L,
+ 0x6b922cae7e314051L },
+ { 0xff8745ad106bed4eL,0x546e71f5dfa1e9abL,0x935c1e481ec29487L,
+ 0x9509216c4d936530L } },
+ /* 54 << 84 */
+ { { 0xc7ca306785c9a2dbL,0xd6ae51526be8606fL,0x09dbcae6e14c651dL,
+ 0xc9536e239bc32f96L },
+ { 0xa90535a934521b03L,0xf39c526c878756ffL,0x383172ec8aedf03cL,
+ 0x20a8075eefe0c034L } },
+ /* 55 << 84 */
+ { { 0xf22f9c6264026422L,0x8dd1078024b9d076L,0x944c742a3bef2950L,
+ 0x55b9502e88a2b00bL },
+ { 0xa59e14b486a09817L,0xa39dd3ac47bb4071L,0x55137f663be0592fL,
+ 0x07fcafd4c9e63f5bL } },
+ /* 56 << 84 */
+ { { 0x963652ee346eb226L,0x7dfab085ec2facb7L,0x273bf2b8691add26L,
+ 0x30d74540f2b46c44L },
+ { 0x05e8e73ef2c2d065L,0xff9b8a00d42eeac9L,0x2fcbd20597209d22L,
+ 0xeb740ffade14ea2cL } },
+ /* 57 << 84 */
+ { { 0xc71ff913a8aef518L,0x7bfc74bbfff4cfa2L,0x1716680cb6b36048L,
+ 0x121b2cce9ef79af1L },
+ { 0xbff3c836a01eb3d3L,0x50eb1c6a5f79077bL,0xa48c32d6a004bbcfL,
+ 0x47a593167d64f61dL } },
+ /* 58 << 84 */
+ { { 0x6068147f93102016L,0x12c5f65494d12576L,0xefb071a7c9bc6b91L,
+ 0x7c2da0c56e23ea95L },
+ { 0xf4fd45b6d4a1dd5dL,0x3e7ad9b69122b13cL,0x342ca118e6f57a48L,
+ 0x1c2e94a706f8288fL } },
+ /* 59 << 84 */
+ { { 0x99e68f075a97d231L,0x7c80de974d838758L,0xbce0f5d005872727L,
+ 0xbe5d95c219c4d016L },
+ { 0x921d5cb19c2492eeL,0x42192dc1404d6fb3L,0x4c84dcd132f988d3L,
+ 0xde26d61fa17b8e85L } },
+ /* 60 << 84 */
+ { { 0xc466dcb6137c7408L,0x9a38d7b636a266daL,0x7ef5cb0683bebf1bL,
+ 0xe5cdcbbf0fd014e3L },
+ { 0x30aa376df65965a0L,0x60fe88c2ebb3e95eL,0x33fd0b6166ee6f20L,
+ 0x8827dcdb3f41f0a0L } },
+ /* 61 << 84 */
+ { { 0xbf8a9d240c56c690L,0x40265dadddb7641dL,0x522b05bf3a6b662bL,
+ 0x466d1dfeb1478c9bL },
+ { 0xaa6169621484469bL,0x0db6054902df8f9fL,0xc37bca023cb8bf51L,
+ 0x5effe34621371ce8L } },
+ /* 62 << 84 */
+ { { 0xe8f65264ff112c32L,0x8a9c736d7b971fb2L,0xa4f194707b75080dL,
+ 0xfc3f2c5a8839c59bL },
+ { 0x1d6c777e5aeb49c2L,0xf3db034dda1addfeL,0xd76fee5a5535affcL,
+ 0x0853ac70b92251fdL } },
+ /* 63 << 84 */
+ { { 0x37e3d5948b2a29d5L,0x28f1f4574de00ddbL,0x8083c1b5f42c328bL,
+ 0xd8ef1d8fe493c73bL },
+ { 0x96fb626041dc61bdL,0xf74e8a9d27ee2f8aL,0x7c605a802c946a5dL,
+ 0xeed48d653839ccfdL } },
+ /* 64 << 84 */
+ { { 0x9894344f3a29467aL,0xde81e949c51eba6dL,0xdaea066ba5e5c2f2L,
+ 0x3fc8a61408c8c7b3L },
+ { 0x7adff88f06d0de9fL,0xbbc11cf53b75ce0aL,0x9fbb7accfbbc87d5L,
+ 0xa1458e267badfde2L } },
+ /* 0 << 91 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 91 */
+ { { 0x1cb43668e039c256L,0x5f26fb8b7c17fd5dL,0xeee426af79aa062bL,
+ 0x072002d0d78fbf04L },
+ { 0x4c9ca237e84fb7e3L,0xb401d8a10c82133dL,0xaaa525926d7e4181L,
+ 0xe943083373dbb152L } },
+ /* 2 << 91 */
+ { { 0xf92dda31be24319aL,0x03f7d28be095a8e7L,0xa52fe84098782185L,
+ 0x276ddafe29c24dbcL },
+ { 0x80cd54961d7a64ebL,0xe43608897f1dbe42L,0x2f81a8778438d2d5L,
+ 0x7e4d52a885169036L } },
+ /* 3 << 91 */
+ { { 0x19e3d5b11d59715dL,0xc7eaa762d788983eL,0xe5a730b0abf1f248L,
+ 0xfbab8084fae3fd83L },
+ { 0x65e50d2153765b2fL,0xbdd4e083fa127f3dL,0x9cf3c074397b1b10L,
+ 0x59f8090cb1b59fd3L } },
+ /* 4 << 91 */
+ { { 0x7b15fd9d615faa8fL,0x8fa1eb40968554edL,0x7bb4447e7aa44882L,
+ 0x2bb2d0d1029fff32L },
+ { 0x075e2a646caa6d2fL,0x8eb879de22e7351bL,0xbcd5624e9a506c62L,
+ 0x218eaef0a87e24dcL } },
+ /* 5 << 91 */
+ { { 0x37e5684744ddfa35L,0x9ccfc5c5dab3f747L,0x9ac1df3f1ee96cf4L,
+ 0x0c0571a13b480b8fL },
+ { 0x2fbeb3d54b3a7b3cL,0x35c036695dcdbb99L,0x52a0f5dcb2415b3aL,
+ 0xd57759b44413ed9aL } },
+ /* 6 << 91 */
+ { { 0x1fe647d83d30a2c5L,0x0857f77ef78a81dcL,0x11d5a334131a4a9bL,
+ 0xc0a94af929d393f5L },
+ { 0xbc3a5c0bdaa6ec1aL,0xba9fe49388d2d7edL,0xbb4335b4bb614797L,
+ 0x991c4d6872f83533L } },
+ /* 7 << 91 */
+ { { 0x53258c28d2f01cb3L,0x93d6eaa3d75db0b1L,0x419a2b0de87d0db4L,
+ 0xa1e48f03d8fe8493L },
+ { 0xf747faf6c508b23aL,0xf137571a35d53549L,0x9f5e58e2fcf9b838L,
+ 0xc7186ceea7fd3cf5L } },
+ /* 8 << 91 */
+ { { 0x77b868cee978a1d3L,0xe3a68b337ab92d04L,0x5102979487a5b862L,
+ 0x5f0606c33a61d41dL },
+ { 0x2814be276f9326f1L,0x2f521c14c6fe3c2eL,0x17464d7dacdf7351L,
+ 0x10f5f9d3777f7e44L } },
+ /* 9 << 91 */
+ { { 0xce8e616b269fb37dL,0xaaf738047de62de5L,0xaba111754fdd4153L,
+ 0x515759ba3770b49bL },
+ { 0x8b09ebf8aa423a61L,0x592245a1cd41fb92L,0x1cba8ec19b4c8936L,
+ 0xa87e91e3af36710eL } },
+ /* 10 << 91 */
+ { { 0x1fd84ce43d34a2e3L,0xee3759ceb43b5d61L,0x895bc78c619186c7L,
+ 0xf19c3809cbb9725aL },
+ { 0xc0be21aade744b1fL,0xa7d222b060f8056bL,0x74be6157b23efe11L,
+ 0x6fab2b4f0cd68253L } },
+ /* 11 << 91 */
+ { { 0xad33ea5f4bf1d725L,0x9c1d8ee24f6c950fL,0x544ee78aa377af06L,
+ 0x54f489bb94a113e1L },
+ { 0x8f11d634992fb7e8L,0x0169a7aaa2a44347L,0x1d49d4af95020e00L,
+ 0x95945722e08e120bL } },
+ /* 12 << 91 */
+ { { 0xb6e33878a4d32282L,0xe36e029d48020ae7L,0xe05847fb37a9b750L,
+ 0xf876812cb29e3819L },
+ { 0x84ad138ed23a17f0L,0x6d7b4480f0b3950eL,0xdfa8aef42fd67ae0L,
+ 0x8d3eea2452333af6L } },
+ /* 13 << 91 */
+ { { 0x0d052075b15d5accL,0xc6d9c79fbd815bc4L,0x8dcafd88dfa36cf2L,
+ 0x908ccbe238aa9070L },
+ { 0x638722c4ba35afceL,0x5a3da8b0fd6abf0bL,0x2dce252cc9c335c1L,
+ 0x84e7f0de65aa799bL } },
+ /* 14 << 91 */
+ { { 0x2101a522b99a72cbL,0x06de6e6787618016L,0x5ff8c7cde6f3653eL,
+ 0x0a821ab5c7a6754aL },
+ { 0x7e3fa52b7cb0b5a2L,0xa7fb121cc9048790L,0x1a72502006ce053aL,
+ 0xb490a31f04e929b0L } },
+ /* 15 << 91 */
+ { { 0xe17be47d62dd61adL,0x781a961c6be01371L,0x1063bfd3dae3cbbaL,
+ 0x356474067f73c9baL },
+ { 0xf50e957b2736a129L,0xa6313702ed13f256L,0x9436ee653a19fcc5L,
+ 0xcf2bdb29e7a4c8b6L } },
+ /* 16 << 91 */
+ { { 0xb06b1244c5f95cd8L,0xda8c8af0f4ab95f4L,0x1bae59c2b9e5836dL,
+ 0x07d51e7e3acffffcL },
+ { 0x01e15e6ac2ccbcdaL,0x3bc1923f8528c3e0L,0x43324577a49fead4L,
+ 0x61a1b8842aa7a711L } },
+ /* 17 << 91 */
+ { { 0xf9a86e08700230efL,0x0af585a1bd19adf8L,0x7645f361f55ad8f2L,
+ 0x6e67622346c3614cL },
+ { 0x23cb257c4e774d3fL,0x82a38513ac102d1bL,0x9bcddd887b126aa5L,
+ 0xe716998beefd3ee4L } },
+ /* 18 << 91 */
+ { { 0x4239d571fb167583L,0xdd011c78d16c8f8aL,0x271c289569a27519L,
+ 0x9ce0a3b7d2d64b6aL },
+ { 0x8c977289d5ec6738L,0xa3b49f9a8840ef6bL,0x808c14c99a453419L,
+ 0x5c00295b0cf0a2d5L } },
+ /* 19 << 91 */
+ { { 0x524414fb1d4bcc76L,0xb07691d2459a88f1L,0x77f43263f70d110fL,
+ 0x64ada5e0b7abf9f3L },
+ { 0xafd0f94e5b544cf5L,0xb4a13a15fd2713feL,0xb99b7d6e250c74f4L,
+ 0x097f2f7320324e45L } },
+ /* 20 << 91 */
+ { { 0x994b37d8affa8208L,0xc3c31b0bdc29aafcL,0x3da746517a3a607fL,
+ 0xd8e1b8c1fe6955d6L },
+ { 0x716e1815c8418682L,0x541d487f7dc91d97L,0x48a04669c6996982L,
+ 0xf39cab1583a6502eL } },
+ /* 21 << 91 */
+ { { 0x025801a0e68db055L,0xf3569758ba3338d5L,0xb0c8c0aaee2afa84L,
+ 0x4f6985d3fb6562d1L },
+ { 0x351f1f15132ed17aL,0x510ed0b4c04365feL,0xa3f98138e5b1f066L,
+ 0xbc9d95d632df03dcL } },
+ /* 22 << 91 */
+ { { 0xa83ccf6e19abd09eL,0x0b4097c14ff17edbL,0x58a5c478d64a06ceL,
+ 0x2ddcc3fd544a58fdL },
+ { 0xd449503d9e8153b8L,0x3324fd027774179bL,0xaf5d47c8dbd9120cL,
+ 0xeb86016234fa94dbL } },
+ /* 23 << 91 */
+ { { 0x5817bdd1972f07f4L,0xe5579e2ed27bbcebL,0x86847a1f5f11e5a6L,
+ 0xb39ed2557c3cf048L },
+ { 0xe1076417a2f62e55L,0x6b9ab38f1bcf82a2L,0x4bb7c3197aeb29f9L,
+ 0xf6d17da317227a46L } },
+ /* 24 << 91 */
+ { { 0xab53ddbd0f968c00L,0xa03da7ec000c880bL,0x7b2396246a9ad24dL,
+ 0x612c040101ec60d0L },
+ { 0x70d10493109f5df1L,0xfbda403080af7550L,0x30b93f95c6b9a9b3L,
+ 0x0c74ec71007d9418L } },
+ /* 25 << 91 */
+ { { 0x941755646edb951fL,0x5f4a9d787f22c282L,0xb7870895b38d1196L,
+ 0xbc593df3a228ce7cL },
+ { 0xc78c5bd46af3641aL,0x7802200b3d9b3dccL,0x0dc73f328be33304L,
+ 0x847ed87d61ffb79aL } },
+ /* 26 << 91 */
+ { { 0xf85c974e6d671192L,0x1e14100ade16f60fL,0x45cb0d5a95c38797L,
+ 0x18923bba9b022da4L },
+ { 0xef2be899bbe7e86eL,0x4a1510ee216067bfL,0xd98c815484d5ce3eL,
+ 0x1af777f0f92a2b90L } },
+ /* 27 << 91 */
+ { { 0x9fbcb4004ef65724L,0x3e04a4c93c0ca6feL,0xfb3e2cb555002994L,
+ 0x1f3a93c55363ecabL },
+ { 0x1fe00efe3923555bL,0x744bedd91e1751eaL,0x3fb2db596ab69357L,
+ 0x8dbd7365f5e6618bL } },
+ /* 28 << 91 */
+ { { 0x99d53099df1ea40eL,0xb3f24a0b57d61e64L,0xd088a198596eb812L,
+ 0x22c8361b5762940bL },
+ { 0x66f01f97f9c0d95cL,0x884611728e43cdaeL,0x11599a7fb72b15c3L,
+ 0x135a7536420d95ccL } },
+ /* 29 << 91 */
+ { { 0x2dcdf0f75f7ae2f6L,0x15fc6e1dd7fa6da2L,0x81ca829ad1d441b6L,
+ 0x84c10cf804a106b6L },
+ { 0xa9b26c95a73fbbd0L,0x7f24e0cb4d8f6ee8L,0x48b459371e25a043L,
+ 0xf8a74fca036f3dfeL } },
+ /* 30 << 91 */
+ { { 0x1ed46585c9f84296L,0x7fbaa8fb3bc278b0L,0xa8e96cd46c4fcbd0L,
+ 0x940a120273b60a5fL },
+ { 0x34aae12055a4aec8L,0x550e9a74dbd742f0L,0x794456d7228c68abL,
+ 0x492f8868a4e25ec6L } },
+ /* 31 << 91 */
+ { { 0x682915adb2d8f398L,0xf13b51cc5b84c953L,0xcda90ab85bb917d6L,
+ 0x4b6155604ea3dee1L },
+ { 0x578b4e850a52c1c8L,0xeab1a69520b75fc4L,0x60c14f3caa0bb3c6L,
+ 0x220f448ab8216094L } },
+ /* 32 << 91 */
+ { { 0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L,
+ 0x8589fb9206d54831L },
+ { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L,
+ 0xebb0696d02541c4fL } },
+ /* 33 << 91 */
+ { { 0x5a171fdeb9718710L,0x38f1bed8f374a9f5L,0xc8c582e1ba39bdc1L,
+ 0xfc457b0a908cc0ceL },
+ { 0x9a187fd4883841e2L,0x8ec25b3938725381L,0x2553ed0596f84395L,
+ 0x095c76616f6c6897L } },
+ /* 34 << 91 */
+ { { 0x917ac85c4bdc5610L,0xb2885fe4179eb301L,0x5fc655478b78bdccL,
+ 0x4a9fc893e59e4699L },
+ { 0xbb7ff0cd3ce299afL,0x195be9b3adf38b20L,0x6a929c87d38ddb8fL,
+ 0x55fcc99cb21a51b9L } },
+ /* 35 << 91 */
+ { { 0x2b695b4c721a4593L,0xed1e9a15768eaac2L,0xfb63d71c7489f914L,
+ 0xf98ba31c78118910L },
+ { 0x802913739b128eb4L,0x7801214ed448af4aL,0xdbd2e22b55418dd3L,
+ 0xeffb3c0dd3998242L } },
+ /* 36 << 91 */
+ { { 0xdfa6077cc7bf3827L,0xf2165bcb47f8238fL,0xfe37cf688564d554L,
+ 0xe5f825c40a81fb98L },
+ { 0x43cc4f67ffed4d6fL,0xbc609578b50a34b0L,0x8aa8fcf95041faf1L,
+ 0x5659f053651773b6L } },
+ /* 37 << 91 */
+ { { 0xe87582c36044d63bL,0xa60894090cdb0ca0L,0x8c993e0fbfb2bcf6L,
+ 0xfc64a71945985cfcL },
+ { 0x15c4da8083dbedbaL,0x804ae1122be67df7L,0xda4c9658a23defdeL,
+ 0x12002ddd5156e0d3L } },
+ /* 38 << 91 */
+ { { 0xe68eae895dd21b96L,0x8b99f28bcf44624dL,0x0ae008081ec8897aL,
+ 0xdd0a93036712f76eL },
+ { 0x962375224e233de4L,0x192445b12b36a8a5L,0xabf9ff74023993d9L,
+ 0x21f37bf42aad4a8fL } },
+ /* 39 << 91 */
+ { { 0x340a4349f8bd2bbdL,0x1d902cd94868195dL,0x3d27bbf1e5fdb6f1L,
+ 0x7a5ab088124f9f1cL },
+ { 0xc466ab06f7a09e03L,0x2f8a197731f2c123L,0xda355dc7041b6657L,
+ 0xcb840d128ece2a7cL } },
+ /* 40 << 91 */
+ { { 0xb600ad9f7db32675L,0x78fea13307a06f1bL,0x5d032269b31f6094L,
+ 0x07753ef583ec37aaL },
+ { 0x03485aed9c0bea78L,0x41bb3989bc3f4524L,0x09403761697f726dL,
+ 0x6109beb3df394820L } },
+ /* 41 << 91 */
+ { { 0x804111ea3b6d1145L,0xb6271ea9a8582654L,0x619615e624e66562L,
+ 0xa2554945d7b6ad9cL },
+ { 0xd9c4985e99bfe35fL,0x9770ccc07b51cdf6L,0x7c32701392881832L,
+ 0x8777d45f286b26d1L } },
+ /* 42 << 91 */
+ { { 0x9bbeda22d847999dL,0x03aa33b6c3525d32L,0x4b7b96d428a959a1L,
+ 0xbb3786e531e5d234L },
+ { 0xaeb5d3ce6961f247L,0x20aa85af02f93d3fL,0x9cd1ad3dd7a7ae4fL,
+ 0xbf6688f0781adaa8L } },
+ /* 43 << 91 */
+ { { 0xb1b40e867469ceadL,0x1904c524309fca48L,0x9b7312af4b54bbc7L,
+ 0xbe24bf8f593affa2L },
+ { 0xbe5e0790bd98764bL,0xa0f45f17a26e299eL,0x4af0d2c26b8fe4c7L,
+ 0xef170db18ae8a3e6L } },
+ /* 44 << 91 */
+ { { 0x0e8d61a029e0ccc1L,0xcd53e87e60ad36caL,0x328c6623c8173822L,
+ 0x7ee1767da496be55L },
+ { 0x89f13259648945afL,0x9e45a5fd25c8009cL,0xaf2febd91f61ab8cL,
+ 0x43f6bc868a275385L } },
+ /* 45 << 91 */
+ { { 0x87792348f2142e79L,0x17d89259c6e6238aL,0x7536d2f64a839d9bL,
+ 0x1f428fce76a1fbdcL },
+ { 0x1c1096010db06dfeL,0xbfc16bc150a3a3ccL,0xf9cbd9ec9b30f41bL,
+ 0x5b5da0d600138cceL } },
+ /* 46 << 91 */
+ { { 0xec1d0a4856ef96a7L,0xb47eb848982bf842L,0x66deae32ec3f700dL,
+ 0x4e43c42caa1181e0L },
+ { 0xa1d72a31d1a4aa2aL,0x440d4668c004f3ceL,0x0d6a2d3b45fe8a7aL,
+ 0x820e52e2fb128365L } },
+ /* 47 << 91 */
+ { { 0x29ac5fcf25e51b09L,0x180cd2bf2023d159L,0xa9892171a1ebf90eL,
+ 0xf97c4c877c132181L },
+ { 0x9f1dc724c03dbb7eL,0xae043765018cbbe4L,0xfb0b2a360767d153L,
+ 0xa8e2f4d6249cbaebL } },
+ /* 48 << 91 */
+ { { 0x172a5247d95ea168L,0x1758fada2970764aL,0xac803a511d978169L,
+ 0x299cfe2ede77e01bL },
+ { 0x652a1e17b0a98927L,0x2e26e1d120014495L,0x7ae0af9f7175b56aL,
+ 0xc2e22a80d64b9f95L } },
+ /* 49 << 91 */
+ { { 0x4d0ff9fbd90a060aL,0x496a27dbbaf38085L,0x32305401da776bcfL,
+ 0xb8cdcef6725f209eL },
+ { 0x61ba0f37436a0bbaL,0x263fa10876860049L,0x92beb98eda3542cfL,
+ 0xa2d4d14ad5849538L } },
+ /* 50 << 91 */
+ { { 0x989b9d6812e9a1bcL,0x61d9075c5f6e3268L,0x352c6aa999ace638L,
+ 0xde4e4a55920f43ffL },
+ { 0xe5e4144ad673c017L,0x667417ae6f6e05eaL,0x613416aedcd1bd56L,
+ 0x5eb3620186693711L } },
+ /* 51 << 91 */
+ { { 0x2d7bc5043a1aa914L,0x175a129976dc5975L,0xe900e0f23fc8125cL,
+ 0x569ef68c11198875L },
+ { 0x9012db6363a113b4L,0xe3bd3f5698835766L,0xa5c94a5276412deaL,
+ 0xad9e2a09aa735e5cL } },
+ /* 52 << 91 */
+ { { 0x405a984c508b65e9L,0xbde4a1d16df1a0d1L,0x1a9433a1dfba80daL,
+ 0xe9192ff99440ad2eL },
+ { 0x9f6496965099fe92L,0x25ddb65c0b27a54aL,0x178279ddc590da61L,
+ 0x5479a999fbde681aL } },
+ /* 53 << 91 */
+ { { 0xd0e84e05013fe162L,0xbe11dc92632d471bL,0xdf0b0c45fc0e089fL,
+ 0x04fb15b04c144025L },
+ { 0xa61d5fc213c99927L,0xa033e9e03de2eb35L,0xf8185d5cb8dacbb4L,
+ 0x9a88e2658644549dL } },
+ /* 54 << 91 */
+ { { 0xf717af6254671ff6L,0x4bd4241b5fa58603L,0x06fba40be67773c0L,
+ 0xc1d933d26a2847e9L },
+ { 0xf4f5acf3689e2c70L,0x92aab0e746bafd31L,0x798d76aa3473f6e5L,
+ 0xcc6641db93141934L } },
+ /* 55 << 91 */
+ { { 0xcae27757d31e535eL,0x04cc43b687c2ee11L,0x8d1f96752e029ffaL,
+ 0xc2150672e4cc7a2cL },
+ { 0x3b03c1e08d68b013L,0xa9d6816fedf298f3L,0x1bfbb529a2804464L,
+ 0x95a52fae5db22125L } },
+ /* 56 << 91 */
+ { { 0x55b321600e1cb64eL,0x004828f67e7fc9feL,0x13394b821bb0fb93L,
+ 0xb6293a2d35f1a920L },
+ { 0xde35ef21d145d2d9L,0xbe6225b3bb8fa603L,0x00fc8f6b32cf252dL,
+ 0xa28e52e6117cf8c2L } },
+ /* 57 << 91 */
+ { { 0x9d1dc89b4c371e6dL,0xcebe067536ef0f28L,0x5de05d09a4292f81L,
+ 0xa8303593353e3083L },
+ { 0xa1715b0a7e37a9bbL,0x8c56f61e2b8faec3L,0x5250743133c9b102L,
+ 0x0130cefca44431f0L } },
+ /* 58 << 91 */
+ { { 0x56039fa0bd865cfbL,0x4b03e578bc5f1dd7L,0x40edf2e4babe7224L,
+ 0xc752496d3a1988f6L },
+ { 0xd1572d3b564beb6bL,0x0db1d11039a1c608L,0x568d193416f60126L,
+ 0x05ae9668f354af33L } },
+ /* 59 << 91 */
+ { { 0x19de6d37c92544f2L,0xcc084353a35837d5L,0xcbb6869c1a514eceL,
+ 0xb633e7282e1d1066L },
+ { 0xf15dd69f936c581cL,0x96e7b8ce7439c4f9L,0x5e676f482e448a5bL,
+ 0xb2ca7d5bfd916bbbL } },
+ /* 60 << 91 */
+ { { 0xd55a2541f5024025L,0x47bc5769e4c2d937L,0x7d31b92a0362189fL,
+ 0x83f3086eef7816f9L },
+ { 0xf9f46d94b587579aL,0xec2d22d830e76c5fL,0x27d57461b000ffcfL,
+ 0xbb7e65f9364ffc2cL } },
+ /* 61 << 91 */
+ { { 0x7c7c94776652a220L,0x61618f89d696c981L,0x5021701d89effff3L,
+ 0xf2c8ff8e7c314163L },
+ { 0x2da413ad8efb4d3eL,0x937b5adfce176d95L,0x22867d342a67d51cL,
+ 0x262b9b1018eb3ac9L } },
+ /* 62 << 91 */
+ { { 0x4e314fe4c43ff28bL,0x764766276a664e7aL,0x3e90e40bb7a565c2L,
+ 0x8588993ac1acf831L },
+ { 0xd7b501d68f938829L,0x996627ee3edd7d4cL,0x37d44a6290cd34c7L,
+ 0xa8327499f3833e8dL } },
+ /* 63 << 91 */
+ { { 0x2e18917d4bf50353L,0x85dd726b556765fbL,0x54fe65d693d5ab66L,
+ 0x3ddbaced915c25feL },
+ { 0xa799d9a412f22e85L,0xe2a248676d06f6bcL,0xf4f1ee5643ca1637L,
+ 0xfda2828b61ece30aL } },
+ /* 64 << 91 */
+ { { 0x758c1a3ea2dee7a6L,0xdcde2f3c734b2284L,0xaba445d24eaba6adL,
+ 0x35aaf66876cee0a7L },
+ { 0x7e0b04a9e5aa049aL,0xe74083ad91103e84L,0xbeb183ce40afecc3L,
+ 0x6b89de9fea043f7aL } },
+ /* 0 << 98 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 98 */
+ { { 0x0e299d23fe67ba66L,0x9145076093cf2f34L,0xf45b5ea997fcf913L,
+ 0x5be008438bd7dddaL },
+ { 0x358c3e05d53ff04dL,0xbf7ccdc35de91ef7L,0xad684dbfb69ec1a0L,
+ 0x367e7cf2801fd997L } },
+ /* 2 << 98 */
+ { { 0x0ca1f3b7b0dc8595L,0x27de46089f1d9f2eL,0x1af3bf39badd82a7L,
+ 0x79356a7965862448L },
+ { 0xc0602345f5f9a052L,0x1a8b0f89139a42f9L,0xb53eee42844d40fcL,
+ 0x93b0bfe54e5b6368L } },
+ /* 3 << 98 */
+ { { 0x5434dd02c024789cL,0x90dca9ea41b57bfcL,0x8aa898e2243398dfL,
+ 0xf607c834894a94bbL },
+ { 0xbb07be97c2c99b76L,0x6576ba6718c29302L,0x3d79efcce703a88cL,
+ 0xf259ced7b6a0d106L } },
+ /* 4 << 98 */
+ { { 0x0f893a5dc8de610bL,0xe8c515fb67e223ceL,0x7774bfa64ead6dc5L,
+ 0x89d20f95925c728fL },
+ { 0x7a1e0966098583ceL,0xa2eedb9493f2a7d7L,0x1b2820974c304d4aL,
+ 0x0842e3dac077282dL } },
+ /* 5 << 98 */
+ { { 0xe4d972a33b9e2d7bL,0x7cc60b27c48218ffL,0x8fc7083884149d91L,
+ 0x5c04346f2f461eccL },
+ { 0xebe9fdf2614650a9L,0x5e35b537c1f666acL,0x645613d188babc83L,
+ 0x88cace3ac5e1c93eL } },
+ /* 6 << 98 */
+ { { 0x209ca3753de92e23L,0xccb03cc85fbbb6e3L,0xccb90f03d7b1487eL,
+ 0xfa9c2a38c710941fL },
+ { 0x756c38236724ceedL,0x3a902258192d0323L,0xb150e519ea5e038eL,
+ 0xdcba2865c7427591L } },
+ /* 7 << 98 */
+ { { 0xe549237f78890732L,0xc443bef953fcb4d9L,0x9884d8a6eb3480d6L,
+ 0x8a35b6a13048b186L },
+ { 0xb4e4471665e9a90aL,0x45bf380d653006c0L,0x8f3f820d4fe9ae3bL,
+ 0x244a35a0979a3b71L } },
+ /* 8 << 98 */
+ { { 0xa1010e9d74cd06ffL,0x9c17c7dfaca3eeacL,0x74c86cd38063aa2bL,
+ 0x8595c4b3734614ffL },
+ { 0xa3de00ca990f62ccL,0xd9bed213ca0c3be5L,0x7886078adf8ce9f5L,
+ 0xddb27ce35cd44444L } },
+ /* 9 << 98 */
+ { { 0xed374a6658926dddL,0x138b2d49908015b8L,0x886c6579de1f7ab8L,
+ 0x888b9aa0c3020b7aL },
+ { 0xd3ec034e3a96e355L,0xba65b0b8f30fbe9aL,0x064c8e50ff21367aL,
+ 0x1f508ea40b04b46eL } },
+ /* 10 << 98 */
+ { { 0x98561a49747c866cL,0xbbb1e5fe0518a062L,0x20ff4e8becdc3608L,
+ 0x7f55cded20184027L },
+ { 0x8d73ec95f38c85f0L,0x5b589fdf8bc3b8c3L,0xbe95dd980f12b66fL,
+ 0xf5bd1a090e338e01L } },
+ /* 11 << 98 */
+ { { 0x65163ae55e915918L,0x6158d6d986f8a46bL,0x8466b538eeebf99cL,
+ 0xca8761f6bca477efL },
+ { 0xaf3449c29ebbc601L,0xef3b0f41e0c3ae2fL,0xaa6c577d5de63752L,
+ 0xe916660164682a51L } },
+ /* 12 << 98 */
+ { { 0x5a3097befc15aa1eL,0x40d12548b54b0745L,0x5bad4706519a5f12L,
+ 0xed03f717a439dee6L },
+ { 0x0794bb6c4a02c499L,0xf725083dcffe71d2L,0x2cad75190f3adcafL,
+ 0x7f68ea1c43729310L } },
+ /* 13 << 98 */
+ { { 0xe747c8c7b7ffd977L,0xec104c3580761a22L,0x8395ebaf5a3ffb83L,
+ 0xfb3261f4e4b63db7L },
+ { 0x53544960d883e544L,0x13520d708cc2eeb8L,0x08f6337bd3d65f99L,
+ 0x83997db2781cf95bL } },
+ /* 14 << 98 */
+ { { 0xce6ff1060dbd2c01L,0x4f8eea6b1f9ce934L,0x546f7c4b0e993921L,
+ 0x6236a3245e753fc7L },
+ { 0x65a41f84a16022e9L,0x0c18d87843d1dbb2L,0x73c556402d4cef9cL,
+ 0xa042810870444c74L } },
+ /* 15 << 98 */
+ { { 0x68e4f15e9afdfb3cL,0x49a561435bdfb6dfL,0xa9bc1bd45f823d97L,
+ 0xbceb5970ea111c2aL },
+ { 0x366b455fb269bbc4L,0x7cd85e1ee9bc5d62L,0xc743c41c4f18b086L,
+ 0xa4b4099095294fb9L } },
+ /* 16 << 98 */
+ { { 0x9c7c581d26ee8382L,0xcf17dcc5359d638eL,0xee8273abb728ae3dL,
+ 0x1d112926f821f047L },
+ { 0x1149847750491a74L,0x687fa761fde0dfb9L,0x2c2580227ea435abL,
+ 0x6b8bdb9491ce7e3fL } },
+ /* 17 << 98 */
+ { { 0x4c5b5dc93bf834aaL,0x043718194f6c7e4bL,0xc284e00a3736bcadL,
+ 0x0d88111821ae8f8dL },
+ { 0xf9cf0f82f48c8e33L,0xa11fd075a1bf40dbL,0xdceab0dedc2733e5L,
+ 0xc560a8b58e986bd7L } },
+ /* 18 << 98 */
+ { { 0x48dd1fe23929d097L,0x3885b29092f188f1L,0x0f2ae613da6fcdacL,
+ 0x9054303eb662a46cL },
+ { 0xb6871e440738042aL,0x98e6a977bdaf6449L,0xd8bc0650d1c9df1bL,
+ 0xef3d645136e098f9L } },
+ /* 19 << 98 */
+ { { 0x03fbae82b6d72d28L,0x77ca9db1f5d84080L,0x8a112cffa58efc1cL,
+ 0x518d761cc564cb4aL },
+ { 0x69b5740ef0d1b5ceL,0x717039cce9eb1785L,0x3fe29f9022f53382L,
+ 0x8e54ba566bc7c95cL } },
+ /* 20 << 98 */
+ { { 0x9c806d8af7f91d0fL,0x3b61b0f1a82a5728L,0x4640032d94d76754L,
+ 0x273eb5de47d834c6L },
+ { 0x2988abf77b4e4d53L,0xb7ce66bfde401777L,0x9fba6b32715071b3L,
+ 0x82413c24ad3a1a98L } },
+ /* 21 << 98 */
+ { { 0x5b7fc8c4e0e8ad93L,0xb5679aee5fab868dL,0xb1f9d2fa2b3946f3L,
+ 0x458897dc5685b50aL },
+ { 0x1e98c93089d0caf3L,0x39564c5f78642e92L,0x1b77729a0dbdaf18L,
+ 0xf9170722579e82e6L } },
+ /* 22 << 98 */
+ { { 0x680c0317e4515fa5L,0xf85cff84fb0c790fL,0xc7a82aab6d2e0765L,
+ 0x7446bca935c82b32L },
+ { 0x5de607aa6d63184fL,0x7c1a46a8262803a6L,0xd218313daebe8035L,
+ 0x92113ffdc73c51f8L } },
+ /* 23 << 98 */
+ { { 0x4b38e08312e7e46cL,0x69d0a37a56126bd5L,0xfb3f324b73c07e04L,
+ 0xa0c22f678fda7267L },
+ { 0x8f2c00514d2c7d8fL,0xbc45ced3cbe2cae5L,0xe1c6cf07a8f0f277L,
+ 0xbc3923121eb99a98L } },
+ /* 24 << 98 */
+ { { 0x75537b7e3cc8ac85L,0x8d725f57dd02753bL,0xfd05ff64b737df2fL,
+ 0x55fe8712f6d2531dL },
+ { 0x57ce04a96ab6b01cL,0x69a02a897cd93724L,0x4f82ac35cf86699bL,
+ 0x8242d3ad9cb4b232L } },
+ /* 25 << 98 */
+ { { 0x713d0f65d62105e5L,0xbb222bfa2d29be61L,0xf2f9a79e6cfbef09L,
+ 0xfc24d8d3d5d6782fL },
+ { 0x5db77085d4129967L,0xdb81c3ccdc3c2a43L,0x9d655fc005d8d9a3L,
+ 0x3f5d057a54298026L } },
+ /* 26 << 98 */
+ { { 0x1157f56d88c54694L,0xb26baba59b09573eL,0x2cab03b022adffd1L,
+ 0x60a412c8dd69f383L },
+ { 0xed76e98b54b25039L,0xd4ee67d3687e714dL,0x877396487b00b594L,
+ 0xce419775c9ef709bL } },
+ /* 27 << 98 */
+ { { 0x40f76f851c203a40L,0x30d352d6eafd8f91L,0xaf196d3d95578dd2L,
+ 0xea4bb3d777cc3f3dL },
+ { 0x42a5bd03b98e782bL,0xac958c400624920dL,0xb838134cfc56fcc8L,
+ 0x86ec4ccf89572e5eL } },
+ /* 28 << 98 */
+ { { 0x69c435269be47be0L,0x323b7dd8cb28fea1L,0xfa5538ba3a6c67e5L,
+ 0xef921d701d378e46L },
+ { 0xf92961fc3c4b880eL,0x3f6f914e98940a67L,0xa990eb0afef0ff39L,
+ 0xa6c2920ff0eeff9cL } },
+ /* 29 << 98 */
+ { { 0xca80416651b8d9a3L,0x42531bc90ffb0db1L,0x72ce4718aa82e7ceL,
+ 0x6e199913df574741L },
+ { 0xd5f1b13dd5d36946L,0x8255dc65f68f0194L,0xdc9df4cd8710d230L,
+ 0x3453c20f138c1988L } },
+ /* 30 << 98 */
+ { { 0x9af98dc089a6ef01L,0x4dbcc3f09857df85L,0x348056015c1ad924L,
+ 0x40448da5d0493046L },
+ { 0xf629926d4ee343e2L,0x6343f1bd90e8a301L,0xefc9349140815b3fL,
+ 0xf882a423de8f66fbL } },
+ /* 31 << 98 */
+ { { 0x3a12d5f4e7db9f57L,0x7dfba38a3c384c27L,0x7a904bfd6fc660b1L,
+ 0xeb6c5db32773b21cL },
+ { 0xc350ee661cdfe049L,0x9baac0ce44540f29L,0xbc57b6aba5ec6aadL,
+ 0x167ce8c30a7c1baaL } },
+ /* 32 << 98 */
+ { { 0xb23a03a553fb2b56L,0x6ce141e74e057f78L,0x796525c389e490d9L,
+ 0x0bc95725a31a7e75L },
+ { 0x1ec567911220fd06L,0x716e3a3c408b0bd6L,0x31cd6bf7e8ebeba9L,
+ 0xa7326ca6bee6b670L } },
+ /* 33 << 98 */
+ { { 0x3d9f851ccd090c43L,0x561e8f13f12c3988L,0x50490b6a904b7be4L,
+ 0x61690ce10410737bL },
+ { 0x299e9a370f009052L,0x258758f0f026092eL,0x9fa255f3fdfcdc0fL,
+ 0xdbc9fb1fc0e1bcd2L } },
+ /* 34 << 98 */
+ { { 0x35f9dd6e24651840L,0xdca45a84a5c59abcL,0x103d396fecca4938L,
+ 0x4532da0ab97b3f29L },
+ { 0xc4135ea51999a6bfL,0x3aa9505a5e6bf2eeL,0xf77cef063f5be093L,
+ 0x97d1a0f8a943152eL } },
+ /* 35 << 98 */
+ { { 0x2cb0ebba2e1c21ddL,0xf41b29fc2c6797c4L,0xc6e17321b300101fL,
+ 0x4422b0e9d0d79a89L },
+ { 0x49e4901c92f1bfc4L,0x06ab1f8fe1e10ed9L,0x84d35577db2926b8L,
+ 0xca349d39356e8ec2L } },
+ /* 36 << 98 */
+ { { 0x70b63d32343bf1a9L,0x8fd3bd2837d1a6b1L,0x0454879c316865b4L,
+ 0xee959ff6c458efa2L },
+ { 0x0461dcf89706dc3fL,0x737db0e2164e4b2eL,0x092626802f8843c8L,
+ 0x54498bbc7745e6f6L } },
+ /* 37 << 98 */
+ { { 0x359473faa29e24afL,0xfcc3c45470aa87a1L,0xfd2c4bf500573aceL,
+ 0xb65b514e28dd1965L },
+ { 0xe46ae7cf2193e393L,0x60e9a4e1f5444d97L,0xe7594e9600ff38edL,
+ 0x43d84d2f0a0e0f02L } },
+ /* 38 << 98 */
+ { { 0x8b6db141ee398a21L,0xb88a56aee3bcc5beL,0x0a1aa52f373460eaL,
+ 0x20da1a56160bb19bL },
+ { 0xfb54999d65bf0384L,0x71a14d245d5a180eL,0xbc44db7b21737b04L,
+ 0xd84fcb1801dd8e92L } },
+ /* 39 << 98 */
+ { { 0x80de937bfa44b479L,0x535054995c98fd4fL,0x1edb12ab28f08727L,
+ 0x4c58b582a5f3ef53L },
+ { 0xbfb236d88327f246L,0xc3a3bfaa4d7df320L,0xecd96c59b96024f2L,
+ 0xfc293a537f4e0433L } },
+ /* 40 << 98 */
+ { { 0x5341352b5acf6e10L,0xc50343fdafe652c3L,0x4af3792d18577a7fL,
+ 0xe1a4c617af16823dL },
+ { 0x9b26d0cd33425d0aL,0x306399ed9b7bc47fL,0x2a792f33706bb20bL,
+ 0x3121961498111055L } },
+ /* 41 << 98 */
+ { { 0x864ec06487f5d28bL,0x11392d91962277fdL,0xb5aa7942bb6aed5fL,
+ 0x080094dc47e799d9L },
+ { 0x4afa588c208ba19bL,0xd3e7570f8512f284L,0xcbae64e602f5799aL,
+ 0xdeebe7ef514b9492L } },
+ /* 42 << 98 */
+ { { 0x30300f98e5c298ffL,0x17f561be3678361fL,0xf52ff31298cb9a16L,
+ 0x6233c3bc5562d490L },
+ { 0x7bfa15a192e3a2cbL,0x961bcfd1e6365119L,0x3bdd29bf2c8c53b1L,
+ 0x739704df822844baL } },
+ /* 43 << 98 */
+ { { 0x7dacfb587e7b754bL,0x23360791a806c9b9L,0xe7eb88c923504452L,
+ 0x2983e996852c1783L },
+ { 0xdd4ae529958d881dL,0x026bae03262c7b3cL,0x3a6f9193960b52d1L,
+ 0xd0980f9092696cfbL } },
+ /* 44 << 98 */
+ { { 0x4c1f428cd5f30851L,0x94dfed272a4f6630L,0x4df53772fc5d48a4L,
+ 0xdd2d5a2f933260ceL },
+ { 0x574115bdd44cc7a5L,0x4ba6b20dbd12533aL,0x30e93cb8243057c9L,
+ 0x794c486a14de320eL } },
+ /* 45 << 98 */
+ { { 0xe925d4cef21496e4L,0xf951d198ec696331L,0x9810e2de3e8d812fL,
+ 0xd0a47259389294abL },
+ { 0x513ba2b50e3bab66L,0x462caff5abad306fL,0xe2dc6d59af04c49eL,
+ 0x1aeb8750e0b84b0bL } },
+ /* 46 << 98 */
+ { { 0xc034f12f2f7d0ca2L,0x6d2e8128e06acf2fL,0x801f4f8321facc2fL,
+ 0xa1170c03f40ef607L },
+ { 0xfe0a1d4f7805a99cL,0xbde56a36cc26aba5L,0x5b1629d035531f40L,
+ 0xac212c2b9afa6108L } },
+ /* 47 << 98 */
+ { { 0x30a06bf315697be5L,0x6f0545dc2c63c7c1L,0x5d8cb8427ccdadafL,
+ 0xd52e379bac7015bbL },
+ { 0xc4f56147f462c23eL,0xd44a429846bc24b0L,0xbc73d23ae2856d4fL,
+ 0x61cedd8c0832bcdfL } },
+ /* 48 << 98 */
+ { { 0x6095355699f241d7L,0xee4adbd7001a349dL,0x0b35bf6aaa89e491L,
+ 0x7f0076f4136f7546L },
+ { 0xd19a18ba9264da3dL,0x6eb2d2cd62a7a28bL,0xcdba941f8761c971L,
+ 0x1550518ba3be4a5dL } },
+ /* 49 << 98 */
+ { { 0xd0e8e2f057d0b70cL,0xeea8612ecd133ba3L,0x814670f044416aecL,
+ 0x424db6c330775061L },
+ { 0xd96039d116213fd1L,0xc61e7fa518a3478fL,0xa805bdcccb0c5021L,
+ 0xbdd6f3a80cc616ddL } },
+ /* 50 << 98 */
+ { { 0x060096675d97f7e2L,0x31db0fc1af0bf4b6L,0x23680ed45491627aL,
+ 0xb99a3c667d741fb1L },
+ { 0xe9bb5f5536b1ff92L,0x29738577512b388dL,0xdb8a2ce750fcf263L,
+ 0x385346d46c4f7b47L } },
+ /* 51 << 98 */
+ { { 0xbe86c5ef31631f9eL,0xbf91da2103a57a29L,0xc3b1f7967b23f821L,
+ 0x0f7d00d2770db354L },
+ { 0x8ffc6c3bd8fe79daL,0xcc5e8c40d525c996L,0x4640991dcfff632aL,
+ 0x64d97e8c67112528L } },
+ /* 52 << 98 */
+ { { 0xc232d97302f1cd1eL,0xce87eacb1dd212a4L,0x6e4c8c73e69802f7L,
+ 0x12ef02901fffddbdL },
+ { 0x941ec74e1bcea6e2L,0xd0b540243cb92cbbL,0x809fb9d47e8f9d05L,
+ 0x3bf16159f2992aaeL } },
+ /* 53 << 98 */
+ { { 0xad40f279f8a7a838L,0x11aea63105615660L,0xbf52e6f1a01f6fa1L,
+ 0xef0469953dc2aec9L },
+ { 0x785dbec9d8080711L,0xe1aec60a9fdedf76L,0xece797b5fa21c126L,
+ 0xc66e898f05e52732L } },
+ /* 54 << 98 */
+ { { 0x39bb69c408811fdbL,0x8bfe1ef82fc7f082L,0xc8e7a393174f4138L,
+ 0xfba8ad1dd58d1f98L },
+ { 0xbc21d0cebfd2fd5bL,0x0b839a826ee60d61L,0xaacf7658afd22253L,
+ 0xb526bed8aae396b3L } },
+ /* 55 << 98 */
+ { { 0xccc1bbc238564464L,0x9e3ff9478c45bc73L,0xcde9bca358188a78L,
+ 0x138b8ee0d73bf8f7L },
+ { 0x5c7e234c4123c489L,0x66e69368fa643297L,0x0629eeee39a15fa3L,
+ 0x95fab881a9e2a927L } },
+ /* 56 << 98 */
+ { { 0xb2497007eafbb1e1L,0xd75c9ce6e75b7a93L,0x3558352defb68d78L,
+ 0xa2f26699223f6396L },
+ { 0xeb911ecfe469b17aL,0x62545779e72d3ec2L,0x8ea47de782cb113fL,
+ 0xebe4b0864e1fa98dL } },
+ /* 57 << 98 */
+ { { 0xec2d5ed78cdfedb1L,0xa535c077fe211a74L,0x9678109b11d244c5L,
+ 0xf17c8bfbbe299a76L },
+ { 0xb651412efb11fbc4L,0xea0b548294ab3f65L,0xd8dffd950cf78243L,
+ 0x2e719e57ce0361d4L } },
+ /* 58 << 98 */
+ { { 0x9007f085304ddc5bL,0x095e8c6d4daba2eaL,0x5a33cdb43f9d28a9L,
+ 0x85b95cd8e2283003L },
+ { 0xbcd6c819b9744733L,0x29c5f538fc7f5783L,0x6c49b2fad59038e4L,
+ 0x68349cc13bbe1018L } },
+ /* 59 << 98 */
+ { { 0xcc490c1d21830ee5L,0x36f9c4eee9bfa297L,0x58fd729448de1a94L,
+ 0xaadb13a84e8f2cdcL },
+ { 0x515eaaa081313dbaL,0xc76bb468c2152dd8L,0x357f8d75a653dbf8L,
+ 0xe4d8c4d1b14ac143L } },
+ /* 60 << 98 */
+ { { 0xbdb8e675b055cb40L,0x898f8e7b977b5167L,0xecc65651b82fb863L,
+ 0x565448146d88f01fL },
+ { 0xb0928e95263a75a9L,0xcfb6836f1a22fcdaL,0x651d14db3f3bd37cL,
+ 0x1d3837fbb6ad4664L } },
+ /* 61 << 98 */
+ { { 0x7c5fb538ff4f94abL,0x7243c7126d7fb8f2L,0xef13d60ca85c5287L,
+ 0x18cfb7c74bb8dd1bL },
+ { 0x82f9bfe672908219L,0x35c4592b9d5144abL,0x52734f379cf4b42fL,
+ 0x6bac55e78c60ddc4L } },
+ /* 62 << 98 */
+ { { 0xb5cd811e94dea0f6L,0x259ecae4e18cc1a3L,0x6a0e836e15e660f8L,
+ 0x6c639ea60e02bff2L },
+ { 0x8721b8cb7e1026fdL,0x9e73b50b63261942L,0xb8c7097477f01da3L,
+ 0x1839e6a68268f57fL } },
+ /* 63 << 98 */
+ { { 0x571b94155150b805L,0x1892389ef92c7097L,0x8d69c18e4a084b95L,
+ 0x7014c512be5b495cL },
+ { 0x4780db361b07523cL,0x2f6219ce2c1c64faL,0xc38b81b0602c105aL,
+ 0xab4f4f205dc8e360L } },
+ /* 64 << 98 */
+ { { 0x20d3c982cf7d62d2L,0x1f36e29d23ba8150L,0x48ae0bf092763f9eL,
+ 0x7a527e6b1d3a7007L },
+ { 0xb4a89097581a85e3L,0x1f1a520fdc158be5L,0xf98db37d167d726eL,
+ 0x8802786e1113e862L } },
+ /* 0 << 105 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 105 */
+ { { 0xefb2149e36f09ab0L,0x03f163ca4a10bb5bL,0xd029704506e20998L,
+ 0x56f0af001b5a3babL },
+ { 0x7af4cfec70880e0dL,0x7332a66fbe3d913fL,0x32e6c84a7eceb4bdL,
+ 0xedc4a79a9c228f55L } },
+ /* 2 << 105 */
+ { { 0xc37c7dd0c55c4496L,0xa6a9635725bbabd2L,0x5b7e63f2add7f363L,
+ 0x9dce37822e73f1dfL },
+ { 0xe1e5a16ab2b91f71L,0xe44898235ba0163cL,0xf2759c32f6e515adL,
+ 0xa5e2f1f88615eecfL } },
+ /* 3 << 105 */
+ { { 0x74519be7abded551L,0x03d358b8c8b74410L,0x4d00b10b0e10d9a9L,
+ 0x6392b0b128da52b7L },
+ { 0x6744a2980b75c904L,0xc305b0aea8f7f96cL,0x042e421d182cf932L,
+ 0xf6fc5d509e4636caL } },
+ /* 4 << 105 */
+ { { 0x795847c9d64cc78cL,0x6c50621b9b6cb27bL,0x07099bf8df8022abL,
+ 0x48f862ebc04eda1dL },
+ { 0xd12732ede1603c16L,0x19a80e0f5c9a9450L,0xe2257f54b429b4fcL,
+ 0x66d3b2c645460515L } },
+ /* 5 << 105 */
+ { { 0x6ca4f87e822e37beL,0x73f237b4253bda4eL,0xf747f3a241190aebL,
+ 0xf06fa36f804cf284L },
+ { 0x0a6bbb6efc621c12L,0x5d624b6440b80ec6L,0x4b0724257ba556f3L,
+ 0x7fa0c3543e2d20a8L } },
+ /* 6 << 105 */
+ { { 0xe921fa31e3229d41L,0xa929c65294531bd4L,0x84156027a6d38209L,
+ 0xf3d69f736bdb97bdL },
+ { 0x8906d19a16833631L,0x68a34c2e03d51be3L,0xcb59583b0e511cd8L,
+ 0x99ce6bfdfdc132a8L } },
+ /* 7 << 105 */
+ { { 0x3facdaaaffcdb463L,0x658bbc1a34a38b08L,0x12a801f8f1a9078dL,
+ 0x1567bcf96ab855deL },
+ { 0xe08498e03572359bL,0xcf0353e58659e68bL,0xbb86e9c87d23807cL,
+ 0xbc08728d2198e8a2L } },
+ /* 8 << 105 */
+ { { 0x8de2b7bc453cadd6L,0x203900a7bc0bc1f8L,0xbcd86e47a6abd3afL,
+ 0x911cac128502effbL },
+ { 0x2d550242ec965469L,0x0e9f769229e0017eL,0x633f078f65979885L,
+ 0xfb87d4494cf751efL } },
+ /* 9 << 105 */
+ { { 0xe1790e4bfc25419aL,0x364672034bff3cfdL,0xc8db638625b6e83fL,
+ 0x6cc69f236cad6fd2L },
+ { 0x0219e45a6bc68bb9L,0xe43d79b6297f7334L,0x7d445368465dc97cL,
+ 0x4b9eea322a0b949aL } },
+ /* 10 << 105 */
+ { { 0x1b96c6ba6102d021L,0xeaafac782f4461eaL,0xd4b85c41c49f19a8L,
+ 0x275c28e4cf538875L },
+ { 0x35451a9ddd2e54e0L,0x6991adb50605618bL,0x5b8b4bcd7b36cd24L,
+ 0x372a4f8c56f37216L } },
+ /* 11 << 105 */
+ { { 0xc890bd73a6a5da60L,0x6f083da0dc4c9ff0L,0xf4e14d94f0536e57L,
+ 0xf9ee1edaaaec8243L },
+ { 0x571241ec8bdcf8e7L,0xa5db82710b041e26L,0x9a0b9a99e3fff040L,
+ 0xcaaf21dd7c271202L } },
+ /* 12 << 105 */
+ { { 0xb4e2b2e14f0dd2e8L,0xe77e7c4f0a377ac7L,0x69202c3f0d7a2198L,
+ 0xf759b7ff28200eb8L },
+ { 0xc87526eddcfe314eL,0xeb84c52453d5cf99L,0xb1b52ace515138b6L,
+ 0x5aa7ff8c23fca3f4L } },
+ /* 13 << 105 */
+ { { 0xff0b13c3b9791a26L,0x960022dacdd58b16L,0xdbd55c9257aad2deL,
+ 0x3baaaaa3f30fe619L },
+ { 0x9a4b23460d881efdL,0x506416c046325e2aL,0x91381e76035c18d4L,
+ 0xb3bb68bef27817b0L } },
+ /* 14 << 105 */
+ { { 0x15bfb8bf5116f937L,0x7c64a586c1268943L,0x71e25cc38419a2c8L,
+ 0x9fd6b0c48335f463L },
+ { 0x4bf0ba3ce8ee0e0eL,0x6f6fba60298c21faL,0x57d57b39ae66bee0L,
+ 0x292d513022672544L } },
+ /* 15 << 105 */
+ { { 0xf451105dbab093b3L,0x012f59b902839986L,0x8a9158023474a89cL,
+ 0x048c919c2de03e97L },
+ { 0xc476a2b591071cd5L,0x791ed89a034970a5L,0x89bd9042e1b7994bL,
+ 0x8eaf5179a1057ffdL } },
+ /* 16 << 105 */
+ { { 0x6066e2a2d551ee10L,0x87a8f1d8727e09a6L,0x00d08bab2c01148dL,
+ 0x6da8e4f1424f33feL },
+ { 0x466d17f0cf9a4e71L,0xff5020103bf5cb19L,0xdccf97d8d062ecc0L,
+ 0x80c0d9af81d80ac4L } },
+ /* 17 << 105 */
+ { { 0xe87771d8033f2876L,0xb0186ec67d5cc3dbL,0x58e8bb803bc9bc1dL,
+ 0x4d1395cc6f6ef60eL },
+ { 0xa73c62d6186244a0L,0x918e5f23110a5b53L,0xed4878ca741b7eabL,
+ 0x3038d71adbe03e51L } },
+ /* 18 << 105 */
+ { { 0x840204b7a93c3246L,0x21ab6069a0b9b4cdL,0xf5fa6e2bb1d64218L,
+ 0x1de6ad0ef3d56191L },
+ { 0x570aaa88ff1929c7L,0xc6df4c6b640e87b5L,0xde8a74f2c65f0cccL,
+ 0x8b972fd5e6f6cc01L } },
+ /* 19 << 105 */
+ { { 0x3fff36b60b846531L,0xba7e45e610a5e475L,0x84a1d10e4145b6c5L,
+ 0xf1f7f91a5e046d9dL },
+ { 0x0317a69244de90d7L,0x951a1d4af199c15eL,0x91f78046c9d73debL,
+ 0x74c82828fab8224fL } },
+ /* 20 << 105 */
+ { { 0xaa6778fce7560b90L,0xb4073e61a7e824ceL,0xff0d693cd642eba8L,
+ 0x7ce2e57a5dccef38L },
+ { 0x89c2c7891df1ad46L,0x83a06922098346fdL,0x2d715d72da2fc177L,
+ 0x7b6dd71d85b6cf1dL } },
+ /* 21 << 105 */
+ { { 0xc60a6d0a73fa9cb0L,0xedd3992e328bf5a9L,0xc380ddd0832c8c82L,
+ 0xd182d410a2a0bf50L },
+ { 0x7d9d7438d9a528dbL,0xe8b1a0e9caf53994L,0xddd6e5fe0e19987cL,
+ 0xacb8df03190b059dL } },
+ /* 22 << 105 */
+ { { 0x53703a328300129fL,0x1f63766268c43bfdL,0xbcbd191300e54051L,
+ 0x812fcc627bf5a8c5L },
+ { 0x3f969d5f29fb85daL,0x72f4e00a694759e8L,0x426b6e52790726b7L,
+ 0x617bbc873bdbb209L } },
+ /* 23 << 105 */
+ { { 0x511f8bb997aee317L,0x812a4096e81536a8L,0x137dfe593ac09b9bL,
+ 0x0682238fba8c9a7aL },
+ { 0x7072ead6aeccb4bdL,0x6a34e9aa692ba633L,0xc82eaec26fff9d33L,
+ 0xfb7535121d4d2b62L } },
+ /* 24 << 105 */
+ { { 0x1a0445ff1d7aadabL,0x65d38260d5f6a67cL,0x6e62fb0891cfb26fL,
+ 0xef1e0fa55c7d91d6L },
+ { 0x47e7c7ba33db72cdL,0x017cbc09fa7c74b2L,0x3c931590f50a503cL,
+ 0xcac54f60616baa42L } },
+ /* 25 << 105 */
+ { { 0x9b6cd380b2369f0fL,0x97d3a70d23c76151L,0x5f9dd6fc9862a9c6L,
+ 0x044c4ab212312f51L },
+ { 0x035ea0fd834a2ddcL,0x49e6b862cc7b826dL,0xb03d688362fce490L,
+ 0x62f2497ab37e36e9L } },
+ /* 26 << 105 */
+ { { 0x04b005b6c6458293L,0x36bb5276e8d10af7L,0xacf2dc138ee617b8L,
+ 0x470d2d35b004b3d4L },
+ { 0x06790832feeb1b77L,0x2bb75c3985657f9cL,0xd70bd4edc0f60004L,
+ 0xfe797ecc219b018bL } },
+ /* 27 << 105 */
+ { { 0x9b5bec2a753aebccL,0xdaf9f3dcc939eca5L,0xd6bc6833d095ad09L,
+ 0x98abdd51daa4d2fcL },
+ { 0xd9840a318d168be5L,0xcf7c10e02325a23cL,0xa5c02aa07e6ecfafL,
+ 0x2462e7e6b5bfdf18L } },
+ /* 28 << 105 */
+ { { 0xab2d8a8ba0cc3f12L,0x68dd485dbc672a29L,0x72039752596f2cd3L,
+ 0x5d3eea67a0cf3d8dL },
+ { 0x810a1a81e6602671L,0x8f144a4014026c0cL,0xbc753a6d76b50f85L,
+ 0xc4dc21e8645cd4a4L } },
+ /* 29 << 105 */
+ { { 0xc5262dea521d0378L,0x802b8e0e05011c6fL,0x1ba19cbb0b4c19eaL,
+ 0x21db64b5ebf0aaecL },
+ { 0x1f394ee970342f9dL,0x93a10aee1bc44a14L,0xa7eed31b3efd0baaL,
+ 0x6e7c824e1d154e65L } },
+ /* 30 << 105 */
+ { { 0xee23fa819966e7eeL,0x64ec4aa805b7920dL,0x2d44462d2d90aad4L,
+ 0xf44dd195df277ad5L },
+ { 0x8d6471f1bb46b6a1L,0x1e65d313fd885090L,0x33a800f513a977b4L,
+ 0xaca9d7210797e1efL } },
+ /* 31 << 105 */
+ { { 0x9a5a85a0fcff6a17L,0x9970a3f31eca7ceeL,0xbb9f0d6bc9504be3L,
+ 0xe0c504beadd24ee2L },
+ { 0x7e09d95677fcc2f4L,0xef1a522765bb5fc4L,0x145d4fb18b9286aaL,
+ 0x66fd0c5d6649028bL } },
+ /* 32 << 105 */
+ { { 0x98857ceb1bf4581cL,0xe635e186aca7b166L,0x278ddd22659722acL,
+ 0xa0903c4c1db68007L },
+ { 0x366e458948f21402L,0x31b49c14b96abda2L,0x329c4b09e0403190L,
+ 0x97197ca3d29f43feL } },
+ /* 33 << 105 */
+ { { 0x8073dd1e274983d8L,0xda1a3bde55717c8fL,0xfd3d4da20361f9d1L,
+ 0x1332d0814c7de1ceL },
+ { 0x9b7ef7a3aa6d0e10L,0x17db2e73f54f1c4aL,0xaf3dffae4cd35567L,
+ 0xaaa2f406e56f4e71L } },
+ /* 34 << 105 */
+ { { 0x8966759e7ace3fc7L,0x9594eacf45a8d8c6L,0x8de3bd8b91834e0eL,
+ 0xafe4ca53548c0421L },
+ { 0xfdd7e856e6ee81c6L,0x8f671beb6b891a3aL,0xf7a58f2bfae63829L,
+ 0x9ab186fb9c11ac9fL } },
+ /* 35 << 105 */
+ { { 0x8d6eb36910b5be76L,0x046b7739fb040bcdL,0xccb4529fcb73de88L,
+ 0x1df0fefccf26be03L },
+ { 0xad7757a6bcfcd027L,0xa8786c75bb3165caL,0xe9db1e347e99a4d9L,
+ 0x99ee86dfb06c504bL } },
+ /* 36 << 105 */
+ { { 0x5b7c2dddc15c9f0aL,0xdf87a7344295989eL,0x59ece47c03d08fdaL,
+ 0xb074d3ddad5fc702L },
+ { 0x2040790351a03776L,0x2bb1f77b2a608007L,0x25c58f4fe1153185L,
+ 0xe6df62f6766e6447L } },
+ /* 37 << 105 */
+ { { 0xefb3d1beed51275aL,0x5de47dc72f0f483fL,0x7932d98e97c2bedfL,
+ 0xd5c119270219f8a1L },
+ { 0x9d751200a73a294eL,0x5f88434a9dc20172L,0xd28d9fd3a26f506aL,
+ 0xa890cd319d1dcd48L } },
+ /* 38 << 105 */
+ { { 0x0aebaec170f4d3b4L,0xfd1a13690ffc8d00L,0xb9d9c24057d57838L,
+ 0x45929d2668bac361L },
+ { 0x5a2cd06025b15ca6L,0x4b3c83e16e474446L,0x1aac7578ee1e5134L,
+ 0xa418f5d6c91e2f41L } },
+ /* 39 << 105 */
+ { { 0x6936fc8a213ed68bL,0x860ae7ed510a5224L,0x63660335def09b53L,
+ 0x641b2897cd79c98dL },
+ { 0x29bd38e101110f35L,0x79c26f42648b1937L,0x64dae5199d9164f4L,
+ 0xd85a23100265c273L } },
+ /* 40 << 105 */
+ { { 0x7173dd5d4b07e2b1L,0xd144c4cb8d9ea221L,0xe8b04ea41105ab14L,
+ 0x92dda542fe80d8f1L },
+ { 0xe9982fa8cf03dce6L,0x8b5ea9651a22cffcL,0xf7f4ea7f3fad88c4L,
+ 0x62db773e6a5ba95cL } },
+ /* 41 << 105 */
+ { { 0xd20f02fb93f24567L,0xfd46c69a315257caL,0x0ac74cc78bcab987L,
+ 0x46f31c015ceca2f5L },
+ { 0x40aedb59888b219eL,0xe50ecc37e1fccd02L,0x1bcd9dad911f816cL,
+ 0x583cc1ec8db9b00cL } },
+ /* 42 << 105 */
+ { { 0xf3cd2e66a483bf11L,0xfa08a6f5b1b2c169L,0xf375e2454be9fa28L,
+ 0x99a7ffec5b6d011fL },
+ { 0x6a3ebddbc4ae62daL,0x6cea00ae374aef5dL,0xab5fb98d9d4d05bcL,
+ 0x7cba1423d560f252L } },
+ /* 43 << 105 */
+ { { 0x49b2cc21208490deL,0x1ca66ec3bcfb2879L,0x7f1166b71b6fb16fL,
+ 0xfff63e0865fe5db3L },
+ { 0xb8345abe8b2610beL,0xb732ed8039de3df4L,0x0e24ed50211c32b4L,
+ 0xd10d8a69848ff27dL } },
+ /* 44 << 105 */
+ { { 0xc1074398ed4de248L,0xd7cedace10488927L,0xa4aa6bf885673e13L,
+ 0xb46bae916daf30afL },
+ { 0x07088472fcef7ad8L,0x61151608d4b35e97L,0xbcfe8f26dde29986L,
+ 0xeb84c4c7d5a34c79L } },
+ /* 45 << 105 */
+ { { 0xc1eec55c164e1214L,0x891be86da147bb03L,0x9fab4d100ba96835L,
+ 0xbf01e9b8a5c1ae9fL },
+ { 0x6b4de139b186ebc0L,0xd5c74c2685b91bcaL,0x5086a99cc2d93854L,
+ 0xeed62a7ba7a9dfbcL } },
+ /* 46 << 105 */
+ { { 0x8778ed6f76b7618aL,0xbff750a503b66062L,0x4cb7be22b65186dbL,
+ 0x369dfbf0cc3a6d13L },
+ { 0xc7dab26c7191a321L,0x9edac3f940ed718eL,0xbc142b36d0cfd183L,
+ 0xc8af82f67c991693L } },
+ /* 47 << 105 */
+ { { 0xb3d1e4d897ce0b2aL,0xe6d7c87fc3a55cdfL,0x35846b9568b81afeL,
+ 0x018d12afd3c239d8L },
+ { 0x2b2c620801206e15L,0xe0e42453a3b882c6L,0x854470a3a50162d5L,
+ 0x081574787017a62aL } },
+ /* 48 << 105 */
+ { { 0x18bd3fb4820357c7L,0x992039ae6f1458adL,0x9a1df3c525b44aa1L,
+ 0x2d780357ed3d5281L },
+ { 0x58cf7e4dc77ad4d4L,0xd49a7998f9df4fc4L,0x4465a8b51d71205eL,
+ 0xa0ee0ea6649254aaL } },
+ /* 49 << 105 */
+ { { 0x4b5eeecfab7bd771L,0x6c87307335c262b9L,0xdc5bd6483c9d61e7L,
+ 0x233d6d54321460d2L },
+ { 0xd20c5626fc195bccL,0x2544595804d78b63L,0xe03fcb3d17ec8ef3L,
+ 0x54b690d146b8f781L } },
+ /* 50 << 105 */
+ { { 0x82fa2c8a21230646L,0xf51aabb9084f418cL,0xff4fbec11a30ba43L,
+ 0x6a5acf73743c9df7L },
+ { 0x1da2b357d635b4d5L,0xc3de68ddecd5c1daL,0xa689080bd61af0ddL,
+ 0xdea5938ad665bf99L } },
+ /* 51 << 105 */
+ { { 0x0231d71afe637294L,0x01968aa6a5a81cd8L,0x11252d50048e63b5L,
+ 0xc446bc526ca007e9L },
+ { 0xef8c50a696d6134bL,0x9361fbf59e09a05cL,0xf17f85a6dca3291aL,
+ 0xb178d548ff251a21L } },
+ /* 52 << 105 */
+ { { 0x87f6374ba4df3915L,0x566ce1bf2fd5d608L,0x425cba4d7de35102L,
+ 0x6b745f8f58c5d5e2L },
+ { 0x88402af663122edfL,0x3190f9ed3b989a89L,0x4ad3d387ebba3156L,
+ 0xef385ad9c7c469a5L } },
+ /* 53 << 105 */
+ { { 0xb08281de3f642c29L,0x20be0888910ffb88L,0xf353dd4ad5292546L,
+ 0x3f1627de8377a262L },
+ { 0xa5faa013eefcd638L,0x8f3bf62674cc77c3L,0x32618f65a348f55eL,
+ 0x5787c0dc9fefeb9eL } },
+ /* 54 << 105 */
+ { { 0xf1673aa2d9a23e44L,0x88dfa9934e10690dL,0x1ced1b362bf91108L,
+ 0x9193ceca3af48649L },
+ { 0xfb34327d2d738fc5L,0x6697b037975fee6cL,0x2f485da0c04079a5L,
+ 0x2cdf57352feaa1acL } },
+ /* 55 << 105 */
+ { { 0x76944420bd55659eL,0x7973e32b4376090cL,0x86bb4fe1163b591aL,
+ 0x10441aedc196f0caL },
+ { 0x3b431f4a045ad915L,0x6c11b437a4afacb1L,0x30b0c7db71fdbbd8L,
+ 0xb642931feda65acdL } },
+ /* 56 << 105 */
+ { { 0x4baae6e89c92b235L,0xa73bbd0e6b3993a1L,0xd06d60ec693dd031L,
+ 0x03cab91b7156881cL },
+ { 0xd615862f1db3574bL,0x485b018564bb061aL,0x27434988a0181e06L,
+ 0x2cd61ad4c1c0c757L } },
+ /* 57 << 105 */
+ { { 0x3effed5a2ff9f403L,0x8dc98d8b62239029L,0x2206021e1f17b70dL,
+ 0xafbec0cabf510015L },
+ { 0x9fed716480130dfaL,0x306dc2b58a02dcf5L,0x48f06620feb10fc0L,
+ 0x78d1e1d55a57cf51L } },
+ /* 58 << 105 */
+ { { 0xadef8c5a192ef710L,0x88afbd4b3b7431f9L,0x7e1f740764250c9eL,
+ 0x6e31318db58bec07L },
+ { 0xfd4fc4b824f89b4eL,0x65a5dd8848c36a2aL,0x4f1eccfff024baa7L,
+ 0x22a21cf2cba94650L } },
+ /* 59 << 105 */
+ { { 0x95d29dee42a554f7L,0x828983a5002ec4baL,0x8112a1f78badb73dL,
+ 0x79ea8897a27c1839L },
+ { 0x8969a5a7d065fd83L,0xf49af791b262a0bcL,0xfcdea8b6af2b5127L,
+ 0x10e913e1564c2dbcL } },
+ /* 60 << 105 */
+ { { 0x51239d14bc21ef51L,0xe51c3ceb4ce57292L,0x795ff06847bbcc3bL,
+ 0x86b46e1ebd7e11e6L },
+ { 0x0ea6ba2380041ef4L,0xd72fe5056262342eL,0x8abc6dfd31d294d4L,
+ 0xbbe017a21278c2c9L } },
+ /* 61 << 105 */
+ { { 0xb1fcfa09b389328aL,0x322fbc62d01771b5L,0x04c0d06360b045bfL,
+ 0xdb652edc10e52d01L },
+ { 0x50ef932c03ec6627L,0xde1b3b2dc1ee50e3L,0x5ab7bdc5dc37a90dL,
+ 0xfea6721331e33a96L } },
+ /* 62 << 105 */
+ { { 0x6482b5cb4f2999aaL,0x38476cc6b8cbf0ddL,0x93ebfacb173405bbL,
+ 0x15cdafe7e52369ecL },
+ { 0xd42d5ba4d935b7dbL,0x648b60041c99a4cdL,0x785101bda3b5545bL,
+ 0x4bf2c38a9dd67fafL } },
+ /* 63 << 105 */
+ { { 0xb1aadc634442449cL,0xe0e9921a33ad4fb8L,0x5c552313aa686d82L,
+ 0xdee635fa465d866cL },
+ { 0xbc3c224a18ee6e8aL,0xeed748a6ed42e02fL,0xe70f930ad474cd08L,
+ 0x774ea6ecfff24adfL } },
+ /* 64 << 105 */
+ { { 0x03e2de1cf3480d4aL,0xf0d8edc7bc8acf1aL,0xf23e330368295a9cL,
+ 0xfadd5f68c546a97dL },
+ { 0x895597ad96f8acb1L,0xbddd49d5671bdae2L,0x16fcd52821dd43f4L,
+ 0xa5a454126619141aL } },
+ /* 0 << 112 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 112 */
+ { { 0x8ce9b6bfc360e25aL,0xe6425195075a1a78L,0x9dc756a8481732f4L,
+ 0x83c0440f5432b57aL },
+ { 0xc670b3f1d720281fL,0x2205910ed135e051L,0xded14b0edb052be7L,
+ 0x697b3d27c568ea39L } },
+ /* 2 << 112 */
+ { { 0x2e599b9afb3ff9edL,0x28c2e0ab17f6515cL,0x1cbee4fd474da449L,
+ 0x071279a44f364452L },
+ { 0x97abff6601fbe855L,0x3ee394e85fda51c4L,0x190385f667597c0bL,
+ 0x6e9fccc6a27ee34bL } },
+ /* 3 << 112 */
+ { { 0x0b89de9314092ebbL,0xf17256bd428e240cL,0xcf89a7f393d2f064L,
+ 0x4f57841ee1ed3b14L },
+ { 0x4ee14405e708d855L,0x856aae7203f1c3d0L,0xc8e5424fbdd7eed5L,
+ 0x3333e4ef73ab4270L } },
+ /* 4 << 112 */
+ { { 0x3bc77adedda492f8L,0xc11a3aea78297205L,0x5e89a3e734931b4cL,
+ 0x17512e2e9f5694bbL },
+ { 0x5dc349f3177bf8b6L,0x232ea4ba08c7ff3eL,0x9c4f9d16f511145dL,
+ 0xccf109a333b379c3L } },
+ /* 5 << 112 */
+ { { 0xe75e7a88a1f25897L,0x7ac6961fa1b5d4d8L,0xe3e1077308f3ed5cL,
+ 0x208a54ec0a892dfbL },
+ { 0xbe826e1978660710L,0x0cf70a97237df2c8L,0x418a7340ed704da5L,
+ 0xa3eeb9a908ca33fdL } },
+ /* 6 << 112 */
+ { { 0x49d96233169bca96L,0x04d286d42da6aafbL,0xc09606eca0c2fa94L,
+ 0x8869d0d523ff0fb3L },
+ { 0xa99937e5d0150d65L,0xa92e2503240c14c9L,0x656bf945108e2d49L,
+ 0x152a733aa2f59e2bL } },
+ /* 7 << 112 */
+ { { 0xb4323d588434a920L,0xc0af8e93622103c5L,0x667518ef938dbf9aL,
+ 0xa184307383a9cdf2L },
+ { 0x350a94aa5447ab80L,0xe5e5a325c75a3d61L,0x74ba507f68411a9eL,
+ 0x10581fc1594f70c5L } },
+ /* 8 << 112 */
+ { { 0x60e2857080eb24a9L,0x7bedfb4d488e0cfdL,0x721ebbd7c259cdb8L,
+ 0x0b0da855bc6390a9L },
+ { 0x2b4d04dbde314c70L,0xcdbf1fbc6c32e846L,0x33833eabb162fc9eL,
+ 0x9939b48bb0dd3ab7L } },
+ /* 9 << 112 */
+ { { 0x5aaa98a7cb0c9c8cL,0x75105f3081c4375cL,0xceee50575ef1c90fL,
+ 0xb31e065fc23a17bfL },
+ { 0x5364d275d4b6d45aL,0xd363f3ad62ec8996L,0xb5d212394391c65bL,
+ 0x84564765ebb41b47L } },
+ /* 10 << 112 */
+ { { 0x20d18ecc37107c78L,0xacff3b6b570c2a66L,0x22f975d99bd0d845L,
+ 0xef0a0c46ba178fa0L },
+ { 0x1a41965176b6028eL,0xc49ec674248612d4L,0x5b6ac4f27338af55L,
+ 0x06145e627bee5a36L } },
+ /* 11 << 112 */
+ { { 0x33e95d07e75746b5L,0x1c1e1f6dc40c78beL,0x967833ef222ff8e2L,
+ 0x4bedcf6ab49180adL },
+ { 0x6b37e9c13d7a4c8aL,0x2748887c6ddfe760L,0xf7055123aa3a5bbcL,
+ 0x954ff2257bbb8e74L } },
+ /* 12 << 112 */
+ { { 0xc42b8ab197c3dfb9L,0x55a549b0cf168154L,0xad6748e7c1b50692L,
+ 0x2775780f6fc5cbcbL },
+ { 0x4eab80b8e1c9d7c8L,0x8c69dae13fdbcd56L,0x47e6b4fb9969eaceL,
+ 0x002f1085a705cb5aL } },
+ /* 13 << 112 */
+ { { 0x4e23ca446d3fea55L,0xb4ae9c86f4810568L,0x47bfb91b2a62f27dL,
+ 0x60deb4c9d9bac28cL },
+ { 0xa892d8947de6c34cL,0x4ee682594494587dL,0x914ee14e1a3f8a5bL,
+ 0xbb113eaa28700385L } },
+ /* 14 << 112 */
+ { { 0x81ca03b92115b4c9L,0x7c163d388908cad1L,0xc912a118aa18179aL,
+ 0xe09ed750886e3081L },
+ { 0xa676e3fa26f516caL,0x753cacf78e732f91L,0x51592aea833da8b4L,
+ 0xc626f42f4cbea8aaL } },
+ /* 15 << 112 */
+ { { 0xef9dc899a7b56eafL,0x00c0e52c34ef7316L,0x5b1e4e24fe818a86L,
+ 0x9d31e20dc538be47L },
+ { 0x22eb932d3ed68974L,0xe44bbc087c4e87c4L,0x4121086e0dde9aefL,
+ 0x8e6b9cff134f4345L } },
+ /* 16 << 112 */
+ { { 0x96892c1f711b0eb9L,0xb905f2c8780ab954L,0xace26309a20792dbL,
+ 0xec8ac9b30684e126L },
+ { 0x486ad8b6b40a2447L,0x60121fc19fe3fb24L,0x5626fccf1a8e3b3fL,
+ 0x4e5686226ad1f394L } },
+ /* 17 << 112 */
+ { { 0xda7aae0d196aa5a1L,0xe0df8c771041b5fbL,0x451465d926b318b7L,
+ 0xc29b6e557ab136e9L },
+ { 0x2c2ab48b71148463L,0xb5738de364454a76L,0x54ccf9a05a03abe4L,
+ 0x377c02960427d58eL } },
+ /* 18 << 112 */
+ { { 0x73f5f0b92bb39c1fL,0x14373f2ce608d8c5L,0xdcbfd31400fbb805L,
+ 0xdf18fb2083afdcfbL },
+ { 0x81a57f4242b3523fL,0xe958532d87f650fbL,0xaa8dc8b68b0a7d7cL,
+ 0x1b75dfb7150166beL } },
+ /* 19 << 112 */
+ { { 0x90e4f7c92d7d1413L,0x67e2d6b59834f597L,0x4fd4f4f9a808c3e8L,
+ 0xaf8237e0d5281ec1L },
+ { 0x25ab5fdc84687ceeL,0xc5ded6b1a5b26c09L,0x8e4a5aecc8ea7650L,
+ 0x23b73e5c14cc417fL } },
+ /* 20 << 112 */
+ { { 0x2bfb43183037bf52L,0xb61e6db578c725d7L,0x8efd4060bbb3e5d7L,
+ 0x2e014701dbac488eL },
+ { 0xac75cf9a360aa449L,0xb70cfd0579634d08L,0xa591536dfffb15efL,
+ 0xb2c37582d07c106cL } },
+ /* 21 << 112 */
+ { { 0xb4293fdcf50225f9L,0xc52e175cb0e12b03L,0xf649c3bad0a8bf64L,
+ 0x745a8fefeb8ae3c6L },
+ { 0x30d7e5a358321bc3L,0xb1732be70bc4df48L,0x1f217993e9ea5058L,
+ 0xf7a71cde3e4fd745L } },
+ /* 22 << 112 */
+ { { 0x86cc533e894c5bbbL,0x6915c7d969d83082L,0xa6aa2d055815c244L,
+ 0xaeeee59249b22ce5L },
+ { 0x89e39d1378135486L,0x3a275c1f16b76f2fL,0xdb6bcc1be036e8f5L,
+ 0x4df69b215e4709f5L } },
+ /* 23 << 112 */
+ { { 0xa188b2502d0f39aaL,0x622118bb15a85947L,0x2ebf520ffde0f4faL,
+ 0xa40e9f294860e539L },
+ { 0x7b6a51eb22b57f0fL,0x849a33b97e80644aL,0x50e5d16f1cf095feL,
+ 0xd754b54eec55f002L } },
+ /* 24 << 112 */
+ { { 0x5cfbbb22236f4a98L,0x0b0c59e9066800bbL,0x4ac69a8f5a9a7774L,
+ 0x2b33f804d6bec948L },
+ { 0xb372929532e6c466L,0x68956d0f4e599c73L,0xa47a249f155c31ccL,
+ 0x24d80f0de1ce284eL } },
+ /* 25 << 112 */
+ { { 0xcd821dfb988baf01L,0xe6331a7ddbb16647L,0x1eb8ad33094cb960L,
+ 0x593cca38c91bbca5L },
+ { 0x384aac8d26567456L,0x40fa0309c04b6490L,0x97834cd6dab6c8f6L,
+ 0x68a7318d3f91e55fL } },
+ /* 26 << 112 */
+ { { 0xa00fd04efc4d3157L,0xb56f8ab22bf3bdeaL,0x014f56484fa57172L,
+ 0x948c5860450abdb3L },
+ { 0x342b5df00ebd4f08L,0x3e5168cd0e82938eL,0x7aedc1ceb0df5dd0L,
+ 0x6bbbc6d9e5732516L } },
+ /* 27 << 112 */
+ { { 0xc7bfd486605daaa6L,0x46fd72b7bb9a6c9eL,0xe4847fb1a124fb89L,
+ 0x75959cbda2d8ffbcL },
+ { 0x42579f65c8a588eeL,0x368c92e6b80b499dL,0xea4ef6cd999a5df1L,
+ 0xaa73bb7f936fe604L } },
+ /* 28 << 112 */
+ { { 0xf347a70d6457d188L,0x86eda86b8b7a388bL,0xb7cdff060ccd6013L,
+ 0xbeb1b6c7d0053fb2L },
+ { 0x0b02238799240a9fL,0x1bbb384f776189b2L,0x8695e71e9066193aL,
+ 0x2eb5009706ffac7eL } },
+ /* 29 << 112 */
+ { { 0x0654a9c04a7d2caaL,0x6f3fb3d1a5aaa290L,0x835db041ff476e8fL,
+ 0x540b8b0bc42295e4L },
+ { 0xa5c73ac905e214f5L,0x9a74075a56a0b638L,0x2e4b1090ce9e680bL,
+ 0x57a5b4796b8d9afaL } },
+ /* 30 << 112 */
+ { { 0x0dca48e726bfe65cL,0x097e391c7290c307L,0x683c462e6669e72eL,
+ 0xf505be1e062559acL },
+ { 0x5fbe3ea1e3a3035aL,0x6431ebf69cd50da8L,0xfd169d5c1f6407f2L,
+ 0x8d838a9560fce6b8L } },
+ /* 31 << 112 */
+ { { 0x2a2bfa7f650006f0L,0xdfd7dad350c0fbb2L,0x92452495ccf9ad96L,
+ 0x183bf494d95635f9L },
+ { 0x02d5df434a7bd989L,0x505385cca5431095L,0xdd98e67dfd43f53eL,
+ 0xd61e1a6c500c34a9L } },
+ /* 32 << 112 */
+ { { 0x5a4b46c64a8a3d62L,0x8469c4d0247743d2L,0x2bb3a13d88f7e433L,
+ 0x62b23a1001be5849L },
+ { 0xe83596b4a63d1a4cL,0x454e7fea7d183f3eL,0x643fce6117afb01cL,
+ 0x4e65e5e61c4c3638L } },
+ /* 33 << 112 */
+ { { 0x41d85ea1ef74c45bL,0x2cfbfa66ae328506L,0x98b078f53ada7da9L,
+ 0xd985fe37ec752fbbL },
+ { 0xeece68fe5a0148b4L,0x6f9a55c72d78136dL,0x232dccc4d2b729ceL,
+ 0xa27e0dfd90aafbc4L } },
+ /* 34 << 112 */
+ { { 0x9647445212b4603eL,0xa876c5516b706d14L,0xdf145fcf69a9d412L,
+ 0xe2ab75b72d479c34L },
+ { 0x12df9a761a23ff97L,0xc61389925d359d10L,0x6e51c7aefa835f22L,
+ 0x69a79cb1c0fcc4d9L } },
+ /* 35 << 112 */
+ { { 0xf57f350d594cc7e1L,0x3079ca633350ab79L,0x226fb6149aff594aL,
+ 0x35afec026d59a62bL },
+ { 0x9bee46f406ed2c6eL,0x58da17357d939a57L,0x44c504028fd1797eL,
+ 0xd8853e7c5ccea6caL } },
+ /* 36 << 112 */
+ { { 0x4065508da35fcd5fL,0x8965df8c495ccaebL,0x0f2da85012e1a962L,
+ 0xee471b94c1cf1cc4L },
+ { 0xcef19bc80a08fb75L,0x704958f581de3591L,0x2867f8b23aef4f88L,
+ 0x8d749384ea9f9a5fL } },
+ /* 37 << 112 */
+ { { 0x1b3855378c9049f4L,0x5be948f37b92d8b6L,0xd96f725db6e2bd6bL,
+ 0x37a222bc958c454dL },
+ { 0xe7c61abb8809bf61L,0x46f07fbc1346f18dL,0xfb567a7ae87c0d1cL,
+ 0x84a461c87ef3d07aL } },
+ /* 38 << 112 */
+ { { 0x0a5adce6d9278d98L,0x24d948139dfc73e1L,0x4f3528b6054321c3L,
+ 0x2e03fdde692ea706L },
+ { 0x10e6061947b533c0L,0x1a8bc73f2ca3c055L,0xae58d4b21bb62b8fL,
+ 0xb2045a73584a24e3L } },
+ /* 39 << 112 */
+ { { 0x3ab3d5afbd76e195L,0x478dd1ad6938a810L,0x6ffab3936ee3d5cbL,
+ 0xdfb693db22b361e4L },
+ { 0xf969449651dbf1a7L,0xcab4b4ef08a2e762L,0xe8c92f25d39bba9aL,
+ 0x850e61bcf1464d96L } },
+ /* 40 << 112 */
+ { { 0xb7e830e3dc09508bL,0xfaf6d2cf74317655L,0x72606cebdf690355L,
+ 0x48bb92b3d0c3ded6L },
+ { 0x65b754845c7cf892L,0xf6cd7ac9d5d5f01fL,0xc2c30a5996401d69L,
+ 0x91268650ed921878L } },
+ /* 41 << 112 */
+ { { 0x380bf913b78c558fL,0x43c0baebc8afdaa9L,0x377f61d554f169d3L,
+ 0xf8da07e3ae5ff20bL },
+ { 0xb676c49da8a90ea8L,0x81c1ff2b83a29b21L,0x383297ac2ad8d276L,
+ 0x3001122fba89f982L } },
+ /* 42 << 112 */
+ { { 0xe1d794be6718e448L,0x246c14827c3e6e13L,0x56646ef85d26b5efL,
+ 0x80f5091e88069cddL },
+ { 0xc5992e2f724bdd38L,0x02e915b48471e8c7L,0x96ff320a0d0ff2a9L,
+ 0xbf8864874384d1a0L } },
+ /* 43 << 112 */
+ { { 0xbbe1e6a6c93f72d6L,0xd5f75d12cad800eaL,0xfa40a09fe7acf117L,
+ 0x32c8cdd57581a355L },
+ { 0x742219927023c499L,0xa8afe5d738ec3901L,0x5691afcba90e83f0L,
+ 0x41bcaa030b8f8eacL } },
+ /* 44 << 112 */
+ { { 0xe38b5ff98d2668d5L,0x0715281a7ad81965L,0x1bc8fc7c03c6ce11L,
+ 0xcbbee6e28b650436L },
+ { 0x06b00fe80cdb9808L,0x17d6e066fe3ed315L,0x2e9d38c64d0b5018L,
+ 0xab8bfd56844dcaefL } },
+ /* 45 << 112 */
+ { { 0x42894a59513aed8bL,0xf77f3b6d314bd07aL,0xbbdecb8f8e42b582L,
+ 0xf10e2fa8d2390fe6L },
+ { 0xefb9502262a2f201L,0x4d59ea5050ee32b0L,0xd87f77286da789a8L,
+ 0xcf98a2cff79492c4L } },
+ /* 46 << 112 */
+ { { 0xf9577239720943c2L,0xba044cf53990b9d0L,0x5aa8e82395f2884aL,
+ 0x834de6ed0278a0afL },
+ { 0xc8e1ee9a5f25bd12L,0x9259ceaa6f7ab271L,0x7e6d97a277d00b76L,
+ 0x5c0c6eeaa437832aL } },
+ /* 47 << 112 */
+ { { 0x5232c20f5606b81dL,0xabd7b3750d991ee5L,0x4d2bfe358632d951L,
+ 0x78f8514698ed9364L },
+ { 0x951873f0f30c3282L,0x0da8ac80a789230bL,0x3ac7789c5398967fL,
+ 0xa69b8f7fbdda0fb5L } },
+ /* 48 << 112 */
+ { { 0xe5db77176add8545L,0x1b71cb6672c49b66L,0xd856073968421d77L,
+ 0x03840fe883e3afeaL },
+ { 0xb391dad51ec69977L,0xae243fb9307f6726L,0xc88ac87be8ca160cL,
+ 0x5174cced4ce355f4L } },
+ /* 49 << 112 */
+ { { 0x98a35966e58ba37dL,0xfdcc8da27817335dL,0x5b75283083fbc7bfL,
+ 0x68e419d4d9c96984L },
+ { 0x409a39f402a40380L,0x88940faf1fe977bcL,0xc640a94b8f8edea6L,
+ 0x1e22cd17ed11547dL } },
+ /* 50 << 112 */
+ { { 0xe28568ce59ffc3e2L,0x60aa1b55c1dee4e7L,0xc67497c8837cb363L,
+ 0x06fb438a105a2bf2L },
+ { 0x30357ec4500d8e20L,0x1ad9095d0670db10L,0x7f589a05c73b7cfdL,
+ 0xf544607d880d6d28L } },
+ /* 51 << 112 */
+ { { 0x17ba93b1a20ef103L,0xad8591306ba6577bL,0x65c91cf66fa214a0L,
+ 0xd7d49c6c27990da5L },
+ { 0xecd9ec8d20bb569dL,0xbd4b2502eeffbc33L,0x2056ca5a6bed0467L,
+ 0x7916a1f75b63728cL } },
+ /* 52 << 112 */
+ { { 0xd4f9497d53a4f566L,0x8973466497b56810L,0xf8e1da740494a621L,
+ 0x82546a938d011c68L },
+ { 0x1f3acb19c61ac162L,0x52f8fa9cabad0d3eL,0x15356523b4b7ea43L,
+ 0x5a16ad61ae608125L } },
+ /* 53 << 112 */
+ { { 0xb0bcb87f4faed184L,0x5f236b1d5029f45fL,0xd42c76070bc6b1fcL,
+ 0xc644324e68aefce3L },
+ { 0x8e191d595c5d8446L,0xc020807713ae1979L,0xadcaee553ba59cc7L,
+ 0x20ed6d6ba2cb81baL } },
+ /* 54 << 112 */
+ { { 0x0952ba19b6efcffcL,0x60f12d6897c0b87cL,0x4ee2c7c49caa30bcL,
+ 0x767238b797fbff4eL },
+ { 0xebc73921501b5d92L,0x3279e3dfc2a37737L,0x9fc12bc86d197543L,
+ 0xfa94dc6f0a40db4eL } },
+ /* 55 << 112 */
+ { { 0x7392b41a530ccbbdL,0x87c82146ea823525L,0xa52f984c05d98d0cL,
+ 0x2ae57d735ef6974cL },
+ { 0x9377f7bf3042a6ddL,0xb1a007c019647a64L,0xfaa9079a0cca9767L,
+ 0x3d81a25bf68f72d5L } },
+ /* 56 << 112 */
+ { { 0x752067f8ff81578eL,0x786221509045447dL,0xc0c22fcf0505aa6fL,
+ 0x1030f0a66bed1c77L },
+ { 0x31f29f151f0bd739L,0x2d7989c7e6debe85L,0x5c070e728e677e98L,
+ 0x0a817bd306e81fd5L } },
+ /* 57 << 112 */
+ { { 0xc110d830b0f2ac95L,0x48d0995aab20e64eL,0x0f3e00e17729cd9aL,
+ 0x2a570c20dd556946L },
+ { 0x912dbcfd4e86214dL,0x2d014ee2cf615498L,0x55e2b1e63530d76eL,
+ 0xc5135ae4fd0fd6d1L } },
+ /* 58 << 112 */
+ { { 0x0066273ad4f3049fL,0xbb8e9893e7087477L,0x2dba1ddb14c6e5fdL,
+ 0xdba3788651f57e6cL },
+ { 0x5aaee0a65a72f2cfL,0x1208bfbf7bea5642L,0xf5c6aa3b67872c37L,
+ 0xd726e08343f93224L } },
+ /* 59 << 112 */
+ { { 0x1854daa5061f1658L,0xc0016df1df0cd2b3L,0xc2a3f23e833d50deL,
+ 0x73b681d2bbbd3017L },
+ { 0x2f046dc43ac343c0L,0x9c847e7d85716421L,0xe1e13c910917eed4L,
+ 0x3fc9eebd63a1b9c6L } },
+ /* 60 << 112 */
+ { { 0x0f816a727fe02299L,0x6335ccc2294f3319L,0x3820179f4745c5beL,
+ 0xe647b782922f066eL },
+ { 0xc22e49de02cafb8aL,0x299bc2fffcc2ecccL,0x9a8feea26e0e8282L,
+ 0xa627278bfe893205L } },
+ /* 61 << 112 */
+ { { 0xa7e197337933e47bL,0xf4ff6b132e766402L,0xa4d8be0a98440d9fL,
+ 0x658f5c2f38938808L },
+ { 0x90b75677c95b3b3eL,0xfa0442693137b6ffL,0x077b039b43c47c29L,
+ 0xcca95dd38a6445b2L } },
+ /* 62 << 112 */
+ { { 0x0b498ba42333fc4cL,0x274f8e68f736a1b1L,0x6ca348fd5f1d4b2eL,
+ 0x24d3be78a8f10199L },
+ { 0x8535f858ca14f530L,0xa6e7f1635b982e51L,0x847c851236e1bf62L,
+ 0xf6a7c58e03448418L } },
+ /* 63 << 112 */
+ { { 0x583f3703f9374ab6L,0x864f91956e564145L,0x33bc3f4822526d50L,
+ 0x9f323c801262a496L },
+ { 0xaa97a7ae3f046a9aL,0x70da183edf8a039aL,0x5b68f71c52aa0ba6L,
+ 0x9be0fe5121459c2dL } },
+ /* 64 << 112 */
+ { { 0xc1e17eb6cbc613e5L,0x33131d55497ea61cL,0x2f69d39eaf7eded5L,
+ 0x73c2f434de6af11bL },
+ { 0x4ca52493a4a375faL,0x5f06787cb833c5c2L,0x814e091f3e6e71cfL,
+ 0x76451f578b746666L } },
+ /* 0 << 119 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 119 */
+ { { 0x80f9bdef694db7e0L,0xedca8787b9fcddc6L,0x51981c3403b8dce1L,
+ 0x4274dcf170e10ba1L },
+ { 0xf72743b86def6d1aL,0xd25b1670ebdb1866L,0xc4491e8c050c6f58L,
+ 0x2be2b2ab87fbd7f5L } },
+ /* 2 << 119 */
+ { { 0x3e0e5c9dd111f8ecL,0xbcc33f8db7c4e760L,0x702f9a91bd392a51L,
+ 0x7da4a795c132e92dL },
+ { 0x1a0b0ae30bb1151bL,0x54febac802e32251L,0xea3a5082694e9e78L,
+ 0xe58ffec1e4fe40b8L } },
+ /* 3 << 119 */
+ { { 0xf85592fcd1e0cf9eL,0xdea75f0dc0e7b2e8L,0xc04215cfc135584eL,
+ 0x174fc7272f57092aL },
+ { 0xe7277877eb930beaL,0x504caccb5eb02a5aL,0xf9fe08f7f5241b9bL,
+ 0xe7fb62f48d5ca954L } },
+ /* 4 << 119 */
+ { { 0xfbb8349d29c4120bL,0x9f94391fc0d0d915L,0xc4074fa75410ba51L,
+ 0xa66adbf6150a5911L },
+ { 0xc164543c34bfca38L,0xe0f27560b9e1ccfcL,0x99da0f53e820219cL,
+ 0xe8234498c6b4997aL } },
+ /* 5 << 119 */
+ { { 0xcfb88b769d4c5423L,0x9e56eb10b0521c49L,0x418e0b5ebe8700a1L,
+ 0x00cbaad6f93cb58aL },
+ { 0xe923fbded92a5e67L,0xca4979ac1f347f11L,0x89162d856bc0585bL,
+ 0xdd6254afac3c70e3L } },
+ /* 6 << 119 */
+ { { 0x7b23c513516e19e4L,0x56e2e847c5c4d593L,0x9f727d735ce71ef6L,
+ 0x5b6304a6f79a44c5L },
+ { 0x6638a7363ab7e433L,0x1adea470fe742f83L,0xe054b8545b7fc19fL,
+ 0xf935381aba1d0698L } },
+ /* 7 << 119 */
+ { { 0x546eab2d799e9a74L,0x96239e0ea949f729L,0xca274c6b7090055aL,
+ 0x835142c39020c9b0L },
+ { 0xa405667aa2e8807fL,0x29f2c0851aa3d39eL,0xcc555d6442fc72f5L,
+ 0xe856e0e7fbeacb3cL } },
+ /* 8 << 119 */
+ { { 0xb5504f9d918e4936L,0x65035ef6b2513982L,0x0553a0c26f4d9cb9L,
+ 0x6cb10d56bea85509L },
+ { 0x48d957b7a242da11L,0x16a4d3dd672b7268L,0x3d7e637c8502a96bL,
+ 0x27c7032b730d463bL } },
+ /* 9 << 119 */
+ { { 0xbdc02b18e4136a14L,0xbacf969d678e32bfL,0xc98d89a3dd9c3c03L,
+ 0x7b92420a23becc4fL },
+ { 0xd4b41f78c64d565cL,0x9f969d0010f28295L,0xec7f7f76b13d051aL,
+ 0x08945e1ea92da585L } },
+ /* 10 << 119 */
+ { { 0x55366b7d5846426fL,0xe7d09e89247d441dL,0x510b404d736fbf48L,
+ 0x7fa003d0e784bd7dL },
+ { 0x25f7614f17fd9596L,0x49e0e0a135cb98dbL,0x2c65957b2e83a76aL,
+ 0x5d40da8dcddbe0f8L } },
+ /* 11 << 119 */
+ { { 0xf2b8c405050bad24L,0x8918426dc2aa4823L,0x2aeab3dda38365a7L,
+ 0x720317177c91b690L },
+ { 0x8b00d69960a94120L,0x478a255de99eaeecL,0xbf656a5f6f60aafdL,
+ 0xdfd7cb755dee77b3L } },
+ /* 12 << 119 */
+ { { 0x37f68bb4a595939dL,0x0355647928740217L,0x8e740e7c84ad7612L,
+ 0xd89bc8439044695fL },
+ { 0xf7f3da5d85a9184dL,0x562563bb9fc0b074L,0x06d2e6aaf88a888eL,
+ 0x612d8643161fbe7cL } },
+ /* 13 << 119 */
+ { { 0x465edba7f64085e7L,0xb230f30429aa8511L,0x53388426cda2d188L,
+ 0x908857354b666649L },
+ { 0x6f02ff9a652f54f6L,0x65c822945fae2bf0L,0x7816ade062f5eee3L,
+ 0xdcdbdf43fcc56d70L } },
+ /* 14 << 119 */
+ { { 0x9fb3bba354530bb2L,0xbde3ef77cb0869eaL,0x89bc90460b431163L,
+ 0x4d03d7d2e4819a35L },
+ { 0x33ae4f9e43b6a782L,0x216db3079c88a686L,0x91dd88e000ffedd9L,
+ 0xb280da9f12bd4840L } },
+ /* 15 << 119 */
+ { { 0x32a7cb8a1635e741L,0xfe14008a78be02a7L,0x3fafb3341b7ae030L,
+ 0x7fd508e75add0ce9L },
+ { 0x72c83219d607ad51L,0x0f229c0a8d40964aL,0x1be2c3361c878da2L,
+ 0xe0c96742eab2ab86L } },
+ /* 16 << 119 */
+ { { 0x458f86913e538cd7L,0xa7001f6c8e08ad53L,0x52b8c6e6bf5d15ffL,
+ 0x548234a4011215ddL },
+ { 0xff5a9d2d3d5b4045L,0xb0ffeeb64a904190L,0x55a3aca448607f8bL,
+ 0x8cbd665c30a0672aL } },
+ /* 17 << 119 */
+ { { 0x87f834e042583068L,0x02da2aebf3f6e683L,0x6b763e5d05c12248L,
+ 0x7230378f65a8aefcL },
+ { 0x93bd80b571e8e5caL,0x53ab041cb3b62524L,0x1b8605136c9c552eL,
+ 0xe84d402cd5524e66L } },
+ /* 18 << 119 */
+ { { 0xa37f3573f37f5937L,0xeb0f6c7dd1e4fca5L,0x2965a554ac8ab0fcL,
+ 0x17fbf56c274676acL },
+ { 0x2e2f6bd9acf7d720L,0x41fc8f8810224766L,0x517a14b385d53befL,
+ 0xdae327a57d76a7d1L } },
+ /* 19 << 119 */
+ { { 0x6ad0a065c4818267L,0x33aa189b37c1bbc1L,0x64970b5227392a92L,
+ 0x21699a1c2d1535eaL },
+ { 0xcd20779cc2d7a7fdL,0xe318605999c83cf2L,0x9b69440b72c0b8c7L,
+ 0xa81497d77b9e0e4dL } },
+ /* 20 << 119 */
+ { { 0x515d5c891f5f82dcL,0x9a7f67d76361079eL,0xa8da81e311a35330L,
+ 0xe44990c44b18be1bL },
+ { 0xc7d5ed95af103e59L,0xece8aba78dac9261L,0xbe82b0999394b8d3L,
+ 0x6830f09a16adfe83L } },
+ /* 21 << 119 */
+ { { 0x250a29b488172d01L,0x8b20bd65caff9e02L,0xb8a7661ee8a6329aL,
+ 0x4520304dd3fce920L },
+ { 0xae45da1f2b47f7efL,0xe07f52885bffc540L,0xf79970093464f874L,
+ 0x2244c2cda6fa1f38L } },
+ /* 22 << 119 */
+ { { 0x43c41ac194d7d9b1L,0x5bafdd82c82e7f17L,0xdf0614c15fda0fcaL,
+ 0x74b043a7a8ae37adL },
+ { 0x3ba6afa19e71734cL,0x15d5437e9c450f2eL,0x4a5883fe67e242b1L,
+ 0x5143bdc22c1953c2L } },
+ /* 23 << 119 */
+ { { 0x542b8b53fc5e8920L,0x363bf9a89a9cee08L,0x02375f10c3486e08L,
+ 0x2037543b8c5e70d2L },
+ { 0x7109bccc625640b4L,0xcbc1051e8bc62c3bL,0xf8455fed803f26eaL,
+ 0x6badceabeb372424L } },
+ /* 24 << 119 */
+ { { 0xa2a9ce7c6b53f5f9L,0x642465951b176d99L,0xb1298d36b95c081bL,
+ 0x53505bb81d9a9ee6L },
+ { 0x3f6f9e61f2ba70b0L,0xd07e16c98afad453L,0x9f1694bbe7eb4a6aL,
+ 0xdfebced93cb0bc8eL } },
+ /* 25 << 119 */
+ { { 0x92d3dcdc53868c8bL,0x174311a2386107a6L,0x4109e07c689b4e64L,
+ 0x30e4587f2df3dcb6L },
+ { 0x841aea310811b3b2L,0x6144d41d0cce43eaL,0x464c45812a9a7803L,
+ 0xd03d371f3e158930L } },
+ /* 26 << 119 */
+ { { 0xc676d7f2b1f3390bL,0x9f7a1b8ca5b61272L,0x4ebebfc9c2e127a9L,
+ 0x4602500c5dd997bfL },
+ { 0x7f09771c4711230fL,0x058eb37c020f09c1L,0xab693d4bfee5e38bL,
+ 0x9289eb1f4653cbc0L } },
+ /* 27 << 119 */
+ { { 0xbecf46abd51b9cf5L,0xd2aa9c029f0121afL,0x36aaf7d2e90dc274L,
+ 0x909e4ea048b95a3cL },
+ { 0xe6b704966f32dbdbL,0x672188a08b030b3eL,0xeeffe5b3cfb617e2L,
+ 0x87e947de7c82709eL } },
+ /* 28 << 119 */
+ { { 0xa44d2b391770f5a7L,0xe4d4d7910e44eb82L,0x42e69d1e3f69712aL,
+ 0xbf11c4d6ac6a820eL },
+ { 0xb5e7f3e542c4224cL,0xd6b4e81c449d941cL,0x5d72bd165450e878L,
+ 0x6a61e28aee25ac54L } },
+ /* 29 << 119 */
+ { { 0x33272094e6f1cd95L,0x7512f30d0d18673fL,0x32f7a4ca5afc1464L,
+ 0x2f0956566bbb977bL },
+ { 0x586f47caa8226200L,0x02c868ad1ac07369L,0x4ef2b845c613acbeL,
+ 0x43d7563e0386054cL } },
+ /* 30 << 119 */
+ { { 0x54da9dc7ab952578L,0xb5423df226e84d0bL,0xa8b64eeb9b872042L,
+ 0xac2057825990f6dfL },
+ { 0x4ff696eb21f4c77aL,0x1a79c3e4aab273afL,0x29bc922e9436b3f1L,
+ 0xff807ef8d6d9a27aL } },
+ /* 31 << 119 */
+ { { 0x82acea3d778f22a0L,0xfb10b2e85b5e7469L,0xc0b169802818ee7dL,
+ 0x011afff4c91c1a2fL },
+ { 0x95a6d126ad124418L,0x31c081a5e72e295fL,0x36bb283af2f4db75L,
+ 0xd115540f7acef462L } },
+ /* 32 << 119 */
+ { { 0xc7f3a8f833f6746cL,0x21e46f65fea990caL,0x915fd5c5caddb0a9L,
+ 0xbd41f01678614555L },
+ { 0x346f4434426ffb58L,0x8055943614dbc204L,0xf3dd20fe5a969b7fL,
+ 0x9d59e956e899a39aL } },
+ /* 33 << 119 */
+ { { 0xf1b0971c8ad4cf4bL,0x034488602ffb8fb8L,0xf071ac3c65340ba4L,
+ 0x408d0596b27fd758L },
+ { 0xe7c78ea498c364b0L,0xa4aac4a5051e8ab5L,0xb9e1d560485d9002L,
+ 0x9acd518a88844455L } },
+ /* 34 << 119 */
+ { { 0xe4ca688fd06f56c0L,0xa48af70ddf027972L,0x691f0f045e9a609dL,
+ 0xa9dd82cdee61270eL },
+ { 0x8903ca63a0ef18d3L,0x9fb7ee353d6ca3bdL,0xa7b4a09cabf47d03L,
+ 0x4cdada011c67de8eL } },
+ /* 35 << 119 */
+ { { 0x520037499355a244L,0xe77fd2b64f2151a9L,0x695d6cf666b4efcbL,
+ 0xc5a0cacfda2cfe25L },
+ { 0x104efe5cef811865L,0xf52813e89ea5cc3dL,0x855683dc40b58dbcL,
+ 0x0338ecde175fcb11L } },
+ /* 36 << 119 */
+ { { 0xf9a0563774921592L,0xb4f1261db9bb9d31L,0x551429b74e9c5459L,
+ 0xbe182e6f6ea71f53L },
+ { 0xd3a3b07cdfc50573L,0x9ba1afda62be8d44L,0x9bcfd2cb52ab65d3L,
+ 0xdf11d547a9571802L } },
+ /* 37 << 119 */
+ { { 0x099403ee02a2404aL,0x497406f421088a71L,0x994794095004ae71L,
+ 0xbdb42078a812c362L },
+ { 0x2b72a30fd8828442L,0x283add27fcb5ed1cL,0xf7c0e20066a40015L,
+ 0x3e3be64108b295efL } },
+ /* 38 << 119 */
+ { { 0xac127dc1e038a675L,0x729deff38c5c6320L,0xb7df8fd4a90d2c53L,
+ 0x9b74b0ec681e7cd3L },
+ { 0x5cb5a623dab407e5L,0xcdbd361576b340c6L,0xa184415a7d28392cL,
+ 0xc184c1d8e96f7830L } },
+ /* 39 << 119 */
+ { { 0xc3204f1981d3a80fL,0xfde0c841c8e02432L,0x78203b3e8149e0c1L,
+ 0x5904bdbb08053a73L },
+ { 0x30fc1dd1101b6805L,0x43c223bc49aa6d49L,0x9ed671417a174087L,
+ 0x311469a0d5997008L } },
+ /* 40 << 119 */
+ { { 0xb189b6845e43fc61L,0xf3282375e0d3ab57L,0x4fa34b67b1181da8L,
+ 0x621ed0b299ee52b8L },
+ { 0x9b178de1ad990676L,0xd51de67b56d54065L,0x2a2c27c47538c201L,
+ 0x33856ec838a40f5cL } },
+ /* 41 << 119 */
+ { { 0x2522fc15be6cdcdeL,0x1e603f339f0c6f89L,0x7994edc3103e30a6L,
+ 0x033a00db220c853eL },
+ { 0xd3cfa409f7bb7fd7L,0x70f8781e462d18f6L,0xbbd82980687fe295L,
+ 0x6eef4c32595669f3L } },
+ /* 42 << 119 */
+ { { 0x86a9303b2f7e85c3L,0x5fce462171988f9bL,0x5b935bf6c138acb5L,
+ 0x30ea7d6725661212L },
+ { 0xef1eb5f4e51ab9a2L,0x0587c98aae067c78L,0xb3ce1b3c77ca9ca6L,
+ 0x2a553d4d54b5f057L } },
+ /* 43 << 119 */
+ { { 0xc78982364da29ec2L,0xdbdd5d13b9c57316L,0xc57d6e6b2cd80d47L,
+ 0x80b460cffe9e7391L },
+ { 0x98648cabf963c31eL,0x67f9f633cc4d32fdL,0x0af42a9dfdf7c687L,
+ 0x55f292a30b015ea7L } },
+ /* 44 << 119 */
+ { { 0x89e468b2cd21ab3dL,0xe504f022c393d392L,0xab21e1d4a5013af9L,
+ 0xe3283f78c2c28acbL },
+ { 0xf38b35f6226bf99fL,0xe83542740e291e69L,0x61673a15b20c162dL,
+ 0xc101dc75b04fbdbeL } },
+ /* 45 << 119 */
+ { { 0x8323b4c2255bd617L,0x6c9696936c2a9154L,0xc6e6586062679387L,
+ 0x8e01db0cb8c88e23L },
+ { 0x33c42873893a5559L,0x7630f04b47a3e149L,0xb5d80805ddcf35f8L,
+ 0x582ca08077dfe732L } },
+ /* 46 << 119 */
+ { { 0x2c7156e10b1894a0L,0x92034001d81c68c0L,0xed225d00c8b115b5L,
+ 0x237f9c2283b907f2L },
+ { 0x0ea2f32f4470e2c0L,0xb725f7c158be4e95L,0x0f1dcafab1ae5463L,
+ 0x59ed51871ba2fc04L } },
+ /* 47 << 119 */
+ { { 0xf6e0f316d0115d4dL,0x5180b12fd3691599L,0x157e32c9527f0a41L,
+ 0x7b0b081da8e0ecc0L },
+ { 0x6dbaaa8abf4f0dd0L,0x99b289c74d252696L,0x79b7755edbf864feL,
+ 0x6974e2b176cad3abL } },
+ /* 48 << 119 */
+ { { 0x35dbbee206ddd657L,0xe7cbdd112ff3a96dL,0x88381968076be758L,
+ 0x2d737e7208c91f5dL },
+ { 0x5f83ab6286ec3776L,0x98aa649d945fa7a1L,0xf477ec3772ef0933L,
+ 0x66f52b1e098c17b1L } },
+ /* 49 << 119 */
+ { { 0x9eec58fbd803738bL,0x91aaade7e4e86aa4L,0x6b1ae617a5b51492L,
+ 0x63272121bbc45974L },
+ { 0x7e0e28f0862c5129L,0x0a8f79a93321a4a0L,0xe26d16645041c88fL,
+ 0x0571b80553233e3aL } },
+ /* 50 << 119 */
+ { { 0xd1b0ccdec9520711L,0x55a9e4ed3c8b84bfL,0x9426bd39a1fef314L,
+ 0x4f5f638e6eb93f2bL },
+ { 0xba2a1ed32bf9341bL,0xd63c13214d42d5a9L,0xd2964a89316dc7c5L,
+ 0xd1759606ca511851L } },
+ /* 51 << 119 */
+ { { 0xd8a9201ff9e6ed35L,0xb7b5ee456736925aL,0x0a83fbbc99581af7L,
+ 0x3076bc4064eeb051L },
+ { 0x5511c98c02dec312L,0x270de898238dcb78L,0x2cf4cf9c539c08c9L,
+ 0xa70cb65e38d3b06eL } },
+ /* 52 << 119 */
+ { { 0xb12ec10ecfe57bbdL,0x82c7b65635a0c2b5L,0xddc7d5cd161c67bdL,
+ 0xe32e8985ae3a32ccL },
+ { 0x7aba9444d11a5529L,0xe964ed022427fa1aL,0x1528392d24a1770aL,
+ 0xa152ce2c12c72fcdL } },
+ /* 53 << 119 */
+ { { 0x714553a48ec07649L,0x18b4c290459dd453L,0xea32b7147b64b110L,
+ 0xb871bfa52e6f07a2L },
+ { 0xb67112e59e2e3c9bL,0xfbf250e544aa90f6L,0xf77aedb8bd539006L,
+ 0x3b0cdf9ad172a66fL } },
+ /* 54 << 119 */
+ { { 0xedf69feaf8c51187L,0x05bb67ec741e4da7L,0x47df0f3208114345L,
+ 0x56facb07bb9792b1L },
+ { 0xf3e007e98f6229e4L,0x62d103f4526fba0fL,0x4f33bef7b0339d79L,
+ 0x9841357bb59bfec1L } },
+ /* 55 << 119 */
+ { { 0xfa8dbb59c34e6705L,0xc3c7180b7fdaa84cL,0xf95872fca4108537L,
+ 0x8750cc3b932a3e5aL },
+ { 0xb61cc69db7275d7dL,0xffa0168b2e59b2e9L,0xca032abc6ecbb493L,
+ 0x1d86dbd32c9082d8L } },
+ /* 56 << 119 */
+ { { 0xae1e0b67e28ef5baL,0x2c9a4699cb18e169L,0x0ecd0e331e6bbd20L,
+ 0x571b360eaf5e81d2L },
+ { 0xcd9fea58101c1d45L,0x6651788e18880452L,0xa99726351f8dd446L,
+ 0x44bed022e37281d0L } },
+ /* 57 << 119 */
+ { { 0x094b2b2d33da525dL,0xf193678e13144fd8L,0xb8ab5ba4f4c1061dL,
+ 0x4343b5fadccbe0f4L },
+ { 0xa870237163812713L,0x47bf6d2df7611d93L,0x46729b8cbd21e1d7L,
+ 0x7484d4e0d629e77dL } },
+ /* 58 << 119 */
+ { { 0x830e6eea60dbac1fL,0x23d8c484da06a2f7L,0x896714b050ca535bL,
+ 0xdc8d3644ebd97a9bL },
+ { 0x106ef9fab12177b4L,0xf79bf464534d5d9cL,0x2537a349a6ab360bL,
+ 0xc7c54253a00c744fL } },
+ /* 59 << 119 */
+ { { 0xb3c7a047e5911a76L,0x61ffa5c8647f1ee7L,0x15aed36f8f56ab42L,
+ 0x6a0d41b0a3ff9ac9L },
+ { 0x68f469f5cc30d357L,0xbe9adf816b72be96L,0x1cd926fe903ad461L,
+ 0x7e89e38fcaca441bL } },
+ /* 60 << 119 */
+ { { 0xf0f82de5facf69d4L,0x363b7e764775344cL,0x6894f312b2e36d04L,
+ 0x3c6cb4fe11d1c9a5L },
+ { 0x85d9c3394008e1f2L,0x5e9a85ea249f326cL,0xdc35c60a678c5e06L,
+ 0xc08b944f9f86fba9L } },
+ /* 61 << 119 */
+ { { 0xde40c02c89f71f0fL,0xad8f3e31ff3da3c0L,0x3ea5096b42125dedL,
+ 0x13879cbfa7379183L },
+ { 0x6f4714a56b306a0bL,0x359c2ea667646c5eL,0xfacf894307726368L,
+ 0x07a5893565ff431eL } },
+ /* 62 << 119 */
+ { { 0x24d661d168754ab0L,0x801fce1d6f429a76L,0xc068a85fa58ce769L,
+ 0xedc35c545d5eca2bL },
+ { 0xea31276fa3f660d1L,0xa0184ebeb8fc7167L,0x0f20f21a1d8db0aeL,
+ 0xd96d095f56c35e12L } },
+ /* 63 << 119 */
+ { { 0xedf402b5f8c2a25bL,0x1bb772b9059204b6L,0x50cbeae219b4e34cL,
+ 0x93109d803fa0845aL },
+ { 0x54f7ccf78ef59fb5L,0x3b438fe288070963L,0x9e28c65931f3ba9bL,
+ 0x9cc31b46ead9da92L } },
+ /* 64 << 119 */
+ { { 0x3c2f0ba9b733aa5fL,0xdece47cbf05af235L,0xf8e3f715a2ac82a5L,
+ 0xc97ba6412203f18aL },
+ { 0xc3af550409c11060L,0x56ea2c0546af512dL,0xfac28daff3f28146L,
+ 0x87fab43a959ef494L } },
+ /* 0 << 126 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 126 */
+ { { 0x09891641d4c5105fL,0x1ae80f8e6d7fbd65L,0x9d67225fbee6bdb0L,
+ 0x3b433b597fc4d860L },
+ { 0x44e66db693e85638L,0xf7b59252e3e9862fL,0xdb785157665c32ecL,
+ 0x702fefd7ae362f50L } },
+ /* 2 << 126 */
+ { { 0x3754475d0fefb0c3L,0xd48fb56b46d7c35dL,0xa070b633363798a4L,
+ 0xae89f3d28fdb98e6L },
+ { 0x970b89c86363d14cL,0x8981752167abd27dL,0x9bf7d47444d5a021L,
+ 0xb3083bafcac72aeeL } },
+ /* 3 << 126 */
+ { { 0x389741debe949a44L,0x638e9388546a4fa5L,0x3fe6419ca0047bdcL,
+ 0x7047f648aaea57caL },
+ { 0x54e48a9041fbab17L,0xda8e0b28576bdba2L,0xe807eebcc72afddcL,
+ 0x07d3336df42577bfL } },
+ /* 4 << 126 */
+ { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L,
+ 0x61d587d421d324f6L },
+ { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL,
+ 0xfa11fe124621efbeL } },
+ /* 5 << 126 */
+ { { 0x047b772e81685d7bL,0x23f27d81bf34a976L,0xc27608e2915f48efL,
+ 0x3b0b43faa521d5c3L },
+ { 0x7613fb2663ca7284L,0x7f5729b41d4db837L,0x87b14898583b526bL,
+ 0x00b732a6bbadd3d1L } },
+ /* 6 << 126 */
+ { { 0x8e02f4262048e396L,0x436b50b6383d9de4L,0xf78d3481471e85adL,
+ 0x8b01ea6ad005c8d6L },
+ { 0xd3c7afee97015c07L,0x46cdf1a94e3ba2aeL,0x7a42e50183d3a1d2L,
+ 0xd54b5268b541dff4L } },
+ /* 7 << 126 */
+ { { 0x3f24cf304e23e9bcL,0x4387f816126e3624L,0x26a46a033b0b6d61L,
+ 0xaf1bc8458b2d777cL },
+ { 0x25c401ba527de79cL,0x0e1346d44261bbb6L,0x4b96c44b287b4bc7L,
+ 0x658493c75254562fL } },
+ /* 8 << 126 */
+ { { 0x23f949feb8a24a20L,0x17ebfed1f52ca53fL,0x9b691bbebcfb4853L,
+ 0x5617ff6b6278a05dL },
+ { 0x241b34c5e3c99ebdL,0xfc64242e1784156aL,0x4206482f695d67dfL,
+ 0xb967ce0eee27c011L } },
+ /* 9 << 126 */
+ { { 0x65db375121c80b5dL,0x2e7a563ca31ecca0L,0xe56ffc4e5238a07eL,
+ 0x3d6c296632ced854L },
+ { 0xe99d7d1aaf70b885L,0xafc3bad92d686459L,0x9c78bf460cc8ba5bL,
+ 0x5a43951918955aa3L } },
+ /* 10 << 126 */
+ { { 0xf8b517a85fe4e314L,0xe60234d0fcb8906fL,0xffe542acf2061b23L,
+ 0x287e191f6b4cb59cL },
+ { 0x21857ddc09d877d8L,0x1c23478c14678941L,0xbbf0c056b6e05ea4L,
+ 0x82da4b53b01594feL } },
+ /* 11 << 126 */
+ { { 0xf7526791fadb8608L,0x049e832d7b74cdf6L,0xa43581ccc2b90a34L,
+ 0x73639eb89360b10cL },
+ { 0x4fba331fe1e4a71bL,0x6ffd6b938072f919L,0x6e53271c65679032L,
+ 0x67206444f14272ceL } },
+ /* 12 << 126 */
+ { { 0xc0f734a3b2335834L,0x9526205a90ef6860L,0xcb8be71704e2bb0dL,
+ 0x2418871e02f383faL },
+ { 0xd71776814082c157L,0xcc914ad029c20073L,0xf186c1ebe587e728L,
+ 0x6fdb3c2261bcd5fdL } },
+ /* 13 << 126 */
+ { { 0x30d014a6f2f9f8e9L,0x963ece234fec49d2L,0x862025c59605a8d9L,
+ 0x3987444519f8929aL },
+ { 0x01b6ff6512bf476aL,0x598a64d809cf7d91L,0xd7ec774993be56caL,
+ 0x10899785cbb33615L } },
+ /* 14 << 126 */
+ { { 0xb8a092fd02eee3adL,0xa86b3d3530145270L,0x323d98c68512b675L,
+ 0x4b8bc78562ebb40fL },
+ { 0x7d301f54413f9cdeL,0xa5e4fb4f2bab5664L,0x1d2b252d1cbfec23L,
+ 0xfcd576bbe177120dL } },
+ /* 15 << 126 */
+ { { 0x04427d3e83731a34L,0x2bb9028eed836e8eL,0xb36acff8b612ca7cL,
+ 0xb88fe5efd3d9c73aL },
+ { 0xbe2a6bc6edea4eb3L,0x43b93133488eec77L,0xf41ff566b17106e1L,
+ 0x469e9172654efa32L } },
+ /* 16 << 126 */
+ { { 0xb4480f0441c23fa3L,0xb4712eb0c1989a2eL,0x3ccbba0f93a29ca7L,
+ 0x6e205c14d619428cL },
+ { 0x90db7957b3641686L,0x0432691d45ac8b4eL,0x07a759acf64e0350L,
+ 0x0514d89c9c972517L } },
+ /* 17 << 126 */
+ { { 0x1701147fa8e67fc3L,0x9e2e0b8bab2085beL,0xd5651824ac284e57L,
+ 0x890d432574893664L },
+ { 0x8a7c5e6ec55e68a3L,0xbf12e90b4339c85aL,0x31846b85f922b655L,
+ 0x9a54ce4d0bf4d700L } },
+ /* 18 << 126 */
+ { { 0xd7f4e83af1a14295L,0x916f955cb285d4f9L,0xe57bb0e099ffdabaL,
+ 0x28a43034eab0d152L },
+ { 0x0a36ffa2b8a9cef8L,0x5517407eb9ec051aL,0x9c796096ea68e672L,
+ 0x853db5fbfb3c77fbL } },
+ /* 19 << 126 */
+ { { 0x21474ba9e864a51aL,0x6c2676996e8a1b8bL,0x7c82362694120a28L,
+ 0xe61e9a488383a5dbL },
+ { 0x7dd750039f84216dL,0xab020d07ad43cd85L,0x9437ae48da12c659L,
+ 0x6449c2ebe65452adL } },
+ /* 20 << 126 */
+ { { 0xcc7c4c1c2cf9d7c1L,0x1320886aee95e5abL,0xbb7b9056beae170cL,
+ 0xc8a5b250dbc0d662L },
+ { 0x4ed81432c11d2303L,0x7da669121f03769fL,0x3ac7a5fd84539828L,
+ 0x14dada943bccdd02L } },
+ /* 21 << 126 */
+ { { 0x8b84c3217ef6b0d1L,0x52a9477a7c933f22L,0x5ef6728afd440b82L,
+ 0x5c3bd8596ce4bd5eL },
+ { 0x918b80f5f22c2d3eL,0x368d5040b7bb6cc5L,0xb66142a12695a11cL,
+ 0x60ac583aeb19ea70L } },
+ /* 22 << 126 */
+ { { 0x317cbb980eab2437L,0x8cc08c555e2654c8L,0xfe2d6520e6d8307fL,
+ 0xe9f147f357428993L },
+ { 0x5f9c7d14d2fd6cf1L,0xa3ecd0642d4fcbb0L,0xad83fef08e7341f7L,
+ 0x643f23a03a63115cL } },
+ /* 23 << 126 */
+ { { 0xd38a78abe65ab743L,0xbf7c75b135edc89cL,0x3dd8752e530df568L,
+ 0xf85c4a76e308c682L },
+ { 0x4c9955b2e68acf37L,0xa544df3dab32af85L,0x4b8ec3f5a25cf493L,
+ 0x4d8f27641a622febL } },
+ /* 24 << 126 */
+ { { 0x7bb4f7aaf0dcbc49L,0x7de551f970bbb45bL,0xcfd0f3e49f2ca2e5L,
+ 0xece587091f5c76efL },
+ { 0x32920edd167d79aeL,0x039df8a2fa7d7ec1L,0xf46206c0bb30af91L,
+ 0x1ff5e2f522676b59L } },
+ /* 25 << 126 */
+ { { 0x11f4a0396ea51d66L,0x506c1445807d7a26L,0x60da5705755a9b24L,
+ 0x8fc8cc321f1a319eL },
+ { 0x83642d4d9433d67dL,0x7fa5cb8f6a7dd296L,0x576591db9b7bde07L,
+ 0x13173d25419716fbL } },
+ /* 26 << 126 */
+ { { 0xea30599dd5b340ffL,0xfc6b5297b0fe76c5L,0x1c6968c8ab8f5adcL,
+ 0xf723c7f5901c928dL },
+ { 0x4203c3219773d402L,0xdf7c6aa31b51dd47L,0x3d49e37a552be23cL,
+ 0x57febee80b5a6e87L } },
+ /* 27 << 126 */
+ { { 0xc5ecbee47bd8e739L,0x79d44994ae63bf75L,0x168bd00f38fb8923L,
+ 0x75d48ee4d0533130L },
+ { 0x554f77aadb5cdf33L,0x3396e8963c696769L,0x2fdddbf2d3fd674eL,
+ 0xbbb8f6ee99d0e3e5L } },
+ /* 28 << 126 */
+ { { 0x51b90651cbae2f70L,0xefc4bc0593aaa8ebL,0x8ecd8689dd1df499L,
+ 0x1aee99a822f367a5L },
+ { 0x95d485b9ae8274c5L,0x6c14d4457d30b39cL,0xbafea90bbcc1ef81L,
+ 0x7c5f317aa459a2edL } },
+ /* 29 << 126 */
+ { { 0x012110754ef44227L,0xa17bed6edc20f496L,0x0cdfe424819853cdL,
+ 0x13793298f71e2ce7L },
+ { 0x3c1f3078dbbe307bL,0x6dd1c20e76ee9936L,0x23ee4b57423caa20L,
+ 0x4ac3793b8efb840eL } },
+ /* 30 << 126 */
+ { { 0x934438ebed1f8ca0L,0x3e5466584ebb25a2L,0xc415af0ec069896fL,
+ 0xc13eddb09a5aa43dL },
+ { 0x7a04204fd49eb8f6L,0xd0d5bdfcd74f1670L,0x3697e28656fc0558L,
+ 0x1020737101cebadeL } },
+ /* 31 << 126 */
+ { { 0x5f87e6900647a82bL,0x908e0ed48f40054fL,0xa9f633d479853803L,
+ 0x8ed13c9a4a28b252L },
+ { 0x3e2ef6761f460f64L,0x53930b9b36d06336L,0x347073ac8fc4979bL,
+ 0x84380e0e5ecd5597L } },
+ /* 32 << 126 */
+ { { 0xe3b22c6bc4fe3c39L,0xba4a81536c7bebdfL,0xf23ab6b725693459L,
+ 0x53bc377014922b11L },
+ { 0x4645c8ab5afc60dbL,0xaa02235520b9f2a3L,0x52a2954cce0fc507L,
+ 0x8c2731bb7ce1c2e7L } },
+ /* 33 << 126 */
+ { { 0xf39608ab18a0339dL,0xac7a658d3735436cL,0xb22c2b07cd992b4fL,
+ 0x4e83daecf40dcfd4L },
+ { 0x8a34c7be2f39ea3eL,0xef0c005fb0a56d2eL,0x62731f6a6edd8038L,
+ 0x5721d7404e3cb075L } },
+ /* 34 << 126 */
+ { { 0x1ea41511fbeeee1bL,0xd1ef5e73ef1d0c05L,0x42feefd173c07d35L,
+ 0xe530a00a8a329493L },
+ { 0x5d55b7fef15ebfb0L,0x549de03cd322491aL,0xf7b5f602745b3237L,
+ 0x3632a3a21ab6e2b6L } },
+ /* 35 << 126 */
+ { { 0x0d3bba890ef59f78L,0x0dfc6443c9e52b9aL,0x1dc7969972631447L,
+ 0xef033917b3be20b1L },
+ { 0x0c92735db1383948L,0xc1fc29a2c0dd7d7dL,0x6485b697403ed068L,
+ 0x13bfaab3aac93bdcL } },
+ /* 36 << 126 */
+ { { 0x410dc6a90deeaf52L,0xb003fb024c641c15L,0x1384978c5bc504c4L,
+ 0x37640487864a6a77L },
+ { 0x05991bc6222a77daL,0x62260a575e47eb11L,0xc7af6613f21b432cL,
+ 0x22f3acc9ab4953e9L } },
+ /* 37 << 126 */
+ { { 0x529349228e41d155L,0x4d0245683ac059efL,0xb02017554d884411L,
+ 0xce8055cfa59a178fL },
+ { 0xcd77d1aff6204549L,0xa0a00a3ec7066759L,0x471071ef0272c229L,
+ 0x009bcf6bd3c4b6b0L } },
+ /* 38 << 126 */
+ { { 0x2a2638a822305177L,0xd51d59df41645bbfL,0xa81142fdc0a7a3c0L,
+ 0xa17eca6d4c7063eeL },
+ { 0x0bb887ed60d9dcecL,0xd6d28e5120ad2455L,0xebed6308a67102baL,
+ 0x042c31148bffa408L } },
+ /* 39 << 126 */
+ { { 0xfd099ac58aa68e30L,0x7a6a3d7c1483513eL,0xffcc6b75ba2d8f0cL,
+ 0x54dacf961e78b954L },
+ { 0xf645696fa4a9af89L,0x3a41194006ac98ecL,0x41b8b3f622a67a20L,
+ 0x2d0b1e0f99dec626L } },
+ /* 40 << 126 */
+ { { 0x27c8919240be34e8L,0xc7162b3791907f35L,0x90188ec1a956702bL,
+ 0xca132f7ddf93769cL },
+ { 0x3ece44f90e2025b4L,0x67aaec690c62f14cL,0xad74141822e3cc11L,
+ 0xcf9b75c37ff9a50eL } },
+ /* 41 << 126 */
+ { { 0x02fa2b164d348272L,0xbd99d61a9959d56dL,0xbc4f19db18762916L,
+ 0xcc7cce5049c1ac80L },
+ { 0x4d59ebaad846bd83L,0x8775a9dca9202849L,0x07ec4ae16e1f4ca9L,
+ 0x27eb5875ba893f11L } },
+ /* 42 << 126 */
+ { { 0x00284d51662cc565L,0x82353a6b0db4138dL,0xd9c7aaaaaa32a594L,
+ 0xf5528b5ea5669c47L },
+ { 0xf32202312f23c5ffL,0xe3e8147a6affa3a1L,0xfb423d5c202ddda0L,
+ 0x3d6414ac6b871bd4L } },
+ /* 43 << 126 */
+ { { 0x586f82e1a51a168aL,0xb712c67148ae5448L,0x9a2e4bd176233eb8L,
+ 0x0188223a78811ca9L },
+ { 0x553c5e21f7c18de1L,0x7682e451b27bb286L,0x3ed036b30e51e929L,
+ 0xf487211bec9cb34fL } },
+ /* 44 << 126 */
+ { { 0x0d0942770c24efc8L,0x0349fd04bef737a4L,0x6d1c9dd2514cdd28L,
+ 0x29c135ff30da9521L },
+ { 0xea6e4508f78b0b6fL,0x176f5dd2678c143cL,0x081484184be21e65L,
+ 0x27f7525ce7df38c4L } },
+ /* 45 << 126 */
+ { { 0x1fb70e09748ab1a4L,0x9cba50a05efe4433L,0x7846c7a615f75af2L,
+ 0x2a7c2c575ee73ea8L },
+ { 0x42e566a43f0a449aL,0x45474c3bad90fc3dL,0x7447be3d8b61d057L,
+ 0x3e9d1cf13a4ec092L } },
+ /* 46 << 126 */
+ { { 0x1603e453f380a6e6L,0x0b86e4319b1437c2L,0x7a4173f2ef29610aL,
+ 0x8fa729a7f03d57f7L },
+ { 0x3e186f6e6c9c217eL,0xbe1d307991919524L,0x92a62a70153d4fb1L,
+ 0x32ed3e34d68c2f71L } },
+ /* 47 << 126 */
+ { { 0xd785027f9eb1a8b7L,0xbc37eb77c5b22fe8L,0x466b34f0b9d6a191L,
+ 0x008a89af9a05f816L },
+ { 0x19b028fb7d42c10aL,0x7fe8c92f49b3f6b8L,0x58907cc0a5a0ade3L,
+ 0xb3154f51559d1a7cL } },
+ /* 48 << 126 */
+ { { 0x5066efb6d9790ed6L,0xa77a0cbca6aa793bL,0x1a915f3c223e042eL,
+ 0x1c5def0469c5874bL },
+ { 0x0e83007873b6c1daL,0x55cf85d2fcd8557aL,0x0f7c7c760460f3b1L,
+ 0x87052acb46e58063L } },
+ /* 49 << 126 */
+ { { 0x09212b80907eae66L,0x3cb068e04d721c89L,0xa87941aedd45ac1cL,
+ 0xde8d5c0d0daa0dbbL },
+ { 0xda421fdce3502e6eL,0xc89442014d89a084L,0x7307ba5ef0c24bfbL,
+ 0xda212beb20bde0efL } },
+ /* 50 << 126 */
+ { { 0xea2da24bf82ce682L,0x058d381607f71fe4L,0x35a024625ffad8deL,
+ 0xcd7b05dcaadcefabL },
+ { 0xd442f8ed1d9f54ecL,0x8be3d618b2d3b5caL,0xe2220ed0e06b2ce2L,
+ 0x82699a5f1b0da4c0L } },
+ /* 51 << 126 */
+ { { 0x3ff106f571c0c3a7L,0x8f580f5a0d34180cL,0x4ebb120e22d7d375L,
+ 0x5e5782cce9513675L },
+ { 0x2275580c99c82a70L,0xe8359fbf15ea8c4cL,0x53b48db87b415e70L,
+ 0xaacf2240100c6014L } },
+ /* 52 << 126 */
+ { { 0x9faaccf5e4652f1dL,0xbd6fdd2ad56157b2L,0xa4f4fb1f6261ec50L,
+ 0x244e55ad476bcd52L },
+ { 0x881c9305047d320bL,0x1ca983d56181263fL,0x354e9a44278fb8eeL,
+ 0xad2dbc0f396e4964L } },
+ /* 53 << 126 */
+ { { 0x723f3aa29268b3deL,0x0d1ca29ae6e0609aL,0x794866aa6cf44252L,
+ 0x0b59f3e301af87edL },
+ { 0xe234e5ff7f4a6c51L,0xa8768fd261dc2f7eL,0xdafc73320a94d81fL,
+ 0xd7f8428206938ce1L } },
+ /* 54 << 126 */
+ { { 0xae0b3c0e0546063eL,0x7fbadcb25d61abc6L,0xd5d7a2c9369ac400L,
+ 0xa5978d09ae67d10cL },
+ { 0x290f211e4f85eaacL,0xe61e2ad1facac681L,0xae125225388384cdL,
+ 0xa7fb68e9ccfde30fL } },
+ /* 55 << 126 */
+ { { 0x7a59b9363daed4c2L,0x80a9aa402606f789L,0xb40c1ea5f6a6d90aL,
+ 0x948364d3514d5885L },
+ { 0x062ebc6070985182L,0xa6db5b0e33310895L,0x64a12175e329c2f5L,
+ 0xc5f25bd290ea237eL } },
+ /* 56 << 126 */
+ { { 0x7915c5242d0a4c23L,0xeb5d26e46bb3cc52L,0x369a9116c09e2c92L,
+ 0x0c527f92cf182cf8L },
+ { 0x9e5919382aede0acL,0xb29222086cc34939L,0x3c9d896299a34361L,
+ 0x3c81836dc1905fe6L } },
+ /* 57 << 126 */
+ { { 0x4bfeb57fa001ec5aL,0xe993f5bba0dc5dbaL,0x47884109724a1380L,
+ 0x8a0369ab32fe9a04L },
+ { 0xea068d608c927db8L,0xbf5f37cf94655741L,0x47d402a204b6c7eaL,
+ 0x4551c2956af259cbL } },
+ /* 58 << 126 */
+ { { 0x698b71e7ed77ee8bL,0xbddf7bd0f309d5c7L,0x6201c22c34e780caL,
+ 0xab04f7d84c295ef4L },
+ { 0x1c9472944313a8ceL,0xe532e4ac92ca4cfeL,0x89738f80d0a7a97aL,
+ 0xec088c88a580fd5bL } },
+ /* 59 << 126 */
+ { { 0x612b1ecc42ce9e51L,0x8f9840fdb25fdd2aL,0x3cda78c001e7f839L,
+ 0x546b3d3aece05480L },
+ { 0x271719a980d30916L,0x45497107584c20c4L,0xaf8f94785bc78608L,
+ 0x28c7d484277e2a4cL } },
+ /* 60 << 126 */
+ { { 0xfce0176788a2ffe4L,0xdc506a3528e169a5L,0x0ea108617af9c93aL,
+ 0x1ed2436103fa0e08L },
+ { 0x96eaaa92a3d694e7L,0xc0f43b4def50bc74L,0xce6aa58c64114db4L,
+ 0x8218e8ea7c000fd4L } },
+ /* 61 << 126 */
+ { { 0xac815dfb185f8844L,0xcd7e90cb1557abfbL,0x23d16655afbfecdfL,
+ 0x80f3271f085cac4aL },
+ { 0x7fc39aa7d0e62f47L,0x88d519d1460a48e5L,0x59559ac4d28f101eL,
+ 0x7981d9e9ca9ae816L } },
+ /* 62 << 126 */
+ { { 0x5c38652c9ac38203L,0x86eaf87f57657fe5L,0x568fc472e21f5416L,
+ 0x2afff39ce7e597b5L },
+ { 0x3adbbb07256d4eabL,0x225986928285ab89L,0x35f8112a041caefeL,
+ 0x95df02e3a5064c8bL } },
+ /* 63 << 126 */
+ { { 0x4d63356ec7004bf3L,0x230a08f4db83c7deL,0xca27b2708709a7b7L,
+ 0x0d1c4cc4cb9abd2dL },
+ { 0x8a0bc66e7550fee8L,0x369cd4c79cf7247eL,0x75562e8492b5b7e7L,
+ 0x8fed0da05802af7bL } },
+ /* 64 << 126 */
+ { { 0x6a7091c2e48fb889L,0x26882c137b8a9d06L,0xa24986631b82a0e2L,
+ 0x844ed7363518152dL },
+ { 0x282f476fd86e27c7L,0xa04edaca04afefdcL,0x8b256ebc6119e34dL,
+ 0x56a413e90787d78bL } },
+ /* 0 << 133 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 133 */
+ { { 0x82ee061d5a74be50L,0xe41781c4dea16ff5L,0xe0b0c81e99bfc8a2L,
+ 0x624f4d690b547e2dL },
+ { 0x3a83545dbdcc9ae4L,0x2573dbb6409b1e8eL,0x482960c4a6c93539L,
+ 0xf01059ad5ae18798L } },
+ /* 2 << 133 */
+ { { 0x715c9f973112795fL,0xe8244437984e6ee1L,0x55cb4858ecb66bcdL,
+ 0x7c136735abaffbeeL },
+ { 0x546615955dbec38eL,0x51c0782c388ad153L,0x9ba4c53ac6e0952fL,
+ 0x27e6782a1b21dfa8L } },
+ /* 3 << 133 */
+ { { 0x682f903d4ed2dbc2L,0x0eba59c87c3b2d83L,0x8e9dc84d9c7e9335L,
+ 0x5f9b21b00eb226d7L },
+ { 0xe33bd394af267baeL,0xaa86cc25be2e15aeL,0x4f0bf67d6a8ec500L,
+ 0x5846aa44f9630658L } },
+ /* 4 << 133 */
+ { { 0xfeb09740e2c2bf15L,0x627a2205a9e99704L,0xec8d73d0c2fbc565L,
+ 0x223eed8fc20c8de8L },
+ { 0x1ee32583a8363b49L,0x1a0b6cb9c9c2b0a6L,0x49f7c3d290dbc85cL,
+ 0xa8dfbb971ef4c1acL } },
+ /* 5 << 133 */
+ { { 0xafb34d4c65c7c2abL,0x1d4610e7e2c5ea84L,0x893f6d1b973c4ab5L,
+ 0xa3cdd7e9945ba5c4L },
+ { 0x60514983064417eeL,0x1459b23cad6bdf2bL,0x23b2c3415cf726c3L,
+ 0x3a82963532d6354aL } },
+ /* 6 << 133 */
+ { { 0x294f901fab192c18L,0xec5fcbfe7030164fL,0xe2e2fcb7e2246ba6L,
+ 0x1e7c88b3221a1a0cL },
+ { 0x72c7dd93c92d88c5L,0x41c2148e1106fb59L,0x547dd4f5a0f60f14L,
+ 0xed9b52b263960f31L } },
+ /* 7 << 133 */
+ { { 0x6c8349ebb0a5b358L,0xb154c5c29e7e2ed6L,0xcad5eccfeda462dbL,
+ 0xf2d6dbe42de66b69L },
+ { 0x426aedf38665e5b2L,0x488a85137b7f5723L,0x15cc43b38bcbb386L,
+ 0x27ad0af3d791d879L } },
+ /* 8 << 133 */
+ { { 0xc16c236e846e364fL,0x7f33527cdea50ca0L,0xc48107750926b86dL,
+ 0x6c2a36090598e70cL },
+ { 0xa6755e52f024e924L,0xe0fa07a49db4afcaL,0x15c3ce7d66831790L,
+ 0x5b4ef350a6cbb0d6L } },
+ /* 9 << 133 */
+ { { 0x2c4aafc4b6205969L,0x42563f02f6c7854fL,0x016aced51d983b48L,
+ 0xfeb356d899949755L },
+ { 0x8c2a2c81d1a39bd7L,0x8f44340fe6934ae9L,0x148cf91c447904daL,
+ 0x7340185f0f51a926L } },
+ /* 10 << 133 */
+ { { 0x2f8f00fb7409ab46L,0x057e78e680e289b2L,0x03e5022ca888e5d1L,
+ 0x3c87111a9dede4e2L },
+ { 0x5b9b0e1c7809460bL,0xe751c85271c9abc7L,0x8b944e28c7cc1dc9L,
+ 0x4f201ffa1d3cfa08L } },
+ /* 11 << 133 */
+ { { 0x02fc905c3e6721ceL,0xd52d70dad0b3674cL,0x5dc2e5ca18810da4L,
+ 0xa984b2735c69dd99L },
+ { 0x63b9252784de5ca4L,0x2f1c9872c852dec4L,0x18b03593c2e3de09L,
+ 0x19d70b019813dc2fL } },
+ /* 12 << 133 */
+ { { 0x42806b2da6dc1d29L,0xd3030009f871e144L,0xa1feb333aaf49276L,
+ 0xb5583b9ec70bc04bL },
+ { 0x1db0be7895695f20L,0xfc84181189d012b5L,0x6409f27205f61643L,
+ 0x40d34174d5883128L } },
+ /* 13 << 133 */
+ { { 0xd79196f567419833L,0x6059e252863b7b08L,0x84da18171c56700cL,
+ 0x5758ee56b28d3ec4L },
+ { 0x7da2771d013b0ea6L,0xfddf524b54c5e9b9L,0x7df4faf824305d80L,
+ 0x58f5c1bf3a97763fL } },
+ /* 14 << 133 */
+ { { 0xa5af37f17c696042L,0xd4cba22c4a2538deL,0x211cb9959ea42600L,
+ 0xcd105f417b069889L },
+ { 0xb1e1cf19ddb81e74L,0x472f2d895157b8caL,0x086fb008ee9db885L,
+ 0x365cd5700f26d131L } },
+ /* 15 << 133 */
+ { { 0x284b02bba2be7053L,0xdcbbf7c67ab9a6d6L,0x4425559c20f7a530L,
+ 0x961f2dfa188767c8L },
+ { 0xe2fd943570dc80c4L,0x104d6b63f0784120L,0x7f592bc153567122L,
+ 0xf6bc1246f688ad77L } },
+ /* 16 << 133 */
+ { { 0x05214c050f15dde9L,0xa47a76a80d5f2b82L,0xbb254d3062e82b62L,
+ 0x11a05fe03ec955eeL },
+ { 0x7eaff46e9d529b36L,0x55ab13018f9e3df6L,0xc463e37199317698L,
+ 0xfd251438ccda47adL } },
+ /* 17 << 133 */
+ { { 0xca9c354723d695eaL,0x48ce626e16e589b5L,0x6b5b64c7b187d086L,
+ 0xd02e1794b2207948L },
+ { 0x8b58e98f7198111dL,0x90ca6305dcf9c3ccL,0x5691fe72f34089b0L,
+ 0x60941af1fc7c80ffL } },
+ /* 18 << 133 */
+ { { 0xa09bc0a222eb51e5L,0xc0bb7244aa9cf09aL,0x36a8077f80159f06L,
+ 0x8b5c989edddc560eL },
+ { 0x19d2f316512e1f43L,0x02eac554ad08ff62L,0x012ab84c07d20b4eL,
+ 0x37d1e115d6d4e4e1L } },
+ /* 19 << 133 */
+ { { 0xb6443e1aab7b19a8L,0xf08d067edef8cd45L,0x63adf3e9685e03daL,
+ 0xcf15a10e4792b916L },
+ { 0xf44bcce5b738a425L,0xebe131d59636b2fdL,0x940688417850d605L,
+ 0x09684eaab40d749dL } },
+ /* 20 << 133 */
+ { { 0x8c3c669c72ba075bL,0x89f78b55ba469015L,0x5706aade3e9f8ba8L,
+ 0x6d8bd565b32d7ed7L },
+ { 0x25f4e63b805f08d6L,0x7f48200dc3bcc1b5L,0x4e801968b025d847L,
+ 0x74afac0487cbe0a8L } },
+ /* 21 << 133 */
+ { { 0x43ed2c2b7e63d690L,0xefb6bbf00223cdb8L,0x4fec3cae2884d3feL,
+ 0x065ecce6d75e25a4L },
+ { 0x6c2294ce69f79071L,0x0d9a8e5f044b8666L,0x5009f23817b69d8fL,
+ 0x3c29f8fec5dfdaf7L } },
+ /* 22 << 133 */
+ { { 0x9067528febae68c4L,0x5b38563230c5ba21L,0x540df1191fdd1aecL,
+ 0xcf37825bcfba4c78L },
+ { 0x77eff980beb11454L,0x40a1a99160c1b066L,0xe8018980f889a1c7L,
+ 0xb9c52ae976c24be0L } },
+ /* 23 << 133 */
+ { { 0x05fbbcce45650ef4L,0xae000f108aa29ac7L,0x884b71724f04c470L,
+ 0x7cd4fde219bb5c25L },
+ { 0x6477b22ae8840869L,0xa88688595fbd0686L,0xf23cc02e1116dfbaL,
+ 0x76cd563fd87d7776L } },
+ /* 24 << 133 */
+ { { 0xe2a37598a9d82abfL,0x5f188ccbe6c170f5L,0x816822005066b087L,
+ 0xda22c212c7155adaL },
+ { 0x151e5d3afbddb479L,0x4b606b846d715b99L,0x4a73b54bf997cb2eL,
+ 0x9a1bfe433ecd8b66L } },
+ /* 25 << 133 */
+ { { 0x1c3128092a67d48aL,0xcd6a671e031fa9e2L,0xbec3312a0e43a34aL,
+ 0x1d93563955ef47d3L },
+ { 0x5ea024898fea73eaL,0x8247b364a035afb2L,0xb58300a65265b54cL,
+ 0x3286662f722c7148L } },
+ /* 26 << 133 */
+ { { 0xb77fd76bb4ec4c20L,0xf0a12fa70f3fe3fdL,0xf845bbf541d8c7e8L,
+ 0xe4d969ca5ec10aa8L },
+ { 0x4c0053b743e232a3L,0xdc7a3fac37f8a45aL,0x3c4261c520d81c8fL,
+ 0xfd4b3453b00eab00L } },
+ /* 27 << 133 */
+ { { 0x76d48f86d36e3062L,0x626c5277a143ff02L,0x538174deaf76f42eL,
+ 0x2267aa866407ceacL },
+ { 0xfad7635172e572d5L,0xab861af7ba7330ebL,0xa0a1c8c7418d8657L,
+ 0x988821cb20289a52L } },
+ /* 28 << 133 */
+ { { 0x79732522cccc18adL,0xaadf3f8df1a6e027L,0xf7382c9317c2354dL,
+ 0x5ce1680cd818b689L },
+ { 0x359ebbfcd9ecbee9L,0x4330689c1cae62acL,0xb55ce5b4c51ac38aL,
+ 0x7921dfeafe238ee8L } },
+ /* 29 << 133 */
+ { { 0x3972bef8271d1ca5L,0x3e423bc7e8aabd18L,0x57b09f3f44a3e5e3L,
+ 0x5da886ae7b444d66L },
+ { 0x68206634a9964375L,0x356a2fa3699cd0ffL,0xaf0faa24dba515e9L,
+ 0x536e1f5cb321d79aL } },
+ /* 30 << 133 */
+ { { 0xd3b9913a5c04e4eaL,0xd549dcfed6f11513L,0xee227bf579fd1d94L,
+ 0x9f35afeeb43f2c67L },
+ { 0xd2638d24f1314f53L,0x62baf948cabcd822L,0x5542de294ef48db0L,
+ 0xb3eb6a04fc5f6bb2L } },
+ /* 31 << 133 */
+ { { 0x23c110ae1208e16aL,0x1a4d15b5f8363e24L,0x30716844164be00bL,
+ 0xa8e24824f6f4690dL },
+ { 0x548773a290b170cfL,0xa1bef33142f191f4L,0x70f418d09247aa97L,
+ 0xea06028e48be9147L } },
+ /* 32 << 133 */
+ { { 0xe13122f3dbfb894eL,0xbe9b79f6ce274b18L,0x85a49de5ca58aadfL,
+ 0x2495775811487351L },
+ { 0x111def61bb939099L,0x1d6a974a26d13694L,0x4474b4ced3fc253bL,
+ 0x3a1485e64c5db15eL } },
+ /* 33 << 133 */
+ { { 0xe79667b4147c15b4L,0xe34f553b7bc61301L,0x032b80f817094381L,
+ 0x55d8bafd723eaa21L },
+ { 0x5a987995f1c0e74eL,0x5a9b292eebba289cL,0x413cd4b2eb4c8251L,
+ 0x98b5d243d162db0aL } },
+ /* 34 << 133 */
+ { { 0xbb47bf6668342520L,0x08d68949baa862d1L,0x11f349c7e906abcdL,
+ 0x454ce985ed7bf00eL },
+ { 0xacab5c9eb55b803bL,0xb03468ea31e3c16dL,0x5c24213dd273bf12L,
+ 0x211538eb71587887L } },
+ /* 35 << 133 */
+ { { 0x198e4a2f731dea2dL,0xd5856cf274ed7b2aL,0x86a632eb13a664feL,
+ 0x932cd909bda41291L },
+ { 0x850e95d4c0c4ddc0L,0xc0f422f8347fc2c9L,0xe68cbec486076bcbL,
+ 0xf9e7c0c0cd6cd286L } },
+ /* 36 << 133 */
+ { { 0x65994ddb0f5f27caL,0xe85461fba80d59ffL,0xff05481a66601023L,
+ 0xc665427afc9ebbfbL },
+ { 0xb0571a697587fd52L,0x935289f88d49efceL,0x61becc60ea420688L,
+ 0xb22639d913a786afL } },
+ /* 37 << 133 */
+ { { 0x1a8e6220361ecf90L,0x001f23e025506463L,0xe4ae9b5d0a5c2b79L,
+ 0xebc9cdadd8149db5L },
+ { 0xb33164a1934aa728L,0x750eb00eae9b60f3L,0x5a91615b9b9cfbfdL,
+ 0x97015cbfef45f7f6L } },
+ /* 38 << 133 */
+ { { 0xb462c4a5bf5151dfL,0x21adcc41b07118f2L,0xd60c545b043fa42cL,
+ 0xfc21aa54e96be1abL },
+ { 0xe84bc32f4e51ea80L,0x3dae45f0259b5d8dL,0xbb73c7ebc38f1b5eL,
+ 0xe405a74ae8ae617dL } },
+ /* 39 << 133 */
+ { { 0xbb1ae9c69f1c56bdL,0x8c176b9849f196a4L,0xc448f3116875092bL,
+ 0xb5afe3de9f976033L },
+ { 0xa8dafd49145813e5L,0x687fc4d9e2b34226L,0xf2dfc92d4c7ff57fL,
+ 0x004e3fc1401f1b46L } },
+ /* 40 << 133 */
+ { { 0x5afddab61430c9abL,0x0bdd41d32238e997L,0xf0947430418042aeL,
+ 0x71f9addacdddc4cbL },
+ { 0x7090c016c52dd907L,0xd9bdf44d29e2047fL,0xe6f1fe801b1011a6L,
+ 0xb63accbcd9acdc78L } },
+ /* 41 << 133 */
+ { { 0xcfc7e2351272a95bL,0x0c667717a6276ac8L,0x3c0d3709e2d7eef7L,
+ 0x5add2b069a685b3eL },
+ { 0x363ad32d14ea5d65L,0xf8e01f068d7dd506L,0xc9ea221375b4aac6L,
+ 0xed2a2bf90d353466L } },
+ /* 42 << 133 */
+ { { 0x439d79b5e9d3a7c3L,0x8e0ee5a681b7f34bL,0xcf3dacf51dc4ba75L,
+ 0x1d3d1773eb3310c7L },
+ { 0xa8e671127747ae83L,0x31f43160197d6b40L,0x0521cceecd961400L,
+ 0x67246f11f6535768L } },
+ /* 43 << 133 */
+ { { 0x702fcc5aef0c3133L,0x247cc45d7e16693bL,0xfd484e49c729b749L,
+ 0x522cef7db218320fL },
+ { 0xe56ef40559ab93b3L,0x225fba119f181071L,0x33bd659515330ed0L,
+ 0xc4be69d51ddb32f7L } },
+ /* 44 << 133 */
+ { { 0x264c76680448087cL,0xac30903f71432daeL,0x3851b26600f9bf47L,
+ 0x400ed3116cdd6d03L },
+ { 0x045e79fef8fd2424L,0xfdfd974afa6da98bL,0x45c9f6410c1e673aL,
+ 0x76f2e7335b2c5168L } },
+ /* 45 << 133 */
+ { { 0x1adaebb52a601753L,0xb286514cc57c2d49L,0xd87696701e0bfd24L,
+ 0x950c547e04478922L },
+ { 0xd1d41969e5d32bfeL,0x30bc1472750d6c3eL,0x8f3679fee0e27f3aL,
+ 0x8f64a7dca4a6ee0cL } },
+ /* 46 << 133 */
+ { { 0x2fe59937633dfb1fL,0xea82c395977f2547L,0xcbdfdf1a661ea646L,
+ 0xc7ccc591b9085451L },
+ { 0x8217796281761e13L,0xda57596f9196885cL,0xbc17e84928ffbd70L,
+ 0x1e6e0a412671d36fL } },
+ /* 47 << 133 */
+ { { 0x61ae872c4152fcf5L,0x441c87b09e77e754L,0xd0799dd5a34dff09L,
+ 0x766b4e4488a6b171L },
+ { 0xdc06a51211f1c792L,0xea02ae934be35c3eL,0xe5ca4d6de90c469eL,
+ 0x4df4368e56e4ff5cL } },
+ /* 48 << 133 */
+ { { 0x7817acab4baef62eL,0x9f5a2202a85b91e8L,0x9666ebe66ce57610L,
+ 0x32ad31f3f73bfe03L },
+ { 0x628330a425bcf4d6L,0xea950593515056e6L,0x59811c89e1332156L,
+ 0xc89cf1fe8c11b2d7L } },
+ /* 49 << 133 */
+ { { 0x75b6391304e60cc0L,0xce811e8d4625d375L,0x030e43fc2d26e562L,
+ 0xfbb30b4b608d36a0L },
+ { 0x634ff82c48528118L,0x7c6fe085cd285911L,0x7f2830c099358f28L,
+ 0x2e60a95e665e6c09L } },
+ /* 50 << 133 */
+ { { 0x08407d3d9b785dbfL,0x530889aba759bce7L,0xf228e0e652f61239L,
+ 0x2b6d14616879be3cL },
+ { 0xe6902c0451a7bbf7L,0x30ad99f076f24a64L,0x66d9317a98bc6da0L,
+ 0xf4f877f3cb596ac0L } },
+ /* 51 << 133 */
+ { { 0xb05ff62d4c44f119L,0x4555f536e9b77416L,0xc7c0d0598caed63bL,
+ 0x0cd2b7cec358b2a9L },
+ { 0x3f33287b46945fa3L,0xf8785b20d67c8791L,0xc54a7a619637bd08L,
+ 0x54d4598c18be79d7L } },
+ /* 52 << 133 */
+ { { 0x889e5acbc46d7ce1L,0x9a515bb78b085877L,0xfac1a03d0b7a5050L,
+ 0x7d3e738af2926035L },
+ { 0x861cc2ce2a6cb0ebL,0x6f2e29558f7adc79L,0x61c4d45133016376L,
+ 0xd9fd2c805ad59090L } },
+ /* 53 << 133 */
+ { { 0xe5a83738b2b836a1L,0x855b41a07c0d6622L,0x186fe3177cc19af1L,
+ 0x6465c1fffdd99acbL },
+ { 0x46e5c23f6974b99eL,0x75a7cf8ba2717cbeL,0x4d2ebc3f062be658L,
+ 0x094b44475f209c98L } },
+ /* 54 << 133 */
+ { { 0x4af285edb940cb5aL,0x6706d7927cc82f10L,0xc8c8776c030526faL,
+ 0xfa8e6f76a0da9140L },
+ { 0x77ea9d34591ee4f0L,0x5f46e33740274166L,0x1bdf98bbea671457L,
+ 0xd7c08b46862a1fe2L } },
+ /* 55 << 133 */
+ { { 0x46cc303c1c08ad63L,0x995434404c845e7bL,0x1b8fbdb548f36bf7L,
+ 0x5b82c3928c8273a7L },
+ { 0x08f712c4928435d5L,0x071cf0f179330380L,0xc74c2d24a8da054aL,
+ 0xcb0e720143c46b5cL } },
+ /* 56 << 133 */
+ { { 0x0ad7337ac0b7eff3L,0x8552225ec5e48b3cL,0xe6f78b0c73f13a5fL,
+ 0x5e70062e82349cbeL },
+ { 0x6b8d5048e7073969L,0x392d2a29c33cb3d2L,0xee4f727c4ecaa20fL,
+ 0xa068c99e2ccde707L } },
+ /* 57 << 133 */
+ { { 0xfcd5651fb87a2913L,0xea3e3c153cc252f0L,0x777d92df3b6cd3e4L,
+ 0x7a414143c5a732e7L },
+ { 0xa895951aa71ff493L,0xfe980c92bbd37cf6L,0x45bd5e64decfeeffL,
+ 0x910dc2a9a44c43e9L } },
+ /* 58 << 133 */
+ { { 0xcb403f26cca9f54dL,0x928bbdfb9303f6dbL,0x3c37951ea9eee67cL,
+ 0x3bd61a52f79961c3L },
+ { 0x09a238e6395c9a79L,0x6940ca2d61eb352dL,0x7d1e5c5ec1875631L,
+ 0x1e19742c1e1b20d1L } },
+ /* 59 << 133 */
+ { { 0x4633d90823fc2e6eL,0xa76e29a908959149L,0x61069d9c84ed7da5L,
+ 0x0baa11cf5dbcad51L },
+ { 0xd01eec64961849daL,0x93b75f1faf3d8c28L,0x57bc4f9f1ca2ee44L,
+ 0x5a26322d00e00558L } },
+ /* 60 << 133 */
+ { { 0x1888d65861a023efL,0x1d72aab4b9e5246eL,0xa9a26348e5563ec0L,
+ 0xa0971963c3439a43L },
+ { 0x567dd54badb9b5b7L,0x73fac1a1c45a524bL,0x8fe97ef7fe38e608L,
+ 0x608748d23f384f48L } },
+ /* 61 << 133 */
+ { { 0xb0571794c486094fL,0x869254a38bf3a8d6L,0x148a8dd1310b0e25L,
+ 0x99ab9f3f9aa3f7d8L },
+ { 0x0927c68a6706c02eL,0x22b5e76c69790e6cL,0x6c3252606c71376cL,
+ 0x53a5769009ef6657L } },
+ /* 62 << 133 */
+ { { 0x8d63f852edffcf3aL,0xb4d2ed043c0a6f55L,0xdb3aa8de12519b9eL,
+ 0x5d38e9c41e0a569aL },
+ { 0x871528bf303747e2L,0xa208e77cf5b5c18dL,0x9d129c88ca6bf923L,
+ 0xbcbf197fbf02839fL } },
+ /* 63 << 133 */
+ { { 0x9b9bf03027323194L,0x3b055a8b339ca59dL,0xb46b23120f669520L,
+ 0x19789f1f497e5f24L },
+ { 0x9c499468aaf01801L,0x72ee11908b69d59cL,0x8bd39595acf4c079L,
+ 0x3ee11ece8e0cd048L } },
+ /* 64 << 133 */
+ { { 0xebde86ec1ed66f18L,0x225d906bd61fce43L,0x5cab07d6e8bed74dL,
+ 0x16e4617f27855ab7L },
+ { 0x6568aaddb2fbc3ddL,0xedb5484f8aeddf5bL,0x878f20e86dcf2fadL,
+ 0x3516497c615f5699L } },
+ /* 0 << 140 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 140 */
+ { { 0xef0a3fecfa181e69L,0x9ea02f8130d69a98L,0xb2e9cf8e66eab95dL,
+ 0x520f2beb24720021L },
+ { 0x621c540a1df84361L,0x1203772171fa6d5dL,0x6e3c7b510ff5f6ffL,
+ 0x817a069babb2bef3L } },
+ /* 2 << 140 */
+ { { 0x83572fb6b294cda6L,0x6ce9bf75b9039f34L,0x20e012f0095cbb21L,
+ 0xa0aecc1bd063f0daL },
+ { 0x57c21c3af02909e5L,0xc7d59ecf48ce9cdcL,0x2732b8448ae336f8L,
+ 0x056e37233f4f85f4L } },
+ /* 3 << 140 */
+ { { 0x8a10b53189e800caL,0x50fe0c17145208fdL,0x9e43c0d3b714ba37L,
+ 0x427d200e34189accL },
+ { 0x05dee24fe616e2c0L,0x9c25f4c8ee1854c1L,0x4d3222a58f342a73L,
+ 0x0807804fa027c952L } },
+ /* 4 << 140 */
+ { { 0xc222653a4f0d56f3L,0x961e4047ca28b805L,0x2c03f8b04a73434bL,
+ 0x4c966787ab712a19L },
+ { 0xcc196c42864fee42L,0xc1be93da5b0ece5cL,0xa87d9f22c131c159L,
+ 0x2bb6d593dce45655L } },
+ /* 5 << 140 */
+ { { 0x22c49ec9b809b7ceL,0x8a41486be2c72c2cL,0x813b9420fea0bf36L,
+ 0xb3d36ee9a66dac69L },
+ { 0x6fddc08a328cc987L,0x0a3bcd2c3a326461L,0x7103c49dd810dbbaL,
+ 0xf9d81a284b78a4c4L } },
+ /* 6 << 140 */
+ { { 0x3de865ade4d55941L,0xdedafa5e30384087L,0x6f414abb4ef18b9bL,
+ 0x9ee9ea42faee5268L },
+ { 0x260faa1637a55a4aL,0xeb19a514015f93b9L,0x51d7ebd29e9c3598L,
+ 0x523fc56d1932178eL } },
+ /* 7 << 140 */
+ { { 0x501d070cb98fe684L,0xd60fbe9a124a1458L,0xa45761c892bc6b3fL,
+ 0xf5384858fe6f27cbL },
+ { 0x4b0271f7b59e763bL,0x3d4606a95b5a8e5eL,0x1eda5d9b05a48292L,
+ 0xda7731d0e6fec446L } },
+ /* 8 << 140 */
+ { { 0xa3e3369390d45871L,0xe976404006166d8dL,0xb5c3368289a90403L,
+ 0x4bd1798372f1d637L },
+ { 0xa616679ed5d2c53aL,0x5ec4bcd8fdcf3b87L,0xae6d7613b66a694eL,
+ 0x7460fc76e3fc27e5L } },
+ /* 9 << 140 */
+ { { 0x70469b8295caabeeL,0xde024ca5889501e3L,0x6bdadc06076ed265L,
+ 0x0cb1236b5a0ef8b2L },
+ { 0x4065ddbf0972ebf9L,0xf1dd387522aca432L,0xa88b97cf744aff76L,
+ 0xd1359afdfe8e3d24L } },
+ /* 10 << 140 */
+ { { 0x52a3ba2b91502cf3L,0x2c3832a8084db75dL,0x04a12dddde30b1c9L,
+ 0x7802eabce31fd60cL },
+ { 0x33707327a37fddabL,0x65d6f2abfaafa973L,0x3525c5b811e6f91aL,
+ 0x76aeb0c95f46530bL } },
+ /* 11 << 140 */
+ { { 0xe8815ff62f93a675L,0xa6ec968405f48679L,0x6dcbb556358ae884L,
+ 0x0af61472e19e3873L },
+ { 0x72334372a5f696beL,0xc65e57ea6f22fb70L,0x268da30c946cea90L,
+ 0x136a8a8765681b2aL } },
+ /* 12 << 140 */
+ { { 0xad5e81dc0f9f44d4L,0xf09a69602c46585aL,0xd1649164c447d1b1L,
+ 0x3b4b36c8879dc8b1L },
+ { 0x20d4177b3b6b234cL,0x096a25051730d9d0L,0x0611b9b8ef80531dL,
+ 0xba904b3b64bb495dL } },
+ /* 13 << 140 */
+ { { 0x1192d9d493a3147aL,0x9f30a5dc9a565545L,0x90b1f9cb6ef07212L,
+ 0x299585460d87fc13L },
+ { 0xd3323effc17db9baL,0xcb18548ccb1644a8L,0x18a306d44f49ffbcL,
+ 0x28d658f14c2e8684L } },
+ /* 14 << 140 */
+ { { 0x44ba60cda99f8c71L,0x67b7abdb4bf742ffL,0x66310f9c914b3f99L,
+ 0xae430a32f412c161L },
+ { 0x1e6776d388ace52fL,0x4bc0fa2452d7067dL,0x03c286aa8f07cd1bL,
+ 0x4cb8f38ca985b2c1L } },
+ /* 15 << 140 */
+ { { 0x83ccbe808c3bff36L,0x005a0bd25263e575L,0x460d7dda259bdcd1L,
+ 0x4a1c5642fa5cab6bL },
+ { 0x2b7bdbb99fe4fc88L,0x09418e28cc97bbb5L,0xd8274fb4a12321aeL,
+ 0xb137007d5c87b64eL } },
+ /* 16 << 140 */
+ { { 0x80531fe1c63c4962L,0x50541e89981fdb25L,0xdc1291a1fd4c2b6bL,
+ 0xc0693a17a6df4fcaL },
+ { 0xb2c4604e0117f203L,0x245f19630a99b8d0L,0xaedc20aac6212c44L,
+ 0xb1ed4e56520f52a8L } },
+ /* 17 << 140 */
+ { { 0xfe48f575f8547be3L,0x0a7033cda9e45f98L,0x4b45d3a918c50100L,
+ 0xb2a6cd6aa61d41daL },
+ { 0x60bbb4f557933c6bL,0xa7538ebd2b0d7ffcL,0x9ea3ab8d8cd626b6L,
+ 0x8273a4843601625aL } },
+ /* 18 << 140 */
+ { { 0x888598450168e508L,0x8cbc9bb299a94abdL,0x713ac792fab0a671L,
+ 0xa3995b196c9ebffcL },
+ { 0xe711668e1239e152L,0x56892558bbb8dff4L,0x8bfc7dabdbf17963L,
+ 0x5b59fe5ab3de1253L } },
+ /* 19 << 140 */
+ { { 0x7e3320eb34a9f7aeL,0xe5e8cf72d751efe4L,0x7ea003bcd9be2f37L,
+ 0xc0f551a0b6c08ef7L },
+ { 0x56606268038f6725L,0x1dd38e356d92d3b6L,0x07dfce7cc3cbd686L,
+ 0x4e549e04651c5da8L } },
+ /* 20 << 140 */
+ { { 0x4058f93b08b19340L,0xc2fae6f4cac6d89dL,0x4bad8a8c8f159cc7L,
+ 0x0ddba4b3cb0b601cL },
+ { 0xda4fc7b51dd95f8cL,0x1d163cd7cea5c255L,0x30707d06274a8c4cL,
+ 0x79d9e0082802e9ceL } },
+ /* 21 << 140 */
+ { { 0x02a29ebfe6ddd505L,0x37064e74b50bed1aL,0x3f6bae65a7327d57L,
+ 0x3846f5f1f83920bcL },
+ { 0x87c3749160df1b9bL,0x4cfb28952d1da29fL,0x10a478ca4ed1743cL,
+ 0x390c60303edd47c6L } },
+ /* 22 << 140 */
+ { { 0x8f3e53128c0a78deL,0xccd02bda1e85df70L,0xd6c75c03a61b6582L,
+ 0x0762921cfc0eebd1L },
+ { 0xd34d0823d85010c0L,0xd73aaacb0044cf1fL,0xfb4159bba3b5e78aL,
+ 0x2287c7f7e5826f3fL } },
+ /* 23 << 140 */
+ { { 0x4aeaf742580b1a01L,0xf080415d60423b79L,0xe12622cda7dea144L,
+ 0x49ea499659d62472L },
+ { 0xb42991ef571f3913L,0x0610f214f5b25a8aL,0x47adc58530b79e8fL,
+ 0xf90e3df607a065a2L } },
+ /* 24 << 140 */
+ { { 0x5d0a5deb43e2e034L,0x53fb5a34444024aaL,0xa8628c686b0c9f7fL,
+ 0x9c69c29cac563656L },
+ { 0x5a231febbace47b6L,0xbdce02899ea5a2ecL,0x05da1fac9463853eL,
+ 0x96812c52509e78aaL } },
+ /* 25 << 140 */
+ { { 0xd3fb577157151692L,0xeb2721f8d98e1c44L,0xc050608732399be1L,
+ 0xda5a5511d979d8b8L },
+ { 0x737ed55dc6f56780L,0xe20d30040dc7a7f4L,0x02ce7301f5941a03L,
+ 0x91ef5215ed30f83aL } },
+ /* 26 << 140 */
+ { { 0x28727fc14092d85fL,0x72d223c65c49e41aL,0xa7cf30a2ba6a4d81L,
+ 0x7c086209b030d87dL },
+ { 0x04844c7dfc588b09L,0x728cd4995874bbb0L,0xcc1281eee84c0495L,
+ 0x0769b5baec31958fL } },
+ /* 27 << 140 */
+ { { 0x665c228bf99c2471L,0xf2d8a11b191eb110L,0x4594f494d36d7024L,
+ 0x482ded8bcdcb25a1L },
+ { 0xc958a9d8dadd4885L,0x7004477ef1d2b547L,0x0a45f6ef2a0af550L,
+ 0x4fc739d62f8d6351L } },
+ /* 28 << 140 */
+ { { 0x75cdaf27786f08a9L,0x8700bb2642c2737fL,0x855a71411c4e2670L,
+ 0x810188c115076fefL },
+ { 0xc251d0c9abcd3297L,0xae4c8967f48108ebL,0xbd146de718ceed30L,
+ 0xf9d4f07ac986bcedL } },
+ /* 29 << 140 */
+ { { 0x5ad98ed583fa1e08L,0x7780d33ebeabd1fbL,0xe330513c903b1196L,
+ 0xba11de9ea47bc8c4L },
+ { 0x684334da02c2d064L,0x7ecf360da48de23bL,0x57a1b4740a9089d8L,
+ 0xf28fa439ff36734cL } },
+ /* 30 << 140 */
+ { { 0xf2a482cbea4570b3L,0xee65d68ba5ebcee9L,0x988d0036b9694cd5L,
+ 0x53edd0e937885d32L },
+ { 0xe37e3307beb9bc6dL,0xe9abb9079f5c6768L,0x4396ccd551f2160fL,
+ 0x2500888c47336da6L } },
+ /* 31 << 140 */
+ { { 0x383f9ed9926fce43L,0x809dd1c704da2930L,0x30f6f5968a4cb227L,
+ 0x0d700c7f73a56b38L },
+ { 0x1825ea33ab64a065L,0xaab9b7351338df80L,0x1516100d9b63f57fL,
+ 0x2574395a27a6a634L } },
+ /* 32 << 140 */
+ { { 0xb5560fb6700a1acdL,0xe823fd73fd999681L,0xda915d1f6cb4e1baL,
+ 0x0d0301186ebe00a3L },
+ { 0x744fb0c989fca8cdL,0x970d01dbf9da0e0bL,0x0ad8c5647931d76fL,
+ 0xb15737bff659b96aL } },
+ /* 33 << 140 */
+ { { 0xdc9933e8a8b484e7L,0xb2fdbdf97a26dec7L,0x2349e9a49f1f0136L,
+ 0x7860368e70fddddbL },
+ { 0xd93d2c1cf9ad3e18L,0x6d6c5f17689f4e79L,0x7a544d91b24ff1b6L,
+ 0x3e12a5ebfe16cd8cL } },
+ /* 34 << 140 */
+ { { 0x543574e9a56b872fL,0xa1ad550cfcf68ea2L,0x689e37d23f560ef7L,
+ 0x8c54b9cac9d47a8bL },
+ { 0x46d40a4a088ac342L,0xec450c7c1576c6d0L,0xb589e31c1f9689e9L,
+ 0xdacf2602b8781718L } },
+ /* 35 << 140 */
+ { { 0xa89237c6c8cb6b42L,0x1326fc93b96ef381L,0x55d56c6db5f07825L,
+ 0xacba2eea7449e22dL },
+ { 0x74e0887a633c3000L,0xcb6cd172d7cbcf71L,0x309e81dec36cf1beL,
+ 0x07a18a6d60ae399bL } },
+ /* 36 << 140 */
+ { { 0xb36c26799edce57eL,0x52b892f4df001d41L,0xd884ae5d16a1f2c6L,
+ 0x9b329424efcc370aL },
+ { 0x3120daf2bd2e21dfL,0x55298d2d02470a99L,0x0b78af6ca05db32eL,
+ 0x5c76a331601f5636L } },
+ /* 37 << 140 */
+ { { 0xaae861fff8a4f29cL,0x70dc9240d68f8d49L,0x960e649f81b1321cL,
+ 0x3d2c801b8792e4ceL },
+ { 0xf479f77242521876L,0x0bed93bc416c79b1L,0xa67fbc05263e5bc9L,
+ 0x01e8e630521db049L } },
+ /* 38 << 140 */
+ { { 0x76f26738c6f3431eL,0xe609cb02e3267541L,0xb10cff2d818c877cL,
+ 0x1f0e75ce786a13cbL },
+ { 0xf4fdca641158544dL,0x5d777e896cb71ed0L,0x3c233737a9aa4755L,
+ 0x7b453192e527ab40L } },
+ /* 39 << 140 */
+ { { 0xdb59f68839f05ffeL,0x8f4f4be06d82574eL,0xcce3450cee292d1bL,
+ 0xaa448a1261ccd086L },
+ { 0xabce91b3f7914967L,0x4537f09b1908a5edL,0xa812421ef51042e7L,
+ 0xfaf5cebcec0b3a34L } },
+ /* 40 << 140 */
+ { { 0x730ffd874ca6b39aL,0x70fb72ed02efd342L,0xeb4735f9d75c8edbL,
+ 0xc11f2157c278aa51L },
+ { 0xc459f635bf3bfebfL,0x3a1ff0b46bd9601fL,0xc9d12823c420cb73L,
+ 0x3e9af3e23c2915a3L } },
+ /* 41 << 140 */
+ { { 0xe0c82c72b41c3440L,0x175239e5e3039a5fL,0xe1084b8a558795a3L,
+ 0x328d0a1dd01e5c60L },
+ { 0x0a495f2ed3788a04L,0x25d8ff1666c11a9fL,0xf5155f059ed692d6L,
+ 0x954fa1074f425fe4L } },
+ /* 42 << 140 */
+ { { 0xd16aabf2e98aaa99L,0x90cd8ba096b0f88aL,0x957f4782c154026aL,
+ 0x54ee073452af56d2L },
+ { 0xbcf89e5445b4147aL,0x3d102f219a52816cL,0x6808517e39b62e77L,
+ 0x92e2542169169ad8L } },
+ /* 43 << 140 */
+ { { 0xd721d871bb608558L,0x60e4ebaef6d4ff9bL,0x0ba1081941f2763eL,
+ 0xca2e45be51ee3247L },
+ { 0x66d172ec2bfd7a5fL,0x528a8f2f74d0b12dL,0xe17f1e38dabe70dcL,
+ 0x1d5d73169f93983cL } },
+ /* 44 << 140 */
+ { { 0x51b2184adf423e31L,0xcb417291aedb1a10L,0x2054ca93625bcab9L,
+ 0x54396860a98998f0L },
+ { 0x4e53f6c4a54ae57eL,0x0ffeb590ee648e9dL,0xfbbdaadc6afaf6bcL,
+ 0xf88ae796aa3bfb8aL } },
+ /* 45 << 140 */
+ { { 0x209f1d44d2359ed9L,0xac68dd03f3544ce2L,0xf378da47fd51e569L,
+ 0xe1abd8602cc80097L },
+ { 0x23ca18d9343b6e3aL,0x480797e8b40a1baeL,0xd1f0c717533f3e67L,
+ 0x4489697006e6cdfcL } },
+ /* 46 << 140 */
+ { { 0x8ca2105552a82e8dL,0xb2caf78578460cdcL,0x4c1b7b62e9037178L,
+ 0xefc09d2cdb514b58L },
+ { 0x5f2df9ee9113be5cL,0x2fbda78fb3f9271cL,0xe09a81af8f83fc54L,
+ 0x06b138668afb5141L } },
+ /* 47 << 140 */
+ { { 0x38f6480f43e3865dL,0x72dd77a81ddf47d9L,0xf2a8e9714c205ff7L,
+ 0x46d449d89d088ad8L },
+ { 0x926619ea185d706fL,0xe47e02ebc7dd7f62L,0xe7f120a78cbc2031L,
+ 0xc18bef00998d4ac9L } },
+ /* 48 << 140 */
+ { { 0x18f37a9c6bdf22daL,0xefbc432f90dc82dfL,0xc52cef8e5d703651L,
+ 0x82887ba0d99881a5L },
+ { 0x7cec9ddab920ec1dL,0xd0d7e8c3ec3e8d3bL,0x445bc3954ca88747L,
+ 0xedeaa2e09fd53535L } },
+ /* 49 << 140 */
+ { { 0x461b1d936cc87475L,0xd92a52e26d2383bdL,0xfabccb59d7903546L,
+ 0x6111a7613d14b112L },
+ { 0x0ae584feb3d5f612L,0x5ea69b8d60e828ecL,0x6c07898554087030L,
+ 0x649cab04ac4821feL } },
+ /* 50 << 140 */
+ { { 0x25ecedcf8bdce214L,0xb5622f7286af7361L,0x0e1227aa7038b9e2L,
+ 0xd0efb273ac20fa77L },
+ { 0x817ff88b79df975bL,0x856bf2861999503eL,0xb4d5351f5038ec46L,
+ 0x740a52c5fc42af6eL } },
+ /* 51 << 140 */
+ { { 0x2e38bb152cbb1a3fL,0xc3eb99fe17a83429L,0xca4fcbf1dd66bb74L,
+ 0x880784d6cde5e8fcL },
+ { 0xddc84c1cb4e7a0beL,0x8780510dbd15a72fL,0x44bcf1af81ec30e1L,
+ 0x141e50a80a61073eL } },
+ /* 52 << 140 */
+ { { 0x0d95571847be87aeL,0x68a61417f76a4372L,0xf57e7e87c607c3d3L,
+ 0x043afaf85252f332L },
+ { 0xcc14e1211552a4d2L,0xb6dee692bb4d4ab4L,0xb6ab74c8a03816a4L,
+ 0x84001ae46f394a29L } },
+ /* 53 << 140 */
+ { { 0x5bed8344d795fb45L,0x57326e7db79f55a5L,0xc9533ce04accdffcL,
+ 0x53473caf3993fa04L },
+ { 0x7906eb93a13df4c8L,0xa73e51f697cbe46fL,0xd1ab3ae10ae4ccf8L,
+ 0x256145088a5b3dbcL } },
+ /* 54 << 140 */
+ { { 0x61eff96211a71b27L,0xdf71412b6bb7fa39L,0xb31ba6b82bd7f3efL,
+ 0xb0b9c41569180d29L },
+ { 0xeec14552014cdde5L,0x702c624b227b4bbbL,0x2b15e8c2d3e988f3L,
+ 0xee3bcc6da4f7fd04L } },
+ /* 55 << 140 */
+ { { 0x9d00822a42ac6c85L,0x2db0cea61df9f2b7L,0xd7cad2ab42de1e58L,
+ 0x346ed5262d6fbb61L },
+ { 0xb39629951a2faf09L,0x2fa8a5807c25612eL,0x30ae04da7cf56490L,
+ 0x756629080eea3961L } },
+ /* 56 << 140 */
+ { { 0x3609f5c53d080847L,0xcb081d395241d4f6L,0xb4fb381077961a63L,
+ 0xc20c59842abb66fcL },
+ { 0x3d40aa7cf902f245L,0x9cb127364e536b1eL,0x5eda24da99b3134fL,
+ 0xafbd9c695cd011afL } },
+ /* 57 << 140 */
+ { { 0x9a16e30ac7088c7dL,0x5ab657103207389fL,0x1b09547fe7407a53L,
+ 0x2322f9d74fdc6eabL },
+ { 0xc0f2f22d7430de4dL,0x19382696e68ca9a9L,0x17f1eff1918e5868L,
+ 0xe3b5b635586f4204L } },
+ /* 58 << 140 */
+ { { 0x146ef9803fbc4341L,0x359f2c805b5eed4eL,0x9f35744e7482e41dL,
+ 0x9a9ac3ecf3b224c2L },
+ { 0x9161a6fe91fc50aeL,0x89ccc66bc613fa7cL,0x89268b14c732f15aL,
+ 0x7cd6f4e2b467ed03L } },
+ /* 59 << 140 */
+ { { 0xfbf79869ce56b40eL,0xf93e094cc02dde98L,0xefe0c3a8edee2cd7L,
+ 0x90f3ffc0b268fd42L },
+ { 0x81a7fd5608241aedL,0x95ab7ad800b1afe8L,0x401270563e310d52L,
+ 0xd3ffdeb109d9fc43L } },
+ /* 60 << 140 */
+ { { 0xc8f85c91d11a8594L,0x2e74d25831cf6db8L,0x829c7ca302b5dfd0L,
+ 0xe389cfbe69143c86L },
+ { 0xd01b6405941768d8L,0x4510399503bf825dL,0xcc4ee16656cd17e2L,
+ 0xbea3c283ba037e79L } },
+ /* 61 << 140 */
+ { { 0x4e1ac06ed9a47520L,0xfbfe18aaaf852404L,0x5615f8e28087648aL,
+ 0x7301e47eb9d150d9L },
+ { 0x79f9f9ddb299b977L,0x76697a7ba5b78314L,0x10d674687d7c90e7L,
+ 0x7afffe03937210b5L } },
+ /* 62 << 140 */
+ { { 0x5aef3e4b28c22ceeL,0xefb0ecd809fd55aeL,0x4cea71320d2a5d6aL,
+ 0x9cfb5fa101db6357L },
+ { 0x395e0b57f36e1ac5L,0x008fa9ad36cafb7dL,0x8f6cdf705308c4dbL,
+ 0x51527a3795ed2477L } },
+ /* 63 << 140 */
+ { { 0xba0dee305bd21311L,0x6ed41b22909c90d7L,0xc5f6b7587c8696d3L,
+ 0x0db8eaa83ce83a80L },
+ { 0xd297fe37b24b4b6fL,0xfe58afe8522d1f0dL,0x973587368c98dbd9L,
+ 0x6bc226ca9454a527L } },
+ /* 64 << 140 */
+ { { 0xa12b384ece53c2d0L,0x779d897d5e4606daL,0xa53e47b073ec12b0L,
+ 0x462dbbba5756f1adL },
+ { 0x69fe09f2cafe37b6L,0x273d1ebfecce2e17L,0x8ac1d5383cf607fdL,
+ 0x8035f7ff12e10c25L } },
+ /* 0 << 147 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 147 */
+ { { 0x854d34c77e6c5520L,0xc27df9efdcb9ea58L,0x405f2369d686666dL,
+ 0x29d1febf0417aa85L },
+ { 0x9846819e93470afeL,0x3e6a9669e2a27f9eL,0x24d008a2e31e6504L,
+ 0xdba7cecf9cb7680aL } },
+ /* 2 << 147 */
+ { { 0xecaff541338d6e43L,0x56f7dd734541d5ccL,0xb5d426de96bc88caL,
+ 0x48d94f6b9ed3a2c3L },
+ { 0x6354a3bb2ef8279cL,0xd575465b0b1867f2L,0xef99b0ff95225151L,
+ 0xf3e19d88f94500d8L } },
+ /* 3 << 147 */
+ { { 0x92a83268e32dd620L,0x913ec99f627849a2L,0xedd8fdfa2c378882L,
+ 0xaf96f33eee6f8cfeL },
+ { 0xc06737e5dc3fa8a5L,0x236bb531b0b03a1dL,0x33e59f2989f037b0L,
+ 0x13f9b5a7d9a12a53L } },
+ /* 4 << 147 */
+ { { 0x0d0df6ce51efb310L,0xcb5b2eb4958df5beL,0xd6459e2936158e59L,
+ 0x82aae2b91466e336L },
+ { 0xfb658a39411aa636L,0x7152ecc5d4c0a933L,0xf10c758a49f026b7L,
+ 0xf4837f97cb09311fL } },
+ /* 5 << 147 */
+ { { 0xddfb02c4c753c45fL,0x18ca81b6f9c840feL,0x846fd09ab0f8a3e6L,
+ 0xb1162adde7733dbcL },
+ { 0x7070ad20236e3ab6L,0xf88cdaf5b2a56326L,0x05fc8719997cbc7aL,
+ 0x442cd4524b665272L } },
+ /* 6 << 147 */
+ { { 0x7807f364b71698f5L,0x6ba418d29f7b605eL,0xfd20b00fa03b2cbbL,
+ 0x883eca37da54386fL },
+ { 0xff0be43ff3437f24L,0xe910b432a48bb33cL,0x4963a128329df765L,
+ 0xac1dd556be2fe6f7L } },
+ /* 7 << 147 */
+ { { 0x557610f924a0a3fcL,0x38e17bf4e881c3f9L,0x6ba84fafed0dac99L,
+ 0xd4a222c359eeb918L },
+ { 0xc79c1dbe13f542b6L,0x1fc65e0de425d457L,0xeffb754f1debb779L,
+ 0x638d8fd09e08af60L } },
+ /* 8 << 147 */
+ { { 0x994f523a626332d5L,0x7bc388335561bb44L,0x005ed4b03d845ea2L,
+ 0xd39d3ee1c2a1f08aL },
+ { 0x6561fdd3e7676b0dL,0x620e35fffb706017L,0x36ce424ff264f9a8L,
+ 0xc4c3419fda2681f7L } },
+ /* 9 << 147 */
+ { { 0xfb6afd2f69beb6e8L,0x3a50b9936d700d03L,0xc840b2ad0c83a14fL,
+ 0x573207be54085befL },
+ { 0x5af882e309fe7e5bL,0x957678a43b40a7e1L,0x172d4bdd543056e2L,
+ 0x9c1b26b40df13c0aL } },
+ /* 10 << 147 */
+ { { 0x1c30861cf405ff06L,0xebac86bd486e828bL,0xe791a971636933fcL,
+ 0x50e7c2be7aeee947L },
+ { 0xc3d4a095fa90d767L,0xae60eb7be670ab7bL,0x17633a64397b056dL,
+ 0x93a21f33105012aaL } },
+ /* 11 << 147 */
+ { { 0x663c370babb88643L,0x91df36d722e21599L,0x183ba8358b761671L,
+ 0x381eea1d728f3bf1L },
+ { 0xb9b2f1ba39966e6cL,0x7c464a28e7295492L,0x0fd5f70a09b26b7fL,
+ 0xa9aba1f9fbe009dfL } },
+ /* 12 << 147 */
+ { { 0x857c1f22369b87adL,0x3c00e5d932fca556L,0x1ad74cab90b06466L,
+ 0xa7112386550faaf2L },
+ { 0x7435e1986d9bd5f5L,0x2dcc7e3859c3463fL,0xdc7df748ca7bd4b2L,
+ 0x13cd4c089dec2f31L } },
+ /* 13 << 147 */
+ { { 0x0d3b5df8e3237710L,0x0dadb26ecbd2f7b0L,0x9f5966abe4aa082bL,
+ 0x666ec8de350e966eL },
+ { 0x1bfd1ed5ee524216L,0xcd93c59b41dab0b6L,0x658a8435d186d6baL,
+ 0x1b7d34d2159d1195L } },
+ /* 14 << 147 */
+ { { 0x5936e46022caf46bL,0x6a45dd8f9a96fe4fL,0xf7925434b98f474eL,
+ 0x414104120053ef15L },
+ { 0x71cf8d1241de97bfL,0xb8547b61bd80bef4L,0xb47d3970c4db0037L,
+ 0xf1bcd328fef20dffL } },
+ /* 15 << 147 */
+ { { 0x31a92e0910caad67L,0x1f5919605531a1e1L,0x3bb852e05f4fc840L,
+ 0x63e297ca93a72c6cL },
+ { 0x3c2b0b2e49abad67L,0x6ec405fced3db0d9L,0xdc14a5307fef1d40L,
+ 0xccd19846280896fcL } },
+ /* 16 << 147 */
+ { { 0x00f831769bb81648L,0xd69eb485653120d0L,0xd17d75f44ccabc62L,
+ 0x34a07f82b749fcb1L },
+ { 0x2c3af787bbfb5554L,0xb06ed4d062e283f8L,0x5722889fa19213a0L,
+ 0x162b085edcf3c7b4L } },
+ /* 17 << 147 */
+ { { 0xbcaecb31e0dd3ecaL,0xc6237fbce52f13a5L,0xcc2b6b0327bac297L,
+ 0x2ae1cac5b917f54aL },
+ { 0x474807d47845ae4fL,0xfec7dd92ce5972e0L,0xc3bd25411d7915bbL,
+ 0x66f85dc4d94907caL } },
+ /* 18 << 147 */
+ { { 0xd981b888bdbcf0caL,0xd75f5da6df279e9fL,0x128bbf247054e934L,
+ 0x3c6ff6e581db134bL },
+ { 0x795b7cf4047d26e4L,0xf370f7b85049ec37L,0xc6712d4dced945afL,
+ 0xdf30b5ec095642bcL } },
+ /* 19 << 147 */
+ { { 0x9b034c624896246eL,0x5652c016ee90bbd1L,0xeb38636f87fedb73L,
+ 0x5e32f8470135a613L },
+ { 0x0703b312cf933c83L,0xd05bb76e1a7f47e6L,0x825e4f0c949c2415L,
+ 0x569e56227250d6f8L } },
+ /* 20 << 147 */
+ { { 0xbbe9eb3a6568013eL,0x8dbd203f22f243fcL,0x9dbd7694b342734aL,
+ 0x8f6d12f846afa984L },
+ { 0xb98610a2c9eade29L,0xbab4f32347dd0f18L,0x5779737b671c0d46L,
+ 0x10b6a7c6d3e0a42aL } },
+ /* 21 << 147 */
+ { { 0xfb19ddf33035b41cL,0xd336343f99c45895L,0x61fe493854c857e5L,
+ 0xc4d506beae4e57d5L },
+ { 0x3cd8c8cbbbc33f75L,0x7281f08a9262c77dL,0x083f4ea6f11a2823L,
+ 0x8895041e9fba2e33L } },
+ /* 22 << 147 */
+ { { 0xfcdfea499c438edfL,0x7678dcc391edba44L,0xf07b3b87e2ba50f0L,
+ 0xc13888ef43948c1bL },
+ { 0xc2135ad41140af42L,0x8e5104f3926ed1a7L,0xf24430cb88f6695fL,
+ 0x0ce0637b6d73c120L } },
+ /* 23 << 147 */
+ { { 0xb2db01e6fe631e8fL,0x1c5563d7d7bdd24bL,0x8daea3ba369ad44fL,
+ 0x000c81b68187a9f9L },
+ { 0x5f48a951aae1fd9aL,0xe35626c78d5aed8aL,0x209527630498c622L,
+ 0x76d17634773aa504L } },
+ /* 24 << 147 */
+ { { 0x36d90ddaeb300f7aL,0x9dcf7dfcedb5e801L,0x645cb26874d5244cL,
+ 0xa127ee79348e3aa2L },
+ { 0x488acc53575f1dbbL,0x95037e8580e6161eL,0x57e59283292650d0L,
+ 0xabe67d9914938216L } },
+ /* 25 << 147 */
+ { { 0x3c7f944b3f8e1065L,0xed908cb6330e8924L,0x08ee8fd56f530136L,
+ 0x2227b7d5d7ffc169L },
+ { 0x4f55c893b5cd6dd5L,0x82225e11a62796e8L,0x5c6cead1cb18e12cL,
+ 0x4381ae0c84f5a51aL } },
+ /* 26 << 147 */
+ { { 0x345913d37fafa4c8L,0x3d9180820491aac0L,0x9347871f3e69264cL,
+ 0xbea9dd3cb4f4f0cdL },
+ { 0xbda5d0673eadd3e7L,0x0033c1b80573bcd8L,0x255893795da2486cL,
+ 0xcb89ee5b86abbee7L } },
+ /* 27 << 147 */
+ { { 0x8fe0a8f322532e5dL,0xb6410ff0727dfc4cL,0x619b9d58226726dbL,
+ 0x5ec256697a2b2dc7L },
+ { 0xaf4d2e064c3beb01L,0x852123d07acea556L,0x0e9470faf783487aL,
+ 0x75a7ea045664b3ebL } },
+ /* 28 << 147 */
+ { { 0x4ad78f356798e4baL,0x9214e6e5c7d0e091L,0xc420b488b1290403L,
+ 0x64049e0afc295749L },
+ { 0x03ef5af13ae9841fL,0xdbe4ca19b0b662a6L,0x46845c5ffa453458L,
+ 0xf8dabf1910b66722L } },
+ /* 29 << 147 */
+ { { 0xb650f0aacce2793bL,0x71db851ec5ec47c1L,0x3eb78f3e3b234fa9L,
+ 0xb0c60f35fc0106ceL },
+ { 0x05427121774eadbdL,0x25367fafce323863L,0x7541b5c9cd086976L,
+ 0x4ff069e2dc507ad1L } },
+ /* 30 << 147 */
+ { { 0x741452568776e667L,0x6e76142cb23c6bb5L,0xdbf307121b3a8a87L,
+ 0x60e7363e98450836L },
+ { 0x5741450eb7366d80L,0xe4ee14ca4837dbdfL,0xa765eb9b69d4316fL,
+ 0x04548dca8ef43825L } },
+ /* 31 << 147 */
+ { { 0x9c9f4e4c5ae888ebL,0x733abb5156e9ac99L,0xdaad3c20ba6ac029L,
+ 0x9b8dd3d32ba3e38eL },
+ { 0xa9bb4c920bc5d11aL,0xf20127a79c5f88a3L,0x4f52b06e161d3cb8L,
+ 0x26c1ff096afaf0a6L } },
+ /* 32 << 147 */
+ { { 0x32670d2f7189e71fL,0xc64387485ecf91e7L,0x15758e57db757a21L,
+ 0x427d09f8290a9ce5L },
+ { 0x846a308f38384a7aL,0xaac3acb4b0732b99L,0x9e94100917845819L,
+ 0x95cba111a7ce5e03L } },
+ /* 33 << 147 */
+ { { 0x6f3d4f7fb00009c4L,0xb8396c278ff28b5fL,0xb1a9ae431c97975dL,
+ 0x9d7ba8afe5d9fed5L },
+ { 0x338cf09f34f485b6L,0xbc0ddacc64122516L,0xa450da1205d471feL,
+ 0x4c3a6250628dd8c9L } },
+ /* 34 << 147 */
+ { { 0x69c7d103d1295837L,0xa2893e503807eb2fL,0xd6e1e1debdb41491L,
+ 0xc630745b5e138235L },
+ { 0xc892109e48661ae1L,0x8d17e7ebea2b2674L,0x00ec0f87c328d6b5L,
+ 0x6d858645f079ff9eL } },
+ /* 35 << 147 */
+ { { 0x6cdf243e19115eadL,0x1ce1393e4bac4fcfL,0x2c960ed09c29f25bL,
+ 0x59be4d8e9d388a05L },
+ { 0x0d46e06cd0def72bL,0xb923db5de0342748L,0xf7d3aacd936d4a3dL,
+ 0x558519cc0b0b099eL } },
+ /* 36 << 147 */
+ { { 0x3ea8ebf8827097efL,0x259353dbd054f55dL,0x84c89abc6d2ed089L,
+ 0x5c548b698e096a7cL },
+ { 0xd587f616994b995dL,0x4d1531f6a5845601L,0x792ab31e451fd9f0L,
+ 0xc8b57bb265adf6caL } },
+ /* 37 << 147 */
+ { { 0x68440fcb1cd5ad73L,0xb9c860e66144da4fL,0x2ab286aa8462beb8L,
+ 0xcc6b8fffef46797fL },
+ { 0xac820da420c8a471L,0x69ae05a177ff7fafL,0xb9163f39bfb5da77L,
+ 0xbd03e5902c73ab7aL } },
+ /* 38 << 147 */
+ { { 0x7e862b5eb2940d9eL,0x3c663d864b9af564L,0xd8309031bde3033dL,
+ 0x298231b2d42c5bc6L },
+ { 0x42090d2c552ad093L,0xa4799d1cff854695L,0x0a88b5d6d31f0d00L,
+ 0xf8b40825a2f26b46L } },
+ /* 39 << 147 */
+ { { 0xec29b1edf1bd7218L,0xd491c53b4b24c86eL,0xd2fe588f3395ea65L,
+ 0x6f3764f74456ef15L },
+ { 0xdb43116dcdc34800L,0xcdbcd456c1e33955L,0xefdb554074ab286bL,
+ 0x948c7a51d18c5d7cL } },
+ /* 40 << 147 */
+ { { 0xeb81aa377378058eL,0x41c746a104411154L,0xa10c73bcfb828ac7L,
+ 0x6439be919d972b29L },
+ { 0x4bf3b4b043a2fbadL,0x39e6dadf82b5e840L,0x4f7164086397bd4cL,
+ 0x0f7de5687f1eeccbL } },
+ /* 41 << 147 */
+ { { 0x5865c5a1d2ffbfc1L,0xf74211fa4ccb6451L,0x66368a88c0b32558L,
+ 0x5b539dc29ad7812eL },
+ { 0x579483d02f3af6f6L,0x5213207899934eceL,0x50b9650fdcc9e983L,
+ 0xca989ec9aee42b8aL } },
+ /* 42 << 147 */
+ { { 0x6a44c829d6f62f99L,0x8f06a3094c2a7c0cL,0x4ea2b3a098a0cb0aL,
+ 0x5c547b70beee8364L },
+ { 0x461d40e1682afe11L,0x9e0fc77a7b41c0a8L,0x79e4aefde20d5d36L,
+ 0x2916e52032dd9f63L } },
+ /* 43 << 147 */
+ { { 0xf59e52e83f883fafL,0x396f96392b868d35L,0xc902a9df4ca19881L,
+ 0x0fc96822db2401a6L },
+ { 0x4123758766f1c68dL,0x10fc6de3fb476c0dL,0xf8b6b579841f5d90L,
+ 0x2ba8446cfa24f44aL } },
+ /* 44 << 147 */
+ { { 0xa237b920ef4a9975L,0x60bb60042330435fL,0xd6f4ab5acfb7e7b5L,
+ 0xb2ac509783435391L },
+ { 0xf036ee2fb0d1ea67L,0xae779a6a74c56230L,0x59bff8c8ab838ae6L,
+ 0xcd83ca999b38e6f0L } },
+ /* 45 << 147 */
+ { { 0xbb27bef5e33deed3L,0xe6356f6f001892a8L,0xbf3be6cc7adfbd3eL,
+ 0xaecbc81c33d1ac9dL },
+ { 0xe4feb909e6e861dcL,0x90a247a453f5f801L,0x01c50acb27346e57L,
+ 0xce29242e461acc1bL } },
+ /* 46 << 147 */
+ { { 0x04dd214a2f998a91L,0x271ee9b1d4baf27bL,0x7e3027d1e8c26722L,
+ 0x21d1645c1820dce5L },
+ { 0x086f242c7501779cL,0xf0061407fa0e8009L,0xf23ce47760187129L,
+ 0x05bbdedb0fde9bd0L } },
+ /* 47 << 147 */
+ { { 0x682f483225d98473L,0xf207fe855c658427L,0xb6fdd7ba4166ffa1L,
+ 0x0c3140569eed799dL },
+ { 0x0db8048f4107e28fL,0x74ed387141216840L,0x74489f8f56a3c06eL,
+ 0x1e1c005b12777134L } },
+ /* 48 << 147 */
+ { { 0xdb332a73f37ec3c3L,0xc65259bddd59eba0L,0x2291709cdb4d3257L,
+ 0x9a793b25bd389390L },
+ { 0xf39fe34be43756f0L,0x2f76bdce9afb56c9L,0x9f37867a61208b27L,
+ 0xea1d4307089972c3L } },
+ /* 49 << 147 */
+ { { 0x8c5953308bdf623aL,0x5f5accda8441fb7dL,0xfafa941832ddfd95L,
+ 0x6ad40c5a0fde9be7L },
+ { 0x43faba89aeca8709L,0xc64a7cf12c248a9dL,0x1662025272637a76L,
+ 0xaee1c79122b8d1bbL } },
+ /* 50 << 147 */
+ { { 0xf0f798fd21a843b2L,0x56e4ed4d8d005cb1L,0x355f77801f0d8abeL,
+ 0x197b04cf34522326L },
+ { 0x41f9b31ffd42c13fL,0x5ef7feb2b40f933dL,0x27326f425d60bad4L,
+ 0x027ecdb28c92cf89L } },
+ /* 51 << 147 */
+ { { 0x04aae4d14e3352feL,0x08414d2f73591b90L,0x5ed6124eb7da7d60L,
+ 0xb985b9314d13d4ecL },
+ { 0xa592d3ab96bf36f9L,0x012dbed5bbdf51dfL,0xa57963c0df6c177dL,
+ 0x010ec86987ca29cfL } },
+ /* 52 << 147 */
+ { { 0xba1700f6bf926dffL,0x7c9fdbd1f4bf6bc2L,0xdc18dc8f64da11f5L,
+ 0xa6074b7ad938ae75L },
+ { 0x14270066e84f44a4L,0x99998d38d27b954eL,0xc1be8ab2b4f38e9aL,
+ 0x8bb55bbf15c01016L } },
+ /* 53 << 147 */
+ { { 0xf73472b40ea2ab30L,0xd365a340f73d68ddL,0xc01a716819c2e1ebL,
+ 0x32f49e3734061719L },
+ { 0xb73c57f101d8b4d6L,0x03c8423c26b47700L,0x321d0bc8a4d8826aL,
+ 0x6004213c4bc0e638L } },
+ /* 54 << 147 */
+ { { 0xf78c64a1c1c06681L,0x16e0a16fef018e50L,0x31cbdf91db42b2b3L,
+ 0xf8f4ffcee0d36f58L },
+ { 0xcdcc71cd4cc5e3e0L,0xd55c7cfaa129e3e0L,0xccdb6ba00fb2cbf1L,
+ 0x6aba0005c4bce3cbL } },
+ /* 55 << 147 */
+ { { 0x501cdb30d232cfc4L,0x9ddcf12ed58a3cefL,0x02d2cf9c87e09149L,
+ 0xdc5d7ec72c976257L },
+ { 0x6447986e0b50d7ddL,0x88fdbaf7807f112aL,0x58c9822ab00ae9f6L,
+ 0x6abfb9506d3d27e0L } },
+ /* 56 << 147 */
+ { { 0xd0a744878a429f4fL,0x0649712bdb516609L,0xb826ba57e769b5dfL,
+ 0x82335df21fc7aaf2L },
+ { 0x2389f0675c93d995L,0x59ac367a68677be6L,0xa77985ff21d9951bL,
+ 0x038956fb85011cceL } },
+ /* 57 << 147 */
+ { { 0x608e48cbbb734e37L,0xc08c0bf22be5b26fL,0x17bbdd3bf9b1a0d9L,
+ 0xeac7d89810483319L },
+ { 0xc95c4bafbc1a6deaL,0xfdd0e2bf172aafdbL,0x40373cbc8235c41aL,
+ 0x14303f21fb6f41d5L } },
+ /* 58 << 147 */
+ { { 0xba0636210408f237L,0xcad3b09aecd2d1edL,0x4667855a52abb6a2L,
+ 0xba9157dcaa8b417bL },
+ { 0xfe7f35074f013efbL,0x1b112c4baa38c4a2L,0xa1406a609ba64345L,
+ 0xe53cba336993c80bL } },
+ /* 59 << 147 */
+ { { 0x45466063ded40d23L,0x3d5f1f4d54908e25L,0x9ebefe62403c3c31L,
+ 0x274ea0b50672a624L },
+ { 0xff818d99451d1b71L,0x80e826438f79cf79L,0xa165df1373ce37f5L,
+ 0xa744ef4ffe3a21fdL } },
+ /* 60 << 147 */
+ { { 0x73f1e7f5cf551396L,0xc616898e868c676bL,0x671c28c78c442c36L,
+ 0xcfe5e5585e0a317dL },
+ { 0x1242d8187051f476L,0x56fad2a614f03442L,0x262068bc0a44d0f6L,
+ 0xdfa2cd6ece6edf4eL } },
+ /* 61 << 147 */
+ { { 0x0f43813ad15d1517L,0x61214cb2377d44f5L,0xd399aa29c639b35fL,
+ 0x42136d7154c51c19L },
+ { 0x9774711b08417221L,0x0a5546b352545a57L,0x80624c411150582dL,
+ 0x9ec5c418fbc555bcL } },
+ /* 62 << 147 */
+ { { 0x2c87dcad771849f1L,0xb0c932c501d7bf6fL,0x6aa5cd3e89116eb2L,
+ 0xd378c25a51ca7bd3L },
+ { 0xc612a0da9e6e3e31L,0x0417a54db68ad5d0L,0x00451e4a22c6edb8L,
+ 0x9fbfe019b42827ceL } },
+ /* 63 << 147 */
+ { { 0x2fa92505ba9384a2L,0x21b8596e64ad69c1L,0x8f4fcc49983b35a6L,
+ 0xde09376072754672L },
+ { 0x2f14ccc8f7bffe6dL,0x27566bff5d94263dL,0xb5b4e9c62df3ec30L,
+ 0x94f1d7d53e6ea6baL } },
+ /* 64 << 147 */
+ { { 0x97b7851aaaca5e9bL,0x518aa52156713b97L,0x3357e8c7150a61f6L,
+ 0x7842e7e2ec2c2b69L },
+ { 0x8dffaf656868a548L,0xd963bd82e068fc81L,0x64da5c8b65917733L,
+ 0x927090ff7b247328L } },
+ /* 0 << 154 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 154 */
+ { { 0x214bc9a7d298c241L,0xe3b697ba56807cfdL,0xef1c78024564eadbL,
+ 0xdde8cdcfb48149c5L },
+ { 0x946bf0a75a4d2604L,0x27154d7f6c1538afL,0x95cc9230de5b1fccL,
+ 0xd88519e966864f82L } },
+ /* 2 << 154 */
+ { { 0xb828dd1a7cb1282cL,0xa08d7626be46973aL,0x6baf8d40e708d6b2L,
+ 0x72571fa14daeb3f3L },
+ { 0x85b1732ff22dfd98L,0x87ab01a70087108dL,0xaaaafea85988207aL,
+ 0xccc832f869f00755L } },
+ /* 3 << 154 */
+ { { 0x964d950e36ff3bf0L,0x8ad20f6ff0b34638L,0x4d9177b3b5d7585fL,
+ 0xcf839760ef3f019fL },
+ { 0x582fc5b38288c545L,0x2f8e4e9b13116bd1L,0xf91e1b2f332120efL,
+ 0xcf5687242a17dd23L } },
+ /* 4 << 154 */
+ { { 0x488f1185ca8d9d1aL,0xadf2c77dd987ded2L,0x5f3039f060c46124L,
+ 0xe5d70b7571e095f4L },
+ { 0x82d586506260e70fL,0x39d75ea7f750d105L,0x8cf3d0b175bac364L,
+ 0xf3a7564d21d01329L } },
+ /* 5 << 154 */
+ { { 0x182f04cd2f52d2a7L,0x4fde149ae2df565aL,0xb80c5eeca79fb2f7L,
+ 0xab491d7b22ddc897L },
+ { 0x99d76c18c6312c7fL,0xca0d5f3d6aa41a57L,0x71207325d15363a0L,
+ 0xe82aa265beb252c2L } },
+ /* 6 << 154 */
+ { { 0x94ab4700ec3128c2L,0x6c76d8628e383f49L,0xdc36b150c03024ebL,
+ 0xfb43947753daac69L },
+ { 0xfc68764a8dc79623L,0x5b86995db440fbb2L,0xd66879bfccc5ee0dL,
+ 0x0522894295aa8bd3L } },
+ /* 7 << 154 */
+ { { 0xb51a40a51e6a75c1L,0x24327c760ea7d817L,0x0663018207774597L,
+ 0xd6fdbec397fa7164L },
+ { 0x20c99dfb13c90f48L,0xd6ac5273686ef263L,0xc6a50bdcfef64eebL,
+ 0xcd87b28186fdfc32L } },
+ /* 8 << 154 */
+ { { 0xb24aa43e3fcd3efcL,0xdd26c034b8088e9aL,0xa5ef4dc9bd3d46eaL,
+ 0xa2f99d588a4c6a6fL },
+ { 0xddabd3552f1da46cL,0x72c3f8ce1afacdd1L,0xd90c4eee92d40578L,
+ 0xd28bb41fca623b94L } },
+ /* 9 << 154 */
+ { { 0x50fc0711745edc11L,0x9dd9ad7d3dc87558L,0xce6931fbb49d1e64L,
+ 0x6c77a0a2c98bd0f9L },
+ { 0x62b9a6296baf7cb1L,0xcf065f91ccf72d22L,0x7203cce979639071L,
+ 0x09ae4885f9cb732fL } },
+ /* 10 << 154 */
+ { { 0x5e7c3becee8314f3L,0x1c068aeddbea298fL,0x08d381f17c80acecL,
+ 0x03b56be8e330495bL },
+ { 0xaeffb8f29222882dL,0x95ff38f6c4af8bf7L,0x50e32d351fc57d8cL,
+ 0x6635be5217b444f0L } },
+ /* 11 << 154 */
+ { { 0x04d15276a5177900L,0x4e1dbb47f6858752L,0x5b475622c615796cL,
+ 0xa6fa0387691867bfL },
+ { 0xed7f5d562844c6d0L,0xc633cf9b03a2477dL,0xf6be5c402d3721d6L,
+ 0xaf312eb7e9fd68e6L } },
+ /* 12 << 154 */
+ { { 0x242792d2e7417ce1L,0xff42bc71970ee7f5L,0x1ff4dc6d5c67a41eL,
+ 0x77709b7b20882a58L },
+ { 0x3554731dbe217f2cL,0x2af2a8cd5bb72177L,0x58eee769591dd059L,
+ 0xbb2930c94bba6477L } },
+ /* 13 << 154 */
+ { { 0x863ee0477d930cfcL,0x4c262ad1396fd1f4L,0xf4765bc8039af7e1L,
+ 0x2519834b5ba104f6L },
+ { 0x7cd61b4cd105f961L,0xa5415da5d63bca54L,0x778280a088a1f17cL,
+ 0xc49689492329512cL } },
+ /* 14 << 154 */
+ { { 0x174a9126cecdaa7aL,0xfc8c7e0e0b13247bL,0x29c110d23484c1c4L,
+ 0xf8eb8757831dfc3bL },
+ { 0x022f0212c0067452L,0x3f6f69ee7b9b926cL,0x09032da0ef42daf4L,
+ 0x79f00ade83f80de4L } },
+ /* 15 << 154 */
+ { { 0x6210db7181236c97L,0x74f7685b3ee0781fL,0x4df7da7ba3e41372L,
+ 0x2aae38b1b1a1553eL },
+ { 0x1688e222f6dd9d1bL,0x576954485b8b6487L,0x478d21274b2edeaaL,
+ 0xb2818fa51e85956aL } },
+ /* 16 << 154 */
+ { { 0x1e6adddaf176f2c0L,0x01ca4604e2572658L,0x0a404ded85342ffbL,
+ 0x8cf60f96441838d6L },
+ { 0x9bbc691cc9071c4aL,0xfd58874434442803L,0x97101c85809c0d81L,
+ 0xa7fb754c8c456f7fL } },
+ /* 17 << 154 */
+ { { 0xc95f3c5cd51805e1L,0xab4ccd39b299dca8L,0x3e03d20b47eaf500L,
+ 0xfa3165c1d7b80893L },
+ { 0x005e8b54e160e552L,0xdc4972ba9019d11fL,0x21a6972e0c9a4a7aL,
+ 0xa52c258f37840fd7L } },
+ /* 18 << 154 */
+ { { 0xf8559ff4c1e99d81L,0x08e1a7d6a3c617c0L,0xb398fd43248c6ba7L,
+ 0x6ffedd91d1283794L },
+ { 0x8a6a59d2d629d208L,0xa9d141d53490530eL,0x42f6fc1838505989L,
+ 0x09bf250d479d94eeL } },
+ /* 19 << 154 */
+ { { 0x223ad3b1b3822790L,0x6c5926c093b8971cL,0x609efc7e75f7fa62L,
+ 0x45d66a6d1ec2d989L },
+ { 0x4422d663987d2792L,0x4a73caad3eb31d2bL,0xf06c2ac1a32cb9e6L,
+ 0xd9445c5f91aeba84L } },
+ /* 20 << 154 */
+ { { 0x6af7a1d5af71013fL,0xe68216e50bedc946L,0xf4cba30bd27370a0L,
+ 0x7981afbf870421ccL },
+ { 0x02496a679449f0e1L,0x86cfc4be0a47edaeL,0x3073c936b1feca22L,
+ 0xf569461203f8f8fbL } },
+ /* 21 << 154 */
+ { { 0xd063b723901515eaL,0x4c6c77a5749cf038L,0x6361e360ab9e5059L,
+ 0x596cf171a76a37c0L },
+ { 0x800f53fa6530ae7aL,0x0f5e631e0792a7a6L,0x5cc29c24efdb81c9L,
+ 0xa269e8683f9c40baL } },
+ /* 22 << 154 */
+ { { 0xec14f9e12cb7191eL,0x78ea1bd8e5b08ea6L,0x3c65aa9b46332bb9L,
+ 0x84cc22b3bf80ce25L },
+ { 0x0098e9e9d49d5bf1L,0xcd4ec1c619087da4L,0x3c9d07c5aef6e357L,
+ 0x839a02689f8f64b8L } },
+ /* 23 << 154 */
+ { { 0xc5e9eb62c6d8607fL,0x759689f56aa995e4L,0x70464669bbb48317L,
+ 0x921474bfe402417dL },
+ { 0xcabe135b2a354c8cL,0xd51e52d2812fa4b5L,0xec74109653311fe8L,
+ 0x4f774535b864514bL } },
+ /* 24 << 154 */
+ { { 0xbcadd6715bde48f8L,0xc97038732189bc7dL,0x5d45299ec709ee8aL,
+ 0xd1287ee2845aaff8L },
+ { 0x7d1f8874db1dbf1fL,0xea46588b990c88d6L,0x60ba649a84368313L,
+ 0xd5fdcbce60d543aeL } },
+ /* 25 << 154 */
+ { { 0x90b46d43810d5ab0L,0x6739d8f904d7e5ccL,0x021c1a580d337c33L,
+ 0x00a6116268e67c40L },
+ { 0x95ef413b379f0a1fL,0xfe126605e9e2ab95L,0x67578b852f5f199cL,
+ 0xf5c003292cb84913L } },
+ /* 26 << 154 */
+ { { 0xf795643037577dd8L,0x83b82af429c5fe88L,0x9c1bea26cdbdc132L,
+ 0x589fa0869c04339eL },
+ { 0x033e9538b13799dfL,0x85fa8b21d295d034L,0xdf17f73fbd9ddccaL,
+ 0xf32bd122ddb66334L } },
+ /* 27 << 154 */
+ { { 0x55ef88a7858b044cL,0x1f0d69c25aa9e397L,0x55fd9cc340d85559L,
+ 0xc774df727785ddb2L },
+ { 0x5dcce9f6d3bd2e1cL,0xeb30da20a85dfed0L,0x5ed7f5bbd3ed09c4L,
+ 0x7d42a35c82a9c1bdL } },
+ /* 28 << 154 */
+ { { 0xcf3de9959890272dL,0x75f3432a3e713a10L,0x5e13479fe28227b8L,
+ 0xb8561ea9fefacdc8L },
+ { 0xa6a297a08332aafdL,0x9b0d8bb573809b62L,0xd2fa1cfd0c63036fL,
+ 0x7a16eb55bd64bda8L } },
+ /* 29 << 154 */
+ { { 0x3f5cf5f678e62ddcL,0x2267c45407fd752bL,0x5e361b6b5e437bbeL,
+ 0x95c595018354e075L },
+ { 0xec725f85f2b254d9L,0x844b617d2cb52b4eL,0xed8554f5cf425fb5L,
+ 0xab67703e2af9f312L } },
+ /* 30 << 154 */
+ { { 0x4cc34ec13cf48283L,0xb09daa259c8a705eL,0xd1e9d0d05b7d4f84L,
+ 0x4df6ef64db38929dL },
+ { 0xe16b0763aa21ba46L,0xc6b1d178a293f8fbL,0x0ff5b602d520aabfL,
+ 0x94d671bdc339397aL } },
+ /* 31 << 154 */
+ { { 0x7c7d98cf4f5792faL,0x7c5e0d6711215261L,0x9b19a631a7c5a6d4L,
+ 0xc8511a627a45274dL },
+ { 0x0c16621ca5a60d99L,0xf7fbab88cf5e48cbL,0xab1e6ca2f7ddee08L,
+ 0x83bd08cee7867f3cL } },
+ /* 32 << 154 */
+ { { 0xf7e48e8a2ac13e27L,0x4494f6df4eb1a9f5L,0xedbf84eb981f0a62L,
+ 0x49badc32536438f0L },
+ { 0x50bea541004f7571L,0xbac67d10df1c94eeL,0x253d73a1b727bc31L,
+ 0xb3d01cf230686e28L } },
+ /* 33 << 154 */
+ { { 0x51b77b1b55fd0b8bL,0xa099d183feec3173L,0x202b1fb7670e72b7L,
+ 0xadc88b33a8e1635fL },
+ { 0x34e8216af989d905L,0xc2e68d2029b58d01L,0x11f81c926fe55a93L,
+ 0x15f1462a8f296f40L } },
+ /* 34 << 154 */
+ { { 0x1915d375ea3d62f2L,0xa17765a301c8977dL,0x7559710ae47b26f6L,
+ 0xe0bd29c8535077a5L },
+ { 0x615f976d08d84858L,0x370dfe8569ced5c1L,0xbbc7503ca734fa56L,
+ 0xfbb9f1ec91ac4574L } },
+ /* 35 << 154 */
+ { { 0x95d7ec53060dd7efL,0xeef2dacd6e657979L,0x54511af3e2a08235L,
+ 0x1e324aa41f4aea3dL },
+ { 0x550e7e71e6e67671L,0xbccd5190bf52faf7L,0xf880d316223cc62aL,
+ 0x0d402c7e2b32eb5dL } },
+ /* 36 << 154 */
+ { { 0xa40bc039306a5a3bL,0x4e0a41fd96783a1bL,0xa1e8d39a0253cdd4L,
+ 0x6480be26c7388638L },
+ { 0xee365e1d2285f382L,0x188d8d8fec0b5c36L,0x34ef1a481f0f4d82L,
+ 0x1a8f43e1a487d29aL } },
+ /* 37 << 154 */
+ { { 0x8168226d77aefb3aL,0xf69a751e1e72c253L,0x8e04359ae9594df1L,
+ 0x475ffd7dd14c0467L },
+ { 0xb5a2c2b13844e95cL,0x85caf647dd12ef94L,0x1ecd2a9ff1063d00L,
+ 0x1dd2e22923843311L } },
+ /* 38 << 154 */
+ { { 0x38f0e09d73d17244L,0x3ede77468fc653f1L,0xae4459f5dc20e21cL,
+ 0x00db2ffa6a8599eaL },
+ { 0x11682c3930cfd905L,0x4934d074a5c112a6L,0xbdf063c5568bfe95L,
+ 0x779a440a016c441aL } },
+ /* 39 << 154 */
+ { { 0x0c23f21897d6fbdcL,0xd3a5cd87e0776aacL,0xcee37f72d712e8dbL,
+ 0xfb28c70d26f74e8dL },
+ { 0xffe0c728b61301a0L,0xa6282168d3724354L,0x7ff4cb00768ffedcL,
+ 0xc51b308803b02de9L } },
+ /* 40 << 154 */
+ { { 0xa5a8147c3902dda5L,0x35d2f706fe6973b4L,0x5ac2efcfc257457eL,
+ 0x933f48d48700611bL },
+ { 0xc365af884912beb2L,0x7f5a4de6162edf94L,0xc646ba7c0c32f34bL,
+ 0x632c6af3b2091074L } },
+ /* 41 << 154 */
+ { { 0x58d4f2e3753e43a9L,0x70e1d21724d4e23fL,0xb24bf729afede6a6L,
+ 0x7f4a94d8710c8b60L },
+ { 0xaad90a968d4faa6aL,0xd9ed0b32b066b690L,0x52fcd37b78b6dbfdL,
+ 0x0b64615e8bd2b431L } },
+ /* 42 << 154 */
+ { { 0x228e2048cfb9fad5L,0xbeaa386d240b76bdL,0x2d6681c890dad7bcL,
+ 0x3e553fc306d38f5eL },
+ { 0xf27cdb9b9d5f9750L,0x3e85c52ad28c5b0eL,0x190795af5247c39bL,
+ 0x547831ebbddd6828L } },
+ /* 43 << 154 */
+ { { 0xf327a2274a82f424L,0x36919c787e47f89dL,0xe478391943c7392cL,
+ 0xf101b9aa2316fefeL },
+ { 0xbcdc9e9c1c5009d2L,0xfb55ea139cd18345L,0xf5b5e231a3ce77c7L,
+ 0xde6b4527d2f2cb3dL } },
+ /* 44 << 154 */
+ { { 0x10f6a3339bb26f5fL,0x1e85db8e044d85b6L,0xc3697a0894197e54L,
+ 0x65e18cc0a7cb4ea8L },
+ { 0xa38c4f50a471fe6eL,0xf031747a2f13439cL,0x53c4a6bac007318bL,
+ 0xa8da3ee51deccb3dL } },
+ /* 45 << 154 */
+ { { 0x0555b31c558216b1L,0x90c7810c2f79e6c2L,0x9b669f4dfe8eed3cL,
+ 0x70398ec8e0fac126L },
+ { 0xa96a449ef701b235L,0x0ceecdb3eb94f395L,0x285fc368d0cb7431L,
+ 0x0d37bb5216a18c64L } },
+ /* 46 << 154 */
+ { { 0x05110d38b880d2ddL,0xa60f177b65930d57L,0x7da34a67f36235f5L,
+ 0x47f5e17c183816b9L },
+ { 0xc7664b57db394af4L,0x39ba215d7036f789L,0x46d2ca0e2f27b472L,
+ 0xc42647eef73a84b7L } },
+ /* 47 << 154 */
+ { { 0x44bc754564488f1dL,0xaa922708f4cf85d5L,0x721a01d553e4df63L,
+ 0x649c0c515db46cedL },
+ { 0x6bf0d64e3cffcb6cL,0xe3bf93fe50f71d96L,0x75044558bcc194a0L,
+ 0x16ae33726afdc554L } },
+ /* 48 << 154 */
+ { { 0xbfc01adf5ca48f3fL,0x64352f06e22a9b84L,0xcee54da1c1099e4aL,
+ 0xbbda54e8fa1b89c0L },
+ { 0x166a3df56f6e55fbL,0x1ca44a2420176f88L,0x936afd88dfb7b5ffL,
+ 0xe34c24378611d4a0L } },
+ /* 49 << 154 */
+ { { 0x7effbb7586142103L,0x6704ba1b1f34fc4dL,0x7c2a468f10c1b122L,
+ 0x36b3a6108c6aace9L },
+ { 0xabfcc0a775a0d050L,0x066f91973ce33e32L,0xce905ef429fe09beL,
+ 0x89ee25baa8376351L } },
+ /* 50 << 154 */
+ { { 0x2a3ede22fd29dc76L,0x7fd32ed936f17260L,0x0cadcf68284b4126L,
+ 0x63422f08a7951fc8L },
+ { 0x562b24f40807e199L,0xfe9ce5d122ad4490L,0xc2f51b100db2b1b4L,
+ 0xeb3613ffe4541d0dL } },
+ /* 51 << 154 */
+ { { 0xbd2c4a052680813bL,0x527aa55d561b08d6L,0xa9f8a40ea7205558L,
+ 0xe3eea56f243d0becL },
+ { 0x7b853817a0ff58b3L,0xb67d3f651a69e627L,0x0b76bbb9a869b5d6L,
+ 0xa3afeb82546723edL } },
+ /* 52 << 154 */
+ { { 0x5f24416d3e554892L,0x8413b53d430e2a45L,0x99c56aee9032a2a0L,
+ 0x09432bf6eec367b1L },
+ { 0x552850c6daf0ecc1L,0x49ebce555bc92048L,0xdfb66ba654811307L,
+ 0x1b84f7976f298597L } },
+ /* 53 << 154 */
+ { { 0x795904818d1d7a0dL,0xd9fabe033a6fa556L,0xa40f9c59ba9e5d35L,
+ 0xcb1771c1f6247577L },
+ { 0x542a47cae9a6312bL,0xa34b3560552dd8c5L,0xfdf94de00d794716L,
+ 0xd46124a99c623094L } },
+ /* 54 << 154 */
+ { { 0x56b7435d68afe8b4L,0x27f205406c0d8ea1L,0x12b77e1473186898L,
+ 0xdbc3dd467479490fL },
+ { 0x951a9842c03b0c05L,0x8b1b3bb37921bc96L,0xa573b3462b202e0aL,
+ 0x77e4665d47254d56L } },
+ /* 55 << 154 */
+ { { 0x08b70dfcd23e3984L,0xab86e8bcebd14236L,0xaa3e07f857114ba7L,
+ 0x5ac71689ab0ef4f2L },
+ { 0x88fca3840139d9afL,0x72733f8876644af0L,0xf122f72a65d74f4aL,
+ 0x13931577a5626c7aL } },
+ /* 56 << 154 */
+ { { 0xd5b5d9eb70f8d5a4L,0x375adde7d7bbb228L,0x31e88b860c1c0b32L,
+ 0xd1f568c4173edbaaL },
+ { 0x1592fc835459df02L,0x2beac0fb0fcd9a7eL,0xb0a6fdb81b473b0aL,
+ 0xe3224c6f0fe8fc48L } },
+ /* 57 << 154 */
+ { { 0x680bd00ee87edf5bL,0x30385f0220e77cf5L,0xe9ab98c04d42d1b2L,
+ 0x72d191d2d3816d77L },
+ { 0x1564daca0917d9e5L,0x394eab591f8fed7fL,0xa209aa8d7fbb3896L,
+ 0x5564f3b9be6ac98eL } },
+ /* 58 << 154 */
+ { { 0xead21d05d73654efL,0x68d1a9c413d78d74L,0x61e017086d4973a0L,
+ 0x83da350046e6d32aL },
+ { 0x6a3dfca468ae0118L,0xa1b9a4c9d02da069L,0x0b2ff9c7ebab8302L,
+ 0x98af07c3944ba436L } },
+ /* 59 << 154 */
+ { { 0x85997326995f0f9fL,0x467fade071b58bc6L,0x47e4495abd625a2bL,
+ 0xfdd2d01d33c3b8cdL },
+ { 0x2c38ae28c693f9faL,0x48622329348f7999L,0x97bf738e2161f583L,
+ 0x15ee2fa7565e8cc9L } },
+ /* 60 << 154 */
+ { { 0xa1a5c8455777e189L,0xcc10bee0456f2829L,0x8ad95c56da762bd5L,
+ 0x152e2214e9d91da8L },
+ { 0x975b0e727cb23c74L,0xfd5d7670a90c66dfL,0xb5b5b8ad225ffc53L,
+ 0xab6dff73faded2aeL } },
+ /* 61 << 154 */
+ { { 0xebd567816f4cbe9dL,0x0ed8b2496a574bd7L,0x41c246fe81a881faL,
+ 0x91564805c3db9c70L },
+ { 0xd7c12b085b862809L,0x1facd1f155858d7bL,0x7693747caf09e92aL,
+ 0x3b69dcba189a425fL } },
+ /* 62 << 154 */
+ { { 0x0be28e9f967365efL,0x57300eb2e801f5c9L,0x93b8ac6ad583352fL,
+ 0xa2cf1f89cd05b2b7L },
+ { 0x7c0c9b744dcc40ccL,0xfee38c45ada523fbL,0xb49a4dec1099cc4dL,
+ 0x325c377f69f069c6L } },
+ /* 63 << 154 */
+ { { 0xe12458ce476cc9ffL,0x580e0b6cc6d4cb63L,0xd561c8b79072289bL,
+ 0x0377f264a619e6daL },
+ { 0x2668536288e591a5L,0xa453a7bd7523ca2bL,0x8a9536d2c1df4533L,
+ 0xc8e50f2fbe972f79L } },
+ /* 64 << 154 */
+ { { 0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L,
+ 0x810ee252af7c9860L },
+ { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L,
+ 0xd485717a92731745L } },
+ /* 0 << 161 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 161 */
+ { { 0x896c42e8ee36860cL,0xdaf04dfd4113c22dL,0x1adbb7b744104213L,
+ 0xe5fd5fa11fd394eaL },
+ { 0x68235d941a4e0551L,0x6772cfbe18d10151L,0x276071e309984523L,
+ 0xe4e879de5a56ba98L } },
+ /* 2 << 161 */
+ { { 0xaaafafb0285b9491L,0x01a0be881e4c705eL,0xff1d4f5d2ad9caabL,
+ 0x6e349a4ac37a233fL },
+ { 0xcf1c12464a1c6a16L,0xd99e6b6629383260L,0xea3d43665f6d5471L,
+ 0x36974d04ff8cc89bL } },
+ /* 3 << 161 */
+ { { 0xc26c49a1cfe89d80L,0xb42c026dda9c8371L,0xca6c013adad066d2L,
+ 0xfb8f722856a4f3eeL },
+ { 0x08b579ecd850935bL,0x34c1a74cd631e1b3L,0xcb5fe596ac198534L,
+ 0x39ff21f6e1f24f25L } },
+ /* 4 << 161 */
+ { { 0x27f29e148f929057L,0x7a64ae06c0c853dfL,0x256cd18358e9c5ceL,
+ 0x9d9cce82ded092a5L },
+ { 0xcc6e59796e93b7c7L,0xe1e4709231bb9e27L,0xb70b3083aa9e29a0L,
+ 0xbf181a753785e644L } },
+ /* 5 << 161 */
+ { { 0xf53f2c658ead09f7L,0x1335e1d59780d14dL,0x69cc20e0cd1b66bcL,
+ 0x9b670a37bbe0bfc8L },
+ { 0xce53dc8128efbeedL,0x0c74e77c8326a6e5L,0x3604e0d2b88e9a63L,
+ 0xbab38fca13dc2248L } },
+ /* 6 << 161 */
+ { { 0x8ed6e8c85c0a3f1eL,0xbcad24927c87c37fL,0xfdfb62bb9ee3b78dL,
+ 0xeba8e477cbceba46L },
+ { 0x37d38cb0eeaede4bL,0x0bc498e87976deb6L,0xb2944c046b6147fbL,
+ 0x8b123f35f71f9609L } },
+ /* 7 << 161 */
+ { { 0xa155dcc7de79dc24L,0xf1168a32558f69cdL,0xbac215950d1850dfL,
+ 0x15c8295bb204c848L },
+ { 0xf661aa367d8184ffL,0xc396228e30447bdbL,0x11cd5143bde4a59eL,
+ 0xe3a26e3b6beab5e6L } },
+ /* 8 << 161 */
+ { { 0xd3b3a13f1402b9d0L,0x573441c32c7bc863L,0x4b301ec4578c3e6eL,
+ 0xc26fc9c40adaf57eL },
+ { 0x96e71bfd7493cea3L,0xd05d4b3f1af81456L,0xdaca2a8a6a8c608fL,
+ 0x53ef07f60725b276L } },
+ /* 9 << 161 */
+ { { 0x07a5fbd27824fc56L,0x3467521813289077L,0x5bf69fd5e0c48349L,
+ 0xa613ddd3b6aa7875L },
+ { 0x7f78c19c5450d866L,0x46f4409c8f84a481L,0x9f1d192890fce239L,
+ 0x016c4168b2ce44b9L } },
+ /* 10 << 161 */
+ { { 0xbae023f0c7435978L,0xb152c88820e30e19L,0x9c241645e3fa6fafL,
+ 0x735d95c184823e60L },
+ { 0x0319757303955317L,0x0b4b02a9f03b4995L,0x076bf55970274600L,
+ 0x32c5cc53aaf57508L } },
+ /* 11 << 161 */
+ { { 0xe8af6d1f60624129L,0xb7bc5d649a5e2b5eL,0x3814b0485f082d72L,
+ 0x76f267f2ce19677aL },
+ { 0x626c630fb36eed93L,0x55230cd73bf56803L,0x78837949ce2736a0L,
+ 0x0d792d60aa6c55f1L } },
+ /* 12 << 161 */
+ { { 0x0318dbfdd5c7c5d2L,0xb38f8da7072b342dL,0x3569bddc7b8de38aL,
+ 0xf25b5887a1c94842L },
+ { 0xb2d5b2842946ad60L,0x854f29ade9d1707eL,0xaa5159dc2c6a4509L,
+ 0x899f94c057189837L } },
+ /* 13 << 161 */
+ { { 0xcf6adc51f4a55b03L,0x261762de35e3b2d5L,0x4cc4301204827b51L,
+ 0xcd22a113c6021442L },
+ { 0xce2fd61a247c9569L,0x59a50973d152becaL,0x6c835a1163a716d4L,
+ 0xc26455ed187dedcfL } },
+ /* 14 << 161 */
+ { { 0x27f536e049ce89e7L,0x18908539cc890cb5L,0x308909abd83c2aa1L,
+ 0xecd3142b1ab73bd3L },
+ { 0x6a85bf59b3f5ab84L,0x3c320a68f2bea4c6L,0xad8dc5386da4541fL,
+ 0xeaf34eb0b7c41186L } },
+ /* 15 << 161 */
+ { { 0x1c780129977c97c4L,0x5ff9beebc57eb9faL,0xa24d0524c822c478L,
+ 0xfd8eec2a461cd415L },
+ { 0xfbde194ef027458cL,0xb4ff53191d1be115L,0x63f874d94866d6f4L,
+ 0x35c75015b21ad0c9L } },
+ /* 16 << 161 */
+ { { 0xa6b5c9d646ac49d2L,0x42c77c0b83137aa9L,0x24d000fc68225a38L,
+ 0x0f63cfc82fe1e907L },
+ { 0x22d1b01bc6441f95L,0x7d38f719ec8e448fL,0x9b33fa5f787fb1baL,
+ 0x94dcfda1190158dfL } },
+ /* 17 << 161 */
+ { { 0xc47cb3395f6d4a09L,0x6b4f355cee52b826L,0x3d100f5df51b930aL,
+ 0xf4512fac9f668f69L },
+ { 0x546781d5206c4c74L,0xd021d4d4cb4d2e48L,0x494a54c2ca085c2dL,
+ 0xf1dbaca4520850a8L } },
+ /* 18 << 161 */
+ { { 0x63c79326490a1acaL,0xcb64dd9c41526b02L,0xbb772591a2979258L,
+ 0x3f58297048d97846L },
+ { 0xd66b70d17c213ba7L,0xc28febb5e8a0ced4L,0x6b911831c10338c1L,
+ 0x0d54e389bf0126f3L } },
+ /* 19 << 161 */
+ { { 0x7048d4604af206eeL,0x786c88f677e97cb9L,0xd4375ae1ac64802eL,
+ 0x469bcfe1d53ec11cL },
+ { 0xfc9b340d47062230L,0xe743bb57c5b4a3acL,0xfe00b4aa59ef45acL,
+ 0x29a4ef2359edf188L } },
+ /* 20 << 161 */
+ { { 0x40242efeb483689bL,0x2575d3f6513ac262L,0xf30037c80ca6db72L,
+ 0xc9fcce8298864be2L },
+ { 0x84a112ff0149362dL,0x95e575821c4ae971L,0x1fa4b1a8945cf86cL,
+ 0x4525a7340b024a2fL } },
+ /* 21 << 161 */
+ { { 0xe76c8b628f338360L,0x483ff59328edf32bL,0x67e8e90a298b1aecL,
+ 0x9caab338736d9a21L },
+ { 0x5c09d2fd66892709L,0x2496b4dcb55a1d41L,0x93f5fb1ae24a4394L,
+ 0x08c750496fa8f6c1L } },
+ /* 22 << 161 */
+ { { 0xcaead1c2c905d85fL,0xe9d7f7900733ae57L,0x24c9a65cf07cdd94L,
+ 0x7389359ca4b55931L },
+ { 0xf58709b7367e45f7L,0x1f203067cb7e7adcL,0x82444bffc7b72818L,
+ 0x07303b35baac8033L } },
+ /* 23 << 161 */
+ { { 0x1e1ee4e4d13b7ea1L,0xe6489b24e0e74180L,0xa5f2c6107e70ef70L,
+ 0xa1655412bdd10894L },
+ { 0x555ebefb7af4194eL,0x533c1c3c8e89bd9cL,0x735b9b5789895856L,
+ 0x15fb3cd2567f5c15L } },
+ /* 24 << 161 */
+ { { 0x057fed45526f09fdL,0xe8a4f10c8128240aL,0x9332efc4ff2bfd8dL,
+ 0x214e77a0bd35aa31L },
+ { 0x32896d7314faa40eL,0x767867ec01e5f186L,0xc9adf8f117a1813eL,
+ 0xcb6cda7854741795L } },
+ /* 25 << 161 */
+ { { 0xb7521b6d349d51aaL,0xf56b5a9ee3c7b8e9L,0xc6f1e5c932a096dfL,
+ 0x083667c4a3635024L },
+ { 0x365ea13518087f2fL,0xf1b8eaacd136e45dL,0xc8a0e48473aec989L,
+ 0xd75a324b142c9259L } },
+ /* 26 << 161 */
+ { { 0xb7b4d00101dae185L,0x45434e0b9b7a94bcL,0xf54339affbd8cb0bL,
+ 0xdcc4569ee98ef49eL },
+ { 0x7789318a09a51299L,0x81b4d206b2b025d8L,0xf64aa418fae85792L,
+ 0x3e50258facd7baf7L } },
+ /* 27 << 161 */
+ { { 0xdce84cdb2996864bL,0xa2e670891f485fa4L,0xb28b2bb6534c6a5aL,
+ 0x31a7ec6bc94b9d39L },
+ { 0x1d217766d6bc20daL,0x4acdb5ec86761190L,0x6872632873701063L,
+ 0x4d24ee7c2128c29bL } },
+ /* 28 << 161 */
+ { { 0xc072ebd3a19fd868L,0x612e481cdb8ddd3bL,0xb4e1d7541a64d852L,
+ 0x00ef95acc4c6c4abL },
+ { 0x1536d2edaa0a6c46L,0x6129408643774790L,0x54af25e8343fda10L,
+ 0x9ff9d98dfd25d6f2L } },
+ /* 29 << 161 */
+ { { 0x0746af7c468b8835L,0x977a31cb730ecea7L,0xa5096b80c2cf4a81L,
+ 0xaa9868336458c37aL },
+ { 0x6af29bf3a6bd9d34L,0x6a62fe9b33c5d854L,0x50e6c304b7133b5eL,
+ 0x04b601597d6e6848L } },
+ /* 30 << 161 */
+ { { 0x4cd296df5579bea4L,0x10e35ac85ceedaf1L,0x04c4c5fde3bcc5b1L,
+ 0x95f9ee8a89412cf9L },
+ { 0x2c9459ee82b6eb0fL,0x2e84576595c2aaddL,0x774a84aed327fcfeL,
+ 0xd8c937220368d476L } },
+ /* 31 << 161 */
+ { { 0x0dbd5748f83e8a3bL,0xa579aa968d2495f3L,0x535996a0ae496e9bL,
+ 0x07afbfe9b7f9bcc2L },
+ { 0x3ac1dc6d5b7bd293L,0x3b592cff7022323dL,0xba0deb989c0a3e76L,
+ 0x18e78e9f4b197acbL } },
+ /* 32 << 161 */
+ { { 0x211cde10296c36efL,0x7ee8967282c4da77L,0xb617d270a57836daL,
+ 0xf0cd9c319cb7560bL },
+ { 0x01fdcbf7e455fe90L,0x3fb53cbb7e7334f3L,0x781e2ea44e7de4ecL,
+ 0x8adab3ad0b384fd0L } },
+ /* 33 << 161 */
+ { { 0x129eee2f53d64829L,0x7a471e17a261492bL,0xe4f9adb9e4cb4a2cL,
+ 0x3d359f6f97ba2c2dL },
+ { 0x346c67860aacd697L,0x92b444c375c2f8a8L,0xc79fa117d85df44eL,
+ 0x56782372398ddf31L } },
+ /* 34 << 161 */
+ { { 0x60e690f2bbbab3b8L,0x4851f8ae8b04816bL,0xc72046ab9c92e4d2L,
+ 0x518c74a17cf3136bL },
+ { 0xff4eb50af9877d4cL,0x14578d90a919cabbL,0x8218f8c4ac5eb2b6L,
+ 0xa3ccc547542016e4L } },
+ /* 35 << 161 */
+ { { 0x025bf48e327f8349L,0xf3e97346f43cb641L,0xdc2bafdf500f1085L,
+ 0x571678762f063055L },
+ { 0x5bd914b9411925a6L,0x7c078d48a1123de5L,0xee6bf835182b165dL,
+ 0xb11b5e5bba519727L } },
+ /* 36 << 161 */
+ { { 0xe33ea76c1eea7b85L,0x2352b46192d4f85eL,0xf101d334afe115bbL,
+ 0xfabc1294889175a3L },
+ { 0x7f6bcdc05233f925L,0xe0a802dbe77fec55L,0xbdb47b758069b659L,
+ 0x1c5e12def98fbd74L } },
+ /* 37 << 161 */
+ { { 0x869c58c64b8457eeL,0xa5360f694f7ea9f7L,0xe576c09ff460b38fL,
+ 0x6b70d54822b7fb36L },
+ { 0x3fd237f13bfae315L,0x33797852cbdff369L,0x97df25f525b516f9L,
+ 0x46f388f2ba38ad2dL } },
+ /* 38 << 161 */
+ { { 0x656c465889d8ddbbL,0x8830b26e70f38ee8L,0x4320fd5cde1212b0L,
+ 0xc34f30cfe4a2edb2L },
+ { 0xabb131a356ab64b8L,0x7f77f0ccd99c5d26L,0x66856a37bf981d94L,
+ 0x19e76d09738bd76eL } },
+ /* 39 << 161 */
+ { { 0xe76c8ac396238f39L,0xc0a482bea830b366L,0xb7b8eaff0b4eb499L,
+ 0x8ecd83bc4bfb4865L },
+ { 0x971b2cb7a2f3776fL,0xb42176a4f4b88adfL,0xb9617df5be1fa446L,
+ 0x8b32d508cd031bd2L } },
+ /* 40 << 161 */
+ { { 0x1c6bd47d53b618c0L,0xc424f46c6a227923L,0x7303ffdedd92d964L,
+ 0xe971287871b5abf2L },
+ { 0x8f48a632f815561dL,0x85f48ff5d3c055d1L,0x222a14277525684fL,
+ 0xd0d841a067360cc3L } },
+ /* 41 << 161 */
+ { { 0x4245a9260b9267c6L,0xc78913f1cf07f863L,0xaa844c8e4d0d9e24L,
+ 0xa42ad5223d5f9017L },
+ { 0xbd371749a2c989d5L,0x928292dfe1f5e78eL,0x493b383e0a1ea6daL,
+ 0x5136fd8d13aee529L } },
+ /* 42 << 161 */
+ { { 0x860c44b1f2c34a99L,0x3b00aca4bf5855acL,0xabf6aaa0faaf37beL,
+ 0x65f436822a53ec08L },
+ { 0x1d9a5801a11b12e1L,0x78a7ab2ce20ed475L,0x0de1067e9a41e0d5L,
+ 0x30473f5f305023eaL } },
+ /* 43 << 161 */
+ { { 0xdd3ae09d169c7d97L,0x5cd5baa4cfaef9cdL,0x5cd7440b65a44803L,
+ 0xdc13966a47f364deL },
+ { 0x077b2be82b8357c1L,0x0cb1b4c5e9d57c2aL,0x7a4ceb3205ff363eL,
+ 0xf310fa4dca35a9efL } },
+ /* 44 << 161 */
+ { { 0xdbb7b352f97f68c6L,0x0c773b500b02cf58L,0xea2e48213c1f96d9L,
+ 0xffb357b0eee01815L },
+ { 0xb9c924cde0f28039L,0x0b36c95a46a3fbe4L,0x1faaaea45e46db6cL,
+ 0xcae575c31928aaffL } },
+ /* 45 << 161 */
+ { { 0x7f671302a70dab86L,0xfcbd12a971c58cfcL,0xcbef9acfbee0cb92L,
+ 0x573da0b9f8c1b583L },
+ { 0x4752fcfe0d41d550L,0xe7eec0e32155cffeL,0x0fc39fcb545ae248L,
+ 0x522cb8d18065f44eL } },
+ /* 46 << 161 */
+ { { 0x263c962a70cbb96cL,0xe034362abcd124a9L,0xf120db283c2ae58dL,
+ 0xb9a38d49fef6d507L },
+ { 0xb1fd2a821ff140fdL,0xbd162f3020aee7e0L,0x4e17a5d4cb251949L,
+ 0x2aebcb834f7e1c3dL } },
+ /* 47 << 161 */
+ { { 0x608eb25f937b0527L,0xf42e1e47eb7d9997L,0xeba699c4b8a53a29L,
+ 0x1f921c71e091b536L },
+ { 0xcce29e7b5b26bbd5L,0x7a8ef5ed3b61a680L,0xe5ef8043ba1f1c7eL,
+ 0x16ea821718158ddaL } },
+ /* 48 << 161 */
+ { { 0x01778a2b599ff0f9L,0x68a923d78104fc6bL,0x5bfa44dfda694ff3L,
+ 0x4f7199dbf7667f12L },
+ { 0xc06d8ff6e46f2a79L,0x08b5deade9f8131dL,0x02519a59abb4ce7cL,
+ 0xc4f710bcb42aec3eL } },
+ /* 49 << 161 */
+ { { 0x3d77b05778bde41aL,0x6474bf80b4186b5aL,0x048b3f6788c65741L,
+ 0xc64519de03c7c154L },
+ { 0xdf0738460edfcc4fL,0x319aa73748f1aa6bL,0x8b9f8a02ca909f77L,
+ 0x902581397580bfefL } },
+ /* 50 << 161 */
+ { { 0xd8bfd3cac0c22719L,0xc60209e4c9ca151eL,0x7a744ab5d9a1a69cL,
+ 0x6de5048b14937f8fL },
+ { 0x171938d8e115ac04L,0x7df709401c6b16d2L,0xa6aeb6637f8e94e7L,
+ 0xc130388e2a2cf094L } },
+ /* 51 << 161 */
+ { { 0x1850be8477f54e6eL,0x9f258a7265d60fe5L,0xff7ff0c06c9146d6L,
+ 0x039aaf90e63a830bL },
+ { 0x38f27a739460342fL,0x4703148c3f795f8aL,0x1bb5467b9681a97eL,
+ 0x00931ba5ecaeb594L } },
+ /* 52 << 161 */
+ { { 0xcdb6719d786f337cL,0xd9c01cd2e704397dL,0x0f4a3f20555c2fefL,
+ 0x004525097c0af223L },
+ { 0x54a5804784db8e76L,0x3bacf1aa93c8aa06L,0x11ca957cf7919422L,
+ 0x5064105378cdaa40L } },
+ /* 53 << 161 */
+ { { 0x7a3038749f7144aeL,0x170c963f43d4acfdL,0x5e14814958ddd3efL,
+ 0xa7bde5829e72dba8L },
+ { 0x0769da8b6fa68750L,0xfa64e532572e0249L,0xfcaadf9d2619ad31L,
+ 0x87882daaa7b349cdL } },
+ /* 54 << 161 */
+ { { 0x9f6eb7316c67a775L,0xcb10471aefc5d0b1L,0xb433750ce1b806b2L,
+ 0x19c5714d57b1ae7eL },
+ { 0xc0dc8b7bed03fd3fL,0xdd03344f31bc194eL,0xa66c52a78c6320b5L,
+ 0x8bc82ce3d0b6fd93L } },
+ /* 55 << 161 */
+ { { 0xf8e13501b35f1341L,0xe53156dd25a43e42L,0xd3adf27e4daeb85cL,
+ 0xb81d8379bbeddeb5L },
+ { 0x1b0b546e2e435867L,0x9020eb94eba5dd60L,0x37d911618210cb9dL,
+ 0x4c596b315c91f1cfL } },
+ /* 56 << 161 */
+ { { 0xb228a90f0e0b040dL,0xbaf02d8245ff897fL,0x2aac79e600fa6122L,
+ 0x248288178e36f557L },
+ { 0xb9521d31113ec356L,0x9e48861e15eff1f8L,0x2aa1d412e0d41715L,
+ 0x71f8620353f131b8L } },
+ /* 57 << 161 */
+ { { 0xf60da8da3fd19408L,0x4aa716dc278d9d99L,0x394531f7a8c51c90L,
+ 0xb560b0e8f59db51cL },
+ { 0xa28fc992fa34bdadL,0xf024fa149cd4f8bdL,0x5cf530f723a9d0d3L,
+ 0x615ca193e28c9b56L } },
+ /* 58 << 161 */
+ { { 0x6d2a483d6f73c51eL,0xa4cb2412ea0dc2ddL,0x50663c411eb917ffL,
+ 0x3d3a74cfeade299eL },
+ { 0x29b3990f4a7a9202L,0xa9bccf59a7b15c3dL,0x66a3ccdca5df9208L,
+ 0x48027c1443f2f929L } },
+ /* 59 << 161 */
+ { { 0xd385377c40b557f0L,0xe001c366cd684660L,0x1b18ed6be2183a27L,
+ 0x879738d863210329L },
+ { 0xa687c74bbda94882L,0xd1bbcc48a684b299L,0xaf6f1112863b3724L,
+ 0x6943d1b42c8ce9f8L } },
+ /* 60 << 161 */
+ { { 0xe044a3bb098cafb4L,0x27ed231060d48cafL,0x542b56753a31b84dL,
+ 0xcbf3dd50fcddbed7L },
+ { 0x25031f1641b1d830L,0xa7ec851dcb0c1e27L,0xac1c8fe0b5ae75dbL,
+ 0xb24c755708c52120L } },
+ /* 61 << 161 */
+ { { 0x57f811dc1d4636c3L,0xf8436526681a9939L,0x1f6bc6d99c81adb3L,
+ 0x840f8ac35b7d80d4L },
+ { 0x731a9811f4387f1aL,0x7c501cd3b5156880L,0xa5ca4a07dfe68867L,
+ 0xf123d8f05fcea120L } },
+ /* 62 << 161 */
+ { { 0x1fbb0e71d607039eL,0x2b70e215cd3a4546L,0x32d2f01d53324091L,
+ 0xb796ff08180ab19bL },
+ { 0x32d87a863c57c4aaL,0x2aed9cafb7c49a27L,0x9fb35eac31630d98L,
+ 0x338e8cdf5c3e20a3L } },
+ /* 63 << 161 */
+ { { 0x80f1618266cde8dbL,0x4e1599802d72fd36L,0xd7b8f13b9b6e5072L,
+ 0xf52139073b7b5dc1L },
+ { 0x4d431f1d8ce4396eL,0x37a1a680a7ed2142L,0xbf375696d01aaf6bL,
+ 0xaa1c0c54e63aab66L } },
+ /* 64 << 161 */
+ { { 0x3014368b4ed80940L,0x67e6d0567a6fceddL,0x7c208c49ca97579fL,
+ 0xfe3d7a81a23597f6L },
+ { 0x5e2032027e096ae2L,0xb1f3e1e724b39366L,0x26da26f32fdcdffcL,
+ 0x79422f1d6097be83L } },
+ /* 0 << 168 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 168 */
+ { { 0x263a2cfb9db3b381L,0x9c3a2deed4df0a4bL,0x728d06e97d04e61fL,
+ 0x8b1adfbc42449325L },
+ { 0x6ec1d9397e053a1bL,0xee2be5c766daf707L,0x80ba1e14810ac7abL,
+ 0xdd2ae778f530f174L } },
+ /* 2 << 168 */
+ { { 0x0435d97a205b9d8bL,0x6eb8f064056756d4L,0xd5e88a8bb6f8210eL,
+ 0x070ef12dec9fd9eaL },
+ { 0x4d8495053bcc876aL,0x12a75338a7404ce3L,0xd22b49e1b8a1db5eL,
+ 0xec1f205114bfa5adL } },
+ /* 3 << 168 */
+ { { 0xadbaeb79b6828f36L,0x9d7a025801bd5b9eL,0xeda01e0d1e844b0cL,
+ 0x4b625175887edfc9L },
+ { 0x14109fdd9669b621L,0x88a2ca56f6f87b98L,0xfe2eb788170df6bcL,
+ 0x0cea06f4ffa473f9L } },
+ /* 4 << 168 */
+ { { 0x43ed81b5c4e83d33L,0xd9f358795efd488bL,0x164a620f9deb4d0fL,
+ 0xc6927bdbac6a7394L },
+ { 0x45c28df79f9e0f03L,0x2868661efcd7e1a9L,0x7cf4e8d0ffa348f1L,
+ 0x6bd4c284398538e0L } },
+ /* 5 << 168 */
+ { { 0x2618a091289a8619L,0xef796e606671b173L,0x664e46e59090c632L,
+ 0xa38062d41e66f8fbL },
+ { 0x6c744a200573274eL,0xd07b67e4a9271394L,0x391223b26bdc0e20L,
+ 0xbe2d93f1eb0a05a7L } },
+ /* 6 << 168 */
+ { { 0xf23e2e533f36d141L,0xe84bb3d44dfca442L,0xb804a48d6b7c023aL,
+ 0x1e16a8fa76431c3bL },
+ { 0x1b5452adddd472e0L,0x7d405ee70d1ee127L,0x50fc6f1dffa27599L,
+ 0x351ac53cbf391b35L } },
+ /* 7 << 168 */
+ { { 0x7efa14b84444896bL,0x64974d2ff94027fbL,0xefdcd0e8de84487dL,
+ 0x8c45b2602b48989bL },
+ { 0xa8fcbbc2d8463487L,0xd1b2b3f73fbc476cL,0x21d005b7c8f443c0L,
+ 0x518f2e6740c0139cL } },
+ /* 8 << 168 */
+ { { 0x56036e8c06d75fc1L,0x2dcf7bb73249a89fL,0x81dd1d3de245e7ddL,
+ 0xf578dc4bebd6e2a7L },
+ { 0x4c028903df2ce7a0L,0xaee362889c39afacL,0xdc847c31146404abL,
+ 0x6304c0d8a4e97818L } },
+ /* 9 << 168 */
+ { { 0xae51dca2a91f6791L,0x2abe41909baa9efcL,0xd9d2e2f4559c7ac1L,
+ 0xe82f4b51fc9f773aL },
+ { 0xa77130274073e81cL,0xc0276facfbb596fcL,0x1d819fc9a684f70cL,
+ 0x29b47fddc9f7b1e0L } },
+ /* 10 << 168 */
+ { { 0x358de103459b1940L,0xec881c595b013e93L,0x51574c9349532ad3L,
+ 0x2db1d445b37b46deL },
+ { 0xc6445b87df239fd8L,0xc718af75151d24eeL,0xaea1c4a4f43c6259L,
+ 0x40c0e5d770be02f7L } },
+ /* 11 << 168 */
+ { { 0x6a4590f4721b33f2L,0x2124f1fbfedf04eaL,0xf8e53cde9745efe7L,
+ 0xe7e1043265f046d9L },
+ { 0xc3fca28ee4d0c7e6L,0x847e339a87253b1bL,0x9b5953483743e643L,
+ 0xcb6a0a0b4fd12fc5L } },
+ /* 12 << 168 */
+ { { 0xfb6836c327d02dccL,0x5ad009827a68bcc2L,0x1b24b44c005e912dL,
+ 0xcc83d20f811fdcfeL },
+ { 0x36527ec1666fba0cL,0x6994819714754635L,0xfcdcb1a8556da9c2L,
+ 0xa593426781a732b2L } },
+ /* 13 << 168 */
+ { { 0xec1214eda714181dL,0x609ac13b6067b341L,0xff4b4c97a545df1fL,
+ 0xa124050134d2076bL },
+ { 0x6efa0c231409ca97L,0x254cc1a820638c43L,0xd4e363afdcfb46cdL,
+ 0x62c2adc303942a27L } },
+ /* 14 << 168 */
+ { { 0xc67b9df056e46483L,0xa55abb2063736356L,0xab93c098c551bc52L,
+ 0x382b49f9b15fe64bL },
+ { 0x9ec221ad4dff8d47L,0x79caf615437df4d6L,0x5f13dc64bb456509L,
+ 0xe4c589d9191f0714L } },
+ /* 15 << 168 */
+ { { 0x27b6a8ab3fd40e09L,0xe455842e77313ea9L,0x8b51d1e21f55988bL,
+ 0x5716dd73062bbbfcL },
+ { 0x633c11e54e8bf3deL,0x9a0e77b61b85be3bL,0x565107290911cca6L,
+ 0x27e76495efa6590fL } },
+ /* 16 << 168 */
+ { { 0xe4ac8b33070d3aabL,0x2643672b9a2cd5e5L,0x52eff79b1cfc9173L,
+ 0x665ca49b90a7c13fL },
+ { 0x5a8dda59b3efb998L,0x8a5b922d052f1341L,0xae9ebbab3cf9a530L,
+ 0x35986e7bf56da4d7L } },
+ /* 17 << 168 */
+ { { 0x3a636b5cff3513ccL,0xbb0cf8ba3198f7ddL,0xb8d4052241f16f86L,
+ 0x760575d8de13a7bfL },
+ { 0x36f74e169f7aa181L,0x163a3ecff509ed1cL,0x6aead61f3c40a491L,
+ 0x158c95fcdfe8fcaaL } },
+ /* 18 << 168 */
+ { { 0xa3991b6e13cda46fL,0x79482415342faed0L,0xf3ba5bde666b5970L,
+ 0x1d52e6bcb26ab6ddL },
+ { 0x768ba1e78608dd3dL,0x4930db2aea076586L,0xd9575714e7dc1afaL,
+ 0x1fc7bf7df7c58817L } },
+ /* 19 << 168 */
+ { { 0x6b47accdd9eee96cL,0x0ca277fbe58cec37L,0x113fe413e702c42aL,
+ 0xdd1764eec47cbe51L },
+ { 0x041e7cde7b3ed739L,0x50cb74595ce9e1c0L,0x355685132925b212L,
+ 0x7cff95c4001b081cL } },
+ /* 20 << 168 */
+ { { 0x63ee4cbd8088b454L,0xdb7f32f79a9e0c8aL,0xb377d4186b2447cbL,
+ 0xe3e982aad370219bL },
+ { 0x06ccc1e4c2a2a593L,0x72c368650773f24fL,0xa13b4da795859423L,
+ 0x8bbf1d3375040c8fL } },
+ /* 21 << 168 */
+ { { 0x726f0973da50c991L,0x48afcd5b822d6ee2L,0xe5fc718b20fd7771L,
+ 0xb9e8e77dfd0807a1L },
+ { 0x7f5e0f4499a7703dL,0x6972930e618e36f3L,0x2b7c77b823807bbeL,
+ 0xe5b82405cb27ff50L } },
+ /* 22 << 168 */
+ { { 0xba8b8be3bd379062L,0xd64b7a1d2dce4a92L,0x040a73c5b2952e37L,
+ 0x0a9e252ed438aecaL },
+ { 0xdd43956bc39d3bcbL,0x1a31ca00b32b2d63L,0xd67133b85c417a18L,
+ 0xd08e47902ef442c8L } },
+ /* 23 << 168 */
+ { { 0x98cb1ae9255c0980L,0x4bd863812b4a739fL,0x5a5c31e11e4a45a1L,
+ 0x1e5d55fe9cb0db2fL },
+ { 0x74661b068ff5cc29L,0x026b389f0eb8a4f4L,0x536b21a458848c24L,
+ 0x2e5bf8ec81dc72b0L } },
+ /* 24 << 168 */
+ { { 0x03c187d0ad886aacL,0x5c16878ab771b645L,0xb07dfc6fc74045abL,
+ 0x2c6360bf7800caedL },
+ { 0x24295bb5b9c972a3L,0xc9e6f88e7c9a6dbaL,0x90ffbf2492a79aa6L,
+ 0xde29d50a41c26ac2L } },
+ /* 25 << 168 */
+ { { 0x9f0af483d309cbe6L,0x5b020d8ae0bced4fL,0x606e986db38023e3L,
+ 0xad8f2c9d1abc6933L },
+ { 0x19292e1de7400e93L,0xfe3e18a952be5e4dL,0xe8e9771d2e0680bfL,
+ 0x8c5bec98c54db063L } },
+ /* 26 << 168 */
+ { { 0x2af9662a74a55d1fL,0xe3fbf28f046f66d8L,0xa3a72ab4d4dc4794L,
+ 0x09779f455c7c2dd8L },
+ { 0xd893bdafc3d19d8dL,0xd5a7509457d6a6dfL,0x8cf8fef9952e6255L,
+ 0x3da67cfbda9a8affL } },
+ /* 27 << 168 */
+ { { 0x4c23f62a2c160dcdL,0x34e6c5e38f90eaefL,0x35865519a9a65d5aL,
+ 0x07c48aae8fd38a3dL },
+ { 0xb7e7aeda50068527L,0x2c09ef231c90936aL,0x31ecfeb6e879324cL,
+ 0xa0871f6bfb0ec938L } },
+ /* 28 << 168 */
+ { { 0xb1f0fb68d84d835dL,0xc90caf39861dc1e6L,0x12e5b0467594f8d7L,
+ 0x26897ae265012b92L },
+ { 0xbcf68a08a4d6755dL,0x403ee41c0991fbdaL,0x733e343e3bbf17e8L,
+ 0xd2c7980d679b3d65L } },
+ /* 29 << 168 */
+ { { 0x33056232d2e11305L,0x966be492f3c07a6fL,0x6a8878ffbb15509dL,
+ 0xff2211010a9b59a4L },
+ { 0x6c9f564aabe30129L,0xc6f2c940336e64cfL,0x0fe752628b0c8022L,
+ 0xbe0267e96ae8db87L } },
+ /* 30 << 168 */
+ { { 0x22e192f193bc042bL,0xf085b534b237c458L,0xa0d192bd832c4168L,
+ 0x7a76e9e3bdf6271dL },
+ { 0x52a882fab88911b5L,0xc85345e4b4db0eb5L,0xa3be02a681a7c3ffL,
+ 0x51889c8cf0ec0469L } },
+ /* 31 << 168 */
+ { { 0x9d031369a5e829e5L,0xcbb4c6fc1607aa41L,0x75ac59a6241d84c1L,
+ 0xc043f2bf8829e0eeL },
+ { 0x82a38f758ea5e185L,0x8bda40b9d87cbd9fL,0x9e65e75e2d8fc601L,
+ 0x3d515f74a35690b3L } },
+ /* 32 << 168 */
+ { { 0x534acf4fda79e5acL,0x68b83b3a8630215fL,0x5c748b2ed085756eL,
+ 0xb0317258e5d37cb2L },
+ { 0x6735841ac5ccc2c4L,0x7d7dc96b3d9d5069L,0xa147e410fd1754bdL,
+ 0x65296e94d399ddd5L } },
+ /* 33 << 168 */
+ { { 0xf6b5b2d0bc8fa5bcL,0x8a5ead67500c277bL,0x214625e6dfa08a5dL,
+ 0x51fdfedc959cf047L },
+ { 0x6bc9430b289fca32L,0xe36ff0cf9d9bdc3fL,0x2fe187cb58ea0edeL,
+ 0xed66af205a900b3fL } },
+ /* 34 << 168 */
+ { { 0x00e0968b5fa9f4d6L,0x2d4066ce37a362e7L,0xa99a9748bd07e772L,
+ 0x710989c006a4f1d0L },
+ { 0xd5dedf35ce40cbd8L,0xab55c5f01743293dL,0x766f11448aa24e2cL,
+ 0x94d874f8605fbcb4L } },
+ /* 35 << 168 */
+ { { 0xa365f0e8a518001bL,0xee605eb69d04ef0fL,0x5a3915cdba8d4d25L,
+ 0x44c0e1b8b5113472L },
+ { 0xcbb024e88b6740dcL,0x89087a53ee1d4f0cL,0xa88fa05c1fc4e372L,
+ 0x8bf395cbaf8b3af2L } },
+ /* 36 << 168 */
+ { { 0x1e71c9a1deb8568bL,0xa35daea080fb3d32L,0xe8b6f2662cf8fb81L,
+ 0x6d51afe89490696aL },
+ { 0x81beac6e51803a19L,0xe3d24b7f86219080L,0x727cfd9ddf6f463cL,
+ 0x8c6865ca72284ee8L } },
+ /* 37 << 168 */
+ { { 0x32c88b7db743f4efL,0x3793909be7d11dceL,0xd398f9222ff2ebe8L,
+ 0x2c70ca44e5e49796L },
+ { 0xdf4d9929cb1131b1L,0x7826f29825888e79L,0x4d3a112cf1d8740aL,
+ 0x00384cb6270afa8bL } },
+ /* 38 << 168 */
+ { { 0xcb64125b3ab48095L,0x3451c25662d05106L,0xd73d577da4955845L,
+ 0x39570c16bf9f4433L },
+ { 0xd7dfaad3adecf263L,0xf1c3d8d1dc76e102L,0x5e774a5854c6a836L,
+ 0xdad4b6723e92d47bL } },
+ /* 39 << 168 */
+ { { 0xbe7e990ff0d796a0L,0x5fc62478df0e8b02L,0x8aae8bf4030c00adL,
+ 0x3d2db93b9004ba0fL },
+ { 0xe48c8a79d85d5ddcL,0xe907caa76bb07f34L,0x58db343aa39eaed5L,
+ 0x0ea6e007adaf5724L } },
+ /* 40 << 168 */
+ { { 0xe00df169d23233f3L,0x3e32279677cb637fL,0x1f897c0e1da0cf6cL,
+ 0xa651f5d831d6bbddL },
+ { 0xdd61af191a230c76L,0xbd527272cdaa5e4aL,0xca753636d0abcd7eL,
+ 0x78bdd37c370bd8dcL } },
+ /* 41 << 168 */
+ { { 0xc23916c217cd93feL,0x65b97a4ddadce6e2L,0xe04ed4eb174e42f8L,
+ 0x1491ccaabb21480aL },
+ { 0x145a828023196332L,0x3c3862d7587b479aL,0x9f4a88a301dcd0edL,
+ 0x4da2b7ef3ea12f1fL } },
+ /* 42 << 168 */
+ { { 0xf8e7ae33b126e48eL,0x404a0b32f494e237L,0x9beac474c55acadbL,
+ 0x4ee5cf3bcbec9fd9L },
+ { 0x336b33b97df3c8c3L,0xbd905fe3b76808fdL,0x8f436981aa45c16aL,
+ 0x255c5bfa3dd27b62L } },
+ /* 43 << 168 */
+ { { 0x71965cbfc3dd9b4dL,0xce23edbffc068a87L,0xb78d4725745b029bL,
+ 0x74610713cefdd9bdL },
+ { 0x7116f75f1266bf52L,0x0204672218e49bb6L,0xdf43df9f3d6f19e3L,
+ 0xef1bc7d0e685cb2fL } },
+ /* 44 << 168 */
+ { { 0xcddb27c17078c432L,0xe1961b9cb77fedb7L,0x1edc2f5cc2290570L,
+ 0x2c3fefca19cbd886L },
+ { 0xcf880a36c2af389aL,0x96c610fdbda71ceaL,0xf03977a932aa8463L,
+ 0x8eb7763f8586d90aL } },
+ /* 45 << 168 */
+ { { 0x3f3424542a296e77L,0xc871868342837a35L,0x7dc710906a09c731L,
+ 0x54778ffb51b816dbL },
+ { 0x6b33bfecaf06defdL,0xfe3c105f8592b70bL,0xf937fda461da6114L,
+ 0x3c13e6514c266ad7L } },
+ /* 46 << 168 */
+ { { 0xe363a829855938e8L,0x2eeb5d9e9de54b72L,0xbeb93b0e20ccfab9L,
+ 0x3dffbb5f25e61a25L },
+ { 0x7f655e431acc093dL,0x0cb6cc3d3964ce61L,0x6ab283a1e5e9b460L,
+ 0x55d787c5a1c7e72dL } },
+ /* 47 << 168 */
+ { { 0x4d2efd47deadbf02L,0x11e80219ac459068L,0x810c762671f311f0L,
+ 0xfa17ef8d4ab6ef53L },
+ { 0xaf47fd2593e43bffL,0x5cb5ff3f0be40632L,0x546871068ee61da3L,
+ 0x7764196eb08afd0fL } },
+ /* 48 << 168 */
+ { { 0x831ab3edf0290a8fL,0xcae81966cb47c387L,0xaad7dece184efb4fL,
+ 0xdcfc53b34749110eL },
+ { 0x6698f23c4cb632f9L,0xc42a1ad6b91f8067L,0xb116a81d6284180aL,
+ 0xebedf5f8e901326fL } },
+ /* 49 << 168 */
+ { { 0xf2274c9f97e3e044L,0x4201852011d09fc9L,0x56a65f17d18e6e23L,
+ 0x2ea61e2a352b683cL },
+ { 0x27d291bc575eaa94L,0x9e7bc721b8ff522dL,0x5f7268bfa7f04d6fL,
+ 0x5868c73faba41748L } },
+ /* 50 << 168 */
+ { { 0x9f85c2db7be0eeadL,0x511e7842ff719135L,0x5a06b1e9c5ea90d7L,
+ 0x0c19e28326fab631L },
+ { 0x8af8f0cfe9206c55L,0x89389cb43553c06aL,0x39dbed97f65f8004L,
+ 0x0621b037c508991dL } },
+ /* 51 << 168 */
+ { { 0x1c52e63596e78cc4L,0x5385c8b20c06b4a8L,0xd84ddfdbb0e87d03L,
+ 0xc49dfb66934bafadL },
+ { 0x7071e17059f70772L,0x3a073a843a1db56bL,0x034949033b8af190L,
+ 0x7d882de3d32920f0L } },
+ /* 52 << 168 */
+ { { 0x91633f0ab2cf8940L,0x72b0b1786f948f51L,0x2d28dc30782653c8L,
+ 0x88829849db903a05L },
+ { 0xb8095d0c6a19d2bbL,0x4b9e7f0c86f782cbL,0x7af739882d907064L,
+ 0xd12be0fe8b32643cL } },
+ /* 53 << 168 */
+ { { 0x358ed23d0e165dc3L,0x3d47ce624e2378ceL,0x7e2bb0b9feb8a087L,
+ 0x3246e8aee29e10b9L },
+ { 0x459f4ec703ce2b4dL,0xe9b4ca1bbbc077cfL,0x2613b4f20e9940c1L,
+ 0xfc598bb9047d1eb1L } },
+ /* 54 << 168 */
+ { { 0x9744c62b45036099L,0xa9dee742167c65d8L,0x0c511525dabe1943L,
+ 0xda11055493c6c624L },
+ { 0xae00a52c651a3be2L,0xcda5111d884449a6L,0x063c06f4ff33bed1L,
+ 0x73baaf9a0d3d76b4L } },
+ /* 55 << 168 */
+ { { 0x52fb0c9d7fc63668L,0x6886c9dd0c039cdeL,0x602bd59955b22351L,
+ 0xb00cab02360c7c13L },
+ { 0x8cb616bc81b69442L,0x41486700b55c3ceeL,0x71093281f49ba278L,
+ 0xad956d9c64a50710L } },
+ /* 56 << 168 */
+ { { 0x9561f28b638a7e81L,0x54155cdf5980ddc3L,0xb2db4a96d26f247aL,
+ 0x9d774e4e4787d100L },
+ { 0x1a9e6e2e078637d2L,0x1c363e2d5e0ae06aL,0x7493483ee9cfa354L,
+ 0x76843cb37f74b98dL } },
+ /* 57 << 168 */
+ { { 0xbaca6591d4b66947L,0xb452ce9804460a8cL,0x6830d24643768f55L,
+ 0xf4197ed87dff12dfL },
+ { 0x6521b472400dd0f7L,0x59f5ca8f4b1e7093L,0x6feff11b080338aeL,
+ 0x0ada31f6a29ca3c6L } },
+ /* 58 << 168 */
+ { { 0x24794eb694a2c215L,0xd83a43ab05a57ab4L,0x264a543a2a6f89feL,
+ 0x2c2a3868dd5ec7c2L },
+ { 0xd33739408439d9b2L,0x715ea6720acd1f11L,0x42c1d235e7e6cc19L,
+ 0x81ce6e96b990585cL } },
+ /* 59 << 168 */
+ { { 0x04e5dfe0d809c7bdL,0xd7b2580c8f1050abL,0x6d91ad78d8a4176fL,
+ 0x0af556ee4e2e897cL },
+ { 0x162a8b73921de0acL,0x52ac9c227ea78400L,0xee2a4eeaefce2174L,
+ 0xbe61844e6d637f79L } },
+ /* 60 << 168 */
+ { { 0x0491f1bc789a283bL,0x72d3ac3d880836f4L,0xaa1c5ea388e5402dL,
+ 0x1b192421d5cc473dL },
+ { 0x5c0b99989dc84cacL,0xb0a8482d9c6e75b8L,0x639961d03a191ce2L,
+ 0xda3bc8656d837930L } },
+ /* 61 << 168 */
+ { { 0xca990653056e6f8fL,0x84861c4164d133a7L,0x8b403276746abe40L,
+ 0xb7b4d51aebf8e303L },
+ { 0x05b43211220a255dL,0xc997152c02419e6eL,0x76ff47b6630c2feaL,
+ 0x50518677281fdadeL } },
+ /* 62 << 168 */
+ { { 0x3283b8bacf902b0bL,0x8d4b4eb537db303bL,0xcc89f42d755011bcL,
+ 0xb43d74bbdd09d19bL },
+ { 0x65746bc98adba350L,0x364eaf8cb51c1927L,0x13c7659610ad72ecL,
+ 0x30045121f8d40c20L } },
+ /* 63 << 168 */
+ { { 0x6d2d99b7ea7b979bL,0xcd78cd74e6fb3bcdL,0x11e45a9e86cffbfeL,
+ 0x78a61cf4637024f6L },
+ { 0xd06bc8723d502295L,0xf1376854458cb288L,0xb9db26a1342f8586L,
+ 0xf33effcf4beee09eL } },
+ /* 64 << 168 */
+ { { 0xd7e0c4cdb30cfb3aL,0x6d09b8c16c9db4c8L,0x40ba1a4207c8d9dfL,
+ 0x6fd495f71c52c66dL },
+ { 0xfb0e169f275264daL,0x80c2b746e57d8362L,0xedd987f749ad7222L,
+ 0xfdc229af4398ec7bL } },
+ /* 0 << 175 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 175 */
+ { { 0xb0d1ed8452666a58L,0x4bcb6e00e6a9c3c2L,0x3c57411c26906408L,
+ 0xcfc2075513556400L },
+ { 0xa08b1c505294dba3L,0xa30ba2868b7dd31eL,0xd70ba90e991eca74L,
+ 0x094e142ce762c2b9L } },
+ /* 2 << 175 */
+ { { 0xb81d783e979f3925L,0x1efd130aaf4c89a7L,0x525c2144fd1bf7faL,
+ 0x4b2969041b265a9eL },
+ { 0xed8e9634b9db65b6L,0x35c82e3203599d8aL,0xdaa7a54f403563f3L,
+ 0x9df088ad022c38abL } },
+ /* 3 << 175 */
+ { { 0xe5cfb066bb3fd30aL,0x429169daeff0354eL,0x809cf8523524e36cL,
+ 0x136f4fb30155be1dL },
+ { 0x4826af011fbba712L,0x6ef0f0b4506ba1a1L,0xd9928b3177aea73eL,
+ 0xe2bf6af25eaa244eL } },
+ /* 4 << 175 */
+ { { 0x8d084f124237b64bL,0x688ebe99e3ecfd07L,0x57b8a70cf6845dd8L,
+ 0x808fc59c5da4a325L },
+ { 0xa9032b2ba3585862L,0xb66825d5edf29386L,0xb5a5a8db431ec29bL,
+ 0xbb143a983a1e8dc8L } },
+ /* 5 << 175 */
+ { { 0x35ee94ce12ae381bL,0x3a7f176c86ccda90L,0xc63a657e4606eacaL,
+ 0x9ae5a38043cd04dfL },
+ { 0x9bec8d15ed251b46L,0x1f5d6d30caca5e64L,0x347b3b359ff20f07L,
+ 0x4d65f034f7e4b286L } },
+ /* 6 << 175 */
+ { { 0x9e93ba24f111661eL,0xedced484b105eb04L,0x96dc9ba1f424b578L,
+ 0xbf8f66b7e83e9069L },
+ { 0x872d4df4d7ed8216L,0xbf07f3778e2cbecfL,0x4281d89998e73754L,
+ 0xfec85fbb8aab8708L } },
+ /* 7 << 175 */
+ { { 0x9a3c0deea5ba5b0bL,0xe6a116ce42d05299L,0xae9775fee9b02d42L,
+ 0x72b05200a1545cb6L },
+ { 0xbc506f7d31a3b4eaL,0xe58930788bbd9b32L,0xc8bc5f37e4b12a97L,
+ 0x6b000c064a73b671L } },
+ /* 8 << 175 */
+ { { 0x13b5bf22765fa7d0L,0x59805bf01d6a5370L,0x67a5e29d4280db98L,
+ 0x4f53916f776b1ce3L },
+ { 0x714ff61f33ddf626L,0x4206238ea085d103L,0x1c50d4b7e5809ee3L,
+ 0x999f450d85f8eb1dL } },
+ /* 9 << 175 */
+ { { 0x658a6051e4c79e9bL,0x1394cb73c66a9feaL,0x27f31ed5c6be7b23L,
+ 0xf4c88f365aa6f8feL },
+ { 0x0fb0721f4aaa499eL,0x68b3a7d5e3fb2a6bL,0xa788097d3a92851dL,
+ 0x060e7f8ae96f4913L } },
+ /* 10 << 175 */
+ { { 0x82eebe731a3a93bcL,0x42bbf465a21adc1aL,0xc10b6fa4ef030efdL,
+ 0x247aa4c787b097bbL },
+ { 0x8b8dc632f60c77daL,0x6ffbc26ac223523eL,0xa4f6ff11344579cfL,
+ 0x5825653c980250f6L } },
+ /* 11 << 175 */
+ { { 0xb2dd097ebc1aa2b9L,0x0788939337a0333aL,0x1cf55e7137a0db38L,
+ 0x2648487f792c1613L },
+ { 0xdad013363fcef261L,0x6239c81d0eabf129L,0x8ee761de9d276be2L,
+ 0x406a7a341eda6ad3L } },
+ /* 12 << 175 */
+ { { 0x4bf367ba4a493b31L,0x54f20a529bf7f026L,0xb696e0629795914bL,
+ 0xcddab96d8bf236acL },
+ { 0x4ff2c70aed25ea13L,0xfa1d09eb81cbbbe7L,0x88fc8c87468544c5L,
+ 0x847a670d696b3317L } },
+ /* 13 << 175 */
+ { { 0xf133421e64bcb626L,0xaea638c826dee0b5L,0xd6e7680bb310346cL,
+ 0xe06f4097d5d4ced3L },
+ { 0x099614527512a30bL,0xf3d867fde589a59aL,0x2e73254f52d0c180L,
+ 0x9063d8a3333c74acL } },
+ /* 14 << 175 */
+ { { 0xeda6c595d314e7bcL,0x2ee7464b467899edL,0x1cef423c0a1ed5d3L,
+ 0x217e76ea69cc7613L },
+ { 0x27ccce1fe7cda917L,0x12d8016b8a893f16L,0xbcd6de849fc74f6bL,
+ 0xfa5817e2f3144e61L } },
+ /* 15 << 175 */
+ { { 0x1f3541640821ee4cL,0x1583eab40bc61992L,0x7490caf61d72879fL,
+ 0x998ad9f3f76ae7b2L },
+ { 0x1e181950a41157f7L,0xa9d7e1e6e8da3a7eL,0x963784eb8426b95fL,
+ 0x0ee4ed6e542e2a10L } },
+ /* 16 << 175 */
+ { { 0xb79d4cc5ac751e7bL,0x93f96472fd4211bdL,0x8c72d3d2c8de4fc6L,
+ 0x7b69cbf5df44f064L },
+ { 0x3da90ca2f4bf94e1L,0x1a5325f8f12894e2L,0x0a437f6c7917d60bL,
+ 0x9be7048696c9cb5dL } },
+ /* 17 << 175 */
+ { { 0xb4d880bfe1dc5c05L,0xd738addaeebeeb57L,0x6f0119d3df0fe6a3L,
+ 0x5c686e5566eaaf5aL },
+ { 0x9cb10b50dfd0b7ecL,0xbdd0264b6a497c21L,0xfc0935148c546c96L,
+ 0x58a947fa79dbf42aL } },
+ /* 18 << 175 */
+ { { 0xc0b48d4e49ccd6d7L,0xff8fb02c88bd5580L,0xc75235e907d473b2L,
+ 0x4fab1ac5a2188af3L },
+ { 0x030fa3bc97576ec0L,0xe8c946e80b7e7d2fL,0x40a5c9cc70305600L,
+ 0x6d8260a9c8b013b4L } },
+ /* 19 << 175 */
+ { { 0x0368304f70bba85cL,0xad090da1a4a0d311L,0x7170e8702415eec1L,
+ 0xbfba35fe8461ea47L },
+ { 0x6279019ac1e91938L,0xa47638f31afc415fL,0x36c65cbbbcba0e0fL,
+ 0x02160efb034e2c48L } },
+ /* 20 << 175 */
+ { { 0xe6c51073615cd9e4L,0x498ec047f1243c06L,0x3e5a8809b17b3d8cL,
+ 0x5cd99e610cc565f1L },
+ { 0x81e312df7851dafeL,0xf156f5baa79061e2L,0x80d62b71880c590eL,
+ 0xbec9746f0a39faa1L } },
+ /* 21 << 175 */
+ { { 0x1d98a9c1c8ed1f7aL,0x09e43bb5a81d5ff2L,0xd5f00f680da0794aL,
+ 0x412050d9661aa836L },
+ { 0xa89f7c4e90747e40L,0x6dc05ebbb62a3686L,0xdf4de847308e3353L,
+ 0x53868fbb9fb53bb9L } },
+ /* 22 << 175 */
+ { { 0x2b09d2c3cfdcf7ddL,0x41a9fce3723fcab4L,0x73d905f707f57ca3L,
+ 0x080f9fb1ac8e1555L },
+ { 0x7c088e849ba7a531L,0x07d35586ed9a147fL,0x602846abaf48c336L,
+ 0x7320fd320ccf0e79L } },
+ /* 23 << 175 */
+ { { 0xaa780798b18bd1ffL,0x52c2e300afdd2905L,0xf27ea3d6434267cdL,
+ 0x8b96d16d15605b5fL },
+ { 0x7bb310494b45706bL,0xe7f58b8e743d25f8L,0xe9b5e45b87f30076L,
+ 0xd19448d65d053d5aL } },
+ /* 24 << 175 */
+ { { 0x1ecc8cb9d3210a04L,0x6bc7d463dafb5269L,0x3e59b10a67c3489fL,
+ 0x1769788c65641e1bL },
+ { 0x8a53b82dbd6cb838L,0x7066d6e6236d5f22L,0x03aa1c616908536eL,
+ 0xc971da0d66ae9809L } },
+ /* 25 << 175 */
+ { { 0x01b3a86bc49a2facL,0x3b8420c03092e77aL,0x020573007d6fb556L,
+ 0x6941b2a1bff40a87L },
+ { 0x140b63080658ff2aL,0x878043633424ab36L,0x0253bd515751e299L,
+ 0xc75bcd76449c3e3aL } },
+ /* 26 << 175 */
+ { { 0x92eb40907f8f875dL,0x9c9d754e56c26bbfL,0x158cea618110bbe7L,
+ 0x62a6b802745f91eaL },
+ { 0xa79c41aac6e7394bL,0x445b6a83ad57ef10L,0x0c5277eb6ea6f40cL,
+ 0x319fe96b88633365L } },
+ /* 27 << 175 */
+ { { 0x0b0fc61f385f63cbL,0x41250c8422bdd127L,0x67d153f109e942c2L,
+ 0x60920d08c021ad5dL },
+ { 0x229f5746724d81a5L,0xb7ffb8925bba3299L,0x518c51a1de413032L,
+ 0x2a9bfe773c2fd94cL } },
+ /* 28 << 175 */
+ { { 0xcbcde2393191f4fdL,0x43093e16d3d6ada1L,0x184579f358769606L,
+ 0x2c94a8b3d236625cL },
+ { 0x6922b9c05c437d8eL,0x3d4ae423d8d9f3c8L,0xf72c31c12e7090a2L,
+ 0x4ac3f5f3d76a55bdL } },
+ /* 29 << 175 */
+ { { 0x342508fc6b6af991L,0x0d5271001b5cebbdL,0xb84740d0dd440dd7L,
+ 0x748ef841780162fdL },
+ { 0xa8dbfe0edfc6fafbL,0xeadfdf05f7300f27L,0x7d06555ffeba4ec9L,
+ 0x12c56f839e25fa97L } },
+ /* 30 << 175 */
+ { { 0x77f84203d39b8c34L,0xed8b1be63125eddbL,0x5bbf2441f6e39dc5L,
+ 0xb00f6ee66a5d678aL },
+ { 0xba456ecf57d0ea99L,0xdcae0f5817e06c43L,0x01643de40f5b4baaL,
+ 0x2c324341d161b9beL } },
+ /* 31 << 175 */
+ { { 0x80177f55e126d468L,0xed325f1f76748e09L,0x6116004acfa9bdc2L,
+ 0x2d8607e63a9fb468L },
+ { 0x0e573e276009d660L,0x3a525d2e8d10c5a1L,0xd26cb45c3b9009a0L,
+ 0xb6b0cdc0de9d7448L } },
+ /* 32 << 175 */
+ { { 0x949c9976e1337c26L,0x6faadebdd73d68e5L,0x9e158614f1b768d9L,
+ 0x22dfa5579cc4f069L },
+ { 0xccd6da17be93c6d6L,0x24866c61a504f5b9L,0x2121353c8d694da1L,
+ 0x1c6ca5800140b8c6L } },
+ /* 33 << 175 */
+ { { 0xc245ad8ce964021eL,0xb83bffba032b82b3L,0xfaa220c647ef9898L,
+ 0x7e8d3ac6982c948aL },
+ { 0x1faa2091bc2d124aL,0xbd54c3dd05b15ff4L,0x386bf3abc87c6fb7L,
+ 0xfb2b0563fdeb6f66L } },
+ /* 34 << 175 */
+ { { 0x4e77c5575b45afb4L,0xe9ded649efb8912dL,0x7ec9bbf542f6e557L,
+ 0x2570dfff62671f00L },
+ { 0x2b3bfb7888e084bdL,0xa024b238f37fe5b4L,0x44e7dc0495649aeeL,
+ 0x498ca2555e7ec1d8L } },
+ /* 35 << 175 */
+ { { 0x3bc766eaaaa07e86L,0x0db6facbf3608586L,0xbadd2549bdc259c8L,
+ 0x95af3c6e041c649fL },
+ { 0xb36a928c02e30afbL,0x9b5356ad008a88b8L,0x4b67a5f1cf1d9e9dL,
+ 0xc6542e47a5d8d8ceL } },
+ /* 36 << 175 */
+ { { 0x73061fe87adfb6ccL,0xcc826fd398678141L,0x00e758b13c80515aL,
+ 0x6afe324741485083L },
+ { 0x0fcb08b9b6ae8a75L,0xb8cf388d4acf51e1L,0x344a55606961b9d6L,
+ 0x1a6778b86a97fd0cL } },
+ /* 37 << 175 */
+ { { 0xd840fdc1ecc4c7e3L,0xde9fe47d16db68ccL,0xe95f89dea3e216aaL,
+ 0x84f1a6a49594a8beL },
+ { 0x7ddc7d725a7b162bL,0xc5cfda19adc817a3L,0x80a5d35078b58d46L,
+ 0x93365b1382978f19L } },
+ /* 38 << 175 */
+ { { 0x2e44d22526a1fc90L,0x0d6d10d24d70705dL,0xd94b6b10d70c45f4L,
+ 0x0f201022b216c079L },
+ { 0xcec966c5658fde41L,0xa8d2bc7d7e27601dL,0xbfcce3e1ff230be7L,
+ 0x3394ff6b0033ffb5L } },
+ /* 39 << 175 */
+ { { 0xd890c5098132c9afL,0xaac4b0eb361e7868L,0x5194ded3e82d15aaL,
+ 0x4550bd2e23ae6b7dL },
+ { 0x3fda318eea5399d4L,0xd989bffa91638b80L,0x5ea124d0a14aa12dL,
+ 0x1fb1b8993667b944L } },
+ /* 40 << 175 */
+ { { 0x95ec796944c44d6aL,0x91df144a57e86137L,0x915fd62073adac44L,
+ 0x8f01732d59a83801L },
+ { 0xec579d253aa0a633L,0x06de5e7cc9d6d59cL,0xc132f958b1ef8010L,
+ 0x29476f96e65c1a02L } },
+ /* 41 << 175 */
+ { { 0x336a77c0d34c3565L,0xef1105b21b9f1e9eL,0x63e6d08bf9e08002L,
+ 0x9aff2f21c613809eL },
+ { 0xb5754f853a80e75dL,0xde71853e6bbda681L,0x86f041df8197fd7aL,
+ 0x8b332e08127817faL } },
+ /* 42 << 175 */
+ { { 0x05d99be8b9c20cdaL,0x89f7aad5d5cd0c98L,0x7ef936fe5bb94183L,
+ 0x92ca0753b05cd7f2L },
+ { 0x9d65db1174a1e035L,0x02628cc813eaea92L,0xf2d9e24249e4fbf2L,
+ 0x94fdfd9be384f8b7L } },
+ /* 43 << 175 */
+ { { 0x65f5605463428c6bL,0x2f7205b290b409a5L,0xf778bb78ff45ae11L,
+ 0xa13045bec5ee53b2L },
+ { 0xe00a14ff03ef77feL,0x689cd59fffef8befL,0x3578f0ed1e9ade22L,
+ 0xe99f3ec06268b6a8L } },
+ /* 44 << 175 */
+ { { 0xa2057d91ea1b3c3eL,0x2d1a7053b8823a4aL,0xabbb336a2cca451eL,
+ 0xcd2466e32218bb5dL },
+ { 0x3ac1f42fc8cb762dL,0x7e312aae7690211fL,0xebb9bd7345d07450L,
+ 0x207c4b8246c2213fL } },
+ /* 45 << 175 */
+ { { 0x99d425c1375913ecL,0x94e45e9667908220L,0xc08f3087cd67dbf6L,
+ 0xa5670fbec0887056L },
+ { 0x6717b64a66f5b8fcL,0xd5a56aea786fec28L,0xa8c3f55fc0ff4952L,
+ 0xa77fefae457ac49bL } },
+ /* 46 << 175 */
+ { { 0x29882d7c98379d44L,0xd000bdfb509edc8aL,0xc6f95979e66fe464L,
+ 0x504a6115fa61bde0L },
+ { 0x56b3b871effea31aL,0x2d3de26df0c21a54L,0x21dbff31834753bfL,
+ 0xe67ecf4969269d86L } },
+ /* 47 << 175 */
+ { { 0x7a176952151fe690L,0x035158047f2adb5fL,0xee794b15d1b62a8dL,
+ 0xf004ceecaae454e6L },
+ { 0x0897ea7cf0386facL,0x3b62ff12d1fca751L,0x154181df1b7a04ecL,
+ 0x2008e04afb5847ecL } },
+ /* 48 << 175 */
+ { { 0xd147148e41dbd772L,0x2b419f7322942654L,0x669f30d3e9c544f7L,
+ 0x52a2c223c8540149L },
+ { 0x5da9ee14634dfb02L,0x5f074ff0f47869f3L,0x74ee878da3933accL,
+ 0xe65106514fe35ed1L } },
+ /* 49 << 175 */
+ { { 0xb3eb9482f1012e7aL,0x51013cc0a8a566aeL,0xdd5e924347c00d3bL,
+ 0x7fde089d946bb0e5L },
+ { 0x030754fec731b4b3L,0x12a136a499fda062L,0x7c1064b85a1a35bcL,
+ 0xbf1f5763446c84efL } },
+ /* 50 << 175 */
+ { { 0xed29a56da16d4b34L,0x7fba9d09dca21c4fL,0x66d7ac006d8de486L,
+ 0x6006198773a2a5e1L },
+ { 0x8b400f869da28ff0L,0x3133f70843c4599cL,0x9911c9b8ee28cb0dL,
+ 0xcd7e28748e0af61dL } },
+ /* 51 << 175 */
+ { { 0x5a85f0f272ed91fcL,0x85214f319cd4a373L,0x881fe5be1925253cL,
+ 0xd8dc98e091e8bc76L },
+ { 0x7120affe585cc3a2L,0x724952ed735bf97aL,0x5581e7dc3eb34581L,
+ 0x5cbff4f2e52ee57dL } },
+ /* 52 << 175 */
+ { { 0x8d320a0e87d8cc7bL,0x9beaa7f3f1d280d0L,0x7a0b95719beec704L,
+ 0x9126332e5b7f0057L },
+ { 0x01fbc1b48ed3bd6dL,0x35bb2c12d945eb24L,0x6404694e9a8ae255L,
+ 0xb6092eec8d6abfb3L } },
+ /* 53 << 175 */
+ { { 0x4d76143fcc058865L,0x7b0a5af26e249922L,0x8aef94406a50d353L,
+ 0xe11e4bcc64f0e07aL },
+ { 0x4472993aa14a90faL,0x7706e20cba0c51d4L,0xf403292f1532672dL,
+ 0x52573bfa21829382L } },
+ /* 54 << 175 */
+ { { 0x6a7bb6a93b5bdb83L,0x08da65c0a4a72318L,0xc58d22aa63eb065fL,
+ 0x1717596c1b15d685L },
+ { 0x112df0d0b266d88bL,0xf688ae975941945aL,0x487386e37c292cacL,
+ 0x42f3b50d57d6985cL } },
+ /* 55 << 175 */
+ { { 0x6da4f9986a90fc34L,0xc8f257d365ca8a8dL,0xc2feabca6951f762L,
+ 0xe1bc81d074c323acL },
+ { 0x1bc68f67251a2a12L,0x10d86587be8a70dcL,0xd648af7ff0f84d2eL,
+ 0xf0aa9ebc6a43ac92L } },
+ /* 56 << 175 */
+ { { 0x69e3be0427596893L,0xb6bb02a645bf452bL,0x0875c11af4c698c8L,
+ 0x6652b5c7bece3794L },
+ { 0x7b3755fd4f5c0499L,0x6ea16558b5532b38L,0xd1c69889a2e96ef7L,
+ 0x9c773c3a61ed8f48L } },
+ /* 57 << 175 */
+ { { 0x2b653a409b323abcL,0xe26605e1f0e1d791L,0x45d410644a87157aL,
+ 0x8f9a78b7cbbce616L },
+ { 0xcf1e44aac407edddL,0x81ddd1d8a35b964fL,0x473e339efd083999L,
+ 0x6c94bdde8e796802L } },
+ /* 58 << 175 */
+ { { 0x5a304ada8545d185L,0x82ae44ea738bb8cbL,0x628a35e3df87e10eL,
+ 0xd3624f3da15b9fe3L },
+ { 0xcc44209b14be4254L,0x7d0efcbcbdbc2ea5L,0x1f60336204c37bbeL,
+ 0x21f363f556a5852cL } },
+ /* 59 << 175 */
+ { { 0xa1503d1ca8501550L,0x2251e0e1d8ab10bbL,0xde129c966961c51cL,
+ 0x1f7246a481910f68L },
+ { 0x2eb744ee5f2591f2L,0x3c47d33f5e627157L,0x4d6d62c922f3bd68L,
+ 0x6120a64bcb8df856L } },
+ /* 60 << 175 */
+ { { 0x3a9ac6c07b5d07dfL,0xa92b95587ef39783L,0xe128a134ab3a9b4fL,
+ 0x41c18807b1252f05L },
+ { 0xfc7ed08980ba9b1cL,0xac8dc6dec532a9ddL,0xbf829cef55246809L,
+ 0x101b784f5b4ee80fL } },
+ /* 61 << 175 */
+ { { 0xc09945bbb6f11603L,0x57b09dbe41d2801eL,0xfba5202fa97534a8L,
+ 0x7fd8ae5fc17b9614L },
+ { 0xa50ba66678308435L,0x9572f77cd3868c4dL,0x0cef7bfd2dd7aab0L,
+ 0xe7958e082c7c79ffL } },
+ /* 62 << 175 */
+ { { 0x81262e4225346689L,0x716da290b07c7004L,0x35f911eab7950ee3L,
+ 0x6fd72969261d21b5L },
+ { 0x5238980308b640d3L,0x5b0026ee887f12a1L,0x20e21660742e9311L,
+ 0x0ef6d5415ff77ff7L } },
+ /* 63 << 175 */
+ { { 0x969127f0f9c41135L,0xf21d60c968a64993L,0x656e5d0ce541875cL,
+ 0xf1e0f84ea1d3c233L },
+ { 0x9bcca35906002d60L,0xbe2da60c06191552L,0x5da8bbae61181ec3L,
+ 0x9f04b82365806f19L } },
+ /* 64 << 175 */
+ { { 0xf1604a7dd4b79bb8L,0xaee806fb52c878c8L,0x34144f118d47b8e8L,
+ 0x72edf52b949f9054L },
+ { 0xebfca84e2127015aL,0x9051d0c09cb7cef3L,0x86e8fe58296deec8L,
+ 0x33b2818841010d74L } },
+ /* 0 << 182 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 182 */
+ { { 0x01079383171b445fL,0x9bcf21e38131ad4cL,0x8cdfe205c93987e8L,
+ 0xe63f4152c92e8c8fL },
+ { 0x729462a930add43dL,0x62ebb143c980f05aL,0x4f3954e53b06e968L,
+ 0xfe1d75ad242cf6b1L } },
+ /* 2 << 182 */
+ { { 0x5f95c6c7af8685c8L,0xd4c1c8ce2f8f01aaL,0xc44bbe322574692aL,
+ 0xb8003478d4a4a068L },
+ { 0x7c8fc6e52eca3cdbL,0xea1db16bec04d399L,0xb05bc82e8f2bc5cfL,
+ 0x763d517ff44793d2L } },
+ /* 3 << 182 */
+ { { 0x4451c1b808bd98d0L,0x644b1cd46575f240L,0x6907eb337375d270L,
+ 0x56c8bebdfa2286bdL },
+ { 0xc713d2acc4632b46L,0x17da427aafd60242L,0x313065b7c95c7546L,
+ 0xf8239898bf17a3deL } },
+ /* 4 << 182 */
+ { { 0xf3b7963f4c830320L,0x842c7aa0903203e3L,0xaf22ca0ae7327afbL,
+ 0x38e13092967609b6L },
+ { 0x73b8fb62757558f1L,0x3cc3e831f7eca8c1L,0xe4174474f6331627L,
+ 0xa77989cac3c40234L } },
+ /* 5 << 182 */
+ { { 0xe5fd17a144a081e0L,0xd797fb7db70e296aL,0x2b472b30481f719cL,
+ 0x0e632a98fe6f8c52L },
+ { 0x89ccd116c5f0c284L,0xf51088af2d987c62L,0x2a2bccda4c2de6cfL,
+ 0x810f9efef679f0f9L } },
+ /* 6 << 182 */
+ { { 0xb0f394b97ffe4b3eL,0x0b691d21e5fa5d21L,0xb0bd77479dfbbc75L,
+ 0xd2830fdafaf78b00L },
+ { 0xf78c249c52434f57L,0x4b1f754598096dabL,0x73bf6f948ff8c0b3L,
+ 0x34aef03d454e134cL } },
+ /* 7 << 182 */
+ { { 0xf8d151f4b7ac7ec5L,0xd6ceb95ae50da7d5L,0xa1b492b0dc3a0eb8L,
+ 0x75157b69b3dd2863L },
+ { 0xe2c4c74ec5413d62L,0xbe329ff7bc5fc4c7L,0x835a2aea60fa9ddaL,
+ 0xf117f5ad7445cb87L } },
+ /* 8 << 182 */
+ { { 0xae8317f4b0166f7aL,0xfbd3e3f7ceec74e6L,0xfdb516ace0874bfdL,
+ 0x3d846019c681f3a3L },
+ { 0x0b12ee5c7c1620b0L,0xba68b4dd2b63c501L,0xac03cd326668c51eL,
+ 0x2a6279f74e0bcb5bL } },
+ /* 9 << 182 */
+ { { 0x17bd69b06ae85c10L,0x729469791dfdd3a6L,0xd9a032682c078becL,
+ 0x41c6a658bfd68a52L },
+ { 0xcdea10240e023900L,0xbaeec121b10d144dL,0x5a600e74058ab8dcL,
+ 0x1333af21bb89ccddL } },
+ /* 10 << 182 */
+ { { 0xdf25eae03aaba1f1L,0x2cada16e3b7144cfL,0x657ee27d71ab98bcL,
+ 0x99088b4c7a6fc96eL },
+ { 0x05d5c0a03549dbd4L,0x42cbdf8ff158c3acL,0x3fb6b3b087edd685L,
+ 0x22071cf686f064d0L } },
+ /* 11 << 182 */
+ { { 0xd2d6721fff2811e5L,0xdb81b703fe7fae8cL,0x3cfb74efd3f1f7bbL,
+ 0x0cdbcd7616cdeb5dL },
+ { 0x4f39642a566a808cL,0x02b74454340064d6L,0xfabbadca0528fa6fL,
+ 0xe4c3074cd3fc0bb6L } },
+ /* 12 << 182 */
+ { { 0xb32cb8b0b796d219L,0xc3e95f4f34741dd9L,0x8721212568edf6f5L,
+ 0x7a03aee4a2b9cb8eL },
+ { 0x0cd3c376f53a89aaL,0x0d8af9b1948a28dcL,0xcf86a3f4902ab04fL,
+ 0x8aacb62a7f42002dL } },
+ /* 13 << 182 */
+ { { 0x106985ebf62ffd52L,0xe670b54e5797bf10L,0x4b405209c5e30aefL,
+ 0x12c97a204365b5e9L },
+ { 0x104646ce1fe32093L,0x13cb4ff63907a8c9L,0x8b9f30d1d46e726bL,
+ 0xe1985e21aba0f499L } },
+ /* 14 << 182 */
+ { { 0xc573dea910a230cdL,0x24f46a93cd30f947L,0xf2623fcfabe2010aL,
+ 0x3f278cb273f00e4fL },
+ { 0xed55c67d50b920ebL,0xf1cb9a2d8e760571L,0x7c50d1090895b709L,
+ 0x4207cf07190d4369L } },
+ /* 15 << 182 */
+ { { 0x3b027e81c4127fe1L,0xa9f8b9ad3ae9c566L,0x5ab10851acbfbba5L,
+ 0xa747d648569556f5L },
+ { 0xcc172b5c2ba97bf7L,0x15e0f77dbcfa3324L,0xa345b7977686279dL,
+ 0x5a723480e38003d3L } },
+ /* 16 << 182 */
+ { { 0xfd8e139f8f5fcda8L,0xf3e558c4bdee5bfdL,0xd76cbaf4e33f9f77L,
+ 0x3a4c97a471771969L },
+ { 0xda27e84bf6dce6a7L,0xff373d9613e6c2d1L,0xf115193cd759a6e9L,
+ 0x3f9b702563d2262cL } },
+ /* 17 << 182 */
+ { { 0xd9764a31317cd062L,0x30779d8e199f8332L,0xd807410616b11b0bL,
+ 0x7917ab9f78aeaed8L },
+ { 0xb67a9cbe28fb1d8eL,0x2e313563136eda33L,0x010b7069a371a86cL,
+ 0x44d90fa26744e6b7L } },
+ /* 18 << 182 */
+ { { 0x68190867d6b3e243L,0x9fe6cd9d59048c48L,0xb900b02895731538L,
+ 0xa012062f32cae04fL },
+ { 0x8107c8bc9399d082L,0x47e8c54a41df12e2L,0x14ba5117b6ef3f73L,
+ 0x22260bea81362f0bL } },
+ /* 19 << 182 */
+ { { 0x90ea261e1a18cc20L,0x2192999f2321d636L,0xef64d314e311b6a0L,
+ 0xd7401e4c3b54a1f5L },
+ { 0x190199836fbca2baL,0x46ad32938fbffc4bL,0xa142d3f63786bf40L,
+ 0xeb5cbc26b67039fcL } },
+ /* 20 << 182 */
+ { { 0x9cb0ae6c252bd479L,0x05e0f88a12b5848fL,0x78f6d2b2a5c97663L,
+ 0x6f6e149bc162225cL },
+ { 0xe602235cde601a89L,0xd17bbe98f373be1fL,0xcaf49a5ba8471827L,
+ 0x7e1a0a8518aaa116L } },
+ /* 21 << 182 */
+ { { 0x6c833196270580c3L,0x1e233839f1c98a14L,0x67b2f7b4ae34e0a5L,
+ 0x47ac8745d8ce7289L },
+ { 0x2b74779a100dd467L,0x274a43374ee50d09L,0x603dcf1383608bc9L,
+ 0xcd9da6c3c89e8388L } },
+ /* 22 << 182 */
+ { { 0x2660199f355116acL,0xcc38bb59b6d18eedL,0x3075f31f2f4bc071L,
+ 0x9774457f265dc57eL },
+ { 0x06a6a9c8c6db88bbL,0x6429d07f4ec98e04L,0x8d05e57b05ecaa8bL,
+ 0x20f140b17872ea7bL } },
+ /* 23 << 182 */
+ { { 0xdf8c0f09ca494693L,0x48d3a020f252e909L,0x4c5c29af57b14b12L,
+ 0x7e6fa37dbf47ad1cL },
+ { 0x66e7b50649a0c938L,0xb72c0d486be5f41fL,0x6a6242b8b2359412L,
+ 0xcd35c7748e859480L } },
+ /* 24 << 182 */
+ { { 0x12536fea87baa627L,0x58c1fec1f72aa680L,0x6c29b637601e5dc9L,
+ 0x9e3c3c1cde9e01b9L },
+ { 0xefc8127b2bcfe0b0L,0x351071022a12f50dL,0x6ccd6cb14879b397L,
+ 0xf792f804f8a82f21L } },
+ /* 25 << 182 */
+ { { 0x509d4804a9b46402L,0xedddf85dc10f0850L,0x928410dc4b6208aaL,
+ 0xf6229c46391012dcL },
+ { 0xc5a7c41e7727b9b6L,0x289e4e4baa444842L,0x049ba1d9e9a947eaL,
+ 0x44f9e47f83c8debcL } },
+ /* 26 << 182 */
+ { { 0xfa77a1fe611f8b8eL,0xfd2e416af518f427L,0xc5fffa70114ebac3L,
+ 0xfe57c4e95d89697bL },
+ { 0xfdd053acb1aaf613L,0x31df210fea585a45L,0x318cc10e24985034L,
+ 0x1a38efd15f1d6130L } },
+ /* 27 << 182 */
+ { { 0xbf86f2370b1e9e21L,0xb258514d1dbe88aaL,0x1e38a58890c1baf9L,
+ 0x2936a01ebdb9b692L },
+ { 0xd576de986dd5b20cL,0xb586bf7170f98ecfL,0xcccf0f12c42d2fd7L,
+ 0x8717e61cfb35bd7bL } },
+ /* 28 << 182 */
+ { { 0x8b1e572235e6fc06L,0x3477728f0b3e13d5L,0x150c294daa8a7372L,
+ 0xc0291d433bfa528aL },
+ { 0xc6c8bc67cec5a196L,0xdeeb31e45c2e8a7cL,0xba93e244fb6e1c51L,
+ 0xb9f8b71b2e28e156L } },
+ /* 29 << 182 */
+ { { 0xce65a287968a2ab9L,0xe3c5ce6946bbcb1fL,0xf8c835b9e7ae3f30L,
+ 0x16bbee26ff72b82bL },
+ { 0x665e2017fd42cd22L,0x1e139970f8b1d2a0L,0x125cda2979204932L,
+ 0x7aee94a549c3bee5L } },
+ /* 30 << 182 */
+ { { 0x68c7016089821a66L,0xf7c376788f981669L,0xd90829fc48cc3645L,
+ 0x346af049d70addfcL },
+ { 0x2057b232370bf29cL,0xf90c73ce42e650eeL,0xe03386eaa126ab90L,
+ 0x0e266e7e975a087bL } },
+ /* 31 << 182 */
+ { { 0x80578eb90fca65d9L,0x7e2989ea16af45b8L,0x7438212dcac75a4eL,
+ 0x38c7ca394fef36b8L },
+ { 0x8650c494d402676aL,0x26ab5a66f72c7c48L,0x4e6cb426ce3a464eL,
+ 0xf8f998962b72f841L } },
+ /* 32 << 182 */
+ { { 0x8c3184911a335cc8L,0x563459ba6a5913e4L,0x1b920d61c7b32919L,
+ 0x805ab8b6a02425adL },
+ { 0x2ac512da8d006086L,0x6ca4846abcf5c0fdL,0xafea51d8ac2138d7L,
+ 0xcb647545344cd443L } },
+ /* 33 << 182 */
+ { { 0x0429ee8fbd7d9040L,0xee66a2de819b9c96L,0x54f9ec25dea7d744L,
+ 0x2ffea642671721bbL },
+ { 0x4f19dbd1114344eaL,0x04304536fd0dbc8bL,0x014b50aa29ec7f91L,
+ 0xb5fc22febb06014dL } },
+ /* 34 << 182 */
+ { { 0x60d963a91ee682e0L,0xdf48abc0fe85c727L,0x0cadba132e707c2dL,
+ 0xde608d3aa645aeffL },
+ { 0x05f1c28bedafd883L,0x3c362edebd94de1fL,0x8dd0629d13593e41L,
+ 0x0a5e736f766d6eafL } },
+ /* 35 << 182 */
+ { { 0xbfa92311f68cf9d1L,0xa4f9ef87c1797556L,0x10d75a1f5601c209L,
+ 0x651c374c09b07361L },
+ { 0x49950b5888b5ceadL,0x0ef000586fa9dbaaL,0xf51ddc264e15f33aL,
+ 0x1f8b5ca62ef46140L } },
+ /* 36 << 182 */
+ { { 0x343ac0a3ee9523f0L,0xbb75eab2975ea978L,0x1bccf332107387f4L,
+ 0x790f92599ab0062eL },
+ { 0xf1a363ad1e4f6a5fL,0x06e08b8462519a50L,0x609151877265f1eeL,
+ 0x6a80ca3493ae985eL } },
+ /* 37 << 182 */
+ { { 0x81b29768aaba4864L,0xb13cabf28d52a7d6L,0xb5c363488ead03f1L,
+ 0xc932ad9581c7c1c0L },
+ { 0x5452708ecae1e27bL,0x9dac42691b0df648L,0x233e3f0cdfcdb8bcL,
+ 0xe6ceccdfec540174L } },
+ /* 38 << 182 */
+ { { 0xbd0d845e95081181L,0xcc8a7920699355d5L,0x111c0f6dc3b375a8L,
+ 0xfd95bc6bfd51e0dcL },
+ { 0x4a106a266888523aL,0x4d142bd6cb01a06dL,0x79bfd289adb9b397L,
+ 0x0bdbfb94e9863914L } },
+ /* 39 << 182 */
+ { { 0x29d8a2291660f6a6L,0x7f6abcd6551c042dL,0x13039deb0ac3ffe8L,
+ 0xa01be628ec8523fbL },
+ { 0x6ea341030ca1c328L,0xc74114bdb903928eL,0x8aa4ff4e9e9144b0L,
+ 0x7064091f7f9a4b17L } },
+ /* 40 << 182 */
+ { { 0xa3f4f521e447f2c4L,0x81b8da7a604291f0L,0xd680bc467d5926deL,
+ 0x84f21fd534a1202fL },
+ { 0x1d1e31814e9df3d8L,0x1ca4861a39ab8d34L,0x809ddeec5b19aa4aL,
+ 0x59f72f7e4d329366L } },
+ /* 41 << 182 */
+ { { 0xa2f93f41386d5087L,0x40bf739cdd67d64fL,0xb449420566702158L,
+ 0xc33c65be73b1e178L },
+ { 0xcdcd657c38ca6153L,0x97f4519adc791976L,0xcc7c7f29cd6e1f39L,
+ 0x38de9cfb7e3c3932L } },
+ /* 42 << 182 */
+ { { 0xe448eba37b793f85L,0xe9f8dbf9f067e914L,0xc0390266f114ae87L,
+ 0x39ed75a7cd6a8e2aL },
+ { 0xadb148487ffba390L,0x67f8cb8b6af9bc09L,0x322c38489c7476dbL,
+ 0xa320fecf52a538d6L } },
+ /* 43 << 182 */
+ { { 0xe0493002b2aced2bL,0xdfba1809616bd430L,0x531c4644c331be70L,
+ 0xbc04d32e90d2e450L },
+ { 0x1805a0d10f9f142dL,0x2c44a0c547ee5a23L,0x31875a433989b4e3L,
+ 0x6b1949fd0c063481L } },
+ /* 44 << 182 */
+ { { 0x2dfb9e08be0f4492L,0x3ff0da03e9d5e517L,0x03dbe9a1f79466a8L,
+ 0x0b87bcd015ea9932L },
+ { 0xeb64fc83ab1f58abL,0x6d9598da817edc8aL,0x699cff661d3b67e5L,
+ 0x645c0f2992635853L } },
+ /* 45 << 182 */
+ { { 0x253cdd82eabaf21cL,0x82b9602a2241659eL,0x2cae07ec2d9f7091L,
+ 0xbe4c720c8b48cd9bL },
+ { 0x6ce5bc036f08d6c9L,0x36e8a997af10bf40L,0x83422d213e10ff12L,
+ 0x7b26d3ebbcc12494L } },
+ /* 46 << 182 */
+ { { 0xb240d2d0c9469ad6L,0xc4a11b4d30afa05bL,0x4b604acedd6ba286L,
+ 0x184866003ee2864cL },
+ { 0x5869d6ba8d9ce5beL,0x0d8f68c5ff4bfb0dL,0xb69f210b5700cf73L,
+ 0x61f6653a6d37c135L } },
+ /* 47 << 182 */
+ { { 0xff3d432b5aff5a48L,0x0d81c4b972ba3a69L,0xee879ae9fa1899efL,
+ 0xbac7e2a02d6acafdL },
+ { 0xd6d93f6c1c664399L,0x4c288de15bcb135dL,0x83031dab9dab7cbfL,
+ 0xfe23feb03abbf5f0L } },
+ /* 48 << 182 */
+ { { 0x9f1b2466cdedca85L,0x140bb7101a09538cL,0xac8ae8515e11115dL,
+ 0x0d63ff676f03f59eL },
+ { 0x755e55517d234afbL,0x61c2db4e7e208fc1L,0xaa9859cef28a4b5dL,
+ 0xbdd6d4fc34af030fL } },
+ /* 49 << 182 */
+ { { 0xd1c4a26d3be01cb1L,0x9ba14ffc243aa07cL,0xf95cd3a9b2503502L,
+ 0xe379bc067d2a93abL },
+ { 0x3efc18e9d4ca8d68L,0x083558ec80bb412aL,0xd903b9409645a968L,
+ 0xa499f0b69ba6054fL } },
+ /* 50 << 182 */
+ { { 0x208b573cb8349abeL,0x3baab3e530b4fc1cL,0x87e978bacb524990L,
+ 0x3524194eccdf0e80L },
+ { 0x627117257d4bcc42L,0xe90a3d9bb90109baL,0x3b1bdd571323e1e0L,
+ 0xb78e9bd55eae1599L } },
+ /* 51 << 182 */
+ { { 0x0794b7469e03d278L,0x80178605d70e6297L,0x171792f899c97855L,
+ 0x11b393eef5a86b5cL },
+ { 0x48ef6582d8884f27L,0xbd44737abf19ba5fL,0x8698de4ca42062c6L,
+ 0x8975eb8061ce9c54L } },
+ /* 52 << 182 */
+ { { 0xd50e57c7d7fe71f3L,0x15342190bc97ce38L,0x51bda2de4df07b63L,
+ 0xba12aeae200eb87dL },
+ { 0xabe135d2a9b4f8f6L,0x04619d65fad6d99cL,0x4a6683a77994937cL,
+ 0x7a778c8b6f94f09aL } },
+ /* 53 << 182 */
+ { { 0x8c50862320a71b89L,0x241a2aed1c229165L,0x352be595aaf83a99L,
+ 0x9fbfee7f1562bac8L },
+ { 0xeaf658b95c4017e3L,0x1dc7f9e015120b86L,0xd84f13dd4c034d6fL,
+ 0x283dd737eaea3038L } },
+ /* 54 << 182 */
+ { { 0x197f2609cd85d6a2L,0x6ebbc345fae60177L,0xb80f031b4e12fedeL,
+ 0xde55d0c207a2186bL },
+ { 0x1fb3e37f24dcdd5aL,0x8d602da57ed191fbL,0x108fb05676023e0dL,
+ 0x70178c71459c20c0L } },
+ /* 55 << 182 */
+ { { 0xfad5a3863fe54cf0L,0xa4a3ec4f02bbb475L,0x1aa5ec20919d94d7L,
+ 0x5d3b63b5a81e4ab3L },
+ { 0x7fa733d85ad3d2afL,0xfbc586ddd1ac7a37L,0x282925de40779614L,
+ 0xfe0ffffbe74a242aL } },
+ /* 56 << 182 */
+ { { 0x3f39e67f906151e5L,0xcea27f5f55e10649L,0xdca1d4e1c17cf7b7L,
+ 0x0c326d122fe2362dL },
+ { 0x05f7ac337dd35df3L,0x0c3b7639c396dbdfL,0x0912f5ac03b7db1cL,
+ 0x9dea4b705c9ed4a9L } },
+ /* 57 << 182 */
+ { { 0x475e6e53aae3f639L,0xfaba0e7cfc278bacL,0x16f9e2219490375fL,
+ 0xaebf9746a5a7ed0aL },
+ { 0x45f9af3ff41ad5d6L,0x03c4623cb2e99224L,0x82c5bb5cb3cf56aaL,
+ 0x6431181934567ed3L } },
+ /* 58 << 182 */
+ { { 0xec57f2118be489acL,0x2821895db9a1104bL,0x610dc8756064e007L,
+ 0x8e526f3f5b20d0feL },
+ { 0x6e71ca775b645aeeL,0x3d1dcb9f800e10ffL,0x36b51162189cf6deL,
+ 0x2c5a3e306bb17353L } },
+ /* 59 << 182 */
+ { { 0xc186cd3e2a6c6fbfL,0xa74516fa4bf97906L,0x5b4b8f4b279d6901L,
+ 0x0c4e57b42b573743L },
+ { 0x75fdb229b6e386b6L,0xb46793fd99deac27L,0xeeec47eacf712629L,
+ 0xe965f3c4cbc3b2ddL } },
+ /* 60 << 182 */
+ { { 0x8dd1fb83425c6559L,0x7fc00ee60af06fdaL,0xe98c922533d956dfL,
+ 0x0f1ef3354fbdc8a2L },
+ { 0x2abb5145b79b8ea2L,0x40fd2945bdbff288L,0x6a814ac4d7185db7L,
+ 0xc4329d6fc084609aL } },
+ /* 61 << 182 */
+ { { 0xc9ba7b52ed1be45dL,0x891dd20de4cd2c74L,0x5a4d4a7f824139b1L,
+ 0x66c17716b873c710L },
+ { 0x5e5bc1412843c4e0L,0xd5ac4817b97eb5bfL,0xc0f8af54450c95c7L,
+ 0xc91b3fa0318406c5L } },
+ /* 62 << 182 */
+ { { 0x360c340aab9d97f8L,0xfb57bd0790a2d611L,0x4339ae3ca6a6f7e5L,
+ 0x9c1fcd2a2feb8a10L },
+ { 0x972bcca9c7ea7432L,0x1b0b924c308076f6L,0x80b2814a2a5b4ca5L,
+ 0x2f78f55b61ef3b29L } },
+ /* 63 << 182 */
+ { { 0xf838744ac18a414fL,0xc611eaae903d0a86L,0x94dabc162a453f55L,
+ 0xe6f2e3da14efb279L },
+ { 0x5b7a60179320dc3cL,0x692e382f8df6b5a4L,0x3f5e15e02d40fa90L,
+ 0xc87883ae643dd318L } },
+ /* 64 << 182 */
+ { { 0x511053e453544774L,0x834d0ecc3adba2bcL,0x4215d7f7bae371f5L,
+ 0xfcfd57bf6c8663bcL },
+ { 0xded2383dd6901b1dL,0x3b49fbb4b5587dc3L,0xfd44a08d07625f62L,
+ 0x3ee4d65b9de9b762L } },
+ /* 0 << 189 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 189 */
+ { { 0x64e5137d0d63d1faL,0x658fc05202a9d89fL,0x4889487450436309L,
+ 0xe9ae30f8d598da61L },
+ { 0x2ed710d1818baf91L,0xe27e9e068b6a0c20L,0x1e28dcfb1c1a6b44L,
+ 0x883acb64d6ac57dcL } },
+ /* 2 << 189 */
+ { { 0x8735728dc2c6ff70L,0x79d6122fc5dc2235L,0x23f5d00319e277f9L,
+ 0x7ee84e25dded8cc7L },
+ { 0x91a8afb063cd880aL,0x3f3ea7c63574af60L,0x0cfcdc8402de7f42L,
+ 0x62d0792fb31aa152L } },
+ /* 3 << 189 */
+ { { 0x8e1b4e438a5807ceL,0xad283893e4109a7eL,0xc30cc9cbafd59ddaL,
+ 0xf65f36c63d8d8093L },
+ { 0xdf31469ea60d32b2L,0xee93df4b3e8191c8L,0x9c1017c5355bdeb5L,
+ 0xd26231858616aa28L } },
+ /* 4 << 189 */
+ { { 0xb02c83f9dec31a21L,0x988c8b236ad9d573L,0x53e983aea57be365L,
+ 0xe968734d646f834eL },
+ { 0x9137ea8f5da6309bL,0x10f3a624c1f1ce16L,0x782a9ea2ca440921L,
+ 0xdf94739e5b46f1b5L } },
+ /* 5 << 189 */
+ { { 0x9f9be006cce85c9bL,0x360e70d6a4c7c2d3L,0x2cd5beeaaefa1e60L,
+ 0x64cf63c08c3d2b6dL },
+ { 0xfb107fa3e1cf6f90L,0xb7e937c6d5e044e6L,0x74e8ca78ce34db9fL,
+ 0x4f8b36c13e210bd0L } },
+ /* 6 << 189 */
+ { { 0x1df165a434a35ea8L,0x3418e0f74d4412f6L,0x5af1f8af518836c3L,
+ 0x42ceef4d130e1965L },
+ { 0x5560ca0b543a1957L,0xc33761e5886cb123L,0x66624b1ffe98ed30L,
+ 0xf772f4bf1090997dL } },
+ /* 7 << 189 */
+ { { 0xf4e540bb4885d410L,0x7287f8109ba5f8d7L,0x22d0d865de98dfb1L,
+ 0x49ff51a1bcfbb8a3L },
+ { 0xb6b6fa536bc3012eL,0x3d31fd72170d541dL,0x8018724f4b0f4966L,
+ 0x79e7399f87dbde07L } },
+ /* 8 << 189 */
+ { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L,
+ 0x803f3e02cd42ab1bL },
+ { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL,
+ 0xc097440e5067adc1L } },
+ /* 9 << 189 */
+ { { 0x730eafb63524ff16L,0xd7f9b51e823fc6ceL,0x27bd0d32443e4ac0L,
+ 0x40c59ad94d66f217L },
+ { 0x6c33136f17c387a4L,0x5043b8d5eb86804dL,0x74970312675a73c9L,
+ 0x838fdb31f16669b6L } },
+ /* 10 << 189 */
+ { { 0xc507b6dd418e7dddL,0x39888d93472f19d6L,0x7eae26be0c27eb4dL,
+ 0x17b53ed3fbabb884L },
+ { 0xfc27021b2b01ae4fL,0x88462e87cf488682L,0xbee096ec215e2d87L,
+ 0xeb2fea9ad242e29bL } },
+ /* 11 << 189 */
+ { { 0x5d985b5fb821fc28L,0x89d2e197dc1e2ad2L,0x55b566b89030ba62L,
+ 0xe3fd41b54f41b1c6L },
+ { 0xb738ac2eb9a96d61L,0x7f8567ca369443f4L,0x8698622df803a440L,
+ 0x2b5862368fe2f4dcL } },
+ /* 12 << 189 */
+ { { 0xbbcc00c756b95bceL,0x5ec03906616da680L,0x79162ee672214252L,
+ 0x43132b6386a892d2L },
+ { 0x4bdd3ff22f3263bfL,0xd5b3733c9cd0a142L,0x592eaa8244415ccbL,
+ 0x663e89248d5474eaL } },
+ /* 13 << 189 */
+ { { 0x8058a25e5236344eL,0x82e8df9dbda76ee6L,0xdcf6efd811cc3d22L,
+ 0x00089cda3b4ab529L },
+ { 0x91d3a071bd38a3dbL,0x4ea97fc0ef72b925L,0x0c9fc15bea3edf75L,
+ 0x5a6297cda4348ed3L } },
+ /* 14 << 189 */
+ { { 0x0d38ab35ce7c42d4L,0x9fd493ef82feab10L,0x46056b6d82111b45L,
+ 0xda11dae173efc5c3L },
+ { 0xdc7402785545a7fbL,0xbdb2601c40d507e6L,0x121dfeeb7066fa58L,
+ 0x214369a839ae8c2aL } },
+ /* 15 << 189 */
+ { { 0x195709cb06e0956cL,0x4c9d254f010cd34bL,0xf51e13f70471a532L,
+ 0xe19d67911e73054dL },
+ { 0xf702a628db5c7be3L,0xc7141218b24dde05L,0xdc18233cf29b2e2eL,
+ 0x3a6bd1e885342dbaL } },
+ /* 16 << 189 */
+ { { 0x3f747fa0b311898cL,0xe2a272e4cd0eac65L,0x4bba5851f914d0bcL,
+ 0x7a1a9660c4a43ee3L },
+ { 0xe5a367cea1c8cde9L,0x9d958ba97271abe3L,0xf3ff7eb63d1615cdL,
+ 0xa2280dcef5ae20b0L } },
+ /* 17 << 189 */
+ { { 0x56dba5c1cf640147L,0xea5a2e3d5e83d118L,0x04cd6b6dda24c511L,
+ 0x1c0f4671e854d214L },
+ { 0x91a6b7a969565381L,0xdc966240decf1f5bL,0x1b22d21cfcf5d009L,
+ 0x2a05f6419021dbd5L } },
+ /* 18 << 189 */
+ { { 0x8c0ed566d4312483L,0x5179a95d643e216fL,0xcc185fec17044493L,
+ 0xb306333954991a21L },
+ { 0xd801ecdb0081a726L,0x0149b0c64fa89bbbL,0xafe9065a4391b6b9L,
+ 0xedc92786d633f3a3L } },
+ /* 19 << 189 */
+ { { 0xe408c24aae6a8e13L,0x85833fde9f3897abL,0x43800e7ed81a0715L,
+ 0xde08e346b44ffc5fL },
+ { 0x7094184ccdeff2e0L,0x49f9387b165eaed1L,0x635d6129777c468aL,
+ 0x8c0dcfd1538c2dd8L } },
+ /* 20 << 189 */
+ { { 0xd6d9d9e37a6a308bL,0x623758304c2767d3L,0x874a8bc6f38cbeb6L,
+ 0xd94d3f1accb6fd9eL },
+ { 0x92a9735bba21f248L,0x272ad0e56cd1efb0L,0x7437b69c05b03284L,
+ 0xe7f047026948c225L } },
+ /* 21 << 189 */
+ { { 0x8a56c04acba2ececL,0x0c181270e3a73e41L,0x6cb34e9d03e93725L,
+ 0xf77c8713496521a9L },
+ { 0x94569183fa7f9f90L,0xf2e7aa4c8c9707adL,0xced2c9ba26c1c9a3L,
+ 0x9109fe9640197507L } },
+ /* 22 << 189 */
+ { { 0x9ae868a9e9adfe1cL,0x3984403d314e39bbL,0xb5875720f2fe378fL,
+ 0x33f901e0ba44a628L },
+ { 0xea1125fe3652438cL,0xae9ec4e69dd1f20bL,0x1e740d9ebebf7fbdL,
+ 0x6dbd3ddc42dbe79cL } },
+ /* 23 << 189 */
+ { { 0x62082aecedd36776L,0xf612c478e9859039L,0xa493b201032f7065L,
+ 0xebd4d8f24ff9b211L },
+ { 0x3f23a0aaaac4cb32L,0xea3aadb715ed4005L,0xacf17ea4afa27e63L,
+ 0x56125c1ac11fd66cL } },
+ /* 24 << 189 */
+ { { 0x266344a43794f8dcL,0xdcca923a483c5c36L,0x2d6b6bbf3f9d10a0L,
+ 0xb320c5ca81d9bdf3L },
+ { 0x620e28ff47b50a95L,0x933e3b01cef03371L,0xf081bf8599100153L,
+ 0x183be9a0c3a8c8d6L } },
+ /* 25 << 189 */
+ { { 0x4e3ddc5ad6bbe24dL,0xc6c7463053843795L,0x78193dd765ec2d4cL,
+ 0xb8df26cccd3c89b2L },
+ { 0x98dbe3995a483f8dL,0x72d8a9577dd3313aL,0x65087294ab0bd375L,
+ 0xfcd892487c259d16L } },
+ /* 26 << 189 */
+ { { 0x8a9443d77613aa81L,0x8010080085fe6584L,0x70fc4dbc7fb10288L,
+ 0xf58280d3e86beee8L },
+ { 0x14fdd82f7c978c38L,0xdf1204c10de44d7bL,0xa08a1c844160252fL,
+ 0x591554cac17646a5L } },
+ /* 27 << 189 */
+ { { 0x214a37d6a05bd525L,0x48d5f09b07957b3cL,0x0247cdcbd7109bc9L,
+ 0x40f9e4bb30599ce7L },
+ { 0xc325fa03f46ad2ecL,0x00f766cfc3e3f9eeL,0xab556668d43a4577L,
+ 0x68d30a613ee03b93L } },
+ /* 28 << 189 */
+ { { 0x7ddc81ea77b46a08L,0xcf5a6477c7480699L,0x43a8cb346633f683L,
+ 0x1b867e6b92363c60L },
+ { 0x439211141f60558eL,0xcdbcdd632f41450eL,0x7fc04601cc630e8bL,
+ 0xea7c66d597038b43L } },
+ /* 29 << 189 */
+ { { 0x7259b8a504e99fd8L,0x98a8dd124785549aL,0x0e459a7c840552e1L,
+ 0xcdfcf4d04bb0909eL },
+ { 0x34a86db253758da7L,0xe643bb83eac997e1L,0x96400bd7530c5b7eL,
+ 0x9f97af87b41c8b52L } },
+ /* 30 << 189 */
+ { { 0x34fc8820fbeee3f9L,0x93e5349049091afdL,0x764b9be59a31f35cL,
+ 0x71f3786457e3d924L },
+ { 0x02fb34e0943aa75eL,0xa18c9c58ab8ff6e4L,0x080f31b133cf0d19L,
+ 0x5c9682db083518a7L } },
+ /* 31 << 189 */
+ { { 0x873d4ca6b709c3deL,0x64a842623575b8f0L,0x6275da1f020154bbL,
+ 0x97678caad17cf1abL },
+ { 0x8779795f951a95c3L,0xdd35b16350fccc08L,0x3270962733d8f031L,
+ 0x3c5ab10a498dd85cL } },
+ /* 32 << 189 */
+ { { 0xb6c185c341dca566L,0x7de7fedad8622aa3L,0x99e84d92901b6dfbL,
+ 0x30a02b0e7c4ad288L },
+ { 0xc7c81daa2fd3cf36L,0xd1319547df89e59fL,0xb2be8184cd496733L,
+ 0xd5f449eb93d3412bL } },
+ /* 33 << 189 */
+ { { 0x7ea41b1b25fe531dL,0xf97974326a1d5646L,0x86067f722bde501aL,
+ 0xf91481c00c85e89cL },
+ { 0xca8ee465f8b05bc6L,0x1844e1cf02e83cdaL,0xca82114ab4dbe33bL,
+ 0x0f9f87694eabfde2L } },
+ /* 34 << 189 */
+ { { 0x4936b1c038b27fe2L,0x63b6359baba402dfL,0x40c0ea2f656bdbabL,
+ 0x9c992a896580c39cL },
+ { 0x600e8f152a60aed1L,0xeb089ca4e0bf49dfL,0x9c233d7d2d42d99aL,
+ 0x648d3f954c6bc2faL } },
+ /* 35 << 189 */
+ { { 0xdcc383a8e1add3f3L,0xf42c0c6a4f64a348L,0x2abd176f0030dbdbL,
+ 0x4de501a37d6c215eL },
+ { 0x4a107c1f4b9a64bcL,0xa77f0ad32496cd59L,0xfb78ac627688dffbL,
+ 0x7025a2ca67937d8eL } },
+ /* 36 << 189 */
+ { { 0xfde8b2d1d1a8f4e7L,0xf5b3da477354927cL,0xe48606a3d9205735L,
+ 0xac477cc6e177b917L },
+ { 0xfb1f73d2a883239aL,0xe12572f6cc8b8357L,0x9d355e9cfb1f4f86L,
+ 0x89b795f8d9f3ec6eL } },
+ /* 37 << 189 */
+ { { 0x27be56f1b54398dcL,0x1890efd73fedeed5L,0x62f77f1f9c6d0140L,
+ 0x7ef0e314596f0ee4L },
+ { 0x50ca6631cc61dab3L,0x4a39801df4866e4fL,0x66c8d032ae363b39L,
+ 0x22c591e52ead66aaL } },
+ /* 38 << 189 */
+ { { 0x954ba308de02a53eL,0x2a6c060fd389f357L,0xe6cfcde8fbf40b66L,
+ 0x8e02fc56c6340ce1L },
+ { 0xe495779573adb4baL,0x7b86122ca7b03805L,0x63f835120c8e6fa6L,
+ 0x83660ea0057d7804L } },
+ /* 39 << 189 */
+ { { 0xbad7910521ba473cL,0xb6c50beeded5389dL,0xee2caf4daa7c9bc0L,
+ 0xd97b8de48c4e98a7L },
+ { 0xa9f63e70ab3bbddbL,0x3898aabf2597815aL,0x7659af89ac15b3d9L,
+ 0xedf7725b703ce784L } },
+ /* 40 << 189 */
+ { { 0x25470fabe085116bL,0x04a4337587285310L,0x4e39187ee2bfd52fL,
+ 0x36166b447d9ebc74L },
+ { 0x92ad433cfd4b322cL,0x726aa817ba79ab51L,0xf96eacd8c1db15ebL,
+ 0xfaf71e910476be63L } },
+ /* 41 << 189 */
+ { { 0xdd69a640641fad98L,0xb799591829622559L,0x03c6daa5de4199dcL,
+ 0x92cadc97ad545eb4L },
+ { 0x1028238b256534e4L,0x73e80ce68595409aL,0x690d4c66d05dc59bL,
+ 0xc95f7b8f981dee80L } },
+ /* 42 << 189 */
+ { { 0xf4337014d856ac25L,0x441bd9ddac524dcaL,0x640b3d855f0499f5L,
+ 0x39cf84a9d5fda182L },
+ { 0x04e7b055b2aa95a0L,0x29e33f0a0ddf1860L,0x082e74b5423f6b43L,
+ 0x217edeb90aaa2b0fL } },
+ /* 43 << 189 */
+ { { 0x58b83f3583cbea55L,0xc485ee4dbc185d70L,0x833ff03b1e5f6992L,
+ 0xb5b9b9cccf0c0dd5L },
+ { 0x7caaee8e4e9e8a50L,0x462e907b6269dafdL,0x6ed5cee9fbe791c6L,
+ 0x68ca3259ed430790L } },
+ /* 44 << 189 */
+ { { 0x2b72bdf213b5ba88L,0x60294c8a35ef0ac4L,0x9c3230ed19b99b08L,
+ 0x560fff176c2589aaL },
+ { 0x552b8487d6770374L,0xa373202d9a56f685L,0xd3e7f90745f175d9L,
+ 0x3c2f315fd080d810L } },
+ /* 45 << 189 */
+ { { 0x1130e9dd7b9520e8L,0xc078f9e20af037b5L,0x38cd2ec71e9c104cL,
+ 0x0f684368c472fe92L },
+ { 0xd3f1b5ed6247e7efL,0xb32d33a9396dfe21L,0x46f59cf44a9aa2c2L,
+ 0x69cd5168ff0f7e41L } },
+ /* 46 << 189 */
+ { { 0x3f59da0f4b3234daL,0xcf0b0235b4579ebeL,0x6d1cbb256d2476c7L,
+ 0x4f0837e69dc30f08L },
+ { 0x9a4075bb906f6e98L,0x253bb434c761e7d1L,0xde2e645f6e73af10L,
+ 0xb89a40600c5f131cL } },
+ /* 47 << 189 */
+ { { 0xd12840c5b8cc037fL,0x3d093a5b7405bb47L,0x6202c253206348b8L,
+ 0xbf5d57fcc55a3ca7L },
+ { 0x89f6c90c8c3bef48L,0x23ac76235a0a960aL,0xdfbd3d6b552b42abL,
+ 0x3ef22458132061f6L } },
+ /* 48 << 189 */
+ { { 0xd74e9bdac97e6516L,0x88779360c230f49eL,0xa6ec1de31e74ea49L,
+ 0x581dcee53fb645a2L },
+ { 0xbaef23918f483f14L,0x6d2dddfcd137d13bL,0x54cde50ed2743a42L,
+ 0x89a34fc5e4d97e67L } },
+ /* 49 << 189 */
+ { { 0x13f1f5b312e08ce5L,0xa80540b8a7f0b2caL,0x854bcf7701982805L,
+ 0xb8653ffd233bea04L },
+ { 0x8e7b878702b0b4c9L,0x2675261f9acb170aL,0x061a9d90930c14e5L,
+ 0xb59b30e0def0abeaL } },
+ /* 50 << 189 */
+ { { 0x1dc19ea60200ec7dL,0xb6f4a3f90bce132bL,0xb8d5de90f13e27e0L,
+ 0xbaee5ef01fade16fL },
+ { 0x6f406aaae4c6cf38L,0xab4cfe06d1369815L,0x0dcffe87efd550c6L,
+ 0x9d4f59c775ff7d39L } },
+ /* 51 << 189 */
+ { { 0xb02553b151deb6adL,0x812399a4b1877749L,0xce90f71fca6006e1L,
+ 0xc32363a6b02b6e77L },
+ { 0x02284fbedc36c64dL,0x86c81e31a7e1ae61L,0x2576c7e5b909d94aL,
+ 0x8b6f7d02818b2bb0L } },
+ /* 52 << 189 */
+ { { 0xeca3ed0756faa38aL,0xa3790e6c9305bb54L,0xd784eeda7bc73061L,
+ 0xbd56d3696dd50614L },
+ { 0xd6575949229a8aa9L,0xdcca8f474595ec28L,0x814305c106ab4fe6L,
+ 0xc8c3976824f43f16L } },
+ /* 53 << 189 */
+ { { 0xe2a45f36523f2b36L,0x995c6493920d93bbL,0xf8afdab790f1632bL,
+ 0x79ebbecd1c295954L },
+ { 0xc7bb3ddb79592f48L,0x67216a7b5f88e998L,0xd91f098bbc01193eL,
+ 0xf7d928a5b1db83fcL } },
+ /* 54 << 189 */
+ { { 0x55e38417e991f600L,0x2a91113e2981a934L,0xcbc9d64806b13bdeL,
+ 0xb011b6ac0755ff44L },
+ { 0x6f4cb518045ec613L,0x522d2d31c2f5930aL,0x5acae1af382e65deL,
+ 0x5764306727bc966fL } },
+ /* 55 << 189 */
+ { { 0x5e12705d1c7193f0L,0xf0f32f473be8858eL,0x785c3d7d96c6dfc7L,
+ 0xd75b4a20bf31795dL },
+ { 0x91acf17b342659d4L,0xe596ea3444f0378fL,0x4515708fce52129dL,
+ 0x17387e1e79f2f585L } },
+ /* 56 << 189 */
+ { { 0x72cfd2e949dee168L,0x1ae052233e2af239L,0x009e75be1d94066aL,
+ 0x6cca31c738abf413L },
+ { 0xb50bd61d9bc49908L,0x4a9b4a8cf5e2bc1eL,0xeb6cc5f7946f83acL,
+ 0x27da93fcebffab28L } },
+ /* 57 << 189 */
+ { { 0xea314c964821c8c5L,0x8de49deda83c15f4L,0x7a64cf207af33004L,
+ 0x45f1bfebc9627e10L },
+ { 0x878b062654b9df60L,0x5e4fdc3ca95c0b33L,0xe54a37cac2035d8eL,
+ 0x9087cda980f20b8cL } },
+ /* 58 << 189 */
+ { { 0x36f61c238319ade4L,0x766f287ade8cfdf8L,0x48821948346f3705L,
+ 0x49a7b85316e4f4a2L },
+ { 0xb9b3f8a75cedadfdL,0x8f5628158db2a815L,0xc0b7d55401f68f95L,
+ 0x12971e27688a208eL } },
+ /* 59 << 189 */
+ { { 0xc9f8b696d0ff34fcL,0x20824de21222718cL,0x7213cf9f0c95284dL,
+ 0xe2ad741bdc158240L },
+ { 0x0ee3a6df54043ccfL,0x16ff479bd84412b3L,0xf6c74ee0dfc98af0L,
+ 0xa78a169f52fcd2fbL } },
+ /* 60 << 189 */
+ { { 0xd8ae874699c930e9L,0x1d33e85849e117a5L,0x7581fcb46624759fL,
+ 0xde50644f5bedc01dL },
+ { 0xbeec5d00caf3155eL,0x672d66acbc73e75fL,0x86b9d8c6270b01dbL,
+ 0xd249ef8350f55b79L } },
+ /* 61 << 189 */
+ { { 0x6131d6d473978fe3L,0xcc4e4542754b00a1L,0x4e05df0557dfcfe9L,
+ 0x94b29cdd51ef6bf0L },
+ { 0xe4530cff9bc7edf2L,0x8ac236fdd3da65f3L,0x0faf7d5fc8eb0b48L,
+ 0x4d2de14c660eb039L } },
+ /* 62 << 189 */
+ { { 0xc006bba760430e54L,0x10a2d0d6da3289abL,0x9c037a5dd7979c59L,
+ 0x04d1f3d3a116d944L },
+ { 0x9ff224738a0983cdL,0x28e25b38c883cabbL,0xe968dba547a58995L,
+ 0x2c80b505774eebdfL } },
+ /* 63 << 189 */
+ { { 0xee763b714a953bebL,0x502e223f1642e7f6L,0x6fe4b64161d5e722L,
+ 0x9d37c5b0dbef5316L },
+ { 0x0115ed70f8330bc7L,0x139850e675a72789L,0x27d7faecffceccc2L,
+ 0x3016a8604fd9f7f6L } },
+ /* 64 << 189 */
+ { { 0xc492ec644cd8f64cL,0x58a2d790279d7b51L,0x0ced1fc51fc75256L,
+ 0x3e658aed8f433017L },
+ { 0x0b61942e05da59ebL,0xba3d60a30ddc3722L,0x7c311cd1742e7f87L,
+ 0x6473ffeef6b01b6eL } },
+ /* 0 << 196 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 196 */
+ { { 0x8303604f692ac542L,0xf079ffe1227b91d3L,0x19f63e6315aaf9bdL,
+ 0xf99ee565f1f344fbL },
+ { 0x8a1d661fd6219199L,0x8c883bc6d48ce41cL,0x1065118f3c74d904L,
+ 0x713889ee0faf8b1bL } },
+ /* 2 << 196 */
+ { { 0x972b3f8f81a1b3beL,0x4f3ce145ce2764a0L,0xe2d0f1cc28c4f5f7L,
+ 0xdeee0c0dc7f3985bL },
+ { 0x7df4adc0d39e25c3L,0x40619820c467a080L,0x440ebc9361cf5a58L,
+ 0x527729a6422ad600L } },
+ /* 3 << 196 */
+ { { 0xca6c0937b1b76ba6L,0x1a2eab854d2026dcL,0xb1715e1519d9ae0aL,
+ 0xf1ad9199bac4a026L },
+ { 0x35b3dfb807ea7b0eL,0xedf5496f3ed9eb89L,0x8932e5ff2d6d08abL,
+ 0xf314874e25bd2731L } },
+ /* 4 << 196 */
+ { { 0xefb26a753f73f449L,0x1d1c94f88d44fc79L,0x49f0fbc53bc0dc4dL,
+ 0xb747ea0b3698a0d0L },
+ { 0x5218c3fe228d291eL,0x35b804b543c129d6L,0xfac859b8d1acc516L,
+ 0x6c10697d95d6e668L } },
+ /* 5 << 196 */
+ { { 0xc38e438f0876fd4eL,0x45f0c30783d2f383L,0x203cc2ecb10934cbL,
+ 0x6a8f24392c9d46eeL },
+ { 0xf16b431b65ccde7bL,0x41e2cd1827e76a6fL,0xb9c8cf8f4e3484d7L,
+ 0x64426efd8315244aL } },
+ /* 6 << 196 */
+ { { 0x1c0a8e44fc94dea3L,0x34c8cdbfdad6a0b0L,0x919c384004113cefL,
+ 0xfd32fba415490ffaL },
+ { 0x58d190f6795dcfb7L,0xfef01b0383588bafL,0x9e6d1d63ca1fc1c0L,
+ 0x53173f96f0a41ac9L } },
+ /* 7 << 196 */
+ { { 0x2b1d402aba16f73bL,0x2fb310148cf9b9fcL,0x2d51e60e446ef7bfL,
+ 0xc731021bb91e1745L },
+ { 0x9d3b47244fee99d4L,0x4bca48b6fac5c1eaL,0x70f5f514bbea9af7L,
+ 0x751f55a5974c283aL } },
+ /* 8 << 196 */
+ { { 0x6e30251acb452fdbL,0x31ee696550f30650L,0xb0b3e508933548d9L,
+ 0xb8949a4ff4b0ef5bL },
+ { 0x208b83263c88f3bdL,0xab147c30db1d9989L,0xed6515fd44d4df03L,
+ 0x17a12f75e72eb0c5L } },
+ /* 9 << 196 */
+ { { 0x3b59796d36cf69dbL,0x1219eee956670c18L,0xfe3341f77a070d8eL,
+ 0x9b70130ba327f90cL },
+ { 0x36a324620ae18e0eL,0x2021a62346c0a638L,0x251b5817c62eb0d4L,
+ 0x87bfbcdf4c762293L } },
+ /* 10 << 196 */
+ { { 0xf78ab505cdd61d64L,0x8c7a53fcc8c18857L,0xa653ce6f16147515L,
+ 0x9c923aa5ea7d52d5L },
+ { 0xc24709cb5c18871fL,0x7d53bec873b3cc74L,0x59264afffdd1d4c4L,
+ 0x5555917e240da582L } },
+ /* 11 << 196 */
+ { { 0xcae8bbda548f5a0eL,0x1910eaba3bbfbbe1L,0xae5796857677afc3L,
+ 0x49ea61f173ff0b5cL },
+ { 0x786554784f7c3922L,0x95d337cd20c68eefL,0x68f1e1e5df779ab9L,
+ 0x14b491b0b5cf69a8L } },
+ /* 12 << 196 */
+ { { 0x7a6cbbe028e3fe89L,0xe7e1fee4c5aac0ebL,0x7f47eda5697e5140L,
+ 0x4f450137b454921fL },
+ { 0xdb625f8495cd8185L,0x74be0ba1cdb2e583L,0xaee4fd7cdd5e6de4L,
+ 0x4251437de8101739L } },
+ /* 13 << 196 */
+ { { 0x686d72a0ac620366L,0x4be3fb9cb6d59344L,0x6e8b44e7a1eb75b9L,
+ 0x84e39da391a5c10cL },
+ { 0x37cc1490b38f0409L,0x029519432c2ade82L,0x9b6887831190a2d8L,
+ 0x25627d14231182baL } },
+ /* 14 << 196 */
+ { { 0x6eb550aa658a6d87L,0x1405aaa7cf9c7325L,0xd147142e5c8748c9L,
+ 0x7f637e4f53ede0e0L },
+ { 0xf8ca277614ffad2cL,0xe58fb1bdbafb6791L,0x17158c23bf8f93fcL,
+ 0x7f15b3730a4a4655L } },
+ /* 15 << 196 */
+ { { 0x39d4add2d842ca72L,0xa71e43913ed96305L,0x5bb09cbe6700be14L,
+ 0x68d69d54d8befcf6L },
+ { 0xa45f536737183bcfL,0x7152b7bb3370dff7L,0xcf887baabf12525bL,
+ 0xe7ac7bddd6d1e3cdL } },
+ /* 16 << 196 */
+ { { 0x25914f7881fdad90L,0xcf638f560d2cf6abL,0xb90bc03fcc054de5L,
+ 0x932811a718b06350L },
+ { 0x2f00b3309bbd11ffL,0x76108a6fb4044974L,0x801bb9e0a851d266L,
+ 0x0dd099bebf8990c1L } },
+ /* 17 << 196 */
+ { { 0x58c5aaaaabe32986L,0x0fe9dd2a50d59c27L,0x84951ff48d307305L,
+ 0x6c23f82986529b78L },
+ { 0x50bb22180b136a79L,0x7e2174de77a20996L,0x6f00a4b9c0bb4da6L,
+ 0x89a25a17efdde8daL } },
+ /* 18 << 196 */
+ { { 0xf728a27ec11ee01dL,0xf900553ae5f10dfbL,0x189a83c802ec893cL,
+ 0x3ca5bdc123f66d77L },
+ { 0x9878153797eada9fL,0x59c50ab310256230L,0x346042d9323c69b3L,
+ 0x1b715a6d2c460449L } },
+ /* 19 << 196 */
+ { { 0xa41dd4766ae06e0bL,0xcdd7888e9d42e25fL,0x0f395f7456b25a20L,
+ 0xeadfe0ae8700e27eL },
+ { 0xb09d52a969950093L,0x3525d9cb327f8d40L,0xb8235a9467df886aL,
+ 0x77e4b0dd035faec2L } },
+ /* 20 << 196 */
+ { { 0x115eb20a517d7061L,0x77fe34336c2df683L,0x6870ddc7cdc6fc67L,
+ 0xb16105880b87de83L },
+ { 0x343584cad9c4ddbeL,0xb3164f1c3d754be2L,0x0731ed3ac1e6c894L,
+ 0x26327dec4f6b904cL } },
+ /* 21 << 196 */
+ { { 0x9d49c6de97b5cd32L,0x40835daeb5eceecdL,0xc66350edd9ded7feL,
+ 0x8aeebb5c7a678804L },
+ { 0x51d42fb75b8ee9ecL,0xd7a17bdd8e3ca118L,0x40d7511a2ef4400eL,
+ 0xc48990ac875a66f4L } },
+ /* 22 << 196 */
+ { { 0x8de07d2a2199e347L,0xbee755562a39e051L,0x56918786916e51dcL,
+ 0xeb1913134a2d89ecL },
+ { 0x6679610d37d341edL,0x434fbb4156d51c2bL,0xe54b7ee7d7492dbaL,
+ 0xaa33a79a59021493L } },
+ /* 23 << 196 */
+ { { 0x49fc5054e4bd6d3dL,0x09540f045ab551d0L,0x8acc90854942d3a6L,
+ 0x231af02f2d28323bL },
+ { 0x93458cac0992c163L,0x1fef8e71888e3bb4L,0x27578da5be8c268cL,
+ 0xcc8be792e805ec00L } },
+ /* 24 << 196 */
+ { { 0x29267baec61c3855L,0xebff429d58c1fd3bL,0x22d886c08c0b93b8L,
+ 0xca5e00b22ddb8953L },
+ { 0xcf330117c3fed8b7L,0xd49ac6fa819c01f6L,0x6ddaa6bd3c0fbd54L,
+ 0x917430688049a2cfL } },
+ /* 25 << 196 */
+ { { 0xd67f981eaff2ef81L,0xc3654d352818ae80L,0x81d050441b2aa892L,
+ 0x2db067bf3d099328L },
+ { 0xe7c79e86703dcc97L,0xe66f9b37e133e215L,0xcdf119a6e39a7a5cL,
+ 0x47c60de3876f1b61L } },
+ /* 26 << 196 */
+ { { 0x6e405939d860f1b2L,0x3e9a1dbcf5ed4d4aL,0x3f23619ec9b6bcbdL,
+ 0x5ee790cf734e4497L },
+ { 0xf0a834b15bdaf9bbL,0x02cedda74ca295f0L,0x4619aa2bcb8e378cL,
+ 0xe5613244cc987ea4L } },
+ /* 27 << 196 */
+ { { 0x0bc022cc76b23a50L,0x4a2793ad0a6c21ceL,0x3832878089cac3f5L,
+ 0x29176f1bcba26d56L },
+ { 0x062961874f6f59ebL,0x86e9bca98bdc658eL,0x2ca9c4d357e30402L,
+ 0x5438b216516a09bbL } },
+ /* 28 << 196 */
+ { { 0x0a6a063c7672765aL,0x37a3ce640547b9bfL,0x42c099c898b1a633L,
+ 0xb5ab800d05ee6961L },
+ { 0xf1963f5911a5acd6L,0xbaee615746201063L,0x36d9a649a596210aL,
+ 0xaed043631ba7138cL } },
+ /* 29 << 196 */
+ { { 0xcf817d1ca4a82b76L,0x5586960ef3806be9L,0x7ab67c8909dc6bb5L,
+ 0x52ace7a0114fe7ebL },
+ { 0xcd987618cbbc9b70L,0x4f06fd5a604ca5e1L,0x90af14ca6dbde133L,
+ 0x1afe4322948a3264L } },
+ /* 30 << 196 */
+ { { 0xa70d2ca6c44b2c6cL,0xab7267990ef87dfeL,0x310f64dc2e696377L,
+ 0x49b42e684c8126a0L },
+ { 0x0ea444c3cea0b176L,0x53a8ddf7cb269182L,0xf3e674ebbbba9dcbL,
+ 0x0d2878a8d8669d33L } },
+ /* 31 << 196 */
+ { { 0x04b935d5d019b6a3L,0xbb5cf88e406f1e46L,0xa1912d165b57c111L,
+ 0x9803fc2119ebfd78L },
+ { 0x4f231c9ec07764a9L,0xd93286eeb75bd055L,0x83a9457d8ee6c9deL,
+ 0x046959156087ec90L } },
+ /* 32 << 196 */
+ { { 0x14c6dd8a58d6cd46L,0x9cb633b58e6634d2L,0xc1305047f81bc328L,
+ 0x12ede0e226a177e5L },
+ { 0x332cca62065a6f4fL,0xc3a47ecd67be487bL,0x741eb1870f47ed1cL,
+ 0x99e66e58e7598b14L } },
+ /* 33 << 196 */
+ { { 0x6f0544ca63d0ff12L,0xe5efc784b610a05fL,0xf72917b17cad7b47L,
+ 0x3ff6ea20f2cac0c0L },
+ { 0xcc23791bf21db8b7L,0x7dac70b1d7d93565L,0x682cda1d694bdaadL,
+ 0xeb88bb8c1023516dL } },
+ /* 34 << 196 */
+ { { 0xc4c634b4dfdbeb1bL,0x22f5ca72b4ee4deaL,0x1045a368e6524821L,
+ 0xed9e8a3f052b18b2L },
+ { 0x9b7f2cb1b961f49aL,0x7fee2ec17b009670L,0x350d875422507a6dL,
+ 0x561bd7114db55f1dL } },
+ /* 35 << 196 */
+ { { 0x4c189ccc320bbcafL,0x568434cfdf1de48cL,0x6af1b00e0fa8f128L,
+ 0xf0ba9d028907583cL },
+ { 0x735a400432ff9f60L,0x3dd8e4b6c25dcf33L,0xf2230f1642c74cefL,
+ 0xd8117623013fa8adL } },
+ /* 36 << 196 */
+ { { 0x36822876f51fe76eL,0x8a6811cc11d62589L,0xc3fc7e6546225718L,
+ 0xb7df2c9fc82fdbcdL },
+ { 0x3b1d4e52dd7b205bL,0xb695947847a2e414L,0x05e4d793efa91148L,
+ 0xb47ed446fd2e9675L } },
+ /* 37 << 196 */
+ { { 0x1a7098b904c9d9bfL,0x661e28811b793048L,0xb1a16966b01ee461L,
+ 0xbc5213082954746fL },
+ { 0xc909a0fc2477de50L,0xd80bb41c7dbd51efL,0xa85be7ec53294905L,
+ 0x6d465b1883958f97L } },
+ /* 38 << 196 */
+ { { 0x16f6f330fb6840fdL,0xfaaeb2143401e6c8L,0xaf83d30fccb5b4f8L,
+ 0x22885739266dec4bL },
+ { 0x51b4367c7bc467dfL,0x926562e3d842d27aL,0xdfcb66140fea14a6L,
+ 0xeb394daef2734cd9L } },
+ /* 39 << 196 */
+ { { 0x3eeae5d211c0be98L,0xb1e6ed11814e8165L,0x191086bce52bce1cL,
+ 0x14b74cc6a75a04daL },
+ { 0x63cf11868c060985L,0x071047de2dbd7f7cL,0x4e433b8bce0942caL,
+ 0xecbac447d8fec61dL } },
+ /* 40 << 196 */
+ { { 0x8f0ed0e2ebf3232fL,0xfff80f9ec52a2eddL,0xad9ab43375b55fdbL,
+ 0x73ca7820e42e0c11L },
+ { 0x6dace0a0e6251b46L,0x89bc6b5c4c0d932dL,0x3438cd77095da19aL,
+ 0x2f24a9398d48bdfbL } },
+ /* 41 << 196 */
+ { { 0x99b47e46766561b7L,0x736600e60ed0322aL,0x06a47cb1638e1865L,
+ 0x927c1c2dcb136000L },
+ { 0x295423370cc5df69L,0x99b37c0209d649a9L,0xc5f0043c6aefdb27L,
+ 0x6cdd99871be95c27L } },
+ /* 42 << 196 */
+ { { 0x69850931390420d2L,0x299c40ac0983efa4L,0x3a05e778af39aeadL,
+ 0x8427440843a45193L },
+ { 0x6bcd0fb991a711a0L,0x461592c89f52ab17L,0xb49302b4da3c6ed6L,
+ 0xc51fddc7330d7067L } },
+ /* 43 << 196 */
+ { { 0x94babeb6da50d531L,0x521b840da6a7b9daL,0x5305151e404bdc89L,
+ 0x1bcde201d0d07449L },
+ { 0xf427a78b3b76a59aL,0xf84841ce07791a1bL,0xebd314bebf91ed1cL,
+ 0x8e61d34cbf172943L } },
+ /* 44 << 196 */
+ { { 0x1d5dc4515541b892L,0xb186ee41fc9d9e54L,0x9d9f345ed5bf610dL,
+ 0x3e7ba65df6acca9fL },
+ { 0x9dda787aa8369486L,0x09f9dab78eb5ba53L,0x5afb2033d6481bc3L,
+ 0x76f4ce30afa62104L } },
+ /* 45 << 196 */
+ { { 0xa8fa00cff4f066b5L,0x89ab5143461dafc2L,0x44339ed7a3389998L,
+ 0x2ff862f1bc214903L },
+ { 0x2c88f985b05556e3L,0xcd96058e3467081eL,0x7d6a4176edc637eaL,
+ 0xe1743d0936a5acdcL } },
+ /* 46 << 196 */
+ { { 0x66fd72e27eb37726L,0xf7fa264e1481a037L,0x9fbd3bde45f4aa79L,
+ 0xed1e0147767c3e22L },
+ { 0x7621f97982e7abe2L,0x19eedc7245f633f8L,0xe69b155e6137bf3aL,
+ 0xa0ad13ce414ee94eL } },
+ /* 47 << 196 */
+ { { 0x93e3d5241c0e651aL,0xab1a6e2a02ce227eL,0xe7af17974ab27ecaL,
+ 0x245446debd444f39L },
+ { 0x59e22a2156c07613L,0x43deafcef4275498L,0x10834ccb67fd0946L,
+ 0xa75841e547406edfL } },
+ /* 48 << 196 */
+ { { 0xebd6a6777b0ac93dL,0xa6e37b0d78f5e0d7L,0x2516c09676f5492bL,
+ 0x1e4bf8889ac05f3aL },
+ { 0xcdb42ce04df0ba2bL,0x935d5cfd5062341bL,0x8a30333382acac20L,
+ 0x429438c45198b00eL } },
+ /* 49 << 196 */
+ { { 0x1d083bc9049d33faL,0x58b82dda946f67ffL,0xac3e2db867a1d6a3L,
+ 0x62e6bead1798aac8L },
+ { 0xfc85980fde46c58cL,0xa7f6937969c8d7beL,0x23557927837b35ecL,
+ 0x06a933d8e0790c0cL } },
+ /* 50 << 196 */
+ { { 0x827c0e9b077ff55dL,0x53977798bb26e680L,0x595308741d9cb54fL,
+ 0xcca3f4494aac53efL },
+ { 0x11dc5c87a07eda0fL,0xc138bccffd6400c8L,0x549680d313e5da72L,
+ 0xc93eed824540617eL } },
+ /* 51 << 196 */
+ { { 0xfd3db1574d0b75c0L,0x9716eb426386075bL,0x0639605c817b2c16L,
+ 0x09915109f1e4f201L },
+ { 0x35c9a9285cca6c3bL,0xb25f7d1a3505c900L,0xeb9f7d20630480c4L,
+ 0xc3c7b8c62a1a501cL } },
+ /* 52 << 196 */
+ { { 0x3f99183c5a1f8e24L,0xfdb118fa9dd255f0L,0xb9b18b90c27f62a6L,
+ 0xe8f732f7396ec191L },
+ { 0x524a2d910be786abL,0x5d32adef0ac5a0f5L,0x9b53d4d69725f694L,
+ 0x032a76c60510ba89L } },
+ /* 53 << 196 */
+ { { 0x840391a3ebeb1544L,0x44b7b88c3ed73ac3L,0xd24bae7a256cb8b3L,
+ 0x7ceb151ae394cb12L },
+ { 0xbd6b66d05bc1e6a8L,0xec70cecb090f07bfL,0x270644ed7d937589L,
+ 0xee9e1a3d5f1dccfeL } },
+ /* 54 << 196 */
+ { { 0xb0d40a84745b98d2L,0xda429a212556ed40L,0xf676eced85148cb9L,
+ 0x5a22d40cded18936L },
+ { 0x3bc4b9e570e8a4ceL,0xbfd1445b9eae0379L,0xf23f2c0c1a0bd47eL,
+ 0xa9c0bb31e1845531L } },
+ /* 55 << 196 */
+ { { 0x9ddc4d600a4c3f6bL,0xbdfaad792c15ef44L,0xce55a2367f484accL,
+ 0x08653ca7055b1f15L },
+ { 0x2efa8724538873a3L,0x09299e5dace1c7e7L,0x07afab66ade332baL,
+ 0x9be1fdf692dd71b7L } },
+ /* 56 << 196 */
+ { { 0xa49b5d595758b11cL,0x0b852893c8654f40L,0xb63ef6f452379447L,
+ 0xd4957d29105e690cL },
+ { 0x7d484363646559b0L,0xf4a8273c49788a8eL,0xee406cb834ce54a9L,
+ 0x1e1c260ff86fda9bL } },
+ /* 57 << 196 */
+ { { 0xe150e228cf6a4a81L,0x1fa3b6a31b488772L,0x1e6ff110c5a9c15bL,
+ 0xc6133b918ad6aa47L },
+ { 0x8ac5d55c9dffa978L,0xba1d1c1d5f3965f2L,0xf969f4e07732b52fL,
+ 0xfceecdb5a5172a07L } },
+ /* 58 << 196 */
+ { { 0xb0120a5f10f2b8f5L,0xc83a6cdf5c4c2f63L,0x4d47a491f8f9c213L,
+ 0xd9e1cce5d3f1bbd5L },
+ { 0x0d91bc7caba7e372L,0xfcdc74c8dfd1a2dbL,0x05efa800374618e5L,
+ 0x1121696915a7925eL } },
+ /* 59 << 196 */
+ { { 0xd4c89823f6021c5dL,0x880d5e84eff14423L,0x6523bc5a6dcd1396L,
+ 0xd1acfdfc113c978bL },
+ { 0xb0c164e8bbb66840L,0xf7f4301e72b58459L,0xc29ad4a6a638e8ecL,
+ 0xf5ab896146b78699L } },
+ /* 60 << 196 */
+ { { 0x9dbd79740e954750L,0x0121de8864f9d2c6L,0x2e597b42d985232eL,
+ 0x55b6c3c553451777L },
+ { 0xbb53e547519cb9fbL,0xf134019f8428600dL,0x5a473176e081791aL,
+ 0x2f3e226335fb0c08L } },
+ /* 61 << 196 */
+ { { 0xb28c301773d273b0L,0xccd210767721ef9aL,0x054cc292b650dc39L,
+ 0x662246de6188045eL },
+ { 0x904b52fa6b83c0d1L,0xa72df26797e9cd46L,0x886b43cd899725e4L,
+ 0x2b651688d849ff22L } },
+ /* 62 << 196 */
+ { { 0x60479b7902f34533L,0x5e354c140c77c148L,0xb4bb7581a8537c78L,
+ 0x188043d7efe1495fL },
+ { 0x9ba12f428c1d5026L,0x2e0c8a2693d4aaabL,0xbdba7b8baa57c450L,
+ 0x140c9ad69bbdafefL } },
+ /* 63 << 196 */
+ { { 0x2067aa4225ac0f18L,0xf7b1295b04d1fbf3L,0x14829111a4b04824L,
+ 0x2ce3f19233bd5e91L },
+ { 0x9c7a1d558f2e1b72L,0xfe932286302aa243L,0x497ca7b4d4be9554L,
+ 0xb8e821b8e0547a6eL } },
+ /* 64 << 196 */
+ { { 0xfb2838be67e573e0L,0x05891db94084c44bL,0x9131137396c1c2c5L,
+ 0x6aebfa3fd958444bL },
+ { 0xac9cdce9e56e55c1L,0x7148ced32caa46d0L,0x2e10c7efb61fe8ebL,
+ 0x9fd835daff97cf4dL } },
+ /* 0 << 203 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 203 */
+ { { 0xa36da109081e9387L,0xfb9780d78c935828L,0xd5940332e540b015L,
+ 0xc9d7b51be0f466faL },
+ { 0xfaadcd41d6d9f671L,0xba6c1e28b1a2ac17L,0x066a7833ed201e5fL,
+ 0x19d99719f90f462bL } },
+ /* 2 << 203 */
+ { { 0xf431f462060b5f61L,0xa56f46b47bd057c2L,0x348dca6c47e1bf65L,
+ 0x9a38783e41bcf1ffL },
+ { 0x7a5d33a9da710718L,0x5a7799872e0aeaf6L,0xca87314d2d29d187L,
+ 0xfa0edc3ec687d733L } },
+ /* 3 << 203 */
+ { { 0x9df336216a31e09bL,0xde89e44dc1350e35L,0x292148714ca0cf52L,
+ 0xdf3796720b88a538L },
+ { 0xc92a510a2591d61bL,0x79aa87d7585b447bL,0xf67db604e5287f77L,
+ 0x1697c8bf5efe7a80L } },
+ /* 4 << 203 */
+ { { 0x1c894849cb198ac7L,0xa884a93d0f264665L,0x2da964ef9b200678L,
+ 0x3c351b87009834e6L },
+ { 0xafb2ef9fe2c4b44bL,0x580f6c473326790cL,0xb84805210b02264aL,
+ 0x8ba6f9e242a194e2L } },
+ /* 5 << 203 */
+ { { 0xfc87975f8fb54738L,0x3516078827c3ead3L,0x834116d2b74a085aL,
+ 0x53c99a73a62fe996L },
+ { 0x87585be05b81c51bL,0x925bafa8be0852b7L,0x76a4fafda84d19a7L,
+ 0x39a45982585206d4L } },
+ /* 6 << 203 */
+ { { 0x499b6ab65eb03c0eL,0xf19b795472bc3fdeL,0xa86b5b9c6e3a80d2L,
+ 0xe43775086d42819fL },
+ { 0xc1663650bb3ee8a3L,0x75eb14fcb132075fL,0xa8ccc9067ad834f6L,
+ 0xea6a2474e6e92ffdL } },
+ /* 7 << 203 */
+ { { 0x9d72fd950f8d6758L,0xcb84e101408c07ddL,0xb9114bfda5e23221L,
+ 0x358b5fe2e94e742cL },
+ { 0x1c0577ec95f40e75L,0xf01554513d73f3d6L,0x9d55cd67bd1b9b66L,
+ 0x63e86e78af8d63c7L } },
+ /* 8 << 203 */
+ { { 0x39d934abd3c095f1L,0x04b261bee4b76d71L,0x1d2e6970e73e6984L,
+ 0x879fb23b5e5fcb11L },
+ { 0x11506c72dfd75490L,0x3a97d08561bcf1c1L,0x43201d82bf5e7007L,
+ 0x7f0ac52f798232a7L } },
+ /* 9 << 203 */
+ { { 0x2715cbc46eb564d4L,0x8d6c752c9e570e29L,0xf80247c89ef5fd5dL,
+ 0xc3c66b46d53eb514L },
+ { 0x9666b4010f87de56L,0xce62c06fc6c603b5L,0xae7b4c607e4fc942L,
+ 0x38ac0b77663a9c19L } },
+ /* 10 << 203 */
+ { { 0xcb4d20ee4b049136L,0x8b63bf12356a4613L,0x1221aef670e08128L,
+ 0xe62d8c514acb6b16L },
+ { 0x71f64a67379e7896L,0xb25237a2cafd7fa5L,0xf077bd983841ba6aL,
+ 0xc4ac02443cd16e7eL } },
+ /* 11 << 203 */
+ { { 0x548ba86921fea4caL,0xd36d0817f3dfdac1L,0x09d8d71ff4685fafL,
+ 0x8eff66bec52c459aL },
+ { 0x182faee70b57235eL,0xee3c39b10106712bL,0x5107331fc0fcdcb0L,
+ 0x669fb9dca51054baL } },
+ /* 12 << 203 */
+ { { 0xb25101fb319d7682L,0xb02931290a982feeL,0x51c1c9b90261b344L,
+ 0x0e008c5bbfd371faL },
+ { 0xd866dd1c0278ca33L,0x666f76a6e5aa53b1L,0xe5cfb7796013a2cfL,
+ 0x1d3a1aada3521836L } },
+ /* 13 << 203 */
+ { { 0xcedd253173faa485L,0xc8ee6c4fc0a76878L,0xddbccfc92a11667dL,
+ 0x1a418ea91c2f695aL },
+ { 0xdb11bd9251f73971L,0x3e4b3c82da2ed89fL,0x9a44f3f4e73e0319L,
+ 0xd1e3de0f303431afL } },
+ /* 14 << 203 */
+ { { 0x3c5604ff50f75f9cL,0x1d8eddf37e752b22L,0x0ef074dd3c9a1118L,
+ 0xd0ffc172ccb86d7bL },
+ { 0xabd1ece3037d90f2L,0xe3f307d66055856cL,0x422f93287e4c6dafL,
+ 0x902aac66334879a0L } },
+ /* 15 << 203 */
+ { { 0xb6a1e7bf94cdfadeL,0x6c97e1ed7fc6d634L,0x662ad24da2fb63f8L,
+ 0xf81be1b9a5928405L },
+ { 0x86d765e4d14b4206L,0xbecc2e0e8fa0db65L,0xa28838e0b17fc76cL,
+ 0xe49a602ae37cf24eL } },
+ /* 16 << 203 */
+ { { 0x76b4131a567193ecL,0xaf3c305ae5f6e70bL,0x9587bd39031eebddL,
+ 0x5709def871bbe831L },
+ { 0x570599830eb2b669L,0x4d80ce1b875b7029L,0x838a7da80364ac16L,
+ 0x2f431d23be1c83abL } },
+ /* 17 << 203 */
+ { { 0xe56812a6f9294dd3L,0xb448d01f9b4b0d77L,0xf3ae606104e8305cL,
+ 0x2bead64594d8c63eL },
+ { 0x0a85434d84fd8b07L,0x537b983ff7a9dee5L,0xedcc5f18ef55bd85L,
+ 0x2041af6221c6cf8bL } },
+ /* 18 << 203 */
+ { { 0x8e52874cb940c71eL,0x211935a9db5f4b3aL,0x94350492301b1dc3L,
+ 0x33d2646d29958620L },
+ { 0x16b0d64bef911404L,0x9d1f25ea9a3c5ef4L,0x20f200eb4a352c78L,
+ 0x43929f2c4bd0b428L } },
+ /* 19 << 203 */
+ { { 0xa5656667c7196e29L,0x7992c2f09391be48L,0xaaa97cbd9ee0cd6eL,
+ 0x51b0310c3dc8c9bfL },
+ { 0x237f8acfdd9f22cbL,0xbb1d81a1b585d584L,0x8d5d85f58c416388L,
+ 0x0d6e5a5a42fe474fL } },
+ /* 20 << 203 */
+ { { 0xe781276638235d4eL,0x1c62bd67496e3298L,0x8378660c3f175bc8L,
+ 0x4d04e18917afdd4dL },
+ { 0x32a8160185a8068cL,0xdb58e4e192b29a85L,0xe8a65b86c70d8a3bL,
+ 0x5f0e6f4e98a0403bL } },
+ /* 21 << 203 */
+ { { 0x0812968469ed2370L,0x34dc30bd0871ee26L,0x3a5ce9487c9c5b05L,
+ 0x7d487b8043a90c87L },
+ { 0x4089ba37dd0e7179L,0x45f80191b4041811L,0x1c3e105898747ba5L,
+ 0x98c4e13a6e1ae592L } },
+ /* 22 << 203 */
+ { { 0xd44636e6e82c9f9eL,0x711db87cc33a1043L,0x6f431263aa8aec05L,
+ 0x43ff120d2744a4aaL },
+ { 0xd3bd892fae77779bL,0xf0fe0cc98cdc9f82L,0xca5f7fe6f1c5b1bcL,
+ 0xcc63a68244929a72L } },
+ /* 23 << 203 */
+ { { 0xc7eaba0c09dbe19aL,0x2f3585ad6b5c73c2L,0x8ab8924b0ae50c30L,
+ 0x17fcd27a638b30baL },
+ { 0xaf414d3410b3d5a5L,0x09c107d22a9accf1L,0x15dac49f946a6242L,
+ 0xaec3df2ad707d642L } },
+ /* 24 << 203 */
+ { { 0x2c2492b73f894ae0L,0xf59df3e5b75f18ceL,0x7cb740d28f53cad0L,
+ 0x3eb585fbc4f01294L },
+ { 0x17da0c8632c7f717L,0xeb8c795baf943f4cL,0x4ee23fb5f67c51d2L,
+ 0xef18757568889949L } },
+ /* 25 << 203 */
+ { { 0xa6b4bdb20389168bL,0xc4ecd258ea577d03L,0x3a63782b55743082L,
+ 0x6f678f4cc72f08cdL },
+ { 0x553511cf65e58dd8L,0xd53b4e3ed402c0cdL,0x37de3e29a037c14cL,
+ 0x86b6c516c05712aaL } },
+ /* 26 << 203 */
+ { { 0x2834da3eb38dff6fL,0xbe012c52ea636be8L,0x292d238c61dd37f8L,
+ 0x0e54523f8f8142dbL },
+ { 0xe31eb436036a05d8L,0x83e3cdff1e93c0ffL,0x3fd2fe0f50821ddfL,
+ 0xc8e19b0dff9eb33bL } },
+ /* 27 << 203 */
+ { { 0xc8cc943fb569a5feL,0xad0090d4d4342d75L,0x82090b4bcaeca000L,
+ 0xca39687f1bd410ebL },
+ { 0xe7bb0df765959d77L,0x39d782189c964999L,0xd87f62e8b2415451L,
+ 0xe5efb774bed76108L } },
+ /* 28 << 203 */
+ { { 0x3ea011a4e822f0d0L,0xbc647ad15a8704f8L,0xbb315b3550c6820fL,
+ 0x863dec3db7e76becL },
+ { 0x01ff5d3af017bfc7L,0x20054439976b8229L,0x067fca370bbd0d3bL,
+ 0xf63dde647f5e3d0fL } },
+ /* 29 << 203 */
+ { { 0x22dbefb32a4c94e9L,0xafbff0fe96f8278aL,0x80aea0b13503793dL,
+ 0xb22380295f06cd29L },
+ { 0x65703e578ec3fecaL,0x06c38314393e7053L,0xa0b751eb7c6734c4L,
+ 0xd2e8a435c59f0f1eL } },
+ /* 30 << 203 */
+ { { 0x147d90525e9ca895L,0x2f4dd31e972072dfL,0xa16fda8ee6c6755cL,
+ 0xc66826ffcf196558L },
+ { 0x1f1a76a30cf43895L,0xa9d604e083c3097bL,0xe190830966390e0eL,
+ 0xa50bf753b3c85effL } },
+ /* 31 << 203 */
+ { { 0x0696bddef6a70251L,0x548b801b3c6ab16aL,0x37fcf704a4d08762L,
+ 0x090b3defdff76c4eL },
+ { 0x87e8cb8969cb9158L,0x44a90744995ece43L,0xf85395f40ad9fbf5L,
+ 0x49b0f6c54fb0c82dL } },
+ /* 32 << 203 */
+ { { 0x75d9bc15adf7cccfL,0x81a3e5d6dfa1e1b0L,0x8c39e444249bc17eL,
+ 0xf37dccb28ea7fd43L },
+ { 0xda654873907fba12L,0x35daa6da4a372904L,0x0564cfc66283a6c5L,
+ 0xd09fa4f64a9395bfL } },
+ /* 33 << 203 */
+ { { 0x688e9ec9aeb19a36L,0xd913f1cec7bfbfb4L,0x797b9a3c61c2faa6L,
+ 0x2f979bec6a0a9c12L },
+ { 0xb5969d0f359679ecL,0xebcf523d079b0460L,0xfd6b000810fab870L,
+ 0x3f2edcda9373a39cL } },
+ /* 34 << 203 */
+ { { 0x0d64f9a76f568431L,0xf848c27c02f8898cL,0xf418ade1260b5bd5L,
+ 0xc1f3e3236973dee8L },
+ { 0x46e9319c26c185ddL,0x6d85b7d8546f0ac4L,0x427965f2247f9d57L,
+ 0xb519b636b0035f48L } },
+ /* 35 << 203 */
+ { { 0x6b6163a9ab87d59cL,0xff9f58c339caaa11L,0x4ac39cde3177387bL,
+ 0x5f6557c2873e77f9L },
+ { 0x6750400636a83041L,0x9b1c96ca75ef196cL,0xf34283deb08c7940L,
+ 0x7ea096441128c316L } },
+ /* 36 << 203 */
+ { { 0xb510b3b56aa39dffL,0x59b43da29f8e4d8cL,0xa8ce31fd9e4c4b9fL,
+ 0x0e20be26c1303c01L },
+ { 0x18187182e8ee47c9L,0xd9687cdb7db98101L,0x7a520e4da1e14ff6L,
+ 0x429808ba8836d572L } },
+ /* 37 << 203 */
+ { { 0xa37ca60d4944b663L,0xf901f7a9a3f91ae5L,0xe4e3e76e9e36e3b1L,
+ 0x9aa219cf29d93250L },
+ { 0x347fe275056a2512L,0xa4d643d9de65d95cL,0x9669d396699fc3edL,
+ 0xb598dee2cf8c6bbeL } },
+ /* 38 << 203 */
+ { { 0x682ac1e5dda9e5c6L,0x4e0d3c72caa9fc95L,0x17faaade772bea44L,
+ 0x5ef8428cab0009c8L },
+ { 0xcc4ce47a460ff016L,0xda6d12bf725281cbL,0x44c678480223aad2L,
+ 0x6e342afa36256e28L } },
+ /* 39 << 203 */
+ { { 0x1400bb0b93a37c04L,0x62b1bc9bdd10bd96L,0x7251adeb0dac46b7L,
+ 0x7d33b92e7be4ef51L },
+ { 0x28b2a94be61fa29aL,0x4b2be13f06422233L,0x36d6d062330d8d37L,
+ 0x5ef80e1eb28ca005L } },
+ /* 40 << 203 */
+ { { 0x174d46996d16768eL,0x9fc4ff6a628bf217L,0x77705a94154e490dL,
+ 0x9d96dd288d2d997aL },
+ { 0x77e2d9d8ce5d72c4L,0x9d06c5a4c11c714fL,0x02aa513679e4a03eL,
+ 0x1386b3c2030ff28bL } },
+ /* 41 << 203 */
+ { { 0xfe82e8a6fb283f61L,0x7df203e5f3abc3fbL,0xeec7c3513a4d3622L,
+ 0xf7d17dbfdf762761L },
+ { 0xc3956e44522055f0L,0xde3012db8fa748dbL,0xca9fcb63bf1dcc14L,
+ 0xa56d9dcfbe4e2f3aL } },
+ /* 42 << 203 */
+ { { 0xb86186b68bcec9c2L,0x7cf24df9680b9f06L,0xc46b45eac0d29281L,
+ 0xfff42bc507b10e12L },
+ { 0x12263c404d289427L,0x3d5f1899b4848ec4L,0x11f97010d040800cL,
+ 0xb4c5f529300feb20L } },
+ /* 43 << 203 */
+ { { 0xcc543f8fde94fdcbL,0xe96af739c7c2f05eL,0xaa5e0036882692e1L,
+ 0x09c75b68950d4ae9L },
+ { 0x62f63df2b5932a7aL,0x2658252ede0979adL,0x2a19343fb5e69631L,
+ 0x718c7501525b666bL } },
+ /* 44 << 203 */
+ { { 0x26a42d69ea40dc3aL,0xdc84ad22aecc018fL,0x25c36c7b3270f04aL,
+ 0x46ba6d4750fa72edL },
+ { 0x6c37d1c593e58a8eL,0xa2394731120c088cL,0xc3be4263cb6e86daL,
+ 0x2c417d367126d038L } },
+ /* 45 << 203 */
+ { { 0x5b70f9c58b6f8efaL,0x671a2faa37718536L,0xd3ced3c6b539c92bL,
+ 0xe56f1bd9a31203c2L },
+ { 0x8b096ec49ff3c8ebL,0x2deae43243491ceaL,0x2465c6eb17943794L,
+ 0x5d267e6620586843L } },
+ /* 46 << 203 */
+ { { 0x9d3d116db07159d0L,0xae07a67fc1896210L,0x8fc84d87bb961579L,
+ 0x30009e491c1f8dd6L },
+ { 0x8a8caf22e3132819L,0xcffa197cf23ab4ffL,0x58103a44205dd687L,
+ 0x57b796c30ded67a2L } },
+ /* 47 << 203 */
+ { { 0x0b9c3a6ca1779ad7L,0xa33cfe2e357c09c5L,0x2ea293153db4a57eL,
+ 0x919596958ebeb52eL },
+ { 0x118db9a6e546c879L,0x8e996df46295c8d6L,0xdd99048455ec806bL,
+ 0x24f291ca165c1035L } },
+ /* 48 << 203 */
+ { { 0xcca523bb440e2229L,0x324673a273ef4d04L,0xaf3adf343e11ec39L,
+ 0x6136d7f1dc5968d3L },
+ { 0x7a7b2899b053a927L,0x3eaa2661ae067ecdL,0x8549b9c802779cd9L,
+ 0x061d7940c53385eaL } },
+ /* 49 << 203 */
+ { { 0x3e0ba883f06d18bdL,0x4ba6de53b2700843L,0xb966b668591a9e4dL,
+ 0x93f675677f4fa0edL },
+ { 0x5a02711b4347237bL,0xbc041e2fe794608eL,0x55af10f570f73d8cL,
+ 0xd2d4d4f7bb7564f7L } },
+ /* 50 << 203 */
+ { { 0xd7d27a89b3e93ce7L,0xf7b5a8755d3a2c1bL,0xb29e68a0255b218aL,
+ 0xb533837e8af76754L },
+ { 0xd1b05a73579fab2eL,0xb41055a1ecd74385L,0xb2369274445e9115L,
+ 0x2972a7c4f520274eL } },
+ /* 51 << 203 */
+ { { 0x6c08334ef678e68aL,0x4e4160f099b057edL,0x3cfe11b852ccb69aL,
+ 0x2fd1823a21c8f772L },
+ { 0xdf7f072f3298f055L,0x8c0566f9fec74a6eL,0xe549e0195bb4d041L,
+ 0x7c3930ba9208d850L } },
+ /* 52 << 203 */
+ { { 0xe07141fcaaa2902bL,0x539ad799e4f69ad3L,0xa6453f94813f9ffdL,
+ 0xc58d3c48375bc2f7L },
+ { 0xb3326fad5dc64e96L,0x3aafcaa9b240e354L,0x1d1b0903aca1e7a9L,
+ 0x4ceb97671211b8a0L } },
+ /* 53 << 203 */
+ { { 0xeca83e49e32a858eL,0x4c32892eae907badL,0xd5b42ab62eb9b494L,
+ 0x7fde3ee21eabae1bL },
+ { 0x13b5ab09caf54957L,0xbfb028bee5f5d5d5L,0x928a06502003e2c0L,
+ 0x90793aac67476843L } },
+ /* 54 << 203 */
+ { { 0x5e942e79c81710a0L,0x557e4a3627ccadd4L,0x72a2bc564bcf6d0cL,
+ 0x09ee5f4326d7b80cL },
+ { 0x6b70dbe9d4292f19L,0x56f74c2663f16b18L,0xc23db0f735fbb42aL,
+ 0xb606bdf66ae10040L } },
+ /* 55 << 203 */
+ { { 0x1eb15d4d044573acL,0x7dc3cf86556b0ba4L,0x97af9a33c60df6f7L,
+ 0x0b1ef85ca716ce8cL },
+ { 0x2922f884c96958beL,0x7c32fa9435690963L,0x2d7f667ceaa00061L,
+ 0xeaaf7c173547365cL } },
+ /* 56 << 203 */
+ { { 0x1eb4de4687032d58L,0xc54f3d835e2c79e0L,0x07818df45d04ef23L,
+ 0x55faa9c8673d41b4L },
+ { 0xced64f6f89b95355L,0x4860d2eab7415c84L,0x5fdb9bd2050ebad3L,
+ 0xdb53e0cc6685a5bfL } },
+ /* 57 << 203 */
+ { { 0xb830c0319feb6593L,0xdd87f3106accff17L,0x2303ebab9f555c10L,
+ 0x94603695287e7065L },
+ { 0xf88311c32e83358cL,0x508dd9b4eefb0178L,0x7ca237062dba8652L,
+ 0x62aac5a30047abe5L } },
+ /* 58 << 203 */
+ { { 0x9a61d2a08b1ea7b3L,0xd495ab63ae8b1485L,0x38740f8487052f99L,
+ 0x178ebe5bb2974eeaL },
+ { 0x030bbcca5b36d17fL,0xb5e4cce3aaf86eeaL,0xb51a022068f8e9e0L,
+ 0xa434879609eb3e75L } },
+ /* 59 << 203 */
+ { { 0xbe592309eef1a752L,0x5d7162d76f2aa1edL,0xaebfb5ed0f007dd2L,
+ 0x255e14b2c89edd22L },
+ { 0xba85e0720303b697L,0xc5d17e25f05720ffL,0x02b58d6e5128ebb6L,
+ 0x2c80242dd754e113L } },
+ /* 60 << 203 */
+ { { 0x919fca5fabfae1caL,0x937afaac1a21459bL,0x9e0ca91c1f66a4d2L,
+ 0x194cc7f323ec1331L },
+ { 0xad25143a8aa11690L,0xbe40ad8d09b59e08L,0x37d60d9be750860aL,
+ 0x6c53b008c6bf434cL } },
+ /* 61 << 203 */
+ { { 0xb572415d1356eb80L,0xb8bf9da39578ded8L,0x22658e365e8fb38bL,
+ 0x9b70ce225af8cb22L },
+ { 0x7c00018a829a8180L,0x84329f93b81ed295L,0x7c343ea25f3cea83L,
+ 0x38f8655f67586536L } },
+ /* 62 << 203 */
+ { { 0xa661a0d01d3ec517L,0x98744652512321aeL,0x084ca591eca92598L,
+ 0xa9bb9dc91dcb3febL },
+ { 0x14c5435578b4c240L,0x5ed62a3b610cafdcL,0x07512f371b38846bL,
+ 0x571bb70ab0e38161L } },
+ /* 63 << 203 */
+ { { 0xb556b95b2da705d2L,0x3ef8ada6b1a08f98L,0x85302ca7ddecfbe5L,
+ 0x0e530573943105cdL },
+ { 0x60554d5521a9255dL,0x63a32fa1f2f3802aL,0x35c8c5b0cd477875L,
+ 0x97f458ea6ad42da1L } },
+ /* 64 << 203 */
+ { { 0x832d7080eb6b242dL,0xd30bd0233b71e246L,0x7027991bbe31139dL,
+ 0x68797e91462e4e53L },
+ { 0x423fe20a6b4e185aL,0x82f2c67e42d9b707L,0x25c817684cf7811bL,
+ 0xbd53005e045bb95dL } },
+ /* 0 << 210 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 210 */
+ { { 0xe5f649be9d8e68fdL,0xdb0f05331b044320L,0xf6fde9b3e0c33398L,
+ 0x92f4209b66c8cfaeL },
+ { 0xe9d1afcc1a739d4bL,0x09aea75fa28ab8deL,0x14375fb5eac6f1d0L,
+ 0x6420b560708f7aa5L } },
+ /* 2 << 210 */
+ { { 0x9eae499c6254dc41L,0x7e2939247a837e7eL,0x74aec08c090524a7L,
+ 0xf82b92198d6f55f2L },
+ { 0x493c962e1402cec5L,0x9f17ca17fa2f30e7L,0xbcd783e8e9b879cbL,
+ 0xea3d8c145a6f145fL } },
+ /* 3 << 210 */
+ { { 0xdede15e75e0dee6eL,0x74f24872dc628aa2L,0xd3e9c4fe7861bb93L,
+ 0x56d4822a6187b2e0L },
+ { 0xb66417cfc59826f9L,0xca2609692408169eL,0xedf69d06c79ef885L,
+ 0x00031f8adc7d138fL } },
+ /* 4 << 210 */
+ { { 0x103c46e60ebcf726L,0x4482b8316231470eL,0x6f6dfaca487c2109L,
+ 0x2e0ace9762e666efL },
+ { 0x3246a9d31f8d1f42L,0x1b1e83f1574944d2L,0x13dfa63aa57f334bL,
+ 0x0cf8daed9f025d81L } },
+ /* 5 << 210 */
+ { { 0x30d78ea800ee11c1L,0xeb053cd4b5e3dd75L,0x9b65b13ed58c43c5L,
+ 0xc3ad49bdbd151663L },
+ { 0x99fd8e41b6427990L,0x12cf15bd707eae1eL,0x29ad4f1b1aabb71eL,
+ 0x5143e74d07545d0eL } },
+ /* 6 << 210 */
+ { { 0x30266336c88bdee1L,0x25f293065876767cL,0x9c078571c6731996L,
+ 0xc88690b2ed552951L },
+ { 0x274f2c2d852705b4L,0xb0bf8d444e09552dL,0x7628beeb986575d1L,
+ 0x407be2387f864651L } },
+ /* 7 << 210 */
+ { { 0x0e5e3049a639fc6bL,0xe75c35d986003625L,0x0cf35bd85dcc1646L,
+ 0x8bcaced26c26273aL },
+ { 0xe22ecf1db5536742L,0x013dd8971a9e068bL,0x17f411cb8a7909c5L,
+ 0x5757ac98861dd506L } },
+ /* 8 << 210 */
+ { { 0x85de1f0d1e935abbL,0xdefd10b4154de37aL,0xb8d9e392369cebb5L,
+ 0x54d5ef9b761324beL },
+ { 0x4d6341ba74f17e26L,0xc0a0e3c878c1dde4L,0xa6d7758187d918fdL,
+ 0x6687601502ca3a13L } },
+ /* 9 << 210 */
+ { { 0xc7313e9cf36658f0L,0xc433ef1c71f8057eL,0x853262461b6a835aL,
+ 0xc8f053987c86394cL },
+ { 0xff398cdfe983c4a1L,0xbf5e816203b7b931L,0x93193c46b7b9045bL,
+ 0x1e4ebf5da4a6e46bL } },
+ /* 10 << 210 */
+ { { 0xf9942a6043a24fe7L,0x29c1191effb3492bL,0x9f662449902fde05L,
+ 0xc792a7ac6713c32dL },
+ { 0x2fd88ad8b737982cL,0x7e3a0319a21e60e3L,0x09b0de447383591aL,
+ 0x6df141ee8310a456L } },
+ /* 11 << 210 */
+ { { 0xaec1a039e6d6f471L,0x14b2ba0f1198d12eL,0xebc1a1603aeee5acL,
+ 0x401f4836e0b964ceL },
+ { 0x2ee437964fd03f66L,0x3fdb4e49dd8f3f12L,0x6ef267f629380f18L,
+ 0x3e8e96708da64d16L } },
+ /* 12 << 210 */
+ { { 0xbc19180c207674f1L,0x112e09a733ae8fdbL,0x996675546aaeb71eL,
+ 0x79432af1e101b1c7L },
+ { 0xd5eb558fde2ddec6L,0x81392d1f5357753fL,0xa7a76b973ae1158aL,
+ 0x416fbbff4a899991L } },
+ /* 13 << 210 */
+ { { 0x9e65fdfd0d4a9dcfL,0x7bc29e48944ddf12L,0xbc1a92d93c856866L,
+ 0x273c69056e98dfe2L },
+ { 0x69fce418cdfaa6b8L,0x606bd8235061c69fL,0x42d495a06af75e27L,
+ 0x8ed3d5056d873a1fL } },
+ /* 14 << 210 */
+ { { 0xaf5528416ab25b6aL,0xc6c0ffc72b1a4523L,0xab18827b21c99e03L,
+ 0x060e86489034691bL },
+ { 0x5207f90f93c7f398L,0x9f4a96cb82f8d10bL,0xdd71cd793ad0f9e3L,
+ 0x84f435d2fc3a54f5L } },
+ /* 15 << 210 */
+ { { 0x4b03c55b8e33787fL,0xef42f975a6384673L,0xff7304f75051b9f0L,
+ 0x18aca1dc741c87c2L },
+ { 0x56f120a72d4bfe80L,0xfd823b3d053e732cL,0x11bccfe47537ca16L,
+ 0xdf6c9c741b5a996bL } },
+ /* 16 << 210 */
+ { { 0xee7332c7904fc3faL,0x14a23f45c7e3636aL,0xc38659c3f091d9aaL,
+ 0x4a995e5db12d8540L },
+ { 0x20a53becf3a5598aL,0x56534b17b1eaa995L,0x9ed3dca4bf04e03cL,
+ 0x716c563ad8d56268L } },
+ /* 17 << 210 */
+ { { 0x27ba77a41d6178e7L,0xe4c80c4068a1ff8eL,0x750110990a13f63dL,
+ 0x7bf33521a61d46f3L },
+ { 0x0aff218e10b365bbL,0x810218040fd7ea75L,0x05a3fd8aa4b3a925L,
+ 0xb829e75f9b3db4e6L } },
+ /* 18 << 210 */
+ { { 0x6bdc75a54d53e5fbL,0x04a5dc02d52717e3L,0x86af502fe9a42ec2L,
+ 0x8867e8fb2630e382L },
+ { 0xbf845c6ebec9889bL,0x54f491f2cb47c98dL,0xa3091fba790c2a12L,
+ 0xd7f6fd78c20f708bL } },
+ /* 19 << 210 */
+ { { 0xa569ac30acde5e17L,0xd0f996d06852b4d7L,0xe51d4bb54609ae54L,
+ 0x3fa37d170daed061L },
+ { 0x62a8868434b8fb41L,0x99a2acbd9efb64f1L,0xb75c1a5e6448e1f2L,
+ 0xfa99951a42b5a069L } },
+ /* 20 << 210 */
+ { { 0x6d956e892f3b26e7L,0xf4709860da875247L,0x3ad151792482dda3L,
+ 0xd64110e3017d82f0L },
+ { 0x14928d2cfad414e4L,0x2b155f582ed02b24L,0x481a141bcb821bf1L,
+ 0x12e3c7704f81f5daL } },
+ /* 21 << 210 */
+ { { 0xe49c5de59fff8381L,0x110532325bbec894L,0xa0d051cc454d88c4L,
+ 0x4f6db89c1f8e531bL },
+ { 0x34fe3fd6ca563a44L,0x7f5c221558da8ab9L,0x8445016d9474f0a1L,
+ 0x17d34d61cb7d8a0aL } },
+ /* 22 << 210 */
+ { { 0x8e9d39101c474019L,0xcaff2629d52ceefbL,0xf9cf3e32c1622c2bL,
+ 0xd4b95e3ce9071a05L },
+ { 0xfbbca61f1594438cL,0x1eb6e6a604aadedfL,0x853027f468e14940L,
+ 0x221d322adfabda9cL } },
+ /* 23 << 210 */
+ { { 0xed8ea9f6b7cb179aL,0xdc7b764db7934dccL,0xfcb139405e09180dL,
+ 0x6629a6bfb47dc2ddL },
+ { 0xbfc55e4e9f5a915eL,0xb1db9d376204441eL,0xf82d68cf930c5f53L,
+ 0x17d3a142cbb605b1L } },
+ /* 24 << 210 */
+ { { 0xdd5944ea308780f2L,0xdc8de7613845f5e4L,0x6beaba7d7624d7a3L,
+ 0x1e709afd304df11eL },
+ { 0x9536437602170456L,0xbf204b3ac8f94b64L,0x4e53af7c5680ca68L,
+ 0x0526074ae0c67574L } },
+ /* 25 << 210 */
+ { { 0x95d8cef8ecd92af6L,0xe6b9fa7a6cd1745aL,0x3d546d3da325c3e4L,
+ 0x1f57691d9ae93aaeL },
+ { 0xe891f3fe9d2e1a33L,0xd430093fac063d35L,0xeda59b125513a327L,
+ 0xdc2134f35536f18fL } },
+ /* 26 << 210 */
+ { { 0xaa51fe2c5c210286L,0x3f68aaee1cab658cL,0x5a23a00bf9357292L,
+ 0x9a626f397efdabedL },
+ { 0xfe2b3bf3199d78e3L,0xb7a2af7771bbc345L,0x3d19827a1e59802cL,
+ 0x823bbc15b487a51cL } },
+ /* 27 << 210 */
+ { { 0x856139f299d0a422L,0x9ac3df65f456c6fbL,0xaddf65c6701f8bd6L,
+ 0x149f321e3758df87L },
+ { 0xb1ecf714721b7ebaL,0xe17df09831a3312aL,0xdb2fd6ecd5c4d581L,
+ 0xfd02996f8fcea1b3L } },
+ /* 28 << 210 */
+ { { 0xe29fa63e7882f14fL,0xc9f6dc3507c6cadcL,0x46f22d6fb882bed0L,
+ 0x1a45755bd118e52cL },
+ { 0x9f2c7c277c4608cfL,0x7ccbdf32568012c2L,0xfcb0aedd61729b0eL,
+ 0x7ca2ca9ef7d75dbfL } },
+ /* 29 << 210 */
+ { { 0xf58fecb16f640f62L,0xe274b92b39f51946L,0x7f4dfc046288af44L,
+ 0x0a91f32aeac329e5L },
+ { 0x43ad274bd6aaba31L,0x719a16400f6884f9L,0x685d29f6daf91e20L,
+ 0x5ec1cc3327e49d52L } },
+ /* 30 << 210 */
+ { { 0x38f4de963b54a059L,0x0e0015e5efbcfdb3L,0x177d23d94dbb8da6L,
+ 0x98724aa297a617adL },
+ { 0x30f0885bfdb6558eL,0xf9f7a28ac7899a96L,0xd2ae8ac8872dc112L,
+ 0xfa0642ca73c3c459L } },
+ /* 31 << 210 */
+ { { 0x15296981e7dfc8d6L,0x67cd44501fb5b94aL,0x0ec71cf10eddfd37L,
+ 0xc7e5eeb39a8eddc7L },
+ { 0x02ac8e3d81d95028L,0x0088f17270b0e35dL,0xec041fabe1881fe3L,
+ 0x62cf71b8d99e7faaL } },
+ /* 32 << 210 */
+ { { 0x5043dea7e0f222c2L,0x309d42ac72e65142L,0x94fe9ddd9216cd30L,
+ 0xd6539c7d0f87feecL },
+ { 0x03c5a57c432ac7d7L,0x72692cf0327fda10L,0xec28c85f280698deL,
+ 0x2331fb467ec283b1L } },
+ /* 33 << 210 */
+ { { 0xd34bfa322867e633L,0x78709a820a9cc815L,0xb7fe6964875e2fa5L,
+ 0x25cc064f9e98bfb5L },
+ { 0x9eb0151c493a65c5L,0x5fb5d94153182464L,0x69e6f130f04618e2L,
+ 0xa8ecec22f89c8ab6L } },
+ /* 34 << 210 */
+ { { 0xcd6ac88bb96209bdL,0x65fa8cdbb3e1c9e0L,0xa47d22f54a8d8eacL,
+ 0x83895cdf8d33f963L },
+ { 0xa8adca59b56cd3d1L,0x10c8350bdaf38232L,0x2b161fb3a5080a9fL,
+ 0xbe7f5c643af65b3aL } },
+ /* 35 << 210 */
+ { { 0x2c75403997403a11L,0x94626cf7121b96afL,0x431de7c46a983ec2L,
+ 0x3780dd3a52cc3df7L },
+ { 0xe28a0e462baf8e3bL,0xabe68aad51d299aeL,0x603eb8f9647a2408L,
+ 0x14c61ed65c750981L } },
+ /* 36 << 210 */
+ { { 0x88b34414c53352e7L,0x5a34889c1337d46eL,0x612c1560f95f2bc8L,
+ 0x8a3f8441d4807a3aL },
+ { 0x680d9e975224da68L,0x60cd6e88c3eb00e9L,0x3875a98e9a6bc375L,
+ 0xdc80f9244fd554c2L } },
+ /* 37 << 210 */
+ { { 0x6c4b34156ac77407L,0xa1e5ea8f25420681L,0x541bfa144607a458L,
+ 0x5dbc7e7a96d7fbf9L },
+ { 0x646a851b31590a47L,0x039e85ba15ee6df8L,0xd19fa231d7b43fc0L,
+ 0x84bc8be8299a0e04L } },
+ /* 38 << 210 */
+ { { 0x2b9d2936f20df03aL,0x240543828608d472L,0x76b6ba049149202aL,
+ 0xb21c38313670e7b7L },
+ { 0xddd93059d6fdee10L,0x9da47ad378488e71L,0x99cc1dfda0fcfb25L,
+ 0x42abde1064696954L } },
+ /* 39 << 210 */
+ { { 0x14cc15fc17eab9feL,0xd6e863e4d3e70972L,0x29a7765c6432112cL,
+ 0x886600015b0774d8L },
+ { 0x3729175a2c088eaeL,0x13afbcae8230b8d4L,0x44768151915f4379L,
+ 0xf086431ad8d22812L } },
+ /* 40 << 210 */
+ { { 0x37461955c298b974L,0x905fb5f0f8711e04L,0x787abf3afe969d18L,
+ 0x392167c26f6a494eL },
+ { 0xfc7a0d2d28c511daL,0xf127c7dcb66a262dL,0xf9c4bb95fd63fdf0L,
+ 0x900165893913ef46L } },
+ /* 41 << 210 */
+ { { 0x74d2a73c11aa600dL,0x2f5379bd9fb5ab52L,0xe49e53a47fb70068L,
+ 0x68dd39e5404aa9a7L },
+ { 0xb9b0cf572ecaa9c3L,0xba0e103be824826bL,0x60c2198b4631a3c4L,
+ 0xc5ff84abfa8966a2L } },
+ /* 42 << 210 */
+ { { 0x2d6ebe22ac95aff8L,0x1c9bb6dbb5a46d09L,0x419062da53ee4f8dL,
+ 0x7b9042d0bb97efefL },
+ { 0x0f87f080830cf6bdL,0x4861d19a6ec8a6c6L,0xd3a0daa1202f01aaL,
+ 0xb0111674f25afbd5L } },
+ /* 43 << 210 */
+ { { 0x6d00d6cf1afb20d9L,0x1369500040671bc5L,0x913ab0dc2485ea9bL,
+ 0x1f2bed069eef61acL },
+ { 0x850c82176d799e20L,0x93415f373271c2deL,0x5afb06e96c4f5910L,
+ 0x688a52dfc4e9e421L } },
+ /* 44 << 210 */
+ { { 0x30495ba3e2a9a6dbL,0x4601303d58f9268bL,0xbe3b0dad7eb0f04fL,
+ 0x4ea472504456936dL },
+ { 0x8caf8798d33fd3e7L,0x1ccd8a89eb433708L,0x9effe3e887fd50adL,
+ 0xbe240a566b29c4dfL } },
+ /* 45 << 210 */
+ { { 0xec4ffd98ca0e7ebdL,0xf586783ae748616eL,0xa5b00d8fc77baa99L,
+ 0x0acada29b4f34c9cL },
+ { 0x36dad67d0fe723acL,0x1d8e53a539c36c1eL,0xe4dd342d1f4bea41L,
+ 0x64fd5e35ebc9e4e0L } },
+ /* 46 << 210 */
+ { { 0x96f01f9057908805L,0xb5b9ea3d5ed480ddL,0x366c5dc23efd2dd0L,
+ 0xed2fe3056e9dfa27L },
+ { 0x4575e8926e9197e2L,0x11719c09ab502a5dL,0x264c7bece81f213fL,
+ 0x741b924155f5c457L } },
+ /* 47 << 210 */
+ { { 0x78ac7b6849a5f4f4L,0xf91d70a29fc45b7dL,0x39b05544b0f5f355L,
+ 0x11f06bceeef930d9L },
+ { 0xdb84d25d038d05e1L,0x04838ee5bacc1d51L,0x9da3ce869e8ee00bL,
+ 0xc3412057c36eda1fL } },
+ /* 48 << 210 */
+ { { 0xae80b91364d9c2f4L,0x7468bac3a010a8ffL,0xdfd2003737359d41L,
+ 0x1a0f5ab815efeaccL },
+ { 0x7c25ad2f659d0ce0L,0x4011bcbb6785cff1L,0x128b99127e2192c7L,
+ 0xa549d8e113ccb0e8L } },
+ /* 49 << 210 */
+ { { 0x805588d8c85438b1L,0x5680332dbc25cb27L,0xdcd1bc961a4bfdf4L,
+ 0x779ff428706f6566L },
+ { 0x8bbee998f059987aL,0xf6ce8cf2cc686de7L,0xf8ad3c4a953cfdb2L,
+ 0xd1d426d92205da36L } },
+ /* 50 << 210 */
+ { { 0xb3c0f13fc781a241L,0x3e89360ed75362a8L,0xccd05863c8a91184L,
+ 0x9bd0c9b7efa8a7f4L },
+ { 0x97ee4d538a912a4bL,0xde5e15f8bcf518fdL,0x6a055bf8c467e1e0L,
+ 0x10be4b4b1587e256L } },
+ /* 51 << 210 */
+ { { 0xd90c14f2668621c9L,0xd5518f51ab9c92c1L,0x8e6a0100d6d47b3cL,
+ 0xcbe980dd66716175L },
+ { 0x500d3f10ddd83683L,0x3b6cb35d99cac73cL,0x53730c8b6083d550L,
+ 0xcf159767df0a1987L } },
+ /* 52 << 210 */
+ { { 0x84bfcf5343ad73b3L,0x1b528c204f035a94L,0x4294edf733eeac69L,
+ 0xb6283e83817f3240L },
+ { 0xc3fdc9590a5f25b1L,0xefaf8aa55844ee22L,0xde269ba5dbdde4deL,
+ 0xe3347160c56133bfL } },
+ /* 53 << 210 */
+ { { 0xc11842198d9ea9f8L,0x090de5dbf3fc1ab5L,0x404c37b10bf22cdaL,
+ 0x7de20ec8f5618894L },
+ { 0x754c588eecdaecabL,0x6ca4b0ed88342743L,0x76f08bddf4a938ecL,
+ 0xd182de8991493ccbL } },
+ /* 54 << 210 */
+ { { 0xd652c53ec8a4186aL,0xb3e878db946d8e33L,0x088453c05f37663cL,
+ 0x5cd9daaab407748bL },
+ { 0xa1f5197f586d5e72L,0x47500be8c443ca59L,0x78ef35b2e2652424L,
+ 0x09c5d26f6dd7767dL } },
+ /* 55 << 210 */
+ { { 0x7175a79aa74d3f7bL,0x0428fd8dcf5ea459L,0x511cb97ca5d1746dL,
+ 0x36363939e71d1278L },
+ { 0xcf2df95510350bf4L,0xb381743960aae782L,0xa748c0e43e688809L,
+ 0x98021fbfd7a5a006L } },
+ /* 56 << 210 */
+ { { 0x9076a70c0e367a98L,0xbea1bc150f62b7c2L,0x2645a68c30fe0343L,
+ 0xacaffa78699dc14fL },
+ { 0xf4469964457bf9c4L,0x0db6407b0d2ead83L,0x68d56cadb2c6f3ebL,
+ 0x3b512e73f376356cL } },
+ /* 57 << 210 */
+ { { 0xe43b0e1ffce10408L,0x89ddc0035a5e257dL,0xb0ae0d120362e5b3L,
+ 0x07f983c7b0519161L },
+ { 0xc2e94d155d5231e7L,0xcff22aed0b4f9513L,0xb02588dd6ad0b0b5L,
+ 0xb967d1ac11d0dcd5L } },
+ /* 58 << 210 */
+ { { 0x8dac6bc6cf777b6cL,0x0062bdbd4c6d1959L,0x53da71b50ef5cc85L,
+ 0x07012c7d4006f14fL },
+ { 0x4617f962ac47800dL,0x53365f2bc102ed75L,0xb422efcb4ab8c9d3L,
+ 0x195cb26b34af31c9L } },
+ /* 59 << 210 */
+ { { 0x3a926e2905f2c4ceL,0xbd2bdecb9856966cL,0x5d16ab3a85527015L,
+ 0x9f81609e4486c231L },
+ { 0xd8b96b2cda350002L,0xbd054690fa1b7d36L,0xdc90ebf5e71d79bcL,
+ 0xf241b6f908964e4eL } },
+ /* 60 << 210 */
+ { { 0x7c8386432fe3cd4cL,0xe0f33acbb4bc633cL,0xb4a9ecec3d139f1fL,
+ 0x05ce69cddc4a1f49L },
+ { 0xa19d1b16f5f98aafL,0x45bb71d66f23e0efL,0x33789fcd46cdfdd3L,
+ 0x9b8e2978cee040caL } },
+ /* 61 << 210 */
+ { { 0x9c69b246ae0a6828L,0xba533d247078d5aaL,0x7a2e42c07bb4fbdbL,
+ 0xcfb4879a7035385cL },
+ { 0x8c3dd30b3281705bL,0x7e361c6c404fe081L,0x7b21649c3f604edfL,
+ 0x5dbf6a3fe52ffe47L } },
+ /* 62 << 210 */
+ { { 0xc41b7c234b54d9bfL,0x1374e6813511c3d9L,0x1863bf16c1b2b758L,
+ 0x90e785071e9e6a96L },
+ { 0xab4bf98d5d86f174L,0xd74e0bd385e96fe4L,0x8afde39fcac5d344L,
+ 0x90946dbcbd91b847L } },
+ /* 63 << 210 */
+ { { 0xf5b42358fe1a838cL,0x05aae6c5620ac9d8L,0x8e193bd8a1ce5a0bL,
+ 0x8f7105714dabfd72L },
+ { 0x8d8fdd48182caaacL,0x8c4aeefa040745cfL,0x73c6c30af3b93e6dL,
+ 0x991241f316f42011L } },
+ /* 64 << 210 */
+ { { 0xa0158eeae457a477L,0xd19857dbee6ddc05L,0xb326522418c41671L,
+ 0x3ffdfc7e3c2c0d58L },
+ { 0x3a3a525426ee7cdaL,0x341b0869df02c3a8L,0xa023bf42723bbfc8L,
+ 0x3d15002a14452691L } },
+ /* 0 << 217 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 217 */
+ { { 0x5ef7324c85edfa30L,0x2597655487d4f3daL,0x352f5bc0dcb50c86L,
+ 0x8f6927b04832a96cL },
+ { 0xd08ee1ba55f2f94cL,0x6a996f99344b45faL,0xe133cb8da8aa455dL,
+ 0x5d0721ec758dc1f7L } },
+ /* 2 << 217 */
+ { { 0x6ba7a92079e5fb67L,0xe1331feb70aa725eL,0x5080ccf57df5d837L,
+ 0xe4cae01d7ff72e21L },
+ { 0xd9243ee60412a77dL,0x06ff7cacdf449025L,0xbe75f7cd23ef5a31L,
+ 0xbc9578220ddef7a8L } },
+ /* 3 << 217 */
+ { { 0x8cf7230cb0ce1c55L,0x5b534d050bbfb607L,0xee1ef1130e16363bL,
+ 0x27e0aa7ab4999e82L },
+ { 0xce1dac2d79362c41L,0x67920c9091bb6cb0L,0x1e648d632223df24L,
+ 0x0f7d9eefe32e8f28L } },
+ /* 4 << 217 */
+ { { 0x6943f39afa833834L,0x22951722a6328562L,0x81d63dd54170fc10L,
+ 0x9f5fa58faecc2e6dL },
+ { 0xb66c8725e77d9a3bL,0x11235cea6384ebe0L,0x06a8c1185845e24aL,
+ 0x0137b286ebd093b1L } },
+ /* 5 << 217 */
+ { { 0xc589e1ce44ace150L,0xe0f8d3d94381e97cL,0x59e99b1162c5a4b8L,
+ 0x90d262f7fd0ec9f9L },
+ { 0xfbc854c9283e13c9L,0x2d04fde7aedc7085L,0x057d776547dcbecbL,
+ 0x8dbdf5919a76fa5fL } },
+ /* 6 << 217 */
+ { { 0xd01506950de1e578L,0x2e1463e7e9f72bc6L,0xffa684411b39eca5L,
+ 0x673c85307c037f2fL },
+ { 0xd0d6a600747f91daL,0xb08d43e1c9cb78e9L,0x0fc0c64427b5cef5L,
+ 0x5c1d160aa60a2fd6L } },
+ /* 7 << 217 */
+ { { 0xf98cae5328c8e13bL,0x375f10c4b2eddcd1L,0xd4eb8b7f5cce06adL,
+ 0xb4669f4580a2e1efL },
+ { 0xd593f9d05bbd8699L,0x5528a4c9e7976d13L,0x3923e0951c7e28d3L,
+ 0xb92937903f6bb577L } },
+ /* 8 << 217 */
+ { { 0xdb567d6ac42bd6d2L,0x6df86468bb1f96aeL,0x0efe5b1a4843b28eL,
+ 0x961bbb056379b240L },
+ { 0xb6caf5f070a6a26bL,0x70686c0d328e6e39L,0x80da06cf895fc8d3L,
+ 0x804d8810b363fdc9L } },
+ /* 9 << 217 */
+ { { 0xbe22877b207f1670L,0x9b0dd1884e615291L,0x625ae8dc97a3c2bfL,
+ 0x08584ef7439b86e8L },
+ { 0xde7190a5dcd898ffL,0x26286c402058ee3dL,0x3db0b2175f87b1c1L,
+ 0xcc334771102a6db5L } },
+ /* 10 << 217 */
+ { { 0xd99de9542f770fb1L,0x97c1c6204cd7535eL,0xd3b6c4483f09cefcL,
+ 0xd725af155a63b4f8L },
+ { 0x0c95d24fc01e20ecL,0xdfd374949ae7121fL,0x7d6ddb72ec77b7ecL,
+ 0xfe079d3b0353a4aeL } },
+ /* 11 << 217 */
+ { { 0x3066e70a2e6ac8d2L,0x9c6b5a43106e5c05L,0x52d3c6f5ede59b8cL,
+ 0x30d6a5c3fccec9aeL },
+ { 0xedec7c224fc0a9efL,0x190ff08395c16cedL,0xbe12ec8f94de0fdeL,
+ 0x0d131ab8852d3433L } },
+ /* 12 << 217 */
+ { { 0x42ace07e85701291L,0x94793ed9194061a8L,0x30e83ed6d7f4a485L,
+ 0x9eec7269f9eeff4dL },
+ { 0x90acba590c9d8005L,0x5feca4581e79b9d1L,0x8fbe54271d506a1eL,
+ 0xa32b2c8e2439cfa7L } },
+ /* 13 << 217 */
+ { { 0x1671c17373dd0b4eL,0x37a2821444a054c6L,0x81760a1b4e8b53f1L,
+ 0xa6c04224f9f93b9eL },
+ { 0x18784b34cf671e3cL,0x81bbecd2cda9b994L,0x38831979b2ab3848L,
+ 0xef54feb7f2e03c2dL } },
+ /* 14 << 217 */
+ { { 0xcf197ca7fb8088faL,0x014272474ddc96c5L,0xa2d2550a30777176L,
+ 0x534698984d0cf71dL },
+ { 0x6ce937b83a2aaac6L,0xe9f91dc35af38d9bL,0x2598ad83c8bf2899L,
+ 0x8e706ac9b5536c16L } },
+ /* 15 << 217 */
+ { { 0x40dc7495f688dc98L,0x26490cd7124c4afcL,0xe651ec841f18775cL,
+ 0x393ea6c3b4fdaf4aL },
+ { 0x1e1f33437f338e0dL,0x39fb832b6053e7b5L,0x46e702da619e14d5L,
+ 0x859cacd1cdeef6e0L } },
+ /* 16 << 217 */
+ { { 0x63b99ce74462007dL,0xb8ab48a54cb5f5b7L,0x9ec673d2f55edde7L,
+ 0xd1567f748cfaefdaL },
+ { 0x46381b6b0887bcecL,0x694497cee178f3c2L,0x5e6525e31e6266cbL,
+ 0x5931de26697d6413L } },
+ /* 17 << 217 */
+ { { 0x87f8df7c0e58d493L,0xb1ae5ed058b73f12L,0xc368f784dea0c34dL,
+ 0x9bd0a120859a91a0L },
+ { 0xb00d88b7cc863c68L,0x3a1cc11e3d1f4d65L,0xea38e0e70aa85593L,
+ 0x37f13e987dc4aee8L } },
+ /* 18 << 217 */
+ { { 0x10d38667bc947badL,0x738e07ce2a36ee2eL,0xc93470cdc577fcacL,
+ 0xdee1b6162782470dL },
+ { 0x36a25e672e793d12L,0xd6aa6caee0f186daL,0x474d0fd980e07af7L,
+ 0xf7cdc47dba8a5cd4L } },
+ /* 19 << 217 */
+ { { 0x28af6d9dab15247fL,0x7c789c10493a537fL,0x7ac9b11023a334e7L,
+ 0x0236ac0912c9c277L },
+ { 0xa7e5bd251d7a5144L,0x098b9c2af13ec4ecL,0x3639dacad3f0abcaL,
+ 0x642da81aa23960f9L } },
+ /* 20 << 217 */
+ { { 0x7d2e5c054f7269b1L,0xfcf30777e287c385L,0x10edc84ff2a46f21L,
+ 0x354417574f43fa36L },
+ { 0xf1327899fd703431L,0xa438d7a616dd587aL,0x65c34c57e9c8352dL,
+ 0xa728edab5cc5a24eL } },
+ /* 21 << 217 */
+ { { 0xaed78abc42531689L,0x0a51a0e8010963efL,0x5776fa0ad717d9b3L,
+ 0xf356c2397dd3428bL },
+ { 0x29903fff8d3a3dacL,0x409597fa3d94491fL,0x4cd7a5ffbf4a56a4L,
+ 0xe50964748adab462L } },
+ /* 22 << 217 */
+ { { 0xa97b51265c3427b0L,0x6401405cd282c9bdL,0x3629f8d7222c5c45L,
+ 0xb1c02c16e8d50aedL },
+ { 0xbea2ed75d9635bc9L,0x226790c76e24552fL,0x3c33f2a365f1d066L,
+ 0x2a43463e6dfccc2eL } },
+ /* 23 << 217 */
+ { { 0x8cc3453adb483761L,0xe7cc608565d5672bL,0x277ed6cbde3efc87L,
+ 0x19f2f36869234eafL },
+ { 0x9aaf43175c0b800bL,0x1f1e7c898b6da6e2L,0x6cfb4715b94ec75eL,
+ 0xd590dd5f453118c2L } },
+ /* 24 << 217 */
+ { { 0x14e49da11f17a34cL,0x5420ab39235a1456L,0xb76372412f50363bL,
+ 0x7b15d623c3fabb6eL },
+ { 0xa0ef40b1e274e49cL,0x5cf5074496b1860aL,0xd6583fbf66afe5a4L,
+ 0x44240510f47e3e9aL } },
+ /* 25 << 217 */
+ { { 0x9925434311b2d595L,0xf1367499eec8df57L,0x3cb12c613e73dd05L,
+ 0xd248c0337dac102aL },
+ { 0xcf154f13a77739f5L,0xbf4288cb23d2af42L,0xaa64c9b632e4a1cfL,
+ 0xee8c07a8c8a208f3L } },
+ /* 26 << 217 */
+ { { 0xe10d49996fe8393fL,0x0f809a3fe91f3a32L,0x61096d1c802f63c8L,
+ 0x289e146257750d3dL },
+ { 0xed06167e9889feeaL,0xd5c9c0e2e0993909L,0x46fca0d856508ac6L,
+ 0x918260474f1b8e83L } },
+ /* 27 << 217 */
+ { { 0x4f2c877a9a4a2751L,0x71bd0072cae6feadL,0x38df8dcc06aa1941L,
+ 0x5a074b4c63beeaa8L },
+ { 0xd6d65934c1cec8edL,0xa6ecb49eaabc03bdL,0xaade91c2de8a8415L,
+ 0xcfb0efdf691136e0L } },
+ /* 28 << 217 */
+ { { 0x11af45ee23ab3495L,0xa132df880b77463dL,0x8923c15c815d06f4L,
+ 0xc3ceb3f50d61a436L },
+ { 0xaf52291de88fb1daL,0xea0579741da12179L,0xb0d7218cd2fef720L,
+ 0x6c0899c98e1d8845L } },
+ /* 29 << 217 */
+ { { 0x98157504752ddad7L,0xd60bd74fa1a68a97L,0x7047a3a9f658fb99L,
+ 0x1f5d86d65f8511e4L },
+ { 0xb8a4bc424b5a6d88L,0x69eb2c331abefa7dL,0x95bf39e813c9c510L,
+ 0xf571960ad48aab43L } },
+ /* 30 << 217 */
+ { { 0x7e8cfbcf704e23c6L,0xc71b7d2228aaa65bL,0xa041b2bd245e3c83L,
+ 0x69b98834d21854ffL },
+ { 0x89d227a3963bfeecL,0x99947aaade7da7cbL,0x1d9ee9dbee68a9b1L,
+ 0x0a08f003698ec368L } },
+ /* 31 << 217 */
+ { { 0xe9ea409478ef2487L,0xc8d2d41502cfec26L,0xc52f9a6eb7dcf328L,
+ 0x0ed489e385b6a937L },
+ { 0x9b94986bbef3366eL,0x0de59c70edddddb8L,0xffdb748ceadddbe2L,
+ 0x9b9784bb8266ea40L } },
+ /* 32 << 217 */
+ { { 0x142b55021a93507aL,0xb4cd11878d3c06cfL,0xdf70e76a91ec3f40L,
+ 0x484e81ad4e7553c2L },
+ { 0x830f87b5272e9d6eL,0xea1c93e5c6ff514aL,0x67cc2adcc4192a8eL,
+ 0xc77e27e242f4535aL } },
+ /* 33 << 217 */
+ { { 0x9cdbab36d2b713c5L,0x86274ea0cf7b0cd3L,0x784680f309af826bL,
+ 0xbfcc837a0c72dea3L },
+ { 0xa8bdfe9dd6529b73L,0x708aa22863a88002L,0x6c7a9a54c91d45b9L,
+ 0xdf1a38bbfd004f56L } },
+ /* 34 << 217 */
+ { { 0x2e8c9a26b8bad853L,0x2d52cea33723eae7L,0x054d6d8156ca2830L,
+ 0xa3317d149a8dc411L },
+ { 0xa08662fefd4ddedaL,0xed2a153ab55d792bL,0x7035c16abfc6e944L,
+ 0xb6bc583400171cf3L } },
+ /* 35 << 217 */
+ { { 0xe27152b383d102b6L,0xfe695a470646b848L,0xa5bb09d8916e6d37L,
+ 0xb4269d640d17015eL },
+ { 0x8d8156a10a1d2285L,0xfeef6c5146d26d72L,0x9dac57c84c5434a7L,
+ 0x0282e5be59d39e31L } },
+ /* 36 << 217 */
+ { { 0xedfff181721c486dL,0x301baf10bc58824eL,0x8136a6aa00570031L,
+ 0x55aaf78c1cddde68L },
+ { 0x2682937159c63952L,0x3a3bd2748bc25bafL,0xecdf8657b7e52dc3L,
+ 0x2dd8c087fd78e6c8L } },
+ /* 37 << 217 */
+ { { 0x20553274f5531461L,0x8b4a12815d95499bL,0xe2c8763a1a80f9d2L,
+ 0xd1dbe32b4ddec758L },
+ { 0xaf12210d30c34169L,0xba74a95378baa533L,0x3d133c6ea438f254L,
+ 0xa431531a201bef5bL } },
+ /* 38 << 217 */
+ { { 0x15295e22f669d7ecL,0xca374f64357fb515L,0x8a8406ffeaa3fdb3L,
+ 0x106ae448df3f2da8L },
+ { 0x8f9b0a9033c8e9a1L,0x234645e271ad5885L,0x3d0832241c0aed14L,
+ 0xf10a7d3e7a942d46L } },
+ /* 39 << 217 */
+ { { 0x7c11deee40d5c9beL,0xb2bae7ffba84ed98L,0x93e97139aad58dddL,
+ 0x3d8727963f6d1fa3L },
+ { 0x483aca818569ff13L,0x8b89a5fb9a600f72L,0x4cbc27c3c06f2b86L,
+ 0x2213071363ad9c0bL } },
+ /* 40 << 217 */
+ { { 0xb5358b1e48ac2840L,0x18311294ecba9477L,0xda58f990a6946b43L,
+ 0x3098baf99ab41819L },
+ { 0x66c4c1584198da52L,0xab4fc17c146bfd1bL,0x2f0a4c3cbf36a908L,
+ 0x2ae9e34b58cf7838L } },
+ /* 41 << 217 */
+ { { 0xf411529e3fa11b1fL,0x21e43677974af2b4L,0x7c20958ec230793bL,
+ 0x710ea88516e840f3L },
+ { 0xfc0b21fcc5dc67cfL,0x08d5164788405718L,0xd955c21fcfe49eb7L,
+ 0x9722a5d556dd4a1fL } },
+ /* 42 << 217 */
+ { { 0xc9ef50e2c861baa5L,0xc0c21a5d9505ac3eL,0xaf6b9a338b7c063fL,
+ 0xc63703392f4779c1L },
+ { 0x22df99c7638167c3L,0xfe6ffe76795db30cL,0x2b822d33a4854989L,
+ 0xfef031dd30563aa5L } },
+ /* 43 << 217 */
+ { { 0x16b09f82d57c667fL,0xc70312cecc0b76f1L,0xbf04a9e6c9118aecL,
+ 0x82fcb4193409d133L },
+ { 0x1a8ab385ab45d44dL,0xfba07222617b83a3L,0xb05f50dd58e81b52L,
+ 0x1d8db55321ce5affL } },
+ /* 44 << 217 */
+ { { 0x3097b8d4e344a873L,0x7d8d116dfe36d53eL,0x6db22f587875e750L,
+ 0x2dc5e37343e144eaL },
+ { 0xc05f32e6e799eb95L,0xe9e5f4df6899e6ecL,0xbdc3bd681fab23d5L,
+ 0xb72b8ab773af60e6L } },
+ /* 45 << 217 */
+ { { 0x8db27ae02cecc84aL,0x600016d87bdb871cL,0x42a44b13d7c46f58L,
+ 0xb8919727c3a77d39L },
+ { 0xcfc6bbbddafd6088L,0x1a7401466bd20d39L,0x8c747abd98c41072L,
+ 0x4c91e765bdf68ea1L } },
+ /* 46 << 217 */
+ { { 0x7c95e5ca08819a78L,0xcf48b729c9587921L,0x091c7c5fdebbcc7dL,
+ 0x6f287404f0e05149L },
+ { 0xf83b5ac226cd44ecL,0x88ae32a6cfea250eL,0x6ac5047a1d06ebc5L,
+ 0xc7e550b4d434f781L } },
+ /* 47 << 217 */
+ { { 0x61ab1cf25c727bd2L,0x2e4badb11cf915b0L,0x1b4dadecf69d3920L,
+ 0xe61b1ca6f14c1dfeL },
+ { 0x90b479ccbd6bd51fL,0x8024e4018045ec30L,0xcab29ca325ef0e62L,
+ 0x4f2e941649e4ebc0L } },
+ /* 48 << 217 */
+ { { 0x45eb40ec0ccced58L,0x25cd4b9c0da44f98L,0x43e06458871812c6L,
+ 0x99f80d5516cef651L },
+ { 0x571340c9ce6dc153L,0x138d5117d8665521L,0xacdb45bc4e07014dL,
+ 0x2f34bb3884b60b91L } },
+ /* 49 << 217 */
+ { { 0xf44a4fd22ae8921eL,0xb039288e892ba1e2L,0x9da50174b1c180b2L,
+ 0x6b70ab661693dc87L },
+ { 0x7e9babc9e7057481L,0x4581ddef9c80dc41L,0x0c890da951294682L,
+ 0x0b5629d33f4736e5L } },
+ /* 50 << 217 */
+ { { 0x2340c79eb06f5b41L,0xa42e84ce4e243469L,0xf9a20135045a71a9L,
+ 0xefbfb415d27b6fb6L },
+ { 0x25ebea239d33cd6fL,0x9caedb88aa6c0af8L,0x53dc7e9ad9ce6f96L,
+ 0x3897f9fd51e0b15aL } },
+ /* 51 << 217 */
+ { { 0xf51cb1f88e5d788eL,0x1aec7ba8e1d490eeL,0x265991e0cc58cb3cL,
+ 0x9f306e8c9fc3ad31L },
+ { 0x5fed006e5040a0acL,0xca9d5043fb476f2eL,0xa19c06e8beea7a23L,
+ 0xd28658010edabb63L } },
+ /* 52 << 217 */
+ { { 0xdb92293f6967469aL,0x2894d8398d8a8ed8L,0x87c9e406bbc77122L,
+ 0x8671c6f12ea3a26aL },
+ { 0xe42df8d6d7de9853L,0x2e3ce346b1f2bcc7L,0xda601dfc899d50cfL,
+ 0xbfc913defb1b598fL } },
+ /* 53 << 217 */
+ { { 0x81c4909fe61f7908L,0x192e304f9bbc7b29L,0xc3ed8738c104b338L,
+ 0xedbe9e47783f5d61L },
+ { 0x0c06e9be2db30660L,0xda3e613fc0eb7d8eL,0xd8fa3e97322e096eL,
+ 0xfebd91e8d336e247L } },
+ /* 54 << 217 */
+ { { 0x8f13ccc4df655a49L,0xa9e00dfc5eb20210L,0x84631d0fc656b6eaL,
+ 0x93a058cdd8c0d947L },
+ { 0x6846904a67bd3448L,0x4a3d4e1af394fd5cL,0xc102c1a5db225f52L,
+ 0xe3455bbafc4f5e9aL } },
+ /* 55 << 217 */
+ { { 0x6b36985b4b9ad1ceL,0xa98185365bb7f793L,0x6c25e1d048b1a416L,
+ 0x1381dd533c81bee7L },
+ { 0xd2a30d617a4a7620L,0xc841292639b8944cL,0x3c1c6fbe7a97c33aL,
+ 0x941e541d938664e7L } },
+ /* 56 << 217 */
+ { { 0x417499e84a34f239L,0x15fdb83cb90402d5L,0xb75f46bf433aa832L,
+ 0xb61e15af63215db1L },
+ { 0xaabe59d4a127f89aL,0x5d541e0c07e816daL,0xaaba0659a618b692L,
+ 0x5532773317266026L } },
+ /* 57 << 217 */
+ { { 0xaf53a0fc95f57552L,0x329476506cacb0c9L,0x253ff58dc821be01L,
+ 0xb0309531a06f1146L },
+ { 0x59bbbdf505c2e54dL,0x158f27ad26e8dd22L,0xcc5b7ffb397e1e53L,
+ 0xae03f65b7fc1e50dL } },
+ /* 58 << 217 */
+ { { 0xa9784ebd9c95f0f9L,0x5ed9deb224640771L,0x31244af7035561c4L,
+ 0x87332f3a7ee857deL },
+ { 0x09e16e9e2b9e0d88L,0x52d910f456a06049L,0x507ed477a9592f48L,
+ 0x85cb917b2365d678L } },
+ /* 59 << 217 */
+ { { 0xf8511c934c8998d1L,0x2186a3f1730ea58fL,0x50189626b2029db0L,
+ 0x9137a6d902ceb75aL },
+ { 0x2fe17f37748bc82cL,0x87c2e93180469f8cL,0x850f71cdbf891aa2L,
+ 0x0ca1b89b75ec3d8dL } },
+ /* 60 << 217 */
+ { { 0x516c43aa5e1cd3cdL,0x893978089a887c28L,0x0059c699ddea1f9fL,
+ 0x7737d6fa8e6868f7L },
+ { 0x6d93746a60f1524bL,0x36985e55ba052aa7L,0x41b1d322ed923ea5L,
+ 0x3429759f25852a11L } },
+ /* 61 << 217 */
+ { { 0xbeca6ec3092e9f41L,0x3a238c6662256bbdL,0xd82958ea70ad487dL,
+ 0x4ac8aaf965610d93L },
+ { 0x3fa101b15e4ccab0L,0x9bf430f29de14bfbL,0xa10f5cc66531899dL,
+ 0x590005fbea8ce17dL } },
+ /* 62 << 217 */
+ { { 0xc437912f24544cb6L,0x9987b71ad79ac2e3L,0x13e3d9ddc058a212L,
+ 0x00075aacd2de9606L },
+ { 0x80ab508b6cac8369L,0x87842be7f54f6c89L,0xa7ad663d6bc532a4L,
+ 0x67813de778a91bc8L } },
+ /* 63 << 217 */
+ { { 0x5dcb61cec3427239L,0x5f3c7cf0c56934d9L,0xc079e0fbe3191591L,
+ 0xe40896bdb01aada7L },
+ { 0x8d4667910492d25fL,0x8aeb30c9e7408276L,0xe94374959287aaccL,
+ 0x23d4708d79fe03d4L } },
+ /* 64 << 217 */
+ { { 0x8cda9cf2d0c05199L,0x502fbc22fae78454L,0xc0bda9dff572a182L,
+ 0x5f9b71b86158b372L },
+ { 0xe0f33a592b82dd07L,0x763027359523032eL,0x7fe1a721c4505a32L,
+ 0x7b6e3e82f796409fL } },
+ /* 0 << 224 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 224 */
+ { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L,
+ 0x2c41114ce0cdf943L },
+ { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L,
+ 0x20477abf42ff9297L } },
+ /* 2 << 224 */
+ { { 0xa004dcb3292a9287L,0xddc15cf677b092c7L,0x083a8464806c0605L,
+ 0x4a68df703db997b0L },
+ { 0x9c134e4505bf7dd0L,0xa4e63d398ccf7f8cL,0xa6e6517f41b5f8afL,
+ 0xaa8b9342ad7bc1ccL } },
+ /* 3 << 224 */
+ { { 0x126f35b51e706ad9L,0xb99cebb4c3a9ebdfL,0xa75389afbf608d90L,
+ 0x76113c4fc6c89858L },
+ { 0x80de8eb097e2b5aaL,0x7e1022cc63b91304L,0x3bdab6056ccc066cL,
+ 0x33cbb144b2edf900L } },
+ /* 4 << 224 */
+ { { 0xc41764717af715d2L,0xe2f7f594d0134a96L,0x2c1873efa41ec956L,
+ 0xe4e7b4f677821304L },
+ { 0xe5c8ff9788d5374aL,0x2b915e6380823d5bL,0xea6bc755b2ee8fe2L,
+ 0x6657624ce7112651L } },
+ /* 5 << 224 */
+ { { 0x157af101dace5acaL,0xc4fdbcf211a6a267L,0xdaddf340c49c8609L,
+ 0x97e49f52e9604a65L },
+ { 0x9be8e790937e2ad5L,0x846e2508326e17f1L,0x3f38007a0bbbc0dcL,
+ 0xcf03603fb11e16d6L } },
+ /* 6 << 224 */
+ { { 0xd6f800e07442f1d5L,0x475607d166e0e3abL,0x82807f16b7c64047L,
+ 0x8858e1e3a749883dL },
+ { 0x5859120b8231ee10L,0x1b80e7eb638a1eceL,0xcb72525ac6aa73a4L,
+ 0xa7cdea3d844423acL } },
+ /* 7 << 224 */
+ { { 0x5ed0c007f8ae7c38L,0x6db07a5c3d740192L,0xbe5e9c2a5fe36db3L,
+ 0xd5b9d57a76e95046L },
+ { 0x54ac32e78eba20f2L,0xef11ca8f71b9a352L,0x305e373eff98a658L,
+ 0xffe5a100823eb667L } },
+ /* 8 << 224 */
+ { { 0x57477b11e51732d2L,0xdfd6eb282538fc0eL,0x5c43b0cc3b39eec5L,
+ 0x6af12778cb36cc57L },
+ { 0x70b0852d06c425aeL,0x6df92f8c5c221b9bL,0x6c8d4f9ece826d9cL,
+ 0xf59aba7bb49359c3L } },
+ /* 9 << 224 */
+ { { 0x5c8ed8d5da64309dL,0x61a6de5691b30704L,0xd6b52f6a2f9b5808L,
+ 0x0eee419498c958a7L },
+ { 0xcddd9aab771e4caaL,0x83965dfd78bc21beL,0x02affce3b3b504f5L,
+ 0x30847a21561c8291L } },
+ /* 10 << 224 */
+ { { 0xd2eb2cf152bfda05L,0xe0e4c4e96197b98cL,0x1d35076cf8a1726fL,
+ 0x6c06085b2db11e3dL },
+ { 0x15c0c4d74463ba14L,0x9d292f830030238cL,0x1311ee8b3727536dL,
+ 0xfeea86efbeaedc1eL } },
+ /* 11 << 224 */
+ { { 0xb9d18cd366131e2eL,0xf31d974f80fe2682L,0xb6e49e0fe4160289L,
+ 0x7c48ec0b08e92799L },
+ { 0x818111d8d1989aa7L,0xb34fa0aaebf926f9L,0xdb5fe2f5a245474aL,
+ 0xf80a6ebb3c7ca756L } },
+ /* 12 << 224 */
+ { { 0xa7f96054afa05dd8L,0x26dfcf21fcaf119eL,0xe20ef2e30564bb59L,
+ 0xef4dca5061cb02b8L },
+ { 0xcda7838a65d30672L,0x8b08d534fd657e86L,0x4c5b439546d595c8L,
+ 0x39b58725425cb836L } },
+ /* 13 << 224 */
+ { { 0x8ea610593de9abe3L,0x404348819cdc03beL,0x9b261245cfedce8cL,
+ 0x78c318b4cf5234a1L },
+ { 0x510bcf16fde24c99L,0x2a77cb75a2c2ff5dL,0x9c895c2b27960fb4L,
+ 0xd30ce975b0eda42bL } },
+ /* 14 << 224 */
+ { { 0xfda853931a62cc26L,0x23c69b9650c0e052L,0xa227df15bfc633f3L,
+ 0x2ac788481bae7d48L },
+ { 0x487878f9187d073dL,0x6c2be919967f807dL,0x765861d8336e6d8fL,
+ 0x88b8974cce528a43L } },
+ /* 15 << 224 */
+ { { 0x09521177ff57d051L,0x2ff38037fb6a1961L,0xfc0aba74a3d76ad4L,
+ 0x7c76480325a7ec17L },
+ { 0x7532d75f48879bc8L,0xea7eacc058ce6bc1L,0xc82176b48e896c16L,
+ 0x9a30e0b22c750fedL } },
+ /* 16 << 224 */
+ { { 0xc37e2c2e421d3aa4L,0xf926407ce84fa840L,0x18abc03d1454e41cL,
+ 0x26605ecd3f7af644L },
+ { 0x242341a6d6a5eabfL,0x1edb84f4216b668eL,0xd836edb804010102L,
+ 0x5b337ce7945e1d8cL } },
+ /* 17 << 224 */
+ { { 0xd2075c77c055dc14L,0x2a0ffa2581d89cdfL,0x8ce815ea6ffdcbafL,
+ 0xa3428878fb648867L },
+ { 0x277699cf884655fbL,0xfa5b5bd6364d3e41L,0x01f680c6441e1cb7L,
+ 0x3fd61e66b70a7d67L } },
+ /* 18 << 224 */
+ { { 0x666ba2dccc78cf66L,0xb30181746fdbff77L,0x8d4dd0db168d4668L,
+ 0x259455d01dab3a2aL },
+ { 0xf58564c5cde3acecL,0x7714192513adb276L,0x527d725d8a303f65L,
+ 0x55deb6c9e6f38f7bL } },
+ /* 19 << 224 */
+ { { 0xfd5bb657b1fa70fbL,0xfa07f50fd8073a00L,0xf72e3aa7bca02500L,
+ 0xf68f895d9975740dL },
+ { 0x301120605cae2a6aL,0x01bd721802874842L,0x3d4238917ce47bd3L,
+ 0xa66663c1789544f6L } },
+ /* 20 << 224 */
+ { { 0x864d05d73272d838L,0xe22924f9fa6295c5L,0x8189593f6c2fda32L,
+ 0x330d7189b184b544L },
+ { 0x79efa62cbde1f714L,0x35771c94e5cb1a63L,0x2f4826b8641c8332L,
+ 0x00a894fbc8cee854L } },
+ /* 21 << 224 */
+ { { 0xb4b9a39b36194d40L,0xe857a7c577612601L,0xf4209dd24ecf2f58L,
+ 0x82b9e66d5a033487L },
+ { 0xc1e36934e4e8b9ddL,0xd2372c9da42377d7L,0x51dc94c70e3ae43bL,
+ 0x4c57761e04474f6fL } },
+ /* 22 << 224 */
+ { { 0xdcdacd0a1058a318L,0x369cf3f578053a9aL,0xc6c3de5031c68de2L,
+ 0x4653a5763c4b6d9fL },
+ { 0x1688dd5aaa4e5c97L,0x5be80aa1b7ab3c74L,0x70cefe7cbc65c283L,
+ 0x57f95f1306867091L } },
+ /* 23 << 224 */
+ { { 0xa39114e24415503bL,0xc08ff7c64cbb17e9L,0x1eff674dd7dec966L,
+ 0x6d4690af53376f63L },
+ { 0xff6fe32eea74237bL,0xc436d17ecd57508eL,0x15aa28e1edcc40feL,
+ 0x0d769c04581bbb44L } },
+ /* 24 << 224 */
+ { { 0xc240b6de34eaacdaL,0xd9e116e82ba0f1deL,0xcbe45ec779438e55L,
+ 0x91787c9d96f752d7L },
+ { 0x897f532bf129ac2fL,0xd307b7c85a36e22cL,0x91940675749fb8f3L,
+ 0xd14f95d0157fdb28L } },
+ /* 25 << 224 */
+ { { 0xfe51d0296ae55043L,0x8931e98f44a87de1L,0xe57f1cc609e4fee2L,
+ 0x0d063b674e072d92L },
+ { 0x70a998b9ed0e4316L,0xe74a736b306aca46L,0xecf0fbf24fda97c7L,
+ 0xa40f65cb3e178d93L } },
+ /* 26 << 224 */
+ { { 0x1625360416df4285L,0xb0c9babbd0c56ae2L,0x73032b19cfc5cfc3L,
+ 0xe497e5c309752056L },
+ { 0x12096bb4164bda96L,0x1ee42419a0b74da1L,0x8fc36243403826baL,
+ 0x0c8f0069dc09e660L } },
+ /* 27 << 224 */
+ { { 0x8667e981c27253c9L,0x05a6aefb92b36a45L,0xa62c4b369cb7bb46L,
+ 0x8394f37511f7027bL },
+ { 0x747bc79c5f109d0fL,0xcad88a765b8cc60aL,0x80c5a66b58f09e68L,
+ 0xe753d451f6127eacL } },
+ /* 28 << 224 */
+ { { 0xc44b74a15b0ec6f5L,0x47989fe45289b2b8L,0x745f848458d6fc73L,
+ 0xec362a6ff61c70abL },
+ { 0x070c98a7b3a8ad41L,0x73a20fc07b63db51L,0xed2c2173f44c35f4L,
+ 0x8a56149d9acc9dcaL } },
+ /* 29 << 224 */
+ { { 0x98f178819ac6e0f4L,0x360fdeafa413b5edL,0x0625b8f4a300b0fdL,
+ 0xf1f4d76a5b3222d3L },
+ { 0x9d6f5109587f76b8L,0x8b4ee08d2317fdb5L,0x88089bb78c68b095L,
+ 0x95570e9a5808d9b9L } },
+ /* 30 << 224 */
+ { { 0xa395c36f35d33ae7L,0x200ea12350bb5a94L,0x20c789bd0bafe84bL,
+ 0x243ef52d0919276aL },
+ { 0x3934c577e23ae233L,0xb93807afa460d1ecL,0xb72a53b1f8fa76a4L,
+ 0xd8914cb0c3ca4491L } },
+ /* 31 << 224 */
+ { { 0x2e1284943fb42622L,0x3b2700ac500907d5L,0xf370fb091a95ec63L,
+ 0xf8f30be231b6dfbdL },
+ { 0xf2b2f8d269e55f15L,0x1fead851cc1323e9L,0xfa366010d9e5eef6L,
+ 0x64d487b0e316107eL } },
+ /* 32 << 224 */
+ { { 0x4c076b86d23ddc82L,0x03fd344c7e0143f0L,0xa95362ff317af2c5L,
+ 0x0add3db7e18b7a4fL },
+ { 0x9c673e3f8260e01bL,0xfbeb49e554a1cc91L,0x91351bf292f2e433L,
+ 0xc755e7ec851141ebL } },
+ /* 33 << 224 */
+ { { 0xc9a9513929607745L,0x0ca07420a26f2b28L,0xcb2790e74bc6f9ddL,
+ 0x345bbb58adcaffc0L },
+ { 0xc65ea38cbe0f27a2L,0x67c24d7c641fcb56L,0x2c25f0a7a9e2c757L,
+ 0x93f5cdb016f16c49L } },
+ /* 34 << 224 */
+ { { 0x2ca5a9d7c5ee30a1L,0xd1593635b909b729L,0x804ce9f3dadeff48L,
+ 0xec464751b07c30c3L },
+ { 0x89d65ff39e49af6aL,0xf2d6238a6f3d01bcL,0x1095561e0bced843L,
+ 0x51789e12c8a13fd8L } },
+ /* 35 << 224 */
+ { { 0xd633f929763231dfL,0x46df9f7de7cbddefL,0x01c889c0cb265da8L,
+ 0xfce1ad10af4336d2L },
+ { 0x8d110df6fc6a0a7eL,0xdd431b986da425dcL,0xcdc4aeab1834aabeL,
+ 0x84deb1248439b7fcL } },
+ /* 36 << 224 */
+ { { 0x8796f1693c2a5998L,0x9b9247b47947190dL,0x55b9d9a511597014L,
+ 0x7e9dd70d7b1566eeL },
+ { 0x94ad78f7cbcd5e64L,0x0359ac179bd4c032L,0x3b11baaf7cc222aeL,
+ 0xa6a6e284ba78e812L } },
+ /* 37 << 224 */
+ { { 0x8392053f24cea1a0L,0xc97bce4a33621491L,0x7eb1db3435399ee9L,
+ 0x473f78efece81ad1L },
+ { 0x41d72fe0f63d3d0dL,0xe620b880afab62fcL,0x92096bc993158383L,
+ 0x41a213578f896f6cL } },
+ /* 38 << 224 */
+ { { 0x1b5ee2fac7dcfcabL,0x650acfde9546e007L,0xc081b749b1b02e07L,
+ 0xda9e41a0f9eca03dL },
+ { 0x013ba727175a54abL,0xca0cd190ea5d8d10L,0x85ea52c095fd96a9L,
+ 0x2c591b9fbc5c3940L } },
+ /* 39 << 224 */
+ { { 0x6fb4d4e42bad4d5fL,0xfa4c3590fef0059bL,0x6a10218af5122294L,
+ 0x9a78a81aa85751d1L },
+ { 0x04f20579a98e84e7L,0xfe1242c04997e5b5L,0xe77a273bca21e1e4L,
+ 0xfcc8b1ef9411939dL } },
+ /* 40 << 224 */
+ { { 0xe20ea30292d0487aL,0x1442dbec294b91feL,0x1f7a4afebb6b0e8fL,
+ 0x1700ef746889c318L },
+ { 0xf5bbffc370f1fc62L,0x3b31d4b669c79ccaL,0xe8bc2aaba7f6340dL,
+ 0xb0b08ab4a725e10aL } },
+ /* 41 << 224 */
+ { { 0x44f05701ae340050L,0xba4b30161cf0c569L,0x5aa29f83fbe19a51L,
+ 0x1b9ed428b71d752eL },
+ { 0x1666e54eeb4819f5L,0x616cdfed9e18b75bL,0x112ed5be3ee27b0bL,
+ 0xfbf2831944c7de4dL } },
+ /* 42 << 224 */
+ { { 0xd685ec85e0e60d84L,0x68037e301db7ee78L,0x5b65bdcd003c4d6eL,
+ 0x33e7363a93e29a6aL },
+ { 0x995b3a6108d0756cL,0xd727f85c2faf134bL,0xfac6edf71d337823L,
+ 0x99b9aa500439b8b4L } },
+ /* 43 << 224 */
+ { { 0x722eb104e2b4e075L,0x49987295437c4926L,0xb1e4c0e446a9b82dL,
+ 0xd0cb319757a006f5L },
+ { 0xf3de0f7dd7808c56L,0xb5c54d8f51f89772L,0x500a114aadbd31aaL,
+ 0x9afaaaa6295f6cabL } },
+ /* 44 << 224 */
+ { { 0x94705e2104cf667aL,0xfc2a811b9d3935d7L,0x560b02806d09267cL,
+ 0xf19ed119f780e53bL },
+ { 0xf0227c09067b6269L,0x967b85335caef599L,0x155b924368efeebcL,
+ 0xcd6d34f5c497bae6L } },
+ /* 45 << 224 */
+ { { 0x1dd8d5d36cceb370L,0x2aeac579a78d7bf9L,0x5d65017d70b67a62L,
+ 0x70c8e44f17c53f67L },
+ { 0xd1fc095086a34d09L,0xe0fca256e7134907L,0xe24fa29c80fdd315L,
+ 0x2c4acd03d87499adL } },
+ /* 46 << 224 */
+ { { 0xbaaf75173b5a9ba6L,0xb9cbe1f612e51a51L,0xd88edae35e154897L,
+ 0xe4309c3c77b66ca0L },
+ { 0xf5555805f67f3746L,0x85fc37baa36401ffL,0xdf86e2cad9499a53L,
+ 0x6270b2a3ecbc955bL } },
+ /* 47 << 224 */
+ { { 0xafae64f5974ad33bL,0x04d85977fe7b2df1L,0x2a3db3ff4ab03f73L,
+ 0x0b87878a8702740aL },
+ { 0x6d263f015a061732L,0xc25430cea32a1901L,0xf7ebab3ddb155018L,
+ 0x3a86f69363a9b78eL } },
+ /* 48 << 224 */
+ { { 0x349ae368da9f3804L,0x470f07fea164349cL,0xd52f4cc98562baa5L,
+ 0xc74a9e862b290df3L },
+ { 0xd3a1aa3543471a24L,0x239446beb8194511L,0xbec2dd0081dcd44dL,
+ 0xca3d7f0fc42ac82dL } },
+ /* 49 << 224 */
+ { { 0x1f3db085fdaf4520L,0xbb6d3e804549daf2L,0xf5969d8a19ad5c42L,
+ 0x7052b13ddbfd1511L },
+ { 0x11890d1b682b9060L,0xa71d3883ac34452cL,0xa438055b783805b4L,
+ 0x432412774725b23eL } },
+ /* 50 << 224 */
+ { { 0xf20cf96e4901bbedL,0x6419c710f432a2bbL,0x57a0fbb9dfa9cd7dL,
+ 0x589111e400daa249L },
+ { 0x19809a337b60554eL,0xea5f8887ede283a4L,0x2d713802503bfd35L,
+ 0x151bb0af585d2a53L } },
+ /* 51 << 224 */
+ { { 0x40b08f7443b30ca8L,0xe10b5bbad9934583L,0xe8a546d6b51110adL,
+ 0x1dd50e6628e0b6c5L },
+ { 0x292e9d54cff2b821L,0x3882555d47281760L,0x134838f83724d6e3L,
+ 0xf2c679e022ddcda1L } },
+ /* 52 << 224 */
+ { { 0x40ee88156d2a5768L,0x7f227bd21c1e7e2dL,0x487ba134d04ff443L,
+ 0x76e2ff3dc614e54bL },
+ { 0x36b88d6fa3177ec7L,0xbf731d512328fff5L,0x758caea249ba158eL,
+ 0x5ab8ff4c02938188L } },
+ /* 53 << 224 */
+ { { 0x33e1605635edc56dL,0x5a69d3497e940d79L,0x6c4fd00103866dcbL,
+ 0x20a38f574893cdefL },
+ { 0xfbf3e790fac3a15bL,0x6ed7ea2e7a4f8e6bL,0xa663eb4fbc3aca86L,
+ 0x22061ea5080d53f7L } },
+ /* 54 << 224 */
+ { { 0x2480dfe6f546783fL,0xd38bc6da5a0a641eL,0xfb093cd12ede8965L,
+ 0x89654db4acb455cfL },
+ { 0x413cbf9a26e1adeeL,0x291f3764373294d4L,0x00797257648083feL,
+ 0x25f504d3208cc341L } },
+ /* 55 << 224 */
+ { { 0x635a8e5ec3a0ee43L,0x70aaebca679898ffL,0x9ee9f5475dc63d56L,
+ 0xce987966ffb34d00L },
+ { 0xf9f86b195e26310aL,0x9e435484382a8ca8L,0x253bcb81c2352fe4L,
+ 0xa4eac8b04474b571L } },
+ /* 56 << 224 */
+ { { 0xc1b97512c1ad8cf8L,0x193b4e9e99e0b697L,0x939d271601e85df0L,
+ 0x4fb265b3cd44eafdL },
+ { 0x321e7dcde51e1ae2L,0x8e3a8ca6e3d8b096L,0x8de46cb052604998L,
+ 0x91099ad839072aa7L } },
+ /* 57 << 224 */
+ { { 0x2617f91c93aa96b8L,0x0fc8716b7fca2e13L,0xa7106f5e95328723L,
+ 0xd1c9c40b262e6522L },
+ { 0xb9bafe8642b7c094L,0x1873439d1543c021L,0xe1baa5de5cbefd5dL,
+ 0xa363fc5e521e8affL } },
+ /* 58 << 224 */
+ { { 0xefe6320df862eaacL,0x14419c6322c647dcL,0x0e06707c4e46d428L,
+ 0xcb6c834f4a178f8fL },
+ { 0x0f993a45d30f917cL,0xd4c4b0499879afeeL,0xb6142a1e70500063L,
+ 0x7c9b41c3a5d9d605L } },
+ /* 59 << 224 */
+ { { 0xbc00fc2f2f8ba2c7L,0x0966eb2f7c67aa28L,0x13f7b5165a786972L,
+ 0x3bfb75578a2fbba0L },
+ { 0x131c4f235a2b9620L,0xbff3ed276faf46beL,0x9b4473d17e172323L,
+ 0x421e8878339f6246L } },
+ /* 60 << 224 */
+ { { 0x0fa8587a25a41632L,0xc0814124a35b6c93L,0x2b18a9f559ebb8dbL,
+ 0x264e335776edb29cL },
+ { 0xaf245ccdc87c51e2L,0x16b3015b501e6214L,0xbb31c5600a3882ceL,
+ 0x6961bb94fec11e04L } },
+ /* 61 << 224 */
+ { { 0x3b825b8deff7a3a0L,0xbec33738b1df7326L,0x68ad747c99604a1fL,
+ 0xd154c9349a3bd499L },
+ { 0xac33506f1cc7a906L,0x73bb53926c560e8fL,0x6428fcbe263e3944L,
+ 0xc11828d51c387434L } },
+ /* 62 << 224 */
+ { { 0x3cd04be13e4b12ffL,0xc3aad9f92d88667cL,0xc52ddcf8248120cfL,
+ 0x985a892e2a389532L },
+ { 0xfbb4b21b3bb85fa0L,0xf95375e08dfc6269L,0xfb4fb06c7ee2aceaL,
+ 0x6785426e309c4d1fL } },
+ /* 63 << 224 */
+ { { 0x659b17c8d8ceb147L,0x9b649eeeb70a5554L,0x6b7fa0b5ac6bc634L,
+ 0xd99fe2c71d6e732fL },
+ { 0x30e6e7628d3abba2L,0x18fee6e7a797b799L,0x5c9d360dc696464dL,
+ 0xe3baeb4827bfde12L } },
+ /* 64 << 224 */
+ { { 0x2bf5db47f23206d5L,0x2f6d34201d260152L,0x17b876533f8ff89aL,
+ 0x5157c30c378fa458L },
+ { 0x7517c5c52d4fb936L,0xef22f7ace6518cdcL,0xdeb483e6bf847a64L,
+ 0xf508455892e0fa89L } },
+ /* 0 << 231 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 231 */
+ { { 0xab9659d8df7304d4L,0xb71bcf1bff210e8eL,0xa9a2438bd73fbd60L,
+ 0x4595cd1f5d11b4deL },
+ { 0x9c0d329a4835859dL,0x4a0f0d2d7dbb6e56L,0xc6038e5edf928a4eL,
+ 0xc94296218f5ad154L } },
+ /* 2 << 231 */
+ { { 0x91213462f23f2d92L,0x6cab71bd60b94078L,0x6bdd0a63176cde20L,
+ 0x54c9b20cee4d54bcL },
+ { 0x3cd2d8aa9f2ac02fL,0x03f8e617206eedb0L,0xc7f68e1693086434L,
+ 0x831469c592dd3db9L } },
+ /* 3 << 231 */
+ { { 0x8521df248f981354L,0x587e23ec3588a259L,0xcbedf281d7a0992cL,
+ 0x06930a5538961407L },
+ { 0x09320debbe5bbe21L,0xa7ffa5b52491817fL,0xe6c8b4d909065160L,
+ 0xac4f3992fff6d2a9L } },
+ /* 4 << 231 */
+ { { 0x7aa7a1583ae9c1bdL,0xe0af6d98e37ce240L,0xe54342d928ab38b4L,
+ 0xe8b750070a1c98caL },
+ { 0xefce86afe02358f2L,0x31b8b856ea921228L,0x052a19120a1c67fcL,
+ 0xb4069ea4e3aead59L } },
+ /* 5 << 231 */
+ { { 0x3232d6e27fa03cb3L,0xdb938e5b0fdd7d88L,0x04c1d2cd2ccbfc5dL,
+ 0xd2f45c12af3a580fL },
+ { 0x592620b57883e614L,0x5fd27e68be7c5f26L,0x139e45a91567e1e3L,
+ 0x2cc71d2d44d8aaafL } },
+ /* 6 << 231 */
+ { { 0x4a9090cde36d0757L,0xf722d7b1d9a29382L,0xfb7fb04c04b48ddfL,
+ 0x628ad2a7ebe16f43L },
+ { 0xcd3fbfb520226040L,0x6c34ecb15104b6c4L,0x30c0754ec903c188L,
+ 0xec336b082d23cab0L } },
+ /* 7 << 231 */
+ { { 0x473d62a21e206ee5L,0xf1e274808c49a633L,0x87ab956ce9f6b2c3L,
+ 0x61830b4862b606eaL },
+ { 0x67cd6846e78e815fL,0xfe40139f4c02082aL,0x52bbbfcb952ec365L,
+ 0x74c116426b9836abL } },
+ /* 8 << 231 */
+ { { 0x9f51439e558df019L,0x230da4baac712b27L,0x518919e355185a24L,
+ 0x4dcefcdd84b78f50L },
+ { 0xa7d90fb2a47d4c5aL,0x55ac9abfb30e009eL,0xfd2fc35974eed273L,
+ 0xb72d824cdbea8fafL } },
+ /* 9 << 231 */
+ { { 0xce721a744513e2caL,0x0b41861238240b2cL,0x05199968d5baa450L,
+ 0xeb1757ed2b0e8c25L },
+ { 0x6ebc3e283dfac6d5L,0xb2431e2e48a237f5L,0x2acb5e2352f61499L,
+ 0x5558a2a7e06c936bL } },
+ /* 10 << 231 */
+ { { 0xd213f923cbb13d1bL,0x98799f425bfb9bfeL,0x1ae8ddc9701144a9L,
+ 0x0b8b3bb64c5595eeL },
+ { 0x0ea9ef2e3ecebb21L,0x17cb6c4b3671f9a7L,0x47ef464f726f1d1fL,
+ 0x171b94846943a276L } },
+ /* 11 << 231 */
+ { { 0x51a4ae2d7ef0329cL,0x0850922291c4402aL,0x64a61d35afd45bbcL,
+ 0x38f096fe3035a851L },
+ { 0xc7468b74a1dec027L,0xe8cf10e74fc7dcbaL,0xea35ff40f4a06353L,
+ 0x0b4c0dfa8b77dd66L } },
+ /* 12 << 231 */
+ { { 0x779b8552de7e5c19L,0xfab28609c1c0256cL,0x64f58eeeabd4743dL,
+ 0x4e8ef8387b6cc93bL },
+ { 0xee650d264cb1bf3dL,0x4c1f9d0973dedf61L,0xaef7c9d7bfb70cedL,
+ 0x1ec0507e1641de1eL } },
+ /* 13 << 231 */
+ { { 0xcd7e5cc7cde45079L,0xde173c9a516ac9e4L,0x517a8494c170315cL,
+ 0x438fd90591d8e8fbL },
+ { 0x5145c506c7d9630bL,0x6457a87bf47d4d75L,0xd31646bf0d9a80e8L,
+ 0x453add2bcef3aabeL } },
+ /* 14 << 231 */
+ { { 0xc9941109a607419dL,0xfaa71e62bb6bca80L,0x34158c1307c431f3L,
+ 0x594abebc992bc47aL },
+ { 0x6dfea691eb78399fL,0x48aafb353f42cba4L,0xedcd65af077c04f0L,
+ 0x1a29a366e884491aL } },
+ /* 15 << 231 */
+ { { 0x023a40e51c21f2bfL,0xf99a513ca5057aeeL,0xa3fe7e25bcab072eL,
+ 0x8568d2e140e32bcfL },
+ { 0x904594ebd3f69d9fL,0x181a973307affab1L,0xe4d68d76b6e330f4L,
+ 0x87a6dafbc75a7fc1L } },
+ /* 16 << 231 */
+ { { 0x549db2b5ef7d9289L,0x2480d4a8197f015aL,0x61d5590bc40493b6L,
+ 0x3a55b52e6f780331L },
+ { 0x40eb8115309eadb0L,0xdea7de5a92e5c625L,0x64d631f0cc6a3d5aL,
+ 0x9d5e9d7c93e8dd61L } },
+ /* 17 << 231 */
+ { { 0xf297bef5206d3ffcL,0x23d5e0337d808bd4L,0x4a4f6912d24cf5baL,
+ 0xe4d8163b09cdaa8aL },
+ { 0x0e0de9efd3082e8eL,0x4fe1246c0192f360L,0x1f9001504b8eee0aL,
+ 0x5219da81f1da391bL } },
+ /* 18 << 231 */
+ { { 0x7bf6a5c1f7ea25aaL,0xd165e6bffbb07d5fL,0xe353936189e78671L,
+ 0xa3fcac892bac4219L },
+ { 0xdfab6fd4f0baa8abL,0x5a4adac1e2c1c2e5L,0x6cd75e3140d85849L,
+ 0xce263fea19b39181L } },
+ /* 19 << 231 */
+ { { 0xcb6803d307032c72L,0x7f40d5ce790968c8L,0xa6de86bddce978f0L,
+ 0x25547c4f368f751cL },
+ { 0xb1e685fd65fb2a9eL,0xce69336f1eb9179cL,0xb15d1c2712504442L,
+ 0xb7df465cb911a06bL } },
+ /* 20 << 231 */
+ { { 0xb8d804a3315980cdL,0x693bc492fa3bebf7L,0x3578aeee2253c504L,
+ 0x158de498cd2474a2L },
+ { 0x1331f5c7cfda8368L,0xd2d7bbb378d7177eL,0xdf61133af3c1e46eL,
+ 0x5836ce7dd30e7be8L } },
+ /* 21 << 231 */
+ { { 0x83084f1994f834cbL,0xd35653d4429ed782L,0xa542f16f59e58243L,
+ 0xc2b52f650470a22dL },
+ { 0xe3b6221b18f23d96L,0xcb05abac3f5252b4L,0xca00938b87d61402L,
+ 0x2f186cdd411933e4L } },
+ /* 22 << 231 */
+ { { 0xe042ece59a29a5c5L,0xb19b3c073b6c8402L,0xc97667c719d92684L,
+ 0xb5624622ebc66372L },
+ { 0x0cb96e653c04fa02L,0x83a7176c8eaa39aaL,0x2033561deaa1633fL,
+ 0x45a9d0864533df73L } },
+ /* 23 << 231 */
+ { { 0xe0542c1d3dc090bcL,0x82c996efaa59c167L,0xe3f735e80ee7fc4dL,
+ 0x7b1793937c35db79L },
+ { 0xb6419e25f8c5dbfdL,0x4d9d7a1e1f327b04L,0x979f6f9b298dfca8L,
+ 0xc7c5dff18de9366aL } },
+ /* 24 << 231 */
+ { { 0x1b7a588d04c82bddL,0x68005534f8319dfdL,0xde8a55b5d8eb9580L,
+ 0x5ea886da8d5bca81L },
+ { 0xe8530a01252a0b4dL,0x1bffb4fe35eaa0a1L,0x2ad828b1d8e99563L,
+ 0x7de96ef595f9cd87L } },
+ /* 25 << 231 */
+ { { 0x4abb2d0cd77d970cL,0x03cfb933d33ef9cbL,0xb0547c018b211fe9L,
+ 0x2fe64809a56ed1c6L },
+ { 0xcb7d5624c2ac98ccL,0x2a1372c01a393e33L,0xc8d1ec1c29660521L,
+ 0xf3d31b04b37ac3e9L } },
+ /* 26 << 231 */
+ { { 0xa29ae9df5ece6e7cL,0x0603ac8f0facfb55L,0xcfe85b7adda233a5L,
+ 0xe618919fbd75f0b8L },
+ { 0xf555a3d299bf1603L,0x1f43afc9f184255aL,0xdcdaf341319a3e02L,
+ 0xd3b117ef03903a39L } },
+ /* 27 << 231 */
+ { { 0xe095da1365d1d131L,0x86f16367c37ad03eL,0x5f37389e462cd8ddL,
+ 0xc103fa04d67a60e6L },
+ { 0x57c34344f4b478f0L,0xce91edd8e117c98dL,0x001777b0231fc12eL,
+ 0x11ae47f2b207bccbL } },
+ /* 28 << 231 */
+ { { 0xd983cf8d20f8a242L,0x7aff5b1df22e1ad8L,0x68fd11d07fc4feb3L,
+ 0x5d53ae90b0f1c3e1L },
+ { 0x50fb7905ec041803L,0x85e3c97714404888L,0x0e67faedac628d8fL,
+ 0x2e8651506668532cL } },
+ /* 29 << 231 */
+ { { 0x15acaaa46a67a6b0L,0xf4cdee25b25cec41L,0x49ee565ae4c6701eL,
+ 0x2a04ca66fc7d63d8L },
+ { 0xeb105018ef0543fbL,0xf709a4f5d1b0d81dL,0x5b906ee62915d333L,
+ 0xf4a8741296f1f0abL } },
+ /* 30 << 231 */
+ { { 0xb6b82fa74d82f4c2L,0x90725a606804efb3L,0xbc82ec46adc3425eL,
+ 0xb7b805812787843eL },
+ { 0xdf46d91cdd1fc74cL,0xdc1c62cbe783a6c4L,0x59d1b9f31a04cbbaL,
+ 0xd87f6f7295e40764L } },
+ /* 31 << 231 */
+ { { 0x02b4cfc1317f4a76L,0x8d2703eb91036bceL,0x98206cc6a5e72a56L,
+ 0x57be9ed1cf53fb0fL },
+ { 0x09374571ef0b17acL,0x74b2655ed9181b38L,0xc8f80ea889935d0eL,
+ 0xc0d9e94291529936L } },
+ /* 32 << 231 */
+ { { 0x196860411e84e0e5L,0xa5db84d3aea34c93L,0xf9d5bb197073a732L,
+ 0xb8d2fe566bcfd7c0L },
+ { 0x45775f36f3eb82faL,0x8cb20cccfdff8b58L,0x1659b65f8374c110L,
+ 0xb8b4a422330c789aL } },
+ /* 33 << 231 */
+ { { 0x75e3c3ea6fe8208bL,0xbd74b9e4286e78feL,0x0be2e81bd7d93a1aL,
+ 0x7ed06e27dd0a5aaeL },
+ { 0x721f5a586be8b800L,0x428299d1d846db28L,0x95cb8e6b5be88ed3L,
+ 0xc3186b231c034e11L } },
+ /* 34 << 231 */
+ { { 0xa6312c9e8977d99bL,0xbe94433183f531e7L,0x8232c0c218d3b1d4L,
+ 0x617aae8be1247b73L },
+ { 0x40153fc4282aec3bL,0xc6063d2ff7b8f823L,0x68f10e583304f94cL,
+ 0x31efae74ee676346L } },
+ /* 35 << 231 */
+ { { 0xbadb6c6d40a9b97cL,0x14702c634f666256L,0xdeb954f15184b2e3L,
+ 0x5184a52694b6ca40L },
+ { 0xfff05337003c32eaL,0x5aa374dd205974c7L,0x9a7638544b0dd71aL,
+ 0x459cd27fdeb947ecL } },
+ /* 36 << 231 */
+ { { 0xa6e28161459c2b92L,0x2f020fa875ee8ef5L,0xb132ec2d30b06310L,
+ 0xc3e15899bc6a4530L },
+ { 0xdc5f53feaa3f451aL,0x3a3c7f23c2d9acacL,0x2ec2f8926b27e58bL,
+ 0x68466ee7d742799fL } },
+ /* 37 << 231 */
+ { { 0x98324dd41fa26613L,0xa2dc6dabbdc29d63L,0xf9675faad712d657L,
+ 0x813994be21fd8d15L },
+ { 0x5ccbb722fd4f7553L,0x5135ff8bf3a36b20L,0x44be28af69559df5L,
+ 0x40b65bed9d41bf30L } },
+ /* 38 << 231 */
+ { { 0xd98bf2a43734e520L,0x5e3abbe3209bdcbaL,0x77c76553bc945b35L,
+ 0x5331c093c6ef14aaL },
+ { 0x518ffe2976b60c80L,0x2285593b7ace16f8L,0xab1f64ccbe2b9784L,
+ 0xe8f2c0d9ab2421b6L } },
+ /* 39 << 231 */
+ { { 0x617d7174c1df065cL,0xafeeb5ab5f6578faL,0x16ff1329263b54a8L,
+ 0x45c55808c990dce3L },
+ { 0x42eab6c0ecc8c177L,0x799ea9b55982ecaaL,0xf65da244b607ef8eL,
+ 0x8ab226ce32a3fc2cL } },
+ /* 40 << 231 */
+ { { 0x745741e57ea973dcL,0x5c00ca7020888f2eL,0x7cdce3cf45fd9cf1L,
+ 0x8a741ef15507f872L },
+ { 0x47c51c2f196b4cecL,0x70d08e43c97ea618L,0x930da15c15b18a2bL,
+ 0x33b6c6782f610514L } },
+ /* 41 << 231 */
+ { { 0xc662e4f807ac9794L,0x1eccf050ba06cb79L,0x1ff08623e7d954e5L,
+ 0x6ef2c5fb24cf71c3L },
+ { 0xb2c063d267978453L,0xa0cf37961d654af8L,0x7cb242ea7ebdaa37L,
+ 0x206e0b10b86747e0L } },
+ /* 42 << 231 */
+ { { 0x481dae5fd5ecfefcL,0x07084fd8c2bff8fcL,0x8040a01aea324596L,
+ 0x4c646980d4de4036L },
+ { 0x9eb8ab4ed65abfc3L,0xe01cb91f13541ec7L,0x8f029adbfd695012L,
+ 0x9ae284833c7569ecL } },
+ /* 43 << 231 */
+ { { 0xa5614c9ea66d80a1L,0x680a3e4475f5f911L,0x0c07b14dceba4fc1L,
+ 0x891c285ba13071c1L },
+ { 0xcac67ceb799ece3cL,0x29b910a941e07e27L,0x66bdb409f2e43123L,
+ 0x06f8b1377ac9ecbeL } },
+ /* 44 << 231 */
+ { { 0x5981fafd38547090L,0x19ab8b9f85e3415dL,0xfc28c194c7e31b27L,
+ 0x843be0aa6fbcbb42L },
+ { 0xf3b1ed43a6db836cL,0x2a1330e401a45c05L,0x4f19f3c595c1a377L,
+ 0xa85f39d044b5ee33L } },
+ /* 45 << 231 */
+ { { 0x3da18e6d4ae52834L,0x5a403b397423dcb0L,0xbb555e0af2374aefL,
+ 0x2ad599c41e8ca111L },
+ { 0x1b3a2fb9014b3bf8L,0x73092684f66d5007L,0x079f1426c4340102L,
+ 0x1827cf818fddf4deL } },
+ /* 46 << 231 */
+ { { 0xc83605f6f10ff927L,0xd387145123739fc6L,0x6d163450cac1c2ccL,
+ 0x6b521296a2ec1ac5L },
+ { 0x0606c4f96e3cb4a5L,0xe47d3f41778abff7L,0x425a8d5ebe8e3a45L,
+ 0x53ea9e97a6102160L } },
+ /* 47 << 231 */
+ { { 0x477a106e39cbb688L,0x532401d2f3386d32L,0x8e564f64b1b9b421L,
+ 0xca9b838881dad33fL },
+ { 0xb1422b4e2093913eL,0x533d2f9269bc8112L,0x3fa017beebe7b2c7L,
+ 0xb2767c4acaf197c6L } },
+ /* 48 << 231 */
+ { { 0xc925ff87aedbae9fL,0x7daf0eb936880a54L,0x9284ddf59c4d0e71L,
+ 0x1581cf93316f8cf5L },
+ { 0x3eeca8873ac1f452L,0xb417fce9fb6aeffeL,0xa5918046eefb8dc3L,
+ 0x73d318ac02209400L } },
+ /* 49 << 231 */
+ { { 0xe800400f728693e5L,0xe87d814b339927edL,0x93e94d3b57ea9910L,
+ 0xff8a35b62245fb69L },
+ { 0x043853d77f200d34L,0x470f1e680f653ce1L,0x81ac05bd59a06379L,
+ 0xa14052c203930c29L } },
+ /* 50 << 231 */
+ { { 0x6b72fab526bc2797L,0x13670d1699f16771L,0x001700521e3e48d1L,
+ 0x978fe401b7adf678L },
+ { 0x55ecfb92d41c5dd4L,0x5ff8e247c7b27da5L,0xe7518272013fb606L,
+ 0x5768d7e52f547a3cL } },
+ /* 51 << 231 */
+ { { 0xbb24eaa360017a5fL,0x6b18e6e49c64ce9bL,0xc225c655103dde07L,
+ 0xfc3672ae7592f7eaL },
+ { 0x9606ad77d06283a1L,0x542fc650e4d59d99L,0xabb57c492a40e7c2L,
+ 0xac948f13a8db9f55L } },
+ /* 52 << 231 */
+ { { 0x6d4c9682b04465c3L,0xe3d062fa6468bd15L,0xa51729ac5f318d7eL,
+ 0x1fc87df69eb6fc95L },
+ { 0x63d146a80591f652L,0xa861b8f7589621aaL,0x59f5f15ace31348cL,
+ 0x8f663391440da6daL } },
+ /* 53 << 231 */
+ { { 0xcfa778acb591ffa3L,0x027ca9c54cdfebceL,0xbe8e05a5444ea6b3L,
+ 0x8aab4e69a78d8254L },
+ { 0x2437f04fb474d6b8L,0x6597ffd4045b3855L,0xbb0aea4eca47ecaaL,
+ 0x568aae8385c7ebfcL } },
+ /* 54 << 231 */
+ { { 0x0e966e64c73b2383L,0x49eb3447d17d8762L,0xde1078218da05dabL,
+ 0x443d8baa016b7236L },
+ { 0x163b63a5ea7610d6L,0xe47e4185ce1ca979L,0xae648b6580baa132L,
+ 0xebf53de20e0d5b64L } },
+ /* 55 << 231 */
+ { { 0x8d3bfcb4d3c8c1caL,0x0d914ef35d04b309L,0x55ef64153de7d395L,
+ 0xbde1666f26b850e8L },
+ { 0xdbe1ca6ed449ab19L,0x8902b322e89a2672L,0xb1674b7edacb7a53L,
+ 0x8e9faf6ef52523ffL } },
+ /* 56 << 231 */
+ { { 0x6ba535da9a85788bL,0xd21f03aebd0626d4L,0x099f8c47e873dc64L,
+ 0xcda8564d018ec97eL },
+ { 0x3e8d7a5cde92c68cL,0x78e035a173323cc4L,0x3ef26275f880ff7cL,
+ 0xa4ee3dff273eedaaL } },
+ /* 57 << 231 */
+ { { 0x58823507af4e18f8L,0x967ec9b50672f328L,0x9ded19d9559d3186L,
+ 0x5e2ab3de6cdce39cL },
+ { 0xabad6e4d11c226dfL,0xf9783f4387723014L,0x9a49a0cf1a885719L,
+ 0xfc0c1a5a90da9dbfL } },
+ /* 58 << 231 */
+ { { 0x8bbaec49571d92acL,0x569e85fe4692517fL,0x8333b014a14ea4afL,
+ 0x32f2a62f12e5c5adL },
+ { 0x98c2ce3a06d89b85L,0xb90741aa2ff77a08L,0x2530defc01f795a2L,
+ 0xd6e5ba0b84b3c199L } },
+ /* 59 << 231 */
+ { { 0x7d8e845112e4c936L,0xae419f7dbd0be17bL,0xa583fc8c22262bc9L,
+ 0x6b842ac791bfe2bdL },
+ { 0x33cef4e9440d6827L,0x5f69f4deef81fb14L,0xf16cf6f6234fbb92L,
+ 0x76ae3fc3d9e7e158L } },
+ /* 60 << 231 */
+ { { 0x4e89f6c2e9740b33L,0x677bc85d4962d6a1L,0x6c6d8a7f68d10d15L,
+ 0x5f9a72240257b1cdL },
+ { 0x7096b9164ad85961L,0x5f8c47f7e657ab4aL,0xde57d7d0f7461d7eL,
+ 0x7eb6094d80ce5ee2L } },
+ /* 61 << 231 */
+ { { 0x0b1e1dfd34190547L,0x8a394f43f05dd150L,0x0a9eb24d97df44e6L,
+ 0x78ca06bf87675719L },
+ { 0x6f0b34626ffeec22L,0x9d91bcea36cdd8fbL,0xac83363ca105be47L,
+ 0x81ba76c1069710e3L } },
+ /* 62 << 231 */
+ { { 0x3d1b24cb28c682c6L,0x27f252288612575bL,0xb587c779e8e66e98L,
+ 0x7b0c03e9405eb1feL },
+ { 0xfdf0d03015b548e7L,0xa8be76e038b36af7L,0x4cdab04a4f310c40L,
+ 0x6287223ef47ecaecL } },
+ /* 63 << 231 */
+ { { 0x678e60558b399320L,0x61fe3fa6c01e4646L,0xc482866b03261a5eL,
+ 0xdfcf45b85c2f244aL },
+ { 0x8fab9a512f684b43L,0xf796c654c7220a66L,0x1d90707ef5afa58fL,
+ 0x2c421d974fdbe0deL } },
+ /* 64 << 231 */
+ { { 0xc4f4cda3af2ebc2fL,0xa0af843dcb4efe24L,0x53b857c19ccd10b1L,
+ 0xddc9d1eb914d3e04L },
+ { 0x7bdec8bb62771debL,0x829277aa91c5aa81L,0x7af18dd6832391aeL,
+ 0x1740f316c71a84caL } },
+ /* 0 << 238 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 238 */
+ { { 0x8928e99aeeaf8c49L,0xee7aa73d6e24d728L,0x4c5007c2e72b156cL,
+ 0x5fcf57c5ed408a1dL },
+ { 0x9f719e39b6057604L,0x7d343c01c2868bbfL,0x2cca254b7e103e2dL,
+ 0xe6eb38a9f131bea2L } },
+ /* 2 << 238 */
+ { { 0xb33e624f8be762b4L,0x2a9ee4d1058e3413L,0x968e636967d805faL,
+ 0x9848949b7db8bfd7L },
+ { 0x5308d7e5d23a8417L,0x892f3b1df3e29da5L,0xc95c139e3dee471fL,
+ 0x8631594dd757e089L } },
+ /* 3 << 238 */
+ { { 0xe0c82a3cde918dccL,0x2e7b599426fdcf4bL,0x82c5024932cb1b2dL,
+ 0xea613a9d7657ae07L },
+ { 0xc2eb5f6cf1fdc9f7L,0xb6eae8b8879fe682L,0x253dfee0591cbc7fL,
+ 0x000da7133e1290e6L } },
+ /* 4 << 238 */
+ { { 0x1083e2ea1f095615L,0x0a28ad7714e68c33L,0x6bfc02523d8818beL,
+ 0xb585113af35850cdL },
+ { 0x7d935f0b30df8aa1L,0xaddda07c4ab7e3acL,0x92c34299552f00cbL,
+ 0xc33ed1de2909df6cL } },
+ /* 5 << 238 */
+ { { 0x22c2195d80e87766L,0x9e99e6d89ddf4ac0L,0x09642e4e65e74934L,
+ 0x2610ffa2ff1ff241L },
+ { 0x4d1d47d4751c8159L,0x697b4985af3a9363L,0x0318ca4687477c33L,
+ 0xa90cb5659441eff3L } },
+ /* 6 << 238 */
+ { { 0x58bb384836f024cbL,0x85be1f7736016168L,0x6c59587cdc7e07f1L,
+ 0x191be071af1d8f02L },
+ { 0xbf169fa5cca5e55cL,0x3864ba3cf7d04eacL,0x915e367f8d7d05dbL,
+ 0xb48a876da6549e5dL } },
+ /* 7 << 238 */
+ { { 0xef89c656580e40a2L,0xf194ed8c728068bcL,0x74528045a47990c9L,
+ 0xf53fc7d75e1a4649L },
+ { 0xbec5ae9b78593e7dL,0x2cac4ee341db65d7L,0xa8c1eb2404a3d39bL,
+ 0x53b7d63403f8f3efL } },
+ /* 8 << 238 */
+ { { 0x2dc40d483e07113cL,0x6e4a5d397d8b63aeL,0x5582a94b79684c2bL,
+ 0x932b33d4622da26cL },
+ { 0xf534f6510dbbf08dL,0x211d07c964c23a52L,0x0eeece0fee5bdc9bL,
+ 0xdf178168f7015558L } },
+ /* 9 << 238 */
+ { { 0xd42946350a712229L,0x93cbe44809273f8cL,0x00b095ef8f13bc83L,
+ 0xbb7419728798978cL },
+ { 0x9d7309a256dbe6e7L,0xe578ec565a5d39ecL,0x3961151b851f9a31L,
+ 0x2da7715de5709eb4L } },
+ /* 10 << 238 */
+ { { 0x867f301753dfabf0L,0x728d2078b8e39259L,0x5c75a0cd815d9958L,
+ 0xf84867a616603be1L },
+ { 0xc865b13d70e35b1cL,0x0241446819b03e2cL,0xe46041daac1f3121L,
+ 0x7c9017ad6f028a7cL } },
+ /* 11 << 238 */
+ { { 0xabc96de90a482873L,0x4265d6b1b77e54d4L,0x68c38e79a57d88e7L,
+ 0xd461d7669ce82de3L },
+ { 0x817a9ec564a7e489L,0xcc5675cda0def5f2L,0x9a00e785985d494eL,
+ 0xc626833f1b03514aL } },
+ /* 12 << 238 */
+ { { 0xabe7905a83cdd60eL,0x50602fb5a1170184L,0x689886cdb023642aL,
+ 0xd568d090a6e1fb00L },
+ { 0x5b1922c70259217fL,0x93831cd9c43141e4L,0xdfca35870c95f86eL,
+ 0xdec2057a568ae828L } },
+ /* 13 << 238 */
+ { { 0xc44ea599f98a759aL,0x55a0a7a2f7c23c1dL,0xd5ffb6e694c4f687L,
+ 0x3563cce212848478L },
+ { 0x812b3517e7b1fbe1L,0x8a7dc9794f7338e0L,0x211ecee952d048dbL,
+ 0x2eea4056c86ea3b8L } },
+ /* 14 << 238 */
+ { { 0xd8cb68a7ba772b34L,0xe16ed3415f4e2541L,0x9b32f6a60fec14dbL,
+ 0xeee376f7391698beL },
+ { 0xe9a7aa1783674c02L,0x65832f975843022aL,0x29f3a8da5ba4990fL,
+ 0x79a59c3afb8e3216L } },
+ /* 15 << 238 */
+ { { 0x9cdc4d2ebd19bb16L,0xc6c7cfd0b3262d86L,0xd4ce14d0969c0b47L,
+ 0x1fa352b713e56128L },
+ { 0x383d55b8973db6d3L,0x71836850e8e5b7bfL,0xc7714596e6bb571fL,
+ 0x259df31f2d5b2dd2L } },
+ /* 16 << 238 */
+ { { 0x568f8925913cc16dL,0x18bc5b6de1a26f5aL,0xdfa413bef5f499aeL,
+ 0xf8835decc3f0ae84L },
+ { 0xb6e60bd865a40ab0L,0x65596439194b377eL,0xbcd8562592084a69L,
+ 0x5ce433b94f23ede0L } },
+ /* 17 << 238 */
+ { { 0xe8e8f04f6ad65143L,0x11511827d6e14af6L,0x3d390a108295c0c7L,
+ 0x71e29ee4621eba16L },
+ { 0xa588fc0963717b46L,0x02be02fee06ad4a2L,0x931558c604c22b22L,
+ 0xbb4d4bd612f3c849L } },
+ /* 18 << 238 */
+ { { 0x54a4f49620efd662L,0x92ba6d20c5952d14L,0x2db8ea1ecc9784c2L,
+ 0x81cc10ca4b353644L },
+ { 0x40b570ad4b4d7f6cL,0x5c9f1d9684a1dcd2L,0x01379f813147e797L,
+ 0xe5c6097b2bd499f5L } },
+ /* 19 << 238 */
+ { { 0x40dcafa6328e5e20L,0xf7b5244a54815550L,0xb9a4f11847bfc978L,
+ 0x0ea0e79fd25825b1L },
+ { 0xa50f96eb646c7ecfL,0xeb811493446dea9dL,0x2af04677dfabcf69L,
+ 0xbe3a068fc713f6e8L } },
+ /* 20 << 238 */
+ { { 0x860d523d42e06189L,0xbf0779414e3aff13L,0x0b616dcac1b20650L,
+ 0xe66dd6d12131300dL },
+ { 0xd4a0fd67ff99abdeL,0xc9903550c7aac50dL,0x022ecf8b7c46b2d7L,
+ 0x3333b1e83abf92afL } },
+ /* 21 << 238 */
+ { { 0x11cc113c6c491c14L,0x0597668880dd3f88L,0xf5b4d9e729d932edL,
+ 0xe982aad8a2c38b6dL },
+ { 0x6f9253478be0dcf0L,0x700080ae65ca53f2L,0xd8131156443ca77fL,
+ 0xe92d6942ec51f984L } },
+ /* 22 << 238 */
+ { { 0xd2a08af885dfe9aeL,0xd825d9a54d2a86caL,0x2c53988d39dff020L,
+ 0xf38b135a430cdc40L },
+ { 0x0c918ae062a7150bL,0xf31fd8de0c340e9bL,0xafa0e7ae4dbbf02eL,
+ 0x5847fb2a5eba6239L } },
+ /* 23 << 238 */
+ { { 0x6b1647dcdccbac8bL,0xb642aa7806f485c8L,0x873f37657038ecdfL,
+ 0x2ce5e865fa49d3feL },
+ { 0xea223788c98c4400L,0x8104a8cdf1fa5279L,0xbcf7cc7a06becfd7L,
+ 0x49424316c8f974aeL } },
+ /* 24 << 238 */
+ { { 0xc0da65e784d6365dL,0xbcb7443f8f759fb8L,0x35c712b17ae81930L,
+ 0x80428dff4c6e08abL },
+ { 0xf19dafefa4faf843L,0xced8538dffa9855fL,0x20ac409cbe3ac7ceL,
+ 0x358c1fb6882da71eL } },
+ /* 25 << 238 */
+ { { 0xafa9c0e5fd349961L,0x2b2cfa518421c2fcL,0x2a80db17f3a28d38L,
+ 0xa8aba5395d138e7eL },
+ { 0x52012d1d6e96eb8dL,0x65d8dea0cbaf9622L,0x57735447b264f56cL,
+ 0xbeebef3f1b6c8da2L } },
+ /* 26 << 238 */
+ { { 0xfc346d98ce785254L,0xd50e8d72bb64a161L,0xc03567c749794addL,
+ 0x15a76065752c7ef6L },
+ { 0x59f3a222961f23d6L,0x378e443873ecc0b0L,0xc74be4345a82fde4L,
+ 0xae509af2d8b9cf34L } },
+ /* 27 << 238 */
+ { { 0x4a61ee46577f44a1L,0xe09b748cb611deebL,0xc0481b2cf5f7b884L,
+ 0x3562667861acfa6bL },
+ { 0x37f4c518bf8d21e6L,0x22d96531b205a76dL,0x37fb85e1954073c0L,
+ 0xbceafe4f65b3a567L } },
+ /* 28 << 238 */
+ { { 0xefecdef7be42a582L,0xd3fc608065046be6L,0xc9af13c809e8dba9L,
+ 0x1e6c9847641491ffL },
+ { 0x3b574925d30c31f7L,0xb7eb72baac2a2122L,0x776a0dacef0859e7L,
+ 0x06fec31421900942L } },
+ /* 29 << 238 */
+ { { 0x2464bc10f8c22049L,0x9bfbcce7875ebf69L,0xd7a88e2a4336326bL,
+ 0xda05261c5bc2acfaL },
+ { 0xc29f5bdceba7efc8L,0x471237ca25dbbf2eL,0xa72773f22975f127L,
+ 0xdc744e8e04d0b326L } },
+ /* 30 << 238 */
+ { { 0x38a7ed16a56edb73L,0x64357e372c007e70L,0xa167d15b5080b400L,
+ 0x07b4116423de4be1L },
+ { 0xb2d91e3274c89883L,0x3c1628212882e7edL,0xad6b36ba7503e482L,
+ 0x48434e8e0ea34331L } },
+ /* 31 << 238 */
+ { { 0x79f4f24f2c7ae0b9L,0xc46fbf811939b44aL,0x76fefae856595eb1L,
+ 0x417b66abcd5f29c7L },
+ { 0x5f2332b2c5ceec20L,0xd69661ffe1a1cae2L,0x5ede7e529b0286e6L,
+ 0x9d062529e276b993L } },
+ /* 32 << 238 */
+ { { 0x324794b07e50122bL,0xdd744f8b4af07ca5L,0x30a12f08d63fc97bL,
+ 0x39650f1a76626d9dL },
+ { 0x101b47f71fa38477L,0x3d815f19d4dc124fL,0x1569ae95b26eb58aL,
+ 0xc3cde18895fb1887L } },
+ /* 33 << 238 */
+ { { 0x54e9f37bf9539a48L,0xb0100e067408c1a5L,0x821d9811ea580cbbL,
+ 0x8af52d3586e50c56L },
+ { 0xdfbd9d47dbbf698bL,0x2961a1ea03dc1c73L,0x203d38f8e76a5df8L,
+ 0x08a53a686def707aL } },
+ /* 34 << 238 */
+ { { 0x26eefb481bee45d4L,0xb3cee3463c688036L,0x463c5315c42f2469L,
+ 0x19d84d2e81378162L },
+ { 0x22d7c3c51c4d349fL,0x65965844163d59c5L,0xcf198c56b8abceaeL,
+ 0x6fb1fb1b628559d5L } },
+ /* 35 << 238 */
+ { { 0x8bbffd0607bf8fe3L,0x46259c583467734bL,0xd8953cea35f7f0d3L,
+ 0x1f0bece2d65b0ff1L },
+ { 0xf7d5b4b3f3c72914L,0x29e8ea953cb53389L,0x4a365626836b6d46L,
+ 0xe849f910ea174fdeL } },
+ /* 36 << 238 */
+ { { 0x7ec62fbbf4737f21L,0xd8dba5ab6209f5acL,0x24b5d7a9a5f9adbeL,
+ 0x707d28f7a61dc768L },
+ { 0x7711460bcaa999eaL,0xba7b174d1c92e4ccL,0x3c4bab6618d4bf2dL,
+ 0xb8f0c980eb8bd279L } },
+ /* 37 << 238 */
+ { { 0x024bea9a324b4737L,0xfba9e42332a83bcaL,0x6e635643a232dcedL,
+ 0x996193672571c8baL },
+ { 0xe8c9f35754b7032bL,0xf936b3ba2442d54aL,0x2263f0f08290c65aL,
+ 0x48989780ee2c7fdbL } },
+ /* 38 << 238 */
+ { { 0xadc5d55a13d4f95eL,0x737cff85ad9b8500L,0x271c557b8a73f43dL,
+ 0xbed617a4e18bc476L },
+ { 0x662454017dfd8ab2L,0xae7b89ae3a2870aaL,0x1b555f5323a7e545L,
+ 0x6791e247be057e4cL } },
+ /* 39 << 238 */
+ { { 0x860136ad324fa34dL,0xea1114474cbeae28L,0x023a4270bedd3299L,
+ 0x3d5c3a7fc1c35c34L },
+ { 0xb0f6db678d0412d2L,0xd92625e2fcdc6b9aL,0x92ae5ccc4e28a982L,
+ 0xea251c3647a3ce7eL } },
+ /* 40 << 238 */
+ { { 0x9d658932790691bfL,0xed61058906b736aeL,0x712c2f04c0d63b6eL,
+ 0x5cf06fd5c63d488fL },
+ { 0x97363facd9588e41L,0x1f9bf7622b93257eL,0xa9d1ffc4667acaceL,
+ 0x1cf4a1aa0a061ecfL } },
+ /* 41 << 238 */
+ { { 0x40e48a49dc1818d0L,0x0643ff39a3621ab0L,0x5768640ce39ef639L,
+ 0x1fc099ea04d86854L },
+ { 0x9130b9c3eccd28fdL,0xd743cbd27eec54abL,0x052b146fe5b475b6L,
+ 0x058d9a82900a7d1fL } },
+ /* 42 << 238 */
+ { { 0x65e0229291262b72L,0x96f924f9bb0edf03L,0x5cfa59c8fe206842L,
+ 0xf60370045eafa720L },
+ { 0x5f30699e18d7dd96L,0x381e8782cbab2495L,0x91669b46dd8be949L,
+ 0xb40606f526aae8efL } },
+ /* 43 << 238 */
+ { { 0x2812b839fc6751a4L,0x16196214fba800efL,0x4398d5ca4c1a2875L,
+ 0x720c00ee653d8349L },
+ { 0xc2699eb0d820007cL,0x880ee660a39b5825L,0x70694694471f6984L,
+ 0xf7d16ea8e3dda99aL } },
+ /* 44 << 238 */
+ { { 0x28d675b2c0519a23L,0x9ebf94fe4f6952e3L,0xf28bb767a2294a8aL,
+ 0x85512b4dfe0af3f5L },
+ { 0x18958ba899b16a0dL,0x95c2430cba7548a7L,0xb30d1b10a16be615L,
+ 0xe3ebbb9785bfb74cL } },
+ /* 45 << 238 */
+ { { 0xa3273cfe18549fdbL,0xf6e200bf4fcdb792L,0x54a76e1883aba56cL,
+ 0x73ec66f689ef6aa2L },
+ { 0x8d17add7d1b9a305L,0xa959c5b9b7ae1b9dL,0x886435226bcc094aL,
+ 0xcc5616c4d7d429b9L } },
+ /* 46 << 238 */
+ { { 0xa6dada01e6a33f7cL,0xc6217a079d4e70adL,0xd619a81809c15b7cL,
+ 0xea06b3290e80c854L },
+ { 0x174811cea5f5e7b9L,0x66dfc310787c65f4L,0x4ea7bd693316ab54L,
+ 0xc12c4acb1dcc0f70L } },
+ /* 47 << 238 */
+ { { 0xe4308d1a1e407dd9L,0xe8a3587c91afa997L,0xea296c12ab77b7a5L,
+ 0xb5ad49e4673c0d52L },
+ { 0x40f9b2b27006085aL,0xa88ff34087bf6ec2L,0x978603b14e3066a6L,
+ 0xb3f99fc2b5e486e2L } },
+ /* 48 << 238 */
+ { { 0x07b53f5eb2e63645L,0xbe57e54784c84232L,0xd779c2167214d5cfL,
+ 0x617969cd029a3acaL },
+ { 0xd17668cd8a7017a0L,0x77b4d19abe9b7ee8L,0x58fd0e939c161776L,
+ 0xa8c4f4efd5968a72L } },
+ /* 49 << 238 */
+ { { 0x296071cc67b3de77L,0xae3c0b8e634f7905L,0x67e440c28a7100c9L,
+ 0xbb8c3c1beb4b9b42L },
+ { 0x6d71e8eac51b3583L,0x7591f5af9525e642L,0xf73a2f7b13f509f3L,
+ 0x618487aa5619ac9bL } },
+ /* 50 << 238 */
+ { { 0x3a72e5f79d61718aL,0x00413bcc7592d28cL,0x7d9b11d3963c35cfL,
+ 0x77623bcfb90a46edL },
+ { 0xdeef273bdcdd2a50L,0x4a741f9b0601846eL,0x33b89e510ec6e929L,
+ 0xcb02319f8b7f22cdL } },
+ /* 51 << 238 */
+ { { 0xbbe1500d084bae24L,0x2f0ae8d7343d2693L,0xacffb5f27cdef811L,
+ 0xaa0c030a263fb94fL },
+ { 0x6eef0d61a0f442deL,0xf92e181727b139d3L,0x1ae6deb70ad8bc28L,
+ 0xa89e38dcc0514130L } },
+ /* 52 << 238 */
+ { { 0x81eeb865d2fdca23L,0x5a15ee08cc8ef895L,0x768fa10a01905614L,
+ 0xeff5b8ef880ee19bL },
+ { 0xf0c0cabbcb1c8a0eL,0x2e1ee9cdb8c838f9L,0x0587d8b88a4a14c0L,
+ 0xf6f278962ff698e5L } },
+ /* 53 << 238 */
+ { { 0xed38ef1c89ee6256L,0xf44ee1fe6b353b45L,0x9115c0c770e903b3L,
+ 0xc78ec0a1818f31dfL },
+ { 0x6c003324b7dccbc6L,0xd96dd1f3163bbc25L,0x33aa82dd5cedd805L,
+ 0x123aae4f7f7eb2f1L } },
+ /* 54 << 238 */
+ { { 0x1723fcf5a26262cdL,0x1f7f4d5d0060ebd5L,0xf19c5c01b2eaa3afL,
+ 0x2ccb9b149790accfL },
+ { 0x1f9c1cad52324aa6L,0x632005267247df54L,0x5732fe42bac96f82L,
+ 0x52fe771f01a1c384L } },
+ /* 55 << 238 */
+ { { 0x546ca13db1001684L,0xb56b4eeea1709f75L,0x266545a9d5db8672L,
+ 0xed971c901e8f3cfbL },
+ { 0x4e7d8691e3a07b29L,0x7570d9ece4b696b9L,0xdc5fa0677bc7e9aeL,
+ 0x68b44cafc82c4844L } },
+ /* 56 << 238 */
+ { { 0x519d34b3bf44da80L,0x283834f95ab32e66L,0x6e6087976278a000L,
+ 0x1e62960e627312f6L },
+ { 0x9b87b27be6901c55L,0x80e7853824fdbc1fL,0xbbbc09512facc27dL,
+ 0x06394239ac143b5aL } },
+ /* 57 << 238 */
+ { { 0x35bb4a40376c1944L,0x7cb6269463da1511L,0xafd29161b7148a3bL,
+ 0xa6f9d9ed4e2ea2eeL },
+ { 0x15dc2ca2880dd212L,0x903c3813a61139a9L,0x2aa7b46d6c0f8785L,
+ 0x36ce2871901c60ffL } },
+ /* 58 << 238 */
+ { { 0xc683b028e10d9c12L,0x7573baa2032f33d3L,0x87a9b1f667a31b58L,
+ 0xfd3ed11af4ffae12L },
+ { 0x83dcaa9a0cb2748eL,0x8239f0185d6fdf16L,0xba67b49c72753941L,
+ 0x2beec455c321cb36L } },
+ /* 59 << 238 */
+ { { 0x880156063f8b84ceL,0x764170838d38c86fL,0x054f1ca7598953ddL,
+ 0xc939e1104e8e7429L },
+ { 0x9b1ac2b35a914f2fL,0x39e35ed3e74b8f9cL,0xd0debdb2781b2fb0L,
+ 0x1585638f2d997ba2L } },
+ /* 60 << 238 */
+ { { 0x9c4b646e9e2fce99L,0x68a210811e80857fL,0x06d54e443643b52aL,
+ 0xde8d6d630d8eb843L },
+ { 0x7032156342146a0aL,0x8ba826f25eaa3622L,0x227a58bd86138787L,
+ 0x43b6c03c10281d37L } },
+ /* 61 << 238 */
+ { { 0x6326afbbb54dde39L,0x744e5e8adb6f2d5fL,0x48b2a99acff158e1L,
+ 0xa93c8fa0ef87918fL },
+ { 0x2182f956de058c5cL,0x216235d2936f9e7aL,0xace0c0dbd2e31e67L,
+ 0xc96449bff23ac3e7L } },
+ /* 62 << 238 */
+ { { 0x7e9a2874170693bdL,0xa28e14fda45e6335L,0x5757f6b356427344L,
+ 0x822e4556acf8edf9L },
+ { 0x2b7a6ee2e6a285cdL,0x5866f211a9df3af0L,0x40dde2ddf845b844L,
+ 0x986c3726110e5e49L } },
+ /* 63 << 238 */
+ { { 0x73680c2af7172277L,0x57b94f0f0cccb244L,0xbdff72672d438ca7L,
+ 0xbad1ce11cf4663fdL },
+ { 0x9813ed9dd8f71caeL,0xf43272a6961fdaa6L,0xbeff0119bd6d1637L,
+ 0xfebc4f9130361978L } },
+ /* 64 << 238 */
+ { { 0x02b37a952f41deffL,0x0e44a59ae63b89b7L,0x673257dc143ff951L,
+ 0x19c02205d752baf4L },
+ { 0x46c23069c4b7d692L,0x2e6392c3fd1502acL,0x6057b1a21b220846L,
+ 0xe51ff9460c1b5b63L } },
+ /* 0 << 245 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 245 */
+ { { 0x6e85cb51566c5c43L,0xcff9c9193597f046L,0x9354e90c4994d94aL,
+ 0xe0a393322147927dL },
+ { 0x8427fac10dc1eb2bL,0x88cfd8c22ff319faL,0xe2d4e68401965274L,
+ 0xfa2e067d67aaa746L } },
+ /* 2 << 245 */
+ { { 0xb6d92a7f3e5f9f11L,0x9afe153ad6cb3b8eL,0x4d1a6dd7ddf800bdL,
+ 0xf6c13cc0caf17e19L },
+ { 0x15f6c58e325fc3eeL,0x71095400a31dc3b2L,0x168e7c07afa3d3e7L,
+ 0x3f8417a194c7ae2dL } },
+ /* 3 << 245 */
+ { { 0xec234772813b230dL,0x634d0f5f17344427L,0x11548ab1d77fc56aL,
+ 0x7fab1750ce06af77L },
+ { 0xb62c10a74f7c4f83L,0xa7d2edc4220a67d9L,0x1c404170921209a0L,
+ 0x0b9815a0face59f0L } },
+ /* 4 << 245 */
+ { { 0x2842589b319540c3L,0x18490f59a283d6f8L,0xa2731f84daae9fcbL,
+ 0x3db6d960c3683ba0L },
+ { 0xc85c63bb14611069L,0xb19436af0788bf05L,0x905459df347460d2L,
+ 0x73f6e094e11a7db1L } },
+ /* 5 << 245 */
+ { { 0xdc7f938eb6357f37L,0xc5d00f792bd8aa62L,0xc878dcb92ca979fcL,
+ 0x37e83ed9eb023a99L },
+ { 0x6b23e2731560bf3dL,0x1086e4591d0fae61L,0x782483169a9414bdL,
+ 0x1b956bc0f0ea9ea1L } },
+ /* 6 << 245 */
+ { { 0x7b85bb91c31b9c38L,0x0c5aa90b48ef57b5L,0xdedeb169af3bab6fL,
+ 0xe610ad732d373685L },
+ { 0xf13870df02ba8e15L,0x0337edb68ca7f771L,0xe4acf747b62c036cL,
+ 0xd921d576b6b94e81L } },
+ /* 7 << 245 */
+ { { 0xdbc864392c422f7aL,0xfb635362ed348898L,0x83084668c45bfcd1L,
+ 0xc357c9e32b315e11L },
+ { 0xb173b5405b2e5b8cL,0x7e946931e102b9a4L,0x17c890eb7b0fb199L,
+ 0xec225a83d61b662bL } },
+ /* 8 << 245 */
+ { { 0xf306a3c8ee3c76cbL,0x3cf11623d32a1f6eL,0xe6d5ab646863e956L,
+ 0x3b8a4cbe5c005c26L },
+ { 0xdcd529a59ce6bb27L,0xc4afaa5204d4b16fL,0xb0624a267923798dL,
+ 0x85e56df66b307fabL } },
+ /* 9 << 245 */
+ { { 0x0281893c2bf29698L,0x91fc19a4d7ce7603L,0x75a5dca3ad9a558fL,
+ 0x40ceb3fa4d50bf77L },
+ { 0x1baf6060bc9ba369L,0x927e1037597888c2L,0xd936bf1986a34c07L,
+ 0xd4cf10c1c34ae980L } },
+ /* 10 << 245 */
+ { { 0x3a3e5334859dd614L,0x9c475b5b18d0c8eeL,0x63080d1f07cd51d5L,
+ 0xc9c0d0a6b88b4326L },
+ { 0x1ac98691c234296fL,0x2a0a83a494887fb6L,0x565114270cea9cf2L,
+ 0x5230a6e8a24802f5L } },
+ /* 11 << 245 */
+ { { 0xf7a2bf0f72e3d5c1L,0x377174464f21439eL,0xfedcbf259ce30334L,
+ 0xe0030a787ce202f9L },
+ { 0x6f2d9ebf1202e9caL,0xe79dde6c75e6e591L,0xf52072aff1dac4f8L,
+ 0x6c8d087ebb9b404dL } },
+ /* 12 << 245 */
+ { { 0xad0fc73dbce913afL,0x909e587b458a07cbL,0x1300da84d4f00c8aL,
+ 0x425cd048b54466acL },
+ { 0xb59cb9be90e9d8bfL,0x991616db3e431b0eL,0xd3aa117a531aecffL,
+ 0x91af92d359f4dc3bL } },
+ /* 13 << 245 */
+ { { 0x9b1ec292e93fda29L,0x76bb6c17e97d91bcL,0x7509d95faface1e6L,
+ 0x3653fe47be855ae3L },
+ { 0x73180b280f680e75L,0x75eefd1beeb6c26cL,0xa4cdf29fb66d4236L,
+ 0x2d70a9976b5821d8L } },
+ /* 14 << 245 */
+ { { 0x7a3ee20720445c36L,0x71d1ac8259877174L,0x0fc539f7949f73e9L,
+ 0xd05cf3d7982e3081L },
+ { 0x8758e20b7b1c7129L,0xffadcc20569e61f2L,0xb05d3a2f59544c2dL,
+ 0xbe16f5c19fff5e53L } },
+ /* 15 << 245 */
+ { { 0x73cf65b8aad58135L,0x622c2119037aa5beL,0x79373b3f646fd6a0L,
+ 0x0e029db50d3978cfL },
+ { 0x8bdfc43794fba037L,0xaefbd687620797a6L,0x3fa5382bbd30d38eL,
+ 0x7627cfbf585d7464L } },
+ /* 16 << 245 */
+ { { 0xb2330fef4e4ca463L,0xbcef72873566cc63L,0xd161d2cacf780900L,
+ 0x135dc5395b54827dL },
+ { 0x638f052e27bf1bc6L,0x10a224f007dfa06cL,0xe973586d6d3321daL,
+ 0x8b0c573826152c8fL } },
+ /* 17 << 245 */
+ { { 0x07ef4f2a34606074L,0x80fe7fe8a0f7047aL,0x3d1a8152e1a0e306L,
+ 0x32cf43d888da5222L },
+ { 0xbf89a95f5f02ffe6L,0x3d9eb9a4806ad3eaL,0x012c17bb79c8e55eL,
+ 0xfdcd1a7499c81dacL } },
+ /* 18 << 245 */
+ { { 0x7043178bb9556098L,0x4090a1df801c3886L,0x759800ff9b67b912L,
+ 0x3e5c0304232620c8L },
+ { 0x4b9d3c4b70dceecaL,0xbb2d3c15181f648eL,0xf981d8376e33345cL,
+ 0xb626289b0cf2297aL } },
+ /* 19 << 245 */
+ { { 0x766ac6598baebdcfL,0x1a28ae0975df01e5L,0xb71283da375876d8L,
+ 0x4865a96d607b9800L },
+ { 0x25dd1bcd237936b2L,0x332f4f4b60417494L,0xd0923d68370a2147L,
+ 0x497f5dfbdc842203L } },
+ /* 20 << 245 */
+ { { 0x9dc74cbd32be5e0fL,0x7475bcb717a01375L,0x438477c950d872b1L,
+ 0xcec67879ffe1d63dL },
+ { 0x9b006014d8578c70L,0xc9ad99a878bb6b8bL,0x6799008e11fb3806L,
+ 0xcfe81435cd44cab3L } },
+ /* 21 << 245 */
+ { { 0xa2ee15822f4fb344L,0xb8823450483fa6ebL,0x622d323d652c7749L,
+ 0xd8474a98beb0a15bL },
+ { 0xe43c154d5d1c00d0L,0x7fd581d90e3e7aacL,0x2b44c6192525ddf8L,
+ 0x67a033ebb8ae9739L } },
+ /* 22 << 245 */
+ { { 0x113ffec19ef2d2e4L,0x1bf6767ed5a0ea7fL,0x57fff75e03714c0aL,
+ 0xa23c422e0a23e9eeL },
+ { 0xdd5f6b2d540f83afL,0xc2c2c27e55ea46a7L,0xeb6b4246672a1208L,
+ 0xd13599f7ae634f7aL } },
+ /* 23 << 245 */
+ { { 0xcf914b5cd7b32c6eL,0x61a5a640eaf61814L,0x8dc3df8b208a1bbbL,
+ 0xef627fd6b6d79aa5L },
+ { 0x44232ffcc4c86bc8L,0xe6f9231b061539feL,0x1d04f25a958b9533L,
+ 0x180cf93449e8c885L } },
+ /* 24 << 245 */
+ { { 0x896895959884aaf7L,0xb1959be307b348a6L,0x96250e573c147c87L,
+ 0xae0efb3add0c61f8L },
+ { 0xed00745eca8c325eL,0x3c911696ecff3f70L,0x73acbc65319ad41dL,
+ 0x7b01a020f0b1c7efL } },
+ /* 25 << 245 */
+ { { 0xea32b29363a1483fL,0x89eabe717a248f96L,0x9c6231d3343157e5L,
+ 0x93a375e5df3c546dL },
+ { 0xe76e93436a2afe69L,0xc4f89100e166c88eL,0x248efd0d4f872093L,
+ 0xae0eb3ea8fe0ea61L } },
+ /* 26 << 245 */
+ { { 0xaf89790d9d79046eL,0x4d650f2d6cee0976L,0xa3935d9a43071ecaL,
+ 0x66fcd2c9283b0bfeL },
+ { 0x0e665eb5696605f1L,0xe77e5d07a54cd38dL,0x90ee050a43d950cfL,
+ 0x86ddebdad32e69b5L } },
+ /* 27 << 245 */
+ { { 0x6ad94a3dfddf7415L,0xf7fa13093f6e8d5aL,0xc4831d1de9957f75L,
+ 0x7de28501d5817447L },
+ { 0x6f1d70789e2aeb6bL,0xba2b9ff4f67a53c2L,0x36963767df9defc3L,
+ 0x479deed30d38022cL } },
+ /* 28 << 245 */
+ { { 0xd2edb89b3a8631e8L,0x8de855de7a213746L,0xb2056cb7b00c5f11L,
+ 0xdeaefbd02c9b85e4L },
+ { 0x03f39a8dd150892dL,0x37b84686218b7985L,0x36296dd8b7375f1aL,
+ 0x472cd4b1b78e898eL } },
+ /* 29 << 245 */
+ { { 0x15dff651e9f05de9L,0xd40450692ce98ba9L,0x8466a7ae9b38024cL,
+ 0xb910e700e5a6b5efL },
+ { 0xae1c56eab3aa8f0dL,0xbab2a5077eee74a6L,0x0dca11e24b4c4620L,
+ 0xfd896e2e4c47d1f4L } },
+ /* 30 << 245 */
+ { { 0xeb45ae53308fbd93L,0x46cd5a2e02c36fdaL,0x6a3d4e90baa48385L,
+ 0xdd55e62e9dbe9960L },
+ { 0xa1406aa02a81ede7L,0x6860dd14f9274ea7L,0xcfdcb0c280414f86L,
+ 0xff410b1022f94327L } },
+ /* 31 << 245 */
+ { { 0x5a33cc3849ad467bL,0xefb48b6c0a7335f1L,0x14fb54a4b153a360L,
+ 0x604aa9d2b52469ccL },
+ { 0x5e9dc486754e48e9L,0x693cb45537471e8eL,0xfb2fd7cd8d3b37b6L,
+ 0x63345e16cf09ff07L } },
+ /* 32 << 245 */
+ { { 0x9910ba6b23a5d896L,0x1fe19e357fe4364eL,0x6e1da8c39a33c677L,
+ 0x15b4488b29fd9fd0L },
+ { 0x1f4392541a1f22bfL,0x920a8a70ab8163e8L,0x3fd1b24907e5658eL,
+ 0xf2c4f79cb6ec839bL } },
+ /* 33 << 245 */
+ { { 0x1abbc3d04aa38d1bL,0x3b0db35cb5d9510eL,0x1754ac783e60dec0L,
+ 0x53272fd7ea099b33L },
+ { 0x5fb0494f07a8e107L,0x4a89e1376a8191faL,0xa113b7f63c4ad544L,
+ 0x88a2e9096cb9897bL } },
+ /* 34 << 245 */
+ { { 0x17d55de3b44a3f84L,0xacb2f34417c6c690L,0x3208816810232390L,
+ 0xf2e8a61f6c733bf7L },
+ { 0xa774aab69c2d7652L,0xfb5307e3ed95c5bcL,0xa05c73c24981f110L,
+ 0x1baae31ca39458c9L } },
+ /* 35 << 245 */
+ { { 0x1def185bcbea62e7L,0xe8ac9eaeeaf63059L,0x098a8cfd9921851cL,
+ 0xd959c3f13abe2f5bL },
+ { 0xa4f1952520e40ae5L,0x320789e307a24aa1L,0x259e69277392b2bcL,
+ 0x58f6c6671918668bL } },
+ /* 36 << 245 */
+ { { 0xce1db2bbc55d2d8bL,0x41d58bb7f4f6ca56L,0x7650b6808f877614L,
+ 0x905e16baf4c349edL },
+ { 0xed415140f661acacL,0x3b8784f0cb2270afL,0x3bc280ac8a402cbaL,
+ 0xd53f71460937921aL } },
+ /* 37 << 245 */
+ { { 0xc03c8ee5e5681e83L,0x62126105f6ac9e4aL,0x9503a53f936b1a38L,
+ 0x3d45e2d4782fecbdL },
+ { 0x69a5c43976e8ae98L,0xb53b2eebbfb4b00eL,0xf167471272386c89L,
+ 0x30ca34a24268bce4L } },
+ /* 38 << 245 */
+ { { 0x7f1ed86c78341730L,0x8ef5beb8b525e248L,0xbbc489fdb74fbf38L,
+ 0x38a92a0e91a0b382L },
+ { 0x7a77ba3f22433ccfL,0xde8362d6a29f05a9L,0x7f6a30ea61189afcL,
+ 0x693b550559ef114fL } },
+ /* 39 << 245 */
+ { { 0x50266bc0cd1797a1L,0xea17b47ef4b7af2dL,0xd6c4025c3df9483eL,
+ 0x8cbb9d9fa37b18c9L },
+ { 0x91cbfd9c4d8424cfL,0xdb7048f1ab1c3506L,0x9eaf641f028206a3L,
+ 0xf986f3f925bdf6ceL } },
+ /* 40 << 245 */
+ { { 0x262143b5224c08dcL,0x2bbb09b481b50c91L,0xc16ed709aca8c84fL,
+ 0xa6210d9db2850ca8L },
+ { 0x6d8df67a09cb54d6L,0x91eef6e0500919a4L,0x90f613810f132857L,
+ 0x9acede47f8d5028bL } },
+ /* 41 << 245 */
+ { { 0x844d1b7190b771c3L,0x563b71e4ba6426beL,0x2efa2e83bdb802ffL,
+ 0x3410cbabab5b4a41L },
+ { 0x555b2d2630da84ddL,0xd0711ae9ee1cc29aL,0xcf3e8c602f547792L,
+ 0x03d7d5dedc678b35L } },
+ /* 42 << 245 */
+ { { 0x071a2fa8ced806b8L,0x222e6134697f1478L,0xdc16fd5dabfcdbbfL,
+ 0x44912ebf121b53b8L },
+ { 0xac9436742496c27cL,0x8ea3176c1ffc26b0L,0xb6e224ac13debf2cL,
+ 0x524cc235f372a832L } },
+ /* 43 << 245 */
+ { { 0xd706e1d89f6f1b18L,0x2552f00544cce35bL,0x8c8326c2a88e31fcL,
+ 0xb5468b2cf9552047L },
+ { 0xce683e883ff90f2bL,0x77947bdf2f0a5423L,0xd0a1b28bed56e328L,
+ 0xaee35253c20134acL } },
+ /* 44 << 245 */
+ { { 0x7e98367d3567962fL,0x379ed61f8188bffbL,0x73bba348faf130a1L,
+ 0x6c1f75e1904ed734L },
+ { 0x189566423b4a79fcL,0xf20bc83d54ef4493L,0x836d425d9111eca1L,
+ 0xe5b5c318009a8dcfL } },
+ /* 45 << 245 */
+ { { 0x3360b25d13221bc5L,0x707baad26b3eeaf7L,0xd7279ed8743a95a1L,
+ 0x7450a875969e809fL },
+ { 0x32b6bd53e5d0338fL,0x1e77f7af2b883bbcL,0x90da12cc1063ecd0L,
+ 0xe2697b58c315be47L } },
+ /* 46 << 245 */
+ { { 0x2771a5bdda85d534L,0x53e78c1fff980eeaL,0xadf1cf84900385e7L,
+ 0x7d3b14f6c9387b62L },
+ { 0x170e74b0cb8f2bd2L,0x2d50b486827fa993L,0xcdbe8c9af6f32babL,
+ 0x55e906b0c3b93ab8L } },
+ /* 47 << 245 */
+ { { 0x747f22fc8fe280d1L,0xcd8e0de5b2e114abL,0x5ab7dbebe10b68b0L,
+ 0x9dc63a9ca480d4b2L },
+ { 0x78d4bc3b4be1495fL,0x25eb3db89359122dL,0x3f8ac05b0809cbdcL,
+ 0xbf4187bbd37c702fL } },
+ /* 48 << 245 */
+ { { 0x84cea0691416a6a5L,0x8f860c7943ef881cL,0x41311f8a38038a5dL,
+ 0xe78c2ec0fc612067L },
+ { 0x494d2e815ad73581L,0xb4cc9e0059604097L,0xff558aecf3612cbaL,
+ 0x35beef7a9e36c39eL } },
+ /* 49 << 245 */
+ { { 0x1845c7cfdbcf41b9L,0x5703662aaea997c0L,0x8b925afee402f6d8L,
+ 0xd0a1b1ae4dd72162L },
+ { 0x9f47b37503c41c4bL,0xa023829b0391d042L,0x5f5045c3503b8b0aL,
+ 0x123c268898c010e5L } },
+ /* 50 << 245 */
+ { { 0x324ec0cc36ba06eeL,0xface31153dd2cc0cL,0xb364f3bef333e91fL,
+ 0xef8aff7328e832b0L },
+ { 0x1e9bad042d05841bL,0x42f0e3df356a21e2L,0xa3270bcb4add627eL,
+ 0xb09a8158d322e711L } },
+ /* 51 << 245 */
+ { { 0x86e326a10fee104aL,0xad7788f83703f65dL,0x7e76543047bc4833L,
+ 0x6cee582b2b9b893aL },
+ { 0x9cd2a167e8f55a7bL,0xefbee3c6d9e4190dL,0x33ee7185d40c2e9dL,
+ 0x844cc9c5a380b548L } },
+ /* 52 << 245 */
+ { { 0x323f8ecd66926e04L,0x0001e38f8110c1baL,0x8dbcac12fc6a7f07L,
+ 0xd65e1d580cec0827L },
+ { 0xd2cd4141be76ca2dL,0x7895cf5ce892f33aL,0x956d230d367139d2L,
+ 0xa91abd3ed012c4c1L } },
+ /* 53 << 245 */
+ { { 0x34fa488387eb36bfL,0xc5f07102914b8fb4L,0x90f0e579adb9c95fL,
+ 0xfe6ea8cb28888195L },
+ { 0x7b9b5065edfa9284L,0x6c510bd22b8c8d65L,0xd7b8ebefcbe8aafdL,
+ 0xedb3af9896b1da07L } },
+ /* 54 << 245 */
+ { { 0x28ff779d6295d426L,0x0c4f6ac73fa3ad7bL,0xec44d0548b8e2604L,
+ 0x9b32a66d8b0050e1L },
+ { 0x1f943366f0476ce2L,0x7554d953a602c7b4L,0xbe35aca6524f2809L,
+ 0xb6881229fd4edbeaL } },
+ /* 55 << 245 */
+ { { 0xe8cd0c8f508efb63L,0x9eb5b5c86abcefc7L,0xf5621f5fb441ab4fL,
+ 0x79e6c046b76a2b22L },
+ { 0x74a4792ce37a1f69L,0xcbd252cb03542b60L,0x785f65d5b3c20bd3L,
+ 0x8dea61434fabc60cL } },
+ /* 56 << 245 */
+ { { 0x45e21446de673629L,0x57f7aa1e703c2d21L,0xa0e99b7f98c868c7L,
+ 0x4e42f66d8b641676L },
+ { 0x602884dc91077896L,0xa0d690cfc2c9885bL,0xfeb4da333b9a5187L,
+ 0x5f789598153c87eeL } },
+ /* 57 << 245 */
+ { { 0x2192dd4752b16dbaL,0xdeefc0e63524c1b1L,0x465ea76ee4383693L,
+ 0x79401711361b8d98L },
+ { 0xa5f9ace9f21a15cbL,0x73d26163efee9aebL,0xcca844b3e677016cL,
+ 0x6c122b0757eaee06L } },
+ /* 58 << 245 */
+ { { 0xb782dce715f09690L,0x508b9b122dfc0fc9L,0x9015ab4b65d89fc6L,
+ 0x5e79dab7d6d5bb0fL },
+ { 0x64f021f06c775aa2L,0xdf09d8cc37c7eca1L,0x9a761367ef2fa506L,
+ 0xed4ca4765b81eec6L } },
+ /* 59 << 245 */
+ { { 0x262ede3610bbb8b5L,0x0737ce830641ada3L,0x4c94288ae9831cccL,
+ 0x487fc1ce8065e635L },
+ { 0xb13d7ab3b8bb3659L,0xdea5df3e855e4120L,0xb9a1857385eb0244L,
+ 0x1a1b8ea3a7cfe0a3L } },
+ /* 60 << 245 */
+ { { 0x3b83711967b0867cL,0x8d5e0d089d364520L,0x52dccc1ed930f0e3L,
+ 0xefbbcec7bf20bbafL },
+ { 0x99cffcab0263ad10L,0xd8199e6dfcd18f8aL,0x64e2773fe9f10617L,
+ 0x0079e8e108704848L } },
+ /* 61 << 245 */
+ { { 0x1169989f8a342283L,0x8097799ca83012e6L,0xece966cb8a6a9001L,
+ 0x93b3afef072ac7fcL },
+ { 0xe6893a2a2db3d5baL,0x263dc46289bf4fdcL,0x8852dfc9e0396673L,
+ 0x7ac708953af362b6L } },
+ /* 62 << 245 */
+ { { 0xbb9cce4d5c2f342bL,0xbf80907ab52d7aaeL,0x97f3d3cd2161bcd0L,
+ 0xb25b08340962744dL },
+ { 0xc5b18ea56c3a1ddaL,0xfe4ec7eb06c92317L,0xb787b890ad1c4afeL,
+ 0xdccd9a920ede801aL } },
+ /* 63 << 245 */
+ { { 0x9ac6dddadb58da1fL,0x22bbc12fb8cae6eeL,0xc6f8bced815c4a43L,
+ 0x8105a92cf96480c7L },
+ { 0x0dc3dbf37a859d51L,0xe3ec7ce63041196bL,0xd9f64b250d1067c9L,
+ 0xf23213213d1f8dd8L } },
+ /* 64 << 245 */
+ { { 0x8b5c619c76497ee8L,0x5d2b0ac6c717370eL,0x98204cb64fcf68e1L,
+ 0x0bdec21162bc6792L },
+ { 0x6973ccefa63b1011L,0xf9e3fa97e0de1ac5L,0x5efb693e3d0e0c8bL,
+ 0x037248e9d2d4fcb4L } },
+ /* 0 << 252 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 252 */
+ { { 0x80802dc91ec34f9eL,0xd8772d3533810603L,0x3f06d66c530cb4f3L,
+ 0x7be5ed0dc475c129L },
+ { 0xcb9e3c1931e82b10L,0xc63d2857c9ff6b4cL,0xb92118c692a1b45eL,
+ 0x0aec44147285bbcaL } },
+ /* 2 << 252 */
+ { { 0xfc189ae71e29a3efL,0xcbe906f04c93302eL,0xd0107914ceaae10eL,
+ 0xb7a23f34b68e19f8L },
+ { 0xe9d875c2efd2119dL,0x03198c6efcadc9c8L,0x65591bf64da17113L,
+ 0x3cf0bbf83d443038L } },
+ /* 3 << 252 */
+ { { 0xae485bb72b724759L,0x945353e1b2d4c63aL,0x82159d07de7d6f2cL,
+ 0x389caef34ec5b109L },
+ { 0x4a8ebb53db65ef14L,0x2dc2cb7edd99de43L,0x816fa3ed83f2405fL,
+ 0x73429bb9c14208a3L } },
+ /* 4 << 252 */
+ { { 0xb618d590b01e6e27L,0x047e2ccde180b2dcL,0xd1b299b504aea4a9L,
+ 0x412c9e1e9fa403a4L },
+ { 0x88d28a3679407552L,0x49c50136f332b8e3L,0x3a1b6fcce668de19L,
+ 0x178851bc75122b97L } },
+ /* 5 << 252 */
+ { { 0xb1e13752fb85fa4cL,0xd61257ce383c8ce9L,0xd43da670d2f74daeL,
+ 0xa35aa23fbf846bbbL },
+ { 0x5e74235d4421fc83L,0xf6df8ee0c363473bL,0x34d7f52a3c4aa158L,
+ 0x50d05aab9bc6d22eL } },
+ /* 6 << 252 */
+ { { 0x8c56e735a64785f4L,0xbc56637b5f29cd07L,0x53b2bb803ee35067L,
+ 0x50235a0fdc919270L },
+ { 0x191ab6d8f2c4aa65L,0xc34758318396023bL,0x80400ba5f0f805baL,
+ 0x8881065b5ec0f80fL } },
+ /* 7 << 252 */
+ { { 0xc370e522cc1b5e83L,0xde2d4ad1860b8bfbL,0xad364df067b256dfL,
+ 0x8f12502ee0138997L },
+ { 0x503fa0dc7783920aL,0xe80014adc0bc866aL,0x3f89b744d3064ba6L,
+ 0x03511dcdcba5dba5L } },
+ /* 8 << 252 */
+ { { 0x197dd46d95a7b1a2L,0x9c4e7ad63c6341fbL,0x426eca29484c2eceL,
+ 0x9211e489de7f4f8aL },
+ { 0x14997f6ec78ef1f4L,0x2b2c091006574586L,0x17286a6e1c3eede8L,
+ 0x25f92e470f60e018L } },
+ /* 9 << 252 */
+ { { 0x805c564631890a36L,0x703ef60057feea5bL,0x389f747caf3c3030L,
+ 0xe0e5daeb54dd3739L },
+ { 0xfe24a4c3c9c9f155L,0x7e4bf176b5393962L,0x37183de2af20bf29L,
+ 0x4a1bd7b5f95a8c3bL } },
+ /* 10 << 252 */
+ { { 0xa83b969946191d3dL,0x281fc8dd7b87f257L,0xb18e2c1354107588L,
+ 0x6372def79b2bafe8L },
+ { 0xdaf4bb480d8972caL,0x3f2dd4b756167a3fL,0x1eace32d84310cf4L,
+ 0xe3bcefafe42700aaL } },
+ /* 11 << 252 */
+ { { 0x5fe5691ed785e73dL,0xa5db5ab62ea60467L,0x02e23d41dfc6514aL,
+ 0x35e8048ee03c3665L },
+ { 0x3f8b118f1adaa0f8L,0x28ec3b4584ce1a5aL,0xe8cacc6e2c6646b8L,
+ 0x1343d185dbd0e40fL } },
+ /* 12 << 252 */
+ { { 0xe5d7f844caaa358cL,0x1a1db7e49924182aL,0xd64cd42d9c875d9aL,
+ 0xb37b515f042eeec8L },
+ { 0x4d4dd4097b165fbeL,0xfc322ed9e206eff3L,0x7dee410259b7e17eL,
+ 0x55a481c08236ca00L } },
+ /* 13 << 252 */
+ { { 0x8c885312c23fc975L,0x1571580605d6297bL,0xa078868ef78edd39L,
+ 0x956b31e003c45e52L },
+ { 0x470275d5ff7b33a6L,0xc8d5dc3a0c7e673fL,0x419227b47e2f2598L,
+ 0x8b37b6344c14a975L } },
+ /* 14 << 252 */
+ { { 0xd0667ed68b11888cL,0x5e0e8c3e803e25dcL,0x34e5d0dcb987a24aL,
+ 0x9f40ac3bae920323L },
+ { 0x5463de9534e0f63aL,0xa128bf926b6328f9L,0x491ccd7cda64f1b7L,
+ 0x7ef1ec27c47bde35L } },
+ /* 15 << 252 */
+ { { 0xa857240fa36a2737L,0x35dc136663621bc1L,0x7a3a6453d4fb6897L,
+ 0x80f1a439c929319dL },
+ { 0xfc18274bf8cb0ba0L,0xb0b537668078c5ebL,0xfb0d49241e01d0efL,
+ 0x50d7c67d372ab09cL } },
+ /* 16 << 252 */
+ { { 0xb4e370af3aeac968L,0xe4f7fee9c4b63266L,0xb4acd4c2e3ac5664L,
+ 0xf8910bd2ceb38cbfL },
+ { 0x1c3ae50cc9c0726eL,0x15309569d97b40bfL,0x70884b7ffd5a5a1bL,
+ 0x3890896aef8314cdL } },
+ /* 17 << 252 */
+ { { 0x58e1515ca5618c93L,0xe665432b77d942d1L,0xb32181bfb6f767a8L,
+ 0x753794e83a604110L },
+ { 0x09afeb7ce8c0dbccL,0x31e02613598673a3L,0x5d98e5577d46db00L,
+ 0xfc21fb8c9d985b28L } },
+ /* 18 << 252 */
+ { { 0xc9040116b0843e0bL,0x53b1b3a869b04531L,0xdd1649f085d7d830L,
+ 0xbb3bcc87cb7427e8L },
+ { 0x77261100c93dce83L,0x7e79da61a1922a2aL,0x587a2b02f3149ce8L,
+ 0x147e1384de92ec83L } },
+ /* 19 << 252 */
+ { { 0x484c83d3af077f30L,0xea78f8440658b53aL,0x912076c2027aec53L,
+ 0xf34714e393c8177dL },
+ { 0x37ef5d15c2376c84L,0x8315b6593d1aa783L,0x3a75c484ef852a90L,
+ 0x0ba0c58a16086bd4L } },
+ /* 20 << 252 */
+ { { 0x29688d7a529a6d48L,0x9c7f250dc2f19203L,0x123042fb682e2df9L,
+ 0x2b7587e7ad8121bcL },
+ { 0x30fc0233e0182a65L,0xb82ecf87e3e1128aL,0x7168286193fb098fL,
+ 0x043e21ae85e9e6a7L } },
+ /* 21 << 252 */
+ { { 0xab5b49d666c834eaL,0x3be43e1847414287L,0xf40fb859219a2a47L,
+ 0x0e6559e9cc58df3cL },
+ { 0xfe1dfe8e0c6615b4L,0x14abc8fd56459d70L,0x7be0fa8e05de0386L,
+ 0x8e63ef68e9035c7cL } },
+ /* 22 << 252 */
+ { { 0x116401b453b31e91L,0x0cba7ad44436b4d8L,0x9151f9a0107afd66L,
+ 0xafaca8d01f0ee4c4L },
+ { 0x75fe5c1d9ee9761cL,0x3497a16bf0c0588fL,0x3ee2bebd0304804cL,
+ 0xa8fb9a60c2c990b9L } },
+ /* 23 << 252 */
+ { { 0xd14d32fe39251114L,0x36bf25bccac73366L,0xc9562c66dba7495cL,
+ 0x324d301b46ad348bL },
+ { 0x9f46620cd670407eL,0x0ea8d4f1e3733a01L,0xd396d532b0c324e0L,
+ 0x5b211a0e03c317cdL } },
+ /* 24 << 252 */
+ { { 0x090d7d205ffe7b37L,0x3b7f3efb1747d2daL,0xa2cb525fb54fc519L,
+ 0x6e220932f66a971eL },
+ { 0xddc160dfb486d440L,0x7fcfec463fe13465L,0x83da7e4e76e4c151L,
+ 0xd6fa48a1d8d302b5L } },
+ /* 25 << 252 */
+ { { 0xc6304f265872cd88L,0x806c1d3c278b90a1L,0x3553e725caf0bc1cL,
+ 0xff59e603bb9d8d5cL },
+ { 0xa4550f327a0b85ddL,0xdec5720a93ecc217L,0x0b88b74169d62213L,
+ 0x7212f2455b365955L } },
+ /* 26 << 252 */
+ { { 0x20764111b5cae787L,0x13cb7f581dfd3124L,0x2dca77da1175aefbL,
+ 0xeb75466bffaae775L },
+ { 0x74d76f3bdb6cff32L,0x7440f37a61fcda9aL,0x1bb3ac92b525028bL,
+ 0x20fbf8f7a1975f29L } },
+ /* 27 << 252 */
+ { { 0x982692e1df83097fL,0x28738f6c554b0800L,0xdc703717a2ce2f2fL,
+ 0x7913b93c40814194L },
+ { 0x049245931fe89636L,0x7b98443ff78834a6L,0x11c6ab015114a5a1L,
+ 0x60deb383ffba5f4cL } },
+ /* 28 << 252 */
+ { { 0x4caa54c601a982e6L,0x1dd35e113491cd26L,0x973c315f7cbd6b05L,
+ 0xcab0077552494724L },
+ { 0x04659b1f6565e15aL,0xbf30f5298c8fb026L,0xfc21641ba8a0de37L,
+ 0xe9c7a366fa5e5114L } },
+ /* 29 << 252 */
+ { { 0xdb849ca552f03ad8L,0xc7e8dbe9024e35c0L,0xa1a2bbaccfc3c789L,
+ 0xbf733e7d9c26f262L },
+ { 0x882ffbf5b8444823L,0xb7224e886bf8483bL,0x53023b8b65bef640L,
+ 0xaabfec91d4d5f8cdL } },
+ /* 30 << 252 */
+ { { 0xa40e1510079ea1bdL,0x1ad9addcd05d5d26L,0xdb3f2eab13e68d4fL,
+ 0x1cff1ae2640f803fL },
+ { 0xe0e7b749d4cee117L,0x8e9f275b4036d909L,0xce34e31d8f4d4c38L,
+ 0x22b37f69d75130fcL } },
+ /* 31 << 252 */
+ { { 0x83e0f1fdb4014604L,0xa8ce991989415078L,0x82375b7541792efeL,
+ 0x4f59bf5c97d4515bL },
+ { 0xac4f324f923a277dL,0xd9bc9b7d650f3406L,0xc6fa87d18a39bc51L,
+ 0x825885305ccc108fL } },
+ /* 32 << 252 */
+ { { 0x5ced3c9f82e4c634L,0x8efb83143a4464f8L,0xe706381b7a1dca25L,
+ 0x6cd15a3c5a2a412bL },
+ { 0x9347a8fdbfcd8fb5L,0x31db2eef6e54cd22L,0xc4aeb11ef8d8932fL,
+ 0x11e7c1ed344411afL } },
+ /* 33 << 252 */
+ { { 0x2653050cdc9a151eL,0x9edbfc083bb0a859L,0x926c81c7fd5691e7L,
+ 0x9c1b23426f39019aL },
+ { 0x64a81c8b7f8474b9L,0x90657c0701761819L,0x390b333155e0375aL,
+ 0xc676c626b6ebc47dL } },
+ /* 34 << 252 */
+ { { 0x51623247b7d6dee8L,0x0948d92779659313L,0x99700161e9ab35edL,
+ 0x06cc32b48ddde408L },
+ { 0x6f2fd664061ef338L,0x1606fa02c202e9edL,0x55388bc1929ba99bL,
+ 0xc4428c5e1e81df69L } },
+ /* 35 << 252 */
+ { { 0xce2028aef91b0b2aL,0xce870a23f03dfd3fL,0x66ec2c870affe8edL,
+ 0xb205fb46284d0c00L },
+ { 0xbf5dffe744cefa48L,0xb6fc37a8a19876d7L,0xbecfa84c08b72863L,
+ 0xd7205ff52576374fL } },
+ /* 36 << 252 */
+ { { 0x80330d328887de41L,0x5de0df0c869ea534L,0x13f427533c56ea17L,
+ 0xeb1f6069452b1a78L },
+ { 0x50474396e30ea15cL,0x575816a1c1494125L,0xbe1ce55bfe6bb38fL,
+ 0xb901a94896ae30f7L } },
+ /* 37 << 252 */
+ { { 0xe5af0f08d8fc3548L,0x5010b5d0d73bfd08L,0x993d288053fe655aL,
+ 0x99f2630b1c1309fdL },
+ { 0xd8677bafb4e3b76fL,0x14e51ddcb840784bL,0x326c750cbf0092ceL,
+ 0xc83d306bf528320fL } },
+ /* 38 << 252 */
+ { { 0xc445671577d4715cL,0xd30019f96b703235L,0x207ccb2ed669e986L,
+ 0x57c824aff6dbfc28L },
+ { 0xf0eb532fd8f92a23L,0x4a557fd49bb98fd2L,0xa57acea7c1e6199aL,
+ 0x0c6638208b94b1edL } },
+ /* 39 << 252 */
+ { { 0x9b42be8ff83a9266L,0xc7741c970101bd45L,0x95770c1107bd9cebL,
+ 0x1f50250a8b2e0744L },
+ { 0xf762eec81477b654L,0xc65b900e15efe59aL,0x88c961489546a897L,
+ 0x7e8025b3c30b4d7cL } },
+ /* 40 << 252 */
+ { { 0xae4065ef12045cf9L,0x6fcb2caf9ccce8bdL,0x1fa0ba4ef2cf6525L,
+ 0xf683125dcb72c312L },
+ { 0xa01da4eae312410eL,0x67e286776cd8e830L,0xabd9575298fb3f07L,
+ 0x05f11e11eef649a5L } },
+ /* 41 << 252 */
+ { { 0xba47faef9d3472c2L,0x3adff697c77d1345L,0x4761fa04dd15afeeL,
+ 0x64f1f61ab9e69462L },
+ { 0xfa691fab9bfb9093L,0x3df8ae8fa1133dfeL,0xcd5f896758cc710dL,
+ 0xfbb88d5016c7fe79L } },
+ /* 42 << 252 */
+ { { 0x8e011b4ce88c50d1L,0x7532e807a8771c4fL,0x64c78a48e2278ee4L,
+ 0x0b283e833845072aL },
+ { 0x98a6f29149e69274L,0xb96e96681868b21cL,0x38f0adc2b1a8908eL,
+ 0x90afcff71feb829dL } },
+ /* 43 << 252 */
+ { { 0x9915a383210b0856L,0xa5a80602def04889L,0x800e9af97c64d509L,
+ 0x81382d0bb8996f6fL },
+ { 0x490eba5381927e27L,0x46c63b324af50182L,0x784c5fd9d3ad62ceL,
+ 0xe4fa1870f8ae8736L } },
+ /* 44 << 252 */
+ { { 0x4ec9d0bcd7466b25L,0x84ddbe1adb235c65L,0x5e2645ee163c1688L,
+ 0x570bd00e00eba747L },
+ { 0xfa51b629128bfa0fL,0x92fce1bd6c1d3b68L,0x3e7361dcb66778b1L,
+ 0x9c7d249d5561d2bbL } },
+ /* 45 << 252 */
+ { { 0xa40b28bf0bbc6229L,0x1c83c05edfd91497L,0x5f9f5154f083df05L,
+ 0xbac38b3ceee66c9dL },
+ { 0xf71db7e3ec0dfcfdL,0xf2ecda8e8b0a8416L,0x52fddd867812aa66L,
+ 0x2896ef104e6f4272L } },
+ /* 46 << 252 */
+ { { 0xff27186a0fe9a745L,0x08249fcd49ca70dbL,0x7425a2e6441cac49L,
+ 0xf4a0885aece5ff57L },
+ { 0x6e2cb7317d7ead58L,0xf96cf7d61898d104L,0xafe67c9d4f2c9a89L,
+ 0x89895a501c7bf5bcL } },
+ /* 47 << 252 */
+ { { 0xdc7cb8e5573cecfaL,0x66497eaed15f03e6L,0x6bc0de693f084420L,
+ 0x323b9b36acd532b0L },
+ { 0xcfed390a0115a3c1L,0x9414c40b2d65ca0eL,0x641406bd2f530c78L,
+ 0x29369a44833438f2L } },
+ /* 48 << 252 */
+ { { 0x996884f5903fa271L,0xe6da0fd2b9da921eL,0xa6f2f2695db01e54L,
+ 0x1ee3e9bd6876214eL },
+ { 0xa26e181ce27a9497L,0x36d254e48e215e04L,0x42f32a6c252cabcaL,
+ 0x9948148780b57614L } },
+ /* 49 << 252 */
+ { { 0x4c4dfe6940d9cae1L,0x0586958011a10f09L,0xca287b573491b64bL,
+ 0x77862d5d3fd4a53bL },
+ { 0xbf94856e50349126L,0x2be30bd171c5268fL,0x10393f19cbb650a6L,
+ 0x639531fe778cf9fdL } },
+ /* 50 << 252 */
+ { { 0x02556a11b2935359L,0xda38aa96af8c126eL,0x47dbe6c20960167fL,
+ 0x37bbabb6501901cdL },
+ { 0xb6e979e02c947778L,0xd69a51757a1a1dc6L,0xc3ed50959d9faf0cL,
+ 0x4dd9c0961d5fa5f0L } },
+ /* 51 << 252 */
+ { { 0xa0c4304d64f16ea8L,0x8b1cac167e718623L,0x0b5765467c67f03eL,
+ 0x559cf5adcbd88c01L },
+ { 0x074877bb0e2af19aL,0x1f717ec1a1228c92L,0x70bcb800326e8920L,
+ 0xec6e2c5c4f312804L } },
+ /* 52 << 252 */
+ { { 0x426aea7d3fca4752L,0xf12c09492211f62aL,0x24beecd87be7b6b5L,
+ 0xb77eaf4c36d7a27dL },
+ { 0x154c2781fda78fd3L,0x848a83b0264eeabeL,0x81287ef04ffe2bc4L,
+ 0x7b6d88c6b6b6fc2aL } },
+ /* 53 << 252 */
+ { { 0x805fb947ce417d99L,0x4b93dcc38b916cc4L,0x72e65bb321273323L,
+ 0xbcc1badd6ea9886eL },
+ { 0x0e2230114bc5ee85L,0xa561be74c18ee1e4L,0x762fd2d4a6bcf1f1L,
+ 0x50e6a5a495231489L } },
+ /* 54 << 252 */
+ { { 0xca96001fa00b500bL,0x5c098cfc5d7dcdf5L,0xa64e2d2e8c446a85L,
+ 0xbae9bcf1971f3c62L },
+ { 0x4ec226838435a2c5L,0x8ceaed6c4bad4643L,0xe9f8fb47ccccf4e3L,
+ 0xbd4f3fa41ce3b21eL } },
+ /* 55 << 252 */
+ { { 0xd79fb110a3db3292L,0xe28a37dab536c66aL,0x279ce87b8e49e6a9L,
+ 0x70ccfe8dfdcec8e3L },
+ { 0x2193e4e03ba464b2L,0x0f39d60eaca9a398L,0x7d7932aff82c12abL,
+ 0xd8ff50ed91e7e0f7L } },
+ /* 56 << 252 */
+ { { 0xea961058fa28a7e0L,0xc726cf250bf5ec74L,0xe74d55c8db229666L,
+ 0x0bd9abbfa57f5799L },
+ { 0x7479ef074dfc47b3L,0xd9c65fc30c52f91dL,0x8e0283fe36a8bde2L,
+ 0xa32a8b5e7d4b7280L } },
+ /* 57 << 252 */
+ { { 0x6a677c6112e83233L,0x0fbb3512dcc9bf28L,0x562e8ea50d780f61L,
+ 0x0db8b22b1dc4e89cL },
+ { 0x0a6fd1fb89be0144L,0x8c77d246ca57113bL,0x4639075dff09c91cL,
+ 0x5b47b17f5060824cL } },
+ /* 58 << 252 */
+ { { 0x58aea2b016287b52L,0xa1343520d0cd8eb0L,0x6148b4d0c5d58573L,
+ 0xdd2b6170291c68aeL },
+ { 0xa61b39291da3b3b7L,0x5f946d7908c4ac10L,0x4105d4a57217d583L,
+ 0x5061da3d25e6de5eL } },
+ /* 59 << 252 */
+ { { 0x3113940dec1b4991L,0xf12195e136f485aeL,0xa7507fb2731a2ee0L,
+ 0x95057a8e6e9e196eL },
+ { 0xa3c2c9112e130136L,0x97dfbb3633c60d15L,0xcaf3c581b300ee2bL,
+ 0x77f25d90f4bac8b8L } },
+ /* 60 << 252 */
+ { { 0xdb1c4f986d840cd6L,0x471d62c0e634288cL,0x8ec2f85ecec8a161L,
+ 0x41f37cbcfa6f4ae2L },
+ { 0x6793a20f4b709985L,0x7a7bd33befa8985bL,0x2c6a3fbd938e6446L,
+ 0x190426192a8d47c1L } },
+ /* 61 << 252 */
+ { { 0x16848667cc36975fL,0x02acf1689d5f1dfbL,0x62d41ad4613baa94L,
+ 0xb56fbb929f684670L },
+ { 0xce610d0de9e40569L,0x7b99c65f35489fefL,0x0c88ad1b3df18b97L,
+ 0x81b7d9be5d0e9edbL } },
+ /* 62 << 252 */
+ { { 0xd85218c0c716cc0aL,0xf4b5ff9085691c49L,0xa4fd666bce356ac6L,
+ 0x17c728954b327a7aL },
+ { 0xf93d5085da6be7deL,0xff71530e3301d34eL,0x4cd96442d8f448e8L,
+ 0x9283d3312ed18ffaL } },
+ /* 63 << 252 */
+ { { 0x4d33dd992a849870L,0xa716964b41576335L,0xff5e3a9b179be0e5L,
+ 0x5b9d6b1b83b13632L },
+ { 0x3b8bd7d4a52f313bL,0xc9dd95a0637a4660L,0x300359620b3e218fL,
+ 0xce1481a3c7b28a3cL } },
+ /* 64 << 252 */
+ { { 0xab41b43a43228d83L,0x24ae1c304ad63f99L,0x8e525f1a46a51229L,
+ 0x14af860fcd26d2b4L },
+ { 0xd6baef613f714aa1L,0xf51865adeb78795eL,0xd3e21fcee6a9d694L,
+ 0x82ceb1dd8a37b527L } },
+};
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_add_only_4(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit tmpd[2 * 4 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* tmp;
+ sp_digit* negy;
+ int i;
+ ecc_recode_256 v[37];
+ int err;
+
+ (void)g;
+ (void)heap;
+
+ err = sp_256_point_new_4(heap, rtd, rt);
+ if (err == MP_OKAY)
+ err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#else
+ tmp = tmpd;
+#endif
+ negy = tmp;
+
+ if (err == MP_OKAY) {
+ sp_256_ecc_recode_7_4(k, v);
+
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ i = 36;
+ XMEMCPY(rt->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+ XMEMCPY(rt->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+ rt->infinity = !v[i].i;
+ for (--i; i>=0; i--) {
+ XMEMCPY(p->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+ XMEMCPY(p->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+ p->infinity = !v[i].i;
+ sp_256_sub_4(negy, p256_mod, p->y);
+ sp_256_cond_copy_4(p->y, negy, 0 - v[i].neg);
+ sp_256_proj_point_add_qz1_4(rt, rt, p, tmp);
+ }
+ if (map != 0) {
+ sp_256_map_4(r, rt, tmp);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 4 * 5);
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ ForceZero(tmp, sizeof(sp_digit) * 2 * 4 * 5);
+#endif
+ sp_256_point_free_4(p, 0, heap);
+ sp_256_point_free_4(rt, 0, heap);
+
+ return MP_OKAY;
+}
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ return sp_256_ecc_mulmod_add_only_4(r, NULL, p256_table,
+ k, map, heap);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
/* Multiply the base point of P256 by the scalar and return the result.
 * If map is true then convert result to affine coordinates.
 *
 * Public wrapper: converts the mp_int scalar into the internal 4-digit
 * representation, performs the fixed-base multiplication and converts the
 * result back into an ecc_point.
 *
 * km   Scalar to multiply by.
 * r    Resulting point.
 * map  Indicates whether to convert result to affine.
 * heap Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    /* Stack storage used when dynamic allocation is disabled. */
    sp_point_256 p;
    sp_digit kd[4];
#endif
    sp_point_256* point;
    sp_digit* k = NULL;
    int err = MP_OKAY;

    err = sp_256_point_new_4(heap, p, point);
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        /* Allocate the scalar as four 64-bit digits on the heap. */
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
                               DYNAMIC_TYPE_ECC);
        if (k == NULL) {
            err = MEMORY_E;
        }
    }
#else
    k = kd;
#endif
    if (err == MP_OKAY) {
        /* Convert the scalar from mp_int form into 4 digits. */
        sp_256_from_mp(k, 4, km);

        /* Fixed-base multiplication using the pre-computed table. */
        err = sp_256_ecc_mulmod_base_4(point, k, map, heap);
    }
    if (err == MP_OKAY) {
        /* Copy the internal point out to the caller's ecc_point. */
        err = sp_256_point_to_ecc_point_4(point, r);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (k != NULL) {
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    }
#endif
    sp_256_point_free_4(point, 0, heap);

    return err;
}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time: all four digits are OR-ed together
+ * before the single comparison, so timing does not depend on the value.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_4(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * The carry is propagated through all four 64-bit digits with an
+ * adds/adcs chain; a carry out of the top digit is discarded.
+ * Loads and stores are interleaved with the add chain.
+ *
+ * a A single precision integer.
+ */
+static void sp_256_add_one_4(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp x1, x2, [%[a], 0]\n\t"
+        "adds x1, x1, #1\n\t"
+        "ldr x3, [%[a], 16]\n\t"
+        "adcs x2, x2, xzr\n\t"
+        "ldr x4, [%[a], 24]\n\t"
+        "adcs x3, x3, xzr\n\t"
+        "stp x1, x2, [%[a], 0]\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "stp x3, x4, [%[a], 16]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "x1", "x2", "x3", "x4"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * Bytes are consumed eight at a time from the end of the array (least
+ * significant digit first).  A tail of 1-7 bytes is written byte-wise
+ * into the top used digit and any remaining digits are zeroed.
+ *
+ * r     A single precision integer.
+ * size  Maximum number of bytes to convert
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    /* Pack 8 big-endian bytes into each 64-bit digit. */
+    for (i = n - 1,j = 0; i >= 7; i -= 8) {
+        r[j]  = ((sp_digit)a[i - 0] <<  0) |
+                ((sp_digit)a[i - 1] <<  8) |
+                ((sp_digit)a[i - 2] << 16) |
+                ((sp_digit)a[i - 3] << 24) |
+                ((sp_digit)a[i - 4] << 32) |
+                ((sp_digit)a[i - 5] << 40) |
+                ((sp_digit)a[i - 6] << 48) |
+                ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    /* i+1 most-significant bytes (a[0..i]) remain. */
+    if (i >= 0) {
+        r[j] = 0;
+
+        /* NOTE(review): the byte-wise store into r assumes a little-endian
+         * digit layout - true on the AArch64 targets this file serves. */
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    /* Zero the unused high digits. */
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generate a random scalar in the range 1..order-1.
+ *
+ * A 256-bit candidate is drawn from the RNG and accepted only when it is
+ * below order-2 (rejection sampling); adding one then yields a uniformly
+ * distributed value in [1, order-1].
+ *
+ * rng Random number generator.
+ * k   Receives the generated scalar.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_4(WC_RNG* rng, sp_digit* k)
+{
+    byte randBuf[32];
+    int ret;
+
+    for (;;) {
+        ret = wc_RNG_GenerateBlock(rng, randBuf, sizeof(randBuf));
+        if (ret != 0) {
+            /* RNG failure - propagate the error code. */
+            break;
+        }
+        sp_256_from_bin(k, 4, randBuf, (int)sizeof(randBuf));
+        if (sp_256_cmp_4(k, p256_order2) < 0) {
+            /* Candidate accepted: shift range from [0, order-2) to
+             * [1, order-1). */
+            sp_256_add_one_4(k);
+            break;
+        }
+    }
+
+    return ret;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng  Random number generator.
+ * priv Generated private value.
+ * pub  Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack buffers used when dynamic allocation is not configured. */
+    sp_point_256 p;
+    sp_digit kd[4];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    /* Private key: random scalar in [1, order-1]. */
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_gen_k_4(rng, k);
+    }
+    /* Public key: point = k.G in affine coordinates. */
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    /* Check order.point is the point at infinity (x == 0 and y == 0). */
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_4(infinity, point, p256_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_256_iszero_4(point->x) == 0) || (sp_256_iszero_4(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_4(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_256_point_free_4(infinity, 1, heap);
+#endif
+    sp_256_point_free_4(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * Digits are emitted most significant first and each digit is split into
+ * bytes from the most significant byte downwards.
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+    int word;
+    int shift;
+    int out = 0;
+
+    for (word = 3; word >= 0; word--) {
+        for (shift = 56; shift >= 0; shift -= 8) {
+            a[out++] = (byte)(r[word] >> shift);
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack buffers used when dynamic allocation is not configured. */
+    sp_point_256 p;
+    sp_digit kd[4];
+#endif
+    sp_point_256* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    /* Output is always exactly 32 bytes. */
+    if (*outLen < 32U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 4, priv);
+        sp_256_point_from_ecc_point_4(point, pub);
+            /* point = priv.pub, mapped to affine coordinates. */
+            err = sp_256_ecc_mulmod_4(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Shared secret is the big-endian X ordinate, fixed 32 bytes. */
+        sp_256_to_bin(point->x, out);
+        *outLen = 32;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Add b to a into r. (r = a + b)
+ *
+ * Four-digit add with carry chain; the carry out of the top digit is
+ * materialized with cset and returned.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the carry out of the addition (0 or 1).
+ */
+static sp_digit sp_256_add_4(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp x3, x4, [%[a], 0]\n\t"
+        "ldp x7, x8, [%[b], 0]\n\t"
+        "adds x3, x3, x7\n\t"
+        "ldp x5, x6, [%[a], 16]\n\t"
+        "adcs x4, x4, x8\n\t"
+        "ldp x9, x10, [%[b], 16]\n\t"
+        "adcs x5, x5, x9\n\t"
+        "stp x3, x4, [%[r], 0]\n\t"
+        "adcs x6, x6, x10\n\t"
+        "stp x5, x6, [%[r], 16]\n\t"
+        "cset %[r], cs\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+    );
+
+    return (sp_digit)r;
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (schoolbook) multiply: for each 8-byte output offset
+ * x5, all digit products a[x3/8]*b[x4/8] with x3+x4 == x5 are accumulated
+ * into the 3-register column accumulator x6:x7:x8.  The 512-bit result is
+ * built in tmp and copied out at the end, so r may alias a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_digit tmp[8];
+
+    __asm__ __volatile__ (
+        "mov x5, 0\n\t"
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "\n1:\n\t"
+        "subs x3, x5, 24\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[b], x4]\n\t"
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 32\n\t"
+        "b.eq 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        "cmp x5, 48\n\t"
+        "b.le 1b\n\t"
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Fully unrolled 4x4 digit schoolbook multiply producing the 512-bit
+ * result directly into r.  Loads of a and b are interleaved with the
+ * first products.  r must not alias a or b as low result digits are
+ * stored only at the end, after all inputs are consumed.
+ *
+ * NOTE(review): the local tmp and the [tmp] input operand are never
+ * referenced inside the asm body and appear to be dead - confirm and
+ * remove upstream.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_digit tmp[4];
+
+    __asm__ __volatile__ (
+        "ldp x16, x17, [%[a], 0]\n\t"
+        "ldp x21, x22, [%[b], 0]\n\t"
+        "# A[0] * B[0]\n\t"
+        "mul x8, x16, x21\n\t"
+        "ldr x19, [%[a], 16]\n\t"
+        "umulh x9, x16, x21\n\t"
+        "ldr x23, [%[b], 16]\n\t"
+        "# A[0] * B[1]\n\t"
+        "mul x4, x16, x22\n\t"
+        "ldr x20, [%[a], 24]\n\t"
+        "umulh x5, x16, x22\n\t"
+        "ldr x24, [%[b], 24]\n\t"
+        "adds x9, x9, x4\n\t"
+        "# A[1] * B[0]\n\t"
+        "mul x4, x17, x21\n\t"
+        "adc x10, xzr, x5\n\t"
+        "umulh x5, x17, x21\n\t"
+        "adds x9, x9, x4\n\t"
+        "# A[0] * B[2]\n\t"
+        "mul x4, x16, x23\n\t"
+        "adcs x10, x10, x5\n\t"
+        "umulh x5, x16, x23\n\t"
+        "adc x11, xzr, xzr\n\t"
+        "adds x10, x10, x4\n\t"
+        "# A[1] * B[1]\n\t"
+        "mul x4, x17, x22\n\t"
+        "adc x11, x11, x5\n\t"
+        "umulh x5, x17, x22\n\t"
+        "adds x10, x10, x4\n\t"
+        "# A[2] * B[0]\n\t"
+        "mul x4, x19, x21\n\t"
+        "adcs x11, x11, x5\n\t"
+        "umulh x5, x19, x21\n\t"
+        "adc x12, xzr, xzr\n\t"
+        "adds x10, x10, x4\n\t"
+        "# A[0] * B[3]\n\t"
+        "mul x4, x16, x24\n\t"
+        "adcs x11, x11, x5\n\t"
+        "umulh x5, x16, x24\n\t"
+        "adc x12, x12, xzr\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[1] * B[2]\n\t"
+        "mul x4, x17, x23\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x17, x23\n\t"
+        "adc x13, xzr, xzr\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[2] * B[1]\n\t"
+        "mul x4, x19, x22\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x19, x22\n\t"
+        "adc x13, x13, xzr\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[3] * B[0]\n\t"
+        "mul x4, x20, x21\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x20, x21\n\t"
+        "adc x13, x13, xzr\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[1] * B[3]\n\t"
+        "mul x4, x17, x24\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x17, x24\n\t"
+        "adc x13, x13, xzr\n\t"
+        "adds x12, x12, x4\n\t"
+        "# A[2] * B[2]\n\t"
+        "mul x4, x19, x23\n\t"
+        "adcs x13, x13, x5\n\t"
+        "umulh x5, x19, x23\n\t"
+        "adc x14, xzr, xzr\n\t"
+        "adds x12, x12, x4\n\t"
+        "# A[3] * B[1]\n\t"
+        "mul x4, x20, x22\n\t"
+        "adcs x13, x13, x5\n\t"
+        "umulh x5, x20, x22\n\t"
+        "adc x14, x14, xzr\n\t"
+        "adds x12, x12, x4\n\t"
+        "# A[2] * B[3]\n\t"
+        "mul x4, x19, x24\n\t"
+        "adcs x13, x13, x5\n\t"
+        "umulh x5, x19, x24\n\t"
+        "adc x14, x14, xzr\n\t"
+        "adds x13, x13, x4\n\t"
+        "# A[3] * B[2]\n\t"
+        "mul x4, x20, x23\n\t"
+        "adcs x14, x14, x5\n\t"
+        "umulh x5, x20, x23\n\t"
+        "adc x15, xzr, xzr\n\t"
+        "adds x13, x13, x4\n\t"
+        "# A[3] * B[3]\n\t"
+        "mul x4, x20, x24\n\t"
+        "adcs x14, x14, x5\n\t"
+        "umulh x5, x20, x24\n\t"
+        "adc x15, x15, xzr\n\t"
+        "adds x14, x14, x4\n\t"
+        "adc x15, x15, x5\n\t"
+        "stp x8, x9, [%[r], 0]\n\t"
+        "stp x10, x11, [%[r], 16]\n\t"
+        "stp x12, x13, [%[r], 32]\n\t"
+        "stp x14, x15, [%[r], 48]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+        : "memory", "x4", "x5", "x6", "x7", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Sub b from a into a. (a -= b)
+ *
+ * Four-digit subtract with borrow chain.  csetm materializes the final
+ * borrow: 0 when no borrow occurred, all-ones ((sp_digit)-1) when the
+ * subtraction borrowed (carry clear on AArch64 means borrow).
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ * returns 0 on no borrow, (sp_digit)-1 on borrow.
+ */
+static sp_digit sp_256_sub_in_place_4(sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp x2, x3, [%[a], 0]\n\t"
+        "ldp x6, x7, [%[b], 0]\n\t"
+        "subs x2, x2, x6\n\t"
+        "ldp x4, x5, [%[a], 16]\n\t"
+        "sbcs x3, x3, x7\n\t"
+        "ldp x8, x9, [%[b], 16]\n\t"
+        "sbcs x4, x4, x8\n\t"
+        "stp x2, x3, [%[a], 0]\n\t"
+        "sbcs x5, x5, x9\n\t"
+        "stp x4, x5, [%[a], 16]\n\t"
+        "csetm %[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+
+    return (sp_digit)a;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * 256-bit by 64-bit multiply; the 320-bit result is written to five
+ * digits of r (r[0]..r[4]), so r must have room for 5 digits.  All of a
+ * is loaded before any store to r, so r may alias a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+static void sp_256_mul_d_4(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp x2, x3, [%[a]]\n\t"
+        "ldp x4, x5, [%[a], 16]\n\t"
+        "umulh x7, %[b], x2\n\t"
+        "mul x2, %[b], x2\n\t"
+        "# A[1] * B\n\t"
+        "mul x8, %[b], x3\n\t"
+        "umulh x9, %[b], x3\n\t"
+        "adds x3, x7, x8\n\t"
+        "# A[2] * B\n\t"
+        "mul x8, %[b], x4\n\t"
+        "adc x7, xzr, x9\n\t"
+        "umulh x9, %[b], x4\n\t"
+        "adds x4, x7, x8\n\t"
+        "# A[3] * B\n\t"
+        "mul x8, %[b], x5\n\t"
+        "adc x7, xzr, x9\n\t"
+        "umulh x9, %[b], x5\n\t"
+        "adds x5, x7, x8\n\t"
+        "str x2, [%[r]]\n\t"
+        "adc x6, xzr, x9\n\t"
+        "stp x3, x4, [%[r], 8]\n\t"
+        "stp x5, x6, [%[r], 24]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * The quotient is accumulated 32 bits at a time using the estimate
+ * (div >> 32) + 1 as a safe under-approximation of the divisor; after
+ * each partial quotient the product is subtracted from (d1|d0).  A final
+ * single-digit udiv by the exact divisor removes the residual error.
+ *
+ * Fix: d0 and d1 are modified by the asm (subs/sbc write them) so they
+ * must be declared as read-write ("+r") operands rather than inputs -
+ * writing an input-only operand is undefined behavior in GCC extended
+ * asm.  The flag-setting instructions also require a "cc" clobber.
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The divisor.
+ * returns the result of the division.
+ */
+static sp_digit div_256_word_4(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        "lsr x5, %[div], 32\n\t"
+        "add x5, x5, 1\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x6, x3, 32\n\t"
+        "mul x4, %[div], x6\n\t"
+        "umulh x3, %[div], x6\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "udiv x3, %[d1], x5\n\t"
+        "lsl x3, x3, 32\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "umulh x3, %[div], x3\n\t"
+        "subs %[d0], %[d0], x4\n\t"
+        "sbc %[d1], %[d1], x3\n\t"
+
+        "lsr x3, %[d0], 32\n\t"
+        "orr x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv x3, x3, x5\n\t"
+        "add x6, x6, x3\n\t"
+        "mul x4, %[div], x3\n\t"
+        "sub %[d0], %[d0], x4\n\t"
+
+        "udiv x3, %[d0], %[div]\n\t"
+        "add %[r], x6, x3\n\t"
+
+        : [r] "=r" (r), [d1] "+r" (d1), [d0] "+r" (d0)
+        : [div] "r" (div)
+        : "x3", "x4", "x5", "x6", "cc"
+    );
+
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Used with an all-zeros / all-ones mask to conditionally select a value
+ * without branching.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_256_mask_4(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<4; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    /* Unrolled form for the non-small build. */
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division one digit at a time.  Each estimated quotient
+ * digit may be up to two too large; the two conditional add-backs (via
+ * the borrow mask from sp_256_sub_in_place_4) correct for this without
+ * branching.
+ *
+ * a Number to be divided: 8 digits (2*4).
+ * d Number to divide with: 4 digits.
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division: 4 digits.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[8], t2[5];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[3];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 4);
+    for (i=3; i>=0; i--) {
+        /* Estimate quotient digit from the top two working digits. */
+        r1 = div_256_word_4(t1[4 + i], t1[4 + i - 1], div);
+
+        sp_256_mul_d_4(t2, d, r1);
+        t1[4 + i] += sp_256_sub_in_place_4(&t1[i], t2);
+        t1[4 + i] -= t2[4];
+        /* Add back d while the working top digit indicates underflow. */
+        sp_256_mask_4(t2, d, t1[4 + i]);
+        t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2);
+        sp_256_mask_4(t2, d, t1[4 + i]);
+        t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2);
+    }
+
+    /* One final conditional subtract to bring the remainder below d. */
+    r1 = sp_256_cmp_4(t1, d) >= 0;
+    sp_256_cond_sub_4(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper: the remainder of the division a / m is the reduction.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced: 8 digits.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_4(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Square a and put result in r. (r = a * a)
+ *
+ * Unrolled 4-digit squaring: the six cross products A[i]*A[j] (i < j)
+ * are computed once and doubled, then the four square terms A[i]*A[i]
+ * are added in.  The 512-bit result is written to r[0..7]; r must not
+ * alias a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_256_sqr_4(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldp x16, x17, [%[a], 0]\n\t"
+        "# A[0] * A[1]\n\t"
+        "mul x9, x16, x17\n\t"
+        "ldr x19, [%[a], 16]\n\t"
+        "umulh x10, x16, x17\n\t"
+        "ldr x20, [%[a], 24]\n\t"
+        "# A[0] * A[2]\n\t"
+        "mul x4, x16, x19\n\t"
+        "umulh x5, x16, x19\n\t"
+        "adds x10, x10, x4\n\t"
+        "# A[0] * A[3]\n\t"
+        "mul x4, x16, x20\n\t"
+        "adc x11, xzr, x5\n\t"
+        "umulh x5, x16, x20\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[1] * A[2]\n\t"
+        "mul x4, x17, x19\n\t"
+        "adc x12, xzr, x5\n\t"
+        "umulh x5, x17, x19\n\t"
+        "adds x11, x11, x4\n\t"
+        "# A[1] * A[3]\n\t"
+        "mul x4, x17, x20\n\t"
+        "adcs x12, x12, x5\n\t"
+        "umulh x5, x17, x20\n\t"
+        "adc x13, xzr, xzr\n\t"
+        "adds x12, x12, x4\n\t"
+        "# A[2] * A[3]\n\t"
+        "mul x4, x19, x20\n\t"
+        "adc x13, x13, x5\n\t"
+        "umulh x5, x19, x20\n\t"
+        "adds x13, x13, x4\n\t"
+        "adc x14, xzr, x5\n\t"
+        "# Double\n\t"
+        "adds x9, x9, x9\n\t"
+        "adcs x10, x10, x10\n\t"
+        "adcs x11, x11, x11\n\t"
+        "adcs x12, x12, x12\n\t"
+        "adcs x13, x13, x13\n\t"
+        "# A[0] * A[0]\n\t"
+        "mul x8, x16, x16\n\t"
+        "adcs x14, x14, x14\n\t"
+        "umulh x3, x16, x16\n\t"
+        "cset x15, cs\n\t"
+        "# A[1] * A[1]\n\t"
+        "mul x4, x17, x17\n\t"
+        "adds x9, x9, x3\n\t"
+        "umulh x5, x17, x17\n\t"
+        "adcs x10, x10, x4\n\t"
+        "# A[2] * A[2]\n\t"
+        "mul x6, x19, x19\n\t"
+        "adcs x11, x11, x5\n\t"
+        "umulh x7, x19, x19\n\t"
+        "adcs x12, x12, x6\n\t"
+        "# A[3] * A[3]\n\t"
+        "mul x16, x20, x20\n\t"
+        "adcs x13, x13, x7\n\t"
+        "umulh x17, x20, x20\n\t"
+        "adcs x14, x14, x16\n\t"
+        "adc x15, x15, x17\n\t"
+        "stp x8, x9, [%[r], 0]\n\t"
+        "stp x10, x11, [%[r], 16]\n\t"
+        "stp x12, x13, [%[r], 32]\n\t"
+        "stp x14, x15, [%[r], 48]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20"
+    );
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve - the exponent used to invert modulo the
+ * order (Fermat).  Stored least significant 64-bit word first. */
+static const uint64_t p256_order_minus_2[4] = {
+    0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU,
+    0xffffffff00000000U
+};
+#else
+/* The low half of the order-2 of the P256 curve.
+ * Stored least significant 64-bit word first. */
+static const uint64_t p256_order_low[2] = {
+    0xf3b9cac2fc63254fU,0xbce6faada7179e84U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P256 curve. (r = a * b mod order)
+ *
+ * Operands and result are in Montgomery form with respect to the order.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_4(r, a, b);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * Operand and result are in Montgomery form with respect to the order.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_4(r, a);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square a number, in Montgomery form, mod the order of the P256 curve
+ * n times in a row. (r = a ^ (2 ^ n) mod order)
+ *
+ * r Result of the squarings.
+ * a Number to square.
+ * n Number of squarings to perform; must be at least 1.
+ */
+static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n)
+{
+    sp_256_mont_sqr_order_4(r, a);
+    while (--n > 0) {
+        sp_256_mont_sqr_order_4(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * Computes a^(order-2) mod order (Fermat's little theorem).  The small
+ * build uses a simple square-and-multiply over every exponent bit; the
+ * large build uses a fixed addition chain exploiting the repeated
+ * 0xff... structure of the top half of order-2.
+ *
+ * r  Inverse result.
+ * a  Number to invert.
+ * td Temporary data (3 values of 2*4 digits in the large build).
+ */
+static void sp_256_mont_inv_order_4(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 4);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_4(t, t);
+        if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 4U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 4;
+    sp_digit* t3 = td + 4 * 4;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_4(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_4(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_4(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_4(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_4(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_4(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_4(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_4(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_4(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_4(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_4(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_4(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_4(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_4(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    sp_256_mont_mul_order_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    sp_256_mont_mul_order_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    sp_256_mont_mul_order_4(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_4(t2, t2);
+        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_256_mont_mul_order_4(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_4(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_4(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash    Hash to sign.
+ * hashLen Length of the hash data.
+ * rng     Random number generator.
+ * priv    Private part of key - scalar.
+ * rm      First part of result as an mp_int.
+ * sm      Second part of result as an mp_int.
+ * km      Scalar to use as k when not NULL and not zero; zeroed after
+ *         use.  When NULL or zero a fresh random k is generated.
+ * heap    Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*4];
+    sp_digit xd[2*4];
+    sp_digit kd[2*4];
+    sp_digit rd[2*4];
+    sp_digit td[3 * 2*4];
+    sp_point_256 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int64_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* Four double-width values (e, x, k, r) plus 3*2*4 temporaries. */
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 4;
+        x = d + 2 * 4;
+        k = d + 4 * 4;
+        r = d + 6 * 4;
+        tmp = d + 8 * 4;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        /* s shares e's buffer and kInv shares k's - their live ranges do
+         * not overlap. */
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 4, hash, (int)hashLen);
+    }
+
+    /* Retry until the signature components are non-zero (RFC-mandated). */
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 4, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_4(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 4, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 4U);
+            sp_256_norm_4(r);
+            c = sp_256_cmp_4(r, p256_order);
+            sp_256_cond_sub_4(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_4(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_256_mul_4(k, k, p256_norm_order);
+            err = sp_256_mod_4(k, k, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_4(k);
+            /* kInv = 1/k mod order */
+            sp_256_mont_inv_order_4(kInv, k, tmp);
+            sp_256_norm_4(kInv);
+
+            /* s = r * x + e */
+            sp_256_mul_4(x, x, r);
+            err = sp_256_mod_4(x, x, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_4(x);
+            carry = sp_256_add_4(s, e, x);
+            sp_256_cond_sub_4(s, s, p256_order, 0 - carry);
+            sp_256_norm_4(s);
+            c = sp_256_cmp_4(s, p256_order);
+            sp_256_cond_sub_4(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_4(s);
+
+            /* s = s * k^-1 mod order */
+            sp_256_mont_mul_order_4(s, s, kInv);
+            sp_256_norm_4(s);
+
+            /* Check that signature is usable. */
+            if (sp_256_iszero_4(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        /* Zeroize the whole allocation - tmp held secret intermediates
+         * of the inversion of k, so clear all 7*2*4 digits, not just the
+         * first 8*4. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 4);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    /* Zeroize every secret-bearing stack buffer exactly once. */
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 4U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 4U);
+#endif
+    sp_256_point_free_4(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash    Hash to verify.
+ * hashLen Length of the hash data.
+ * pX      X ordinate of the public point.
+ * pY      Y ordinate of the public point.
+ * pZ      Z ordinate of the public point.
+ * r       First part of the signature.
+ * sm      Second part of the signature.
+ * res     Set to 1 when the signature verifies, 0 otherwise.
+ * heap    Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*4];
+    sp_digit u2d[2*4];
+    sp_digit sd[2*4];
+    sp_digit tmpd[2*4 * 5];
+    sp_point_256 p1d;
+    sp_point_256 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* p1;
+    sp_point_256* p2 = NULL;
+    sp_digit carry;
+    int64_t c;
+    int err;
+
+    err = sp_256_point_new_4(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 4, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 4;
+        u2  = d + 2 * 4;
+        s   = d + 4 * 4;
+        tmp = d + 6 * 4;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(u1, 4, hash, (int)hashLen);
+        sp_256_from_mp(u2, 4, r);
+        sp_256_from_mp(s, 4, sm);
+        sp_256_from_mp(p2->x, 4, pX);
+        sp_256_from_mp(p2->y, 4, pY);
+        sp_256_from_mp(p2->z, 4, pZ);
+
+        /* Convert s to Montgomery form (mod order). */
+        {
+            sp_256_mul_4(s, s, p256_norm_order);
+        }
+        err = sp_256_mod_4(s, s, p256_order);
+    }
+    if (err == MP_OKAY) {
+        sp_256_norm_4(s);
+        /* u1 = e/s mod order, u2 = r/s mod order */
+        {
+            sp_256_mont_inv_order_4(s, s, tmp);
+            sp_256_mont_mul_order_4(u1, u1, s);
+            sp_256_mont_mul_order_4(u2, u2, s);
+        }
+
+        err = sp_256_ecc_mulmod_base_4(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_4(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* p1 = u1.G + u2.Q, handling the doubling / infinity cases the
+         * generic add cannot. */
+        {
+            sp_256_proj_point_add_4(p1, p1, p2, tmp);
+            if (sp_256_iszero_4(p1->z)) {
+                if (sp_256_iszero_4(p1->x) && sp_256_iszero_4(p1->y)) {
+                    sp_256_proj_point_dbl_4(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_256_from_mp(u2, 4, r);
+        err = sp_256_mod_mul_norm_4(u2, u2, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_256_mont_sqr_4(p1->z, p1->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, p256_mp_mod);
+        *res = (int)(sp_256_cmp_4(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_256_from_mp(u2, 4, r);
+            carry = sp_256_add_4(u2, u2, p256_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_256_norm_4(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_256_cmp_4(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_256_mod_mul_norm_4(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_4(u1, u2, p1->z, p256_mod,
+                                                                  p256_mp_mod);
+                        *res = (int)(sp_256_cmp_4(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_4(p1, 0, heap);
+    sp_256_point_free_4(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_4(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    /* Two double-width temporaries for products before modular reduction. */
+    sp_digit t1d[2*4];
+    sp_digit t2d[2*4];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 4;
+        t2 = d + 2 * 4;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* t1 = y^2 mod p */
+        sp_256_sqr_4(t1, point->y);
+        (void)sp_256_mod_4(t1, t1, p256_mod);
+        /* t2 = x^3 mod p */
+        sp_256_sqr_4(t2, point->x);
+        (void)sp_256_mod_4(t2, t2, p256_mod);
+        sp_256_mul_4(t2, t2, point->x);
+        (void)sp_256_mod_4(t2, t2, p256_mod);
+        /* t2 = p - x^3, so the add gives t1 = y^2 - x^3 (mod p). */
+        (void)sp_256_sub_4(t2, p256_mod, t2);
+        sp_256_mont_add_4(t1, t1, t2, p256_mod);
+
+        /* t1 = y^2 - x^3 + 3.x  (P-256 has a = -3). */
+        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+        sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+
+        /* Point is on curve iff y^2 - x^3 + 3.x == b. */
+        if (sp_256_cmp_4(t1, p256_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 pubd;
+#endif
+    sp_point_256* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    /* sp_256_point_new_4 either allocates or aliases the stack point pubd. */
+    err = sp_256_point_new_4(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_256_from_mp(pub->x, 4, pX);
+        sp_256_from_mp(pub->y, 4, pY);
+        /* Z ordinate of 1: treat the input ordinates as affine. */
+        sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
+
+        err = sp_256_ecc_is_point_4(pub, NULL);
+    }
+
+    sp_256_point_free_4(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_OUT_OF_RANGE_E if an ordinate is not less than the modulus,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[4];
+    sp_point_256 pubd;
+    sp_point_256 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_256* pub;
+    sp_point_256* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_256_point_new_4(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+            DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_256_from_mp(pub->x, 4, pX);
+        sp_256_from_mp(pub->y, 4, pY);
+        /* Z ordinate of 1: public key ordinates are affine. */
+        sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
+        sp_256_from_mp(priv, 4, privm);
+
+        /* Check point at infinity. */
+        if ((sp_256_iszero_4(pub->x) != 0) &&
+            (sp_256_iszero_4(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_256_cmp_4(pub->x, p256_mod) >= 0 ||
+            sp_256_cmp_4(pub->y, p256_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_256_ecc_is_point_4(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_256_ecc_mulmod_4(p, pub, p256_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_256_iszero_4(p->x) == 0) ||
+            (sp_256_iszero_4(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_256_ecc_mulmod_base_4(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_256_cmp_4(p->x, pub->x) != 0 ||
+            sp_256_cmp_4(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 4 * 5];
+    sp_point_256 pd;
+    sp_point_256 qd;
+#endif
+    /* Initialize to NULL: if point allocation fails before tmp is assigned,
+     * the cleanup below must not test/free an uninitialized pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_256* p;
+    sp_point_256* q = NULL;
+    int err;
+
+    err = sp_256_point_new_4(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, NULL,
+            DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 4, pX);
+        sp_256_from_mp(p->y, 4, pY);
+        sp_256_from_mp(p->z, 4, pZ);
+        sp_256_from_mp(q->x, 4, qX);
+        sp_256_from_mp(q->y, 4, qY);
+        sp_256_from_mp(q->z, 4, qZ);
+
+        sp_256_proj_point_add_4(p, p, q, tmp);
+    }
+
+    /* Convert the resulting projective point back to mp_int ordinates. */
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(q, 0, NULL);
+    sp_256_point_free_4(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 4 * 2];
+    sp_point_256 pd;
+#endif
+    /* Initialize to NULL: if point allocation fails before tmp is assigned,
+     * the cleanup below must not test/free an uninitialized pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_256* p;
+    int err;
+
+    err = sp_256_point_new_4(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 2, NULL,
+            DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 4, pX);
+        sp_256_from_mp(p->y, 4, pY);
+        sp_256_from_mp(p->z, 4, pZ);
+
+        sp_256_proj_point_dbl_4(p, p, tmp);
+    }
+
+    /* Convert the resulting projective point back to mp_int ordinates. */
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 4 * 4];
+    sp_point_256 pd;
+#endif
+    /* Initialize to NULL: if point allocation fails before tmp is assigned,
+     * the cleanup below must not test/free an uninitialized pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_256* p;
+    int err;
+
+    err = sp_256_point_new_4(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 4, NULL,
+            DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 4, pX);
+        sp_256_from_mp(p->y, 4, pY);
+        sp_256_from_mp(p->z, 4, pZ);
+
+        sp_256_map_4(p, p, tmp);
+    }
+
+    /* Write the affine result back over the caller's ordinates. */
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * Uses a fixed addition chain to raise y to the power (p256 + 1) / 4
+ * (valid as the prime is congruent to 3 mod 4); input and output are
+ * in Montgomery form.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mont_sqrt_4(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 4];
+    sp_digit t2d[2 * 4];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 4;
+        t2 = d + 2 * 4;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* Exponent built up bit-run by bit-run; comments give the running
+         * exponent of y accumulated so far. */
+        {
+            /* t2 = y ^ 0x2 */
+            sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_256_mont_mul_4(t1, t2, y, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xc */
+            sp_256_mont_sqr_n_4(t2, t1, 2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xf0 */
+            sp_256_mont_sqr_n_4(t2, t1, 4, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xff */
+            sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xff00 */
+            sp_256_mont_sqr_n_4(t2, t1, 8, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffff */
+            sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xffff0000 */
+            sp_256_mont_sqr_n_4(t2, t1, 16, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff */
+            sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000000 */
+            sp_256_mont_sqr_n_4(t1, t1, 32, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001 */
+            sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+            sp_256_mont_sqr_n_4(t1, t1, 96, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+            sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod);
+            sp_256_mont_sqr_n_4(y, t1, 94, p256_mod, p256_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 4];
+    sp_digit yd[2 * 4];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 4;
+        y = d + 2 * 4;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        /* Work in Montgomery form throughout the RHS evaluation. */
+        sp_256_from_mp(x, 4, xm);
+        err = sp_256_mod_mul_norm_4(x, x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_256_mont_sqr_4(y, x, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_256_mont_sub_4(y, y, x, p256_mod);
+        sp_256_mont_sub_4(y, y, x, p256_mod);
+        sp_256_mont_sub_4(y, y, x, p256_mod);
+        /* y = x^3 - 3x + b  (x is no longer needed; reuse it for b). */
+        err = sp_256_mod_mul_norm_4(x, p256_b, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_256_mont_add_4(y, y, x, p256_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_256_mont_sqrt_4(y);
+    }
+    if (err == MP_OKAY) {
+        /* Reduce out of Montgomery form to get the real Y value. */
+        XMEMSET(y + 4, 0, 4U * sizeof(sp_digit));
+        sp_256_mont_reduce_4(y, p256_mod, p256_mp_mod);
+        /* Two square roots exist (y and p - y); pick the one whose
+         * parity matches the requested odd flag. */
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_256_mont_sub_4(y, p256_mod, y, p256_mod);
+        }
+
+        err = sp_256_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+    /* X, Y and Z ordinates: 6 live 64-bit digits each, with a second set of
+     * 6 digits of headroom for double-width intermediate values. */
+    sp_digit x[2 * 6];
+    sp_digit y[2 * 6];
+    sp_digit z[2 * 6];
+    int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[6] = {
+    0x00000000ffffffffL,0xffffffff00000000L,0xfffffffffffffffeL,
+    0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL
+};
+/* The Montgomery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[6] = {
+    0xffffffff00000001L,0x00000000ffffffffL,0x0000000000000001L,
+    0x0000000000000000L,0x0000000000000000L,0x0000000000000000L
+};
+/* The Montgomery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x0000000100000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+    defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[6] = {
+    0xecec196accc52973L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL,
+    0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[6] = {
+    0xecec196accc52971L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL,
+    0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[6] = {
+    0x1313e695333ad68dL,0xa7e5f24db74f5885L,0x389cb27e0bc8d220L,
+    0x0000000000000000L,0x0000000000000000L,0x0000000000000000L
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0x6ed46089e88fdc45l;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+    /* X ordinate */
+    {
+        0x3a545e3872760ab7L,0x5502f25dbf55296cL,0x59f741e082542a38L,
+        0x6e1d3b628ba79b98L,0x8eb1c71ef320ad74L,0xaa87ca22be8b0537L,
+        0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x7a431d7c90ea0e5fL,0x0a60b1ce1d7e819dL,0xe9da3113b5f0b8c0L,
+        0xf8f41dbd289a147cL,0x5d9e98bf9292dc29L,0x3617de4a96262c6fL,
+        0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x0000000000000001L,0x0000000000000000L,0x0000000000000000L,
+        0x0000000000000000L,0x0000000000000000L,0x0000000000000000L,
+        0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+/* The b constant of the curve P384 equation: y^2 = x^3 - 3x + b. */
+static const sp_digit p384_b[6] = {
+    0x2a85c8edd3ec2aefL,0xc656398d8a2ed19dL,0x0314088f5013875aL,
+    0x181d9c6efe814112L,0x988e056be3f82d19L,0xb3312fa7e23ee7e4L
+};
+#endif
+
+/* Obtain an sp_point_384 to work with.
+ *
+ * heap Heap to use if dynamically allocating.
+ * sp   Caller-provided point used when not dynamically allocating.
+ * p    Receives the point to use.
+ * returns MEMORY_E when dynamic allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_point_new_ex_6(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    return (*p == NULL) ? MEMORY_E : MP_OKAY;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. The stack point sp is unused. */
+#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), NULL, &(p))
+#else
+/* Set pointer to the caller's stack data and return no error. */
+#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), &(sp), &(p))
+#endif
+
+
+/* Release a point obtained from sp_384_point_new_6.
+ *
+ * p     Point to free (may be NULL in the heap configuration).
+ * clear Non-zero to zeroize the point data before release.
+ * heap  Heap the point was allocated from, if any.
+ */
+static void sp_384_point_free_6(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Heap allocated: scrub on request, then hand back to the heap. */
+    if (p != NULL) {
+        if (clear) {
+            XMEMSET(p, 0, sizeof(sp_point_384));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    /* Stack point: only scrub the data on request. */
+    if (clear) {
+        XMEMSET(p, 0, sizeof(sp_point_384));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * Works on the value split into twelve 32-bit words; each t[i] is a signed
+ * linear combination of those words (the coefficient rows are given in the
+ * comments), followed by carry propagation and a final fold of the top
+ * carry back into the low words.
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    int64_t* td;
+#else
+    int64_t td[12];
+    int64_t a32d[12];
+#endif
+    int64_t* t;
+    int64_t* a32;
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t = td;
+        a32 = td + 12;
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
+        /* Split the six 64-bit digits into twelve 32-bit words. */
+        a32[0] = a[0] & 0xffffffff;
+        a32[1] = a[0] >> 32;
+        a32[2] = a[1] & 0xffffffff;
+        a32[3] = a[1] >> 32;
+        a32[4] = a[2] & 0xffffffff;
+        a32[5] = a[2] >> 32;
+        a32[6] = a[3] & 0xffffffff;
+        a32[7] = a[3] >> 32;
+        a32[8] = a[4] & 0xffffffff;
+        a32[9] = a[4] >> 32;
+        a32[10] = a[5] & 0xffffffff;
+        a32[11] = a[5] >> 32;
+
+        /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
+        t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11];
+        /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
+        t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11];
+        /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
+        t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11];
+        /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
+        t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11];
+        /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
+        t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] - 2 * a32[11];
+        /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
+        t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11];
+        /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
+        t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11];
+        /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
+        t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11];
+        /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
+        t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8];
+        /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
+        t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9];
+        /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
+        t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10];
+        /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
+        t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11];
+
+        /* Propagate carries through the 32-bit words. */
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+        /* Fold the overflow word back in (modular reduction step). */
+        o = t[11] >> 32; t[11] &= 0xffffffff;
+        t[0] += o;
+        t[1] -= o;
+        t[3] += o;
+        t[4] += o;
+        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+        /* Recombine the 32-bit words into six 64-bit digits. */
+        r[0] = (t[1] << 32) | t[0];
+        r[1] = (t[3] << 32) | t[2];
+        r[2] = (t[5] << 32) | t[4];
+        r[3] = (t[7] << 32) | t[6];
+        r[4] = (t[9] << 32) | t[8];
+        r[5] = (t[11] << 32) | t[10];
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+    return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * Three cases depending on the relative size of an mp_digit (DIGIT_BIT)
+ * and a 64-bit sp_digit: straight copy, splitting, or packing.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+    /* Same digit size: memcpy the used digits and zero-fill the rest. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 64
+    /* mp_int digits are wider: split each across successive 64-bit words. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffffffffffffl;
+        s = 64U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* mp_int digits are narrower: pack several into each 64-bit word. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 64) {
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 64 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                /* Carry the bits that did not fit into the next word. */
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    /* Zero-fill any remaining output words. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * p Point of type sp_point_384 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_6(sp_point_384* p, const ecc_point* pm)
+{
+    /* Zero each ordinate's full storage before loading, since the mp_int
+     * may populate fewer digits than the point provides. */
+    XMEMSET(p->x, 0, sizeof(p->x));
+    sp_384_from_mp(p->x, 6, pm->x);
+    XMEMSET(p->y, 0, sizeof(p->y));
+    sp_384_from_mp(p->y, 6, pm->y);
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_384_from_mp(p->z, 6, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Mirror of sp_384_from_mp: grows the mp_int, then copies, splits or packs
+ * the six 64-bit digits depending on DIGIT_BIT.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+        /* Same digit size: direct copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 6);
+        r->used = 6;
+        mp_clamp(r);
+#elif DIGIT_BIT < 64
+        /* mp_int digits are narrower: split each 64-bit word across
+         * several mp_int digits. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 6; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 64) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 64 - s;
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp_int digits are wider: pack several 64-bit words per digit. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 6; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 64 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 64 - s;
+            }
+            else {
+                s += 64;
+            }
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p Point of type sp_point_384.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_6(const sp_point_384* p, ecc_point* pm)
+{
+    /* Convert each ordinate in turn, stopping at the first failure. */
+    int err = sp_384_to_mp(p->x, pm->x);
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pm->y);
+        if (err == MP_OKAY) {
+            err = sp_384_to_mp(p->z, pm->z);
+        }
+    }
+
+    return err;
+}
+
+/* Conditionally copy a into r using the mask m.
+ * m is -1 to copy and 0 when not.
+ *
+ * Implemented branch-free in AArch64 assembly via the XOR-AND-XOR trick:
+ * r ^= (r ^ a) & m, applied to all six 64-bit digits, so the memory access
+ * pattern is independent of the mask value.
+ *
+ * r A single precision number to copy over.
+ * a A single precision number to copy.
+ * m Mask value to apply.
+ */
+static void sp_384_cond_copy_6(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    __asm__ __volatile__ (
+        "ldp x3, x4, [%[r], 0]\n\t"
+        "ldp x5, x6, [%[r], 16]\n\t"
+        "ldp x7, x8, [%[r], 32]\n\t"
+        "ldp x9, x10, [%[a], 0]\n\t"
+        "ldp x11, x12, [%[a], 16]\n\t"
+        "ldp x13, x14, [%[a], 32]\n\t"
+        "eor x9, x9, x3\n\t"
+        "eor x10, x10, x4\n\t"
+        "eor x11, x11, x5\n\t"
+        "eor x12, x12, x6\n\t"
+        "eor x13, x13, x7\n\t"
+        "eor x14, x14, x8\n\t"
+        "and x9, x9, %[m]\n\t"
+        "and x10, x10, %[m]\n\t"
+        "and x11, x11, %[m]\n\t"
+        "and x12, x12, %[m]\n\t"
+        "and x13, x13, %[m]\n\t"
+        "and x14, x14, %[m]\n\t"
+        "eor x3, x3, x9\n\t"
+        "eor x4, x4, x10\n\t"
+        "eor x5, x5, x11\n\t"
+        "eor x6, x6, x12\n\t"
+        "eor x7, x7, x13\n\t"
+        "eor x8, x8, x14\n\t"
+        "stp x3, x4, [%[r], 0]\n\t"
+        "stp x5, x6, [%[r], 16]\n\t"
+        "stp x7, x8, [%[r], 32]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [m] "r" (m)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14"
+    );
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Size-optimized AArch64 schoolbook multiply: the outer loop walks each
+ * output digit of the 12-digit product, and the inner loop accumulates all
+ * a[i]*b[j] partial products with i+j equal to the current output index
+ * into a three-register (x6:x7:x8) accumulator. The product is built in a
+ * local buffer so r may alias a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_digit tmp[12];
+
+    __asm__ __volatile__ (
+        "mov x5, 0\n\t"
+        "mov x6, 0\n\t"
+        "mov x7, 0\n\t"
+        "mov x8, 0\n\t"
+        "\n1:\n\t"
+        "subs x3, x5, 40\n\t"
+        "csel x3, xzr, x3, cc\n\t"
+        "sub x4, x5, x3\n\t"
+        "\n2:\n\t"
+        "ldr x10, [%[a], x3]\n\t"
+        "ldr x11, [%[b], x4]\n\t"
+        "mul x9, x10, x11\n\t"
+        "umulh x10, x10, x11\n\t"
+        "adds x6, x6, x9\n\t"
+        "adcs x7, x7, x10\n\t"
+        "adc x8, x8, xzr\n\t"
+        "add x3, x3, #8\n\t"
+        "sub x4, x4, #8\n\t"
+        "cmp x3, 48\n\t"
+        "b.eq 3f\n\t"
+        "cmp x3, x5\n\t"
+        "b.le 2b\n\t"
+        "\n3:\n\t"
+        "str x6, [%[r], x5]\n\t"
+        "mov x6, x7\n\t"
+        "mov x7, x8\n\t"
+        "mov x8, #0\n\t"
+        "add x5, x5, #8\n\t"
+        "cmp x5, 80\n\t"
+        "b.le 1b\n\t"
+        "str x6, [%[r], x5]\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+    );
+
+    /* Copy the finished product out of the aliasing-safe buffer. */
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+static void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ sp_digit tmp[6];
+
+ __asm__ __volatile__ (
+ "ldp x9, x10, [%[a], 0]\n\t"
+ "ldp x11, x12, [%[a], 16]\n\t"
+ "ldp x13, x14, [%[a], 32]\n\t"
+ "ldp x15, x16, [%[b], 0]\n\t"
+ "ldp x17, x19, [%[b], 16]\n\t"
+ "ldp x20, x21, [%[b], 32]\n\t"
+ "# A[0] * B[0]\n\t"
+ "mul x4, x9, x15\n\t"
+ "umulh x5, x9, x15\n\t"
+ "str x4, [%[tmp]]\n\t"
+ "# A[0] * B[1]\n\t"
+ "mul x7, x9, x16\n\t"
+ "umulh x8, x9, x16\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[1] * B[0]\n\t"
+ "mul x7, x10, x15\n\t"
+ "adc x6, xzr, x8\n\t"
+ "umulh x8, x10, x15\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[tmp], 8]\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "# A[0] * B[2]\n\t"
+ "mul x7, x9, x17\n\t"
+ "umulh x8, x9, x17\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[1] * B[1]\n\t"
+ "mul x7, x10, x16\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x10, x16\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[2] * B[0]\n\t"
+ "mul x7, x11, x15\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x11, x15\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[tmp], 16]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[0] * B[3]\n\t"
+ "mul x7, x9, x19\n\t"
+ "umulh x8, x9, x19\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[1] * B[2]\n\t"
+ "mul x7, x10, x17\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x10, x17\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[2] * B[1]\n\t"
+ "mul x7, x11, x16\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x11, x16\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[3] * B[0]\n\t"
+ "mul x7, x12, x15\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x12, x15\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[tmp], 24]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[0] * B[4]\n\t"
+ "mul x7, x9, x20\n\t"
+ "umulh x8, x9, x20\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[1] * B[3]\n\t"
+ "mul x7, x10, x19\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x10, x19\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[2] * B[2]\n\t"
+ "mul x7, x11, x17\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x11, x17\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[3] * B[1]\n\t"
+ "mul x7, x12, x16\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x12, x16\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[4] * B[0]\n\t"
+ "mul x7, x13, x15\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x13, x15\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[tmp], 32]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[0] * B[5]\n\t"
+ "mul x7, x9, x21\n\t"
+ "umulh x8, x9, x21\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[1] * B[4]\n\t"
+ "mul x7, x10, x20\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x10, x20\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[2] * B[3]\n\t"
+ "mul x7, x11, x19\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x11, x19\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[3] * B[2]\n\t"
+ "mul x7, x12, x17\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x12, x17\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[4] * B[1]\n\t"
+ "mul x7, x13, x16\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x13, x16\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[5] * B[0]\n\t"
+ "mul x7, x14, x15\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x14, x15\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[tmp], 40]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[1] * B[5]\n\t"
+ "mul x7, x10, x21\n\t"
+ "umulh x8, x10, x21\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[2] * B[4]\n\t"
+ "mul x7, x11, x20\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x11, x20\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[3] * B[3]\n\t"
+ "mul x7, x12, x19\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x12, x19\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[4] * B[2]\n\t"
+ "mul x7, x13, x17\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x13, x17\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[5] * B[1]\n\t"
+ "mul x7, x14, x16\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x14, x16\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[r], 48]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[2] * B[5]\n\t"
+ "mul x7, x11, x21\n\t"
+ "umulh x8, x11, x21\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[3] * B[4]\n\t"
+ "mul x7, x12, x20\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x12, x20\n\t"
+ "adc x4, xzr, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[4] * B[3]\n\t"
+ "mul x7, x13, x19\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x13, x19\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "# A[5] * B[2]\n\t"
+ "mul x7, x14, x17\n\t"
+ "adcs x6, x6, x8\n\t"
+ "umulh x8, x14, x17\n\t"
+ "adc x4, x4, xzr\n\t"
+ "adds x5, x5, x7\n\t"
+ "adcs x6, x6, x8\n\t"
+ "str x5, [%[r], 56]\n\t"
+ "adc x4, x4, xzr\n\t"
+ "# A[3] * B[5]\n\t"
+ "mul x7, x12, x21\n\t"
+ "umulh x8, x12, x21\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[4] * B[4]\n\t"
+ "mul x7, x13, x20\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x13, x20\n\t"
+ "adc x5, xzr, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "# A[5] * B[3]\n\t"
+ "mul x7, x14, x19\n\t"
+ "adcs x4, x4, x8\n\t"
+ "umulh x8, x14, x19\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x6, [%[r], 64]\n\t"
+ "adc x5, x5, xzr\n\t"
+ "# A[4] * B[5]\n\t"
+ "mul x7, x13, x21\n\t"
+ "umulh x8, x13, x21\n\t"
+ "adds x4, x4, x7\n\t"
+ "# A[5] * B[4]\n\t"
+ "mul x7, x14, x20\n\t"
+ "adcs x5, x5, x8\n\t"
+ "umulh x8, x14, x20\n\t"
+ "adc x6, xzr, xzr\n\t"
+ "adds x4, x4, x7\n\t"
+ "adcs x5, x5, x8\n\t"
+ "str x4, [%[r], 72]\n\t"
+ "adc x6, x6, xzr\n\t"
+ "# A[5] * B[5]\n\t"
+ "mul x7, x14, x21\n\t"
+ "umulh x8, x14, x21\n\t"
+ "adds x5, x5, x7\n\t"
+ "adc x6, x6, x8\n\t"
+ "stp x5, x6, [%[r], 80]\n\t"
+ "ldp x9, x10, [%[tmp], 0]\n\t"
+ "ldp x11, x12, [%[tmp], 16]\n\t"
+ "ldp x13, x14, [%[tmp], 32]\n\t"
+ "stp x9, x10, [%[r], 0]\n\t"
+ "stp x11, x12, [%[r], 16]\n\t"
+ "stp x13, x14, [%[r], 32]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant-time: both paths execute the same instructions; m only masks
+ * the b limbs (b AND -1 = b, b AND 0 = 0).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * Returns 0 when no borrow occurred, or all-ones when the subtraction
+ * borrowed (csetm on carry-clear).
+ */
+static sp_digit sp_384_cond_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ __asm__ __volatile__ (
+
+ "ldp x5, x7, [%[b], 0]\n\t"
+ "ldp x11, x12, [%[b], 16]\n\t"
+ "ldp x4, x6, [%[a], 0]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 16]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "subs x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "sbcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 0]\n\t"
+ "sbcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 16]\n\t"
+ "ldp x5, x7, [%[b], 32]\n\t"
+ "ldp x4, x6, [%[a], 32]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "sbcs x4, x4, x5\n\t"
+ "sbcs x6, x6, x7\n\t"
+ "stp x4, x6, [%[r], 32]\n\t"
+ "csetm %[r], cc\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+ );
+
+ /* %[r] was overwritten by csetm with the borrow mask. */
+ return (sp_digit)r;
+}
+
+/* Order reduction reuses the modulus reduction for this digit layout. */
+#define sp_384_mont_reduce_order_6 sp_384_mont_reduce_6
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * Loops 6 times (x4 is the down-counter): each pass computes
+ * mu = a[i] * mp and adds m * mu so the low digit cancels, then the
+ * input pointer is advanced by one digit (add %[a], %[a], 8).
+ * ca accumulates the top-word carry across iterations.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_384_mont_reduce_6(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ "ldp x14, x15, [%[m], 0]\n\t"
+ "ldp x16, x17, [%[m], 16]\n\t"
+ "ldp x19, x20, [%[m], 32]\n\t"
+ "# i = 6\n\t"
+ "mov x4, 6\n\t"
+ "ldp x12, x13, [%[a], 0]\n\t"
+ "\n1:\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mul x9, %[mp], x12\n\t"
+ "# a[i+0] += m[0] * mu\n\t"
+ "mul x7, x14, x9\n\t"
+ "umulh x8, x14, x9\n\t"
+ "adds x12, x12, x7\n\t"
+ "# a[i+1] += m[1] * mu\n\t"
+ "mul x7, x15, x9\n\t"
+ "adc x6, x8, xzr\n\t"
+ "umulh x8, x15, x9\n\t"
+ "adds x12, x13, x7\n\t"
+ "# a[i+2] += m[2] * mu\n\t"
+ "ldr x13, [%[a], 16]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "mul x7, x16, x9\n\t"
+ "adds x12, x12, x6\n\t"
+ "umulh x8, x16, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "adds x13, x13, x7\n\t"
+ "# a[i+3] += m[3] * mu\n\t"
+ "ldr x10, [%[a], 24]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "mul x7, x17, x9\n\t"
+ "adds x13, x13, x5\n\t"
+ "umulh x8, x17, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "adds x10, x10, x7\n\t"
+ "# a[i+4] += m[4] * mu\n\t"
+ "ldr x11, [%[a], 32]\n\t"
+ "adc x5, x8, xzr\n\t"
+ "adds x10, x10, x6\n\t"
+ "mul x7, x19, x9\n\t"
+ "adc x5, x5, xzr\n\t"
+ "umulh x8, x19, x9\n\t"
+ "str x10, [%[a], 24]\n\t"
+ "adds x11, x11, x7\n\t"
+ "# a[i+5] += m[5] * mu\n\t"
+ "ldr x10, [%[a], 40]\n\t"
+ "adc x6, x8, xzr\n\t"
+ "adds x11, x11, x5\n\t"
+ "mul x7, x20, x9\n\t"
+ "adc x6, x6, xzr\n\t"
+ "umulh x8, x20, x9\n\t"
+ "adds x6, x6, x7\n\t"
+ "adcs x8, x8, %[ca]\n\t"
+ "str x11, [%[a], 32]\n\t"
+ "cset %[ca], cs\n\t"
+ "adds x10, x10, x6\n\t"
+ "ldr x11, [%[a], 48]\n\t"
+ "str x10, [%[a], 40]\n\t"
+ "adcs x11, x11, x8\n\t"
+ "str x11, [%[a], 48]\n\t"
+ "adc %[ca], %[ca], xzr\n\t"
+ "subs x4, x4, 1\n\t"
+ "add %[a], %[a], 8\n\t"
+ "bne 1b\n\t"
+ "stp x12, x13, [%[a], 0]\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20"
+ );
+
+ /* a was advanced 6 digits inside the asm ("+r" constraint), so a - 6
+ * is the original base and a points at the reduced high half. Subtract
+ * the modulus once more when the final carry (ca) is set. */
+ sp_384_cond_sub_6(a - 6, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full 12-digit product followed by Montgomery reduction in place.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_384_mul_6(r, a, b);
+ sp_384_mont_reduce_6(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * Size-optimised product-scanning loop: x5 is the byte offset of the
+ * output digit, x3/x4 are the byte offsets of the two factor digits.
+ * Off-diagonal products (x3 != x4) are accumulated twice; the diagonal
+ * square (label 4) is accumulated once. x6:x7:x8 form the running
+ * three-digit accumulator. Result is built in tmp so r may alias a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_384_sqr_6(sp_digit* r, const sp_digit* a)
+{
+ sp_digit tmp[12];
+
+ __asm__ __volatile__ (
+ "mov x6, 0\n\t"
+ "mov x7, 0\n\t"
+ "mov x8, 0\n\t"
+ "mov x5, 0\n\t"
+ "\n1:\n\t"
+ "subs x3, x5, 40\n\t"
+ "csel x3, xzr, x3, cc\n\t"
+ "sub x4, x5, x3\n\t"
+ "\n2:\n\t"
+ "cmp x4, x3\n\t"
+ "b.eq 4f\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "ldr x11, [%[a], x4]\n\t"
+ "mul x9, x10, x11\n\t"
+ "umulh x10, x10, x11\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "b.al 5f\n\t"
+ "\n4:\n\t"
+ "ldr x10, [%[a], x3]\n\t"
+ "mul x9, x10, x10\n\t"
+ "umulh x10, x10, x10\n\t"
+ "adds x6, x6, x9\n\t"
+ "adcs x7, x7, x10\n\t"
+ "adc x8, x8, xzr\n\t"
+ "\n5:\n\t"
+ "add x3, x3, #8\n\t"
+ "sub x4, x4, #8\n\t"
+ "cmp x3, 48\n\t"
+ "b.eq 3f\n\t"
+ "cmp x3, x4\n\t"
+ "b.gt 3f\n\t"
+ "cmp x3, x5\n\t"
+ "b.le 2b\n\t"
+ "\n3:\n\t"
+ "str x6, [%[r], x5]\n\t"
+ "mov x6, x7\n\t"
+ "mov x7, x8\n\t"
+ "mov x8, #0\n\t"
+ "add x5, x5, #8\n\t"
+ "cmp x5, 80\n\t"
+ "b.le 1b\n\t"
+ "str x6, [%[r], x5]\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * Fully unrolled: all off-diagonal partial products are summed once,
+ * doubled with an add-with-carry chain (see "# Double"), then the six
+ * diagonal squares A[i]*A[i] are folded in. r must not overlap working
+ * state; the result is written with stp pairs at the end.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+static void sp_384_sqr_6(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "ldp x17, x19, [%[a], 0]\n\t"
+ "ldp x20, x21, [%[a], 16]\n\t"
+ "ldp x22, x23, [%[a], 32]\n\t"
+ "# A[0] * A[1]\n\t"
+ "mul x3, x17, x19\n\t"
+ "umulh x7, x17, x19\n\t"
+ "# A[0] * A[2]\n\t"
+ "mul x4, x17, x20\n\t"
+ "umulh x5, x17, x20\n\t"
+ "adds x7, x7, x4\n\t"
+ "# A[0] * A[3]\n\t"
+ "mul x4, x17, x21\n\t"
+ "adc x8, xzr, x5\n\t"
+ "umulh x5, x17, x21\n\t"
+ "adds x8, x8, x4\n\t"
+ "# A[1] * A[2]\n\t"
+ "mul x4, x19, x20\n\t"
+ "adc x9, xzr, x5\n\t"
+ "umulh x5, x19, x20\n\t"
+ "adds x8, x8, x4\n\t"
+ "# A[0] * A[4]\n\t"
+ "mul x4, x17, x22\n\t"
+ "adcs x9, x9, x5\n\t"
+ "umulh x5, x17, x22\n\t"
+ "adc x10, xzr, xzr\n\t"
+ "adds x9, x9, x4\n\t"
+ "# A[1] * A[3]\n\t"
+ "mul x4, x19, x21\n\t"
+ "adc x10, x10, x5\n\t"
+ "umulh x5, x19, x21\n\t"
+ "adds x9, x9, x4\n\t"
+ "# A[0] * A[5]\n\t"
+ "mul x4, x17, x23\n\t"
+ "adcs x10, x10, x5\n\t"
+ "umulh x5, x17, x23\n\t"
+ "adc x11, xzr, xzr\n\t"
+ "adds x10, x10, x4\n\t"
+ "# A[1] * A[4]\n\t"
+ "mul x4, x19, x22\n\t"
+ "adc x11, x11, x5\n\t"
+ "umulh x5, x19, x22\n\t"
+ "adds x10, x10, x4\n\t"
+ "# A[2] * A[3]\n\t"
+ "mul x4, x20, x21\n\t"
+ "adcs x11, x11, x5\n\t"
+ "umulh x5, x20, x21\n\t"
+ "adc x12, xzr, xzr\n\t"
+ "adds x10, x10, x4\n\t"
+ "# A[1] * A[5]\n\t"
+ "mul x4, x19, x23\n\t"
+ "adcs x11, x11, x5\n\t"
+ "umulh x5, x19, x23\n\t"
+ "adc x12, x12, xzr\n\t"
+ "adds x11, x11, x4\n\t"
+ "# A[2] * A[4]\n\t"
+ "mul x4, x20, x22\n\t"
+ "adcs x12, x12, x5\n\t"
+ "umulh x5, x20, x22\n\t"
+ "adc x13, xzr, xzr\n\t"
+ "adds x11, x11, x4\n\t"
+ "# A[2] * A[5]\n\t"
+ "mul x4, x20, x23\n\t"
+ "adcs x12, x12, x5\n\t"
+ "umulh x5, x20, x23\n\t"
+ "adc x13, x13, xzr\n\t"
+ "adds x12, x12, x4\n\t"
+ "# A[3] * A[4]\n\t"
+ "mul x4, x21, x22\n\t"
+ "adcs x13, x13, x5\n\t"
+ "umulh x5, x21, x22\n\t"
+ "adc x14, xzr, xzr\n\t"
+ "adds x12, x12, x4\n\t"
+ "# A[3] * A[5]\n\t"
+ "mul x4, x21, x23\n\t"
+ "adcs x13, x13, x5\n\t"
+ "umulh x5, x21, x23\n\t"
+ "adc x14, x14, xzr\n\t"
+ "adds x13, x13, x4\n\t"
+ "# A[4] * A[5]\n\t"
+ "mul x4, x22, x23\n\t"
+ "adcs x14, x14, x5\n\t"
+ "umulh x5, x22, x23\n\t"
+ "adc x15, xzr, xzr\n\t"
+ "adds x14, x14, x4\n\t"
+ "adc x15, x15, x5\n\t"
+ "# Double\n\t"
+ "adds x3, x3, x3\n\t"
+ "adcs x7, x7, x7\n\t"
+ "adcs x8, x8, x8\n\t"
+ "adcs x9, x9, x9\n\t"
+ "adcs x10, x10, x10\n\t"
+ "adcs x11, x11, x11\n\t"
+ "adcs x12, x12, x12\n\t"
+ "adcs x13, x13, x13\n\t"
+ "adcs x14, x14, x14\n\t"
+ "# A[0] * A[0]\n\t"
+ "mul x2, x17, x17\n\t"
+ "adcs x15, x15, x15\n\t"
+ "umulh x4, x17, x17\n\t"
+ "cset x16, cs\n\t"
+ "# A[1] * A[1]\n\t"
+ "mul x5, x19, x19\n\t"
+ "adds x3, x3, x4\n\t"
+ "umulh x6, x19, x19\n\t"
+ "adcs x7, x7, x5\n\t"
+ "# A[2] * A[2]\n\t"
+ "mul x4, x20, x20\n\t"
+ "adcs x8, x8, x6\n\t"
+ "umulh x5, x20, x20\n\t"
+ "adcs x9, x9, x4\n\t"
+ "# A[3] * A[3]\n\t"
+ "mul x6, x21, x21\n\t"
+ "adcs x10, x10, x5\n\t"
+ "umulh x4, x21, x21\n\t"
+ "adcs x11, x11, x6\n\t"
+ "# A[4] * A[4]\n\t"
+ "mul x5, x22, x22\n\t"
+ "adcs x12, x12, x4\n\t"
+ "umulh x6, x22, x22\n\t"
+ "adcs x13, x13, x5\n\t"
+ "# A[5] * A[5]\n\t"
+ "mul x4, x23, x23\n\t"
+ "adcs x14, x14, x6\n\t"
+ "umulh x5, x23, x23\n\t"
+ "adcs x15, x15, x4\n\t"
+ "stp x2, x3, [%[r], 0]\n\t"
+ "adc x16, x16, x5\n\t"
+ "stp x7, x8, [%[r], 16]\n\t"
+ "stp x9, x10, [%[r], 32]\n\t"
+ "stp x11, x12, [%[r], 48]\n\t"
+ "stp x13, x14, [%[r], 64]\n\t"
+ "stp x15, x16, [%[r], 80]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "x4", "x5", "x6", "x2", "x3", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_384_sqr_6(r, a);
+ sp_384_mont_reduce_6(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * n Number of times to square (treated as at least 1: the first
+ *   squaring always happens, then n-1 more in place).
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_n_6(sp_digit* r, const sp_digit* a, int n,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_384_mont_sqr_6(r, a, m, mp);
+ for (; n > 1; n--) {
+ sp_384_mont_sqr_6(r, r, m, mp);
+ }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve: the P-384 prime minus 2, little-endian
+ * 64-bit words, used as the exponent for Fermat inversion. */
+static const uint64_t p384_mod_minus_2[6] = {
+ 0x00000000fffffffdU,0xffffffff00000000U,0xfffffffffffffffeU,
+ 0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * Fermat inversion: computes a^(p-2) mod p. The small build walks the
+ * exponent bits of p384_mod_minus_2 with square-and-multiply; the large
+ * build uses a fixed addition chain (the running exponent value is noted
+ * in hex before each step).
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data (at least 10 * 6 digits in the non-small build).
+ */
+static void sp_384_mont_inv_6(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ XMEMCPY(t, a, sizeof(sp_digit) * 6);
+ /* 383 squarings; multiply in a wherever the exponent bit is set. */
+ for (i=382; i>=0; i--) {
+ sp_384_mont_sqr_6(t, t, p384_mod, p384_mp_mod);
+ if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+ sp_384_mont_mul_6(t, t, a, p384_mod, p384_mp_mod);
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 6);
+#else
+ sp_digit* t1 = td;
+ sp_digit* t2 = td + 2 * 6;
+ sp_digit* t3 = td + 4 * 6;
+ sp_digit* t4 = td + 6 * 6;
+ sp_digit* t5 = td + 8 * 6;
+
+ /* 0x2 */
+ sp_384_mont_sqr_6(t1, a, p384_mod, p384_mp_mod);
+ /* 0x3 */
+ sp_384_mont_mul_6(t5, t1, a, p384_mod, p384_mp_mod);
+ /* 0xc */
+ sp_384_mont_sqr_n_6(t1, t5, 2, p384_mod, p384_mp_mod);
+ /* 0xf */
+ sp_384_mont_mul_6(t2, t5, t1, p384_mod, p384_mp_mod);
+ /* 0x1e */
+ sp_384_mont_sqr_6(t1, t2, p384_mod, p384_mp_mod);
+ /* 0x1f */
+ sp_384_mont_mul_6(t4, t1, a, p384_mod, p384_mp_mod);
+ /* 0x3e0 */
+ sp_384_mont_sqr_n_6(t1, t4, 5, p384_mod, p384_mp_mod);
+ /* 0x3ff */
+ sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod);
+ /* 0x7fe0 */
+ sp_384_mont_sqr_n_6(t1, t2, 5, p384_mod, p384_mp_mod);
+ /* 0x7fff */
+ sp_384_mont_mul_6(t4, t4, t1, p384_mod, p384_mp_mod);
+ /* 0x3fff8000 */
+ sp_384_mont_sqr_n_6(t1, t4, 15, p384_mod, p384_mp_mod);
+ /* 0x3fffffff */
+ sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod);
+ /* 0xfffffffc */
+ sp_384_mont_sqr_n_6(t3, t2, 2, p384_mod, p384_mp_mod);
+ /* 0xfffffffd */
+ sp_384_mont_mul_6(r, t3, a, p384_mod, p384_mp_mod);
+ /* 0xffffffff */
+ sp_384_mont_mul_6(t3, t5, t3, p384_mod, p384_mp_mod);
+ /* 0xfffffffc0000000 */
+ sp_384_mont_sqr_n_6(t1, t2, 30, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffff */
+ sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffff000000000000000 */
+ sp_384_mont_sqr_n_6(t1, t2, 60, p384_mod, p384_mp_mod);
+ /* 0xffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod);
+ /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+ sp_384_mont_sqr_n_6(t1, t2, 120, p384_mod, p384_mp_mod);
+ /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod);
+ /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+ sp_384_mont_sqr_n_6(t1, t2, 15, p384_mod, p384_mp_mod);
+ /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+ sp_384_mont_sqr_n_6(t1, t2, 33, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+ sp_384_mont_mul_6(t2, t3, t1, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+ sp_384_mont_sqr_n_6(t1, t2, 96, p384_mod, p384_mp_mod);
+ /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+ sp_384_mont_mul_6(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant digit down. x2 holds the running
+ * result (1, -1 or unchanged), x4 is an all-ones mask that is cleared
+ * once a differing digit is seen so later digits cannot change the
+ * result; the final eor folds the mask into the return value.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static int64_t sp_384_cmp_6(const sp_digit* a, const sp_digit* b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ "mov x5, 40\n\t"
+ "1:\n\t"
+ "ldr x6, [%[a], x5]\n\t"
+ "ldr x7, [%[b], x5]\n\t"
+ "and x6, x6, x4\n\t"
+ "and x7, x7, x4\n\t"
+ "subs x6, x6, x7\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "subs x5, x5, #8\n\t"
+ "b.cs 1b\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16"
+ );
+#else
+ __asm__ __volatile__ (
+ "mov x2, -1\n\t"
+ "mov x3, 1\n\t"
+ "mov x4, -1\n\t"
+ "ldp x5, x6, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[a], 16]\n\t"
+ "ldp x9, x10, [%[a], 32]\n\t"
+ "ldp x11, x12, [%[b], 0]\n\t"
+ "ldp x13, x14, [%[b], 16]\n\t"
+ "ldp x15, x16, [%[b], 32]\n\t"
+ "and x10, x10, x4\n\t"
+ "and x16, x16, x4\n\t"
+ "subs x10, x10, x16\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x9, x9, x4\n\t"
+ "and x15, x15, x4\n\t"
+ "subs x9, x9, x15\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x8, x8, x4\n\t"
+ "and x14, x14, x4\n\t"
+ "subs x8, x8, x14\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x7, x7, x4\n\t"
+ "and x13, x13, x4\n\t"
+ "subs x7, x7, x13\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x6, x6, x4\n\t"
+ "and x12, x12, x4\n\t"
+ "subs x6, x6, x12\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "and x5, x5, x4\n\t"
+ "and x11, x11, x4\n\t"
+ "subs x5, x5, x11\n\t"
+ "csel x2, x4, x2, lo\n\t"
+ "csel x4, x4, xzr, eq\n\t"
+ "csel x2, x3, x2, hi\n\t"
+ "eor %[a], x2, x4\n\t"
+ : [a] "+r" (a)
+ : [b] "r" (b)
+ : "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16"
+ );
+#endif
+
+ /* The asm wrote the comparison result into the register holding a. */
+ return (int64_t)a;
+}
+
+/* Normalize the values in each word to 64.
+ *
+ * A no-op here: with full 64-bit digits there are no spare bits to
+ * propagate, so normalization has nothing to do.
+ *
+ * a Array of sp_digit to normalize.
+ */
+#define sp_384_norm_6(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes 1/Z, then x = X/Z^2 and y = Y/Z^3, converting each out of
+ * Montgomery form via a final reduction and a conditional subtract to
+ * bring the value below the modulus. Z of the result is set to 1.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*6;
+ int64_t n;
+
+ sp_384_mont_inv_6(t1, p->z, t + 2*6);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3 */
+ sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+ /* x /= z^2 */
+ sp_384_mont_mul_6(r->x, p->x, t2, p384_mod, p384_mp_mod);
+ XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U);
+ sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod);
+ /* Reduce x to less than modulus */
+ n = sp_384_cmp_6(r->x, p384_mod);
+ sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_384_norm_6(r->x);
+
+ /* y /= z^3 */
+ sp_384_mont_mul_6(r->y, p->y, t1, p384_mod, p384_mp_mod);
+ XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U);
+ sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod);
+ /* Reduce y to less than modulus */
+ n = sp_384_cmp_6(r->y, p384_mod);
+ sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_384_norm_6(r->y);
+
+ /* Affine result: Z = 1. */
+ XMEMSET(r->z, 0, sizeof(r->z));
+ r->z[0] = 1;
+
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant digit (0 or 1, via
+ * cset on carry-set).
+ */
+static sp_digit sp_384_add_6(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "adds x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "adcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "adcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "adcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldr x3, [%[a], 32]\n\t"
+ "ldr x4, [%[a], 40]\n\t"
+ "ldr x7, [%[b], 32]\n\t"
+ "ldr x8, [%[b], 40]\n\t"
+ "adcs x3, x3, x7\n\t"
+ "adcs x4, x4, x8\n\t"
+ "str x3, [%[r], 32]\n\t"
+ "str x4, [%[r], 40]\n\t"
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return (sp_digit)r;
+}
+
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * The carry out of the add selects a constant-time conditional
+ * subtraction of the modulus.
+ *
+ * r Result of addition.
+ * a First number to add in Montgomery form.
+ * b Second number to add in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m)
+{
+ sp_digit o;
+
+ o = sp_384_add_6(r, a, b);
+ sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r Result of doubling.
+ * a Number to double in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ sp_digit o;
+
+ o = sp_384_add_6(r, a, a);
+ sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Implemented as two modular additions: r = 2a mod m, then r += a mod m.
+ *
+ * r Result of tripling.
+ * a Number to triple in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ sp_digit o;
+
+ o = sp_384_add_6(r, a, a);
+ sp_384_cond_sub_6(r, r, m, 0 - o);
+ o = sp_384_add_6(r, r, a);
+ sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns 0 when no borrow occurred, or all-ones when the subtraction
+ * borrowed (csetm on carry-clear).
+ */
+static sp_digit sp_384_sub_6(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ __asm__ __volatile__ (
+ "ldp x3, x4, [%[a], 0]\n\t"
+ "ldp x7, x8, [%[b], 0]\n\t"
+ "subs x3, x3, x7\n\t"
+ "ldp x5, x6, [%[a], 16]\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "ldp x9, x10, [%[b], 16]\n\t"
+ "sbcs x5, x5, x9\n\t"
+ "stp x3, x4, [%[r], 0]\n\t"
+ "sbcs x6, x6, x10\n\t"
+ "stp x5, x6, [%[r], 16]\n\t"
+ "ldr x3, [%[a], 32]\n\t"
+ "ldr x4, [%[a], 40]\n\t"
+ "ldr x7, [%[b], 32]\n\t"
+ "ldr x8, [%[b], 40]\n\t"
+ "sbcs x3, x3, x7\n\t"
+ "sbcs x4, x4, x8\n\t"
+ "str x3, [%[r], 32]\n\t"
+ "str x4, [%[r], 40]\n\t"
+ "csetm %[r], cc\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10"
+ );
+
+ return (sp_digit)r;
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Constant-time: the same instructions run either way; m only masks the
+ * b limbs before the carry-chained addition.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ * Returns the carry out of the most significant digit (0 or 1).
+ */
+static sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov x8, #0\n\t"
+ "1:\n\t"
+ "adds %[c], %[c], #-1\n\t"
+ "ldr x4, [%[a], x8]\n\t"
+ "ldr x5, [%[b], x8]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "adcs x4, x4, x5\n\t"
+ "cset %[c], cs\n\t"
+ "str x4, [%[r], x8]\n\t"
+ "add x8, x8, #8\n\t"
+ "cmp x8, 48\n\t"
+ "b.lt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+ );
+
+ return c;
+#else
+ __asm__ __volatile__ (
+
+ "ldp x5, x7, [%[b], 0]\n\t"
+ "ldp x11, x12, [%[b], 16]\n\t"
+ "ldp x4, x6, [%[a], 0]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "ldp x9, x10, [%[a], 16]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "adds x4, x4, x5\n\t"
+ "and x11, x11, %[m]\n\t"
+ "adcs x6, x6, x7\n\t"
+ "and x12, x12, %[m]\n\t"
+ "adcs x9, x9, x11\n\t"
+ "stp x4, x6, [%[r], 0]\n\t"
+ "adcs x10, x10, x12\n\t"
+ "stp x9, x10, [%[r], 16]\n\t"
+ "ldp x5, x7, [%[b], 32]\n\t"
+ "ldp x4, x6, [%[a], 32]\n\t"
+ "and x5, x5, %[m]\n\t"
+ "and x7, x7, %[m]\n\t"
+ "adcs x4, x4, x5\n\t"
+ "adcs x6, x6, x7\n\t"
+ "stp x4, x6, [%[r], 32]\n\t"
+ "cset %[r], cs\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "x4", "x6", "x5", "x7", "x8", "x9", "x10", "x11", "x12"
+ );
+
+ return (sp_digit)r;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * The borrow mask from the subtract selects a constant-time conditional
+ * add of the modulus to bring the result back into range.
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m)
+{
+ sp_digit o;
+
+ o = sp_384_sub_6(r, a, b);
+ sp_384_cond_add_6(r, r, m, o);
+}
+
+/* Right shift the 6-digit number a by one bit into r. (r = a >> 1)
+ *
+ * Each digit takes its neighbour's low bit via orr with lsl 63; the
+ * top digit is shifted logically so a zero bit enters at the top.
+ *
+ * r A single precision integer to hold the result.
+ * a A single precision integer to shift.
+ */
+static void sp_384_rshift1_6(sp_digit* r, sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "ldp x2, x3, [%[a]]\n\t"
+ "ldp x4, x5, [%[a], 16]\n\t"
+ "ldp x6, x7, [%[a], 32]\n\t"
+ "lsr x11, x6, 1\n\t"
+ "lsr x10, x5, 1\n\t"
+ "lsr x9, x4, 1\n\t"
+ "lsr x8, x3, 1\n\t"
+ "lsr x2, x2, 1\n\t"
+ "orr x2, x2, x3, lsl 63\n\t"
+ "orr x3, x8, x4, lsl 63\n\t"
+ "orr x4, x9, x5, lsl 63\n\t"
+ "orr x5, x10, x6, lsl 63\n\t"
+ "orr x6, x11, x7, lsl 63\n\t"
+ "lsr x7, x7, 1\n\t"
+ "stp x2, x3, [%[r]]\n\t"
+ "stp x4, x5, [%[r], 16]\n\t"
+ "stp x6, x7, [%[r], 32]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11"
+ );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * When a is odd, m is added first (m is odd, so the sum becomes even);
+ * the shift then halves exactly, and the carry out of that conditional
+ * add (o) becomes the new top bit. Constant time: the add always runs,
+ * masked by -(a[0] & 1).
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+static void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ sp_digit o;
+
+ o = sp_384_cond_add_6(r, a, m, 0 - (a[0] & 1));
+ sp_384_rshift1_6(r, r);
+ r[5] |= o << 63;
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * Jacobian-coordinate doubling; each step below is a single modular
+ * operation and the running values are noted in the step comments.
+ * r may alias p.
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data (at least 4 * 6 digits: t1 and t2).
+ */
+static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*6;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = r->x;
+ y = r->y;
+ z = r->z;
+ /* Put infinity into result. */
+ if (r != p) {
+ r->infinity = p->infinity;
+ }
+
+ /* T1 = Z * Z */
+ sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod);
+ /* Z = Y * Z */
+ sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod);
+ /* Z = 2Z */
+ sp_384_mont_dbl_6(z, z, p384_mod);
+ /* T2 = X - T1 */
+ sp_384_mont_sub_6(t2, p->x, t1, p384_mod);
+ /* T1 = X + T1 */
+ sp_384_mont_add_6(t1, p->x, t1, p384_mod);
+ /* T2 = T1 * T2 */
+ sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod);
+ /* T1 = 3T2 */
+ sp_384_mont_tpl_6(t1, t2, p384_mod);
+ /* Y = 2Y */
+ sp_384_mont_dbl_6(y, p->y, p384_mod);
+ /* Y = Y * Y */
+ sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod);
+ /* T2 = Y * Y */
+ sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+ /* T2 = T2/2 */
+ sp_384_div2_6(t2, t2, p384_mod);
+ /* Y = Y * X */
+ sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod);
+ /* X = T1 * T1 */
+ sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod);
+ /* X = X - Y */
+ sp_384_mont_sub_6(x, x, y, p384_mod);
+ /* X = X - Y */
+ sp_384_mont_sub_6(x, x, y, p384_mod);
+ /* Y = Y - X */
+ sp_384_mont_sub_6(y, y, x, p384_mod);
+ /* Y = Y * T1 */
+ sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod);
+ /* Y = Y - T2 */
+ sp_384_mont_sub_6(y, y, t2, p384_mod);
+}
+
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * The point is doubled in place (there is no separate result argument).
+ * Uses the repeated-doubling formulas with a cached W = Z^4 that is
+ * updated by Y^4 each iteration; in the non-small build the final
+ * iteration is peeled out of the loop so the last W update is skipped.
+ *
+ * p Point to double in place.
+ * n Number of times to double.
+ * t Temporary ordinate data (at least 10 * 6 digits).
+ */
+static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*6;
+ sp_digit* b = t + 4*6;
+ sp_digit* t1 = t + 6*6;
+ sp_digit* t2 = t + 8*6;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = p->x;
+ y = p->y;
+ z = p->z;
+
+ /* Y = 2*Y */
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ /* W = Z^4 */
+ sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+ while (--n > 0)
+#else
+ while (--n >= 0)
+#endif
+ {
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_6(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(t2, b, p384_mod);
+ sp_384_mont_sub_6(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod);
+ /* t1 = Y^4 */
+ sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+ if (n != 0)
+#endif
+ {
+ /* W = W*Y^4 */
+ sp_384_mont_mul_6(w, w, t1, p384_mod, p384_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_6(y, b, x, p384_mod);
+ sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ sp_384_mont_sub_6(y, y, t1, p384_mod);
+ }
+#ifndef WOLFSSL_SP_SMALL
+ /* Final (peeled) iteration: identical except W is not updated. */
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_6(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(t2, b, p384_mod);
+ sp_384_mont_sub_6(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod);
+ /* t1 = Y^4 */
+ sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod);
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_6(y, b, x, p384_mod);
+ sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ sp_384_mont_sub_6(y, y, t1, p384_mod);
+#endif
+ /* Y = Y/2 */
+ sp_384_div2_6(y, y, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation: ORs together the XOR of every digit
+ * pair so no early exit leaks which digit differed.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_6(const sp_digit* a, const sp_digit* b)
+{
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+ (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_6(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+ sp_digit* t)
+{
+ const sp_point_384* ap[2];
+ sp_point_384* rp[2];
+ /* Temporary area t is carved into five 2*6-digit scratch values. */
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*6;
+ sp_digit* t3 = t + 4*6;
+ sp_digit* t4 = t + 6*6;
+ sp_digit* t5 = t + 8*6;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Ensure only the first point is the same as the result. */
+ if (q == r) {
+ const sp_point_384* a = p;
+ p = q;
+ q = a;
+ }
+
+ /* Check double: p equals q when X and Z match and Y is equal or the
+ * negation (t1 = mod - q->y). The addition formulas below degenerate
+ * in that case, so fall back to point doubling. */
+ (void)sp_384_sub_6(t1, p384_mod, q->y);
+ sp_384_norm_6(t1);
+ if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) &
+ (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) {
+ sp_384_proj_point_dbl_6(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_384));
+ /* When either input is infinity, direct the ordinate writes below
+ * into the scratch point so r keeps the finite operand copied in. */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ /* r = p, or r = q when p is the point at infinity. */
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<6; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<6; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<6; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U1 = X1*Z2^2 */
+ sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod);
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S1 = Y1*Z2^3 */
+ sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - U1 */
+ sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+ /* R = S2 - S1 */
+ sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+ /* Z3 = H*Z1*Z2 */
+ sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+ /* X3 = R^2 - H^3 - 2*U1*H^2 */
+ sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(x, x, t5, p384_mod);
+ sp_384_mont_dbl_6(t1, y, p384_mod);
+ sp_384_mont_sub_6(x, x, t1, p384_mod);
+ /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+ sp_384_mont_sub_6(y, y, x, p384_mod);
+ sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(y, y, t5, p384_mod);
+ }
+}
+
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * r Result of repeated doubling of point.
+ * p Point to double.
+ * n Number of times to double
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, const sp_point_384* p,
+ int n, int m, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*6;
+ sp_digit* b = t + 4*6;
+ sp_digit* t1 = t + 6*6;
+ sp_digit* t2 = t + 8*6;
+ /* Results land at table entries r[2^i * m], i = 1..n. */
+ sp_digit* x = r[2*m].x;
+ /* y aliases the final entry's Y storage and serves as the running Y
+ * working value for every iteration. */
+ sp_digit* y = r[(1<<n)*m].y;
+ sp_digit* z = r[2*m].z;
+ int i;
+
+ /* Start from p. */
+ for (i=0; i<6; i++) {
+ x[i] = p->x[i];
+ }
+ for (i=0; i<6; i++) {
+ y[i] = p->y[i];
+ }
+ for (i=0; i<6; i++) {
+ z[i] = p->z[i];
+ }
+
+ /* Y = 2*Y */
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ /* W = Z^4 */
+ sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod);
+ for (i=1; i<=n; i++) {
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_6(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(b, t2, x, p384_mod, p384_mp_mod);
+ /* Write this doubling's X into the 2^i-th table slot. */
+ x = r[(1<<i)*m].x;
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(t1, b, p384_mod);
+ sp_384_mont_sub_6(x, x, t1, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_6(r[(1<<i)*m].z, z, y, p384_mod, p384_mp_mod);
+ z = r[(1<<i)*m].z;
+ /* t2 = Y^4 */
+ sp_384_mont_sqr_6(t2, t2, p384_mod, p384_mp_mod);
+ if (i != n) {
+ /* W = W*Y^4 */
+ sp_384_mont_mul_6(w, w, t2, p384_mod, p384_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_6(y, b, x, p384_mod);
+ sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ sp_384_mont_sub_6(y, y, t2, p384_mod);
+
+ /* Y = Y/2 */
+ sp_384_div2_6(r[(1<<i)*m].y, y, p384_mod);
+ r[(1<<i)*m].infinity = 0;
+ }
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * ra Result of addition.
+ * rs Result of subtraction.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_sub_6(sp_point_384* ra, sp_point_384* rs,
+ const sp_point_384* p, const sp_point_384* q, sp_digit* t)
+{
+ /* Temporary area t is carved into six 2*6-digit scratch values. */
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*6;
+ sp_digit* t3 = t + 4*6;
+ sp_digit* t4 = t + 6*6;
+ sp_digit* t5 = t + 8*6;
+ sp_digit* t6 = t + 10*6;
+ sp_digit* x = ra->x;
+ sp_digit* y = ra->y;
+ sp_digit* z = ra->z;
+ sp_digit* xs = rs->x;
+ sp_digit* ys = rs->y;
+ sp_digit* zs = rs->z;
+
+
+ /* Start both results from p. Only half of each ordinate array is
+ * copied; assumes the arrays hold 2*6 digits each - TODO confirm. */
+ XMEMCPY(x, p->x, sizeof(p->x) / 2);
+ XMEMCPY(y, p->y, sizeof(p->y) / 2);
+ XMEMCPY(z, p->z, sizeof(p->z) / 2);
+ ra->infinity = 0;
+ rs->infinity = 0;
+
+ /* U1 = X1*Z2^2 */
+ sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod);
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S1 = Y1*Z2^3 */
+ sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - U1 */
+ sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+ /* RS = S2 + S1 */
+ sp_384_mont_add_6(t6, t4, t3, p384_mod);
+ /* R = S2 - S1 */
+ sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+ /* Z3 = H*Z1*Z2 */
+ /* ZS = H*Z1*Z2 */
+ sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+ XMEMCPY(zs, z, sizeof(p->z)/2);
+ /* X3 = R^2 - H^3 - 2*U1*H^2 */
+ /* XS = RS^2 - H^3 - 2*U1*H^2 */
+ sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_6(xs, t6, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(x, x, t5, p384_mod);
+ sp_384_mont_sub_6(xs, xs, t5, p384_mod);
+ sp_384_mont_dbl_6(t1, y, p384_mod);
+ sp_384_mont_sub_6(x, x, t1, p384_mod);
+ sp_384_mont_sub_6(xs, xs, t1, p384_mod);
+ /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+ sp_384_mont_sub_6(ys, y, xs, p384_mod);
+ sp_384_mont_sub_6(y, y, x, p384_mod);
+ sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod);
+ /* t6 = -RS */
+ sp_384_sub_6(t6, p384_mod, t6);
+ sp_384_mont_mul_6(ys, ys, t6, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(y, y, t5, p384_mod);
+ sp_384_mont_sub_6(ys, ys, t5, p384_mod);
+}
+
+/* Structure used to describe recoding of scalar multiplication. */
+typedef struct ecc_recode_384 {
+ /* Index into pre-computation table (0..32 per recode_index_6_6). */
+ uint8_t i;
+ /* Use the negative of the point (0 or 1 per recode_neg_6_6). */
+ uint8_t neg;
+} ecc_recode_384;
+
+/* The index into pre-computation table to use. */
+static const uint8_t recode_index_6_6[66] = {
+ /* Window values 0..32 map to themselves; values 33..64 map to
+ * 64 - value and the point is negated instead (see recode_neg_6_6);
+ * the final entries handle values produced by carry propagation. */
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
+ 0, 1,
+};
+
+/* Whether to negate y-ordinate. */
+static const uint8_t recode_neg_6_6[66] = {
+ /* 1 for window values 32..63 (these use the negated pre-computed
+ * point, paired with recode_index_6_6); 0 otherwise. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * k Scalar to multiply by.
+ * v Vector of operations to perform.
+ */
+static void sp_384_ecc_recode_6_6(const sp_digit* k, ecc_recode_384* v)
+{
+ int i, j;
+ /* Current 6-bit window value (plus carry). */
+ uint8_t y;
+ int carry = 0;
+ /* Bit offset of the next window within the current 64-bit word. */
+ int o;
+ /* Remaining bits of the current scalar word, shifted down. */
+ sp_digit n;
+
+ j = 0;
+ n = k[j];
+ o = 0;
+ /* 65 windows of 6 bits cover the 384-bit scalar (65*6 = 390). */
+ for (i=0; i<65; i++) {
+ y = n;
+ if (o + 6 < 64) {
+ /* Window lies entirely inside the current word. */
+ y &= 0x3f;
+ n >>= 6;
+ o += 6;
+ }
+ else if (o + 6 == 64) {
+ /* Window ends exactly at the word boundary; advance to the
+ * next scalar word. */
+ n >>= 6;
+ if (++j < 6)
+ n = k[j];
+ o = 0;
+ }
+ else if (++j < 6) {
+ /* Window straddles two words: take the low bits from the old
+ * word and the rest from the new one (o - 58 = 6 - (64 - o)). */
+ n = k[j];
+ y |= (n << (64 - o)) & 0x3f;
+ o -= 58;
+ n >>= o;
+ }
+
+ /* Map the window value to a table index and negate flag; values
+ * above 32 borrow from the next window, producing a carry. */
+ y += carry;
+ v[i].i = recode_index_6_6[y];
+ v[i].neg = recode_neg_6_6[y];
+ carry = (y >> 6) + v[i].neg;
+ }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_win_add_sub_6(sp_point_384* r, const sp_point_384* g,
+ const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Stack storage: 33-entry point table plus scratch digits. */
+ sp_point_384 td[33];
+ sp_point_384 rtd, pd;
+ sp_digit tmpd[2 * 6 * 6];
+#endif
+ sp_point_384* t;
+ sp_point_384* rt;
+ sp_point_384* p = NULL;
+ sp_digit* tmp;
+ sp_digit* negy;
+ int i;
+ /* Signed 6-bit window recoding of the scalar: 65 windows. */
+ ecc_recode_384 v[65];
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_6(heap, rtd, rt);
+ if (err == MP_OKAY)
+ err = sp_384_point_new_6(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 33, heap, DYNAMIC_TYPE_ECC);
+ if (t == NULL)
+ err = MEMORY_E;
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#else
+ t = td;
+ tmp = tmpd;
+#endif
+
+
+ if (err == MP_OKAY) {
+ /* t[0] = {0, 0, 1} * norm */
+ XMEMSET(&t[0], 0, sizeof(t[0]));
+ t[0].infinity = 1;
+ /* t[1] = {g->x, g->y, g->z} * norm */
+ err = sp_384_mod_mul_norm_6(t[1].x, g->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t[1].y, g->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t[1].z, g->z, p384_mod);
+ }
+
+ if (err == MP_OKAY) {
+ t[1].infinity = 0;
+ /* t[2] ... t[32]: powers of two of g via repeated doubling, the
+ * rest filled in with combined add/subtract steps. */
+ sp_384_proj_point_dbl_n_store_6(t, &t[ 1], 5, 1, tmp);
+ sp_384_proj_point_add_6(&t[ 3], &t[ 2], &t[ 1], tmp);
+ sp_384_proj_point_dbl_6(&t[ 6], &t[ 3], tmp);
+ sp_384_proj_point_add_sub_6(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+ sp_384_proj_point_dbl_6(&t[10], &t[ 5], tmp);
+ sp_384_proj_point_add_sub_6(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+ sp_384_proj_point_dbl_6(&t[12], &t[ 6], tmp);
+ sp_384_proj_point_dbl_6(&t[14], &t[ 7], tmp);
+ sp_384_proj_point_add_sub_6(&t[15], &t[13], &t[14], &t[ 1], tmp);
+ sp_384_proj_point_dbl_6(&t[18], &t[ 9], tmp);
+ sp_384_proj_point_add_sub_6(&t[19], &t[17], &t[18], &t[ 1], tmp);
+ sp_384_proj_point_dbl_6(&t[20], &t[10], tmp);
+ sp_384_proj_point_dbl_6(&t[22], &t[11], tmp);
+ sp_384_proj_point_add_sub_6(&t[23], &t[21], &t[22], &t[ 1], tmp);
+ sp_384_proj_point_dbl_6(&t[24], &t[12], tmp);
+ sp_384_proj_point_dbl_6(&t[26], &t[13], tmp);
+ sp_384_proj_point_add_sub_6(&t[27], &t[25], &t[26], &t[ 1], tmp);
+ sp_384_proj_point_dbl_6(&t[28], &t[14], tmp);
+ sp_384_proj_point_dbl_6(&t[30], &t[15], tmp);
+ sp_384_proj_point_add_sub_6(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+ /* t[0].y is all zero - reuse it as scratch for the negated Y. */
+ negy = t[0].y;
+
+ sp_384_ecc_recode_6_6(k, v);
+
+ /* Start from the most significant window, then for each lower
+ * window: 6 doublings followed by one table-point addition. */
+ i = 64;
+ XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_384));
+ for (--i; i>=0; i--) {
+ sp_384_proj_point_dbl_n_6(rt, 6, tmp);
+
+ XMEMCPY(p, &t[v[i].i], sizeof(sp_point_384));
+ /* Conditionally negate Y (mask-selected, no branch). */
+ sp_384_sub_6(negy, p384_mod, p->y);
+ sp_384_cond_copy_6(p->y, negy, (sp_digit)0 - v[i].neg);
+ sp_384_proj_point_add_6(rt, rt, p, tmp);
+ }
+
+ if (map != 0) {
+ sp_384_map_6(r, rt, tmp);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL)
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ if (tmp != NULL)
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_384_point_free_6(p, 0, heap);
+ sp_384_point_free_6(rt, 0, heap);
+
+ return err;
+}
+
+/* A table entry for pre-computed points. */
+typedef struct sp_table_entry_384 {
+ /* Affine x-ordinate (Montgomery form - see sp_384_proj_to_affine_6). */
+ sp_digit x[6];
+ /* Affine y-ordinate (Montgomery form). */
+ sp_digit y[6];
+} sp_table_entry_384;
+
+#ifdef FP_ECC
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p,
+ const sp_point_384* q, sp_digit* t)
+{
+ const sp_point_384* ap[2];
+ sp_point_384* rp[2];
+ /* Temporary area t is carved into five 2*6-digit scratch values. */
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*6;
+ sp_digit* t3 = t + 4*6;
+ sp_digit* t4 = t + 6*6;
+ sp_digit* t5 = t + 8*6;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Check double: p equals q when X and Z match and Y is equal or the
+ * negation (t1 = mod - q->y); fall back to doubling in that case. */
+ (void)sp_384_sub_6(t1, p384_mod, q->y);
+ sp_384_norm_6(t1);
+ if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) &
+ (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) {
+ sp_384_proj_point_dbl_6(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_384));
+ /* When either input is infinity, direct the ordinate writes into
+ * the scratch point so r keeps the finite operand copied below. */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ /* r = p, or r = q when p is the point at infinity. */
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<6; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<6; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<6; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* Since Z2 = 1: U1 = X1, S1 = Y1. */
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - X1 */
+ sp_384_mont_sub_6(t2, t2, x, p384_mod);
+ /* R = S2 - Y1 */
+ sp_384_mont_sub_6(t4, t4, y, p384_mod);
+ /* Z3 = H*Z1 */
+ sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+ /* X3 = R^2 - H^3 - 2*X1*H^2 */
+ sp_384_mont_sqr_6(t1, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t3, x, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(x, t1, t5, p384_mod);
+ sp_384_mont_dbl_6(t1, t3, p384_mod);
+ sp_384_mont_sub_6(x, x, t1, p384_mod);
+ /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+ sp_384_mont_sub_6(t3, t3, x, p384_mod);
+ sp_384_mont_mul_6(t3, t3, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t5, t5, y, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(y, t3, t5, p384_mod);
+ }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert.
+ * t Temporary data.
+ */
+static void sp_384_proj_to_affine_6(sp_point_384* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 6;
+ sp_digit* tmp = t + 4 * 6;
+
+ /* t1 = 1/Z */
+ sp_384_mont_inv_6(t1, a->z, tmp);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3 */
+ sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+ /* X = X/Z^2, Y = Y/Z^3, Z = 1 (Montgomery form of 1). */
+ sp_384_mont_mul_6(a->x, a->x, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(a->y, a->y, t1, p384_mod, p384_mp_mod);
+ XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_6(const sp_point_384* a,
+ sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td, s1d, s2d;
+#endif
+ sp_point_384* t;
+ sp_point_384* s1 = NULL;
+ sp_point_384* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_6(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, s2d, s2);
+ }
+
+ /* Normalize 'a' into Montgomery form in t. */
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t->x, a->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t->y, a->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t->z, a->z, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_384_proj_to_affine_6(t, tmp);
+
+ /* s1 and s2 carry Z = 1 (Montgomery one) for qz1 addition. */
+ XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(48*i) * a, for the 8 comb teeth 48 bits apart. */
+ for (i=1; i<8; i++) {
+ sp_384_proj_point_dbl_n_6(t, 48, tmp);
+ sp_384_proj_to_affine_6(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Fill remaining entries as sums of the power-of-two entries:
+ * table[j] = table[2^i] + table[j - 2^i]. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_384_proj_point_add_qz1_6(t, s1, s2, tmp);
+ sp_384_proj_to_affine_6(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_384_point_free_6(s2, 0, heap);
+ sp_384_point_free_6(s1, 0, heap);
+ sp_384_point_free_6( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g,
+ const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 rtd;
+ sp_point_384 pd;
+ sp_digit td[2 * 6 * 6];
+#endif
+ sp_point_384* rt;
+ sp_point_384* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_384_point_new_6(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Table entries are affine (Z = Montgomery one). */
+ XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+ XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+ /* Comb method: 8 teeth spaced 48 bits apart; y gathers one scalar
+ * bit per tooth into an 8-bit table index. Start at bit 47. */
+ y = 0;
+ for (j=0,x=47; j<8; j++,x+=48) {
+ y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ /* One double plus one qz1 add per remaining bit position. */
+ for (i=46; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=48) {
+ y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+ }
+
+ sp_384_proj_point_dbl_6(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_384_proj_point_add_qz1_6(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_384_map_6(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_6(p, 0, heap);
+ sp_384_point_free_6(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+ /* X ordinate of the cached point (identifies the entry). */
+ sp_digit x[6];
+ /* Y ordinate of the cached point. */
+ sp_digit y[6];
+ /* Pre-computed stripe table for the point. */
+ sp_table_entry_384 table[256];
+ /* Use count - drives table generation and LRU-style eviction. */
+ uint32_t cnt;
+ /* Non-zero when this entry holds a valid point. */
+ int set;
+} sp_cache_384_t;
+
+/* Cache entries are thread-local when THREAD_LS_T expands to TLS storage. */
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ /* Without thread-local storage, a mutex guards the shared cache. */
+ static volatile int initCacheMutex_384 = 0;
+ static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find or allocate the cache entry for point g, bumping its use count.
+ * Caller must hold sp_cache_384_lock when not using thread-local storage. */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazily mark all entries empty on first use. */
+ if (sp_cache_384_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_384[i].set = 0;
+ }
+ sp_cache_384_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_384[i].set)
+ continue;
+
+ if (sp_384_cmp_equal_6(g->x, sp_cache_384[i].x) &
+ sp_384_cmp_equal_6(g->y, sp_cache_384[i].y)) {
+ sp_cache_384[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry, scanning from just past the last one used. */
+ i = (sp_cache_384_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_384[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_384_last) {
+ least = sp_cache_384[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_384[j].cnt < least) {
+ i = j;
+ least = sp_cache_384[i].cnt;
+ }
+ }
+ }
+
+ /* Claim the slot for g with a fresh use count. */
+ XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+ XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+ sp_cache_384[i].set = 1;
+ sp_cache_384[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_384[i];
+ sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar on the P384 curve and return the result.
+ * Uses the fixed-point cache when FP_ECC is enabled.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ /* No fixed-point cache: always use the window add/sub method. */
+ return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 6 * 7];
+ sp_cache_384_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* NOTE(review): this check-then-init of the mutex is not itself
+ * thread-safe on first use - relies on early single-threaded init. */
+ if (initCacheMutex_384 == 0) {
+ wc_InitMutex(&sp_cache_384_lock);
+ initCacheMutex_384 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_384_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_384(g, &cache);
+ /* Build the stripe table once the point has been seen twice. */
+ if (cache->cnt == 2)
+ sp_384_gen_stripe_table_6(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+ /* First use: window method; afterwards: faster stripe method. */
+ if (cache->cnt < 2) {
+ err = sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap);
+ }
+ else {
+ err = sp_384_ecc_mulmod_stripe_6(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * gm    Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[6];
+#endif
+ sp_point_384* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert the mp_int scalar and ecc_point into SP representation,
+ * multiply in place, then convert the result back. */
+ sp_384_from_mp(k, 6, km);
+ sp_384_point_from_ecc_point_6(point, gm);
+
+ err = sp_384_ecc_mulmod_6(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_to_ecc_point_6(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_6(point, 0, heap);
+
+ return err;
+}
+
+static const sp_table_entry_384 p384_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x3dd0756649c0b528L,0x20e378e2a0d6ce38L,0x879c3afc541b4d6eL,
+ 0x6454868459a30effL,0x812ff723614ede2bL,0x4d3aadc2299e1513L },
+ { 0x23043dad4b03a4feL,0xa1bfa8bf7bb4a9acL,0x8bade7562e83b050L,
+ 0xc6c3521968f4ffd9L,0xdd8002263969a840L,0x2b78abc25a15c5e9L } },
+ /* 2 */
+ { { 0x298647532b0c535bL,0x90dd695370506296L,0x038cd6b4216ab9acL,
+ 0x3df9b7b7be12d76aL,0x13f4d9785f347bdbL,0x222c5c9c13e94489L },
+ { 0x5f8e796f2680dc64L,0x120e7cb758352417L,0x254b5d8ad10740b8L,
+ 0xc38b8efb5337dee6L,0xf688c2e194f02247L,0x7b5c75f36c25bc4cL } },
+ /* 3 */
+ { { 0xe26a3cc39edffea5L,0x35bbfd1c37d7e9fcL,0xf0e7700d9bde3ef6L,
+ 0x0380eb471a538f5aL,0x2e9da8bb05bf9eb3L,0xdbb93c731a460c3eL },
+ { 0x37dba260f526b605L,0x95d4978efd785537L,0x24ed793aed72a04aL,
+ 0x2694837776005b1aL,0x99f557b99e681f82L,0xae5f9557d64954efL } },
+ /* 4 */
+ { { 0x24480c57f26feef9L,0xc31a26943a0e1240L,0x735002c3273e2bc7L,
+ 0x8c42e9c53ef1ed4cL,0x028babf67f4948e8L,0x6a502f438a978632L },
+ { 0xf5f13a46b74536feL,0x1d218babd8a9f0ebL,0x30f36bcc37232768L,
+ 0xc5317b31576e8c18L,0xef1d57a69bbcb766L,0x917c4930b3e3d4dcL } },
+ /* 5 */
+ { { 0x11426e2ee349ddd0L,0x9f117ef99b2fc250L,0xff36b480ec0174a6L,
+ 0x4f4bde7618458466L,0x2f2edb6d05806049L,0x8adc75d119dfca92L },
+ { 0xa619d097b7d5a7ceL,0x874275e5a34411e9L,0x5403e0470da4b4efL,
+ 0x2ebaafd977901d8fL,0x5e63ebcea747170fL,0x12a369447f9d8036L } },
+ /* 6 */
+ { { 0x28f9c07a4fc52870L,0xce0b37481a53a961L,0xd550fa180e1828d9L,
+ 0xa24abaf76adb225aL,0xd11ed0a56e58a348L,0xf3d811e6948acb62L },
+ { 0x8618dd774c61ed22L,0x0bb747f980b47c9dL,0x22bf796fde6b8559L,
+ 0xfdfd1c6d680a21e9L,0xc0db15772af2c9ddL,0xa09379e6c1e90f3dL } },
+ /* 7 */
+ { { 0x386c66efe085c629L,0x5fc2a461095bc89aL,0x1353d631203f4b41L,
+ 0x7ca1972b7e4bd8f5L,0xb077380aa7df8ce9L,0xd8a90389ee7e4ea3L },
+ { 0x1bc74dc7e7b14461L,0xdc2cb0140c9c4f78L,0x52b4b3a684ef0a10L,
+ 0xbde6ea5d20327fe2L,0xb71ec435660f9615L,0xeede5a04b8ad8173L } },
+ /* 8 */
+ { { 0x5584cbb3893b9a2dL,0x820c660b00850c5dL,0x4126d8267df2d43dL,
+ 0xdd5bbbf00109e801L,0x85b92ee338172f1cL,0x609d4f93f31430d9L },
+ { 0x1e059a07eadaf9d6L,0x70e6536c0f125fb0L,0xd6220751560f20e7L,
+ 0xa59489ae7aaf3a9aL,0x7b70e2f664bae14eL,0x0dd0370176d08249L } },
+ /* 9 */
+ { { 0x4cc13be88510521fL,0x87315ba9f724cc17L,0xb49d83bb353dc263L,
+ 0x8b677efe0c279257L,0x510a1c1cc93c9537L,0x33e30cd8a4702c99L },
+ { 0xf0ffc89d2208353fL,0x0170fa8dced42b2bL,0x090851ed26e2a5f5L,
+ 0x81276455ecb52c96L,0x0646c4e17fe1adf4L,0x513f047eb0868eabL } },
+ /* 10 */
+ { { 0xc07611f4df5bdf53L,0x45d331a758b11a6dL,0x58965daf1c4ee394L,
+ 0xba8bebe75a5878d1L,0xaecc0a1882dd3025L,0xcf2a3899a923eb8bL },
+ { 0xf98c9281d24fd048L,0x841bfb598bbb025dL,0xb8ddf8cec9ab9d53L,
+ 0x538a4cb67fef044eL,0x092ac21f23236662L,0xa919d3850b66f065L } },
+ /* 11 */
+ { { 0x3db03b4085d480d8L,0x8cd9f4791b287a7dL,0x8f24dc754a8f3baeL,
+ 0x482eb8003db41892L,0x38bf9eb39c56e0f5L,0x8b9773209a91dc6fL },
+ { 0xa31b05b27209cfc2L,0x4c49bf8505b2db70L,0x56462498d619527bL,
+ 0x3fe510391fac51baL,0xfb04f55eab4b8342L,0xc07c10dc04c6eabfL } },
+ /* 12 */
+ { { 0xad22fe4cdb32f048L,0x5f23bf91475ed6dfL,0xa50ce0c0aa66b6cbL,
+ 0xdf627a89f03405c0L,0x3674837df95e2d6aL,0x081c95b6ba42e64eL },
+ { 0xeba3e036e71d6cebL,0xb45bcccf6c6b0271L,0x67b47e630684701dL,
+ 0x60f8f942e712523fL,0x824234725cd47adcL,0x83027d7987649cbbL } },
+ /* 13 */
+ { { 0xb3929ea63615b0b8L,0xb41441fda54dac41L,0x8995d556b5b6a368L,
+ 0xa80d4529167ef05eL,0xf6bcb4a16d25a27fL,0x210d6a4c7bd55b68L },
+ { 0xf3804abb25351130L,0x1d2df699903e37ebL,0x5f201efc084c25c8L,
+ 0x31a28c87a1c68e91L,0x81dad253563f62a5L,0x5dd6de70d6c415d4L } },
+ /* 14 */
+ { { 0x29f470fd846612ceL,0x986f3eecda18d997L,0x6b84c1612f34af86L,
+ 0x5ef0a40846ddaf8bL,0x14405a00e49e795fL,0x5f491b16aa2f7a37L },
+ { 0xc7f07ae4db41b38dL,0xef7d119e18fbfcaaL,0x3a18e07614443b19L,
+ 0x4356841a79a19926L,0x91f4a91ce2226fbeL,0xdc77248c3cc88721L } },
+ /* 15 */
+ { { 0xd570ff1ae4b1ec9dL,0x21d23e0ee7eef706L,0x3cde40f4ca19e086L,
+ 0x7d6523c4cd4bb270L,0x16c1f06cbf13aa6cL,0x5aa7245ad14c4b60L },
+ { 0x37f8146744b74de8L,0x839e7a17620a934eL,0xf74d14e8de8b1aa1L,
+ 0x8789fa51f30d75e2L,0x09b24052c81c261eL,0x654e267833c565eeL } },
+ /* 16 */
+ { { 0x378205de2f9fbe67L,0xc4afcb837f728e44L,0xdbcec06c682e00f1L,
+ 0xf2a145c3114d5423L,0xa01d98747a52463eL,0xfc0935b17d717b0aL },
+ { 0x9653bc4fd4d01f95L,0x9aa83ea89560ad34L,0xf77943dcaf8e3f3fL,
+ 0x70774a10e86fe16eL,0x6b62e6f1bf9ffdcfL,0x8a72f39e588745c9L } },
+ /* 17 */
+ { { 0x73ade4da2341c342L,0xdd326e54ea704422L,0x336c7d983741cef3L,
+ 0x1eafa00d59e61549L,0xcd3ed892bd9a3efdL,0x03faf26cc5c6c7e4L },
+ { 0x087e2fcf3045f8acL,0x14a65532174f1e73L,0x2cf84f28fe0af9a7L,
+ 0xddfd7a842cdc935bL,0x4c0f117b6929c895L,0x356572d64c8bcfccL } },
+ /* 18 */
+ { { 0x7ecbac017d8c1bbaL,0x6058f9c390b0f3d5L,0xaee116e3f6197d0fL,
+ 0xc4dd70684033b128L,0xf084dba6c209b983L,0x97c7c2cf831dbc4aL },
+ { 0x2f4e61ddf96010e8L,0xd97e4e20529faa17L,0x4ee6666069d37f20L,
+ 0xccc139ed3d366d72L,0x690b6ee213488e0fL,0x7cad1dc5f3a6d533L } },
+ /* 19 */
+ { { 0x660a9a81da57a41fL,0xe74a0412ec0039b6L,0x42343c6b5e1dad15L,
+ 0x284f3ff546681d4cL,0xb51087f163749e89L,0x070f23cc6f9f2f13L },
+ { 0x542211da5d186e14L,0x84748f37fddb0dffL,0x41a3aab4db1f4180L,
+ 0x25ed667ba6402d0eL,0x2f2924a902f58355L,0x5844ee7cfa44a689L } },
+ /* 20 */
+ { { 0xfab086073f3b236fL,0x19e9d41d81e221daL,0xf3f6571e3927b428L,
+ 0x4348a9337550f1f6L,0x7167b996a85e62f0L,0x62d437597f5452bfL },
+ { 0xd85feb9ef2955926L,0x440a561f6df78353L,0x389668ec9ca36b59L,
+ 0x052bf1a1a22da016L,0xbdfbff72f6093254L,0x94e50f28e22209f3L } },
+ /* 21 */
+ { { 0x90b2e5b33062e8afL,0xa8572375e8a3d369L,0x3fe1b00b201db7b1L,
+ 0xe926def0ee651aa2L,0x6542c9beb9b10ad7L,0x098e309ba2fcbe74L },
+ { 0x779deeb3fff1d63fL,0x23d0e80a20bfd374L,0x8452bb3b8768f797L,
+ 0xcf75bb4d1f952856L,0x8fe6b40029ea3faaL,0x12bd3e4081373a53L } },
+ /* 22 */
+ { { 0xc023780d104cbba5L,0x6207e747fa35dd4cL,0x35c239281ca9b6a3L,
+ 0x4ff19be897987b10L,0xb8476bbf8022eee8L,0xaa0a4a14d3bbe74dL },
+ { 0x20f94331187d4543L,0x3215387079f6e066L,0x83b0f74eac7e82e1L,
+ 0xa7748ba2828f06abL,0xc5f0298ac26ef35fL,0x0f0c50708e9a7dbdL } },
+ /* 23 */
+ { { 0x0c5c244cdef029ddL,0x3dabc687850661b8L,0x9992b865fe11d981L,
+ 0xe9801b8f6274dbadL,0xe54e6319098da242L,0x9929a91a91a53d08L },
+ { 0x37bffd7235285887L,0xbc759425f1418102L,0x9280cc35fd2e6e20L,
+ 0x735c600cfbc42ee5L,0xb7ad28648837619aL,0xa3627231a778c57bL } },
+ /* 24 */
+ { { 0xae799b5c91361ed8L,0x47d71b756c63366cL,0x54cdd5211b265a6aL,
+ 0xe0215a5998d77b74L,0x4424d9b7bab29db0L,0x8b0ffacc7fd9e536L },
+ { 0x46d85d1237b5d9efL,0x5b106d62bfa91747L,0xed0479f85f99ba2dL,
+ 0x0e6f39231d104de4L,0x83a84c8425e8983fL,0xa9507e0af8105a70L } },
+ /* 25 */
+ { { 0xf6c68a6e14cf381cL,0xaf9d27bdc22e31ccL,0x23568d4daa8a5ccbL,
+ 0xe431eec0e338e4d2L,0xf1a828fe8f52ad1fL,0xdb6a0579e86acd80L },
+ { 0x2885672e4507832aL,0x73fc275f887e5289L,0x65f8027805610d08L,
+ 0x8d9b4554075ff5b0L,0x3a8e8fb109f712b5L,0x39f0ac862ebe9cf2L } },
+ /* 26 */
+ { { 0xd8fabf784c52edf5L,0xdcd737e5a589ae53L,0x94918bf0d791ab17L,
+ 0xb5fbd956bcff06c9L,0xf6d3032edca46d45L,0x2cdff7e141a3e486L },
+ { 0x6674b3ba61f47ec8L,0x8a882163eef84608L,0xa257c7054c687f90L,
+ 0xe30cb2edf6cdf227L,0x2c4c64ca7f6ea846L,0x186fa17ccc6bcd3cL } },
+ /* 27 */
+ { { 0x48a3f5361dfcb91eL,0x83595e13646d358aL,0xbd15827b91128798L,
+ 0x3ce612b82187757aL,0x873150a161bd7372L,0xf4684530b662f568L },
+ { 0x8833950b401896f6L,0xe11cb89a77f3e090L,0xb2f12cac48e7f4a5L,
+ 0x313dd769f606677eL,0xfdcf08b316579f93L,0x6429cec946b8f22bL } },
+ /* 28 */
+ { { 0x4984dd54bb75f9a4L,0x4aef06b929d3b570L,0xb5f84ca23d6e4c1eL,
+ 0x24c61c11b083ef35L,0xce4a7392392ca9ffL,0x865d65176730a800L },
+ { 0xca3dfe76722b4a2bL,0x12c04bf97b083e0eL,0x803ce5b51b86b8a5L,
+ 0x3fc7632d6a7e3e0cL,0xc89970c2c81adbe4L,0x3cbcd3ad120e16b1L } },
+ /* 29 */
+ { { 0xfbfb4cc7ec30ce93L,0x10ed6c7db72720a2L,0xec675bf747b55500L,
+ 0x90725903333ff7c3L,0xc7c3973e5075bfc0L,0xb049ecb007acf31bL },
+ { 0xb4076eaf4f58839cL,0x101896daa2b05e4fL,0x3f6033b0ab40c66eL,
+ 0x19ee9eebc8d864baL,0xeb6cf15547bf6d2aL,0x8e5a9663f826477dL } },
+ /* 30 */
+ { { 0x69e62fddf7fbd5e1L,0x38ecfe5476912b1dL,0x845a3d56d1da3bfbL,
+ 0x0494950e1c86f0d4L,0x83cadbf93bc36ce8L,0x41fce5724fccc8d1L },
+ { 0x05f939c28332c144L,0xb17f248b0871e46eL,0x3d8534e266e8aff6L,
+ 0x1d06f1dc3b85c629L,0xdb06a32ea3131b73L,0xf295184d8b3f64e5L } },
+ /* 31 */
+ { { 0xd9653ff736ddc103L,0x25f43e3795ef606fL,0x09e301fcfe06dce8L,
+ 0x85af234130b6eebfL,0x79b12b530ff56b20L,0x9b4fb499fe9a3c6bL },
+ { 0x0154f89251d27ac2L,0xd33167e356ca5389L,0x7828ec1fafc065a6L,
+ 0x0959a2587f746c9bL,0xb18f1be30c44f837L,0xa7946117c4132fdbL } },
+ /* 32 */
+ { { 0xc0426b775e3c647bL,0xbfcbd9398cf05348L,0x31d312e3172c0d3dL,
+ 0x5f49fde6ee754737L,0x895530f06da7ee61L,0xcf281b0ae8b3a5fbL },
+ { 0xfd14973541b8a543L,0x41a625a73080dd30L,0xe2baae07653908cfL,
+ 0xc3d01436ba02a278L,0xa0d0222e7b21b8f8L,0xfdc270e9d7ec1297L } },
+ /* 33 */
+ { { 0x00873c0cbc7f41d6L,0xd976113e1b7ad641L,0x2a536ff4238443fbL,
+ 0x030d00e241e62e45L,0x532e98675f545fc6L,0xcd0331088e91208cL },
+ { 0xd1a04c999797612cL,0xd4393e02eea674e2L,0xd56fa69ee19742a1L,
+ 0xdd2ab48085f0590eL,0xa5cefc5248a2243dL,0x48cc67b654383f41L } },
+ /* 34 */
+ { { 0x4e50430efc14ab48L,0x195b7f4f26706a74L,0x2fe8a228cc881ff6L,
+ 0xb1b968e2d945013dL,0x936aa5794b92162bL,0x4fb766b7364e754aL },
+ { 0x13f93bca31e1ff7fL,0x696eb5cace4f2691L,0xff754bf8a2b09e02L,
+ 0x58f13c9ce58e3ff8L,0xb757346f1678c0b0L,0xd54200dba86692b3L } },
+ /* 35 */
+ { { 0x9a030bbd6dda1265L,0xf7b4f3fce89718ddL,0xa6a4931f936065b8L,
+ 0xbce72d875f72241cL,0x6cbb51cb65775857L,0xc71618154e993675L },
+ { 0xe81a0f792ee32189L,0xef2fab26277dc0b2L,0x9e64f6feb71f469fL,
+ 0xb448ce33dfdaf859L,0x3f5c1c4cbe6b5df1L,0xfb8dfb001de45f7bL } },
+ /* 36 */
+ { { 0xc7345fa74d5bb921L,0x5c7e04be4d2b667eL,0x47ed3a80282d7a3eL,
+ 0x5c2777f87e47b2a4L,0x89b3b10008488e2eL,0x9aad77c2b2eb5b45L },
+ { 0xd681bca7daac34aeL,0x2452e4e526afb326L,0x0c88792441a1ee14L,
+ 0x743b04d4c2407adeL,0xcb5e999bfc17a2acL,0x4dca2f824a701a06L } },
+ /* 37 */
+ { { 0x68e31ca61127bc1aL,0xa3edd59b17ead3beL,0x67b6b645e25f5a15L,
+ 0x76221794a420e15eL,0x794fd83b4b1e872eL,0x7cab3f03b2dece1bL },
+ { 0x7119bf15ca9b3586L,0xa55459244d250bd7L,0x173633eacc6bcf24L,
+ 0x9bd308c2b1b6f884L,0x3bae06f5447d38c3L,0x54dcc135f341fe1cL } },
+ /* 38 */
+ { { 0x56d3598d943caf0dL,0xce044ea9225ff133L,0x9edf6a7c563fadeaL,
+ 0x632eb94473e8dc27L,0x814b467e3190dcabL,0x2d4f4f316dbb1e31L },
+ { 0x8d69811ca143b7caL,0x4ec1ac32de7cf950L,0x223ab5fd37b5fe82L,
+ 0xe82616e49390f1d9L,0xabff4b2075804610L,0x11b9be15875b08f0L } },
+ /* 39 */
+ { { 0x4ae31a3d3bbe682cL,0xbc7c5d2674eef2ddL,0x92afd10a3c47dd40L,
+ 0xec7e0a3bc14ab9e1L,0x6a6c3dd1b2e495e4L,0x085ee5e9309bcd85L },
+ { 0xf381a9088c2e67fdL,0x32083a80e261eaf2L,0x0fcd6a4996deee15L,
+ 0xe3b8fb035e524c79L,0x8dc360d91d5b08b9L,0x3a06e2c87f26719fL } },
+ /* 40 */
+ { { 0x5cd9f5a87237cac0L,0x93f0b59d43586794L,0x4384a764e94f6c4eL,
+ 0x8304ed2bb62782d3L,0x0b8db8b3cde06015L,0x4336dd535dbe190fL },
+ { 0x5744355392ab473aL,0x031c7275be5ed046L,0x3e78678c21909aa4L,
+ 0x4ab7e04f99202ddbL,0x2648d2066977e635L,0xd427d184093198beL } },
+ /* 41 */
+ { { 0x822848f50f9b5a31L,0xbb003468baadb62aL,0x233a04723357559cL,
+ 0x49ef688079aee843L,0xa89867a0aeb9e1e3L,0xc151931b1f6f9a55L },
+ { 0xd264eb0bad74251eL,0x37b9b2634abf295eL,0xb600921b04960d10L,
+ 0x0de53dbc4da77dc0L,0x01d9bab3d2b18697L,0xad54ec7af7156ddfL } },
+ /* 42 */
+ { { 0x8e74dc3579efdc58L,0x456bd3694ff68ddbL,0x724e74ccd32096a5L,
+ 0xe41cff42386783d0L,0xa04c7f217c70d8a4L,0x41199d2fe61a19a2L },
+ { 0xd389a3e029c05dd2L,0x535f2a6be7e3fda9L,0x26ecf72d7c2b4df8L,
+ 0x678275f4fe745294L,0x6319c9cc9d23f519L,0x1e05a02d88048fc4L } },
+ /* 43 */
+ { { 0x75cc8e2ed4d5ffe8L,0xf8bb4896dbea17f2L,0x35059790cee3cb4aL,
+ 0x4c06ee85a47c6165L,0xf98fff2592935d2fL,0x34c4a57232ffd7c7L },
+ { 0xc4b14806ea0376a2L,0x2ea5e7504f115e02L,0x532d76e21e55d7c0L,
+ 0x68dc9411f31044daL,0x9272e46571b77993L,0xadaa38bb93a8cfd5L } },
+ /* 44 */
+ { { 0x4bf0c7127d4ed72aL,0xda0e9264ba1f79a3L,0x48c0258bf4c39ea4L,
+ 0xa5394ed82a715138L,0x4af511cebf06c660L,0xfcebceefec5c37cdL },
+ { 0xf23b75aa779ae8c1L,0xdeff59ccad1e606eL,0xf3f526fd22755c82L,
+ 0x64c5ab44bb32cefdL,0xa96e11a2915bdefdL,0xab19746a1143813eL } },
+ /* 45 */
+ { { 0x43c78585ec837d7dL,0xca5b6fbcb8ee0ba4L,0x34e924d9d5dbb5eeL,
+ 0x3f4fa104bb4f1ca5L,0x15458b72398640f7L,0x4231faa9d7f407eaL },
+ { 0x53e0661ef96e6896L,0x554e4c69d03b0f9dL,0xd4fcb07b9c7858d1L,
+ 0x7e95279352cb04faL,0x5f5f15748974e7f7L,0x2e3fa5586b6d57c8L } },
+ /* 46 */
+ { { 0x42cd48036a9951a8L,0xa8b15b8842792ad0L,0x18e8bcf9abb29a73L,
+ 0xbfd9a092409933e8L,0x760a3594efb88dc4L,0x1441886340724458L },
+ { 0x162a56ee99caedc7L,0x8fb12ecd91d101c9L,0xea671967393202daL,
+ 0x1aac8c4aa4ccd796L,0x7db050361cf185a8L,0x0c9f86cd8cfd095aL } },
+ /* 47 */
+ { { 0x9a72814710b2a556L,0x767ca964327b70b2L,0x04ed9e125e3799b7L,
+ 0x6781d2dc22a3eb2aL,0x5bd116eb0d9450acL,0xeccac1fca7ebe08aL },
+ { 0xde68444fdc2d6e94L,0x3621f42935ecf21bL,0x14e2d54329e03a2cL,
+ 0x53e42cd57d3e7f0aL,0xbba26c0973ed00b9L,0x00297c39c57d2272L } },
+ /* 48 */
+ { { 0x3aaaab10b8243a7dL,0x6eeef93e8fa58c5bL,0xf866fca39ae7f764L,
+ 0x64105a2661ab04d3L,0xa3578d8a03945d66L,0xb08cd3e4791b848cL },
+ { 0x45edc5f8756d2411L,0xd4a790d9a755128cL,0xc2cf096349e5f6a0L,
+ 0xc66d267df649beaaL,0x3ce6d9688467039eL,0x50046c6b42f7816fL } },
+ /* 49 */
+ { { 0x92ae160266425043L,0x1ff66afdf08db890L,0x386f5a7f8f162ce5L,
+ 0x18d2dea0fcf5598fL,0x78372b3a1a8ca18eL,0xdf0d20eb8cd0e6f7L },
+ { 0x7edd5e1d75bb4045L,0x252a47ceb96d94b7L,0xbdb293582c626776L,
+ 0x853c394340dd1031L,0x9dc9becf7d5f47fdL,0x27c2302fbae4044aL } },
+ /* 50 */
+ { { 0x2d1d208a8f2d49ceL,0x0d91aa02162df0a2L,0x9c5cce8709a07f65L,
+ 0xdf07238b84339012L,0x5028e2c8419442cdL,0x2dcbd35872062abaL },
+ { 0xb5fbc3cbe4680967L,0x2a7bc6459f92d72cL,0x806c76e1116c369dL,
+ 0x5c50677a3177e8d8L,0x753739eb4569df57L,0x2d481ef636c3f40bL } },
+ /* 51 */
+ { { 0x1a2d39fdfea1103eL,0xeaae559295f81b17L,0xdbd0aa18f59b264aL,
+ 0x90c39c1acb592ee0L,0xdf62f80d9750cca3L,0xda4d8283df97cc6cL },
+ { 0x0a6dd3461e201067L,0x1531f85969fb1f6bL,0x4895e5521d60121fL,
+ 0x0b21aab04c041c91L,0x9d896c46bcc1ccf8L,0xd24da3b33141bde7L } },
+ /* 52 */
+ { { 0x575a053753b0a354L,0x392ff2f40c6ddcd8L,0x0b8e8cff56157b94L,
+ 0x073e57bd3b1b80d1L,0x2a75e0f03fedee15L,0x752380e4aa8e6f19L },
+ { 0x1f4e227c6558ffe9L,0x3a34861819ec5415L,0xab382d5ef7997085L,
+ 0x5e6deaffddc46ac2L,0xe5144078fc8d094cL,0xf674fe51f60e37c6L } },
+ /* 53 */
+ { { 0x6fb87ae5af63408fL,0xa39c36a9cd75a737L,0x7833313fcf4c618dL,
+ 0xfbcd4482f034c88dL,0x4469a76139b35288L,0x77a711c566b5d9c9L },
+ { 0x4a695dc7944f8d65L,0xe6da5f65161aaba8L,0x8654e9c324601669L,
+ 0xbc8b93f528ae7491L,0x5f1d1e838f5580d8L,0x8ccf9a1acea32cc8L } },
+ /* 54 */
+ { { 0x28ab110c7196fee2L,0x75799d63874c8945L,0xa262934829aedaddL,
+ 0x9714cc7b2be88ff4L,0xf71293cfd58d60d6L,0xda6b6cb332a564e9L },
+ { 0xf43fddb13dd821c2L,0xf2f2785f90dd323dL,0x91246419048489f8L,
+ 0x61660f26d24c6749L,0x961d9e8cc803c15cL,0x631c6158faadc4c9L } },
+ /* 55 */
+ { { 0xacf2ebe0fd752366L,0xb93c340e139be88bL,0x98f664850f20179eL,
+ 0x14820254ff1da785L,0x5278e2764f85c16eL,0xa246ee457aab1913L },
+ { 0x43861eb453763b33L,0xc49f03fc45c0bc0dL,0xafff16bcad6b1ea1L,
+ 0xce33908b6fd49c99L,0x5c51e9bff7fde8c3L,0x076a7a39ff142c5eL } },
+ /* 56 */
+ { { 0x04639dfe9e338d10L,0x8ee6996ff42b411bL,0x960461d1a875cef2L,
+ 0x1057b6d695b4d0baL,0x27639252a906e0bcL,0x2c19f09ae1c20f8aL },
+ { 0x5b8fc3f0eef4c43dL,0xe2e1b1a807a84aa9L,0x5f455528835d2bdbL,
+ 0x0f4aee4d207132ddL,0xe9f8338c3907f675L,0x7a874dc90e0531f0L } },
+ /* 57 */
+ { { 0x84b22d4597c27050L,0xbd0b8df759e70bf8L,0xb4d6740579738b9bL,
+ 0x47f4d5f5cd917c4fL,0x9099c4ce13ce6e33L,0x942bfd39521d0f8bL },
+ { 0x5028f0f6a43b566dL,0xaf6e866921bff7deL,0x83f6f856c44232cdL,
+ 0x65680579f915069aL,0xd12095a2ecfecb85L,0xcf7f06aedb01ba16L } },
+ /* 58 */
+ { { 0x0f56e3c48ef96c80L,0xd521f2b33ddb609cL,0x2be941027dc1450dL,
+ 0x2d21a07102a91fe2L,0x2e6f74fa1efa37deL,0x9a9a90b8156c28a1L },
+ { 0xc54ea9ea9dc7dfcbL,0xc74e66fc2c2c1d62L,0x9f23f96749d3e067L,
+ 0x1c7c3a4654dd38adL,0xc70058845946cee3L,0x8985636845cc045dL } },
+ /* 59 */
+ { { 0x29da7cd4fce73946L,0x8f697db523168563L,0x8e235e9ccba92ec6L,
+ 0x55d4655f9f91d3eaL,0xf3689f23aa50a6cdL,0xdcf21c2621e6a1a0L },
+ { 0xcffbc82e61b818bfL,0xc74a2f96da47a243L,0x234e980a8bc1a0cfL,
+ 0xf35fd6b57929cb6dL,0x81468e12efe17d6cL,0xddea6ae558b2dafbL } },
+ /* 60 */
+ { { 0x294de8877e787b2eL,0x258acc1f39a9310dL,0x92d9714aac14265dL,
+ 0x18b5591c708b48a0L,0x27cc6bb0e1abbf71L,0xc0581fa3568307b9L },
+ { 0x9e0f58a3f24d4d58L,0xfebe9bb8e0ce2327L,0x91fd6a419d1be702L,
+ 0x9a7d8a45facac993L,0xabc0a08c9e50d66dL,0x02c342f706498201L } },
+ /* 61 */
+ { { 0xccd71407157bdbc2L,0x72fa89c6ad0e1605L,0xb1d3da2bb92a015fL,
+ 0x8ad9e7cda0a3fe56L,0x160edcbd24f06737L,0x79d4db3361275be6L },
+ { 0xd3d31fd95f3497c4L,0x8cafeaee04192fb0L,0xe13ca74513a50af3L,
+ 0x188261678c85aae5L,0xce06cea89eb556ffL,0x2eef1995bdb549f3L } },
+ /* 62 */
+ { { 0x8ed7d3eb50596edcL,0xaa359362905243a2L,0xa212c2c2a4b6d02bL,
+ 0x611fd727c4fbec68L,0x8a0b8ff7b84f733dL,0xd85a6b905f0daf0eL },
+ { 0x60e899f5d4091cf7L,0x4fef2b672eff2768L,0xc1f195cb10c33964L,
+ 0x8275d36993626a8fL,0xc77904f40d6c840aL,0x88d8b7fd7a868acdL } },
+ /* 63 */
+ { { 0x85f237237bd98425L,0xd4463992c70b154eL,0xcbb00ee296687a2eL,
+ 0x905fdbf7c83214fdL,0x2019d29313593684L,0x0428c393ef51218eL },
+ { 0x40c7623f981e909aL,0x925133857be192daL,0x48fe480f4010907eL,
+ 0xdd7a187c3120b459L,0xc9d7702da1fd8f3cL,0x66e4753be358efc5L } },
+ /* 64 */
+ { { 0x070d34e116973cf4L,0x20aee08b7e4f34f7L,0x269af9b95eb8ad29L,
+ 0xdde0a036a6a45ddaL,0xa18b528e63df41e0L,0x03cc71b2a260df2aL },
+ { 0x24a6770aa06b1dd7L,0x5bfa9c119d2675d3L,0x73c1e2a196844432L,
+ 0x3660558d131a6cf0L,0xb0289c832ee79454L,0xa6aefb01c6d8ddcdL } },
+ /* 65 */
+ { { 0xba1464b401ab5245L,0x9b8d0b6dc48d93ffL,0x939867dc93ad272cL,
+ 0xbebe085eae9fdc77L,0x73ae5103894ea8bdL,0x740fc89a39ac22e1L },
+ { 0x5e28b0a328e23b23L,0x2352722ee13104d0L,0xf4667a18b0a2640dL,
+ 0xac74a72e49bb37c3L,0x79f734f0e81e183aL,0xbffe5b6c3fd9c0ebL } },
+ /* 66 */
+ { { 0xb1a358f5c6a2123fL,0x927b2d95fe28df6dL,0x89702753f199d2f9L,
+ 0x0a73754c1a3f82dcL,0x063d029d777affe1L,0x5439817edae6d34dL },
+ { 0xf7979eef6b8b83c4L,0x615cb2149d945682L,0x8f0e4facc5e57eaeL,
+ 0x042b89b8113047ddL,0x888356dc93f36508L,0xbf008d185fd1f32fL } },
+ /* 67 */
+ { { 0x8012aa244e8068dbL,0xc72cc641a5729a47L,0x3c33df2c43f0691dL,
+ 0xfa0573471d92145fL,0xaefc0f2fb97f7946L,0x813d75cb2f8121bfL },
+ { 0x05613c724383bba6L,0xa924ce70a4224b3fL,0xe59cecbe5f2179a6L,
+ 0x78e2e8aa79f62b61L,0x3ac2cc3b53ad8079L,0x55518d71d8f4fa96L } },
+ /* 68 */
+ { { 0x03cf292200623f3bL,0x095c71115f29ebffL,0x42d7224780aa6823L,
+ 0x044c7ba17458c0b0L,0xca62f7ef0959ec20L,0x40ae2ab7f8ca929fL },
+ { 0xb8c5377aa927b102L,0x398a86a0dc031771L,0x04908f9dc216a406L,
+ 0xb423a73a918d3300L,0x634b0ff1e0b94739L,0xe29de7252d69f697L } },
+ /* 69 */
+ { { 0x744d14008435af04L,0x5f255b1dfec192daL,0x1f17dc12336dc542L,
+ 0x5c90c2a7636a68a8L,0x960c9eb77704ca1eL,0x9de8cf1e6fb3d65aL },
+ { 0xc60fee0d511d3d06L,0x466e2313f9eb52c7L,0x743c0f5f206b0914L,
+ 0x42f55bac2191aa4dL,0xcefc7c8fffebdbc2L,0xd4fa6081e6e8ed1cL } },
+ /* 70 */
+ { { 0xb5e405d3b0ab9645L,0xaeec7f98d5f1f711L,0x8ad42311585c2a6eL,
+ 0x045acb9e512c6944L,0xae106c4ea90db1c6L,0xb89f33d5898e6563L },
+ { 0x43b07cd97fed2ce4L,0xf9934e17dd815b20L,0x6778d4d50a81a349L,
+ 0x9e616ade52918061L,0xfa06db06d7e67112L,0x1da23cf188488091L } },
+ /* 71 */
+ { { 0x821c46b342f2c4b5L,0x931513ef66059e47L,0x7030ae4366f50cd1L,
+ 0x43b536c943e7b127L,0x006258cf5fca5360L,0xe4e3ee796b557abfL },
+ { 0xbb6b390024c8b22fL,0x2eb5e2c1fcbf1054L,0x937b18c9567492afL,
+ 0xf09432e4acf53957L,0x585f5a9d1dbf3a56L,0xf86751fdbe0887cfL } },
+ /* 72 */
+ { { 0x157399cb9d10e0b2L,0x1c0d595660dc51b7L,0x1d496b8a1f583090L,
+ 0x6658bc2688590484L,0x88c08ab703213f28L,0x8d2e0f737ae58de4L },
+ { 0x9b79bc95486cfee6L,0x036a26c7e9e5bc57L,0x1ad03601cd8ae97aL,
+ 0x06907f87ff3a0494L,0x078f4bbf2c7eb584L,0xe3731bf57e8d0a5aL } },
+ /* 73 */
+ { { 0x72f2282be1cd0abeL,0xd4f9015e87efefa2L,0x9d1898066c3834bdL,
+ 0x9c8cdcc1b8a29cedL,0x0601b9f4fee82ebcL,0x371052bc7206a756L },
+ { 0x76fa109246f32562L,0xdaad534c17351bb4L,0xc3d64c37b3636bb5L,
+ 0x038a8c5145d54e00L,0x301e618032c09e7cL,0x9764eae795735151L } },
+ /* 74 */
+ { { 0x8791b19fcbd5256aL,0x4007e0f26ca13a3bL,0x03b794604cf06904L,
+ 0xb18a9c22b6c17589L,0xa1cb7d7d81d45908L,0x6e13fa9d21bb68f1L },
+ { 0x47183c62a71e6e16L,0x5cf0ef8ee18749edL,0x2c9c7f9b2e5ed409L,
+ 0x042eeacce6e117e1L,0xb86d481613fb5a7fL,0xea1cf0edc9e5feb1L } },
+ /* 75 */
+ { { 0x6e6573c9cea4cc9bL,0x5417961dafcec8f3L,0x804bf02aa438b6f6L,
+ 0xb894b03cdcd4ea88L,0xd0f807e93799571fL,0x3466a7f5862156e8L },
+ { 0x51e59acd56515664L,0x55b0f93ca3c5eb0bL,0x84a06b026a4279dbL,
+ 0x5c850579c5fae08eL,0xcf07b8dba663a1a2L,0x49a36bbcf46ffc8dL } },
+ /* 76 */
+ { { 0xe47f5acc46d93106L,0x65b7ade0aa897c9cL,0x37cf4c9412d7e4beL,
+ 0xa2ae9b80d4b2caa9L,0x5e7ce09ce60357a3L,0x29f77667c8ecd5f9L },
+ { 0xdf6868f5a8a0b1c5L,0x240858cf62978ad8L,0x0f7ac101dc0002a1L,
+ 0x1d28a9d7ffe9aa05L,0x744984d65b962c97L,0xa8a7c00b3d28c8b2L } },
+ /* 77 */
+ { { 0x7c58a852ae11a338L,0xa78613f1d1af96e7L,0x7e9767d25355cc73L,
+ 0x6ba37009792a2de6L,0x7d60f618124386b2L,0xab09b53111157674L },
+ { 0x95a0484198eb9dd0L,0xe6c17acc15070328L,0xafc6da45489c6e49L,
+ 0xab45a60abb211530L,0xc58d65927d7ea933L,0xa3ef3c65095642c6L } },
+ /* 78 */
+ { { 0x89d420e9df010879L,0x9d25255d39576179L,0x9cdefd50e39513b6L,
+ 0xe4efe45bd5d1c313L,0xc0149de73f7af771L,0x55a6b4f4340ab06bL },
+ { 0xf1325251ebeaf771L,0x2ab44128878d4288L,0xfcd5832e18e05afeL,
+ 0xef52a348cc1fb62bL,0x2bd08274c1c4792aL,0x345c5846877c6dc7L } },
+ /* 79 */
+ { { 0xde15ceb0bea65e90L,0x0987f72b2416d99cL,0x44db578dfd863decL,
+ 0xf617b74bac6a3578L,0x9e62bd7adb48e999L,0x877cae61eab1a1beL },
+ { 0x23adddaa3a358610L,0x2fc4d6d1325e2b07L,0x897198f51585754eL,
+ 0xf741852cb392b584L,0x9927804cb55f7de1L,0xe9e6c4ed1aa8efaeL } },
+ /* 80 */
+ { { 0x867db63998683186L,0xfb5cf424ddcc4ea9L,0xcc9a7ffed4f0e7bdL,
+ 0x7c57f71c7a779f7eL,0x90774079d6b25ef2L,0x90eae903b4081680L },
+ { 0xdf2aae5e0ee1fcebL,0x3ff1da24e86c1a1fL,0x80f587d6ca193edfL,
+ 0xa5695523dc9b9d6aL,0x7b84090085920303L,0x1efa4dfcba6dbdefL } },
+ /* 81 */
+ { { 0xfbd838f9e0540015L,0x2c323946c39077dcL,0x8b1fb9e6ad619124L,
+ 0x9612440c0ca62ea8L,0x9ad9b52c2dbe00ffL,0xf52abaa1ae197643L },
+ { 0xd0e898942cac32adL,0xdfb79e4262a98f91L,0x65452ecf276f55cbL,
+ 0xdb1ac0d27ad23e12L,0xf68c5f6ade4986f0L,0x389ac37b82ce327dL } },
+ /* 82 */
+ { { 0x511188b4f8e60f5bL,0x7fe6701548aa2adaL,0xdb333cb8381abca2L,
+ 0xb15e6d9ddaf3fc97L,0x4b24f6eb36aabc03L,0xc59789df72a748b4L },
+ { 0x26fcb8a529cf5279L,0x7a3c6bfc01ad9a6cL,0x866cf88d4b8bac9bL,
+ 0xf4c899899c80d041L,0xf0a0424170add148L,0x5a02f47945d81a41L } },
+ /* 83 */
+ { { 0xfa5c877cc1c90202L,0xd099d440f8ac7570L,0x428a5b1bd17881f7L,
+ 0x61e267db5b2501d7L,0xf889bf04f2e4465bL,0x4da3ae0876aa4cb8L },
+ { 0x3ef0fe26e3e66861L,0x5e7729533318b86dL,0xc3c35fbc747396dfL,
+ 0x5115a29c439ffd37L,0xbfc4bd97b2d70374L,0x088630ea56246b9dL } },
+ /* 84 */
+ { { 0xcd96866db8a9e8c9L,0xa11963b85bb8091eL,0xc7f90d53045b3cd2L,
+ 0x755a72b580f36504L,0x46f8b39921d3751cL,0x4bffdc9153c193deL },
+ { 0xcd15c049b89554e7L,0x353c6754f7a26be6L,0x79602370bd41d970L,
+ 0xde16470b12b176c0L,0x56ba117540c8809dL,0xe2db35c3e435fb1eL } },
+ /* 85 */
+ { { 0xd71e4aab6328e33fL,0x5486782baf8136d1L,0x07a4995f86d57231L,
+ 0xf1f0a5bd1651a968L,0xa5dc5b2476803b6dL,0x5c587cbc42dda935L },
+ { 0x2b6cdb32bae8b4c0L,0x66d1598bb1331138L,0x4a23b2d25d7e9614L,
+ 0x93e402a674a8c05dL,0x45ac94e6da7ce82eL,0xeb9f8281e463d465L } },
+ /* 86 */
+ { { 0x34e0f9d1fecf5b9bL,0xa115b12bf206966aL,0x5591cf3b1eaa0534L,
+ 0x5f0293cbfb1558f9L,0x1c8507a41bc703a5L,0x92e6b81c862c1f81L },
+ { 0xcc9ebc66cdaf24e3L,0x68917ecd72fcfc70L,0x6dc9a9308157ba48L,
+ 0x5d425c08b06ab2b2L,0x362f8ce736e929c4L,0x09f6f57c62e89324L } },
+ /* 87 */
+ { { 0x1c7d6b78d29375fbL,0xfabd851ee35d1157L,0xf6f62dcd4243ea47L,
+ 0x1dd924608fe30b0fL,0x08166dfaffc6e709L,0xc6c4c6930881e6a7L },
+ { 0x20368f87d6a53fb0L,0x38718e9f9eb4d1f9L,0x03f08acdafd7e790L,
+ 0x0835eb4472fe2a1cL,0x7e05090388076e5dL,0x538f765ea638e731L } },
+ /* 88 */
+ { { 0x0e0249d9c2663b4bL,0xe700ab5b47cd38ddL,0xb192559d2c46559fL,
+ 0x8f9f74a84bcde66dL,0xad1615233e2aced5L,0xc155c0473dd03a5bL },
+ { 0x346a87993be454ebL,0x66ee94db83b7dccdL,0x1f6d8378ab9d2abeL,
+ 0x4a396dd27733f355L,0x419bd40af53553c2L,0xd0ead98d731dd943L } },
+ /* 89 */
+ { { 0x908e0b0eec142408L,0x98943cb94114b310L,0x03dbf7d81742b1d7L,
+ 0xd270df6b693412f4L,0xc50654948f69e20cL,0xa76a90c3697e43a1L },
+ { 0xe0fa33844624825aL,0x82e48c0b8acc34c2L,0x7b24bd14e9a14f2bL,
+ 0x4f5dd5e24db30803L,0x0c77a9e7932da0a3L,0x20db90f274c653dcL } },
+ /* 90 */
+ { { 0x261179b70e6c5fd9L,0xf8bec1236c982eeaL,0x47683338d4957b7eL,
+ 0xcc47e6640a72f66aL,0xbd54bf6a1bad9350L,0xdfbf4c6af454e95aL },
+ { 0x3f7a7afa6907f4faL,0x7311fae0865ca735L,0x24737ab82a496adaL,
+ 0x13e425f115feb79bL,0xe9e97c50a1b93c21L,0xb26b6eac4ddd3eb5L } },
+ /* 91 */
+ { { 0x81cab9f52a2e5f2bL,0xf93caf29bf385ac4L,0xf4bf35c3c909963aL,
+ 0x081e730074c9143cL,0x3ea57fa8c281b4c5L,0xe497905c9b340741L },
+ { 0xf556dd8a55ab3cfbL,0xd444b96b518db6adL,0x34f5425a5ef4b955L,
+ 0xdda7a3acecd26aa3L,0xb57da11bda655e97L,0x02da3effc2024c70L } },
+ /* 92 */
+ { { 0xe24b00366481d0d9L,0x3740dbe5818fdfe2L,0xc1fc1f45190fda00L,
+ 0x329c92803cf27fdeL,0x7435cb536934f43eL,0x2b505a5d7884e8feL },
+ { 0x6cfcc6a6711adcc9L,0xf034325c531e21e1L,0xa2f4a9679b2a8a99L,
+ 0x9d5f38423c21bdffL,0xb25c781131b57d66L,0xdb5344d80b8093b9L } },
+ /* 93 */
+ { { 0x0d72e667ae50a2f5L,0x9b7f8d8ae4a861d1L,0xa129f70f330df1cbL,
+ 0xe90aa5d7e04fefc3L,0xff561ecbe72c3ae1L,0x0d8fb428cdb955faL },
+ { 0xd2235f73d7663784L,0xc05baec67e2c456aL,0xe5c292e42adbfcccL,
+ 0x4fd17988efb110d5L,0x27e57734d19d49f3L,0x188ac4ce84f679feL } },
+ /* 94 */
+ { { 0x7ee344cfa796c53eL,0xbbf6074d0868009bL,0x1f1594f7474a1295L,
+ 0x66776edcac11632dL,0x1862278b04e2fa5aL,0x52665cf2c854a89aL },
+ { 0x7e3764648104ab58L,0x167759137204fd6dL,0x86ca06a544ea1199L,
+ 0xaa3f765b1c9240ddL,0x5f8501a924746149L,0x7b982e30dcd251d7L } },
+ /* 95 */
+ { { 0xe44e9efcc15f3060L,0x5ad62f2ea87ebbe6L,0x36499d41c79500d4L,
+ 0xa66d6dc0336fa9d1L,0xf8afc4955afd3b1fL,0x1d8ccb24e5c9822bL },
+ { 0x4031422b79d7584bL,0xc54a0580ea3f20ddL,0x3f837c8f958468c5L,
+ 0x3d82f110fbea7735L,0x679a87787dffe2fcL,0x48eba63b20704803L } },
+ /* 96 */
+ { { 0x89b10d41df46e2f6L,0x13ab57f819514367L,0x067372b91d469c87L,
+ 0x0c195afa4f6c5798L,0xea43a12a272c9acfL,0x9dadd8cb678abdacL },
+ { 0xcce56c6be182579aL,0x86febadb2d26c2d8L,0x1c668ee12a44745cL,
+ 0x580acd8698dc047aL,0x5a2b79cc51b9ec2dL,0x007da6084054f6a0L } },
+ /* 97 */
+ { { 0x9e3ca35217b00dd0L,0x046779cb0e81a7a6L,0xb999fef3d482d871L,
+ 0xe6f38134d9233fbcL,0x112c3001f48cd0e0L,0x934e75763c6c66aeL },
+ { 0xb44d4fc3d73234dcL,0xfcae2062864eafc1L,0x843afe2526bef21aL,
+ 0x61355107f3b75fdfL,0x8367a5aa794c2e6bL,0x3d2629b18548a372L } },
+ /* 98 */
+ { { 0x6230618f437cfaf8L,0x5b8742cb2032c299L,0x949f72472293643aL,
+ 0xb8040f1a09464f79L,0x049462d24f254143L,0xabd6b522366c7e76L },
+ { 0x119b392bd5338f55L,0x1a80a9ce01495a0cL,0xf3118ca7f8d7537eL,
+ 0xb715adc26bf4b762L,0x24506165a8482b6cL,0xd958d7c696a7c84dL } },
+ /* 99 */
+ { { 0x9ad8aa87bdc21f31L,0xadb3cab48063e58cL,0xefd86283b07dd7b8L,
+ 0xc7b9b7621be7c6b4L,0x2ef58741015582deL,0xc970c52e299addf3L },
+ { 0x78f02e2a22f24d66L,0xefec1d1074cc100aL,0xaf2a6a3909316e1aL,
+ 0xce7c22055849dd49L,0x9c1fe75c96bffc4cL,0xcad98fd27ba06ec0L } },
+ /* 100 */
+ { { 0xed76e2d0b648b73eL,0xa9f92ce51cfd285eL,0xa8c86c062ed13de1L,
+ 0x1d3a574ea5191a93L,0x385cdf8b1ad1b8bfL,0xbbecc28a47d2cfe3L },
+ { 0x98d326c069cec548L,0x4f5bc1ddf240a0b2L,0x241a706229057236L,
+ 0x0fc6e9c5c68294a4L,0x4d04838ba319f17aL,0x8b612cf19ffc1c6fL } },
+ /* 101 */
+ { { 0x9bb0b5014c3830ebL,0x3d08f83c8ee0d0c5L,0xa4a6264279ba9389L,
+ 0x5d5d40449cbc2914L,0xae9eb83e074c46f0L,0x63bb758f74ead7d6L },
+ { 0x1c40d2eac6bb29e0L,0x95aa2d874b02f41eL,0x9298917553cb199aL,
+ 0xdd91bafe51584f6dL,0x3715efb931a1aaecL,0xc1b6ae5b46780f9eL } },
+ /* 102 */
+ { { 0xcded3e4b42772f41L,0x3a700d5d3bcb79d1L,0x4430d50e80feee60L,
+ 0x444ef1fcf5e5d4bbL,0xc660194fe6e358ffL,0xe68a2f326a91b43cL },
+ { 0x5842775c977fe4d2L,0x78fdef5c7e2a41ebL,0x5f3bec02ff8df00eL,
+ 0xf4b840cd5852525dL,0x0870483a4e6988bdL,0x39499e39cc64b837L } },
+ /* 103 */
+ { { 0xfc05de80b08df5feL,0x0c12957c63ba0362L,0xea379414d5cf1428L,
+ 0xc559132a54ef6216L,0x33d5f12fb9e65cf8L,0x09c602781695d663L },
+ { 0x3ac1ced461f7a2fbL,0xdd838444d4f5eeb8L,0x82a38c6c8318fcadL,
+ 0x315be2e5e9f1a864L,0x317b5771442daf47L,0x81b5904a95aa5f9eL } },
+ /* 104 */
+ { { 0x6b6b1c508b21d232L,0x87f3dbc08c2cba75L,0xa7e74b46ae9f0fafL,
+ 0x036a0985bb7b8079L,0x4f185b908d974a25L,0x5aa7cef0d9af5ec9L },
+ { 0xe0566a7057dcfffcL,0x6ea311dab8453225L,0x72ea1a8d23368aa9L,
+ 0xed9b208348cd552dL,0xb987967cc80ea435L,0xad735c756c104173L } },
+ /* 105 */
+ { { 0xaea85ab3cee76ef4L,0x44997444af1d2b93L,0x0851929beacb923fL,
+ 0xb080b59051e3bc0cL,0xc4ee1d8659be68a2L,0xf00de21964b26cdaL },
+ { 0x8d7fb5c0f2e90d4dL,0x00e219a777d9ec64L,0xc4e6febd5d1c491cL,
+ 0x080e37541a8f4585L,0x4a9b86c848d2af9cL,0x2ed70db6b6679851L } },
+ /* 106 */
+ { { 0xaee44116586f25cbL,0xf7b6861fa0fcf70fL,0x55d2cd2018a350e8L,
+ 0x861bf3e592dc286fL,0x9ab18ffa6226aba7L,0xd15827bea9857b03L },
+ { 0x26c1f54792e6acefL,0x422c63c8ac1fbac3L,0xa2d8760dfcbfd71dL,
+ 0x35f6a539b2511224L,0xbaa88fa1048d1a21L,0x49f1abe9ebf999dbL } },
+ /* 107 */
+ { { 0x16f9f4f4f7492b73L,0xcf28ec1ecb392b1aL,0x45b130d469ca6ffcL,
+ 0x28ba8d40b72efa58L,0xace987c75ca066f5L,0x3e3992464ad022ebL },
+ { 0x63a2d84e752555bbL,0xaaa93b4a9c2ae394L,0xcd80424ec89539caL,
+ 0x6d6b5a6daa119a99L,0xbd50334c379f2629L,0x899e925eef3cc7d3L } },
+ /* 108 */
+ { { 0xb7ff3651bf825dc4L,0x0f741cc440b9c462L,0x771ff5a95cc4fb5bL,
+ 0xcb9e9c9b47fd56feL,0xbdf053db5626c0d3L,0xa97ce675f7e14098L },
+ { 0x68afe5a36c934f5eL,0x6cd5e148ccefc46fL,0xc7758570d7a88586L,
+ 0x49978f5edd558d40L,0xa1d5088a64ae00c1L,0x58f2a720f1d65bb2L } },
+ /* 109 */
+ { { 0x66fdda4a3e4daedbL,0x38318c1265d1b052L,0x28d910a24c4bbf5cL,
+ 0x762fe5c478a9cd14L,0x08e5ebaad2cc0aeeL,0xd2cdf257ca0c654cL },
+ { 0x48f7c58b08b717d2L,0x3807184a386cd07aL,0x3240f626ae7d0112L,
+ 0x03e9361bc43917b0L,0xf261a87620aea018L,0x53f556a47e1e6372L } },
+ /* 110 */
+ { { 0xc84cee562f512a90L,0x24b3c0041b0ea9f1L,0x0ee15d2de26cc1eaL,
+ 0xd848762cf0c9ef7dL,0x1026e9c5d5341435L,0x8f5b73dcfdb16b31L },
+ { 0x1f69bef2d2c75d95L,0x8d33d581be064ddaL,0x8c024c1257ed35e6L,
+ 0xf8d435f9c309c281L,0xfd295061d6960193L,0x66618d78e9e49541L } },
+ /* 111 */
+ { { 0x571cfd458ce382deL,0x175806eede900ddeL,0x6184996534aba3b5L,
+ 0xe899778ade7aec95L,0xe8f00f6eff4aa97fL,0xae971cb5010b0c6dL },
+ { 0x1827eebc3af788f1L,0xd46229ffe413fe2dL,0x8a15455b4741c9b4L,
+ 0x5f02e690f8e424ebL,0x40a1202edae87712L,0x49b3bda264944f6dL } },
+ /* 112 */
+ { { 0xd63c6067035b2d69L,0xb507150d6bed91b0L,0x1f35f82f7afb39b2L,
+ 0xb9bd9c0116012b66L,0x00d97960ed0a5f50L,0xed7054512716f7c9L },
+ { 0x1576eff4127abdb4L,0x6850d698f01e701cL,0x9fa7d7493fc87e2fL,
+ 0x0b6bcc6fb0ce3e48L,0xf4fbe1f5f7d8c1c0L,0xcf75230e02719cc6L } },
+ /* 113 */
+ { { 0x6761d6c2722d94edL,0xd1ec3f213718820eL,0x65a40b7025d0e7c6L,
+ 0xd67f830ebaf3cf31L,0x633b3807b93ea430L,0x17faa0ea0bc96c69L },
+ { 0xe6bf3482df866b98L,0x205c1ee9a9db52d4L,0x51ef9bbdff9ab869L,
+ 0x3863dad175eeb985L,0xef216c3bd3cf442aL,0x3fb228e3f9c8e321L } },
+ /* 114 */
+ { { 0x94f9b70c0760ac07L,0xf3c9ccae9d79bf4dL,0x73cea084c5ffc83dL,
+ 0xef50f943dc49c38eL,0xf467a2aebc9e7330L,0x5ee534b644ea7fbaL },
+ { 0x20cb627203609e7fL,0x0984435562fdc9f0L,0xaf5c8e580f1457f7L,
+ 0xd1f50a6cb4b25941L,0x77cb247c2ec82395L,0xa5f3e1e5da3dca33L } },
+ /* 115 */
+ { { 0x023489d67d85fa94L,0x0ba405372db9ce47L,0x0fdf7a1faed7aad1L,
+ 0xa57b0d739a4ccb40L,0x48fcec995b18967cL,0xf30b5b6eb7274d24L },
+ { 0x7ccb4773c81c5338L,0xb85639e6a3ed6bd0L,0x7d9df95f1d56eadaL,
+ 0xe256d57f0a1607adL,0x6da7ffdc957574d6L,0x65f8404601c7a8c4L } },
+ /* 116 */
+ { { 0x8d45d0cbcba1e7f1L,0xef0a08c002b55f64L,0x771ca31b17e19892L,
+ 0xe1843ecb4885907eL,0x67797ebc364ce16aL,0x816d2b2d8df4b338L },
+ { 0xe870b0e539aa8671L,0x9f0db3e4c102b5f5L,0x342966591720c697L,
+ 0x0ad4c89e613c0d2aL,0x1af900b2418ddd61L,0xe087ca72d336e20eL } },
+ /* 117 */
+ { { 0x222831ffaba10079L,0x0dc5f87b6d64fff2L,0x445479073e8cb330L,
+ 0xe815aaa2702a33fbL,0x338d6b2e5fba3215L,0x0f7535cb79f549c8L },
+ { 0x471ecd972ee95923L,0x1e868b37c6d1c09fL,0x2bc7b8ecc666ef4eL,
+ 0xf5416589808a4bfcL,0xf23e9ee23fbc4d2eL,0x4357236c2d75125bL } },
+ /* 118 */
+ { { 0xfe176d95ba9cdb1bL,0x45a1ca012f82791eL,0x97654af24de4cca2L,
+ 0xbdbf9d0e5cc4bcb9L,0xf6a7df50ad97ac0aL,0xc52112b061359fd6L },
+ { 0x696d9ce34f05eae3L,0x903adc02e943ac2bL,0xa90753470848be17L,
+ 0x1e20f1702a3973e5L,0xe1aacc1c6feb67e9L,0x2ca0ac32e16bc6b9L } },
+ /* 119 */
+ { { 0xffea12e4ef871eb5L,0x94c2f25da8bf0a7aL,0x4d1e4c2a78134eaaL,
+ 0x11ed16fb0360fb10L,0x4029b6db85fc11beL,0x5e9f7ab7f4d390faL },
+ { 0x5076d72f30646612L,0xa0afed1ddda1d0d8L,0x2902225785a1d103L,
+ 0xcb499e174e276bcdL,0x16d1da7151246c3dL,0xc72d56d3589a0443L } },
+ /* 120 */
+ { { 0xdf5ffc74dae5bb45L,0x99068c4a261bd6dcL,0xdc0afa7aaa98ec7bL,
+ 0xedd2ee00f121e96dL,0x163cc7be1414045cL,0xb0b1bbce335af50eL },
+ { 0xd440d78501a06293L,0xcdebab7c6552e644L,0x48cb8dbc8c757e46L,
+ 0x81f9cf783cabe3cbL,0xddd02611b123f59aL,0x3dc7b88eeeb3784dL } },
+ /* 121 */
+ { { 0xe1b8d398c4741456L,0xa9dfa9026032a121L,0x1cbfc86d1263245bL,
+ 0xf411c7625244718cL,0x96521d5405b0fc54L,0x1afab46edbaa4985L },
+ { 0xa75902ba8674b4adL,0x486b43ad5ad87d12L,0x72b1c73636e0d099L,
+ 0x39890e07bb6cd6d6L,0x8128999c59bace4eL,0xd8da430b7b535e33L } },
+ /* 122 */
+ { { 0x39f65642c6b75791L,0x050947a621806bfbL,0x0ca3e3701362ef84L,
+ 0x9bc60aed8c3d2391L,0x9b488671732e1ddcL,0x12d10d9ea98ee077L },
+ { 0xb6f2822d3651b7dcL,0x6345a5ba80abd138L,0x62033262472d3c84L,
+ 0xd54a1d40acc57527L,0x6ea46b3a424447cbL,0x5bc410572fb1a496L } },
+ /* 123 */
+ { { 0xe70c57a3a751cd0eL,0x190d8419eba3c7d6L,0xb1c3bee79d47d55aL,
+ 0xda941266f912c6d8L,0x12e9aacc407a6ad6L,0xd6ce5f116e838911L },
+ { 0x063ca97b70e1f2ceL,0xa3e47c728213d434L,0xa016e24184df810aL,
+ 0x688ad7b0dfd881a4L,0xa37d99fca89bf0adL,0xd8e3f339a23c2d23L } },
+ /* 124 */
+ { { 0xbdf53163750bed6fL,0x808abc3283e68b0aL,0x85a366275bb08a33L,
+ 0xf72a3a0f6b0e4abeL,0xf7716d19faf0c6adL,0x22dcc0205379b25fL },
+ { 0x7400bf8df9a56e11L,0x6cb8bad756a47f21L,0x7c97176f7a6eb644L,
+ 0xe8fd84f7d1f5b646L,0x98320a9444ddb054L,0x07071ba31dde86f5L } },
+ /* 125 */
+ { { 0x6fdfa0e598f8fcb9L,0x89cec8e094d0d70cL,0xa0899397106d20a8L,
+ 0x915bfb9aba8acc9cL,0x1370c94b5507e01cL,0x83246a608a821ffbL },
+ { 0xa8273a9fbe3c378fL,0x7e54478935a25be9L,0x6cfa49724dd929d7L,
+ 0x987fed9d365bd878L,0x4982ac945c29a7aeL,0x4589a5d75ddd7ec5L } },
+ /* 126 */
+ { { 0x9fabb174a95540a9L,0x7cfb886f0162c5b0L,0x17be766bea3dee18L,
+ 0xff7da41fe88e624cL,0xad0b71eb8b919c38L,0x86a522e0f31ff9a9L },
+ { 0xbc8e6f72868bc259L,0x6130c6383ccef9e4L,0x09f1f4549a466555L,
+ 0x8e6c0f0919b2bfb4L,0x945c46c90ca7bb22L,0xacd871684dafb67bL } },
+ /* 127 */
+ { { 0x090c72ca10c53841L,0xc20ae01b55a4fcedL,0x03f7ebd5e10234adL,
+ 0xb3f42a6a85892064L,0xbdbc30c0b4a14722L,0x971bc4378ca124ccL },
+ { 0x6f79f46d517ff2ffL,0x6a9c96e2ecba947bL,0x5e79f2f462925122L,
+ 0x30a96bb16a4e91f1L,0x1147c9232d4c72daL,0x65bc311f5811e4dfL } },
+ /* 128 */
+ { { 0x87c7dd7d139b3239L,0x8b57824e4d833baeL,0xbcbc48789fff0015L,
+ 0x8ffcef8b909eaf1aL,0x9905f4eef1443a78L,0x020dd4a2e15cbfedL },
+ { 0xca2969eca306d695L,0xdf940cadb93caf60L,0x67f7fab787ea6e39L,
+ 0x0d0ee10ff98c4fe5L,0xc646879ac19cb91eL,0x4b4ea50c7d1d7ab4L } },
+ /* 129 */
+ { { 0x19e409457a0db57eL,0xe6017cad9a8c9702L,0xdbf739e51be5cff9L,
+ 0x3646b3cda7a938a2L,0x0451108568350dfcL,0xad3bd6f356e098b5L },
+ { 0x935ebabfee2e3e3eL,0xfbd01702473926cbL,0x7c735b029e9fb5aaL,
+ 0xc52a1b852e3feff0L,0x9199abd3046b405aL,0xe306fcec39039971L } },
+ /* 130 */
+ { { 0xd6d9aec823e4712cL,0x7ca8376cc3c198eeL,0xe6d8318731bebd8aL,
+ 0xed57aff3d88bfef3L,0x72a645eecf44edc7L,0xd4e63d0b5cbb1517L },
+ { 0x98ce7a1cceee0ecfL,0x8f0126335383ee8eL,0x3b879078a6b455e8L,
+ 0xcbcd3d96c7658c06L,0x721d6fe70783336aL,0xf21a72635a677136L } },
+ /* 131 */
+ { { 0x19d8b3cd9586ba11L,0xd9e0aeb28a5c0480L,0xe4261dbf2230ef5cL,
+ 0x095a9dee02e6bf09L,0x8963723c80dc7784L,0x5c97dbaf145157b1L },
+ { 0x97e744344bc4503eL,0x0fb1cb3185a6b370L,0x3e8df2becd205d4bL,
+ 0x497dd1bcf8f765daL,0x92ef95c76c988a1aL,0x3f924baa64dc4cfaL } },
+ /* 132 */
+ { { 0x6bf1b8dd7268b448L,0xd4c28ba1efd79b94L,0x2fa1f8c8e4e3551fL,
+ 0x769e3ad45c9187a9L,0x28843b4d40326c0dL,0xfefc809450d5d669L },
+ { 0x30c85bfd90339366L,0x4eeb56f15ccf6c3aL,0x0e72b14928ccd1dcL,
+ 0x73ee85b5f2ce978eL,0xcdeb2bf33165bb23L,0x8106c9234e410abfL } },
+ /* 133 */
+ { { 0xc8df01617d02f4eeL,0x8a78154718e21225L,0x4ea895eb6acf9e40L,
+ 0x8b000cb56e5a633dL,0xf31d86d57e981ffbL,0xf5c8029c4475bc32L },
+ { 0x764561ce1b568973L,0x2f809b81a62996ecL,0x9e513d64da085408L,
+ 0xc27d815de61ce309L,0x0da6ff99272999e0L,0xbd284779fead73f7L } },
+ /* 134 */
+ { { 0x6033c2f99b1cdf2bL,0x2a99cf06bc5fa151L,0x7d27d25912177b3bL,
+ 0xb1f15273c4485483L,0x5fd57d81102e2297L,0x3d43e017c7f6acb7L },
+ { 0x41a8bb0b3a70eb28L,0x67de2d8e3e80b06bL,0x09245a4170c28de5L,
+ 0xad7dbcb1a7b26023L,0x70b08a352cbc6c1eL,0xb504fb669b33041fL } },
+ /* 135 */
+ { { 0xa8e85ab5f97a27c2L,0x6ac5ec8bc10a011bL,0x55745533ffbcf161L,
+ 0x01780e8565790a60L,0xe451bf8599ee75b0L,0x8907a63b39c29881L },
+ { 0x76d46738260189edL,0x284a443647bd35cbL,0xd74e8c4020cab61eL,
+ 0x6264bf8c416cf20aL,0xfa5a6c955fd820ceL,0xfa7154d0f24bb5fcL } },
+ /* 136 */
+ { { 0x18482cec9b3f5034L,0x962d445acd9e68fdL,0x266fb1d695746f23L,
+ 0xc66ade5a58c94a4bL,0xdbbda826ed68a5b6L,0x05664a4d7ab0d6aeL },
+ { 0xbcd4fe51025e32fcL,0x61a5aebfa96df252L,0xd88a07e231592a31L,
+ 0x5d9d94de98905517L,0x96bb40105fd440e7L,0x1b0c47a2e807db4cL } },
+ /* 137 */
+ { { 0x5c2a6ac808223878L,0xba08c269e65a5558L,0xd22b1b9b9bbc27fdL,
+ 0x919171bf72b9607dL,0x9ab455f9e588dc58L,0x6d54916e23662d93L },
+ { 0x8da8e9383b1de0c1L,0xa84d186a804f278fL,0xbf4988ccd3461695L,
+ 0xf5eae3bee10eb0cbL,0x1ff8b68fbf2a66edL,0xa68daf67c305b570L } },
+ /* 138 */
+ { { 0xc1004cff44b2e045L,0x91b5e1364b1c05d4L,0x53ae409088a48a07L,
+ 0x73fb2995ea11bb1aL,0x320485703d93a4eaL,0xcce45de83bfc8a5fL },
+ { 0xaff4a97ec2b3106eL,0x9069c630b6848b4fL,0xeda837a6ed76241cL,
+ 0x8a0daf136cc3f6cfL,0x199d049d3da018a8L,0xf867c6b1d9093ba3L } },
+ /* 139 */
+ { { 0xe4d42a5656527296L,0xae26c73dce71178dL,0x70a0adac6c251664L,
+ 0x813483ae5dc0ae1dL,0x7574eacddaab2dafL,0xc56b52dcc2d55f4fL },
+ { 0x872bc16795f32923L,0x4be175815bdd2a89L,0x9b57f1e7a7699f00L,
+ 0x5fcd9c723ac2de02L,0x83af3ba192377739L,0xa64d4e2bfc50b97fL } },
+ /* 140 */
+ { { 0x2172dae20e552b40L,0x62f49725d34d52e8L,0x7930ee4007958f98L,
+ 0x56da2a90751fdd74L,0xf1192834f53e48c3L,0x34d2ac268e53c343L },
+ { 0x1073c21813111286L,0x201dac14da9d9827L,0xec2c29dbee95d378L,
+ 0x9316f1191f3ee0b1L,0x7890c9f0544ce71cL,0xd77138af27612127L } },
+ /* 141 */
+ { { 0x78045e6d3b4ad1cdL,0xcd86b94e4aa49bc1L,0x57e51f1dfd677a16L,
+ 0xd9290935fa613697L,0x7a3f959334f4d893L,0x8c9c248b5d5fcf9bL },
+ { 0x9f23a4826f70d4e9L,0x1727345463190ae9L,0x4bdd7c135b081a48L,
+ 0x1e2de38928d65271L,0x0bbaaa25e5841d1fL,0xc4c18a79746772e5L } },
+ /* 142 */
+ { { 0x10ee2681593375acL,0x4f3288be7dd5e113L,0x9a97b2fb240f3538L,
+ 0xfa11089f1de6b1e2L,0x516da5621351bc58L,0x573b61192dfa85b5L },
+ { 0x89e966836cba7df5L,0xf299be158c28ab40L,0xe91c9348ad43fcbfL,
+ 0xe9bbc7cc9a1cefb3L,0xc8add876738b2775L,0x6e3b1f2e775eaa01L } },
+ /* 143 */
+ { { 0x0365a888b677788bL,0x634ae8c43fd6173cL,0x304987619e498dbeL,
+ 0x08c43e6dc8f779abL,0x068ae3844c09aca9L,0x2380c70b2018d170L },
+ { 0xcf77fbc3a297c5ecL,0xdacbc853ca457948L,0x3690de04336bec7eL,
+ 0x26bbac6414eec461L,0xd1c23c7e1f713abfL,0xf08bbfcde6fd569eL } },
+ /* 144 */
+ { { 0x5f8163f484770ee3L,0x0e0c7f94744a1706L,0x9c8f05f7e1b2d46dL,
+ 0x417eafe7d01fd99aL,0x2ba15df511440e5bL,0xdc5c552a91a6fbcfL },
+ { 0x86271d74a270f721L,0x32c0a075a004485bL,0x9d1a87e38defa075L,
+ 0xb590a7acbf0d20feL,0x430c41c28feda1f5L,0x454d287958f6ec24L } },
+ /* 145 */
+ { { 0x52b7a6357c525435L,0x3d9ef57f37c4bdbcL,0x2bb93e9edffcc475L,
+ 0xf7b8ba987710f3beL,0x42ee86da21b727deL,0x55ac3f192e490d01L },
+ { 0x487e3a6ec0c1c390L,0x036fb345446cde7bL,0x089eb276496ae951L,
+ 0xedfed4d971ed1234L,0x661b0dd5900f0b46L,0x11bd6f1b8582f0d3L } },
+ /* 146 */
+ { { 0x5cf9350f076bc9d1L,0x15d903becf3cd2c3L,0x21cfc8c225af031cL,
+ 0xe0ad32488b1cc657L,0xdd9fb96370014e87L,0xf0f3a5a1297f1658L },
+ { 0xbb908fbaf1f703aaL,0x2f9cc4202f6760baL,0x00ceec6666a38b51L,
+ 0x4deda33005d645daL,0xb9cf5c72f7de3394L,0xaeef65021ad4c906L } },
+ /* 147 */
+ { { 0x0583c8b17a19045dL,0xae7c3102d052824cL,0x2a234979ff6cfa58L,
+ 0xfe9dffc962c733c0L,0x3a7fa2509c0c4b09L,0x516437bb4fe21805L },
+ { 0x9454e3d5c2a23ddbL,0x0726d887289c104eL,0x8977d9184fd15243L,
+ 0xc559e73f6d7790baL,0x8fd3e87d465af85fL,0xa2615c745feee46bL } },
+ /* 148 */
+ { { 0xc8d607a84335167dL,0x8b42d804e0f5c887L,0x5f9f13df398d11f9L,
+ 0x5aaa508720740c67L,0x83da9a6aa3d9234bL,0xbd3a5c4e2a54bad1L },
+ { 0xdd13914c2db0f658L,0x29dcb66e5a3f373aL,0xbfd62df55245a72bL,
+ 0x19d1802391e40847L,0xd9df74dbb136b1aeL,0x72a06b6b3f93bc5bL } },
+ /* 149 */
+ { { 0x6da19ec3ad19d96fL,0xb342daa4fb2a4099L,0x0e61633a662271eaL,
+ 0x3bcece81ce8c054bL,0x7cc8e0618bd62dc6L,0xae189e19ee578d8bL },
+ { 0x73e7a25ddced1eedL,0xc1257f0a7875d3abL,0x2cb2d5a21cfef026L,
+ 0xd98ef39bb1fdf61cL,0xcd8e6f6924e83e6cL,0xd71e7076c7b7088bL } },
+ /* 150 */
+ { { 0x339368309d4245bfL,0x22d962172ac2953bL,0xb3bf5a8256c3c3cdL,
+ 0x50c9be910d0699e8L,0xec0944638f366459L,0x6c056dba513b7c35L },
+ { 0x687a6a83045ab0e3L,0x8d40b57f445c9295L,0x0f345048a16f5954L,
+ 0x64b5c6393d8f0a87L,0x106353a29f71c5e2L,0xdd58b475874f0dd4L } },
+ /* 151 */
+ { { 0x67ec084f62230c72L,0xf14f6cca481385e3L,0xf58bb4074cda7774L,
+ 0xe15011b1aa2dbb6bL,0xd488369d0c035ab1L,0xef83c24a8245f2fdL },
+ { 0xfb57328f9fdc2538L,0x79808293191fe46aL,0xe28f5c4432ede548L,
+ 0x1b3cda99ea1a022cL,0x39e639b73df2ec7fL,0x77b6272b760e9a18L } },
+ /* 152 */
+ { { 0x2b1d51bda65d56d5L,0x3a9b71f97ea696e0L,0x95250ecc9904f4c4L,
+ 0x8bc4d6ebe75774b7L,0x0e343f8aeaeeb9aaL,0xc473c1d1930e04cbL },
+ { 0x282321b1064cd8aeL,0xf4b4371e5562221cL,0xc1cc81ecd1bf1221L,
+ 0xa52a07a9e2c8082fL,0x350d8e59ba64a958L,0x29e4f3de6fb32c9aL } },
+ /* 153 */
+ { { 0x0aa9d56cba89aaa5L,0xf0208ac0c4c6059eL,0x7400d9c6bd6ddca4L,
+ 0xb384e475f2c2f74aL,0x4c1061fcb1562dd3L,0x3924e2482e153b8dL },
+ { 0xf38b8d98849808abL,0x29bf3260a491aa36L,0x85159ada88220edeL,
+ 0x8b47915bbe5bc422L,0xa934d72ed7300967L,0xc4f303982e515d0dL } },
+ /* 154 */
+ { { 0xe3e9ee421b1de38bL,0xa124e25a42636760L,0x90bf73c090165b1aL,
+ 0x21802a34146434c5L,0x54aa83f22e1fa109L,0x1d4bd03ced9c51e9L },
+ { 0xc2d96a38798751e6L,0xed27235f8c3507f5L,0xb5fb80e2c8c24f88L,
+ 0xf873eefad37f4f78L,0x7229fd74f224ba96L,0x9dcd91999edd7149L } },
+ /* 155 */
+ { { 0xee9f81a64e94f22aL,0xe5609892f71ec341L,0x6c818ddda998284eL,
+ 0x9fd472953b54b098L,0x47a6ac030e8a7cc9L,0xde684e5eb207a382L },
+ { 0x4bdd1ecd2b6b956bL,0x09084414f01b3583L,0xe2f80b3255233b14L,
+ 0x5a0fec54ef5ebc5eL,0x74cf25e6bf8b29a2L,0x1c757fa07f29e014L } },
+ /* 156 */
+ { { 0x1bcb5c4aeb0fdfe4L,0xd7c649b3f0899367L,0xaef68e3f05bc083bL,
+ 0x57a06e46a78aa607L,0xa2136ecc21223a44L,0x89bd648452f5a50bL },
+ { 0x724411b94455f15aL,0x23dfa97008a9c0fdL,0x7b0da4d16db63befL,
+ 0x6f8a7ec1fb162443L,0xc1ac9ceee98284fbL,0x085a582b33566022L } },
+ /* 157 */
+ { { 0x15cb61f9ec1f138aL,0x11c9a230668f0c28L,0xac829729df93f38fL,
+ 0xcef256984048848dL,0x3f686da02bba8fbfL,0xed5fea78111c619aL },
+ { 0x9b4f73bcd6d1c833L,0x5095160686e7bf80L,0xa2a73508042b1d51L,
+ 0x9ef6ea495fb89ec2L,0xf1008ce95ef8b892L,0x78a7e6849ae8568bL } },
+ /* 158 */
+ { { 0x3fe83a7c10470cd8L,0x92734682f86df000L,0xb5dac06bda9409b5L,
+ 0x1e7a966094939c5fL,0xdec6c1505cc116dcL,0x1a52b40866bac8ccL },
+ { 0x5303a3656e864045L,0x45eae72a9139efc1L,0x83bec6466f31d54fL,
+ 0x2fb4a86f6e958a6dL,0x6760718e4ff44030L,0x008117e3e91ae0dfL } },
+ /* 159 */
+ { { 0x5d5833ba384310a2L,0xbdfb4edc1fd6c9fcL,0xb9a4f102849c4fb8L,
+ 0xe5fb239a581c1e1fL,0xba44b2e7d0a9746dL,0x78f7b7683bd942b9L },
+ { 0x076c8ca1c87607aeL,0x82b23c2ed5caaa7eL,0x6a581f392763e461L,
+ 0xca8a5e4a3886df11L,0xc87e90cf264e7f22L,0x04f74870215cfcfcL } },
+ /* 160 */
+ { { 0x5285d116141d161cL,0x67cd2e0e93c4ed17L,0x12c62a647c36187eL,
+ 0xf5329539ed2584caL,0xc4c777c442fbbd69L,0x107de7761bdfc50aL },
+ { 0x9976dcc5e96beebdL,0xbe2aff95a865a151L,0x0e0a9da19d8872afL,
+ 0x5e357a3da63c17ccL,0xd31fdfd8e15cc67cL,0xc44bbefd7970c6d8L } },
+ /* 161 */
+ { { 0x703f83e24c0c62f1L,0x9b1e28ee4e195572L,0x6a82858bfe26ccedL,
+ 0xd381c84bc43638faL,0x94f72867a5ba43d8L,0x3b4a783d10b82743L },
+ { 0xee1ad7b57576451eL,0xc3d0b59714b6b5c8L,0x3dc30954fcacc1b8L,
+ 0x55df110e472c9d7bL,0x97c86ed702f8a328L,0xd043341388dc098fL } },
+ /* 162 */
+ { { 0x1a60d1522ca8f2feL,0x61640948491bd41fL,0x6dae29a558dfe035L,
+ 0x9a615bea278e4863L,0xbbdb44779ad7c8e5L,0x1c7066302ceac2fcL },
+ { 0x5e2b54c699699b4bL,0xb509ca6d239e17e8L,0x728165feea063a82L,
+ 0x6b5e609db6a22e02L,0x12813905b26ee1dfL,0x07b9f722439491faL } },
+ /* 163 */
+ { { 0x1592ec1448ff4e49L,0x3e4e9f176d644129L,0x7acf82881156acc0L,
+ 0x5aa34ba8bb092b0bL,0xcd0f90227d38393dL,0x416724ddea4f8187L },
+ { 0x3c4e641cc0139e73L,0xe0fe46cf91e4d87dL,0xedb3c792cab61f8aL,
+ 0x4cb46de4d3868753L,0xe449c21d20f1098aL,0x5e5fd059f5b8ea6eL } },
+ /* 164 */
+ { { 0x7fcadd4675856031L,0x89c7a4cdeaf2fbd0L,0x1af523ce7a87c480L,
+ 0xe5fc109561d9ae90L,0x3fb5864fbcdb95f5L,0xbeb5188ebb5b2c7dL },
+ { 0x3d1563c33ae65825L,0x116854c40e57d641L,0x11f73d341942ebd3L,
+ 0x24dc5904c06955b3L,0x8a0d4c83995a0a62L,0xfb26b86d5d577b7dL } },
+ /* 165 */
+ { { 0xc53108e7c686ae17L,0x9090d739d1c1da56L,0x4583b0139aec50aeL,
+ 0xdd9a088ba49a6ab2L,0x28192eeaf382f850L,0xcc8df756f5fe910eL },
+ { 0x877823a39cab7630L,0x64984a9afb8e7fc1L,0x5448ef9c364bfc16L,
+ 0xbbb4f871c44e2a9aL,0x901a41ab435c95e9L,0xc6c23e5faaa50a06L } },
+ /* 166 */
+ { { 0xb78016c19034d8ddL,0x856bb44b0b13e79bL,0x85c6409ab3241a05L,
+ 0x8d2fe19a2d78ed21L,0xdcc7c26d726eddf2L,0x3ccaff5f25104f04L },
+ { 0x397d7edc6b21f843L,0xda88e4dde975de4cL,0x5273d3964f5ab69eL,
+ 0x537680e39aae6cc0L,0xf749cce53e6f9461L,0x021ddbd9957bffd3L } },
+ /* 167 */
+ { { 0x7b64585f777233cfL,0xfe6771f60942a6f0L,0x636aba7adfe6eef0L,
+ 0x63bbeb5686038029L,0xacee5842de8fcf36L,0x48d9aa99d4a20524L },
+ { 0xcff7a74c0da5e57aL,0xc232593ce549d6c9L,0x68504bccf0f2287bL,
+ 0x6d7d098dbc8360b5L,0xeac5f1495b402f41L,0x61936f11b87d1bf1L } },
+ /* 168 */
+ { { 0xaa9da167b8153a9dL,0xa49fe3ac9e83ecf0L,0x14c18f8e1b661384L,
+ 0x61c24dab38434de1L,0x3d973c3a283dae96L,0xc99baa0182754fc9L },
+ { 0x477d198f4c26b1e3L,0x12e8e186a7516202L,0x386e52f6362addfaL,
+ 0x31e8f695c3962853L,0xdec2af136aaedb60L,0xfcfdb4c629cf74acL } },
+ /* 169 */
+ { { 0x6b3ee958cca40298L,0xc3878153f2f5d195L,0x0c565630ed2eae5bL,
+ 0xd089b37e3a697cf2L,0xc2ed2ac7ad5029eaL,0x7e5cdfad0f0dda6aL },
+ { 0xf98426dfd9b86202L,0xed1960b14335e054L,0x1fdb02463f14639eL,
+ 0x17f709c30db6c670L,0xbfc687ae773421e1L,0x13fefc4a26c1a8acL } },
+ /* 170 */
+ { { 0xe361a1987ffa0a5fL,0xf4b26102c63fe109L,0x264acbc56c74e111L,
+ 0x4af445fa77abebafL,0x448c4fdd24cddb75L,0x0b13157d44506eeaL },
+ { 0x22a6b15972e9993dL,0x2c3c57e485e5ecbeL,0xa673560bfd83e1a1L,
+ 0x6be23f82c3b8c83bL,0x40b13a9640bbe38eL,0x66eea033ad17399bL } },
+ /* 171 */
+ { { 0x49fc6e95b4c6c693L,0xefc735de36af7d38L,0xe053343d35fe42fcL,
+ 0xf0aa427c6a9ab7c3L,0xc79f04364a0fcb24L,0x1628724393ebbc50L },
+ { 0x5c3d6bd016927e1eL,0x40158ed2673b984cL,0xa7f86fc84cd48b9aL,
+ 0x1643eda660ea282dL,0x45b393eae2a1beedL,0x664c839e19571a94L } },
+ /* 172 */
+ { { 0x5774575027eeaf94L,0x2875c925ea99e1e7L,0xc127e7ba5086adeaL,
+ 0x765252a086fe424fL,0x1143cc6c2b6c0281L,0xc9bb2989d671312dL },
+ { 0x880c337c51acb0a5L,0xa3710915d3c60f78L,0x496113c09262b6edL,
+ 0x5d25d9f89ce48182L,0x53b6ad72b3813586L,0x0ea3bebc4c0e159cL } },
+ /* 173 */
+ { { 0xcaba450ac5e49beaL,0x684e54157c05da59L,0xa2e9cab9de7ac36cL,
+ 0x4ca79b5f2e6f957bL,0xef7b024709b817b1L,0xeb3049907d89df0fL },
+ { 0x508f730746fe5096L,0x695810e82e04eaafL,0x88ef1bd93512f76cL,
+ 0x776613513ebca06bL,0xf7d4863accf158b7L,0xb2a81e4494ee57daL } },
+ /* 174 */
+ { { 0xff288e5b6d53e6baL,0xa90de1a914484ea2L,0x2fadb60ced33c8ecL,
+ 0x579d6ef328b66a40L,0x4f2dd6ddec24372dL,0xe9e33fc91d66ec7dL },
+ { 0x110899d2039eab6eL,0xa31a667a3e97bb5eL,0x6200166dcfdce68eL,
+ 0xbe83ebae5137d54bL,0x085f7d874800acdfL,0xcf4ab1330c6f8c86L } },
+ /* 175 */
+ { { 0x03f65845931e08fbL,0x6438551e1506e2c0L,0x5791f0dc9c36961fL,
+ 0x68107b29e3dcc916L,0x83242374f495d2caL,0xd8cfb6636ee5895bL },
+ { 0x525e0f16a0349b1bL,0x33cd2c6c4a0fab86L,0x46c12ee82af8dda9L,
+ 0x7cc424ba71e97ad3L,0x69766ddf37621eb0L,0x95565f56a5f0d390L } },
+ /* 176 */
+ { { 0xe0e7bbf21a0f5e94L,0xf771e1151d82d327L,0x10033e3dceb111faL,
+ 0xd269744dd3426638L,0xbdf2d9da00d01ef6L,0x1cb80c71a049ceafL },
+ { 0x17f183289e21c677L,0x6452af0519c8f98bL,0x35b9c5f780b67997L,
+ 0x5c2e1cbe40f8f3d4L,0x43f9165666d667caL,0x9faaa059cf9d6e79L } },
+ /* 177 */
+ { { 0x8ad246180a078fe6L,0xf6cc73e6464fd1ddL,0x4d2ce34dc3e37448L,
+ 0x624950c5e3271b5fL,0x62910f5eefc5af72L,0x8b585bf8aa132bc6L },
+ { 0x11723985a839327fL,0x34e2d27d4aac252fL,0x402f59ef6296cc4eL,
+ 0x00ae055c47053de9L,0xfc22a97228b4f09bL,0xa9e86264fa0c180eL } },
+ /* 178 */
+ { { 0x0b7b6224bc310eccL,0x8a1a74f167fa14edL,0x87dd09607214395cL,
+ 0xdf1b3d09f5c91128L,0x39ff23c686b264a8L,0xdc2d49d03e58d4c5L },
+ { 0x2152b7d3a9d6f501L,0xf4c32e24c04094f7L,0xc6366596d938990fL,
+ 0x084d078f94fb207fL,0xfd99f1d7328594cbL,0x36defa64cb2d96b3L } },
+ /* 179 */
+ { { 0x4619b78113ed7cbeL,0x95e500159784bd0eL,0x2a32251c2c7705feL,
+ 0xa376af995f0dd083L,0x55425c6c0361a45bL,0x812d2cef1f291e7bL },
+ { 0xccf581a05fd94972L,0x26e20e39e56dc383L,0x0093685d63dbfbf0L,
+ 0x1fc164cc36b8c575L,0xb9c5ab81390ef5e7L,0x40086beb26908c66L } },
+ /* 180 */
+ { { 0xe5e54f7937e3c115L,0x69b8ee8cc1445a8aL,0x79aedff2b7659709L,
+ 0xe288e1631b46fbe6L,0xdb4844f0d18d7bb7L,0xe0ea23d048aa6424L },
+ { 0x714c0e4ef3d80a73L,0x87a0aa9e3bd64f98L,0x8844b8a82ec63080L,
+ 0xe0ac9c30255d81a3L,0x86151237455397fcL,0x0b9794642f820155L } },
+ /* 181 */
+ { { 0x127a255a4ae03080L,0x232306b4580a89fbL,0x04e8cd6a6416f539L,
+ 0xaeb70dee13b02a0eL,0xa3038cf84c09684aL,0xa710ec3c28e433eeL },
+ { 0x77a72567681b1f7dL,0x86fbce952fc28170L,0xd3408683f5735ac8L,
+ 0x3a324e2a6bd68e93L,0x7ec74353c027d155L,0xab60354cd4427177L } },
+ /* 182 */
+ { { 0x32a5342aef4c209dL,0x2ba7527408d62704L,0x4bb4af6fc825d5feL,
+ 0x1c3919ced28e7ff1L,0x1dfc2fdcde0340f6L,0xc6580baf29f33ba9L },
+ { 0xae121e7541d442cbL,0x4c7727fd3a4724e4L,0xe556d6a4524f3474L,
+ 0x87e13cc7785642a2L,0x182efbb1a17845fdL,0xdcec0cf14e144857L } },
+ /* 183 */
+ { { 0x1cb89541e9539819L,0xc8cb3b4f9d94dbf1L,0x1d353f63417da578L,
+ 0xb7a697fb8053a09eL,0x8d841731c35d8b78L,0x85748d6fb656a7a9L },
+ { 0x1fd03947c1859c5dL,0x6ce965c1535d22a2L,0x1966a13e0ca3aadcL,
+ 0x9802e41d4fb14effL,0xa9048cbb76dd3fcdL,0x89b182b5e9455bbaL } },
+ /* 184 */
+ { { 0xd777ad6a43360710L,0x841287ef55e9936bL,0xbaf5c67004a21b24L,
+ 0xf2c0725f35ad86f1L,0x338fa650c707e72eL,0x2bf8ed2ed8883e52L },
+ { 0xb0212cf4b56e0d6aL,0x50537e126843290cL,0xd8b184a198b3dc6fL,
+ 0xd2be9a350210b722L,0x407406db559781eeL,0x5a78d5910bc18534L } },
+ /* 185 */
+ { { 0x4d57aa2ad748b02cL,0xbe5b3451a12b3b95L,0xadca7a4564711258L,
+ 0x597e091a322153dbL,0xf327100632eb1eabL,0xbd9adcba2873f301L },
+ { 0xd1dc79d138543f7fL,0x00022092921b1fefL,0x86db3ef51e5df8edL,
+ 0x888cae049e6b944aL,0x71bd29ec791a32b4L,0xd3516206a6d1c13eL } },
+ /* 186 */
+ { { 0x2ef6b95255924f43L,0xd2f401ae4f9de8d5L,0xfc73e8d7adc68042L,
+ 0x627ea70c0d9d1bb4L,0xc3bb3e3ebbf35679L,0x7e8a254ad882dee4L },
+ { 0x08906f50b5924407L,0xf14a0e61a1ad444aL,0xaa0efa2165f3738eL,
+ 0xd60c7dd6ae71f161L,0x9e8390faf175894dL,0xd115cd20149f4c00L } },
+ /* 187 */
+ { { 0x2f2e2c1da52abf77L,0xc2a0dca554232568L,0xed423ea254966dccL,
+ 0xe48c93c7cd0dd039L,0x1e54a225176405c7L,0x1efb5b1670d58f2eL },
+ { 0xa751f9d994fb1471L,0xfdb31e1f67d2941dL,0xa6c74eb253733698L,
+ 0xd3155d1189a0f64aL,0x4414cfe4a4b8d2b6L,0x8d5a4be8f7a8e9e3L } },
+ /* 188 */
+ { { 0x5c96b4d452669e98L,0x4547f9228fd42a03L,0xcf5c1319d285174eL,
+ 0x805cd1ae064bffa0L,0x50e8bc4f246d27e7L,0xf89ef98fd5781e11L },
+ { 0xb4ff95f6dee0b63fL,0xad850047222663a4L,0x026918604d23ce9cL,
+ 0x3e5309ce50019f59L,0x27e6f72269a508aeL,0xe9376652267ba52cL } },
+ /* 189 */
+ { { 0xa04d289cc0368708L,0xc458872f5e306e1dL,0x76fa23de33112feaL,
+ 0x718e39746efde42eL,0xf0c98cdc1d206091L,0x5fa3ca6214a71987L },
+ { 0xeee8188bdcaa9f2aL,0x312cc732589a860dL,0xf9808dd6c63aeb1fL,
+ 0x70fd43db4ea62b53L,0x2c2bfe34890b6e97L,0x105f863cfa426aa6L } },
+ /* 190 */
+ { { 0x0b29795db38059adL,0x5686b77e90647ea0L,0xeff0470edb473a3eL,
+ 0x278d2340f9b6d1e2L,0xebbff95bbd594ec7L,0xf4b72334d3a7f23dL },
+ { 0x2a285980a5a83f0bL,0x0786c41a9716a8b3L,0x138901bd22511812L,
+ 0xd1b55221e2fede6eL,0x0806e264df4eb590L,0x6c4c897e762e462eL } },
+ /* 191 */
+ { { 0xd10b905fb4b41d9dL,0x826ca4664523a65bL,0x535bbd13b699fa37L,
+ 0x5b9933d773bc8f90L,0x9332d61fcd2118adL,0x158c693ed4a65fd0L },
+ { 0x4ddfb2a8e6806e63L,0xe31ed3ecb5de651bL,0xf9460e51819bc69aL,
+ 0x6229c0d62c76b1f8L,0xbb78f231901970a3L,0x31f3820f9cee72b8L } },
+ /* 192 */
+ { { 0xe931caf2c09e1c72L,0x0715f29812990cf4L,0x33aad81d943262d8L,
+ 0x5d292b7a73048d3fL,0xb152aaa4dc7415f6L,0xc3d10fd90fd19587L },
+ { 0xf76b35c575ddadd0L,0x9f5f4a511e7b694cL,0x2f1ab7ebc0663025L,
+ 0x01c9cc87920260b0L,0xc4b1f61a05d39da6L,0x6dcd76c4eb4a9c4eL } },
+ /* 193 */
+ { { 0x0ba0916ffdc83f01L,0x354c8b449553e4f9L,0xa6cc511affc5e622L,
+ 0xb954726ae95be787L,0xcb04811575b41a62L,0xfa2ae6cdebfde989L },
+ { 0x6376bbc70f24659aL,0x13a999fd4c289c43L,0xc7134184ec9abd8bL,
+ 0x28c02bf6a789ab04L,0xff841ebcd3e526ecL,0x442b191e640893a8L } },
+ /* 194 */
+ { { 0x4cac6c62fa2b6e20L,0x97f29e9bf6d69861L,0x228ab1dbbc96d12dL,
+ 0x6eb913275e8e108dL,0xd4b3d4d140771245L,0x61b20623ca8a803aL },
+ { 0x2c2f3b41a6a560b1L,0x879e1d403859fcf4L,0x7cdb5145024dbfc3L,
+ 0x55d08f153bfa5315L,0x2f57d773aa93823aL,0xa97f259cc6a2c9a2L } },
+ /* 195 */
+ { { 0xc306317be58edbbbL,0x25ade51c79dfdf13L,0x6b5beaf116d83dd6L,
+ 0xe8038a441dd8f925L,0x7f00143cb2a87b6bL,0xa885d00df5b438deL },
+ { 0xe9f76790cf9e48bdL,0xf0bdf9f0a5162768L,0x0436709fad7b57cbL,
+ 0x7e151c12f7c15db7L,0x3514f0225d90ee3bL,0x2e84e8032c361a8dL } },
+ /* 196 */
+ { { 0x2277607d563ec8d8L,0xa661811fe3934cb7L,0x3ca72e7af58fd5deL,
+ 0x7989da0462294c6aL,0x88b3708bf6bbefe9L,0x0d524cf753ed7c82L },
+ { 0x69f699ca2f30c073L,0xf0fa264b9dc1dcf3L,0x44ca456805f0aaf6L,
+ 0x0f5b23c7d19b9bafL,0x39193f41eabd1107L,0x9e3e10ad2a7c9b83L } },
+ /* 197 */
+ { { 0xa90824f0d4ae972fL,0x43eef02bc6e846e7L,0x7e46061229d2160aL,
+ 0x29a178acfe604e91L,0x23056f044eb184b2L,0x4fcad55feb54cdf4L },
+ { 0xa0ff96f3ae728d15L,0x8a2680c6c6a00331L,0x5f84cae07ee52556L,
+ 0x5e462c3ac5a65dadL,0x5d2b81dfe2d23f4fL,0x6e47301bc5b1eb07L } },
+ /* 198 */
+ { { 0x77411d68af8219b9L,0xcb883ce651b1907aL,0x25c87e57101383b5L,
+ 0x9c7d9859982f970dL,0xaa6abca5118305d2L,0x725fed2f9013a5dbL },
+ { 0x487cdbafababd109L,0xc0f8cf5687586528L,0xa02591e68ad58254L,
+ 0xc071b1d1debbd526L,0x927dfe8b961e7e31L,0x55f895f99263dfe1L } },
+ /* 199 */
+ { { 0xf899b00db175645bL,0x51f3a627b65b4b92L,0xa2f3ac8db67399efL,
+ 0xe717867fe400bc20L,0x42cc90201967b952L,0x3d5967513ecd1de1L },
+ { 0xd41ebcdedb979775L,0x99ba61bc6a2e7e88L,0x039149a5321504f2L,
+ 0xe7dc231427ba2fadL,0x9f556308b57d8368L,0x2b6d16c957da80a7L } },
+ /* 200 */
+ { { 0x84af5e76279ad982L,0x9bb4c92d9c8b81a6L,0xd79ad44e0e698e67L,
+ 0xe8be9048265fc167L,0xf135f7e60c3a4cccL,0xa0a10d38b8863a33L },
+ { 0xe197247cd386efd9L,0x0eefd3f9b52346c2L,0xc22415f978607bc8L,
+ 0xa2a8f862508674ceL,0xa72ad09ec8c9d607L,0xcd9f0ede50fa764fL } },
+ /* 201 */
+ { { 0x063391c7d1a46d4dL,0x2df51c119eb01693L,0xc5849800849e83deL,
+ 0x48fd09aa8ad08382L,0xa405d873aa742736L,0xee49e61ee1f9600cL },
+ { 0xd76676be48c76f73L,0xd9c100f601274b2aL,0x110bb67c83f8718dL,
+ 0xec85a42002fc0d73L,0xc0449e1e744656adL,0x28ce737637d9939bL } },
+ /* 202 */
+ { { 0x97e9af7244544ac7L,0xf2c658d5ba010426L,0x732dec39fb3adfbdL,
+ 0xd12faf91a2df0b07L,0x8ac267252171e208L,0xf820cdc85b24fa54L },
+ { 0x307a6eea94f4cf77L,0x18c783d2944a33c6L,0x4b939d4c0b741ac5L,
+ 0x1d7acd153ffbb6e4L,0x06a248587a255e44L,0x14fbc494ce336d50L } },
+ /* 203 */
+ { { 0x9b920c0c51584e3cL,0xc7733c59f7e54027L,0xe24ce13988422bbeL,
+ 0x11ada812523bd6abL,0xde068800b88e6defL,0x7b872671fe8c582dL },
+ { 0x4e746f287de53510L,0x492f8b99f7971968L,0x1ec80bc77d928ac2L,
+ 0xb3913e48432eb1b5L,0xad08486632028f6eL,0x122bb8358fc2f38bL } },
+ /* 204 */
+ { { 0x0a9f3b1e3b0b29c3L,0x837b64324fa44151L,0xb9905c9217b28ea7L,
+ 0xf39bc93798451750L,0xcd383c24ce8b6da1L,0x299f57db010620b2L },
+ { 0x7b6ac39658afdce3L,0xa15206b33d05ef47L,0xa0ae37e2b9bb02ffL,
+ 0x107760ab9db3964cL,0xe29de9a067954beaL,0x446a1ad8431c3f82L } },
+ /* 205 */
+ { { 0xc6fecea05c6b8195L,0xd744a7c5f49e71b9L,0xa8e96acc177a7ae7L,
+ 0x1a05746c358773a7L,0xa416214637567369L,0xaa0217f787d1c971L },
+ { 0x61e9d15877fd3226L,0x0f6f2304e4f600beL,0xa9c4cebc7a6dff07L,
+ 0xd15afa0109f12a24L,0x2bbadb228c863ee9L,0xa28290e4e5eb8c78L } },
+ /* 206 */
+ { { 0x55b87fa03e9de330L,0x12b26066195c145bL,0xe08536e0a920bef0L,
+ 0x7bff6f2c4d195adcL,0x7f319e9d945f4187L,0xf9848863f892ce47L },
+ { 0xd0efc1d34fe37657L,0x3c58de825cf0e45aL,0x626ad21a8b0ccbbeL,
+ 0xd2a31208af952fc5L,0x81791995eb437357L,0x5f19d30f98e95d4fL } },
+ /* 207 */
+ { { 0x72e83d9a0e6865bbL,0x22f5af3bf63456a6L,0x409e9c73463c8d9eL,
+ 0x40e9e578dfe6970eL,0x876b6efa711b91caL,0x895512cf942625a3L },
+ { 0x84c8eda8cb4e462bL,0x84c0154a4412e7c8L,0x04325db1ceb7b71fL,
+ 0x1537dde366f70877L,0xf3a093991992b9acL,0xa7316606d498ae77L } },
+ /* 208 */
+ { { 0x13990d2fcad260f5L,0x76c3be29eec0e8c0L,0x7dc5bee00f7bd7d5L,
+ 0x9be167d2efebda4bL,0xcce3dde69122b87eL,0x75a28b0982b5415cL },
+ { 0xf6810bcde84607a6L,0xc6d581286f4dbf0dL,0xfead577d1b4dafebL,
+ 0x9bc440b2066b28ebL,0x53f1da978b17e84bL,0x0459504bcda9a575L } },
+ /* 209 */
+ { { 0x13e39a02329e5836L,0x2c9e7d51f717269dL,0xc5ac58d6f26c963bL,
+ 0x3b0c6c4379967bf5L,0x60bbea3f55908d9dL,0xd84811e7f07c9ad1L },
+ { 0xfe7609a75bd20e4aL,0xe4325dd20a70baa8L,0x3711f370b3600386L,
+ 0x97f9562fd0924302L,0x040dc0c34acc4436L,0xfd6d725cde79cdd4L } },
+ /* 210 */
+ { { 0xb3efd0e3cf13eafbL,0x21009cbb5aa0ae5fL,0xe480c55379022279L,
+ 0x755cf334b2fc9a6dL,0x8564a5bf07096ae7L,0xddd649d0bd238139L },
+ { 0xd0de10b18a045041L,0x6e05b413c957d572L,0x5c5ff8064e0fb25cL,
+ 0xd933179b641162fbL,0x42d48485e57439f9L,0x70c5bd0a8a8d72aaL } },
+ /* 211 */
+ { { 0xa767173897bdf646L,0xaa1485b4ab329f7cL,0xce3e11d6f8f25fdfL,
+ 0x76a3fc7ec6221824L,0x045f281ff3924740L,0x24557d4e96d13a9aL },
+ { 0x875c804bdd4c27cdL,0x11c5f0f40f5c7feaL,0xac8c880bdc55ff7eL,
+ 0x2acddec51103f101L,0x38341a21f99faa89L,0xc7b67a2cce9d6b57L } },
+ /* 212 */
+ { { 0x9a0d724f8e357586L,0x1d7f4ff5df648da0L,0x9c3e6c9bfdee62a5L,
+ 0x0499cef00389b372L,0xe904050d98eab879L,0xe8eef1b66c051617L },
+ { 0xebf5bfebc37e3ca9L,0x7c5e946da4e0b91dL,0x790973142c4bea28L,
+ 0x81f6c109ee67b2b7L,0xaf237d9bdafc5edeL,0xd2e602012abb04c7L } },
+ /* 213 */
+ { { 0x6156060c8a4f57bfL,0xf9758696ff11182aL,0x8336773c6296ef00L,
+ 0x9c054bceff666899L,0xd6a11611719cd11cL,0x9824a641dbe1acfaL },
+ { 0x0b7b7a5fba89fd01L,0xf8d3b809889f79d8L,0xc5e1ea08f578285cL,
+ 0x7ac74536ae6d8288L,0x5d37a2007521ef5fL,0x5ecc4184b260a25dL } },
+ /* 214 */
+ { { 0xddcebb19a708c8d3L,0xe63ed04fc63f81ecL,0xd045f5a011873f95L,
+ 0x3b5ad54479f276d5L,0x81272a3d425ae5b3L,0x8bfeb50110ce1605L },
+ { 0x4233809c888228bfL,0x4bd82acfb2aff7dfL,0x9c68f1800cbd4a7fL,
+ 0xfcd771246b44323dL,0x60c0fcf6891db957L,0xcfbb4d8904da8f7fL } },
+ /* 215 */
+ { { 0x9a6a5df93b26139aL,0x3e076a83b2cc7eb8L,0x47a8e82d5a964bcdL,
+ 0x8a4e2a39b9278d6bL,0x93506c98e4443549L,0x06497a8ff1e0d566L },
+ { 0x3dee8d992b1efa05L,0x2da63ca845393e33L,0xa4af7277cf0579adL,
+ 0xaf4b46393236d8eaL,0x6ccad95b32b617f5L,0xce76d8b8b88bb124L } },
+ /* 216 */
+ { { 0x63d2537a083843dcL,0x89eb35141e4153b4L,0x5175ebc4ea9afc94L,
+ 0x7a6525808ed1aed7L,0x67295611d85e8297L,0x8dd2d68bb584b73dL },
+ { 0x237139e60133c3a4L,0x9de838ab4bd278eaL,0xe829b072c062fcd9L,
+ 0x70730d4f63ba8706L,0x6080483fd3cd05ecL,0x872ab5b80c85f84dL } },
+ /* 217 */
+ { { 0xfc0776d3999d4d49L,0xa3eb59deec3f45e7L,0xbc990e440dae1fc1L,
+ 0x33596b1ea15371ffL,0xd447dcb29bc7ab25L,0xcd5b63e935979582L },
+ { 0xae3366fa77d1ff11L,0x59f28f05edee6903L,0x6f43fed1a4433bf2L,
+ 0x15409c9bdf9ce00eL,0x21b5cdedaca9c5dcL,0xf9f3359582d7bdb4L } },
+ /* 218 */
+ { { 0x959443789422c792L,0x239ea923c958b8bfL,0x4b61a247df076541L,
+ 0x4d29ce85bb9fc544L,0x9a692a670b424559L,0x6e0ca5a00e486900L },
+ { 0x6b79a78285b3beceL,0x41f35e39c61f9892L,0xff82099aae747f82L,
+ 0x58c8ae3fd0ca59d6L,0x4ac930e299406b5fL,0x2ce04eb99df24243L } },
+ /* 219 */
+ { { 0x4366b9941ac37b82L,0xff0c728d25b04d83L,0x1f55136119c47b7cL,
+ 0xdbf2d5edbeff13e7L,0xf78efd51e12a683dL,0x82cd85b9989cf9c4L },
+ { 0xe23c6db6e0cb5d37L,0x818aeebd72ee1a15L,0x8212aafd28771b14L,
+ 0x7bc221d91def817dL,0xdac403a29445c51fL,0x711b051712c3746bL } },
+ /* 220 */
+ { { 0x0ed9ed485ea99eccL,0xf799500db8cab5e1L,0xa8ec87dcb570cbdcL,
+ 0x52cfb2c2d35dfaecL,0x8d31fae26e4d80a4L,0xe6a37dc9dcdeabe5L },
+ { 0x5d365a341deca452L,0x09a5f8a50d68b44eL,0x59238ea5a60744b1L,
+ 0xf2fedc0dbb4249e9L,0xe395c74ea909b2e3L,0xe156d1a539388250L } },
+ /* 221 */
+ { { 0xd796b3d047181ae9L,0xbaf44ba844197808L,0xe693309434cf3facL,
+ 0x41aa6adec3bd5c46L,0x4fda75d8eed947c6L,0xacd9d4129ea5a525L },
+ { 0x65cc55a3d430301bL,0x3c9a5bcf7b52ea49L,0x22d319cf159507f0L,
+ 0x2ee0b9b5de74a8ddL,0x20c26a1e877ac2b6L,0x387d73da92e7c314L } },
+ /* 222 */
+ { { 0x13c4833e8cd3fdacL,0x76fcd473332e5b8eL,0xff671b4be2fe1fd3L,
+ 0x4d734e8b5d98d8ecL,0xb1ead3c6514bbc11L,0xd14ca8587b390494L },
+ { 0x95a443af5d2d37e9L,0x73c6ea7300464622L,0xa44aeb4b15755044L,
+ 0xba3f8575fab58feeL,0x9779dbc9dc680a6fL,0xe1ee5f5a7b37ddfcL } },
+ /* 223 */
+ { { 0xcd0b464812d29f46L,0x93295b0b0ed53137L,0xbfe2609480bef6c9L,
+ 0xa656578854248b00L,0x69c43fca80e7f9c4L,0x2190837bbe141ea1L },
+ { 0x875e159aa1b26cfbL,0x90ca9f877affe852L,0x15e6550d92ca598eL,
+ 0xe3e0945d1938ad11L,0xef7636bb366ef937L,0xb6034d0bb39869e5L } },
+ /* 224 */
+ { { 0x4d255e3026d8356eL,0xf83666edd314626fL,0x421ddf61d0c8ed64L,
+ 0x96e473c526677b61L,0xdad4af7e9e9b18b3L,0xfceffd4aa9393f75L },
+ { 0x843138a111c731d5L,0x05bcb3a1b2f141d9L,0x20e1fa95617b7671L,
+ 0xbefce81288ccec7bL,0x582073dc90f1b568L,0xf572261a1f055cb7L } },
+ /* 225 */
+ { { 0xf314827736973088L,0xc008e70886a9f980L,0x1b795947e046c261L,
+ 0xdf1e6a7dca76bca0L,0xabafd88671acddf0L,0xff7054d91364d8f4L },
+ { 0x2cf63547e2260594L,0x468a5372d73b277eL,0xc7419e24ef9bd35eL,
+ 0x2b4a1c2024043cc3L,0xa28f047a890b39cdL,0xdca2cea146f9a2e3L } },
+ /* 226 */
+ { { 0xab78873653277538L,0xa734e225cf697738L,0x66ee1d1e6b22e2c1L,
+ 0x2c615389ebe1d212L,0xf36cad4002bb0766L,0x120885c33e64f207L },
+ { 0x59e77d5690fbfec2L,0xf9e781aad7a574aeL,0x801410b05d045e53L,
+ 0xd3b5f0aaa91b5f0eL,0xb3d1df007fbb3521L,0x11c4b33ec72bee9aL } },
+ /* 227 */
+ { { 0xd32b983283c3a7f3L,0x8083abcf88d8a354L,0xdeb1640450f4ec5aL,
+ 0x18d747f0641e2907L,0x4e8978aef1bbf03eL,0x932447dc88a0cd89L },
+ { 0x561e0febcf3d5897L,0xfc3a682f13600e6dL,0xc78b9d73d16a6b73L,
+ 0xe713feded29bf580L,0x0a22522308d69e5cL,0x3a924a571ff7fda4L } },
+ /* 228 */
+ { { 0xfb64554cb4093beeL,0xa6d65a25a58c6ec0L,0x4126994d43d0ed37L,
+ 0xa5689a5155152d44L,0xb8e5ea8c284caa8dL,0x33f05d4fd1f25538L },
+ { 0xe0fdfe091b615d6eL,0x2ded7e8f705507daL,0xdd5631e517bbcc80L,
+ 0x4f87453e267fd11fL,0xc6da723fff89d62dL,0x55cbcae2e3cda21dL } },
+ /* 229 */
+ { { 0x336bc94e6b4e84f3L,0x728630314ef72c35L,0x6d85fdeeeeb57f99L,
+ 0x7f4e3272a42ece1bL,0x7f86cbb536f0320aL,0xf09b6a2b923331e6L },
+ { 0x21d3ecf156778435L,0x2977ba998323b2d2L,0x6a1b57fb1704bc0fL,
+ 0xd777cf8b389f048aL,0x9ce2174fac6b42cdL,0x404e2bff09e6c55aL } },
+ /* 230 */
+ { { 0x9b9b135e204c5ddbL,0x9dbfe0443eff550eL,0x35eab4bfec3be0f6L,
+ 0x8b4c3f0d0a43e56fL,0x4c1c66730e73f9b3L,0x92ed38bd2c78c905L },
+ { 0xc7003f6aa386e27cL,0xb9c4f46faced8507L,0xea024ec859df5464L,
+ 0x4af96152429572eaL,0x279cd5e2e1fc1194L,0xaa376a03281e358cL } },
+ /* 231 */
+ { { 0x078592233cdbc95cL,0xaae1aa6aef2e337aL,0xc040108d472a8544L,
+ 0x80c853e68d037b7dL,0xd221315c8c7eee24L,0x195d38568ee47752L },
+ { 0xd4b1ba03dacd7fbeL,0x4b5ac61ed3e0c52bL,0x68d3c0526aab7b52L,
+ 0xf0d7248c660e3feaL,0xafdb3f893145efb4L,0xa73fd9a38f40936dL } },
+ /* 232 */
+ { { 0x891b9ef3bb1b17ceL,0x14023667c6127f31L,0x12b2e58d305521fdL,
+ 0x3a47e449e3508088L,0xe49fc84bff751507L,0x4023f7225310d16eL },
+ { 0xa608e5edb73399faL,0xf12632d8d532aa3eL,0x13a2758e845e8415L,
+ 0xae4b6f851fc2d861L,0x3879f5b1339d02f2L,0x446d22a680d99ebdL } },
+ /* 233 */
+ { { 0x0f5023024be164f1L,0x8d09d2d688b81920L,0x514056f1984aceffL,
+ 0xa5c4ddf075e9e80dL,0x38cb47e6df496a93L,0x899e1d6b38df6bf7L },
+ { 0x69e87e88b59eb2a6L,0x280d9d639b47f38bL,0x599411ea3654e955L,
+ 0xcf8dd4fd969aa581L,0xff5c2baf530742a7L,0xa43915361a373085L } },
+ /* 234 */
+ { { 0x6ace72a3a8a4bdd2L,0xc656cdd1b68ef702L,0xd4a33e7e90c4dad8L,
+ 0x4aece08a9d951c50L,0xea8005ae085d68e6L,0xfdd7a7d76f7502b8L },
+ { 0xce6fb0a698d6fa45L,0x228f86721104eb8cL,0xd23d8787da09d7dcL,
+ 0x5521428b2ae93065L,0x95faba3dea56c366L,0xedbe50390a88aca5L } },
+ /* 235 */
+ { { 0xd64da0adbfb26c82L,0xe5d70b3c952c2f9cL,0xf5e8f365f7e77f68L,
+ 0x7234e00208f2d695L,0xfaf900eed12e7be6L,0x27dc69344acf734eL },
+ { 0x80e4ff5ec260a46aL,0x7da5ebce2dc31c28L,0x485c5d73ca69f552L,
+ 0xcdfb6b2969cc84c2L,0x031c5afeed6d4ecaL,0xc7bbf4c822247637L } },
+ /* 236 */
+ { { 0x9d5b72c749fe01b2L,0x34785186793a91b8L,0xa3ba3c54cf460438L,
+ 0x73e8e43d3ab21b6fL,0x50cde8e0be57b8abL,0x6488b3a7dd204264L },
+ { 0xa9e398b3dddc4582L,0x1698c1a95bec46feL,0x7f1446ef156d3843L,
+ 0x3fd25dd8770329a2L,0x05b1221a2c710668L,0x65b2dc2aa72ee6cfL } },
+ /* 237 */
+ { { 0x21a885f7cd021d63L,0x3f344b15fea61f08L,0xad5ba6ddc5cf73e6L,
+ 0x154d0d8f227a8b23L,0x9b74373cdc559311L,0x4feab71598620fa1L },
+ { 0x5098938e7d9ec924L,0x84d54a5e6d47e550L,0x1a2d1bdc1b617506L,
+ 0x99fe1782615868a4L,0x171da7803005a924L,0xa70bf5ed7d8f79b6L } },
+ /* 238 */
+ { { 0x0bc1250dfe2216c5L,0x2c37e2507601b351L,0xb6300175d6f06b7eL,
+ 0x4dde8ca18bfeb9b7L,0x4f210432b82f843dL,0x8d70e2f9b1ac0afdL },
+ { 0x25c73b78aae91abbL,0x0230dca3863028f2L,0x8b923ecfe5cf30b7L,
+ 0xed754ec25506f265L,0x8e41b88c729a5e39L,0xee67cec2babf889bL } },
+ /* 239 */
+ { { 0xe183acf51be46c65L,0x9789538fe7565d7aL,0x87873391d9627b4eL,
+ 0xbf4ac4c19f1d9187L,0x5db99f634691f5c8L,0xa68df80374a1fb98L },
+ { 0x3c448ed1bf92b5faL,0xa098c8413e0bdc32L,0x8e74cd5579bf016cL,
+ 0x5df0d09c115e244dL,0x9418ad013410b66eL,0x8b6124cb17a02130L } },
+ /* 240 */
+ { { 0x425ec3afc26e3392L,0xc07f8470a1722e00L,0xdcc28190e2356b43L,
+ 0x4ed97dffb1ef59a6L,0xc22b3ad1c63028c1L,0x070723c268c18988L },
+ { 0x70da302f4cf49e7dL,0xc5e87c933f12a522L,0x74acdd1d18594148L,
+ 0xad5f73abca74124cL,0xe72e4a3ed69fd478L,0x615938687b117cc3L } },
+ /* 241 */
+ { { 0x7b7b9577a9aa0486L,0x6e41fb35a063d557L,0xb017d5c7da9047d7L,
+ 0x8c74828068a87ba9L,0xab45fa5cdf08ad93L,0xcd9fb2174c288a28L },
+ { 0x595446425747843dL,0x34d64c6ca56111e3L,0x12e47ea14bfce8d5L,
+ 0x17740e056169267fL,0x5c49438eeed03fb5L,0x9da30add4fc3f513L } },
+ /* 242 */
+ { { 0xc4e85282ccfa5200L,0x2707608f6a19b13dL,0xdcb9a53df5726e2fL,
+ 0x612407c9e9427de5L,0x3e5a17e1d54d582aL,0xb99877de655ae118L },
+ { 0x6f0e972b015254deL,0x92a56db1f0a6f7c5L,0xd297e4e1a656f8b2L,
+ 0x99fe0052ad981983L,0xd3652d2f07cfed84L,0xc784352e843c1738L } },
+ /* 243 */
+ { { 0x6ee90af07e9b2d8aL,0xac8d701857cf1964L,0xf6ed903171f28efcL,
+ 0x7f70d5a96812b20eL,0x27b557f4f1c61eeeL,0xf1c9bd57c6263758L },
+ { 0x5cf7d0142a1a6194L,0xdd614e0b1890ab84L,0x3ef9de100e93c2a6L,
+ 0xf98cf575e0cd91c5L,0x504ec0c614befc32L,0xd0513a666279d68cL } },
+ /* 244 */
+ { { 0xa8eadbada859fb6aL,0xcf8346e7db283666L,0x7b35e61a3e22e355L,
+ 0x293ece2c99639c6bL,0xfa0162e256f241c8L,0xd2e6c7b9bf7a1ddaL },
+ { 0xd0de625340075e63L,0x2405aa61f9ec8286L,0x2237830a8fe45494L,
+ 0x4fd01ac7364e9c8cL,0x4d9c3d21904ba750L,0xd589be14af1b520bL } },
+ /* 245 */
+ { { 0x13576a4f4662e53bL,0x35ec2f51f9077676L,0x66297d1397c0af97L,
+ 0xed3201fe9e598b58L,0x49bc752a5e70f604L,0xb54af535bb12d951L },
+ { 0x36ea4c2b212c1c76L,0x18f5bbc7eb250dfdL,0xa0d466cc9a0a1a46L,
+ 0x52564da4dac2d917L,0x206559f48e95fab5L,0x7487c1909ca67a33L } },
+ /* 246 */
+ { { 0x75abfe37dde98e9cL,0x99b90b262a411199L,0x1b410996dcdb1f7cL,
+ 0xab346f118b3b5675L,0x04852193f1f8ae1eL,0x1ec4d2276b8b98c1L },
+ { 0xba3bc92645452baaL,0x387d1858acc4a572L,0x9478eff6e51f171eL,
+ 0xf357077d931e1c00L,0xffee77cde54c8ca8L,0xfb4892ff551dc9a4L } },
+ /* 247 */
+ { { 0x5b1bdad02db8dff8L,0xd462f4fd5a2285a2L,0x1d6aad8eda00b461L,
+ 0x43fbefcf41306d1bL,0x428e86f36a13fe19L,0xc8b2f11817f89404L },
+ { 0x762528aaf0d51afbL,0xa3e2fea4549b1d06L,0x86fad8f2ea3ddf66L,
+ 0x0d9ccc4b4fbdd206L,0xcde97d4cc189ff5aL,0xc36793d6199f19a6L } },
+ /* 248 */
+ { { 0xea38909b51b85197L,0xffb17dd0b4c92895L,0x0eb0878b1ddb3f3fL,
+ 0xb05d28ffc57cf0f2L,0xd8bde2e71abd57e2L,0x7f2be28dc40c1b20L },
+ { 0x6554dca2299a2d48L,0x5130ba2e8377982dL,0x8863205f1071971aL,
+ 0x15ee62827cf2825dL,0xd4b6c57f03748f2bL,0xa9e3f4da430385a0L } },
+ /* 249 */
+ { { 0x33eb7cec83fbc9c6L,0x24a311c74541777eL,0xc81377f74f0767fcL,
+ 0x12adae364ab702daL,0xb7fcb6db2a779696L,0x4a6fb28401cea6adL },
+ { 0x5e8b1d2acdfc73deL,0xd0efae8d1b02fd32L,0x3f99c190d81d8519L,
+ 0x3c18f7fafc808971L,0x41f713e751b7ae7bL,0x0a4b3435f07fc3f8L } },
+ /* 250 */
+ { { 0x7dda3c4c019b7d2eL,0x631c8d1ad4dc4b89L,0x5489cd6e1cdb313cL,
+ 0xd44aed104c07bb06L,0x8f97e13a75f000d1L,0x0e9ee64fdda5df4dL },
+ { 0xeaa99f3b3e346910L,0x622f6921fa294ad7L,0x22aaa20d0d0b2fe9L,
+ 0x4fed2f991e5881baL,0x9af3b2d6c1571802L,0x919e67a8dc7ee17cL } },
+ /* 251 */
+ { { 0xc724fe4c76250533L,0x8a2080e57d817ef8L,0xa2afb0f4172c9751L,
+ 0x9b10cdeb17c0702eL,0xbf3975e3c9b7e3e9L,0x206117df1cd0cdc5L },
+ { 0xfb049e61be05ebd5L,0xeb0bb55c16c782c0L,0x13a331b8ab7fed09L,
+ 0xf6c58b1d632863f0L,0x6264ef6e4d3b6195L,0x92c51b639a53f116L } },
+ /* 252 */
+ { { 0xa57c7bc8288b364dL,0x4a562e087b41e5c4L,0x699d21c6698a9a11L,
+ 0xa4ed9581f3f849b9L,0xa223eef39eb726baL,0x13159c23cc2884f9L },
+ { 0x73931e583a3f4963L,0x965003890ada6a81L,0x3ee8a1c65ab2950bL,
+ 0xeedf4949775fab52L,0x63d652e14f2671b6L,0xfed4491c3c4e2f55L } },
+ /* 253 */
+ { { 0x335eadc3f4eb453eL,0x5ff74b63cadd1a5bL,0x6933d0d75d84a91aL,
+ 0x9ca3eeb9b49ba337L,0x1f6faccec04c15b8L,0x4ef19326dc09a7e4L },
+ { 0x53d2d3243dca3233L,0x0ee40590a2259d4bL,0x18c22edb5546f002L,
+ 0x9242980109ea6b71L,0xaada0addb0e91e61L,0x5fe53ef499963c50L } },
+ /* 254 */
+ { { 0x372dd06b90c28c65L,0x1765242c119ce47dL,0xc041fb806b22fc82L,
+ 0x667edf07b0a7ccc1L,0xc79599e71261beceL,0xbc69d9ba19cff22aL },
+ { 0x009d77cd13c06819L,0x635a66aee282b79dL,0x4edac4a6225b1be8L,
+ 0x57d4f4e4524008f9L,0xee299ac5b056af84L,0xcc38444c3a0bc386L } },
+ /* 255 */
+ { { 0x490643b1cd4c2356L,0x740a4851750547beL,0x643eaf29d4944c04L,
+ 0xba572479299a98a0L,0x48b29f16ee05fdf9L,0x33fb4f61089b2d7bL },
+ { 0x86704902a950f955L,0x97e1034dfedc3ddfL,0x211320b605fbb6a2L,
+ 0x23d7b93f432299bbL,0x1fe1a0578590e4a3L,0x8e1d0586f58c0ce6L } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Thin wrapper: delegates to the stripe multiplication routine using the
+ * pre-computed table of base-point multiples (p384_table).
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_6(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    int err;
+
+    /* All work is done by the generic stripe implementation. */
+    err = sp_384_ecc_mulmod_stripe_6(r, &p384_base, p384_table, k, map, heap);
+    return err;
+}
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack-based storage, used when heap allocation is disabled or not
+     * wanted; sp_384_point_new_6() picks up 'p' by name in this config. */
+    sp_point_384 p;
+    sp_digit kd[6];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    /* Obtain working point storage (stack or heap per build config). */
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Heap-allocate the 6-digit scalar buffer in small/stack-light builds. */
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    /* Otherwise use the on-stack buffer declared above. */
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Convert the mp_int scalar into the 6-digit SP representation. */
+        sp_384_from_mp(k, 6, km);
+
+        err = sp_384_ecc_mulmod_base_6(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Copy the internal point back into the caller's ecc_point. */
+        err = sp_384_point_to_ecc_point_6(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Release the heap-allocated scalar buffer (no-op when never allocated).
+     * NOTE(review): buffer is not zeroized before free here — the scalar is
+     * the public-API input, but confirm against the project's key-hygiene
+     * policy. */
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time: all six words are always inspected,
+ * with no data-dependent branching.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_6(const sp_digit* a)
+{
+    sp_digit acc = 0;
+    int i;
+
+    /* OR-fold all limbs; the fold is zero iff every limb is zero. */
+    for (i = 0; i < 6; i++) {
+        acc |= a[i];
+    }
+    return acc == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ * In-place increment of a 6 x 64-bit little-endian limb array with full
+ * carry propagation through all limbs (adds/adcs chain).
+ *
+ * a A single precision integer.
+ */
+static void sp_384_add_one_6(sp_digit* a)
+{
+    /* Loads/stores are interleaved with the carry chain; the flags set by
+     * adds/adcs are preserved across the ldr/ldp/stp instructions. */
+    __asm__ __volatile__ (
+        "ldp x1, x2, [%[a], 0]\n\t"
+        "adds x1, x1, #1\n\t"
+        "ldr x3, [%[a], 16]\n\t"
+        "adcs x2, x2, xzr\n\t"
+        "ldr x4, [%[a], 24]\n\t"
+        "adcs x3, x3, xzr\n\t"
+        "stp x1, x2, [%[a], 0]\n\t"
+        "adcs x4, x4, xzr\n\t"
+        "stp x3, x4, [%[a], 16]\n\t"
+        "ldp x1, x2, [%[a], 32]\n\t"
+        "adcs x1, x1, xzr\n\t"
+        "adcs x2, x2, xzr\n\t"
+        "stp x1, x2, [%[a], 32]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "x1", "x2", "x3", "x4"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r    A single precision integer (array of 64-bit words, little-endian
+ *      word order).
+ * size Maximum number of words to convert
+ * a    Byte array.
+ * n    Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j;
+    byte* d;
+
+    /* Consume 8 bytes at a time from the end of the big-endian array,
+     * building one 64-bit word per iteration. */
+    for (i = n - 1,j = 0; i >= 7; i -= 8) {
+        r[j] = ((sp_digit)a[i - 0] <<  0) |
+               ((sp_digit)a[i - 1] <<  8) |
+               ((sp_digit)a[i - 2] << 16) |
+               ((sp_digit)a[i - 3] << 24) |
+               ((sp_digit)a[i - 4] << 32) |
+               ((sp_digit)a[i - 5] << 40) |
+               ((sp_digit)a[i - 6] << 48) |
+               ((sp_digit)a[i - 7] << 56);
+        j++;
+    }
+
+    /* Handle the remaining 1..7 most-significant bytes, if any.
+     * NOTE(review): the byte-wise stores below index into r as raw bytes
+     * and therefore assume a little-endian host (true for this AArch64
+     * build) -- confirm before reusing on a big-endian target. */
+    if (i >= 0) {
+        r[j] = 0;
+
+        d = (byte*)r;
+        switch (i) {
+            case 6: d[n - 1 - 6] = a[6]; //fallthrough
+            case 5: d[n - 1 - 5] = a[5]; //fallthrough
+            case 4: d[n - 1 - 4] = a[4]; //fallthrough
+            case 3: d[n - 1 - 3] = a[3]; //fallthrough
+            case 2: d[n - 1 - 2] = a[2]; //fallthrough
+            case 1: d[n - 1 - 1] = a[1]; //fallthrough
+            case 0: d[n - 1 - 0] = a[0]; //fallthrough
+        }
+        j++;
+    }
+
+    /* Zero-fill any remaining high words. */
+    for (; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ * Rejection sampling: draw 48 random bytes until the value is below
+ * order-2 (p384_order2), then add one, yielding a scalar in [1, order-1].
+ *
+ * rng Random number generator.
+ * k   Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_6(WC_RNG* rng, sp_digit* k)
+{
+    byte buf[48];
+    int err;
+
+    for (;;) {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err != 0) {
+            /* Propagate the RNG failure. */
+            break;
+        }
+        sp_384_from_bin(k, 6, buf, (int)sizeof(buf));
+        if (sp_384_cmp_6(k, p384_order2) < 0) {
+            /* Candidate accepted: shift range from [0, order-2) to
+             * [1, order-1). */
+            sp_384_add_one_6(k);
+            break;
+        }
+    }
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[6];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_384 inf;
+#endif
+#endif
+ sp_point_384* point;
+ sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_384* infinity;
+#endif
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_6(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, inf, infinity);
+ }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+
+ if (err == MP_OKAY) {
+ err = sp_384_ecc_gen_k_6(rng, k);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL);
+ }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ if (err == MP_OKAY) {
+ err = sp_384_ecc_mulmod_6(infinity, point, p384_order, 1, NULL);
+ }
+ if (err == MP_OKAY) {
+ if ((sp_384_iszero_6(point->x) == 0) || (sp_384_iszero_6(point->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(k, priv);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_to_ecc_point_6(point, pub);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_384_point_free_6(infinity, 1, heap);
+#endif
+ sp_384_point_free_6(point, 1, heap);
+
+ return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r A single precision integer (6 x 64-bit words).
+ * a Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i;
+    int s;
+    int j = 0;
+
+    /* Emit words from most to least significant. */
+    for (i = 5; i >= 0; i--) {
+        /* Within a word, emit bytes big-endian: high byte first. */
+        for (s = 56; s >= 0; s -= 8) {
+            a[j++] = (byte)(r[i] >> s);
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    /* The fixed-size output (the 48-byte X ordinate) must fit. */
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* shared = priv * pub; only the X ordinate is output. */
+        sp_384_from_mp(k, 6, priv);
+        sp_384_point_from_ecc_point_6(point, pub);
+            err = sp_384_ecc_mulmod_6(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Sub b from a into a. (a -= b)
+ * 6-limb subtraction with borrow chain (subs/sbcs).
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ * returns the borrow out of the subtraction: all-ones mask (-1) when the
+ * subtraction underflowed (a < b), 0 otherwise (via csetm on carry-clear).
+ */
+static sp_digit sp_384_sub_in_place_6(sp_digit* a, const sp_digit* b)
+{
+    __asm__ __volatile__ (
+        "ldp	x2, x3, [%[a], 0]\n\t"
+        "ldp	x6, x7, [%[b], 0]\n\t"
+        "subs	x2, x2, x6\n\t"
+        "ldp	x4, x5, [%[a], 16]\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "ldp	x8, x9, [%[b], 16]\n\t"
+        "sbcs	x4, x4, x8\n\t"
+        "stp	x2, x3, [%[a], 0]\n\t"
+        "sbcs	x5, x5, x9\n\t"
+        "stp	x4, x5, [%[a], 16]\n\t"
+        "ldr	x2, [%[a], 32]\n\t"
+        "ldr	x3, [%[a], 40]\n\t"
+        "ldr	x6, [%[b], 32]\n\t"
+        "ldr	x7, [%[b], 40]\n\t"
+        "sbcs	x2, x2, x6\n\t"
+        "sbcs	x3, x3, x7\n\t"
+        "str	x2, [%[a], 32]\n\t"
+        "str	x3, [%[a], 40]\n\t"
+        "csetm	%[a], cc\n\t"
+        : [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+
+    /* The [a] operand was overwritten by csetm with the borrow mask. */
+    return (sp_digit)a;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ * The 6-word * 1-word product is 7 words; the top word is stored in r[6],
+ * so r must have room for 7 words.
+ *
+ * r A single precision integer (7 words).
+ * a A single precision integer (6 words).
+ * b A single precision digit.
+ */
+static void sp_384_mul_d_6(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Size-optimized variant: loop over the words using x9 as the byte
+     * offset, carrying the running 2-word partial sum in x3:x4 (+x5). */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr	x8, [%[a]]\n\t"
+        "mul	x5, %[b], x8\n\t"
+        "umulh	x3, %[b], x8\n\t"
+        "mov	x4, 0\n\t"
+        "str	x5, [%[r]]\n\t"
+        "mov	x5, 0\n\t"
+        "mov	x9, #8\n\t"
+        "1:\n\t"
+        "ldr	x8, [%[a], x9]\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adds	x3, x3, x6\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "str	x3, [%[r], x9]\n\t"
+        "mov	x3, x4\n\t"
+        "mov	x4, x5\n\t"
+        "mov	x5, #0\n\t"
+        "add	x9, x9, #8\n\t"
+        "cmp	x9, 48\n\t"
+        "b.lt	1b\n\t"
+        "str	x3, [%[r], 48]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#else
+    /* Fully unrolled variant: loads word pairs with ldp and rotates the
+     * carry through x3/x4/x5 as each product word is accumulated. */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldp	x8, x9, [%[a]]\n\t"
+        "mul	x3, %[b], x8\n\t"
+        "umulh	x4, %[b], x8\n\t"
+        "mov	x5, 0\n\t"
+        "# A[1] * B\n\t"
+        "str	x3, [%[r]]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[2] * B\n\t"
+        "ldp	x8, x9, [%[a], 16]\n\t"
+        "str	x4, [%[r], 8]\n\t"
+        "mov	x4, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "# A[3] * B\n\t"
+        "str	x5, [%[r], 16]\n\t"
+        "mov	x5, 0\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x3, x3, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x4, xzr, xzr\n\t"
+        "adds	x3, x3, x6\n\t"
+        "# A[4] * B\n\t"
+        "ldp	x8, x9, [%[a], 32]\n\t"
+        "str	x3, [%[r], 24]\n\t"
+        "mov	x3, 0\n\t"
+        "mul	x6, %[b], x8\n\t"
+        "adcs	x4, x4, x7\n\t"
+        "umulh	x7, %[b], x8\n\t"
+        "adc	x5, xzr, xzr\n\t"
+        "adds	x4, x4, x6\n\t"
+        "# A[5] * B\n\t"
+        "str	x4, [%[r], 32]\n\t"
+        "mul	x6, %[b], x9\n\t"
+        "adcs	x5, x5, x7\n\t"
+        "umulh	x7, %[b], x9\n\t"
+        "adc	x3, xzr, xzr\n\t"
+        "adds	x5, x5, x6\n\t"
+        "adc	x3, x3, x7\n\t"
+        "stp	x5, x3, [%[r], 40]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9"
+    );
+#endif
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ * Computes a 64-bit quotient estimate 32 bits at a time: each step divides
+ * the current remainder by (top 32 bits of div) + 1 -- an under-estimate
+ * that never overshoots -- multiplies back and subtracts, accumulating the
+ * partial quotients in x6. A final exact udiv fixes up the last bits.
+ *
+ * d1   The high order half of the number to divide.
+ * d0   The low order half of the number to divide.
+ * div  The dividend (divisor; assumed to have its top bit set here).
+ * returns the result of the division.
+ */
+static sp_digit div_384_word_6(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    sp_digit r;
+
+    __asm__ __volatile__ (
+        /* x5 = (div >> 32) + 1 : safe under-estimating divisor. */
+        "lsr	x5, %[div], 32\n\t"
+        "add	x5, x5, 1\n\t"
+
+        /* First quotient chunk from the high word. */
+        "udiv	x3, %[d1], x5\n\t"
+        "lsl	x6, x3, 32\n\t"
+        "mul	x4, %[div], x6\n\t"
+        "umulh	x3, %[div], x6\n\t"
+        "subs	%[d0], %[d0], x4\n\t"
+        "sbc	%[d1], %[d1], x3\n\t"
+
+        /* Second chunk: refine using the reduced remainder. */
+        "udiv	x3, %[d1], x5\n\t"
+        "lsl	x3, x3, 32\n\t"
+        "add	x6, x6, x3\n\t"
+        "mul	x4, %[div], x3\n\t"
+        "umulh	x3, %[div], x3\n\t"
+        "subs	%[d0], %[d0], x4\n\t"
+        "sbc	%[d1], %[d1], x3\n\t"
+
+        /* Fold remainder into a single word: (d1:d0) >> 32. */
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv	x3, x3, x5\n\t"
+        "add	x6, x6, x3\n\t"
+        "mul	x4, %[div], x3\n\t"
+        "umulh	x3, %[div], x3\n\t"
+        "subs	%[d0], %[d0], x4\n\t"
+        "sbc	%[d1], %[d1], x3\n\t"
+
+        "lsr	x3, %[d0], 32\n\t"
+        "orr	x3, x3, %[d1], lsl 32\n\t"
+
+        "udiv	x3, x3, x5\n\t"
+        "add	x6, x6, x3\n\t"
+        "mul	x4, %[div], x3\n\t"
+        "sub	%[d0], %[d0], x4\n\t"
+
+        /* Final exact correction with the true divisor. */
+        "udiv	x3, %[d0], %[div]\n\t"
+        "add	%[r], x6, x3\n\t"
+
+        : [r] "=r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "x3", "x4", "x5", "x6"
+    );
+
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ * m is either 0 or all-ones, so this either copies a or zeroes r, without
+ * branching on the mask value.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_384_mask_6(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int w;
+
+    for (w = 5; w >= 0; w--) {
+        r[w] = a[w] & m;
+    }
+#else
+    /* Unrolled for speed when not building for small code size. */
+    r[5] = a[5] & m;
+    r[4] = a[4] & m;
+    r[3] = a[3] & m;
+    r[2] = a[2] & m;
+    r[1] = a[1] & m;
+    r[0] = a[0] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ * Schoolbook division: one estimated quotient word per iteration, with a
+ * multiply-subtract followed by up to two masked add-backs to correct any
+ * over-estimate without branching on the data.
+ *
+ * a Number to be divided (2*6 words).
+ * d Number to divide with (6 words).
+ * m Multiplier result (unused; pass NULL).
+ * r Remainder from the division (6 words).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[12], t2[7];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    /* Estimate using the top word of the divisor. */
+    div = d[5];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 6);
+    for (i=5; i>=0; i--) {
+        /* Quotient-word estimate from the top two remainder words. */
+        r1 = div_384_word_6(t1[6 + i], t1[6 + i - 1], div);
+
+        /* t1 -= r1 * d, tracking the borrow in the top word. */
+        sp_384_mul_d_6(t2, d, r1);
+        t1[6 + i] += sp_384_sub_in_place_6(&t1[i], t2);
+        t1[6 + i] -= t2[6];
+        /* Two masked add-backs: t1[6+i] is an all-ones mask when the
+         * subtraction went negative, so d is conditionally re-added. */
+        sp_384_mask_6(t2, d, t1[6 + i]);
+        t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+        sp_384_mask_6(t2, d, t1[6 + i]);
+        t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+    }
+
+    /* Final conditional subtract leaves the remainder in [0, d). */
+    r1 = sp_384_cmp_6(t1, d) >= 0;
+    sp_384_cond_sub_6(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Thin wrapper over sp_384_div_6 that discards the quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    int err;
+
+    err = sp_384_div_6(a, m, NULL, r);
+    return err;
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve.
+ * Exponent for computing the modular inverse via Fermat's little theorem
+ * (a^(order-2) mod order) in the bit-by-bit ladder below. */
+static const uint64_t p384_order_minus_2[6] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve.
+ * The high 192 bits are all ones and are handled by a fixed addition
+ * chain; only the low 192 bits need to be scanned bit-by-bit. */
+static const uint64_t p384_order_low[3] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU
+
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P384 curve. (r = a * b mod order)
+ * Operands are expected in Montgomery form with respect to the order;
+ * the full product is Montgomery-reduced back to 6 words.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_6(r, a, b);
+    sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ * Operand is expected in Montgomery form with respect to the order;
+ * the full square is Montgomery-reduced back to 6 words.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_order_6(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_6(r, a);
+    sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square a number mod the order of P384 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r Result of the squarings.
+ * a Number to square.
+ * n Number of squarings (always performs at least one).
+ */
+static void sp_384_mont_sqr_n_order_6(sp_digit* r, const sp_digit* a, int n)
+{
+    sp_384_mont_sqr_order_6(r, a);
+    /* n-1 further squarings of the accumulated result. */
+    while (--n > 0) {
+        sp_384_mont_sqr_order_6(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ * Uses Fermat's little theorem: r = a^(order-2) mod order. The exponent is
+ * a fixed public constant, so the branching below does not leak secrets.
+ *
+ * r  Inverse result.
+ * a  Number to invert.
+ * td Temporary data (at least 6 words; 3 * 2*6 words when !SP_SMALL).
+ */
+static void sp_384_mont_inv_order_6(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Simple left-to-right square-and-multiply over order-2's bits. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 6);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_6(t, t);
+        if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_6(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 6U);
+#else
+    /* Addition chain builds a^(2^192 - 1) (the all-ones top half of
+     * order-2), then scans the low 192 bits of the exponent. */
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 6;
+    sp_digit* t3 = td + 4 * 6;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_6(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_6(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_6(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_6(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_6(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_6(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_6(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_6(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_6(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_6(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_6(t2, t, 48);
+    /* t= a^fffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_6(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_6(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_6(t2, t2, t);
+    /* Square-and-multiply over the low 192 bits of order-2 (bits 191..1). */
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_6(t2, t2);
+        if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_6(t2, t2, a);
+        }
+    }
+    /* Bit 0 of order-2 is 1: final square and multiply. */
+    sp_384_mont_sqr_order_6(t2, t2);
+    sp_384_mont_mul_order_6(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 384 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash    Hash to sign.
+ * hashLen Length of the hash data.
+ * rng     Random number generator.
+ * priv    Private part of key - scalar.
+ * rm      First part of result as an mp_int.
+ * sm      Second part of result as an mp_int.
+ * km      If not NULL and not zero, the nonce k to use (it is consumed
+ *         and zeroed); otherwise a fresh random k is generated.
+ * heap    Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*6];
+    sp_digit xd[2*6];
+    sp_digit kd[2*6];
+    sp_digit rd[2*6];
+    sp_digit td[3 * 2*6];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int64_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* One allocation holds e, x, k, r (2*6 words each) and tmp
+         * (3 * 2*6 words): 7 * 2 * 6 words in total. */
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 6;
+        x = d + 2 * 6;
+        k = d + 4 * 6;
+        r = d + 6 * 6;
+        tmp = d + 8 * 6;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        /* s and kInv alias e and k: those values are no longer needed
+         * once s and kInv are computed. */
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 6, hash, (int)hashLen);
+    }
+
+    /* Retry until a usable (non-zero s) signature is produced. */
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 6, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_6(rng, k);
+        }
+        else {
+            /* Caller supplied nonce: consume and zero it. */
+            sp_384_from_mp(k, 6, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 6U);
+            sp_384_norm_6(r);
+            c = sp_384_cmp_6(r, p384_order);
+            sp_384_cond_sub_6(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_6(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_384_mul_6(k, k, p384_norm_order);
+            err = sp_384_mod_6(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_6(k);
+            /* kInv = 1/k mod order */
+            sp_384_mont_inv_order_6(kInv, k, tmp);
+            sp_384_norm_6(kInv);
+
+            /* s = r * x + e */
+            sp_384_mul_6(x, x, r);
+            err = sp_384_mod_6(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_6(x);
+            carry = sp_384_add_6(s, e, x);
+            sp_384_cond_sub_6(s, s, p384_order, 0 - carry);
+            sp_384_norm_6(s);
+            c = sp_384_cmp_6(s, p384_order);
+            sp_384_cond_sub_6(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_6(s);
+
+            /* s = s * k^-1 mod order */
+            sp_384_mont_mul_order_6(s, s, kInv);
+            sp_384_norm_6(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_6(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        /* Zero the whole allocation (including tmp, which held values
+         * derived from k) - previously only the first 8*6 words of the
+         * 7*2*6-word buffer were cleared. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 7U * 2U * 6U);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    /* Clear all secret-bearing temporaries. */
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 6U);
+#endif
+    sp_384_point_free_6(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 384)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash    Hash of the message that was signed.
+ * hashLen Length of the hash data.
+ * pX      X ordinate of the public key point Q.
+ * pY      Y ordinate of the public key point Q.
+ * pZ      Z ordinate of the public key point Q.
+ * r       First part of the signature as an mp_int.
+ * sm      Second part of the signature as an mp_int.
+ * res     Set to 1 when the signature verifies, 0 otherwise.
+ * heap    Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success
+ * (a successful return does NOT mean the signature is valid - check res).
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*6];
+    sp_digit u2d[2*6];
+    sp_digit sd[2*6];
+    sp_digit tmpd[2*6 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int64_t c;
+    int err;
+
+    err = sp_384_point_new_6(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* One allocation holds u1, u2, s (2*6 words each) and tmp. */
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 6;
+        u2  = d + 2 * 6;
+        s   = d + 4 * 6;
+        tmp = d + 6 * 6;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        /* Import hash (e), signature (r, s) and public key point Q. */
+        sp_384_from_bin(u1, 6, hash, (int)hashLen);
+        sp_384_from_mp(u2, 6, r);
+        sp_384_from_mp(s, 6, sm);
+        sp_384_from_mp(p2->x, 6, pX);
+        sp_384_from_mp(p2->y, 6, pY);
+        sp_384_from_mp(p2->z, 6, pZ);
+
+        /* Convert s to Montgomery form (mod order). */
+        {
+            sp_384_mul_6(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_6(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_6(s);
+        /* u1 = e/s mod order, u2 = r/s mod order. */
+        {
+            sp_384_mont_inv_order_6(s, s, tmp);
+            sp_384_mont_mul_order_6(u1, u1, s);
+            sp_384_mont_mul_order_6(u2, u2, s);
+        }
+
+        err = sp_384_ecc_mulmod_base_6(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_6(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* p1 = u1.G + u2.Q, handling the degenerate Z == 0 results of the
+         * projective addition. */
+        {
+            sp_384_proj_point_add_6(p1, p1, p2, tmp);
+            if (sp_384_iszero_6(p1->z)) {
+                if (sp_384_iszero_6(p1->x) && sp_384_iszero_6(p1->y)) {
+                    /* Add degenerated to a double: u1.G == u2.Q. */
+                    sp_384_proj_point_dbl_6(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 6, r);
+        err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_6(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 6, r);
+            carry = sp_384_add_6(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_6(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_6(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod,
+                            p384_mp_mod);
+                        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_6(p1, 0, heap);
+    sp_384_point_free_6(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ * Verifies y^2 == x^3 - 3.x + b (mod p), the P-384 short Weierstrass
+ * equation with a = -3, computed as y^2 + (p - x^3) + 3.x == b.
+ *
+ * point EC point.
+ * heap  Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_6(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*6];
+    sp_digit t2d[2*6];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 6;
+        t2 = d + 2 * 6;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* t1 = y^2 mod p */
+        sp_384_sqr_6(t1, point->y);
+        (void)sp_384_mod_6(t1, t1, p384_mod);
+        /* t2 = x^3 mod p */
+        sp_384_sqr_6(t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        sp_384_mul_6(t2, t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        /* t1 = y^2 + (p - x^3) = y^2 - x^3 mod p */
+        (void)sp_384_sub_6(t2, p384_mod, t2);
+        sp_384_mont_add_6(t1, t1, t2, p384_mod);
+
+        /* t1 += 3.x : completes y^2 - x^3 + 3.x (a = -3 term). */
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+
+        /* On the curve iff the result equals the constant b. */
+        if (sp_384_cmp_6(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_6(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 6, pX);
+        sp_384_from_mp(pub->y, 6, pY);
+        /* Affine point: z = 1. */
+        sp_384_from_bin(pub->z, 6, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_6(pub, NULL);
+    }
+
+    sp_384_point_free_6(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX    X ordinate of EC point.
+ * pY    Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[6];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_6(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 6, pX);
+        sp_384_from_mp(pub->y, 6, pY);
+        /* Affine point: z = 1. */
+        sp_384_from_bin(pub->z, 6, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 6, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_384_iszero_6(pub->x) != 0) &&
+            (sp_384_iszero_6(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y: both must be less than the prime. */
+        if (sp_384_cmp_6(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_6(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_6(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_384_ecc_mulmod_6(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_6(p->x) == 0) ||
+            (sp_384_iszero_6(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_384_ecc_mulmod_base_6(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_6(p->x, pub->x) != 0 ||
+            sp_384_cmp_6(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Static-stack build: temporaries and both points live on the stack.
+     * tmp needs 5 field elements of 2*6 digits for the add routine. */
+    sp_digit tmpd[2 * 6 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    /* sp_384_point_new_6 binds p/q to the stack copies or heap-allocates,
+     * depending on the small-stack configuration. */
+    err = sp_384_point_new_6(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 5, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Load both projective points as 6-digit (64-bit) field elements. */
+        sp_384_from_mp(p->x, 6, pX);
+        sp_384_from_mp(p->y, 6, pY);
+        sp_384_from_mp(p->z, 6, pZ);
+        sp_384_from_mp(q->x, 6, qX);
+        sp_384_from_mp(q->y, 6, qY);
+        sp_384_from_mp(q->z, 6, qZ);
+
+        /* p = p + q; result stays in projective coordinates. */
+        sp_384_proj_point_add_6(p, p, q, tmp);
+    }
+
+    /* Copy the (still projective) result back out to mp_ints. */
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(q, 0, NULL);
+    sp_384_point_free_6(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Doubling needs only 2 field-element temporaries of 2*6 digits. */
+    sp_digit tmpd[2 * 6 * 2];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    /* Bind p to the stack copy or heap-allocate, per configuration. */
+    err = sp_384_point_new_6(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 2, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Load the projective point as 6-digit field elements. */
+        sp_384_from_mp(p->x, 6, pX);
+        sp_384_from_mp(p->y, 6, pY);
+        sp_384_from_mp(p->z, 6, pZ);
+
+        /* p = 2*p in projective coordinates. */
+        sp_384_proj_point_dbl_6(p, p, tmp);
+    }
+
+    /* Copy the (still projective) result back out to mp_ints. */
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Mapping to affine needs 6 field-element temporaries of 2*6 digits. */
+    sp_digit tmpd[2 * 6 * 6];
+    sp_point_384 pd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    int err;
+
+    /* Bind p to the stack copy or heap-allocate, per configuration. */
+    err = sp_384_point_new_6(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        /* Load the projective point as 6-digit field elements. */
+        sp_384_from_mp(p->x, 6, pX);
+        sp_384_from_mp(p->y, 6, pY);
+        sp_384_from_mp(p->z, 6, pZ);
+
+        /* Convert in place to affine coordinates (Z becomes one). */
+        sp_384_map_6(p, p, tmp);
+    }
+
+    /* Write the affine result back over the caller's inputs. */
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_6(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    /* Five temporaries, each a double-width field element (2*6 digits). */
+    sp_digit t1d[2 * 6];
+    sp_digit t2d[2 * 6];
+    sp_digit t3d[2 * 6];
+    sp_digit t4d[2 * 6];
+    sp_digit t5d[2 * 6];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* One allocation carved into the five temporaries below. */
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 6, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 6;
+        t2 = d + 2 * 6;
+        t3 = d + 4 * 6;
+        t4 = d + 6 * 6;
+        t5 = d + 8 * 6;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        /* Square root by exponentiation: the fixed addition chain below
+         * raises y to the power whose final value (see the last comment in
+         * the chain) is (p384 + 1) / 4.  The chain uses a constant sequence
+         * of squarings and multiplications in Montgomery form. */
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_6(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_6(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_6(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_6(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_6(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_6(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_6(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_6(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_6(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_6(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_6(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_6(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_6(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_6(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_6(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    /* Two double-width field elements for x and the computed y. */
+    sp_digit xd[2 * 6];
+    sp_digit yd[2 * 6];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* One allocation split into x (offset 0) and y (offset 2*6). */
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 6, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 6;
+        y = d + 2 * 6;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        /* Load x and convert it to Montgomery form. */
+        sp_384_from_mp(x, 6, xm);
+        err = sp_384_mod_mul_norm_6(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* Evaluate the curve equation y^2 = x^3 - 3x + b step by step. */
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_6(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_6(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b  (x is no longer needed; reuse it to hold b) */
+        err = sp_384_mod_mul_norm_6(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_6(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_6(y);
+    }
+    if (err == MP_OKAY) {
+        /* Leave Montgomery form, then pick the root with the requested
+         * parity: if the low bit does not match 'odd', negate mod p. */
+        XMEMSET(y + 6, 0, 6U * sizeof(sp_digit));
+        sp_384_mont_reduce_6(y, p384_mod, p384_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_6(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_SP_ARM64_ASM */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_armthumb.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_armthumb.c
new file mode 100644
index 000000000..40cb431a3
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_armthumb.c
@@ -0,0 +1,27863 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+ defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef WOLFSSL_SP_ARM_THUMB_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;    /* bit offset within the current 32-bit digit */
+
+    r[0] = 0;
+    /* Walk the big-endian byte array from the end so the least significant
+     * byte lands in the low bits of r[0]. */
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            /* Digit is full: mask to 32 bits and spill the byte's high
+             * bits (if any) into the next digit. */
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any remaining high digits. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    /* mp_int digits match sp 32-bit digits exactly: straight copy. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    /* mp_int digits are wider than 32 bits: split each across one or more
+     * sp digits, tracking the bit offset s in the current output digit. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* mp_int digits are narrower than 32 bits: pack several per sp digit. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    /* Zero any remaining high digits. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_2048_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    /* Fill the 256-byte output from the end (least significant byte last)
+     * while walking the 64 digits from least to most significant. */
+    j = 2048 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<64 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit the rest of this digit a byte at a time. */
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* s = bits of the next digit already owed to the current byte. */
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Product-scanning (comba) multiply in ARM Thumb-1 assembly: for each
+     * output digit, all contributing a[i]*b[k-i] partial products are summed
+     * into a three-register accumulator (r3:r4:r5).  Each 32x32 product is
+     * built from four 16x16 MULs because Thumb-1 MULS returns only the low
+     * 32 bits.  The result is accumulated into a local buffer first so that
+     * r may alias a or b. */
+    sp_digit tmp[8 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r9, %[a]\n\t"
+        "mov	r10, %[b]\n\t"
+        "mov	r6, #32\n\t"
+        "add	r6, r9\n\t"
+        "mov	r12, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #28\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	%[b], r8\n\t"
+        "sub	%[b], %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	%[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [%[b]]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add	%[a], #4\n\t"
+        "sub	%[b], #4\n\t"
+        "cmp	%[a], r12\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r8\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #56\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[a], r9\n\t"
+        "mov	%[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    /* Copy the finished 16-digit product out to the caller's buffer. */
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    /* Product-scanning square in ARM Thumb-1 assembly.  Off-diagonal
+     * partial products a[i]*a[k-i] (i != k-i) are added twice; diagonal
+     * terms a[i]^2 once.  A 64-byte scratch area is carved out of the
+     * stack inside the asm (sub sp) so the result can be built even when
+     * r aliases a, then copied out at label 4 before sp is restored. */
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r8, r3\n\t"
+        "mov	r11, %[r]\n\t"
+        "mov	r6, #64\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, r6\n\t"
+        "mov	r10, sp\n\t"
+        "mov	r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #28\n\t"
+        "mov	%[a], r8\n\t"
+        "sub	%[a], r6\n\t"
+        "sbc	r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], r6\n\t"
+        "mov	r2, r8\n\t"
+        "sub	r2, %[a]\n\t"
+        "add	%[a], r9\n\t"
+        "add	r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r7\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r7, r6\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r7, [r2]\n\t"
+        "lsl	r7, r7, #16\n\t"
+        "lsr	r7, r7, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r6\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, %[r]\n\t"
+        "adc	r5, %[r]\n\t"
+        "mul	r7, r7\n\t"
+        "add	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "lsr	r7, r6, #16\n\t"
+        "lsl	r6, r6, #16\n\t"
+        "lsr	r6, r6, #16\n\t"
+        "mul	r6, r7\n\t"
+        "lsr	r7, r6, #15\n\t"
+        "lsl	r6, r6, #17\n\t"
+        "add	r3, r6\n\t"
+        "adc	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add	%[a], #4\n\t"
+        "sub	r2, #4\n\t"
+        "mov	r6, #32\n\t"
+        "add	r6, r9\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r7, r8\n\t"
+        "add	r7, r9\n\t"
+        "cmp	%[a], r7\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r10\n\t"
+        "mov	r7, r8\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r7, #4\n\t"
+        "mov	r8, r7\n\t"
+        "mov	r6, #56\n\t"
+        "cmp	r7, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r9\n\t"
+        "str	r3, [%[r], r7]\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	r3, #60\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "sub	r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #64\n\t"
+        "add	sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Fully unrolled 8-digit add: first word with ADD, the rest with ADC
+     * to propagate the carry chain; the final carry is extracted into c
+     * (0 or 1) via the trailing mov/adc pair and returned. */
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
<doc_update>
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a,
+        const sp_digit* b)
+{
+    /* Fully unrolled 16-digit in-place subtract, two digits per step:
+     * first word uses SUB, the rest SBC to chain the borrow.  The final
+     * "sbc c, c" leaves c = 0 when there was no borrow and all-ones
+     * (as an sp_digit) when there was. */
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Fully unrolled 16-digit add (ADD then chained ADC); the final carry
+     * is extracted into c (0 or 1) and returned. */
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "add	r4, r5\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #4]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #12]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #20]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #28]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #36]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #44]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        "ldr	r4, [%[a], #48]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #52]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #52]\n\t"
+        "ldr	r4, [%[a], #56]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #60]\n\t"
+        "adc	r4, r5\n\t"
+        "str	r4, [%[r], #60]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    /* m is an all-zero or all-one mask; applying it unconditionally keeps
+     * this branch-free (constant time with respect to m). */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<8; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    /* Unrolled in the non-small build. */
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Karatsuba multiply built on the 8-digit primitives:
+     *   z0 = a0*b0, z2 = a1*b1, z1 = (a0+a1)*(b0+b1),
+     *   result = z2*B^2 + (z1 - z2 - z0)*B + z0   (B = 2^(8*32)).
+     * Carries out of the half-sums (ca, cb) are folded in branch-free via
+     * the mask helper instead of conditionals. */
+    sp_digit* z0 = r;
+    sp_digit z1[16];
+    sp_digit a1[8];
+    sp_digit b1[8];
+    sp_digit z2[16];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_8(a1, a, &a[8]);
+    cb = sp_2048_add_8(b1, b, &b[8]);
+    u  = ca & cb;
+    sp_2048_mul_8(z1, a1, b1);
+    sp_2048_mul_8(z2, &a[8], &b[8]);
+    sp_2048_mul_8(z0, a, b);
+    /* Compensate for the dropped carry bits of (a0+a1) and (b0+b1). */
+    sp_2048_mask_8(r + 16, a1, 0 - cb);
+    sp_2048_mask_8(b1, b1, 0 - ca);
+    u += sp_2048_add_8(r + 16, r + 16, b1);
+    u += sp_2048_sub_in_place_16(z1, z2);
+    u += sp_2048_sub_in_place_16(z1, z0);
+    u += sp_2048_add_16(r + 8, r + 8, z1);
+    r[24] = u;
+    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+{
+    /* Karatsuba square built on the 8-digit primitives:
+     *   z0 = a0^2, z2 = a1^2, z1 = (a0+a1)^2,
+     *   result = z2*B^2 + (z1 - z2 - z0)*B + z0   (B = 2^(8*32)).
+     * The carry u out of a0+a1 is folded in branch-free via the mask
+     * helper and a doubling add. */
+    sp_digit* z0 = r;
+    sp_digit z2[16];
+    sp_digit z1[16];
+    sp_digit a1[8];
+    sp_digit u;
+
+    u = sp_2048_add_8(a1, a, &a[8]);
+    sp_2048_sqr_8(z1, a1);
+    sp_2048_sqr_8(z2, &a[8]);
+    sp_2048_sqr_8(z0, a);
+    /* Compensate for the dropped carry bit of (a0+a1). */
+    sp_2048_mask_8(r + 16, a1, 0 - u);
+    u += sp_2048_add_8(r + 16, r + 16, r + 16);
+    u += sp_2048_sub_in_place_16(z1, z2);
+    u += sp_2048_sub_in_place_16(z1, z0);
+    u += sp_2048_add_16(r + 8, r + 8, z1);
+    r[24] = u;
+    XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
+        const sp_digit* b)
+{
+    /* Fully unrolled 32-digit in-place subtract, two digits per step:
+     * first word uses SUB, the rest SBC to chain the borrow.  The final
+     * "sbc c, c" leaves c = 0 when there was no borrow and all-ones
+     * (as an sp_digit) when there was. */
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r3, [%[a], #0]\n\t"
+        "ldr	r4, [%[a], #4]\n\t"
+        "ldr	r5, [%[b], #0]\n\t"
+        "ldr	r6, [%[b], #4]\n\t"
+        "sub	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #0]\n\t"
+        "str	r4, [%[a], #4]\n\t"
+        "ldr	r3, [%[a], #8]\n\t"
+        "ldr	r4, [%[a], #12]\n\t"
+        "ldr	r5, [%[b], #8]\n\t"
+        "ldr	r6, [%[b], #12]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #8]\n\t"
+        "str	r4, [%[a], #12]\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "ldr	r5, [%[b], #16]\n\t"
+        "ldr	r6, [%[b], #20]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #16]\n\t"
+        "str	r4, [%[a], #20]\n\t"
+        "ldr	r3, [%[a], #24]\n\t"
+        "ldr	r4, [%[a], #28]\n\t"
+        "ldr	r5, [%[b], #24]\n\t"
+        "ldr	r6, [%[b], #28]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #24]\n\t"
+        "str	r4, [%[a], #28]\n\t"
+        "ldr	r3, [%[a], #32]\n\t"
+        "ldr	r4, [%[a], #36]\n\t"
+        "ldr	r5, [%[b], #32]\n\t"
+        "ldr	r6, [%[b], #36]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #32]\n\t"
+        "str	r4, [%[a], #36]\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "ldr	r5, [%[b], #40]\n\t"
+        "ldr	r6, [%[b], #44]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #40]\n\t"
+        "str	r4, [%[a], #44]\n\t"
+        "ldr	r3, [%[a], #48]\n\t"
+        "ldr	r4, [%[a], #52]\n\t"
+        "ldr	r5, [%[b], #48]\n\t"
+        "ldr	r6, [%[b], #52]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #48]\n\t"
+        "str	r4, [%[a], #52]\n\t"
+        "ldr	r3, [%[a], #56]\n\t"
+        "ldr	r4, [%[a], #60]\n\t"
+        "ldr	r5, [%[b], #56]\n\t"
+        "ldr	r6, [%[b], #60]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #56]\n\t"
+        "str	r4, [%[a], #60]\n\t"
+        "ldr	r3, [%[a], #64]\n\t"
+        "ldr	r4, [%[a], #68]\n\t"
+        "ldr	r5, [%[b], #64]\n\t"
+        "ldr	r6, [%[b], #68]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #64]\n\t"
+        "str	r4, [%[a], #68]\n\t"
+        "ldr	r3, [%[a], #72]\n\t"
+        "ldr	r4, [%[a], #76]\n\t"
+        "ldr	r5, [%[b], #72]\n\t"
+        "ldr	r6, [%[b], #76]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #72]\n\t"
+        "str	r4, [%[a], #76]\n\t"
+        "ldr	r3, [%[a], #80]\n\t"
+        "ldr	r4, [%[a], #84]\n\t"
+        "ldr	r5, [%[b], #80]\n\t"
+        "ldr	r6, [%[b], #84]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #80]\n\t"
+        "str	r4, [%[a], #84]\n\t"
+        "ldr	r3, [%[a], #88]\n\t"
+        "ldr	r4, [%[a], #92]\n\t"
+        "ldr	r5, [%[b], #88]\n\t"
+        "ldr	r6, [%[b], #92]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #88]\n\t"
+        "str	r4, [%[a], #92]\n\t"
+        "ldr	r3, [%[a], #96]\n\t"
+        "ldr	r4, [%[a], #100]\n\t"
+        "ldr	r5, [%[b], #96]\n\t"
+        "ldr	r6, [%[b], #100]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #96]\n\t"
+        "str	r4, [%[a], #100]\n\t"
+        "ldr	r3, [%[a], #104]\n\t"
+        "ldr	r4, [%[a], #108]\n\t"
+        "ldr	r5, [%[b], #104]\n\t"
+        "ldr	r6, [%[b], #108]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #104]\n\t"
+        "str	r4, [%[a], #108]\n\t"
+        "ldr	r3, [%[a], #112]\n\t"
+        "ldr	r4, [%[a], #116]\n\t"
+        "ldr	r5, [%[b], #112]\n\t"
+        "ldr	r6, [%[b], #116]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #112]\n\t"
+        "str	r4, [%[a], #116]\n\t"
+        "ldr	r3, [%[a], #120]\n\t"
+        "ldr	r4, [%[a], #124]\n\t"
+        "ldr	r5, [%[b], #120]\n\t"
+        "ldr	r6, [%[b], #124]\n\t"
+        "sbc	r3, r5\n\t"
+        "sbc	r4, r6\n\t"
+        "str	r3, [%[a], #120]\n\t"
+        "str	r4, [%[a], #124]\n\t"
+        "sbc	%[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled 32-word (1024-bit) ripple-carry addition.
+ * Returns the carry out of the most significant word (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* Thumb-1 two-operand ADD/ADC are flag-setting: the first ADD
+         * seeds the carry chain, each ADC propagates it. LDR/STR do not
+         * disturb the flags. */
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "add r4, r5\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #36]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #44]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #44]\n\t"
+        "ldr r4, [%[a], #48]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #52]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #52]\n\t"
+        "ldr r4, [%[a], #56]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #60]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #60]\n\t"
+        "ldr r4, [%[a], #64]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #64]\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #68]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #68]\n\t"
+        "ldr r4, [%[a], #72]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #72]\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #76]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #76]\n\t"
+        "ldr r4, [%[a], #80]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #80]\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #84]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #84]\n\t"
+        "ldr r4, [%[a], #88]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #88]\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #92]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #92]\n\t"
+        "ldr r4, [%[a], #96]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #96]\n\t"
+        "ldr r4, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #100]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #100]\n\t"
+        "ldr r4, [%[a], #104]\n\t"
+        "ldr r5, [%[b], #104]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #104]\n\t"
+        "ldr r4, [%[a], #108]\n\t"
+        "ldr r5, [%[b], #108]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #108]\n\t"
+        "ldr r4, [%[a], #112]\n\t"
+        "ldr r5, [%[b], #112]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #112]\n\t"
+        "ldr r4, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #116]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #116]\n\t"
+        "ldr r4, [%[a], #120]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #120]\n\t"
+        "ldr r4, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #124]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #124]\n\t"
+        /* Capture the final carry-out: MOV immediate updates N/Z only,
+         * so ADC still sees C from the last word. */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* Apply mask m to every digit of a, storing the result in r.
+ *
+ * r Output: 16 masked digits.
+ * a Input: 16 digits.
+ * m Mask (normally all-ones or all-zeros) ANDed with each digit.
+ */
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i = 0;
+
+    while (i < 16) {
+        r[i] = a[i] & m;
+        i++;
+    }
+#else
+    int j;
+
+    /* Unrolled eight digits at a time. */
+    for (j = 0; j < 16; j += 8) {
+        r[j+0] = a[j+0] & m;
+        r[j+1] = a[j+1] & m;
+        r[j+2] = a[j+2] & m;
+        r[j+3] = a[j+3] & m;
+        r[j+4] = a[j+4] & m;
+        r[j+5] = a[j+5] & m;
+        r[j+6] = a[j+6] & m;
+        r[j+7] = a[j+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: with 512-bit halves a = a1*2^512 + a0 and
+ * b = b1*2^512 + b0, r = z2*2^1024 + z1*2^512 + z0 where z0 = a0*b0,
+ * z2 = a1*b1 and z1 = (a0+a1)*(b0+b1) - z0 - z2.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* low = r;      /* z0 = a0*b0 is built directly in r. */
+    sp_digit mid[32];       /* z1 = (a0+a1)*(b0+b1), low 32 words. */
+    sp_digit aSum[16];      /* a0 + a1, carry kept separately. */
+    sp_digit bSum[16];      /* b0 + b1, carry kept separately. */
+    sp_digit high[32];      /* z2 = a1*b1 */
+    sp_digit carry;
+    sp_digit carryA;
+    sp_digit carryB;
+
+    carryA = sp_2048_add_16(aSum, a, &a[16]);
+    carryB = sp_2048_add_16(bSum, b, &b[16]);
+    carry = carryA & carryB;
+    sp_2048_mul_32(r, a, b); /* placeholder - removed */
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring: with 512-bit halves a = a1*2^512 + a0,
+ * r = z2*2^1024 + z1*2^512 + z0 where z0 = a0^2, z2 = a1^2 and
+ * z1 = (a0+a1)^2 - z0 - z2.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* low = r;      /* z0 = a0^2 is built directly in r. */
+    sp_digit high[32];      /* z2 = a1^2 */
+    sp_digit mid[32];       /* z1 = (a0+a1)^2, low 32 words. */
+    sp_digit aSum[16];      /* a0 + a1, carry kept separately. */
+    sp_digit carry;
+
+    carry = sp_2048_add_16(aSum, a, &a[16]);
+    sp_2048_sqr_16(mid, aSum);
+    sp_2048_sqr_16(high, &a[16]);
+    sp_2048_sqr_16(low, a);
+    /* If the half-sum overflowed, fold 2 * 2^512 * aSum back in. */
+    sp_2048_mask_16(r + 32, aSum, 0 - carry);
+    carry += sp_2048_add_16(r + 32, r + 32, r + 32);
+    carry += sp_2048_sub_in_place_32(mid, high);
+    carry += sp_2048_sub_in_place_32(mid, low);
+    carry += sp_2048_add_32(r + 16, r + 16, mid);
+    r[48] = carry;
+    XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+    (void)sp_2048_add_32(r + 32, r + 32, high);
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * In-place 64-word (2048-bit) subtraction, fully unrolled. Returns 0 on
+ * no borrow out of the top word, all-ones (-1) on borrow.
+ *
+ * a A single precision integer (also the result).
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* The first SUB seeds the borrow; every later SBC consumes and
+         * regenerates it. LDR/STR leave the flags untouched, so loads
+         * may sit between the flag-producing and flag-consuming ops. */
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sub r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #0]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r6, [%[b], #12]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #8]\n\t"
+        "str r4, [%[a], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r6, [%[b], #20]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #16]\n\t"
+        "str r4, [%[a], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r6, [%[b], #28]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #24]\n\t"
+        "str r4, [%[a], #28]\n\t"
+        "ldr r3, [%[a], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r6, [%[b], #36]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #32]\n\t"
+        "str r4, [%[a], #36]\n\t"
+        "ldr r3, [%[a], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r6, [%[b], #44]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #40]\n\t"
+        "str r4, [%[a], #44]\n\t"
+        "ldr r3, [%[a], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "ldr r6, [%[b], #52]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #48]\n\t"
+        "str r4, [%[a], #52]\n\t"
+        "ldr r3, [%[a], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "ldr r6, [%[b], #60]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #56]\n\t"
+        "str r4, [%[a], #60]\n\t"
+        "ldr r3, [%[a], #64]\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "ldr r6, [%[b], #68]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #64]\n\t"
+        "str r4, [%[a], #68]\n\t"
+        "ldr r3, [%[a], #72]\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "ldr r6, [%[b], #76]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #72]\n\t"
+        "str r4, [%[a], #76]\n\t"
+        "ldr r3, [%[a], #80]\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "ldr r6, [%[b], #84]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #80]\n\t"
+        "str r4, [%[a], #84]\n\t"
+        "ldr r3, [%[a], #88]\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "ldr r6, [%[b], #92]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #88]\n\t"
+        "str r4, [%[a], #92]\n\t"
+        "ldr r3, [%[a], #96]\n\t"
+        "ldr r4, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "ldr r6, [%[b], #100]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #96]\n\t"
+        "str r4, [%[a], #100]\n\t"
+        "ldr r3, [%[a], #104]\n\t"
+        "ldr r4, [%[a], #108]\n\t"
+        "ldr r5, [%[b], #104]\n\t"
+        "ldr r6, [%[b], #108]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #104]\n\t"
+        "str r4, [%[a], #108]\n\t"
+        "ldr r3, [%[a], #112]\n\t"
+        "ldr r4, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #112]\n\t"
+        "ldr r6, [%[b], #116]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #112]\n\t"
+        "str r4, [%[a], #116]\n\t"
+        "ldr r3, [%[a], #120]\n\t"
+        "ldr r4, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "ldr r6, [%[b], #124]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #120]\n\t"
+        "str r4, [%[a], #124]\n\t"
+        /* Save the running borrow in c (0 or -1): the pointer ADDs below
+         * are flag-setting on Thumb-1 and would destroy it. "0 - c" then
+         * recreates the borrow in the C flag; the value left in r5 is
+         * dead (r5 is reloaded immediately, and LDR keeps flags). */
+        "sbc %[c], %[c]\n\t"
+        "add %[a], #0x80\n\t"
+        "add %[b], #0x80\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #0]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r6, [%[b], #12]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #8]\n\t"
+        "str r4, [%[a], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r6, [%[b], #20]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #16]\n\t"
+        "str r4, [%[a], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r6, [%[b], #28]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #24]\n\t"
+        "str r4, [%[a], #28]\n\t"
+        "ldr r3, [%[a], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r6, [%[b], #36]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #32]\n\t"
+        "str r4, [%[a], #36]\n\t"
+        "ldr r3, [%[a], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r6, [%[b], #44]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #40]\n\t"
+        "str r4, [%[a], #44]\n\t"
+        "ldr r3, [%[a], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "ldr r6, [%[b], #52]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #48]\n\t"
+        "str r4, [%[a], #52]\n\t"
+        "ldr r3, [%[a], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "ldr r6, [%[b], #60]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #56]\n\t"
+        "str r4, [%[a], #60]\n\t"
+        "ldr r3, [%[a], #64]\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "ldr r6, [%[b], #68]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #64]\n\t"
+        "str r4, [%[a], #68]\n\t"
+        "ldr r3, [%[a], #72]\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "ldr r6, [%[b], #76]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #72]\n\t"
+        "str r4, [%[a], #76]\n\t"
+        "ldr r3, [%[a], #80]\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "ldr r6, [%[b], #84]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #80]\n\t"
+        "str r4, [%[a], #84]\n\t"
+        "ldr r3, [%[a], #88]\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "ldr r6, [%[b], #92]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #88]\n\t"
+        "str r4, [%[a], #92]\n\t"
+        "ldr r3, [%[a], #96]\n\t"
+        "ldr r4, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "ldr r6, [%[b], #100]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #96]\n\t"
+        "str r4, [%[a], #100]\n\t"
+        "ldr r3, [%[a], #104]\n\t"
+        "ldr r4, [%[a], #108]\n\t"
+        "ldr r5, [%[b], #104]\n\t"
+        "ldr r6, [%[b], #108]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #104]\n\t"
+        "str r4, [%[a], #108]\n\t"
+        "ldr r3, [%[a], #112]\n\t"
+        "ldr r4, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #112]\n\t"
+        "ldr r6, [%[b], #116]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #112]\n\t"
+        "str r4, [%[a], #116]\n\t"
+        "ldr r3, [%[a], #120]\n\t"
+        "ldr r4, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "ldr r6, [%[b], #124]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #120]\n\t"
+        "str r4, [%[a], #124]\n\t"
+        /* c = 0 - borrow: 0 when no borrow, all ones when borrowed. */
+        "sbc %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled 64-word (2048-bit) addition. Returns the carry out of
+ * the most significant word (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r7 = 0xffffffff; used half-way down to turn the carry saved in
+         * c back into the C flag after the pointer updates clobber it. */
+        "mov r7, #0\n\t"
+        "mvn r7, r7\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "add r4, r5\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #36]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #44]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #44]\n\t"
+        "ldr r4, [%[a], #48]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #52]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #52]\n\t"
+        "ldr r4, [%[a], #56]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #60]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #60]\n\t"
+        "ldr r4, [%[a], #64]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #64]\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #68]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #68]\n\t"
+        "ldr r4, [%[a], #72]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #72]\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #76]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #76]\n\t"
+        "ldr r4, [%[a], #80]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #80]\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #84]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #84]\n\t"
+        "ldr r4, [%[a], #88]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #88]\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #92]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #92]\n\t"
+        "ldr r4, [%[a], #96]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #96]\n\t"
+        "ldr r4, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #100]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #100]\n\t"
+        "ldr r4, [%[a], #104]\n\t"
+        "ldr r5, [%[b], #104]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #104]\n\t"
+        "ldr r4, [%[a], #108]\n\t"
+        "ldr r5, [%[b], #108]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #108]\n\t"
+        "ldr r4, [%[a], #112]\n\t"
+        "ldr r5, [%[b], #112]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #112]\n\t"
+        "ldr r4, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #116]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #116]\n\t"
+        "ldr r4, [%[a], #120]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #120]\n\t"
+        "ldr r4, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #124]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #124]\n\t"
+        /* Save the mid-chain carry (MOV keeps C; ADC reads it). */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        /* The pointer ADDs are flag-setting on Thumb-1, so restore the
+         * carry afterwards: c + 0xffffffff sets C exactly when c == 1. */
+        "add %[a], #0x80\n\t"
+        "add %[b], #0x80\n\t"
+        "add %[r], #0x80\n\t"
+        "add %[c], r7\n\t"
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #36]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #44]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #44]\n\t"
+        "ldr r4, [%[a], #48]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #52]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #52]\n\t"
+        "ldr r4, [%[a], #56]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #60]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #60]\n\t"
+        "ldr r4, [%[a], #64]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #64]\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #68]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #68]\n\t"
+        "ldr r4, [%[a], #72]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #72]\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #76]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #76]\n\t"
+        "ldr r4, [%[a], #80]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #80]\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #84]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #84]\n\t"
+        "ldr r4, [%[a], #88]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #88]\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #92]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #92]\n\t"
+        "ldr r4, [%[a], #96]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #96]\n\t"
+        "ldr r4, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #100]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #100]\n\t"
+        "ldr r4, [%[a], #104]\n\t"
+        "ldr r5, [%[b], #104]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #104]\n\t"
+        "ldr r4, [%[a], #108]\n\t"
+        "ldr r5, [%[b], #108]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #108]\n\t"
+        "ldr r4, [%[a], #112]\n\t"
+        "ldr r5, [%[b], #112]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #112]\n\t"
+        "ldr r4, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #116]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #116]\n\t"
+        "ldr r4, [%[a], #120]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #120]\n\t"
+        "ldr r4, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #124]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #124]\n\t"
+        /* Final carry out of word 63. */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r7"
+    );
+
+    return c;
+}
+
+/* Apply mask m to every digit of a, storing the result in r.
+ *
+ * r Output: 32 masked digits.
+ * a Input: 32 digits.
+ * m Mask (normally all-ones or all-zeros) ANDed with each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i = 0;
+
+    while (i < 32) {
+        r[i] = a[i] & m;
+        i++;
+    }
+#else
+    int j;
+
+    /* Unrolled eight digits at a time. */
+    for (j = 0; j < 32; j += 8) {
+        r[j+0] = a[j+0] & m;
+        r[j+1] = a[j+1] & m;
+        r[j+2] = a[j+2] & m;
+        r[j+3] = a[j+3] & m;
+        r[j+4] = a[j+4] & m;
+        r[j+5] = a[j+5] & m;
+        r[j+6] = a[j+6] & m;
+        r[j+7] = a[j+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: with 1024-bit halves a = a1*2^1024 + a0 and
+ * b = b1*2^1024 + b0, r = z2*2^2048 + z1*2^1024 + z0 where z0 = a0*b0,
+ * z2 = a1*b1 and z1 = (a0+a1)*(b0+b1) - z0 - z2.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* low = r;      /* z0 = a0*b0 is built directly in r. */
+    sp_digit mid[64];       /* z1 = (a0+a1)*(b0+b1), low 64 words. */
+    sp_digit aSum[32];      /* a0 + a1, carry kept separately. */
+    sp_digit bSum[32];      /* b0 + b1, carry kept separately. */
+    sp_digit high[64];      /* z2 = a1*b1 */
+    sp_digit carry;
+    sp_digit carryA;
+    sp_digit carryB;
+
+    carryA = sp_2048_add_32(aSum, a, &a[32]);
+    carryB = sp_2048_add_32(bSum, b, &b[32]);
+    carry = carryA & carryB;
+    sp_2048_mul_32(mid, aSum, bSum);
+    sp_2048_mul_32(high, &a[32], &b[32]);
+    sp_2048_mul_32(low, a, b);
+    /* Fold the dropped top bits of the half-sums back into z1's slot. */
+    sp_2048_mask_32(r + 64, aSum, 0 - carryB);
+    sp_2048_mask_32(bSum, bSum, 0 - carryA);
+    carry += sp_2048_add_32(r + 64, r + 64, bSum);
+    carry += sp_2048_sub_in_place_64(mid, high);
+    carry += sp_2048_sub_in_place_64(mid, low);
+    carry += sp_2048_add_64(r + 32, r + 32, mid);
+    r[96] = carry;
+    XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+    (void)sp_2048_add_64(r + 64, r + 64, high);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring: with 1024-bit halves a = a1*2^1024 + a0,
+ * r = z2*2^2048 + z1*2^1024 + z0 where z0 = a0^2, z2 = a1^2 and
+ * z1 = (a0+a1)^2 - z0 - z2.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* low = r;      /* z0 = a0^2 is built directly in r. */
+    sp_digit high[64];      /* z2 = a1^2 */
+    sp_digit mid[64];       /* z1 = (a0+a1)^2, low 64 words. */
+    sp_digit aSum[32];      /* a0 + a1, carry kept separately. */
+    sp_digit carry;
+
+    carry = sp_2048_add_32(aSum, a, &a[32]);
+    sp_2048_sqr_32(mid, aSum);
+    sp_2048_sqr_32(high, &a[32]);
+    sp_2048_sqr_32(low, a);
+    /* If the half-sum overflowed, fold 2 * 2^1024 * aSum back in. */
+    sp_2048_mask_32(r + 64, aSum, 0 - carry);
+    carry += sp_2048_add_32(r + 64, r + 64, r + 64);
+    carry += sp_2048_sub_in_place_64(mid, high);
+    carry += sp_2048_sub_in_place_64(mid, low);
+    carry += sp_2048_add_64(r + 32, r + 32, mid);
+    r[96] = carry;
+    XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+    (void)sp_2048_add_64(r + 64, r + 64, high);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Looping (small-code) 64-word addition, one word per iteration.
+ * Returns the carry out of the most significant word (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r6 = end pointer (a + 0x100 bytes = 64 words); r7 = -1 so that
+         * "add c, r7" can reload the saved carry into the C flag. */
+        "mov r6, %[a]\n\t"
+        "mov r7, #0\n\t"
+        "mov r4, #1\n\t"
+        "lsl r4, #8\n\t"
+        "sub r7, #1\n\t"
+        "add r6, r4\n\t"
+        "\n1:\n\t"
+        /* c + 0xffffffff sets C exactly when c == 1 (carry restore). */
+        "add %[c], r7\n\t"
+        "ldr r4, [%[a]]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        /* Save the new carry back into c (MOV keeps C intact). */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "add %[a], #4\n\t"
+        "add %[b], #4\n\t"
+        "add %[r], #4\n\t"
+        "cmp %[a], r6\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Looping (small-code) 64-word in-place subtraction, two words per
+ * iteration. Returns 0 on no borrow, all ones (-1) on borrow.
+ *
+ * a A single precision integer (also the result).
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        /* r7 = end pointer (a + 0x100 bytes = 64 words). */
+        "mov r7, %[a]\n\t"
+        "mov r5, #1\n\t"
+        "lsl r5, #8\n\t"
+        "add r7, r5\n\t"
+        "\n1:\n\t"
+        /* 0 - c regenerates the borrow in C (c is 0 or -1); the value in
+         * r5 is dead — it is reloaded below, and LDR keeps the flags. */
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        /* Save the borrow as 0 / -1 across the flag-setting pointer ADDs. */
+        "sbc %[c], %[c]\n\t"
+        "add %[a], #8\n\t"
+        "add %[b], #8\n\t"
+        "cmp %[a], r7\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Looping (small-code) schoolbook multiply. The outer loop walks the
+ * result column offset (held in r8); the inner loop accumulates all
+ * partial products a[i]*b[j] with i+j equal to the current column into
+ * the 96-bit accumulator r3:r4:r5. Products are formed from 16x16-bit
+ * halves because Thumb-1 MUL only yields the low 32 bits.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Result built in a local buffer so r may alias a or b. */
+    sp_digit tmp[64 * 2];
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r10, %[b]\n\t"
+        /* r12 = a + 0x100 = one past a's last word. */
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r9\n\t"
+        "mov r12, r6\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        /* Start index = max(0, column - 252) bytes, computed branch-free:
+         * SBC/MVN/AND build a mask from the borrow of (column - 252). */
+        "mov r6, #252\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov %[b], r8\n\t"
+        "sub %[b], %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add %[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        /* lo(a)*lo(b) into the accumulator. */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        /* lo(a)*hi(b), shifted up 16 bits. */
+        "ldr r7, [%[b]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        /* hi(a)*hi(b) into the middle/high words. */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        /* hi(a)*lo(b), shifted up 16 bits. */
+        "ldr r7, [%[b]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add %[a], #4\n\t"
+        "sub %[b], #4\n\t"
+        "cmp %[a], r12\n\t"
+        "beq 3f\n\t"
+        "mov r6, r8\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        /* Store the finished column; shift the accumulator down a word. */
+        "mov %[r], r11\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        /* Loop over columns 0..0x1f8 (127 result words). */
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #248\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[a], r9\n\t"
+        "mov %[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Looping (small-code) schoolbook squaring. For each result column, the
+ * off-diagonal products a[i]*a[j] (i < j) are added twice and the
+ * diagonal square a[i]*a[i] once, into the 96-bit accumulator r3:r4:r5.
+ * The result is built in a 0x200-byte scratch area taken from the stack
+ * and copied into r at the end, so r may alias a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        /* Reserve 0x200 bytes (128 words) of scratch on the stack. */
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "neg r6, r6\n\t"
+        "add sp, r6\n\t"
+        "mov r10, sp\n\t"
+        "mov r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        /* Start index = max(0, column - 252) bytes (branch-free clamp).
+         * r2 runs down from the matching upper index. */
+        "mov r6, #252\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov r2, r8\n\t"
+        "sub r2, %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add r2, r9\n\t"
+        "\n2:\n\t"
+        /* On the diagonal (i == j) square once instead of doubling. */
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        /* lo^2, hi^2, then 2*lo*hi via a 17-bit shift. */
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r6\n\t"
+        "add r3, r6\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "mul r7, r7\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #15\n\t"
+        "lsl r6, r6, #17\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add %[a], #4\n\t"
+        "sub r2, #4\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        /* Stop once the indices cross the diagonal. */
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r7, r8\n\t"
+        "add r7, r9\n\t"
+        "cmp %[a], r7\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        /* Store the finished column; shift the accumulator down a word. */
+        "mov %[r], r10\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #248\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r9\n\t"
+        "str r3, [%[r], r7]\n\t"
+        /* Copy the scratch result out to r, then release the stack. */
+        "mov %[r], r11\n\t"
+        "mov %[a], r10\n\t"
+        "mov r3, #1\n\t"
+        "lsl r3, r3, #8\n\t"
+        "add r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "sub r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* Apply mask m to every digit of a, storing the result in r.
+ *
+ * r Output: 32 masked digits.
+ * a Input: 32 digits.
+ * m Mask (normally all-ones or all-zeros) ANDed with each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int i = 0;
+
+    while (i < 32) {
+        r[i] = a[i] & m;
+        i++;
+    }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Looping (small-code) 32-word addition, one word per iteration.
+ * Returns the carry out of the most significant word (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r6 = end pointer (a + 128 bytes = 32 words); r7 = -1 so that
+         * "add c, r7" can reload the saved carry into the C flag. */
+        "mov r6, %[a]\n\t"
+        "mov r7, #0\n\t"
+        "add r6, #128\n\t"
+        "sub r7, #1\n\t"
+        "\n1:\n\t"
+        /* c + 0xffffffff sets C exactly when c == 1 (carry restore). */
+        "add %[c], r7\n\t"
+        "ldr r4, [%[a]]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        /* Save the new carry back into c (MOV keeps C intact). */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "add %[a], #4\n\t"
+        "add %[b], #4\n\t"
+        "add %[r], #4\n\t"
+        "cmp %[a], r6\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Looping (small-code) 32-word in-place subtraction, two words per
+ * iteration. Returns 0 on no borrow, all ones (-1) on borrow.
+ *
+ * a A single precision integer (also the result).
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        /* r7 = end pointer (a + 128 bytes = 32 words). */
+        "mov r7, %[a]\n\t"
+        "add r7, #128\n\t"
+        "\n1:\n\t"
+        /* 0 - c regenerates the borrow in C (c is 0 or -1); the value in
+         * r5 is dead — it is reloaded below, and LDR keeps the flags. */
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        /* Save the borrow as 0 / -1 across the flag-setting pointer ADDs. */
+        "sbc %[c], %[c]\n\t"
+        "add %[a], #8\n\t"
+        "add %[b], #8\n\t"
+        "cmp %[a], r7\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Column-wise schoolbook multiply: the outer loop produces one 32-bit
+ * output word per iteration; the inner loop sums all partial products
+ * a[i]*b[j] with i+j == column into the 96-bit accumulator r3:r4:r5.
+ * Each 32x32->64 product is built from four 16x16 MULs (Thumb-1 has no
+ * widening multiply).  The result is built in a stack buffer and copied
+ * out so that r may alias a or b.
+ *
+ * r A single precision integer (64 words).
+ * a A single precision integer (32 words).
+ * b A single precision integer (32 words).
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[32 * 2];
+    __asm__ __volatile__ (
+        /* Register roles: r3:r4:r5 column accumulator, r8 = column byte
+         * offset, r9 = a, r10 = b, r11 = tmp, r12 = end of a.
+         * NOTE(review): the asm modifies the input-only operands
+         * %[a]/%[b]/%[r] and restores a and b at the end - confirm the
+         * constraints against upstream wolfSSL. */
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r10, %[b]\n\t"
+        "mov r6, #128\n\t"
+        "add r6, r9\n\t"
+        "mov r12, r6\n\t"
+        "\n1:\n\t"
+        /* Clamp the start index: i = max(0, column - 31), j = column - i. */
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r6, #124\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov %[b], r8\n\t"
+        "sub %[b], %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add %[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        /* 32x32 -> 64 product of *a and *b via four 16-bit halves,
+         * accumulated into r3:r4:r5 (%[r] is zero here). */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add %[a], #4\n\t"
+        "sub %[b], #4\n\t"
+        "cmp %[a], r12\n\t"
+        "beq 3f\n\t"
+        /* Continue while a's index has not passed the current column. */
+        "mov r6, r8\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        /* Emit the low accumulator word, shift the accumulator down and
+         * advance to the next column (up to byte offset 248 = word 62). */
+        "mov %[r], r11\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #248\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[a], r9\n\t"
+        "mov %[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-wise squaring: off-diagonal products a[i]*a[j] (i != j) are
+ * accumulated twice, the diagonal a[i]^2 once, into the 96-bit
+ * accumulator r3:r4:r5.  A 256-byte scratch buffer is carved off the
+ * stack pointer directly and the result copied to r at the end, so r
+ * may alias a.
+ *
+ * r A single precision integer (64 words).
+ * a A single precision integer (32 words).
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        /* Register roles: r3:r4:r5 accumulator, r8 = column byte offset,
+         * r9 = a, r10 = stack scratch, r11 = r, r2 = descending operand.
+         * sp is lowered by 256 bytes here and restored at the end. */
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "neg r6, r6\n\t"
+        "add sp, r6\n\t"
+        "mov r10, sp\n\t"
+        "mov r9, %[a]\n\t"
+        "\n1:\n\t"
+        /* Clamp the start index for this column (as in sp_2048_mul_32). */
+        "mov %[r], #0\n\t"
+        "mov r6, #124\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov r2, r8\n\t"
+        "sub r2, %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add r2, r9\n\t"
+        "\n2:\n\t"
+        /* When the indices meet we are on the diagonal: square once. */
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        /* Diagonal term: a[i]^2 = lo^2 + hi^2<<32 + 2*lo*hi<<16. */
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r6\n\t"
+        "add r3, r6\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "mul r7, r7\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r7\n\t"
+        /* Shift by 17 rather than 16: doubles the cross product. */
+        "lsr r7, r6, #15\n\t"
+        "lsl r6, r6, #17\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add %[a], #4\n\t"
+        "sub r2, #4\n\t"
+        "mov r6, #128\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        /* Stop once the ascending index passes the descending one. */
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r7, r8\n\t"
+        "add r7, r9\n\t"
+        "cmp %[a], r7\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        /* Emit the low accumulator word and advance the column. */
+        "mov %[r], r10\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #248\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r9\n\t"
+        "str r3, [%[r], r7]\n\t"
+        /* Copy the 64-word scratch result down into r, then restore sp. */
+        "mov %[r], r11\n\t"
+        "mov %[a], r10\n\t"
+        "mov r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "sub r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n (the Montgomery constant).
+ *
+ * a   A single precision number (odd, as required by Montgomery reduction).
+ * rho Receives the bottom word of the negated inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit inv;
+    sp_digit m0 = a[0];
+    int k;
+
+    /* Seed so that inv * m0 == 1 mod 2^4. */
+    inv = (((m0 + 2) & 4) << 1) + m0;
+    /* Each Newton step doubles the number of correct low bits:
+     * 4 -> 8 -> 16 -> 32. */
+    for (k = 0; k < 3; k++) {
+        inv *= 2 - m0 * inv;
+    }
+
+    /* rho = -1/m mod 2^32 */
+    *rho = -inv;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Walks the 64 words (256 bytes) of a, multiplying each by the single
+ * digit b with four 16x16 MULs and carrying through the 96-bit
+ * accumulator r3:r4:r5.  Writes 65 output words (the final carry word
+ * is stored after the loop).
+ *
+ * r A single precision integer (65 words written).
+ * a A single precision integer (64 words).
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        /* r9 = one-past-the-end of a (a + 256); r8 shadows r. */
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, %[a]\n\t"
+        "mov r8, %[r]\n\t"
+        "mov r9, r6\n\t"
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        /* Store the low word, shift the accumulator down, advance. */
+        "mov %[r], r8\n\t"
+        "str r3, [%[r]]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add %[r], #4\n\t"
+        "add %[a], #4\n\t"
+        "mov r8, %[r]\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        /* Final carry word. */
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 1024 bits (32 words), just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
+{
+    int i;
+
+    /* r = 0, i.e. 2^1024 truncated to 32 words. */
+    for (i = 0; i < 32; i++) {
+        r[i] = 0;
+    }
+    /* r = 0 - m == 2^1024 - m modulo 2^1024. */
+    sp_2048_sub_in_place_32(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant-time: the same instructions run for both mask values, b is
+ * simply ANDed with the mask before the word-wise subtract.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from (32 words).
+ * b A single precision number to subtract (32 words).
+ * m Mask value to apply (0 or all-ones).
+ * returns 0 when there was no final borrow, (sp_digit)-1 otherwise.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r8 = byte length (128); r7 = running byte offset. */
+        "mov r5, #128\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        /* 0 - c primes the carry flag from the previous borrow; the
+         * subsequent ldr reuses r5 without touching the flags. */
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "sbc r5, r6\n\t"
+        "sbc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 1024 bits (32 words) using Montgomery
+ * reduction.
+ *
+ * For each word i: mu = a[i] * mp, then a[i..i+31] += m * mu, which
+ * zeroes a[i]; the final conditional subtract brings the result below m.
+ *
+ * a A single precision number to reduce in place (64 words).
+ * m The single precision number representing the modulus (32 words).
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        /* Register roles: r8 = mp, r9 = &a[i], r10 = &a[i+j],
+         * r11 = i (bytes), r12 = inter-iteration carry, r14 = m. */
+        "mov r8, %[mp]\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov r14, %[m]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov r11, r4\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "mov %[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov %[mp], r8\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mul %[mp], %[a]\n\t"
+        "mov %[m], r14\n\t"
+        "mov r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov %[a], r10\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        /* %[a]:r5 accumulates m[j] * mu built from 16-bit halves. */
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add %[a], r7\n\t"
+        "adc r5, %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add r4, %[a]\n\t"
+        "adc r5, %[ca]\n\t"
+        "mov %[a], r10\n\t"
+        "str r4, [%[a]]\n\t"
+        /* Advance j while &a[i+j] < &a[i] + 124 (j < 31). */
+        "mov r6, #4\n\t"
+        "add %[m], #4\n\t"
+        "add r10, r6\n\t"
+        "mov r4, #124\n\t"
+        "add r4, r9\n\t"
+        "cmp r10, r4\n\t"
+        "blt 2b\n\t"
+        "# a[i+31] += m[31] * mu\n\t"
+        /* Top word also absorbs the carry saved from the previous i. */
+        "mov %[ca], #0\n\t"
+        "mov r4, r12\n\t"
+        "mov %[a], #0\n\t"
+        "# Multiply m[31] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "adc r4, %[ca]\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "# Multiply m[31] and mu - Done\n\t"
+        "mov %[ca], %[a]\n\t"
+        "mov %[a], r10\n\t"
+        "ldr r7, [%[a], #4]\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov r6, #0\n\t"
+        "add r5, %[a]\n\t"
+        "adc r7, r4\n\t"
+        "adc %[ca], r6\n\t"
+        "mov %[a], r10\n\t"
+        "str r5, [%[a]]\n\t"
+        "str r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r9, r6\n\t"
+        "add r11, r6\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov %[a], r9\n\t"
+        /* Loop over all 32 words (i < 128 bytes). */
+        "mov r4, #128\n\t"
+        "cmp r11, r4\n\t"
+        "blt 1b\n\t"
+        "mov %[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* The loop advanced a by 32 words, so a - 32 is the original base.
+     * Subtract m once more when the final carry is set. */
+    sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full double-width multiply followed by a Montgomery reduction back to
+ * 32 words.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_32(r, a, b);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Dedicated squaring followed by a Montgomery reduction back to 32
+ * words.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_32(r, a);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Same scheme as sp_2048_mul_d_64 but over 32 words (128 bytes); writes
+ * 33 output words including the final carry.
+ *
+ * r A single precision integer (33 words written).
+ * a A single precision integer (32 words).
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        /* r9 = one-past-the-end of a; r8 shadows r. */
+        "mov r6, #128\n\t"
+        "add r6, %[a]\n\t"
+        "mov r8, %[r]\n\t"
+        "mov r9, r6\n\t"
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        /* Store the low word, shift the accumulator down, advance. */
+        "mov %[r], r8\n\t"
+        "str r3, [%[r]]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add %[r], #4\n\t"
+        "add %[a], #4\n\t"
+        "mov r8, %[r]\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        /* Final carry word. */
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * Produces a quotient estimate by long division against div/2 + 1 over
+ * the top 62 bits, then refines it with three multiply-back/correct
+ * rounds.
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The dividend (divisor; top bit expected set by the callers'
+ *     normalised moduli - NOTE(review): confirm against upstream).
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        /* r5 = div/2 + 1; r8/r9 preserve the original d0/d1. */
+        "lsr r5, %[div], #1\n\t"
+        "add r5, #1\n\t"
+        "mov r8, %[d0]\n\t"
+        "mov r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        /* One quotient bit: r6 = (d1 >= r5) ? -1 : 0; shift into r and
+         * subtract r5 from d1 when set. */
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        /* Shift the next bit of d0 up into d1, then extract a bit. */
+        "lsl %[d0], %[d0], #1\n\t"
+        "adc %[d1], %[d1]\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "sub r4, #1\n\t"
+        "bpl 1b\n\t"
+        "mov r7, #0\n\t"
+        "add %[r], %[r]\n\t"
+        "add %[r], #1\n\t"
+        /* Correction round 1: r += (d1|d0 - r*div) >> 32. */
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "sub %[d1], r4\n\t"
+        "mov r4, %[d1]\n\t"
+        "mov %[d1], r9\n\t"
+        "sbc %[d1], r5\n\t"
+        "mov r5, %[d1]\n\t"
+        "add %[r], r5\n\t"
+        /* Correction round 2. */
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        /* Correction round 3. */
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        /* Final fix-up: add 1 when the remainder r4 is still >= div. */
+        "mov r6, %[div]\n\t"
+        "sub r6, r4\n\t"
+        "sbc r6, r6\n\t"
+        "sub %[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word down, always touching every
+ * word.  r3 is an all-ones "still equal" mask that is cleared once the
+ * first differing word has been folded into the result, so later words
+ * cannot change it.
+ *
+ * a A single precision integer (32 words).
+ * b A single precision integer (32 words).
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        /* r3 = all-ones mask; r6 = byte offset of the top word (124). */
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #124\n\t"
+        "1:\n\t"
+        "ldr r7, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r7, r3\n\t"
+        "and r5, r3\n\t"
+        "mov r4, r7\n\t"
+        /* r -= (a[i] < b[i]); then clear the mask if they differed. */
+        "sub r7, r5\n\t"
+        "sbc r7, r7\n\t"
+        "add %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        /* r += (b[i] < a[i]); again clear the mask on difference. */
+        "sub r5, r4\n\t"
+        "sbc r7, r7\n\t"
+        "sub %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided (64 words).
+ * d Number to divide with (32 words).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (32 words).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit work[64];
+    sp_digit prod[33];
+    sp_digit hi;
+    sp_digit q;
+    int idx;
+
+    (void)m;
+
+    hi = d[31];
+    XMEMCPY(work, a, sizeof(*work) * 2 * 32);
+    for (idx = 31; idx >= 0; idx--) {
+        /* Estimate one quotient word from the top two words of the
+         * running remainder. */
+        q = div_2048_word_32(work[32 + idx], work[32 + idx - 1], hi);
+
+        /* Subtract q * d; the estimate may be too large, so perform two
+         * mask-controlled add-backs of d to correct any underflow. */
+        sp_2048_mul_d_32(prod, d, q);
+        work[32 + idx] += sp_2048_sub_in_place_32(&work[idx], prod);
+        work[32 + idx] -= prod[32];
+        sp_2048_mask_32(prod, d, work[32 + idx]);
+        work[32 + idx] += sp_2048_add_32(&work[idx], &work[idx], prod);
+        sp_2048_mask_32(prod, d, work[32 + idx]);
+        work[32 + idx] += sp_2048_add_32(&work[idx], &work[idx], prod);
+    }
+
+    /* Final constant-time correction: subtract d once when work >= d. */
+    q = sp_2048_cmp_32(work, d) >= 0;
+    sp_2048_cond_sub_32(r, work, d, (sp_digit)0 - q);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over the division routine; only the remainder is kept.
+ *
+ * r A single precision number that is the reduced result (32 words).
+ * a A single precision number that is to be reduced (64 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_32(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window exponentiation in Montgomery form with a 16-entry
+ * table t[y] = a^y * R mod m (R = 2^1024).
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][64];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;      /* current word of the exponent being scanned */
+    sp_digit mask;
+    int i;
+    int c, y;        /* c = bits left in n, y = current window value */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* t[1] = a * R mod m: place a in the upper half (a * 2^1024)
+         * then reduce modulo m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[y] = a^y in Montgomery form. */
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+
+        /* First (possibly short) window taken from the top of e. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        /* Remaining windows: 4 squarings then one table multiply each. */
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                /* Window straddles a word boundary of e. */
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form: reduce r * 1. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        /* Constant-time final subtraction to bring r below m. */
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window exponentiation in Montgomery form with a 32-entry
+ * table t[y] = a^y * R mod m (R = 2^1024).  Larger table, fewer
+ * multiplies than the 4-bit WOLFSSL_SP_SMALL variant.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][64];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;      /* current word of the exponent being scanned */
+    sp_digit mask;
+    int i;
+    int c, y;        /* c = bits left in n, y = current window value */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* t[1] = a * R mod m: place a in the upper half (a * 2^1024)
+         * then reduce modulo m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[y] = a^y in Montgomery form. */
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_32(t[20], t[10], m, mp);
+        sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_32(t[22], t[11], m, mp);
+        sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_32(t[24], t[12], m, mp);
+        sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_32(t[26], t[13], m, mp);
+        sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_32(t[28], t[14], m, mp);
+        sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_32(t[30], t[15], m, mp);
+        sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
+
+        /* First (possibly short) window taken from the top of e. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        /* Remaining windows: 5 squarings then one table multiply each. */
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                /* Window straddles a word boundary of e. */
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form: reduce r * 1. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        /* Constant-time final subtraction to bring r below m. */
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits (64 words), just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
+{
+    int i;
+
+    /* r = 0, i.e. 2^2048 truncated to 64 words. */
+    for (i = 0; i < 64; i++) {
+        r[i] = 0;
+    }
+    /* r = 0 - m == 2^2048 - m modulo 2^2048. */
+    sp_2048_sub_in_place_64(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * 64-word variant of sp_2048_cond_sub_32; constant-time via masking.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from (64 words).
+ * b A single precision number to subtract (64 words).
+ * m Mask value to apply (0 or all-ones).
+ * returns 0 when there was no final borrow, (sp_digit)-1 otherwise.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r8 = byte length (1 << 8 = 256); r7 = running byte offset. */
+        "mov r5, #1\n\t"
+        "lsl r5, r5, #8\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        /* 0 - c primes the carry flag from the previous borrow. */
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "sbc r5, r6\n\t"
+        "sbc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 2048 bits (64 words) using Montgomery
+ * reduction.
+ *
+ * 64-word variant of sp_2048_mont_reduce_32: for each word i,
+ * mu = a[i] * mp, then a[i..i+63] += m * mu, zeroing a[i]; a final
+ * conditional subtract brings the result below m.
+ *
+ * a A single precision number to reduce in place (128 words).
+ * m The single precision number representing the modulus (64 words).
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        /* Register roles: r8 = mp, r9 = &a[i], r10 = &a[i+j],
+         * r11 = i (bytes), r12 = inter-iteration carry, r14 = m. */
+        "mov r8, %[mp]\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov r14, %[m]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov r11, r4\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "mov %[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov %[mp], r8\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mul %[mp], %[a]\n\t"
+        "mov %[m], r14\n\t"
+        "mov r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov %[a], r10\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        /* %[a]:r5 accumulates m[j] * mu built from 16-bit halves. */
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add %[a], r7\n\t"
+        "adc r5, %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add r4, %[a]\n\t"
+        "adc r5, %[ca]\n\t"
+        "mov %[a], r10\n\t"
+        "str r4, [%[a]]\n\t"
+        /* Advance j while &a[i+j] < &a[i] + 252 (j < 63). */
+        "mov r6, #4\n\t"
+        "add %[m], #4\n\t"
+        "add r10, r6\n\t"
+        "mov r4, #252\n\t"
+        "add r4, r9\n\t"
+        "cmp r10, r4\n\t"
+        "blt 2b\n\t"
+        "# a[i+63] += m[63] * mu\n\t"
+        /* Top word also absorbs the carry saved from the previous i. */
+        "mov %[ca], #0\n\t"
+        "mov r4, r12\n\t"
+        "mov %[a], #0\n\t"
+        "# Multiply m[63] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "adc r4, %[ca]\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "# Multiply m[63] and mu - Done\n\t"
+        "mov %[ca], %[a]\n\t"
+        "mov %[a], r10\n\t"
+        "ldr r7, [%[a], #4]\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov r6, #0\n\t"
+        "add r5, %[a]\n\t"
+        "adc r7, r4\n\t"
+        "adc %[ca], r6\n\t"
+        "mov %[a], r10\n\t"
+        "str r5, [%[a]]\n\t"
+        "str r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r9, r6\n\t"
+        "add r11, r6\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov %[a], r9\n\t"
+        /* Loop over all 64 words (i < 1 << 8 = 256 bytes). */
+        "mov r4, #1\n\t"
+        "lsl r4, r4, #8\n\t"
+        "cmp r11, r4\n\t"
+        "blt 1b\n\t"
+        "mov %[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* The loop advanced a by 64 words, so a - 64 is the original base.
+     * Subtract m once more when the final carry is set. */
+    sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full double-width multiply followed by a Montgomery reduction back to
+ * 64 words.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_64(r, a, b);
+    sp_2048_mont_reduce_64(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Thin wrapper: dedicated 64-word squaring followed by a Montgomery
+ * reduction of the 128-word result back to 64 words.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (as produced by sp_2048_mont_setup).
+ */
+static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_64(r, a);
+    sp_2048_mont_reduce_64(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor (a single word). Callers in this file pass the top word
+ *     of a 2048-bit modulus, so its high bit is expected to be set.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        /* Phase 1: shift-and-subtract estimate of the top 31 quotient bits.
+         * r5 = (div >> 1) + 1 is the subtraction threshold; r8/r9 save the
+         * original d0/d1 for the correction phase below. */
+        "lsr r5, %[div], #1\n\t"
+        "add r5, #1\n\t"
+        "mov r8, %[d0]\n\t"
+        "mov r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        /* 30 iterations (counter starts at 29 and loops while >= 0). */
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        "lsl %[d0], %[d0], #1\n\t"
+        "adc %[d1], %[d1]\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "sub r4, #1\n\t"
+        "bpl 1b\n\t"
+        "mov r7, #0\n\t"
+        "add %[r], %[r]\n\t"
+        "add %[r], #1\n\t"
+        /* Phase 2: three correction rounds. Each computes r * div with
+         * 16x16->32 partial products (Thumb-1 MUL only gives the low 32
+         * bits), subtracts it from the saved (d1|d0) in r9:r8 and folds the
+         * high word of the difference back into r. */
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        /* r5:r4 = (d1|d0) - r * div; add the high word into r. */
+        "mov %[d1], r8\n\t"
+        "sub %[d1], r4\n\t"
+        "mov r4, %[d1]\n\t"
+        "mov %[d1], r9\n\t"
+        "sbc %[d1], r5\n\t"
+        "mov r5, %[d1]\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        /* Final fix-up: if the remaining low word exceeds div, r += 1
+         * (borrow from div - r4 produces an all-ones r6, subtracted off). */
+        "mov r6, %[div]\n\t"
+        "sub r6, r4\n\t"
+        "sbc r6, r6\n\t"
+        "sub %[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Callers pass a mask that is all ones or all zeros, selecting the whole
+ * value or zero without branching.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int j = 0;
+
+    while (j < 64) {
+        r[j] = a[j] & m;
+        j++;
+    }
+#else
+    int j;
+
+    /* Unrolled: eight words per iteration over the 64-word value. */
+    for (j = 0; j < 64; j += 8) {
+        r[j]     = a[j]     & m;
+        r[j + 1] = a[j + 1] & m;
+        r[j + 2] = a[j + 2] & m;
+        r[j + 3] = a[j + 3] & m;
+        r[j + 4] = a[j + 4] & m;
+        r[j + 5] = a[j + 5] & m;
+        r[j + 6] = a[j + 6] & m;
+        r[j + 7] = a[j + 7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        /* r3 is an all-ones mask. It is cleared the moment a differing word
+         * is found, so less significant words are masked to zero and cannot
+         * change the result - yet every word is still loaded and processed,
+         * keeping the running time independent of the data. */
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        /* Start at byte offset 252 = word 63 (most significant). */
+        "mov r6, #252\n\t"
+        "1:\n\t"
+        "ldr r7, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r7, r3\n\t"
+        "and r5, r3\n\t"
+        "mov r4, r7\n\t"
+        /* r -= 1 when a[i] < b[i] (borrow makes r7 all ones). */
+        "sub r7, r5\n\t"
+        "sbc r7, r7\n\t"
+        "add %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        /* r += 1 when a[i] > b[i]; either difference clears the mask. */
+        "sub r5, r4\n\t"
+        "sbc r7, r7\n\t"
+        "sub %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Constant time with respect to the data: the add-backs after each
+ * quotient-word estimate are applied through masks rather than branches
+ * (contrast with sp_2048_div_64_cond below).
+ *
+ * a Number to be divided (128 words).
+ * d Number to divide with (64 words).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    /* Quotient words are estimated from the top divisor word only. */
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    for (i=63; i>=0; i--) {
+        r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+        /* t1 -= r1 * d, tracking borrow in the carry word t1[64 + i]. */
+        sp_2048_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        /* r1 may be up to 2 too large (approximate division): add d back
+         * while the carry word (0, or all ones on underflow) masks it in. */
+        sp_2048_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+        sp_2048_mask_64(t2, d, t1[64 + i]);
+        t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+    }
+
+    /* One final conditional subtract leaves the remainder in [0, d). */
+    r1 = sp_2048_cmp_64(t1, d) >= 0;
+    sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Constant-time wrapper around sp_2048_div_64; only the remainder is kept.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced (128 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_64(a, m, NULL, r);
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Branching variant of sp_2048_div_64: the add-backs are guarded by data
+ * dependent branches, so it is NOT constant time. Used where the operand
+ * is public (see sp_2048_mod_64_cond and the RSA public-key path).
+ *
+ * a Number to be divided (128 words).
+ * d Number to divide with (64 words).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[128], t2[65];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    /* Quotient words are estimated from the top divisor word only. */
+    div = d[63];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+    for (i=63; i>=0; i--) {
+        r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+        /* t1 -= r1 * d, tracking borrow in the carry word t1[64 + i]. */
+        sp_2048_mul_d_64(t2, d, r1);
+        t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+        t1[64 + i] -= t2[64];
+        /* r1 may be up to 2 too large: add d back only when underflowed. */
+        if (t1[64 + i] != 0) {
+            t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+            if (t1[64 + i] != 0)
+                t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+        }
+    }
+
+    /* One final conditional subtract leaves the remainder in [0, d). */
+    r1 = sp_2048_cmp_64(t1, d) >= 0;
+    sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Non-constant-time wrapper around sp_2048_div_64_cond; only for public
+ * values (e.g. the RSA public-key operation).
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced (128 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_64_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window: 16 precomputed odd/even powers, four Montgomery
+ * squarings then one multiplication per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][128];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        /* t[0] receives the normalisation value from sp_2048_mont_norm_64
+         * (presumably the Montgomery form of 1 - confirm against that
+         * function, which is outside this chunk). */
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        /* t[1] = a * 2^2048 mod m: a converted to Montgomery form by
+         * placing (the reduced) a in the top half and reducing. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute t[j] = a^j in Montgomery form for window values
+         * j = 2..15 (square for even j, multiply for odd j). */
+        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+
+        /* Take the first (possibly short) window from the top exponent
+         * word. n holds unconsumed exponent bits, c counts them. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=4; ) {
+            /* Refill n from the next exponent word when it runs dry,
+             * possibly stitching a window across the word boundary. */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            /* Four squarings then multiply by the table entry: r = r^16 * a^y. */
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        /* Zero the top half and Montgomery-reduce to leave the ordinary
+         * (non-Montgomery) representation, then normalise into [0, m). */
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window: 32 precomputed powers, five Montgomery squarings
+ * then one multiplication per window (non-WOLFSSL_SP_SMALL variant).
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][128];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 128;
+        }
+#endif
+        /* t[0] receives the normalisation value from sp_2048_mont_norm_64
+         * (presumably the Montgomery form of 1 - confirm against that
+         * function, which is outside this chunk). */
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        /* t[1] = a * 2^2048 mod m: a converted to Montgomery form by
+         * placing (the reduced) a in the top half and reducing. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_64(t[1] + 64, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_64(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute t[j] = a^j in Montgomery form for window values
+         * j = 2..31 (square for even j, multiply for odd j). */
+        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_64(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_64(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_64(t[20], t[10], m, mp);
+        sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_64(t[22], t[11], m, mp);
+        sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_64(t[24], t[12], m, mp);
+        sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_64(t[26], t[13], m, mp);
+        sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_64(t[28], t[14], m, mp);
+        sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_64(t[30], t[15], m, mp);
+        sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp);
+
+        /* Take the first (possibly short) window from the top exponent
+         * word. n holds unconsumed exponent bits, c counts them. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+        for (; i>=0 || c>=5; ) {
+            /* Refill n from the next exponent word when it runs dry,
+             * possibly stitching a window across the word boundary. */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            /* Five squarings then multiply by the table entry: r = r^32 * a^y. */
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_mont_mul_64(r, r, t[y], m, mp);
+        }
+
+        /* Zero the top half and Montgomery-reduce to leave the ordinary
+         * (non-Montgomery) representation, then normalise into [0, m). */
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent. Must fit in a single 32-bit word (checked below).
+ * mm Modulus. Must be exactly 2048 bits.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+        byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[128], m[64], r[128];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;
+    sp_digit e[1];
+    int err = MP_OKAY;
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 ||
+                           mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 64 * 2;
+        m = r + 64 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        /* Load the base into the TOP half of a; the bottom half is zeroed
+         * later so that reducing the 128-word value converts to Montgomery
+         * form (in * 2^2048 mod m). */
+        ah = a + 64;
+
+        sp_2048_from_bin(ah, 64, in, inLen);
+        /* The exponent was checked to be at most 32 bits; collect it into
+         * a single sp_digit word. */
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(m, 64, mm);
+
+        /* Fast path for the common public exponent e = 3: one squaring and
+         * one multiplication, reduced with the (non-constant-time) public
+         * variant of the mod. */
+        if (e[0] == 0x3) {
+            if (err == MP_OKAY) {
+                sp_2048_sqr_64(r, ah);
+                err = sp_2048_mod_64_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_2048_mul_64(r, ah, r);
+                err = sp_2048_mod_64_cond(r, r, m);
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_2048_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 64);
+            err = sp_2048_mod_64_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                /* Find the most significant set bit of the exponent. */
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                /* Left-to-right binary square-and-multiply over e[0]. */
+                XMEMCPY(r, a, sizeof(sp_digit) * 64);
+                for (i--; i>=0; i--) {
+                    sp_2048_mont_sqr_64(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_2048_mont_mul_64(r, r, a, m, mp);
+                    }
+                }
+                /* Zero the top half and Montgomery-reduce to convert out
+                 * of Montgomery form. */
+                XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+                sp_2048_mont_reduce_64(r, m, mp);
+
+                /* Normalise into [0, m): subtract m once if r >= m, found
+                 * by scanning down to the most significant differing word. */
+                for (i = 63; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_2048_sub_in_place_64(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 2048) {
+ err = MP_READ_E;
+ }
+ if (inLen > 256) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 64;
+ m = a + 128;
+ r = a;
+
+ sp_2048_from_bin(a, 64, in, inLen);
+ sp_2048_from_mp(d, 64, dm);
+ sp_2048_from_mp(m, 64, mm);
+ err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 64);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Constant time: b is masked word-by-word instead of branching, so the
+ * same instructions run whether or not the add happens.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with (32 words).
+ * b A single precision number to add (32 words).
+ * m Mask value to apply (all ones or all zeros).
+ * returns the carry out of the 32-word addition.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* Loop bound: 32 words * 4 bytes; r7 is the byte offset. */
+        "mov r5, #128\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        /* Restore the saved carry from %[c] into the carry flag:
+         * r5 = -1 + c produces a carry-out exactly when c is non-zero.
+         * r5 itself is discarded by the following load. */
+        "mov r5, #0\n\t"
+        "sub r5, #1\n\t"
+        "add r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "adc r5, r6\n\t"
+        /* Capture the new carry back into %[c] (MOV imm leaves C intact). */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ *
+ * CRT implementation: computes in^dp mod p and in^dq mod q (1024 bits
+ * each) and recombines them with Garner's formula using qi = q^-1 mod p.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent (unused in this CRT variant).
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus (unused in this CRT variant).
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[64 * 2];
+    sp_digit p[32], q[32], dp[32];
+    sp_digit tmpa[64], tmpb[64];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        /* Carve the single allocation up. Note dp, dq and qi deliberately
+         * share the same 32 words - each is loaded and consumed in turn.
+         * r overlaps the top of a, which is no longer needed by the time
+         * r is written. */
+        a = t;
+        p = a + 64 * 2;
+        q = p + 32;
+        qi = dq = dp = q + 32;
+        tmpa = qi + 32;
+        tmpb = tmpa + 64;
+
+        r = t + 64;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        /* Stack variant: r aliases a, and dp's buffer is reused for dq/qi. */
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_2048_from_bin(a, 64, in, inLen);
+        sp_2048_from_mp(p, 32, pm);
+        sp_2048_from_mp(q, 32, qm);
+        sp_2048_from_mp(dp, 32, dpm);
+
+        /* tmpa = in^dp mod p (reduceA set: in is 2048 bits, p is 1024). */
+        err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
+    }
+    if (err == MP_OKAY) {
+        /* tmpb = in^dq mod q. */
+        sp_2048_from_mp(dq, 32, dqm);
+        err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        /* tmpa = (tmpa - tmpb) mod p: up to two constant-time add-backs of
+         * p cover the possible underflow. */
+        c = sp_2048_sub_in_place_32(tmpa, tmpb);
+        c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
+        sp_2048_cond_add_32(tmpa, tmpa, p, c);
+
+        /* tmpa = tmpa * qi mod p (Garner coefficient step). */
+        sp_2048_from_mp(qi, 32, qim);
+        sp_2048_mul_32(tmpa, tmpa, qi);
+        err = sp_2048_mod_32(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* Recombine: r = tmpb + q * tmpa. */
+        sp_2048_mul_32(tmpa, q, tmpa);
+        XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
+        sp_2048_add_64(r, tmpb, tmpa);
+
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        /* Scrub private-key material before freeing. */
+        XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    /* Scrub private-key material from the stack. */
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Repacks the 64 32-bit sp_digit words into mp_digit words of DIGIT_BIT
+ * bits; the three branches cover DIGIT_BIT equal to, smaller than and
+ * larger than 32 bits.
+ *
+ * a A single precision integer (64 words).
+ * r A multi-precision integer.
+ * returns MP_OKAY on success or the error from mp_grow.
+ */
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        /* Same word size: direct copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
+        r->used = 64;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        /* Narrower mp_digit: each 32-bit source word spills across one or
+         * more destination words. s tracks the bit position within a[i]. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* Wider mp_digit: accumulate several 32-bit source words into each
+         * destination word. s is the bit offset within r->dp[j]. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 64; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    sp_digit b[128];
+    sp_digit e[64];
+    sp_digit m[64];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+    int err = MP_OKAY;
+
+    /* Base and exponent must fit in 2048 bits; the modulus must be
+     * exactly 2048 bits. */
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+    else if (expBits > 2048) {
+        err = MP_READ_E;
+    }
+    else if (mp_count_bits(mod) != 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 64, base);
+        sp_2048_from_mp(e, 64, exp);
+        sp_2048_from_mp(m, 64, mod);
+
+        err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0);
+        if (err == MP_OKAY) {
+            err = sp_2048_to_mp(r, res);
+        }
+    }
+
+    /* Always scrub the (possibly secret) exponent from the stack. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
+{
+ __asm__ __volatile__ (
+ "mov r6, #31\n\t"
+ "sub r6, r6, %[n]\n\t"
+ "add %[a], %[a], #192\n\t"
+ "add %[r], %[r], #192\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "lsr r4, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r4, r4, r6\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #52]\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "str r3, [%[r], #56]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #44]\n\t"
+ "str r2, [%[r], #52]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #40]\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "str r3, [%[r], #44]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "str r2, [%[r], #40]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #28]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "str r2, [%[r], #28]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "str r2, [%[r], #68]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "str r3, [%[r]]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+ : "memory", "r2", "r3", "r4", "r5", "r6"
+ );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ * Fixed 5-bit window; with base 2 each window multiply is a left shift.
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[128];   /* Montgomery normalizer */
+    sp_digit td[65];    /* scratch: 64-word product plus a carry word */
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;    /* Montgomery multiplier, set by sp_2048_mont_setup() */
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp = td + 128;
+#else
+        norm = nd;
+        tmp = td;
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_64(norm, m);
+
+        i = (bits - 1) / 32;   /* index of the exponent's top word */
+        n = e[i--];
+        c = bits & 31;         /* bits used in the top word */
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;         /* first window takes the leftover bits so the rest are full */
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);     /* first window value */
+        n <<= 32 - c;
+        sp_2048_lshift_64(r, norm, y);   /* r = norm * 2^y */
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                n = e[i--];    /* window aligned exactly with a word boundary */
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                y = n >> 27;   /* window straddles a word boundary */
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_2048_mont_sqr_64(r, r, m, mp);   /* five squarings: r = r^32 */
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+            sp_2048_mont_sqr_64(r, r, m, mp);
+
+            sp_2048_lshift_64(r, r, y);   /* multiply in 2^y (the window bits) */
+            sp_2048_mul_d_64(tmp, norm, r[64]);   /* reduce the overflow word r[64] */
+            r[64] = 0;
+            o = sp_2048_add_64(r, r, tmp);
+            sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);   /* constant-time fix-up */
+        }
+
+        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+        sp_2048_mont_reduce_64(r, m, mp);   /* leave Montgomery form */
+
+        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+        sp_2048_cond_sub_64(r, r, m, mask);   /* constant-time final subtraction */
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_2048 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ * Computes out = base^exp mod mod for a 2048-bit modulus.
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;    /* result shares the double-width b buffer */
+    word32 i;
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 256) {   /* exponent limited to 2048 bits */
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {   /* modulus must be exactly 2048 bits */
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 64, base);
+        sp_2048_from_bin(e, 64, exp, expLen);
+        sp_2048_from_mp(m, 64, mod);
+
+    #ifdef HAVE_FFDHE_2048
+        if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)   /* base 2, FFDHE-style prime */
+            err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+        for (i=0; i<256 && out[i] == 0; i++) {
+        }
+        *outLen -= i;   /* strip leading zero bytes */
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    XMEMSET(e, 0, sizeof(e));   /* zeroise the exponent copy */
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ * Computes res = base^exp mod mod for a 1024-bit modulus.
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[64], e[32], m[32];
+    sp_digit* r = b;    /* result shares the double-width b buffer */
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1024) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1024) {   /* modulus must be exactly 1024 bits */
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 32, base);
+        sp_2048_from_mp(e, 32, exp);
+        sp_2048_from_mp(m, 32, mod);
+
+        err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 32, 0, sizeof(*r) * 32U);   /* clear the unused upper half */
+        err = sp_2048_to_mp(r, res);
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    XMEMSET(e, 0, sizeof(e));   /* zeroise the exponent copy */
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
+
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of 32-bit digits to write to r.
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;       /* bits already filled in r[j] */
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {   /* least significant byte is last in a[] */
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {            /* r[j] full: spill high bits into r[j+1] */
+            r[j] &= 0xffffffff;
+            s = 32U - s;           /* bits of a[i] that did not fit */
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;            /* bits now used in the new digit */
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) {     /* zero the remaining digits */
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of 32-bit digits to write to r.
+ * a A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);   /* same digit size: direct copy */
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    int i, j = 0;
+    word32 s = 0;       /* bits of a->dp[i] already consumed */
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {   /* one source digit spans several r words */
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;   /* s = bits already filled in r[j] */
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {   /* r[j] full: move to the next word */
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r A single precision integer.
+ * a Byte array (at least 384 bytes).
+ */
+static void sp_3072_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;    /* s = bit offset into the current digit */
+
+    j = 3072 / 8 - 1;      /* fill from the least significant byte backwards */
+    a[j] = 0;
+    for (i=0; i<96 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {   /* emit the remaining full bytes of r[i] */
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);  /* bits of r[i] left over for the next byte */
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;           /* next digit shares a byte with this one */
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ * Column-wise product in Thumb-1 asm using 16x16->32 partial products.
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[12 * 2];   /* build the result here so r may alias a or b */
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"        /* r3:r4:r5 = 96-bit column accumulator */
+        "mov r4, #0\n\t"
+        "mov r8, r3\n\t"        /* r8 = output byte offset (column * 4) */
+        "mov r11, %[r]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r10, %[b]\n\t"
+        "mov r6, #48\n\t"
+        "add r6, r9\n\t"
+        "mov r12, r6\n\t"       /* r12 = a + 48: one past the last digit */
+        "\n1:\n\t"              /* outer loop: one output word per pass */
+        "mov %[r], #0\n\t"      /* %[r] reused as a zero source for adc */
+        "mov r5, #0\n\t"
+        "mov r6, #44\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"      /* start = max(0, offset - 44), branch-free: */
+        "sbc r6, r6\n\t"        /*   borrow -> all-ones mask */
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov %[b], r8\n\t"
+        "sub %[b], %[a]\n\t"    /* b index runs opposite to a index */
+        "add %[a], r9\n\t"
+        "add %[b], r10\n\t"
+        "\n2:\n\t"              /* inner loop: acc += a[i] * b[k-i] */
+        "# Multiply Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r6, r6, #16\n\t"   /* isolate low 16-bit halves */
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"        /* lo(a) * lo(b) */
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"        /* lo(a) * hi(b), added shifted by 16 */
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"        /* hi(a) * hi(b) */
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"        /* hi(a) * lo(b), added shifted by 16 */
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add %[a], #4\n\t"
+        "sub %[b], #4\n\t"
+        "cmp %[a], r12\n\t"     /* stop at the end of a[] */
+        "beq 3f\n\t"
+        "mov r6, r8\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"      /* or when i passes the column index */
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r11\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"    /* store column; shift accumulator down */
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #88\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"            /* 23 columns; top word stored below */
+        "str r3, [%[r], r7]\n\t"
+        "mov %[a], r9\n\t"      /* restore the operand registers */
+        "mov %[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ * Column-wise squaring: cross products are added twice, diagonal once.
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"        /* r3:r4:r5 = 96-bit column accumulator */
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r8, r3\n\t"        /* r8 = output byte offset */
+        "mov r11, %[r]\n\t"
+        "mov r6, #96\n\t"       /* reserve a 96-byte result buffer on the stack */
+        "neg r6, r6\n\t"
+        "add sp, r6\n\t"
+        "mov r10, sp\n\t"
+        "mov r9, %[a]\n\t"
+        "\n1:\n\t"              /* outer loop: one output word per pass */
+        "mov %[r], #0\n\t"      /* %[r] reused as a zero source for adc */
+        "mov r6, #44\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"      /* start = max(0, offset - 44), branch-free */
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov r2, r8\n\t"
+        "sub r2, %[a]\n\t"      /* r2 = matching digit from the other end */
+        "add %[a], r9\n\t"
+        "add r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"            /* pointers meet: diagonal element, square once */
+        "# Multiply * 2: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"        /* lo * lo, accumulated twice */
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"        /* lo * hi, shifted by 16, twice */
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"        /* hi * hi, accumulated twice */
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"        /* hi * lo, shifted by 16, twice */
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r6\n\t"        /* lo^2 */
+        "add r3, r6\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "mul r7, r7\n\t"        /* hi^2 */
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r7\n\t"        /* 2 * lo * hi via 17/15-bit shifts */
+        "lsr r7, r6, #15\n\t"
+        "lsl r6, r6, #17\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add %[a], #4\n\t"
+        "sub r2, #4\n\t"
+        "mov r6, #48\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"      /* stop at the end of a[] */
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"      /* or once the pointers have crossed */
+        "bgt 3f\n\t"
+        "mov r7, r8\n\t"
+        "add r7, r9\n\t"
+        "cmp %[a], r7\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r10\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"    /* store column; shift accumulator down */
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #88\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r9\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[r], r11\n\t"
+        "mov %[a], r10\n\t"
+        "mov r3, #92\n\t"       /* copy the 96-byte stack result out to r */
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "sub r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #96\n\t"       /* release the stack buffer */
+        "add sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Add b to a into r. (r = a + b)
+ * Returns the carry out of the top word (0 or 1).
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "add r4, r5\n\t"        /* first word: plain add starts the carry chain */
+        "str r4, [%[r], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "adc r4, r5\n\t"        /* remaining words propagate the carry */
+        "str r4, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #36]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #44]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #44]\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"    /* capture the final carry as 0/1 */
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* Sub b from a into a. (a -= b)
+ * Returns 0 when there is no borrow, all-ones when a borrow occurred.
+ * a A single precision integer, overwritten with the difference.
+ *
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sub r3, r5\n\t"        /* first word: plain sub starts the borrow chain */
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #0]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r6, [%[b], #12]\n\t"
+        "sbc r3, r5\n\t"        /* remaining words propagate the borrow */
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #8]\n\t"
+        "str r4, [%[a], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r6, [%[b], #20]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #16]\n\t"
+        "str r4, [%[a], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r6, [%[b], #28]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #24]\n\t"
+        "str r4, [%[a], #28]\n\t"
+        "ldr r3, [%[a], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r6, [%[b], #36]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #32]\n\t"
+        "str r4, [%[a], #36]\n\t"
+        "ldr r3, [%[a], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r6, [%[b], #44]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #40]\n\t"
+        "str r4, [%[a], #44]\n\t"
+        "ldr r3, [%[a], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "ldr r6, [%[b], #52]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #48]\n\t"
+        "str r4, [%[a], #52]\n\t"
+        "ldr r3, [%[a], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "ldr r6, [%[b], #60]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #56]\n\t"
+        "str r4, [%[a], #60]\n\t"
+        "ldr r3, [%[a], #64]\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "ldr r6, [%[b], #68]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #64]\n\t"
+        "str r4, [%[a], #68]\n\t"
+        "ldr r3, [%[a], #72]\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "ldr r6, [%[b], #76]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #72]\n\t"
+        "str r4, [%[a], #76]\n\t"
+        "ldr r3, [%[a], #80]\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "ldr r6, [%[b], #84]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #80]\n\t"
+        "str r4, [%[a], #84]\n\t"
+        "ldr r3, [%[a], #88]\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "ldr r6, [%[b], #92]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #88]\n\t"
+        "str r4, [%[a], #92]\n\t"
+        "sbc %[c], %[c]\n\t"    /* c = 0 - borrow: 0 or all-ones */
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ * Returns the carry out of the top word (0 or 1).
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "add r4, r5\n\t"        /* first word: plain add starts the carry chain */
+        "str r4, [%[r], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "adc r4, r5\n\t"        /* remaining words propagate the carry */
+        "str r4, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #36]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #44]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #44]\n\t"
+        "ldr r4, [%[a], #48]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #52]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #52]\n\t"
+        "ldr r4, [%[a], #56]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #60]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #60]\n\t"
+        "ldr r4, [%[a], #64]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #64]\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #68]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #68]\n\t"
+        "ldr r4, [%[a], #72]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #72]\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #76]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #76]\n\t"
+        "ldr r4, [%[a], #80]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #80]\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #84]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #84]\n\t"
+        "ldr r4, [%[a], #88]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #88]\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #92]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #92]\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"    /* capture the final carry as 0/1 */
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+/* AND m into each word of a and store in r.
+ * Callers pass an all-ones or all-zeros mask for constant-time selection.
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<12; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    /* Fully unrolled for speed. */
+    r[0] = a[0] & m;
+    r[1] = a[1] & m;
+    r[2] = a[2] & m;
+    r[3] = a[3] & m;
+    r[4] = a[4] & m;
+    r[5] = a[5] & m;
+    r[6] = a[6] & m;
+    r[7] = a[7] & m;
+    r[8] = a[8] & m;
+    r[9] = a[9] & m;
+    r[10] = a[10] & m;
+    r[11] = a[11] & m;
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ * One level of Karatsuba over the 12-word primitives.
+ * r A single precision integer (48 words).
+ * a A single precision integer (24 words).
+ * b A single precision integer (24 words).
+ */
+SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;       /* low product a0*b0, built in place */
+    sp_digit z1[24];        /* middle product (a0+a1)*(b0+b1) */
+    sp_digit a1[12];
+    sp_digit b1[12];
+    sp_digit z2[24];        /* high product a1*b1 */
+    sp_digit u, ca, cb;
+
+    ca = sp_3072_add_12(a1, a, &a[12]);   /* half sums; carries handled below */
+    cb = sp_3072_add_12(b1, b, &b[12]);
+    u = ca & cb;                          /* carry*carry term of z1 */
+    sp_3072_mul_12(z1, a1, b1);
+    sp_3072_mul_12(z2, &a[12], &b[12]);
+    sp_3072_mul_12(z0, a, b);
+    sp_3072_mask_12(r + 24, a1, 0 - cb);  /* add back the carry-weighted halves */
+    sp_3072_mask_12(b1, b1, 0 - ca);
+    u += sp_3072_add_12(r + 24, r + 24, b1);
+    u += sp_3072_sub_in_place_24(z1, z2); /* z1 -= z2 + z0: middle term */
+    u += sp_3072_sub_in_place_24(z1, z0);
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));   /* clear before adding z2 */
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ * One level of Karatsuba over the 12-word squaring primitives.
+ * r A single precision integer (48 words).
+ * a A single precision integer (24 words).
+ */
+SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;       /* low square a0^2, built in place */
+    sp_digit z2[24];        /* high square a1^2 */
+    sp_digit z1[24];        /* middle square (a0+a1)^2 */
+    sp_digit a1[12];
+    sp_digit u;
+
+    u = sp_3072_add_12(a1, a, &a[12]);    /* carry of the half sum */
+    sp_3072_sqr_12(z1, a1);
+    sp_3072_sqr_12(z2, &a[12]);
+    sp_3072_sqr_12(z0, a);
+    sp_3072_mask_12(r + 24, a1, 0 - u);   /* carry-weighted cross term... */
+    u += sp_3072_add_12(r + 24, r + 24, r + 24);   /* ...doubled */
+    u += sp_3072_sub_in_place_24(z1, z2); /* z1 -= z2 + z0: middle term */
+    u += sp_3072_sub_in_place_24(z1, z0);
+    u += sp_3072_add_24(r + 12, r + 12, z1);
+    r[36] = u;
+    XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));   /* clear before adding z2 */
+    (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Sub b from a into a. (a -= b)
+ * Returns 0 when there is no borrow, all-ones when a borrow occurred.
+ * a A single precision integer, overwritten with the difference.
+ *
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sub r3, r5\n\t"        /* first word: plain sub starts the borrow chain */
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #0]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r6, [%[b], #12]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #8]\n\t"
+        "str r4, [%[a], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r6, [%[b], #20]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #16]\n\t"
+        "str r4, [%[a], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r6, [%[b], #28]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #24]\n\t"
+        "str r4, [%[a], #28]\n\t"
+        "ldr r3, [%[a], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r6, [%[b], #36]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #32]\n\t"
+        "str r4, [%[a], #36]\n\t"
+        "ldr r3, [%[a], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r6, [%[b], #44]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #40]\n\t"
+        "str r4, [%[a], #44]\n\t"
+        "ldr r3, [%[a], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "ldr r6, [%[b], #52]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #48]\n\t"
+        "str r4, [%[a], #52]\n\t"
+        "ldr r3, [%[a], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "ldr r6, [%[b], #60]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #56]\n\t"
+        "str r4, [%[a], #60]\n\t"
+        "ldr r3, [%[a], #64]\n\t"
+        "ldr r4, [%[a], #68]\n\t"
+        "ldr r5, [%[b], #64]\n\t"
+        "ldr r6, [%[b], #68]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #64]\n\t"
+        "str r4, [%[a], #68]\n\t"
+        "ldr r3, [%[a], #72]\n\t"
+        "ldr r4, [%[a], #76]\n\t"
+        "ldr r5, [%[b], #72]\n\t"
+        "ldr r6, [%[b], #76]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #72]\n\t"
+        "str r4, [%[a], #76]\n\t"
+        "ldr r3, [%[a], #80]\n\t"
+        "ldr r4, [%[a], #84]\n\t"
+        "ldr r5, [%[b], #80]\n\t"
+        "ldr r6, [%[b], #84]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #80]\n\t"
+        "str r4, [%[a], #84]\n\t"
+        "ldr r3, [%[a], #88]\n\t"
+        "ldr r4, [%[a], #92]\n\t"
+        "ldr r5, [%[b], #88]\n\t"
+        "ldr r6, [%[b], #92]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #88]\n\t"
+        "str r4, [%[a], #92]\n\t"
+        "ldr r3, [%[a], #96]\n\t"
+        "ldr r4, [%[a], #100]\n\t"
+        "ldr r5, [%[b], #96]\n\t"
+        "ldr r6, [%[b], #100]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #96]\n\t"
+        "str r4, [%[a], #100]\n\t"
+        "ldr r3, [%[a], #104]\n\t"
+        "ldr r4, [%[a], #108]\n\t"
+        "ldr r5, [%[b], #104]\n\t"
+        "ldr r6, [%[b], #108]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #104]\n\t"
+        "str r4, [%[a], #108]\n\t"
+        "ldr r3, [%[a], #112]\n\t"
+        "ldr r4, [%[a], #116]\n\t"
+        "ldr r5, [%[b], #112]\n\t"
+        "ldr r6, [%[b], #116]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #112]\n\t"
+        "str r4, [%[a], #116]\n\t"
+        "ldr r3, [%[a], #120]\n\t"
+        "ldr r4, [%[a], #124]\n\t"
+        "ldr r5, [%[b], #120]\n\t"
+        "ldr r6, [%[b], #124]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #120]\n\t"
+        "str r4, [%[a], #124]\n\t"
+        "sbc %[c], %[c]\n\t"    /* save the borrow: the adds below clobber flags */
+        "add %[a], #0x80\n\t"   /* advance both pointers to the upper half */
+        "add %[b], #0x80\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"      /* restore the borrow into the carry flag */
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbc r3, r5\n\t"        /* continue the borrow chain across the halves */
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #0]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r6, [%[b], #12]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #8]\n\t"
+        "str r4, [%[a], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r6, [%[b], #20]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #16]\n\t"
+        "str r4, [%[a], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r6, [%[b], #28]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #24]\n\t"
+        "str r4, [%[a], #28]\n\t"
+        "ldr r3, [%[a], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r6, [%[b], #36]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #32]\n\t"
+        "str r4, [%[a], #36]\n\t"
+        "ldr r3, [%[a], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r6, [%[b], #44]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #40]\n\t"
+        "str r4, [%[a], #44]\n\t"
+        "ldr r3, [%[a], #48]\n\t"
+        "ldr r4, [%[a], #52]\n\t"
+        "ldr r5, [%[b], #48]\n\t"
+        "ldr r6, [%[b], #52]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #48]\n\t"
+        "str r4, [%[a], #52]\n\t"
+        "ldr r3, [%[a], #56]\n\t"
+        "ldr r4, [%[a], #60]\n\t"
+        "ldr r5, [%[b], #56]\n\t"
+        "ldr r6, [%[b], #60]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #56]\n\t"
+        "str r4, [%[a], #60]\n\t"
+        "sbc %[c], %[c]\n\t"    /* c = 0 - borrow: 0 or all-ones */
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * The 48 32-bit digits are processed in two runs of 32 and 16 words.
+ * Thumb-1 "add" on the pointers overwrites the flags, so between the two
+ * runs the carry is parked in c and later turned back into the C flag by
+ * adding r7 (0xffffffff): c==1 sets C, c==0 clears it.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r7 = 0xffffffff; used below to re-materialise the saved carry. */
+ "mov r7, #0\n\t"
+ "mvn r7, r7\n\t"
+ /* First word uses "add"; the remaining 31 words chain via "adc". */
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "add r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Save the carry in c, step all three pointers past the 32 words just
+ * processed (the adds clobber the flags), then restore the carry flag:
+ * c + 0xffffffff produces a carry out exactly when c == 1. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "add %[r], #0x80\n\t"
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ /* Capture the final carry as 0 or 1. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r7"
+ );
+
+ return c;
+}
+
+/* AND the mask m into every digit of a, writing the result to r.
+ *
+ * r Destination, 24 digits.
+ * a Source, 24 digits.
+ * m Mask ANDed with each digit (typically 0 or all ones).
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i = 0;
+
+    while (i < 24) {
+        r[i] = a[i] & m;
+        i++;
+    }
+#else
+    int j;
+
+    /* Three passes of eight digits each (24 = 3 * 8). */
+    for (j = 0; j < 3; j++) {
+        r[0] = a[0] & m;
+        r[1] = a[1] & m;
+        r[2] = a[2] & m;
+        r[3] = a[3] & m;
+        r[4] = a[4] & m;
+        r[5] = a[5] & m;
+        r[6] = a[6] & m;
+        r[7] = a[7] & m;
+        r += 8;
+        a += 8;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: split each 48-digit operand into 24-digit
+ * halves, do three 24x24 multiplies, and recombine. The carries out of
+ * the two half-sums (ca, cb) are folded back in via the mask/add steps.
+ *
+ * r A single precision integer (96-digit result).
+ * a A single precision integer (48 digits).
+ * b A single precision integer (48 digits).
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[48];
+ sp_digit a1[24];
+ sp_digit b1[24];
+ sp_digit z2[48];
+ sp_digit u, ca, cb;
+
+ ca = sp_3072_add_24(a1, a, &a[24]); /* a1 = a_lo + a_hi, carry out ca */
+ cb = sp_3072_add_24(b1, b, &b[24]); /* b1 = b_lo + b_hi, carry out cb */
+ u = ca & cb; /* carry * carry cross term */
+ sp_3072_mul_24(z1, a1, b1); /* middle product (a_lo+a_hi)*(b_lo+b_hi) */
+ sp_3072_mul_24(z2, &a[24], &b[24]); /* high product a_hi*b_hi */
+ sp_3072_mul_24(z0, a, b); /* low product a_lo*b_lo, written into r */
+ sp_3072_mask_24(r + 48, a1, 0 - cb); /* if cb: add a1 at the middle position */
+ sp_3072_mask_24(b1, b1, 0 - ca); /* if ca: add b1 at the middle position */
+ u += sp_3072_add_24(r + 48, r + 48, b1);
+ u += sp_3072_sub_in_place_48(z1, z2); /* z1 -= z2 (borrow is -1) */
+ u += sp_3072_sub_in_place_48(z1, z0); /* z1 -= z0 -> middle term */
+ u += sp_3072_add_48(r + 24, r + 24, z1); /* add middle term at digit 24 */
+ r[72] = u; /* accumulated carries land at digit 72 */
+ XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+ (void)sp_3072_add_48(r + 48, r + 48, z2); /* add high product at digit 48 */
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring: split the 48-digit operand into two
+ * 24-digit halves and perform three 24-digit squarings. The carry u out
+ * of the half-sum is corrected with the mask/double steps below.
+ *
+ * r A single precision integer (96-digit result).
+ * a A single precision integer (48 digits).
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[48];
+ sp_digit z1[48];
+ sp_digit a1[24];
+ sp_digit u;
+
+ u = sp_3072_add_24(a1, a, &a[24]); /* a1 = a_lo + a_hi, carry out u */
+ sp_3072_sqr_24(z1, a1); /* middle square (a_lo+a_hi)^2 */
+ sp_3072_sqr_24(z2, &a[24]); /* high square a_hi^2 */
+ sp_3072_sqr_24(z0, a); /* low square a_lo^2, written into r */
+ sp_3072_mask_24(r + 48, a1, 0 - u); /* if u: correction term is a1 ... */
+ u += sp_3072_add_24(r + 48, r + 48, r + 48); /* ... doubled (2*u*a1) */
+ u += sp_3072_sub_in_place_48(z1, z2); /* z1 -= z2 (borrow is -1) */
+ u += sp_3072_sub_in_place_48(z1, z0); /* z1 -= z0 -> middle term */
+ u += sp_3072_add_48(r + 24, r + 24, z1); /* add middle term at digit 24 */
+ r[72] = u; /* accumulated carries land at digit 72 */
+ XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+ (void)sp_3072_add_48(r + 48, r + 48, z2); /* add high square at digit 48 */
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * The 96 32-bit digits are processed in three runs of 32 words, two
+ * words per load/subtract/store group. Between runs the borrow is saved
+ * in c (0 or all ones) and re-created in the C flag by computing 0 - c:
+ * that subtraction borrows (clearing C) exactly when c was all ones.
+ *
+ * a A single precision integer; updated in place with the difference.
+ * b A single precision integer.
+ * Returns 0, or all ones (-1) when the subtraction borrowed.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* First pair uses "sub"; all later words chain the borrow via "sbc". */
+ "ldr r3, [%[a], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sub r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #0]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r6, [%[b], #12]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r6, [%[b], #20]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #16]\n\t"
+ "str r4, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r6, [%[b], #28]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r6, [%[b], #36]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #32]\n\t"
+ "str r4, [%[a], #36]\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r6, [%[b], #44]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #40]\n\t"
+ "str r4, [%[a], #44]\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r6, [%[b], #52]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #48]\n\t"
+ "str r4, [%[a], #52]\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r6, [%[b], #60]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #56]\n\t"
+ "str r4, [%[a], #60]\n\t"
+ "ldr r3, [%[a], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r6, [%[b], #68]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #64]\n\t"
+ "str r4, [%[a], #68]\n\t"
+ "ldr r3, [%[a], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r6, [%[b], #76]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #72]\n\t"
+ "str r4, [%[a], #76]\n\t"
+ "ldr r3, [%[a], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r6, [%[b], #84]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #80]\n\t"
+ "str r4, [%[a], #84]\n\t"
+ "ldr r3, [%[a], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r6, [%[b], #92]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #88]\n\t"
+ "str r4, [%[a], #92]\n\t"
+ "ldr r3, [%[a], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r6, [%[b], #100]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #96]\n\t"
+ "str r4, [%[a], #100]\n\t"
+ "ldr r3, [%[a], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r6, [%[b], #108]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #104]\n\t"
+ "str r4, [%[a], #108]\n\t"
+ "ldr r3, [%[a], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r6, [%[b], #116]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #112]\n\t"
+ "str r4, [%[a], #116]\n\t"
+ "ldr r3, [%[a], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r6, [%[b], #124]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #120]\n\t"
+ "str r4, [%[a], #124]\n\t"
+ /* Save the borrow (c = 0 or 0xffffffff), advance both pointers by 32
+ * words, then re-create the borrow in the C flag: 0 - c clears C
+ * exactly when a borrow was pending. r5 is scratch; it is reloaded
+ * immediately below. */
+ "sbc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #0]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r6, [%[b], #12]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r6, [%[b], #20]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #16]\n\t"
+ "str r4, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r6, [%[b], #28]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r6, [%[b], #36]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #32]\n\t"
+ "str r4, [%[a], #36]\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r6, [%[b], #44]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #40]\n\t"
+ "str r4, [%[a], #44]\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r6, [%[b], #52]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #48]\n\t"
+ "str r4, [%[a], #52]\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r6, [%[b], #60]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #56]\n\t"
+ "str r4, [%[a], #60]\n\t"
+ "ldr r3, [%[a], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r6, [%[b], #68]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #64]\n\t"
+ "str r4, [%[a], #68]\n\t"
+ "ldr r3, [%[a], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r6, [%[b], #76]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #72]\n\t"
+ "str r4, [%[a], #76]\n\t"
+ "ldr r3, [%[a], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r6, [%[b], #84]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #80]\n\t"
+ "str r4, [%[a], #84]\n\t"
+ "ldr r3, [%[a], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r6, [%[b], #92]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #88]\n\t"
+ "str r4, [%[a], #92]\n\t"
+ "ldr r3, [%[a], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r6, [%[b], #100]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #96]\n\t"
+ "str r4, [%[a], #100]\n\t"
+ "ldr r3, [%[a], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r6, [%[b], #108]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #104]\n\t"
+ "str r4, [%[a], #108]\n\t"
+ "ldr r3, [%[a], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r6, [%[b], #116]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #112]\n\t"
+ "str r4, [%[a], #116]\n\t"
+ "ldr r3, [%[a], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r6, [%[b], #124]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #120]\n\t"
+ "str r4, [%[a], #124]\n\t"
+ /* Second run done: save the borrow, advance, restore the C flag. */
+ "sbc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #0]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r6, [%[b], #12]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r6, [%[b], #20]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #16]\n\t"
+ "str r4, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r6, [%[b], #28]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r6, [%[b], #36]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #32]\n\t"
+ "str r4, [%[a], #36]\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r6, [%[b], #44]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #40]\n\t"
+ "str r4, [%[a], #44]\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r6, [%[b], #52]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #48]\n\t"
+ "str r4, [%[a], #52]\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r6, [%[b], #60]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #56]\n\t"
+ "str r4, [%[a], #60]\n\t"
+ "ldr r3, [%[a], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r6, [%[b], #68]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #64]\n\t"
+ "str r4, [%[a], #68]\n\t"
+ "ldr r3, [%[a], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r6, [%[b], #76]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #72]\n\t"
+ "str r4, [%[a], #76]\n\t"
+ "ldr r3, [%[a], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r6, [%[b], #84]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #80]\n\t"
+ "str r4, [%[a], #84]\n\t"
+ "ldr r3, [%[a], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r6, [%[b], #92]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #88]\n\t"
+ "str r4, [%[a], #92]\n\t"
+ "ldr r3, [%[a], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r6, [%[b], #100]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #96]\n\t"
+ "str r4, [%[a], #100]\n\t"
+ "ldr r3, [%[a], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r6, [%[b], #108]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #104]\n\t"
+ "str r4, [%[a], #108]\n\t"
+ "ldr r3, [%[a], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r6, [%[b], #116]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #112]\n\t"
+ "str r4, [%[a], #116]\n\t"
+ "ldr r3, [%[a], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r6, [%[b], #124]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #120]\n\t"
+ "str r4, [%[a], #124]\n\t"
+ /* Final borrow out: c = 0 or all ones. */
+ "sbc %[c], %[c]\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * The 96 32-bit digits are processed in three runs of 32 words. The
+ * pointer-advance "add" instructions overwrite the flags, so between
+ * runs the carry is parked in c and turned back into the C flag by
+ * adding r7 (0xffffffff): c==1 sets C, c==0 clears it.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r7 = 0xffffffff; used below to re-materialise the saved carry. */
+ "mov r7, #0\n\t"
+ "mvn r7, r7\n\t"
+ /* First word uses "add"; the remaining words chain via "adc". */
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "add r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Save the carry in c, advance all three pointers by 32 words, then
+ * restore the carry flag: c + 0xffffffff carries out iff c == 1. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "add %[r], #0x80\n\t"
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Second run done: save the carry, advance, restore the C flag. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "add %[r], #0x80\n\t"
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Capture the final carry as 0 or 1. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r7"
+ );
+
+ return c;
+}
+
+/* AND the mask m into every digit of a, writing the result to r.
+ *
+ * r Destination, 48 digits.
+ * a Source, 48 digits.
+ * m Mask ANDed with each digit (typically 0 or all ones).
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i = 0;
+
+    while (i < 48) {
+        r[i] = a[i] & m;
+        i++;
+    }
+#else
+    int j;
+
+    /* Six passes of eight digits each (48 = 6 * 8). */
+    for (j = 0; j < 6; j++) {
+        r[0] = a[0] & m;
+        r[1] = a[1] & m;
+        r[2] = a[2] & m;
+        r[3] = a[3] & m;
+        r[4] = a[4] & m;
+        r[5] = a[5] & m;
+        r[6] = a[6] & m;
+        r[7] = a[7] & m;
+        r += 8;
+        a += 8;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: split each 96-digit operand into 48-digit
+ * halves, do three 48x48 multiplies, and recombine. The carries out of
+ * the two half-sums (ca, cb) are folded back in via the mask/add steps.
+ *
+ * r A single precision integer (192-digit result).
+ * a A single precision integer (96 digits).
+ * b A single precision integer (96 digits).
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[96];
+ sp_digit a1[48];
+ sp_digit b1[48];
+ sp_digit z2[96];
+ sp_digit u, ca, cb;
+
+ ca = sp_3072_add_48(a1, a, &a[48]); /* a1 = a_lo + a_hi, carry out ca */
+ cb = sp_3072_add_48(b1, b, &b[48]); /* b1 = b_lo + b_hi, carry out cb */
+ u = ca & cb; /* carry * carry cross term */
+ sp_3072_mul_48(z1, a1, b1); /* middle product (a_lo+a_hi)*(b_lo+b_hi) */
+ sp_3072_mul_48(z2, &a[48], &b[48]); /* high product a_hi*b_hi */
+ sp_3072_mul_48(z0, a, b); /* low product a_lo*b_lo, written into r */
+ sp_3072_mask_48(r + 96, a1, 0 - cb); /* if cb: add a1 at the middle position */
+ sp_3072_mask_48(b1, b1, 0 - ca); /* if ca: add b1 at the middle position */
+ u += sp_3072_add_48(r + 96, r + 96, b1);
+ u += sp_3072_sub_in_place_96(z1, z2); /* z1 -= z2 (borrow is -1) */
+ u += sp_3072_sub_in_place_96(z1, z0); /* z1 -= z0 -> middle term */
+ u += sp_3072_add_96(r + 48, r + 48, z1); /* add middle term at digit 48 */
+ r[144] = u; /* accumulated carries land at digit 144 */
+ XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+ (void)sp_3072_add_96(r + 96, r + 96, z2); /* add high product at digit 96 */
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring: split the 96-digit operand into two
+ * 48-digit halves and perform three 48-digit squarings. The carry u out
+ * of the half-sum is corrected with the mask/double steps below.
+ *
+ * r A single precision integer (192-digit result).
+ * a A single precision integer (96 digits).
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[96];
+ sp_digit z1[96];
+ sp_digit a1[48];
+ sp_digit u;
+
+ u = sp_3072_add_48(a1, a, &a[48]); /* a1 = a_lo + a_hi, carry out u */
+ sp_3072_sqr_48(z1, a1); /* middle square (a_lo+a_hi)^2 */
+ sp_3072_sqr_48(z2, &a[48]); /* high square a_hi^2 */
+ sp_3072_sqr_48(z0, a); /* low square a_lo^2, written into r */
+ sp_3072_mask_48(r + 96, a1, 0 - u); /* if u: correction term is a1 ... */
+ u += sp_3072_add_48(r + 96, r + 96, r + 96); /* ... doubled (2*u*a1) */
+ u += sp_3072_sub_in_place_96(z1, z2); /* z1 -= z2 (borrow is -1) */
+ u += sp_3072_sub_in_place_96(z1, z0); /* z1 -= z0 -> middle term */
+ u += sp_3072_add_96(r + 48, r + 48, z1); /* add middle term at digit 48 */
+ r[144] = u; /* accumulated carries land at digit 144 */
+ XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+ (void)sp_3072_add_96(r + 96, r + 96, z2); /* add high square at digit 96 */
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Size-optimised loop variant: one word per iteration. The saved carry
+ * in c is re-created in the C flag each pass by adding r7 (0xffffffff),
+ * since the pointer-advance "add" instructions overwrite the flags.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r6 = end of a: a + 0x180 (96 words * 4 bytes); r7 = 0xffffffff. */
+ "mov r6, %[a]\n\t"
+ "mov r7, #0\n\t"
+ "mov r4, #1\n\t"
+ "lsl r4, #8\n\t"
+ "add r4, #128\n\t"
+ "sub r7, #1\n\t"
+ "add r6, r4\n\t"
+ "\n1:\n\t"
+ /* Restore the carry flag from c (c + 0xffffffff carries iff c==1),
+ * add one word with carry, then save the new carry back into c. */
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a]]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r]]\n\t"
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #4\n\t"
+ "add %[b], #4\n\t"
+ "add %[r], #4\n\t"
+ "cmp %[a], r6\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r7"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Processes two 32-bit words per iteration; r7 is the end pointer
+ * (a + 384 bytes).  The "mov r5, #0; sub r5, c" pair at the loop head
+ * only exists to set the borrow flag from the saved borrow c (c is 0
+ * or all-ones from the previous "sbc c, c"); the scratch r5 is then
+ * immediately overwritten by the load.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns 0 when there is no borrow out, all-ones otherwise.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+ __asm__ __volatile__ (
+ "mov r7, %[a]\n\t"
+ "mov r5, #1\n\t"
+ "lsl r5, #8\n\t"
+ "add r5, #128\n\t"
+ "add r7, r5\n\t"
+ "\n1:\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a]]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "sbc %[c], %[c]\n\t"
+ "add %[a], #8\n\t"
+ "add %[b], #8\n\t"
+ "cmp %[a], r7\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Column-oriented schoolbook multiply.  The outer loop (label 1) walks
+ * the 192 output words; the inner loop (label 2) accumulates every
+ * partial product a[i]*b[k-i] for the current column k into the 96-bit
+ * accumulator r3:r4:r5.  Each 32x32 product is built from four 16x16
+ * multiplies because Thumb-1 mul only returns the low 32 bits.  The
+ * branchless sub/sbc/mvn/and sequence after "mov %[a], r8" clamps the
+ * inner start index to max(0, k - 95) words without a branch.
+ * Register roles: r8 = column byte offset, r9 = a, r10 = b, r11 = tmp,
+ * r12 = end-of-a pointer.  The product is assembled in the on-stack
+ * tmp and copied to r at the end so that r may alias a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit tmp[96 * 2];
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r8, r3\n\t"
+ "mov r11, %[r]\n\t"
+ "mov r9, %[a]\n\t"
+ "mov r10, %[b]\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #128\n\t"
+ "add r6, r9\n\t"
+ "mov r12, r6\n\t"
+ "\n1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #124\n\t"
+ "mov %[a], r8\n\t"
+ "sub %[a], r6\n\t"
+ "sbc r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], r6\n\t"
+ "mov %[b], r8\n\t"
+ "sub %[b], %[a]\n\t"
+ "add %[a], r9\n\t"
+ "add %[b], r10\n\t"
+ "\n2:\n\t"
+ "# Multiply Start\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# Multiply Done\n\t"
+ "add %[a], #4\n\t"
+ "sub %[b], #4\n\t"
+ "cmp %[a], r12\n\t"
+ "beq 3f\n\t"
+ "mov r6, r8\n\t"
+ "add r6, r9\n\t"
+ "cmp %[a], r6\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "mov %[r], r11\n\t"
+ "mov r7, r8\n\t"
+ "str r3, [%[r], r7]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "add r7, #4\n\t"
+ "mov r8, r7\n\t"
+ "mov r6, #2\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #248\n\t"
+ "cmp r7, r6\n\t"
+ "ble 1b\n\t"
+ "str r3, [%[r], r7]\n\t"
+ "mov %[a], r9\n\t"
+ "mov %[b], r10\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-oriented squaring exploiting symmetry: for each output column
+ * the inner loop (label 2) walks two pointers toward each other
+ * (%[a] ascending, r2 descending) and adds every cross product twice;
+ * when the pointers meet, the single-square path at label 4 adds the
+ * diagonal term once.  Each 32x32 product uses four (resp. three for
+ * the diagonal) 16x16 multiplies; the accumulator is r3:r4:r5.
+ * The 192-word result is built in a 768-byte buffer carved off the
+ * stack ("add sp, r6" with r6 = -0x300) and copied to r by the loop at
+ * the second label 4, so r may alias a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r8, r3\n\t"
+ "mov r11, %[r]\n\t"
+ "mov r6, #3\n\t"
+ "lsl r6, r6, #8\n\t"
+ "neg r6, r6\n\t"
+ "add sp, r6\n\t"
+ "mov r10, sp\n\t"
+ "mov r9, %[a]\n\t"
+ "\n1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #124\n\t"
+ "mov %[a], r8\n\t"
+ "sub %[a], r6\n\t"
+ "sbc r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], r6\n\t"
+ "mov r2, r8\n\t"
+ "sub r2, %[a]\n\t"
+ "add %[a], r9\n\t"
+ "add r2, r9\n\t"
+ "\n2:\n\t"
+ "cmp r2, %[a]\n\t"
+ "beq 4f\n\t"
+ "# Multiply * 2: Start\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [r2]\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r7, [r2]\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [r2]\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r7, [r2]\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# Multiply * 2: Done\n\t"
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ "# Square: Start\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul r6, r6\n\t"
+ "add r3, r6\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "mul r7, r7\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #15\n\t"
+ "lsl r6, r6, #17\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# Square: Done\n\t"
+ "\n5:\n\t"
+ "add %[a], #4\n\t"
+ "sub r2, #4\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #128\n\t"
+ "add r6, r9\n\t"
+ "cmp %[a], r6\n\t"
+ "beq 3f\n\t"
+ "cmp %[a], r2\n\t"
+ "bgt 3f\n\t"
+ "mov r7, r8\n\t"
+ "add r7, r9\n\t"
+ "cmp %[a], r7\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "mov %[r], r10\n\t"
+ "mov r7, r8\n\t"
+ "str r3, [%[r], r7]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r7, #4\n\t"
+ "mov r8, r7\n\t"
+ "mov r6, #2\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #248\n\t"
+ "cmp r7, r6\n\t"
+ "ble 1b\n\t"
+ "mov %[a], r9\n\t"
+ "str r3, [%[r], r7]\n\t"
+ "mov %[r], r11\n\t"
+ "mov %[a], r10\n\t"
+ "mov r3, #2\n\t"
+ "lsl r3, r3, #8\n\t"
+ "add r3, #252\n\t"
+ "\n4:\n\t"
+ "ldr r6, [%[a], r3]\n\t"
+ "str r6, [%[r], r3]\n\t"
+ "sub r3, #4\n\t"
+ "bge 4b\n\t"
+ "mov r6, #3\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add sp, r6\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * Used as a constant-time conditional copy: m is either 0 or all-ones.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+ int j = 0;
+
+ while (j < 48) {
+ r[j] = a[j] & m;
+ j++;
+ }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Same carry-chain loop as sp_3072_add_96 but over 48 words: r6 is the
+ * end pointer (a + 192 bytes) and r7 = -1, so "add c, r7" sets the
+ * carry flag exactly when the saved carry c is 1 before the adc.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r6, %[a]\n\t"
+ "mov r7, #0\n\t"
+ "add r6, #192\n\t"
+ "sub r7, #1\n\t"
+ "\n1:\n\t"
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a]]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r]]\n\t"
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #4\n\t"
+ "add %[b], #4\n\t"
+ "add %[r], #4\n\t"
+ "cmp %[a], r6\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r7"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Two words per iteration; r7 is the end pointer (a + 192 bytes).
+ * The "mov r5, #0; sub r5, c" pair only sets the borrow flag from the
+ * saved borrow c (0 or all-ones); r5 is then overwritten by the load.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns 0 when there is no borrow out, all-ones otherwise.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+ __asm__ __volatile__ (
+ "mov r7, %[a]\n\t"
+ "add r7, #192\n\t"
+ "\n1:\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a]]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "sbc %[c], %[c]\n\t"
+ "add %[a], #8\n\t"
+ "add %[b], #8\n\t"
+ "cmp %[a], r7\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Column-oriented schoolbook multiply over 48-word inputs; same
+ * structure as sp_3072_mul_96.  For each of the 96 output words the
+ * inner loop (label 2) accumulates a[i]*b[k-i] into r3:r4:r5 using
+ * four 16x16 multiplies per 32x32 product.  The branchless clamp after
+ * "mov %[a], r8" starts the inner index at max(0, k - 47) words.
+ * The product is assembled in the on-stack tmp and copied to r at the
+ * end so that r may alias a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit tmp[48 * 2];
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r8, r3\n\t"
+ "mov r11, %[r]\n\t"
+ "mov r9, %[a]\n\t"
+ "mov r10, %[b]\n\t"
+ "mov r6, #192\n\t"
+ "add r6, r9\n\t"
+ "mov r12, r6\n\t"
+ "\n1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r6, #188\n\t"
+ "mov %[a], r8\n\t"
+ "sub %[a], r6\n\t"
+ "sbc r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], r6\n\t"
+ "mov %[b], r8\n\t"
+ "sub %[b], %[a]\n\t"
+ "add %[a], r9\n\t"
+ "add %[b], r10\n\t"
+ "\n2:\n\t"
+ "# Multiply Start\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# Multiply Done\n\t"
+ "add %[a], #4\n\t"
+ "sub %[b], #4\n\t"
+ "cmp %[a], r12\n\t"
+ "beq 3f\n\t"
+ "mov r6, r8\n\t"
+ "add r6, r9\n\t"
+ "cmp %[a], r6\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "mov %[r], r11\n\t"
+ "mov r7, r8\n\t"
+ "str r3, [%[r], r7]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "add r7, #4\n\t"
+ "mov r8, r7\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #120\n\t"
+ "cmp r7, r6\n\t"
+ "ble 1b\n\t"
+ "str r3, [%[r], r7]\n\t"
+ "mov %[a], r9\n\t"
+ "mov %[b], r10\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-oriented squaring over 48-word input; same structure as the
+ * 96-word variant.  Cross products (label 2, pointers converging) are
+ * added twice; the diagonal square is handled once at label 4.  The
+ * 96-word result is built in a 384-byte stack buffer ("add sp, r6"
+ * with r6 = -0x180) and copied to r by the loop at the second label 4,
+ * so r may alias a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r8, r3\n\t"
+ "mov r11, %[r]\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #128\n\t"
+ "neg r6, r6\n\t"
+ "add sp, r6\n\t"
+ "mov r10, sp\n\t"
+ "mov r9, %[a]\n\t"
+ "\n1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r6, #188\n\t"
+ "mov %[a], r8\n\t"
+ "sub %[a], r6\n\t"
+ "sbc r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], r6\n\t"
+ "mov r2, r8\n\t"
+ "sub r2, %[a]\n\t"
+ "add %[a], r9\n\t"
+ "add r2, r9\n\t"
+ "\n2:\n\t"
+ "cmp r2, %[a]\n\t"
+ "beq 4f\n\t"
+ "# Multiply * 2: Start\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [r2]\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r7, [r2]\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [r2]\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r7, [r2]\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# Multiply * 2: Done\n\t"
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ "# Square: Start\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul r6, r6\n\t"
+ "add r3, r6\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "mul r7, r7\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #15\n\t"
+ "lsl r6, r6, #17\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# Square: Done\n\t"
+ "\n5:\n\t"
+ "add %[a], #4\n\t"
+ "sub r2, #4\n\t"
+ "mov r6, #192\n\t"
+ "add r6, r9\n\t"
+ "cmp %[a], r6\n\t"
+ "beq 3f\n\t"
+ "cmp %[a], r2\n\t"
+ "bgt 3f\n\t"
+ "mov r7, r8\n\t"
+ "add r7, r9\n\t"
+ "cmp %[a], r7\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "mov %[r], r10\n\t"
+ "mov r7, r8\n\t"
+ "str r3, [%[r], r7]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r7, #4\n\t"
+ "mov r8, r7\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #120\n\t"
+ "cmp r7, r6\n\t"
+ "ble 1b\n\t"
+ "mov %[a], r9\n\t"
+ "str r3, [%[r], r7]\n\t"
+ "mov %[r], r11\n\t"
+ "mov %[a], r10\n\t"
+ "mov r3, #1\n\t"
+ "lsl r3, r3, #8\n\t"
+ "add r3, #124\n\t"
+ "\n4:\n\t"
+ "ldr r6, [%[a], r3]\n\t"
+ "str r6, [%[r], r3]\n\t"
+ "sub r3, #4\n\t"
+ "bge 4b\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #128\n\t"
+ "add sp, r6\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * Uses Newton iteration for the modular inverse: starting from an
+ * inverse of a[0] valid mod 2^4, each step inv *= 2 - a[0]*inv doubles
+ * the number of correct low bits.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit m0 = a[0];
+ sp_digit inv;
+ int i;
+
+ /* inv * m0 == 1 mod 2^4 */
+ inv = (((m0 + 2) & 4) << 1) + m0;
+ /* Three Newton steps: correct bits 4 -> 8 -> 16 -> 32. */
+ for (i = 0; i < 3; i++) {
+ inv *= 2 - m0 * inv;
+ }
+
+ /* rho = -1/m mod 2^32 */
+ *rho = (sp_digit)0 - inv;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Loops over all 96 words of a (r9 = end pointer, a + 384 bytes),
+ * forming each 32x32 product a[i]*b from four 16x16 multiplies and
+ * accumulating into r3:r4 (r5 catches overflow).  One result word is
+ * stored per iteration and the final carry word is stored after the
+ * loop, so r receives 97 words.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+ __asm__ __volatile__ (
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #128\n\t"
+ "add r6, %[a]\n\t"
+ "mov r8, %[r]\n\t"
+ "mov r9, r6\n\t"
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r5, #0\n\t"
+ "# A[] * B\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsl r7, %[b], #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "lsr r7, %[b], #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, %[b], #16\n\t"
+ "mul r7, r6\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "lsl r7, %[b], #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# A[] * B - Done\n\t"
+ "mov %[r], r8\n\t"
+ "str r3, [%[r]]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "add %[r], #4\n\t"
+ "add %[a], #4\n\t"
+ "mov r8, %[r]\n\t"
+ "cmp %[a], r9\n\t"
+ "blt 1b\n\t"
+ "str r3, [%[r]]\n\t"
+ : [r] "+r" (r), [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+ );
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
+{
+ int i;
+
+ /* Clear all 48 words of r. */
+ for (i = 0; i < 48; i++) {
+ r[i] = 0;
+ }
+
+ /* 0 - m over 48 words gives 2^1536 - m, i.e. 2^n mod m. */
+ sp_3072_sub_in_place_48(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant time: always walks all 48 words (r8 = 192 bytes), ANDing
+ * the mask into each b word before the sbc, so memory accesses and
+ * instruction flow do not depend on m.  The "mov r5, #0; sub r5, c"
+ * pair only re-arms the borrow flag from the saved borrow c; the
+ * loaded a-word immediately replaces the scratch r5.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, sp_digit m)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r5, #192\n\t"
+ "mov r8, r5\n\t"
+ "mov r7, #0\n\t"
+ "1:\n\t"
+ "ldr r6, [%[b], r7]\n\t"
+ "and r6, %[m]\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r5, [%[a], r7]\n\t"
+ "sbc r5, r6\n\t"
+ "sbc %[c], %[c]\n\t"
+ "str r5, [%[r], r7]\n\t"
+ "add r7, #4\n\t"
+ "cmp r7, r8\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r5", "r6", "r7", "r8"
+ );
+
+ return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * Word-by-word Montgomery reduction (HAC Algorithm 14.32).  Register
+ * roles: r8 = mp, r14 = m, r9 = &a[i], r10 = &a[i+j], r11 = i*4,
+ * r12 = running carry (ca).  For each of the 48 outer iterations:
+ * mu = a[i]*mp (low 32 bits only), the loop at label 2 adds m[j]*mu
+ * into a[i+j] for j = 0..46, and the tail after it handles j = 47
+ * plus carry propagation into a[i+47] and a[i+48].  Each 32x32
+ * product is composed of four 16x16 mul instructions.  Because the a
+ * operand register is advanced 48 words by the loop, the final
+ * conditional subtract is called with a - 48.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ "mov r8, %[mp]\n\t"
+ "mov r12, %[ca]\n\t"
+ "mov r14, %[m]\n\t"
+ "mov r9, %[a]\n\t"
+ "mov r4, #0\n\t"
+ "# i = 0\n\t"
+ "mov r11, r4\n\t"
+ "\n1:\n\t"
+ "mov r5, #0\n\t"
+ "mov %[ca], #0\n\t"
+ "# mu = a[i] * mp\n\t"
+ "mov %[mp], r8\n\t"
+ "ldr %[a], [%[a]]\n\t"
+ "mul %[mp], %[a]\n\t"
+ "mov %[m], r14\n\t"
+ "mov r10, r9\n\t"
+ "\n2:\n\t"
+ "# a[i+j] += m[j] * mu\n\t"
+ "mov %[a], r10\n\t"
+ "ldr %[a], [%[a]]\n\t"
+ "mov %[ca], #0\n\t"
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "# Multiply m[j] and mu - Start\n\t"
+ "ldr r7, [%[m]]\n\t"
+ "lsl r6, %[mp], #16\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add %[a], r7\n\t"
+ "adc r5, %[ca]\n\t"
+ "ldr r7, [%[m]]\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add %[a], r6\n\t"
+ "adc r5, r7\n\t"
+ "ldr r7, [%[m]]\n\t"
+ "lsr r6, %[mp], #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r5, r7\n\t"
+ "ldr r7, [%[m]]\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add %[a], r6\n\t"
+ "adc r5, r7\n\t"
+ "# Multiply m[j] and mu - Done\n\t"
+ "add r4, %[a]\n\t"
+ "adc r5, %[ca]\n\t"
+ "mov %[a], r10\n\t"
+ "str r4, [%[a]]\n\t"
+ "mov r6, #4\n\t"
+ "add %[m], #4\n\t"
+ "add r10, r6\n\t"
+ "mov r4, #188\n\t"
+ "add r4, r9\n\t"
+ "cmp r10, r4\n\t"
+ "blt 2b\n\t"
+ "# a[i+47] += m[47] * mu\n\t"
+ "mov %[ca], #0\n\t"
+ "mov r4, r12\n\t"
+ "mov %[a], #0\n\t"
+ "# Multiply m[47] and mu - Start\n\t"
+ "ldr r7, [%[m]]\n\t"
+ "lsl r6, %[mp], #16\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r5, r7\n\t"
+ "adc r4, %[ca]\n\t"
+ "adc %[a], %[ca]\n\t"
+ "ldr r7, [%[m]]\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r5, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc %[a], %[ca]\n\t"
+ "ldr r7, [%[m]]\n\t"
+ "lsr r6, %[mp], #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r4, r7\n\t"
+ "adc %[a], %[ca]\n\t"
+ "ldr r7, [%[m]]\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r5, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc %[a], %[ca]\n\t"
+ "# Multiply m[47] and mu - Done\n\t"
+ "mov %[ca], %[a]\n\t"
+ "mov %[a], r10\n\t"
+ "ldr r7, [%[a], #4]\n\t"
+ "ldr %[a], [%[a]]\n\t"
+ "mov r6, #0\n\t"
+ "add r5, %[a]\n\t"
+ "adc r7, r4\n\t"
+ "adc %[ca], r6\n\t"
+ "mov %[a], r10\n\t"
+ "str r5, [%[a]]\n\t"
+ "str r7, [%[a], #4]\n\t"
+ "# i += 1\n\t"
+ "mov r6, #4\n\t"
+ "add r9, r6\n\t"
+ "add r11, r6\n\t"
+ "mov r12, %[ca]\n\t"
+ "mov %[a], r9\n\t"
+ "mov r4, #192\n\t"
+ "cmp r11, r4\n\t"
+ "blt 1b\n\t"
+ "mov %[m], r14\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+ );
+
+ /* Final constant-time subtract of m when the carry indicates a >= m;
+ * the asm advanced a past the low 48 words, hence a - 48. */
+ sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication. Must have room for 96 words: the full
+ *   double-width product is written to r and then reduced in place.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_3072_mul_48(r, a, b);
+ sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring. Must have room for 96 words: the full
+ *   double-width square is written to r and then reduced in place.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_3072_sqr_48(r, a);
+ sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Same structure as sp_3072_mul_d_96 but over 48 words (r9 = end
+ * pointer, a + 192 bytes): each a[i]*b is built from four 16x16
+ * multiplies, accumulated into r3:r4 with r5 catching overflow.  One
+ * word is stored per iteration plus a final carry word, so r receives
+ * 49 words (callers pass a 49-word buffer, see sp_3072_div_48).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+ __asm__ __volatile__ (
+ "mov r6, #192\n\t"
+ "add r6, %[a]\n\t"
+ "mov r8, %[r]\n\t"
+ "mov r9, r6\n\t"
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r5, #0\n\t"
+ "# A[] * B\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsl r7, %[b], #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "lsr r7, %[b], #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, %[b], #16\n\t"
+ "mul r7, r6\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "lsl r7, %[b], #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# A[] * B - Done\n\t"
+ "mov %[r], r8\n\t"
+ "str r3, [%[r]]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "add %[r], #4\n\t"
+ "add %[a], #4\n\t"
+ "mov r8, %[r]\n\t"
+ "cmp %[a], r9\n\t"
+ "blt 1b\n\t"
+ "str r3, [%[r]]\n\t"
+ : [r] "+r" (r), [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+ );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * Thumb-1 has no divide instruction, so the quotient is estimated one
+ * bit at a time against r5 = (div >> 1) + 1: first for the top 32
+ * bits of d1, then (loop at label 1) for the next 30 bits shifted in
+ * from d0.  Three rounds of "r * div" (each 32x32 product composed
+ * from four 16x16 muls) then refine the estimate by adding the high
+ * word of the signed remainder back into r, and the final
+ * sub/sbc/sub sequence adds 1 when the remainder is still >= div.
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
+ sp_digit div)
+{
+ sp_digit r = 0;
+
+ __asm__ __volatile__ (
+ "lsr r5, %[div], #1\n\t"
+ "add r5, #1\n\t"
+ "mov r8, %[d0]\n\t"
+ "mov r9, %[d1]\n\t"
+ "# Do top 32\n\t"
+ "mov r6, r5\n\t"
+ "sub r6, %[d1]\n\t"
+ "sbc r6, r6\n\t"
+ "add %[r], %[r]\n\t"
+ "sub %[r], r6\n\t"
+ "and r6, r5\n\t"
+ "sub %[d1], r6\n\t"
+ "# Next 30 bits\n\t"
+ "mov r4, #29\n\t"
+ "1:\n\t"
+ "lsl %[d0], %[d0], #1\n\t"
+ "adc %[d1], %[d1]\n\t"
+ "mov r6, r5\n\t"
+ "sub r6, %[d1]\n\t"
+ "sbc r6, r6\n\t"
+ "add %[r], %[r]\n\t"
+ "sub %[r], r6\n\t"
+ "and r6, r5\n\t"
+ "sub %[d1], r6\n\t"
+ "sub r4, #1\n\t"
+ "bpl 1b\n\t"
+ "mov r7, #0\n\t"
+ "add %[r], %[r]\n\t"
+ "add %[r], #1\n\t"
+ "# r * div - Start\n\t"
+ "lsl %[d1], %[r], #16\n\t"
+ "lsl r4, %[div], #16\n\t"
+ "lsr %[d1], %[d1], #16\n\t"
+ "lsr r4, r4, #16\n\t"
+ "mul r4, %[d1]\n\t"
+ "lsr r6, %[div], #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r5, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r7\n\t"
+ "lsr %[d1], %[r], #16\n\t"
+ "mul r6, %[d1]\n\t"
+ "add r5, r6\n\t"
+ "lsl r6, %[div], #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r6, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r6\n\t"
+ "# r * div - Done\n\t"
+ "mov %[d1], r8\n\t"
+ "sub %[d1], r4\n\t"
+ "mov r4, %[d1]\n\t"
+ "mov %[d1], r9\n\t"
+ "sbc %[d1], r5\n\t"
+ "mov r5, %[d1]\n\t"
+ "add %[r], r5\n\t"
+ "# r * div - Start\n\t"
+ "lsl %[d1], %[r], #16\n\t"
+ "lsl r4, %[div], #16\n\t"
+ "lsr %[d1], %[d1], #16\n\t"
+ "lsr r4, r4, #16\n\t"
+ "mul r4, %[d1]\n\t"
+ "lsr r6, %[div], #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r5, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r7\n\t"
+ "lsr %[d1], %[r], #16\n\t"
+ "mul r6, %[d1]\n\t"
+ "add r5, r6\n\t"
+ "lsl r6, %[div], #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r6, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r6\n\t"
+ "# r * div - Done\n\t"
+ "mov %[d1], r8\n\t"
+ "mov r6, r9\n\t"
+ "sub r4, %[d1], r4\n\t"
+ "sbc r6, r5\n\t"
+ "mov r5, r6\n\t"
+ "add %[r], r5\n\t"
+ "# r * div - Start\n\t"
+ "lsl %[d1], %[r], #16\n\t"
+ "lsl r4, %[div], #16\n\t"
+ "lsr %[d1], %[d1], #16\n\t"
+ "lsr r4, r4, #16\n\t"
+ "mul r4, %[d1]\n\t"
+ "lsr r6, %[div], #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r5, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r7\n\t"
+ "lsr %[d1], %[r], #16\n\t"
+ "mul r6, %[d1]\n\t"
+ "add r5, r6\n\t"
+ "lsl r6, %[div], #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r6, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r6\n\t"
+ "# r * div - Done\n\t"
+ "mov %[d1], r8\n\t"
+ "mov r6, r9\n\t"
+ "sub r4, %[d1], r4\n\t"
+ "sbc r6, r5\n\t"
+ "mov r5, r6\n\t"
+ "add %[r], r5\n\t"
+ "mov r6, %[div]\n\t"
+ "sub r6, r4\n\t"
+ "sbc r6, r6\n\t"
+ "sub %[r], r6\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r7", "r6", "r8", "r9"
+ );
+ return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word (byte offset 188) down to 0
+ * with no data-dependent branches.  r3 is an all-ones "still equal"
+ * mask: once a differing word contributes to r, the mvn/and pairs
+ * clear r3 so no less significant word can change the result.  Each
+ * word pair adds -1, 0 or +1 to r via the two sub/sbc sequences.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+
+
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "mvn r3, r3\n\t"
+ "mov r6, #188\n\t"
+ "1:\n\t"
+ "ldr r7, [%[a], r6]\n\t"
+ "ldr r5, [%[b], r6]\n\t"
+ "and r7, r3\n\t"
+ "and r5, r3\n\t"
+ "mov r4, r7\n\t"
+ "sub r7, r5\n\t"
+ "sbc r7, r7\n\t"
+ "add %[r], r7\n\t"
+ "mvn r7, r7\n\t"
+ "and r3, r7\n\t"
+ "sub r5, r4\n\t"
+ "sbc r7, r7\n\t"
+ "sub %[r], r7\n\t"
+ "mvn r7, r7\n\t"
+ "and r3, r7\n\t"
+ "sub r6, #4\n\t"
+ "cmp r6, #0\n\t"
+ "bge 1b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "r3", "r4", "r5", "r6", "r7"
+ );
+
+ return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division by quotient-digit estimation: each
+ * estimate from div_3072_word_48 may be one too large, so the masked
+ * add-back of d is applied (twice, to absorb any second borrow)
+ * whenever the top word underflows to all-ones.
+ *
+ * a Nmber to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ /* t2 has 49 words: sp_3072_mul_d_48 writes a carry word at t2[48]. */
+ sp_digit t1[96], t2[49];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ div = d[47];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+ for (i=47; i>=0; i--) {
+ /* Estimate quotient digit from the top two words of t1. */
+ r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+ /* t1 -= r1 * d, aligned at word i. */
+ sp_3072_mul_d_48(t2, d, r1);
+ t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+ t1[48 + i] -= t2[48];
+ /* If the subtraction borrowed, t1[48+i] is all-ones and acts as
+ * the mask for the conditional add-back of d. */
+ sp_3072_mask_48(t2, d, t1[48 + i]);
+ t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+ sp_3072_mask_48(t2, d, t1[48 + i]);
+ t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+ }
+
+ /* One final conditional subtract leaves the remainder in r. */
+ r1 = sp_3072_cmp_48(t1, d) >= 0;
+ sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Only the remainder is required, so the quotient output of the
+ * division is suppressed (NULL).
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced (96 words).
+ * m A single precision number that is the modulus.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_3072_div_48(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window Montgomery exponentiation with a table of 16
+ * precomputed powers a^0..a^15 in Montgomery form.  Each table entry
+ * is 96 words because the Montgomery multiply/square routines write a
+ * double-width intermediate into their result buffer.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[16][96];
+#else
+ sp_digit* t[16];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<16; i++) {
+ t[i] = td + i * 96;
+ }
+#endif
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_48(norm, m);
+
+ /* Convert a to Montgomery form: placing a in the high 48 words of
+ * t[1] (low words zero) forms a * 2^1536, and the reduction mod m
+ * yields a * R mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+ if (reduceA != 0) {
+ err = sp_3072_mod_48(t[1] + 48, a, m);
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_48(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+ err = sp_3072_mod_48(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the window table: t[k] = a^k in Montgomery form. */
+ sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+
+ /* Consume the top (possibly partial) window of the exponent.
+ * n holds the current exponent word shifted so its top bits are
+ * next; c counts the unconsumed bits remaining in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 4;
+ if (c == 32) {
+ c = 28;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+ for (; i>=0 || c>=4; ) {
+ /* Extract the next 4-bit window y, refilling n from e[i]
+ * when fewer than 4 bits remain. */
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 28;
+ n <<= 4;
+ c = 28;
+ }
+ else if (c < 4) {
+ y = n >> 28;
+ n = e[i--];
+ c = 4 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+ }
+
+ /* Four squarings then one table multiply per window. */
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+
+ sp_3072_mont_mul_48(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form (reduce r with zero high words)
+ * and perform the final constant-time subtract of m. */
+ XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+ sp_3072_mont_reduce_48(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+ sp_3072_cond_sub_48(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Fixed 5-bit window method: a 32-entry table of powers of a in Montgomery
+ * form is built first, then the exponent is consumed five bits at a time
+ * with five Montgomery squarings and one table multiplication per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced before use.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    /* 32 double-width (96 digit) temporaries: extra space is needed for the
+     * unreduced products fed to the Montgomery reduction. */
+    sp_digit t[32][96];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;    /* negative inverse of m[0] mod 2^32 (mont_setup). */
+    sp_digit n;         /* exponent word currently being scanned. */
+    sp_digit mask;
+    int i;
+    int c, y;           /* c: bits remaining in n; y: current window value. */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        /* Carve the 32 temporaries out of the single heap allocation. */
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 96;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        /* t[0] = 2^1536 mod m, i.e. 1 in Montgomery form (= a^0). */
+        sp_3072_mont_norm_48(norm, m);
+
+        /* t[1] = a in Montgomery form: place a in the upper 48 digits so the
+         * 96-digit value is a*2^1536, then reduce it mod m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_48(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[j] = a^j in Montgomery form, j = 2..31. */
+        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_48(t[20], t[10], m, mp);
+        sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_48(t[22], t[11], m, mp);
+        sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_48(t[24], t[12], m, mp);
+        sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_48(t[26], t[13], m, mp);
+        sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_48(t[28], t[14], m, mp);
+        sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_48(t[30], t[15], m, mp);
+        sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
+
+        /* Read the top window of the exponent; it may be shorter than 5
+         * bits so that all remaining windows are exactly 5 bits. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        /* For each remaining window: 5 squarings then multiply by t[y]. */
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                /* Window starts exactly at the next exponent word. */
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                /* Window straddles two exponent words. */
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                /* Window lies entirely within the current word. */
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+            sp_3072_mont_sqr_48(r, r, m, mp);
+
+            sp_3072_mont_mul_48(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form and bring the result into [0, m)
+         * with a constant-time conditional subtract. */
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
+{
+    int i;
+
+    /* Start from zero, which stands in for 2^3072 once the in-place
+     * subtraction below wraps around. */
+    for (i = 0; i < 96; i++) {
+        r[i] = 0;
+    }
+
+    /* r = 0 - m (mod 2^3072) = 2^3072 mod m, since m has exactly 3072
+     * bits (see the function's contract above). */
+    sp_3072_sub_in_place_96(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ * The same instruction stream executes either way, so the operation is
+ * constant time with respect to the mask.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply (all ones or all zeros).
+ * returns 0 when no borrow occurred, an all-ones mask otherwise.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    /* Thumb-1 assembly.  r8 = byte count (96 digits * 4 = 0x180), r7 =
+     * running byte offset.  Each digit of b is ANDed with the mask before
+     * the subtract-with-borrow; the "mov r5, #0 / sub r5, %[c]" pair only
+     * re-creates the borrow flag from the saved c before the sbc. */
+    __asm__ __volatile__ (
+        "mov r5, #1\n\t"
+        "lsl r5, r5, #8\n\t"
+        "add r5, #128\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "sbc r5, r6\n\t"
+        "sbc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ * For each of the 96 low digits: mu = a[i] * mp, then a += mu * m shifted
+ * by i digits, making the low half zero; the result is taken from the top
+ * half with a final conditional subtract of m.
+ *
+ * a A single precision number to reduce in place (192 digits).
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    /* Thumb-1 assembly.  Register roles: r8 = mp, r9 = &a[i] (advanced one
+     * digit per outer iteration), r10 = &a[i+j], r11 = i*4, r12 = running
+     * carry, r14 = m.  32x32->64 multiplies are synthesized from four
+     * 16x16 mul instructions (Thumb-1 mul is 32x32->32). */
+    __asm__ __volatile__ (
+        "mov r8, %[mp]\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov r14, %[m]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov r11, r4\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "mov %[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov %[mp], r8\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mul %[mp], %[a]\n\t"
+        "mov %[m], r14\n\t"
+        "mov r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov %[a], r10\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add %[a], r7\n\t"
+        "adc r5, %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add r4, %[a]\n\t"
+        "adc r5, %[ca]\n\t"
+        "mov %[a], r10\n\t"
+        "str r4, [%[a]]\n\t"
+        "mov r6, #4\n\t"
+        "add %[m], #4\n\t"
+        "add r10, r6\n\t"
+        "mov r4, #1\n\t"
+        "lsl r4, r4, #8\n\t"
+        "add r4, #124\n\t"
+        "add r4, r9\n\t"
+        "cmp r10, r4\n\t"
+        "blt 2b\n\t"
+        "# a[i+95] += m[95] * mu\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r12\n\t"
+        "mov %[a], #0\n\t"
+        "# Multiply m[95] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "adc r4, %[ca]\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "# Multiply m[95] and mu - Done\n\t"
+        "mov %[ca], %[a]\n\t"
+        "mov %[a], r10\n\t"
+        "ldr r7, [%[a], #4]\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov r6, #0\n\t"
+        "add r5, %[a]\n\t"
+        "adc r7, r4\n\t"
+        "adc %[ca], r6\n\t"
+        "mov %[a], r10\n\t"
+        "str r5, [%[a]]\n\t"
+        "str r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r9, r6\n\t"
+        "add r11, r6\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov %[a], r9\n\t"
+        "mov r4, #1\n\t"
+        "lsl r4, r4, #8\n\t"
+        "add r4, #128\n\t"
+        "cmp r11, r4\n\t"
+        "blt 1b\n\t"
+        "mov %[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* The asm advanced 'a' by one digit per outer iteration (96 in total),
+     * so 'a' now points at the top half holding the result and 'a - 96' is
+     * the original base.  Subtract m once if the final carry was set. */
+    sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ * Plain double-width multiply followed by Montgomery reduction.
+ *
+ * r Result of multiplication (96 digits; needs 192 digits of space for
+ *   the intermediate product).
+ * a First number to multiply in Montogmery form.
+ * b Second number to multiply in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_96(r, a, b);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ * Dedicated squaring followed by Montgomery reduction.
+ *
+ * r Result of squaring (96 digits; needs 192 digits of space for the
+ *   intermediate product).
+ * a Number to square in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_96(r, a);
+    sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    /* Thumb-1 assembly.  No hardware divide: the quotient is built one bit
+     * at a time against r5 = div/2 + 1 (top bit + 30 more), then refined
+     * by three rounds of "subtract r*div from d1:d0 and fold the high part
+     * back into r", plus a final +1-or-not adjustment.  The 32x32 products
+     * are synthesized from 16x16 mul instructions. */
+    __asm__ __volatile__ (
+        "lsr r5, %[div], #1\n\t"
+        "add r5, #1\n\t"
+        "mov r8, %[d0]\n\t"
+        "mov r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        "lsl %[d0], %[d0], #1\n\t"
+        "adc %[d1], %[d1]\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "sub r4, #1\n\t"
+        "bpl 1b\n\t"
+        "mov r7, #0\n\t"
+        "add %[r], %[r]\n\t"
+        "add %[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "sub %[d1], r4\n\t"
+        "mov r4, %[d1]\n\t"
+        "mov %[d1], r9\n\t"
+        "sbc %[d1], r5\n\t"
+        "mov r5, %[d1]\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        "mov r6, %[div]\n\t"
+        "sub r6, r4\n\t"
+        "sbc r6, r6\n\t"
+        "sub %[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND the mask m into every digit of a, storing the result in r.
+ * With m == all-ones this copies a; with m == 0 it zeroes r — used as a
+ * branch-free conditional copy.
+ *
+ * r A single precision integer (96 digits).
+ * a A single precision integer (96 digits).
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i = 0;
+
+    /* Small build: plain digit-by-digit masked copy. */
+    while (i < 96) {
+        r[i] = a[i] & m;
+        i++;
+    }
+#else
+    int i;
+
+    /* Speed build: eight digits per iteration; local pointers avoid
+     * recomputing r + i / a + i in every statement. */
+    for (i = 0; i < 96; i += 8) {
+        sp_digit*       rd = r + i;
+        const sp_digit* ad = a + i;
+
+        rd[0] = ad[0] & m;
+        rd[1] = ad[1] & m;
+        rd[2] = ad[2] & m;
+        rd[3] = ad[3] & m;
+        rd[4] = ad[4] & m;
+        rd[5] = ad[5] & m;
+        rd[6] = ad[6] & m;
+        rd[7] = ad[7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    /* Thumb-1 assembly.  Scans from the most significant digit (byte
+     * offset 380 = 95*4) down to 0.  r3 starts as all-ones and is cleared
+     * once the first differing digit pair has been seen, so later digits
+     * cannot change the result while every digit is still processed
+     * (constant time). */
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #124\n\t"
+        "1:\n\t"
+        "ldr r7, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r7, r3\n\t"
+        "and r5, r3\n\t"
+        "mov r4, r7\n\t"
+        "sub r7, r5\n\t"
+        "sbc r7, r7\n\t"
+        "add %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r5, r4\n\t"
+        "sbc r7, r7\n\t"
+        "sub %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ * Constant-time schoolbook division: the quotient-digit correction is done
+ * with masked add-backs rather than branches.
+ *
+ * a Nmber to be divided (double width: 192 digits).
+ * d Number to divide with.  Assumes d[95] != 0, i.e. a full 3072-bit
+ *   modulus — TODO confirm for all callers.
+ * m Multiplier result (unused).
+ * r Remainder from the division (96 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[192], t2[97];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[95];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+    for (i=95; i>=0; i--) {
+        /* Estimate the quotient digit from the top two working digits;
+         * may be up to 1 too large (see div_3072_word_96). */
+        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+        /* Subtract r1 * d from the current window of t1. */
+        sp_3072_mul_d_96(t2, d, r1);
+        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+        t1[96 + i] -= t2[96];
+        /* t1[96 + i] is now 0, or an all-ones borrow mask when the
+         * estimate was too big; use it directly as an AND mask to add d
+         * back (twice, branch-free) until the window is non-negative. */
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+        sp_3072_mask_96(t2, d, t1[96 + i]);
+        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+    }
+
+    /* Final correction: the remainder may still be >= d once. */
+    r1 = sp_3072_cmp_96(t1, d) >= 0;
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Constant-time variant — thin wrapper over sp_3072_div_96, keeping only
+ * the remainder.
+ *
+ * r A single precision number that is the reduced result (96 digits).
+ * a A single precision number that is to be reduced (192 digits).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_96(a, m, NULL, r);
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ * "_cond" variant: the quotient-digit correction uses branches instead of
+ * masks, so it is NOT constant time — used where operands are public
+ * (e.g. the RSA public-key path below).
+ *
+ * a Nmber to be divided (double width: 192 digits).
+ * d Number to divide with.  Assumes d[95] != 0, i.e. a full 3072-bit
+ *   modulus — TODO confirm for all callers.
+ * m Multiplier result (unused).
+ * r Remainder from the division (96 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[192], t2[97];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[95];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+    for (i=95; i>=0; i--) {
+        /* Estimate the quotient digit; may be up to 1 too large. */
+        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+        sp_3072_mul_d_96(t2, d, r1);
+        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+        t1[96 + i] -= t2[96];
+        /* Non-zero top digit means the subtraction underflowed: add d
+         * back until it is zero again (at most twice). */
+        if (t1[96 + i] != 0) {
+            t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+            if (t1[96 + i] != 0)
+                t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+        }
+    }
+
+    /* Final correction: the remainder may still be >= d once. */
+    r1 = sp_3072_cmp_96(t1, d) >= 0;
+    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Non-constant-time variant — thin wrapper over sp_3072_div_96_cond,
+ * keeping only the remainder.
+ *
+ * r A single precision number that is the reduced result (96 digits).
+ * a A single precision number that is to be reduced (192 digits).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_96_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Small-code variant: fixed 4-bit window with a 16-entry table of powers
+ * of a in Montgomery form; four squarings and one table multiply per
+ * window of the exponent.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced before use.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    /* 16 double-width (192 digit) temporaries on the stack. */
+    sp_digit t[16][192];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;    /* negative inverse of m[0] mod 2^32 (mont_setup). */
+    sp_digit n;         /* exponent word currently being scanned. */
+    sp_digit mask;
+    int i;
+    int c, y;           /* c: bits remaining in n; y: current window value. */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        /* Carve the 16 temporaries out of the single heap allocation. */
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 192;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        /* t[0] = 2^3072 mod m, i.e. 1 in Montgomery form (= a^0). */
+        sp_3072_mont_norm_96(norm, m);
+
+        /* t[1] = a in Montgomery form: place a in the upper 96 digits so
+         * the 192-digit value is a*2^3072, then reduce it mod m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_96(t[1] + 96, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_96(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[j] = a^j in Montgomery form, j = 2..15. */
+        sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+
+        /* Read the top window of the exponent; it may be shorter than 4
+         * bits so that all remaining windows are exactly 4 bits. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+        /* For each remaining window: 4 squarings then multiply by t[y]. */
+        for (; i>=0 || c>=4; ) {
+            if (c == 0) {
+                /* Window starts exactly at the next exponent word. */
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                /* Window straddles two exponent words. */
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                /* Window lies entirely within the current word. */
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+
+            sp_3072_mont_mul_96(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form and bring the result into [0, m)
+         * with a constant-time conditional subtract. */
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+        sp_3072_mont_reduce_96(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+        sp_3072_cond_sub_96(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Speed variant: fixed 5-bit window with a 32-entry table of powers of a
+ * in Montgomery form; five squarings and one table multiply per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced before use.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    /* 32 double-width (192 digit) temporaries on the stack. */
+    sp_digit t[32][192];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;    /* negative inverse of m[0] mod 2^32 (mont_setup). */
+    sp_digit n;         /* exponent word currently being scanned. */
+    sp_digit mask;
+    int i;
+    int c, y;           /* c: bits remaining in n; y: current window value. */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        /* Carve the 32 temporaries out of the single heap allocation. */
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 192;
+        }
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        /* t[0] = 2^3072 mod m, i.e. 1 in Montgomery form (= a^0). */
+        sp_3072_mont_norm_96(norm, m);
+
+        /* t[1] = a in Montgomery form: place a in the upper 96 digits so
+         * the 192-digit value is a*2^3072, then reduce it mod m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+        if (reduceA != 0) {
+            err = sp_3072_mod_96(t[1] + 96, a, m);
+            if (err == MP_OKAY) {
+                err = sp_3072_mod_96(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[j] = a^j in Montgomery form, j = 2..31. */
+        sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_96(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_96(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_96(t[20], t[10], m, mp);
+        sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_96(t[22], t[11], m, mp);
+        sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_96(t[24], t[12], m, mp);
+        sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_96(t[26], t[13], m, mp);
+        sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_96(t[28], t[14], m, mp);
+        sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_96(t[30], t[15], m, mp);
+        sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp);
+
+        /* Read the top window of the exponent; it may be shorter than 5
+         * bits so that all remaining windows are exactly 5 bits. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+        /* For each remaining window: 5 squarings then multiply by t[y]. */
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                /* Window starts exactly at the next exponent word. */
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                /* Window straddles two exponent words. */
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                /* Window lies entirely within the current word. */
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+            sp_3072_mont_sqr_96(r, r, m, mp);
+
+            sp_3072_mont_mul_96(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form and bring the result into [0, m)
+         * with a constant-time conditional subtract. */
+        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+        sp_3072_mont_reduce_96(r, m, mp);
+
+        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+        sp_3072_cond_sub_96(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent (must fit in 32 bits).
+ * mm Modulus (must be exactly 3072 bits).
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[192], m[96], r[192];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+#endif
+    sp_digit *ah;       /* upper half of a; holds the input value. */
+    sp_digit e[1];      /* the (single digit) public exponent. */
+    int err = MP_OKAY;
+
+    /* Validate sizes before touching any buffers. */
+    if (*outLen < 384)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 ||
+                                                 mp_count_bits(mm) != 3072))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* One allocation split into a (192), r (192) and m (96) digits. */
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 96 * 2;
+        m = r + 96 * 2;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        ah = a + 96;
+
+        sp_3072_from_bin(ah, 96, in, inLen);
+#if DIGIT_BIT >= 32
+        e[0] = em->dp[0];
+#else
+        /* mp_int digits are narrower than 32 bits: the (<= 32 bit)
+         * exponent may span two of them. */
+        e[0] = em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(m, 96, mm);
+
+        if (e[0] == 0x3) {
+            /* Common exponent 3: in^3 = (in^2 mod m) * in mod m. */
+            if (err == MP_OKAY) {
+                sp_3072_sqr_96(r, ah);
+                err = sp_3072_mod_96_cond(r, r, m);
+            }
+            if (err == MP_OKAY) {
+                sp_3072_mul_96(r, ah, r);
+                err = sp_3072_mod_96_cond(r, r, m);
+            }
+        }
+        else {
+            /* General left-to-right binary exponentiation in Montgomery
+             * form.  Operands are public here, so the non-constant-time
+             * helpers are acceptable. */
+            int i;
+            sp_digit mp;
+
+            sp_3072_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form: a's low half is zeroed while ah
+             * (its upper half) holds the input, so the 192-digit value is
+             * in * 2^3072; reducing mod m yields in * R mod m. */
+            XMEMSET(a, 0, sizeof(sp_digit) * 96);
+            err = sp_3072_mod_96_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                /* Find the exponent's highest set bit. */
+                for (i = 31; i >= 0; i--) {
+                    if (e[0] >> i) {
+                        break;
+                    }
+                }
+
+                /* Square-and-multiply over the remaining bits. */
+                XMEMCPY(r, a, sizeof(sp_digit) * 96);
+                for (i--; i>=0; i--) {
+                    sp_3072_mont_sqr_96(r, r, m, mp);
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_3072_mont_mul_96(r, r, a, m, mp);
+                    }
+                }
+                /* Convert out of Montgomery form ... */
+                XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
+                sp_3072_mont_reduce_96(r, m, mp);
+
+                /* ... and subtract m once if r >= m (plain compare is fine
+                 * for the public operation). */
+                for (i = 95; i > 0; i--) {
+                    if (r[i] != m[i]) {
+                        break;
+                    }
+                }
+                if (r[i] >= m[i]) {
+                    sp_3072_sub_in_place_96(r, m);
+                }
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* NOTE(review): this #if branch appears to begin mid-function — no
+ * sp_RsaPrivate_3072() signature or opening brace precedes these
+ * declarations, while the #else branch below carries a complete
+ * definition.  Presumably the header was lost when this file was
+ * generated/extracted; confirm against the upstream wolfSSL source. */
+/* Private-exponent (d) path: one full modular exponentiation with the
+ * 3072-bit modulus, no CRT. */
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    /* CRT parameters are unused in this path. */
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        /* Buffer layout: d[0..95] = exponent, a[0..191] = base/work area
+         * (r aliases a; safe, r is only written after a is consumed),
+         * m[0..95] = modulus. */
+        a = d + 96;
+        m = a + 192;
+        r = a;
+
+        sp_3072_from_bin(a, 96, in, inLen);
+        sp_3072_from_mp(d, 96, dm);
+        sp_3072_from_mp(m, 96, mm);
+        err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        /* Zeroise the private exponent before freeing. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 96);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ * The same instruction stream executes either way, so the operation is
+ * constant time with respect to the mask.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply (all ones or all zeros).
+ * returns the carry out of the (masked) addition (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    /* Thumb-1 assembly.  r8 = byte count (48 digits * 4 = 192), r7 =
+     * running byte offset.  Each digit of b is ANDed with the mask before
+     * the add-with-carry; the "mov r5, #0 / sub r5, #1 / add r5, %[c]"
+     * sequence only re-creates the carry flag from the saved c. */
+    __asm__ __volatile__ (
+        "mov r5, #192\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, #1\n\t"
+        "add r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "adc r5, r6\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* RSA private key operation.
+ * CRT implementation: computes in^dp mod p and in^dq mod q on the 1536-bit
+ * halves, then recombines with Garner's formula using qInv.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent (unused in the CRT path).
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus (only its bit count is checked here).
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit a[96 * 2];
+    sp_digit p[48], q[48], dp[48];
+    sp_digit tmpa[96], tmpb[96];
+#else
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+#endif
+    sp_digit* r;
+    sp_digit* qi;
+    sp_digit* dq;
+    sp_digit c;
+    int err = MP_OKAY;
+
+    /* dm is not needed for CRT; mm is still read for the size check below,
+     * the cast merely silences unused warnings in some configurations. */
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 384)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        /* Carve the work areas from one 528-digit allocation.  dp, dq and
+         * qi deliberately share storage: they are used strictly one after
+         * another.  r aliases a's upper half; a is fully consumed before
+         * r is written at the end. */
+        a = t;
+        p = a + 96 * 2;
+        q = p + 48;
+        qi = dq = dp = q + 48;
+        tmpa = qi + 48;
+        tmpb = tmpa + 96;
+
+        r = t + 96;
+    }
+#else
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        /* Static-buffer build: same aliasing as above. */
+        r = a;
+        qi = dq = dp;
+#endif
+        sp_3072_from_bin(a, 96, in, inLen);
+        sp_3072_from_mp(p, 48, pm);
+        sp_3072_from_mp(q, 48, qm);
+        sp_3072_from_mp(dp, 48, dpm);
+
+        /* tmpa = in^dp mod p (reduceA set: in may exceed p). */
+        err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
+    }
+    if (err == MP_OKAY) {
+        /* tmpb = in^dq mod q. */
+        sp_3072_from_mp(dq, 48, dqm);
+        err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        /* tmpa = (tmpa - tmpb) mod p: up to two masked add-backs of p keep
+         * the difference non-negative without branching on secrets. */
+        c = sp_3072_sub_in_place_48(tmpa, tmpb);
+        c += sp_3072_cond_add_48(tmpa, tmpa, p, c);
+        sp_3072_cond_add_48(tmpa, tmpa, p, c);
+
+        /* tmpa = tmpa * qInv mod p (Garner's h). */
+        sp_3072_from_mp(qi, 48, qim);
+        sp_3072_mul_48(tmpa, tmpa, qi);
+        err = sp_3072_mod_48(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* Recombine: result = tmpb + q * h. */
+        sp_3072_mul_48(tmpa, q, tmpa);
+        XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
+        sp_3072_add_96(r, tmpb, tmpa);
+
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        /* Zeroise all key-dependent material before freeing. */
+        XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * The packing depends on the compile-time width of mp_digit (DIGIT_BIT)
+ * relative to the 32-bit sp_digit words.
+ *
+ * a A single precision integer, 96 32-bit words (3072 bits).
+ * r A multi-precision integer.
+ * returns MP_OKAY on success, or the error from mp_grow on failure.
+ */
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ /* mp_digit and sp_digit are the same width - straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
+ r->used = 96;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* mp_digit is narrower than 32 bits: split each source word across
+ * several mp_int digits. s is the bit offset into the current word. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 96; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp_digit is wider than 32 bits: pack several source words into
+ * each mp_int digit. s is the bit offset into the current digit. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 96; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ /* Mask off bits that spill past the digit width. */
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * res = base ^ exp mod mod, where the modulus is exactly 3072 bits.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ /* b doubles as the result buffer; r aliases it. */
+ sp_digit b[192], e[96], m[96];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ /* Validate operand sizes before converting. */
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 96, base);
+ sp_3072_from_mp(e, 96, exp);
+ sp_3072_from_mp(m, 96, mod);
+
+ err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_3072_to_mp(r, res);
+ }
+
+ /* Zeroize the exponent copy - it may be private key material. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+/* Shift number left by n bits. (r = a << n)
+ *
+ * r Result, at least 97 32-bit words; r[96] receives the bits shifted
+ * out of a[95]. r may alias a (used in-place by sp_3072_mod_exp_2_96).
+ * a Number to shift, 96 32-bit words.
+ * n Number of bits to shift: 0 <= n < 32.
+ */
+static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
+{
+ /* Words are processed from the most-significant end downwards in
+ * chunks of sixteen, each word's carry bits ORed into the word above.
+ * NOTE(review): %[r] and %[a] are modified inside the asm but declared
+ * as plain inputs; the code relies on them not being reused after the
+ * asm statement - confirm against GCC extended-asm constraints. */
+ __asm__ __volatile__ (
+ /* r6 = 31 - n. Carry bits are computed as (x >> 1) >> r6, i.e.
+ * x >> (32 - n), which avoids an out-of-range shift by 32 when
+ * n == 0. */
+ "mov r6, #31\n\t"
+ "sub r6, r6, %[n]\n\t"
+ /* Point a and r at byte offset 320 (word 80). Thumb immediates
+ * are limited to 8 bits, so 320 is added as 255 + 65. */
+ "add %[a], %[a], #255\n\t"
+ "add %[r], %[r], #255\n\t"
+ "add %[a], %[a], #65\n\t"
+ "add %[r], %[r], #65\n\t"
+ /* Top word a[95]: its carry-out becomes r[96]. */
+ "ldr r3, [%[a], #60]\n\t"
+ "lsr r4, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r4, r4, r6\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ /* Step down to the next lower 16-word chunk. */
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ /* Step down to the next lower 16-word chunk. */
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #52]\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "str r3, [%[r], #56]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #44]\n\t"
+ "str r2, [%[r], #52]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #40]\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "str r3, [%[r], #44]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "str r2, [%[r], #40]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #28]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "str r2, [%[r], #28]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ /* Step down to the next lower 16-word chunk. */
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "str r2, [%[r], #68]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ /* Step down to the next lower 16-word chunk. */
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ /* Step down to the lowest 16-word chunk. */
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #52]\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "str r3, [%[r], #56]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #44]\n\t"
+ "str r2, [%[r], #52]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #40]\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "str r3, [%[r], #44]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "str r2, [%[r], #40]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #28]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "str r2, [%[r], #28]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ /* Store the two lowest result words. */
+ "str r4, [%[r]]\n\t"
+ "str r2, [%[r], #4]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+ : "memory", "r2", "r3", "r4", "r5", "r6"
+ );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * Left-to-right exponentiation with 5-bit windows. Because the base is 2,
+ * each window multiplication is just a left shift; the word that overflows
+ * the 96-word result is folded back in modulo m.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
+ const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit nd[192];
+ sp_digit td[97];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* 289 = 192 (norm) + 97 (tmp, one extra word for overflow). */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 192;
+#else
+ norm = nd;
+ tmp = td;
+#endif
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_96(norm, m);
+
+ /* First window: take the top (bits % 5) bits - or a full 5 bits
+ * when bits is a multiple of 5. n caches the current exponent
+ * word; c is the number of unconsumed bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ /* r = norm * 2^y: start in Montgomery form. */
+ sp_3072_lshift_96(r, norm, y);
+ for (; i>=0 || c>=5; ) {
+ /* Refill the bit buffer and extract the next 5-bit window y. */
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles a word boundary. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^32 (five Montgomery squarings)... */
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+
+ /* ...then r *= 2^y as a shift; fold the overflow word r[96]
+ * back in via norm and conditionally subtract m on carry. */
+ sp_3072_lshift_96(r, r, y);
+ sp_3072_mul_d_96(tmp, norm, r[96]);
+ r[96] = 0;
+ o = sp_3072_add_96(r, r, tmp);
+ sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
+ }
+
+ /* Convert out of Montgomery form and reduce to canonical range
+ * with a masked (branch-free) conditional subtract. */
+ XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+ sp_3072_mont_reduce_96(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+ sp_3072_cond_sub_96(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* HAVE_FFDHE_3072 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+ int err = MP_OKAY;
+ /* b doubles as the result buffer; r aliases it. */
+ sp_digit b[192], e[96], m[96];
+ sp_digit* r = b;
+ word32 i;
+
+ /* Validate operand sizes before converting. */
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 384) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 96, base);
+ sp_3072_from_bin(e, 96, exp, expLen);
+ sp_3072_from_mp(m, 96, mod);
+
+ #ifdef HAVE_FFDHE_3072
+ /* Fast path for 2^e mod m: base is 2 and the modulus top word is
+ * all ones, as with the FFDHE 3072 prime. */
+ if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1)
+ err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
+ else
+ #endif
+ err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
+
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ /* Strip leading zero bytes from the big-endian output. */
+ for (i=0; i<384 && out[i] == 0; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+
+ }
+
+ /* Zeroize the private exponent copy. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * res = base ^ exp mod mod for a 1536-bit modulus, reusing the 3072-bit
+ * routines at half width (48 words).
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ /* b doubles as the result buffer; r aliases it. */
+ sp_digit b[96], e[48], m[48];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ /* Validate operand sizes before converting. */
+ if (mp_count_bits(base) > 1536) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 1536) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 1536) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 48, base);
+ sp_3072_from_mp(e, 48, exp);
+ sp_3072_from_mp(m, 48, mod);
+
+ err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Clear the upper half: sp_3072_to_mp converts a full 96 words.
+ * Then trim the result to the modulus size. */
+ XMEMSET(r + 48, 0, sizeof(*r) * 48U);
+ err = sp_3072_to_mp(r, res);
+ res->used = mod->used;
+ mp_clamp(res);
+ }
+
+ /* Zeroize the exponent copy - it may be private key material. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * Bytes are consumed from the end of the array (least significant first)
+ * and packed into 32-bit words; any remaining words are zeroed.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ /* s is the bit offset within the current output word. */
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 24U) {
+ /* This byte straddles a word boundary: finish the current
+ * word and start the next one with the remaining bits. */
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ /* Zero the untouched high words. */
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * The repacking depends on the compile-time width of mp_digit (DIGIT_BIT)
+ * relative to the 32-bit sp_digit words; unused high words are zeroed.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+ /* Same digit width - straight copy plus zero fill. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 32
+ /* mp_digit wider than 32 bits: split each digit across several
+ * 32-bit words. s is the bit offset into the current digit. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 32U) <= (word32)DIGIT_BIT) {
+ s += 32U;
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp_digit narrower than 32 bits: pack several digits per word.
+ * s is the bit offset into the current output word. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 32) {
+ /* Digit straddles a word boundary. */
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 32 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * Emits the 128 little-endian 32-bit words of r as 512 big-endian bytes,
+ * filling the byte array from its end backwards.
+ *
+ * r A single precision integer, 128 32-bit words.
+ * a Byte array, at least 512 bytes.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+ /* s: bits of the previous word already carried into a[j];
+ * b: bit position consumed within the current word. */
+ int i, j, s = 0, b;
+
+ j = 4096 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<128 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ /* Emit the remaining full bytes of this word. */
+ while (b < 32) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ /* Leftover bits (if the word did not end on a byte boundary)
+ * spill into the next output byte. */
+ s = 8 - (b - 32);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Adds 64 32-bit words (2048 bits) in two straight-line 32-word passes;
+ * used as a building block for the 4096-bit operations.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the carry out of the most significant word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r7 = 0xffffffff; used below to regenerate the carry flag after
+ * the pointer-advance instructions clobber it. */
+ "mov r7, #0\n\t"
+ "mvn r7, r7\n\t"
+ /* First word uses a plain add (no carry-in); the rest chain the
+ * carry with adc. */
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "add r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Capture the carry in c, advance all three pointers by 128
+ * bytes (which clobbers the flags), then regenerate the carry
+ * flag: c + 0xffffffff carries out iff c == 1. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "add %[r], #0x80\n\t"
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Return the final carry in c. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r7"
+ );
+
+ return c;
+}
+
+/* Subtract b from a, in place. (a -= b)
+ *
+ * The 128 words are processed in four blocks of 32 because Thumb-1
+ * ldr/str immediate offsets only reach #124; between blocks both
+ * pointers are advanced by 0x80 and the borrow is re-established in
+ * the C flag (the flag-setting ADDs would otherwise destroy it).
+ *
+ * a A single precision integer, overwritten with the difference.
+ * b A single precision integer.
+ * Returns the final borrow: 0 or (sp_digit)-1.
+ */
+SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* Words 0..31: the first pair uses SUB to start the chain, every
+ * following pair continues the borrow with SBC. */
+ "ldr r3, [%[a], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sub r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #0]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r6, [%[b], #12]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r6, [%[b], #20]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #16]\n\t"
+ "str r4, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r6, [%[b], #28]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r6, [%[b], #36]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #32]\n\t"
+ "str r4, [%[a], #36]\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r6, [%[b], #44]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #40]\n\t"
+ "str r4, [%[a], #44]\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r6, [%[b], #52]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #48]\n\t"
+ "str r4, [%[a], #52]\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r6, [%[b], #60]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #56]\n\t"
+ "str r4, [%[a], #60]\n\t"
+ "ldr r3, [%[a], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r6, [%[b], #68]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #64]\n\t"
+ "str r4, [%[a], #68]\n\t"
+ "ldr r3, [%[a], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r6, [%[b], #76]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #72]\n\t"
+ "str r4, [%[a], #76]\n\t"
+ "ldr r3, [%[a], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r6, [%[b], #84]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #80]\n\t"
+ "str r4, [%[a], #84]\n\t"
+ "ldr r3, [%[a], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r6, [%[b], #92]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #88]\n\t"
+ "str r4, [%[a], #92]\n\t"
+ "ldr r3, [%[a], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r6, [%[b], #100]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #96]\n\t"
+ "str r4, [%[a], #100]\n\t"
+ "ldr r3, [%[a], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r6, [%[b], #108]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #104]\n\t"
+ "str r4, [%[a], #108]\n\t"
+ "ldr r3, [%[a], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r6, [%[b], #116]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #112]\n\t"
+ "str r4, [%[a], #116]\n\t"
+ "ldr r3, [%[a], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r6, [%[b], #124]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #120]\n\t"
+ "str r4, [%[a], #124]\n\t"
+ /* Save the borrow (c = 0 or -1), advance to words 32..63, then
+ * "0 - c" borrows exactly when c == -1, putting the saved borrow
+ * back into the C flag for the next SBC chain. */
+ "sbc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #0]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r6, [%[b], #12]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r6, [%[b], #20]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #16]\n\t"
+ "str r4, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r6, [%[b], #28]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r6, [%[b], #36]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #32]\n\t"
+ "str r4, [%[a], #36]\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r6, [%[b], #44]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #40]\n\t"
+ "str r4, [%[a], #44]\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r6, [%[b], #52]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #48]\n\t"
+ "str r4, [%[a], #52]\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r6, [%[b], #60]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #56]\n\t"
+ "str r4, [%[a], #60]\n\t"
+ "ldr r3, [%[a], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r6, [%[b], #68]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #64]\n\t"
+ "str r4, [%[a], #68]\n\t"
+ "ldr r3, [%[a], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r6, [%[b], #76]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #72]\n\t"
+ "str r4, [%[a], #76]\n\t"
+ "ldr r3, [%[a], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r6, [%[b], #84]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #80]\n\t"
+ "str r4, [%[a], #84]\n\t"
+ "ldr r3, [%[a], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r6, [%[b], #92]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #88]\n\t"
+ "str r4, [%[a], #92]\n\t"
+ "ldr r3, [%[a], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r6, [%[b], #100]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #96]\n\t"
+ "str r4, [%[a], #100]\n\t"
+ "ldr r3, [%[a], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r6, [%[b], #108]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #104]\n\t"
+ "str r4, [%[a], #108]\n\t"
+ "ldr r3, [%[a], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r6, [%[b], #116]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #112]\n\t"
+ "str r4, [%[a], #116]\n\t"
+ "ldr r3, [%[a], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r6, [%[b], #124]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #120]\n\t"
+ "str r4, [%[a], #124]\n\t"
+ /* Same borrow save/restore before words 64..95. */
+ "sbc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #0]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r6, [%[b], #12]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r6, [%[b], #20]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #16]\n\t"
+ "str r4, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r6, [%[b], #28]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r6, [%[b], #36]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #32]\n\t"
+ "str r4, [%[a], #36]\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r6, [%[b], #44]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #40]\n\t"
+ "str r4, [%[a], #44]\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r6, [%[b], #52]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #48]\n\t"
+ "str r4, [%[a], #52]\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r6, [%[b], #60]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #56]\n\t"
+ "str r4, [%[a], #60]\n\t"
+ "ldr r3, [%[a], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r6, [%[b], #68]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #64]\n\t"
+ "str r4, [%[a], #68]\n\t"
+ "ldr r3, [%[a], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r6, [%[b], #76]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #72]\n\t"
+ "str r4, [%[a], #76]\n\t"
+ "ldr r3, [%[a], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r6, [%[b], #84]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #80]\n\t"
+ "str r4, [%[a], #84]\n\t"
+ "ldr r3, [%[a], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r6, [%[b], #92]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #88]\n\t"
+ "str r4, [%[a], #92]\n\t"
+ "ldr r3, [%[a], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r6, [%[b], #100]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #96]\n\t"
+ "str r4, [%[a], #100]\n\t"
+ "ldr r3, [%[a], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r6, [%[b], #108]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #104]\n\t"
+ "str r4, [%[a], #108]\n\t"
+ "ldr r3, [%[a], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r6, [%[b], #116]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #112]\n\t"
+ "str r4, [%[a], #116]\n\t"
+ "ldr r3, [%[a], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r6, [%[b], #124]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #120]\n\t"
+ "str r4, [%[a], #124]\n\t"
+ /* Same borrow save/restore before the final words 96..127. */
+ "sbc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #0]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r6, [%[b], #12]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r6, [%[b], #20]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #16]\n\t"
+ "str r4, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r6, [%[b], #28]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "ldr r6, [%[b], #36]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #32]\n\t"
+ "str r4, [%[a], #36]\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "ldr r6, [%[b], #44]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #40]\n\t"
+ "str r4, [%[a], #44]\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "ldr r6, [%[b], #52]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #48]\n\t"
+ "str r4, [%[a], #52]\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "ldr r6, [%[b], #60]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #56]\n\t"
+ "str r4, [%[a], #60]\n\t"
+ "ldr r3, [%[a], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "ldr r6, [%[b], #68]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #64]\n\t"
+ "str r4, [%[a], #68]\n\t"
+ "ldr r3, [%[a], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "ldr r6, [%[b], #76]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #72]\n\t"
+ "str r4, [%[a], #76]\n\t"
+ "ldr r3, [%[a], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "ldr r6, [%[b], #84]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #80]\n\t"
+ "str r4, [%[a], #84]\n\t"
+ "ldr r3, [%[a], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "ldr r6, [%[b], #92]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #88]\n\t"
+ "str r4, [%[a], #92]\n\t"
+ "ldr r3, [%[a], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "ldr r6, [%[b], #100]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #96]\n\t"
+ "str r4, [%[a], #100]\n\t"
+ "ldr r3, [%[a], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "ldr r6, [%[b], #108]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #104]\n\t"
+ "str r4, [%[a], #108]\n\t"
+ "ldr r3, [%[a], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "ldr r6, [%[b], #116]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #112]\n\t"
+ "str r4, [%[a], #116]\n\t"
+ "ldr r3, [%[a], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "ldr r6, [%[b], #124]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #120]\n\t"
+ "str r4, [%[a], #124]\n\t"
+ /* c = 0 - borrow: result is 0 or -1. */
+ "sbc %[c], %[c]\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * The 128 words are processed in four blocks of 32 because Thumb-1
+ * ldr/str immediate offsets only reach #124. r7 holds 0xffffffff for
+ * the whole routine so that "add %[c], r7" (i.e. c + (-1)) re-creates
+ * the saved carry in the C flag after the flag-clobbering pointer
+ * adjustments between blocks.
+ *
+ * r A single precision integer; receives the sum.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the final carry (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r7 = 0xffffffff, used for the carry-restore trick below. */
+ "mov r7, #0\n\t"
+ "mvn r7, r7\n\t"
+ /* Words 0..31: first word uses ADD, the rest chain with ADC. */
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "add r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Save carry into c, advance to words 32..63, then c + 0xffffffff
+ * carries out exactly when c == 1, restoring the C flag. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "add %[r], #0x80\n\t"
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Same carry save/restore before words 64..95. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "add %[r], #0x80\n\t"
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Same carry save/restore before the final words 96..127. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #0x80\n\t"
+ "add %[b], #0x80\n\t"
+ "add %[r], #0x80\n\t"
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "ldr r5, [%[b], #32]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "ldr r5, [%[b], #36]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "ldr r5, [%[b], #40]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "ldr r5, [%[b], #44]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "ldr r5, [%[b], #48]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "ldr r5, [%[b], #52]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "ldr r5, [%[b], #56]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "ldr r5, [%[b], #60]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "ldr r4, [%[a], #64]\n\t"
+ "ldr r5, [%[b], #64]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "ldr r4, [%[a], #68]\n\t"
+ "ldr r5, [%[b], #68]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "ldr r4, [%[a], #72]\n\t"
+ "ldr r5, [%[b], #72]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #72]\n\t"
+ "ldr r4, [%[a], #76]\n\t"
+ "ldr r5, [%[b], #76]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #76]\n\t"
+ "ldr r4, [%[a], #80]\n\t"
+ "ldr r5, [%[b], #80]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #80]\n\t"
+ "ldr r4, [%[a], #84]\n\t"
+ "ldr r5, [%[b], #84]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #84]\n\t"
+ "ldr r4, [%[a], #88]\n\t"
+ "ldr r5, [%[b], #88]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #88]\n\t"
+ "ldr r4, [%[a], #92]\n\t"
+ "ldr r5, [%[b], #92]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #92]\n\t"
+ "ldr r4, [%[a], #96]\n\t"
+ "ldr r5, [%[b], #96]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #96]\n\t"
+ "ldr r4, [%[a], #100]\n\t"
+ "ldr r5, [%[b], #100]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #100]\n\t"
+ "ldr r4, [%[a], #104]\n\t"
+ "ldr r5, [%[b], #104]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #104]\n\t"
+ "ldr r4, [%[a], #108]\n\t"
+ "ldr r5, [%[b], #108]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #108]\n\t"
+ "ldr r4, [%[a], #112]\n\t"
+ "ldr r5, [%[b], #112]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #112]\n\t"
+ "ldr r4, [%[a], #116]\n\t"
+ "ldr r5, [%[b], #116]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #116]\n\t"
+ "ldr r4, [%[a], #120]\n\t"
+ "ldr r5, [%[b], #120]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #120]\n\t"
+ "ldr r4, [%[a], #124]\n\t"
+ "ldr r5, [%[b], #124]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #124]\n\t"
+ /* Final carry out: 0 or 1. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r7"
+ );
+
+ return c;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (column-wise) schoolbook multiply of two 64-word
+ * numbers. Each 32x32->64 digit product is built from four 16x16 MULs
+ * because Thumb-1 MUL only produces the low 32 bits. The result is
+ * accumulated into a local double-size buffer and copied out at the
+ * end, so r may alias a or b.
+ *
+ * r A single precision integer; receives the 128-word product.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit tmp[64 * 2];
+ __asm__ __volatile__ (
+ /* r3:r4:r5 = column accumulator, r8 = output byte offset,
+ * r9/r10/r11 = saved a/b/tmp, r12 = a + 256 (end of a). */
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r8, r3\n\t"
+ "mov r11, %[r]\n\t"
+ "mov r9, %[a]\n\t"
+ "mov r10, %[b]\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r9\n\t"
+ "mov r12, r6\n\t"
+ /* Outer loop: one output column (byte offset r8) per iteration. */
+ "\n1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r5, #0\n\t"
+ /* %[a] = max(0, r8 - 252): byte offset of the first a digit for
+ * this column (borrow from the SUB builds the clamp mask). */
+ "mov r6, #252\n\t"
+ "mov %[a], r8\n\t"
+ "sub %[a], r6\n\t"
+ "sbc r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], r6\n\t"
+ /* %[b] = r8 - %[a]: matching b offset so a-index + b-index = r8. */
+ "mov %[b], r8\n\t"
+ "sub %[b], %[a]\n\t"
+ "add %[a], r9\n\t"
+ "add %[b], r10\n\t"
+ /* Inner loop: accumulate a[i] * b[k-i] into r3:r4:r5.
+ * %[r] stays 0 and serves as a zero register for the ADCs. */
+ "\n2:\n\t"
+ "# Multiply Start\n\t"
+ /* lo(a) * lo(b) */
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ /* lo(a) * hi(b), shifted left 16 */
+ "ldr r7, [%[b]]\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ /* hi(a) * hi(b), into the middle/high words */
+ "ldr r6, [%[a]]\n\t"
+ "ldr r7, [%[b]]\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ /* hi(a) * lo(b), shifted left 16 */
+ "ldr r7, [%[b]]\n\t"
+ "lsl r7, r7, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# Multiply Done\n\t"
+ /* Step a forward and b backward; stop at the end of a or when
+ * the a offset passes the column offset. */
+ "add %[a], #4\n\t"
+ "sub %[b], #4\n\t"
+ "cmp %[a], r12\n\t"
+ "beq 3f\n\t"
+ "mov r6, r8\n\t"
+ "add r6, r9\n\t"
+ "cmp %[a], r6\n\t"
+ "ble 2b\n\t"
+ /* Store the finished column word and shift the accumulator down. */
+ "\n3:\n\t"
+ "mov %[r], r11\n\t"
+ "mov r7, r8\n\t"
+ "str r3, [%[r], r7]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "add r7, #4\n\t"
+ "mov r8, r7\n\t"
+ /* Loop over columns 0..126 (byte offsets 0..0x1f8 = 256 + 248). */
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, #248\n\t"
+ "cmp r7, r6\n\t"
+ "ble 1b\n\t"
+ /* Top word (column 127) is whatever is left in the accumulator. */
+ "str r3, [%[r], r7]\n\t"
+ "mov %[a], r9\n\t"
+ "mov %[b], r10\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Used for constant-time conditional copies: m is expected to be an
+ * all-ones or all-zeros digit mask.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ /* Simple word-at-a-time loop keeps code size down. */
+ for (i = 0; i < 64; ++i) {
+ r[i] = a[i] & m;
+ }
+#else
+ int i = 0;
+
+ /* Unrolled eight words per pass. */
+ do {
+ r[i] = a[i] & m;
+ r[i + 1] = a[i + 1] & m;
+ r[i + 2] = a[i + 2] & m;
+ r[i + 3] = a[i + 3] & m;
+ r[i + 4] = a[i + 4] & m;
+ r[i + 5] = a[i + 5] & m;
+ r[i + 6] = a[i + 6] & m;
+ r[i + 7] = a[i + 7] & m;
+ i += 8;
+ } while (i < 64);
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: split the 128-digit operands into 64-digit
+ * halves, do three 64-digit multiplies, and recombine.  Note z0 aliases
+ * r, so the order of the operations below matters.
+ *
+ * r A single precision integer (256 digits of result).
+ * a A single precision integer (128 digits).
+ * b A single precision integer (128 digits).
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit b1[64];
+    sp_digit z2[128];
+    sp_digit u, ca, cb;
+
+    /* a1 = aL + aH, b1 = bL + bH; ca/cb are the carries out of the adds. */
+    ca = sp_2048_add_64(a1, a, &a[64]);
+    cb = sp_2048_add_64(b1, b, &b[64]);
+    /* u starts as the product of the two carry bits (the 2^4096 term
+     * of (aL+aH)*(bL+bH)). */
+    u  = ca & cb;
+    sp_2048_mul_64(z1, a1, b1);              /* z1 = (aL+aH)*(bL+bH) low part */
+    sp_2048_mul_64(z2, &a[64], &b[64]);      /* z2 = aH*bH */
+    sp_2048_mul_64(z0, a, b);                /* z0 = aL*bL (written into r) */
+    /* Fold the carry-cross terms cb*a1 and ca*b1 into the middle words. */
+    sp_2048_mask_64(r + 128, a1, 0 - cb);
+    sp_2048_mask_64(b1, b1, 0 - ca);
+    u += sp_2048_add_64(r + 128, r + 128, b1);
+    /* z1 -= z2 + z0: classic Karatsuba middle term. */
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    /* Clear the top words before adding in the high product. */
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Thumb-1 style inline assembly (registers r8+ only reachable via MOV;
+ * ADC/SBC use the two-operand forms) — assumes a Cortex-M0/ARMv6-M-class
+ * target; TODO confirm against the build configuration.
+ * Computes the 128-word square of a 64-word input column by column into a
+ * 0x200-byte scratch buffer carved off the stack, then copies it to r.
+ * Register roles: r3:r4:r5 = 3-word column accumulator, r8 = byte offset
+ * of the current output word, r9 = a, r10 = scratch base (old sp - 0x200),
+ * r11 = saved r, r2 = pointer running down while %[a] runs up.
+ *
+ * r A single precision integer (128 digits of result).
+ * a A single precision integer (64 digits).
+ */
+SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        /* Reserve 0x200 bytes of stack for the temporary result. */
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "neg r6, r6\n\t"
+        "add sp, r6\n\t"
+        "mov r10, sp\n\t"
+        "mov r9, %[a]\n\t"
+        /* Outer loop: one output word (column) per iteration. */
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        /* %[a] = max(0, offset - 252): start of the diagonal pair walk. */
+        "mov r6, #252\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov r2, r8\n\t"
+        "sub r2, %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add r2, r9\n\t"
+        /* Inner loop: a[i]*a[j] pairs with i < j (doubled), until the
+         * pointers meet in the middle. */
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal 5f\n\t"
+        /* Diagonal element: a[i]^2, added once.  NOTE: local label 4 is
+         * reused later for the copy loop; 4f/4b resolve to the nearest. */
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r6\n\t"
+        "add r3, r6\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "mul r7, r7\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r7\n\t"
+        /* Cross 16x16 product counted twice: shift by 17/15 instead of
+         * 16 to fold in the doubling. */
+        "lsr r7, r6, #15\n\t"
+        "lsl r6, r6, #17\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add %[a], #4\n\t"
+        "sub r2, #4\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r7, r8\n\t"
+        "add r7, r9\n\t"
+        "cmp %[a], r7\n\t"
+        "ble 2b\n\t"
+        /* Store the finished column and slide the accumulator window. */
+        "\n3:\n\t"
+        "mov %[r], r10\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        /* Loop while offset <= 0x1F8 (penultimate output word). */
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #248\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r9\n\t"
+        "str r3, [%[r], r7]\n\t"
+        /* Copy the scratch result down into r (offset 0x1FC .. 0). */
+        "mov %[r], r11\n\t"
+        "mov %[a], r10\n\t"
+        "mov r3, #1\n\t"
+        "lsl r3, r3, #8\n\t"
+        "add r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "sub r3, #4\n\t"
+        "bge 4b\n\t"
+        /* Release the 0x200-byte scratch area. */
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring: split into 64-digit halves, three
+ * 64-digit squares, then recombine.  z0 aliases r, so the order of the
+ * operations below matters.
+ *
+ * r A single precision integer (256 digits of result).
+ * a A single precision integer (128 digits).
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z2[128];
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit u;
+
+    /* a1 = aL + aH; u holds the carry bit. */
+    u = sp_2048_add_64(a1, a, &a[64]);
+    sp_2048_sqr_64(z1, a1);          /* z1 = (aL+aH)^2 low part */
+    sp_2048_sqr_64(z2, &a[64]);      /* z2 = aH^2 */
+    sp_2048_sqr_64(z0, a);           /* z0 = aL^2 (written into r) */
+    /* If the half-sum carried, add 2*u*a1 at the 2^2048 position. */
+    sp_2048_mask_64(r + 128, a1, 0 - u);
+    u += sp_2048_add_64(r + 128, r + 128, r + 128);
+    /* z1 -= z2 + z0: Karatsuba middle term. */
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    /* Clear the top words before adding in the high square. */
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * 128-word (4096-bit) addition, one word per loop iteration.  The carry
+ * lives in the C variable c between iterations: r7 is kept at -1 so that
+ * "add c, r7" (c + 0xFFFFFFFF) re-arms the hardware carry flag from c
+ * before the adc, and "adc c, c" with c==0 captures the new carry.
+ *
+ * r A single precision integer (result, 128 digits).
+ * a A single precision integer (128 digits).
+ * b A single precision integer (128 digits).
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r6 = a + 0x200: end pointer; r7 = -1 (carry re-arm constant). */
+        "mov r6, %[a]\n\t"
+        "mov r7, #0\n\t"
+        "mov r4, #2\n\t"
+        "lsl r4, #8\n\t"
+        "sub r7, #1\n\t"
+        "add r6, r4\n\t"
+        "\n1:\n\t"
+        /* Restore carry flag from c (c==1 sets C, c==0 clears it). */
+        "add %[c], r7\n\t"
+        "ldr r4, [%[a]]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        /* Capture the carry into c for the next iteration. */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "add %[a], #4\n\t"
+        "add %[b], #4\n\t"
+        "add %[r], #4\n\t"
+        "cmp %[a], r6\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * 128-word (4096-bit) in-place subtraction, two words per loop iteration.
+ * The borrow lives in c between iterations as 0 (no borrow) or
+ * 0xFFFFFFFF (borrow): "mov r5,#0; sub r5,c" re-arms the hardware carry
+ * flag from c before the sbc chain, and "sbc c, c" re-captures it.
+ *
+ * a A single precision integer, modified in place (128 digits).
+ * b A single precision integer (128 digits).
+ * Returns 0 when no borrow, 0xFFFFFFFF (i.e. -1) when a borrow occurred.
+ */
+SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        /* r7 = a + 0x200: end pointer. */
+        "mov r7, %[a]\n\t"
+        "mov r5, #2\n\t"
+        "lsl r5, #8\n\t"
+        "add r7, r5\n\t"
+        "\n1:\n\t"
+        /* Restore borrow flag from c; r5 is immediately reloaded but the
+         * flags survive the loads. */
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        /* c = 0 or -1 depending on the final borrow. */
+        "sbc %[c], %[c]\n\t"
+        "add %[a], #8\n\t"
+        "add %[b], #8\n\t"
+        "cmp %[a], r7\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Small-code schoolbook multiply: for each output word k, sums all
+ * products a[i]*b[k-i] into a 3-word accumulator (r3:r4:r5) using 16x16
+ * partial products (Thumb-1 MUL is 32x32->32 only).  The result is built
+ * in a local buffer and copied to r so r may alias a or b.
+ * Register roles: r8 = output byte offset k*4, r9 = a, r10 = b,
+ * r11 = tmp, r12 = a + 0x200 (end of a).  NOTE(review): %[a] and %[b]
+ * are input-only operands but are advanced inside the asm and restored
+ * from r9/r10 at the end — presumably deliberate; confirm against newer
+ * upstream which uses "+r" here.
+ *
+ * r A single precision integer (256 digits of result).
+ * a A single precision integer (128 digits).
+ * b A single precision integer (128 digits).
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[128 * 2];
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r10, %[b]\n\t"
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r9\n\t"
+        "mov r12, r6\n\t"
+        /* Outer loop: one output word per iteration. */
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        /* %[a] = max(0, k*4 - 0x1FC): first a-index of this diagonal. */
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #252\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov %[b], r8\n\t"
+        "sub %[b], %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add %[b], r10\n\t"
+        /* Inner loop: accumulate a[i] * b[k-i] with 16-bit halves. */
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add %[a], #4\n\t"
+        "sub %[b], #4\n\t"
+        "cmp %[a], r12\n\t"
+        "beq 3f\n\t"
+        "mov r6, r8\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        /* Store word k and slide the accumulator window down one word. */
+        "\n3:\n\t"
+        "mov %[r], r11\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        /* Loop while offset <= 0x3F8 (254 of 256 output words). */
+        "mov r6, #3\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #248\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[a], r9\n\t"
+        "mov %[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Small-code column squaring of a 128-word input into a 0x400-byte
+ * scratch buffer taken from the stack, then copied to r (so r may alias
+ * a).  Off-diagonal products are added twice; the diagonal a[i]^2 once.
+ * Register roles: r3:r4:r5 = 3-word column accumulator, r8 = output byte
+ * offset, r9 = a, r10 = scratch base (old sp - 0x400), r11 = saved r,
+ * r2 = high-index pointer walking down while %[a] walks up.
+ * Thumb-1 style assembly — assumes an ARMv6-M-class target; TODO confirm.
+ *
+ * r A single precision integer (256 digits of result).
+ * a A single precision integer (128 digits).
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        /* Reserve 0x400 bytes of stack for the temporary result. */
+        "mov r6, #4\n\t"
+        "lsl r6, r6, #8\n\t"
+        "neg r6, r6\n\t"
+        "add sp, r6\n\t"
+        "mov r10, sp\n\t"
+        "mov r9, %[a]\n\t"
+        /* Outer loop: one output word (column) per iteration. */
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        /* %[a] = max(0, offset - 0x1FC): start of the pair walk. */
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #252\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov r2, r8\n\t"
+        "sub r2, %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add r2, r9\n\t"
+        /* Inner loop: a[i]*a[j], i < j, each product added twice. */
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal 5f\n\t"
+        /* Diagonal element a[i]^2, added once.  Local label 4 is reused
+         * below for the copy loop; 4f/4b resolve to the nearest. */
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r6\n\t"
+        "add r3, r6\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "mul r7, r7\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r7\n\t"
+        /* Cross product counted twice: shift by 17/15 folds in the x2. */
+        "lsr r7, r6, #15\n\t"
+        "lsl r6, r6, #17\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add %[a], #4\n\t"
+        "sub r2, #4\n\t"
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r7, r8\n\t"
+        "add r7, r9\n\t"
+        "cmp %[a], r7\n\t"
+        "ble 2b\n\t"
+        /* Store the finished column and slide the accumulator window. */
+        "\n3:\n\t"
+        "mov %[r], r10\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        /* Loop while offset <= 0x3F8 (penultimate output word). */
+        "mov r6, #3\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #248\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r9\n\t"
+        "str r3, [%[r], r7]\n\t"
+        /* Copy the scratch result into r (offset 0x3FC .. 0). */
+        "mov %[r], r11\n\t"
+        "mov %[a], r10\n\t"
+        "mov r3, #3\n\t"
+        "lsl r3, r3, #8\n\t"
+        "add r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "sub r3, #4\n\t"
+        "bge 4b\n\t"
+        /* Release the 0x400-byte scratch area. */
+        "mov r6, #4\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * Uses the standard Newton iteration for an inverse modulo a power of
+ * two: each step doubles the number of correct low bits.
+ *
+ * a   A single precision number (a[0] must be odd).
+ * rho Receives the bottom word of the negated inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit b = a[0];
+    /* Seed: x * b == 1 mod 2**4. */
+    sp_digit x = (((b + 2) & 4) << 1) + b;
+    int i;
+
+    /* Three Newton steps lift correctness to mod 2**8, 2**16, 2**32. */
+    for (i = 0; i < 3; i++) {
+        x *= 2 - b * x;
+    }
+
+    /* rho = -1/m mod 2**32 */
+    *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * One pass over the 128 words of a, multiplying each by the single digit
+ * b via four 16x16 partial products and carrying through a 3-word
+ * accumulator (r3:r4:r5).  Writes 129 result words (final top word is
+ * stored after the loop).
+ * Register roles: r8 = current r pointer, r9 = a + 0x200 (end pointer).
+ *
+ * r A single precision integer (129 digits of result).
+ * a A single precision integer (128 digits).
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, %[a]\n\t"
+        "mov r8, %[r]\n\t"
+        "mov r9, r6\n\t"
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "1:\n\t"
+        /* %[r] doubles as a zero register for the adc chain here. */
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        /* Store the finished word; slide the accumulator window. */
+        "mov %[r], r8\n\t"
+        "str r3, [%[r]]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add %[r], #4\n\t"
+        "add %[a], #4\n\t"
+        "mov r8, %[r]\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        /* Top (129th) word of the product. */
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract: since m has its top
+ * bit set, 2^4096 mod m == 2^4096 - m, which in 128-word arithmetic is
+ * simply 0 - m (the borrow out of the top word is discarded).
+ *
+ * r A single precision number (result, 128 digits).
+ * m A single precision number (modulus, full 4096 bits).
+ */
+static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
+{
+    /* r = 0 */
+    XMEMSET(r, 0, sizeof(sp_digit) * 128);
+
+    /* r = 2^n mod m */
+    sp_4096_sub_in_place_128(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant-time: the loop always runs all 128 words; the mask simply
+ * turns each subtracted word into 0.  The borrow is kept in c between
+ * iterations (0 or -1); "mov r5,#0; sub r5,c" re-arms the carry flag
+ * before the sbc (r5 is then reloaded — the flags survive the load).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * Returns 0 when no borrow, -1 (all ones) when a borrow occurred.
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r8 = 0x200: byte count; r7 = running byte offset. */
+        "mov r5, #2\n\t"
+        "lsl r5, r5, #8\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        /* Restore borrow flag from c before the sbc. */
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "sbc r5, r6\n\t"
+        "sbc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * For each of the 128 low words: mu = a[i] * mp, then a[i..] += mu * m,
+ * which zeroes a[i]; the overall carry is threaded through r12/ca.
+ * Register roles: r8 = mp, r9 = &a[i], r10 = &a[i+j], r11 = i*4,
+ * r12 = inter-row carry, r14 = m.  %[a] is consumed as a scratch
+ * register inside the loops and left pointing past the processed words
+ * on exit — which is why the final call below uses (a - 128).
+ *
+ * a  A single precision number to reduce in place (256 digits).
+ * m  The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov r8, %[mp]\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov r14, %[m]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov r11, r4\n\t"
+        /* Outer loop: one row per low word a[i]. */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "mov %[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov %[mp], r8\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mul %[mp], %[a]\n\t"
+        "mov %[m], r14\n\t"
+        "mov r10, r9\n\t"
+        /* Inner loop over j = 0..126: a[i+j] += m[j] * mu. */
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov %[a], r10\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add %[a], r7\n\t"
+        "adc r5, %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add r4, %[a]\n\t"
+        "adc r5, %[ca]\n\t"
+        "mov %[a], r10\n\t"
+        "str r4, [%[a]]\n\t"
+        "mov r6, #4\n\t"
+        "add %[m], #4\n\t"
+        "add r10, r6\n\t"
+        /* Stop at j == 127 (offset 0x1FC from the row base). */
+        "mov r4, #1\n\t"
+        "lsl r4, r4, #8\n\t"
+        "add r4, #252\n\t"
+        "add r4, r9\n\t"
+        "cmp r10, r4\n\t"
+        "blt 2b\n\t"
+        /* Last column of the row, with full 3-word carry handling. */
+        "# a[i+127] += m[127] * mu\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r12\n\t"
+        "mov %[a], #0\n\t"
+        "# Multiply m[127] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "adc r4, %[ca]\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "# Multiply m[127] and mu - Done\n\t"
+        /* Fold the row's carry into a[i+127] and a[i+128]. */
+        "mov %[ca], %[a]\n\t"
+        "mov %[a], r10\n\t"
+        "ldr r7, [%[a], #4]\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov r6, #0\n\t"
+        "add r5, %[a]\n\t"
+        "adc r7, r4\n\t"
+        "adc %[ca], r6\n\t"
+        "mov %[a], r10\n\t"
+        "str r5, [%[a]]\n\t"
+        "str r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r9, r6\n\t"
+        "add r11, r6\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov %[a], r9\n\t"
+        /* 128 rows: loop while i*4 < 0x200. */
+        "mov r4, #2\n\t"
+        "lsl r4, r4, #8\n\t"
+        "cmp r11, r4\n\t"
+        "blt 1b\n\t"
+        "mov %[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* a was advanced by 128 words in the asm; subtract m once if the
+     * final carry says the upper half is still >= m. */
+    sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full-width multiply followed by Montgomery reduction of the 256-word
+ * product back to 128 words.
+ *
+ * r  Result of multiplication (128 digits).
+ * a  First number to multiply in Montgomery form.
+ * b  Second number to multiply in Montgomery form.
+ * m  Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_128(r, a, b);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full-width square followed by Montgomery reduction of the 256-word
+ * product back to 128 words.
+ *
+ * r  Result of squaring (128 digits).
+ * a  Number to square in Montgomery form.
+ * m  Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_128(r, a);
+    sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * Bitwise restoring division against r5 = div/2 + 1 produces the top 31
+ * quotient bits, then up to three rounds of "estimate * div, subtract,
+ * correct" refine the low end.  The final sbb-derived mask adjusts for
+ * a remainder still >= div.
+ *
+ * d1  The high order half of the number to divide.
+ * d0  The low order half of the number to divide.
+ * div The dividend (divisor; top bit expected set by callers).
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        /* r5 = div/2 + 1: comparison threshold for one quotient bit. */
+        "lsr r5, %[div], #1\n\t"
+        "add r5, #1\n\t"
+        "mov r8, %[d0]\n\t"
+        "mov r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        /* r6 = -(d1 >= r5): borrow-derived mask selects the bit. */
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        /* Shift the 64-bit value (d1|d0) left one bit, then repeat the
+         * compare/subtract step for the next quotient bit. */
+        "lsl %[d0], %[d0], #1\n\t"
+        "adc %[d1], %[d1]\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "sub r4, #1\n\t"
+        "bpl 1b\n\t"
+        "mov r7, #0\n\t"
+        "add %[r], %[r]\n\t"
+        "add %[r], #1\n\t"
+        /* Correction round 1: compute r*div (16x16 pieces), compare with
+         * the original d1|d0, and add the signed error into r. */
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "sub %[d1], r4\n\t"
+        "mov r4, %[d1]\n\t"
+        "mov %[d1], r9\n\t"
+        "sbc %[d1], r5\n\t"
+        "mov r5, %[d1]\n\t"
+        "add %[r], r5\n\t"
+        /* Correction round 2. */
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        /* Correction round 3. */
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        /* Final fixup: if remainder r4 >= div, bump r by one. */
+        "mov r6, %[div]\n\t"
+        "sub r6, r4\n\t"
+        "sbc r6, r6\n\t"
+        "sub %[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND the mask m into every digit of a, storing the result in r.
+ * Used for constant-time conditional selection: m is all-ones or all-zeros.
+ *
+ * r A single precision integer (result, 128 digits).
+ * a A single precision integer (input, 128 digits).
+ * m Mask that is ANDed against each digit.
+ */
+static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Small-code variant: one digit per iteration. */
+    int i = 0;
+
+    while (i < 128) {
+        r[i] = a[i] & m;
+        i++;
+    }
+#else
+    /* Speed variant: eight digits per iteration. */
+    int i = 0;
+
+    while (i < 128) {
+        r[i]     = a[i]     & m;
+        r[i + 1] = a[i + 1] & m;
+        r[i + 2] = a[i + 2] & m;
+        r[i + 3] = a[i + 3] & m;
+        r[i + 4] = a[i + 4] & m;
+        r[i + 5] = a[i + 5] & m;
+        r[i + 6] = a[i + 6] & m;
+        r[i + 7] = a[i + 7] & m;
+        i += 8;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word down, always visiting all 128
+ * words.  r3 is an "all words so far equal" mask: once a difference is
+ * seen it drops to zero, masking every later word to 0 so the result
+ * can no longer change.  Each step adds +1/-1 to r via borrow masks.
+ *
+ * a A single precision integer (128 digits).
+ * b A single precision integer (128 digits).
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        /* r3 = all-ones "still equal" mask; r6 = top word offset 0x1FC. */
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #252\n\t"
+        "1:\n\t"
+        "ldr r7, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r7, r3\n\t"
+        "and r5, r3\n\t"
+        "mov r4, r7\n\t"
+        /* r += (a[i] < b[i]) ? -1 : 0, via a borrow mask. */
+        "sub r7, r5\n\t"
+        "sbc r7, r7\n\t"
+        "add %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        /* r -= (b[i] < a[i]) ? -1 : 0, i.e. r += 1 when a[i] > b[i]. */
+        "sub r5, r4\n\t"
+        "sbc r7, r7\n\t"
+        "sub %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division one word at a time, constant-flow variant:
+ * instead of branching on the correction condition, the (possibly
+ * wrapped-negative) top word t1[128+i] is used as an all-ones/zero mask
+ * to conditionally add d back — the unsigned wraparound of the carry
+ * words is relied upon, so the statement order here is significant.
+ *
+ * a Number to be divided (256 digits).
+ * d Number to divide with (128 digits, top bit expected set).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (128 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        /* Estimate one quotient word (may be one too large). */
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        /* t1 -= r1 * d at position i; track the borrow in the top word. */
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        /* Up to two masked add-backs correct an over-estimate. */
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+        sp_4096_mask_128(t2, d, t1[128 + i]);
+        t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+    }
+
+    /* Final conditional subtract leaves the remainder in r. */
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper: division with the quotient discarded yields the
+ * remainder.  Constant-flow variant (see sp_4096_div_128).
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    int err = sp_4096_div_128(a, m, NULL, r);
+
+    return err;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Branching variant of sp_4096_div_128: the add-back corrections are
+ * guarded by explicit "if" tests instead of constant-time masks, so it
+ * is faster but not constant-flow — use only on public values.  The
+ * unsigned wraparound of the carry word t1[128+i] is relied upon.
+ *
+ * a Number to be divided (256 digits).
+ * d Number to divide with (128 digits, top bit expected set).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (128 digits).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[256], t2[129];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[127];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+    for (i=127; i>=0; i--) {
+        /* Estimate one quotient word (may be one too large). */
+        r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+        /* t1 -= r1 * d at position i; track the borrow in the top word. */
+        sp_4096_mul_d_128(t2, d, r1);
+        t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+        t1[128 + i] -= t2[128];
+        /* Non-zero top word means we went negative: add d back (twice
+         * at most). */
+        if (t1[128 + i] != 0) {
+            t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+            if (t1[128 + i] != 0)
+                t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+        }
+    }
+
+    /* Final conditional subtract leaves the remainder in r. */
+    r1 = sp_4096_cmp_128(t1, d) >= 0;
+    sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper: division with the quotient discarded yields the
+ * remainder.  Branching (non-constant-flow) variant — use only on
+ * public values (see sp_4096_div_128_cond).
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    int err = sp_4096_div_128_cond(a, m, NULL, r);
+
+    return err;
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window method with a 16-entry table of Montgomery-form
+ * powers t[k] = a^k * R mod m.  Each step does four Montgomery squares
+ * then one multiply by the table entry selected by the next 4 exponent
+ * bits — note the table lookup t[y] is by direct index, so this variant
+ * is not cache-access constant-time.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][256];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 256;
+        }
+#endif
+        norm = t[0];
+
+        /* norm = R mod m; used to convert a into Montgomery form. */
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        /* t[1] = a * R mod m, via reduction of (a << 4096). */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+        if (reduceA != 0) {
+            err = sp_4096_mod_128(t[1] + 128, a, m);
+            if (err == MP_OKAY) {
+                err = sp_4096_mod_128(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+            err = sp_4096_mod_128(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build t[k] = a^k in Montgomery form for k = 2..15. */
+        sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+
+        /* i = index of top exponent word; c = bits left in n; the first
+         * window may be shorter so that the rest divide evenly by 4. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+        for (; i>=0 || c>=4; ) {
+            /* Pull the next 4 bits of the exponent into y, refilling n
+             * from the next word when it runs dry (possibly mid-window). */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            /* r = r^16 * a^y (all in Montgomery form). */
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_mont_mul_128(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form and fully reduce below m. */
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+/* Fixed 5-bit window Montgomery exponentiation over 128 32-bit digits
+ * (4096 bits). reduceA is non-zero when 'a' may be >= m and must be
+ * reduced before being converted to Montgomery form. */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][256];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* One allocation holds all 32 window entries of 256 digits each. */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ /* Carve the window table out of the single heap block. */
+ for (i=0; i<32; i++) {
+ t[i] = td + i * 256;
+ }
+#endif
+ /* t[0] doubles as scratch for the Montgomery normalizer. */
+ norm = t[0];
+
+ /* mp = -1/m mod 2^32; norm = 2^4096 mod m (the Montgomery R). */
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_128(norm, m);
+
+ /* t[1] = a in Montgomery form: place 'a' in the upper 128 digits
+ * (i.e. a * 2^4096) and reduce mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+ if (reduceA != 0) {
+ err = sp_4096_mod_128(t[1] + 128, a, m);
+ if (err == MP_OKAY) {
+ err = sp_4096_mod_128(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+ err = sp_4096_mod_128(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Precompute window table: t[i] = a^i in Montgomery form,
+ * built from squarings of t[i/2] and multiplies by t[1]. */
+ sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+ sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+ sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+ sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+ sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+ sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+ sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+ sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+ sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+ sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+ sp_4096_mont_sqr_128(t[16], t[ 8], m, mp);
+ sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp);
+ sp_4096_mont_sqr_128(t[18], t[ 9], m, mp);
+ sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp);
+ sp_4096_mont_sqr_128(t[20], t[10], m, mp);
+ sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp);
+ sp_4096_mont_sqr_128(t[22], t[11], m, mp);
+ sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp);
+ sp_4096_mont_sqr_128(t[24], t[12], m, mp);
+ sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp);
+ sp_4096_mont_sqr_128(t[26], t[13], m, mp);
+ sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp);
+ sp_4096_mont_sqr_128(t[28], t[14], m, mp);
+ sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp);
+ sp_4096_mont_sqr_128(t[30], t[15], m, mp);
+ sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp);
+
+ /* Extract the top (possibly partial) 5-bit window.
+ * i indexes the current exponent word, c counts unconsumed bits in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ /* r starts as a^y (first window); no leading squarings needed. */
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ /* Current word exhausted: refill from the next lower word. */
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles a word boundary: combine both words. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ /* Whole window inside the current word. */
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^32 * a^y: five squarings, one table multiply per window. */
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+
+ sp_4096_mont_mul_128(r, r, t[y], m, mp);
+ }
+
+ /* Clear the upper half and convert out of Montgomery form. */
+ XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+ sp_4096_mont_reduce_128(r, m, mp);
+
+ /* Constant-time final subtraction: mask is all-ones iff r >= m. */
+ mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+ sp_4096_cond_sub_128(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[256], m[128], r[256];
+#else
+ sp_digit* d = NULL;
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+#endif
+ sp_digit *ah;
+ /* Public exponent fits in one 32-bit digit (checked below). */
+ sp_digit e[1];
+ int err = MP_OKAY;
+
+ if (*outLen < 512)
+ err = MP_TO_E;
+ /* Exponent limited to 32 bits; base and modulus must be 4096-bit. */
+ if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 ||
+ mp_count_bits(mm) != 4096))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ /* Carve a (double width), r (double width) and m out of d. */
+ a = d;
+ r = a + 128 * 2;
+ m = r + 128 * 2;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ /* ah aliases the upper half of a; the input goes there so that the
+ * 256-digit value a equals in * 2^4096. */
+ ah = a + 128;
+
+ sp_4096_from_bin(ah, 128, in, inLen);
+#if DIGIT_BIT >= 32
+ e[0] = em->dp[0];
+#else
+ /* mp_int limbs are narrower than 32 bits: merge the low two. */
+ e[0] = em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_4096_from_mp(m, 128, mm);
+
+ if (e[0] == 0x3) {
+ /* Fast path for e = 3: one squaring and one multiply. */
+ if (err == MP_OKAY) {
+ sp_4096_sqr_128(r, ah);
+ err = sp_4096_mod_128_cond(r, r, m);
+ }
+ if (err == MP_OKAY) {
+ sp_4096_mul_128(r, ah, r);
+ err = sp_4096_mod_128_cond(r, r, m);
+ }
+ }
+ else {
+ /* General case: left-to-right square-and-multiply. */
+ int i;
+ sp_digit mp;
+
+ sp_4096_mont_setup(m, &mp);
+
+ /* Convert to Montgomery form. */
+ /* Low half zeroed => a = in * 2^4096; reducing mod m yields the
+ * input in Montgomery form. */
+ XMEMSET(a, 0, sizeof(sp_digit) * 128);
+ err = sp_4096_mod_128_cond(a, a, m);
+
+ if (err == MP_OKAY) {
+ /* Find the highest set bit of the exponent. */
+ for (i = 31; i >= 0; i--) {
+ if (e[0] >> i) {
+ break;
+ }
+ }
+
+ XMEMCPY(r, a, sizeof(sp_digit) * 128);
+ for (i--; i>=0; i--) {
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ if (((e[0] >> i) & 1) == 1) {
+ sp_4096_mont_mul_128(r, r, a, m, mp);
+ }
+ }
+ XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
+ sp_4096_mont_reduce_128(r, m, mp);
+
+ /* Final reduction: subtract m once if r >= m. */
+ for (i = 127; i > 0; i--) {
+ if (r[i] != m[i]) {
+ break;
+ }
+ }
+ if (r[i] >= m[i]) {
+ sp_4096_sub_in_place_128(r, m);
+ }
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#endif
+
+ return err;
+}
+
+/* NOTE(review): this #if branch appears to be the body of the non-CRT
+ * sp_RsaPrivate_4096 (plain d-exponent path, r = in^d mod m); the function
+ * header for this branch is not visible in this hunk — confirm against the
+ * full file. */
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ /* CRT parameters are unused on this path. */
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 512U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 4096) {
+ err = MP_READ_E;
+ }
+ if (inLen > 512) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ /* Layout within d: [0..127] = exponent d, [128..383] = a (r aliases
+ * a), [384..511] = modulus m. */
+ a = d + 128;
+ m = a + 256;
+ r = a;
+
+ sp_4096_from_bin(a, 128, in, inLen);
+ sp_4096_from_mp(d, 128, dm);
+ sp_4096_from_mp(m, 128, mm);
+ err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+ if (d != NULL) {
+ /* Scrub only the private-exponent digits before freeing. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 128);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+/* Thumb-1 style inline asm: processes 64 words (256 bytes) and returns the
+ * final carry in c. Constant control flow regardless of the mask. */
+SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r5, #1\n\t"
+ "lsl r5, r5, #8\n\t" /* r8 = 256 = byte count of 64 words */
+ "mov r8, r5\n\t"
+ "mov r7, #0\n\t" /* r7 = byte offset into the arrays */
+ "1:\n\t"
+ "ldr r6, [%[b], r7]\n\t"
+ "and r6, %[m]\n\t" /* b word masked to 0 when m == 0 */
+ "mov r5, #0\n\t"
+ "sub r5, #1\n\t" /* r5 = 0xffffffff */
+ "add r5, %[c]\n\t" /* re-establish carry flag from saved c */
+ "ldr r5, [%[a], r7]\n\t"
+ "adc r5, r6\n\t" /* r = a + (b & m) + carry */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t" /* save carry-out back into c */
+ "str r5, [%[r], r7]\n\t"
+ "add r7, #4\n\t"
+ "cmp r7, r8\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r5", "r6", "r7", "r8"
+ );
+
+ return c;
+}
+
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+/* CRT implementation (Garner recombination):
+ * tmpa = in^dp mod p, tmpb = in^dq mod q,
+ * tmpa = qInv * (tmpa - tmpb) mod p, r = tmpb + q * tmpa. */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[128 * 2];
+ sp_digit p[64], q[64], dp[64];
+ sp_digit tmpa[128], tmpb[128];
+#else
+ sp_digit* t = NULL;
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+#endif
+ sp_digit* r;
+ sp_digit* qi;
+ sp_digit* dq;
+ sp_digit c;
+ int err = MP_OKAY;
+
+ /* Full exponent and modulus are not used on the CRT path. */
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 512)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ /* dp, dq and qi deliberately share one 64-digit buffer; each is
+ * loaded only after the previous value is no longer needed. */
+ a = t;
+ p = a + 128 * 2;
+ q = p + 64;
+ qi = dq = dp = q + 64;
+ tmpa = qi + 64;
+ tmpb = tmpa + 128;
+
+ r = t + 128;
+ }
+#else
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Stack variant: r aliases a, and dq/qi alias dp (sequential use). */
+ r = a;
+ qi = dq = dp;
+#endif
+ sp_4096_from_bin(a, 128, in, inLen);
+ sp_4096_from_mp(p, 64, pm);
+ sp_4096_from_mp(q, 64, qm);
+ sp_4096_from_mp(dp, 64, dpm);
+
+ /* tmpa = in^dp mod p */
+ err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1);
+ }
+ if (err == MP_OKAY) {
+ /* tmpb = in^dq mod q */
+ sp_4096_from_mp(dq, 64, dqm);
+ err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ /* tmpa = (tmpa - tmpb) mod p: up to two conditional adds of p
+ * absorb the borrow without branching on secret data. */
+ c = sp_2048_sub_in_place_64(tmpa, tmpb);
+ c += sp_4096_cond_add_64(tmpa, tmpa, p, c);
+ sp_4096_cond_add_64(tmpa, tmpa, p, c);
+
+ /* tmpa = qInv * tmpa mod p */
+ sp_2048_from_mp(qi, 64, qim);
+ sp_2048_mul_64(tmpa, tmpa, qi);
+ err = sp_2048_mod_64(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = tmpb + q * tmpa */
+ sp_2048_mul_64(tmpa, q, tmpa);
+ XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64);
+ sp_4096_add_128(r, tmpb, tmpa);
+
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ /* Scrub all key-dependent intermediates before freeing. */
+ XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+#else
+ XMEMSET(tmpa, 0, sizeof(tmpa));
+ XMEMSET(tmpb, 0, sizeof(tmpb));
+ XMEMSET(p, 0, sizeof(p));
+ XMEMSET(q, 0, sizeof(q));
+ XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+/* Repacks 128 32-bit sp_digits into mp_digit limbs for whatever limb
+ * width (DIGIT_BIT) the mp library was built with. */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ /* Same limb width: straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 128);
+ r->used = 128;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* Narrower mp limbs: each 32-bit digit spills across several limbs.
+ * j is the current limb index, s the bit position within a[i]. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 128; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* Wider mp limbs: accumulate several 32-bit digits per limb.
+ * s is the bit offset of a[i] within the current limb. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 128; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ sp_digit b[256];
+ sp_digit e[128];
+ sp_digit m[128];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+ int err = MP_OKAY;
+
+ /* Operands must fit: base and exponent at most 4096 bits, modulus
+ * exactly 4096 bits. */
+ if ((mp_count_bits(base) > 4096) || (expBits > 4096) ||
+ (mp_count_bits(mod) != 4096)) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ /* Load operands into fixed-size digit arrays and exponentiate. */
+ sp_4096_from_mp(b, 128, base);
+ sp_4096_from_mp(e, 128, exp);
+ sp_4096_from_mp(m, 128, mod);
+ err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_4096_to_mp(r, res);
+ }
+
+ /* The exponent may be private key material: scrub it from the stack. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+/* Left shift 'a' (128 32-bit digits) by n bits into 'r' (129 digits,
+ * the extra top word receives the bits shifted out).
+ *
+ * r Result buffer, 129 digits.
+ * a Input, 128 digits.
+ * n Shift amount in bits (a byte).
+ *
+ * The carry from each lower word is computed as (x >> 1) >> (31 - n),
+ * which equals x >> (32 - n) while avoiding an out-of-range shift when
+ * n == 0. Fully unrolled: the code starts at the most significant word
+ * (pointers advanced by 255 + 193 = 448 bytes) and walks down in
+ * 16-word chunks, with r2/r3/r4 rotating as word holders and r5 as the
+ * inter-word carry.
+ */
+static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n)
+{
+ __asm__ __volatile__ (
+ /* r6 = 31 - n, used for the carry extraction described above. */
+ "mov r6, #31\n\t"
+ "sub r6, r6, %[n]\n\t"
+ "add %[a], %[a], #255\n\t"
+ "add %[r], %[r], #255\n\t"
+ "add %[a], %[a], #193\n\t"
+ "add %[r], %[r], #193\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "lsr r4, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r4, r4, r6\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ /* Move down to the next 16-word chunk. */
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #52]\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "str r3, [%[r], #56]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #44]\n\t"
+ "str r2, [%[r], #52]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #40]\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "str r3, [%[r], #44]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "str r2, [%[r], #40]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #28]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "str r2, [%[r], #28]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "str r2, [%[r], #68]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #52]\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "str r3, [%[r], #56]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #44]\n\t"
+ "str r2, [%[r], #52]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #40]\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "str r3, [%[r], #44]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "str r2, [%[r], #40]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #28]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "str r2, [%[r], #28]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "str r2, [%[r], #68]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ /* Lowest two result words: no lower word remains to feed a carry. */
+ "str r2, [%[r]]\n\t"
+ "str r3, [%[r], #4]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+ : "memory", "r2", "r3", "r4", "r5", "r6"
+ );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * Processes the exponent in fixed 5-bit windows. Because the base is 2,
+ * multiplying by 2^y is a word/bit shift (sp_4096_lshift_128) followed by
+ * one fold-back step instead of a full Montgomery multiplication.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    /* nd: Montgomery normalizer (norm); td: 129-word product of mul_d. */
+    sp_digit nd[256];
+    sp_digit td[129];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;              /* n: bit buffer of current exponent word;
+                                 * o: carry out of the fold-back addition. */
+    sp_digit mask;
+    int i;                      /* index of next exponent word to consume */
+    int c, y;                   /* c: unread bits left in n; y: window value */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* Single allocation split below into norm (256 words) + tmp (129). */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp = td + 256;
+#else
+        norm = nd;
+        tmp = td;
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        /* Load the top exponent word and make the FIRST window carry the
+         * odd-sized remainder so every later window is exactly 5 bits. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;
+        /* r = norm * 2^y; norm is the Montgomery form of 1. */
+        sp_4096_lshift_128(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c == 0) {
+                /* Bit buffer empty: refill from the next exponent word. */
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                /* Window straddles two exponent words: combine both. */
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                /* Whole window available in the buffer. */
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            /* r = r^(2^5) mod m. */
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            /* r *= 2^y: shift, then fold the overflow word r[128] back in
+             * via the normalizer and conditionally subtract m on carry. */
+            sp_4096_lshift_128(r, r, y);
+            sp_4096_mul_d_128(tmp, norm, r[128]);
+            r[128] = 0;
+            o = sp_4096_add_128(r, r, tmp);
+            sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o);
+        }
+
+        /* Leave Montgomery form and reduce to a canonical value < m. */
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128];
+    sp_digit* r = b;            /* result shares the double-width buffer b */
+    word32 i;
+
+    /* Reject inputs larger than the fixed 4096-bit implementation:
+     * base <= 4096 bits, exponent <= 512 bytes, modulus exactly 4096 bits. */
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_bin(e, 128, exp, expLen);
+        sp_4096_from_mp(m, 128, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        /* Fast path for base 2 with an FFDHE-shaped modulus (all-ones top
+         * word): exponentiation by shifting instead of multiplying. */
+        if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
+            err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        /* Strip leading zero bytes from the big-endian result. */
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+
+    }
+
+    /* Zeroize the stack copy of the private exponent. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use.
+ * Ordinates are double width (2 * 8 words): the upper half is used as
+ * scratch space during Montgomery reduction of products. */
+typedef struct sp_point_256 {
+    /* X ordinate. */
+    sp_digit x[2 * 8];
+    /* Y ordinate. */
+    sp_digit y[2 * 8];
+    /* Z ordinate (projective coordinate). */
+    sp_digit z[2 * 8];
+    /* Non-zero when this is the point at infinity. */
+    int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[8] = {
+    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
+    0x00000001,0xffffffff
+};
+/* The Montgomery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[8] = {
+    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
+    0xfffffffe,0x00000000
+};
+/* The Montgomery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[8] = {
+    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[8] = {
+    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
+    0x00000000,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[8] = {
+    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
+    0xffffffff,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0xee00bc4f;
+#endif
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+    /* X ordinate */
+    {
+        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
+        0xe12c4247,0x6b17d1f2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y ordinate */
+    {
+        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
+        0xfe1a7f9b,0x4fe342e2,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z ordinate */
+    {
+        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+        0x00000000,0x00000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+/* The b coefficient of the short-Weierstrass equation for curve P256. */
+static const sp_digit p256_b[8] = {
+    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
+    0xaa3a93e7,0x5ac635d8
+};
+#endif
+
+/* Obtain storage for a point: either allocate it from the heap (small /
+ * small-stack builds with malloc) or hand back the caller's stack object.
+ *
+ * heap  Heap hint for XMALLOC (unused when stack storage is used).
+ * sp    Caller-provided stack storage (unused when allocating).
+ * p     Receives the pointer to the point storage.
+ * returns MP_OKAY on success and MEMORY_E when allocation fails.
+ */
+static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int err = MP_OKAY;
+
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Heap allocation; stack storage is ignored. */
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    /* Use the caller's stack object directly. */
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        err = MEMORY_E;
+    }
+
+    return err;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point from the heap and return error; the stack
+ * object 'sp' is unused in this configuration. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
+#else
+/* Set pointer to the caller's stack data and return no error. */
+#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
+#endif
+
+
+/* Release point storage obtained from sp_256_point_new_8.
+ *
+ * p     Point storage; may be NULL in heap-allocated configurations.
+ * clear Non-zero to zeroize the point data before release.
+ * heap  Heap hint matching the allocation (unused for stack storage).
+ */
+static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
+{
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Heap storage: zeroize on request, then free. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    /* Stack storage: nothing to free, only zeroize on request. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * Uses the special form of the P-256 prime: each output word t[k] is a
+ * small signed combination of the input words (coefficients given in the
+ * per-row comments below), followed by two carry-propagation passes with
+ * the final carry 'o' folded back in at positions 0, 3, 6 and 7.
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime) - unused; the P-256 prime is baked in.
+ */
+static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    int64_t t[8];
+    int64_t a64[8];
+    int64_t o;
+
+    (void)m;
+
+    /* Widen inputs to 64 bits so the signed combinations cannot overflow. */
+    a64[0] = a[0];
+    a64[1] = a[1];
+    a64[2] = a[2];
+    a64[3] = a[3];
+    a64[4] = a[4];
+    a64[5] = a[5];
+    a64[6] = a[6];
+    a64[7] = a[7];
+
+    /*  1  1  0 -1 -1 -1 -1  0 */
+    t[0] = 0 + a64[0] + a64[1] - a64[3] - a64[4] - a64[5] - a64[6];
+    /*  0  1  1  0 -1 -1 -1 -1 */
+    t[1] = 0 + a64[1] + a64[2] - a64[4] - a64[5] - a64[6] - a64[7];
+    /*  0  0  1  1  0 -1 -1 -1 */
+    t[2] = 0 + a64[2] + a64[3] - a64[5] - a64[6] - a64[7];
+    /* -1 -1  0  2  2  1  0 -1 */
+    t[3] = 0 - a64[0] - a64[1] + 2 * a64[3] + 2 * a64[4] + a64[5] - a64[7];
+    /*  0 -1 -1  0  2  2  1  0 */
+    t[4] = 0 - a64[1] - a64[2] + 2 * a64[4] + 2 * a64[5] + a64[6];
+    /*  0  0 -1 -1  0  2  2  1 */
+    t[5] = 0 - a64[2] - a64[3] + 2 * a64[5] + 2 * a64[6] + a64[7];
+    /* -1 -1  0  0  0  1  3  2 */
+    t[6] = 0 - a64[0] - a64[1] + a64[5] + 3 * a64[6] + 2 * a64[7];
+    /*  1  0 -1 -1 -1 -1  0  3 */
+    t[7] = 0 + a64[0] - a64[2] - a64[3] - a64[4] - a64[5] + 3 * a64[7];
+
+    /* First carry pass: normalize each word to 32 bits. */
+    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+    /* Fold the top carry back in (o * 2^256 == o * (2^224 - 2^192 - 2^96 + 1)
+     * pattern expressed at word positions 0, 3, 6, 7). */
+    o     = t[7] >> 32; t[7] &= 0xffffffff;
+    t[0] += o;
+    t[3] -= o;
+    t[6] -= o;
+    t[7] += o;
+    /* Second carry pass settles any remaining carries. */
+    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+    r[0] = t[0];
+    r[1] = t[1];
+    r[2] = t[2];
+    r[3] = t[3];
+    r[4] = t[4];
+    r[5] = t[5];
+    r[6] = t[6];
+    r[7] = t[7];
+
+    return MP_OKAY;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * The three branches handle mp_int digit sizes equal to, larger than and
+ * smaller than the 32-bit sp_digit; all zero-pad the output up to 'size'.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    /* Same digit size: straight copy then zero-pad. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    /* mp digits are wider: split each into multiple 32-bit words.
+     * s tracks the bit position consumed within the current mp digit. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* mp digits are narrower: pack several into each 32-bit word.
+     * s is the bit offset within the current output word. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            /* Output word full: mask it and spill the remaining bits of
+             * this mp digit into the next word. */
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * Each ordinate is cleared to its full double width before conversion so
+ * the scratch upper half starts out zero.
+ *
+ * p Point of type sp_point_256 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm)
+{
+    XMEMSET(p->x, 0, sizeof(p->x));
+    sp_256_from_mp(p->x, 8, pm->x);
+    XMEMSET(p->y, 0, sizeof(p->y));
+    sp_256_from_mp(p->y, 8, pm->y);
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_256_from_mp(p->z, 8, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Mirror of sp_256_from_mp: branches on the relative width of the 32-bit
+ * sp_digit and the mp_int digit, repacking bits accordingly.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    /* Ensure the mp_int has room for 256 bits before writing digits. */
+    err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        /* Same digit size: straight copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
+        r->used = 8;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        /* mp digits are narrower: split each 32-bit word across several
+         * mp digits; s is the bit offset consumed from a[i]. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 8; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp digits are wider: pack several 32-bit words into each digit;
+         * s is the bit offset within the current mp digit. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 8; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * Converts the ordinates one at a time, stopping at the first failure.
+ *
+ * p Point of type sp_point_256.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm)
+{
+    int err = sp_256_to_mp(p->x, pm->x);
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (column-wise) 8x8 word multiply written for Thumb-1:
+ * since MUL only yields the low 32 bits, each 32x32 product is built from
+ * four 16x16 half-word products accumulated into the 96-bit column
+ * accumulator r3:r4:r5. r8 holds the byte offset of the current output
+ * column. The result is built in a local buffer and copied out at the
+ * end so r may alias a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[8 * 2];
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r10, %[b]\n\t"
+        "mov r6, #32\n\t"
+        "add r6, r9\n\t"
+        "mov r12, r6\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r6, #28\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov %[b], r8\n\t"
+        "sub %[b], %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add %[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add %[a], #4\n\t"
+        "sub %[b], #4\n\t"
+        "cmp %[a], r12\n\t"
+        "beq 3f\n\t"
+        "mov r6, r8\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r11\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #56\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[a], r9\n\t"
+        "mov %[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    /* Copy from the scratch buffer so r may alias an input. */
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant time: both paths execute the same instructions; the mask
+ * zeroes the subtrahend instead of branching. The 'mov/sub' pair before
+ * each load re-materializes the running borrow into the carry flag for
+ * the following SBC.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * returns 0 when no borrow occurred and non-zero (all ones) on borrow.
+ */
+SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r5, #32\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "sbc r5, r6\n\t"
+        "sbc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * Specialized for the P-256 prime: mp == 1 so mu = a[i], and the word
+ * pattern of the modulus (-1,-1,-1,0,0,0,1,-1 plus the implicit carry
+ * word) lets each m[j]*mu term be formed with adds/subs of mu instead of
+ * multiplies. After the 8 reduction rounds, the final subtraction of the
+ * modulus is applied through a mask (r1) derived from the top carry so
+ * the adjustment is branch-free.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    (void)mp;
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov r2, #0\n\t"
+        "mov r1, #0\n\t"
+        "# i = 0\n\t"
+        "mov r8, r2\n\t"
+        "\n1:\n\t"
+        "mov r4, #0\n\t"
+        "# mu = a[i] * 1 (mp) = a[i]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "# a[i+0] += -1 * mu\n\t"
+        "mov r5, r3\n\t"
+        "str r4, [%[a], #0]\n\t"
+        "# a[i+1] += -1 * mu\n\t"
+        "ldr r6, [%[a], #4]\n\t"
+        "mov r4, r3\n\t"
+        "sub r5, r3\n\t"
+        "sbc r4, r2\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r2\n\t"
+        "str r5, [%[a], #4]\n\t"
+        "# a[i+2] += -1 * mu\n\t"
+        "ldr r6, [%[a], #8]\n\t"
+        "mov r5, r3\n\t"
+        "sub r4, r3\n\t"
+        "sbc r5, r2\n\t"
+        "add r4, r6\n\t"
+        "adc r5, r2\n\t"
+        "str r4, [%[a], #8]\n\t"
+        "# a[i+3] += 0 * mu\n\t"
+        "ldr r6, [%[a], #12]\n\t"
+        "mov r4, #0\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r2\n\t"
+        "str r5, [%[a], #12]\n\t"
+        "# a[i+4] += 0 * mu\n\t"
+        "ldr r6, [%[a], #16]\n\t"
+        "mov r5, #0\n\t"
+        "add r4, r6\n\t"
+        "adc r5, r2\n\t"
+        "str r4, [%[a], #16]\n\t"
+        "# a[i+5] += 0 * mu\n\t"
+        "ldr r6, [%[a], #20]\n\t"
+        "mov r4, #0\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r2\n\t"
+        "str r5, [%[a], #20]\n\t"
+        "# a[i+6] += 1 * mu\n\t"
+        "ldr r6, [%[a], #24]\n\t"
+        "mov r5, #0\n\t"
+        "add r4, r3\n\t"
+        "adc r5, r2\n\t"
+        "add r4, r6\n\t"
+        "adc r5, r2\n\t"
+        "str r4, [%[a], #24]\n\t"
+        "# a[i+7] += -1 * mu\n\t"
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r7, [%[a], #32]\n\t"
+        "add r4, r1, r3\n\t"
+        "mov r1, #0\n\t"
+        "adc r1, r2\n\t"
+        "sub r5, r3\n\t"
+        "sbc r4, r2\n\t"
+        "sbc r1, r2\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r1, r2\n\t"
+        "str r5, [%[a], #28]\n\t"
+        "str r4, [%[a], #32]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r8, r6\n\t"
+        "add %[a], #4\n\t"
+        "mov r6, #32\n\t"
+        "cmp r8, r6\n\t"
+        "blt 1b\n\t"
+        "sub %[a], #32\n\t"
+        "mov r3, r1\n\t"
+        "sub r1, #1\n\t"
+        "mvn r1, r1\n\t"
+        "ldr r5, [%[a],#32]\n\t"
+        "ldr r4, [%[a],#36]\n\t"
+        "ldr r6, [%[a],#40]\n\t"
+        "ldr r7, [%[a],#44]\n\t"
+        "sub r5, r1\n\t"
+        "sbc r4, r1\n\t"
+        "sbc r6, r1\n\t"
+        "sbc r7, r2\n\t"
+        "str r5, [%[a],#0]\n\t"
+        "str r4, [%[a],#4]\n\t"
+        "str r6, [%[a],#8]\n\t"
+        "str r7, [%[a],#12]\n\t"
+        "ldr r5, [%[a],#48]\n\t"
+        "ldr r4, [%[a],#52]\n\t"
+        "ldr r6, [%[a],#56]\n\t"
+        "ldr r7, [%[a],#60]\n\t"
+        "sbc r5, r2\n\t"
+        "sbc r4, r2\n\t"
+        "sbc r6, r3\n\t"
+        "sbc r7, r1\n\t"
+        "str r5, [%[a],#16]\n\t"
+        "str r4, [%[a],#20]\n\t"
+        "str r6, [%[a],#24]\n\t"
+        "str r7, [%[a],#28]\n\t"
+        : [a] "+r" (a)
+        :
+        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
+    );
+
+
+    (void)m;
+    (void)mp;
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * Generic (any modulus) variant used for the curve order: each round
+ * computes mu = a[i] * mp and adds m * mu to a starting at word i, using
+ * 16x16 half-word multiplies (Thumb-1 MUL gives only the low 32 bits).
+ * The asm advances %[a] by one word per round (8 rounds), hence the
+ * 'a - 8' fix-up when applying the final conditional subtraction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov r8, %[mp]\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov r14, %[m]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov r11, r4\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "mov %[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov %[mp], r8\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mul %[mp], %[a]\n\t"
+        "mov %[m], r14\n\t"
+        "mov r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov %[a], r10\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add %[a], r7\n\t"
+        "adc r5, %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add r4, %[a]\n\t"
+        "adc r5, %[ca]\n\t"
+        "mov %[a], r10\n\t"
+        "str r4, [%[a]]\n\t"
+        "mov r6, #4\n\t"
+        "add %[m], #4\n\t"
+        "add r10, r6\n\t"
+        "mov r4, #28\n\t"
+        "add r4, r9\n\t"
+        "cmp r10, r4\n\t"
+        "blt 2b\n\t"
+        "# a[i+7] += m[7] * mu\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r12\n\t"
+        "mov %[a], #0\n\t"
+        "# Multiply m[7] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "adc r4, %[ca]\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "# Multiply m[7] and mu - Done\n\t"
+        "mov %[ca], %[a]\n\t"
+        "mov %[a], r10\n\t"
+        "ldr r7, [%[a], #4]\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov r6, #0\n\t"
+        "add r5, %[a]\n\t"
+        "adc r7, r4\n\t"
+        "adc %[ca], r6\n\t"
+        "mov %[a], r10\n\t"
+        "str r5, [%[a]]\n\t"
+        "str r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r9, r6\n\t"
+        "add r11, r6\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov %[a], r9\n\t"
+        "mov r4, #32\n\t"
+        "cmp r11, r4\n\t"
+        "blt 1b\n\t"
+        "mov %[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* a was advanced 8 words by the asm; a - 8 is the original pointer.
+     * Subtract m when the final carry (ca) indicates overflow. */
+    sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full double-width product followed by a Montgomery reduction back to
+ * 8 words.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_8(r, m, mp);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product-scanning squaring for Thumb-1: cross products a[i]*a[j] (i != j)
+ * are accumulated twice, the diagonal a[i]^2 once, into the 96-bit column
+ * accumulator r3:r4:r5. A 64-byte scratch area is carved out of the stack
+ * by adjusting sp directly; the result is copied from it to r at the end
+ * so r may alias a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r6, #64\n\t"
+        "neg r6, r6\n\t"
+        "add sp, r6\n\t"
+        "mov r10, sp\n\t"
+        "mov r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r6, #28\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov r2, r8\n\t"
+        "sub r2, %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r6\n\t"
+        "add r3, r6\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "mul r7, r7\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #15\n\t"
+        "lsl r6, r6, #17\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add %[a], #4\n\t"
+        "sub r2, #4\n\t"
+        "mov r6, #32\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r7, r8\n\t"
+        "add r7, r9\n\t"
+        "cmp %[a], r7\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r10\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #56\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r9\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[r], r11\n\t"
+        "mov %[a], r10\n\t"
+        "mov r3, #60\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "sub r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #64\n\t"
+        "add sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full double-width square followed by a Montgomery reduction back to
+ * 8 words.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_8(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ n mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * n Number of times to square.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    /* First squaring reads from a; the remaining n-1 square in place. */
+    sp_256_mont_sqr_8(r, a, m, mp);
+    while (--n > 0) {
+        sp_256_mont_sqr_8(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve: exponent for Fermat inversion (a^(p-2)). */
+static const uint32_t p256_mod_minus_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * Computes a^(p-2) mod p (Fermat's little theorem). The small build walks
+ * the exponent bits of p-2 directly; the fast build uses a fixed
+ * square-and-multiply addition chain exploiting the run structure of the
+ * P-256 prime.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data; the fast build uses three 2*8-word temporaries, so
+ *    td must hold at least 6*8 digits.
+ */
+static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Left-to-right binary exponentiation by p256_mod_minus_2. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    /* Comments track the partial exponent accumulated so far. */
+    /* 0x2 */
+    sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word down, always touching every word.
+ * r3 is an all-ones mask that is cleared once a difference is seen, so
+ * later (less significant) words cannot change the result; +/-1 is
+ * accumulated into r via borrow masks rather than branches.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #28\n\t"
+        "1:\n\t"
+        "ldr r7, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r7, r3\n\t"
+        "and r5, r3\n\t"
+        "mov r4, r7\n\t"
+        "sub r7, r5\n\t"
+        "sbc r7, r7\n\t"
+        "add %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r5, r4\n\t"
+        "sbc r7, r7\n\t"
+        "sub %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Normalize the values in each word to 32 bits.
+ *
+ * A no-op in this implementation: each sp_digit already holds a full
+ * 32-bit word, so there are no partial-word carries to propagate.
+ *
+ * a Array of sp_digit to normalize.
+ */
+#define sp_256_norm_8(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes x = X / Z^2 and y = Y / Z^3, converts both out of Montgomery
+ * form and fully reduces them modulo p256_mod. Z is set to 1.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*8;
+ int32_t n;
+
+ /* t1 = 1/Z (Montgomery form). */
+ sp_256_mont_inv_8(t1, p->z, t + 2*8);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3. */
+ sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
+
+ /* x /= z^2 */
+ sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod);
+ XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U);
+ /* Convert x out of Montgomery form. */
+ sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
+ /* Reduce x to less than modulus */
+ n = sp_256_cmp_8(r->x, p256_mod);
+ sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_256_norm_8(r->x);
+
+ /* y /= z^3 */
+ sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod);
+ XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
+ /* Convert y out of Montgomery form. */
+ sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
+ /* Reduce y to less than modulus */
+ n = sp_256_cmp_8(r->y, p256_mod);
+ sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_256_norm_8(r->y);
+
+ /* Affine point: Z is one. */
+ XMEMSET(r->z, 0, sizeof(r->z));
+ r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Looping version: c carries the inter-word carry (0 or 1) and the
+ * "add %[c], r7" with r7 == -1 re-creates the carry flag from it
+ * before each adc.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant word.
+ */
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ /* r6 = end pointer (a + 32 bytes), r7 = -1 (carry regenerator). */
+ __asm__ __volatile__ (
+ "mov r6, %[a]\n\t"
+ "mov r7, #0\n\t"
+ "add r6, #32\n\t"
+ "sub r7, #1\n\t"
+ "\n1:\n\t"
+ "add %[c], r7\n\t"
+ "ldr r4, [%[a]]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r]]\n\t"
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ "add %[a], #4\n\t"
+ "add %[b], #4\n\t"
+ "add %[r], #4\n\t"
+ "cmp %[a], r6\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r7"
+ );
+
+ return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled ripple-carry addition: the first word uses add, the
+ * remaining seven use adc so the carry flag propagates across all words.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns the carry out of the most significant word.
+ */
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "add r4, r5\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #4]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #12]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #20]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #28]\n\t"
+ "adc r4, r5\n\t"
+ "str r4, [%[r], #28]\n\t"
+ /* Capture the final carry flag into c. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * The 256-bit addition is done first; the carry out is turned into an
+ * all-ones mask (r3) and the P-256 modulus is conditionally subtracted
+ * using that mask, keeping the operation constant time. m is unused
+ * because the modulus word pattern is hard-coded for P-256.
+ *
+ * r Result of addition.
+ * a First number to add in Montgomery form.
+ * b Second number to add in Montgomery form.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m)
+{
+ (void)m;
+
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "ldr r4, [%[a],#0]\n\t"
+ "ldr r5, [%[a],#4]\n\t"
+ "ldr r6, [%[b],#0]\n\t"
+ "ldr r7, [%[b],#4]\n\t"
+ "add r4, r6\n\t"
+ "adc r5, r7\n\t"
+ "str r4, [%[r],#0]\n\t"
+ "str r5, [%[r],#4]\n\t"
+ "ldr r4, [%[a],#8]\n\t"
+ "ldr r5, [%[a],#12]\n\t"
+ "ldr r6, [%[b],#8]\n\t"
+ "ldr r7, [%[b],#12]\n\t"
+ "adc r4, r6\n\t"
+ "adc r5, r7\n\t"
+ "str r4, [%[r],#8]\n\t"
+ "str r5, [%[r],#12]\n\t"
+ "ldr r4, [%[a],#16]\n\t"
+ "ldr r5, [%[a],#20]\n\t"
+ "ldr r6, [%[b],#16]\n\t"
+ "ldr r7, [%[b],#20]\n\t"
+ "adc r4, r6\n\t"
+ "adc r5, r7\n\t"
+ "mov r8, r4\n\t"
+ "mov r9, r5\n\t"
+ "ldr r4, [%[a],#24]\n\t"
+ "ldr r5, [%[a],#28]\n\t"
+ "ldr r6, [%[b],#24]\n\t"
+ "ldr r7, [%[b],#28]\n\t"
+ "adc r4, r6\n\t"
+ "adc r5, r7\n\t"
+ "mov r10, r4\n\t"
+ "mov r11, r5\n\t"
+ /* r3 = carry out; r6 = carry, r3 = 0 - carry (mask). */
+ "adc r3, r3\n\t"
+ "mov r6, r3\n\t"
+ "sub r3, #1\n\t"
+ "mvn r3, r3\n\t"
+ "mov r7, #0\n\t"
+ /* Conditionally subtract the P-256 modulus word pattern. */
+ "ldr r4, [%[r],#0]\n\t"
+ "ldr r5, [%[r],#4]\n\t"
+ "sub r4, r3\n\t"
+ "sbc r5, r3\n\t"
+ "str r4, [%[r],#0]\n\t"
+ "str r5, [%[r],#4]\n\t"
+ "ldr r4, [%[r],#8]\n\t"
+ "ldr r5, [%[r],#12]\n\t"
+ "sbc r4, r3\n\t"
+ "sbc r5, r7\n\t"
+ "str r4, [%[r],#8]\n\t"
+ "str r5, [%[r],#12]\n\t"
+ "mov r4, r8\n\t"
+ "mov r5, r9\n\t"
+ "sbc r4, r7\n\t"
+ "sbc r5, r7\n\t"
+ "str r4, [%[r],#16]\n\t"
+ "str r5, [%[r],#20]\n\t"
+ "mov r4, r10\n\t"
+ "mov r5, r11\n\t"
+ "sbc r4, r6\n\t"
+ "sbc r5, r3\n\t"
+ "str r4, [%[r],#24]\n\t"
+ "str r5, [%[r],#28]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+ );
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * Doubles by shifting-through-add (add/adc of each word with itself),
+ * then conditionally subtracts the P-256 modulus based on the carry out,
+ * in constant time. m is unused; the modulus pattern is hard-coded.
+ *
+ * r Result of doubling.
+ * a Number to double in Montgomery form.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ (void)m;
+
+ __asm__ __volatile__ (
+ "ldr r4, [%[a],#0]\n\t"
+ "ldr r5, [%[a],#4]\n\t"
+ "ldr r6, [%[a],#8]\n\t"
+ "ldr r7, [%[a],#12]\n\t"
+ "add r4, r4\n\t"
+ "adc r5, r5\n\t"
+ "adc r6, r6\n\t"
+ "adc r7, r7\n\t"
+ "str r4, [%[r],#0]\n\t"
+ "str r5, [%[r],#4]\n\t"
+ "str r6, [%[r],#8]\n\t"
+ "str r7, [%[r],#12]\n\t"
+ "ldr r4, [%[a],#16]\n\t"
+ "ldr r5, [%[a],#20]\n\t"
+ "ldr r6, [%[a],#24]\n\t"
+ "ldr r7, [%[a],#28]\n\t"
+ "adc r4, r4\n\t"
+ "adc r5, r5\n\t"
+ "adc r6, r6\n\t"
+ "adc r7, r7\n\t"
+ "mov r8, r4\n\t"
+ "mov r9, r5\n\t"
+ "mov r10, r6\n\t"
+ "mov r11, r7\n\t"
+ /* r2 = carry out; r3 = 0 - carry (all-ones mask when carry set). */
+ "mov r3, #0\n\t"
+ "mov r7, #0\n\t"
+ "adc r3, r3\n\t"
+ "mov r2, r3\n\t"
+ "sub r3, #1\n\t"
+ "mvn r3, r3\n\t"
+ /* Conditionally subtract the P-256 modulus word pattern. */
+ "ldr r4, [%[r],#0]\n\t"
+ "ldr r5, [%[r],#4]\n\t"
+ "ldr r6, [%[r],#8]\n\t"
+ "sub r4, r3\n\t"
+ "sbc r5, r3\n\t"
+ "sbc r6, r3\n\t"
+ "str r4, [%[r],#0]\n\t"
+ "str r5, [%[r],#4]\n\t"
+ "str r6, [%[r],#8]\n\t"
+ "ldr r4, [%[r],#12]\n\t"
+ "mov r5, r8\n\t"
+ "mov r6, r9\n\t"
+ "sbc r4, r7\n\t"
+ "sbc r5, r7\n\t"
+ "sbc r6, r7\n\t"
+ "str r4, [%[r],#12]\n\t"
+ "str r5, [%[r],#16]\n\t"
+ "str r6, [%[r],#20]\n\t"
+ "mov r4, r10\n\t"
+ "mov r5, r11\n\t"
+ "sbc r4, r2\n\t"
+ "sbc r5, r3\n\t"
+ "str r4, [%[r],#24]\n\t"
+ "str r5, [%[r],#28]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r3", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+ );
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Computes 2*a with a conditional modulus subtraction, then adds a again
+ * with a second conditional subtraction. Both reductions are mask-based
+ * and constant time. m is unused; the P-256 modulus pattern is
+ * hard-coded. The intermediate sum is kept entirely in registers
+ * (r6,r7,r8-r12,r14) until the final stores.
+ *
+ * r Result of Tripling.
+ * a Number to triple in Montgomery form.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ (void)m;
+
+ __asm__ __volatile__ (
+ /* Double a into r6,r7,r8,r9,r10,r11,r12,r14. */
+ "ldr r6, [%[a],#0]\n\t"
+ "ldr r7, [%[a],#4]\n\t"
+ "ldr r4, [%[a],#8]\n\t"
+ "ldr r5, [%[a],#12]\n\t"
+ "add r6, r6\n\t"
+ "adc r7, r7\n\t"
+ "adc r4, r4\n\t"
+ "adc r5, r5\n\t"
+ "mov r8, r4\n\t"
+ "mov r9, r5\n\t"
+ "ldr r2, [%[a],#16]\n\t"
+ "ldr r3, [%[a],#20]\n\t"
+ "ldr r4, [%[a],#24]\n\t"
+ "ldr r5, [%[a],#28]\n\t"
+ "adc r2, r2\n\t"
+ "adc r3, r3\n\t"
+ "adc r4, r4\n\t"
+ "adc r5, r5\n\t"
+ "mov r10, r2\n\t"
+ "mov r11, r3\n\t"
+ "mov r12, r4\n\t"
+ "mov r14, r5\n\t"
+ /* First conditional reduction: r4 = carry, r3 = 0 - carry mask. */
+ "mov r3, #0\n\t"
+ "mov r5, #0\n\t"
+ "adc r3, r3\n\t"
+ "mov r4, r3\n\t"
+ "sub r3, #1\n\t"
+ "mvn r3, r3\n\t"
+ "sub r6, r3\n\t"
+ "sbc r7, r3\n\t"
+ "mov r2, r8\n\t"
+ "sbc r2, r3\n\t"
+ "mov r8, r2\n\t"
+ "mov r2, r9\n\t"
+ "sbc r2, r5\n\t"
+ "mov r9, r2\n\t"
+ "mov r2, r10\n\t"
+ "sbc r2, r5\n\t"
+ "mov r10, r2\n\t"
+ "mov r2, r11\n\t"
+ "sbc r2, r5\n\t"
+ "mov r11, r2\n\t"
+ "mov r2, r12\n\t"
+ "sbc r2, r4\n\t"
+ "mov r12, r2\n\t"
+ "mov r2, r14\n\t"
+ "sbc r2, r3\n\t"
+ "mov r14, r2\n\t"
+ /* Add a once more: 2a + a. */
+ "ldr r2, [%[a],#0]\n\t"
+ "ldr r3, [%[a],#4]\n\t"
+ "add r6, r2\n\t"
+ "adc r7, r3\n\t"
+ "ldr r2, [%[a],#8]\n\t"
+ "ldr r3, [%[a],#12]\n\t"
+ "mov r4, r8\n\t"
+ "mov r5, r9\n\t"
+ "adc r2, r4\n\t"
+ "adc r3, r5\n\t"
+ "mov r8, r2\n\t"
+ "mov r9, r3\n\t"
+ "ldr r2, [%[a],#16]\n\t"
+ "ldr r3, [%[a],#20]\n\t"
+ "mov r4, r10\n\t"
+ "mov r5, r11\n\t"
+ "adc r2, r4\n\t"
+ "adc r3, r5\n\t"
+ "mov r10, r2\n\t"
+ "mov r11, r3\n\t"
+ "ldr r2, [%[a],#24]\n\t"
+ "ldr r3, [%[a],#28]\n\t"
+ "mov r4, r12\n\t"
+ "mov r5, r14\n\t"
+ "adc r2, r4\n\t"
+ "adc r3, r5\n\t"
+ "mov r12, r2\n\t"
+ "mov r14, r3\n\t"
+ /* Second conditional reduction, storing the result. */
+ "mov r3, #0\n\t"
+ "mov r5, #0\n\t"
+ "adc r3, r3\n\t"
+ "mov r4, r3\n\t"
+ "sub r3, #1\n\t"
+ "mvn r3, r3\n\t"
+ "sub r6, r3\n\t"
+ "str r6, [%[r],#0]\n\t"
+ "sbc r7, r3\n\t"
+ "str r7, [%[r],#4]\n\t"
+ "mov r2, r8\n\t"
+ "sbc r2, r3\n\t"
+ "str r2, [%[r],#8]\n\t"
+ "mov r2, r9\n\t"
+ "sbc r2, r5\n\t"
+ "str r2, [%[r],#12]\n\t"
+ "mov r2, r10\n\t"
+ "sbc r2, r5\n\t"
+ "str r2, [%[r],#16]\n\t"
+ "mov r2, r11\n\t"
+ "sbc r2, r5\n\t"
+ "str r2, [%[r],#20]\n\t"
+ "mov r2, r12\n\t"
+ "sbc r2, r4\n\t"
+ "str r2, [%[r],#24]\n\t"
+ "mov r2, r14\n\t"
+ "sbc r2, r3\n\t"
+ "str r2, [%[r],#28]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+ );
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * Performs the 256-bit subtraction, then turns the final borrow into an
+ * all-ones mask (r3) and conditionally adds the P-256 modulus back,
+ * keeping the operation constant time. m is unused; the modulus word
+ * pattern is hard-coded.
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m)
+{
+ (void)m;
+
+ __asm__ __volatile__ (
+ "ldr r4, [%[a],#0]\n\t"
+ "ldr r5, [%[a],#4]\n\t"
+ "ldr r6, [%[b],#0]\n\t"
+ "ldr r7, [%[b],#4]\n\t"
+ "sub r4, r6\n\t"
+ "sbc r5, r7\n\t"
+ "str r4, [%[r],#0]\n\t"
+ "str r5, [%[r],#4]\n\t"
+ "ldr r4, [%[a],#8]\n\t"
+ "ldr r5, [%[a],#12]\n\t"
+ "ldr r6, [%[b],#8]\n\t"
+ "ldr r7, [%[b],#12]\n\t"
+ "sbc r4, r6\n\t"
+ "sbc r5, r7\n\t"
+ "str r4, [%[r],#8]\n\t"
+ "str r5, [%[r],#12]\n\t"
+ "ldr r4, [%[a],#16]\n\t"
+ "ldr r5, [%[a],#20]\n\t"
+ "ldr r6, [%[b],#16]\n\t"
+ "ldr r7, [%[b],#20]\n\t"
+ "sbc r4, r6\n\t"
+ "sbc r5, r7\n\t"
+ "mov r8, r4\n\t"
+ "mov r9, r5\n\t"
+ "ldr r4, [%[a],#24]\n\t"
+ "ldr r5, [%[a],#28]\n\t"
+ "ldr r6, [%[b],#24]\n\t"
+ "ldr r7, [%[b],#28]\n\t"
+ "sbc r4, r6\n\t"
+ "sbc r5, r7\n\t"
+ "mov r10, r4\n\t"
+ "mov r11, r5\n\t"
+ /* r3 = borrow mask (all ones on borrow); r7 = its top bit (0 or 1). */
+ "sbc r3, r3\n\t"
+ "lsr r7, r3, #31\n\t"
+ "mov r6, #0\n\t"
+ /* Conditionally add the P-256 modulus word pattern back. */
+ "ldr r4, [%[r],#0]\n\t"
+ "ldr r5, [%[r],#4]\n\t"
+ "add r4, r3\n\t"
+ "adc r5, r3\n\t"
+ "str r4, [%[r],#0]\n\t"
+ "str r5, [%[r],#4]\n\t"
+ "ldr r4, [%[r],#8]\n\t"
+ "ldr r5, [%[r],#12]\n\t"
+ "adc r4, r3\n\t"
+ "adc r5, r6\n\t"
+ "str r4, [%[r],#8]\n\t"
+ "str r5, [%[r],#12]\n\t"
+ "mov r4, r8\n\t"
+ "mov r5, r9\n\t"
+ "adc r4, r6\n\t"
+ "adc r5, r6\n\t"
+ "str r4, [%[r],#16]\n\t"
+ "str r5, [%[r],#20]\n\t"
+ "mov r4, r10\n\t"
+ "mov r5, r11\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, r3\n\t"
+ "str r4, [%[r],#24]\n\t"
+ "str r5, [%[r],#28]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+ );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * If a is odd, the P-256 modulus is added first (mask r5 is derived from
+ * the low bit of a) so the value becomes even, then the whole number is
+ * shifted right one bit from the top word downwards, carrying the shifted
+ * out bit between words in r7. Constant time. m is unused; the modulus
+ * word pattern is hard-coded.
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ __asm__ __volatile__ (
+ /* r5 = 0 - (a[0] & 1): all-ones mask when a is odd. */
+ "ldr r7, [%[a], #0]\n\t"
+ "lsl r7, r7, #31\n\t"
+ "lsr r7, r7, #31\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, r7\n\t"
+ "mov r7, #0\n\t"
+ "lsl r6, r5, #31\n\t"
+ "lsr r6, r6, #31\n\t"
+ /* Conditionally add the modulus. */
+ "ldr r3, [%[a], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "add r3, r5\n\t"
+ "adc r4, r5\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "adc r3, r5\n\t"
+ "adc r4, r7\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "adc r3, r7\n\t"
+ "adc r4, r7\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "adc r3, r6\n\t"
+ "adc r4, r5\n\t"
+ /* r7 = final carry shifted into bit 31 for the top word. */
+ "adc r7, r7\n\t"
+ "lsl r7, r7, #31\n\t"
+ /* Shift right by one, top words first, carrying bits in r7. */
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, #31\n\t"
+ "lsr r6, r4, #1\n\t"
+ "lsl r4, r4, #31\n\t"
+ "orr r5, r4\n\t"
+ "orr r6, r7\n\t"
+ "mov r7, r3\n\t"
+ "str r5, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, #31\n\t"
+ "lsr r6, r4, #1\n\t"
+ "lsl r4, r4, #31\n\t"
+ "orr r5, r4\n\t"
+ "orr r6, r7\n\t"
+ "mov r7, r3\n\t"
+ "str r5, [%[r], #16]\n\t"
+ "str r6, [%[r], #20]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, #31\n\t"
+ "lsr r6, r4, #1\n\t"
+ "lsl r4, r4, #31\n\t"
+ "orr r5, r4\n\t"
+ "orr r6, r7\n\t"
+ "mov r7, r3\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r3, [%[r], #0]\n\t"
+ "ldr r4, [%[r], #4]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsr r6, r4, #1\n\t"
+ "lsl r4, r4, #31\n\t"
+ "orr r5, r4\n\t"
+ "orr r6, r7\n\t"
+ "str r5, [%[r], #0]\n\t"
+ "str r6, [%[r], #4]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [m] "r" (m)
+ : "memory", "r3", "r4", "r5", "r6", "r7"
+ );
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * Standard Jacobian-coordinate doubling specialised for a = -3 (P-256):
+ * the slope numerator 3*(X - Z^2)*(X + Z^2) replaces 3*X^2 + a*Z^4.
+ * r may alias p. All ordinates are in Montgomery form.
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data (at least 4 ordinates: t1, t2).
+ */
+static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*8;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = r->x;
+ y = r->y;
+ z = r->z;
+ /* Put infinity into result. */
+ if (r != p) {
+ r->infinity = p->infinity;
+ }
+
+ /* T1 = Z * Z */
+ sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod);
+ /* Z = Y * Z */
+ sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod);
+ /* Z = 2Z */
+ sp_256_mont_dbl_8(z, z, p256_mod);
+ /* T2 = X - T1 */
+ sp_256_mont_sub_8(t2, p->x, t1, p256_mod);
+ /* T1 = X + T1 */
+ sp_256_mont_add_8(t1, p->x, t1, p256_mod);
+ /* T2 = T1 * T2 */
+ sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod);
+ /* T1 = 3T2 */
+ sp_256_mont_tpl_8(t1, t2, p256_mod);
+ /* Y = 2Y */
+ sp_256_mont_dbl_8(y, p->y, p256_mod);
+ /* Y = Y * Y */
+ sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod);
+ /* T2 = Y * Y */
+ sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
+ /* T2 = T2/2 */
+ sp_256_div2_8(t2, t2, p256_mod);
+ /* Y = Y * X */
+ sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod);
+ /* X = T1 * T1 */
+ sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod);
+ /* X = X - Y */
+ sp_256_mont_sub_8(x, x, y, p256_mod);
+ /* X = X - Y */
+ sp_256_mont_sub_8(x, x, y, p256_mod);
+ /* Y = Y - X */
+ sp_256_mont_sub_8(y, y, x, p256_mod);
+ /* Y = Y * T1 */
+ sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod);
+ /* Y = Y - T2 */
+ sp_256_mont_sub_8(y, y, t2, p256_mod);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Looping version: c carries the inter-word borrow (0 or -1). The
+ * "sub r5, %[c]" computes 0 - c purely to re-create the borrow flag for
+ * the sbc; r5 is then reloaded before use.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns 0 on no borrow, all-ones (-1) on borrow out.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r6, %[a]\n\t"
+ "add r6, #32\n\t"
+ "\n1:\n\t"
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r4, [%[a]]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "sbc r4, r5\n\t"
+ "str r4, [%[r]]\n\t"
+ "sbc %[c], %[c]\n\t"
+ "add %[a], #4\n\t"
+ "add %[b], #4\n\t"
+ "add %[r], #4\n\t"
+ "cmp %[a], r6\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Fully unrolled borrow-propagating subtraction: the first word uses
+ * sub, the remaining seven use sbc.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * Returns 0 on no borrow, all-ones (-1) on borrow out.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "ldr r4, [%[a], #0]\n\t"
+ "ldr r5, [%[a], #4]\n\t"
+ "ldr r6, [%[b], #0]\n\t"
+ "ldr r7, [%[b], #4]\n\t"
+ "sub r4, r6\n\t"
+ "sbc r5, r7\n\t"
+ "str r4, [%[r], #0]\n\t"
+ "str r5, [%[r], #4]\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "ldr r5, [%[a], #12]\n\t"
+ "ldr r6, [%[b], #8]\n\t"
+ "ldr r7, [%[b], #12]\n\t"
+ "sbc r4, r6\n\t"
+ "sbc r5, r7\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "str r5, [%[r], #12]\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "ldr r5, [%[a], #20]\n\t"
+ "ldr r6, [%[b], #16]\n\t"
+ "ldr r7, [%[b], #20]\n\t"
+ "sbc r4, r6\n\t"
+ "sbc r5, r7\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "str r5, [%[r], #20]\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "ldr r5, [%[a], #28]\n\t"
+ "ldr r6, [%[b], #24]\n\t"
+ "ldr r7, [%[b], #28]\n\t"
+ "sbc r4, r6\n\t"
+ "sbc r5, r7\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "str r5, [%[r], #28]\n\t"
+ /* Capture the final borrow: 0 or all-ones. */
+ "sbc %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r7"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation: every word pair is always examined and
+ * no data-dependent branch is taken.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit diff = 0;
+ int i;
+
+ /* OR together the XOR of every word pair; zero only when all match. */
+ for (i = 0; i < 8; i++) {
+ diff |= a[i] ^ b[i];
+ }
+ return diff == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Jacobian-coordinate addition. If q aliases r the points are swapped so
+ * only the first point can share storage with the result. When the two
+ * points are equal (or negatives: p->y == p256_mod - q->y) the doubling
+ * routine is used instead, since the addition formulas degenerate.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (at least 10 ordinates: t1..t5).
+ */
+static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+ sp_digit* t)
+{
+ const sp_point_256* ap[2];
+ sp_point_256* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*8;
+ sp_digit* t3 = t + 4*8;
+ sp_digit* t4 = t + 6*8;
+ sp_digit* t5 = t + 8*8;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Ensure only the first point is the same as the result. */
+ if (q == r) {
+ const sp_point_256* a = p;
+ p = q;
+ q = a;
+ }
+
+ /* Check double: t1 = -q->y; equal x/z and y == +/-q->y means p == +/-q. */
+ (void)sp_256_sub_8(t1, p256_mod, q->y);
+ sp_256_norm_8(t1);
+ if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+ (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+ sp_256_proj_point_dbl_8(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_256));
+ /* If either input is infinity, work into scratch so r keeps the
+ * other (finite) point copied below. */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ /* r = p when p is finite, else r = q. */
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<8; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<8; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<8; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U1 = X1*Z2^2 */
+ sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod);
+ /* U2 = X2*Z1^2 */
+ sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+ /* S1 = Y1*Z2^3 */
+ sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+ /* H = U2 - U1 */
+ sp_256_mont_sub_8(t2, t2, t1, p256_mod);
+ /* R = S2 - S1 */
+ sp_256_mont_sub_8(t4, t4, t3, p256_mod);
+ /* Z3 = H*Z1*Z2 */
+ sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+ /* X3 = R^2 - H^3 - 2*U1*H^2 */
+ sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(x, x, t5, p256_mod);
+ sp_256_mont_dbl_8(t1, y, p256_mod);
+ sp_256_mont_sub_8(x, x, t1, p256_mod);
+ /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+ sp_256_mont_sub_8(y, y, x, p256_mod);
+ sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(y, y, t5, p256_mod);
+ }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Fixed 4-bit window method: a table of the 16 multiples 0..15 of g is
+ * built, then the 256-bit scalar is consumed four bits at a time from
+ * the top, with four doublings and one table addition per window.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td[16];
+ sp_point_256 rtd;
+ sp_digit tmpd[2 * 8 * 5];
+#endif
+ sp_point_256* t;
+ sp_point_256* rt;
+ sp_digit* tmp;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
+ if (t == NULL)
+ err = MEMORY_E;
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#else
+ t = td;
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* t[0] = {0, 0, 1} * norm */
+ XMEMSET(&t[0], 0, sizeof(t[0]));
+ t[0].infinity = 1;
+ /* t[1] = {g->x, g->y, g->z} * norm */
+ (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
+ (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
+ (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
+ t[1].infinity = 0;
+ /* t[i] = i * g: doubles fill even slots, additions fill odd ones. */
+ sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp);
+ t[ 2].infinity = 0;
+ sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp);
+ t[ 3].infinity = 0;
+ sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp);
+ t[ 4].infinity = 0;
+ sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp);
+ t[ 5].infinity = 0;
+ sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp);
+ t[ 6].infinity = 0;
+ sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp);
+ t[ 7].infinity = 0;
+ sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp);
+ t[ 8].infinity = 0;
+ sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp);
+ t[ 9].infinity = 0;
+ sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp);
+ t[10].infinity = 0;
+ sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp);
+ t[11].infinity = 0;
+ sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp);
+ t[12].infinity = 0;
+ sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp);
+ t[13].infinity = 0;
+ sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp);
+ t[14].infinity = 0;
+ sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp);
+ t[15].infinity = 0;
+
+ /* Start with the top nibble of the most significant word. */
+ i = 6;
+ n = k[i+1] << 0;
+ c = 28;
+ y = n >> 28;
+ XMEMCPY(rt, &t[y], sizeof(sp_point_256));
+ n <<= 4;
+ /* n = bit buffer, c = bits left in it, i = next word index. */
+ for (; i>=0 || c>=4; ) {
+ if (c < 4) {
+ n |= k[i--];
+ c += 32;
+ }
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+
+ /* Shift accumulator up one window (4 doublings), then add t[y]. */
+ sp_256_proj_point_dbl_8(rt, rt, tmp);
+ sp_256_proj_point_dbl_8(rt, rt, tmp);
+ sp_256_proj_point_dbl_8(rt, rt, tmp);
+ sp_256_proj_point_dbl_8(rt, rt, tmp);
+
+ sp_256_proj_point_add_8(rt, rt, &t[y], tmp);
+ }
+
+ if (map != 0) {
+ sp_256_map_8(r, rt, tmp);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ /* Zeroise secret-dependent intermediates before freeing. */
+ if (tmp != NULL) {
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5);
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+ }
+ if (t != NULL) {
+ XMEMSET(t, 0, sizeof(sp_point_256) * 16);
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ ForceZero(tmpd, sizeof(tmpd));
+ ForceZero(td, sizeof(td));
+#endif
+ sp_256_point_free_8(rt, 1, heap);
+
+ return err;
+}
+
+/* A table entry for pre-computed points: affine ordinates only
+ * (Z is implicitly one; see sp_256_gen_stripe_table_8). */
+typedef struct sp_table_entry_256 {
+ sp_digit x[8]; /* X ordinate. */
+ sp_digit y[8]; /* Y ordinate. */
+} sp_table_entry_256;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * Repeated doubling with the W (= Z^4) caching optimisation so each
+ * iteration avoids recomputing Z^4 from scratch. Without
+ * WOLFSSL_SP_SMALL the last iteration is peeled out of the loop so the
+ * final (unneeded) W update is skipped.
+ *
+ * p Point to double; updated in place.
+ * n Number of times to double
+ * t Temporary ordinate data (at least 10 ordinates).
+ */
+static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*8;
+ sp_digit* b = t + 4*8;
+ sp_digit* t1 = t + 6*8;
+ sp_digit* t2 = t + 8*8;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = p->x;
+ y = p->y;
+ z = p->z;
+
+ /* Y = 2*Y */
+ sp_256_mont_dbl_8(y, y, p256_mod);
+ /* W = Z^4 */
+ sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+ while (--n > 0)
+#else
+ while (--n >= 0)
+#endif
+ {
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_8(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_8(t2, b, p256_mod);
+ sp_256_mont_sub_8(x, x, t2, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+ /* t1 = Y^4 */
+ sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+ if (n != 0)
+#endif
+ {
+ /* W = W*Y^4 */
+ sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_8(y, b, x, p256_mod);
+ sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_8(y, y, p256_mod);
+ sp_256_mont_sub_8(y, y, t1, p256_mod);
+ }
+#ifndef WOLFSSL_SP_SMALL
+ /* Final iteration, peeled so W need not be updated. */
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_8(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_8(t2, b, p256_mod);
+ sp_256_mont_sub_8(x, x, t2, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+ /* t1 = Y^4 */
+ sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_8(y, b, x, p256_mod);
+ sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_8(y, y, p256_mod);
+ sp_256_mont_sub_8(y, y, t1, p256_mod);
+#endif
+ /* Y = Y/2 */
+ sp_256_div2_8(y, y, p256_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * Mixed addition (Z2 == 1) saves the Z2^2/Z2^3 work of full addition.
+ * When the points are equal (or negatives) the doubling routine is used
+ * instead, since the addition formulas degenerate.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (at least 10 ordinates: t1..t5).
+ */
+static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p,
+ const sp_point_256* q, sp_digit* t)
+{
+ const sp_point_256* ap[2];
+ sp_point_256* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*8;
+ sp_digit* t3 = t + 4*8;
+ sp_digit* t4 = t + 6*8;
+ sp_digit* t5 = t + 8*8;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Check double: t1 = -q->y; equal x/z and y == +/-q->y means p == +/-q. */
+ (void)sp_256_sub_8(t1, p256_mod, q->y);
+ sp_256_norm_8(t1);
+ if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+ (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+ sp_256_proj_point_dbl_8(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_256));
+ /* If either input is infinity, work into scratch so r keeps the
+ * other (finite) point copied below. */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ /* r = p when p is finite, else r = q. */
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<8; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<8; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<8; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U2 = X2*Z1^2 */
+ sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+ /* H = U2 - X1 */
+ sp_256_mont_sub_8(t2, t2, x, p256_mod);
+ /* R = S2 - Y1 */
+ sp_256_mont_sub_8(t4, t4, y, p256_mod);
+ /* Z3 = H*Z1 */
+ sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+ /* X3 = R^2 - H^3 - 2*X1*H^2 */
+ sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(x, t1, t5, p256_mod);
+ sp_256_mont_dbl_8(t1, t3, p256_mod);
+ sp_256_mont_sub_8(x, x, t1, p256_mod);
+ /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+ sp_256_mont_sub_8(t3, t3, x, p256_mod);
+ sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_8(y, t3, t5, p256_mod);
+ }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * x = X / Z^2, y = Y / Z^3 (still in Montgomery form); Z is set to the
+ * Montgomery representation of one (p256_norm_mod).
+ *
+ * a Point to convert; updated in place.
+ * t Temporary data (at least 4 ordinates plus inversion scratch).
+ */
+static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 8;
+ sp_digit* tmp = t + 4 * 8;
+
+ /* t1 = 1/Z. */
+ sp_256_mont_inv_8(t1, a->z, tmp);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3. */
+ sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
+
+ sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
+ XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * Builds the 16-entry stripe table used by the comb method: entry j is
+ * the affine point sum over set bits i of j of 2^(64*i) * a. Entries
+ * 1, 2, 4, 8 come from repeated doubling (64 doublings apart); the
+ * remaining entries are formed by mixed additions of those.
+ *
+ * a The base point.
+ * table Place to store generated point data (16 entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+ sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td, s1d, s2d;
+#endif
+ sp_point_256* t;
+ sp_point_256* s1 = NULL;
+ sp_point_256* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s2d, s2);
+ }
+
+ /* t = a converted to Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_256_proj_to_affine_8(t, tmp);
+
+ XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(64*i) * a, via 64 doublings per step. */
+ for (i=1; i<4; i++) {
+ sp_256_proj_point_dbl_n_8(t, 64, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Fill the rest: table[j] = table[2^i] + table[j - 2^i]. */
+ for (i=1; i<4; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_256_point_free_8(s2, 0, heap);
+ sp_256_point_free_8(s1, 0, heap);
+ sp_256_point_free_8( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Implements a 4-bit comb: for each column i, the table index is built
+ * from bits i, i+64, i+128 and i+192 of the scalar, so only 63 double/add
+ * iterations remain after the initial table load.
+ *
+ * r Resulting point.
+ * g Point the table was generated for (not used directly here).
+ * table Pre-computed table of multiples of g (16 entries).
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 8 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_8(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Table entries are affine; give both points z = p256_norm_mod. */
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ /* Load rt from the entry selected by the top bit of each stripe. */
+ y = 0;
+ for (j=0,x=63; j<4; j++,x+=64) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ for (i=62; i>=0; i--) {
+ /* Gather bit i of each 64-bit stripe into the table index. */
+ y = 0;
+ for (j=0,x=i; j<4; j++,x+=64) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ /* Double the accumulator then add the selected entry. */
+ sp_256_proj_point_dbl_8(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_256_map_8(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+/* Maximum number of cached points; may be overridden at build time. */
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache slot pairing a point with its generated stripe table. */
+typedef struct sp_cache_256_t {
+ /* X ordinate of the cached point. */
+ sp_digit x[8];
+ /* Y ordinate of the cached point. */
+ sp_digit y[8];
+ /* Pre-computed stripe table for the point (16 entries). */
+ sp_table_entry_256 table[16];
+ /* Use count; drives least-used eviction in sp_ecc_get_cache_256. */
+ uint32_t cnt;
+ /* Non-zero when this slot holds a point. */
+ int set;
+} sp_cache_256_t;
+
+/* Cache of points (thread-local when THREAD_LS_T is real TLS). */
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+/* Index of the most recently returned slot; -1 before first use. */
+static THREAD_LS_T int sp_cache_256_last = -1;
+/* Set once the cache slots have been cleared. */
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ /* Lock protecting the shared cache when thread-local storage is absent. */
+ static volatile int initCacheMutex_256 = 0;
+ static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+/* Find (or create) the cache entry for the given point.
+ *
+ * On a hit the entry's use count is incremented. On a miss an unused slot
+ * is claimed — or, when the cache is full, the least-used slot is evicted —
+ * and the point is stored with its count reset to 1. The entry's table is
+ * only valid once the caller has generated it (see sp_256_ecc_mulmod_8).
+ *
+ * g Point to look up.
+ * cache Receives a pointer to the slot for the point.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazily clear all slots on first use. */
+ if (sp_cache_256_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_256[i].set = 0;
+ }
+ sp_cache_256_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_256[i].set)
+ continue;
+
+ if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+ sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+ sp_cache_256[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_256_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_256[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_256_last) {
+ least = sp_cache_256[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_256[j].cnt < least) {
+ i = j;
+ least = sp_cache_256[i].cnt;
+ }
+ }
+ }
+
+ /* Claim the slot for this point; table is generated by the caller. */
+ XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+ XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+ sp_cache_256[i].set = 1;
+ sp_cache_256[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_256[i];
+ sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * With FP_ECC, uses the fixed-point cache: the first use of a point goes
+ * through the generic routine; on the second use a stripe table is
+ * generated and the faster striped routine is used from then on.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails, BAD_MUTEX_E when the
+ * cache lock cannot be taken, and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 8 * 5];
+ sp_cache_256_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Serialise access to the shared cache. */
+ if (initCacheMutex_256 == 0) {
+ wc_InitMutex(&sp_cache_256_lock);
+ initCacheMutex_256 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_256_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ /* Generate the stripe table on the point's second use. */
+ sp_ecc_get_cache_256(g, &cache);
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+ }
+ else {
+ err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * Builds a 256-entry stripe table for an 8-bit comb over 32-bit stripes:
+ * table[0] is the point at infinity (all zero) and, for each bit b set in
+ * index i, table[i] accumulates 2^(32*b) * a, stored affine in Montgomery
+ * form. Consumed by sp_256_ecc_mulmod_stripe_8.
+ *
+ * a The base point.
+ * table Place to store generated point data (256 entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+ sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td, s1d, s2d;
+#endif
+ sp_point_256* t;
+ sp_point_256* s1 = NULL;
+ sp_point_256* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ /* Obtain three working points (stack- or heap-backed per build). */
+ err = sp_256_point_new_8(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s2d, s2);
+ }
+
+ /* t = a converted into Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_256_proj_to_affine_8(t, tmp);
+
+ /* s1/s2 hold affine table values; set z to p256_norm_mod
+ * (Montgomery form of one) so the _qz1 add routine applies. */
+ XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(32*i) * a for i = 1..7 (32 doublings per step). */
+ for (i=1; i<8; i++) {
+ sp_256_proj_point_dbl_n_8(t, 32, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Fill remaining entries: table[j] = table[2^i] + table[j - 2^i]. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_256_point_free_8(s2, 0, heap);
+ sp_256_point_free_8(s1, 0, heap);
+ sp_256_point_free_8( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Implements an 8-bit comb: for each column i, the table index is built
+ * from bits i, i+32, ..., i+224 of the scalar, so only 31 double/add
+ * iterations remain after the initial table load.
+ *
+ * r Resulting point.
+ * g Point the table was generated for (not used directly here).
+ * table Pre-computed table of multiples of g (256 entries).
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 8 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_8(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Table entries are affine; give both points z = p256_norm_mod. */
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ /* Load rt from the entry selected by the top bit of each stripe. */
+ y = 0;
+ for (j=0,x=31; j<8; j++,x+=32) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ for (i=30; i>=0; i--) {
+ /* Gather bit i of each 32-bit stripe into the table index. */
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=32) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ /* Double the accumulator then add the selected entry. */
+ sp_256_proj_point_dbl_8(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_256_map_8(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+/* Maximum number of cached points; may be overridden at build time. */
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache slot pairing a point with its generated stripe table. */
+typedef struct sp_cache_256_t {
+ /* X ordinate of the cached point. */
+ sp_digit x[8];
+ /* Y ordinate of the cached point. */
+ sp_digit y[8];
+ /* Pre-computed stripe table for the point (256 entries). */
+ sp_table_entry_256 table[256];
+ /* Use count; drives least-used eviction in sp_ecc_get_cache_256. */
+ uint32_t cnt;
+ /* Non-zero when this slot holds a point. */
+ int set;
+} sp_cache_256_t;
+
+/* Cache of points (thread-local when THREAD_LS_T is real TLS). */
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+/* Index of the most recently returned slot; -1 before first use. */
+static THREAD_LS_T int sp_cache_256_last = -1;
+/* Set once the cache slots have been cleared. */
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ /* Lock protecting the shared cache when thread-local storage is absent. */
+ static volatile int initCacheMutex_256 = 0;
+ static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+/* Find (or create) the cache entry for the given point.
+ *
+ * On a hit the entry's use count is incremented. On a miss an unused slot
+ * is claimed — or, when the cache is full, the least-used slot is evicted —
+ * and the point is stored with its count reset to 1. The entry's table is
+ * only valid once the caller has generated it (see sp_256_ecc_mulmod_8).
+ *
+ * g Point to look up.
+ * cache Receives a pointer to the slot for the point.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazily clear all slots on first use. */
+ if (sp_cache_256_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_256[i].set = 0;
+ }
+ sp_cache_256_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_256[i].set)
+ continue;
+
+ if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+ sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+ sp_cache_256[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_256_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_256[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_256_last) {
+ least = sp_cache_256[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_256[j].cnt < least) {
+ i = j;
+ least = sp_cache_256[i].cnt;
+ }
+ }
+ }
+
+ /* Claim the slot for this point; table is generated by the caller. */
+ XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+ XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+ sp_cache_256[i].set = 1;
+ sp_cache_256[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_256[i];
+ sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * With FP_ECC, uses the fixed-point cache: the first use of a point goes
+ * through the generic routine; on the second use a stripe table is
+ * generated and the faster striped routine is used from then on.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails, BAD_MUTEX_E when the
+ * cache lock cannot be taken, and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 8 * 5];
+ sp_cache_256_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Serialise access to the shared cache. */
+ if (initCacheMutex_256 == 0) {
+ wc_InitMutex(&sp_cache_256_lock);
+ initCacheMutex_256 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_256_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ /* Generate the stripe table on the point's second use. */
+ sp_ecc_get_cache_256(g, &cache);
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+ }
+ else {
+ err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Public wrapper: converts mp_int/ecc_point arguments into the internal
+ * SP representation, multiplies, and converts the result back.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[8];
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert scalar and point into the internal representation. */
+ sp_256_from_mp(k, 8, km);
+ sp_256_point_from_ecc_point_8(point, gm);
+
+ err = sp_256_ecc_mulmod_8(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_to_ecc_point_8(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(point, 0, heap);
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Pre-computed 16-entry stripe table for the P256 base point, laid out as
+ * expected by sp_256_ecc_mulmod_stripe_8: entry 0 is the point at infinity
+ * (all zero); for each bit b set in index i, entry i accumulates
+ * 2^(64*b) * G (affine x/y, presumably Montgomery form — matches the
+ * layout produced by sp_256_gen_stripe_table_8). */
+static const sp_table_entry_256 p256_table[16] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+ 0xa53755c6,0x18905f76 },
+ { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+ 0x25885d85,0x8571ff18 } },
+ /* 2 */
+ { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+ 0xfd1b667f,0x2f5e6961 },
+ { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+ 0x8d6f0f7b,0xf648f916 } },
+ /* 3 */
+ { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+ 0x133d0015,0x5abe0285 },
+ { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+ 0x6b6f7383,0x94bb725b } },
+ /* 4 */
+ { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+ 0x21d324f6,0x61d587d4 },
+ { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+ 0x4621efbe,0xfa11fe12 } },
+ /* 5 */
+ { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+ 0x1f13bedc,0x586eb04c },
+ { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+ 0x70864f11,0x19d5ac08 } },
+ /* 6 */
+ { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+ 0xc3b266b1,0xbb6de651 },
+ { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+ 0x5d18b99b,0x60b4619a } },
+ /* 7 */
+ { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+ 0xaeebffcd,0x9d0f27b2 },
+ { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+ 0x356ec48d,0x244a566d } },
+ /* 8 */
+ { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+ 0xcd42ab1b,0x803f3e02 },
+ { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+ 0x5067adc1,0xc097440e } },
+ /* 9 */
+ { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+ 0x915f1f30,0xf1af32d5 },
+ { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+ 0xe2d41c8b,0x23d0f130 } },
+ /* 10 */
+ { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+ 0x7990216a,0x50bbb4d9 },
+ { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+ 0x01fe49c3,0x2b100118 } },
+ /* 11 */
+ { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+ 0x83fbae0c,0xdd558999 },
+ { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+ 0x149d6041,0xe6e4c551 } },
+ /* 12 */
+ { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+ 0xdb7e63af,0xfad27148 },
+ { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+ 0x9f0e1a84,0x77387de3 } },
+ /* 13 */
+ { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+ 0xbef0c47e,0xb37b85c0 },
+ { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+ 0xf9f628d5,0x9c135ac8 } },
+ /* 14 */
+ { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+ 0x91ece900,0xc109f9cb },
+ { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+ 0x2eee1ee1,0x9bc3344f } },
+ /* 15 */
+ { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+ 0x5f1a4cc1,0x29591d52 },
+ { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+ 0x18ef332c,0x6376551f } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Uses the compiled-in 16-entry stripe table for the generator, so no
+ * runtime table generation is required.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+ k, map, heap);
+}
+
+#else
+static const sp_table_entry_256 p256_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+ 0xa53755c6,0x18905f76 },
+ { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+ 0x25885d85,0x8571ff18 } },
+ /* 2 */
+ { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
+ 0xdbdf58e9,0xd953c50d },
+ { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
+ 0x9eb288f3,0x863ebb7e } },
+ /* 3 */
+ { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
+ 0xb5ff80a0,0x00076055 },
+ { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
+ 0x34373ee0,0x83087761 } },
+ /* 4 */
+ { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+ 0xfd1b667f,0x2f5e6961 },
+ { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+ 0x8d6f0f7b,0xf648f916 } },
+ /* 5 */
+ { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+ 0x133d0015,0x5abe0285 },
+ { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+ 0x6b6f7383,0x94bb725b } },
+ /* 6 */
+ { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
+ 0x2f7dc4ef,0xcdd6bbcb },
+ { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
+ 0x4bdae5f6,0xa361bebd } },
+ /* 7 */
+ { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
+ 0xc4b5292c,0xba12ca09 },
+ { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
+ 0x701fef4b,0x53ebb99d } },
+ /* 8 */
+ { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
+ 0x06d54831,0x8589fb92 },
+ { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
+ 0x02541c4f,0xebb0696d } },
+ /* 9 */
+ { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
+ 0xd1b27da3,0xeb2820cb },
+ { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
+ 0x55a7da1d,0x1f28289b } },
+ /* 10 */
+ { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
+ 0x05e54d63,0x337a4b59 },
+ { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
+ 0xf4c2fbd6,0x0d65e0d5 } },
+ /* 11 */
+ { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
+ 0x52f4a232,0xc23da242 },
+ { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
+ 0xc790cff1,0x19de3b8c } },
+ /* 12 */
+ { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
+ 0x91fccbfd,0xe34dcbd4 },
+ { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
+ 0x7b4e0f7f,0xe7641f44 } },
+ /* 13 */
+ { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
+ 0x052a57bf,0x4a12df57 },
+ { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
+ 0xbb5bea46,0x6af5aa93 } },
+ /* 14 */
+ { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
+ 0x66a44013,0x5fe3475a },
+ { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
+ 0xecfea916,0xb544e308 } },
+ /* 15 */
+ { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
+ 0xa6b0c20b,0xe0b6b2bd },
+ { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
+ 0x25a63774,0x71c023de } },
+ /* 16 */
+ { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+ 0x21d324f6,0x61d587d4 },
+ { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+ 0x4621efbe,0xfa11fe12 } },
+ /* 17 */
+ { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+ 0x1f13bedc,0x586eb04c },
+ { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+ 0x70864f11,0x19d5ac08 } },
+ /* 18 */
+ { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
+ 0x7f9c563f,0xe7c0073f },
+ { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
+ 0xc65b3c0a,0xe08504fe } },
+ /* 19 */
+ { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
+ 0x5b0996b4,0x78f01882 },
+ { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
+ 0x7e94747a,0x43a773b8 } },
+ /* 20 */
+ { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+ 0xc3b266b1,0xbb6de651 },
+ { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+ 0x5d18b99b,0x60b4619a } },
+ /* 21 */
+ { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+ 0xaeebffcd,0x9d0f27b2 },
+ { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+ 0x356ec48d,0x244a566d } },
+ /* 22 */
+ { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
+ 0x3581ef69,0x45e58c87 },
+ { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
+ 0xc1e4b7a4,0xc040e21c } },
+ /* 23 */
+ { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
+ 0x682c6ec7,0x1cdf5c97 },
+ { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
+ 0xa92dff3d,0x046755f8 } },
+ /* 24 */
+ { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
+ 0x3b83a5f3,0x046e5e11 },
+ { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
+ 0x303d005b,0x6e0106c3 } },
+ /* 25 */
+ { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
+ 0xe901cf1f,0x442594ed },
+ { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
+ 0x4c2ee68e,0xa796fa51 } },
+ /* 26 */
+ { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
+ 0xc69766e9,0xe4ad2da9 },
+ { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
+ 0xc37b5143,0xc5e94046 } },
+ /* 27 */
+ { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
+ 0xdb464747,0x63283daf },
+ { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
+ 0x1981a938,0x68bd19ab } },
+ /* 28 */
+ { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
+ 0x3c6fdfd6,0x495292f5 },
+ { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
+ 0x26036837,0x0ec7530d } },
+ /* 29 */
+ { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
+ 0x64863f0b,0x0f6207a6 },
+ { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
+ 0x08ed6dcf,0xff0db072 } },
+ /* 30 */
+ { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
+ 0x88740ea3,0x313b513c },
+ { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
+ 0x86f19f81,0x2d3abcf9 } },
+ /* 31 */
+ { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
+ 0xded98cdf,0xc036fa10 },
+ { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
+ 0xb6d40194,0xa6b2a2c4 } },
+ /* 32 */
+ { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
+ 0xaf7c9860,0x810ee252 },
+ { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
+ 0x92731745,0xd485717a } },
+ /* 33 */
+ { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
+ 0x2f9a604e,0x6a6045a7 },
+ { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
+ 0xf9e15790,0xd3e45cfa } },
+ /* 34 */
+ { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
+ 0xe3c2c19c,0x207755de },
+ { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
+ 0x7154b00d,0x48dc5ee5 } },
+ /* 35 */
+ { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
+ 0xdff6f445,0xf2fb0aed },
+ { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
+ 0xdb28d525,0xa13e9015 } },
+ /* 36 */
+ { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
+ 0x1497526f,0x2bf0d6b0 },
+ { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
+ 0x162fe89f,0x42a94a5a } },
+ /* 37 */
+ { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
+ 0xc65ede3d,0x2c2dd969 },
+ { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
+ 0x42c56dbc,0xf437fa1f } },
+ /* 38 */
+ { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
+ 0x54707aa8,0xaaf45b33 },
+ { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
+ 0xf4f272bc,0xcdf6310d } },
+ /* 39 */
+ { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
+ 0xda9e2ff2,0xf0d008ba },
+ { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
+ 0xca887b8b,0x5bd5c2f5 } },
+ /* 40 */
+ { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
+ 0xa09e4719,0xaa12dfc8 },
+ { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
+ 0xe48ca901,0x6c036e73 } },
+ /* 41 */
+ { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
+ 0x96afbe24,0x292ff658 },
+ { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
+ 0x311b7276,0x644e0c90 } },
+ /* 42 */
+ { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
+ 0xcab79a77,0xf25ae793 },
+ { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
+ 0x13db0a3e,0x39b8e653 } },
+ /* 43 */
+ { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
+ 0x0f19db06,0x39122f2f },
+ { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
+ 0xce80ff8d,0x8de80af8 } },
+ /* 44 */
+ { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
+ 0x2e368c04,0x87194906 },
+ { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
+ 0x5b74fde1,0xfc315e6a } },
+ /* 45 */
+ { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
+ 0xee389088,0xe6d4a7ad },
+ { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
+ 0x9be2ae57,0x35dfaf9a } },
+ /* 46 */
+ { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
+ 0x1c830d2b,0x1da5c7d7 },
+ { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
+ 0xdbf4b9d6,0x7077c0fd } },
+ /* 47 */
+ { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
+ 0xe50efe44,0x53a8632e },
+ { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
+ 0x34e1fcc1,0x028ca76d } },
+ /* 48 */
+ { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
+ 0x6962f046,0x04c17cd8 },
+ { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
+ 0xfed97474,0xf7ba4de9 } },
+ /* 49 */
+ { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
+ 0x52131c41,0xe31f9600 },
+ { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
+ 0xce34d47b,0xaa3a6259 } },
+ /* 50 */
+ { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
+ 0x7e79daee,0x2398dd62 },
+ { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
+ 0x1c046210,0x5717f5b2 } },
+ /* 51 */
+ { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
+ 0x0e3c28de,0x660a2c56 },
+ { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
+ 0x4f522453,0x624ee54c } },
+ /* 52 */
+ { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
+ 0x92bdfbc0,0x4f392afb },
+ { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
+ 0xccdb399c,0x8a3e7977 } },
+ /* 53 */
+ { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
+ 0x70c24404,0x3888d023 },
+ { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
+ 0x18102336,0xa5e62e47 } },
+ /* 54 */
+ { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
+ 0x466a5adc,0x2c4768e6 },
+ { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
+ 0xf9e652a0,0x7b5e6441 } },
+ /* 55 */
+ { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
+ 0x0c8d744a,0xb8af73cb },
+ { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
+ 0x7f3f0895,0xa036395f } },
+ /* 56 */
+ { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
+ 0x875fb533,0x4be36b01 },
+ { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
+ 0x1bdc00c0,0x8cbc9a87 } },
+ /* 57 */
+ { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
+ 0x0c0835f8,0x44e7553e },
+ { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
+ 0x5eb8fc18,0x470a683a } },
+ /* 58 */
+ { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
+ 0xc63dc6ef,0x16410690 },
+ { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
+ 0x7abcbb4f,0xd73479fd } },
+ /* 59 */
+ { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
+ 0x0771666b,0x816469e3 },
+ { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
+ 0xf0dd3f9c,0x0a36dd23 } },
+ /* 60 */
+ { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
+ 0xfdbab118,0xe331dfd6 },
+ { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
+ 0x492e3389,0xd3b4782a } },
+ /* 61 */
+ { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
+ 0x4c86a5bd,0x7281275a },
+ { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
+ 0xce145059,0x2c062e7e } },
+ /* 62 */
+ { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
+ 0x2c4e7ef1,0x282a35f9 },
+ { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
+ 0x554d2abd,0xc71cd513 } },
+ /* 63 */
+ { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
+ 0xcf47f3a3,0xc50f6740 },
+ { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
+ 0x212958dc,0xb9ecb3a7 } },
+ /* 64 */
+ { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+ 0xcd42ab1b,0x803f3e02 },
+ { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+ 0x5067adc1,0xc097440e } },
+ /* 65 */
+ { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+ 0x915f1f30,0xf1af32d5 },
+ { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+ 0xe2d41c8b,0x23d0f130 } },
+ /* 66 */
+ { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
+ 0xc0a3fadd,0xb0288dd6 },
+ { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
+ 0xf408c8d2,0xffd3724f } },
+ /* 67 */
+ { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
+ 0xd78c26df,0xf5590f4a },
+ { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
+ 0xf6f74a20,0x18d6da54 } },
+ /* 68 */
+ { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+ 0x7990216a,0x50bbb4d9 },
+ { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+ 0x01fe49c3,0x2b100118 } },
+ /* 69 */
+ { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+ 0x83fbae0c,0xdd558999 },
+ { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+ 0x149d6041,0xe6e4c551 } },
+ /* 70 */
+ { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
+ 0x07ed56ff,0x51e00db1 },
+ { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
+ 0x49829177,0xe22f4241 } },
+ /* 71 */
+ { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
+ 0x52dc48c9,0xf709373d },
+ { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
+ 0xe7275b11,0xbd52d288 } },
+ /* 72 */
+ { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
+ 0xc8aa77a6,0xa0d0f8e4 },
+ { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
+ 0x946d6a00,0xa56c78c7 } },
+ /* 73 */
+ { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
+ 0x731a367a,0xd8befdf8 },
+ { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
+ 0xce9f6478,0x854a68a5 } },
+ /* 74 */
+ { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
+ 0x98846a95,0x5cacea0b },
+ { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
+ 0x35e4efa9,0xe4982d12 } },
+ /* 75 */
+ { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
+ 0x16b20499,0x8046b7f6 },
+ { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
+ 0x9082af55,0xeb17ca7b } },
+ /* 76 */
+ { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
+ 0xfab5e131,0x097b00ba },
+ { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
+ 0xafdbcc9e,0xf95c747b } },
+ /* 77 */
+ { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
+ 0x566ed837,0x3512601e },
+ { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
+ 0x6068ab6b,0x0ef97123 } },
+ /* 78 */
+ { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
+ 0x3b4fbc95,0xfc16d933 },
+ { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
+ 0xb95d7a17,0x14ca4af1 } },
+ /* 79 */
+ { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
+ 0xf59c231d,0x4057b063 },
+ { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
+ 0xf1330b13,0x1c3b5d64 } },
+ /* 80 */
+ { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+ 0xdb7e63af,0xfad27148 },
+ { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+ 0x9f0e1a84,0x77387de3 } },
+ /* 81 */
+ { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+ 0xbef0c47e,0xb37b85c0 },
+ { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+ 0xf9f628d5,0x9c135ac8 } },
+ /* 82 */
+ { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
+ 0xc433851f,0x5721361f },
+ { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
+ 0xe6bb11bd,0xdcbac3c9 } },
+ /* 83 */
+ { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
+ 0x2d626862,0xb8c1c89e },
+ { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
+ 0x2f9422d4,0x5d23bbda } },
+ /* 84 */
+ { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+ 0x91ece900,0xc109f9cb },
+ { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+ 0x2eee1ee1,0x9bc3344f } },
+ /* 85 */
+ { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+ 0x5f1a4cc1,0x29591d52 },
+ { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+ 0x18ef332c,0x6376551f } },
+ /* 86 */
+ { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
+ 0x08e2987a,0xbdb79dc8 },
+ { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
+ 0xadd3c14a,0x8ee86001 } },
+ /* 87 */
+ { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
+ 0x6f77aa4b,0x92e51d7a },
+ { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
+ 0x0a56aaaa,0x5182f86f } },
+ /* 88 */
+ { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
+ 0x4073a6f2,0x91dcab5d },
+ { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
+ 0x97974f2b,0x17a0cedb } },
+ /* 89 */
+ { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
+ 0x7f4cdf41,0x2e8ce36c },
+ { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
+ 0x34f668f3,0xf4ccc6cb } },
+ /* 90 */
+ { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
+ 0x9a0df3c9,0xac0db488 },
+ { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
+ 0x94c974a2,0x95a64a61 } },
+ /* 91 */
+ { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
+ 0x29210677,0x231e54ba },
+ { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
+ 0xd8a731e1,0xab0be032 } },
+ /* 92 */
+ { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
+ 0x2cf6a679,0xf1bcc880 },
+ { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
+ 0x5aebb271,0x85169469 } },
+ /* 93 */
+ { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
+ 0xdaad55d8,0x8f67d9d2 },
+ { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
+ 0xc0728b5d,0xf84572b9 } },
+ /* 94 */
+ { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
+ 0x616b2c19,0xedee2710 },
+ { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
+ 0x44ebd7f4,0x9fd27e9b } },
+ /* 95 */
+ { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
+ 0x958ff387,0xa40c2fb6 },
+ { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
+ 0x7dc6decf,0x99bc9bb8 } },
+ /* 96 */
+ { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
+ 0xa16d7e64,0x9abe210b },
+ { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
+ 0x87f344b0,0x7881c257 } },
+ /* 97 */
+ { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
+ 0xa30e8940,0x15e6e319 },
+ { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
+ 0x191172ce,0x0e55facf } },
+ /* 98 */
+ { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
+ 0x6fe96577,0xd73d0976 },
+ { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
+ 0x8f15a50b,0x9250a374 } },
+ /* 99 */
+ { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
+ 0xc1cc8c0b,0x77414082 },
+ { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
+ 0x12eb20b9,0x8cb04f4d } },
+ /* 100 */
+ { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
+ 0x47123b51,0xe4e429ef },
+ { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
+ 0x3c6e6552,0x37bca2ff } },
+ /* 101 */
+ { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
+ 0x3002b22a,0x59913edc },
+ { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
+ 0xb013e226,0x43786e4a } },
+ /* 102 */
+ { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
+ 0xb7e79e7a,0x8638ca98 },
+ { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
+ 0x7b3aa6f0,0x1ecdd36a } },
+ /* 103 */
+ { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
+ 0xd459f32d,0xd85d0f85 },
+ { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
+ 0xb4ed3c62,0xa04f19c3 } },
+ /* 104 */
+ { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
+ 0x5c0950b0,0x92b2eeea },
+ { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
+ 0x5834276c,0x1ee78221 } },
+ /* 105 */
+ { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
+ 0x57a6e150,0xf3f2ced8 },
+ { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
+ 0x3da3e210,0x0f56a454 } },
+ /* 106 */
+ { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
+ 0x1969e263,0xbd8f1741 },
+ { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
+ 0x30ccfa09,0x2d1a1c35 } },
+ /* 107 */
+ { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
+ 0xb91fba46,0xa107a65e },
+ { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
+ 0xf87a9af2,0x183d760a } },
+ /* 108 */
+ { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
+ 0xc269d754,0x1d44179d },
+ { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
+ 0x9606d262,0x771f9cc2 } },
+ /* 109 */
+ { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
+ 0x0362718e,0x64427a31 },
+ { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
+ 0x6ae90d6d,0x49d9b749 } },
+ /* 110 */
+ { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
+ 0x3f605445,0x9037d81b },
+ { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
+ 0x7cc0639c,0x08c3de6a } },
+ /* 111 */
+ { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
+ 0x45796b2f,0xc6909442 },
+ { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
+ 0xcafe3ac0,0x3fa3db02 } },
+ /* 112 */
+ { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
+ 0xfdb808ff,0xc5c4bdb0 },
+ { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
+ 0x46c2b6b5,0x2d56db94 } },
+ /* 113 */
+ { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
+ 0xe503ba42,0x0f56bd9d },
+ { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
+ 0x1173b5f1,0x4003bb9d } },
+ /* 114 */
+ { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
+ 0xa07f2f9e,0x53765522 },
+ { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
+ 0x6c5d4549,0x7a056f58 } },
+ /* 115 */
+ { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
+ 0x7a1a2675,0x77d482f1 },
+ { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
+ 0x2b38b0e4,0x4115012b } },
+ /* 116 */
+ { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
+ 0xfbea0946,0xcdf04572 },
+ { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
+ 0x97383109,0xee703dda } },
+ /* 117 */
+ { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
+ 0xa162ce21,0x2a0ad89d },
+ { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
+ 0xac2b4659,0xd62d0b67 } },
+ /* 118 */
+ { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
+ 0x991c2426,0xb39a23f2 },
+ { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
+ 0xc0674cc5,0x04ed0092 } },
+ /* 119 */
+ { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
+ 0x0177c387,0xa0a91fc1 },
+ { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
+ 0x9ed20c41,0x084cf988 } },
+ /* 120 */
+ { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
+ 0x73abf77e,0xd57955b2 },
+ { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
+ 0x02d141f1,0x8e14ea42 } },
+ /* 121 */
+ { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
+ 0x2aa4d158,0x597e1a37 },
+ { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
+ 0x199b4dea,0xca3f0236 } },
+ /* 122 */
+ { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
+ 0x309c07e4,0xbde7fd7e },
+ { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
+ 0x0a7dd198,0xb623ad0e } },
+ /* 123 */
+ { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
+ 0x58ec137b,0xd6aa2e46 },
+ { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
+ 0x2dcc513a,0x111662e0 } },
+ /* 124 */
+ { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
+ 0x94b750f8,0xdb3ee1cb },
+ { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
+ 0x52206a59,0x886a6442 } },
+ /* 125 */
+ { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
+ 0x018a17bc,0xa70cf4eb },
+ { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
+ 0xd1747b77,0xaa4772ab } },
+ /* 126 */
+ { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
+ 0x30faf974,0x611a6ddc },
+ { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
+ 0x16429c88,0x5cfffaf8 } },
+ /* 127 */
+ { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
+ 0x7dc1994c,0x6e5a6b23 },
+ { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
+ 0x242dabcc,0x481a238d } },
+ /* 128 */
+ { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
+ 0xe0cdf943,0x2c41114c },
+ { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
+ 0x42ff9297,0x20477abf } },
+ /* 129 */
+ { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
+ 0xc77396b6,0xac66409a },
+ { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
+ 0xcc122f85,0xce8e6975 } },
+ /* 130 */
+ { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
+ 0x250bb4a8,0x08fde365 },
+ { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
+ 0x565d6cd7,0x2f7e2fd2 } },
+ /* 131 */
+ { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
+ 0x907702ae,0xc65be92e },
+ { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
+ 0xd1193b3a,0x4bff8e47 } },
+ /* 132 */
+ { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
+ 0x5772967d,0x3e4e4ae6 },
+ { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
+ 0x58ec6028,0x5388aefd } },
+ /* 133 */
+ { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
+ 0x4f75be0e,0x5cf908d1 },
+ { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
+ 0x60f00ce2,0xa698ba40 } },
+ /* 134 */
+ { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
+ 0x7aebad8d,0xb142ef8a },
+ { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
+ 0x58515075,0xd1896a96 } },
+ /* 135 */
+ { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
+ 0x7981da39,0x267b0e0b },
+ { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
+ 0xa1119393,0xb54e287a } },
+ /* 136 */
+ { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
+ 0x5f87d4e6,0x84abb28b },
+ { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
+ 0x17655640,0xe5436f67 } },
+ /* 137 */
+ { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
+ 0x5b9ce99e,0x0404f68b },
+ { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
+ 0x0ac1c701,0x3a4263df } },
+ /* 138 */
+ { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
+ 0x905ea367,0x0ca8fd3f },
+ { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
+ 0x4ddb0c33,0x96dca264 } },
+ /* 139 */
+ { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
+ 0x3aad59dc,0x4363e212 },
+ { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
+ 0xd8bb98c4,0x840e115c } },
+ /* 140 */
+ { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
+ 0x30ded6d4,0x5e0d6abd },
+ { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
+ 0x2945a25a,0x7dea48f4 } },
+ /* 141 */
+ { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
+ 0xebfd16d1,0xabc2a2be },
+ { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
+ 0x6c7eefc1,0x4ea35394 } },
+ /* 142 */
+ { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
+ 0x1c94ffc3,0x3a76e689 },
+ { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
+ 0x465e6464,0x8212a10a } },
+ /* 143 */
+ { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
+ 0x599cb164,0xaa7cab71 },
+ { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
+ 0xfe0617c3,0x40e38073 } },
+ /* 144 */
+ { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
+ 0xb3055526,0xe3604700 },
+ { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
+ 0xa3dee15f,0x6542d677 } },
+ /* 145 */
+ { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
+ 0x09bb6f21,0xa6534aee },
+ { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
+ 0xdc9aef22,0xf3cb672f } },
+ /* 146 */
+ { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
+ 0xaae870e7,0x7cafaa2e },
+ { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
+ 0xb9bd522e,0x0aab13c1 } },
+ /* 147 */
+ { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
+ 0x847012e9,0x4b91a602 },
+ { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
+ 0x72321cab,0x49534c53 } },
+ /* 148 */
+ { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
+ 0xd65ac5ee,0xcaf46c4f },
+ { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
+ 0x04c6770f,0x14ce9e57 } },
+ /* 149 */
+ { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
+ 0x3e4c9a71,0x1bb708a5 },
+ { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
+ 0xda300102,0xf9d126f2 } },
+ /* 150 */
+ { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
+ 0x729ecc69,0x807afcb9 },
+ { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
+ 0x6568cd8c,0x751adcd1 } },
+ /* 151 */
+ { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
+ 0x2537743f,0x29ec4468 },
+ { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
+ 0x92a4077d,0xff9370e3 } },
+ /* 152 */
+ { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
+ 0xa2a9d01a,0x9776478b },
+ { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
+ 0xac2f82fa,0x74a6313f } },
+ /* 153 */
+ { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
+ 0x0ff4863d,0xab75be15 },
+ { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
+ 0x0b4459f6,0x4ebeac2e } },
+ /* 154 */
+ { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
+ 0x2c1baffc,0xdf99887b },
+ { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
+ 0x779f4058,0x27b040a7 } },
+ /* 155 */
+ { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
+ 0xe4cfa3f5,0xb393dd37 },
+ { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
+ 0xd0463419,0x09588c12 } },
+ /* 156 */
+ { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
+ 0xdb9f648b,0x81c879a9 },
+ { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
+ 0x5fc11bc4,0xfa0d48f5 } },
+ /* 157 */
+ { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
+ 0xb6a367d6,0x8ea0e156 },
+ { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
+ 0xfa00b5ac,0x3f5ab924 } },
+ /* 158 */
+ { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
+ 0x2b74256e,0x8bc76887 },
+ { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
+ 0x60fcf34f,0xb386f190 } },
+ /* 159 */
+ { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
+ 0x1b069c4d,0x4cb460f7 },
+ { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
+ 0x95ef5223,0x52c0d508 } },
+ /* 160 */
+ { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
+ 0x2bb09c0b,0x4ac3c938 },
+ { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
+ 0xe39705f4,0x380d94c7 } },
+ /* 161 */
+ { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
+ 0xde2637af,0x2ce3e171 },
+ { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
+ 0x0b624e4d,0x2e6cd852 } },
+ /* 162 */
+ { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
+ 0x42c69d54,0xca177547 },
+ { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
+ 0x9cab2ce6,0xa976a713 } },
+ /* 163 */
+ { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
+ 0x0a1f4999,0x8720a717 },
+ { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
+ 0xc769893c,0x9719ef29 } },
+ /* 164 */
+ { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
+ 0xe15704c1,0xa5072976 },
+ { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
+ 0xf7b77725,0x99389c9d } },
+ /* 165 */
+ { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
+ 0x202c82e4,0xa88806aa },
+ { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
+ 0x4738dcfe,0x0043bffb } },
+ /* 166 */
+ { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
+ 0xba6c4866,0x52f3ef01 },
+ { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
+ 0x9ef27e75,0x3296bd89 } },
+ /* 167 */
+ { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
+ 0xaee571e9,0x3b90febf },
+ { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
+ 0x9f810b18,0x6e88069d } },
+ /* 168 */
+ { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
+ 0xdefaad13,0xa7222bea },
+ { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
+ 0xbc2ac690,0xbe94d523 } },
+ /* 169 */
+ { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
+ 0x9be8c766,0x7782defe },
+ { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
+ 0xa2892e4b,0x03838567 } },
+ /* 170 */
+ { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
+ 0xadf7b420,0xdbd986c4 },
+ { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
+ 0x6860bbd0,0x8e24d3c4 } },
+ /* 171 */
+ { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
+ 0x407bafc8,0x541a99c4 },
+ { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
+ 0xf57d35d1,0xc0092c49 } },
+ /* 172 */
+ { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
+ 0x7286944d,0x75e40634 },
+ { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
+ 0xc7848586,0x5b7cb658 } },
+ /* 173 */
+ { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
+ 0x8df097a1,0x7ae13eba },
+ { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
+ 0xe2a8e3fd,0x787d8074 } },
+ /* 174 */
+ { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
+ 0x9ef28484,0x5c222819 },
+ { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
+ 0xbaf0f2b0,0xe45d37ab } },
+ /* 175 */
+ { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
+ 0x84dfb9d3,0xed7bc122 },
+ { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
+ 0x45ca6d27,0xaac97cc9 } },
+ /* 176 */
+ { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
+ 0x1163dc4e,0x318f97b3 },
+ { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
+ 0x9a84ff4d,0xfa41faa1 } },
+ /* 177 */
+ { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
+ 0x1d26e9e2,0x38bb6b2c },
+ { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
+ 0xce7601a5,0x94dd0905 } },
+ /* 178 */
+ { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
+ 0xd25c2ae9,0x92077867 },
+ { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
+ 0xd29beb51,0x81e8428b } },
+ /* 179 */
+ { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
+ 0xdbbfa4b1,0x1b94ab62 },
+ { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
+ 0x055590ee,0x06a38e28 } },
+ /* 180 */
+ { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
+ 0x83d9d4f8,0xa7b36c20 },
+ { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
+ 0xa2822a20,0xbe54c6b4 } },
+ /* 181 */
+ { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
+ 0xeae022bb,0xbf30a5ab },
+ { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
+ 0x2732d13a,0xd1c820de } },
+ /* 182 */
+ { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
+ 0x68a18da3,0xb7d17bed },
+ { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
+ 0x6412cc64,0x3997fd5e } },
+ /* 183 */
+ { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
+ 0x3c6c13e8,0x0eeb8929 },
+ { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
+ 0xc922b6ef,0x228916f8 } },
+ /* 184 */
+ { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
+ 0x6e93097e,0xec05ad1d },
+ { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
+ 0x7ff11b37,0x7d314156 } },
+ /* 185 */
+ { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
+ 0x9bc1d7a3,0xe9ce66fc },
+ { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
+ 0x72280651,0xd9650b01 } },
+ /* 186 */
+ { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
+ 0x804eb7a2,0x14d6699a },
+ { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
+ 0x0d43598a,0x6f4c6841 } },
+ /* 187 */
+ { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
+ 0x61189abb,0x4c4350fd },
+ { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
+ 0x5a3118b5,0xa726d242 } },
+ /* 188 */
+ { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
+ 0xcc6cf392,0x13639e82 },
+ { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
+ 0xc1a335a3,0xca9365e1 } },
+ /* 189 */
+ { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
+ 0x970b72a5,0x9ce29c34 },
+ { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
+ 0xab42af98,0x48c4abd7 } },
+ /* 190 */
+ { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
+ 0xf67b33cb,0x78017c32 },
+ { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
+ 0xde5c1c04,0x53cd0454 } },
+ /* 191 */
+ { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
+ 0xd3d7fa8f,0xeea465c1 },
+ { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
+ 0x7ae69193,0x1b6e42a4 } },
+ /* 192 */
+ { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
+ 0x187fbd3d,0x0224da14 },
+ { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
+ 0x42bfff33,0x60838ef0 } },
+ /* 193 */
+ { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
+ 0x2d331643,0x636eb202 },
+ { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
+ 0x39218bac,0x8844eeb6 } },
+ /* 194 */
+ { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
+ 0x51fb789e,0x27ba83dc },
+ { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
+ 0x87f3a4ab,0xadb62d34 } },
+ /* 195 */
+ { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
+ 0x75e7c8b2,0xb990fd76 },
+ { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
+ 0x4d10d18d,0x81707ef9 } },
+ /* 196 */
+ { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
+ 0xd5a8aa5c,0x3792daea },
+ { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
+ 0x94b001ba,0x5abd635e } },
+ /* 197 */
+ { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
+ 0x846ab610,0x5995bf21 },
+ { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
+ 0xd483411e,0x44c32ca2 } },
+ /* 198 */
+ { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
+ 0x8082a54c,0x1f2162fb },
+ { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
+ 0xc3e907c9,0x8f1d402b } },
+ /* 199 */
+ { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
+ 0x926edbf9,0xb1980f43 },
+ { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
+ 0x37448e45,0x2828ad9b } },
+ /* 200 */
+ { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
+ 0x5a14b390,0x4973f127 },
+ { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
+ 0xdb168ac7,0x6dac8ed0 } },
+ /* 201 */
+ { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
+ 0x20b9de4c,0x4b23ef59 },
+ { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
+ 0xddf49a4e,0x4dd71534 } },
+ /* 202 */
+ { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
+ 0x2f4a4dbb,0xfd317000 },
+ { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
+ 0x9569f365,0x14fac58c } },
+ /* 203 */
+ { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
+ 0x36abda50,0xed7c7651 },
+ { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
+ 0x4d2e9f53,0xfefcb7f7 } },
+ /* 204 */
+ { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
+ 0x87e0d80b,0x1801a57e },
+ { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
+ 0x1ead1064,0x9f8fc11e } },
+ /* 205 */
+ { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
+ 0x3d3a69a9,0xa9d3809d },
+ { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
+ 0xe1178ef7,0x3006b9ae } },
+ /* 206 */
+ { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
+ 0x45f8f761,0x0ab85fd7 },
+ { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
+ 0x11e942c2,0xb122d675 } },
+ /* 207 */
+ { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
+ 0x097dbaec,0x9f599dc1 },
+ { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
+ 0x8a294b78,0x7d5528e0 } },
+ /* 208 */
+ { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
+ 0x303f1730,0x28ccea01 },
+ { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
+ 0xa1d013bf,0xc18baf48 } },
+ /* 209 */
+ { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
+ 0xb7a9596b,0x9def809d },
+ { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
+ 0x68808ce5,0x0357f8b0 } },
+ /* 210 */
+ { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
+ 0x1b489887,0xe4a01add },
+ { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
+ 0xce10cc30,0x466d7d79 } },
+ /* 211 */
+ { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
+ 0x451ead1a,0xc672a522 },
+ { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
+ 0xf2a67513,0x5e3d64fa } },
+ /* 212 */
+ { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
+ 0xeb8e42fc,0x6c8a7a95 },
+ { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
+ 0xad82ca91,0x348ae422 } },
+ /* 213 */
+ { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
+ 0xd9ef2d2e,0xc1074de0 },
+ { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
+ 0xc9e54ffc,0xfbadfbdb } },
+ /* 214 */
+ { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
+ 0x83716fcd,0xb7f976b4 },
+ { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
+ 0xcafcc805,0xf4d41b2e } },
+ /* 215 */
+ { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
+ 0xe0160f10,0x180824ea },
+ { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
+ 0x83cf6d25,0x67e5f639 } },
+ /* 216 */
+ { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
+ 0x04c11fc6,0x9fef789a },
+ { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
+ 0xa99c4e20,0xbc80c181 } },
+ /* 217 */
+ { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
+ 0x9f8cdf10,0x49270e62 },
+ { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
+ 0x61372f7f,0xd2ee52f9 } },
+ /* 218 */
+ { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
+ 0xe5abb733,0xdfb478be },
+ { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
+ 0x08df473a,0xd9a140b4 } },
+ /* 219 */
+ { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
+ 0x623f4b1a,0x760c058d },
+ { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
+ 0x8f190409,0x7141982d } },
+ /* 220 */
+ { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
+ 0x89d54e47,0x3af9d1ce },
+ { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
+ 0x73957dd6,0xb1f815c3 } },
+ /* 221 */
+ { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
+ 0x1543f052,0xa41aed14 },
+ { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
+ 0x86fb60ef,0xd6e9c1dd } },
+ /* 222 */
+ { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
+ 0xae9bf8c2,0x9c9c6e10 },
+ { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
+ 0x40fa61b6,0x566bd596 } },
+ /* 223 */
+ { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
+ 0xf525345e,0xcf2c7390 },
+ { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
+ 0x8aa20979,0x02f51755 } },
+ /* 224 */
+ { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
+ 0xe8d4d97d,0x14e9ada5 },
+ { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
+ 0x8e9d9ae8,0xa0ad4fab } },
+ /* 225 */
+ { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
+ 0x6e56ed1e,0xbcd530b8 },
+ { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
+ 0x6979341d,0x909283cf } },
+ /* 226 */
+ { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
+ 0xace1549a,0x35eeb7c9 },
+ { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
+ 0x448ae864,0x9a8b2cf4 } },
+ /* 227 */
+ { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
+ 0xd4491379,0x6bdb60f4 },
+ { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
+ 0x94ba08a9,0x01ec3cfd } },
+ /* 228 */
+ { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
+ 0x475464f6,0xd1acb1c0 },
+ { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
+ 0x405626c2,0x7dcd079d } },
+ /* 229 */
+ { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
+ 0x377d19b8,0x0bf53589 },
+ { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
+ 0xe16686fc,0xd28be4d9 } },
+ /* 230 */
+ { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
+ 0x510f88ce,0xd76007aa },
+ { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
+ 0xb303bb01,0xf2b52f68 } },
+ /* 231 */
+ { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
+ 0xcc5aed3a,0xd8dbe98e },
+ { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
+ 0xee559705,0xe01593a3 } },
+ /* 232 */
+ { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
+ 0xaeb8ef06,0xafec07b1 },
+ { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
+ 0x6e2dbfdd,0xa71b9354 } },
+ /* 233 */
+ { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
+ 0x628523d9,0x53a2005c },
+ { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
+ 0x3d588e3d,0xbf47d19b } },
+ /* 234 */
+ { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
+ 0x39c9a1b6,0x001c2c7f },
+ { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
+ 0x86ffb99b,0xfdadf8e7 } },
+ /* 235 */
+ { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
+ 0x5aa43c94,0x3a838e4d },
+ { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
+ 0x873e1da3,0x3cdb8257 } },
+ /* 236 */
+ { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
+ 0xf1f57fba,0x5a60cc89 },
+ { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
+ 0xdbfd8fc0,0x922ff56f } },
+ /* 237 */
+ { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
+ 0xf6c5cd62,0x72919a7d },
+ { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
+ 0x3624089a,0x5e791780 } },
+ /* 238 */
+ { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
+ 0xe24c2fab,0x4e0a5371 },
+ { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
+ 0xd56604ee,0xf5ff7818 } },
+ /* 239 */
+ { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
+ 0x533f5e64,0xe41df0e9 },
+ { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
+ 0xac4f155f,0x8edd7d6e } },
+ /* 240 */
+ { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
+ 0xed8aee96,0x1432c1ca },
+ { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
+ 0x5ac8d2c6,0xcaef480b } },
+ /* 241 */
+ { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
+ 0x8efae236,0xd0ba177e },
+ { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
+ 0x1c54ae16,0xf31c957c } },
+ /* 242 */
+ { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
+ 0x96e17c3a,0x013404cb },
+ { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
+ 0x91933e6c,0x6f377c4b } },
+ /* 243 */
+ { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
+ 0xd2d09506,0x6dba3e4e },
+ { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
+ 0x3becf4a7,0xf13cf342 } },
+ /* 244 */
+ { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
+ 0x274bbad3,0xc83fa9a9 },
+ { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
+ 0x5d702683,0xb49d70f4 } },
+ /* 245 */
+ { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
+ 0x0c30f1cf,0x59cfadbb },
+ { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
+ 0x354a4b67,0x5babf362 } },
+ /* 246 */
+ { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
+ 0x9026c8f0,0x6188c6a7 },
+ { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
+ 0xdf50b9d9,0x993fe475 } },
+ /* 247 */
+ { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
+ 0x4c80616b,0x81f76466 },
+ { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
+ 0x5fe9060d,0x564a812a } },
+ /* 248 */
+ { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
+ 0x00e51d6c,0x226bf3cf },
+ { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
+ 0xff257836,0x68779f47 } },
+ /* 249 */
+ { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
+ 0xeb092e0b,0x97bcb0d1 },
+ { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
+ 0x0a784655,0xa872ffe8 } },
+ /* 250 */
+ { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
+ 0xb732a36a,0x02812bfc },
+ { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
+ 0xfe5396af,0x07391cc9 } },
+ /* 251 */
+ { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
+ 0x7e6d2a08,0x355d2adc },
+ { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
+ 0x7c2a3a79,0x3dc2b1e3 } },
+ /* 252 */
+ { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
+ 0x3ccd846b,0xc4786910 },
+ { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
+ 0xd5bb4d32,0xccc42968 } },
+ /* 253 */
+ { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
+ 0xaa4871cf,0xe147eb42 },
+ { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
+ 0x080e96e3,0x239ac047 } },
+ /* 254 */
+ { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
+ 0xf5f7e59d,0xc55fa1a3 },
+ { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
+ 0xd4f4b699,0x094cd99c } },
+ /* 255 */
+ { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
+ 0x42abad33,0xb90a30b6 },
+ { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
+ 0x1b7924f7,0x019f8b9a } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* Fixed-base multiply using the pre-computed stripe table p256_table. */
+ return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+ k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[8];
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_256_point_new_8(heap, p, point);
+ /* Small/small-stack builds heap-allocate the scalar; otherwise the
+  * stack buffer kd is used. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert the scalar to internal digits and multiply G by it. */
+ sp_256_from_mp(k, 8, km);
+
+ err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_to_ecc_point_8(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(point, 0, heap);
+
+ return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time: all eight digits are always OR-ed
+ * together, so the runtime does not depend on the value.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_8(const sp_digit* a)
+{
+ sp_digit folded = a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7];
+
+ return folded == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_256_add_one_8(sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "mov r2, #1\n\t"
+ "ldr r1, [%[a], #0]\n\t"
+ "add r1, r2\n\t"
+ /* Thumb-1 MOVS below only sets N/Z, and ldr/str leave the flags
+  * alone, so the carry from the add above survives through the
+  * whole adc chain that propagates it into the higher digits. */
+ "mov r2, #0\n\t"
+ "str r1, [%[a], #0]\n\t"
+ "ldr r1, [%[a], #4]\n\t"
+ "adc r1, r2\n\t"
+ "str r1, [%[a], #4]\n\t"
+ "ldr r1, [%[a], #8]\n\t"
+ "adc r1, r2\n\t"
+ "str r1, [%[a], #8]\n\t"
+ "ldr r1, [%[a], #12]\n\t"
+ "adc r1, r2\n\t"
+ "str r1, [%[a], #12]\n\t"
+ "ldr r1, [%[a], #16]\n\t"
+ "adc r1, r2\n\t"
+ "str r1, [%[a], #16]\n\t"
+ "ldr r1, [%[a], #20]\n\t"
+ "adc r1, r2\n\t"
+ "str r1, [%[a], #20]\n\t"
+ "ldr r1, [%[a], #24]\n\t"
+ "adc r1, r2\n\t"
+ "str r1, [%[a], #24]\n\t"
+ "ldr r1, [%[a], #28]\n\t"
+ "adc r1, r2\n\t"
+ "str r1, [%[a], #28]\n\t"
+ :
+ : [a] "r" (a)
+ : "memory", "r1", "r2"
+ );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ /* Walk the bytes from least significant (end of array) to most;
+  * s is the bit offset within the current output digit r[j]. */
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 24U) {
+ /* Digit full: mask to 32 bits (a no-op for 32-bit digits) and
+  * spill the byte's remaining bits into the next digit. */
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ /* Zero any remaining high digits. */
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
+{
+ int err;
+ byte buf[32];
+
+ /* Rejection sampling: draw 32 random bytes and accept only values
+  * below p256_order2 (declared elsewhere; presumably order - 2 — TODO
+  * confirm), then add one so the result is non-zero and in range. */
+ do {
+ err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+ if (err == 0) {
+ sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
+ if (sp_256_cmp_8(k, p256_order2) < 0) {
+ sp_256_add_one_8(k);
+ break;
+ }
+ }
+ }
+ while (err == 0);
+
+ return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[8];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_256 inf;
+#endif
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_256* infinity;
+#endif
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, inf, infinity);
+ }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+
+ /* k = random scalar in [1, order-1]; pub = k * G. */
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_gen_k_8(rng, k);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+ }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ /* Validate order: order * pub must be the point at infinity, which
+  * maps to zero affine coordinates. */
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
+ }
+ if (err == MP_OKAY) {
+ /* Bug fix: test the computed result, not the public point. The
+  * previous check (point->x/point->y non-zero) rejected every
+  * valid key pair, since a valid public key never has zero
+  * coordinates. */
+ if ((sp_256_iszero_8(infinity->x) == 0) || (sp_256_iszero_8(infinity->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(k, priv);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_to_ecc_point_8(point, pub);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_256_point_free_8(infinity, 1, heap);
+#endif
+ sp_256_point_free_8(point, 1, heap);
+
+ return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+ int i, j, s = 0, b;
+
+ /* j indexes the output from the last (least significant) byte back
+  * towards the front; s is the bit offset already consumed in the
+  * current output byte; b counts bits of r[i] emitted so far. */
+ j = 256 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<8 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ while (b < 32) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ /* Carry any partial byte into the next digit's first write. */
+ s = 8 - (b - 32);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv Scalar to multiply the point by.
+ * pub Point to multiply.
+ * out Buffer to hold X ordinate.
+ * outLen On entry, size of the buffer in bytes.
+ * On exit, length of data in buffer in bytes.
+ * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+ word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[8];
+#endif
+ sp_point_256* point = NULL;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ /* Output is always the full 32-byte, zero-padded X ordinate. */
+ if (*outLen < 32U) {
+ err = BUFFER_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, p, point);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* shared secret = (priv * pub)->x, mapped to affine. */
+ sp_256_from_mp(k, 8, priv);
+ sp_256_point_from_ecc_point_8(point, pub);
+ err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ sp_256_to_bin(point->x, out);
+ *outLen = 32;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(point, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+ __asm__ __volatile__ (
+ "mov r7, %[a]\n\t"
+ "add r7, #32\n\t"
+ "\n1:\n\t"
+ /* 0 - c reloads the borrow flag from the previous iteration
+  * (c is 0 or -1); r5 is then clobbered by the loads below. */
+ "mov r5, #0\n\t"
+ "sub r5, %[c]\n\t"
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a]]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ /* SBC c,c captures the borrow as 0 (none) or -1 (borrow). */
+ "sbc %[c], %[c]\n\t"
+ "add %[a], #8\n\t"
+ "add %[b], #8\n\t"
+ "cmp %[a], r7\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r7"
+ );
+
+ return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the borrow out: 0 (no borrow) or -1 (all ones).
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ /* First word uses SUB to seed the borrow; the remaining words chain
+  * it with SBC (ldr/str do not affect the flags on Thumb-1). */
+ __asm__ __volatile__ (
+ "ldr r3, [%[a], #0]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b], #0]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sub r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #0]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "ldr r5, [%[b], #8]\n\t"
+ "ldr r6, [%[b], #12]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #8]\n\t"
+ "str r4, [%[a], #12]\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "ldr r5, [%[b], #16]\n\t"
+ "ldr r6, [%[b], #20]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #16]\n\t"
+ "str r4, [%[a], #20]\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "ldr r5, [%[b], #24]\n\t"
+ "ldr r6, [%[b], #28]\n\t"
+ "sbc r3, r5\n\t"
+ "sbc r4, r6\n\t"
+ "str r3, [%[a], #24]\n\t"
+ "str r4, [%[a], #28]\n\t"
+ /* SBC c,c leaves 0 if there was no final borrow, else -1. */
+ "sbc %[c], %[c]\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+ /* Thumb-1 MUL yields only the low 32 bits, so each 32x32 product is
+  * assembled from four 16x16 partial products; r3:r4 carry the running
+  * sum between digits and r5 collects the overflow word. */
+ __asm__ __volatile__ (
+ "mov r6, #32\n\t"
+ "add r6, %[a]\n\t"
+ "mov r8, %[r]\n\t"
+ "mov r9, r6\n\t"
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r5, #0\n\t"
+ "# A[] * B\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsl r6, r6, #16\n\t"
+ "lsl r7, %[b], #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r7, r6\n\t"
+ "add r3, r7\n\t"
+ "adc r4, %[r]\n\t"
+ "adc r5, %[r]\n\t"
+ "lsr r7, %[b], #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "ldr r6, [%[a]]\n\t"
+ "lsr r6, r6, #16\n\t"
+ "lsr r7, %[b], #16\n\t"
+ "mul r7, r6\n\t"
+ "add r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "lsl r7, %[b], #16\n\t"
+ "lsr r7, r7, #16\n\t"
+ "mul r6, r7\n\t"
+ "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t"
+ "add r3, r6\n\t"
+ "adc r4, r7\n\t"
+ "adc r5, %[r]\n\t"
+ "# A[] * B - Done\n\t"
+ "mov %[r], r8\n\t"
+ "str r3, [%[r]]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "add %[r], #4\n\t"
+ "add %[a], #4\n\t"
+ "mov r8, %[r]\n\t"
+ "cmp %[a], r9\n\t"
+ "blt 1b\n\t"
+ "str r3, [%[r]]\n\t"
+ : [r] "+r" (r), [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+ );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0,
+ sp_digit div)
+{
+ sp_digit r = 0;
+
+ /* Thumb-1 has no divide instruction: build a quotient estimate one
+  * bit at a time against (div/2 + 1), then refine it with three
+  * multiply-and-subtract correction rounds (each 32x32 product built
+  * from 16x16 partial products). */
+ __asm__ __volatile__ (
+ "lsr r5, %[div], #1\n\t"
+ "add r5, #1\n\t"
+ "mov r8, %[d0]\n\t"
+ "mov r9, %[d1]\n\t"
+ "# Do top 32\n\t"
+ "mov r6, r5\n\t"
+ "sub r6, %[d1]\n\t"
+ "sbc r6, r6\n\t"
+ "add %[r], %[r]\n\t"
+ "sub %[r], r6\n\t"
+ "and r6, r5\n\t"
+ "sub %[d1], r6\n\t"
+ "# Next 30 bits\n\t"
+ "mov r4, #29\n\t"
+ "1:\n\t"
+ "lsl %[d0], %[d0], #1\n\t"
+ "adc %[d1], %[d1]\n\t"
+ "mov r6, r5\n\t"
+ "sub r6, %[d1]\n\t"
+ "sbc r6, r6\n\t"
+ "add %[r], %[r]\n\t"
+ "sub %[r], r6\n\t"
+ "and r6, r5\n\t"
+ "sub %[d1], r6\n\t"
+ "sub r4, #1\n\t"
+ "bpl 1b\n\t"
+ "mov r7, #0\n\t"
+ "add %[r], %[r]\n\t"
+ "add %[r], #1\n\t"
+ "# r * div - Start\n\t"
+ "lsl %[d1], %[r], #16\n\t"
+ "lsl r4, %[div], #16\n\t"
+ "lsr %[d1], %[d1], #16\n\t"
+ "lsr r4, r4, #16\n\t"
+ "mul r4, %[d1]\n\t"
+ "lsr r6, %[div], #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r5, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r7\n\t"
+ "lsr %[d1], %[r], #16\n\t"
+ "mul r6, %[d1]\n\t"
+ "add r5, r6\n\t"
+ "lsl r6, %[div], #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r6, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r6\n\t"
+ "# r * div - Done\n\t"
+ "mov %[d1], r8\n\t"
+ "sub %[d1], r4\n\t"
+ "mov r4, %[d1]\n\t"
+ "mov %[d1], r9\n\t"
+ "sbc %[d1], r5\n\t"
+ "mov r5, %[d1]\n\t"
+ "add %[r], r5\n\t"
+ "# r * div - Start\n\t"
+ "lsl %[d1], %[r], #16\n\t"
+ "lsl r4, %[div], #16\n\t"
+ "lsr %[d1], %[d1], #16\n\t"
+ "lsr r4, r4, #16\n\t"
+ "mul r4, %[d1]\n\t"
+ "lsr r6, %[div], #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r5, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r7\n\t"
+ "lsr %[d1], %[r], #16\n\t"
+ "mul r6, %[d1]\n\t"
+ "add r5, r6\n\t"
+ "lsl r6, %[div], #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r6, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r6\n\t"
+ "# r * div - Done\n\t"
+ "mov %[d1], r8\n\t"
+ "mov r6, r9\n\t"
+ "sub r4, %[d1], r4\n\t"
+ "sbc r6, r5\n\t"
+ "mov r5, r6\n\t"
+ "add %[r], r5\n\t"
+ "# r * div - Start\n\t"
+ "lsl %[d1], %[r], #16\n\t"
+ "lsl r4, %[div], #16\n\t"
+ "lsr %[d1], %[d1], #16\n\t"
+ "lsr r4, r4, #16\n\t"
+ "mul r4, %[d1]\n\t"
+ "lsr r6, %[div], #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r5, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r7\n\t"
+ "lsr %[d1], %[r], #16\n\t"
+ "mul r6, %[d1]\n\t"
+ "add r5, r6\n\t"
+ "lsl r6, %[div], #16\n\t"
+ "lsr r6, r6, #16\n\t"
+ "mul %[d1], r6\n\t"
+ "lsr r6, %[d1], #16\n\t"
+ "lsl %[d1], %[d1], #16\n\t"
+ "add r4, %[d1]\n\t"
+ "adc r5, r6\n\t"
+ "# r * div - Done\n\t"
+ "mov %[d1], r8\n\t"
+ "mov r6, r9\n\t"
+ "sub r4, %[d1], r4\n\t"
+ "sbc r6, r5\n\t"
+ "mov r5, r6\n\t"
+ "add %[r], r5\n\t"
+ "mov r6, %[div]\n\t"
+ "sub r6, r4\n\t"
+ "sbc r6, r6\n\t"
+ "sub %[r], r6\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r7", "r6", "r8", "r9"
+ );
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ * Constant time: typically called with an all-zeros or all-ones mask to
+ * conditionally select or clear a value (see sp_256_div_8).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<8; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ /* Unrolled when code size is not constrained. */
+ r[0] = a[0] & m;
+ r[1] = a[1] & m;
+ r[2] = a[2] & m;
+ r[3] = a[3] & m;
+ r[4] = a[4] & m;
+ r[5] = a[5] & m;
+ r[6] = a[6] & m;
+ r[7] = a[7] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[16], t2[9];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ div = d[7];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
+ for (i=7; i>=0; i--) {
+ /* Estimate the next quotient digit from the top words; the
+  * estimate may be slightly too large (see div_256_word_8). */
+ r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
+
+ sp_256_mul_d_8(t2, d, r1);
+ t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
+ t1[8 + i] -= t2[8];
+ /* If the subtraction borrowed, t1[8 + i] is all ones and the
+  * masked adds below add the divisor back (constant time). */
+ sp_256_mask_8(t2, d, t1[8 + i]);
+ t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+ sp_256_mask_8(t2, d, t1[8 + i]);
+ t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+ }
+
+ /* One final conditional subtract leaves the remainder below d. */
+ r1 = sp_256_cmp_8(t1, d) >= 0;
+ sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * a is double width (16 digits); only the remainder is produced.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_256_div_8(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve. */
+/* Little-endian 32-bit digits of n - 2, where n is the P-256 group order. */
+static const uint32_t p256_order_minus_2[8] = {
+ 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+ 0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve. */
+/* Only these 128 bits are scanned bit-by-bit; the fixed high half is
+ * handled by the addition chain in sp_256_mont_inv_order_8(). */
+static const uint32_t p256_order_low[4] = {
+ 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
+ * Operands and result are in Montgomery form.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ sp_256_mul_8(r, a, b);
+ sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ * Operand and result are in Montgomery form.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
+{
+ sp_256_sqr_8(r, a);
+ sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of squarings to perform (must be >= 1).
+ */
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
+{
+ int rem;
+
+ /* First squaring reads from a; the remaining n-1 square in place. */
+ sp_256_mont_sqr_order_8(r, a);
+ for (rem = n - 1; rem > 0; rem--) {
+ sp_256_mont_sqr_order_8(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
+ sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ /* Fermat inversion: left-to-right square-and-multiply over the bits
+  * of order - 2 (a^(order-2) = 1/a mod order). */
+ XMEMCPY(t, a, sizeof(sp_digit) * 8);
+ for (i=254; i>=0; i--) {
+ sp_256_mont_sqr_order_8(t, t);
+ if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_8(t, t, a);
+ }
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 8U);
+#else
+ sp_digit* t = td;
+ sp_digit* t2 = td + 2 * 8;
+ sp_digit* t3 = td + 4 * 8;
+ int i;
+
+ /* Fixed addition chain for the constant exponent order - 2. */
+ /* t = a^2 */
+ sp_256_mont_sqr_order_8(t, a);
+ /* t = a^3 = t * a */
+ sp_256_mont_mul_order_8(t, t, a);
+ /* t2= a^c = t ^ 2 ^ 2 */
+ sp_256_mont_sqr_n_order_8(t2, t, 2);
+ /* t3= a^f = t2 * t */
+ sp_256_mont_mul_order_8(t3, t2, t);
+ /* t2= a^f0 = t3 ^ 2 ^ 4 */
+ sp_256_mont_sqr_n_order_8(t2, t3, 4);
+ /* t = a^ff = t2 * t3 */
+ sp_256_mont_mul_order_8(t, t2, t3);
+ /* t2= a^ff00 = t ^ 2 ^ 8  (result lands in t2, not t3) */
+ sp_256_mont_sqr_n_order_8(t2, t, 8);
+ /* t = a^ffff = t2 * t */
+ sp_256_mont_mul_order_8(t, t2, t);
+ /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+ sp_256_mont_sqr_n_order_8(t2, t, 16);
+ /* t = a^ffffffff = t2 * t */
+ sp_256_mont_mul_order_8(t, t2, t);
+ /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+ sp_256_mont_sqr_n_order_8(t2, t, 64);
+ /* t2= a^ffffffff00000000ffffffff = t2 * t */
+ sp_256_mont_mul_order_8(t2, t2, t);
+ /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+ sp_256_mont_sqr_n_order_8(t2, t2, 32);
+ /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+ sp_256_mont_mul_order_8(t2, t2, t);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+ for (i=127; i>=112; i--) {
+ sp_256_mont_sqr_order_8(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_8(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+ sp_256_mont_sqr_n_order_8(t2, t2, 4);
+ sp_256_mont_mul_order_8(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+ for (i=107; i>=64; i--) {
+ sp_256_mont_sqr_order_8(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_8(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+ sp_256_mont_sqr_n_order_8(t2, t2, 4);
+ sp_256_mont_mul_order_8(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+ for (i=59; i>=32; i--) {
+ sp_256_mont_sqr_order_8(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_8(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+ sp_256_mont_sqr_n_order_8(t2, t2, 4);
+ sp_256_mont_mul_order_8(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+ for (i=27; i>=0; i--) {
+ sp_256_mont_sqr_order_8(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_8(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+ sp_256_mont_sqr_n_order_8(t2, t2, 4);
+ /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+ sp_256_mont_mul_order_8(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Sirst part of result as an mp_int.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+ mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit ed[2*8];
+ sp_digit xd[2*8];
+ sp_digit kd[2*8];
+ sp_digit rd[2*8];
+ sp_digit td[3 * 2*8];
+ sp_point_256 p;
+#endif
+ sp_digit* e = NULL;
+ sp_digit* x = NULL;
+ sp_digit* k = NULL;
+ sp_digit* r = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* point = NULL;
+ sp_digit carry;
+ sp_digit* s = NULL;
+ sp_digit* kInv = NULL;
+ int err = MP_OKAY;
+ int32_t c;
+ int i;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ e = d + 0 * 8;
+ x = d + 2 * 8;
+ k = d + 4 * 8;
+ r = d + 6 * 8;
+ tmp = d + 8 * 8;
+#else
+ e = ed;
+ x = xd;
+ k = kd;
+ r = rd;
+ tmp = td;
+#endif
+ s = e;
+ kInv = k;
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(e, 8, hash, (int)hashLen);
+ }
+
+ for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+ sp_256_from_mp(x, 8, priv);
+
+ /* New random point. */
+ if (km == NULL || mp_iszero(km)) {
+ err = sp_256_ecc_gen_k_8(rng, k);
+ }
+ else {
+ sp_256_from_mp(k, 8, km);
+ mp_zero(km);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = point->x mod order */
+ XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
+ sp_256_norm_8(r);
+ c = sp_256_cmp_8(r, p256_order);
+ sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_8(r);
+
+ /* Conv k to Montgomery form (mod order) */
+ sp_256_mul_8(k, k, p256_norm_order);
+ err = sp_256_mod_8(k, k, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_8(k);
+ /* kInv = 1/k mod order */
+ sp_256_mont_inv_order_8(kInv, k, tmp);
+ sp_256_norm_8(kInv);
+
+ /* s = r * x + e */
+ sp_256_mul_8(x, x, r);
+ err = sp_256_mod_8(x, x, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_8(x);
+ carry = sp_256_add_8(s, e, x);
+ sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
+ sp_256_norm_8(s);
+ c = sp_256_cmp_8(s, p256_order);
+ sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_8(s);
+
+ /* s = s * k^-1 mod order */
+ sp_256_mont_mul_order_8(s, s, kInv);
+ sp_256_norm_8(s);
+
+ /* Check that signature is usable. */
+ if (sp_256_iszero_8(s) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (i == 0) {
+ err = RNG_FAILURE_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(r, rm);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(s, sm);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
+#endif
+ sp_256_point_free_8(point, 1, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
/* Verify the signature values with the hash and public key.
 * e = Truncate(hash, 256)
 * u1 = e/s mod order
 * u2 = r/s mod order
 * r == (u1.G + u2.Q)->x mod order
 * Optimization: Leave point in projective form.
 * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
 * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
 * The hash is truncated to the first 256 bits.
 *
 * hash     Hash to verify.
 * hashLen  Length of the hash data.
 * pX       X ordinate of public key point Q.
 * pY       Y ordinate of public key point Q.
 * pZ       Z ordinate of public key point Q.
 * r        First part of the signature as an mp_int.
 * sm       Second part of the signature as an mp_int.
 * res      Verification result: 1 when the signature matches, 0 otherwise.
 * heap     Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success
 * (check *res for the actual verification outcome).
 */
int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
{
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* d = NULL;
#else
    sp_digit u1d[2*8];
    sp_digit u2d[2*8];
    sp_digit sd[2*8];
    sp_digit tmpd[2*8 * 5];
    sp_point_256 p1d;
    sp_point_256 p2d;
#endif
    sp_digit* u1 = NULL;
    sp_digit* u2 = NULL;
    sp_digit* s = NULL;
    sp_digit* tmp = NULL;
    sp_point_256* p1;
    sp_point_256* p2 = NULL;
    sp_digit carry;
    int32_t c;
    int err;

    err = sp_256_point_new_8(heap, p1d, p1);
    if (err == MP_OKAY) {
        err = sp_256_point_new_8(heap, p2d, p2);
    }
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
                                                              DYNAMIC_TYPE_ECC);
        if (d == NULL) {
            err = MEMORY_E;
        }
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
        u1  = d + 0 * 8;
        u2  = d + 2 * 8;
        s   = d + 4 * 8;
        tmp = d + 6 * 8;
#else
        u1 = u1d;
        u2 = u2d;
        s  = sd;
        tmp = tmpd;
#endif

        if (hashLen > 32U) {
            hashLen = 32U;
        }

        /* Load the hash, the signature halves and the public key point. */
        sp_256_from_bin(u1, 8, hash, (int)hashLen);
        sp_256_from_mp(u2, 8, r);
        sp_256_from_mp(s, 8, sm);
        sp_256_from_mp(p2->x, 8, pX);
        sp_256_from_mp(p2->y, 8, pY);
        sp_256_from_mp(p2->z, 8, pZ);

        {
            /* Convert s to Montgomery form (mod order). */
            sp_256_mul_8(s, s, p256_norm_order);
        }
        err = sp_256_mod_8(s, s, p256_order);
    }
    if (err == MP_OKAY) {
        sp_256_norm_8(s);
        {
            /* u1 = e/s mod order, u2 = r/s mod order. */
            sp_256_mont_inv_order_8(s, s, tmp);
            sp_256_mont_mul_order_8(u1, u1, s);
            sp_256_mont_mul_order_8(u2, u2, s);
        }

        err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
    }
    if (err == MP_OKAY) {
        err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
    }

    if (err == MP_OKAY) {
        {
            /* p1 = u1.G + u2.Q */
            sp_256_proj_point_add_8(p1, p1, p2, tmp);
            if (sp_256_iszero_8(p1->z)) {
                if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
                    /* Addition degenerated (equal inputs) - double instead. */
                    sp_256_proj_point_dbl_8(p1, p2, tmp);
                }
                else {
                    /* Y ordinate is not used from here - don't set. */
                    p1->x[0] = 0;
                    p1->x[1] = 0;
                    p1->x[2] = 0;
                    p1->x[3] = 0;
                    p1->x[4] = 0;
                    p1->x[5] = 0;
                    p1->x[6] = 0;
                    p1->x[7] = 0;
                    XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
                }
            }
        }

        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
        /* Reload r and convert to Montgomery form. */
        sp_256_from_mp(u2, 8, r);
        err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
    }

    if (err == MP_OKAY) {
        /* u1 = r.z'.z' mod prime */
        sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
        if (*res == 0) {
            /* Reload r and add order. */
            sp_256_from_mp(u2, 8, r);
            carry = sp_256_add_8(u2, u2, p256_order);
            /* Carry means result is greater than mod and is not valid. */
            if (carry == 0) {
                sp_256_norm_8(u2);

                /* Compare with mod and if greater or equal then not valid. */
                c = sp_256_cmp_8(u2, p256_mod);
                if (c < 0) {
                    /* Convert to Montogomery form */
                    err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
                    if (err == MP_OKAY) {
                        /* u1 = (r + 1*order).z'.z' mod prime */
                        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
                                                                  p256_mp_mod);
                        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
                    }
                }
            }
        }
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (d != NULL)
        XFREE(d, heap, DYNAMIC_TYPE_ECC);
#endif
    sp_256_point_free_8(p1, 0, heap);
    sp_256_point_free_8(p2, 0, heap);

    return err;
}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
/* Check that the x and y ordinates are a valid point on the curve.
 * Evaluates the curve equation y^2 = x^3 - 3.x + b (mod p) rearranged as
 * y^2 - x^3 + 3.x == b so no subtraction of b is needed.
 *
 * point EC point.
 * heap Heap to use if dynamically allocating.
 * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
 * not on the curve and MP_OKAY otherwise.
 */
static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
{
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* d = NULL;
#else
    sp_digit t1d[2*8];
    sp_digit t2d[2*8];
#endif
    sp_digit* t1;  /* accumulates the left-hand side y^2 - x^3 + 3.x */
    sp_digit* t2;  /* scratch: x^2, x^3 and -x^3 */
    int err = MP_OKAY;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
    if (d == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
        t1 = d + 0 * 8;
        t2 = d + 2 * 8;
#else
        (void)heap;

        t1 = t1d;
        t2 = t2d;
#endif

        /* t1 = y^2 mod p */
        sp_256_sqr_8(t1, point->y);
        (void)sp_256_mod_8(t1, t1, p256_mod);
        /* t2 = x^2 mod p */
        sp_256_sqr_8(t2, point->x);
        (void)sp_256_mod_8(t2, t2, p256_mod);
        /* t2 = x^3 mod p */
        sp_256_mul_8(t2, t2, point->x);
        (void)sp_256_mod_8(t2, t2, p256_mod);
        /* t2 = p - x^3, i.e. -x^3 mod p */
        (void)sp_256_sub_8(t2, p256_mod, t2);
        /* t1 = y^2 - x^3 mod p */
        sp_256_mont_add_8(t1, t1, t2, p256_mod);

        /* t1 = y^2 - x^3 + 3.x mod p (three modular additions of x) */
        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
        sp_256_mont_add_8(t1, t1, point->x, p256_mod);
        sp_256_mont_add_8(t1, t1, point->x, p256_mod);

        /* Point is on the curve iff the left-hand side equals b. */
        if (sp_256_cmp_8(t1, p256_b) != 0) {
            err = MP_VAL;
        }
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (d != NULL) {
        XFREE(d, heap, DYNAMIC_TYPE_ECC);
    }
#endif

    return err;
}
+
+/* Check that the x and y oridinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 pubd;
+#endif
+ sp_point_256* pub;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_8(NULL, pubd, pub);
+ if (err == MP_OKAY) {
+ sp_256_from_mp(pub->x, 8, pX);
+ sp_256_from_mp(pub->y, 8, pY);
+ sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+
+ err = sp_256_ecc_is_point_8(pub, NULL);
+ }
+
+ sp_256_point_free_8(pub, 0, NULL);
+
+ return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[8];
+ sp_point_256 pubd;
+ sp_point_256 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_256* pub;
+ sp_point_256* p = NULL;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_8(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ priv = privd;
+#endif
+
+ sp_256_from_mp(pub->x, 8, pX);
+ sp_256_from_mp(pub->y, 8, pY);
+ sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+ sp_256_from_mp(priv, 8, privm);
+
+ /* Check point at infinitiy. */
+ if ((sp_256_iszero_8(pub->x) != 0) &&
+ (sp_256_iszero_8(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y */
+ if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
+ sp_256_cmp_8(pub->y, p256_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_256_ecc_is_point_8(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order = infinity */
+ err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_256_iszero_8(p->x) == 0) ||
+ (sp_256_iszero_8(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private = point */
+ err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_256_cmp_8(p->x, pub->x) != 0 ||
+ sp_256_cmp_8(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* qX, mp_int* qY, mp_int* qZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 8 * 5];
+ sp_point_256 pd;
+ sp_point_256 qd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ sp_point_256* q = NULL;
+ int err;
+
+ err = sp_256_point_new_8(NULL, pd, p);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(NULL, qd, q);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 8, pX);
+ sp_256_from_mp(p->y, 8, pY);
+ sp_256_from_mp(p->z, 8, pZ);
+ sp_256_from_mp(q->x, 8, qX);
+ sp_256_from_mp(q->y, 8, qY);
+ sp_256_from_mp(q->z, 8, qZ);
+
+ sp_256_proj_point_add_8(p, p, q, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(q, 0, NULL);
+ sp_256_point_free_8(p, 0, NULL);
+
+ return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 8 * 2];
+ sp_point_256 pd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ int err;
+
+ err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 8, pX);
+ sp_256_from_mp(p->y, 8, pY);
+ sp_256_from_mp(p->z, 8, pZ);
+
+ sp_256_proj_point_dbl_8(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, NULL);
+
+ return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 8 * 4];
+ sp_point_256 pd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ int err;
+
+ err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 8, pX);
+ sp_256_from_mp(p->y, 8, pY);
+ sp_256_from_mp(p->z, 8, pZ);
+
+ sp_256_map_8(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, pX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, pY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, pZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, NULL);
+
+ return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
/* Find the square root of a number mod the prime of the curve.
 * The exponent chain below raises y to the power whose hex digits are built
 * up in the step comments - the standard fixed chain for the P-256 prime.
 *
 * y The number to operate on and the result.
 * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
 */
static int sp_256_mont_sqrt_8(sp_digit* y)
{
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* d;
#else
    sp_digit t1d[2 * 8];
    sp_digit t2d[2 * 8];
#endif
    sp_digit* t1;  /* running power of y */
    sp_digit* t2;  /* squaring scratch */
    int err = MP_OKAY;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
    if (d == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
        t1 = d + 0 * 8;
        t2 = d + 2 * 8;
#else
        t1 = t1d;
        t2 = t2d;
#endif

        {
            /* t2 = y ^ 0x2 */
            sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0x3 */
            sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xc */
            sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xf */
            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xf0 */
            sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xff */
            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xff00 */
            sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffff */
            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xffff0000 */
            sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff */
            sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000000 */
            sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000001 */
            sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
            sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
            sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
            /* Final 94 squarings complete the exponentiation into y. */
            sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod);
        }
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (d != NULL) {
        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
    }
#endif

    return err;
}
+
+
/* Uncompress the point given the X ordinate.
 * Computes y = sqrt(x^3 - 3.x + b) mod p and selects the root with the
 * requested parity.
 *
 * xm X ordinate.
 * odd Whether the Y ordinate is odd.
 * ym Calculated Y ordinate.
 * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
 */
int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
{
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* d;
#else
    sp_digit xd[2 * 8];
    sp_digit yd[2 * 8];
#endif
    sp_digit* x = NULL;
    sp_digit* y = NULL;
    int err = MP_OKAY;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
    if (d == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
        x = d + 0 * 8;
        y = d + 2 * 8;
#else
        x = xd;
        y = yd;
#endif

        /* Convert x into Montgomery form for the curve arithmetic. */
        sp_256_from_mp(x, 8, xm);
        err = sp_256_mod_mul_norm_8(x, x, p256_mod);
    }
    if (err == MP_OKAY) {
        /* y = x^3 */
        {
            sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
            sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
        }
        /* y = x^3 - 3x */
        sp_256_mont_sub_8(y, y, x, p256_mod);
        sp_256_mont_sub_8(y, y, x, p256_mod);
        sp_256_mont_sub_8(y, y, x, p256_mod);
        /* y = x^3 - 3x + b  (x is reused to hold b in Montgomery form) */
        err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
    }
    if (err == MP_OKAY) {
        sp_256_mont_add_8(y, y, x, p256_mod);
        /* y = sqrt(x^3 - 3x + b) */
        err = sp_256_mont_sqrt_8(y);
    }
    if (err == MP_OKAY) {
        /* Clear the top half of the double-wide buffer, then convert y out of
         * Montgomery form. */
        XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
        sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
        /* If y's parity doesn't match the requested oddness use p - y. */
        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
            sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
        }

        err = sp_256_to_mp(y, ym);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (d != NULL) {
        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
    }
#endif

    return err;
}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
/* Point structure to use. */
typedef struct sp_point_384 {
    /* X ordinate - double-wide to hold unreduced products. */
    sp_digit x[2 * 12];
    /* Y ordinate - double-wide to hold unreduced products. */
    sp_digit y[2 * 12];
    /* Z ordinate - double-wide to hold unreduced products. */
    sp_digit z[2 * 12];
    /* Non-zero when the point is the point at infinity. */
    int infinity;
} sp_point_384;

/* The modulus (prime) of the curve P384. */
static const sp_digit p384_mod[12] = {
    0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
};
/* The Montgomery normalizer for modulus of the curve P384. */
static const sp_digit p384_norm_mod[12] = {
    0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
};
/* The Montgomery multiplier for modulus of the curve P384. */
static sp_digit p384_mp_mod = 0x00000001;
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                            defined(HAVE_ECC_VERIFY)
/* The order of the curve P384. */
static const sp_digit p384_order[12] = {
    0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
};
#endif
/* The order of the curve P384 minus 2. */
static const sp_digit p384_order2[12] = {
    0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
};
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery normalizer for order of the curve P384. */
static const sp_digit p384_norm_order[12] = {
    0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
};
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery multiplier for order of the curve P384. */
static sp_digit p384_mp_order = 0xe88fdc45;
#endif
/* The base point of curve P384. */
static const sp_point_384 p384_base = {
    /* X ordinate */
    {
        0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
        0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* Y ordinate */
    {
        0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
        0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* Z ordinate */
    {
        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* infinity */
    0
};
#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
/* The curve constant b of P384. */
static const sp_digit p384_b[12] = {
    0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
    0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
};
#endif
+
+static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+ int ret = MP_OKAY;
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ (void)sp;
+ *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+ *p = sp;
+#endif
+ if (*p == NULL) {
+ ret = MEMORY_E;
+ }
+ return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
+#endif
+
+
+static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+ if (p != NULL) {
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+/* Clear point data if requested. */
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+#endif
+ (void)heap;
+}
+
/* Multiply a number by the Montgomery normalizer mod modulus (prime),
 * i.e. convert a into Montgomery form for P384 using the fixed reduction
 * coefficients of the P384 prime (the row comments give each word's
 * coefficient vector).
 *
 * r The resulting Montgomery form number.
 * a The number to convert.
 * m The modulus (prime) - unused, the P384 prime is implicit.
 * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
 */
static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    int64_t* t;
#else
    int64_t t[12];
#endif
    int64_t o;  /* carry out of the top word, folded back in below */
    int err = MP_OKAY;

    (void)m;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC);
    if (t == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
        /* 1 0 0 0 0 0 0 0 1 1 0 -1 */
        t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11];
        /* -1 1 0 0 0 0 0 0 -1 0 1 1 */
        t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11];
        /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */
        t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11];
        /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */
        t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11];
        /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */
        t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] - 2 * (uint64_t)a[11];
        /* 0 1 1 0 -1 1 0 0 0 1 2 1 */
        t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11];
        /* 0 0 1 1 0 -1 1 0 0 0 1 2 */
        t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11];
        /* 0 0 0 1 1 0 -1 1 0 0 0 1 */
        t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11];
        /* 0 0 0 0 1 1 0 -1 1 0 0 0 */
        t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8];
        /* 0 0 0 0 0 1 1 0 -1 1 0 0 */
        t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9];
        /* 0 0 0 0 0 0 1 1 0 -1 1 0 */
        t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10];
        /* 0 0 0 0 0 0 0 1 1 0 -1 1 */
        t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11];

        /* First carry pass: reduce each word to 32 bits. */
        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
        /* Fold the top carry back in using
         * 2^384 = 2^128 + 2^96 - 2^32 + 1 (mod p384). */
        o     = t[11] >> 32; t[11] &= 0xffffffff;
        t[0] += o;
        t[1] -= o;
        t[3] += o;
        t[4] += o;
        /* Second carry pass to restore the 32-bit word invariant. */
        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
        t[11] += t[10] >> 32; t[10] &= 0xffffffff;

        r[0] = t[0];
        r[1] = t[1];
        r[2] = t[2];
        r[3] = t[3];
        r[4] = t[4];
        r[5] = t[5];
        r[6] = t[6];
        r[7] = t[7];
        r[8] = t[8];
        r[9] = t[9];
        r[10] = t[10];
        r[11] = t[11];
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL)
        XFREE(t, NULL, DYNAMIC_TYPE_ECC);
#endif

    return err;
}
+
/* Convert an mp_int to an array of sp_digit.
 *
 * r A single precision integer (array of 32-bit sp_digits, result).
 * size Maximum number of sp_digits to write - unused digits are zeroed.
 * a A multi-precision integer.
 */
static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 32
    /* mp_int digits are the same width as sp_digits: straight copy then
     * zero-fill the remainder. */
    int j;

    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);

    for (j = a->used; j < size; j++) {
        r[j] = 0;
    }
#elif DIGIT_BIT > 32
    /* mp_int digits are wider: split each across one or more 32-bit
     * sp_digits, tracking the bit offset in s. */
    int i, j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0xffffffff;
        s = 32U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        while ((s + 32U) <= (word32)DIGIT_BIT) {
            s += 32U;
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = 0L;
            }
        }
        s = (word32)DIGIT_BIT - s;
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    /* mp_int digits are narrower: accumulate them into 32-bit sp_digits,
     * tracking the bit offset in s. */
    int i, j = 0, s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 32) {
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            s = 32 - s;
            if (s == DIGIT_BIT) {
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}
+
+/* Translate a point of type ecc_point into the sp_point_384
+ * representation used by this implementation.
+ *
+ * p Point of type sp_point_384 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm)
+{
+    /* Clear each ordinate before loading its mp_int value. */
+    XMEMSET(p->x, 0, sizeof(p->x));
+    sp_384_from_mp(p->x, 12, pm->x);
+    XMEMSET(p->y, 0, sizeof(p->y));
+    sp_384_from_mp(p->y, 12, pm->y);
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_384_from_mp(p->z, 12, pm->z);
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Grows r to hold 384 bits, packs the 12 32-bit words of a into
+ * mp_digits (handling equal, narrower and wider mp_digit widths), and
+ * clamps the result.
+ *
+ * a A single precision integer (12 words, input).
+ * r A multi-precision integer (output).
+ * returns MP_OKAY on success, or the error from mp_grow.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+        /* Digit widths match: direct copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 12);
+        r->used = 12;
+        mp_clamp(r);
+#elif DIGIT_BIT < 32
+        /* mp_digit is narrower: split each 32-bit word over mp_digits.
+         * s tracks the bit position within the current source word. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 12; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 32) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 32 - s;
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp_digit is wider: pack several 32-bit words per mp_digit.
+         * s tracks the bit position within the current mp_digit. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 12; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 32 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 32 - s;
+            }
+            else {
+                s += 32;
+            }
+        }
+        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Translate a point of type sp_point_384 to type ecc_point.
+ *
+ * Converts each of the three ordinates in turn, stopping at the first
+ * error.
+ *
+ * p Point of type sp_point_384.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm)
+{
+    int err = MP_OKAY;
+    const sp_digit* ord[3];
+    mp_int* out[3];
+    int i;
+
+    ord[0] = p->x; out[0] = pm->x;
+    ord[1] = p->y; out[1] = pm->y;
+    ord[2] = p->z; out[2] = pm->z;
+    for (i = 0; (i < 3) && (err == MP_OKAY); i++) {
+        err = sp_384_to_mp(ord[i], out[i]);
+    }
+
+    return err;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (column at a time) schoolbook multiply built from
+ * Thumb-1 16x16->32 bit MUL partial products.  The 24-word product is
+ * accumulated into a local buffer and copied to r at the end, which
+ * allows r to alias a or b.
+ *
+ * r A single precision integer. (output, 24 words)
+ * a A single precision integer. (12 words)
+ * b A single precision integer. (12 words)
+ */
+SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[12 * 2];
+    /* Register roles: r3:r4:r5 = 96-bit column accumulator, r8 = byte
+     * offset of the current column, r9/r10 = saved a/b, r11 = tmp,
+     * r12 = end of a.  Outer loop 1: one output word per iteration;
+     * inner loop 2: one a[i]*b[j] product per iteration. */
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r10, %[b]\n\t"
+        "mov r6, #48\n\t"
+        "add r6, r9\n\t"
+        "mov r12, r6\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r6, #44\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov %[b], r8\n\t"
+        "sub %[b], %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add %[b], r10\n\t"
+        "\n2:\n\t"
+        "# Multiply Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply Done\n\t"
+        "add %[a], #4\n\t"
+        "sub %[b], #4\n\t"
+        "cmp %[a], r12\n\t"
+        "beq 3f\n\t"
+        "mov r6, r8\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r11\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #88\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[a], r9\n\t"
+        "mov %[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+    );
+
+    /* Copy the finished product out of the temporary buffer. */
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant time: every word of b is always loaded and masked, so the
+ * memory access pattern does not depend on m.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * returns the borrow out of the subtraction (0 or all-ones).
+ */
+SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    /* r7 walks byte offsets 0..44; c carries the running borrow. */
+    __asm__ __volatile__ (
+        "mov r5, #48\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "sbc r5, r6\n\t"
+        "sbc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Reduction modulo the order uses the same Montgomery reduction code. */
+#define sp_384_mont_reduce_order_12 sp_384_mont_reduce_12
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * For each of the 12 low words: mu = a[i] * mp, then a += mu * m
+ * shifted up by i words, so the low 12 words become zero and the
+ * result appears in the upper words.  The final conditional subtract
+ * brings the value below 2^384.
+ *
+ * a A single precision number to reduce in place. (24 words)
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    /* Register roles: r8 = mp, r9 = &a[i], r10 = &a[i+j], r11 = i*4,
+     * r12 = running carry (ca), r14 = saved m.  Loop 1 iterates i,
+     * loop 2 iterates j over m[0..10]; m[11] is handled separately. */
+    __asm__ __volatile__ (
+        "mov r8, %[mp]\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov r14, %[m]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "# i = 0\n\t"
+        "mov r11, r4\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "mov %[ca], #0\n\t"
+        "# mu = a[i] * mp\n\t"
+        "mov %[mp], r8\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mul %[mp], %[a]\n\t"
+        "mov %[m], r14\n\t"
+        "mov r10, r9\n\t"
+        "\n2:\n\t"
+        "# a[i+j] += m[j] * mu\n\t"
+        "mov %[a], r10\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "# Multiply m[j] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add %[a], r7\n\t"
+        "adc r5, %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add %[a], r6\n\t"
+        "adc r5, r7\n\t"
+        "# Multiply m[j] and mu - Done\n\t"
+        "add r4, %[a]\n\t"
+        "adc r5, %[ca]\n\t"
+        "mov %[a], r10\n\t"
+        "str r4, [%[a]]\n\t"
+        "mov r6, #4\n\t"
+        "add %[m], #4\n\t"
+        "add r10, r6\n\t"
+        "mov r4, #44\n\t"
+        "add r4, r9\n\t"
+        "cmp r10, r4\n\t"
+        "blt 2b\n\t"
+        "# a[i+11] += m[11] * mu\n\t"
+        "mov %[ca], #0\n\t"
+        "mov r4, r12\n\t"
+        "mov %[a], #0\n\t"
+        "# Multiply m[11] and mu - Start\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r6, %[mp], #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r5, r7\n\t"
+        "adc r4, %[ca]\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsr r6, %[mp], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "ldr r7, [%[m]]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r5, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc %[a], %[ca]\n\t"
+        "# Multiply m[11] and mu - Done\n\t"
+        "mov %[ca], %[a]\n\t"
+        "mov %[a], r10\n\t"
+        "ldr r7, [%[a], #4]\n\t"
+        "ldr %[a], [%[a]]\n\t"
+        "mov r6, #0\n\t"
+        "add r5, %[a]\n\t"
+        "adc r7, r4\n\t"
+        "adc %[ca], r6\n\t"
+        "mov %[a], r10\n\t"
+        "str r5, [%[a]]\n\t"
+        "str r7, [%[a], #4]\n\t"
+        "# i += 1\n\t"
+        "mov r6, #4\n\t"
+        "add r9, r6\n\t"
+        "add r11, r6\n\t"
+        "mov r12, %[ca]\n\t"
+        "mov %[a], r9\n\t"
+        "mov r4, #48\n\t"
+        "cmp r11, r4\n\t"
+        "blt 1b\n\t"
+        "mov %[m], r14\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* The asm leaves 'a' advanced 12 words past the input ("+r"
+     * constraint), so a - 12 is the caller's buffer: conditionally
+     * subtract m when the reduction carried out. */
+    sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full 24-word product followed by Montgomery reduction back to
+ * 12 words.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montogmery form.
+ * b Second number to multiply in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product-scanning square: off-diagonal partial products are added
+ * twice and diagonal ones once.  The 24-word result is accumulated in
+ * a 96-byte buffer carved out of the stack inside the asm block and
+ * copied to r at the end, which allows r to alias a.
+ *
+ * r A single precision integer. (output, 24 words)
+ * a A single precision integer. (12 words)
+ */
+SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    /* Register roles: r3:r4:r5 = 96-bit column accumulator, r8 = byte
+     * offset of the current column, r9 = a, r10 = stack buffer,
+     * r11 = r.  Loop 1: one output word per iteration; loop 2: one
+     * pair of partial products; label 4 (first occurrence) handles the
+     * diagonal a[i]^2 term. */
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r8, r3\n\t"
+        "mov r11, %[r]\n\t"
+        "mov r6, #96\n\t"
+        "neg r6, r6\n\t"
+        "add sp, r6\n\t"
+        "mov r10, sp\n\t"
+        "mov r9, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r6, #44\n\t"
+        "mov %[a], r8\n\t"
+        "sub %[a], r6\n\t"
+        "sbc r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], r6\n\t"
+        "mov r2, r8\n\t"
+        "sub r2, %[a]\n\t"
+        "add %[a], r9\n\t"
+        "add r2, r9\n\t"
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        "# Multiply * 2: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r7, [r2]\n\t"
+        "lsl r7, r7, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Multiply * 2: Done\n\t"
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        "# Square: Start\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r6\n\t"
+        "add r3, r6\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "mul r7, r7\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #15\n\t"
+        "lsl r6, r6, #17\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# Square: Done\n\t"
+        "\n5:\n\t"
+        "add %[a], #4\n\t"
+        "sub r2, #4\n\t"
+        "mov r6, #48\n\t"
+        "add r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r7, r8\n\t"
+        "add r7, r9\n\t"
+        "cmp %[a], r7\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        "mov %[r], r10\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #88\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r9\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov %[r], r11\n\t"
+        "mov %[a], r10\n\t"
+        "mov r3, #92\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "sub r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #96\n\t"
+        "add sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+    );
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full 24-word square followed by Montgomery reduction back to
+ * 12 words.
+ *
+ * r Result of squaring.
+ * a Number to square in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times.
+ * (r = a ^ (2 ^ n) mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montogmery form.
+ * n Number of times to square.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    int i;
+
+    /* First squaring reads from a; the remaining n-1 square r in place. */
+    sp_384_mont_sqr_12(r, a, m, mp);
+    for (i = 1; i < n; i++) {
+        sp_384_mont_sqr_12(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve: the exponent p - 2 used for Fermat
+ * inversion (a^(p-2) = a^-1 mod p).  Little-endian, 32 bits per word. */
+static const uint32_t p384_mod_minus_2[12] = {
+    0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * Computes a^(p-2) mod p (Fermat inversion).  The small variant walks
+ * the exponent bits; the large variant uses a fixed addition chain
+ * (the running exponent value is noted before each step).
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data (12 words for the small variant, 10*12 words for
+ *    the addition-chain variant).
+ */
+static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Left-to-right square-and-multiply over the bits of p - 2. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    sp_digit* t4 = td + 6 * 12;
+    sp_digit* t5 = td + 8 * 12;
+
+    /* 0x2 */
+    sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word (offset #44) down.  r3 is a mask
+ * that is cleared once the first differing word has been seen, so later
+ * words cannot change the result and the timing is data independent.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #44\n\t"
+        "1:\n\t"
+        "ldr r7, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r7, r3\n\t"
+        "and r5, r3\n\t"
+        "mov r4, r7\n\t"
+        "sub r7, r5\n\t"
+        "sbc r7, r7\n\t"
+        "add %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r5, r4\n\t"
+        "sbc r7, r7\n\t"
+        "sub %[r], r7\n\t"
+        "mvn r7, r7\n\t"
+        "and r3, r7\n\t"
+        "sub r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * a Array of sp_digit to normalize.
+ *
+ * No-op here: each 32-bit word already holds a full 32 bits.
+ */
+#define sp_384_norm_12(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes x = X/Z^2 and y = Y/Z^3, converts both out of Montgomery
+ * form (by a reduction of the zero-extended value) and sets z = 1.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    int32_t n;
+
+    /* t1 = 1/Z (Montgomery form). */
+    sp_384_mont_inv_12(t1, p->z, t + 2*12);
+
+    /* t2 = 1/Z^2, t1 = 1/Z^3. */
+    sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_12(r->x, p384_mod);
+    sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_12(r->y, p384_mod);
+    sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->y);
+
+    /* Affine Z is 1. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Size-optimized loop: 12 words, carry chained between iterations by
+ * re-setting the carry flag from c (r7 = -1, add c + r7) at the top of
+ * each pass.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the carry out of the addition (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r6, %[a]\n\t"
+        "mov r7, #0\n\t"
+        "add r6, #48\n\t"
+        "sub r7, #1\n\t"
+        "\n1:\n\t"
+        "add %[c], r7\n\t"
+        "ldr r4, [%[a]]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "add %[a], #4\n\t"
+        "add %[b], #4\n\t"
+        "add %[r], #4\n\t"
+        "cmp %[a], r6\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled: first word uses ADD, the remaining 11 chain the carry
+ * with ADC.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the carry out of the addition (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "add r4, r5\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #36]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #44]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #44]\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r Result of addition.
+ * a First number to add in Montogmery form.
+ * b Second number to add in Montogmery form.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit carry = sp_384_add_12(r, a, b);
+
+    /* Constant-time reduce: subtract m only when the addition carried. */
+    sp_384_cond_sub_12(r, r, m, (sp_digit)0 - carry);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r Result of doubling.
+ * a Number to double in Montogmery form.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit carry = sp_384_add_12(r, a, a);
+
+    /* Constant-time reduce: subtract m only when the addition carried. */
+    sp_384_cond_sub_12(r, r, m, (sp_digit)0 - carry);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r Result of Tripling.
+ * a Number to triple in Montogmery form.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit carry;
+
+    /* r = 2a, reduced when the addition carries out. */
+    carry = sp_384_add_12(r, a, a);
+    sp_384_cond_sub_12(r, r, m, (sp_digit)0 - carry);
+    /* r = 2a + a, reduced again. */
+    carry = sp_384_add_12(r, r, a);
+    sp_384_cond_sub_12(r, r, m, (sp_digit)0 - carry);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Size-optimized loop: 12 words, borrow chained between iterations by
+ * re-setting the carry flag from c (0 - c) at the top of each pass.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the borrow out of the subtraction (0 or all-ones).
+ */
+SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r6, %[a]\n\t"
+        "add r6, #48\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r4, [%[a]]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "sbc r4, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        "sbc %[c], %[c]\n\t"
+        "add %[a], #4\n\t"
+        "add %[b], #4\n\t"
+        "add %[r], #4\n\t"
+        "cmp %[a], r6\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Fully unrolled, two words per group: first group uses SUB, the rest
+ * chain the borrow with SBC.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the borrow out of the subtraction (0 or all-ones).
+ */
+SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[a], #4]\n\t"
+        "ldr r6, [%[b], #0]\n\t"
+        "ldr r7, [%[b], #4]\n\t"
+        "sub r4, r6\n\t"
+        "sbc r5, r7\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r5, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[a], #12]\n\t"
+        "ldr r6, [%[b], #8]\n\t"
+        "ldr r7, [%[b], #12]\n\t"
+        "sbc r4, r6\n\t"
+        "sbc r5, r7\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "str r5, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[a], #20]\n\t"
+        "ldr r6, [%[b], #16]\n\t"
+        "ldr r7, [%[b], #20]\n\t"
+        "sbc r4, r6\n\t"
+        "sbc r5, r7\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[a], #28]\n\t"
+        "ldr r6, [%[b], #24]\n\t"
+        "ldr r7, [%[b], #28]\n\t"
+        "sbc r4, r6\n\t"
+        "sbc r5, r7\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "str r5, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[a], #36]\n\t"
+        "ldr r6, [%[b], #32]\n\t"
+        "ldr r7, [%[b], #36]\n\t"
+        "sbc r4, r6\n\t"
+        "sbc r5, r7\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "str r5, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[a], #44]\n\t"
+        "ldr r6, [%[b], #40]\n\t"
+        "ldr r7, [%[b], #44]\n\t"
+        "sbc r4, r6\n\t"
+        "sbc r5, r7\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "str r5, [%[r], #44]\n\t"
+        "sbc %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Constant time: every word of b is always loaded and masked, so the
+ * memory access pattern does not depend on m.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ * returns the carry out of the addition (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    /* r7 walks byte offsets 0..44; c carries the running carry, which
+     * is loaded back into the carry flag by the -1 + c sequence. */
+    __asm__ __volatile__ (
+        "mov r5, #48\n\t"
+        "mov r8, r5\n\t"
+        "mov r7, #0\n\t"
+        "1:\n\t"
+        "ldr r6, [%[b], r7]\n\t"
+        "and r6, %[m]\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, #1\n\t"
+        "add r5, %[c]\n\t"
+        "ldr r5, [%[a], r7]\n\t"
+        "adc r5, r6\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c]\n\t"
+        "str r5, [%[r], r7]\n\t"
+        "add r7, #4\n\t"
+        "cmp r7, r8\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r7", "r8"
+    );
+
+    return c;
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montogmery form.
+ * b Number to subtract with in Montogmery form.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit borrow = sp_384_sub_12(r, a, b);
+
+    /* Constant-time fix-up: add m back only when the subtract borrowed. */
+    sp_384_cond_add_12(r, r, m, borrow);
+}
+
+/* Shift the number right by one bit. (r = a >> 1)
+ *
+ * Fully unrolled: each word takes its low bit from the word above.
+ * The top bit of r[11] is left as zero.
+ *
+ * r A single precision integer (result).
+ * a A single precision integer to shift.
+ */
+static void sp_384_rshift1_12(sp_digit* r, sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldr r2, [%[a]]\n\t"
+        "ldr r3, [%[a], #4]\n\t"
+        "lsr r2, r2, #1\n\t"
+        "lsl r5, r3, #31\n\t"
+        "lsr r3, r3, #1\n\t"
+        "orr r2, r2, r5\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "str r2, [%[r], #0]\n\t"
+        "lsl r5, r4, #31\n\t"
+        "lsr r4, r4, #1\n\t"
+        "orr r3, r3, r5\n\t"
+        "ldr r2, [%[a], #12]\n\t"
+        "str r3, [%[r], #4]\n\t"
+        "lsl r5, r2, #31\n\t"
+        "lsr r2, r2, #1\n\t"
+        "orr r4, r4, r5\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "lsl r5, r3, #31\n\t"
+        "lsr r3, r3, #1\n\t"
+        "orr r2, r2, r5\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "str r2, [%[r], #12]\n\t"
+        "lsl r5, r4, #31\n\t"
+        "lsr r4, r4, #1\n\t"
+        "orr r3, r3, r5\n\t"
+        "ldr r2, [%[a], #24]\n\t"
+        "str r3, [%[r], #16]\n\t"
+        "lsl r5, r2, #31\n\t"
+        "lsr r2, r2, #1\n\t"
+        "orr r4, r4, r5\n\t"
+        "ldr r3, [%[a], #28]\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "lsl r5, r3, #31\n\t"
+        "lsr r3, r3, #1\n\t"
+        "orr r2, r2, r5\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "str r2, [%[r], #24]\n\t"
+        "lsl r5, r4, #31\n\t"
+        "lsr r4, r4, #1\n\t"
+        "orr r3, r3, r5\n\t"
+        "ldr r2, [%[a], #36]\n\t"
+        "str r3, [%[r], #28]\n\t"
+        "lsl r5, r2, #31\n\t"
+        "lsr r2, r2, #1\n\t"
+        "orr r4, r4, r5\n\t"
+        "ldr r3, [%[a], #40]\n\t"
+        "str r2, [%[r], #36]\n\t"
+        "lsl r5, r3, #31\n\t"
+        "lsr r3, r3, #1\n\t"
+        "orr r2, r2, r5\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "str r2, [%[r], #36]\n\t"
+        "lsl r5, r4, #31\n\t"
+        "lsr r4, r4, #1\n\t"
+        "orr r3, r3, r5\n\t"
+        "str r3, [%[r], #40]\n\t"
+        "str r4, [%[r], #44]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit carry;
+
+    /* When a is odd, add m first so that halving stays exact. */
+    carry = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1));
+    sp_384_rshift1_12(r, r);
+    /* Re-insert the carry out of the addition as the top bit. */
+    r[11] |= carry << 31;
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * Jacobian coordinate doubling; every step is a modular operation in
+ * Montgomery form and the statement order is significant (values are
+ * reused in place).
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data. (4*12 words: t1, t2)
+ */
+static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_12(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_12(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_12(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_12(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_12(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_12(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_12(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_12(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation: a fixed-count loop ORs together the
+ * per-word differences with no data-dependent branching.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit diff = 0;
+    int i;
+
+    for (i = 0; i < 12; i++) {
+        diff |= a[i] ^ b[i];
+    }
+    return diff == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Jacobian coordinate addition.  The doubling case (p == q or p == -q
+ * by coordinate comparison) is detected first and delegated to the
+ * point-double routine.  Infinity handling selects between the real
+ * result buffer and a zeroed scratch point via rp[] so the code path
+ * does not branch on infinity.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data. (10*12 words: t1..t5)
+ */
+static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: t1 = -q->y; same X and Z with Y equal to q->y or
+     * -q->y means p == +/-q. */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        /* Work on scratch when either input is infinity. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* Preload r with p (or q when p is infinity). */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_12(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_12(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, x, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, y, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, y, t5, p384_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Uses a fixed 4-bit window: a 16-entry table of small multiples of g is
+ * built, then the 384-bit scalar is consumed one nibble at a time (MSB
+ * first) with four doublings and one table addition per nibble.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td[16];
+ sp_point_384 rtd;
+ sp_digit tmpd[2 * 12 * 6];
+#endif
+ sp_point_384* t;
+ sp_point_384* rt;
+ sp_digit* tmp;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_12(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+ if (t == NULL)
+ err = MEMORY_E;
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#else
+ t = td;
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Build table t[j] = j * g in projective Montgomery form. */
+ /* t[0] = {0, 0, 1} * norm */
+ XMEMSET(&t[0], 0, sizeof(t[0]));
+ t[0].infinity = 1;
+ /* t[1] = {g->x, g->y, g->z} * norm */
+ (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod);
+ (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod);
+ (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod);
+ t[1].infinity = 0;
+ sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp);
+ t[ 2].infinity = 0;
+ sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp);
+ t[ 3].infinity = 0;
+ sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp);
+ t[ 4].infinity = 0;
+ sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp);
+ t[ 5].infinity = 0;
+ sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp);
+ t[ 6].infinity = 0;
+ sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp);
+ t[ 7].infinity = 0;
+ sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp);
+ t[ 8].infinity = 0;
+ sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp);
+ t[ 9].infinity = 0;
+ sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp);
+ t[10].infinity = 0;
+ sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp);
+ t[11].infinity = 0;
+ sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp);
+ t[12].infinity = 0;
+ sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp);
+ t[13].infinity = 0;
+ sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp);
+ t[14].infinity = 0;
+ sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp);
+ t[15].infinity = 0;
+
+ /* Scan the scalar MSB first: seed with the top nibble of the
+ * most-significant word k[11], then 4 bits per iteration.
+ * n = bit buffer, c = bits remaining in n, y = current nibble. */
+ i = 10;
+ n = k[i+1] << 0;
+ c = 28;
+ y = n >> 28;
+ XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+ n <<= 4;
+ for (; i>=0 || c>=4; ) {
+ if (c < 4) {
+ n |= k[i--];
+ c += 32;
+ }
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+
+ sp_384_proj_point_dbl_12(rt, rt, tmp);
+ sp_384_proj_point_dbl_12(rt, rt, tmp);
+ sp_384_proj_point_dbl_12(rt, rt, tmp);
+ sp_384_proj_point_dbl_12(rt, rt, tmp);
+
+ sp_384_proj_point_add_12(rt, rt, &t[y], tmp);
+ }
+
+ if (map != 0) {
+ sp_384_map_12(r, rt, tmp);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6);
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+ }
+ if (t != NULL) {
+ XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ ForceZero(tmpd, sizeof(tmpd));
+ ForceZero(td, sizeof(td));
+#endif
+ sp_384_point_free_12(rt, 1, heap);
+
+ return err;
+}
+
+/* A table entry for pre-computed points.
+ * Holds an affine point; ordinates are in Montgomery form (see the
+ * stripe-table generation and lookup code below). */
+typedef struct sp_table_entry_384 {
+ sp_digit x[12]; /* X ordinate of the pre-computed point. */
+ sp_digit y[12]; /* Y ordinate of the pre-computed point. */
+} sp_table_entry_384;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ * The point is doubled in place; there is no separate result parameter.
+ *
+ * p Point to double repeatedly (updated in place).
+ * n Number of times to double
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*12;
+ sp_digit* b = t + 4*12;
+ sp_digit* t1 = t + 6*12;
+ sp_digit* t2 = t + 8*12;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = p->x;
+ y = p->y;
+ z = p->z;
+
+ /* Y = 2*Y */
+ sp_384_mont_dbl_12(y, y, p384_mod);
+ /* W = Z^4 */
+ sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod);
+
+/* Non-small builds peel the final iteration out of the loop so the last
+ * W update (not needed after the final double) can be skipped. */
+#ifndef WOLFSSL_SP_SMALL
+ while (--n > 0)
+#else
+ while (--n >= 0)
+#endif
+ {
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_12(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_12(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_12(t2, b, p384_mod);
+ sp_384_mont_sub_12(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+ /* t1 = Y^4 */
+ sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+ if (n != 0)
+#endif
+ {
+ /* W = W*Y^4 */
+ sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_12(y, b, x, p384_mod);
+ sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_12(y, y, p384_mod);
+ sp_384_mont_sub_12(y, y, t1, p384_mod);
+ }
+#ifndef WOLFSSL_SP_SMALL
+ /* Final iteration: same as the loop body but without the W update. */
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_12(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_12(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_12(t2, b, p384_mod);
+ sp_384_mont_sub_12(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+ /* t1 = Y^4 */
+ sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_12(y, b, x, p384_mod);
+ sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_12(y, y, p384_mod);
+ sp_384_mont_sub_12(y, y, t1, p384_mod);
+#endif
+ /* Y = Y/2 */
+ sp_384_div2_12(y, y, p384_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point (q) is
+ * assumed to have a Z ordinate of one (i.e. it is affine, in Montgomery
+ * form), which lets the Z2 computations of the general formulas be
+ * skipped.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add (Z == 1).
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
+ const sp_point_384* q, sp_digit* t)
+{
+ const sp_point_384* ap[2];
+ sp_point_384* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*12;
+ sp_digit* t3 = t + 4*12;
+ sp_digit* t4 = t + 6*12;
+ sp_digit* t5 = t + 8*12;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Check double: the addition formulas below degenerate when P == +/-Q,
+ * so compute t1 = -Q.y (mod p) and fall back to doubling in that case. */
+ (void)sp_384_sub_12(t1, p384_mod, q->y);
+ sp_384_norm_12(t1);
+ if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+ (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+ sp_384_proj_point_dbl_12(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ /* rp[1] is a zeroed dummy point (overlaid on t) that absorbs the
+ * ordinate writes below when either input is at infinity. */
+ rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_384));
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ /* Copy p into the result (or q when p is at infinity). */
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<12; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<12; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<12; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - X1 */
+ sp_384_mont_sub_12(t2, t2, x, p384_mod);
+ /* R = S2 - Y1 */
+ sp_384_mont_sub_12(t4, t4, y, p384_mod);
+ /* Z3 = H*Z1 */
+ sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+ /* X3 = R^2 - H^3 - 2*X1*H^2 */
+ sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_12(x, t1, t5, p384_mod);
+ sp_384_mont_dbl_12(t1, t3, p384_mod);
+ sp_384_mont_sub_12(x, x, t1, p384_mod);
+ /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+ sp_384_mont_sub_12(t3, t3, x, p384_mod);
+ sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_12(y, t3, t5, p384_mod);
+ }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ * Computes X = X/Z^2 and Y = Y/Z^3 via a single modular inversion of Z,
+ * then sets Z to one (in Montgomery form, i.e. the normalizer).
+ *
+ * a Point to convert (updated in place).
+ * t Temporary data.
+ */
+static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 12;
+ sp_digit* tmp = t + 4 * 12;
+
+ /* t1 = 1/Z */
+ sp_384_mont_inv_12(t1, a->z, tmp);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3 */
+ sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+ sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod);
+ XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ * Builds a 16-entry stripe table: table[1<<i] = a * 2^(96*i) for
+ * i = 0..3, and each remaining index is the sum of the entries selected
+ * by its set bits. All entries are stored affine, in Montgomery form.
+ *
+ * a The base point.
+ * table Place to store generated point data (16 entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+ sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td, s1d, s2d;
+#endif
+ sp_point_384* t;
+ sp_point_384* s1 = NULL;
+ sp_point_384* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_12(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, s2d, s2);
+ }
+
+ /* t = a converted to Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_384_proj_to_affine_12(t, tmp);
+
+ XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* Power-of-two entries: table[1<<i] = a * 2^(96*i). */
+ for (i=1; i<4; i++) {
+ sp_384_proj_point_dbl_n_12(t, 96, tmp);
+ sp_384_proj_to_affine_12(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Remaining entries: table[j] = table[1<<i] + table[j - (1<<i)]. */
+ for (i=1; i<4; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+ sp_384_proj_to_affine_12(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_384_point_free_12(s2, 0, heap);
+ sp_384_point_free_12(s1, 0, heap);
+ sp_384_point_free_12( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Stripe method with 4 stripes of 96 bits: bit j of each window index
+ * comes from scalar bit j*96 + i, so one table addition covers four
+ * scalar bits per doubling step.
+ *
+ * r Resulting point.
+ * g Point being multiplied (unused here - represented by 'table').
+ * table Pre-computed stripe table for the point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+ const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 rtd;
+ sp_point_384 pd;
+ sp_digit td[2 * 12 * 6];
+#endif
+ sp_point_384* rt;
+ sp_point_384* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_384_point_new_12(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+ XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+ /* Seed with the top column (bit 95 of each stripe). */
+ y = 0;
+ for (j=0,x=95; j<4; j++,x+=96) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ /* One double and one table addition per remaining column. */
+ for (i=94; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<4; j++,x+=96) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ sp_384_proj_point_dbl_12(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_384_map_12(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(p, 0, heap);
+ sp_384_point_free_12(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry mapping a point to its pre-computed stripe table. */
+typedef struct sp_cache_384_t {
+ sp_digit x[12]; /* X ordinate of the cached point. */
+ sp_digit y[12]; /* Y ordinate of the cached point. */
+ sp_table_entry_384 table[16]; /* Stripe table for the point. */
+ uint32_t cnt; /* Number of times the point has been requested. */
+ int set; /* Non-zero when this entry is in use. */
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_384 = 0;
+ static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find (or create) the cache entry for point g.
+ * Not thread-safe by itself: without HAVE_THREAD_LS the caller holds
+ * sp_cache_384_lock (see sp_384_ecc_mulmod_12).
+ *
+ * g Point to look up.
+ * cache Receives the matching (or newly assigned) cache entry.
+ */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazily clear all entries on first use. */
+ if (sp_cache_384_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_384[i].set = 0;
+ }
+ sp_cache_384_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_384[i].set)
+ continue;
+
+ if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+ sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+ sp_cache_384[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_384_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_384[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_384_last) {
+ least = sp_cache_384[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_384[j].cnt < least) {
+ i = j;
+ least = sp_cache_384[i].cnt;
+ }
+ }
+ }
+
+ /* Claim the entry for this point; table is built by the caller. */
+ XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+ XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+ sp_cache_384[i].set = 1;
+ sp_cache_384[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_384[i];
+ sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 12 * 7];
+ sp_cache_384_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ if (initCacheMutex_384 == 0) {
+ wc_InitMutex(&sp_cache_384_lock);
+ initCacheMutex_384 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_384_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_384(g, &cache);
+ if (cache->cnt == 2)
+ sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+ }
+ else {
+ err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ * Builds a 256-entry stripe table: table[1<<i] = a * 2^(48*i) for
+ * i = 0..7, and each remaining index is the sum of the entries selected
+ * by its set bits. All entries are stored affine, in Montgomery form.
+ *
+ * a The base point.
+ * table Place to store generated point data (256 entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+ sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td, s1d, s2d;
+#endif
+ sp_point_384* t;
+ sp_point_384* s1 = NULL;
+ sp_point_384* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_12(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, s2d, s2);
+ }
+
+ /* t = a converted to Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_384_proj_to_affine_12(t, tmp);
+
+ XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* Power-of-two entries: table[1<<i] = a * 2^(48*i). */
+ for (i=1; i<8; i++) {
+ sp_384_proj_point_dbl_n_12(t, 48, tmp);
+ sp_384_proj_to_affine_12(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Remaining entries: table[j] = table[1<<i] + table[j - (1<<i)]. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+ sp_384_proj_to_affine_12(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_384_point_free_12(s2, 0, heap);
+ sp_384_point_free_12(s1, 0, heap);
+ sp_384_point_free_12( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Stripe method with 8 stripes of 48 bits: bit j of each window index
+ * comes from scalar bit j*48 + i, so one table addition covers eight
+ * scalar bits per doubling step.
+ *
+ * r Resulting point.
+ * g Point being multiplied (unused here - represented by 'table').
+ * table Pre-computed stripe table for the point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+ const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 rtd;
+ sp_point_384 pd;
+ sp_digit td[2 * 12 * 6];
+#endif
+ sp_point_384* rt;
+ sp_point_384* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_384_point_new_12(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+ XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+ /* Seed with the top column (bit 47 of each stripe). */
+ y = 0;
+ for (j=0,x=47; j<8; j++,x+=48) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ /* One double and one table addition per remaining column. */
+ for (i=46; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=48) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ sp_384_proj_point_dbl_12(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_384_map_12(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(p, 0, heap);
+ sp_384_point_free_12(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry mapping a point to its pre-computed stripe table. */
+typedef struct sp_cache_384_t {
+ sp_digit x[12]; /* X ordinate of the cached point. */
+ sp_digit y[12]; /* Y ordinate of the cached point. */
+ sp_table_entry_384 table[256]; /* Stripe table for the point. */
+ uint32_t cnt; /* Number of times the point has been requested. */
+ int set; /* Non-zero when this entry is in use. */
+} sp_cache_384_t;
+
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_384 = 0;
+ static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find (or create) the cache entry for point g.
+ * Not thread-safe by itself: without HAVE_THREAD_LS the caller holds
+ * sp_cache_384_lock (see sp_384_ecc_mulmod_12).
+ *
+ * g Point to look up.
+ * cache Receives the matching (or newly assigned) cache entry.
+ */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazily clear all entries on first use. */
+ if (sp_cache_384_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_384[i].set = 0;
+ }
+ sp_cache_384_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_384[i].set)
+ continue;
+
+ if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+ sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+ sp_cache_384[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_384_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_384[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_384_last) {
+ least = sp_cache_384[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_384[j].cnt < least) {
+ i = j;
+ least = sp_cache_384[i].cnt;
+ }
+ }
+ }
+
+ /* Claim the entry for this point; table is built by the caller. */
+ XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+ XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+ sp_cache_384[i].set = 1;
+ sp_cache_384[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_384[i];
+ sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * With FP_ECC, a per-point stripe table is cached: the table is generated
+ * on a point's second use and the stripe method is used from then on;
+ * earlier uses fall back to the plain windowed method.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 12 * 7];
+ sp_cache_384_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ if (initCacheMutex_384 == 0) {
+ wc_InitMutex(&sp_cache_384_lock);
+ initCacheMutex_384 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_384_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_384(g, &cache);
+ if (cache->cnt == 2)
+ sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+ }
+ else {
+ err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Public wrapper: converts the mp_int scalar and ecc_point input to the
+ * internal 12-digit representation, multiplies, and converts back.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[12];
+#endif
+ sp_point_384* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert inputs to the internal 12 x 32-bit digit form. */
+ sp_384_from_mp(k, 12, km);
+ sp_384_point_from_ecc_point_12(point, gm);
+
+ err = sp_384_ecc_mulmod_12(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_to_ecc_point_12(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(point, 0, heap);
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Pre-computed stripe table for the P384 base point (4 stripes of 96
+ * bits, as consumed by sp_384_ecc_mulmod_stripe_12). Entries are affine
+ * X/Y ordinates in Montgomery form; entry layout matches the output of
+ * sp_384_gen_stripe_table_12. */
+static const sp_table_entry_384 p384_table[16] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+ 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+ { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+ 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+ /* 2 */
+ { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+ 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+ { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+ 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+ /* 3 */
+ { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+ 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+ { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+ 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+ /* 4 */
+ { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+ 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+ { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+ 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+ /* 5 */
+ { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+ 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+ { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+ 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+ /* 6 */
+ { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+ 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+ { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+ 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+ /* 7 */
+ { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+ 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+ { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+ 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+ /* 8 */
+ { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+ 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+ { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+ 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+ /* 9 */
+ { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+ 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+ { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+ 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+ /* 10 */
+ { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+ 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+ { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+ 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+ /* 11 */
+ { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+ 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+ { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+ 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+ /* 12 */
+ { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+ 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+ { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+ 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+ /* 13 */
+ { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+ 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+ { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+ 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+ /* 14 */
+ { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+ 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+ { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+ 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+ /* 15 */
+ { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+ 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+ { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+ 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Thin wrapper: delegates to the generic stripe multiplier
+ * (sp_384_ecc_mulmod_stripe_12), supplying the curve's fixed base point
+ * (p384_base) and the precomputed table of base-point multiples
+ * (p384_table) defined above.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* All real work (windowing, point adds/doubles, optional mapping to
+     * affine) happens in the shared stripe implementation. */
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+        k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+ 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+ { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+ 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+ /* 2 */
+ { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4,
+ 0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c },
+ { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a,
+ 0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } },
+ /* 3 */
+ { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d,
+ 0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 },
+ { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a,
+ 0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } },
+ /* 4 */
+ { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+ 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+ { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+ 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+ /* 5 */
+ { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+ 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+ { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+ 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+ /* 6 */
+ { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18,
+ 0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 },
+ { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f,
+ 0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } },
+ /* 7 */
+ { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631,
+ 0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 },
+ { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6,
+ 0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } },
+ /* 8 */
+ { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826,
+ 0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 },
+ { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751,
+ 0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } },
+ /* 9 */
+ { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb,
+ 0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 },
+ { 0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed,
+ 0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } },
+ /* 10 */
+ { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf,
+ 0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 },
+ { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce,
+ 0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } },
+ /* 11 */
+ { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75,
+ 0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 },
+ { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498,
+ 0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } },
+ /* 12 */
+ { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0,
+ 0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 },
+ { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63,
+ 0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } },
+ /* 13 */
+ { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556,
+ 0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c },
+ { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc,
+ 0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } },
+ /* 14 */
+ { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161,
+ 0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 },
+ { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076,
+ 0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } },
+ /* 15 */
+ { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4,
+ 0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a },
+ { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8,
+ 0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } },
+ /* 16 */
+ { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+ 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+ { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+ 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+ /* 17 */
+ { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+ 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+ { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+ 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+ /* 18 */
+ { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3,
+ 0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf },
+ { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660,
+ 0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } },
+ /* 19 */
+ { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b,
+ 0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc },
+ { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4,
+ 0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } },
+ /* 20 */
+ { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+ 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+ { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+ 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+ /* 21 */
+ { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+ 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+ { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+ 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+ /* 22 */
+ { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928,
+ 0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 },
+ { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e,
+ 0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } },
+ /* 23 */
+ { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865,
+ 0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a },
+ { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35,
+ 0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } },
+ /* 24 */
+ { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521,
+ 0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc },
+ { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8,
+ 0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } },
+ /* 25 */
+ { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d,
+ 0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 },
+ { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278,
+ 0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } },
+ /* 26 */
+ { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0,
+ 0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 },
+ { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705,
+ 0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } },
+ /* 27 */
+ { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b,
+ 0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 },
+ { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac,
+ 0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } },
+ /* 28 */
+ { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2,
+ 0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 },
+ { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5,
+ 0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } },
+ /* 29 */
+ { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7,
+ 0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 },
+ { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0,
+ 0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } },
+ /* 30 */
+ { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56,
+ 0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 },
+ { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2,
+ 0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } },
+ /* 31 */
+ { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc,
+ 0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 },
+ { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f,
+ 0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } },
+ /* 32 */
+ { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3,
+ 0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a },
+ { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07,
+ 0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } },
+ /* 33 */
+ { { 0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4,
+ 0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 },
+ { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e,
+ 0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } },
+ /* 34 */
+ { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228,
+ 0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 },
+ { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8,
+ 0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } },
+ /* 35 */
+ { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f,
+ 0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 },
+ { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe,
+ 0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } },
+ /* 36 */
+ { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80,
+ 0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 },
+ { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924,
+ 0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } },
+ /* 37 */
+ { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645,
+ 0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 },
+ { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea,
+ 0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } },
+ /* 38 */
+ { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c,
+ 0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 },
+ { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd,
+ 0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } },
+ /* 39 */
+ { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a,
+ 0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 },
+ { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49,
+ 0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } },
+ /* 40 */
+ { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764,
+ 0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 },
+ { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c,
+ 0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } },
+ /* 41 */
+ { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472,
+ 0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b },
+ { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b,
+ 0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } },
+ /* 42 */
+ { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc,
+ 0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f },
+ { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d,
+ 0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } },
+ /* 43 */
+ { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790,
+ 0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 },
+ { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2,
+ 0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } },
+ /* 44 */
+ { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b,
+ 0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef },
+ { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd,
+ 0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } },
+ /* 45 */
+ { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9,
+ 0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 },
+ { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b,
+ 0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } },
+ /* 46 */
+ { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9,
+ 0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 },
+ { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967,
+ 0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } },
+ /* 47 */
+ { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12,
+ 0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc },
+ { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543,
+ 0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } },
+ /* 48 */
+ { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3,
+ 0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 },
+ { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963,
+ 0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } },
+ /* 49 */
+ { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f,
+ 0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb },
+ { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358,
+ 0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } },
+ /* 50 */
+ { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87,
+ 0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 },
+ { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1,
+ 0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } },
+ /* 51 */
+ { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18,
+ 0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 },
+ { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552,
+ 0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } },
+ /* 52 */
+ { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff,
+ 0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 },
+ { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e,
+ 0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } },
+ /* 53 */
+ { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f,
+ 0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 },
+ { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3,
+ 0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } },
+ /* 54 */
+ { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348,
+ 0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 },
+ { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419,
+ 0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } },
+ /* 55 */
+ { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485,
+ 0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 },
+ { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc,
+ 0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } },
+ /* 56 */
+ { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1,
+ 0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a },
+ { 0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528,
+ 0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } },
+ /* 57 */
+ { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405,
+ 0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 },
+ { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856,
+ 0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } },
+ /* 58 */
+ { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102,
+ 0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 },
+ { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967,
+ 0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } },
+ /* 59 */
+ { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c,
+ 0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 },
+ { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a,
+ 0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } },
+ /* 60 */
+ { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a,
+ 0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 },
+ { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41,
+ 0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } },
+ /* 61 */
+ { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b,
+ 0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 },
+ { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745,
+ 0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } },
+ /* 62 */
+ { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2,
+ 0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 },
+ { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb,
+ 0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } },
+ /* 63 */
+ { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2,
+ 0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 },
+ { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f,
+ 0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } },
+ /* 64 */
+ { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+ 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+ { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+ 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+ /* 65 */
+ { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+ 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+ { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+ 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+ /* 66 */
+ { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753,
+ 0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e },
+ { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac,
+ 0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } },
+ /* 67 */
+ { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c,
+ 0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb },
+ { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe,
+ 0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } },
+ /* 68 */
+ { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+ 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+ { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+ 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+ /* 69 */
+ { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+ 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+ { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+ 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+ /* 70 */
+ { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311,
+ 0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 },
+ { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5,
+ 0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } },
+ /* 71 */
+ { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43,
+ 0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 },
+ { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9,
+ 0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } },
+ /* 72 */
+ { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a,
+ 0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 },
+ { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601,
+ 0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } },
+ /* 73 */
+ { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806,
+ 0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc },
+ { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37,
+ 0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } },
+ /* 74 */
+ { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460,
+ 0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d },
+ { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b,
+ 0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } },
+ /* 75 */
+ { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a,
+ 0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 },
+ { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02,
+ 0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } },
+ /* 76 */
+ { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94,
+ 0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 },
+ { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101,
+ 0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } },
+ /* 77 */
+ { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2,
+ 0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 },
+ { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45,
+ 0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } },
+ /* 78 */
+ { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50,
+ 0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 },
+ { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e,
+ 0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } },
+ /* 79 */
+ { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d,
+ 0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 },
+ { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5,
+ 0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } },
+ /* 80 */
+ { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+ 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+ { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+ 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+ /* 81 */
+ { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+ 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+ { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+ 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+ /* 82 */
+ { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8,
+ 0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df },
+ { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d,
+ 0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } },
+ /* 83 */
+ { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b,
+ 0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 },
+ { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc,
+ 0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } },
+ /* 84 */
+ { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+ 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+ { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+ 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+ /* 85 */
+ { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+ 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+ { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+ 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+ /* 86 */
+ { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b,
+ 0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c },
+ { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930,
+ 0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } },
+ /* 87 */
+ { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd,
+ 0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 },
+ { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd,
+ 0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } },
+ /* 88 */
+ { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d,
+ 0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 },
+ { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378,
+ 0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } },
+ /* 89 */
+ { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8,
+ 0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 },
+ { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14,
+ 0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } },
+ /* 90 */
+ { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338,
+ 0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a },
+ { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8,
+ 0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } },
+ /* 91 */
+ { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3,
+ 0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c },
+ { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a,
+ 0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } },
+ /* 92 */
+ { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45,
+ 0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d },
+ { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967,
+ 0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } },
+ /* 93 */
+ { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f,
+ 0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 },
+ { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4,
+ 0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } },
+ /* 94 */
+ { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7,
+ 0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 },
+ { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5,
+ 0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } },
+ /* 95 */
+ { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41,
+ 0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 },
+ { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f,
+ 0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } },
+ /* 96 */
+ { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9,
+ 0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb },
+ { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1,
+ 0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } },
+ /* 97 */
+ { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3,
+ 0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 },
+ { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25,
+ 0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } },
+ /* 98 */
+ { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247,
+ 0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 },
+ { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7,
+ 0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } },
+ /* 99 */
+ { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283,
+ 0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e },
+ { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39,
+ 0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } },
+ /* 100 */
+ { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06,
+ 0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a },
+ { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062,
+ 0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } },
+ /* 101 */
+ { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642,
+ 0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f },
+ { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175,
+ 0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } },
+ /* 102 */
+ { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e,
+ 0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 },
+ { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02,
+ 0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } },
+ /* 103 */
+ { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414,
+ 0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 },
+ { 0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c,
+ 0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } },
+ /* 104 */
+ { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46,
+ 0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 },
+ { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d,
+ 0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } },
+ /* 105 */
+ { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b,
+ 0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 },
+ { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd,
+ 0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } },
+ /* 106 */
+ { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20,
+ 0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be },
+ { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d,
+ 0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } },
+ /* 107 */
+ { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4,
+ 0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 },
+ { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e,
+ 0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } },
+ /* 108 */
+ { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9,
+ 0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 },
+ { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570,
+ 0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } },
+ /* 109 */
+ { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2,
+ 0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 },
+ { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626,
+ 0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } },
+ /* 110 */
+ { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d,
+ 0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc },
+ { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12,
+ 0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } },
+ /* 111 */
+ { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965,
+ 0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 },
+ { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b,
+ 0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } },
+ /* 112 */
+ { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f,
+ 0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 },
+ { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749,
+ 0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } },
+ /* 113 */
+ { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70,
+ 0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea },
+ { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd,
+ 0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } },
+ /* 114 */
+ { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084,
+ 0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 },
+ { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58,
+ 0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } },
+ /* 115 */
+ { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f,
+ 0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e },
+ { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f,
+ 0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } },
+ /* 116 */
+ { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b,
+ 0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d },
+ { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659,
+ 0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } },
+ /* 117 */
+ { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907,
+ 0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb },
+ { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec,
+ 0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } },
+ /* 118 */
+ { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2,
+ 0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 },
+ { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347,
+ 0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } },
+ /* 119 */
+ { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a,
+ 0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 },
+ { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257,
+ 0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } },
+ /* 120 */
+ { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a,
+ 0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce },
+ { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc,
+ 0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } },
+ /* 121 */
+ { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d,
+ 0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e },
+ { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736,
+ 0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } },
+ /* 122 */
+ { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370,
+ 0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e },
+ { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262,
+ 0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } },
+ /* 123 */
+ { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7,
+ 0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 },
+ { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241,
+ 0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } },
+ /* 124 */
+ { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627,
+ 0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 },
+ { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f,
+ 0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } },
+ /* 125 */
+ { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397,
+ 0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 },
+ { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972,
+ 0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } },
+ /* 126 */
+ { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b,
+ 0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 },
+ { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454,
+ 0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } },
+ /* 127 */
+ { { 0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5,
+ 0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 },
+ { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4,
+ 0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } },
+ /* 128 */
+ { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878,
+ 0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 },
+ { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7,
+ 0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } },
+ /* 129 */
+ { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5,
+ 0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 },
+ { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02,
+ 0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } },
+ /* 130 */
+ { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187,
+ 0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b },
+ { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078,
+ 0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } },
+ /* 131 */
+ { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf,
+ 0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf },
+ { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be,
+ 0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } },
+ /* 132 */
+ { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8,
+ 0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 },
+ { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149,
+ 0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } },
+ /* 133 */
+ { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb,
+ 0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c },
+ { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64,
+ 0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } },
+ /* 134 */
+ { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259,
+ 0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 },
+ { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41,
+ 0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } },
+ /* 135 */
+ { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533,
+ 0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b },
+ { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40,
+ 0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } },
+ /* 136 */
+ { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6,
+ 0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d },
+ { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2,
+ 0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } },
+ /* 137 */
+ { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b,
+ 0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e },
+ { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc,
+ 0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } },
+ /* 138 */
+ { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090,
+ 0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 },
+ { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6,
+ 0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } },
+ /* 139 */
+ { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac,
+ 0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc },
+ { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7,
+ 0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } },
+ /* 140 */
+ { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40,
+ 0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 },
+ { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db,
+ 0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } },
+ /* 141 */
+ { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d,
+ 0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b },
+ { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13,
+ 0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } },
+ /* 142 */
+ { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb,
+ 0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 },
+ { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348,
+ 0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } },
+ /* 143 */
+ { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761,
+ 0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b },
+ { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04,
+ 0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } },
+ /* 144 */
+ { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7,
+ 0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a },
+ { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3,
+ 0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } },
+ /* 145 */
+ { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e,
+ 0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 },
+ { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276,
+ 0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } },
+ /* 146 */
+ { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2,
+ 0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 },
+ { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66,
+ 0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } },
+ /* 147 */
+ { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979,
+ 0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb },
+ { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918,
+ 0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } },
+ /* 148 */
+ { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df,
+ 0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e },
+ { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5,
+ 0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } },
+ /* 149 */
+ { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a,
+ 0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 },
+ { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2,
+ 0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } },
+ /* 150 */
+ { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82,
+ 0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba },
+ { 0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048,
+ 0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } },
+ /* 151 */
+ { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407,
+ 0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a },
+ { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44,
+ 0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } },
+ /* 152 */
+ { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc,
+ 0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 },
+ { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec,
+ 0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } },
+ /* 153 */
+ { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6,
+ 0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 },
+ { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada,
+ 0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } },
+ /* 154 */
+ { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0,
+ 0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c },
+ { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2,
+ 0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } },
+ /* 155 */
+ { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd,
+ 0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e },
+ { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32,
+ 0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } },
+ /* 156 */
+ { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f,
+ 0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 },
+ { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1,
+ 0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } },
+ /* 157 */
+ { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729,
+ 0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 },
+ { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508,
+ 0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } },
+ /* 158 */
+ { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b,
+ 0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 },
+ { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646,
+ 0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } },
+ /* 159 */
+ { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102,
+ 0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 },
+ { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39,
+ 0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } },
+ /* 160 */
+ { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64,
+ 0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 },
+ { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1,
+ 0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } },
+ /* 161 */
+ { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b,
+ 0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d },
+ { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954,
+ 0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } },
+ /* 162 */
+ { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5,
+ 0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 },
+ { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe,
+ 0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } },
+ /* 163 */
+ { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288,
+ 0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd },
+ { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792,
+ 0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } },
+ /* 164 */
+ { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce,
+ 0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e },
+ { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34,
+ 0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } },
+ /* 165 */
+ { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013,
+ 0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 },
+ { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c,
+ 0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } },
+ /* 166 */
+ { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a,
+ 0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f },
+ { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396,
+ 0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } },
+ /* 167 */
+ { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a,
+ 0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 },
+ { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc,
+ 0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } },
+ /* 168 */
+ { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e,
+ 0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 },
+ { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6,
+ 0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } },
+ /* 169 */
+ { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630,
+ 0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad },
+ { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246,
+ 0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } },
+ /* 170 */
+ { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5,
+ 0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d },
+ { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b,
+ 0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } },
+ /* 171 */
+ { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d,
+ 0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 },
+ { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8,
+ 0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } },
+ /* 172 */
+ { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba,
+ 0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 },
+ { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0,
+ 0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } },
+ /* 173 */
+ { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9,
+ 0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 },
+ { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9,
+ 0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } },
+ /* 174 */
+ { { 0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c,
+ 0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 },
+ { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d,
+ 0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } },
+ /* 175 */
+ { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc,
+ 0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 },
+ { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8,
+ 0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } },
+ /* 176 */
+ { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d,
+ 0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 },
+ { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7,
+ 0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } },
+ /* 177 */
+ { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d,
+ 0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 },
+ { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef,
+ 0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } },
+ /* 178 */
+ { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960,
+ 0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 },
+ { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596,
+ 0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } },
+ /* 179 */
+ { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c,
+ 0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef },
+ { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d,
+ 0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } },
+ /* 180 */
+ { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2,
+ 0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 },
+ { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8,
+ 0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } },
+ /* 181 */
+ { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a,
+ 0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c },
+ { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683,
+ 0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } },
+ /* 182 */
+ { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f,
+ 0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf },
+ { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4,
+ 0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } },
+ /* 183 */
+ { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63,
+ 0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f },
+ { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e,
+ 0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } },
+ /* 184 */
+ { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670,
+ 0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e },
+ { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1,
+ 0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } },
+ /* 185 */
+ { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45,
+ 0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba },
+ { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5,
+ 0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } },
+ /* 186 */
+ { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7,
+ 0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a },
+ { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21,
+ 0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } },
+ /* 187 */
+ { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2,
+ 0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 },
+ { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2,
+ 0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } },
+ /* 188 */
+ { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319,
+ 0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f },
+ { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860,
+ 0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } },
+ /* 189 */
+ { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de,
+ 0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 },
+ { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6,
+ 0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } },
+ /* 190 */
+ { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e,
+ 0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 },
+ { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd,
+ 0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } },
+ /* 191 */
+ { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13,
+ 0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e },
+ { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51,
+ 0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } },
+ /* 192 */
+ { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d,
+ 0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 },
+ { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb,
+ 0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } },
+ /* 193 */
+ { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a,
+ 0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd },
+ { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184,
+ 0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } },
+ /* 194 */
+ { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db,
+ 0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 },
+ { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145,
+ 0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } },
+ /* 195 */
+ { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1,
+ 0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d },
+ { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f,
+ 0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } },
+ /* 196 */
+ { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a,
+ 0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 },
+ { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568,
+ 0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } },
+ /* 197 */
+ { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612,
+ 0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f },
+ { 0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0,
+ 0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } },
+ /* 198 */
+ { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57,
+ 0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f },
+ { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6,
+ 0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } },
+ /* 199 */
+ { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d,
+ 0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 },
+ { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5,
+ 0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } },
+ /* 200 */
+ { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e,
+ 0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 },
+ { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9,
+ 0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } },
+ /* 201 */
+ { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800,
+ 0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e },
+ { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c,
+ 0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } },
+ /* 202 */
+ { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39,
+ 0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 },
+ { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c,
+ 0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } },
+ /* 203 */
+ { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139,
+ 0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 },
+ { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7,
+ 0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } },
+ /* 204 */
+ { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92,
+ 0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db },
+ { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2,
+ 0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } },
+ /* 205 */
+ { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc,
+ 0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 },
+ { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc,
+ 0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } },
+ /* 206 */
+ { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0,
+ 0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 },
+ { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a,
+ 0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } },
+ /* 207 */
+ { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73,
+ 0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf },
+ { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1,
+ 0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } },
+ /* 208 */
+ { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0,
+ 0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 },
+ { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d,
+ 0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } },
+ /* 209 */
+ { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6,
+ 0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 },
+ { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370,
+ 0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } },
+ /* 210 */
+ { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553,
+ 0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 },
+ { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806,
+ 0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } },
+ /* 211 */
+ { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6,
+ 0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e },
+ { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b,
+ 0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } },
+ /* 212 */
+ { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b,
+ 0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 },
+ { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314,
+ 0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } },
+ /* 213 */
+ { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c,
+ 0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 },
+ { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08,
+ 0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } },
+ /* 214 */
+ { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0,
+ 0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 },
+ { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180,
+ 0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } },
+ /* 215 */
+ { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d,
+ 0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f },
+ { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277,
+ 0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } },
+ /* 216 */
+ { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4,
+ 0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b },
+ { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072,
+ 0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } },
+ /* 217 */
+ { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44,
+ 0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 },
+ { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1,
+ 0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } },
+ /* 218 */
+ { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247,
+ 0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 },
+ { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a,
+ 0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } },
+ /* 219 */
+ { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361,
+ 0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 },
+ { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd,
+ 0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } },
+ /* 220 */
+ { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc,
+ 0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 },
+ { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5,
+ 0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } },
+ /* 221 */
+ { { 0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094,
+ 0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 },
+ { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf,
+ 0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } },
+ /* 222 */
+ { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b,
+ 0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 },
+ { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b,
+ 0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } },
+ /* 223 */
+ { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094,
+ 0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b },
+ { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d,
+ 0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } },
+ /* 224 */
+ { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61,
+ 0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a },
+ { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95,
+ 0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } },
+ /* 225 */
+ { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947,
+ 0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 },
+ { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24,
+ 0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } },
+ /* 226 */
+ { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e,
+ 0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 },
+ { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0,
+ 0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } },
+ /* 227 */
+ { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404,
+ 0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc },
+ { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73,
+ 0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } },
+ /* 228 */
+ { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d,
+ 0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f },
+ { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5,
+ 0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } },
+ /* 229 */
+ { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee,
+ 0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b },
+ { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb,
+ 0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } },
+ /* 230 */
+ { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf,
+ 0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd },
+ { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8,
+ 0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } },
+ /* 231 */
+ { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d,
+ 0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 },
+ { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052,
+ 0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } },
+ /* 232 */
+ { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d,
+ 0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 },
+ { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e,
+ 0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } },
+ /* 233 */
+ { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1,
+ 0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b },
+ { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea,
+ 0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } },
+ /* 234 */
+ { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e,
+ 0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 },
+ { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787,
+ 0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } },
+ /* 235 */
+ { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365,
+ 0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 },
+ { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73,
+ 0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } },
+ /* 236 */
+ { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54,
+ 0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 },
+ { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef,
+ 0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } },
+ /* 237 */
+ { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd,
+ 0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 },
+ { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc,
+ 0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } },
+ /* 238 */
+ { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175,
+ 0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 },
+ { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf,
+ 0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } },
+ /* 239 */
+ { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391,
+ 0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 },
+ { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55,
+ 0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } },
+ /* 240 */
+ { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190,
+ 0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 },
+ { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d,
+ 0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } },
+ /* 241 */
+ { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7,
+ 0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 },
+ { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1,
+ 0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } },
+ /* 242 */
+ { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d,
+ 0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de },
+ { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1,
+ 0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } },
+ /* 243 */
+ { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031,
+ 0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 },
+ { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10,
+ 0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } },
+ /* 244 */
+ { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a,
+ 0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 },
+ { 0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a,
+ 0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } },
+ /* 245 */
+ { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13,
+ 0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 },
+ { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc,
+ 0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } },
+ /* 246 */
+ { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996,
+ 0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 },
+ { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6,
+ 0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } },
+ /* 247 */
+ { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e,
+ 0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 },
+ { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2,
+ 0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } },
+ /* 248 */
+ { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b,
+ 0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d },
+ { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f,
+ 0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } },
+ /* 249 */
+ { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7,
+ 0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 },
+ { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190,
+ 0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } },
+ /* 250 */
+ { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e,
+ 0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f },
+ { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d,
+ 0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } },
+ /* 251 */
+ { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4,
+ 0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df },
+ { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8,
+ 0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } },
+ /* 252 */
+ { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6,
+ 0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 },
+ { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6,
+ 0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } },
+ /* 253 */
+ { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7,
+ 0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 },
+ { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb,
+ 0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } },
+ /* 254 */
+ { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80,
+ 0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba },
+ { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6,
+ 0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } },
+ /* 255 */
+ { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29,
+ 0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 },
+ { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6,
+ 0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Thin wrapper over the striped pre-computation multiplier: p384_table holds
+ * pre-computed multiples of the base point, so the scalar multiplication
+ * only needs table lookups and point additions.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Public entry point: converts the mp_int scalar into the internal 12-digit
+ * representation, performs the base-point multiplication and converts the
+ * result back to an ecc_point.
+ *
+ * km Scalar to multiply by.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack storage used when dynamic allocation is not configured. */
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Heap-allocate the scalar digits in small/stack-constrained builds. */
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Import scalar, multiply the base point, export the result. */
+        sp_384_from_mp(k, 12, km);
+
+        err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Determine whether the number is zero.
+ * Implementation is constant time: every digit is examined and no
+ * data-dependent branch is taken.
+ *
+ * a Number to check (12 digits).
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_12(const sp_digit* a)
+{
+    sp_digit acc = a[0];
+    int idx;
+
+    /* OR-fold all remaining digits; the result is zero only when all are. */
+    for (idx = 1; idx < 12; idx++) {
+        acc |= a[idx];
+    }
+    return acc == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a A single precision integer (12 32-bit words), modified in place.
+ *
+ * The first word is incremented with 'add'; the carry is then rippled
+ * through the remaining 11 words with 'adc' against the zero in r2.
+ * NOTE(review): this relies on the intervening 'mov'/'ldr'/'str'
+ * instructions leaving the carry flag intact between the 'adc' steps --
+ * confirm against the target Thumb encoding.
+ */
+SP_NOINLINE static void sp_384_add_one_12(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r2, #1\n\t"
+        "ldr r1, [%[a], #0]\n\t"
+        "add r1, r2\n\t"
+        "mov r2, #0\n\t"
+        "str r1, [%[a], #0]\n\t"
+        "ldr r1, [%[a], #4]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #4]\n\t"
+        "ldr r1, [%[a], #8]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #8]\n\t"
+        "ldr r1, [%[a], #12]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #12]\n\t"
+        "ldr r1, [%[a], #16]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #16]\n\t"
+        "ldr r1, [%[a], #20]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #20]\n\t"
+        "ldr r1, [%[a], #24]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #24]\n\t"
+        "ldr r1, [%[a], #28]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #28]\n\t"
+        "ldr r1, [%[a], #32]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #32]\n\t"
+        "ldr r1, [%[a], #36]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #36]\n\t"
+        "ldr r1, [%[a], #40]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #40]\n\t"
+        "ldr r1, [%[a], #44]\n\t"
+        "adc r1, r2\n\t"
+        "str r1, [%[a], #44]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer (output, little-endian word order).
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;       /* bit offset within the current output word */
+
+    r[0] = 0;
+    /* Walk the array backwards so the least significant byte is placed
+     * first, packing 8 bits at a time into r[j]. */
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            /* Current word is full: clamp to 32 bits and carry any bits of
+             * a[i] that did not fit into the next word. */
+            r[j] &= 0xffffffff;
+            s = 32U - s;                    /* bits of a[i] already stored */
+            if (j + 1 >= size) {
+                break;                      /* output buffer exhausted */
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;                     /* bits now occupied in new word */
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any remaining high words. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * Rejection sampling: draw 48 random bytes, accept only candidates below
+ * p384_order2 (presumably order-2 of the curve -- defined earlier in this
+ * file), then add one so the result lies in [1, order-1].
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
+            if (sp_384_cmp_12(k, p384_order2) < 0) {
+                /* Candidate accepted; shift range from 0..order-3 up by 1. */
+                sp_384_add_one_12(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * Generates a private scalar in [1, order-1], multiplies the base point by
+ * it to obtain the public point, and optionally (WOLFSSL_VALIDATE_ECC_KEYGEN)
+ * validates the public point by checking that order * point is infinity.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack storage used when dynamic allocation is not configured. */
+    sp_point_384 p;
+    sp_digit kd[12];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_12(rng, k);
+    }
+    if (err == MP_OKAY) {
+        /* pub = k * G (always mapped to affine: map = 1). */
+        err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        /* order * pub must be the point at infinity for a valid key. */
+        err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        /* NOTE(review): checks that the generated point itself is not the
+         * all-zero (infinity) representation -- confirm intended operand. */
+        if ((sp_384_iszero_12(point->x) == 0) || (sp_384_iszero_12(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_12(infinity, 1, heap);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * r A single precision integer (12 words, little-endian word order).
+ * a Byte array (output, 48 bytes, big endian).
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    /* Fill the array from the end (least significant byte) backwards. */
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<12 && j>=0; i++) {
+        b = 0;              /* bits of r[i] consumed so far */
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit whole bytes from the remainder of the 32-bit word. */
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* s = bits of the next word that belong in the partially
+         * written byte (0 when the word boundary is byte aligned). */
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;            /* re-visit the partially written byte */
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * This implements the ECDH shared-secret computation:
+ * out = X ordinate of (priv * pub), 48 bytes big endian.
+ *
+ * priv Scalar to multiply the point by.
+ * pub Point to multiply.
+ * out Buffer to hold X ordinate.
+ * outLen On entry, size of the buffer in bytes.
+ * On exit, length of data in buffer in bytes.
+ * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack storage used when dynamic allocation is not configured. */
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    /* Output is always a fixed 48 bytes; reject smaller buffers up front. */
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* point = priv * pub, mapped to affine so point->x is usable. */
+        sp_384_from_mp(k, 12, priv);
+        sp_384_point_from_ecc_point_12(point, pub);
+            err = sp_384_ecc_mulmod_12(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Loop form: processes the 12 digits two at a time; the running borrow is
+ * kept in c (0 or all-ones) between iterations.
+ *
+ * a A single precision integer. Updated in place with the difference.
+ * b A single precision integer.
+ * returns the final borrow (0 when no borrow, all-ones otherwise).
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    /* r7 = end of a (12 words * 4 bytes).  Each iteration first re-creates
+     * the carry flag from c via "mov r5, #0; sub r5, %[c]" so the sbc pair
+     * continues the borrow chain, then captures the new borrow with
+     * "sbc %[c], %[c]". */
+    __asm__ __volatile__ (
+        "mov r7, %[a]\n\t"
+        "add r7, #48\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "sbc %[c], %[c]\n\t"
+        "add %[a], #8\n\t"
+        "add %[b], #8\n\t"
+        "cmp %[a], r7\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * Fully unrolled form: the first word pair uses sub, the remaining pairs
+ * chain the borrow with sbc.
+ *
+ * a A single precision integer. Updated in place with the difference.
+ * b A single precision integer.
+ * returns the final borrow (0 when no borrow, all-ones otherwise).
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sub r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #0]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "ldr r6, [%[b], #12]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #8]\n\t"
+        "str r4, [%[a], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "ldr r6, [%[b], #20]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #16]\n\t"
+        "str r4, [%[a], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "ldr r6, [%[b], #28]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #24]\n\t"
+        "str r4, [%[a], #28]\n\t"
+        "ldr r3, [%[a], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "ldr r6, [%[b], #36]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #32]\n\t"
+        "str r4, [%[a], #36]\n\t"
+        "ldr r3, [%[a], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "ldr r6, [%[b], #44]\n\t"
+        "sbc r3, r5\n\t"
+        "sbc r4, r6\n\t"
+        "str r3, [%[a], #40]\n\t"
+        "str r4, [%[a], #44]\n\t"
+        "sbc %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer (13 words: 12 product words plus carry).
+ * a A single precision integer (12 words).
+ * b A single precision digit.
+ *
+ * Thumb-1 has only a 32x32->32 'mul', so each 32x32->64 step is built from
+ * four 16x16 partial products via shift/mask.  r3:r4:r5 hold the running
+ * 96-bit accumulator window; one word is emitted per loop iteration.
+ */
+SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov r6, #48\n\t"
+        "add r6, %[a]\n\t"
+        "mov r8, %[r]\n\t"
+        "mov r9, r6\n\t"
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov %[r], r8\n\t"
+        "str r3, [%[r]]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add %[r], #4\n\t"
+        "add %[a], #4\n\t"
+        "mov r8, %[r]\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ *
+ * Method: a shift-and-subtract estimate of the top 31 quotient bits against
+ * (div/2 + 1), followed by three multiply-subtract correction rounds
+ * (each "r * div" is composed from 16x16 partial products, as Thumb-1 has
+ * no widening multiply) and one final conditional adjustment.
+ */
+SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr r5, %[div], #1\n\t"
+        "add r5, #1\n\t"
+        "mov r8, %[d0]\n\t"
+        "mov r9, %[d1]\n\t"
+        "# Do top 32\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "# Next 30 bits\n\t"
+        "mov r4, #29\n\t"
+        "1:\n\t"
+        "lsl %[d0], %[d0], #1\n\t"
+        "adc %[d1], %[d1]\n\t"
+        "mov r6, r5\n\t"
+        "sub r6, %[d1]\n\t"
+        "sbc r6, r6\n\t"
+        "add %[r], %[r]\n\t"
+        "sub %[r], r6\n\t"
+        "and r6, r5\n\t"
+        "sub %[d1], r6\n\t"
+        "sub r4, #1\n\t"
+        "bpl 1b\n\t"
+        "mov r7, #0\n\t"
+        "add %[r], %[r]\n\t"
+        "add %[r], #1\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "sub %[d1], r4\n\t"
+        "mov r4, %[d1]\n\t"
+        "mov %[d1], r9\n\t"
+        "sbc %[d1], r5\n\t"
+        "mov r5, %[d1]\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        "# r * div - Start\n\t"
+        "lsl %[d1], %[r], #16\n\t"
+        "lsl r4, %[div], #16\n\t"
+        "lsr %[d1], %[d1], #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, %[d1]\n\t"
+        "lsr r6, %[div], #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r5, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r7\n\t"
+        "lsr %[d1], %[r], #16\n\t"
+        "mul r6, %[d1]\n\t"
+        "add r5, r6\n\t"
+        "lsl r6, %[div], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "mul %[d1], r6\n\t"
+        "lsr r6, %[d1], #16\n\t"
+        "lsl %[d1], %[d1], #16\n\t"
+        "add r4, %[d1]\n\t"
+        "adc r5, r6\n\t"
+        "# r * div - Done\n\t"
+        "mov %[d1], r8\n\t"
+        "mov r6, r9\n\t"
+        "sub r4, %[d1], r4\n\t"
+        "sbc r6, r5\n\t"
+        "mov r5, r6\n\t"
+        "add %[r], r5\n\t"
+        "mov r6, %[div]\n\t"
+        "sub r6, r4\n\t"
+        "sbc r6, r6\n\t"
+        "sub %[r], r6\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r7", "r6", "r8", "r9"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r Destination for the masked digits.
+ * a Source digits.
+ * m Mask to AND against each digit (typically all-ones or all-zeros,
+ *   making this a constant-time conditional copy/clear).
+ */
+static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int j;
+
+    for (j = 0; j < 12; j++) {
+        r[j] = a[j] & m;
+    }
+#else
+    /* Fully unrolled, two digits per line. */
+    r[0]  = a[0]  & m;  r[1]  = a[1]  & m;
+    r[2]  = a[2]  & m;  r[3]  = a[3]  & m;
+    r[4]  = a[4]  & m;  r[5]  = a[5]  & m;
+    r[6]  = a[6]  & m;  r[7]  = a[7]  & m;
+    r[8]  = a[8]  & m;  r[9]  = a[9]  & m;
+    r[10] = a[10] & m;  r[11] = a[11] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook division using the approximate single-word divide
+ * div_384_word_12 with per-step corrections.
+ *
+ * a Number to be divided (24 words).
+ * d Number to divide with (12 words).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (12 words).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[24], t2[13];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[11];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
+    for (i=11; i>=0; i--) {
+        /* Estimate one quotient word (may be off by one, see
+         * div_384_word_12). */
+        r1 = div_384_word_12(t1[12 + i], t1[12 + i - 1], div);
+
+        sp_384_mul_d_12(t2, d, r1);
+        t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
+        t1[12 + i] -= t2[12];
+        /* If the estimate was too large the subtraction underflowed and
+         * t1[12+i] is all-ones; conditionally add d back (twice to cover
+         * the worst case) using the mask to stay constant time. */
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+    }
+
+    /* One final conditional subtract brings the remainder below d. */
+    r1 = sp_384_cmp_12(t1, d) >= 0;
+    sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Wrapper over sp_384_div_12 that keeps only the remainder.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced (24 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_12(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve.
+ * Stored least-significant 32-bit word first; used as the exponent for
+ * Fermat inversion modulo the order. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve.
+ * The high half is all ones and is handled by fixed code in the large
+ * (non-small) inversion routine. Least-significant word first. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P384 curve. (r = a * b mod order)
+ *
+ * Operands are in Montgomery form; the product is Montgomery-reduced
+ * against the curve order.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * Operand is in Montgomery form; the square is Montgomery-reduced against
+ * the curve order.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * Squaring n times raises to the power 2^n - building block for the
+ * fixed addition chain in sp_384_mont_inv_order_12.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of times to square (must be >= 1).
+ */
+static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_12(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_12(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * Fermat inversion: r = a^(order-2) mod order. The small build walks the
+ * exponent bits MSB-first; the large build uses a fixed addition chain for
+ * the all-ones top half of the exponent and bit-scans only the low half
+ * (p384_order_low).
+ *
+ * r  Inverse result.
+ * a  Number to invert.
+ * td Temporary data (at least 6*12 digits in the non-small build).
+ */
+static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Square-and-multiply over the 383-bit exponent order-2, MSB first. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_12(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_12(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_12(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_12(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_12(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_12(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_12(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_12(t2, t, 48);
+    /* t= a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_12(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t2, t2, t);
+    /* Bit-scan the low 192 bits of the exponent (p384_order_low). */
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_12(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t2, t2, a);
+        }
+    }
+    /* Final bit of the exponent (order-2 ends in ...01). */
+    sp_384_mont_sqr_order_12(t2, t2);
+    sp_384_mont_mul_order_12(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 384 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash    Hash to sign.
+ * hashLen Length of the hash data.
+ * rng     Random number generator.
+ * priv    Private part of key - scalar.
+ * rm      First part of result as an mp_int.
+ * sm      Second part of result as an mp_int.
+ * km      Ephemeral scalar k to use; when NULL or zero a fresh random k is
+ *         generated. Zeroed after use so it cannot be reused.
+ * heap    Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*12];
+    sp_digit xd[2*12];
+    sp_digit kd[2*12];
+    sp_digit rd[2*12];
+    sp_digit td[3 * 2*12];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* One allocation for e, x, k, r (2*12 each) and tmp (3*2*12). */
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 12;
+        x = d + 2 * 12;
+        k = d + 4 * 12;
+        r = d + 6 * 12;
+        tmp = d + 8 * 12;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        /* s and kInv alias e and k: each pair is never live simultaneously. */
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 12, hash, (int)hashLen);
+    }
+
+    /* Retry until the signature components are non-zero (bounded). */
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 12, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_12(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 12, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
+            sp_384_norm_12(r);
+            c = sp_384_cmp_12(r, p384_order);
+            sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_384_mul_12(k, k, p384_norm_order);
+            err = sp_384_mod_12(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(k);
+            /* kInv = 1/k mod order */
+            sp_384_mont_inv_order_12(kInv, k, tmp);
+            sp_384_norm_12(kInv);
+
+            /* s = r * x + e */
+            sp_384_mul_12(x, x, r);
+            err = sp_384_mod_12(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(x);
+            carry = sp_384_add_12(s, e, x);
+            sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
+            sp_384_norm_12(s);
+            c = sp_384_cmp_12(s, p384_order);
+            sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(s);
+
+            /* s = s * k^-1 mod order */
+            sp_384_mont_mul_order_12(s, s, kInv);
+            sp_384_norm_12(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_12(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        /* Zeroize the whole allocation - including tmp, which held
+         * intermediates of the k^-1 computation. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 12);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 384)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash    Hash to verify.
+ * hashLen Length of the hash data.
+ * pX      X ordinate of the public key point.
+ * pY      Y ordinate of the public key point.
+ * pZ      Z ordinate of the public key point.
+ * r       First part of the signature as an mp_int.
+ * sm      Second part of the signature as an mp_int.
+ * res     Set to 1 when the signature verifies, 0 otherwise.
+ * heap    Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success
+ * (check *res for the verification outcome).
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*12];
+    sp_digit u2d[2*12];
+    sp_digit sd[2*12];
+    sp_digit tmpd[2*12 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_12(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* One allocation for u1, u2, s (2*12 each) and tmp (2*12*5). */
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 12;
+        u2  = d + 2 * 12;
+        s   = d + 4 * 12;
+        tmp = d + 6 * 12;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 12, hash, (int)hashLen);
+        sp_384_from_mp(u2, 12, r);
+        sp_384_from_mp(s, 12, sm);
+        sp_384_from_mp(p2->x, 12, pX);
+        sp_384_from_mp(p2->y, 12, pY);
+        sp_384_from_mp(p2->z, 12, pZ);
+
+        /* Convert s to Montgomery form (mod order). */
+        {
+            sp_384_mul_12(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_12(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_12(s);
+        {
+            /* One inversion of s serves both u1 = e/s and u2 = r/s. */
+            sp_384_mont_inv_order_12(s, s, tmp);
+            sp_384_mont_mul_order_12(u1, u1, s);
+            sp_384_mont_mul_order_12(u2, u2, s);
+        }
+
+        err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            /* p1 = u1.G + u2.Q; handle the doubling/infinity corner cases
+             * of the projective addition explicitly. */
+            sp_384_proj_point_add_12(p1, p1, p2, tmp);
+            if (sp_384_iszero_12(p1->z)) {
+                if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
+                    /* Points were equal: the add degenerated, double instead. */
+                    sp_384_proj_point_dbl_12(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 12, r);
+        err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 12, r);
+            carry = sp_384_add_12(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_12(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_12(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
+                                                                  p384_mp_mod);
+                        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_12(p1, 0, heap);
+    sp_384_point_free_12(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * Evaluates y^2 - x^3 + 3x (mod p) and compares with the curve constant b:
+ * the point is on y^2 = x^3 - 3x + b exactly when they are equal.
+ *
+ * point EC point (affine; Montgomery form expected by the comparison with
+ *       p384_b - NOTE(review): confirm against callers).
+ * heap  Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*12];
+    sp_digit t2d[2*12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* t1 = y^2 mod p */
+        sp_384_sqr_12(t1, point->y);
+        (void)sp_384_mod_12(t1, t1, p384_mod);
+        /* t2 = x^3 mod p */
+        sp_384_sqr_12(t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        sp_384_mul_12(t2, t2, point->x);
+        (void)sp_384_mod_12(t2, t2, p384_mod);
+        /* t1 = y^2 - x^3 (via p - x^3 then modular add) */
+        (void)sp_384_sub_12(t2, p384_mod, t2);
+        sp_384_mont_add_12(t1, t1, t2, p384_mod);
+
+        /* t1 = y^2 - x^3 + 3x */
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_12(t1, t1, point->x, p384_mod);
+
+        /* On the curve iff t1 == b. */
+        if (sp_384_cmp_12(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * Public wrapper: loads the mp_int ordinates into a point with z = 1 and
+ * delegates to sp_384_ecc_is_point_12.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        /* z = 1: affine coordinates. */
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_12(pub, NULL);
+    }
+
+    sp_384_point_free_12(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * Checks performed, in order: point not at infinity; x and y below the
+ * prime; point satisfies the curve equation; point * order == infinity;
+ * base * private == point.
+ *
+ * pX    X ordinate of EC point.
+ * pY    Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[12];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                  DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        /* z = 1: public key is given in affine coordinates. */
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 12, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_12(pub->x) != 0) &&
+            (sp_384_iszero_12(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_12(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_12(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_12(p->x) == 0) ||
+            (sp_384_iszero_12(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_12(p->x, pub->x) != 0 ||
+            sp_384_cmp_12(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    /* NULL-initialize so the cleanup path is safe when point allocation
+     * fails before tmp is assigned (previously freed an uninitialized
+     * pointer in the small-stack build). */
+    sp_digit* tmp = NULL;
+    sp_point_384* p = NULL;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+        sp_384_from_mp(q->x, 12, qX);
+        sp_384_from_mp(q->y, 12, qY);
+        sp_384_from_mp(q->z, 12, qZ);
+
+        sp_384_proj_point_add_12(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(q, 0, NULL);
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 2];
+    sp_point_384 pd;
+#endif
+    /* NULL-initialize so the cleanup path is safe when point allocation
+     * fails before tmp is assigned (previously freed an uninitialized
+     * pointer in the small-stack build). */
+    sp_digit* tmp = NULL;
+    sp_point_384* p = NULL;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+        sp_384_proj_point_dbl_12(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 6];
+    sp_point_384 pd;
+#endif
+    /* NULL-initialize so the cleanup path is safe when point allocation
+     * fails before tmp is assigned (previously freed an uninitialized
+     * pointer in the small-stack build). */
+    sp_digit* tmp = NULL;
+    sp_point_384* p = NULL;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+        sp_384_map_12(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * Computed by exponentiation with a fixed addition chain (the exponent
+ * values are tracked in the step comments below); input and output are in
+ * Montgomery form. NOTE(review): valid only when a square root exists -
+ * callers are expected to validate the result.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_12(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 12];
+    sp_digit t2d[2 * 12];
+    sp_digit t3d[2 * 12];
+    sp_digit t4d[2 * 12];
+    sp_digit t5d[2 * 12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+        t3 = d + 4 * 12;
+        t4 = d + 6 * 12;
+        t5 = d + 8 * 12;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * Computes y = sqrt(x^3 - 3x + b) mod p and selects the root whose parity
+ * matches the requested odd flag.
+ *
+ * xm  X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym  Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 12];
+    sp_digit yd[2 * 12];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 12;
+        y = d + 2 * 12;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        /* Convert x to Montgomery form. */
+        sp_384_from_mp(x, 12, xm);
+        err = sp_384_mod_mul_norm_12(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b (b converted to Montgomery form into x) */
+        err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_12(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_12(y);
+    }
+    if (err == MP_OKAY) {
+        /* Convert out of Montgomery form and select root by parity. */
+        XMEMSET(y + 12, 0, 12U * sizeof(sp_digit));
+        sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            /* Wrong parity: use the other root, p - y. */
+            sp_384_mont_sub_12(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_SP_ARM_THUMB_ASM */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_c32.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_c32.c
new file mode 100644
index 000000000..4b9596dc7
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_c32.c
@@ -0,0 +1,23857 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+ defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef SP_RSA_PRIVATE_EXP_D
+#define SP_RSA_PRIVATE_EXP_D
+#endif
+
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifndef WOLFSSL_SP_ASM
+#if SP_WORD_SIZE == 32
+#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) && (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Mask for address to obfuscate which of the two address will be used. */
+static const size_t addr_mask[2] = { 0, (size_t)-1 };
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * Packs bytes, least significant byte first, into 23-bit digits held in
+ * 32-bit sp_digits.  Any digits beyond the data are zeroed.
+ *
+ * r A single precision integer.
+ * size Maximum number of digits to fill in (note: digits, not bytes).
+ * a Byte array, big endian.
+ * n Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;   /* bit position within r[j] where the next byte lands */
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 15U) {
+            /* Byte straddles the 23-bit boundary: mask off the digit and
+             * start the next one with the byte's remaining high bits. */
+            r[j] &= 0x7fffff;
+            s = 23U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero the unused top digits. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * Re-packs the mp_int's DIGIT_BIT-wide digits into 23-bit sp_digits.
+ * Three compile-time cases: same width (copy), wider source digits
+ * (split), narrower source digits (accumulate).
+ *
+ * r A single precision integer.
+ * size Maximum number of digits to fill in (note: digits, not bytes).
+ * a A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 23
+    /* Same radix: straight copy followed by zero padding. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 23
+    /* Source digits are wider: each mp_digit is split across one or more
+     * 23-bit sp_digits; s tracks how many source bits were consumed. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x7fffff;
+        s = 23U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 23U) <= (word32)DIGIT_BIT) {
+            s += 23U;
+            r[j] &= 0x7fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* Source digits are narrower: accumulate successive mp_digits into a
+     * 23-bit sp_digit, spilling the overflow into the next digit. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 23) {
+            r[j] &= 0x7fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 23 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * NOTE: r is normalised in place first (carries propagated so every digit
+ * holds exactly 23 bits), so this call modifies r.
+ *
+ * r A single precision integer (90 digits).
+ * a Byte array, receives 256 bytes.
+ */
+static void sp_2048_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    /* Normalise: push each digit's overflow into the next digit. */
+    for (i=0; i<89; i++) {
+        r[i+1] += r[i] >> 23;
+        r[i] &= 0x7fffff;
+    }
+    j = 2048 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<90 && j>=0; i++) {
+        b = 0;   /* bits of r[i] consumed so far */
+        /* Merge the digit's low bits into the partially-filled byte.
+         * lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit the digit's remaining whole bytes. */
+        while (b < 23) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* s = bits of the next digit needed to top up the last byte. */
+        s = 8 - (b - 23);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;   /* last byte is only partially filled; revisit it */
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Fully unrolled schoolbook multiply of two 15-digit numbers (23 bits per
+ * digit).  Each t<k> is the exact 64-bit sum of all partial products whose
+ * digit indices add up to k; carries are propagated once at the end, so
+ * the order of the carry-chain statements must not be changed.
+ *
+ * r A single precision integer.  Receives 30 digits.
+ * a A single precision integer.  15 digits.
+ * b A single precision integer.  15 digits.
+ */
+SP_NOINLINE static void sp_2048_mul_15(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int64_t t0 = ((int64_t)a[ 0]) * b[ 0];
+    int64_t t1 = ((int64_t)a[ 0]) * b[ 1]
+        + ((int64_t)a[ 1]) * b[ 0];
+    int64_t t2 = ((int64_t)a[ 0]) * b[ 2]
+        + ((int64_t)a[ 1]) * b[ 1]
+        + ((int64_t)a[ 2]) * b[ 0];
+    int64_t t3 = ((int64_t)a[ 0]) * b[ 3]
+        + ((int64_t)a[ 1]) * b[ 2]
+        + ((int64_t)a[ 2]) * b[ 1]
+        + ((int64_t)a[ 3]) * b[ 0];
+    int64_t t4 = ((int64_t)a[ 0]) * b[ 4]
+        + ((int64_t)a[ 1]) * b[ 3]
+        + ((int64_t)a[ 2]) * b[ 2]
+        + ((int64_t)a[ 3]) * b[ 1]
+        + ((int64_t)a[ 4]) * b[ 0];
+    int64_t t5 = ((int64_t)a[ 0]) * b[ 5]
+        + ((int64_t)a[ 1]) * b[ 4]
+        + ((int64_t)a[ 2]) * b[ 3]
+        + ((int64_t)a[ 3]) * b[ 2]
+        + ((int64_t)a[ 4]) * b[ 1]
+        + ((int64_t)a[ 5]) * b[ 0];
+    int64_t t6 = ((int64_t)a[ 0]) * b[ 6]
+        + ((int64_t)a[ 1]) * b[ 5]
+        + ((int64_t)a[ 2]) * b[ 4]
+        + ((int64_t)a[ 3]) * b[ 3]
+        + ((int64_t)a[ 4]) * b[ 2]
+        + ((int64_t)a[ 5]) * b[ 1]
+        + ((int64_t)a[ 6]) * b[ 0];
+    int64_t t7 = ((int64_t)a[ 0]) * b[ 7]
+        + ((int64_t)a[ 1]) * b[ 6]
+        + ((int64_t)a[ 2]) * b[ 5]
+        + ((int64_t)a[ 3]) * b[ 4]
+        + ((int64_t)a[ 4]) * b[ 3]
+        + ((int64_t)a[ 5]) * b[ 2]
+        + ((int64_t)a[ 6]) * b[ 1]
+        + ((int64_t)a[ 7]) * b[ 0];
+    int64_t t8 = ((int64_t)a[ 0]) * b[ 8]
+        + ((int64_t)a[ 1]) * b[ 7]
+        + ((int64_t)a[ 2]) * b[ 6]
+        + ((int64_t)a[ 3]) * b[ 5]
+        + ((int64_t)a[ 4]) * b[ 4]
+        + ((int64_t)a[ 5]) * b[ 3]
+        + ((int64_t)a[ 6]) * b[ 2]
+        + ((int64_t)a[ 7]) * b[ 1]
+        + ((int64_t)a[ 8]) * b[ 0];
+    int64_t t9 = ((int64_t)a[ 0]) * b[ 9]
+        + ((int64_t)a[ 1]) * b[ 8]
+        + ((int64_t)a[ 2]) * b[ 7]
+        + ((int64_t)a[ 3]) * b[ 6]
+        + ((int64_t)a[ 4]) * b[ 5]
+        + ((int64_t)a[ 5]) * b[ 4]
+        + ((int64_t)a[ 6]) * b[ 3]
+        + ((int64_t)a[ 7]) * b[ 2]
+        + ((int64_t)a[ 8]) * b[ 1]
+        + ((int64_t)a[ 9]) * b[ 0];
+    int64_t t10 = ((int64_t)a[ 0]) * b[10]
+        + ((int64_t)a[ 1]) * b[ 9]
+        + ((int64_t)a[ 2]) * b[ 8]
+        + ((int64_t)a[ 3]) * b[ 7]
+        + ((int64_t)a[ 4]) * b[ 6]
+        + ((int64_t)a[ 5]) * b[ 5]
+        + ((int64_t)a[ 6]) * b[ 4]
+        + ((int64_t)a[ 7]) * b[ 3]
+        + ((int64_t)a[ 8]) * b[ 2]
+        + ((int64_t)a[ 9]) * b[ 1]
+        + ((int64_t)a[10]) * b[ 0];
+    int64_t t11 = ((int64_t)a[ 0]) * b[11]
+        + ((int64_t)a[ 1]) * b[10]
+        + ((int64_t)a[ 2]) * b[ 9]
+        + ((int64_t)a[ 3]) * b[ 8]
+        + ((int64_t)a[ 4]) * b[ 7]
+        + ((int64_t)a[ 5]) * b[ 6]
+        + ((int64_t)a[ 6]) * b[ 5]
+        + ((int64_t)a[ 7]) * b[ 4]
+        + ((int64_t)a[ 8]) * b[ 3]
+        + ((int64_t)a[ 9]) * b[ 2]
+        + ((int64_t)a[10]) * b[ 1]
+        + ((int64_t)a[11]) * b[ 0];
+    int64_t t12 = ((int64_t)a[ 0]) * b[12]
+        + ((int64_t)a[ 1]) * b[11]
+        + ((int64_t)a[ 2]) * b[10]
+        + ((int64_t)a[ 3]) * b[ 9]
+        + ((int64_t)a[ 4]) * b[ 8]
+        + ((int64_t)a[ 5]) * b[ 7]
+        + ((int64_t)a[ 6]) * b[ 6]
+        + ((int64_t)a[ 7]) * b[ 5]
+        + ((int64_t)a[ 8]) * b[ 4]
+        + ((int64_t)a[ 9]) * b[ 3]
+        + ((int64_t)a[10]) * b[ 2]
+        + ((int64_t)a[11]) * b[ 1]
+        + ((int64_t)a[12]) * b[ 0];
+    int64_t t13 = ((int64_t)a[ 0]) * b[13]
+        + ((int64_t)a[ 1]) * b[12]
+        + ((int64_t)a[ 2]) * b[11]
+        + ((int64_t)a[ 3]) * b[10]
+        + ((int64_t)a[ 4]) * b[ 9]
+        + ((int64_t)a[ 5]) * b[ 8]
+        + ((int64_t)a[ 6]) * b[ 7]
+        + ((int64_t)a[ 7]) * b[ 6]
+        + ((int64_t)a[ 8]) * b[ 5]
+        + ((int64_t)a[ 9]) * b[ 4]
+        + ((int64_t)a[10]) * b[ 3]
+        + ((int64_t)a[11]) * b[ 2]
+        + ((int64_t)a[12]) * b[ 1]
+        + ((int64_t)a[13]) * b[ 0];
+    int64_t t14 = ((int64_t)a[ 0]) * b[14]
+        + ((int64_t)a[ 1]) * b[13]
+        + ((int64_t)a[ 2]) * b[12]
+        + ((int64_t)a[ 3]) * b[11]
+        + ((int64_t)a[ 4]) * b[10]
+        + ((int64_t)a[ 5]) * b[ 9]
+        + ((int64_t)a[ 6]) * b[ 8]
+        + ((int64_t)a[ 7]) * b[ 7]
+        + ((int64_t)a[ 8]) * b[ 6]
+        + ((int64_t)a[ 9]) * b[ 5]
+        + ((int64_t)a[10]) * b[ 4]
+        + ((int64_t)a[11]) * b[ 3]
+        + ((int64_t)a[12]) * b[ 2]
+        + ((int64_t)a[13]) * b[ 1]
+        + ((int64_t)a[14]) * b[ 0];
+    int64_t t15 = ((int64_t)a[ 1]) * b[14]
+        + ((int64_t)a[ 2]) * b[13]
+        + ((int64_t)a[ 3]) * b[12]
+        + ((int64_t)a[ 4]) * b[11]
+        + ((int64_t)a[ 5]) * b[10]
+        + ((int64_t)a[ 6]) * b[ 9]
+        + ((int64_t)a[ 7]) * b[ 8]
+        + ((int64_t)a[ 8]) * b[ 7]
+        + ((int64_t)a[ 9]) * b[ 6]
+        + ((int64_t)a[10]) * b[ 5]
+        + ((int64_t)a[11]) * b[ 4]
+        + ((int64_t)a[12]) * b[ 3]
+        + ((int64_t)a[13]) * b[ 2]
+        + ((int64_t)a[14]) * b[ 1];
+    int64_t t16 = ((int64_t)a[ 2]) * b[14]
+        + ((int64_t)a[ 3]) * b[13]
+        + ((int64_t)a[ 4]) * b[12]
+        + ((int64_t)a[ 5]) * b[11]
+        + ((int64_t)a[ 6]) * b[10]
+        + ((int64_t)a[ 7]) * b[ 9]
+        + ((int64_t)a[ 8]) * b[ 8]
+        + ((int64_t)a[ 9]) * b[ 7]
+        + ((int64_t)a[10]) * b[ 6]
+        + ((int64_t)a[11]) * b[ 5]
+        + ((int64_t)a[12]) * b[ 4]
+        + ((int64_t)a[13]) * b[ 3]
+        + ((int64_t)a[14]) * b[ 2];
+    int64_t t17 = ((int64_t)a[ 3]) * b[14]
+        + ((int64_t)a[ 4]) * b[13]
+        + ((int64_t)a[ 5]) * b[12]
+        + ((int64_t)a[ 6]) * b[11]
+        + ((int64_t)a[ 7]) * b[10]
+        + ((int64_t)a[ 8]) * b[ 9]
+        + ((int64_t)a[ 9]) * b[ 8]
+        + ((int64_t)a[10]) * b[ 7]
+        + ((int64_t)a[11]) * b[ 6]
+        + ((int64_t)a[12]) * b[ 5]
+        + ((int64_t)a[13]) * b[ 4]
+        + ((int64_t)a[14]) * b[ 3];
+    int64_t t18 = ((int64_t)a[ 4]) * b[14]
+        + ((int64_t)a[ 5]) * b[13]
+        + ((int64_t)a[ 6]) * b[12]
+        + ((int64_t)a[ 7]) * b[11]
+        + ((int64_t)a[ 8]) * b[10]
+        + ((int64_t)a[ 9]) * b[ 9]
+        + ((int64_t)a[10]) * b[ 8]
+        + ((int64_t)a[11]) * b[ 7]
+        + ((int64_t)a[12]) * b[ 6]
+        + ((int64_t)a[13]) * b[ 5]
+        + ((int64_t)a[14]) * b[ 4];
+    int64_t t19 = ((int64_t)a[ 5]) * b[14]
+        + ((int64_t)a[ 6]) * b[13]
+        + ((int64_t)a[ 7]) * b[12]
+        + ((int64_t)a[ 8]) * b[11]
+        + ((int64_t)a[ 9]) * b[10]
+        + ((int64_t)a[10]) * b[ 9]
+        + ((int64_t)a[11]) * b[ 8]
+        + ((int64_t)a[12]) * b[ 7]
+        + ((int64_t)a[13]) * b[ 6]
+        + ((int64_t)a[14]) * b[ 5];
+    int64_t t20 = ((int64_t)a[ 6]) * b[14]
+        + ((int64_t)a[ 7]) * b[13]
+        + ((int64_t)a[ 8]) * b[12]
+        + ((int64_t)a[ 9]) * b[11]
+        + ((int64_t)a[10]) * b[10]
+        + ((int64_t)a[11]) * b[ 9]
+        + ((int64_t)a[12]) * b[ 8]
+        + ((int64_t)a[13]) * b[ 7]
+        + ((int64_t)a[14]) * b[ 6];
+    int64_t t21 = ((int64_t)a[ 7]) * b[14]
+        + ((int64_t)a[ 8]) * b[13]
+        + ((int64_t)a[ 9]) * b[12]
+        + ((int64_t)a[10]) * b[11]
+        + ((int64_t)a[11]) * b[10]
+        + ((int64_t)a[12]) * b[ 9]
+        + ((int64_t)a[13]) * b[ 8]
+        + ((int64_t)a[14]) * b[ 7];
+    int64_t t22 = ((int64_t)a[ 8]) * b[14]
+        + ((int64_t)a[ 9]) * b[13]
+        + ((int64_t)a[10]) * b[12]
+        + ((int64_t)a[11]) * b[11]
+        + ((int64_t)a[12]) * b[10]
+        + ((int64_t)a[13]) * b[ 9]
+        + ((int64_t)a[14]) * b[ 8];
+    int64_t t23 = ((int64_t)a[ 9]) * b[14]
+        + ((int64_t)a[10]) * b[13]
+        + ((int64_t)a[11]) * b[12]
+        + ((int64_t)a[12]) * b[11]
+        + ((int64_t)a[13]) * b[10]
+        + ((int64_t)a[14]) * b[ 9];
+    int64_t t24 = ((int64_t)a[10]) * b[14]
+        + ((int64_t)a[11]) * b[13]
+        + ((int64_t)a[12]) * b[12]
+        + ((int64_t)a[13]) * b[11]
+        + ((int64_t)a[14]) * b[10];
+    int64_t t25 = ((int64_t)a[11]) * b[14]
+        + ((int64_t)a[12]) * b[13]
+        + ((int64_t)a[13]) * b[12]
+        + ((int64_t)a[14]) * b[11];
+    int64_t t26 = ((int64_t)a[12]) * b[14]
+        + ((int64_t)a[13]) * b[13]
+        + ((int64_t)a[14]) * b[12];
+    int64_t t27 = ((int64_t)a[13]) * b[14]
+        + ((int64_t)a[14]) * b[13];
+    int64_t t28 = ((int64_t)a[14]) * b[14];
+
+    /* Carry propagation: each column keeps its low 23 bits and pushes the
+     * rest into the next column; t28's high part becomes the top digit. */
+    t1 += t0 >> 23; r[ 0] = t0 & 0x7fffff;
+    t2 += t1 >> 23; r[ 1] = t1 & 0x7fffff;
+    t3 += t2 >> 23; r[ 2] = t2 & 0x7fffff;
+    t4 += t3 >> 23; r[ 3] = t3 & 0x7fffff;
+    t5 += t4 >> 23; r[ 4] = t4 & 0x7fffff;
+    t6 += t5 >> 23; r[ 5] = t5 & 0x7fffff;
+    t7 += t6 >> 23; r[ 6] = t6 & 0x7fffff;
+    t8 += t7 >> 23; r[ 7] = t7 & 0x7fffff;
+    t9 += t8 >> 23; r[ 8] = t8 & 0x7fffff;
+    t10 += t9 >> 23; r[ 9] = t9 & 0x7fffff;
+    t11 += t10 >> 23; r[10] = t10 & 0x7fffff;
+    t12 += t11 >> 23; r[11] = t11 & 0x7fffff;
+    t13 += t12 >> 23; r[12] = t12 & 0x7fffff;
+    t14 += t13 >> 23; r[13] = t13 & 0x7fffff;
+    t15 += t14 >> 23; r[14] = t14 & 0x7fffff;
+    t16 += t15 >> 23; r[15] = t15 & 0x7fffff;
+    t17 += t16 >> 23; r[16] = t16 & 0x7fffff;
+    t18 += t17 >> 23; r[17] = t17 & 0x7fffff;
+    t19 += t18 >> 23; r[18] = t18 & 0x7fffff;
+    t20 += t19 >> 23; r[19] = t19 & 0x7fffff;
+    t21 += t20 >> 23; r[20] = t20 & 0x7fffff;
+    t22 += t21 >> 23; r[21] = t21 & 0x7fffff;
+    t23 += t22 >> 23; r[22] = t22 & 0x7fffff;
+    t24 += t23 >> 23; r[23] = t23 & 0x7fffff;
+    t25 += t24 >> 23; r[24] = t24 & 0x7fffff;
+    t26 += t25 >> 23; r[25] = t25 & 0x7fffff;
+    t27 += t26 >> 23; r[26] = t26 & 0x7fffff;
+    t28 += t27 >> 23; r[27] = t27 & 0x7fffff;
+    r[29] = (sp_digit)(t28 >> 23);
+    r[28] = t28 & 0x7fffff;
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Fully unrolled squaring of a 15-digit number (23 bits per digit).
+ * Symmetric cross products are computed once and doubled; diagonal
+ * squares are added once.  Each t<k> is the exact 64-bit column sum;
+ * carries are propagated once at the end, so the order of the
+ * carry-chain statements must not be changed.
+ *
+ * r A single precision integer.  Receives 30 digits.
+ * a A single precision integer.  15 digits.
+ */
+SP_NOINLINE static void sp_2048_sqr_15(sp_digit* r, const sp_digit* a)
+{
+    int64_t t0 = ((int64_t)a[ 0]) * a[ 0];
+    int64_t t1 = (((int64_t)a[ 0]) * a[ 1]) * 2;
+    int64_t t2 = (((int64_t)a[ 0]) * a[ 2]) * 2
+        + ((int64_t)a[ 1]) * a[ 1];
+    int64_t t3 = (((int64_t)a[ 0]) * a[ 3]
+        + ((int64_t)a[ 1]) * a[ 2]) * 2;
+    int64_t t4 = (((int64_t)a[ 0]) * a[ 4]
+        + ((int64_t)a[ 1]) * a[ 3]) * 2
+        + ((int64_t)a[ 2]) * a[ 2];
+    int64_t t5 = (((int64_t)a[ 0]) * a[ 5]
+        + ((int64_t)a[ 1]) * a[ 4]
+        + ((int64_t)a[ 2]) * a[ 3]) * 2;
+    int64_t t6 = (((int64_t)a[ 0]) * a[ 6]
+        + ((int64_t)a[ 1]) * a[ 5]
+        + ((int64_t)a[ 2]) * a[ 4]) * 2
+        + ((int64_t)a[ 3]) * a[ 3];
+    int64_t t7 = (((int64_t)a[ 0]) * a[ 7]
+        + ((int64_t)a[ 1]) * a[ 6]
+        + ((int64_t)a[ 2]) * a[ 5]
+        + ((int64_t)a[ 3]) * a[ 4]) * 2;
+    int64_t t8 = (((int64_t)a[ 0]) * a[ 8]
+        + ((int64_t)a[ 1]) * a[ 7]
+        + ((int64_t)a[ 2]) * a[ 6]
+        + ((int64_t)a[ 3]) * a[ 5]) * 2
+        + ((int64_t)a[ 4]) * a[ 4];
+    int64_t t9 = (((int64_t)a[ 0]) * a[ 9]
+        + ((int64_t)a[ 1]) * a[ 8]
+        + ((int64_t)a[ 2]) * a[ 7]
+        + ((int64_t)a[ 3]) * a[ 6]
+        + ((int64_t)a[ 4]) * a[ 5]) * 2;
+    int64_t t10 = (((int64_t)a[ 0]) * a[10]
+        + ((int64_t)a[ 1]) * a[ 9]
+        + ((int64_t)a[ 2]) * a[ 8]
+        + ((int64_t)a[ 3]) * a[ 7]
+        + ((int64_t)a[ 4]) * a[ 6]) * 2
+        + ((int64_t)a[ 5]) * a[ 5];
+    int64_t t11 = (((int64_t)a[ 0]) * a[11]
+        + ((int64_t)a[ 1]) * a[10]
+        + ((int64_t)a[ 2]) * a[ 9]
+        + ((int64_t)a[ 3]) * a[ 8]
+        + ((int64_t)a[ 4]) * a[ 7]
+        + ((int64_t)a[ 5]) * a[ 6]) * 2;
+    int64_t t12 = (((int64_t)a[ 0]) * a[12]
+        + ((int64_t)a[ 1]) * a[11]
+        + ((int64_t)a[ 2]) * a[10]
+        + ((int64_t)a[ 3]) * a[ 9]
+        + ((int64_t)a[ 4]) * a[ 8]
+        + ((int64_t)a[ 5]) * a[ 7]) * 2
+        + ((int64_t)a[ 6]) * a[ 6];
+    int64_t t13 = (((int64_t)a[ 0]) * a[13]
+        + ((int64_t)a[ 1]) * a[12]
+        + ((int64_t)a[ 2]) * a[11]
+        + ((int64_t)a[ 3]) * a[10]
+        + ((int64_t)a[ 4]) * a[ 9]
+        + ((int64_t)a[ 5]) * a[ 8]
+        + ((int64_t)a[ 6]) * a[ 7]) * 2;
+    int64_t t14 = (((int64_t)a[ 0]) * a[14]
+        + ((int64_t)a[ 1]) * a[13]
+        + ((int64_t)a[ 2]) * a[12]
+        + ((int64_t)a[ 3]) * a[11]
+        + ((int64_t)a[ 4]) * a[10]
+        + ((int64_t)a[ 5]) * a[ 9]
+        + ((int64_t)a[ 6]) * a[ 8]) * 2
+        + ((int64_t)a[ 7]) * a[ 7];
+    int64_t t15 = (((int64_t)a[ 1]) * a[14]
+        + ((int64_t)a[ 2]) * a[13]
+        + ((int64_t)a[ 3]) * a[12]
+        + ((int64_t)a[ 4]) * a[11]
+        + ((int64_t)a[ 5]) * a[10]
+        + ((int64_t)a[ 6]) * a[ 9]
+        + ((int64_t)a[ 7]) * a[ 8]) * 2;
+    int64_t t16 = (((int64_t)a[ 2]) * a[14]
+        + ((int64_t)a[ 3]) * a[13]
+        + ((int64_t)a[ 4]) * a[12]
+        + ((int64_t)a[ 5]) * a[11]
+        + ((int64_t)a[ 6]) * a[10]
+        + ((int64_t)a[ 7]) * a[ 9]) * 2
+        + ((int64_t)a[ 8]) * a[ 8];
+    int64_t t17 = (((int64_t)a[ 3]) * a[14]
+        + ((int64_t)a[ 4]) * a[13]
+        + ((int64_t)a[ 5]) * a[12]
+        + ((int64_t)a[ 6]) * a[11]
+        + ((int64_t)a[ 7]) * a[10]
+        + ((int64_t)a[ 8]) * a[ 9]) * 2;
+    int64_t t18 = (((int64_t)a[ 4]) * a[14]
+        + ((int64_t)a[ 5]) * a[13]
+        + ((int64_t)a[ 6]) * a[12]
+        + ((int64_t)a[ 7]) * a[11]
+        + ((int64_t)a[ 8]) * a[10]) * 2
+        + ((int64_t)a[ 9]) * a[ 9];
+    int64_t t19 = (((int64_t)a[ 5]) * a[14]
+        + ((int64_t)a[ 6]) * a[13]
+        + ((int64_t)a[ 7]) * a[12]
+        + ((int64_t)a[ 8]) * a[11]
+        + ((int64_t)a[ 9]) * a[10]) * 2;
+    int64_t t20 = (((int64_t)a[ 6]) * a[14]
+        + ((int64_t)a[ 7]) * a[13]
+        + ((int64_t)a[ 8]) * a[12]
+        + ((int64_t)a[ 9]) * a[11]) * 2
+        + ((int64_t)a[10]) * a[10];
+    int64_t t21 = (((int64_t)a[ 7]) * a[14]
+        + ((int64_t)a[ 8]) * a[13]
+        + ((int64_t)a[ 9]) * a[12]
+        + ((int64_t)a[10]) * a[11]) * 2;
+    int64_t t22 = (((int64_t)a[ 8]) * a[14]
+        + ((int64_t)a[ 9]) * a[13]
+        + ((int64_t)a[10]) * a[12]) * 2
+        + ((int64_t)a[11]) * a[11];
+    int64_t t23 = (((int64_t)a[ 9]) * a[14]
+        + ((int64_t)a[10]) * a[13]
+        + ((int64_t)a[11]) * a[12]) * 2;
+    int64_t t24 = (((int64_t)a[10]) * a[14]
+        + ((int64_t)a[11]) * a[13]) * 2
+        + ((int64_t)a[12]) * a[12];
+    int64_t t25 = (((int64_t)a[11]) * a[14]
+        + ((int64_t)a[12]) * a[13]) * 2;
+    int64_t t26 = (((int64_t)a[12]) * a[14]) * 2
+        + ((int64_t)a[13]) * a[13];
+    int64_t t27 = (((int64_t)a[13]) * a[14]) * 2;
+    int64_t t28 = ((int64_t)a[14]) * a[14];
+
+    /* Carry propagation: each column keeps its low 23 bits and pushes the
+     * rest into the next column; t28's high part becomes the top digit. */
+    t1 += t0 >> 23; r[ 0] = t0 & 0x7fffff;
+    t2 += t1 >> 23; r[ 1] = t1 & 0x7fffff;
+    t3 += t2 >> 23; r[ 2] = t2 & 0x7fffff;
+    t4 += t3 >> 23; r[ 3] = t3 & 0x7fffff;
+    t5 += t4 >> 23; r[ 4] = t4 & 0x7fffff;
+    t6 += t5 >> 23; r[ 5] = t5 & 0x7fffff;
+    t7 += t6 >> 23; r[ 6] = t6 & 0x7fffff;
+    t8 += t7 >> 23; r[ 7] = t7 & 0x7fffff;
+    t9 += t8 >> 23; r[ 8] = t8 & 0x7fffff;
+    t10 += t9 >> 23; r[ 9] = t9 & 0x7fffff;
+    t11 += t10 >> 23; r[10] = t10 & 0x7fffff;
+    t12 += t11 >> 23; r[11] = t11 & 0x7fffff;
+    t13 += t12 >> 23; r[12] = t12 & 0x7fffff;
+    t14 += t13 >> 23; r[13] = t13 & 0x7fffff;
+    t15 += t14 >> 23; r[14] = t14 & 0x7fffff;
+    t16 += t15 >> 23; r[15] = t15 & 0x7fffff;
+    t17 += t16 >> 23; r[16] = t16 & 0x7fffff;
+    t18 += t17 >> 23; r[17] = t17 & 0x7fffff;
+    t19 += t18 >> 23; r[18] = t18 & 0x7fffff;
+    t20 += t19 >> 23; r[19] = t19 & 0x7fffff;
+    t21 += t20 >> 23; r[20] = t20 & 0x7fffff;
+    t22 += t21 >> 23; r[21] = t21 & 0x7fffff;
+    t23 += t22 >> 23; r[22] = t22 & 0x7fffff;
+    t24 += t23 >> 23; r[23] = t23 & 0x7fffff;
+    t25 += t24 >> 23; r[24] = t24 & 0x7fffff;
+    t26 += t25 >> 23; r[25] = t25 & 0x7fffff;
+    t27 += t26 >> 23; r[26] = t26 & 0x7fffff;
+    t28 += t27 >> 23; r[27] = t27 & 0x7fffff;
+    r[29] = (sp_digit)(t28 >> 23);
+    r[28] = t28 & 0x7fffff;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition over 15 digits.  Carries between the 23-bit digits
+ * are not propagated here; callers normalise later.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_15(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int k;
+
+    for (k = 0; k < 15; k++) {
+        r[k] = a[k] + b[k];
+    }
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtraction over 30 digits.  Borrows between the 23-bit
+ * digits are not resolved here; callers normalise later.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_30(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int k;
+
+    for (k = 0; k < 30; k++) {
+        r[k] = a[k] - b[k];
+    }
+
+    return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition over 30 digits.  Carries between the 23-bit digits
+ * are not propagated here; callers normalise later.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_30(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int k;
+
+    for (k = 0; k < 30; k++) {
+        r[k] = a[k] + b[k];
+    }
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Three-way Karatsuba-style multiply: a and b are split into three
+ * 15-digit limbs (A0..A2, B0..B2), six 15x15 products are formed from the
+ * limbs and limb sums, and the results are recombined by the add/sub
+ * sequence below.  That sequence implements the interpolation and must
+ * not be reordered.
+ *
+ * r A single precision integer.  Receives 90 digits.
+ * a A single precision integer.  45 digits.
+ * b A single precision integer.  45 digits.
+ */
+SP_NOINLINE static void sp_2048_mul_45(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit p0[30];
+    sp_digit p1[30];
+    sp_digit p2[30];
+    sp_digit p3[30];
+    sp_digit p4[30];
+    sp_digit p5[30];
+    sp_digit t0[30];
+    sp_digit t1[30];
+    sp_digit t2[30];
+    sp_digit a0[15];
+    sp_digit a1[15];
+    sp_digit a2[15];
+    sp_digit b0[15];
+    sp_digit b1[15];
+    sp_digit b2[15];
+    /* Limb sums: a0 = A0+A1, a1 = A1+A2, a2 = A0+A1+A2 (same for b). */
+    (void)sp_2048_add_15(a0, a, &a[15]);
+    (void)sp_2048_add_15(b0, b, &b[15]);
+    (void)sp_2048_add_15(a1, &a[15], &a[30]);
+    (void)sp_2048_add_15(b1, &b[15], &b[30]);
+    (void)sp_2048_add_15(a2, a0, &a[30]);
+    (void)sp_2048_add_15(b2, b0, &b[30]);
+    /* Six partial products of limbs and limb sums. */
+    sp_2048_mul_15(p0, a, b);
+    sp_2048_mul_15(p2, &a[15], &b[15]);
+    sp_2048_mul_15(p4, &a[30], &b[30]);
+    sp_2048_mul_15(p1, a0, b0);
+    sp_2048_mul_15(p3, a1, b1);
+    sp_2048_mul_15(p5, a2, b2);
+    /* Interpolate and accumulate into r at 15-digit offsets. */
+    XMEMSET(r, 0, sizeof(*r)*2U*45U);
+    (void)sp_2048_sub_30(t0, p3, p2);
+    (void)sp_2048_sub_30(t1, p1, p2);
+    (void)sp_2048_sub_30(t2, p5, t0);
+    (void)sp_2048_sub_30(t2, t2, t1);
+    (void)sp_2048_sub_30(t0, t0, p4);
+    (void)sp_2048_sub_30(t1, t1, p0);
+    (void)sp_2048_add_30(r, r, p0);
+    (void)sp_2048_add_30(&r[15], &r[15], t1);
+    (void)sp_2048_add_30(&r[30], &r[30], t2);
+    (void)sp_2048_add_30(&r[45], &r[45], t0);
+    (void)sp_2048_add_30(&r[60], &r[60], p4);
+}
+
+/* Square a into r. (r = a * a)
+ *
+ * Three-way Karatsuba-style squaring: a is split into three 15-digit
+ * limbs, six 15-digit squares of the limbs and limb sums are formed and
+ * recombined by the add/sub sequence below (same interpolation as
+ * sp_2048_mul_45; the order must not be changed).
+ *
+ * r A single precision integer.  Receives 90 digits.
+ * a A single precision integer.  45 digits.
+ */
+SP_NOINLINE static void sp_2048_sqr_45(sp_digit* r, const sp_digit* a)
+{
+    sp_digit p0[30];
+    sp_digit p1[30];
+    sp_digit p2[30];
+    sp_digit p3[30];
+    sp_digit p4[30];
+    sp_digit p5[30];
+    sp_digit t0[30];
+    sp_digit t1[30];
+    sp_digit t2[30];
+    sp_digit a0[15];
+    sp_digit a1[15];
+    sp_digit a2[15];
+    /* Limb sums: a0 = A0+A1, a1 = A1+A2, a2 = A0+A1+A2. */
+    (void)sp_2048_add_15(a0, a, &a[15]);
+    (void)sp_2048_add_15(a1, &a[15], &a[30]);
+    (void)sp_2048_add_15(a2, a0, &a[30]);
+    /* Six partial squares of limbs and limb sums. */
+    sp_2048_sqr_15(p0, a);
+    sp_2048_sqr_15(p2, &a[15]);
+    sp_2048_sqr_15(p4, &a[30]);
+    sp_2048_sqr_15(p1, a0);
+    sp_2048_sqr_15(p3, a1);
+    sp_2048_sqr_15(p5, a2);
+    /* Interpolate and accumulate into r at 15-digit offsets. */
+    XMEMSET(r, 0, sizeof(*r)*2U*45U);
+    (void)sp_2048_sub_30(t0, p3, p2);
+    (void)sp_2048_sub_30(t1, p1, p2);
+    (void)sp_2048_sub_30(t2, p5, t0);
+    (void)sp_2048_sub_30(t2, t2, t1);
+    (void)sp_2048_sub_30(t0, t0, p4);
+    (void)sp_2048_sub_30(t1, t1, p0);
+    (void)sp_2048_add_30(r, r, p0);
+    (void)sp_2048_add_30(&r[15], &r[15], t1);
+    (void)sp_2048_add_30(&r[30], &r[30], t2);
+    (void)sp_2048_add_30(&r[45], &r[45], t0);
+    (void)sp_2048_add_30(&r[60], &r[60], p4);
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition over 45 digits.  Carries between the 23-bit digits
+ * are not propagated here; callers normalise later.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int k;
+
+    for (k = 0; k < 45; k++) {
+        r[k] = a[k] + b[k];
+    }
+
+    return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition over 90 digits.  Carries between the 23-bit digits
+ * are not propagated here; callers normalise later.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int k;
+
+    for (k = 0; k < 90; k++) {
+        r[k] = a[k] + b[k];
+    }
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtraction over 90 digits.  Borrows between the 23-bit
+ * digits are not resolved here; callers normalise later.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_90(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int k;
+
+    for (k = 0; k < 90; k++) {
+        r[k] = a[k] - b[k];
+    }
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba.  With A = A1*W + A0 (W = 2^(23*45)) and B
+ * likewise: z0 = A0*B0, z2 = A1*B1 and z1 = (A0+A1)*(B0+B1) - z0 - z2,
+ * so r = z2*W^2 + z1*W + z0.  a1 aliases z1: the limb-sum inputs are
+ * fully read by sp_2048_mul_45 before it writes its result, so the
+ * overlap is safe.
+ *
+ * r A single precision integer.  Receives 180 digits.
+ * a A single precision integer.  90 digits.
+ * b A single precision integer.  90 digits.
+ */
+SP_NOINLINE static void sp_2048_mul_90(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[90];
+    sp_digit* a1 = z1;   /* a1 reuses z1's storage */
+    sp_digit b1[45];
+    sp_digit* z2 = r + 90;
+    (void)sp_2048_add_45(a1, a, &a[45]);
+    (void)sp_2048_add_45(b1, b, &b[45]);
+    sp_2048_mul_45(z2, &a[45], &b[45]);
+    sp_2048_mul_45(z0, a, b);
+    sp_2048_mul_45(z1, a1, b1);
+    (void)sp_2048_sub_90(z1, z1, z2);
+    (void)sp_2048_sub_90(z1, z1, z0);
+    (void)sp_2048_add_90(r + 45, r + 45, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring.  With A = A1*W + A0 (W = 2^(23*45)):
+ * z0 = A0^2, z2 = A1^2 and z1 = (A0+A1)^2 - z0 - z2, so
+ * r = z2*W^2 + z1*W + z0.  a1 aliases z1: the limb sum is fully read by
+ * sp_2048_sqr_45 before it writes its result, so the overlap is safe.
+ *
+ * r A single precision integer.  Receives 180 digits.
+ * a A single precision integer.  90 digits.
+ */
+SP_NOINLINE static void sp_2048_sqr_90(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[90];
+    sp_digit* a1 = z1;   /* a1 reuses z1's storage */
+    sp_digit* z2 = r + 90;
+    (void)sp_2048_add_45(a1, a, &a[45]);
+    sp_2048_sqr_45(z2, &a[45]);
+    sp_2048_sqr_45(z0, a);
+    sp_2048_sqr_45(z1, a1);
+    (void)sp_2048_sub_90(z1, z1, z2);
+    (void)sp_2048_sub_90(z1, z1, z0);
+    (void)sp_2048_add_90(r + 45, r + 45, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Compact digit-wise addition; no carry propagation is performed here.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int idx = 0;
+
+    while (idx < 90) {
+        r[idx] = a[idx] + b[idx];
+        idx++;
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Compact digit-wise subtraction; borrows are not resolved here.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_90(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int idx = 0;
+
+    while (idx < 90) {
+        r[idx] = a[idx] - b[idx];
+        idx++;
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Column-wise (comb) schoolbook multiply, processed from the most
+ * significant column down.  c accumulates the current column sum plus the
+ * low 23 bits of the previous column shifted up; bits above 46 are folded
+ * into the digit two places up.
+ *
+ * r A single precision integer.  Receives 180 digits.
+ * a A single precision integer.  90 digits.
+ * b A single precision integer.  90 digits.
+ */
+SP_NOINLINE static void sp_2048_mul_90(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    /* Top column has the single product a[89]*b[89]. */
+    c = ((int64_t)a[89]) * b[89];
+    r[179] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 177; k >= 0; k--) {
+        /* Accumulate all partial products a[i]*b[j] with i + j == k. */
+        for (i = 89; i >= 0; i--) {
+            j = k - i;
+            if (j >= 90) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 46;               /* overflow past two digits */
+        r[k + 1] = (c >> 23) & 0x7fffff;   /* this column's digit */
+        c = (c & 0x7fffff) << 23;          /* low part feeds next column */
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-wise (comb) squaring: each symmetric pair a[i]*a[j] (i > j) is
+ * counted twice and the diagonal square a[i]^2 once.  Carry handling is
+ * the same as in sp_2048_mul_90.
+ *
+ * r A single precision integer.  Receives 180 digits.
+ * a A single precision integer.  90 digits.
+ */
+SP_NOINLINE static void sp_2048_sqr_90(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[89]) * a[89];
+    r[179] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 177; k >= 0; k--) {
+        /* Accumulate doubled off-diagonal products with i + j == k. */
+        for (i = 89; i >= 0; i--) {
+            j = k - i;
+            if (j >= 90 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        /* i == j on loop exit means column k has a diagonal term a[i]^2. */
+        if (i == j) {
+           c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 46;
+        r[k + 1] = (c >> 23) & 0x7fffff;
+        c = (c & 0x7fffff) << 23;
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Compact digit-wise addition; no carry propagation is performed here.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int idx = 0;
+
+    while (idx < 45) {
+        r[idx] = a[idx] + b[idx];
+        idx++;
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Compact digit-wise subtraction; borrows are not resolved here.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_45(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int idx = 0;
+
+    while (idx < 45) {
+        r[idx] = a[idx] - b[idx];
+        idx++;
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtraction over 45 digits.  Borrows between the 23-bit
+ * digits are not resolved here; callers normalise later.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_45(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int k;
+
+    for (k = 0; k < 45; k++) {
+        r[k] = a[k] - b[k];
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Column-wise (comb) schoolbook multiply over 45 digits; same carry
+ * scheme as sp_2048_mul_90: c carries the current column plus the low
+ * bits of the previous one, overflow above 46 bits goes two digits up.
+ *
+ * r A single precision integer.  Receives 90 digits.
+ * a A single precision integer.  45 digits.
+ * b A single precision integer.  45 digits.
+ */
+SP_NOINLINE static void sp_2048_mul_45(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    /* Top column has the single product a[44]*b[44]. */
+    c = ((int64_t)a[44]) * b[44];
+    r[89] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 87; k >= 0; k--) {
+        /* Accumulate all partial products a[i]*b[j] with i + j == k. */
+        for (i = 44; i >= 0; i--) {
+            j = k - i;
+            if (j >= 45) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 46;
+        r[k + 1] = (c >> 23) & 0x7fffff;
+        c = (c & 0x7fffff) << 23;
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-wise (comb) squaring over 45 digits: symmetric pairs are doubled
+ * and the diagonal square added once.  Same carry scheme as
+ * sp_2048_mul_45.
+ *
+ * r A single precision integer.  Receives 90 digits.
+ * a A single precision integer.  45 digits.
+ */
+SP_NOINLINE static void sp_2048_sqr_45(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[44]) * a[44];
+    r[89] = (sp_digit)(c >> 23);
+    c = (c & 0x7fffff) << 23;
+    for (k = 87; k >= 0; k--) {
+        /* Accumulate doubled off-diagonal products with i + j == k. */
+        for (i = 44; i >= 0; i--) {
+            j = k - i;
+            if (j >= 45 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        /* i == j on loop exit means column k has a diagonal term a[i]^2. */
+        if (i == j) {
+           c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 46;
+        r[k + 1] = (c >> 23) & 0x7fffff;
+        c = (c & 0x7fffff) << 23;
+    }
+    r[0] = (sp_digit)(c >> 23);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n (Montgomery setup).
+ *
+ * Newton-style iteration: each multiply doubles the number of correct
+ * low-order bits of the inverse (see the per-step comments); only the
+ * low 23 bits (one digit) are kept.  Requires a[0] to be odd for the
+ * inverse to exist.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x &= 0x7fffff;                /* keep one 23-bit digit */
+
+    /* rho = -1/m mod b */
+    *rho = (1L << 23) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * 90-digit by single-digit product with carry propagation; the final
+ * carry lands in r[90], so r needs 91 digits. The unrolled variant
+ * rotates partial products through t[0..7] to pipeline the carry chain.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_d_90(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 90; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x7fffff;
+ t >>= 23;
+ }
+ r[90] = (sp_digit)t;
+#else
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+ for (i = 0; i < 88; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+ }
+ t[1] = tb * a[89];
+ r[89] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ r[90] = (sp_digit)(t[1] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * Here n = 1024 (45 digits * 23 bits = 1035, top digit holds 12 bits,
+ * hence r[44] = 0xfff). r = (2^1024 - 1) - m + 1 = 2^1024 mod m.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_45(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<44; i++) {
+ r[i] = 0x7fffff;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 40; i += 8) {
+ r[i + 0] = 0x7fffff;
+ r[i + 1] = 0x7fffff;
+ r[i + 2] = 0x7fffff;
+ r[i + 3] = 0x7fffff;
+ r[i + 4] = 0x7fffff;
+ r[i + 5] = 0x7fffff;
+ r[i + 6] = 0x7fffff;
+ r[i + 7] = 0x7fffff;
+ }
+ r[40] = 0x7fffff;
+ r[41] = 0x7fffff;
+ r[42] = 0x7fffff;
+ r[43] = 0x7fffff;
+#endif
+ r[44] = 0xfffL;
+
+ /* r = (2^n - 1) mod m */
+ (void)sp_2048_sub_45(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant digit; once a difference has been
+ * latched into r, the mask (0 - (r == 0)) zeroes all later terms so the
+ * first (highest) differing digit decides the sign. Every digit is
+ * always touched, keeping the access pattern data-independent.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_2048_cmp_45(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=44; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[44] - b[44]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[43] - b[43]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[42] - b[42]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[41] - b[41]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[40] - b[40]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ for (i = 32; i >= 0; i -= 8) {
+ r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#endif /* WOLFSSL_SP_SMALL */
+
+ return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * Branch-free: (b[i] & m) is b[i] when m is all-ones and 0 when m is 0,
+ * so the same instruction sequence runs either way (constant time).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_2048_cond_sub_45(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 45; i++) {
+ r[i] = a[i] - (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 40; i += 8) {
+ r[i + 0] = a[i + 0] - (b[i + 0] & m);
+ r[i + 1] = a[i + 1] - (b[i + 1] & m);
+ r[i + 2] = a[i + 2] - (b[i + 2] & m);
+ r[i + 3] = a[i + 3] - (b[i + 3] & m);
+ r[i + 4] = a[i + 4] - (b[i + 4] & m);
+ r[i + 5] = a[i + 5] - (b[i + 5] & m);
+ r[i + 6] = a[i + 6] - (b[i + 6] & m);
+ r[i + 7] = a[i + 7] - (b[i + 7] & m);
+ }
+ r[40] = a[40] - (b[40] & m);
+ r[41] = a[41] - (b[41] & m);
+ r[42] = a[42] - (b[42] & m);
+ r[43] = a[43] - (b[43] & m);
+ r[44] = a[44] - (b[44] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Core step of Montgomery reduction: adds the 45-digit value a scaled
+ * by the single digit b into r, accumulating the final carry into r[45].
+ * r digits may exceed 23 bits afterwards (callers re-normalize).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_add_45(sp_digit* r, const sp_digit* a,
+ const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 45; i++) {
+ t += (tb * a[i]) + r[i];
+ r[i] = t & 0x7fffff;
+ t >>= 23;
+ }
+ r[45] += t;
+#else
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
+ for (i = 0; i < 40; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+ t[2] = tb * a[i+2];
+ r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+ t[3] = tb * a[i+3];
+ r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
+ t[4] = tb * a[i+4];
+ r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
+ t[5] = tb * a[i+5];
+ r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
+ t[6] = tb * a[i+6];
+ r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
+ t[7] = tb * a[i+7];
+ r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
+ t[0] = tb * a[i+8];
+ r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
+ }
+ t[1] = tb * a[41]; r[41] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+ t[2] = tb * a[42]; r[42] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+ t[3] = tb * a[43]; r[43] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
+ t[4] = tb * a[44]; r[44] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
+ r[45] += (sp_digit)(t[4] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 23.
+ *
+ * Ripples carries upward so that a[0..43] each hold at most 23 bits;
+ * any overall excess accumulates in the top digit a[44] (not masked).
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_2048_norm_45(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ for (i = 0; i < 44; i++) {
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+ }
+#else
+ int i;
+ for (i = 0; i < 40; i += 8) {
+ a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
+ a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
+ a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
+ a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
+ a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
+ a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
+ a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
+ a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
+ a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
+ }
+ a[40+1] += a[40] >> 23;
+ a[40] &= 0x7fffff;
+ a[41+1] += a[41] >> 23;
+ a[41] &= 0x7fffff;
+ a[42+1] += a[42] >> 23;
+ a[42] &= 0x7fffff;
+ a[43+1] += a[43] >> 23;
+ a[43] &= 0x7fffff;
+#endif
+}
+
+/* Shift the result in the high 1024 bits down to the bottom.
+ *
+ * Logical right shift of a by 1024 bits (44 full 23-bit digits plus the
+ * 12 used bits of digit 44, i.e. a[44] >> 12 with a[45..] shifted in
+ * 11 bits at a time). The upper half of r is cleared afterwards.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_2048_mont_shift_45(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ int64_t n = a[44] >> 12;
+ n += ((int64_t)a[45]) << 11;
+
+ for (i = 0; i < 44; i++) {
+ r[i] = n & 0x7fffff;
+ n >>= 23;
+ n += ((int64_t)a[46 + i]) << 11;
+ }
+ r[44] = (sp_digit)n;
+#else
+ int i;
+ int64_t n = a[44] >> 12;
+ n += ((int64_t)a[45]) << 11;
+ for (i = 0; i < 40; i += 8) {
+ r[i + 0] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 46]) << 11;
+ r[i + 1] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 47]) << 11;
+ r[i + 2] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 48]) << 11;
+ r[i + 3] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 49]) << 11;
+ r[i + 4] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 50]) << 11;
+ r[i + 5] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 51]) << 11;
+ r[i + 6] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 52]) << 11;
+ r[i + 7] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 53]) << 11;
+ }
+ r[40] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[86]) << 11;
+ r[41] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[87]) << 11;
+ r[42] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[88]) << 11;
+ r[43] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[89]) << 11;
+ r[44] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+ XMEMSET(&r[45], 0, sizeof(*r) * 45U);
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * Classic digit-serial Montgomery reduction (cf. HAC Alg. 14.32):
+ * for each digit, mu = a[i] * mp mod 2^23 makes a[i] + mu*m divisible
+ * by 2^23; after 45 rounds the low half is zero, is shifted out, and
+ * a final constant-time conditional subtract brings the result below m.
+ * Despite the function name this variant reduces a 1024-bit (45-digit)
+ * value, used for the CRT halves of a 2048-bit RSA key.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_2048_mont_reduce_45(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+ int i;
+ sp_digit mu;
+
+ sp_2048_norm_45(a + 45);
+
+ for (i=0; i<44; i++) {
+ mu = (a[i] * mp) & 0x7fffff;
+ sp_2048_mul_add_45(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ }
+ /* Top digit only holds 12 bits, so mask mu to 0xfff. */
+ mu = (a[i] * mp) & 0xfffL;
+ sp_2048_mul_add_45(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+
+ sp_2048_mont_shift_45(a, a);
+ sp_2048_cond_sub_45(a, a, m, 0 - (((a[44] >> 12) > 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_2048_norm_45(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^23).
+ */
+static void sp_2048_mont_mul_45(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_2048_mul_45(r, a, b);
+ sp_2048_mont_reduce_45(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^23).
+ */
+static void sp_2048_mont_sqr_45(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_2048_sqr_45(r, a);
+ sp_2048_mont_reduce_45(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * 45-digit by single-digit product; final carry is written to r[45],
+ * so r needs 46 digits. Structure mirrors sp_2048_mul_d_90.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_d_45(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 45; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x7fffff;
+ t >>= 23;
+ }
+ r[45] = (sp_digit)t;
+#else
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+ for (i = 0; i < 40; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+ }
+ t[1] = tb * a[41];
+ r[41] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ t[2] = tb * a[42];
+ r[42] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+ t[3] = tb * a[43];
+ r[43] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+ t[4] = tb * a[44];
+ r[44] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+ r[45] = (sp_digit)(t[4] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Branch-free counterpart of sp_2048_cond_sub_45; used e.g. to fold the
+ * modulus back in after a possibly-negative remainder in division.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_2048_cond_add_45(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 45; i++) {
+ r[i] = a[i] + (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 40; i += 8) {
+ r[i + 0] = a[i + 0] + (b[i + 0] & m);
+ r[i + 1] = a[i + 1] + (b[i + 1] & m);
+ r[i + 2] = a[i + 2] + (b[i + 2] & m);
+ r[i + 3] = a[i + 3] + (b[i + 3] & m);
+ r[i + 4] = a[i + 4] + (b[i + 4] & m);
+ r[i + 5] = a[i + 5] + (b[i + 5] & m);
+ r[i + 6] = a[i + 6] + (b[i + 6] & m);
+ r[i + 7] = a[i + 7] + (b[i + 7] & m);
+ }
+ r[40] = a[40] + (b[40] & m);
+ r[41] = a[41] + (b[41] & m);
+ r[42] = a[42] + (b[42] & m);
+ r[43] = a[43] + (b[43] & m);
+ r[44] = a[44] + (b[44] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise add without carry propagation; digits may exceed 23 bits
+ * until the caller normalizes.
+ * NOTE(review): guard is WOLFSSL_SMALL, while sibling functions use
+ * WOLFSSL_SP_SMALL — looks inconsistent; confirm against the generator.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 45; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif
+/* Shift a right by n bits into r. (r = a >> n)
+ *
+ * n must be in 1..22: the complementary shift (23 - n) would be
+ * undefined/zero-producing for n == 0 or n >= 23. The only caller in
+ * view (sp_2048_div_45) uses n = 11.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * n Number of bits to shift (1..22).
+ */
+SP_NOINLINE static void sp_2048_rshift_45(sp_digit* r, sp_digit* a, byte n)
+{
+ int i;
+
+#ifdef WOLFSSL_SP_SMALL
+ for (i=0; i<44; i++) {
+ r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
+ }
+#else
+ for (i=0; i<40; i += 8) {
+ r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff;
+ r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff;
+ r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff;
+ r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff;
+ r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff;
+ r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff;
+ r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff;
+ r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff;
+ }
+ r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff;
+ r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff;
+ r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff;
+ r[43] = ((a[43] >> n) | (a[44] << (23 - n))) & 0x7fffff;
+#endif
+ r[44] = a[44] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+/* Divide the 46-bit value d1:d0 (two 23-bit digits) by dv using only
+ * 32-bit division, in three partial-quotient steps of 8+8+7 bits.
+ * Used when 64-bit division is unavailable or slow on the target.
+ */
+static WC_INLINE sp_digit sp_2048_div_word_45(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 23 bits from d1 and top 8 bits from d0. */
+ d = (d1 << 8) | (d0 >> 15);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 9 bits in r */
+ /* Next 8 bits from d0. */
+ r <<= 8;
+ d <<= 8;
+ d |= (d0 >> 7) & ((1 << 8) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 17 bits in r */
+ /* Remaining 7 bits from d0. */
+ r <<= 7;
+ d <<= 7;
+ d |= d0 & ((1 << 7) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook trial division on 23-bit digits. Both operands are first
+ * scaled by 2^11 so the divisor's top digit dv fills a full digit,
+ * keeping the per-step quotient estimate within one of the true value;
+ * a correction step per iteration and a final conditional add of the
+ * divisor fix any under/overshoot, then the remainder is shifted back
+ * down by 11 bits.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_2048_div_45(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ int i;
+#ifndef WOLFSSL_SP_DIV_32
+ int64_t d1;
+#endif
+ sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* td;
+#else
+ sp_digit t1d[90 + 1], t2d[45 + 1], sdd[45 + 1];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ sp_digit* sd;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 45 + 3), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ /* NOTE(review): duplicate of the (void)m above — harmless generator
+ * artifact. */
+ (void)m;
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = td;
+ t2 = td + 90 + 1;
+ sd = t2 + 45 + 1;
+#else
+ t1 = t1d;
+ t2 = t2d;
+ sd = sdd;
+#endif
+
+ /* Scale divisor and dividend by 2^11 (divisor normalization). */
+ sp_2048_mul_d_45(sd, d, 1L << 11);
+ sp_2048_mul_d_90(t1, a, 1L << 11);
+ dv = sd[44];
+ for (i=45; i>=0; i--) {
+ t1[45 + i] += t1[45 + i - 1] >> 23;
+ t1[45 + i - 1] &= 0x7fffff;
+#ifndef WOLFSSL_SP_DIV_32
+ d1 = t1[45 + i];
+ d1 <<= 23;
+ d1 += t1[45 + i - 1];
+ r1 = (sp_digit)(d1 / dv);
+#else
+ r1 = sp_2048_div_word_45(t1[45 + i], t1[45 + i - 1], dv);
+#endif
+
+ sp_2048_mul_d_45(t2, sd, r1);
+ (void)sp_2048_sub_45(&t1[i], &t1[i], t2);
+ t1[45 + i] -= t2[45];
+ t1[45 + i] += t1[45 + i - 1] >> 23;
+ t1[45 + i - 1] &= 0x7fffff;
+ /* Quotient estimate may overshoot by one; add the correction back. */
+ r1 = (((-t1[45 + i]) << 23) - t1[45 + i - 1]) / dv;
+ r1 -= t1[45 + i];
+ sp_2048_mul_d_45(t2, sd, r1);
+ (void)sp_2048_add_45(&t1[i], &t1[i], t2);
+ t1[45 + i] += t1[45 + i - 1] >> 23;
+ t1[45 + i - 1] &= 0x7fffff;
+ }
+ t1[45 - 1] += t1[45 - 2] >> 23;
+ t1[45 - 2] &= 0x7fffff;
+ r1 = t1[45 - 1] / dv;
+
+ sp_2048_mul_d_45(t2, sd, r1);
+ sp_2048_sub_45(t1, t1, t2);
+ XMEMCPY(r, t1, sizeof(*r) * 2U * 45U);
+ for (i=0; i<43; i++) {
+ r[i+1] += r[i] >> 23;
+ r[i] &= 0x7fffff;
+ }
+ /* Remainder may have gone negative; conditionally add the scaled
+ * divisor back before un-scaling. */
+ sp_2048_cond_add_45(r, r, sd, 0 - ((r[44] < 0) ?
+ (sp_digit)1 : (sp_digit)0));
+
+ sp_2048_norm_45(r);
+ sp_2048_rshift_45(r, r, 11);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper: division with a discarded quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_2048_mod_45(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_2048_div_45(a, m, NULL, r);
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Three compile-time variants:
+ *  - WOLFSSL_SP_SMALL: Montgomery-ladder-style square-and-multiply,
+ *    one bit per iteration, constant memory-access pattern via
+ *    addr_mask-based pointer selection.
+ *  - WOLFSSL_SP_CACHE_RESISTANT: same ladder with fixed buffers.
+ *  - default: 5-bit fixed-window exponentiation with a 32-entry table.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced mod m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_45(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+ const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* td;
+ sp_digit* t[3];
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 45 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(td, 0, sizeof(*td) * 3U * 45U * 2U);
+
+ norm = t[0] = td;
+ t[1] = &td[45 * 2];
+ t[2] = &td[2 * 45 * 2];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_45(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_2048_mod_45(t[1], a, m);
+ }
+ else {
+ XMEMCPY(t[1], a, sizeof(sp_digit) * 45U);
+ }
+ }
+ if (err == MP_OKAY) {
+ /* Convert a to Montgomery form: a * 2^1024 mod m. */
+ sp_2048_mul_45(t[1], t[1], norm);
+ err = sp_2048_mod_45(t[1], t[1], m);
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 23;
+ c = bits % 23;
+ n = e[i--] << (23 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 23;
+ }
+
+ y = (n >> 22) & 1;
+ n <<= 1;
+
+ sp_2048_mont_mul_45(t[y^1], t[0], t[1], m, mp);
+
+ /* addr_mask selects t[0] or t[1] without a data-dependent
+ * branch (cache-timing resistance). */
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])),
+ sizeof(*t[2]) * 45 * 2);
+ sp_2048_mont_sqr_45(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2],
+ sizeof(*t[2]) * 45 * 2);
+ }
+
+ sp_2048_mont_reduce_45(t[0], m, mp);
+ n = sp_2048_cmp_45(t[0], m);
+ sp_2048_cond_sub_45(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(*r) * 45 * 2);
+
+ }
+
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[3][90];
+#else
+ sp_digit* td;
+ sp_digit* t[3];
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 45 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ t[0] = td;
+ t[1] = &td[45 * 2];
+ t[2] = &td[2 * 45 * 2];
+#endif
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_45(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_2048_mod_45(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_2048_mul_45(t[1], t[1], norm);
+ err = sp_2048_mod_45(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_2048_mul_45(t[1], a, norm);
+ err = sp_2048_mod_45(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 23;
+ c = bits % 23;
+ n = e[i--] << (23 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 23;
+ }
+
+ y = (n >> 22) & 1;
+ n <<= 1;
+
+ sp_2048_mont_mul_45(t[y^1], t[0], t[1], m, mp);
+
+ /* NOTE(review): when WOLFSSL_SMALL_STACK is defined t[2] is a
+ * pointer, so sizeof(t[2]) is the pointer size rather than the
+ * 90-digit buffer size — confirm against upstream generator. */
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+ sp_2048_mont_sqr_45(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+ }
+
+ sp_2048_mont_reduce_45(t[0], m, mp);
+ n = sp_2048_cmp_45(t[0], m);
+ sp_2048_cond_sub_45(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(t[0]));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][90];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit rt[90];
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 90, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++)
+ t[i] = td + i * 90;
+#endif
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_45(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_2048_mod_45(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_2048_mul_45(t[1], t[1], norm);
+ err = sp_2048_mod_45(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_2048_mul_45(t[1], a, norm);
+ err = sp_2048_mod_45(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Precompute t[k] = a^k in Montgomery form for k = 2..31. */
+ sp_2048_mont_sqr_45(t[ 2], t[ 1], m, mp);
+ sp_2048_mont_mul_45(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_2048_mont_sqr_45(t[ 4], t[ 2], m, mp);
+ sp_2048_mont_mul_45(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_2048_mont_sqr_45(t[ 6], t[ 3], m, mp);
+ sp_2048_mont_mul_45(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_2048_mont_sqr_45(t[ 8], t[ 4], m, mp);
+ sp_2048_mont_mul_45(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_2048_mont_sqr_45(t[10], t[ 5], m, mp);
+ sp_2048_mont_mul_45(t[11], t[ 6], t[ 5], m, mp);
+ sp_2048_mont_sqr_45(t[12], t[ 6], m, mp);
+ sp_2048_mont_mul_45(t[13], t[ 7], t[ 6], m, mp);
+ sp_2048_mont_sqr_45(t[14], t[ 7], m, mp);
+ sp_2048_mont_mul_45(t[15], t[ 8], t[ 7], m, mp);
+ sp_2048_mont_sqr_45(t[16], t[ 8], m, mp);
+ sp_2048_mont_mul_45(t[17], t[ 9], t[ 8], m, mp);
+ sp_2048_mont_sqr_45(t[18], t[ 9], m, mp);
+ sp_2048_mont_mul_45(t[19], t[10], t[ 9], m, mp);
+ sp_2048_mont_sqr_45(t[20], t[10], m, mp);
+ sp_2048_mont_mul_45(t[21], t[11], t[10], m, mp);
+ sp_2048_mont_sqr_45(t[22], t[11], m, mp);
+ sp_2048_mont_mul_45(t[23], t[12], t[11], m, mp);
+ sp_2048_mont_sqr_45(t[24], t[12], m, mp);
+ sp_2048_mont_mul_45(t[25], t[13], t[12], m, mp);
+ sp_2048_mont_sqr_45(t[26], t[13], m, mp);
+ sp_2048_mont_mul_45(t[27], t[14], t[13], m, mp);
+ sp_2048_mont_sqr_45(t[28], t[14], m, mp);
+ sp_2048_mont_mul_45(t[29], t[15], t[14], m, mp);
+ sp_2048_mont_sqr_45(t[30], t[15], m, mp);
+ sp_2048_mont_mul_45(t[31], t[16], t[15], m, mp);
+
+ /* n is a 32-bit bit-buffer aligned to the MSB; hence the
+ * (32 - c) load shift, (n >> 27) window extract and the
+ * (9 - c) == (32 - 23 - c) refill shift below. */
+ bits = ((bits + 4) / 5) * 5;
+ i = ((bits + 22) / 23) - 1;
+ c = bits % 23;
+ if (c == 0) {
+ c = 23;
+ }
+ if (i < 45) {
+ n = e[i--] << (32 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ if (c < 5) {
+ n |= e[i--] << (9 - c);
+ c += 23;
+ }
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ XMEMCPY(rt, t[y], sizeof(rt));
+ for (; i>=0 || c>=5; ) {
+ if (c < 5) {
+ n |= e[i--] << (9 - c);
+ c += 23;
+ }
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+
+ /* Five squarings then one table multiply per 5-bit window. */
+ sp_2048_mont_sqr_45(rt, rt, m, mp);
+ sp_2048_mont_sqr_45(rt, rt, m, mp);
+ sp_2048_mont_sqr_45(rt, rt, m, mp);
+ sp_2048_mont_sqr_45(rt, rt, m, mp);
+ sp_2048_mont_sqr_45(rt, rt, m, mp);
+
+ sp_2048_mont_mul_45(rt, rt, t[y], m, mp);
+ }
+
+ sp_2048_mont_reduce_45(rt, m, mp);
+ n = sp_2048_cmp_45(rt, m);
+ sp_2048_cond_sub_45(rt, rt, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, rt, sizeof(rt));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * Here n = 2048 (89 full 23-bit digits = 2047 bits plus 1 bit in
+ * digit 89, hence r[89] = 0x1).
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_90(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<89; i++) {
+ r[i] = 0x7fffff;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 88; i += 8) {
+ r[i + 0] = 0x7fffff;
+ r[i + 1] = 0x7fffff;
+ r[i + 2] = 0x7fffff;
+ r[i + 3] = 0x7fffff;
+ r[i + 4] = 0x7fffff;
+ r[i + 5] = 0x7fffff;
+ r[i + 6] = 0x7fffff;
+ r[i + 7] = 0x7fffff;
+ }
+ r[88] = 0x7fffff;
+#endif
+ r[89] = 0x1L;
+
+ /* r = (2^n - 1) mod m */
+ (void)sp_2048_sub_90(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * 90-digit analogue of sp_2048_cmp_45: highest differing digit wins,
+ * with every digit always accessed (data-independent timing).
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_2048_cmp_90(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=89; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[89] - b[89]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[88] - b[88]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ for (i = 80; i >= 0; i -= 8) {
+ r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#endif /* WOLFSSL_SP_SMALL */
+
+ return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * Branch-free, constant-time; 90-digit analogue of sp_2048_cond_sub_45.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_2048_cond_sub_90(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 90; i++) {
+ r[i] = a[i] - (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 88; i += 8) {
+ r[i + 0] = a[i + 0] - (b[i + 0] & m);
+ r[i + 1] = a[i + 1] - (b[i + 1] & m);
+ r[i + 2] = a[i + 2] - (b[i + 2] & m);
+ r[i + 3] = a[i + 3] - (b[i + 3] & m);
+ r[i + 4] = a[i + 4] - (b[i + 4] & m);
+ r[i + 5] = a[i + 5] - (b[i + 5] & m);
+ r[i + 6] = a[i + 6] - (b[i + 6] & m);
+ r[i + 7] = a[i + 7] - (b[i + 7] & m);
+ }
+ r[88] = a[88] - (b[88] & m);
+ r[89] = a[89] - (b[89] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * 90-digit analogue of sp_2048_mul_add_45; final carry accumulates
+ * into r[90], and digits may exceed 23 bits until re-normalized.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_add_90(sp_digit* r, const sp_digit* a,
+ const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 90; i++) {
+ t += (tb * a[i]) + r[i];
+ r[i] = t & 0x7fffff;
+ t >>= 23;
+ }
+ r[90] += t;
+#else
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
+ for (i = 0; i < 88; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+ t[2] = tb * a[i+2];
+ r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+ t[3] = tb * a[i+3];
+ r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
+ t[4] = tb * a[i+4];
+ r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
+ t[5] = tb * a[i+5];
+ r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
+ t[6] = tb * a[i+6];
+ r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
+ t[7] = tb * a[i+7];
+ r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
+ t[0] = tb * a[i+8];
+ r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
+ }
+ t[1] = tb * a[89]; r[89] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+ r[90] += (sp_digit)(t[1] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 23.
+ *
+ * Ripples carries upward so a[0..88] each hold at most 23 bits; the
+ * overall excess accumulates in the top digit a[89] (not masked).
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_2048_norm_90(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ for (i = 0; i < 89; i++) {
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+ }
+#else
+ int i;
+ for (i = 0; i < 88; i += 8) {
+ a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
+ a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
+ a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
+ a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
+ a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
+ a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
+ a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
+ a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
+ a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
+ }
+ a[88+1] += a[88] >> 23;
+ a[88] &= 0x7fffff;
+#endif
+}
+
+/* Shift the result in the high 2048 bits down to the bottom.
+ *
+ * Logical right shift of a by 2048 bits (89 full 23-bit digits plus
+ * the single used bit of digit 89, i.e. a[89] >> 1 with a[90..]
+ * shifted in 22 bits at a time). The upper half of r is cleared.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_2048_mont_shift_90(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ int64_t n = a[89] >> 1;
+ n += ((int64_t)a[90]) << 22;
+
+ for (i = 0; i < 89; i++) {
+ r[i] = n & 0x7fffff;
+ n >>= 23;
+ n += ((int64_t)a[91 + i]) << 22;
+ }
+ r[89] = (sp_digit)n;
+#else
+ int i;
+ int64_t n = a[89] >> 1;
+ n += ((int64_t)a[90]) << 22;
+ for (i = 0; i < 88; i += 8) {
+ r[i + 0] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 91]) << 22;
+ r[i + 1] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 92]) << 22;
+ r[i + 2] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 93]) << 22;
+ r[i + 3] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 94]) << 22;
+ r[i + 4] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 95]) << 22;
+ r[i + 5] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 96]) << 22;
+ r[i + 6] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 97]) << 22;
+ r[i + 7] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 98]) << 22;
+ }
+ r[88] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[179]) << 22;
+ r[89] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+ XMEMSET(&r[90], 0, sizeof(*r) * 90U);
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * 90-digit (full 2048-bit) analogue of sp_2048_mont_reduce_45.
+ * Under WOLFSSL_SP_DH a separate mp == 1 branch skips the per-digit
+ * multiply by mp (a DH optimization when the Montgomery constant is 1);
+ * both branches perform the identical reduction otherwise.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_2048_mont_reduce_90(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+ int i;
+ sp_digit mu;
+
+ sp_2048_norm_90(a + 90);
+
+#ifdef WOLFSSL_SP_DH
+ if (mp != 1) {
+ for (i=0; i<89; i++) {
+ mu = (a[i] * mp) & 0x7fffff;
+ sp_2048_mul_add_90(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ }
+ /* Top digit only holds 1 bit, so mask mu to 0x1. */
+ mu = (a[i] * mp) & 0x1L;
+ sp_2048_mul_add_90(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+ }
+ else {
+ for (i=0; i<89; i++) {
+ mu = a[i] & 0x7fffff;
+ sp_2048_mul_add_90(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ }
+ mu = a[i] & 0x1L;
+ sp_2048_mul_add_90(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+ }
+#else
+ for (i=0; i<89; i++) {
+ mu = (a[i] * mp) & 0x7fffff;
+ sp_2048_mul_add_90(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ }
+ mu = (a[i] * mp) & 0x1L;
+ sp_2048_mul_add_90(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+#endif
+
+ sp_2048_mont_shift_90(a, a);
+ sp_2048_cond_sub_90(a, a, m, 0 - (((a[89] >> 1) > 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_2048_norm_90(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full double-width multiply followed by Montgomery reduction.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_90(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_2048_mul_90(r, a, b);
+ sp_2048_mont_reduce_90(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Dedicated squaring followed by Montgomery reduction.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_90(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_2048_sqr_90(r, a);
+ sp_2048_mont_reduce_90(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * The 180-digit input produces a 181-digit result; each product is
+ * accumulated in 64 bits and carries are propagated 23 bits at a time.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_d_180(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 180; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x7fffff;
+ t >>= 23;
+ }
+ /* Final carry becomes the extra top digit. */
+ r[180] = (sp_digit)t;
+#else
+ /* Unrolled variant: t[] holds the last 8 raw products so each output
+  * digit is the previous product's carry plus the current low bits. */
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+ for (i = 0; i < 176; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+ }
+ /* Tail: digits 177..179 and the final carry digit. */
+ t[1] = tb * a[177];
+ r[177] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ t[2] = tb * a[178];
+ r[178] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+ t[3] = tb * a[179];
+ r[179] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+ r[180] = (sp_digit)(t[3] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Branch-free (constant-time) add: b is ANDed with the all-ones or
+ * all-zeros mask before the addition. No carry normalization is done
+ * here; the caller is expected to normalize afterwards.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_2048_cond_add_90(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 90; i++) {
+ r[i] = a[i] + (b[i] & m);
+ }
+#else
+ /* Unrolled 8-at-a-time version with explicit tail for digits 88-89. */
+ int i;
+
+ for (i = 0; i < 88; i += 8) {
+ r[i + 0] = a[i + 0] + (b[i + 0] & m);
+ r[i + 1] = a[i + 1] + (b[i + 1] & m);
+ r[i + 2] = a[i + 2] + (b[i + 2] & m);
+ r[i + 3] = a[i + 3] + (b[i + 3] & m);
+ r[i + 4] = a[i + 4] + (b[i + 4] & m);
+ r[i + 5] = a[i + 5] + (b[i + 5] & m);
+ r[i + 6] = a[i + 6] + (b[i + 6] & m);
+ r[i + 7] = a[i + 7] + (b[i + 7] & m);
+ }
+ r[88] = a[88] + (b[88] & m);
+ r[89] = a[89] + (b[89] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtraction with no borrow propagation; digits may go
+ * negative and the caller normalizes afterwards.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_90(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 90; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ /* Always succeeds; the int return keeps the generated API uniform. */
+ return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition with no carry propagation; the caller normalizes
+ * afterwards.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 90; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ /* Always succeeds; the int return keeps the generated API uniform. */
+ return 0;
+}
+#endif
+/* Shift a right by n bits into r. (r = a >> n)
+ *
+ * Each output digit combines the shifted-down digit with the low bits
+ * of the next digit up. Assumes 0 < n < 23 — TODO confirm against
+ * callers (used here with n == 22).
+ *
+ * r A single precision integer holding the result.
+ * a A single precision integer to shift.
+ * n Number of bits to shift by.
+ */
+SP_NOINLINE static void sp_2048_rshift_90(sp_digit* r, sp_digit* a, byte n)
+{
+ int i;
+
+#ifdef WOLFSSL_SP_SMALL
+ for (i=0; i<89; i++) {
+ r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
+ }
+#else
+ /* Unrolled 8-at-a-time version with explicit tail for digit 88. */
+ for (i=0; i<88; i += 8) {
+ r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff;
+ r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff;
+ r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff;
+ r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff;
+ r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff;
+ r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff;
+ r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff;
+ r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff;
+ }
+ r[88] = ((a[88] >> n) | (a[89] << (23 - n))) & 0x7fffff;
+#endif
+ /* Top digit has nothing above it to shift in. */
+ r[89] = a[89] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+/* Divide the double-digit number (d1:d0) by dv and return the quotient.
+ *
+ * Used when 64-bit division is unavailable (WOLFSSL_SP_DIV_32): the
+ * 23-bit quotient is built in three steps of 8, 8 and 7 bits, each
+ * using only single-digit (32-bit) division.
+ *
+ * d1 High digit of the dividend.
+ * d0 Low digit of the dividend.
+ * dv Divisor (expected to have its top digit bit set — TODO confirm).
+ */
+static WC_INLINE sp_digit sp_2048_div_word_90(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 23 bits from d1 and top 8 bits from d0. */
+ d = (d1 << 8) | (d0 >> 15);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 9 bits in r */
+ /* Next 8 bits from d0. */
+ r <<= 8;
+ d <<= 8;
+ d |= (d0 >> 7) & ((1 << 8) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 17 bits in r */
+ /* Remaining 7 bits from d0. */
+ r <<= 7;
+ d <<= 7;
+ d |= d0 & ((1 << 7) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook division on 23-bit digits: both operands are first shifted
+ * left by 22 bits so the divisor's top digit is as large as possible
+ * (normalization), then for each position a quotient digit is estimated
+ * from the top two dividend digits and corrected once.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_2048_div_90(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ int i;
+#ifndef WOLFSSL_SP_DIV_32
+ int64_t d1;
+#endif
+ sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* td;
+#else
+ sp_digit t1d[180 + 1], t2d[90 + 1], sdd[90 + 1];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ sp_digit* sd;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 90 + 3), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ /* NOTE(review): duplicate of the (void)m above — harmless artifact of
+  * the code generator. */
+ (void)m;
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = td;
+ t2 = td + 180 + 1;
+ sd = t2 + 90 + 1;
+#else
+ t1 = t1d;
+ t2 = t2d;
+ sd = sdd;
+#endif
+
+ /* Normalize: shift divisor and dividend left by 22 bits. */
+ sp_2048_mul_d_90(sd, d, 1L << 22);
+ sp_2048_mul_d_180(t1, a, 1L << 22);
+ dv = sd[89];
+ for (i=90; i>=0; i--) {
+ t1[90 + i] += t1[90 + i - 1] >> 23;
+ t1[90 + i - 1] &= 0x7fffff;
+#ifndef WOLFSSL_SP_DIV_32
+ /* Estimate the quotient digit from the top two dividend digits. */
+ d1 = t1[90 + i];
+ d1 <<= 23;
+ d1 += t1[90 + i - 1];
+ r1 = (sp_digit)(d1 / dv);
+#else
+ r1 = sp_2048_div_word_90(t1[90 + i], t1[90 + i - 1], dv);
+#endif
+
+ /* Subtract r1 * divisor at this position ... */
+ sp_2048_mul_d_90(t2, sd, r1);
+ (void)sp_2048_sub_90(&t1[i], &t1[i], t2);
+ t1[90 + i] -= t2[90];
+ t1[90 + i] += t1[90 + i - 1] >> 23;
+ t1[90 + i - 1] &= 0x7fffff;
+ /* ... then correct any over-estimate by adding back a multiple. */
+ r1 = (((-t1[90 + i]) << 23) - t1[90 + i - 1]) / dv;
+ r1 -= t1[90 + i];
+ sp_2048_mul_d_90(t2, sd, r1);
+ (void)sp_2048_add_90(&t1[i], &t1[i], t2);
+ t1[90 + i] += t1[90 + i - 1] >> 23;
+ t1[90 + i - 1] &= 0x7fffff;
+ }
+ /* One final reduction of the remaining top digit. */
+ t1[90 - 1] += t1[90 - 2] >> 23;
+ t1[90 - 2] &= 0x7fffff;
+ r1 = t1[90 - 1] / dv;
+
+ sp_2048_mul_d_90(t2, sd, r1);
+ sp_2048_sub_90(t1, t1, t2);
+ XMEMCPY(r, t1, sizeof(*r) * 2U * 90U);
+ for (i=0; i<88; i++) {
+ r[i+1] += r[i] >> 23;
+ r[i] &= 0x7fffff;
+ }
+ /* If the remainder went negative, add the divisor back once. */
+ sp_2048_cond_add_90(r, r, sd, 0 - ((r[89] < 0) ?
+ (sp_digit)1 : (sp_digit)0));
+
+ /* Undo the normalization shift to get the true remainder. */
+ sp_2048_norm_90(r);
+ sp_2048_rshift_90(r, r, 22);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_2048_div_90 discarding the quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_2048_mod_90(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_2048_div_90(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Three build-dependent implementations:
+ *  - WOLFSSL_SP_SMALL: Montgomery-ladder style, constant memory access
+ *    via masked addressing (addr_mask), two working values.
+ *  - WOLFSSL_SP_CACHE_RESISTANT: same ladder with fixed-size buffers.
+ *  - default: 5-bit fixed-window exponentiation with 32 precomputed
+ *    powers (fastest, more memory).
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_90(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+ const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* td;
+ sp_digit* t[3];
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 90 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(td, 0, sizeof(*td) * 3U * 90U * 2U);
+
+ /* t[0] starts as R mod m (the Montgomery form of 1). */
+ norm = t[0] = td;
+ t[1] = &td[90 * 2];
+ t[2] = &td[2 * 90 * 2];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_90(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_2048_mod_90(t[1], a, m);
+ }
+ else {
+ XMEMCPY(t[1], a, sizeof(sp_digit) * 90U);
+ }
+ }
+ if (err == MP_OKAY) {
+ /* Convert the base into Montgomery form: a * R mod m. */
+ sp_2048_mul_90(t[1], t[1], norm);
+ err = sp_2048_mod_90(t[1], t[1], m);
+ }
+
+ if (err == MP_OKAY) {
+ /* Position the top exponent bit at bit 22 of n. */
+ i = bits / 23;
+ c = bits % 23;
+ n = e[i--] << (23 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 23;
+ }
+
+ y = (n >> 22) & 1;
+ n <<= 1;
+
+ /* Ladder step: multiply into t[y^1], square the other; the
+  * masked-address copy selects the operand without branching
+  * on the secret bit y. */
+ sp_2048_mont_mul_90(t[y^1], t[0], t[1], m, mp);
+
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])),
+ sizeof(*t[2]) * 90 * 2);
+ sp_2048_mont_sqr_90(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2],
+ sizeof(*t[2]) * 90 * 2);
+ }
+
+ /* Convert back out of Montgomery form and fully reduce. */
+ sp_2048_mont_reduce_90(t[0], m, mp);
+ n = sp_2048_cmp_90(t[0], m);
+ sp_2048_cond_sub_90(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(*r) * 90 * 2);
+
+ }
+
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[3][180];
+#else
+ sp_digit* td;
+ sp_digit* t[3];
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 90 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ t[0] = td;
+ t[1] = &td[90 * 2];
+ t[2] = &td[2 * 90 * 2];
+#endif
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_90(norm, m);
+
+ /* Convert the base into Montgomery form, reducing first if asked. */
+ if (reduceA != 0) {
+ err = sp_2048_mod_90(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_2048_mul_90(t[1], t[1], norm);
+ err = sp_2048_mod_90(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_2048_mul_90(t[1], a, norm);
+ err = sp_2048_mod_90(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Same bit-at-a-time ladder as the WOLFSSL_SP_SMALL variant. */
+ i = bits / 23;
+ c = bits % 23;
+ n = e[i--] << (23 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 23;
+ }
+
+ y = (n >> 22) & 1;
+ n <<= 1;
+
+ sp_2048_mont_mul_90(t[y^1], t[0], t[1], m, mp);
+
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+ sp_2048_mont_sqr_90(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+ }
+
+ sp_2048_mont_reduce_90(t[0], m, mp);
+ n = sp_2048_cmp_90(t[0], m);
+ sp_2048_cond_sub_90(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(t[0]));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][180];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit rt[180];
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 180, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++)
+ t[i] = td + i * 180;
+#endif
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_90(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_2048_mod_90(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_2048_mul_90(t[1], t[1], norm);
+ err = sp_2048_mod_90(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_2048_mul_90(t[1], a, norm);
+ err = sp_2048_mod_90(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Precompute t[k] = a^k in Montgomery form for k = 2..31. */
+ sp_2048_mont_sqr_90(t[ 2], t[ 1], m, mp);
+ sp_2048_mont_mul_90(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_2048_mont_sqr_90(t[ 4], t[ 2], m, mp);
+ sp_2048_mont_mul_90(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_2048_mont_sqr_90(t[ 6], t[ 3], m, mp);
+ sp_2048_mont_mul_90(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_2048_mont_sqr_90(t[ 8], t[ 4], m, mp);
+ sp_2048_mont_mul_90(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_2048_mont_sqr_90(t[10], t[ 5], m, mp);
+ sp_2048_mont_mul_90(t[11], t[ 6], t[ 5], m, mp);
+ sp_2048_mont_sqr_90(t[12], t[ 6], m, mp);
+ sp_2048_mont_mul_90(t[13], t[ 7], t[ 6], m, mp);
+ sp_2048_mont_sqr_90(t[14], t[ 7], m, mp);
+ sp_2048_mont_mul_90(t[15], t[ 8], t[ 7], m, mp);
+ sp_2048_mont_sqr_90(t[16], t[ 8], m, mp);
+ sp_2048_mont_mul_90(t[17], t[ 9], t[ 8], m, mp);
+ sp_2048_mont_sqr_90(t[18], t[ 9], m, mp);
+ sp_2048_mont_mul_90(t[19], t[10], t[ 9], m, mp);
+ sp_2048_mont_sqr_90(t[20], t[10], m, mp);
+ sp_2048_mont_mul_90(t[21], t[11], t[10], m, mp);
+ sp_2048_mont_sqr_90(t[22], t[11], m, mp);
+ sp_2048_mont_mul_90(t[23], t[12], t[11], m, mp);
+ sp_2048_mont_sqr_90(t[24], t[12], m, mp);
+ sp_2048_mont_mul_90(t[25], t[13], t[12], m, mp);
+ sp_2048_mont_sqr_90(t[26], t[13], m, mp);
+ sp_2048_mont_mul_90(t[27], t[14], t[13], m, mp);
+ sp_2048_mont_sqr_90(t[28], t[14], m, mp);
+ sp_2048_mont_mul_90(t[29], t[15], t[14], m, mp);
+ sp_2048_mont_sqr_90(t[30], t[15], m, mp);
+ sp_2048_mont_mul_90(t[31], t[16], t[15], m, mp);
+
+ /* Round bit count up to a multiple of the 5-bit window size. */
+ bits = ((bits + 4) / 5) * 5;
+ i = ((bits + 22) / 23) - 1;
+ c = bits % 23;
+ if (c == 0) {
+ c = 23;
+ }
+ /* Position the top c exponent bits at the top of the 32-bit n. */
+ if (i < 90) {
+ n = e[i--] << (32 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ /* Top up n when fewer than a full window of bits remain in it;
+  * (9 - c) == 32 - 23 - c aligns the next digit below them. */
+ if (c < 5) {
+ n |= e[i--] << (9 - c);
+ c += 23;
+ }
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ XMEMCPY(rt, t[y], sizeof(rt));
+ for (; i>=0 || c>=5; ) {
+ if (c < 5) {
+ n |= e[i--] << (9 - c);
+ c += 23;
+ }
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+
+ /* Five squarings then one multiply by the windowed power. */
+ sp_2048_mont_sqr_90(rt, rt, m, mp);
+ sp_2048_mont_sqr_90(rt, rt, m, mp);
+ sp_2048_mont_sqr_90(rt, rt, m, mp);
+ sp_2048_mont_sqr_90(rt, rt, m, mp);
+ sp_2048_mont_sqr_90(rt, rt, m, mp);
+
+ sp_2048_mont_mul_90(rt, rt, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form and fully reduce below m. */
+ sp_2048_mont_reduce_90(rt, m, mp);
+ n = sp_2048_cmp_90(rt, m);
+ sp_2048_cond_sub_90(rt, rt, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, rt, sizeof(rt));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+ /* WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * Computes out = in^em mod mm. The public exponent must fit in one
+ * 23-bit digit; since it is public, a plain left-to-right
+ * square-and-multiply is used (no side-channel countermeasures needed).
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* d = NULL;
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+ sp_digit* norm;
+ sp_digit e[1] = {0};
+ sp_digit mp;
+ int i;
+ int err = MP_OKAY;
+
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+
+ /* Validate sizes: exponent one digit, base and modulus 2048 bits. */
+ if (err == MP_OKAY) {
+ if (mp_count_bits(em) > 23) {
+ err = MP_READ_E;
+ }
+ if (inLen > 256U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 90 * 2;
+ m = r + 90 * 2;
+ /* norm shares r's buffer; r is not live until after conversion. */
+ norm = r;
+
+ sp_2048_from_bin(a, 90, in, inLen);
+#if DIGIT_BIT >= 23
+ e[0] = (sp_digit)em->dp[0];
+#else
+ e[0] = (sp_digit)em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(m, 90, mm);
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_90(norm, m);
+ }
+ if (err == MP_OKAY) {
+ /* Convert the base into Montgomery form. */
+ sp_2048_mul_90(a, a, norm);
+ err = sp_2048_mod_90(a, a, m);
+ }
+ if (err == MP_OKAY) {
+ /* Find the highest set bit of the exponent. */
+ for (i=22; i>=0; i--) {
+ if ((e[0] >> i) != 0) {
+ break;
+ }
+ }
+
+ /* Left-to-right square-and-multiply on the public exponent. */
+ XMEMCPY(r, a, sizeof(sp_digit) * 90 * 2);
+ for (i--; i>=0; i--) {
+ sp_2048_mont_sqr_90(r, r, m, mp);
+
+ if (((e[0] >> i) & 1) == 1) {
+ sp_2048_mont_mul_90(r, r, a, m, mp);
+ }
+ }
+ sp_2048_mont_reduce_90(r, m, mp);
+ /* mp is reused here as the comparison result. */
+ mp = sp_2048_cmp_90(r, m);
+ sp_2048_cond_sub_90(r, r, m, ((mp < 0) ?
+ (sp_digit)1 : (sp_digit)0)- 1);
+
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit ad[180], md[90], rd[180];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+ sp_digit e[1] = {0};
+ int err = MP_OKAY;
+
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(em) > 23) {
+ err = MP_READ_E;
+ }
+ if (inLen > 256U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 90 * 2;
+ m = r + 90 * 2;
+ }
+#else
+ a = ad;
+ m = md;
+ r = rd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_2048_from_bin(a, 90, in, inLen);
+#if DIGIT_BIT >= 23
+ e[0] = (sp_digit)em->dp[0];
+#else
+ e[0] = (sp_digit)em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(m, 90, mm);
+
+ /* Common case e == 3: just square and multiply, no Montgomery. */
+ if (e[0] == 0x3) {
+ sp_2048_sqr_90(r, a);
+ err = sp_2048_mod_90(r, r, m);
+ if (err == MP_OKAY) {
+ sp_2048_mul_90(r, a, r);
+ err = sp_2048_mod_90(r, r, m);
+ }
+ }
+ else {
+ sp_digit* norm = r;
+ int i;
+ sp_digit mp;
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_90(norm, m);
+
+ sp_2048_mul_90(a, a, norm);
+ err = sp_2048_mod_90(a, a, m);
+
+ if (err == MP_OKAY) {
+ for (i=22; i>=0; i--) {
+ if ((e[0] >> i) != 0) {
+ break;
+ }
+ }
+
+ XMEMCPY(r, a, sizeof(sp_digit) * 180U);
+ for (i--; i>=0; i--) {
+ sp_2048_mont_sqr_90(r, r, m, mp);
+
+ if (((e[0] >> i) & 1) == 1) {
+ sp_2048_mont_mul_90(r, r, a, m, mp);
+ }
+ }
+ sp_2048_mont_reduce_90(r, m, mp);
+ mp = sp_2048_cmp_90(r, m);
+ sp_2048_cond_sub_90(r, r, m, ((mp < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#endif
+
+ return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
+/* RSA private key operation.
+ *
+ * With SP_RSA_PRIVATE_EXP_D or RSA_LOW_MEM: straight d-exponentiation
+ * mod the full 2048-bit modulus. Otherwise: CRT — two 1024-bit
+ * exponentiations mod p and q recombined via Garner's formula using
+ * qInv. Private-key data is zeroized before return.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 2048) {
+ err = MP_READ_E;
+ }
+ if (inLen > 256) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 90;
+ m = a + 180;
+ /* r aliases a: the exponentiation result overwrites the base. */
+ r = a;
+
+ sp_2048_from_bin(a, 90, in, inLen);
+ sp_2048_from_mp(d, 90, dm);
+ sp_2048_from_mp(m, 90, mm);
+ err = sp_2048_mod_exp_90(r, a, d, 2048, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+ if (d != NULL) {
+ /* Zeroize the private exponent before freeing. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 90);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+ sp_digit a[180], d[90], m[90];
+ sp_digit* r = a;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 2048) {
+ err = MP_READ_E;
+ }
+ if (inLen > 256U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_from_bin(a, 90, in, inLen);
+ sp_2048_from_mp(d, 90, dm);
+ sp_2048_from_mp(m, 90, mm);
+ err = sp_2048_mod_exp_90(r, a, d, 2048, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+ /* Zeroize the private exponent. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 90);
+
+ return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#else
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* t = NULL;
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* dq;
+ sp_digit* qi;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (inLen > 256) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 45 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ /* Carve the one allocation up; dp/dq/qi share a buffer since they
+  * are only needed one at a time. */
+ a = t;
+ p = a + 90 * 2;
+ q = p + 45;
+ qi = dq = dp = q + 45;
+ tmpa = qi + 45;
+ tmpb = tmpa + 90;
+
+ r = t + 90;
+
+ sp_2048_from_bin(a, 90, in, inLen);
+ sp_2048_from_mp(p, 45, pm);
+ sp_2048_from_mp(q, 45, qm);
+ sp_2048_from_mp(dp, 45, dpm);
+ /* tmpa = in^dP mod p */
+ err = sp_2048_mod_exp_45(tmpa, a, dp, 1024, p, 1);
+ }
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(dq, 45, dqm);
+ /* tmpb = in^dQ mod q */
+ err = sp_2048_mod_exp_45(tmpb, a, dq, 1024, q, 1);
+ }
+ if (err == MP_OKAY) {
+ /* Garner recombination: h = qInv * (tmpa - tmpb) mod p.
+  * Two conditional adds cover a difference as low as -2p. */
+ (void)sp_2048_sub_45(tmpa, tmpa, tmpb);
+ sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31));
+ sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31));
+
+ sp_2048_from_mp(qi, 45, qim);
+ sp_2048_mul_45(tmpa, tmpa, qi);
+ err = sp_2048_mod_45(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* Result = tmpb + q * h. */
+ sp_2048_mul_45(tmpa, q, tmpa);
+ (void)sp_2048_add_90(r, tmpb, tmpa);
+ sp_2048_norm_90(r);
+
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+ if (t != NULL) {
+ /* Zeroize all CRT private-key material before freeing. */
+ XMEMSET(t, 0, sizeof(sp_digit) * 45 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+ sp_digit a[90 * 2];
+ sp_digit p[45], q[45], dp[45], dq[45], qi[45];
+ sp_digit tmpa[90], tmpb[90];
+ sp_digit* r = a;
+ int err = MP_OKAY;
+
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (inLen > 256U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_from_bin(a, 90, in, inLen);
+ sp_2048_from_mp(p, 45, pm);
+ sp_2048_from_mp(q, 45, qm);
+ sp_2048_from_mp(dp, 45, dpm);
+ sp_2048_from_mp(dq, 45, dqm);
+ sp_2048_from_mp(qi, 45, qim);
+
+ err = sp_2048_mod_exp_45(tmpa, a, dp, 1024, p, 1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_2048_mod_exp_45(tmpb, a, dq, 1024, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ /* Garner recombination as in the heap-allocated branch above. */
+ (void)sp_2048_sub_45(tmpa, tmpa, tmpb);
+ sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31));
+ sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31));
+ sp_2048_mul_45(tmpa, tmpa, qi);
+ err = sp_2048_mod_45(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_mul_45(tmpa, tmpa, q);
+ (void)sp_2048_add_90(r, tmpb, tmpa);
+ sp_2048_norm_90(r);
+
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+ /* Zeroize all stack-resident private-key material. */
+ XMEMSET(tmpa, 0, sizeof(tmpa));
+ XMEMSET(tmpb, 0, sizeof(tmpb));
+ XMEMSET(p, 0, sizeof(p));
+ XMEMSET(q, 0, sizeof(q));
+ XMEMSET(dp, 0, sizeof(dp));
+ XMEMSET(dq, 0, sizeof(dq));
+ XMEMSET(qi, 0, sizeof(qi));
+
+ return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+}
+
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Repacks 90 23-bit digits into mp_digits, with three cases depending
+ * on the relative digit sizes (equal, mp smaller, mp larger).
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 23
+ /* Same digit size: direct copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 90);
+ r->used = 90;
+ mp_clamp(r);
+#elif DIGIT_BIT < 23
+ /* mp digits are smaller: each 23-bit digit spills over several
+  * mp digits; s tracks the bit offset into the current sp digit. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 90; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 23) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 23 - s;
+ }
+ r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp digits are larger: accumulate sp digits into each mp digit,
+  * splitting one sp digit across the boundary when it overflows. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 90; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 23 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 23 - s;
+ }
+ else {
+ s += 23;
+ }
+ }
+ r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * Validates operand sizes, converts the mp_int operands into 23-bit
+ * digit form, delegates to sp_2048_mod_exp_90 and converts the result
+ * back. The exponent copy is zeroized before return.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int err = MP_OKAY;
+ sp_digit* d = NULL;
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 2048) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* r aliases b: the result overwrites the base buffer. */
+ b = d;
+ e = b + 90 * 2;
+ m = e + 90;
+ r = b;
+
+ sp_2048_from_mp(b, 90, base);
+ sp_2048_from_mp(e, 90, exp);
+ sp_2048_from_mp(m, 90, mod);
+
+ err = sp_2048_mod_exp_90(r, b, e, mp_count_bits(exp), m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_2048_to_mp(r, res);
+ }
+
+ if (d != NULL) {
+ /* Zeroize the exponent (may be a DH private key) before freeing. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit bd[180], ed[90], md[90];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 2048) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 90 * 2;
+ m = e + 90;
+ r = b;
+ }
+#else
+ r = b = bd;
+ e = ed;
+ m = md;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(b, 90, base);
+ sp_2048_from_mp(e, 90, exp);
+ sp_2048_from_mp(m, 90, mod);
+
+ err = sp_2048_mod_exp_90(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_2048_to_mp(r, res);
+ }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (d != NULL) {
+ /* Zeroize the exponent (may be a DH private key) before freeing. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+#endif
+
+ return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+SP_NOINLINE static void sp_2048_lshift_90(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ r[90] = a[89] >> (23 - n);
+ for (i=89; i>0; i--) {
+ r[i] = ((a[i] << n) | (a[i-1] >> (23 - n))) & 0x7fffff;
+ }
+#else
+ sp_int_digit s, t;
+
+ s = (sp_int_digit)a[89];
+ r[90] = s >> (23U - n);
+ s = (sp_int_digit)(a[89]); t = (sp_int_digit)(a[88]);
+ r[89] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[88]); t = (sp_int_digit)(a[87]);
+ r[88] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[87]); t = (sp_int_digit)(a[86]);
+ r[87] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[86]); t = (sp_int_digit)(a[85]);
+ r[86] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[85]); t = (sp_int_digit)(a[84]);
+ r[85] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[84]); t = (sp_int_digit)(a[83]);
+ r[84] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[83]); t = (sp_int_digit)(a[82]);
+ r[83] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[82]); t = (sp_int_digit)(a[81]);
+ r[82] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[81]); t = (sp_int_digit)(a[80]);
+ r[81] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[80]); t = (sp_int_digit)(a[79]);
+ r[80] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[79]); t = (sp_int_digit)(a[78]);
+ r[79] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[78]); t = (sp_int_digit)(a[77]);
+ r[78] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]);
+ r[77] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]);
+ r[76] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]);
+ r[75] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]);
+ r[74] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]);
+ r[73] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]);
+ r[72] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]);
+ r[71] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]);
+ r[70] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]);
+ r[69] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]);
+ r[68] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]);
+ r[67] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]);
+ r[66] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]);
+ r[65] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]);
+ r[64] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]);
+ r[63] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]);
+ r[62] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]);
+ r[61] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]);
+ r[60] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]);
+ r[59] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]);
+ r[58] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]);
+ r[57] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]);
+ r[56] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]);
+ r[55] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]);
+ r[54] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]);
+ r[53] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]);
+ r[52] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]);
+ r[51] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]);
+ r[50] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]);
+ r[49] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]);
+ r[48] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]);
+ r[47] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]);
+ r[46] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]);
+ r[45] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]);
+ r[44] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]);
+ r[43] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]);
+ r[42] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]);
+ r[41] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]);
+ r[40] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]);
+ r[39] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]);
+ r[38] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]);
+ r[37] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]);
+ r[36] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
+ r[35] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
+ r[34] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
+ r[33] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
+ r[32] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
+ r[31] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
+ r[30] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
+ r[29] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
+ r[28] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
+ r[27] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
+ r[26] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
+ r[25] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
+ r[24] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
+ r[23] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
+ r[22] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
+ r[21] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
+ r[20] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
+ r[19] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
+ r[18] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
+ r[17] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
+ r[16] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
+ r[15] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
+ r[14] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
+ r[13] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
+ r[12] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
+ r[11] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
+ r[10] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
+ r[9] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
+ r[8] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
+ r[7] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
+ r[6] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
+ r[5] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
+ r[4] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
+ r[3] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
+ r[2] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+ s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
+ r[1] = ((s << n) | (t >> (23U - n))) & 0x7fffff;
+#endif
+ r[0] = (a[0] << n) & 0x7fffff;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_90(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit nd[180];
+ sp_digit td[91];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 271, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 180;
+ XMEMSET(td, 0, sizeof(sp_digit) * 271);
+#else
+ norm = nd;
+ tmp = td;
+ XMEMSET(td, 0, sizeof(td));
+#endif
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_90(norm, m);
+
+ bits = ((bits + 3) / 4) * 4;
+ i = ((bits + 22) / 23) - 1;
+ c = bits % 23;
+ if (c == 0) {
+ c = 23;
+ }
+ if (i < 90) {
+ n = e[i--] << (32 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ if (c < 4) {
+ n |= e[i--] << (9 - c);
+ c += 23;
+ }
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+ sp_2048_lshift_90(r, norm, y);
+ for (; i>=0 || c>=4; ) {
+ if (c < 4) {
+ n |= e[i--] << (9 - c);
+ c += 23;
+ }
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+
+ sp_2048_mont_sqr_90(r, r, m, mp);
+ sp_2048_mont_sqr_90(r, r, m, mp);
+ sp_2048_mont_sqr_90(r, r, m, mp);
+ sp_2048_mont_sqr_90(r, r, m, mp);
+
+ sp_2048_lshift_90(r, r, y);
+ sp_2048_mul_d_90(tmp, norm, (r[90] << 22) + (r[89] >> 1));
+ r[90] = 0;
+ r[89] &= 0x1L;
+ (void)sp_2048_add_90(r, r, tmp);
+ sp_2048_norm_90(r);
+ o = sp_2048_cmp_90(r, m);
+ sp_2048_cond_sub_90(r, r, m, ((o < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ }
+
+ sp_2048_mont_reduce_90(r, m, mp);
+ n = sp_2048_cmp_90(r, m);
+ sp_2048_cond_sub_90(r, r, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+#endif /* HAVE_FFDHE_2048 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int err = MP_OKAY;
+ sp_digit* d = NULL;
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ word32 i;
+
+ if (mp_count_bits(base) > 2048) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 256) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 90 * 2;
+ m = e + 90;
+ r = b;
+
+ sp_2048_from_mp(b, 90, base);
+ sp_2048_from_bin(e, 90, exp, expLen);
+ sp_2048_from_mp(m, 90, mod);
+
+ #ifdef HAVE_FFDHE_2048
+ if (base->used == 1 && base->dp[0] == 2 &&
+ ((m[89] << 15) | (m[88] >> 8)) == 0xffffL) {
+ err = sp_2048_mod_exp_2_90(r, e, expLen * 8, m);
+ }
+ else
+ #endif
+ err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ for (i=0; i<256 && out[i] == 0; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+ }
+
+ if (d != NULL) {
+ XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit bd[180], ed[90], md[90];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ word32 i;
+ int err = MP_OKAY;
+
+ if (mp_count_bits(base) > 2048) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 256U) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+#ifdef WOLFSSL_SMALL_STACK
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 90 * 2;
+ m = e + 90;
+ r = b;
+ }
+#else
+ r = b = bd;
+ e = ed;
+ m = md;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(b, 90, base);
+ sp_2048_from_bin(e, 90, exp, expLen);
+ sp_2048_from_mp(m, 90, mod);
+
+ #ifdef HAVE_FFDHE_2048
+ if (base->used == 1 && base->dp[0] == 2U &&
+ ((m[89] << 15) | (m[88] >> 8)) == 0xffffL) {
+ err = sp_2048_mod_exp_2_90(r, e, expLen * 8U, m);
+ }
+ else {
+ #endif
+ err = sp_2048_mod_exp_90(r, b, e, expLen * 8U, m, 0);
+ #ifdef HAVE_FFDHE_2048
+ }
+ #endif
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ for (i=0; i<256U && out[i] == 0U; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (d != NULL) {
+ XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 90U);
+#endif
+
+ return err;
+#endif
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int err = MP_OKAY;
+ sp_digit* d = NULL;
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 1024) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 45 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 45 * 2;
+ m = e + 45;
+ r = b;
+
+ sp_2048_from_mp(b, 45, base);
+ sp_2048_from_mp(e, 45, exp);
+ sp_2048_from_mp(m, 45, mod);
+
+ err = sp_2048_mod_exp_45(r, b, e, mp_count_bits(exp), m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(r + 45, 0, sizeof(*r) * 45U);
+ err = sp_2048_to_mp(r, res);
+ }
+
+ if (d != NULL) {
+ XMEMSET(e, 0, sizeof(sp_digit) * 45U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit bd[90], ed[45], md[45];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 1024) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 45 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 45 * 2;
+ m = e + 45;
+ r = b;
+ }
+#else
+ r = b = bd;
+ e = ed;
+ m = md;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(b, 45, base);
+ sp_2048_from_mp(e, 45, exp);
+ sp_2048_from_mp(m, 45, mod);
+
+ err = sp_2048_mod_exp_45(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(r + 45, 0, sizeof(*r) * 45U);
+ err = sp_2048_to_mp(r, res);
+ }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (d != NULL) {
+ XMEMSET(e, 0, sizeof(sp_digit) * 45U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 45U);
+#endif
+
+ return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
+
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size  Maximum number of digits to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 15U) {
+ r[j] &= 0x7fffff;
+ s = 23U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size  Maximum number of digits to convert
+ * a A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 23
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 23
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0x7fffff;
+ s = 23U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 23U) <= (word32)DIGIT_BIT) {
+ s += 23U;
+ r[j] &= 0x7fffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 23) {
+ r[j] &= 0x7fffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 23 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_3072_to_bin(sp_digit* r, byte* a)
+{
+ int i, j, s = 0, b;
+
+ for (i=0; i<133; i++) {
+ r[i+1] += r[i] >> 23;
+ r[i] &= 0x7fffff;
+ }
+ j = 3072 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<134 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ while (b < 23) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ s = 8 - (b - 23);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_67(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i, j;
+ int64_t t[134];
+
+ XMEMSET(t, 0, sizeof(t));
+ for (i=0; i<67; i++) {
+ for (j=0; j<67; j++) {
+ t[i+j] += ((int64_t)a[i]) * b[j];
+ }
+ }
+ for (i=0; i<133; i++) {
+ r[i] = t[i] & 0x7fffff;
+ t[i+1] += t[i] >> 23;
+ }
+ r[133] = (sp_digit)t[133];
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_67(sp_digit* r, const sp_digit* a)
+{
+ int i, j;
+ int64_t t[134];
+
+ XMEMSET(t, 0, sizeof(t));
+ for (i=0; i<67; i++) {
+ for (j=0; j<i; j++) {
+ t[i+j] += (((int64_t)a[i]) * a[j]) * 2;
+ }
+ t[i+i] += ((int64_t)a[i]) * a[i];
+ }
+ for (i=0; i<133; i++) {
+ r[i] = t[i] & 0x7fffff;
+ t[i+1] += t[i] >> 23;
+ }
+ r[133] = (sp_digit)t[133];
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_67(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 64; i += 8) {
+ r[i + 0] = a[i + 0] + b[i + 0];
+ r[i + 1] = a[i + 1] + b[i + 1];
+ r[i + 2] = a[i + 2] + b[i + 2];
+ r[i + 3] = a[i + 3] + b[i + 3];
+ r[i + 4] = a[i + 4] + b[i + 4];
+ r[i + 5] = a[i + 5] + b[i + 5];
+ r[i + 6] = a[i + 6] + b[i + 6];
+ r[i + 7] = a[i + 7] + b[i + 7];
+ }
+ r[64] = a[64] + b[64];
+ r[65] = a[65] + b[65];
+ r[66] = a[66] + b[66];
+
+ return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_134(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 128; i += 8) {
+ r[i + 0] = a[i + 0] + b[i + 0];
+ r[i + 1] = a[i + 1] + b[i + 1];
+ r[i + 2] = a[i + 2] + b[i + 2];
+ r[i + 3] = a[i + 3] + b[i + 3];
+ r[i + 4] = a[i + 4] + b[i + 4];
+ r[i + 5] = a[i + 5] + b[i + 5];
+ r[i + 6] = a[i + 6] + b[i + 6];
+ r[i + 7] = a[i + 7] + b[i + 7];
+ }
+ r[128] = a[128] + b[128];
+ r[129] = a[129] + b[129];
+ r[130] = a[130] + b[130];
+ r[131] = a[131] + b[131];
+ r[132] = a[132] + b[132];
+ r[133] = a[133] + b[133];
+
+ return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_134(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 128; i += 8) {
+ r[i + 0] = a[i + 0] - b[i + 0];
+ r[i + 1] = a[i + 1] - b[i + 1];
+ r[i + 2] = a[i + 2] - b[i + 2];
+ r[i + 3] = a[i + 3] - b[i + 3];
+ r[i + 4] = a[i + 4] - b[i + 4];
+ r[i + 5] = a[i + 5] - b[i + 5];
+ r[i + 6] = a[i + 6] - b[i + 6];
+ r[i + 7] = a[i + 7] - b[i + 7];
+ }
+ r[128] = a[128] - b[128];
+ r[129] = a[129] - b[129];
+ r[130] = a[130] - b[130];
+ r[131] = a[131] - b[131];
+ r[132] = a[132] - b[132];
+ r[133] = a[133] - b[133];
+
+ return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_134(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[134];
+ sp_digit* a1 = z1;
+ sp_digit b1[67];
+ sp_digit* z2 = r + 134;
+ (void)sp_3072_add_67(a1, a, &a[67]);
+ (void)sp_3072_add_67(b1, b, &b[67]);
+ sp_3072_mul_67(z2, &a[67], &b[67]);
+ sp_3072_mul_67(z0, a, b);
+ sp_3072_mul_67(z1, a1, b1);
+ (void)sp_3072_sub_134(z1, z1, z2);
+ (void)sp_3072_sub_134(z1, z1, z0);
+ (void)sp_3072_add_134(r + 67, r + 67, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_134(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[134];
+ sp_digit* a1 = z1;
+ sp_digit* z2 = r + 134;
+ (void)sp_3072_add_67(a1, a, &a[67]);
+ sp_3072_sqr_67(z2, &a[67]);
+ sp_3072_sqr_67(z0, a);
+ sp_3072_sqr_67(z1, a1);
+ (void)sp_3072_sub_134(z1, z1, z2);
+ (void)sp_3072_sub_134(z1, z1, z0);
+ (void)sp_3072_add_134(r + 67, r + 67, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_134(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 134; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_134(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 134; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_134(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i, j, k;
+ int64_t c;
+
+ c = ((int64_t)a[133]) * b[133];
+ r[267] = (sp_digit)(c >> 23);
+ c = (c & 0x7fffff) << 23;
+ for (k = 265; k >= 0; k--) {
+ for (i = 133; i >= 0; i--) {
+ j = k - i;
+ if (j >= 134) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int64_t)a[i]) * b[j];
+ }
+ r[k + 2] += c >> 46;
+ r[k + 1] = (c >> 23) & 0x7fffff;
+ c = (c & 0x7fffff) << 23;
+ }
+ r[0] = (sp_digit)(c >> 23);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_134(sp_digit* r, const sp_digit* a)
+{
+ int i, j, k;
+ int64_t c;
+
+ c = ((int64_t)a[133]) * a[133];
+ r[267] = (sp_digit)(c >> 23);
+ c = (c & 0x7fffff) << 23;
+ for (k = 265; k >= 0; k--) {
+ for (i = 133; i >= 0; i--) {
+ j = k - i;
+ if (j >= 134 || i <= j) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int64_t)a[i]) * a[j] * 2;
+ }
+ if (i == j) {
+ c += ((int64_t)a[i]) * a[i];
+ }
+
+ r[k + 2] += c >> 46;
+ r[k + 1] = (c >> 23) & 0x7fffff;
+ c = (c & 0x7fffff) << 23;
+ }
+ r[0] = (sp_digit)(c >> 23);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_67(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 67; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_67(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 67; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_67(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 64; i += 8) {
+ r[i + 0] = a[i + 0] - b[i + 0];
+ r[i + 1] = a[i + 1] - b[i + 1];
+ r[i + 2] = a[i + 2] - b[i + 2];
+ r[i + 3] = a[i + 3] - b[i + 3];
+ r[i + 4] = a[i + 4] - b[i + 4];
+ r[i + 5] = a[i + 5] - b[i + 5];
+ r[i + 6] = a[i + 6] - b[i + 6];
+ r[i + 7] = a[i + 7] - b[i + 7];
+ }
+ r[64] = a[64] - b[64];
+ r[65] = a[65] - b[65];
+ r[66] = a[66] - b[66];
+
+ return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_67(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i, j, k;
+ int64_t c;
+
+ c = ((int64_t)a[66]) * b[66];
+ r[133] = (sp_digit)(c >> 23);
+ c = (c & 0x7fffff) << 23;
+ for (k = 131; k >= 0; k--) {
+ for (i = 66; i >= 0; i--) {
+ j = k - i;
+ if (j >= 67) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int64_t)a[i]) * b[j];
+ }
+ r[k + 2] += c >> 46;
+ r[k + 1] = (c >> 23) & 0x7fffff;
+ c = (c & 0x7fffff) << 23;
+ }
+ r[0] = (sp_digit)(c >> 23);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_67(sp_digit* r, const sp_digit* a)
+{
+ int i, j, k;
+ int64_t c;
+
+ c = ((int64_t)a[66]) * a[66];
+ r[133] = (sp_digit)(c >> 23);
+ c = (c & 0x7fffff) << 23;
+ for (k = 131; k >= 0; k--) {
+ for (i = 66; i >= 0; i--) {
+ j = k - i;
+ if (j >= 67 || i <= j) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int64_t)a[i]) * a[j] * 2;
+ }
+ if (i == j) {
+ c += ((int64_t)a[i]) * a[i];
+ }
+
+ r[k + 2] += c >> 46;
+ r[k + 1] = (c >> 23) & 0x7fffff;
+ c = (c & 0x7fffff) << 23;
+ }
+ r[0] = (sp_digit)(c >> 23);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit x, b;
+
+ b = a[0];
+ x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+ x &= 0x7fffff;
+
+ /* rho = -1/m mod b */
+ *rho = (1L << 23) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * Digits hold 23 bits; products are accumulated in 64 bits and the carry
+ * (bits above 23) propagates into the next word. r needs 135 words.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_d_134(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0; /* running product + carry */
+ int i;
+
+ for (i = 0; i < 134; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x7fffff; /* low 23 bits become this word */
+ t >>= 23; /* carry into the next word */
+ }
+ r[134] = (sp_digit)t;
+#else
+ /* Unrolled by 8: t[] is a rotating window of 64-bit partial products. */
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+ for (i = 0; i < 128; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+ }
+ /* Tail: words 129..133 plus the final carry word. */
+ t[1] = tb * a[129];
+ r[129] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ t[2] = tb * a[130];
+ r[130] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+ t[3] = tb * a[131];
+ r[131] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+ t[4] = tb * a[132];
+ r[132] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+ t[5] = tb * a[133];
+ r[133] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+ r[134] = (sp_digit)(t[5] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 1536 bits, just need to subtract.
+ *
+ * 67-word variant used for the 1536-bit CRT halves: the top word holds
+ * only 18 bits (1536 = 66*23 + 18), hence the 0x3ffff mask on r[66].
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_67(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<66; i++) {
+ r[i] = 0x7fffff;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 64; i += 8) {
+ r[i + 0] = 0x7fffff;
+ r[i + 1] = 0x7fffff;
+ r[i + 2] = 0x7fffff;
+ r[i + 3] = 0x7fffff;
+ r[i + 4] = 0x7fffff;
+ r[i + 5] = 0x7fffff;
+ r[i + 6] = 0x7fffff;
+ r[i + 7] = 0x7fffff;
+ }
+ r[64] = 0x7fffff;
+ r[65] = 0x7fffff;
+#endif
+ r[66] = 0x3ffffL; /* top word: 18 bits only */
+
+ /* r = (2^n - 1) mod n */
+ (void)sp_3072_sub_67(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word down. The mask
+ * (0 - (r == 0)) is all-ones until the first differing word is folded
+ * into r, after which further differences are ignored; no data-dependent
+ * branches are taken.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_3072_cmp_67(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=66; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[66] - b[66]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[65] - b[65]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[64] - b[64]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ for (i = 56; i >= 0; i -= 8) {
+ r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#endif /* WOLFSSL_SP_SMALL */
+
+ return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * The mask is ANDed with each word of b so the same instruction stream
+ * executes in both cases (branch-free, constant time). r may alias a.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_sub_67(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 67; i++) {
+ r[i] = a[i] - (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 64; i += 8) {
+ r[i + 0] = a[i + 0] - (b[i + 0] & m);
+ r[i + 1] = a[i + 1] - (b[i + 1] & m);
+ r[i + 2] = a[i + 2] - (b[i + 2] & m);
+ r[i + 3] = a[i + 3] - (b[i + 3] & m);
+ r[i + 4] = a[i + 4] - (b[i + 4] & m);
+ r[i + 5] = a[i + 5] - (b[i + 5] & m);
+ r[i + 6] = a[i + 6] - (b[i + 6] & m);
+ r[i + 7] = a[i + 7] - (b[i + 7] & m);
+ }
+ r[64] = a[64] - (b[64] & m);
+ r[65] = a[65] - (b[65] & m);
+ r[66] = a[66] - (b[66] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Core step of Montgomery reduction. 23-bit digits; products are taken
+ * in 64 bits with the carry flowing into the next word. Note the words
+ * of r are NOT re-normalized here - callers propagate carries afterwards.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_add_67(sp_digit* r, const sp_digit* a,
+ const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0; /* product + incoming r word + carry */
+ int i;
+
+ for (i = 0; i < 67; i++) {
+ t += (tb * a[i]) + r[i];
+ r[i] = t & 0x7fffff;
+ t >>= 23;
+ }
+ r[67] += t;
+#else
+ /* Unrolled by 8: t[] is a rotating window of 64-bit partial products. */
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
+ for (i = 0; i < 64; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+ t[2] = tb * a[i+2];
+ r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+ t[3] = tb * a[i+3];
+ r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
+ t[4] = tb * a[i+4];
+ r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
+ t[5] = tb * a[i+5];
+ r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
+ t[6] = tb * a[i+6];
+ r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
+ t[7] = tb * a[i+7];
+ r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
+ t[0] = tb * a[i+8];
+ r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
+ }
+ t[1] = tb * a[65]; r[65] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+ t[2] = tb * a[66]; r[66] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+ r[67] += (sp_digit)(t[2] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 23.
+ *
+ * Propagates each word's carry (bits >= 23) into the next word and masks
+ * the word back to 23 bits. The top word a[66] is left un-masked.
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_3072_norm_67(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ for (i = 0; i < 66; i++) {
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+ }
+#else
+ int i;
+ for (i = 0; i < 64; i += 8) {
+ a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
+ a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
+ a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
+ a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
+ a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
+ a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
+ a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
+ a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
+ a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
+ }
+ a[64+1] += a[64] >> 23;
+ a[64] &= 0x7fffff;
+ a[65+1] += a[65] >> 23;
+ a[65] &= 0x7fffff;
+#endif
+}
+
+/* Shift the result in the high 1536 bits down to the bottom.
+ *
+ * 1536 = 66*23 + 18, so the boundary sits 18 bits into word 66: the
+ * ">> 18" pulls the top 5 bits of a[66] and each subsequent word is
+ * shifted left by 5 (23 - 18) as it is folded in.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_3072_mont_shift_67(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ sp_digit n, s;
+
+ s = a[67];
+ n = a[66] >> 18;
+ for (i = 0; i < 66; i++) {
+ n += (s & 0x7fffff) << 5;
+ r[i] = n & 0x7fffff;
+ n >>= 23;
+ s = a[68 + i] + (s >> 23);
+ }
+ n += s << 5;
+ r[66] = n;
+#else
+ sp_digit n, s;
+ int i;
+
+ s = a[67]; n = a[66] >> 18;
+ for (i = 0; i < 64; i += 8) {
+ n += (s & 0x7fffff) << 5; r[i+0] = n & 0x7fffff;
+ n >>= 23; s = a[i+68] + (s >> 23);
+ n += (s & 0x7fffff) << 5; r[i+1] = n & 0x7fffff;
+ n >>= 23; s = a[i+69] + (s >> 23);
+ n += (s & 0x7fffff) << 5; r[i+2] = n & 0x7fffff;
+ n >>= 23; s = a[i+70] + (s >> 23);
+ n += (s & 0x7fffff) << 5; r[i+3] = n & 0x7fffff;
+ n >>= 23; s = a[i+71] + (s >> 23);
+ n += (s & 0x7fffff) << 5; r[i+4] = n & 0x7fffff;
+ n >>= 23; s = a[i+72] + (s >> 23);
+ n += (s & 0x7fffff) << 5; r[i+5] = n & 0x7fffff;
+ n >>= 23; s = a[i+73] + (s >> 23);
+ n += (s & 0x7fffff) << 5; r[i+6] = n & 0x7fffff;
+ n >>= 23; s = a[i+74] + (s >> 23);
+ n += (s & 0x7fffff) << 5; r[i+7] = n & 0x7fffff;
+ n >>= 23; s = a[i+75] + (s >> 23);
+ }
+ n += (s & 0x7fffff) << 5; r[64] = n & 0x7fffff;
+ n >>= 23; s = a[132] + (s >> 23);
+ n += (s & 0x7fffff) << 5; r[65] = n & 0x7fffff;
+ n >>= 23; s = a[133] + (s >> 23);
+ n += s << 5; r[66] = n;
+#endif /* WOLFSSL_SP_SMALL */
+ /* Clear the (now consumed) upper half. */
+ XMEMSET(&r[67], 0, sizeof(*r) * 67U);
+}
+
+/* Reduce the number back to 1536 bits using Montgomery reduction.
+ *
+ * Classic word-by-word Montgomery reduction (HAC 14.32): for each of the
+ * 67 low words, add mu*m so the word becomes zero mod 2^23, then shift
+ * the high half down and do one conditional final subtraction of m
+ * (branch-free mask keeps this constant time).
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_3072_mont_reduce_67(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+ int i;
+ sp_digit mu;
+
+ sp_3072_norm_67(a + 67);
+
+ for (i=0; i<66; i++) {
+ mu = (a[i] * mp) & 0x7fffff;
+ sp_3072_mul_add_67(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ }
+ /* Last word only holds 18 bits (1536 = 66*23 + 18). */
+ mu = (a[i] * mp) & 0x3ffffL;
+ sp_3072_mul_add_67(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+
+ sp_3072_mont_shift_67(a, a);
+ sp_3072_cond_sub_67(a, a, m, 0 - (((a[66] >> 18) > 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_3072_norm_67(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full double-width multiply followed by Montgomery reduction; r must
+ * have room for the 134-word intermediate product.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_67(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_3072_mul_67(r, a, b);
+ sp_3072_mont_reduce_67(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Dedicated squaring followed by Montgomery reduction; r must have room
+ * for the 134-word intermediate square.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_67(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_3072_sqr_67(r, a);
+ sp_3072_mont_reduce_67(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * 67-word variant of the scalar multiply; see sp_3072_mul_d_134 for the
+ * carry-chain scheme. r needs 68 words.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_d_67(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0; /* running product + carry */
+ int i;
+
+ for (i = 0; i < 67; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x7fffff;
+ t >>= 23;
+ }
+ r[67] = (sp_digit)t;
+#else
+ /* Unrolled by 8: t[] is a rotating window of 64-bit partial products. */
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+ for (i = 0; i < 64; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+ }
+ t[1] = tb * a[65];
+ r[65] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+ t[2] = tb * a[66];
+ r[66] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+ r[67] = (sp_digit)(t[2] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Branch-free companion of sp_3072_cond_sub_67; the mask is ANDed with
+ * each word of b so both cases execute identically. r may alias a.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_add_67(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 67; i++) {
+ r[i] = a[i] + (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 64; i += 8) {
+ r[i + 0] = a[i + 0] + (b[i + 0] & m);
+ r[i + 1] = a[i + 1] + (b[i + 1] & m);
+ r[i + 2] = a[i + 2] + (b[i + 2] & m);
+ r[i + 3] = a[i + 3] + (b[i + 3] & m);
+ r[i + 4] = a[i + 4] + (b[i + 4] & m);
+ r[i + 5] = a[i + 5] + (b[i + 5] & m);
+ r[i + 6] = a[i + 6] + (b[i + 6] & m);
+ r[i + 7] = a[i + 7] + (b[i + 7] & m);
+ }
+ r[64] = a[64] + (b[64] & m);
+ r[65] = a[65] + (b[65] & m);
+ r[66] = a[66] + (b[66] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Word-wise add with NO carry propagation - words may exceed 23 bits
+ * afterwards; callers normalize when needed. Always returns 0.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_67(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 67; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif
+#ifdef WOLFSSL_SP_DIV_32
+/* Divide the 46-bit value d1:d0 (23-bit digits) by dv without needing a
+ * 64-bit divide: the dividend is fed in as 8 + 8 + 7 bits of d0 so each
+ * partial dividend fits the 32-bit sp_digit.
+ */
+static WC_INLINE sp_digit sp_3072_div_word_67(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 23 bits from d1 and top 8 bits from d0. */
+ d = (d1 << 8) | (d0 >> 15);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 9 bits in r */
+ /* Next 8 bits from d0. */
+ r <<= 8;
+ d <<= 8;
+ d |= (d0 >> 7) & ((1 << 8) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 17 bits in r */
+ /* Remaining 7 bits from d0. */
+ r <<= 7;
+ d <<= 7;
+ d |= d0 & ((1 << 7) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook division on 23-bit digits: for each position, estimate the
+ * quotient digit from the top two dividend words and the top divisor
+ * word, subtract, then apply an add-back correction for any
+ * over-estimate. Finally the remainder is normalized and, if negative,
+ * d is conditionally added back (branch-free).
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_3072_div_67(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ int i;
+#ifndef WOLFSSL_SP_DIV_32
+ int64_t d1;
+#endif
+ sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* td;
+#else
+ sp_digit t1d[134], t2d[67 + 1];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 67 + 1), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = td;
+ t2 = td + 2 * 67;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ dv = d[66]; /* top divisor word used for the quotient estimate */
+ XMEMCPY(t1, a, sizeof(*t1) * 2U * 67U);
+ for (i=66; i>=0; i--) {
+ /* Normalize the top of the working dividend. */
+ t1[67 + i] += t1[67 + i - 1] >> 23;
+ t1[67 + i - 1] &= 0x7fffff;
+#ifndef WOLFSSL_SP_DIV_32
+ d1 = t1[67 + i];
+ d1 <<= 23;
+ d1 += t1[67 + i - 1];
+ r1 = (sp_digit)(d1 / dv);
+#else
+ r1 = sp_3072_div_word_67(t1[67 + i], t1[67 + i - 1], dv);
+#endif
+
+ /* Subtract r1 * d at this position. */
+ sp_3072_mul_d_67(t2, d, r1);
+ (void)sp_3072_sub_67(&t1[i], &t1[i], t2);
+ t1[67 + i] -= t2[67];
+ t1[67 + i] += t1[67 + i - 1] >> 23;
+ t1[67 + i - 1] &= 0x7fffff;
+ /* Correction: if the estimate was too large the top went negative;
+ * add back (r1+1) * d worth of the over-subtraction. */
+ r1 = (((-t1[67 + i]) << 23) - t1[67 + i - 1]) / dv;
+ r1++;
+ sp_3072_mul_d_67(t2, d, r1);
+ (void)sp_3072_add_67(&t1[i], &t1[i], t2);
+ t1[67 + i] += t1[67 + i - 1] >> 23;
+ t1[67 + i - 1] &= 0x7fffff;
+ }
+ t1[67 - 1] += t1[67 - 2] >> 23;
+ t1[67 - 2] &= 0x7fffff;
+ r1 = t1[67 - 1] / dv;
+
+ sp_3072_mul_d_67(t2, d, r1);
+ (void)sp_3072_sub_67(t1, t1, t2);
+ XMEMCPY(r, t1, sizeof(*r) * 2U * 67U);
+ for (i=0; i<65; i++) {
+ r[i+1] += r[i] >> 23;
+ r[i] &= 0x7fffff;
+ }
+ /* Remainder may be one subtraction low; add d back if negative. */
+ sp_3072_cond_add_67(r, r, d, 0 - ((r[66] < 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper: division with the quotient discarded (NULL).
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_3072_mod_67(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_3072_div_67(a, m, NULL, r);
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Three build-selected implementations:
+ *  - WOLFSSL_SP_SMALL: bit-at-a-time Montgomery ladder; the working value
+ *    is selected through addr_mask[] so no data-dependent addresses or
+ *    branches leak the exponent bits.
+ *  - WOLFSSL_SP_CACHE_RESISTANT: same ladder with a 3-entry table.
+ *  - default: 5-bit fixed-window exponentiation with a 32-entry
+ *    precomputed table.
+ *
+ * Fix: in the cache-resistant path, sizeof(t[2]) / sizeof(t[0]) are the
+ * size of a POINTER when WOLFSSL_SMALL_STACK is defined (t is then an
+ * array of sp_digit*), so the masked swap and the final result copy only
+ * moved 4/8 bytes. Use explicit element counts (sizeof(*t[2]) * 67 * 2),
+ * which is correct for both the array and the heap layout and matches
+ * the WOLFSSL_SP_SMALL path above.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_67(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+ const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* td;
+ sp_digit* t[3];
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 67 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(td, 0, sizeof(*td) * 3U * 67U * 2U);
+
+ norm = t[0] = td;
+ t[1] = &td[67 * 2];
+ t[2] = &td[2 * 67 * 2];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_67(norm, m);
+
+ /* Convert a into Montgomery form (reducing first if requested). */
+ if (reduceA != 0) {
+ err = sp_3072_mod_67(t[1], a, m);
+ }
+ else {
+ XMEMCPY(t[1], a, sizeof(sp_digit) * 67U);
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_3072_mul_67(t[1], t[1], norm);
+ err = sp_3072_mod_67(t[1], t[1], m);
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 23;
+ c = bits % 23;
+ n = e[i--] << (23 - c); /* top exponent word, MSB-aligned */
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 23;
+ }
+
+ y = (n >> 22) & 1; /* next exponent bit */
+ n <<= 1;
+
+ /* Ladder step: t[y^1] = t[0]*t[1]; t[y] squared - selected via
+ * addr_mask so the access pattern is independent of y. */
+ sp_3072_mont_mul_67(t[y^1], t[0], t[1], m, mp);
+
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])),
+ sizeof(*t[2]) * 67 * 2);
+ sp_3072_mont_sqr_67(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2],
+ sizeof(*t[2]) * 67 * 2);
+ }
+
+ /* Convert back out of Montgomery form and fully reduce. */
+ sp_3072_mont_reduce_67(t[0], m, mp);
+ n = sp_3072_cmp_67(t[0], m);
+ sp_3072_cond_sub_67(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(*r) * 67 * 2);
+
+ }
+
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[3][134];
+#else
+ sp_digit* td;
+ sp_digit* t[3];
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 67 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ t[0] = td;
+ t[1] = &td[67 * 2];
+ t[2] = &td[2 * 67 * 2];
+#endif
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_67(norm, m);
+
+ /* Convert a into Montgomery form (reducing first if requested). */
+ if (reduceA != 0) {
+ err = sp_3072_mod_67(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_3072_mul_67(t[1], t[1], norm);
+ err = sp_3072_mod_67(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_3072_mul_67(t[1], a, norm);
+ err = sp_3072_mod_67(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 23;
+ c = bits % 23;
+ n = e[i--] << (23 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 23;
+ }
+
+ y = (n >> 22) & 1;
+ n <<= 1;
+
+ sp_3072_mont_mul_67(t[y^1], t[0], t[1], m, mp);
+
+ /* Copy the full 134-word value. sizeof(t[2]) would be only the
+ * pointer size under WOLFSSL_SMALL_STACK, truncating the copy;
+ * use an explicit element count instead (valid in both configs). */
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), sizeof(*t[2]) * 67 * 2);
+ sp_3072_mont_sqr_67(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2], sizeof(*t[2]) * 67 * 2);
+ }
+
+ sp_3072_mont_reduce_67(t[0], m, mp);
+ n = sp_3072_cmp_67(t[0], m);
+ sp_3072_cond_sub_67(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(*t[0]) * 67 * 2);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][134];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit rt[134];
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 134, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++)
+ t[i] = td + i * 134;
+#endif
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_67(norm, m);
+
+ /* Convert a into Montgomery form (reducing first if requested). */
+ if (reduceA != 0) {
+ err = sp_3072_mod_67(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_3072_mul_67(t[1], t[1], norm);
+ err = sp_3072_mod_67(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_3072_mul_67(t[1], a, norm);
+ err = sp_3072_mod_67(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Precompute t[k] = a^k in Montgomery form for k = 2..31. */
+ sp_3072_mont_sqr_67(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_67(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_67(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_67(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_67(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_67(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_67(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_67(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_67(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_67(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_67(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_67(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_67(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_67(t[15], t[ 8], t[ 7], m, mp);
+ sp_3072_mont_sqr_67(t[16], t[ 8], m, mp);
+ sp_3072_mont_mul_67(t[17], t[ 9], t[ 8], m, mp);
+ sp_3072_mont_sqr_67(t[18], t[ 9], m, mp);
+ sp_3072_mont_mul_67(t[19], t[10], t[ 9], m, mp);
+ sp_3072_mont_sqr_67(t[20], t[10], m, mp);
+ sp_3072_mont_mul_67(t[21], t[11], t[10], m, mp);
+ sp_3072_mont_sqr_67(t[22], t[11], m, mp);
+ sp_3072_mont_mul_67(t[23], t[12], t[11], m, mp);
+ sp_3072_mont_sqr_67(t[24], t[12], m, mp);
+ sp_3072_mont_mul_67(t[25], t[13], t[12], m, mp);
+ sp_3072_mont_sqr_67(t[26], t[13], m, mp);
+ sp_3072_mont_mul_67(t[27], t[14], t[13], m, mp);
+ sp_3072_mont_sqr_67(t[28], t[14], m, mp);
+ sp_3072_mont_mul_67(t[29], t[15], t[14], m, mp);
+ sp_3072_mont_sqr_67(t[30], t[15], m, mp);
+ sp_3072_mont_mul_67(t[31], t[16], t[15], m, mp);
+
+ /* Round bits up to a multiple of the 5-bit window, then set up the
+ * bit buffer n holding the top c unread exponent bits MSB-aligned
+ * in a 32-bit word (refills shift by 9 = 32 - 23). */
+ bits = ((bits + 4) / 5) * 5;
+ i = ((bits + 22) / 23) - 1;
+ c = bits % 23;
+ if (c == 0) {
+ c = 23;
+ }
+ if (i < 67) {
+ n = e[i--] << (32 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ if (c < 5) {
+ n |= e[i--] << (9 - c);
+ c += 23;
+ }
+ y = (n >> 27) & 0x1f; /* first 5-bit window */
+ n <<= 5;
+ c -= 5;
+ XMEMCPY(rt, t[y], sizeof(rt));
+ for (; i>=0 || c>=5; ) {
+ if (c < 5) {
+ n |= e[i--] << (9 - c);
+ c += 23;
+ }
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+
+ /* Square 5 times then multiply by the table entry. */
+ sp_3072_mont_sqr_67(rt, rt, m, mp);
+ sp_3072_mont_sqr_67(rt, rt, m, mp);
+ sp_3072_mont_sqr_67(rt, rt, m, mp);
+ sp_3072_mont_sqr_67(rt, rt, m, mp);
+ sp_3072_mont_sqr_67(rt, rt, m, mp);
+
+ sp_3072_mont_mul_67(rt, rt, t[y], m, mp);
+ }
+
+ /* Convert back out of Montgomery form and fully reduce. */
+ sp_3072_mont_reduce_67(rt, m, mp);
+ n = sp_3072_cmp_67(rt, m);
+ sp_3072_cond_sub_67(rt, rt, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, rt, sizeof(rt));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * 134-word full-size variant: the top word holds only 13 bits
+ * (3072 = 133*23 + 13), hence the 0x1fff mask on r[133].
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_134(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<133; i++) {
+ r[i] = 0x7fffff;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 128; i += 8) {
+ r[i + 0] = 0x7fffff;
+ r[i + 1] = 0x7fffff;
+ r[i + 2] = 0x7fffff;
+ r[i + 3] = 0x7fffff;
+ r[i + 4] = 0x7fffff;
+ r[i + 5] = 0x7fffff;
+ r[i + 6] = 0x7fffff;
+ r[i + 7] = 0x7fffff;
+ }
+ r[128] = 0x7fffff;
+ r[129] = 0x7fffff;
+ r[130] = 0x7fffff;
+ r[131] = 0x7fffff;
+ r[132] = 0x7fffff;
+#endif
+ r[133] = 0x1fffL; /* top word: 13 bits only */
+
+ /* r = (2^n - 1) mod n */
+ (void)sp_3072_sub_134(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word down; the (0 - (r == 0)) mask
+ * keeps only the first differing word, with no data-dependent branches.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_3072_cmp_134(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=133; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[133] - b[133]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[132] - b[132]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[131] - b[131]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[130] - b[130]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[129] - b[129]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[128] - b[128]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ for (i = 120; i >= 0; i -= 8) {
+ r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#endif /* WOLFSSL_SP_SMALL */
+
+ return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * Branch-free: the mask is ANDed with each word of b so both cases
+ * execute identically. r may alias a.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_sub_134(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 134; i++) {
+ r[i] = a[i] - (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 128; i += 8) {
+ r[i + 0] = a[i + 0] - (b[i + 0] & m);
+ r[i + 1] = a[i + 1] - (b[i + 1] & m);
+ r[i + 2] = a[i + 2] - (b[i + 2] & m);
+ r[i + 3] = a[i + 3] - (b[i + 3] & m);
+ r[i + 4] = a[i + 4] - (b[i + 4] & m);
+ r[i + 5] = a[i + 5] - (b[i + 5] & m);
+ r[i + 6] = a[i + 6] - (b[i + 6] & m);
+ r[i + 7] = a[i + 7] - (b[i + 7] & m);
+ }
+ r[128] = a[128] - (b[128] & m);
+ r[129] = a[129] - (b[129] & m);
+ r[130] = a[130] - (b[130] & m);
+ r[131] = a[131] - (b[131] & m);
+ r[132] = a[132] - (b[132] & m);
+ r[133] = a[133] - (b[133] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Core step of Montgomery reduction for the full 134-word size. Words of
+ * r are NOT re-normalized here; callers propagate carries afterwards.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_add_134(sp_digit* r, const sp_digit* a,
+ const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0; /* product + incoming r word + carry */
+ int i;
+
+ for (i = 0; i < 134; i++) {
+ t += (tb * a[i]) + r[i];
+ r[i] = t & 0x7fffff;
+ t >>= 23;
+ }
+ r[134] += t;
+#else
+ /* Unrolled by 8: t[] is a rotating window of 64-bit partial products. */
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
+ for (i = 0; i < 128; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+ t[2] = tb * a[i+2];
+ r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+ t[3] = tb * a[i+3];
+ r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
+ t[4] = tb * a[i+4];
+ r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
+ t[5] = tb * a[i+5];
+ r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
+ t[6] = tb * a[i+6];
+ r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
+ t[7] = tb * a[i+7];
+ r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
+ t[0] = tb * a[i+8];
+ r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
+ }
+ t[1] = tb * a[129]; r[129] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
+ t[2] = tb * a[130]; r[130] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
+ t[3] = tb * a[131]; r[131] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
+ t[4] = tb * a[132]; r[132] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
+ t[5] = tb * a[133]; r[133] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
+ r[134] += (sp_digit)(t[5] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 23.
+ *
+ * Propagates each word's carry (bits >= 23) into the next word and masks
+ * the word back to 23 bits. The top word a[133] is left un-masked.
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_3072_norm_134(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ for (i = 0; i < 133; i++) {
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+ }
+#else
+ int i;
+ for (i = 0; i < 128; i += 8) {
+ a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
+ a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
+ a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
+ a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
+ a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
+ a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
+ a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
+ a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
+ a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
+ }
+ a[128+1] += a[128] >> 23;
+ a[128] &= 0x7fffff;
+ a[129+1] += a[129] >> 23;
+ a[129] &= 0x7fffff;
+ a[130+1] += a[130] >> 23;
+ a[130] &= 0x7fffff;
+ a[131+1] += a[131] >> 23;
+ a[131] &= 0x7fffff;
+ a[132+1] += a[132] >> 23;
+ a[132] &= 0x7fffff;
+#endif
+}
+
+/* Shift the result in the high 3072 bits down to the bottom.
+ *
+ * 3072 = 133*23 + 13, so the boundary sits 13 bits into word 133: the
+ * ">> 13" pulls the top 10 bits and each higher word is shifted left by
+ * 10 (23 - 13) as it is folded into the 64-bit accumulator n.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_3072_mont_shift_134(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ int64_t n = a[133] >> 13;
+ n += ((int64_t)a[134]) << 10;
+
+ for (i = 0; i < 133; i++) {
+ r[i] = n & 0x7fffff;
+ n >>= 23;
+ n += ((int64_t)a[135 + i]) << 10;
+ }
+ r[133] = (sp_digit)n;
+#else
+ int i;
+ int64_t n = a[133] >> 13;
+ n += ((int64_t)a[134]) << 10;
+ for (i = 0; i < 128; i += 8) {
+ r[i + 0] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 135]) << 10;
+ r[i + 1] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 136]) << 10;
+ r[i + 2] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 137]) << 10;
+ r[i + 3] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 138]) << 10;
+ r[i + 4] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 139]) << 10;
+ r[i + 5] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 140]) << 10;
+ r[i + 6] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 141]) << 10;
+ r[i + 7] = n & 0x7fffff;
+ n >>= 23; n += ((int64_t)a[i + 142]) << 10;
+ }
+ r[128] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[263]) << 10;
+ r[129] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[264]) << 10;
+ r[130] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[265]) << 10;
+ r[131] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[266]) << 10;
+ r[132] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[267]) << 10;
+ r[133] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+ /* Clear the (now consumed) upper half. */
+ XMEMSET(&r[134], 0, sizeof(*r) * 134U);
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * Word-by-word Montgomery reduction (HAC 14.32). With WOLFSSL_SP_DH,
+ * mp == 1 is handled in a dedicated branch where the multiply by mp is
+ * skipped (mu is just the word itself). The final word only holds 13
+ * bits (3072 = 133*23 + 13), hence the 0x1fff masks and the ">> 13"
+ * overflow test driving the branch-free conditional subtraction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_3072_mont_reduce_134(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+ int i;
+ sp_digit mu;
+
+ sp_3072_norm_134(a + 134);
+
+#ifdef WOLFSSL_SP_DH
+ if (mp != 1) {
+ for (i=0; i<133; i++) {
+ mu = (a[i] * mp) & 0x7fffff;
+ sp_3072_mul_add_134(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ }
+ mu = (a[i] * mp) & 0x1fffL;
+ sp_3072_mul_add_134(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+ }
+ else {
+ for (i=0; i<133; i++) {
+ mu = a[i] & 0x7fffff;
+ sp_3072_mul_add_134(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ }
+ mu = a[i] & 0x1fffL;
+ sp_3072_mul_add_134(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+ }
+#else
+ for (i=0; i<133; i++) {
+ mu = (a[i] * mp) & 0x7fffff;
+ sp_3072_mul_add_134(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ }
+ mu = (a[i] * mp) & 0x1fffL;
+ sp_3072_mul_add_134(a+i, m, mu);
+ a[i+1] += a[i] >> 23;
+ a[i] &= 0x7fffff;
+#endif
+
+ sp_3072_mont_shift_134(a, a);
+ sp_3072_cond_sub_134(a, a, m, 0 - (((a[133] >> 13) > 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_3072_norm_134(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full double-width multiply followed by Montgomery reduction; r must
+ * have room for the 268-word intermediate product.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_mul_134(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_3072_mul_134(r, a, b);
+ sp_3072_mont_reduce_134(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_3072_mont_sqr_134(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    /* Dedicated squaring is cheaper than mul(a, a); then reduce. */
+    sp_3072_sqr_134(r, a);
+    sp_3072_mont_reduce_134(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_d_268(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Simple carry-chain loop over all 268 digits of 23 bits each. */
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 268; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x7fffff;
+        t >>= 23;
+    }
+    /* Final carry becomes the extra top digit r[268]. */
+    r[268] = (sp_digit)t;
+#else
+    /* Unrolled x8: t[] holds the raw 64-bit products; each output digit is
+     * the previous product's carry plus the current product's low 23 bits. */
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+    for (i = 0; i < 264; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+    }
+    /* Tail: digits 265..267 and the final carry digit. */
+    t[1] = tb * a[265];
+    r[265] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+    t[2] = tb * a[266];
+    r[266] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+    t[3] = tb * a[267];
+    r[267] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+    r[268] = (sp_digit)(t[3] >> 23);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_add_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    /* Masking b with m (all-ones or zero) keeps this branch-free and
+     * therefore constant-time with respect to the condition. */
+    for (i = 0; i < 134; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    /* Unrolled x8 over the first 128 digits, then the remaining 6. */
+    for (i = 0; i < 128; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[128] = a[128] + (b[128] & m);
+    r[129] = a[129] + (b[129] & m);
+    r[130] = a[130] + (b[130] & m);
+    r[131] = a[131] + (b[131] & m);
+    r[132] = a[132] + (b[132] & m);
+    r[133] = a[133] + (b[133] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    /* Digit-wise subtract without carry propagation; digits may go
+     * negative and are normalized by the caller. */
+    for (i = 0; i < 134; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    /* Always succeeds; int return matches the shared sub/add API shape. */
+    return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_134(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    /* Digit-wise add without carry propagation; 23-bit digits in 32-bit
+     * storage leave headroom, callers normalize afterwards. */
+    for (i = 0; i < 134; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    /* Always succeeds; int return matches the shared sub/add API shape. */
+    return 0;
+}
+#endif
+/* Shift a right by n bits into r (r = a >> n), 0 < n < 23.
+ * Each output digit takes its high bits from the next input digit. */
+SP_NOINLINE static void sp_3072_rshift_134(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<133; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
+    }
+#else
+    /* Unrolled x8 for digits 0..127, then 128..132 individually. */
+    for (i=0; i<128; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff;
+    }
+    r[128] = ((a[128] >> n) | (a[129] << (23 - n))) & 0x7fffff;
+    r[129] = ((a[129] >> n) | (a[130] << (23 - n))) & 0x7fffff;
+    r[130] = ((a[130] >> n) | (a[131] << (23 - n))) & 0x7fffff;
+    r[131] = ((a[131] >> n) | (a[132] << (23 - n))) & 0x7fffff;
+    r[132] = ((a[132] >> n) | (a[133] << (23 - n))) & 0x7fffff;
+#endif
+    /* Top digit has no neighbor above it. */
+    r[133] = a[133] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+/* Divide the 46-bit value (d1:d0) by dv, avoiding 64-bit division.
+ * Used when WOLFSSL_SP_DIV_32 restricts the build to 32-bit divides.
+ * The quotient is assembled 8+8+7 bits at a time. */
+static WC_INLINE sp_digit sp_3072_div_word_134(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 23 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 15);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_3072_div_134(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_32
+    int64_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[268 + 1], t2d[134 + 1], sdd[134 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* sd;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 134 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    /* NOTE(review): duplicate of the (void)m above - harmless, likely a
+     * generator artifact. */
+    (void)m;
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 268 + 1;
+        sd = t2 + 134 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        /* Scale divisor and dividend by 2^10 so the divisor's top digit
+         * uses its full 23 bits (3072 bits leave 13 in the top digit). */
+        sp_3072_mul_d_134(sd, d, 1L << 10);
+        sp_3072_mul_d_268(t1, a, 1L << 10);
+        dv = sd[133];
+        /* Schoolbook long division, one estimated quotient digit per
+         * iteration, with a correction step after each estimate. */
+        for (i=134; i>=0; i--) {
+            t1[134 + i] += t1[134 + i - 1] >> 23;
+            t1[134 + i - 1] &= 0x7fffff;
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[134 + i];
+            d1 <<= 23;
+            d1 += t1[134 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_3072_div_word_134(t1[134 + i], t1[134 + i - 1], dv);
+#endif
+
+            sp_3072_mul_d_134(t2, sd, r1);
+            (void)sp_3072_sub_134(&t1[i], &t1[i], t2);
+            t1[134 + i] -= t2[134];
+            t1[134 + i] += t1[134 + i - 1] >> 23;
+            t1[134 + i - 1] &= 0x7fffff;
+            /* Correct a possible over-estimate by adding back a multiple
+             * of the divisor. */
+            r1 = (((-t1[134 + i]) << 23) - t1[134 + i - 1]) / dv;
+            r1 -= t1[134 + i];
+            sp_3072_mul_d_134(t2, sd, r1);
+            (void)sp_3072_add_134(&t1[i], &t1[i], t2);
+            t1[134 + i] += t1[134 + i - 1] >> 23;
+            t1[134 + i - 1] &= 0x7fffff;
+        }
+        t1[134 - 1] += t1[134 - 2] >> 23;
+        t1[134 - 2] &= 0x7fffff;
+        r1 = t1[134 - 1] / dv;
+
+        sp_3072_mul_d_134(t2, sd, r1);
+        sp_3072_sub_134(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 134U);
+        for (i=0; i<132; i++) {
+            r[i+1] += r[i] >> 23;
+            r[i] &= 0x7fffff;
+        }
+        /* If the remainder went negative, add the (scaled) divisor back. */
+        sp_3072_cond_add_134(r, r, sd, 0 - ((r[133] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_3072_norm_134(r);
+        /* Undo the initial 2^10 scaling. */
+        sp_3072_rshift_134(r, r, 10);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_3072_mod_134(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    /* Modulo is the division's remainder; the quotient is not needed. */
+    return sp_3072_div_134(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_134(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Small-code variant: bit-at-a-time Montgomery ladder with masked
+     * (address-independent) selection of the operand to square. */
+    sp_digit* td;
+    sp_digit* t[3];
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 134 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 134U * 2U);
+
+        norm = t[0] = td;
+        t[1] = &td[134 * 2];
+        t[2] = &td[2 * 134 * 2];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_134(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_134(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 134U);
+        }
+    }
+    if (err == MP_OKAY) {
+        /* Convert the base into Montgomery form: a * R mod m. */
+        sp_3072_mul_134(t[1], t[1], norm);
+        err = sp_3072_mod_134(t[1], t[1], m);
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 23;
+        c = bits % 23;
+        n = e[i--] << (23 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 23;
+            }
+
+            /* y = current exponent bit (MSB first). */
+            y = (n >> 22) & 1;
+            n <<= 1;
+
+            sp_3072_mont_mul_134(t[y^1], t[0], t[1], m, mp);
+
+            /* Square whichever of t[0]/t[1] the bit selects, via masked
+             * address arithmetic to avoid a secret-dependent branch. */
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 134 * 2);
+            sp_3072_mont_sqr_134(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 134 * 2);
+        }
+
+        /* Convert out of Montgomery form and fully reduce. */
+        sp_3072_mont_reduce_134(t[0], m, mp);
+        n = sp_3072_cmp_134(t[0], m);
+        sp_3072_cond_sub_134(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 134 * 2);
+
+    }
+
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+    /* Cache-resistant variant: same ladder as above but with stack (or
+     * heap) buffers sized for the full double-width values. */
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[3][268];
+#else
+    sp_digit* td;
+    sp_digit* t[3];
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 134 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        t[0] = td;
+        t[1] = &td[134 * 2];
+        t[2] = &td[2 * 134 * 2];
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_134(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_134(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_3072_mul_134(t[1], t[1], norm);
+                err = sp_3072_mod_134(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_3072_mul_134(t[1], a, norm);
+            err = sp_3072_mod_134(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 23;
+        c = bits % 23;
+        n = e[i--] << (23 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 23;
+            }
+
+            y = (n >> 22) & 1;
+            n <<= 1;
+
+            sp_3072_mont_mul_134(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_3072_mont_sqr_134(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+        }
+
+        sp_3072_mont_reduce_134(t[0], m, mp);
+        n = sp_3072_cmp_134(t[0], m);
+        sp_3072_cond_sub_134(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(t[0]));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#else
+    /* Default variant: fixed 5-bit sliding window; precomputes a^1..a^31
+     * in Montgomery form, then does 5 squarings + 1 multiply per window. */
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][268];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit rt[268];
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 268, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 268;
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_134(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_134(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_3072_mul_134(t[1], t[1], norm);
+                err = sp_3072_mod_134(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_3072_mul_134(t[1], a, norm);
+            err = sp_3072_mod_134(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* t[k] = a^k in Montgomery form, built by square/multiply. */
+        sp_3072_mont_sqr_134(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_134(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_134(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_134(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_134(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_134(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_134(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_134(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_134(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_134(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_134(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_134(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_134(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_134(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_134(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_134(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_134(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_134(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_134(t[20], t[10], m, mp);
+        sp_3072_mont_mul_134(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_134(t[22], t[11], m, mp);
+        sp_3072_mont_mul_134(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_134(t[24], t[12], m, mp);
+        sp_3072_mont_mul_134(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_134(t[26], t[13], m, mp);
+        sp_3072_mont_mul_134(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_134(t[28], t[14], m, mp);
+        sp_3072_mont_mul_134(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_134(t[30], t[15], m, mp);
+        sp_3072_mont_mul_134(t[31], t[16], t[15], m, mp);
+
+        /* Round bit count up to a multiple of the 5-bit window; n holds
+         * exponent bits left-justified in a 32-bit word. */
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 22) / 23) - 1;
+        c = bits % 23;
+        if (c == 0) {
+            c = 23;
+        }
+        if (i < 134) {
+            n = e[i--] << (32 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (9 - c);
+            c += 23;
+        }
+        y = (n >> 27) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        XMEMCPY(rt, t[y], sizeof(rt));
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (9 - c);
+                c += 23;
+            }
+            y = (n >> 27) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            /* One window: square 5 times, then multiply by t[window]. */
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+            sp_3072_mont_sqr_134(rt, rt, m, mp);
+
+            sp_3072_mont_mul_134(rt, rt, t[y], m, mp);
+        }
+
+        sp_3072_mont_reduce_134(rt, m, mp);
+        n = sp_3072_cmp_134(rt, m);
+        sp_3072_cond_sub_134(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+ /* WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit* norm;
+    sp_digit e[1] = {0};
+    sp_digit mp;
+    int i;
+    int err = MP_OKAY;
+
+    /* Output buffer must hold the full 3072-bit (384-byte) result. */
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* Public exponent must fit in one 23-bit digit. */
+        if (mp_count_bits(em) > 23) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 134 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 134 * 2;
+        m = r + 134 * 2;
+        /* norm shares r's buffer: it is consumed before r is produced. */
+        norm = r;
+
+        sp_3072_from_bin(a, 134, in, inLen);
+#if DIGIT_BIT >= 23
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(m, 134, mm);
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_134(norm, m);
+    }
+    if (err == MP_OKAY) {
+        /* Convert base into Montgomery form. */
+        sp_3072_mul_134(a, a, norm);
+        err = sp_3072_mod_134(a, a, m);
+    }
+    if (err == MP_OKAY) {
+        /* Skip leading zero bits of the (public) exponent. */
+        for (i=22; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
+
+        /* Left-to-right square-and-multiply; exponent is public so a
+         * branch on its bits is acceptable here. */
+        XMEMCPY(r, a, sizeof(sp_digit) * 134 * 2);
+        for (i--; i>=0; i--) {
+            sp_3072_mont_sqr_134(r, r, m, mp);
+
+            if (((e[0] >> i) & 1) == 1) {
+                sp_3072_mont_mul_134(r, r, a, m, mp);
+            }
+        }
+        sp_3072_mont_reduce_134(r, m, mp);
+        /* mp is reused as the comparison result from here on. */
+        mp = sp_3072_cmp_134(r, m);
+        sp_3072_cond_sub_134(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
+
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit ad[268], md[134], rd[268];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit e[1] = {0};
+    int err = MP_OKAY;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 23) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 134 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 134 * 2;
+        m = r + 134 * 2;
+    }
+#else
+    a = ad;
+    m = md;
+    r = rd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_3072_from_bin(a, 134, in, inLen);
+#if DIGIT_BIT >= 23
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(m, 134, mm);
+
+        /* Fast path for the common public exponent 3: a^3 = (a^2) * a. */
+        if (e[0] == 0x3) {
+            sp_3072_sqr_134(r, a);
+            err = sp_3072_mod_134(r, r, m);
+            if (err == MP_OKAY) {
+                sp_3072_mul_134(r, a, r);
+                err = sp_3072_mod_134(r, r, m);
+            }
+        }
+        else {
+            sp_digit* norm = r;
+            int i;
+            sp_digit mp;
+
+            sp_3072_mont_setup(m, &mp);
+            sp_3072_mont_norm_134(norm, m);
+
+            sp_3072_mul_134(a, a, norm);
+            err = sp_3072_mod_134(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i=22; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 268U);
+                for (i--; i>=0; i--) {
+                    sp_3072_mont_sqr_134(r, r, m, mp);
+
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_3072_mont_mul_134(r, r, a, m, mp);
+                    }
+                }
+                sp_3072_mont_reduce_134(r, m, mp);
+                mp = sp_3072_cmp_134(r, m);
+                sp_3072_cond_sub_134(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+    /* Non-CRT path: single exponentiation with the full private exponent.
+     * Slower than CRT but needs no p/q/dp/dq/qi values. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 134 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = d + 134;
+        m = a + 268;
+        /* r aliases a: the exponentiation overwrites the base in place. */
+        r = a;
+
+        sp_3072_from_bin(a, 134, in, inLen);
+        sp_3072_from_mp(d, 134, dm);
+        sp_3072_from_mp(m, 134, mm);
+        err = sp_3072_mod_exp_134(r, a, d, 3072, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (d != NULL) {
+        /* Zeroize the private exponent before freeing. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 134);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[268], d[134], m[134];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 3072) {
+            err = MP_READ_E;
+        }
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_bin(a, 134, in, inLen);
+        sp_3072_from_mp(d, 134, dm);
+        sp_3072_from_mp(m, 134, mm);
+        err = sp_3072_mod_exp_134(r, a, d, 3072, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    /* Zeroize the stack copy of the private exponent. */
+    XMEMSET(d, 0, sizeof(sp_digit) * 134);
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#else
+    /* CRT path (Garner): two half-size exponentiations mod p and q, then
+     * recombine via qi = q^-1 mod p. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* dq;
+    sp_digit* qi;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 384) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 67 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 134 * 2;
+        q = p + 67;
+        /* dp, dq and qi share one 67-digit buffer; they are used one at a
+         * time and loaded just before use. */
+        qi = dq = dp = q + 67;
+        tmpa = qi + 67;
+        tmpb = tmpa + 134;
+
+        /* r overlaps a's upper half; a's low half is consumed first. */
+        r = t + 134;
+
+        sp_3072_from_bin(a, 134, in, inLen);
+        sp_3072_from_mp(p, 67, pm);
+        sp_3072_from_mp(q, 67, qm);
+        sp_3072_from_mp(dp, 67, dpm);
+        err = sp_3072_mod_exp_67(tmpa, a, dp, 1536, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(dq, 67, dqm);
+        err = sp_3072_mod_exp_67(tmpb, a, dq, 1536, q, 1);
+    }
+    if (err == MP_OKAY) {
+        /* tmpa = (m_p - m_q) mod p; two conditional adds cover the range. */
+        (void)sp_3072_sub_67(tmpa, tmpa, tmpb);
+        sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31));
+        sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31));
+
+        sp_3072_from_mp(qi, 67, qim);
+        sp_3072_mul_67(tmpa, tmpa, qi);
+        err = sp_3072_mod_67(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* r = m_q + q * h  (Garner recombination). */
+        sp_3072_mul_67(tmpa, q, tmpa);
+        (void)sp_3072_add_134(r, tmpb, tmpa);
+        sp_3072_norm_134(r);
+
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    if (t != NULL) {
+        /* Zeroize all CRT secrets before freeing. */
+        XMEMSET(t, 0, sizeof(sp_digit) * 67 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[134 * 2];
+    sp_digit p[67], q[67], dp[67], dq[67], qi[67];
+    sp_digit tmpa[134], tmpb[134];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 384U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 384U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_bin(a, 134, in, inLen);
+        sp_3072_from_mp(p, 67, pm);
+        sp_3072_from_mp(q, 67, qm);
+        sp_3072_from_mp(dp, 67, dpm);
+        sp_3072_from_mp(dq, 67, dqm);
+        sp_3072_from_mp(qi, 67, qim);
+
+        err = sp_3072_mod_exp_67(tmpa, a, dp, 1536, p, 1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_3072_mod_exp_67(tmpb, a, dq, 1536, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        (void)sp_3072_sub_67(tmpa, tmpa, tmpb);
+        sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31));
+        sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31));
+        sp_3072_mul_67(tmpa, tmpa, qi);
+        err = sp_3072_mod_67(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_mul_67(tmpa, tmpa, q);
+        (void)sp_3072_add_134(r, tmpb, tmpa);
+        sp_3072_norm_134(r);
+
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+    }
+
+    /* Zeroize all stack copies of CRT secrets. */
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+    XMEMSET(dq, 0, sizeof(dq));
+    XMEMSET(qi, 0, sizeof(qi));
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+}
+
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 23
+        /* Digit sizes match: straight copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 134);
+        r->used = 134;
+        mp_clamp(r);
+#elif DIGIT_BIT < 23
+        /* mp digits are narrower: each 23-bit SP digit is split across
+         * one or more mp digits; s tracks the bit offset. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 134; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 23) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 23 - s;
+        }
+        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp digits are wider: pack successive 23-bit SP digits into each
+         * mp digit, spilling into the next when it fills up. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 134; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 23 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 23 - s;
+            }
+            else {
+                s += 23;
+            }
+        }
+        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    /* All operands must fit the fixed 3072-bit layout; the modulus must
+     * be exactly 3072 bits. */
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 134 * 2;
+        m = e + 134;
+        /* r aliases b: the result overwrites the base buffer. */
+        r = b;
+
+        sp_3072_from_mp(b, 134, base);
+        sp_3072_from_mp(e, 134, exp);
+        sp_3072_from_mp(m, 134, mod);
+
+        err = sp_3072_mod_exp_134(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_3072_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        /* Zeroize the exponent (may be a DH private value) before free. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[268], ed[134], md[134];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 134 * 2;
+        m = e + 134;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 134, base);
+        sp_3072_from_mp(e, 134, exp);
+        sp_3072_from_mp(m, 134, mod);
+
+        err = sp_3072_mod_exp_134(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_3072_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    /* Zeroize the stack copy of the exponent. */
+    XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+#endif
+
+    return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+/* Shift the 134-digit (23-bit radix) number a left by n bits (n < 23),
+ * producing 135 digits in r.  r and a may alias: digits are written from
+ * the top down so each source digit is read before it is overwritten.
+ *
+ * r  Destination, 135 digits.
+ * a  Source, 134 digits.
+ * n  Bit count to shift by (less than one digit).
+ */
+SP_NOINLINE static void sp_3072_lshift_134(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    r[134] = a[133] >> (23 - n);
+    for (i = 133; i > 0; i--) {
+        r[i] = ((a[i] << n) | (a[i-1] >> (23 - n))) & 0x7fffff;
+    }
+#else
+    int i;
+    sp_int_digit hi;
+    sp_int_digit lo;
+
+    /* Top overflow digit: bits shifted out of digit 133. */
+    r[134] = (sp_int_digit)a[133] >> (23U - n);
+    /* Each result digit combines its own digit shifted up with the top
+     * bits of the digit below, masked back to 23 bits. */
+    for (i = 133; i > 0; i--) {
+        hi = (sp_int_digit)a[i];
+        lo = (sp_int_digit)a[i - 1];
+        r[i] = ((hi << n) | (lo >> (23U - n))) & 0x7fffff;
+    }
+#endif
+    /* Bottom digit has nothing below it to pull bits from. */
+    r[0] = (a[0] << n) & 0x7fffff;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_134(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[268];
+    sp_digit td[135];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* 403 = 268 (Montgomery norm, double width) + 135 (mul_d scratch). */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 403, NULL,
+        DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp = td + 268;
+        XMEMSET(td, 0, sizeof(sp_digit) * 403);
+#else
+        norm = nd;
+        tmp = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_134(norm, m);
+
+        /* Process the exponent in fixed 4-bit windows; because the base is
+         * 2, each window y is applied as a left shift by y bits. */
+        bits = ((bits + 3) / 4) * 4;
+        i = ((bits + 22) / 23) - 1;     /* index of top exponent digit */
+        c = bits % 23;                  /* valid bits in that digit */
+        if (c == 0) {
+            c = 23;
+        }
+        /* Load the top digit left-justified into the 32-bit window n. */
+        if (i < 134) {
+            n = e[i--] << (32 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 4) {
+            /* Not enough bits for a window: pull in the next digit
+             * (digits are 23 bits wide; 32 - 23 = 9). */
+            n |= e[i--] << (9 - c);
+            c += 23;
+        }
+        y = (n >> 28) & 0xf;            /* first 4-bit window */
+        n <<= 4;
+        c -= 4;
+        /* r = norm * 2^y, i.e. 2^y in Montgomery form. */
+        sp_3072_lshift_134(r, norm, y);
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= e[i--] << (9 - c);
+                c += 23;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            /* Four Montgomery squarings advance the window. */
+            sp_3072_mont_sqr_134(r, r, m, mp);
+            sp_3072_mont_sqr_134(r, r, m, mp);
+            sp_3072_mont_sqr_134(r, r, m, mp);
+            sp_3072_mont_sqr_134(r, r, m, mp);
+
+            /* Multiply by 2^y, then fold the overflow above 3072 bits
+             * back in using norm (= R mod m) and a conditional subtract
+             * done in constant time. */
+            sp_3072_lshift_134(r, r, y);
+            sp_3072_mul_d_134(tmp, norm, (r[134] << 10) + (r[133] >> 13));
+            r[134] = 0;
+            r[133] &= 0x1fffL;
+            (void)sp_3072_add_134(r, r, tmp);
+            sp_3072_norm_134(r);
+            o = sp_3072_cmp_134(r, m);
+            sp_3072_cond_sub_134(r, r, m, ((o < 0) ?
+                (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        /* Convert out of Montgomery form and fully reduce. */
+        sp_3072_mont_reduce_134(r, m, mp);
+        n = sp_3072_cmp_134(r, m);
+        sp_3072_cond_sub_134(r, r, m, ((n < 0) ?
+            (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_3072 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+
+    /* Inputs must fit the fixed 3072-bit representation exactly. */
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 384) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* One allocation split into base (double width for products),
+         * exponent and modulus. */
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 134 * 2;
+        m = e + 134;
+        r = b;                          /* result shares the base buffer */
+
+        sp_3072_from_mp(b, 134, base);
+        sp_3072_from_bin(e, 134, exp, expLen);
+        sp_3072_from_mp(m, 134, mod);
+
+    #ifdef HAVE_FFDHE_3072
+        /* Fast path for base 2 with an FFDHE-shaped modulus (top 16
+         * modulus bits all set). */
+        if (base->used == 1 && base->dp[0] == 2 &&
+                ((m[133] << 3) | (m[132] >> 20)) == 0xffffL) {
+            err = sp_3072_mod_exp_2_134(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_3072_mod_exp_134(r, b, e, expLen * 8, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+        /* Strip leading zero bytes so the reported length is minimal. */
+        for (i=0; i<384 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    if (d != NULL) {
+        /* Zeroise the private exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[268], ed[134], md[134];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+    int err = MP_OKAY;
+
+    /* Inputs must fit the fixed 3072-bit representation exactly. */
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 384U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 134 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 134 * 2;
+        m = e + 134;
+        r = b;                          /* result shares the base buffer */
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 134, base);
+        sp_3072_from_bin(e, 134, exp, expLen);
+        sp_3072_from_mp(m, 134, mod);
+
+    #ifdef HAVE_FFDHE_3072
+        /* Fast path for base 2 with an FFDHE-shaped modulus (top 16
+         * modulus bits all set). */
+        if (base->used == 1 && base->dp[0] == 2U &&
+                ((m[133] << 3) | (m[132] >> 20)) == 0xffffL) {
+            err = sp_3072_mod_exp_2_134(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_3072_mod_exp_134(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_3072
+        }
+    #endif
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+        /* Strip leading zero bytes so the reported length is minimal. */
+        for (i=0; i<384U && out[i] == 0U; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        /* Zeroise the private exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    /* Zeroise the stack copy of the private exponent. */
+    XMEMSET(e, 0, sizeof(sp_digit) * 134U);
+#endif
+
+    return err;
+#endif
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    /* 1536-bit operands use the 3072-bit code at half size: 67 digits of
+     * 23 bits each (67 * 23 = 1541 bits). */
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* One allocation split into base (double width), exponent and
+         * modulus. */
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 67 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 67 * 2;
+        m = e + 67;
+        r = b;                          /* result shares the base buffer */
+
+        sp_3072_from_mp(b, 67, base);
+        sp_3072_from_mp(e, 67, exp);
+        sp_3072_from_mp(m, 67, mod);
+
+        err = sp_3072_mod_exp_67(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* Clear the upper half so sp_3072_to_mp sees a clean number. */
+        XMEMSET(r + 67, 0, sizeof(*r) * 67U);
+        err = sp_3072_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        /* Zeroise the exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 67U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[134], ed[67], md[67];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 67 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 67 * 2;
+        m = e + 67;
+        r = b;                          /* result shares the base buffer */
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 67, base);
+        sp_3072_from_mp(e, 67, exp);
+        sp_3072_from_mp(m, 67, mod);
+
+        err = sp_3072_mod_exp_67(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* Clear the upper half so sp_3072_to_mp sees a clean number. */
+        XMEMSET(r + 67, 0, sizeof(*r) * 67U);
+        err = sp_3072_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        /* Zeroise the exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 67U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    /* Zeroise the stack copy of the exponent. */
+    XMEMSET(e, 0, sizeof(sp_digit) * 67U);
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;    /* bit position within the current 21-bit digit */
+
+    r[0] = 0;
+    /* Consume bytes from the end (least significant) forwards. */
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        /* s >= 13 means this byte straddles the 21-bit digit boundary. */
+        if (s >= 13U) {
+            r[j] &= 0x1fffff;
+            s = 21U - s;           /* bits of the byte consumed by digit j */
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;   /* remainder starts next digit */
+            s = 8U - s;            /* bits now occupied in the new digit */
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any remaining high digits. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 21
+    /* mp_int digits are already 21 bits wide: copy then zero pad. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 21
+    /* mp_int digits are wider: split each across several 21-bit digits. */
+    int i, j = 0;
+    word32 s = 0;    /* bits of dp[i] already consumed */
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x1fffff;
+        s = 21U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        /* Keep carving 21-bit chunks while the source digit has more. */
+        while ((s + 21U) <= (word32)DIGIT_BIT) {
+            s += 21U;
+            r[j] &= 0x1fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* mp_int digits are narrower: pack several into each 21-bit digit. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 21) {
+            r[j] &= 0x1fffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 21 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    /* Normalise first: ripple carries so each digit is a clean 21 bits.
+     * Note this mutates r. */
+    for (i=0; i<195; i++) {
+        r[i+1] += r[i] >> 21;
+        r[i] &= 0x1fffff;
+    }
+    /* Emit exactly 512 big-endian bytes, filling from the end. */
+    j = 4096 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<196 && j>=0; i++) {
+        b = 0;    /* bits of digit i written out so far */
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 21) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 21);    /* bit offset for the next digit's low bits */
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;             /* next digit shares the current byte */
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_49(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int64_t acc[98];    /* raw column sums before carry propagation */
+    int ai, bi;
+
+    /* Schoolbook multiply of two 49-digit (21-bit radix) numbers. */
+    XMEMSET(acc, 0, sizeof(acc));
+    for (ai = 0; ai < 49; ai++) {
+        for (bi = 0; bi < 49; bi++) {
+            acc[ai + bi] += ((int64_t)a[ai]) * b[bi];
+        }
+    }
+    /* Normalise back to 21-bit digits, rippling carries upward. */
+    for (ai = 0; ai < 97; ai++) {
+        r[ai] = acc[ai] & 0x1fffff;
+        acc[ai + 1] += acc[ai] >> 21;
+    }
+    r[97] = (sp_digit)acc[97];
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_49(sp_digit* r, const sp_digit* a)
+{
+    int64_t acc[98];    /* raw column sums before carry propagation */
+    int hi, lo;
+
+    /* Schoolbook square: off-diagonal products counted twice, diagonal
+     * products once. */
+    XMEMSET(acc, 0, sizeof(acc));
+    for (hi = 0; hi < 49; hi++) {
+        for (lo = 0; lo < hi; lo++) {
+            acc[hi + lo] += (((int64_t)a[hi]) * a[lo]) * 2;
+        }
+        acc[hi + hi] += ((int64_t)a[hi]) * a[hi];
+    }
+    /* Normalise back to 21-bit digits, rippling carries upward. */
+    for (hi = 0; hi < 97; hi++) {
+        r[hi] = acc[hi] & 0x1fffff;
+        acc[hi + 1] += acc[hi] >> 21;
+    }
+    r[97] = (sp_digit)acc[97];
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_49(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int base;
+    int k;
+
+    /* Digit-wise add: 48 digits in blocks of eight, then digit 48.
+     * Carries accumulate in the digits (normalised elsewhere). */
+    for (base = 0; base < 48; base += 8) {
+        for (k = 0; k < 8; k++) {
+            r[base + k] = a[base + k] + b[base + k];
+        }
+    }
+    r[48] = a[48] + b[48];
+
+    return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int base;
+    int k;
+
+    /* Digit-wise add: 96 digits in blocks of eight, then the final two.
+     * Carries accumulate in the digits (normalised elsewhere). */
+    for (base = 0; base < 96; base += 8) {
+        for (k = 0; k < 8; k++) {
+            r[base + k] = a[base + k] + b[base + k];
+        }
+    }
+    r[96] = a[96] + b[96];
+    r[97] = a[97] + b[97];
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int base;
+    int k;
+
+    /* Digit-wise subtract: 96 digits in blocks of eight, then the final
+     * two.  Borrows accumulate in the (signed) digits. */
+    for (base = 0; base < 96; base += 8) {
+        for (k = 0; k < 8; k++) {
+            r[base + k] = a[base + k] - b[base + k];
+        }
+    }
+    r[96] = a[96] - b[96];
+    r[97] = a[97] - b[97];
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_98(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit* z0 = r;          /* low product  a_lo * b_lo */
+    sp_digit z1[98];           /* middle product (temporary) */
+    sp_digit* a1 = z1;         /* a_lo + a_hi (reuses z1's storage) */
+    sp_digit b1[49];           /* b_lo + b_hi */
+    sp_digit* z2 = r + 98;     /* high product a_hi * b_hi */
+    /* One level of Karatsuba over 49-digit halves.  The half sums are
+     * formed first; the middle multiply then overwrites a1/z1. */
+    (void)sp_4096_add_49(a1, a, &a[49]);
+    (void)sp_4096_add_49(b1, b, &b[49]);
+    sp_4096_mul_49(z2, &a[49], &b[49]);
+    sp_4096_mul_49(z0, a, b);
+    sp_4096_mul_49(z1, a1, b1);
+    /* Cross term: z1 = (a_lo+a_hi)(b_lo+b_hi) - z2 - z0. */
+    (void)sp_4096_sub_98(z1, z1, z2);
+    (void)sp_4096_sub_98(z1, z1, z0);
+    (void)sp_4096_add_98(r + 49, r + 49, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_98(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;          /* low square  a_lo^2 */
+    sp_digit z1[98];           /* middle term (temporary) */
+    sp_digit* a1 = z1;         /* a_lo + a_hi (reuses z1's storage) */
+    sp_digit* z2 = r + 98;     /* high square a_hi^2 */
+    /* One level of Karatsuba squaring over 49-digit halves. */
+    (void)sp_4096_add_49(a1, a, &a[49]);
+    sp_4096_sqr_49(z2, &a[49]);
+    sp_4096_sqr_49(z0, a);
+    sp_4096_sqr_49(z1, a1);    /* overwrites the half sum in a1 */
+    /* Cross term: z1 = (a_lo+a_hi)^2 - z2 - z0. */
+    (void)sp_4096_sub_98(z1, z1, z2);
+    (void)sp_4096_sub_98(z1, z1, z0);
+    (void)sp_4096_add_98(r + 49, r + 49, z1);
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int base;
+    int k;
+
+    /* Digit-wise add: 192 digits in blocks of eight, then the last four.
+     * Carries accumulate in the digits (normalised elsewhere). */
+    for (base = 0; base < 192; base += 8) {
+        for (k = 0; k < 8; k++) {
+            r[base + k] = a[base + k] + b[base + k];
+        }
+    }
+    for (k = 192; k < 196; k++) {
+        r[k] = a[k] + b[k];
+    }
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int base;
+    int k;
+
+    /* Digit-wise subtract: 192 digits in blocks of eight, then the last
+     * four.  Borrows accumulate in the (signed) digits. */
+    for (base = 0; base < 192; base += 8) {
+        for (k = 0; k < 8; k++) {
+            r[base + k] = a[base + k] - b[base + k];
+        }
+    }
+    for (k = 192; k < 196; k++) {
+        r[k] = a[k] - b[k];
+    }
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_196(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit* z0 = r;          /* low product  a_lo * b_lo */
+    sp_digit z1[196];          /* middle product (temporary) */
+    sp_digit* a1 = z1;         /* a_lo + a_hi (reuses z1's storage) */
+    sp_digit b1[98];           /* b_lo + b_hi */
+    sp_digit* z2 = r + 196;    /* high product a_hi * b_hi */
+    /* Second level of Karatsuba over 98-digit halves.  The half sums are
+     * formed first; the middle multiply then overwrites a1/z1. */
+    (void)sp_4096_add_98(a1, a, &a[98]);
+    (void)sp_4096_add_98(b1, b, &b[98]);
+    sp_4096_mul_98(z2, &a[98], &b[98]);
+    sp_4096_mul_98(z0, a, b);
+    sp_4096_mul_98(z1, a1, b1);
+    /* Cross term: z1 = (a_lo+a_hi)(b_lo+b_hi) - z2 - z0. */
+    (void)sp_4096_sub_196(z1, z1, z2);
+    (void)sp_4096_sub_196(z1, z1, z0);
+    (void)sp_4096_add_196(r + 98, r + 98, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_196(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;          /* low square  a_lo^2 */
+    sp_digit z1[196];          /* middle term (temporary) */
+    sp_digit* a1 = z1;         /* a_lo + a_hi (reuses z1's storage) */
+    sp_digit* z2 = r + 196;    /* high square a_hi^2 */
+    /* Second level of Karatsuba squaring over 98-digit halves. */
+    (void)sp_4096_add_98(a1, a, &a[98]);
+    sp_4096_sqr_98(z2, &a[98]);
+    sp_4096_sqr_98(z0, a);
+    sp_4096_sqr_98(z1, a1);    /* overwrites the half sum in a1 */
+    /* Cross term: z1 = (a_lo+a_hi)^2 - z2 - z0. */
+    (void)sp_4096_sub_196(z1, z1, z2);
+    (void)sp_4096_sub_196(z1, z1, z0);
+    (void)sp_4096_add_196(r + 98, r + 98, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int idx = 196;
+
+    /* Digit-wise add; carries accumulate and are normalised elsewhere. */
+    while (idx-- > 0) {
+        r[idx] = a[idx] + b[idx];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int idx = 196;
+
+    /* Digit-wise subtract; borrows stay in the (signed) digits. */
+    while (idx-- > 0) {
+        r[idx] = a[idx] - b[idx];
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Schoolbook multiplication computed column-by-column from the most
+ * significant column (k = 389) down, carrying the low 21 bits of the
+ * 64-bit accumulator into the next column.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_196(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i, j, k;
+ int64_t c;
+
+ c = ((int64_t)a[195]) * b[195];
+ r[391] = (sp_digit)(c >> 21);
+ c = (c & 0x1fffff) << 21;
+ for (k = 389; k >= 0; k--) {
+ for (i = 195; i >= 0; i--) {
+ j = k - i;
+ if (j >= 196) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int64_t)a[i]) * b[j];
+ }
+ /* Spill column: high bits above 42 go two digits up, middle 21 bits
+ * become this column's digit, low 21 bits seed the next column. */
+ r[k + 2] += c >> 42;
+ r[k + 1] = (c >> 21) & 0x1fffff;
+ c = (c & 0x1fffff) << 21;
+ }
+ r[0] = (sp_digit)(c >> 21);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * As sp_4096_mul_196 but each off-diagonal product is counted twice and
+ * the diagonal term a[i]^2 is added once when the column index is even.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_196(sp_digit* r, const sp_digit* a)
+{
+ int i, j, k;
+ int64_t c;
+
+ c = ((int64_t)a[195]) * a[195];
+ r[391] = (sp_digit)(c >> 21);
+ c = (c & 0x1fffff) << 21;
+ for (k = 389; k >= 0; k--) {
+ for (i = 195; i >= 0; i--) {
+ j = k - i;
+ if (j >= 196 || i <= j) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int64_t)a[i]) * a[j] * 2;
+ }
+ if (i == j) {
+ c += ((int64_t)a[i]) * a[i];
+ }
+
+ r[k + 2] += c >> 42;
+ r[k + 1] = (c >> 21) & 0x1fffff;
+ c = (c & 0x1fffff) << 21;
+ }
+ r[0] = (sp_digit)(c >> 21);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Half-size (98-digit) variant used by the RSA CRT code paths.
+ * Carries are left unpropagated; see sp_4096_add_196.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 98; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Half-size (98-digit) variant; borrows left in signed digits.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 98; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Half-size (98-digit) schoolbook multiply, column-wise from the top
+ * column (k = 193) down; see sp_4096_mul_196 for the carry scheme.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_98(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i, j, k;
+ int64_t c;
+
+ c = ((int64_t)a[97]) * b[97];
+ r[195] = (sp_digit)(c >> 21);
+ c = (c & 0x1fffff) << 21;
+ for (k = 193; k >= 0; k--) {
+ for (i = 97; i >= 0; i--) {
+ j = k - i;
+ if (j >= 98) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int64_t)a[i]) * b[j];
+ }
+ r[k + 2] += c >> 42;
+ r[k + 1] = (c >> 21) & 0x1fffff;
+ c = (c & 0x1fffff) << 21;
+ }
+ r[0] = (sp_digit)(c >> 21);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Half-size (98-digit) schoolbook square; off-diagonal products are
+ * doubled and the diagonal term added once per even column.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_98(sp_digit* r, const sp_digit* a)
+{
+ int i, j, k;
+ int64_t c;
+
+ c = ((int64_t)a[97]) * a[97];
+ r[195] = (sp_digit)(c >> 21);
+ c = (c & 0x1fffff) << 21;
+ for (k = 193; k >= 0; k--) {
+ for (i = 97; i >= 0; i--) {
+ j = k - i;
+ if (j >= 98 || i <= j) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int64_t)a[i]) * a[j] * 2;
+ }
+ if (i == j) {
+ c += ((int64_t)a[i]) * a[i];
+ }
+
+ r[k + 2] += c >> 42;
+ r[k + 1] = (c >> 21) & 0x1fffff;
+ c = (c & 0x1fffff) << 21;
+ }
+ r[0] = (sp_digit)(c >> 21);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * Uses Newton iteration for the inverse mod a power of two: each step
+ * doubles the number of correct low bits (4 -> 8 -> 16 -> 32), then the
+ * result is truncated to the 21-bit digit size and negated.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit x, b;
+
+ b = a[0];
+ x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+ x &= 0x1fffff;
+
+ /* rho = -1/m mod b */
+ *rho = (1L << 21) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * Produces 197 digits: r[0..195] are normalized 21-bit digits and r[196]
+ * receives the final carry. The unrolled variant keeps the running carry
+ * in the rotating t[] buffer instead of a single accumulator.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_196(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 196; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x1fffff;
+ t >>= 21;
+ }
+ r[196] = (sp_digit)t;
+#else
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff;
+ for (i = 0; i < 192; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff);
+ }
+ t[1] = tb * a[193];
+ r[193] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+ t[2] = tb * a[194];
+ r[194] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+ t[3] = tb * a[195];
+ r[195] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+ r[196] = (sp_digit)(t[3] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Half-size variant for RSA CRT: m is 2048 bits (98 digits of 21 bits,
+ * with 11 bits in the top digit: 97*21 + 11 = 2048), so just subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_4096_mont_norm_98(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<97; i++) {
+ r[i] = 0x1fffff;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 96; i += 8) {
+ r[i + 0] = 0x1fffff;
+ r[i + 1] = 0x1fffff;
+ r[i + 2] = 0x1fffff;
+ r[i + 3] = 0x1fffff;
+ r[i + 4] = 0x1fffff;
+ r[i + 5] = 0x1fffff;
+ r[i + 6] = 0x1fffff;
+ r[i + 7] = 0x1fffff;
+ }
+ r[96] = 0x1fffff;
+#endif
+ r[97] = 0x7ffL; /* top digit holds only 11 bits of the 2048 */
+
+ /* r = (2^n - 1) mod n */
+ (void)sp_4096_sub_98(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant digit down; once a difference has been
+ * captured (r != 0) the mask zeroes out all later contributions, but every
+ * digit is still touched so the access pattern is input-independent.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_98(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=97; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[97] - b[97]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[96] - b[96]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ for (i = 88; i >= 0; i -= 8) {
+ r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#endif /* WOLFSSL_SP_SMALL */
+
+ return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not. Branch-free so the operation is
+ * constant time regardless of the condition.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_4096_cond_sub_98(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 98; i++) {
+ r[i] = a[i] - (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 96; i += 8) {
+ r[i + 0] = a[i + 0] - (b[i + 0] & m);
+ r[i + 1] = a[i + 1] - (b[i + 1] & m);
+ r[i + 2] = a[i + 2] - (b[i + 2] & m);
+ r[i + 3] = a[i + 3] - (b[i + 3] & m);
+ r[i + 4] = a[i + 4] - (b[i + 4] & m);
+ r[i + 5] = a[i + 5] - (b[i + 5] & m);
+ r[i + 6] = a[i + 6] - (b[i + 6] & m);
+ r[i + 7] = a[i + 7] - (b[i + 7] & m);
+ }
+ r[96] = a[96] - (b[96] & m);
+ r[97] = a[97] - (b[97] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Core inner step of Montgomery reduction. The final carry is accumulated
+ * into r[98], one digit beyond the 98-digit operand.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_add_98(sp_digit* r, const sp_digit* a,
+ const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 98; i++) {
+ t += (tb * a[i]) + r[i];
+ r[i] = t & 0x1fffff;
+ t >>= 21;
+ }
+ r[98] += t;
+#else
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff);
+ for (i = 0; i < 96; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
+ t[2] = tb * a[i+2];
+ r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff));
+ t[3] = tb * a[i+3];
+ r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff));
+ t[4] = tb * a[i+4];
+ r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff));
+ t[5] = tb * a[i+5];
+ r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff));
+ t[6] = tb * a[i+6];
+ r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff));
+ t[7] = tb * a[i+7];
+ r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff));
+ t[0] = tb * a[i+8];
+ r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff));
+ }
+ t[1] = tb * a[97]; r[97] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
+ r[98] += (sp_digit)(t[1] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 21 bits.
+ *
+ * Propagates accumulated carries/borrows upward so every digit is again
+ * in [0, 2^21); the topmost digit keeps any remaining overflow.
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_4096_norm_98(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ for (i = 0; i < 97; i++) {
+ a[i+1] += a[i] >> 21;
+ a[i] &= 0x1fffff;
+ }
+#else
+ int i;
+ for (i = 0; i < 96; i += 8) {
+ a[i+1] += a[i+0] >> 21; a[i+0] &= 0x1fffff;
+ a[i+2] += a[i+1] >> 21; a[i+1] &= 0x1fffff;
+ a[i+3] += a[i+2] >> 21; a[i+2] &= 0x1fffff;
+ a[i+4] += a[i+3] >> 21; a[i+3] &= 0x1fffff;
+ a[i+5] += a[i+4] >> 21; a[i+4] &= 0x1fffff;
+ a[i+6] += a[i+5] >> 21; a[i+5] &= 0x1fffff;
+ a[i+7] += a[i+6] >> 21; a[i+6] &= 0x1fffff;
+ a[i+8] += a[i+7] >> 21; a[i+7] &= 0x1fffff;
+ a[i+9] += a[i+8] >> 21; a[i+8] &= 0x1fffff;
+ }
+ a[96+1] += a[96] >> 21;
+ a[96] &= 0x1fffff;
+#endif
+}
+
+/* Shift the result in the high 2048 bits down to the bottom.
+ *
+ * Equivalent to r = a >> 2048 (2048 = 97*21 + 11, hence the mixed
+ * ">> 11" / "<< 10" digit splicing), then zeroes the vacated top half.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_4096_mont_shift_98(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ int64_t n = a[97] >> 11;
+ n += ((int64_t)a[98]) << 10;
+
+ for (i = 0; i < 97; i++) {
+ r[i] = n & 0x1fffff;
+ n >>= 21;
+ n += ((int64_t)a[99 + i]) << 10;
+ }
+ r[97] = (sp_digit)n;
+#else
+ int i;
+ int64_t n = a[97] >> 11;
+ n += ((int64_t)a[98]) << 10;
+ for (i = 0; i < 96; i += 8) {
+ r[i + 0] = n & 0x1fffff;
+ n >>= 21; n += ((int64_t)a[i + 99]) << 10;
+ r[i + 1] = n & 0x1fffff;
+ n >>= 21; n += ((int64_t)a[i + 100]) << 10;
+ r[i + 2] = n & 0x1fffff;
+ n >>= 21; n += ((int64_t)a[i + 101]) << 10;
+ r[i + 3] = n & 0x1fffff;
+ n >>= 21; n += ((int64_t)a[i + 102]) << 10;
+ r[i + 4] = n & 0x1fffff;
+ n >>= 21; n += ((int64_t)a[i + 103]) << 10;
+ r[i + 5] = n & 0x1fffff;
+ n >>= 21; n += ((int64_t)a[i + 104]) << 10;
+ r[i + 6] = n & 0x1fffff;
+ n >>= 21; n += ((int64_t)a[i + 105]) << 10;
+ r[i + 7] = n & 0x1fffff;
+ n >>= 21; n += ((int64_t)a[i + 106]) << 10;
+ }
+ r[96] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[195]) << 10;
+ r[97] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+ XMEMSET(&r[98], 0, sizeof(*r) * 98U);
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * Classic word-wise Montgomery reduction: for each of the 98 digits a
+ * multiple of m (chosen via mp so the low digit cancels) is added, then
+ * the result is shifted down and conditionally reduced below m.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_98(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+ int i;
+ sp_digit mu;
+
+ sp_4096_norm_98(a + 98);
+
+ for (i=0; i<97; i++) {
+ mu = (a[i] * mp) & 0x1fffff;
+ sp_4096_mul_add_98(a+i, m, mu);
+ a[i+1] += a[i] >> 21;
+ }
+ /* Top digit only holds 11 bits (2048 = 97*21 + 11), hence the 0x7ff mask. */
+ mu = (a[i] * mp) & 0x7ffL;
+ sp_4096_mul_add_98(a+i, m, mu);
+ a[i+1] += a[i] >> 21;
+ a[i] &= 0x1fffff;
+
+ sp_4096_mont_shift_98(a, a);
+ sp_4096_cond_sub_98(a, a, m, 0 - (((a[97] >> 11) > 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_4096_norm_98(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^21).
+ */
+static void sp_4096_mont_mul_98(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_4096_mul_98(r, a, b);
+ sp_4096_mont_reduce_98(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^21).
+ */
+static void sp_4096_mont_sqr_98(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_4096_sqr_98(r, a);
+ sp_4096_mont_reduce_98(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * Half-size (98-digit) scalar multiply; r[98] receives the final carry.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_98(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 98; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x1fffff;
+ t >>= 21;
+ }
+ r[98] = (sp_digit)t;
+#else
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff;
+ for (i = 0; i < 96; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff);
+ }
+ t[1] = tb * a[97];
+ r[97] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+ r[98] = (sp_digit)(t[1] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not. Branch-free (constant time).
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_4096_cond_add_98(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 98; i++) {
+ r[i] = a[i] + (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 96; i += 8) {
+ r[i + 0] = a[i + 0] + (b[i + 0] & m);
+ r[i + 1] = a[i + 1] + (b[i + 1] & m);
+ r[i + 2] = a[i + 2] + (b[i + 2] & m);
+ r[i + 3] = a[i + 3] + (b[i + 3] & m);
+ r[i + 4] = a[i + 4] + (b[i + 4] & m);
+ r[i + 5] = a[i + 5] + (b[i + 5] & m);
+ r[i + 6] = a[i + 6] + (b[i + 6] & m);
+ r[i + 7] = a[i + 7] + (b[i + 7] & m);
+ }
+ r[96] = a[96] + (b[96] & m);
+ r[97] = a[97] + (b[97] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * NOTE(review): this guard is WOLFSSL_SMALL, unlike the WOLFSSL_SP_SMALL
+ * used by every other small-code block in this file — confirm against the
+ * SP generator whether this is intentional or a macro-name slip.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 98; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * NOTE(review): guarded by WOLFSSL_SMALL rather than WOLFSSL_SP_SMALL —
+ * see the matching note on sp_4096_sub_98 above; verify intent.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 98; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif
+/* Shift a right by n bits into r. (r = a >> n)
+ *
+ * n is assumed to be in [1, 20] so that the (21 - n) counter-shift is
+ * well defined; the only caller in this file uses n == 10.
+ */
+SP_NOINLINE static void sp_4096_rshift_98(sp_digit* r, sp_digit* a, byte n)
+{
+ int i;
+
+#ifdef WOLFSSL_SP_SMALL
+ for (i=0; i<97; i++) {
+ r[i] = ((a[i] >> n) | (a[i + 1] << (21 - n))) & 0x1fffff;
+ }
+#else
+ for (i=0; i<96; i += 8) {
+ r[i+0] = ((a[i+0] >> n) | (a[i+1] << (21 - n))) & 0x1fffff;
+ r[i+1] = ((a[i+1] >> n) | (a[i+2] << (21 - n))) & 0x1fffff;
+ r[i+2] = ((a[i+2] >> n) | (a[i+3] << (21 - n))) & 0x1fffff;
+ r[i+3] = ((a[i+3] >> n) | (a[i+4] << (21 - n))) & 0x1fffff;
+ r[i+4] = ((a[i+4] >> n) | (a[i+5] << (21 - n))) & 0x1fffff;
+ r[i+5] = ((a[i+5] >> n) | (a[i+6] << (21 - n))) & 0x1fffff;
+ r[i+6] = ((a[i+6] >> n) | (a[i+7] << (21 - n))) & 0x1fffff;
+ r[i+7] = ((a[i+7] >> n) | (a[i+8] << (21 - n))) & 0x1fffff;
+ }
+ r[96] = ((a[96] >> n) | (a[97] << (21 - n))) & 0x1fffff;
+#endif
+ r[97] = a[97] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+/* Divide the double-digit (d1|d0) by dv without requiring a 64-bit
+ * division: the 21-bit quotient is built in three chunks (11 + 10 + 1
+ * bits), each obtained from a division that fits in 32 bits.
+ */
+static WC_INLINE sp_digit sp_4096_div_word_98(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 21 bits from d1 and top 10 bits from d0. */
+ d = (d1 << 10) | (d0 >> 11);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 11 bits in r */
+ /* Next 10 bits from d0. */
+ r <<= 10;
+ d <<= 10;
+ d |= (d0 >> 1) & ((1 << 10) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 21 bits in r */
+ /* Remaining 1 bits from d0. */
+ r <<= 1;
+ d <<= 1;
+ d |= d0 & ((1 << 1) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * The divisor is pre-scaled by 2^10 so its top digit is full, a quotient
+ * digit is estimated per iteration (with one correction pass), and the
+ * remainder is scaled back down at the end.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_div_98(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ int i;
+#ifndef WOLFSSL_SP_DIV_32
+ int64_t d1;
+#endif
+ sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* td;
+#else
+ sp_digit t1d[196 + 1], t2d[98 + 1], sdd[98 + 1];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ sp_digit* sd;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 98 + 3), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ /* NOTE(review): duplicate of the (void)m above; harmless but redundant. */
+ (void)m;
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = td;
+ t2 = td + 196 + 1;
+ sd = t2 + 98 + 1;
+#else
+ t1 = t1d;
+ t2 = t2d;
+ sd = sdd;
+#endif
+
+ /* Scale divisor and dividend by 2^10 so sd's top digit is full. */
+ sp_4096_mul_d_98(sd, d, 1L << 10);
+ sp_4096_mul_d_196(t1, a, 1L << 10);
+ dv = sd[97];
+ for (i=98; i>=0; i--) {
+ t1[98 + i] += t1[98 + i - 1] >> 21;
+ t1[98 + i - 1] &= 0x1fffff;
+#ifndef WOLFSSL_SP_DIV_32
+ d1 = t1[98 + i];
+ d1 <<= 21;
+ d1 += t1[98 + i - 1];
+ r1 = (sp_digit)(d1 / dv);
+#else
+ r1 = sp_4096_div_word_98(t1[98 + i], t1[98 + i - 1], dv);
+#endif
+
+ sp_4096_mul_d_98(t2, sd, r1);
+ (void)sp_4096_sub_98(&t1[i], &t1[i], t2);
+ t1[98 + i] -= t2[98];
+ t1[98 + i] += t1[98 + i - 1] >> 21;
+ t1[98 + i - 1] &= 0x1fffff;
+ /* Correction pass for the quotient-digit estimate. */
+ r1 = (((-t1[98 + i]) << 21) - t1[98 + i - 1]) / dv;
+ r1 -= t1[98 + i];
+ sp_4096_mul_d_98(t2, sd, r1);
+ (void)sp_4096_add_98(&t1[i], &t1[i], t2);
+ t1[98 + i] += t1[98 + i - 1] >> 21;
+ t1[98 + i - 1] &= 0x1fffff;
+ }
+ t1[98 - 1] += t1[98 - 2] >> 21;
+ t1[98 - 2] &= 0x1fffff;
+ r1 = t1[98 - 1] / dv;
+
+ sp_4096_mul_d_98(t2, sd, r1);
+ sp_4096_sub_98(t1, t1, t2);
+ XMEMCPY(r, t1, sizeof(*r) * 2U * 98U);
+ for (i=0; i<96; i++) {
+ r[i+1] += r[i] >> 21;
+ r[i] &= 0x1fffff;
+ }
+ sp_4096_cond_add_98(r, r, sd, 0 - ((r[97] < 0) ?
+ (sp_digit)1 : (sp_digit)0));
+
+ sp_4096_norm_98(r);
+ /* Undo the 2^10 pre-scaling of the remainder. */
+ sp_4096_rshift_98(r, r, 10);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_4096_div_98 that discards the quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_mod_98(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_4096_div_98(a, m, NULL, r);
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Three compile-time variants:
+ *  - WOLFSSL_SP_SMALL: bit-at-a-time Montgomery ladder-style loop with
+ *    masked (constant-address-pattern) table access;
+ *  - WOLFSSL_SP_CACHE_RESISTANT: same 1-bit scheme with a 2-entry table;
+ *  - default: 5-bit fixed window with a 32-entry precomputed table.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_98(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+ const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* td;
+ sp_digit* t[3];
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 98 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(td, 0, sizeof(*td) * 3U * 98U * 2U);
+
+ norm = t[0] = td;
+ t[1] = &td[98 * 2];
+ t[2] = &td[2 * 98 * 2];
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_98(norm, m);
+
+ /* Convert the base into Montgomery form (reduce first if requested). */
+ if (reduceA != 0) {
+ err = sp_4096_mod_98(t[1], a, m);
+ }
+ else {
+ XMEMCPY(t[1], a, sizeof(sp_digit) * 98U);
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_4096_mul_98(t[1], t[1], norm);
+ err = sp_4096_mod_98(t[1], t[1], m);
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 21;
+ c = bits % 21;
+ n = e[i--] << (21 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 21;
+ }
+
+ y = (n >> 20) & 1;
+ n <<= 1;
+
+ sp_4096_mont_mul_98(t[y^1], t[0], t[1], m, mp);
+
+ /* Masked copy/square/copy-back keeps the memory access pattern
+ * independent of the exponent bit y. */
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])),
+ sizeof(*t[2]) * 98 * 2);
+ sp_4096_mont_sqr_98(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2],
+ sizeof(*t[2]) * 98 * 2);
+ }
+
+ sp_4096_mont_reduce_98(t[0], m, mp);
+ n = sp_4096_cmp_98(t[0], m);
+ sp_4096_cond_sub_98(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(*r) * 98 * 2);
+
+ }
+
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[3][196];
+#else
+ sp_digit* td;
+ sp_digit* t[3];
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 98 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ t[0] = td;
+ t[1] = &td[98 * 2];
+ t[2] = &td[2 * 98 * 2];
+#endif
+ norm = t[0];
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_98(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_4096_mod_98(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_4096_mul_98(t[1], t[1], norm);
+ err = sp_4096_mod_98(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_4096_mul_98(t[1], a, norm);
+ err = sp_4096_mod_98(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 21;
+ c = bits % 21;
+ n = e[i--] << (21 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 21;
+ }
+
+ y = (n >> 20) & 1;
+ n <<= 1;
+
+ sp_4096_mont_mul_98(t[y^1], t[0], t[1], m, mp);
+
+ /* NOTE(review): when WOLFSSL_SMALL_STACK is defined t[2] is a
+ * pointer, so sizeof(t[2]) is only the pointer size (4/8 bytes)
+ * rather than 196 digits — these three XMEMCPYs copy far too
+ * little in that configuration. Confirm against upstream wolfSSL,
+ * where this was later changed to an explicit byte count. */
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+ sp_4096_mont_sqr_98(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+ }
+
+ sp_4096_mont_reduce_98(t[0], m, mp);
+ n = sp_4096_cmp_98(t[0], m);
+ sp_4096_cond_sub_98(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(t[0]));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][196];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit rt[196];
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 196, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++)
+ t[i] = td + i * 196;
+#endif
+ norm = t[0];
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_98(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_4096_mod_98(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_4096_mul_98(t[1], t[1], norm);
+ err = sp_4096_mod_98(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_4096_mul_98(t[1], a, norm);
+ err = sp_4096_mod_98(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Precompute t[k] = a^k in Montgomery form for the 5-bit window. */
+ sp_4096_mont_sqr_98(t[ 2], t[ 1], m, mp);
+ sp_4096_mont_mul_98(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_4096_mont_sqr_98(t[ 4], t[ 2], m, mp);
+ sp_4096_mont_mul_98(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_4096_mont_sqr_98(t[ 6], t[ 3], m, mp);
+ sp_4096_mont_mul_98(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_4096_mont_sqr_98(t[ 8], t[ 4], m, mp);
+ sp_4096_mont_mul_98(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_4096_mont_sqr_98(t[10], t[ 5], m, mp);
+ sp_4096_mont_mul_98(t[11], t[ 6], t[ 5], m, mp);
+ sp_4096_mont_sqr_98(t[12], t[ 6], m, mp);
+ sp_4096_mont_mul_98(t[13], t[ 7], t[ 6], m, mp);
+ sp_4096_mont_sqr_98(t[14], t[ 7], m, mp);
+ sp_4096_mont_mul_98(t[15], t[ 8], t[ 7], m, mp);
+ sp_4096_mont_sqr_98(t[16], t[ 8], m, mp);
+ sp_4096_mont_mul_98(t[17], t[ 9], t[ 8], m, mp);
+ sp_4096_mont_sqr_98(t[18], t[ 9], m, mp);
+ sp_4096_mont_mul_98(t[19], t[10], t[ 9], m, mp);
+ sp_4096_mont_sqr_98(t[20], t[10], m, mp);
+ sp_4096_mont_mul_98(t[21], t[11], t[10], m, mp);
+ sp_4096_mont_sqr_98(t[22], t[11], m, mp);
+ sp_4096_mont_mul_98(t[23], t[12], t[11], m, mp);
+ sp_4096_mont_sqr_98(t[24], t[12], m, mp);
+ sp_4096_mont_mul_98(t[25], t[13], t[12], m, mp);
+ sp_4096_mont_sqr_98(t[26], t[13], m, mp);
+ sp_4096_mont_mul_98(t[27], t[14], t[13], m, mp);
+ sp_4096_mont_sqr_98(t[28], t[14], m, mp);
+ sp_4096_mont_mul_98(t[29], t[15], t[14], m, mp);
+ sp_4096_mont_sqr_98(t[30], t[15], m, mp);
+ sp_4096_mont_mul_98(t[31], t[16], t[15], m, mp);
+
+ /* Round bits up to a multiple of the 5-bit window size. */
+ bits = ((bits + 4) / 5) * 5;
+ i = ((bits + 20) / 21) - 1;
+ c = bits % 21;
+ if (c == 0) {
+ c = 21;
+ }
+ if (i < 98) {
+ n = e[i--] << (32 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ if (c < 5) {
+ n |= e[i--] << (11 - c);
+ c += 21;
+ }
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ XMEMCPY(rt, t[y], sizeof(rt));
+ for (; i>=0 || c>=5; ) {
+ if (c < 5) {
+ n |= e[i--] << (11 - c);
+ c += 21;
+ }
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+
+ /* Five squarings then one table multiply per 5-bit window. */
+ sp_4096_mont_sqr_98(rt, rt, m, mp);
+ sp_4096_mont_sqr_98(rt, rt, m, mp);
+ sp_4096_mont_sqr_98(rt, rt, m, mp);
+ sp_4096_mont_sqr_98(rt, rt, m, mp);
+ sp_4096_mont_sqr_98(rt, rt, m, mp);
+
+ sp_4096_mont_mul_98(rt, rt, t[y], m, mp);
+ }
+
+ sp_4096_mont_reduce_98(rt, m, mp);
+ n = sp_4096_cmp_98(rt, m);
+ sp_4096_cond_sub_98(rt, rt, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, rt, sizeof(rt));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits (196 digits of 21 bits with 1 bit in the
+ * top digit: 195*21 + 1 = 4096), just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_4096_mont_norm_196(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<195; i++) {
+ r[i] = 0x1fffff;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 192; i += 8) {
+ r[i + 0] = 0x1fffff;
+ r[i + 1] = 0x1fffff;
+ r[i + 2] = 0x1fffff;
+ r[i + 3] = 0x1fffff;
+ r[i + 4] = 0x1fffff;
+ r[i + 5] = 0x1fffff;
+ r[i + 6] = 0x1fffff;
+ r[i + 7] = 0x1fffff;
+ }
+ r[192] = 0x1fffff;
+ r[193] = 0x1fffff;
+ r[194] = 0x1fffff;
+#endif
+ r[195] = 0x1L; /* top digit holds the single remaining bit of 4096 */
+
+ /* r = (2^n - 1) mod n */
+ (void)sp_4096_sub_196(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Same masked most-significant-first scan as sp_4096_cmp_98, over all
+ * 196 digits; every digit is read regardless of where the first
+ * difference occurs.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_196(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=195; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[195] - b[195]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[194] - b[194]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[193] - b[193]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[192] - b[192]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ for (i = 184; i >= 0; i -= 8) {
+ r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#endif /* WOLFSSL_SP_SMALL */
+
+ return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not. Branch-free (constant time).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_4096_cond_sub_196(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 196; i++) {
+ r[i] = a[i] - (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 192; i += 8) {
+ r[i + 0] = a[i + 0] - (b[i + 0] & m);
+ r[i + 1] = a[i + 1] - (b[i + 1] & m);
+ r[i + 2] = a[i + 2] - (b[i + 2] & m);
+ r[i + 3] = a[i + 3] - (b[i + 3] & m);
+ r[i + 4] = a[i + 4] - (b[i + 4] & m);
+ r[i + 5] = a[i + 5] - (b[i + 5] & m);
+ r[i + 6] = a[i + 6] - (b[i + 6] & m);
+ r[i + 7] = a[i + 7] - (b[i + 7] & m);
+ }
+ r[192] = a[192] - (b[192] & m);
+ r[193] = a[193] - (b[193] & m);
+ r[194] = a[194] - (b[194] & m);
+ r[195] = a[195] - (b[195] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Full-size (196-digit) Montgomery-reduction inner step; the final carry
+ * is accumulated into r[196].
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_add_196(sp_digit* r, const sp_digit* a,
+ const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ for (i = 0; i < 196; i++) {
+ t += (tb * a[i]) + r[i];
+ r[i] = t & 0x1fffff;
+ t >>= 21;
+ }
+ r[196] += t;
+#else
+ int64_t tb = b;
+ int64_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff);
+ for (i = 0; i < 192; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
+ t[2] = tb * a[i+2];
+ r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff));
+ t[3] = tb * a[i+3];
+ r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff));
+ t[4] = tb * a[i+4];
+ r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff));
+ t[5] = tb * a[i+5];
+ r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff));
+ t[6] = tb * a[i+6];
+ r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff));
+ t[7] = tb * a[i+7];
+ r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff));
+ t[0] = tb * a[i+8];
+ r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff));
+ }
+ t[1] = tb * a[193]; r[193] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
+ t[2] = tb * a[194]; r[194] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff));
+ t[3] = tb * a[195]; r[195] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff));
+ r[196] += (sp_digit)(t[3] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 21.
+ *
+ * Propagates carries so digits 0..194 each hold at most 21 bits
+ * (mask 0x1fffff); any overflow accumulates into a[195], which is
+ * deliberately left unmasked.
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_4096_norm_196(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 195; i++) {
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+#else
+    int i;
+    /* Unrolled by 8; a[192..194] are finished after the loop. */
+    for (i = 0; i < 192; i += 8) {
+        a[i+1] += a[i+0] >> 21; a[i+0] &= 0x1fffff;
+        a[i+2] += a[i+1] >> 21; a[i+1] &= 0x1fffff;
+        a[i+3] += a[i+2] >> 21; a[i+2] &= 0x1fffff;
+        a[i+4] += a[i+3] >> 21; a[i+3] &= 0x1fffff;
+        a[i+5] += a[i+4] >> 21; a[i+4] &= 0x1fffff;
+        a[i+6] += a[i+5] >> 21; a[i+5] &= 0x1fffff;
+        a[i+7] += a[i+6] >> 21; a[i+6] &= 0x1fffff;
+        a[i+8] += a[i+7] >> 21; a[i+7] &= 0x1fffff;
+        a[i+9] += a[i+8] >> 21; a[i+8] &= 0x1fffff;
+    }
+    a[192+1] += a[192] >> 21;
+    a[192] &= 0x1fffff;
+    a[193+1] += a[193] >> 21;
+    a[193] &= 0x1fffff;
+    a[194+1] += a[194] >> 21;
+    a[194] &= 0x1fffff;
+#endif
+}
+
+/* Shift the result in the high 4096 bits down to the bottom.
+ *
+ * Since 4096 = 195 * 21 + 1, the 4096-bit boundary falls one bit into
+ * digit 195: the shift starts at (a[195] >> 1) and each higher digit is
+ * injected 20 bits up (<< 20) to re-pack into 21-bit digits.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_4096_mont_shift_196(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[195] >> 1;
+    n += ((int64_t)a[196]) << 20;
+
+    for (i = 0; i < 195; i++) {
+        r[i] = n & 0x1fffff;
+        n >>= 21;
+        n += ((int64_t)a[197 + i]) << 20;
+    }
+    r[195] = (sp_digit)n;
+#else
+    int i;
+    int64_t n = a[195] >> 1;
+    n += ((int64_t)a[196]) << 20;
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 197]) << 20;
+        r[i + 1] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 198]) << 20;
+        r[i + 2] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 199]) << 20;
+        r[i + 3] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 200]) << 20;
+        r[i + 4] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 201]) << 20;
+        r[i + 5] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 202]) << 20;
+        r[i + 6] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 203]) << 20;
+        r[i + 7] = n & 0x1fffff;
+        n >>= 21; n += ((int64_t)a[i + 204]) << 20;
+    }
+    r[192] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[389]) << 20;
+    r[193] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[390]) << 20;
+    r[194] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[391]) << 20;
+    r[195] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    /* Clear the (now consumed) upper half of the double-width number. */
+    XMEMSET(&r[196], 0, sizeof(*r) * 196U);
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * For each of the 196 digits: compute mu = a[i] * mp (mod 2^21), add
+ * mu * m at that position so the low digit becomes zero, and propagate
+ * the carry. The final digit's mu is masked with 0x1L because the top
+ * word of the 4096-bit value holds only 1 bit (4096 - 195*21 = 1).
+ * Afterwards the high half is shifted down and at most one conditional
+ * subtract of m is needed.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_196(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_196(a + 196);
+
+#ifdef WOLFSSL_SP_DH
+    /* DH builds special-case mp == 1 to skip the multiply by mp. */
+    if (mp != 1) {
+        for (i=0; i<195; i++) {
+            mu = (a[i] * mp) & 0x1fffff;
+            sp_4096_mul_add_196(a+i, m, mu);
+            a[i+1] += a[i] >> 21;
+        }
+        mu = (a[i] * mp) & 0x1L;
+        sp_4096_mul_add_196(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+    else {
+        for (i=0; i<195; i++) {
+            mu = a[i] & 0x1fffff;
+            sp_4096_mul_add_196(a+i, m, mu);
+            a[i+1] += a[i] >> 21;
+        }
+        mu = a[i] & 0x1L;
+        sp_4096_mul_add_196(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+        a[i] &= 0x1fffff;
+    }
+#else
+    for (i=0; i<195; i++) {
+        mu = (a[i] * mp) & 0x1fffff;
+        sp_4096_mul_add_196(a+i, m, mu);
+        a[i+1] += a[i] >> 21;
+    }
+    mu = (a[i] * mp) & 0x1L;
+    sp_4096_mul_add_196(a+i, m, mu);
+    a[i+1] += a[i] >> 21;
+    a[i] &= 0x1fffff;
+#endif
+
+    /* Divide by 2^4096 (drop the zeroed low half) and fix up with a
+     * constant-time conditional subtract of the modulus. */
+    sp_4096_mont_shift_196(a, a);
+    sp_4096_cond_sub_196(a, a, m, 0 - (((a[195] >> 1) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_196(a);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full double-width multiply followed by a Montgomery reduction, so r
+ * must be large enough for the intermediate 392-digit product.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montogmery form.
+ * b Second number to multiply in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_4096_mont_mul_196(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_196(r, a, b);
+    sp_4096_mont_reduce_196(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full double-width square followed by a Montgomery reduction, so r
+ * must be large enough for the intermediate 392-digit product.
+ *
+ * r Result of squaring.
+ * a Number to square in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_4096_mont_sqr_196(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_196(r, a);
+    sp_4096_mont_reduce_196(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer (393 digits: product plus carry digit).
+ * a A single precision integer (392 digits).
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_392(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 392; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffff;
+        t >>= 21;
+    }
+    r[392] = (sp_digit)t;
+#else
+    int64_t tb = b;
+    int64_t t[8];
+    int i;
+
+    /* NOTE(review): the final unrolled pass (i == 384) reads a[392] and
+     * writes r[392]; r[392] is then overwritten below with the carry from
+     * a[391]. This assumes a[] is readable one digit past its 392-digit
+     * length — TODO confirm caller buffer sizing. */
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff;
+    for (i = 0; i < 392; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff);
+    }
+    r[392] =  (sp_digit)(t[7] >> 21);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_4096_cond_add_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    /* Masking b[i] with m (all-ones or all-zeros) keeps this branch-free. */
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    /* Unrolled by 8; digits 192..195 handled individually after the loop. */
+    for (i = 0; i < 192; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[192] = a[192] + (b[192] & m);
+    r[193] = a[193] + (b[193] & m);
+    r[194] = a[194] + (b[194] & m);
+    r[195] = a[195] + (b[195] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtract with no borrow propagation; digits may go negative
+ * and are normalized by the caller. Always returns 0.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise add with no carry propagation; the caller normalizes.
+ * Always returns 0.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 196; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+/* Shift a right by n bits into r. (r = a >> n)
+ *
+ * Each output digit combines the high bits of one digit with the low bits
+ * of the next, keeping 21-bit digits.
+ * NOTE(review): the (21 - n) shift assumes 0 < n < 21 — TODO confirm
+ * callers never pass 0.
+ */
+SP_NOINLINE static void sp_4096_rshift_196(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<195; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (21 - n))) & 0x1fffff;
+    }
+#else
+    for (i=0; i<192; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (21 - n))) & 0x1fffff;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (21 - n))) & 0x1fffff;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (21 - n))) & 0x1fffff;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (21 - n))) & 0x1fffff;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (21 - n))) & 0x1fffff;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (21 - n))) & 0x1fffff;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (21 - n))) & 0x1fffff;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (21 - n))) & 0x1fffff;
+    }
+    r[192] = ((a[192] >> n) | (a[193] << (21 - n))) & 0x1fffff;
+    r[193] = ((a[193] >> n) | (a[194] << (21 - n))) & 0x1fffff;
+    r[194] = ((a[194] >> n) | (a[195] << (21 - n))) & 0x1fffff;
+#endif
+    r[195] = a[195] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+/* Divide the 42-bit value (d1:d0) by the 21-bit divisor dv using only
+ * 32-bit arithmetic: the quotient is built in three partial divisions
+ * (10 + 10 + 1 bits of d0 at a time) so intermediates stay in range.
+ */
+static WC_INLINE sp_digit sp_4096_div_word_196(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 21 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 11);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 1) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Remaining 1 bits from d0. */
+    r <<= 1;
+    d <<= 1;
+    d |= d0 & ((1 << 1) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division on 21-bit digits. Both divisor and dividend
+ * are first scaled by 2^20 so the divisor's top digit (dv) uses a full
+ * 21 bits, which keeps the per-digit quotient estimates accurate; the
+ * scaling is undone at the end with a 20-bit right shift.
+ *
+ * a Nmber to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_div_196(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_32
+    int64_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[392 + 1], t2d[196 + 1], sdd[196 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* sd;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 196 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    (void)m;
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 392 + 1;
+        sd = t2 + 196 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        /* Scale divisor (sd) and dividend (t1) by 2^20. */
+        sp_4096_mul_d_196(sd, d, 1L << 20);
+        sp_4096_mul_d_392(t1, a, 1L << 20);
+        dv = sd[195];
+        for (i=196; i>=0; i--) {
+            t1[196 + i] += t1[196 + i - 1] >> 21;
+            t1[196 + i - 1] &= 0x1fffff;
+            /* Estimate this quotient digit from the top two words. */
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[196 + i];
+            d1 <<= 21;
+            d1 += t1[196 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_4096_div_word_196(t1[196 + i], t1[196 + i - 1], dv);
+#endif
+
+            sp_4096_mul_d_196(t2, sd, r1);
+            (void)sp_4096_sub_196(&t1[i], &t1[i], t2);
+            t1[196 + i] -= t2[196];
+            t1[196 + i] += t1[196 + i - 1] >> 21;
+            t1[196 + i - 1] &= 0x1fffff;
+            /* Correction step: if the estimate overshot, the top word went
+             * negative; add back the right multiple of the divisor. */
+            r1 = (((-t1[196 + i]) << 21) - t1[196 + i - 1]) / dv;
+            r1 -= t1[196 + i];
+            sp_4096_mul_d_196(t2, sd, r1);
+            (void)sp_4096_add_196(&t1[i], &t1[i], t2);
+            t1[196 + i] += t1[196 + i - 1] >> 21;
+            t1[196 + i - 1] &= 0x1fffff;
+        }
+        t1[196 - 1] += t1[196 - 2] >> 21;
+        t1[196 - 2] &= 0x1fffff;
+        r1 = t1[196 - 1] / dv;
+
+        sp_4096_mul_d_196(t2, sd, r1);
+        sp_4096_sub_196(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 196U);
+        for (i=0; i<194; i++) {
+            r[i+1] += r[i] >> 21;
+            r[i] &= 0x1fffff;
+        }
+        /* If the remainder went negative, add the (scaled) divisor back. */
+        sp_4096_cond_add_196(r, r, sd, 0 - ((r[195] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_4096_norm_196(r);
+        /* Undo the 2^20 scaling applied above. */
+        sp_4096_rshift_196(r, r, 20);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_4096_div_196 that keeps only the remainder.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_mod_196(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_196(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+                                                     defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Three build-selected implementations:
+ *  - WOLFSSL_SP_SMALL: bit-at-a-time Montgomery ladder, selecting operands
+ *    through addr_mask so memory access does not depend on exponent bits.
+ *  - WOLFSSL_SP_CACHE_RESISTANT: the same ladder with fixed-size buffers.
+ *  - default: 5-bit fixed-window exponentiation with 32 precomputed powers.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_196(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* td;
+    sp_digit* t[3];
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 196 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 196U * 2U);
+
+        /* t[0] starts as the Montgomery form of 1 (the norm value). */
+        norm = t[0] = td;
+        t[1] = &td[196 * 2];
+        t[2] = &td[2 * 196 * 2];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_196(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 196U);
+        }
+    }
+    if (err == MP_OKAY) {
+        /* Convert the base into Montgomery form. */
+        sp_4096_mul_196(t[1], t[1], norm);
+        err = sp_4096_mod_196(t[1], t[1], m);
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 21;
+        c = bits % 21;
+        n = e[i--] << (21 - c);
+        /* Montgomery ladder: one multiply and one square per exponent bit,
+         * with the squared operand chosen via addr_mask (no data-dependent
+         * branches or addresses). */
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 21;
+            }
+
+            y = (n >> 20) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_196(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                                  sizeof(*t[2]) * 196 * 2);
+            sp_4096_mont_sqr_196(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 196 * 2);
+        }
+
+        /* Leave Montgomery form and fully reduce below the modulus. */
+        sp_4096_mont_reduce_196(t[0], m, mp);
+        n = sp_4096_cmp_196(t[0], m);
+        sp_4096_cond_sub_196(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 196 * 2);
+
+    }
+
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[3][392];
+#else
+    sp_digit* td;
+    sp_digit* t[3];
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 196 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        t[0] = td;
+        t[1] = &td[196 * 2];
+        t[2] = &td[2 * 196 * 2];
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+
+        /* Convert the (optionally pre-reduced) base into Montgomery form. */
+        if (reduceA != 0) {
+            err = sp_4096_mod_196(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_196(t[1], t[1], norm);
+                err = sp_4096_mod_196(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_196(t[1], a, norm);
+            err = sp_4096_mod_196(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 21;
+        c = bits % 21;
+        n = e[i--] << (21 - c);
+        /* Same constant-time Montgomery ladder as the SP_SMALL variant. */
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 21;
+            }
+
+            y = (n >> 20) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_196(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_4096_mont_sqr_196(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+        }
+
+        sp_4096_mont_reduce_196(t[0], m, mp);
+        n = sp_4096_cmp_196(t[0], m);
+        sp_4096_cond_sub_196(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(t[0]));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][392];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit rt[392];
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 392, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 392;
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_196(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_196(t[1], t[1], norm);
+                err = sp_4096_mod_196(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_196(t[1], a, norm);
+            err = sp_4096_mod_196(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute t[k] = a^k in Montgomery form for k = 2..31, built
+         * from squares and multiplies of earlier entries. */
+        sp_4096_mont_sqr_196(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_196(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_196(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_196(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_196(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_196(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_196(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_196(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_196(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_196(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_196(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_196(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_196(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_196(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_196(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_196(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_196(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_196(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_196(t[20], t[10], m, mp);
+        sp_4096_mont_mul_196(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_196(t[22], t[11], m, mp);
+        sp_4096_mont_mul_196(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_196(t[24], t[12], m, mp);
+        sp_4096_mont_mul_196(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_196(t[26], t[13], m, mp);
+        sp_4096_mont_mul_196(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_196(t[28], t[14], m, mp);
+        sp_4096_mont_mul_196(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_196(t[30], t[15], m, mp);
+        sp_4096_mont_mul_196(t[31], t[16], t[15], m, mp);
+
+        /* Walk the exponent in 5-bit windows (n is a 32-bit staging
+         * register refilled from 21-bit exponent digits). */
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 20) / 21) - 1;
+        c = bits % 21;
+        if (c == 0) {
+            c = 21;
+        }
+        if (i < 196) {
+            n = e[i--] << (32 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (11 - c);
+            c += 21;
+        }
+        y = (n >> 27) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        XMEMCPY(rt, t[y], sizeof(rt));
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (11 - c);
+                c += 21;
+            }
+            y = (n >> 27) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            /* Five squarings then one multiply by the windowed power. */
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+            sp_4096_mont_sqr_196(rt, rt, m, mp);
+
+            sp_4096_mont_mul_196(rt, rt, t[y], m, mp);
+        }
+
+        sp_4096_mont_reduce_196(rt, m, mp);
+        n = sp_4096_cmp_196(rt, m);
+        sp_4096_cond_sub_196(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+       /* WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * The public exponent must fit in a single 21-bit digit (checked via
+ * mp_count_bits(em) > 21). Exponentiation is left-to-right
+ * square-and-multiply; the non-small build short-circuits e == 3.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * em      Public exponent.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit* norm;
+    sp_digit e[1] = {0};
+    sp_digit mp;
+    int i;
+    int err = MP_OKAY;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 21) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* norm aliases r: the Montgomery norm is only needed before the
+         * exponentiation writes its result there. */
+        a = d;
+        r = a + 196 * 2;
+        m = r + 196 * 2;
+        norm = r;
+
+        sp_4096_from_bin(a, 196, in, inLen);
+#if DIGIT_BIT >= 21
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 196, mm);
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+    }
+    if (err == MP_OKAY) {
+        /* Convert base into Montgomery form. */
+        sp_4096_mul_196(a, a, norm);
+        err = sp_4096_mod_196(a, a, m);
+    }
+    if (err == MP_OKAY) {
+        /* Find the exponent's top set bit, then square-and-multiply. */
+        for (i=20; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
+
+        XMEMCPY(r, a, sizeof(sp_digit) * 196 * 2);
+        for (i--; i>=0; i--) {
+            sp_4096_mont_sqr_196(r, r, m, mp);
+
+            if (((e[0] >> i) & 1) == 1) {
+                sp_4096_mont_mul_196(r, r, a, m, mp);
+            }
+        }
+        sp_4096_mont_reduce_196(r, m, mp);
+        mp = sp_4096_cmp_196(r, m);
+        sp_4096_cond_sub_196(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit ad[392], md[196], rd[392];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit e[1] = {0};
+    int err = MP_OKAY;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 21) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 196 * 2;
+        m = r + 196 * 2;
+    }
+#else
+    a = ad;
+    m = md;
+    r = rd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 196, in, inLen);
+#if DIGIT_BIT >= 21
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 196, mm);
+
+        /* Common public exponent 3: one square and one multiply,
+         * no Montgomery conversion needed. */
+        if (e[0] == 0x3) {
+            sp_4096_sqr_196(r, a);
+            err = sp_4096_mod_196(r, r, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_196(r, a, r);
+                err = sp_4096_mod_196(r, r, m);
+            }
+        }
+        else {
+            sp_digit* norm = r;
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+            sp_4096_mont_norm_196(norm, m);
+
+            sp_4096_mul_196(a, a, norm);
+            err = sp_4096_mod_196(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i=20; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 392U);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_196(r, r, m, mp);
+
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_196(r, r, a, m, mp);
+                    }
+                }
+                sp_4096_mont_reduce_196(r, m, mp);
+                mp = sp_4096_cmp_196(r, m);
+                sp_4096_cond_sub_196(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
+/* RSA private key operation.
+ *
+ * When SP_RSA_PRIVATE_EXP_D or RSA_LOW_MEM is defined, a single 4096-bit
+ * exponentiation with d is performed. Otherwise the CRT form is used:
+ * two 2048-bit exponentiations (98-digit halves) with dp/dq, recombined
+ * via qi (Garner). Secret material is zeroized before returning.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+           err = MP_READ_E;
+        }
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        /* r aliases a: the result overwrites the base in place. */
+        a = d + 196;
+        m = a + 392;
+        r = a;
+
+        sp_4096_from_bin(a, 196, in, inLen);
+        sp_4096_from_mp(d, 196, dm);
+        sp_4096_from_mp(m, 196, mm);
+        err = sp_4096_mod_exp_196(r, a, d, 4096, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        /* Zeroize the private exponent before freeing. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 196);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[392], d[196], m[196];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 4096) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 196, in, inLen);
+        sp_4096_from_mp(d, 196, dm);
+        sp_4096_from_mp(m, 196, mm);
+        err = sp_4096_mod_exp_196(r, a, d, 4096, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    XMEMSET(d, 0, sizeof(sp_digit) * 196);
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#else
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* dq;
+    sp_digit* qi;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 512) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 98 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        /* dp, dq and qi share one 98-digit buffer: each is only needed
+         * for one step at a time. */
+        a = t;
+        p = a + 196 * 2;
+        q = p + 98;
+        qi = dq = dp = q + 98;
+        tmpa = qi + 98;
+        tmpb = tmpa + 196;
+
+        r = t + 196;
+
+        sp_4096_from_bin(a, 196, in, inLen);
+        sp_4096_from_mp(p, 98, pm);
+        sp_4096_from_mp(q, 98, qm);
+        sp_4096_from_mp(dp, 98, dpm);
+        err = sp_4096_mod_exp_98(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(dq, 98, dqm);
+        err = sp_4096_mod_exp_98(tmpb, a, dq, 2048, q, 1);
+    }
+    if (err == MP_OKAY) {
+        /* Garner recombination: tmpa = (tmpa - tmpb) * qi mod p.
+         * Two conditional adds because the un-propagated subtract can
+         * leave the result up to 2p below zero. */
+        (void)sp_4096_sub_98(tmpa, tmpa, tmpb);
+        sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31));
+        sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31));
+
+        sp_4096_from_mp(qi, 98, qim);
+        sp_4096_mul_98(tmpa, tmpa, qi);
+        err = sp_4096_mod_98(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* r = tmpb + q * tmpa */
+        sp_4096_mul_98(tmpa, q, tmpa);
+        (void)sp_4096_add_196(r, tmpb, tmpa);
+        sp_4096_norm_196(r);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (t != NULL) {
+        /* Zeroize all CRT secrets before freeing. */
+        XMEMSET(t, 0, sizeof(sp_digit) * 98 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[196 * 2];
+    sp_digit p[98], q[98], dp[98], dq[98], qi[98];
+    sp_digit tmpa[196], tmpb[196];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 196, in, inLen);
+        sp_4096_from_mp(p, 98, pm);
+        sp_4096_from_mp(q, 98, qm);
+        sp_4096_from_mp(dp, 98, dpm);
+        sp_4096_from_mp(dq, 98, dqm);
+        sp_4096_from_mp(qi, 98, qim);
+
+        err = sp_4096_mod_exp_98(tmpa, a, dp, 2048, p, 1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_4096_mod_exp_98(tmpb, a, dq, 2048, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        /* Garner recombination, as in the heap-allocated path above. */
+        (void)sp_4096_sub_98(tmpa, tmpa, tmpb);
+        sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31));
+        sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31));
+        sp_4096_mul_98(tmpa, tmpa, qi);
+        err = sp_4096_mod_98(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_mul_98(tmpa, tmpa, q);
+        (void)sp_4096_add_196(r, tmpb, tmpa);
+        sp_4096_norm_196(r);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    /* Zeroize all stack-held secrets before returning. */
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+    XMEMSET(dq, 0, sizeof(dq));
+    XMEMSET(qi, 0, sizeof(qi));
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+}
+
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Repacks 196 digits of 21 bits each into mp_digits of DIGIT_BIT bits;
+ * the three branches handle DIGIT_BIT equal to, smaller than, or larger
+ * than the 21-bit sp_digit size.
+ *
+ * a  A single precision integer.
+ * r  A multi-precision integer.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 21
+        /* Same digit size: straight copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 196);
+        r->used = 196;
+        mp_clamp(r);
+#elif DIGIT_BIT < 21
+        /* mp_digit narrower than 21 bits: each sp_digit spans several
+         * mp_digits; s tracks the bit offset within the current one. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 196; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 21) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 21 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp_digit wider than 21 bits: accumulate sp_digits until an
+         * mp_digit fills, then spill the remainder into the next one. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 196; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 21 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 21 - s;
+            }
+            else {
+                s += 21;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer; must be exactly 4096 bits.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bits in an input
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    /* Inputs must fit in 4096 bits; the modulus must be exactly 4096. */
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Single allocation split into b (double width, also holds the
+         * result), e and m below. */
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 196 * 2;
+        m = e + 196;
+        r = b;
+
+        sp_4096_from_mp(b, 196, base);
+        sp_4096_from_mp(e, 196, exp);
+        sp_4096_from_mp(m, 196, mod);
+
+        /* Use the cached bit count (previously recomputed here). */
+        err = sp_4096_mod_exp_196(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        /* Zeroize the secret exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[392], ed[196], md[196];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    /* Inputs must fit in 4096 bits; the modulus must be exactly 4096. */
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        /* Single allocation split into b (double width, also holds the
+         * result), e and m below. */
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 196 * 2;
+        m = e + 196;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 196, base);
+        sp_4096_from_mp(e, 196, exp);
+        sp_4096_from_mp(m, 196, mod);
+
+        err = sp_4096_mod_exp_196(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_4096_to_mp(r, res);
+    }
+
+    /* Zeroize the secret exponent before returning. */
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+#endif
+
+    return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+/* Shift a left by n bits, wide result. (r = a << n)
+ *
+ * r  Result; 197 digits so the bits shifted out of the top digit are kept.
+ * a  Number to shift; 196 digits of 21 bits each.
+ * n  Bit count to shift by; expected 0..20 (callers pass a 4-bit window).
+ * Safe for in-place use (r == a): digits are processed from the top down,
+ * so each write lands on an index already consumed.
+ */
+SP_NOINLINE static void sp_4096_lshift_196(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    r[196] = a[195] >> (21 - n);
+    for (i=195; i>0; i--) {
+        r[i] = ((a[i] << n) | (a[i-1] >> (21 - n))) & 0x1fffff;
+    }
+#else
+    sp_int_digit s, t;
+    int i;
+
+    /* Overflow digit: just the bits pushed out of a[195]. */
+    s = (sp_int_digit)a[195];
+    r[196] = s >> (21U - n);
+    /* Each digit takes its own low bits shifted up, plus the top bits of
+     * the digit below, masked back to 21 bits. */
+    for (i = 195; i > 0; i--) {
+        s = (sp_int_digit)(a[i]);
+        t = (sp_int_digit)(a[i-1]);
+        r[i] = (sp_digit)(((s << n) | (t >> (21U - n))) & 0x1fffff);
+    }
+#endif
+    r[0] = (a[0] << n) & 0x1fffff;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * Fixed-base variant used for DH with base 2: processes the exponent in
+ * 4-bit windows; each window is a shift of r by y bits followed by a
+ * fold-back of the overflow bits, instead of a general multiplication.
+ *
+ * r     A single precision number that is the result of the operation.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_196(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[392];
+    sp_digit td[197];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* One allocation: 392 digits for norm + 197 for tmp = 589. */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 589, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp = td + 392;
+        XMEMSET(td, 0, sizeof(sp_digit) * 589);
+#else
+        norm = nd;
+        tmp = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_196(norm, m);
+
+        /* Round the bit count up to a whole number of 4-bit windows.
+         * n buffers exponent bits (32-bit window), c counts valid bits. */
+        bits = ((bits + 3) / 4) * 4;
+        i = ((bits + 20) / 21) - 1;
+        c = bits % 21;
+        if (c == 0) {
+            c = 21;
+        }
+        if (i < 196) {
+            n = e[i--] << (32 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 4) {
+            /* Top up the window buffer from the next 21-bit digit. */
+            n |= e[i--] << (11 - c);
+            c += 21;
+        }
+        /* First window: r = norm << y (i.e. 2^y in Montgomery form). */
+        y = (n >> 28) & 0xf;
+        n <<= 4;
+        c -= 4;
+        sp_4096_lshift_196(r, norm, y);
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= e[i--] << (11 - c);
+                c += 21;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            /* Square four times: makes room for the next 4 exponent bits. */
+            sp_4096_mont_sqr_196(r, r, m, mp);
+            sp_4096_mont_sqr_196(r, r, m, mp);
+            sp_4096_mont_sqr_196(r, r, m, mp);
+            sp_4096_mont_sqr_196(r, r, m, mp);
+
+            /* Multiply by 2^y as a shift, then fold the bits that moved
+             * above bit 4095 (r[196] and all but the low bit of r[195])
+             * back in via the normalizer, and conditionally subtract m.
+             * cond_sub keeps this constant-time: the mask is all-ones or
+             * all-zeros based on the comparison. */
+            sp_4096_lshift_196(r, r, y);
+            sp_4096_mul_d_196(tmp, norm, (r[196] << 20) + (r[195] >> 1));
+            r[196] = 0;
+            r[195] &= 0x1L;
+            (void)sp_4096_add_196(r, r, tmp);
+            sp_4096_norm_196(r);
+            o = sp_4096_cmp_196(r, m);
+            sp_4096_cond_sub_196(r, r, m, ((o < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        /* Leave Montgomery form and reduce into [0, m). */
+        sp_4096_mont_reduce_196(r, m, mp);
+        n = sp_4096_cmp_196(r, m);
+        sp_4096_cond_sub_196(r, r, m, ((n < 0) ?
+                (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base    Base.
+ * exp     Array of bytes that is the exponent.
+ * expLen  Length of data, in bytes, in exponent.
+ * mod     Modulus; must be exactly 4096 bits.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Length, in bytes, of exponentiation result (leading zero bytes
+ *         stripped).
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+
+    /* Validate sizes: base/exponent fit 4096 bits, modulus exactly 4096. */
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Single allocation split into b (double width, also holds the
+         * result), e and m below. */
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 196 * 2;
+        m = e + 196;
+        r = b;
+
+        sp_4096_from_mp(b, 196, base);
+        sp_4096_from_bin(e, 196, exp, expLen);
+        sp_4096_from_mp(m, 196, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        /* Base 2 with a modulus whose top bits match the FFDHE-4096 prime
+         * pattern: use the specialized 2^e routine. */
+        if (base->used == 1 && base->dp[0] == 2 &&
+                ((m[195] << 15) | (m[194] >> 6)) == 0xffffL) {
+            err = sp_4096_mod_exp_2_196(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_4096_mod_exp_196(r, b, e, expLen * 8, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        /* Strip leading zero bytes from the big-endian result. */
+        for (i=0; i<512 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    if (d != NULL) {
+        /* Zeroize the secret exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[392], ed[196], md[196];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+    int err = MP_OKAY;
+
+    /* Validate sizes: base/exponent fit 4096 bits, modulus exactly 4096. */
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        /* Single allocation split into b (double width, also holds the
+         * result), e and m below. */
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 196 * 2;
+        m = e + 196;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 196, base);
+        sp_4096_from_bin(e, 196, exp, expLen);
+        sp_4096_from_mp(m, 196, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        /* Base 2 with a modulus whose top bits match the FFDHE-4096 prime
+         * pattern: use the specialized 2^e routine. */
+        if (base->used == 1 && base->dp[0] == 2U &&
+                ((m[195] << 15) | (m[194] >> 6)) == 0xffffL) {
+            err = sp_4096_mod_exp_2_196(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_4096_mod_exp_196(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_4096
+        }
+    #endif
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        /* Strip leading zero bytes from the big-endian result. */
+        for (i=0; i<512U && out[i] == 0U; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    /* Zeroize the secret exponent before returning. */
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 196U);
+#endif
+
+    return err;
+#endif
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use.
+ * Coordinate arrays are sized 2 * 10 digits — presumably to leave room for
+ * double-width intermediate products (TODO confirm against the field
+ * multiplication routines). */
+typedef struct sp_point_256 {
+    sp_digit x[2 * 10];   /* X coordinate. */
+    sp_digit y[2 * 10];   /* Y coordinate. */
+    sp_digit z[2 * 10];   /* Z coordinate. */
+    int infinity;         /* Point-at-infinity flag. */
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256, as 10 little-endian digits. */
+static const sp_digit p256_mod[10] = {
+    0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000,0x0000000,0x0000000,
+    0x0000400,0x3ff0000,0x03fffff
+};
+/* The Montgomery normalizer for the modulus of the curve P256. */
+static const sp_digit p256_norm_mod[10] = {
+    0x0000001,0x0000000,0x0000000,0x3fc0000,0x3ffffff,0x3ffffff,0x3ffffff,
+    0x3fffbff,0x000ffff,0x0000000
+};
+/* The Montgomery multiplier for the modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+                                            defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[10] = {
+    0x0632551,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
+    0x00003ff,0x3ff0000,0x03fffff
+};
+#endif
+/* The order of the curve P256 minus 2 (exponent for modular inversion). */
+static const sp_digit p256_order2[10] = {
+    0x063254f,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
+    0x00003ff,0x3ff0000,0x03fffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for the order of the curve P256. */
+static const sp_digit p256_norm_order[10] = {
+    0x39cdaaf,0x18d4f40,0x217b0c4,0x14963a1,0x0431905,0x0000000,0x0000000,
+    0x3fffc00,0x000ffff,0x0000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for the order of the curve P256. */
+static const sp_digit p256_mp_order = 0x200bc4f;
+#endif
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+    /* X coordinate */
+    {
+        0x098c296,0x04e5176,0x33a0f4a,0x204b7ac,0x277037d,0x0e9103c,0x3ce6e56,
+        0x1091fe2,0x1f2e12c,0x01ac5f4,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Y coordinate */
+    {
+        0x3bf51f5,0x1901a0d,0x1ececbb,0x15dacc5,0x22bce33,0x303e785,0x27eb4a7,
+        0x1fe6e3b,0x2e2fe1a,0x013f8d0,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* Z coordinate */
+    {
+        0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
+        0x0000000,0x0000000,0x0000000,
+        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+    },
+    /* infinity */
+    0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+/* The b coefficient of the curve P256. */
+static const sp_digit p256_b[10] = {
+    0x3d2604b,0x38f0f89,0x30f63bc,0x2c3314e,0x0651d06,0x1a621af,0x2bbd557,
+    0x24f9ecf,0x1d8aa3a,0x016b18d
+};
+#endif
+
+/* Obtain a point to work with: heap-allocated in small/small-stack
+ * builds, otherwise the caller-provided storage.
+ *
+ * heap  Heap to allocate from (only used when allocating).
+ * sp    Caller-provided point storage (only used when not allocating).
+ * p     Out: set to the point to use.
+ * returns MEMORY_E when allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_point_new_ex_10(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    (void)sp;
+    *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+    /* No allocation: use the caller's storage directly. */
+    *p = sp;
+#endif
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error.
+ * NULL is passed for the stack storage so only the heap path is used. */
+#define sp_256_point_new_10(heap, sp, p) sp_256_point_new_ex_10((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error.
+ * The caller's stack variable (sp) is used; no allocation occurs. */
+#define sp_256_point_new_10(heap, sp, p) sp_256_point_new_ex_10((heap), &(sp), &(p))
+#endif
+
+
+/* Dispose of a point obtained from sp_256_point_new_10.
+ *
+ * p      Point to dispose of (may be NULL in allocating builds).
+ * clear  Non-zero to zeroize the point data first (use for secrets).
+ * heap   Heap the point was allocated from (allocating builds only).
+ */
+static void sp_256_point_free_10(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+    if (p != NULL) {
+        if (clear != 0) {
+            XMEMSET(p, 0, sizeof(*p));
+        }
+        XFREE(p, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+/* Clear point data if requested. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+    (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * Works in a 32-bit-word view of the value: the ten 26-bit digits are
+ * repacked into eight 32-bit words, a fixed signed-coefficient matrix
+ * (the NIST P-256 fast-reduction pattern) is applied, carries are
+ * propagated, and the result is repacked into 26-bit digits.
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    int64_t* td;
+#else
+    int64_t td[8];
+    int64_t a32d[8];
+#endif
+    int64_t* t;
+    int64_t* a32;
+    int64_t o;
+    int err = MP_OKAY;
+
+    /* The modulus is implicitly the P-256 prime; m is unused. */
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        return MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t = td;
+        a32 = td + 8;
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
+        /* Repack ten 26-bit digits into eight 32-bit words. */
+        a32[0] = a[0];
+        a32[0] |= a[1] << 26U;
+        a32[0] &= 0xffffffffL;
+        a32[1] = (sp_digit)(a[1] >> 6);
+        a32[1] |= a[2] << 20U;
+        a32[1] &= 0xffffffffL;
+        a32[2] = (sp_digit)(a[2] >> 12);
+        a32[2] |= a[3] << 14U;
+        a32[2] &= 0xffffffffL;
+        a32[3] = (sp_digit)(a[3] >> 18);
+        a32[3] |= a[4] << 8U;
+        a32[3] &= 0xffffffffL;
+        a32[4] = (sp_digit)(a[4] >> 24);
+        a32[4] |= a[5] << 2U;
+        a32[4] |= a[6] << 28U;
+        a32[4] &= 0xffffffffL;
+        a32[5] = (sp_digit)(a[6] >> 4);
+        a32[5] |= a[7] << 22U;
+        a32[5] &= 0xffffffffL;
+        a32[6] = (sp_digit)(a[7] >> 10);
+        a32[6] |= a[8] << 16U;
+        a32[6] &= 0xffffffffL;
+        a32[7] = (sp_digit)(a[8] >> 16);
+        a32[7] |= a[9] << 10U;
+        a32[7] &= 0xffffffffL;
+
+        /* Apply the fixed reduction matrix; each row lists the
+         * coefficient applied to a32[0..7] for that output word. */
+        /*  1  1  0 -1 -1 -1 -1  0 */
+        t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
+        /*  0  1  1  0 -1 -1 -1 -1 */
+        t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
+        /*  0  0  1  1  0 -1 -1 -1 */
+        t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
+        /* -1 -1  0  2  2  1  0 -1 */
+        t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
+        /*  0 -1 -1  0  2  2  1  0 */
+        t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
+        /*  0  0 -1 -1  0  2  2  1 */
+        t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
+        /* -1 -1  0  0  0  1  3  2 */
+        t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
+        /*  1  0 -1 -1 -1 -1  0  3 */
+        t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
+
+        /* Propagate carries, fold the top overflow (o) back in, then
+         * propagate once more so every word fits in 32 bits. */
+        t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+        t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+        t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+        t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+        t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+        t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+        t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+        o     = t[7] >> 32U; t[7] &= 0xffffffffL;
+        t[0] += o;
+        t[3] -= o;
+        t[6] -= o;
+        t[7] += o;
+        t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+        t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+        t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+        t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+        t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+        t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+        t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+
+        /* Repack eight 32-bit words back into ten 26-bit digits. */
+        r[0] = (sp_digit)(t[0]) & 0x3ffffffL;
+        r[1] = (sp_digit)(t[0] >> 26U);
+        r[1] |= t[1] << 6U;
+        r[1] &= 0x3ffffffL;
+        r[2] = (sp_digit)(t[1] >> 20U);
+        r[2] |= t[2] << 12U;
+        r[2] &= 0x3ffffffL;
+        r[3] = (sp_digit)(t[2] >> 14U);
+        r[3] |= t[3] << 18U;
+        r[3] &= 0x3ffffffL;
+        r[4] = (sp_digit)(t[3] >> 8U);
+        r[4] |= t[4] << 24U;
+        r[4] &= 0x3ffffffL;
+        r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL;
+        r[6] = (sp_digit)(t[4] >> 28U);
+        r[6] |= t[5] << 4U;
+        r[6] &= 0x3ffffffL;
+        r[7] = (sp_digit)(t[5] >> 22U);
+        r[7] |= t[6] << 10U;
+        r[7] &= 0x3ffffffL;
+        r[8] = (sp_digit)(t[6] >> 16U);
+        r[8] |= t[7] << 16U;
+        r[8] &= 0x3ffffffL;
+        r[9] = (sp_digit)(t[7] >> 10U);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * The three branches handle the mp_int digit size (DIGIT_BIT) being
+ * equal to, larger than, or smaller than the 26-bit sp_digit size.
+ *
+ * r A single precision integer.
+ * size Maximum number of digits to convert.
+ * a A multi-precision integer.
+ */
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 26
+    /* Same digit layout: a straight copy then zero-pad. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 26
+    /* Source digits are wider: split each across several 26-bit
+     * digits; s tracks the bit offset into the current source digit. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x3ffffff;
+        s = 26U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 26U) <= (word32)DIGIT_BIT) {
+            s += 26U;
+            r[j] &= 0x3ffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* Source digits are narrower: accumulate several into each 26-bit
+     * digit; s tracks the bit offset into the current result digit. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 26) {
+            r[j] &= 0x3ffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 26 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * p   Point of type sp_point_256 (result).
+ * pm  Point of type ecc_point.
+ */
+static void sp_256_point_from_ecc_point_10(sp_point_256* p, const ecc_point* pm)
+{
+    /* For each ordinate: zero the full double-size array, then load
+     * the mp_int value into the low 10 digits. */
+    XMEMSET(p->x, 0, sizeof(p->x));
+    sp_256_from_mp(p->x, 10, pm->x);
+    XMEMSET(p->y, 0, sizeof(p->y));
+    sp_256_from_mp(p->y, 10, pm->y);
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_256_from_mp(p->z, 10, pm->z);
+    /* A converted point is treated as a concrete (finite) point. */
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * The three branches handle the mp_int digit size (DIGIT_BIT) being
+ * equal to, smaller than, or larger than the 26-bit sp_digit size.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ * returns the result of mp_grow (MP_OKAY on success).
+ */
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    /* Ensure the mp_int has room for 256 bits. */
+    err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 26
+        /* Same digit layout: a straight copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 10);
+        r->used = 10;
+        mp_clamp(r);
+#elif DIGIT_BIT < 26
+        /* Destination digits are narrower: split each 26-bit digit
+         * across several mp_int digits. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 10; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 26) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 26 - s;
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* Destination digits are wider: pack several 26-bit digits
+         * into each mp_int digit. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 10; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 26 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 26 - s;
+            }
+            else {
+                s += 26;
+            }
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p   Point of type sp_point_256.
+ * pm  Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_10(const sp_point_256* p, ecc_point* pm)
+{
+    int err;
+
+    /* Convert each ordinate in turn; stop at the first failure. */
+    err = sp_256_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pm->y);
+        if (err == MP_OKAY) {
+            err = sp_256_to_mp(p->z, pm->z);
+        }
+    }
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning form: output columns are computed from the highest
+ * down, with the carry held in c and spilled into already-written
+ * higher words (r[k + 2]) as needed.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int64_t c;
+
+    /* Highest column (digit 18) first; its carry goes to r[19]. */
+    c = ((int64_t)a[9]) * b[9];
+    r[19] = (sp_digit)(c >> 26);
+    c = (c & 0x3ffffff) << 26;
+    for (k = 17; k >= 0; k--) {
+        /* Sum all products a[i]*b[j] with i + j == k + 1. */
+        for (i = 9; i >= 0; i--) {
+            j = k - i;
+            if (j >= 10) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * b[j];
+        }
+        /* Spill overflow above 52 bits into the next-higher word. */
+        r[k + 2] += c >> 52;
+        r[k + 1] = (c >> 26) & 0x3ffffff;
+        c = (c & 0x3ffffff) << 26;
+    }
+    r[0] = (sp_digit)(c >> 26);
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Fully unrolled schoolbook multiply: t<k> accumulates all products
+ * a[i]*b[j] with i + j == k, then carries are propagated in one pass.
+ * Each column sum fits in an int64_t (at most 10 products of 52 bits).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int64_t t0   = ((int64_t)a[ 0]) * b[ 0];
+    int64_t t1   = ((int64_t)a[ 0]) * b[ 1]
+                 + ((int64_t)a[ 1]) * b[ 0];
+    int64_t t2   = ((int64_t)a[ 0]) * b[ 2]
+                 + ((int64_t)a[ 1]) * b[ 1]
+                 + ((int64_t)a[ 2]) * b[ 0];
+    int64_t t3   = ((int64_t)a[ 0]) * b[ 3]
+                 + ((int64_t)a[ 1]) * b[ 2]
+                 + ((int64_t)a[ 2]) * b[ 1]
+                 + ((int64_t)a[ 3]) * b[ 0];
+    int64_t t4   = ((int64_t)a[ 0]) * b[ 4]
+                 + ((int64_t)a[ 1]) * b[ 3]
+                 + ((int64_t)a[ 2]) * b[ 2]
+                 + ((int64_t)a[ 3]) * b[ 1]
+                 + ((int64_t)a[ 4]) * b[ 0];
+    int64_t t5   = ((int64_t)a[ 0]) * b[ 5]
+                 + ((int64_t)a[ 1]) * b[ 4]
+                 + ((int64_t)a[ 2]) * b[ 3]
+                 + ((int64_t)a[ 3]) * b[ 2]
+                 + ((int64_t)a[ 4]) * b[ 1]
+                 + ((int64_t)a[ 5]) * b[ 0];
+    int64_t t6   = ((int64_t)a[ 0]) * b[ 6]
+                 + ((int64_t)a[ 1]) * b[ 5]
+                 + ((int64_t)a[ 2]) * b[ 4]
+                 + ((int64_t)a[ 3]) * b[ 3]
+                 + ((int64_t)a[ 4]) * b[ 2]
+                 + ((int64_t)a[ 5]) * b[ 1]
+                 + ((int64_t)a[ 6]) * b[ 0];
+    int64_t t7   = ((int64_t)a[ 0]) * b[ 7]
+                 + ((int64_t)a[ 1]) * b[ 6]
+                 + ((int64_t)a[ 2]) * b[ 5]
+                 + ((int64_t)a[ 3]) * b[ 4]
+                 + ((int64_t)a[ 4]) * b[ 3]
+                 + ((int64_t)a[ 5]) * b[ 2]
+                 + ((int64_t)a[ 6]) * b[ 1]
+                 + ((int64_t)a[ 7]) * b[ 0];
+    int64_t t8   = ((int64_t)a[ 0]) * b[ 8]
+                 + ((int64_t)a[ 1]) * b[ 7]
+                 + ((int64_t)a[ 2]) * b[ 6]
+                 + ((int64_t)a[ 3]) * b[ 5]
+                 + ((int64_t)a[ 4]) * b[ 4]
+                 + ((int64_t)a[ 5]) * b[ 3]
+                 + ((int64_t)a[ 6]) * b[ 2]
+                 + ((int64_t)a[ 7]) * b[ 1]
+                 + ((int64_t)a[ 8]) * b[ 0];
+    int64_t t9   = ((int64_t)a[ 0]) * b[ 9]
+                 + ((int64_t)a[ 1]) * b[ 8]
+                 + ((int64_t)a[ 2]) * b[ 7]
+                 + ((int64_t)a[ 3]) * b[ 6]
+                 + ((int64_t)a[ 4]) * b[ 5]
+                 + ((int64_t)a[ 5]) * b[ 4]
+                 + ((int64_t)a[ 6]) * b[ 3]
+                 + ((int64_t)a[ 7]) * b[ 2]
+                 + ((int64_t)a[ 8]) * b[ 1]
+                 + ((int64_t)a[ 9]) * b[ 0];
+    int64_t t10  = ((int64_t)a[ 1]) * b[ 9]
+                 + ((int64_t)a[ 2]) * b[ 8]
+                 + ((int64_t)a[ 3]) * b[ 7]
+                 + ((int64_t)a[ 4]) * b[ 6]
+                 + ((int64_t)a[ 5]) * b[ 5]
+                 + ((int64_t)a[ 6]) * b[ 4]
+                 + ((int64_t)a[ 7]) * b[ 3]
+                 + ((int64_t)a[ 8]) * b[ 2]
+                 + ((int64_t)a[ 9]) * b[ 1];
+    int64_t t11  = ((int64_t)a[ 2]) * b[ 9]
+                 + ((int64_t)a[ 3]) * b[ 8]
+                 + ((int64_t)a[ 4]) * b[ 7]
+                 + ((int64_t)a[ 5]) * b[ 6]
+                 + ((int64_t)a[ 6]) * b[ 5]
+                 + ((int64_t)a[ 7]) * b[ 4]
+                 + ((int64_t)a[ 8]) * b[ 3]
+                 + ((int64_t)a[ 9]) * b[ 2];
+    int64_t t12  = ((int64_t)a[ 3]) * b[ 9]
+                 + ((int64_t)a[ 4]) * b[ 8]
+                 + ((int64_t)a[ 5]) * b[ 7]
+                 + ((int64_t)a[ 6]) * b[ 6]
+                 + ((int64_t)a[ 7]) * b[ 5]
+                 + ((int64_t)a[ 8]) * b[ 4]
+                 + ((int64_t)a[ 9]) * b[ 3];
+    int64_t t13  = ((int64_t)a[ 4]) * b[ 9]
+                 + ((int64_t)a[ 5]) * b[ 8]
+                 + ((int64_t)a[ 6]) * b[ 7]
+                 + ((int64_t)a[ 7]) * b[ 6]
+                 + ((int64_t)a[ 8]) * b[ 5]
+                 + ((int64_t)a[ 9]) * b[ 4];
+    int64_t t14  = ((int64_t)a[ 5]) * b[ 9]
+                 + ((int64_t)a[ 6]) * b[ 8]
+                 + ((int64_t)a[ 7]) * b[ 7]
+                 + ((int64_t)a[ 8]) * b[ 6]
+                 + ((int64_t)a[ 9]) * b[ 5];
+    int64_t t15  = ((int64_t)a[ 6]) * b[ 9]
+                 + ((int64_t)a[ 7]) * b[ 8]
+                 + ((int64_t)a[ 8]) * b[ 7]
+                 + ((int64_t)a[ 9]) * b[ 6];
+    int64_t t16  = ((int64_t)a[ 7]) * b[ 9]
+                 + ((int64_t)a[ 8]) * b[ 8]
+                 + ((int64_t)a[ 9]) * b[ 7];
+    int64_t t17  = ((int64_t)a[ 8]) * b[ 9]
+                 + ((int64_t)a[ 9]) * b[ 8];
+    int64_t t18  = ((int64_t)a[ 9]) * b[ 9];
+
+    /* Single carry-propagation pass, low to high. */
+    t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
+    t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
+    t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
+    t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
+    t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
+    t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
+    t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
+    t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
+    t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
+    t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
+    t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    r[19] = (sp_digit)(t18 >> 26);
+                       r[18] = t18 & 0x3ffffff;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* The curve order is the same width as the modulus, so the same
+ * Montgomery reduction routine is reused for reductions mod the order. */
+#define sp_256_mont_reduce_order_10 sp_256_mont_reduce_10
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant digit down; the mask term keeps
+ * accumulating only while all higher digits were equal (r == 0), so the
+ * sequence of operations performed never depends on the digit values.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=9; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    r |= (a[ 9] - b[ 9]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * The mask is applied to every digit so the same instructions execute
+ * in both cases (constant time).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] - (b[ 0] & m);
+    r[ 1] = a[ 1] - (b[ 1] & m);
+    r[ 2] = a[ 2] - (b[ 2] & m);
+    r[ 3] = a[ 3] - (b[ 3] & m);
+    r[ 4] = a[ 4] - (b[ 4] & m);
+    r[ 5] = a[ 5] - (b[ 5] & m);
+    r[ 6] = a[ 6] - (b[ 6] & m);
+    r[ 7] = a[ 7] - (b[ 7] & m);
+    r[ 8] = a[ 8] - (b[ 8] & m);
+    r[ 9] = a[ 9] - (b[ 9] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Used by Montgomery reduction to add mu * m at a digit offset; the
+ * final carry is added into r[10] (one digit past the scalar width).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[10] += t;
+#else
+    int64_t tb = b;
+    int64_t t[10];
+
+    /* Compute all partial products first, then merge with carries. */
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    t[ 7] = tb * a[ 7];
+    t[ 8] = tb * a[ 8];
+    t[ 9] = tb * a[ 9];
+    r[ 0] +=  (sp_digit)                 (t[ 0] & 0x3ffffff);
+    r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff));
+    r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff));
+    r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff));
+    r[ 4] += (sp_digit)((t[ 3] >> 26) + (t[ 4] & 0x3ffffff));
+    r[ 5] += (sp_digit)((t[ 4] >> 26) + (t[ 5] & 0x3ffffff));
+    r[ 6] += (sp_digit)((t[ 5] >> 26) + (t[ 6] & 0x3ffffff));
+    r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff));
+    r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff));
+    r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff));
+    r[10] +=  (sp_digit)(t[ 9] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 26.
+ *
+ * Carries excess bits upward; the top digit (a[9]) is not masked and
+ * so may temporarily hold more than its nominal 22 bits.
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_256_norm_10(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 9; i++) {
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+#else
+    a[1] += a[0] >> 26; a[0] &= 0x3ffffff;
+    a[2] += a[1] >> 26; a[1] &= 0x3ffffff;
+    a[3] += a[2] >> 26; a[2] &= 0x3ffffff;
+    a[4] += a[3] >> 26; a[3] &= 0x3ffffff;
+    a[5] += a[4] >> 26; a[4] &= 0x3ffffff;
+    a[6] += a[5] >> 26; a[5] &= 0x3ffffff;
+    a[7] += a[6] >> 26; a[6] &= 0x3ffffff;
+    a[8] += a[7] >> 26; a[7] &= 0x3ffffff;
+    a[9] += a[8] >> 26; a[8] &= 0x3ffffff;
+#endif
+}
+
+/* Shift the result in the high 256 bits down to the bottom.
+ *
+ * 256 bits is 9 full 26-bit digits plus 22 bits, so the shift starts
+ * 22 bits into a[9] and stitches 4 bits from each higher digit onto
+ * the next; the upper half of r is zeroed afterwards.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    sp_digit n, s;
+
+    s = a[10];
+    n = a[9] >> 22;
+    for (i = 0; i < 9; i++) {
+        n += (s & 0x3ffffff) << 4;
+        r[i] = n & 0x3ffffff;
+        n >>= 26;
+        s = a[11 + i] + (s >> 26);
+    }
+    n += s << 4;
+    r[9] = n;
+#else
+    sp_digit n, s;
+
+    s = a[10]; n = a[9] >> 22;
+    n += (s & 0x3ffffff) << 4; r[ 0] = n & 0x3ffffff;
+    n >>= 26; s = a[11] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 1] = n & 0x3ffffff;
+    n >>= 26; s = a[12] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 2] = n & 0x3ffffff;
+    n >>= 26; s = a[13] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 3] = n & 0x3ffffff;
+    n >>= 26; s = a[14] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 4] = n & 0x3ffffff;
+    n >>= 26; s = a[15] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 5] = n & 0x3ffffff;
+    n >>= 26; s = a[16] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 6] = n & 0x3ffffff;
+    n >>= 26; s = a[17] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 7] = n & 0x3ffffff;
+    n >>= 26; s = a[18] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 8] = n & 0x3ffffff;
+    n >>= 26; s = a[19] + (s >> 26);
+    n += s << 4;               r[ 9] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[10], 0, sizeof(*r) * 10U);
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * Eliminates one 26-bit digit per iteration by adding mu * modulus.
+ * The last iteration masks mu to 22 bits since 256 = 9*26 + 22.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    if (mp != 1) {
+        /* General modulus: mu = a[i] * mp mod 2^26. */
+        for (i=0; i<9; i++) {
+            mu = (a[i] * mp) & 0x3ffffff;
+            sp_256_mul_add_10(a+i, m, mu);
+            a[i+1] += a[i] >> 26;
+        }
+        mu = (a[i] * mp) & 0x3fffffL;
+        sp_256_mul_add_10(a+i, m, mu);
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+    else {
+        /* mp == 1 (the P-256 prime): mu is the digit itself, and the
+         * fixed modulus p256_mod is used instead of m. */
+        for (i=0; i<9; i++) {
+            mu = a[i] & 0x3ffffff;
+            sp_256_mul_add_10(a+i, p256_mod, mu);
+            a[i+1] += a[i] >> 26;
+        }
+        mu = a[i] & 0x3fffffL;
+        sp_256_mul_add_10(a+i, p256_mod, mu);
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+
+    /* Drop the eliminated low half, then subtract m once (in constant
+     * time) if the result still has bits above 2^256. */
+    sp_256_mont_shift_10(a, a);
+    sp_256_cond_sub_10(a, a, m, 0 - (((a[9] >> 22) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    /* Full double-width product, then Montgomery reduce back to 256 bits. */
+    sp_256_mul_10(r, a, b);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product-scanning form that exploits symmetry: off-diagonal products
+ * a[i]*a[j] (i > j) are counted twice, diagonal products once.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    /* Highest column (digit 18) first; its carry goes to r[19]. */
+    c = ((int64_t)a[9]) * a[9];
+    r[19] = (sp_digit)(c >> 26);
+    c = (c & 0x3ffffff) << 26;
+    for (k = 17; k >= 0; k--) {
+        for (i = 9; i >= 0; i--) {
+            j = k - i;
+            if (j >= 10 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        /* Add the diagonal term when the column index is even. */
+        if (i == j) {
+           c += ((int64_t)a[i]) * a[i];
+        }
+
+        /* Spill overflow above 52 bits into the next-higher word. */
+        r[k + 2] += c >> 52;
+        r[k + 1] = (c >> 26) & 0x3ffffff;
+        c = (c & 0x3ffffff) << 26;
+    }
+    r[0] = (sp_digit)(c >> 26);
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * Fully unrolled squaring: each column t<k> sums off-diagonal products
+ * once and doubles them, adding the diagonal term separately; carries
+ * are then propagated in one pass.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
+{
+    int64_t t0   =  ((int64_t)a[ 0]) * a[ 0];
+    int64_t t1   = (((int64_t)a[ 0]) * a[ 1]) * 2;
+    int64_t t2   = (((int64_t)a[ 0]) * a[ 2]) * 2
+                 +  ((int64_t)a[ 1]) * a[ 1];
+    int64_t t3   = (((int64_t)a[ 0]) * a[ 3]
+                 +  ((int64_t)a[ 1]) * a[ 2]) * 2;
+    int64_t t4   = (((int64_t)a[ 0]) * a[ 4]
+                 +  ((int64_t)a[ 1]) * a[ 3]) * 2
+                 +  ((int64_t)a[ 2]) * a[ 2];
+    int64_t t5   = (((int64_t)a[ 0]) * a[ 5]
+                 +  ((int64_t)a[ 1]) * a[ 4]
+                 +  ((int64_t)a[ 2]) * a[ 3]) * 2;
+    int64_t t6   = (((int64_t)a[ 0]) * a[ 6]
+                 +  ((int64_t)a[ 1]) * a[ 5]
+                 +  ((int64_t)a[ 2]) * a[ 4]) * 2
+                 +  ((int64_t)a[ 3]) * a[ 3];
+    int64_t t7   = (((int64_t)a[ 0]) * a[ 7]
+                 +  ((int64_t)a[ 1]) * a[ 6]
+                 +  ((int64_t)a[ 2]) * a[ 5]
+                 +  ((int64_t)a[ 3]) * a[ 4]) * 2;
+    int64_t t8   = (((int64_t)a[ 0]) * a[ 8]
+                 +  ((int64_t)a[ 1]) * a[ 7]
+                 +  ((int64_t)a[ 2]) * a[ 6]
+                 +  ((int64_t)a[ 3]) * a[ 5]) * 2
+                 +  ((int64_t)a[ 4]) * a[ 4];
+    int64_t t9   = (((int64_t)a[ 0]) * a[ 9]
+                 +  ((int64_t)a[ 1]) * a[ 8]
+                 +  ((int64_t)a[ 2]) * a[ 7]
+                 +  ((int64_t)a[ 3]) * a[ 6]
+                 +  ((int64_t)a[ 4]) * a[ 5]) * 2;
+    int64_t t10  = (((int64_t)a[ 1]) * a[ 9]
+                 +  ((int64_t)a[ 2]) * a[ 8]
+                 +  ((int64_t)a[ 3]) * a[ 7]
+                 +  ((int64_t)a[ 4]) * a[ 6]) * 2
+                 +  ((int64_t)a[ 5]) * a[ 5];
+    int64_t t11  = (((int64_t)a[ 2]) * a[ 9]
+                 +  ((int64_t)a[ 3]) * a[ 8]
+                 +  ((int64_t)a[ 4]) * a[ 7]
+                 +  ((int64_t)a[ 5]) * a[ 6]) * 2;
+    int64_t t12  = (((int64_t)a[ 3]) * a[ 9]
+                 +  ((int64_t)a[ 4]) * a[ 8]
+                 +  ((int64_t)a[ 5]) * a[ 7]) * 2
+                 +  ((int64_t)a[ 6]) * a[ 6];
+    int64_t t13  = (((int64_t)a[ 4]) * a[ 9]
+                 +  ((int64_t)a[ 5]) * a[ 8]
+                 +  ((int64_t)a[ 6]) * a[ 7]) * 2;
+    int64_t t14  = (((int64_t)a[ 5]) * a[ 9]
+                 +  ((int64_t)a[ 6]) * a[ 8]) * 2
+                 +  ((int64_t)a[ 7]) * a[ 7];
+    int64_t t15  = (((int64_t)a[ 6]) * a[ 9]
+                 +  ((int64_t)a[ 7]) * a[ 8]) * 2;
+    int64_t t16  = (((int64_t)a[ 7]) * a[ 9]) * 2
+                 +  ((int64_t)a[ 8]) * a[ 8];
+    int64_t t17  = (((int64_t)a[ 8]) * a[ 9]) * 2;
+    int64_t t18  =  ((int64_t)a[ 9]) * a[ 9];
+
+    /* Single carry-propagation pass, low to high. */
+    t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
+    t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
+    t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
+    t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
+    t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
+    t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
+    t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
+    t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
+    t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
+    t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
+    t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    r[19] = (sp_digit)(t18 >> 26);
+                       r[18] = t18 & 0x3ffffff;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    /* Full double-width square, then Montgomery reduce back to 256 bits. */
+    sp_256_sqr_10(r, a);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times.
+ * (r = a ^ (2 ^ n) mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * n Number of times to square.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_10(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    /* First squaring reads from a; the remaining n-1 square r in place. */
+    sp_256_mont_sqr_10(r, a, m, mp);
+    while (--n > 0) {
+        sp_256_mont_sqr_10(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* The modulus of the P256 curve minus 2: the exponent used for
+ * Fermat (mod-exp) inversion, stored as 32-bit words. */
+static const uint32_t p256_mod_minus_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * Computes a^(p-2) mod p: square-and-multiply in the small build, a
+ * fixed addition chain otherwise. td must provide room for the
+ * temporaries used (three 2*10-digit values in the non-small build).
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_256_mont_inv_10(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Left-to-right square-and-multiply over the bits of p - 2. */
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 10);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 10);
+#else
+    /* Fixed addition chain; comments track the exponent built so far. */
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 10;
+    sp_digit* t3 = td + 4 * 10;
+    /* 0x2 */
+    sp_256_mont_sqr_10(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_10(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_10(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_10(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_10(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_10(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_10(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_10(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_10(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_10(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_10(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_10(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_10(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_10(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_10(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_10(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_10(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes x = X / Z^2 and y = Y / Z^3, converts them out of
+ * Montgomery form, and sets z = 1.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data (must be large enough for the inversion
+ *   temporaries as well — see sp_256_mont_inv_10).
+ */
+static void sp_256_map_10(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    int32_t n;
+
+    /* t1 = 1 / z */
+    sp_256_mont_inv_10(t1, p->z, t + 2*10);
+
+    /* t2 = 1 / z^2, t1 = 1 / z^3 */
+    sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    XMEMSET(r->x + 10, 0, sizeof(r->x) / 2U);
+    /* Convert out of Montgomery form (reduce of the plain value). */
+    sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_10(r->x, p256_mod);
+    sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 10, 0, sizeof(r->y) / 2U);
+    /* Convert out of Montgomery form (reduce of the plain value). */
+    sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_10(r->y, p256_mod);
+    sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->y);
+
+    /* Affine point: z = 1. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Plain digit-wise addition; digits may exceed 26 bits until the
+ * caller normalizes the result.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * Unrolled digit-wise addition; digits may exceed 26 bits until the
+ * caller normalizes the result.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] + b[ 0];
+    r[ 1] = a[ 1] + b[ 1];
+    r[ 2] = a[ 2] + b[ 2];
+    r[ 3] = a[ 3] + b[ 3];
+    r[ 4] = a[ 4] + b[ 4];
+    r[ 5] = a[ 5] + b[ 5];
+    r[ 6] = a[ 6] + b[ 6];
+    r[ 7] = a[ 7] + b[ 7];
+    r[ 8] = a[ 8] + b[ 8];
+    r[ 9] = a[ 9] + b[ 9];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r Result of addition.
+ * a First number to add in Montgomery form.
+ * b Second number to add in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit over;
+
+    (void)sp_256_add_10(r, a, b);
+    sp_256_norm_10(r);
+    /* Conditionally subtract the modulus when the top digit overflowed. */
+    over = ((r[9] >> 22) > 0) ? (sp_digit)1 : (sp_digit)0;
+    sp_256_cond_sub_10(r, r, m, (sp_digit)0 - over);
+    sp_256_norm_10(r);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r Result of doubling.
+ * a Number to double in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_dbl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit over;
+
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    /* Conditionally subtract the modulus when the top digit overflowed. */
+    over = ((r[9] >> 22) > 0) ? (sp_digit)1 : (sp_digit)0;
+    sp_256_cond_sub_10(r, r, m, (sp_digit)0 - over);
+    sp_256_norm_10(r);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Performed as a doubling followed by one further addition, reducing
+ * after each step.
+ *
+ * r Result of tripling.
+ * a Number to triple in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit over;
+
+    /* r = 2a mod m */
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    over = ((r[9] >> 22) > 0) ? (sp_digit)1 : (sp_digit)0;
+    sp_256_cond_sub_10(r, r, m, (sp_digit)0 - over);
+    sp_256_norm_10(r);
+    /* r = r + a mod m */
+    (void)sp_256_add_10(r, r, a);
+    sp_256_norm_10(r);
+    over = ((r[9] >> 22) > 0) ? (sp_digit)1 : (sp_digit)0;
+    sp_256_cond_sub_10(r, r, m, (sp_digit)0 - over);
+    sp_256_norm_10(r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtraction; borrows are left in place for the caller to
+ * normalise.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int j;
+
+    for (j = 0; j < 10; j++) {
+        r[j] = a[j] - b[j];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Fully unrolled digit-wise subtraction; callers normalise afterwards.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[0] = a[0] - b[0];
+    r[1] = a[1] - b[1];
+    r[2] = a[2] - b[2];
+    r[3] = a[3] - b[3];
+    r[4] = a[4] - b[4];
+    r[5] = a[5] - b[5];
+    r[6] = a[6] - b[6];
+    r[7] = a[7] - b[7];
+    r[8] = a[8] - b[8];
+    r[9] = a[9] - b[9];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Constant time with respect to the mask value.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_256_cond_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int j;
+
+    for (j = 0; j < 10; j++) {
+        r[j] = a[j] + (b[j] & m);
+    }
+#else
+    r[0] = a[0] + (b[0] & m);
+    r[1] = a[1] + (b[1] & m);
+    r[2] = a[2] + (b[2] & m);
+    r[3] = a[3] + (b[3] & m);
+    r[4] = a[4] + (b[4] & m);
+    r[5] = a[5] + (b[5] & m);
+    r[6] = a[6] + (b[6] & m);
+    r[7] = a[7] + (b[7] & m);
+    r[8] = a[8] + (b[8] & m);
+    r[9] = a[9] + (b[9] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit borrow_mask;
+
+    (void)sp_256_sub_10(r, a, b);
+    /* The top digit goes negative on borrow; the shifted value then acts as
+     * the mask that adds the modulus back. */
+    borrow_mask = r[9] >> 22;
+    sp_256_cond_add_10(r, r, m, borrow_mask);
+    sp_256_norm_10(r);
+}
+
+/* Shift number right one bit.
+ * Bottom bit is lost.
+ *
+ * Digits are 26 bits wide: each result digit takes the low bit of the next
+ * digit as its new top bit (bit 25).  r and a may be the same buffer.
+ *
+ * r Result of shift.
+ * a Number to shift.
+ */
+SP_NOINLINE static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<9; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 25)) & 0x3ffffff;
+    r[1] = ((a[1] >> 1) | (a[2] << 25)) & 0x3ffffff;
+    r[2] = ((a[2] >> 1) | (a[3] << 25)) & 0x3ffffff;
+    r[3] = ((a[3] >> 1) | (a[4] << 25)) & 0x3ffffff;
+    r[4] = ((a[4] >> 1) | (a[5] << 25)) & 0x3ffffff;
+    r[5] = ((a[5] >> 1) | (a[6] << 25)) & 0x3ffffff;
+    r[6] = ((a[6] >> 1) | (a[7] << 25)) & 0x3ffffff;
+    r[7] = ((a[7] >> 1) | (a[8] << 25)) & 0x3ffffff;
+    r[8] = ((a[8] >> 1) | (a[9] << 25)) & 0x3ffffff;
+#endif
+    r[9] = a[9] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * When a is odd the modulus is first added (which does not change the value
+ * mod m) so the subsequent right shift is exact.
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit odd_mask = 0 - (a[0] & 1);
+
+    sp_256_cond_add_10(r, a, m, odd_mask);
+    sp_256_norm_10(r);
+    sp_256_rshift1_10(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * Jacobian-coordinate doubling; all ordinates are in Montgomery form.
+ * r and p may be the same point.  t must provide two 2*10-digit
+ * temporaries.
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_10(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_256_mont_sqr_10(t1, p->z, p256_mod, p256_mp_mod);
+    /* Z = Y * Z */
+    sp_256_mont_mul_10(z, p->y, p->z, p256_mod, p256_mp_mod);
+    /* Z = 2Z */
+    sp_256_mont_dbl_10(z, z, p256_mod);
+    /* T2 = X - T1 */
+    sp_256_mont_sub_10(t2, p->x, t1, p256_mod);
+    /* T1 = X + T1 */
+    sp_256_mont_add_10(t1, p->x, t1, p256_mod);
+    /* T2 = T1 * T2 */
+    sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod);
+    /* T1 = 3T2 */
+    sp_256_mont_tpl_10(t1, t2, p256_mod);
+    /* Y = 2Y */
+    sp_256_mont_dbl_10(y, p->y, p256_mod);
+    /* Y = Y * Y */
+    sp_256_mont_sqr_10(y, y, p256_mod, p256_mp_mod);
+    /* T2 = Y * Y */
+    sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
+    /* T2 = T2/2 */
+    sp_256_div2_10(t2, t2, p256_mod);
+    /* Y = Y * X */
+    sp_256_mont_mul_10(y, y, p->x, p256_mod, p256_mp_mod);
+    /* X = T1 * T1 */
+    sp_256_mont_sqr_10(x, t1, p256_mod, p256_mp_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_10(x, x, y, p256_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_10(x, x, y, p256_mod);
+    /* Y = Y - X */
+    sp_256_mont_sub_10(y, y, x, p256_mod);
+    /* Y = Y * T1 */
+    sp_256_mont_mul_10(y, y, t1, p256_mod, p256_mp_mod);
+    /* Y = Y - T2 */
+    sp_256_mont_sub_10(y, y, t2, p256_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * All digit differences are OR-folded so the work done is independent of
+ * where (or whether) the numbers differ.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit diff = 0;
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        diff |= a[i] ^ b[i];
+    }
+
+    return diff == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Falls back to point doubling when p and q have the same X and Z
+ * ordinates and equal or negated Y.  t must provide five 2*10-digit
+ * temporaries.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_10(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+        sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    sp_digit* t3 = t + 4*10;
+    sp_digit* t4 = t + 6*10;
+    sp_digit* t5 = t + 8*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_256* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double */
+    (void)sp_256_sub_10(t1, p256_mod, q->y);
+    sp_256_norm_10(t1);
+    if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
+        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_10(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        /* When either input is infinity, write the ordinates into the
+         * scratch point so the copy of the other input below survives. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<10; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_10(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_10(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_10(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_10(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_10(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(x, x, t5, p256_mod);
+        sp_256_mont_dbl_10(t1, y, p256_mod);
+        sp_256_mont_sub_10(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_10(y, y, x, p256_mod);
+        sp_256_mont_mul_10(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(y, y, t5, p256_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * One add and one double are performed for every scalar bit, with the
+ * destination selected by address masking, so the operation sequence does
+ * not depend on the scalar value.
+ *
+ * r    Resulting point.
+ * g    Point to multiply.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifdef WOLFSSL_SP_NO_MALLOC
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 10 * 5];
+#else
+    sp_point_256* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+
+        /* t[0] = {0, 0, 1} * norm */
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        /* Start at the top digit's highest used bit (digits are 26 bits,
+         * top digit holds 22). */
+        i = 9;
+        c = 22;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])),
+                    sizeof(sp_point_256));
+            sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2],
+                    sizeof(sp_point_256));
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        /* Free against the heap the buffer was allocated from. */
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Cache-attack resistant: one add and one double are performed for every
+ * scalar bit with the destination selected by address masking.
+ *
+ * r    Resulting point.
+ * g    Point to multiply.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 10 * 5];
+#else
+    sp_point_256* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Cast to the renamed point type used by this file. */
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
+        err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        i = 9;
+        c = 22;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points.
+ * Points are stored affine (no Z ordinate): only X and Y are kept. */
+typedef struct sp_table_entry_256 {
+    sp_digit x[10];   /* X ordinate, 10 digits. */
+    sp_digit y[10];   /* Y ordinate, 10 digits. */
+} sp_table_entry_256;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Uses a 4-bit fixed window: a table of the first 16 multiples of g is
+ * built, then the scalar is consumed four bits at a time (four doubles
+ * plus one table add per step).
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_fast_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[16];
+    sp_point_256 rtd;
+    sp_digit tmpd[2 * 10 * 5];
+#endif
+    sp_point_256* t;
+    sp_point_256* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_10(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Build the table t[i] = i * g for i in 0..15. */
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+        t[1].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_256_proj_point_add_10(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_256_proj_point_add_10(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_256_proj_point_add_10(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        /* Seed the accumulator from the scalar's top window. */
+        i = 8;
+        n = k[i+1] << 6;
+        c = 18;
+        y = n >> 24;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_256));
+        n <<= 8;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--] << (6 - c);
+                c += 26;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+
+            sp_256_proj_point_add_10(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_256_point_free_10(rt, 1, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * The point is doubled in place.
+ *
+ * p Point to double (receives the result).
+ * n Number of times to double.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_10(sp_point_256* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*10;
+    sp_digit* b = t + 4*10;
+    sp_digit* t1 = t + 6*10;
+    sp_digit* t2 = t + 8*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_10(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_10(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_10(w, w, p256_mod, p256_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_10(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_10(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(b, t1, x, p256_mod, p256_mp_mod);
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_10(t2, b, p256_mod);
+        sp_256_mont_sub_10(x, x, t2, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod);
+        /* t1 = Y^4 */
+        sp_256_mont_sqr_10(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_10(w, w, t1, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_10(y, b, x, p256_mod);
+        sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_10(y, y, p256_mod);
+        sp_256_mont_sub_10(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* Final iteration performed outside the loop (W is not updated). */
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_10(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_10(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_10(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_10(t2, b, p256_mod);
+    sp_256_mont_sub_10(x, x, t2, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod);
+    /* t1 = Y^4 */
+    sp_256_mont_sqr_10(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_10(y, b, x, p256_mod);
+    sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_10(y, y, p256_mod);
+    sp_256_mont_sub_10(y, y, t1, p256_mod);
+#endif
+    /* Y = Y/2 */
+    sp_256_div2_10(y, y, p256_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * Because Z2 == 1, the U1/S1 scaling steps of the general addition are
+ * skipped.  t must provide five 2*10-digit temporaries.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_10(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    sp_digit* t3 = t + 4*10;
+    sp_digit* t4 = t + 6*10;
+    sp_digit* t5 = t + 8*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_256_sub_10(t1, p256_mod, q->y);
+    sp_256_norm_10(t1);
+    if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
+        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_10(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        /* When either input is infinity, write the ordinates into the
+         * scratch point so the copy of the other input below survives. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<10; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_10(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_10(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_10(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_10(t1, t3, p256_mod);
+        sp_256_mont_sub_10(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_10(t3, t3, x, p256_mod);
+        sp_256_mont_mul_10(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert.
+ * t Temporary data. (t1, t2 plus scratch for the inversion.)
+ */
+static void sp_256_proj_to_affine_10(sp_point_256* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 10;
+    sp_digit* tmp = t + 4 * 10;
+
+    /* t1 = 1/Z (Montgomery form). */
+    sp_256_mont_inv_10(t1, a->z, tmp);
+
+    /* t2 = 1/Z^2, t1 = 1/Z^3. */
+    sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* X = X/Z^2, Y = Y/Z^3, Z = 1 (in Montgomery form). */
+    sp_256_mont_mul_10(a->x, a->x, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(a->y, a->y, t1, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * Builds 256 affine entries: powers of a by 2^32 at the bit positions of
+ * each index, combined by point addition.
+ *
+ * a     The base point.
+ * table Place to store generated point data.
+ * tmp   Temporary data.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_gen_stripe_table_10(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_10(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_10(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        /* table[2^i] = a * 2^(32*i), as affine points. */
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_10(t, 32, tmp);
+            sp_256_proj_to_affine_10(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        /* Remaining entries are sums of already-computed entries. */
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_10(t, s1, s2, tmp);
+                sp_256_proj_to_affine_10(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_10(s2, 0, heap);
+    sp_256_point_free_10(s1, 0, heap);
+    sp_256_point_free_10( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Uses the pre-computed stripe table: each step gathers one bit from each
+ * of eight 32-bit columns of the scalar to index the table.
+ *
+ * r     Resulting point.
+ * g     Point the table was generated from (unused here).
+ * table Pre-computed table of multiples of g.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_10(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 10 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_10(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_10(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        /* Seed accumulator from bit 31 of each 32-bit column. */
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_10(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_10(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_10(p, 0, heap);
+    sp_256_point_free_10(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry for a multiplication base point and its stripe table. */
+typedef struct sp_cache_256_t {
+    sp_digit x[10];                /* X ordinate of cached point. */
+    sp_digit y[10];                /* Y ordinate of cached point. */
+    sp_table_entry_256 table[256]; /* Pre-computed stripe table. */
+    uint32_t cnt;                  /* Number of times the entry was used. */
+    int set;                       /* Non-zero when the entry is in use. */
+} sp_cache_256_t;
+
+/* Per-thread (when THREAD_LS is available) cache state. */
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+/* Find or create the cache entry for point g.
+ *
+ * Looks for a matching entry; otherwise takes the next free slot, evicting
+ * the least-used entry when none is free.  Caller must hold the cache lock
+ * when thread-local storage is not available.
+ *
+ * g     Point to look up.
+ * cache Receives a pointer to the entry for g.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    /* Lazily mark all entries unused on first call. */
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_10(g->x, sp_cache_256[i].x) &
+                           sp_256_cmp_equal_10(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        /* Claim the slot for g; the stripe table is built by the caller. */
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * With FP_ECC, a stripe table for g is cached after its second use and the
+ * faster stripe multiplication is used from then on.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 10 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+       err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        /* Build the table on the second use of this point. */
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_10(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_10(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Public entry point: converts the mp_int scalar and ecc_point into the
+ * internal 10-digit SP representation, performs the multiply, and
+ * converts the result back.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Stack-allocated point and scalar for the no-malloc configuration. */
+ sp_point_256 p;
+ sp_digit kd[10];
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ /* NOTE(review): 'p' is only declared in the stack configuration;
+ * sp_256_point_new_10 is presumably a macro that ignores it in the
+ * heap configuration — confirm against its definition. */
+ err = sp_256_point_new_10(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ /* Small-stack build: scalar digits come from the heap. */
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert inputs to SP form (10 x 26-bit digits). */
+ sp_256_from_mp(k, 10, km);
+ sp_256_point_from_ecc_point_10(point, gm);
+
+ /* In-place multiply: point = gm * km. */
+ err = sp_256_ecc_mulmod_10(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_to_ecc_point_10(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_10(point, 0, heap);
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * WOLFSSL_SP_SMALL build: no pre-computed base-point table is compiled
+ * in, so this falls back to the generic point multiply with p256_base.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* No pre-computed values. */
+ return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap);
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * WOLFSSL_SP_CACHE_RESISTANT build: the table-driven base-point path is
+ * avoided (its table lookups are keyed by scalar bits), so this uses the
+ * generic point multiply with p256_base instead.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* No pre-computed values. */
+ return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap);
+}
+
+#else
+static const sp_table_entry_256 p256_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x0a9143c,0x1cc3506,0x360179e,0x3f17fb6,0x075ba95,0x1d88944,
+ 0x3b732b7,0x15719e7,0x376a537,0x0062417 },
+ { 0x295560a,0x094d5f3,0x245cddf,0x392e867,0x18b4ab8,0x3487cc9,
+ 0x288688d,0x176174b,0x3182588,0x0215c7f } },
+ /* 2 */
+ { { 0x147519a,0x2218090,0x32f0202,0x2b09acd,0x0d0981e,0x1e17af2,
+ 0x14a7caa,0x163a6a7,0x10ddbdf,0x03654f1 },
+ { 0x1590f8f,0x0d8733f,0x09179d6,0x1ad139b,0x372e962,0x0bad933,
+ 0x1961102,0x223cdff,0x37e9eb2,0x0218fae } },
+ /* 3 */
+ { { 0x0db6485,0x1ad88d7,0x2f97785,0x288bc28,0x3808f0e,0x3df8c02,
+ 0x28d9544,0x20280f9,0x055b5ff,0x00001d8 },
+ { 0x38d2010,0x13ae6e0,0x308a763,0x2ecc90d,0x254014f,0x10a9981,
+ 0x247d398,0x0fb8383,0x3613437,0x020c21d } },
+ /* 4 */
+ { { 0x2a0d2bb,0x08bf145,0x34994f9,0x1b06988,0x30d5cc1,0x1f18b22,
+ 0x01cf3a5,0x199fe49,0x161fd1b,0x00bd79a },
+ { 0x1a01797,0x171c2fd,0x21925c1,0x1358255,0x23d20b4,0x1c7f6d4,
+ 0x111b370,0x03dec12,0x1168d6f,0x03d923e } },
+ /* 5 */
+ { { 0x137bbbc,0x19a11f8,0x0bec9e5,0x27a29a8,0x3e43446,0x275cd18,
+ 0x0427617,0x00056c7,0x285133d,0x016af80 },
+ { 0x04c7dab,0x2a0df30,0x0c0792a,0x1310c98,0x3573d9f,0x239b30d,
+ 0x1315627,0x1ce0c32,0x25b6b6f,0x0252edc } },
+ /* 6 */
+ { { 0x20f141c,0x26d23dc,0x3c74bbf,0x334b7d6,0x06199b3,0x0441171,
+ 0x3f61294,0x313bf70,0x3cb2f7d,0x03375ae },
+ { 0x2f436fd,0x19c02fa,0x26becca,0x1b6e64c,0x26f647f,0x053c948,
+ 0x0fa7920,0x397d830,0x2bd4bda,0x028d86f } },
+ /* 7 */
+ { { 0x17c13c7,0x2895616,0x03e128a,0x17d42df,0x1c38d63,0x0f02747,
+ 0x039aecf,0x0a4b01c,0x209c4b5,0x02e84b2 },
+ { 0x1f91dfd,0x023e916,0x07fb9e4,0x19b3ba8,0x13af43b,0x35e02ca,
+ 0x0eb0899,0x3bd2c7b,0x19d701f,0x014faee } },
+ /* 8 */
+ { { 0x0e63d34,0x1fb8c6c,0x0fab4fe,0x1caa795,0x0f46005,0x179ed69,
+ 0x093334d,0x120c701,0x39206d5,0x021627e },
+ { 0x183553a,0x03d7319,0x09e5aa7,0x12b8959,0x2087909,0x0011194,
+ 0x1045071,0x0713f32,0x16d0254,0x03aec1a } },
+ /* 9 */
+ { { 0x01647c5,0x1b2856b,0x1799461,0x11f133d,0x0b8127d,0x1937eeb,
+ 0x266aa37,0x1f68f71,0x0cbd1b2,0x03aca08 },
+ { 0x287e008,0x1be361a,0x38f3940,0x276488d,0x2d87dfa,0x0333b2c,
+ 0x2d2e428,0x368755b,0x09b55a7,0x007ca0a } },
+ /* 10 */
+ { { 0x389da99,0x2a8300e,0x0022abb,0x27ae0a1,0x0a6f2d7,0x207017a,
+ 0x047862b,0x1358c9e,0x35905e5,0x00cde92 },
+ { 0x1f7794a,0x1d40348,0x3f613c6,0x2ddf5b5,0x0207005,0x133f5ba,
+ 0x1a37810,0x3ef5829,0x0d5f4c2,0x0035978 } },
+ /* 11 */
+ { { 0x1275d38,0x026efad,0x2358d9d,0x1142f82,0x14268a7,0x1cfac99,
+ 0x362ff49,0x288cbc1,0x24252f4,0x0308f68 },
+ { 0x394520c,0x06e13c2,0x178e5da,0x18ec16f,0x1096667,0x134a7a8,
+ 0x0dcb869,0x33fc4e9,0x38cc790,0x006778e } },
+ /* 12 */
+ { { 0x2c5fe04,0x29c5b09,0x1bdb183,0x02ceee8,0x03b28de,0x132dc4b,
+ 0x32c586a,0x32ff5d0,0x3d491fc,0x038d372 },
+ { 0x2a58403,0x2351aea,0x3a53b40,0x21a0ba5,0x39a6974,0x1aaaa2b,
+ 0x3901273,0x03dfe78,0x3447b4e,0x039d907 } },
+ /* 13 */
+ { { 0x364ba59,0x14e5077,0x02fc7d7,0x3b02c09,0x1d33f10,0x0560616,
+ 0x06dfc6a,0x15efd3c,0x357052a,0x01284b7 },
+ { 0x039dbd0,0x18ce3e5,0x3e1fbfa,0x352f794,0x0d3c24b,0x07c6cc5,
+ 0x1e4ffa2,0x3a91bf5,0x293bb5b,0x01abd6a } },
+ /* 14 */
+ { { 0x0c91999,0x02da644,0x0491da1,0x100a960,0x00a24b4,0x2330824,
+ 0x0094b4b,0x1004cf8,0x35a66a4,0x017f8d1 },
+ { 0x13e7b4b,0x232af7e,0x391ab0f,0x069f08f,0x3292b50,0x3479898,
+ 0x2889aec,0x2a4590b,0x308ecfe,0x02d5138 } },
+ /* 15 */
+ { { 0x2ddfdce,0x231ba45,0x39e6647,0x19be245,0x12c3291,0x35399f8,
+ 0x0d6e764,0x3082d3a,0x2bda6b0,0x0382dac },
+ { 0x37efb57,0x04b7cae,0x00070d3,0x379e431,0x01aac0d,0x1e6f251,
+ 0x0336ad6,0x0ddd3e4,0x3de25a6,0x01c7008 } },
+ /* 16 */
+ { { 0x3e20925,0x230912f,0x286762a,0x30e3f73,0x391c19a,0x34e1c18,
+ 0x16a5d5d,0x093d96a,0x3d421d3,0x0187561 },
+ { 0x37173ea,0x19ce8a8,0x0b65e87,0x0214dde,0x2238480,0x16ead0f,
+ 0x38441e0,0x3bef843,0x2124621,0x03e847f } },
+ /* 17 */
+ { { 0x0b19ffd,0x247cacb,0x3c231c8,0x16ec648,0x201ba8d,0x2b172a3,
+ 0x103d678,0x2fb72db,0x04c1f13,0x0161bac },
+ { 0x3e8ed09,0x171b949,0x2de20c3,0x0f06067,0x21e81a3,0x1b194be,
+ 0x0fd6c05,0x13c449e,0x0087086,0x006756b } },
+ /* 18 */
+ { { 0x09a4e1f,0x27d604c,0x00741e9,0x06fa49c,0x0ab7de7,0x3f4a348,
+ 0x25ef0be,0x158fc9a,0x33f7f9c,0x039f001 },
+ { 0x2f59f76,0x3598e83,0x30501f6,0x15083f2,0x0669b3b,0x29980b5,
+ 0x0c1f7a7,0x0f02b02,0x0fec65b,0x0382141 } },
+ /* 19 */
+ { { 0x031b3ca,0x23da368,0x2d66f09,0x27b9b69,0x06d1cab,0x13c91ba,
+ 0x3d81fa9,0x25ad16f,0x0825b09,0x01e3c06 },
+ { 0x225787f,0x3bf790e,0x2c9bb7e,0x0347732,0x28016f8,0x0d6ff0d,
+ 0x2a4877b,0x1d1e833,0x3b87e94,0x010e9dc } },
+ /* 20 */
+ { { 0x2b533d5,0x1ddcd34,0x1dc0625,0x3da86f7,0x3673b8a,0x1e7b0a4,
+ 0x3e7c9aa,0x19ac55d,0x251c3b2,0x02edb79 },
+ { 0x25259b3,0x24c0ead,0x3480e7e,0x34f40e9,0x3d6a0af,0x2cf3f09,
+ 0x2c83d19,0x2e66f16,0x19a5d18,0x0182d18 } },
+ /* 21 */
+ { { 0x2e5aa1c,0x28e3846,0x3658bd6,0x0ad279c,0x1b8b765,0x397e1fb,
+ 0x130014e,0x3ff342c,0x3b2aeeb,0x02743c9 },
+ { 0x2730a55,0x0918c5e,0x083aca9,0x0bf76ef,0x19c955b,0x300669c,
+ 0x01dfe0a,0x312341f,0x26d356e,0x0091295 } },
+ /* 22 */
+ { { 0x2cf1f96,0x00e52ba,0x271c6db,0x2a40930,0x19f2122,0x0b2f4ee,
+ 0x26ac1b8,0x3bda498,0x0873581,0x0117963 },
+ { 0x38f9dbc,0x3d1e768,0x2040d3f,0x11ba222,0x3a8aaf1,0x1b82fb5,
+ 0x1adfb24,0x2de9251,0x21cc1e4,0x0301038 } },
+ /* 23 */
+ { { 0x38117b6,0x2bc001b,0x1433847,0x3fdce8d,0x3651969,0x3651d7a,
+ 0x2b35761,0x1bb1d20,0x097682c,0x00737d7 },
+ { 0x1f04839,0x1dd6d04,0x16987db,0x3d12378,0x17dbeac,0x1c2cc86,
+ 0x121dd1b,0x3fcf6ca,0x1f8a92d,0x00119d5 } },
+ /* 24 */
+ { { 0x0e8ffcd,0x2b174af,0x1a82cc8,0x22cbf98,0x30d53c4,0x080b5b1,
+ 0x3161727,0x297cfdb,0x2113b83,0x0011b97 },
+ { 0x0007f01,0x23fd936,0x3183e7b,0x0496bd0,0x07fb1ef,0x178680f,
+ 0x1c5ea63,0x0016c11,0x2c3303d,0x01b8041 } },
+ /* 25 */
+ { { 0x0dd73b1,0x1cd6122,0x10d948c,0x23e657b,0x3767070,0x15a8aad,
+ 0x385ea8c,0x33c7ce0,0x0ede901,0x0110965 },
+ { 0x2d4b65b,0x2a8b244,0x0c37f8f,0x0ee5b24,0x394c234,0x3a5e347,
+ 0x26e4a15,0x39a3b4c,0x2514c2e,0x029e5be } },
+ /* 26 */
+ { { 0x23addd7,0x3ed8120,0x13b3359,0x20f959a,0x09e2a61,0x32fcf20,
+ 0x05b78e3,0x19ba7e2,0x1a9c697,0x0392b4b },
+ { 0x2048a61,0x3dfd0a3,0x19a0357,0x233024b,0x3082d19,0x00fb63b,
+ 0x3a1af4c,0x1450ff0,0x046c37b,0x0317a50 } },
+ /* 27 */
+ { { 0x3e75f9e,0x294e30a,0x3a78476,0x3a32c48,0x36fd1a9,0x0427012,
+ 0x1e4df0b,0x11d1f61,0x1afdb46,0x018ca0f },
+ { 0x2f2df15,0x0a33dee,0x27f4ce7,0x1542b66,0x3e592c4,0x20d2f30,
+ 0x3226ade,0x2a4e3ea,0x1ab1981,0x01a2f46 } },
+ /* 28 */
+ { { 0x087d659,0x3ab5446,0x305ac08,0x3d2cd64,0x33374d5,0x3f9d3f8,
+ 0x186981c,0x37f5a5a,0x2f53c6f,0x01254a4 },
+ { 0x2cec896,0x1e32786,0x04844a8,0x043b16d,0x3d964b2,0x1935829,
+ 0x16f7e26,0x1a0dd9a,0x30d2603,0x003b1d4 } },
+ /* 29 */
+ { { 0x12687bb,0x04e816b,0x21fa2da,0x1abccb8,0x3a1f83b,0x375181e,
+ 0x0f5ef51,0x0fc2ce4,0x3a66486,0x003d881 },
+ { 0x3138233,0x1f8eec3,0x2718bd6,0x1b09caa,0x2dd66b9,0x1bb222b,
+ 0x1004072,0x1b73e3b,0x07208ed,0x03fc36c } },
+ /* 30 */
+ { { 0x095d553,0x3e84053,0x0a8a749,0x3f575a0,0x3a44052,0x3ced59b,
+ 0x3b4317f,0x03a8c60,0x13c8874,0x00c4ed4 },
+ { 0x0d11549,0x0b8ab02,0x221cb40,0x02ed37b,0x2071ee1,0x1fc8c83,
+ 0x3987dd4,0x27e049a,0x0f986f1,0x00b4eaf } },
+ /* 31 */
+ { { 0x15581a2,0x2214060,0x11af4c2,0x1598c88,0x19a0a6d,0x32acba6,
+ 0x3a7a0f0,0x2337c66,0x210ded9,0x0300dbe },
+ { 0x1fbd009,0x3822eb0,0x181629a,0x2401b45,0x30b68b1,0x2e78363,
+ 0x2b32779,0x006530b,0x2c4b6d4,0x029aca8 } },
+ /* 32 */
+ { { 0x13549cf,0x0f943db,0x265ed43,0x1bfeb35,0x06f3369,0x3847f2d,
+ 0x1bfdacc,0x26181a5,0x252af7c,0x02043b8 },
+ { 0x159bb2c,0x143f85c,0x357b654,0x2f9d62c,0x2f7dfbe,0x1a7fa9c,
+ 0x057e74d,0x05d14ac,0x17a9273,0x035215c } },
+ /* 33 */
+ { { 0x0cb5a98,0x106a2bc,0x10bf117,0x24c7cc4,0x3d3da8f,0x2ce0ab7,
+ 0x14e2cba,0x1813866,0x1a72f9a,0x01a9811 },
+ { 0x2b2411d,0x3034fe8,0x16e0170,0x0f9443a,0x0be0eb8,0x2196cf3,
+ 0x0c9f738,0x15e40ef,0x0faf9e1,0x034f917 } },
+ /* 34 */
+ { { 0x03f7669,0x3da6efa,0x3d6bce1,0x209ca1d,0x109f8ae,0x09109e3,
+ 0x08ae543,0x3067255,0x1dee3c2,0x0081dd5 },
+ { 0x3ef1945,0x358765b,0x28c387b,0x3bec4b4,0x218813c,0x0b7d92a,
+ 0x3cd1d67,0x2c0367e,0x2e57154,0x0123717 } },
+ /* 35 */
+ { { 0x3e5a199,0x1e42ffd,0x0bb7123,0x33e6273,0x1e0efb8,0x294671e,
+ 0x3a2bfe0,0x3d11709,0x2eddff6,0x03cbec2 },
+ { 0x0b5025f,0x0255d7c,0x1f2241c,0x35d03ea,0x0550543,0x202fef4,
+ 0x23c8ad3,0x354963e,0x015db28,0x0284fa4 } },
+ /* 36 */
+ { { 0x2b65cbc,0x1e8d428,0x0226f9f,0x1c8a919,0x10b04b9,0x08fc1e8,
+ 0x1ce241e,0x149bc99,0x2b01497,0x00afc35 },
+ { 0x3216fb7,0x1374fd2,0x226ad3d,0x19fef76,0x0f7d7b8,0x1c21417,
+ 0x37b83f6,0x3a27eba,0x25a162f,0x010aa52 } },
+ /* 37 */
+ { { 0x2adf191,0x1ab42fa,0x28d7584,0x2409689,0x20f8a48,0x253707d,
+ 0x2030504,0x378f7a1,0x169c65e,0x00b0b76 },
+ { 0x3849c17,0x085c764,0x10dd6d0,0x2e87689,0x1460488,0x30e9521,
+ 0x10c7063,0x1b6f120,0x21f42c5,0x03d0dfe } },
+ /* 38 */
+ { { 0x20f7dab,0x035c512,0x29ac6aa,0x24c5ddb,0x20f0497,0x17ce5e1,
+ 0x00a050f,0x1eaa14b,0x3335470,0x02abd16 },
+ { 0x18d364a,0x0df0cf0,0x316585e,0x018f925,0x0d40b9b,0x17b1511,
+ 0x1716811,0x1caf3d0,0x10df4f2,0x0337d8c } },
+ /* 39 */
+ { { 0x2a8b7ef,0x0f188e3,0x2287747,0x06216f0,0x008e935,0x2f6a38d,
+ 0x1567722,0x0bfc906,0x0bada9e,0x03c3402 },
+ { 0x014d3b1,0x099c749,0x2a76291,0x216c067,0x3b37549,0x14ef2f6,
+ 0x21b96d4,0x1ee2d71,0x2f5ca88,0x016f570 } },
+ /* 40 */
+ { { 0x09a3154,0x3d1a7bd,0x2e9aef0,0x255b8ac,0x03e85a5,0x2a492a7,
+ 0x2aec1ea,0x11c6516,0x3c8a09e,0x02a84b7 },
+ { 0x1f69f1d,0x09c89d3,0x1e7326f,0x0b28bfd,0x0e0e4c8,0x1ea7751,
+ 0x18ce73b,0x2a406e7,0x273e48c,0x01b00db } },
+ /* 41 */
+ { { 0x36e3138,0x2b84a83,0x345a5cf,0x00096b4,0x16966ef,0x159caf1,
+ 0x13c64b4,0x2f89226,0x25896af,0x00a4bfd },
+ { 0x2213402,0x1435117,0x09fed52,0x09d0e4b,0x0f6580e,0x2871cba,
+ 0x3b397fd,0x1c9d825,0x090311b,0x0191383 } },
+ /* 42 */
+ { { 0x07153f0,0x1087869,0x18c9e1e,0x1e64810,0x2b86c3b,0x0175d9c,
+ 0x3dce877,0x269de4e,0x393cab7,0x03c96b9 },
+ { 0x1869d0c,0x06528db,0x02641f3,0x209261b,0x29d55c8,0x25ba517,
+ 0x3b5ea30,0x028f927,0x25313db,0x00e6e39 } },
+ /* 43 */
+ { { 0x2fd2e59,0x150802d,0x098f377,0x19a4957,0x135e2c0,0x38a95ce,
+ 0x1ab21a0,0x36c1b67,0x32f0f19,0x00e448b },
+ { 0x3cad53c,0x3387800,0x17e3cfb,0x03f9970,0x3225b2c,0x2a84e1d,
+ 0x3af1d29,0x3fe35ca,0x2f8ce80,0x0237a02 } },
+ /* 44 */
+ { { 0x07bbb76,0x3aa3648,0x2758afb,0x1f085e0,0x1921c7e,0x3010dac,
+ 0x22b74b1,0x230137e,0x1062e36,0x021c652 },
+ { 0x3993df5,0x24a2ee8,0x126ab5f,0x2d7cecf,0x0639d75,0x16d5414,
+ 0x1aa78a8,0x3f78404,0x26a5b74,0x03f0c57 } },
+ /* 45 */
+ { { 0x0d6ecfa,0x3f506ba,0x3f86561,0x3d86bb1,0x15f8c44,0x2491d07,
+ 0x052a7b4,0x2422261,0x3adee38,0x039b529 },
+ { 0x193c75d,0x14bb451,0x1162605,0x293749c,0x370a70d,0x2e8b1f6,
+ 0x2ede937,0x2b95f4a,0x39a9be2,0x00d77eb } },
+ /* 46 */
+ { { 0x2736636,0x15bf36a,0x2b7e6b9,0x25eb8b2,0x209f51d,0x3cd2659,
+ 0x10bf410,0x034afec,0x3d71c83,0x0076971 },
+ { 0x0ce6825,0x07920cf,0x3c3b5c4,0x23fe55c,0x015ad11,0x08c0dae,
+ 0x0552c7f,0x2e75a8a,0x0fddbf4,0x01c1df0 } },
+ /* 47 */
+ { { 0x2b9661c,0x0ffe351,0x3d71bf6,0x1ac34b3,0x3a1dfd3,0x211fe3d,
+ 0x33e140a,0x3f9100d,0x32ee50e,0x014ea18 },
+ { 0x16d8051,0x1bfda1a,0x068a097,0x2571d3d,0x1daec0c,0x39389af,
+ 0x194dc35,0x3f3058a,0x36d34e1,0x000a329 } },
+ /* 48 */
+ { { 0x09877ee,0x351f73f,0x0002d11,0x0420074,0x2c8b362,0x130982d,
+ 0x02c1175,0x3c11b40,0x0d86962,0x001305f },
+ { 0x0daddf5,0x2f4252c,0x15c06d9,0x1d49339,0x1bea235,0x0b680ed,
+ 0x3356e67,0x1d1d198,0x1e9fed9,0x03dee93 } },
+ /* 49 */
+ { { 0x3e1263f,0x2fe8d3a,0x3ce6d0d,0x0d5c6b9,0x3557637,0x0a9bd48,
+ 0x0405538,0x0710749,0x2005213,0x038c7e5 },
+ { 0x26b6ec6,0x2e485ba,0x3c44d1b,0x0b9cf0b,0x037a1d1,0x27428a5,
+ 0x0e7eac8,0x351ef04,0x259ce34,0x02a8e98 } },
+ /* 50 */
+ { { 0x2f3dcd3,0x3e77d4d,0x3360fbc,0x1434afd,0x36ceded,0x3d413d6,
+ 0x1710fad,0x36bb924,0x1627e79,0x008e637 },
+ { 0x109569e,0x1c168db,0x3769cf4,0x2ed4527,0x0ea0619,0x17d80d3,
+ 0x1c03773,0x18843fe,0x1b21c04,0x015c5fd } },
+ /* 51 */
+ { { 0x1dd895e,0x08a7248,0x04519fe,0x001030a,0x18e5185,0x358dfb3,
+ 0x13d2391,0x0a37be8,0x0560e3c,0x019828b },
+ { 0x27fcbd0,0x2a22bb5,0x30969cc,0x1e03aa7,0x1c84724,0x0ba4ad3,
+ 0x32f4817,0x0914cca,0x14c4f52,0x01893b9 } },
+ /* 52 */
+ { { 0x097eccc,0x1273936,0x00aa095,0x364fe62,0x04d49d1,0x10e9f08,
+ 0x3c24230,0x3ef01c8,0x2fb92bd,0x013ce4a },
+ { 0x1e44fd9,0x27e3e9f,0x2156696,0x3915ecc,0x0b66cfb,0x1a3af0f,
+ 0x2fa8033,0x0e6736c,0x177ccdb,0x0228f9e } },
+ /* 53 */
+ { { 0x2c4b125,0x06207c1,0x0a8cdde,0x003db8f,0x1ae34e3,0x31e84fa,
+ 0x2999de5,0x11013bd,0x02370c2,0x00e2234 },
+ { 0x0f91081,0x200d591,0x1504762,0x1857c05,0x23d9fcf,0x0cb34db,
+ 0x27edc86,0x08cd860,0x2471810,0x029798b } },
+ /* 54 */
+ { { 0x3acd6c8,0x097b8cb,0x3c661a8,0x15152f2,0x1699c63,0x237e64c,
+ 0x23edf79,0x16b7033,0x0e6466a,0x00b11da },
+ { 0x0a64bc9,0x1bfe324,0x1f5cb34,0x08391de,0x0630a60,0x3017a21,
+ 0x09d064b,0x14a8365,0x041f9e6,0x01ed799 } },
+ /* 55 */
+ { { 0x128444a,0x2508b07,0x2a39216,0x362f84d,0x2e996c5,0x2c31ff3,
+ 0x07afe5f,0x1d1288e,0x3cb0c8d,0x02e2bdc },
+ { 0x38b86fd,0x3a0ea8c,0x1cff5fd,0x1629629,0x3fee3f1,0x02b250c,
+ 0x2e8f6f2,0x0225727,0x15f7f3f,0x0280d8e } },
+ /* 56 */
+ { { 0x10f7770,0x0f1aee8,0x0e248c7,0x20684a8,0x3a6f16d,0x06f0ae7,
+ 0x0df6825,0x2d4cc40,0x301875f,0x012f8da },
+ { 0x3b56dbb,0x1821ba7,0x24f8922,0x22c1f9e,0x0306fef,0x1b54bc8,
+ 0x2ccc056,0x00303ba,0x2871bdc,0x0232f26 } },
+ /* 57 */
+ { { 0x0dac4ab,0x0625730,0x3112e13,0x101c4bf,0x3a874a4,0x2873b95,
+ 0x32ae7c6,0x0d7e18c,0x13e0c08,0x01139d5 },
+ { 0x334002d,0x00fffdd,0x025c6d5,0x22c2cd1,0x19d35cb,0x3a1ce2d,
+ 0x3702760,0x3f06257,0x03a5eb8,0x011c29a } },
+ /* 58 */
+ { { 0x0513482,0x1d87724,0x276a81b,0x0a807a4,0x3028720,0x339cc20,
+ 0x2441ee0,0x31bbf36,0x290c63d,0x0059041 },
+ { 0x106a2ed,0x0d2819b,0x100bf50,0x114626c,0x1dd4d77,0x2e08632,
+ 0x14ae72a,0x2ed3f64,0x1fd7abc,0x035cd1e } },
+ /* 59 */
+ { { 0x2d4c6e5,0x3bec596,0x104d7ed,0x23d6c1b,0x0262cf0,0x15d72c5,
+ 0x2d5bb18,0x199ac4b,0x1e30771,0x020591a },
+ { 0x21e291e,0x2e75e55,0x1661d7a,0x08b0778,0x3eb9daf,0x0d78144,
+ 0x1827eb1,0x0fe73d2,0x123f0dd,0x0028db7 } },
+ /* 60 */
+ { { 0x1d5533c,0x34cb1d0,0x228f098,0x27a1a11,0x17c5f5a,0x0d26f44,
+ 0x2228ade,0x2c460e6,0x3d6fdba,0x038cc77 },
+ { 0x3cc6ed8,0x02ada1a,0x260e510,0x2f7bde8,0x37160c3,0x33a1435,
+ 0x23d9a7b,0x0ce2641,0x02a492e,0x034ed1e } },
+ /* 61 */
+ { { 0x3821f90,0x26dba3c,0x3aada14,0x3b59bad,0x292edd9,0x2804c45,
+ 0x3669531,0x296f42e,0x35a4c86,0x01ca049 },
+ { 0x3ff47e5,0x2163df4,0x2441503,0x2f18405,0x15e1616,0x37f66ec,
+ 0x30f11a7,0x141658a,0x27ece14,0x00b018b } },
+ /* 62 */
+ { { 0x159ac2e,0x3e65bc0,0x2713a76,0x0db2f6c,0x3281e77,0x2391811,
+ 0x16d2880,0x1fbc4ab,0x1f92c4e,0x00a0a8d },
+ { 0x0ce5cd2,0x152c7b0,0x02299c3,0x3244de7,0x2cf99ef,0x3a0b047,
+ 0x2caf383,0x0aaf664,0x113554d,0x031c735 } },
+ /* 63 */
+ { { 0x1b578f4,0x177a702,0x3a7a488,0x1638ebf,0x31884e2,0x2460bc7,
+ 0x36b1b75,0x3ce8e3d,0x340cf47,0x03143d9 },
+ { 0x34b68ea,0x12b7ccd,0x1fe2a9c,0x08da659,0x0a406f3,0x1694c14,
+ 0x06a2228,0x16370be,0x3a72129,0x02e7b2c } },
+ /* 64 */
+ { { 0x0f8b16a,0x21043bd,0x266a56f,0x3fb11ec,0x197241a,0x36721f0,
+ 0x006b8e6,0x2ac6c29,0x202cd42,0x0200fcf },
+ { 0x0dbec69,0x0c26a01,0x105f7f0,0x3dceeeb,0x3a83b85,0x363865f,
+ 0x097273a,0x2b70718,0x00e5067,0x03025d1 } },
+ /* 65 */
+ { { 0x379ab34,0x295bcb0,0x38d1846,0x22e1077,0x3a8ee06,0x1db1a3b,
+ 0x3144591,0x07cc080,0x2d5915f,0x03c6bcc },
+ { 0x175bd50,0x0dd4c57,0x27bc99c,0x2ebdcbd,0x3837cff,0x235dc8f,
+ 0x13a4184,0x0722c18,0x130e2d4,0x008f43c } },
+ /* 66 */
+ { { 0x01500d9,0x2adbb7d,0x2da8857,0x397f2fa,0x10d890a,0x25c9654,
+ 0x3e86488,0x3eb754b,0x1d6c0a3,0x02c0a23 },
+ { 0x10bcb08,0x083cc19,0x2e16853,0x04da575,0x271af63,0x2626a9d,
+ 0x3520a7b,0x32348c7,0x24ff408,0x03ff4dc } },
+ /* 67 */
+ { { 0x058e6cb,0x1a3992d,0x1d28539,0x080c5e9,0x2992dad,0x2a9d7d5,
+ 0x14ae0b7,0x09b7ce0,0x34ad78c,0x03d5643 },
+ { 0x30ba55a,0x092f4f3,0x0bae0fc,0x12831de,0x20fc472,0x20ed9d2,
+ 0x29864f6,0x1288073,0x254f6f7,0x00635b6 } },
+ /* 68 */
+ { { 0x1be5a2b,0x0f88975,0x33c6ed9,0x20d64d3,0x06fe799,0x0989bff,
+ 0x1409262,0x085a90c,0x0d97990,0x0142eed },
+ { 0x17ec63e,0x06471b9,0x0db2378,0x1006077,0x265422c,0x08db83d,
+ 0x28099b0,0x1270d06,0x11801fe,0x00ac400 } },
+ /* 69 */
+ { { 0x3391593,0x22d7166,0x30fcfc6,0x2896609,0x3c385f5,0x066b72e,
+ 0x04f3aad,0x2b831c5,0x19983fb,0x0375562 },
+ { 0x0b82ff4,0x222e39d,0x34c993b,0x101c79c,0x2d2e03c,0x0f00c8a,
+ 0x3a9eaf4,0x1810669,0x151149d,0x039b931 } },
+ /* 70 */
+ { { 0x29af288,0x1956ec7,0x293155f,0x193deb6,0x1647e1a,0x2ca0839,
+ 0x297e4bc,0x15bfd0d,0x1b107ed,0x0147803 },
+ { 0x31c327e,0x05a6e1d,0x02ad43d,0x02d2a5b,0x129cdb2,0x37ad1de,
+ 0x3d51f53,0x245df01,0x2414982,0x0388bd0 } },
+ /* 71 */
+ { { 0x35f1abb,0x17a3d18,0x0874cd4,0x2d5a14e,0x17edc0c,0x16a00d3,
+ 0x072c1fb,0x1232725,0x33d52dc,0x03dc24d },
+ { 0x0af30d6,0x259aeea,0x369c401,0x12bc4de,0x295bf5f,0x0d8711f,
+ 0x26162a9,0x16c44e5,0x288e727,0x02f54b4 } },
+ /* 72 */
+ { { 0x05fa877,0x1571ea7,0x3d48ab1,0x1c9f4e8,0x017dad6,0x0f46276,
+ 0x343f9e7,0x1de990f,0x0e4c8aa,0x028343e },
+ { 0x094f92d,0x3abf633,0x1b3a0bb,0x2f83137,0x0d818c8,0x20bae85,
+ 0x0c65f8b,0x1a8008b,0x0c7946d,0x0295b1e } },
+ /* 73 */
+ { { 0x1d09529,0x08e46c3,0x1fcf296,0x298f6b7,0x1803e0e,0x2d6fd20,
+ 0x37351f5,0x0d9e8b1,0x1f8731a,0x0362fbf },
+ { 0x00157f4,0x06750bf,0x2650ab9,0x35ffb23,0x2f51cae,0x0b522c2,
+ 0x39cb400,0x191e337,0x0a5ce9f,0x021529a } },
+ /* 74 */
+ { { 0x3506ea5,0x17d9ed8,0x0d66dc3,0x22693f8,0x19286c4,0x3a57353,
+ 0x101d3bf,0x1aa54fc,0x20b9884,0x0172b3a },
+ { 0x0eac44d,0x37d8327,0x1c3aa90,0x3d0d534,0x23db29a,0x3576eaf,
+ 0x1d3de8a,0x3bea423,0x11235e4,0x039260b } },
+ /* 75 */
+ { { 0x34cd55e,0x01288b0,0x1132231,0x2cc9a03,0x358695b,0x3e87650,
+ 0x345afa1,0x01267ec,0x3f616b2,0x02011ad },
+ { 0x0e7d098,0x0d6078e,0x0b70b53,0x237d1bc,0x0d7f61e,0x132de31,
+ 0x1ea9ea4,0x2bd54c3,0x27b9082,0x03ac5f2 } },
+ /* 76 */
+ { { 0x2a145b9,0x06d661d,0x31ec175,0x03f06f1,0x3a5cf6b,0x249c56e,
+ 0x2035653,0x384c74f,0x0bafab5,0x0025ec0 },
+ { 0x25f69e1,0x1b23a55,0x1199aa6,0x16ad6f9,0x077e8f7,0x293f661,
+ 0x33ba11d,0x3327980,0x07bafdb,0x03e571d } },
+ /* 77 */
+ { { 0x2bae45e,0x3c074ef,0x2955558,0x3c312f1,0x2a8ebe9,0x2f193f1,
+ 0x3705b1d,0x360deba,0x01e566e,0x00d4498 },
+ { 0x21161cd,0x1bc787e,0x2f87933,0x3553197,0x1328ab8,0x093c879,
+ 0x17eee27,0x2adad1d,0x1236068,0x003be5c } },
+ /* 78 */
+ { { 0x0ca4226,0x2633dd5,0x2c8e025,0x0e3e190,0x05eede1,0x1a385e4,
+ 0x163f744,0x2f25522,0x1333b4f,0x03f05b6 },
+ { 0x3c800ca,0x1becc79,0x2daabe9,0x0c499e2,0x1138063,0x3fcfa2d,
+ 0x2244976,0x1e85cf5,0x2f1b95d,0x0053292 } },
+ /* 79 */
+ { { 0x12f81d5,0x1dc6eaf,0x11967a4,0x1a407df,0x31a5f9d,0x2b67241,
+ 0x18bef7c,0x08c7762,0x063f59c,0x01015ec },
+ { 0x1c05c0a,0x360bfa2,0x1f85bff,0x1bc7703,0x3e4911c,0x0d685b6,
+ 0x2fccaea,0x02c4cef,0x164f133,0x0070ed7 } },
+ /* 80 */
+ { { 0x0ec21fe,0x052ffa0,0x3e825fe,0x1ab0956,0x3f6ce11,0x3d29759,
+ 0x3c5a072,0x18ebe62,0x148db7e,0x03eb49c },
+ { 0x1ab05b3,0x02dab0a,0x1ae690c,0x0f13894,0x137a9a8,0x0aab79f,
+ 0x3dc875c,0x06a1029,0x1e39f0e,0x01dce1f } },
+ /* 81 */
+ { { 0x16c0dd7,0x3b31269,0x2c741e9,0x3611821,0x2a5cffc,0x1416bb3,
+ 0x3a1408f,0x311fa3d,0x1c0bef0,0x02cdee1 },
+ { 0x00e6a8f,0x1adb933,0x0f23359,0x2fdace2,0x2fd6d4b,0x0e73bd3,
+ 0x2453fac,0x0a356ae,0x2c8f9f6,0x02704d6 } },
+ /* 82 */
+ { { 0x0e35743,0x28c80a1,0x0def32a,0x2c6168f,0x1320d6a,0x37c6606,
+ 0x21b1761,0x2147ee0,0x21fc433,0x015c84d },
+ { 0x1fc9168,0x36cda9c,0x003c1f0,0x1cd7971,0x15f98ba,0x1ef363d,
+ 0x0ca87e3,0x046f7d9,0x3c9e6bb,0x0372eb0 } },
+ /* 83 */
+ { { 0x118cbe2,0x3665a11,0x304ef01,0x062727a,0x3d242fc,0x11ffbaf,
+ 0x3663c7e,0x1a189c9,0x09e2d62,0x02e3072 },
+ { 0x0e1d569,0x162f772,0x0cd051a,0x322df62,0x3563809,0x047cc7a,
+ 0x027fd9f,0x08b509b,0x3da2f94,0x01748ee } },
+ /* 84 */
+ { { 0x1c8f8be,0x31ca525,0x22bf0a1,0x200efcd,0x02961c4,0x3d8f52b,
+ 0x018403d,0x3a40279,0x1cb91ec,0x030427e },
+ { 0x0945705,0x0257416,0x05c0c2d,0x25b77ae,0x3b9083d,0x2901126,
+ 0x292b8d7,0x07b8611,0x04f2eee,0x026f0cd } },
+ /* 85 */
+ { { 0x2913074,0x2b8d590,0x02b10d5,0x09d2295,0x255491b,0x0c41cca,
+ 0x1ca665b,0x133051a,0x1525f1a,0x00a5647 },
+ { 0x04f983f,0x3d6daee,0x04e1e76,0x1067d7e,0x1be7eef,0x02ea862,
+ 0x00d4968,0x0ccb048,0x11f18ef,0x018dd95 } },
+ /* 86 */
+ { { 0x22976cc,0x17c5395,0x2c38bda,0x3983bc4,0x222bca3,0x332a614,
+ 0x3a30646,0x261eaef,0x1c808e2,0x02f6de7 },
+ { 0x306a772,0x32d7272,0x2dcefd2,0x2abf94d,0x038f475,0x30ad76e,
+ 0x23e0227,0x3052b0a,0x001add3,0x023ba18 } },
+ /* 87 */
+ { { 0x0ade873,0x25a6069,0x248ccbe,0x13713ee,0x17ee9aa,0x28152e9,
+ 0x2e28995,0x2a92cb3,0x17a6f77,0x024b947 },
+ { 0x190a34d,0x2ebea1c,0x1ed1948,0x16fdaf4,0x0d698f7,0x32bc451,
+ 0x0ee6e30,0x2aaab40,0x06f0a56,0x01460be } },
+ /* 88 */
+ { { 0x24cc99c,0x1884b1e,0x1ca1fba,0x1a0f9b6,0x2ff609b,0x2b26316,
+ 0x3b27cb5,0x29bc976,0x35d4073,0x024772a },
+ { 0x3575a70,0x1b30f57,0x07fa01b,0x0e5be36,0x20cb361,0x26605cd,
+ 0x1d4e8c8,0x13cac59,0x2db9797,0x005e833 } },
+ /* 89 */
+ { { 0x36c8d3a,0x1878a81,0x124b388,0x0e4843e,0x1701aad,0x0ea0d76,
+ 0x10eae41,0x37d0653,0x36c7f4c,0x00ba338 },
+ { 0x37a862b,0x1cf6ac0,0x08fa912,0x2dd8393,0x101ba9b,0x0eebcb7,
+ 0x2453883,0x1a3cfe5,0x2cb34f6,0x03d3331 } },
+ /* 90 */
+ { { 0x1f79687,0x3d4973c,0x281544e,0x2564bbe,0x17c5954,0x171e34a,
+ 0x231741a,0x3cf2784,0x0889a0d,0x02b036d },
+ { 0x301747f,0x3f1c477,0x1f1386b,0x163bc5f,0x1592b93,0x332daed,
+ 0x080e4f5,0x1d28b96,0x26194c9,0x0256992 } },
+ /* 91 */
+ { { 0x15a4c93,0x07bf6b0,0x114172c,0x1ce0961,0x140269b,0x1b2c2eb,
+ 0x0dfb1c1,0x019ddaa,0x0ba2921,0x008c795 },
+ { 0x2e6d2dc,0x37e45e2,0x2918a70,0x0fce444,0x34d6aa6,0x396dc88,
+ 0x27726b5,0x0c787d8,0x032d8a7,0x02ac2f8 } },
+ /* 92 */
+ { { 0x1131f2d,0x2b43a63,0x3101097,0x38cec13,0x0637f09,0x17a69d2,
+ 0x086196d,0x299e46b,0x0802cf6,0x03c6f32 },
+ { 0x0daacb4,0x1a4503a,0x100925c,0x15583d9,0x23c4e40,0x1de4de9,
+ 0x1cc8fc4,0x2c9c564,0x0695aeb,0x02145a5 } },
+ /* 93 */
+ { { 0x1dcf593,0x17050fc,0x3e3bde3,0x0a6c062,0x178202b,0x2f7674f,
+ 0x0dadc29,0x15763a7,0x1d2daad,0x023d9f6 },
+ { 0x081ea5f,0x045959d,0x190c841,0x3a78d31,0x0e7d2dd,0x1414fea,
+ 0x1d43f40,0x22d77ff,0x2b9c072,0x03e115c } },
+ /* 94 */
+ { { 0x3af71c9,0x29e9c65,0x25655e1,0x111e9cd,0x3a14494,0x3875418,
+ 0x34ae070,0x0b06686,0x310616b,0x03b7b89 },
+ { 0x1734121,0x00d3d44,0x29f0b2f,0x1552897,0x31cac6e,0x1030bb3,
+ 0x0148f3a,0x35fd237,0x29b44eb,0x027f49f } },
+ /* 95 */
+ { { 0x2e2cb16,0x1d962bd,0x19b63cc,0x0b3f964,0x3e3eb7d,0x1a35560,
+ 0x0c58161,0x3ce1d6a,0x3b6958f,0x029030b },
+ { 0x2dcc158,0x3b1583f,0x30568c9,0x31957c8,0x27ad804,0x28c1f84,
+ 0x3967049,0x37b3f64,0x3b87dc6,0x0266f26 } },
+ /* 96 */
+ { { 0x27dafc6,0x2548764,0x0d1984a,0x1a57027,0x252c1fb,0x24d9b77,
+ 0x1581a0f,0x1f99276,0x10ba16d,0x026af88 },
+ { 0x0915220,0x2be1292,0x16c6480,0x1a93760,0x2fa7317,0x1a07296,
+ 0x1539871,0x112c31f,0x25787f3,0x01e2070 } },
+ /* 97 */
+ { { 0x0bcf3ff,0x266d478,0x34f6933,0x31449fd,0x00d02cb,0x340765a,
+ 0x3465a2d,0x225023e,0x319a30e,0x00579b8 },
+ { 0x20e05f4,0x35b834f,0x0404646,0x3710d62,0x3fad7bd,0x13e1434,
+ 0x21c7d1c,0x1cb3af9,0x2cf1911,0x003957e } },
+ /* 98 */
+ { { 0x0787564,0x36601be,0x1ce67e9,0x084c7a1,0x21a3317,0x2067a35,
+ 0x0158cab,0x195ddac,0x1766fe9,0x035cf42 },
+ { 0x2b7206e,0x20d0947,0x3b42424,0x03f1862,0x0a51929,0x38c2948,
+ 0x0bb8595,0x2942d77,0x3748f15,0x0249428 } },
+ /* 99 */
+ { { 0x2577410,0x3c23e2f,0x28c6caf,0x00d41de,0x0fd408a,0x30298e9,
+ 0x363289e,0x2302fc7,0x082c1cc,0x01dd050 },
+ { 0x30991cd,0x103e9ba,0x029605a,0x19927f7,0x0c1ca08,0x0c93f50,
+ 0x28a3c7b,0x082e4e9,0x34d12eb,0x0232c13 } },
+ /* 100 */
+ { { 0x106171c,0x0b4155a,0x0c3fb1c,0x336c090,0x19073e9,0x2241a10,
+ 0x0e6b4fd,0x0ed476e,0x1ef4712,0x039390a },
+ { 0x0ec36f4,0x3754f0e,0x2a270b8,0x007fd2d,0x0f9d2dc,0x1e6a692,
+ 0x066e078,0x1954974,0x2ff3c6e,0x00def28 } },
+ /* 101 */
+ { { 0x3562470,0x0b8f1f7,0x0ac94cd,0x28b0259,0x244f272,0x031e4ef,
+ 0x2d5df98,0x2c8a9f1,0x2dc3002,0x016644f },
+ { 0x350592a,0x0e6a0d5,0x1e027a1,0x2039e0f,0x399e01d,0x2817593,
+ 0x0c0375e,0x3889b3e,0x24ab013,0x010de1b } },
+ /* 102 */
+ { { 0x256b5a6,0x0ac3b67,0x28f9ff3,0x29b67f1,0x30750d9,0x25e11a9,
+ 0x15e8455,0x279ebb0,0x298b7e7,0x0218e32 },
+ { 0x2fc24b2,0x2b82582,0x28f22f5,0x2bd36b3,0x305398e,0x3b2e9e3,
+ 0x365dd0a,0x29bc0ed,0x36a7b3a,0x007b374 } },
+ /* 103 */
+ { { 0x05ff2f3,0x2b3589b,0x29785d3,0x300a1ce,0x0a2d516,0x0844355,
+ 0x14c9fad,0x3ccb6b6,0x385d459,0x0361743 },
+ { 0x0b11da3,0x002e344,0x18c49f7,0x0c29e0c,0x1d2c22c,0x08237b3,
+ 0x2988f49,0x0f18955,0x1c3b4ed,0x02813c6 } },
+ /* 104 */
+ { { 0x17f93bd,0x249323b,0x11f6087,0x174e4bd,0x3cb64ac,0x086dc6b,
+ 0x2e330a8,0x142c1f2,0x2ea5c09,0x024acbb },
+ { 0x1b6e235,0x3132521,0x00f085a,0x2a4a4db,0x1ab2ca4,0x0142224,
+ 0x3aa6b3e,0x09db203,0x2215834,0x007b9e0 } },
+ /* 105 */
+ { { 0x23e79f7,0x28b8039,0x1906a60,0x2cbce67,0x1f590e7,0x181f027,
+ 0x21054a6,0x3854240,0x2d857a6,0x03cfcb3 },
+ { 0x10d9b55,0x1443cfc,0x2648200,0x2b36190,0x09d2fcf,0x22f439f,
+ 0x231aa7e,0x3884395,0x0543da3,0x003d5a9 } },
+ /* 106 */
+ { { 0x043e0df,0x06ffe84,0x3e6d5b2,0x3327001,0x26c74b6,0x12a145e,
+ 0x256ec0d,0x3898c69,0x3411969,0x02f63c5 },
+ { 0x2b7494a,0x2eee1af,0x38388a9,0x1bd17ce,0x21567d4,0x13969e6,
+ 0x3a12a7a,0x3e8277d,0x03530cc,0x00b4687 } },
+ /* 107 */
+ { { 0x06508da,0x38e04d4,0x15a7192,0x312875e,0x3336180,0x2a6512c,
+ 0x1b59497,0x2e91b37,0x25eb91f,0x02841e9 },
+ { 0x394d639,0x0747143,0x37d7e6d,0x1d62962,0x08b4af3,0x34df287,
+ 0x3c5584b,0x26bc869,0x20af87a,0x0060f5d } },
+ /* 108 */
+ { { 0x1de59a4,0x1a5c443,0x2f8729d,0x01c3a2f,0x0f1ad8d,0x3cbaf9e,
+ 0x1b49634,0x35d508a,0x39dc269,0x0075105 },
+ { 0x390d30e,0x37033e0,0x110cb32,0x14c37a0,0x20a3b27,0x2f00ce6,
+ 0x2f1dc52,0x34988c6,0x0c29606,0x01dc7e7 } },
+ /* 109 */
+ { { 0x1040739,0x24f9de1,0x2939999,0x2e6009a,0x244539d,0x17e3f09,
+ 0x00f6f2f,0x1c63b3d,0x2310362,0x019109e },
+ { 0x1428aa8,0x3cb61e1,0x09a84f4,0x0ffafed,0x07b7adc,0x08f406b,
+ 0x1b2c6df,0x035b480,0x3496ae9,0x012766d } },
+ /* 110 */
+ { { 0x35d1099,0x2362f10,0x1a08cc7,0x13a3a34,0x12adbcd,0x32da290,
+ 0x02e2a02,0x151140b,0x01b3f60,0x0240df6 },
+ { 0x34c7b61,0x2eb09c1,0x172e7cd,0x2ad5eff,0x2fe2031,0x25b54d4,
+ 0x0cec965,0x18e7187,0x26a7cc0,0x00230f7 } },
+ /* 111 */
+ { { 0x2d552ab,0x374083d,0x01f120f,0x2601736,0x156baff,0x04d44a4,
+ 0x3b7c3e9,0x1acbc1b,0x0424579,0x031a425 },
+ { 0x1231bd1,0x0eba710,0x020517b,0x21d7316,0x21eac6e,0x275a848,
+ 0x0837abf,0x0eb0082,0x302cafe,0x00fe8f6 } },
+ /* 112 */
+ { { 0x1058880,0x28f9941,0x03f2d75,0x3bd90e5,0x17da365,0x2ac9249,
+ 0x07861cf,0x023fd05,0x1b0fdb8,0x031712f },
+ { 0x272b56b,0x04f8d2c,0x043a735,0x25446e4,0x1c8327e,0x221125a,
+ 0x0ce37df,0x2dad7f6,0x39446c2,0x00b55b6 } },
+ /* 113 */
+ { { 0x346ac6b,0x05e0bff,0x2425246,0x0981e8b,0x1d19f79,0x2692378,
+ 0x3ea3c40,0x2e90beb,0x19de503,0x003d5af },
+ { 0x05cda49,0x353b44d,0x299d137,0x3f205bc,0x2821158,0x3ad0d00,
+ 0x06a54aa,0x2d7c79f,0x39d1173,0x01000ee } },
+ /* 114 */
+ { { 0x0803387,0x3a06268,0x14043b8,0x3d4e72f,0x1ece115,0x0a1dfc8,
+ 0x17208dd,0x0be790a,0x122a07f,0x014dd95 },
+ { 0x0a4182d,0x202886a,0x1f79a49,0x1e8c867,0x0a2bbd0,0x28668b5,
+ 0x0d0a2e1,0x115259d,0x3586c5d,0x01e815b } },
+ /* 115 */
+ { { 0x18a2a47,0x2c95627,0x2773646,0x1230f7c,0x15b5829,0x2fc354e,
+ 0x2c000ea,0x099d547,0x2f17a1a,0x01df520 },
+ { 0x3853948,0x06f6561,0x3feeb8a,0x2f5b3ef,0x3a6f817,0x01a0791,
+ 0x2ec0578,0x2c392ad,0x12b2b38,0x0104540 } },
+ /* 116 */
+ { { 0x1e28ced,0x0fc3d1b,0x2c473c7,0x1826c4f,0x21d5da7,0x39718e4,
+ 0x38ce9e6,0x0251986,0x172fbea,0x0337c11 },
+ { 0x053c3b0,0x0f162db,0x043c1cb,0x04111ee,0x297fe3c,0x32e5e03,
+ 0x2b8ae12,0x0c427ec,0x1da9738,0x03b9c0f } },
+ /* 117 */
+ { { 0x357e43a,0x054503f,0x11b8345,0x34ec6e0,0x2d44660,0x3d0ae61,
+ 0x3b5dff8,0x33884ac,0x09da162,0x00a82b6 },
+ { 0x3c277ba,0x129a51a,0x027664e,0x1530507,0x0c788c9,0x2afd89d,
+ 0x1aa64cc,0x1196450,0x367ac2b,0x0358b42 } },
+ /* 118 */
+ { { 0x0054ac4,0x1761ecb,0x378839c,0x167c9f7,0x2570058,0x0604a35,
+ 0x37cbf3b,0x0909bb7,0x3f2991c,0x02ce688 },
+ { 0x0b16ae5,0x212857c,0x351b952,0x2c684db,0x30c6a05,0x09c01e0,
+ 0x23c137f,0x1331475,0x092c067,0x0013b40 } },
+ /* 119 */
+ { { 0x2e90393,0x0617466,0x24e61f4,0x0a528f5,0x03047b4,0x2153f05,
+ 0x0001a69,0x30e1eb8,0x3c10177,0x0282a47 },
+ { 0x22c831e,0x28fc06b,0x3e16ff0,0x208adc9,0x0bb76ae,0x28c1d6d,
+ 0x12c8a15,0x031063c,0x1889ed2,0x002133e } },
+ /* 120 */
+ { { 0x0a6becf,0x14277bf,0x3328d98,0x201f7fe,0x12fceae,0x1de3a2e,
+ 0x0a15c44,0x3ddf976,0x1b273ab,0x0355e55 },
+ { 0x1b5d4f1,0x369e78c,0x3a1c210,0x12cf3e9,0x3aa52f0,0x309f082,
+ 0x112089d,0x107c753,0x24202d1,0x023853a } },
+ /* 121 */
+ { { 0x2897042,0x140d17c,0x2c4aeed,0x07d0d00,0x18d0533,0x22f7ec8,
+ 0x19c194c,0x3456323,0x2372aa4,0x0165f86 },
+ { 0x30bd68c,0x1fb06b3,0x0945032,0x372ac09,0x06d4be0,0x27f8fa1,
+ 0x1c8d7ac,0x137a96e,0x236199b,0x0328fc0 } },
+ /* 122 */
+ { { 0x170bd20,0x2842d58,0x1de7592,0x3c5b4fd,0x20ea897,0x12cab78,
+ 0x363ff14,0x01f928c,0x17e309c,0x02f79ff },
+ { 0x0f5432c,0x2edb4ae,0x044b516,0x32f810d,0x2210dc1,0x23e56d6,
+ 0x301e6ff,0x34660f6,0x10e0a7d,0x02d88eb } },
+ /* 123 */
+ { { 0x0c7b65b,0x2f59d58,0x2289a75,0x2408e92,0x1ab8c55,0x1ec99e5,
+ 0x220fd0d,0x04defe0,0x24658ec,0x035aa8b },
+ { 0x138bb85,0x2f002d4,0x295c10a,0x08760ce,0x28c31d1,0x1c0a8cb,
+ 0x0ff00b1,0x144eac9,0x2e02dcc,0x0044598 } },
+ /* 124 */
+ { { 0x3b42b87,0x050057b,0x0dff781,0x1c06db1,0x1bd9f5d,0x1f5f04a,
+ 0x2cccd7a,0x143e19b,0x1cb94b7,0x036cfb8 },
+ { 0x34837cf,0x3cf6c3c,0x0d4fb26,0x22ee55e,0x1e7eed1,0x315995f,
+ 0x2cdf937,0x1a96574,0x0425220,0x0221a99 } },
+ /* 125 */
+ { { 0x1b569ea,0x0d33ed9,0x19c13c2,0x107dc84,0x2200111,0x0569867,
+ 0x2dc85da,0x05ef22e,0x0eb018a,0x029c33d },
+ { 0x04a6a65,0x3e5eba3,0x378f224,0x09c04d0,0x036e5cf,0x3df8258,
+ 0x3a609e4,0x1eddef8,0x2abd174,0x02a91dc } },
+ /* 126 */
+ { { 0x2a60cc0,0x1d84c5e,0x115f676,0x1840da0,0x2c79163,0x2f06ed6,
+ 0x198bb4b,0x3e5d37b,0x1dc30fa,0x018469b },
+ { 0x15ee47a,0x1e32f30,0x16a530e,0x2093836,0x02e8962,0x3767b62,
+ 0x335adf3,0x27220db,0x2f81642,0x0173ffe } },
+ /* 127 */
+ { { 0x37a99cd,0x1533fe6,0x05a1c0d,0x27610f1,0x17bf3b9,0x0b1ce78,
+ 0x0a908f6,0x265300e,0x3237dc1,0x01b969a },
+ { 0x3a5db77,0x2d15382,0x0d63ef8,0x1feb3d8,0x0b7b880,0x19820de,
+ 0x11c0c67,0x2af3396,0x38d242d,0x0120688 } },
+ /* 128 */
+ { { 0x1d0b34a,0x05ef00d,0x00a7e34,0x1ae0c9f,0x1440b38,0x300d8b4,
+ 0x37262da,0x3e50e3e,0x14ce0cd,0x00b1044 },
+ { 0x195a0b1,0x173bc6b,0x03622ba,0x2a19f55,0x1c09b37,0x07921b2,
+ 0x16cdd20,0x24a5c9b,0x2bf42ff,0x00811de } },
+ /* 129 */
+ { { 0x0d65dbf,0x145cf06,0x1ad82f7,0x038ce7b,0x077bf94,0x33c4007,
+ 0x22d26bd,0x25ad9c0,0x09ac773,0x02b1990 },
+ { 0x2261cc3,0x2ecdbf1,0x3e908b0,0x3246439,0x0213f7b,0x1179b04,
+ 0x01cebaa,0x0be1595,0x175cc12,0x033a39a } },
+ /* 130 */
+ { { 0x00a67d2,0x086d06f,0x248a0f1,0x0291134,0x362d476,0x166d1cd,
+ 0x044f1d6,0x2d2a038,0x365250b,0x0023f78 },
+ { 0x08bf287,0x3b0f6a1,0x1d6eace,0x20b4cda,0x2c2a621,0x0912520,
+ 0x02dfdc9,0x1b35cd6,0x3d2565d,0x00bdf8b } },
+ /* 131 */
+ { { 0x3770fa7,0x2e4b6f0,0x03f9ae4,0x170de41,0x1095e8d,0x1dd845c,
+ 0x334e9d1,0x00ab953,0x12e9077,0x03196fa },
+ { 0x2fd0a40,0x228c0fd,0x384b275,0x38ef339,0x3e7d822,0x3e5d9ef,
+ 0x24f5854,0x0ece9eb,0x247d119,0x012ffe3 } },
+ /* 132 */
+ { { 0x0ff1480,0x07487c0,0x1b16cd4,0x1f41d53,0x22ab8fb,0x2f83cfa,
+ 0x01d2efb,0x259f6b2,0x2e65772,0x00f9392 },
+ { 0x05303e6,0x23cdb4f,0x23977e1,0x12e4898,0x03bd999,0x0c930f0,
+ 0x170e261,0x180a27b,0x2fd58ec,0x014e22b } },
+ /* 133 */
+ { { 0x25d7713,0x0c5fad7,0x09daad1,0x3b9d779,0x109b985,0x1d3ec98,
+ 0x35bc4fc,0x2f838cb,0x0d14f75,0x0173e42 },
+ { 0x2657b12,0x10d4423,0x19e6760,0x296e5bb,0x2bfd421,0x25c3330,
+ 0x29f51f8,0x0338838,0x24060f0,0x029a62e } },
+ /* 134 */
+ { { 0x3748fec,0x2c5a1bb,0x2cf973d,0x289fa74,0x3e6e755,0x38997bf,
+ 0x0b6544c,0x2b6358c,0x38a7aeb,0x02c50bb },
+ { 0x3d5770a,0x06be7c5,0x012fad3,0x19cb2cd,0x266af3b,0x3ccd677,
+ 0x160d1bd,0x141d5af,0x2965851,0x034625a } },
+ /* 135 */
+ { { 0x3c41c08,0x255eacc,0x22e1ec5,0x2b151a3,0x087de94,0x311cbdb,
+ 0x016b73a,0x368e462,0x20b7981,0x0099ec3 },
+ { 0x262b988,0x1539763,0x21e76e5,0x15445b4,0x1d8ddc7,0x34a9be6,
+ 0x10faf03,0x24e4d18,0x07aa111,0x02d538a } },
+ /* 136 */
+ { { 0x38a876b,0x048ad45,0x04b40a0,0x3fc2144,0x251ff96,0x13ca7dd,
+ 0x0b31ab1,0x3539814,0x28b5f87,0x0212aec },
+ { 0x270790a,0x350e7e0,0x346bd5e,0x276178f,0x22d6cb5,0x3078884,
+ 0x355c1b6,0x15901d7,0x3671765,0x03950db } },
+ /* 137 */
+ { { 0x286e8d5,0x2409788,0x13be53f,0x2d21911,0x0353c95,0x10238e8,
+ 0x32f5bde,0x3a67b60,0x28b5b9c,0x001013d },
+ { 0x381e8e5,0x0cef7a9,0x2f5bcad,0x06058f0,0x33cdf50,0x04672a8,
+ 0x1769600,0x31c055d,0x3df0ac1,0x00e9098 } },
+ /* 138 */
+ { { 0x2eb596d,0x197b326,0x12b4c29,0x39c08f2,0x101ea03,0x3804e58,
+ 0x04b4b62,0x28d9d1c,0x13f905e,0x0032a3f },
+ { 0x11b2b61,0x08e9095,0x0d06925,0x270e43f,0x21eb7a8,0x0e4a98f,
+ 0x31d2be0,0x030cf9f,0x2644ddb,0x025b728 } },
+ /* 139 */
+ { { 0x07510af,0x2ed0e8e,0x2a01203,0x2a2a68d,0x0846fea,0x3e540de,
+ 0x3a57702,0x1677348,0x2123aad,0x010d8f8 },
+ { 0x0246a47,0x0e871d0,0x124dca4,0x34b9577,0x2b362b8,0x363ebe5,
+ 0x3086045,0x26313e6,0x15cd8bb,0x0210384 } },
+ /* 140 */
+ { { 0x023e8a7,0x0817884,0x3a0bf12,0x3376371,0x3c808a8,0x18e9777,
+ 0x12a2721,0x35b538a,0x2bd30de,0x017835a },
+ { 0x0fc0f64,0x1c8709f,0x2d8807a,0x0743957,0x242eec0,0x347e76c,
+ 0x27bef91,0x289689a,0x0f42945,0x01f7a92 } },
+ /* 141 */
+ { { 0x1060a81,0x3dbc739,0x1615abd,0x1cbe3e5,0x3e79f9c,0x1ab09a2,
+ 0x136c540,0x05b473f,0x2beebfd,0x02af0a8 },
+ { 0x3e2eac7,0x19be474,0x04668ac,0x18f4b74,0x36f10ba,0x0a0b4c6,
+ 0x10e3770,0x3bf059e,0x3946c7e,0x013a8d4 } },
+ /* 142 */
+ { { 0x266309d,0x28be354,0x1a3eed8,0x3020651,0x10a51c6,0x1e31770,
+ 0x0af45a5,0x3ff0f3b,0x2891c94,0x00e9db9 },
+ { 0x17b0d0f,0x33a291f,0x0a5f9aa,0x25a3d61,0x2963ace,0x39a5fef,
+ 0x230c724,0x1919146,0x10a465e,0x02084a8 } },
+ /* 143 */
+ { { 0x3ab8caa,0x31870f3,0x2390ef7,0x2103850,0x218eb8e,0x3a5ccf2,
+ 0x1dff677,0x2c59334,0x371599c,0x02a9f2a },
+ { 0x0837bd1,0x3249cef,0x35d702f,0x3430dab,0x1c06407,0x108f692,
+ 0x221292f,0x05f0c5d,0x073fe06,0x01038e0 } },
+ /* 144 */
+ { { 0x3bf9b7c,0x2020929,0x30d0f4f,0x080fef8,0x3365d23,0x1f3e738,
+ 0x3e53209,0x1549afe,0x300b305,0x038d811 },
+ { 0x0c6c2c7,0x2e6445b,0x3ee64dc,0x022e932,0x0726837,0x0deb67b,
+ 0x1ed4346,0x3857f73,0x277a3de,0x01950b5 } },
+ /* 145 */
+ { { 0x36c377a,0x0adb41e,0x08be3f3,0x11e40d1,0x36cb038,0x036a2bd,
+ 0x3dd3a82,0x1bc875b,0x2ee09bb,0x02994d2 },
+ { 0x035facf,0x05e0344,0x07e630a,0x0ce772d,0x335e55a,0x111fce4,
+ 0x250fe1c,0x3bc89ba,0x32fdc9a,0x03cf2d9 } },
+ /* 146 */
+ { { 0x355fd83,0x1c67f8e,0x1d10eb3,0x1b21d77,0x0e0d7a4,0x173a9e1,
+ 0x2c9fa90,0x1c39cce,0x22eaae8,0x01f2bea },
+ { 0x153b338,0x0534107,0x26c69b8,0x283be1f,0x3e0acc0,0x059cac3,
+ 0x13d1081,0x148bbee,0x3c1b9bd,0x002aac4 } },
+ /* 147 */
+ { { 0x2681297,0x3389e34,0x146addc,0x2c6d425,0x2cb350e,0x1986abc,
+ 0x0431737,0x04ba4b7,0x2028470,0x012e469 },
+ { 0x2f8ddcf,0x3c4255c,0x1af4dcf,0x07a6a44,0x208ebf6,0x0dc90c3,
+ 0x34360ac,0x072ad23,0x0537232,0x01254d3 } },
+ /* 148 */
+ { { 0x07b7e9d,0x3df5c7c,0x116f83d,0x28c4f35,0x3a478ef,0x3011fb8,
+ 0x2f264b6,0x317b9e3,0x04fd65a,0x032bd1b },
+ { 0x2aa8266,0x3431de4,0x04bba04,0x19a44da,0x0edf454,0x392c5ac,
+ 0x265168a,0x1dc3d5b,0x25704c6,0x00533a7 } },
+ /* 149 */
+ { { 0x25e8f91,0x1178fa5,0x2492994,0x2eb2c3c,0x0d3aca1,0x0322828,
+ 0x1cc70f9,0x269c74c,0x0a53e4c,0x006edc2 },
+ { 0x18bdd7a,0x2a79a55,0x26b1d5c,0x0200628,0x0734a05,0x3273c7b,
+ 0x13aa714,0x0040ac2,0x2f2da30,0x03e7449 } },
+ /* 150 */
+ { { 0x3f9563e,0x2f29eab,0x14a0749,0x3fad264,0x1dd077a,0x3d7c59c,
+ 0x3a0311b,0x331a789,0x0b9729e,0x0201ebf },
+ { 0x1b08b77,0x2a4cdf2,0x3e387f8,0x21510f1,0x286c3a7,0x1dbf62e,
+ 0x3afa594,0x3363217,0x0d16568,0x01d46b7 } },
+ /* 151 */
+ { { 0x0715c0d,0x28e2d04,0x17f78ae,0x1c63dda,0x1d113ea,0x0fefc1b,
+ 0x1eab149,0x1d0fd99,0x0682537,0x00a7b11 },
+ { 0x10bebbc,0x11c672d,0x14223d9,0x2ff9141,0x1399ee5,0x34b7b6c,
+ 0x0d5b3a8,0x01df643,0x0e392a4,0x03fe4dc } },
+ /* 152 */
+ { { 0x2b75b65,0x0b5a6f1,0x11c559a,0x3549999,0x24188f8,0x37a75f4,
+ 0x29f33e3,0x34068a2,0x38ba2a9,0x025dd91 },
+ { 0x29af2c7,0x0988b64,0x0923885,0x1b539a4,0x1334f5d,0x226947a,
+ 0x2cc7e5a,0x20beb39,0x13fac2f,0x01d298c } },
+ /* 153 */
+ { { 0x35f079c,0x137f76d,0x2fbbb2f,0x254638d,0x185b07c,0x1f34db7,
+ 0x2cfcf0e,0x218f46d,0x2150ff4,0x02add6f },
+ { 0x33fc9b7,0x0d9f005,0x0fd081b,0x0834965,0x2b90a74,0x102448d,
+ 0x3dbf03c,0x167d857,0x02e0b44,0x013afab } },
+ /* 154 */
+ { { 0x09f2c53,0x317f9d7,0x1411eb6,0x0463aba,0x0d25220,0x256b176,
+ 0x087633f,0x2bff322,0x07b2c1b,0x037e662 },
+ { 0x10aaecb,0x23bb4a1,0x2272bb7,0x06c075a,0x09d4918,0x0736f2b,
+ 0x0dd511b,0x101625e,0x0a7779f,0x009ec10 } },
+ /* 155 */
+ { { 0x33b2eb2,0x0176dfd,0x2118904,0x022386c,0x2e0df85,0x2588c9f,
+ 0x1b71525,0x28fd540,0x137e4cf,0x02ce4f7 },
+ { 0x3d75165,0x0c39ecf,0x3554a12,0x30af34c,0x2d66344,0x3ded408,
+ 0x36f1be0,0x0d065b0,0x012d046,0x0025623 } },
+ /* 156 */
+ { { 0x2601c3b,0x1824fc0,0x335fe08,0x3e33d70,0x0fb0252,0x252bfca,
+ 0x1cf2808,0x1922e55,0x1a9db9f,0x020721e },
+ { 0x2f56c51,0x39a1f31,0x218c040,0x1a4fc5d,0x3fed471,0x0164d4e,
+ 0x388a419,0x06f1113,0x0f55fc1,0x03e8352 } },
+ /* 157 */
+ { { 0x1608e4d,0x3872778,0x022cbc6,0x044d60a,0x3010dda,0x15fb0b5,
+ 0x37ddc11,0x19f5bda,0x156b6a3,0x023a838 },
+ { 0x383b3b4,0x1380bc8,0x353ca35,0x250fc07,0x169966b,0x3780f29,
+ 0x36632b2,0x2d6b13f,0x124fa00,0x00fd6ae } },
+ /* 158 */
+ { { 0x1739efb,0x2ec3656,0x2c0d337,0x3d39faf,0x1c751b0,0x04699f4,
+ 0x252dd64,0x095b8b6,0x0872b74,0x022f1da },
+ { 0x2d3d253,0x38edca0,0x379fa5b,0x287d635,0x3a9f679,0x059d9ee,
+ 0x0ac168e,0x3cd3e87,0x19060fc,0x02ce1bc } },
+ /* 159 */
+ { { 0x3edcfc2,0x0f04d4b,0x2f0d31f,0x1898be2,0x25396bf,0x15ca230,
+ 0x02b4eae,0x2713668,0x0f71b06,0x0132d18 },
+ { 0x38095ea,0x1ed34d6,0x3603ae6,0x165bf01,0x192bbf8,0x1852859,
+ 0x075f66b,0x1488f85,0x10895ef,0x014b035 } },
+ /* 160 */
+ { { 0x1339848,0x3084385,0x0c8d231,0x3a1c1de,0x0e87a28,0x255b85c,
+ 0x1de6616,0x2702e74,0x1382bb0,0x012b0f2 },
+ { 0x198987d,0x381545a,0x34d619b,0x312b827,0x18b2376,0x28fe4cf,
+ 0x20b7651,0x017d077,0x0c7e397,0x00e0365 } },
+ /* 161 */
+ { { 0x1542e75,0x0d56aa0,0x39b701a,0x287b806,0x396c724,0x0935c21,
+ 0x3a29776,0x0debdac,0x171de26,0x00b38f8 },
+ { 0x1d5bc1a,0x3fad27d,0x22b5cfe,0x1f89ddf,0x0a65560,0x144dd5b,
+ 0x2aac2f9,0x139353f,0x0520b62,0x00b9b36 } },
+ /* 162 */
+ { { 0x031c31d,0x16552e3,0x1a0c368,0x0016fc8,0x168533d,0x171e7b2,
+ 0x17626e7,0x275502f,0x14742c6,0x03285dd },
+ { 0x2d2dbb2,0x3b6bffd,0x1d18cc6,0x2f45d2a,0x0fd0d8c,0x2915e3a,
+ 0x1e8793a,0x0b39a1d,0x3139cab,0x02a5da9 } },
+ /* 163 */
+ { { 0x3fb353d,0x147c6e4,0x3a720a6,0x22d5ff3,0x1d75cab,0x06c54a0,
+ 0x08cfa73,0x12666aa,0x3170a1f,0x021c829 },
+ { 0x13e1b90,0x3a34dda,0x1fc38c3,0x02c5bdb,0x2d345dc,0x14aa1d0,
+ 0x28d00ab,0x224f23a,0x329c769,0x025c67b } },
+ /* 164 */
+ { { 0x0e35909,0x3bb6356,0x0116820,0x370cf77,0x29366d8,0x3881409,
+ 0x3999d06,0x013075f,0x176e157,0x02941ca },
+ { 0x0e70b2e,0x28dfab1,0x2a8a002,0x15da242,0x084dcf6,0x116ca97,
+ 0x31bf186,0x1dc9735,0x09df7b7,0x0264e27 } },
+ /* 165 */
+ { { 0x2da7a4b,0x3023c9e,0x1366238,0x00ff4e2,0x03abe9d,0x19bd44b,
+ 0x272e897,0x20b91ad,0x2aa202c,0x02a2201 },
+ { 0x380184e,0x08112b4,0x0b85660,0x31049aa,0x3a8cb78,0x36113c5,
+ 0x1670c0a,0x373f9e7,0x3fb4738,0x00010ef } },
+ /* 166 */
+ { { 0x2d5192e,0x26d770d,0x32af8d5,0x34d1642,0x1acf885,0x05805e0,
+ 0x166d0a1,0x1219a0d,0x301ba6c,0x014bcfb },
+ { 0x2dcb64d,0x19cca83,0x379f398,0x08e01a0,0x10a482c,0x0103cc2,
+ 0x0be5fa7,0x1f9d45b,0x1899ef2,0x00ca5af } },
+ /* 167 */
+ { { 0x14d81d7,0x2aea251,0x1b3c476,0x3bd47ae,0x29eade7,0x0715e61,
+ 0x1a21cd8,0x1c7a586,0x2bfaee5,0x00ee43f },
+ { 0x096f7cb,0x0c08f95,0x1bc4939,0x361fed4,0x255be41,0x26fad73,
+ 0x31dd489,0x02c600f,0x29d9f81,0x01ba201 } },
+ /* 168 */
+ { { 0x03ea1db,0x1eac46d,0x1292ce3,0x2a54967,0x20a7ff1,0x3e13c61,
+ 0x1b02218,0x2b44e14,0x3eadefa,0x029c88a },
+ { 0x30a9144,0x31e3b0a,0x19c5a2a,0x147cbe9,0x05a0240,0x051f38e,
+ 0x11eca56,0x31a4247,0x123bc2a,0x02fa535 } },
+ /* 169 */
+ { { 0x3226ce7,0x1251782,0x0b7072f,0x11e59fa,0x2b8afd7,0x169b18f,
+ 0x2a46f18,0x31d9bb7,0x2fe9be8,0x01de0b7 },
+ { 0x1b38626,0x34aa90f,0x3ad1760,0x21ddbd9,0x3460ae7,0x1126736,
+ 0x1b86fc5,0x0b92cd0,0x167a289,0x000e0e1 } },
+ /* 170 */
+ { { 0x1ec1a0f,0x36bbf5e,0x1c972d8,0x3f73ace,0x13bbcd6,0x23d86a5,
+ 0x175ffc5,0x2d083d5,0x2c4adf7,0x036f661 },
+ { 0x1f39eb7,0x2a20505,0x176c81a,0x3d6e636,0x16ee2fc,0x3cbdc5f,
+ 0x25475dc,0x2ef4151,0x3c46860,0x0238934 } },
+ /* 171 */
+ { { 0x2587390,0x3639526,0x0588749,0x13c32fb,0x212bb19,0x09660f1,
+ 0x207da4b,0x2bf211b,0x1c4407b,0x01506a6 },
+ { 0x24c8842,0x105a498,0x05ffdb2,0x0ab61b0,0x26044c1,0x3dff3d8,
+ 0x1d14b44,0x0d74716,0x049f57d,0x030024b } },
+ /* 172 */
+ { { 0x32e61ef,0x31d70f7,0x35cad3c,0x320b86c,0x07e8841,0x027ca7d,
+ 0x2d30d19,0x2513718,0x2347286,0x01d7901 },
+ { 0x3c237d0,0x107f16e,0x01c9e7d,0x3c3b13c,0x0c9537b,0x20af54d,
+ 0x051a162,0x2161a47,0x258c784,0x016df2d } },
+ /* 173 */
+ { { 0x228ead1,0x29c2122,0x07f6964,0x023f4ed,0x1802dc5,0x19f96ce,
+ 0x24bfd17,0x25e866b,0x2ba8df0,0x01eb84f },
+ { 0x2dd384e,0x05bbe3a,0x3f06fd2,0x366dacb,0x30361a2,0x2f36d7c,
+ 0x0b98784,0x38ff481,0x074e2a8,0x01e1f60 } },
+ /* 174 */
+ { { 0x17fbb1c,0x0975add,0x1debc5e,0x2cb2880,0x3e47bdd,0x3488cff,
+ 0x15e9a36,0x2121129,0x0199ef2,0x017088a },
+ { 0x0315250,0x352a162,0x17c1773,0x0ae09c2,0x321b21a,0x3bd74cf,
+ 0x3c4ea1d,0x3cac2ad,0x3abbaf0,0x039174d } },
+ /* 175 */
+ { { 0x0511c8a,0x3c78d0a,0x2cd3d2d,0x322f729,0x3ebb229,0x09f0e69,
+ 0x0a71a76,0x2e74d5e,0x12284df,0x03b5ef0 },
+ { 0x3dea561,0x0a9b7e4,0x0ed1cf2,0x237523c,0x05443f1,0x2eb48fa,
+ 0x3861405,0x1b49f62,0x0c945ca,0x02ab25f } },
+ /* 176 */
+ { { 0x16bd00a,0x13a9d28,0x3cc1eb5,0x2b7d702,0x2d839e9,0x3e6ff01,
+ 0x2bb7f11,0x3713824,0x3b31163,0x00c63e5 },
+ { 0x30d7138,0x0316fb0,0x0220ecc,0x08eaf0c,0x244e8df,0x0088d81,
+ 0x37972fb,0x3fd34ae,0x2a19a84,0x03e907e } },
+ /* 177 */
+ { { 0x2642269,0x0b65d29,0x03bd440,0x33a6ede,0x3c81814,0x2507982,
+ 0x0d38e47,0x3a788e6,0x32c1d26,0x00e2eda },
+ { 0x2577f87,0x392895a,0x3e1cc64,0x14f7047,0x08b52d2,0x08a01ca,
+ 0x336abf6,0x00697fc,0x105ce76,0x0253742 } },
+ /* 178 */
+ { { 0x293f92a,0x33df737,0x3315156,0x32e26d7,0x0a01333,0x26579d4,
+ 0x004df9c,0x0aba409,0x067d25c,0x02481de },
+ { 0x3f39d44,0x1c78042,0x13d7e24,0x0825aed,0x35f2c90,0x3270f63,
+ 0x04b7b35,0x3ad4531,0x28bd29b,0x0207a10 } },
+ /* 179 */
+ { { 0x077199f,0x270aeb1,0x0dd96dd,0x3b9ad7b,0x28cb8ee,0x3903f43,
+ 0x37db3fe,0x292c62b,0x362dbbf,0x006e52a },
+ { 0x247f143,0x0362cf3,0x216344f,0x3f18fd1,0x351e623,0x31664e0,
+ 0x0f270fc,0x243bbc6,0x2280555,0x001a8e3 } },
+ /* 180 */
+ { { 0x3355b49,0x2c04e6c,0x399b2e5,0x182d3af,0x020e265,0x09a7cf7,
+ 0x0ffa6bd,0x353e302,0x02083d9,0x029ecdb },
+ { 0x33e8830,0x0570e86,0x1c0b64d,0x386a27e,0x0d5fcea,0x0b45a4c,
+ 0x2ee4a2e,0x0a8833f,0x2b4a282,0x02f9531 } },
+ /* 181 */
+ { { 0x191167c,0x36cf7e3,0x225ed6c,0x1e79e99,0x0517c3f,0x11ab1fd,
+ 0x05648f3,0x08aedc4,0x1abeae0,0x02fcc29 },
+ { 0x3828a68,0x1e16fa4,0x30368e7,0x0c9fcfb,0x25161c3,0x24851ac,
+ 0x1b5feb5,0x344eb84,0x0de2732,0x0347208 } },
+ /* 182 */
+ { { 0x038b363,0x384d1e4,0x2519043,0x151ac17,0x158c11f,0x009b2b4,
+ 0x257abe6,0x2368d3f,0x3ed68a1,0x02df45e },
+ { 0x29c2559,0x2962478,0x3d8444c,0x1d96fff,0x04f7a03,0x1391a52,
+ 0x0de4af7,0x3319126,0x15e6412,0x00e65ff } },
+ /* 183 */
+ { { 0x3d61507,0x1d1a0a2,0x0d2af20,0x354d299,0x329e132,0x2a28578,
+ 0x2ddfb08,0x04fa3ff,0x1293c6c,0x003bae2 },
+ { 0x3e259f8,0x1a68fa9,0x3e67e9b,0x39b44f9,0x1ce1db7,0x347e9a1,
+ 0x3318f6a,0x2dbbc9d,0x2f8c922,0x008a245 } },
+ /* 184 */
+ { { 0x212ab5b,0x2b896c2,0x0136959,0x07e55ef,0x0cc1117,0x05b8ac3,
+ 0x18429ed,0x025fa01,0x11d6e93,0x03b016b },
+ { 0x03f3708,0x2e96fab,0x1d77157,0x0d4c2d6,0x131baf9,0x0608d39,
+ 0x3552371,0x06cdd1e,0x1567ff1,0x01f4c50 } },
+ /* 185 */
+ { { 0x2dfefab,0x270173d,0x37077bd,0x1a372cd,0x1be2f22,0x28e2ee5,
+ 0x3ead973,0x35e8f94,0x2fc9bc1,0x03a7399 },
+ { 0x36a02a1,0x2855d9b,0x00ed75a,0x37d8398,0x138c087,0x233706e,
+ 0x147f346,0x01947e2,0x3017228,0x0365942 } },
+ /* 186 */
+ { { 0x2057e60,0x2d31296,0x25e4504,0x2fa37bc,0x1cbccc3,0x1f0732f,
+ 0x3532081,0x2de8a98,0x19a804e,0x005359a },
+ { 0x31f411a,0x2a10576,0x369c2c8,0x02fe035,0x109fbaf,0x30bddeb,
+ 0x1eef901,0x1662ad3,0x0410d43,0x01bd31a } },
+ /* 187 */
+ { { 0x2c24a96,0x1b7d3a5,0x19a3872,0x217f2f6,0x2534dbc,0x2cab8c2,
+ 0x066ef28,0x26aecf1,0x0fd6118,0x01310d4 },
+ { 0x055b8da,0x1fdc5be,0x38a1296,0x25118f0,0x341a423,0x2ba4cd0,
+ 0x3e1413e,0x062d70d,0x2425a31,0x029c9b4 } },
+ /* 188 */
+ { { 0x08c1086,0x1acfba5,0x22e1dae,0x0f72f4e,0x3f1de50,0x0f408bc,
+ 0x35ed3f0,0x3ce48fc,0x282cc6c,0x004d8e7 },
+ { 0x1afaa86,0x24e3ef3,0x22589ac,0x3ec9952,0x1f45bc5,0x14144ca,
+ 0x23b26e4,0x0d68c65,0x1e1c1a3,0x032a4d9 } },
+ /* 189 */
+ { { 0x03b2d20,0x16b1d53,0x241b361,0x05e4138,0x1742a54,0x32741c7,
+ 0x0521c4c,0x1ca96c2,0x034970b,0x02738a7 },
+ { 0x13e0ad6,0x207dcdb,0x034c8cc,0x27bcbe1,0x18060da,0x33a18b6,
+ 0x2d1d1a6,0x2be60d7,0x3d7ab42,0x012312a } },
+ /* 190 */
+ { { 0x0c7485a,0x06c3310,0x0dbfd22,0x2ef949d,0x0ead455,0x098f4ba,
+ 0x3c76989,0x0cf2d24,0x032f67b,0x01e005f },
+ { 0x30cb5ee,0x0d5da64,0x0ed2b9d,0x2503102,0x1c0d14e,0x1cbc693,
+ 0x37bf552,0x07013e2,0x054de5c,0x014f341 } },
+ /* 191 */
+ { { 0x128ccac,0x1617e97,0x346ebcd,0x158016d,0x25f823e,0x34048ea,
+ 0x39f0a1c,0x3ea3df1,0x1c1d3d7,0x03ba919 },
+ { 0x151803b,0x01967c1,0x2f70781,0x27df39a,0x06c0b59,0x24a239c,
+ 0x15a7702,0x2464d06,0x2a47ae6,0x006db90 } },
+ /* 192 */
+ { { 0x27d04c3,0x024df3d,0x38112e8,0x38a27ba,0x01e312b,0x0965358,
+ 0x35d8879,0x2f4f55a,0x214187f,0x0008936 },
+ { 0x05fe36f,0x2ee18c3,0x1f5f87a,0x1813bd4,0x0580f3c,0x0ed0a7b,
+ 0x0fb1bfb,0x3fcce59,0x2f042bf,0x01820e3 } },
+ /* 193 */
+ { { 0x20bbe99,0x32cbc9f,0x39ee432,0x3cc12a8,0x37bda44,0x3ea4e40,
+ 0x097c7a9,0x0590d7d,0x2022d33,0x018dbac },
+ { 0x3ae00aa,0x3439864,0x2d2ffcf,0x3f8c6b9,0x0875a00,0x3e4e407,
+ 0x3658a29,0x22eb3d0,0x2b63921,0x022113b } },
+ /* 194 */
+ { { 0x33bae58,0x05c749a,0x1f3e114,0x1c45f8e,0x27db3df,0x06a3ab6,
+ 0x37bc7f8,0x1e27b34,0x3dc51fb,0x009eea0 },
+ { 0x3f54de5,0x3d0e7fe,0x1a71a7d,0x02ed7f8,0x0727703,0x2ca5e92,
+ 0x2e8e35d,0x292ad0b,0x13487f3,0x02b6d8b } },
+ /* 195 */
+ { { 0x175df2a,0x05a28a8,0x32e99b1,0x13d8630,0x2082aa0,0x11ac245,
+ 0x24f2e71,0x322cb27,0x17675e7,0x02e643f },
+ { 0x1f37313,0x2765ad3,0x0789082,0x1e742d0,0x11c2055,0x2021dc4,
+ 0x09ae4a7,0x346359b,0x2f94d10,0x0205c1f } },
+ /* 196 */
+ { { 0x3d6ff96,0x1f2ac80,0x336097d,0x3f03610,0x35b851b,0x010b6d2,
+ 0x0823c4d,0x2a9709a,0x2ead5a8,0x00de4b6 },
+ { 0x01afa0b,0x0621965,0x3671528,0x1050b60,0x3f3e9e7,0x2f93829,
+ 0x0825275,0x006e85f,0x35e94b0,0x016af58 } },
+ /* 197 */
+ { { 0x2c4927c,0x3ea1382,0x0f23727,0x0d69f23,0x3e38860,0x2b72837,
+ 0x3cd5ea4,0x2d84292,0x321846a,0x016656f },
+ { 0x29dfa33,0x3e182e0,0x018be90,0x2ba563f,0x2caafe2,0x218c0d9,
+ 0x3baf447,0x1047a6c,0x0a2d483,0x01130cb } },
+ /* 198 */
+ { { 0x00ed80c,0x2a5fc79,0x0a82a74,0x2c4c74b,0x15f938c,0x30b5ab6,
+ 0x32124b7,0x295314f,0x2fb8082,0x007c858 },
+ { 0x20b173e,0x19f315c,0x12f97e4,0x198217c,0x040e8a6,0x3275977,
+ 0x2bc20e4,0x01f2633,0x02bc3e9,0x023c750 } },
+ /* 199 */
+ { { 0x3c4058a,0x24be73e,0x16704f5,0x2d8a4bd,0x3b15e14,0x3076315,
+ 0x1cfe37b,0x36fe715,0x343926e,0x02c6603 },
+ { 0x2c76b09,0x0cf824c,0x3f7898c,0x274cec1,0x11df527,0x18eed18,
+ 0x08ead48,0x23915bc,0x19b3744,0x00a0a2b } },
+ /* 200 */
+ { { 0x0cf4ac5,0x1c8b131,0x0afb696,0x0ff7799,0x2f5ac1a,0x022420c,
+ 0x11baa2e,0x2ce4015,0x1275a14,0x0125cfc },
+ { 0x22eac5d,0x360cd4c,0x3568e59,0x3d42f66,0x35e07ee,0x09620e4,
+ 0x36720fa,0x22b1eac,0x2d0db16,0x01b6b23 } },
+ /* 201 */
+ { { 0x1a835ef,0x1516bbb,0x2d51f7b,0x3487443,0x14aa113,0x0dd06c2,
+ 0x1a65e01,0x379300d,0x35920b9,0x012c8fb },
+ { 0x04c7341,0x2eda00f,0x3c37e82,0x1b4fd62,0x0d45770,0x1478fba,
+ 0x127863a,0x26939cd,0x134ddf4,0x01375c5 } },
+ /* 202 */
+ { { 0x1476cd9,0x1119ca5,0x325bbf9,0x0bf8c69,0x0648d07,0x312d9f8,
+ 0x01c8b8f,0x136ec51,0x0002f4a,0x03f4c5c },
+ { 0x195d0e1,0x10ffd22,0x29aa1cb,0x3443bdc,0x276e695,0x05e6260,
+ 0x15f9764,0x3cd9783,0x18c9569,0x0053eb1 } },
+ /* 203 */
+ { { 0x312ae18,0x280197c,0x3fc9ad9,0x303f324,0x251958d,0x29f4a11,
+ 0x2142408,0x3694366,0x25136ab,0x03b5f1d },
+ { 0x1d4abbc,0x1c3c689,0x13ea462,0x3cfc684,0x39b5dd8,0x2d4654b,
+ 0x09b0755,0x27d4f18,0x3f74d2e,0x03fbf2d } },
+ /* 204 */
+ { { 0x2119185,0x2525eae,0x1ba4bd0,0x0c2ab11,0x1d54e8c,0x294845e,
+ 0x2479dea,0x3602d24,0x17e87e0,0x0060069 },
+ { 0x0afffb0,0x34fe37f,0x1240073,0x02eb895,0x06cf33c,0x2d7f7ef,
+ 0x1d763b5,0x04191e0,0x11e1ead,0x027e3f0 } },
+ /* 205 */
+ { { 0x269544c,0x0e85c57,0x3813158,0x19fc12d,0x20eaf85,0x1e2930c,
+ 0x22a8fd2,0x1a6a478,0x09d3d3a,0x02a74e0 },
+ { 0x1a2da3b,0x30b0b16,0x0847936,0x3d86257,0x138ccbc,0x0f5421a,
+ 0x25244e6,0x23bdd79,0x1aee117,0x00c01ae } },
+ /* 206 */
+ { { 0x1eead28,0x07cac32,0x1fbc0bb,0x17627d3,0x17eef63,0x0b3a24e,
+ 0x0757fdb,0x3dd841d,0x3d745f8,0x002ae17 },
+ { 0x25b4549,0x29f24cf,0x2f21ecd,0x1725e48,0x04be2bb,0x10ee010,
+ 0x1a1274b,0x10b0898,0x27511e9,0x02c48b5 } },
+ /* 207 */
+ { { 0x2a5ae7a,0x181ef99,0x0be33be,0x3e9dab7,0x101e703,0x3adb971,
+ 0x1043014,0x2ebb2be,0x1c1097d,0x027d667 },
+ { 0x3f250ed,0x16dc603,0x20dc6d7,0x1d0d268,0x38eb915,0x02c89e8,
+ 0x1605a41,0x12de109,0x0e08a29,0x01f554a } },
+ /* 208 */
+ { { 0x0c26def,0x163d988,0x2d1ef0f,0x3a960ac,0x1025585,0x0738e20,
+ 0x27d79b0,0x05cc3ef,0x201303f,0x00a333a },
+ { 0x1644ba5,0x2af345e,0x30b8d1d,0x3a01bff,0x31fc643,0x1acf85e,
+ 0x0a76fc6,0x04efe98,0x348a1d0,0x03062eb } },
+ /* 209 */
+ { { 0x1c4216d,0x18e3217,0x02ac34e,0x19c8185,0x200c010,0x17d4192,
+ 0x13a1719,0x165af51,0x09db7a9,0x0277be0 },
+ { 0x3ab8d2c,0x2190b99,0x22b641e,0x0cd88de,0x3b42404,0x1310862,
+ 0x106a6d6,0x23395f5,0x0b06880,0x000d5fe } },
+ /* 210 */
+ { { 0x0d2cc88,0x36f9913,0x339d8e9,0x237c2e3,0x0cc61c2,0x34c2832,
+ 0x309874c,0x2621d28,0x2dd1b48,0x0392806 },
+ { 0x17cd8f9,0x07bab3d,0x0c482ed,0x0faf565,0x31b767d,0x2f4bde1,
+ 0x295c717,0x330c29c,0x179ce10,0x0119b5f } },
+ /* 211 */
+ { { 0x1ada2c7,0x0c624a7,0x227d47d,0x30e3e6a,0x14fa0a6,0x0829678,
+ 0x24fd288,0x2b46a43,0x122451e,0x0319ca9 },
+ { 0x186b655,0x01f3217,0x0af1306,0x0efe6b5,0x2f0235d,0x1c45ca9,
+ 0x2086805,0x1d44e66,0x0faf2a6,0x0178f59 } },
+ /* 212 */
+ { { 0x33b4416,0x10431e6,0x2d99aa6,0x217aac9,0x0cd8fcf,0x2d95a9d,
+ 0x3ff74ad,0x10bf17a,0x295eb8e,0x01b229e },
+ { 0x02a63bd,0x182e9ec,0x004710c,0x00e2e3c,0x06b2f23,0x04b642c,
+ 0x2c37383,0x32a4631,0x022ad82,0x00d22b9 } },
+ /* 213 */
+ { { 0x0cda2fb,0x1d198d7,0x26d27f4,0x286381c,0x022acca,0x24ac7c8,
+ 0x2df7824,0x0b4ba16,0x1e0d9ef,0x03041d3 },
+ { 0x29a65b3,0x0f3912b,0x151bfcf,0x2b0175c,0x0fd71e4,0x39aa5e2,
+ 0x311f50c,0x13ff351,0x3dbc9e5,0x03eeb7e } },
+ /* 214 */
+ { { 0x0a99363,0x0fc7348,0x2775171,0x23db3c8,0x2b91565,0x134d66c,
+ 0x0175cd2,0x1bf365a,0x2b48371,0x02dfe5d },
+ { 0x16dbf74,0x2389357,0x2f36575,0x3f5c70e,0x38d23ba,0x090f7f8,
+ 0x3477600,0x3201523,0x32ecafc,0x03d3506 } },
+ /* 215 */
+ { { 0x1abd48d,0x073ca3f,0x38a451f,0x0d8cb01,0x1ce81be,0x05c51ba,
+ 0x0e29741,0x03c41ab,0x0eae016,0x0060209 },
+ { 0x2e58358,0x1da62d9,0x2358038,0x14b39b2,0x1635687,0x39079b1,
+ 0x380e345,0x1b49608,0x23983cf,0x019f97d } },
+ /* 216 */
+ { { 0x34899ef,0x332e373,0x04c0f89,0x3c27aed,0x1949015,0x09663b2,
+ 0x2f9276b,0x07f1951,0x09a04c1,0x027fbde },
+ { 0x3d2a071,0x19fb3d4,0x1b096d3,0x1fe9146,0x3b10e1a,0x0478bbb,
+ 0x2b3fb06,0x1388329,0x181a99c,0x02f2030 } },
+ /* 217 */
+ { { 0x1eb82e6,0x14dbe39,0x3920972,0x31fd5b2,0x21a484f,0x02d7697,
+ 0x0e21715,0x37c431e,0x2629f8c,0x01249c3 },
+ { 0x26b50ad,0x26deefa,0x0ffc1a3,0x30688e2,0x39a0284,0x041c65e,
+ 0x03eb178,0x0bdfd50,0x2f96137,0x034bb94 } },
+ /* 218 */
+ { { 0x0e0362a,0x334a162,0x194dd37,0x29e3e97,0x2442fa8,0x10d2949,
+ 0x3836e5a,0x2dccebf,0x0bee5ab,0x037ed1e },
+ { 0x33eede6,0x3c739d9,0x2f04a91,0x350ad6c,0x3a5390a,0x14c368b,
+ 0x26f7bf5,0x11ce979,0x0b408df,0x0366850 } },
+ /* 219 */
+ { { 0x28ea498,0x0886d5b,0x2e090e0,0x0a4d58f,0x2623478,0x0d74ab7,
+ 0x2b83913,0x12c6b81,0x18d623f,0x01d8301 },
+ { 0x198aa79,0x26d6330,0x3a7f0b8,0x34bc1ea,0x2f74890,0x378955a,
+ 0x204110f,0x0102538,0x02d8f19,0x01c5066 } },
+ /* 220 */
+ { { 0x14b0f45,0x2838cd3,0x14e16f0,0x0e0e4aa,0x2d9280b,0x0f18757,
+ 0x3324c6b,0x1391ceb,0x1ce89d5,0x00ebe74 },
+ { 0x0930371,0x3de6048,0x3097fd8,0x1308705,0x3eda266,0x3108c26,
+ 0x1545dcd,0x1f7583a,0x1c37395,0x02c7e05 } },
+ /* 221 */
+ { { 0x1fec44a,0x2a9e3a2,0x0caf84f,0x11cf2a9,0x0c8c2ae,0x06da989,
+ 0x1c807dc,0x3c149a4,0x1141543,0x02906bb },
+ { 0x15ffe04,0x0d4e65f,0x2e20424,0x37d896d,0x18bacb2,0x1e05ddd,
+ 0x1660be8,0x183be17,0x1dd86fb,0x035ba70 } },
+ /* 222 */
+ { { 0x2853264,0x0ba5fb1,0x0a0b3aa,0x2df88c1,0x2771533,0x23aba6f,
+ 0x112bb7b,0x3e3086e,0x210ae9b,0x027271b },
+ { 0x030b74c,0x0269678,0x1e90a23,0x135a98c,0x24ed749,0x126de7c,
+ 0x344b23a,0x186da27,0x19640fa,0x0159af5 } },
+ /* 223 */
+ { { 0x18061f3,0x3004630,0x3c70066,0x34df20f,0x1190b25,0x1c9cc91,
+ 0x1fc8e02,0x0d17bc1,0x390f525,0x033cb1c },
+ { 0x0eb30cf,0x2f3ad04,0x303aa09,0x2e835dd,0x1cfd2eb,0x143fc95,
+ 0x02c43a1,0x025e7a1,0x3558aa2,0x000bd45 } },
+ /* 224 */
+ { { 0x1db7d07,0x3bde52b,0x1500396,0x1089115,0x20b4fc7,0x1e2a8f3,
+ 0x3f8eacc,0x365f7eb,0x1a5e8d4,0x0053a6b },
+ { 0x37079e2,0x120284b,0x000edaa,0x33792c2,0x145baa3,0x20e055f,
+ 0x365e2d7,0x26ba005,0x3ab8e9d,0x0282b53 } },
+ /* 225 */
+ { { 0x2653618,0x2dd8852,0x2a5f0bf,0x0f0c7aa,0x2187281,0x1252757,
+ 0x13e7374,0x3b47855,0x0b86e56,0x02f354c },
+ { 0x2e9c47b,0x2fa14cc,0x19ab169,0x3fad401,0x0dc2776,0x24afeed,
+ 0x3a97611,0x0d07736,0x3cf6979,0x02424a0 } },
+ /* 226 */
+ { { 0x2e81a13,0x000c91d,0x123967b,0x265885c,0x29bee1a,0x0cb8675,
+ 0x2d361bd,0x1526823,0x3c9ace1,0x00d7bad },
+ { 0x24e5bdc,0x02b969f,0x2c6e128,0x34edb3b,0x12dcd2c,0x3899af0,
+ 0x24224c6,0x3a1914b,0x0f4448a,0x026a2cb } },
+ /* 227 */
+ { { 0x1d03b59,0x1c6fc82,0x32abf64,0x28ed96b,0x1c90e62,0x2f57bb2,
+ 0x3ff168e,0x04de7fd,0x0f4d449,0x01af6d8 },
+ { 0x255bc30,0x2bfaf22,0x3fe0dad,0x0584025,0x1c79ead,0x3078ef7,
+ 0x2197414,0x022a50b,0x0fd94ba,0x0007b0f } },
+ /* 228 */
+ { { 0x09485c2,0x09dfaf7,0x10c7ba6,0x1e48bec,0x248cc9a,0x028a362,
+ 0x21d60f7,0x193d93d,0x1c04754,0x0346b2c },
+ { 0x2f36612,0x240ac49,0x0d8bd26,0x13b8186,0x259c3a4,0x020d5fb,
+ 0x38a8133,0x09b0937,0x39d4056,0x01f7341 } },
+ /* 229 */
+ { { 0x05a4b48,0x1f534fc,0x07725ce,0x148dc8c,0x2adcd29,0x04aa456,
+ 0x0f79718,0x066e346,0x189377d,0x002fd4d },
+ { 0x068ea73,0x336569b,0x184d35e,0x32a08e9,0x3c7f3bb,0x11ce9c8,
+ 0x3674c6f,0x21bf27e,0x0d9e166,0x034a2f9 } },
+ /* 230 */
+ { { 0x0fa8e4b,0x2e6418e,0x18fc5d2,0x1ba24ff,0x0559f18,0x0dbedbf,
+ 0x2de2aa4,0x22338e9,0x3aa510f,0x035d801 },
+ { 0x23a4988,0x02aad94,0x02732d1,0x111d374,0x0b455cf,0x0d01c9e,
+ 0x067082a,0x2ec05fd,0x368b303,0x03cad4b } },
+ /* 231 */
+ { { 0x035b4ca,0x1fabea6,0x1cbc0d5,0x3f2ed9a,0x02d2232,0x1990c66,
+ 0x2eb680c,0x3b4ea3b,0x18ecc5a,0x03636fa },
+ { 0x1a02709,0x26f8ff1,0x1fa8cba,0x397d6e8,0x230be68,0x043aa14,
+ 0x3d43cdf,0x25c17fa,0x3a3ee55,0x0380564 } },
+ /* 232 */
+ { { 0x275a0a6,0x16bd43a,0x0033d3e,0x2b15e16,0x2512226,0x005d901,
+ 0x26d50fd,0x3bc19bf,0x3b1aeb8,0x02bfb01 },
+ { 0x0bb0a31,0x26559e0,0x1aae7fb,0x330dcc2,0x16f1af3,0x06afce2,
+ 0x13a15a0,0x2ff7645,0x3546e2d,0x029c6e4 } },
+ /* 233 */
+ { { 0x0f593d2,0x384b806,0x122bbf8,0x0a281e0,0x1d1a904,0x2e93cab,
+ 0x0505db0,0x08f6454,0x05c6285,0x014e880 },
+ { 0x3f2b935,0x22d8e79,0x161a07c,0x16b060a,0x02bff97,0x146328b,
+ 0x3ceea77,0x238f61a,0x19b3d58,0x02fd1f4 } },
+ /* 234 */
+ { { 0x17665d5,0x259e9f7,0x0de5672,0x15cbcbd,0x34e3030,0x035240f,
+ 0x0005ae8,0x286d851,0x07f39c9,0x000070b },
+ { 0x1efc6d6,0x2a0051a,0x2724143,0x2a9ef1e,0x0c810bd,0x1e05429,
+ 0x25670ba,0x2e66d7d,0x0e786ff,0x03f6b7e } },
+ /* 235 */
+ { { 0x3c00785,0x232e23f,0x2b67fd3,0x244ed23,0x077fa75,0x3cda3ef,
+ 0x14d055b,0x0f25011,0x24d5aa4,0x00ea0e3 },
+ { 0x297bb9a,0x198ca4f,0x14d9561,0x18d1076,0x39eb933,0x2b6caa0,
+ 0x1591a60,0x0768d45,0x257873e,0x00f36e0 } },
+ /* 236 */
+ { { 0x1e77eab,0x0502a5f,0x0109137,0x0350592,0x3f7e1c5,0x3ac7437,
+ 0x2dcad2c,0x1fee9d8,0x089f1f5,0x0169833 },
+ { 0x0d45673,0x0d8e090,0x065580b,0x065644f,0x11b82be,0x3592dd0,
+ 0x3284b8d,0x23f0015,0x16fdbfd,0x0248bfd } },
+ /* 237 */
+ { { 0x1a129a1,0x1977bb2,0x0e041b2,0x15f30a1,0x0a5b1ce,0x3afef8f,
+ 0x380c46c,0x3358810,0x27df6c5,0x01ca466 },
+ { 0x3b90f9a,0x3d14ea3,0x031b298,0x02e2390,0x2d719c0,0x25bc615,
+ 0x2c0e777,0x0226b8c,0x3803624,0x0179e45 } },
+ /* 238 */
+ { { 0x363cdfb,0x1bb155f,0x24fd5c1,0x1c7c72b,0x28e6a35,0x18165f2,
+ 0x226bea5,0x0beaff3,0x371e24c,0x0138294 },
+ { 0x1765357,0x29034e9,0x22b4276,0x11035ce,0x23c89af,0x074468c,
+ 0x3370ae4,0x013bae3,0x018d566,0x03d7fde } },
+ /* 239 */
+ { { 0x209df21,0x0f8ff86,0x0e47fbf,0x23b99ba,0x126d5d2,0x2722405,
+ 0x16bd0a2,0x1799082,0x0e9533f,0x039077c },
+ { 0x3ba9e3f,0x3f6902c,0x1895305,0x3ac9813,0x3f2340c,0x3c0d9f1,
+ 0x26e1927,0x0557c21,0x16eac4f,0x023b75f } },
+ /* 240 */
+ { { 0x3fc8ff3,0x0770382,0x342fc9a,0x0afa4db,0x314efd8,0x328e07b,
+ 0x016f7cc,0x3ba599c,0x1caed8a,0x0050cb0 },
+ { 0x0b23c26,0x2120a5c,0x3273ec6,0x1cc1cd6,0x2a64fe8,0x2bbc3d6,
+ 0x09f6e5e,0x34b1b8e,0x00b5ac8,0x032bbd2 } },
+ /* 241 */
+ { { 0x1315922,0x1725e1d,0x0ca5524,0x1c4c18f,0x3d82951,0x193bcb2,
+ 0x0e60d0b,0x388dbcf,0x37e8efa,0x0342e85 },
+ { 0x1b3af60,0x26ba3ec,0x220e53a,0x394f4b6,0x01a796a,0x3e7bbca,
+ 0x163605d,0x2b85807,0x17c1c54,0x03cc725 } },
+ /* 242 */
+ { { 0x1cc4597,0x1635492,0x2028c0f,0x2c2eb82,0x2dc5015,0x0d2a052,
+ 0x05fc557,0x1f0ebbf,0x0cb96e1,0x0004d01 },
+ { 0x1a824bf,0x3896172,0x2ed7b29,0x178007a,0x0d59318,0x07bda2b,
+ 0x2ee6826,0x0f9b235,0x04b9193,0x01bcddf } },
+ /* 243 */
+ { { 0x0333fd2,0x0eeb46a,0x15b89f9,0x00968aa,0x2a89302,0x2bdd6b3,
+ 0x1e5037e,0x2541884,0x24ed2d0,0x01b6e8f },
+ { 0x04399cd,0x3be6334,0x3adea48,0x1bb9adc,0x31811c6,0x05fb2bc,
+ 0x360752c,0x3d29dcb,0x3423bec,0x03c4f3c } },
+ /* 244 */
+ { { 0x119e2eb,0x2e7b02a,0x0f68cee,0x257d8b0,0x183a9a1,0x2ae88a6,
+ 0x3a3bb67,0x2eb4f3e,0x1a9274b,0x0320fea },
+ { 0x2fa1ce0,0x346c2d8,0x2fbf0d7,0x3d4d063,0x0e58b60,0x09c1bc1,
+ 0x28ef9e5,0x09a0efe,0x0f45d70,0x02d275c } },
+ /* 245 */
+ { { 0x2d5513b,0x31d443e,0x1e2d914,0x3b2c5d4,0x105f32e,0x27ee756,
+ 0x050418d,0x3c73db6,0x1bb0c30,0x01673eb },
+ { 0x1cb7fd6,0x1eb08d5,0x26a3e16,0x2e20810,0x0249367,0x029e219,
+ 0x2ec58c9,0x12d9fab,0x362354a,0x016eafc } },
+ /* 246 */
+ { { 0x2424865,0x260747b,0x177f37c,0x1e3cb95,0x08b0028,0x2783016,
+ 0x2970f1b,0x323c1c0,0x2a79026,0x0186231 },
+ { 0x0f244da,0x26866f4,0x087306f,0x173ec20,0x31ecced,0x3c84d8d,
+ 0x070f9b9,0x2e764d5,0x075df50,0x0264ff9 } },
+ /* 247 */
+ { { 0x32c3609,0x0c737e6,0x14ea68e,0x300b11b,0x184eb19,0x29dd440,
+ 0x09ec1a9,0x185adeb,0x0664c80,0x0207dd9 },
+ { 0x1fbe978,0x30a969d,0x33561d7,0x34fc60e,0x36743fe,0x00774af,
+ 0x0d1f045,0x018360e,0x12a5fe9,0x01592a0 } },
+ /* 248 */
+ { { 0x2817d1d,0x2993d3e,0x2e0f7a5,0x112faa0,0x255f968,0x355fe6a,
+ 0x3f5a0fc,0x075b2d7,0x3cf00e5,0x0089afc },
+ { 0x32833cf,0x06a7e4b,0x09a8d6d,0x1693d3e,0x320a0a3,0x3cfdfdd,
+ 0x136c498,0x1e0d845,0x347ff25,0x01a1de7 } },
+ /* 249 */
+ { { 0x3043d08,0x030705c,0x20fa79b,0x1d07f00,0x0a54467,0x29b49b4,
+ 0x367e289,0x0b82f4d,0x0d1eb09,0x025ef2c },
+ { 0x32ed3c3,0x1baaa3c,0x3c482ab,0x146ca06,0x3c8a4f1,0x3e85e3c,
+ 0x1bf4f3b,0x1195534,0x3e80a78,0x02a1cbf } },
+ /* 250 */
+ { { 0x32b2086,0x2de4d68,0x3486b1a,0x03a0583,0x2e1eb71,0x2dab9af,
+ 0x10cd913,0x28daa6f,0x3fcb732,0x000a04a },
+ { 0x3605318,0x3f5f2b3,0x2d1da63,0x143f7f5,0x1646e5d,0x040b586,
+ 0x1683982,0x25abe87,0x0c9fe53,0x001ce47 } },
+ /* 251 */
+ { { 0x380d02b,0x055fc22,0x3f7fc50,0x3458a1d,0x26b8333,0x23550ab,
+ 0x0a1af87,0x0a821eb,0x2dc7e6d,0x00d574a },
+ { 0x07386e1,0x3ccd68a,0x3275b41,0x253e390,0x2fd272a,0x1e6627a,
+ 0x2ca2cde,0x0e9e4a1,0x1e37c2a,0x00f70ac } },
+ /* 252 */
+ { { 0x0581352,0x2748701,0x02bed68,0x094dd9e,0x30a00c8,0x3fb5c07,
+ 0x3bd5909,0x211ac80,0x1103ccd,0x0311e1a },
+ { 0x0c768ed,0x29dc209,0x36575db,0x009a107,0x272feea,0x2b33383,
+ 0x313ed56,0x134c9cc,0x168d5bb,0x033310a } },
+ /* 253 */
+ { { 0x17620b9,0x143784f,0x256a94e,0x229664a,0x1d89a5c,0x1d521f2,
+ 0x0076406,0x1c73f70,0x342aa48,0x03851fa },
+ { 0x0f3ae46,0x2ad3bab,0x0fbe274,0x3ed40d4,0x2fd4936,0x232103a,
+ 0x2afe474,0x25b8f7c,0x047080e,0x008e6b0 } },
+ /* 254 */
+ { { 0x3fee8d4,0x347cd4a,0x0fec481,0x33fe9ec,0x0ce80b5,0x33a6bcf,
+ 0x1c4c9e2,0x3967441,0x1a3f5f7,0x03157e8 },
+ { 0x257c227,0x1bc53a0,0x200b318,0x0fcd0af,0x2c5b165,0x2a413ec,
+ 0x2fc998a,0x2da6426,0x19cd4f4,0x0025336 } },
+ /* 255 */
+ { { 0x303beba,0x2072135,0x32918a9,0x140cb3a,0x08631d1,0x0ef527b,
+ 0x05f2c9e,0x2b4ce91,0x0b642ab,0x02e428c },
+ { 0x0a5abf9,0x15013ed,0x3603b46,0x30dd76d,0x3004750,0x28d7627,
+ 0x1a42ccc,0x093ddbe,0x39a1b79,0x00067e2 } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Thin wrapper: delegates to the generic stripe multiplier using the
+ * precomputed table (p256_table) for the fixed base point p256_base.
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_10(r, &p256_base, p256_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Public entry point: converts the caller's mp_int scalar into the
+ * internal digit representation, performs the fixed-base multiplication
+ * and copies the result back into the caller's ecc_point.
+ *
+ * km   Scalar to multiply by.
+ * r    Resulting point.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack-allocated storage; small-stack builds heap-allocate instead. */
+    sp_point_256 p;
+    sp_digit kd[10];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    /* Obtain working point storage (stack or heap depending on build). */
+    err = sp_256_point_new_10(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* Small-stack build: scalar digits come from the heap. */
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    /* Otherwise use the stack-resident digit array declared above. */
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Import the mp_int scalar into 10 internal digits. */
+        sp_256_from_mp(k, 10, km);
+
+        err = sp_256_ecc_mulmod_base_10(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Export the internal point back to the caller's ecc_point. */
+        err = sp_256_point_to_ecc_point_10(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Release the heap-allocated scalar digits (small-stack builds only). */
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_10(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_10(const sp_digit* a)
+{
+ /* OR all ten digits together; there are no data-dependent branches,
+ * so the check is constant time. */
+ return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
+ a[8] | a[9]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a A single precision integer that is incremented in place.
+ */
+SP_NOINLINE static void sp_256_add_one_10(sp_digit* a)
+{
+ a[0]++;
+ /* Normalize so any carry out of the low 26-bit digit propagates. */
+ sp_256_norm_10(a);
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ /* s = number of bits already filled in the current digit r[j]. */
+ word32 s = 0;
+
+ r[0] = 0;
+ /* Walk the big-endian bytes from least significant to most. */
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 18U) {
+ /* Byte straddles the 26-bit digit boundary: mask the filled
+ * digit and put the remaining high bits into the next one. */
+ r[j] &= 0x3ffffff;
+ s = 26U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ /* Zero any remaining high digits. */
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_10(WC_RNG* rng, sp_digit* k)
+{
+ int err;
+ byte buf[32];
+
+ /* Rejection-sample: draw 32 random bytes until the value is below
+ * order-2, then add one so the result lies in 1..order-1. */
+ do {
+ err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+ if (err == 0) {
+ sp_256_from_bin(k, 10, buf, (int)sizeof(buf));
+ if (sp_256_cmp_10(k, p256_order2) < 0) {
+ sp_256_add_one_10(k);
+ break;
+ }
+ }
+ }
+ while (err == 0);
+
+ return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Stack storage used when dynamic allocation is disabled. */
+ sp_point_256 p;
+ sp_digit kd[10];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_256 inf;
+#endif
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_256* infinity;
+#endif
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_10(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_10(heap, inf, infinity);
+ }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+
+ /* Private scalar k in 1..order-1, public point = k.G (affine). */
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_gen_k_10(rng, k);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
+ }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ /* Validate: order * point must be the point at infinity. */
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_10(infinity, point, p256_order, 1, NULL);
+ }
+ if (err == MP_OKAY) {
+ if ((sp_256_iszero_10(point->x) == 0) || (sp_256_iszero_10(point->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(k, priv);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_to_ecc_point_10(point, pub);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_256_point_free_10(infinity, 1, heap);
+#endif
+ sp_256_point_free_10(point, 1, heap);
+
+ return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+ /* s = bits of the current digit already emitted into the previous
+ * byte; b = bit position within r[i] of the next byte to emit. */
+ int i, j, s = 0, b;
+
+ /* Normalize first so every digit holds exactly 26 bits. */
+ for (i=0; i<9; i++) {
+ r[i+1] += r[i] >> 26;
+ r[i] &= 0x3ffffff;
+ }
+ /* Emit bytes from least significant (end of the array) backwards. */
+ j = 256 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<10 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ while (b < 26) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ /* Leftover bits of r[i] are merged into the next byte. */
+ s = 8 - (b - 26);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv Scalar to multiply the point by.
+ * pub Point to multiply.
+ * out Buffer to hold X ordinate.
+ * outLen On entry, size of the buffer in bytes.
+ * On exit, length of data in buffer in bytes.
+ * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+ word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Stack storage used when dynamic allocation is disabled. */
+ sp_point_256 p;
+ sp_digit kd[10];
+#endif
+ sp_point_256* point = NULL;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ /* Output buffer must hold the full 32-byte X ordinate. */
+ if (*outLen < 32U) {
+ err = BUFFER_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_10(heap, p, point);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* shared point = priv * pub; map back to affine (map = 1). */
+ sp_256_from_mp(k, 10, priv);
+ sp_256_point_from_ecc_point_10(point, pub);
+ err = sp_256_ecc_mulmod_10(point, point, k, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Only the X ordinate is the ECDH shared secret. */
+ sp_256_to_bin(point->x, out);
+ *outLen = 32;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_10(point, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_256_mul_d_10(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int64_t tb = b;
+ int64_t t = 0;
+ int i;
+
+ /* Schoolbook multiply by a single digit with running carry in t. */
+ for (i = 0; i < 10; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x3ffffff;
+ t >>= 26;
+ }
+ r[10] = (sp_digit)t;
+#else
+ int64_t tb = b;
+ int64_t t[10];
+
+ /* Unrolled form: compute all partial products first, then resolve
+ * one level of carries into the 26-bit result digits. */
+ t[ 0] = tb * a[ 0];
+ t[ 1] = tb * a[ 1];
+ t[ 2] = tb * a[ 2];
+ t[ 3] = tb * a[ 3];
+ t[ 4] = tb * a[ 4];
+ t[ 5] = tb * a[ 5];
+ t[ 6] = tb * a[ 6];
+ t[ 7] = tb * a[ 7];
+ t[ 8] = tb * a[ 8];
+ t[ 9] = tb * a[ 9];
+ r[ 0] = (t[ 0] & 0x3ffffff);
+ r[ 1] = (sp_digit)(t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
+ r[ 2] = (sp_digit)(t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
+ r[ 3] = (sp_digit)(t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
+ r[ 4] = (sp_digit)(t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
+ r[ 5] = (sp_digit)(t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
+ r[ 6] = (sp_digit)(t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
+ r[ 7] = (sp_digit)(t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
+ r[ 8] = (sp_digit)(t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
+ r[ 9] = (sp_digit)(t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
+ r[10] = (sp_digit)(t[ 9] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+static WC_INLINE sp_digit sp_256_div_word_10(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ /* Long division of the 52-bit value d1:d0 by dv, consuming 5 bits of
+ * d0 per step so intermediate values stay within sp_digit range. */
+ sp_digit d, r, t;
+
+ /* All 26 bits from d1 and top 5 bits from d0. */
+ d = (d1 << 5) | (d0 >> 21);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 6 bits in r */
+ /* Next 5 bits from d0. */
+ r <<= 5;
+ d <<= 5;
+ d |= (d0 >> 16) & ((1 << 5) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 11 bits in r */
+ /* Next 5 bits from d0. */
+ r <<= 5;
+ d <<= 5;
+ d |= (d0 >> 11) & ((1 << 5) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 16 bits in r */
+ /* Next 5 bits from d0. */
+ r <<= 5;
+ d <<= 5;
+ d |= (d0 >> 6) & ((1 << 5) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 21 bits in r */
+ /* Next 5 bits from d0. */
+ r <<= 5;
+ d <<= 5;
+ d |= (d0 >> 1) & ((1 << 5) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 26 bits in r */
+ /* Remaining 1 bits from d0. */
+ r <<= 1;
+ d <<= 1;
+ d |= d0 & ((1 << 1) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_256_div_10(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ int i;
+#ifndef WOLFSSL_SP_DIV_32
+ int64_t d1;
+#endif
+ sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* td;
+#else
+ sp_digit t1d[20], t2d[10 + 1];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = td;
+ t2 = td + 2 * 10;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ /* Estimate each quotient digit from the divisor's top digit, then
+ * correct by one multiply/subtract round (schoolbook division). */
+ dv = d[9];
+ XMEMCPY(t1, a, sizeof(*t1) * 2U * 10U);
+ for (i=9; i>=0; i--) {
+ t1[10 + i] += t1[10 + i - 1] >> 26;
+ t1[10 + i - 1] &= 0x3ffffff;
+#ifndef WOLFSSL_SP_DIV_32
+ d1 = t1[10 + i];
+ d1 <<= 26;
+ d1 += t1[10 + i - 1];
+ r1 = (sp_digit)(d1 / dv);
+#else
+ r1 = sp_256_div_word_10(t1[10 + i], t1[10 + i - 1], dv);
+#endif
+
+ sp_256_mul_d_10(t2, d, r1);
+ (void)sp_256_sub_10(&t1[i], &t1[i], t2);
+ t1[10 + i] -= t2[10];
+ t1[10 + i] += t1[10 + i - 1] >> 26;
+ t1[10 + i - 1] &= 0x3ffffff;
+ /* Estimate may have overshot: add back a corrective multiple. */
+ r1 = (((-t1[10 + i]) << 26) - t1[10 + i - 1]) / dv;
+ r1++;
+ sp_256_mul_d_10(t2, d, r1);
+ (void)sp_256_add_10(&t1[i], &t1[i], t2);
+ t1[10 + i] += t1[10 + i - 1] >> 26;
+ t1[10 + i - 1] &= 0x3ffffff;
+ }
+ t1[10 - 1] += t1[10 - 2] >> 26;
+ t1[10 - 2] &= 0x3ffffff;
+ r1 = t1[10 - 1] / dv;
+
+ sp_256_mul_d_10(t2, d, r1);
+ (void)sp_256_sub_10(t1, t1, t2);
+ XMEMCPY(r, t1, sizeof(*r) * 2U * 10U);
+ for (i=0; i<8; i++) {
+ r[i+1] += r[i] >> 26;
+ r[i] &= 0x3ffffff;
+ }
+ /* Constant-time final correction when the remainder went negative. */
+ sp_256_cond_add_10(r, r, d, 0 - ((r[9] < 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_256_mod_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* The quotient is not required, so pass NULL for the multiplier. */
+ return sp_256_div_10(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve, little-endian 32-bit words. */
+static const uint32_t p256_order_minus_2[8] = {
+ 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+ 0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve, little-endian words. */
+static const uint32_t p256_order_low[4] = {
+ 0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ /* Full multiply then Montgomery reduction modulo the group order. */
+ sp_256_mul_10(r, a, b);
+ sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_order_10(sp_digit* r, const sp_digit* a)
+{
+ /* Square then Montgomery reduction modulo the group order. */
+ sp_256_sqr_10(r, a);
+ sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_n_order_10(sp_digit* r, const sp_digit* a, int n)
+{
+ int i;
+
+ /* n repeated squarings: r = a^(2^n) mod order. */
+ sp_256_mont_sqr_order_10(r, a);
+ for (i=1; i<n; i++) {
+ sp_256_mont_sqr_order_10(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_256_mont_inv_order_10(sp_digit* r, const sp_digit* a,
+ sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ /* Fermat inversion: a^(order-2) by square-and-multiply over the bits
+ * of order-2, most significant first. */
+ XMEMCPY(t, a, sizeof(sp_digit) * 10);
+ for (i=254; i>=0; i--) {
+ sp_256_mont_sqr_order_10(t, t);
+ if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t, t, a);
+ }
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 10U);
+#else
+ sp_digit* t = td;
+ sp_digit* t2 = td + 2 * 10;
+ sp_digit* t3 = td + 4 * 10;
+ int i;
+
+ /* Fixed addition chain exploiting the repeated-word structure of the
+ * top half of order-2; the low 128 bits are handled bitwise. */
+ /* t = a^2 */
+ sp_256_mont_sqr_order_10(t, a);
+ /* t = a^3 = t * a */
+ sp_256_mont_mul_order_10(t, t, a);
+ /* t2= a^c = t ^ 2 ^ 2 */
+ sp_256_mont_sqr_n_order_10(t2, t, 2);
+ /* t3= a^f = t2 * t */
+ sp_256_mont_mul_order_10(t3, t2, t);
+ /* t2= a^f0 = t3 ^ 2 ^ 4 */
+ sp_256_mont_sqr_n_order_10(t2, t3, 4);
+ /* t = a^ff = t2 * t3 */
+ sp_256_mont_mul_order_10(t, t2, t3);
+ /* t2= a^ff00 = t ^ 2 ^ 8 */
+ sp_256_mont_sqr_n_order_10(t2, t, 8);
+ /* t = a^ffff = t2 * t */
+ sp_256_mont_mul_order_10(t, t2, t);
+ /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+ sp_256_mont_sqr_n_order_10(t2, t, 16);
+ /* t = a^ffffffff = t2 * t */
+ sp_256_mont_mul_order_10(t, t2, t);
+ /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+ sp_256_mont_sqr_n_order_10(t2, t, 64);
+ /* t2= a^ffffffff00000000ffffffff = t2 * t */
+ sp_256_mont_mul_order_10(t2, t2, t);
+ /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+ sp_256_mont_sqr_n_order_10(t2, t2, 32);
+ /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+ sp_256_mont_mul_order_10(t2, t2, t);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+ for (i=127; i>=112; i--) {
+ sp_256_mont_sqr_order_10(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+ sp_256_mont_sqr_n_order_10(t2, t2, 4);
+ sp_256_mont_mul_order_10(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+ for (i=107; i>=64; i--) {
+ sp_256_mont_sqr_order_10(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+ sp_256_mont_sqr_n_order_10(t2, t2, 4);
+ sp_256_mont_mul_order_10(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+ for (i=59; i>=32; i--) {
+ sp_256_mont_sqr_order_10(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+ sp_256_mont_sqr_n_order_10(t2, t2, 4);
+ sp_256_mont_mul_order_10(t2, t2, t3);
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+ for (i=27; i>=0; i--) {
+ sp_256_mont_sqr_order_10(t2, t2);
+ if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+ sp_256_mont_mul_order_10(t2, t2, a);
+ }
+ }
+ /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+ sp_256_mont_sqr_n_order_10(t2, t2, 4);
+ /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+ sp_256_mont_mul_order_10(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * km Ephemeral scalar to use; if NULL or zero a random one is generated.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+ mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit ed[2*10];
+ sp_digit xd[2*10];
+ sp_digit kd[2*10];
+ sp_digit rd[2*10];
+ sp_digit td[3 * 2*10];
+ sp_point_256 p;
+#endif
+ sp_digit* e = NULL;
+ sp_digit* x = NULL;
+ sp_digit* k = NULL;
+ sp_digit* r = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* point = NULL;
+ sp_digit carry;
+ sp_digit* s = NULL;
+ sp_digit* kInv = NULL;
+ int err = MP_OKAY;
+ int32_t c;
+ int i;
+
+ (void)heap;
+
+ err = sp_256_point_new_10(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 10, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ e = d + 0 * 10;
+ x = d + 2 * 10;
+ k = d + 4 * 10;
+ r = d + 6 * 10;
+ tmp = d + 8 * 10;
+#else
+ e = ed;
+ x = xd;
+ k = kd;
+ r = rd;
+ tmp = td;
+#endif
+ /* s and kInv alias e and k: each pair is never live at once. */
+ s = e;
+ kInv = k;
+
+ /* Truncate the hash to the curve size (256 bits). */
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(e, 10, hash, (int)hashLen);
+ }
+
+ /* Retry with a fresh k when s turns out to be zero. */
+ for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+ sp_256_from_mp(x, 10, priv);
+
+ /* New random point. km, when supplied, is a caller-provided
+ * ephemeral scalar (used by tests) and is consumed (zeroed). */
+ if (km == NULL || mp_iszero(km)) {
+ err = sp_256_ecc_gen_k_10(rng, k);
+ }
+ else {
+ sp_256_from_mp(k, 10, km);
+ mp_zero(km);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = point->x mod order */
+ XMEMCPY(r, point->x, sizeof(sp_digit) * 10U);
+ sp_256_norm_10(r);
+ c = sp_256_cmp_10(r, p256_order);
+ sp_256_cond_sub_10(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_10(r);
+
+ /* Conv k to Montgomery form (mod order) */
+ sp_256_mul_10(k, k, p256_norm_order);
+ err = sp_256_mod_10(k, k, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_10(k);
+ /* kInv = 1/k mod order */
+ sp_256_mont_inv_order_10(kInv, k, tmp);
+ sp_256_norm_10(kInv);
+
+ /* s = r * x + e */
+ sp_256_mul_10(x, x, r);
+ err = sp_256_mod_10(x, x, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_10(x);
+ carry = sp_256_add_10(s, e, x);
+ sp_256_cond_sub_10(s, s, p256_order, 0 - carry);
+ sp_256_norm_10(s);
+ c = sp_256_cmp_10(s, p256_order);
+ sp_256_cond_sub_10(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_10(s);
+
+ /* s = s * k^-1 mod order */
+ sp_256_mont_mul_order_10(s, s, kInv);
+ sp_256_norm_10(s);
+
+ /* Check that signature is usable. */
+ if (sp_256_iszero_10(s) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (i == 0) {
+ err = RNG_FAILURE_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(r, rm);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(s, sm);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ /* Zeroize the whole allocation (7 * 2 * 10 digits), including the
+ * mont-inverse temporaries which hold k-derived secret data. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 10);
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ /* Zeroize all secret intermediates before returning. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 2U * 10U);
+ XMEMSET(x, 0, sizeof(sp_digit) * 2U * 10U);
+ XMEMSET(k, 0, sizeof(sp_digit) * 2U * 10U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 10U);
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 10U);
+#endif
+ sp_256_point_free_10(point, 1, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of the public key point.
+ * pY Y ordinate of the public key point.
+ * pZ Z ordinate of the public key point.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Set to 1 when the signature verifies, 0 otherwise.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit u1d[2*10];
+ sp_digit u2d[2*10];
+ sp_digit sd[2*10];
+ sp_digit tmpd[2*10 * 5];
+ sp_point_256 p1d;
+ sp_point_256 p2d;
+#endif
+ sp_digit* u1 = NULL;
+ sp_digit* u2 = NULL;
+ sp_digit* s = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* p1;
+ sp_point_256* p2 = NULL;
+ sp_digit carry;
+ int32_t c;
+ int err;
+
+ err = sp_256_point_new_10(heap, p1d, p1);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_10(heap, p2d, p2);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 10, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ u1 = d + 0 * 10;
+ u2 = d + 2 * 10;
+ s = d + 4 * 10;
+ tmp = d + 6 * 10;
+#else
+ u1 = u1d;
+ u2 = u2d;
+ s = sd;
+ tmp = tmpd;
+#endif
+
+ /* Truncate the hash to the curve size (256 bits). */
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(u1, 10, hash, (int)hashLen);
+ sp_256_from_mp(u2, 10, r);
+ sp_256_from_mp(s, 10, sm);
+ sp_256_from_mp(p2->x, 10, pX);
+ sp_256_from_mp(p2->y, 10, pY);
+ sp_256_from_mp(p2->z, 10, pZ);
+
+ /* Convert s to Montgomery form mod the order. */
+ {
+ sp_256_mul_10(s, s, p256_norm_order);
+ }
+ err = sp_256_mod_10(s, s, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_10(s);
+ /* u1 = e/s mod order, u2 = r/s mod order. */
+ {
+ sp_256_mont_inv_order_10(s, s, tmp);
+ sp_256_mont_mul_order_10(u1, u1, s);
+ sp_256_mont_mul_order_10(u2, u2, s);
+ }
+
+ err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
+ }
+
+ if (err == MP_OKAY) {
+ {
+ /* p1 = u1.G + u2.Q, left in projective form. */
+ sp_256_proj_point_add_10(p1, p1, p2, tmp);
+ if (sp_256_iszero_10(p1->z)) {
+ /* Zero Z means the add degenerated: either the points were
+ * equal (double instead) or the sum is infinity. */
+ if (sp_256_iszero_10(p1->x) && sp_256_iszero_10(p1->y)) {
+ sp_256_proj_point_dbl_10(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ p1->x[4] = 0;
+ p1->x[5] = 0;
+ p1->x[6] = 0;
+ p1->x[7] = 0;
+ p1->x[8] = 0;
+ p1->x[9] = 0;
+ XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ }
+ }
+ }
+
+ /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+ /* Reload r and convert to Montgomery form. */
+ sp_256_from_mp(u2, 10, r);
+ err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+ }
+
+ if (err == MP_OKAY) {
+ /* u1 = r.z'.z' mod prime */
+ sp_256_mont_sqr_10(p1->z, p1->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_10(u1, u2, p1->z, p256_mod, p256_mp_mod);
+ *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
+ if (*res == 0) {
+ /* Reload r and add order. */
+ sp_256_from_mp(u2, 10, r);
+ carry = sp_256_add_10(u2, u2, p256_order);
+ /* Carry means result is greater than mod and is not valid. */
+ if (carry == 0) {
+ sp_256_norm_10(u2);
+
+ /* Compare with mod and if greater or equal then not valid. */
+ c = sp_256_cmp_10(u2, p256_mod);
+ if (c < 0) {
+ /* Convert to Montgomery form */
+ err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+ if (err == MP_OKAY) {
+ /* u1 = (r + 1*order).z'.z' mod prime */
+ sp_256_mont_mul_10(u1, u2, p1->z, p256_mod,
+ p256_mp_mod);
+ *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
+ }
+ }
+ }
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_256_point_free_10(p1, 0, heap);
+ sp_256_point_free_10(p2, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_10(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit t1d[2*10];
+ sp_digit t2d[2*10];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10 * 4, heap, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 10;
+ t2 = d + 2 * 10;
+#else
+ (void)heap;
+
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ /* Compute t1 = y^2 - x^3 + 3x mod p; on the curve
+ * y^2 = x^3 - 3x + b this must equal b. */
+ sp_256_sqr_10(t1, point->y);
+ (void)sp_256_mod_10(t1, t1, p256_mod);
+ sp_256_sqr_10(t2, point->x);
+ (void)sp_256_mod_10(t2, t2, p256_mod);
+ sp_256_mul_10(t2, t2, point->x);
+ (void)sp_256_mod_10(t2, t2, p256_mod);
+ (void)sp_256_sub_10(t2, p256_mod, t2);
+ sp_256_mont_add_10(t1, t1, t2, p256_mod);
+
+ /* Add 3x via three modular additions. */
+ sp_256_mont_add_10(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_10(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_10(t1, t1, point->x, p256_mod);
+
+ if (sp_256_cmp_10(t1, p256_b) != 0) {
+ err = MP_VAL;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 pubd;
+#endif
+ sp_point_256* pub;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_10(NULL, pubd, pub);
+ if (err == MP_OKAY) {
+ /* Build an affine point (Z = 1) from the supplied ordinates. */
+ sp_256_from_mp(pub->x, 10, pX);
+ sp_256_from_mp(pub->y, 10, pY);
+ sp_256_from_bin(pub->z, 10, one, (int)sizeof(one));
+
+ err = sp_256_ecc_is_point_10(pub, NULL);
+ }
+
+ sp_256_point_free_10(pub, 0, NULL);
+
+ return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[10];
+ sp_point_256 pubd;
+ sp_point_256 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_256* pub;
+ sp_point_256* p = NULL;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_10(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_10(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 10, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ priv = privd;
+#endif
+
+ /* Build an affine public point (Z = 1) and load the scalar. */
+ sp_256_from_mp(pub->x, 10, pX);
+ sp_256_from_mp(pub->y, 10, pY);
+ sp_256_from_bin(pub->z, 10, one, (int)sizeof(one));
+ sp_256_from_mp(priv, 10, privm);
+
+ /* Check point at infinity. */
+ if ((sp_256_iszero_10(pub->x) != 0) &&
+ (sp_256_iszero_10(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y */
+ if (sp_256_cmp_10(pub->x, p256_mod) >= 0 ||
+ sp_256_cmp_10(pub->y, p256_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_256_ecc_is_point_10(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order = infinity */
+ err = sp_256_ecc_mulmod_10(p, pub, p256_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_256_iszero_10(p->x) == 0) ||
+ (sp_256_iszero_10(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private = point */
+ err = sp_256_ecc_mulmod_base_10(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_256_cmp_10(p->x, pub->x) != 0 ||
+ sp_256_cmp_10(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_10(p, 0, heap);
+ sp_256_point_free_10(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* qX, mp_int* qY, mp_int* qZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 10 * 5];
+ sp_point_256 pd;
+ sp_point_256 qd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ sp_point_256* q = NULL;
+ int err;
+
+ err = sp_256_point_new_10(NULL, pd, p);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_10(NULL, qd, q);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 10, pX);
+ sp_256_from_mp(p->y, 10, pY);
+ sp_256_from_mp(p->z, 10, pZ);
+ sp_256_from_mp(q->x, 10, qX);
+ sp_256_from_mp(q->y, 10, qY);
+ sp_256_from_mp(q->z, 10, qZ);
+
+ sp_256_proj_point_add_10(p, p, q, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_10(q, 0, NULL);
+ sp_256_point_free_10(p, 0, NULL);
+
+ return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 10 * 2];
+ sp_point_256 pd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ int err;
+
+ err = sp_256_point_new_10(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 2, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 10, pX);
+ sp_256_from_mp(p->y, 10, pY);
+ sp_256_from_mp(p->z, 10, pZ);
+
+ sp_256_proj_point_dbl_10(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_10(p, 0, NULL);
+
+ return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 10 * 4];
+ sp_point_256 pd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ int err;
+
+ err = sp_256_point_new_10(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 4, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 10, pX);
+ sp_256_from_mp(p->y, 10, pY);
+ sp_256_from_mp(p->z, 10, pZ);
+
+ sp_256_map_10(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, pX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, pY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, pZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_10(p, 0, NULL);
+
+ return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
/* Find the square root of a number mod the prime of the curve.
 *
 * Uses the fact that p256 mod 4 == 3, so sqrt(y) = y ^ ((p256 + 1) / 4).
 * The fixed chain of squarings/multiplications below evaluates exactly that
 * exponent (see the running-exponent comments on each step), operating on
 * Montgomery-form values throughout.
 *
 * y The number to operate on and the result (in/out, Montgomery form).
 * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
 */
static int sp_256_mont_sqrt_10(sp_digit* y)
{
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* d;
#else
    /* Two double-width temporaries on the stack when not allocating. */
    sp_digit t1d[2 * 10];
    sp_digit t2d[2 * 10];
#endif
    sp_digit* t1;
    sp_digit* t2;
    int err = MP_OKAY;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* One allocation holds both temporaries (2 x 2*10 digits). */
    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
    if (d == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
        t1 = d + 0 * 10;
        t2 = d + 2 * 10;
#else
        t1 = t1d;
        t2 = t2d;
#endif

        {
            /* t2 = y ^ 0x2 */
            sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0x3 */
            sp_256_mont_mul_10(t1, t2, y, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xc */
            sp_256_mont_sqr_n_10(t2, t1, 2, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xf */
            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xf0 */
            sp_256_mont_sqr_n_10(t2, t1, 4, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xff */
            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xff00 */
            sp_256_mont_sqr_n_10(t2, t1, 8, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffff */
            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xffff0000 */
            sp_256_mont_sqr_n_10(t2, t1, 16, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff */
            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000000 */
            sp_256_mont_sqr_n_10(t1, t1, 32, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000001 */
            sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
            sp_256_mont_sqr_n_10(t1, t1, 96, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
            sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
            /* Final 94 squarings complete the (p256 + 1) / 4 exponent. */
            sp_256_mont_sqr_n_10(y, t1, 94, p256_mod, p256_mp_mod);
        }
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (d != NULL) {
        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
    }
#endif

    return err;
}
+
+
/* Uncompress the point given the X ordinate.
 *
 * Solves y^2 = x^3 - 3x + b (mod p256) for y, then picks the root whose
 * parity matches 'odd'.
 *
 * xm  X ordinate.
 * odd Whether the Y ordinate is odd.
 * ym  Calculated Y ordinate (output).
 * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
 */
int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
{
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* d;
#else
    sp_digit xd[2 * 10];
    sp_digit yd[2 * 10];
#endif
    sp_digit* x = NULL;
    sp_digit* y = NULL;
    int err = MP_OKAY;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* One allocation holds both temporaries (2 x 2*10 digits). */
    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
    if (d == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
        x = d + 0 * 10;
        y = d + 2 * 10;
#else
        x = xd;
        y = yd;
#endif

        /* Load x and convert it to Montgomery form. */
        sp_256_from_mp(x, 10, xm);
        err = sp_256_mod_mul_norm_10(x, x, p256_mod);
    }
    if (err == MP_OKAY) {
        /* y = x^3 */
        {
            sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod);
            sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
        }
        /* y = x^3 - 3x */
        sp_256_mont_sub_10(y, y, x, p256_mod);
        sp_256_mont_sub_10(y, y, x, p256_mod);
        sp_256_mont_sub_10(y, y, x, p256_mod);
        /* y = x^3 - 3x + b  (x is reused as a temporary for b here) */
        err = sp_256_mod_mul_norm_10(x, p256_b, p256_mod);
    }
    if (err == MP_OKAY) {
        sp_256_mont_add_10(y, y, x, p256_mod);
        /* y = sqrt(x^3 - 3x + b) */
        err = sp_256_mont_sqrt_10(y);
    }
    if (err == MP_OKAY) {
        /* Leave Montgomery form, then fix the parity of the root. */
        XMEMSET(y + 10, 0, 10U * sizeof(sp_digit));
        sp_256_mont_reduce_10(y, p256_mod, p256_mp_mod);
        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
            /* Wrong parity: use the other root, p - y. */
            sp_256_mont_sub_10(y, p256_mod, y, p256_mod);
        }

        err = sp_256_to_mp(y, ym);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (d != NULL) {
        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
    }
#endif

    return err;
}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
/* Point structure to use.
 * Each ordinate is double-width (2 * 15 limbs of 26 bits) so it can hold an
 * unreduced product during field operations. */
typedef struct sp_point_384 {
    sp_digit x[2 * 15];
    sp_digit y[2 * 15];
    sp_digit z[2 * 15];
    int infinity;
} sp_point_384;

/* The modulus (prime) of the curve P384. */
static const sp_digit p384_mod[15] = {
    0x3ffffff,0x000003f,0x0000000,0x3fc0000,0x2ffffff,0x3ffffff,0x3ffffff,
    0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x00fffff

};
/* The Montgomery normalizer for modulus of the curve P384. */
static const sp_digit p384_norm_mod[15] = {
    0x0000001,0x3ffffc0,0x3ffffff,0x003ffff,0x1000000,0x0000000,0x0000000,
    0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000

};
/* The Montgomery multiplier for modulus of the curve P384. */
static sp_digit p384_mp_mod = 0x000001;
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                            defined(HAVE_ECC_VERIFY)
/* The order of the curve P384. */
static const sp_digit p384_order[15] = {
    0x0c52973,0x3065ab3,0x277aece,0x2c922c2,0x3581a0d,0x10dcb77,0x234d81f,
    0x3ffff1d,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x00fffff

};
#endif
/* The order of the curve P384 minus 2. */
static const sp_digit p384_order2[15] = {
    0x0c52971,0x3065ab3,0x277aece,0x2c922c2,0x3581a0d,0x10dcb77,0x234d81f,
    0x3ffff1d,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x3ffffff,0x00fffff

};
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery normalizer for order of the curve P384. */
static const sp_digit p384_norm_order[15] = {
    0x33ad68d,0x0f9a54c,0x1885131,0x136dd3d,0x0a7e5f2,0x2f23488,0x1cb27e0,
    0x00000e2,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000

};
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery multiplier for order of the curve P384. */
static sp_digit p384_mp_order = 0x8fdc45;
#endif
/* The base point of curve P384. */
static const sp_point_384 p384_base = {
    /* X ordinate */
    {
        0x2760ab7,0x1178e1c,0x296c3a5,0x176fd54,0x05502f2,0x0950a8e,0x3741e08,
        0x26e6167,0x3628ba7,0x11b874e,0x3320ad7,0x2c71c7b,0x305378e,0x288afa2,0x00aa87c,

        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* Y ordinate */
    {
        0x0ea0e5f,0x0c75f24,0x019d7a4,0x33875fa,0x00a60b1,0x17c2e30,0x1a3113b,
        0x051f3a7,0x1bd289a,0x27e3d07,0x1292dc2,0x27a62fe,0x22c6f5d,0x392a589,0x003617d,

        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* Z ordinate */
    {
        0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
        0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,

        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* infinity */
    0
};
#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
/* The 'b' coefficient of the curve equation for P384. */
static const sp_digit p384_b[15] = {
    0x3ec2aef,0x1723b74,0x119d2a8,0x23628bb,0x2c65639,0x004e1d6,0x14088f5,
    0x104480c,0x06efe81,0x2460767,0x23f82d1,0x23815af,0x2e7e498,0x3e9f88f,0x00b3312

};
#endif
+
/* Obtain storage for a P384 point.
 *
 * heap Heap hint for XMALLOC (unused in the stack configuration).
 * sp   Caller-provided stack point (used when not allocating).
 * p    Receives the pointer to use: either freshly allocated or 'sp'.
 * returns MEMORY_E when allocation fails and MP_OKAY otherwise.
 */
static int sp_384_point_new_ex_15(void* heap, sp_point_384* sp, sp_point_384** p)
{
    int ret = MP_OKAY;
    (void)heap;
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    (void)sp;
    /* Small/small-stack build: point lives on the heap. */
    *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
#else
    /* Otherwise the caller's stack object is used directly. */
    *p = sp;
#endif
    if (*p == NULL) {
        ret = MEMORY_E;
    }
    return ret;
}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_15(heap, sp, p) sp_384_point_new_ex_15((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_15(heap, sp, p) sp_384_point_new_ex_15((heap), &(sp), &(p))
+#endif
+
+
/* Release a point obtained with sp_384_point_new_15.
 *
 * p     Point to free (may be NULL in the heap configuration).
 * clear When non-zero, zeroize the point data first (for secret values).
 * heap  Heap hint passed to XFREE.
 */
static void sp_384_point_free_15(sp_point_384* p, int clear, void* heap)
{
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
/* If valid pointer then clear point data if requested and free data. */
    if (p != NULL) {
        if (clear != 0) {
            XMEMSET(p, 0, sizeof(*p));
        }
        XFREE(p, heap, DYNAMIC_TYPE_ECC);
    }
#else
/* Stack point: only clear point data if requested. */
    if (clear != 0) {
        XMEMSET(p, 0, sizeof(*p));
    }
#endif
    (void)heap;
}
+
/* Multiply a number by Montgomery normalizer mod modulus (prime).
 *
 * Works by repacking the 15 x 26-bit limbs into 12 x 32-bit words, applying
 * the P384 reduction identity row by row (coefficients shown in the comments
 * below), propagating carries, then repacking into 26-bit limbs.
 *
 * r The resulting Montgomery form number.
 * a The number to convert.
 * m The modulus (prime) - unused, reduction constants are hard-coded.
 * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
 */
static int sp_384_mod_mul_norm_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    int64_t* td;
#else
    int64_t td[12];
    int64_t a32d[12];
#endif
    int64_t* t;
    int64_t* a32;
    int64_t o;
    int err = MP_OKAY;

    (void)m;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* One allocation for both 12-word arrays. */
    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
        t = td;
        a32 = td + 12;
#else
        t = td;
        a32 = a32d;
#endif

        /* Repack 15 x 26-bit limbs into 12 x 32-bit words. */
        a32[0] = a[0];
        a32[0] |= a[1] << 26U;
        a32[0] &= 0xffffffffL;
        a32[1] = (sp_digit)(a[1] >> 6);
        a32[1] |= a[2] << 20U;
        a32[1] &= 0xffffffffL;
        a32[2] = (sp_digit)(a[2] >> 12);
        a32[2] |= a[3] << 14U;
        a32[2] &= 0xffffffffL;
        a32[3] = (sp_digit)(a[3] >> 18);
        a32[3] |= a[4] << 8U;
        a32[3] &= 0xffffffffL;
        a32[4] = (sp_digit)(a[4] >> 24);
        a32[4] |= a[5] << 2U;
        a32[4] |= a[6] << 28U;
        a32[4] &= 0xffffffffL;
        a32[5] = (sp_digit)(a[6] >> 4);
        a32[5] |= a[7] << 22U;
        a32[5] &= 0xffffffffL;
        a32[6] = (sp_digit)(a[7] >> 10);
        a32[6] |= a[8] << 16U;
        a32[6] &= 0xffffffffL;
        a32[7] = (sp_digit)(a[8] >> 16);
        a32[7] |= a[9] << 10U;
        a32[7] &= 0xffffffffL;
        a32[8] = (sp_digit)(a[9] >> 22);
        a32[8] |= a[10] << 4U;
        a32[8] |= a[11] << 30U;
        a32[8] &= 0xffffffffL;
        a32[9] = (sp_digit)(a[11] >> 2);
        a32[9] |= a[12] << 24U;
        a32[9] &= 0xffffffffL;
        a32[10] = (sp_digit)(a[12] >> 8);
        a32[10] |= a[13] << 18U;
        a32[10] &= 0xffffffffL;
        a32[11] = (sp_digit)(a[13] >> 14);
        a32[11] |= a[14] << 12U;
        a32[11] &= 0xffffffffL;

        /* Apply the reduction matrix; each comment row lists the
         * coefficient of a32[0..11] used for that word of t. */
        /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
        t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11];
        /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
        t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11];
        /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
        t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11];
        /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
        t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11];
        /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
        t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] - 2 * a32[11];
        /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
        t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11];
        /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
        t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11];
        /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
        t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11];
        /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
        t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8];
        /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
        t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9];
        /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
        t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10];
        /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
        t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11];

        /* First carry propagation; fold the top carry 'o' back in using the
         * same reduction identity, then propagate once more. */
        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
        t[11] += t[10] >> 32; t[10] &= 0xffffffff;
        o = t[11] >> 32; t[11] &= 0xffffffff;
        t[0] += o;
        t[1] -= o;
        t[3] += o;
        t[4] += o;
        t[1] += t[0] >> 32; t[0] &= 0xffffffff;
        t[2] += t[1] >> 32; t[1] &= 0xffffffff;
        t[3] += t[2] >> 32; t[2] &= 0xffffffff;
        t[4] += t[3] >> 32; t[3] &= 0xffffffff;
        t[5] += t[4] >> 32; t[4] &= 0xffffffff;
        t[6] += t[5] >> 32; t[5] &= 0xffffffff;
        t[7] += t[6] >> 32; t[6] &= 0xffffffff;
        t[8] += t[7] >> 32; t[7] &= 0xffffffff;
        t[9] += t[8] >> 32; t[8] &= 0xffffffff;
        t[10] += t[9] >> 32; t[9] &= 0xffffffff;
        t[11] += t[10] >> 32; t[10] &= 0xffffffff;

        /* Repack 12 x 32-bit words into 15 x 26-bit limbs. */
        r[0] = (sp_digit)(t[0]) & 0x3ffffffL;
        r[1] = (sp_digit)(t[0] >> 26U);
        r[1] |= t[1] << 6U;
        r[1] &= 0x3ffffffL;
        r[2] = (sp_digit)(t[1] >> 20U);
        r[2] |= t[2] << 12U;
        r[2] &= 0x3ffffffL;
        r[3] = (sp_digit)(t[2] >> 14U);
        r[3] |= t[3] << 18U;
        r[3] &= 0x3ffffffL;
        r[4] = (sp_digit)(t[3] >> 8U);
        r[4] |= t[4] << 24U;
        r[4] &= 0x3ffffffL;
        r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL;
        r[6] = (sp_digit)(t[4] >> 28U);
        r[6] |= t[5] << 4U;
        r[6] &= 0x3ffffffL;
        r[7] = (sp_digit)(t[5] >> 22U);
        r[7] |= t[6] << 10U;
        r[7] &= 0x3ffffffL;
        r[8] = (sp_digit)(t[6] >> 16U);
        r[8] |= t[7] << 16U;
        r[8] &= 0x3ffffffL;
        r[9] = (sp_digit)(t[7] >> 10U);
        r[9] |= t[8] << 22U;
        r[9] &= 0x3ffffffL;
        r[10] = (sp_digit)(t[8] >> 4U) & 0x3ffffffL;
        r[11] = (sp_digit)(t[8] >> 30U);
        r[11] |= t[9] << 2U;
        r[11] &= 0x3ffffffL;
        r[12] = (sp_digit)(t[9] >> 24U);
        r[12] |= t[10] << 8U;
        r[12] &= 0x3ffffffL;
        r[13] = (sp_digit)(t[10] >> 18U);
        r[13] |= t[11] << 14U;
        r[13] &= 0x3ffffffL;
        r[14] = (sp_digit)(t[11] >> 12U);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (td != NULL)
        XFREE(td, NULL, DYNAMIC_TYPE_ECC);
#endif

    return err;
}
+
/* Convert an mp_int to an array of sp_digit.
 *
 * The three branches handle mp_int digit sizes equal to, larger than and
 * smaller than the 26-bit sp_digit size; in all cases unused high limbs of
 * r are zeroed.
 *
 * r     A single precision integer (array of 'size' 26-bit limbs, output).
 * size  Maximum number of limbs to convert into.
 * a     A multi-precision integer.
 */
static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 26
    /* Same limb size: straight copy, then zero the tail. */
    int j;

    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);

    for (j = a->used; j < size; j++) {
        r[j] = 0;
    }
#elif DIGIT_BIT > 26
    /* mp digits are wider: split each across one or more 26-bit limbs. */
    int i, j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0x3ffffff;
        s = 26U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        while ((s + 26U) <= (word32)DIGIT_BIT) {
            s += 26U;
            r[j] &= 0x3ffffff;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = 0L;
            }
        }
        s = (word32)DIGIT_BIT - s;
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    /* mp digits are narrower: accumulate several into each 26-bit limb. */
    int i, j = 0, s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 26) {
            r[j] &= 0x3ffffff;
            if (j + 1 >= size) {
                break;
            }
            s = 26 - s;
            if (s == DIGIT_BIT) {
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * p Point of type sp_point_384 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_15(sp_point_384* p, const ecc_point* pm)
+{
+ XMEMSET(p->x, 0, sizeof(p->x));
+ XMEMSET(p->y, 0, sizeof(p->y));
+ XMEMSET(p->z, 0, sizeof(p->z));
+ sp_384_from_mp(p->x, 15, pm->x);
+ sp_384_from_mp(p->y, 15, pm->y);
+ sp_384_from_mp(p->z, 15, pm->z);
+ p->infinity = 0;
+}
+
/* Convert an array of sp_digit to an mp_int.
 *
 * The three branches handle mp_int digit sizes equal to, smaller than and
 * larger than the 26-bit sp_digit size.
 *
 * a A single precision integer (15 x 26-bit limbs).
 * r A multi-precision integer (output).
 * returns MP_OKAY on success and the mp_grow error code otherwise.
 */
static int sp_384_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    /* Make sure r has room for a 384-bit value. */
    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 26
        /* Same limb size: straight copy. */
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 15);
        r->used = 15;
        mp_clamp(r);
#elif DIGIT_BIT < 26
        /* mp digits are narrower: split each limb across mp digits. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 15; i++) {
            r->dp[j] |= (mp_digit)(a[i] << s);
            r->dp[j] &= (1L << DIGIT_BIT) - 1;
            s = DIGIT_BIT - s;
            r->dp[++j] = (mp_digit)(a[i] >> s);
            while (s + DIGIT_BIT <= 26) {
                s += DIGIT_BIT;
                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = (mp_digit)(a[i] >> s);
                }
            }
            s = 26 - s;
        }
        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        /* mp digits are wider: pack several limbs into each mp digit. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 15; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 26 >= DIGIT_BIT) {
        #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                r->dp[j] &= (1L << DIGIT_BIT) - 1;
        #endif
                s = DIGIT_BIT - s;
                r->dp[++j] = a[i] >> s;
                s = 26 - s;
            }
            else {
                s += 26;
            }
        }
        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#endif
    }

    return err;
}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p Point of type sp_point_384.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_15(const sp_point_384* p, ecc_point* pm)
+{
+ int err;
+
+ err = sp_384_to_mp(p->x, pm->x);
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->y, pm->y);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->z, pm->z);
+ }
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Schoolbook multiply over 26-bit limbs, walking result columns from the
 * top down. r gets 30 limbs; limbs may be only partially reduced (the
 * carry into r[k + 2] below can leave values above 26 bits).
 *
 * r A single precision integer (30 limbs, output).
 * a A single precision integer (15 limbs).
 * b A single precision integer (15 limbs).
 */
SP_NOINLINE static void sp_384_mul_15(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    int i, j, k;
    int64_t c;

    /* Top column first: only a[14]*b[14] contributes. */
    c = ((int64_t)a[14]) * b[14];
    r[29] = (sp_digit)(c >> 26);
    c = (c & 0x3ffffff) << 26;
    for (k = 27; k >= 0; k--) {
        /* Accumulate all products a[i]*b[j] with i + j == k. */
        for (i = 14; i >= 0; i--) {
            j = k - i;
            if (j >= 15) {
                break;
            }
            if (j < 0) {
                continue;
            }

            c += ((int64_t)a[i]) * b[j];
        }
        /* c holds two limbs plus overflow: spill the overflow upward. */
        r[k + 2] += c >> 52;
        r[k + 1] = (c >> 26) & 0x3ffffff;
        c = (c & 0x3ffffff) << 26;
    }
    r[0] = (sp_digit)(c >> 26);
}
+
+#else
/* Multiply a and b into r. (r = a * b)
 *
 * Fully unrolled schoolbook multiply over 26-bit limbs: t{k} is the sum of
 * all products a[i]*b[j] with i + j == k, computed in 64-bit arithmetic,
 * followed by a single carry-propagation pass into the 30 result limbs.
 *
 * r A single precision integer (30 limbs, output).
 * a A single precision integer (15 limbs).
 * b A single precision integer (15 limbs).
 */
SP_NOINLINE static void sp_384_mul_15(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    int64_t t0   = ((int64_t)a[ 0]) * b[ 0];
    int64_t t1   = ((int64_t)a[ 0]) * b[ 1]
                 + ((int64_t)a[ 1]) * b[ 0];
    int64_t t2   = ((int64_t)a[ 0]) * b[ 2]
                 + ((int64_t)a[ 1]) * b[ 1]
                 + ((int64_t)a[ 2]) * b[ 0];
    int64_t t3   = ((int64_t)a[ 0]) * b[ 3]
                 + ((int64_t)a[ 1]) * b[ 2]
                 + ((int64_t)a[ 2]) * b[ 1]
                 + ((int64_t)a[ 3]) * b[ 0];
    int64_t t4   = ((int64_t)a[ 0]) * b[ 4]
                 + ((int64_t)a[ 1]) * b[ 3]
                 + ((int64_t)a[ 2]) * b[ 2]
                 + ((int64_t)a[ 3]) * b[ 1]
                 + ((int64_t)a[ 4]) * b[ 0];
    int64_t t5   = ((int64_t)a[ 0]) * b[ 5]
                 + ((int64_t)a[ 1]) * b[ 4]
                 + ((int64_t)a[ 2]) * b[ 3]
                 + ((int64_t)a[ 3]) * b[ 2]
                 + ((int64_t)a[ 4]) * b[ 1]
                 + ((int64_t)a[ 5]) * b[ 0];
    int64_t t6   = ((int64_t)a[ 0]) * b[ 6]
                 + ((int64_t)a[ 1]) * b[ 5]
                 + ((int64_t)a[ 2]) * b[ 4]
                 + ((int64_t)a[ 3]) * b[ 3]
                 + ((int64_t)a[ 4]) * b[ 2]
                 + ((int64_t)a[ 5]) * b[ 1]
                 + ((int64_t)a[ 6]) * b[ 0];
    int64_t t7   = ((int64_t)a[ 0]) * b[ 7]
                 + ((int64_t)a[ 1]) * b[ 6]
                 + ((int64_t)a[ 2]) * b[ 5]
                 + ((int64_t)a[ 3]) * b[ 4]
                 + ((int64_t)a[ 4]) * b[ 3]
                 + ((int64_t)a[ 5]) * b[ 2]
                 + ((int64_t)a[ 6]) * b[ 1]
                 + ((int64_t)a[ 7]) * b[ 0];
    int64_t t8   = ((int64_t)a[ 0]) * b[ 8]
                 + ((int64_t)a[ 1]) * b[ 7]
                 + ((int64_t)a[ 2]) * b[ 6]
                 + ((int64_t)a[ 3]) * b[ 5]
                 + ((int64_t)a[ 4]) * b[ 4]
                 + ((int64_t)a[ 5]) * b[ 3]
                 + ((int64_t)a[ 6]) * b[ 2]
                 + ((int64_t)a[ 7]) * b[ 1]
                 + ((int64_t)a[ 8]) * b[ 0];
    int64_t t9   = ((int64_t)a[ 0]) * b[ 9]
                 + ((int64_t)a[ 1]) * b[ 8]
                 + ((int64_t)a[ 2]) * b[ 7]
                 + ((int64_t)a[ 3]) * b[ 6]
                 + ((int64_t)a[ 4]) * b[ 5]
                 + ((int64_t)a[ 5]) * b[ 4]
                 + ((int64_t)a[ 6]) * b[ 3]
                 + ((int64_t)a[ 7]) * b[ 2]
                 + ((int64_t)a[ 8]) * b[ 1]
                 + ((int64_t)a[ 9]) * b[ 0];
    int64_t t10  = ((int64_t)a[ 0]) * b[10]
                 + ((int64_t)a[ 1]) * b[ 9]
                 + ((int64_t)a[ 2]) * b[ 8]
                 + ((int64_t)a[ 3]) * b[ 7]
                 + ((int64_t)a[ 4]) * b[ 6]
                 + ((int64_t)a[ 5]) * b[ 5]
                 + ((int64_t)a[ 6]) * b[ 4]
                 + ((int64_t)a[ 7]) * b[ 3]
                 + ((int64_t)a[ 8]) * b[ 2]
                 + ((int64_t)a[ 9]) * b[ 1]
                 + ((int64_t)a[10]) * b[ 0];
    int64_t t11  = ((int64_t)a[ 0]) * b[11]
                 + ((int64_t)a[ 1]) * b[10]
                 + ((int64_t)a[ 2]) * b[ 9]
                 + ((int64_t)a[ 3]) * b[ 8]
                 + ((int64_t)a[ 4]) * b[ 7]
                 + ((int64_t)a[ 5]) * b[ 6]
                 + ((int64_t)a[ 6]) * b[ 5]
                 + ((int64_t)a[ 7]) * b[ 4]
                 + ((int64_t)a[ 8]) * b[ 3]
                 + ((int64_t)a[ 9]) * b[ 2]
                 + ((int64_t)a[10]) * b[ 1]
                 + ((int64_t)a[11]) * b[ 0];
    int64_t t12  = ((int64_t)a[ 0]) * b[12]
                 + ((int64_t)a[ 1]) * b[11]
                 + ((int64_t)a[ 2]) * b[10]
                 + ((int64_t)a[ 3]) * b[ 9]
                 + ((int64_t)a[ 4]) * b[ 8]
                 + ((int64_t)a[ 5]) * b[ 7]
                 + ((int64_t)a[ 6]) * b[ 6]
                 + ((int64_t)a[ 7]) * b[ 5]
                 + ((int64_t)a[ 8]) * b[ 4]
                 + ((int64_t)a[ 9]) * b[ 3]
                 + ((int64_t)a[10]) * b[ 2]
                 + ((int64_t)a[11]) * b[ 1]
                 + ((int64_t)a[12]) * b[ 0];
    int64_t t13  = ((int64_t)a[ 0]) * b[13]
                 + ((int64_t)a[ 1]) * b[12]
                 + ((int64_t)a[ 2]) * b[11]
                 + ((int64_t)a[ 3]) * b[10]
                 + ((int64_t)a[ 4]) * b[ 9]
                 + ((int64_t)a[ 5]) * b[ 8]
                 + ((int64_t)a[ 6]) * b[ 7]
                 + ((int64_t)a[ 7]) * b[ 6]
                 + ((int64_t)a[ 8]) * b[ 5]
                 + ((int64_t)a[ 9]) * b[ 4]
                 + ((int64_t)a[10]) * b[ 3]
                 + ((int64_t)a[11]) * b[ 2]
                 + ((int64_t)a[12]) * b[ 1]
                 + ((int64_t)a[13]) * b[ 0];
    int64_t t14  = ((int64_t)a[ 0]) * b[14]
                 + ((int64_t)a[ 1]) * b[13]
                 + ((int64_t)a[ 2]) * b[12]
                 + ((int64_t)a[ 3]) * b[11]
                 + ((int64_t)a[ 4]) * b[10]
                 + ((int64_t)a[ 5]) * b[ 9]
                 + ((int64_t)a[ 6]) * b[ 8]
                 + ((int64_t)a[ 7]) * b[ 7]
                 + ((int64_t)a[ 8]) * b[ 6]
                 + ((int64_t)a[ 9]) * b[ 5]
                 + ((int64_t)a[10]) * b[ 4]
                 + ((int64_t)a[11]) * b[ 3]
                 + ((int64_t)a[12]) * b[ 2]
                 + ((int64_t)a[13]) * b[ 1]
                 + ((int64_t)a[14]) * b[ 0];
    int64_t t15  = ((int64_t)a[ 1]) * b[14]
                 + ((int64_t)a[ 2]) * b[13]
                 + ((int64_t)a[ 3]) * b[12]
                 + ((int64_t)a[ 4]) * b[11]
                 + ((int64_t)a[ 5]) * b[10]
                 + ((int64_t)a[ 6]) * b[ 9]
                 + ((int64_t)a[ 7]) * b[ 8]
                 + ((int64_t)a[ 8]) * b[ 7]
                 + ((int64_t)a[ 9]) * b[ 6]
                 + ((int64_t)a[10]) * b[ 5]
                 + ((int64_t)a[11]) * b[ 4]
                 + ((int64_t)a[12]) * b[ 3]
                 + ((int64_t)a[13]) * b[ 2]
                 + ((int64_t)a[14]) * b[ 1];
    int64_t t16  = ((int64_t)a[ 2]) * b[14]
                 + ((int64_t)a[ 3]) * b[13]
                 + ((int64_t)a[ 4]) * b[12]
                 + ((int64_t)a[ 5]) * b[11]
                 + ((int64_t)a[ 6]) * b[10]
                 + ((int64_t)a[ 7]) * b[ 9]
                 + ((int64_t)a[ 8]) * b[ 8]
                 + ((int64_t)a[ 9]) * b[ 7]
                 + ((int64_t)a[10]) * b[ 6]
                 + ((int64_t)a[11]) * b[ 5]
                 + ((int64_t)a[12]) * b[ 4]
                 + ((int64_t)a[13]) * b[ 3]
                 + ((int64_t)a[14]) * b[ 2];
    int64_t t17  = ((int64_t)a[ 3]) * b[14]
                 + ((int64_t)a[ 4]) * b[13]
                 + ((int64_t)a[ 5]) * b[12]
                 + ((int64_t)a[ 6]) * b[11]
                 + ((int64_t)a[ 7]) * b[10]
                 + ((int64_t)a[ 8]) * b[ 9]
                 + ((int64_t)a[ 9]) * b[ 8]
                 + ((int64_t)a[10]) * b[ 7]
                 + ((int64_t)a[11]) * b[ 6]
                 + ((int64_t)a[12]) * b[ 5]
                 + ((int64_t)a[13]) * b[ 4]
                 + ((int64_t)a[14]) * b[ 3];
    int64_t t18  = ((int64_t)a[ 4]) * b[14]
                 + ((int64_t)a[ 5]) * b[13]
                 + ((int64_t)a[ 6]) * b[12]
                 + ((int64_t)a[ 7]) * b[11]
                 + ((int64_t)a[ 8]) * b[10]
                 + ((int64_t)a[ 9]) * b[ 9]
                 + ((int64_t)a[10]) * b[ 8]
                 + ((int64_t)a[11]) * b[ 7]
                 + ((int64_t)a[12]) * b[ 6]
                 + ((int64_t)a[13]) * b[ 5]
                 + ((int64_t)a[14]) * b[ 4];
    int64_t t19  = ((int64_t)a[ 5]) * b[14]
                 + ((int64_t)a[ 6]) * b[13]
                 + ((int64_t)a[ 7]) * b[12]
                 + ((int64_t)a[ 8]) * b[11]
                 + ((int64_t)a[ 9]) * b[10]
                 + ((int64_t)a[10]) * b[ 9]
                 + ((int64_t)a[11]) * b[ 8]
                 + ((int64_t)a[12]) * b[ 7]
                 + ((int64_t)a[13]) * b[ 6]
                 + ((int64_t)a[14]) * b[ 5];
    int64_t t20  = ((int64_t)a[ 6]) * b[14]
                 + ((int64_t)a[ 7]) * b[13]
                 + ((int64_t)a[ 8]) * b[12]
                 + ((int64_t)a[ 9]) * b[11]
                 + ((int64_t)a[10]) * b[10]
                 + ((int64_t)a[11]) * b[ 9]
                 + ((int64_t)a[12]) * b[ 8]
                 + ((int64_t)a[13]) * b[ 7]
                 + ((int64_t)a[14]) * b[ 6];
    int64_t t21  = ((int64_t)a[ 7]) * b[14]
                 + ((int64_t)a[ 8]) * b[13]
                 + ((int64_t)a[ 9]) * b[12]
                 + ((int64_t)a[10]) * b[11]
                 + ((int64_t)a[11]) * b[10]
                 + ((int64_t)a[12]) * b[ 9]
                 + ((int64_t)a[13]) * b[ 8]
                 + ((int64_t)a[14]) * b[ 7];
    int64_t t22  = ((int64_t)a[ 8]) * b[14]
                 + ((int64_t)a[ 9]) * b[13]
                 + ((int64_t)a[10]) * b[12]
                 + ((int64_t)a[11]) * b[11]
                 + ((int64_t)a[12]) * b[10]
                 + ((int64_t)a[13]) * b[ 9]
                 + ((int64_t)a[14]) * b[ 8];
    int64_t t23  = ((int64_t)a[ 9]) * b[14]
                 + ((int64_t)a[10]) * b[13]
                 + ((int64_t)a[11]) * b[12]
                 + ((int64_t)a[12]) * b[11]
                 + ((int64_t)a[13]) * b[10]
                 + ((int64_t)a[14]) * b[ 9];
    int64_t t24  = ((int64_t)a[10]) * b[14]
                 + ((int64_t)a[11]) * b[13]
                 + ((int64_t)a[12]) * b[12]
                 + ((int64_t)a[13]) * b[11]
                 + ((int64_t)a[14]) * b[10];
    int64_t t25  = ((int64_t)a[11]) * b[14]
                 + ((int64_t)a[12]) * b[13]
                 + ((int64_t)a[13]) * b[12]
                 + ((int64_t)a[14]) * b[11];
    int64_t t26  = ((int64_t)a[12]) * b[14]
                 + ((int64_t)a[13]) * b[13]
                 + ((int64_t)a[14]) * b[12];
    int64_t t27  = ((int64_t)a[13]) * b[14]
                 + ((int64_t)a[14]) * b[13];
    int64_t t28  = ((int64_t)a[14]) * b[14];

    /* Carry propagation: keep 26 bits per result limb, push the rest up. */
    t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
    t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
    t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
    t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
    t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
    t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
    t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
    t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
    t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
    t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
    t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
    t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
    t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
    t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
    t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
    t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
    t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
    t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
    t19  += t18 >> 26; r[18] = t18 & 0x3ffffff;
    t20  += t19 >> 26; r[19] = t19 & 0x3ffffff;
    t21  += t20 >> 26; r[20] = t20 & 0x3ffffff;
    t22  += t21 >> 26; r[21] = t21 & 0x3ffffff;
    t23  += t22 >> 26; r[22] = t22 & 0x3ffffff;
    t24  += t23 >> 26; r[23] = t23 & 0x3ffffff;
    t25  += t24 >> 26; r[24] = t24 & 0x3ffffff;
    t26  += t25 >> 26; r[25] = t25 & 0x3ffffff;
    t27  += t26 >> 26; r[26] = t26 & 0x3ffffff;
    t28  += t27 >> 26; r[27] = t27 & 0x3ffffff;
    r[29] = (sp_digit)(t28 >> 26);
    r[28] = t28 & 0x3ffffff;
}
+
+#endif /* WOLFSSL_SP_SMALL */
+#define sp_384_mont_reduce_order_15 sp_384_mont_reduce_15
+
/* Compare a with b in constant time.
 *
 * Scans from the most significant limb down; once a difference has been
 * captured in r the mask (0 - (r == 0)) becomes 0, so later limbs cannot
 * overwrite it, and every limb is always touched (no data-dependent
 * early exit).
 *
 * a A single precision integer.
 * b A single precision integer.
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 */
static sp_digit sp_384_cmp_15(const sp_digit* a, const sp_digit* b)
{
    sp_digit r = 0;
#ifdef WOLFSSL_SP_SMALL
    int i;

    for (i=14; i>=0; i--) {
        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    }
#else
    r |= (a[14] - b[14]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[13] - b[13]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[12] - b[12]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[11] - b[11]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[10] - b[10]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 9] - b[ 9]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
#endif /* WOLFSSL_SP_SMALL */

    return r;
}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_384_cond_sub_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    /* (b[i] & m) is b[i] when m is all ones and 0 when m is 0, so the
+     * subtraction is performed unconditionally at the instruction level
+     * (no data-dependent branch). */
+    for (i = 0; i < 15; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] - (b[ 0] & m);
+    r[ 1] = a[ 1] - (b[ 1] & m);
+    r[ 2] = a[ 2] - (b[ 2] & m);
+    r[ 3] = a[ 3] - (b[ 3] & m);
+    r[ 4] = a[ 4] - (b[ 4] & m);
+    r[ 5] = a[ 5] - (b[ 5] & m);
+    r[ 6] = a[ 6] - (b[ 6] & m);
+    r[ 7] = a[ 7] - (b[ 7] & m);
+    r[ 8] = a[ 8] - (b[ 8] & m);
+    r[ 9] = a[ 9] - (b[ 9] & m);
+    r[10] = a[10] - (b[10] & m);
+    r[11] = a[11] - (b[11] & m);
+    r[12] = a[12] - (b[12] & m);
+    r[13] = a[13] - (b[13] & m);
+    r[14] = a[14] - (b[14] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Digits are 26 bits wide, so each 64-bit product plus carry plus the
+ * existing result digit cannot overflow an int64_t accumulator.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_384_mul_add_15(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    /* t accumulates product + previous digit; low 26 bits are stored,
+     * the rest carries into the next iteration. */
+    for (i = 0; i < 15; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[15] += t;
+#else
+    int64_t tb = b;
+    int64_t t[15];
+
+    /* Compute all partial products first, then fold each product's
+     * carry (t[i] >> 26) into the next result digit. */
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    t[ 7] = tb * a[ 7];
+    t[ 8] = tb * a[ 8];
+    t[ 9] = tb * a[ 9];
+    t[10] = tb * a[10];
+    t[11] = tb * a[11];
+    t[12] = tb * a[12];
+    t[13] = tb * a[13];
+    t[14] = tb * a[14];
+    r[ 0] += (sp_digit) (t[ 0] & 0x3ffffff);
+    r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff));
+    r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff));
+    r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff));
+    r[ 4] += (sp_digit)((t[ 3] >> 26) + (t[ 4] & 0x3ffffff));
+    r[ 5] += (sp_digit)((t[ 4] >> 26) + (t[ 5] & 0x3ffffff));
+    r[ 6] += (sp_digit)((t[ 5] >> 26) + (t[ 6] & 0x3ffffff));
+    r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff));
+    r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff));
+    r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff));
+    r[10] += (sp_digit)((t[ 9] >> 26) + (t[10] & 0x3ffffff));
+    r[11] += (sp_digit)((t[10] >> 26) + (t[11] & 0x3ffffff));
+    r[12] += (sp_digit)((t[11] >> 26) + (t[12] & 0x3ffffff));
+    r[13] += (sp_digit)((t[12] >> 26) + (t[13] & 0x3ffffff));
+    r[14] += (sp_digit)((t[13] >> 26) + (t[14] & 0x3ffffff));
+    r[15] += (sp_digit) (t[14] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 26.
+ *
+ * Carries are propagated from word 0 upwards; words 0..13 are masked to
+ * 26 bits, while word 14 keeps any accumulated excess (callers check or
+ * reduce it separately).
+ *
+ * a  Array of sp_digit to normalize.
+ */
+static void sp_384_norm_15(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 14; i++) {
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+#else
+    a[1] += a[0] >> 26; a[0] &= 0x3ffffff;
+    a[2] += a[1] >> 26; a[1] &= 0x3ffffff;
+    a[3] += a[2] >> 26; a[2] &= 0x3ffffff;
+    a[4] += a[3] >> 26; a[3] &= 0x3ffffff;
+    a[5] += a[4] >> 26; a[4] &= 0x3ffffff;
+    a[6] += a[5] >> 26; a[5] &= 0x3ffffff;
+    a[7] += a[6] >> 26; a[6] &= 0x3ffffff;
+    a[8] += a[7] >> 26; a[7] &= 0x3ffffff;
+    a[9] += a[8] >> 26; a[8] &= 0x3ffffff;
+    a[10] += a[9] >> 26; a[9] &= 0x3ffffff;
+    a[11] += a[10] >> 26; a[10] &= 0x3ffffff;
+    a[12] += a[11] >> 26; a[11] &= 0x3ffffff;
+    a[13] += a[12] >> 26; a[12] &= 0x3ffffff;
+    a[14] += a[13] >> 26; a[13] &= 0x3ffffff;
+#endif
+}
+
+/* Shift the result in the high 384 bits down to the bottom.
+ *
+ * Since 384 = 14*26 + 20, bit 384 sits at bit 20 of word 14: the shift
+ * starts from (a[14] >> 20) and each higher 26-bit word contributes its
+ * bits 6 positions up.  The upper 15 result words are cleared.
+ *
+ * r  A single precision number.
+ * a  A single precision number.
+ */
+static void sp_384_mont_shift_15(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int64_t n = a[14] >> 20;
+    n += ((int64_t)a[15]) << 6;
+
+    for (i = 0; i < 14; i++) {
+        r[i] = n & 0x3ffffff;
+        n >>= 26;
+        n += ((int64_t)a[16 + i]) << 6;
+    }
+    r[14] = (sp_digit)n;
+#else
+    int64_t n = a[14] >> 20;
+    n += ((int64_t)a[15]) << 6;
+    r[ 0] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[16]) << 6;
+    r[ 1] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[17]) << 6;
+    r[ 2] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[18]) << 6;
+    r[ 3] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[19]) << 6;
+    r[ 4] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[20]) << 6;
+    r[ 5] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[21]) << 6;
+    r[ 6] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[22]) << 6;
+    r[ 7] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[23]) << 6;
+    r[ 8] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[24]) << 6;
+    r[ 9] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[25]) << 6;
+    r[10] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[26]) << 6;
+    r[11] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[27]) << 6;
+    r[12] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[28]) << 6;
+    r[13] = n & 0x3ffffff; n >>= 26; n += ((int64_t)a[29]) << 6;
+    r[14] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[15], 0, sizeof(*r) * 15U);
+}
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_384_mont_reduce_15(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_384_norm_15(a + 15);
+
+    /* One pass per digit: choose mu so adding mu*m zeroes the low 26 bits
+     * of a[i], then push the carry into the next digit. */
+    for (i=0; i<14; i++) {
+        mu = (a[i] * mp) & 0x3ffffff;
+        sp_384_mul_add_15(a+i, m, mu);
+        a[i+1] += a[i] >> 26;
+    }
+    /* Final digit: only the low 20 bits matter since 384 = 14*26 + 20. */
+    mu = (a[i] * mp) & 0xfffffL;
+    sp_384_mul_add_15(a+i, m, mu);
+    a[i+1] += a[i] >> 26;
+    a[i] &= 0x3ffffff;
+
+    /* Drop the cleared low 384 bits, then conditionally subtract m once
+     * if the result still exceeds 384 bits. */
+    sp_384_mont_shift_15(a, a);
+    sp_384_cond_sub_15(a, a, m, 0 - (((a[14] >> 20) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_mul_15(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_15(r, a, b);
+    sp_384_mont_reduce_15(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-oriented (comba style) squaring working down from the highest
+ * result word.  Cross products a[i]*a[j] (i > j) are doubled; diagonal
+ * products a[i]*a[i] are added once.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_384_sqr_15(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int64_t c;
+
+    c = ((int64_t)a[14]) * a[14];
+    r[29] = (sp_digit)(c >> 26);
+    c = (c & 0x3ffffff) << 26;
+    for (k = 27; k >= 0; k--) {
+        /* Sum products for column k: pairs (i, j) with i + j == k. */
+        for (i = 14; i >= 0; i--) {
+            j = k - i;
+            if (j >= 15 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int64_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+            c += ((int64_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 52;
+        r[k + 1] = (c >> 26) & 0x3ffffff;
+        c = (c & 0x3ffffff) << 26;
+    }
+    r[0] = (sp_digit)(c >> 26);
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * Fully unrolled schoolbook squaring: t[k] is the column sum of all
+ * products a[i]*a[j] with i + j == k (cross terms doubled, diagonal
+ * terms added once).  Carries are then propagated in one linear pass,
+ * masking each result word to 26 bits.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_384_sqr_15(sp_digit* r, const sp_digit* a)
+{
+    int64_t t0 = ((int64_t)a[ 0]) * a[ 0];
+    int64_t t1 = (((int64_t)a[ 0]) * a[ 1]) * 2;
+    int64_t t2 = (((int64_t)a[ 0]) * a[ 2]) * 2
+               + ((int64_t)a[ 1]) * a[ 1];
+    int64_t t3 = (((int64_t)a[ 0]) * a[ 3]
+               + ((int64_t)a[ 1]) * a[ 2]) * 2;
+    int64_t t4 = (((int64_t)a[ 0]) * a[ 4]
+               + ((int64_t)a[ 1]) * a[ 3]) * 2
+               + ((int64_t)a[ 2]) * a[ 2];
+    int64_t t5 = (((int64_t)a[ 0]) * a[ 5]
+               + ((int64_t)a[ 1]) * a[ 4]
+               + ((int64_t)a[ 2]) * a[ 3]) * 2;
+    int64_t t6 = (((int64_t)a[ 0]) * a[ 6]
+               + ((int64_t)a[ 1]) * a[ 5]
+               + ((int64_t)a[ 2]) * a[ 4]) * 2
+               + ((int64_t)a[ 3]) * a[ 3];
+    int64_t t7 = (((int64_t)a[ 0]) * a[ 7]
+               + ((int64_t)a[ 1]) * a[ 6]
+               + ((int64_t)a[ 2]) * a[ 5]
+               + ((int64_t)a[ 3]) * a[ 4]) * 2;
+    int64_t t8 = (((int64_t)a[ 0]) * a[ 8]
+               + ((int64_t)a[ 1]) * a[ 7]
+               + ((int64_t)a[ 2]) * a[ 6]
+               + ((int64_t)a[ 3]) * a[ 5]) * 2
+               + ((int64_t)a[ 4]) * a[ 4];
+    int64_t t9 = (((int64_t)a[ 0]) * a[ 9]
+               + ((int64_t)a[ 1]) * a[ 8]
+               + ((int64_t)a[ 2]) * a[ 7]
+               + ((int64_t)a[ 3]) * a[ 6]
+               + ((int64_t)a[ 4]) * a[ 5]) * 2;
+    int64_t t10 = (((int64_t)a[ 0]) * a[10]
+               + ((int64_t)a[ 1]) * a[ 9]
+               + ((int64_t)a[ 2]) * a[ 8]
+               + ((int64_t)a[ 3]) * a[ 7]
+               + ((int64_t)a[ 4]) * a[ 6]) * 2
+               + ((int64_t)a[ 5]) * a[ 5];
+    int64_t t11 = (((int64_t)a[ 0]) * a[11]
+               + ((int64_t)a[ 1]) * a[10]
+               + ((int64_t)a[ 2]) * a[ 9]
+               + ((int64_t)a[ 3]) * a[ 8]
+               + ((int64_t)a[ 4]) * a[ 7]
+               + ((int64_t)a[ 5]) * a[ 6]) * 2;
+    int64_t t12 = (((int64_t)a[ 0]) * a[12]
+               + ((int64_t)a[ 1]) * a[11]
+               + ((int64_t)a[ 2]) * a[10]
+               + ((int64_t)a[ 3]) * a[ 9]
+               + ((int64_t)a[ 4]) * a[ 8]
+               + ((int64_t)a[ 5]) * a[ 7]) * 2
+               + ((int64_t)a[ 6]) * a[ 6];
+    int64_t t13 = (((int64_t)a[ 0]) * a[13]
+               + ((int64_t)a[ 1]) * a[12]
+               + ((int64_t)a[ 2]) * a[11]
+               + ((int64_t)a[ 3]) * a[10]
+               + ((int64_t)a[ 4]) * a[ 9]
+               + ((int64_t)a[ 5]) * a[ 8]
+               + ((int64_t)a[ 6]) * a[ 7]) * 2;
+    int64_t t14 = (((int64_t)a[ 0]) * a[14]
+               + ((int64_t)a[ 1]) * a[13]
+               + ((int64_t)a[ 2]) * a[12]
+               + ((int64_t)a[ 3]) * a[11]
+               + ((int64_t)a[ 4]) * a[10]
+               + ((int64_t)a[ 5]) * a[ 9]
+               + ((int64_t)a[ 6]) * a[ 8]) * 2
+               + ((int64_t)a[ 7]) * a[ 7];
+    int64_t t15 = (((int64_t)a[ 1]) * a[14]
+               + ((int64_t)a[ 2]) * a[13]
+               + ((int64_t)a[ 3]) * a[12]
+               + ((int64_t)a[ 4]) * a[11]
+               + ((int64_t)a[ 5]) * a[10]
+               + ((int64_t)a[ 6]) * a[ 9]
+               + ((int64_t)a[ 7]) * a[ 8]) * 2;
+    int64_t t16 = (((int64_t)a[ 2]) * a[14]
+               + ((int64_t)a[ 3]) * a[13]
+               + ((int64_t)a[ 4]) * a[12]
+               + ((int64_t)a[ 5]) * a[11]
+               + ((int64_t)a[ 6]) * a[10]
+               + ((int64_t)a[ 7]) * a[ 9]) * 2
+               + ((int64_t)a[ 8]) * a[ 8];
+    int64_t t17 = (((int64_t)a[ 3]) * a[14]
+               + ((int64_t)a[ 4]) * a[13]
+               + ((int64_t)a[ 5]) * a[12]
+               + ((int64_t)a[ 6]) * a[11]
+               + ((int64_t)a[ 7]) * a[10]
+               + ((int64_t)a[ 8]) * a[ 9]) * 2;
+    int64_t t18 = (((int64_t)a[ 4]) * a[14]
+               + ((int64_t)a[ 5]) * a[13]
+               + ((int64_t)a[ 6]) * a[12]
+               + ((int64_t)a[ 7]) * a[11]
+               + ((int64_t)a[ 8]) * a[10]) * 2
+               + ((int64_t)a[ 9]) * a[ 9];
+    int64_t t19 = (((int64_t)a[ 5]) * a[14]
+               + ((int64_t)a[ 6]) * a[13]
+               + ((int64_t)a[ 7]) * a[12]
+               + ((int64_t)a[ 8]) * a[11]
+               + ((int64_t)a[ 9]) * a[10]) * 2;
+    int64_t t20 = (((int64_t)a[ 6]) * a[14]
+               + ((int64_t)a[ 7]) * a[13]
+               + ((int64_t)a[ 8]) * a[12]
+               + ((int64_t)a[ 9]) * a[11]) * 2
+               + ((int64_t)a[10]) * a[10];
+    int64_t t21 = (((int64_t)a[ 7]) * a[14]
+               + ((int64_t)a[ 8]) * a[13]
+               + ((int64_t)a[ 9]) * a[12]
+               + ((int64_t)a[10]) * a[11]) * 2;
+    int64_t t22 = (((int64_t)a[ 8]) * a[14]
+               + ((int64_t)a[ 9]) * a[13]
+               + ((int64_t)a[10]) * a[12]) * 2
+               + ((int64_t)a[11]) * a[11];
+    int64_t t23 = (((int64_t)a[ 9]) * a[14]
+               + ((int64_t)a[10]) * a[13]
+               + ((int64_t)a[11]) * a[12]) * 2;
+    int64_t t24 = (((int64_t)a[10]) * a[14]
+               + ((int64_t)a[11]) * a[13]) * 2
+               + ((int64_t)a[12]) * a[12];
+    int64_t t25 = (((int64_t)a[11]) * a[14]
+               + ((int64_t)a[12]) * a[13]) * 2;
+    int64_t t26 = (((int64_t)a[12]) * a[14]) * 2
+               + ((int64_t)a[13]) * a[13];
+    int64_t t27 = (((int64_t)a[13]) * a[14]) * 2;
+    int64_t t28 = ((int64_t)a[14]) * a[14];
+
+    /* Carry propagation: push each column's overflow into the next
+     * column, keeping 26 bits per result word. */
+    t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff;
+    t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff;
+    t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff;
+    t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff;
+    t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff;
+    t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff;
+    t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff;
+    t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff;
+    t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff;
+    t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff;
+    t11 += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12 += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13 += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14 += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15 += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16 += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17 += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18 += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    t19 += t18 >> 26; r[18] = t18 & 0x3ffffff;
+    t20 += t19 >> 26; r[19] = t19 & 0x3ffffff;
+    t21 += t20 >> 26; r[20] = t20 & 0x3ffffff;
+    t22 += t21 >> 26; r[21] = t21 & 0x3ffffff;
+    t23 += t22 >> 26; r[22] = t22 & 0x3ffffff;
+    t24 += t23 >> 26; r[23] = t23 & 0x3ffffff;
+    t25 += t24 >> 26; r[24] = t24 & 0x3ffffff;
+    t26 += t25 >> 26; r[25] = t25 & 0x3ffffff;
+    t27 += t26 >> 26; r[26] = t26 & 0x3ffffff;
+    t28 += t27 >> 26; r[27] = t27 & 0x3ffffff;
+    r[29] = (sp_digit)(t28 >> 26);
+    r[28] = t28 & 0x3ffffff;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_15(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_15(r, a);
+    sp_384_mont_reduce_15(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ (2^n) mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square (must be >= 1).
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_n_15(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_15(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_15(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* The P384 modulus minus 2 as 32-bit little-endian words; used as the
+ * exponent when inverting via Fermat's little theorem (a^(p-2) mod p). */
+static const uint32_t p384_mod_minus_2[12] = {
+    0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r  Inverse result.
+ * a  Number to invert.
+ * td Temporary data (small build: 15 digits; otherwise 10*15 digits,
+ *    as t5 = td + 8*15 spans two 15-digit ranges).
+ */
+static void sp_384_mont_inv_15(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Left-to-right binary exponentiation of a^(p-2): square each step,
+     * multiply when the corresponding exponent bit is set. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 15);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_15(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_384_mont_mul_15(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 15);
+#else
+    /* Fixed addition chain for the exponent p - 2; each step's comment
+     * gives the exponent value accumulated so far. */
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 15;
+    sp_digit* t3 = td + 4 * 15;
+    sp_digit* t4 = td + 6 * 15;
+    sp_digit* t5 = td + 8 * 15;
+
+    /* 0x2 */
+    sp_384_mont_sqr_15(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_15(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_15(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_15(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_15(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_15(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_15(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_15(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_15(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_15(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_15(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_15(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_15(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_15(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_15(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_15(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_15(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_15(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_15(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_15(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_15(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_15(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_15(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes x = X/Z^2 and y = Y/Z^3, converts out of Montgomery form via
+ * a reduction, and sets z = 1.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*15;
+    int32_t n;
+
+    sp_384_mont_inv_15(t1, p->z, t + 2*15);
+
+    /* t2 = 1/z^2, t1 = 1/z^3 (both in Montgomery form). */
+    sp_384_mont_sqr_15(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_15(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_15(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 15, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_15(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_15(r->x, p384_mod);
+    sp_384_cond_sub_15(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_15(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 15, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_15(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_15(r->y, p384_mod);
+    sp_384_cond_sub_15(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r->y);
+
+    /* Affine representation: z = 1. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * No carry propagation is done here: word sums may exceed 26 bits and
+ * callers normalize afterwards (see sp_384_mont_add_15).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_384_add_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * Unrolled word-wise addition; no carry propagation (callers normalize).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_384_add_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] + b[ 0];
+    r[ 1] = a[ 1] + b[ 1];
+    r[ 2] = a[ 2] + b[ 2];
+    r[ 3] = a[ 3] + b[ 3];
+    r[ 4] = a[ 4] + b[ 4];
+    r[ 5] = a[ 5] + b[ 5];
+    r[ 6] = a[ 6] + b[ 6];
+    r[ 7] = a[ 7] + b[ 7];
+    r[ 8] = a[ 8] + b[ 8];
+    r[ 9] = a[ 9] + b[ 9];
+    r[10] = a[10] + b[10];
+    r[11] = a[11] + b[11];
+    r[12] = a[12] + b[12];
+    r[13] = a[13] + b[13];
+    r[14] = a[14] + b[14];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * After the raw add, a single conditional subtract of m is enough since
+ * both inputs are less than m.  Overflow past 384 bits shows up in bits
+ * 20+ of word 14 (384 = 14*26 + 20).
+ *
+ * r Result of addition.
+ * a First number to add in Montgomery form.
+ * b Second number to add in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_add_15(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_384_add_15(r, a, b);
+    sp_384_norm_15(r);
+    sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r Result of doubling.
+ * a Number to double in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_dbl_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_384_add_15(r, a, a);
+    sp_384_norm_15(r);
+    sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Implemented as two modular additions (double then add) so each step
+ * needs at most one conditional subtract of m.
+ *
+ * r Result of Tripling.
+ * a Number to triple in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_tpl_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_384_add_15(r, a, a);
+    sp_384_norm_15(r);
+    sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r);
+    (void)sp_384_add_15(r, r, a);
+    sp_384_norm_15(r);
+    sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_15(r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * No borrow propagation; word differences may go negative and callers
+ * repair them (see sp_384_mont_sub_15).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_384_sub_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Unrolled word-wise subtraction; no borrow propagation (callers repair).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_384_sub_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] - b[ 0];
+    r[ 1] = a[ 1] - b[ 1];
+    r[ 2] = a[ 2] - b[ 2];
+    r[ 3] = a[ 3] - b[ 3];
+    r[ 4] = a[ 4] - b[ 4];
+    r[ 5] = a[ 5] - b[ 5];
+    r[ 6] = a[ 6] - b[ 6];
+    r[ 7] = a[ 7] - b[ 7];
+    r[ 8] = a[ 8] - b[ 8];
+    r[ 9] = a[ 9] - b[ 9];
+    r[10] = a[10] - b[10];
+    r[11] = a[11] - b[11];
+    r[12] = a[12] - b[12];
+    r[13] = a[13] - b[13];
+    r[14] = a[14] - b[14];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * (b[i] & m) selects b[i] or 0 without a data-dependent branch.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_384_cond_add_15(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] + (b[ 0] & m);
+    r[ 1] = a[ 1] + (b[ 1] & m);
+    r[ 2] = a[ 2] + (b[ 2] & m);
+    r[ 3] = a[ 3] + (b[ 3] & m);
+    r[ 4] = a[ 4] + (b[ 4] & m);
+    r[ 5] = a[ 5] + (b[ 5] & m);
+    r[ 6] = a[ 6] + (b[ 6] & m);
+    r[ 7] = a[ 7] + (b[ 7] & m);
+    r[ 8] = a[ 8] + (b[ 8] & m);
+    r[ 9] = a[ 9] + (b[ 9] & m);
+    r[10] = a[10] + (b[10] & m);
+    r[11] = a[11] + (b[11] & m);
+    r[12] = a[12] + (b[12] & m);
+    r[13] = a[13] + (b[13] & m);
+    r[14] = a[14] + (b[14] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * If the raw subtract went negative, the sign appears in bits 20+ of
+ * word 14 and (r[14] >> 20) is an all-ones mask, so m is added back
+ * exactly once.
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_sub_15(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_384_sub_15(r, a, b);
+    sp_384_cond_add_15(r, r, m, r[14] >> 20);
+    sp_384_norm_15(r);
+}
+
+/* Shift number right one bit. (Original comment said "left", but the
+ * code shifts right - each word takes bit 0 of the next word as its new
+ * top bit.)
+ * Bottom bit is lost.
+ *
+ * r Result of shift.
+ * a Number to shift.
+ */
+SP_NOINLINE static void sp_384_rshift1_15(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<14; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 25)) & 0x3ffffff;
+    r[1] = ((a[1] >> 1) | (a[2] << 25)) & 0x3ffffff;
+    r[2] = ((a[2] >> 1) | (a[3] << 25)) & 0x3ffffff;
+    r[3] = ((a[3] >> 1) | (a[4] << 25)) & 0x3ffffff;
+    r[4] = ((a[4] >> 1) | (a[5] << 25)) & 0x3ffffff;
+    r[5] = ((a[5] >> 1) | (a[6] << 25)) & 0x3ffffff;
+    r[6] = ((a[6] >> 1) | (a[7] << 25)) & 0x3ffffff;
+    r[7] = ((a[7] >> 1) | (a[8] << 25)) & 0x3ffffff;
+    r[8] = ((a[8] >> 1) | (a[9] << 25)) & 0x3ffffff;
+    r[9] = ((a[9] >> 1) | (a[10] << 25)) & 0x3ffffff;
+    r[10] = ((a[10] >> 1) | (a[11] << 25)) & 0x3ffffff;
+    r[11] = ((a[11] >> 1) | (a[12] << 25)) & 0x3ffffff;
+    r[12] = ((a[12] >> 1) | (a[13] << 25)) & 0x3ffffff;
+    r[13] = ((a[13] >> 1) | (a[14] << 25)) & 0x3ffffff;
+#endif
+    r[14] = a[14] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * When a is odd, m (odd prime) is added first so that the value becomes
+ * even while staying congruent to a mod m; the right shift then yields
+ * an exact halving.
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+static void sp_384_div2_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_384_cond_add_15(r, a, m, 0 - (a[0] & 1));
+    sp_384_norm_15(r);
+    sp_384_rshift1_15(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * Jacobian-coordinate doubling; r may alias p.  t must provide room for
+ * two 2*15-digit temporaries (t1, t2).
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_15(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*15;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_15(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_15(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_15(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_15(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_15(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_15(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_15(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_15(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_15(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_15(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_15(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_15(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_15(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_15(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_15(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_15(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_15(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation: all words are XORed and OR-combined so
+ * no early exit leaks the position of a difference.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_15(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
+            (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11]) |
+            (a[12] ^ b[12]) | (a[13] ^ b[13]) | (a[14] ^ b[14])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Jacobian-coordinate addition.  Falls back to doubling when p == q or
+ * p == -q is detected.  t must hold five 2*15-digit temporaries; the
+ * same buffer is also reused as a scratch point when either input is
+ * the point at infinity.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_15(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*15;
+    sp_digit* t3 = t + 4*15;
+    sp_digit* t4 = t + 6*15;
+    sp_digit* t5 = t + 8*15;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: t1 = -q->y; p == q (doubling) when x and z match and
+     * y is either q->y or its negation. */
+    (void)sp_384_sub_15(t1, p384_mod, q->y);
+    sp_384_norm_15(t1);
+    if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) &
+        (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_15(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        /* If either input is infinity, write the working values into the
+         * scratch point so r keeps the other (copied) operand. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* Seed r with p, or with q when p is infinity. */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<15; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<15; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<15; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_15(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_15(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_15(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_15(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_15(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_15(x, x, t5, p384_mod);
+        sp_384_mont_dbl_15(t1, y, p384_mod);
+        sp_384_mont_sub_15(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_15(y, y, x, p384_mod);
+        sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_15(y, y, t5, p384_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Binary ladder: for every scalar bit an add and a double are both
+ * performed, with the operand/destination selected through addr_mask
+ * arithmetic rather than a data-dependent branch.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifdef WOLFSSL_SP_NO_MALLOC
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 15 * 6];
+#else
+    sp_point_384* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+
+        /* t[0] = {0, 0, 1} * norm */
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod);
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod);
+
+    if (err == MP_OKAY) {
+        /* Start at the top bit: 384 = 14*26 + 20, so word 14 holds 20
+         * bits and the first word is pre-shifted accordingly. */
+        i = 14;
+        c = 20;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            /* Always add, then always double; addr_mask picks which of
+             * t[0]/t[1] is read/written based on the key bit without a
+             * secret-dependent branch. */
+            sp_384_proj_point_add_15(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])),
+                    sizeof(sp_point_384));
+            sp_384_proj_point_dbl_15(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2],
+                    sizeof(sp_point_384));
+        }
+
+        if (map != 0) {
+            sp_384_map_15(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_384));
+        }
+    }
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    /* NOTE(review): buffers were allocated with 'heap' but are freed
+     * with a NULL heap pointer -- confirm XFREE ignores that argument
+     * in this build. */
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 15 * 6);
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+        XFREE(t, NULL, DYNAMIC_TYPE_ECC);
+    }
+#else
+    /* Static arrays here, so sizeof() gives the full buffer sizes. */
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Cache-attack resistant variant: identical constant-time ladder to the
+ * small build, with masked addressing so memory accesses are independent
+ * of the secret scalar k.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 t[3];
+ sp_digit tmp[2 * 15 * 6];
+#else
+ sp_point_384* t;
+ sp_digit* tmp;
+#endif
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+ (void)heap;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ /* Cast fixed: was (sp_point*), the wrong (P-256) point type. */
+ t = (sp_point_384*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
+ if (t == NULL)
+ err = MEMORY_E;
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#endif
+
+ if (err == MP_OKAY) {
+ /* t[0] = {0, 0, 1} * norm */
+ XMEMSET(&t[0], 0, sizeof(t[0]));
+ t[0].infinity = 1;
+ /* t[1] = {g->x, g->y, g->z} * norm */
+ t[1].infinity = 0;
+ err = sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod);
+ }
+ if (err == MP_OKAY)
+ err = sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod);
+ if (err == MP_OKAY)
+ err = sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod);
+
+ if (err == MP_OKAY) {
+ /* Top digit of the 15x26-bit scalar carries 20 significant bits. */
+ i = 14;
+ c = 20;
+ n = k[i--] << (26 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1)
+ break;
+
+ n = k[i--];
+ c = 26;
+ }
+
+ y = (n >> 25) & 1;
+ n <<= 1;
+
+ sp_384_proj_point_add_15(&t[y^1], &t[0], &t[1], tmp);
+
+ XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+ ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+ sp_384_proj_point_dbl_15(&t[2], &t[2], tmp);
+ XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+ ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+ }
+
+ if (map != 0) {
+ sp_384_map_15(r, &t[0], tmp);
+ }
+ else {
+ XMEMCPY(r, &t[0], sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 15 * 6);
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+ }
+ if (t != NULL) {
+ XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ ForceZero(tmp, sizeof(tmp));
+ ForceZero(t, sizeof(t));
+#endif
+
+ return err;
+}
+
+#else
+/* A table entry for pre-computed points.
+ * Only x and y are stored: table points are affine (z is implied). */
+typedef struct sp_table_entry_384 {
+ sp_digit x[15]; /* X ordinate of the affine point. */
+ sp_digit y[15]; /* Y ordinate of the affine point. */
+} sp_table_entry_384;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Fixed-window method, 4 bits at a time: a table of the multiples
+ * 0*g .. 15*g is built, then for each 4-bit window the accumulator is
+ * doubled four times and the table entry for the window value is added.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td[16];
+ sp_point_384 rtd;
+ sp_digit tmpd[2 * 15 * 6];
+#endif
+ sp_point_384* t;
+ sp_point_384* rt;
+ sp_digit* tmp;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_15(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+ if (t == NULL)
+ err = MEMORY_E;
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#else
+ t = td;
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Build the window table t[i] = i * g (i = 0..15), each entry
+ * computed as a double or add of earlier entries. */
+ /* t[0] = {0, 0, 1} * norm */
+ XMEMSET(&t[0], 0, sizeof(t[0]));
+ t[0].infinity = 1;
+ /* t[1] = {g->x, g->y, g->z} * norm */
+ (void)sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod);
+ (void)sp_384_mod_mul_norm_15(t[1].y, g->y, p384_mod);
+ (void)sp_384_mod_mul_norm_15(t[1].z, g->z, p384_mod);
+ t[1].infinity = 0;
+ sp_384_proj_point_dbl_15(&t[ 2], &t[ 1], tmp);
+ t[ 2].infinity = 0;
+ sp_384_proj_point_add_15(&t[ 3], &t[ 2], &t[ 1], tmp);
+ t[ 3].infinity = 0;
+ sp_384_proj_point_dbl_15(&t[ 4], &t[ 2], tmp);
+ t[ 4].infinity = 0;
+ sp_384_proj_point_add_15(&t[ 5], &t[ 3], &t[ 2], tmp);
+ t[ 5].infinity = 0;
+ sp_384_proj_point_dbl_15(&t[ 6], &t[ 3], tmp);
+ t[ 6].infinity = 0;
+ sp_384_proj_point_add_15(&t[ 7], &t[ 4], &t[ 3], tmp);
+ t[ 7].infinity = 0;
+ sp_384_proj_point_dbl_15(&t[ 8], &t[ 4], tmp);
+ t[ 8].infinity = 0;
+ sp_384_proj_point_add_15(&t[ 9], &t[ 5], &t[ 4], tmp);
+ t[ 9].infinity = 0;
+ sp_384_proj_point_dbl_15(&t[10], &t[ 5], tmp);
+ t[10].infinity = 0;
+ sp_384_proj_point_add_15(&t[11], &t[ 6], &t[ 5], tmp);
+ t[11].infinity = 0;
+ sp_384_proj_point_dbl_15(&t[12], &t[ 6], tmp);
+ t[12].infinity = 0;
+ sp_384_proj_point_add_15(&t[13], &t[ 7], &t[ 6], tmp);
+ t[13].infinity = 0;
+ sp_384_proj_point_dbl_15(&t[14], &t[ 7], tmp);
+ t[14].infinity = 0;
+ sp_384_proj_point_add_15(&t[15], &t[ 8], &t[ 7], tmp);
+ t[15].infinity = 0;
+
+ /* Seed the accumulator from the top 4-bit window, then consume the
+ * rest of the scalar (15 digits of 26 bits) 4 bits at a time. */
+ i = 13;
+ n = k[i+1] << 6;
+ c = 16;
+ y = n >> 22;
+ XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+ n <<= 10;
+ for (; i>=0 || c>=4; ) {
+ if (c < 4) {
+ /* Refill the bit buffer from the next scalar digit. */
+ n |= k[i--] << (6 - c);
+ c += 26;
+ }
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+
+ sp_384_proj_point_dbl_15(rt, rt, tmp);
+ sp_384_proj_point_dbl_15(rt, rt, tmp);
+ sp_384_proj_point_dbl_15(rt, rt, tmp);
+ sp_384_proj_point_dbl_15(rt, rt, tmp);
+
+ sp_384_proj_point_add_15(rt, rt, &t[y], tmp);
+ }
+
+ if (map != 0) {
+ sp_384_map_15(r, rt, tmp);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 15 * 6);
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+ }
+ if (t != NULL) {
+ XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ ForceZero(tmpd, sizeof(tmpd));
+ ForceZero(td, sizeof(td));
+#endif
+ sp_384_point_free_15(rt, 1, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ * The result is written back into p.
+ *
+ * p Point to double; receives the result.
+ * n Number of times to double
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int n, sp_digit* t)
+{
+ /* Scratch layout: five 2*15-digit field elements carved out of t. */
+ sp_digit* w = t;
+ sp_digit* a = t + 2*15;
+ sp_digit* b = t + 4*15;
+ sp_digit* t1 = t + 6*15;
+ sp_digit* t2 = t + 8*15;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = p->x;
+ y = p->y;
+ z = p->z;
+
+ /* Y = 2*Y */
+ sp_384_mont_dbl_15(y, y, p384_mod);
+ /* W = Z^4 */
+ sp_384_mont_sqr_15(w, z, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_15(w, w, p384_mod, p384_mp_mod);
+
+ /* Non-small builds peel the last iteration out of the loop so the
+ * final W update (only needed between iterations) can be skipped. */
+#ifndef WOLFSSL_SP_SMALL
+ while (--n > 0)
+#else
+ while (--n >= 0)
+#endif
+ {
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_15(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_15(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_15(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_15(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_15(t2, b, p384_mod);
+ sp_384_mont_sub_15(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_15(z, z, y, p384_mod, p384_mp_mod);
+ /* t2 = Y^4 */
+ sp_384_mont_sqr_15(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+ if (n != 0)
+#endif
+ {
+ /* W = W*Y^4 */
+ sp_384_mont_mul_15(w, w, t1, p384_mod, p384_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_15(y, b, x, p384_mod);
+ sp_384_mont_mul_15(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_15(y, y, p384_mod);
+ sp_384_mont_sub_15(y, y, t1, p384_mod);
+ }
+#ifndef WOLFSSL_SP_SMALL
+ /* Final (peeled) iteration: same as the loop body but without the
+ * W update. */
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_15(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_15(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_15(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_15(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_15(t2, b, p384_mod);
+ sp_384_mont_sub_15(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_15(z, z, y, p384_mod, p384_mp_mod);
+ /* t2 = Y^4 */
+ sp_384_mont_sqr_15(t1, t1, p384_mod, p384_mp_mod);
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_15(y, b, x, p384_mod);
+ sp_384_mont_mul_15(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_15(y, y, p384_mod);
+ sp_384_mont_sub_15(y, y, t1, p384_mod);
+#endif
+ /* Y = Y/2 (undoes the initial Y = 2*Y) */
+ sp_384_div2_15(y, y, p384_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_15(sp_point_384* r, const sp_point_384* p,
+ const sp_point_384* q, sp_digit* t)
+{
+ const sp_point_384* ap[2];
+ sp_point_384* rp[2];
+ /* Scratch layout: five 2*15-digit field elements carved out of t. */
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*15;
+ sp_digit* t3 = t + 4*15;
+ sp_digit* t4 = t + 6*15;
+ sp_digit* t5 = t + 8*15;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Check double: p == q (or p == -q) degenerates the addition formula,
+ * so dispatch to the doubling routine instead. t1 = -q->y mod p. */
+ (void)sp_384_sub_15(t1, p384_mod, q->y);
+ sp_384_norm_15(t1);
+ if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) &
+ (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) {
+ sp_384_proj_point_dbl_15(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /* If either input is infinity, the arithmetic is redirected into
+ * scratch (rp[1]) and the non-infinity operand is copied to r. */
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_384));
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<15; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<15; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<15; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* Mixed addition: q->z == 1 removes the Z2 terms of the general
+ * Jacobian addition formula. */
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_15(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - X1 */
+ sp_384_mont_sub_15(t2, t2, x, p384_mod);
+ /* R = S2 - Y1 */
+ sp_384_mont_sub_15(t4, t4, y, p384_mod);
+ /* Z3 = H*Z1 */
+ sp_384_mont_mul_15(z, z, t2, p384_mod, p384_mp_mod);
+ /* X3 = R^2 - H^3 - 2*X1*H^2 */
+ sp_384_mont_sqr_15(t1, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(t3, x, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_15(x, t1, t5, p384_mod);
+ sp_384_mont_dbl_15(t1, t3, p384_mod);
+ sp_384_mont_sub_15(x, x, t1, p384_mod);
+ /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+ sp_384_mont_sub_15(t3, t3, x, p384_mod);
+ sp_384_mont_mul_15(t3, t3, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(t5, t5, y, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_15(y, t3, t5, p384_mod);
+ }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * Computes x = X/Z^2, y = Y/Z^3 and resets Z to the Montgomery one.
+ *
+ * a Point to convert.
+ * t Temporary data (at least 4*15 digits plus inversion scratch).
+ */
+static void sp_384_proj_to_affine_15(sp_point_384* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 15;
+ sp_digit* tmp = t + 4 * 15;
+
+ /* t1 = 1/Z */
+ sp_384_mont_inv_15(t1, a->z, tmp);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3 */
+ sp_384_mont_sqr_15(t2, t1, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(t1, t2, t1, p384_mod, p384_mp_mod);
+
+ sp_384_mont_mul_15(a->x, a->x, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(a->y, a->y, t1, p384_mod, p384_mp_mod);
+ XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * Builds all 256 combinations sum(b_i * 2^(48*i) * a) for bit patterns
+ * b_7..b_0, used by the striped scalar-multiplication routine.
+ *
+ * a The base point.
+ * table Place to store generated point data (256 entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_gen_stripe_table_15(const sp_point_384* a,
+ sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td, s1d, s2d;
+#endif
+ sp_point_384* t;
+ sp_point_384* s1 = NULL;
+ sp_point_384* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_15(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_15(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_15(heap, s2d, s2);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_15(t->x, a->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_15(t->y, a->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_15(t->z, a->z, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_384_proj_to_affine_15(t, tmp);
+
+ XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(48*i) * a, via 48 repeated doublings each step. */
+ for (i=1; i<8; i++) {
+ sp_384_proj_point_dbl_n_15(t, 48, tmp);
+ sp_384_proj_to_affine_15(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Fill in every remaining index as table[2^i] + table[j - 2^i]. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_384_proj_point_add_qz1_15(t, s1, s2, tmp);
+ sp_384_proj_to_affine_15(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_384_point_free_15(s2, 0, heap);
+ sp_384_point_free_15(s1, 0, heap);
+ sp_384_point_free_15( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Striped method: each step combines 8 scalar bits, one taken every 48 bit
+ * positions, by looking up their pre-computed sum in the table.
+ *
+ * r Resulting point.
+ * g Point the table was generated from (unused here).
+ * table Pre-computed table of 256 points for g.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_15(sp_point_384* r, const sp_point_384* g,
+ const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 rtd;
+ sp_point_384 pd;
+ sp_digit td[2 * 15 * 6];
+#endif
+ sp_point_384* rt;
+ sp_point_384* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_384_point_new_15(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_15(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Table entries are affine; give both working points the
+ * Montgomery-form Z = 1. */
+ XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+ XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+ /* Seed from bit positions 47, 95, ... (the top bit of each stripe). */
+ y = 0;
+ for (j=0,x=47; j<8; j++,x+=48) {
+ y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ for (i=46; i>=0; i--) {
+ /* Gather bit i of every 48-bit stripe into an 8-bit index. */
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=48) {
+ y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+ }
+
+ sp_384_proj_point_dbl_15(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_384_proj_point_add_qz1_15(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_384_map_15(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_15(p, 0, heap);
+ sp_384_point_free_15(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry mapping a point to its pre-computed stripe table. */
+typedef struct sp_cache_384_t {
+ sp_digit x[15]; /* X ordinate of the cached point. */
+ sp_digit y[15]; /* Y ordinate of the cached point. */
+ sp_table_entry_384 table[256]; /* Pre-computed stripe table for the point. */
+ uint32_t cnt; /* Use count, for least-used eviction. */
+ int set; /* Non-zero when this entry is populated. */
+} sp_cache_384_t;
+
+/* Per-thread (when thread-local storage is available) table cache. */
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1; /* Most recently used entry. */
+static THREAD_LS_T int sp_cache_384_inited = 0; /* One-time init flag. */
+
+#ifndef HAVE_THREAD_LS
+ /* Without thread-local storage the cache is shared: guard with a mutex. */
+ static volatile int initCacheMutex_384 = 0;
+ static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find (or create) the cache entry for point g.
+ *
+ * Matches on the point's x/y; on miss, takes the next free slot or evicts
+ * the least-used entry. Caller must hold sp_cache_384_lock when the cache
+ * is shared (no thread-local storage).
+ *
+ * g Point to look up.
+ * cache Receives a pointer to the entry for g (never NULL on return).
+ */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazy one-time initialisation of the entry flags. */
+ if (sp_cache_384_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_384[i].set = 0;
+ }
+ sp_cache_384_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_384[i].set)
+ continue;
+
+ if (sp_384_cmp_equal_15(g->x, sp_cache_384[i].x) &
+ sp_384_cmp_equal_15(g->y, sp_cache_384[i].y)) {
+ sp_cache_384[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_384_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_384[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_384_last) {
+ least = sp_cache_384[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_384[j].cnt < least) {
+ i = j;
+ least = sp_cache_384[i].cnt;
+ }
+ }
+ }
+
+ /* Claim the slot for g; table generation happens later, once the
+ * entry's use count shows it is worth pre-computing. */
+ XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+ XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+ sp_cache_384[i].set = 1;
+ sp_cache_384[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_384[i];
+ sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * With FP_ECC, repeated use of the same point (from the second use on)
+ * triggers generation of a cached stripe table and switches to the faster
+ * striped multiplication; otherwise the windowed method is used directly.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_384_ecc_mulmod_fast_15(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 15 * 7];
+ sp_cache_384_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Shared cache: serialise lookup and table generation. */
+ if (initCacheMutex_384 == 0) {
+ wc_InitMutex(&sp_cache_384_lock);
+ initCacheMutex_384 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_384_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_384(g, &cache);
+ /* Second use of this point: invest in the pre-computed table. */
+ if (cache->cnt == 2)
+ sp_384_gen_stripe_table_15(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_384_ecc_mulmod_fast_15(r, g, k, map, heap);
+ }
+ else {
+ err = sp_384_ecc_mulmod_stripe_15(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#endif
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Public entry point: converts mp_int/ecc_point inputs to the internal
+ * representation, runs the scalar multiply, and converts back.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[15];
+#endif
+ sp_point_384* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_384_point_new_15(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert inputs to 26-bit-digit / internal point form. */
+ sp_384_from_mp(k, 15, km);
+ sp_384_point_from_ecc_point_15(point, gm);
+
+ /* In-place multiply: point = k * point. */
+ err = sp_384_ecc_mulmod_15(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_to_ecc_point_15(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_15(point, 0, heap);
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Small build: simply delegates to the generic multiply with the curve's
+ * base point; no pre-computed base-point table is kept.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* No pre-computed values. */
+ return sp_384_ecc_mulmod_15(r, &p384_base, k, map, heap);
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Cache-resistant build: delegates to the constant-time generic multiply
+ * with the curve's base point; no pre-computed base-point table is kept.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* No pre-computed values. */
+ return sp_384_ecc_mulmod_15(r, &p384_base, k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x1c0b528,0x01d5992,0x0e383dd,0x38a835b,0x220e378,0x106d35b,
+ 0x1c3afc5,0x03bfe1e,0x28459a3,0x2d91521,0x214ede2,0x0bfdc8d,
+ 0x2151381,0x3708a67,0x004d3aa },
+ { 0x303a4fe,0x10f6b52,0x29ac230,0x2fdeed2,0x0a1bfa8,0x3a0ec14,
+ 0x2de7562,0x3ff662e,0x21968f4,0x031b0d4,0x3969a84,0x2000898,
+ 0x1c5e9dd,0x2f09685,0x002b78a } },
+ /* 2 */
+ { { 0x30c535b,0x191d4ca,0x2296298,0x14dc141,0x090dd69,0x05aae6b,
+ 0x0cd6b42,0x35da80e,0x3b7be12,0x2cf7e6d,0x1f347bd,0x3d365e1,
+ 0x1448913,0x32704fa,0x00222c5 },
+ { 0x280dc64,0x39e5bc9,0x24175f8,0x2dd60d4,0x0120e7c,0x041d02e,
+ 0x0b5d8ad,0x37b9895,0x2fb5337,0x1f0e2e3,0x14f0224,0x2230b86,
+ 0x1bc4cf6,0x17cdb09,0x007b5c7 } },
+ /* 3 */
+ { { 0x2dffea5,0x28f30e7,0x29fce26,0x070df5f,0x235bbfd,0x2f78fbd,
+ 0x27700d9,0x23d6bc3,0x3471a53,0x0c0e03a,0x05bf9eb,0x276a2ec,
+ 0x20c3e2e,0x31cc691,0x00dbb93 },
+ { 0x126b605,0x2e8983d,0x153737d,0x23bf5e1,0x295d497,0x35ca812,
+ 0x2d793ae,0x16c6893,0x3777600,0x089a520,0x1e681f8,0x3d55ee6,
+ 0x154ef99,0x155f592,0x00ae5f9 } },
+ /* 4 */
+ { { 0x26feef9,0x20315fc,0x1240244,0x250e838,0x3c31a26,0x1cf8af1,
+ 0x1002c32,0x3b531cd,0x1c53ef1,0x22310ba,0x3f4948e,0x22eafd9,
+ 0x3863202,0x3d0e2a5,0x006a502 },
+ { 0x34536fe,0x04e91ad,0x30ebf5f,0x2af62a7,0x01d218b,0x1c8c9da,
+ 0x336bcc3,0x23060c3,0x331576e,0x1b14c5e,0x1bbcb76,0x0755e9a,
+ 0x3d4dcef,0x24c2cf8,0x00917c4 } },
+ /* 5 */
+ { { 0x349ddd0,0x09b8bb8,0x0250114,0x3e66cbf,0x29f117e,0x3005d29,
+ 0x36b480e,0x2119bfc,0x2761845,0x253d2f7,0x0580604,0x0bb6db4,
+ 0x3ca922f,0x1744677,0x008adc7 },
+ { 0x3d5a7ce,0x27425ed,0x11e9a61,0x3968d10,0x3874275,0x3692d3b,
+ 0x03e0470,0x0763d50,0x3d97790,0x3cbaeab,0x2747170,0x18faf3a,
+ 0x180365e,0x2511fe7,0x0012a36 } },
+ /* 6 */
+ { { 0x3c52870,0x2701e93,0x296128f,0x120694e,0x1ce0b37,0x3860a36,
+ 0x10fa180,0x0896b55,0x2f76adb,0x22892ae,0x2e58a34,0x07b4295,
+ 0x2cb62d1,0x079a522,0x00f3d81 },
+ { 0x061ed22,0x2375dd3,0x3c9d861,0x3e602d1,0x10bb747,0x39ae156,
+ 0x3f796fd,0x087a48a,0x06d680a,0x37f7f47,0x2af2c9d,0x36c55dc,
+ 0x10f3dc0,0x279b07a,0x00a0937 } },
+ /* 7 */
+ { { 0x085c629,0x319bbf8,0x089a386,0x184256f,0x15fc2a4,0x00fd2d0,
+ 0x13d6312,0x363d44d,0x32b7e4b,0x25f2865,0x27df8ce,0x1dce02a,
+ 0x24ea3b0,0x0e27b9f,0x00d8a90 },
+ { 0x3b14461,0x1d371f9,0x0f781bc,0x0503271,0x0dc2cb0,0x13bc284,
+ 0x34b3a68,0x1ff894a,0x25d2032,0x16f79ba,0x260f961,0x07b10d5,
+ 0x18173b7,0x2812e2b,0x00eede5 } },
+ /* 8 */
+ { { 0x13b9a2d,0x132ece2,0x0c5d558,0x02c0214,0x1820c66,0x37cb50f,
+ 0x26d8267,0x3a00504,0x3f00109,0x33756ee,0x38172f1,0x2e4bb8c,
+ 0x030d985,0x3e4fcc5,0x00609d4 },
+ { 0x2daf9d6,0x16681fa,0x1fb01e0,0x1b03c49,0x370e653,0x183c839,
+ 0x2207515,0x0ea6b58,0x1ae7aaf,0x3a96522,0x24bae14,0x1c38bd9,
+ 0x082497b,0x1c05db4,0x000dd03 } },
+ /* 9 */
+ { { 0x110521f,0x04efa21,0x0c174cc,0x2a7dc93,0x387315b,0x14f7098,
+ 0x1d83bb3,0x2495ed2,0x2fe0c27,0x1e2d9df,0x093c953,0x0287073,
+ 0x02c9951,0x336291c,0x0033e30 },
+ { 0x208353f,0x3f22748,0x2b2bf0f,0x2373b50,0x10170fa,0x1b8a97d,
+ 0x0851ed2,0x0b25824,0x055ecb5,0x12049d9,0x3fe1adf,0x11b1385,
+ 0x28eab06,0x11fac21,0x00513f0 } },
+ /* 10 */
+ { { 0x35bdf53,0x1847d37,0x1a6dc07,0x29d62c4,0x045d331,0x313b8e5,
+ 0x165daf1,0x1e34562,0x3e75a58,0x16ea2fa,0x02dd302,0x3302862,
+ 0x3eb8bae,0x2266a48,0x00cf2a3 },
+ { 0x24fd048,0x324a074,0x025df98,0x1662eec,0x3841bfb,0x26ae754,
+ 0x1df8cec,0x0113ae3,0x0b67fef,0x094e293,0x2323666,0x0ab087c,
+ 0x2f06509,0x0e142d9,0x00a919d } },
+ /* 11 */
+ { { 0x1d480d8,0x00ed021,0x3a7d3db,0x1e46ca1,0x28cd9f4,0x2a3ceeb,
+ 0x24dc754,0x0624a3c,0x0003db4,0x1520bae,0x1c56e0f,0x2fe7ace,
+ 0x1dc6f38,0x0c826a4,0x008b977 },
+ { 0x209cfc2,0x2c16c9c,0x1b70a31,0x21416cb,0x34c49bf,0x186549e,
+ 0x062498d,0x146e959,0x0391fac,0x08ff944,0x2b4b834,0x013d57a,
+ 0x2eabffb,0x0370131,0x00c07c1 } },
+ /* 12 */
+ { { 0x332f048,0x0bf9336,0x16dfad2,0x2451d7b,0x35f23bf,0x299adb2,
+ 0x0ce0c0a,0x0170294,0x289f034,0x2b7d89e,0x395e2d6,0x1d20df7,
+ 0x2e64e36,0x16dae90,0x00081c9 },
+ { 0x31d6ceb,0x0f80db9,0x0271eba,0x33db1ac,0x1b45bcc,0x1a11c07,
+ 0x347e630,0x148fd9e,0x142e712,0x3183e3e,0x1cd47ad,0x108d1c9,
+ 0x09cbb82,0x35e61d9,0x0083027 } },
+ /* 13 */
+ { { 0x215b0b8,0x0a7a98d,0x2c41b39,0x3f69536,0x0b41441,0x16da8da,
+ 0x15d556b,0x3c17a26,0x129167e,0x3ea0351,0x2d25a27,0x2f2d285,
+ 0x15b68f6,0x2931ef5,0x00210d6 },
+ { 0x1351130,0x012aec9,0x37ebf38,0x26640f8,0x01d2df6,0x2130972,
+ 0x201efc0,0x23a457c,0x087a1c6,0x14c68a3,0x163f62a,0x36b494d,
+ 0x015d481,0x39c35b1,0x005dd6d } },
+ /* 14 */
+ { { 0x06612ce,0x11c3f61,0x199729f,0x3b36863,0x2986f3e,0x3cd2be1,
+ 0x04c1612,0x2be2dae,0x00846dd,0x3d7bc29,0x249e795,0x1016803,
+ 0x37a3714,0x2c5aa8b,0x005f491 },
+ { 0x341b38d,0x01eb936,0x3caac7f,0x27863ef,0x1ef7d11,0x1110ec6,
+ 0x18e0761,0x26498e8,0x01a79a1,0x390d5a1,0x22226fb,0x3d2a473,
+ 0x0872191,0x1230f32,0x00dc772 } },
+ /* 15 */
+ { { 0x0b1ec9d,0x03fc6b9,0x3706d57,0x03b9fbb,0x221d23e,0x2867821,
+ 0x1e40f4c,0x2c9c0f3,0x3c4cd4b,0x31f5948,0x3f13aa6,0x307c1b2,
+ 0x04b6016,0x116b453,0x005aa72 },
+ { 0x0b74de8,0x20519d1,0x134e37f,0x05d882a,0x1839e7a,0x3a2c6a8,
+ 0x0d14e8d,0x1d78bdd,0x251f30d,0x3a1e27e,0x081c261,0x2c9014b,
+ 0x165ee09,0x19e0cf1,0x00654e2 } },
+ /* 16 */
+ { { 0x39fbe67,0x081778b,0x0e44378,0x20dfdca,0x1c4afcb,0x20b803c,
+ 0x0ec06c6,0x1508f6f,0x1c3114d,0x3bca851,0x3a52463,0x07661d1,
+ 0x17b0aa0,0x16c5f5c,0x00fc093 },
+ { 0x0d01f95,0x0ef13f5,0x2d34965,0x2a25582,0x39aa83e,0x3e38fcf,
+ 0x3943dca,0x385bbdd,0x210e86f,0x3dc1dd2,0x3f9ffdc,0x18b9bc6,
+ 0x345c96b,0x0e79621,0x008a72f } },
+ /* 17 */
+ { { 0x341c342,0x3793688,0x042273a,0x153a9c1,0x3dd326e,0x1d073bc,
+ 0x2c7d983,0x05524cd,0x00d59e6,0x347abe8,0x3d9a3ef,0x0fb624a,
+ 0x2c7e4cd,0x09b3171,0x0003faf },
+ { 0x045f8ac,0x38bf3cc,0x1e73087,0x0c85d3c,0x314a655,0x382be69,
+ 0x384f28f,0x24d6cb3,0x2842cdc,0x1777f5e,0x2929c89,0x03c45ed,
+ 0x3cfcc4c,0x0b59322,0x0035657 } },
+ /* 18 */
+ { { 0x18c1bba,0x2eb005f,0x33d57ec,0x30e42c3,0x36058f9,0x1865f43,
+ 0x2116e3f,0x2c4a2bb,0x0684033,0x0f1375c,0x0209b98,0x2136e9b,
+ 0x1bc4af0,0x0b3e0c7,0x0097c7c },
+ { 0x16010e8,0x398777e,0x2a172f4,0x0814a7e,0x0d97e4e,0x274dfc8,
+ 0x2666606,0x1b5c93b,0x1ed3d36,0x3f3304e,0x13488e0,0x02dbb88,
+ 0x2d53369,0x3717ce9,0x007cad1 } },
+ /* 19 */
+ { { 0x257a41f,0x2a6a076,0x39b6660,0x04bb000,0x1e74a04,0x3876b45,
+ 0x343c6b5,0x0753108,0x3f54668,0x24a13cf,0x23749e8,0x0421fc5,
+ 0x32f13b5,0x0f31be7,0x00070f2 },
+ { 0x1186e14,0x0847697,0x0dff542,0x0dff76c,0x084748f,0x2c7d060,
+ 0x23aab4d,0x0b43906,0x27ba640,0x1497b59,0x02f5835,0x0a492a4,
+ 0x0a6892f,0x39f3e91,0x005844e } },
+ /* 20 */
+ { { 0x33b236f,0x02181cf,0x21dafab,0x0760788,0x019e9d4,0x249ed0a,
+ 0x36571e3,0x3c7dbcf,0x1337550,0x010d22a,0x285e62f,0x19ee65a,
+ 0x052bf71,0x1d65fd5,0x0062d43 },
+ { 0x2955926,0x3fae7bc,0x0353d85,0x07db7de,0x1440a56,0x328dad6,
+ 0x1668ec9,0x28058e2,0x1a1a22d,0x1014afc,0x3609325,0x3effdcb,
+ 0x209f3bd,0x3ca3888,0x0094e50 } },
+ /* 21 */
+ { { 0x062e8af,0x0b96ccc,0x136990b,0x1d7a28f,0x1a85723,0x0076dec,
+ 0x21b00b2,0x06a88ff,0x2f0ee65,0x1fa49b7,0x39b10ad,0x10b26fa,
+ 0x0be7465,0x026e8bf,0x00098e3 },
+ { 0x3f1d63f,0x37bacff,0x1374779,0x02882ff,0x323d0e8,0x1da3de5,
+ 0x12bb3b8,0x0a15a11,0x34d1f95,0x2b3dd6e,0x29ea3fa,0x39ad000,
+ 0x33a538f,0x390204d,0x0012bd3 } },
+ /* 22 */
+ { { 0x04cbba5,0x0de0344,0x1d4cc02,0x11fe8d7,0x36207e7,0x32a6da8,
+ 0x0239281,0x1ec40d7,0x3e89798,0x213fc66,0x0022eee,0x11daefe,
+ 0x3e74db8,0x28534ee,0x00aa0a4 },
+ { 0x07d4543,0x250cc46,0x206620f,0x1c1e7db,0x1321538,0x31fa0b8,
+ 0x30f74ea,0x01aae0e,0x3a2828f,0x3e9dd22,0x026ef35,0x3c0a62b,
+ 0x27dbdc5,0x01c23a6,0x000f0c5 } },
+ /* 23 */
+ { { 0x2f029dd,0x3091337,0x21b80c5,0x21e1419,0x13dabc6,0x3847660,
+ 0x12b865f,0x36eb666,0x38f6274,0x0ba6006,0x098da24,0x1398c64,
+ 0x13d08e5,0x246a469,0x009929a },
+ { 0x1285887,0x3ff5c8d,0x010237b,0x097c506,0x0bc7594,0x34b9b88,
+ 0x00cc35f,0x0bb964a,0x00cfbc4,0x29cd718,0x0837619,0x2b4a192,
+ 0x0c57bb7,0x08c69de,0x00a3627 } },
+ /* 24 */
+ { { 0x1361ed8,0x266d724,0x366cae7,0x1d5b18c,0x247d71b,0x2c9969a,
+ 0x0dd5211,0x1edd153,0x25998d7,0x0380856,0x3ab29db,0x09366de,
+ 0x1e53644,0x2b31ff6,0x008b0ff },
+ { 0x3b5d9ef,0x217448d,0x174746d,0x18afea4,0x15b106d,0x3e66e8b,
+ 0x0479f85,0x13793b4,0x1231d10,0x3c39bce,0x25e8983,0x2a13210,
+ 0x05a7083,0x382be04,0x00a9507 } },
+ /* 25 */
+ { { 0x0cf381c,0x1a29b85,0x31ccf6c,0x2f708b8,0x3af9d27,0x2a29732,
+ 0x168d4da,0x393488d,0x2c0e338,0x3f90c7b,0x0f52ad1,0x2a0a3fa,
+ 0x2cd80f1,0x15e7a1a,0x00db6a0 },
+ { 0x107832a,0x159cb91,0x1289288,0x17e21f9,0x073fc27,0x1584342,
+ 0x3802780,0x3d6c197,0x154075f,0x16366d1,0x09f712b,0x23a3ec4,
+ 0x29cf23a,0x3218baf,0x0039f0a } },
+ /* 26 */
+ { { 0x052edf5,0x2afde13,0x2e53d8f,0x3969626,0x3dcd737,0x1e46ac5,
+ 0x118bf0d,0x01b2652,0x156bcff,0x16d7ef6,0x1ca46d4,0x34c0cbb,
+ 0x3e486f6,0x1f85068,0x002cdff },
+ { 0x1f47ec8,0x12cee98,0x0608667,0x18fbbe1,0x08a8821,0x31a1fe4,
+ 0x17c7054,0x3c89e89,0x2edf6cd,0x1b8c32c,0x3f6ea84,0x1319329,
+ 0x3cd3c2c,0x05f331a,0x00186fa } },
+ /* 27 */
+ { { 0x1fcb91e,0x0fd4d87,0x358a48a,0x04d91b4,0x083595e,0x044a1e6,
+ 0x15827b9,0x1d5eaf4,0x2b82187,0x08f3984,0x21bd737,0x0c54285,
+ 0x2f56887,0x14c2d98,0x00f4684 },
+ { 0x01896f6,0x0e542d0,0x2090883,0x269dfcf,0x1e11cb8,0x239fd29,
+ 0x312cac4,0x19dfacb,0x369f606,0x0cc4f75,0x16579f9,0x33c22cc,
+ 0x0f22bfd,0x3b251ae,0x006429c } },
+ /* 28 */
+ { { 0x375f9a4,0x137552e,0x3570498,0x2e4a74e,0x24aef06,0x35b9307,
+ 0x384ca23,0x3bcd6d7,0x011b083,0x3c93187,0x392ca9f,0x129ce48,
+ 0x0a800ce,0x145d9cc,0x00865d6 },
+ { 0x22b4a2b,0x37f9d9c,0x3e0eca3,0x3e5ec20,0x112c04b,0x2e1ae29,
+ 0x3ce5b51,0x0f83200,0x32d6a7e,0x10ff1d8,0x081adbe,0x265c30b,
+ 0x216b1c8,0x0eb4483,0x003cbcd } },
+ /* 29 */
+ { { 0x030ce93,0x2d331fb,0x20a2fbf,0x1f6dc9c,0x010ed6c,0x1ed5540,
+ 0x275bf74,0x3df0fb1,0x103333f,0x0241c96,0x1075bfc,0x30e5cf9,
+ 0x0f31bc7,0x32c01eb,0x00b049e },
+ { 0x358839c,0x1dbabd3,0x1e4fb40,0x36a8ac1,0x2101896,0x2d0319b,
+ 0x2033b0a,0x192e8fd,0x2ebc8d8,0x2867ba7,0x07bf6d2,0x1b3c555,
+ 0x2477deb,0x198fe09,0x008e5a9 } },
+ /* 30 */
+ { { 0x3fbd5e1,0x18bf77d,0x2b1d69e,0x151da44,0x338ecfe,0x0768efe,
+ 0x1a3d56d,0x3c35211,0x10e1c86,0x2012525,0x3bc36ce,0x32b6fe4,
+ 0x0c8d183,0x15c93f3,0x0041fce },
+ { 0x332c144,0x24e70a0,0x246e05f,0x22c21c7,0x2b17f24,0x1ba2bfd,
+ 0x0534e26,0x318a4f6,0x1dc3b85,0x0c741bc,0x23131b7,0x01a8cba,
+ 0x364e5db,0x21362cf,0x00f2951 } },
+ /* 31 */
+ { { 0x2ddc103,0x14ffdcd,0x206fd96,0x0de57bd,0x025f43e,0x381b73a,
+ 0x2301fcf,0x3bafc27,0x34130b6,0x0216bc8,0x0ff56b2,0x2c4ad4c,
+ 0x23c6b79,0x1267fa6,0x009b4fb },
+ { 0x1d27ac2,0x13e2494,0x1389015,0x38d5b29,0x2d33167,0x3f01969,
+ 0x28ec1fa,0x1b26de0,0x2587f74,0x1c25668,0x0c44f83,0x23c6f8c,
+ 0x32fdbb1,0x045f104,0x00a7946 } },
+ /* 32 */
+ { { 0x23c647b,0x09addd7,0x1348c04,0x0e633c1,0x1bfcbd9,0x1cb034f,
+ 0x1312e31,0x11cdcc7,0x1e6ee75,0x057d27f,0x2da7ee6,0x154c3c1,
+ 0x3a5fb89,0x2c2ba2c,0x00cf281 },
+ { 0x1b8a543,0x125cd50,0x1d30fd1,0x29cc203,0x341a625,0x14e4233,
+ 0x3aae076,0x289e38a,0x036ba02,0x230f405,0x3b21b8f,0x34088b9,
+ 0x01297a0,0x03a75fb,0x00fdc27 } },
+ /* 33 */
+ { { 0x07f41d6,0x1cf032f,0x1641008,0x0f86deb,0x3d97611,0x0e110fe,
+ 0x136ff42,0x0b914a9,0x0e241e6,0x180c340,0x1f545fc,0x0ba619d,
+ 0x1208c53,0x04223a4,0x00cd033 },
+ { 0x397612c,0x0132665,0x34e2d1a,0x00bba99,0x1d4393e,0x065d0a8,
+ 0x2fa69ee,0x1643b55,0x08085f0,0x3774aad,0x08a2243,0x33bf149,
+ 0x03f41a5,0x1ed950e,0x0048cc6 } },
+ /* 34 */
+ { { 0x014ab48,0x010c3bf,0x2a744e5,0x13c99c1,0x2195b7f,0x32207fd,
+ 0x28a228c,0x004f4bf,0x0e2d945,0x2ec6e5a,0x0b92162,0x1aa95e5,
+ 0x2754a93,0x1adcd93,0x004fb76 },
+ { 0x1e1ff7f,0x24ef28c,0x269113f,0x32b393c,0x2696eb5,0x0ac2780,
+ 0x354bf8a,0x0ffe3fd,0x09ce58e,0x0163c4f,0x1678c0b,0x15cd1bc,
+ 0x292b3b7,0x036ea19,0x00d5420 } },
+ /* 35 */
+ { { 0x1da1265,0x0c2ef5b,0x18dd9a0,0x3f3a25c,0x0f7b4f3,0x0d8196e,
+ 0x24931f9,0x090729a,0x1875f72,0x1ef39cb,0x2577585,0x2ed472d,
+ 0x136756c,0x20553a6,0x00c7161 },
+ { 0x2e32189,0x283de4b,0x00b2e81,0x0989df7,0x3ef2fab,0x1c7d1a7,
+ 0x24f6feb,0x3e16679,0x233dfda,0x06d1233,0x3e6b5df,0x1707132,
+ 0x05f7b3f,0x2c00779,0x00fb8df } },
+ /* 36 */
+ { { 0x15bb921,0x117e9d3,0x267ec73,0x2f934ad,0x25c7e04,0x20b5e8f,
+ 0x2d3a802,0x2ca911f,0x3f87e47,0x39709dd,0x08488e2,0x2cec400,
+ 0x35b4589,0x1f0acba,0x009aad7 },
+ { 0x2ac34ae,0x06f29f6,0x3326d68,0x3949abe,0x02452e4,0x0687b85,
+ 0x0879244,0x1eb7832,0x0d4c240,0x31d0ec1,0x3c17a2a,0x17a666f,
+ 0x01a06cb,0x3e0929c,0x004dca2 } },
+ /* 37 */
+ { { 0x127bc1a,0x0c72984,0x13be68e,0x26c5fab,0x1a3edd5,0x097d685,
+ 0x36b645e,0x385799e,0x394a420,0x39d8885,0x0b1e872,0x13f60ed,
+ 0x2ce1b79,0x3c0ecb7,0x007cab3 },
+ { 0x29b3586,0x26fc572,0x0bd7711,0x0913494,0x0a55459,0x31af3c9,
+ 0x3633eac,0x3e2105c,0x0c2b1b6,0x0e6f4c2,0x047d38c,0x2b81bd5,
+ 0x1fe1c3b,0x04d7cd0,0x0054dcc } },
+ /* 38 */
+ { { 0x03caf0d,0x0d66365,0x313356d,0x2a4897f,0x2ce044e,0x18feb7a,
+ 0x1f6a7c5,0x3709e7b,0x14473e8,0x2d8cbae,0x3190dca,0x12d19f8,
+ 0x31e3181,0x3cc5b6e,0x002d4f4 },
+ { 0x143b7ca,0x2604728,0x39508d6,0x0cb79f3,0x24ec1ac,0x1ed7fa0,
+ 0x3ab5fd3,0x3c76488,0x2e49390,0x03a0985,0x3580461,0x3fd2c81,
+ 0x308f0ab,0x38561d6,0x0011b9b } },
+ /* 39 */
+ { { 0x3be682c,0x0c68f4e,0x32dd4ae,0x099d3bb,0x0bc7c5d,0x311f750,
+ 0x2fd10a3,0x2e7864a,0x23bc14a,0x13b1f82,0x32e495e,0x1b0f746,
+ 0x3cd856a,0x17a4c26,0x00085ee },
+ { 0x02e67fd,0x06a4223,0x2af2f38,0x2038987,0x132083a,0x1b7bb85,
+ 0x0d6a499,0x131e43f,0x3035e52,0x278ee3e,0x1d5b08b,0x30d8364,
+ 0x2719f8d,0x0b21fc9,0x003a06e } },
+ /* 40 */
+ { { 0x237cac0,0x27d6a1c,0x27945cd,0x2750d61,0x293f0b5,0x253db13,
+ 0x04a764e,0x20b4d0e,0x12bb627,0x160c13b,0x0de0601,0x236e2cf,
+ 0x2190f0b,0x354d76f,0x004336d },
+ { 0x2ab473a,0x10d54e4,0x1046574,0x1d6f97b,0x0031c72,0x06426a9,
+ 0x38678c2,0x0b76cf9,0x04f9920,0x152adf8,0x2977e63,0x1234819,
+ 0x198be26,0x061024c,0x00d427d } },
+ /* 41 */
+ { { 0x39b5a31,0x2123d43,0x362a822,0x1a2eab6,0x0bb0034,0x0d5d567,
+ 0x3a04723,0x3a10c8c,0x08079ae,0x0d27bda,0x2eb9e1e,0x2619e82,
+ 0x39a55a8,0x0c6c7db,0x00c1519 },
+ { 0x174251e,0x13ac2eb,0x295ed26,0x18d2afc,0x037b9b2,0x1258344,
+ 0x00921b0,0x1f702d8,0x1bc4da7,0x1c3794f,0x12b1869,0x366eacf,
+ 0x16ddf01,0x31ebdc5,0x00ad54e } },
+ /* 42 */
+ { { 0x1efdc58,0x1370d5e,0x0ddb8e7,0x1a53fda,0x1456bd3,0x0c825a9,
+ 0x0e74ccd,0x20f41c9,0x3423867,0x139073f,0x3c70d8a,0x131fc85,
+ 0x219a2a0,0x34bf986,0x0041199 },
+ { 0x1c05dd2,0x268f80a,0x3da9d38,0x1af9f8f,0x0535f2a,0x30ad37e,
+ 0x2cf72d7,0x14a509b,0x1f4fe74,0x259e09d,0x1d23f51,0x0672732,
+ 0x08fc463,0x00b6201,0x001e05a } },
+ /* 43 */
+ { { 0x0d5ffe8,0x3238bb5,0x17f275c,0x25b6fa8,0x2f8bb48,0x3b8f2d2,
+ 0x059790c,0x18594d4,0x285a47c,0x3d301bb,0x12935d2,0x23ffc96,
+ 0x3d7c7f9,0x15c8cbf,0x0034c4a },
+ { 0x20376a2,0x05201ba,0x1e02c4b,0x1413c45,0x02ea5e7,0x39575f0,
+ 0x2d76e21,0x113694c,0x011f310,0x0da3725,0x31b7799,0x1cb9195,
+ 0x0cfd592,0x22ee4ea,0x00adaa3 } },
+ /* 44 */
+ { { 0x14ed72a,0x031c49f,0x39a34bf,0x192e87d,0x0da0e92,0x130e7a9,
+ 0x00258bf,0x144e123,0x2d82a71,0x0294e53,0x3f06c66,0x3d4473a,
+ 0x037cd4a,0x3bbfb17,0x00fcebc },
+ { 0x39ae8c1,0x2dd6a9d,0x206ef23,0x332b479,0x2deff59,0x09d5720,
+ 0x3526fd2,0x33bf7cf,0x344bb32,0x359316a,0x115bdef,0x1b8468a,
+ 0x3813ea9,0x11a8450,0x00ab197 } },
+ /* 45 */
+ { { 0x0837d7d,0x1e1617b,0x0ba443c,0x2f2e3b8,0x2ca5b6f,0x176ed7b,
+ 0x2924d9d,0x07294d3,0x104bb4f,0x1cfd3e8,0x398640f,0x1162dc8,
+ 0x007ea15,0x2aa75fd,0x004231f },
+ { 0x16e6896,0x01987be,0x0f9d53e,0x1a740ec,0x1554e4c,0x31e1634,
+ 0x3cb07b9,0x013eb53,0x39352cb,0x1dfa549,0x0974e7f,0x17c55d2,
+ 0x157c85f,0x1561adb,0x002e3fa } },
+ /* 46 */
+ { { 0x29951a8,0x35200da,0x2ad042c,0x22109e4,0x3a8b15b,0x2eca69c,
+ 0x28bcf9a,0x0cfa063,0x0924099,0x12ff668,0x2fb88dc,0x028d653,
+ 0x2445876,0x218d01c,0x0014418 },
+ { 0x1caedc7,0x295bba6,0x01c9162,0x3364744,0x28fb12e,0x24c80b6,
+ 0x2719673,0x35e5ba9,0x04aa4cc,0x206ab23,0x1cf185a,0x2c140d8,
+ 0x1095a7d,0x1b3633f,0x000c9f8 } },
+ /* 47 */
+ { { 0x0b2a556,0x0a051c4,0x30b29a7,0x190c9ed,0x3767ca9,0x38de66d,
+ 0x2d9e125,0x3aca813,0x2dc22a3,0x319e074,0x0d9450a,0x3445bac,
+ 0x3e08a5b,0x07f29fa,0x00eccac },
+ { 0x02d6e94,0x21113f7,0x321bde6,0x0a4d7b3,0x03621f4,0x2780e8b,
+ 0x22d5432,0x1fc2853,0x0d57d3e,0x254f90b,0x33ed00b,0x289b025,
+ 0x12272bb,0x30e715f,0x0000297 } },
+ /* 48 */
+ { { 0x0243a7d,0x2aac42e,0x0c5b3aa,0x0fa3e96,0x06eeef9,0x2b9fdd9,
+ 0x26fca39,0x0134fe1,0x22661ab,0x1990416,0x03945d6,0x15e3628,
+ 0x3848ca3,0x0f91e46,0x00b08cd },
+ { 0x16d2411,0x3717e1d,0x128c45e,0x3669d54,0x0d4a790,0x2797da8,
+ 0x0f09634,0x2faab0b,0x27df649,0x3b19b49,0x0467039,0x39b65a2,
+ 0x3816f3c,0x31ad0bd,0x0050046 } },
+ /* 49 */
+ { { 0x2425043,0x3858099,0x389092a,0x3f7c236,0x11ff66a,0x3c58b39,
+ 0x2f5a7f8,0x1663ce1,0x2a0fcf5,0x38634b7,0x1a8ca18,0x0dcace8,
+ 0x0e6f778,0x03ae334,0x00df0d2 },
+ { 0x1bb4045,0x357875d,0x14b77ed,0x33ae5b6,0x2252a47,0x31899dd,
+ 0x3293582,0x040c6f6,0x14340dd,0x3614f0e,0x3d5f47f,0x326fb3d,
+ 0x0044a9d,0x00beeb9,0x0027c23 } },
+ /* 50 */
+ { { 0x32d49ce,0x34822a3,0x30a22d1,0x00858b7,0x10d91aa,0x2681fd9,
+ 0x1cce870,0x2404a71,0x38b8433,0x377c1c8,0x019442c,0x0a38b21,
+ 0x22aba50,0x0d61c81,0x002dcbd },
+ { 0x0680967,0x2f0f2f9,0x172cb5f,0x1167e4b,0x12a7bc6,0x05b0da7,
+ 0x2c76e11,0x3a36201,0x37a3177,0x1d71419,0x0569df5,0x0dce7ad,
+ 0x3f40b75,0x3bd8db0,0x002d481 } },
+ /* 51 */
+ { { 0x2a1103e,0x34e7f7f,0x1b171a2,0x24a57e0,0x2eaae55,0x166c992,
+ 0x10aa18f,0x0bb836f,0x01acb59,0x0e430e7,0x1750cca,0x18be036,
+ 0x3cc6cdf,0x0a0f7e5,0x00da4d8 },
+ { 0x2201067,0x374d187,0x1f6b0a6,0x165a7ec,0x31531f8,0x3580487,
+ 0x15e5521,0x0724522,0x2b04c04,0x202c86a,0x3cc1ccf,0x225b11a,
+ 0x1bde79d,0x0eccc50,0x00d24da } },
+ /* 52 */
+ { { 0x3b0a354,0x2814dd4,0x1cd8575,0x3d031b7,0x0392ff2,0x1855ee5,
+ 0x0e8cff5,0x203442e,0x3bd3b1b,0x141cf95,0x3fedee1,0x1d783c0,
+ 0x26f192a,0x0392aa3,0x0075238 },
+ { 0x158ffe9,0x3889f19,0x14151f4,0x06067b1,0x13a3486,0x1e65c21,
+ 0x382d5ef,0x1ab0aac,0x2ffddc4,0x3179b7a,0x3c8d094,0x05101e3,
+ 0x237c6e5,0x3947d83,0x00f674f } },
+ /* 53 */
+ { { 0x363408f,0x21eb96b,0x27376fb,0x2a735d6,0x1a39c36,0x3d31863,
+ 0x33313fc,0x32235e0,0x082f034,0x23ef351,0x39b3528,0x1a69d84,
+ 0x1d9c944,0x07159ad,0x0077a71 },
+ { 0x04f8d65,0x25771e5,0x2ba84a6,0x194586a,0x1e6da5f,0x118059a,
+ 0x14e9c32,0x1d24619,0x3f528ae,0x22f22e4,0x0f5580d,0x0747a0e,
+ 0x32cc85f,0x286b3a8,0x008ccf9 } },
+ /* 54 */
+ { { 0x196fee2,0x2c4431c,0x094528a,0x18e1d32,0x175799d,0x26bb6b7,
+ 0x2293482,0x23fd289,0x07b2be8,0x1a5c533,0x158d60d,0x04a4f3f,
+ 0x164e9f7,0x32ccca9,0x00da6b6 },
+ { 0x1d821c2,0x3f76c4f,0x323df43,0x17e4374,0x0f2f278,0x121227e,
+ 0x2464190,0x19d2644,0x326d24c,0x3185983,0x0803c15,0x0767a33,
+ 0x1c4c996,0x0563eab,0x00631c6 } },
+ /* 55 */
+ { { 0x1752366,0x0baf83f,0x288bacf,0x0384e6f,0x2b93c34,0x3c805e7,
+ 0x3664850,0x29e1663,0x254ff1d,0x3852080,0x0f85c16,0x1e389d9,
+ 0x3191352,0x3915eaa,0x00a246e },
+ { 0x3763b33,0x187ad14,0x3c0d438,0x3f11702,0x1c49f03,0x35ac7a8,
+ 0x3f16bca,0x27266bf,0x08b6fd4,0x0f38ce4,0x37fde8c,0x147a6ff,
+ 0x02c5e5c,0x28e7fc5,0x00076a7 } },
+ /* 56 */
+ { { 0x2338d10,0x0e77fa7,0x011b046,0x1bfd0ad,0x28ee699,0x21d73bc,
+ 0x0461d1a,0x342ea58,0x2d695b4,0x30415ed,0x2906e0b,0x18e494a,
+ 0x20f8a27,0x026b870,0x002c19f },
+ { 0x2f4c43d,0x3f0fc3b,0x0aa95b8,0x2a01ea1,0x3e2e1b1,0x0d74af6,
+ 0x0555288,0x0cb757d,0x24d2071,0x143d2bb,0x3907f67,0x3e0ce30,
+ 0x131f0e9,0x3724381,0x007a874 } },
+ /* 57 */
+ { { 0x3c27050,0x08b5165,0x0bf884b,0x3dd679c,0x3bd0b8d,0x25ce2e6,
+ 0x1674057,0x1f13ed3,0x1f5cd91,0x0d1fd35,0x13ce6e3,0x2671338,
+ 0x10f8b90,0x34e5487,0x00942bf },
+ { 0x03b566d,0x23c3da9,0x37de502,0x1a486ff,0x1af6e86,0x1108cb3,
+ 0x36f856c,0x01a6a0f,0x179f915,0x1595a01,0x2cfecb8,0x082568b,
+ 0x1ba16d1,0x1abb6c0,0x00cf7f0 } },
+ /* 58 */
+ { { 0x2f96c80,0x1b8f123,0x209c0f5,0x2ccf76d,0x1d521f2,0x3705143,
+ 0x2941027,0x07f88af,0x07102a9,0x38b4868,0x1efa37d,0x1bdd3e8,
+ 0x028a12e,0x02e055b,0x009a9a9 },
+ { 0x1c7dfcb,0x3aa7aa7,0x1d62c54,0x3f0b0b0,0x3c74e66,0x274f819,
+ 0x23f9674,0x0e2b67c,0x24654dd,0x0c71f0e,0x1946cee,0x0016211,
+ 0x0045dc7,0x0da1173,0x0089856 } },
+ /* 59 */
+ { { 0x0e73946,0x29f353f,0x056329d,0x2d48c5a,0x28f697d,0x2ea4bb1,
+ 0x235e9cc,0x34faa38,0x15f9f91,0x3557519,0x2a50a6c,0x1a27c8e,
+ 0x2a1a0f3,0x3098879,0x00dcf21 },
+ { 0x1b818bf,0x2f20b98,0x2243cff,0x25b691e,0x3c74a2f,0x2f06833,
+ 0x0e980a8,0x32db48d,0x2b57929,0x33cd7f5,0x2fe17d6,0x11a384b,
+ 0x2dafb81,0x2b9562c,0x00ddea6 } },
+ /* 60 */
+ { { 0x2787b2e,0x37a21df,0x310d294,0x07ce6a4,0x1258acc,0x3050997,
+ 0x19714aa,0x122824b,0x11c708b,0x0462d56,0x21abbf7,0x331aec3,
+ 0x307b927,0x3e8d5a0,0x00c0581 },
+ { 0x24d4d58,0x3d628fc,0x23279e0,0x2e38338,0x2febe9b,0x346f9c0,
+ 0x3d6a419,0x3264e47,0x245faca,0x3669f62,0x1e50d66,0x3028232,
+ 0x18201ab,0x0bdc192,0x0002c34 } },
+ /* 61 */
+ { { 0x17bdbc2,0x1c501c5,0x1605ccd,0x31ab438,0x372fa89,0x24a8057,
+ 0x13da2bb,0x3f95ac7,0x3cda0a3,0x1e2b679,0x24f0673,0x03b72f4,
+ 0x35be616,0x2ccd849,0x0079d4d },
+ { 0x33497c4,0x0c7f657,0x2fb0d3d,0x3b81064,0x38cafea,0x0e942bc,
+ 0x3ca7451,0x2ab9784,0x1678c85,0x3c62098,0x1eb556f,0x01b3aa2,
+ 0x149f3ce,0x2656f6d,0x002eef1 } },
+ /* 62 */
+ { { 0x0596edc,0x1f4fad4,0x03a28ed,0x18a4149,0x3aa3593,0x12db40a,
+ 0x12c2c2a,0x3b1a288,0x327c4fb,0x35847f5,0x384f733,0x02e3fde,
+ 0x1af0e8a,0x2e417c3,0x00d85a6 },
+ { 0x0091cf7,0x2267d75,0x276860e,0x19cbbfc,0x04fef2b,0x030ce59,
+ 0x3195cb1,0x1aa3f07,0x3699362,0x2a09d74,0x0d6c840,0x1e413d0,
+ 0x28acdc7,0x1ff5ea1,0x0088d8b } },
+ /* 63 */
+ { { 0x3d98425,0x08dc8de,0x154e85f,0x24b1c2c,0x2d44639,0x19a1e8b,
+ 0x300ee29,0x053f72e,0x3f7c832,0x12417f6,0x1359368,0x0674a4c,
+ 0x1218e20,0x0e4fbd4,0x000428c },
+ { 0x01e909a,0x1d88fe6,0x12da40c,0x215ef86,0x2925133,0x004241f,
+ 0x3e480f4,0x2d16523,0x07c3120,0x3375e86,0x21fd8f3,0x35dc0b6,
+ 0x0efc5c9,0x14ef8d6,0x0066e47 } },
+ /* 64 */
+ { { 0x2973cf4,0x34d3845,0x34f7070,0x22df93c,0x120aee0,0x3ae2b4a,
+ 0x1af9b95,0x177689a,0x036a6a4,0x0377828,0x23df41e,0x22d4a39,
+ 0x0df2aa1,0x06ca898,0x0003cc7 },
+ { 0x06b1dd7,0x19dc2a8,0x35d324a,0x0467499,0x25bfa9c,0x1a1110c,
+ 0x01e2a19,0x1b3c1cf,0x18d131a,0x10d9815,0x2ee7945,0x0a2720c,
+ 0x0ddcdb0,0x2c071b6,0x00a6aef } },
+ /* 65 */
+ { { 0x1ab5245,0x1192d00,0x13ffba1,0x1b71236,0x09b8d0b,0x0eb49cb,
+ 0x1867dc9,0x371de4e,0x05eae9f,0x36faf82,0x094ea8b,0x2b9440e,
+ 0x022e173,0x2268e6b,0x00740fc },
+ { 0x0e23b23,0x22c28ca,0x04d05e2,0x0bb84c4,0x1235272,0x0289903,
+ 0x267a18b,0x0df0fd1,0x32e49bb,0x2ab1d29,0x281e183,0x3dcd3c3,
+ 0x1c0eb79,0x2db0ff6,0x00bffe5 } },
+ /* 66 */
+ { { 0x2a2123f,0x0d63d71,0x1f6db1a,0x257f8a3,0x1927b2d,0x06674be,
+ 0x302753f,0x20b7225,0x14c1a3f,0x0429cdd,0x377affe,0x0f40a75,
+ 0x2d34d06,0x05fb6b9,0x0054398 },
+ { 0x38b83c4,0x1e7bbda,0x1682f79,0x0527651,0x2615cb2,0x1795fab,
+ 0x0e4facc,0x11f763c,0x1b81130,0x2010ae2,0x13f3650,0x20d5b72,
+ 0x1f32f88,0x34617f4,0x00bf008 } },
+ /* 67 */
+ { { 0x28068db,0x0aa8913,0x1a47801,0x10695ca,0x1c72cc6,0x0fc1a47,
+ 0x33df2c4,0x0517cf0,0x3471d92,0x1be815c,0x397f794,0x3f03cbe,
+ 0x121bfae,0x172cbe0,0x00813d7 },
+ { 0x383bba6,0x04f1c90,0x0b3f056,0x1c29089,0x2a924ce,0x3c85e69,
+ 0x1cecbe5,0x0ad8796,0x0aa79f6,0x25e38ba,0x13ad807,0x30b30ed,
+ 0x0fa963a,0x35c763d,0x0055518 } },
+ /* 68 */
+ { { 0x0623f3b,0x3ca4880,0x2bff03c,0x0457ca7,0x3095c71,0x02a9a08,
+ 0x1722478,0x302c10b,0x3a17458,0x001131e,0x0959ec2,0x18bdfbc,
+ 0x2929fca,0x2adfe32,0x0040ae2 },
+ { 0x127b102,0x14ddeaa,0x1771b8c,0x283700c,0x2398a86,0x085a901,
+ 0x108f9dc,0x0cc0012,0x33a918d,0x26d08e9,0x20b9473,0x12c3fc7,
+ 0x1f69763,0x1c94b5a,0x00e29de } },
+ /* 69 */
+ { { 0x035af04,0x3450021,0x12da744,0x077fb06,0x25f255b,0x0db7150,
+ 0x17dc123,0x1a2a07c,0x2a7636a,0x3972430,0x3704ca1,0x0327add,
+ 0x3d65a96,0x3c79bec,0x009de8c },
+ { 0x11d3d06,0x3fb8354,0x12c7c60,0x04fe7ad,0x0466e23,0x01ac245,
+ 0x3c0f5f2,0x2a935d0,0x3ac2191,0x090bd56,0x3febdbc,0x3f1f23f,
+ 0x0ed1cce,0x02079ba,0x00d4fa6 } },
+ /* 70 */
+ { { 0x0ab9645,0x10174ec,0x3711b5e,0x26357c7,0x2aeec7f,0x2170a9b,
+ 0x1423115,0x1a5122b,0x39e512c,0x18116b2,0x290db1c,0x041b13a,
+ 0x26563ae,0x0f56263,0x00b89f3 },
+ { 0x3ed2ce4,0x01f365f,0x1b2043b,0x05f7605,0x1f9934e,0x2a068d2,
+ 0x38d4d50,0x201859d,0x2de5291,0x0a7985a,0x17e6711,0x01b6c1b,
+ 0x08091fa,0x33c6212,0x001da23 } },
+ /* 71 */
+ { { 0x2f2c4b5,0x311acd0,0x1e47821,0x3bd9816,0x1931513,0x1bd4334,
+ 0x30ae436,0x2c49dc0,0x2c943e7,0x010ed4d,0x1fca536,0x189633d,
+ 0x17abf00,0x39e5ad5,0x00e4e3e },
+ { 0x0c8b22f,0x2ce4009,0x1054bb6,0x307f2fc,0x32eb5e2,0x19d24ab,
+ 0x3b18c95,0x0e55e4d,0x2e4acf5,0x1bc250c,0x1dbf3a5,0x17d6a74,
+ 0x087cf58,0x07f6f82,0x00f8675 } },
+ /* 72 */
+ { { 0x110e0b2,0x0e672e7,0x11b7157,0x1598371,0x01c0d59,0x3d60c24,
+ 0x096b8a1,0x0121075,0x0268859,0x219962f,0x03213f2,0x3022adc,
+ 0x18de488,0x3dcdeb9,0x008d2e0 },
+ { 0x06cfee6,0x26f2552,0x3c579b7,0x31fa796,0x2036a26,0x362ba5e,
+ 0x103601c,0x012506b,0x387ff3a,0x101a41f,0x2c7eb58,0x23d2efc,
+ 0x10a5a07,0x2fd5fa3,0x00e3731 } },
+ /* 73 */
+ { { 0x1cd0abe,0x08a0af8,0x2fa272f,0x17a1fbf,0x1d4f901,0x30e0d2f,
+ 0x1898066,0x273b674,0x0c1b8a2,0x3272337,0x3ee82eb,0x006e7d3,
+ 0x2a75606,0x0af1c81,0x0037105 },
+ { 0x2f32562,0x2842491,0x1bb476f,0x1305cd4,0x1daad53,0x0d8daed,
+ 0x164c37b,0x138030f,0x05145d5,0x300e2a3,0x32c09e7,0x0798600,
+ 0x3515130,0x2b9e55c,0x009764e } },
+ /* 74 */
+ { { 0x3d5256a,0x06c67f2,0x3a3b879,0x3c9b284,0x04007e0,0x33c1a41,
+ 0x3794604,0x1d6240e,0x022b6c1,0x22c62a7,0x01d4590,0x32df5f6,
+ 0x368f1a1,0x2a7486e,0x006e13f },
+ { 0x31e6e16,0x20f18a9,0x09ed471,0x23b861d,0x15cf0ef,0x397b502,
+ 0x1c7f9b2,0x05f84b2,0x2cce6e1,0x3c10bba,0x13fb5a7,0x1b52058,
+ 0x1feb1b8,0x03b7279,0x00ea1cf } },
+ /* 75 */
+ { { 0x2a4cc9b,0x15cf273,0x08f36e6,0x076bf3b,0x2541796,0x10e2dbd,
+ 0x0bf02aa,0x3aa2201,0x03cdcd4,0x3ee252c,0x3799571,0x3e01fa4,
+ 0x156e8d0,0x1fd6188,0x003466a },
+ { 0x2515664,0x166b355,0x2b0b51e,0x0f28f17,0x355b0f9,0x2909e76,
+ 0x206b026,0x3823a12,0x179c5fa,0x0972141,0x2663a1a,0x01ee36e,
+ 0x3fc8dcf,0x2ef3d1b,0x0049a36 } },
+ /* 76 */
+ { { 0x2d93106,0x3d6b311,0x3c9ce47,0x382aa25,0x265b7ad,0x0b5f92f,
+ 0x0f4c941,0x32aa4df,0x380d4b2,0x0e8aba6,0x260357a,0x1f38273,
+ 0x0d5f95e,0x199f23b,0x0029f77 },
+ { 0x0a0b1c5,0x21a3d6a,0x0ad8df6,0x33d8a5e,0x1240858,0x30000a8,
+ 0x3ac101d,0x2a8143d,0x1d7ffe9,0x1c74a2a,0x1b962c9,0x1261359,
+ 0x0c8b274,0x002cf4a,0x00a8a7c } },
+ /* 77 */
+ { { 0x211a338,0x22a14ab,0x16e77c5,0x3c746be,0x3a78613,0x0d5731c,
+ 0x1767d25,0x0b799fa,0x009792a,0x09ae8dc,0x124386b,0x183d860,
+ 0x176747d,0x14c4445,0x00ab09b },
+ { 0x0eb9dd0,0x0121066,0x032895a,0x330541c,0x1e6c17a,0x2271b92,
+ 0x06da454,0x054c2bf,0x20abb21,0x0ead169,0x3d7ea93,0x2359649,
+ 0x242c6c5,0x3194255,0x00a3ef3 } },
+ /* 78 */
+ { { 0x3010879,0x1083a77,0x217989d,0x174e55d,0x29d2525,0x0e544ed,
+ 0x1efd50e,0x30c4e73,0x05bd5d1,0x0793bf9,0x3f7af77,0x052779c,
+ 0x2b06bc0,0x13d0d02,0x0055a6b },
+ { 0x3eaf771,0x094947a,0x0288f13,0x0a21e35,0x22ab441,0x23816bf,
+ 0x15832e1,0x2d8aff3,0x348cc1f,0x2bbd4a8,0x01c4792,0x34209d3,
+ 0x06dc72b,0x211a1df,0x00345c5 } },
+ /* 79 */
+ { { 0x2a65e90,0x173ac2f,0x199cde1,0x0ac905b,0x00987f7,0x3618f7b,
+ 0x1b578df,0x0d5e113,0x34bac6a,0x27d85ed,0x1b48e99,0x18af5eb,
+ 0x1a1be9e,0x3987aac,0x00877ca },
+ { 0x2358610,0x3776a8e,0x2b0723a,0x344c978,0x22fc4d6,0x1615d53,
+ 0x3198f51,0x2d61225,0x12cb392,0x07dd061,0x355f7de,0x09e0132,
+ 0x0efae99,0x13b46aa,0x00e9e6c } },
+ /* 80 */
+ { { 0x0683186,0x36d8e66,0x0ea9867,0x0937731,0x1fb5cf4,0x13c39ef,
+ 0x1a7ffed,0x27dfb32,0x31c7a77,0x09f15fd,0x16b25ef,0x1dd01e7,
+ 0x0168090,0x240ed02,0x0090eae },
+ { 0x2e1fceb,0x2ab9783,0x1a1fdf2,0x093a1b0,0x33ff1da,0x2864fb7,
+ 0x3587d6c,0x275aa03,0x123dc9b,0x0e95a55,0x0592030,0x2102402,
+ 0x1bdef7b,0x37f2e9b,0x001efa4 } },
+ /* 81 */
+ { { 0x0540015,0x20e3e78,0x37dcfbd,0x11b0e41,0x02c3239,0x3586449,
+ 0x1fb9e6a,0x0baa22c,0x00c0ca6,0x3e58491,0x2dbe00f,0x366d4b0,
+ 0x176439a,0x2a86b86,0x00f52ab },
+ { 0x0ac32ad,0x226250b,0x0f91d0e,0x1098aa6,0x3dfb79e,0x1dbd572,
+ 0x052ecf2,0x0f84995,0x0d27ad2,0x036c6b0,0x1e4986f,0x2317dab,
+ 0x2327df6,0x0dee0b3,0x00389ac } },
+ /* 82 */
+ { { 0x0e60f5b,0x0622d3e,0x2ada511,0x05522a8,0x27fe670,0x206af28,
+ 0x333cb83,0x3f25f6c,0x19ddaf3,0x0ec579b,0x36aabc0,0x093dbac,
+ 0x348b44b,0x277dca9,0x00c5978 },
+ { 0x1cf5279,0x32e294a,0x1a6c26f,0x3f006b6,0x37a3c6b,0x2e2eb26,
+ 0x2cf88d4,0x3410619,0x1899c80,0x23d3226,0x30add14,0x2810905,
+ 0x01a41f0,0x11e5176,0x005a02f } },
+ /* 83 */
+ { { 0x1c90202,0x321df30,0x3570fa5,0x103e2b1,0x3d099d4,0x05e207d,
+ 0x0a5b1bd,0x0075d0a,0x3db5b25,0x2d87899,0x32e4465,0x226fc13,
+ 0x24cb8f8,0x3821daa,0x004da3a },
+ { 0x3e66861,0x03f89b8,0x386d3ef,0x14ccc62,0x35e7729,0x11ce5b7,
+ 0x035fbc7,0x3f4df0f,0x29c439f,0x1144568,0x32d7037,0x312f65e,
+ 0x06b9dbf,0x03a9589,0x0008863 } },
+ /* 84 */
+ { { 0x0a9e8c9,0x1a19b6e,0x091ecd9,0x2e16ee0,0x2a11963,0x116cf34,
+ 0x390d530,0x194131f,0x2b580f3,0x31d569c,0x21d3751,0x3e2ce64,
+ 0x193de46,0x32454f0,0x004bffd },
+ { 0x09554e7,0x170126e,0x2be6cd1,0x153de89,0x0353c67,0x350765c,
+ 0x202370b,0x1db01e5,0x30b12b1,0x3778591,0x00c8809,0x2e845d5,
+ 0x1fb1e56,0x170f90d,0x00e2db3 } },
+ /* 85 */
+ { { 0x328e33f,0x392aad8,0x36d1d71,0x0aebe04,0x1548678,0x1b55c8c,
+ 0x24995f8,0x2a5a01e,0x1bd1651,0x37c7c29,0x36803b6,0x3716c91,
+ 0x1a935a5,0x32f10b7,0x005c587 },
+ { 0x2e8b4c0,0x336ccae,0x11382b6,0x22ec4cc,0x066d159,0x35fa585,
+ 0x23b2d25,0x3017528,0x2a674a8,0x3a4f900,0x1a7ce82,0x2b2539b,
+ 0x3d46545,0x0a07918,0x00eb9f8 } },
+ /* 86 */
+ { { 0x2cf5b9b,0x03e747f,0x166a34e,0x0afc81a,0x0a115b1,0x3aa814d,
+ 0x11cf3b1,0x163e556,0x3cbfb15,0x157c0a4,0x1bc703a,0x2141e90,
+ 0x01f811c,0x207218b,0x0092e6b },
+ { 0x1af24e3,0x3af19b3,0x3c70cc9,0x335cbf3,0x068917e,0x055ee92,
+ 0x09a9308,0x2cac9b7,0x008b06a,0x1175097,0x36e929c,0x0be339c,
+ 0x0932436,0x15f18ba,0x0009f6f } },
+ /* 87 */
+ { { 0x29375fb,0x35ade34,0x11571c7,0x07b8d74,0x3fabd85,0x090fa91,
+ 0x362dcd4,0x02c3fdb,0x0608fe3,0x2477649,0x3fc6e70,0x059b7eb,
+ 0x1e6a708,0x1a4c220,0x00c6c4c },
+ { 0x2a53fb0,0x1a3e1f5,0x11f9203,0x27e7ad3,0x038718e,0x3f5f9e4,
+ 0x308acda,0x0a8700f,0x34472fe,0x3420d7a,0x08076e5,0x014240e,
+ 0x0e7317e,0x197a98e,0x00538f7 } },
+ /* 88 */
+ { { 0x2663b4b,0x0927670,0x38dd0e0,0x16d1f34,0x3e700ab,0x3119567,
+ 0x12559d2,0x399b6c6,0x0a84bcd,0x163e7dd,0x3e2aced,0x058548c,
+ 0x03a5bad,0x011cf74,0x00c155c },
+ { 0x3e454eb,0x2a1e64e,0x1ccd346,0x36e0edf,0x266ee94,0x2e74aaf,
+ 0x2d8378a,0x3cd547d,0x1d27733,0x0928e5b,0x353553c,0x26f502b,
+ 0x1d94341,0x2635cc7,0x00d0ead } },
+ /* 89 */
+ { { 0x0142408,0x382c3bb,0x3310908,0x2e50452,0x398943c,0x1d0ac75,
+ 0x1bf7d81,0x04bd00f,0x36b6934,0x3349c37,0x0f69e20,0x0195252,
+ 0x243a1c5,0x030da5f,0x00a76a9 },
+ { 0x224825a,0x28ce111,0x34c2e0f,0x02e2b30,0x382e48c,0x26853ca,
+ 0x24bd14e,0x0200dec,0x1e24db3,0x0d3d775,0x132da0a,0x1dea79e,
+ 0x253dc0c,0x03c9d31,0x0020db9 } },
+ /* 90 */
+ { { 0x26c5fd9,0x05e6dc3,0x2eea261,0x08db260,0x2f8bec1,0x1255edf,
+ 0x283338d,0x3d9a91d,0x2640a72,0x03311f9,0x1bad935,0x152fda8,
+ 0x0e95abd,0x31abd15,0x00dfbf4 },
+ { 0x107f4fa,0x29ebe9a,0x27353f7,0x3821972,0x27311fa,0x2925ab6,
+ 0x337ab82,0x2de6c91,0x1f115fe,0x044f909,0x21b93c2,0x3a5f142,
+ 0x13eb5e9,0x3ab1377,0x00b26b6 } },
+ /* 91 */
+ { { 0x22e5f2b,0x2ae7d4a,0x1ac481c,0x0a6fce1,0x2f93caf,0x242658e,
+ 0x3f35c3c,0x050f3d2,0x30074c9,0x142079c,0x0281b4c,0x295fea3,
+ 0x007413e,0x01726cd,0x00e4979 },
+ { 0x1ab3cfb,0x1b76295,0x36adf55,0x1ad4636,0x1d444b9,0x3bd2e55,
+ 0x35425a5,0x1aa8cd3,0x3acecd2,0x1f769e8,0x1a655e9,0x1f6846f,
+ 0x24c70b5,0x3bff080,0x0002da3 } },
+ /* 92 */
+ { { 0x081d0d9,0x2c00d99,0x1fe2e24,0x396063f,0x03740db,0x243f680,
+ 0x3c1f451,0x1ff7b07,0x2803cf2,0x38ca724,0x2934f43,0x0d72d4d,
+ 0x0e8fe74,0x2975e21,0x002b505 },
+ { 0x11adcc9,0x331a99c,0x21e16cf,0x1714c78,0x1f03432,0x2caa2a6,
+ 0x34a9679,0x2f7fe8b,0x0423c21,0x1a757ce,0x31b57d6,0x171e044,
+ 0x093b9b2,0x13602e0,0x00db534 } },
+ /* 93 */
+ { { 0x250a2f5,0x0b999eb,0x21d10d7,0x22b92a1,0x39b7f8d,0x0c37c72,
+ 0x29f70f3,0x3bf0e84,0x1d7e04f,0x07a42a9,0x272c3ae,0x1587b2f,
+ 0x155faff,0x10a336e,0x000d8fb },
+ { 0x3663784,0x0d7dcf5,0x056ad22,0x319f8b1,0x0c05bae,0x2b6ff33,
+ 0x0292e42,0x0435797,0x188efb1,0x0d3f45e,0x119d49f,0x395dcd3,
+ 0x279fe27,0x133a13d,0x00188ac } },
+ /* 94 */
+ { { 0x396c53e,0x0d133e9,0x009b7ee,0x13421a0,0x1bbf607,0x1d284a5,
+ 0x1594f74,0x18cb47c,0x2dcac11,0x2999ddb,0x04e2fa5,0x1889e2c,
+ 0x0a89a18,0x33cb215,0x0052665 },
+ { 0x104ab58,0x1d91920,0x3d6d7e3,0x04dc813,0x1167759,0x13a8466,
+ 0x0a06a54,0x103761b,0x25b1c92,0x26a8fdd,0x2474614,0x21406a4,
+ 0x251d75f,0x38c3734,0x007b982 } },
+ /* 95 */
+ { { 0x15f3060,0x3a7bf30,0x3be6e44,0x0baa1fa,0x05ad62f,0x1e54035,
+ 0x099d41c,0x2a744d9,0x1c0336f,0x3e99b5b,0x1afd3b1,0x2bf1255,
+ 0x1822bf8,0x2c93972,0x001d8cc },
+ { 0x1d7584b,0x0508ade,0x20dd403,0x203a8fc,0x1c54a05,0x1611a31,
+ 0x037c8f9,0x1dcd4fe,0x110fbea,0x30f60bc,0x3dffe2f,0x26a1de1,
+ 0x0480367,0x18ec81c,0x0048eba } },
+ /* 96 */
+ { { 0x346e2f6,0x0435077,0x036789b,0x3e06545,0x313ab57,0x351a721,
+ 0x3372b91,0x15e6019,0x2fa4f6c,0x3c30656,0x272c9ac,0x10e84a8,
+ 0x2bdacea,0x232d9e2,0x009dadd },
+ { 0x182579a,0x15b1af8,0x02d8cce,0x36cb49b,0x086feba,0x2911d17,
+ 0x268ee12,0x011e871,0x18698dc,0x35602b3,0x11b9ec2,0x0ade731,
+ 0x0f6a05a,0x1821015,0x00007da } },
+ /* 97 */
+ { { 0x3b00dd0,0x328d485,0x27a69e3,0x32c3a06,0x1046779,0x120b61c,
+ 0x19fef3d,0x0fef2e6,0x134d923,0x039bce0,0x348cd0e,0x0b0c007,
+ 0x066ae11,0x15d8f1b,0x00934e7 },
+ { 0x33234dc,0x353f0f5,0x2fc1b44,0x18a193a,0x2fcae20,0x1afbc86,
+ 0x3afe252,0x17f7e10,0x107f3b7,0x2d84d54,0x394c2e6,0x19e96a9,
+ 0x0a37283,0x26c6152,0x003d262 } },
+ /* 98 */
+ { { 0x37cfaf8,0x01863d0,0x0299623,0x32c80cb,0x25b8742,0x0a4d90e,
+ 0x1f72472,0x13de652,0x31a0946,0x0ee0103,0x0f25414,0x2518b49,
+ 0x07e7604,0x1488d9b,0x00abd6b },
+ { 0x1338f55,0x2ce4af5,0x1a0c119,0x3380525,0x21a80a9,0x235d4df,
+ 0x118ca7f,0x2dd8bcc,0x1c26bf4,0x32dc56b,0x28482b6,0x1418596,
+ 0x3c84d24,0x1f1a5a9,0x00d958d } },
+ /* 99 */
+ { { 0x1c21f31,0x22aa1ef,0x258c9ad,0x2d2018f,0x0adb3ca,0x01f75ee,
+ 0x186283b,0x31ad3bf,0x3621be7,0x3b1ee6d,0x015582d,0x3d61d04,
+ 0x2ddf32e,0x14b8a66,0x00c970c },
+ { 0x2f24d66,0x00b8a88,0x100a78f,0x041d330,0x2efec1d,0x24c5b86,
+ 0x2a6a390,0x37526bc,0x2055849,0x3339f08,0x16bffc4,0x07f9d72,
+ 0x06ec09c,0x3f49ee8,0x00cad98 } },
+ /* 100 */
+ { { 0x248b73e,0x1b8b42d,0x285eed7,0x39473f4,0x1a9f92c,0x3b44f78,
+ 0x086c062,0x06a4ea3,0x34ea519,0x3c74e95,0x1ad1b8b,0x1737e2c,
+ 0x2cfe338,0x0a291f4,0x00bbecc },
+ { 0x1cec548,0x0c9b01a,0x20b298d,0x377c902,0x24f5bc1,0x2415c8d,
+ 0x1a70622,0x2529090,0x1c5c682,0x283f1ba,0x2319f17,0x0120e2e,
+ 0x01c6f4d,0x33c67ff,0x008b612 } },
+ /* 101 */
+ { { 0x03830eb,0x02d4053,0x10c59bb,0x0f23b83,0x13d08f8,0x26ea4e2,
+ 0x2626427,0x0a45292,0x0449cbc,0x0175750,0x074c46f,0x27ae0f8,
+ 0x2d7d6ae,0x163dd3a,0x0063bb7 },
+ { 0x2bb29e0,0x034bab1,0x341e1c4,0x21d2c0b,0x295aa2d,0x0f2c666,
+ 0x1891755,0x13db64a,0x2fe5158,0x337646e,0x31a1aae,0x057bee4,
+ 0x00f9e37,0x396d19e,0x00c1b6a } },
+ /* 102 */
+ { { 0x2772f41,0x34f92d0,0x39d1cde,0x174ef2d,0x03a700d,0x03fbb98,
+ 0x30d50e8,0x352ed10,0x1fcf5e5,0x3d113bc,0x26e358f,0x180653f,
+ 0x1b43cc6,0x3cc9aa4,0x00e68a2 },
+ { 0x37fe4d2,0x09dd725,0x01eb584,0x171f8a9,0x278fdef,0x3e37c03,
+ 0x3bec02f,0x149757c,0x0cd5852,0x37d2e10,0x0e6988b,0x1c120e9,
+ 0x0b83708,0x38e7319,0x0039499 } },
+ /* 103 */
+ { { 0x08df5fe,0x177a02c,0x0362fc0,0x1f18ee8,0x00c1295,0x173c50a,
+ 0x379414d,0x1885ba8,0x32a54ef,0x2315644,0x39e65cf,0x357c4be,
+ 0x1d66333,0x09e05a5,0x0009c60 },
+ { 0x1f7a2fb,0x073b518,0x2eb83ac,0x11353d7,0x1dd8384,0x0c63f2b,
+ 0x238c6c8,0x2a1920a,0x2e5e9f1,0x1cc56f8,0x042daf4,0x1ed5dc5,
+ 0x25f9e31,0x012a56a,0x0081b59 } },
+ /* 104 */
+ { { 0x321d232,0x2c71422,0x3a756b6,0x30230b2,0x387f3db,0x3a7c3eb,
+ 0x274b46a,0x201e69f,0x185bb7b,0x140da82,0x0d974a2,0x0616e42,
+ 0x35ec94f,0x3bc366b,0x005aa7c },
+ { 0x3dcfffc,0x19a9c15,0x3225e05,0x36ae114,0x16ea311,0x0cda2aa,
+ 0x2a1a8d2,0x154b5cb,0x08348cd,0x17b66c8,0x080ea43,0x21e59f3,
+ 0x04173b9,0x31d5b04,0x00ad735 } },
+ /* 105 */
+ { { 0x2e76ef4,0x216acf3,0x2b93aea,0x112bc74,0x3449974,0x2b2e48f,
+ 0x11929be,0x2f03021,0x19051e3,0x0ac202d,0x19be68a,0x3b87619,
+ 0x26cdac4,0x086592c,0x00f00de },
+ { 0x2e90d4d,0x3ed703c,0x2c648d7,0x29ddf67,0x000e219,0x3471247,
+ 0x26febd5,0x1161713,0x3541a8f,0x302038d,0x08d2af9,0x26e1b21,
+ 0x398514a,0x36dad99,0x002ed70 } },
+ /* 106 */
+ { { 0x06f25cb,0x1104596,0x370faee,0x07e83f3,0x0f7b686,0x228d43a,
+ 0x12cd201,0x0a1bd57,0x3e592dc,0x1e186fc,0x2226aba,0x2c63fe9,
+ 0x17b039a,0x1efaa61,0x00d1582 },
+ { 0x2e6acef,0x07d51e4,0x3ac326c,0x322b07e,0x1422c63,0x32ff5c7,
+ 0x18760df,0x048928b,0x139b251,0x04d7da9,0x048d1a2,0x2a23e84,
+ 0x199dbba,0x2fa7afe,0x0049f1a } },
+ /* 107 */
+ { { 0x3492b73,0x27d3d3d,0x2b1a16f,0x07b2ce4,0x0cf28ec,0x2729bff,
+ 0x3130d46,0x3e96116,0x140b72e,0x14a2ea3,0x1ca066f,0x3a61f1d,
+ 0x022ebac,0x09192b4,0x003e399 },
+ { 0x12555bb,0x0b6139d,0x239463a,0x12a70ab,0x2aaa93b,0x2254e72,
+ 0x00424ec,0x26a6736,0x26daa11,0x25b5ad6,0x379f262,0x140cd30,
+ 0x0c7d3bd,0x097bbcf,0x00899e9 } },
+ /* 108 */
+ { { 0x3825dc4,0x3cd946f,0x0462b7f,0x31102e7,0x30f741c,0x3313ed6,
+ 0x1ff5a95,0x15bf9dc,0x09b47fd,0x0f2e7a7,0x1626c0d,0x3c14f6d,
+ 0x14098bd,0x19d7df8,0x00a97ce },
+ { 0x0934f5e,0x3f968db,0x046f68a,0x12333bf,0x26cd5e1,0x1ea2161,
+ 0x358570d,0x235031d,0x35edd55,0x05265e3,0x24ae00c,0x3542229,
+ 0x25bb2a1,0x1c83c75,0x0058f2a } },
+ /* 109 */
+ { { 0x24daedb,0x376928f,0x305266f,0x0499746,0x038318c,0x312efd7,
+ 0x1910a24,0x33450a3,0x1c478a9,0x39d8bf9,0x12cc0ae,0x397aeab,
+ 0x0654c08,0x095f283,0x00d2cdf },
+ { 0x0b717d2,0x1f162c2,0x107a48f,0x128e1b3,0x2380718,0x39f4044,
+ 0x00f626a,0x05ec0c9,0x21bc439,0x200fa4d,0x20aea01,0x186a1d8,
+ 0x26372f2,0x1a91f87,0x0053f55 } },
+ /* 110 */
+ { { 0x3512a90,0x33b958b,0x29f1c84,0x0106c3a,0x224b3c0,0x09b307a,
+ 0x215d2de,0x3bdf43b,0x22cf0c9,0x176121d,0x1534143,0x09ba717,
+ 0x16b3110,0x0f73f6c,0x008f5b7 },
+ { 0x2c75d95,0x26fbcb4,0x0dda1f6,0x206f819,0x28d33d5,0x1fb4d79,
+ 0x024c125,0x30a0630,0x1f9c309,0x0fe350d,0x1696019,0x0a54187,
+ 0x09541fd,0x35e3a79,0x0066618 } },
+ /* 111 */
+ { { 0x0e382de,0x33f5163,0x0dde571,0x3bb7a40,0x1175806,0x12ae8ed,
+ 0x0499653,0x3b25586,0x38ade7a,0x3fa265d,0x3f4aa97,0x3c03dbb,
+ 0x30c6de8,0x32d4042,0x00ae971 },
+ { 0x2f788f1,0x1fbaf0e,0x3e2d182,0x3ff904f,0x0d46229,0x1d0726d,
+ 0x15455b4,0x093ae28,0x290f8e4,0x097c0b9,0x1ae8771,0x28480bb,
+ 0x04f6d40,0x3689925,0x0049b3b } },
+ /* 112 */
+ { { 0x35b2d69,0x31819c0,0x11b0d63,0x035afb6,0x2b50715,0x2bece6c,
+ 0x35f82f7,0x0ad987c,0x0011601,0x02e6f67,0x2d0a5f5,0x365e583,
+ 0x2f7c900,0x11449c5,0x00ed705 },
+ { 0x27abdb4,0x1bbfd04,0x301c157,0x263c079,0x36850d6,0x3f21f8b,
+ 0x27d7493,0x0f9227e,0x06fb0ce,0x002daf3,0x37d8c1c,0x3ef87d7,
+ 0x19cc6f4,0x0c3809c,0x00cf752 } },
+ /* 113 */
+ { { 0x22d94ed,0x075b09c,0x020e676,0x084dc62,0x2d1ec3f,0x17439f1,
+ 0x240b702,0x33cc596,0x30ebaf3,0x0359fe0,0x393ea43,0x0ece01e,
+ 0x16c6963,0x03a82f2,0x0017faa },
+ { 0x3866b98,0x3cd20b7,0x12d4e6b,0x3a6a76d,0x1205c1e,0x3e6ae1a,
+ 0x2f9bbdf,0x2e61547,0x2d175ee,0x28e18f6,0x13cf442,0x085b0ef,
+ 0x0e321ef,0x238fe72,0x003fb22 } },
+ /* 114 */
+ { { 0x360ac07,0x26dc301,0x3f4d94f,0x2ba75e6,0x1f3c9cc,0x17ff20f,
+ 0x0ea084c,0x30e39cf,0x143dc49,0x03bd43e,0x3c9e733,0x19e8aba,
+ 0x27fbaf4,0x12d913a,0x005ee53 },
+ { 0x3609e7f,0x2d89c80,0x09f020c,0x1558bf7,0x3098443,0x3c515fd,
+ 0x1c8e580,0x16506bd,0x26cb4b2,0x1747d42,0x2ec8239,0x32c91f0,
+ 0x1ca3377,0x079768f,0x00a5f3e } },
+ /* 115 */
+ { { 0x185fa94,0x122759f,0x0e47023,0x0dcb6e7,0x10ba405,0x3b5eab4,
+ 0x1f7a1fa,0x32d003f,0x1739a4c,0x3295ec3,0x1b18967,0x3f3b265,
+ 0x34d2448,0x2dbadc9,0x00f30b5 },
+ { 0x01c5338,0x2d1dcf2,0x2bd07cc,0x39a8fb5,0x2b85639,0x355bab6,
+ 0x1df95f1,0x01eb5f6,0x17f0a16,0x1b895b5,0x157574d,0x29fff72,
+ 0x3a8c46d,0x0118071,0x0065f84 } },
+ /* 116 */
+ { { 0x3a1e7f1,0x17432f2,0x1f648d4,0x3000ad5,0x2ef0a08,0x1f86624,
+ 0x1ca31b1,0x241f9dc,0x2cb4885,0x2b8610f,0x364ce16,0x1e5faf0,
+ 0x0b33867,0x2cb637d,0x00816d2 },
+ { 0x1aa8671,0x02c394e,0x35f5e87,0x393040a,0x39f0db3,0x1c831a5,
+ 0x2966591,0x034a8d0,0x09e613c,0x042b532,0x018ddd6,0x3e402c9,
+ 0x2e20e1a,0x29cb4cd,0x00e087c } },
+ /* 117 */
+ { { 0x3a10079,0x20c7fea,0x3ff2222,0x1edb593,0x00dc5f8,0x3a32ccc,
+ 0x1479073,0x0cfed11,0x2a2702a,0x17a056a,0x1fba321,0x235acb9,
+ 0x149c833,0x172de7d,0x000f753 },
+ { 0x2e95923,0x3b365cb,0x009f471,0x0df1b47,0x21e868b,0x199bbd3,
+ 0x07b8ecc,0x12ff0af,0x189808a,0x3bd5059,0x3fbc4d2,0x0fa7b88,
+ 0x1125bf2,0x0db0b5d,0x0043572 } },
+ /* 118 */
+ { { 0x29cdb1b,0x1db656e,0x391efe1,0x004be09,0x245a1ca,0x3793328,
+ 0x254af24,0x2f2e65d,0x10e5cc4,0x2af6fe7,0x2d97ac0,0x29f7d42,
+ 0x19fd6f6,0x0ac184d,0x00c5211 },
+ { 0x305eae3,0x36738d3,0x2c2b696,0x00ba50e,0x3903adc,0x2122f85,
+ 0x0753470,0x1cf96a4,0x1702a39,0x247883c,0x2feb67e,0x2ab3071,
+ 0x3c6b9e1,0x30cb85a,0x002ca0a } },
+ /* 119 */
+ { { 0x3871eb5,0x284b93b,0x0a7affe,0x176a2fc,0x294c2f2,0x204d3aa,
+ 0x1e4c2a7,0x3ec4134,0x2fb0360,0x3847b45,0x05fc11b,0x0a6db6e,
+ 0x390fa40,0x2adfd34,0x005e9f7 },
+ { 0x0646612,0x1b5cbcc,0x10d8507,0x0777687,0x3a0afed,0x1687440,
+ 0x0222578,0x1af34a4,0x2174e27,0x372d267,0x11246c3,0x34769c5,
+ 0x2044316,0x1b4d626,0x00c72d5 } },
+ /* 120 */
+ { { 0x2e5bb45,0x3ff1d36,0x16dcdf5,0x128986f,0x399068c,0x2a63b1e,
+ 0x0afa7aa,0x3a5b770,0x200f121,0x33b74bb,0x1414045,0x0f31ef8,
+ 0x2f50e16,0x2f38cd6,0x00b0b1b },
+ { 0x1a06293,0x035e140,0x2644d44,0x1f1954b,0x2cdebab,0x31d5f91,
+ 0x0b8dbc8,0x38f2d23,0x3783cab,0x2a07e73,0x3123f59,0x3409846,
+ 0x3784ddd,0x223bbac,0x003dc7b } },
+ /* 121 */
+ { { 0x0741456,0x234e631,0x2121e1b,0x00980ca,0x3a9dfa9,0x098c916,
+ 0x3fc86d1,0x1c63072,0x3625244,0x13d0471,0x05b0fc5,0x1487550,
+ 0x2498596,0x11bb6ea,0x001afab },
+ { 0x274b4ad,0x240aea1,0x3d12a75,0x2b56b61,0x1486b43,0x1b83426,
+ 0x31c7363,0x35b59ca,0x207bb6c,0x38e6243,0x19bace4,0x0a26671,
+ 0x35e3381,0x0c2ded4,0x00d8da4 } },
+ /* 122 */
+ { { 0x2b75791,0x19590b1,0x2bfb39f,0x2988601,0x0050947,0x0d8bbe1,
+ 0x23e3701,0x08e4432,0x2ed8c3d,0x326f182,0x332e1dd,0x12219c5,
+ 0x2e0779b,0x367aa63,0x0012d10 },
+ { 0x251b7dc,0x0a08b4d,0x1138b6f,0x2ea02af,0x06345a5,0x1cb4f21,
+ 0x0332624,0x1d49d88,0x140acc5,0x2f55287,0x024447c,0x291ace9,
+ 0x1a4966e,0x015cbec,0x005bc41 } },
+ /* 123 */
+ { { 0x351cd0e,0x315e8e9,0x07d6e70,0x067ae8f,0x2190d84,0x351f556,
+ 0x03bee79,0x31b62c7,0x266f912,0x1b6a504,0x007a6ad,0x3a6ab31,
+ 0x3891112,0x3c45ba0,0x00d6ce5 },
+ { 0x0e1f2ce,0x32a5edc,0x1434063,0x1ca084f,0x2a3e47c,0x137e042,
+ 0x16e2418,0x2069280,0x3b0dfd8,0x35a22b5,0x289bf0a,0x1f667f2,
+ 0x02d23a3,0x0ce688f,0x00d8e3f } },
+ /* 124 */
+ { { 0x10bed6f,0x14c58dd,0x0b0abdf,0x0ca0f9a,0x3808abc,0x2ec228c,
+ 0x2366275,0x12afa16,0x20f6b0e,0x37dca8e,0x3af0c6a,0x1c5b467,
+ 0x1b25ff7,0x00814de,0x0022dcc },
+ { 0x1a56e11,0x02fe37e,0x3f21740,0x35d5a91,0x06cb8ba,0x29bad91,
+ 0x17176f7,0x2d919f2,0x0f7d1f5,0x13a3f61,0x04ddb05,0x0c82a51,
+ 0x286f598,0x2e8c777,0x0007071 } },
+ /* 125 */
+ { { 0x0f8fcb9,0x3e83966,0x170c6fd,0x3825343,0x089cec8,0x01b482a,
+ 0x0993971,0x3327282,0x39aba8a,0x32456fe,0x1507e01,0x1c3252d,
+ 0x21ffb13,0x29822a0,0x0083246 },
+ { 0x23c378f,0x1cea7ef,0x1be9a82,0x224d689,0x37e5447,0x3764a75,
+ 0x3a49724,0x361e1b3,0x19d365b,0x3a61ffb,0x1c29a7a,0x20ab251,
+ 0x17ec549,0x175d777,0x004589a } },
+ /* 126 */
+ { { 0x15540a9,0x2ec5d2a,0x05b09fa,0x1bc058b,0x07cfb88,0x28f7b86,
+ 0x3e766be,0x189305e,0x01fe88e,0x23fdf69,0x0b919c3,0x02dc7ae,
+ 0x3f9a9ad,0x0b83cc7,0x0086a52 },
+ { 0x28bc259,0x39bdca1,0x39e4bc8,0x0e0f33b,0x16130c6,0x2919955,
+ 0x31f4549,0x2fed027,0x30919b2,0x0a39b03,0x0ca7bb2,0x1711b24,
+ 0x3b67b94,0x05a136b,0x00acd87 } },
+ /* 127 */
+ { { 0x0c53841,0x31cb284,0x3ced090,0x06d5693,0x1c20ae0,0x0408d2b,
+ 0x37ebd5e,0x081900f,0x26a8589,0x0acfd0a,0x34a1472,0x2f0c302,
+ 0x124ccbd,0x10de328,0x00971bc },
+ { 0x17ff2ff,0x27d1b54,0x147b6f7,0x38bb2ea,0x26a9c96,0x0a49448,
+ 0x39f2f46,0x247c579,0x3b16a4e,0x28c2a5a,0x2d4c72d,0x11f248c,
+ 0x1e4df11,0x047d604,0x0065bc3 } },
+ /* 128 */
+ { { 0x39b3239,0x1f75f44,0x3bae87c,0x139360c,0x18b5782,0x3ffc005,
+ 0x3c48789,0x2bc6af2,0x38b909e,0x223ff3b,0x31443a7,0x017d3bb,
+ 0x0bfed99,0x128b857,0x00020dd },
+ { 0x306d695,0x25a7b28,0x2f60ca2,0x2b6e4f2,0x1df940c,0x1fa9b8e,
+ 0x37fab78,0x13f959f,0x10ff98c,0x38343b8,0x019cb91,0x11a1e6b,
+ 0x17ab4c6,0x1431f47,0x004b4ea } },
+ /* 129 */
+ { { 0x20db57e,0x102515e,0x170219e,0x2b66a32,0x1e6017c,0x2f973fe,
+ 0x3739e51,0x0e28b6f,0x3cda7a9,0x30d91ac,0x28350df,0x1444215,
+ 0x098b504,0x1bcd5b8,0x00ad3bd },
+ { 0x22e3e3e,0x3aeaffb,0x26cb935,0x0091ce4,0x2fbd017,0x3a7ed6a,
+ 0x335b029,0x3bfc1f1,0x3852e3f,0x2b14a86,0x046b405,0x266af4c,
+ 0x3997191,0x33b0e40,0x00e306f } },
+ /* 130 */
+ { { 0x3e4712c,0x26bb208,0x18eed6d,0x1b30f06,0x27ca837,0x06faf62,
+ 0x1831873,0x3fbcf9b,0x3f3d88b,0x1fb55eb,0x0f44edc,0x29917bb,
+ 0x3151772,0x342d72e,0x00d4e63 },
+ { 0x2ee0ecf,0x39e8733,0x2e8e98c,0x0cd4e0f,0x08f0126,0x1ad157a,
+ 0x079078a,0x23018ee,0x196c765,0x2b2f34f,0x0783336,0x075bf9c,
+ 0x3713672,0x098d699,0x00f21a7 } },
+ /* 131 */
+ { { 0x186ba11,0x22cf365,0x048019d,0x2ca2970,0x0d9e0ae,0x08c3bd7,
+ 0x261dbf2,0x2fc2790,0x1ee02e6,0x10256a7,0x00dc778,0x18dc8f2,
+ 0x157b189,0x2ebc514,0x005c97d },
+ { 0x3c4503e,0x1d10d12,0x337097e,0x0c6169a,0x30fb1cb,0x3481752,
+ 0x0df2bec,0x19768fa,0x1bcf8f7,0x2925f74,0x2c988a1,0x3be571d,
+ 0x04cfa92,0x2ea9937,0x003f924 } },
+ /* 132 */
+ { { 0x268b448,0x06e375c,0x1b946bf,0x287bf5e,0x3d4c28b,0x138d547,
+ 0x21f8c8e,0x21ea4be,0x2d45c91,0x35da78e,0x00326c0,0x210ed35,
+ 0x1d66928,0x0251435,0x00fefc8 },
+ { 0x0339366,0x216ff64,0x2c3a30c,0x3c5733d,0x04eeb56,0x2333477,
+ 0x32b1492,0x25e3839,0x1b5f2ce,0x0dcfba1,0x3165bb2,0x3acafcc,
+ 0x10abfcd,0x248d390,0x008106c } },
+ /* 133 */
+ { { 0x102f4ee,0x3c0585f,0x1225c8d,0x11c6388,0x08a7815,0x2b3e790,
+ 0x2895eb6,0x18cf53a,0x0b56e5a,0x2e2c003,0x3e981ff,0x0761b55,
+ 0x1bc32f3,0x0a7111d,0x00f5c80 },
+ { 0x3568973,0x1587386,0x16ec764,0x20698a6,0x02f809b,0x2821502,
+ 0x113d64d,0x38c2679,0x15de61c,0x0309f60,0x272999e,0x29bfe64,
+ 0x173f70d,0x1de7fab,0x00bd284 } },
+ /* 134 */
+ { { 0x31cdf2b,0x0f0be66,0x2151603,0x01af17e,0x32a99cf,0x085dece,
+ 0x27d2591,0x1520df4,0x273c448,0x1ec7c54,0x102e229,0x355f604,
+ 0x2acb75f,0x005f1fd,0x003d43e },
+ { 0x270eb28,0x22ec2ce,0x306b41a,0x238fa02,0x167de2d,0x030a379,
+ 0x245a417,0x1808c24,0x0b1a7b2,0x3ab5f6f,0x2cbc6c1,0x2c228d4,
+ 0x3041f70,0x2d9a6cc,0x00b504f } },
+ /* 135 */
+ { { 0x17a27c2,0x216ad7e,0x011ba8e,0x22f0428,0x16ac5ec,0x3ef3c58,
+ 0x345533f,0x0298155,0x2856579,0x0005e03,0x19ee75b,0x146fe16,
+ 0x29881e4,0x18ece70,0x008907a },
+ { 0x20189ed,0x119ce09,0x35cb76d,0x0d91ef4,0x2284a44,0x032ad87,
+ 0x0e8c402,0x3c82b5d,0x38c416c,0x398992f,0x1fd820c,0x169b255,
+ 0x3b5fcfa,0x1343c92,0x00fa715 } },
+ /* 136 */
+ { { 0x33f5034,0x20b3b26,0x28fd184,0x16b3679,0x3962d44,0x15d1bc8,
+ 0x2fb1d69,0x1292c99,0x25a58c9,0x1b19ab7,0x2d68a5b,0x2f6a09b,
+ 0x0d6aedb,0x2935eac,0x0005664 },
+ { 0x25e32fc,0x13f9440,0x3252bcd,0x2fea5b7,0x161a5ae,0x0564a8c,
+ 0x0a07e23,0x1545f62,0x0de9890,0x1d76765,0x1fd440e,0x2ed0041,
+ 0x3db4c96,0x1e8ba01,0x001b0c4 } },
+ /* 137 */
+ { { 0x0223878,0x29ab202,0x15585c2,0x1a79969,0x1ba08c2,0x2ef09ff,
+ 0x2b1b9b9,0x181f748,0x1bf72b9,0x224645c,0x2588dc5,0x2d157e7,
+ 0x22d939a,0x05b88d9,0x006d549 },
+ { 0x31de0c1,0x23a4e0e,0x278f8da,0x1aa013c,0x1a84d18,0x0d185a5,
+ 0x0988ccd,0x2c32efd,0x3bee10e,0x37d7ab8,0x3f2a66e,0x3e2da3e,
+ 0x1b5701f,0x3d9f0c1,0x00a68da } },
+ /* 138 */
+ { { 0x0b2e045,0x0133fd1,0x05d4c10,0x0d92c70,0x391b5e1,0x2292281,
+ 0x2e40908,0x2ec694e,0x195ea11,0x29cfeca,0x3d93a4e,0x01215c0,
+ 0x08a5f32,0x37a0eff,0x00cce45 },
+ { 0x2b3106e,0x12a5fb0,0x0b4faff,0x0c2da12,0x09069c6,0x35d8907,
+ 0x2837a6e,0x3db3fb6,0x3136cc3,0x222836b,0x3da018a,0x2741274,
+ 0x13ba319,0x1ac7642,0x00f867c } },
+ /* 139 */
+ { { 0x2527296,0x10a9595,0x178de4d,0x0f739c4,0x0ae26c7,0x3094599,
+ 0x20adac6,0x2b875c2,0x3ae5dc0,0x3e04d20,0x1aab2da,0x1d3ab37,
+ 0x15f4f75,0x0b730b5,0x00c56b5 },
+ { 0x1f32923,0x2f059e5,0x2a89872,0x2056f74,0x04be175,0x1da67c0,
+ 0x17f1e7a,0x3780a6d,0x0723ac2,0x257f367,0x1237773,0x2bcee86,
+ 0x0b97f83,0x38aff14,0x00a64d4 } },
+ /* 140 */
+ { { 0x2552b40,0x0b6b883,0x12e8217,0x0974d35,0x062f497,0x1e563e6,
+ 0x30ee400,0x375d1e4,0x290751f,0x0d5b68a,0x353e48c,0x064a0d3,
+ 0x3c343f1,0x309a394,0x0034d2a },
+ { 0x3111286,0x0f08604,0x1827107,0x0536a76,0x0201dac,0x3a574de,
+ 0x2c29dbe,0x382c7b0,0x1191f3e,0x324c5bc,0x144ce71,0x24327c1,
+ 0x1212778,0x22bc9d8,0x00d7713 } },
+ /* 141 */
+ { { 0x34ad1cd,0x1179b4e,0x1bc1780,0x1392a92,0x2cd86b9,0x359de85,
+ 0x251f1df,0x0da5d5f,0x135fa61,0x0f64a42,0x34f4d89,0x0fe564c,
+ 0x3cf9b7a,0x122d757,0x008c9c2 },
+ { 0x370d4e9,0x0e9209b,0x0ae99f2,0x1518c64,0x0172734,0x2c20692,
+ 0x1d7c135,0x149c52f,0x38928d6,0x3c78b78,0x25841d1,0x2eaa897,
+ 0x372e50b,0x29e5d19,0x00c4c18 } },
+ /* 142 */
+ { { 0x13375ac,0x389a056,0x211310e,0x2f9f757,0x04f3288,0x103cd4e,
+ 0x17b2fb2,0x2c78a6a,0x09f1de6,0x23e8442,0x1351bc5,0x1b69588,
+ 0x285b551,0x0464b7e,0x00573b6 },
+ { 0x0ba7df5,0x259a0db,0x2b4089e,0x05630a2,0x3f299be,0x350ff2f,
+ 0x1c9348a,0x3becfa4,0x3cc9a1c,0x17a6ef1,0x338b277,0x2b761d9,
+ 0x2aa01c8,0x3cb9dd7,0x006e3b1 } },
+ /* 143 */
+ { { 0x277788b,0x16a222d,0x173c036,0x310ff58,0x2634ae8,0x392636f,
+ 0x0987619,0x1e6acc1,0x26dc8f7,0x242310f,0x0c09aca,0x22b8e11,
+ 0x0d17006,0x1c2c806,0x002380c },
+ { 0x297c5ec,0x1fef0e8,0x3948cf7,0x14f2915,0x2dacbc8,0x0dafb1f,
+ 0x10de043,0x31184da,0x06414ee,0x3c9aeeb,0x1f713ab,0x308f1f8,
+ 0x1569ed1,0x3f379bf,0x00f08bb } },
+ /* 144 */
+ { { 0x0770ee3,0x058fd21,0x17065f8,0x251d128,0x10e0c7f,0x06cb51b,
+ 0x0f05f7e,0x3666a72,0x3e7d01f,0x2d05fab,0x11440e5,0x28577d4,
+ 0x2fbcf2b,0x14aa469,0x00dc5c5 },
+ { 0x270f721,0x1c75d28,0x085b862,0x1d68011,0x132c0a0,0x37be81d,
+ 0x1a87e38,0x083fa74,0x3acbf0d,0x16d6429,0x0feda1f,0x031070a,
+ 0x2ec2443,0x21e563d,0x00454d2 } },
+ /* 145 */
+ { { 0x0525435,0x1e98d5f,0x3dbc52b,0x1fcdf12,0x13d9ef5,0x3ff311d,
+ 0x393e9ed,0x3cef8ae,0x2987710,0x3bdee2e,0x21b727d,0x3ba1b68,
+ 0x10d0142,0x3c64b92,0x0055ac3 },
+ { 0x0c1c390,0x38e9bb0,0x1e7b487,0x11511b3,0x1036fb3,0x25aba54,
+ 0x1eb2764,0x048d022,0x0d971ed,0x1bb7fb5,0x100f0b4,0x06c3756,
+ 0x2f0d366,0x3c6e160,0x0011bd6 } },
+ /* 146 */
+ { { 0x36bc9d1,0x24d43c1,0x12c35cf,0x2fb3cf3,0x015d903,0x16bc0c7,
+ 0x0fc8c22,0x3195c87,0x2488b1c,0x1f82b4c,0x30014e8,0x27ee58d,
+ 0x31658dd,0x1684a5f,0x00f0f3a },
+ { 0x1f703aa,0x023eebc,0x20babb9,0x080bd9d,0x12f9cc4,0x1a8e2d4,
+ 0x0eec666,0x1176803,0x33005d6,0x1137b68,0x37de339,0x33d71cb,
+ 0x0c906b9,0x14086b5,0x00aeef6 } },
+ /* 147 */
+ { { 0x219045d,0x0f22c5e,0x024c058,0x00b414a,0x0ae7c31,0x3db3e96,
+ 0x234979f,0x0cf00a8,0x3c962c7,0x27fa77f,0x1c0c4b0,0x1fe8942,
+ 0x218053a,0x1eed3f8,0x0051643 },
+ { 0x2a23ddb,0x138f570,0x104e945,0x21ca270,0x30726d8,0x3f45490,
+ 0x37d9184,0x242ea25,0x33f6d77,0x3f15679,0x065af85,0x34fa1f5,
+ 0x2e46b8f,0x31d17fb,0x00a2615 } },
+ /* 148 */
+ { { 0x335167d,0x181ea10,0x0887c8d,0x01383d7,0x18b42d8,0x263447e,
+ 0x1f13df3,0x0319d7e,0x0872074,0x2d6aa94,0x23d9234,0x36a69aa,
+ 0x0bad183,0x3138a95,0x00bd3a5 },
+ { 0x1b0f658,0x0e4530b,0x373add1,0x1b968fc,0x329dcb6,0x09169ca,
+ 0x162df55,0x0211eff,0x02391e4,0x3867460,0x3136b1a,0x37dd36e,
+ 0x3bc5bd9,0x2dacfe4,0x0072a06 } },
+ /* 149 */
+ { { 0x119d96f,0x067b0eb,0x00996da,0x293eca9,0x2b342da,0x1889c7a,
+ 0x21633a6,0x0152c39,0x281ce8c,0x18ef3b3,0x0bd62dc,0x3238186,
+ 0x38d8b7c,0x3867b95,0x00ae189 },
+ { 0x0ed1eed,0x1e89777,0x13ab73e,0x029e1d7,0x2c1257f,0x33fbc09,
+ 0x32d5a21,0x3d870b2,0x39bb1fd,0x33663bc,0x24e83e6,0x239bda4,
+ 0x3088bcd,0x01db1ed,0x00d71e7 } },
+ /* 150 */
+ { { 0x14245bf,0x0da0c27,0x153b339,0x05cab0a,0x122d962,0x1b0f0f3,
+ 0x3f5a825,0x267a2ce,0x2910d06,0x254326f,0x0f36645,0x025118e,
+ 0x37c35ec,0x36e944e,0x006c056 },
+ { 0x05ab0e3,0x29aa0c1,0x1295687,0x1fd1172,0x08d40b5,0x05bd655,
+ 0x345048a,0x02a1c3c,0x2393d8f,0x0992d71,0x1f71c5e,0x18d4e8a,
+ 0x30dd410,0x11d61d3,0x00dd58b } },
+ /* 151 */
+ { { 0x2230c72,0x30213d8,0x05e367e,0x329204e,0x0f14f6c,0x3369ddd,
+ 0x0bb4074,0x2edafd6,0x1b1aa2d,0x0785404,0x0c035ab,0x220da74,
+ 0x1f2fdd4,0x092a091,0x00ef83c },
+ { 0x3dc2538,0x1cca3e7,0x246afb5,0x24c647f,0x0798082,0x0bb7952,
+ 0x0f5c443,0x008b38a,0x299ea1a,0x3c6cf36,0x3df2ec7,0x398e6dc,
+ 0x29a1839,0x1cadd83,0x0077b62 } },
+ /* 152 */
+ { { 0x25d56d5,0x3546f69,0x16e02b1,0x3e5fa9a,0x03a9b71,0x2413d31,
+ 0x250ecc9,0x1d2de54,0x2ebe757,0x2a2f135,0x2aeeb9a,0x0d0fe2b,
+ 0x204cb0e,0x07464c3,0x00c473c },
+ { 0x24cd8ae,0x0c86c41,0x221c282,0x0795588,0x1f4b437,0x06fc488,
+ 0x0c81ecd,0x020bf07,0x3a9e2c8,0x2294a81,0x3a64a95,0x0363966,
+ 0x32c9a35,0x0f79bec,0x0029e4f } },
+ /* 153 */
+ { { 0x289aaa5,0x2755b2e,0x059e0aa,0x3031318,0x0f0208a,0x35b7729,
+ 0x00d9c6b,0x3dd29d0,0x075f2c2,0x0ece139,0x31562dd,0x04187f2,
+ 0x13b8d4c,0x0920b85,0x003924e },
+ { 0x09808ab,0x2e36621,0x2a36f38,0x1829246,0x229bf32,0x20883b7,
+ 0x159ada8,0x3108a14,0x15bbe5b,0x1e2d1e4,0x1730096,0x0d35cbb,
+ 0x15d0da9,0x0e60b94,0x00c4f30 } },
+ /* 154 */
+ { { 0x31de38b,0x27b9086,0x2760e3e,0x169098d,0x2a124e2,0x00596c6,
+ 0x3f73c09,0x0d31642,0x2341464,0x248600a,0x2e1fa10,0x2aa0fc8,
+ 0x051e954,0x00f3b67,0x001d4bd },
+ { 0x18751e6,0x25a8e1e,0x07f5c2d,0x17e30d4,0x0ed2723,0x23093e2,
+ 0x3b80e2c,0x13de2d7,0x2fad37f,0x1be1cfb,0x3224ba9,0x0a7f5d3,
+ 0x1714972,0x06667b7,0x009dcd9 } },
+ /* 155 */
+ { { 0x294f22a,0x3e06993,0x0341ee9,0x24bdc7b,0x2e56098,0x2660a13,
+ 0x018ddda,0x2c261b2,0x2953b54,0x267f51c,0x0e8a7cc,0x29ab00c,
+ 0x3a38247,0x397ac81,0x00de684 },
+ { 0x36b956b,0x347b34a,0x35834bd,0x053c06c,0x0090844,0x148cec5,
+ 0x380b325,0x2f17b8b,0x054ef5e,0x09683fb,0x3f8b29a,0x33c979a,
+ 0x1e01474,0x3e81fca,0x001c757 } },
+ /* 156 */
+ { { 0x30fdfe4,0x2d712ba,0x13671bc,0x2cfc226,0x3d7c649,0x16f020e,
+ 0x368e3f0,0x2981ebb,0x246a78a,0x115e81b,0x21223a4,0x04dbb30,
+ 0x1a50ba2,0x12114bd,0x0089bd6 },
+ { 0x055f15a,0x1046e51,0x00fd724,0x1c022a7,0x323dfa9,0x36d8efb,
+ 0x0da4d16,0x0910dec,0x2c1fb16,0x2dbe29f,0x298284f,0x2b273bb,
+ 0x26022c1,0x20accd5,0x00085a5 } },
+ /* 157 */
+ { { 0x01f138a,0x2d87e7b,0x0c2815c,0x0c19a3c,0x311c9a2,0x3e4fce3,
+ 0x029729d,0x21236b2,0x2984048,0x3f3bc95,0x2bba8fb,0x1a1b680,
+ 0x0619a3f,0x29e0447,0x00ed5fe },
+ { 0x2d1c833,0x3dcef35,0x3f809b4,0x01a1b9e,0x1509516,0x10ac754,
+ 0x2735080,0x27b0a8a,0x2495fb8,0x0a7bdba,0x1ef8b89,0x00233a5,
+ 0x0568bf1,0x1a126ba,0x0078a7e } },
+ /* 158 */
+ { { 0x0470cd8,0x20e9f04,0x30003fe,0x20be1b7,0x1927346,0x2a5026d,
+ 0x1ac06bd,0x2717ed7,0x2609493,0x3079ea5,0x1cc116d,0x31b0541,
+ 0x2c8ccde,0x10219ae,0x001a52b },
+ { 0x2864045,0x0e8d95b,0x2fc1530,0x0aa44e7,0x345eae7,0x3cc7553,
+ 0x3ec6466,0x229b60e,0x06f6e95,0x00bed2a,0x0ff4403,0x181c639,
+ 0x2e0df67,0x1f8fa46,0x0000811 } },
+ /* 159 */
+ { { 0x04310a2,0x20cee8e,0x09fc5d5,0x3707f5b,0x0bdfb4e,0x12713ee,
+ 0x24f1028,0x0787ee6,0x39a581c,0x3797ec8,0x10a9746,0x112cb9f,
+ 0x142b9ba,0x1da0ef6,0x0078f7b },
+ { 0x07607ae,0x3232872,0x2a7e076,0x0bb572a,0x182b23c,0x1d8f918,
+ 0x181f392,0x37c45a9,0x24a3886,0x0b2a297,0x264e7f2,0x1fa433c,
+ 0x0fcfcc8,0x21c0857,0x0004f74 } },
+ /* 160 */
+ { { 0x01d161c,0x1744585,0x2d17528,0x03a4f13,0x267cd2e,0x30d861f,
+ 0x062a647,0x213284b,0x139ed25,0x27d4ca5,0x02fbbd6,0x31ddf11,
+ 0x3c50ac4,0x1dd86f7,0x00107de },
+ { 0x16beebd,0x1b7317a,0x2151997,0x256a196,0x3be2aff,0x3621cab,
+ 0x0a9da19,0x05f3038,0x23da63c,0x3178d5e,0x215cc67,0x07f7f63,
+ 0x0c6d8d3,0x3bf5e5c,0x00c44bb } },
+ /* 161 */
+ { { 0x00c62f1,0x3e0f893,0x1572703,0x3b93865,0x19b1e28,0x389b33b,
+ 0x02858bf,0x0e3e9aa,0x04bc436,0x234e072,0x25ba43d,0x3dca19e,
+ 0x0274394,0x20f442e,0x003b4a7 },
+ { 0x176451e,0x2b5ed5d,0x35c8ee1,0x25c52da,0x0c3d0b5,0x32b306e,
+ 0x030954f,0x275ecf7,0x10e472c,0x21577c4,0x02f8a32,0x321bb5c,
+ 0x0098f97,0x104e237,0x00d0433 } },
+ /* 162 */
+ { { 0x0a8f2fe,0x034548b,0x141f1a6,0x121246f,0x1616409,0x237f80d,
+ 0x2e29a55,0x1218db6,0x3ea278e,0x1669856,0x1ad7c8e,0x36d11de,
+ 0x2c2fcbb,0x18c0b3a,0x001c706 },
+ { 0x1699b4b,0x2d531a6,0x17e85e2,0x1b48e78,0x2b509ca,0x2818ea0,
+ 0x0165fee,0x0b809ca,0x09db6a2,0x3dad798,0x326ee1d,0x204e416,
+ 0x091fa12,0x1c890e5,0x0007b9f } },
+ /* 163 */
+ { { 0x0ff4e49,0x0bb0512,0x0129159,0x05db591,0x03e4e9f,0x055ab30,
+ 0x0f82881,0x0ac2deb,0x3a8bb09,0x356a8d2,0x3d38393,0x03e4089,
+ 0x38187cd,0x1377a93,0x0041672 },
+ { 0x0139e73,0x3990730,0x187d3c4,0x33e4793,0x2e0fe46,0x2ad87e2,
+ 0x33c792c,0x21d4fb6,0x1e4d386,0x2932d1b,0x20f1098,0x1270874,
+ 0x0ea6ee4,0x0167d6e,0x005e5fd } },
+ /* 164 */
+ { { 0x1856031,0x2b7519d,0x3bd07fc,0x337abcb,0x089c7a4,0x2a1f120,
+ 0x3523ce7,0x2ba406b,0x09561d9,0x1797f04,0x3cdb95f,0x2d6193e,
+ 0x32c7d3f,0x223aed6,0x00beb51 },
+ { 0x2e65825,0x158f0ce,0x16413d1,0x310395f,0x3116854,0x250baf4,
+ 0x373d341,0x156cc47,0x104c069,0x0893716,0x195a0a6,0x035320e,
+ 0x37b7d8a,0x21b5755,0x00fb26b } },
+ /* 165 */
+ { { 0x286ae17,0x04239f1,0x1a56c53,0x0e74707,0x29090d7,0x2bb142b,
+ 0x03b0139,0x1aac916,0x08ba49a,0x0376682,0x3382f85,0x064bbab,
+ 0x2910e28,0x1d5bd7f,0x00cc8df },
+ { 0x0ab7630,0x208e8e7,0x3fc1877,0x26bee39,0x264984a,0x192ff05,
+ 0x08ef9c3,0x0aa6951,0x071c44e,0x26eed3e,0x035c95e,0x06906ad,
+ 0x10a0690,0x397eaa9,0x00c6c23 } },
+ /* 166 */
+ { { 0x034d8dd,0x005b064,0x279bb78,0x12c2c4f,0x1856bb4,0x0c90681,
+ 0x06409ab,0x3b48617,0x19a2d78,0x0a34bf8,0x326eddf,0x31f09b5,
+ 0x04f04dc,0x3d7c944,0x003ccaf },
+ { 0x321f843,0x35fb71a,0x1e4c397,0x377a5d7,0x2da88e4,0x3d6ada7,
+ 0x33d3964,0x1b30149,0x0e39aae,0x054dda0,0x3e6f946,0x1273394,
+ 0x3ffd3f7,0x2f6655e,0x00021dd } },
+ /* 167 */
+ { { 0x37233cf,0x11617dd,0x26f07b6,0x3d8250a,0x0fe6771,0x3f9bbbc,
+ 0x2aba7ad,0x200a58d,0x3568603,0x198eefa,0x1e8fcf3,0x3b9610b,
+ 0x20524ac,0x2a67528,0x0048d9a },
+ { 0x1a5e57a,0x1e9d303,0x16c9cff,0x0f39527,0x3c23259,0x03c8a1e,
+ 0x104bccf,0x182d5a1,0x18dbc83,0x05b5f42,0x1b402f4,0x317c525,
+ 0x11bf1ea,0x3c46e1f,0x0061936 } },
+ /* 168 */
+ { { 0x0153a9d,0x36859ee,0x2cf0aa9,0x2b27a0f,0x0a49fe3,0x2d984e1,
+ 0x018f8e1,0x1378453,0x1ab3843,0x1987093,0x283dae9,0x25cf0e8,
+ 0x14fc93d,0x280609d,0x00c99ba },
+ { 0x026b1e3,0x34663d3,0x2202477,0x21a9d45,0x212e8e1,0x18ab77e,
+ 0x2e52f63,0x0a14ce1,0x295c396,0x00c7a3d,0x2aaedb6,0x30abc4d,
+ 0x374acde,0x1318a73,0x00fcfdb } },
+ /* 169 */
+ { { 0x0a40298,0x3ba5633,0x11956b3,0x14fcbd7,0x3c38781,0x34bab96,
+ 0x165630e,0x1f3c831,0x37e3a69,0x2b4226c,0x2d5029e,0x3b4ab1e,
+ 0x1da6ac2,0x3eb43c3,0x007e5cd },
+ { 0x1b86202,0x109b7f6,0x2054f98,0x2c50cd7,0x2ed1960,0x3c518e7,
+ 0x1b02463,0x319c07f,0x1c30db6,0x045fdc2,0x373421e,0x31a1eb9,
+ 0x1a8acbf,0x31289b0,0x0013fef } },
+ /* 170 */
+ { { 0x3fa0a5f,0x068661f,0x2109e36,0x00b18ff,0x1f4b261,0x31d3844,
+ 0x0acbc56,0x3aebc99,0x1fa77ab,0x152bd11,0x24cddb7,0x2313f74,
+ 0x06eea44,0x15f5114,0x000b131 },
+ { 0x2e9993d,0x1ac565c,0x2cbe22a,0x3921797,0x12c3c57,0x360f868,
+ 0x33560bf,0x320ee99,0x382c3b8,0x39af88f,0x00bbe38,0x2c4ea59,
+ 0x3399b40,0x00ceb45,0x0066eea } },
+ /* 171 */
+ { { 0x0c6c693,0x31ba56d,0x3d3849f,0x378dabd,0x0efc735,0x17f90bf,
+ 0x13343d3,0x2df0f81,0x27c6a9a,0x13c2a90,0x0a0fcb2,0x27c10d9,
+ 0x3bc50c7,0x090e4fa,0x0016287 },
+ { 0x2927e1e,0x35af405,0x184c5c3,0x3499cee,0x240158e,0x33522e6,
+ 0x386fc84,0x0a0b69f,0x1a660ea,0x34590fb,0x22a1bee,0x2ce4fab,
+ 0x31a9445,0x0e78655,0x00664c8 } },
+ /* 172 */
+ { { 0x3eeaf94,0x115d409,0x21e7577,0x097aa67,0x22875c9,0x021ab7a,
+ 0x27e7ba5,0x1093f04,0x2a086fe,0x05d9494,0x2b6c028,0x10f31b0,
+ 0x1312d11,0x262759c,0x00c9bb2 },
+ { 0x1acb0a5,0x30cdf14,0x0f78880,0x0574f18,0x1a37109,0x098adbb,
+ 0x2113c09,0x2060925,0x1f89ce4,0x1974976,0x3381358,0x2dab5ca,
+ 0x2159c53,0x3af1303,0x000ea3b } },
+ /* 173 */
+ { { 0x1e49bea,0x29142b1,0x1a59cab,0x055f017,0x0684e54,0x39eb0db,
+ 0x29cab9d,0x255ee8b,0x35f2e6f,0x05329e6,0x09b817b,0x1ec091c,
+ 0x1df0fef,0x2641f62,0x00eb304 },
+ { 0x2fe5096,0x3dcc1d1,0x2aaf508,0x3a0b813,0x0695810,0x144bddb,
+ 0x2f1bd93,0x281ae23,0x3513ebc,0x1ddd984,0x0cf158b,0x35218eb,
+ 0x257daf7,0x391253b,0x00b2a81 } },
+ /* 174 */
+ { { 0x153e6ba,0x22396db,0x0ea2ff2,0x2a45121,0x0a90de1,0x34cf23b,
+ 0x2db60ce,0x1a900be,0x2f328b6,0x355e75b,0x2c24372,0x0b75b77,
+ 0x2ec7d4f,0x3f24759,0x00e9e33 },
+ { 0x39eab6e,0x2267480,0x3b5e110,0x1e8fa5e,0x2a31a66,0x3f739a3,
+ 0x00166dc,0x3552d88,0x3ae5137,0x3efa0fa,0x0800acd,0x17df61d,
+ 0x38c8608,0x04cc31b,0x00cf4ab } },
+ /* 175 */
+ { { 0x31e08fb,0x1961164,0x22c003f,0x078541b,0x3643855,0x30da587,
+ 0x11f0dc9,0x324595e,0x329e3dc,0x29a041e,0x3495d2c,0x0908dd3,
+ 0x1895b83,0x198dbb9,0x00d8cfb },
+ { 0x0349b1b,0x383c5a8,0x2b86525,0x1b1283e,0x133cd2c,0x2be376a,
+ 0x012ee82,0x1eb4d1b,0x0ba71e9,0x01f3109,0x37621eb,0x1d9b77c,
+ 0x0d39069,0x3d5a97c,0x0095565 } },
+ /* 176 */
+ { { 0x20f5e94,0x1eefc86,0x1327e0e,0x054760b,0x2f771e1,0x3ac447e,
+ 0x033e3dc,0x198e040,0x04dd342,0x1b49a5d,0x00d01ef,0x3cb6768,
+ 0x1ceafbd,0x31c6812,0x001cb80 },
+ { 0x221c677,0x060ca27,0x398b17f,0x0146723,0x36452af,0x02d9e65,
+ 0x39c5f78,0x3cf50d6,0x0be40f8,0x2970b87,0x26d667c,0x3e45959,
+ 0x16e7943,0x01673e7,0x009faaa } },
+ /* 177 */
+ { { 0x2078fe6,0x0918602,0x11dd8ad,0x399193f,0x0f6cc73,0x0f8dd12,
+ 0x2ce34dc,0x06d7d34,0x0c5e327,0x0989254,0x2fc5af7,0x2443d7b,
+ 0x32bc662,0x2fe2a84,0x008b585 },
+ { 0x039327f,0x08e616a,0x252f117,0x1f52ab0,0x234e2d2,0x0a5b313,
+ 0x2f59ef6,0x0f7a500,0x15c4705,0x2c02b81,0x28b4f09,0x08aa5c8,
+ 0x0180efc,0x0993e83,0x00a9e86 } },
+ /* 178 */
+ { { 0x0310ecc,0x2d8892f,0x14ed0b7,0x3c59fe8,0x08a1a74,0x0850e57,
+ 0x1d09607,0x044a21f,0x109f5c9,0x237c6cf,0x06b264a,0x3fc8f1a,
+ 0x0d4c539,0x2740f96,0x00dc2d4 },
+ { 0x1d6f501,0x0adf4ea,0x14f7215,0x0930102,0x3f4c32e,0x24e2643,
+ 0x366596d,0x081ff18,0x38f94fb,0x2c21341,0x328594c,0x267c75c,
+ 0x196b3fd,0x29932cb,0x0036def } },
+ /* 179 */
+ { { 0x3ed7cbe,0x26de044,0x3d0e461,0x0565e12,0x295e500,0x31dc17f,
+ 0x32251c2,0x3420ca8,0x3995f0d,0x2e8ddab,0x0361a45,0x10971b0,
+ 0x11e7b55,0x33bc7ca,0x00812d2 },
+ { 0x3d94972,0x1606817,0x0383ccf,0x0e795b7,0x026e20e,0x0f6fefc,
+ 0x13685d6,0x315d402,0x0cc36b8,0x1c7f059,0x390ef5e,0x316ae04,
+ 0x08c66b9,0x2fac9a4,0x0040086 } },
+ /* 180 */
+ { { 0x3e3c115,0x153de4d,0x1a8ae5e,0x2330511,0x169b8ee,0x1d965c2,
+ 0x2edff2b,0x3ef99e6,0x1631b46,0x1f8a238,0x118d7bb,0x12113c3,
+ 0x26424db,0x0f4122a,0x00e0ea2 },
+ { 0x3d80a73,0x30393bc,0x0f98714,0x278ef59,0x087a0aa,0x3b18c20,
+ 0x04b8a82,0x2068e21,0x030255d,0x3382b27,0x055397f,0x05448dd,
+ 0x2015586,0x1190be0,0x000b979 } },
+ /* 181 */
+ { { 0x2e03080,0x2895692,0x09fb127,0x2d1602a,0x1232306,0x105bd4e,
+ 0x28cd6a6,0x0a83813,0x1ee13b0,0x2abadc3,0x0c09684,0x00e33e1,
+ 0x033eea3,0x30f0a39,0x00a710e },
+ { 0x01b1f7d,0x1c959da,0x017077a,0x254bf0a,0x086fbce,0x15cd6b2,
+ 0x008683f,0x23a4f4d,0x22a6bd6,0x14e8c93,0x0027d15,0x31d0d4f,
+ 0x271777e,0x1533510,0x00ab603 } },
+ /* 182 */
+ { { 0x34c209d,0x14d0abb,0x270432a,0x1d02358,0x22ba752,0x209757f,
+ 0x34af6fc,0x1ffc52e,0x1ced28e,0x1870e46,0x1e0340f,0x3f0bf73,
+ 0x33ba91d,0x2ebca7c,0x00c6580 },
+ { 0x1d442cb,0x0879d50,0x24e4ae1,0x3f4e91c,0x04c7727,0x093cd1d,
+ 0x16d6a45,0x10a8b95,0x0c77856,0x361f84f,0x217845f,0x0bbeec6,
+ 0x0485718,0x33c5385,0x00dcec0 } },
+ /* 183 */
+ { { 0x1539819,0x225507a,0x1bf11cb,0x13e7653,0x0c8cb3b,0x05f695e,
+ 0x353f634,0x2827874,0x3fb8053,0x22de9a5,0x035d8b7,0x2105cc7,
+ 0x2a7a98d,0x35bed95,0x0085748 },
+ { 0x1859c5d,0x00e51f0,0x22a21fd,0x3054d74,0x06ce965,0x328eab7,
+ 0x26a13e0,0x13bfc65,0x01d4fb1,0x36600b9,0x36dd3fc,0x01232ed,
+ 0x15bbaa9,0x0ad7a51,0x0089b18 } },
+ /* 184 */
+ { { 0x3360710,0x1eb5a90,0x136bd77,0x3bd57a6,0x0841287,0x12886c9,
+ 0x35c6700,0x21bc6eb,0x25f35ad,0x3bcb01c,0x0707e72,0x23e9943,
+ 0x03e5233,0x34bb622,0x002bf8e },
+ { 0x16e0d6a,0x04b3d2d,0x290cb02,0x049a10c,0x350537e,0x22cf71b,
+ 0x3184a19,0x2dc8b62,0x2350210,0x3b4afa6,0x159781e,0x1d01b6d,
+ 0x1853440,0x16442f0,0x005a78d } },
+ /* 185 */
+ { { 0x348b02c,0x1ea8ab5,0x3b954d5,0x14684ac,0x0be5b34,0x11c4496,
+ 0x0a7a456,0x14f6eb7,0x11a3221,0x2d65f82,0x32eb1ea,0x09c4018,
+ 0x3f301f3,0x32e8a1c,0x00bd9ad },
+ { 0x0543f7f,0x31e744e,0x1fefd1d,0x24a486c,0x1000220,0x3977e3b,
+ 0x1b3ef51,0x2512a1b,0x2049e6b,0x122232b,0x391a32b,0x2f4a7b1,
+ 0x1c13e71,0x081a9b4,0x00d3516 } },
+ /* 186 */
+ { { 0x1924f43,0x1ae5495,0x28d52ef,0x2b93e77,0x2d2f401,0x371a010,
+ 0x33e8d7a,0x06ed3f1,0x30c0d9d,0x2589fa9,0x3bf3567,0x2ecf8fa,
+ 0x2dee4c3,0x152b620,0x007e8a2 },
+ { 0x1924407,0x01bd42d,0x044a089,0x18686b5,0x2f14a0e,0x17cdce3,
+ 0x0efa216,0x3c586a8,0x1d6ae71,0x375831f,0x3175894,0x20e43eb,
+ 0x34c009e,0x3480527,0x00d115c } },
+ /* 187 */
+ { { 0x12abf77,0x38b0769,0x25682f2,0x295508c,0x0c2a0dc,0x1259b73,
+ 0x023ea25,0x340e7b5,0x3c7cd0d,0x1f92324,0x176405c,0x1528894,
+ 0x18f2e1e,0x2c59c35,0x001efb5 },
+ { 0x0fb1471,0x07e7665,0x141da75,0x07d9f4a,0x0fdb31e,0x0dccda6,
+ 0x074eb25,0x3d92a9b,0x11189a0,0x1b4c557,0x24b8d2b,0x0533f92,
+ 0x0e9e344,0x2fa3dea,0x008d5a4 } },
+ /* 188 */
+ { { 0x2669e98,0x1ad3514,0x2a035c9,0x08a3f50,0x24547f9,0x0a145d3,
+ 0x1c1319d,0x3fe833d,0x1ae064b,0x1e01734,0x246d27e,0x3a2f13c,
+ 0x01e1150,0x263f55e,0x00f89ef },
+ { 0x2e0b63f,0x3e57db7,0x23a4b4f,0x11c8899,0x0ad8500,0x348f3a7,
+ 0x2918604,0x27d6409,0x1ce5001,0x38f94c2,0x29a508a,0x39bdc89,
+ 0x3a52c27,0x194899e,0x00e9376 } },
+ /* 189 */
+ { { 0x0368708,0x34a2730,0x2e1da04,0x0bd78c1,0x2c45887,0x0c44bfa,
+ 0x3a23de3,0x390b9db,0x1746efd,0x05c638e,0x1d20609,0x3263370,
+ 0x31987f0,0x2988529,0x005fa3c },
+ { 0x0aa9f2a,0x20622f7,0x060deee,0x0c9626a,0x3312cc7,0x18ebac7,
+ 0x008dd6c,0x0ad4fe6,0x3db4ea6,0x1dc3f50,0x090b6e9,0x0aff8d2,
+ 0x26aa62c,0x18f3e90,0x00105f8 } },
+ /* 190 */
+ { { 0x38059ad,0x25e576c,0x3ea00b2,0x1fa4191,0x25686b7,0x2d1ce8f,
+ 0x30470ed,0x3478bbf,0x340f9b6,0x1c9e348,0x3d594ec,0x2ffe56e,
+ 0x3f23deb,0x0cd34e9,0x00f4b72 },
+ { 0x1a83f0b,0x2166029,0x28b32a2,0x06a5c5a,0x20786c4,0x0944604,
+ 0x0901bd2,0x379b84e,0x221e2fe,0x0346d54,0x1f4eb59,0x01b8993,
+ 0x2462e08,0x25f9d8b,0x006c4c8 } },
+ /* 191 */
+ { { 0x0b41d9d,0x2e417ed,0x265bd10,0x199148e,0x3826ca4,0x1a67e8d,
+ 0x1bbd13b,0x23e414d,0x3d773bc,0x356e64c,0x0d2118a,0x0cb587f,
+ 0x25fd093,0x24fb529,0x00158c6 },
+ { 0x2806e63,0x3ecaa39,0x251b4dd,0x3b2d779,0x2e31ed3,0x066f1a6,
+ 0x060e518,0x2c7e3e5,0x0d62c76,0x0d88a70,0x101970a,0x1e3c8c6,
+ 0x272b8bb,0x083e73b,0x0031f38 } },
+ /* 192 */
+ { { 0x09e1c72,0x072bcb0,0x0cf4e93,0x2604a64,0x00715f2,0x10c98b6,
+ 0x2ad81d9,0x234fcce,0x37a7304,0x1974a4a,0x1c7415f,0x14aaa93,
+ 0x19587b1,0x3f643f4,0x00c3d10 },
+ { 0x1ddadd0,0x2cd715d,0x294cf76,0x14479ed,0x19f5f4a,0x0198c09,
+ 0x1ab7ebc,0x182c0bc,0x0879202,0x1807273,0x05d39da,0x2c7d868,
+ 0x29c4ec4,0x1b13ad2,0x006dcd7 } },
+ /* 193 */
+ { { 0x1c83f01,0x0245bff,0x24f90ba,0x112554f,0x2354c8b,0x3f17988,
+ 0x0c511af,0x39e1e9b,0x26ae95b,0x0ae551c,0x35b41a6,0x0120455,
+ 0x1e989cb,0x1b37aff,0x00fa2ae },
+ { 0x324659a,0x1aef1c3,0x1c43637,0x3f530a2,0x313a999,0x326af62,
+ 0x134184e,0x2ac131c,0x3f6a789,0x30a300a,0x13e526e,0x2107af3,
+ 0x093a8ff,0x2479902,0x00442b1 } },
+ /* 194 */
+ { { 0x22b6e20,0x31b18be,0x18614ca,0x26fdb5a,0x197f29e,0x325b44b,
+ 0x0ab1dbb,0x042348a,0x3275e8e,0x15bae44,0x0077124,0x2cf5345,
+ 0x2803ad4,0x188f2a2,0x0061b20 },
+ { 0x2a560b1,0x3ced069,0x3cf42c2,0x100e167,0x3879e1d,0x0936ff0,
+ 0x1b51450,0x14c55f3,0x3153bfa,0x2957423,0x2a93823,0x15f5dce,
+ 0x2c9a22f,0x16731a8,0x00a97f2 } },
+ /* 195 */
+ { { 0x18edbbb,0x18c5ef9,0x1f13c30,0x071e77f,0x225ade5,0x1b60f75,
+ 0x1beaf11,0x3e495ad,0x2441dd8,0x2fa00e2,0x32a87b6,0x00050f2,
+ 0x038de7f,0x0037d6d,0x00a885d },
+ { 0x39e48bd,0x1d9e433,0x2768e9f,0x3c29458,0x3f0bdf9,0x35ed5f2,
+ 0x36709fa,0x176dc10,0x012f7c1,0x2df8547,0x1d90ee3,0x053c089,
+ 0x21a8d35,0x200cb0d,0x002e84e } },
+ /* 196 */
+ { { 0x23ec8d8,0x1d81f55,0x0cb7227,0x07f8e4d,0x2a66181,0x163f577,
+ 0x272e7af,0x131a8f2,0x2046229,0x25e6276,0x36bbefe,0x2cdc22f,
+ 0x17c8288,0x33dd4fb,0x000d524 },
+ { 0x330c073,0x1a6728b,0x1cf369f,0x12e7707,0x2f0fa26,0x17c2abd,
+ 0x0a45680,0x26ebd13,0x3c7d19b,0x1c3d6c8,0x2abd110,0x064fd07,
+ 0x09b8339,0x02b4a9f,0x009e3e1 } },
+ /* 197 */
+ { { 0x0ae972f,0x2093c35,0x06e7a90,0x0af1ba1,0x243eef0,0x2748582,
+ 0x0606122,0x13a45f9,0x0acfe60,0x08a685e,0x0eb184b,0x015bc11,
+ 0x0cdf423,0x157fad5,0x004fcad },
+ { 0x2728d15,0x3e5bceb,0x0331a0f,0x31b1a80,0x28a2680,0x3b94955,
+ 0x04cae07,0x176b57e,0x03ac5a6,0x3d7918b,0x22d23f4,0x0ae077f,
+ 0x1eb075d,0x006f16c,0x006e473 } },
+ /* 198 */
+ { { 0x38219b9,0x0475a2b,0x107a774,0x39946c6,0x1cb883c,0x004e0ed,
+ 0x087e571,0x25c3497,0x059982f,0x0a71f66,0x118305d,0x1aaf294,
+ 0x3a5dbaa,0x34be404,0x00725fe },
+ { 0x3abd109,0x336ebea,0x2528487,0x15a1d61,0x0c0f8cf,0x2b56095,
+ 0x2591e68,0x3549a80,0x1d1debb,0x0701c6c,0x161e7e3,0x1f7fa2e,
+ 0x3dfe192,0x17e6498,0x0055f89 } },
+ /* 199 */
+ { { 0x175645b,0x26c036c,0x0b92f89,0x09ed96d,0x351f3a6,0x19ce67b,
+ 0x33ac8db,0x2f0828b,0x27fe400,0x0b9c5e1,0x1967b95,0x3324080,
+ 0x11de142,0x1d44fb3,0x003d596 },
+ { 0x3979775,0x3af37b6,0x3e88d41,0x2f1a8b9,0x299ba61,0x085413c,
+ 0x1149a53,0x0beb40e,0x31427ba,0x239f708,0x357d836,0x1558c22,
+ 0x280a79f,0x1b255f6,0x002b6d1 } },
+ /* 200 */
+ { { 0x39ad982,0x3d79d89,0x01a684a,0x0b6722e,0x39bb4c9,0x39a6399,
+ 0x1ad44e0,0x3059f5e,0x048265f,0x33a2fa4,0x0c3a4cc,0x0d7df98,
+ 0x23a33f1,0x34e2e21,0x00a0a10 },
+ { 0x386efd9,0x1c91f34,0x06c2e19,0x3e6d48d,0x00eefd3,0x2181ef2,
+ 0x2415f97,0x1d33b08,0x0625086,0x1e8aa3e,0x08c9d60,0x0ab427b,
+ 0x2764fa7,0x3b7943e,0x00cd9f0 } },
+ /* 201 */
+ { { 0x1a46d4d,0x0e471f4,0x1693063,0x0467ac0,0x22df51c,0x127a0f7,
+ 0x0498008,0x20e0b16,0x1aa8ad0,0x1923f42,0x2a74273,0x01761ce,
+ 0x1600ca4,0x187b87e,0x00ee49e },
+ { 0x0c76f73,0x19daf92,0x0b2ad76,0x3d8049d,0x1d9c100,0x0fe1c63,
+ 0x0bb67c8,0x035cc44,0x02002fc,0x37b2169,0x344656a,0x1127879,
+ 0x1939bc0,0x0dd8df6,0x0028ce7 } },
+ /* 202 */
+ { { 0x0544ac7,0x26bdc91,0x042697e,0x356e804,0x1f2c658,0x2ceb7ef,
+ 0x2dec39f,0x02c1dcc,0x391a2df,0x2344beb,0x2171e20,0x3099c94,
+ 0x0fa548a,0x37216c9,0x00f820c },
+ { 0x0f4cf77,0x29bbaa5,0x33c6307,0x34a5128,0x118c783,0x2dd06b1,
+ 0x139d4c0,0x2db912e,0x1153ffb,0x1075eb3,0x3a255e4,0x2892161,
+ 0x36d5006,0x125338c,0x0014fbc } },
+ /* 203 */
+ { { 0x1584e3c,0x0830314,0x00279b9,0x167df95,0x2c7733c,0x2108aef,
+ 0x0ce1398,0x35aaf89,0x012523b,0x3c46b6a,0x388e6de,0x01a2002,
+ 0x0582dde,0x19c7fa3,0x007b872 },
+ { 0x1e53510,0x11bca1f,0x19684e7,0x267de5c,0x2492f8b,0x364a2b0,
+ 0x080bc77,0x2c6d47b,0x248432e,0x3ace44f,0x32028f6,0x0212198,
+ 0x2f38bad,0x20d63f0,0x00122bb } },
+ /* 204 */
+ { { 0x30b29c3,0x3cec78e,0x01510a9,0x0c93e91,0x3837b64,0x1eca3a9,
+ 0x105c921,0x05d42e6,0x1379845,0x07ce6f2,0x0e8b6da,0x0e0f093,
+ 0x220b2cd,0x1f6c041,0x00299f5 },
+ { 0x0afdce3,0x2b0e596,0x2f477b6,0x2ccf417,0x3a15206,0x26ec0bf,
+ 0x2e37e2b,0x2593282,0x0ab9db3,0x2841dd8,0x27954be,0x277a681,
+ 0x03f82e2,0x2b610c7,0x00446a1 } },
+ /* 205 */
+ { { 0x06b8195,0x3b3a817,0x31b9c6f,0x317d279,0x3d744a7,0x1de9eb9,
+ 0x296acc1,0x1ce9ea3,0x06c3587,0x246815d,0x3756736,0x0588518,
+ 0x1c971a4,0x1fde1f4,0x00aa021 },
+ { 0x3fd3226,0x274561d,0x00be61e,0x01393d8,0x30f6f23,0x29b7fc1,
+ 0x04cebc7,0x0a892a7,0x20109f1,0x27456be,0x0c863ee,0x2eb6c8a,
+ 0x38c782b,0x039397a,0x00a2829 } },
+ /* 206 */
+ { { 0x29de330,0x21fe80f,0x145b55b,0x1986570,0x012b260,0x2482fbc,
+ 0x0536e0a,0x16b7382,0x32c4d19,0x1deffdb,0x145f418,0x0c67a76,
+ 0x2ce477f,0x218fe24,0x00f9848 },
+ { 0x3e37657,0x3f074d3,0x245ad0e,0x20973c3,0x23c58de,0x2c332ef,
+ 0x2ad21a8,0x0bf1589,0x208af95,0x1f4a8c4,0x2b43735,0x1e46657,
+ 0x15d4f81,0x0c3e63a,0x005f19d } },
+ /* 207 */
+ { { 0x26865bb,0x20f6683,0x16a672e,0x0efd8d1,0x222f5af,0x18f2367,
+ 0x1e9c734,0x25c3902,0x178dfe6,0x2903a79,0x311b91c,0x1adbbe9,
+ 0x225a387,0x0b3e509,0x0089551 },
+ { 0x34e462b,0x23b6a32,0x27c884c,0x129104b,0x384c015,0x3adedc7,
+ 0x325db1c,0x021dc10,0x1e366f7,0x3054df7,0x1992b9a,0x2824e64,
+ 0x0ae77f3,0x181b526,0x00a7316 } },
+ /* 208 */
+ { { 0x2d260f5,0x2434bf2,0x28c0139,0x0a7bb03,0x176c3be,0x3def5f5,
+ 0x05bee00,0x3692df7,0x3d2efeb,0x3a6f859,0x1122b87,0x38f779a,
+ 0x1415ccc,0x2c260ad,0x0075a28 },
+ { 0x04607a6,0x042f37a,0x3f0df68,0x0a1bd36,0x3c6d581,0x2d36bfa,
+ 0x2d577d1,0x0a3affa,0x0b2066b,0x2e6f110,0x0b17e84,0x3c76a5e,
+ 0x1a57553,0x012f36a,0x0004595 } },
+ /* 209 */
+ { { 0x29e5836,0x0e6808c,0x269d13e,0x147dc5c,0x32c9e7d,0x09b258e,
+ 0x2c58d6f,0x1efd716,0x0437996,0x34ec31b,0x15908d9,0x2efa8fd,
+ 0x09ad160,0x079fc1f,0x00d8481 },
+ { 0x3d20e4a,0x18269d6,0x3aa8fe7,0x34829c2,0x2e4325d,0x0d800e1,
+ 0x11f370b,0x10c08dc,0x22fd092,0x1a5fe55,0x0acc443,0x037030d,
+ 0x1cdd404,0x097379e,0x00fd6d7 } },
+ /* 210 */
+ { { 0x313eafb,0x3f438f3,0x2e5fb3e,0x2ed6a82,0x121009c,0x240889e,
+ 0x00c5537,0x269b792,0x334b2fc,0x1dd573c,0x07096ae,0x19296fc,
+ 0x3813985,0x2742f48,0x00ddd64 },
+ { 0x2045041,0x3842c62,0x1572d0d,0x04f255f,0x06e05b4,0x383ec97,
+ 0x1ff8064,0x18bed71,0x39b6411,0x2764cc5,0x257439f,0x3521217,
+ 0x172aa42,0x342a2a3,0x0070c5b } },
+ /* 211 */
+ { { 0x3bdf646,0x1c5ce25,0x1f7ca76,0x2d2acca,0x3aa1485,0x23c97f7,
+ 0x3e11d6f,0x0609338,0x07ec622,0x01da8ff,0x3392474,0x17ca07f,
+ 0x13a9a04,0x353a5b4,0x0024557 },
+ { 0x14c27cd,0x32012f7,0x3fea875,0x3d03d71,0x211c5f0,0x3157fdf,
+ 0x0c880bd,0x3c406b2,0x2c51103,0x24ab377,0x399faa8,0x0d06887,
+ 0x16b5738,0x28b33a7,0x00c7b67 } },
+ /* 212 */
+ { { 0x2357586,0x35c93e3,0x0da09a0,0x3d77d92,0x11d7f4f,0x37b98a9,
+ 0x3e6c9bf,0x2cdca70,0x2f00389,0x2412673,0x18eab87,0x0101436,
+ 0x11617e9,0x06d9b01,0x00e8eef },
+ { 0x37e3ca9,0x16ffaf0,0x391debf,0x1b69382,0x07c5e94,0x312fa8a,
+ 0x0973142,0x2cadde4,0x109ee67,0x3a07db0,0x1afc5ed,0x08df66f,
+ 0x304c7af,0x0804aae,0x00d2e60 } },
+ /* 213 */
+ { { 0x24f57bf,0x1818322,0x182a615,0x25bfc44,0x0f97586,0x0a5bbc0,
+ 0x36773c6,0x1a2660c,0x3ceff66,0x3270152,0x319cd11,0x2845845,
+ 0x1acfad6,0x19076f8,0x009824a },
+ { 0x289fd01,0x2de97ee,0x39d80b7,0x026227d,0x0f8d3b8,0x15e0a17,
+ 0x21ea08f,0x20a2317,0x136ae6d,0x3deb1d1,0x3521ef5,0x0de8801,
+ 0x0a25d5d,0x0612c98,0x005ecc4 } },
+ /* 214 */
+ { { 0x308c8d3,0x3aec669,0x01ecddc,0x13f18fe,0x1e63ed0,0x061cfe5,
+ 0x05f5a01,0x1db5741,0x14479f2,0x0ced6b5,0x025ae5b,0x09ca8f5,
+ 0x2160581,0x1404433,0x008bfeb },
+ { 0x08228bf,0x0e02722,0x37df423,0x33ecabf,0x34bd82a,0x32f529f,
+ 0x28f1800,0x0c8f671,0x1246b44,0x1ff35dc,0x091db95,0x303f3da,
+ 0x28f7f60,0x3624136,0x00cfbb4 } },
+ /* 215 */
+ { { 0x326139a,0x2977e4e,0x3eb89a6,0x20ecb31,0x13e076a,0x2a592f3,
+ 0x28e82d5,0x235ad1e,0x239b927,0x262938a,0x2444354,0x141b263,
+ 0x0d56693,0x2a3fc78,0x0006497 },
+ { 0x31efa05,0x3a3664a,0x3e333de,0x2a114e4,0x12da63c,0x3c15e6b,
+ 0x2f7277c,0x363aa92,0x2393236,0x16bd2d1,0x32b617f,0x32b656c,
+ 0x3b1246c,0x22e2e22,0x00ce76d } },
+ /* 216 */
+ { { 0x03843dc,0x094de82,0x13b463d,0x0507905,0x089eb35,0x2a6bf25,
+ 0x35ebc4e,0x2bb5d45,0x1808ed1,0x1de9949,0x185e829,0x0a55847,
+ 0x0b73d67,0x1a2ed61,0x008dd2d },
+ { 0x133c3a4,0x04e7980,0x38ea237,0x2ad2f49,0x19de838,0x018bf36,
+ 0x29b072c,0x21c1ba0,0x14f63ba,0x31c1cc3,0x13cd05e,0x20120ff,
+ 0x1f84d60,0x16e0321,0x00872ab } },
+ /* 217 */
+ { { 0x19d4d49,0x1ddb4e6,0x05e7fc0,0x37bb0fd,0x1a3eb59,0x36b87f0,
+ 0x190e440,0x1c7fef2,0x31ea153,0x14cd65a,0x1bc7ab2,0x11f72ca,
+ 0x39582d4,0x0fa4d65,0x00cd5b6 },
+ { 0x3d1ff11,0x0d9be9d,0x2903ae3,0x017b7b9,0x259f28f,0x110cefc,
+ 0x03fed1a,0x38039bd,0x09bdf9c,0x3055027,0x2ca9c5d,0x2d737b6,
+ 0x3bdb421,0x16560b5,0x00f9f33 } },
+ /* 218 */
+ { { 0x022c792,0x110de25,0x38bf959,0x08f2562,0x1239ea9,0x3c1d950,
+ 0x21a247d,0x315112d,0x285bb9f,0x2534a73,0x0b42455,0x1a4a99c,
+ 0x069009a,0x1680392,0x006e0ca },
+ { 0x1b3bece,0x269e0a1,0x18926b7,0x0e7187e,0x241f35e,0x39d1fe0,
+ 0x02099aa,0x1675bfe,0x23fd0ca,0x3d6322b,0x19406b5,0x324c38a,
+ 0x242434a,0x3ae677c,0x002ce04 } },
+ /* 219 */
+ { { 0x2c37b82,0x1ae6506,0x0d83436,0x23496c1,0x0ff0c72,0x2711edf,
+ 0x1513611,0x04f9c7d,0x1edbeff,0x376fcb5,0x212a683,0x23bf547,
+ 0x0f9c4f7,0x16e6627,0x0082cd8 },
+ { 0x0cb5d37,0x31b6db8,0x1a15e23,0x2f5cbb8,0x0818aee,0x21dc6c5,
+ 0x12aafd2,0x205f608,0x1d91def,0x3def088,0x1445c51,0x3100e8a,
+ 0x3746bda,0x145c4b0,0x00711b0 } },
+ /* 220 */
+ { { 0x2a99ecc,0x27b5217,0x35e10ed,0x036e32a,0x0f79950,0x15c32f7,
+ 0x2c87dcb,0x3ebb2a3,0x2c2d35d,0x114b3ec,0x2e4d80a,0x0c7eb89,
+ 0x2abe58d,0x3727737,0x00e6a37 },
+ { 0x1eca452,0x1968d07,0x344e5d3,0x29435a2,0x109a5f8,0x181d12c,
+ 0x238ea5a,0x127a564,0x00dbb42,0x0fcbfb7,0x2909b2e,0x2571d3a,
+ 0x08250e3,0x0694e4e,0x00e156d } },
+ /* 221 */
+ { { 0x3181ae9,0x1acf411,0x3808d79,0x2a11065,0x0baf44b,0x133cfeb,
+ 0x1330943,0x1711b9a,0x2dec3bd,0x1906a9a,0x2ed947c,0x369d763,
+ 0x1a5254f,0x104a7a9,0x00acd9d },
+ { 0x030301b,0x31568f5,0x2a4965c,0x33ded4b,0x03c9a5b,0x16541fc,
+ 0x1319cf1,0x2a3748b,0x1b5de74,0x18bb82e,0x077ac2b,0x309a87a,
+ 0x3c31420,0x0f6a4b9,0x00387d7 } },
+ /* 222 */
+ { { 0x0d3fdac,0x120cfa3,0x1b8e13c,0x1ccccb9,0x376fcd4,0x0bf87f4,
+ 0x271b4be,0x363b3fd,0x28b5d98,0x0535cd3,0x114bbc1,0x3ab4f19,
+ 0x10494b1,0x2161ece,0x00d14ca },
+ { 0x12d37e9,0x110ebd7,0x062295a,0x1cc0119,0x073c6ea,0x15d5411,
+ 0x0aeb4b1,0x23fba91,0x175fab5,0x3ee8fe1,0x1c680a6,0x1e76f27,
+ 0x3ddfc97,0x3d69ecd,0x00e1ee5 } },
+ /* 223 */
+ { { 0x2d29f46,0x2d19204,0x3137cd0,0x02c3b54,0x193295b,0x02fbdb2,
+ 0x2260948,0x22c02ff,0x3885424,0x1299595,0x00e7f9c,0x310ff2a,
+ 0x01ea169,0x0deef85,0x0021908 },
+ { 0x1b26cfb,0x38566a8,0x2852875,0x21debff,0x290ca9f,0x0b29663,
+ 0x26550d9,0x2b44457,0x05d1938,0x1f8f825,0x366ef93,0x1d8daec,
+ 0x069e5ef,0x342ece6,0x00b6034 } },
+ /* 224 */
+ { { 0x2d8356e,0x1578c09,0x226f4d2,0x3b74c51,0x0f83666,0x0323b59,
+ 0x1ddf61d,0x1ed8508,0x3c52667,0x0e5b91c,0x1e9b18b,0x352bdfa,
+ 0x13f75da,0x352aa4e,0x00fceff },
+ { 0x1c731d5,0x04e2844,0x01d9843,0x286cbc5,0x105bcb3,0x05edd9c,
+ 0x21fa956,0x3b1ec83,0x01288cc,0x22fbf3a,0x10f1b56,0x081cf72,
+ 0x15cb758,0x18687c1,0x00f5722 } },
+ /* 225 */
+ { { 0x2973088,0x1209dcd,0x3980f31,0x0221aa7,0x1c008e7,0x011b098,
+ 0x395947e,0x2f2806d,0x27dca76,0x037c79a,0x31acddf,0x2bf6219,
+ 0x0d8f4ab,0x13644d9,0x00ff705 },
+ { 0x2260594,0x18d51f8,0x277e2cf,0x1cb5cec,0x2468a53,0x3e6f4d7,
+ 0x019e24e,0x0f30f1d,0x0202404,0x34ad287,0x090b39c,0x23c11ea,
+ 0x1a2e3a2,0x3a851be,0x00dca2c } },
+ /* 226 */
+ { { 0x3277538,0x221cd94,0x3738ab7,0x0973da5,0x1a734e2,0x2c8b8b0,
+ 0x2e1d1e6,0x348499b,0x389ebe1,0x18b1854,0x02bb076,0x1b2b500,
+ 0x0f207f3,0x170cf99,0x0012088 },
+ { 0x0fbfec2,0x1df55a4,0x34ae59e,0x2ab5e95,0x3f9e781,0x3411794,
+ 0x1410b05,0x17c3a00,0x0aaa91b,0x074ed7c,0x3fbb352,0x3477c01,
+ 0x3ee9ab3,0x0cfb1ca,0x0011c4b } },
+ /* 227 */
+ { { 0x3c3a7f3,0x2e60ca0,0x2354d32,0x33e2362,0x28083ab,0x03d3b16,
+ 0x3164045,0x0a41f7a,0x3f0641e,0x38635d1,0x31bbf03,0x225e2bb,
+ 0x0cd894e,0x1f72228,0x0093244 },
+ { 0x33d5897,0x383faf3,0x0e6d561,0x0bc4d80,0x3fc3a68,0x05a9adc,
+ 0x0b9d73d,0x3d6031e,0x2ded29b,0x339c4ff,0x08d69e5,0x089488c,
+ 0x3fda40a,0x295c7fd,0x003a924 } },
+ /* 228 */
+ { { 0x0093bee,0x115532d,0x2ec0fb6,0x0969631,0x3a6d65a,0x0f43b4d,
+ 0x26994d4,0x0b51104,0x2515515,0x3695a26,0x284caa8,0x397aa30,
+ 0x25538b8,0x353f47c,0x0033f05 },
+ { 0x3615d6e,0x37f8246,0x07dae0f,0x23dc154,0x02ded7e,0x1eef320,
+ 0x1631e51,0x3447f75,0x13e267f,0x353e1d1,0x3f89d62,0x369c8ff,
+ 0x1a21dc6,0x2b8b8f3,0x0055cbc } },
+ /* 229 */
+ { { 0x34e84f3,0x2f2539a,0x2c35336,0x0c53bdc,0x1728630,0x3ad5fe6,
+ 0x05fdeee,0x3386db6,0x272a42e,0x29fd38c,0x36f0320,0x21b2ed4,
+ 0x331e67f,0x28ae48c,0x00f09b6 },
+ { 0x2778435,0x0fb3c55,0x32d221d,0x2660c8e,0x32977ba,0x1c12f03,
+ 0x1b57fb1,0x01229a8,0x38b389f,0x375ddf3,0x2c6b42c,0x3885d3e,
+ 0x2c55a9c,0x2ffc279,0x00404e2 } },
+ /* 230 */
+ { { 0x04c5ddb,0x2c4d788,0x150e9b9,0x110fbfd,0x29dbfe0,0x30ef83d,
+ 0x2ab4bfe,0x395bcd7,0x30d0a43,0x0e2d30f,0x0e73f9b,0x07199cc,
+ 0x0c9054c,0x22f4b1e,0x0092ed3 },
+ { 0x386e27c,0x00fdaa8,0x0507c70,0x1beb3b6,0x0b9c4f4,0x277d519,
+ 0x024ec85,0x1cbaba8,0x1524295,0x112be58,0x21fc119,0x273578b,
+ 0x2358c27,0x280ca07,0x00aa376 } },
+ /* 231 */
+ { { 0x0dbc95c,0x16488cf,0x337a078,0x1abbcb8,0x0aae1aa,0x1caa151,
+ 0x00108d4,0x1edf701,0x3e68d03,0x1203214,0x0c7eee2,0x084c572,
+ 0x07752d2,0x215a3b9,0x00195d3 },
+ { 0x2cd7fbe,0x06e80f6,0x052bd4b,0x07b4f83,0x24b5ac6,0x2aaded4,
+ 0x13c0526,0x0ffa9a3,0x08c660e,0x13c35c9,0x3145efb,0x36cfe24,
+ 0x0936daf,0x268e3d0,0x00a73fd } },
+ /* 232 */
+ { { 0x31b17ce,0x2e7bcee,0x3f31891,0x19f1849,0x1140236,0x015487f,
+ 0x32e58d3,0x202204a,0x049e350,0x1ce91f9,0x3f75150,0x27f212f,
+ 0x0d16ee4,0x1c894c4,0x004023f },
+ { 0x33399fa,0x2397b6d,0x2a3ea60,0x36354ca,0x1f12632,0x117a105,
+ 0x22758e8,0x361844e,0x3851fc2,0x0ab92db,0x339d02f,0x1e7d6c4,
+ 0x19ebd38,0x0a9a036,0x00446d2 } },
+ /* 233 */
+ { { 0x3e164f1,0x008c092,0x19200f5,0x35a22e0,0x38d09d2,0x212b3bf,
+ 0x0056f19,0x3a03545,0x1f075e9,0x0e97137,0x1f496a9,0x32d1f9b,
+ 0x36bf738,0x35ace37,0x00899e1 },
+ { 0x19eb2a6,0x21fa22d,0x338b69e,0x18e6d1f,0x1280d9d,0x1953a55,
+ 0x1411ea3,0x2960566,0x0fd969a,0x1f3e375,0x130742a,0x170aebd,
+ 0x33085ff,0x14d868d,0x00a4391 } },
+ /* 234 */
+ { { 0x0a4bdd2,0x39ca8ea,0x37026ac,0x346da3b,0x0c656cd,0x03136b6,
+ 0x233e7e9,0x0714352,0x08a9d95,0x192bb38,0x085d68e,0x20016b8,
+ 0x102b8ea,0x1f5dbdd,0x00fdd7a },
+ { 0x0d6fa45,0x3ec29a6,0x2b8cce6,0x1c84413,0x0228f86,0x28275f7,
+ 0x3d8787d,0x0c19748,0x28b2ae9,0x1954850,0x2a56c36,0x3eae8f7,
+ 0x0aca595,0x00e42a2,0x00edbe5 } },
+ /* 235 */
+ { { 0x3b26c82,0x3682b6f,0x2f9cd64,0x0f254b0,0x0e5d70b,0x1f9dfda,
+ 0x28f365f,0x35a57d7,0x00208f2,0x19c8d38,0x112e7be,0x3e403bb,
+ 0x3734efa,0x24d12b3,0x0027dc6 },
+ { 0x260a46a,0x13fd7b0,0x1c2880e,0x338b70c,0x27da5eb,0x29a7d54,
+ 0x1c5d73c,0x2130921,0x32969cc,0x2b37eda,0x2d6d4ec,0x0716bfb,
+ 0x0763703,0x1320889,0x00c7bbf } },
+ /* 236 */
+ { { 0x1fe01b2,0x2dcb1d2,0x11b89d5,0x219e4ea,0x0347851,0x3d1810e,
+ 0x3a3c54c,0x06dbe8e,0x03d3ab2,0x2dcfa39,0x3e57b8a,0x337a382,
+ 0x0426450,0x0e9f748,0x006488b },
+ { 0x1dc4582,0x0e62cf7,0x06fea9e,0x2a56fb1,0x31698c1,0x15b4e10,
+ 0x1446ef1,0x0a689fc,0x1d87703,0x20ff497,0x2c71066,0x2c48868,
+ 0x2e6cf05,0x30aa9cb,0x0065b2d } },
+ /* 237 */
+ { { 0x1021d63,0x2217df3,0x1f0821a,0x057fa98,0x23f344b,0x173dcf9,
+ 0x1ba6ddc,0x22c8eb5,0x18f227a,0x0455343,0x1c55931,0x1d0dcf3,
+ 0x20fa19b,0x1c56618,0x004feab },
+ { 0x19ec924,0x224e39f,0x2550509,0x179b51f,0x284d54a,0x2d85d41,
+ 0x2d1bdc1,0x1a29068,0x3826158,0x1267f85,0x3005a92,0x0769e00,
+ 0x379b617,0x17b5f63,0x00a70bf } },
+ /* 238 */
+ { { 0x22216c5,0x049437f,0x33510bc,0x141d806,0x22c37e2,0x1bc1adf,
+ 0x300175d,0x2e6ded8,0x0a18bfe,0x35377a3,0x382f843,0x08410ca,
+ 0x00afd4f,0x0be6c6b,0x008d70e },
+ { 0x2e91abb,0x1cede2a,0x28f225c,0x28e18c0,0x30230dc,0x173cc2d,
+ 0x123ecfe,0x3c9962e,0x2c25506,0x27b5d53,0x329a5e3,0x106e231,
+ 0x3889b8e,0x3b0aeaf,0x00ee67c } },
+ /* 239 */
+ { { 0x3e46c65,0x0eb3d46,0x1d7ae18,0x23f9d59,0x2978953,0x2589ed3,
+ 0x073391d,0x2461e1e,0x0c19f1d,0x22fd2b1,0x0691f5c,0x2e67d8d,
+ 0x1fb985d,0x200dd28,0x00a68df },
+ { 0x392b5fa,0x123b46f,0x1c323c4,0x104f82f,0x0a098c8,0x26fc05b,
+ 0x34cd557,0x0913639,0x09c115e,0x3977c34,0x3410b66,0x062b404,
+ 0x0213094,0x132c5e8,0x008b612 } },
+ /* 240 */
+ { { 0x26e3392,0x3b0ebf0,0x2e00425,0x1c285c8,0x3c07f84,0x08d5ad0,
+ 0x028190e,0x1669b73,0x1ffb1ef,0x053b65f,0x063028c,0x0aceb47,
+ 0x18988c2,0x0f09a30,0x0007072 },
+ { 0x0f49e7d,0x28c0bd3,0x252270d,0x24cfc4a,0x0c5e87c,0x2165052,
+ 0x2cdd1d1,0x04931d2,0x3abca74,0x22b57dc,0x169fd47,0x0b928fb,
+ 0x17cc3e7,0x21a1ec4,0x0061593 } },
+ /* 241 */
+ { { 0x1aa0486,0x2e55dea,0x15577b7,0x0d6818f,0x36e41fb,0x2a411f5,
+ 0x17d5c7d,0x1eea6c0,0x28068a8,0x0e31d20,0x1f08ad9,0x117e973,
+ 0x08a28ab,0x085d30a,0x00cd9fb },
+ { 0x347843d,0x1119095,0x11e3595,0x1b29584,0x134d64c,0x2ff3a35,
+ 0x247ea14,0x099fc4b,0x2056169,0x145dd03,0x2ed03fb,0x1250e3b,
+ 0x3f5135c,0x2b753f0,0x009da30 } },
+ /* 242 */
+ { { 0x0fa5200,0x214a0b3,0x313dc4e,0x23da866,0x3270760,0x15c9b8b,
+ 0x39a53df,0x1f79772,0x3c9e942,0x2984901,0x154d582,0x1685f87,
+ 0x2e1183e,0x1f79956,0x00b9987 },
+ { 0x15254de,0x3a5cac0,0x37c56f0,0x2c7c29b,0x292a56d,0x195be2c,
+ 0x17e4e1a,0x0660f4a,0x052ad98,0x1267f80,0x07cfed8,0x194b4bc,
+ 0x01738d3,0x14ba10f,0x00c7843 } },
+ /* 243 */
+ { { 0x29b2d8a,0x242bc1f,0x19646ee,0x0615f3c,0x0ac8d70,0x07ca3bf,
+ 0x2d90317,0x2c83bdb,0x1a96812,0x39fdc35,0x31c61ee,0x2d55fd3,
+ 0x2375827,0x355f189,0x00f1c9b },
+ { 0x21a6194,0x1f4050a,0x2b845cf,0x02c6242,0x2dd614e,0x3a4f0a9,
+ 0x39de100,0x24714fb,0x175e0cd,0x0be633d,0x14befc3,0x13b0318,
+ 0x1d68c50,0x299989e,0x00d0513 } },
+ /* 244 */
+ { { 0x059fb6a,0x2b6eb6a,0x3666a8e,0x39f6ca0,0x1cf8346,0x388b8d5,
+ 0x35e61a3,0x271adec,0x22c9963,0x20a4fb3,0x16f241c,0x0058b89,
+ 0x21ddafa,0x1ee6fde,0x00d2e6c },
+ { 0x0075e63,0x39894d0,0x0286d0d,0x187e7b2,0x02405aa,0x3f91525,
+ 0x37830a8,0x2723088,0x2c7364e,0x013f406,0x104ba75,0x270f486,
+ 0x3520b4d,0x3852bc6,0x00d589b } },
+ /* 245 */
+ { { 0x262e53b,0x1da93d1,0x3676135,0x147e41d,0x335ec2f,0x1f02be5,
+ 0x297d139,0x22d6198,0x1fe9e59,0x13b4c80,0x1e70f60,0x2f1d4a9,
+ 0x2d95149,0x14d6ec4,0x00b54af },
+ { 0x12c1c76,0x2930ac8,0x0dfd36e,0x31fac94,0x218f5bb,0x2828691,
+ 0x1466cc9,0x3645e83,0x1a4dac2,0x1549593,0x0e95fab,0x19567d2,
+ 0x27a3320,0x0642729,0x007487c } },
+ /* 246 */
+ { { 0x1e98e9c,0x2ff8df7,0x119975a,0x098a904,0x099b90b,0x336c7df,
+ 0x010996d,0x159d46d,0x3118b3b,0x3aacd1b,0x31f8ae1,0x214864f,
+ 0x398c104,0x089dae2,0x001ec4d },
+ { 0x1452baa,0x2f24991,0x2572ba3,0x162b312,0x2387d18,0x147c5c7,
+ 0x38eff6e,0x0700251,0x37d931e,0x23cd5c1,0x254c8ca,0x3b9df37,
+ 0x1c9a4ff,0x0bfd547,0x00fb489 } },
+ /* 247 */
+ { { 0x1b8dff8,0x2f6b40b,0x05a25b1,0x3f5688a,0x1d462f4,0x2802d18,
+ 0x2aad8ed,0x1b46c75,0x3cf4130,0x250fefb,0x2a13fe1,0x23a1bcd,
+ 0x0940442,0x04605fe,0x00c8b2f },
+ { 0x0d51afb,0x14a2abc,0x1d06762,0x291526c,0x2a3e2fe,0x28f77d9,
+ 0x3ad8f2e,0x3481a1b,0x04b4fbd,0x2836733,0x0189ff5,0x3a5f533,
+ 0x319a6cd,0x0f58667,0x00c3679 } },
+ /* 248 */
+ { { 0x1b85197,0x22426d4,0x2895ea3,0x342d324,0x3ffb17d,0x376cfcf,
+ 0x30878b1,0x3c3c83a,0x0ffc57c,0x0ac174a,0x1abd57e,0x2f78b9c,
+ 0x01b20d8,0x0a37103,0x007f2be },
+ { 0x19a2d48,0x137288a,0x182d655,0x0ba0dde,0x25130ba,0x01c65c6,
+ 0x23205f1,0x2097621,0x2827cf2,0x2c57b98,0x03748f2,0x2db15fc,
+ 0x385a0d4,0x13690c0,0x00a9e3f } },
+ /* 249 */
+ { { 0x3fbc9c6,0x2df3b20,0x377e33e,0x31d1505,0x024a311,0x3c1d9ff,
+ 0x1377f74,0x00b6b20,0x2364ab7,0x184ab6b,0x2a77969,0x3f2db6c,
+ 0x2a6adb7,0x0a10073,0x004a6fb },
+ { 0x1fc73de,0x2c74ab3,0x3d325e8,0x2346c0b,0x1d0efae,0x2076146,
+ 0x19c190d,0x225c4fe,0x3fafc80,0x2cf063d,0x11b7ae7,0x3dc4f9d,
+ 0x3c3f841,0x10d7c1f,0x000a4b3 } },
+ /* 250 */
+ { { 0x19b7d2e,0x28f1300,0x0b897dd,0x06b5371,0x0631c8d,0x336cc4f,
+ 0x09cd6e1,0x2ec1952,0x1104c07,0x07512bb,0x35f000d,0x25f84e9,
+ 0x1df4d8f,0x193f769,0x000e9ee },
+ { 0x2346910,0x267cecf,0x0ad7eaa,0x087e8a5,0x1622f69,0x342cbfa,
+ 0x2aa20d0,0x206e88a,0x3991e58,0x093fb4b,0x0157180,0x3cecb5b,
+ 0x2e17c9a,0x1ea371f,0x00919e6 } },
+ /* 251 */
+ { { 0x2250533,0x13f931d,0x3ef8c72,0x395f605,0x18a2080,0x1cb25d4,
+ 0x2fb0f41,0x1c0ba8a,0x1eb17c0,0x266c433,0x09b7e3e,0x0e5d78f,
+ 0x0cdc5bf,0x1f7c734,0x0020611 },
+ { 0x205ebd5,0x127986f,0x02c0fb0,0x1705b1e,0x1eb0bb5,0x2dffb42,
+ 0x2331b8a,0x18fc04e,0x31d6328,0x17db162,0x0d3b619,0x193bdb9,
+ 0x3f11662,0x2d8e694,0x0092c51 } },
+ /* 252 */
+ { { 0x08b364d,0x31ef20a,0x25c4a57,0x021ed07,0x14a562e,0x262a684,
+ 0x1d21c66,0x126e5a6,0x181f3f8,0x2a93b65,0x1eb726b,0x08fbbce,
+ 0x084f9a2,0x308f30a,0x0013159 },
+ { 0x23f4963,0x0c7960e,0x2a81739,0x2242b69,0x3965003,0x2aca542,
+ 0x28a1c65,0x2ad48fb,0x149775f,0x1bbb7d2,0x0f2671b,0x3594b85,
+ 0x22f5563,0x2470f13,0x00fed44 } },
+ /* 253 */
+ { { 0x0eb453e,0x3ab70fd,0x1a5b335,0x18f2b74,0x25ff74b,0x3612a46,
+ 0x33d0d75,0x28cdda4,0x2b9b49b,0x22728fb,0x004c15b,0x1beb33b,
+ 0x1a7e41f,0x0c9b702,0x004ef19 },
+ { 0x1ca3233,0x0b4c90f,0x1d4b53d,0x2428896,0x20ee405,0x151bc00,
+ 0x022edb5,0x1adc463,0x00109ea,0x06490a6,0x30e91e6,0x3682b76,
+ 0x23c50aa,0x3bd2665,0x005fe53 } },
+ /* 254 */
+ { { 0x0c28c65,0x3741ae4,0x247d372,0x0b04673,0x2176524,0x2c8bf20,
+ 0x01fb806,0x3330701,0x307b0a7,0x3999fb7,0x1261bec,0x256679c,
+ 0x3f22ac7,0x26e8673,0x00bc69d },
+ { 0x3c06819,0x35df344,0x379d009,0x2bb8a0a,0x0635a66,0x096c6fa,
+ 0x1ac4a62,0x023e53b,0x0e45240,0x115f53d,0x3056af8,0x0a66b16,
+ 0x3c386ee,0x1130e82,0x00cc384 } },
+ /* 255 */
+ { { 0x14c2356,0x190ec73,0x07be490,0x145d415,0x0740a48,0x1251301,
+ 0x3eaf29d,0x2628190,0x079299a,0x26e95c9,0x2e05fdf,0x2ca7c5b,
+ 0x32d7b48,0x3d84226,0x0033fb4 },
+ { 0x150f955,0x01240aa,0x3ddf867,0x137fb70,0x297e103,0x17eeda8,
+ 0x1320b60,0x266ec84,0x13f4322,0x0c8f5ee,0x0590e4a,0x386815e,
+ 0x00ce61f,0x161bd63,0x008e1d0 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Thin wrapper that binds the generic stripe multiplication to the P384
+ * base point and its precomputed table.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* Use the precomputed table of base-point multiples (p384_table). */
+    return sp_384_ecc_mulmod_stripe_15(r, &p384_base, p384_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack storage used when dynamic allocation is not wanted. */
+    sp_point_384 p;
+    sp_digit kd[15];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_15(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Heap-allocate the scalar when building for small memory/stack. */
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Convert the mp_int scalar into 15 26-bit digits. */
+        sp_384_from_mp(k, 15, km);
+
+        err = sp_384_ecc_mulmod_base_15(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Convert the internal point back to the caller's ecc_point. */
+        err = sp_384_point_to_ecc_point_15(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time: every digit is always inspected.
+ *
+ * a Number to check (15 x 26-bit digits).
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_15(const sp_digit* a)
+{
+    sp_digit acc = 0;
+    int i;
+
+    /* OR all digits together; the result is zero only if all are zero. */
+    for (i = 0; i < 15; i++) {
+        acc |= a[i];
+    }
+    return acc == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * The increment is applied to the least significant digit and the number
+ * is then re-normalised so every digit stays within 26 bits.
+ *
+ * a A single precision integer, updated in place.
+ */
+SP_NOINLINE static void sp_384_add_one_15(sp_digit* a)
+{
+    a[0]++;             /* May temporarily exceed 26 bits. */
+    sp_384_norm_15(a);  /* Propagate the carry through the digits. */
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of digits of r to fill.
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;   /* Bit position within the current 26-bit digit. */
+
+    r[0] = 0;
+    /* Process bytes from the end of the array (least significant first). */
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 18U) {
+            /* Byte straddles a digit boundary: keep only the low 26 bits. */
+            r[j] &= 0x3ffffff;
+            s = 26U - s;    /* Bits of a[i] consumed by this digit. */
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;   /* Remaining high bits. */
+            s = 8U - s;     /* Bits now occupied in the new digit. */
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any digits not written above. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * Uses rejection sampling: 384 random bits are drawn and accepted only
+ * when below the comparison bound, then incremented by one so zero is
+ * never returned.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_15(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];   /* 384 bits of randomness. */
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 15, buf, (int)sizeof(buf));
+            /* NOTE(review): p384_order2 is presumably order-2, giving a
+             * final range of [1, order-1] -- confirm against its
+             * definition (not visible here). */
+            if (sp_384_cmp_15(k, p384_order2) < 0) {
+                sp_384_add_one_15(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[15];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_15(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    /* Generate a random private scalar in [1, order-1]. */
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_15(rng, k);
+    }
+    /* Public key is the base point multiplied by the private scalar. */
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_base_15(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    /* Validate the pair: order * pub must be the point at infinity,
+     * represented here with zero X and Y ordinates. */
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_15(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        /* Bug fix: check the computed product (infinity), not the public
+         * point itself -- the original test on point->x/point->y would
+         * reject every valid key and left `infinity` unused. */
+        if ((sp_384_iszero_15(infinity->x) == 0) ||
+                (sp_384_iszero_15(infinity->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_15(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_15(infinity, 1, heap);
+#endif
+    sp_384_point_free_15(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * Note: r is normalised in place (carries propagated, digits masked to
+ * 26 bits) before serialisation, so the input array is modified.
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    /* Normalise so every digit holds at most 26 bits. */
+    for (i=0; i<14; i++) {
+        r[i+1] += r[i] >> 26;
+        r[i] &= 0x3ffffff;
+    }
+    j = 384 / 8 - 1;    /* Start at the last (least significant) byte. */
+    a[j] = 0;
+    /* Walk digits from least significant, filling bytes from the end. */
+    for (i=0; i<15 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit whole bytes from the remaining bits of this digit. */
+        while (b < 26) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 26);   /* Bits of the next digit already emitted. */
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;    /* Last byte is only partially filled; revisit it. */
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv Scalar to multiply the point by.
+ * pub Point to multiply.
+ * out Buffer to hold X ordinate.
+ * outLen On entry, size of the buffer in bytes.
+ * On exit, length of data in buffer in bytes.
+ * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[15];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    /* Output is always the full 48-byte (384-bit) X ordinate. */
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_15(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* shared = priv * pub, converted to affine (map = 1). */
+        sp_384_from_mp(k, 15, priv);
+        sp_384_point_from_ecc_point_15(point, pub);
+            err = sp_384_ecc_mulmod_15(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* The ECDH secret is the big-endian X ordinate. */
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_15(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer; must have room for 16 digits as the
+ *   final carry is stored in r[15].
+ * a A single precision integer (15 digits).
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_384_mul_d_15(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Loop form: accumulate the product and carry digit by digit. */
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 15; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[15] = (sp_digit)t;
+#else
+    /* Unrolled form: compute all partial products, then propagate one
+     * level of carries while writing the result digits. */
+    int64_t tb = b;
+    int64_t t[15];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    t[ 7] = tb * a[ 7];
+    t[ 8] = tb * a[ 8];
+    t[ 9] = tb * a[ 9];
+    t[10] = tb * a[10];
+    t[11] = tb * a[11];
+    t[12] = tb * a[12];
+    t[13] = tb * a[13];
+    t[14] = tb * a[14];
+    r[ 0] = (t[ 0] & 0x3ffffff);
+    r[ 1] = (sp_digit)(t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
+    r[ 2] = (sp_digit)(t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
+    r[ 3] = (sp_digit)(t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
+    r[ 4] = (sp_digit)(t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
+    r[ 5] = (sp_digit)(t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
+    r[ 6] = (sp_digit)(t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
+    r[ 7] = (sp_digit)(t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
+    r[ 8] = (sp_digit)(t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
+    r[ 9] = (sp_digit)(t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
+    r[10] = (sp_digit)(t[ 9] >> 26) + (t[10] & 0x3ffffff);
+    r[11] = (sp_digit)(t[10] >> 26) + (t[11] & 0x3ffffff);
+    r[12] = (sp_digit)(t[11] >> 26) + (t[12] & 0x3ffffff);
+    r[13] = (sp_digit)(t[12] >> 26) + (t[13] & 0x3ffffff);
+    r[14] = (sp_digit)(t[13] >> 26) + (t[14] & 0x3ffffff);
+    r[15] = (sp_digit)(t[14] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+/* Divide the two-digit number (d1:d0) by dv using only 32-bit operations.
+ *
+ * Used when 64-bit division is unavailable. The quotient is built up
+ * 5 bits (then a final 1 bit) at a time so each partial dividend stays
+ * within 32 bits.
+ *
+ * d1 High 26-bit digit of the dividend.
+ * d0 Low 26-bit digit of the dividend.
+ * dv Divisor digit.
+ * returns the estimated quotient of (d1 * 2^26 + d0) / dv.
+ */
+static WC_INLINE sp_digit sp_384_div_word_15(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 26 bits from d1 and top 5 bits from d0. */
+    d = (d1 << 5) | (d0 >> 21);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 6 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 16) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 11 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 11) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 16 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 6) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Next 5 bits from d0. */
+    r <<= 5;
+    d <<= 5;
+    d |= (d0 >> 1) & ((1 << 5) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 26 bits in r */
+    /* Remaining 1 bits from d0. */
+    r <<= 1;
+    d <<= 1;
+    d |= d0 & ((1 << 1) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_div_15(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_32
+    int64_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[30], t2d[15 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 15 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 2 * 15;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* Schoolbook division estimating each quotient digit from the
+         * divisor's most significant digit. */
+        dv = d[14];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 15U);
+        for (i=14; i>=0; i--) {
+            /* Normalise the two digits used for the quotient estimate. */
+            t1[15 + i] += t1[15 + i - 1] >> 26;
+            t1[15 + i - 1] &= 0x3ffffff;
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[15 + i];
+            d1 <<= 26;
+            d1 += t1[15 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_384_div_word_15(t1[15 + i], t1[15 + i - 1], dv);
+#endif
+
+            /* Subtract r1 * d; the estimate may be slightly too large. */
+            sp_384_mul_d_15(t2, d, r1);
+            (void)sp_384_sub_15(&t1[i], &t1[i], t2);
+            t1[15 + i] -= t2[15];
+            t1[15 + i] += t1[15 + i - 1] >> 26;
+            t1[15 + i - 1] &= 0x3ffffff;
+            /* Correct any over-subtraction by adding back a multiple of d. */
+            r1 = (((-t1[15 + i]) << 26) - t1[15 + i - 1]) / dv;
+            r1++;
+            sp_384_mul_d_15(t2, d, r1);
+            (void)sp_384_add_15(&t1[i], &t1[i], t2);
+            t1[15 + i] += t1[15 + i - 1] >> 26;
+            t1[15 + i - 1] &= 0x3ffffff;
+        }
+        /* Final reduction by the top divisor digit. */
+        t1[15 - 1] += t1[15 - 2] >> 26;
+        t1[15 - 2] &= 0x3ffffff;
+        r1 = t1[15 - 1] / dv;
+
+        sp_384_mul_d_15(t2, d, r1);
+        (void)sp_384_sub_15(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 15U);
+        for (i=0; i<13; i++) {
+            r[i+1] += r[i] >> 26;
+            r[i] &= 0x3ffffff;
+        }
+        /* NOTE(review): the carry out of r[13] into r[14] is not
+         * propagated before the sign test below -- verify this matches
+         * sp_384_cond_add_15's expectations. */
+        sp_384_cond_add_15(r, r, d, 0 - ((r[14] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper around sp_384_div_15 that discards the quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_mod_15(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_15(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve.
+ * Stored as 32-bit words, least significant word first. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve.
+ * Stored as 32-bit words, least significant word first. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * Operands and result are in Montgomery form with respect to the order.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_15(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_15(r, a, b);
+    sp_384_mont_reduce_order_15(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * Operand and result are in Montgomery form with respect to the order.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_order_15(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_15(r, a);
+    sp_384_mont_reduce_order_15(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of times to square; must be >= 1.
+ */
+static void sp_384_mont_sqr_n_order_15(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_15(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_15(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * Computes a^(order-2) mod order (Fermat's little theorem); the order is
+ * prime so this is the modular inverse.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_384_mont_inv_order_15(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    /* Simple left-to-right square-and-multiply over the bits of order-2. */
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 15);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_15(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_15(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 15U);
+#else
+    /* Addition-chain form: build repeated-0xf..f powers first, then scan
+     * the low 192 bits of order-2 explicitly. */
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 15;
+    sp_digit* t3 = td + 4 * 15;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_15(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_15(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_15(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_15(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_15(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_15(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_15(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_15(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_15(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_15(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_15(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_15(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_15(t2, t, 48);
+    /* t= a^fffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_15(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_15(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_15(t2, t2, t);
+    /* Square-and-multiply over the low 192 bits of order-2. */
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_15(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_15(t2, t2, a);
+        }
+    }
+    /* Final bit of the exponent (bit 0 of order-2 is set). */
+    sp_384_mont_sqr_order_15(t2, t2);
+    sp_384_mont_mul_order_15(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 384 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * km Optional ephemeral scalar k. When NULL or zero a fresh random k
+ * is generated with rng; otherwise km is used and then zeroed.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+ mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit ed[2*15];
+ sp_digit xd[2*15];
+ sp_digit kd[2*15];
+ sp_digit rd[2*15];
+ sp_digit td[3 * 2*15];
+ sp_point_384 p;
+#endif
+ sp_digit* e = NULL;
+ sp_digit* x = NULL;
+ sp_digit* k = NULL;
+ sp_digit* r = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_384* point = NULL;
+ sp_digit carry;
+ sp_digit* s = NULL;
+ sp_digit* kInv = NULL;
+ int err = MP_OKAY;
+ int32_t c;
+ int i;
+
+ (void)heap;
+
+ err = sp_384_point_new_15(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 15, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ e = d + 0 * 15;
+ x = d + 2 * 15;
+ k = d + 4 * 15;
+ r = d + 6 * 15;
+ tmp = d + 8 * 15;
+#else
+ e = ed;
+ x = xd;
+ k = kd;
+ r = rd;
+ tmp = td;
+#endif
+ /* s reuses e's buffer and kInv reuses k's buffer: e and k are no
+ * longer needed by the time s and kInv are computed. */
+ s = e;
+ kInv = k;
+
+ if (hashLen > 48U) {
+ hashLen = 48U;
+ }
+
+ sp_384_from_bin(e, 15, hash, (int)hashLen);
+ }
+
+ /* Retry with a new k (up to SP_ECC_MAX_SIG_GEN times) if s ends up 0. */
+ for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+ sp_384_from_mp(x, 15, priv);
+
+ /* New random point. */
+ if (km == NULL || mp_iszero(km)) {
+ err = sp_384_ecc_gen_k_15(rng, k);
+ }
+ else {
+ sp_384_from_mp(k, 15, km);
+ mp_zero(km);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_ecc_mulmod_base_15(point, k, 1, NULL);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = point->x mod order */
+ XMEMCPY(r, point->x, sizeof(sp_digit) * 15U);
+ sp_384_norm_15(r);
+ c = sp_384_cmp_15(r, p384_order);
+ sp_384_cond_sub_15(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+ sp_384_norm_15(r);
+
+ /* Conv k to Montgomery form (mod order) */
+ sp_384_mul_15(k, k, p384_norm_order);
+ err = sp_384_mod_15(k, k, p384_order);
+ }
+ if (err == MP_OKAY) {
+ sp_384_norm_15(k);
+ /* kInv = 1/k mod order */
+ sp_384_mont_inv_order_15(kInv, k, tmp);
+ sp_384_norm_15(kInv);
+
+ /* s = r * x + e */
+ sp_384_mul_15(x, x, r);
+ err = sp_384_mod_15(x, x, p384_order);
+ }
+ if (err == MP_OKAY) {
+ sp_384_norm_15(x);
+ carry = sp_384_add_15(s, e, x);
+ sp_384_cond_sub_15(s, s, p384_order, 0 - carry);
+ sp_384_norm_15(s);
+ c = sp_384_cmp_15(s, p384_order);
+ sp_384_cond_sub_15(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+ sp_384_norm_15(s);
+
+ /* s = s * k^-1 mod order */
+ sp_384_mont_mul_order_15(s, s, kInv);
+ sp_384_norm_15(s);
+
+ /* Check that signature is usable. */
+ if (sp_384_iszero_15(s) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (i == 0) {
+ err = RNG_FAILURE_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(r, rm);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(s, sm);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ /* Zeroize the whole allocation (7 * 2 * 15 digits) so the tmp
+ * region holding k-derived inversion temporaries is cleared too. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 15);
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 2U * 15U);
+ XMEMSET(x, 0, sizeof(sp_digit) * 2U * 15U);
+ XMEMSET(k, 0, sizeof(sp_digit) * 2U * 15U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 15U);
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 15U);
+#endif
+ sp_384_point_free_15(point, 1, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 384)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of the public key point Q.
+ * pY Y ordinate of the public key point Q.
+ * pZ Z ordinate of the public key point Q.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Set to 1 when the signature verifies, 0 otherwise.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit u1d[2*15];
+ sp_digit u2d[2*15];
+ sp_digit sd[2*15];
+ sp_digit tmpd[2*15 * 5];
+ sp_point_384 p1d;
+ sp_point_384 p2d;
+#endif
+ sp_digit* u1 = NULL;
+ sp_digit* u2 = NULL;
+ sp_digit* s = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_384* p1;
+ sp_point_384* p2 = NULL;
+ sp_digit carry;
+ int32_t c;
+ int err;
+
+ err = sp_384_point_new_15(heap, p1d, p1);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_15(heap, p2d, p2);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 15, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ u1 = d + 0 * 15;
+ u2 = d + 2 * 15;
+ s = d + 4 * 15;
+ tmp = d + 6 * 15;
+#else
+ u1 = u1d;
+ u2 = u2d;
+ s = sd;
+ tmp = tmpd;
+#endif
+
+ if (hashLen > 48U) {
+ hashLen = 48U;
+ }
+
+ sp_384_from_bin(u1, 15, hash, (int)hashLen);
+ sp_384_from_mp(u2, 15, r);
+ sp_384_from_mp(s, 15, sm);
+ sp_384_from_mp(p2->x, 15, pX);
+ sp_384_from_mp(p2->y, 15, pY);
+ sp_384_from_mp(p2->z, 15, pZ);
+
+ /* Convert s to Montgomery form (mod order). */
+ {
+ sp_384_mul_15(s, s, p384_norm_order);
+ }
+ err = sp_384_mod_15(s, s, p384_order);
+ }
+ if (err == MP_OKAY) {
+ sp_384_norm_15(s);
+ {
+ /* u1 = e/s mod order, u2 = r/s mod order. */
+ sp_384_mont_inv_order_15(s, s, tmp);
+ sp_384_mont_mul_order_15(u1, u1, s);
+ sp_384_mont_mul_order_15(u2, u2, s);
+ }
+
+ err = sp_384_ecc_mulmod_base_15(p1, u1, 0, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_ecc_mulmod_15(p2, p2, u2, 0, heap);
+ }
+
+ if (err == MP_OKAY) {
+ {
+ sp_384_proj_point_add_15(p1, p1, p2, tmp);
+ /* z == 0 signals the addition hit a degenerate case: either the
+ * two points were equal (double instead) or were inverses
+ * (result is the point at infinity). */
+ if (sp_384_iszero_15(p1->z)) {
+ if (sp_384_iszero_15(p1->x) && sp_384_iszero_15(p1->y)) {
+ sp_384_proj_point_dbl_15(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ p1->x[4] = 0;
+ p1->x[5] = 0;
+ p1->x[6] = 0;
+ p1->x[7] = 0;
+ p1->x[8] = 0;
+ p1->x[9] = 0;
+ p1->x[10] = 0;
+ p1->x[11] = 0;
+ p1->x[12] = 0;
+ p1->x[13] = 0;
+ p1->x[14] = 0;
+ XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ }
+ }
+ }
+
+ /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+ /* Reload r and convert to Montgomery form. */
+ sp_384_from_mp(u2, 15, r);
+ err = sp_384_mod_mul_norm_15(u2, u2, p384_mod);
+ }
+
+ if (err == MP_OKAY) {
+ /* u1 = r.z'.z' mod prime */
+ sp_384_mont_sqr_15(p1->z, p1->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(u1, u2, p1->z, p384_mod, p384_mp_mod);
+ *res = (int)(sp_384_cmp_15(p1->x, u1) == 0);
+ if (*res == 0) {
+ /* Reload r and add order. */
+ sp_384_from_mp(u2, 15, r);
+ carry = sp_384_add_15(u2, u2, p384_order);
+ /* Carry means result is greater than mod and is not valid. */
+ if (carry == 0) {
+ sp_384_norm_15(u2);
+
+ /* Compare with mod and if greater or equal then not valid. */
+ c = sp_384_cmp_15(u2, p384_mod);
+ if (c < 0) {
+ /* Convert to Montgomery form */
+ err = sp_384_mod_mul_norm_15(u2, u2, p384_mod);
+ if (err == MP_OKAY) {
+ /* u1 = (r + 1*order).z'.z' mod prime */
+ sp_384_mont_mul_15(u1, u2, p1->z, p384_mod,
+ p384_mp_mod);
+ *res = (int)(sp_384_cmp_15(p1->x, u1) == 0);
+ }
+ }
+ }
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_384_point_free_15(p1, 0, heap);
+ sp_384_point_free_15(p2, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_15(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit t1d[2*15];
+ sp_digit t2d[2*15];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15 * 4, heap, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 15;
+ t2 = d + 2 * 15;
+#else
+ (void)heap;
+
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ /* t1 = y^2 mod p */
+ sp_384_sqr_15(t1, point->y);
+ (void)sp_384_mod_15(t1, t1, p384_mod);
+ /* t2 = x^3 mod p */
+ sp_384_sqr_15(t2, point->x);
+ (void)sp_384_mod_15(t2, t2, p384_mod);
+ sp_384_mul_15(t2, t2, point->x);
+ (void)sp_384_mod_15(t2, t2, p384_mod);
+ /* t1 = y^2 - x^3 (negate via p - x^3, then add) */
+ (void)sp_384_sub_15(t2, p384_mod, t2);
+ sp_384_mont_add_15(t1, t1, t2, p384_mod);
+
+ /* t1 = y^2 - x^3 + 3x; on the curve iff this equals b,
+ * i.e. y^2 == x^3 - 3x + b. */
+ sp_384_mont_add_15(t1, t1, point->x, p384_mod);
+ sp_384_mont_add_15(t1, t1, point->x, p384_mod);
+ sp_384_mont_add_15(t1, t1, point->x, p384_mod);
+
+ if (sp_384_cmp_15(t1, p384_b) != 0) {
+ err = MP_VAL;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 pubd;
+#endif
+ sp_point_384* pub;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_384_point_new_15(NULL, pubd, pub);
+ if (err == MP_OKAY) {
+ sp_384_from_mp(pub->x, 15, pX);
+ sp_384_from_mp(pub->y, 15, pY);
+ /* Affine point: z = 1. */
+ sp_384_from_bin(pub->z, 15, one, (int)sizeof(one));
+
+ err = sp_384_ecc_is_point_15(pub, NULL);
+ }
+
+ sp_384_point_free_15(pub, 0, NULL);
+
+ return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[15];
+ sp_point_384 pubd;
+ sp_point_384 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_384* pub;
+ sp_point_384* p = NULL;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_384_point_new_15(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_15(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 15, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ priv = privd;
+#endif
+
+ sp_384_from_mp(pub->x, 15, pX);
+ sp_384_from_mp(pub->y, 15, pY);
+ /* Affine point: z = 1. */
+ sp_384_from_bin(pub->z, 15, one, (int)sizeof(one));
+ sp_384_from_mp(priv, 15, privm);
+
+ /* Check point at infinity. */
+ if ((sp_384_iszero_15(pub->x) != 0) &&
+ (sp_384_iszero_15(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y */
+ if (sp_384_cmp_15(pub->x, p384_mod) >= 0 ||
+ sp_384_cmp_15(pub->y, p384_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_384_ecc_is_point_15(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order = infinity */
+ err = sp_384_ecc_mulmod_15(p, pub, p384_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_384_iszero_15(p->x) == 0) ||
+ (sp_384_iszero_15(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private = point */
+ err = sp_384_ecc_mulmod_base_15(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_384_cmp_15(p->x, pub->x) != 0 ||
+ sp_384_cmp_15(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_15(p, 0, heap);
+ sp_384_point_free_15(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* qX, mp_int* qY, mp_int* qZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 15 * 5];
+ sp_point_384 pd;
+ sp_point_384 qd;
+#endif
+ /* NULL-initialized so the cleanup below never frees an indeterminate
+ * pointer when point allocation fails before tmp is assigned. */
+ sp_digit* tmp = NULL;
+ sp_point_384* p;
+ sp_point_384* q = NULL;
+ int err;
+
+ err = sp_384_point_new_15(NULL, pd, p);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_15(NULL, qd, q);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 5, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_384_from_mp(p->x, 15, pX);
+ sp_384_from_mp(p->y, 15, pY);
+ sp_384_from_mp(p->z, 15, pZ);
+ sp_384_from_mp(q->x, 15, qX);
+ sp_384_from_mp(q->y, 15, qY);
+ sp_384_from_mp(q->z, 15, qZ);
+
+ sp_384_proj_point_add_15(p, p, q, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_15(q, 0, NULL);
+ sp_384_point_free_15(p, 0, NULL);
+
+ return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 15 * 2];
+ sp_point_384 pd;
+#endif
+ /* NULL-initialized so the cleanup below never frees an indeterminate
+ * pointer when point allocation fails before tmp is assigned. */
+ sp_digit* tmp = NULL;
+ sp_point_384* p;
+ int err;
+
+ err = sp_384_point_new_15(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 2, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_384_from_mp(p->x, 15, pX);
+ sp_384_from_mp(p->y, 15, pY);
+ sp_384_from_mp(p->z, 15, pZ);
+
+ sp_384_proj_point_dbl_15(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_15(p, 0, NULL);
+
+ return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 15 * 6];
+ sp_point_384 pd;
+#endif
+ /* NULL-initialized so the cleanup below never frees an indeterminate
+ * pointer when point allocation fails before tmp is assigned. */
+ sp_digit* tmp = NULL;
+ sp_point_384* p;
+ int err;
+
+ err = sp_384_point_new_15(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+ if (err == MP_OKAY) {
+ sp_384_from_mp(p->x, 15, pX);
+ sp_384_from_mp(p->y, 15, pY);
+ sp_384_from_mp(p->z, 15, pZ);
+
+ sp_384_map_15(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->x, pX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->y, pY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->z, pZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_15(p, 0, NULL);
+
+ return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ * Exponentiates y by a fixed addition chain; the final comment below shows
+ * the exponent is (p384 + 1) / 4, so the result is a square-root candidate.
+ * NOTE(review): the caller is expected to check the result squares back to
+ * the input when the input may be a non-residue.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_15(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit t1d[2 * 15];
+ sp_digit t2d[2 * 15];
+ sp_digit t3d[2 * 15];
+ sp_digit t4d[2 * 15];
+ sp_digit t5d[2 * 15];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ sp_digit* t3;
+ sp_digit* t4;
+ sp_digit* t5;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 15, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 15;
+ t2 = d + 2 * 15;
+ t3 = d + 4 * 15;
+ t4 = d + 6 * 15;
+ t5 = d + 8 * 15;
+#else
+ t1 = t1d;
+ t2 = t2d;
+ t3 = t3d;
+ t4 = t4d;
+ t5 = t5d;
+#endif
+
+ {
+ /* t2 = y ^ 0x2 */
+ sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x3 */
+ sp_384_mont_mul_15(t1, t2, y, p384_mod, p384_mp_mod);
+ /* t5 = y ^ 0xc */
+ sp_384_mont_sqr_n_15(t5, t1, 2, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xf */
+ sp_384_mont_mul_15(t1, t1, t5, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x1e */
+ sp_384_mont_sqr_15(t2, t1, p384_mod, p384_mp_mod);
+ /* t3 = y ^ 0x1f */
+ sp_384_mont_mul_15(t3, t2, y, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x3e0 */
+ sp_384_mont_sqr_n_15(t2, t3, 5, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x3ff */
+ sp_384_mont_mul_15(t1, t3, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x7fe0 */
+ sp_384_mont_sqr_n_15(t2, t1, 5, p384_mod, p384_mp_mod);
+ /* t3 = y ^ 0x7fff */
+ sp_384_mont_mul_15(t3, t3, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x3fff800 */
+ sp_384_mont_sqr_n_15(t2, t3, 15, p384_mod, p384_mp_mod);
+ /* t4 = y ^ 0x3ffffff */
+ sp_384_mont_mul_15(t4, t3, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0xffffffc000000 */
+ sp_384_mont_sqr_n_15(t2, t4, 30, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xfffffffffffff */
+ sp_384_mont_mul_15(t1, t4, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+ sp_384_mont_sqr_n_15(t2, t1, 60, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_15(t1, t1, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+ sp_384_mont_sqr_n_15(t2, t1, 120, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_15(t1, t1, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+ sp_384_mont_sqr_n_15(t2, t1, 15, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+ sp_384_mont_mul_15(t1, t3, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+ sp_384_mont_sqr_n_15(t2, t1, 31, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+ sp_384_mont_mul_15(t1, t4, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+ sp_384_mont_sqr_n_15(t2, t1, 4, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+ sp_384_mont_mul_15(t1, t5, t2, p384_mod, p384_mp_mod);
+ /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+ sp_384_mont_sqr_n_15(t2, t1, 62, p384_mod, p384_mp_mod);
+ /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+ sp_384_mont_mul_15(t1, y, t2, p384_mod, p384_mp_mod);
+ /* y = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+ sp_384_mont_sqr_n_15(y, t1, 30, p384_mod, p384_mp_mod);
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ * Computes y = sqrt(x^3 - 3x + b) mod p and selects the root with the
+ * requested parity.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit xd[2 * 15];
+ sp_digit yd[2 * 15];
+#endif
+ sp_digit* x = NULL;
+ sp_digit* y = NULL;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 15, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ x = d + 0 * 15;
+ y = d + 2 * 15;
+#else
+ x = xd;
+ y = yd;
+#endif
+
+ /* Convert x to Montgomery form. */
+ sp_384_from_mp(x, 15, xm);
+ err = sp_384_mod_mul_norm_15(x, x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ /* y = x^3 */
+ {
+ sp_384_mont_sqr_15(y, x, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_15(y, y, x, p384_mod, p384_mp_mod);
+ }
+ /* y = x^3 - 3x */
+ sp_384_mont_sub_15(y, y, x, p384_mod);
+ sp_384_mont_sub_15(y, y, x, p384_mod);
+ sp_384_mont_sub_15(y, y, x, p384_mod);
+ /* y = x^3 - 3x + b (x is no longer needed; reuse it for b) */
+ err = sp_384_mod_mul_norm_15(x, p384_b, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ sp_384_mont_add_15(y, y, x, p384_mod);
+ /* y = sqrt(x^3 - 3x + b) */
+ err = sp_384_mont_sqrt_15(y);
+ }
+ if (err == MP_OKAY) {
+ /* Leave Montgomery form, then fix the parity of y. */
+ XMEMSET(y + 15, 0, 15U * sizeof(sp_digit));
+ sp_384_mont_reduce_15(y, p384_mod, p384_mp_mod);
+ if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+ sp_384_mont_sub_15(y, p384_mod, y, p384_mod);
+ }
+
+ err = sp_384_to_mp(y, ym);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* SP_WORD_SIZE == 32 */
+#endif /* !WOLFSSL_SP_ASM */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_c64.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_c64.c
new file mode 100644
index 000000000..9038173ed
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_c64.c
@@ -0,0 +1,23220 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+ defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef SP_RSA_PRIVATE_EXP_D
+#define SP_RSA_PRIVATE_EXP_D
+#endif
+
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifndef WOLFSSL_SP_ASM
+#if SP_WORD_SIZE == 64
+#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) && (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Mask for address to obfuscate which of the two address will be used. */
+static const size_t addr_mask[2] = { 0, (size_t)-1 };
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
{
    int i, j = 0;
    word32 s = 0;   /* bit position within the current 57-bit digit */

    r[0] = 0;
    /* Walk the byte array from least significant (last) byte to first,
     * packing 8 bits at a time into 57-bit digits. */
    for (i = n-1; i >= 0; i--) {
        r[j] |= (((sp_digit)a[i]) << s);
        if (s >= 49U) {
            /* This byte straddles a digit boundary: mask the finished
             * digit and carry the byte's top bits into the next digit. */
            r[j] &= 0x1ffffffffffffffL;
            s = 57U - s;
            if (j + 1 >= size) {
                break;
            }
            r[++j] = (sp_digit)a[i] >> s;
            s = 8U - s;
        }
        else {
            s += 8U;
        }
    }

    /* Zero any digits not filled from the input. */
    for (j++; j < size; j++) {
        r[j] = 0;
    }
}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 57
    /* mp_int digits are the same width as sp_digits: straight copy. */
    int j;

    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);

    for (j = a->used; j < size; j++) {
        r[j] = 0;
    }
#elif DIGIT_BIT > 57
    /* Each mp_int digit is wider than 57 bits: split it across one or
     * more sp_digits.  s tracks how many bits of dp[i] were consumed. */
    int i, j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0x1ffffffffffffffL;
        s = 57U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        /* Keep splitting while the source digit still holds >= 57 bits. */
        while ((s + 57U) <= (word32)DIGIT_BIT) {
            s += 57U;
            r[j] &= 0x1ffffffffffffffL;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = 0L;
            }
        }
        s = (word32)DIGIT_BIT - s;
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    /* Each mp_int digit is narrower than 57 bits: accumulate several
     * source digits into each sp_digit.  s is the current bit offset. */
    int i, j = 0, s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 57) {
            r[j] &= 0x1ffffffffffffffL;
            if (j + 1 >= size) {
                break;
            }
            s = 57 - s;
            if (s == DIGIT_BIT) {
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
static void sp_2048_to_bin(sp_digit* r, byte* a)
{
    int i, j, s = 0, b;

    /* Note: r is normalised in place first, so the input is modified. */
    for (i=0; i<35; i++) {
        r[i+1] += r[i] >> 57;
        r[i] &= 0x1ffffffffffffffL;
    }
    /* Emit bytes from the end of the array (big endian output). */
    j = 2048 / 8 - 1;
    a[j] = 0;
    for (i=0; i<36 && j>=0; i++) {
        b = 0;   /* bits of r[i] consumed so far */
        /* lint allow cast of mismatch sp_digit and int */
        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
        b += 8 - s;
        if (j < 0) {
            break;
        }
        while (b < 57) {
            a[j--] = (byte)(r[i] >> b);
            b += 8;
            if (j < 0) {
                break;
            }
        }
        /* s = bits of the next digit already placed in a[j+1]. */
        s = 8 - (b - 57);
        if (j >= 0) {
            a[j] = 0;
        }
        if (s != 0) {
            j++;
        }
    }
}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
SP_NOINLINE static void sp_2048_mul_9(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    /* Column products: tk = sum of a[i]*b[k-i] for all valid i.  With
     * 57-bit digits a 128-bit accumulator cannot overflow for 9 terms. */
    int128_t t0   = ((int128_t)a[ 0]) * b[ 0];
    int128_t t1   = ((int128_t)a[ 0]) * b[ 1]
                 + ((int128_t)a[ 1]) * b[ 0];
    int128_t t2   = ((int128_t)a[ 0]) * b[ 2]
                 + ((int128_t)a[ 1]) * b[ 1]
                 + ((int128_t)a[ 2]) * b[ 0];
    int128_t t3   = ((int128_t)a[ 0]) * b[ 3]
                 + ((int128_t)a[ 1]) * b[ 2]
                 + ((int128_t)a[ 2]) * b[ 1]
                 + ((int128_t)a[ 3]) * b[ 0];
    int128_t t4   = ((int128_t)a[ 0]) * b[ 4]
                 + ((int128_t)a[ 1]) * b[ 3]
                 + ((int128_t)a[ 2]) * b[ 2]
                 + ((int128_t)a[ 3]) * b[ 1]
                 + ((int128_t)a[ 4]) * b[ 0];
    int128_t t5   = ((int128_t)a[ 0]) * b[ 5]
                 + ((int128_t)a[ 1]) * b[ 4]
                 + ((int128_t)a[ 2]) * b[ 3]
                 + ((int128_t)a[ 3]) * b[ 2]
                 + ((int128_t)a[ 4]) * b[ 1]
                 + ((int128_t)a[ 5]) * b[ 0];
    int128_t t6   = ((int128_t)a[ 0]) * b[ 6]
                 + ((int128_t)a[ 1]) * b[ 5]
                 + ((int128_t)a[ 2]) * b[ 4]
                 + ((int128_t)a[ 3]) * b[ 3]
                 + ((int128_t)a[ 4]) * b[ 2]
                 + ((int128_t)a[ 5]) * b[ 1]
                 + ((int128_t)a[ 6]) * b[ 0];
    int128_t t7   = ((int128_t)a[ 0]) * b[ 7]
                 + ((int128_t)a[ 1]) * b[ 6]
                 + ((int128_t)a[ 2]) * b[ 5]
                 + ((int128_t)a[ 3]) * b[ 4]
                 + ((int128_t)a[ 4]) * b[ 3]
                 + ((int128_t)a[ 5]) * b[ 2]
                 + ((int128_t)a[ 6]) * b[ 1]
                 + ((int128_t)a[ 7]) * b[ 0];
    int128_t t8   = ((int128_t)a[ 0]) * b[ 8]
                 + ((int128_t)a[ 1]) * b[ 7]
                 + ((int128_t)a[ 2]) * b[ 6]
                 + ((int128_t)a[ 3]) * b[ 5]
                 + ((int128_t)a[ 4]) * b[ 4]
                 + ((int128_t)a[ 5]) * b[ 3]
                 + ((int128_t)a[ 6]) * b[ 2]
                 + ((int128_t)a[ 7]) * b[ 1]
                 + ((int128_t)a[ 8]) * b[ 0];
    int128_t t9   = ((int128_t)a[ 1]) * b[ 8]
                 + ((int128_t)a[ 2]) * b[ 7]
                 + ((int128_t)a[ 3]) * b[ 6]
                 + ((int128_t)a[ 4]) * b[ 5]
                 + ((int128_t)a[ 5]) * b[ 4]
                 + ((int128_t)a[ 6]) * b[ 3]
                 + ((int128_t)a[ 7]) * b[ 2]
                 + ((int128_t)a[ 8]) * b[ 1];
    int128_t t10  = ((int128_t)a[ 2]) * b[ 8]
                 + ((int128_t)a[ 3]) * b[ 7]
                 + ((int128_t)a[ 4]) * b[ 6]
                 + ((int128_t)a[ 5]) * b[ 5]
                 + ((int128_t)a[ 6]) * b[ 4]
                 + ((int128_t)a[ 7]) * b[ 3]
                 + ((int128_t)a[ 8]) * b[ 2];
    int128_t t11  = ((int128_t)a[ 3]) * b[ 8]
                 + ((int128_t)a[ 4]) * b[ 7]
                 + ((int128_t)a[ 5]) * b[ 6]
                 + ((int128_t)a[ 6]) * b[ 5]
                 + ((int128_t)a[ 7]) * b[ 4]
                 + ((int128_t)a[ 8]) * b[ 3];
    int128_t t12  = ((int128_t)a[ 4]) * b[ 8]
                 + ((int128_t)a[ 5]) * b[ 7]
                 + ((int128_t)a[ 6]) * b[ 6]
                 + ((int128_t)a[ 7]) * b[ 5]
                 + ((int128_t)a[ 8]) * b[ 4];
    int128_t t13  = ((int128_t)a[ 5]) * b[ 8]
                 + ((int128_t)a[ 6]) * b[ 7]
                 + ((int128_t)a[ 7]) * b[ 6]
                 + ((int128_t)a[ 8]) * b[ 5];
    int128_t t14  = ((int128_t)a[ 6]) * b[ 8]
                 + ((int128_t)a[ 7]) * b[ 7]
                 + ((int128_t)a[ 8]) * b[ 6];
    int128_t t15  = ((int128_t)a[ 7]) * b[ 8]
                 + ((int128_t)a[ 8]) * b[ 7];
    int128_t t16  = ((int128_t)a[ 8]) * b[ 8];

    /* Propagate carries and mask each result digit to 57 bits. */
    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffL;
    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffL;
    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffL;
    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffL;
    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffL;
    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffL;
    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffL;
    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffL;
    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffL;
    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffL;
    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL;
    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL;
    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL;
    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL;
    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL;
    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL;
    r[17] = (sp_digit)(t16 >> 57);
                       r[16] = t16 & 0x1ffffffffffffffL;
}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
SP_NOINLINE static void sp_2048_sqr_9(sp_digit* r, const sp_digit* a)
{
    /* Squaring exploits symmetry: cross products a[i]*a[j] (i != j)
     * appear twice, so they are summed once and doubled. */
    int128_t t0   =  ((int128_t)a[ 0]) * a[ 0];
    int128_t t1   = (((int128_t)a[ 0]) * a[ 1]) * 2;
    int128_t t2   = (((int128_t)a[ 0]) * a[ 2]) * 2
                 +  ((int128_t)a[ 1]) * a[ 1];
    int128_t t3   = (((int128_t)a[ 0]) * a[ 3]
                 +  ((int128_t)a[ 1]) * a[ 2]) * 2;
    int128_t t4   = (((int128_t)a[ 0]) * a[ 4]
                 +  ((int128_t)a[ 1]) * a[ 3]) * 2
                 +  ((int128_t)a[ 2]) * a[ 2];
    int128_t t5   = (((int128_t)a[ 0]) * a[ 5]
                 +  ((int128_t)a[ 1]) * a[ 4]
                 +  ((int128_t)a[ 2]) * a[ 3]) * 2;
    int128_t t6   = (((int128_t)a[ 0]) * a[ 6]
                 +  ((int128_t)a[ 1]) * a[ 5]
                 +  ((int128_t)a[ 2]) * a[ 4]) * 2
                 +  ((int128_t)a[ 3]) * a[ 3];
    int128_t t7   = (((int128_t)a[ 0]) * a[ 7]
                 +  ((int128_t)a[ 1]) * a[ 6]
                 +  ((int128_t)a[ 2]) * a[ 5]
                 +  ((int128_t)a[ 3]) * a[ 4]) * 2;
    int128_t t8   = (((int128_t)a[ 0]) * a[ 8]
                 +  ((int128_t)a[ 1]) * a[ 7]
                 +  ((int128_t)a[ 2]) * a[ 6]
                 +  ((int128_t)a[ 3]) * a[ 5]) * 2
                 +  ((int128_t)a[ 4]) * a[ 4];
    int128_t t9   = (((int128_t)a[ 1]) * a[ 8]
                 +  ((int128_t)a[ 2]) * a[ 7]
                 +  ((int128_t)a[ 3]) * a[ 6]
                 +  ((int128_t)a[ 4]) * a[ 5]) * 2;
    int128_t t10  = (((int128_t)a[ 2]) * a[ 8]
                 +  ((int128_t)a[ 3]) * a[ 7]
                 +  ((int128_t)a[ 4]) * a[ 6]) * 2
                 +  ((int128_t)a[ 5]) * a[ 5];
    int128_t t11  = (((int128_t)a[ 3]) * a[ 8]
                 +  ((int128_t)a[ 4]) * a[ 7]
                 +  ((int128_t)a[ 5]) * a[ 6]) * 2;
    int128_t t12  = (((int128_t)a[ 4]) * a[ 8]
                 +  ((int128_t)a[ 5]) * a[ 7]) * 2
                 +  ((int128_t)a[ 6]) * a[ 6];
    int128_t t13  = (((int128_t)a[ 5]) * a[ 8]
                 +  ((int128_t)a[ 6]) * a[ 7]) * 2;
    int128_t t14  = (((int128_t)a[ 6]) * a[ 8]) * 2
                 +  ((int128_t)a[ 7]) * a[ 7];
    int128_t t15  = (((int128_t)a[ 7]) * a[ 8]) * 2;
    int128_t t16  =  ((int128_t)a[ 8]) * a[ 8];

    /* Propagate carries and mask each result digit to 57 bits. */
    t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffL;
    t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffL;
    t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffL;
    t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffL;
    t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffL;
    t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffL;
    t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffL;
    t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffL;
    t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffL;
    t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffL;
    t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL;
    t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL;
    t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL;
    t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL;
    t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL;
    t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL;
    r[17] = (sp_digit)(t16 >> 57);
                       r[16] = t16 & 0x1ffffffffffffffL;
}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_9(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ r[ 0] = a[ 0] + b[ 0];
+ r[ 1] = a[ 1] + b[ 1];
+ r[ 2] = a[ 2] + b[ 2];
+ r[ 3] = a[ 3] + b[ 3];
+ r[ 4] = a[ 4] + b[ 4];
+ r[ 5] = a[ 5] + b[ 5];
+ r[ 6] = a[ 6] + b[ 6];
+ r[ 7] = a[ 7] + b[ 7];
+ r[ 8] = a[ 8] + b[ 8];
+
+ return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 16; i += 8) {
+ r[i + 0] = a[i + 0] + b[i + 0];
+ r[i + 1] = a[i + 1] + b[i + 1];
+ r[i + 2] = a[i + 2] + b[i + 2];
+ r[i + 3] = a[i + 3] + b[i + 3];
+ r[i + 4] = a[i + 4] + b[i + 4];
+ r[i + 5] = a[i + 5] + b[i + 5];
+ r[i + 6] = a[i + 6] + b[i + 6];
+ r[i + 7] = a[i + 7] + b[i + 7];
+ }
+ r[16] = a[16] + b[16];
+ r[17] = a[17] + b[17];
+
+ return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 16; i += 8) {
+ r[i + 0] = a[i + 0] - b[i + 0];
+ r[i + 1] = a[i + 1] - b[i + 1];
+ r[i + 2] = a[i + 2] - b[i + 2];
+ r[i + 3] = a[i + 3] - b[i + 3];
+ r[i + 4] = a[i + 4] - b[i + 4];
+ r[i + 5] = a[i + 5] - b[i + 5];
+ r[i + 6] = a[i + 6] - b[i + 6];
+ r[i + 7] = a[i + 7] - b[i + 7];
+ }
+ r[16] = a[16] - b[16];
+ r[17] = a[17] - b[17];
+
+ return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[18];
+ sp_digit* a1 = z1;
+ sp_digit b1[9];
+ sp_digit* z2 = r + 18;
+ (void)sp_2048_add_9(a1, a, &a[9]);
+ (void)sp_2048_add_9(b1, b, &b[9]);
+ sp_2048_mul_9(z2, &a[9], &b[9]);
+ sp_2048_mul_9(z0, a, b);
+ sp_2048_mul_9(z1, a1, b1);
+ (void)sp_2048_sub_18(z1, z1, z2);
+ (void)sp_2048_sub_18(z1, z1, z0);
+ (void)sp_2048_add_18(r + 9, r + 9, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_18(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[18];
+ sp_digit* a1 = z1;
+ sp_digit* z2 = r + 18;
+ (void)sp_2048_add_9(a1, a, &a[9]);
+ sp_2048_sqr_9(z2, &a[9]);
+ sp_2048_sqr_9(z0, a);
+ sp_2048_sqr_9(z1, a1);
+ (void)sp_2048_sub_18(z1, z1, z2);
+ (void)sp_2048_sub_18(z1, z1, z0);
+ (void)sp_2048_add_18(r + 9, r + 9, z1);
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 32; i += 8) {
+ r[i + 0] = a[i + 0] + b[i + 0];
+ r[i + 1] = a[i + 1] + b[i + 1];
+ r[i + 2] = a[i + 2] + b[i + 2];
+ r[i + 3] = a[i + 3] + b[i + 3];
+ r[i + 4] = a[i + 4] + b[i + 4];
+ r[i + 5] = a[i + 5] + b[i + 5];
+ r[i + 6] = a[i + 6] + b[i + 6];
+ r[i + 7] = a[i + 7] + b[i + 7];
+ }
+ r[32] = a[32] + b[32];
+ r[33] = a[33] + b[33];
+ r[34] = a[34] + b[34];
+ r[35] = a[35] + b[35];
+
+ return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 32; i += 8) {
+ r[i + 0] = a[i + 0] - b[i + 0];
+ r[i + 1] = a[i + 1] - b[i + 1];
+ r[i + 2] = a[i + 2] - b[i + 2];
+ r[i + 3] = a[i + 3] - b[i + 3];
+ r[i + 4] = a[i + 4] - b[i + 4];
+ r[i + 5] = a[i + 5] - b[i + 5];
+ r[i + 6] = a[i + 6] - b[i + 6];
+ r[i + 7] = a[i + 7] - b[i + 7];
+ }
+ r[32] = a[32] - b[32];
+ r[33] = a[33] - b[33];
+ r[34] = a[34] - b[34];
+ r[35] = a[35] - b[35];
+
+ return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[36];
+ sp_digit* a1 = z1;
+ sp_digit b1[18];
+ sp_digit* z2 = r + 36;
+ (void)sp_2048_add_18(a1, a, &a[18]);
+ (void)sp_2048_add_18(b1, b, &b[18]);
+ sp_2048_mul_18(z2, &a[18], &b[18]);
+ sp_2048_mul_18(z0, a, b);
+ sp_2048_mul_18(z1, a1, b1);
+ (void)sp_2048_sub_36(z1, z1, z2);
+ (void)sp_2048_sub_36(z1, z1, z0);
+ (void)sp_2048_add_36(r + 18, r + 18, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[36];
+ sp_digit* a1 = z1;
+ sp_digit* z2 = r + 36;
+ (void)sp_2048_add_18(a1, a, &a[18]);
+ sp_2048_sqr_18(z2, &a[18]);
+ sp_2048_sqr_18(z0, a);
+ sp_2048_sqr_18(z1, a1);
+ (void)sp_2048_sub_36(z1, z1, z2);
+ (void)sp_2048_sub_36(z1, z1, z0);
+ (void)sp_2048_add_36(r + 18, r + 18, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 36; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 36; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
SP_NOINLINE static void sp_2048_mul_36(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    int i, j, k;
    int128_t c;

    /* Product scanning from the most significant column down.
     * c holds the current 57-bit column shifted up by 57 so that a
     * column's carry into the next lower result digit is preserved. */
    c = ((int128_t)a[35]) * b[35];
    r[71] = (sp_digit)(c >> 57);
    c = (c & 0x1ffffffffffffffL) << 57;
    for (k = 69; k >= 0; k--) {
        /* Accumulate all a[i]*b[j] with i + j == k. */
        for (i = 35; i >= 0; i--) {
            j = k - i;
            if (j >= 36) {
                break;
            }
            if (j < 0) {
                continue;
            }

            c += ((int128_t)a[i]) * b[j];
        }
        r[k + 2] += c >> 114;
        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
        c = (c & 0x1ffffffffffffffL) << 57;
    }
    r[0] = (sp_digit)(c >> 57);
}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a)
{
    int i, j, k;
    int128_t c;

    /* Product scanning as in sp_2048_mul_36, but cross products
     * a[i]*a[j] with i > j are doubled and the square term a[i]*a[i]
     * (when k is even) added once. */
    c = ((int128_t)a[35]) * a[35];
    r[71] = (sp_digit)(c >> 57);
    c = (c & 0x1ffffffffffffffL) << 57;
    for (k = 69; k >= 0; k--) {
        for (i = 35; i >= 0; i--) {
            j = k - i;
            if (j >= 36 || i <= j) {
                break;
            }
            if (j < 0) {
                continue;
            }

            c += ((int128_t)a[i]) * a[j] * 2;
        }
        if (i == j) {
            /* k is even: add the diagonal square term once. */
            c += ((int128_t)a[i]) * a[i];
        }

        r[k + 2] += c >> 114;
        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
        c = (c & 0x1ffffffffffffffL) << 57;
    }
    r[0] = (sp_digit)(c >> 57);
}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 18; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 18; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
SP_NOINLINE static void sp_2048_mul_18(sp_digit* r, const sp_digit* a,
    const sp_digit* b)
{
    int i, j, k;
    int128_t c;

    /* Product scanning from the most significant column down; c keeps
     * the current 57-bit column shifted up by 57 to carry downward. */
    c = ((int128_t)a[17]) * b[17];
    r[35] = (sp_digit)(c >> 57);
    c = (c & 0x1ffffffffffffffL) << 57;
    for (k = 33; k >= 0; k--) {
        /* Accumulate all a[i]*b[j] with i + j == k. */
        for (i = 17; i >= 0; i--) {
            j = k - i;
            if (j >= 18) {
                break;
            }
            if (j < 0) {
                continue;
            }

            c += ((int128_t)a[i]) * b[j];
        }
        r[k + 2] += c >> 114;
        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
        c = (c & 0x1ffffffffffffffL) << 57;
    }
    r[0] = (sp_digit)(c >> 57);
}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
SP_NOINLINE static void sp_2048_sqr_18(sp_digit* r, const sp_digit* a)
{
    int i, j, k;
    int128_t c;

    /* Product scanning as in sp_2048_mul_18, doubling cross products
     * a[i]*a[j] (i > j) and adding the diagonal term once. */
    c = ((int128_t)a[17]) * a[17];
    r[35] = (sp_digit)(c >> 57);
    c = (c & 0x1ffffffffffffffL) << 57;
    for (k = 33; k >= 0; k--) {
        for (i = 17; i >= 0; i--) {
            j = k - i;
            if (j >= 18 || i <= j) {
                break;
            }
            if (j < 0) {
                continue;
            }

            c += ((int128_t)a[i]) * a[j] * 2;
        }
        if (i == j) {
            /* k is even: add the diagonal square term once. */
            c += ((int128_t)a[i]) * a[i];
        }

        r[k + 2] += c >> 114;
        r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
        c = (c & 0x1ffffffffffffffL) << 57;
    }
    r[0] = (sp_digit)(c >> 57);
}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
{
    sp_digit x, b;

    /* Newton iteration for the inverse of b modulo a power of two:
     * each step doubles the number of correct low-order bits. */
    b = a[0];
    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
    x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
    /* Keep only the bottom digit's worth of bits (57). */
    x &= 0x1ffffffffffffffL;

    /* rho = -1/m mod 2^57 (the digit base) */
    *rho = (1L << 57) - x;
}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
SP_NOINLINE static void sp_2048_mul_d_36(sp_digit* r, const sp_digit* a,
    sp_digit b)
{
#ifdef WOLFSSL_SP_SMALL
    int128_t tb = b;
    int128_t t = 0;
    int i;

    /* Schoolbook scalar multiply with running 128-bit carry. */
    for (i = 0; i < 36; i++) {
        t += tb * a[i];
        r[i] = t & 0x1ffffffffffffffL;
        t >>= 57;
    }
    r[36] = (sp_digit)t;
#else
    int128_t tb = b;
    int128_t t[8];
    int i;

    /* Unrolled by 8; t[] pipelines partial products so each result
     * digit combines the previous product's carry with the new low part. */
    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
    for (i = 0; i < 32; i += 8) {
        t[1] = tb * a[i+1];
        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
        t[2] = tb * a[i+2];
        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
        t[3] = tb * a[i+3];
        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
        t[4] = tb * a[i+4];
        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
        t[5] = tb * a[i+5];
        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
        t[6] = tb * a[i+6];
        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
        t[7] = tb * a[i+7];
        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
        t[0] = tb * a[i+8];
        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
    }
    t[1] = tb * a[33];
    r[33] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
    t[2] = tb * a[34];
    r[34] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
    t[3] = tb * a[35];
    r[35] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
    r[36] = (sp_digit)(t[3] >> 57);
#endif /* WOLFSSL_SP_SMALL */
}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
static void sp_2048_mont_norm_18(sp_digit* r, const sp_digit* m)
{
    /* Set r = 2^n - 1. */
#ifdef WOLFSSL_SP_SMALL
    int i;

    for (i=0; i<17; i++) {
        r[i] = 0x1ffffffffffffffL;
    }
#else
    int i;

    for (i = 0; i < 16; i += 8) {
        r[i + 0] = 0x1ffffffffffffffL;
        r[i + 1] = 0x1ffffffffffffffL;
        r[i + 2] = 0x1ffffffffffffffL;
        r[i + 3] = 0x1ffffffffffffffL;
        r[i + 4] = 0x1ffffffffffffffL;
        r[i + 5] = 0x1ffffffffffffffL;
        r[i + 6] = 0x1ffffffffffffffL;
        r[i + 7] = 0x1ffffffffffffffL;
    }
    r[16] = 0x1ffffffffffffffL;
#endif
    /* Top digit spans only 1024 - 17*57 = 55 bits. */
    r[17] = 0x7fffffffffffffL;

    /* r = (2^n - 1) mod m */
    (void)sp_2048_sub_18(r, r, m);

    /* Add one so r = 2^n mod m */
    r[0] += 1;
}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
static sp_digit sp_2048_cmp_18(const sp_digit* a, const sp_digit* b)
{
    sp_digit r = 0;
#ifdef WOLFSSL_SP_SMALL
    int i;

    /* Scan from the most significant digit.  The mask (0 - (r == 0))
     * is all ones until a difference is captured, then 0, so every
     * digit is read regardless of where the difference occurs. */
    for (i=17; i>=0; i--) {
        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    }
#else
    int i;

    r |= (a[17] - b[17]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    r |= (a[16] - b[16]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    for (i = 8; i >= 0; i -= 8) {
        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
    }
#endif /* WOLFSSL_SP_SMALL */

    return r;
}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
static void sp_2048_cond_sub_18(sp_digit* r, const sp_digit* a,
        const sp_digit* b, const sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int i;

    /* Masking b with m (all ones or zero) keeps the memory access
     * pattern identical whether or not the subtraction happens. */
    for (i = 0; i < 18; i++) {
        r[i] = a[i] - (b[i] & m);
    }
#else
    int i;

    for (i = 0; i < 16; i += 8) {
        r[i + 0] = a[i + 0] - (b[i + 0] & m);
        r[i + 1] = a[i + 1] - (b[i + 1] & m);
        r[i + 2] = a[i + 2] - (b[i + 2] & m);
        r[i + 3] = a[i + 3] - (b[i + 3] & m);
        r[i + 4] = a[i + 4] - (b[i + 4] & m);
        r[i + 5] = a[i + 5] - (b[i + 5] & m);
        r[i + 6] = a[i + 6] - (b[i + 6] & m);
        r[i + 7] = a[i + 7] - (b[i + 7] & m);
    }
    r[16] = a[16] - (b[16] & m);
    r[17] = a[17] - (b[17] & m);
#endif /* WOLFSSL_SP_SMALL */
}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
SP_NOINLINE static void sp_2048_mul_add_18(sp_digit* r, const sp_digit* a,
    const sp_digit b)
{
#ifdef WOLFSSL_SP_SMALL
    int128_t tb = b;
    int128_t t = 0;
    int i;

    /* Fused multiply-accumulate with a running 128-bit carry. */
    for (i = 0; i < 18; i++) {
        t += (tb * a[i]) + r[i];
        r[i] = t & 0x1ffffffffffffffL;
        t >>= 57;
    }
    r[18] += t;
#else
    int128_t tb = b;
    int128_t t[8];
    int i;

    /* Unrolled by 8; each r digit adds the previous product's carry
     * and the new product's low 57 bits.  Digits may exceed 57 bits
     * here and are normalised by the caller. */
    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
    for (i = 0; i < 16; i += 8) {
        t[1] = tb * a[i+1];
        r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
        t[2] = tb * a[i+2];
        r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
        t[3] = tb * a[i+3];
        r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
        t[4] = tb * a[i+4];
        r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
        t[5] = tb * a[i+5];
        r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
        t[6] = tb * a[i+6];
        r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
        t[7] = tb * a[i+7];
        r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
        t[0] = tb * a[i+8];
        r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
    }
    t[1] = tb * a[17]; r[17] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
    r[18] += (sp_digit)(t[1] >> 57);
#endif /* WOLFSSL_SP_SMALL */
}
+
+/* Normalize the values in each word to 57.
+ *
+ * a Array of sp_digit to normalize.
+ */
static void sp_2048_norm_18(sp_digit* a)
{
#ifdef WOLFSSL_SP_SMALL
    int i;
    /* Carry each digit's overflow (bits >= 57) into the next digit. */
    for (i = 0; i < 17; i++) {
        a[i+1] += a[i] >> 57;
        a[i] &= 0x1ffffffffffffffL;
    }
#else
    int i;
    /* Two unrolled passes: i == 0 normalises a[0..8], i == 8 a[8..16]. */
    for (i = 0; i < 16; i += 8) {
        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL;
        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL;
        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL;
        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL;
        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL;
        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL;
        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL;
        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL;
        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL;
    }
    /* NOTE(review): a[16] already appears to be normalised by the
     * i == 8 pass above, making this tail redundant but harmless. */
    a[16+1] += a[16] >> 57;
    a[16] &= 0x1ffffffffffffffL;
#endif
}
+
+/* Shift the result in the high 1024 bits down to the bottom.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
static void sp_2048_mont_shift_18(sp_digit* r, const sp_digit* a)
{
#ifdef WOLFSSL_SP_SMALL
    int i;
    word64 n;

    /* Right shift by 1024 bits = 17 full digits plus 55 bits, so each
     * new digit takes the next source digit shifted left by 2. */
    n = a[17] >> 55;
    for (i = 0; i < 17; i++) {
        n += (word64)a[18 + i] << 2;
        r[i] = n & 0x1ffffffffffffffL;
        n >>= 57;
    }
    n += (word64)a[35] << 2;
    r[17] = n;
#else
    word64 n;
    int i;

    n = (word64)a[17];
    n = n >> 55U;
    for (i = 0; i < 16; i += 8) {
        n += (word64)a[i+18] << 2U; r[i+0] = n & 0x1ffffffffffffffUL; n >>= 57U;
        n += (word64)a[i+19] << 2U; r[i+1] = n & 0x1ffffffffffffffUL; n >>= 57U;
        n += (word64)a[i+20] << 2U; r[i+2] = n & 0x1ffffffffffffffUL; n >>= 57U;
        n += (word64)a[i+21] << 2U; r[i+3] = n & 0x1ffffffffffffffUL; n >>= 57U;
        n += (word64)a[i+22] << 2U; r[i+4] = n & 0x1ffffffffffffffUL; n >>= 57U;
        n += (word64)a[i+23] << 2U; r[i+5] = n & 0x1ffffffffffffffUL; n >>= 57U;
        n += (word64)a[i+24] << 2U; r[i+6] = n & 0x1ffffffffffffffUL; n >>= 57U;
        n += (word64)a[i+25] << 2U; r[i+7] = n & 0x1ffffffffffffffUL; n >>= 57U;
    }
    n += (word64)a[34] << 2U; r[16] = n & 0x1ffffffffffffffUL; n >>= 57U;
    n += (word64)a[35] << 2U; r[17] = n;
#endif /* WOLFSSL_SP_SMALL */
    /* Clear the now-unused upper half of the double-width value. */
    XMEMSET(&r[18], 0, sizeof(*r) * 18U);
}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp)
{
    int i;
    sp_digit mu;

    /* Make sure the upper half of the double-width input is normalised. */
    sp_2048_norm_18(a + 18);

    for (i=0; i<17; i++) {
        /* mu = a[i] * (-1/m) mod 2^57, so adding mu*m zeroes digit i. */
        mu = (a[i] * mp) & 0x1ffffffffffffffL;
        sp_2048_mul_add_18(a+i, m, mu);
        a[i+1] += a[i] >> 57;
    }
    /* The modulus' top digit spans only 55 bits. */
    mu = (a[i] * mp) & 0x7fffffffffffffL;
    sp_2048_mul_add_18(a+i, m, mu);
    a[i+1] += a[i] >> 57;
    a[i] &= 0x1ffffffffffffffL;

    /* Drop the (now zero) low half and conditionally subtract m once. */
    sp_2048_mont_shift_18(a, a);
    sp_2048_cond_sub_18(a, a, m, 0 - (((a[17] >> 55) > 0) ?
            (sp_digit)1 : (sp_digit)0));
    sp_2048_norm_18(a);
}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montogmery form.
+ * b Second number to multiply in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_2048_mont_mul_18(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_2048_mul_18(r, a, b);
+ sp_2048_mont_reduce_18(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_18(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_2048_sqr_18(r, a);
+ sp_2048_mont_reduce_18(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_d_18(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int128_t tb = b;
+ int128_t t = 0;
+ int i;
+
+ for (i = 0; i < 18; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x1ffffffffffffffL;
+ t >>= 57;
+ }
+ r[18] = (sp_digit)t;
+#else
+ int128_t tb = b;
+ int128_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
+ for (i = 0; i < 16; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
+ }
+ t[1] = tb * a[17];
+ r[17] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+ r[18] = (sp_digit)(t[1] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_2048_cond_add_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 18; i++) {
+ r[i] = a[i] + (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 16; i += 8) {
+ r[i + 0] = a[i + 0] + (b[i + 0] & m);
+ r[i + 1] = a[i + 1] + (b[i + 1] & m);
+ r[i + 2] = a[i + 2] + (b[i + 2] & m);
+ r[i + 3] = a[i + 3] + (b[i + 3] & m);
+ r[i + 4] = a[i + 4] + (b[i + 4] & m);
+ r[i + 5] = a[i + 5] + (b[i + 5] & m);
+ r[i + 6] = a[i + 6] + (b[i + 6] & m);
+ r[i + 7] = a[i + 7] + (b[i + 7] & m);
+ }
+ r[16] = a[16] + (b[16] & m);
+ r[17] = a[17] + (b[17] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 18; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 18; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_2048_div_word_18(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 57 bits from d1 and top 6 bits from d0. */
+ d = (d1 << 6) | (d0 >> 51);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 7 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 45) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 13 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 39) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 19 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 33) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 25 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 27) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 31 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 21) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 37 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 15) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 43 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 9) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 49 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 3) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 55 bits in r */
+ /* Remaining 3 bits from d0. */
+ r <<= 3;
+ d <<= 3;
+ d |= d0 & ((1 << 3) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ int i;
+#ifndef WOLFSSL_SP_DIV_64
+ int128_t d1;
+#endif
+ sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* td;
+#else
+ sp_digit t1d[36], t2d[18 + 1];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 18 + 1), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = td;
+ t2 = td + 2 * 18;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ dv = d[17];
+ XMEMCPY(t1, a, sizeof(*t1) * 2U * 18U);
+ for (i=17; i>=0; i--) {
+ t1[18 + i] += t1[18 + i - 1] >> 57;
+ t1[18 + i - 1] &= 0x1ffffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+ d1 = t1[18 + i];
+ d1 <<= 57;
+ d1 += t1[18 + i - 1];
+ r1 = (sp_digit)(d1 / dv);
+#else
+ r1 = sp_2048_div_word_18(t1[18 + i], t1[18 + i - 1], dv);
+#endif
+
+ sp_2048_mul_d_18(t2, d, r1);
+ (void)sp_2048_sub_18(&t1[i], &t1[i], t2);
+ t1[18 + i] -= t2[18];
+ t1[18 + i] += t1[18 + i - 1] >> 57;
+ t1[18 + i - 1] &= 0x1ffffffffffffffL;
+ r1 = (((-t1[18 + i]) << 57) - t1[18 + i - 1]) / dv;
+ r1++;
+ sp_2048_mul_d_18(t2, d, r1);
+ (void)sp_2048_add_18(&t1[i], &t1[i], t2);
+ t1[18 + i] += t1[18 + i - 1] >> 57;
+ t1[18 + i - 1] &= 0x1ffffffffffffffL;
+ }
+ t1[18 - 1] += t1[18 - 2] >> 57;
+ t1[18 - 2] &= 0x1ffffffffffffffL;
+ r1 = t1[18 - 1] / dv;
+
+ sp_2048_mul_d_18(t2, d, r1);
+ (void)sp_2048_sub_18(t1, t1, t2);
+ XMEMCPY(r, t1, sizeof(*r) * 2U * 18U);
+ for (i=0; i<16; i++) {
+ r[i+1] += r[i] >> 57;
+ r[i] &= 0x1ffffffffffffffL;
+ }
+ sp_2048_cond_add_18(r, r, d, 0 - ((r[17] < 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_2048_mod_18(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_2048_div_18(a, m, NULL, r);
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+ const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* td;
+ sp_digit* t[3];
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 18 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(td, 0, sizeof(*td) * 3U * 18U * 2U);
+
+ norm = t[0] = td;
+ t[1] = &td[18 * 2];
+ t[2] = &td[2 * 18 * 2];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_18(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_2048_mod_18(t[1], a, m);
+ }
+ else {
+ XMEMCPY(t[1], a, sizeof(sp_digit) * 18U);
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_2048_mul_18(t[1], t[1], norm);
+ err = sp_2048_mod_18(t[1], t[1], m);
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 57;
+ c = bits % 57;
+ n = e[i--] << (57 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 57;
+ }
+
+ y = (n >> 56) & 1;
+ n <<= 1;
+
+ sp_2048_mont_mul_18(t[y^1], t[0], t[1], m, mp);
+
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])),
+ sizeof(*t[2]) * 18 * 2);
+ sp_2048_mont_sqr_18(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2],
+ sizeof(*t[2]) * 18 * 2);
+ }
+
+ sp_2048_mont_reduce_18(t[0], m, mp);
+ n = sp_2048_cmp_18(t[0], m);
+ sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(*r) * 18 * 2);
+
+ }
+
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[3][36];
+#else
+ sp_digit* td;
+ sp_digit* t[3];
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 18 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ t[0] = td;
+ t[1] = &td[18 * 2];
+ t[2] = &td[2 * 18 * 2];
+#endif
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_18(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_2048_mod_18(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_2048_mul_18(t[1], t[1], norm);
+ err = sp_2048_mod_18(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_2048_mul_18(t[1], a, norm);
+ err = sp_2048_mod_18(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 57;
+ c = bits % 57;
+ n = e[i--] << (57 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 57;
+ }
+
+ y = (n >> 56) & 1;
+ n <<= 1;
+
+ sp_2048_mont_mul_18(t[y^1], t[0], t[1], m, mp);
+
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+ sp_2048_mont_sqr_18(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+ }
+
+ sp_2048_mont_reduce_18(t[0], m, mp);
+ n = sp_2048_cmp_18(t[0], m);
+ sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(t[0]));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][36];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit rt[36];
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 36, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++)
+ t[i] = td + i * 36;
+#endif
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_18(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_2048_mod_18(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_2048_mul_18(t[1], t[1], norm);
+ err = sp_2048_mod_18(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_2048_mul_18(t[1], a, norm);
+ err = sp_2048_mod_18(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_mont_sqr_18(t[ 2], t[ 1], m, mp);
+ sp_2048_mont_mul_18(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_2048_mont_sqr_18(t[ 4], t[ 2], m, mp);
+ sp_2048_mont_mul_18(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_2048_mont_sqr_18(t[ 6], t[ 3], m, mp);
+ sp_2048_mont_mul_18(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_2048_mont_sqr_18(t[ 8], t[ 4], m, mp);
+ sp_2048_mont_mul_18(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_2048_mont_sqr_18(t[10], t[ 5], m, mp);
+ sp_2048_mont_mul_18(t[11], t[ 6], t[ 5], m, mp);
+ sp_2048_mont_sqr_18(t[12], t[ 6], m, mp);
+ sp_2048_mont_mul_18(t[13], t[ 7], t[ 6], m, mp);
+ sp_2048_mont_sqr_18(t[14], t[ 7], m, mp);
+ sp_2048_mont_mul_18(t[15], t[ 8], t[ 7], m, mp);
+ sp_2048_mont_sqr_18(t[16], t[ 8], m, mp);
+ sp_2048_mont_mul_18(t[17], t[ 9], t[ 8], m, mp);
+ sp_2048_mont_sqr_18(t[18], t[ 9], m, mp);
+ sp_2048_mont_mul_18(t[19], t[10], t[ 9], m, mp);
+ sp_2048_mont_sqr_18(t[20], t[10], m, mp);
+ sp_2048_mont_mul_18(t[21], t[11], t[10], m, mp);
+ sp_2048_mont_sqr_18(t[22], t[11], m, mp);
+ sp_2048_mont_mul_18(t[23], t[12], t[11], m, mp);
+ sp_2048_mont_sqr_18(t[24], t[12], m, mp);
+ sp_2048_mont_mul_18(t[25], t[13], t[12], m, mp);
+ sp_2048_mont_sqr_18(t[26], t[13], m, mp);
+ sp_2048_mont_mul_18(t[27], t[14], t[13], m, mp);
+ sp_2048_mont_sqr_18(t[28], t[14], m, mp);
+ sp_2048_mont_mul_18(t[29], t[15], t[14], m, mp);
+ sp_2048_mont_sqr_18(t[30], t[15], m, mp);
+ sp_2048_mont_mul_18(t[31], t[16], t[15], m, mp);
+
+ bits = ((bits + 4) / 5) * 5;
+ i = ((bits + 56) / 57) - 1;
+ c = bits % 57;
+ if (c == 0) {
+ c = 57;
+ }
+ if (i < 18) {
+ n = e[i--] << (64 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ if (c < 5) {
+ n |= e[i--] << (7 - c);
+ c += 57;
+ }
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ XMEMCPY(rt, t[y], sizeof(rt));
+ for (; i>=0 || c>=5; ) {
+ if (c < 5) {
+ n |= e[i--] << (7 - c);
+ c += 57;
+ }
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+
+ sp_2048_mont_sqr_18(rt, rt, m, mp);
+ sp_2048_mont_sqr_18(rt, rt, m, mp);
+ sp_2048_mont_sqr_18(rt, rt, m, mp);
+ sp_2048_mont_sqr_18(rt, rt, m, mp);
+ sp_2048_mont_sqr_18(rt, rt, m, mp);
+
+ sp_2048_mont_mul_18(rt, rt, t[y], m, mp);
+ }
+
+ sp_2048_mont_reduce_18(rt, m, mp);
+ n = sp_2048_cmp_18(rt, m);
+ sp_2048_cond_sub_18(rt, rt, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, rt, sizeof(rt));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_36(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<35; i++) {
+ r[i] = 0x1ffffffffffffffL;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 32; i += 8) {
+ r[i + 0] = 0x1ffffffffffffffL;
+ r[i + 1] = 0x1ffffffffffffffL;
+ r[i + 2] = 0x1ffffffffffffffL;
+ r[i + 3] = 0x1ffffffffffffffL;
+ r[i + 4] = 0x1ffffffffffffffL;
+ r[i + 5] = 0x1ffffffffffffffL;
+ r[i + 6] = 0x1ffffffffffffffL;
+ r[i + 7] = 0x1ffffffffffffffL;
+ }
+ r[32] = 0x1ffffffffffffffL;
+ r[33] = 0x1ffffffffffffffL;
+ r[34] = 0x1ffffffffffffffL;
+#endif
+ r[35] = 0x1fffffffffffffL;
+
+    /* r = (2^n - 1) mod m */
+ (void)sp_2048_sub_36(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_2048_cmp_36(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=35; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[35] - b[35]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[34] - b[34]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[33] - b[33]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[32] - b[32]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ for (i = 24; i >= 0; i -= 8) {
+ r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#endif /* WOLFSSL_SP_SMALL */
+
+ return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_2048_cond_sub_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 36; i++) {
+ r[i] = a[i] - (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 32; i += 8) {
+ r[i + 0] = a[i + 0] - (b[i + 0] & m);
+ r[i + 1] = a[i + 1] - (b[i + 1] & m);
+ r[i + 2] = a[i + 2] - (b[i + 2] & m);
+ r[i + 3] = a[i + 3] - (b[i + 3] & m);
+ r[i + 4] = a[i + 4] - (b[i + 4] & m);
+ r[i + 5] = a[i + 5] - (b[i + 5] & m);
+ r[i + 6] = a[i + 6] - (b[i + 6] & m);
+ r[i + 7] = a[i + 7] - (b[i + 7] & m);
+ }
+ r[32] = a[32] - (b[32] & m);
+ r[33] = a[33] - (b[33] & m);
+ r[34] = a[34] - (b[34] & m);
+ r[35] = a[35] - (b[35] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_2048_mul_add_36(sp_digit* r, const sp_digit* a,
+ const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int128_t tb = b;
+ int128_t t = 0;
+ int i;
+
+ for (i = 0; i < 36; i++) {
+ t += (tb * a[i]) + r[i];
+ r[i] = t & 0x1ffffffffffffffL;
+ t >>= 57;
+ }
+ r[36] += t;
+#else
+ int128_t tb = b;
+ int128_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
+ for (i = 0; i < 32; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+ t[2] = tb * a[i+2];
+ r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
+ t[3] = tb * a[i+3];
+ r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
+ t[4] = tb * a[i+4];
+ r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
+ t[5] = tb * a[i+5];
+ r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
+ t[6] = tb * a[i+6];
+ r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
+ t[7] = tb * a[i+7];
+ r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
+ t[0] = tb * a[i+8];
+ r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
+ }
+ t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+ t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
+ t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
+ r[36] += (sp_digit)(t[3] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 57 bits.
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_2048_norm_36(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ for (i = 0; i < 35; i++) {
+ a[i+1] += a[i] >> 57;
+ a[i] &= 0x1ffffffffffffffL;
+ }
+#else
+ int i;
+ for (i = 0; i < 32; i += 8) {
+ a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL;
+ a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL;
+ a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL;
+ a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL;
+ a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL;
+ a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL;
+ a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL;
+ a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL;
+ a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL;
+ }
+ a[32+1] += a[32] >> 57;
+ a[32] &= 0x1ffffffffffffffL;
+ a[33+1] += a[33] >> 57;
+ a[33] &= 0x1ffffffffffffffL;
+ a[34+1] += a[34] >> 57;
+ a[34] &= 0x1ffffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 2048 bits down to the bottom.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_2048_mont_shift_36(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+ sp_digit n, s;
+
+ s = a[36];
+ n = a[35] >> 53;
+ for (i = 0; i < 35; i++) {
+ n += (s & 0x1ffffffffffffffL) << 4;
+ r[i] = n & 0x1ffffffffffffffL;
+ n >>= 57;
+ s = a[37 + i] + (s >> 57);
+ }
+ n += s << 4;
+ r[35] = n;
+#else
+ sp_digit n, s;
+ int i;
+
+ s = a[36]; n = a[35] >> 53;
+ for (i = 0; i < 32; i += 8) {
+ n += (s & 0x1ffffffffffffffL) << 4; r[i+0] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+37] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 4; r[i+1] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+38] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 4; r[i+2] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+39] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 4; r[i+3] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+40] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 4; r[i+4] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+41] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 4; r[i+5] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+42] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 4; r[i+6] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+43] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 4; r[i+7] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[i+44] + (s >> 57);
+ }
+ n += (s & 0x1ffffffffffffffL) << 4; r[32] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[69] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 4; r[33] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[70] + (s >> 57);
+ n += (s & 0x1ffffffffffffffL) << 4; r[34] = n & 0x1ffffffffffffffL;
+ n >>= 57; s = a[71] + (s >> 57);
+ n += s << 4; r[35] = n;
+#endif /* WOLFSSL_SP_SMALL */
+ XMEMSET(&r[36], 0, sizeof(*r) * 36U);
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+ int i;
+ sp_digit mu;
+
+ sp_2048_norm_36(a + 36);
+
+#ifdef WOLFSSL_SP_DH
+ if (mp != 1) {
+ for (i=0; i<35; i++) {
+ mu = (a[i] * mp) & 0x1ffffffffffffffL;
+ sp_2048_mul_add_36(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ }
+ mu = (a[i] * mp) & 0x1fffffffffffffL;
+ sp_2048_mul_add_36(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ a[i] &= 0x1ffffffffffffffL;
+ }
+ else {
+ for (i=0; i<35; i++) {
+ mu = a[i] & 0x1ffffffffffffffL;
+ sp_2048_mul_add_36(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ }
+ mu = a[i] & 0x1fffffffffffffL;
+ sp_2048_mul_add_36(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ a[i] &= 0x1ffffffffffffffL;
+ }
+#else
+ for (i=0; i<35; i++) {
+ mu = (a[i] * mp) & 0x1ffffffffffffffL;
+ sp_2048_mul_add_36(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ }
+ mu = (a[i] * mp) & 0x1fffffffffffffL;
+ sp_2048_mul_add_36(a+i, m, mu);
+ a[i+1] += a[i] >> 57;
+ a[i] &= 0x1ffffffffffffffL;
+#endif
+
+ sp_2048_mont_shift_36(a, a);
+ sp_2048_cond_sub_36(a, a, m, 0 - (((a[35] >> 53) > 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_2048_norm_36(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_36(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_2048_mul_36(r, a, b);
+ sp_2048_mont_reduce_36(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_36(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_2048_sqr_36(r, a);
+ sp_2048_mont_reduce_36(r, m, mp);
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_2048_cond_add_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 0; i < 36; i++) {
+ r[i] = a[i] + (b[i] & m);
+ }
+#else
+ int i;
+
+ for (i = 0; i < 32; i += 8) {
+ r[i + 0] = a[i + 0] + (b[i + 0] & m);
+ r[i + 1] = a[i + 1] + (b[i + 1] & m);
+ r[i + 2] = a[i + 2] + (b[i + 2] & m);
+ r[i + 3] = a[i + 3] + (b[i + 3] & m);
+ r[i + 4] = a[i + 4] + (b[i + 4] & m);
+ r[i + 5] = a[i + 5] + (b[i + 5] & m);
+ r[i + 6] = a[i + 6] + (b[i + 6] & m);
+ r[i + 7] = a[i + 7] + (b[i + 7] & m);
+ }
+ r[32] = a[32] + (b[32] & m);
+ r[33] = a[33] + (b[33] & m);
+ r[34] = a[34] + (b[34] & m);
+ r[35] = a[35] + (b[35] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_sub_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 36; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_2048_add_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ for (i = 0; i < 36; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#endif
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 57 bits from d1 and top 6 bits from d0. */
+ d = (d1 << 6) | (d0 >> 51);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 7 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 45) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 13 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 39) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 19 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 33) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 25 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 27) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 31 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 21) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 37 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 15) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 43 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 9) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 49 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 3) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 55 bits in r */
+ /* Remaining 3 bits from d0. */
+ r <<= 3;
+ d <<= 3;
+ d |= d0 & ((1 << 3) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ int i;
+#ifndef WOLFSSL_SP_DIV_64
+ int128_t d1;
+#endif
+ sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* td;
+#else
+ sp_digit t1d[72], t2d[36 + 1];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 36 + 1), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = td;
+ t2 = td + 2 * 36;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ dv = d[35];
+ XMEMCPY(t1, a, sizeof(*t1) * 2U * 36U);
+ for (i=35; i>=0; i--) {
+ t1[36 + i] += t1[36 + i - 1] >> 57;
+ t1[36 + i - 1] &= 0x1ffffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+ d1 = t1[36 + i];
+ d1 <<= 57;
+ d1 += t1[36 + i - 1];
+ r1 = (sp_digit)(d1 / dv);
+#else
+ r1 = sp_2048_div_word_36(t1[36 + i], t1[36 + i - 1], dv);
+#endif
+
+ sp_2048_mul_d_36(t2, d, r1);
+ (void)sp_2048_sub_36(&t1[i], &t1[i], t2);
+ t1[36 + i] -= t2[36];
+ t1[36 + i] += t1[36 + i - 1] >> 57;
+ t1[36 + i - 1] &= 0x1ffffffffffffffL;
+ r1 = (((-t1[36 + i]) << 57) - t1[36 + i - 1]) / dv;
+ r1++;
+ sp_2048_mul_d_36(t2, d, r1);
+ (void)sp_2048_add_36(&t1[i], &t1[i], t2);
+ t1[36 + i] += t1[36 + i - 1] >> 57;
+ t1[36 + i - 1] &= 0x1ffffffffffffffL;
+ }
+ t1[36 - 1] += t1[36 - 2] >> 57;
+ t1[36 - 2] &= 0x1ffffffffffffffL;
+ r1 = t1[36 - 1] / dv;
+
+ sp_2048_mul_d_36(t2, d, r1);
+ (void)sp_2048_sub_36(t1, t1, t2);
+ XMEMCPY(r, t1, sizeof(*r) * 2U * 36U);
+ for (i=0; i<34; i++) {
+ r[i+1] += r[i] >> 57;
+ r[i] &= 0x1ffffffffffffffL;
+ }
+ sp_2048_cond_add_36(r, r, d, 0 - ((r[35] < 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_2048_mod_36(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_2048_div_36(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Three build variants: a two-operand Montgomery ladder (WOLFSSL_SP_SMALL),
+ * a cache-attack resistant ladder (WOLFSSL_SP_CACHE_RESISTANT) and a
+ * 5-bit fixed-window method (default).
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* td;
+    sp_digit* t[3];
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 36 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 36U * 2U);
+
+        /* norm aliases t[0]: the Montgomery normalisation value computed by
+         * sp_2048_mont_norm_36() also serves as the initial result. */
+        norm = t[0] = td;
+        t[1] = &td[36 * 2];
+        t[2] = &td[2 * 36 * 2];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_36(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_2048_mod_36(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 36U);
+        }
+    }
+    if (err == MP_OKAY) {
+        /* Convert the base into Montgomery form. */
+        sp_2048_mul_36(t[1], t[1], norm);
+        err = sp_2048_mod_36(t[1], t[1], m);
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 57;
+        c = bits % 57;
+        /* Top-align the c valid bits of the digit so the next exponent bit
+         * is always extracted at position 56 (digits hold 57 bits). */
+        n = e[i--] << (57 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 57;
+            }
+
+            y = (n >> 56) & 1;
+            n <<= 1;
+
+            sp_2048_mont_mul_36(t[y^1], t[0], t[1], m, mp);
+
+            /* Constant-time select of t[y] via address masking so the
+             * access pattern does not depend on the exponent bit. */
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 36 * 2);
+            sp_2048_mont_sqr_36(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 36 * 2);
+        }
+
+        /* Convert out of Montgomery form; conditional subtract keeps the
+         * final compare/reduce constant time. */
+        sp_2048_mont_reduce_36(t[0], m, mp);
+        n = sp_2048_cmp_36(t[0], m);
+        sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 36 * 2);
+
+    }
+
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[3][72];
+#else
+    sp_digit* td;
+    sp_digit* t[3];
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 36 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        t[0] = td;
+        t[1] = &td[36 * 2];
+        t[2] = &td[2 * 36 * 2];
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_36(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_2048_mod_36(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_2048_mul_36(t[1], t[1], norm);
+                err = sp_2048_mod_36(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_2048_mul_36(t[1], a, norm);
+            err = sp_2048_mod_36(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 57;
+        c = bits % 57;
+        n = e[i--] << (57 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 57;
+            }
+
+            y = (n >> 56) & 1;
+            n <<= 1;
+
+            sp_2048_mont_mul_36(t[y^1], t[0], t[1], m, mp);
+
+            /* Constant-time select of t[y]; copy the full 72-digit value.
+             * sizeof(*t[2]) * 36 * 2 is correct for both the stack-array
+             * build and the WOLFSSL_SMALL_STACK build, where t[2] is a
+             * pointer and sizeof(t[2]) would only be the pointer size. */
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 36 * 2);
+            sp_2048_mont_sqr_36(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 36 * 2);
+        }
+
+        sp_2048_mont_reduce_36(t[0], m, mp);
+        n = sp_2048_cmp_36(t[0], m);
+        sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        /* Copy the full 72-digit result (see copy-size note above). */
+        XMEMCPY(r, t[0], sizeof(*t[0]) * 36 * 2);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][72];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit rt[72];
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 72, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 72;
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_36(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_2048_mod_36(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_2048_mul_36(t[1], t[1], norm);
+                err = sp_2048_mod_36(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_2048_mul_36(t[1], a, norm);
+            err = sp_2048_mod_36(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute t[x] = a^x in Montgomery form for the 5-bit window. */
+        sp_2048_mont_sqr_36(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_36(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_36(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_36(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_36(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_36(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_36(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_36(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_36(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_36(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_36(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_36(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_36(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_36(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_36(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_36(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_36(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_36(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_36(t[20], t[10], m, mp);
+        sp_2048_mont_mul_36(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_36(t[22], t[11], m, mp);
+        sp_2048_mont_mul_36(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_36(t[24], t[12], m, mp);
+        sp_2048_mont_mul_36(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_36(t[26], t[13], m, mp);
+        sp_2048_mont_mul_36(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_36(t[28], t[14], m, mp);
+        sp_2048_mont_mul_36(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_36(t[30], t[15], m, mp);
+        sp_2048_mont_mul_36(t[31], t[16], t[15], m, mp);
+
+        /* Round bits up to a multiple of the window size (5). */
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 56) / 57) - 1;
+        c = bits % 57;
+        if (c == 0) {
+            c = 57;
+        }
+        if (i < 36) {
+            /* Top-align the c valid bits of the 57-bit digit in the
+             * 64-bit word; windows are taken from bits 63..59. */
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            /* Pull in the next digit below the remaining bits (7 = 64-57). */
+            n |= e[i--] << (7 - c);
+            c += 57;
+        }
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        XMEMCPY(rt, t[y], sizeof(rt));
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (7 - c);
+                c += 57;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            /* Five squarings then one window multiply per 5 exponent bits. */
+            sp_2048_mont_sqr_36(rt, rt, m, mp);
+            sp_2048_mont_sqr_36(rt, rt, m, mp);
+            sp_2048_mont_sqr_36(rt, rt, m, mp);
+            sp_2048_mont_sqr_36(rt, rt, m, mp);
+            sp_2048_mont_sqr_36(rt, rt, m, mp);
+
+            sp_2048_mont_mul_36(rt, rt, t[y], m, mp);
+        }
+
+        sp_2048_mont_reduce_36(rt, m, mp);
+        n = sp_2048_cmp_36(rt, m);
+        sp_2048_cond_sub_36(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+ /* WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in       Array of bytes representing the number to exponentiate, base.
+ * inLen    Number of bytes in base.
+ * em       Public exponent.
+ * mm       Modulus.
+ * out      Buffer to hold big-endian bytes of exponentiation result.
+ *          Must be at least 256 bytes long.
+ * outLen   Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit* norm;
+    sp_digit e[1] = {0};
+    sp_digit mp;
+    int i;
+    int err = MP_OKAY;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* The public exponent must fit in one 57-bit digit. */
+        if (mp_count_bits(em) > 57) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* Carve the one allocation into base, result and modulus; norm
+         * aliases r since it is only needed before r is written. */
+        a = d;
+        r = a + 36 * 2;
+        m = r + 36 * 2;
+        norm = r;
+
+        sp_2048_from_bin(a, 36, in, inLen);
+#if DIGIT_BIT >= 57
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(m, 36, mm);
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_36(norm, m);
+    }
+    if (err == MP_OKAY) {
+        /* Convert the base into Montgomery form. */
+        sp_2048_mul_36(a, a, norm);
+        err = sp_2048_mod_36(a, a, m);
+    }
+    if (err == MP_OKAY) {
+        /* Find the highest set bit of the (public) exponent. */
+        for (i=56; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
+
+        /* Left-to-right square-and-multiply; the exponent is public so a
+         * data-dependent branch here is acceptable. */
+        XMEMCPY(r, a, sizeof(sp_digit) * 36 * 2);
+        for (i--; i>=0; i--) {
+            sp_2048_mont_sqr_36(r, r, m, mp);
+
+            if (((e[0] >> i) & 1) == 1) {
+                sp_2048_mont_mul_36(r, r, a, m, mp);
+            }
+        }
+        /* Out of Montgomery form, then conditionally subtract m. */
+        sp_2048_mont_reduce_36(r, m, mp);
+        mp = sp_2048_cmp_36(r, m);
+        sp_2048_cond_sub_36(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
+
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit ad[72], md[36], rd[72];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit e[1] = {0};
+    int err = MP_OKAY;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        /* The public exponent must fit in one 57-bit digit. */
+        if (mp_count_bits(em) > 57) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 36 * 2;
+        m = r + 36 * 2;
+    }
+#else
+    a = ad;
+    m = md;
+    r = rd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_2048_from_bin(a, 36, in, inLen);
+#if DIGIT_BIT >= 57
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(m, 36, mm);
+
+        if (e[0] == 0x3) {
+            /* Common case e == 3: one squaring and one multiplication,
+             * no Montgomery setup needed. */
+            sp_2048_sqr_36(r, a);
+            err = sp_2048_mod_36(r, r, m);
+            if (err == MP_OKAY) {
+                sp_2048_mul_36(r, a, r);
+                err = sp_2048_mod_36(r, r, m);
+            }
+        }
+        else {
+            /* norm aliases r: only needed before r is first written. */
+            sp_digit* norm = r;
+            int i;
+            sp_digit mp;
+
+            sp_2048_mont_setup(m, &mp);
+            sp_2048_mont_norm_36(norm, m);
+
+            sp_2048_mul_36(a, a, norm);
+            err = sp_2048_mod_36(a, a, m);
+
+            if (err == MP_OKAY) {
+                /* Find the highest set bit of the (public) exponent. */
+                for (i=56; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
+                        break;
+                    }
+                }
+
+                /* Left-to-right square-and-multiply. */
+                XMEMCPY(r, a, sizeof(sp_digit) * 72U);
+                for (i--; i>=0; i--) {
+                    sp_2048_mont_sqr_36(r, r, m, mp);
+
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_2048_mont_mul_36(r, r, a, m, mp);
+                    }
+                }
+                sp_2048_mont_reduce_36(r, m, mp);
+                mp = sp_2048_cmp_36(r, m);
+                sp_2048_cond_sub_36(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
+/* RSA private key operation.
+ *
+ * Either a plain d-exponentiation (SP_RSA_PRIVATE_EXP_D / RSA_LOW_MEM) or
+ * the CRT method using p, q, dp, dq and qInv.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ *     Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* a;
+    sp_digit* d = NULL;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    /* CRT parameters are unused in the plain d-exponentiation path. */
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        /* Layout: d[0..35] = exponent, a[0..71] = base/result, m[0..35]. */
+        a = d + 36;
+        m = a + 72;
+        r = a;
+
+        sp_2048_from_bin(a, 36, in, inLen);
+        sp_2048_from_mp(d, 36, dm);
+        sp_2048_from_mp(m, 36, mm);
+        err = sp_2048_mod_exp_36(r, a, d, 2048, m, 0);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (d != NULL) {
+        /* Zeroise the private exponent before freeing. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 36);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[72], d[36], m[36];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    /* CRT parameters are unused in the plain d-exponentiation path. */
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_bin(a, 36, in, inLen);
+        sp_2048_from_mp(d, 36, dm);
+        sp_2048_from_mp(m, 36, mm);
+        err = sp_2048_mod_exp_36(r, a, d, 2048, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    /* Zeroise the private exponent held on the stack. */
+    XMEMSET(d, 0, sizeof(sp_digit) * 36);
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#else
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* t = NULL;
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* dq;
+    sp_digit* qi;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    /* d and the full modulus are unused in the CRT path. */
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 256) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL) {
+            err = MEMORY_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        /* dp, dq and qi deliberately share one 18-digit slot: they are
+         * loaded and consumed one after another, never needed together.
+         * r aliases the top half of a (a is finished with by then). */
+        a = t;
+        p = a + 36 * 2;
+        q = p + 18;
+        qi = dq = dp = q + 18;
+        tmpa = qi + 18;
+        tmpb = tmpa + 36;
+
+        r = t + 36;
+
+        sp_2048_from_bin(a, 36, in, inLen);
+        sp_2048_from_mp(p, 18, pm);
+        sp_2048_from_mp(q, 18, qm);
+        sp_2048_from_mp(dp, 18, dpm);
+        err = sp_2048_mod_exp_18(tmpa, a, dp, 1024, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(dq, 18, dqm);
+        err = sp_2048_mod_exp_18(tmpb, a, dq, 1024, q, 1);
+    }
+    if (err == MP_OKAY) {
+        /* CRT recombination: tmpa = (tmpa - tmpb) mod p; the conditional
+         * add on the sign bit is applied twice to cover the full borrow
+         * range of the subtraction. */
+        (void)sp_2048_sub_18(tmpa, tmpa, tmpb);
+        sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63));
+        sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63));
+
+        sp_2048_from_mp(qi, 18, qim);
+        sp_2048_mul_18(tmpa, tmpa, qi);
+        err = sp_2048_mod_18(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* r = tmpb + q * tmpa gives the full 2048-bit result. */
+        sp_2048_mul_18(tmpa, q, tmpa);
+        (void)sp_2048_add_36(r, tmpb, tmpa);
+        sp_2048_norm_36(r);
+
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    if (t != NULL) {
+        /* Zeroise all CRT secrets before freeing. */
+        XMEMSET(t, 0, sizeof(sp_digit) * 18 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+    sp_digit a[36 * 2];
+    sp_digit p[18], q[18], dp[18], dq[18], qi[18];
+    sp_digit tmpa[36], tmpb[36];
+    sp_digit* r = a;
+    int err = MP_OKAY;
+
+    /* d and the full modulus are unused in the CRT path. */
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_bin(a, 36, in, inLen);
+        sp_2048_from_mp(p, 18, pm);
+        sp_2048_from_mp(q, 18, qm);
+        sp_2048_from_mp(dp, 18, dpm);
+        sp_2048_from_mp(dq, 18, dqm);
+        sp_2048_from_mp(qi, 18, qim);
+
+        err = sp_2048_mod_exp_18(tmpa, a, dp, 1024, p, 1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_2048_mod_exp_18(tmpb, a, dq, 1024, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        /* CRT recombination: tmpa = (tmpa - tmpb) * qi mod p. */
+        (void)sp_2048_sub_18(tmpa, tmpa, tmpb);
+        sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63));
+        sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63));
+        sp_2048_mul_18(tmpa, tmpa, qi);
+        err = sp_2048_mod_18(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* r = tmpb + q * tmpa gives the full 2048-bit result. */
+        sp_2048_mul_18(tmpa, tmpa, q);
+        (void)sp_2048_add_36(r, tmpb, tmpa);
+        sp_2048_norm_36(r);
+
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    /* Zeroise all stack-held secrets. */
+    XMEMSET(tmpa, 0, sizeof(tmpa));
+    XMEMSET(tmpb, 0, sizeof(tmpb));
+    XMEMSET(p, 0, sizeof(p));
+    XMEMSET(q, 0, sizeof(q));
+    XMEMSET(dp, 0, sizeof(dp));
+    XMEMSET(dq, 0, sizeof(dq));
+    XMEMSET(qi, 0, sizeof(qi));
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+}
+
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Repacks 36 digits of 57 bits into mp_digits of DIGIT_BIT bits; the three
+ * branches handle DIGIT_BIT equal to, smaller than and larger than 57.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 57
+        /* Same digit size: a straight copy suffices. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 36);
+        r->used = 36;
+        mp_clamp(r);
+#elif DIGIT_BIT < 57
+        /* Each 57-bit digit spans several smaller mp_digits:
+         * s tracks how many bits of a[i] have been consumed. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 36; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 57) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 57 - s;
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp_digit is wider than 57 bits: pack several sp_digits into
+         * each mp_digit, spilling into the next when it fills up. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 36; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 57 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                /* Mask off overflow unless the digit is a full word. */
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 57 - s;
+            }
+            else {
+                s += 57;
+            }
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Layout: b[0..71] = base/result, e[0..35], m[0..35]. */
+        b = d;
+        e = b + 36 * 2;
+        m = e + 36;
+        r = b;
+
+        sp_2048_from_mp(b, 36, base);
+        sp_2048_from_mp(e, 36, exp);
+        sp_2048_from_mp(m, 36, mod);
+
+        err = sp_2048_mod_exp_36(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_2048_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        /* Zeroise the exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[72], ed[36], md[36];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 36 * 2;
+        m = e + 36;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 36, base);
+        sp_2048_from_mp(e, 36, exp);
+        sp_2048_from_mp(m, 36, mod);
+
+        err = sp_2048_mod_exp_36(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_2048_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        /* Zeroise the exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    /* Zeroise the stack-held exponent. */
+    XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+#endif
+
+    return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+/* Shift a left by n bits into r. (r = a << n)
+ *
+ * r Result; 37 digits (36 digits plus one overflow digit in r[36]).
+ * a Input; 36 digits of 57 bits each.
+ * n Bit count to shift by; assumed 0 <= n < 57 (shifts use 57 - n).
+ */
+SP_NOINLINE static void sp_2048_lshift_36(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    /* Bits shifted out of the top digit land in the overflow digit. */
+    r[36] = a[35] >> (57 - n);
+    for (i=35; i>0; i--) {
+        r[i] = ((a[i] << n) | (a[i-1] >> (57 - n))) & 0x1ffffffffffffffL;
+    }
+#else
+    /* Fully unrolled: each digit takes n bits from itself and 57-n bits
+     * from the digit below, masked back to 57 bits. */
+    sp_int_digit s, t;
+
+    s = (sp_int_digit)a[35];
+    r[36] = s >> (57U - n);
+    s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
+    r[35] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
+    r[34] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
+    r[33] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
+    r[32] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
+    r[31] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
+    r[30] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
+    r[29] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
+    r[28] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
+    r[27] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
+    r[26] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
+    r[25] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
+    r[24] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
+    r[23] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
+    r[22] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
+    r[21] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
+    r[20] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
+    r[19] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
+    r[18] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
+    r[17] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
+    r[16] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
+    r[15] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
+    r[14] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
+    r[13] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
+    r[12] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
+    r[11] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
+    r[10] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
+    r[9] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
+    r[8] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
+    r[7] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
+    r[6] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
+    r[5] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
+    r[4] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
+    r[3] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
+    r[2] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+    s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
+    r[1] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+#endif
+    r[0] = (a[0] << n) & 0x1ffffffffffffffL;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * Fast path for FFDHE: since the base is 2, each 5-bit window step is a
+ * left shift of the running value instead of a Montgomery multiply.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_36(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[72];
+    sp_digit td[37];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* 109 = 72 (norm) + 37 (tmp) digits in one allocation. */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 109, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp  = td + 72;
+        XMEMSET(td, 0, sizeof(sp_digit) * 109);
+#else
+        norm = nd;
+        tmp  = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_36(norm, m);
+
+        /* Round bits up to a multiple of the window size (5). */
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 56) / 57) - 1;
+        c = bits % 57;
+        if (c == 0) {
+            c = 57;
+        }
+        if (i < 36) {
+            /* Top-align the c valid bits of the 57-bit digit in the
+             * 64-bit word; windows are taken from bits 63..59. */
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            /* Pull in the next digit below the remaining bits (7 = 64-57). */
+            n |= e[i--] << (7 - c);
+            c += 57;
+        }
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        /* Seed the result with norm << y (i.e. 2^y in Montgomery form). */
+        sp_2048_lshift_36(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (7 - c);
+                c += 57;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            /* Five squarings then a shift by the 5-bit window value. */
+            sp_2048_mont_sqr_36(r, r, m, mp);
+            sp_2048_mont_sqr_36(r, r, m, mp);
+            sp_2048_mont_sqr_36(r, r, m, mp);
+            sp_2048_mont_sqr_36(r, r, m, mp);
+            sp_2048_mont_sqr_36(r, r, m, mp);
+
+            sp_2048_lshift_36(r, r, y);
+            /* Fold the bits above 2048 back in: the top digit holds 53
+             * bits (2048 = 35*57 + 53), so the excess is
+             * (r[36] << 4) + (r[35] >> 53), reduced via norm. */
+            sp_2048_mul_d_36(tmp, norm, (r[36] << 4) + (r[35] >> 53));
+            r[36] = 0;
+            r[35] &= 0x1fffffffffffffL;
+            (void)sp_2048_add_36(r, r, tmp);
+            sp_2048_norm_36(r);
+            o = sp_2048_cmp_36(r, m);
+            sp_2048_cond_sub_36(r, r, m, ((o < 0) ?
+                                          (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        /* Out of Montgomery form; conditional subtract keeps it constant
+         * time. */
+        sp_2048_mont_reduce_36(r, m, mp);
+        n = sp_2048_cmp_36(r, m);
+        sp_2048_cond_sub_36(r, r, m, ((n < 0) ?
+                                      (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_2048 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base    Base.
+ * exp     Array of bytes that is the exponent.
+ * expLen  Length of data, in bytes, in exponent.
+ * mod     Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 256 bytes long.
+ * outLen  Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 256) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Layout: b[0..71] = base/result, e[0..35], m[0..35]. */
+        b = d;
+        e = b + 36 * 2;
+        m = e + 36;
+        r = b;
+
+        sp_2048_from_mp(b, 36, base);
+        sp_2048_from_bin(e, 36, exp, expLen);
+        sp_2048_from_mp(m, 36, mod);
+
+    #ifdef HAVE_FFDHE_2048
+        /* Base-2 fast path: the top 32 bits of the modulus being all ones
+         * matches the FFDHE 2048-bit prime (top digit holds 53 bits). */
+        if (base->used == 1 && base->dp[0] == 2 &&
+                (m[35] >> 21) == 0xffffffffL) {
+            err = sp_2048_mod_exp_2_36(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+        /* Strip leading zero bytes from the shared secret. */
+        for (i=0; i<256 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    if (d != NULL) {
+        /* Zeroise the private exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[72], ed[36], md[36];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+    int err = MP_OKAY;
+
+    if (mp_count_bits(base) > 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 256U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 36 * 2;
+        m = e + 36;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 36, base);
+        sp_2048_from_bin(e, 36, exp, expLen);
+        sp_2048_from_mp(m, 36, mod);
+
+    #ifdef HAVE_FFDHE_2048
+        /* Base-2 fast path for the FFDHE prime (see SMALL variant note). */
+        if (base->used == 1 && base->dp[0] == 2U &&
+                (m[35] >> 21) == 0xffffffffL) {
+            err = sp_2048_mod_exp_2_36(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_2048_mod_exp_36(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_2048
+        }
+    #endif
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+        /* Strip leading zero bytes from the shared secret. */
+        for (i=0; i<256U && out[i] == 0U; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        /* Zeroise the private exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    /* Zeroise the stack-held private exponent. */
+    XMEMSET(e, 0, sizeof(sp_digit) * 36U);
+#endif
+
+    return err;
+#endif
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int err = MP_OKAY;
+ sp_digit* d = NULL;
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int expBits = mp_count_bits(exp);
+
+ /* 1024-bit operands fit in 18 of the 57-bit digits (18*57 = 1026 bits);
+ * the 2048-bit helpers (sp_2048_*) are reused at half width. */
+ if (mp_count_bits(base) > 1024) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* One allocation split into b (2x18 digits, doubles as r), e and m. */
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 18 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 18 * 2;
+ m = e + 18;
+ r = b;
+
+ sp_2048_from_mp(b, 18, base);
+ sp_2048_from_mp(e, 18, exp);
+ sp_2048_from_mp(m, 18, mod);
+
+ err = sp_2048_mod_exp_18(r, b, e, mp_count_bits(exp), m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Clear the upper half of the 36-digit work area before conversion. */
+ XMEMSET(r + 18, 0, sizeof(*r) * 18U);
+ err = sp_2048_to_mp(r, res);
+ }
+
+ if (d != NULL) {
+ /* Zeroise the exponent copy before freeing - it may be private. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 18U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit bd[36], ed[18], md[18];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 1024) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 1024) {
+ err = MP_READ_E;
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 18 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 18 * 2;
+ m = e + 18;
+ r = b;
+ }
+#else
+ r = b = bd;
+ e = ed;
+ m = md;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(b, 18, base);
+ sp_2048_from_mp(e, 18, exp);
+ sp_2048_from_mp(m, 18, mod);
+
+ err = sp_2048_mod_exp_18(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Clear the upper half of the 36-digit work area before conversion. */
+ XMEMSET(r + 18, 0, sizeof(*r) * 18U);
+ err = sp_2048_to_mp(r, res);
+ }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (d != NULL) {
+ /* Zeroise the exponent copy before freeing - it may be private. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 18U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+#else
+ /* Zeroise the stack copy of the exponent - it may be private. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 18U);
+#endif
+
+ return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
+
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ word32 s = 0; /* bits already filled in r[j] */
+
+ r[0] = 0;
+ /* Walk the big-endian bytes from least significant end, packing 8 bits
+ * at a time into 57-bit digits. */
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 49U) {
+ /* Digit full (or overflowing): mask to 57 bits and spill the
+ * byte's remaining high bits into the next digit. */
+ r[j] &= 0x1ffffffffffffffL;
+ s = 57U - s; /* bits of a[i] consumed by r[j] */
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s; /* bits of a[i] now in r[j] */
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ /* Zero any remaining digits. */
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 57
+ /* mp_int digits are the same width as sp_digits: straight copy. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 57
+ /* Each mp_int digit is wider than 57 bits: split it across one or
+ * more sp_digits. s tracks bits of a->dp[i] already consumed. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0x1ffffffffffffffL;
+ s = 57U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 57U) <= (word32)DIGIT_BIT) {
+ s += 57U;
+ r[j] &= 0x1ffffffffffffffL;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* Each mp_int digit is narrower than 57 bits: accumulate several of
+ * them into each sp_digit. s tracks bits already filled in r[j]. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 57) {
+ r[j] &= 0x1ffffffffffffffL;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 57 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_3072_to_bin(sp_digit* r, byte* a)
+{
+ int i, j, s = 0, b;
+
+ /* Normalise first: propagate pending carries so every digit holds
+ * exactly 57 bits. NOTE: this modifies r in place. */
+ for (i=0; i<53; i++) {
+ r[i+1] += r[i] >> 57;
+ r[i] &= 0x1ffffffffffffffL;
+ }
+ /* Emit bytes big-endian, from the least significant digit upwards,
+ * filling the array from the end (index 383) backwards. */
+ j = 3072 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<54 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ while (b < 57) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ /* s = bits of the next digit that belong in the partially
+ * written byte (57 is not a multiple of 8). */
+ s = 8 - (b - 57);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_9(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ /* Fully unrolled schoolbook multiply: t[k] = sum of a[i]*b[k-i].
+ * Each column fits in an int128_t since digits are < 2^57. */
+ int128_t t0 = ((int128_t)a[ 0]) * b[ 0];
+ int128_t t1 = ((int128_t)a[ 0]) * b[ 1]
+ + ((int128_t)a[ 1]) * b[ 0];
+ int128_t t2 = ((int128_t)a[ 0]) * b[ 2]
+ + ((int128_t)a[ 1]) * b[ 1]
+ + ((int128_t)a[ 2]) * b[ 0];
+ int128_t t3 = ((int128_t)a[ 0]) * b[ 3]
+ + ((int128_t)a[ 1]) * b[ 2]
+ + ((int128_t)a[ 2]) * b[ 1]
+ + ((int128_t)a[ 3]) * b[ 0];
+ int128_t t4 = ((int128_t)a[ 0]) * b[ 4]
+ + ((int128_t)a[ 1]) * b[ 3]
+ + ((int128_t)a[ 2]) * b[ 2]
+ + ((int128_t)a[ 3]) * b[ 1]
+ + ((int128_t)a[ 4]) * b[ 0];
+ int128_t t5 = ((int128_t)a[ 0]) * b[ 5]
+ + ((int128_t)a[ 1]) * b[ 4]
+ + ((int128_t)a[ 2]) * b[ 3]
+ + ((int128_t)a[ 3]) * b[ 2]
+ + ((int128_t)a[ 4]) * b[ 1]
+ + ((int128_t)a[ 5]) * b[ 0];
+ int128_t t6 = ((int128_t)a[ 0]) * b[ 6]
+ + ((int128_t)a[ 1]) * b[ 5]
+ + ((int128_t)a[ 2]) * b[ 4]
+ + ((int128_t)a[ 3]) * b[ 3]
+ + ((int128_t)a[ 4]) * b[ 2]
+ + ((int128_t)a[ 5]) * b[ 1]
+ + ((int128_t)a[ 6]) * b[ 0];
+ int128_t t7 = ((int128_t)a[ 0]) * b[ 7]
+ + ((int128_t)a[ 1]) * b[ 6]
+ + ((int128_t)a[ 2]) * b[ 5]
+ + ((int128_t)a[ 3]) * b[ 4]
+ + ((int128_t)a[ 4]) * b[ 3]
+ + ((int128_t)a[ 5]) * b[ 2]
+ + ((int128_t)a[ 6]) * b[ 1]
+ + ((int128_t)a[ 7]) * b[ 0];
+ int128_t t8 = ((int128_t)a[ 0]) * b[ 8]
+ + ((int128_t)a[ 1]) * b[ 7]
+ + ((int128_t)a[ 2]) * b[ 6]
+ + ((int128_t)a[ 3]) * b[ 5]
+ + ((int128_t)a[ 4]) * b[ 4]
+ + ((int128_t)a[ 5]) * b[ 3]
+ + ((int128_t)a[ 6]) * b[ 2]
+ + ((int128_t)a[ 7]) * b[ 1]
+ + ((int128_t)a[ 8]) * b[ 0];
+ int128_t t9 = ((int128_t)a[ 1]) * b[ 8]
+ + ((int128_t)a[ 2]) * b[ 7]
+ + ((int128_t)a[ 3]) * b[ 6]
+ + ((int128_t)a[ 4]) * b[ 5]
+ + ((int128_t)a[ 5]) * b[ 4]
+ + ((int128_t)a[ 6]) * b[ 3]
+ + ((int128_t)a[ 7]) * b[ 2]
+ + ((int128_t)a[ 8]) * b[ 1];
+ int128_t t10 = ((int128_t)a[ 2]) * b[ 8]
+ + ((int128_t)a[ 3]) * b[ 7]
+ + ((int128_t)a[ 4]) * b[ 6]
+ + ((int128_t)a[ 5]) * b[ 5]
+ + ((int128_t)a[ 6]) * b[ 4]
+ + ((int128_t)a[ 7]) * b[ 3]
+ + ((int128_t)a[ 8]) * b[ 2];
+ int128_t t11 = ((int128_t)a[ 3]) * b[ 8]
+ + ((int128_t)a[ 4]) * b[ 7]
+ + ((int128_t)a[ 5]) * b[ 6]
+ + ((int128_t)a[ 6]) * b[ 5]
+ + ((int128_t)a[ 7]) * b[ 4]
+ + ((int128_t)a[ 8]) * b[ 3];
+ int128_t t12 = ((int128_t)a[ 4]) * b[ 8]
+ + ((int128_t)a[ 5]) * b[ 7]
+ + ((int128_t)a[ 6]) * b[ 6]
+ + ((int128_t)a[ 7]) * b[ 5]
+ + ((int128_t)a[ 8]) * b[ 4];
+ int128_t t13 = ((int128_t)a[ 5]) * b[ 8]
+ + ((int128_t)a[ 6]) * b[ 7]
+ + ((int128_t)a[ 7]) * b[ 6]
+ + ((int128_t)a[ 8]) * b[ 5];
+ int128_t t14 = ((int128_t)a[ 6]) * b[ 8]
+ + ((int128_t)a[ 7]) * b[ 7]
+ + ((int128_t)a[ 8]) * b[ 6];
+ int128_t t15 = ((int128_t)a[ 7]) * b[ 8]
+ + ((int128_t)a[ 8]) * b[ 7];
+ int128_t t16 = ((int128_t)a[ 8]) * b[ 8];
+
+ /* Carry-propagate: keep the low 57 bits of each column, push the
+ * rest into the next. Result occupies 18 digits. */
+ t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL;
+ t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL;
+ t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL;
+ t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL;
+ t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL;
+ t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL;
+ t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL;
+ t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL;
+ t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL;
+ t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL;
+ t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL;
+ t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL;
+ t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL;
+ t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL;
+ t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL;
+ t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL;
+ r[17] = (sp_digit)(t16 >> 57);
+ r[16] = t16 & 0x1ffffffffffffffL;
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_9(sp_digit* r, const sp_digit* a)
+{
+ /* Unrolled squaring: cross products a[i]*a[j] (i != j) appear twice,
+ * so they are summed once and doubled; square terms appear once. */
+ int128_t t0 = ((int128_t)a[ 0]) * a[ 0];
+ int128_t t1 = (((int128_t)a[ 0]) * a[ 1]) * 2;
+ int128_t t2 = (((int128_t)a[ 0]) * a[ 2]) * 2
+ + ((int128_t)a[ 1]) * a[ 1];
+ int128_t t3 = (((int128_t)a[ 0]) * a[ 3]
+ + ((int128_t)a[ 1]) * a[ 2]) * 2;
+ int128_t t4 = (((int128_t)a[ 0]) * a[ 4]
+ + ((int128_t)a[ 1]) * a[ 3]) * 2
+ + ((int128_t)a[ 2]) * a[ 2];
+ int128_t t5 = (((int128_t)a[ 0]) * a[ 5]
+ + ((int128_t)a[ 1]) * a[ 4]
+ + ((int128_t)a[ 2]) * a[ 3]) * 2;
+ int128_t t6 = (((int128_t)a[ 0]) * a[ 6]
+ + ((int128_t)a[ 1]) * a[ 5]
+ + ((int128_t)a[ 2]) * a[ 4]) * 2
+ + ((int128_t)a[ 3]) * a[ 3];
+ int128_t t7 = (((int128_t)a[ 0]) * a[ 7]
+ + ((int128_t)a[ 1]) * a[ 6]
+ + ((int128_t)a[ 2]) * a[ 5]
+ + ((int128_t)a[ 3]) * a[ 4]) * 2;
+ int128_t t8 = (((int128_t)a[ 0]) * a[ 8]
+ + ((int128_t)a[ 1]) * a[ 7]
+ + ((int128_t)a[ 2]) * a[ 6]
+ + ((int128_t)a[ 3]) * a[ 5]) * 2
+ + ((int128_t)a[ 4]) * a[ 4];
+ int128_t t9 = (((int128_t)a[ 1]) * a[ 8]
+ + ((int128_t)a[ 2]) * a[ 7]
+ + ((int128_t)a[ 3]) * a[ 6]
+ + ((int128_t)a[ 4]) * a[ 5]) * 2;
+ int128_t t10 = (((int128_t)a[ 2]) * a[ 8]
+ + ((int128_t)a[ 3]) * a[ 7]
+ + ((int128_t)a[ 4]) * a[ 6]) * 2
+ + ((int128_t)a[ 5]) * a[ 5];
+ int128_t t11 = (((int128_t)a[ 3]) * a[ 8]
+ + ((int128_t)a[ 4]) * a[ 7]
+ + ((int128_t)a[ 5]) * a[ 6]) * 2;
+ int128_t t12 = (((int128_t)a[ 4]) * a[ 8]
+ + ((int128_t)a[ 5]) * a[ 7]) * 2
+ + ((int128_t)a[ 6]) * a[ 6];
+ int128_t t13 = (((int128_t)a[ 5]) * a[ 8]
+ + ((int128_t)a[ 6]) * a[ 7]) * 2;
+ int128_t t14 = (((int128_t)a[ 6]) * a[ 8]) * 2
+ + ((int128_t)a[ 7]) * a[ 7];
+ int128_t t15 = (((int128_t)a[ 7]) * a[ 8]) * 2;
+ int128_t t16 = ((int128_t)a[ 8]) * a[ 8];
+
+ /* Carry-propagate into 57-bit digits; result occupies 18 digits. */
+ t1 += t0 >> 57; r[ 0] = t0 & 0x1ffffffffffffffL;
+ t2 += t1 >> 57; r[ 1] = t1 & 0x1ffffffffffffffL;
+ t3 += t2 >> 57; r[ 2] = t2 & 0x1ffffffffffffffL;
+ t4 += t3 >> 57; r[ 3] = t3 & 0x1ffffffffffffffL;
+ t5 += t4 >> 57; r[ 4] = t4 & 0x1ffffffffffffffL;
+ t6 += t5 >> 57; r[ 5] = t5 & 0x1ffffffffffffffL;
+ t7 += t6 >> 57; r[ 6] = t6 & 0x1ffffffffffffffL;
+ t8 += t7 >> 57; r[ 7] = t7 & 0x1ffffffffffffffL;
+ t9 += t8 >> 57; r[ 8] = t8 & 0x1ffffffffffffffL;
+ t10 += t9 >> 57; r[ 9] = t9 & 0x1ffffffffffffffL;
+ t11 += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffL;
+ t12 += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffL;
+ t13 += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffL;
+ t14 += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffL;
+ t15 += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffL;
+ t16 += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffL;
+ r[17] = (sp_digit)(t16 >> 57);
+ r[16] = t16 & 0x1ffffffffffffffL;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_9(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ /* Digit-wise add without carry propagation: 57-bit digits in 64-bit
+ * words leave headroom, so sums may temporarily exceed 57 bits. */
+ r[ 0] = a[ 0] + b[ 0];
+ r[ 1] = a[ 1] + b[ 1];
+ r[ 2] = a[ 2] + b[ 2];
+ r[ 3] = a[ 3] + b[ 3];
+ r[ 4] = a[ 4] + b[ 4];
+ r[ 5] = a[ 5] + b[ 5];
+ r[ 6] = a[ 6] + b[ 6];
+ r[ 7] = a[ 7] + b[ 7];
+ r[ 8] = a[ 8] + b[ 8];
+
+ return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Unrolled-by-8 digit-wise add; carries are deferred (headroom in
+ * the 64-bit words above the 57-bit digit). Always returns 0. */
+ for (i = 0; i < 16; i += 8) {
+ r[i + 0] = a[i + 0] + b[i + 0];
+ r[i + 1] = a[i + 1] + b[i + 1];
+ r[i + 2] = a[i + 2] + b[i + 2];
+ r[i + 3] = a[i + 3] + b[i + 3];
+ r[i + 4] = a[i + 4] + b[i + 4];
+ r[i + 5] = a[i + 5] + b[i + 5];
+ r[i + 6] = a[i + 6] + b[i + 6];
+ r[i + 7] = a[i + 7] + b[i + 7];
+ }
+ r[16] = a[16] + b[16];
+ r[17] = a[17] + b[17];
+
+ return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Unrolled-by-8 digit-wise subtract; digits may go negative and are
+ * fixed up by a later carry pass. Always returns 0. */
+ for (i = 0; i < 16; i += 8) {
+ r[i + 0] = a[i + 0] - b[i + 0];
+ r[i + 1] = a[i + 1] - b[i + 1];
+ r[i + 2] = a[i + 2] - b[i + 2];
+ r[i + 3] = a[i + 3] - b[i + 3];
+ r[i + 4] = a[i + 4] - b[i + 4];
+ r[i + 5] = a[i + 5] - b[i + 5];
+ r[i + 6] = a[i + 6] - b[i + 6];
+ r[i + 7] = a[i + 7] - b[i + 7];
+ }
+ r[16] = a[16] - b[16];
+ r[17] = a[17] - b[17];
+
+ return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_18(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ /* Karatsuba multiply: split each 18-digit operand into two 9-digit
+ * halves and use three 9x9 products:
+ * z0 = lo*lo, z2 = hi*hi, z1 = (lo+hi)*(lo+hi) - z0 - z2. */
+ sp_digit* z0 = r;
+ sp_digit z1[18];
+ sp_digit* a1 = z1; /* reuse z1 as scratch for the half sums */
+ sp_digit b1[9];
+ sp_digit* z2 = r + 18;
+ (void)sp_3072_add_9(a1, a, &a[9]);
+ (void)sp_3072_add_9(b1, b, &b[9]);
+ sp_3072_mul_9(z2, &a[9], &b[9]);
+ sp_3072_mul_9(z0, a, b);
+ sp_3072_mul_9(z1, a1, b1);
+ (void)sp_3072_sub_18(z1, z1, z2);
+ (void)sp_3072_sub_18(z1, z1, z0);
+ /* Fold the middle term in at digit offset 9. */
+ (void)sp_3072_add_18(r + 9, r + 9, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_18(sp_digit* r, const sp_digit* a)
+{
+ /* Karatsuba square: z0 = lo^2, z2 = hi^2,
+ * z1 = (lo+hi)^2 - z0 - z2, folded in at digit offset 9. */
+ sp_digit* z0 = r;
+ sp_digit z1[18];
+ sp_digit* a1 = z1; /* reuse z1 as scratch for lo+hi */
+ sp_digit* z2 = r + 18;
+ (void)sp_3072_add_9(a1, a, &a[9]);
+ sp_3072_sqr_9(z2, &a[9]);
+ sp_3072_sqr_9(z0, a);
+ sp_3072_sqr_9(z1, a1);
+ (void)sp_3072_sub_18(z1, z1, z2);
+ (void)sp_3072_sub_18(z1, z1, z0);
+ (void)sp_3072_add_18(r + 9, r + 9, z1);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Unrolled-by-8 digit-wise subtract; no borrow propagation here. */
+ for (i = 0; i < 32; i += 8) {
+ r[i + 0] = a[i + 0] - b[i + 0];
+ r[i + 1] = a[i + 1] - b[i + 1];
+ r[i + 2] = a[i + 2] - b[i + 2];
+ r[i + 3] = a[i + 3] - b[i + 3];
+ r[i + 4] = a[i + 4] - b[i + 4];
+ r[i + 5] = a[i + 5] - b[i + 5];
+ r[i + 6] = a[i + 6] - b[i + 6];
+ r[i + 7] = a[i + 7] - b[i + 7];
+ }
+ r[32] = a[32] - b[32];
+ r[33] = a[33] - b[33];
+ r[34] = a[34] - b[34];
+ r[35] = a[35] - b[35];
+
+ return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_36(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Unrolled-by-8 digit-wise add; carries are deferred. */
+ for (i = 0; i < 32; i += 8) {
+ r[i + 0] = a[i + 0] + b[i + 0];
+ r[i + 1] = a[i + 1] + b[i + 1];
+ r[i + 2] = a[i + 2] + b[i + 2];
+ r[i + 3] = a[i + 3] + b[i + 3];
+ r[i + 4] = a[i + 4] + b[i + 4];
+ r[i + 5] = a[i + 5] + b[i + 5];
+ r[i + 6] = a[i + 6] + b[i + 6];
+ r[i + 7] = a[i + 7] + b[i + 7];
+ }
+ r[32] = a[32] + b[32];
+ r[33] = a[33] + b[33];
+ r[34] = a[34] + b[34];
+ r[35] = a[35] + b[35];
+
+ return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_54(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ /* Three-way Karatsuba-style split into 18-digit thirds using six
+ * 18x18 products (p0..p5), then recombined at 18-digit offsets. */
+ sp_digit p0[36];
+ sp_digit p1[36];
+ sp_digit p2[36];
+ sp_digit p3[36];
+ sp_digit p4[36];
+ sp_digit p5[36];
+ sp_digit t0[36];
+ sp_digit t1[36];
+ sp_digit t2[36];
+ sp_digit a0[18];
+ sp_digit a1[18];
+ sp_digit a2[18];
+ sp_digit b0[18];
+ sp_digit b1[18];
+ sp_digit b2[18];
+ /* Pairwise sums of the thirds used for the cross products. */
+ (void)sp_3072_add_18(a0, a, &a[18]);
+ (void)sp_3072_add_18(b0, b, &b[18]);
+ (void)sp_3072_add_18(a1, &a[18], &a[36]);
+ (void)sp_3072_add_18(b1, &b[18], &b[36]);
+ (void)sp_3072_add_18(a2, a0, &a[36]);
+ (void)sp_3072_add_18(b2, b0, &b[36]);
+ sp_3072_mul_18(p0, a, b);
+ sp_3072_mul_18(p2, &a[18], &b[18]);
+ sp_3072_mul_18(p4, &a[36], &b[36]);
+ sp_3072_mul_18(p1, a0, b0);
+ sp_3072_mul_18(p3, a1, b1);
+ sp_3072_mul_18(p5, a2, b2);
+ /* Accumulate into a zeroed 108-digit result. */
+ XMEMSET(r, 0, sizeof(*r)*2U*54U);
+ (void)sp_3072_sub_36(t0, p3, p2);
+ (void)sp_3072_sub_36(t1, p1, p2);
+ (void)sp_3072_sub_36(t2, p5, t0);
+ (void)sp_3072_sub_36(t2, t2, t1);
+ (void)sp_3072_sub_36(t0, t0, p4);
+ (void)sp_3072_sub_36(t1, t1, p0);
+ (void)sp_3072_add_36(r, r, p0);
+ (void)sp_3072_add_36(&r[18], &r[18], t1);
+ (void)sp_3072_add_36(&r[36], &r[36], t2);
+ (void)sp_3072_add_36(&r[54], &r[54], t0);
+ (void)sp_3072_add_36(&r[72], &r[72], p4);
+}
+
+/* Square a into r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_54(sp_digit* r, const sp_digit* a)
+{
+ /* Squaring analogue of sp_3072_mul_54: three 18-digit thirds and six
+ * 18-digit squares, recombined at 18-digit offsets. */
+ sp_digit p0[36];
+ sp_digit p1[36];
+ sp_digit p2[36];
+ sp_digit p3[36];
+ sp_digit p4[36];
+ sp_digit p5[36];
+ sp_digit t0[36];
+ sp_digit t1[36];
+ sp_digit t2[36];
+ sp_digit a0[18];
+ sp_digit a1[18];
+ sp_digit a2[18];
+ (void)sp_3072_add_18(a0, a, &a[18]);
+ (void)sp_3072_add_18(a1, &a[18], &a[36]);
+ (void)sp_3072_add_18(a2, a0, &a[36]);
+ sp_3072_sqr_18(p0, a);
+ sp_3072_sqr_18(p2, &a[18]);
+ sp_3072_sqr_18(p4, &a[36]);
+ sp_3072_sqr_18(p1, a0);
+ sp_3072_sqr_18(p3, a1);
+ sp_3072_sqr_18(p5, a2);
+ /* Accumulate into a zeroed 108-digit result. */
+ XMEMSET(r, 0, sizeof(*r)*2U*54U);
+ (void)sp_3072_sub_36(t0, p3, p2);
+ (void)sp_3072_sub_36(t1, p1, p2);
+ (void)sp_3072_sub_36(t2, p5, t0);
+ (void)sp_3072_sub_36(t2, t2, t1);
+ (void)sp_3072_sub_36(t0, t0, p4);
+ (void)sp_3072_sub_36(t1, t1, p0);
+ (void)sp_3072_add_36(r, r, p0);
+ (void)sp_3072_add_36(&r[18], &r[18], t1);
+ (void)sp_3072_add_36(&r[36], &r[36], t2);
+ (void)sp_3072_add_36(&r[54], &r[54], t0);
+ (void)sp_3072_add_36(&r[72], &r[72], p4);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_54(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Size-optimised variant: simple loop, carries deferred. */
+ for (i = 0; i < 54; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_54(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Speed-optimised variant: unrolled by 8 with a 6-digit tail. */
+ for (i = 0; i < 48; i += 8) {
+ r[i + 0] = a[i + 0] + b[i + 0];
+ r[i + 1] = a[i + 1] + b[i + 1];
+ r[i + 2] = a[i + 2] + b[i + 2];
+ r[i + 3] = a[i + 3] + b[i + 3];
+ r[i + 4] = a[i + 4] + b[i + 4];
+ r[i + 5] = a[i + 5] + b[i + 5];
+ r[i + 6] = a[i + 6] + b[i + 6];
+ r[i + 7] = a[i + 7] + b[i + 7];
+ }
+ r[48] = a[48] + b[48];
+ r[49] = a[49] + b[49];
+ r[50] = a[50] + b[50];
+ r[51] = a[51] + b[51];
+ r[52] = a[52] + b[52];
+ r[53] = a[53] + b[53];
+
+ return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_54(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Size-optimised variant: simple loop, borrows deferred. */
+ for (i = 0; i < 54; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_54(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Speed-optimised variant: unrolled by 8 with a 6-digit tail. */
+ for (i = 0; i < 48; i += 8) {
+ r[i + 0] = a[i + 0] - b[i + 0];
+ r[i + 1] = a[i + 1] - b[i + 1];
+ r[i + 2] = a[i + 2] - b[i + 2];
+ r[i + 3] = a[i + 3] - b[i + 3];
+ r[i + 4] = a[i + 4] - b[i + 4];
+ r[i + 5] = a[i + 5] - b[i + 5];
+ r[i + 6] = a[i + 6] - b[i + 6];
+ r[i + 7] = a[i + 7] - b[i + 7];
+ }
+ r[48] = a[48] - b[48];
+ r[49] = a[49] - b[49];
+ r[50] = a[50] - b[50];
+ r[51] = a[51] - b[51];
+ r[52] = a[52] - b[52];
+ r[53] = a[53] - b[53];
+
+ return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_54(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i, j, k;
+ int128_t c;
+
+ /* Size-optimised schoolbook multiply, processing output columns from
+ * most significant (k=105) down to least. c carries the running
+ * column sum; its low 57 bits are shifted up to seed the next column. */
+ c = ((int128_t)a[53]) * b[53];
+ r[107] = (sp_digit)(c >> 57);
+ c = (c & 0x1ffffffffffffffL) << 57;
+ for (k = 105; k >= 0; k--) {
+ for (i = 53; i >= 0; i--) {
+ j = k - i;
+ if (j >= 54) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int128_t)a[i]) * b[j];
+ }
+ r[k + 2] += c >> 114;
+ r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+ c = (c & 0x1ffffffffffffffL) << 57;
+ }
+ r[0] = (sp_digit)(c >> 57);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_54(sp_digit* r, const sp_digit* a)
+{
+ int i, j, k;
+ int128_t c;
+
+ /* Size-optimised schoolbook square: cross products (i > j) are
+ * doubled; the diagonal term a[i]^2 is added once when k is even. */
+ c = ((int128_t)a[53]) * a[53];
+ r[107] = (sp_digit)(c >> 57);
+ c = (c & 0x1ffffffffffffffL) << 57;
+ for (k = 105; k >= 0; k--) {
+ for (i = 53; i >= 0; i--) {
+ j = k - i;
+ if (j >= 54 || i <= j) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int128_t)a[i]) * a[j] * 2;
+ }
+ if (i == j) {
+ c += ((int128_t)a[i]) * a[i];
+ }
+
+ r[k + 2] += c >> 114;
+ r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+ c = (c & 0x1ffffffffffffffL) << 57;
+ }
+ r[0] = (sp_digit)(c >> 57);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_27(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* 27 digits = one 1536-bit half (for RSA-CRT / DH). Carries deferred. */
+ for (i = 0; i < 27; i++) {
+ r[i] = a[i] + b[i];
+ }
+
+ return 0;
+}
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_add_27(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Speed-optimised variant: unrolled by 8 with a 3-digit tail. */
+ for (i = 0; i < 24; i += 8) {
+ r[i + 0] = a[i + 0] + b[i + 0];
+ r[i + 1] = a[i + 1] + b[i + 1];
+ r[i + 2] = a[i + 2] + b[i + 2];
+ r[i + 3] = a[i + 3] + b[i + 3];
+ r[i + 4] = a[i + 4] + b[i + 4];
+ r[i + 5] = a[i + 5] + b[i + 5];
+ r[i + 6] = a[i + 6] + b[i + 6];
+ r[i + 7] = a[i + 7] + b[i + 7];
+ }
+ r[24] = a[24] + b[24];
+ r[25] = a[25] + b[25];
+ r[26] = a[26] + b[26];
+
+ return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_27(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Size-optimised variant: simple loop, borrows deferred. */
+ for (i = 0; i < 27; i++) {
+ r[i] = a[i] - b[i];
+ }
+
+ return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_3072_sub_27(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i;
+
+ /* Speed-optimised variant: unrolled by 8 with a 3-digit tail. */
+ for (i = 0; i < 24; i += 8) {
+ r[i + 0] = a[i + 0] - b[i + 0];
+ r[i + 1] = a[i + 1] - b[i + 1];
+ r[i + 2] = a[i + 2] - b[i + 2];
+ r[i + 3] = a[i + 3] - b[i + 3];
+ r[i + 4] = a[i + 4] - b[i + 4];
+ r[i + 5] = a[i + 5] - b[i + 5];
+ r[i + 6] = a[i + 6] - b[i + 6];
+ r[i + 7] = a[i + 7] - b[i + 7];
+ }
+ r[24] = a[24] - b[24];
+ r[25] = a[25] - b[25];
+ r[26] = a[26] - b[26];
+
+ return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_27(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i, j, k;
+ int128_t c;
+
+ /* Size-optimised schoolbook multiply over 27 digits (1536 bits),
+ * columns processed most significant first; c is the running column
+ * accumulator whose low 57 bits seed the next column. */
+ c = ((int128_t)a[26]) * b[26];
+ r[53] = (sp_digit)(c >> 57);
+ c = (c & 0x1ffffffffffffffL) << 57;
+ for (k = 51; k >= 0; k--) {
+ for (i = 26; i >= 0; i--) {
+ j = k - i;
+ if (j >= 27) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int128_t)a[i]) * b[j];
+ }
+ r[k + 2] += c >> 114;
+ r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+ c = (c & 0x1ffffffffffffffL) << 57;
+ }
+ r[0] = (sp_digit)(c >> 57);
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_27(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ int i, j;
+ int128_t t[54]; /* full column sums before carry propagation */
+
+ /* Speed-optimised schoolbook multiply: accumulate every partial
+ * product into its column, then do one carry-propagation pass. */
+ XMEMSET(t, 0, sizeof(t));
+ for (i=0; i<27; i++) {
+ for (j=0; j<27; j++) {
+ t[i+j] += ((int128_t)a[i]) * b[j];
+ }
+ }
+ for (i=0; i<53; i++) {
+ r[i] = t[i] & 0x1ffffffffffffffL;
+ t[i+1] += t[i] >> 57;
+ }
+ r[53] = (sp_digit)t[53];
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_27(sp_digit* r, const sp_digit* a)
+{
+ int i, j, k;
+ int128_t c;
+
+ /* Size-optimised schoolbook square: cross products (i > j) doubled,
+ * diagonal a[i]^2 added once when the column index is even. */
+ c = ((int128_t)a[26]) * a[26];
+ r[53] = (sp_digit)(c >> 57);
+ c = (c & 0x1ffffffffffffffL) << 57;
+ for (k = 51; k >= 0; k--) {
+ for (i = 26; i >= 0; i--) {
+ j = k - i;
+ if (j >= 27 || i <= j) {
+ break;
+ }
+ if (j < 0) {
+ continue;
+ }
+
+ c += ((int128_t)a[i]) * a[j] * 2;
+ }
+ if (i == j) {
+ c += ((int128_t)a[i]) * a[i];
+ }
+
+ r[k + 2] += c >> 114;
+ r[k + 1] = (c >> 57) & 0x1ffffffffffffffL;
+ c = (c & 0x1ffffffffffffffL) << 57;
+ }
+ r[0] = (sp_digit)(c >> 57);
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_27(sp_digit* r, const sp_digit* a)
+{
+ int i, j;
+ int128_t t[54]; /* full column sums before carry propagation */
+
+ /* Speed-optimised square: each off-diagonal product counted twice,
+ * diagonal once; then a single carry-propagation pass. */
+ XMEMSET(t, 0, sizeof(t));
+ for (i=0; i<27; i++) {
+ for (j=0; j<i; j++) {
+ t[i+j] += (((int128_t)a[i]) * a[j]) * 2;
+ }
+ t[i+i] += ((int128_t)a[i]) * a[i];
+ }
+ for (i=0; i<53; i++) {
+ r[i] = t[i] & 0x1ffffffffffffffL;
+ t[i+1] += t[i] >> 57;
+ }
+ r[53] = (sp_digit)t[53];
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit x, b;
+
+ /* Newton-style iteration doubling the number of correct low bits of
+ * the inverse each step (a[0] must be odd for the inverse to exist). */
+ b = a[0];
+ x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**64 */
+ x &= 0x1ffffffffffffffL; /* keep only the 57-bit digit */
+
+ /* rho = -1/m mod b, where b is the digit radix 2^57 */
+ *rho = (1L << 57) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_d_54(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Size-optimised: running 128-bit accumulator, one digit per step.
+ * Result occupies 55 digits (r[54] holds the final carry). */
+ int128_t tb = b;
+ int128_t t = 0;
+ int i;
+
+ for (i = 0; i < 54; i++) {
+ t += tb * a[i];
+ r[i] = t & 0x1ffffffffffffffL;
+ t >>= 57;
+ }
+ r[54] = (sp_digit)t;
+#else
+ /* Speed-optimised: unrolled by 8; t[] ring-buffers the partial
+ * products so each carry feeds the next digit. */
+ int128_t tb = b;
+ int128_t t[8];
+ int i;
+
+ t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
+ for (i = 0; i < 48; i += 8) {
+ t[1] = tb * a[i+1];
+ r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+ t[2] = tb * a[i+2];
+ r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+ t[3] = tb * a[i+3];
+ r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+ t[4] = tb * a[i+4];
+ r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+ t[5] = tb * a[i+5];
+ r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+ t[6] = tb * a[i+6];
+ r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
+ t[7] = tb * a[i+7];
+ r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
+ t[0] = tb * a[i+8];
+ r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
+ }
+ t[1] = tb * a[49];
+ r[49] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+ t[2] = tb * a[50];
+ r[50] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+ t[3] = tb * a[51];
+ r[51] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+ t[4] = tb * a[52];
+ r[52] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+ t[5] = tb * a[53];
+ r[53] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+ r[54] = (sp_digit)(t[5] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_27(sp_digit* r, const sp_digit* m)
+{
+ /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<26; i++) {
+ r[i] = 0x1ffffffffffffffL;
+ }
+#else
+ int i;
+
+ for (i = 0; i < 24; i += 8) {
+ r[i + 0] = 0x1ffffffffffffffL;
+ r[i + 1] = 0x1ffffffffffffffL;
+ r[i + 2] = 0x1ffffffffffffffL;
+ r[i + 3] = 0x1ffffffffffffffL;
+ r[i + 4] = 0x1ffffffffffffffL;
+ r[i + 5] = 0x1ffffffffffffffL;
+ r[i + 6] = 0x1ffffffffffffffL;
+ r[i + 7] = 0x1ffffffffffffffL;
+ }
+ r[24] = 0x1ffffffffffffffL;
+ r[25] = 0x1ffffffffffffffL;
+#endif
+ /* Top digit holds only 1536 - 26*57 = 54 bits. */
+ r[26] = 0x3fffffffffffffL;
+
+ /* r = (2^n - 1) mod m */
+ (void)sp_3072_sub_27(r, r, m);
+
+ /* Add one so r = 2^n mod m */
+ r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_3072_cmp_27(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ /* Constant-time: every digit is always examined, most significant
+ * first; once r is non-zero the mask keeps later digits from
+ * changing the result. */
+ for (i=26; i>=0; i--) {
+ r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#else
+ int i;
+
+ r |= (a[26] - b[26]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[25] - b[25]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[24] - b[24]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ for (i = 16; i >= 0; i -= 8) {
+ r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+ }
+#endif /* WOLFSSL_SP_SMALL */
+
+ return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * Constant time: the same instructions run for both mask values;
+ * (b[i] & m) is b[i] when m is all-ones and 0 when m is 0.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_sub_27(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 27; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    /* Unrolled by 8 for the first 24 words, remaining 3 done explicitly. */
+    for (i = 0; i < 24; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[24] = a[24] - (b[24] & m);
+    r[25] = a[25] - (b[25] & m);
+    r[26] = a[26] - (b[26] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Words hold 57-bit values; products are accumulated in 128-bit
+ * intermediates and carries flow into the next word (r may end up
+ * un-normalized; callers re-normalize).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_add_27(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 27; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x1ffffffffffffffL;
+        t >>= 57;
+    }
+    /* Final carry lands in the word above the 27-word result. */
+    r[27] += t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    /* Unrolled: t[] rotates through the 128-bit partial products so each
+     * word adds the low 57 bits of its product plus the previous carry. */
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
+    for (i = 0; i < 24; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+        t[2] = tb * a[i+2];
+        r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
+        t[3] = tb * a[i+3];
+        r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
+        t[4] = tb * a[i+4];
+        r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
+        t[5] = tb * a[i+5];
+        r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
+        t[6] = tb * a[i+6];
+        r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
+        t[7] = tb * a[i+7];
+        r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
+        t[0] = tb * a[i+8];
+        r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
+    }
+    t[1] = tb * a[25]; r[25] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+    t[2] = tb * a[26]; r[26] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
+    r[27] += (sp_digit)(t[2] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 57.
+ *
+ * Propagates each word's excess bits (above bit 56) as a carry into the
+ * next word, leaving words 0..25 masked to 57 bits. The top word (a[26])
+ * keeps any remaining excess.
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_3072_norm_27(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 26; i++) {
+        a[i+1] += a[i] >> 57;
+        a[i] &= 0x1ffffffffffffffL;
+    }
+#else
+    int i;
+    for (i = 0; i < 24; i += 8) {
+        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL;
+        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL;
+        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL;
+        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL;
+        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL;
+        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL;
+        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL;
+        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL;
+        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL;
+    }
+    a[24+1] += a[24] >> 57;
+    a[24] &= 0x1ffffffffffffffL;
+    a[25+1] += a[25] >> 57;
+    a[25] &= 0x1ffffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 1536 bits down to the bottom.
+ *
+ * 27 words of 57 bits with a 54-bit top word give 26*57+54 = 1536 bits,
+ * so the bits above bit 1536 start at a[26] >> 54; the << 3 realigns
+ * higher words by the 57-54 = 3 bit offset. Words above the result are
+ * cleared afterwards.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_3072_mont_shift_27(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    sp_digit n, s;
+
+    s = a[27];
+    n = a[26] >> 54;
+    for (i = 0; i < 26; i++) {
+        n += (s & 0x1ffffffffffffffL) << 3;
+        r[i] = n & 0x1ffffffffffffffL;
+        n >>= 57;
+        s = a[28 + i] + (s >> 57);
+    }
+    n += s << 3;
+    r[26] = n;
+#else
+    sp_digit n, s;
+    int i;
+
+    s = a[27]; n = a[26] >> 54;
+    for (i = 0; i < 24; i += 8) {
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+0] = n & 0x1ffffffffffffffL;
+        n >>= 57; s = a[i+28] + (s >> 57);
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+1] = n & 0x1ffffffffffffffL;
+        n >>= 57; s = a[i+29] + (s >> 57);
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+2] = n & 0x1ffffffffffffffL;
+        n >>= 57; s = a[i+30] + (s >> 57);
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+3] = n & 0x1ffffffffffffffL;
+        n >>= 57; s = a[i+31] + (s >> 57);
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+4] = n & 0x1ffffffffffffffL;
+        n >>= 57; s = a[i+32] + (s >> 57);
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+5] = n & 0x1ffffffffffffffL;
+        n >>= 57; s = a[i+33] + (s >> 57);
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+6] = n & 0x1ffffffffffffffL;
+        n >>= 57; s = a[i+34] + (s >> 57);
+        n += (s & 0x1ffffffffffffffL) << 3; r[i+7] = n & 0x1ffffffffffffffL;
+        n >>= 57; s = a[i+35] + (s >> 57);
+    }
+    n += (s & 0x1ffffffffffffffL) << 3; r[24] = n & 0x1ffffffffffffffL;
+    n >>= 57; s = a[52] + (s >> 57);
+    n += (s & 0x1ffffffffffffffL) << 3; r[25] = n & 0x1ffffffffffffffL;
+    n >>= 57; s = a[53] + (s >> 57);
+    n += s << 3; r[26] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[27], 0, sizeof(*r) * 27U);
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * Standard word-by-word Montgomery reduction: for each low word compute
+ * mu = a[i] * mp (mod 2^57) and add mu*m so the word cancels; the last
+ * word uses the 54-bit mask of the top word. Then shift down and do a
+ * constant-time conditional subtract of m.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_3072_norm_27(a + 27);
+
+    for (i=0; i<26; i++) {
+        mu = (a[i] * mp) & 0x1ffffffffffffffL;
+        sp_3072_mul_add_27(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+    }
+    /* Top word only holds 54 bits of the modulus. */
+    mu = (a[i] * mp) & 0x3fffffffffffffL;
+    sp_3072_mul_add_27(a+i, m, mu);
+    a[i+1] += a[i] >> 57;
+    a[i] &= 0x1ffffffffffffffL;
+
+    sp_3072_mont_shift_27(a, a);
+    sp_3072_cond_sub_27(a, a, m, 0 - (((a[26] >> 54) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_3072_norm_27(a);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full product followed by Montgomery reduction.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montogmery form.
+ * b Second number to multiply in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_3072_mont_mul_27(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_27(r, a, b);
+    sp_3072_mont_reduce_27(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Dedicated squaring followed by Montgomery reduction.
+ *
+ * r Result of squaring.
+ * a Number to square in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_3072_mont_sqr_27(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_27(r, a);
+    sp_3072_mont_reduce_27(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * Like sp_3072_mul_add_27 but overwrites r instead of accumulating.
+ * Result occupies 28 words (27 + one carry word).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_d_27(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 27; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1ffffffffffffffL;
+        t >>= 57;
+    }
+    r[27] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffL;
+    for (i = 0; i < 24; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffL);
+    }
+    t[1] = tb * a[25];
+    r[25] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffL);
+    t[2] = tb * a[26];
+    r[26] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffL);
+    r[27] = (sp_digit)(t[2] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Constant time: (b[i] & m) makes the add a no-op when m is 0.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_add_27(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 27; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 24; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[24] = a[24] + (b[24] & m);
+    r[25] = a[25] + (b[25] & m);
+    r[26] = a[26] + (b[26] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+/* Divide the two-word value d1:d0 (57-bit words) by dv without 128-bit
+ * hardware division: the quotient is built 6 bits at a time (9 steps of
+ * 6 bits plus a final 3-bit step covers all 57 bits of d0), keeping the
+ * running remainder d small enough for 64-bit division.
+ */
+static WC_INLINE sp_digit sp_3072_div_word_27(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 57 bits from d1 and top 6 bits from d0. */
+    d = (d1 << 6) | (d0 >> 51);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 7 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 45) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 13 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 39) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 19 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 33) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 25 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 27) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 21) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 37 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 15) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 43 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 9) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 49 bits in r */
+    /* Next 6 bits from d0. */
+    r <<= 6;
+    d <<= 6;
+    d |= (d0 >> 3) & ((1 << 6) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 55 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook trial division by the top word of d: each quotient-digit
+ * estimate r1 may overshoot, so a corrective add (r1 from the negated
+ * remainder, plus one) follows each subtract.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_64
+    int128_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[54], t2d[27 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 27 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 2 * 27;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[26];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 27U);
+        for (i=26; i>=0; i--) {
+            /* Normalize the two top words before estimating. */
+            t1[27 + i] += t1[27 + i - 1] >> 57;
+            t1[27 + i - 1] &= 0x1ffffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+            d1 = t1[27 + i];
+            d1 <<= 57;
+            d1 += t1[27 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_3072_div_word_27(t1[27 + i], t1[27 + i - 1], dv);
+#endif
+
+            sp_3072_mul_d_27(t2, d, r1);
+            (void)sp_3072_sub_27(&t1[i], &t1[i], t2);
+            t1[27 + i] -= t2[27];
+            t1[27 + i] += t1[27 + i - 1] >> 57;
+            t1[27 + i - 1] &= 0x1ffffffffffffffL;
+            /* Correct a possible overshoot of the estimate. */
+            r1 = (((-t1[27 + i]) << 57) - t1[27 + i - 1]) / dv;
+            r1++;
+            sp_3072_mul_d_27(t2, d, r1);
+            (void)sp_3072_add_27(&t1[i], &t1[i], t2);
+            t1[27 + i] += t1[27 + i - 1] >> 57;
+            t1[27 + i - 1] &= 0x1ffffffffffffffL;
+        }
+        t1[27 - 1] += t1[27 - 2] >> 57;
+        t1[27 - 2] &= 0x1ffffffffffffffL;
+        r1 = t1[27 - 1] / dv;
+
+        sp_3072_mul_d_27(t2, d, r1);
+        (void)sp_3072_sub_27(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 27U);
+        for (i=0; i<25; i++) {
+            r[i+1] += r[i] >> 57;
+            r[i] &= 0x1ffffffffffffffL;
+        }
+        /* If the remainder went negative, add the divisor back. */
+        sp_3072_cond_add_27(r, r, d, 0 - ((r[26] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper: division discarding the quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_3072_mod_27(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_27(a, m, NULL, r);
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Three build variants:
+ *  - WOLFSSL_SP_SMALL: bit-at-a-time left-to-right exponentiation with
+ *    constant-time operand selection via addr_mask.
+ *  - WOLFSSL_SP_CACHE_RESISTANT: same ladder with fixed-size buffers.
+ *  - default: 5-bit fixed-window method with a 32-entry table of powers.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* td;
+    sp_digit* t[3];
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 27 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 27U * 2U);
+
+        norm = t[0] = td;
+        t[1] = &td[27 * 2];
+        t[2] = &td[2 * 27 * 2];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_27(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_27(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 27U);
+        }
+    }
+    if (err == MP_OKAY) {
+        /* Convert the base into Montgomery form. */
+        sp_3072_mul_27(t[1], t[1], norm);
+        err = sp_3072_mod_27(t[1], t[1], m);
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 57;
+        c = bits % 57;
+        n = e[i--] << (57 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 57;
+            }
+
+            y = (n >> 56) & 1;
+            n <<= 1;
+
+            sp_3072_mont_mul_27(t[y^1], t[0], t[1], m, mp);
+
+            /* Constant-time select of the operand to square via
+             * address masking - no exponent-dependent branch. */
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 27 * 2);
+            sp_3072_mont_sqr_27(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 27 * 2);
+        }
+
+        sp_3072_mont_reduce_27(t[0], m, mp);
+        n = sp_3072_cmp_27(t[0], m);
+        sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 27 * 2);
+
+    }
+
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[3][54];
+#else
+    sp_digit* td;
+    sp_digit* t[3];
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 27 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        t[0] = td;
+        t[1] = &td[27 * 2];
+        t[2] = &td[2 * 27 * 2];
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_27(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_27(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_3072_mul_27(t[1], t[1], norm);
+                err = sp_3072_mod_27(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_3072_mul_27(t[1], a, norm);
+            err = sp_3072_mod_27(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 57;
+        c = bits % 57;
+        n = e[i--] << (57 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 57;
+            }
+
+            y = (n >> 56) & 1;
+            n <<= 1;
+
+            sp_3072_mont_mul_27(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_3072_mont_sqr_27(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+        }
+
+        sp_3072_mont_reduce_27(t[0], m, mp);
+        n = sp_3072_cmp_27(t[0], m);
+        sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(t[0]));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][54];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit rt[54];
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 54, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 54;
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_27(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_3072_mod_27(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_3072_mul_27(t[1], t[1], norm);
+                err = sp_3072_mod_27(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_3072_mul_27(t[1], a, norm);
+            err = sp_3072_mod_27(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute t[k] = a^k in Montgomery form for the 5-bit window. */
+        sp_3072_mont_sqr_27(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_27(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_27(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_27(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_27(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_27(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_27(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_27(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_27(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_27(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_27(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_27(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_27(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_27(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_27(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_27(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_27(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_27(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_27(t[20], t[10], m, mp);
+        sp_3072_mont_mul_27(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_27(t[22], t[11], m, mp);
+        sp_3072_mont_mul_27(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_27(t[24], t[12], m, mp);
+        sp_3072_mont_mul_27(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_27(t[26], t[13], m, mp);
+        sp_3072_mont_mul_27(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_27(t[28], t[14], m, mp);
+        sp_3072_mont_mul_27(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_27(t[30], t[15], m, mp);
+        sp_3072_mont_mul_27(t[31], t[16], t[15], m, mp);
+
+        /* Round bit count up to a multiple of the window size (5). */
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 56) / 57) - 1;
+        c = bits % 57;
+        if (c == 0) {
+            c = 57;
+        }
+        if (i < 27) {
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (7 - c);
+            c += 57;
+        }
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        XMEMCPY(rt, t[y], sizeof(rt));
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (7 - c);
+                c += 57;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            /* Five squarings then one table multiply per window. */
+            sp_3072_mont_sqr_27(rt, rt, m, mp);
+            sp_3072_mont_sqr_27(rt, rt, m, mp);
+            sp_3072_mont_sqr_27(rt, rt, m, mp);
+            sp_3072_mont_sqr_27(rt, rt, m, mp);
+            sp_3072_mont_sqr_27(rt, rt, m, mp);
+
+            sp_3072_mont_mul_27(rt, rt, t[y], m, mp);
+        }
+
+        sp_3072_mont_reduce_27(rt, m, mp);
+        n = sp_3072_cmp_27(rt, m);
+        sp_3072_cond_sub_27(rt, rt, m, ((n < 0) ?
+                   (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * 54 words of 57 bits with a 51-bit top word give 53*57+51 = 3072 bits;
+ * since m has its top bit set, 2^n - 1 - m + 1 never underflows.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_54(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<53; i++) {
+        r[i] = 0x1ffffffffffffffL;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i + 0] = 0x1ffffffffffffffL;
+        r[i + 1] = 0x1ffffffffffffffL;
+        r[i + 2] = 0x1ffffffffffffffL;
+        r[i + 3] = 0x1ffffffffffffffL;
+        r[i + 4] = 0x1ffffffffffffffL;
+        r[i + 5] = 0x1ffffffffffffffL;
+        r[i + 6] = 0x1ffffffffffffffL;
+        r[i + 7] = 0x1ffffffffffffffL;
+    }
+    r[48] = 0x1ffffffffffffffL;
+    r[49] = 0x1ffffffffffffffL;
+    r[50] = 0x1ffffffffffffffL;
+    r[51] = 0x1ffffffffffffffL;
+    r[52] = 0x1ffffffffffffffL;
+#endif
+    /* Top word holds only 51 bits. */
+    r[53] = 0x7ffffffffffffL;
+
+    /* r = (2^n - 1) mod n */
+    (void)sp_3072_sub_54(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Constant time: all 54 words are always read; the (r == 0) mask keeps
+ * only the most significant differing word.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_3072_cmp_54(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=53; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[53] - b[53]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[52] - b[52]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[51] - b[51]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[50] - b[50]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[49] - b[49]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[48] - b[48]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 40; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * Constant time: same instruction stream for both mask values.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_sub_54(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 54; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[48] = a[48] - (b[48] & m);
+    r[49] = a[49] - (b[49] & m);
+    r[50] = a[50] - (b[50] & m);
+    r[51] = a[51] - (b[51] & m);
+    r[52] = a[52] - (b[52] & m);
+    r[53] = a[53] - (b[53] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * 57-bit words with 128-bit product accumulation; final carry goes into
+ * r[54]. Result may be un-normalized.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_3072_mul_add_54(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 54; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x1ffffffffffffffL;
+        t >>= 57;
+    }
+    r[54] += t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
+    for (i = 0; i < 48; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+        t[2] = tb * a[i+2];
+        r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
+        t[3] = tb * a[i+3];
+        r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
+        t[4] = tb * a[i+4];
+        r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
+        t[5] = tb * a[i+5];
+        r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
+        t[6] = tb * a[i+6];
+        r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
+        t[7] = tb * a[i+7];
+        r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
+        t[0] = tb * a[i+8];
+        r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
+    }
+    t[1] = tb * a[49]; r[49] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
+    t[2] = tb * a[50]; r[50] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
+    t[3] = tb * a[51]; r[51] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
+    t[4] = tb * a[52]; r[52] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
+    t[5] = tb * a[53]; r[53] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
+    r[54] += (sp_digit)(t[5] >> 57);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 57.
+ *
+ * Carry-propagates so words 0..52 are masked to 57 bits; the top word
+ * (a[53]) keeps any excess.
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_3072_norm_54(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 53; i++) {
+        a[i+1] += a[i] >> 57;
+        a[i] &= 0x1ffffffffffffffL;
+    }
+#else
+    int i;
+    for (i = 0; i < 48; i += 8) {
+        a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffL;
+        a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffL;
+        a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffL;
+        a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffL;
+        a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffL;
+        a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffL;
+        a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffL;
+        a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffL;
+        a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffL;
+    }
+    a[48+1] += a[48] >> 57;
+    a[48] &= 0x1ffffffffffffffL;
+    a[49+1] += a[49] >> 57;
+    a[49] &= 0x1ffffffffffffffL;
+    a[50+1] += a[50] >> 57;
+    a[50] &= 0x1ffffffffffffffL;
+    a[51+1] += a[51] >> 57;
+    a[51] &= 0x1ffffffffffffffL;
+    a[52+1] += a[52] >> 57;
+    a[52] &= 0x1ffffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 3072 bits down to the bottom.
+ *
+ * 53*57+51 = 3072, so the high part begins at a[53] >> 51; higher words
+ * are realigned by the 57-51 = 6 bit offset. Words above the result are
+ * cleared afterwards.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_3072_mont_shift_54(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int128_t n = a[53] >> 51;
+    n += ((int128_t)a[54]) << 6;
+
+    for (i = 0; i < 53; i++) {
+        r[i] = n & 0x1ffffffffffffffL;
+        n >>= 57;
+        n += ((int128_t)a[55 + i]) << 6;
+    }
+    r[53] = (sp_digit)n;
+#else
+    int i;
+    int128_t n = a[53] >> 51;
+    n += ((int128_t)a[54]) << 6;
+    for (i = 0; i < 48; i += 8) {
+        r[i + 0] = n & 0x1ffffffffffffffL;
+        n >>= 57; n += ((int128_t)a[i + 55]) << 6;
+        r[i + 1] = n & 0x1ffffffffffffffL;
+        n >>= 57; n += ((int128_t)a[i + 56]) << 6;
+        r[i + 2] = n & 0x1ffffffffffffffL;
+        n >>= 57; n += ((int128_t)a[i + 57]) << 6;
+        r[i + 3] = n & 0x1ffffffffffffffL;
+        n >>= 57; n += ((int128_t)a[i + 58]) << 6;
+        r[i + 4] = n & 0x1ffffffffffffffL;
+        n >>= 57; n += ((int128_t)a[i + 59]) << 6;
+        r[i + 5] = n & 0x1ffffffffffffffL;
+        n >>= 57; n += ((int128_t)a[i + 60]) << 6;
+        r[i + 6] = n & 0x1ffffffffffffffL;
+        n >>= 57; n += ((int128_t)a[i + 61]) << 6;
+        r[i + 7] = n & 0x1ffffffffffffffL;
+        n >>= 57; n += ((int128_t)a[i + 62]) << 6;
+    }
+    r[48] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[103]) << 6;
+    r[49] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[104]) << 6;
+    r[50] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[105]) << 6;
+    r[51] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[106]) << 6;
+    r[52] = n & 0x1ffffffffffffffL; n >>= 57; n += ((int128_t)a[107]) << 6;
+    r[53] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[54], 0, sizeof(*r) * 54U);
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * With WOLFSSL_SP_DH a separate mp == 1 path skips the multiply by mp
+ * (DH moduli give mp == 1); otherwise the standard word-by-word
+ * reduction runs. The last word uses the 51-bit top-word mask.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_3072_norm_54(a + 54);
+
+#ifdef WOLFSSL_SP_DH
+    if (mp != 1) {
+        for (i=0; i<53; i++) {
+            mu = (a[i] * mp) & 0x1ffffffffffffffL;
+            sp_3072_mul_add_54(a+i, m, mu);
+            a[i+1] += a[i] >> 57;
+        }
+        mu = (a[i] * mp) & 0x7ffffffffffffL;
+        sp_3072_mul_add_54(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+        a[i] &= 0x1ffffffffffffffL;
+    }
+    else {
+        for (i=0; i<53; i++) {
+            mu = a[i] & 0x1ffffffffffffffL;
+            sp_3072_mul_add_54(a+i, m, mu);
+            a[i+1] += a[i] >> 57;
+        }
+        mu = a[i] & 0x7ffffffffffffL;
+        sp_3072_mul_add_54(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+        a[i] &= 0x1ffffffffffffffL;
+    }
+#else
+    for (i=0; i<53; i++) {
+        mu = (a[i] * mp) & 0x1ffffffffffffffL;
+        sp_3072_mul_add_54(a+i, m, mu);
+        a[i+1] += a[i] >> 57;
+    }
+    mu = (a[i] * mp) & 0x7ffffffffffffL;
+    sp_3072_mul_add_54(a+i, m, mu);
+    a[i+1] += a[i] >> 57;
+    a[i] &= 0x1ffffffffffffffL;
+#endif
+
+    sp_3072_mont_shift_54(a, a);
+    sp_3072_cond_sub_54(a, a, m, 0 - (((a[53] >> 51) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_3072_norm_54(a);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full product followed by Montgomery reduction.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montogmery form.
+ * b Second number to multiply in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_3072_mont_mul_54(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_54(r, a, b);
+    sp_3072_mont_reduce_54(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Dedicated squaring followed by Montgomery reduction.
+ *
+ * r Result of squaring.
+ * a Number to square in Montogmery form.
+ * m Modulus (prime).
+ * mp Montogmery mulitplier.
+ */
+static void sp_3072_mont_sqr_54(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_54(r, a);
+    sp_3072_mont_reduce_54(r, m, mp);
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Constant time: (b[i] & m) makes the add a no-op when m is 0.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_3072_cond_add_54(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 54; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[48] = a[48] + (b[48] & m);
+    r[49] = a[49] + (b[49] & m);
+    r[50] = a[50] + (b[50] & m);
+    r[51] = a[51] + (b[51] & m);
+    r[52] = a[52] + (b[52] & m);
+    r[53] = a[53] + (b[53] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+static WC_INLINE sp_digit sp_3072_div_word_54(sp_digit d1, sp_digit d0,
+ sp_digit dv)
+{
+ sp_digit d, r, t;
+
+ /* All 57 bits from d1 and top 6 bits from d0. */
+ d = (d1 << 6) | (d0 >> 51);
+ r = d / dv;
+ d -= r * dv;
+ /* Up to 7 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 45) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 13 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 39) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 19 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 33) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 25 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 27) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 31 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 21) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 37 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 15) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 43 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 9) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 49 bits in r */
+ /* Next 6 bits from d0. */
+ r <<= 6;
+ d <<= 6;
+ d |= (d0 >> 3) & ((1 << 6) - 1);
+ t = d / dv;
+ d -= t * dv;
+ r += t;
+ /* Up to 55 bits in r */
+ /* Remaining 3 bits from d0. */
+ r <<= 3;
+ d <<= 3;
+ d |= d0 & ((1 << 3) - 1);
+ t = d / dv;
+ r += t;
+
+ return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide a by d and put the remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ int i;
+#ifndef WOLFSSL_SP_DIV_64
+ int128_t d1;
+#endif
+ sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* td;
+#else
+ sp_digit t1d[108], t2d[54 + 1];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 54 + 1), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = td;
+ t2 = td + 2 * 54;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ dv = d[53];
+ XMEMCPY(t1, a, sizeof(*t1) * 2U * 54U);
+ for (i=53; i>=0; i--) {
+ t1[54 + i] += t1[54 + i - 1] >> 57;
+ t1[54 + i - 1] &= 0x1ffffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+ d1 = t1[54 + i];
+ d1 <<= 57;
+ d1 += t1[54 + i - 1];
+ r1 = (sp_digit)(d1 / dv);
+#else
+ r1 = sp_3072_div_word_54(t1[54 + i], t1[54 + i - 1], dv);
+#endif
+
+ sp_3072_mul_d_54(t2, d, r1);
+ (void)sp_3072_sub_54(&t1[i], &t1[i], t2);
+ t1[54 + i] -= t2[54];
+ t1[54 + i] += t1[54 + i - 1] >> 57;
+ t1[54 + i - 1] &= 0x1ffffffffffffffL;
+ r1 = (((-t1[54 + i]) << 57) - t1[54 + i - 1]) / dv;
+ r1++;
+ sp_3072_mul_d_54(t2, d, r1);
+ (void)sp_3072_add_54(&t1[i], &t1[i], t2);
+ t1[54 + i] += t1[54 + i - 1] >> 57;
+ t1[54 + i - 1] &= 0x1ffffffffffffffL;
+ }
+ t1[54 - 1] += t1[54 - 2] >> 57;
+ t1[54 - 2] &= 0x1ffffffffffffffL;
+ r1 = t1[54 - 1] / dv;
+
+ sp_3072_mul_d_54(t2, d, r1);
+ (void)sp_3072_sub_54(t1, t1, t2);
+ XMEMCPY(r, t1, sizeof(*r) * 2U * 54U);
+ for (i=0; i<52; i++) {
+ r[i+1] += r[i] >> 57;
+ r[i] &= 0x1ffffffffffffffL;
+ }
+ sp_3072_cond_add_54(r, r, d, 0 - ((r[53] < 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_3072_mod_54(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_3072_div_54(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+ const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* td;
+ sp_digit* t[3];
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 54 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(td, 0, sizeof(*td) * 3U * 54U * 2U);
+
+ norm = t[0] = td;
+ t[1] = &td[54 * 2];
+ t[2] = &td[2 * 54 * 2];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_54(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_3072_mod_54(t[1], a, m);
+ }
+ else {
+ XMEMCPY(t[1], a, sizeof(sp_digit) * 54U);
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_3072_mul_54(t[1], t[1], norm);
+ err = sp_3072_mod_54(t[1], t[1], m);
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 57;
+ c = bits % 57;
+ n = e[i--] << (57 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 57;
+ }
+
+ y = (n >> 56) & 1;
+ n <<= 1;
+
+ sp_3072_mont_mul_54(t[y^1], t[0], t[1], m, mp);
+
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])),
+ sizeof(*t[2]) * 54 * 2);
+ sp_3072_mont_sqr_54(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2],
+ sizeof(*t[2]) * 54 * 2);
+ }
+
+ sp_3072_mont_reduce_54(t[0], m, mp);
+ n = sp_3072_cmp_54(t[0], m);
+ sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(*r) * 54 * 2);
+
+ }
+
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[3][108];
+#else
+ sp_digit* td;
+ sp_digit* t[3];
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 54 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ t[0] = td;
+ t[1] = &td[54 * 2];
+ t[2] = &td[2 * 54 * 2];
+#endif
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_54(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_3072_mod_54(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_3072_mul_54(t[1], t[1], norm);
+ err = sp_3072_mod_54(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_3072_mul_54(t[1], a, norm);
+ err = sp_3072_mod_54(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 57;
+ c = bits % 57;
+ n = e[i--] << (57 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 57;
+ }
+
+ y = (n >> 56) & 1;
+ n <<= 1;
+
+ sp_3072_mont_mul_54(t[y^1], t[0], t[1], m, mp);
+
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+ sp_3072_mont_sqr_54(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+ }
+
+ sp_3072_mont_reduce_54(t[0], m, mp);
+ n = sp_3072_cmp_54(t[0], m);
+ sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(t[0]));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][108];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit rt[108];
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 108, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++)
+ t[i] = td + i * 108;
+#endif
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_54(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_3072_mod_54(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_3072_mul_54(t[1], t[1], norm);
+ err = sp_3072_mod_54(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_3072_mul_54(t[1], a, norm);
+ err = sp_3072_mod_54(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_mont_sqr_54(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_54(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_54(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_54(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_54(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_54(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_54(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_54(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_54(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_54(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_54(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_54(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_54(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_54(t[15], t[ 8], t[ 7], m, mp);
+ sp_3072_mont_sqr_54(t[16], t[ 8], m, mp);
+ sp_3072_mont_mul_54(t[17], t[ 9], t[ 8], m, mp);
+ sp_3072_mont_sqr_54(t[18], t[ 9], m, mp);
+ sp_3072_mont_mul_54(t[19], t[10], t[ 9], m, mp);
+ sp_3072_mont_sqr_54(t[20], t[10], m, mp);
+ sp_3072_mont_mul_54(t[21], t[11], t[10], m, mp);
+ sp_3072_mont_sqr_54(t[22], t[11], m, mp);
+ sp_3072_mont_mul_54(t[23], t[12], t[11], m, mp);
+ sp_3072_mont_sqr_54(t[24], t[12], m, mp);
+ sp_3072_mont_mul_54(t[25], t[13], t[12], m, mp);
+ sp_3072_mont_sqr_54(t[26], t[13], m, mp);
+ sp_3072_mont_mul_54(t[27], t[14], t[13], m, mp);
+ sp_3072_mont_sqr_54(t[28], t[14], m, mp);
+ sp_3072_mont_mul_54(t[29], t[15], t[14], m, mp);
+ sp_3072_mont_sqr_54(t[30], t[15], m, mp);
+ sp_3072_mont_mul_54(t[31], t[16], t[15], m, mp);
+
+ bits = ((bits + 4) / 5) * 5;
+ i = ((bits + 56) / 57) - 1;
+ c = bits % 57;
+ if (c == 0) {
+ c = 57;
+ }
+ if (i < 54) {
+ n = e[i--] << (64 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ if (c < 5) {
+ n |= e[i--] << (7 - c);
+ c += 57;
+ }
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ XMEMCPY(rt, t[y], sizeof(rt));
+ for (; i>=0 || c>=5; ) {
+ if (c < 5) {
+ n |= e[i--] << (7 - c);
+ c += 57;
+ }
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+
+ sp_3072_mont_sqr_54(rt, rt, m, mp);
+ sp_3072_mont_sqr_54(rt, rt, m, mp);
+ sp_3072_mont_sqr_54(rt, rt, m, mp);
+ sp_3072_mont_sqr_54(rt, rt, m, mp);
+ sp_3072_mont_sqr_54(rt, rt, m, mp);
+
+ sp_3072_mont_mul_54(rt, rt, t[y], m, mp);
+ }
+
+ sp_3072_mont_reduce_54(rt, m, mp);
+ n = sp_3072_cmp_54(rt, m);
+ sp_3072_cond_sub_54(rt, rt, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, rt, sizeof(rt));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+ /* WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* d = NULL;
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+ sp_digit* norm;
+ sp_digit e[1] = {0};
+ sp_digit mp;
+ int i;
+ int err = MP_OKAY;
+
+ if (*outLen < 384U) {
+ err = MP_TO_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(em) > 57) {
+ err = MP_READ_E;
+ }
+ if (inLen > 384U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 54 * 2;
+ m = r + 54 * 2;
+ norm = r;
+
+ sp_3072_from_bin(a, 54, in, inLen);
+#if DIGIT_BIT >= 57
+ e[0] = (sp_digit)em->dp[0];
+#else
+ e[0] = (sp_digit)em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(m, 54, mm);
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_54(norm, m);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_mul_54(a, a, norm);
+ err = sp_3072_mod_54(a, a, m);
+ }
+ if (err == MP_OKAY) {
+ for (i=56; i>=0; i--) {
+ if ((e[0] >> i) != 0) {
+ break;
+ }
+ }
+
+ XMEMCPY(r, a, sizeof(sp_digit) * 54 * 2);
+ for (i--; i>=0; i--) {
+ sp_3072_mont_sqr_54(r, r, m, mp);
+
+ if (((e[0] >> i) & 1) == 1) {
+ sp_3072_mont_mul_54(r, r, a, m, mp);
+ }
+ }
+ sp_3072_mont_reduce_54(r, m, mp);
+ mp = sp_3072_cmp_54(r, m);
+ sp_3072_cond_sub_54(r, r, m, ((mp < 0) ?
+ (sp_digit)1 : (sp_digit)0)- 1);
+
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit ad[108], md[54], rd[108];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+ sp_digit e[1] = {0};
+ int err = MP_OKAY;
+
+ if (*outLen < 384U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(em) > 57) {
+ err = MP_READ_E;
+ }
+ if (inLen > 384U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 54 * 2;
+ m = r + 54 * 2;
+ }
+#else
+ a = ad;
+ m = md;
+ r = rd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_3072_from_bin(a, 54, in, inLen);
+#if DIGIT_BIT >= 57
+ e[0] = (sp_digit)em->dp[0];
+#else
+ e[0] = (sp_digit)em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(m, 54, mm);
+
+ if (e[0] == 0x3) {
+ sp_3072_sqr_54(r, a);
+ err = sp_3072_mod_54(r, r, m);
+ if (err == MP_OKAY) {
+ sp_3072_mul_54(r, a, r);
+ err = sp_3072_mod_54(r, r, m);
+ }
+ }
+ else {
+ sp_digit* norm = r;
+ int i;
+ sp_digit mp;
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_54(norm, m);
+
+ sp_3072_mul_54(a, a, norm);
+ err = sp_3072_mod_54(a, a, m);
+
+ if (err == MP_OKAY) {
+ for (i=56; i>=0; i--) {
+ if ((e[0] >> i) != 0) {
+ break;
+ }
+ }
+
+ XMEMCPY(r, a, sizeof(sp_digit) * 108U);
+ for (i--; i>=0; i--) {
+ sp_3072_mont_sqr_54(r, r, m, mp);
+
+ if (((e[0] >> i) & 1) == 1) {
+ sp_3072_mont_mul_54(r, r, a, m, mp);
+ }
+ }
+ sp_3072_mont_reduce_54(r, m, mp);
+ mp = sp_3072_cmp_54(r, m);
+ sp_3072_cond_sub_54(r, r, m, ((mp < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#endif
+
+ return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 384U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 3072) {
+ err = MP_READ_E;
+ }
+ if (inLen > 384) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 54;
+ m = a + 108;
+ r = a;
+
+ sp_3072_from_bin(a, 54, in, inLen);
+ sp_3072_from_mp(d, 54, dm);
+ sp_3072_from_mp(m, 54, mm);
+ err = sp_3072_mod_exp_54(r, a, d, 3072, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 54);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+ sp_digit a[108], d[54], m[54];
+ sp_digit* r = a;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 384U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 3072) {
+ err = MP_READ_E;
+ }
+ if (inLen > 384U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_bin(a, 54, in, inLen);
+ sp_3072_from_mp(d, 54, dm);
+ sp_3072_from_mp(m, 54, mm);
+ err = sp_3072_mod_exp_54(r, a, d, 3072, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+ XMEMSET(d, 0, sizeof(sp_digit) * 54);
+
+ return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#else
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* t = NULL;
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* dq;
+ sp_digit* qi;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 384U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (inLen > 384) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 27 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = t;
+ p = a + 54 * 2;
+ q = p + 27;
+ qi = dq = dp = q + 27;
+ tmpa = qi + 27;
+ tmpb = tmpa + 54;
+
+ r = t + 54;
+
+ sp_3072_from_bin(a, 54, in, inLen);
+ sp_3072_from_mp(p, 27, pm);
+ sp_3072_from_mp(q, 27, qm);
+ sp_3072_from_mp(dp, 27, dpm);
+ err = sp_3072_mod_exp_27(tmpa, a, dp, 1536, p, 1);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(dq, 27, dqm);
+ err = sp_3072_mod_exp_27(tmpb, a, dq, 1536, q, 1);
+ }
+ if (err == MP_OKAY) {
+ (void)sp_3072_sub_27(tmpa, tmpa, tmpb);
+ sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63));
+ sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63));
+
+ sp_3072_from_mp(qi, 27, qim);
+ sp_3072_mul_27(tmpa, tmpa, qi);
+ err = sp_3072_mod_27(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_mul_27(tmpa, q, tmpa);
+ (void)sp_3072_add_54(r, tmpb, tmpa);
+ sp_3072_norm_54(r);
+
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+ if (t != NULL) {
+ XMEMSET(t, 0, sizeof(sp_digit) * 27 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+ sp_digit a[54 * 2];
+ sp_digit p[27], q[27], dp[27], dq[27], qi[27];
+ sp_digit tmpa[54], tmpb[54];
+ sp_digit* r = a;
+ int err = MP_OKAY;
+
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 384U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (inLen > 384U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_bin(a, 54, in, inLen);
+ sp_3072_from_mp(p, 27, pm);
+ sp_3072_from_mp(q, 27, qm);
+ sp_3072_from_mp(dp, 27, dpm);
+ sp_3072_from_mp(dq, 27, dqm);
+ sp_3072_from_mp(qi, 27, qim);
+
+ err = sp_3072_mod_exp_27(tmpa, a, dp, 1536, p, 1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_exp_27(tmpb, a, dq, 1536, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ (void)sp_3072_sub_27(tmpa, tmpa, tmpb);
+ sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63));
+ sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63));
+ sp_3072_mul_27(tmpa, tmpa, qi);
+ err = sp_3072_mod_27(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_mul_27(tmpa, tmpa, q);
+ (void)sp_3072_add_54(r, tmpb, tmpa);
+ sp_3072_norm_54(r);
+
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+ XMEMSET(tmpa, 0, sizeof(tmpa));
+ XMEMSET(tmpb, 0, sizeof(tmpb));
+ XMEMSET(p, 0, sizeof(p));
+ XMEMSET(q, 0, sizeof(q));
+ XMEMSET(dp, 0, sizeof(dp));
+ XMEMSET(dq, 0, sizeof(dq));
+ XMEMSET(qi, 0, sizeof(qi));
+
+ return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+}
+
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 57
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 54);
+ r->used = 54;
+ mp_clamp(r);
+#elif DIGIT_BIT < 57
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 54; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 57) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 57 - s;
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 54; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 57 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 57 - s;
+ }
+ else {
+ s += 57;
+ }
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int err = MP_OKAY;
+ sp_digit* d = NULL;
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 54 * 2;
+ m = e + 54;
+ r = b;
+
+ sp_3072_from_mp(b, 54, base);
+ sp_3072_from_mp(e, 54, exp);
+ sp_3072_from_mp(m, 54, mod);
+
+ err = sp_3072_mod_exp_54(r, b, e, mp_count_bits(exp), m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_3072_to_mp(r, res);
+ }
+
+ if (d != NULL) {
+ XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit bd[108], ed[54], md[54];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 54 * 2;
+ m = e + 54;
+ r = b;
+ }
+#else
+ r = b = bd;
+ e = ed;
+ m = md;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 54, base);
+ sp_3072_from_mp(e, 54, exp);
+ sp_3072_from_mp(m, 54, mod);
+
+ err = sp_3072_mod_exp_54(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_3072_to_mp(r, res);
+ }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (d != NULL) {
+ XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+#endif
+
+ return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+SP_NOINLINE static void sp_3072_lshift_54(sp_digit* r, sp_digit* a, byte n)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ r[54] = a[53] >> (57 - n);
+ for (i=53; i>0; i--) {
+ r[i] = ((a[i] << n) | (a[i-1] >> (57 - n))) & 0x1ffffffffffffffL;
+ }
+#else
+ sp_int_digit s, t;
+
+ s = (sp_int_digit)a[53];
+ r[54] = s >> (57U - n);
+ s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]);
+ r[53] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]);
+ r[52] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]);
+ r[51] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]);
+ r[50] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]);
+ r[49] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]);
+ r[48] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]);
+ r[47] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]);
+ r[46] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]);
+ r[45] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]);
+ r[44] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]);
+ r[43] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]);
+ r[42] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]);
+ r[41] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]);
+ r[40] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]);
+ r[39] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]);
+ r[38] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]);
+ r[37] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]);
+ r[36] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
+ r[35] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
+ r[34] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
+ r[33] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
+ r[32] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
+ r[31] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
+ r[30] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
+ r[29] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
+ r[28] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
+ r[27] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
+ r[26] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
+ r[25] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
+ r[24] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
+ r[23] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
+ r[22] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
+ r[21] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
+ r[20] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
+ r[19] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
+ r[18] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
+ r[17] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
+ r[16] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
+ r[15] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
+ r[14] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
+ r[13] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
+ r[12] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
+ r[11] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
+ r[10] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
+ r[9] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
+ r[8] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
+ r[7] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
+ r[6] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
+ r[5] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
+ r[4] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
+ r[3] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
+ r[2] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+ s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
+ r[1] = ((s << n) | (t >> (57U - n))) & 0x1ffffffffffffffUL;
+#endif
+ r[0] = (a[0] << n) & 0x1ffffffffffffffL;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * Uses a fixed 5-bit window: each iteration squares 5 times and then
+ * multiplies in 2^y (y = next 5 exponent bits) as a left shift, followed
+ * by a reduction of the digits that overflowed above 3072 bits.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_54(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[108];
+    sp_digit td[55];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* One allocation for both buffers: 108 digits norm + 55 digits tmp. */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 163, NULL,
+        DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp = td + 108;
+        XMEMSET(td, 0, sizeof(sp_digit) * 163);
+#else
+        norm = nd;
+        tmp = td;
+        XMEMSET(td, 0, sizeof(td));
+#endif
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_54(norm, m);
+
+        /* Round bit count up to a multiple of the 5-bit window size. */
+        bits = ((bits + 4) / 5) * 5;
+        /* i = index of top exponent digit; c = bits used in that digit
+         * (digits hold 57 bits each). */
+        i = ((bits + 56) / 57) - 1;
+        c = bits % 57;
+        if (c == 0) {
+            c = 57;
+        }
+        /* Load the top digit left-justified into the 64-bit window reg n. */
+        if (i < 54) {
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        /* Top up the window if fewer than 5 bits remain (64 - 57 = 7). */
+        if (c < 5) {
+            n |= e[i--] << (7 - c);
+            c += 57;
+        }
+        /* First window: result = norm (Montgomery form of 1) << y. */
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        sp_3072_lshift_54(r, norm, y);
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (7 - c);
+                c += 57;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            /* r = r^(2^5) (mod m) in Montgomery form. */
+            sp_3072_mont_sqr_54(r, r, m, mp);
+            sp_3072_mont_sqr_54(r, r, m, mp);
+            sp_3072_mont_sqr_54(r, r, m, mp);
+            sp_3072_mont_sqr_54(r, r, m, mp);
+            sp_3072_mont_sqr_54(r, r, m, mp);
+
+            /* Multiply in 2^y as a shift, then fold the bits above 3072
+             * (digit 54 plus the top 6 bits of digit 53) back in via
+             * norm = 2^3072 mod m, and conditionally subtract m. */
+            sp_3072_lshift_54(r, r, y);
+            sp_3072_mul_d_54(tmp, norm, (r[54] << 6) + (r[53] >> 51));
+            r[54] = 0;
+            r[53] &= 0x7ffffffffffffL;
+            (void)sp_3072_add_54(r, r, tmp);
+            sp_3072_norm_54(r);
+            o = sp_3072_cmp_54(r, m);
+            sp_3072_cond_sub_54(r, r, m, ((o < 0) ?
+                (sp_digit)1 : (sp_digit)0) - 1);
+        }
+
+        /* Convert out of Montgomery form and fully reduce. */
+        sp_3072_mont_reduce_54(r, m, mp);
+        n = sp_3072_cmp_54(r, m);
+        sp_3072_cond_sub_54(r, r, m, ((n < 0) ?
+            (sp_digit)1 : (sp_digit)0) - 1);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_3072 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+
+    /* Reject operands that do not fit the fixed 3072-bit implementation. */
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 384) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Single allocation: b (2*54 digits, also result), e, m. */
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 54 * 2;
+        m = e + 54;
+        r = b;
+
+        sp_3072_from_mp(b, 54, base);
+        sp_3072_from_bin(e, 54, exp, expLen);
+        sp_3072_from_mp(m, 54, mod);
+
+    #ifdef HAVE_FFDHE_3072
+        /* Fast path for base 2 with an FFDHE-shaped modulus (top 32 bits
+         * of the 3072-bit modulus all set): shift-based exponentiation. */
+        if (base->used == 1 && base->dp[0] == 2 &&
+                (m[53] >> 19) == 0xffffffffL) {
+            err = sp_3072_mod_exp_2_54(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+            err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+        /* Strip leading zero bytes from the fixed-width output. */
+        for (i=0; i<384 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    if (d != NULL) {
+        /* Zeroize the private exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[108], ed[54], md[54];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    word32 i;
+    int err = MP_OKAY;
+
+    /* Reject operands that do not fit the fixed 3072-bit implementation. */
+    if (mp_count_bits(base) > 3072) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 384U) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 3072) {
+            err = MP_READ_E;
+        }
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 54 * 2;
+        m = e + 54;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 54, base);
+        sp_3072_from_bin(e, 54, exp, expLen);
+        sp_3072_from_mp(m, 54, mod);
+
+    #ifdef HAVE_FFDHE_3072
+        /* Fast path for base 2 with an FFDHE-shaped modulus. */
+        if (base->used == 1 && base->dp[0] == 2U &&
+                (m[53] >> 19) == 0xffffffffL) {
+            err = sp_3072_mod_exp_2_54(r, e, expLen * 8U, m);
+        }
+        else {
+    #endif
+            err = sp_3072_mod_exp_54(r, b, e, expLen * 8U, m, 0);
+    #ifdef HAVE_FFDHE_3072
+        }
+    #endif
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_to_bin(r, out);
+        *outLen = 384;
+        /* Strip leading zero bytes from the fixed-width output. */
+        for (i=0; i<384U && out[i] == 0U; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        /* Zeroize the private exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 54U);
+#endif
+
+    return err;
+#endif
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * 1536-bit operation reusing the 3072-bit digit helpers at half width
+ * (27 of the 57-bit digits).
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    /* Reject operands that do not fit the fixed 1536-bit width. */
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Single allocation: b (2*27 digits, also result), e, m. */
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 27 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 27 * 2;
+        m = e + 27;
+        r = b;
+
+        sp_3072_from_mp(b, 27, base);
+        sp_3072_from_mp(e, 27, exp);
+        sp_3072_from_mp(m, 27, mod);
+
+        err = sp_3072_mod_exp_27(r, b, e, mp_count_bits(exp), m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* Clear the upper half before converting back as a 54-digit value. */
+        XMEMSET(r + 27, 0, sizeof(*r) * 27U);
+        err = sp_3072_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        /* Zeroize the exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 27U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[54], ed[27], md[27];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    /* Reject operands that do not fit the fixed 1536-bit width. */
+    if (mp_count_bits(base) > 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expBits > 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 1536) {
+            err = MP_READ_E;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 27 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        b = d;
+        e = b + 27 * 2;
+        m = e + 27;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 27, base);
+        sp_3072_from_mp(e, 27, exp);
+        sp_3072_from_mp(m, 27, mod);
+
+        err = sp_3072_mod_exp_27(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* Clear the upper half before converting back as a 54-digit value. */
+        XMEMSET(r + 27, 0, sizeof(*r) * 27U);
+        err = sp_3072_to_mp(r, res);
+    }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (d != NULL) {
+        /* Zeroize the exponent before freeing. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 27U);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 27U);
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * Packs 8 bits at a time, least significant byte first, into 53-bit
+ * digits (mask 0x1fffffffffffff).
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        /* Digit overflows past 53 bits (53 - 8 = 45): finish this digit
+         * and carry the remaining bits of a[i] into the next one. */
+        if (s >= 45U) {
+            r[j] &= 0x1fffffffffffffL;
+            s = 53U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any remaining digits. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * Three compile-time cases depending on how mp_int digit width compares
+ * to the 53-bit sp digit width.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 53
+    /* Same digit size: straight copy plus zero-fill. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 53
+    /* Source digits are wider: each mp digit spills into several
+     * 53-bit sp digits. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0x1fffffffffffffL;
+        s = 53U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 53U) <= (word32)DIGIT_BIT) {
+            s += 53U;
+            r[j] &= 0x1fffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* Source digits are narrower: accumulate several mp digits into each
+     * 53-bit sp digit. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 53) {
+            r[j] &= 0x1fffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 53 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    /* Normalise first: propagate carries so every digit is within
+     * its 53-bit range before unpacking bytes. */
+    for (i=0; i<77; i++) {
+        r[i+1] += r[i] >> 53;
+        r[i] &= 0x1fffffffffffffL;
+    }
+    /* Fill the 512-byte output from the least significant byte upward. */
+    j = 4096 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<78 && j>=0; i++) {
+        b = 0;
+        /* First, the low bits of this digit share a byte with the
+         * previous digit's leftover bits (s of them). */
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit whole bytes from the remaining 53 - (8 - s) bits. */
+        while (b < 53) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* s = bits of this digit left over for the next output byte. */
+        s = 8 - (b - 53);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Column-sum schoolbook multiply over 13 53-bit digits.  Each 128-bit
+ * column accumulates every partial product a[i]*b[j] with i + j equal to
+ * the column index; carries are propagated in one final pass.  At most
+ * 13 products of <= 106 bits are summed per column, which cannot
+ * overflow a signed 128-bit accumulator.
+ *
+ * r A single precision integer (26 result digits).
+ * a A single precision integer (13 digits).
+ * b A single precision integer (13 digits).
+ */
+SP_NOINLINE static void sp_4096_mul_13(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int128_t col[25];
+    int i, j, k;
+
+    /* Exact column sums: col[k] = sum of a[i] * b[j] for i + j == k. */
+    for (k = 0; k <= 24; k++) {
+        col[k] = 0;
+    }
+    for (i = 0; i < 13; i++) {
+        for (j = 0; j < 13; j++) {
+            col[i + j] += ((int128_t)a[i]) * b[j];
+        }
+    }
+
+    /* Propagate carries low to high and mask each digit to 53 bits. */
+    for (k = 0; k < 24; k++) {
+        col[k + 1] += col[k] >> 53;
+        r[k] = (sp_digit)(col[k] & 0x1fffffffffffffL);
+    }
+    r[25] = (sp_digit)(col[24] >> 53);
+    r[24] = (sp_digit)(col[24] & 0x1fffffffffffffL);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-sum squaring over 13 53-bit digits: each cross product
+ * a[i]*a[j] (i < j) contributes twice and each diagonal a[i]^2 once.
+ * Column sums stay well within a signed 128-bit accumulator; carries
+ * are propagated in a single final pass.
+ *
+ * r A single precision integer (26 result digits).
+ * a A single precision integer (13 digits).
+ */
+SP_NOINLINE static void sp_4096_sqr_13(sp_digit* r, const sp_digit* a)
+{
+    int128_t col[25];
+    int i, j, k;
+
+    for (k = 0; k <= 24; k++) {
+        col[k] = 0;
+    }
+    for (i = 0; i < 13; i++) {
+        /* Diagonal term appears once. */
+        col[i + i] += ((int128_t)a[i]) * a[i];
+        /* Each off-diagonal pair appears twice. */
+        for (j = i + 1; j < 13; j++) {
+            col[i + j] += (((int128_t)a[i]) * a[j]) * 2;
+        }
+    }
+
+    /* Propagate carries low to high and mask each digit to 53 bits. */
+    for (k = 0; k < 24; k++) {
+        col[k + 1] += col[k] >> 53;
+        r[k] = (sp_digit)(col[k] & 0x1fffffffffffffL);
+    }
+    r[25] = (sp_digit)(col[24] >> 53);
+    r[24] = (sp_digit)(col[24] & 0x1fffffffffffffL);
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition of 13 digits; no carry propagation is done here,
+ * as callers rely on the headroom above the 53-bit digit values.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_13(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 13; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtraction of 26 digits; borrows are not propagated here
+ * (digits may go transiently negative in the signed sp_digit type).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_26(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 26; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition of 26 digits; no carry propagation is done here,
+ * as callers rely on the headroom above the 53-bit digit values.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_26(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 26; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Three-way split: with X = 2^(13*53), a = a0 + a1*X + a2*X^2 (likewise
+ * b), the 39-digit product is assembled from six 13-digit multiplies:
+ * p0 = a0*b0, p2 = a1*b1, p4 = a2*b2, p1 = (a0+a1)(b0+b1),
+ * p3 = (a1+a2)(b1+b2), p5 = (a0+a1+a2)(b0+b1+b2).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_39(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit p0[26];
+    sp_digit p1[26];
+    sp_digit p2[26];
+    sp_digit p3[26];
+    sp_digit p4[26];
+    sp_digit p5[26];
+    sp_digit t0[26];
+    sp_digit t1[26];
+    sp_digit t2[26];
+    sp_digit a0[13];
+    sp_digit a1[13];
+    sp_digit a2[13];
+    sp_digit b0[13];
+    sp_digit b1[13];
+    sp_digit b2[13];
+    /* Pairwise sums of the thirds of each operand. */
+    (void)sp_4096_add_13(a0, a, &a[13]);
+    (void)sp_4096_add_13(b0, b, &b[13]);
+    (void)sp_4096_add_13(a1, &a[13], &a[26]);
+    (void)sp_4096_add_13(b1, &b[13], &b[26]);
+    (void)sp_4096_add_13(a2, a0, &a[26]);
+    (void)sp_4096_add_13(b2, b0, &b[26]);
+    /* Six half-size products. */
+    sp_4096_mul_13(p0, a, b);
+    sp_4096_mul_13(p2, &a[13], &b[13]);
+    sp_4096_mul_13(p4, &a[26], &b[26]);
+    sp_4096_mul_13(p1, a0, b0);
+    sp_4096_mul_13(p3, a1, b1);
+    sp_4096_mul_13(p5, a2, b2);
+    /* Combine the products at 13-digit offsets. */
+    XMEMSET(r, 0, sizeof(*r)*2U*39U);
+    (void)sp_4096_sub_26(t0, p3, p2);
+    (void)sp_4096_sub_26(t1, p1, p2);
+    (void)sp_4096_sub_26(t2, p5, t0);
+    (void)sp_4096_sub_26(t2, t2, t1);
+    (void)sp_4096_sub_26(t0, t0, p4);
+    (void)sp_4096_sub_26(t1, t1, p0);
+    (void)sp_4096_add_26(r, r, p0);
+    (void)sp_4096_add_26(&r[13], &r[13], t1);
+    (void)sp_4096_add_26(&r[26], &r[26], t2);
+    (void)sp_4096_add_26(&r[39], &r[39], t0);
+    (void)sp_4096_add_26(&r[52], &r[52], p4);
+}
+
+/* Square a into r. (r = a * a)
+ *
+ * Same three-way split as sp_4096_mul_39, using six 13-digit squarings:
+ * p0 = a0^2, p2 = a1^2, p4 = a2^2, p1 = (a0+a1)^2, p3 = (a1+a2)^2,
+ * p5 = (a0+a1+a2)^2.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a)
+{
+    sp_digit p0[26];
+    sp_digit p1[26];
+    sp_digit p2[26];
+    sp_digit p3[26];
+    sp_digit p4[26];
+    sp_digit p5[26];
+    sp_digit t0[26];
+    sp_digit t1[26];
+    sp_digit t2[26];
+    sp_digit a0[13];
+    sp_digit a1[13];
+    sp_digit a2[13];
+    /* Pairwise sums of the thirds of the operand. */
+    (void)sp_4096_add_13(a0, a, &a[13]);
+    (void)sp_4096_add_13(a1, &a[13], &a[26]);
+    (void)sp_4096_add_13(a2, a0, &a[26]);
+    /* Six half-size squarings. */
+    sp_4096_sqr_13(p0, a);
+    sp_4096_sqr_13(p2, &a[13]);
+    sp_4096_sqr_13(p4, &a[26]);
+    sp_4096_sqr_13(p1, a0);
+    sp_4096_sqr_13(p3, a1);
+    sp_4096_sqr_13(p5, a2);
+    /* Combine the products at 13-digit offsets. */
+    XMEMSET(r, 0, sizeof(*r)*2U*39U);
+    (void)sp_4096_sub_26(t0, p3, p2);
+    (void)sp_4096_sub_26(t1, p1, p2);
+    (void)sp_4096_sub_26(t2, p5, t0);
+    (void)sp_4096_sub_26(t2, t2, t1);
+    (void)sp_4096_sub_26(t0, t0, p4);
+    (void)sp_4096_sub_26(t1, t1, p0);
+    (void)sp_4096_add_26(r, r, p0);
+    (void)sp_4096_add_26(&r[13], &r[13], t1);
+    (void)sp_4096_add_26(&r[26], &r[26], t2);
+    (void)sp_4096_add_26(&r[39], &r[39], t0);
+    (void)sp_4096_add_26(&r[52], &r[52], p4);
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition of 39 digits; no carry propagation is done here,
+ * as callers rely on the headroom above the 53-bit digit values.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition of 78 digits; no carry propagation is done here,
+ * as callers rely on the headroom above the 53-bit digit values.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtraction of 78 digits; borrows are not propagated here
+ * (digits may go transiently negative in the signed sp_digit type).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Karatsuba split in half at 39 digits:
+ * z0 = a_lo*b_lo, z2 = a_hi*b_hi, z1 = (a_lo+a_hi)(b_lo+b_hi) - z0 - z2,
+ * result = z0 + z1 << 39 digits + z2 << 78 digits.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_78(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[78];
+    /* a1 aliases z1: the operand sum is consumed before z1 is written. */
+    sp_digit* a1 = z1;
+    sp_digit b1[39];
+    sp_digit* z2 = r + 78;
+    (void)sp_4096_add_39(a1, a, &a[39]);
+    (void)sp_4096_add_39(b1, b, &b[39]);
+    sp_4096_mul_39(z2, &a[39], &b[39]);
+    sp_4096_mul_39(z0, a, b);
+    sp_4096_mul_39(z1, a1, b1);
+    (void)sp_4096_sub_78(z1, z1, z2);
+    (void)sp_4096_sub_78(z1, z1, z0);
+    /* Fold the middle term in at the 39-digit offset. */
+    (void)sp_4096_add_78(r + 39, r + 39, z1);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring: three 39-digit squarings instead of
+ * one 78-digit multiplication. r must have room for 156 digits.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_78(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;
+    sp_digit z1[78];
+    sp_digit* a1 = z1;          /* a1 shares storage with z1's low half */
+    sp_digit* z2 = r + 78;      /* high square written directly into r */
+    (void)sp_4096_add_39(a1, a, &a[39]);    /* a1 = aL + aH */
+    sp_4096_sqr_39(z2, &a[39]);             /* z2 = aH^2 */
+    sp_4096_sqr_39(z0, a);                  /* z0 = aL^2 */
+    sp_4096_sqr_39(z1, a1);                 /* z1 = (aL+aH)^2 */
+    (void)sp_4096_sub_78(z1, z1, z2);       /* z1 -= z2 */
+    (void)sp_4096_sub_78(z1, z1, z0);       /* z1 -= z0 (cross term) */
+    (void)sp_4096_add_78(r + 39, r + 39, z1);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Small-code variant (WOLFSSL_SP_SMALL): simple loop, no unrolling.
+ * No carry propagation is done here.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Small-code variant (WOLFSSL_SP_SMALL): simple loop, no unrolling.
+ * No borrow propagation is done here.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (column-wise) schoolbook multiply for 78 digits of
+ * 53 bits each, accumulating each column in a 128-bit integer and
+ * propagating the carry two result words at a time.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i, j, k;
+    int128_t c;
+
+    /* Highest column (k = 154) computed first, then walk down. */
+    c = ((int128_t)a[77]) * b[77];
+    r[155] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 153; k >= 0; k--) {
+        for (i = 77; i >= 0; i--) {
+            j = k - i;
+            if (j >= 78) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product-scanning squaring: off-diagonal products a[i]*a[j] (i > j)
+ * are counted twice; the diagonal term a[i]^2 is added once when the
+ * column index splits evenly (i == j).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_78(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[77]) * a[77];
+    r[155] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 153; k >= 0; k--) {
+        for (i = 77; i >= 0; i--) {
+            j = k - i;
+            if (j >= 78 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+           c += ((int128_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Half-size (39-digit / 2048-bit) helper used by the Karatsuba and
+ * CRT code paths. No carry propagation is done here.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Half-size (39-digit) small-code variant. No borrow propagation.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Half-size (39-digit) unrolled variant: 8-way unrolled loop over
+ * digits 0..31, then the trailing 7 digits (32..38) individually.
+ * No borrow propagation is done here.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = a[i + 0] - b[i + 0];
+        r[i + 1] = a[i + 1] - b[i + 1];
+        r[i + 2] = a[i + 2] - b[i + 2];
+        r[i + 3] = a[i + 3] - b[i + 3];
+        r[i + 4] = a[i + 4] - b[i + 4];
+        r[i + 5] = a[i + 5] - b[i + 5];
+        r[i + 6] = a[i + 6] - b[i + 6];
+        r[i + 7] = a[i + 7] - b[i + 7];
+    }
+    r[32] = a[32] - b[32];
+    r[33] = a[33] - b[33];
+    r[34] = a[34] - b[34];
+    r[35] = a[35] - b[35];
+    r[36] = a[36] - b[36];
+    r[37] = a[37] - b[37];
+    r[38] = a[38] - b[38];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Half-size (39-digit) product-scanning multiply; same column scheme
+ * as sp_4096_mul_78 with a 128-bit column accumulator.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[38]) * b[38];
+    r[77] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 75; k >= 0; k--) {
+        for (i = 38; i >= 0; i--) {
+            j = k - i;
+            if (j >= 39) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Half-size (39-digit) product-scanning squaring; off-diagonal terms
+ * doubled, diagonal term added once when i == j.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[38]) * a[38];
+    r[77] = (sp_digit)(c >> 53);
+    c = (c & 0x1fffffffffffffL) << 53;
+    for (k = 75; k >= 0; k--) {
+        for (i = 38; i >= 0; i--) {
+            j = k - i;
+            if (j >= 39 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+           c += ((int128_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 106;
+        r[k + 1] = (c >> 53) & 0x1fffffffffffffL;
+        c = (c & 0x1fffffffffffffL) << 53;
+    }
+    r[0] = (sp_digit)(c >> 53);
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * Computes the Montgomery constant rho = -a[0]^-1 mod 2^53 by Newton
+ * iteration: each multiply doubles the number of correct low bits.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
+    x &= 0x1fffffffffffffL;       /* keep low 53 bits (one digit) */
+
+    /* rho = -1/m mod b */
+    *rho = (1L << 53) - x;
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * 78-digit by single-digit multiply; the carry out of each 53-bit
+ * digit product flows into the next. r needs 79 digits.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_78(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[78] = (sp_digit)t;
+#else
+    /* Unrolled: t[] is a rotating window of 128-bit partial products;
+     * each result digit combines the previous product's carry with the
+     * current product's low 53 bits. */
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL;
+    for (i = 0; i < 72; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL);
+    }
+    t[1] = tb * a[73];
+    r[73] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+    t[2] = tb * a[74];
+    r[74] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+    t[3] = tb * a[75];
+    r[75] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+    t[4] = tb * a[76];
+    r[76] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+    t[5] = tb * a[77];
+    r[77] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+    r[78] = (sp_digit)(t[5] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * NOTE: this is the half-size (2048-bit, 39-digit) helper; n = 2048.
+ * 2048 bits = 38 digits of 53 bits plus a 34-bit top word, hence the
+ * 0x3ffffffff (2^34 - 1) mask on r[38].
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_4096_mont_norm_39(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<38; i++) {
+        r[i] = 0x1fffffffffffffL;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = 0x1fffffffffffffL;
+        r[i + 1] = 0x1fffffffffffffL;
+        r[i + 2] = 0x1fffffffffffffL;
+        r[i + 3] = 0x1fffffffffffffL;
+        r[i + 4] = 0x1fffffffffffffL;
+        r[i + 5] = 0x1fffffffffffffL;
+        r[i + 6] = 0x1fffffffffffffL;
+        r[i + 7] = 0x1fffffffffffffL;
+    }
+    r[32] = 0x1fffffffffffffL;
+    r[33] = 0x1fffffffffffffL;
+    r[34] = 0x1fffffffffffffL;
+    r[35] = 0x1fffffffffffffL;
+    r[36] = 0x1fffffffffffffL;
+    r[37] = 0x1fffffffffffffL;
+#endif
+    r[38] = 0x3ffffffffL;      /* top word: 34 bits of the 2048 */
+
+    /* r = (2^n - 1) mod n */
+    (void)sp_4096_sub_39(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant digit down; once a difference is
+ * found (r != 0) the mask zeroes all later contributions, so the sign
+ * of the first differing digit decides the result without branching
+ * on the data values.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_39(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=38; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    r |= (a[38] - b[38]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[37] - b[37]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[36] - b[36]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[35] - b[35]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[34] - b[34]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[33] - b[33]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[32] - b[32]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 24; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * Branch-free: b's digits are ANDed with the all-ones/all-zeros mask,
+ * so the memory access pattern does not depend on the condition.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_4096_cond_sub_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[32] = a[32] - (b[32] & m);
+    r[33] = a[33] - (b[33] & m);
+    r[34] = a[34] - (b[34] & m);
+    r[35] = a[35] - (b[35] & m);
+    r[36] = a[36] - (b[36] & m);
+    r[37] = a[37] - (b[37] & m);
+    r[38] = a[38] - (b[38] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Core inner step of Montgomery reduction: accumulates a single-digit
+ * multiple of the modulus into r. r needs 40 digits for the carry out.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_add_39(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[39] += t;
+#else
+    /* Unrolled: rotating window of 128-bit products; each r digit gets
+     * the previous product's carry plus the current product's low bits. */
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL);
+    for (i = 0; i < 32; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
+        t[2] = tb * a[i+2];
+        r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
+        t[3] = tb * a[i+3];
+        r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
+        t[4] = tb * a[i+4];
+        r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
+        t[5] = tb * a[i+5];
+        r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
+        t[6] = tb * a[i+6];
+        r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL));
+        t[7] = tb * a[i+7];
+        r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL));
+        t[0] = tb * a[i+8];
+        r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL));
+    }
+    t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
+    t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
+    t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
+    t[4] = tb * a[36]; r[36] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
+    t[5] = tb * a[37]; r[37] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
+    t[6] = tb * a[38]; r[38] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL));
+    r[39] += (sp_digit)(t[6] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 53.
+ *
+ * Propagates carries upward so that digits 0..37 each hold 53 bits;
+ * any excess accumulates in the top digit a[38].
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_4096_norm_39(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 38; i++) {
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+#else
+    int i;
+    for (i = 0; i < 32; i += 8) {
+        a[i+1] += a[i+0] >> 53; a[i+0] &= 0x1fffffffffffffL;
+        a[i+2] += a[i+1] >> 53; a[i+1] &= 0x1fffffffffffffL;
+        a[i+3] += a[i+2] >> 53; a[i+2] &= 0x1fffffffffffffL;
+        a[i+4] += a[i+3] >> 53; a[i+3] &= 0x1fffffffffffffL;
+        a[i+5] += a[i+4] >> 53; a[i+4] &= 0x1fffffffffffffL;
+        a[i+6] += a[i+5] >> 53; a[i+5] &= 0x1fffffffffffffL;
+        a[i+7] += a[i+6] >> 53; a[i+6] &= 0x1fffffffffffffL;
+        a[i+8] += a[i+7] >> 53; a[i+7] &= 0x1fffffffffffffL;
+        a[i+9] += a[i+8] >> 53; a[i+8] &= 0x1fffffffffffffL;
+    }
+    a[32+1] += a[32] >> 53;
+    a[32] &= 0x1fffffffffffffL;
+    a[33+1] += a[33] >> 53;
+    a[33] &= 0x1fffffffffffffL;
+    a[34+1] += a[34] >> 53;
+    a[34] &= 0x1fffffffffffffL;
+    a[35+1] += a[35] >> 53;
+    a[35] &= 0x1fffffffffffffL;
+    a[36+1] += a[36] >> 53;
+    a[36] &= 0x1fffffffffffffL;
+    a[37+1] += a[37] >> 53;
+    a[37] &= 0x1fffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 2048 bits down to the bottom.
+ *
+ * Logical right shift by 2048 bits = 38 full digits plus 34 bits,
+ * hence the initial >> 34 on a[38] and the << 19 (53 - 34) when
+ * folding in each higher digit. The upper half of r is then zeroed.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_4096_mont_shift_39(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int128_t n = a[38] >> 34;
+    n += ((int128_t)a[39]) << 19;
+
+    for (i = 0; i < 38; i++) {
+        r[i] = n & 0x1fffffffffffffL;
+        n >>= 53;
+        n += ((int128_t)a[40 + i]) << 19;
+    }
+    r[38] = (sp_digit)n;
+#else
+    int i;
+    int128_t n = a[38] >> 34;
+    n += ((int128_t)a[39]) << 19;
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 40]) << 19;
+        r[i + 1] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 41]) << 19;
+        r[i + 2] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 42]) << 19;
+        r[i + 3] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 43]) << 19;
+        r[i + 4] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 44]) << 19;
+        r[i + 5] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 45]) << 19;
+        r[i + 6] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 46]) << 19;
+        r[i + 7] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 47]) << 19;
+    }
+    r[32] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[72]) << 19;
+    r[33] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[73]) << 19;
+    r[34] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[74]) << 19;
+    r[35] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[75]) << 19;
+    r[36] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[76]) << 19;
+    r[37] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[77]) << 19;
+    r[38] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[39], 0, sizeof(*r) * 39U);
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * NOTE: this is the half-size (2048-bit, 39-digit) reduction. For each
+ * digit, mu = a[i] * mp mod 2^53 is chosen so that adding mu*m zeroes
+ * that digit; the final (top) digit uses the 34-bit mask 0x3ffffffff
+ * matching the modulus' top word. A conditional subtract (branch-free
+ * mask form) brings the result below m.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_39(a + 39);
+
+    for (i=0; i<38; i++) {
+        mu = (a[i] * mp) & 0x1fffffffffffffL;
+        sp_4096_mul_add_39(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+    }
+    mu = (a[i] * mp) & 0x3ffffffffL;
+    sp_4096_mul_add_39(a+i, m, mu);
+    a[i+1] += a[i] >> 53;
+    a[i] &= 0x1fffffffffffffL;
+
+    /* Divide by 2^2048 (shift down) then subtract m once if needed. */
+    sp_4096_mont_shift_39(a, a);
+    sp_4096_cond_sub_39(a, a, m, 0 - (((a[38] >> 34) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_39(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full multiply followed by Montgomery reduction of the double-width
+ * product.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_39(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_39(r, a, b);
+    sp_4096_mont_reduce_39(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Dedicated squaring followed by Montgomery reduction.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_39(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_39(r, a);
+    sp_4096_mont_reduce_39(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * Half-size (39-digit) by single-digit multiply; r needs 40 digits.
+ * Used by division to scale the divisor/dividend.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_39(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[39] = (sp_digit)t;
+#else
+    /* Unrolled: rotating window of 128-bit products, as in mul_d_78. */
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL;
+    for (i = 0; i < 32; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL);
+    }
+    t[1] = tb * a[33];
+    r[33] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+    t[2] = tb * a[34];
+    r[34] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+    t[3] = tb * a[35];
+    r[35] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+    t[4] = tb * a[36];
+    r[36] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+    t[5] = tb * a[37];
+    r[37] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+    t[6] = tb * a[38];
+    r[38] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+    r[39] = (sp_digit)(t[6] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Branch-free counterpart of sp_4096_cond_sub_39; used by division to
+ * correct a negative remainder.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_4096_cond_add_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[32] = a[32] + (b[32] & m);
+    r[33] = a[33] + (b[33] & m);
+    r[34] = a[34] + (b[34] & m);
+    r[35] = a[35] + (b[35] & m);
+    r[36] = a[36] + (b[36] & m);
+    r[37] = a[37] + (b[37] & m);
+    r[38] = a[38] + (b[38] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * NOTE(review): this block is guarded by WOLFSSL_SMALL while sibling
+ * variants use WOLFSSL_SP_SMALL — presumably intentional in the
+ * generated source, but worth confirming against upstream sp_c64.c.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 39; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+/* Right shift a by n bits into r. (r = a >> n)
+ *
+ * Assumes 1 <= n <= 52: the (53 - n) left shift is undefined for
+ * n == 0 and would drop bits for n >= 53. Used by division to undo
+ * the << 19 normalization.
+ *
+ * r A single precision integer (result).
+ * a A single precision integer to shift.
+ * n Number of bits to shift by.
+ */
+SP_NOINLINE static void sp_4096_rshift_39(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<38; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (53 - n))) & 0x1fffffffffffffL;
+    }
+#else
+    for (i=0; i<32; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (53 - n))) & 0x1fffffffffffffL;
+    }
+    r[32] = ((a[32] >> n) | (a[33] << (53 - n))) & 0x1fffffffffffffL;
+    r[33] = ((a[33] >> n) | (a[34] << (53 - n))) & 0x1fffffffffffffL;
+    r[34] = ((a[34] >> n) | (a[35] << (53 - n))) & 0x1fffffffffffffL;
+    r[35] = ((a[35] >> n) | (a[36] << (53 - n))) & 0x1fffffffffffffL;
+    r[36] = ((a[36] >> n) | (a[37] << (53 - n))) & 0x1fffffffffffffL;
+    r[37] = ((a[37] >> n) | (a[38] << (53 - n))) & 0x1fffffffffffffL;
+#endif
+    r[38] = a[38] >> n;
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+/* Divide the 106-bit value (d1:d0) by dv, 10 bits of quotient at a
+ * time, for platforms without a native 128-bit divide. Returns the
+ * quotient digit.
+ */
+static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 53 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 43);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 33) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 23) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 13) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 41 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 3) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 51 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook division by digit estimation: both operands are first
+ * scaled by 2^19 so the divisor's top (34-bit) word becomes a full
+ * 53-bit digit, an estimated quotient digit is subtracted and then
+ * corrected, and the remainder is shifted back down by 19 bits.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_64
+    int128_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[78 + 1], t2d[39 + 1], sdd[39 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* sd;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 39 + 3), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    /* NOTE(review): duplicate of the (void)m above — harmless. */
+    (void)m;
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 78 + 1;
+        sd = t2 + 39 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        /* Scale so divisor's top word uses the full 53 bits (34 + 19). */
+        sp_4096_mul_d_39(sd, d, 1L << 19);
+        sp_4096_mul_d_78(t1, a, 1L << 19);
+        dv = sd[38];
+        for (i=39; i>=0; i--) {
+            t1[39 + i] += t1[39 + i - 1] >> 53;
+            t1[39 + i - 1] &= 0x1fffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+            d1 = t1[39 + i];
+            d1 <<= 53;
+            d1 += t1[39 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_4096_div_word_39(t1[39 + i], t1[39 + i - 1], dv);
+#endif
+
+            /* Subtract estimate * divisor, then correct the estimate. */
+            sp_4096_mul_d_39(t2, sd, r1);
+            (void)sp_4096_sub_39(&t1[i], &t1[i], t2);
+            t1[39 + i] -= t2[39];
+            t1[39 + i] += t1[39 + i - 1] >> 53;
+            t1[39 + i - 1] &= 0x1fffffffffffffL;
+            r1 = (((-t1[39 + i]) << 53) - t1[39 + i - 1]) / dv;
+            r1 -= t1[39 + i];
+            sp_4096_mul_d_39(t2, sd, r1);
+            (void)sp_4096_add_39(&t1[i], &t1[i], t2);
+            t1[39 + i] += t1[39 + i - 1] >> 53;
+            t1[39 + i - 1] &= 0x1fffffffffffffL;
+        }
+        t1[39 - 1] += t1[39 - 2] >> 53;
+        t1[39 - 2] &= 0x1fffffffffffffL;
+        r1 = t1[39 - 1] / dv;
+
+        sp_4096_mul_d_39(t2, sd, r1);
+        sp_4096_sub_39(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 39U);
+        for (i=0; i<37; i++) {
+            r[i+1] += r[i] >> 53;
+            r[i] &= 0x1fffffffffffffL;
+        }
+        /* Remainder may be negative; add divisor back if so. */
+        sp_4096_cond_add_39(r, r, sd, 0 - ((r[38] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_4096_norm_39(r);
+        sp_4096_rshift_39(r, r, 19);   /* undo the 2^19 scaling */
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_4096_div_39 that discards the quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_mod_39(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_39(a, m, NULL, r);
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+ const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* td;
+ sp_digit* t[3];
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 39 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ XMEMSET(td, 0, sizeof(*td) * 3U * 39U * 2U);
+
+ norm = t[0] = td;
+ t[1] = &td[39 * 2];
+ t[2] = &td[2 * 39 * 2];
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_39(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_4096_mod_39(t[1], a, m);
+ }
+ else {
+ XMEMCPY(t[1], a, sizeof(sp_digit) * 39U);
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_4096_mul_39(t[1], t[1], norm);
+ err = sp_4096_mod_39(t[1], t[1], m);
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 53;
+ c = bits % 53;
+ n = e[i--] << (53 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 53;
+ }
+
+ y = (n >> 52) & 1;
+ n <<= 1;
+
+ sp_4096_mont_mul_39(t[y^1], t[0], t[1], m, mp);
+
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])),
+ sizeof(*t[2]) * 39 * 2);
+ sp_4096_mont_sqr_39(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2],
+ sizeof(*t[2]) * 39 * 2);
+ }
+
+ sp_4096_mont_reduce_39(t[0], m, mp);
+ n = sp_4096_cmp_39(t[0], m);
+ sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(*r) * 39 * 2);
+
+ }
+
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+
+ return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[3][78];
+#else
+ sp_digit* td;
+ sp_digit* t[3];
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 39 * 2, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ t[0] = td;
+ t[1] = &td[39 * 2];
+ t[2] = &td[2 * 39 * 2];
+#endif
+ norm = t[0];
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_39(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_4096_mod_39(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_4096_mul_39(t[1], t[1], norm);
+ err = sp_4096_mod_39(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_4096_mul_39(t[1], a, norm);
+ err = sp_4096_mod_39(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ i = bits / 53;
+ c = bits % 53;
+ n = e[i--] << (53 - c);
+ for (; ; c--) {
+ if (c == 0) {
+ if (i == -1) {
+ break;
+ }
+
+ n = e[i--];
+ c = 53;
+ }
+
+ y = (n >> 52) & 1;
+ n <<= 1;
+
+ sp_4096_mont_mul_39(t[y^1], t[0], t[1], m, mp);
+
+ XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
+ sp_4096_mont_sqr_39(t[2], t[2], m, mp);
+ XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+ ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
+ }
+
+ sp_4096_mont_reduce_39(t[0], m, mp);
+ n = sp_4096_cmp_39(t[0], m);
+ sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, t[0], sizeof(t[0]));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][78];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit rt[78];
+ sp_digit mp = 1;
+ sp_digit n;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 78, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++)
+ t[i] = td + i * 78;
+#endif
+ norm = t[0];
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_39(norm, m);
+
+ if (reduceA != 0) {
+ err = sp_4096_mod_39(t[1], a, m);
+ if (err == MP_OKAY) {
+ sp_4096_mul_39(t[1], t[1], norm);
+ err = sp_4096_mod_39(t[1], t[1], m);
+ }
+ }
+ else {
+ sp_4096_mul_39(t[1], a, norm);
+ err = sp_4096_mod_39(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_mont_sqr_39(t[ 2], t[ 1], m, mp);
+ sp_4096_mont_mul_39(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_4096_mont_sqr_39(t[ 4], t[ 2], m, mp);
+ sp_4096_mont_mul_39(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_4096_mont_sqr_39(t[ 6], t[ 3], m, mp);
+ sp_4096_mont_mul_39(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_4096_mont_sqr_39(t[ 8], t[ 4], m, mp);
+ sp_4096_mont_mul_39(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_4096_mont_sqr_39(t[10], t[ 5], m, mp);
+ sp_4096_mont_mul_39(t[11], t[ 6], t[ 5], m, mp);
+ sp_4096_mont_sqr_39(t[12], t[ 6], m, mp);
+ sp_4096_mont_mul_39(t[13], t[ 7], t[ 6], m, mp);
+ sp_4096_mont_sqr_39(t[14], t[ 7], m, mp);
+ sp_4096_mont_mul_39(t[15], t[ 8], t[ 7], m, mp);
+ sp_4096_mont_sqr_39(t[16], t[ 8], m, mp);
+ sp_4096_mont_mul_39(t[17], t[ 9], t[ 8], m, mp);
+ sp_4096_mont_sqr_39(t[18], t[ 9], m, mp);
+ sp_4096_mont_mul_39(t[19], t[10], t[ 9], m, mp);
+ sp_4096_mont_sqr_39(t[20], t[10], m, mp);
+ sp_4096_mont_mul_39(t[21], t[11], t[10], m, mp);
+ sp_4096_mont_sqr_39(t[22], t[11], m, mp);
+ sp_4096_mont_mul_39(t[23], t[12], t[11], m, mp);
+ sp_4096_mont_sqr_39(t[24], t[12], m, mp);
+ sp_4096_mont_mul_39(t[25], t[13], t[12], m, mp);
+ sp_4096_mont_sqr_39(t[26], t[13], m, mp);
+ sp_4096_mont_mul_39(t[27], t[14], t[13], m, mp);
+ sp_4096_mont_sqr_39(t[28], t[14], m, mp);
+ sp_4096_mont_mul_39(t[29], t[15], t[14], m, mp);
+ sp_4096_mont_sqr_39(t[30], t[15], m, mp);
+ sp_4096_mont_mul_39(t[31], t[16], t[15], m, mp);
+
+ bits = ((bits + 4) / 5) * 5;
+ i = ((bits + 52) / 53) - 1;
+ c = bits % 53;
+ if (c == 0) {
+ c = 53;
+ }
+ if (i < 39) {
+ n = e[i--] << (64 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ if (c < 5) {
+ n |= e[i--] << (11 - c);
+ c += 53;
+ }
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ XMEMCPY(rt, t[y], sizeof(rt));
+ for (; i>=0 || c>=5; ) {
+ if (c < 5) {
+ n |= e[i--] << (11 - c);
+ c += 53;
+ }
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+
+ sp_4096_mont_sqr_39(rt, rt, m, mp);
+ sp_4096_mont_sqr_39(rt, rt, m, mp);
+ sp_4096_mont_sqr_39(rt, rt, m, mp);
+ sp_4096_mont_sqr_39(rt, rt, m, mp);
+ sp_4096_mont_sqr_39(rt, rt, m, mp);
+
+ sp_4096_mont_mul_39(rt, rt, t[y], m, mp);
+ }
+
+ sp_4096_mont_reduce_39(rt, m, mp);
+ n = sp_4096_cmp_39(rt, m);
+ sp_4096_cond_sub_39(rt, rt, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ XMEMCPY(r, rt, sizeof(rt));
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA && !SP_RSA_PRIVATE_EXP_D */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * The value is stored as 78 limbs of 53 bits; the top limb holds only
+ * 15 bits (77 * 53 + 15 = 4096).
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_4096_mont_norm_78(sp_digit* r, const sp_digit* m)
+{
+    /* Set r = 2^n - 1. */
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<77; i++) {
+        r[i] = 0x1fffffffffffffL;
+    }
+#else
+    int i;
+
+    /* Unrolled by 8 for the first 72 limbs. */
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = 0x1fffffffffffffL;
+        r[i + 1] = 0x1fffffffffffffL;
+        r[i + 2] = 0x1fffffffffffffL;
+        r[i + 3] = 0x1fffffffffffffL;
+        r[i + 4] = 0x1fffffffffffffL;
+        r[i + 5] = 0x1fffffffffffffL;
+        r[i + 6] = 0x1fffffffffffffL;
+        r[i + 7] = 0x1fffffffffffffL;
+    }
+    r[72] = 0x1fffffffffffffL;
+    r[73] = 0x1fffffffffffffL;
+    r[74] = 0x1fffffffffffffL;
+    r[75] = 0x1fffffffffffffL;
+    r[76] = 0x1fffffffffffffL;
+#endif
+    r[77] = 0x7fffL;    /* top limb: only 15 bits used */
+
+    /* r = (2^n - 1) mod m */
+    (void)sp_4096_sub_78(r, r, m);
+
+    /* Add one so r = 2^n mod m */
+    r[0] += 1;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Limbs are scanned from most significant to least significant.  The mask
+ * (0 - ((r == 0) ? 1 : 0)) is all-ones only while no difference has been
+ * seen yet, so the first (most significant) differing limb fixes the sign
+ * of r and later limbs are masked out.  No data-dependent branches.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_4096_cmp_78(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=77; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    int i;
+
+    /* Top limbs 77..72, then the remainder unrolled by 8. */
+    r |= (a[77] - b[77]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[76] - b[76]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[75] - b[75]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[74] - b[74]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[73] - b[73]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[72] - b[72]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    for (i = 64; i >= 0; i -= 8) {
+        r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+        r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * Branch-free: the mask selects b[i] or 0, so memory access and timing do
+ * not depend on the condition.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_4096_cond_sub_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    int i;
+
+    /* Unrolled by 8 for limbs 0..71, then the tail 72..77. */
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = a[i + 0] - (b[i + 0] & m);
+        r[i + 1] = a[i + 1] - (b[i + 1] & m);
+        r[i + 2] = a[i + 2] - (b[i + 2] & m);
+        r[i + 3] = a[i + 3] - (b[i + 3] & m);
+        r[i + 4] = a[i + 4] - (b[i + 4] & m);
+        r[i + 5] = a[i + 5] - (b[i + 5] & m);
+        r[i + 6] = a[i + 6] - (b[i + 6] & m);
+        r[i + 7] = a[i + 7] - (b[i + 7] & m);
+    }
+    r[72] = a[72] - (b[72] & m);
+    r[73] = a[73] - (b[73] & m);
+    r[74] = a[74] - (b[74] & m);
+    r[75] = a[75] - (b[75] & m);
+    r[76] = a[76] - (b[76] & m);
+    r[77] = a[77] - (b[77] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Products are accumulated in 128-bit intermediates (int128_t); the final
+ * carry spills into r[78], so r must have at least 79 limbs.  Limbs are
+ * left unnormalized (caller normalizes).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_add_78(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[78] += t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    /* Rotate through 8 product slots; each limb adds the previous
+     * product's carry (t[k-1] >> 53) plus the new product's low 53 bits. */
+    t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL);
+    for (i = 0; i < 72; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
+        t[2] = tb * a[i+2];
+        r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
+        t[3] = tb * a[i+3];
+        r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
+        t[4] = tb * a[i+4];
+        r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
+        t[5] = tb * a[i+5];
+        r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
+        t[6] = tb * a[i+6];
+        r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL));
+        t[7] = tb * a[i+7];
+        r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL));
+        t[0] = tb * a[i+8];
+        r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL));
+    }
+    t[1] = tb * a[73]; r[73] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
+    t[2] = tb * a[74]; r[74] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
+    t[3] = tb * a[75]; r[75] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
+    t[4] = tb * a[76]; r[76] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
+    t[5] = tb * a[77]; r[77] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
+    r[78] += (sp_digit)(t[5] >> 53);    /* final carry out */
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 53.
+ *
+ * Propagates each limb's bits above 53 into the next limb, so on return
+ * limbs 0..76 are in [0, 2^53); the top limb (a[77]) keeps any overflow.
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_4096_norm_78(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 77; i++) {
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+#else
+    int i;
+    /* Unrolled by 8, then the tail limbs 72..76. */
+    for (i = 0; i < 72; i += 8) {
+        a[i+1] += a[i+0] >> 53; a[i+0] &= 0x1fffffffffffffL;
+        a[i+2] += a[i+1] >> 53; a[i+1] &= 0x1fffffffffffffL;
+        a[i+3] += a[i+2] >> 53; a[i+2] &= 0x1fffffffffffffL;
+        a[i+4] += a[i+3] >> 53; a[i+3] &= 0x1fffffffffffffL;
+        a[i+5] += a[i+4] >> 53; a[i+4] &= 0x1fffffffffffffL;
+        a[i+6] += a[i+5] >> 53; a[i+5] &= 0x1fffffffffffffL;
+        a[i+7] += a[i+6] >> 53; a[i+6] &= 0x1fffffffffffffL;
+        a[i+8] += a[i+7] >> 53; a[i+7] &= 0x1fffffffffffffL;
+        a[i+9] += a[i+8] >> 53; a[i+8] &= 0x1fffffffffffffL;
+    }
+    a[72+1] += a[72] >> 53;
+    a[72] &= 0x1fffffffffffffL;
+    a[73+1] += a[73] >> 53;
+    a[73] &= 0x1fffffffffffffL;
+    a[74+1] += a[74] >> 53;
+    a[74] &= 0x1fffffffffffffL;
+    a[75+1] += a[75] >> 53;
+    a[75] &= 0x1fffffffffffffL;
+    a[76+1] += a[76] >> 53;
+    a[76] &= 0x1fffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 4096 bits down to the bottom.
+ *
+ * 4096 bits = 77 full limbs + 15 bits, so the result starts at bit 15 of
+ * a[77]; each subsequent source limb contributes at offset 38 (= 53 - 15)
+ * in the 128-bit accumulator.  The old high half of r is zeroed.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_4096_mont_shift_78(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    int128_t n = a[77] >> 15;
+    n += ((int128_t)a[78]) << 38;
+
+    for (i = 0; i < 77; i++) {
+        r[i] = n & 0x1fffffffffffffL;
+        n >>= 53;
+        n += ((int128_t)a[79 + i]) << 38;
+    }
+    r[77] = (sp_digit)n;
+#else
+    int i;
+    int128_t n = a[77] >> 15;
+    n += ((int128_t)a[78]) << 38;
+    /* Unrolled by 8, then the tail using source limbs a[151..155]. */
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 79]) << 38;
+        r[i + 1] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 80]) << 38;
+        r[i + 2] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 81]) << 38;
+        r[i + 3] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 82]) << 38;
+        r[i + 4] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 83]) << 38;
+        r[i + 5] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 84]) << 38;
+        r[i + 6] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 85]) << 38;
+        r[i + 7] = n & 0x1fffffffffffffL;
+        n >>= 53; n += ((int128_t)a[i + 86]) << 38;
+    }
+    r[72] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[151]) << 38;
+    r[73] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[152]) << 38;
+    r[74] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[153]) << 38;
+    r[75] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[154]) << 38;
+    r[76] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[155]) << 38;
+    r[77] = (sp_digit)n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[78], 0, sizeof(*r) * 78U);
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * For each of the 78 low limbs a multiple of m (mu * m) is added so the
+ * limb becomes zero; the top limb uses only a 15-bit mu (0x7fff mask).
+ * The result is then shifted down 4096 bits and conditionally reduced by
+ * m using a branch-free mask.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_4096_norm_78(a + 78);
+
+#ifdef WOLFSSL_SP_DH
+    /* DH moduli may have mp == 1 (m = 2^k - small); then mu = a[i]. */
+    if (mp != 1) {
+        for (i=0; i<77; i++) {
+            mu = (a[i] * mp) & 0x1fffffffffffffL;
+            sp_4096_mul_add_78(a+i, m, mu);
+            a[i+1] += a[i] >> 53;
+        }
+        mu = (a[i] * mp) & 0x7fffL;
+        sp_4096_mul_add_78(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+    else {
+        for (i=0; i<77; i++) {
+            mu = a[i] & 0x1fffffffffffffL;
+            sp_4096_mul_add_78(a+i, m, mu);
+            a[i+1] += a[i] >> 53;
+        }
+        mu = a[i] & 0x7fffL;
+        sp_4096_mul_add_78(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+        a[i] &= 0x1fffffffffffffL;
+    }
+#else
+    for (i=0; i<77; i++) {
+        mu = (a[i] * mp) & 0x1fffffffffffffL;
+        sp_4096_mul_add_78(a+i, m, mu);
+        a[i+1] += a[i] >> 53;
+    }
+    mu = (a[i] * mp) & 0x7fffL;
+    sp_4096_mul_add_78(a+i, m, mu);
+    a[i+1] += a[i] >> 53;
+    a[i] &= 0x1fffffffffffffL;
+#endif
+
+    sp_4096_mont_shift_78(a, a);
+    /* Subtract m when the 15-bit top limb overflowed (constant time). */
+    sp_4096_cond_sub_78(a, a, m, 0 - (((a[77] >> 15) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_4096_norm_78(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full 78x78-limb multiply followed by Montgomery reduction; r needs
+ * double (156-limb) space for the intermediate product.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_78(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_4096_mul_78(r, a, b);
+    sp_4096_mont_reduce_78(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Dedicated squaring followed by Montgomery reduction; r needs double
+ * (156-limb) space for the intermediate product.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_78(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_4096_sqr_78(r, a);
+    sp_4096_mont_reduce_78(r, m, mp);
+}
+
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * Operates on a double-width (156-limb) value; the final carry is stored
+ * in r[156], so r must have at least 157 limbs.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_4096_mul_d_156(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 156; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x1fffffffffffffL;
+        t >>= 53;
+    }
+    r[156] = (sp_digit)t;
+#else
+    int128_t tb = b;
+    int128_t t[8];
+    int i;
+
+    /* Rotate through 8 product slots; each limb combines the previous
+     * product's carry with the new product's low 53 bits. */
+    t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL;
+    for (i = 0; i < 152; i += 8) {
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL);
+        t[0] = tb * a[i+8];
+        r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL);
+    }
+    t[1] = tb * a[153];
+    r[153] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL);
+    t[2] = tb * a[154];
+    r[154] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL);
+    t[3] = tb * a[155];
+    r[155] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL);
+    r[156] = (sp_digit)(t[3] >> 53);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Branch-free: the mask selects b[i] or 0, so memory access and timing do
+ * not depend on the condition.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_4096_cond_add_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    int i;
+
+    /* Unrolled by 8 for limbs 0..71, then the tail 72..77. */
+    for (i = 0; i < 72; i += 8) {
+        r[i + 0] = a[i + 0] + (b[i + 0] & m);
+        r[i + 1] = a[i + 1] + (b[i + 1] & m);
+        r[i + 2] = a[i + 2] + (b[i + 2] & m);
+        r[i + 3] = a[i + 3] + (b[i + 3] & m);
+        r[i + 4] = a[i + 4] + (b[i + 4] & m);
+        r[i + 5] = a[i + 5] + (b[i + 5] & m);
+        r[i + 6] = a[i + 6] + (b[i + 6] & m);
+        r[i + 7] = a[i + 7] + (b[i + 7] & m);
+    }
+    r[72] = a[72] + (b[72] & m);
+    r[73] = a[73] + (b[73] & m);
+    r[74] = a[74] + (b[74] & m);
+    r[75] = a[75] + (b[75] & m);
+    r[76] = a[76] + (b[76] & m);
+    r[77] = a[77] + (b[77] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Limb-wise subtract; no carry propagation (limbs may go negative and are
+ * normalized by the caller).  Always returns 0.
+ *
+ * NOTE(review): this guard is WOLFSSL_SMALL, not the WOLFSSL_SP_SMALL
+ * used elsewhere in this file -- confirm upstream that an alternate
+ * definition exists for non-WOLFSSL_SMALL builds.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#endif
+#ifdef WOLFSSL_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Limb-wise add; no carry propagation (caller normalizes).  Always
+ * returns 0.
+ *
+ * NOTE(review): guard is WOLFSSL_SMALL rather than WOLFSSL_SP_SMALL --
+ * confirm an alternate definition covers other configurations.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 78; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#endif
+/* Shift a right by n bits into r. (r = a >> n)
+ *
+ * Assumes 0 < n < 53 (the (53 - n) left shift would be undefined for
+ * n == 0 or n >= 53); the only caller in view uses n = 38.
+ *
+ * r A single precision integer holding the result.
+ * a A single precision integer to shift.
+ * n Number of bits to shift right by.
+ */
+SP_NOINLINE static void sp_4096_rshift_78(sp_digit* r, sp_digit* a, byte n)
+{
+    int i;
+
+#ifdef WOLFSSL_SP_SMALL
+    for (i=0; i<77; i++) {
+        r[i] = ((a[i] >> n) | (a[i + 1] << (53 - n))) & 0x1fffffffffffffL;
+    }
+#else
+    /* Unrolled by 8, then the tail limbs 72..76. */
+    for (i=0; i<72; i += 8) {
+        r[i+0] = ((a[i+0] >> n) | (a[i+1] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+1] = ((a[i+1] >> n) | (a[i+2] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+2] = ((a[i+2] >> n) | (a[i+3] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+3] = ((a[i+3] >> n) | (a[i+4] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+4] = ((a[i+4] >> n) | (a[i+5] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+5] = ((a[i+5] >> n) | (a[i+6] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+6] = ((a[i+6] >> n) | (a[i+7] << (53 - n))) & 0x1fffffffffffffL;
+        r[i+7] = ((a[i+7] >> n) | (a[i+8] << (53 - n))) & 0x1fffffffffffffL;
+    }
+    r[72] = ((a[72] >> n) | (a[73] << (53 - n))) & 0x1fffffffffffffL;
+    r[73] = ((a[73] >> n) | (a[74] << (53 - n))) & 0x1fffffffffffffL;
+    r[74] = ((a[74] >> n) | (a[75] << (53 - n))) & 0x1fffffffffffffL;
+    r[75] = ((a[75] >> n) | (a[76] << (53 - n))) & 0x1fffffffffffffL;
+    r[76] = ((a[76] >> n) | (a[77] << (53 - n))) & 0x1fffffffffffffL;
+#endif
+    r[77] = a[77] >> n;    /* top limb: no limb above to pull bits from */
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+/* Divide the 106-bit value d1:d0 (two 53-bit limbs) by dv without a
+ * 128-bit divide: the quotient is built 10 bits at a time (5 steps of 10
+ * plus a final 3), keeping each partial dividend within 64 bits.
+ *
+ * d1 High 53-bit limb of the dividend.
+ * d0 Low 53-bit limb of the dividend.
+ * dv Divisor (full 53-bit limb).
+ * returns the quotient.
+ */
+static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 53 bits from d1 and top 10 bits from d0. */
+    d = (d1 << 10) | (d0 >> 43);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 11 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 33) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 21 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 23) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 31 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 13) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 41 bits in r */
+    /* Next 10 bits from d0. */
+    r <<= 10;
+    d <<= 10;
+    d |= (d0 >> 3) & ((1 << 10) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 51 bits in r */
+    /* Remaining 3 bits from d0. */
+    r <<= 3;
+    d <<= 3;
+    d |= d0 & ((1 << 3) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Both values are first scaled by 2^38 so the divisor's top limb
+ * (dv = sd[77]) carries a full 53 bits; each step estimates a quotient
+ * digit, subtracts, then applies a correction.  The remainder is shifted
+ * back down by 38 bits at the end.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_64
+    int128_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[156 + 1], t2d[78 + 1], sdd[78 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* sd;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 78 + 3), NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 156 + 1;
+        sd = t2 + 78 + 1;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        sd = sdd;
+#endif
+
+        /* Scale divisor and dividend by 2^38 to normalize dv. */
+        sp_4096_mul_d_78(sd, d, 1L << 38);
+        sp_4096_mul_d_156(t1, a, 1L << 38);
+        dv = sd[77];
+        for (i=78; i>=0; i--) {
+            t1[78 + i] += t1[78 + i - 1] >> 53;
+            t1[78 + i - 1] &= 0x1fffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+            d1 = t1[78 + i];
+            d1 <<= 53;
+            d1 += t1[78 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_4096_div_word_78(t1[78 + i], t1[78 + i - 1], dv);
+#endif
+
+            /* Subtract estimate * divisor, then correct any overshoot. */
+            sp_4096_mul_d_78(t2, sd, r1);
+            (void)sp_4096_sub_78(&t1[i], &t1[i], t2);
+            t1[78 + i] -= t2[78];
+            t1[78 + i] += t1[78 + i - 1] >> 53;
+            t1[78 + i - 1] &= 0x1fffffffffffffL;
+            r1 = (((-t1[78 + i]) << 53) - t1[78 + i - 1]) / dv;
+            r1 -= t1[78 + i];
+            sp_4096_mul_d_78(t2, sd, r1);
+            (void)sp_4096_add_78(&t1[i], &t1[i], t2);
+            t1[78 + i] += t1[78 + i - 1] >> 53;
+            t1[78 + i - 1] &= 0x1fffffffffffffL;
+        }
+        t1[78 - 1] += t1[78 - 2] >> 53;
+        t1[78 - 2] &= 0x1fffffffffffffL;
+        r1 = t1[78 - 1] / dv;
+
+        sp_4096_mul_d_78(t2, sd, r1);
+        sp_4096_sub_78(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 78U);
+        for (i=0; i<76; i++) {
+            r[i+1] += r[i] >> 53;
+            r[i] &= 0x1fffffffffffffL;
+        }
+        /* If the remainder went negative, add the (scaled) divisor back. */
+        sp_4096_cond_add_78(r, r, sd, 0 - ((r[77] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+
+        sp_4096_norm_78(r);
+        sp_4096_rshift_78(r, r, 38);    /* undo the 2^38 scaling */
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_4096_div_78 that discards the quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_4096_mod_78(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_4096_div_78(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+    defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Three implementations are selected by build options:
+ *  - WOLFSSL_SP_SMALL: bit-at-a-time ladder using masked (addr_mask)
+ *    pointer selection so memory access does not depend on exponent bits.
+ *  - WOLFSSL_SP_CACHE_RESISTANT: same ladder with configurable storage.
+ *  - default: 5-bit fixed-window exponentiation with a 32-entry
+ *    precomputed table t[1..31].
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits,
+    const sp_digit* m, int reduceA)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* td;
+    sp_digit* t[3];
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 78 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3U * 78U * 2U);
+
+        norm = t[0] = td;
+        t[1] = &td[78 * 2];
+        t[2] = &td[2 * 78 * 2];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+
+        /* Convert the base into Montgomery form (reduce first if needed). */
+        if (reduceA != 0) {
+            err = sp_4096_mod_78(t[1], a, m);
+        }
+        else {
+            XMEMCPY(t[1], a, sizeof(sp_digit) * 78U);
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_mul_78(t[1], t[1], norm);
+        err = sp_4096_mod_78(t[1], t[1], m);
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 53;
+        c = bits % 53;
+        n = e[i--] << (53 - c);
+        /* Ladder: one mont_mul and one mont_sqr per exponent bit, with the
+         * operand selected via addr_mask so access is uniform. */
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 53;
+            }
+
+            y = (n >> 52) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_78(t[y^1], t[0], t[1], m, mp);
+
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 78 * 2);
+            sp_4096_mont_sqr_78(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 78 * 2);
+        }
+
+        sp_4096_mont_reduce_78(t[0], m, mp);
+        n = sp_4096_cmp_78(t[0], m);
+        sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, t[0], sizeof(*r) * 78 * 2);
+
+    }
+
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+
+    return err;
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[3][156];
+#else
+    sp_digit* td;
+    sp_digit* t[3];
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 78 * 2, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        t[0] = td;
+        t[1] = &td[78 * 2];
+        t[2] = &td[2 * 78 * 2];
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_78(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_78(t[1], t[1], norm);
+                err = sp_4096_mod_78(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_78(t[1], a, norm);
+            err = sp_4096_mod_78(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        i = bits / 53;
+        c = bits % 53;
+        n = e[i--] << (53 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1) {
+                    break;
+                }
+
+                n = e[i--];
+                c = 53;
+            }
+
+            y = (n >> 52) & 1;
+            n <<= 1;
+
+            sp_4096_mont_mul_78(t[y^1], t[0], t[1], m, mp);
+
+            /* Fix: use an explicit byte count; with WOLFSSL_SMALL_STACK,
+             * t[2] is a pointer, so sizeof(t[2]) would copy only
+             * sizeof(sp_digit*) bytes of the 78*2-limb working value. */
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(*t[2]) * 78 * 2);
+            sp_4096_mont_sqr_78(t[2], t[2], m, mp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(*t[2]) * 78 * 2);
+        }
+
+        sp_4096_mont_reduce_78(t[0], m, mp);
+        n = sp_4096_cmp_78(t[0], m);
+        sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        /* Fix: explicit size for the same pointer-vs-array reason. */
+        XMEMCPY(r, t[0], sizeof(*t[0]) * 78 * 2);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][156];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit rt[156];
+    sp_digit mp = 1;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 156, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 156;
+#endif
+        norm = t[0];
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+
+        if (reduceA != 0) {
+            err = sp_4096_mod_78(t[1], a, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_78(t[1], t[1], norm);
+                err = sp_4096_mod_78(t[1], t[1], m);
+            }
+        }
+        else {
+            sp_4096_mul_78(t[1], a, norm);
+            err = sp_4096_mod_78(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute t[k] = a^k in Montgomery form for k = 2..31. */
+        sp_4096_mont_sqr_78(t[ 2], t[ 1], m, mp);
+        sp_4096_mont_mul_78(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_4096_mont_sqr_78(t[ 4], t[ 2], m, mp);
+        sp_4096_mont_mul_78(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_4096_mont_sqr_78(t[ 6], t[ 3], m, mp);
+        sp_4096_mont_mul_78(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_4096_mont_sqr_78(t[ 8], t[ 4], m, mp);
+        sp_4096_mont_mul_78(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_4096_mont_sqr_78(t[10], t[ 5], m, mp);
+        sp_4096_mont_mul_78(t[11], t[ 6], t[ 5], m, mp);
+        sp_4096_mont_sqr_78(t[12], t[ 6], m, mp);
+        sp_4096_mont_mul_78(t[13], t[ 7], t[ 6], m, mp);
+        sp_4096_mont_sqr_78(t[14], t[ 7], m, mp);
+        sp_4096_mont_mul_78(t[15], t[ 8], t[ 7], m, mp);
+        sp_4096_mont_sqr_78(t[16], t[ 8], m, mp);
+        sp_4096_mont_mul_78(t[17], t[ 9], t[ 8], m, mp);
+        sp_4096_mont_sqr_78(t[18], t[ 9], m, mp);
+        sp_4096_mont_mul_78(t[19], t[10], t[ 9], m, mp);
+        sp_4096_mont_sqr_78(t[20], t[10], m, mp);
+        sp_4096_mont_mul_78(t[21], t[11], t[10], m, mp);
+        sp_4096_mont_sqr_78(t[22], t[11], m, mp);
+        sp_4096_mont_mul_78(t[23], t[12], t[11], m, mp);
+        sp_4096_mont_sqr_78(t[24], t[12], m, mp);
+        sp_4096_mont_mul_78(t[25], t[13], t[12], m, mp);
+        sp_4096_mont_sqr_78(t[26], t[13], m, mp);
+        sp_4096_mont_mul_78(t[27], t[14], t[13], m, mp);
+        sp_4096_mont_sqr_78(t[28], t[14], m, mp);
+        sp_4096_mont_mul_78(t[29], t[15], t[14], m, mp);
+        sp_4096_mont_sqr_78(t[30], t[15], m, mp);
+        sp_4096_mont_mul_78(t[31], t[16], t[15], m, mp);
+
+        /* Round bit count up to a multiple of the 5-bit window. */
+        bits = ((bits + 4) / 5) * 5;
+        i = ((bits + 52) / 53) - 1;
+        c = bits % 53;
+        if (c == 0) {
+            c = 53;
+        }
+        if (i < 78) {
+            n = e[i--] << (64 - c);
+        }
+        else {
+            n = 0;
+            i--;
+        }
+        if (c < 5) {
+            n |= e[i--] << (11 - c);
+            c += 53;
+        }
+        y = (n >> 59) & 0x1f;
+        n <<= 5;
+        c -= 5;
+        XMEMCPY(rt, t[y], sizeof(rt));
+        /* Main loop: 5 squarings then one table multiply per window. */
+        for (; i>=0 || c>=5; ) {
+            if (c < 5) {
+                n |= e[i--] << (11 - c);
+                c += 53;
+            }
+            y = (n >> 59) & 0x1f;
+            n <<= 5;
+            c -= 5;
+
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+            sp_4096_mont_sqr_78(rt, rt, m, mp);
+
+            sp_4096_mont_mul_78(rt, rt, t[y], m, mp);
+        }
+
+        sp_4096_mont_reduce_78(rt, m, mp);
+        n = sp_4096_cmp_78(rt, m);
+        sp_4096_cond_sub_78(rt, rt, m, ((n < 0) ?
+                    (sp_digit)1 : (sp_digit)0) - 1);
+        XMEMCPY(r, rt, sizeof(rt));
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+#endif
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */
+       /* WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * The public exponent must fit in a single 53-bit limb (mp_count_bits(em)
+ * checked below); the exponent is public, so the square-and-multiply loop
+ * may branch on its bits.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+        byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit* norm;
+    sp_digit e[1] = {0};    /* single-limb public exponent */
+    sp_digit mp;
+    int i;
+    int err = MP_OKAY;
+
+    /* Parameter validation. */
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 53) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* Carve a (2x), r (2x) and m (1x) out of the one allocation;
+         * norm shares r's space. */
+        a = d;
+        r = a + 78 * 2;
+        m = r + 78 * 2;
+        norm = r;
+
+        sp_4096_from_bin(a, 78, in, inLen);
+#if DIGIT_BIT >= 53
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 78, mm);
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+    }
+    if (err == MP_OKAY) {
+        /* Convert base into Montgomery form. */
+        sp_4096_mul_78(a, a, norm);
+        err = sp_4096_mod_78(a, a, m);
+    }
+    if (err == MP_OKAY) {
+        /* Find the exponent's highest set bit. */
+        for (i=52; i>=0; i--) {
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
+
+        /* Left-to-right square-and-multiply over the public exponent. */
+        XMEMCPY(r, a, sizeof(sp_digit) * 78 * 2);
+        for (i--; i>=0; i--) {
+            sp_4096_mont_sqr_78(r, r, m, mp);
+
+            if (((e[0] >> i) & 1) == 1) {
+                sp_4096_mont_mul_78(r, r, a, m, mp);
+            }
+        }
+        sp_4096_mont_reduce_78(r, m, mp);
+        mp = sp_4096_cmp_78(r, m);
+        sp_4096_cond_sub_78(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit ad[156], md[78], rd[156];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit e[1] = {0};    /* single-limb public exponent */
+    int err = MP_OKAY;
+
+    /* Parameter validation. */
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 53) {
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 78 * 2;
+        m = r + 78 * 2;
+    }
+#else
+    a = ad;
+    m = md;
+    r = rd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 78, in, inLen);
+#if DIGIT_BIT >= 53
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) {
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 78, mm);
+
+        if (e[0] == 0x3) {
+            /* Common exponent 3: a^3 = (a^2) * a, no Montgomery setup. */
+            sp_4096_sqr_78(r, a);
+            err = sp_4096_mod_78(r, r, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_78(r, a, r);
+                err = sp_4096_mod_78(r, r, m);
+            }
+        }
+        else {
+            sp_digit* norm = r;
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+            sp_4096_mont_norm_78(norm, m);
+
+            sp_4096_mul_78(a, a, norm);
+            err = sp_4096_mod_78(a, a, m);
+
+            if (err == MP_OKAY) {
+                /* Find the exponent's highest set bit. */
+                for (i=52; i>=0; i--) {
+                    if ((e[0] >> i) != 0) {
+                        break;
+                    }
+                }
+
+                /* Left-to-right square-and-multiply. */
+                XMEMCPY(r, a, sizeof(sp_digit) * 156U);
+                for (i--; i>=0; i--) {
+                    sp_4096_mont_sqr_78(r, r, m, mp);
+
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_78(r, r, a, m, mp);
+                    }
+                }
+                sp_4096_mont_reduce_78(r, m, mp);
+                mp = sp_4096_cmp_78(r, m);
+                sp_4096_cond_sub_78(r, r, m, ((mp < 0) ?
+                            (sp_digit)1 : (sp_digit)0) - 1);
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM)
+#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ /* Non-CRT path: compute r = in^dm mod mm with the full private exponent. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ /* CRT parameters are not used on this build path. */
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 512U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 4096) {
+ err = MP_READ_E;
+ }
+ if (inLen > 512) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Single allocation split as d[78] | a[78*2] | m[78]; r aliases a. */
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 78;
+ m = a + 156;
+ r = a;
+
+ sp_4096_from_bin(a, 78, in, inLen);
+ sp_4096_from_mp(d, 78, dm);
+ sp_4096_from_mp(m, 78, mm);
+ err = sp_4096_mod_exp_78(r, a, d, 4096, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+ if (d != NULL) {
+ /* Zeroize the private exponent digits before freeing. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 78);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+ sp_digit a[156], d[78], m[78];
+ sp_digit* r = a;
+ int err = MP_OKAY;
+
+ /* CRT parameters are not used on this build path. */
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 512U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 4096) {
+ err = MP_READ_E;
+ }
+ if (inLen > 512U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_from_bin(a, 78, in, inLen);
+ sp_4096_from_mp(d, 78, dm);
+ sp_4096_from_mp(m, 78, mm);
+ err = sp_4096_mod_exp_78(r, a, d, 4096, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+ /* Zeroize the stack-held private exponent digits. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 78);
+
+ return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#else
+ /* CRT path: combine in^dp mod p and in^dq mod q (Garner recombination). */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* t = NULL;
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* dq;
+ sp_digit* qi;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 512U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (inLen > 512) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ /* dp, dq and qi deliberately share one 39-digit slot: each is only
+  * needed for the step immediately after it is loaded. r overlaps the
+  * upper half of a; a is last read by the second mod_exp below. */
+ a = t;
+ p = a + 78 * 2;
+ q = p + 39;
+ qi = dq = dp = q + 39;
+ tmpa = qi + 39;
+ tmpb = tmpa + 78;
+
+ r = t + 78;
+
+ sp_4096_from_bin(a, 78, in, inLen);
+ sp_4096_from_mp(p, 39, pm);
+ sp_4096_from_mp(q, 39, qm);
+ sp_4096_from_mp(dp, 39, dpm);
+ err = sp_4096_mod_exp_39(tmpa, a, dp, 2048, p, 1);
+ }
+ if (err == MP_OKAY) {
+ sp_4096_from_mp(dq, 39, dqm);
+ err = sp_4096_mod_exp_39(tmpb, a, dq, 2048, q, 1);
+ }
+ if (err == MP_OKAY) {
+ /* tmpa = (tmpa - tmpb) mod p. The mask 0 - (sign bit) makes the
+  * conditional adds branch-free on secret data; done twice to cover
+  * a result that is still negative after one add. */
+ (void)sp_4096_sub_39(tmpa, tmpa, tmpb);
+ sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63));
+ sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63));
+
+ sp_4096_from_mp(qi, 39, qim);
+ sp_4096_mul_39(tmpa, tmpa, qi);
+ err = sp_4096_mod_39(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = tmpb + q * ((tmpa - tmpb) * qi mod p) */
+ sp_4096_mul_39(tmpa, q, tmpa);
+ (void)sp_4096_add_78(r, tmpb, tmpa);
+ sp_4096_norm_78(r);
+
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+ if (t != NULL) {
+ /* Zeroize all intermediate private values before freeing. */
+ XMEMSET(t, 0, sizeof(sp_digit) * 39 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+ sp_digit a[78 * 2];
+ sp_digit p[39], q[39], dp[39], dq[39], qi[39];
+ sp_digit tmpa[78], tmpb[78];
+ sp_digit* r = a;
+ int err = MP_OKAY;
+
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 512U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (inLen > 512U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_from_bin(a, 78, in, inLen);
+ sp_4096_from_mp(p, 39, pm);
+ sp_4096_from_mp(q, 39, qm);
+ sp_4096_from_mp(dp, 39, dpm);
+ sp_4096_from_mp(dq, 39, dqm);
+ sp_4096_from_mp(qi, 39, qim);
+
+ err = sp_4096_mod_exp_39(tmpa, a, dp, 2048, p, 1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_4096_mod_exp_39(tmpb, a, dq, 2048, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ /* Garner step: tmpa = (tmpa - tmpb) * qi mod p, branch-free adds. */
+ (void)sp_4096_sub_39(tmpa, tmpa, tmpb);
+ sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63));
+ sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63));
+ sp_4096_mul_39(tmpa, tmpa, qi);
+ err = sp_4096_mod_39(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = tmpb + q * tmpa */
+ sp_4096_mul_39(tmpa, tmpa, q);
+ (void)sp_4096_add_78(r, tmpb, tmpa);
+ sp_4096_norm_78(r);
+
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+ /* Zeroize all stack-held private values. */
+ XMEMSET(tmpa, 0, sizeof(tmpa));
+ XMEMSET(tmpb, 0, sizeof(tmpb));
+ XMEMSET(p, 0, sizeof(p));
+ XMEMSET(q, 0, sizeof(q));
+ XMEMSET(dp, 0, sizeof(dp));
+ XMEMSET(dq, 0, sizeof(dq));
+ XMEMSET(qi, 0, sizeof(qi));
+
+ return err;
+#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+}
+
+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 53
+ /* Digit widths match: straight copy of the 78 digits. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 78);
+ r->used = 78;
+ mp_clamp(r);
+#elif DIGIT_BIT < 53
+ /* mp digits narrower than 53 bits: split each sp digit across several
+  * mp digits; s tracks the bit offset consumed from the current digit. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 78; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 53) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 53 - s;
+ }
+ r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp digits wider than 53 bits: pack several sp digits into each mp
+  * digit; s is the bit position currently being filled in r->dp[j]. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 78; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 53 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 53 - s;
+ }
+ else {
+ s += 53;
+ }
+ }
+ r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int err = MP_OKAY;
+ sp_digit* d = NULL;
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 4096) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Single allocation split as b[78*2] | e[78] | m[78]; r aliases b. */
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 78 * 2;
+ m = e + 78;
+ r = b;
+
+ sp_4096_from_mp(b, 78, base);
+ sp_4096_from_mp(e, 78, exp);
+ sp_4096_from_mp(m, 78, mod);
+
+ err = sp_4096_mod_exp_78(r, b, e, mp_count_bits(exp), m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_4096_to_mp(r, res);
+ }
+
+ if (d != NULL) {
+ /* Zeroize the (possibly secret) exponent digits before freeing. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit bd[156], ed[78], md[78];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 4096) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (err == MP_OKAY) {
+ /* Single allocation split as b[78*2] | e[78] | m[78]; r aliases b. */
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 78 * 2;
+ m = e + 78;
+ r = b;
+ }
+#else
+ r = b = bd;
+ e = ed;
+ m = md;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_4096_from_mp(b, 78, base);
+ sp_4096_from_mp(e, 78, exp);
+ sp_4096_from_mp(m, 78, mod);
+
+ err = sp_4096_mod_exp_78(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_4096_to_mp(r, res);
+ }
+
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (d != NULL) {
+ /* Zeroize the (possibly secret) exponent digits before freeing. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+#endif
+
+ return err;
+#endif
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
+/* Shift a 78-digit (53 bits per digit) value left by n bits (0 <= n < 53).
+ *
+ * The bits shifted out of the most significant digit land in the extra
+ * digit r[78]. The fully-unrolled and looped forms compute identical
+ * results; a single loop is used here for all builds.
+ *
+ * r Result, 79 digits.
+ * a Value to shift, 78 digits.
+ * n Bit count to shift by.
+ */
+SP_NOINLINE static void sp_4096_lshift_78(sp_digit* r, sp_digit* a, byte n)
+{
+ int j;
+
+ r[78] = a[77] >> (53 - n);
+ for (j = 77; j > 0; j--) {
+ r[j] = ((a[j] << n) | (a[j-1] >> (53 - n))) & 0x1fffffffffffffL;
+ }
+ r[0] = (a[0] << n) & 0x1fffffffffffffL;
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit nd[156];
+ sp_digit td[79];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* 235 digits = 156 (norm) + 79 (tmp: 78 digits plus mul_d carry). */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 235, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 156;
+ XMEMSET(td, 0, sizeof(sp_digit) * 235);
+#else
+ norm = nd;
+ tmp = td;
+ XMEMSET(td, 0, sizeof(td));
+#endif
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_78(norm, m);
+
+ /* Round bits up to a multiple of the 5-bit window size. */
+ bits = ((bits + 4) / 5) * 5;
+ i = ((bits + 52) / 53) - 1;
+ c = bits % 53;
+ if (c == 0) {
+ c = 53;
+ }
+ /* n buffers upcoming exponent bits left-aligned in 64 bits;
+  * c counts how many of them are valid. */
+ if (i < 78) {
+ n = e[i--] << (64 - c);
+ }
+ else {
+ n = 0;
+ i--;
+ }
+ if (c < 5) {
+ /* Refill: 11 = 64 - 53 aligns the next 53-bit digit below n. */
+ n |= e[i--] << (11 - c);
+ c += 53;
+ }
+ /* y = next 5-bit window of the exponent. */
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ /* First window: r = norm * 2^y, done as a plain shift. */
+ sp_4096_lshift_78(r, norm, y);
+ for (; i>=0 || c>=5; ) {
+ if (c < 5) {
+ n |= e[i--] << (11 - c);
+ c += 53;
+ }
+ y = (n >> 59) & 0x1f;
+ n <<= 5;
+ c -= 5;
+
+ /* Square 5 times, then multiply by 2^y via a left shift. */
+ sp_4096_mont_sqr_78(r, r, m, mp);
+ sp_4096_mont_sqr_78(r, r, m, mp);
+ sp_4096_mont_sqr_78(r, r, m, mp);
+ sp_4096_mont_sqr_78(r, r, m, mp);
+ sp_4096_mont_sqr_78(r, r, m, mp);
+
+ sp_4096_lshift_78(r, r, y);
+ /* Fold the bits at and above bit 4096 (= 77*53 + 15) back in,
+  * scaled by norm. NOTE(review): relies on the value produced by
+  * sp_4096_mont_norm_78 being congruent to 2^4096 mod m — confirm. */
+ sp_4096_mul_d_78(tmp, norm, (r[78] << 38) + (r[77] >> 15));
+ r[78] = 0;
+ r[77] &= 0x7fffL;
+ (void)sp_4096_add_78(r, r, tmp);
+ sp_4096_norm_78(r);
+ o = sp_4096_cmp_78(r, m);
+ /* Branch-free conditional subtract keeps r below m. */
+ sp_4096_cond_sub_78(r, r, m, ((o < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ }
+
+ /* Leave Montgomery form and fully reduce the result. */
+ sp_4096_mont_reduce_78(r, m, mp);
+ n = sp_4096_cmp_78(r, m);
+ sp_4096_cond_sub_78(r, r, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int err = MP_OKAY;
+ sp_digit* d = NULL;
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ word32 i;
+
+ if (mp_count_bits(base) > 4096) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 512) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Single allocation split as b[78*2] | e[78] | m[78]; r aliases b. */
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 78 * 2;
+ m = e + 78;
+ r = b;
+
+ sp_4096_from_mp(b, 78, base);
+ sp_4096_from_bin(e, 78, exp, expLen);
+ sp_4096_from_mp(m, 78, mod);
+
+ #ifdef HAVE_FFDHE_4096
+ /* Base 2 with the top 32 modulus bits all set (FFDHE-style prime):
+  * take the dedicated 2^e fast path. The expression assembles the
+  * top 32 bits from the 15-bit digit m[77] and the top of m[76]. */
+ if (base->used == 1 && base->dp[0] == 2 &&
+ ((m[77] << 17) | (m[76] >> 36)) == 0xffffffffL) {
+ err = sp_4096_mod_exp_2_78(r, e, expLen * 8, m);
+ }
+ else
+ #endif
+ err = sp_4096_mod_exp_78(r, b, e, expLen * 8, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ /* Strip leading zero bytes from the output. */
+ for (i=0; i<512 && out[i] == 0; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+ }
+
+ if (d != NULL) {
+ /* Zeroize the private exponent digits before freeing. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+ return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit bd[156], ed[78], md[78];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* b;
+ sp_digit* e;
+ sp_digit* m;
+ sp_digit* r;
+ word32 i;
+ int err = MP_OKAY;
+
+ if (mp_count_bits(base) > 4096) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 512U) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+#ifdef WOLFSSL_SMALL_STACK
+ if (err == MP_OKAY) {
+ /* Single allocation split as b[78*2] | e[78] | m[78]; r aliases b. */
+ d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ b = d;
+ e = b + 78 * 2;
+ m = e + 78;
+ r = b;
+ }
+#else
+ r = b = bd;
+ e = ed;
+ m = md;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_4096_from_mp(b, 78, base);
+ sp_4096_from_bin(e, 78, exp, expLen);
+ sp_4096_from_mp(m, 78, mod);
+
+ #ifdef HAVE_FFDHE_4096
+ /* Base 2 with an FFDHE-style prime: dedicated 2^e fast path. */
+ if (base->used == 1 && base->dp[0] == 2U &&
+ ((m[77] << 17) | (m[76] >> 36)) == 0xffffffffL) {
+ err = sp_4096_mod_exp_2_78(r, e, expLen * 8U, m);
+ }
+ else {
+ #endif
+ err = sp_4096_mod_exp_78(r, b, e, expLen * 8U, m, 0);
+ #ifdef HAVE_FFDHE_4096
+ }
+ #endif
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ /* Strip leading zero bytes from the output. */
+ for (i=0; i<512U && out[i] == 0U; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (d != NULL) {
+ /* Zeroize the private exponent digits before freeing. */
+ XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+ XFREE(d, NULL, DYNAMIC_TYPE_DH);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 78U);
+#endif
+
+ return err;
+#endif
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use. */
+typedef struct sp_point_256 {
+ /* X ordinate; 2 * 5 digits — presumably double width for un-reduced
+  * intermediates, TODO confirm against the field operations. */
+ sp_digit x[2 * 5];
+ /* Y ordinate; double width as above. */
+ sp_digit y[2 * 5];
+ /* Z ordinate; double width as above. */
+ sp_digit z[2 * 5];
+ /* Non-zero when this is the point at infinity. */
+ int infinity;
+} sp_point_256;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[5] = {
+ 0xfffffffffffffL,0x00fffffffffffL,0x0000000000000L,0x0001000000000L,
+ 0x0ffffffff0000L
+};
+/* The Montgomery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[5] = {
+ 0x0000000000001L,0xff00000000000L,0xfffffffffffffL,0xfffefffffffffL,
+ 0x000000000ffffL
+};
+/* The Montgomery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod = 0x0000000000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[5] = {
+ 0x9cac2fc632551L,0xada7179e84f3bL,0xfffffffbce6faL,0x0000fffffffffL,
+ 0x0ffffffff0000L
+};
+#endif
+/* The order of the curve P256 minus 2. */
+static const sp_digit p256_order2[5] = {
+ 0x9cac2fc63254fL,0xada7179e84f3bL,0xfffffffbce6faL,0x0000fffffffffL,
+ 0x0ffffffff0000L
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[5] = {
+ 0x6353d039cdaafL,0x5258e8617b0c4L,0x0000000431905L,0xffff000000000L,
+ 0x000000000ffffL
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order = 0x1c8aaee00bc4fL;
+#endif
+/* The base point of curve P256. */
+static const sp_point_256 p256_base = {
+ /* X ordinate */
+ {
+ 0x13945d898c296L,0x812deb33a0f4aL,0x3a440f277037dL,0x4247f8bce6e56L,
+ 0x06b17d1f2e12cL,
+ 0L, 0L, 0L, 0L, 0L
+ },
+ /* Y ordinate */
+ {
+ 0x6406837bf51f5L,0x576b315ececbbL,0xc0f9e162bce33L,0x7f9b8ee7eb4a7L,
+ 0x04fe342e2fe1aL,
+ 0L, 0L, 0L, 0L, 0L
+ },
+ /* Z ordinate */
+ {
+ 0x0000000000001L,0x0000000000000L,0x0000000000000L,0x0000000000000L,
+ 0x0000000000000L,
+ 0L, 0L, 0L, 0L, 0L
+ },
+ /* infinity */
+ 0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+/* The curve coefficient b of P256. */
+static const sp_digit p256_b[5] = {
+ 0xe3c3e27d2604bL,0xb0cc53b0f63bcL,0x69886bc651d06L,0x93e7b3ebbd557L,
+ 0x05ac635d8aa3aL
+};
+#endif
+
+/* Obtain a point to work with.
+ *
+ * Small / small-stack builds allocate the point from the heap; other
+ * builds return the caller-supplied stack object 'sp' directly.
+ *
+ * heap Heap hint passed to XMALLOC (only used on heap builds).
+ * sp   Caller-provided point storage (ignored on heap builds).
+ * p    Out: set to the point to use.
+ * returns MP_OKAY on success and MEMORY_E when allocation fails.
+ */
+static int sp_256_point_new_ex_5(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+ int ret = MP_OKAY;
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ (void)sp;
+ *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+ *p = sp;
+#endif
+ if (*p == NULL) {
+ ret = MEMORY_E;
+ }
+ return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_256_point_new_5(heap, sp, p) sp_256_point_new_ex_5((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_256_point_new_5(heap, sp, p) sp_256_point_new_ex_5((heap), &(sp), &(p))
+#endif
+
+
+/* Dispose of a point obtained from sp_256_point_new_5.
+ *
+ * p     Point to release (may be NULL on heap builds).
+ * clear When non-zero the point data is zeroized first.
+ * heap  Heap hint the point was allocated from (heap builds only).
+ */
+static void sp_256_point_free_5(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+ if (p != NULL) {
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+/* Clear point data if requested. */
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+#endif
+ (void)heap;
+}
+
+/* Multiply a number by the Montgomery normalizer mod modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mod_mul_norm_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ int64_t* td;
+#else
+ int64_t td[8];
+ int64_t a32d[8];
+#endif
+ int64_t* t;
+ int64_t* a32;
+ int64_t o;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
+ if (td == NULL) {
+ return MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = td;
+ a32 = td + 8;
+#else
+ t = td;
+ a32 = a32d;
+#endif
+
+ a32[0] = (sp_digit)(a[0]) & 0xffffffffL;
+ a32[1] = (sp_digit)(a[0] >> 32U);
+ a32[1] |= a[1] << 20U;
+ a32[1] &= 0xffffffffL;
+ a32[2] = (sp_digit)(a[1] >> 12U) & 0xffffffffL;
+ a32[3] = (sp_digit)(a[1] >> 44U);
+ a32[3] |= a[2] << 8U;
+ a32[3] &= 0xffffffffL;
+ a32[4] = (sp_digit)(a[2] >> 24U);
+ a32[4] |= a[3] << 28U;
+ a32[4] &= 0xffffffffL;
+ a32[5] = (sp_digit)(a[3] >> 4U) & 0xffffffffL;
+ a32[6] = (sp_digit)(a[3] >> 36U);
+ a32[6] |= a[4] << 16U;
+ a32[6] &= 0xffffffffL;
+ a32[7] = (sp_digit)(a[4] >> 16U) & 0xffffffffL;
+
+ /* 1 1 0 -1 -1 -1 -1 0 */
+ t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
+ /* 0 1 1 0 -1 -1 -1 -1 */
+ t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
+ /* 0 0 1 1 0 -1 -1 -1 */
+ t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
+ /* -1 -1 0 2 2 1 0 -1 */
+ t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
+ /* 0 -1 -1 0 2 2 1 0 */
+ t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
+ /* 0 0 -1 -1 0 2 2 1 */
+ t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
+ /* -1 -1 0 0 0 1 3 2 */
+ t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
+ /* 1 0 -1 -1 -1 -1 0 3 */
+ t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
+
+ t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+ t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+ t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+ t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+ t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+ t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+ t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+ o = t[7] >> 32U; t[7] &= 0xffffffffL;
+ t[0] += o;
+ t[3] -= o;
+ t[6] -= o;
+ t[7] += o;
+ t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+ t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+ t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+ t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+ t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+ t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+ t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+
+ r[0] = t[0];
+ r[0] |= t[1] << 32U;
+ r[0] &= 0xfffffffffffffLL;
+ r[1] = (sp_digit)(t[1] >> 20);
+ r[1] |= t[2] << 12U;
+ r[1] |= t[3] << 44U;
+ r[1] &= 0xfffffffffffffLL;
+ r[2] = (sp_digit)(t[3] >> 8);
+ r[2] |= t[4] << 24U;
+ r[2] &= 0xfffffffffffffLL;
+ r[3] = (sp_digit)(t[4] >> 28);
+ r[3] |= t[5] << 4U;
+ r[3] |= t[6] << 36U;
+ r[3] &= 0xfffffffffffffLL;
+ r[4] = (sp_digit)(t[6] >> 16);
+ r[4] |= t[7] << 16U;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * Repacks the multi-precision number from DIGIT_BIT-sized mp_digits into
+ * 52-bit sp_digits, zero-padding up to 'size' words.
+ *
+ * r     A single precision integer (output, 'size' words).
+ * size  Maximum number of words to write.
+ * a     A multi-precision integer (input).
+ */
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 52
+    /* Same digit size: straight copy then zero-fill. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 52
+    /* mp digits are wider: each mp digit spills across several 52-bit
+     * words. s tracks the bit offset consumed from the current mp digit. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xfffffffffffffL;
+        s = 52U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 52U) <= (word32)DIGIT_BIT) {
+            s += 52U;
+            r[j] &= 0xfffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* mp digits are narrower: accumulate several mp digits per 52-bit
+     * word. s is the bit position within the current output word. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 52) {
+            r[j] &= 0xfffffffffffffL;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 52 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Load an ecc_point into the fixed five-word sp_point_256 layout.
+ *
+ * p   Point of type sp_point_256 (result).
+ * pm  Point of type ecc_point (source).
+ */
+static void sp_256_point_from_ecc_point_5(sp_point_256* p, const ecc_point* pm)
+{
+    /* Zero each ordinate buffer fully, then copy in the mp_int value. */
+    XMEMSET(p->x, 0, sizeof(p->x));
+    sp_256_from_mp(p->x, 5, pm->x);
+    XMEMSET(p->y, 0, sizeof(p->y));
+    sp_256_from_mp(p->y, 5, pm->y);
+    XMEMSET(p->z, 0, sizeof(p->z));
+    sp_256_from_mp(p->z, 5, pm->z);
+    /* Converted application points are always treated as finite. */
+    p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Repacks five 52-bit words into DIGIT_BIT-sized mp_digits, growing the
+ * mp_int to hold 256 bits first.
+ *
+ * a A single precision integer (5 x 52-bit words).
+ * r A multi-precision integer (result).
+ * returns MP_OKAY on success, or the mp_grow() error.
+ */
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 52
+        /* Same digit size: straight copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 5);
+        r->used = 5;
+        mp_clamp(r);
+#elif DIGIT_BIT < 52
+        /* mp digits are narrower: each 52-bit word spans several mp
+         * digits. s is the bit offset consumed from the current word. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 5; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 52) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 52 - s;
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* mp digits are wider: accumulate 52-bit words into each mp
+         * digit. s is the bit position within the current mp digit. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 5; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 52 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                /* Masking only needed when DIGIT_BIT is not a word size. */
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 52 - s;
+            }
+            else {
+                s += 52;
+            }
+        }
+        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * p  Point of type sp_point_256 (source).
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_5(const sp_point_256* p, ecc_point* pm)
+{
+    /* Convert each ordinate in turn, stopping at the first failure. */
+    int ret = sp_256_to_mp(p->x, pm->x);
+
+    if (ret == MP_OKAY) {
+        ret = sp_256_to_mp(p->y, pm->y);
+        if (ret == MP_OKAY) {
+            ret = sp_256_to_mp(p->z, pm->z);
+        }
+    }
+
+    return ret;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Schoolbook multiply of two 5 x 52-bit numbers producing a 10-word
+ * result. Works from the most significant column down; c carries the
+ * current column sum shifted so the next column's high part lands in it.
+ *
+ * r A single precision integer (10 words, result).
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_256_mul_5(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int128_t c;
+
+    /* Highest column (k = 8) done separately to seed the carry. */
+    c = ((int128_t)a[4]) * b[4];
+    r[9] = (sp_digit)(c >> 52);
+    c = (c & 0xfffffffffffffL) << 52;
+    for (k = 7; k >= 0; k--) {
+        for (i = 4; i >= 0; i--) {
+            /* Column k sums all a[i]*b[j] with i + j == k. */
+            j = k - i;
+            if (j >= 5) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 104;
+        r[k + 1] = (c >> 52) & 0xfffffffffffffL;
+        c = (c & 0xfffffffffffffL) << 52;
+    }
+    r[0] = (sp_digit)(c >> 52);
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Fully unrolled schoolbook multiply: t<k> is the 128-bit column sum for
+ * output word k, then one carry pass packs the columns into 52-bit words.
+ *
+ * r A single precision integer (10 words, result).
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_256_mul_5(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int128_t t0   = ((int128_t)a[ 0]) * b[ 0];
+    int128_t t1   = ((int128_t)a[ 0]) * b[ 1]
+                 + ((int128_t)a[ 1]) * b[ 0];
+    int128_t t2   = ((int128_t)a[ 0]) * b[ 2]
+                 + ((int128_t)a[ 1]) * b[ 1]
+                 + ((int128_t)a[ 2]) * b[ 0];
+    int128_t t3   = ((int128_t)a[ 0]) * b[ 3]
+                 + ((int128_t)a[ 1]) * b[ 2]
+                 + ((int128_t)a[ 2]) * b[ 1]
+                 + ((int128_t)a[ 3]) * b[ 0];
+    int128_t t4   = ((int128_t)a[ 0]) * b[ 4]
+                 + ((int128_t)a[ 1]) * b[ 3]
+                 + ((int128_t)a[ 2]) * b[ 2]
+                 + ((int128_t)a[ 3]) * b[ 1]
+                 + ((int128_t)a[ 4]) * b[ 0];
+    int128_t t5   = ((int128_t)a[ 1]) * b[ 4]
+                 + ((int128_t)a[ 2]) * b[ 3]
+                 + ((int128_t)a[ 3]) * b[ 2]
+                 + ((int128_t)a[ 4]) * b[ 1];
+    int128_t t6   = ((int128_t)a[ 2]) * b[ 4]
+                 + ((int128_t)a[ 3]) * b[ 3]
+                 + ((int128_t)a[ 4]) * b[ 2];
+    int128_t t7   = ((int128_t)a[ 3]) * b[ 4]
+                 + ((int128_t)a[ 4]) * b[ 3];
+    int128_t t8   = ((int128_t)a[ 4]) * b[ 4];
+
+    /* Carry propagation: push each column's overflow into the next. */
+    t1   += t0  >> 52; r[ 0] = t0  & 0xfffffffffffffL;
+    t2   += t1  >> 52; r[ 1] = t1  & 0xfffffffffffffL;
+    t3   += t2  >> 52; r[ 2] = t2  & 0xfffffffffffffL;
+    t4   += t3  >> 52; r[ 3] = t3  & 0xfffffffffffffL;
+    t5   += t4  >> 52; r[ 4] = t4  & 0xfffffffffffffL;
+    t6   += t5  >> 52; r[ 5] = t5  & 0xfffffffffffffL;
+    t7   += t6  >> 52; r[ 6] = t6  & 0xfffffffffffffL;
+    t8   += t7  >> 52; r[ 7] = t7  & 0xfffffffffffffL;
+    r[9] = (sp_digit)(t8 >> 52);
+                       r[8] = t8 & 0xfffffffffffffL;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#define sp_256_mont_reduce_order_5 sp_256_mont_reduce_5
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word down. Once a difference is found,
+ * r becomes non-zero and the mask (0 - (r == 0)) zeroes out all later
+ * word differences, so the result reflects the highest differing word
+ * without data-dependent branches.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_256_cmp_5(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=4; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not.
+ *
+ * Branch-free: (b[i] & m) is b[i] when m is all ones and 0 when m is 0,
+ * so the same instructions execute regardless of the condition.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply (all ones or all zeros).
+ */
+static void sp_256_cond_sub_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] - (b[ 0] & m);
+    r[ 1] = a[ 1] - (b[ 1] & m);
+    r[ 2] = a[ 2] - (b[ 2] & m);
+    r[ 3] = a[ 3] - (b[ 3] & m);
+    r[ 4] = a[ 4] - (b[ 4] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Core step of Montgomery reduction: adds the 5-word value a scaled by
+ * the single digit b into r, spilling the final carry into r[5].
+ *
+ * r A single precision integer (at least 6 words; updated in place).
+ * a A single precision integer.
+ * b A scalar (single 52-bit digit).
+ */
+SP_NOINLINE static void sp_256_mul_add_5(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        /* t accumulates product + existing word; low 52 bits stored,
+         * remainder carried forward. */
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0xfffffffffffffL;
+        t >>= 52;
+    }
+    r[5] += t;
+#else
+    int128_t tb = b;
+    int128_t t[5];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    /* Add low 52 bits of each product plus the previous product's high
+     * part; words may exceed 52 bits here and are normalized later. */
+    r[ 0] +=  (sp_digit)                 (t[ 0] & 0xfffffffffffffL);
+    r[ 1] += (sp_digit)((t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL));
+    r[ 2] += (sp_digit)((t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL));
+    r[ 3] += (sp_digit)((t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL));
+    r[ 4] += (sp_digit)((t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL));
+    r[ 5] += (sp_digit) (t[ 4] >> 52);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 52 bits.
+ *
+ * Propagates carries so words 0..3 each hold at most 52 bits. The top
+ * word (a[4]) is left unmasked; callers inspect a[4] >> 48 to detect
+ * values at or above 2^256.
+ *
+ * a Array of sp_digit to normalize (in place).
+ */
+static void sp_256_norm_5(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 4; i++) {
+        a[i+1] += a[i] >> 52;
+        a[i] &= 0xfffffffffffffL;
+    }
+#else
+    a[1] += a[0] >> 52; a[0] &= 0xfffffffffffffL;
+    a[2] += a[1] >> 52; a[1] &= 0xfffffffffffffL;
+    a[3] += a[2] >> 52; a[2] &= 0xfffffffffffffL;
+    a[4] += a[3] >> 52; a[3] &= 0xfffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 256 bits down to the bottom.
+ *
+ * Extracts bits 256..511 of the 10-word value a (i.e. shifts right by
+ * 256 = 4*52 + 48 bits) into r[0..4], then zeroes r[5..9].
+ *
+ * r A single precision number (result; 10 words, top half cleared).
+ * a A single precision number (10-word input).
+ */
+static void sp_256_mont_shift_5(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    word64 n;
+
+    /* Start with the 4 bits of a[4] above bit 48, then pull in 52 bits
+     * from each higher word, 4 bits at a time offset. */
+    n = a[4] >> 48;
+    for (i = 0; i < 4; i++) {
+        n += (word64)a[5 + i] << 4;
+        r[i] = n & 0xfffffffffffffL;
+        n >>= 52;
+    }
+    n += (word64)a[9] << 4;
+    r[4] = n;
+#else
+    word64 n;
+
+    n  = a[4] >> 48;
+    n += (word64)a[ 5] << 4U; r[ 0] = n & 0xfffffffffffffUL; n >>= 52U;
+    n += (word64)a[ 6] << 4U; r[ 1] = n & 0xfffffffffffffUL; n >>= 52U;
+    n += (word64)a[ 7] << 4U; r[ 2] = n & 0xfffffffffffffUL; n >>= 52U;
+    n += (word64)a[ 8] << 4U; r[ 3] = n & 0xfffffffffffffUL; n >>= 52U;
+    n += (word64)a[ 9] << 4U; r[ 4] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    XMEMSET(&r[5], 0, sizeof(*r) * 5U);
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * For each word, computes mu = a[i] * mp mod 2^52 and adds mu * m so the
+ * low word becomes zero, then shifts the high half down and performs a
+ * final conditional subtraction of m.
+ *
+ * a  A single precision number to reduce in place (10 words).
+ * m  The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^52.
+ */
+static void sp_256_mont_reduce_5(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    if (mp != 1) {
+        for (i=0; i<4; i++) {
+            mu = (a[i] * mp) & 0xfffffffffffffL;
+            sp_256_mul_add_5(a+i, m, mu);
+            a[i+1] += a[i] >> 52;
+        }
+        /* Top word holds only 48 bits (256 = 4*52 + 48): narrower mask. */
+        mu = (a[i] * mp) & 0xffffffffffffL;
+        sp_256_mul_add_5(a+i, m, mu);
+        a[i+1] += a[i] >> 52;
+        a[i] &= 0xfffffffffffffL;
+    }
+    else {
+        /* mp == 1: mu is just the word itself; modulus is P-256. */
+        for (i=0; i<4; i++) {
+            mu = a[i] & 0xfffffffffffffL;
+            sp_256_mul_add_5(a+i, p256_mod, mu);
+            a[i+1] += a[i] >> 52;
+        }
+        mu = a[i] & 0xffffffffffffL;
+        sp_256_mul_add_5(a+i, p256_mod, mu);
+        a[i+1] += a[i] >> 52;
+        a[i] &= 0xfffffffffffffL;
+    }
+
+    sp_256_mont_shift_5(a, a);
+    /* Constant-time final subtraction when the result reached 2^256. */
+    sp_256_cond_sub_5(a, a, m, 0 - (((a[4] >> 48) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_5(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full 10-word product followed by Montgomery reduction back to 5 words.
+ *
+ * r  Result of multiplication.
+ * a  First number to multiply in Montgomery form.
+ * b  Second number to multiply in Montgomery form.
+ * m  Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^52).
+ */
+static void sp_256_mont_mul_5(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mul_5(r, a, b);
+    sp_256_mont_reduce_5(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-wise squaring: off-diagonal products a[i]*a[j] (i > j) are
+ * counted twice, diagonal products once. Produces a 10-word result.
+ *
+ * r A single precision integer (10 words, result).
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_5(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int128_t c;
+
+    /* Highest column (k = 8) done separately to seed the carry. */
+    c = ((int128_t)a[4]) * a[4];
+    r[9] = (sp_digit)(c >> 52);
+    c = (c & 0xfffffffffffffL) << 52;
+    for (k = 7; k >= 0; k--) {
+        for (i = 4; i >= 0; i--) {
+            j = k - i;
+            if (j >= 5 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            /* Symmetric cross terms counted twice. */
+            c += ((int128_t)a[i]) * a[j] * 2;
+        }
+        if (i == j) {
+            /* Diagonal term of an even column. */
+            c += ((int128_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 104;
+        r[k + 1] = (c >> 52) & 0xfffffffffffffL;
+        c = (c & 0xfffffffffffffL) << 52;
+    }
+    r[0] = (sp_digit)(c >> 52);
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * Fully unrolled squaring with doubled cross terms, followed by one
+ * carry pass packing the 128-bit columns into 52-bit words.
+ *
+ * r A single precision integer (10 words, result).
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_256_sqr_5(sp_digit* r, const sp_digit* a)
+{
+    int128_t t0   =  ((int128_t)a[ 0]) * a[ 0];
+    int128_t t1   = (((int128_t)a[ 0]) * a[ 1]) * 2;
+    int128_t t2   = (((int128_t)a[ 0]) * a[ 2]) * 2
+                 +  ((int128_t)a[ 1]) * a[ 1];
+    int128_t t3   = (((int128_t)a[ 0]) * a[ 3]
+                 +  ((int128_t)a[ 1]) * a[ 2]) * 2;
+    int128_t t4   = (((int128_t)a[ 0]) * a[ 4]
+                 +  ((int128_t)a[ 1]) * a[ 3]) * 2
+                 +  ((int128_t)a[ 2]) * a[ 2];
+    int128_t t5   = (((int128_t)a[ 1]) * a[ 4]
+                 +  ((int128_t)a[ 2]) * a[ 3]) * 2;
+    int128_t t6   = (((int128_t)a[ 2]) * a[ 4]) * 2
+                 +  ((int128_t)a[ 3]) * a[ 3];
+    int128_t t7   = (((int128_t)a[ 3]) * a[ 4]) * 2;
+    int128_t t8   =  ((int128_t)a[ 4]) * a[ 4];
+
+    /* Carry propagation: push each column's overflow into the next. */
+    t1   += t0  >> 52; r[ 0] = t0  & 0xfffffffffffffL;
+    t2   += t1  >> 52; r[ 1] = t1  & 0xfffffffffffffL;
+    t3   += t2  >> 52; r[ 2] = t2  & 0xfffffffffffffL;
+    t4   += t3  >> 52; r[ 3] = t3  & 0xfffffffffffffL;
+    t5   += t4  >> 52; r[ 4] = t4  & 0xfffffffffffffL;
+    t6   += t5  >> 52; r[ 5] = t5  & 0xfffffffffffffL;
+    t7   += t6  >> 52; r[ 6] = t6  & 0xfffffffffffffL;
+    t8   += t7  >> 52; r[ 7] = t7  & 0xfffffffffffffL;
+    r[9] = (sp_digit)(t8 >> 52);
+                       r[8] = t8 & 0xfffffffffffffL;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full 10-word square followed by Montgomery reduction back to 5 words.
+ *
+ * r  Result of squaring.
+ * a  Number to square in Montgomery form.
+ * m  Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^52).
+ */
+static void sp_256_mont_sqr_5(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_256_sqr_5(r, a);
+    sp_256_mont_reduce_5(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times.
+ * (r = a ^ (2^n) mod m)
+ *
+ * r  Result of repeated squaring.
+ * a  Number to square in Montgomery form.
+ * n  Number of times to square.
+ * m  Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_5(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    int i;
+
+    /* First squaring copies a into r; the rest square r in place. */
+    sp_256_mont_sqr_5(r, a, m, mp);
+    for (i = 1; i < n; i++) {
+        sp_256_mont_sqr_5(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve: p - 2 as 4 x 64-bit words, little-endian.
+ * Exponent for Fermat inversion (a^(p-2) = 1/a mod p). */
+static const uint64_t p256_mod_minus_2[4] = {
+    0xfffffffffffffffdU,0x00000000ffffffffU,0x0000000000000000U,
+    0xffffffff00000001U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * Computes a^(p-2) mod p (Fermat's little theorem): either by a plain
+ * square-and-multiply over the exponent bits (small build) or by a fixed
+ * addition chain exploiting the structure of p-2 (each step's comment
+ * gives the exponent accumulated so far, in hex).
+ *
+ * r  Inverse result (Montgomery form).
+ * a  Number to invert (Montgomery form).
+ * td Temporary data (at least 3 * 2 * 5 digits in the large build).
+ */
+static void sp_256_mont_inv_5(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 5);
+    /* Left-to-right binary exponentiation over the bits of p - 2. */
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_5(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+            sp_256_mont_mul_5(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 5);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 5;
+    sp_digit* t3 = td + 4 * 5;
+    /* 0x2 */
+    sp_256_mont_sqr_5(t1, a, p256_mod, p256_mp_mod);
+    /* 0x3 */
+    sp_256_mont_mul_5(t2, t1, a, p256_mod, p256_mp_mod);
+    /* 0xc */
+    sp_256_mont_sqr_n_5(t1, t2, 2, p256_mod, p256_mp_mod);
+    /* 0xd */
+    sp_256_mont_mul_5(t3, t1, a, p256_mod, p256_mp_mod);
+    /* 0xf */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xf0 */
+    sp_256_mont_sqr_n_5(t1, t2, 4, p256_mod, p256_mp_mod);
+    /* 0xfd */
+    sp_256_mont_mul_5(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xff */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xff00 */
+    sp_256_mont_sqr_n_5(t1, t2, 8, p256_mod, p256_mp_mod);
+    /* 0xfffd */
+    sp_256_mont_mul_5(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffff */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffff0000 */
+    sp_256_mont_sqr_n_5(t1, t2, 16, p256_mod, p256_mp_mod);
+    /* 0xfffffffd */
+    sp_256_mont_mul_5(t3, t3, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000000 */
+    sp_256_mont_sqr_n_5(t1, t2, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffffffffffff */
+    sp_256_mont_mul_5(t2, t2, t1, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001 */
+    sp_256_mont_mul_5(r, t1, a, p256_mod, p256_mp_mod);
+    /* 0xffffffff000000010000000000000000000000000000000000000000 */
+    sp_256_mont_sqr_n_5(r, r, 160, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+    sp_256_mont_mul_5(r, r, t2, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+    sp_256_mont_sqr_n_5(r, r, 32, p256_mod, p256_mp_mod);
+    /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+    sp_256_mont_mul_5(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes x = X / Z^2 and y = Y / Z^3, converts both out of Montgomery
+ * form, reduces them to canonical range and sets z = 1.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data (at least 6 * 2 * 5 digits).
+ */
+static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*5;
+    int64_t n;
+
+    /* t1 = 1 / Z (Montgomery form). */
+    sp_256_mont_inv_5(t1, p->z, t + 2*5);
+
+    /* t2 = 1/Z^2, t1 = 1/Z^3. */
+    sp_256_mont_sqr_5(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_5(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_5(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    XMEMSET(r->x + 5, 0, sizeof(r->x) / 2U);
+    /* Montgomery reduce converts out of Montgomery form (multiply by 1). */
+    sp_256_mont_reduce_5(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_5(r->x, p256_mod);
+    sp_256_cond_sub_5(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_5(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_5(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 5, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_5(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_5(r->y, p256_mod);
+    sp_256_cond_sub_5(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_5(r->y);
+
+    /* Affine point: z = 1. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Word-wise add without carry propagation; callers normalize afterwards.
+ *
+ * r A single precision integer (result).
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns 0 always (no overflow possible within sp_digit).
+ */
+SP_NOINLINE static int sp_256_add_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * Unrolled word-wise add without carry propagation; callers normalize
+ * afterwards.
+ *
+ * r A single precision integer (result).
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns 0 always.
+ */
+SP_NOINLINE static int sp_256_add_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] + b[ 0];
+    r[ 1] = a[ 1] + b[ 1];
+    r[ 2] = a[ 2] + b[ 2];
+    r[ 3] = a[ 3] + b[ 3];
+    r[ 4] = a[ 4] + b[ 4];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * Adds, then conditionally subtracts the modulus (constant time) when the
+ * sum reached 2^256, detected via the top word's bit 48.
+ *
+ * r Result of addition.
+ * a First number to add in Montgomery form.
+ * b Second number to add in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_add_5(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_add_5(r, a, b);
+    sp_256_norm_5(r);
+    sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_5(r);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r Result of doubling.
+ * a Number to double in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_dbl_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_5(r, a, a);
+    sp_256_norm_5(r);
+    /* Constant-time reduction when the sum reached 2^256. */
+    sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_5(r);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Performed as two modular additions, reducing after each so every
+ * intermediate stays below 2^256.
+ *
+ * r Result of tripling.
+ * a Number to triple in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_tpl_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_5(r, a, a);
+    sp_256_norm_5(r);
+    sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_5(r);
+    (void)sp_256_add_5(r, r, a);
+    sp_256_norm_5(r);
+    sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_5(r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Word-wise subtract without borrow propagation; individual words may go
+ * negative and are fixed up by the caller (cond_add/norm).
+ *
+ * r A single precision integer (result).
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns 0 always.
+ */
+SP_NOINLINE static int sp_256_sub_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Unrolled word-wise subtract without borrow propagation; the caller
+ * fixes up negative words (cond_add/norm).
+ *
+ * r A single precision integer (result).
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns 0 always.
+ */
+SP_NOINLINE static int sp_256_sub_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] - b[ 0];
+    r[ 1] = a[ 1] - b[ 1];
+    r[ 2] = a[ 2] - b[ 2];
+    r[ 3] = a[ 3] - b[ 3];
+    r[ 4] = a[ 4] - b[ 4];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Branch-free: (b[i] & m) is b[i] when m is all ones and 0 when m is 0,
+ * so the same instructions execute regardless of the condition.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply (all ones or all zeros).
+ */
+static void sp_256_cond_add_5(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] + (b[ 0] & m);
+    r[ 1] = a[ 1] + (b[ 1] & m);
+    r[ 2] = a[ 2] + (b[ 2] & m);
+    r[ 3] = a[ 3] + (b[ 3] & m);
+    r[ 4] = a[ 4] + (b[ 4] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * Subtracts, then conditionally adds the modulus back (constant time)
+ * when the result went negative, detected via the top word's sign bits.
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_256_mont_sub_5(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_sub_5(r, a, b);
+    /* r[4] >> 48 is non-zero (sign-extended) exactly when r < 0. */
+    sp_256_cond_add_5(r, r, m, r[4] >> 48);
+    sp_256_norm_5(r);
+}
+
+/* Shift number right one bit.
+ * Bottom bit is lost.
+ *
+ * Each word takes its upper 51 bits from itself and its lowest bit from
+ * the next word up (bit 0 of a[i+1] becomes bit 51 of r[i]).
+ *
+ * r Result of shift.
+ * a Number to shift.
+ */
+SP_NOINLINE static void sp_256_rshift1_5(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<4; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 51)) & 0xfffffffffffffL;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 51)) & 0xfffffffffffffL;
+    r[1] = ((a[1] >> 1) | (a[2] << 51)) & 0xfffffffffffffL;
+    r[2] = ((a[2] >> 1) | (a[3] << 51)) & 0xfffffffffffffL;
+    r[3] = ((a[3] >> 1) | (a[4] << 51)) & 0xfffffffffffffL;
+#endif
+    r[4] = a[4] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * When a is odd, adds m first (making the sum even, since m is odd) so
+ * the right shift yields the exact modular half. Constant time.
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+static void sp_256_div2_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    /* 0 - (a[0] & 1) is an all-ones mask exactly when a is odd. */
+    sp_256_cond_add_5(r, a, m, 0 - (a[0] & 1));
+    sp_256_norm_5(r);
+    sp_256_rshift1_5(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * Jacobian doubling specialized for a = -3 (P-256): uses
+ * 3*(X-Z^2)*(X+Z^2) for the tangent slope. All values in Montgomery form.
+ *
+ * r Result of doubling point (may alias p).
+ * p Point to double.
+ * t Temporary ordinate data (at least 2 * 2 * 5 digits).
+ */
+static void sp_256_proj_point_dbl_5(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*5;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_256_mont_sqr_5(t1, p->z, p256_mod, p256_mp_mod);
+    /* Z = Y * Z */
+    sp_256_mont_mul_5(z, p->y, p->z, p256_mod, p256_mp_mod);
+    /* Z = 2Z */
+    sp_256_mont_dbl_5(z, z, p256_mod);
+    /* T2 = X - T1 */
+    sp_256_mont_sub_5(t2, p->x, t1, p256_mod);
+    /* T1 = X + T1 */
+    sp_256_mont_add_5(t1, p->x, t1, p256_mod);
+    /* T2 = T1 * T2 */
+    sp_256_mont_mul_5(t2, t1, t2, p256_mod, p256_mp_mod);
+    /* T1 = 3T2 */
+    sp_256_mont_tpl_5(t1, t2, p256_mod);
+    /* Y = 2Y */
+    sp_256_mont_dbl_5(y, p->y, p256_mod);
+    /* Y = Y * Y */
+    sp_256_mont_sqr_5(y, y, p256_mod, p256_mp_mod);
+    /* T2 = Y * Y */
+    sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod);
+    /* T2 = T2/2 */
+    sp_256_div2_5(t2, t2, p256_mod);
+    /* Y = Y * X */
+    sp_256_mont_mul_5(y, y, p->x, p256_mod, p256_mp_mod);
+    /* X = T1 * T1 */
+    sp_256_mont_sqr_5(x, t1, p256_mod, p256_mp_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_5(x, x, y, p256_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_5(x, x, y, p256_mod);
+    /* Y = Y - X */
+    sp_256_mont_sub_5(y, y, x, p256_mod);
+    /* Y = Y * T1 */
+    sp_256_mont_mul_5(y, y, t1, p256_mod, p256_mp_mod);
+    /* Y = Y - T2 */
+    sp_256_mont_sub_5(y, y, t2, p256_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation: ORs the XOR of every word pair so no
+ * early exit leaks which word differed.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_5(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Jacobian addition. Falls back to doubling when p and q represent the
+ * same point (including q = -p handled by the doubling path's inputs).
+ * Infinity handling is branch-light: the result ordinates are written to
+ * scratch when either input is infinity, and the finite operand is
+ * copied into r first.
+ *
+ * r Result of addition (may alias p, must not alias q on entry - swapped
+ *   internally if it does).
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (at least 5 * 2 * 5 digits).
+ */
+static void sp_256_proj_point_add_5(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+        sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*5;
+    sp_digit* t3 = t + 4*5;
+    sp_digit* t4 = t + 6*5;
+    sp_digit* t5 = t + 8*5;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_256* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: t1 = -q->y; same x and z and y equal to +/-q->y
+     * means p == q or p == -q, so use the doubling formula. */
+    (void)sp_256_sub_5(t1, p256_mod, q->y);
+    sp_256_norm_5(t1);
+    if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) &
+        (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_5(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        /* If either input is infinity, computed ordinates go to scratch. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* r starts as q when p is infinity, otherwise as p. */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<5; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<5; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<5; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_5(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_5(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_5(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_5(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_5(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_5(x, x, t5, p256_mod);
+        sp_256_mont_dbl_5(t1, y, p256_mod);
+        sp_256_mont_sub_5(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_5(y, y, x, p256_mod);
+        sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_5(y, y, t5, p256_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Constant-time Montgomery ladder over projective points: every bit of
+ * the scalar performs one add and one double, with the operand selected
+ * via address masking rather than a data-dependent branch.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by (5 x 52-bit words).
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifdef WOLFSSL_SP_NO_MALLOC
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 5 * 5];
+#else
+    sp_point_256* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+
+        /* t[0] = {0, 0, 1} * norm */
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        /* Top scalar word holds 48 bits (256 = 4*52 + 48). */
+        i = 4;
+        c = 48;
+        n = k[i--] << (52 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 52;
+            }
+
+            /* y = current scalar bit (consumed most significant first). */
+            y = (n >> 51) & 1;
+            n <<= 1;
+
+            /* Ladder step: t[y^1] = t[0] + t[1]; t[y] = 2 * t[y].
+             * The double operand is selected branch-free via addr_mask. */
+            sp_256_proj_point_add_5(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])),
+                    sizeof(sp_point_256));
+            sp_256_proj_point_dbl_5(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2],
+                    sizeof(sp_point_256));
+        }
+
+        if (map != 0) {
+            sp_256_map_5(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    /* Zeroize secret-dependent data, then free against the same heap
+     * hint used for allocation (previously freed with NULL, which breaks
+     * custom XMALLOC/XFREE callback builds that key off the hint). */
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Cache-resistant variant: the working point for the double is selected
+ * through addr_mask so the addresses touched do not depend on the secret
+ * bit value.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by (5 digits of 52 bits).
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 5 * 5];
+#else
+    sp_point_256* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Allocate as sp_point_256: the previous (sp_point*)/sizeof(*t) form
+     * used a type name that appears nowhere else in this file. */
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
+        err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        /* Top digit holds only 48 significant bits of the 256-bit scalar. */
+        i = 4;
+        c = 48;
+        n = k[i--] << (52 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 52;
+            }
+
+            /* y = next scalar bit, most significant first. */
+            y = (n >> 51) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_5(&t[y^1], &t[0], &t[1], tmp);
+
+            /* Select t[y] via address masks rather than a secret-dependent
+             * index, double it, and write it back the same way. */
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_256_proj_point_dbl_5(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_256_map_5(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points.
+ * Holds only the x and y ordinates (5 digits of 52 bits each); consumers
+ * of the table supply a Z ordinate of one (see sp_256_ecc_mulmod_stripe_5,
+ * which sets z to p256_norm_mod). */
+typedef struct sp_table_entry_256 {
+    sp_digit x[5];
+    sp_digit y[5];
+} sp_table_entry_256;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Uses a fixed 4-bit window: t[j] = j*g is pre-computed for j = 0..15 and
+ * the scalar is consumed four bits at a time from the most significant end.
+ * NOTE(review): the window table is indexed directly by scalar bits, so
+ * this variant does not use the masked-access selection of the
+ * cache-resistant variant.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_fast_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[16];
+    sp_point_256 rtd;
+    sp_digit tmpd[2 * 5 * 5];
+#endif
+    sp_point_256* t;
+    sp_point_256* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_5(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod);
+        t[1].infinity = 0;
+        /* Build the window table: t[j] = j * g by alternating doublings
+         * of even entries and additions for odd entries. */
+        sp_256_proj_point_dbl_5(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_256_proj_point_add_5(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_256_proj_point_dbl_5(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_256_proj_point_add_5(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_256_proj_point_dbl_5(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_256_proj_point_add_5(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_256_proj_point_dbl_5(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_256_proj_point_add_5(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_256_proj_point_dbl_5(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_256_proj_point_add_5(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_256_proj_point_dbl_5(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_256_proj_point_add_5(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_256_proj_point_dbl_5(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_256_proj_point_add_5(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        /* Extract the top 4-bit window of the scalar and load the
+         * starting point from the table. */
+        i = 3;
+        n = k[i+1] << 12;
+        c = 44;
+        y = n >> 56;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_256));
+        n <<= 8;
+        /* Remaining windows: 4 doublings then one table addition each. */
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--] << (12 - c);
+                c += 52;
+            }
+            y = (n >> 60) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_256_proj_point_dbl_5(rt, rt, tmp);
+            sp_256_proj_point_dbl_5(rt, rt, tmp);
+            sp_256_proj_point_dbl_5(rt, rt, tmp);
+            sp_256_proj_point_dbl_5(rt, rt, tmp);
+
+            sp_256_proj_point_add_5(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_5(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_256_point_free_5(rt, 1, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ * The point is updated in place; there is no separate result parameter
+ * (the header comment previously documented a non-existent 'r').
+ *
+ * p Point to double; also receives the result.
+ * n Number of times to double.
+ * t Temporary ordinate data (10 field elements of scratch used).
+ */
+static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*5;
+    sp_digit* b = t + 4*5;
+    sp_digit* t1 = t + 6*5;
+    sp_digit* t2 = t + 8*5;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_5(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_5(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_5(w, w, p256_mod, p256_mp_mod);
+
+    /* Small build: all n iterations run inside the loop (with a guard on
+     * the W update).  Non-small build: the final iteration is peeled out
+     * below so the last, unneeded W update is skipped unconditionally. */
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_5(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_5(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_5(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_5(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(b, t1, x, p256_mod, p256_mp_mod);
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_5(t2, b, p256_mod);
+        sp_256_mont_sub_5(x, x, t2, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_5(z, z, y, p256_mod, p256_mp_mod);
+        /* t2 = Y^4 */
+        sp_256_mont_sqr_5(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_5(w, w, t1, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_5(y, b, x, p256_mod);
+        sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_5(y, y, p256_mod);
+        sp_256_mont_sub_5(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* Peeled final iteration: identical to the loop body but without the
+     * W update. */
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_5(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_5(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_5(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_5(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_5(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_5(t2, b, p256_mod);
+    sp_256_mont_sub_5(x, x, t2, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_5(z, z, y, p256_mod, p256_mp_mod);
+    /* t2 = Y^4 */
+    sp_256_mont_sqr_5(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_5(y, b, x, p256_mod);
+    sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_5(y, y, p256_mod);
+    sp_256_mont_sub_5(y, y, t1, p256_mod);
+#endif
+    /* Y = Y/2 */
+    sp_256_div2_5(y, y, p256_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a Z
+ * ordinate of one (q->z is never read; a Z of one is implied), which
+ * saves work compared to the general addition.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_5(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*5;
+    sp_digit* t3 = t + 4*5;
+    sp_digit* t4 = t + 6*5;
+    sp_digit* t5 = t + 8*5;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double: if p and q share x and z, and p->y equals q->y or its
+     * negation (t1 = p256_mod - q->y), use the doubling formula instead. */
+    (void)sp_256_sub_5(t1, p256_mod, q->y);
+    sp_256_norm_5(t1);
+    if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) &
+        (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_5(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /* If either input is infinity, route the arithmetic into scratch
+         * (rp[1]) and copy the surviving input straight into r below. */
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* r = p when p is finite, otherwise r = q. */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<5; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<5; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<5; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_5(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_5(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_5(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_5(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_5(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_5(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_5(t1, t3, p256_mod);
+        sp_256_mont_sub_5(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_5(t3, t3, x, p256_mod);
+        sp_256_mont_mul_5(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_5(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_5(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert (modified in place).
+ * t Temporary data.
+ */
+static void sp_256_proj_to_affine_5(sp_point_256* a, sp_digit* t)
+{
+    sp_digit* zi   = t;          /* holds Z^-1, then Z^-3 */
+    sp_digit* zi2  = t + 2 * 5;  /* holds Z^-2 */
+    sp_digit* tmp  = t + 4 * 5;  /* scratch for the inversion */
+
+    /* zi = 1/Z */
+    sp_256_mont_inv_5(zi, a->z, tmp);
+
+    /* zi2 = 1/Z^2, zi = 1/Z^3 */
+    sp_256_mont_sqr_5(zi2, zi, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_5(zi, zi2, zi, p256_mod, p256_mp_mod);
+
+    /* x' = X/Z^2, y' = Y/Z^3, z' = 1 (Montgomery form). */
+    sp_256_mont_mul_5(a->x, a->x, zi2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_5(a->y, a->y, zi, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * Stripe layout: table[j], for j in 0..255, is the sum over the set bits
+ * b of j of 2^(32*b) * a, stored as affine x/y in Montgomery form.
+ *
+ * a     The base point.
+ * table Place to store generated point data (256 entries).
+ * tmp   Temporary data.
+ * heap  Heap to use for allocation.
+ * returns MP_OKAY on success and an error code otherwise.
+ */
+static int sp_256_gen_stripe_table_5(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_5(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, s2d, s2);
+    }
+
+    /* Convert the base point to Montgomery form. */
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_5(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_5(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_5(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_5(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        /* table[2^i] = 2^(32*i) * a, in affine Montgomery form. */
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_5(t, 32, tmp);
+            sp_256_proj_to_affine_5(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        /* Fill in the remaining combinations by adding existing entries:
+         * table[j] = table[1<<i] + table[j - (1<<i)]. */
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_5(t, s1, s2, tmp);
+                sp_256_proj_to_affine_5(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_5(s2, 0, heap);
+    sp_256_point_free_5(s1, 0, heap);
+    sp_256_point_free_5( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Unused here; the point is represented by the pre-computed table.
+ * table Stripe table for the point (see sp_256_gen_stripe_table_5).
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_5(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 5 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_5(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_5(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Table entries are affine: supply the implied Z of one. */
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        /* Assemble the table index from one bit of each 32-bit stripe:
+         * bit j of y is bit (x + 32*j) of the scalar. */
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 52] >> (x % 52)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        /* One double and one table addition per remaining bit position. */
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 52] >> (x % 52)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_5(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_5(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_5(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_5(p, 0, heap);
+    sp_256_point_free_5(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+/* Fixed-point cache entry: remembers a base point together with the
+ * stripe table generated for it. */
+typedef struct sp_cache_256_t {
+    /* X ordinate of the cached point. */
+    sp_digit x[5];
+    /* Y ordinate of the cached point. */
+    sp_digit y[5];
+    /* Pre-computed stripe table for the point. */
+    sp_table_entry_256 table[256];
+    /* Number of times the point has been requested; used for eviction. */
+    uint32_t cnt;
+    /* Non-zero when this entry holds a valid point. */
+    int set;
+} sp_cache_256_t;
+
+/* Point cache; per-thread when thread-local storage is available. */
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+/* Index of the entry last returned; -1 before first use. */
+static THREAD_LS_T int sp_cache_256_last = -1;
+/* Whether the cache entries have been initialized. */
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    /* Mutex protecting the shared cache when thread-local storage is
+     * not available. */
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+/* Find (or create) the cache entry for the given base point.
+ * On a hit the entry's use count is bumped; on a miss an empty slot is
+ * claimed, or the least-used entry is evicted when the cache is full.
+ *
+ * g     Base point to look up.
+ * cache Receives a pointer to the entry for g.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int idx;
+    int slot;
+    uint32_t lowest;
+
+    /* One-time initialization: mark every entry as empty. */
+    if (sp_cache_256_inited == 0) {
+        for (idx = 0; idx < FP_ENTRIES; idx++) {
+            sp_cache_256[idx].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Search the cache for this base point. */
+    for (idx = 0; idx < FP_ENTRIES; idx++) {
+        if (sp_cache_256[idx].set &&
+                (sp_256_cmp_equal_5(g->x, sp_cache_256[idx].x) &
+                 sp_256_cmp_equal_5(g->y, sp_cache_256[idx].y))) {
+            sp_cache_256[idx].cnt++;
+            break;
+        }
+    }
+
+    if (idx == FP_ENTRIES) {
+        /* Not cached: scan for an empty slot, starting just after the
+         * entry handed out last time. */
+        idx = (sp_cache_256_last + 1) % FP_ENTRIES;
+        while (idx != sp_cache_256_last && sp_cache_256[idx].set) {
+            idx = (idx + 1) % FP_ENTRIES;
+        }
+
+        /* Every slot in use: evict the entry with the lowest use count.
+         * If no count is strictly lower than entry 0's, idx stays at
+         * sp_cache_256_last (matching the original behavior). */
+        if (idx == sp_cache_256_last) {
+            lowest = sp_cache_256[0].cnt;
+            for (slot = 1; slot < FP_ENTRIES; slot++) {
+                if (sp_cache_256[slot].cnt < lowest) {
+                    idx = slot;
+                    lowest = sp_cache_256[idx].cnt;
+                }
+            }
+        }
+
+        /* Claim the slot for this point. */
+        XMEMCPY(sp_cache_256[idx].x, g->x, sizeof(sp_cache_256[idx].x));
+        XMEMCPY(sp_cache_256[idx].y, g->y, sizeof(sp_cache_256[idx].y));
+        sp_cache_256[idx].set = 1;
+        sp_cache_256[idx].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[idx];
+    sp_cache_256_last = idx;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * (Note: despite the earlier comment this is not base-point specific;
+ * it multiplies the supplied point g.)
+ *
+ * With FP_ECC, points are cached: once a point is seen for the second
+ * time its stripe table is generated and subsequent multiplications use
+ * the faster table-driven method.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_fast_5(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 5 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+        wc_InitMutex(&sp_cache_256_lock);
+        initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        /* Second sighting of this point: build its stripe table. */
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_5(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        /* First sighting: no table yet, use the plain windowed method. */
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_fast_5(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_5(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[5];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_5(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Convert from mp_int/ecc_point to internal representation. */
+        sp_256_from_mp(k, 5, km);
+        sp_256_point_from_ecc_point_5(point, gm);
+
+        err = sp_256_ecc_mulmod_5(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_5(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_5(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Small build: no pre-computed base-point table, so this simply reuses
+ * the generic multiplication on p256_base.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_256_ecc_mulmod_5(r, &p256_base, k, map, heap);
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Cache-resistant build: no pre-computed base-point table, so this simply
+ * reuses the generic (masked-access) multiplication on p256_base.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_256_ecc_mulmod_5(r, &p256_base, k, map, heap);
+}
+
+#else
+static const sp_table_entry_256 p256_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x730d418a9143cL,0xfc5fedb60179eL,0x762251075ba95L,0x55c679fb732b7L,
+ 0x018905f76a537L },
+ { 0x25357ce95560aL,0xe4ba19e45cddfL,0xd21f3258b4ab8L,0x5d85d2e88688dL,
+ 0x08571ff182588L } },
+ /* 2 */
+ { { 0x886024147519aL,0xac26b372f0202L,0x785ebc8d0981eL,0x58e9a9d4a7caaL,
+ 0x0d953c50ddbdfL },
+ { 0x361ccfd590f8fL,0x6b44e6c9179d6L,0x2eb64cf72e962L,0x88f37fd961102L,
+ 0x0863ebb7e9eb2L } },
+ /* 3 */
+ { { 0x6b6235cdb6485L,0xa22f0a2f97785L,0xf7e300b808f0eL,0x80a03e68d9544L,
+ 0x000076055b5ffL },
+ { 0x4eb9b838d2010L,0xbb3243708a763L,0x42a660654014fL,0x3ee0e0e47d398L,
+ 0x0830877613437L } },
+ /* 4 */
+ { { 0x22fc516a0d2bbL,0x6c1a6234994f9L,0x7c62c8b0d5cc1L,0x667f9241cf3a5L,
+ 0x02f5e6961fd1bL },
+ { 0x5c70bf5a01797L,0x4d609561925c1L,0x71fdb523d20b4L,0x0f7b04911b370L,
+ 0x0f648f9168d6fL } },
+ /* 5 */
+ { { 0x66847e137bbbcL,0x9e8a6a0bec9e5L,0x9d73463e43446L,0x0015b1c427617L,
+ 0x05abe0285133dL },
+ { 0xa837cc04c7dabL,0x4c43260c0792aL,0x8e6cc37573d9fL,0x73830c9315627L,
+ 0x094bb725b6b6fL } },
+ /* 6 */
+ { { 0x9b48f720f141cL,0xcd2df5bc74bbfL,0x11045c46199b3L,0xc4efdc3f61294L,
+ 0x0cdd6bbcb2f7dL },
+ { 0x6700beaf436fdL,0x6db99326beccaL,0x14f25226f647fL,0xe5f60c0fa7920L,
+ 0x0a361bebd4bdaL } },
+ /* 7 */
+ { { 0xa2558597c13c7L,0x5f50b7c3e128aL,0x3c09d1dc38d63L,0x292c07039aecfL,
+ 0x0ba12ca09c4b5L },
+ { 0x08fa459f91dfdL,0x66ceea07fb9e4L,0xd780b293af43bL,0xef4b1eceb0899L,
+ 0x053ebb99d701fL } },
+ /* 8 */
+ { { 0x7ee31b0e63d34L,0x72a9e54fab4feL,0x5e7b5a4f46005L,0x4831c0493334dL,
+ 0x08589fb9206d5L },
+ { 0x0f5cc6583553aL,0x4ae25649e5aa7L,0x0044652087909L,0x1c4fcc9045071L,
+ 0x0ebb0696d0254L } },
+ /* 9 */
+ { { 0x6ca15ac1647c5L,0x47c4cf5799461L,0x64dfbacb8127dL,0x7da3dc666aa37L,
+ 0x0eb2820cbd1b2L },
+ { 0x6f8d86a87e008L,0x9d922378f3940L,0x0ccecb2d87dfaL,0xda1d56ed2e428L,
+ 0x01f28289b55a7L } },
+ /* 10 */
+ { { 0xaa0c03b89da99L,0x9eb8284022abbL,0x81c05e8a6f2d7L,0x4d6327847862bL,
+ 0x0337a4b5905e5L },
+ { 0x7500d21f7794aL,0xb77d6d7f613c6L,0x4cfd6e8207005L,0xfbd60a5a37810L,
+ 0x00d65e0d5f4c2L } },
+ /* 11 */
+ { { 0x09bbeb5275d38L,0x450be0a358d9dL,0x73eb2654268a7L,0xa232f0762ff49L,
+ 0x0c23da24252f4L },
+ { 0x1b84f0b94520cL,0x63b05bd78e5daL,0x4d29ea1096667L,0xcff13a4dcb869L,
+ 0x019de3b8cc790L } },
+ /* 12 */
+ { { 0xa716c26c5fe04L,0x0b3bba1bdb183L,0x4cb712c3b28deL,0xcbfd7432c586aL,
+ 0x0e34dcbd491fcL },
+ { 0x8d46baaa58403L,0x8682e97a53b40L,0x6aaa8af9a6974L,0x0f7f9e3901273L,
+ 0x0e7641f447b4eL } },
+ /* 13 */
+ { { 0x53941df64ba59L,0xec0b0242fc7d7L,0x1581859d33f10L,0x57bf4f06dfc6aL,
+ 0x04a12df57052aL },
+ { 0x6338f9439dbd0L,0xd4bde53e1fbfaL,0x1f1b314d3c24bL,0xea46fd5e4ffa2L,
+ 0x06af5aa93bb5bL } },
+ /* 14 */
+ { { 0x0b69910c91999L,0x402a580491da1L,0x8cc20900a24b4L,0x40133e0094b4bL,
+ 0x05fe3475a66a4L },
+ { 0x8cabdf93e7b4bL,0x1a7c23f91ab0fL,0xd1e6263292b50L,0xa91642e889aecL,
+ 0x0b544e308ecfeL } },
+ /* 15 */
+ { { 0x8c6e916ddfdceL,0x66f89179e6647L,0xd4e67e12c3291L,0xc20b4e8d6e764L,
+ 0x0e0b6b2bda6b0L },
+ { 0x12df2bb7efb57L,0xde790c40070d3L,0x79bc9441aac0dL,0x3774f90336ad6L,
+ 0x071c023de25a6L } },
+ /* 16 */
+ { { 0x8c244bfe20925L,0xc38fdce86762aL,0xd38706391c19aL,0x24f65a96a5d5dL,
+ 0x061d587d421d3L },
+ { 0x673a2a37173eaL,0x0853778b65e87L,0x5bab43e238480L,0xefbe10f8441e0L,
+ 0x0fa11fe124621L } },
+ /* 17 */
+ { { 0x91f2b2cb19ffdL,0x5bb1923c231c8L,0xac5ca8e01ba8dL,0xbedcb6d03d678L,
+ 0x0586eb04c1f13L },
+ { 0x5c6e527e8ed09L,0x3c1819ede20c3L,0x6c652fa1e81a3L,0x4f11278fd6c05L,
+ 0x019d5ac087086L } },
+ /* 18 */
+ { { 0x9f581309a4e1fL,0x1be92700741e9L,0xfd28d20ab7de7L,0x563f26a5ef0beL,
+ 0x0e7c0073f7f9cL },
+ { 0xd663a0ef59f76L,0x5420fcb0501f6L,0xa6602d4669b3bL,0x3c0ac08c1f7a7L,
+ 0x0e08504fec65bL } },
+ /* 19 */
+ { { 0x8f68da031b3caL,0x9ee6da6d66f09L,0x4f246e86d1cabL,0x96b45bfd81fa9L,
+ 0x078f018825b09L },
+ { 0xefde43a25787fL,0x0d1dccac9bb7eL,0x35bfc368016f8L,0x747a0cea4877bL,
+ 0x043a773b87e94L } },
+ /* 20 */
+ { { 0x77734d2b533d5L,0xf6a1bdddc0625L,0x79ec293673b8aL,0x66b1577e7c9aaL,
+ 0x0bb6de651c3b2L },
+ { 0x9303ab65259b3L,0xd3d03a7480e7eL,0xb3cfc27d6a0afL,0xb99bc5ac83d19L,
+ 0x060b4619a5d18L } },
+ /* 21 */
+ { { 0xa38e11ae5aa1cL,0x2b49e73658bd6L,0xe5f87edb8b765L,0xffcd0b130014eL,
+ 0x09d0f27b2aeebL },
+ { 0x246317a730a55L,0x2fddbbc83aca9L,0xc019a719c955bL,0xc48d07c1dfe0aL,
+ 0x0244a566d356eL } },
+ /* 22 */
+ { { 0x0394aeacf1f96L,0xa9024c271c6dbL,0x2cbd3b99f2122L,0xef692626ac1b8L,
+ 0x045e58c873581L },
+ { 0xf479da38f9dbcL,0x46e888a040d3fL,0x6e0bed7a8aaf1L,0xb7a4945adfb24L,
+ 0x0c040e21cc1e4L } },
+ /* 23 */
+ { { 0xaf0006f8117b6L,0xff73a35433847L,0xd9475eb651969L,0x6ec7482b35761L,
+ 0x01cdf5c97682cL },
+ { 0x775b411f04839L,0xf448de16987dbL,0x70b32197dbeacL,0xff3db2921dd1bL,
+ 0x0046755f8a92dL } },
+ /* 24 */
+ { { 0xac5d2bce8ffcdL,0x8b2fe61a82cc8L,0x202d6c70d53c4L,0xa5f3f6f161727L,
+ 0x0046e5e113b83L },
+ { 0x8ff64d8007f01L,0x125af43183e7bL,0x5e1a03c7fb1efL,0x005b045c5ea63L,
+ 0x06e0106c3303dL } },
+ /* 25 */
+ { { 0x7358488dd73b1L,0x8f995ed0d948cL,0x56a2ab7767070L,0xcf1f38385ea8cL,
+ 0x0442594ede901L },
+ { 0xaa2c912d4b65bL,0x3b96c90c37f8fL,0xe978d1f94c234L,0xe68ed326e4a15L,
+ 0x0a796fa514c2eL } },
+ /* 26 */
+ { { 0xfb604823addd7L,0x83e56693b3359L,0xcbf3c809e2a61L,0x66e9f885b78e3L,
+ 0x0e4ad2da9c697L },
+ { 0xf7f428e048a61L,0x8cc092d9a0357L,0x03ed8ef082d19L,0x5143fc3a1af4cL,
+ 0x0c5e94046c37bL } },
+ /* 27 */
+ { { 0xa538c2be75f9eL,0xe8cb123a78476L,0x109c04b6fd1a9L,0x4747d85e4df0bL,
+ 0x063283dafdb46L },
+ { 0x28cf7baf2df15L,0x550ad9a7f4ce7L,0x834bcc3e592c4L,0xa938fab226adeL,
+ 0x068bd19ab1981L } },
+ /* 28 */
+ { { 0xead511887d659L,0xf4b359305ac08L,0xfe74fe33374d5L,0xdfd696986981cL,
+ 0x0495292f53c6fL },
+ { 0x78c9e1acec896L,0x10ec5b44844a8L,0x64d60a7d964b2L,0x68376696f7e26L,
+ 0x00ec7530d2603L } },
+ /* 29 */
+ { { 0x13a05ad2687bbL,0x6af32e21fa2daL,0xdd4607ba1f83bL,0x3f0b390f5ef51L,
+ 0x00f6207a66486L },
+ { 0x7e3bb0f138233L,0x6c272aa718bd6L,0x6ec88aedd66b9L,0x6dcf8ed004072L,
+ 0x0ff0db07208edL } },
+ /* 30 */
+ { { 0xfa1014c95d553L,0xfd5d680a8a749L,0xf3b566fa44052L,0x0ea3183b4317fL,
+ 0x0313b513c8874L },
+ { 0x2e2ac08d11549L,0x0bb4dee21cb40L,0x7f2320e071ee1L,0x9f8126b987dd4L,
+ 0x02d3abcf986f1L } },
+ /* 31 */
+ { { 0x88501815581a2L,0x56632211af4c2L,0xcab2e999a0a6dL,0x8cdf19ba7a0f0L,
+ 0x0c036fa10ded9L },
+ { 0xe08bac1fbd009L,0x9006d1581629aL,0xb9e0d8f0b68b1L,0x0194c2eb32779L,
+ 0x0a6b2a2c4b6d4L } },
+ /* 32 */
+ { { 0x3e50f6d3549cfL,0x6ffacd665ed43L,0xe11fcb46f3369L,0x9860695bfdaccL,
+ 0x0810ee252af7cL },
+ { 0x50fe17159bb2cL,0xbe758b357b654L,0x69fea72f7dfbeL,0x17452b057e74dL,
+ 0x0d485717a9273L } },
+ /* 33 */
+ { { 0x41a8af0cb5a98L,0x931f3110bf117L,0xb382adfd3da8fL,0x604e1994e2cbaL,
+ 0x06a6045a72f9aL },
+ { 0xc0d3fa2b2411dL,0x3e510e96e0170L,0x865b3ccbe0eb8L,0x57903bcc9f738L,
+ 0x0d3e45cfaf9e1L } },
+ /* 34 */
+ { { 0xf69bbe83f7669L,0x8272877d6bce1L,0x244278d09f8aeL,0xc19c9548ae543L,
+ 0x0207755dee3c2L },
+ { 0xd61d96fef1945L,0xefb12d28c387bL,0x2df64aa18813cL,0xb00d9fbcd1d67L,
+ 0x048dc5ee57154L } },
+ /* 35 */
+ { { 0x790bff7e5a199L,0xcf989ccbb7123L,0xa519c79e0efb8L,0xf445c27a2bfe0L,
+ 0x0f2fb0aeddff6L },
+ { 0x09575f0b5025fL,0xd740fa9f2241cL,0x80bfbd0550543L,0xd5258fa3c8ad3L,
+ 0x0a13e9015db28L } },
+ /* 36 */
+ { { 0x7a350a2b65cbcL,0x722a464226f9fL,0x23f07a10b04b9L,0x526f265ce241eL,
+ 0x02bf0d6b01497L },
+ { 0x4dd3f4b216fb7L,0x67fbdda26ad3dL,0x708505cf7d7b8L,0xe89faeb7b83f6L,
+ 0x042a94a5a162fL } },
+ /* 37 */
+ { { 0x6ad0beaadf191L,0x9025a268d7584L,0x94dc1f60f8a48L,0xde3de86030504L,
+ 0x02c2dd969c65eL },
+ { 0x2171d93849c17L,0xba1da250dd6d0L,0xc3a5485460488L,0x6dbc4810c7063L,
+ 0x0f437fa1f42c5L } },
+ /* 38 */
+ { { 0x0d7144a0f7dabL,0x931776e9ac6aaL,0x5f397860f0497L,0x7aa852c0a050fL,
+ 0x0aaf45b335470L },
+ { 0x37c33c18d364aL,0x063e49716585eL,0x5ec5444d40b9bL,0x72bcf41716811L,
+ 0x0cdf6310df4f2L } },
+ /* 39 */
+ { { 0x3c6238ea8b7efL,0x1885bc2287747L,0xbda8e3408e935L,0x2ff2419567722L,
+ 0x0f0d008bada9eL },
+ { 0x2671d2414d3b1L,0x85b019ea76291L,0x53bcbdbb37549L,0x7b8b5c61b96d4L,
+ 0x05bd5c2f5ca88L } },
+ /* 40 */
+ { { 0xf469ef49a3154L,0x956e2b2e9aef0L,0xa924a9c3e85a5L,0x471945aaec1eaL,
+ 0x0aa12dfc8a09eL },
+ { 0x272274df69f1dL,0x2ca2ff5e7326fL,0x7a9dd44e0e4c8L,0xa901b9d8ce73bL,
+ 0x06c036e73e48cL } },
+ /* 41 */
+ { { 0xae12a0f6e3138L,0x0025ad345a5cfL,0x5672bc56966efL,0xbe248993c64b4L,
+ 0x0292ff65896afL },
+ { 0x50d445e213402L,0x274392c9fed52L,0xa1c72e8f6580eL,0x7276097b397fdL,
+ 0x0644e0c90311bL } },
+ /* 42 */
+ { { 0x421e1a47153f0L,0x79920418c9e1eL,0x05d7672b86c3bL,0x9a7793bdce877L,
+ 0x0f25ae793cab7L },
+ { 0x194a36d869d0cL,0x824986c2641f3L,0x96e945e9d55c8L,0x0a3e49fb5ea30L,
+ 0x039b8e65313dbL } },
+ /* 43 */
+ { { 0x54200b6fd2e59L,0x669255c98f377L,0xe2a573935e2c0L,0xdb06d9dab21a0L,
+ 0x039122f2f0f19L },
+ { 0xce1e003cad53cL,0x0fe65c17e3cfbL,0xaa13877225b2cL,0xff8d72baf1d29L,
+ 0x08de80af8ce80L } },
+ /* 44 */
+ { { 0xea8d9207bbb76L,0x7c21782758afbL,0xc0436b1921c7eL,0x8c04dfa2b74b1L,
+ 0x0871949062e36L },
+ { 0x928bba3993df5L,0xb5f3b3d26ab5fL,0x5b55050639d75L,0xfde1011aa78a8L,
+ 0x0fc315e6a5b74L } },
+ /* 45 */
+ { { 0xfd41ae8d6ecfaL,0xf61aec7f86561L,0x924741d5f8c44L,0x908898452a7b4L,
+ 0x0e6d4a7adee38L },
+ { 0x52ed14593c75dL,0xa4dd271162605L,0xba2c7db70a70dL,0xae57d2aede937L,
+ 0x035dfaf9a9be2L } },
+ /* 46 */
+ { { 0x56fcdaa736636L,0x97ae2cab7e6b9L,0xf34996609f51dL,0x0d2bfb10bf410L,
+ 0x01da5c7d71c83L },
+ { 0x1e4833cce6825L,0x8ff9573c3b5c4L,0x23036b815ad11L,0xb9d6a28552c7fL,
+ 0x07077c0fddbf4L } },
+ /* 47 */
+ { { 0x3ff8d46b9661cL,0x6b0d2cfd71bf6L,0x847f8f7a1dfd3L,0xfe440373e140aL,
+ 0x053a8632ee50eL },
+ { 0x6ff68696d8051L,0x95c74f468a097L,0xe4e26bddaec0cL,0xfcc162994dc35L,
+ 0x0028ca76d34e1L } },
+ /* 48 */
+ { { 0xd47dcfc9877eeL,0x10801d0002d11L,0x4c260b6c8b362L,0xf046d002c1175L,
+ 0x004c17cd86962L },
+ { 0xbd094b0daddf5L,0x7524ce55c06d9L,0x2da03b5bea235L,0x7474663356e67L,
+ 0x0f7ba4de9fed9L } },
+ /* 49 */
+ { { 0xbfa34ebe1263fL,0x3571ae7ce6d0dL,0x2a6f523557637L,0x1c41d24405538L,
+ 0x0e31f96005213L },
+ { 0xb9216ea6b6ec6L,0x2e73c2fc44d1bL,0x9d0a29437a1d1L,0xd47bc10e7eac8L,
+ 0x0aa3a6259ce34L } },
+ /* 50 */
+ { { 0xf9df536f3dcd3L,0x50d2bf7360fbcL,0xf504f5b6cededL,0xdaee491710fadL,
+ 0x02398dd627e79L },
+ { 0x705a36d09569eL,0xbb5149f769cf4L,0x5f6034cea0619L,0x6210ff9c03773L,
+ 0x05717f5b21c04L } },
+ /* 51 */
+ { { 0x229c921dd895eL,0x0040c284519feL,0xd637ecd8e5185L,0x28defa13d2391L,
+ 0x0660a2c560e3cL },
+ { 0xa88aed67fcbd0L,0x780ea9f0969ccL,0x2e92b4dc84724L,0x245332b2f4817L,
+ 0x0624ee54c4f52L } },
+ /* 52 */
+ { { 0x49ce4d897ecccL,0xd93f9880aa095L,0x43a7c204d49d1L,0xfbc0723c24230L,
+ 0x04f392afb92bdL },
+ { 0x9f8fa7de44fd9L,0xe457b32156696L,0x68ebc3cb66cfbL,0x399cdb2fa8033L,
+ 0x08a3e7977ccdbL } },
+ /* 53 */
+ { { 0x1881f06c4b125L,0x00f6e3ca8cddeL,0xc7a13e9ae34e3L,0x4404ef6999de5L,
+ 0x03888d02370c2L },
+ { 0x8035644f91081L,0x615f015504762L,0x32cd36e3d9fcfL,0x23361827edc86L,
+ 0x0a5e62e471810L } },
+ /* 54 */
+ { { 0x25ee32facd6c8L,0x5454bcbc661a8L,0x8df9931699c63L,0x5adc0ce3edf79L,
+ 0x02c4768e6466aL },
+ { 0x6ff8c90a64bc9L,0x20e4779f5cb34L,0xc05e884630a60L,0x52a0d949d064bL,
+ 0x07b5e6441f9e6L } },
+ /* 55 */
+ { { 0x9422c1d28444aL,0xd8be136a39216L,0xb0c7fcee996c5L,0x744a2387afe5fL,
+ 0x0b8af73cb0c8dL },
+ { 0xe83aa338b86fdL,0x58a58a5cff5fdL,0x0ac9433fee3f1L,0x0895c9ee8f6f2L,
+ 0x0a036395f7f3fL } },
+ /* 56 */
+ { { 0x3c6bba10f7770L,0x81a12a0e248c7L,0x1bc2b9fa6f16dL,0xb533100df6825L,
+ 0x04be36b01875fL },
+ { 0x6086e9fb56dbbL,0x8b07e7a4f8922L,0x6d52f20306fefL,0x00c0eeaccc056L,
+ 0x08cbc9a871bdcL } },
+ /* 57 */
+ { { 0x1895cc0dac4abL,0x40712ff112e13L,0xa1cee57a874a4L,0x35f86332ae7c6L,
+ 0x044e7553e0c08L },
+ { 0x03fff7734002dL,0x8b0b34425c6d5L,0xe8738b59d35cbL,0xfc1895f702760L,
+ 0x0470a683a5eb8L } },
+ /* 58 */
+ { { 0x761dc90513482L,0x2a01e9276a81bL,0xce73083028720L,0xc6efcda441ee0L,
+ 0x016410690c63dL },
+ { 0x34a066d06a2edL,0x45189b100bf50L,0xb8218c9dd4d77L,0xbb4fd914ae72aL,
+ 0x0d73479fd7abcL } },
+ /* 59 */
+ { { 0xefb165ad4c6e5L,0x8f5b06d04d7edL,0x575cb14262cf0L,0x666b12ed5bb18L,
+ 0x0816469e30771L },
+ { 0xb9d79561e291eL,0x22c1de1661d7aL,0x35e0513eb9dafL,0x3f9cf49827eb1L,
+ 0x00a36dd23f0ddL } },
+ /* 60 */
+ { { 0xd32c741d5533cL,0x9e8684628f098L,0x349bd117c5f5aL,0xb11839a228adeL,
+ 0x0e331dfd6fdbaL },
+ { 0x0ab686bcc6ed8L,0xbdef7a260e510L,0xce850d77160c3L,0x33899063d9a7bL,
+ 0x0d3b4782a492eL } },
+ /* 61 */
+ { { 0x9b6e8f3821f90L,0xed66eb7aada14L,0xa01311692edd9L,0xa5bd0bb669531L,
+ 0x07281275a4c86L },
+ { 0x858f7d3ff47e5L,0xbc61016441503L,0xdfd9bb15e1616L,0x505962b0f11a7L,
+ 0x02c062e7ece14L } },
+ /* 62 */
+ { { 0xf996f0159ac2eL,0x36cbdb2713a76L,0x8e46047281e77L,0x7ef12ad6d2880L,
+ 0x0282a35f92c4eL },
+ { 0x54b1ec0ce5cd2L,0xc91379c2299c3L,0xe82c11ecf99efL,0x2abd992caf383L,
+ 0x0c71cd513554dL } },
+ /* 63 */
+ { { 0x5de9c09b578f4L,0x58e3affa7a488L,0x9182f1f1884e2L,0xf3a38f76b1b75L,
+ 0x0c50f6740cf47L },
+ { 0x4adf3374b68eaL,0x2369965fe2a9cL,0x5a53050a406f3L,0x58dc2f86a2228L,
+ 0x0b9ecb3a72129L } },
+ /* 64 */
+ { { 0x8410ef4f8b16aL,0xfec47b266a56fL,0xd9c87c197241aL,0xab1b0a406b8e6L,
+ 0x0803f3e02cd42L },
+ { 0x309a804dbec69L,0xf73bbad05f7f0L,0xd8e197fa83b85L,0xadc1c6097273aL,
+ 0x0c097440e5067L } },
+ /* 65 */
+ { { 0xa56f2c379ab34L,0x8b841df8d1846L,0x76c68efa8ee06L,0x1f30203144591L,
+ 0x0f1af32d5915fL },
+ { 0x375315d75bd50L,0xbaf72f67bc99cL,0x8d7723f837cffL,0x1c8b0613a4184L,
+ 0x023d0f130e2d4L } },
+ /* 66 */
+ { { 0xab6edf41500d9L,0xe5fcbeada8857L,0x97259510d890aL,0xfadd52fe86488L,
+ 0x0b0288dd6c0a3L },
+ { 0x20f30650bcb08L,0x13695d6e16853L,0x989aa7671af63L,0xc8d231f520a7bL,
+ 0x0ffd3724ff408L } },
+ /* 67 */
+ { { 0x68e64b458e6cbL,0x20317a5d28539L,0xaa75f56992dadL,0x26df3814ae0b7L,
+ 0x0f5590f4ad78cL },
+ { 0x24bd3cf0ba55aL,0x4a0c778bae0fcL,0x83b674a0fc472L,0x4a201ce9864f6L,
+ 0x018d6da54f6f7L } },
+ /* 68 */
+ { { 0x3e225d5be5a2bL,0x835934f3c6ed9L,0x2626ffc6fe799L,0x216a431409262L,
+ 0x050bbb4d97990L },
+ { 0x191c6e57ec63eL,0x40181dcdb2378L,0x236e0f665422cL,0x49c341a8099b0L,
+ 0x02b10011801feL } },
+ /* 69 */
+ { { 0x8b5c59b391593L,0xa2598270fcfc6L,0x19adcbbc385f5L,0xae0c7144f3aadL,
+ 0x0dd55899983fbL },
+ { 0x88b8e74b82ff4L,0x4071e734c993bL,0x3c0322ad2e03cL,0x60419a7a9eaf4L,
+ 0x0e6e4c551149dL } },
+ /* 70 */
+ { { 0x655bb1e9af288L,0x64f7ada93155fL,0xb2820e5647e1aL,0x56ff43697e4bcL,
+ 0x051e00db107edL },
+ { 0x169b8771c327eL,0x0b4a96c2ad43dL,0xdeb477929cdb2L,0x9177c07d51f53L,
+ 0x0e22f42414982L } },
+ /* 71 */
+ { { 0x5e8f4635f1abbL,0xb568538874cd4L,0x5a8034d7edc0cL,0x48c9c9472c1fbL,
+ 0x0f709373d52dcL },
+ { 0x966bba8af30d6L,0x4af137b69c401L,0x361c47e95bf5fL,0x5b113966162a9L,
+ 0x0bd52d288e727L } },
+ /* 72 */
+ { { 0x55c7a9c5fa877L,0x727d3a3d48ab1L,0x3d189d817dad6L,0x77a643f43f9e7L,
+ 0x0a0d0f8e4c8aaL },
+ { 0xeafd8cc94f92dL,0xbe0c4ddb3a0bbL,0x82eba14d818c8L,0x6a0022cc65f8bL,
+ 0x0a56c78c7946dL } },
+ /* 73 */
+ { { 0x2391b0dd09529L,0xa63daddfcf296L,0xb5bf481803e0eL,0x367a2c77351f5L,
+ 0x0d8befdf8731aL },
+ { 0x19d42fc0157f4L,0xd7fec8e650ab9L,0x2d48b0af51caeL,0x6478cdf9cb400L,
+ 0x0854a68a5ce9fL } },
+ /* 74 */
+ { { 0x5f67b63506ea5L,0x89a4fe0d66dc3L,0xe95cd4d9286c4L,0x6a953f101d3bfL,
+ 0x05cacea0b9884L },
+ { 0xdf60c9ceac44dL,0xf4354d1c3aa90L,0xd5dbabe3db29aL,0xefa908dd3de8aL,
+ 0x0e4982d1235e4L } },
+ /* 75 */
+ { { 0x04a22c34cd55eL,0xb32680d132231L,0xfa1d94358695bL,0x0499fb345afa1L,
+ 0x08046b7f616b2L },
+ { 0x3581e38e7d098L,0x8df46f0b70b53L,0x4cb78c4d7f61eL,0xaf5530dea9ea4L,
+ 0x0eb17ca7b9082L } },
+ /* 76 */
+ { { 0x1b59876a145b9L,0x0fc1bc71ec175L,0x92715bba5cf6bL,0xe131d3e035653L,
+ 0x0097b00bafab5L },
+ { 0x6c8e9565f69e1L,0x5ab5be5199aa6L,0xa4fd98477e8f7L,0xcc9e6033ba11dL,
+ 0x0f95c747bafdbL } },
+ /* 77 */
+ { { 0xf01d3bebae45eL,0xf0c4bc6955558L,0xbc64fc6a8ebe9L,0xd837aeb705b1dL,
+ 0x03512601e566eL },
+ { 0x6f1e1fa1161cdL,0xd54c65ef87933L,0x24f21e5328ab8L,0xab6b4757eee27L,
+ 0x00ef971236068L } },
+ /* 78 */
+ { { 0x98cf754ca4226L,0x38f8642c8e025L,0x68e17905eede1L,0xbc9548963f744L,
+ 0x0fc16d9333b4fL },
+ { 0x6fb31e7c800caL,0x312678adaabe9L,0xff3e8b5138063L,0x7a173d6244976L,
+ 0x014ca4af1b95dL } },
+ /* 79 */
+ { { 0x771babd2f81d5L,0x6901f7d1967a4L,0xad9c9071a5f9dL,0x231dd898bef7cL,
+ 0x04057b063f59cL },
+ { 0xd82fe89c05c0aL,0x6f1dc0df85bffL,0x35a16dbe4911cL,0x0b133befccaeaL,
+ 0x01c3b5d64f133L } },
+ /* 80 */
+ { { 0x14bfe80ec21feL,0x6ac255be825feL,0xf4a5d67f6ce11L,0x63af98bc5a072L,
+ 0x0fad27148db7eL },
+ { 0x0b6ac29ab05b3L,0x3c4e251ae690cL,0x2aade7d37a9a8L,0x1a840a7dc875cL,
+ 0x077387de39f0eL } },
+ /* 81 */
+ { { 0xecc49a56c0dd7L,0xd846086c741e9L,0x505aecea5cffcL,0xc47e8f7a1408fL,
+ 0x0b37b85c0bef0L },
+ { 0x6b6e4cc0e6a8fL,0xbf6b388f23359L,0x39cef4efd6d4bL,0x28d5aba453facL,
+ 0x09c135ac8f9f6L } },
+ /* 82 */
+ { { 0xa320284e35743L,0xb185a3cdef32aL,0xdf19819320d6aL,0x851fb821b1761L,
+ 0x05721361fc433L },
+ { 0xdb36a71fc9168L,0x735e5c403c1f0L,0x7bcd8f55f98baL,0x11bdf64ca87e3L,
+ 0x0dcbac3c9e6bbL } },
+ /* 83 */
+ { { 0xd99684518cbe2L,0x189c9eb04ef01L,0x47feebfd242fcL,0x6862727663c7eL,
+ 0x0b8c1c89e2d62L },
+ { 0x58bddc8e1d569L,0xc8b7d88cd051aL,0x11f31eb563809L,0x22d426c27fd9fL,
+ 0x05d23bbda2f94L } },
+ /* 84 */
+ { { 0xc729495c8f8beL,0x803bf362bf0a1L,0xf63d4ac2961c4L,0xe9009e418403dL,
+ 0x0c109f9cb91ecL },
+ { 0x095d058945705L,0x96ddeb85c0c2dL,0xa40449bb9083dL,0x1ee184692b8d7L,
+ 0x09bc3344f2eeeL } },
+ /* 85 */
+ { { 0xae35642913074L,0x2748a542b10d5L,0x310732a55491bL,0x4cc1469ca665bL,
+ 0x029591d525f1aL },
+ { 0xf5b6bb84f983fL,0x419f5f84e1e76L,0x0baa189be7eefL,0x332c1200d4968L,
+ 0x06376551f18efL } },
+ /* 86 */
+ { { 0x5f14e562976ccL,0xe60ef12c38bdaL,0xcca985222bca3L,0x987abbfa30646L,
+ 0x0bdb79dc808e2L },
+ { 0xcb5c9cb06a772L,0xaafe536dcefd2L,0xc2b5db838f475L,0xc14ac2a3e0227L,
+ 0x08ee86001add3L } },
+ /* 87 */
+ { { 0x96981a4ade873L,0x4dc4fba48ccbeL,0xa054ba57ee9aaL,0xaa4b2cee28995L,
+ 0x092e51d7a6f77L },
+ { 0xbafa87190a34dL,0x5bf6bd1ed1948L,0xcaf1144d698f7L,0xaaaad00ee6e30L,
+ 0x05182f86f0a56L } },
+ /* 88 */
+ { { 0x6212c7a4cc99cL,0x683e6d9ca1fbaL,0xac98c5aff609bL,0xa6f25dbb27cb5L,
+ 0x091dcab5d4073L },
+ { 0x6cc3d5f575a70L,0x396f8d87fa01bL,0x99817360cb361L,0x4f2b165d4e8c8L,
+ 0x017a0cedb9797L } },
+ /* 89 */
+ { { 0x61e2a076c8d3aL,0x39210f924b388L,0x3a835d9701aadL,0xdf4194d0eae41L,
+ 0x02e8ce36c7f4cL },
+ { 0x73dab037a862bL,0xb760e4c8fa912L,0x3baf2dd01ba9bL,0x68f3f96453883L,
+ 0x0f4ccc6cb34f6L } },
+ /* 90 */
+ { { 0xf525cf1f79687L,0x9592efa81544eL,0x5c78d297c5954L,0xf3c9e1231741aL,
+ 0x0ac0db4889a0dL },
+ { 0xfc711df01747fL,0x58ef17df1386bL,0xccb6bb5592b93L,0x74a2e5880e4f5L,
+ 0x095a64a6194c9L } },
+ /* 91 */
+ { { 0x1efdac15a4c93L,0x738258514172cL,0x6cb0bad40269bL,0x06776a8dfb1c1L,
+ 0x0231e54ba2921L },
+ { 0xdf9178ae6d2dcL,0x3f39112918a70L,0xe5b72234d6aa6L,0x31e1f627726b5L,
+ 0x0ab0be032d8a7L } },
+ /* 92 */
+ { { 0xad0e98d131f2dL,0xe33b04f101097L,0x5e9a748637f09L,0xa6791ac86196dL,
+ 0x0f1bcc8802cf6L },
+ { 0x69140e8daacb4L,0x5560f6500925cL,0x77937a63c4e40L,0xb271591cc8fc4L,
+ 0x0851694695aebL } },
+ /* 93 */
+ { { 0x5c143f1dcf593L,0x29b018be3bde3L,0xbdd9d3d78202bL,0x55d8e9cdadc29L,
+ 0x08f67d9d2daadL },
+ { 0x116567481ea5fL,0xe9e34c590c841L,0x5053fa8e7d2ddL,0x8b5dffdd43f40L,
+ 0x0f84572b9c072L } },
+ /* 94 */
+ { { 0xa7a7197af71c9L,0x447a7365655e1L,0xe1d5063a14494L,0x2c19a1b4ae070L,
+ 0x0edee2710616bL },
+ { 0x034f511734121L,0x554a25e9f0b2fL,0x40c2ecf1cac6eL,0xd7f48dc148f3aL,
+ 0x09fd27e9b44ebL } },
+ /* 95 */
+ { { 0x7658af6e2cb16L,0x2cfe5919b63ccL,0x68d5583e3eb7dL,0xf3875a8c58161L,
+ 0x0a40c2fb6958fL },
+ { 0xec560fedcc158L,0xc655f230568c9L,0xa307e127ad804L,0xdecfd93967049L,
+ 0x099bc9bb87dc6L } },
+ /* 96 */
+ { { 0x9521d927dafc6L,0x695c09cd1984aL,0x9366dde52c1fbL,0x7e649d9581a0fL,
+ 0x09abe210ba16dL },
+ { 0xaf84a48915220L,0x6a4dd816c6480L,0x681ca5afa7317L,0x44b0c7d539871L,
+ 0x07881c25787f3L } },
+ /* 97 */
+ { { 0x99b51e0bcf3ffL,0xc5127f74f6933L,0xd01d9680d02cbL,0x89408fb465a2dL,
+ 0x015e6e319a30eL },
+ { 0xd6e0d3e0e05f4L,0xdc43588404646L,0x4f850d3fad7bdL,0x72cebe61c7d1cL,
+ 0x00e55facf1911L } },
+ /* 98 */
+ { { 0xd9806f8787564L,0x2131e85ce67e9L,0x819e8d61a3317L,0x65776b0158cabL,
+ 0x0d73d09766fe9L },
+ { 0x834251eb7206eL,0x0fc618bb42424L,0xe30a520a51929L,0xa50b5dcbb8595L,
+ 0x09250a3748f15L } },
+ /* 99 */
+ { { 0xf08f8be577410L,0x035077a8c6cafL,0xc0a63a4fd408aL,0x8c0bf1f63289eL,
+ 0x077414082c1ccL },
+ { 0x40fa6eb0991cdL,0x6649fdc29605aL,0x324fd40c1ca08L,0x20b93a68a3c7bL,
+ 0x08cb04f4d12ebL } },
+ /* 100 */
+ { { 0x2d0556906171cL,0xcdb0240c3fb1cL,0x89068419073e9L,0x3b51db8e6b4fdL,
+ 0x0e4e429ef4712L },
+ { 0xdd53c38ec36f4L,0x01ff4b6a270b8L,0x79a9a48f9d2dcL,0x65525d066e078L,
+ 0x037bca2ff3c6eL } },
+ /* 101 */
+ { { 0x2e3c7df562470L,0xa2c0964ac94cdL,0x0c793be44f272L,0xb22a7c6d5df98L,
+ 0x059913edc3002L },
+ { 0x39a835750592aL,0x80e783de027a1L,0xa05d64f99e01dL,0xe226cf8c0375eL,
+ 0x043786e4ab013L } },
+ /* 102 */
+ { { 0x2b0ed9e56b5a6L,0xa6d9fc68f9ff3L,0x97846a70750d9L,0x9e7aec15e8455L,
+ 0x08638ca98b7e7L },
+ { 0xae0960afc24b2L,0xaf4dace8f22f5L,0xecba78f05398eL,0xa6f03b765dd0aL,
+ 0x01ecdd36a7b3aL } },
+ /* 103 */
+ { { 0xacd626c5ff2f3L,0xc02873a9785d3L,0x2110d54a2d516L,0xf32dad94c9fadL,
+ 0x0d85d0f85d459L },
+ { 0x00b8d10b11da3L,0x30a78318c49f7L,0x208decdd2c22cL,0x3c62556988f49L,
+ 0x0a04f19c3b4edL } },
+ /* 104 */
+ { { 0x924c8ed7f93bdL,0x5d392f51f6087L,0x21b71afcb64acL,0x50b07cae330a8L,
+ 0x092b2eeea5c09L },
+ { 0xc4c9485b6e235L,0xa92936c0f085aL,0x0508891ab2ca4L,0x276c80faa6b3eL,
+ 0x01ee782215834L } },
+ /* 105 */
+ { { 0xa2e00e63e79f7L,0xb2f399d906a60L,0x607c09df590e7L,0xe1509021054a6L,
+ 0x0f3f2ced857a6L },
+ { 0x510f3f10d9b55L,0xacd8642648200L,0x8bd0e7c9d2fcfL,0xe210e5631aa7eL,
+ 0x00f56a4543da3L } },
+ /* 106 */
+ { { 0x1bffa1043e0dfL,0xcc9c007e6d5b2L,0x4a8517a6c74b6L,0xe2631a656ec0dL,
+ 0x0bd8f17411969L },
+ { 0xbbb86beb7494aL,0x6f45f3b8388a9L,0x4e5a79a1567d4L,0xfa09df7a12a7aL,
+ 0x02d1a1c3530ccL } },
+ /* 107 */
+ { { 0xe3813506508daL,0xc4a1d795a7192L,0xa9944b3336180L,0xba46cddb59497L,
+ 0x0a107a65eb91fL },
+ { 0x1d1c50f94d639L,0x758a58b7d7e6dL,0xd37ca1c8b4af3L,0x9af21a7c5584bL,
+ 0x0183d760af87aL } },
+ /* 108 */
+ { { 0x697110dde59a4L,0x070e8bef8729dL,0xf2ebe78f1ad8dL,0xd754229b49634L,
+ 0x01d44179dc269L },
+ { 0xdc0cf8390d30eL,0x530de8110cb32L,0xbc0339a0a3b27L,0xd26231af1dc52L,
+ 0x0771f9cc29606L } },
+ /* 109 */
+ { { 0x93e7785040739L,0xb98026a939999L,0x5f8fc2644539dL,0x718ecf40f6f2fL,
+ 0x064427a310362L },
+ { 0xf2d8785428aa8L,0x3febfb49a84f4L,0x23d01ac7b7adcL,0x0d6d201b2c6dfL,
+ 0x049d9b7496ae9L } },
+ /* 110 */
+ { { 0x8d8bc435d1099L,0x4e8e8d1a08cc7L,0xcb68a412adbcdL,0x544502c2e2a02L,
+ 0x09037d81b3f60L },
+ { 0xbac27074c7b61L,0xab57bfd72e7cdL,0x96d5352fe2031L,0x639c61ccec965L,
+ 0x008c3de6a7cc0L } },
+ /* 111 */
+ { { 0xdd020f6d552abL,0x9805cd81f120fL,0x135129156baffL,0x6b2f06fb7c3e9L,
+ 0x0c69094424579L },
+ { 0x3ae9c41231bd1L,0x875cc5820517bL,0x9d6a1221eac6eL,0x3ac0208837abfL,
+ 0x03fa3db02cafeL } },
+ /* 112 */
+ { { 0xa3e6505058880L,0xef643943f2d75L,0xab249257da365L,0x08ff4147861cfL,
+ 0x0c5c4bdb0fdb8L },
+ { 0x13e34b272b56bL,0x9511b9043a735L,0x8844969c8327eL,0xb6b5fd8ce37dfL,
+ 0x02d56db9446c2L } },
+ /* 113 */
+ { { 0x1782fff46ac6bL,0x2607a2e425246L,0x9a48de1d19f79L,0xba42fafea3c40L,
+ 0x00f56bd9de503L },
+ { 0xd4ed1345cda49L,0xfc816f299d137L,0xeb43402821158L,0xb5f1e7c6a54aaL,
+ 0x04003bb9d1173L } },
+ /* 114 */
+ { { 0xe8189a0803387L,0xf539cbd4043b8L,0x2877f21ece115L,0x2f9e4297208ddL,
+ 0x053765522a07fL },
+ { 0x80a21a8a4182dL,0x7a3219df79a49L,0xa19a2d4a2bbd0L,0x4549674d0a2e1L,
+ 0x07a056f586c5dL } },
+ /* 115 */
+ { { 0xb25589d8a2a47L,0x48c3df2773646L,0xbf0d5395b5829L,0x267551ec000eaL,
+ 0x077d482f17a1aL },
+ { 0x1bd9587853948L,0xbd6cfbffeeb8aL,0x0681e47a6f817L,0xb0e4ab6ec0578L,
+ 0x04115012b2b38L } },
+ /* 116 */
+ { { 0x3f0f46de28cedL,0x609b13ec473c7L,0xe5c63921d5da7L,0x094661b8ce9e6L,
+ 0x0cdf04572fbeaL },
+ { 0x3c58b6c53c3b0L,0x10447b843c1cbL,0xcb9780e97fe3cL,0x3109fb2b8ae12L,
+ 0x0ee703dda9738L } },
+ /* 117 */
+ { { 0x15140ff57e43aL,0xd3b1b811b8345L,0xf42b986d44660L,0xce212b3b5dff8L,
+ 0x02a0ad89da162L },
+ { 0x4a6946bc277baL,0x54c141c27664eL,0xabf6274c788c9L,0x4659141aa64ccL,
+ 0x0d62d0b67ac2bL } },
+ /* 118 */
+ { { 0x5d87b2c054ac4L,0x59f27df78839cL,0x18128d6570058L,0x2426edf7cbf3bL,
+ 0x0b39a23f2991cL },
+ { 0x84a15f0b16ae5L,0xb1a136f51b952L,0x27007830c6a05L,0x4cc51d63c137fL,
+ 0x004ed0092c067L } },
+ /* 119 */
+ { { 0x185d19ae90393L,0x294a3d64e61f4L,0x854fc143047b4L,0xc387ae0001a69L,
+ 0x0a0a91fc10177L },
+ { 0xa3f01ae2c831eL,0x822b727e16ff0L,0xa3075b4bb76aeL,0x0c418f12c8a15L,
+ 0x0084cf9889ed2L } },
+ /* 120 */
+ { { 0x509defca6becfL,0x807dffb328d98L,0x778e8b92fceaeL,0xf77e5d8a15c44L,
+ 0x0d57955b273abL },
+ { 0xda79e31b5d4f1L,0x4b3cfa7a1c210L,0xc27c20baa52f0L,0x41f1d4d12089dL,
+ 0x08e14ea4202d1L } },
+ /* 121 */
+ { { 0x50345f2897042L,0x1f43402c4aeedL,0x8bdfb218d0533L,0xd158c8d9c194cL,
+ 0x0597e1a372aa4L },
+ { 0x7ec1acf0bd68cL,0xdcab024945032L,0x9fe3e846d4be0L,0x4dea5b9c8d7acL,
+ 0x0ca3f0236199bL } },
+ /* 122 */
+ { { 0xa10b56170bd20L,0xf16d3f5de7592L,0x4b2ade20ea897L,0x07e4a3363ff14L,
+ 0x0bde7fd7e309cL },
+ { 0xbb6d2b8f5432cL,0xcbe043444b516L,0x8f95b5a210dc1L,0xd1983db01e6ffL,
+ 0x0b623ad0e0a7dL } },
+ /* 123 */
+ { { 0xbd67560c7b65bL,0x9023a4a289a75L,0x7b26795ab8c55L,0x137bf8220fd0dL,
+ 0x0d6aa2e4658ecL },
+ { 0xbc00b5138bb85L,0x21d833a95c10aL,0x702a32e8c31d1L,0x513ab24ff00b1L,
+ 0x0111662e02dccL } },
+ /* 124 */
+ { { 0x14015efb42b87L,0x701b6c4dff781L,0x7d7c129bd9f5dL,0x50f866ecccd7aL,
+ 0x0db3ee1cb94b7L },
+ { 0xf3db0f34837cfL,0x8bb9578d4fb26L,0xc56657de7eed1L,0x6a595d2cdf937L,
+ 0x0886a64425220L } },
+ /* 125 */
+ { { 0x34cfb65b569eaL,0x41f72119c13c2L,0x15a619e200111L,0x17bc8badc85daL,
+ 0x0a70cf4eb018aL },
+ { 0xf97ae8c4a6a65L,0x270134378f224L,0xf7e096036e5cfL,0x7b77be3a609e4L,
+ 0x0aa4772abd174L } },
+ /* 126 */
+ { { 0x761317aa60cc0L,0x610368115f676L,0xbc1bb5ac79163L,0xf974ded98bb4bL,
+ 0x0611a6ddc30faL },
+ { 0x78cbcc15ee47aL,0x824e0d96a530eL,0xdd9ed882e8962L,0x9c8836f35adf3L,
+ 0x05cfffaf81642L } },
+ /* 127 */
+ { { 0x54cff9b7a99cdL,0x9d843c45a1c0dL,0x2c739e17bf3b9L,0x994c038a908f6L,
+ 0x06e5a6b237dc1L },
+ { 0xb454e0ba5db77L,0x7facf60d63ef8L,0x6608378b7b880L,0xabcce591c0c67L,
+ 0x0481a238d242dL } },
+ /* 128 */
+ { { 0x17bc035d0b34aL,0x6b8327c0a7e34L,0xc0362d1440b38L,0xf9438fb7262daL,
+ 0x02c41114ce0cdL },
+ { 0x5cef1ad95a0b1L,0xa867d543622baL,0x1e486c9c09b37L,0x929726d6cdd20L,
+ 0x020477abf42ffL } },
+ /* 129 */
+ { { 0x5173c18d65dbfL,0x0e339edad82f7L,0xcf1001c77bf94L,0x96b67022d26bdL,
+ 0x0ac66409ac773L },
+ { 0xbb36fc6261cc3L,0xc9190e7e908b0L,0x45e6c10213f7bL,0x2f856541cebaaL,
+ 0x0ce8e6975cc12L } },
+ /* 130 */
+ { { 0x21b41bc0a67d2L,0x0a444d248a0f1L,0x59b473762d476L,0xb4a80e044f1d6L,
+ 0x008fde365250bL },
+ { 0xec3da848bf287L,0x82d3369d6eaceL,0x2449482c2a621L,0x6cd73582dfdc9L,
+ 0x02f7e2fd2565dL } },
+ /* 131 */
+ { { 0xb92dbc3770fa7L,0x5c379043f9ae4L,0x7761171095e8dL,0x02ae54f34e9d1L,
+ 0x0c65be92e9077L },
+ { 0x8a303f6fd0a40L,0xe3bcce784b275L,0xf9767bfe7d822L,0x3b3a7ae4f5854L,
+ 0x04bff8e47d119L } },
+ /* 132 */
+ { { 0x1d21f00ff1480L,0x7d0754db16cd4L,0xbe0f3ea2ab8fbL,0x967dac81d2efbL,
+ 0x03e4e4ae65772L },
+ { 0x8f36d3c5303e6L,0x4b922623977e1L,0x324c3c03bd999L,0x60289ed70e261L,
+ 0x05388aefd58ecL } },
+ /* 133 */
+ { { 0x317eb5e5d7713L,0xee75de49daad1L,0x74fb26109b985L,0xbe0e32f5bc4fcL,
+ 0x05cf908d14f75L },
+ { 0x435108e657b12L,0xa5b96ed9e6760L,0x970ccc2bfd421L,0x0ce20e29f51f8L,
+ 0x0a698ba4060f0L } },
+ /* 134 */
+ { { 0xb1686ef748fecL,0xa27e9d2cf973dL,0xe265effe6e755L,0xad8d630b6544cL,
+ 0x0b142ef8a7aebL },
+ { 0x1af9f17d5770aL,0x672cb3412fad3L,0xf3359de66af3bL,0x50756bd60d1bdL,
+ 0x0d1896a965851L } },
+ /* 135 */
+ { { 0x957ab33c41c08L,0xac5468e2e1ec5L,0xc472f6c87de94L,0xda3918816b73aL,
+ 0x0267b0e0b7981L },
+ { 0x54e5d8e62b988L,0x55116d21e76e5L,0xd2a6f99d8ddc7L,0x93934610faf03L,
+ 0x0b54e287aa111L } },
+ /* 136 */
+ { { 0x122b5178a876bL,0xff085104b40a0L,0x4f29f7651ff96L,0xd4e6050b31ab1L,
+ 0x084abb28b5f87L },
+ { 0xd439f8270790aL,0x9d85e3f46bd5eL,0xc1e22122d6cb5L,0x564075f55c1b6L,
+ 0x0e5436f671765L } },
+ /* 137 */
+ { { 0x9025e2286e8d5L,0xb4864453be53fL,0x408e3a0353c95L,0xe99ed832f5bdeL,
+ 0x00404f68b5b9cL },
+ { 0x33bdea781e8e5L,0x18163c2f5bcadL,0x119caa33cdf50L,0xc701575769600L,
+ 0x03a4263df0ac1L } },
+ /* 138 */
+ { { 0x65ecc9aeb596dL,0xe7023c92b4c29L,0xe01396101ea03L,0xa3674704b4b62L,
+ 0x00ca8fd3f905eL },
+ { 0x23a42551b2b61L,0x9c390fcd06925L,0x392a63e1eb7a8L,0x0c33e7f1d2be0L,
+ 0x096dca2644ddbL } },
+ /* 139 */
+ { { 0xbb43a387510afL,0xa8a9a36a01203L,0xf950378846feaL,0x59dcd23a57702L,
+ 0x04363e2123aadL },
+ { 0x3a1c740246a47L,0xd2e55dd24dca4L,0xd8faf96b362b8L,0x98c4f9b086045L,
+ 0x0840e115cd8bbL } },
+ /* 140 */
+ { { 0x205e21023e8a7L,0xcdd8dc7a0bf12L,0x63a5ddfc808a8L,0xd6d4e292a2721L,
+ 0x05e0d6abd30deL },
+ { 0x721c27cfc0f64L,0x1d0e55ed8807aL,0xd1f9db242eec0L,0xa25a26a7bef91L,
+ 0x07dea48f42945L } },
+ /* 141 */
+ { { 0xf6f1ce5060a81L,0x72f8f95615abdL,0x6ac268be79f9cL,0x16d1cfd36c540L,
+ 0x0abc2a2beebfdL },
+ { 0x66f91d3e2eac7L,0x63d2dd04668acL,0x282d31b6f10baL,0xefc16790e3770L,
+ 0x04ea353946c7eL } },
+ /* 142 */
+ { { 0xa2f8d5266309dL,0xc081945a3eed8L,0x78c5dc10a51c6L,0xffc3cecaf45a5L,
+ 0x03a76e6891c94L },
+ { 0xce8a47d7b0d0fL,0x968f584a5f9aaL,0xe697fbe963aceL,0x646451a30c724L,
+ 0x08212a10a465eL } },
+ /* 143 */
+ { { 0xc61c3cfab8caaL,0x840e142390ef7L,0xe9733ca18eb8eL,0xb164cd1dff677L,
+ 0x0aa7cab71599cL },
+ { 0xc9273bc837bd1L,0xd0c36af5d702fL,0x423da49c06407L,0x17c317621292fL,
+ 0x040e38073fe06L } },
+ /* 144 */
+ { { 0x80824a7bf9b7cL,0x203fbe30d0f4fL,0x7cf9ce3365d23L,0x5526bfbe53209L,
+ 0x0e3604700b305L },
+ { 0xb99116cc6c2c7L,0x08ba4cbee64dcL,0x37ad9ec726837L,0xe15fdcded4346L,
+ 0x06542d677a3deL } },
+ /* 145 */
+ { { 0x2b6d07b6c377aL,0x47903448be3f3L,0x0da8af76cb038L,0x6f21d6fdd3a82L,
+ 0x0a6534aee09bbL },
+ { 0x1780d1035facfL,0x339dcb47e630aL,0x447f39335e55aL,0xef226ea50fe1cL,
+ 0x0f3cb672fdc9aL } },
+ /* 146 */
+ { { 0x719fe3b55fd83L,0x6c875ddd10eb3L,0x5cea784e0d7a4L,0x70e733ac9fa90L,
+ 0x07cafaa2eaae8L },
+ { 0x14d041d53b338L,0xa0ef87e6c69b8L,0x1672b0fe0acc0L,0x522efb93d1081L,
+ 0x00aab13c1b9bdL } },
+ /* 147 */
+ { { 0xce278d2681297L,0xb1b509546addcL,0x661aaf2cb350eL,0x12e92dc431737L,
+ 0x04b91a6028470L },
+ { 0xf109572f8ddcfL,0x1e9a911af4dcfL,0x372430e08ebf6L,0x1cab48f4360acL,
+ 0x049534c537232L } },
+ /* 148 */
+ { { 0xf7d71f07b7e9dL,0xa313cd516f83dL,0xc047ee3a478efL,0xc5ee78ef264b6L,
+ 0x0caf46c4fd65aL },
+ { 0xd0c7792aa8266L,0x66913684bba04L,0xe4b16b0edf454L,0x770f56e65168aL,
+ 0x014ce9e5704c6L } },
+ /* 149 */
+ { { 0x45e3e965e8f91L,0xbacb0f2492994L,0x0c8a0a0d3aca1L,0x9a71d31cc70f9L,
+ 0x01bb708a53e4cL },
+ { 0xa9e69558bdd7aL,0x08018a26b1d5cL,0xc9cf1ec734a05L,0x0102b093aa714L,
+ 0x0f9d126f2da30L } },
+ /* 150 */
+ { { 0xbca7aaff9563eL,0xfeb49914a0749L,0xf5f1671dd077aL,0xcc69e27a0311bL,
+ 0x0807afcb9729eL },
+ { 0xa9337c9b08b77L,0x85443c7e387f8L,0x76fd8ba86c3a7L,0xcd8c85fafa594L,
+ 0x0751adcd16568L } },
+ /* 151 */
+ { { 0xa38b410715c0dL,0x718f7697f78aeL,0x3fbf06dd113eaL,0x743f665eab149L,
+ 0x029ec44682537L },
+ { 0x4719cb50bebbcL,0xbfe45054223d9L,0xd2dedb1399ee5L,0x077d90cd5b3a8L,
+ 0x0ff9370e392a4L } },
+ /* 152 */
+ { { 0x2d69bc6b75b65L,0xd5266651c559aL,0xde9d7d24188f8L,0xd01a28a9f33e3L,
+ 0x09776478ba2a9L },
+ { 0x2622d929af2c7L,0x6d4e690923885L,0x89a51e9334f5dL,0x82face6cc7e5aL,
+ 0x074a6313fac2fL } },
+ /* 153 */
+ { { 0x4dfddb75f079cL,0x9518e36fbbb2fL,0x7cd36dd85b07cL,0x863d1b6cfcf0eL,
+ 0x0ab75be150ff4L },
+ { 0x367c0173fc9b7L,0x20d2594fd081bL,0x4091236b90a74L,0x59f615fdbf03cL,
+ 0x04ebeac2e0b44L } },
+ /* 154 */
+ { { 0xc5fe75c9f2c53L,0x118eae9411eb6L,0x95ac5d8d25220L,0xaffcc8887633fL,
+ 0x0df99887b2c1bL },
+ { 0x8eed2850aaecbL,0x1b01d6a272bb7L,0x1cdbcac9d4918L,0x4058978dd511bL,
+ 0x027b040a7779fL } },
+ /* 155 */
+ { { 0x05db7f73b2eb2L,0x088e1b2118904L,0x962327ee0df85L,0xa3f5501b71525L,
+ 0x0b393dd37e4cfL },
+ { 0x30e7b3fd75165L,0xc2bcd33554a12L,0xf7b5022d66344L,0x34196c36f1be0L,
+ 0x009588c12d046L } },
+ /* 156 */
+ { { 0x6093f02601c3bL,0xf8cf5c335fe08L,0x94aff28fb0252L,0x648b955cf2808L,
+ 0x081c879a9db9fL },
+ { 0xe687cc6f56c51L,0x693f17618c040L,0x059353bfed471L,0x1bc444f88a419L,
+ 0x0fa0d48f55fc1L } },
+ /* 157 */
+ { { 0xe1c9de1608e4dL,0x113582822cbc6L,0x57ec2d7010ddaL,0x67d6f6b7ddc11L,
+ 0x08ea0e156b6a3L },
+ { 0x4e02f2383b3b4L,0x943f01f53ca35L,0xde03ca569966bL,0xb5ac4ff6632b2L,
+ 0x03f5ab924fa00L } },
+ /* 158 */
+ { { 0xbb0d959739efbL,0xf4e7ebec0d337L,0x11a67d1c751b0L,0x256e2da52dd64L,
+ 0x08bc768872b74L },
+ { 0xe3b7282d3d253L,0xa1f58d779fa5bL,0x16767bba9f679L,0xf34fa1cac168eL,
+ 0x0b386f19060fcL } },
+ /* 159 */
+ { { 0x3c1352fedcfc2L,0x6262f8af0d31fL,0x57288c25396bfL,0x9c4d9a02b4eaeL,
+ 0x04cb460f71b06L },
+ { 0x7b4d35b8095eaL,0x596fc07603ae6L,0x614a16592bbf8L,0x5223e1475f66bL,
+ 0x052c0d50895efL } },
+ /* 160 */
+ { { 0xc210e15339848L,0xe870778c8d231L,0x956e170e87a28L,0x9c0b9d1de6616L,
+ 0x04ac3c9382bb0L },
+ { 0xe05516998987dL,0xc4ae09f4d619bL,0xa3f933d8b2376L,0x05f41de0b7651L,
+ 0x0380d94c7e397L } },
+ /* 161 */
+ { { 0x355aa81542e75L,0xa1ee01b9b701aL,0x24d708796c724L,0x37af6b3a29776L,
+ 0x02ce3e171de26L },
+ { 0xfeb49f5d5bc1aL,0x7e2777e2b5cfeL,0x513756ca65560L,0x4e4d4feaac2f9L,
+ 0x02e6cd8520b62L } },
+ /* 162 */
+ { { 0x5954b8c31c31dL,0x005bf21a0c368L,0x5c79ec968533dL,0x9d540bd7626e7L,
+ 0x0ca17754742c6L },
+ { 0xedafff6d2dbb2L,0xbd174a9d18cc6L,0xa4578e8fd0d8cL,0x2ce6875e8793aL,
+ 0x0a976a7139cabL } },
+ /* 163 */
+ { { 0x51f1b93fb353dL,0x8b57fcfa720a6L,0x1b15281d75cabL,0x4999aa88cfa73L,
+ 0x08720a7170a1fL },
+ { 0xe8d37693e1b90L,0x0b16f6dfc38c3L,0x52a8742d345dcL,0x893c8ea8d00abL,
+ 0x09719ef29c769L } },
+ /* 164 */
+ { { 0xeed8d58e35909L,0xdc33ddc116820L,0xe2050269366d8L,0x04c1d7f999d06L,
+ 0x0a5072976e157L },
+ { 0xa37eac4e70b2eL,0x576890aa8a002L,0x45b2a5c84dcf6L,0x7725cd71bf186L,
+ 0x099389c9df7b7L } },
+ /* 165 */
+ { { 0xc08f27ada7a4bL,0x03fd389366238L,0x66f512c3abe9dL,0x82e46b672e897L,
+ 0x0a88806aa202cL },
+ { 0x2044ad380184eL,0xc4126a8b85660L,0xd844f17a8cb78L,0xdcfe79d670c0aL,
+ 0x00043bffb4738L } },
+ /* 166 */
+ { { 0x9b5dc36d5192eL,0xd34590b2af8d5L,0x1601781acf885L,0x486683566d0a1L,
+ 0x052f3ef01ba6cL },
+ { 0x6732a0edcb64dL,0x238068379f398L,0x040f3090a482cL,0x7e7516cbe5fa7L,
+ 0x03296bd899ef2L } },
+ /* 167 */
+ { { 0xaba89454d81d7L,0xef51eb9b3c476L,0x1c579869eade7L,0x71e9619a21cd8L,
+ 0x03b90febfaee5L },
+ { 0x3023e5496f7cbL,0xd87fb51bc4939L,0x9beb5ce55be41L,0x0b1803f1dd489L,
+ 0x06e88069d9f81L } },
+ /* 168 */
+ { { 0x7ab11b43ea1dbL,0xa95259d292ce3L,0xf84f1860a7ff1L,0xad13851b02218L,
+ 0x0a7222beadefaL },
+ { 0xc78ec2b0a9144L,0x51f2fa59c5a2aL,0x147ce385a0240L,0xc69091d1eca56L,
+ 0x0be94d523bc2aL } },
+ /* 169 */
+ { { 0x4945e0b226ce7L,0x47967e8b7072fL,0x5a6c63eb8afd7L,0xc766edea46f18L,
+ 0x07782defe9be8L },
+ { 0xd2aa43db38626L,0x8776f67ad1760L,0x4499cdb460ae7L,0x2e4b341b86fc5L,
+ 0x003838567a289L } },
+ /* 170 */
+ { { 0xdaefd79ec1a0fL,0xfdceb39c972d8L,0x8f61a953bbcd6L,0xb420f5575ffc5L,
+ 0x0dbd986c4adf7L },
+ { 0xa881415f39eb7L,0xf5b98d976c81aL,0xf2f717d6ee2fcL,0xbbd05465475dcL,
+ 0x08e24d3c46860L } },
+ /* 171 */
+ { { 0xd8e549a587390L,0x4f0cbec588749L,0x25983c612bb19L,0xafc846e07da4bL,
+ 0x0541a99c4407bL },
+ { 0x41692624c8842L,0x2ad86c05ffdb2L,0xf7fcf626044c1L,0x35d1c59d14b44L,
+ 0x0c0092c49f57dL } },
+ /* 172 */
+ { { 0xc75c3df2e61efL,0xc82e1b35cad3cL,0x09f29f47e8841L,0x944dc62d30d19L,
+ 0x075e406347286L },
+ { 0x41fc5bbc237d0L,0xf0ec4f01c9e7dL,0x82bd534c9537bL,0x858691c51a162L,
+ 0x05b7cb658c784L } },
+ /* 173 */
+ { { 0xa70848a28ead1L,0x08fd3b47f6964L,0x67e5b39802dc5L,0x97a19ae4bfd17L,
+ 0x07ae13eba8df0L },
+ { 0x16ef8eadd384eL,0xd9b6b2ff06fd2L,0xbcdb5f30361a2L,0xe3fd204b98784L,
+ 0x0787d8074e2a8L } },
+ /* 174 */
+ { { 0x25d6b757fbb1cL,0xb2ca201debc5eL,0xd2233ffe47bddL,0x84844a55e9a36L,
+ 0x05c2228199ef2L },
+ { 0xd4a8588315250L,0x2b827097c1773L,0xef5d33f21b21aL,0xf2b0ab7c4ea1dL,
+ 0x0e45d37abbaf0L } },
+ /* 175 */
+ { { 0xf1e3428511c8aL,0xc8bdca6cd3d2dL,0x27c39a7ebb229L,0xb9d3578a71a76L,
+ 0x0ed7bc12284dfL },
+ { 0x2a6df93dea561L,0x8dd48f0ed1cf2L,0xbad23e85443f1L,0x6d27d8b861405L,
+ 0x0aac97cc945caL } },
+ /* 176 */
+ { { 0x4ea74a16bd00aL,0xadf5c0bcc1eb5L,0xf9bfc06d839e9L,0xdc4e092bb7f11L,
+ 0x0318f97b31163L },
+ { 0x0c5bec30d7138L,0x23abc30220eccL,0x022360644e8dfL,0xff4d2bb7972fbL,
+ 0x0fa41faa19a84L } },
+ /* 177 */
+ { { 0x2d974a6642269L,0xce9bb783bd440L,0x941e60bc81814L,0xe9e2398d38e47L,
+ 0x038bb6b2c1d26L },
+ { 0xe4a256a577f87L,0x53dc11fe1cc64L,0x22807288b52d2L,0x01a5ff336abf6L,
+ 0x094dd0905ce76L } },
+ /* 178 */
+ { { 0xcf7dcde93f92aL,0xcb89b5f315156L,0x995e750a01333L,0x2ae902404df9cL,
+ 0x092077867d25cL },
+ { 0x71e010bf39d44L,0x2096bb53d7e24L,0xc9c3d8f5f2c90L,0xeb514c44b7b35L,
+ 0x081e8428bd29bL } },
+ /* 179 */
+ { { 0x9c2bac477199fL,0xee6b5ecdd96ddL,0xe40fd0e8cb8eeL,0xa4b18af7db3feL,
+ 0x01b94ab62dbbfL },
+ { 0x0d8b3ce47f143L,0xfc63f4616344fL,0xc59938351e623L,0x90eef18f270fcL,
+ 0x006a38e280555L } },
+ /* 180 */
+ { { 0xb0139b3355b49L,0x60b4ebf99b2e5L,0x269f3dc20e265L,0xd4f8c08ffa6bdL,
+ 0x0a7b36c2083d9L },
+ { 0x15c3a1b3e8830L,0xe1a89f9c0b64dL,0x2d16930d5fceaL,0x2a20cfeee4a2eL,
+ 0x0be54c6b4a282L } },
+ /* 181 */
+ { { 0xdb3df8d91167cL,0x79e7a6625ed6cL,0x46ac7f4517c3fL,0x22bb7105648f3L,
+ 0x0bf30a5abeae0L },
+ { 0x785be93828a68L,0x327f3ef0368e7L,0x92146b25161c3L,0xd13ae11b5feb5L,
+ 0x0d1c820de2732L } },
+ /* 182 */
+ { { 0xe13479038b363L,0x546b05e519043L,0x026cad158c11fL,0x8da34fe57abe6L,
+ 0x0b7d17bed68a1L },
+ { 0xa5891e29c2559L,0x765bfffd8444cL,0x4e469484f7a03L,0xcc64498de4af7L,
+ 0x03997fd5e6412L } },
+ /* 183 */
+ { { 0x746828bd61507L,0xd534a64d2af20L,0xa8a15e329e132L,0x13e8ffeddfb08L,
+ 0x00eeb89293c6cL },
+ { 0x69a3ea7e259f8L,0xe6d13e7e67e9bL,0xd1fa685ce1db7L,0xb6ef277318f6aL,
+ 0x0228916f8c922L } },
+ /* 184 */
+ { { 0xae25b0a12ab5bL,0x1f957bc136959L,0x16e2b0ccc1117L,0x097e8058429edL,
+ 0x0ec05ad1d6e93L },
+ { 0xba5beac3f3708L,0x3530b59d77157L,0x18234e531baf9L,0x1b3747b552371L,
+ 0x07d3141567ff1L } },
+ /* 185 */
+ { { 0x9c05cf6dfefabL,0x68dcb377077bdL,0xa38bb95be2f22L,0xd7a3e53ead973L,
+ 0x0e9ce66fc9bc1L },
+ { 0xa15766f6a02a1L,0xdf60e600ed75aL,0x8cdc1b938c087L,0x0651f8947f346L,
+ 0x0d9650b017228L } },
+ /* 186 */
+ { { 0xb4c4a5a057e60L,0xbe8def25e4504L,0x7c1ccbdcbccc3L,0xb7a2a63532081L,
+ 0x014d6699a804eL },
+ { 0xa8415db1f411aL,0x0bf80d769c2c8L,0xc2f77ad09fbafL,0x598ab4deef901L,
+ 0x06f4c68410d43L } },
+ /* 187 */
+ { { 0x6df4e96c24a96L,0x85fcbd99a3872L,0xb2ae30a534dbcL,0x9abb3c466ef28L,
+ 0x04c4350fd6118L },
+ { 0x7f716f855b8daL,0x94463c38a1296L,0xae9334341a423L,0x18b5c37e1413eL,
+ 0x0a726d2425a31L } },
+ /* 188 */
+ { { 0x6b3ee948c1086L,0x3dcbd3a2e1daeL,0x3d022f3f1de50L,0xf3923f35ed3f0L,
+ 0x013639e82cc6cL },
+ { 0x938fbcdafaa86L,0xfb2654a2589acL,0x5051329f45bc5L,0x35a31963b26e4L,
+ 0x0ca9365e1c1a3L } },
+ /* 189 */
+ { { 0x5ac754c3b2d20L,0x17904e241b361L,0xc9d071d742a54L,0x72a5b08521c4cL,
+ 0x09ce29c34970bL },
+ { 0x81f736d3e0ad6L,0x9ef2f8434c8ccL,0xce862d98060daL,0xaf9835ed1d1a6L,
+ 0x048c4abd7ab42L } },
+ /* 190 */
+ { { 0x1b0cc40c7485aL,0xbbe5274dbfd22L,0x263d2e8ead455L,0x33cb493c76989L,
+ 0x078017c32f67bL },
+ { 0x35769930cb5eeL,0x940c408ed2b9dL,0x72f1a4dc0d14eL,0x1c04f8b7bf552L,
+ 0x053cd0454de5cL } },
+ /* 191 */
+ { { 0x585fa5d28ccacL,0x56005b746ebcdL,0xd0123aa5f823eL,0xfa8f7c79f0a1cL,
+ 0x0eea465c1d3d7L },
+ { 0x0659f0551803bL,0x9f7ce6af70781L,0x9288e706c0b59L,0x91934195a7702L,
+ 0x01b6e42a47ae6L } },
+ /* 192 */
+ { { 0x0937cf67d04c3L,0xe289eeb8112e8L,0x2594d601e312bL,0xbd3d56b5d8879L,
+ 0x00224da14187fL },
+ { 0xbb8630c5fe36fL,0x604ef51f5f87aL,0x3b429ec580f3cL,0xff33964fb1bfbL,
+ 0x060838ef042bfL } },
+ /* 193 */
+ { { 0xcb2f27e0bbe99L,0xf304aa39ee432L,0xfa939037bda44L,0x16435f497c7a9L,
+ 0x0636eb2022d33L },
+ { 0xd0e6193ae00aaL,0xfe31ae6d2ffcfL,0xf93901c875a00L,0x8bacf43658a29L,
+ 0x08844eeb63921L } },
+ /* 194 */
+ { { 0x171d26b3bae58L,0x7117e39f3e114L,0x1a8eada7db3dfL,0x789ecd37bc7f8L,
+ 0x027ba83dc51fbL },
+ { 0xf439ffbf54de5L,0x0bb5fe1a71a7dL,0xb297a48727703L,0xa4ab42ee8e35dL,
+ 0x0adb62d3487f3L } },
+ /* 195 */
+ { { 0x168a2a175df2aL,0x4f618c32e99b1L,0x46b0916082aa0L,0xc8b2c9e4f2e71L,
+ 0x0b990fd7675e7L },
+ { 0x9d96b4df37313L,0x79d0b40789082L,0x80877111c2055L,0xd18d66c9ae4a7L,
+ 0x081707ef94d10L } },
+ /* 196 */
+ { { 0x7cab203d6ff96L,0xfc0d84336097dL,0x042db4b5b851bL,0xaa5c268823c4dL,
+ 0x03792daead5a8L },
+ { 0x18865941afa0bL,0x4142d83671528L,0xbe4e0a7f3e9e7L,0x01ba17c825275L,
+ 0x05abd635e94b0L } },
+ /* 197 */
+ { { 0xfa84e0ac4927cL,0x35a7c8cf23727L,0xadca0dfe38860L,0xb610a4bcd5ea4L,
+ 0x05995bf21846aL },
+ { 0xf860b829dfa33L,0xae958fc18be90L,0x8630366caafe2L,0x411e9b3baf447L,
+ 0x044c32ca2d483L } },
+ /* 198 */
+ { { 0xa97f1e40ed80cL,0xb131d2ca82a74L,0xc2d6ad95f938cL,0xa54c53f2124b7L,
+ 0x01f2162fb8082L },
+ { 0x67cc5720b173eL,0x66085f12f97e4L,0xc9d65dc40e8a6L,0x07c98cebc20e4L,
+ 0x08f1d402bc3e9L } },
+ /* 199 */
+ { { 0x92f9cfbc4058aL,0xb6292f56704f5L,0xc1d8c57b15e14L,0xdbf9c55cfe37bL,
+ 0x0b1980f43926eL },
+ { 0x33e0932c76b09L,0x9d33b07f7898cL,0x63bb4611df527L,0x8e456f08ead48L,
+ 0x02828ad9b3744L } },
+ /* 200 */
+ { { 0x722c4c4cf4ac5L,0x3fdde64afb696L,0x0890832f5ac1aL,0xb3900551baa2eL,
+ 0x04973f1275a14L },
+ { 0xd8335322eac5dL,0xf50bd9b568e59L,0x25883935e07eeL,0x8ac7ab36720faL,
+ 0x06dac8ed0db16L } },
+ /* 201 */
+ { { 0x545aeeda835efL,0xd21d10ed51f7bL,0x3741b094aa113L,0xde4c035a65e01L,
+ 0x04b23ef5920b9L },
+ { 0xbb6803c4c7341L,0x6d3f58bc37e82L,0x51e3ee8d45770L,0x9a4e73527863aL,
+ 0x04dd71534ddf4L } },
+ /* 202 */
+ { { 0x4467295476cd9L,0x2fe31a725bbf9L,0xc4b67e0648d07L,0x4dbb1441c8b8fL,
+ 0x0fd3170002f4aL },
+ { 0x43ff48995d0e1L,0xd10ef729aa1cbL,0x179898276e695L,0xf365e0d5f9764L,
+ 0x014fac58c9569L } },
+ /* 203 */
+ { { 0xa0065f312ae18L,0xc0fcc93fc9ad9L,0xa7d284651958dL,0xda50d9a142408L,
+ 0x0ed7c765136abL },
+ { 0x70f1a25d4abbcL,0xf3f1a113ea462L,0xb51952f9b5dd8L,0x9f53c609b0755L,
+ 0x0fefcb7f74d2eL } },
+ /* 204 */
+ { { 0x9497aba119185L,0x30aac45ba4bd0L,0xa521179d54e8cL,0xd80b492479deaL,
+ 0x01801a57e87e0L },
+ { 0xd3f8dfcafffb0L,0x0bae255240073L,0xb5fdfbc6cf33cL,0x1064781d763b5L,
+ 0x09f8fc11e1eadL } },
+ /* 205 */
+ { { 0x3a1715e69544cL,0x67f04b7813158L,0x78a4c320eaf85L,0x69a91e22a8fd2L,
+ 0x0a9d3809d3d3aL },
+ { 0xc2c2c59a2da3bL,0xf61895c847936L,0x3d5086938ccbcL,0x8ef75e65244e6L,
+ 0x03006b9aee117L } },
+ /* 206 */
+ { { 0x1f2b0c9eead28L,0x5d89f4dfbc0bbL,0x2ce89397eef63L,0xf761074757fdbL,
+ 0x00ab85fd745f8L },
+ { 0xa7c933e5b4549L,0x5c97922f21ecdL,0x43b80404be2bbL,0x42c2261a1274bL,
+ 0x0b122d67511e9L } },
+ /* 207 */
+ { { 0x607be66a5ae7aL,0xfa76adcbe33beL,0xeb6e5c501e703L,0xbaecaf9043014L,
+ 0x09f599dc1097dL },
+ { 0x5b7180ff250edL,0x74349a20dc6d7L,0x0b227a38eb915L,0x4b78425605a41L,
+ 0x07d5528e08a29L } },
+ /* 208 */
+ { { 0x58f6620c26defL,0xea582b2d1ef0fL,0x1ce3881025585L,0x1730fbe7d79b0L,
+ 0x028ccea01303fL },
+ { 0xabcd179644ba5L,0xe806fff0b8d1dL,0x6b3e17b1fc643L,0x13bfa60a76fc6L,
+ 0x0c18baf48a1d0L } },
+ /* 209 */
+ { { 0x638c85dc4216dL,0x67206142ac34eL,0x5f5064a00c010L,0x596bd453a1719L,
+ 0x09def809db7a9L },
+ { 0x8642e67ab8d2cL,0x336237a2b641eL,0x4c4218bb42404L,0x8ce57d506a6d6L,
+ 0x00357f8b06880L } },
+ /* 210 */
+ { { 0xdbe644cd2cc88L,0x8df0b8f39d8e9L,0xd30a0c8cc61c2L,0x98874a309874cL,
+ 0x0e4a01add1b48L },
+ { 0x1eeacf57cd8f9L,0x3ebd594c482edL,0xbd2f7871b767dL,0xcc30a7295c717L,
+ 0x0466d7d79ce10L } },
+ /* 211 */
+ { { 0x318929dada2c7L,0xc38f9aa27d47dL,0x20a59e14fa0a6L,0xad1a90e4fd288L,
+ 0x0c672a522451eL },
+ { 0x07cc85d86b655L,0x3bf9ad4af1306L,0x71172a6f0235dL,0x751399a086805L,
+ 0x05e3d64faf2a6L } },
+ /* 212 */
+ { { 0x410c79b3b4416L,0x85eab26d99aa6L,0xb656a74cd8fcfL,0x42fc5ebff74adL,
+ 0x06c8a7a95eb8eL },
+ { 0x60ba7b02a63bdL,0x038b8f004710cL,0x12d90b06b2f23L,0xca918c6c37383L,
+ 0x0348ae422ad82L } },
+ /* 213 */
+ { { 0x746635ccda2fbL,0xa18e0726d27f4L,0x92b1f2022accaL,0x2d2e85adf7824L,
+ 0x0c1074de0d9efL },
+ { 0x3ce44ae9a65b3L,0xac05d7151bfcfL,0xe6a9788fd71e4L,0x4ffcd4711f50cL,
+ 0x0fbadfbdbc9e5L } },
+ /* 214 */
+ { { 0x3f1cd20a99363L,0x8f6cf22775171L,0x4d359b2b91565L,0x6fcd968175cd2L,
+ 0x0b7f976b48371L },
+ { 0x8e24d5d6dbf74L,0xfd71c3af36575L,0x243dfe38d23baL,0xc80548f477600L,
+ 0x0f4d41b2ecafcL } },
+ /* 215 */
+ { { 0x1cf28fdabd48dL,0x3632c078a451fL,0x17146e9ce81beL,0x0f106ace29741L,
+ 0x0180824eae016L },
+ { 0x7698b66e58358L,0x52ce6ca358038L,0xe41e6c5635687L,0x6d2582380e345L,
+ 0x067e5f63983cfL } },
+ /* 216 */
+ { { 0xccb8dcf4899efL,0xf09ebb44c0f89L,0x2598ec9949015L,0x1fc6546f9276bL,
+ 0x09fef789a04c1L },
+ { 0x67ecf53d2a071L,0x7fa4519b096d3L,0x11e2eefb10e1aL,0x4e20ca6b3fb06L,
+ 0x0bc80c181a99cL } },
+ /* 217 */
+ { { 0x536f8e5eb82e6L,0xc7f56cb920972L,0x0b5da5e1a484fL,0xdf10c78e21715L,
+ 0x049270e629f8cL },
+ { 0x9b7bbea6b50adL,0xc1a2388ffc1a3L,0x107197b9a0284L,0x2f7f5403eb178L,
+ 0x0d2ee52f96137L } },
+ /* 218 */
+ { { 0xcd28588e0362aL,0xa78fa5d94dd37L,0x434a526442fa8L,0xb733aff836e5aL,
+ 0x0dfb478bee5abL },
+ { 0xf1ce7673eede6L,0xd42b5b2f04a91L,0x530da2fa5390aL,0x473a5e66f7bf5L,
+ 0x0d9a140b408dfL } },
+ /* 219 */
+ { { 0x221b56e8ea498L,0x293563ee090e0L,0x35d2ade623478L,0x4b1ae06b83913L,
+ 0x0760c058d623fL },
+ { 0x9b58cc198aa79L,0xd2f07aba7f0b8L,0xde2556af74890L,0x04094e204110fL,
+ 0x07141982d8f19L } },
+ /* 220 */
+ { { 0xa0e334d4b0f45L,0x38392a94e16f0L,0x3c61d5ed9280bL,0x4e473af324c6bL,
+ 0x03af9d1ce89d5L },
+ { 0xf798120930371L,0x4c21c17097fd8L,0xc42309beda266L,0x7dd60e9545dcdL,
+ 0x0b1f815c37395L } },
+ /* 221 */
+ { { 0xaa78e89fec44aL,0x473caa4caf84fL,0x1b6a624c8c2aeL,0xf052691c807dcL,
+ 0x0a41aed141543L },
+ { 0x353997d5ffe04L,0xdf625b6e20424L,0x78177758bacb2L,0x60ef85d660be8L,
+ 0x0d6e9c1dd86fbL } },
+ /* 222 */
+ { { 0x2e97ec6853264L,0xb7e2304a0b3aaL,0x8eae9be771533L,0xf8c21b912bb7bL,
+ 0x09c9c6e10ae9bL },
+ { 0x09a59e030b74cL,0x4d6a631e90a23L,0x49b79f24ed749L,0x61b689f44b23aL,
+ 0x0566bd59640faL } },
+ /* 223 */
+ { { 0xc0118c18061f3L,0xd37c83fc70066L,0x7273245190b25L,0x345ef05fc8e02L,
+ 0x0cf2c7390f525L },
+ { 0xbceb410eb30cfL,0xba0d77703aa09L,0x50ff255cfd2ebL,0x0979e842c43a1L,
+ 0x002f517558aa2L } },
+ /* 224 */
+ { { 0xef794addb7d07L,0x4224455500396L,0x78aa3ce0b4fc7L,0xd97dfaff8eaccL,
+ 0x014e9ada5e8d4L },
+ { 0x480a12f7079e2L,0xcde4b0800edaaL,0x838157d45baa3L,0x9ae801765e2d7L,
+ 0x0a0ad4fab8e9dL } },
+ /* 225 */
+ { { 0xb76214a653618L,0x3c31eaaa5f0bfL,0x4949d5e187281L,0xed1e1553e7374L,
+ 0x0bcd530b86e56L },
+ { 0xbe85332e9c47bL,0xfeb50059ab169L,0x92bfbb4dc2776L,0x341dcdba97611L,
+ 0x0909283cf6979L } },
+ /* 226 */
+ { { 0x0032476e81a13L,0x996217123967bL,0x32e19d69bee1aL,0x549a08ed361bdL,
+ 0x035eeb7c9ace1L },
+ { 0x0ae5a7e4e5bdcL,0xd3b6ceec6e128L,0xe266bc12dcd2cL,0xe86452e4224c6L,
+ 0x09a8b2cf4448aL } },
+ /* 227 */
+ { { 0x71bf209d03b59L,0xa3b65af2abf64L,0xbd5eec9c90e62L,0x1379ff7ff168eL,
+ 0x06bdb60f4d449L },
+ { 0xafebc8a55bc30L,0x1610097fe0dadL,0xc1e3bddc79eadL,0x08a942e197414L,
+ 0x001ec3cfd94baL } },
+ /* 228 */
+ { { 0x277ebdc9485c2L,0x7922fb10c7ba6L,0x0a28d8a48cc9aL,0x64f64f61d60f7L,
+ 0x0d1acb1c04754L },
+ { 0x902b126f36612L,0x4ee0618d8bd26L,0x08357ee59c3a4L,0x26c24df8a8133L,
+ 0x07dcd079d4056L } },
+ /* 229 */
+ { { 0x7d4d3f05a4b48L,0x52372307725ceL,0x12a915aadcd29L,0x19b8d18f79718L,
+ 0x00bf53589377dL },
+ { 0xcd95a6c68ea73L,0xca823a584d35eL,0x473a723c7f3bbL,0x86fc9fb674c6fL,
+ 0x0d28be4d9e166L } },
+ /* 230 */
+ { { 0xb990638fa8e4bL,0x6e893fd8fc5d2L,0x36fb6fc559f18L,0x88ce3a6de2aa4L,
+ 0x0d76007aa510fL },
+ { 0x0aab6523a4988L,0x4474dd02732d1L,0x3407278b455cfL,0xbb017f467082aL,
+ 0x0f2b52f68b303L } },
+ /* 231 */
+ { { 0x7eafa9835b4caL,0xfcbb669cbc0d5L,0x66431982d2232L,0xed3a8eeeb680cL,
+ 0x0d8dbe98ecc5aL },
+ { 0x9be3fc5a02709L,0xe5f5ba1fa8cbaL,0x10ea85230be68L,0x9705febd43cdfL,
+ 0x0e01593a3ee55L } },
+ /* 232 */
+ { { 0x5af50ea75a0a6L,0xac57858033d3eL,0x0176406512226L,0xef066fe6d50fdL,
+ 0x0afec07b1aeb8L },
+ { 0x9956780bb0a31L,0xcc37309aae7fbL,0x1abf3896f1af3L,0xbfdd9153a15a0L,
+ 0x0a71b93546e2dL } },
+ /* 233 */
+ { { 0xe12e018f593d2L,0x28a078122bbf8L,0xba4f2add1a904L,0x23d9150505db0L,
+ 0x053a2005c6285L },
+ { 0x8b639e7f2b935L,0x5ac182961a07cL,0x518ca2c2bff97L,0x8e3d86bceea77L,
+ 0x0bf47d19b3d58L } },
+ /* 234 */
+ { { 0x967a7dd7665d5L,0x572f2f4de5672L,0x0d4903f4e3030L,0xa1b6144005ae8L,
+ 0x0001c2c7f39c9L },
+ { 0xa801469efc6d6L,0xaa7bc7a724143L,0x78150a4c810bdL,0xb99b5f65670baL,
+ 0x0fdadf8e786ffL } },
+ /* 235 */
+ { { 0x8cb88ffc00785L,0x913b48eb67fd3L,0xf368fbc77fa75L,0x3c940454d055bL,
+ 0x03a838e4d5aa4L },
+ { 0x663293e97bb9aL,0x63441d94d9561L,0xadb2a839eb933L,0x1da3515591a60L,
+ 0x03cdb8257873eL } },
+ /* 236 */
+ { { 0x140a97de77eabL,0x0d41648109137L,0xeb1d0dff7e1c5L,0x7fba762dcad2cL,
+ 0x05a60cc89f1f5L },
+ { 0x3638240d45673L,0x195913c65580bL,0xd64b7411b82beL,0x8fc0057284b8dL,
+ 0x0922ff56fdbfdL } },
+ /* 237 */
+ { { 0x65deec9a129a1L,0x57cc284e041b2L,0xebfbe3ca5b1ceL,0xcd6204380c46cL,
+ 0x072919a7df6c5L },
+ { 0xf453a8fb90f9aL,0x0b88e4031b298L,0x96f1856d719c0L,0x089ae32c0e777L,
+ 0x05e7917803624L } },
+ /* 238 */
+ { { 0x6ec557f63cdfbL,0x71f1cae4fd5c1L,0x60597ca8e6a35L,0x2fabfce26bea5L,
+ 0x04e0a5371e24cL },
+ { 0xa40d3a5765357L,0x440d73a2b4276L,0x1d11a323c89afL,0x04eeb8f370ae4L,
+ 0x0f5ff7818d566L } },
+ /* 239 */
+ { { 0x3e3fe1a09df21L,0x8ee66e8e47fbfL,0x9c8901526d5d2L,0x5e642096bd0a2L,
+ 0x0e41df0e9533fL },
+ { 0xfda40b3ba9e3fL,0xeb2604d895305L,0xf0367c7f2340cL,0x155f0866e1927L,
+ 0x08edd7d6eac4fL } },
+ /* 240 */
+ { { 0x1dc0e0bfc8ff3L,0x2be936f42fc9aL,0xca381ef14efd8L,0xee9667016f7ccL,
+ 0x01432c1caed8aL },
+ { 0x8482970b23c26L,0x730735b273ec6L,0xaef0f5aa64fe8L,0xd2c6e389f6e5eL,
+ 0x0caef480b5ac8L } },
+ /* 241 */
+ { { 0x5c97875315922L,0x713063cca5524L,0x64ef2cbd82951L,0xe236f3ce60d0bL,
+ 0x0d0ba177e8efaL },
+ { 0x9ae8fb1b3af60L,0xe53d2da20e53aL,0xf9eef281a796aL,0xae1601d63605dL,
+ 0x0f31c957c1c54L } },
+ /* 242 */
+ { { 0x58d5249cc4597L,0xb0bae0a028c0fL,0x34a814adc5015L,0x7c3aefc5fc557L,
+ 0x0013404cb96e1L },
+ { 0xe2585c9a824bfL,0x5e001eaed7b29L,0x1ef68acd59318L,0x3e6c8d6ee6826L,
+ 0x06f377c4b9193L } },
+ /* 243 */
+ { { 0x3bad1a8333fd2L,0x025a2a95b89f9L,0xaf75acea89302L,0x9506211e5037eL,
+ 0x06dba3e4ed2d0L },
+ { 0xef98cd04399cdL,0x6ee6b73adea48L,0x17ecaf31811c6L,0xf4a772f60752cL,
+ 0x0f13cf3423becL } },
+ /* 244 */
+ { { 0xb9ec0a919e2ebL,0x95f62c0f68ceeL,0xaba229983a9a1L,0xbad3cfba3bb67L,
+ 0x0c83fa9a9274bL },
+ { 0xd1b0b62fa1ce0L,0xf53418efbf0d7L,0x2706f04e58b60L,0x2683bfa8ef9e5L,
+ 0x0b49d70f45d70L } },
+ /* 245 */
+ { { 0xc7510fad5513bL,0xecb1751e2d914L,0x9fb9d5905f32eL,0xf1cf6d850418dL,
+ 0x059cfadbb0c30L },
+ { 0x7ac2355cb7fd6L,0xb8820426a3e16L,0x0a78864249367L,0x4b67eaeec58c9L,
+ 0x05babf362354aL } },
+ /* 246 */
+ { { 0x981d1ee424865L,0x78f2e5577f37cL,0x9e0c0588b0028L,0xc8f0702970f1bL,
+ 0x06188c6a79026L },
+ { 0x9a19bd0f244daL,0x5cfb08087306fL,0xf2136371eccedL,0xb9d935470f9b9L,
+ 0x0993fe475df50L } },
+ /* 247 */
+ { { 0x31cdf9b2c3609L,0xc02c46d4ea68eL,0xa77510184eb19L,0x616b7ac9ec1a9L,
+ 0x081f764664c80L },
+ { 0xc2a5a75fbe978L,0xd3f183b3561d7L,0x01dd2bf6743feL,0x060d838d1f045L,
+ 0x0564a812a5fe9L } },
+ /* 248 */
+ { { 0xa64f4fa817d1dL,0x44bea82e0f7a5L,0xd57f9aa55f968L,0x1d6cb5ff5a0fcL,
+ 0x0226bf3cf00e5L },
+ { 0x1a9f92f2833cfL,0x5a4f4f89a8d6dL,0xf3f7f7720a0a3L,0x783611536c498L,
+ 0x068779f47ff25L } },
+ /* 249 */
+ { { 0x0c1c173043d08L,0x741fc020fa79bL,0xa6d26d0a54467L,0x2e0bd3767e289L,
+ 0x097bcb0d1eb09L },
+ { 0x6eaa8f32ed3c3L,0x51b281bc482abL,0xfa178f3c8a4f1L,0x46554d1bf4f3bL,
+ 0x0a872ffe80a78L } },
+ /* 250 */
+ { { 0xb7935a32b2086L,0x0e8160f486b1aL,0xb6ae6bee1eb71L,0xa36a9bd0cd913L,
+ 0x002812bfcb732L },
+ { 0xfd7cacf605318L,0x50fdfd6d1da63L,0x102d619646e5dL,0x96afa1d683982L,
+ 0x007391cc9fe53L } },
+ /* 251 */
+ { { 0x157f08b80d02bL,0xd162877f7fc50L,0x8d542ae6b8333L,0x2a087aca1af87L,
+ 0x0355d2adc7e6dL },
+ { 0xf335a287386e1L,0x94f8e43275b41L,0x79989eafd272aL,0x3a79286ca2cdeL,
+ 0x03dc2b1e37c2aL } },
+ /* 252 */
+ { { 0x9d21c04581352L,0x25376782bed68L,0xfed701f0a00c8L,0x846b203bd5909L,
+ 0x0c47869103ccdL },
+ { 0xa770824c768edL,0x026841f6575dbL,0xaccce0e72feeaL,0x4d3273313ed56L,
+ 0x0ccc42968d5bbL } },
+ /* 253 */
+ { { 0x50de13d7620b9L,0x8a5992a56a94eL,0x75487c9d89a5cL,0x71cfdc0076406L,
+ 0x0e147eb42aa48L },
+ { 0xab4eeacf3ae46L,0xfb50350fbe274L,0x8c840eafd4936L,0x96e3df2afe474L,
+ 0x0239ac047080eL } },
+ /* 254 */
+ { { 0xd1f352bfee8d4L,0xcffa7b0fec481L,0xce9af3cce80b5L,0xe59d105c4c9e2L,
+ 0x0c55fa1a3f5f7L },
+ { 0x6f14e8257c227L,0x3f342be00b318L,0xa904fb2c5b165L,0xb69909afc998aL,
+ 0x0094cd99cd4f4L } },
+ /* 255 */
+ { { 0x81c84d703bebaL,0x5032ceb2918a9L,0x3bd49ec8631d1L,0xad33a445f2c9eL,
+ 0x0b90a30b642abL },
+ { 0x5404fb4a5abf9L,0xc375db7603b46L,0xa35d89f004750L,0x24f76f9a42cccL,
+ 0x0019f8b9a1b79L } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ return sp_256_ecc_mulmod_stripe_5(r, &p256_base, p256_table,
+ k, map, heap);
+}
+
+#endif
+
/* Multiply the base point of P256 by the scalar and return the result.
 * If map is true then convert result to affine coordinates.
 *
 * km   Scalar to multiply by.
 * r    Resulting point.
 * map  Indicates whether to convert result to affine.
 * heap Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    /* Stack storage used when dynamic allocation is not configured. */
    sp_point_256 p;
    sp_digit kd[5];
#endif
    sp_point_256* point;
    sp_digit* k = NULL;
    int err = MP_OKAY;

    /* Macro binds point to either the stack variable p or heap storage. */
    err = sp_256_point_new_5(heap, p, point);
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
                                                              DYNAMIC_TYPE_ECC);
        if (k == NULL) {
            err = MEMORY_E;
        }
    }
#else
    k = kd;
#endif
    if (err == MP_OKAY) {
        /* Convert the scalar from mp_int into five 52-bit digits. */
        sp_256_from_mp(k, 5, km);

        err = sp_256_ecc_mulmod_base_5(point, k, map, heap);
    }
    if (err == MP_OKAY) {
        /* Copy the internal point representation back to the caller. */
        err = sp_256_point_to_ecc_point_5(point, r);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (k != NULL) {
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    }
#endif
    sp_256_point_free_5(point, 0, heap);

    return err;
}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
/* Returns 1 if the number is zero.
 * Implementation is constant time.
 *
 * a Number to check.
 * returns 1 if the number is zero and 0 otherwise.
 */
static int sp_256_iszero_5(const sp_digit* a)
{
    /* OR all five digits together: the result is zero only when every
     * digit is zero; no data-dependent branching. */
    return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0;
}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_256_add_one_5(sp_digit* a)
+{
+ a[0]++;
+ sp_256_norm_5(a);
+}
+
/* Read big endian unsigned byte array into r.
 *
 * r    A single precision integer (array of 52-bit digits).
 * size Maximum number of digits to write into r.
 * a    Byte array.
 * n    Number of bytes in array to read.
 */
static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
{
    int i, j = 0;
    word32 s = 0;

    r[0] = 0;
    /* Consume bytes from the end of the array (least significant first),
     * packing 8 bits at a time into 52-bit digits. */
    for (i = n-1; i >= 0; i--) {
        r[j] |= (((sp_digit)a[i]) << s);
        if (s >= 44U) {
            /* The byte straddles a digit boundary: mask the current digit
             * to 52 bits and carry the remaining high bits into the next. */
            r[j] &= 0xfffffffffffffL;
            s = 52U - s;
            if (j + 1 >= size) {
                break;
            }
            r[++j] = (sp_digit)a[i] >> s;
            s = 8U - s;
        }
        else {
            s += 8U;
        }
    }

    /* Zero any digits not filled from the input. */
    for (j++; j < size; j++) {
        r[j] = 0;
    }
}
+
/* Generates a scalar that is in the range 1..order-1.
 *
 * rng Random number generator.
 * k   Scalar value.
 * returns RNG failures, MEMORY_E when memory allocation fails and
 * MP_OKAY on success.
 */
static int sp_256_ecc_gen_k_5(WC_RNG* rng, sp_digit* k)
{
    int err;
    byte buf[32];

    /* Rejection sampling: draw 256 random bits and retry until the value
     * is below p256_order2 (order minus 2, by name); adding one then
     * yields a scalar in [1, order-1]. */
    do {
        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
        if (err == 0) {
            sp_256_from_bin(k, 5, buf, (int)sizeof(buf));
            if (sp_256_cmp_5(k, p256_order2) < 0) {
                sp_256_add_one_5(k);
                break;
            }
        }
    }
    while (err == 0);

    return err;
}
+
/* Makes a random EC key pair.
 *
 * rng  Random number generator.
 * priv Generated private value.
 * pub  Generated public point.
 * heap Heap to use for allocation.
 * returns ECC_INF_E when the point does not have the correct order, RNG
 * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    /* Stack storage used when dynamic allocation is not configured. */
    sp_point_256 p;
    sp_digit kd[5];
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_256 inf;
#endif
#endif
    sp_point_256* point;
    sp_digit* k = NULL;
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_256* infinity;
#endif
    int err;

    (void)heap;

    /* Macro binds point to either the stack variable p or heap storage. */
    err = sp_256_point_new_5(heap, p, point);
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    if (err == MP_OKAY) {
        err = sp_256_point_new_5(heap, inf, infinity);
    }
#endif
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
                                                              DYNAMIC_TYPE_ECC);
        if (k == NULL) {
            err = MEMORY_E;
        }
    }
#else
    k = kd;
#endif

    /* Private key: random scalar in [1, order-1]. */
    if (err == MP_OKAY) {
        err = sp_256_ecc_gen_k_5(rng, k);
    }
    /* Public key: k times the base point, mapped to affine. */
    if (err == MP_OKAY) {
        err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
    }

#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    /* Validate: order * pub must be the point at infinity (represented
     * here with zero x and y — NOTE(review): representation inferred from
     * the check below; confirm against sp_256_ecc_mulmod_5). */
    if (err == MP_OKAY) {
        err = sp_256_ecc_mulmod_5(infinity, point, p256_order, 1, NULL);
    }
    if (err == MP_OKAY) {
        if ((sp_256_iszero_5(point->x) == 0) || (sp_256_iszero_5(point->y) == 0)) {
            err = ECC_INF_E;
        }
    }
#endif

    if (err == MP_OKAY) {
        err = sp_256_to_mp(k, priv);
    }
    if (err == MP_OKAY) {
        err = sp_256_point_to_ecc_point_5(point, pub);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (k != NULL) {
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    }
#endif
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_256_point_free_5(infinity, 1, heap);
#endif
    sp_256_point_free_5(point, 1, heap);

    return err;
}
+
+#ifdef HAVE_ECC_DHE
/* Write r as big endian to byte array.
 * Fixed length number of bytes written: 32
 * Note: r is normalised in place (carries propagated) as a side effect.
 *
 * r A single precision integer (five 52-bit digits).
 * a Byte array (at least 32 bytes).
 */
static void sp_256_to_bin(sp_digit* r, byte* a)
{
    int i, j, s = 0, b;

    /* Propagate carries so every digit holds exactly 52 bits. */
    for (i=0; i<4; i++) {
        r[i+1] += r[i] >> 52;
        r[i] &= 0xfffffffffffffL;
    }
    /* Emit bytes from the least significant digit into the end of the
     * array; j walks backwards from the last byte. */
    j = 256 / 8 - 1;
    a[j] = 0;
    for (i=0; i<5 && j>=0; i++) {
        b = 0;
        /* lint allow cast of mismatch sp_digit and int */
        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
        b += 8 - s;
        if (j < 0) {
            break;
        }
        /* Whole bytes of the current 52-bit digit. */
        while (b < 52) {
            a[j--] = (byte)(r[i] >> b);
            b += 8;
            if (j < 0) {
                break;
            }
        }
        /* s = bits of the next digit needed to complete the partly
         * written byte; back up j when the byte is incomplete. */
        s = 8 - (b - 52);
        if (j >= 0) {
            a[j] = 0;
        }
        if (s != 0) {
            j++;
        }
    }
}
+
/* Multiply the point by the scalar and serialize the X ordinate.
 * The number is 0 padded to maximum size on output.
 *
 * priv    Scalar to multiply the point by.
 * pub     Point to multiply.
 * out     Buffer to hold X ordinate.
 * outLen  On entry, size of the buffer in bytes.
 *         On exit, length of data in buffer in bytes.
 * heap    Heap to use for allocation.
 * returns BUFFER_E if the buffer is too small for output size,
 * MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
                          word32* outLen, void* heap)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    /* Stack storage used when dynamic allocation is not configured. */
    sp_point_256 p;
    sp_digit kd[5];
#endif
    sp_point_256* point = NULL;
    sp_digit* k = NULL;
    int err = MP_OKAY;

    /* Output is always exactly 32 bytes. */
    if (*outLen < 32U) {
        err = BUFFER_E;
    }

    if (err == MP_OKAY) {
        err = sp_256_point_new_5(heap, p, point);
    }
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
                                                              DYNAMIC_TYPE_ECC);
        if (k == NULL)
            err = MEMORY_E;
    }
#else
    k = kd;
#endif

    if (err == MP_OKAY) {
        /* point = priv * pub, mapped to affine so x is usable directly. */
        sp_256_from_mp(k, 5, priv);
        sp_256_point_from_ecc_point_5(point, pub);
            err = sp_256_ecc_mulmod_5(point, point, k, 1, heap);
    }
    if (err == MP_OKAY) {
        /* Shared secret is the big-endian encoding of the x ordinate. */
        sp_256_to_bin(point->x, out);
        *outLen = 32;
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (k != NULL) {
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    }
#endif
    sp_256_point_free_5(point, 0, heap);

    return err;
}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* Multiply a by scalar b into r. (r = a * b)
 * The result occupies 6 digits: r must have room for 5 + 1 digits.
 *
 * r A single precision integer (6 digits).
 * a A single precision integer (5 digits).
 * b A scalar.
 */
SP_NOINLINE static void sp_256_mul_d_5(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
#ifdef WOLFSSL_SP_SMALL
    /* Loop form: accumulate each 52x52-bit product into a 128-bit value,
     * write out the low 52 bits and carry the rest forward. */
    int128_t tb = b;
    int128_t t = 0;
    int i;

    for (i = 0; i < 5; i++) {
        t += tb * a[i];
        r[i] = t & 0xfffffffffffffL;
        t >>= 52;
    }
    r[5] = (sp_digit)t;
#else
    /* Unrolled form: compute all products first, then combine each
     * product's low 52 bits with the previous product's high bits. */
    int128_t tb = b;
    int128_t t[5];

    t[ 0] = tb * a[ 0];
    t[ 1] = tb * a[ 1];
    t[ 2] = tb * a[ 2];
    t[ 3] = tb * a[ 3];
    t[ 4] = tb * a[ 4];
    r[ 0] = (t[ 0] & 0xfffffffffffffL);
    r[ 1] = (sp_digit)(t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL);
    r[ 2] = (sp_digit)(t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL);
    r[ 3] = (sp_digit)(t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL);
    r[ 4] = (sp_digit)(t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL);
    r[ 5] = (sp_digit)(t[ 4] >> 52);
#endif /* WOLFSSL_SP_SMALL */
}
+
+#ifdef WOLFSSL_SP_DIV_64
/* Estimate the quotient of the double-digit value (d1:d0) divided by dv.
 * Digits are 52 bits; the quotient is built 11 bits (finally 8 bits) at a
 * time so intermediate values stay within a 64-bit sp_digit.
 *
 * d1 High 52-bit digit of the dividend.
 * d0 Low 52-bit digit of the dividend.
 * dv Divisor digit.
 * returns the quotient.
 */
static WC_INLINE sp_digit sp_256_div_word_5(sp_digit d1, sp_digit d0,
        sp_digit dv)
{
    sp_digit d, r, t;

    /* All 52 bits from d1 and top 11 bits from d0. */
    d = (d1 << 11) | (d0 >> 41);
    r = d / dv;
    d -= r * dv;
    /* Up to 12 bits in r */
    /* Next 11 bits from d0. */
    r <<= 11;
    d <<= 11;
    d |= (d0 >> 30) & ((1 << 11) - 1);
    t = d / dv;
    d -= t * dv;
    r += t;
    /* Up to 23 bits in r */
    /* Next 11 bits from d0. */
    r <<= 11;
    d <<= 11;
    d |= (d0 >> 19) & ((1 << 11) - 1);
    t = d / dv;
    d -= t * dv;
    r += t;
    /* Up to 34 bits in r */
    /* Next 11 bits from d0. */
    r <<= 11;
    d <<= 11;
    d |= (d0 >> 8) & ((1 << 11) - 1);
    t = d / dv;
    d -= t * dv;
    r += t;
    /* Up to 45 bits in r */
    /* Remaining 8 bits from d0. */
    r <<= 8;
    d <<= 8;
    d |= d0 & ((1 << 8) - 1);
    t = d / dv;
    /* d is not reduced here: the remainder is not needed after the last
     * step, only the accumulated quotient. */
    r += t;

    return r;
}
+#endif /* WOLFSSL_SP_DIV_64 */
+
/* Divide d in a and put remainder into r (m*d + r = a)
 * m is not calculated as it is not needed at this time.
 *
 * a Number to be divided (2 * 5 digits).
 * d Number to divide with (5 digits).
 * m Multiplier result (unused; may be NULL).
 * r Remainder from the division.
 * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
 */
static int sp_256_div_5(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    int i;
#ifndef WOLFSSL_SP_DIV_64
    int128_t d1;
#endif
    sp_digit dv, r1;
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* td;
#else
    /* t1: working copy of the dividend; t2: divisor times quotient digit. */
    sp_digit t1d[10], t2d[5 + 1];
#endif
    sp_digit* t1;
    sp_digit* t2;
    int err = MP_OKAY;

    (void)m;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 5 + 1), NULL,
                                                       DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
        t1 = td;
        t2 = td + 2 * 5;
#else
        t1 = t1d;
        t2 = t2d;
#endif

        /* Schoolbook division: estimate each quotient digit from the top
         * divisor digit, subtract, then correct for under-estimation. */
        dv = d[4];
        XMEMCPY(t1, a, sizeof(*t1) * 2U * 5U);
        for (i=4; i>=0; i--) {
            t1[5 + i] += t1[5 + i - 1] >> 52;
            t1[5 + i - 1] &= 0xfffffffffffffL;
#ifndef WOLFSSL_SP_DIV_64
            /* Form the two-digit dividend in 128 bits and divide. */
            d1 = t1[5 + i];
            d1 <<= 52;
            d1 += t1[5 + i - 1];
            r1 = (sp_digit)(d1 / dv);
#else
            r1 = sp_256_div_word_5(t1[5 + i], t1[5 + i - 1], dv);
#endif

            /* Subtract r1 * d from the current window of the dividend. */
            sp_256_mul_d_5(t2, d, r1);
            (void)sp_256_sub_5(&t1[i], &t1[i], t2);
            t1[5 + i] -= t2[5];
            t1[5 + i] += t1[5 + i - 1] >> 52;
            t1[5 + i - 1] &= 0xfffffffffffffL;
            /* Correction step: estimate how far the window went negative
             * and add back that multiple of d. */
            r1 = (((-t1[5 + i]) << 52) - t1[5 + i - 1]) / dv;
            r1++;
            sp_256_mul_d_5(t2, d, r1);
            (void)sp_256_add_5(&t1[i], &t1[i], t2);
            t1[5 + i] += t1[5 + i - 1] >> 52;
            t1[5 + i - 1] &= 0xfffffffffffffL;
        }
        t1[5 - 1] += t1[5 - 2] >> 52;
        t1[5 - 2] &= 0xfffffffffffffL;
        r1 = t1[5 - 1] / dv;

        /* Remove the final quotient multiple, leaving the remainder. */
        sp_256_mul_d_5(t2, d, r1);
        (void)sp_256_sub_5(t1, t1, t2);
        XMEMCPY(r, t1, sizeof(*r) * 2U * 5U);
        for (i=0; i<3; i++) {
            r[i+1] += r[i] >> 52;
            r[i] &= 0xfffffffffffffL;
        }
        /* If the remainder went negative, add d back once. */
        sp_256_cond_add_5(r, r, d, 0 - ((r[4] < 0) ?
                    (sp_digit)1 : (sp_digit)0));
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_256_mod_5(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_256_div_5(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
/* Order-2 for the P256 curve.
 * Little-endian 64-bit words; used as the exponent when inverting modulo
 * the order via exponentiation (Fermat). */
static const uint64_t p256_order_minus_2[4] = {
    0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU,
    0xffffffff00000000U
};
+#else
/* The low half of the order-2 of the P256 curve.
 * Little-endian 64-bit words; the high half is handled by a fixed
 * addition chain in sp_256_mont_inv_order_5. */
static const uint64_t p256_order_low[2] = {
    0xf3b9cac2fc63254fU,0xbce6faada7179e84U
};
+#endif /* WOLFSSL_SP_SMALL */
+
/* Multiply two numbers mod the order of P256 curve. (r = a * b mod order)
 *
 * r Result of the multiplication.
 * a First operand of the multiplication.
 * b Second operand of the multiplication.
 */
static void sp_256_mont_mul_order_5(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    /* Full product followed by Montgomery reduction modulo the order. */
    sp_256_mul_5(r, a, b);
    sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
}
+
/* Square number mod the order of P256 curve. (r = a * a mod order)
 *
 * r Result of the squaring.
 * a Number to square.
 */
static void sp_256_mont_sqr_order_5(sp_digit* r, const sp_digit* a)
{
    /* Square followed by Montgomery reduction modulo the order. */
    sp_256_sqr_5(r, a);
    sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_n_order_5(sp_digit* r, const sp_digit* a, int n)
+{
+ int i;
+
+ sp_256_mont_sqr_order_5(r, a);
+ for (i=1; i<n; i++) {
+ sp_256_mont_sqr_order_5(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
 * (r = 1 / a mod order)
 * Inversion is done by raising a to order-2 (Fermat's little theorem).
 *
 * r  Inverse result.
 * a  Number to invert.
 * td Temporary data (at least 3 * 2 * 5 digits in the large build).
 */
static void sp_256_mont_inv_order_5(sp_digit* r, const sp_digit* a,
        sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    /* Plain left-to-right square-and-multiply over the bits of order-2. */
    sp_digit* t = td;
    int i;

    XMEMCPY(t, a, sizeof(sp_digit) * 5);
    for (i=254; i>=0; i--) {
        sp_256_mont_sqr_order_5(t, t);
        if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_5(t, t, a);
        }
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 5U);
#else
    /* Fixed addition chain exploiting the structure of the P256 order:
     * the high half is all-ones/zero blocks, the low half is processed
     * bit-by-bit from p256_order_low with pre-computed a^f (t3). */
    sp_digit* t = td;
    sp_digit* t2 = td + 2 * 5;
    sp_digit* t3 = td + 4 * 5;
    int i;

    /* t = a^2 */
    sp_256_mont_sqr_order_5(t, a);
    /* t = a^3 = t * a */
    sp_256_mont_mul_order_5(t, t, a);
    /* t2= a^c = t ^ 2 ^ 2 */
    sp_256_mont_sqr_n_order_5(t2, t, 2);
    /* t3= a^f = t2 * t */
    sp_256_mont_mul_order_5(t3, t2, t);
    /* t2= a^f0 = t3 ^ 2 ^ 4 */
    sp_256_mont_sqr_n_order_5(t2, t3, 4);
    /* t = a^ff = t2 * t3 */
    sp_256_mont_mul_order_5(t, t2, t3);
    /* t2= a^ff00 = t ^ 2 ^ 8 */
    sp_256_mont_sqr_n_order_5(t2, t, 8);
    /* t = a^ffff = t2 * t */
    sp_256_mont_mul_order_5(t, t2, t);
    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
    sp_256_mont_sqr_n_order_5(t2, t, 16);
    /* t = a^ffffffff = t2 * t */
    sp_256_mont_mul_order_5(t, t2, t);
    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
    sp_256_mont_sqr_n_order_5(t2, t, 64);
    /* t2= a^ffffffff00000000ffffffff = t2 * t */
    sp_256_mont_mul_order_5(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
    sp_256_mont_sqr_n_order_5(t2, t2, 32);
    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
    sp_256_mont_mul_order_5(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
    for (i=127; i>=112; i--) {
        sp_256_mont_sqr_order_5(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_5(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
    sp_256_mont_sqr_n_order_5(t2, t2, 4);
    sp_256_mont_mul_order_5(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
    for (i=107; i>=64; i--) {
        sp_256_mont_sqr_order_5(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_5(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
    sp_256_mont_sqr_n_order_5(t2, t2, 4);
    sp_256_mont_mul_order_5(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
    for (i=59; i>=32; i--) {
        sp_256_mont_sqr_order_5(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_5(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
    sp_256_mont_sqr_n_order_5(t2, t2, 4);
    sp_256_mont_mul_order_5(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
    for (i=27; i>=0; i--) {
        sp_256_mont_sqr_order_5(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_5(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
    sp_256_mont_sqr_n_order_5(t2, t2, 4);
    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
    sp_256_mont_mul_order_5(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Sirst part of result as an mp_int.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+ mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit ed[2*5];
+ sp_digit xd[2*5];
+ sp_digit kd[2*5];
+ sp_digit rd[2*5];
+ sp_digit td[3 * 2*5];
+ sp_point_256 p;
+#endif
+ sp_digit* e = NULL;
+ sp_digit* x = NULL;
+ sp_digit* k = NULL;
+ sp_digit* r = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* point = NULL;
+ sp_digit carry;
+ sp_digit* s = NULL;
+ sp_digit* kInv = NULL;
+ int err = MP_OKAY;
+ int64_t c;
+ int i;
+
+ (void)heap;
+
+ err = sp_256_point_new_5(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ e = d + 0 * 5;
+ x = d + 2 * 5;
+ k = d + 4 * 5;
+ r = d + 6 * 5;
+ tmp = d + 8 * 5;
+#else
+ e = ed;
+ x = xd;
+ k = kd;
+ r = rd;
+ tmp = td;
+#endif
+ /* s shares e's buffer and kInv shares k's: their live ranges do not
+ * overlap with the values they alias. */
+ s = e;
+ kInv = k;
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(e, 5, hash, (int)hashLen);
+ }
+
+ /* Try at most SP_ECC_MAX_SIG_GEN times to produce a usable signature. */
+ for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+ sp_256_from_mp(x, 5, priv);
+
+ /* New random point. */
+ if (km == NULL || mp_iszero(km)) {
+ err = sp_256_ecc_gen_k_5(rng, k);
+ }
+ else {
+ sp_256_from_mp(k, 5, km);
+ mp_zero(km);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = point->x mod order */
+ XMEMCPY(r, point->x, sizeof(sp_digit) * 5U);
+ sp_256_norm_5(r);
+ c = sp_256_cmp_5(r, p256_order);
+ sp_256_cond_sub_5(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_5(r);
+
+ /* Conv k to Montgomery form (mod order) */
+ sp_256_mul_5(k, k, p256_norm_order);
+ err = sp_256_mod_5(k, k, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_5(k);
+ /* kInv = 1/k mod order */
+ sp_256_mont_inv_order_5(kInv, k, tmp);
+ sp_256_norm_5(kInv);
+
+ /* s = r * x + e */
+ sp_256_mul_5(x, x, r);
+ err = sp_256_mod_5(x, x, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_5(x);
+ carry = sp_256_add_5(s, e, x);
+ sp_256_cond_sub_5(s, s, p256_order, 0 - carry);
+ sp_256_norm_5(s);
+ c = sp_256_cmp_5(s, p256_order);
+ sp_256_cond_sub_5(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_5(s);
+
+ /* s = s * k^-1 mod order */
+ sp_256_mont_mul_order_5(s, s, kInv);
+ sp_256_norm_5(s);
+
+ /* Check that signature is usable. */
+ if (sp_256_iszero_5(s) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (i == 0) {
+ err = RNG_FAILURE_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(r, rm);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(s, sm);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ /* Zeroize the whole allocation (7 * 2 * 5 digits) before freeing so
+ * that the tmp region, which held intermediates of the secret scalar
+ * inversion, is cleared too. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 5);
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 2U * 5U);
+ XMEMSET(x, 0, sizeof(sp_digit) * 2U * 5U);
+ XMEMSET(k, 0, sizeof(sp_digit) * 2U * 5U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 5U);
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 5U);
+#endif
+ sp_256_point_free_5(point, 1, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash of the data to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of public key point.
+ * pY Y ordinate of public key point.
+ * pZ Z ordinate of public key point.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Verification result: 1 when the signature matches, 0 otherwise.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit u1d[2*5];
+ sp_digit u2d[2*5];
+ sp_digit sd[2*5];
+ sp_digit tmpd[2*5 * 5];
+ sp_point_256 p1d;
+ sp_point_256 p2d;
+#endif
+ sp_digit* u1 = NULL;
+ sp_digit* u2 = NULL;
+ sp_digit* s = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* p1;
+ sp_point_256* p2 = NULL;
+ sp_digit carry;
+ int64_t c;
+ int err;
+
+ err = sp_256_point_new_5(heap, p1d, p1);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_5(heap, p2d, p2);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ u1 = d + 0 * 5;
+ u2 = d + 2 * 5;
+ s = d + 4 * 5;
+ tmp = d + 6 * 5;
+#else
+ u1 = u1d;
+ u2 = u2d;
+ s = sd;
+ tmp = tmpd;
+#endif
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ /* u1 = e (truncated hash), u2 = r, p2 = public key point Q. */
+ sp_256_from_bin(u1, 5, hash, (int)hashLen);
+ sp_256_from_mp(u2, 5, r);
+ sp_256_from_mp(s, 5, sm);
+ sp_256_from_mp(p2->x, 5, pX);
+ sp_256_from_mp(p2->y, 5, pY);
+ sp_256_from_mp(p2->z, 5, pZ);
+
+ /* Convert s into Montgomery form (mod order). */
+ {
+ sp_256_mul_5(s, s, p256_norm_order);
+ }
+ err = sp_256_mod_5(s, s, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_5(s);
+ /* u1 = e/s mod order, u2 = r/s mod order. */
+ {
+ sp_256_mont_inv_order_5(s, s, tmp);
+ sp_256_mont_mul_order_5(u1, u1, s);
+ sp_256_mont_mul_order_5(u2, u2, s);
+ }
+
+ err = sp_256_ecc_mulmod_base_5(p1, u1, 0, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_5(p2, p2, u2, 0, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* p1 = u1.G + u2.Q, left in projective form. */
+ {
+ sp_256_proj_point_add_5(p1, p1, p2, tmp);
+ if (sp_256_iszero_5(p1->z)) {
+ if (sp_256_iszero_5(p1->x) && sp_256_iszero_5(p1->y)) {
+ /* Addition of the same point: use doubling instead. */
+ sp_256_proj_point_dbl_5(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ p1->x[4] = 0;
+ XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ }
+ }
+ }
+
+ /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+ /* Reload r and convert to Montgomery form. */
+ sp_256_from_mp(u2, 5, r);
+ err = sp_256_mod_mul_norm_5(u2, u2, p256_mod);
+ }
+
+ if (err == MP_OKAY) {
+ /* u1 = r.z'.z' mod prime */
+ sp_256_mont_sqr_5(p1->z, p1->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_5(u1, u2, p1->z, p256_mod, p256_mp_mod);
+ *res = (int)(sp_256_cmp_5(p1->x, u1) == 0);
+ if (*res == 0) {
+ /* Reload r and add order. */
+ sp_256_from_mp(u2, 5, r);
+ carry = sp_256_add_5(u2, u2, p256_order);
+ /* Carry means result is greater than mod and is not valid. */
+ if (carry == 0) {
+ sp_256_norm_5(u2);
+
+ /* Compare with mod and if greater or equal then not valid. */
+ c = sp_256_cmp_5(u2, p256_mod);
+ if (c < 0) {
+ /* Convert to Montgomery form */
+ err = sp_256_mod_mul_norm_5(u2, u2, p256_mod);
+ if (err == MP_OKAY) {
+ /* u1 = (r + 1*order).z'.z' mod prime */
+ sp_256_mont_mul_5(u1, u2, p1->z, p256_mod,
+ p256_mp_mod);
+ *res = (int)(sp_256_cmp_5(p1->x, u1) == 0);
+ }
+ }
+ }
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_256_point_free_5(p1, 0, heap);
+ sp_256_point_free_5(p2, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_5(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit t1d[2*5];
+ sp_digit t2d[2*5];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 4, heap, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 5;
+ t2 = d + 2 * 5;
+#else
+ (void)heap;
+
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ /* Check curve equation y^2 = x^3 - 3x + b, rearranged as
+ * y^2 - x^3 + 3x == b. */
+ /* t1 = y^2 mod p */
+ sp_256_sqr_5(t1, point->y);
+ (void)sp_256_mod_5(t1, t1, p256_mod);
+ /* t2 = x^3 mod p */
+ sp_256_sqr_5(t2, point->x);
+ (void)sp_256_mod_5(t2, t2, p256_mod);
+ sp_256_mul_5(t2, t2, point->x);
+ (void)sp_256_mod_5(t2, t2, p256_mod);
+ /* t1 = y^2 - x^3 (negate then add to stay non-negative) */
+ (void)sp_256_sub_5(t2, p256_mod, t2);
+ sp_256_mont_add_5(t1, t1, t2, p256_mod);
+
+ /* t1 = y^2 - x^3 + 3x */
+ sp_256_mont_add_5(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_5(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_5(t1, t1, point->x, p256_mod);
+
+ if (sp_256_cmp_5(t1, p256_b) != 0) {
+ err = MP_VAL;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 pubd;
+#endif
+ sp_point_256* pub;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_5(NULL, pubd, pub);
+ if (err == MP_OKAY) {
+ /* Build an affine point (z = 1) and test the curve equation. */
+ sp_256_from_mp(pub->x, 5, pX);
+ sp_256_from_mp(pub->y, 5, pY);
+ sp_256_from_bin(pub->z, 5, one, (int)sizeof(one));
+
+ err = sp_256_ecc_is_point_5(pub, NULL);
+ }
+
+ sp_256_point_free_5(pub, 0, NULL);
+
+ return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[5];
+ sp_point_256 pubd;
+ sp_point_256 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_256* pub;
+ sp_point_256* p = NULL;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_5(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_5(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ priv = privd;
+#endif
+
+ sp_256_from_mp(pub->x, 5, pX);
+ sp_256_from_mp(pub->y, 5, pY);
+ sp_256_from_bin(pub->z, 5, one, (int)sizeof(one));
+ sp_256_from_mp(priv, 5, privm);
+
+ /* Check point at infinity. */
+ if ((sp_256_iszero_5(pub->x) != 0) &&
+ (sp_256_iszero_5(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y */
+ if (sp_256_cmp_5(pub->x, p256_mod) >= 0 ||
+ sp_256_cmp_5(pub->y, p256_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_256_ecc_is_point_5(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order = infinity */
+ err = sp_256_ecc_mulmod_5(p, pub, p256_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_256_iszero_5(p->x) == 0) ||
+ (sp_256_iszero_5(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private = point */
+ err = sp_256_ecc_mulmod_base_5(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_256_cmp_5(p->x, pub->x) != 0 ||
+ sp_256_cmp_5(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_5(p, 0, heap);
+ sp_256_point_free_5(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* qX, mp_int* qY, mp_int* qZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 5 * 5];
+ sp_point_256 pd;
+ sp_point_256 qd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ sp_point_256* q = NULL;
+ int err;
+
+ err = sp_256_point_new_5(NULL, pd, p);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_5(NULL, qd, q);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Load both points and add in place: p = p + q. */
+ sp_256_from_mp(p->x, 5, pX);
+ sp_256_from_mp(p->y, 5, pY);
+ sp_256_from_mp(p->z, 5, pZ);
+ sp_256_from_mp(q->x, 5, qX);
+ sp_256_from_mp(q->y, 5, qY);
+ sp_256_from_mp(q->z, 5, qZ);
+
+ sp_256_proj_point_add_5(p, p, q, tmp);
+ }
+
+ /* Copy the projective result out. */
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_5(q, 0, NULL);
+ sp_256_point_free_5(p, 0, NULL);
+
+ return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 5 * 2];
+ sp_point_256 pd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ int err;
+
+ err = sp_256_point_new_5(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 2, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Load the point and double in place: p = 2p. */
+ sp_256_from_mp(p->x, 5, pX);
+ sp_256_from_mp(p->y, 5, pY);
+ sp_256_from_mp(p->z, 5, pZ);
+
+ sp_256_proj_point_dbl_5(p, p, tmp);
+ }
+
+ /* Copy the projective result out. */
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_5(p, 0, NULL);
+
+ return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 5 * 4];
+ sp_point_256 pd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ int err;
+
+ err = sp_256_point_new_5(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 4, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+ if (err == MP_OKAY) {
+ /* Load projective point and map to affine (z becomes 1). */
+ sp_256_from_mp(p->x, 5, pX);
+ sp_256_from_mp(p->y, 5, pY);
+ sp_256_from_mp(p->z, 5, pZ);
+
+ sp_256_map_5(p, p, tmp);
+ }
+
+ /* Write the affine ordinates back over the inputs. */
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, pX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, pY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, pZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_5(p, 0, NULL);
+
+ return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mont_sqrt_5(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit t1d[2 * 5];
+ sp_digit t2d[2 * 5];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 5;
+ t2 = d + 2 * 5;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ /* Fixed addition chain computing y^((p256 + 1) / 4); square root by
+ * exponentiation is valid as the prime is congruent to 3 mod 4. */
+ {
+ /* t2 = y ^ 0x2 */
+ sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0x3 */
+ sp_256_mont_mul_5(t1, t2, y, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xc */
+ sp_256_mont_sqr_n_5(t2, t1, 2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xf */
+ sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xf0 */
+ sp_256_mont_sqr_n_5(t2, t1, 4, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xff */
+ sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xff00 */
+ sp_256_mont_sqr_n_5(t2, t1, 8, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffff */
+ sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xffff0000 */
+ sp_256_mont_sqr_n_5(t2, t1, 16, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff */
+ sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000000 */
+ sp_256_mont_sqr_n_5(t1, t1, 32, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001 */
+ sp_256_mont_mul_5(t1, t1, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+ sp_256_mont_sqr_n_5(t1, t1, 96, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+ sp_256_mont_mul_5(t1, t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_n_5(y, t1, 94, p256_mod, p256_mp_mod);
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit xd[2 * 5];
+ sp_digit yd[2 * 5];
+#endif
+ sp_digit* x = NULL;
+ sp_digit* y = NULL;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ x = d + 0 * 5;
+ y = d + 2 * 5;
+#else
+ x = xd;
+ y = yd;
+#endif
+
+ /* Work in Montgomery form throughout. */
+ sp_256_from_mp(x, 5, xm);
+ err = sp_256_mod_mul_norm_5(x, x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ /* y = x^3 */
+ {
+ sp_256_mont_sqr_5(y, x, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_5(y, y, x, p256_mod, p256_mp_mod);
+ }
+ /* y = x^3 - 3x */
+ sp_256_mont_sub_5(y, y, x, p256_mod);
+ sp_256_mont_sub_5(y, y, x, p256_mod);
+ sp_256_mont_sub_5(y, y, x, p256_mod);
+ /* y = x^3 - 3x + b (x is reused to hold b in Montgomery form) */
+ err = sp_256_mod_mul_norm_5(x, p256_b, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ sp_256_mont_add_5(y, y, x, p256_mod);
+ /* y = sqrt(x^3 - 3x + b) */
+ err = sp_256_mont_sqrt_5(y);
+ }
+ if (err == MP_OKAY) {
+ /* Leave Montgomery form, then pick the root with matching parity. */
+ XMEMSET(y + 5, 0, 5U * sizeof(sp_digit));
+ sp_256_mont_reduce_5(y, p256_mod, p256_mp_mod);
+ if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+ sp_256_mont_sub_5(y, p256_mod, y, p256_mod);
+ }
+
+ err = sp_256_to_mp(y, ym);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+ sp_digit x[2 * 7];
+ sp_digit y[2 * 7];
+ sp_digit z[2 * 7];
+ int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[7] = {
+ 0x000000ffffffffL,0x7ffe0000000000L,0x7ffffffffbffffL,0x7fffffffffffffL,
+ 0x7fffffffffffffL,0x7fffffffffffffL,0x3fffffffffffffL
+};
+/* The Montgomery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[7] = {
+ 0x7fffff00000001L,0x0001ffffffffffL,0x00000000040000L,0x00000000000000L,
+ 0x00000000000000L,0x00000000000000L,0x00000000000000L
+};
+/* The Montgomery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x0000100000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[7] = {
+ 0x6c196accc52973L,0x1b6491614ef5d9L,0x07d0dcb77d6068L,0x7ffffffe3b1a6cL,
+ 0x7fffffffffffffL,0x7fffffffffffffL,0x3fffffffffffffL
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[7] = {
+ 0x6c196accc52971L,0x1b6491614ef5d9L,0x07d0dcb77d6068L,0x7ffffffe3b1a6cL,
+ 0x7fffffffffffffL,0x7fffffffffffffL,0x3fffffffffffffL
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[7] = {
+ 0x13e695333ad68dL,0x649b6e9eb10a26L,0x782f2348829f97L,0x00000001c4e593L,
+ 0x00000000000000L,0x00000000000000L,0x00000000000000L
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0x546089e88fdc45l;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+ /* X ordinate */
+ {
+ 0x545e3872760ab7L,0x64bb7eaa52d874L,0x020950a8e1540bL,
+ 0x5d3cdcc2cfba0fL,0x0ad746e1d3b628L,0x26f1d638e3de64L,0x2aa1f288afa2c1L,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Y ordinate */
+ {
+ 0x431d7c90ea0e5fL,0x639c3afd033af4L,0x4ed7c2e3002982L,
+ 0x44d0a3e74ed188L,0x2dc29f8f41dbd2L,0x0debb3d317f252L,0x0d85f792a5898bL,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Z ordinate */
+ {
+ 0x00000000000001L,0x00000000000000L,0x00000000000000L,
+ 0x00000000000000L,0x00000000000000L,0x00000000000000L,0x00000000000000L,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* infinity */
+ 0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+/* The b constant of the curve equation y^2 = x^3 - 3x + b for P384. */
+static const sp_digit p384_b[7] = {
+ 0x05c8edd3ec2aefL,0x731b145da33a55L,0x3d404e1d6b1958L,0x740a089018a044L,
+ 0x02d19181d9c6efL,0x7c9311c0ad7c7fL,0x2ccc4be9f88fb9L
+};
+#endif
+
+/* Provide a new P384 point: either heap-allocated (small/small-stack builds)
+ * or the caller-supplied stack point. Returns MEMORY_E on allocation failure,
+ * MP_OKAY otherwise. */
+static int sp_384_point_new_ex_7(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+ int ret = MP_OKAY;
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ (void)sp;
+ *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+ *p = sp;
+#endif
+ if (*p == NULL) {
+ ret = MEMORY_E;
+ }
+ return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_7(heap, sp, p) sp_384_point_new_ex_7((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_7(heap, sp, p) sp_384_point_new_ex_7((heap), &(sp), &(p))
+#endif
+
+
+/* Release a point obtained from sp_384_point_new_7, optionally zeroizing its
+ * data first (clear != 0). Frees only when the point was heap allocated. */
+static void sp_384_point_free_7(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+ if (p != NULL) {
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+/* Clear point data if requested. */
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+#endif
+ (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ int64_t* td;
+#else
+ int64_t td[12];
+ int64_t a32d[12];
+#endif
+ int64_t* t;
+ int64_t* a32;
+ int64_t o;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = td;
+ a32 = td + 12;
+#else
+ t = td;
+ a32 = a32d;
+#endif
+
+ /* Unpack seven 55-bit digits into twelve 32-bit words. */
+ a32[0] = (sp_digit)(a[0]) & 0xffffffffL;
+ a32[1] = (sp_digit)(a[0] >> 32U);
+ a32[1] |= a[1] << 23U;
+ a32[1] &= 0xffffffffL;
+ a32[2] = (sp_digit)(a[1] >> 9U) & 0xffffffffL;
+ a32[3] = (sp_digit)(a[1] >> 41U);
+ a32[3] |= a[2] << 14U;
+ a32[3] &= 0xffffffffL;
+ a32[4] = (sp_digit)(a[2] >> 18U) & 0xffffffffL;
+ a32[5] = (sp_digit)(a[2] >> 50U);
+ a32[5] |= a[3] << 5U;
+ a32[5] &= 0xffffffffL;
+ a32[6] = (sp_digit)(a[3] >> 27U);
+ a32[6] |= a[4] << 28U;
+ a32[6] &= 0xffffffffL;
+ a32[7] = (sp_digit)(a[4] >> 4U) & 0xffffffffL;
+ a32[8] = (sp_digit)(a[4] >> 36U);
+ a32[8] |= a[5] << 19U;
+ a32[8] &= 0xffffffffL;
+ a32[9] = (sp_digit)(a[5] >> 13U) & 0xffffffffL;
+ a32[10] = (sp_digit)(a[5] >> 45U);
+ a32[10] |= a[6] << 10U;
+ a32[10] &= 0xffffffffL;
+ a32[11] = (sp_digit)(a[6] >> 22U) & 0xffffffffL;
+
+ /* Apply the fixed reduction matrix (rows shown as coefficients of
+ * a32[0..11]) that multiplies by the normalizer mod p384. */
+ /* 1 0 0 0 0 0 0 0 1 1 0 -1 */
+ t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11];
+ /* -1 1 0 0 0 0 0 0 -1 0 1 1 */
+ t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11];
+ /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */
+ t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11];
+ /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */
+ t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11];
+ /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */
+ t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] - 2 * a32[11];
+ /* 0 1 1 0 -1 1 0 0 0 1 2 1 */
+ t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11];
+ /* 0 0 1 1 0 -1 1 0 0 0 1 2 */
+ t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11];
+ /* 0 0 0 1 1 0 -1 1 0 0 0 1 */
+ t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11];
+ /* 0 0 0 0 1 1 0 -1 1 0 0 0 */
+ t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8];
+ /* 0 0 0 0 0 1 1 0 -1 1 0 0 */
+ t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9];
+ /* 0 0 0 0 0 0 1 1 0 -1 1 0 */
+ t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10];
+ /* 0 0 0 0 0 0 0 1 1 0 -1 1 */
+ t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11];
+
+ /* First carry propagation pass over the 32-bit words. */
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+ t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+ t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+ t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+ /* Fold the top-word overflow back in (p384 = 2^384 - 2^128 - 2^96 +
+ * 2^32 - 1 pattern) and propagate carries once more. */
+ o = t[11] >> 32; t[11] &= 0xffffffff;
+ t[0] += o;
+ t[1] -= o;
+ t[3] += o;
+ t[4] += o;
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+ t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+ t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+ t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+ /* Repack twelve 32-bit words into seven 55-bit digits. */
+ r[0] = t[0];
+ r[0] |= t[1] << 32U;
+ r[0] &= 0x7fffffffffffffLL;
+ r[1] = (sp_digit)(t[1] >> 23);
+ r[1] |= t[2] << 9U;
+ r[1] |= t[3] << 41U;
+ r[1] &= 0x7fffffffffffffLL;
+ r[2] = (sp_digit)(t[3] >> 14);
+ r[2] |= t[4] << 18U;
+ r[2] |= t[5] << 50U;
+ r[2] &= 0x7fffffffffffffLL;
+ r[3] = (sp_digit)(t[5] >> 5);
+ r[3] |= t[6] << 27U;
+ r[3] &= 0x7fffffffffffffLL;
+ r[4] = (sp_digit)(t[6] >> 28);
+ r[4] |= t[7] << 4U;
+ r[4] |= t[8] << 36U;
+ r[4] &= 0x7fffffffffffffLL;
+ r[5] = (sp_digit)(t[8] >> 19);
+ r[5] |= t[9] << 13U;
+ r[5] |= t[10] << 45U;
+ r[5] &= 0x7fffffffffffffLL;
+ r[6] = (sp_digit)(t[10] >> 10);
+ r[6] |= t[11] << 22U;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL)
+ XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+ return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 55
+ /* Same digit size: straight copy then zero-pad. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 55
+ /* mp digits are wider: split each across one or more 55-bit digits. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0x7fffffffffffffL;
+ s = 55U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 55U) <= (word32)DIGIT_BIT) {
+ s += 55U;
+ r[j] &= 0x7fffffffffffffL;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp digits are narrower: accumulate them into 55-bit digits. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 55) {
+ r[j] &= 0x7fffffffffffffL;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 55 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * p Point of type sp_point_384 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_7(sp_point_384* p, const ecc_point* pm)
+{
+ /* Clear first: sp_384_from_mp only writes up to 7 digits of each
+ * double-width (2 * 7) coordinate array. */
+ XMEMSET(p->x, 0, sizeof(p->x));
+ XMEMSET(p->y, 0, sizeof(p->y));
+ XMEMSET(p->z, 0, sizeof(p->z));
+ sp_384_from_mp(p->x, 7, pm->x);
+ sp_384_from_mp(p->y, 7, pm->y);
+ sp_384_from_mp(p->z, 7, pm->z);
+ p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 55
+ /* Same digit size: straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 7);
+ r->used = 7;
+ mp_clamp(r);
+#elif DIGIT_BIT < 55
+ /* mp digits are narrower: split each 55-bit digit across several. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 7; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 55) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 55 - s;
+ }
+ r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp digits are wider: accumulate 55-bit digits into each. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 7; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 55 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 55 - s;
+ }
+ else {
+ s += 55;
+ }
+ }
+ r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p Point of type sp_point_384.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_7(const sp_point_384* p, ecc_point* pm)
+{
+    int err;
+
+    /* Convert each ordinate in turn, stopping at the first failure. */
+    err = sp_384_to_mp(p->x, pm->x);
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pm->y);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pm->z);
+    }
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * The 7x7 digit product occupies 14 digits of r.
+ *
+ * r A single precision integer (14 digits, result).
+ * a A single precision integer (7 x 55-bit digits).
+ * b A single precision integer (7 x 55-bit digits).
+ */
+SP_NOINLINE static void sp_384_mul_7(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int i, j, k;
+    int128_t c;
+
+    /* Column-wise (comba-style) multiply, working from the most significant
+     * column down.  c holds the current column sum with the previous
+     * column's low part kept shifted up by 55 bits. */
+    c = ((int128_t)a[6]) * b[6];
+    r[13] = (sp_digit)(c >> 55);
+    c = (c & 0x7fffffffffffffL) << 55;
+    for (k = 11; k >= 0; k--) {
+        /* Sum all partial products a[i]*b[j] with i + j == k. */
+        for (i = 6; i >= 0; i--) {
+            j = k - i;
+            if (j >= 7) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * b[j];
+        }
+        r[k + 2] += c >> 110;
+        r[k + 1] = (c >> 55) & 0x7fffffffffffffL;
+        c = (c & 0x7fffffffffffffL) << 55;
+    }
+    r[0] = (sp_digit)(c >> 55);
+}
+
+#else
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Fully unrolled: t0..t12 are the 13 partial-product columns, followed by
+ * one carry-propagation pass that splits them into 55-bit digits.
+ *
+ * r A single precision integer (14 digits, result).
+ * a A single precision integer (7 x 55-bit digits).
+ * b A single precision integer (7 x 55-bit digits).
+ */
+SP_NOINLINE static void sp_384_mul_7(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int128_t t0   = ((int128_t)a[ 0]) * b[ 0];
+    int128_t t1   = ((int128_t)a[ 0]) * b[ 1]
+                 + ((int128_t)a[ 1]) * b[ 0];
+    int128_t t2   = ((int128_t)a[ 0]) * b[ 2]
+                 + ((int128_t)a[ 1]) * b[ 1]
+                 + ((int128_t)a[ 2]) * b[ 0];
+    int128_t t3   = ((int128_t)a[ 0]) * b[ 3]
+                 + ((int128_t)a[ 1]) * b[ 2]
+                 + ((int128_t)a[ 2]) * b[ 1]
+                 + ((int128_t)a[ 3]) * b[ 0];
+    int128_t t4   = ((int128_t)a[ 0]) * b[ 4]
+                 + ((int128_t)a[ 1]) * b[ 3]
+                 + ((int128_t)a[ 2]) * b[ 2]
+                 + ((int128_t)a[ 3]) * b[ 1]
+                 + ((int128_t)a[ 4]) * b[ 0];
+    int128_t t5   = ((int128_t)a[ 0]) * b[ 5]
+                 + ((int128_t)a[ 1]) * b[ 4]
+                 + ((int128_t)a[ 2]) * b[ 3]
+                 + ((int128_t)a[ 3]) * b[ 2]
+                 + ((int128_t)a[ 4]) * b[ 1]
+                 + ((int128_t)a[ 5]) * b[ 0];
+    int128_t t6   = ((int128_t)a[ 0]) * b[ 6]
+                 + ((int128_t)a[ 1]) * b[ 5]
+                 + ((int128_t)a[ 2]) * b[ 4]
+                 + ((int128_t)a[ 3]) * b[ 3]
+                 + ((int128_t)a[ 4]) * b[ 2]
+                 + ((int128_t)a[ 5]) * b[ 1]
+                 + ((int128_t)a[ 6]) * b[ 0];
+    int128_t t7   = ((int128_t)a[ 1]) * b[ 6]
+                 + ((int128_t)a[ 2]) * b[ 5]
+                 + ((int128_t)a[ 3]) * b[ 4]
+                 + ((int128_t)a[ 4]) * b[ 3]
+                 + ((int128_t)a[ 5]) * b[ 2]
+                 + ((int128_t)a[ 6]) * b[ 1];
+    int128_t t8   = ((int128_t)a[ 2]) * b[ 6]
+                 + ((int128_t)a[ 3]) * b[ 5]
+                 + ((int128_t)a[ 4]) * b[ 4]
+                 + ((int128_t)a[ 5]) * b[ 3]
+                 + ((int128_t)a[ 6]) * b[ 2];
+    int128_t t9   = ((int128_t)a[ 3]) * b[ 6]
+                 + ((int128_t)a[ 4]) * b[ 5]
+                 + ((int128_t)a[ 5]) * b[ 4]
+                 + ((int128_t)a[ 6]) * b[ 3];
+    int128_t t10  = ((int128_t)a[ 4]) * b[ 6]
+                 + ((int128_t)a[ 5]) * b[ 5]
+                 + ((int128_t)a[ 6]) * b[ 4];
+    int128_t t11  = ((int128_t)a[ 5]) * b[ 6]
+                 + ((int128_t)a[ 6]) * b[ 5];
+    int128_t t12  = ((int128_t)a[ 6]) * b[ 6];
+
+    /* Carry propagation: mask each column to 55 bits, push the rest up. */
+    t1   += t0  >> 55; r[ 0] = t0  & 0x7fffffffffffffL;
+    t2   += t1  >> 55; r[ 1] = t1  & 0x7fffffffffffffL;
+    t3   += t2  >> 55; r[ 2] = t2  & 0x7fffffffffffffL;
+    t4   += t3  >> 55; r[ 3] = t3  & 0x7fffffffffffffL;
+    t5   += t4  >> 55; r[ 4] = t4  & 0x7fffffffffffffL;
+    t6   += t5  >> 55; r[ 5] = t5  & 0x7fffffffffffffL;
+    t7   += t6  >> 55; r[ 6] = t6  & 0x7fffffffffffffL;
+    t8   += t7  >> 55; r[ 7] = t7  & 0x7fffffffffffffL;
+    t9   += t8  >> 55; r[ 8] = t8  & 0x7fffffffffffffL;
+    t10  += t9  >> 55; r[ 9] = t9  & 0x7fffffffffffffL;
+    t11  += t10 >> 55; r[10] = t10 & 0x7fffffffffffffL;
+    t12  += t11 >> 55; r[11] = t11 & 0x7fffffffffffffL;
+    r[13] = (sp_digit)(t12 >> 55);
+    r[12] = t12 & 0x7fffffffffffffL;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Reduction mod the curve order re-uses the generic Montgomery reduction. */
+#define sp_384_mont_reduce_order_7 sp_384_mont_reduce_7
+
+/* Compare a with b in constant time.
+ *
+ * All 7 digits are always examined; once a difference is found r becomes
+ * non-zero and the mask (0 - (r == 0)) suppresses later contributions, so
+ * the amount of work does not depend on where the numbers differ.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_384_cmp_7(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    /* Scan from the most significant digit down. */
+    for (i=6; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 (all ones) to subtract and 0 when not; any other value gives
+ * undefined results.  No branch is taken on m.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_384_cond_sub_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] - (b[ 0] & m);
+    r[ 1] = a[ 1] - (b[ 1] & m);
+    r[ 2] = a[ 2] - (b[ 2] & m);
+    r[ 3] = a[ 3] - (b[ 3] & m);
+    r[ 4] = a[ 4] - (b[ 4] & m);
+    r[ 5] = a[ 5] - (b[ 5] & m);
+    r[ 6] = a[ 6] - (b[ 6] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_384_mul_add_7(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    /* t carries the running sum; each output digit is masked to 55 bits. */
+    for (i = 0; i < 7; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x7fffffffffffffL;
+        t >>= 55;
+    }
+    r[7] += t;
+#else
+    int128_t tb = b;
+    int128_t t[7];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    /* NOTE: digits of r are not re-normalized here and may exceed 55 bits;
+     * callers are expected to propagate carries (see sp_384_mont_reduce_7).
+     */
+    r[ 0] +=  (sp_digit)(t[ 0] & 0x7fffffffffffffL);
+    r[ 1] += (sp_digit)((t[ 0] >> 55) + (t[ 1] & 0x7fffffffffffffL));
+    r[ 2] += (sp_digit)((t[ 1] >> 55) + (t[ 2] & 0x7fffffffffffffL));
+    r[ 3] += (sp_digit)((t[ 2] >> 55) + (t[ 3] & 0x7fffffffffffffL));
+    r[ 4] += (sp_digit)((t[ 3] >> 55) + (t[ 4] & 0x7fffffffffffffL));
+    r[ 5] += (sp_digit)((t[ 4] >> 55) + (t[ 5] & 0x7fffffffffffffL));
+    r[ 6] += (sp_digit)((t[ 5] >> 55) + (t[ 6] & 0x7fffffffffffffL));
+    r[ 7] +=  (sp_digit)(t[ 6] >> 55);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Normalize the values in each word to 55 bits.
+ *
+ * Carries are propagated upward once: after this, digits 0..5 are within
+ * 55 bits; any excess accumulates in the top digit a[6].
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_384_norm_7(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 6; i++) {
+        a[i+1] += a[i] >> 55;
+        a[i] &= 0x7fffffffffffffL;
+    }
+#else
+    a[1] += a[0] >> 55; a[0] &= 0x7fffffffffffffL;
+    a[2] += a[1] >> 55; a[1] &= 0x7fffffffffffffL;
+    a[3] += a[2] >> 55; a[2] &= 0x7fffffffffffffL;
+    a[4] += a[3] >> 55; a[3] &= 0x7fffffffffffffL;
+    a[5] += a[4] >> 55; a[4] &= 0x7fffffffffffffL;
+    a[6] += a[5] >> 55; a[5] &= 0x7fffffffffffffL;
+#endif
+}
+
+/* Shift the result in the high 384 bits down to the bottom.
+ *
+ * Bit 384 of a is bit 54 of a[6] (384 = 6*55 + 54).  Since 7 digits hold
+ * 385 bits, each of a[7..13] sits one bit above a 55-bit boundary of the
+ * high half, hence the shifts left by 1 when folding them in.
+ *
+ * r A single precision number.
+ * a A single precision number.
+ */
+static void sp_384_mont_shift_7(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    word64 n;
+
+    n = a[6] >> 54;
+    for (i = 0; i < 6; i++) {
+        n += (word64)a[7 + i] << 1;
+        r[i] = n & 0x7fffffffffffffL;
+        n >>= 55;
+    }
+    n += (word64)a[13] << 1;
+    r[6] = n;
+#else
+    word64 n;
+
+    n  = a[6] >> 54;
+    n += (word64)a[ 7] << 1U; r[ 0] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[ 8] << 1U; r[ 1] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[ 9] << 1U; r[ 2] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[10] << 1U; r[ 3] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[11] << 1U; r[ 4] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[12] << 1U; r[ 5] = n & 0x7fffffffffffffUL; n >>= 55U;
+    n += (word64)a[13] << 1U; r[ 6] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    /* Clear the now-unused high half. */
+    XMEMSET(&r[7], 0, sizeof(*r) * 7U);
+}
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_384_mont_reduce_7(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    int i;
+    sp_digit mu;
+
+    sp_384_norm_7(a + 7);
+
+    /* For each low digit, pick mu so that digit becomes zero mod 2^55 after
+     * adding mu*m, then push the carry up. */
+    for (i=0; i<6; i++) {
+        mu = (a[i] * mp) & 0x7fffffffffffffL;
+        sp_384_mul_add_7(a+i, m, mu);
+        a[i+1] += a[i] >> 55;
+    }
+    /* Top digit only holds 54 bits (384 = 6*55 + 54), so mask to 2^54-1. */
+    mu = (a[i] * mp) & 0x3fffffffffffffL;
+    sp_384_mul_add_7(a+i, m, mu);
+    a[i+1] += a[i] >> 55;
+    a[i] &= 0x7fffffffffffffL;
+
+    sp_384_mont_shift_7(a, a);
+    /* Conditional (branch-free) final subtraction of the modulus. */
+    sp_384_cond_sub_7(a, a, m, 0 - (((a[6] >> 54) > 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(a);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full 14-digit product followed by Montgomery reduction.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_mul_7(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_7(r, a, b);
+    sp_384_mont_reduce_7(r, m, mp);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-wise squaring: off-diagonal products a[i]*a[j] (i > j) are counted
+ * twice, the diagonal product once.
+ *
+ * r A single precision integer (14 digits, result).
+ * a A single precision integer (7 x 55-bit digits).
+ */
+SP_NOINLINE static void sp_384_sqr_7(sp_digit* r, const sp_digit* a)
+{
+    int i, j, k;
+    int128_t c;
+
+    c = ((int128_t)a[6]) * a[6];
+    r[13] = (sp_digit)(c >> 55);
+    c = (c & 0x7fffffffffffffL) << 55;
+    for (k = 11; k >= 0; k--) {
+        /* Sum the off-diagonal half of column k, doubled. */
+        for (i = 6; i >= 0; i--) {
+            j = k - i;
+            if (j >= 7 || i <= j) {
+                break;
+            }
+            if (j < 0) {
+                continue;
+            }
+
+            c += ((int128_t)a[i]) * a[j] * 2;
+        }
+        /* Add the diagonal term when k is even (i == j == k/2). */
+        if (i == j) {
+           c += ((int128_t)a[i]) * a[i];
+        }
+
+        r[k + 2] += c >> 110;
+        r[k + 1] = (c >> 55) & 0x7fffffffffffffL;
+        c = (c & 0x7fffffffffffffL) << 55;
+    }
+    r[0] = (sp_digit)(c >> 55);
+}
+
+#else
+/* Square a and put result in r. (r = a * a)
+ *
+ * Fully unrolled: t0..t12 are the 13 columns (off-diagonal terms doubled),
+ * followed by one carry-propagation pass into 55-bit digits.
+ *
+ * r A single precision integer (14 digits, result).
+ * a A single precision integer (7 x 55-bit digits).
+ */
+SP_NOINLINE static void sp_384_sqr_7(sp_digit* r, const sp_digit* a)
+{
+    int128_t t0   =  ((int128_t)a[ 0]) * a[ 0];
+    int128_t t1   = (((int128_t)a[ 0]) * a[ 1]) * 2;
+    int128_t t2   = (((int128_t)a[ 0]) * a[ 2]) * 2
+                 +  ((int128_t)a[ 1]) * a[ 1];
+    int128_t t3   = (((int128_t)a[ 0]) * a[ 3]
+                 +  ((int128_t)a[ 1]) * a[ 2]) * 2;
+    int128_t t4   = (((int128_t)a[ 0]) * a[ 4]
+                 +  ((int128_t)a[ 1]) * a[ 3]) * 2
+                 +  ((int128_t)a[ 2]) * a[ 2];
+    int128_t t5   = (((int128_t)a[ 0]) * a[ 5]
+                 +  ((int128_t)a[ 1]) * a[ 4]
+                 +  ((int128_t)a[ 2]) * a[ 3]) * 2;
+    int128_t t6   = (((int128_t)a[ 0]) * a[ 6]
+                 +  ((int128_t)a[ 1]) * a[ 5]
+                 +  ((int128_t)a[ 2]) * a[ 4]) * 2
+                 +  ((int128_t)a[ 3]) * a[ 3];
+    int128_t t7   = (((int128_t)a[ 1]) * a[ 6]
+                 +  ((int128_t)a[ 2]) * a[ 5]
+                 +  ((int128_t)a[ 3]) * a[ 4]) * 2;
+    int128_t t8   = (((int128_t)a[ 2]) * a[ 6]
+                 +  ((int128_t)a[ 3]) * a[ 5]) * 2
+                 +  ((int128_t)a[ 4]) * a[ 4];
+    int128_t t9   = (((int128_t)a[ 3]) * a[ 6]
+                 +  ((int128_t)a[ 4]) * a[ 5]) * 2;
+    int128_t t10  = (((int128_t)a[ 4]) * a[ 6]) * 2
+                 +  ((int128_t)a[ 5]) * a[ 5];
+    int128_t t11  = (((int128_t)a[ 5]) * a[ 6]) * 2;
+    int128_t t12  =  ((int128_t)a[ 6]) * a[ 6];
+
+    /* Carry propagation: mask each column to 55 bits, push the rest up. */
+    t1   += t0  >> 55; r[ 0] = t0  & 0x7fffffffffffffL;
+    t2   += t1  >> 55; r[ 1] = t1  & 0x7fffffffffffffL;
+    t3   += t2  >> 55; r[ 2] = t2  & 0x7fffffffffffffL;
+    t4   += t3  >> 55; r[ 3] = t3  & 0x7fffffffffffffL;
+    t5   += t4  >> 55; r[ 4] = t4  & 0x7fffffffffffffL;
+    t6   += t5  >> 55; r[ 5] = t5  & 0x7fffffffffffffL;
+    t7   += t6  >> 55; r[ 6] = t6  & 0x7fffffffffffffL;
+    t8   += t7  >> 55; r[ 7] = t7  & 0x7fffffffffffffL;
+    t9   += t8  >> 55; r[ 8] = t8  & 0x7fffffffffffffL;
+    t10  += t9  >> 55; r[ 9] = t9  & 0x7fffffffffffffL;
+    t11  += t10 >> 55; r[10] = t10 & 0x7fffffffffffffL;
+    t12  += t11 >> 55; r[11] = t11 & 0x7fffffffffffffL;
+    r[13] = (sp_digit)(t12 >> 55);
+    r[12] = t12 & 0x7fffffffffffffL;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_7(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_7(r, a);
+    sp_384_mont_reduce_7(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ 2^n mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * n Number of times to square (assumed >= 1).
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_n_7(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    /* First squaring reads from a; the remaining n-1 square r in place. */
+    sp_384_mont_sqr_7(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_7(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve: p - 2 as six 64-bit words, least significant
+ * first.  Used as the exponent for modular inversion (see
+ * sp_384_mont_inv_7). */
+static const uint64_t p384_mod_minus_2[6] = {
+    0x00000000fffffffdU,0xffffffff00000000U,0xfffffffffffffffeU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data (at least 10 * 7 digits in the unrolled variant).
+ */
+static void sp_384_mont_inv_7(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Fermat inversion: t = a^(p-2) mod p via left-to-right
+     * square-and-multiply over the bits of the fixed public exponent p-2. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 7);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_7(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+            sp_384_mont_mul_7(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 7);
+#else
+    /* Fixed addition chain for a^(p-2); the comment before each step is the
+     * exponent accumulated so far (in hex). */
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 7;
+    sp_digit* t3 = td + 4 * 7;
+    sp_digit* t4 = td + 6 * 7;
+    sp_digit* t5 = td + 8 * 7;
+
+    /* 0x2 */
+    sp_384_mont_sqr_7(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_7(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_7(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_7(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_7(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_7(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_7(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_7(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_7(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_7(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_7(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_7(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_7(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_7(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_7(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_7(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_7(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_7(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_7(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_7(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_7(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_7(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_7(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*7;
+    int64_t n;
+
+    sp_384_mont_inv_7(t1, p->z, t + 2*7);
+
+    /* t2 = 1/z^2, t1 = 1/z^3 (both in Montgomery form). */
+    sp_384_mont_sqr_7(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_7(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_7(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    XMEMSET(r->x + 7, 0, sizeof(r->x) / 2U);
+    /* Reduce again with a zeroed top half to take x out of Montgomery form.
+     */
+    sp_384_mont_reduce_7(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_7(r->x, p384_mod);
+    sp_384_cond_sub_7(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_7(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 7, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_7(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_7(r->y, p384_mod);
+    sp_384_cond_sub_7(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+            (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r->y);
+
+    /* Affine z is exactly 1. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition without carry propagation; callers normalize
+ * afterwards.  Always returns 0.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_384_add_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] + b[i];
+    }
+
+    return 0;
+}
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * Digit-wise addition without carry propagation; callers normalize
+ * afterwards.  Always returns 0.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_384_add_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] + b[ 0];
+    r[ 1] = a[ 1] + b[ 1];
+    r[ 2] = a[ 2] + b[ 2];
+    r[ 3] = a[ 3] + b[ 3];
+    r[ 4] = a[ 4] + b[ 4];
+    r[ 5] = a[ 5] + b[ 5];
+    r[ 6] = a[ 6] + b[ 6];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r Result of addition.
+ * a First number to add in Montgomery form.
+ * b Second number to add in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_add_7(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_384_add_7(r, a, b);
+    sp_384_norm_7(r);
+    /* Branch-free subtraction of m when the sum overflows 384 bits
+     * (bit 54 of the top digit set). */
+    sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r Result of doubling.
+ * a Number to double in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_dbl_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_384_add_7(r, a, a);
+    sp_384_norm_7(r);
+    /* Branch-free reduction when the doubled value overflows 384 bits. */
+    sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Performed as two additions, each followed by its own conditional
+ * reduction so intermediate values stay below 2^385.
+ *
+ * r Result of Tripling.
+ * a Number to triple in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_tpl_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_384_add_7(r, a, a);
+    sp_384_norm_7(r);
+    sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r);
+    (void)sp_384_add_7(r, r, a);
+    sp_384_norm_7(r);
+    sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_7(r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtraction; digits may go negative and are fixed up by the
+ * caller (see sp_384_mont_sub_7).  Always returns 0.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_384_sub_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] - b[i];
+    }
+
+    return 0;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Digit-wise subtraction; digits may go negative and are fixed up by the
+ * caller (see sp_384_mont_sub_7).  Always returns 0.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static int sp_384_sub_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    r[ 0] = a[ 0] - b[ 0];
+    r[ 1] = a[ 1] - b[ 1];
+    r[ 2] = a[ 2] - b[ 2];
+    r[ 3] = a[ 3] - b[ 3];
+    r[ 4] = a[ 4] - b[ 4];
+    r[ 5] = a[ 5] - b[ 5];
+    r[ 6] = a[ 6] - b[ 6];
+
+    return 0;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 (all ones) to add and 0 when not; any other value gives
+ * undefined results.  No branch is taken on m.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+static void sp_384_cond_add_7(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    r[ 0] = a[ 0] + (b[ 0] & m);
+    r[ 1] = a[ 1] + (b[ 1] & m);
+    r[ 2] = a[ 2] + (b[ 2] & m);
+    r[ 3] = a[ 3] + (b[ 3] & m);
+    r[ 4] = a[ 4] + (b[ 4] & m);
+    r[ 5] = a[ 5] + (b[ 5] & m);
+    r[ 6] = a[ 6] + (b[ 6] & m);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+static void sp_384_mont_sub_7(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_384_sub_7(r, a, b);
+    /* If the result went negative the top digit's sign bits are set;
+     * r[6] >> 54 then acts as an all-ones mask to add m back. */
+    sp_384_cond_add_7(r, r, m, r[6] >> 54);
+    sp_384_norm_7(r);
+}
+
+/* Shift number right one bit.
+ * Bottom bit is lost.
+ *
+ * r Result of shift.
+ * a Number to shift.
+ */
+SP_NOINLINE static void sp_384_rshift1_7(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    /* Each digit takes the next digit's bottom bit as its new top bit
+     * (bit 54). */
+    for (i=0; i<6; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 54)) & 0x7fffffffffffffL;
+    }
+#else
+    r[0] = ((a[0] >> 1) | (a[1] << 54)) & 0x7fffffffffffffL;
+    r[1] = ((a[1] >> 1) | (a[2] << 54)) & 0x7fffffffffffffL;
+    r[2] = ((a[2] >> 1) | (a[3] << 54)) & 0x7fffffffffffffL;
+    r[3] = ((a[3] >> 1) | (a[4] << 54)) & 0x7fffffffffffffL;
+    r[4] = ((a[4] >> 1) | (a[5] << 54)) & 0x7fffffffffffffL;
+    r[5] = ((a[5] >> 1) | (a[6] << 54)) & 0x7fffffffffffffL;
+#endif
+    r[6] = a[6] >> 1;
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * When a is odd, adding m (odd prime) first makes the value even so the
+ * right shift is an exact division by 2 mod m.
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+static void sp_384_div2_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_384_cond_add_7(r, a, m, 0 - (a[0] & 1));
+    sp_384_norm_7(r);
+    sp_384_rshift1_7(r, r);
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * All field operations are performed in Montgomery form mod p384_mod.
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data (at least 4 * 7 digits).
+ */
+static void sp_384_proj_point_dbl_7(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*7;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_7(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_7(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_7(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_7(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_7(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_7(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_7(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_7(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_7(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_7(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_7(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_7(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_7(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_7(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_7(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_7(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_7(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation: all digits are XORed and OR-combined
+ * regardless of where a difference occurs.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_7(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (at least 10 * 7 digits).
+ */
+static void sp_384_proj_point_add_7(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*7;
+    sp_digit* t3 = t + 4*7;
+    sp_digit* t4 = t + 6*7;
+    sp_digit* t5 = t + 8*7;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: the addition formulas below fail for q == +/-p, so
+     * dispatch to doubling when x and z match and y is equal or negated
+     * (t1 = -q->y mod p). */
+    (void)sp_384_sub_7(t1, p384_mod, q->y);
+    sp_384_norm_7(t1);
+    if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) &
+        (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_7(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        /* If either input is infinity, work into the scratch point so r
+         * keeps the copied non-infinity operand (selected below). */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* Seed r with p, or with q when p is infinity. */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<7; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<7; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<7; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_7(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_7(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_7(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_7(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_7(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(x, x, t5, p384_mod);
+        sp_384_mont_dbl_7(t1, y, p384_mod);
+        sp_384_mont_sub_7(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_7(y, y, x, p384_mod);
+        sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(y, y, t5, p384_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Left-to-right binary ladder: for every scalar bit one point addition and
+ * one point doubling are performed, with branch-free selection between
+ * t[0] and t[1] via addr_mask so no branch depends on the scalar bit.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by (7 x 55-bit digits).
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifdef WOLFSSL_SP_NO_MALLOC
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 7 * 6];
+#else
+    sp_point_384* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+
+        /* t[0] = {0, 0, 1} * norm */
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod);
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod);
+
+    if (err == MP_OKAY) {
+        /* Top digit holds 54 bits; pre-shift it so bit 54 of n is the next
+         * scalar bit to process. */
+        i = 6;
+        c = 54;
+        n = k[i--] << (55 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 55;
+            }
+
+            y = (n >> 54) & 1;
+            n <<= 1;
+
+            /* Always add and always double; the scalar bit only selects
+             * which of t[0]/t[1] receives each result. */
+            sp_384_proj_point_add_7(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])),
+                    sizeof(sp_point_384));
+            sp_384_proj_point_dbl_7(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2],
+                    sizeof(sp_point_384));
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_384));
+        }
+    }
+
+#ifndef WOLFSSL_SP_NO_MALLOC
+    /* Zeroize before freeing; free with the same heap hint used for
+     * allocation (required for custom/static memory allocators). */
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 7 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Cache attack resistant implementation: the point to double is selected
+ * with address masks (addr_mask) rather than secret-dependent branches,
+ * so one add and one double are performed per scalar bit.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 7 * 6];
+#else
+    sp_point_384* t;
+    sp_digit* tmp;
+#endif
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Cast to sp_point_384*: 'sp_point' is not a type in this file. */
+    t = (sp_point_384*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+                                DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
+        err = sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod);
+    if (err == MP_OKAY)
+        err = sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod);
+
+    if (err == MP_OKAY) {
+        /* Scalar has 7 55-bit digits; start from the most significant bit. */
+        i = 6;
+        c = 54;
+        n = k[i--] << (55 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 55;
+            }
+
+            y = (n >> 54) & 1;
+            n <<= 1;
+
+            /* Always add; then double whichever of t[0]/t[1] the masked
+             * address arithmetic selects - no branch on the secret bit. */
+            sp_384_proj_point_add_7(&t[y^1], &t[0], &t[1], tmp);
+
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])),
+                    sizeof(sp_point_384));
+            sp_384_proj_point_dbl_7(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2],
+                    sizeof(sp_point_384));
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        /* Zeroise before free - buffers held secret intermediate values. */
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 7 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points.
+ * Holds only the affine X and Y ordinates (Z is implicitly one). */
+typedef struct sp_table_entry_384 {
+    sp_digit x[7];    /* X ordinate - 7 digits, Montgomery form. */
+    sp_digit y[7];    /* Y ordinate - 7 digits, Montgomery form. */
+} sp_table_entry_384;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Fixed 4-bit window method: t[0..15] hold 0*g .. 15*g, then each window
+ * of the scalar costs four doubles and one table add.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[16];
+    sp_point_384 rtd;
+    sp_digit tmpd[2 * 7 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+                                DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod);
+        (void)sp_384_mod_mul_norm_7(t[1].y, g->y, p384_mod);
+        (void)sp_384_mod_mul_norm_7(t[1].z, g->z, p384_mod);
+        t[1].infinity = 0;
+        /* Build t[i] = i*g: even entries by doubling, odd by adding. */
+        sp_384_proj_point_dbl_7(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_384_proj_point_add_7(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_384_proj_point_add_7(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_384_proj_point_add_7(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_384_proj_point_add_7(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_384_proj_point_add_7(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_384_proj_point_add_7(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_384_proj_point_dbl_7(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_384_proj_point_add_7(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        /* Seed the accumulator with the highest window of the scalar
+         * (digits are 55 bits wide). */
+        i = 5;
+        n = k[i+1] << 9;
+        c = 50;
+        y = n >> 59;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+        n <<= 5;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                /* Refill the bit buffer from the next digit down. */
+                n |= k[i--] << (9 - c);
+                c += 55;
+            }
+            y = (n >> 60) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            /* rt = 16*rt + t[y] */
+            sp_384_proj_point_dbl_7(rt, rt, tmp);
+            sp_384_proj_point_dbl_7(rt, rt, tmp);
+            sp_384_proj_point_dbl_7(rt, rt, tmp);
+            sp_384_proj_point_dbl_7(rt, rt, tmp);
+
+            sp_384_proj_point_add_7(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        /* Zeroise before free - buffers held secret intermediate values. */
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 7 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_384_point_free_7(rt, 1, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ * The result is written back into p (in-place; the original comment's
+ * 'r' parameter does not exist in this signature).
+ *
+ * p Point to double (in/out).
+ * n Number of times to double.
+ * t Temporary ordinate data (at least 10*7 digits: w, a, b, t1, t2).
+ */
+static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*7;
+    sp_digit* b = t + 4*7;
+    sp_digit* t1 = t + 6*7;
+    sp_digit* t2 = t + 8*7;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_7(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_7(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_7(w, w, p384_mod, p384_mp_mod);
+
+    /* Non-small builds peel the final iteration out of the loop so the
+     * last W update can be skipped; small builds test n in the loop. */
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_7(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_7(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_7(t1, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(b, t1, x, p384_mod, p384_mp_mod);
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_7(t2, b, p384_mod);
+        sp_384_mont_sub_7(x, x, t2, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_7(z, z, y, p384_mod, p384_mp_mod);
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_7(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_7(w, w, t1, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_7(y, b, x, p384_mod);
+        sp_384_mont_mul_7(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_7(y, y, p384_mod);
+        sp_384_mont_sub_7(y, y, t1, p384_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* Peeled last iteration - same formulas, no W update needed. */
+    /* A = 3*(X^2 - W) */
+    sp_384_mont_sqr_7(t1, x, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_7(t1, t1, w, p384_mod);
+    sp_384_mont_tpl_7(a, t1, p384_mod);
+    /* B = X*Y^2 */
+    sp_384_mont_sqr_7(t1, y, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_7(b, t1, x, p384_mod, p384_mp_mod);
+    /* X = A^2 - 2B */
+    sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_7(t2, b, p384_mod);
+    sp_384_mont_sub_7(x, x, t2, p384_mod);
+    /* Z = Z*Y */
+    sp_384_mont_mul_7(z, z, y, p384_mod, p384_mp_mod);
+    /* t2 = Y^4 */
+    sp_384_mont_sqr_7(t1, t1, p384_mod, p384_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_384_mont_sub_7(y, b, x, p384_mod);
+    sp_384_mont_mul_7(y, y, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_7(y, y, p384_mod);
+    sp_384_mont_sub_7(y, y, t1, p384_mod);
+#endif
+    /* Y = Y/2 */
+    sp_384_div2_7(y, y, p384_mod);
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add (its Z ordinate is assumed to be one, i.e. the
+ *   point is affine in Montgomery form).
+ * t Temporary ordinate data (at least 10*7 digits).
+ */
+static void sp_384_proj_point_add_qz1_7(sp_point_384* r, const sp_point_384* p,
+        const sp_point_384* q, sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*7;
+    sp_digit* t3 = t + 4*7;
+    sp_digit* t4 = t + 6*7;
+    sp_digit* t5 = t + 8*7;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double: p == q (or p == -q) must be handled by doubling.
+     * t1 = -q->y mod p384 for the negative-Y comparison. */
+    (void)sp_384_sub_7(t1, p384_mod, q->y);
+    sp_384_norm_7(t1);
+    if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) &
+        (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_7(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /* If either input is infinity, write the real ordinates into the
+         * scratch point (rp[1]) so r can be overwritten with the other
+         * operand below. */
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* r starts as p (or q when p is infinity). */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<7; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<7; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<7; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_7(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - X1 */
+        sp_384_mont_sub_7(t2, t2, x, p384_mod);
+        /* R = S2 - Y1 */
+        sp_384_mont_sub_7(t4, t4, y, p384_mod);
+        /* Z3 = H*Z1 */
+        sp_384_mont_mul_7(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_384_mont_sqr_7(t1, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t3, x, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(x, t1, t5, p384_mod);
+        sp_384_mont_dbl_7(t1, t3, p384_mod);
+        sp_384_mont_sub_7(x, x, t1, p384_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_384_mont_sub_7(t3, t3, x, p384_mod);
+        sp_384_mont_mul_7(t3, t3, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(t5, t5, y, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_7(y, t3, t5, p384_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert (converted in place).
+ * t Temporary data.
+ */
+static void sp_384_proj_to_affine_7(sp_point_384* a, sp_digit* t)
+{
+    sp_digit* zinv    = t;
+    sp_digit* zinv2   = t + 2 * 7;
+    sp_digit* scratch = t + 4 * 7;
+
+    /* zinv = 1/Z (Montgomery form). */
+    sp_384_mont_inv_7(zinv, a->z, scratch);
+
+    /* zinv2 = 1/Z^2, then zinv = 1/Z^3. */
+    sp_384_mont_sqr_7(zinv2, zinv, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_7(zinv, zinv2, zinv, p384_mod, p384_mp_mod);
+
+    /* X = X/Z^2, Y = Y/Z^3, Z = Montgomery form of one. */
+    sp_384_mont_mul_7(a->x, a->x, zinv2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_7(a->y, a->y, zinv, p384_mod, p384_mp_mod);
+    XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * The table has 256 entries: table[j] is the affine (Montgomery form)
+ * point sum(2^(48*i) * a) taken over the set bits i of j - one bit per
+ * 48-bit stripe of the scalar.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_gen_stripe_table_7(const sp_point_384* a,
+        sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td, s1d, s2d;
+#endif
+    sp_point_384* t;
+    sp_point_384* s1 = NULL;
+    sp_point_384* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, s2d, s2);
+    }
+
+    /* t = a normalised into Montgomery form. */
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_7(t->x, a->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_7(t->y, a->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_7(t->z, a->z, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_384_proj_to_affine_7(t, tmp);
+
+        XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        /* table[2^i] = 2^(48*i) * a, via 48 doublings per step. */
+        for (i=1; i<8; i++) {
+            sp_384_proj_point_dbl_n_7(t, 48, tmp);
+            sp_384_proj_to_affine_7(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        /* Remaining entries combine already-computed entries:
+         * table[j] = table[2^i] + table[j - 2^i]. */
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_384_proj_point_add_qz1_7(t, s1, s2, tmp);
+                sp_384_proj_to_affine_7(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_384_point_free_7(s2, 0, heap);
+    sp_384_point_free_7(s1, 0, heap);
+    sp_384_point_free_7( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Stripe method using a pre-computed table: each iteration gathers one
+ * bit from each of 8 stripes of the scalar to index the table.
+ *
+ * r Resulting point.
+ * g Point to multiply (unused - the table already encodes it).
+ * table Pre-computed table of multiples of g (256 entries).
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_7(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 7 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_7(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
+            DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        /* First table index: bit 47 of each 48-bit stripe (k is stored
+         * in 55-bit digits, hence the / 55 and % 55). */
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 55] >> (x % 55)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=46; i>=0; i--) {
+            /* Gather bit i of each stripe into the table index. */
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 55] >> (x % 55)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_7(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_7(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_7(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, heap);
+    sp_384_point_free_7(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+/* Fixed-point cache entry: remembers a point and its pre-computed stripe
+ * table so repeated multiplications by the same point reuse the table. */
+typedef struct sp_cache_384_t {
+    sp_digit x[7];                  /* X ordinate of cached point. */
+    sp_digit y[7];                  /* Y ordinate of cached point. */
+    sp_table_entry_384 table[256];  /* Stripe table for the point. */
+    uint32_t cnt;                   /* Use count - least used is evicted. */
+    int set;                        /* Non-zero when entry is in use. */
+} sp_cache_384_t;
+
+/* Per-thread (THREAD_LS_T) cache of pre-computed tables. */
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    /* Without thread-local storage the cache is shared; guard with lock. */
+    static volatile int initCacheMutex_384 = 0;
+    static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find the cache entry for the given point, creating one (evicting the
+ * least used entry when the cache is full) if no match exists. A hit
+ * increments the entry's use count.
+ * Caller must hold sp_cache_384_lock when HAVE_THREAD_LS is not defined.
+ *
+ * g     Point to look up.
+ * cache Receives a pointer to the cache entry for g.
+ */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    /* Lazily initialise the cache on first use. */
+    if (sp_cache_384_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_384[i].set = 0;
+        }
+        sp_cache_384_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_384[i].set)
+            continue;
+
+        if (sp_384_cmp_equal_7(g->x, sp_cache_384[i].x) &
+            sp_384_cmp_equal_7(g->y, sp_cache_384[i].y)) {
+            sp_cache_384[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_384_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_384[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_384_last) {
+            least = sp_cache_384[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_384[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_384[i].cnt;
+                }
+            }
+        }
+
+        /* Store the new point; table generation happens in the caller
+         * once the point has been seen often enough. */
+        XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+        XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+        sp_cache_384[i].set = 1;
+        sp_cache_384[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_384[i];
+    sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * (Note: despite the original header, this multiplies an arbitrary
+ * point g, not the base point.)
+ *
+ * With FP_ECC a per-thread cache is consulted: once the same point has
+ * been requested twice, a stripe table is generated and reused.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails, BAD_MUTEX_E when the
+ * cache lock cannot be taken and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_fast_7(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 7 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        /* Second use of this point: build its stripe table. */
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_7(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            /* Table not ready yet - use the plain windowed method. */
+            err = sp_384_ecc_mulmod_fast_7(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_7(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Public entry point: converts mp_int/ecc_point arguments to the
+ * internal 7-digit representation and back.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[7];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_7(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+            DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Convert scalar and point into the internal representation. */
+        sp_384_from_mp(k, 7, km);
+        sp_384_point_from_ecc_point_7(point, gm);
+
+            err = sp_384_ecc_mulmod_7(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_7(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(point, 0, heap);
+
+    return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Small build: no pre-computed base-point table is compiled in, so this
+ * falls through to the generic point multiplication.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_384_ecc_mulmod_7(r, &p384_base, k, map, heap);
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Cache-resistant build: no pre-computed base-point table is compiled
+ * in, so this falls through to the generic point multiplication.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_384_ecc_mulmod_7(r, &p384_base, k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x50756649c0b528L,0x71c541ad9c707bL,0x71506d35b8838dL,
+ 0x4d1877fc3ce1d7L,0x6de2b645486845L,0x227025fee46c29L,
+ 0x134eab708a6785L },
+ { 0x043dad4b03a4feL,0x517ef769535846L,0x58ba0ec14286feL,
+ 0x47a7fecc5d6f3aL,0x1a840c6c352196L,0x3d3bb00044c72dL,
+ 0x0ade2af0968571L } },
+ /* 2 */
+ { { 0x0647532b0c535bL,0x52a6e0a0c52c53L,0x5085aae6b24375L,
+ 0x7096bb501c66b5L,0x47bdb3df9b7b7bL,0x11227e9b2f0be6L,
+ 0x088b172704fa51L },
+ { 0x0e796f2680dc64L,0x796eb06a482ebfL,0x2b441d02e04839L,
+ 0x19bef7312a5aecL,0x02247c38b8efb5L,0x099ed1185c329eL,
+ 0x1ed71d7cdb096fL } },
+ /* 3 */
+ { { 0x6a3cc39edffea5L,0x7a386fafd3f9c4L,0x366f78fbd8d6efL,
+ 0x529c7ad7873b80L,0x79eb30380eb471L,0x07c5d3b51760b7L,
+ 0x36ee4f1cc69183L },
+ { 0x5ba260f526b605L,0x2f1dfaf0aa6e6fL,0x6bb5ca812a5752L,
+ 0x3002d8d1276bc9L,0x01f82269483777L,0x1df33eaaf733cdL,
+ 0x2b97e555f59255L } },
+ /* 4 */
+ { { 0x480c57f26feef9L,0x4d28741c248048L,0x0c9cf8af1f0c68L,
+ 0x778f6a639a8016L,0x148e88c42e9c53L,0x464051757ecfe9L,
+ 0x1a940bd0e2a5e1L },
+ { 0x713a46b74536feL,0x1757b153e1d7ebL,0x30dc8c9da07486L,
+ 0x3b7460c1879b5eL,0x4b766c5317b315L,0x1b9de3aaf4d377L,
+ 0x245f124c2cf8f5L } },
+ /* 5 */
+ { { 0x426e2ee349ddd0L,0x7df3365f84a022L,0x03b005d29a7c45L,
+ 0x422c2337f9b5a4L,0x060494f4bde761L,0x5245e5db6da0b0L,
+ 0x22b71d744677f2L },
+ { 0x19d097b7d5a7ceL,0x6bcb468823d34cL,0x1c3692d3be1d09L,
+ 0x3c80ec7aa01f02L,0x7170f2ebaafd97L,0x06cbcc7d79d4e8L,
+ 0x04a8da511fe760L } },
+ /* 6 */
+ { { 0x79c07a4fc52870L,0x6e9034a752c251L,0x603860a367382cL,
+ 0x56d912d6aa87d0L,0x0a348a24abaf76L,0x6c5a23da14adcbL,
+ 0x3cf60479a522b2L },
+ { 0x18dd774c61ed22L,0x0ff30168f93b0cL,0x3f79ae15642eddL,
+ 0x40510f4915fbcbL,0x2c9ddfdfd1c6d6L,0x67b81b62aee55eL,
+ 0x2824de79b07a43L } },
+ /* 7 */
+ { { 0x6c66efe085c629L,0x48c212b7913470L,0x4480fd2d057f0aL,
+ 0x725ec7a89a9eb1L,0x78ce97ca1972b7L,0x54760ee70154fbL,
+ 0x362a40e27b9f93L },
+ { 0x474dc7e7b14461L,0x602819389ef037L,0x1a13bc284370b2L,
+ 0x0193ff1295a59dL,0x79615bde6ea5d2L,0x2e76e3d886acc1L,
+ 0x3bb796812e2b60L } },
+ /* 8 */
+ { { 0x04cbb3893b9a2dL,0x4c16010a18baabL,0x19f7cb50f60831L,
+ 0x084f400a0936c1L,0x72f1cdd5bbbf00L,0x1b30b725dc6702L,
+ 0x182753e4fcc50cL },
+ { 0x059a07eadaf9d6L,0x26d81e24bf603cL,0x45583c839dc399L,
+ 0x5579d4d6b1103aL,0x2e14ea59489ae7L,0x492f6e1c5ecc97L,
+ 0x03740dc05db420L } },
+ /* 9 */
+ { { 0x413be88510521fL,0x3753ee49982e99L,0x6cd4f7098e1cc5L,
+ 0x613c92bda4ec1dL,0x495378b677efe0L,0x132a2143839927L,
+ 0x0cf8c336291c0bL },
+ { 0x7fc89d2208353fL,0x751b9da85657e1L,0x349b8a97d405c3L,
+ 0x65a964b048428fL,0x1adf481276455eL,0x5560c8d89c2ffcL,
+ 0x144fc11fac21a3L } },
+ /* 10 */
+ { { 0x7611f4df5bdf53L,0x634eb16234db80L,0x3c713b8e51174cL,
+ 0x52c3c68ac4b2edL,0x53025ba8bebe75L,0x7175d98143105bL,
+ 0x33ca8e266a48faL },
+ { 0x0c9281d24fd048L,0x76b3177604bbf3L,0x3b26ae754e106fL,
+ 0x7f782275c6efc6L,0x36662538a4cb67L,0x0ca1255843e464L,
+ 0x2a4674e142d9bcL } },
+ /* 11 */
+ { { 0x303b4085d480d8L,0x68f23650f4fa7bL,0x552a3ceeba3367L,
+ 0x6da0c4947926e3L,0x6e0f5482eb8003L,0x0de717f3d6738aL,
+ 0x22e5dcc826a477L },
+ { 0x1b05b27209cfc2L,0x7f0a0b65b6e146L,0x63586549ed3126L,
+ 0x7d628dd2b23124L,0x383423fe510391L,0x57ff609eabd569L,
+ 0x301f04370131baL } },
+ /* 12 */
+ { { 0x22fe4cdb32f048L,0x7f228ebdadbf5aL,0x02a99adb2d7c8eL,
+ 0x01a02e05286706L,0x62d6adf627a89fL,0x49c6ce906fbf2bL,
+ 0x0207256dae90b9L },
+ { 0x23e036e71d6cebL,0x199ed8d604e3d7L,0x0c1a11c076d16fL,
+ 0x389291fb3da3f3L,0x47adc60f8f942eL,0x177048468e4b9aL,
+ 0x20c09f5e61d927L } },
+ /* 13 */
+ { { 0x129ea63615b0b8L,0x03fb4a9b588367L,0x5ad6da8da2d051L,
+ 0x33f782f44caeaaL,0x5a27fa80d45291L,0x6d1ed796942da4L,
+ 0x08435a931ef556L },
+ { 0x004abb25351130L,0x6d33207c6fd7e7L,0x702130972074b7L,
+ 0x0e34748af900f7L,0x762a531a28c87aL,0x3a903b5a4a6ac7L,
+ 0x1775b79c35b105L } },
+ /* 14 */
+ { { 0x7470fd846612ceL,0x7dd9b431b32e53L,0x04bcd2be1a61bcL,
+ 0x36ed7c5b5c260bL,0x6795f5ef0a4084L,0x46e2880b401c93L,
+ 0x17d246c5aa8bdeL },
+ { 0x707ae4db41b38dL,0x233c31f7f9558fL,0x585110ec67bdf4L,
+ 0x4d0cc931d0c703L,0x26fbe4356841a7L,0x64323e95239c44L,
+ 0x371dc9230f3221L } },
+ /* 15 */
+ { { 0x70ff1ae4b1ec9dL,0x7c1dcfddee0daaL,0x53286782188748L,
+ 0x6a5d9381e6f207L,0x3aa6c7d6523c4cL,0x6c02d83e0d97e2L,
+ 0x16a9c916b45312L },
+ { 0x78146744b74de8L,0x742ec415269c6fL,0x237a2c6a860e79L,
+ 0x186baf17ba68a7L,0x4261e8789fa51fL,0x3dc136480a5903L,
+ 0x1953899e0cf159L } },
+ /* 16 */
+ { { 0x0205de2f9fbe67L,0x1706fee51c886fL,0x31a0b803c712bfL,
+ 0x0a6aa11ede7603L,0x2463ef2a145c31L,0x615403b30e8f4aL,
+ 0x3f024d6c5f5c5eL },
+ { 0x53bc4fd4d01f95L,0x7d512ac15a692cL,0x72be38fcfe6aa0L,
+ 0x437f0b77bbca1eL,0x7fdcf70774a10eL,0x392d6c5cde37f3L,
+ 0x229cbce79621d1L } },
+ /* 17 */
+ { { 0x2de4da2341c342L,0x5ca9d4e08844e7L,0x60dd073bcf74c9L,
+ 0x4f30aa499b63ecL,0x23efd1eafa00d5L,0x7c99a7db1257b3L,
+ 0x00febc9b3171b1L },
+ { 0x7e2fcf3045f8acL,0x2a642e9e3ce610L,0x23f82be69c5299L,
+ 0x66e49ad967c279L,0x1c895ddfd7a842L,0x798981e22f6d25L,
+ 0x0d595cb59322f3L } },
+ /* 18 */
+ { { 0x4bac017d8c1bbaL,0x73872161e7aafdL,0x0fd865f43d8163L,
+ 0x019d89457708b7L,0x1b983c4dd70684L,0x095e109b74d841L,
+ 0x25f1f0b3e0c76fL },
+ { 0x4e61ddf96010e8L,0x1c40a53f542e5eL,0x01a74dfc8365f9L,
+ 0x69b36b92773333L,0x08e0fccc139ed3L,0x266d216ddc4269L,
+ 0x1f2b47717ce9b5L } },
+ /* 19 */
+ { { 0x0a9a81da57a41fL,0x0825d800736cccL,0x2d7876b4579d28L,
+ 0x3340ea6211a1e3L,0x49e89284f3ff54L,0x6276a210fe2c6eL,
+ 0x01c3c8f31be7cbL },
+ { 0x2211da5d186e14L,0x1e6ffbb61bfea8L,0x536c7d060211d2L,
+ 0x320168720d1d55L,0x5835525ed667baL,0x5125e52495205eL,
+ 0x16113b9f3e9129L } },
+ /* 20 */
+ { { 0x3086073f3b236fL,0x283b03c443b5f5L,0x78e49ed0a067a7L,
+ 0x2a878fb79fb2b8L,0x662f04348a9337L,0x57ee2cf732d50bL,
+ 0x18b50dd65fd514L },
+ { 0x5feb9ef2955926L,0x2c3edbef06a7b0L,0x32728dad651029L,
+ 0x116d00b1c4b347L,0x13254052bf1a1aL,0x3e77bf7fee5ec1L,
+ 0x253943ca388882L } },
+ /* 21 */
+ { { 0x32e5b33062e8afL,0x46ebd147a6d321L,0x2c8076dec6a15cL,
+ 0x7328d511ff0d80L,0x10ad7e926def0eL,0x4e8ca85937d736L,
+ 0x02638c26e8bf2fL },
+ { 0x1deeb3fff1d63fL,0x5014417fa6e8efL,0x6e1da3de5c8f43L,
+ 0x7ca942b42295d9L,0x23faacf75bb4d1L,0x4a71fcd680053dL,
+ 0x04af4f90204dceL } },
+ /* 22 */
+ { { 0x23780d104cbba5L,0x4e8ff46bba9980L,0x2072a6da8d881fL,
+ 0x3cc3d881ae11c9L,0x2eee84ff19be89L,0x69b708ed77f004L,
+ 0x2a82928534eef9L },
+ { 0x794331187d4543L,0x70e0f3edc0cc41L,0x3ab1fa0b84c854L,
+ 0x1478355c1d87baL,0x6f35fa7748ba28L,0x37b8be0531584dL,
+ 0x03c3141c23a69fL } },
+ /* 23 */
+ { { 0x5c244cdef029ddL,0x0d0f0a0cc37018L,0x17f8476604f6afL,
+ 0x13a6dd6ccc95c3L,0x5a242e9801b8f6L,0x211ca9cc632131L,
+ 0x264a6a46a4694fL },
+ { 0x3ffd7235285887L,0x284be28302046fL,0x57f4b9b882f1d6L,
+ 0x5e21772c940661L,0x7619a735c600cfL,0x2f76f5a50c9106L,
+ 0x28d89c8c69de31L } },
+ /* 24 */
+ { { 0x799b5c91361ed8L,0x36ead8c66cd95cL,0x046c9969a91f5cL,
+ 0x46bbdba2a66ea9L,0x29db0e0215a599L,0x26c8849b36f756L,
+ 0x22c3feb31ff679L },
+ { 0x585d1237b5d9efL,0x5ac57f522e8e8dL,0x617e66e8b56c41L,
+ 0x68826f276823cfL,0x0983f0e6f39231L,0x4e1075099084bdL,
+ 0x2a541f82be0416L } },
+ /* 25 */
+ { { 0x468a6e14cf381cL,0x4f7b845c6399edL,0x36aa29732ebe74L,
+ 0x19c726911ab46aL,0x2ad1fe431eec0eL,0x301e35051fd1eaL,
+ 0x36da815e7a1ab3L },
+ { 0x05672e4507832aL,0x4ebf10fca51251L,0x6015843421cff0L,
+ 0x3affad832fc013L,0x712b58d9b45540L,0x1e4751d1f6213eL,
+ 0x0e7c2b218bafa7L } },
+ /* 26 */
+ { { 0x7abf784c52edf5L,0x6fcb4b135ca7b1L,0x435e46ac5f735cL,
+ 0x67f8364ca48c5fL,0x46d45b5fbd956bL,0x10deda6065db94L,
+ 0x0b37fdf85068f9L },
+ { 0x74b3ba61f47ec8L,0x42c7ddf08c10ccL,0x1531a1fe422a20L,
+ 0x366f913d12be38L,0x6a846e30cb2edfL,0x2785898c994fedL,
+ 0x061be85f331af3L } },
+ /* 27 */
+ { { 0x23f5361dfcb91eL,0x3c26c8da6b1491L,0x6e444a1e620d65L,
+ 0x0c3babd5e8ac13L,0x573723ce612b82L,0x2d10e62a142c37L,
+ 0x3d1a114c2d98bdL },
+ { 0x33950b401896f6L,0x7134efe7c12110L,0x31239fd2978472L,
+ 0x30333bf5978965L,0x79f93313dd769fL,0x457fb9e11662caL,
+ 0x190a73b251ae3cL } },
+ /* 28 */
+ { { 0x04dd54bb75f9a4L,0x0d7253a76ae093L,0x08f5b930792bbcL,
+ 0x041f79adafc265L,0x4a9ff24c61c11bL,0x0019c94e724725L,
+ 0x21975945d9cc2aL },
+ { 0x3dfe76722b4a2bL,0x17f2f6107c1d94L,0x546e1ae2944b01L,
+ 0x53f1f06401e72dL,0x2dbe43fc7632d6L,0x5639132e185903L,
+ 0x0f2f34eb448385L } },
+ /* 29 */
+ { { 0x7b4cc7ec30ce93L,0x58fb6e4e4145f7L,0x5d1ed5540043b5L,
+ 0x19ffbe1f633adfL,0x5bfc0907259033L,0x6378f872e7ca0eL,
+ 0x2c127b2c01eb3cL },
+ { 0x076eaf4f58839cL,0x2db54560bc9f68L,0x42ad0319b84062L,
+ 0x46c325d1fb019dL,0x76d2a19ee9eebcL,0x6fbd6d9e2aa8f7L,
+ 0x2396a598fe0991L } },
+ /* 30 */
+ { { 0x662fddf7fbd5e1L,0x7ca8ed22563ad3L,0x5b4768efece3b3L,
+ 0x643786a422d1eaL,0x36ce80494950e1L,0x1a30795b7f2778L,
+ 0x107f395c93f332L },
+ { 0x7939c28332c144L,0x491610e3c8dc0bL,0x099ba2bfdac5fcL,
+ 0x5c2e3149ec29a7L,0x31b731d06f1dc3L,0x1cbb60d465d462L,
+ 0x3ca5461362cfd9L } },
+ /* 31 */
+ { { 0x653ff736ddc103L,0x7c6f2bdec0dfb2L,0x73f81b73a097d0L,
+ 0x05b775f84f180fL,0x56b2085af23413L,0x0d6f36256a61feL,
+ 0x26d3ed267fa68fL },
+ { 0x54f89251d27ac2L,0x4fc6ad94a71202L,0x7ebf01969b4cc5L,
+ 0x7ba364dbc14760L,0x4f8370959a2587L,0x7b7631e37c6188L,
+ 0x29e51845f104cbL } },
+ /* 32 */
+ { { 0x426b775e3c647bL,0x327319e0a69180L,0x0c5cb034f6ff2fL,
+ 0x73aa39b98e9897L,0x7ee615f49fde6eL,0x3f712aa61e0db4L,
+ 0x33ca06c2ba2ce9L },
+ { 0x14973541b8a543L,0x4b4e6101ba61faL,0x1d94e4233d0698L,
+ 0x501513c715d570L,0x1b8f8c3d01436bL,0x52f41a0445cf64L,
+ 0x3f709c3a75fb04L } },
+ /* 33 */
+ { { 0x073c0cbc7f41d6L,0x227c36f5ac8201L,0x508e110fef65d8L,
+ 0x0f317229529b7fL,0x45fc6030d00e24L,0x118a65d30cebeaL,
+ 0x3340cc4223a448L },
+ { 0x204c999797612cL,0x7c05dd4ce9c5a3L,0x7b865d0a8750e4L,
+ 0x2f82c876ab7d34L,0x2243ddd2ab4808L,0x6834b9df8a4914L,
+ 0x123319ed950e0fL } },
+ /* 34 */
+ { { 0x50430efc14ab48L,0x7e9e4ce0d4e89cL,0x2332207fd8656dL,
+ 0x4a2809e97f4511L,0x2162bb1b968e2dL,0x29526d54af2972L,
+ 0x13edd9adcd939dL },
+ { 0x793bca31e1ff7fL,0x6b959c9e4d2227L,0x628ac27809a5baL,
+ 0x2c71ffc7fbaa5fL,0x0c0b058f13c9ceL,0x5676eae68de2cfL,
+ 0x35508036ea19a4L } },
+ /* 35 */
+ { { 0x030bbd6dda1265L,0x67f9d12e31bb34L,0x7e4d8196e3ded3L,
+ 0x7b9120e5352498L,0x75857bce72d875L,0x4ead976a396caeL,
+ 0x31c5860553a64dL },
+ { 0x1a0f792ee32189L,0x564c4efb8165d0L,0x7adc7d1a7fbcbeL,
+ 0x7ed7c2ccf327b7L,0x35df1b448ce33dL,0x6f67eb838997cdL,
+ 0x3ee37ec0077917L } },
+ /* 36 */
+ { { 0x345fa74d5bb921L,0x097c9a56ccfd8eL,0x00a0b5e8f971f8L,
+ 0x723d95223f69d4L,0x08e2e5c2777f87L,0x68b13676200109L,
+ 0x26ab5df0acbad6L },
+ { 0x01bca7daac34aeL,0x49ca4d5f664dadL,0x110687b850914bL,
+ 0x1203d6f06443c9L,0x7a2ac743b04d4cL,0x40d96bd3337f82L,
+ 0x13728be0929c06L } },
+ /* 37 */
+ { { 0x631ca61127bc1aL,0x2b362fd5a77cd1L,0x17897d68568fb7L,
+ 0x21070af33db5b2L,0x6872e76221794aL,0x436f29fb076963L,
+ 0x1f2acfc0ecb7b3L },
+ { 0x19bf15ca9b3586L,0x32489a4a17aee2L,0x2b31af3c929551L,
+ 0x0db7c420b9b19fL,0x538c39bd308c2bL,0x438775c0dea88fL,
+ 0x1537304d7cd07fL } },
+ /* 38 */
+ { { 0x53598d943caf0dL,0x1d5244bfe266adL,0x7158feb7ab3811L,
+ 0x1f46e13cf6fb53L,0x0dcab632eb9447L,0x46302968cfc632L,
+ 0x0b53d3cc5b6ec7L },
+ { 0x69811ca143b7caL,0x5865bcf9f2a11aL,0x74ded7fa093b06L,
+ 0x1c878ec911d5afL,0x04610e82616e49L,0x1e157fe9640eb0L,
+ 0x046e6f8561d6c2L } },
+ /* 39 */
+ { { 0x631a3d3bbe682cL,0x3a4ce9dde5ba95L,0x28f11f7502f1f1L,
+ 0x0a55cf0c957e88L,0x495e4ec7e0a3bcL,0x30ad4d87ba365cL,
+ 0x0217b97a4c26f3L },
+ { 0x01a9088c2e67fdL,0x7501c4c3d5e5e7L,0x265b7bb854c820L,
+ 0x729263c87e6b52L,0x308b9e3b8fb035L,0x33f1b86c1b23abL,
+ 0x0e81b8b21fc99cL } },
+ /* 40 */
+ { { 0x59f5a87237cac0L,0x6b3a86b0cf28b9L,0x13a53db13a4fc2L,
+ 0x313c169a1c253bL,0x060158304ed2bbL,0x21e171b71679bcL,
+ 0x10cdb754d76f86L },
+ { 0x44355392ab473aL,0x64eb7cbda08caeL,0x3086426a900c71L,
+ 0x49016ed9f3c33cL,0x7e6354ab7e04f9L,0x17c4c91a40cd2eL,
+ 0x3509f461024c66L } },
+ /* 41 */
+ { { 0x2848f50f9b5a31L,0x68d1755b6c5504L,0x48cd5d5672ec00L,
+ 0x4d77421919d023L,0x1e1e349ef68807L,0x4ab5130cf415d7L,
+ 0x305464c6c7dbe6L },
+ { 0x64eb0bad74251eL,0x64c6957e52bda4L,0x6c12583440dee6L,
+ 0x6d3bee05b00490L,0x186970de53dbc4L,0x3be03b37567a56L,
+ 0x2b553b1ebdc55bL } },
+ /* 42 */
+ { { 0x74dc3579efdc58L,0x26d29fed1bb71cL,0x334c825a9515afL,
+ 0x433c1e839273a6L,0x0d8a4e41cff423L,0x3454098fe42f8eL,
+ 0x1046674bf98686L },
+ { 0x09a3e029c05dd2L,0x54d7cfc7fb53a7L,0x35f0ad37e14d7cL,
+ 0x73a294a13767b9L,0x3f519678275f4fL,0x788c63393993a4L,
+ 0x0781680b620123L } },
+ /* 43 */
+ { { 0x4c8e2ed4d5ffe8L,0x112db7d42fe4ebL,0x433b8f2d2be2edL,
+ 0x23e30b29a82cbcL,0x35d2f4c06ee85aL,0x78ff31ffe4b252L,
+ 0x0d31295c8cbff5L },
+ { 0x314806ea0376a2L,0x4ea09e22bc0589L,0x0879575f00ba97L,
+ 0x188226d2996bb7L,0x7799368dc9411fL,0x7ab24e5c8cae36L,
+ 0x2b6a8e2ee4ea33L } },
+ /* 44 */
+ { { 0x70c7127d4ed72aL,0x24c9743ef34697L,0x2fd30e7a93683aL,
+ 0x538a89c246012cL,0x6c660a5394ed82L,0x79a95ea239d7e0L,
+ 0x3f3af3bbfb170dL },
+ { 0x3b75aa779ae8c1L,0x33995a3cc0dde4L,0x7489d5720b7bfdL,
+ 0x599677ef9fa937L,0x3defd64c5ab44bL,0x27d52dc234522bL,
+ 0x2ac65d1a8450e0L } },
+ /* 45 */
+ { { 0x478585ec837d7dL,0x5f7971dc174887L,0x67576ed7bb296dL,
+ 0x5a78e529a74926L,0x640f73f4fa104bL,0x7d42a8b16e4730L,
+ 0x108c7eaa75fd01L },
+ { 0x60661ef96e6896L,0x18d3a0761f3aa7L,0x6e71e163455539L,
+ 0x165827d6a7e583L,0x4e7f77e9527935L,0x790bebe2ae912eL,
+ 0x0b8fe9561adb55L } },
+ /* 46 */
+ { { 0x4d48036a9951a8L,0x371084f255a085L,0x66aeca69cea2c5L,
+ 0x04c99f40c745e7L,0x08dc4bfd9a0924L,0x0b0ec146b29df7L,
+ 0x05106218d01c91L },
+ { 0x2a56ee99caedc7L,0x5d9b23a203922cL,0x1ce4c80b6a3ec4L,
+ 0x2666bcb75338cbL,0x185a81aac8c4aaL,0x2b4fb60a06c39eL,
+ 0x0327e1b3633f42L } },
+ /* 47 */
+ { { 0x72814710b2a556L,0x52c864f6e16534L,0x4978de66ddd9f2L,
+ 0x151f5950276cf0L,0x450ac6781d2dc2L,0x114b7a22dd61b2L,
+ 0x3b32b07f29faf8L },
+ { 0x68444fdc2d6e94L,0x68526bd9e437bcL,0x0ca780e8b0d887L,
+ 0x69f3f850a716aaL,0x500b953e42cd57L,0x4e57744d812e7dL,
+ 0x000a5f0e715f48L } },
+ /* 48 */
+ { { 0x2aab10b8243a7dL,0x727d1f4b18b675L,0x0e6b9fdd91bbbbL,
+ 0x0d58269fc337e5L,0x45d6664105a266L,0x11946af1b14072L,
+ 0x2c2334f91e46e1L },
+ { 0x6dc5f8756d2411L,0x21b34eaa25188bL,0x0d2797da83529eL,
+ 0x324df55616784bL,0x7039ec66d267dfL,0x2de79cdb2d108cL,
+ 0x14011b1ad0bde0L } },
+ /* 49 */
+ { { 0x2e160266425043L,0x55fbe11b712125L,0x7e3c58b3947fd9L,
+ 0x67aacc79c37ad3L,0x4a18e18d2dea0fL,0x5eef06e5674351L,
+ 0x37c3483ae33439L },
+ { 0x5d5e1d75bb4045L,0x0f9d72db296efdL,0x60b1899dd894a9L,
+ 0x06e8818ded949aL,0x747fd853c39434L,0x0953b937d9efabL,
+ 0x09f08c0beeb901L } },
+ /* 50 */
+ { { 0x1d208a8f2d49ceL,0x54042c5be1445aL,0x1c2681fd943646L,
+ 0x219c8094e2e674L,0x442cddf07238b8L,0x574a051c590832L,
+ 0x0b72f4d61c818aL },
+ { 0x7bc3cbe4680967L,0x0c8b3f25ae596bL,0x0445b0da74a9efL,
+ 0x0bbf46c40363b7L,0x1df575c50677a3L,0x016ea6e73d68adL,
+ 0x0b5207bd8db0fdL } },
+ /* 51 */
+ { { 0x2d39fdfea1103eL,0x2b252bf0362e34L,0x63d66c992baab9L,
+ 0x5ac97706de8550L,0x0cca390c39c1acL,0x0d9bec5f01b2eaL,
+ 0x369360a0f7e5f3L },
+ { 0x6dd3461e201067L,0x70b2d3f63ed614L,0x487580487c54c7L,
+ 0x6020e48a44af2aL,0x1ccf80b21aab04L,0x3cf3b12d88d798L,
+ 0x349368eccc506fL } },
+ /* 52 */
+ { { 0x5a053753b0a354L,0x65e818dbb9b0aeL,0x7d5855ee50e4bfL,
+ 0x58dc06885c7467L,0x5ee15073e57bd3L,0x63254ebc1e07fdL,
+ 0x1d48e0392aa39bL },
+ { 0x4e227c6558ffe9L,0x0c3033d8a82a3eL,0x7bde65c214e8d2L,
+ 0x6e23561559c16aL,0x5094c5e6deaffdL,0x78dca2880f1f91L,
+ 0x3d9d3f947d838dL } },
+ /* 53 */
+ { { 0x387ae5af63408fL,0x6d539aeb4e6edfL,0x7f3d3186368e70L,
+ 0x01a6446bc19989L,0x35288fbcd4482fL,0x39288d34ec2736L,
+ 0x1de9c47159ad76L },
+ { 0x695dc7944f8d65L,0x3eca2c35575094L,0x0c918059a79b69L,
+ 0x4573a48c32a74eL,0x580d8bc8b93f52L,0x190be3a3d071eaL,
+ 0x2333e686b3a8cbL } },
+ /* 54 */
+ { { 0x2b110c7196fee2L,0x3ac70e99128a51L,0x20a6bb6b75d5e6L,
+ 0x5f447fa513149aL,0x560d69714cc7b2L,0x1d3ee25279fab1L,
+ 0x369adb2ccca959L },
+ { 0x3fddb13dd821c2L,0x70bf21ba647be8L,0x64121227e3cbc9L,
+ 0x12633a4c892320L,0x3c15c61660f26dL,0x1932c3b3d19900L,
+ 0x18c718563eab71L } },
+ /* 55 */
+ { { 0x72ebe0fd752366L,0x681c2737d11759L,0x143c805e7ae4f0L,
+ 0x78ed3c2cc7b324L,0x5c16e14820254fL,0x226a4f1c4ec9f0L,
+ 0x2891bb915eaac6L },
+ { 0x061eb453763b33L,0x07f88b81781a87L,0x72b5ac7a87127cL,
+ 0x7ea4e4cd7ff8b5L,0x5e8c3ce33908b6L,0x0bcb8a3d37feffL,
+ 0x01da9e8e7fc50bL } },
+ /* 56 */
+ { { 0x639dfe9e338d10L,0x32dfe856823608L,0x46a1d73bca3b9aL,
+ 0x2da685d4b0230eL,0x6e0bc1057b6d69L,0x7144ec724a5520L,
+ 0x0b067c26b87083L },
+ { 0x0fc3f0eef4c43dL,0x63500f509552b7L,0x220d74af6f8b86L,
+ 0x038996eafa2aa9L,0x7f6750f4aee4d2L,0x3e1d3f06718720L,
+ 0x1ea1d37243814cL } },
+ /* 57 */
+ { { 0x322d4597c27050L,0x1beeb3ce17f109L,0x15e5ce2e6ef42eL,
+ 0x6c8be27da6b3a0L,0x66e3347f4d5f5cL,0x7172133899c279L,
+ 0x250aff4e548743L },
+ { 0x28f0f6a43b566dL,0x0cd2437fefbca0L,0x5b1108cb36bdbaL,
+ 0x48a834d41fb7c2L,0x6cb8565680579fL,0x42da2412b45d9fL,
+ 0x33dfc1abb6c06eL } },
+ /* 58 */
+ { { 0x56e3c48ef96c80L,0x65667bb6c1381eL,0x09f70514375487L,
+ 0x1548ff115f4a08L,0x237de2d21a0710L,0x1425cdee9f43dfL,
+ 0x26a6a42e055b0aL },
+ { 0x4ea9ea9dc7dfcbL,0x4df858583ac58aL,0x1d274f819f1d39L,
+ 0x26e9c56cf91fcbL,0x6cee31c7c3a465L,0x0bb8e00b108b28L,
+ 0x226158da117301L } },
+ /* 59 */
+ { { 0x5a7cd4fce73946L,0x7b6a462d0ac653L,0x732ea4bb1a3da5L,
+ 0x7c8e9f54711af4L,0x0a6cd55d4655f9L,0x341e6d13e4754aL,
+ 0x373c87098879a8L },
+ { 0x7bc82e61b818bfL,0x5f2db48f44879fL,0x2a2f06833f1d28L,
+ 0x494e5b691a74c0L,0x17d6cf35fd6b57L,0x5f7028d1c25dfcL,
+ 0x377a9ab9562cb6L } },
+ /* 60 */
+ { { 0x4de8877e787b2eL,0x183e7352621a52L,0x2ab0509974962bL,
+ 0x045a450496cb8aL,0x3bf7118b5591c7L,0x7724f98d761c35L,
+ 0x301607e8d5a0c1L },
+ { 0x0f58a3f24d4d58L,0x3771c19c464f3cL,0x06746f9c0bfafaL,
+ 0x56564c9c8feb52L,0x0d66d9a7d8a45fL,0x403578141193caL,
+ 0x00b0d0bdc19260L } },
+ /* 61 */
+ { { 0x571407157bdbc2L,0x138d5a1c2c0b99L,0x2ee4a8057dcbeaL,
+ 0x051ff2b58e9ed1L,0x067378ad9e7cdaL,0x7cc2c1db97a49eL,
+ 0x1e7536ccd849d6L },
+ { 0x531fd95f3497c4L,0x55dc08325f61a7L,0x144e942bce32bfL,
+ 0x642d572f09e53aL,0x556ff188261678L,0x3e79c0d9d513d6L,
+ 0x0bbbc6656f6d52L } },
+ /* 62 */
+ { { 0x57d3eb50596edcL,0x26c520a487451dL,0x0a92db40aea8d6L,
+ 0x27df6345109616L,0x7733d611fd727cL,0x61d14171fef709L,
+ 0x36169ae417c36bL },
+ { 0x6899f5d4091cf7L,0x56ce5dfe4ed0c1L,0x2c430ce5913fbcL,
+ 0x1b13547e0f8caeL,0x4840a8275d3699L,0x59b8ef209e81adL,
+ 0x22362dff5ea1a2L } },
+ /* 63 */
+ { { 0x7237237bd98425L,0x73258e162a9d0bL,0x0a59a1e8bb5118L,
+ 0x4190a7ee5d8077L,0x13684905fdbf7cL,0x31c4033a52626bL,
+ 0x010a30e4fbd448L },
+ { 0x47623f981e909aL,0x670af7c325b481L,0x3d004241fa4944L,
+ 0x0905a2ca47f240L,0x58f3cdd7a187c3L,0x78b93aee05b43fL,
+ 0x19b91d4ef8d63bL } },
+ /* 64 */
+ { { 0x0d34e116973cf4L,0x4116fc9e69ee0eL,0x657ae2b4a482bbL,
+ 0x3522eed134d7cdL,0x741e0dde0a036aL,0x6554316a51cc7bL,
+ 0x00f31c6ca89837L },
+ { 0x26770aa06b1dd7L,0x38233a4ceba649L,0x065a1110c96feaL,
+ 0x18d367839e0f15L,0x794543660558d1L,0x39b605139065dcL,
+ 0x29abbec071b637L } },
+ /* 65 */
+ { { 0x1464b401ab5245L,0x16db891b27ff74L,0x724eb49cb26e34L,
+ 0x74fee3bc9cc33eL,0x6a8bdbebe085eaL,0x5c2e75ca207129L,
+ 0x1d03f2268e6b08L },
+ { 0x28b0a328e23b23L,0x645dc26209a0bcL,0x62c28990348d49L,
+ 0x4dd9be1fa333d0L,0x6183aac74a72e4L,0x1d6f3ee69e1d03L,
+ 0x2fff96db0ff670L } },
+ /* 66 */
+ { { 0x2358f5c6a2123fL,0x5b2bfc51bedb63L,0x4fc6674be649ecL,
+ 0x51fc16e44b813aL,0x2ffe10a73754c1L,0x69a0c7a053aeefL,
+ 0x150e605fb6b9b4L },
+ { 0x179eef6b8b83c4L,0x64293b28ad05efL,0x331795fab98572L,
+ 0x09823eec78727dL,0x36508042b89b81L,0x65f1106adb927eL,
+ 0x2fc0234617f47cL } },
+ /* 67 */
+ { { 0x12aa244e8068dbL,0x0c834ae5348f00L,0x310fc1a4771cb3L,
+ 0x6c90a2f9e19ef9L,0x77946fa0573471L,0x37f5df81e5f72fL,
+ 0x204f5d72cbe048L },
+ { 0x613c724383bba6L,0x1ce14844967e0aL,0x797c85e69aa493L,
+ 0x4fb15b0f2ce765L,0x5807978e2e8aa7L,0x52c75859876a75L,
+ 0x1554635c763d3eL } },
+ /* 68 */
+ { { 0x4f292200623f3bL,0x6222be53d7fe07L,0x1e02a9a08c2571L,
+ 0x22c6058216b912L,0x1ec20044c7ba17L,0x53f94c5efde12bL,
+ 0x102b8aadfe32a4L },
+ { 0x45377aa927b102L,0x0d41b8062ee371L,0x77085a9018e62aL,
+ 0x0c69980024847cL,0x14739b423a73a9L,0x52ec6961fe3c17L,
+ 0x38a779c94b5a7dL } },
+ /* 69 */
+ { { 0x4d14008435af04L,0x363bfd8325b4e8L,0x48cdb715097c95L,
+ 0x1b534540f8bee0L,0x4ca1e5c90c2a76L,0x4b52c193d6eee0L,
+ 0x277a33c79becf5L },
+ { 0x0fee0d511d3d06L,0x4627f3d6a58f8cL,0x7c81ac245119b8L,
+ 0x0c8d526ba1e07aL,0x3dbc242f55bac2L,0x2399df8f91fffdL,
+ 0x353e982079ba3bL } },
+ /* 70 */
+ { { 0x6405d3b0ab9645L,0x7f31abe3ee236bL,0x456170a9babbb1L,
+ 0x09634a2456a118L,0x5b1c6045acb9e5L,0x2c75c20d89d521L,
+ 0x2e27ccf5626399L },
+ { 0x307cd97fed2ce4L,0x1c2fbb02b64087L,0x542a068d27e64dL,
+ 0x148c030b3bc6a6L,0x671129e616ade5L,0x123f40db60dafcL,
+ 0x07688f3c621220L } },
+ /* 71 */
+ { { 0x1c46b342f2c4b5L,0x27decc0b3c8f04L,0x0d9bd433464c54L,
+ 0x1f3d893b818572L,0x2536043b536c94L,0x57e00c4b19ebf9L,
+ 0x3938fb9e5ad55eL },
+ { 0x6b390024c8b22fL,0x4583f97e20a976L,0x2559d24abcbad7L,
+ 0x67a9cabc9bd8c6L,0x73a56f09432e4aL,0x79eb0beb53a3b7L,
+ 0x3e19d47f6f8221L } },
+ /* 72 */
+ { { 0x7399cb9d10e0b2L,0x32acc1b8a36e2aL,0x287d60c2407035L,
+ 0x42c82420ea4b5cL,0x13f286658bc268L,0x3c91181156e064L,
+ 0x234b83dcdeb963L },
+ { 0x79bc95486cfee6L,0x4d8fd3cb78af36L,0x07362ba5e80da8L,
+ 0x79d024a0d681b0L,0x6b58406907f87fL,0x4b40f1e977e58fL,
+ 0x38dcc6fd5fa342L } },
+ /* 73 */
+ { { 0x72282be1cd0abeL,0x02bd0fdfdf44e5L,0x19b0e0d2f753e4L,
+ 0x4514e76ce8c4c0L,0x02ebc9c8cdcc1bL,0x6ac0c0373e9fddL,
+ 0x0dc414af1c81a9L },
+ { 0x7a109246f32562L,0x26982e6a3768edL,0x5ecd8daed76ab5L,
+ 0x2eaa70061eb261L,0x09e7c038a8c514L,0x2a2603cc300658L,
+ 0x25d93ab9e55cd4L } },
+ /* 74 */
+ { { 0x11b19fcbd5256aL,0x41e4d94274770fL,0x0133c1a411001fL,
+ 0x360bac481dbca3L,0x45908b18a9c22bL,0x1e34396fafb03aL,
+ 0x1b84fea7486edaL },
+ { 0x183c62a71e6e16L,0x5f1dc30e93da8eL,0x6cb97b502573c3L,
+ 0x3708bf0964e3fcL,0x35a7f042eeacceL,0x56370da902c27fL,
+ 0x3a873c3b72797fL } },
+ /* 75 */
+ { { 0x6573c9cea4cc9bL,0x2c3b5f9d91e6dcL,0x2a90e2dbd9505eL,
+ 0x66a75444025f81L,0x1571fb894b03cdL,0x5d1a1f00fd26f3L,
+ 0x0d19a9fd618855L },
+ { 0x659acd56515664L,0x7279478bd616a3L,0x09a909e76d56c3L,
+ 0x2fd70474250358L,0x3a1a25c850579cL,0x11b9e0f71b74ccL,
+ 0x1268daef3d1bffL } },
+ /* 76 */
+ { { 0x7f5acc46d93106L,0x5bc15512f939c8L,0x504b5f92f996deL,
+ 0x25965549be7a64L,0x357a3a2ae9b80dL,0x3f2bcf9c139cc0L,
+ 0x0a7ddd99f23b35L },
+ { 0x6868f5a8a0b1c5L,0x319ec52f15b1beL,0x0770000a849021L,
+ 0x7f4d50287bd608L,0x62c971d28a9d7fL,0x164e89309acb72L,
+ 0x2a29f002cf4a32L } },
+ /* 77 */
+ { { 0x58a852ae11a338L,0x27e3a35f2dcef8L,0x494d5731ce9e18L,
+ 0x49516f33f4bb3eL,0x386b26ba370097L,0x4e8fac1ec30248L,
+ 0x2ac26d4c44455dL },
+ { 0x20484198eb9dd0L,0x75982a0e06512bL,0x152271b9279b05L,
+ 0x5908a9857e36d2L,0x6a933ab45a60abL,0x58d8b1acb24fafL,
+ 0x28fbcf19425590L } },
+ /* 78 */
+ { { 0x5420e9df010879L,0x4aba72aec2f313L,0x438e544eda7494L,
+ 0x2e8e189ce6f7eaL,0x2f771e4efe45bdL,0x0d780293bce7efL,
+ 0x1569ad3d0d02acL },
+ { 0x325251ebeaf771L,0x02510f1a8511e2L,0x3863816bf8aad1L,
+ 0x60fdb15fe6ac19L,0x4792aef52a348cL,0x38e57a104e9838L,
+ 0x0d171611a1df1bL } },
+ /* 79 */
+ { { 0x15ceb0bea65e90L,0x6e56482db339bcL,0x37f618f7b0261fL,
+ 0x6351abc226dabcL,0x0e999f617b74baL,0x37d3cc57af5b69L,
+ 0x21df2b987aac68L },
+ { 0x2dddaa3a358610L,0x2da264bc560e47L,0x545615d538bf13L,
+ 0x1c95ac244b8cc7L,0x77de1f741852cbL,0x75d324f00996abL,
+ 0x3a79b13b46aa3bL } },
+ /* 80 */
+ { { 0x7db63998683186L,0x6849bb989d530cL,0x7b53c39ef7ed73L,
+ 0x53bcfbf664d3ffL,0x25ef27c57f71c7L,0x50120ee80f3ad6L,
+ 0x243aba40ed0205L },
+ { 0x2aae5e0ee1fcebL,0x3449d0d8343fbeL,0x5b2864fb7cffc7L,
+ 0x64dceb5407ac3eL,0x20303a5695523dL,0x3def70812010b2L,
+ 0x07be937f2e9b6fL } },
+ /* 81 */
+ { { 0x5838f9e0540015L,0x728d8720efb9f7L,0x1ab5864490b0c8L,
+ 0x6531754458fdcfL,0x600ff9612440c0L,0x48735b36a585b7L,
+ 0x3d4aaea86b865dL },
+ { 0x6898942cac32adL,0x3c84c5531f23a1L,0x3c9dbd572f7edeL,
+ 0x5691f0932a2976L,0x186f0db1ac0d27L,0x4fbed18bed5bc9L,
+ 0x0e26b0dee0b38cL } },
+ /* 82 */
+ { { 0x1188b4f8e60f5bL,0x602a915455b4a2L,0x60e06af289ff99L,
+ 0x579fe4bed999e5L,0x2bc03b15e6d9ddL,0x1689649edd66d5L,
+ 0x3165e277dca9d2L },
+ { 0x7cb8a529cf5279L,0x57f8035b34d84dL,0x352e2eb26de8f1L,
+ 0x6406820c3367c4L,0x5d148f4c899899L,0x483e1408482e15L,
+ 0x1680bd1e517606L } },
+ /* 83 */
+ { { 0x5c877cc1c90202L,0x2881f158eae1f4L,0x6f45e207df4267L,
+ 0x59280eba1452d8L,0x4465b61e267db5L,0x171f1137e09e5cL,
+ 0x1368eb821daa93L },
+ { 0x70fe26e3e66861L,0x52a6663170da7dL,0x71d1ce5b7d79dcL,
+ 0x1cffe9be1e1afdL,0x703745115a29c4L,0x73b7f897b2f65aL,
+ 0x02218c3a95891aL } },
+ /* 84 */
+ { { 0x16866db8a9e8c9L,0x4770b770123d9bL,0x4c116cf34a8465L,
+ 0x079b28263fc86aL,0x3751c755a72b58L,0x7bc8df1673243aL,
+ 0x12fff72454f064L },
+ { 0x15c049b89554e7L,0x4ea9ef44d7cd9aL,0x42f50765c0d4f1L,
+ 0x158bb603cb011bL,0x0809dde16470b1L,0x63cad7422ea819L,
+ 0x38b6cd70f90d7eL } },
+ /* 85 */
+ { { 0x1e4aab6328e33fL,0x70575f026da3aeL,0x7e1b55c8c55219L,
+ 0x328d4b403d24caL,0x03b6df1f0a5bd1L,0x26b4bb8b648ed0L,
+ 0x17161f2f10b76aL },
+ { 0x6cdb32bae8b4c0L,0x33176266227056L,0x4975fa58519b45L,
+ 0x254602ea511d96L,0x4e82e93e402a67L,0x0ca8b5929cdb4fL,
+ 0x3ae7e0a07918f5L } },
+ /* 86 */
+ { { 0x60f9d1fecf5b9bL,0x6257e40d2cd469L,0x6c7aa814d28456L,
+ 0x58aac7caac8e79L,0x703a55f0293cbfL,0x702390a0f48378L,
+ 0x24b9ae07218b07L },
+ { 0x1ebc66cdaf24e3L,0x7d9ae5f9f8e199L,0x42055ee921a245L,
+ 0x035595936e4d49L,0x129c45d425c08bL,0x6486c5f19ce6ddL,
+ 0x027dbd5f18ba24L } },
+ /* 87 */
+ { { 0x7d6b78d29375fbL,0x0a3dc6ba22ae38L,0x35090fa91feaf6L,
+ 0x7f18587fb7b16eL,0x6e7091dd924608L,0x54e102cdbf5ff8L,
+ 0x31b131a4c22079L },
+ { 0x368f87d6a53fb0L,0x1d3f3d69a3f240L,0x36bf5f9e40e1c6L,
+ 0x17f150e01f8456L,0x76e5d0835eb447L,0x662fc0a1207100L,
+ 0x14e3dd97a98e39L } },
+ /* 88 */
+ { { 0x0249d9c2663b4bL,0x56b68f9a71ba1cL,0x74b119567f9c02L,
+ 0x5e6f336d8c92acL,0x2ced58f9f74a84L,0x4b75a2c2a467c5L,
+ 0x30557011cf740eL },
+ { 0x6a87993be454ebL,0x29b7076fb99a68L,0x62ae74aaf99bbaL,
+ 0x399f9aa8fb6c1bL,0x553c24a396dd27L,0x2868337a815ea6L,
+ 0x343ab6635cc776L } },
+ /* 89 */
+ { { 0x0e0b0eec142408L,0x79728229662121L,0x605d0ac75e6250L,
+ 0x49a097a01edfbeL,0x1e20cd270df6b6L,0x7438a0ca9291edL,
+ 0x29daa430da5f90L },
+ { 0x7a33844624825aL,0x181715986985c1L,0x53a6853cae0b92L,
+ 0x6d98401bd925e8L,0x5a0a34f5dd5e24L,0x7b818ef53cf265L,
+ 0x0836e43c9d3194L } },
+ /* 90 */
+ { { 0x1179b70e6c5fd9L,0x0246d9305dd44cL,0x635255edfbe2fbL,
+ 0x5397b3523b4199L,0x59350cc47e6640L,0x2b57aa97ed4375L,
+ 0x37efd31abd153aL },
+ { 0x7a7afa6907f4faL,0x75c10cb94e6a7eL,0x60a925ab69cc47L,
+ 0x2ff5bcd9239bd5L,0x13c2113e425f11L,0x56bd3d2f8a1437L,
+ 0x2c9adbab13774fL } },
+ /* 91 */
+ { { 0x4ab9f52a2e5f2bL,0x5e537e70b58903L,0x0f242658ebe4f2L,
+ 0x2648a1e7a5f9aeL,0x1b4c5081e73007L,0x6827d4aff51850L,
+ 0x3925e41726cd01L },
+ { 0x56dd8a55ab3cfbL,0x72d6a31b6d5beaL,0x697bd2e5575112L,
+ 0x66935519a7aa12L,0x55e97dda7a3aceL,0x0e16afb4237b4cL,
+ 0x00b68fbff08093L } },
+ /* 92 */
+ { { 0x4b00366481d0d9L,0x37cb031fbfc5c4L,0x14643f6800dd03L,
+ 0x6793fef60fe0faL,0x4f43e329c92803L,0x1fce86b96a6d26L,
+ 0x0ad416975e213aL },
+ { 0x7cc6a6711adcc9L,0x64b8a63c43c2d9L,0x1e6caa2a67c0d0L,
+ 0x610deffd17a54bL,0x57d669d5f38423L,0x77364b8f022636L,
+ 0x36d4d13602e024L } },
+ /* 93 */
+ { { 0x72e667ae50a2f5L,0x1b15c950c3a21aL,0x3ccc37c72e6dfeL,
+ 0x027f7e1d094fb8L,0x43ae1e90aa5d7eL,0x3f5feac3d97ce5L,
+ 0x0363ed0a336e55L },
+ { 0x235f73d7663784L,0x5d8cfc588ad5a4L,0x10ab6ff333016eL,
+ 0x7d8886af2e1497L,0x549f34fd17988eL,0x3fc4fcaee69a33L,
+ 0x0622b133a13d9eL } },
+ /* 94 */
+ { { 0x6344cfa796c53eL,0x0e9a10d00136fdL,0x5d1d284a56efd8L,
+ 0x608b1968f8aca7L,0x2fa5a66776edcaL,0x13430c44f1609cL,
+ 0x1499973cb2152aL },
+ { 0x3764648104ab58L,0x3226e409fadafcL,0x1513a8466459ddL,
+ 0x649206ec365035L,0x46149aa3f765b1L,0x3aebf0a035248eL,
+ 0x1ee60b8c373494L } },
+ /* 95 */
+ { { 0x4e9efcc15f3060L,0x5e5d50fd77cdc8L,0x071e5403516b58L,
+ 0x1b7d4e89b24ceaL,0x53b1fa66d6dc03L,0x457f15f892ab5fL,
+ 0x076332c9397260L },
+ { 0x31422b79d7584bL,0x0b01d47e41ba80L,0x3e5611a3171528L,
+ 0x5f53b9a9fc1be4L,0x7e2fc3d82f110fL,0x006cf350ef0fbfL,
+ 0x123ae98ec81c12L } },
+ /* 96 */
+ { { 0x310d41df46e2f6L,0x2ff032a286cf13L,0x64751a721c4eadL,
+ 0x7b62bcc0339b95L,0x49acf0c195afa4L,0x359d48742544e5L,
+ 0x276b7632d9e2afL },
+ { 0x656c6be182579aL,0x75b65a4d85b199L,0x04a911d1721bfaL,
+ 0x46e023d0e33477L,0x1ec2d580acd869L,0x540b456f398a37L,
+ 0x001f698210153dL } },
+ /* 97 */
+ { { 0x3ca35217b00dd0L,0x73961d034f4d3cL,0x4f520b61c4119dL,
+ 0x4919fde5cccff7L,0x4d0e0e6f38134dL,0x55c22586003e91L,
+ 0x24d39d5d8f1b19L },
+ { 0x4d4fc3d73234dcL,0x40c50c9d5f8368L,0x149afbc86bf2b8L,
+ 0x1dbafefc21d7f1L,0x42e6b61355107fL,0x6e506cf4b54f29L,
+ 0x0f498a6c615228L } },
+ /* 98 */
+ { { 0x30618f437cfaf8L,0x059640658532c4L,0x1c8a4d90e96e1dL,
+ 0x4a327bcca4fb92L,0x54143b8040f1a0L,0x4ec0928c5a49e4L,
+ 0x2af5ad488d9b1fL },
+ { 0x1b392bd5338f55L,0x539c0292b41823L,0x1fe35d4df86a02L,
+ 0x5fa5bb17988c65L,0x02b6cb715adc26L,0x09a48a0c2cb509L,
+ 0x365635f1a5a9f2L } },
+ /* 99 */
+ { { 0x58aa87bdc21f31L,0x156900c7cb1935L,0x0ec1f75ee2b6cfL,
+ 0x5f3e35a77ec314L,0x582dec7b9b7621L,0x3e65deb0e8202aL,
+ 0x325c314b8a66b7L },
+ { 0x702e2a22f24d66L,0x3a20e9982014f1L,0x6424c5b86bbfb0L,
+ 0x424eea4d795351L,0x7fc4cce7c22055L,0x581383fceb92d7L,
+ 0x32b663f49ee81bL } },
+ /* 100 */
+ { { 0x76e2d0b648b73eL,0x59ca39fa50bddaL,0x18bb44f786a7e4L,
+ 0x28c8d49d464360L,0x1b8bf1d3a574eaL,0x7c670b9bf1635aL,
+ 0x2efb30a291f4b3L },
+ { 0x5326c069cec548L,0x03bbe481416531L,0x08a415c8d93d6fL,
+ 0x3414a52120d383L,0x1f17a0fc6e9c5cL,0x0de9a090717463L,
+ 0x22d84b3c67ff07L } },
+ /* 101 */
+ { { 0x30b5014c3830ebL,0x70791dc1a18b37L,0x09e6ea4e24f423L,
+ 0x65e148a5253132L,0x446f05d5d40449L,0x7ad5d3d707c0e9L,
+ 0x18eedd63dd3ab5L },
+ { 0x40d2eac6bb29e0L,0x5b0e9605e83c38L,0x554f2c666a56a8L,
+ 0x0ac27b6c94c48bL,0x1aaecdd91bafe5L,0x73c6e2bdf72634L,
+ 0x306dab96d19e03L } },
+ /* 102 */
+ { { 0x6d3e4b42772f41L,0x1aba7796f3a39bL,0x3a03fbb980e9c0L,
+ 0x2f2ea5da2186a8L,0x358ff444ef1fcfL,0x0798cc0329fcdcL,
+ 0x39a28bcc9aa46dL },
+ { 0x42775c977fe4d2L,0x5eb8fc5483d6b0L,0x0bfe37c039e3f7L,
+ 0x429292eaf9df60L,0x188bdf4b840cd5L,0x06e10e090749cdL,
+ 0x0e52678e73192eL } },
+ /* 103 */
+ { { 0x05de80b08df5feL,0x2af8c77406c5f8L,0x53573c50a0304aL,
+ 0x277b10b751bca0L,0x65cf8c559132a5L,0x4c667abe25f73cL,
+ 0x0271809e05a575L },
+ { 0x41ced461f7a2fbL,0x0889a9ebdd7075L,0x320c63f2b7760eL,
+ 0x4f8d4324151c63L,0x5af47315be2e5eL,0x73c62f6aee2885L,
+ 0x206d6412a56a97L } },
+ /* 104 */
+ { { 0x6b1c508b21d232L,0x3781185974ead6L,0x1aba7c3ebe1fcfL,
+ 0x5bdc03cd3f3a5aL,0x74a25036a0985bL,0x5929e30b7211b2L,
+ 0x16a9f3bc366bd7L },
+ { 0x566a7057dcfffcL,0x23b5708a644bc0L,0x348cda2aa5ba8cL,
+ 0x466aa96b9750d4L,0x6a435ed9b20834L,0x2e7730f2cf9901L,
+ 0x2b5cd71d5b0410L } },
+ /* 105 */
+ { { 0x285ab3cee76ef4L,0x68895e3a57275dL,0x6fab2e48fd1265L,
+ 0x0f1de060428c94L,0x668a2b080b5905L,0x1b589dc3b0cb37L,
+ 0x3c037886592c9bL },
+ { 0x7fb5c0f2e90d4dL,0x334eefb3d8c91aL,0x75747124700388L,
+ 0x547a2c2e2737f5L,0x2af9c080e37541L,0x0a295370d9091aL,
+ 0x0bb5c36dad99e6L } },
+ /* 106 */
+ { { 0x644116586f25cbL,0x0c3f41f9ee1f5dL,0x00628d43a3dedaL,
+ 0x16e1437aae9669L,0x6aba7861bf3e59L,0x60735631ff4c44L,
+ 0x345609efaa615eL },
+ { 0x41f54792e6acefL,0x4791583f75864dL,0x37f2ff5c7508b1L,
+ 0x1288912516c3b0L,0x51a2135f6a539bL,0x3b775511f42091L,
+ 0x127c6afa7afe66L } },
+ /* 107 */
+ { { 0x79f4f4f7492b73L,0x583d967256342dL,0x51a729bff33ca3L,
+ 0x3977d2c22d8986L,0x066f528ba8d40bL,0x5d759d30f8eb94L,
+ 0x0f8e649192b408L },
+ { 0x22d84e752555bbL,0x76953855c728c7L,0x3b2254e72aaaa4L,
+ 0x508cd4ce6c0212L,0x726296d6b5a6daL,0x7a77aa066986f3L,
+ 0x2267a497bbcf31L } },
+ /* 108 */
+ { { 0x7f3651bf825dc4L,0x3988817388c56fL,0x257313ed6c3dd0L,
+ 0x3feab7f3b8ffadL,0x6c0d3cb9e9c9b4L,0x1317be0a7b6ac4L,
+ 0x2a5f399d7df850L },
+ { 0x2fe5a36c934f5eL,0x429199df88ded1L,0x435ea21619b357L,
+ 0x6aac6a063bac2bL,0x600c149978f5edL,0x76543aa1114c95L,
+ 0x163ca9c83c7596L } },
+ /* 109 */
+ { { 0x7dda4a3e4daedbL,0x1824cba360a4cdL,0x09312efd70e0c6L,
+ 0x454e68a146c885L,0x40aee762fe5c47L,0x29811cbd755a59L,
+ 0x34b37c95f28319L },
+ { 0x77c58b08b717d2L,0x309470d9a0f491L,0x1ab9f40448e01cL,
+ 0x21c8bd819207b1L,0x6a01803e9361bcL,0x6e5e4c350ec415L,
+ 0x14fd55a91f8798L } },
+ /* 110 */
+ { { 0x4cee562f512a90L,0x0008361d53e390L,0x3789b307a892cfL,
+ 0x064f7be8770ae9L,0x41435d848762cfL,0x662204dd38baa6L,
+ 0x23d6dcf73f6c5aL },
+ { 0x69bef2d2c75d95L,0x2b037c0c9bb43eL,0x495fb4d79a34cfL,
+ 0x184e140c601260L,0x60193f8d435f9cL,0x283fa52a0c3ad2L,
+ 0x1998635e3a7925L } },
+ /* 111 */
+ { { 0x1cfd458ce382deL,0x0dddbd201bbcaeL,0x14d2ae8ed45d60L,
+ 0x73d764ab0c24cbL,0x2a97fe899778adL,0x0dbd1e01eddfe9L,
+ 0x2ba5c72d4042c3L },
+ { 0x27eebc3af788f1L,0x53ffc827fc5a30L,0x6d1d0726d35188L,
+ 0x4721275c50aa2aL,0x077125f02e690fL,0x6da8142405db5dL,
+ 0x126cef68992513L } },
+ /* 112 */
+ { { 0x3c6067035b2d69L,0x2a1ad7db2361acL,0x3debece6cad41cL,
+ 0x30095b30f9afc1L,0x25f50b9bd9c011L,0x79201b2f2c1da1L,
+ 0x3b5c151449c5bdL },
+ { 0x76eff4127abdb4L,0x2d31e03ce0382aL,0x24ff21f8bda143L,
+ 0x0671f244fd3ebaL,0x0c1c00b6bcc6fbL,0x18de9f7c3ebefbL,
+ 0x33dd48c3809c67L } },
+ /* 113 */
+ { { 0x61d6c2722d94edL,0x7e426e31041cceL,0x4097439f1b47b0L,
+ 0x579e798b2d205bL,0x6a430d67f830ebL,0x0d2c676700f727L,
+ 0x05fea83a82f25bL },
+ { 0x3f3482df866b98L,0x3dd353b6a5a9cdL,0x77fe6ae1a48170L,
+ 0x2f75cc2a8f7cddL,0x7442a3863dad17L,0x643de42d877a79L,
+ 0x0fec8a38fe7238L } },
+ /* 114 */
+ { { 0x79b70c0760ac07L,0x195d3af37e9b29L,0x1317ff20f7cf27L,
+ 0x624e1c739e7504L,0x67330ef50f943dL,0x775e8cf455d793L,
+ 0x17b94d2d913a9fL },
+ { 0x4b627203609e7fL,0x06aac5fb93e041L,0x603c515fdc2611L,
+ 0x2592ca0d7ae472L,0x02395d1f50a6cbL,0x466ef9648f85d9L,
+ 0x297cf879768f72L } },
+ /* 115 */
+ { { 0x3489d67d85fa94L,0x0a6e5b739c8e04L,0x7ebb5eab442e90L,
+ 0x52665a007efbd0L,0x0967ca57b0d739L,0x24891f9d932b63L,
+ 0x3cc2d6dbadc9d3L },
+ { 0x4b4773c81c5338L,0x73cd47dad7a0f9L,0x7c755bab6ae158L,
+ 0x50b03d6becefcaL,0x574d6e256d57f0L,0x188db4fffb92aeL,
+ 0x197e10118071eaL } },
+ /* 116 */
+ { { 0x45d0cbcba1e7f1L,0x1180056abec91aL,0x6c5f86624bbc28L,
+ 0x442c83f3b8e518L,0x4e16ae1843ecb4L,0x670cef2fd786c9L,
+ 0x205b4acb637d2cL },
+ { 0x70b0e539aa8671L,0x67c982056bebd0L,0x645c831a5e7c36L,
+ 0x09e06951a14b32L,0x5dd610ad4c89e6L,0x41c35f20164831L,
+ 0x3821f29cb4cdb8L } },
+ /* 117 */
+ { { 0x2831ffaba10079L,0x70f6dac9ffe444L,0x1cfa32ccc03717L,
+ 0x01519fda22a3c8L,0x23215e815aaa27L,0x390671ad65cbf7L,
+ 0x03dd4d72de7d52L },
+ { 0x1ecd972ee95923L,0x166f8da3813e8eL,0x33199bbd387a1aL,
+ 0x04525fe15e3dc7L,0x44d2ef54165898L,0x4b7e47d3dc47f7L,
+ 0x10d5c8db0b5d44L } },
+ /* 118 */
+ { { 0x176d95ba9cdb1bL,0x14025f04f23dfcL,0x49379332891687L,
+ 0x6625e5ccbb2a57L,0x7ac0abdbf9d0e5L,0x7aded4fbea15b2L,
+ 0x314844ac184d67L },
+ { 0x6d9ce34f05eae3L,0x3805d2875856d2L,0x1c2122f85e40ebL,
+ 0x51cb9f2d483a9aL,0x367e91e20f1702L,0x573c3559838dfdL,
+ 0x0b282b0cb85af1L } },
+ /* 119 */
+ { { 0x6a12e4ef871eb5L,0x64bb517e14f5ffL,0x29e04d3aaa530bL,
+ 0x1b07d88268f261L,0x411be11ed16fb0L,0x1f480536db70bfL,
+ 0x17a7deadfd34e4L },
+ { 0x76d72f30646612L,0x5a3bbb43a1b0a0L,0x5e1687440e82bfL,
+ 0x713b5e69481112L,0x46c3dcb499e174L,0x0862da3b4e2a24L,
+ 0x31cb55b4d62681L } },
+ /* 120 */
+ { { 0x5ffc74dae5bb45L,0x18944c37adb9beL,0x6aaa63b1ee641aL,
+ 0x090f4b6ee057d3L,0x4045cedd2ee00fL,0x21c2c798f7c282L,
+ 0x2c2c6ef38cd6bdL },
+ { 0x40d78501a06293L,0x56f8caa5cc89a8L,0x7231d5f91b37aeL,
+ 0x655f1e5a465c6dL,0x3f59a81f9cf783L,0x09bbba04c23624L,
+ 0x0f71ee23bbacdeL } },
+ /* 121 */
+ { { 0x38d398c4741456L,0x5204c0654243c3L,0x34498c916ea77eL,
+ 0x12238c60e5fe43L,0x0fc54f411c7625L,0x30b2ca43aa80b6L,
+ 0x06bead1bb6ea92L },
+ { 0x5902ba8674b4adL,0x075ab5b0fa254eL,0x58db83426521adL,
+ 0x5b66b6b3958e39L,0x2ce4e39890e07bL,0x46702513338b37L,
+ 0x363690c2ded4d7L } },
+ /* 122 */
+ { { 0x765642c6b75791L,0x0f4c4300d7f673L,0x404d8bbe101425L,
+ 0x61e91c88651f1bL,0x61ddc9bc60aed8L,0x0ef36910ce2e65L,
+ 0x04b44367aa63b8L },
+ { 0x72822d3651b7dcL,0x4b750157a2716dL,0x091cb4f2118d16L,
+ 0x662ba93b101993L,0x447cbd54a1d40aL,0x12cdd48d674848L,
+ 0x16f10415cbec69L } },
+ /* 123 */
+ { { 0x0c57a3a751cd0eL,0x0833d7478fadceL,0x1e751f55686436L,
+ 0x489636c58e1df7L,0x26ad6da941266fL,0x22225d3559880fL,
+ 0x35b397c45ba0e2L },
+ { 0x3ca97b70e1f2ceL,0x78e50427a8680cL,0x06137e042a8f91L,
+ 0x7ec40d2500b712L,0x3f0ad688ad7b0dL,0x24746fb33f9513L,
+ 0x3638fcce688f0bL } },
+ /* 124 */
+ { { 0x753163750bed6fL,0x786507cd16157bL,0x1d6ec228ce022aL,
+ 0x587255f42d1b31L,0x0c6adf72a3a0f6L,0x4bfeee2da33f5eL,
+ 0x08b7300814de6cL },
+ { 0x00bf8df9a56e11L,0x75aead48fe42e8L,0x3de9bad911b2e2L,
+ 0x0fadb233e4b8bbL,0x5b054e8fd84f7dL,0x5eb3064152889bL,
+ 0x01c1c6e8c777a1L } },
+ /* 125 */
+ { { 0x5fa0e598f8fcb9L,0x11c129a1ae18dfL,0x5c41b482a2273bL,
+ 0x545664e5044c9cL,0x7e01c915bfb9abL,0x7f626e19296aa0L,
+ 0x20c91a9822a087L },
+ { 0x273a9fbe3c378fL,0x0f126b44b7d350L,0x493764a75df951L,
+ 0x32dec3c367d24bL,0x1a7ae987fed9d3L,0x58a93055928b85L,
+ 0x11626975d7775fL } },
+ /* 126 */
+ { { 0x2bb174a95540a9L,0x10de02c58b613fL,0x2fa8f7b861f3eeL,
+ 0x44731260bdf3b3L,0x19c38ff7da41feL,0x3535a16e3d7172L,
+ 0x21a948b83cc7feL },
+ { 0x0e6f72868bc259L,0x0c70799df3c979L,0x526919955584c3L,
+ 0x4d95fda04f8fa2L,0x7bb228e6c0f091L,0x4f728b88d92194L,
+ 0x2b361c5a136bedL } },
+ /* 127 */
+ { { 0x0c72ca10c53841L,0x4036ab49f9da12L,0x578408d2b7082bL,
+ 0x2c4903201fbf5eL,0x14722b3f42a6a8L,0x1997b786181694L,
+ 0x25c6f10de32849L },
+ { 0x79f46d517ff2ffL,0x2dc5d97528f6deL,0x518a494489aa72L,
+ 0x52748f8af3cf97L,0x472da30a96bb16L,0x1be228f92465a9L,
+ 0x196f0c47d60479L } },
+ /* 128 */
+ { { 0x47dd7d139b3239L,0x049c9b06775d0fL,0x627ffc00562d5eL,
+ 0x04f578d5e5e243L,0x43a788ffcef8b9L,0x7db320be9dde28L,
+ 0x00837528b8572fL },
+ { 0x2969eca306d695L,0x195b72795ec194L,0x5e1fa9b8e77e50L,
+ 0x4c627f2b3fbfd5L,0x4b91e0d0ee10ffL,0x5698c8d0f35833L,
+ 0x12d3a9431f475eL } },
+ /* 129 */
+ { { 0x6409457a0db57eL,0x795b35192e0433L,0x146f973fe79805L,
+ 0x3d49c516dfb9cfL,0x50dfc3646b3cdaL,0x16a08a2210ad06L,
+ 0x2b4ef5bcd5b826L },
+ { 0x5ebabfee2e3e3eL,0x2e048e724d9726L,0x0a7a7ed6abef40L,
+ 0x71ff7f83e39ad8L,0x3405ac52a1b852L,0x2e3233357a608dL,
+ 0x38c1bf3b0e40e6L } },
+ /* 130 */
+ { { 0x59aec823e4712cL,0x6ed9878331ddadL,0x1cc6faf629f2a0L,
+ 0x445ff79f36c18cL,0x4edc7ed57aff3dL,0x22ee54c8bdd9e8L,
+ 0x35398f42d72ec5L },
+ { 0x4e7a1cceee0ecfL,0x4c66a707dd1d31L,0x629ad157a23c04L,
+ 0x3b2c6031dc3c83L,0x3336acbcd3d96cL,0x26ce43adfce0f0L,
+ 0x3c869c98d699dcL } },
+ /* 131 */
+ { { 0x58b3cd9586ba11L,0x5d6514b8090033L,0x7c88c3bd736782L,
+ 0x1735f84f2130edL,0x47784095a9dee0L,0x76312c6e47901bL,
+ 0x1725f6ebc51455L },
+ { 0x6744344bc4503eL,0x16630b4d66e12fL,0x7b3481752c3ec7L,
+ 0x47bb2ed1f46f95L,0x08a1a497dd1bcfL,0x1f525df2b8ed93L,
+ 0x0fe492ea993713L } },
+ /* 132 */
+ { { 0x71b8dd7268b448L,0x1743dfaf3728d7L,0x23938d547f530aL,
+ 0x648c3d497d0fc6L,0x26c0d769e3ad45L,0x4d25108769a806L,
+ 0x3fbf2025143575L },
+ { 0x485bfd90339366L,0x2de2b99ed87461L,0x24a33347713badL,
+ 0x1674bc7073958aL,0x5bb2373ee85b5fL,0x57f9bd657e662cL,
+ 0x2041b248d39042L } },
+ /* 133 */
+ { { 0x5f01617d02f4eeL,0x2a8e31c4244b91L,0x2dab3e790229e0L,
+ 0x72d319ea7544afL,0x01ffb8b000cb56L,0x065e63b0daafd3L,
+ 0x3d7200a7111d6fL },
+ { 0x4561ce1b568973L,0x37034c532dd8ecL,0x1368215020be02L,
+ 0x30e7184cf289ebL,0x199e0c27d815deL,0x7ee1b4dff324e5L,
+ 0x2f4a11de7fab5cL } },
+ /* 134 */
+ { { 0x33c2f99b1cdf2bL,0x1e0d78bf42a2c0L,0x64485dececaa67L,
+ 0x2242a41be93e92L,0x62297b1f15273cL,0x16ebfaafb02205L,
+ 0x0f50f805f1fdabL },
+ { 0x28bb0b3a70eb28L,0x5b1c7d0160d683L,0x05c30a37959f78L,
+ 0x3d9301184922d2L,0x46c1ead7dbcb1aL,0x03ee161146a597L,
+ 0x2d413ed9a6ccc1L } },
+ /* 135 */
+ { { 0x685ab5f97a27c2L,0x59178214023751L,0x4ffef3c585ab17L,
+ 0x2bc85302aba2a9L,0x675b001780e856L,0x103c8a37f0b33dL,
+ 0x2241e98ece70a6L },
+ { 0x546738260189edL,0x086c8f7a6b96edL,0x00832ad878a129L,
+ 0x0b679056ba7462L,0x020ce6264bf8c4L,0x3f9f4b4d92abfbL,
+ 0x3e9c55343c92edL } },
+ /* 136 */
+ { { 0x482cec9b3f5034L,0x08b59b3cd1fa30L,0x5a55d1bc8e58b5L,
+ 0x464a5259337d8eL,0x0a5b6c66ade5a5L,0x55db77b504ddadL,
+ 0x015992935eac35L },
+ { 0x54fe51025e32fcL,0x5d7f52dbe4a579L,0x08c564a8c58696L,
+ 0x4482a8bec4503fL,0x440e75d9d94de9L,0x6992d768020bfaL,
+ 0x06c311e8ba01f6L } },
+ /* 137 */
+ { { 0x2a6ac808223878L,0x04d3ccb4aab0b8L,0x6e6ef09ff6e823L,
+ 0x15cb03ee9158dcL,0x0dc58919171bf7L,0x3273568abf3cb1L,
+ 0x1b55245b88d98bL },
+ { 0x28e9383b1de0c1L,0x30d5009e4f1f1bL,0x334d185a56a134L,
+ 0x0875865dfa4c46L,0x266edf5eae3beeL,0x2e03ff16d1f7e5L,
+ 0x29a36bd9f0c16dL } },
+ /* 138 */
+ { { 0x004cff44b2e045L,0x426c96380ba982L,0x422292281e46d7L,
+ 0x508dd8d29d7204L,0x3a4ea73fb2995eL,0x4be64090ae07b2L,
+ 0x3339177a0eff22L },
+ { 0x74a97ec2b3106eL,0x0c616d09169f5fL,0x1bb5d8907241a7L,
+ 0x661fb67f6d41bdL,0x018a88a0daf136L,0x746333a093a7b4L,
+ 0x3e19f1ac76424eL } },
+ /* 139 */
+ { { 0x542a5656527296L,0x0e7b9ce22f1bc9L,0x31b0945992b89bL,
+ 0x6e0570eb85056dL,0x32daf813483ae5L,0x69eeae9d59bb55L,
+ 0x315ad4b730b557L },
+ { 0x2bc16795f32923L,0x6b02b7ba55130eL,0x1e9da67c012f85L,
+ 0x5616f014dabf8fL,0x777395fcd9c723L,0x2ff075e7743246L,
+ 0x2993538aff142eL } },
+ /* 140 */
+ { { 0x72dae20e552b40L,0x2e4ba69aa5d042L,0x001e563e618bd2L,
+ 0x28feeba3c98772L,0x648c356da2a907L,0x687e2325069ea7L,
+ 0x0d34ab09a394f0L },
+ { 0x73c21813111286L,0x5829b53b304e20L,0x6fba574de08076L,
+ 0x79f7058f61614eL,0x4e71c9316f1191L,0x24ef12193e0a89L,
+ 0x35dc4e2bc9d848L } },
+ /* 141 */
+ { { 0x045e6d3b4ad1cdL,0x729c95493782f0L,0x77f59de85b361aL,
+ 0x5309b4babf28f8L,0x4d893d9290935fL,0x736f47f2b2669eL,
+ 0x23270922d757f3L },
+ { 0x23a4826f70d4e9L,0x68a8c63215d33eL,0x4d6c2069205c9cL,
+ 0x46b2938a5eebe0L,0x41d1f1e2de3892L,0x5ca1775544bcb0L,
+ 0x3130629e5d19dcL } },
+ /* 142 */
+ { { 0x6e2681593375acL,0x117cfbabc22621L,0x6c903cd4e13ccaL,
+ 0x6f358f14d4bd97L,0x1bc58fa11089f1L,0x36aa2db4ac426aL,
+ 0x15ced8464b7ea1L },
+ { 0x6966836cba7df5L,0x7c2b1851568113L,0x22b50ff2ffca66L,
+ 0x50e77d9f48e49aL,0x32775e9bbc7cc9L,0x403915bb0ece71L,
+ 0x1b8ec7cb9dd7aaL } },
+ /* 143 */
+ { { 0x65a888b677788bL,0x51887fac2e7806L,0x06792636f98d2bL,
+ 0x47bbcd59824c3bL,0x1aca908c43e6dcL,0x2e00d15c708981L,
+ 0x08e031c2c80634L },
+ { 0x77fbc3a297c5ecL,0x10a7948af2919eL,0x10cdafb1fb6b2fL,
+ 0x27762309b486f0L,0x13abf26bbac641L,0x53da38478fc3eeL,
+ 0x3c22eff379bf55L } },
+ /* 144 */
+ { { 0x0163f484770ee3L,0x7f28e8942e0cbfL,0x5f86cb51b43831L,
+ 0x00feccd4e4782fL,0x40e5b417eafe7dL,0x79e5742bbea228L,
+ 0x3717154aa469beL },
+ { 0x271d74a270f721L,0x40eb400890b70cL,0x0e37be81d4cb02L,
+ 0x786907f4e8d43fL,0x5a1f5b590a7acbL,0x048861883851fdL,
+ 0x11534a1e563dbbL } },
+ /* 145 */
+ { { 0x37a6357c525435L,0x6afe6f897b78a5L,0x7b7ff311d4f67bL,
+ 0x38879df15dc9f4L,0x727def7b8ba987L,0x20285dd0db4436L,
+ 0x156b0fc64b9243L },
+ { 0x7e3a6ec0c1c390L,0x668a88d9bcf690L,0x5925aba5440dbeL,
+ 0x0f6891a044f593L,0x70b46edfed4d97L,0x1a6cc361bab201L,
+ 0x046f5bc6e160bcL } },
+ /* 146 */
+ { { 0x79350f076bc9d1L,0x077d9e79a586b9L,0x0896bc0c705764L,
+ 0x58e632b90e7e46L,0x14e87e0ad32488L,0x4b1bb3f72c6e00L,
+ 0x3c3ce9684a5fc5L },
+ { 0x108fbaf1f703aaL,0x08405ecec17577L,0x199a8e2d44be73L,
+ 0x2eb22ed0067763L,0x633944deda3300L,0x20d739eb8e5efbL,
+ 0x2bbbd94086b532L } },
+ /* 147 */
+ { { 0x03c8b17a19045dL,0x6205a0a504980bL,0x67fdb3e962b9f0L,
+ 0x16399e01511a4bL,0x44b09fe9dffc96L,0x00a74ff44a1381L,
+ 0x14590deed3f886L },
+ { 0x54e3d5c2a23ddbL,0x310e5138209d28L,0x613f45490c1c9bL,
+ 0x6bbc85d44bbec8L,0x2f85fc559e73f6L,0x0d71fa7d0fa8cbL,
+ 0x2898571d17fbb9L } },
+ /* 148 */
+ { { 0x5607a84335167dL,0x3009c1eb910f91L,0x7ce63447e62d0bL,
+ 0x03a0633afcf89eL,0x1234b5aaa50872L,0x5a307b534d547bL,
+ 0x2f4e97138a952eL },
+ { 0x13914c2db0f658L,0x6cdcb47e6e75baL,0x5549169caca772L,
+ 0x0f20423dfeb16fL,0x6b1ae19d180239L,0x0b7b3bee9b7626L,
+ 0x1ca81adacfe4efL } },
+ /* 149 */
+ { { 0x219ec3ad19d96fL,0x3549f6548132dbL,0x699889c7aacd0bL,
+ 0x74602a58730b19L,0x62dc63bcece81cL,0x316f991c0c317aL,
+ 0x2b8627867b95e3L },
+ { 0x67a25ddced1eedL,0x7e14f0eba756e7L,0x0873fbc09b0495L,
+ 0x0fefb0e16596adL,0x03e6cd98ef39bbL,0x1179b1cded249dL,
+ 0x35c79c1db1edc2L } },
+ /* 150 */
+ { { 0x1368309d4245bfL,0x442e55852a7667L,0x095b0f0f348b65L,
+ 0x6834cf459dfad4L,0x6645950c9be910L,0x06bd81288c71e6L,
+ 0x1b015b6e944edfL },
+ { 0x7a6a83045ab0e3L,0x6afe88b9252ad0L,0x2285bd65523502L,
+ 0x6c78543879a282L,0x1c5e264b5c6393L,0x3a820c6a7453eeL,
+ 0x37562d1d61d3c3L } },
+ /* 151 */
+ { { 0x6c084f62230c72L,0x599490270bc6cfL,0x1d3369ddd3c53dL,
+ 0x516ddb5fac5da0L,0x35ab1e15011b1aL,0x5fba9106d3a180L,
+ 0x3be0f092a0917cL },
+ { 0x57328f9fdc2538L,0x0526323fc8d5f6L,0x10cbb79521e602L,
+ 0x50d01167147ae2L,0x2ec7f1b3cda99eL,0x43073cc736e7beL,
+ 0x1ded89cadd83a6L } },
+ /* 152 */
+ { { 0x1d51bda65d56d5L,0x63f2fd4d2dc056L,0x326413d310ea6dL,
+ 0x3abba5bca92876L,0x6b9aa8bc4d6ebeL,0x1961c687f15d5dL,
+ 0x311cf07464c381L },
+ { 0x2321b1064cd8aeL,0x6e3caac4443850L,0x3346fc4887d2d0L,
+ 0x1640417e0e640fL,0x4a958a52a07a9eL,0x1346a1b1cb374cL,
+ 0x0a793cf79beccbL } },
+ /* 153 */
+ { { 0x29d56cba89aaa5L,0x1581898c0b3c15L,0x1af5b77293c082L,
+ 0x1617ba53a006ceL,0x62dd3b384e475fL,0x71a9820c3f962aL,
+ 0x0e4938920b854eL },
+ { 0x0b8d98849808abL,0x64c14923546de7L,0x6a20883b78a6fcL,
+ 0x72de211428acd6L,0x009678b47915bbL,0x21b5269ae5dae6L,
+ 0x313cc0e60b9457L } },
+ /* 154 */
+ { { 0x69ee421b1de38bL,0x44b484c6cec1c7L,0x0240596c6a8493L,
+ 0x2321a62c85fb9eL,0x7a10921802a341L,0x3d2a95507e45c3L,
+ 0x0752f40f3b6714L },
+ { 0x596a38798751e6L,0x46bf186a0feb85L,0x0b23093e23b49cL,
+ 0x1bfa7bc5afdc07L,0x4ba96f873eefadL,0x292e453fae9e44L,
+ 0x2773646667b75cL } },
+ /* 155 */
+ { { 0x1f81a64e94f22aL,0x3125ee3d8683ddL,0x76a660a13b9582L,
+ 0x5aa584c3640c6eL,0x27cc99fd472953L,0x7048f4d58061d1L,
+ 0x379a1397ac81e8L },
+ { 0x5d1ecd2b6b956bL,0x0829e0366b0697L,0x49548cec502421L,
+ 0x7af5e2f717c059L,0x329a25a0fec54eL,0x028e99e4bcd7f1L,
+ 0x071d5fe81fca78L } },
+ /* 156 */
+ { { 0x4b5c4aeb0fdfe4L,0x1367e11326ce37L,0x7c16f020ef5f19L,
+ 0x3c55303d77b471L,0x23a4457a06e46aL,0x2174426dd98424L,
+ 0x226f592114bd69L },
+ { 0x4411b94455f15aL,0x52e0115381fae4L,0x45b6d8efbc8f7eL,
+ 0x58b1221bd86d26L,0x284fb6f8a7ec1fL,0x045835939ddd30L,
+ 0x0216960accd598L } },
+ /* 157 */
+ { { 0x4b61f9ec1f138aL,0x4460cd1e18502bL,0x277e4fce3c4726L,
+ 0x0244246d6414b9L,0x28fbfcef256984L,0x3347ed0db40577L,
+ 0x3b57fa9e044718L },
+ { 0x4f73bcd6d1c833L,0x2c0d0dcf7f0136L,0x2010ac75454254L,
+ 0x7dc4f6151539a8L,0x0b8929ef6ea495L,0x517e20119d2bdfL,
+ 0x1e29f9a126ba15L } },
+ /* 158 */
+ { { 0x683a7c10470cd8L,0x0d05f0dbe0007fL,0x2f6a5026d649cdL,
+ 0x249ce2fdaed603L,0x116dc1e7a96609L,0x199bd8d82a0b98L,
+ 0x0694ad0219aeb2L },
+ { 0x03a3656e864045L,0x4e552273df82a6L,0x19bcc7553d17abL,
+ 0x74ac536c1df632L,0x440302fb4a86f6L,0x1becec0e31c9feL,
+ 0x002045f8fa46b8L } },
+ /* 159 */
+ { { 0x5833ba384310a2L,0x1db83fad93f8baL,0x0a12713ee2f7edL,
+ 0x40e0f0fdcd2788L,0x1746de5fb239a5L,0x573748965cfa15L,
+ 0x1e3dedda0ef650L },
+ { 0x6c8ca1c87607aeL,0x785dab9554fc0eL,0x649d8f91860ac8L,
+ 0x4436f88b52c0f9L,0x67f22ca8a5e4a3L,0x1f990fd219e4c9L,
+ 0x013dd21c08573fL } },
+ /* 160 */
+ { { 0x05d116141d161cL,0x5c1d2789da2ea5L,0x11f0d861f99f34L,
+ 0x692c2650963153L,0x3bd69f5329539eL,0x215898eef8885fL,
+ 0x041f79dd86f7f1L },
+ { 0x76dcc5e96beebdL,0x7f2b50cb42a332L,0x067621cabef8abL,
+ 0x31e0be607054edL,0x4c67c5e357a3daL,0x5b1a63fbfb1c2bL,
+ 0x3112efbf5e5c31L } },
+ /* 161 */
+ { { 0x3f83e24c0c62f1L,0x51dc9c32aae4e0L,0x2ff89b33b66c78L,
+ 0x21b1c7d354142cL,0x243d8d381c84bcL,0x68729ee50cf4b7L,
+ 0x0ed29e0f442e09L },
+ { 0x1ad7b57576451eL,0x6b2e296d6b91dcL,0x53f2b306e30f42L,
+ 0x3964ebd9ee184aL,0x0a32855df110e4L,0x31f2f90ddae05fL,
+ 0x3410cd04e23702L } },
+ /* 162 */
+ { { 0x60d1522ca8f2feL,0x12909237a83e34L,0x15637f80d58590L,
+ 0x3c72431b6d714dL,0x7c8e59a615bea2L,0x5f977b688ef35aL,
+ 0x071c198c0b3ab0L },
+ { 0x2b54c699699b4bL,0x14da473c2fd0bcL,0x7ba818ea0ad427L,
+ 0x35117013940b2fL,0x6e1df6b5e609dbL,0x3f42502720b64dL,
+ 0x01ee7dc890e524L } },
+ /* 163 */
+ { { 0x12ec1448ff4e49L,0x3e2edac882522bL,0x20455ab300f93aL,
+ 0x5849585bd67c14L,0x0393d5aa34ba8bL,0x30f9a1f2044fa7L,
+ 0x1059c9377a93e0L },
+ { 0x4e641cc0139e73L,0x0d9f23c9b0fa78L,0x4b2ad87e2b83f9L,
+ 0x1c343a9f6d9e3cL,0x1098a4cb46de4dL,0x4ddc893843a41eL,
+ 0x1797f4167d6e3aL } },
+ /* 164 */
+ { { 0x4add4675856031L,0x499bd5e5f7a0ffL,0x39ea1f1202271eL,
+ 0x0ecd7480d7a91eL,0x395f5e5fc10956L,0x0fa7f6b0c9f79bL,
+ 0x2fad4623aed6cbL },
+ { 0x1563c33ae65825L,0x29881cafac827aL,0x50650baf4c45a1L,
+ 0x034aad988fb9e9L,0x20a6224dc5904cL,0x6fb141a990732bL,
+ 0x3ec9ae1b5755deL } },
+ /* 165 */
+ { { 0x3108e7c686ae17L,0x2e73a383b4ad8aL,0x4e6bb142ba4243L,
+ 0x24d355922c1d80L,0x2f850dd9a088baL,0x21c50325dd5e70L,
+ 0x33237dd5bd7fa4L },
+ { 0x7823a39cab7630L,0x1535f71cff830eL,0x70d92ff0599261L,
+ 0x227154d2a2477cL,0x495e9bbb4f871cL,0x40d2034835686bL,
+ 0x31b08f97eaa942L } },
+ /* 166 */
+ { { 0x0016c19034d8ddL,0x68961627cf376fL,0x6acc90681615aeL,
+ 0x6bc7690c2e3204L,0x6ddf28d2fe19a2L,0x609b98f84dae4dL,
+ 0x0f32bfd7c94413L },
+ { 0x7d7edc6b21f843L,0x49bbd2ebbc9872L,0x593d6ada7b6a23L,
+ 0x55736602939e9cL,0x79461537680e39L,0x7a7ee9399ca7cdL,
+ 0x008776f6655effL } },
+ /* 167 */
+ { { 0x64585f777233cfL,0x63ec12854de0f6L,0x6b7f9bbbc3f99dL,
+ 0x301c014b1b55d3L,0x7cf3663bbeb568L,0x24959dcb085bd1L,
+ 0x12366aa6752881L },
+ { 0x77a74c0da5e57aL,0x3279ca93ad939fL,0x33c3c8a1ef08c9L,
+ 0x641b05ab42825eL,0x02f416d7d098dbL,0x7e3d58be292b68L,
+ 0x1864dbc46e1f46L } },
+ /* 168 */
+ { { 0x1da167b8153a9dL,0x47593d07d9e155L,0x386d984e12927fL,
+ 0x421a6f08a60c7cL,0x5ae9661c24dab3L,0x7927b2e7874507L,
+ 0x3266ea80609d53L },
+ { 0x7d198f4c26b1e3L,0x430d4ea2c4048eL,0x58d8ab77e84ba3L,
+ 0x1cb14299c37297L,0x6db6031e8f695cL,0x159bd855e26d55L,
+ 0x3f3f6d318a73ddL } },
+ /* 169 */
+ { { 0x3ee958cca40298L,0x02a7e5eba32ad6L,0x43b4bab96f0e1eL,
+ 0x534be79062b2b1L,0x029ead089b37e3L,0x4d585da558f5aaL,
+ 0x1f9737eb43c376L },
+ { 0x0426dfd9b86202L,0x4162866bc0a9f3L,0x18fc518e7bb465L,
+ 0x6db63380fed812L,0x421e117f709c30L,0x1597f8d0f5cee6L,
+ 0x04ffbf1289b06aL } },
+ /* 170 */
+ { { 0x61a1987ffa0a5fL,0x42058c7fc213c6L,0x15b1d38447d2c9L,
+ 0x3d5f5d7932565eL,0x5db754af445fa7L,0x5d489189fba499L,
+ 0x02c4c55f51141bL },
+ { 0x26b15972e9993dL,0x2fc90bcbd97c45L,0x2ff60f8684b0f1L,
+ 0x1dc641dd339ab0L,0x3e38e6be23f82cL,0x3368162752c817L,
+ 0x19bba80ceb45ceL } },
+ /* 171 */
+ { { 0x7c6e95b4c6c693L,0x6bbc6d5efa7093L,0x74d7f90bf3bf1cL,
+ 0x54d5be1f0299a1L,0x7cb24f0aa427c6L,0x0a18f3e086c941L,
+ 0x058a1c90e4faefL },
+ { 0x3d6bd016927e1eL,0x1da4ce773098b8L,0x2133522e690056L,
+ 0x0751416d3fc37eL,0x1beed1643eda66L,0x5288b6727d5c54L,
+ 0x199320e78655c6L } },
+ /* 172 */
+ { { 0x74575027eeaf94L,0x124bd533c3ceaeL,0x69421ab7a8a1d7L,
+ 0x37f2127e093f3dL,0x40281765252a08L,0x25a228798d856dL,
+ 0x326eca62759c4cL },
+ { 0x0c337c51acb0a5L,0x122ba78c1ef110L,0x02498adbb68dc4L,
+ 0x67240c124b089eL,0x135865d25d9f89L,0x338a76d5ae5670L,
+ 0x03a8efaf130385L } },
+ /* 173 */
+ { { 0x3a450ac5e49beaL,0x282af80bb4b395L,0x6779eb0db1a139L,
+ 0x737cabdd174e55L,0x017b14ca79b5f2L,0x61fdef6048e137L,
+ 0x3acc12641f6277L },
+ { 0x0f730746fe5096L,0x21d05c09d55ea1L,0x64d44bddb1a560L,
+ 0x75e5035c4778deL,0x158b7776613513L,0x7b5efa90c7599eL,
+ 0x2caa0791253b95L } },
+ /* 174 */
+ { { 0x288e5b6d53e6baL,0x435228909d45feL,0x33b4cf23b2a437L,
+ 0x45b352017d6db0L,0x4372d579d6ef32L,0x0fa9e5badbbd84L,
+ 0x3a78cff24759bbL },
+ { 0x0899d2039eab6eL,0x4cf47d2f76bc22L,0x373f739a3a8c69L,
+ 0x09beaa5b1000b3L,0x0acdfbe83ebae5L,0x10c10befb0e900L,
+ 0x33d2ac4cc31be3L } },
+ /* 175 */
+ { { 0x765845931e08fbL,0x2a3c2a0dc58007L,0x7270da587d90e1L,
+ 0x1ee648b2bc8f86L,0x5d2ca68107b29eL,0x2b7064846e9e92L,
+ 0x3633ed98dbb962L },
+ { 0x5e0f16a0349b1bL,0x58d8941f570ca4L,0x20abe376a4cf34L,
+ 0x0f4bd69a360977L,0x21eb07cc424ba7L,0x720d2ecdbbe6ecL,
+ 0x255597d5a97c34L } },
+ /* 176 */
+ { { 0x67bbf21a0f5e94L,0x422a3b05a64fc1L,0x773ac447ebddc7L,
+ 0x1a1331c08019f1L,0x01ef6d269744ddL,0x55f7be5b3b401aL,
+ 0x072e031c681273L },
+ { 0x7183289e21c677L,0x5e0a3391f3162fL,0x5e02d9e65d914aL,
+ 0x07c79ea1adce2fL,0x667ca5c2e1cbe4L,0x4f287f22caccdaL,
+ 0x27eaa81673e75bL } },
+ /* 177 */
+ { { 0x5246180a078fe6L,0x67cc8c9fa3bb15L,0x370f8dd123db31L,
+ 0x1938dafa69671aL,0x5af72624950c5eL,0x78cc5221ebddf8L,
+ 0x22d616fe2a84caL },
+ { 0x723985a839327fL,0x24fa95584a5e22L,0x3d8a5b3138d38bL,
+ 0x3829ef4a017acfL,0x4f09b00ae055c4L,0x01df84552e4516L,
+ 0x2a7a18993e8306L } },
+ /* 178 */
+ { { 0x7b6224bc310eccL,0x69e2cff429da16L,0x01c850e5722869L,
+ 0x2e4889443ee84bL,0x264a8df1b3d09fL,0x18a73fe478d0d6L,
+ 0x370b52740f9635L },
+ { 0x52b7d3a9d6f501L,0x5c49808129ee42L,0x5b64e2643fd30cL,
+ 0x27d903fe31b32cL,0x594cb084d078f9L,0x567fb33e3ae650L,
+ 0x0db7be9932cb65L } },
+ /* 179 */
+ { { 0x19b78113ed7cbeL,0x002b2f097a1c8cL,0x70b1dc17fa5794L,
+ 0x786e8419519128L,0x1a45ba376af995L,0x4f6aa84b8d806cL,
+ 0x204b4b3bc7ca47L },
+ { 0x7581a05fd94972L,0x1c73cadb870799L,0x758f6fefc09b88L,
+ 0x35c62ba8049b42L,0x6f5e71fc164cc3L,0x0cd738b5702721L,
+ 0x10021afac9a423L } },
+ /* 180 */
+ { { 0x654f7937e3c115L,0x5d198288b515cbL,0x4add965c25a6e3L,
+ 0x5a37df33cd76ffL,0x57bb7e288e1631L,0x049b69089e1a31L,
+ 0x383a88f4122a99L },
+ { 0x4c0e4ef3d80a73L,0x553c77ac9f30e2L,0x20bb18c2021e82L,
+ 0x2aec0d1c4225c5L,0x397fce0ac9c302L,0x2ab0c2a246e8aaL,
+ 0x02e5e5190be080L } },
+ /* 181 */
+ { { 0x7a255a4ae03080L,0x0d68b01513f624L,0x29905bd4e48c8cL,
+ 0x1d81507027466bL,0x1684aaeb70dee1L,0x7dd460719f0981L,
+ 0x29c43b0f0a390cL },
+ { 0x272567681b1f7dL,0x1d2a5f8502e0efL,0x0fd5cd6b221befL,
+ 0x5eb4749e9a0434L,0x7d1553a324e2a6L,0x2eefd8e86a7804L,
+ 0x2ad80d5335109cL } },
+ /* 182 */
+ { { 0x25342aef4c209dL,0x24e811ac4e0865L,0x3f209757f8ae9dL,
+ 0x1473ff8a5da57bL,0x340f61c3919cedL,0x7523bf85fb9bc0L,
+ 0x319602ebca7cceL },
+ { 0x121e7541d442cbL,0x4ffa748e49c95cL,0x11493cd1d131dcL,
+ 0x42b215172ab6b5L,0x045fd87e13cc77L,0x0ae305df76342fL,
+ 0x373b033c538512L } },
+ /* 183 */
+ { { 0x389541e9539819L,0x769f3b29b7e239L,0x0d05f695e3232cL,
+ 0x029d04f0e9a9fbL,0x58b78b7a697fb8L,0x7531b082e6386bL,
+ 0x215d235bed95a9L },
+ { 0x503947c1859c5dL,0x4b82a6ba45443fL,0x78328eab71b3a5L,
+ 0x7d8a77f8cb3509L,0x53fcd9802e41d4L,0x77552091976edbL,
+ 0x226c60ad7a5156L } },
+ /* 184 */
+ { { 0x77ad6a43360710L,0x0fdeabd326d7aeL,0x4012886c92104aL,
+ 0x2d6c378dd7ae33L,0x7e72ef2c0725f3L,0x4a4671f4ca18e0L,
+ 0x0afe3b4bb6220fL },
+ { 0x212cf4b56e0d6aL,0x7c24d086521960L,0x0662cf71bd414dL,
+ 0x1085b916c58c25L,0x781eed2be9a350L,0x26880e80db6ab2L,
+ 0x169e356442f061L } },
+ /* 185 */
+ { { 0x57aa2ad748b02cL,0x68a34256772a9aL,0x1591c44962f96cL,
+ 0x110a9edd6e53d2L,0x31eab597e091a3L,0x603e64e200c65dL,
+ 0x2f66b72e8a1cfcL },
+ { 0x5c79d138543f7fL,0x412524363fdfa3L,0x547977e3b40008L,
+ 0x735ca25436d9f7L,0x232b4888cae049L,0x27ce37a53d8f23L,
+ 0x34d45881a9b470L } },
+ /* 186 */
+ { { 0x76b95255924f43L,0x035c9f3bd1aa5dL,0x5eb71a010b4bd0L,
+ 0x6ce8dda7e39f46L,0x35679627ea70c0L,0x5c987767c7d77eL,
+ 0x1fa28952b620b7L },
+ { 0x106f50b5924407L,0x1cc3435a889411L,0x0597cdce3bc528L,
+ 0x738f8b0d5077d1L,0x5894dd60c7dd6aL,0x0013d0721f5e2eL,
+ 0x344573480527d3L } },
+ /* 187 */
+ { { 0x2e2c1da52abf77L,0x394aa8464ad05eL,0x095259b7330a83L,
+ 0x686e81cf6a11f5L,0x405c7e48c93c7cL,0x65c3ca9444a2ecL,
+ 0x07bed6c59c3563L },
+ { 0x51f9d994fb1471L,0x3c3ecfa5283b4eL,0x494dccda63f6ccL,
+ 0x4d07b255363a75L,0x0d2b6d3155d118L,0x3c688299fc9497L,
+ 0x235692fa3dea3aL } },
+ /* 188 */
+ { { 0x16b4d452669e98L,0x72451fa85406b9L,0x674a145d39151fL,
+ 0x325ffd067ae098L,0x527e7805cd1ae0L,0x422a1d1789e48dL,
+ 0x3e27be63f55e07L },
+ { 0x7f95f6dee0b63fL,0x008e444cc74969L,0x01348f3a72b614L,
+ 0x000cfac81348c3L,0x508ae3e5309ce5L,0x2584fcdee44d34L,
+ 0x3a4dd994899ee9L } },
+ /* 189 */
+ { { 0x4d289cc0368708L,0x0e5ebc60dc3b40L,0x78cc44bfab1162L,
+ 0x77ef2173b7d11eL,0x06091718e39746L,0x30fe19319b83a4L,
+ 0x17e8f2988529c6L },
+ { 0x68188bdcaa9f2aL,0x0e64b1350c1bddL,0x5b18ebac7cc4b3L,
+ 0x75315a9fcc046eL,0x36e9770fd43db4L,0x54c5857fc69121L,
+ 0x0417e18f3e909aL } },
+ /* 190 */
+ { { 0x29795db38059adL,0x6efd20c8fd4016L,0x3b6d1ce8f95a1aL,
+ 0x4db68f177f8238L,0x14ec7278d2340fL,0x47bd77ff2b77abL,
+ 0x3d2dc8cd34e9fcL },
+ { 0x285980a5a83f0bL,0x08352e2d516654L,0x74894460481e1bL,
+ 0x17f6f3709c480dL,0x6b590d1b55221eL,0x45c100dc4c9be9L,
+ 0x1b13225f9d8b91L } },
+ /* 191 */
+ { { 0x0b905fb4b41d9dL,0x48cc8a474cb7a2L,0x4eda67e8de09b2L,
+ 0x1de47c829adde8L,0x118ad5b9933d77L,0x7a12665ac3f9a4L,
+ 0x05631a4fb52997L },
+ { 0x5fb2a8e6806e63L,0x27d96bbcca369bL,0x46066f1a6b8c7bL,
+ 0x63b58fc7ca3072L,0x170a36229c0d62L,0x57176f1e463203L,
+ 0x0c7ce083e73b9cL } },
+ /* 192 */
+ { { 0x31caf2c09e1c72L,0x6530253219e9d2L,0x7650c98b601c57L,
+ 0x182469f99d56c0L,0x415f65d292b7a7L,0x30f62a55549b8eL,
+ 0x30f443f643f465L },
+ { 0x6b35c575ddadd0L,0x14a23cf6d299eeL,0x2f0198c0967d7dL,
+ 0x1013058178d5bfL,0x39da601c9cc879L,0x09d8963ec340baL,
+ 0x1b735db13ad2a7L } },
+ /* 193 */
+ { { 0x20916ffdc83f01L,0x16892aa7c9f217L,0x6bff179888d532L,
+ 0x4adf3c3d366288L,0x41a62b954726aeL,0x3139609022aeb6L,
+ 0x3e8ab9b37aff7aL },
+ { 0x76bbc70f24659aL,0x33fa98513886c6L,0x13b26af62c4ea6L,
+ 0x3c4d5826389a0cL,0x526ec28c02bf6aL,0x751ff083d79a7cL,
+ 0x110ac647990224L } },
+ /* 194 */
+ { { 0x2c6c62fa2b6e20L,0x3d37edad30c299L,0x6ef25b44b65fcaL,
+ 0x7470846914558eL,0x712456eb913275L,0x075a967a9a280eL,
+ 0x186c8188f2a2a0L },
+ { 0x2f3b41a6a560b1L,0x3a8070b3f9e858L,0x140936ff0e1e78L,
+ 0x5fd298abe6da8aL,0x3823a55d08f153L,0x3445eafaee7552L,
+ 0x2a5fc96731a8b2L } },
+ /* 195 */
+ { { 0x06317be58edbbbL,0x4a38f3bfbe2786L,0x445b60f75896b7L,
+ 0x6ec7c92b5adf57L,0x07b6be8038a441L,0x1bcfe002879655L,
+ 0x2a2174037d6d0eL },
+ { 0x776790cf9e48bdL,0x73e14a2c4ed1d3L,0x7eb5ed5f2fc2f7L,
+ 0x3e0aedb821b384L,0x0ee3b7e151c12fL,0x51a6a29e044bb2L,
+ 0x0ba13a00cb0d86L } },
+ /* 196 */
+ { { 0x77607d563ec8d8L,0x023fc726996e44L,0x6bd63f577a9986L,
+ 0x114a6351e53973L,0x3efe97989da046L,0x1051166e117ed7L,
+ 0x0354933dd4fb5fL },
+ { 0x7699ca2f30c073L,0x4c973b83b9e6d3L,0x2017c2abdbc3e8L,
+ 0x0cdcdd7a26522bL,0x511070f5b23c7dL,0x70672327e83d57L,
+ 0x278f842b4a9f26L } },
+ /* 197 */
+ { { 0x0824f0d4ae972fL,0x60578dd08dcf52L,0x48a74858290fbbL,
+ 0x7302748bf23030L,0x184b229a178acfL,0x3e8460ade089d6L,
+ 0x13f2b557fad533L },
+ { 0x7f96f3ae728d15L,0x018d8d40066341L,0x01fb94955a289aL,
+ 0x2d32ed6afc2657L,0x23f4f5e462c3acL,0x60eba5703bfc5aL,
+ 0x1b91cc06f16c7aL } },
+ /* 198 */
+ { { 0x411d68af8219b9L,0x79cca36320f4eeL,0x5c404e0ed72e20L,
+ 0x417cb8692e43f2L,0x305d29c7d98599L,0x3b754d5794a230L,
+ 0x1c97fb4be404e9L },
+ { 0x7cdbafababd109L,0x1ead0eb0ca5090L,0x1a2b56095303e3L,
+ 0x75dea935012c8fL,0x67e31c071b1d1dL,0x7c324fbfd172c3L,
+ 0x157e257e6498f7L } },
+ /* 199 */
+ { { 0x19b00db175645bL,0x4c4f6cb69725f1L,0x36d9ce67bd47ceL,
+ 0x2005e105179d64L,0x7b952e717867feL,0x3c28599204032cL,
+ 0x0f5659d44fb347L },
+ { 0x1ebcdedb979775L,0x4378d45cfd11a8L,0x14c85413ca66e9L,
+ 0x3dd17d681c8a4dL,0x58368e7dc23142L,0x14f3eaac6116afL,
+ 0x0adb45b255f6a0L } },
+ /* 200 */
+ { { 0x2f5e76279ad982L,0x125b3917034d09L,0x3839a6399e6ed3L,
+ 0x32fe0b3ebcd6a2L,0x24ccce8be90482L,0x467e26befcc187L,
+ 0x2828434e2e218eL },
+ { 0x17247cd386efd9L,0x27f36a468d85c3L,0x65e181ef203bbfL,
+ 0x0433a6761120afL,0x1d607a2a8f8625L,0x49f4e55a13d919L,
+ 0x3367c3b7943e9dL } },
+ /* 201 */
+ { { 0x3391c7d1a46d4dL,0x38233d602d260cL,0x02127a0f78b7d4L,
+ 0x56841c162c24c0L,0x4273648fd09aa8L,0x019480bb0e754eL,
+ 0x3b927987b87e58L },
+ { 0x6676be48c76f73L,0x01ec024e9655aeL,0x720fe1c6376704L,
+ 0x17e06b98885db3L,0x656adec85a4200L,0x73780893c3ce88L,
+ 0x0a339cdd8df664L } },
+ /* 202 */
+ { { 0x69af7244544ac7L,0x31ab7402084d2fL,0x67eceb7ef7cb19L,
+ 0x16f8583b996f61L,0x1e208d12faf91aL,0x4a91584ce4a42eL,
+ 0x3e08337216c93eL },
+ { 0x7a6eea94f4cf77L,0x07a52894678c60L,0x302dd06b14631eL,
+ 0x7fddb7225c9ceaL,0x55e441d7acd153L,0x2a00d4490b0f44L,
+ 0x053ef125338cdbL } },
+ /* 203 */
+ { { 0x120c0c51584e3cL,0x78b3efca804f37L,0x662108aefb1dccL,
+ 0x11deb55f126709L,0x66def11ada8125L,0x05bbc0d1001711L,
+ 0x1ee1c99c7fa316L },
+ { 0x746f287de53510L,0x1733ef2e32d09cL,0x1df64a2b0924beL,
+ 0x19758da8f6405eL,0x28f6eb3913e484L,0x7175a1090cc640L,
+ 0x048aee0d63f0bcL } },
+ /* 204 */
+ { { 0x1f3b1e3b0b29c3L,0x48649f4882a215L,0x485eca3a9e0dedL,
+ 0x4228ba85cc82e4L,0x36da1f39bc9379L,0x1659a7078499d1L,
+ 0x0a67d5f6c04188L },
+ { 0x6ac39658afdce3L,0x0d667a0bde8ef6L,0x0ae6ec0bfe8548L,
+ 0x6d9cb2650571bfL,0x54bea107760ab9L,0x705c53bd340cf2L,
+ 0x111a86b610c70fL } },
+ /* 205 */
+ { { 0x7ecea05c6b8195L,0x4f8be93ce3738dL,0x305de9eb9f5d12L,
+ 0x2c3b9d3d474b56L,0x673691a05746c3L,0x2e3482c428c6eaL,
+ 0x2a8085fde1f472L },
+ { 0x69d15877fd3226L,0x4609c9ec017cc3L,0x71e9b7fc1c3dbcL,
+ 0x4f8951254e2675L,0x63ee9d15afa010L,0x0f05775b645190L,
+ 0x28a0a439397ae3L } },
+ /* 206 */
+ { { 0x387fa03e9de330L,0x40cc32b828b6abL,0x02a482fbc04ac9L,
+ 0x68cad6e70429b7L,0x741877bff6f2c4L,0x48efe633d3b28bL,
+ 0x3e612218fe24b3L },
+ { 0x6fc1d34fe37657L,0x3d04b9e1c8b5a1L,0x6a2c332ef8f163L,
+ 0x7ca97e2b135690L,0x37357d2a31208aL,0x29f02f2332bd68L,
+ 0x17c674c3e63a57L } },
+ /* 207 */
+ { { 0x683d9a0e6865bbL,0x5e77ec68ad4ce5L,0x4d18f236788bd6L,
+ 0x7f34b87204f4e3L,0x391ca40e9e578dL,0x3470ed6ddf4e23L,
+ 0x225544b3e50989L },
+ { 0x48eda8cb4e462bL,0x2a948825cf9109L,0x473adedc7e1300L,
+ 0x37b843b82192edL,0x2b9ac1537dde36L,0x4efe7412732332L,
+ 0x29cc5981b5262bL } },
+ /* 208 */
+ { { 0x190d2fcad260f5L,0x7c53dd81d18027L,0x003def5f55db0eL,
+ 0x7f5ed25bee2df7L,0x2b87e9be167d2eL,0x2b999c7bbcd224L,
+ 0x1d68a2c260ad50L },
+ { 0x010bcde84607a6L,0x0250de9b7e1bedL,0x746d36bfaf1b56L,
+ 0x3359475ff56abbL,0x7e84b9bc440b20L,0x2eaa7e3b52f162L,
+ 0x01165412f36a69L } },
+ /* 209 */
+ { { 0x639a02329e5836L,0x7aa3ee2e4d3a27L,0x5bc9b258ecb279L,
+ 0x4cb3dfae2d62c6L,0x08d9d3b0c6c437L,0x5a2c177d47eab2L,
+ 0x36120479fc1f26L },
+ { 0x7609a75bd20e4aL,0x3ba414e17551fcL,0x42cd800e1b90c9L,
+ 0x04921811b88f9bL,0x4443697f9562fdL,0x3a8081b8186959L,
+ 0x3f5b5c97379e73L } },
+ /* 210 */
+ { { 0x6fd0e3cf13eafbL,0x3976b5415cbf67L,0x4de40889e48402L,
+ 0x17e4d36f24062aL,0x16ae7755cf334bL,0x2730ac94b7e0e1L,
+ 0x377592742f48e0L },
+ { 0x5e10b18a045041L,0x682792afaae5a1L,0x19383ec971b816L,
+ 0x208b17dae2ffc0L,0x439f9d933179b6L,0x55485a9090bcaeL,
+ 0x1c316f42a2a35cL } },
+ /* 211 */
+ { { 0x67173897bdf646L,0x0b6956653ef94eL,0x5be3c97f7ea852L,
+ 0x3110c12671f08eL,0x2474076a3fc7ecL,0x53408be503fe72L,
+ 0x09155f53a5b44eL },
+ { 0x5c804bdd4c27cdL,0x61e81eb8ffd50eL,0x2f7157fdf84717L,
+ 0x081f880d646440L,0x7aa892acddec51L,0x6ae70683443f33L,
+ 0x31ed9e8b33a75aL } },
+ /* 212 */
+ { { 0x0d724f8e357586L,0x1febbec91b4134L,0x6ff7b98a9475fdL,
+ 0x1c4d9b94e1f364L,0x2b8790499cef00L,0x42fd2080a1b31dL,
+ 0x3a3bbc6d9b0145L },
+ { 0x75bfebc37e3ca9L,0x28db49c1723bd7L,0x50b12fa8a1f17aL,
+ 0x733d95bbc84b98L,0x45ede81f6c109eL,0x18f5e46fb37b5fL,
+ 0x34b980804aaec1L } },
+ /* 213 */
+ { { 0x56060c8a4f57bfL,0x0d2dfe223054c2L,0x718a5bbc03e5d6L,
+ 0x7b3344cc19b3b9L,0x4d11c9c054bcefL,0x1f5ad422c22e33L,
+ 0x2609299076f86bL },
+ { 0x7b7a5fba89fd01L,0x7013113ef3b016L,0x23d5e0a173e34eL,
+ 0x736c14462f0f50L,0x1ef5f7ac74536aL,0x4baba6f4400ea4L,
+ 0x17b310612c9828L } },
+ /* 214 */
+ { { 0x4ebb19a708c8d3L,0x209f8c7f03d9bbL,0x00461cfe5798fbL,
+ 0x4f93b6ae822fadL,0x2e5b33b5ad5447L,0x40b024e547a84bL,
+ 0x22ffad40443385L },
+ { 0x33809c888228bfL,0x559f655fefbe84L,0x0032f529fd2f60L,
+ 0x5a2191ece3478cL,0x5b957fcd771246L,0x6fec181f9ed123L,
+ 0x33eed3624136a3L } },
+ /* 215 */
+ { { 0x6a5df93b26139aL,0x55076598fd7134L,0x356a592f34f81dL,
+ 0x493c6b5a3d4741L,0x435498a4e2a39bL,0x2cd26a0d931c88L,
+ 0x01925ea3fc7835L },
+ { 0x6e8d992b1efa05L,0x79508a727c667bL,0x5f3c15e6b4b698L,
+ 0x11b6c755257b93L,0x617f5af4b46393L,0x248d995b2b6656L,
+ 0x339db62e2e22ecL } },
+ /* 216 */
+ { { 0x52537a083843dcL,0x6a283c82a768c7L,0x13aa6bf25227acL,
+ 0x768d76ba8baf5eL,0x682977a6525808L,0x67ace52ac23b0bL,
+ 0x2374b5a2ed612dL },
+ { 0x7139e60133c3a4L,0x715697a4f1d446L,0x4b018bf36677a0L,
+ 0x1dd43837414d83L,0x505ec70730d4f6L,0x09ac100907fa79L,
+ 0x21caad6e03217eL } },
+ /* 217 */
+ { { 0x0776d3999d4d49L,0x33bdd87e8bcff8L,0x1036b87f068fadL,
+ 0x0a9b8ffde4c872L,0x7ab2533596b1eaL,0x305a88fb965378L,
+ 0x3356d8fa4d65e5L },
+ { 0x3366fa77d1ff11L,0x1e0bdbdcd2075cL,0x46910cefc967caL,
+ 0x7ce700737a1ff6L,0x1c5dc15409c9bdL,0x368436b9bdb595L,
+ 0x3e7ccd6560b5efL } },
+ /* 218 */
+ { { 0x1443789422c792L,0x524792b1717f2bL,0x1f7c1d95048e7aL,
+ 0x5cfe2a225b0d12L,0x245594d29ce85bL,0x20134d254ce168L,
+ 0x1b83296803921aL },
+ { 0x79a78285b3beceL,0x3c738c3f3124d6L,0x6ab9d1fe0907cdL,
+ 0x0652ceb7fc104cL,0x06b5f58c8ae3fdL,0x486959261c5328L,
+ 0x0b3813ae677c90L } },
+ /* 219 */
+ { { 0x66b9941ac37b82L,0x651a4b609b0686L,0x046711edf3fc31L,
+ 0x77f89f38faa89bL,0x2683ddbf2d5edbL,0x389ef1dfaa3c25L,
+ 0x20b3616e66273eL },
+ { 0x3c6db6e0cb5d37L,0x5d7ae5dc342bc4L,0x74a1dc6c52062bL,
+ 0x6f7c0bec109557L,0x5c51f7bc221d91L,0x0d7b5880745288L,
+ 0x1c46c145c4b0ddL } },
+ /* 220 */
+ { { 0x59ed485ea99eccL,0x201b71956bc21dL,0x72d5c32f73de65L,
+ 0x1aefd76547643eL,0x580a452cfb2c2dL,0x7cb1a63f5c4dc9L,
+ 0x39a8df727737aaL },
+ { 0x365a341deca452L,0x714a1ad1689cbaL,0x16981d12c42697L,
+ 0x5a124f4ac91c75L,0x1b2e3f2fedc0dbL,0x4a1c72b8e9d521L,
+ 0x3855b4694e4e20L } },
+ /* 221 */
+ { { 0x16b3d047181ae9L,0x17508832f011afL,0x50d33cfeb2ebd1L,
+ 0x1deae237349984L,0x147c641aa6adecL,0x24a9fb4ebb1ddbL,
+ 0x2b367504a7a969L },
+ { 0x4c55a3d430301bL,0x379ef6a5d492cbL,0x3c56541fc0f269L,
+ 0x73a546e91698ceL,0x2c2b62ee0b9b5dL,0x6284184d43d0efL,
+ 0x0e1f5cf6a4b9f0L } },
+ /* 222 */
+ { { 0x44833e8cd3fdacL,0x28e6665cb71c27L,0x2f8bf87f4ddbf3L,
+ 0x6cc6c767fb38daL,0x3bc114d734e8b5L,0x12963d5a78ca29L,
+ 0x34532a161ece41L },
+ { 0x2443af5d2d37e9L,0x54e6008c8c452bL,0x2c55d54111cf1bL,
+ 0x55ac7f7522575aL,0x00a6fba3f8575fL,0x3f92ef3b793b8dL,
+ 0x387b97d69ecdf7L } },
+ /* 223 */
+ { { 0x0b464812d29f46L,0x36161daa626f9aL,0x5202fbdb264ca5L,
+ 0x21245805ff1304L,0x7f9c4a65657885L,0x542d3887f9501cL,
+ 0x086420deef8507L },
+ { 0x5e159aa1b26cfbL,0x3f0ef5ffd0a50eL,0x364b29663a432aL,
+ 0x49c56888af32a8L,0x6f937e3e0945d1L,0x3cbdeec6d766cdL,
+ 0x2d80d342ece61aL } },
+ /* 224 */
+ { { 0x255e3026d8356eL,0x4ddba628c4de9aL,0x074323b593e0d9L,
+ 0x333bdb0a10eefbL,0x318b396e473c52L,0x6ebb5a95efd3d3L,
+ 0x3f3bff52aa4e4fL },
+ { 0x3138a111c731d5L,0x674365e283b308L,0x5585edd9c416f2L,
+ 0x466763d9070fd4L,0x1b568befce8128L,0x16eb040e7b921eL,
+ 0x3d5c898687c157L } },
+ /* 225 */
+ { { 0x14827736973088L,0x4e110d53f301e6L,0x1f811b09870023L,
+ 0x53b5e500dbcacaL,0x4ddf0df1e6a7dcL,0x1e9575fb10ce35L,
+ 0x3fdc153644d936L },
+ { 0x763547e2260594L,0x26e5ae764efc59L,0x13be6f4d791a29L,
+ 0x2021e61e3a0cf1L,0x339cd2b4a1c202L,0x5c7451e08f5121L,
+ 0x3728b3a851be68L } },
+ /* 226 */
+ { { 0x78873653277538L,0x444b9ed2ee7156L,0x79ac8b8b069cd3L,
+ 0x5f0e90933770e8L,0x307662c615389eL,0x40fe6d95a80057L,
+ 0x04822170cf993cL },
+ { 0x677d5690fbfec2L,0x0355af4ae95cb3L,0x417411794fe79eL,
+ 0x48daf87400a085L,0x33521d3b5f0aaaL,0x53567a3be00ff7L,
+ 0x04712ccfb1cafbL } },
+ /* 227 */
+ { { 0x2b983283c3a7f3L,0x579f11b146a9a6L,0x1143d3b16a020eL,
+ 0x20f1483ef58b20L,0x3f03e18d747f06L,0x3129d12f15de37L,
+ 0x24c911f7222833L },
+ { 0x1e0febcf3d5897L,0x505e26c01cdaacL,0x4f45a9adcff0e9L,
+ 0x14dfac063c5cebL,0x69e5ce713fededL,0x3481444a44611aL,
+ 0x0ea49295c7fdffL } },
+ /* 228 */
+ { { 0x64554cb4093beeL,0x344b4b18dd81f6L,0x350f43b4de9b59L,
+ 0x28a96a220934caL,0x4aa8da5689a515L,0x27171cbd518509L,
+ 0x0cfc1753f47c95L },
+ { 0x7dfe091b615d6eL,0x7d1ee0aa0fb5c1L,0x145eef3200b7b5L,
+ 0x33fe88feeab18fL,0x1d62d4f87453e2L,0x43b8db4e47fff1L,
+ 0x1572f2b8b8f368L } },
+ /* 229 */
+ { { 0x6bc94e6b4e84f3L,0x60629dee586a66L,0x3bbad5fe65ca18L,
+ 0x217670db6c2fefL,0x0320a7f4e3272aL,0x3ccff0d976a6deL,
+ 0x3c26da8ae48cccL },
+ { 0x53ecf156778435L,0x7533064765a443L,0x6c5c12f03ca5deL,
+ 0x44f8245350dabfL,0x342cdd777cf8b3L,0x2b539c42e9f58dL,
+ 0x10138affc279b1L } },
+ /* 230 */
+ { { 0x1b135e204c5ddbL,0x40887dfeaa1d37L,0x7fb0ef83da76ffL,
+ 0x521f2b79af55a5L,0x3f9b38b4c3f0d0L,0x20a9838cce61ceL,
+ 0x24bb4e2f4b1e32L },
+ { 0x003f6aa386e27cL,0x68df59db0a0f8eL,0x21677d5192e713L,
+ 0x14ab9757501276L,0x411944af961524L,0x3184f39abc5c3fL,
+ 0x2a8dda80ca078dL } },
+ /* 231 */
+ { { 0x0592233cdbc95cL,0x54d5de5c66f40fL,0x351caa1512ab86L,
+ 0x681bdbee020084L,0x6ee2480c853e68L,0x6a5a44262b918fL,
+ 0x06574e15a3b91dL },
+ { 0x31ba03dacd7fbeL,0x0c3da7c18a57a9L,0x49aaaded492d6bL,
+ 0x3071ff53469e02L,0x5efb4f0d7248c6L,0x6db5fb67f12628L,
+ 0x29cff668e3d024L } },
+ /* 232 */
+ { { 0x1b9ef3bb1b17ceL,0x6ccf8c24fe6312L,0x34c15487f45008L,
+ 0x1a84044095972cL,0x515073a47e449eL,0x2ddc93f9097feeL,
+ 0x1008fdc894c434L },
+ { 0x08e5edb73399faL,0x65b1aa65547d4cL,0x3a117a1057c498L,
+ 0x7e16c3089d13acL,0x502f2ae4b6f851L,0x57a70f3eb62673L,
+ 0x111b48a9a03667L } },
+ /* 233 */
+ { { 0x5023024be164f1L,0x25ad117032401eL,0x46612b3bfe3427L,
+ 0x2f4f406a8a02b7L,0x16a93a5c4ddf07L,0x7ee71968fcdbe9L,
+ 0x2267875ace37daL },
+ { 0x687e88b59eb2a6L,0x3ac7368fe716d3L,0x28d953a554a036L,
+ 0x34d52c0acca08fL,0x742a7cf8dd4fd9L,0x10bfeb8575ea60L,
+ 0x290e454d868dccL } },
+ /* 234 */
+ { { 0x4e72a3a8a4bdd2L,0x1ba36d1dee04d5L,0x7a43136b63195bL,
+ 0x6ca8e286a519f3L,0x568e64aece08a9L,0x571d5000b5c10bL,
+ 0x3f75e9f5dbdd40L },
+ { 0x6fb0a698d6fa45L,0x0ce42209d7199cL,0x1f68275f708a3eL,
+ 0x5749832e91ec3cL,0x6c3665521428b2L,0x14b2bf5747bd4aL,
+ 0x3b6f940e42a22bL } },
+ /* 235 */
+ { { 0x4da0adbfb26c82L,0x16792a585f39acL,0x17df9dfda3975cL,
+ 0x4796b4afaf479bL,0x67be67234e0020L,0x69df5f201dda25L,
+ 0x09f71a4d12b3dcL },
+ { 0x64ff5ec260a46aL,0x579c5b86385101L,0x4f29a7d549f697L,
+ 0x4e64261242e2ebL,0x54ecacdfb6b296L,0x46e0638b5fddadL,
+ 0x31eefd3208891dL } },
+ /* 236 */
+ { { 0x5b72c749fe01b2L,0x230cf27523713aL,0x533d1810e0d1e1L,
+ 0x5590db7d1dd1e2L,0x7b8ab73e8e43d3L,0x4c8a19bd1c17caL,
+ 0x19222ce9f74810L },
+ { 0x6398b3dddc4582L,0x0352b7d88dfd53L,0x3c55b4e10c5a63L,
+ 0x38194d13f8a237L,0x106683fd25dd87L,0x59e0b62443458eL,
+ 0x196cb70aa9cbb9L } },
+ /* 237 */
+ { { 0x2885f7cd021d63L,0x162bfd4c3e1043L,0x77173dcf98fcd1L,
+ 0x13d4591d6add36L,0x59311154d0d8f2L,0x74336e86e79b8aL,
+ 0x13faadc5661883L },
+ { 0x18938e7d9ec924L,0x14bcda8fcaa0a1L,0x706d85d41a1355L,
+ 0x0ac34520d168deL,0x5a92499fe17826L,0x36c2e3b4f00600L,
+ 0x29c2fd7b5f63deL } },
+ /* 238 */
+ { { 0x41250dfe2216c5L,0x44a0ec0366a217L,0x575bc1adf8b0dfL,
+ 0x5ff5cdbdb1800bL,0x7843d4dde8ca18L,0x5fa9e420865705L,
+ 0x235c38be6c6b02L },
+ { 0x473b78aae91abbL,0x39470c6051e44bL,0x3f973cc2dc08c3L,
+ 0x2837932c5c91f6L,0x25e39ed754ec25L,0x1371c837118e53L,
+ 0x3b99f3b0aeafe2L } },
+ /* 239 */
+ { { 0x03acf51be46c65L,0x271fceacbaf5c3L,0x476589ed3a5e25L,
+ 0x78ec8c3c3c399cL,0x1f5c8bf4ac4c19L,0x730bb733ec68d2L,
+ 0x29a37e00dd287eL },
+ { 0x448ed1bf92b5faL,0x10827c17b86478L,0x55e6fc05b28263L,
+ 0x0af1226c73a66aL,0x0b66e5df0d09c1L,0x26128315a02682L,
+ 0x22d84932c5e808L } },
+ /* 240 */
+ { { 0x5ec3afc26e3392L,0x08e142e45c0084L,0x4388d5ad0f01feL,
+ 0x0f7acd36e6140cL,0x028c14ed97dffbL,0x311845675a38c6L,
+ 0x01c1c8f09a3062L },
+ { 0x5a302f4cf49e7dL,0x79267e254a44e1L,0x746165052317a1L,
+ 0x53a09263a566e8L,0x7d478ad5f73abcL,0x187ce5c947dad3L,
+ 0x18564e1a1ec45fL } },
+ /* 241 */
+ { { 0x7b9577a9aa0486L,0x766b40c7aaaef6L,0x1f6a411f5db907L,
+ 0x4543dd4d80beaeL,0x0ad938c7482806L,0x451568bf4b9be1L,
+ 0x3367ec85d30a22L },
+ { 0x5446425747843dL,0x18d94ac223c6b2L,0x052ff3a354d359L,
+ 0x0b4933f89723f5L,0x03fb517740e056L,0x226b892871dddaL,
+ 0x2768c2b753f0fdL } },
+ /* 242 */
+ { { 0x685282ccfa5200L,0x411ed433627b89L,0x77d5c9b8bc9c1dL,
+ 0x4a13ef2ee5cd29L,0x5582a612407c9eL,0x2307cb42fc3aa9L,
+ 0x2e661df79956b8L },
+ { 0x0e972b015254deL,0x5b63e14def8adeL,0x06995be2ca4a95L,
+ 0x6cc0cc1e94bf27L,0x7ed8499fe0052aL,0x671a6ca5a5e0f9L,
+ 0x31e10d4ba10f05L } },
+ /* 243 */
+ { { 0x690af07e9b2d8aL,0x6030af9e32c8ddL,0x45c7ca3bf2b235L,
+ 0x40959077b76c81L,0x61eee7f70d5a96L,0x6b04f6aafe9e38L,
+ 0x3c726f55f1898dL },
+ { 0x77d0142a1a6194L,0x1c1631215708b9L,0x403a4f0a9b7585L,
+ 0x066c8e29f7cef0L,0x6fc32f98cf575eL,0x518a09d818c297L,
+ 0x34144e99989e75L } },
+ /* 244 */
+ { { 0x6adbada859fb6aL,0x0dcfb6506ccd51L,0x68f88b8d573e0dL,
+ 0x4b1ce35bd9af30L,0x241c8293ece2c9L,0x3b5f402c5c4adeL,
+ 0x34b9b1ee6fde87L },
+ { 0x5e625340075e63L,0x54c3f3d9050da1L,0x2a3f9152509016L,
+ 0x3274e46111bc18L,0x3a7504fd01ac73L,0x4169b387a43209L,
+ 0x35626f852bc6d4L } },
+ /* 245 */
+ { { 0x576a4f4662e53bL,0x5ea3f20eecec26L,0x4e5f02be5cd7b0L,
+ 0x72cc5ac3314be8L,0x0f604ed3201fe9L,0x2a29378ea54bceL,
+ 0x2d52bd4d6ec4b6L },
+ { 0x6a4c2b212c1c76L,0x778fd64a1bfa6dL,0x326828691863d6L,
+ 0x5616c8bd06a336L,0x5fab552564da4dL,0x46640cab3e91d2L,
+ 0x1d21f06427299eL } },
+ /* 246 */
+ { { 0x2bfe37dde98e9cL,0x164c54822332ebL,0x5b736c7df266e4L,
+ 0x59dab3a8da084cL,0x0ae1eab346f118L,0x182090a4327e3fL,
+ 0x07b13489dae2e6L },
+ { 0x3bc92645452baaL,0x30b159894ae574L,0x5b947c5c78e1f4L,
+ 0x18f0e004a3c77fL,0x48ca8f357077d9L,0x349ffdcef9bca9L,
+ 0x3ed224bfd54772L } },
+ /* 247 */
+ { { 0x1bdad02db8dff8L,0x69fab4450b44b6L,0x3b6802d187518bL,
+ 0x098368d8eb556cL,0x3fe1943fbefcf4L,0x008851d0de6d42L,
+ 0x322cbc4605fe25L },
+ { 0x2528aaf0d51afbL,0x7d48a9363a0cecL,0x4ba8f77d9a8f8bL,
+ 0x7dee903437d6c7L,0x1ff5a0d9ccc4b4L,0x34d9bd2fa99831L,
+ 0x30d9e4f58667c6L } },
+ /* 248 */
+ { { 0x38909b51b85197L,0x7ba16992512bd4L,0x2c776cfcfffec5L,
+ 0x2be7879075843cL,0x557e2b05d28ffcL,0x641b17bc5ce357L,
+ 0x1fcaf8a3710306L },
+ { 0x54dca2299a2d48L,0x745d06ef305acaL,0x7c41c65c6944c2L,
+ 0x679412ec431902L,0x48f2b15ee62827L,0x341a96d8afe06eL,
+ 0x2a78fd3690c0e1L } },
+ /* 249 */
+ { { 0x6b7cec83fbc9c6L,0x238e8a82eefc67L,0x5d3c1d9ff0928cL,
+ 0x55b816d6409bbfL,0x7969612adae364L,0x55b6ff96db654eL,
+ 0x129beca10073a9L },
+ { 0x0b1d2acdfc73deL,0x5d1a3605fa64bdL,0x436076146743beL,
+ 0x64044b89fcce0cL,0x7ae7b3c18f7fafL,0x7f083ee27cea36L,
+ 0x0292cd0d7c1ff0L } },
+ /* 250 */
+ { { 0x5a3c4c019b7d2eL,0x1a35a9b89712fbL,0x38736cc4f18c72L,
+ 0x603dd832a44e6bL,0x000d1d44aed104L,0x69b1f2fc274ebeL,
+ 0x03a7b993f76977L },
+ { 0x299f3b3e346910L,0x5243f45295afd5L,0x34342cbfa588bdL,
+ 0x72c40dd1155510L,0x718024fed2f991L,0x2f935e765ad82aL,
+ 0x246799ea371fb8L } },
+ /* 251 */
+ { { 0x24fe4c76250533L,0x01cafb02fdf18eL,0x505cb25d462882L,
+ 0x3e038175157d87L,0x7e3e99b10cdeb1L,0x38b7e72ebc7936L,
+ 0x081845f7c73433L },
+ { 0x049e61be05ebd5L,0x6ab82d8f0581f6L,0x62adffb427ac2eL,
+ 0x19431f809d198dL,0x36195f6c58b1d6L,0x22cc4c9dedc9a7L,
+ 0x24b146d8e694fcL } },
+ /* 252 */
+ { { 0x7c7bc8288b364dL,0x5c10f683cb894aL,0x19a62a68452958L,
+ 0x1fc24dcb4ce90eL,0x726baa4ed9581fL,0x1f34447dde73d6L,
+ 0x04c56708f30a21L },
+ { 0x131e583a3f4963L,0x071215b4d502e7L,0x196aca542e5940L,
+ 0x3afd5a91f7450eL,0x671b6eedf49497L,0x6aac7aca5c29e4L,
+ 0x3fb512470f138bL } },
+ /* 253 */
+ { { 0x5eadc3f4eb453eL,0x16c795ba34b666L,0x5d7612a4697fddL,
+ 0x24dd19bb499e86L,0x415b89ca3eeb9bL,0x7c83edf599d809L,
+ 0x13bc64c9b70269L },
+ { 0x52d3243dca3233L,0x0b21444b3a96a7L,0x6d551bc0083b90L,
+ 0x4f535b88c61176L,0x11e61924298010L,0x0a155b415bb61dL,
+ 0x17f94fbd26658fL } },
+ /* 254 */
+ { { 0x2dd06b90c28c65L,0x48582339c8fa6eL,0x01ac8bf2085d94L,
+ 0x053e660e020fdcL,0x1bece667edf07bL,0x4558f2b33ce24cL,
+ 0x2f1a766e8673fcL },
+ { 0x1d77cd13c06819L,0x4d5dc5056f3a01L,0x18896c6fa18d69L,
+ 0x120047ca76d625L,0x6af8457d4f4e45L,0x70ddc53358b60aL,
+ 0x330e11130e82f0L } },
+ /* 255 */
+ { { 0x0643b1cd4c2356L,0x10a2ea0a8f7c92L,0x2752513011d029L,
+ 0x4cd4c50321f579L,0x5fdf9ba5724792L,0x2f691653e2ddc0L,
+ 0x0cfed3d84226cbL },
+ { 0x704902a950f955L,0x069bfdb87bbf0cL,0x5817eeda8a5f84L,
+ 0x1914cdd9089905L,0x0e4a323d7b93f4L,0x1cc3fc340af0b2L,
+ 0x23874161bd6303L } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* Fixed-base multiplication using the precomputed stripe table
+     * (p384_table) declared above. */
+    return sp_384_ecc_mulmod_stripe_7(r, &p384_base, p384_table,
+            k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack storage used when dynamic allocation is disabled. */
+    sp_point_384 p;
+    sp_digit kd[7];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_7(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* Scalar as 7 words of 55 bits each. */
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Import the scalar, then do the fixed-base multiply. */
+        sp_384_from_mp(k, 7, km);
+
+            err = sp_384_ecc_mulmod_base_7(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_7(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time: all 7 words are OR-ed together
+ * unconditionally before the single comparison.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_7(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ * The carry out of word 0 is propagated by the normalization call.
+ *
+ * a A single precision integer, modified in place.
+ */
+SP_NOINLINE static void sp_384_add_one_7(sp_digit* a)
+{
+    a[0]++;
+    sp_384_norm_7(a);
+}
+
+/* Read big endian unsigned byte array into r.
+ * Bytes are consumed from the end of the array (least significant first)
+ * and packed into 55-bit words.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 47U) {
+            /* Word full: mask to 55 bits and spill the remaining high
+             * bits of a[i] into the next word. */
+            r[j] &= 0x7fffffffffffffL;
+            s = 55U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any remaining output words. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ * Rejection sampling: draw 48 random bytes, accept only when the value
+ * is below order-2, then add one so the result is never zero.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_7(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 7, buf, (int)sizeof(buf));
+            if (sp_384_cmp_7(k, p384_order2) < 0) {
+                sp_384_add_one_7(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[7];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    /* Private key: random scalar in 1..order-1; public key: k.G. */
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_7(rng, k);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_7(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    /* Validate the key pair: order * pub must be the point at infinity. */
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_7(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_384_iszero_7(point->x) == 0) || (sp_384_iszero_7(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_7(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_7(infinity, 1, heap);
+#endif
+    sp_384_point_free_7(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ * The input is first normalized to 55-bit words, then unpacked from the
+ * least significant word into the end of the output buffer.
+ *
+ * r A single precision integer (carries are normalized in place).
+ * a Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    /* Propagate pending carries so each word holds exactly 55 bits. */
+    for (i=0; i<6; i++) {
+        r[i+1] += r[i] >> 55;
+        r[i] &= 0x7fffffffffffffL;
+    }
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<7 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        while (b < 55) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* s = number of bits of the next word already emitted into a[j]. */
+        s = 8 - (b - 55);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ * This is the ECDH shared-secret computation: out = (priv * pub).x.
+ *
+ * priv Scalar to multiply the point by.
+ * pub Point to multiply.
+ * out Buffer to hold X ordinate.
+ * outLen On entry, size of the buffer in bytes.
+ *        On exit, length of data in buffer in bytes.
+ * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[7];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    /* Output is always exactly 48 bytes (384 bits). */
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(k, 7, priv);
+        sp_384_point_from_ecc_point_7(point, pub);
+            err = sp_384_ecc_mulmod_7(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Multiply a by scalar b into r. (r = a * b)
+ * The result occupies 8 words; intermediate products are accumulated in
+ * 128-bit arithmetic before being split back into 55-bit words.
+ *
+ * r A single precision integer (8 words).
+ * a A single precision integer (7 words).
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_384_mul_d_7(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int128_t tb = b;
+    int128_t t = 0;
+    int i;
+
+    for (i = 0; i < 7; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x7fffffffffffffL;
+        t >>= 55;
+    }
+    r[7] = (sp_digit)t;
+#else
+    /* Unrolled variant: compute all partial products first, then fold
+     * each carry into the next word. */
+    int128_t tb = b;
+    int128_t t[7];
+
+    t[ 0] = tb * a[ 0];
+    t[ 1] = tb * a[ 1];
+    t[ 2] = tb * a[ 2];
+    t[ 3] = tb * a[ 3];
+    t[ 4] = tb * a[ 4];
+    t[ 5] = tb * a[ 5];
+    t[ 6] = tb * a[ 6];
+    r[ 0] = (t[ 0] & 0x7fffffffffffffL);
+    r[ 1] = (sp_digit)(t[ 0] >> 55) + (t[ 1] & 0x7fffffffffffffL);
+    r[ 2] = (sp_digit)(t[ 1] >> 55) + (t[ 2] & 0x7fffffffffffffL);
+    r[ 3] = (sp_digit)(t[ 2] >> 55) + (t[ 3] & 0x7fffffffffffffL);
+    r[ 4] = (sp_digit)(t[ 3] >> 55) + (t[ 4] & 0x7fffffffffffffL);
+    r[ 5] = (sp_digit)(t[ 4] >> 55) + (t[ 5] & 0x7fffffffffffffL);
+    r[ 6] = (sp_digit)(t[ 5] >> 55) + (t[ 6] & 0x7fffffffffffffL);
+    r[ 7] = (sp_digit)(t[ 6] >> 55);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_64
+/* Divide the 110-bit value d1:d0 (55-bit words) by dv, 8 bits at a time,
+ * for platforms without a native 128/64 divide.
+ *
+ * d1 High word of the dividend.
+ * d0 Low word of the dividend.
+ * dv Divisor.
+ * returns the quotient.
+ */
+static WC_INLINE sp_digit sp_384_div_word_7(sp_digit d1, sp_digit d0,
+    sp_digit dv)
+{
+    sp_digit d, r, t;
+
+    /* All 55 bits from d1 and top 8 bits from d0. */
+    d = (d1 << 8) | (d0 >> 47);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 9 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 39) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 17 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 31) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 25 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 23) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 33 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 15) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 41 bits in r */
+    /* Next 8 bits from d0. */
+    r <<= 8;
+    d <<= 8;
+    d |= (d0 >> 7) & ((1 << 8) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 49 bits in r */
+    /* Remaining 7 bits from d0. */
+    r <<= 7;
+    d <<= 7;
+    d |= d0 & ((1 << 7) - 1);
+    t = d / dv;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_64 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ * Schoolbook long division using the top word of d as trial divisor,
+ * with a correction step per iteration.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_div_7(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+#ifndef WOLFSSL_SP_DIV_64
+    int128_t d1;
+#endif
+    sp_digit dv, r1;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* td;
+#else
+    sp_digit t1d[14], t2d[7 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 7 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = td;
+        t2 = td + 2 * 7;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[6];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 7U);
+        for (i=6; i>=0; i--) {
+            /* Normalize the two top words, estimate a quotient word from
+             * them, then subtract the trial product and correct. */
+            t1[7 + i] += t1[7 + i - 1] >> 55;
+            t1[7 + i - 1] &= 0x7fffffffffffffL;
+#ifndef WOLFSSL_SP_DIV_64
+            d1 = t1[7 + i];
+            d1 <<= 55;
+            d1 += t1[7 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_384_div_word_7(t1[7 + i], t1[7 + i - 1], dv);
+#endif
+
+            sp_384_mul_d_7(t2, d, r1);
+            (void)sp_384_sub_7(&t1[i], &t1[i], t2);
+            t1[7 + i] -= t2[7];
+            t1[7 + i] += t1[7 + i - 1] >> 55;
+            t1[7 + i - 1] &= 0x7fffffffffffffL;
+            r1 = (((-t1[7 + i]) << 55) - t1[7 + i - 1]) / dv;
+            r1++;
+            sp_384_mul_d_7(t2, d, r1);
+            (void)sp_384_add_7(&t1[i], &t1[i], t2);
+            t1[7 + i] += t1[7 + i - 1] >> 55;
+            t1[7 + i - 1] &= 0x7fffffffffffffL;
+        }
+        t1[7 - 1] += t1[7 - 2] >> 55;
+        t1[7 - 2] &= 0x7fffffffffffffL;
+        r1 = t1[7 - 1] / dv;
+
+        sp_384_mul_d_7(t2, d, r1);
+        (void)sp_384_sub_7(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 7U);
+        /* NOTE(review): only words 0..4 have their carries propagated here
+         * (i<5), while sp_384_to_bin normalizes all 6 carry positions for a
+         * 7-word value. Confirm against the SP code generator whether the
+         * r[5] -> r[6] carry is provably absent at this point. */
+        for (i=0; i<5; i++) {
+            r[i+1] += r[i] >> 55;
+            r[i] &= 0x7fffffffffffffL;
+        }
+        sp_384_cond_add_7(r, r, d, 0 - ((r[6] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Thin wrapper: the quotient output of the division is discarded.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_384_mod_7(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_7(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve (exponent for Fermat inversion mod order). */
+static const uint64_t p384_order_minus_2[6] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve; the high half is all
+ * ones and is handled by the fixed addition chain below. */
+static const uint64_t p384_order_low[3] = {
+    0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU
+
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P384 curve. (r = a * b mod order)
+ * Operands and result are in Montgomery form.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_7(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_7(r, a, b);
+    sp_384_mont_reduce_order_7(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ * Operand and result are in Montgomery form.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_order_7(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_7(r, a);
+    sp_384_mont_reduce_order_7(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ (2^n) mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of times to square (n >= 1).
+ */
+static void sp_384_mont_sqr_n_order_7(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_384_mont_sqr_order_7(r, a);
+    for (i=1; i<n; i++) {
+        sp_384_mont_sqr_order_7(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ * Uses Fermat's little theorem: a^(order-2) mod order. The small build
+ * walks the exponent bit by bit; the large build uses a fixed addition
+ * chain for the all-ones high half and bit-scans only the low 192 bits.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_384_mont_inv_order_7(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 7);
+    /* Left-to-right binary exponentiation over the 383 remaining bits. */
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_7(t, t);
+        if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_7(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 7U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 7;
+    sp_digit* t3 = td + 4 * 7;
+    int i;
+
+    /* t = a^2 */
+    sp_384_mont_sqr_order_7(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_7(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_7(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_7(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_7(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_7(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_7(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_7(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_7(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_7(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_7(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_7(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_7(t2, t, 48);
+    /* t= a^fffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_7(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_7(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_7(t2, t2, t);
+    /* Scan the low 192 bits of order-2 bit by bit. */
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_7(t2, t2);
+        if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+            sp_384_mont_mul_order_7(t2, t2, a);
+        }
+    }
+    sp_384_mont_sqr_order_7(t2, t2);
+    sp_384_mont_mul_order_7(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 384 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * km If non-NULL and non-zero: the nonce k to use (zeroed after use).
+ *    Otherwise a fresh random k is generated with rng.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*7];
+    sp_digit xd[2*7];
+    sp_digit kd[2*7];
+    sp_digit rd[2*7];
+    sp_digit td[3 * 2*7];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int64_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_7(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* One allocation of 14 * 7 digits: e, x, k, r (2*7 each) and
+         * tmp (3 * 2*7). */
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 7;
+        x = d + 2 * 7;
+        k = d + 4 * 7;
+        r = d + 6 * 7;
+        tmp = d + 8 * 7;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        /* s and kInv alias e and k; the originals are dead once these are
+         * computed. */
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 7, hash, (int)hashLen);
+    }
+
+    /* Retry with a new nonce when s turns out to be zero. */
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 7, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_7(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 7, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+                err = sp_384_ecc_mulmod_base_7(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 7U);
+            sp_384_norm_7(r);
+            c = sp_384_cmp_7(r, p384_order);
+            sp_384_cond_sub_7(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_7(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_384_mul_7(k, k, p384_norm_order);
+            err = sp_384_mod_7(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_7(k);
+            /* kInv = 1/k mod order */
+            sp_384_mont_inv_order_7(kInv, k, tmp);
+            sp_384_norm_7(kInv);
+
+            /* s = r * x + e */
+            sp_384_mul_7(x, x, r);
+            err = sp_384_mod_7(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_7(x);
+            carry = sp_384_add_7(s, e, x);
+            sp_384_cond_sub_7(s, s, p384_order, 0 - carry);
+            sp_384_norm_7(s);
+            c = sp_384_cmp_7(s, p384_order);
+            sp_384_cond_sub_7(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_7(s);
+
+            /* s = s * k^-1 mod order */
+            sp_384_mont_mul_order_7(s, s, kInv);
+            sp_384_norm_7(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_7(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        /* Zeroize the entire 14 * 7 digit allocation, including the
+         * inversion temporaries in tmp which are derived from the
+         * nonce k. (Previously only 8 * 7 digits were cleared.) */
+        XMEMSET(d, 0, sizeof(sp_digit) * 14 * 7);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 7U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 7U);
+#endif
+    sp_384_point_free_7(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 384)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of the public key point.
+ * pY Y ordinate of the public key point.
+ * pZ Z ordinate of the public key point.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Output: set to 1 when the signature verifies, 0 otherwise.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*7];
+    sp_digit u2d[2*7];
+    sp_digit sd[2*7];
+    sp_digit tmpd[2*7 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int64_t c;
+    int err;
+
+    err = sp_384_point_new_7(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1  = d + 0 * 7;
+        u2  = d + 2 * 7;
+        s   = d + 4 * 7;
+        tmp = d + 6 * 7;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s  = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 7, hash, (int)hashLen);
+        sp_384_from_mp(u2, 7, r);
+        sp_384_from_mp(s, 7, sm);
+        sp_384_from_mp(p2->x, 7, pX);
+        sp_384_from_mp(p2->y, 7, pY);
+        sp_384_from_mp(p2->z, 7, pZ);
+
+        /* Convert s into Montgomery form so it can be inverted mod order. */
+        {
+            sp_384_mul_7(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_7(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_7(s);
+        {
+            sp_384_mont_inv_order_7(s, s, tmp);
+            sp_384_mont_mul_order_7(u1, u1, s);
+            sp_384_mont_mul_order_7(u2, u2, s);
+        }
+
+            err = sp_384_ecc_mulmod_base_7(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_7(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_7(p1, p1, p2, tmp);
+            /* z == 0 from the add means u1.G == +/-u2.Q. */
+            if (sp_384_iszero_7(p1->z)) {
+                if (sp_384_iszero_7(p1->x) && sp_384_iszero_7(p1->y)) {
+                    /* Equal points: the sum is a doubling. */
+                    sp_384_proj_point_dbl_7(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 7, r);
+        err = sp_384_mod_mul_norm_7(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_7(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_7(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_7(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 7, r);
+            carry = sp_384_add_7(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_7(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_7(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_384_mod_mul_norm_7(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_7(u1, u2, p1->z, p384_mod,
+                                                                  p384_mp_mod);
+                        *res = (int)(sp_384_cmp_7(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_7(p1, 0, heap);
+    sp_384_point_free_7(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ * Evaluates y^2 - (x^3 - 3x) and compares against the curve constant b.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_7(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*7];
+    sp_digit t2d[2*7];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 7;
+        t2 = d + 2 * 7;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* t1 = y^2 mod p, t2 = x^3 mod p. */
+        sp_384_sqr_7(t1, point->y);
+        (void)sp_384_mod_7(t1, t1, p384_mod);
+        sp_384_sqr_7(t2, point->x);
+        (void)sp_384_mod_7(t2, t2, p384_mod);
+        sp_384_mul_7(t2, t2, point->x);
+        (void)sp_384_mod_7(t2, t2, p384_mod);
+        (void)sp_384_sub_7(t2, p384_mod, t2);
+        sp_384_mont_add_7(t1, t1, t2, p384_mod);
+
+        /* t1 = y^2 - x^3 + 3x; curve a = -3 for P-384. */
+        sp_384_mont_add_7(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_7(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_7(t1, t1, point->x, p384_mod);
+
+        if (sp_384_cmp_7(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ * Public wrapper: builds an affine point (z = 1) and checks the curve
+ * equation.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 pubd;
+#endif
+    sp_point_384* pub;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_7(NULL, pubd, pub);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pub->x, 7, pX);
+        sp_384_from_mp(pub->y, 7, pY);
+        sp_384_from_bin(pub->z, 7, one, (int)sizeof(one));
+
+        err = sp_384_ecc_is_point_7(pub, NULL);
+    }
+
+    sp_384_point_free_7(pub, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[7];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_7(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        sp_384_from_mp(pub->x, 7, pX);
+        sp_384_from_mp(pub->y, 7, pY);
+        sp_384_from_bin(pub->z, 7, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 7, privm);
+
+        /* Check point at infinitiy. */
+        if ((sp_384_iszero_7(pub->x) != 0) &&
+            (sp_384_iszero_7(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_7(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_7(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_7(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+            err = sp_384_ecc_mulmod_7(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_7(p->x) == 0) ||
+            (sp_384_iszero_7(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+            err = sp_384_ecc_mulmod_base_7(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_7(p->x, pub->x) != 0 ||
+            sp_384_cmp_7(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* NOTE(review): priv holds the private scalar but is freed without
+     * being zeroized first (sign/make_key clear their secrets) — confirm
+     * whether an XMEMSET should precede the XFREE. */
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, heap);
+    sp_384_point_free_7(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ * Inputs and result stay in projective coordinates; no affine conversion.
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 7 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    sp_digit* tmp;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_7(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_7(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 7, pX);
+        sp_384_from_mp(p->y, 7, pY);
+        sp_384_from_mp(p->z, 7, pZ);
+        sp_384_from_mp(q->x, 7, qX);
+        sp_384_from_mp(q->y, 7, qY);
+        sp_384_from_mp(q->z, 7, qZ);
+
+            sp_384_proj_point_add_7(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(q, 0, NULL);
+    sp_384_point_free_7(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * rX   Resultant EC point's X ordinate.
+ * rY   Resultant EC point's Y ordinate.
+ * rZ   Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 7 * 2];
+    sp_point_384 pd;
+#endif
+    /* NULL-initialized: if point allocation fails before the XMALLOC below
+     * runs, the cleanup code must not XFREE an uninitialized pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_7(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 2, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Load the input point, 7 digits per ordinate. */
+        sp_384_from_mp(p->x, 7, pX);
+        sp_384_from_mp(p->y, 7, pY);
+        sp_384_from_mp(p->z, 7, pZ);
+
+        /* p = 2 * p (in-place on p). */
+        sp_384_proj_point_dbl_7(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX   EC point's X ordinate.
+ * pY   EC point's Y ordinate.
+ * pZ   EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 7 * 6];
+    sp_point_384 pd;
+#endif
+    /* NULL-initialized: if point allocation fails before the XMALLOC below
+     * runs, the cleanup code must not XFREE an uninitialized pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_7(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        /* Load the input point, 7 digits per ordinate. */
+        sp_384_from_mp(p->x, 7, pX);
+        sp_384_from_mp(p->y, 7, pY);
+        sp_384_from_mp(p->z, 7, pZ);
+
+        /* Normalize so that z == 1 (in-place on p). */
+        sp_384_map_7(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_7(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * Raises y to a fixed exponent with the hard-coded square-and-multiply
+ * chain below; the running exponent is tracked in the per-step comments.
+ * t1..t5 are double-width (2*7 digit) temporaries; in the malloc build
+ * they are carved out of a single allocation d.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_7(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 7];
+    sp_digit t2d[2 * 7];
+    sp_digit t3d[2 * 7];
+    sp_digit t4d[2 * 7];
+    sp_digit t5d[2 * 7];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* One block for all five 2*7-digit temporaries. */
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 7, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 7;
+        t2 = d + 2 * 7;
+        t3 = d + 4 * 7;
+        t4 = d + 6 * 7;
+        t5 = d + 8 * 7;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_7(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_7(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_7(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_7(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_7(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_7(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_7(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_7(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_7(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_7(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_7(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_7(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_7(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_7(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_7(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_7(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_7(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_7(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_7(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_7(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_7(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_7(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_7(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_7(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_7(t1, y, t2, p384_mod, p384_mp_mod);
+            /* Final result lands in y (not t2):
+             * y = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_7(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * Evaluates the curve equation y^2 = x^3 - 3x + b in Montgomery form,
+ * takes the square root and picks the root with the requested parity.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 7];
+    sp_digit yd[2 * 7];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* One block for both 2*7-digit temporaries. */
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 7, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 7;
+        y = d + 2 * 7;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        /* Load x and convert it to Montgomery form. */
+        sp_384_from_mp(x, 7, xm);
+        err = sp_384_mod_mul_norm_7(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_7(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_7(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_7(y, y, x, p384_mod);
+        sp_384_mont_sub_7(y, y, x, p384_mod);
+        sp_384_mont_sub_7(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b; x is no longer needed, so reuse it to hold
+         * the curve constant b in Montgomery form. */
+        err = sp_384_mod_mul_norm_7(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_7(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_7(y);
+    }
+    if (err == MP_OKAY) {
+        /* Clear the upper half of the double-width buffer, then convert
+         * back out of Montgomery form. */
+        XMEMSET(y + 7, 0, 7U * sizeof(sp_digit));
+        sp_384_mont_reduce_7(y, p384_mod, p384_mp_mod);
+        /* If the low bit does not match the requested parity, use the
+         * other root: y = p - y. */
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_7(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* SP_WORD_SIZE == 64 */
+#endif /* !WOLFSSL_SP_ASM */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_cortexm.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_cortexm.c
new file mode 100644
index 000000000..b03de8ab4
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_cortexm.c
@@ -0,0 +1,25687 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+ defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef __IAR_SYSTEMS_ICC__
+#define __asm__ asm
+#define __volatile__ volatile
+#endif /* __IAR_SYSTEMS_ICC__ */
+#ifdef __KEIL__
+#define __asm__ __asm
+#define __volatile__ volatile
+#endif
+
+#ifdef WOLFSSL_SP_ARM_CORTEX_M_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * Bytes are consumed from the end of the array (least significant byte
+ * first) and packed into 32-bit digits; remaining high digits are zeroed.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;          /* bit offset within the current digit r[j] */
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        /* OR the next byte in at the current bit offset. */
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            /* Byte reaches the 32-bit digit boundary: mask the completed
+             * digit and carry the spill-over bits into the next digit. */
+            r[j] &= 0xffffffff;
+            s = 32U - s;   /* bits that fit in the completed digit */
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;    /* bits already placed in the new digit */
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any remaining high digits. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * Three cases depending on the mp_int digit width (DIGIT_BIT) relative
+ * to the 32-bit sp_digit: equal (plain copy), wider (each mp digit is
+ * split across several sp_digits) or narrower (several mp digits are
+ * packed into each sp_digit).
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    /* Same digit width: copy directly and zero the tail. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    /* mp digits are wider than 32 bits: split each one across several
+     * 32-bit sp_digits.  s is the bit offset consumed so far. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* mp digits are narrower than 32 bits: pack several of them into
+     * each 32-bit sp_digit.  s is the bit offset within r[j]. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            /* Digit straddles the 32-bit boundary. */
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * Walks the output backwards from the least significant byte.  The
+ * generic s/b bookkeeping supports digit sizes that are not a multiple
+ * of 8 bits; with 32-bit digits each iteration emits 4 whole bytes.
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_2048_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;    /* s: bit offset carried into the next digit */
+
+    j = 2048 / 8 - 1;      /* start at the last (least significant) byte */
+    a[j] = 0;
+    for (i=0; i<64 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit the remaining whole bytes of this digit. */
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* Carry any partial-byte overlap into the next digit; when the
+         * digit did not end on a byte boundary, back up one byte so the
+         * next digit ORs its low bits into it. */
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (column-wise) 8x8-word multiply using ARM umull.
+ * r3/r4/r5 rotate roles as a three-word column accumulator; r6/r8 hold
+ * the current operand words and the 64-bit partial product.  The low
+ * eight result words are staged in the on-stack tmp buffer and copied
+ * to r at the end -- presumably so r may alias a or b (TODO: confirm
+ * against callers).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[8];    /* staging area for result words 0..7 */
+
+    __asm__ __volatile__ (
+        /* A[0] * B[0] */
+        "ldr r6, [%[a], #0]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "umull r3, r4, r6, r8\n\t"
+        "mov r5, #0\n\t"
+        "str r3, [%[tmp], #0]\n\t"
+        "mov r3, #0\n\t"
+        /* A[0] * B[1] */
+        "ldr r8, [%[b], #4]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* A[1] * B[0] */
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        "str r4, [%[tmp], #4]\n\t"
+        "mov r4, #0\n\t"
+        /* A[0] * B[2] */
+        "ldr r6, [%[a], #0]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[1] * B[1] */
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r8, [%[b], #4]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[2] * B[0] */
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        "str r5, [%[tmp], #8]\n\t"
+        "mov r5, #0\n\t"
+        /* A[0] * B[3] */
+        "ldr r6, [%[a], #0]\n\t"
+        "ldr r8, [%[b], #12]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[1] * B[2] */
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[2] * B[1] */
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r8, [%[b], #4]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[3] * B[0] */
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r3, [%[tmp], #12]\n\t"
+        "mov r3, #0\n\t"
+        /* A[0] * B[4] */
+        "ldr r6, [%[a], #0]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[1] * B[3] */
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r8, [%[b], #12]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[2] * B[2] */
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[3] * B[1] */
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #4]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[4] * B[0] */
+        "ldr r6, [%[a], #16]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        "str r4, [%[tmp], #16]\n\t"
+        "mov r4, #0\n\t"
+        /* A[0] * B[5] */
+        "ldr r6, [%[a], #0]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[1] * B[4] */
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[2] * B[3] */
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r8, [%[b], #12]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[3] * B[2] */
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[4] * B[1] */
+        "ldr r6, [%[a], #16]\n\t"
+        "ldr r8, [%[b], #4]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[5] * B[0] */
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        "str r5, [%[tmp], #20]\n\t"
+        "mov r5, #0\n\t"
+        /* A[0] * B[6] */
+        "ldr r6, [%[a], #0]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[1] * B[5] */
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[2] * B[4] */
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[3] * B[3] */
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #12]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[4] * B[2] */
+        "ldr r6, [%[a], #16]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[5] * B[1] */
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r8, [%[b], #4]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[6] * B[0] */
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r3, [%[tmp], #24]\n\t"
+        "mov r3, #0\n\t"
+        /* A[0] * B[7] */
+        "ldr r6, [%[a], #0]\n\t"
+        "ldr r8, [%[b], #28]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[1] * B[6] */
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[2] * B[5] */
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[3] * B[4] */
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[4] * B[3] */
+        "ldr r6, [%[a], #16]\n\t"
+        "ldr r8, [%[b], #12]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[5] * B[2] */
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[6] * B[1] */
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r8, [%[b], #4]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[7] * B[0] */
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #0]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        "str r4, [%[tmp], #28]\n\t"
+        "mov r4, #0\n\t"
+        /* From here on the high result words are stored straight to r. */
+        /* A[1] * B[7] */
+        "ldr r6, [%[a], #4]\n\t"
+        "ldr r8, [%[b], #28]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[2] * B[6] */
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[3] * B[5] */
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[4] * B[4] */
+        "ldr r6, [%[a], #16]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[5] * B[3] */
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r8, [%[b], #12]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[6] * B[2] */
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[7] * B[1] */
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #4]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        "str r5, [%[r], #32]\n\t"
+        "mov r5, #0\n\t"
+        /* A[2] * B[7] */
+        "ldr r6, [%[a], #8]\n\t"
+        "ldr r8, [%[b], #28]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[3] * B[6] */
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[4] * B[5] */
+        "ldr r6, [%[a], #16]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[5] * B[4] */
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[6] * B[3] */
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r8, [%[b], #12]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[7] * B[2] */
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #8]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r3, [%[r], #36]\n\t"
+        "mov r3, #0\n\t"
+        /* A[3] * B[7] */
+        "ldr r6, [%[a], #12]\n\t"
+        "ldr r8, [%[b], #28]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[4] * B[6] */
+        "ldr r6, [%[a], #16]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[5] * B[5] */
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[6] * B[4] */
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[7] * B[3] */
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #12]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "mov r4, #0\n\t"
+        /* A[4] * B[7] */
+        "ldr r6, [%[a], #16]\n\t"
+        "ldr r8, [%[b], #28]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[5] * B[6] */
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[6] * B[5] */
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        /* A[7] * B[4] */
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #16]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adc r4, r4, #0\n\t"
+        "str r5, [%[r], #44]\n\t"
+        "mov r5, #0\n\t"
+        /* A[5] * B[7] */
+        "ldr r6, [%[a], #20]\n\t"
+        "ldr r8, [%[b], #28]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[6] * B[6] */
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[7] * B[5] */
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r3, [%[r], #48]\n\t"
+        "mov r3, #0\n\t"
+        /* A[6] * B[7] */
+        "ldr r6, [%[a], #24]\n\t"
+        "ldr r8, [%[b], #28]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        /* A[7] * B[6] */
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #24]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "adc r3, r3, #0\n\t"
+        "str r4, [%[r], #52]\n\t"
+        "mov r4, #0\n\t"
+        /* A[7] * B[7] */
+        "ldr r6, [%[a], #28]\n\t"
+        "ldr r8, [%[b], #28]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adc r3, r3, r8\n\t"
+        "str r5, [%[r], #56]\n\t"
+        "str r3, [%[r], #60]\n\t"
+        /* Transfer tmp to r */
+        "ldr r3, [%[tmp], #0]\n\t"
+        "ldr r4, [%[tmp], #4]\n\t"
+        "ldr r5, [%[tmp], #8]\n\t"
+        "ldr r6, [%[tmp], #12]\n\t"
+        "str r3, [%[r], #0]\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "str r5, [%[r], #8]\n\t"
+        "str r6, [%[r], #12]\n\t"
+        "ldr r3, [%[tmp], #16]\n\t"
+        "ldr r4, [%[tmp], #20]\n\t"
+        "ldr r5, [%[tmp], #24]\n\t"
+        "ldr r6, [%[tmp], #28]\n\t"
+        "str r3, [%[r], #16]\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "str r5, [%[r], #24]\n\t"
+        "str r6, [%[r], #28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
+{
+ sp_digit tmp[8];
+ __asm__ __volatile__ (
+ /* A[0] * A[0] */
+ "ldr r6, [%[a], #0]\n\t"
+ "umull r3, r4, r6, r6\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [%[tmp], #0]\n\t"
+ "mov r3, #0\n\t"
+ /* A[0] * A[1] */
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adc r5, r5, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "str r4, [%[tmp], #4]\n\t"
+ "mov r4, #0\n\t"
+ /* A[0] * A[2] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[1] * A[1] */
+ "ldr r6, [%[a], #4]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [%[tmp], #8]\n\t"
+ "mov r5, #0\n\t"
+ /* A[0] * A[3] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[2] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, r10\n\t"
+ "adc r5, r5, r11\n\t"
+ "str r3, [%[tmp], #12]\n\t"
+ "mov r3, #0\n\t"
+ /* A[0] * A[4] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[3] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[2] * A[2] */
+ "ldr r6, [%[a], #8]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r4, r4, r9\n\t"
+ "adcs r5, r5, r10\n\t"
+ "adc r3, r3, r11\n\t"
+ "str r4, [%[tmp], #16]\n\t"
+ "mov r4, #0\n\t"
+ /* A[0] * A[5] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[4] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[2] * A[3] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r5, r5, r9\n\t"
+ "adcs r3, r3, r10\n\t"
+ "adc r4, r4, r11\n\t"
+ "str r5, [%[tmp], #20]\n\t"
+ "mov r5, #0\n\t"
+ /* A[0] * A[6] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[5] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[2] * A[4] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[3] * A[3] */
+ "ldr r6, [%[a], #12]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, r10\n\t"
+ "adc r5, r5, r11\n\t"
+ "str r3, [%[tmp], #24]\n\t"
+ "mov r3, #0\n\t"
+ /* A[0] * A[7] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[6] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[2] * A[5] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[3] * A[4] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r4, r4, r9\n\t"
+ "adcs r5, r5, r10\n\t"
+ "adc r3, r3, r11\n\t"
+ "str r4, [%[tmp], #28]\n\t"
+ "mov r4, #0\n\t"
+ /* A[1] * A[7] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[2] * A[6] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[3] * A[5] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[4] * A[4] */
+ "ldr r6, [%[a], #16]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r5, r5, r9\n\t"
+ "adcs r3, r3, r10\n\t"
+ "adc r4, r4, r11\n\t"
+ "str r5, [%[r], #32]\n\t"
+ "mov r5, #0\n\t"
+ /* A[2] * A[7] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[3] * A[6] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[4] * A[5] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, r10\n\t"
+ "adc r5, r5, r11\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "mov r3, #0\n\t"
+ /* A[3] * A[7] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[4] * A[6] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[5] * A[5] */
+ "ldr r6, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r4, r4, r9\n\t"
+ "adcs r5, r5, r10\n\t"
+ "adc r3, r3, r11\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "mov r4, #0\n\t"
+ /* A[4] * A[7] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[5] * A[6] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [%[r], #44]\n\t"
+ "mov r5, #0\n\t"
+ /* A[5] * A[7] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[6] * A[6] */
+ "ldr r6, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "mov r3, #0\n\t"
+ /* A[6] * A[7] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "mov r4, #0\n\t"
+ /* A[7] * A[7] */
+ "ldr r6, [%[a], #28]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r8\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ /* Transfer tmp to r */
+ "ldr r3, [%[tmp], #0]\n\t"
+ "ldr r4, [%[tmp], #4]\n\t"
+ "ldr r5, [%[tmp], #8]\n\t"
+ "ldr r6, [%[tmp], #12]\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r3, [%[tmp], #16]\n\t"
+ "ldr r4, [%[tmp], #20]\n\t"
+ "ldr r5, [%[tmp], #24]\n\t"
+ "ldr r6, [%[tmp], #28]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "str r5, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
+ : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11"
+ );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Returns the carry out of the most significant word (0 or 1).
+ *
+ * r A single precision integer (8 32-bit words). May alias a (see
+ *   sp_2048_mul_16, which passes r + 16 as both r and a).
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* First word pair uses "adds" to start the carry chain; every
+  * following pair uses "adcs" so the carry propagates through all
+  * 8 words. ldm/stm post-increment the pointers two words at a time. */
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ /* Materialize the final carry flag as the return value (0 or 1). */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+/* Sub b from a into a, in place. (a -= b)
+ *
+ * Note: despite taking no r parameter, the result is stored back into a.
+ * Returns 0 when there is no final borrow, or all ones (-1) when a < b —
+ * callers accumulate this value directly into their carry word.
+ *
+ * a A single precision integer (16 32-bit words); receives the difference.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* First pair uses "subs" to start the borrow chain; the rest use
+  * "sbcs". Each group loads a without writeback, subtracts, then
+  * stores with writeback ("stm %[a]!") to advance the pointer. */
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "subs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ /* c = 0 - 0 - borrow: 0 if no borrow, -1 (all ones) if borrow. */
+ "sbc %[c], %[c], %[c]\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Returns the carry out of the most significant word (0 or 1).
+ *
+ * r A single precision integer (16 32-bit words). May alias a (see
+ *   sp_2048_mul_32).
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* "adds" on the first pair, "adcs" on the remaining 7 pairs to
+  * propagate the carry across all 16 words. */
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ /* Materialize the final carry flag as the return value (0 or 1). */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Branch-free conditional copy: callers pass m as 0 or all ones
+ * (computed as 0 - carry), so r becomes either 0 or a copy of a.
+ *
+ * r A single precision integer (8 words).
+ * a A single precision integer (8 words).
+ * m Mask to AND against each digit (0 or 0xffffffff in this file).
+ */
+static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Size-optimized build: simple loop over the 8 words. */
+ int i;
+
+ for (i=0; i<8; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ /* Fully unrolled for speed. */
+ r[0] = a[0] & m;
+ r[1] = a[1] & m;
+ r[2] = a[2] & m;
+ r[3] = a[3] & m;
+ r[4] = a[4] & m;
+ r[5] = a[5] & m;
+ r[6] = a[6] & m;
+ r[7] = a[7] & m;
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: split each operand into 8-word halves,
+ * a = aH*2^256 + aL and b = bH*2^256 + bL, then
+ *   a*b = z2*2^512 + (z1 - z2 - z0)*2^256 + z0
+ * with z0 = aL*bL, z2 = aH*bH and z1 = (aL+aH)*(bL+bH).
+ * The carries ca/cb from the half-sums are folded in via masked adds.
+ *
+ * r A single precision integer (32-word result).
+ * a A single precision integer (16 words).
+ * b A single precision integer (16 words).
+ */
+SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[16];
+ sp_digit a1[8];
+ sp_digit b1[8];
+ sp_digit z2[16];
+ sp_digit u, ca, cb;
+
+ ca = sp_2048_add_8(a1, a, &a[8]); /* a1 = aL + aH, ca = carry */
+ cb = sp_2048_add_8(b1, b, &b[8]); /* b1 = bL + bH, cb = carry */
+ u = ca & cb; /* ca*cb term of (aL+aH)*(bL+bH) */
+ sp_2048_mul_16(z1, a1, b1);
+ sp_2048_mul_16(z2, &a[16], &b[16]);
+ sp_2048_mul_16(z0, a, b);
+ sp_2048_mask_16(r + 32, a1, 0 - cb);
+ sp_2048_mask_16(b1, b1, 0 - ca);
+ u += sp_2048_add_16(r + 32, r + 32, b1);
+ u += sp_2048_sub_in_place_32(z1, z2);
+ u += sp_2048_sub_in_place_32(z1, z0);
+ u += sp_2048_add_32(r + 16, r + 16, z1);
+ r[48] = u;
+ XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+ (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring: with a = aH*2^256 + aL,
+ *   a^2 = z2*2^512 + (z1 - z2 - z0)*2^256 + z0
+ * where z0 = aL^2, z2 = aH^2 and z1 = (aL+aH)^2. The carry u of the
+ * half-sum contributes 2*u*a1 at 2^512 (hence the masked add of a1
+ * performed twice via doubling) and u^2 at word 24.
+ *
+ * r A single precision integer (32-word result).
+ * a A single precision integer (16 words).
+ */
+SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[16];
+ sp_digit z1[16];
+ sp_digit a1[8];
+ sp_digit u;
+
+ u = sp_2048_add_8(a1, a, &a[8]); /* a1 = aL + aH, u = carry */
+ sp_2048_sqr_8(z1, a1);
+ sp_2048_sqr_8(z2, &a[8]);
+ sp_2048_sqr_8(z0, a);
+ sp_2048_mask_8(r + 16, a1, 0 - u); /* u ? a1 : 0 */
+ u += sp_2048_add_8(r + 16, r + 16, r + 16); /* double it: 2*u*a1 */
+ u += sp_2048_sub_in_place_16(z1, z2);
+ u += sp_2048_sub_in_place_16(z1, z0);
+ u += sp_2048_add_16(r + 8, r + 8, z1); /* middle term at 2^256 */
+ r[24] = u; /* net carry into word 24 */
+ XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
+ (void)sp_2048_add_16(r + 16, r + 16, z2); /* high square at 2^512 */
+}
+
+/* Sub b from a into a, in place. (a -= b)
+ *
+ * Note: despite taking no r parameter, the result is stored back into a.
+ * Returns 0 when there is no final borrow, or all ones (-1) when a < b.
+ *
+ * a A single precision integer (32 32-bit words); receives the difference.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* "subs" on the first pair starts the borrow chain; the remaining
+  * 15 pairs use "sbcs" so the borrow propagates across 32 words. */
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "subs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ /* c = 0 - 0 - borrow: 0 if no borrow, -1 (all ones) if borrow. */
+ "sbc %[c], %[c], %[c]\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Returns the carry out of the most significant word (0 or 1).
+ *
+ * r A single precision integer (32 32-bit words). May alias a (see
+ *   sp_2048_mul_64).
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* "adds" on the first pair, "adcs" on the remaining 15 pairs to
+  * propagate the carry across all 32 words. */
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ /* Materialize the final carry flag as the return value (0 or 1). */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Branch-free conditional copy: callers pass m as 0 or all ones
+ * (computed as 0 - carry), so r becomes either 0 or a copy of a.
+ *
+ * r A single precision integer (16 words).
+ * a A single precision integer (16 words).
+ * m Mask to AND against each digit (0 or 0xffffffff in this file).
+ */
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Size-optimized build: simple loop over the 16 words. */
+ int i;
+
+ for (i=0; i<16; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ /* Unrolled by 8 for speed. */
+ int i;
+
+ for (i = 0; i < 16; i += 8) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ r[i+6] = a[i+6] & m;
+ r[i+7] = a[i+7] & m;
+ }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba over 16-word halves:
+ * a = aH*2^512 + aL, b = bH*2^512 + bL,
+ *   a*b = z2*2^1024 + (z1 - z2 - z0)*2^512 + z0
+ * with z0 = aL*bL, z2 = aH*bH and z1 = (aL+aH)*(bL+bH).
+ *
+ * r A single precision integer (64-word result).
+ * a A single precision integer (32 words).
+ * b A single precision integer (32 words).
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[32];
+ sp_digit a1[16];
+ sp_digit b1[16];
+ sp_digit z2[32];
+ sp_digit u, ca, cb;
+
+ ca = sp_2048_add_16(a1, a, &a[16]); /* a1 = aL + aH, ca = carry */
+ cb = sp_2048_add_16(b1, b, &b[16]); /* b1 = bL + bH, cb = carry */
+ u = ca & cb; /* ca*cb term of (aL+aH)*(bL+bH) */
+ sp_2048_mul_16(z1, a1, b1);
+ sp_2048_mul_16(z2, &a[16], &b[16]);
+ sp_2048_mul_16(z0, a, b);
+ sp_2048_mask_16(r + 32, a1, 0 - cb);
+ sp_2048_mask_16(b1, b1, 0 - ca);
+ u += sp_2048_add_16(r + 32, r + 32, b1);
+ u += sp_2048_sub_in_place_32(z1, z2);
+ u += sp_2048_sub_in_place_32(z1, z0);
+ u += sp_2048_add_32(r + 16, r + 16, z1);
+ r[48] = u;
+ XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+ (void)sp_2048_add_32(r + 32, r + 32, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring over 16-word halves: with a = aH*2^512 + aL,
+ *   a^2 = z2*2^1024 + (z1 - z2 - z0)*2^512 + z0
+ * where z0 = aL^2, z2 = aH^2 and z1 = (aL+aH)^2. The half-sum carry u
+ * contributes 2*u*a1 (masked add of a1, then doubled) plus u^2.
+ *
+ * r A single precision integer (64-word result).
+ * a A single precision integer (32 words).
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[32];
+ sp_digit z1[32];
+ sp_digit a1[16];
+ sp_digit u;
+
+ u = sp_2048_add_16(a1, a, &a[16]); /* a1 = aL + aH, u = carry */
+ sp_2048_sqr_16(z1, a1);
+ sp_2048_sqr_16(z2, &a[16]);
+ sp_2048_sqr_16(z0, a);
+ sp_2048_mask_16(r + 32, a1, 0 - u); /* u ? a1 : 0 */
+ u += sp_2048_add_16(r + 32, r + 32, r + 32); /* double it: 2*u*a1 */
+ u += sp_2048_sub_in_place_32(z1, z2);
+ u += sp_2048_sub_in_place_32(z1, z0);
+ u += sp_2048_add_32(r + 16, r + 16, z1); /* middle term at 2^512 */
+ r[48] = u; /* net carry into word 48 */
+ XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
+ (void)sp_2048_add_32(r + 32, r + 32, z2); /* high square at 2^1024 */
+}
+
+/* Sub b from a into a, in place. (a -= b)
+ *
+ * Note: despite taking no r parameter, the result is stored back into a.
+ * Returns 0 when there is no final borrow, or all ones (-1) when a < b.
+ *
+ * a A single precision integer (64 32-bit words); receives the difference.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* "subs" on the first pair starts the borrow chain; the remaining
+  * 31 pairs use "sbcs" so the borrow propagates across 64 words. */
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "subs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ /* c = 0 - 0 - borrow: 0 if no borrow, -1 (all ones) if borrow. */
+ "sbc %[c], %[c], %[c]\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Returns the carry out of the most significant word (0 or 1).
+ *
+ * r A single precision integer (64 32-bit words).
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* "adds" on the first pair, "adcs" on the remaining 31 pairs to
+  * propagate the carry across all 64 words. */
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ /* Materialize the final carry flag as the return value (0 or 1). */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Branch-free conditional copy: callers pass m as 0 or all ones
+ * (computed as 0 - carry), so r becomes either 0 or a copy of a.
+ *
+ * r A single precision integer (32 words).
+ * a A single precision integer (32 words).
+ * m Mask to AND against each digit (0 or 0xffffffff in this file).
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ /* Size-optimized build: simple loop over the 32 words. */
+ int i;
+
+ for (i=0; i<32; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ /* Unrolled by 8 for speed. */
+ int i;
+
+ for (i = 0; i < 32; i += 8) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ r[i+6] = a[i+6] & m;
+ r[i+7] = a[i+7] & m;
+ }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Top-level Karatsuba over 32-word (1024-bit) halves:
+ * a = aH*2^1024 + aL, b = bH*2^1024 + bL,
+ *   a*b = z2*2^2048 + (z1 - z2 - z0)*2^1024 + z0
+ * with z0 = aL*bL, z2 = aH*bH and z1 = (aL+aH)*(bL+bH).
+ *
+ * r A single precision integer (128-word result).
+ * a A single precision integer (64 words).
+ * b A single precision integer (64 words).
+ */
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[64];
+ sp_digit a1[32];
+ sp_digit b1[32];
+ sp_digit z2[64];
+ sp_digit u, ca, cb;
+
+ ca = sp_2048_add_32(a1, a, &a[32]); /* a1 = aL + aH, ca = carry */
+ cb = sp_2048_add_32(b1, b, &b[32]); /* b1 = bL + bH, cb = carry */
+ u = ca & cb; /* ca*cb term of (aL+aH)*(bL+bH) */
+ sp_2048_mul_32(z1, a1, b1);
+ sp_2048_mul_32(z2, &a[32], &b[32]);
+ sp_2048_mul_32(z0, a, b);
+ sp_2048_mask_32(r + 64, a1, 0 - cb);
+ sp_2048_mask_32(b1, b1, 0 - ca);
+ u += sp_2048_add_32(r + 64, r + 64, b1);
+ u += sp_2048_sub_in_place_64(z1, z2);
+ u += sp_2048_sub_in_place_64(z1, z0);
+ u += sp_2048_add_64(r + 32, r + 32, z1);
+ r[96] = u;
+ XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+ (void)sp_2048_add_64(r + 64, r + 64, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Top-level Karatsuba squaring over 32-word halves: with
+ * a = aH*2^1024 + aL,
+ *   a^2 = z2*2^2048 + (z1 - z2 - z0)*2^1024 + z0
+ * where z0 = aL^2, z2 = aH^2 and z1 = (aL+aH)^2. The half-sum carry u
+ * contributes 2*u*a1 (masked add of a1, then doubled) plus u^2.
+ *
+ * r A single precision integer (128-word result).
+ * a A single precision integer (64 words).
+ */
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[64];
+ sp_digit z1[64];
+ sp_digit a1[32];
+ sp_digit u;
+
+ u = sp_2048_add_32(a1, a, &a[32]); /* a1 = aL + aH, u = carry */
+ sp_2048_sqr_32(z1, a1);
+ sp_2048_sqr_32(z2, &a[32]);
+ sp_2048_sqr_32(z0, a);
+ sp_2048_mask_32(r + 64, a1, 0 - u); /* u ? a1 : 0 */
+ u += sp_2048_add_32(r + 64, r + 64, r + 64); /* double it: 2*u*a1 */
+ u += sp_2048_sub_in_place_64(z1, z2);
+ u += sp_2048_sub_in_place_64(z1, z0);
+ u += sp_2048_add_64(r + 32, r + 32, z1); /* middle term at 2^1024 */
+ r[96] = u; /* net carry into word 96 */
+ XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
+ (void)sp_2048_add_64(r + 64, r + 64, z2); /* high square at 2^2048 */
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Word-at-a-time loop variant (WOLFSSL_SP_SMALL build). Returns the
+ * final carry (0 or 1).
+ *
+ * r A single precision integer (64 32-bit words).
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r6 = end pointer (a + 64 words * 4 bytes); r8 = -1, used to turn
+  * the saved carry value back into the carry flag each iteration. */
+ "mov r6, %[a]\n\t"
+ "mov r8, #0\n\t"
+ "add r6, r6, #256\n\t"
+ "sub r8, r8, #1\n\t"
+ "\n1:\n\t"
+ /* c + 0xffffffff sets the carry flag iff c was 1. */
+ "adds %[c], %[c], r8\n\t"
+ "ldr r4, [%[a]]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "str r4, [%[r]]\n\t"
+ /* Save the new carry flag back into c (0 or 1) for the next loop. */
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t"
+ "add %[a], %[a], #4\n\t"
+ "add %[b], %[b], #4\n\t"
+ "add %[r], %[r], #4\n\t"
+ "cmp %[a], r6\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Two-words-per-iteration loop variant (WOLFSSL_SP_SMALL build).
+ * Returns 0 when there is no final borrow, or all ones (-1) otherwise.
+ *
+ * a A single precision integer (64 32-bit words); updated in place.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+ __asm__ __volatile__ (
+ /* r8 = end pointer (a + 64 words * 4 bytes). */
+ "mov r8, %[a]\n\t"
+ "add r8, r8, #256\n\t"
+ "\n1:\n\t"
+ /* 0 - c re-establishes the borrow flag from c (0 or -1) before the
+  * sbcs pair; r5 is then immediately reused as a data register. */
+ "mov r5, #0\n\t"
+ "subs r5, r5, %[c]\n\t"
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "str r3, [%[a]]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ /* Save borrow state: c = 0 if no borrow, -1 if borrow. */
+ "sbc %[c], %[c], %[c]\n\t"
+ "add %[a], %[a], #8\n\t"
+ "add %[b], %[b], #8\n\t"
+ "cmp %[a], r8\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Result built in a local buffer so r may alias a or b. */
+    sp_digit tmp[64 * 2];
+    /* Product-scanning (comba) multiply: for each output word index k
+     * (byte offset kept in r9), all products a[i]*b[k-i] are summed into
+     * the 96-bit accumulator r5:r4:r3, then one result word is stored.
+     * r10 = a, r11 = b, r12 = tmp, r14 = a + 256 (end of a). */
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r9, r3\n\t"                /* r9 = k*4 = 0 */
+        "mov r12, %[r]\n\t"
+        "mov r10, %[a]\n\t"
+        "mov r11, %[b]\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"            /* r6 = 256 */
+        "add r6, r6, r10\n\t"
+        "mov r14, r6\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"              /* %[r] reused as constant 0 */
+        "mov r5, #0\n\t"
+        "mov r6, #252\n\t"              /* 252 = (64-1)*4, max digit offset */
+        /* Branch-free clamp: i = max(k - 252, 0), j = k - i. */
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov %[b], r9\n\t"
+        "sub %[b], %[b], %[a]\n\t"
+        "add %[a], %[a], r10\n\t"       /* %[a] = &a[i] */
+        "add %[b], %[b], r11\n\t"       /* %[b] = &b[j] */
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [%[b]]\n\t"
+        "umull r6, r8, r6, r8\n\t"      /* 64-bit product a[i]*b[j] */
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"          /* accumulate into r5:r4:r3 */
+        /* Multiply Done */
+        "add %[a], %[a], #4\n\t"
+        "sub %[b], %[b], #4\n\t"
+        "cmp %[a], r14\n\t"
+        "beq 3f\n\t"
+        "mov r6, r9\n\t"
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        /* Store result word k, shift accumulator down one word. */
+        "mov %[r], r12\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #248\n\t"          /* 504 = 126*4, last scanned k */
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "str r3, [%[r], r8]\n\t"        /* store top word r[127] */
+        "mov %[a], r10\n\t"
+        "mov %[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    /* Product-scanning square. For each output word k the pairs
+     * a[i]*a[k-i] with i < k-i are added twice (off-diagonal terms) and
+     * the diagonal a[k/2]^2 once, into the 96-bit accumulator r5:r4:r3.
+     * The 512-byte result is built on a stack scratch area (allocated by
+     * moving sp down 0x200) so that r may alias a, then copied out. */
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r9, r3\n\t"                /* r9 = k*4 = 0 */
+        "mov r12, %[r]\n\t"
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"            /* r6 = 512 */
+        "neg r6, r6\n\t"
+        "add sp, sp, r6\n\t"            /* allocate 512-byte scratch */
+        "mov r11, sp\n\t"
+        "mov r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"              /* %[r] reused as constant 0 */
+        "mov r6, #252\n\t"              /* 252 = (64-1)*4 */
+        /* Branch-free clamp: i = max(k - 252, 0); r2 = &a[k-i]. */
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov r2, r9\n\t"
+        "sub r2, r2, %[a]\n\t"
+        "add %[a], %[a], r10\n\t"       /* %[a] = &a[i] */
+        "add r2, r2, r10\n\t"           /* r2   = &a[j], j = k - i */
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"                    /* i == j: diagonal, square once */
+        /* Multiply * 2: Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [r2]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr r6, [%[a]]\n\t"
+        "umull r6, r8, r6, r6\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add %[a], %[a], #4\n\t"
+        "sub r2, r2, #4\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"            /* r6 = 256 */
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"                    /* ran off the end of a */
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"                    /* i passed j: pair already counted */
+        "mov r8, r9\n\t"
+        "add r8, r8, r10\n\t"
+        "cmp %[a], r8\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        /* Store result word k to the scratch area, shift accumulator. */
+        "mov %[r], r11\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #248\n\t"          /* 504 = 126*4, last scanned k */
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r10\n\t"
+        "str r3, [%[r], r8]\n\t"        /* store top word */
+        /* Copy the 128-word scratch result into r (top offset 0x1fc). */
+        "mov %[r], r12\n\t"
+        "mov %[a], r11\n\t"
+        "mov r3, #1\n\t"
+        "lsl r3, r3, #8\n\t"
+        "add r3, r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "subs r3, r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add sp, sp, r6\n\t"            /* release the 512-byte scratch */
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    /* Copy a into r with every digit ANDed against the mask m. Callers
+     * pass m as all-ones (select) or zero (clear); the branch-free loop
+     * behaves the same either way, i.e. in constant time. */
+    int i = 0;
+
+    while (i < 32) {
+        r[i] = a[i] & m;
+        i++;
+    }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    /* Word-by-word 32-word add; same carry-in-register scheme as the
+     * 64-word variant: adds c + 0xffffffff re-primes the carry flag. */
+    __asm__ __volatile__ (
+        "mov r6, %[a]\n\t"
+        "mov r8, #0\n\t"
+        "add r6, r6, #128\n\t"          /* r6 = a + 32*4 = loop end */
+        "sub r8, r8, #1\n\t"            /* r8 = 0xffffffff */
+        "\n1:\n\t"
+        "adds %[c], %[c], r8\n\t"       /* carry flag := (c == 1) */
+        "ldr r4, [%[a]]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c], %[c]\n\t"      /* c = carry out */
+        "add %[a], %[a], #4\n\t"
+        "add %[b], %[b], #4\n\t"
+        "add %[r], %[r], #4\n\t"
+        "cmp %[a], r6\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    /* Final carry out of the most significant word (0 or 1). */
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    /* In-place 32-word subtract, two words per iteration; c carries the
+     * borrow (0 or -1). The first subs only reloads the borrow into the
+     * flags — its r5 result is overwritten by the following load. */
+    __asm__ __volatile__ (
+        "mov r8, %[a]\n\t"
+        "add r8, r8, #128\n\t"          /* r8 = a + 32*4 = loop end */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"         /* borrow flag := (c == -1) */
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "sbc %[c], %[c], %[c]\n\t"      /* c = 0 - borrow out */
+        "add %[a], %[a], #8\n\t"
+        "add %[b], %[b], #8\n\t"
+        "cmp %[a], r8\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    /* 0 if no borrow out of the top word, -1 (all ones) if there was. */
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    /* Result built in a local buffer so r may alias a or b. */
+    sp_digit tmp[32 * 2];
+    /* Product-scanning (comba) multiply, 32-word operands: for each output
+     * word index k (byte offset in r9) sum a[i]*b[k-i] into the 96-bit
+     * accumulator r5:r4:r3. r10 = a, r11 = b, r12 = tmp, r14 = a + 128. */
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r9, r3\n\t"                /* r9 = k*4 = 0 */
+        "mov r12, %[r]\n\t"
+        "mov r10, %[a]\n\t"
+        "mov r11, %[b]\n\t"
+        "mov r6, #128\n\t"
+        "add r6, r6, r10\n\t"
+        "mov r14, r6\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"              /* %[r] reused as constant 0 */
+        "mov r5, #0\n\t"
+        "mov r6, #124\n\t"              /* 124 = (32-1)*4 */
+        /* Branch-free clamp: i = max(k - 124, 0), j = k - i. */
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov %[b], r9\n\t"
+        "sub %[b], %[b], %[a]\n\t"
+        "add %[a], %[a], r10\n\t"       /* %[a] = &a[i] */
+        "add %[b], %[b], r11\n\t"       /* %[b] = &b[j] */
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [%[b]]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add %[a], %[a], #4\n\t"
+        "sub %[b], %[b], #4\n\t"
+        "cmp %[a], r14\n\t"
+        "beq 3f\n\t"
+        "mov r6, r9\n\t"
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        /* Store result word k, shift accumulator down one word. */
+        "mov %[r], r12\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #248\n\t"              /* 248 = 62*4, last scanned k */
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "str r3, [%[r], r8]\n\t"        /* store top word r[63] */
+        "mov %[a], r10\n\t"
+        "mov %[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
+{
+    /* Product-scanning square, 32-word operand. Off-diagonal products
+     * a[i]*a[j] (i < j) are added twice, the diagonal square once, into
+     * the 96-bit accumulator r5:r4:r3. A 256-byte scratch result is built
+     * on the stack (sp moved down 0x100) so r may alias a, then copied. */
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r9, r3\n\t"                /* r9 = k*4 = 0 */
+        "mov r12, %[r]\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"            /* r6 = 256 */
+        "neg r6, r6\n\t"
+        "add sp, sp, r6\n\t"            /* allocate 256-byte scratch */
+        "mov r11, sp\n\t"
+        "mov r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"              /* %[r] reused as constant 0 */
+        "mov r6, #124\n\t"              /* 124 = (32-1)*4 */
+        /* Branch-free clamp: i = max(k - 124, 0); r2 = &a[k-i]. */
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov r2, r9\n\t"
+        "sub r2, r2, %[a]\n\t"
+        "add %[a], %[a], r10\n\t"       /* %[a] = &a[i] */
+        "add r2, r2, r10\n\t"           /* r2   = &a[j] */
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"                    /* i == j: diagonal, square once */
+        /* Multiply * 2: Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [r2]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr r6, [%[a]]\n\t"
+        "umull r6, r8, r6, r6\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add %[a], %[a], #4\n\t"
+        "sub r2, r2, #4\n\t"
+        "mov r6, #128\n\t"
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"                    /* ran off the end of a */
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"                    /* i passed j: pair already counted */
+        "mov r8, r9\n\t"
+        "add r8, r8, r10\n\t"
+        "cmp %[a], r8\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        /* Store result word k to the scratch area, shift accumulator. */
+        "mov %[r], r11\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #248\n\t"              /* 248 = 62*4, last scanned k */
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r10\n\t"
+        "str r3, [%[r], r8]\n\t"        /* store top word */
+        /* Copy the 64-word scratch result into r (top offset 0xfc). */
+        "mov %[r], r12\n\t"
+        "mov %[a], r11\n\t"
+        "mov r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "subs r3, r3, #4\n\t"
+        "bge 4b\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add sp, sp, r6\n\t"            /* release the 256-byte scratch */
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit b = a[0];
+    /* Seed: inverse of b modulo 2^4 (b is odd for a valid modulus —
+     * assumed, not checked here). */
+    sp_digit x = (((b + 2) & 4) << 1) + b;
+    int i;
+
+    /* Newton-Raphson refinement: each step doubles the number of correct
+     * low bits — 2^4 -> 2^8 -> 2^16 -> 2^32. */
+    for (i = 0; i < 3; i++) {
+        x *= 2 - b * x;
+    }
+
+    /* rho = -1/m mod 2^32 */
+    *rho = (sp_digit)0 - x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    /* 64-word by single-word multiply; the 65-word product goes to r.
+     * The running sum is carried in r4:r3 across iterations.
+     * NOTE(review): the loop uses a signed pointer compare (blt); assumes
+     * the buffer does not straddle address 0x80000000 — confirm for the
+     * target memory map. */
+    __asm__ __volatile__ (
+        "add r9, %[a], #256\n\t"        /* r9 = a + 64*4 = loop end */
+        /* A[0] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r5, r3, r6, %[b]\n\t"
+        "mov r4, #0\n\t"
+        "str r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        /* A[] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r6, r8, r6, %[b]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str r3, [%[r]], #4\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t"            /* store top word r[64] */
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 1024 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
+{
+    int i;
+
+    /* Zero-filled r stands in for 2^1024 (one digit beyond the 32-word
+     * window); subtracting m once leaves r = 2^1024 - m = 2^1024 mod m. */
+    for (i = 0; i < 32; i++) {
+        r[i] = 0;
+    }
+    (void)sp_2048_sub_in_place_32(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    /* Branch-free conditional subtract: each b word is ANDed with the mask
+     * first, so m == 0 copies a and m == -1 computes a - b, in constant
+     * time. The "subs r5, r5, c" reloads the saved borrow into the flags;
+     * its result is discarded by the following load. */
+    __asm__ __volatile__ (
+        "mov r5, #128\n\t"              /* 128 = 32*4 byte count */
+        "mov r9, r5\n\t"
+        "mov r8, #0\n\t"                /* r8 = byte offset */
+        "\n1:\n\t"
+        "ldr r6, [%[b], r8]\n\t"
+        "and r6, r6, %[m]\n\t"          /* b[i] masked by m */
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"         /* borrow flag := (c == -1) */
+        "ldr r5, [%[a], r8]\n\t"
+        "sbcs r5, r5, r6\n\t"
+        "sbcs %[c], %[c], %[c]\n\t"     /* c = 0 - borrow out */
+        "str r5, [%[r], r8]\n\t"
+        "add r8, r8, #4\n\t"
+        "cmp r8, r9\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    /* 0 if no borrow, -1 (all ones) if the subtract borrowed out. */
+    return c;
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    /* Word-wise Montgomery reduction of the 64-word value in a.
+     * For each of the 32 low words (outer loop 1, r10 walks a, r11 = end):
+     * mu = a[i] * mp mod 2^32, then a[i..] += m * mu so a[i] becomes zero.
+     * Inner loop 2 handles m[0..29] two words per pass (r14 = r10 + 120);
+     * m[30] and m[31] are unrolled after it so the carry can be folded
+     * into ca across the two words above the current window. */
+    __asm__ __volatile__ (
+        "mov r9, %[mp]\n\t"
+        "mov r12, %[m]\n\t"
+        "mov r10, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "add r11, r10, #128\n\t"        /* r11 = a + 32 words: outer limit */
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov %[mp], r9\n\t"
+        "ldr %[a], [r10]\n\t"
+        "mul %[mp], %[mp], %[a]\n\t"
+        "mov %[m], r12\n\t"
+        "add r14, r10, #120\n\t"        /* inner limit: 30 words */
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"         /* add previous carry word */
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r5, r5, %[a]\n\t"
+        "adc r4, r4, #0\n\t"
+        "str r5, [r10], #4\n\t"
+        "cmp r10, r14\n\t"
+        "blt 2b\n\t"
+        /* a[i+30] += m[30] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+31] += m[31] * mu */
+        "mov r4, %[ca]\n\t"
+        "mov %[ca], #0\n\t"
+        /* Multiply m[31] and mu - Start */
+        "ldr r8, [%[m]]\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc %[ca], %[ca], #0\n\t"
+        /* Multiply m[31] and mu - Done */
+        "ldr r6, [r10]\n\t"
+        "ldr r8, [r10, #4]\n\t"
+        "adds r6, r6, r5\n\t"
+        "adcs r8, r8, r4\n\t"
+        "adc %[ca], %[ca], #0\n\t"      /* ca accumulates top overflow */
+        "str r6, [r10]\n\t"
+        "str r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub r10, r10, #120\n\t"        /* advance one word: +128 - 120 - 4 - 4 */
+        "cmp r10, r11\n\t"
+        "blt 1b\n\t"
+        "mov %[a], r10\n\t"             /* a now points at the high half */
+        "mov %[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* a was advanced 32 words by the asm: write the reduced high half
+     * into the original a[0..31], subtracting m once if ca overflowed. */
+    sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montogmery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    /* Full 64-word product, then Montgomery-reduce back to 32 words. */
+    sp_2048_mul_32(r, a, b);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    /* Full 64-word square, then Montgomery-reduce back to 32 words. */
+    sp_2048_sqr_32(r, a);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    /* 32-word by single-word multiply; the 33-word product goes to r.
+     * The running sum is carried in r4:r3 across iterations.
+     * NOTE(review): the loop uses a signed pointer compare (blt); assumes
+     * the buffer does not straddle address 0x80000000 — confirm for the
+     * target memory map. */
+    __asm__ __volatile__ (
+        "add r9, %[a], #128\n\t"        /* r9 = a + 32*4 = loop end */
+        /* A[0] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r5, r3, r6, %[b]\n\t"
+        "mov r4, #0\n\t"
+        "str r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        /* A[] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r6, r8, r6, %[b]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str r3, [%[r]], #4\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t"            /* store top word r[32] */
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    /* Estimate the 32-bit quotient 16 bits at a time using the easy
+     * divisor r6 = (div >> 16) + 1. Each round divides the current
+     * remainder by r6, accumulates the partial quotient in r8, and
+     * subtracts quotient*div from d1:d0. A final udiv of the remaining
+     * low word polishes the result (may still be one too large — see
+     * the function header). Requires a core with the udiv instruction. */
+    __asm__ __volatile__ (
+        "lsr r6, %[div], #16\n\t"
+        "add r6, r6, #1\n\t"            /* r6 = (div >> 16) + 1 */
+        "udiv r4, %[d1], r6\n\t"
+        "lsl r8, r4, #16\n\t"           /* first 16-bit estimate */
+        "umull r4, r5, %[div], r8\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"      /* d1:d0 -= est * div */
+        "udiv r5, %[d1], r6\n\t"
+        "lsl r4, r5, #16\n\t"
+        "add r8, r8, r4\n\t"            /* refine the high 16 bits */
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t" /* next 32 bits of the remainder */
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"            /* low 16-bit estimate */
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"            /* refine the low 16 bits */
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r4, %[d0], %[div]\n\t"    /* final exact correction */
+        "add r8, r8, r4\n\t"
+        "mov %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    /* Constant-time compare: scan all 32 words from most significant
+     * (offset 124) to least, always executing the same instructions.
+     * r3 is an all-ones mask that is cleared once a differing word is
+     * seen, so only the most significant difference updates r. The two
+     * masked subtracts detect a < b (adds -1 to r) and a > b (adds +1). */
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"                /* r3 = 0xffffffff (active mask) */
+        "mov r6, #124\n\t"              /* start at word 31 */
+        "\n1:\n\t"
+        "ldr r8, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r8, r8, r3\n\t"            /* zero once a difference was found */
+        "and r5, r5, r3\n\t"
+        "mov r4, r8\n\t"
+        "subs r8, r8, r5\n\t"
+        "sbc r8, r8, r8\n\t"            /* r8 = -1 if a[i] < b[i] */
+        "add %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"            /* clear mask on difference */
+        "subs r5, r5, r4\n\t"
+        "sbc r8, r8, r8\n\t"            /* r8 = -1 if a[i] > b[i] */
+        "sub %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"            /* clear mask on difference */
+        "sub r6, r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    /* -1, 0 or 1 for a < b, a == b, a > b respectively. */
+    return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    /* Schoolbook long division of the 64-word value a by the 32-word d,
+     * one 32-bit quotient word estimate per iteration. The remainder is
+     * maintained in t1; the quotient itself is never stored (m unused).
+     * Assumes d is normalised enough for the estimate (top word of d
+     * non-trivial) — as used by sp_2048_mod_32 on a modulus. */
+    sp_digit t1[64], t2[33];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[31];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
+    for (i=31; i>=0; i--) {
+        /* Estimate quotient word from the top two remainder words. */
+        r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div);
+
+        sp_2048_mul_d_32(t2, d, r1);
+        /* Subtract r1*d; the top word tracks any over-subtraction. */
+        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
+        t1[32 + i] -= t2[32];
+        /* Up to two masked add-backs correct an over-large estimate
+         * without branching on secret data. */
+        sp_2048_mask_32(t2, d, t1[32 + i]);
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+        sp_2048_mask_32(t2, d, t1[32 + i]);
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+    }
+
+    /* Final conditional subtract brings the remainder below d. */
+    r1 = sp_2048_cmp_32(t1, d) >= 0;
+    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    /* Remainder only: the quotient output of the divide is not needed. */
+    return sp_2048_div_32(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+    /* Fixed 4-bit window exponentiation in Montgomery form:
+     * 16 precomputed powers, 4 squarings + 1 multiply per window. */
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[16][64];
+#else
+    sp_digit* t[16];
+    sp_digit* td;
+#endif
+    sp_digit* norm;     /* 2^1024 mod m (Montgomery "R mod m") */
+    sp_digit mp = 1;    /* -1/m[0] mod 2^32 */
+    sp_digit n;         /* current exponent word being consumed */
+    sp_digit mask;
+    int i;
+    int c, y;           /* c = bits left in n; y = current window value */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<16; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* Convert a to Montgomery form: t[1] = a * 2^1024 mod m, done by
+         * placing a in the upper 32 words and reducing. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute t[i] = a^i in Montgomery form for i = 2..15. */
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+
+        /* Consume the top (possibly partial) window first. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 4;
+        if (c == 32) {
+            c = 28;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;       /* left-align the unread exponent bits */
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        for (; i>=0 || c>=4; ) {
+            /* Fetch the next 4-bit window, crossing word boundaries. */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 28;
+                n <<= 4;
+                c = 28;
+            }
+            else if (c < 4) {
+                /* Window straddles two exponent words. */
+                y = n >> 28;
+                n = e[i--];
+                c = 4 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 28) & 0xf;
+                n <<= 4;
+                c -= 4;
+            }
+
+            /* r = r^16 * a^y (all in Montgomery form). */
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form (divide by 2^1024) and make
+         * sure the result is fully reduced below m. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+    /* Fixed 5-bit window exponentiation in Montgomery form:
+     * 32 precomputed powers, 5 squarings + 1 multiply per window. */
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][64];
+#else
+    sp_digit* t[32];
+    sp_digit* td;
+#endif
+    sp_digit* norm;     /* 2^1024 mod m (Montgomery "R mod m") */
+    sp_digit mp = 1;    /* -1/m[0] mod 2^32 */
+    sp_digit n;         /* current exponent word being consumed */
+    sp_digit mask;
+    int i;
+    int c, y;           /* c = bits left in n; y = current window value */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++) {
+            t[i] = td + i * 64;
+        }
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* Convert a to Montgomery form: t[1] = a * 2^1024 mod m, done by
+         * placing a in the upper 32 words and reducing. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
+        if (reduceA != 0) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY) {
+                err = sp_2048_mod_32(t[1], t[1], m);
+            }
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Precompute t[i] = a^i in Montgomery form for i = 2..31. */
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_32(t[20], t[10], m, mp);
+        sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_32(t[22], t[11], m, mp);
+        sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_32(t[24], t[12], m, mp);
+        sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_32(t[26], t[13], m, mp);
+        sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_32(t[28], t[14], m, mp);
+        sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_32(t[30], t[15], m, mp);
+        sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
+
+        /* Consume the top (possibly partial) window first. */
+        i = (bits - 1) / 32;
+        n = e[i--];
+        c = bits & 31;
+        if (c == 0) {
+            c = 32;
+        }
+        c -= bits % 5;
+        if (c == 32) {
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c;       /* left-align the unread exponent bits */
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        for (; i>=0 || c>=5; ) {
+            /* Fetch the next 5-bit window, crossing word boundaries. */
+            if (c == 0) {
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) {
+                /* Window straddles two exponent words. */
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else {
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            /* r = r^32 * a^y (all in Montgomery form). */
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            sp_2048_mont_mul_32(r, r, t[y], m, mp);
+        }
+
+        /* Convert out of Montgomery form (divide by 2^1024) and make
+         * sure the result is fully reduced below m. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
+{
+    int i;
+
+    /* Zero-filled r stands in for 2^2048 (one digit beyond the 64-word
+     * window); subtracting m once leaves r = 2^2048 - m = 2^2048 mod m. */
+    for (i = 0; i < 64; i++) {
+        r[i] = 0;
+    }
+    (void)sp_2048_sub_in_place_64(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    /* Branch-free conditional subtract over 64 words: each b word is
+     * ANDed with the mask first, so m == 0 copies a and m == -1 computes
+     * a - b, in constant time. The "subs r5, r5, c" reloads the saved
+     * borrow into the flags; its result is discarded by the next load. */
+    __asm__ __volatile__ (
+        "mov r5, #1\n\t"
+        "lsl r5, r5, #8\n\t"            /* r5 = 256 = 64*4 byte count */
+        "mov r9, r5\n\t"
+        "mov r8, #0\n\t"                /* r8 = byte offset */
+        "\n1:\n\t"
+        "ldr r6, [%[b], r8]\n\t"
+        "and r6, r6, %[m]\n\t"          /* b[i] masked by m */
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"         /* borrow flag := (c == -1) */
+        "ldr r5, [%[a], r8]\n\t"
+        "sbcs r5, r5, r6\n\t"
+        "sbcs %[c], %[c], %[c]\n\t"     /* c = 0 - borrow out */
+        "str r5, [%[r], r8]\n\t"
+        "add r8, r8, #4\n\t"
+        "cmp r8, r9\n\t"
+        "blt 1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    /* 0 if no borrow, -1 (all ones) if the subtract borrowed out. */
+    return c;
+}
+
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    /* Word-wise Montgomery reduction of the 128-word value in a.
+     * For each of the 64 low words (outer loop 1, r10 walks a, r11 = end):
+     * mu = a[i] * mp mod 2^32, then a[i..] += m * mu so a[i] becomes zero.
+     * Inner loop 2 handles m[0..61] two words per pass (r14 = r10 + 248);
+     * m[62] and m[63] are unrolled after it so the carry can be folded
+     * into ca across the two words above the current window. */
+    __asm__ __volatile__ (
+        "mov r9, %[mp]\n\t"
+        "mov r12, %[m]\n\t"
+        "mov r10, %[a]\n\t"
+        "mov r4, #0\n\t"
+        "add r11, r10, #256\n\t"        /* r11 = a + 64 words: outer limit */
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov %[mp], r9\n\t"
+        "ldr %[a], [r10]\n\t"
+        "mul %[mp], %[mp], %[a]\n\t"
+        "mov %[m], r12\n\t"
+        "add r14, r10, #248\n\t"        /* inner limit: 62 words */
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"         /* add previous carry word */
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r5, r5, %[a]\n\t"
+        "adc r4, r4, #0\n\t"
+        "str r5, [r10], #4\n\t"
+        "cmp r10, r14\n\t"
+        "blt 2b\n\t"
+        /* a[i+62] += m[62] * mu */
+        "ldr %[a], [r10]\n\t"
+        "mov r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr r8, [%[m]], #4\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds %[a], %[a], r6\n\t"
+        "adc r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds r4, r4, %[a]\n\t"
+        "adc r5, r5, #0\n\t"
+        "str r4, [r10], #4\n\t"
+        /* a[i+63] += m[63] * mu */
+        "mov r4, %[ca]\n\t"
+        "mov %[ca], #0\n\t"
+        /* Multiply m[63] and mu - Start */
+        "ldr r8, [%[m]]\n\t"
+        "umull r6, r8, %[mp], r8\n\t"
+        "adds r5, r5, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc %[ca], %[ca], #0\n\t"
+        /* Multiply m[63] and mu - Done */
+        "ldr r6, [r10]\n\t"
+        "ldr r8, [r10, #4]\n\t"
+        "adds r6, r6, r5\n\t"
+        "adcs r8, r8, r4\n\t"
+        "adc %[ca], %[ca], #0\n\t"      /* ca accumulates top overflow */
+        "str r6, [r10]\n\t"
+        "str r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub r10, r10, #248\n\t"        /* advance one word: +256 - 248 - 4 - 4 */
+        "cmp r10, r11\n\t"
+        "blt 1b\n\t"
+        "mov %[a], r10\n\t"             /* a now points at the high half */
+        "mov %[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* a was advanced 64 words by the asm: write the reduced high half
+     * into the original a[0..63], subtracting m once if ca overflowed. */
+    sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication, in Montgomery form.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ /* Full 128-word product, then Montgomery-reduce back to 64 words. */
+ sp_2048_mul_64(r, a, b);
+ sp_2048_mont_reduce_64(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring, in Montgomery form.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Dedicated squaring is cheaper than mul(a, a); then Montgomery-reduce. */
+ sp_2048_sqr_64(r, a);
+ sp_2048_mont_reduce_64(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ *
+ * The quotient is built 16 bits at a time in r8: each step estimates a
+ * partial quotient with udiv against (div >> 16) + 1 (an over-estimate of
+ * the divisor's top half, so the partial quotient never overshoots), then
+ * subtracts estimate * div from the 64-bit remainder (d1|d0).
+ */
+SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0,
+ sp_digit div)
+{
+ sp_digit r = 0;
+
+ __asm__ __volatile__ (
+ /* r6 = (div >> 16) + 1: safe (over-)estimate of the divisor. */
+ "lsr r6, %[div], #16\n\t"
+ "add r6, r6, #1\n\t"
+ /* First partial quotient from the high word, scaled by 2^16. */
+ "udiv r4, %[d1], r6\n\t"
+ "lsl r8, r4, #16\n\t"
+ /* Subtract estimate * div from (d1|d0). */
+ "umull r4, r5, %[div], r8\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ /* Second pass on what remains in the high word. */
+ "udiv r5, %[d1], r6\n\t"
+ "lsl r4, r5, #16\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ /* Now d1 < 2^16: work on the middle 32 bits (d1:16 | d0:>>16). */
+ "lsl r4, %[d1], #16\n\t"
+ "orr r4, r4, %[d0], lsr #16\n\t"
+ "udiv r4, r4, r6\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ /* Repeat to absorb the remaining error of the estimate. */
+ "lsl r4, %[d1], #16\n\t"
+ "orr r4, r4, %[d0], lsr #16\n\t"
+ "udiv r4, r4, r6\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ /* Final exact 32-bit division of the remainder. */
+ "udiv r4, %[d0], %[div]\n\t"
+ "add r8, r8, r4\n\t"
+ "mov %[r], r8\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r6", "r8"
+ );
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer receiving the masked digits.
+ * a A single precision integer to be masked.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int j = 0;
+
+    while (j < 64) {
+        r[j] = a[j] & m;
+        j++;
+    }
+#else
+    int j;
+
+    /* Process eight words per iteration (unrolled). */
+    for (j = 0; j < 64; j += 8) {
+        r[j + 0] = a[j + 0] & m;
+        r[j + 1] = a[j + 1] & m;
+        r[j + 2] = a[j + 2] & m;
+        r[j + 3] = a[j + 3] & m;
+        r[j + 4] = a[j + 4] & m;
+        r[j + 5] = a[j + 5] & m;
+        r[j + 6] = a[j + 6] & m;
+        r[j + 7] = a[j + 7] & m;
+    }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ *
+ * Words are scanned from most significant (byte offset 252) down to least.
+ * r3 is an all-ones mask that is cleared once the first differing word pair
+ * is seen, so later (less significant) words cannot affect the result, yet
+ * every word is still visited - no data-dependent branches.
+ */
+SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        /* r3 = 0xffffffff: "still equal" mask. */
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        /* r6 = byte offset of the most significant word (63 * 4). */
+        "mov r6, #252\n\t"
+        "\n1:\n\t"
+        "ldr r8, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        /* Masked words are zero after a difference was already found. */
+        "and r8, r8, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "mov r4, r8\n\t"
+        /* r8 = -1 if a[i] < b[i] else 0; accumulate into r. */
+        "subs r8, r8, r5\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], r8\n\t"
+        /* Clear the mask if the words differed. */
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        /* Same trick in the other direction: +1 if a[i] > b[i]. */
+        "subs r5, r5, r4\n\t"
+        "sbc r8, r8, r8\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        "sub r6, r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Divide a by d and put the remainder into r (m*d + r = a).
+ * m is not calculated as it is not needed at this time.
+ *
+ * Constant-time variant: quotient-estimate corrections are done with
+ * masked adds rather than data-dependent branches.
+ *
+ * a Number to be divided (2 * 64 words).
+ * d Number to divide with (64 words).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[128], t2[65];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ div = d[63];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+ for (i=63; i>=0; i--) {
+ /* Estimate the quotient word; may be 1 too large. */
+ r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+ sp_2048_mul_d_64(t2, d, r1);
+ t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+ t1[64 + i] -= t2[64];
+ /* Two masked correction adds: if the subtract borrowed
+  * (t1[64+i] became all-ones), add d back. */
+ sp_2048_mask_64(t2, d, t1[64 + i]);
+ t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+ sp_2048_mask_64(t2, d, t1[64 + i]);
+ t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
+ }
+
+ /* Final conditional subtract so that 0 <= r < d. */
+ r1 = sp_2048_cmp_64(t1, d) >= 0;
+ sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Division keeps only the remainder; quotient (NULL) is discarded. */
+ return sp_2048_div_64(a, m, NULL, r);
+}
+
+/* Divide a by d and put the remainder into r (m*d + r = a).
+ * m is not calculated as it is not needed at this time.
+ *
+ * Conditional (non-constant-time) variant of sp_2048_div_64: quotient
+ * corrections use data-dependent branches, so only use this where the
+ * operands are public (e.g. RSA public-key operations).
+ *
+ * a Number to be divided (2 * 64 words).
+ * d Number to divide with (64 words).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[128], t2[65];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ div = d[63];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+ for (i=63; i>=0; i--) {
+ /* Estimate the quotient word; may be 1 too large. */
+ r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+ sp_2048_mul_d_64(t2, d, r1);
+ t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
+ t1[64 + i] -= t2[64];
+ /* Branchy correction: add d back while the top word is non-zero
+  * (i.e. the subtract borrowed). */
+ if (t1[64 + i] != 0) {
+ t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+ if (t1[64 + i] != 0)
+ t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
+ }
+ }
+
+ /* Final conditional subtract so that 0 <= r < d. */
+ r1 = sp_2048_cmp_64(t1, d) >= 0;
+ sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Non-constant-time variant - only for public operands.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Division keeps only the remainder; quotient (NULL) is discarded. */
+ return sp_2048_div_64_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window method: precomputes a^0..a^15 in Montgomery form,
+ * then consumes the exponent four bits at a time, most significant first,
+ * doing four squarings and one table multiply per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[16][128];
+#else
+ sp_digit* t[16];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<16; i++) {
+ t[i] = td + i * 128;
+ }
+#endif
+ /* t[0] doubles as the Montgomery normalizer (R mod m). */
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_64(norm, m);
+
+ /* Build t[1] = a in Montgomery form: place a in the upper half
+  * (i.e. a * R) and reduce mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+ if (reduceA != 0) {
+ err = sp_2048_mod_64(t[1] + 64, a, m);
+ if (err == MP_OKAY) {
+ err = sp_2048_mod_64(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+ err = sp_2048_mod_64(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Precompute window table: t[j] = a^j in Montgomery form. */
+ sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+ sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+ sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+ sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+ sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+ sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+ sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+ sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+ sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+ sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+
+ /* Load the first (possibly short) window from the top of e.
+  * i indexes 32-bit words of e; c counts bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 4;
+ if (c == 32) {
+ c = 28;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+ for (; i>=0 || c>=4; ) {
+ /* Extract the next 4-bit window y, refilling n from e
+  * when fewer than 4 bits remain. */
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 28;
+ n <<= 4;
+ c = 28;
+ }
+ else if (c < 4) {
+ y = n >> 28;
+ n = e[i--];
+ c = 4 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+ }
+
+ /* r = r^16 * a^y (all in Montgomery form). */
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+
+ sp_2048_mont_mul_64(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form and fully reduce. */
+ XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+ sp_2048_mont_reduce_64(r, m, mp);
+
+ mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+ sp_2048_cond_sub_64(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window method: precomputes a^0..a^31 in Montgomery form,
+ * then consumes the exponent five bits at a time, most significant first,
+ * doing five squarings and one table multiply per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][128];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++) {
+ t[i] = td + i * 128;
+ }
+#endif
+ /* t[0] doubles as the Montgomery normalizer (R mod m). */
+ norm = t[0];
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_64(norm, m);
+
+ /* Build t[1] = a in Montgomery form: place a in the upper half
+  * (i.e. a * R) and reduce mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
+ if (reduceA != 0) {
+ err = sp_2048_mod_64(t[1] + 64, a, m);
+ if (err == MP_OKAY) {
+ err = sp_2048_mod_64(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
+ err = sp_2048_mod_64(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Precompute window table: t[j] = a^j in Montgomery form. */
+ sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
+ sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
+ sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
+ sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
+ sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
+ sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
+ sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
+ sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
+ sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
+ sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
+ sp_2048_mont_sqr_64(t[16], t[ 8], m, mp);
+ sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
+ sp_2048_mont_sqr_64(t[18], t[ 9], m, mp);
+ sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp);
+ sp_2048_mont_sqr_64(t[20], t[10], m, mp);
+ sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp);
+ sp_2048_mont_sqr_64(t[22], t[11], m, mp);
+ sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp);
+ sp_2048_mont_sqr_64(t[24], t[12], m, mp);
+ sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp);
+ sp_2048_mont_sqr_64(t[26], t[13], m, mp);
+ sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp);
+ sp_2048_mont_sqr_64(t[28], t[14], m, mp);
+ sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp);
+ sp_2048_mont_sqr_64(t[30], t[15], m, mp);
+ sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp);
+
+ /* Load the first (possibly short) window from the top of e.
+  * i indexes 32-bit words of e; c counts bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
+ for (; i>=0 || c>=5; ) {
+ /* Extract the next 5-bit window y, refilling n from e
+  * when fewer than 5 bits remain. */
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^32 * a^y (all in Montgomery form). */
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+
+ sp_2048_mont_mul_64(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form and fully reduce. */
+ XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+ sp_2048_mont_reduce_64(r, m, mp);
+
+ mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+ sp_2048_cond_sub_64(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[128], m[64], r[128];
+#else
+ sp_digit* d = NULL;
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+#endif
+ sp_digit *ah;
+ sp_digit e[1];
+ int err = MP_OKAY;
+
+ /* Public exponent is limited to 32 bits in this implementation. */
+ if (*outLen < 256)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 ||
+ mp_count_bits(mm) != 2048))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 64 * 2;
+ m = r + 64 * 2;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ /* ah points at the upper half of a; the input is loaded there. */
+ ah = a + 64;
+
+ sp_2048_from_bin(ah, 64, in, inLen);
+#if DIGIT_BIT >= 32
+ e[0] = em->dp[0];
+#else
+ e[0] = em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_2048_from_mp(m, 64, mm);
+
+ /* Fast path for the common public exponent 3: r = in^3 mod m. */
+ if (e[0] == 0x3) {
+ if (err == MP_OKAY) {
+ sp_2048_sqr_64(r, ah);
+ err = sp_2048_mod_64_cond(r, r, m);
+ }
+ if (err == MP_OKAY) {
+ sp_2048_mul_64(r, ah, r);
+ err = sp_2048_mod_64_cond(r, r, m);
+ }
+ }
+ else {
+ /* Generic left-to-right square-and-multiply in Montgomery
+  * form (exponent is public, so branching on bits is fine). */
+ int i;
+ sp_digit mp;
+
+ sp_2048_mont_setup(m, &mp);
+
+ /* Convert to Montgomery form. With the input in the upper
+  * half and zeros below, a = in * 2^2048; reducing mod m
+  * yields in in Montgomery form. */
+ XMEMSET(a, 0, sizeof(sp_digit) * 64);
+ err = sp_2048_mod_64_cond(a, a, m);
+
+ if (err == MP_OKAY) {
+ /* Skip leading zero bits of the exponent. */
+ for (i = 31; i >= 0; i--) {
+ if (e[0] >> i) {
+ break;
+ }
+ }
+
+ XMEMCPY(r, a, sizeof(sp_digit) * 64);
+ for (i--; i>=0; i--) {
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ if (((e[0] >> i) & 1) == 1) {
+ sp_2048_mont_mul_64(r, r, a, m, mp);
+ }
+ }
+ /* Convert back out of Montgomery form. */
+ XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
+ sp_2048_mont_reduce_64(r, m, mp);
+
+ /* Final reduction: subtract m once if r >= m. */
+ for (i = 63; i > 0; i--) {
+ if (r[i] != m[i]) {
+ break;
+ }
+ }
+ if (r[i] >= m[i]) {
+ sp_2048_sub_in_place_64(r, m);
+ }
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#endif
+
+ return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ /* NOTE(review): this branch appears to be the body of the non-CRT RSA
+  * private-key path (r = in^d mod m) of sp_RsaPrivate_2048; its signature
+  * lies outside this view - confirm against the full file. */
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ /* CRT parameters are unused on this path. */
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 256U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 2048) {
+ err = MP_READ_E;
+ }
+ if (inLen > 256) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 2048) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ /* Layout within the single allocation: d | a (double width). */
+ a = d + 64;
+ m = a + 128;
+ r = a;
+
+ sp_2048_from_bin(a, 64, in, inLen);
+ sp_2048_from_mp(d, 64, dm);
+ sp_2048_from_mp(m, 64, mm);
+ err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+ if (d != NULL) {
+ /* Private exponent was held in d - clear before freeing. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 64);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Constant time: the same adds execute either way; m only masks b.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ * returns the carry out of the 32-word addition.
+ */
+SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r9 = 128: end byte offset (32 words * 4). r8 = running offset. */
+ "mov r5, #128\n\t"
+ "mov r9, r5\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ "ldr r6, [%[b], r8]\n\t"
+ "and r6, r6, %[m]\n\t"
+ /* Prime the carry flag from c (c + (-1) carries iff c != 0);
+  * r5 is only a scratch here and is reloaded immediately. */
+ "adds r5, %[c], #-1\n\t"
+ "ldr r5, [%[a], r8]\n\t"
+ "adcs r5, r5, r6\n\t"
+ /* Capture the new carry back into c. */
+ "mov %[c], #0\n\t"
+ "adcs %[c], %[c], %[c]\n\t"
+ "str r5, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, r9\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r5", "r6", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* RSA private key operation using the Chinese Remainder Theorem.
+ *
+ * Computes tmpa = in^dp mod p and tmpb = in^dq mod q, then recombines
+ * (Garner): r = tmpb + q * (((tmpa - tmpb) * qi) mod p).
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[64 * 2];
+ sp_digit p[32], q[32], dp[32];
+ sp_digit tmpa[64], tmpb[64];
+#else
+ sp_digit* t = NULL;
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+#endif
+ sp_digit* r;
+ sp_digit* qi;
+ sp_digit* dq;
+ sp_digit c;
+ int err = MP_OKAY;
+
+ /* dm and mm are not needed on the CRT path. */
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 256)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ a = t;
+ p = a + 64 * 2;
+ q = p + 32;
+ /* dp, dq and qi share one 32-word buffer; used one at a time. */
+ qi = dq = dp = q + 32;
+ tmpa = qi + 32;
+ tmpb = tmpa + 64;
+
+ r = t + 64;
+ }
+#else
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ r = a;
+ qi = dq = dp;
+#endif
+ sp_2048_from_bin(a, 64, in, inLen);
+ sp_2048_from_mp(p, 32, pm);
+ sp_2048_from_mp(q, 32, qm);
+ sp_2048_from_mp(dp, 32, dpm);
+
+ /* tmpa = in^dp mod p */
+ err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
+ }
+ if (err == MP_OKAY) {
+ /* tmpb = in^dq mod q */
+ sp_2048_from_mp(dq, 32, dqm);
+ err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ /* tmpa = (tmpa - tmpb) mod p: two conditional adds of p cover
+  * any borrow from the subtraction. */
+ c = sp_2048_sub_in_place_32(tmpa, tmpb);
+ c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
+ sp_2048_cond_add_32(tmpa, tmpa, p, c);
+
+ /* tmpa = (tmpa * qi) mod p */
+ sp_2048_from_mp(qi, 32, qim);
+ sp_2048_mul_32(tmpa, tmpa, qi);
+ err = sp_2048_mod_32(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = tmpb + q * tmpa */
+ sp_2048_mul_32(tmpa, q, tmpa);
+ XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
+ sp_2048_add_64(r, tmpb, tmpa);
+
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ /* Clear key-dependent material before freeing. */
+ XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+#else
+ XMEMSET(tmpa, 0, sizeof(tmpa));
+ XMEMSET(tmpb, 0, sizeof(tmpb));
+ XMEMSET(p, 0, sizeof(p));
+ XMEMSET(q, 0, sizeof(q));
+ XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Handles three cases depending on the mp_int digit width:
+ * DIGIT_BIT == 32 is a straight copy; otherwise the 32-bit sp_digits are
+ * repacked bit-by-boundary into DIGIT_BIT-wide mp_digits.
+ *
+ * a A single precision integer (64 x 32-bit words).
+ * r A multi-precision integer.
+ * returns MP_OKAY on success, or the error from mp_grow.
+ */
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
+ r->used = 64;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* Each 32-bit source word spans two or more narrower mp_digits.
+  * s tracks the bit position within the current source word. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 64; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* DIGIT_BIT > 32: several 32-bit words pack into one mp_digit.
+  * s is the bit offset within the current destination digit. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 64; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[128], e[64], m[64];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    /* Base and exponent must fit in 2048 bits; modulus must be exactly
+     * 2048 bits. */
+    if ((mp_count_bits(base) > 2048) || (expBits > 2048) ||
+            (mp_count_bits(mod) != 2048)) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 64, base);
+        sp_2048_from_mp(e, 64, exp);
+        sp_2048_from_mp(m, 64, mod);
+
+        err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_2048_to_mp(r, res);
+    }
+
+    /* The exponent may be private - scrub it from the stack. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_2048
+static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
+{
+ __asm__ __volatile__ (
+ "mov r6, #31\n\t"
+ "sub r6, r6, %[n]\n\t"
+ "add %[a], %[a], #192\n\t"
+ "add %[r], %[r], #192\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "lsr r4, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r4, r4, r6\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #52]\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "str r3, [%[r], #56]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #44]\n\t"
+ "str r2, [%[r], #52]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #40]\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "str r3, [%[r], #44]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "str r2, [%[r], #40]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #28]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "str r2, [%[r], #28]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "str r2, [%[r], #68]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "str r3, [%[r]]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+ : "memory", "r2", "r3", "r4", "r5", "r6"
+ );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
+ const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ /* Normalizer and scratch live on the stack when small-stack is off. */
+ sp_digit nd[128];
+ sp_digit td[65];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;   /* Montgomery normalizer from sp_2048_mont_norm_64. */
+ sp_digit* tmp;    /* Scratch for the mul-by-digit overflow product. */
+ sp_digit mp = 1;  /* Montgomery digit, set by sp_2048_mont_setup. */
+ sp_digit n, o;    /* n: exponent bits being consumed; o: add carry. */
+ sp_digit mask;
+ int i;            /* Index of the exponent word being read. */
+ int c, y;         /* c: valid bits left in n; y: 5-bit window value. */
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* Single allocation: 128 digits of normalizer + 65 digits of scratch. */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 128;
+#else
+ norm = nd;
+ tmp = td;
+#endif
+
+ sp_2048_mont_setup(m, &mp);
+ sp_2048_mont_norm_64(norm, m);
+
+ /* Load the top exponent word and compute how many of its bits (c)
+ * remain after the first (possibly short) 5-bit window is taken. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ /* Base is 2, so raising to the first window value y is a left
+ * shift of the Montgomery form held in norm. */
+ sp_2048_lshift_64(r, norm, y);
+ for (; i>=0 || c>=5; ) {
+ /* Extract the next 5-bit window y, refilling n from e[] when the
+ * current word runs out (27 = 32 - 5). */
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles two exponent words. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^(2^5) mod m, in Montgomery form. */
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+ sp_2048_mont_sqr_64(r, r, m, mp);
+
+ /* Multiply by 2^y via shift, then reduce the digit shifted out
+ * (r[64]) back in using the normalizer, with a conditional
+ * subtract on carry. */
+ sp_2048_lshift_64(r, r, y);
+ sp_2048_mul_d_64(tmp, norm, r[64]);
+ r[64] = 0;
+ o = sp_2048_add_64(r, r, tmp);
+ sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
+ }
+
+ /* Convert out of Montgomery form and fully reduce. */
+ XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
+ sp_2048_mont_reduce_64(r, m, mp);
+
+ /* Constant-time final subtraction when r >= m. */
+ mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
+ sp_2048_cond_sub_64(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* HAVE_FFDHE_2048 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+ sp_digit b[128], e[64], m[64];
+ sp_digit* r = b;
+ word32 idx;
+ int err;
+
+ /* Validate operand sizes before any conversion work. */
+ if (mp_count_bits(base) > 2048) {
+ err = MP_READ_E;
+ }
+ else if (expLen > 256) {
+ err = MP_READ_E;
+ }
+ else if (mp_count_bits(mod) != 2048) {
+ err = MP_READ_E;
+ }
+ else {
+ err = MP_OKAY;
+ }
+
+ if (err == MP_OKAY) {
+ /* Convert operands into fixed-size single-precision form. */
+ sp_2048_from_mp(b, 64, base);
+ sp_2048_from_bin(e, 64, exp, expLen);
+ sp_2048_from_mp(m, 64, mod);
+
+ #ifdef HAVE_FFDHE_2048
+ /* Base 2 with a top modulus word of all ones: take the dedicated
+ * base-2 path. */
+ if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)
+ err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
+ else
+ #endif
+ err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Serialize as 256 big-endian bytes, then strip leading zeros. */
+ sp_2048_to_bin(r, out);
+ *outLen = 256;
+ for (idx = 0; idx < 256 && out[idx] == 0; idx++) {
+ }
+ *outLen -= idx;
+ XMEMMOVE(out, out + idx, *outLen);
+ }
+
+ /* Zeroize the local copy of the exponent. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ sp_digit b[64], e[32], m[32];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+ int err;
+
+ /* Reject operands that do not fit the fixed 1024-bit layout. */
+ if (mp_count_bits(base) > 1024) {
+ err = MP_READ_E;
+ }
+ else if (expBits > 1024) {
+ err = MP_READ_E;
+ }
+ else if (mp_count_bits(mod) != 1024) {
+ err = MP_READ_E;
+ }
+ else {
+ err = MP_OKAY;
+ }
+
+ if (err == MP_OKAY) {
+ /* Convert operands into single-precision form and exponentiate. */
+ sp_2048_from_mp(b, 32, base);
+ sp_2048_from_mp(e, 32, exp);
+ sp_2048_from_mp(m, 32, mod);
+
+ err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Clear the unused top half of r before converting back. */
+ XMEMSET(r + 32, 0, sizeof(*r) * 32U);
+ err = sp_2048_to_mp(r, res);
+ res->used = mod->used;
+ mp_clamp(res);
+ }
+
+ /* Zeroize the local copy of the exponent. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
+
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ word32 s = 0; /* Bit position within r[j] for the next byte. */
+
+ r[0] = 0;
+ /* Walk the big-endian bytes from least to most significant. */
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 24U) {
+ /* Digit full: mask to 32 bits and spill the byte's high bits
+ * into the next digit. */
+ r[j] &= 0xffffffff;
+ s = 32U - s; /* Bits of a[i] that fit into r[j]. */
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s; /* Bits of a[i] carried into the new digit. */
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ /* Zero any remaining high digits. */
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+ /* mp_digit matches sp_digit width: direct copy then zero-pad. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 32
+ /* mp_digit is wider than 32 bits: split each source digit across
+ * multiple 32-bit output words. */
+ int i, j = 0;
+ word32 s = 0; /* Bits of a->dp[i] already placed in r. */
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ /* Emit further full 32-bit words from the same source digit. */
+ while ((s + 32U) <= (word32)DIGIT_BIT) {
+ s += 32U;
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s; /* Offset for the next source digit. */
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp_digit is narrower than 32 bits: pack several source digits
+ * into each 32-bit output word. */
+ int i, j = 0, s = 0; /* s = bit offset into r[j]. */
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 32) {
+ /* Output word full (or exactly filled): advance to the next. */
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 32 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_3072_to_bin(sp_digit* r, byte* a)
+{
+ int i, j, s = 0, b; /* s: bits already written of r[i]; b: bit index. */
+
+ j = 3072 / 8 - 1; /* Start at the least significant (last) byte. */
+ a[j] = 0;
+ for (i=0; i<96 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ /* Emit the remaining whole bytes of the current digit. */
+ while (b < 32) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ /* Bits of the next digit needed to complete the last byte. */
+ s = 8 - (b - 32);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++; /* Re-use the partially filled byte. */
+ }
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit tmp[12 * 2]; /* Full 24-word product built here, then copied. */
+ __asm__ __volatile__ (
+ /* Column-wise schoolbook multiply.
+ * r3:r4:r5 = 96-bit column accumulator, r9 = output byte offset,
+ * r12 = tmp, r10 = a, r11 = b, r14 = a + 48 (one past end of a). */
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r9, r3\n\t"
+ "mov r12, %[r]\n\t"
+ "mov r10, %[a]\n\t"
+ "mov r11, %[b]\n\t"
+ "mov r6, #48\n\t"
+ "add r6, r6, r10\n\t"
+ "mov r14, r6\n\t"
+ "\n1:\n\t"
+ /* Outer loop: one 32-bit output word per iteration.
+ * Branch-free clamp: inner index starts at max(0, offset - 44). */
+ "mov %[r], #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r6, #44\n\t"
+ "mov %[a], r9\n\t"
+ "subs %[a], %[a], r6\n\t"
+ "sbc r6, r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], %[a], r6\n\t"
+ "mov %[b], r9\n\t"
+ "sub %[b], %[b], %[a]\n\t"
+ "add %[a], %[a], r10\n\t"
+ "add %[b], %[b], r11\n\t"
+ "\n2:\n\t"
+ /* Inner loop: accumulate a[i] * b[k-i] into r3:r4:r5. */
+ /* Multiply Start */
+ "ldr r6, [%[a]]\n\t"
+ "ldr r8, [%[b]]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ /* Multiply Done */
+ "add %[a], %[a], #4\n\t"
+ "sub %[b], %[b], #4\n\t"
+ "cmp %[a], r14\n\t"
+ "beq 3f\n\t"
+ "mov r6, r9\n\t"
+ "add r6, r6, r10\n\t"
+ "cmp %[a], r6\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Store this column's low word and shift the accumulator down. */
+ "mov %[r], r12\n\t"
+ "mov r8, r9\n\t"
+ "str r3, [%[r], r8]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "add r8, r8, #4\n\t"
+ "mov r9, r8\n\t"
+ "mov r6, #88\n\t"
+ "cmp r8, r6\n\t"
+ "ble 1b\n\t"
+ "str r3, [%[r], r8]\n\t" /* Final (top) output word. */
+ "mov %[a], r10\n\t"
+ "mov %[b], r11\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+ );
+
+ /* Copy the finished product out; allows r to alias a or b. */
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ /* Column-wise squaring into a 96-byte stack buffer; off-diagonal
+ * products are added twice, diagonal squares once.
+ * r3:r4:r5 = column accumulator, r9 = output byte offset,
+ * r12 = r, r10 = a, r11 = stack temp. */
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r9, r3\n\t"
+ "mov r12, %[r]\n\t"
+ /* Reserve 96 bytes of stack for the intermediate product. */
+ "mov r6, #96\n\t"
+ "neg r6, r6\n\t"
+ "add sp, sp, r6\n\t"
+ "mov r11, sp\n\t"
+ "mov r10, %[a]\n\t"
+ "\n1:\n\t"
+ /* Outer loop: one output word per iteration.
+ * %[a] = low index of pair (clamped to max(0, offset - 44)),
+ * r2 = high index of pair; they walk towards each other. */
+ "mov %[r], #0\n\t"
+ "mov r6, #44\n\t"
+ "mov %[a], r9\n\t"
+ "subs %[a], %[a], r6\n\t"
+ "sbc r6, r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], %[a], r6\n\t"
+ "mov r2, r9\n\t"
+ "sub r2, r2, %[a]\n\t"
+ "add %[a], %[a], r10\n\t"
+ "add r2, r2, r10\n\t"
+ "\n2:\n\t"
+ "cmp r2, %[a]\n\t"
+ "beq 4f\n\t" /* Indices met: square the diagonal element. */
+ /* Multiply * 2: Start */
+ "ldr r6, [%[a]]\n\t"
+ "ldr r8, [r2]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ /* Multiply * 2: Done */
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ /* Square: Start */
+ "ldr r6, [%[a]]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ /* Square: Done */
+ "\n5:\n\t"
+ "add %[a], %[a], #4\n\t"
+ "sub r2, r2, #4\n\t"
+ "mov r6, #48\n\t"
+ "add r6, r6, r10\n\t"
+ "cmp %[a], r6\n\t"
+ "beq 3f\n\t"
+ "cmp %[a], r2\n\t"
+ "bgt 3f\n\t" /* Stop once the pair indices cross. */
+ "mov r8, r9\n\t"
+ "add r8, r8, r10\n\t"
+ "cmp %[a], r8\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ /* Store this column's low word and shift the accumulator down. */
+ "mov %[r], r11\n\t"
+ "mov r8, r9\n\t"
+ "str r3, [%[r], r8]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r8, r8, #4\n\t"
+ "mov r9, r8\n\t"
+ "mov r6, #88\n\t"
+ "cmp r8, r6\n\t"
+ "ble 1b\n\t"
+ "mov %[a], r10\n\t"
+ "str r3, [%[r], r8]\n\t" /* Final (top) output word. */
+ /* Copy the 96-byte stack result into r (label 4 is reused here). */
+ "mov %[r], r12\n\t"
+ "mov %[a], r11\n\t"
+ "mov r3, #92\n\t"
+ "\n4:\n\t"
+ "ldr r6, [%[a], r3]\n\t"
+ "str r6, [%[r], r3]\n\t"
+ "subs r3, r3, #4\n\t"
+ "bge 4b\n\t"
+ /* Restore the stack pointer. */
+ "mov r6, #96\n\t"
+ "add sp, sp, r6\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+ );
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0; /* Carry out of the 12-word addition (0 or 1). */
+
+ __asm__ __volatile__ (
+ /* Two words per group: the first group sets the carry with ADDS,
+ * the rest propagate it with ADCS. All pointers advance via
+ * post-increment LDM/STM. */
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t" /* c = final carry flag. */
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+/* Subtract b from a, storing the result back into a. (a = a - b)
+ *
+ * a A single precision integer. Minuend; overwritten with the result.
+ * b A single precision integer. Subtrahend.
+ * Returns 0 when there is no borrow, all ones when there is.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0; /* Borrow out: 0 or all ones (-1). */
+
+ __asm__ __volatile__ (
+ /* Two words per group: the first group sets the borrow with SUBS,
+ * the rest propagate it with SBCS. a is loaded without write-back
+ * and advanced by the STM that stores the result in place. */
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "subs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "sbc %[c], %[c], %[c]\n\t" /* c = 0 - borrow (0 or -1). */
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0; /* Carry out of the 24-word addition (0 or 1). */
+
+ __asm__ __volatile__ (
+ /* Two words per group: the first group sets the carry with ADDS,
+ * the rest propagate it with ADCS. All pointers advance via
+ * post-increment LDM/STM. */
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t" /* c = final carry flag. */
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i = 0;
+
+ while (i < 12) {
+ r[i] = a[i] & m;
+ i++;
+ }
+#else
+ int i;
+
+ /* Four words per iteration; 12 words total. */
+ for (i = 0; i < 12; i += 4) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;   /* Low product a_lo * b_lo, built directly in r. */
+ sp_digit z1[24];    /* Middle product (a_lo+a_hi) * (b_lo+b_hi). */
+ sp_digit a1[12];    /* a_lo + a_hi (mod 2^384). */
+ sp_digit b1[12];    /* b_lo + b_hi (mod 2^384). */
+ sp_digit z2[24];    /* High product a_hi * b_hi. */
+ sp_digit u, ca, cb; /* u: carry/borrow accumulator; ca/cb: half-sum carries. */
+
+ /* Karatsuba multiply with explicit correction for the carries
+ * dropped from the half-sums. */
+ ca = sp_3072_add_12(a1, a, &a[12]);
+ cb = sp_3072_add_12(b1, b, &b[12]);
+ u  = ca & cb; /* ca*cb term of the middle product. */
+ sp_3072_mul_12(z1, a1, b1);
+ sp_3072_mul_12(z2, &a[12], &b[12]);
+ sp_3072_mul_12(z0, a, b);
+ /* Add the cross terms cb*a1 and ca*b1 via constant-time masks. */
+ sp_3072_mask_12(r + 24, a1, 0 - cb);
+ sp_3072_mask_12(b1, b1, 0 - ca);
+ u += sp_3072_add_12(r + 24, r + 24, b1);
+ /* z1 -= z2 + z0, leaving the true middle term; then combine. */
+ u += sp_3072_sub_in_place_24(z1, z2);
+ u += sp_3072_sub_in_place_24(z1, z0);
+ u += sp_3072_add_24(r + 12, r + 12, z1);
+ r[36] = u; /* Net carry lands in the word above the middle term. */
+ XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+ (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r; /* Low square a_lo^2, built directly in r. */
+ sp_digit z2[24];  /* High square a_hi^2. */
+ sp_digit z1[24];  /* Middle square (a_lo + a_hi)^2. */
+ sp_digit a1[12];  /* a_lo + a_hi (mod 2^384). */
+ sp_digit u;       /* Carry/borrow accumulator. */
+
+ /* Karatsuba squaring with explicit half-sum carry correction. */
+ u = sp_3072_add_12(a1, a, &a[12]);
+ sp_3072_sqr_12(z1, a1);
+ sp_3072_sqr_12(z2, &a[12]);
+ sp_3072_sqr_12(z0, a);
+ /* When the half-sum carried, add 2*a1 at the 2^768 position. */
+ sp_3072_mask_12(r + 24, a1, 0 - u);
+ u += sp_3072_add_12(r + 24, r + 24, r + 24);
+ /* z1 -= z2 + z0, leaving the true middle term; then combine. */
+ u += sp_3072_sub_in_place_24(z1, z2);
+ u += sp_3072_sub_in_place_24(z1, z0);
+ u += sp_3072_add_24(r + 12, r + 12, z1);
+ r[36] = u; /* Net carry lands in the word above the middle term. */
+ XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
+ (void)sp_3072_add_24(r + 24, r + 24, z2);
+}
+
+/* Subtract b from a, storing the result back into a. (a = a - b)
+ *
+ * a A single precision integer. Minuend; overwritten with the result.
+ * b A single precision integer. Subtrahend.
+ * Returns 0 when there is no borrow, all ones when there is.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0; /* Borrow out: 0 or all ones (-1). */
+
+ __asm__ __volatile__ (
+ /* Two words per group: the first group sets the borrow with SUBS,
+ * the rest propagate it with SBCS. a is loaded without write-back
+ * and advanced by the STM that stores the result in place. */
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "subs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "sbc %[c], %[c], %[c]\n\t" /* c = 0 - borrow (0 or -1). */
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0; /* Carry out of the 48-word addition (0 or 1). */
+
+ __asm__ __volatile__ (
+ /* Two words per group: the first group sets the carry with ADDS,
+ * the rest propagate it with ADCS. All pointers advance via
+ * post-increment LDM/STM. */
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t" /* c = final carry flag. */
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i = 0;
+
+ while (i < 24) {
+ r[i] = a[i] & m;
+ i++;
+ }
+#else
+ int i;
+
+ /* Four words per iteration; 24 words total. */
+ for (i = 0; i < 24; i += 4) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;   /* Low product a_lo * b_lo, built directly in r. */
+ sp_digit z1[48];    /* Middle product (a_lo+a_hi) * (b_lo+b_hi). */
+ sp_digit a1[24];    /* a_lo + a_hi (mod 2^768). */
+ sp_digit b1[24];    /* b_lo + b_hi (mod 2^768). */
+ sp_digit z2[48];    /* High product a_hi * b_hi. */
+ sp_digit u, ca, cb; /* u: carry/borrow accumulator; ca/cb: half-sum carries. */
+
+ /* Karatsuba multiply with explicit correction for the carries
+ * dropped from the half-sums. */
+ ca = sp_3072_add_24(a1, a, &a[24]);
+ cb = sp_3072_add_24(b1, b, &b[24]);
+ u  = ca & cb; /* ca*cb term of the middle product. */
+ sp_3072_mul_24(z1, a1, b1);
+ sp_3072_mul_24(z2, &a[24], &b[24]);
+ sp_3072_mul_24(z0, a, b);
+ /* Add the cross terms cb*a1 and ca*b1 via constant-time masks. */
+ sp_3072_mask_24(r + 48, a1, 0 - cb);
+ sp_3072_mask_24(b1, b1, 0 - ca);
+ u += sp_3072_add_24(r + 48, r + 48, b1);
+ /* z1 -= z2 + z0, leaving the true middle term; then combine. */
+ u += sp_3072_sub_in_place_48(z1, z2);
+ u += sp_3072_sub_in_place_48(z1, z0);
+ u += sp_3072_add_48(r + 24, r + 24, z1);
+ r[72] = u; /* Net carry lands in the word above the middle term. */
+ XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+ (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r; /* Low square a_lo^2, built directly in r. */
+ sp_digit z2[48];  /* High square a_hi^2. */
+ sp_digit z1[48];  /* Middle square (a_lo + a_hi)^2. */
+ sp_digit a1[24];  /* a_lo + a_hi (mod 2^768). */
+ sp_digit u;       /* Carry/borrow accumulator. */
+
+ /* Karatsuba squaring with explicit half-sum carry correction. */
+ u = sp_3072_add_24(a1, a, &a[24]);
+ sp_3072_sqr_24(z1, a1);
+ sp_3072_sqr_24(z2, &a[24]);
+ sp_3072_sqr_24(z0, a);
+ /* When the half-sum carried, add 2*a1 at the 2^1536 position. */
+ sp_3072_mask_24(r + 48, a1, 0 - u);
+ u += sp_3072_add_24(r + 48, r + 48, r + 48);
+ /* z1 -= z2 + z0, leaving the true middle term; then combine. */
+ u += sp_3072_sub_in_place_48(z1, z2);
+ u += sp_3072_sub_in_place_48(z1, z0);
+ u += sp_3072_add_48(r + 24, r + 24, z1);
+ r[72] = u; /* Net carry lands in the word above the middle term. */
+ XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
+ (void)sp_3072_add_48(r + 48, r + 48, z2);
+}
+
+/* Subtract b from a, storing the result back into a. (a = a - b)
+ *
+ * a A single precision integer. Minuend; overwritten with the result.
+ * b A single precision integer. Subtrahend.
+ * Returns 0 when there is no borrow, all ones when there is.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "subs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "ldm %[a], {r3, r4}\n\t"
+ "ldm %[b]!, {r5, r6}\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "stm %[a]!, {r3, r4}\n\t"
+ "sbc %[c], %[c], %[c]\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6"
+ );
+
+ return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer. Result; 96 digits.
+ * a A single precision integer. 96 digits.
+ * b A single precision integer. 96 digits.
+ * Returns the carry out of the addition (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ /* Fully unrolled: 48 groups of two 32-bit words. The first group
+ * uses ADDS; every later group uses ADCS so the carry propagates
+ * through the C flag across all 96 words. */
+ __asm__ __volatile__ (
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "ldm %[a]!, {r4, r5}\n\t"
+ "ldm %[b]!, {r6, r8}\n\t"
+ "adcs r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "stm %[r]!, {r4, r5}\n\t"
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t" /* c = final carry (0 or 1) */
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * m is expected to be 0 or all-ones, so this either copies a into r or
+ * zeroes r without branching (constant time).
+ *
+ * r A single precision integer. Result; 48 digits.
+ * a A single precision integer. 48 digits.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<48; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ int i;
+
+ /* Unrolled eight words per iteration. */
+ for (i = 0; i < 48; i += 8) {
+ r[i+0] = a[i+0] & m;
+ r[i+1] = a[i+1] & m;
+ r[i+2] = a[i+2] & m;
+ r[i+3] = a[i+3] & m;
+ r[i+4] = a[i+4] & m;
+ r[i+5] = a[i+5] & m;
+ r[i+6] = a[i+6] & m;
+ r[i+7] = a[i+7] & m;
+ }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Karatsuba multiply: with a = a0 + a1*2^(48*32), b = b0 + b1*2^(48*32),
+ * a*b = z0 + (z1 - z0 - z2)*2^(48*32) + z2*2^(96*32) where z0 = a0*b0,
+ * z2 = a1*b1 and z1 = (a0 + a1)*(b0 + b1).
+ *
+ * r A single precision integer. Result; holds 192 digits.
+ * a A single precision integer. 96 digits.
+ * b A single precision integer. 96 digits.
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit* z0 = r;
+ sp_digit z1[96];
+ sp_digit a1[48];
+ sp_digit b1[48];
+ sp_digit z2[96];
+ sp_digit u, ca, cb;
+
+ /* ca/cb are the carries out of the half-sums a0+a1 and b0+b1. */
+ ca = sp_3072_add_48(a1, a, &a[48]);
+ cb = sp_3072_add_48(b1, b, &b[48]);
+ u = ca & cb;
+ sp_3072_mul_48(z1, a1, b1);
+ sp_3072_mul_48(z2, &a[48], &b[48]);
+ sp_3072_mul_48(z0, a, b);
+ /* Add the cross terms contributed by the half-sum carries. */
+ sp_3072_mask_48(r + 96, a1, 0 - cb);
+ sp_3072_mask_48(b1, b1, 0 - ca);
+ u += sp_3072_add_48(r + 96, r + 96, b1);
+ /* z1 -= z0 + z2; then fold z1 and z2 into the result. */
+ u += sp_3072_sub_in_place_96(z1, z2);
+ u += sp_3072_sub_in_place_96(z1, z0);
+ u += sp_3072_add_96(r + 48, r + 48, z1);
+ r[144] = u;
+ XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+ (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba squaring: split a into a0 (low 48 digits) and a1 (high 48
+ * digits) so that a^2 = z0 + (z1 - z0 - z2)*2^(48*32) + z2*2^(96*32)
+ * where z0 = a0^2, z2 = a1^2 and z1 = (a0 + a1)^2.
+ *
+ * r A single precision integer. Result; holds 192 digits.
+ * a A single precision integer. 96 digits.
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+ sp_digit* z0 = r;
+ sp_digit z2[96];
+ sp_digit z1[96];
+ sp_digit a1[48];
+ sp_digit u;
+
+ /* a1 = a0 + a1; u is the carry out of that addition. */
+ u = sp_3072_add_48(a1, a, &a[48]);
+ sp_3072_sqr_48(z1, a1);
+ sp_3072_sqr_48(z2, &a[48]);
+ sp_3072_sqr_48(z0, a);
+ /* If the half-sum carried, add 2 * u * a1 (masked, doubled) into r. */
+ sp_3072_mask_48(r + 96, a1, 0 - u);
+ u += sp_3072_add_48(r + 96, r + 96, r + 96);
+ u += sp_3072_sub_in_place_96(z1, z2);
+ u += sp_3072_sub_in_place_96(z1, z0);
+ u += sp_3072_add_96(r + 48, r + 48, z1);
+ r[144] = u;
+ XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
+ (void)sp_3072_add_96(r + 96, r + 96, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Small-code variant: a one-word loop over the 96 digits. The running
+ * carry is kept in c (0 or 1) and re-materialised into the C flag each
+ * iteration by adding -1.
+ *
+ * r A single precision integer. Result; 96 digits.
+ * a A single precision integer. 96 digits.
+ * b A single precision integer. 96 digits.
+ * Returns the carry out of the addition (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r6, %[a]\n\t"
+ "mov r8, #0\n\t"
+ "add r6, r6, #384\n\t" /* r6 = end of a (96 words * 4 bytes) */
+ "sub r8, r8, #1\n\t" /* r8 = -1 */
+ "\n1:\n\t"
+ "adds %[c], %[c], r8\n\t" /* c + (-1): sets C iff c was 1 */
+ "ldr r4, [%[a]]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "str r4, [%[r]]\n\t"
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t" /* save carry back into c */
+ "add %[a], %[a], #4\n\t"
+ "add %[b], %[b], #4\n\t"
+ "add %[r], %[r], #4\n\t"
+ "cmp %[a], r6\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Small-code variant: a two-word loop over the 96 digits. The running
+ * borrow is kept in c (0 or -1) and re-materialised into the C flag at
+ * the top of each iteration.
+ *
+ * a A single precision integer (96 digits); updated in place.
+ * b A single precision integer (96 digits).
+ * Returns 0 when there is no borrow out, all-ones (-1) when there is.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+ __asm__ __volatile__ (
+ "mov r8, %[a]\n\t"
+ "add r8, r8, #384\n\t" /* r8 = end of a (96 words * 4 bytes) */
+ "\n1:\n\t"
+ "mov r5, #0\n\t"
+ "subs r5, r5, %[c]\n\t" /* 0 - c: restores the borrow flag; r5 is
+ * scratch and reloaded just below */
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "str r3, [%[a]]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "sbc %[c], %[c], %[c]\n\t" /* c = 0 - borrow (0 or -1) */
+ "add %[a], %[a], #8\n\t"
+ "add %[b], %[b], #8\n\t"
+ "cmp %[a], r8\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Small-code product-scanning (Comba) multiply: for each output column
+ * the partial products are accumulated into the triple-word accumulator
+ * r5:r4:r3 before the column word is stored. Result is built in tmp and
+ * copied out so r may alias a or b.
+ *
+ * NOTE(review): %[r]/%[a]/%[b] are input-only constraints but are
+ * modified inside the asm and restored at the end - this matches the
+ * SP code generator's convention; confirm before altering constraints.
+ *
+ * r A single precision integer. Result; 192 digits.
+ * a A single precision integer. 96 digits.
+ * b A single precision integer. 96 digits.
+ */
+SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit tmp[96 * 2];
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r9, r3\n\t" /* r9 = byte offset of current output column */
+ "mov r12, %[r]\n\t"
+ "mov r10, %[a]\n\t"
+ "mov r11, %[b]\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #128\n\t" /* r6 = 384 = byte length of a */
+ "add r6, r6, r10\n\t"
+ "mov r14, r6\n\t" /* r14 = end of a */
+ "\n1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #124\n\t" /* r6 = 380 = offset of a[95] */
+ "mov %[a], r9\n\t"
+ "subs %[a], %[a], r6\n\t" /* a-offset = max(0, column - 380) */
+ "sbc r6, r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], %[a], r6\n\t"
+ "mov %[b], r9\n\t"
+ "sub %[b], %[b], %[a]\n\t" /* b-offset = column - a-offset */
+ "add %[a], %[a], r10\n\t"
+ "add %[b], %[b], r11\n\t"
+ "\n2:\n\t"
+ /* Multiply Start */
+ "ldr r6, [%[a]]\n\t"
+ "ldr r8, [%[b]]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ /* Multiply Done */
+ "add %[a], %[a], #4\n\t"
+ "sub %[b], %[b], #4\n\t"
+ "cmp %[a], r14\n\t"
+ "beq 3f\n\t"
+ "mov r6, r9\n\t"
+ "add r6, r6, r10\n\t"
+ "cmp %[a], r6\n\t" /* stop when a-index passes the column */
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "mov %[r], r12\n\t"
+ "mov r8, r9\n\t"
+ "str r3, [%[r], r8]\n\t" /* store finished column word */
+ "mov r3, r4\n\t" /* shift the accumulator down one word */
+ "mov r4, r5\n\t"
+ "add r8, r8, #4\n\t"
+ "mov r9, r8\n\t"
+ "mov r6, #2\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #248\n\t" /* r6 = 760 = offset of last column */
+ "cmp r8, r6\n\t"
+ "ble 1b\n\t"
+ "str r3, [%[r], r8]\n\t"
+ "mov %[a], r10\n\t"
+ "mov %[b], r11\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Small-code product-scanning square: symmetric partial products
+ * a[i]*a[j] (i != j) are computed once and added twice; the diagonal
+ * a[i]^2 is added once. The 768-byte result is built in stack scratch
+ * (sp is moved down by 0x300) then copied to r.
+ *
+ * NOTE(review): sp is adjusted inside the asm and %[r]/%[a] are
+ * modified despite input-only constraints - generator convention;
+ * confirm before altering.
+ *
+ * r A single precision integer. Result; 192 digits.
+ * a A single precision integer. 96 digits.
+ */
+SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r9, r3\n\t" /* r9 = byte offset of current column */
+ "mov r12, %[r]\n\t"
+ "mov r6, #3\n\t"
+ "lsl r6, r6, #8\n\t" /* r6 = 768 = result scratch size */
+ "neg r6, r6\n\t"
+ "add sp, sp, r6\n\t" /* reserve scratch on the stack */
+ "mov r11, sp\n\t"
+ "mov r10, %[a]\n\t"
+ "\n1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #124\n\t" /* r6 = 380 = offset of a[95] */
+ "mov %[a], r9\n\t"
+ "subs %[a], %[a], r6\n\t"
+ "sbc r6, r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], %[a], r6\n\t" /* low index = max(0, column - 380) */
+ "mov r2, r9\n\t"
+ "sub r2, r2, %[a]\n\t" /* r2 = high index (walks down) */
+ "add %[a], %[a], r10\n\t"
+ "add r2, r2, r10\n\t"
+ "\n2:\n\t"
+ "cmp r2, %[a]\n\t"
+ "beq 4f\n\t" /* i == j: diagonal, add once */
+ /* Multiply * 2: Start */
+ "ldr r6, [%[a]]\n\t"
+ "ldr r8, [r2]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ /* Multiply * 2: Done */
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ /* Square: Start */
+ "ldr r6, [%[a]]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ /* Square: Done */
+ "\n5:\n\t"
+ "add %[a], %[a], #4\n\t"
+ "sub r2, r2, #4\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #128\n\t" /* r6 = 384 = byte length of a */
+ "add r6, r6, r10\n\t"
+ "cmp %[a], r6\n\t"
+ "beq 3f\n\t"
+ "cmp %[a], r2\n\t" /* stop once indices cross */
+ "bgt 3f\n\t"
+ "mov r8, r9\n\t"
+ "add r8, r8, r10\n\t"
+ "cmp %[a], r8\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "mov %[r], r11\n\t"
+ "mov r8, r9\n\t"
+ "str r3, [%[r], r8]\n\t" /* store finished column word */
+ "mov r3, r4\n\t" /* shift the accumulator down one word */
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r8, r8, #4\n\t"
+ "mov r9, r8\n\t"
+ "mov r6, #2\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #248\n\t" /* r6 = 760 = offset of last column */
+ "cmp r8, r6\n\t"
+ "ble 1b\n\t"
+ "mov %[a], r10\n\t"
+ "str r3, [%[r], r8]\n\t"
+ "mov %[r], r12\n\t"
+ "mov %[a], r11\n\t"
+ "mov r3, #2\n\t"
+ "lsl r3, r3, #8\n\t"
+ "add r3, r3, #252\n\t" /* copy scratch back: start at byte 764 */
+ "\n4:\n\t"
+ "ldr r6, [%[a], r3]\n\t"
+ "str r6, [%[r], r3]\n\t"
+ "subs r3, r3, #4\n\t"
+ "bge 4b\n\t"
+ "mov r6, #3\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add sp, sp, r6\n\t" /* release the 768-byte scratch */
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#ifdef WOLFSSL_SP_SMALL
+/* AND m into each word of a and store in r.
+ *
+ * m is expected to be 0 or all-ones, so this either copies a into r or
+ * zeroes r without branching (constant time).
+ *
+ * r A single precision integer. Result; 48 digits.
+ * a A single precision integer. 48 digits.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+ int i;
+
+ for (i=0; i<48; i++) {
+ r[i] = a[i] & m;
+ }
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Small-code variant: a one-word loop over the 48 digits. The running
+ * carry is kept in c (0 or 1) and re-materialised into the C flag each
+ * iteration by adding -1.
+ *
+ * r A single precision integer. Result; 48 digits.
+ * a A single precision integer. 48 digits.
+ * b A single precision integer. 48 digits.
+ * Returns the carry out of the addition (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r6, %[a]\n\t"
+ "mov r8, #0\n\t"
+ "add r6, r6, #192\n\t" /* r6 = end of a (48 words * 4 bytes) */
+ "sub r8, r8, #1\n\t" /* r8 = -1 */
+ "\n1:\n\t"
+ "adds %[c], %[c], r8\n\t" /* c + (-1): sets C iff c was 1 */
+ "ldr r4, [%[a]]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "adcs r4, r4, r5\n\t"
+ "str r4, [%[r]]\n\t"
+ "mov %[c], #0\n\t"
+ "adc %[c], %[c], %[c]\n\t" /* save carry back into c */
+ "add %[a], %[a], #4\n\t"
+ "add %[b], %[b], #4\n\t"
+ "add %[r], %[r], #4\n\t"
+ "cmp %[a], r6\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit c = 0;
+ __asm__ __volatile__ (
+ "mov r8, %[a]\n\t"
+ "add r8, r8, #192\n\t"
+ "\n1:\n\t"
+ "mov r5, #0\n\t"
+ "subs r5, r5, %[c]\n\t"
+ "ldr r3, [%[a]]\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "ldr r5, [%[b]]\n\t"
+ "ldr r6, [%[b], #4]\n\t"
+ "sbcs r3, r3, r5\n\t"
+ "sbcs r4, r4, r6\n\t"
+ "str r3, [%[a]]\n\t"
+ "str r4, [%[a], #4]\n\t"
+ "sbc %[c], %[c], %[c]\n\t"
+ "add %[a], %[a], #8\n\t"
+ "add %[b], %[b], #8\n\t"
+ "cmp %[a], r8\n\t"
+ "bne 1b\n\t"
+ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+ :
+ : "memory", "r3", "r4", "r5", "r6", "r8"
+ );
+
+ return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit tmp[48 * 2];
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r9, r3\n\t"
+ "mov r12, %[r]\n\t"
+ "mov r10, %[a]\n\t"
+ "mov r11, %[b]\n\t"
+ "mov r6, #192\n\t"
+ "add r6, r6, r10\n\t"
+ "mov r14, r6\n\t"
+ "\n1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r6, #188\n\t"
+ "mov %[a], r9\n\t"
+ "subs %[a], %[a], r6\n\t"
+ "sbc r6, r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], %[a], r6\n\t"
+ "mov %[b], r9\n\t"
+ "sub %[b], %[b], %[a]\n\t"
+ "add %[a], %[a], r10\n\t"
+ "add %[b], %[b], r11\n\t"
+ "\n2:\n\t"
+ /* Multiply Start */
+ "ldr r6, [%[a]]\n\t"
+ "ldr r8, [%[b]]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ /* Multiply Done */
+ "add %[a], %[a], #4\n\t"
+ "sub %[b], %[b], #4\n\t"
+ "cmp %[a], r14\n\t"
+ "beq 3f\n\t"
+ "mov r6, r9\n\t"
+ "add r6, r6, r10\n\t"
+ "cmp %[a], r6\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "mov %[r], r12\n\t"
+ "mov r8, r9\n\t"
+ "str r3, [%[r], r8]\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "add r8, r8, #4\n\t"
+ "mov r9, r8\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #120\n\t"
+ "cmp r8, r6\n\t"
+ "ble 1b\n\t"
+ "str r3, [%[r], r8]\n\t"
+ "mov %[a], r10\n\t"
+ "mov %[b], r11\n\t"
+ :
+ : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+ );
+
+ XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Small-code product-scanning square: symmetric partial products
+ * a[i]*a[j] (i != j) are computed once and added twice; the diagonal
+ * a[i]^2 is added once. The 384-byte result is built in stack scratch
+ * (sp is moved down by 0x180) then copied to r.
+ *
+ * NOTE(review): sp is adjusted inside the asm and %[r]/%[a] are
+ * modified despite input-only constraints - generator convention;
+ * confirm before altering.
+ *
+ * r A single precision integer. Result; 96 digits.
+ * a A single precision integer. 48 digits.
+ */
+SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
+{
+ __asm__ __volatile__ (
+ "mov r3, #0\n\t"
+ "mov r4, #0\n\t"
+ "mov r5, #0\n\t"
+ "mov r9, r3\n\t" /* r9 = byte offset of current column */
+ "mov r12, %[r]\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #128\n\t" /* r6 = 384 = result scratch size */
+ "neg r6, r6\n\t"
+ "add sp, sp, r6\n\t" /* reserve scratch on the stack */
+ "mov r11, sp\n\t"
+ "mov r10, %[a]\n\t"
+ "\n1:\n\t"
+ "mov %[r], #0\n\t"
+ "mov r6, #188\n\t" /* 188 = offset of a[47] */
+ "mov %[a], r9\n\t"
+ "subs %[a], %[a], r6\n\t"
+ "sbc r6, r6, r6\n\t"
+ "mvn r6, r6\n\t"
+ "and %[a], %[a], r6\n\t" /* low index = max(0, column - 188) */
+ "mov r2, r9\n\t"
+ "sub r2, r2, %[a]\n\t" /* r2 = high index (walks down) */
+ "add %[a], %[a], r10\n\t"
+ "add r2, r2, r10\n\t"
+ "\n2:\n\t"
+ "cmp r2, %[a]\n\t"
+ "beq 4f\n\t" /* i == j: diagonal, add once */
+ /* Multiply * 2: Start */
+ "ldr r6, [%[a]]\n\t"
+ "ldr r8, [r2]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ /* Multiply * 2: Done */
+ "bal 5f\n\t"
+ "\n4:\n\t"
+ /* Square: Start */
+ "ldr r6, [%[a]]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, %[r]\n\t"
+ /* Square: Done */
+ "\n5:\n\t"
+ "add %[a], %[a], #4\n\t"
+ "sub r2, r2, #4\n\t"
+ "mov r6, #192\n\t" /* 192 = byte length of a */
+ "add r6, r6, r10\n\t"
+ "cmp %[a], r6\n\t"
+ "beq 3f\n\t"
+ "cmp %[a], r2\n\t" /* stop once indices cross */
+ "bgt 3f\n\t"
+ "mov r8, r9\n\t"
+ "add r8, r8, r10\n\t"
+ "cmp %[a], r8\n\t"
+ "ble 2b\n\t"
+ "\n3:\n\t"
+ "mov %[r], r11\n\t"
+ "mov r8, r9\n\t"
+ "str r3, [%[r], r8]\n\t" /* store finished column word */
+ "mov r3, r4\n\t" /* shift the accumulator down one word */
+ "mov r4, r5\n\t"
+ "mov r5, #0\n\t"
+ "add r8, r8, #4\n\t"
+ "mov r9, r8\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #120\n\t" /* r6 = 376 = offset of last column */
+ "cmp r8, r6\n\t"
+ "ble 1b\n\t"
+ "mov %[a], r10\n\t"
+ "str r3, [%[r], r8]\n\t"
+ "mov %[r], r12\n\t"
+ "mov %[a], r11\n\t"
+ "mov r3, #1\n\t"
+ "lsl r3, r3, #8\n\t"
+ "add r3, r3, #124\n\t" /* copy scratch back: start at byte 380 */
+ "\n4:\n\t"
+ "ldr r6, [%[a], r3]\n\t"
+ "str r6, [%[r], r3]\n\t"
+ "subs r3, r3, #4\n\t"
+ "bge 4b\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #128\n\t"
+ "add sp, sp, r6\n\t" /* release the 384-byte scratch */
+ :
+ : [r] "r" (r), [a] "r" (a)
+ : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+ );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * Newton iteration doubles the number of correct low-order bits each
+ * step, starting from a 4-bit inverse of the (odd) low word.
+ *
+ * a A single precision number. a[0] must be odd.
+ * rho Bottom word of inverse.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit x, b;
+
+ b = a[0];
+ x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+ x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+
+ /* rho = -1/m mod b */
+ *rho = -x;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Word-by-word multiply; the 64-bit partial products are accumulated
+ * through the two-word carry pipeline r3:r4 (r5 catching overflow).
+ * The result occupies 97 words.
+ *
+ * r A single precision integer. Result; 97 digits.
+ * a A single precision integer. 96 digits.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+ __asm__ __volatile__ (
+ "add r9, %[a], #384\n\t" /* r9 = end of a (96 words) */
+ /* A[0] * B */
+ "ldr r6, [%[a]], #4\n\t"
+ "umull r5, r3, r6, %[b]\n\t"
+ "mov r4, #0\n\t"
+ "str r5, [%[r]], #4\n\t"
+ /* A[0] * B - Done */
+ "\n1:\n\t"
+ "mov r5, #0\n\t"
+ /* A[] * B */
+ "ldr r6, [%[a]], #4\n\t"
+ "umull r6, r8, r6, %[b]\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[] * B - Done */
+ "str r3, [%[r]], #4\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "cmp %[a], r9\n\t"
+ "blt 1b\n\t"
+ "str r3, [%[r]]\n\t" /* top carry word r[96] */
+ : [r] "+r" (r), [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+ );
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 1536 bits (48 x 32-bit digits, half of 3072),
+ * just need to subtract: r = 2^1536 - m = 0 - m over 48 digits.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
+{
+ XMEMSET(r, 0, sizeof(sp_digit) * 48);
+
+ /* r = 2^n mod m */
+ sp_3072_sub_in_place_48(r, m);
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Branch-free (constant time): b is ANDed with m so the same sequence
+ * of loads/stores runs whichever value m has.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * Returns 0 - borrow (0 or all-ones).
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, sp_digit m)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r5, #192\n\t" /* 192 = byte length (48 words) */
+ "mov r9, r5\n\t"
+ "mov r8, #0\n\t" /* r8 = running byte offset */
+ "\n1:\n\t"
+ "ldr r6, [%[b], r8]\n\t"
+ "and r6, r6, %[m]\n\t" /* mask the subtrahend word */
+ "mov r5, #0\n\t"
+ "subs r5, r5, %[c]\n\t" /* restore borrow flag from c */
+ "ldr r5, [%[a], r8]\n\t"
+ "sbcs r5, r5, r6\n\t"
+ "sbcs %[c], %[c], %[c]\n\t" /* c = 0 or -1 (borrow) */
+ "str r5, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, r9\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r5", "r6", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * For each of the 48 low words: mu = a[i] * mp, then a[i..i+47] +=
+ * mu * m so the low word becomes zero; the pairwise-unrolled inner
+ * loop keeps the running carry in r4/r5 alternately. On exit a has
+ * been advanced past the low 48 words and the result (plus possible
+ * top carry ca) sits in the upper half; the final conditional
+ * subtract brings it below m.
+ *
+ * a A single precision number to reduce in place (96 digits).
+ * m The single precision number representing the modulus (48 digits).
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ "mov r9, %[mp]\n\t"
+ "mov r12, %[m]\n\t"
+ "mov r10, %[a]\n\t" /* r10 = &a[i] */
+ "mov r4, #0\n\t"
+ "add r11, r10, #192\n\t" /* r11 = &a[48]: outer-loop limit */
+ "\n1:\n\t"
+ /* mu = a[i] * mp */
+ "mov %[mp], r9\n\t"
+ "ldr %[a], [r10]\n\t"
+ "mul %[mp], %[mp], %[a]\n\t"
+ "mov %[m], r12\n\t"
+ "add r14, r10, #184\n\t" /* process m[0..45] in the 2-word loop */
+ "\n2:\n\t"
+ /* a[i+j] += m[j] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r5, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r5, r5, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r4, r4, %[a]\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r4, [r10], #4\n\t"
+ /* a[i+j+1] += m[j+1] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r4, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r4, r4, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r5, r5, %[a]\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [r10], #4\n\t"
+ "cmp r10, r14\n\t"
+ "blt 2b\n\t"
+ /* a[i+46] += m[46] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r5, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r5, r5, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r4, r4, %[a]\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r4, [r10], #4\n\t"
+ /* a[i+47] += m[47] * mu */
+ "mov r4, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ /* Multiply m[47] and mu - Start */
+ "ldr r8, [%[m]]\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ /* Multiply m[47] and mu - Done */
+ "ldr r6, [r10]\n\t"
+ "ldr r8, [r10, #4]\n\t"
+ "adds r6, r6, r5\n\t"
+ "adcs r8, r8, r4\n\t"
+ "adc %[ca], %[ca], #0\n\t" /* ca accumulates the top carry */
+ "str r6, [r10]\n\t"
+ "str r8, [r10, #4]\n\t"
+ /* Next word in a */
+ "sub r10, r10, #184\n\t" /* net advance of one word per pass */
+ "cmp r10, r11\n\t"
+ "blt 1b\n\t"
+ "mov %[a], r10\n\t" /* a now points at &a[48] */
+ "mov %[m], r12\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+ );
+
+ /* a was advanced to &a[48]: subtract m from the upper half when the
+ * top carry is set, storing at the original base (a - 48). */
+ sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_3072_mul_48(r, a, b);
+ sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (negative inverse of m mod 2^n).
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Full 96-word square, then Montgomery reduce back to 48 words. */
+ sp_3072_sqr_48(r, a);
+ sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ * The result is 49 words: r[48] receives the final carry word.
+ *
+ * r A single precision integer (49 words).
+ * a A single precision integer (48 words).
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+ __asm__ __volatile__ (
+ /* r9 = end of a (48 words * 4 bytes). */
+ "add r9, %[a], #192\n\t"
+ /* A[0] * B */
+ "ldr r6, [%[a]], #4\n\t"
+ "umull r5, r3, r6, %[b]\n\t"
+ "mov r4, #0\n\t"
+ "str r5, [%[r]], #4\n\t"
+ /* A[0] * B - Done */
+ "\n1:\n\t"
+ "mov r5, #0\n\t"
+ /* A[] * B: accumulate low into r3, high into r4, overflow into r5. */
+ "ldr r6, [%[a]], #4\n\t"
+ "umull r6, r8, r6, %[b]\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[] * B - Done */
+ "str r3, [%[r]], #4\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "cmp %[a], r9\n\t"
+ "blt 1b\n\t"
+ /* Store the final carry as r[48]. */
+ "str r3, [%[r]]\n\t"
+ : [r] "+r" (r), [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+ );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor (32-bit digit).
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
+ sp_digit div)
+{
+ sp_digit r = 0;
+
+ __asm__ __volatile__ (
+ /* r6 = (div >> 16) + 1: used to estimate the quotient 16 bits at a
+ * time; quotient estimate accumulates in r8. */
+ "lsr r6, %[div], #16\n\t"
+ "add r6, r6, #1\n\t"
+ "udiv r4, %[d1], r6\n\t"
+ "lsl r8, r4, #16\n\t"
+ "umull r4, r5, %[div], r8\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ "udiv r5, %[d1], r6\n\t"
+ "lsl r4, r5, #16\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ "lsl r4, %[d1], #16\n\t"
+ "orr r4, r4, %[d0], lsr #16\n\t"
+ "udiv r4, r4, r6\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ "lsl r4, %[d1], #16\n\t"
+ "orr r4, r4, %[d0], lsr #16\n\t"
+ "udiv r4, r4, r6\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ /* Final correction using the remaining low word. */
+ "udiv r4, %[d0], %[div]\n\t"
+ "add r8, r8, r4\n\t"
+ "mov %[r], r8\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r6", "r8"
+ );
+ return r;
+}
+
+/* Compare a with b in constant time.
+ *
+ * Every word is read regardless of where the first difference occurs: the
+ * running mask r3 becomes 0 after the first difference so later words can
+ * no longer change the result.
+ *
+ * a A single precision integer (48 words).
+ * b A single precision integer (48 words).
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+
+
+ __asm__ __volatile__ (
+ /* r3 = all-ones "still equal" mask; r6 = byte offset of word 47. */
+ "mov r3, #0\n\t"
+ "mvn r3, r3\n\t"
+ "mov r6, #188\n\t"
+ "\n1:\n\t"
+ "ldr r8, [%[a], r6]\n\t"
+ "ldr r5, [%[b], r6]\n\t"
+ "and r8, r8, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "mov r4, r8\n\t"
+ /* r -= 1 when a[i] < b[i]; then clear the mask. */
+ "subs r8, r8, r5\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], r8\n\t"
+ "mvn r8, r8\n\t"
+ "and r3, r3, r8\n\t"
+ /* r += 1 when a[i] > b[i]; then clear the mask. */
+ "subs r5, r5, r4\n\t"
+ "sbc r8, r8, r8\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "mvn r8, r8\n\t"
+ "and r3, r3, r8\n\t"
+ "sub r6, r6, #4\n\t"
+ "cmp r6, #0\n\t"
+ "bge 1b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "r3", "r4", "r5", "r6", "r8"
+ );
+
+ return r;
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided (96 words).
+ * d Number to divide with (48 words).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (48 words).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[96], t2[49];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ div = d[47];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+ for (i=47; i>=0; i--) {
+ /* Estimate the quotient digit; may be at most 1 too large. */
+ r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+ sp_3072_mul_d_48(t2, d, r1);
+ t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+ t1[48 + i] -= t2[48];
+ /* Two masked add-backs correct an over-estimate without branching:
+ * t1[48 + i] is all ones exactly when the subtraction underflowed. */
+ sp_3072_mask_48(t2, d, t1[48 + i]);
+ t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+ sp_3072_mask_48(t2, d, t1[48 + i]);
+ t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+ }
+
+ /* One final conditional subtract leaves the remainder below d. */
+ r1 = sp_3072_cmp_48(t1, d) >= 0;
+ sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result (48 words).
+ * a A single precision number that is to be reduced (96 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Division yields the remainder directly; the quotient is not needed. */
+ return sp_3072_div_48(a, m, NULL, r);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Uses a 4-bit fixed window with a 16-entry table of Montgomery powers.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may not be less than m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[16][96];
+#else
+ sp_digit* t[16];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<16; i++) {
+ t[i] = td + i * 96;
+ }
+#endif
+ /* t[0] = norm = 2^n mod m: the Montgomery form of 1 (table entry 0). */
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_48(norm, m);
+
+ /* t[1] = a * R mod m: place a in the top 48 words and reduce mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+ if (reduceA != 0) {
+ err = sp_3072_mod_48(t[1] + 48, a, m);
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_48(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+ err = sp_3072_mod_48(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the window table: t[i] = a^i in Montgomery form, i = 2..15. */
+ sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+
+ /* Extract the first (possibly shorter) window from the top of e.
+ * n caches the current exponent word; c counts bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 4;
+ if (c == 32) {
+ c = 28;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+ /* Per 4-bit window: four squarings then one table multiply. */
+ for (; i>=0 || c>=4; ) {
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 28;
+ n <<= 4;
+ c = 28;
+ }
+ else if (c < 4) {
+ /* Window straddles two exponent words. */
+ y = n >> 28;
+ n = e[i--];
+ c = 4 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+ }
+
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+
+ sp_3072_mont_mul_48(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form and fully reduce below m. */
+ XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+ sp_3072_mont_reduce_48(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+ sp_3072_cond_sub_48(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Uses a 5-bit fixed window with a 32-entry table of Montgomery powers.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may not be less than m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][96];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++) {
+ t[i] = td + i * 96;
+ }
+#endif
+ /* t[0] = norm = 2^n mod m: the Montgomery form of 1 (table entry 0). */
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_48(norm, m);
+
+ /* t[1] = a * R mod m: place a in the top 48 words and reduce mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
+ if (reduceA != 0) {
+ err = sp_3072_mod_48(t[1] + 48, a, m);
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_48(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+ err = sp_3072_mod_48(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the window table: t[i] = a^i in Montgomery form, i = 2..31. */
+ sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+ sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
+ sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
+ sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
+ sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
+ sp_3072_mont_sqr_48(t[20], t[10], m, mp);
+ sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
+ sp_3072_mont_sqr_48(t[22], t[11], m, mp);
+ sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
+ sp_3072_mont_sqr_48(t[24], t[12], m, mp);
+ sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
+ sp_3072_mont_sqr_48(t[26], t[13], m, mp);
+ sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
+ sp_3072_mont_sqr_48(t[28], t[14], m, mp);
+ sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
+ sp_3072_mont_sqr_48(t[30], t[15], m, mp);
+ sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
+
+ /* Extract the first (possibly shorter) window from the top of e.
+ * n caches the current exponent word; c counts bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+ /* Per 5-bit window: five squarings then one table multiply. */
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles two exponent words. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+
+ sp_3072_mont_mul_48(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form and fully reduce below m. */
+ XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
+ sp_3072_mont_reduce_48(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+ sp_3072_cond_sub_48(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
+{
+ /* 0 - m over 96 words equals 2^3072 - m, which is 2^3072 mod m when m
+ * has its top bit set (true for a full 3072-bit modulus). */
+ XMEMSET(r, 0, sizeof(sp_digit) * 96);
+
+ /* r = 2^n mod m */
+ sp_3072_sub_in_place_96(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * returns the borrow: 0 or all ones.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, sp_digit m)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r9 = 384: byte length of 96 words. */
+ "mov r5, #1\n\t"
+ "lsl r5, r5, #8\n\t"
+ "add r5, r5, #128\n\t"
+ "mov r9, r5\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ /* Mask the word of b; with m == 0 this subtracts zero (a copy). */
+ "ldr r6, [%[b], r8]\n\t"
+ "and r6, r6, %[m]\n\t"
+ /* Regenerate last iteration's borrow (held in c) into the carry flag;
+ * r5 is scratch here and is reloaded immediately after. */
+ "mov r5, #0\n\t"
+ "subs r5, r5, %[c]\n\t"
+ "ldr r5, [%[a], r8]\n\t"
+ "sbcs r5, r5, r6\n\t"
+ "sbcs %[c], %[c], %[c]\n\t"
+ "str r5, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, r9\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r5", "r6", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place (192 words).
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ /* r9 = mp, r12 = m (saved), r10 = current word of a,
+ * r11 = outer loop end (a + 96 words). */
+ "mov r9, %[mp]\n\t"
+ "mov r12, %[m]\n\t"
+ "mov r10, %[a]\n\t"
+ "mov r4, #0\n\t"
+ "add r11, r10, #384\n\t"
+ "\n1:\n\t"
+ /* mu = a[i] * mp */
+ "mov %[mp], r9\n\t"
+ "ldr %[a], [r10]\n\t"
+ "mul %[mp], %[mp], %[a]\n\t"
+ "mov %[m], r12\n\t"
+ /* Inner loop (unrolled by 2) covers m[0..93]; r14 = end marker. */
+ "add r14, r10, #376\n\t"
+ "\n2:\n\t"
+ /* a[i+j] += m[j] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r5, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r5, r5, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r4, r4, %[a]\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r4, [r10], #4\n\t"
+ /* a[i+j+1] += m[j+1] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r4, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r4, r4, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r5, r5, %[a]\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [r10], #4\n\t"
+ "cmp r10, r14\n\t"
+ "blt 2b\n\t"
+ /* a[i+94] += m[94] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r5, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r5, r5, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r4, r4, %[a]\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r4, [r10], #4\n\t"
+ /* a[i+95] += m[95] * mu, folding in the running carry ca. */
+ "mov r4, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ /* Multiply m[95] and mu - Start */
+ "ldr r8, [%[m]]\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ /* Multiply m[95] and mu - Done */
+ "ldr r6, [r10]\n\t"
+ "ldr r8, [r10, #4]\n\t"
+ "adds r6, r6, r5\n\t"
+ "adcs r8, r8, r4\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ "str r6, [r10]\n\t"
+ "str r8, [r10, #4]\n\t"
+ /* Next word in a */
+ "sub r10, r10, #376\n\t"
+ "cmp r10, r11\n\t"
+ "blt 1b\n\t"
+ "mov %[a], r10\n\t"
+ "mov %[m], r12\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+ );
+
+ /* The asm advanced a by 96 words, so the reduced result sits at a and
+ * the destination (a - 96) is the original buffer start; subtract m
+ * once more when the final carry is set. */
+ sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (negative inverse of m mod 2^n).
+ */
+static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ /* Full 192-word product, then Montgomery reduce back to 96 words. */
+ sp_3072_mul_96(r, a, b);
+ sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (negative inverse of m mod 2^n).
+ */
+static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Full 192-word square, then Montgomery reduce back to 96 words. */
+ sp_3072_sqr_96(r, a);
+ sp_3072_mont_reduce_96(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor (32-bit digit).
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0,
+ sp_digit div)
+{
+ sp_digit r = 0;
+
+ __asm__ __volatile__ (
+ /* r6 = (div >> 16) + 1: used to estimate the quotient 16 bits at a
+ * time; quotient estimate accumulates in r8. */
+ "lsr r6, %[div], #16\n\t"
+ "add r6, r6, #1\n\t"
+ "udiv r4, %[d1], r6\n\t"
+ "lsl r8, r4, #16\n\t"
+ "umull r4, r5, %[div], r8\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ "udiv r5, %[d1], r6\n\t"
+ "lsl r4, r5, #16\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ "lsl r4, %[d1], #16\n\t"
+ "orr r4, r4, %[d0], lsr #16\n\t"
+ "udiv r4, r4, r6\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ "lsl r4, %[d1], #16\n\t"
+ "orr r4, r4, %[d0], lsr #16\n\t"
+ "udiv r4, r4, r6\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ /* Final correction using the remaining low word. */
+ "udiv r4, %[d0], %[div]\n\t"
+ "add r8, r8, r4\n\t"
+ "mov %[r], r8\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r6", "r8"
+ );
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * No data-dependent branches: m is applied to all 96 digits.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int j;
+
+ for (j = 0; j < 96; j++) {
+ r[j] = a[j] & m;
+ }
+#else
+ int j;
+
+ /* Unrolled by 8: 12 groups of 8 digits cover all 96 digits. */
+ for (j = 0; j < 12; j++) {
+ r[0] = a[0] & m;
+ r[1] = a[1] & m;
+ r[2] = a[2] & m;
+ r[3] = a[3] & m;
+ r[4] = a[4] & m;
+ r[5] = a[5] & m;
+ r[6] = a[6] & m;
+ r[7] = a[7] & m;
+ r += 8;
+ a += 8;
+ }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * Every word is read regardless of where the first difference occurs: the
+ * running mask r3 becomes 0 after the first difference so later words can
+ * no longer change the result.
+ *
+ * a A single precision integer (96 words).
+ * b A single precision integer (96 words).
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+
+
+ __asm__ __volatile__ (
+ /* r3 = all-ones "still equal" mask; r6 = 380, byte offset of word 95. */
+ "mov r3, #0\n\t"
+ "mvn r3, r3\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #124\n\t"
+ "\n1:\n\t"
+ "ldr r8, [%[a], r6]\n\t"
+ "ldr r5, [%[b], r6]\n\t"
+ "and r8, r8, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "mov r4, r8\n\t"
+ /* r -= 1 when a[i] < b[i]; then clear the mask. */
+ "subs r8, r8, r5\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], r8\n\t"
+ "mvn r8, r8\n\t"
+ "and r3, r3, r8\n\t"
+ /* r += 1 when a[i] > b[i]; then clear the mask. */
+ "subs r5, r5, r4\n\t"
+ "sbc r8, r8, r8\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "mvn r8, r8\n\t"
+ "and r3, r3, r8\n\t"
+ "sub r6, r6, #4\n\t"
+ "cmp r6, #0\n\t"
+ "bge 1b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "r3", "r4", "r5", "r6", "r8"
+ );
+
+ return r;
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided (192 words).
+ * d Number to divide with (96 words).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (96 words).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[192], t2[97];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ div = d[95];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+ for (i=95; i>=0; i--) {
+ /* Estimate the quotient digit; may be at most 1 too large. */
+ r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+ sp_3072_mul_d_96(t2, d, r1);
+ t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+ t1[96 + i] -= t2[96];
+ /* Two masked add-backs correct an over-estimate without branching:
+ * t1[96 + i] is all ones exactly when the subtraction underflowed. */
+ sp_3072_mask_96(t2, d, t1[96 + i]);
+ t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+ sp_3072_mask_96(t2, d, t1[96 + i]);
+ t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
+ }
+
+ /* One final conditional subtract leaves the remainder below d. */
+ r1 = sp_3072_cmp_96(t1, d) >= 0;
+ sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result (96 words).
+ * a A single precision number that is to be reduced (192 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Division yields the remainder directly; the quotient is not needed. */
+ return sp_3072_div_96(a, m, NULL, r);
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Unlike sp_3072_div_96, the quotient-estimate correction here branches on
+ * the data ("_cond"). NOTE(review): this appears intended for public
+ * values only (it is used by the RSA public-key path) - confirm before
+ * using it with secret operands.
+ *
+ * a Number to be divided (192 words).
+ * d Number to divide with (96 words).
+ * m Multiplier result (unused, may be NULL).
+ * r Remainder from the division (96 words).
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[192], t2[97];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ div = d[95];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
+ for (i=95; i>=0; i--) {
+ /* Estimate the quotient digit; may be at most 1 too large. */
+ r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
+
+ sp_3072_mul_d_96(t2, d, r1);
+ t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
+ t1[96 + i] -= t2[96];
+ /* Add d back only when the subtraction underflowed (variable time). */
+ if (t1[96 + i] != 0) {
+ t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+ if (t1[96 + i] != 0)
+ t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
+ }
+ }
+
+ /* One final conditional subtract leaves the remainder below d. */
+ r1 = sp_3072_cmp_96(t1, d) >= 0;
+ sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Variable-time variant; see sp_3072_div_96_cond.
+ *
+ * r A single precision number that is the reduced result (96 words).
+ * a A single precision number that is to be reduced (192 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Division yields the remainder directly; the quotient is not needed. */
+ return sp_3072_div_96_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Uses a 4-bit fixed window with a 16-entry table of Montgomery powers.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may not be less than m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[16][192];
+#else
+ sp_digit* t[16];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<16; i++) {
+ t[i] = td + i * 192;
+ }
+#endif
+ /* t[0] = norm = 2^n mod m: the Montgomery form of 1 (table entry 0). */
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_96(norm, m);
+
+ /* t[1] = a * R mod m: place a in the top 96 words and reduce mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+ if (reduceA != 0) {
+ err = sp_3072_mod_96(t[1] + 96, a, m);
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_96(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+ err = sp_3072_mod_96(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the window table: t[i] = a^i in Montgomery form, i = 2..15. */
+ sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+
+ /* Extract the first (possibly shorter) window from the top of e.
+ * n caches the current exponent word; c counts bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 4;
+ if (c == 32) {
+ c = 28;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+ /* Per 4-bit window: four squarings then one table multiply. */
+ for (; i>=0 || c>=4; ) {
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 28;
+ n <<= 4;
+ c = 28;
+ }
+ else if (c < 4) {
+ /* Window straddles two exponent words. */
+ y = n >> 28;
+ n = e[i--];
+ c = 4 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+ }
+
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+
+ sp_3072_mont_mul_96(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form and fully reduce below m. */
+ XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+ sp_3072_mont_reduce_96(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+ sp_3072_cond_sub_96(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Uses a 5-bit fixed window with a 32-entry table of Montgomery powers.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may not be less than m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][192];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++) {
+ t[i] = td + i * 192;
+ }
+#endif
+ /* t[0] = norm = 2^n mod m: the Montgomery form of 1 (table entry 0). */
+ norm = t[0];
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_96(norm, m);
+
+ /* t[1] = a * R mod m: place a in the top 96 words and reduce mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
+ if (reduceA != 0) {
+ err = sp_3072_mod_96(t[1] + 96, a, m);
+ if (err == MP_OKAY) {
+ err = sp_3072_mod_96(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
+ err = sp_3072_mod_96(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the window table: t[i] = a^i in Montgomery form, i = 2..31. */
+ sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
+ sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
+ sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
+ sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
+ sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
+ sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
+ sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
+ sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
+ sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
+ sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
+ sp_3072_mont_sqr_96(t[16], t[ 8], m, mp);
+ sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp);
+ sp_3072_mont_sqr_96(t[18], t[ 9], m, mp);
+ sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp);
+ sp_3072_mont_sqr_96(t[20], t[10], m, mp);
+ sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp);
+ sp_3072_mont_sqr_96(t[22], t[11], m, mp);
+ sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp);
+ sp_3072_mont_sqr_96(t[24], t[12], m, mp);
+ sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp);
+ sp_3072_mont_sqr_96(t[26], t[13], m, mp);
+ sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp);
+ sp_3072_mont_sqr_96(t[28], t[14], m, mp);
+ sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp);
+ sp_3072_mont_sqr_96(t[30], t[15], m, mp);
+ sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp);
+
+ /* Extract the first (possibly shorter) window from the top of e.
+ * n caches the current exponent word; c counts bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
+ /* Per 5-bit window: five squarings then one table multiply. */
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles two exponent words. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+
+ sp_3072_mont_mul_96(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form and fully reduce below m. */
+ XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+ sp_3072_mont_reduce_96(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+ sp_3072_cond_sub_96(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent (at most 32 bits).
+ * mm Modulus (must be exactly 3072 bits).
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[192], m[96], r[192];
+#else
+ sp_digit* d = NULL;
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+#endif
+ sp_digit *ah;
+ sp_digit e[1];
+ int err = MP_OKAY;
+
+ if (*outLen < 384)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 ||
+ mp_count_bits(mm) != 3072))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ /* 96 * 5 words: a (192) + r (192) + m (96). */
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 96 * 2;
+ m = r + 96 * 2;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ /* ah = top half of a; the input lives there so that the 192-word
+ * number a equals in * R, ready for Montgomery conversion. */
+ ah = a + 96;
+
+ sp_3072_from_bin(ah, 96, in, inLen);
+#if DIGIT_BIT >= 32
+ e[0] = em->dp[0];
+#else
+ e[0] = em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(m, 96, mm);
+
+ if (e[0] == 0x3) {
+ /* Fast path for e = 3: r = ((in^2 mod m) * in) mod m. */
+ if (err == MP_OKAY) {
+ sp_3072_sqr_96(r, ah);
+ err = sp_3072_mod_96_cond(r, r, m);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_mul_96(r, ah, r);
+ err = sp_3072_mod_96_cond(r, r, m);
+ }
+ }
+ else {
+ int i;
+ sp_digit mp;
+
+ sp_3072_mont_setup(m, &mp);
+
+ /* Convert to Montgomery form: a = (0 | in) mod m = in * R mod m. */
+ XMEMSET(a, 0, sizeof(sp_digit) * 96);
+ err = sp_3072_mod_96_cond(a, a, m);
+
+ if (err == MP_OKAY) {
+ /* Find the highest set bit of the (public) exponent. */
+ for (i = 31; i >= 0; i--) {
+ if (e[0] >> i) {
+ break;
+ }
+ }
+
+ /* Left-to-right square-and-multiply over the exponent bits. */
+ XMEMCPY(r, a, sizeof(sp_digit) * 96);
+ for (i--; i>=0; i--) {
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ if (((e[0] >> i) & 1) == 1) {
+ sp_3072_mont_mul_96(r, r, a, m, mp);
+ }
+ }
+ /* Convert out of Montgomery form and reduce below m.
+ * NOTE(review): this compare/subtract is variable time; all
+ * values in the public operation are non-secret - confirm. */
+ XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
+ sp_3072_mont_reduce_96(r, m, mp);
+
+ for (i = 95; i > 0; i--) {
+ if (r[i] != m[i]) {
+ break;
+ }
+ }
+ if (r[i] >= m[i]) {
+ sp_3072_sub_in_place_96(r, m);
+ }
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#endif
+
+ return err;
+}
+
+/* NOTE(review): the statements below form the non-CRT (plain private
+ * exponent d) body of sp_RsaPrivate_3072, selected when
+ * SP_RSA_PRIVATE_EXP_D or RSA_LOW_MEM is defined. The enclosing function
+ * signature is not visible in this hunk — confirm against the full file. */
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ /* CRT parameters are unused in the plain-exponent path. */
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 384U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 3072) {
+ err = MP_READ_E;
+ }
+ if (inLen > 384) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* One allocation: d (96), a (192), m (96); r aliases a. */
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 96;
+ m = a + 192;
+ r = a;
+
+ sp_3072_from_bin(a, 96, in, inLen);
+ sp_3072_from_mp(d, 96, dm);
+ sp_3072_from_mp(m, 96, mm);
+ /* r = in^d mod m (constant-time exponentiation). */
+ err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+ if (d != NULL) {
+ /* Zeroize the private exponent (first 96 words) before freeing. */
+ XMEMSET(d, 0, sizeof(sp_digit) * 96);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ /* Returns the final carry out of the 48-word conditional addition.
+  * Constant-time: b is masked with m (all-ones or zero) instead of
+  * branching, so the memory access pattern is independent of m. */
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r9 = total byte count (48 words * 4), r8 = running byte offset. */
+ "mov r5, #192\n\t"
+ "mov r9, r5\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ "ldr r6, [%[b], r8]\n\t"
+ "and r6, r6, %[m]\n\t"          /* mask addend: 0 when m == 0 */
+ "adds r5, %[c], #-1\n\t"        /* restore carry flag from c (C=1 iff c!=0) */
+ "ldr r5, [%[a], r8]\n\t"
+ "adcs r5, r5, r6\n\t"           /* word add with propagated carry */
+ "mov %[c], #0\n\t"
+ "adcs %[c], %[c], %[c]\n\t"     /* c = carry out of this word */
+ "str r5, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, r9\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r5", "r6", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+ /* CRT implementation (Garner):
+  *   tmpa = in^dp mod p, tmpb = in^dq mod q,
+  *   h    = qi * (tmpa - tmpb) mod p,
+  *   out  = tmpb + q * h. */
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[96 * 2];
+ sp_digit p[48], q[48], dp[48];
+ sp_digit tmpa[96], tmpb[96];
+#else
+ sp_digit* t = NULL;
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+#endif
+ sp_digit* r;
+ sp_digit* qi;
+ sp_digit* dq;
+ sp_digit c;
+ int err = MP_OKAY;
+
+ /* d and the full modulus are not needed by the CRT path. */
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 384)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ /* qi, dq and dp share one 48-word buffer: they are only ever
+  * loaded and consumed sequentially, never live at the same time. */
+ a = t;
+ p = a + 96 * 2;
+ q = p + 48;
+ qi = dq = dp = q + 48;
+ tmpa = qi + 48;
+ tmpb = tmpa + 96;
+
+ /* r overlaps the top of a — a's low words are no longer needed by
+  * the time r is written (presumably; verify against callers). */
+ r = t + 96;
+ }
+#else
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ r = a;
+ qi = dq = dp;
+#endif
+ sp_3072_from_bin(a, 96, in, inLen);
+ sp_3072_from_mp(p, 48, pm);
+ sp_3072_from_mp(q, 48, qm);
+ sp_3072_from_mp(dp, 48, dpm);
+
+ /* tmpa = in^dp mod p (half-width, constant-time exponentiation). */
+ err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(dq, 48, dqm);
+ /* tmpb = in^dq mod q. */
+ err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ /* tmpa -= tmpb; add p back up to twice (constant-time) to make the
+  * difference non-negative mod p. */
+ c = sp_3072_sub_in_place_48(tmpa, tmpb);
+ c += sp_3072_cond_add_48(tmpa, tmpa, p, c);
+ sp_3072_cond_add_48(tmpa, tmpa, p, c);
+
+ /* h = qi * (tmpa - tmpb) mod p. */
+ sp_3072_from_mp(qi, 48, qim);
+ sp_3072_mul_48(tmpa, tmpa, qi);
+ err = sp_3072_mod_48(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* out = tmpb + q * h (Garner recombination; no final reduction
+  * needed since the result is < p*q). */
+ sp_3072_mul_48(tmpa, q, tmpa);
+ XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
+ sp_3072_add_96(r, tmpb, tmpa);
+
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ /* Zeroize everything (primes, CRT exponents) before freeing. */
+ XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+#else
+ XMEMSET(tmpa, 0, sizeof(tmpa));
+ XMEMSET(tmpb, 0, sizeof(tmpb));
+ XMEMSET(p, 0, sizeof(p));
+ XMEMSET(q, 0, sizeof(q));
+ XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
+{
+ /* Repacks 96 32-bit sp_digits into mp_digits of width DIGIT_BIT.
+  * Three cases: equal width (memcpy), narrower mp digits (split each
+  * word), wider mp digits (accumulate words). */
+ int err;
+
+ err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ /* Same digit width: direct copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
+ r->used = 96;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* mp digits narrower than 32 bits: each sp word spills across
+  * several mp digits; s tracks the bit offset within the word. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 96; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp digits wider than 32 bits: pack several sp words per digit. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 96; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ /* Non-power-of-two digit widths need explicit truncation. */
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ /* b is double width (192 words) because it doubles as the result
+  * buffer for the exponentiation; r aliases it. */
+ sp_digit b[192], e[96], m[96];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Modulus must be exactly 3072 bits for the fixed-size routines. */
+ if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 96, base);
+ sp_3072_from_mp(e, 96, exp);
+ sp_3072_from_mp(m, 96, mod);
+
+ /* Last arg 0: exponent is not secret-length padded here. */
+ err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_3072_to_mp(r, res);
+ }
+
+ /* Zeroize the exponent copy (may be private key material). */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_3072
+static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
+{
+ __asm__ __volatile__ (
+ "mov r6, #31\n\t"
+ "sub r6, r6, %[n]\n\t"
+ "add %[a], %[a], #320\n\t"
+ "add %[r], %[r], #320\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "lsr r4, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r4, r4, r6\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #52]\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "str r3, [%[r], #56]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #44]\n\t"
+ "str r2, [%[r], #52]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #40]\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "str r3, [%[r], #44]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "str r2, [%[r], #40]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #28]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "str r2, [%[r], #28]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r3, [%[a], #60]\n\t"
+ "str r2, [%[r], #68]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #56]\n\t"
+ "str r4, [%[r], #64]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #52]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #48]\n\t"
+ "str r2, [%[r], #56]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #44]\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #40]\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #36]\n\t"
+ "str r2, [%[r], #44]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #32]\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #28]\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #24]\n\t"
+ "str r2, [%[r], #32]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #20]\n\t"
+ "str r4, [%[r], #28]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #16]\n\t"
+ "str r3, [%[r], #24]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #12]\n\t"
+ "str r2, [%[r], #20]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #8]\n\t"
+ "str r4, [%[r], #16]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #4]\n\t"
+ "str r3, [%[r], #12]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #0]\n\t"
+ "str r2, [%[r], #8]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r2, [%[a], #60]\n\t"
+ "str r4, [%[r], #68]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #56]\n\t"
+ "str r3, [%[r], #64]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #52]\n\t"
+ "str r2, [%[r], #60]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #48]\n\t"
+ "str r4, [%[r], #56]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #44]\n\t"
+ "str r3, [%[r], #52]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #40]\n\t"
+ "str r2, [%[r], #48]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #36]\n\t"
+ "str r4, [%[r], #44]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #32]\n\t"
+ "str r3, [%[r], #40]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #28]\n\t"
+ "str r2, [%[r], #36]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #24]\n\t"
+ "str r4, [%[r], #32]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #20]\n\t"
+ "str r3, [%[r], #28]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #16]\n\t"
+ "str r2, [%[r], #24]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #12]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #8]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #4]\n\t"
+ "str r2, [%[r], #12]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #0]\n\t"
+ "str r4, [%[r], #8]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "sub %[a], %[a], #64\n\t"
+ "sub %[r], %[r], #64\n\t"
+ "ldr r4, [%[a], #60]\n\t"
+ "str r3, [%[r], #68]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #56]\n\t"
+ "str r2, [%[r], #64]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #52]\n\t"
+ "str r4, [%[r], #60]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #48]\n\t"
+ "str r3, [%[r], #56]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #44]\n\t"
+ "str r2, [%[r], #52]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #40]\n\t"
+ "str r4, [%[r], #48]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #36]\n\t"
+ "str r3, [%[r], #44]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #32]\n\t"
+ "str r2, [%[r], #40]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #28]\n\t"
+ "str r4, [%[r], #36]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #24]\n\t"
+ "str r3, [%[r], #32]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #20]\n\t"
+ "str r2, [%[r], #28]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #16]\n\t"
+ "str r4, [%[r], #24]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #12]\n\t"
+ "str r3, [%[r], #20]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "ldr r3, [%[a], #8]\n\t"
+ "str r2, [%[r], #16]\n\t"
+ "lsr r5, r3, #1\n\t"
+ "lsl r3, r3, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r4, r4, r5\n\t"
+ "ldr r2, [%[a], #4]\n\t"
+ "str r4, [%[r], #12]\n\t"
+ "lsr r5, r2, #1\n\t"
+ "lsl r2, r2, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r3, r3, r5\n\t"
+ "ldr r4, [%[a], #0]\n\t"
+ "str r3, [%[r], #8]\n\t"
+ "lsr r5, r4, #1\n\t"
+ "lsl r4, r4, %[n]\n\t"
+ "lsr r5, r5, r6\n\t"
+ "orr r2, r2, r5\n\t"
+ "str r4, [%[r]]\n\t"
+ "str r2, [%[r], #4]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+ : "memory", "r2", "r3", "r4", "r5", "r6"
+ );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
+ const sp_digit* m)
+{
+ /* Computes r = 2^e mod m using a fixed 5-bit window. Because the base
+  * is 2, each window step is 5 Montgomery squarings followed by a shift
+  * of the accumulator by the window value y (sp_3072_lshift_96) and a
+  * single-word overflow fix-up — no multiplications by a stored base. */
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit nd[192];
+ sp_digit td[97];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* 289 = 192 (norm) + 97 (tmp) words in one allocation. */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 192;
+#else
+ norm = nd;
+ tmp = td;
+#endif
+
+ sp_3072_mont_setup(m, &mp);
+ /* norm = R mod m (Montgomery representation of 1). */
+ sp_3072_mont_norm_96(norm, m);
+
+ /* Peel the top (bits mod 5, or 5) exponent bits as the first window.
+  * i indexes the current exponent word, n holds remaining bits of it
+  * left-justified, c counts bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ /* r = 2^y in Montgomery form (norm shifted by the first window). */
+ sp_3072_lshift_96(r, norm, y);
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ /* Window aligns exactly with a word boundary. */
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles two exponent words. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ /* Window entirely within the current word. */
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^(2^5) mod m. */
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+ sp_3072_mont_sqr_96(r, r, m, mp);
+
+ /* r <<= y; fold the word that overflowed into r[96] back in by
+  * adding norm * r[96], then conditionally subtract m on carry. */
+ sp_3072_lshift_96(r, r, y);
+ sp_3072_mul_d_96(tmp, norm, r[96]);
+ r[96] = 0;
+ o = sp_3072_add_96(r, r, tmp);
+ sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
+ }
+
+ /* Leave Montgomery form and normalize to r < m (constant time). */
+ XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
+ sp_3072_mont_reduce_96(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
+ sp_3072_cond_sub_96(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* HAVE_FFDHE_3072 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+ int err = MP_OKAY;
+ /* b is double width; r aliases it as the result buffer. */
+ sp_digit b[192], e[96], m[96];
+ sp_digit* r = b;
+ word32 i;
+
+ if (mp_count_bits(base) > 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 384) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 96, base);
+ sp_3072_from_bin(e, 96, exp, expLen);
+ sp_3072_from_mp(m, 96, mod);
+
+ #ifdef HAVE_FFDHE_3072
+ /* Fast path for base 2 with an FFDHE-style modulus (top word all
+  * ones): exponentiate by shifting instead of multiplying. */
+ if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1)
+ err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
+ else
+ #endif
+ err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
+
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ /* Strip leading zero bytes so the shared secret is minimal length. */
+ for (i=0; i<384 && out[i] == 0; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+
+ }
+
+ /* Zeroize the private exponent copy. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ /* 1536-bit modexp reusing the half-width (48-word) 3072 helpers;
+  * used e.g. for DH with 1536-bit groups. */
+ int err = MP_OKAY;
+ sp_digit b[96], e[48], m[48];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 1536) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expBits > 1536) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 1536) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 48, base);
+ sp_3072_from_mp(e, 48, exp);
+ sp_3072_from_mp(m, 48, mod);
+
+ err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Zero-extend the 48-word result to the 96 words sp_3072_to_mp
+  * reads, then clamp used down to the modulus size. */
+ XMEMSET(r + 48, 0, sizeof(*r) * 48U);
+ err = sp_3072_to_mp(r, res);
+ res->used = mod->used;
+ mp_clamp(res);
+ }
+
+ /* Zeroize the exponent copy. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+ /* Packs big-endian bytes (a, length n) into little-endian 32-bit
+  * words in r; s is the bit offset within the current word. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((sp_digit)a[i]) << s);
+ if (s >= 24U) {
+ /* Current word full (or byte straddles the boundary): spill the
+  * remaining bits of a[i] into the next word. */
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (sp_digit)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ /* Zero any remaining output words. */
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+ /* Repacks mp digits of width DIGIT_BIT into 32-bit sp_digits;
+  * mirror of the to_mp direction, with the same three cases. */
+#if DIGIT_BIT == 32
+ /* Same digit width: direct copy, then zero-fill. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 32
+ /* mp digits wider than 32 bits: split each across sp words;
+  * s is the bit offset consumed from the current mp digit. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 32U) <= (word32)DIGIT_BIT) {
+ s += 32U;
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp digits narrower than 32 bits: accumulate several per sp word. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 32) {
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 32 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+ /* Serializes 128 little-endian 32-bit words as exactly 512 big-endian
+  * bytes, writing from the end of the buffer backwards. s is the bit
+  * offset within the current output byte, b the bits consumed of r[i]. */
+ int i, j, s = 0, b;
+
+ j = 4096 / 8 - 1;
+ a[j] = 0;
+ for (i=0; i<128 && j>=0; i++) {
+ b = 0;
+ /* lint allow cast of mismatch sp_digit and int */
+ a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+ b += 8 - s;
+ if (j < 0) {
+ break;
+ }
+ while (b < 32) {
+ a[j--] = (byte)(r[i] >> b);
+ b += 8;
+ if (j < 0) {
+ break;
+ }
+ }
+ /* Carry the leftover (8 - (b - 32)) bits into the next byte. */
+ s = 8 - (b - 32);
+ if (j >= 0) {
+ a[j] = 0;
+ }
+ if (s != 0) {
+ j++;
+ }
+ }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled 64-word (2048-bit) addition in ARM32 inline assembly.
+ * Words are processed two at a time: ldm with writeback advances the
+ * input pointers, the first pair uses adds to start the carry chain,
+ * every later pair uses adcs to propagate it, and stm with writeback
+ * advances the output pointer. No instruction between the adds/adcs
+ * pairs touches the flags, so the carry survives across stanzas.
+ *
+ * r A single precision integer. Receives the 64-word sum.
+ * a A single precision integer (64 words).
+ * b A single precision integer (64 words).
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* words 0-1: adds starts the carry chain */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 2-3 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 4-5 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 6-7 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 8-9 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 10-11 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 12-13 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 14-15 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 16-17 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 18-19 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 20-21 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 22-23 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 24-25 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 26-27 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 28-29 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 30-31 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 32-33 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 34-35 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 36-37 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 38-39 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 40-41 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 42-43 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 44-45 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 46-47 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 48-49 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 50-51 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 52-53 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 54-55 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 56-57 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 58-59 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 60-61 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 62-63 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* materialize the final carry flag into c */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* Sub b from a into a. (a -= b)
+ *
+ * NOTE: the original header said "into r. (r = a - b)" and listed an r
+ * parameter, but there is none - the subtraction is in place, matching
+ * the WOLFSSL_SP_SMALL variant's description of the same function.
+ *
+ * Fully unrolled 128-word (4096-bit) in-place subtraction in ARM32
+ * inline assembly. Each stanza reloads two words of a (ldm without
+ * writeback - the pointer advances via the stm writeback), subtracts
+ * the corresponding words of b, and stores back. The first pair uses
+ * subs to start the borrow chain; later pairs use sbcs.
+ *
+ * a A single precision integer (128 words). Receives the difference.
+ * b A single precision integer (128 words).
+ * Returns 0 when no borrow, (sp_digit)-1 when the result borrowed
+ * (sbc c, c, c yields all-ones iff the carry flag was clear).
+ */
+SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* words 0-1: subs starts the borrow chain */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "subs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 2-3 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 4-5 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 6-7 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 8-9 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 10-11 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 12-13 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 14-15 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 16-17 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 18-19 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 20-21 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 22-23 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 24-25 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 26-27 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 28-29 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 30-31 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 32-33 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 34-35 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 36-37 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 38-39 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 40-41 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 42-43 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 44-45 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 46-47 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 48-49 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 50-51 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 52-53 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 54-55 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 56-57 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 58-59 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 60-61 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 62-63 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 64-65 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 66-67 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 68-69 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 70-71 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 72-73 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 74-75 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 76-77 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 78-79 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 80-81 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 82-83 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 84-85 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 86-87 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 88-89 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 90-91 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 92-93 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 94-95 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 96-97 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 98-99 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 100-101 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 102-103 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 104-105 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 106-107 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 108-109 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 110-111 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 112-113 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 114-115 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 116-117 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 118-119 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 120-121 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 122-123 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 124-125 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* words 126-127 */
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* c = 0 - borrow: all-ones if the chain borrowed, else 0 */
+        "sbc %[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled 128-word (4096-bit) addition in ARM32 inline assembly.
+ * Same structure as sp_4096_add_64: two words per stanza, adds for the
+ * first pair, adcs thereafter, ldm/stm writeback advancing all three
+ * pointers.
+ *
+ * r A single precision integer. Receives the 128-word sum.
+ * a A single precision integer (128 words).
+ * b A single precision integer (128 words).
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* words 0-1: adds starts the carry chain */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 2-3 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 4-5 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 6-7 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 8-9 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 10-11 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 12-13 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 14-15 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 16-17 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 18-19 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 20-21 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 22-23 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 24-25 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 26-27 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 28-29 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 30-31 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 32-33 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 34-35 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 36-37 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 38-39 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 40-41 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 42-43 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 44-45 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 46-47 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 48-49 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 50-51 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 52-53 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 54-55 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 56-57 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 58-59 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 60-61 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 62-63 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 64-65 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 66-67 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 68-69 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 70-71 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 72-73 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 74-75 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 76-77 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 78-79 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 80-81 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 82-83 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 84-85 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 86-87 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 88-89 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 90-91 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 92-93 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 94-95 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 96-97 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 98-99 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 100-101 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 102-103 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 104-105 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 106-107 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 108-109 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 110-111 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 112-113 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 114-115 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 116-117 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 118-119 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 120-121 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 122-123 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 124-125 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* words 126-127 */
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* materialize the final carry flag into c */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (column-wise) schoolbook multiply of two 64-word
+ * (2048-bit) numbers. The 128-word product is accumulated in a local
+ * buffer and copied to r at the end, so r may safely alias a or b.
+ *
+ * Register roles inside the asm:
+ *   r3:r4:r5 - 96-bit column accumulator (low:mid:high)
+ *   r9       - current column byte offset into the result
+ *   r10/r11  - saved base pointers of a / b
+ *   r12      - saved result (tmp) pointer
+ *   r14      - end-of-a sentinel (a + 256 bytes = a + 64 words)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[64 * 2];
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r9, r3\n\t"
+        "mov r12, %[r]\n\t"
+        "mov r10, %[a]\n\t"
+        "mov r11, %[b]\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, r10\n\t"
+        "mov r14, r6\n\t"
+        /* Outer loop: one result word (column) per iteration.
+         * Compute start offsets into a and b so that the inner loop
+         * walks a upwards and b downwards over all i+j == column pairs
+         * (the subs/sbc/mvn/and sequence clamps the start of a to 0
+         * without a branch). */
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r6, #252\n\t"
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov %[b], r9\n\t"
+        "sub %[b], %[b], %[a]\n\t"
+        "add %[a], %[a], r10\n\t"
+        "add %[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [%[b]]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add %[a], %[a], #4\n\t"
+        "sub %[b], %[b], #4\n\t"
+        "cmp %[a], r14\n\t"
+        "beq 3f\n\t"
+        "mov r6, r9\n\t"
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        /* Store the finished column, shift the accumulator down one
+         * word and advance to the next column (offsets 0..0x1f8). */
+        "\n3:\n\t"
+        "mov %[r], r12\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #248\n\t"
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov %[a], r10\n\t"
+        "mov %[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Copies 64 words from a to r, ANDing every word with the mask m.
+ * With an all-ones or all-zeros mask this acts as a constant-time
+ * conditional copy/clear.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int w;
+
+    /* Compact variant: one word per loop iteration. */
+    for (w = 0; w < 64; w++) {
+        r[w] = a[w] & m;
+    }
+#else
+    int w;
+
+    /* Unrolled variant: process eight words per iteration. */
+    for (w = 0; w < 64; w += 8) {
+        sp_digit* rw = r + w;
+        const sp_digit* aw = a + w;
+
+        rw[0] = aw[0] & m;
+        rw[1] = aw[1] & m;
+        rw[2] = aw[2] & m;
+        rw[3] = aw[3] & m;
+        rw[4] = aw[4] & m;
+        rw[5] = aw[5] & m;
+        rw[6] = aw[6] & m;
+        rw[7] = aw[7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: split each 128-word operand into low (L) and
+ * high (H) 64-word halves, form the three sub-products with the 2048-bit
+ * primitives, then recombine. The carries from the half-sums (ca, cb)
+ * are folded in via masked adds so no conditional branches on secret
+ * data are needed. r must hold 256 words; the statement order matters
+ * because r is used as scratch for intermediate sums.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r;           /* z0 = aL*bL, built directly in r    */
+    sp_digit z1[128];           /* z1 = (aL+aH)*(bL+bH) middle term   */
+    sp_digit a1[64];            /* aL + aH (low 64 words)             */
+    sp_digit b1[64];            /* bL + bH (low 64 words)             */
+    sp_digit z2[128];           /* z2 = aH*bH                         */
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_64(a1, a, &a[64]);      /* ca = carry of aL+aH   */
+    cb = sp_2048_add_64(b1, b, &b[64]);      /* cb = carry of bL+bH   */
+    u  = ca & cb;                            /* ca*cb top contribution */
+    sp_2048_mul_64(z1, a1, b1);
+    sp_2048_mul_64(z2, &a[64], &b[64]);
+    sp_2048_mul_64(z0, a, b);
+    /* Add the carry-cross terms cb*a1 and ca*b1 into the upper half. */
+    sp_2048_mask_64(r + 128, a1, 0 - cb);
+    sp_2048_mask_64(b1, b1, 0 - ca);
+    u += sp_2048_add_64(r + 128, r + 128, b1);
+    /* z1 -= z2 + z0 leaves the true middle term aL*bH + aH*bL. */
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product-scanning square of a 64-word (2048-bit) number. Off-diagonal
+ * products a[i]*a[j] (i != j) are added into the column twice; diagonal
+ * squares a[i]*a[i] once. The 128-word result is accumulated in a
+ * 512-byte scratch area carved off the stack (sp is moved down by
+ * 0x200 and restored at the end) and then copied word-by-word to r.
+ *
+ * Register roles inside the asm:
+ *   r3:r4:r5 - 96-bit column accumulator, r9 - column byte offset,
+ *   r10 - saved a, r11 - scratch base (old sp - 0x200), r12 - saved r,
+ *   r2 - pointer to the second factor walking downwards.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r9, r3\n\t"
+        "mov r12, %[r]\n\t"
+        /* Allocate 0x200 bytes of stack scratch for the product. */
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "neg r6, r6\n\t"
+        "add sp, sp, r6\n\t"
+        "mov r11, sp\n\t"
+        "mov r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r6, #252\n\t"
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov r2, r9\n\t"
+        "sub r2, r2, %[a]\n\t"
+        "add %[a], %[a], r10\n\t"
+        "add r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [r2]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr r6, [%[a]]\n\t"
+        "umull r6, r8, r6, r6\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add %[a], %[a], #4\n\t"
+        "sub r2, r2, #4\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r8, r9\n\t"
+        "add r8, r8, r10\n\t"
+        "cmp %[a], r8\n\t"
+        "ble 2b\n\t"
+        /* Store the finished column and move to the next one. */
+        "\n3:\n\t"
+        "mov %[r], r11\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #248\n\t"
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r10\n\t"
+        "str r3, [%[r], r8]\n\t"
+        /* Copy the scratch result out to r, top word first. */
+        "mov %[r], r12\n\t"
+        "mov %[a], r11\n\t"
+        "mov r3, #1\n\t"
+        "lsl r3, r3, #8\n\t"
+        "add r3, r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "subs r3, r3, #4\n\t"
+        "bge 4b\n\t"
+        /* Release the 0x200-byte scratch area. */
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * One level of Karatsuba squaring: split a into 64-word halves aL/aH,
+ * square aL+aH, aH and aL with the 2048-bit primitives, then combine.
+ * The half-sum carry u is folded in via a masked, doubled add. r must
+ * hold 256 words; statement order matters as r is used as scratch.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    sp_digit* z0 = r;           /* z0 = aL^2, built directly in r */
+    sp_digit z2[128];           /* z2 = aH^2                      */
+    sp_digit z1[128];           /* z1 = (aL+aH)^2 middle term     */
+    sp_digit a1[64];            /* aL + aH (low 64 words)         */
+    sp_digit u;
+
+    u = sp_2048_add_64(a1, a, &a[64]);       /* u = carry of aL+aH */
+    sp_2048_sqr_64(z1, a1);
+    sp_2048_sqr_64(z2, &a[64]);
+    sp_2048_sqr_64(z0, a);
+    /* Add 2*u*a1 (the carry-cross term) into the upper half. */
+    sp_2048_mask_64(r + 128, a1, 0 - u);
+    u += sp_2048_add_64(r + 128, r + 128, r + 128);
+    /* z1 -= z2 + z0 leaves the true middle term 2*aL*aH. */
+    u += sp_4096_sub_in_place_128(z1, z2);
+    u += sp_4096_sub_in_place_128(z1, z0);
+    u += sp_4096_add_128(r + 64, r + 64, z1);
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
+    (void)sp_4096_add_128(r + 128, r + 128, z2);
+}
+
+#endif /* !WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * WOLFSSL_SP_SMALL variant: a compact one-word-per-iteration loop
+ * instead of the fully unrolled version. The carry cannot live in the
+ * flags across the loop-control instructions, so each iteration
+ * re-injects it: adds c + 0xffffffff sets the carry flag iff c != 0,
+ * adcs performs the word add with that carry, and the mov/adc pair
+ * captures the new carry back into c.
+ *
+ * r A single precision integer. Receives the 128-word sum.
+ * a A single precision integer (128 words = 512 bytes).
+ * b A single precision integer (128 words).
+ * Returns the carry out of the most significant word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r6, %[a]\n\t"
+        "mov r8, #0\n\t"
+        "add r6, r6, #512\n\t"          /* r6 = end of a (128 words) */
+        "sub r8, r8, #1\n\t"            /* r8 = 0xffffffff           */
+        "\n1:\n\t"
+        "adds %[c], %[c], r8\n\t"       /* restore carry from c      */
+        "ldr r4, [%[a]]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c], %[c]\n\t"      /* capture carry into c      */
+        "add %[a], %[a], #4\n\t"
+        "add %[b], %[b], #4\n\t"
+        "add %[r], %[r], #4\n\t"
+        "cmp %[a], r6\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * WOLFSSL_SP_SMALL variant: two words per loop iteration. The borrow
+ * is kept in c between iterations (0 = no borrow, all-ones = borrow):
+ * the leading mov/subs computes 0 - c, which sets the carry flag iff
+ * c == 0, re-establishing the borrow state for the sbcs pair; r5 is
+ * only used for its flag effect there and is immediately reloaded.
+ *
+ * a A single precision integer (128 words = 512 bytes). Receives the
+ *   difference.
+ * b A single precision integer (128 words).
+ * Returns 0 when no borrow, (sp_digit)-1 when the result borrowed.
+ */
+SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov r8, %[a]\n\t"
+        "add r8, r8, #512\n\t"          /* r8 = end of a */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"         /* restore borrow from c */
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "sbc %[c], %[c], %[c]\n\t"      /* capture borrow into c */
+        "add %[a], %[a], #8\n\t"
+        "add %[b], %[b], #8\n\t"
+        "cmp %[a], r8\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * WOLFSSL_SP_SMALL variant: product-scanning schoolbook multiply of
+ * two 128-word (4096-bit) numbers - same structure as sp_4096_mul_64
+ * but with the size constants doubled (end of a at 0x200 bytes,
+ * columns up to offset 0x3f8). The 256-word product is accumulated in
+ * a local buffer and copied to r at the end, so r may alias a or b.
+ *
+ * Register roles inside the asm:
+ *   r3:r4:r5 - 96-bit column accumulator, r9 - column byte offset,
+ *   r10/r11 - saved a/b, r12 - saved tmp pointer, r14 - end-of-a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[128 * 2];
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r9, r3\n\t"
+        "mov r12, %[r]\n\t"
+        "mov r10, %[a]\n\t"
+        "mov r11, %[b]\n\t"
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, r10\n\t"
+        "mov r14, r6\n\t"
+        /* Outer loop: one result word (column) per iteration. */
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #252\n\t"
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov %[b], r9\n\t"
+        "sub %[b], %[b], %[a]\n\t"
+        "add %[a], %[a], r10\n\t"
+        "add %[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [%[b]]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add %[a], %[a], #4\n\t"
+        "sub %[b], %[b], #4\n\t"
+        "cmp %[a], r14\n\t"
+        "beq 3f\n\t"
+        "mov r6, r9\n\t"
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        /* Store the finished column and advance (offsets 0..0x3f8). */
+        "\n3:\n\t"
+        "mov %[r], r12\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #3\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #248\n\t"
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov %[a], r10\n\t"
+        "mov %[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * WOLFSSL_SP_SMALL variant: product-scanning square of a 128-word
+ * (4096-bit) number - same structure as sp_4096_sqr_64 but with the
+ * size constants doubled. Off-diagonal products are added twice,
+ * diagonal squares once. The 256-word result is accumulated in a
+ * 0x400-byte scratch area carved off the stack (sp restored at the
+ * end) and then copied word-by-word to r.
+ *
+ * Register roles inside the asm:
+ *   r3:r4:r5 - 96-bit column accumulator, r9 - column byte offset,
+ *   r10 - saved a, r11 - scratch base, r12 - saved r, r2 - second
+ *   factor pointer walking downwards.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r5, #0\n\t"
+        "mov r9, r3\n\t"
+        "mov r12, %[r]\n\t"
+        /* Allocate 0x400 bytes of stack scratch for the product. */
+        "mov r6, #4\n\t"
+        "lsl r6, r6, #8\n\t"
+        "neg r6, r6\n\t"
+        "add sp, sp, r6\n\t"
+        "mov r11, sp\n\t"
+        "mov r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #252\n\t"
+        "mov %[a], r9\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        "mov r2, r9\n\t"
+        "sub r2, r2, %[a]\n\t"
+        "add %[a], %[a], r10\n\t"
+        "add r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp r2, %[a]\n\t"
+        "beq 4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r8, [r2]\n\t"
+        "umull r6, r8, r6, r8\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal 5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr r6, [%[a]]\n\t"
+        "umull r6, r8, r6, r6\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add %[a], %[a], #4\n\t"
+        "sub r2, r2, #4\n\t"
+        "mov r6, #2\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, r10\n\t"
+        "cmp %[a], r6\n\t"
+        "beq 3f\n\t"
+        "cmp %[a], r2\n\t"
+        "bgt 3f\n\t"
+        "mov r8, r9\n\t"
+        "add r8, r8, r10\n\t"
+        "cmp %[a], r8\n\t"
+        "ble 2b\n\t"
+        /* Store the finished column and move to the next one. */
+        "\n3:\n\t"
+        "mov %[r], r11\n\t"
+        "mov r8, r9\n\t"
+        "str r3, [%[r], r8]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "mov r5, #0\n\t"
+        "add r8, r8, #4\n\t"
+        "mov r9, r8\n\t"
+        "mov r6, #3\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #248\n\t"
+        "cmp r8, r6\n\t"
+        "ble 1b\n\t"
+        "mov %[a], r10\n\t"
+        "str r3, [%[r], r8]\n\t"
+        /* Copy the scratch result out to r, top word first. */
+        "mov %[r], r12\n\t"
+        "mov %[a], r11\n\t"
+        "mov r3, #3\n\t"
+        "lsl r3, r3, #8\n\t"
+        "add r3, r3, #252\n\t"
+        "\n4:\n\t"
+        "ldr r6, [%[a], r3]\n\t"
+        "str r6, [%[r], r3]\n\t"
+        "subs r3, r3, #4\n\t"
+        "bge 4b\n\t"
+        /* Release the 0x400-byte scratch area. */
+        "mov r6, #4\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * Newton-Raphson iteration on the word inverse: the seed is correct
+ * mod 2^4 and each pass of x *= 2 - a[0]*x doubles the number of
+ * correct low-order bits, so three passes reach all 32 bits.
+ *
+ * a   A single precision number; a[0] must be odd.
+ * rho Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit inv;
+ sp_digit m0 = a[0];
+ int i;
+
+ /* Seed: inv * m0 == 1 (mod 2**4). */
+ inv = m0 + (((m0 + 2) & 4) << 1);
+ /* Three doublings: 2**4 -> 2**8 -> 2**16 -> 2**32. */
+ for (i = 0; i < 3; i++) {
+ inv *= (sp_digit)2 - m0 * inv;
+ }
+
+ /* rho = -1/m mod b */
+ *rho = (sp_digit)0 - inv;
+}
+
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r must have room for 129 words: the 128-word product plus the final
+ * carry word stored after the loop.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
+ sp_digit b)
+{
+ __asm__ __volatile__ (
+ /* r9 = one past the end of a (128 words = 512 bytes).
+  * r3:r4 carry the running high part between iterations. */
+ "add r9, %[a], #512\n\t"
+ /* A[0] * B */
+ "ldr r6, [%[a]], #4\n\t"
+ "umull r5, r3, r6, %[b]\n\t"
+ "mov r4, #0\n\t"
+ "str r5, [%[r]], #4\n\t"
+ /* A[0] * B - Done */
+ "\n1:\n\t"
+ "mov r5, #0\n\t"
+ /* A[] * B */
+ "ldr r6, [%[a]], #4\n\t"
+ "umull r6, r8, r6, %[b]\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[] * B - Done */
+ "str r3, [%[r]], #4\n\t"
+ "mov r3, r4\n\t"
+ "mov r4, r5\n\t"
+ "cmp %[a], r9\n\t"
+ "blt 1b\n\t"
+ /* Store the final carry word (r[128]). */
+ "str r3, [%[r]]\n\t"
+ : [r] "+r" (r), [a] "+r" (a)
+ : [b] "r" (b)
+ : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+ );
+}
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
+{
+ int i;
+
+ /* Zero all 128 words; this represents 2^4096 truncated to 128 words. */
+ for (i = 0; i < 128; i++) {
+ r[i] = 0;
+ }
+
+ /* 0 - m (mod 2^4096) == 2^4096 - m == 2^4096 mod m, as m >= 2^4095. */
+ sp_4096_sub_in_place_128(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant time: the same instructions run either way; the mask is
+ * ANDed into each word of b so either b or zero is subtracted.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * returns 0 when no borrow occurred, all-ones when it did.
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, sp_digit m)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r9 = byte length to process (0x200 = 128 words). */
+ "mov r5, #2\n\t"
+ "lsl r5, r5, #8\n\t"
+ "mov r9, r5\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ "ldr r6, [%[b], r8]\n\t"
+ "and r6, r6, %[m]\n\t"
+ /* Re-establish the borrow flag from c (c is 0 or all-ones). */
+ "mov r5, #0\n\t"
+ "subs r5, r5, %[c]\n\t"
+ "ldr r5, [%[a], r8]\n\t"
+ "sbcs r5, r5, r6\n\t"
+ /* c = 0 with no borrow, all-ones otherwise. */
+ "sbcs %[c], %[c], %[c]\n\t"
+ "str r5, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, r9\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r5", "r6", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * For each of the 128 low words: mu = a[i] * mp, then mu * m (shifted to
+ * word i) is added to a, which zeroes a[i].  Carries out of the top are
+ * accumulated in ca.  On exit the result lives in a[128..255] and one
+ * conditional subtract brings it under m.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ /* r9 = mp, r12 = m, r10 = cursor into a, r11 = &a[128] (outer loop
+  * bound), r14 = inner loop bound, r4/r5 = alternating carry words.
+  * NOTE(review): %[m] and %[mp] are declared inputs but written inside
+  * the asm (m is restored at the end, mp is not) — long-standing
+  * pattern here; confirm against upstream constraint fixes. */
+ "mov r9, %[mp]\n\t"
+ "mov r12, %[m]\n\t"
+ "mov r10, %[a]\n\t"
+ "mov r4, #0\n\t"
+ "add r11, r10, #512\n\t"
+ "\n1:\n\t"
+ /* mu = a[i] * mp */
+ "mov %[mp], r9\n\t"
+ "ldr %[a], [r10]\n\t"
+ "mul %[mp], %[mp], %[a]\n\t"
+ "mov %[m], r12\n\t"
+ /* Inner loop handles m[0..125] two words per pass; m[126] and
+  * m[127] are unrolled after it. */
+ "add r14, r10, #504\n\t"
+ "\n2:\n\t"
+ /* a[i+j] += m[j] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r5, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r5, r5, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r4, r4, %[a]\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r4, [r10], #4\n\t"
+ /* a[i+j+1] += m[j+1] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r4, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r4, r4, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r5, r5, %[a]\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [r10], #4\n\t"
+ "cmp r10, r14\n\t"
+ "blt 2b\n\t"
+ /* a[i+126] += m[126] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r5, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r5, r5, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r4, r4, %[a]\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r4, [r10], #4\n\t"
+ /* a[i+127] += m[127] * mu */
+ "mov r4, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ /* Multiply m[127] and mu - Start */
+ "ldr r8, [%[m]]\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ /* Multiply m[127] and mu - Done */
+ "ldr r6, [r10]\n\t"
+ "ldr r8, [r10, #4]\n\t"
+ "adds r6, r6, r5\n\t"
+ "adcs r8, r8, r4\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ "str r6, [r10]\n\t"
+ "str r8, [r10, #4]\n\t"
+ /* Next word in a */
+ "sub r10, r10, #504\n\t"
+ "cmp r10, r11\n\t"
+ "blt 1b\n\t"
+ /* On exit %[a] = &a[128]: the reduced value is in the high half. */
+ "mov %[a], r10\n\t"
+ "mov %[m], r12\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+ );
+
+ /* a was advanced to &a[128] by the asm; subtract m once more when the
+  * final carry (ca) is set. */
+ sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication. Must hold 256 words; the top half is used
+ * as scratch by the reduction.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32, from sp_4096_mont_setup).
+ */
+static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_4096_mul_128(r, a, b);
+ sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring. Must hold 256 words; the top half is used as
+ * scratch by the reduction.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier (-1/m mod 2^32, from sp_4096_mont_setup).
+ */
+static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_4096_sqr_128(r, a);
+ sp_4096_mont_reduce_128(r, m, mp);
+}
+
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * Long division 16 quotient bits at a time: each step divides the
+ * current remainder by ((div >> 16) + 1), which cannot over-estimate,
+ * and a final single-word udiv mops up what remains.
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The dividend.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0,
+ sp_digit div)
+{
+ sp_digit r = 0;
+
+ __asm__ __volatile__ (
+ /* r6 = top 16 bits of div, plus 1 so estimates round down.
+  * r8 accumulates the quotient. */
+ "lsr r6, %[div], #16\n\t"
+ "add r6, r6, #1\n\t"
+ /* First 16-bit quotient chunk, aligned at bit 16. */
+ "udiv r4, %[d1], r6\n\t"
+ "lsl r8, r4, #16\n\t"
+ "umull r4, r5, %[div], r8\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ /* Correction chunk at the same alignment. */
+ "udiv r5, %[d1], r6\n\t"
+ "lsl r4, r5, #16\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ /* Low chunks: divide the middle 32 bits of the remainder. */
+ "lsl r4, %[d1], #16\n\t"
+ "orr r4, r4, %[d0], lsr #16\n\t"
+ "udiv r4, r4, r6\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ "lsl r4, %[d1], #16\n\t"
+ "orr r4, r4, %[d0], lsr #16\n\t"
+ "udiv r4, r4, r6\n\t"
+ "add r8, r8, r4\n\t"
+ "umull r4, r5, %[div], r4\n\t"
+ "subs %[d0], %[d0], r4\n\t"
+ "sbc %[d1], %[d1], r5\n\t"
+ /* Final correction with an exact single-word divide. */
+ "udiv r4, %[d0], %[div]\n\t"
+ "add r8, r8, r4\n\t"
+ "mov %[r], r8\n\t"
+ : [r] "+r" (r)
+ : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+ : "r4", "r5", "r6", "r8"
+ );
+ return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i = 127; i >= 0; i--) {
+ r[i] = a[i] & m;
+ }
+#else
+ int i;
+ int j;
+
+ /* Eight words per outer iteration to encourage unrolled code. */
+ for (i = 0; i < 128; i += 8) {
+ for (j = 0; j < 8; j++) {
+ r[i + j] = a[i + j] & m;
+ }
+ }
+#endif
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word down.  r3 starts as all-ones and
+ * is cleared the first time a difference is seen, so only the most
+ * significant differing word decides the result; all words are always
+ * visited.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit r = 0;
+
+
+ __asm__ __volatile__ (
+ /* r3 = "still undecided" mask, r6 = byte offset of a[127] (0x1fc). */
+ "mov r3, #0\n\t"
+ "mvn r3, r3\n\t"
+ "mov r6, #1\n\t"
+ "lsl r6, r6, #8\n\t"
+ "add r6, r6, #252\n\t"
+ "\n1:\n\t"
+ "ldr r8, [%[a], r6]\n\t"
+ "ldr r5, [%[b], r6]\n\t"
+ /* Once decided, the masked words compare equal from here on. */
+ "and r8, r8, r3\n\t"
+ "and r5, r5, r3\n\t"
+ "mov r4, r8\n\t"
+ /* r += -1 when a[i] < b[i]; clear the mask when they differ. */
+ "subs r8, r8, r5\n\t"
+ "sbc r8, r8, r8\n\t"
+ "add %[r], %[r], r8\n\t"
+ "mvn r8, r8\n\t"
+ "and r3, r3, r8\n\t"
+ /* r += +1 when a[i] > b[i]; clear the mask when they differ. */
+ "subs r5, r5, r4\n\t"
+ "sbc r8, r8, r8\n\t"
+ "sub %[r], %[r], r8\n\t"
+ "mvn r8, r8\n\t"
+ "and r3, r3, r8\n\t"
+ "sub r6, r6, #4\n\t"
+ "cmp r6, #0\n\t"
+ "bge 1b\n\t"
+ : [r] "+r" (r)
+ : [a] "r" (a), [b] "r" (b)
+ : "r3", "r4", "r5", "r6", "r8"
+ );
+
+ return r;
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Word-at-a-time schoolbook division.  The quotient estimate from
+ * div_4096_word_128 may be slightly too large; the two masked adds put
+ * the partial remainder back in range without branching on the data.
+ *
+ * a Number to be divided. 256 words.
+ * d Number to divide with. 128 words; assumed normalized (top bit set,
+ *   as the callers require a 4096-bit modulus).
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[256], t2[129];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ /* Most significant word of the divisor drives the estimates. */
+ div = d[127];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+ for (i=127; i>=0; i--) {
+ r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+ /* t1[i..128+i] -= r1 * d; t1[128+i] tracks any underflow. */
+ sp_4096_mul_d_128(t2, d, r1);
+ t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+ t1[128 + i] -= t2[128];
+ /* Add d back, masked by the underflow indicator (at most twice). */
+ sp_4096_mask_128(t2, d, t1[128 + i]);
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+ sp_4096_mask_128(t2, d, t1[128 + i]);
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
+ }
+
+ /* One final conditional subtract leaves the remainder in [0, d). */
+ r1 = sp_4096_cmp_128(t1, d) >= 0;
+ sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result. 128 words.
+ * a A single precision number that is to be reduced. 256 words.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_4096_div_128(a, m, NULL, r);
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Variant of sp_4096_div_128 that branches on the partial remainder
+ * instead of using masked adds; not constant time, so intended for
+ * public values only.
+ *
+ * a Number to be divided. 256 words.
+ * d Number to divide with. 128 words.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[256], t2[129];
+ sp_digit div, r1;
+ int i;
+
+ (void)m;
+
+ /* Most significant word of the divisor drives the estimates. */
+ div = d[127];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
+ for (i=127; i>=0; i--) {
+ r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
+
+ /* t1[i..128+i] -= r1 * d; add d back only on actual underflow. */
+ sp_4096_mul_d_128(t2, d, r1);
+ t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
+ t1[128 + i] -= t2[128];
+ if (t1[128 + i] != 0) {
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+ if (t1[128 + i] != 0)
+ t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
+ }
+ }
+
+ /* One final conditional subtract leaves the remainder in [0, d). */
+ r1 = sp_4096_cmp_128(t1, d) >= 0;
+ sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Not constant time (see sp_4096_div_128_cond); for public values only.
+ *
+ * r A single precision number that is the reduced result. 128 words.
+ * a A single precision number that is to be reduced. 256 words.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_4096_div_128_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifdef WOLFSSL_SP_SMALL
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 4-bit window exponentiation in Montgomery form: t[i] = a^i * R
+ * mod m for i = 0..15 (t[0] holds R mod m, the Montgomery one), and the
+ * exponent is consumed four bits at a time from the most significant
+ * end: four squarings then one table multiply per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[16][256];
+#else
+ sp_digit* t[16];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<16; i++) {
+ t[i] = td + i * 256;
+ }
+#endif
+ norm = t[0];
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_128(norm, m);
+
+ /* Convert a to Montgomery form in t[1]: placing the value in the
+  * top 128 words and reducing computes a * 2^4096 mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+ if (reduceA != 0) {
+ err = sp_4096_mod_128(t[1] + 128, a, m);
+ if (err == MP_OKAY) {
+ err = sp_4096_mod_128(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+ err = sp_4096_mod_128(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the window table: t[i] = a^i in Montgomery form. */
+ sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+ sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+ sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+ sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+ sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+ sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+ sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+ sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+ sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+ sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+
+ /* Position at the most significant window. n is the current
+  * exponent word (left-aligned), c the number of bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ /* Consume the (possibly short) leading window first so every
+  * later window is exactly 4 bits. */
+ c -= bits % 4;
+ if (c == 32) {
+ c = 28;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+ for (; i>=0 || c>=4; ) {
+ if (c == 0) {
+ /* Refill the bit buffer from the next exponent word. */
+ n = e[i--];
+ y = n >> 28;
+ n <<= 4;
+ c = 28;
+ }
+ else if (c < 4) {
+ /* Window straddles two exponent words. */
+ y = n >> 28;
+ n = e[i--];
+ c = 4 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 28) & 0xf;
+ n <<= 4;
+ c -= 4;
+ }
+
+ /* r = r^16 * t[y]; t[0] = Montgomery one for all-zero windows. */
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+
+ sp_4096_mont_mul_128(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form (reduce 256 -> 128 words) and
+  * subtract m once if needed for a fully reduced result. */
+ XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+ sp_4096_mont_reduce_128(r, m, mp);
+
+ mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+ sp_4096_cond_sub_128(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#else
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window exponentiation in Montgomery form: t[i] = a^i * R
+ * mod m for i = 0..31 (t[0] holds R mod m, the Montgomery one), and the
+ * exponent is consumed five bits at a time from the most significant
+ * end: five squarings then one table multiply per window.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
+ int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit t[32][256];
+#else
+ sp_digit* t[32];
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit mp = 1;
+ sp_digit n;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ for (i=0; i<32; i++) {
+ t[i] = td + i * 256;
+ }
+#endif
+ norm = t[0];
+
+ sp_4096_mont_setup(m, &mp);
+ sp_4096_mont_norm_128(norm, m);
+
+ /* Convert a to Montgomery form in t[1]: placing the value in the
+  * top 128 words and reducing computes a * 2^4096 mod m. */
+ XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
+ if (reduceA != 0) {
+ err = sp_4096_mod_128(t[1] + 128, a, m);
+ if (err == MP_OKAY) {
+ err = sp_4096_mod_128(t[1], t[1], m);
+ }
+ }
+ else {
+ XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
+ err = sp_4096_mod_128(t[1], t[1], m);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Build the window table: t[i] = a^i in Montgomery form. */
+ sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
+ sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
+ sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
+ sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
+ sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
+ sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
+ sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
+ sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
+ sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
+ sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
+ sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
+ sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
+ sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
+ sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
+ sp_4096_mont_sqr_128(t[16], t[ 8], m, mp);
+ sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp);
+ sp_4096_mont_sqr_128(t[18], t[ 9], m, mp);
+ sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp);
+ sp_4096_mont_sqr_128(t[20], t[10], m, mp);
+ sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp);
+ sp_4096_mont_sqr_128(t[22], t[11], m, mp);
+ sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp);
+ sp_4096_mont_sqr_128(t[24], t[12], m, mp);
+ sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp);
+ sp_4096_mont_sqr_128(t[26], t[13], m, mp);
+ sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp);
+ sp_4096_mont_sqr_128(t[28], t[14], m, mp);
+ sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp);
+ sp_4096_mont_sqr_128(t[30], t[15], m, mp);
+ sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp);
+
+ /* Position at the most significant window. n is the current
+  * exponent word (left-aligned), c the number of bits left in n. */
+ i = (bits - 1) / 32;
+ n = e[i--];
+ c = bits & 31;
+ if (c == 0) {
+ c = 32;
+ }
+ /* Consume the (possibly short) leading window first so every
+  * later window is exactly 5 bits. */
+ c -= bits % 5;
+ if (c == 32) {
+ c = 27;
+ }
+ y = (int)(n >> c);
+ n <<= 32 - c;
+ XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
+ for (; i>=0 || c>=5; ) {
+ if (c == 0) {
+ /* Refill the bit buffer from the next exponent word. */
+ n = e[i--];
+ y = n >> 27;
+ n <<= 5;
+ c = 27;
+ }
+ else if (c < 5) {
+ /* Window straddles two exponent words. */
+ y = n >> 27;
+ n = e[i--];
+ c = 5 - c;
+ y |= n >> (32 - c);
+ n <<= c;
+ c = 32 - c;
+ }
+ else {
+ y = (n >> 27) & 0x1f;
+ n <<= 5;
+ c -= 5;
+ }
+
+ /* r = r^32 * t[y]; t[0] = Montgomery one for all-zero windows. */
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ sp_4096_mont_sqr_128(r, r, m, mp);
+
+ sp_4096_mont_mul_128(r, r, t[y], m, mp);
+ }
+
+ /* Convert out of Montgomery form (reduce 256 -> 128 words) and
+  * subtract m once if needed for a fully reduced result. */
+ XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
+ sp_4096_mont_reduce_128(r, m, mp);
+
+ mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
+ sp_4096_cond_sub_128(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL) {
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_SP_SMALL */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * Computes in^e mod m with a single-word exponent (at most 32 bits).
+ * The common e == 3 case uses one squaring and one multiply; otherwise
+ * a left-to-right square-and-multiply over the bits of e is used.  The
+ * exponent is public, so no constant-time measures are taken here.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[256], m[128], r[256];
+#else
+ sp_digit* d = NULL;
+ sp_digit* a;
+ sp_digit* m;
+ sp_digit* r;
+#endif
+ sp_digit *ah;
+ sp_digit e[1];
+ int err = MP_OKAY;
+
+ if (*outLen < 512)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 ||
+ mp_count_bits(mm) != 4096))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 128 * 2;
+ m = r + 128 * 2;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ /* Load the base into the top half of a: a then represents
+  * in * 2^4096, ready for Montgomery conversion below. */
+ ah = a + 128;
+
+ sp_4096_from_bin(ah, 128, in, inLen);
+#if DIGIT_BIT >= 32
+ e[0] = em->dp[0];
+#else
+ e[0] = em->dp[0];
+ if (em->used > 1) {
+ e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+ }
+#endif
+ if (e[0] == 0) {
+ err = MP_EXPTMOD_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ sp_4096_from_mp(m, 128, mm);
+
+ if (e[0] == 0x3) {
+ /* in^3 = (in^2) * in: no Montgomery conversion needed. */
+ if (err == MP_OKAY) {
+ sp_4096_sqr_128(r, ah);
+ err = sp_4096_mod_128_cond(r, r, m);
+ }
+ if (err == MP_OKAY) {
+ sp_4096_mul_128(r, ah, r);
+ err = sp_4096_mod_128_cond(r, r, m);
+ }
+ }
+ else {
+ int i;
+ sp_digit mp;
+
+ sp_4096_mont_setup(m, &mp);
+
+ /* Convert to Montgomery form. */
+ /* Zero the low half: a = in * 2^4096, so a mod m = in*R mod m. */
+ XMEMSET(a, 0, sizeof(sp_digit) * 128);
+ err = sp_4096_mod_128_cond(a, a, m);
+
+ if (err == MP_OKAY) {
+ /* Find the highest set bit of the exponent. */
+ for (i = 31; i >= 0; i--) {
+ if (e[0] >> i) {
+ break;
+ }
+ }
+
+ /* Left-to-right binary exponentiation in Montgomery form. */
+ XMEMCPY(r, a, sizeof(sp_digit) * 128);
+ for (i--; i>=0; i--) {
+ sp_4096_mont_sqr_128(r, r, m, mp);
+ if (((e[0] >> i) & 1) == 1) {
+ sp_4096_mont_mul_128(r, r, a, m, mp);
+ }
+ }
+ /* Convert out of Montgomery form (reduce 256 -> 128 words). */
+ XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
+ sp_4096_mont_reduce_128(r, m, mp);
+
+ /* At most one subtraction of m fully reduces the result. */
+ for (i = 127; i > 0; i--) {
+ if (r[i] != m[i]) {
+ break;
+ }
+ }
+ if (r[i] >= m[i]) {
+ sp_4096_sub_in_place_128(r, m);
+ }
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#endif
+
+ return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+ sp_digit* a;
+ sp_digit* d = NULL;
+ sp_digit* m;
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 512U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 4096) {
+ err = MP_READ_E;
+ }
+ if (inLen > 512) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+ if (err == MP_OKAY) {
+ a = d + 128;
+ m = a + 256;
+ r = a;
+
+ sp_4096_from_bin(a, 128, in, inLen);
+ sp_4096_from_mp(d, 128, dm);
+ sp_4096_from_mp(m, 128, mm);
+ err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
+ }
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 128);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+
+ return err;
+#else
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Constant time: the mask is ANDed into each word of b, so either b or
+ * zero is added.  Operates on 64 words (2048 bits).
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ * returns the carry out (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ sp_digit m)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ /* r9 = byte length to process (0x100 = 64 words). */
+ "mov r5, #1\n\t"
+ "lsl r5, r5, #8\n\t"
+ "mov r9, r5\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ "ldr r6, [%[b], r8]\n\t"
+ "and r6, r6, %[m]\n\t"
+ /* Restore the carry flag from c (0 or 1): c + 0xffffffff. */
+ "adds r5, %[c], #-1\n\t"
+ "ldr r5, [%[a], r8]\n\t"
+ "adcs r5, r5, r6\n\t"
+ /* Capture the new carry back into c. */
+ "mov %[c], #0\n\t"
+ "adcs %[c], %[c], %[c]\n\t"
+ "str r5, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, r9\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r5", "r6", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* RSA private key operation.
+ *
+ * Uses the Chinese Remainder Theorem with the two 2048-bit primes:
+ *   tmpa = in^dp mod p, tmpb = in^dq mod q,
+ *   tmpa = ((tmpa - tmpb) * qInv) mod p,
+ *   r = tmpb + q * tmpa.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit a[128 * 2];
+ sp_digit p[64], q[64], dp[64];
+ sp_digit tmpa[128], tmpb[128];
+#else
+ sp_digit* t = NULL;
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+#endif
+ sp_digit* r;
+ sp_digit* qi;
+ sp_digit* dq;
+ sp_digit c;
+ int err = MP_OKAY;
+
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 512)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ /* Carve up the single allocation.  dp, dq and qi share one buffer
+  * (they are loaded and used strictly one after another), and r
+  * aliases the top half of a, which is not read once the result is
+  * assembled. */
+ a = t;
+ p = a + 128 * 2;
+ q = p + 64;
+ qi = dq = dp = q + 64;
+ tmpa = qi + 64;
+ tmpb = tmpa + 128;
+
+ r = t + 128;
+ }
+#else
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ /* Same aliasing as the heap variant, using the stack arrays. */
+ r = a;
+ qi = dq = dp;
+#endif
+ sp_4096_from_bin(a, 128, in, inLen);
+ sp_4096_from_mp(p, 64, pm);
+ sp_4096_from_mp(q, 64, qm);
+ sp_4096_from_mp(dp, 64, dpm);
+
+ /* tmpa = in^dp mod p (2048-bit exponentiation). */
+ err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1);
+ }
+ if (err == MP_OKAY) {
+ /* tmpb = in^dq mod q. */
+ sp_4096_from_mp(dq, 64, dqm);
+ err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ /* tmpa = (tmpa - tmpb) mod p: p is conditionally added back up to
+  * twice, driven by the borrow/carry, keeping this constant time. */
+ c = sp_2048_sub_in_place_64(tmpa, tmpb);
+ c += sp_4096_cond_add_64(tmpa, tmpa, p, c);
+ sp_4096_cond_add_64(tmpa, tmpa, p, c);
+
+ /* tmpa = (tmpa * qInv) mod p. */
+ sp_2048_from_mp(qi, 64, qim);
+ sp_2048_mul_64(tmpa, tmpa, qi);
+ err = sp_2048_mod_64(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = tmpb + q * tmpa. */
+ sp_2048_mul_64(tmpa, q, tmpa);
+ XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64);
+ sp_4096_add_128(r, tmpb, tmpa);
+
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ /* Scrub private material before freeing. */
+ XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+#else
+ /* Scrub private material from the stack. */
+ XMEMSET(tmpa, 0, sizeof(tmpa));
+ XMEMSET(tmpb, 0, sizeof(tmpb));
+ XMEMSET(p, 0, sizeof(p));
+ XMEMSET(q, 0, sizeof(q));
+ XMEMSET(dp, 0, sizeof(dp));
+#endif
+
+ return err;
+}
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer. 128 32-bit words.
+ * r A multi-precision integer.
+ * returns MP_OKAY on success, or the error from mp_grow.
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ /* Same digit size: straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 128);
+ r->used = 128;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* mp_int digits are narrower: split each 32-bit word across
+  * several mp digits. s tracks the bit position within a[i]. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 128; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp_int digits are wider: pack multiple 32-bit words into each
+  * mp digit. s tracks how many bits of r->dp[j] are filled. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 128; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ sp_digit b[256], e[128], m[128];
+ sp_digit* r = b;
+ int expBits = mp_count_bits(exp);
+
+ /* Validate operand sizes: base and exponent must fit in 4096 bits and
+  * the modulus must be exactly 4096 bits. */
+ if ((mp_count_bits(base) > 4096) || (expBits > 4096) ||
+ (mp_count_bits(mod) != 4096)) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_from_mp(b, 128, base);
+ sp_4096_from_mp(e, 128, exp);
+ sp_4096_from_mp(m, 128, mod);
+
+ err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_4096_to_mp(r, res);
+ }
+
+ /* Scrub the exponent from the stack. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
+#ifdef HAVE_FFDHE_4096
/* Shift number up by n bits into r. (r = a << n)
 *
 * The result is 129 words: r[1..128] receive the shifted words of a and the
 * bits shifted out of a[127] land in r[128]; r[0] is a[0] << n.
 * Each output word is formed as (a[i] << n) | ((a[i-1] >> 1) >> (31 - n));
 * the two-step right shift keeps the shift count below 32 so that n == 0 is
 * handled correctly (a single shift of 32 would be ill-defined).
 *
 * r  Result array of at least 129 32-bit digits.
 * a  128-digit (4096-bit) number to shift.
 * n  Bits to shift up by — caller supplies a 5-bit value (0..31); TODO confirm
 *    no caller passes n >= 32.
 *
 * The loop is fully unrolled; pointers are walked from the top 16-word chunk
 * of a/r down to the bottom, rotating result words through r2/r3/r4.
 */
static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n)
{
    __asm__ __volatile__ (
        /* r6 = 31 - n: the residual right-shift amount (paired with >> 1). */
        "mov r6, #31\n\t"
        "sub r6, r6, %[n]\n\t"
        /* Start at the top chunk: a += 112 words, r += 112 words. */
        "add %[a], %[a], #448\n\t"
        "add %[r], %[r], #448\n\t"
        /* Top word a[127]: its spill-over becomes r[128]. */
        "ldr r3, [%[a], #60]\n\t"
        "lsr r4, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r4, r4, r6\n\t"
        "ldr r2, [%[a], #56]\n\t"
        "str r4, [%[r], #64]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #52]\n\t"
        "str r3, [%[r], #60]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #48]\n\t"
        "str r2, [%[r], #56]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #44]\n\t"
        "str r4, [%[r], #52]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #40]\n\t"
        "str r3, [%[r], #48]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #36]\n\t"
        "str r2, [%[r], #44]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #32]\n\t"
        "str r4, [%[r], #40]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #28]\n\t"
        "str r3, [%[r], #36]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #24]\n\t"
        "str r2, [%[r], #32]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #20]\n\t"
        "str r4, [%[r], #28]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #16]\n\t"
        "str r3, [%[r], #24]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #12]\n\t"
        "str r2, [%[r], #20]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #8]\n\t"
        "str r4, [%[r], #16]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #4]\n\t"
        "str r3, [%[r], #12]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #0]\n\t"
        "str r2, [%[r], #8]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        /* Step down to the next 16-word chunk (1 of 7). */
        "sub %[a], %[a], #64\n\t"
        "sub %[r], %[r], #64\n\t"
        "ldr r2, [%[a], #60]\n\t"
        "str r4, [%[r], #68]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #56]\n\t"
        "str r3, [%[r], #64]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #52]\n\t"
        "str r2, [%[r], #60]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #48]\n\t"
        "str r4, [%[r], #56]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #44]\n\t"
        "str r3, [%[r], #52]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #40]\n\t"
        "str r2, [%[r], #48]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #36]\n\t"
        "str r4, [%[r], #44]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #32]\n\t"
        "str r3, [%[r], #40]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #28]\n\t"
        "str r2, [%[r], #36]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #24]\n\t"
        "str r4, [%[r], #32]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #20]\n\t"
        "str r3, [%[r], #28]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #16]\n\t"
        "str r2, [%[r], #24]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #12]\n\t"
        "str r4, [%[r], #20]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #8]\n\t"
        "str r3, [%[r], #16]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #4]\n\t"
        "str r2, [%[r], #12]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #0]\n\t"
        "str r4, [%[r], #8]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        /* Step down to the next 16-word chunk (2 of 7). */
        "sub %[a], %[a], #64\n\t"
        "sub %[r], %[r], #64\n\t"
        "ldr r4, [%[a], #60]\n\t"
        "str r3, [%[r], #68]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #56]\n\t"
        "str r2, [%[r], #64]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #52]\n\t"
        "str r4, [%[r], #60]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #48]\n\t"
        "str r3, [%[r], #56]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #44]\n\t"
        "str r2, [%[r], #52]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #40]\n\t"
        "str r4, [%[r], #48]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #36]\n\t"
        "str r3, [%[r], #44]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #32]\n\t"
        "str r2, [%[r], #40]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #28]\n\t"
        "str r4, [%[r], #36]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #24]\n\t"
        "str r3, [%[r], #32]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #20]\n\t"
        "str r2, [%[r], #28]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #16]\n\t"
        "str r4, [%[r], #24]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #12]\n\t"
        "str r3, [%[r], #20]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #8]\n\t"
        "str r2, [%[r], #16]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #4]\n\t"
        "str r4, [%[r], #12]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #0]\n\t"
        "str r3, [%[r], #8]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        /* Step down to the next 16-word chunk (3 of 7). */
        "sub %[a], %[a], #64\n\t"
        "sub %[r], %[r], #64\n\t"
        "ldr r3, [%[a], #60]\n\t"
        "str r2, [%[r], #68]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #56]\n\t"
        "str r4, [%[r], #64]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #52]\n\t"
        "str r3, [%[r], #60]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #48]\n\t"
        "str r2, [%[r], #56]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #44]\n\t"
        "str r4, [%[r], #52]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #40]\n\t"
        "str r3, [%[r], #48]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #36]\n\t"
        "str r2, [%[r], #44]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #32]\n\t"
        "str r4, [%[r], #40]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #28]\n\t"
        "str r3, [%[r], #36]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #24]\n\t"
        "str r2, [%[r], #32]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #20]\n\t"
        "str r4, [%[r], #28]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #16]\n\t"
        "str r3, [%[r], #24]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #12]\n\t"
        "str r2, [%[r], #20]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #8]\n\t"
        "str r4, [%[r], #16]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #4]\n\t"
        "str r3, [%[r], #12]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #0]\n\t"
        "str r2, [%[r], #8]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        /* Step down to the next 16-word chunk (4 of 7). */
        "sub %[a], %[a], #64\n\t"
        "sub %[r], %[r], #64\n\t"
        "ldr r2, [%[a], #60]\n\t"
        "str r4, [%[r], #68]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #56]\n\t"
        "str r3, [%[r], #64]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #52]\n\t"
        "str r2, [%[r], #60]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #48]\n\t"
        "str r4, [%[r], #56]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #44]\n\t"
        "str r3, [%[r], #52]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #40]\n\t"
        "str r2, [%[r], #48]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #36]\n\t"
        "str r4, [%[r], #44]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #32]\n\t"
        "str r3, [%[r], #40]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #28]\n\t"
        "str r2, [%[r], #36]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #24]\n\t"
        "str r4, [%[r], #32]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #20]\n\t"
        "str r3, [%[r], #28]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #16]\n\t"
        "str r2, [%[r], #24]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #12]\n\t"
        "str r4, [%[r], #20]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #8]\n\t"
        "str r3, [%[r], #16]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #4]\n\t"
        "str r2, [%[r], #12]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #0]\n\t"
        "str r4, [%[r], #8]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        /* Step down to the next 16-word chunk (5 of 7). */
        "sub %[a], %[a], #64\n\t"
        "sub %[r], %[r], #64\n\t"
        "ldr r4, [%[a], #60]\n\t"
        "str r3, [%[r], #68]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #56]\n\t"
        "str r2, [%[r], #64]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #52]\n\t"
        "str r4, [%[r], #60]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #48]\n\t"
        "str r3, [%[r], #56]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #44]\n\t"
        "str r2, [%[r], #52]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #40]\n\t"
        "str r4, [%[r], #48]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #36]\n\t"
        "str r3, [%[r], #44]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #32]\n\t"
        "str r2, [%[r], #40]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #28]\n\t"
        "str r4, [%[r], #36]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #24]\n\t"
        "str r3, [%[r], #32]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #20]\n\t"
        "str r2, [%[r], #28]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #16]\n\t"
        "str r4, [%[r], #24]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #12]\n\t"
        "str r3, [%[r], #20]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #8]\n\t"
        "str r2, [%[r], #16]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #4]\n\t"
        "str r4, [%[r], #12]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #0]\n\t"
        "str r3, [%[r], #8]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        /* Step down to the next 16-word chunk (6 of 7). */
        "sub %[a], %[a], #64\n\t"
        "sub %[r], %[r], #64\n\t"
        "ldr r3, [%[a], #60]\n\t"
        "str r2, [%[r], #68]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #56]\n\t"
        "str r4, [%[r], #64]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #52]\n\t"
        "str r3, [%[r], #60]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #48]\n\t"
        "str r2, [%[r], #56]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #44]\n\t"
        "str r4, [%[r], #52]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #40]\n\t"
        "str r3, [%[r], #48]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #36]\n\t"
        "str r2, [%[r], #44]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #32]\n\t"
        "str r4, [%[r], #40]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #28]\n\t"
        "str r3, [%[r], #36]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #24]\n\t"
        "str r2, [%[r], #32]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #20]\n\t"
        "str r4, [%[r], #28]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #16]\n\t"
        "str r3, [%[r], #24]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #12]\n\t"
        "str r2, [%[r], #20]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #8]\n\t"
        "str r4, [%[r], #16]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #4]\n\t"
        "str r3, [%[r], #12]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #0]\n\t"
        "str r2, [%[r], #8]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        /* Step down to the bottom 16-word chunk (7 of 7). */
        "sub %[a], %[a], #64\n\t"
        "sub %[r], %[r], #64\n\t"
        "ldr r2, [%[a], #60]\n\t"
        "str r4, [%[r], #68]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #56]\n\t"
        "str r3, [%[r], #64]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #52]\n\t"
        "str r2, [%[r], #60]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #48]\n\t"
        "str r4, [%[r], #56]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #44]\n\t"
        "str r3, [%[r], #52]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #40]\n\t"
        "str r2, [%[r], #48]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #36]\n\t"
        "str r4, [%[r], #44]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #32]\n\t"
        "str r3, [%[r], #40]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #28]\n\t"
        "str r2, [%[r], #36]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #24]\n\t"
        "str r4, [%[r], #32]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #20]\n\t"
        "str r3, [%[r], #28]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #16]\n\t"
        "str r2, [%[r], #24]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #12]\n\t"
        "str r4, [%[r], #20]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        "ldr r4, [%[a], #8]\n\t"
        "str r3, [%[r], #16]\n\t"
        "lsr r5, r4, #1\n\t"
        "lsl r4, r4, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r2, r2, r5\n\t"
        "ldr r3, [%[a], #4]\n\t"
        "str r2, [%[r], #12]\n\t"
        "lsr r5, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r4, r4, r5\n\t"
        "ldr r2, [%[a], #0]\n\t"
        "str r4, [%[r], #8]\n\t"
        "lsr r5, r2, #1\n\t"
        "lsl r2, r2, %[n]\n\t"
        "lsr r5, r5, r6\n\t"
        "orr r3, r3, r5\n\t"
        /* Bottom two words: nothing flows in from below r[0]. */
        "str r2, [%[r]]\n\t"
        "str r3, [%[r], #4]\n\t"
        :
        /* NOTE(review): %[a] and %[r] are modified inside the asm (add #448
         * then seven sub #64) but net to their original values on exit, so
         * the input-only constraints hold; confirm no compiler assumes the
         * registers are unchanged mid-asm. */
        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
        : "memory", "r2", "r3", "r4", "r5", "r6"
    );
}
+
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
 *
 * Processes the exponent in 5-bit windows from the most significant bit down.
 * Because the base is 2, multiplying by 2^y is a left shift; the word shifted
 * out of the top (r[128]) is folded back in by adding r[128] * norm, where
 * norm is the Montgomery normalizer for m.
 *
 * r    A single precision number that is the result of the operation.
 * e    A single precision number that is the exponent.
 * bits The number of bits in the exponent.
 * m    A single precision number that is the modulus.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
        const sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit nd[256];   /* Montgomery normalizer. */
    sp_digit td[129];   /* Scratch for the 129-word mul-by-digit product. */
#else
    sp_digit* td;       /* One allocation split into norm and tmp below. */
#endif
    sp_digit* norm;
    sp_digit* tmp;
    sp_digit mp = 1;    /* Montgomery multiplier, set by sp_4096_mont_setup. */
    sp_digit n, o;      /* n: current exponent word; o: carry out of add. */
    sp_digit mask;
    int i;
    int c, y;           /* c: bits left in n; y: current 5-bit window value. */
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        norm = td;
        tmp = td + 256;
#else
        norm = nd;
        tmp = td;
#endif

        sp_4096_mont_setup(m, &mp);
        sp_4096_mont_norm_128(norm, m);

        /* Start at the word holding the top bit of the exponent. */
        i = (bits - 1) / 32;
        n = e[i--];
        /* c = number of significant bits in the top word. */
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        /* Align so remaining windows are exactly 5 bits each. */
        c -= bits % 5;
        if (c == 32) {
            c = 27;
        }
        /* First (possibly short) window: r = norm << y = 2^y in Mont. form. */
        y = (int)(n >> c);
        n <<= 32 - c;
        sp_4096_lshift_128(r, norm, y);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word exhausted exactly on a window boundary. */
                n = e[i--];
                y = n >> 27;
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles two exponent words. */
                y = n >> 27;
                n = e[i--];
                c = 5 - c;
                y |= n >> (32 - c);
                n <<= c;
                c = 32 - c;
            }
            else {
                /* Window fully inside the current word. */
                y = (n >> 27) & 0x1f;
                n <<= 5;
                c -= 5;
            }

            /* r = r^32 (five Montgomery squarings). */
            sp_4096_mont_sqr_128(r, r, m, mp);
            sp_4096_mont_sqr_128(r, r, m, mp);
            sp_4096_mont_sqr_128(r, r, m, mp);
            sp_4096_mont_sqr_128(r, r, m, mp);
            sp_4096_mont_sqr_128(r, r, m, mp);

            /* r *= 2^y: shift up, then reduce the overflow word r[128]. */
            sp_4096_lshift_128(r, r, y);
            sp_4096_mul_d_128(tmp, norm, r[128]);
            r[128] = 0;
            o = sp_4096_add_128(r, r, tmp);
            sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o);
        }

        /* Convert back from Montgomery form. */
        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
        sp_4096_mont_reduce_128(r, m, mp);

        /* Constant-time final subtraction so r < m. */
        mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
        sp_4096_cond_sub_128(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 512 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+ int err = MP_OKAY;
+ sp_digit b[256], e[128], m[128];
+ sp_digit* r = b;
+ word32 i;
+
+ if (mp_count_bits(base) > 4096) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ if (expLen > 512) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ if (mp_count_bits(mod) != 4096) {
+ err = MP_READ_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_from_mp(b, 128, base);
+ sp_4096_from_bin(e, 128, exp, expLen);
+ sp_4096_from_mp(m, 128, mod);
+
+ #ifdef HAVE_FFDHE_4096
+ if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
+ err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+ else
+ #endif
+ err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_to_bin(r, out);
+ *outLen = 512;
+ for (i=0; i<512 && out[i] == 0; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+
+ }
+
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
/* Point structure to use. */
typedef struct sp_point_256 {
    /* X ordinate, 8 words used; double-width (2 * 8) — presumably so the
     * buffer can also hold a full-width intermediate; TODO confirm against
     * callers. */
    sp_digit x[2 * 8];
    /* Y ordinate, 8 words used (double-width buffer as above). */
    sp_digit y[2 * 8];
    /* Z ordinate, 8 words used (double-width buffer as above). */
    sp_digit z[2 * 8];
    /* Non-zero when the point is the point at infinity. */
    int infinity;
} sp_point_256;
+
/* The modulus (prime) of the curve P256. */
static const sp_digit p256_mod[8] = {
    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
    0x00000001,0xffffffff
};
/* The Montgomery normalizer for modulus of the curve P256. */
static const sp_digit p256_norm_mod[8] = {
    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
    0xfffffffe,0x00000000
};
/* The Montgomery multiplier for modulus of the curve P256. */
static const sp_digit p256_mp_mod = 0x00000001;
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                            defined(HAVE_ECC_VERIFY)
/* The order of the curve P256. */
static const sp_digit p256_order[8] = {
    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
    0x00000000,0xffffffff
};
#endif
/* The order of the curve P256 minus 2. */
static const sp_digit p256_order2[8] = {
    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
    0x00000000,0xffffffff
};
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery normalizer for order of the curve P256. */
static const sp_digit p256_norm_order[8] = {
    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
    0xffffffff,0x00000000
};
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery multiplier for order of the curve P256. */
static const sp_digit p256_mp_order = 0xee00bc4f;
#endif
/* The base point of curve P256. */
static const sp_point_256 p256_base = {
    /* X ordinate */
    {
        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
        0xe12c4247,0x6b17d1f2,
        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* Y ordinate */
    {
        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
        0xfe1a7f9b,0x4fe342e2,
        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* Z ordinate */
    {
        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000,
        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* infinity */
    0
};
#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
/* The 'b' coefficient of the curve equation for P256. */
static const sp_digit p256_b[8] = {
    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
    0xaa3a93e7,0x5ac635d8
};
#endif
+
+static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+ int ret = MP_OKAY;
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ (void)sp;
+ *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+ *p = sp;
+#endif
+ if (*p == NULL) {
+ ret = MEMORY_E;
+ }
+ return ret;
+}
+
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
/* Allocate memory for point and return error. The stack point 'sp' is unused
 * on this path. */
#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
#else
/* Set pointer to data and return no error. Uses the caller's stack point
 * 'sp'; no allocation occurs. */
#define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
#endif
+
+
+static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+ if (p != NULL) {
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+/* Clear point data if requested. */
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+#endif
+ (void)heap;
+}
+
/* Multiply a number by Montgomery normalizer mod modulus (prime).
 *
 * Computes r = a * 2^256 mod p256 using the fixed reduction matrix for the
 * P-256 prime (each output word is a small signed combination of the input
 * words — see the per-row comments), then normalizes carries.
 *
 * r The resulting Montgomery form number.
 * a The number to convert.
 * m The modulus (prime). Unused: the P-256 prime is baked in.
 * returns MP_OKAY always.
 */
static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    int64_t t[8];
    int64_t a64[8];
    int64_t o;

    (void)m;

    /* Widen to 64 bits so the signed row sums below cannot overflow. */
    a64[0] = a[0];
    a64[1] = a[1];
    a64[2] = a[2];
    a64[3] = a[3];
    a64[4] = a[4];
    a64[5] = a[5];
    a64[6] = a[6];
    a64[7] = a[7];

    /* Each row lists the coefficients applied to a64[0..7]. */
    /* 1 1 0 -1 -1 -1 -1 0 */
    t[0] = 0 + a64[0] + a64[1] - a64[3] - a64[4] - a64[5] - a64[6];
    /* 0 1 1 0 -1 -1 -1 -1 */
    t[1] = 0 + a64[1] + a64[2] - a64[4] - a64[5] - a64[6] - a64[7];
    /* 0 0 1 1 0 -1 -1 -1 */
    t[2] = 0 + a64[2] + a64[3] - a64[5] - a64[6] - a64[7];
    /* -1 -1 0 2 2 1 0 -1 */
    t[3] = 0 - a64[0] - a64[1] + 2 * a64[3] + 2 * a64[4] + a64[5] - a64[7];
    /* 0 -1 -1 0 2 2 1 0 */
    t[4] = 0 - a64[1] - a64[2] + 2 * a64[4] + 2 * a64[5] + a64[6];
    /* 0 0 -1 -1 0 2 2 1 */
    t[5] = 0 - a64[2] - a64[3] + 2 * a64[5] + 2 * a64[6] + a64[7];
    /* -1 -1 0 0 0 1 3 2 */
    t[6] = 0 - a64[0] - a64[1] + a64[5] + 3 * a64[6] + 2 * a64[7];
    /* 1 0 -1 -1 -1 -1 0 3 */
    t[7] = 0 + a64[0] - a64[2] - a64[3] - a64[4] - a64[5] + 3 * a64[7];

    /* First carry propagation pass across the 32-bit words. */
    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
    /* Fold the top carry o back in: o * 2^256 is congruent to the word
     * pattern (+o, 0, 0, -o, 0, 0, -o, +o) mod p256. */
    o = t[7] >> 32; t[7] &= 0xffffffff;
    t[0] += o;
    t[3] -= o;
    t[6] -= o;
    t[7] += o;
    /* Second carry propagation pass after the fold. */
    t[1] += t[0] >> 32; t[0] &= 0xffffffff;
    t[2] += t[1] >> 32; t[1] &= 0xffffffff;
    t[3] += t[2] >> 32; t[2] &= 0xffffffff;
    t[4] += t[3] >> 32; t[3] &= 0xffffffff;
    t[5] += t[4] >> 32; t[4] &= 0xffffffff;
    t[6] += t[5] >> 32; t[5] &= 0xffffffff;
    t[7] += t[6] >> 32; t[6] &= 0xffffffff;
    r[0] = t[0];
    r[1] = t[1];
    r[2] = t[2];
    r[3] = t[3];
    r[4] = t[4];
    r[5] = t[5];
    r[6] = t[6];
    r[7] = t[7];

    return MP_OKAY;
}
+
/* Convert an mp_int to an array of sp_digit.
 *
 * Three variants depending on how the mp_int digit size (DIGIT_BIT) compares
 * to the 32-bit sp_digit: equal (plain copy), larger (split each mp_digit
 * across several sp_digits) or smaller (pack several mp_digits per sp_digit).
 * Unused high words of r are zeroed in all cases.
 *
 * r     A single precision integer.
 * size  Maximum number of bytes to convert
 * a     A multi-precision integer.
 */
static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 32
    /* Digit sizes match: straight copy then zero-fill. */
    int j;

    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);

    for (j = a->used; j < size; j++) {
        r[j] = 0;
    }
#elif DIGIT_BIT > 32
    /* Each mp_digit spans one or more 32-bit output words.
     * s tracks the bit offset consumed from the current mp_digit. */
    int i, j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0xffffffff;
        s = 32U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        while ((s + 32U) <= (word32)DIGIT_BIT) {
            s += 32U;
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = 0L;
            }
        }
        s = (word32)DIGIT_BIT - s;
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    /* Several mp_digits pack into each 32-bit output word.
     * s is the bit position within the current output word. */
    int i, j = 0, s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 32) {
            /* Output word filled: mask it and spill the remaining bits of
             * this mp_digit into the next word. */
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            s = 32 - s;
            if (s == DIGIT_BIT) {
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ *
+ * p Point of type sp_point_256 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm)
+{
+ XMEMSET(p->x, 0, sizeof(p->x));
+ XMEMSET(p->y, 0, sizeof(p->y));
+ XMEMSET(p->z, 0, sizeof(p->z));
+ sp_256_from_mp(p->x, 8, pm->x);
+ sp_256_from_mp(p->y, 8, pm->y);
+ sp_256_from_mp(p->z, 8, pm->z);
+ p->infinity = 0;
+}
+
/* Convert an array of sp_digit to an mp_int.
 *
 * Inverse of sp_256_from_mp: repacks eight 32-bit words into mp_digits of
 * DIGIT_BIT bits. The mp_int is grown to hold 256 bits first.
 *
 * a A single precision integer.
 * r A multi-precision integer.
 */
static int sp_256_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 32
        /* Digit sizes match: straight copy. */
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
        r->used = 8;
        mp_clamp(r);
#elif DIGIT_BIT < 32
        /* Each 32-bit word spreads over several smaller mp_digits.
         * s is the bit offset consumed from the current word. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 8; i++) {
            r->dp[j] |= (mp_digit)(a[i] << s);
            r->dp[j] &= (1L << DIGIT_BIT) - 1;
            s = DIGIT_BIT - s;
            r->dp[++j] = (mp_digit)(a[i] >> s);
            while (s + DIGIT_BIT <= 32) {
                s += DIGIT_BIT;
                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = (mp_digit)(a[i] >> s);
                }
            }
            s = 32 - s;
        }
        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        /* Several 32-bit words pack into each larger mp_digit.
         * s is the bit position within the current mp_digit. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 8; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 32 >= DIGIT_BIT) {
                /* mp_digit filled: mask (when DIGIT_BIT is not a power-of-two
                 * word size) and spill the remaining bits into the next. */
            #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                r->dp[j] &= (1L << DIGIT_BIT) - 1;
            #endif
                s = DIGIT_BIT - s;
                r->dp[++j] = a[i] >> s;
                s = 32 - s;
            }
            else {
                s += 32;
            }
        }
        r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#endif
    }

    return err;
}
+
+/* Convert a point of type sp_point_256 to type ecc_point.
+ *
+ * Converts the x, y and z coordinates in order, stopping at the first
+ * conversion that fails.
+ *
+ * p Point of type sp_point_256.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm)
+{
+    int err = MP_OKAY;
+    const sp_digit* src[3];
+    mp_int* dst[3];
+    int i;
+
+    src[0] = p->x; src[1] = p->y; src[2] = p->z;
+    dst[0] = pm->x; dst[1] = pm->y; dst[2] = pm->z;
+
+    /* Short-circuit on the first error, like the original if-chain. */
+    for (i = 0; (i < 3) && (err == MP_OKAY); i++) {
+        err = sp_256_to_mp(src[i], dst[i]);
+    }
+
+    return err;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Fully unrolled schoolbook (comba) multiply of two 8x32-bit numbers
+ * producing a 16-word result. Each result column is accumulated in a
+ * rotating triple of registers (r3/r4/r5); r6 and r8 hold the umull
+ * product low/high words. The low 8 result words are written to the
+ * on-stack tmp buffer and only copied into r at the end, which allows
+ * r to alias a or b.
+ *
+ * r A single precision integer (16 words, result).
+ * a A single precision integer (8 words).
+ * b A single precision integer (8 words).
+ */
+SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a,
+ const sp_digit* b)
+{
+ sp_digit tmp[8]; /* low half accumulator; protects a/b if r aliases them */
+
+ __asm__ __volatile__ (
+ /* A[0] * B[0] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "umull r3, r4, r6, r8\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [%[tmp], #0]\n\t"
+ "mov r3, #0\n\t"
+ /* A[0] * B[1] */
+ "ldr r8, [%[b], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adc r5, r5, r8\n\t"
+ /* A[1] * B[0] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "str r4, [%[tmp], #4]\n\t"
+ "mov r4, #0\n\t"
+ /* A[0] * B[2] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[1] * B[1] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[b], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[2] * B[0] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [%[tmp], #8]\n\t"
+ "mov r5, #0\n\t"
+ /* A[0] * B[3] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[b], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[1] * B[2] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[2] * B[1] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[b], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[3] * B[0] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r3, [%[tmp], #12]\n\t"
+ "mov r3, #0\n\t"
+ /* A[0] * B[4] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[1] * B[3] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[b], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[2] * B[2] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[3] * B[1] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[4] * B[0] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "str r4, [%[tmp], #16]\n\t"
+ "mov r4, #0\n\t"
+ /* A[0] * B[5] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[b], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[1] * B[4] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[2] * B[3] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[b], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[3] * B[2] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[4] * B[1] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[b], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[5] * B[0] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [%[tmp], #20]\n\t"
+ "mov r5, #0\n\t"
+ /* A[0] * B[6] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[1] * B[5] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[b], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[2] * B[4] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[3] * B[3] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[4] * B[2] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[5] * B[1] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[b], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[6] * B[0] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r3, [%[tmp], #24]\n\t"
+ "mov r3, #0\n\t"
+ /* A[0] * B[7] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[b], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[1] * B[6] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[2] * B[5] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[b], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[3] * B[4] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[4] * B[3] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[b], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[5] * B[2] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[6] * B[1] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[b], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[7] * B[0] */
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #0]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "str r4, [%[tmp], #28]\n\t"
+ "mov r4, #0\n\t"
+ /* Columns 8..15 are stored straight into r; a/b below index 8 are
+ * no longer read once r[8+] is written, so aliasing stays safe. */
+ /* A[1] * B[7] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[b], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[2] * B[6] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[3] * B[5] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[4] * B[4] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[5] * B[3] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[b], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[6] * B[2] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[7] * B[1] */
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [%[r], #32]\n\t"
+ "mov r5, #0\n\t"
+ /* A[2] * B[7] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[b], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[3] * B[6] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[4] * B[5] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[b], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[5] * B[4] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[6] * B[3] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[b], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[7] * B[2] */
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "mov r3, #0\n\t"
+ /* A[3] * B[7] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[b], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[4] * B[6] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[5] * B[5] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[b], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[6] * B[4] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[7] * B[3] */
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "mov r4, #0\n\t"
+ /* A[4] * B[7] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[b], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[5] * B[6] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[6] * B[5] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[b], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[7] * B[4] */
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [%[r], #44]\n\t"
+ "mov r5, #0\n\t"
+ /* A[5] * B[7] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[b], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[6] * B[6] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[7] * B[5] */
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "mov r3, #0\n\t"
+ /* A[6] * B[7] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[b], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ /* A[7] * B[6] */
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "mov r4, #0\n\t"
+ /* A[7] * B[7] */
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r8, [%[b], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r8\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ /* Transfer tmp to r */
+ "ldr r3, [%[tmp], #0]\n\t"
+ "ldr r4, [%[tmp], #4]\n\t"
+ "ldr r5, [%[tmp], #8]\n\t"
+ "ldr r6, [%[tmp], #12]\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r3, [%[tmp], #16]\n\t"
+ "ldr r4, [%[tmp], #20]\n\t"
+ "ldr r5, [%[tmp], #24]\n\t"
+ "ldr r6, [%[tmp], #28]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "str r5, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
+ : "memory", "r3", "r4", "r5", "r6", "r8"
+ );
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Runs in constant time: every word of b is loaded and ANDed with the
+ * mask, so the same work is done whether or not the subtract happens.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply (0 or all-ones).
+ * returns 0 when no borrow occurred and -1 (all-ones) on borrow.
+ */
+SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a,
+ const sp_digit* b, sp_digit m)
+{
+ sp_digit c = 0;
+
+ __asm__ __volatile__ (
+ "mov r5, #32\n\t"
+ "mov r9, r5\n\t"
+ "mov r8, #0\n\t"
+ "\n1:\n\t"
+ "ldr r6, [%[b], r8]\n\t"
+ "and r6, r6, %[m]\n\t"
+ /* Re-create the borrow from the previous word: c is 0 or -1, and
+ * 0 - c clears the carry flag exactly when a borrow is pending. */
+ "mov r5, #0\n\t"
+ "subs r5, r5, %[c]\n\t"
+ "ldr r5, [%[a], r8]\n\t"
+ "sbcs r5, r5, r6\n\t"
+ /* c = 0 or -1 depending on the new borrow. */
+ "sbcs %[c], %[c], %[c]\n\t"
+ "str r5, [%[r], r8]\n\t"
+ "add r8, r8, #4\n\t"
+ "cmp r8, r9\n\t"
+ "blt 1b\n\t"
+ : [c] "+r" (c)
+ : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+ : "memory", "r5", "r6", "r8", "r9"
+ );
+
+ return c;
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * Specialised to the P-256 prime: the modulus words are folded directly
+ * into the add/sub sequence, so the m and mp arguments are unused (for
+ * this prime mu = a[i], see the inline comments). After the 8 folding
+ * rounds the top half of a is moved down to the bottom, with the
+ * modulus conditionally subtracted once if the final carry demands it.
+ *
+ * a A single precision number to reduce in place (16 words in, 8 out).
+ * m The single precision number representing the modulus (unused).
+ * mp The digit representing the negative inverse of m mod 2^n (unused).
+ */
+SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ (void)mp;
+ (void)m;
+
+ __asm__ __volatile__ (
+ "mov r2, #0\n\t"
+ "mov r1, #0\n\t"
+ /* i = 0 */
+ "mov r9, r2\n\t"
+ "\n1:\n\t"
+ "mov r4, #0\n\t"
+ /* mu = a[i] * 1 (mp) = a[i] */
+ "ldr r3, [%[a]]\n\t"
+ /* a[i] += -1 * mu = -1 * a[i] => a[i] = 0 no carry */
+ /* a[i+1] += -1 * mu */
+ "ldr r6, [%[a], #4]\n\t"
+ "mov r5, #0\n\t"
+ "adds r4, r4, r6\n\t"
+ "adc r5, r5, r2\n\t"
+ "str r4, [%[a], #4]\n\t"
+ /* a[i+2] += -1 * mu */
+ "ldr r6, [%[a], #8]\n\t"
+ "mov r4, #0\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r4, r4, r2\n\t"
+ "str r5, [%[a], #8]\n\t"
+ /* a[i+3] += 0 * mu */
+ "ldr r6, [%[a], #12]\n\t"
+ "mov r5, #0\n\t"
+ "adds r4, r4, r3\n\t"
+ "adc r5, r5, r2\n\t"
+ "adds r4, r4, r6\n\t"
+ "adc r5, r5, r2\n\t"
+ "str r4, [%[a], #12]\n\t"
+ /* a[i+4] += 0 * mu */
+ "ldr r6, [%[a], #16]\n\t"
+ "mov r4, #0\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r4, r4, r2\n\t"
+ "str r5, [%[a], #16]\n\t"
+ /* a[i+5] += 0 * mu */
+ "ldr r6, [%[a], #20]\n\t"
+ "mov r5, #0\n\t"
+ "adds r4, r4, r6\n\t"
+ "adc r5, r5, r2\n\t"
+ "str r4, [%[a], #20]\n\t"
+ /* a[i+6] += 1 * mu */
+ "ldr r6, [%[a], #24]\n\t"
+ "mov r4, #0\n\t"
+ "adds r5, r5, r3\n\t"
+ "adc r4, r4, r2\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r4, r4, r2\n\t"
+ "str r5, [%[a], #24]\n\t"
+ /* a[i+7] += -1 * mu */
+ "ldr r6, [%[a], #28]\n\t"
+ "ldr r8, [%[a], #32]\n\t"
+ "adds r5, r1, r3\n\t"
+ "mov r1, #0\n\t"
+ "adc r1, r1, r2\n\t"
+ "subs r4, r4, r3\n\t"
+ "sbcs r5, r5, r2\n\t"
+ "sbc r1, r1, r2\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r1, r1, r2\n\t"
+ "str r4, [%[a], #28]\n\t"
+ "str r5, [%[a], #32]\n\t"
+ /* i += 1 */
+ "add r9, r9, #1\n\t"
+ "add %[a], %[a], #4\n\t"
+ "mov r6, #8\n\t"
+ "cmp r9, r6\n\t"
+ "blt 1b\n\t"
+ "sub %[a], %[a], #32\n\t"
+ /* r1 = final carry; build the subtract mask (r1 = -carry) and move
+ * a[8..15] - carry*modulus down into a[0..7]. */
+ "mov r3, r1\n\t"
+ "sub r1, r1, #1\n\t"
+ "mvn r1, r1\n\t"
+ "ldr r4, [%[a],#32]\n\t"
+ "ldr r5, [%[a],#36]\n\t"
+ "ldr r6, [%[a],#40]\n\t"
+ "ldr r8, [%[a],#44]\n\t"
+ "subs r4, r4, r1\n\t"
+ "sbcs r5, r5, r1\n\t"
+ "sbcs r6, r6, r1\n\t"
+ "sbcs r8, r8, r2\n\t"
+ "str r4, [%[a],#0]\n\t"
+ "str r5, [%[a],#4]\n\t"
+ "str r6, [%[a],#8]\n\t"
+ "str r8, [%[a],#12]\n\t"
+ "ldr r4, [%[a],#48]\n\t"
+ "ldr r5, [%[a],#52]\n\t"
+ "ldr r6, [%[a],#56]\n\t"
+ "ldr r8, [%[a],#60]\n\t"
+ "sbcs r4, r4, r2\n\t"
+ "sbcs r5, r5, r2\n\t"
+ "sbcs r6, r6, r3\n\t"
+ "sbc r8, r8, r1\n\t"
+ "str r4, [%[a],#16]\n\t"
+ "str r5, [%[a],#20]\n\t"
+ "str r6, [%[a],#24]\n\t"
+ "str r8, [%[a],#28]\n\t"
+ : [a] "+r" (a)
+ :
+ : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r8", "r9"
+ );
+}
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * Generic word-by-word reduction that takes the modulus as data
+ * (cf. sp_256_mont_reduce_8, which hard-codes the P-256 prime).
+ *
+ * Register roles inside the asm: r9 = mp, r12 = m, r10 = current
+ * position in a, r11 = a + 32 bytes (outer-loop end), r14 = inner-loop
+ * end, r4/r5 = rolling carry words, ca = carry out of the top word.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_digit ca = 0;
+
+ __asm__ __volatile__ (
+ "mov r9, %[mp]\n\t"
+ "mov r12, %[m]\n\t"
+ "mov r10, %[a]\n\t"
+ "mov r4, #0\n\t"
+ "add r11, r10, #32\n\t"
+ "\n1:\n\t"
+ /* mu = a[i] * mp */
+ "mov %[mp], r9\n\t"
+ "ldr %[a], [r10]\n\t"
+ "mul %[mp], %[mp], %[a]\n\t"
+ "mov %[m], r12\n\t"
+ "add r14, r10, #24\n\t"
+ "\n2:\n\t"
+ /* a[i+j] += m[j] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r5, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r5, r5, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r4, r4, %[a]\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r4, [r10], #4\n\t"
+ /* a[i+j+1] += m[j+1] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r4, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r4, r4, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r5, r5, %[a]\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [r10], #4\n\t"
+ "cmp r10, r14\n\t"
+ "blt 2b\n\t"
+ /* a[i+6] += m[6] * mu */
+ "ldr %[a], [r10]\n\t"
+ "mov r5, #0\n\t"
+ /* Multiply m[j] and mu - Start */
+ "ldr r8, [%[m]], #4\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds %[a], %[a], r6\n\t"
+ "adc r5, r5, r8\n\t"
+ /* Multiply m[j] and mu - Done */
+ "adds r4, r4, %[a]\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r4, [r10], #4\n\t"
+ /* a[i+7] += m[7] * mu */
+ "mov r4, %[ca]\n\t"
+ "mov %[ca], #0\n\t"
+ /* Multiply m[7] and mu - Start */
+ "ldr r8, [%[m]]\n\t"
+ "umull r6, r8, %[mp], r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ /* Multiply m[7] and mu - Done */
+ "ldr r6, [r10]\n\t"
+ "ldr r8, [r10, #4]\n\t"
+ "adds r6, r6, r5\n\t"
+ "adcs r8, r8, r4\n\t"
+ "adc %[ca], %[ca], #0\n\t"
+ "str r6, [r10]\n\t"
+ "str r8, [r10, #4]\n\t"
+ /* Next word in a */
+ "sub r10, r10, #24\n\t"
+ "cmp r10, r11\n\t"
+ "blt 1b\n\t"
+ "mov %[a], r10\n\t"
+ "mov %[m], r12\n\t"
+ : [ca] "+r" (ca), [a] "+r" (a)
+ : [m] "r" (m), [mp] "r" (mp)
+ : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+ );
+
+ /* The asm leaves a advanced by 8 words, so a - 8 is the start of the
+ * buffer. Subtract the modulus once when the top word carried out. */
+ sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full 16-word product followed by Montgomery reduction back to 8 words.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_256_mul_8(r, a, b);
+ sp_256_mont_reduce_8(r, m, mp);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Fully unrolled comba squaring. Each off-diagonal product A[i]*A[j]
+ * (i < j) is counted twice — either by doubling a partial sum held in
+ * r9:r10:r11 or by adding the product into the column twice — and each
+ * diagonal A[i]*A[i] is counted once. The low 8 result words are
+ * accumulated in the on-stack tmp buffer and copied into r at the end,
+ * which allows r to alias a.
+ *
+ * r A single precision integer (16 words, result).
+ * a A single precision integer (8 words).
+ */
+SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
+{
+ sp_digit tmp[8]; /* low half accumulator; protects a if r aliases it */
+ __asm__ __volatile__ (
+ /* A[0] * A[0] */
+ "ldr r6, [%[a], #0]\n\t"
+ "umull r3, r4, r6, r6\n\t"
+ "mov r5, #0\n\t"
+ "str r3, [%[tmp], #0]\n\t"
+ "mov r3, #0\n\t"
+ /* A[0] * A[1] */
+ "ldr r8, [%[a], #4]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adc r5, r5, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "str r4, [%[tmp], #4]\n\t"
+ "mov r4, #0\n\t"
+ /* A[0] * A[2] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[1] * A[1] */
+ "ldr r6, [%[a], #4]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [%[tmp], #8]\n\t"
+ "mov r5, #0\n\t"
+ /* A[0] * A[3] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[2] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #8]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, r10\n\t"
+ "adc r5, r5, r11\n\t"
+ "str r3, [%[tmp], #12]\n\t"
+ "mov r3, #0\n\t"
+ /* A[0] * A[4] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[3] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[2] * A[2] */
+ "ldr r6, [%[a], #8]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r4, r4, r9\n\t"
+ "adcs r5, r5, r10\n\t"
+ "adc r3, r3, r11\n\t"
+ "str r4, [%[tmp], #16]\n\t"
+ "mov r4, #0\n\t"
+ /* A[0] * A[5] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[4] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[2] * A[3] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #12]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r5, r5, r9\n\t"
+ "adcs r3, r3, r10\n\t"
+ "adc r4, r4, r11\n\t"
+ "str r5, [%[tmp], #20]\n\t"
+ "mov r5, #0\n\t"
+ /* A[0] * A[6] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[5] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[2] * A[4] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[3] * A[3] */
+ "ldr r6, [%[a], #12]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, r10\n\t"
+ "adc r5, r5, r11\n\t"
+ "str r3, [%[tmp], #24]\n\t"
+ "mov r3, #0\n\t"
+ /* A[0] * A[7] */
+ "ldr r6, [%[a], #0]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[1] * A[6] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[2] * A[5] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[3] * A[4] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #16]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r4, r4, r9\n\t"
+ "adcs r5, r5, r10\n\t"
+ "adc r3, r3, r11\n\t"
+ "str r4, [%[tmp], #28]\n\t"
+ "mov r4, #0\n\t"
+ /* A[1] * A[7] */
+ "ldr r6, [%[a], #4]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[2] * A[6] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[3] * A[5] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[4] * A[4] */
+ "ldr r6, [%[a], #16]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r5, r5, r9\n\t"
+ "adcs r3, r3, r10\n\t"
+ "adc r4, r4, r11\n\t"
+ "str r5, [%[r], #32]\n\t"
+ "mov r5, #0\n\t"
+ /* A[2] * A[7] */
+ "ldr r6, [%[a], #8]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[3] * A[6] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[4] * A[5] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r3, r3, r9\n\t"
+ "adcs r4, r4, r10\n\t"
+ "adc r5, r5, r11\n\t"
+ "str r3, [%[r], #36]\n\t"
+ "mov r3, #0\n\t"
+ /* A[3] * A[7] */
+ "ldr r6, [%[a], #12]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r9, r10, r6, r8\n\t"
+ "mov r11, #0\n\t"
+ /* A[4] * A[6] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r9, r9, r6\n\t"
+ "adcs r10, r10, r8\n\t"
+ "adc r11, r11, #0\n\t"
+ /* A[5] * A[5] */
+ "ldr r6, [%[a], #20]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "adds r9, r9, r9\n\t"
+ "adcs r10, r10, r10\n\t"
+ "adc r11, r11, r11\n\t"
+ "adds r4, r4, r9\n\t"
+ "adcs r5, r5, r10\n\t"
+ "adc r3, r3, r11\n\t"
+ "str r4, [%[r], #40]\n\t"
+ "mov r4, #0\n\t"
+ /* A[4] * A[7] */
+ "ldr r6, [%[a], #16]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ /* A[5] * A[6] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "adds r5, r5, r6\n\t"
+ "adcs r3, r3, r8\n\t"
+ "adc r4, r4, #0\n\t"
+ "str r5, [%[r], #44]\n\t"
+ "mov r5, #0\n\t"
+ /* A[5] * A[7] */
+ "ldr r6, [%[a], #20]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ /* A[6] * A[6] */
+ "ldr r6, [%[a], #24]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r3, r3, r6\n\t"
+ "adcs r4, r4, r8\n\t"
+ "adc r5, r5, #0\n\t"
+ "str r3, [%[r], #48]\n\t"
+ "mov r3, #0\n\t"
+ /* A[6] * A[7] */
+ "ldr r6, [%[a], #24]\n\t"
+ "ldr r8, [%[a], #28]\n\t"
+ "umull r6, r8, r6, r8\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "adds r4, r4, r6\n\t"
+ "adcs r5, r5, r8\n\t"
+ "adc r3, r3, #0\n\t"
+ "str r4, [%[r], #52]\n\t"
+ "mov r4, #0\n\t"
+ /* A[7] * A[7] */
+ "ldr r6, [%[a], #28]\n\t"
+ "umull r6, r8, r6, r6\n\t"
+ "adds r5, r5, r6\n\t"
+ "adc r3, r3, r8\n\t"
+ "str r5, [%[r], #56]\n\t"
+ "str r3, [%[r], #60]\n\t"
+ /* Transfer tmp to r */
+ "ldr r3, [%[tmp], #0]\n\t"
+ "ldr r4, [%[tmp], #4]\n\t"
+ "ldr r5, [%[tmp], #8]\n\t"
+ "ldr r6, [%[tmp], #12]\n\t"
+ "str r3, [%[r], #0]\n\t"
+ "str r4, [%[r], #4]\n\t"
+ "str r5, [%[r], #8]\n\t"
+ "str r6, [%[r], #12]\n\t"
+ "ldr r3, [%[tmp], #16]\n\t"
+ "ldr r4, [%[tmp], #20]\n\t"
+ "ldr r5, [%[tmp], #24]\n\t"
+ "ldr r6, [%[tmp], #28]\n\t"
+ "str r3, [%[r], #16]\n\t"
+ "str r4, [%[r], #20]\n\t"
+ "str r5, [%[r], #24]\n\t"
+ "str r6, [%[r], #28]\n\t"
+ :
+ : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
+ : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11"
+ );
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full 16-word square followed by Montgomery reduction back to 8 words.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ sp_256_sqr_8(r, a);
+ sp_256_mont_reduce_8(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ (2^n) mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * n Number of times to square (assumed >= 1).
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    int i;
+
+    /* First squaring reads from a; the remaining n-1 square r in place. */
+    sp_256_mont_sqr_8(r, a, m, mp);
+    for (i = 1; i < n; i++) {
+        sp_256_mont_sqr_8(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve: p256_mod - 2, least-significant word first.
+ * Used as the exponent for Fermat inversion in sp_256_mont_inv_8. */
+static const uint32_t p256_mod_minus_2[8] = {
+ 0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+ 0x00000001U,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * Uses Fermat's little theorem: r = a^(p-2) mod p. The small build
+ * walks the bits of p-2 with square-and-multiply; the fast build uses
+ * a fixed addition chain (each hex comment tracks the exponent to
+ * which a has been raised so far).
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data (scratch for t1/t2/t3).
+ */
+static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ /* Square-and-multiply over the bits of p-2, most significant first. */
+ XMEMCPY(t, a, sizeof(sp_digit) * 8);
+ for (i=254; i>=0; i--) {
+ sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
+ if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+ sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 8);
+#else
+ sp_digit* t1 = td;
+ sp_digit* t2 = td + 2 * 8;
+ sp_digit* t3 = td + 4 * 8;
+ /* 0x2 */
+ sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
+ /* 0x3 */
+ sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
+ /* 0xc */
+ sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
+ /* 0xd */
+ sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
+ /* 0xf */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xf0 */
+ sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
+ /* 0xfd */
+ sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xff */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xff00 */
+ sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
+ /* 0xfffd */
+ sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xffff */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffff0000 */
+ sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
+ /* 0xfffffffd */
+ sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000000 */
+ sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
+ /* 0xffffffffffffffff */
+ sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001 */
+ sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
+ /* 0xffffffff000000010000000000000000000000000000000000000000 */
+ sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+ sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+ sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+ sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * Words are scanned from most significant (byte offset 28) down to least.
+ * r3 holds an all-ones mask that is cleared as soon as the first differing
+ * word is seen, so less significant words can no longer change the result,
+ * yet every word is still read and processed.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        /* r3 = 0xffffffff: "still equal" mask. r6 = word byte offset. */
+        "mov r3, #0\n\t"
+        "mvn r3, r3\n\t"
+        "mov r6, #28\n\t"
+        "\n1:\n\t"
+        "ldr r8, [%[a], r6]\n\t"
+        "ldr r5, [%[b], r6]\n\t"
+        "and r8, r8, r3\n\t"
+        "and r5, r5, r3\n\t"
+        "mov r4, r8\n\t"
+        /* r -= 1 when (a word) < (b word); mask cleared once unequal. */
+        "subs r8, r8, r5\n\t"
+        "sbc r8, r8, r8\n\t"
+        "add %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        /* r += 1 when (b word) < (a word); mask cleared once unequal. */
+        "subs r5, r5, r4\n\t"
+        "sbc r8, r8, r8\n\t"
+        "sub %[r], %[r], r8\n\t"
+        "mvn r8, r8\n\t"
+        "and r3, r3, r8\n\t"
+        "sub r6, r6, #4\n\t"
+        "cmp r6, #0\n\t"
+        "bge 1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * Each sp_digit already holds a full 32-bit word in this implementation,
+ * so normalization is a no-op and the macro expands to nothing.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_256_norm_8(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes x = X/Z^2 and y = Y/Z^3, reduces both below the modulus and
+ * sets Z to one.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* s = t;
+    sp_digit* u = t + 2*8;
+    int32_t c;
+
+    /* s = 1/Z (Montgomery form). */
+    sp_256_mont_inv_8(s, p->z, t + 2*8);
+
+    /* u = 1/Z^2, s = 1/Z^3. */
+    sp_256_mont_sqr_8(u, s, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(s, u, s, p256_mod, p256_mp_mod);
+
+    /* x = X / Z^2 */
+    sp_256_mont_mul_8(r->x, p->x, u, p256_mod, p256_mp_mod);
+    XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U);
+    sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
+    /* Conditionally subtract the modulus so x is fully reduced. */
+    c = sp_256_cmp_8(r->x, p256_mod);
+    sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((c >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->x);
+
+    /* y = Y / Z^3 */
+    sp_256_mont_mul_8(r->y, p->y, s, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
+    /* Conditionally subtract the modulus so y is fully reduced. */
+    c = sp_256_cmp_8(r->y, p256_mod);
+    sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((c >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_8(r->y);
+
+    /* Affine Z ordinate is one. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Word-at-a-time loop. The inter-iteration carry lives in c (0 or 1);
+ * adding 0xffffffff (r8) to it at the top of the loop moves it back into
+ * the carry flag for the adcs.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r6 = end pointer (a + 32 bytes); r8 = 0xffffffff. */
+        "mov r6, %[a]\n\t"
+        "mov r8, #0\n\t"
+        "add r6, r6, #32\n\t"
+        "sub r8, r8, #1\n\t"
+        "\n1:\n\t"
+        /* Reload carry from c: c + 0xffffffff carries iff c != 0. */
+        "adds %[c], %[c], r8\n\t"
+        "ldr r4, [%[a]]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "adcs r4, r4, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        /* Save carry back into c. */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c], %[c]\n\t"
+        "add %[a], %[a], #4\n\t"
+        "add %[b], %[b], #4\n\t"
+        "add %[r], %[r], #4\n\t"
+        "cmp %[a], r6\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled: eight words added two at a time with the carry kept in
+ * the flags throughout; c receives the final carry out.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        "ldm %[a]!, {r4, r5}\n\t"
+        "ldm %[b]!, {r6, r8}\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "stm %[r]!, {r4, r5}\n\t"
+        /* c = final carry out. */
+        "mov %[c], #0\n\t"
+        "adc %[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * The eight-word addition is followed by a masked subtraction of the
+ * modulus: the carry out of the add is turned into an all-ones mask so
+ * the reduction happens without a branch.
+ *
+ * r Result of addition.
+ * a First number to add in Montogmery form.
+ * b Second number to add in Montogmery form.
+ * m Modulus (prime). Unused; the P256 modulus is hard coded below.
+ */
+SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "mov r3, #0\n\t"
+        "ldr r4, [%[a],#0]\n\t"
+        "ldr r5, [%[a],#4]\n\t"
+        "ldr r6, [%[b],#0]\n\t"
+        "ldr r8, [%[b],#4]\n\t"
+        "adds r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "ldr r4, [%[a],#8]\n\t"
+        "ldr r5, [%[a],#12]\n\t"
+        "ldr r6, [%[b],#8]\n\t"
+        "ldr r8, [%[b],#12]\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "str r4, [%[r],#8]\n\t"
+        "str r5, [%[r],#12]\n\t"
+        "ldr r4, [%[a],#16]\n\t"
+        "ldr r5, [%[a],#20]\n\t"
+        "ldr r6, [%[b],#16]\n\t"
+        "ldr r8, [%[b],#20]\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        /* Top four sum words are parked in r9-r12. */
+        "mov r9, r4\n\t"
+        "mov r10, r5\n\t"
+        "ldr r4, [%[a],#24]\n\t"
+        "ldr r5, [%[a],#28]\n\t"
+        "ldr r6, [%[b],#24]\n\t"
+        "ldr r8, [%[b],#28]\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r8\n\t"
+        "mov r11, r4\n\t"
+        "mov r12, r5\n\t"
+        /* r3 = carry out of the add; r6 = carry bit, r3 = -carry mask.
+         * Subtract the modulus masked by the carry: words 0-2 and 7 of
+         * the P256 modulus are 0xffffffff (r3), words 3-5 are 0 (r8) and
+         * word 6 is 1 (r6). */
+        "adc r3, r3, r3\n\t"
+        "mov r6, r3\n\t"
+        "sub r3, r3, #1\n\t"
+        "mvn r3, r3\n\t"
+        "mov r8, #0\n\t"
+        "ldr r4, [%[r],#0]\n\t"
+        "ldr r5, [%[r],#4]\n\t"
+        "subs r4, r4, r3\n\t"
+        "sbcs r5, r5, r3\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "ldr r4, [%[r],#8]\n\t"
+        "ldr r5, [%[r],#12]\n\t"
+        "sbcs r4, r4, r3\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "str r4, [%[r],#8]\n\t"
+        "str r5, [%[r],#12]\n\t"
+        "mov r4, r9\n\t"
+        "mov r5, r10\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "str r4, [%[r],#16]\n\t"
+        "str r5, [%[r],#20]\n\t"
+        "mov r4, r11\n\t"
+        "mov r5, r12\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "sbc r5, r5, r3\n\t"
+        "str r4, [%[r],#24]\n\t"
+        "str r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * Shift-free doubling via a + a, followed by a branch-free masked
+ * subtraction of the modulus when the doubling carries out.
+ *
+ * r Result of doubling.
+ * a Number to double in Montogmery form.
+ * m Modulus (prime). Unused; the P256 modulus is hard coded below.
+ */
+SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr r4, [%[a],#0]\n\t"
+        "ldr r5, [%[a],#4]\n\t"
+        "ldr r6, [%[a],#8]\n\t"
+        "ldr r8, [%[a],#12]\n\t"
+        "adds r4, r4, r4\n\t"
+        "adcs r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adcs r8, r8, r8\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "str r6, [%[r],#8]\n\t"
+        "str r8, [%[r],#12]\n\t"
+        "ldr r4, [%[a],#16]\n\t"
+        "ldr r5, [%[a],#20]\n\t"
+        "ldr r6, [%[a],#24]\n\t"
+        "ldr r8, [%[a],#28]\n\t"
+        "adcs r4, r4, r4\n\t"
+        "adcs r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adcs r8, r8, r8\n\t"
+        /* Top four words parked in r9-r12. */
+        "mov r9, r4\n\t"
+        "mov r10, r5\n\t"
+        "mov r11, r6\n\t"
+        "mov r12, r8\n\t"
+        /* r2 = carry bit, r3 = -carry mask; subtract the modulus masked
+         * by the carry (words 0-2, 7 = r3; 3-5 = r8 (0); 6 = r2). */
+        "mov r3, #0\n\t"
+        "mov r8, #0\n\t"
+        "adc r3, r3, r3\n\t"
+        "mov r2, r3\n\t"
+        "sub r3, r3, #1\n\t"
+        "mvn r3, r3\n\t"
+        "ldr r4, [%[r],#0]\n\t"
+        "ldr r5, [%[r],#4]\n\t"
+        "ldr r6, [%[r],#8]\n\t"
+        "subs r4, r4, r3\n\t"
+        "sbcs r5, r5, r3\n\t"
+        "sbcs r6, r6, r3\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "str r6, [%[r],#8]\n\t"
+        "ldr r4, [%[r],#12]\n\t"
+        "mov r5, r9\n\t"
+        "mov r6, r10\n\t"
+        "sbcs r4, r4, r8\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "sbcs r6, r6, r8\n\t"
+        "str r4, [%[r],#12]\n\t"
+        "str r5, [%[r],#16]\n\t"
+        "str r6, [%[r],#20]\n\t"
+        "mov r4, r11\n\t"
+        "mov r5, r12\n\t"
+        "sbcs r4, r4, r2\n\t"
+        "sbc r5, r5, r3\n\t"
+        "str r4, [%[r],#24]\n\t"
+        "str r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r3", "r2", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Computed as (2a reduced) + a, reducing after each stage with a masked
+ * subtraction of the modulus driven by the carry out.
+ *
+ * r Result of Tripling.
+ * a Number to triple in Montogmery form.
+ * m Modulus (prime). Unused; the P256 modulus is hard coded below.
+ */
+SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        /* 2a: whole operand held in r2-r10. */
+        "ldr r2, [%[a],#0]\n\t"
+        "ldr r3, [%[a],#4]\n\t"
+        "ldr r4, [%[a],#8]\n\t"
+        "ldr r5, [%[a],#12]\n\t"
+        "ldr r6, [%[a],#16]\n\t"
+        "ldr r8, [%[a],#20]\n\t"
+        "ldr r9, [%[a],#24]\n\t"
+        "ldr r10, [%[a],#28]\n\t"
+        "adds r2, r2, r2\n\t"
+        "adcs r3, r3, r3\n\t"
+        "adcs r4, r4, r4\n\t"
+        "adcs r5, r5, r5\n\t"
+        "adcs r6, r6, r6\n\t"
+        "adcs r8, r8, r8\n\t"
+        "adcs r9, r9, r9\n\t"
+        "adcs r10, r10, r10\n\t"
+        /* Masked subtract of the modulus when the double carried:
+         * r12 = carry bit, r11 = -carry mask, r14 = 0. */
+        "mov r11, #0\n\t"
+        "mov r14, #0\n\t"
+        "adc r11, r11, r11\n\t"
+        "mov r12, r11\n\t"
+        "sub r11, r11, #1\n\t"
+        "mvn r11, r11\n\t"
+        "subs r2, r2, r11\n\t"
+        "sbcs r3, r3, r11\n\t"
+        "sbcs r4, r4, r11\n\t"
+        "sbcs r5, r5, r14\n\t"
+        "sbcs r6, r6, r14\n\t"
+        "sbcs r8, r8, r14\n\t"
+        "sbcs r9, r9, r12\n\t"
+        "sbc r10, r10, r11\n\t"
+        /* + a */
+        "ldr r12, [%[a],#0]\n\t"
+        "ldr r14, [%[a],#4]\n\t"
+        "adds r2, r2, r12\n\t"
+        "adcs r3, r3, r14\n\t"
+        "ldr r12, [%[a],#8]\n\t"
+        "ldr r14, [%[a],#12]\n\t"
+        "adcs r4, r4, r12\n\t"
+        "adcs r5, r5, r14\n\t"
+        "ldr r12, [%[a],#16]\n\t"
+        "ldr r14, [%[a],#20]\n\t"
+        "adcs r6, r6, r12\n\t"
+        "adcs r8, r8, r14\n\t"
+        "ldr r12, [%[a],#24]\n\t"
+        "ldr r14, [%[a],#28]\n\t"
+        "adcs r9, r9, r12\n\t"
+        "adcs r10, r10, r14\n\t"
+        /* Second masked subtract of the modulus, storing the result. */
+        "mov r11, #0\n\t"
+        "mov r14, #0\n\t"
+        "adc r11, r11, r11\n\t"
+        "mov r12, r11\n\t"
+        "sub r11, r11, #1\n\t"
+        "mvn r11, r11\n\t"
+        "subs r2, r2, r11\n\t"
+        "str r2, [%[r],#0]\n\t"
+        "sbcs r3, r3, r11\n\t"
+        "str r3, [%[r],#4]\n\t"
+        "sbcs r4, r4, r11\n\t"
+        "str r4, [%[r],#8]\n\t"
+        "sbcs r5, r5, r14\n\t"
+        "str r5, [%[r],#12]\n\t"
+        "sbcs r6, r6, r14\n\t"
+        "str r6, [%[r],#16]\n\t"
+        "sbcs r8, r8, r14\n\t"
+        "str r8, [%[r],#20]\n\t"
+        "sbcs r9, r9, r12\n\t"
+        "str r9, [%[r],#24]\n\t"
+        "sbc r10, r10, r11\n\t"
+        "str r10, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r11", "r12", "r14", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10"
+    );
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * Eight-word subtraction followed by a branch-free masked addition of the
+ * modulus when the subtraction borrows.
+ *
+ * r Result of subtration.
+ * a Number to subtract from in Montogmery form.
+ * b Number to subtract with in Montogmery form.
+ * m Modulus (prime). Unused; the P256 modulus is hard coded below.
+ */
+SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)m;
+
+    __asm__ __volatile__ (
+        "ldr r4, [%[a],#0]\n\t"
+        "ldr r5, [%[a],#4]\n\t"
+        "ldr r6, [%[b],#0]\n\t"
+        "ldr r8, [%[b],#4]\n\t"
+        "subs r4, r4, r6\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "ldr r4, [%[a],#8]\n\t"
+        "ldr r5, [%[a],#12]\n\t"
+        "ldr r6, [%[b],#8]\n\t"
+        "ldr r8, [%[b],#12]\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "str r4, [%[r],#8]\n\t"
+        "str r5, [%[r],#12]\n\t"
+        "ldr r4, [%[a],#16]\n\t"
+        "ldr r5, [%[a],#20]\n\t"
+        "ldr r6, [%[b],#16]\n\t"
+        "ldr r8, [%[b],#20]\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "sbcs r5, r5, r8\n\t"
+        /* Top four difference words parked in r9-r12. */
+        "mov r9, r4\n\t"
+        "mov r10, r5\n\t"
+        "ldr r4, [%[a],#24]\n\t"
+        "ldr r5, [%[a],#28]\n\t"
+        "ldr r6, [%[b],#24]\n\t"
+        "ldr r8, [%[b],#28]\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "mov r11, r4\n\t"
+        "mov r12, r5\n\t"
+        /* r3 = -borrow mask, r8 = borrow bit; add the modulus back masked
+         * by the borrow (words 0-2, 7 = r3; 3-5 = r6 (0); 6 = r8). */
+        "sbc r3, r3, r3\n\t"
+        "lsr r8, r3, #31\n\t"
+        "mov r6, #0\n\t"
+        "ldr r4, [%[r],#0]\n\t"
+        "ldr r5, [%[r],#4]\n\t"
+        "adds r4, r4, r3\n\t"
+        "adcs r5, r5, r3\n\t"
+        "str r4, [%[r],#0]\n\t"
+        "str r5, [%[r],#4]\n\t"
+        "ldr r4, [%[r],#8]\n\t"
+        "ldr r5, [%[r],#12]\n\t"
+        "adcs r4, r4, r3\n\t"
+        "adcs r5, r5, r6\n\t"
+        "str r4, [%[r],#8]\n\t"
+        "str r5, [%[r],#12]\n\t"
+        "mov r4, r9\n\t"
+        "mov r5, r10\n\t"
+        "adcs r4, r4, r6\n\t"
+        "adcs r5, r5, r6\n\t"
+        "str r4, [%[r],#16]\n\t"
+        "str r5, [%[r],#20]\n\t"
+        "mov r4, r11\n\t"
+        "mov r5, r12\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, r3\n\t"
+        "str r4, [%[r],#24]\n\t"
+        "str r5, [%[r],#28]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * When a is odd the modulus is added first (making the sum even), then the
+ * 257-bit value is shifted right one bit. The conditional add is done with
+ * a mask derived from the low bit, so there is no data-dependent branch.
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime). Unused; the P256 modulus is hard coded below.
+ */
+SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    __asm__ __volatile__ (
+        /* r5 = -(a[0] & 1): mask for the conditional modulus add;
+         * r6 = low bit (modulus word 6), r8 = 0. */
+        "ldr r8, [%[a], #0]\n\t"
+        "lsl r8, r8, #31\n\t"
+        "lsr r8, r8, #31\n\t"
+        "mov r5, #0\n\t"
+        "sub r5, r5, r8\n\t"
+        "mov r8, #0\n\t"
+        "lsl r6, r5, #31\n\t"
+        "lsr r6, r6, #31\n\t"
+        "ldr r3, [%[a], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "adds r3, r3, r5\n\t"
+        "adcs r4, r4, r5\n\t"
+        "str r3, [%[r], #0]\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "adcs r3, r3, r5\n\t"
+        "adcs r4, r4, r8\n\t"
+        "str r3, [%[r], #8]\n\t"
+        "str r4, [%[r], #12]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "adcs r3, r3, r8\n\t"
+        "adcs r4, r4, r8\n\t"
+        "str r3, [%[r], #16]\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "ldr r3, [%[a], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "adcs r3, r3, r6\n\t"
+        "adcs r4, r4, r5\n\t"
+        /* r8 = bit 256 of the sum, moved into bit 31 for the shift. */
+        "adc r8, r8, r8\n\t"
+        "lsl r8, r8, #31\n\t"
+        /* Shift right one bit, top pair of words first; r8 carries the
+         * bit shifted down into the next lower word. */
+        "lsr r5, r3, #1\n\t"
+        "lsl r3, r3, #31\n\t"
+        "lsr r6, r4, #1\n\t"
+        "lsl r4, r4, #31\n\t"
+        "orr r5, r5, r4\n\t"
+        "orr r6, r6, r8\n\t"
+        "mov r8, r3\n\t"
+        "str r5, [%[r], #24]\n\t"
+        "str r6, [%[r], #28]\n\t"
+        "ldr r3, [%[a], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "lsr r5, r3, #1\n\t"
+        "lsl r3, r3, #31\n\t"
+        "lsr r6, r4, #1\n\t"
+        "lsl r4, r4, #31\n\t"
+        "orr r5, r5, r4\n\t"
+        "orr r6, r6, r8\n\t"
+        "mov r8, r3\n\t"
+        "str r5, [%[r], #16]\n\t"
+        "str r6, [%[r], #20]\n\t"
+        "ldr r3, [%[a], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "lsr r5, r3, #1\n\t"
+        "lsl r3, r3, #31\n\t"
+        "lsr r6, r4, #1\n\t"
+        "lsl r4, r4, #31\n\t"
+        "orr r5, r5, r4\n\t"
+        "orr r6, r6, r8\n\t"
+        "mov r8, r3\n\t"
+        "str r5, [%[r], #8]\n\t"
+        "str r6, [%[r], #12]\n\t"
+        "ldr r3, [%[r], #0]\n\t"
+        "ldr r4, [%[r], #4]\n\t"
+        "lsr r5, r3, #1\n\t"
+        "lsr r6, r4, #1\n\t"
+        "lsl r4, r4, #31\n\t"
+        "orr r5, r5, r4\n\t"
+        "orr r6, r6, r8\n\t"
+        "str r5, [%[r], #0]\n\t"
+        "str r6, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [m] "r" (m)
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * Jacobian doubling; all ordinates are in Montgomery form. r may alias p.
+ * The statement order matters: later steps reuse values left in t1/t2.
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data (at least 4 * 8 digits).
+ */
+static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod);
+    /* Z = Y * Z */
+    sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod);
+    /* Z = 2Z */
+    sp_256_mont_dbl_8(z, z, p256_mod);
+    /* T2 = X - T1 */
+    sp_256_mont_sub_8(t2, p->x, t1, p256_mod);
+    /* T1 = X + T1 */
+    sp_256_mont_add_8(t1, p->x, t1, p256_mod);
+    /* T2 = T1 * T2 */
+    sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod);
+    /* T1 = 3T2 */
+    sp_256_mont_tpl_8(t1, t2, p256_mod);
+    /* Y = 2Y */
+    sp_256_mont_dbl_8(y, p->y, p256_mod);
+    /* Y = Y * Y */
+    sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod);
+    /* T2 = Y * Y */
+    sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
+    /* T2 = T2/2 */
+    sp_256_div2_8(t2, t2, p256_mod);
+    /* Y = Y * X */
+    sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod);
+    /* X = T1 * T1 */
+    sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_8(x, x, y, p256_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_8(x, x, y, p256_mod);
+    /* Y = Y - X */
+    sp_256_mont_sub_8(y, y, x, p256_mod);
+    /* Y = Y * T1 */
+    sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod);
+    /* Y = Y - T2 */
+    sp_256_mont_sub_8(y, y, t2, p256_mod);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Word-at-a-time loop. The borrow between iterations is kept in c
+ * (0 or -1); 'subs r5, #0, c' at the top of the loop moves it back into
+ * the carry flag for the sbcs.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov r6, %[a]\n\t"
+        "add r6, r6, #32\n\t"
+        "\n1:\n\t"
+        /* Reload borrow from c: 0 - c borrows iff c != 0. */
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"
+        "ldr r4, [%[a]]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "sbcs r4, r4, r5\n\t"
+        "str r4, [%[r]]\n\t"
+        /* c = 0 or -1 depending on the borrow out. */
+        "sbc %[c], %[c], %[c]\n\t"
+        "add %[a], %[a], #4\n\t"
+        "add %[b], %[b], #4\n\t"
+        "add %[r], %[r], #4\n\t"
+        "cmp %[a], r6\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Fully unrolled: eight words subtracted two at a time with the borrow
+ * kept in the flags throughout; c receives 0 or -1 for the final borrow.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[a], #4]\n\t"
+        "ldr r6, [%[b], #0]\n\t"
+        "ldr r8, [%[b], #4]\n\t"
+        "subs r4, r4, r6\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "str r5, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[a], #12]\n\t"
+        "ldr r6, [%[b], #8]\n\t"
+        "ldr r8, [%[b], #12]\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "str r5, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[a], #20]\n\t"
+        "ldr r6, [%[b], #16]\n\t"
+        "ldr r8, [%[b], #20]\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "str r5, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[a], #28]\n\t"
+        "ldr r6, [%[b], #24]\n\t"
+        "ldr r8, [%[b], #28]\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "sbcs r5, r5, r8\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "str r5, [%[r], #28]\n\t"
+        "sbc %[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * All eight word differences are OR-folded before the single comparison,
+ * so the time taken does not depend on where (or whether) they differ.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit d = 0;
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        d |= a[i] ^ b[i];
+    }
+    return d == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Jacobian addition. Falls back to doubling when p and q share an
+ * x-ordinate (same z), since the addition formulas degenerate there.
+ * Infinity handling is branch-light: results are written through the
+ * rp[]/ap[] pointer tables indexed by the infinity flags.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (at least 10 * 8 digits).
+ */
+static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+        sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* t3 = t + 4*8;
+    sp_digit* t4 = t + 6*8;
+    sp_digit* t5 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_256* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: q->y == p->y or q->y == -p->y (t1 = p - q->y). */
+    (void)sp_256_sub_8(t1, p256_mod, q->y);
+    sp_256_norm_8(t1);
+    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_8(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        /* Write to scratch instead of r when either input is infinity. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* r starts as whichever input is not infinity (q when p is). */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<8; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<8; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<8; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_8(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_8(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(x, x, t5, p256_mod);
+        sp_256_mont_dbl_8(t1, y, p256_mod);
+        sp_256_mont_sub_8(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_8(y, y, x, p256_mod);
+        sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(y, y, t5, p256_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Fixed 4-bit window method: a table of the 16 multiples 0..15 of g is
+ * built, then the 256-bit scalar is consumed four bits at a time with
+ * four doublings and one table-point addition per window.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[16];
+    sp_point_256 rtd;
+    sp_digit tmpd[2 * 8 * 5];
+#endif
+    sp_point_256* t;
+    sp_point_256* rt;
+    sp_digit* tmp;
+    sp_digit n;        /* Bits of the scalar not yet consumed. */
+    int i;             /* Index of the scalar word being read. */
+    int c, y;          /* c: valid bits in n; y: current window value. */
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
+        t[1].infinity = 0;
+        /* t[i] = i * g: each entry from a double or add of earlier ones. */
+        sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        /* Seed the result with the top nibble of the top scalar word. */
+        i = 6;
+        n = k[i+1] << 0;
+        c = 28;
+        y = n >> 28;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_256));
+        n <<= 4;
+        /* Remaining windows: 4 doublings then add the table entry. */
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--];
+                c += 32;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+            sp_256_proj_point_dbl_8(rt, rt, tmp);
+
+            sp_256_proj_point_add_8(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_8(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Zeroize before freeing - the table leaks scalar-related data. */
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_256) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_256_point_free_8(rt, 1, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points.
+ * Only the affine ordinates are stored, in Montgomery form. */
+typedef struct sp_table_entry_256 {
+    sp_digit x[8];   /* X ordinate. */
+    sp_digit y[8];   /* Y ordinate. */
+} sp_table_entry_256;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times, in
+ * place.
+ *
+ * Uses the repeated-doubling optimization that carries W = Z^4 between
+ * iterations. In the non-small build the last iteration is peeled so the
+ * final (unneeded) update of W can be skipped.
+ *
+ * p Point to double; also receives the result.
+ * n Number of times to double
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*8;
+    sp_digit* b = t + 4*8;
+    sp_digit* t1 = t + 6*8;
+    sp_digit* t2 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_8(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_8(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_8(t2, b, p256_mod);
+        sp_256_mont_sub_8(x, x, t2, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+        /* t1 = Y^4 */
+        sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_8(y, b, x, p256_mod);
+        sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_8(y, y, p256_mod);
+        sp_256_mont_sub_8(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* Last iteration, without the W update. */
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_8(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_8(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_8(t2, b, p256_mod);
+    sp_256_mont_sub_8(x, x, t2, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
+    /* t1 = Y^4 */
+    sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_8(y, b, x, p256_mod);
+    sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_8(y, y, p256_mod);
+    sp_256_mont_sub_8(y, y, t1, p256_mod);
+#endif
+    /* Y = Y/2 (undo the initial Y = 2*Y). */
+    sp_256_div2_8(y, y, p256_mod);
+}
+
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * x = X/Z^2, y = Y/Z^3 and Z is set to the normalized (Montgomery one)
+ * value. No final reduction below the modulus is done here.
+ *
+ * a Point to convert.
+ * t Temporary data.
+ */
+static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 8;
+    sp_digit* tmp = t + 4 * 8;
+
+    /* t1 = 1/Z, t2 = 1/Z^2, t1 = 1/Z^3. */
+    sp_256_mont_inv_8(t1, a->z, tmp);
+
+    sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * With Z2 == 1 the U1/S1 scaling disappears, saving several
+ * multiplications compared to the general addition.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (at least 10 * 8 digits).
+ */
+static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*8;
+    sp_digit* t3 = t + 4*8;
+    sp_digit* t4 = t + 6*8;
+    sp_digit* t5 = t + 8*8;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double: q->y == p->y or q->y == -p->y (t1 = p - q->y). */
+    (void)sp_256_sub_8(t1, p256_mod, q->y);
+    sp_256_norm_8(t1);
+    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
+        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_8(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        /* Write to scratch instead of r when either input is infinity. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* r starts as whichever input is not infinity (q when p is). */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<8; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<8; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<8; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_8(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_8(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_8(t1, t3, p256_mod);
+        sp_256_mont_sub_8(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_8(t3, t3, x, p256_mod);
+        sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_8(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * The 16 entries hold every sum of the subset of {a, a*2^64, a*2^128,
+ * a*2^192} selected by the entry index bits (entry 0 is infinity), for
+ * use by the 4-stripe comb multiplication.
+ *
+ * a      The base point.
+ * table  Place to store generated point data (16 entries).
+ * tmp    Temporary data.
+ * heap   Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, s2d, s2);
+    }
+
+    /* t = a converted to Montgomery form. */
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_8(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        /* table[2^i] = a * 2^(64*i) - 64 doublings per step. */
+        for (i=1; i<4; i++) {
+            sp_256_proj_point_dbl_n_8(t, 64, tmp);
+            sp_256_proj_to_affine_8(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        /* Remaining entries: table[j] = table[2^i] + table[j - 2^i]. */
+        for (i=1; i<4; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+                sp_256_proj_to_affine_8(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_8(s2, 0, heap);
+    sp_256_point_free_8(s1, 0, heap);
+    sp_256_point_free_8( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply (unused - the pre-computation in table is used).
+ * table Pre-computed stripe table for the point (16 entries, 64-bit stripes).
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 8 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_8(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ /* Assemble the top window from scalar bits 63, 127, 191 and 255. */
+ y = 0;
+ for (j=0,x=63; j<4; j++,x+=64) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ /* For each remaining bit position: double, then add the table entry
+ * selected by one bit from each 64-bit stripe of the scalar. */
+ for (i=62; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<4; j++,x+=64) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ sp_256_proj_point_dbl_8(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_256_map_8(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry: a point (x, y) and the stripe table generated for it. */
+typedef struct sp_cache_256_t {
+ sp_digit x[8]; /* X ordinate of the cached point. */
+ sp_digit y[8]; /* Y ordinate of the cached point. */
+ sp_table_entry_256 table[16]; /* Pre-computed stripe table. */
+ uint32_t cnt; /* Number of lookups of this entry. */
+ int set; /* Non-zero when the entry is in use. */
+} sp_cache_256_t;
+
+/* Cache of stripe tables, one entry per distinct point seen. */
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+/* Index of the most recently used cache entry; -1 until first use. */
+static THREAD_LS_T int sp_cache_256_last = -1;
+/* Non-zero once the cache entries have been cleared. */
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_256 = 0; /* Lock initialized flag. */
+ static wolfSSL_Mutex sp_cache_256_lock; /* Guards sp_cache_256. */
+#endif
+
+/* Find or allocate the cache entry for the given point.
+ *
+ * On a hit (x and y match) the entry's use count is incremented; on a miss
+ * an empty entry is claimed, or the least used entry is evicted, and the
+ * point is stored with its count reset to 1.
+ * Not thread-safe by itself: without HAVE_THREAD_LS the caller holds
+ * sp_cache_256_lock around this call.
+ *
+ * g Point to look up.
+ * cache Out: cache entry for the point.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ if (sp_cache_256_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_256[i].set = 0;
+ }
+ sp_cache_256_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_256[i].set)
+ continue;
+
+ if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+ sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+ sp_cache_256[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_256_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_256[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_256_last) {
+ least = sp_cache_256[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_256[j].cnt < least) {
+ i = j;
+ least = sp_cache_256[i].cnt;
+ }
+ }
+ }
+
+ XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+ XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+ sp_cache_256[i].set = 1;
+ sp_cache_256[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_256[i];
+ sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * With FP_ECC a per-point cache of stripe tables is kept: the first use of
+ * a point takes the generic fast path; on the second use the stripe table
+ * is generated and used, as it is for all later uses.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails, BAD_MUTEX_E when the
+ * cache lock cannot be taken and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 8 * 5];
+ sp_cache_256_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ if (initCacheMutex_256 == 0) {
+ wc_InitMutex(&sp_cache_256_lock);
+ initCacheMutex_256 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_256_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_256(g, &cache);
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+ }
+ else {
+ err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed stripe table of points for the base point.
+ *
+ * Builds a 256-entry table over 32-bit stripes: entry 2^i holds the affine,
+ * Montgomery-form point a*2^(32*i), entry 0 is all zero (infinity) and the
+ * remaining entries are sums of the power-of-two entries selected by the
+ * bits of the index.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_gen_stripe_table_8(const sp_point_256* a,
+ sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td, s1d, s2d;
+#endif
+ sp_point_256* t;
+ sp_point_256* s1 = NULL;
+ sp_point_256* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_8(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, s2d, s2);
+ }
+
+ /* Convert the point's coordinates into Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_256_proj_to_affine_8(t, tmp);
+
+ XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = a * 2^(32*i), affine, via 32 repeated doublings. */
+ for (i=1; i<8; i++) {
+ sp_256_proj_point_dbl_n_8(t, 32, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Fill the remaining entries as sums of the power-of-two entries. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
+ sp_256_proj_to_affine_8(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_256_point_free_8(s2, 0, heap);
+ sp_256_point_free_8(s1, 0, heap);
+ sp_256_point_free_8( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply (unused - the pre-computation in table is used).
+ * table Pre-computed stripe table for the point (256 entries, 32-bit stripes).
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 8 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_8(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ /* Assemble the top window from scalar bits 31, 63, ..., 255. */
+ y = 0;
+ for (j=0,x=31; j<8; j++,x+=32) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ /* For each remaining bit position: double, then add the table entry
+ * selected by one bit from each 32-bit stripe of the scalar. */
+ for (i=30; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=32) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ sp_256_proj_point_dbl_8(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_8(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_256_map_8(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry: a point (x, y) and the stripe table generated for it. */
+typedef struct sp_cache_256_t {
+ sp_digit x[8]; /* X ordinate of the cached point. */
+ sp_digit y[8]; /* Y ordinate of the cached point. */
+ sp_table_entry_256 table[256]; /* Pre-computed stripe table. */
+ uint32_t cnt; /* Number of lookups of this entry. */
+ int set; /* Non-zero when the entry is in use. */
+} sp_cache_256_t;
+
+/* Cache of stripe tables, one entry per distinct point seen. */
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+/* Index of the most recently used cache entry; -1 until first use. */
+static THREAD_LS_T int sp_cache_256_last = -1;
+/* Non-zero once the cache entries have been cleared. */
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_256 = 0; /* Lock initialized flag. */
+ static wolfSSL_Mutex sp_cache_256_lock; /* Guards sp_cache_256. */
+#endif
+
+/* Find or allocate the cache entry for the given point.
+ *
+ * On a hit (x and y match) the entry's use count is incremented; on a miss
+ * an empty entry is claimed, or the least used entry is evicted, and the
+ * point is stored with its count reset to 1.
+ * Not thread-safe by itself: without HAVE_THREAD_LS the caller holds
+ * sp_cache_256_lock around this call.
+ *
+ * g Point to look up.
+ * cache Out: cache entry for the point.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ if (sp_cache_256_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_256[i].set = 0;
+ }
+ sp_cache_256_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_256[i].set)
+ continue;
+
+ if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
+ sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
+ sp_cache_256[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_256_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_256[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_256_last) {
+ least = sp_cache_256[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_256[j].cnt < least) {
+ i = j;
+ least = sp_cache_256[i].cnt;
+ }
+ }
+ }
+
+ XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+ XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+ sp_cache_256[i].set = 1;
+ sp_cache_256[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_256[i];
+ sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * With FP_ECC a per-point cache of stripe tables is kept: the first use of
+ * a point takes the generic fast path; on the second use the stripe table
+ * is generated and used, as it is for all later uses.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails, BAD_MUTEX_E when the
+ * cache lock cannot be taken and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 8 * 5];
+ sp_cache_256_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ if (initCacheMutex_256 == 0) {
+ wc_InitMutex(&sp_cache_256_lock);
+ initCacheMutex_256 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_256_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_256(g, &cache);
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
+ }
+ else {
+ err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[8];
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert scalar and point into the internal representation. */
+ sp_256_from_mp(k, 8, km);
+ sp_256_point_from_ecc_point_8(point, gm);
+
+ err = sp_256_ecc_mulmod_8(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_to_ecc_point_8(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(point, 0, heap);
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Pre-computed stripe table for the P256 base point: 16 entries of affine
+ * x/y coordinates in Montgomery form; entry 0 is the point at infinity. */
+static const sp_table_entry_256 p256_table[16] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+ 0xa53755c6,0x18905f76 },
+ { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+ 0x25885d85,0x8571ff18 } },
+ /* 2 */
+ { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+ 0xfd1b667f,0x2f5e6961 },
+ { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+ 0x8d6f0f7b,0xf648f916 } },
+ /* 3 */
+ { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+ 0x133d0015,0x5abe0285 },
+ { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+ 0x6b6f7383,0x94bb725b } },
+ /* 4 */
+ { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+ 0x21d324f6,0x61d587d4 },
+ { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+ 0x4621efbe,0xfa11fe12 } },
+ /* 5 */
+ { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+ 0x1f13bedc,0x586eb04c },
+ { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+ 0x70864f11,0x19d5ac08 } },
+ /* 6 */
+ { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+ 0xc3b266b1,0xbb6de651 },
+ { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+ 0x5d18b99b,0x60b4619a } },
+ /* 7 */
+ { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+ 0xaeebffcd,0x9d0f27b2 },
+ { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+ 0x356ec48d,0x244a566d } },
+ /* 8 */
+ { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+ 0xcd42ab1b,0x803f3e02 },
+ { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+ 0x5067adc1,0xc097440e } },
+ /* 9 */
+ { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+ 0x915f1f30,0xf1af32d5 },
+ { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+ 0xe2d41c8b,0x23d0f130 } },
+ /* 10 */
+ { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+ 0x7990216a,0x50bbb4d9 },
+ { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+ 0x01fe49c3,0x2b100118 } },
+ /* 11 */
+ { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+ 0x83fbae0c,0xdd558999 },
+ { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+ 0x149d6041,0xe6e4c551 } },
+ /* 12 */
+ { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+ 0xdb7e63af,0xfad27148 },
+ { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+ 0x9f0e1a84,0x77387de3 } },
+ /* 13 */
+ { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+ 0xbef0c47e,0xb37b85c0 },
+ { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+ 0xf9f628d5,0x9c135ac8 } },
+ /* 14 */
+ { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+ 0x91ece900,0xc109f9cb },
+ { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+ 0x2eee1ee1,0x9bc3344f } },
+ /* 15 */
+ { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+ 0x5f1a4cc1,0x29591d52 },
+ { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+ 0x18ef332c,0x6376551f } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* Use the pre-computed stripe table for the P256 generator. */
+ return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+ k, map, heap);
+}
+
+#else
+static const sp_table_entry_256 p256_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
+ 0xa53755c6,0x18905f76 },
+ { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
+ 0x25885d85,0x8571ff18 } },
+ /* 2 */
+ { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
+ 0xdbdf58e9,0xd953c50d },
+ { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
+ 0x9eb288f3,0x863ebb7e } },
+ /* 3 */
+ { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
+ 0xb5ff80a0,0x00076055 },
+ { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
+ 0x34373ee0,0x83087761 } },
+ /* 4 */
+ { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
+ 0xfd1b667f,0x2f5e6961 },
+ { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
+ 0x8d6f0f7b,0xf648f916 } },
+ /* 5 */
+ { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
+ 0x133d0015,0x5abe0285 },
+ { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
+ 0x6b6f7383,0x94bb725b } },
+ /* 6 */
+ { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
+ 0x2f7dc4ef,0xcdd6bbcb },
+ { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
+ 0x4bdae5f6,0xa361bebd } },
+ /* 7 */
+ { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
+ 0xc4b5292c,0xba12ca09 },
+ { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
+ 0x701fef4b,0x53ebb99d } },
+ /* 8 */
+ { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
+ 0x06d54831,0x8589fb92 },
+ { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
+ 0x02541c4f,0xebb0696d } },
+ /* 9 */
+ { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
+ 0xd1b27da3,0xeb2820cb },
+ { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
+ 0x55a7da1d,0x1f28289b } },
+ /* 10 */
+ { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
+ 0x05e54d63,0x337a4b59 },
+ { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
+ 0xf4c2fbd6,0x0d65e0d5 } },
+ /* 11 */
+ { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
+ 0x52f4a232,0xc23da242 },
+ { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
+ 0xc790cff1,0x19de3b8c } },
+ /* 12 */
+ { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
+ 0x91fccbfd,0xe34dcbd4 },
+ { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
+ 0x7b4e0f7f,0xe7641f44 } },
+ /* 13 */
+ { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
+ 0x052a57bf,0x4a12df57 },
+ { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
+ 0xbb5bea46,0x6af5aa93 } },
+ /* 14 */
+ { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
+ 0x66a44013,0x5fe3475a },
+ { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
+ 0xecfea916,0xb544e308 } },
+ /* 15 */
+ { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
+ 0xa6b0c20b,0xe0b6b2bd },
+ { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
+ 0x25a63774,0x71c023de } },
+ /* 16 */
+ { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
+ 0x21d324f6,0x61d587d4 },
+ { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
+ 0x4621efbe,0xfa11fe12 } },
+ /* 17 */
+ { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
+ 0x1f13bedc,0x586eb04c },
+ { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
+ 0x70864f11,0x19d5ac08 } },
+ /* 18 */
+ { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
+ 0x7f9c563f,0xe7c0073f },
+ { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
+ 0xc65b3c0a,0xe08504fe } },
+ /* 19 */
+ { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
+ 0x5b0996b4,0x78f01882 },
+ { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
+ 0x7e94747a,0x43a773b8 } },
+ /* 20 */
+ { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
+ 0xc3b266b1,0xbb6de651 },
+ { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
+ 0x5d18b99b,0x60b4619a } },
+ /* 21 */
+ { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
+ 0xaeebffcd,0x9d0f27b2 },
+ { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
+ 0x356ec48d,0x244a566d } },
+ /* 22 */
+ { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
+ 0x3581ef69,0x45e58c87 },
+ { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
+ 0xc1e4b7a4,0xc040e21c } },
+ /* 23 */
+ { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
+ 0x682c6ec7,0x1cdf5c97 },
+ { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
+ 0xa92dff3d,0x046755f8 } },
+ /* 24 */
+ { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
+ 0x3b83a5f3,0x046e5e11 },
+ { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
+ 0x303d005b,0x6e0106c3 } },
+ /* 25 */
+ { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
+ 0xe901cf1f,0x442594ed },
+ { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
+ 0x4c2ee68e,0xa796fa51 } },
+ /* 26 */
+ { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
+ 0xc69766e9,0xe4ad2da9 },
+ { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
+ 0xc37b5143,0xc5e94046 } },
+ /* 27 */
+ { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
+ 0xdb464747,0x63283daf },
+ { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
+ 0x1981a938,0x68bd19ab } },
+ /* 28 */
+ { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
+ 0x3c6fdfd6,0x495292f5 },
+ { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
+ 0x26036837,0x0ec7530d } },
+ /* 29 */
+ { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
+ 0x64863f0b,0x0f6207a6 },
+ { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
+ 0x08ed6dcf,0xff0db072 } },
+ /* 30 */
+ { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
+ 0x88740ea3,0x313b513c },
+ { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
+ 0x86f19f81,0x2d3abcf9 } },
+ /* 31 */
+ { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
+ 0xded98cdf,0xc036fa10 },
+ { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
+ 0xb6d40194,0xa6b2a2c4 } },
+ /* 32 */
+ { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
+ 0xaf7c9860,0x810ee252 },
+ { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
+ 0x92731745,0xd485717a } },
+ /* 33 */
+ { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
+ 0x2f9a604e,0x6a6045a7 },
+ { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
+ 0xf9e15790,0xd3e45cfa } },
+ /* 34 */
+ { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
+ 0xe3c2c19c,0x207755de },
+ { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
+ 0x7154b00d,0x48dc5ee5 } },
+ /* 35 */
+ { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
+ 0xdff6f445,0xf2fb0aed },
+ { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
+ 0xdb28d525,0xa13e9015 } },
+ /* 36 */
+ { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
+ 0x1497526f,0x2bf0d6b0 },
+ { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
+ 0x162fe89f,0x42a94a5a } },
+ /* 37 */
+ { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
+ 0xc65ede3d,0x2c2dd969 },
+ { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
+ 0x42c56dbc,0xf437fa1f } },
+ /* 38 */
+ { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
+ 0x54707aa8,0xaaf45b33 },
+ { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
+ 0xf4f272bc,0xcdf6310d } },
+ /* 39 */
+ { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
+ 0xda9e2ff2,0xf0d008ba },
+ { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
+ 0xca887b8b,0x5bd5c2f5 } },
+ /* 40 */
+ { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
+ 0xa09e4719,0xaa12dfc8 },
+ { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
+ 0xe48ca901,0x6c036e73 } },
+ /* 41 */
+ { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
+ 0x96afbe24,0x292ff658 },
+ { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
+ 0x311b7276,0x644e0c90 } },
+ /* 42 */
+ { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
+ 0xcab79a77,0xf25ae793 },
+ { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
+ 0x13db0a3e,0x39b8e653 } },
+ /* 43 */
+ { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
+ 0x0f19db06,0x39122f2f },
+ { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
+ 0xce80ff8d,0x8de80af8 } },
+ /* 44 */
+ { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
+ 0x2e368c04,0x87194906 },
+ { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
+ 0x5b74fde1,0xfc315e6a } },
+ /* 45 */
+ { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
+ 0xee389088,0xe6d4a7ad },
+ { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
+ 0x9be2ae57,0x35dfaf9a } },
+ /* 46 */
+ { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
+ 0x1c830d2b,0x1da5c7d7 },
+ { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
+ 0xdbf4b9d6,0x7077c0fd } },
+ /* 47 */
+ { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
+ 0xe50efe44,0x53a8632e },
+ { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
+ 0x34e1fcc1,0x028ca76d } },
+ /* 48 */
+ { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
+ 0x6962f046,0x04c17cd8 },
+ { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
+ 0xfed97474,0xf7ba4de9 } },
+ /* 49 */
+ { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
+ 0x52131c41,0xe31f9600 },
+ { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
+ 0xce34d47b,0xaa3a6259 } },
+ /* 50 */
+ { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
+ 0x7e79daee,0x2398dd62 },
+ { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
+ 0x1c046210,0x5717f5b2 } },
+ /* 51 */
+ { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
+ 0x0e3c28de,0x660a2c56 },
+ { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
+ 0x4f522453,0x624ee54c } },
+ /* 52 */
+ { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
+ 0x92bdfbc0,0x4f392afb },
+ { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
+ 0xccdb399c,0x8a3e7977 } },
+ /* 53 */
+ { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
+ 0x70c24404,0x3888d023 },
+ { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
+ 0x18102336,0xa5e62e47 } },
+ /* 54 */
+ { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
+ 0x466a5adc,0x2c4768e6 },
+ { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
+ 0xf9e652a0,0x7b5e6441 } },
+ /* 55 */
+ { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
+ 0x0c8d744a,0xb8af73cb },
+ { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
+ 0x7f3f0895,0xa036395f } },
+ /* 56 */
+ { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
+ 0x875fb533,0x4be36b01 },
+ { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
+ 0x1bdc00c0,0x8cbc9a87 } },
+ /* 57 */
+ { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
+ 0x0c0835f8,0x44e7553e },
+ { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
+ 0x5eb8fc18,0x470a683a } },
+ /* 58 */
+ { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
+ 0xc63dc6ef,0x16410690 },
+ { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
+ 0x7abcbb4f,0xd73479fd } },
+ /* 59 */
+ { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
+ 0x0771666b,0x816469e3 },
+ { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
+ 0xf0dd3f9c,0x0a36dd23 } },
+ /* 60 */
+ { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
+ 0xfdbab118,0xe331dfd6 },
+ { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
+ 0x492e3389,0xd3b4782a } },
+ /* 61 */
+ { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
+ 0x4c86a5bd,0x7281275a },
+ { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
+ 0xce145059,0x2c062e7e } },
+ /* 62 */
+ { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
+ 0x2c4e7ef1,0x282a35f9 },
+ { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
+ 0x554d2abd,0xc71cd513 } },
+ /* 63 */
+ { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
+ 0xcf47f3a3,0xc50f6740 },
+ { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
+ 0x212958dc,0xb9ecb3a7 } },
+ /* 64 */
+ { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
+ 0xcd42ab1b,0x803f3e02 },
+ { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
+ 0x5067adc1,0xc097440e } },
+ /* 65 */
+ { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
+ 0x915f1f30,0xf1af32d5 },
+ { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
+ 0xe2d41c8b,0x23d0f130 } },
+ /* 66 */
+ { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
+ 0xc0a3fadd,0xb0288dd6 },
+ { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
+ 0xf408c8d2,0xffd3724f } },
+ /* 67 */
+ { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
+ 0xd78c26df,0xf5590f4a },
+ { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
+ 0xf6f74a20,0x18d6da54 } },
+ /* 68 */
+ { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
+ 0x7990216a,0x50bbb4d9 },
+ { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
+ 0x01fe49c3,0x2b100118 } },
+ /* 69 */
+ { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
+ 0x83fbae0c,0xdd558999 },
+ { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
+ 0x149d6041,0xe6e4c551 } },
+ /* 70 */
+ { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
+ 0x07ed56ff,0x51e00db1 },
+ { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
+ 0x49829177,0xe22f4241 } },
+ /* 71 */
+ { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
+ 0x52dc48c9,0xf709373d },
+ { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
+ 0xe7275b11,0xbd52d288 } },
+ /* 72 */
+ { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
+ 0xc8aa77a6,0xa0d0f8e4 },
+ { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
+ 0x946d6a00,0xa56c78c7 } },
+ /* 73 */
+ { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
+ 0x731a367a,0xd8befdf8 },
+ { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
+ 0xce9f6478,0x854a68a5 } },
+ /* 74 */
+ { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
+ 0x98846a95,0x5cacea0b },
+ { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
+ 0x35e4efa9,0xe4982d12 } },
+ /* 75 */
+ { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
+ 0x16b20499,0x8046b7f6 },
+ { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
+ 0x9082af55,0xeb17ca7b } },
+ /* 76 */
+ { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
+ 0xfab5e131,0x097b00ba },
+ { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
+ 0xafdbcc9e,0xf95c747b } },
+ /* 77 */
+ { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
+ 0x566ed837,0x3512601e },
+ { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
+ 0x6068ab6b,0x0ef97123 } },
+ /* 78 */
+ { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
+ 0x3b4fbc95,0xfc16d933 },
+ { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
+ 0xb95d7a17,0x14ca4af1 } },
+ /* 79 */
+ { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
+ 0xf59c231d,0x4057b063 },
+ { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
+ 0xf1330b13,0x1c3b5d64 } },
+ /* 80 */
+ { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
+ 0xdb7e63af,0xfad27148 },
+ { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
+ 0x9f0e1a84,0x77387de3 } },
+ /* 81 */
+ { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
+ 0xbef0c47e,0xb37b85c0 },
+ { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
+ 0xf9f628d5,0x9c135ac8 } },
+ /* 82 */
+ { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
+ 0xc433851f,0x5721361f },
+ { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
+ 0xe6bb11bd,0xdcbac3c9 } },
+ /* 83 */
+ { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
+ 0x2d626862,0xb8c1c89e },
+ { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
+ 0x2f9422d4,0x5d23bbda } },
+ /* 84 */
+ { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
+ 0x91ece900,0xc109f9cb },
+ { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
+ 0x2eee1ee1,0x9bc3344f } },
+ /* 85 */
+ { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
+ 0x5f1a4cc1,0x29591d52 },
+ { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
+ 0x18ef332c,0x6376551f } },
+ /* 86 */
+ { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
+ 0x08e2987a,0xbdb79dc8 },
+ { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
+ 0xadd3c14a,0x8ee86001 } },
+ /* 87 */
+ { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
+ 0x6f77aa4b,0x92e51d7a },
+ { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
+ 0x0a56aaaa,0x5182f86f } },
+ /* 88 */
+ { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
+ 0x4073a6f2,0x91dcab5d },
+ { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
+ 0x97974f2b,0x17a0cedb } },
+ /* 89 */
+ { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
+ 0x7f4cdf41,0x2e8ce36c },
+ { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
+ 0x34f668f3,0xf4ccc6cb } },
+ /* 90 */
+ { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
+ 0x9a0df3c9,0xac0db488 },
+ { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
+ 0x94c974a2,0x95a64a61 } },
+ /* 91 */
+ { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
+ 0x29210677,0x231e54ba },
+ { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
+ 0xd8a731e1,0xab0be032 } },
+ /* 92 */
+ { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
+ 0x2cf6a679,0xf1bcc880 },
+ { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
+ 0x5aebb271,0x85169469 } },
+ /* 93 */
+ { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
+ 0xdaad55d8,0x8f67d9d2 },
+ { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
+ 0xc0728b5d,0xf84572b9 } },
+ /* 94 */
+ { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
+ 0x616b2c19,0xedee2710 },
+ { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
+ 0x44ebd7f4,0x9fd27e9b } },
+ /* 95 */
+ { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
+ 0x958ff387,0xa40c2fb6 },
+ { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
+ 0x7dc6decf,0x99bc9bb8 } },
+ /* 96 */
+ { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
+ 0xa16d7e64,0x9abe210b },
+ { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
+ 0x87f344b0,0x7881c257 } },
+ /* 97 */
+ { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
+ 0xa30e8940,0x15e6e319 },
+ { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
+ 0x191172ce,0x0e55facf } },
+ /* 98 */
+ { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
+ 0x6fe96577,0xd73d0976 },
+ { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
+ 0x8f15a50b,0x9250a374 } },
+ /* 99 */
+ { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
+ 0xc1cc8c0b,0x77414082 },
+ { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
+ 0x12eb20b9,0x8cb04f4d } },
+ /* 100 */
+ { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
+ 0x47123b51,0xe4e429ef },
+ { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
+ 0x3c6e6552,0x37bca2ff } },
+ /* 101 */
+ { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
+ 0x3002b22a,0x59913edc },
+ { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
+ 0xb013e226,0x43786e4a } },
+ /* 102 */
+ { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
+ 0xb7e79e7a,0x8638ca98 },
+ { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
+ 0x7b3aa6f0,0x1ecdd36a } },
+ /* 103 */
+ { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
+ 0xd459f32d,0xd85d0f85 },
+ { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
+ 0xb4ed3c62,0xa04f19c3 } },
+ /* 104 */
+ { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
+ 0x5c0950b0,0x92b2eeea },
+ { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
+ 0x5834276c,0x1ee78221 } },
+ /* 105 */
+ { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
+ 0x57a6e150,0xf3f2ced8 },
+ { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
+ 0x3da3e210,0x0f56a454 } },
+ /* 106 */
+ { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
+ 0x1969e263,0xbd8f1741 },
+ { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
+ 0x30ccfa09,0x2d1a1c35 } },
+ /* 107 */
+ { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
+ 0xb91fba46,0xa107a65e },
+ { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
+ 0xf87a9af2,0x183d760a } },
+ /* 108 */
+ { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
+ 0xc269d754,0x1d44179d },
+ { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
+ 0x9606d262,0x771f9cc2 } },
+ /* 109 */
+ { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
+ 0x0362718e,0x64427a31 },
+ { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
+ 0x6ae90d6d,0x49d9b749 } },
+ /* 110 */
+ { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
+ 0x3f605445,0x9037d81b },
+ { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
+ 0x7cc0639c,0x08c3de6a } },
+ /* 111 */
+ { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
+ 0x45796b2f,0xc6909442 },
+ { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
+ 0xcafe3ac0,0x3fa3db02 } },
+ /* 112 */
+ { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
+ 0xfdb808ff,0xc5c4bdb0 },
+ { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
+ 0x46c2b6b5,0x2d56db94 } },
+ /* 113 */
+ { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
+ 0xe503ba42,0x0f56bd9d },
+ { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
+ 0x1173b5f1,0x4003bb9d } },
+ /* 114 */
+ { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
+ 0xa07f2f9e,0x53765522 },
+ { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
+ 0x6c5d4549,0x7a056f58 } },
+ /* 115 */
+ { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
+ 0x7a1a2675,0x77d482f1 },
+ { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
+ 0x2b38b0e4,0x4115012b } },
+ /* 116 */
+ { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
+ 0xfbea0946,0xcdf04572 },
+ { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
+ 0x97383109,0xee703dda } },
+ /* 117 */
+ { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
+ 0xa162ce21,0x2a0ad89d },
+ { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
+ 0xac2b4659,0xd62d0b67 } },
+ /* 118 */
+ { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
+ 0x991c2426,0xb39a23f2 },
+ { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
+ 0xc0674cc5,0x04ed0092 } },
+ /* 119 */
+ { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
+ 0x0177c387,0xa0a91fc1 },
+ { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
+ 0x9ed20c41,0x084cf988 } },
+ /* 120 */
+ { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
+ 0x73abf77e,0xd57955b2 },
+ { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
+ 0x02d141f1,0x8e14ea42 } },
+ /* 121 */
+ { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
+ 0x2aa4d158,0x597e1a37 },
+ { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
+ 0x199b4dea,0xca3f0236 } },
+ /* 122 */
+ { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
+ 0x309c07e4,0xbde7fd7e },
+ { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
+ 0x0a7dd198,0xb623ad0e } },
+ /* 123 */
+ { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
+ 0x58ec137b,0xd6aa2e46 },
+ { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
+ 0x2dcc513a,0x111662e0 } },
+ /* 124 */
+ { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
+ 0x94b750f8,0xdb3ee1cb },
+ { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
+ 0x52206a59,0x886a6442 } },
+ /* 125 */
+ { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
+ 0x018a17bc,0xa70cf4eb },
+ { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
+ 0xd1747b77,0xaa4772ab } },
+ /* 126 */
+ { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
+ 0x30faf974,0x611a6ddc },
+ { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
+ 0x16429c88,0x5cfffaf8 } },
+ /* 127 */
+ { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
+ 0x7dc1994c,0x6e5a6b23 },
+ { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
+ 0x242dabcc,0x481a238d } },
+ /* 128 */
+ { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
+ 0xe0cdf943,0x2c41114c },
+ { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
+ 0x42ff9297,0x20477abf } },
+ /* 129 */
+ { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
+ 0xc77396b6,0xac66409a },
+ { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
+ 0xcc122f85,0xce8e6975 } },
+ /* 130 */
+ { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
+ 0x250bb4a8,0x08fde365 },
+ { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
+ 0x565d6cd7,0x2f7e2fd2 } },
+ /* 131 */
+ { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
+ 0x907702ae,0xc65be92e },
+ { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
+ 0xd1193b3a,0x4bff8e47 } },
+ /* 132 */
+ { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
+ 0x5772967d,0x3e4e4ae6 },
+ { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
+ 0x58ec6028,0x5388aefd } },
+ /* 133 */
+ { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
+ 0x4f75be0e,0x5cf908d1 },
+ { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
+ 0x60f00ce2,0xa698ba40 } },
+ /* 134 */
+ { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
+ 0x7aebad8d,0xb142ef8a },
+ { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
+ 0x58515075,0xd1896a96 } },
+ /* 135 */
+ { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
+ 0x7981da39,0x267b0e0b },
+ { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
+ 0xa1119393,0xb54e287a } },
+ /* 136 */
+ { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
+ 0x5f87d4e6,0x84abb28b },
+ { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
+ 0x17655640,0xe5436f67 } },
+ /* 137 */
+ { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
+ 0x5b9ce99e,0x0404f68b },
+ { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
+ 0x0ac1c701,0x3a4263df } },
+ /* 138 */
+ { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
+ 0x905ea367,0x0ca8fd3f },
+ { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
+ 0x4ddb0c33,0x96dca264 } },
+ /* 139 */
+ { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
+ 0x3aad59dc,0x4363e212 },
+ { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
+ 0xd8bb98c4,0x840e115c } },
+ /* 140 */
+ { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
+ 0x30ded6d4,0x5e0d6abd },
+ { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
+ 0x2945a25a,0x7dea48f4 } },
+ /* 141 */
+ { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
+ 0xebfd16d1,0xabc2a2be },
+ { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
+ 0x6c7eefc1,0x4ea35394 } },
+ /* 142 */
+ { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
+ 0x1c94ffc3,0x3a76e689 },
+ { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
+ 0x465e6464,0x8212a10a } },
+ /* 143 */
+ { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
+ 0x599cb164,0xaa7cab71 },
+ { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
+ 0xfe0617c3,0x40e38073 } },
+ /* 144 */
+ { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
+ 0xb3055526,0xe3604700 },
+ { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
+ 0xa3dee15f,0x6542d677 } },
+ /* 145 */
+ { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
+ 0x09bb6f21,0xa6534aee },
+ { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
+ 0xdc9aef22,0xf3cb672f } },
+ /* 146 */
+ { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
+ 0xaae870e7,0x7cafaa2e },
+ { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
+ 0xb9bd522e,0x0aab13c1 } },
+ /* 147 */
+ { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
+ 0x847012e9,0x4b91a602 },
+ { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
+ 0x72321cab,0x49534c53 } },
+ /* 148 */
+ { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
+ 0xd65ac5ee,0xcaf46c4f },
+ { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
+ 0x04c6770f,0x14ce9e57 } },
+ /* 149 */
+ { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
+ 0x3e4c9a71,0x1bb708a5 },
+ { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
+ 0xda300102,0xf9d126f2 } },
+ /* 150 */
+ { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
+ 0x729ecc69,0x807afcb9 },
+ { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
+ 0x6568cd8c,0x751adcd1 } },
+ /* 151 */
+ { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
+ 0x2537743f,0x29ec4468 },
+ { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
+ 0x92a4077d,0xff9370e3 } },
+ /* 152 */
+ { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
+ 0xa2a9d01a,0x9776478b },
+ { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
+ 0xac2f82fa,0x74a6313f } },
+ /* 153 */
+ { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
+ 0x0ff4863d,0xab75be15 },
+ { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
+ 0x0b4459f6,0x4ebeac2e } },
+ /* 154 */
+ { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
+ 0x2c1baffc,0xdf99887b },
+ { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
+ 0x779f4058,0x27b040a7 } },
+ /* 155 */
+ { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
+ 0xe4cfa3f5,0xb393dd37 },
+ { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
+ 0xd0463419,0x09588c12 } },
+ /* 156 */
+ { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
+ 0xdb9f648b,0x81c879a9 },
+ { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
+ 0x5fc11bc4,0xfa0d48f5 } },
+ /* 157 */
+ { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
+ 0xb6a367d6,0x8ea0e156 },
+ { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
+ 0xfa00b5ac,0x3f5ab924 } },
+ /* 158 */
+ { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
+ 0x2b74256e,0x8bc76887 },
+ { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
+ 0x60fcf34f,0xb386f190 } },
+ /* 159 */
+ { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
+ 0x1b069c4d,0x4cb460f7 },
+ { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
+ 0x95ef5223,0x52c0d508 } },
+ /* 160 */
+ { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
+ 0x2bb09c0b,0x4ac3c938 },
+ { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
+ 0xe39705f4,0x380d94c7 } },
+ /* 161 */
+ { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
+ 0xde2637af,0x2ce3e171 },
+ { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
+ 0x0b624e4d,0x2e6cd852 } },
+ /* 162 */
+ { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
+ 0x42c69d54,0xca177547 },
+ { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
+ 0x9cab2ce6,0xa976a713 } },
+ /* 163 */
+ { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
+ 0x0a1f4999,0x8720a717 },
+ { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
+ 0xc769893c,0x9719ef29 } },
+ /* 164 */
+ { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
+ 0xe15704c1,0xa5072976 },
+ { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
+ 0xf7b77725,0x99389c9d } },
+ /* 165 */
+ { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
+ 0x202c82e4,0xa88806aa },
+ { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
+ 0x4738dcfe,0x0043bffb } },
+ /* 166 */
+ { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
+ 0xba6c4866,0x52f3ef01 },
+ { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
+ 0x9ef27e75,0x3296bd89 } },
+ /* 167 */
+ { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
+ 0xaee571e9,0x3b90febf },
+ { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
+ 0x9f810b18,0x6e88069d } },
+ /* 168 */
+ { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
+ 0xdefaad13,0xa7222bea },
+ { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
+ 0xbc2ac690,0xbe94d523 } },
+ /* 169 */
+ { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
+ 0x9be8c766,0x7782defe },
+ { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
+ 0xa2892e4b,0x03838567 } },
+ /* 170 */
+ { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
+ 0xadf7b420,0xdbd986c4 },
+ { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
+ 0x6860bbd0,0x8e24d3c4 } },
+ /* 171 */
+ { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
+ 0x407bafc8,0x541a99c4 },
+ { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
+ 0xf57d35d1,0xc0092c49 } },
+ /* 172 */
+ { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
+ 0x7286944d,0x75e40634 },
+ { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
+ 0xc7848586,0x5b7cb658 } },
+ /* 173 */
+ { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
+ 0x8df097a1,0x7ae13eba },
+ { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
+ 0xe2a8e3fd,0x787d8074 } },
+ /* 174 */
+ { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
+ 0x9ef28484,0x5c222819 },
+ { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
+ 0xbaf0f2b0,0xe45d37ab } },
+ /* 175 */
+ { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
+ 0x84dfb9d3,0xed7bc122 },
+ { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
+ 0x45ca6d27,0xaac97cc9 } },
+ /* 176 */
+ { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
+ 0x1163dc4e,0x318f97b3 },
+ { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
+ 0x9a84ff4d,0xfa41faa1 } },
+ /* 177 */
+ { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
+ 0x1d26e9e2,0x38bb6b2c },
+ { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
+ 0xce7601a5,0x94dd0905 } },
+ /* 178 */
+ { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
+ 0xd25c2ae9,0x92077867 },
+ { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
+ 0xd29beb51,0x81e8428b } },
+ /* 179 */
+ { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
+ 0xdbbfa4b1,0x1b94ab62 },
+ { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
+ 0x055590ee,0x06a38e28 } },
+ /* 180 */
+ { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
+ 0x83d9d4f8,0xa7b36c20 },
+ { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
+ 0xa2822a20,0xbe54c6b4 } },
+ /* 181 */
+ { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
+ 0xeae022bb,0xbf30a5ab },
+ { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
+ 0x2732d13a,0xd1c820de } },
+ /* 182 */
+ { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
+ 0x68a18da3,0xb7d17bed },
+ { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
+ 0x6412cc64,0x3997fd5e } },
+ /* 183 */
+ { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
+ 0x3c6c13e8,0x0eeb8929 },
+ { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
+ 0xc922b6ef,0x228916f8 } },
+ /* 184 */
+ { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
+ 0x6e93097e,0xec05ad1d },
+ { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
+ 0x7ff11b37,0x7d314156 } },
+ /* 185 */
+ { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
+ 0x9bc1d7a3,0xe9ce66fc },
+ { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
+ 0x72280651,0xd9650b01 } },
+ /* 186 */
+ { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
+ 0x804eb7a2,0x14d6699a },
+ { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
+ 0x0d43598a,0x6f4c6841 } },
+ /* 187 */
+ { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
+ 0x61189abb,0x4c4350fd },
+ { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
+ 0x5a3118b5,0xa726d242 } },
+ /* 188 */
+ { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
+ 0xcc6cf392,0x13639e82 },
+ { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
+ 0xc1a335a3,0xca9365e1 } },
+ /* 189 */
+ { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
+ 0x970b72a5,0x9ce29c34 },
+ { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
+ 0xab42af98,0x48c4abd7 } },
+ /* 190 */
+ { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
+ 0xf67b33cb,0x78017c32 },
+ { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
+ 0xde5c1c04,0x53cd0454 } },
+ /* 191 */
+ { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
+ 0xd3d7fa8f,0xeea465c1 },
+ { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
+ 0x7ae69193,0x1b6e42a4 } },
+ /* 192 */
+ { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
+ 0x187fbd3d,0x0224da14 },
+ { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
+ 0x42bfff33,0x60838ef0 } },
+ /* 193 */
+ { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
+ 0x2d331643,0x636eb202 },
+ { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
+ 0x39218bac,0x8844eeb6 } },
+ /* 194 */
+ { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
+ 0x51fb789e,0x27ba83dc },
+ { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
+ 0x87f3a4ab,0xadb62d34 } },
+ /* 195 */
+ { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
+ 0x75e7c8b2,0xb990fd76 },
+ { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
+ 0x4d10d18d,0x81707ef9 } },
+ /* 196 */
+ { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
+ 0xd5a8aa5c,0x3792daea },
+ { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
+ 0x94b001ba,0x5abd635e } },
+ /* 197 */
+ { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
+ 0x846ab610,0x5995bf21 },
+ { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
+ 0xd483411e,0x44c32ca2 } },
+ /* 198 */
+ { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
+ 0x8082a54c,0x1f2162fb },
+ { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
+ 0xc3e907c9,0x8f1d402b } },
+ /* 199 */
+ { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
+ 0x926edbf9,0xb1980f43 },
+ { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
+ 0x37448e45,0x2828ad9b } },
+ /* 200 */
+ { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
+ 0x5a14b390,0x4973f127 },
+ { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
+ 0xdb168ac7,0x6dac8ed0 } },
+ /* 201 */
+ { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
+ 0x20b9de4c,0x4b23ef59 },
+ { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
+ 0xddf49a4e,0x4dd71534 } },
+ /* 202 */
+ { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
+ 0x2f4a4dbb,0xfd317000 },
+ { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
+ 0x9569f365,0x14fac58c } },
+ /* 203 */
+ { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
+ 0x36abda50,0xed7c7651 },
+ { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
+ 0x4d2e9f53,0xfefcb7f7 } },
+ /* 204 */
+ { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
+ 0x87e0d80b,0x1801a57e },
+ { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
+ 0x1ead1064,0x9f8fc11e } },
+ /* 205 */
+ { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
+ 0x3d3a69a9,0xa9d3809d },
+ { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
+ 0xe1178ef7,0x3006b9ae } },
+ /* 206 */
+ { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
+ 0x45f8f761,0x0ab85fd7 },
+ { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
+ 0x11e942c2,0xb122d675 } },
+ /* 207 */
+ { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
+ 0x097dbaec,0x9f599dc1 },
+ { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
+ 0x8a294b78,0x7d5528e0 } },
+ /* 208 */
+ { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
+ 0x303f1730,0x28ccea01 },
+ { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
+ 0xa1d013bf,0xc18baf48 } },
+ /* 209 */
+ { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
+ 0xb7a9596b,0x9def809d },
+ { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
+ 0x68808ce5,0x0357f8b0 } },
+ /* 210 */
+ { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
+ 0x1b489887,0xe4a01add },
+ { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
+ 0xce10cc30,0x466d7d79 } },
+ /* 211 */
+ { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
+ 0x451ead1a,0xc672a522 },
+ { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
+ 0xf2a67513,0x5e3d64fa } },
+ /* 212 */
+ { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
+ 0xeb8e42fc,0x6c8a7a95 },
+ { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
+ 0xad82ca91,0x348ae422 } },
+ /* 213 */
+ { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
+ 0xd9ef2d2e,0xc1074de0 },
+ { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
+ 0xc9e54ffc,0xfbadfbdb } },
+ /* 214 */
+ { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
+ 0x83716fcd,0xb7f976b4 },
+ { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
+ 0xcafcc805,0xf4d41b2e } },
+ /* 215 */
+ { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
+ 0xe0160f10,0x180824ea },
+ { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
+ 0x83cf6d25,0x67e5f639 } },
+ /* 216 */
+ { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
+ 0x04c11fc6,0x9fef789a },
+ { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
+ 0xa99c4e20,0xbc80c181 } },
+ /* 217 */
+ { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
+ 0x9f8cdf10,0x49270e62 },
+ { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
+ 0x61372f7f,0xd2ee52f9 } },
+ /* 218 */
+ { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
+ 0xe5abb733,0xdfb478be },
+ { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
+ 0x08df473a,0xd9a140b4 } },
+ /* 219 */
+ { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
+ 0x623f4b1a,0x760c058d },
+ { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
+ 0x8f190409,0x7141982d } },
+ /* 220 */
+ { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
+ 0x89d54e47,0x3af9d1ce },
+ { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
+ 0x73957dd6,0xb1f815c3 } },
+ /* 221 */
+ { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
+ 0x1543f052,0xa41aed14 },
+ { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
+ 0x86fb60ef,0xd6e9c1dd } },
+ /* 222 */
+ { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
+ 0xae9bf8c2,0x9c9c6e10 },
+ { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
+ 0x40fa61b6,0x566bd596 } },
+ /* 223 */
+ { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
+ 0xf525345e,0xcf2c7390 },
+ { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
+ 0x8aa20979,0x02f51755 } },
+ /* 224 */
+ { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
+ 0xe8d4d97d,0x14e9ada5 },
+ { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
+ 0x8e9d9ae8,0xa0ad4fab } },
+ /* 225 */
+ { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
+ 0x6e56ed1e,0xbcd530b8 },
+ { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
+ 0x6979341d,0x909283cf } },
+ /* 226 */
+ { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
+ 0xace1549a,0x35eeb7c9 },
+ { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
+ 0x448ae864,0x9a8b2cf4 } },
+ /* 227 */
+ { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
+ 0xd4491379,0x6bdb60f4 },
+ { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
+ 0x94ba08a9,0x01ec3cfd } },
+ /* 228 */
+ { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
+ 0x475464f6,0xd1acb1c0 },
+ { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
+ 0x405626c2,0x7dcd079d } },
+ /* 229 */
+ { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
+ 0x377d19b8,0x0bf53589 },
+ { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
+ 0xe16686fc,0xd28be4d9 } },
+ /* 230 */
+ { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
+ 0x510f88ce,0xd76007aa },
+ { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
+ 0xb303bb01,0xf2b52f68 } },
+ /* 231 */
+ { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
+ 0xcc5aed3a,0xd8dbe98e },
+ { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
+ 0xee559705,0xe01593a3 } },
+ /* 232 */
+ { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
+ 0xaeb8ef06,0xafec07b1 },
+ { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
+ 0x6e2dbfdd,0xa71b9354 } },
+ /* 233 */
+ { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
+ 0x628523d9,0x53a2005c },
+ { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
+ 0x3d588e3d,0xbf47d19b } },
+ /* 234 */
+ { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
+ 0x39c9a1b6,0x001c2c7f },
+ { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
+ 0x86ffb99b,0xfdadf8e7 } },
+ /* 235 */
+ { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
+ 0x5aa43c94,0x3a838e4d },
+ { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
+ 0x873e1da3,0x3cdb8257 } },
+ /* 236 */
+ { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
+ 0xf1f57fba,0x5a60cc89 },
+ { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
+ 0xdbfd8fc0,0x922ff56f } },
+ /* 237 */
+ { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
+ 0xf6c5cd62,0x72919a7d },
+ { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
+ 0x3624089a,0x5e791780 } },
+ /* 238 */
+ { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
+ 0xe24c2fab,0x4e0a5371 },
+ { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
+ 0xd56604ee,0xf5ff7818 } },
+ /* 239 */
+ { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
+ 0x533f5e64,0xe41df0e9 },
+ { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
+ 0xac4f155f,0x8edd7d6e } },
+ /* 240 */
+ { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
+ 0xed8aee96,0x1432c1ca },
+ { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
+ 0x5ac8d2c6,0xcaef480b } },
+ /* 241 */
+ { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
+ 0x8efae236,0xd0ba177e },
+ { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
+ 0x1c54ae16,0xf31c957c } },
+ /* 242 */
+ { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
+ 0x96e17c3a,0x013404cb },
+ { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
+ 0x91933e6c,0x6f377c4b } },
+ /* 243 */
+ { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
+ 0xd2d09506,0x6dba3e4e },
+ { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
+ 0x3becf4a7,0xf13cf342 } },
+ /* 244 */
+ { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
+ 0x274bbad3,0xc83fa9a9 },
+ { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
+ 0x5d702683,0xb49d70f4 } },
+ /* 245 */
+ { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
+ 0x0c30f1cf,0x59cfadbb },
+ { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
+ 0x354a4b67,0x5babf362 } },
+ /* 246 */
+ { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
+ 0x9026c8f0,0x6188c6a7 },
+ { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
+ 0xdf50b9d9,0x993fe475 } },
+ /* 247 */
+ { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
+ 0x4c80616b,0x81f76466 },
+ { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
+ 0x5fe9060d,0x564a812a } },
+ /* 248 */
+ { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
+ 0x00e51d6c,0x226bf3cf },
+ { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
+ 0xff257836,0x68779f47 } },
+ /* 249 */
+ { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
+ 0xeb092e0b,0x97bcb0d1 },
+ { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
+ 0x0a784655,0xa872ffe8 } },
+ /* 250 */
+ { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
+ 0xb732a36a,0x02812bfc },
+ { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
+ 0xfe5396af,0x07391cc9 } },
+ /* 251 */
+ { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
+ 0x7e6d2a08,0x355d2adc },
+ { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
+ 0x7c2a3a79,0x3dc2b1e3 } },
+ /* 252 */
+ { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
+ 0x3ccd846b,0xc4786910 },
+ { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
+ 0xd5bb4d32,0xccc42968 } },
+ /* 253 */
+ { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
+ 0xaa4871cf,0xe147eb42 },
+ { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
+ 0x080e96e3,0x239ac047 } },
+ /* 254 */
+ { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
+ 0xf5f7e59d,0xc55fa1a3 },
+ { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
+ 0xd4f4b699,0x094cd99c } },
+ /* 255 */
+ { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
+ 0x42abad33,0xb90a30b6 },
+ { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
+ 0x1b7924f7,0x019f8b9a } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Thin wrapper: delegates to the stripe method using the pre-computed
+ * table of multiples of the curve base point (p256_table above).
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
+                                      k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Public entry point: converts the mp_int scalar into the internal
+ * 8-digit representation, performs the fixed-base multiplication, and
+ * converts the result back to an ecc_point.
+ *
+ * km    Scalar to multiply by.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack storage used when dynamic allocation is not wanted. */
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Small-stack build: scalar digits are heap allocated. */
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, km);
+
+        err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Convert back to caller's ecc_point representation. */
+        err = sp_256_point_to_ecc_point_8(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time: all eight words are OR-ed together,
+ * so no data-dependent branch is taken.
+ *
+ * a  Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_8(const sp_digit* a)
+{
+    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * ARM32 inline assembly: adds 1 to the least significant word and
+ * ripples the carry (adcs) through all eight 32-bit words. Any carry
+ * out of the most significant word is discarded.
+ *
+ * a  A single precision integer (8 x 32-bit words).
+ * Clobbers: r1, r2, flags, memory at a[0..7].
+ */
+SP_NOINLINE static void sp_256_add_one_8(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov r2, #1\n\t"
+        "ldr r1, [%[a], #0]\n\t"
+        "adds r1, r1, r2\n\t"
+        "mov r2, #0\n\t"
+        "str r1, [%[a], #0]\n\t"
+        "ldr r1, [%[a], #4]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #4]\n\t"
+        "ldr r1, [%[a], #8]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #8]\n\t"
+        "ldr r1, [%[a], #12]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #12]\n\t"
+        "ldr r1, [%[a], #16]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #16]\n\t"
+        "ldr r1, [%[a], #20]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #20]\n\t"
+        "ldr r1, [%[a], #24]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #24]\n\t"
+        "ldr r1, [%[a], #28]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #28]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * Bytes are consumed from the end of the array (least significant byte
+ * first) and packed into 32-bit words, 8 bits at a time. When a byte
+ * straddles a word boundary (s reaches 24 bits used) the remaining
+ * high bits of that byte spill into the next word.
+ *
+ * r     A single precision integer (output words).
+ * size  Maximum number of words to convert.
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;     /* bit position within the current output word */
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            /* Word is full: mask to 32 bits (no-op when sp_digit is
+             * exactly 32 bits wide) and carry the byte's high bits
+             * into the next word. */
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any remaining high words. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * Rejection sampling: draw 32 random bytes, accept only values below
+ * order-2 (p256_order2), then add one so the result lies in
+ * [1, order-1]. Out-of-range draws are discarded and re-sampled.
+ *
+ * rng  Random number generator.
+ * k    Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[32];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
+            if (sp_256_cmp_8(k, p256_order2) < 0) {
+                sp_256_add_one_8(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * Generates a random scalar in [1, order-1] as the private key and
+ * multiplies the base point by it to obtain the public point. When
+ * WOLFSSL_VALIDATE_ECC_KEYGEN is defined the public point is also
+ * multiplied by the group order and checked to be the point at
+ * infinity, confirming the point has the correct order.
+ *
+ * rng   Random number generator.
+ * priv  Generated private value.
+ * pub   Generated public point.
+ * heap  Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256 inf;
+#endif
+#endif
+    sp_point_256* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_256* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    /* Private key: random scalar in [1, order-1]. */
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_gen_k_8(rng, k);
+    }
+    /* Public key: k.G in affine coordinates. */
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    /* order.P must be the point at infinity (x == 0 and y == 0 in this
+     * representation) or the point is invalid. */
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        if ((sp_256_iszero_8(point->x) == 0) || (sp_256_iszero_8(point->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_to_ecc_point_8(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_256_point_free_8(infinity, 1, heap);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * Words are emitted least significant first into the tail of the
+ * buffer, 8 bits at a time; s tracks how many bits of the current
+ * output byte were already filled by the previous word.
+ *
+ * r  A single precision integer (8 x 32-bit words).
+ * a  Byte array (must hold at least 32 bytes).
+ */
+static void sp_256_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 256 / 8 - 1;      /* start at the last byte of the output */
+    a[j] = 0;
+    for (i=0; i<8 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit remaining whole bytes of the 32-bit word. */
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* Bits of this word left over for the next output byte. */
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * ECDH shared-secret computation: out = (priv . pub)->x, big endian,
+ * always exactly 32 bytes.
+ *
+ * priv    Scalar to multiply the point by.
+ * pub     Point to multiply.
+ * out     Buffer to hold X ordinate.
+ * outLen  On entry, size of the buffer in bytes.
+ *         On exit, length of data in buffer in bytes.
+ * heap    Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
+                          word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 p;
+    sp_digit kd[8];
+#endif
+    sp_point_256* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    if (*outLen < 32U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_8(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(k, 8, priv);
+        sp_256_point_from_ecc_point_8(point, pub);
+            err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Only the affine X ordinate is the shared secret. */
+        sp_256_to_bin(point->x, out);
+        *outLen = 32;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_8(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * ARM32 inline assembly loop, two words per iteration, until a reaches
+ * a+32. The borrow from the previous iteration is held in c (0 or
+ * all-ones); "subs r5, r5, %[c]" re-arms the carry flag from it so the
+ * following sbcs instructions continue the chain across iterations.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * returns 0 when there is no borrow out, all-ones when there is.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov r8, %[a]\n\t"
+        "add r8, r8, #32\n\t"
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        "sbc %[c], %[c], %[c]\n\t"
+        "add %[a], %[a], #8\n\t"
+        "add %[b], %[b], #8\n\t"
+        "cmp %[a], r8\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * Fully unrolled ARM32 variant: eight words subtracted with a single
+ * subs/sbcs borrow chain, loading and storing two words at a time with
+ * ldm/stm. The final "sbc c, c, c" converts the borrow flag into a
+ * 0 / all-ones return value.
+ *
+ * a  A single precision integer; updated in place with the result.
+ * b  A single precision integer to subtract.
+ * returns 0 when there is no borrow out, all-ones when there is.
+ */
+SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "subs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "ldm	%[a], {r3, r4}\n\t"
+        "ldm	%[b]!, {r5, r6}\n\t"
+        "sbcs	r3, r3, r5\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "stm	%[a]!, {r3, r4}\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * ARM32 loop using umull (32x32 -> 64) with a three-word sliding
+ * accumulator in r3/r4/r5. Iterates over the words of a until the
+ * pointer reaches a+32; the final carry word is stored after the loop,
+ * so r receives 9 words in total.
+ *
+ * r  A single precision integer (9 words written).
+ * a  A single precision integer (8 words read).
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add	r9, %[a], #32\n\t"
+        /* A[0] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r5, r3, r6, %[b]\n\t"
+        "mov	r4, #0\n\t"
+        "str	r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        /* A[] * B */
+        "ldr	r6, [%[a]], #4\n\t"
+        "umull	r6, r8, r6, %[b]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str	r3, [%[r]], #4\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * The quotient is built 16 bits at a time using udiv against an
+ * over-estimated divisor ((div >> 16) + 1), subtracting the matched
+ * partial product from the remainder after each step, with a final
+ * full-width udiv correction.
+ *
+ * d1  The high order half of the number to divide.
+ * d0  The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        "lsr	r6, %[div], #16\n\t"
+        "add	r6, r6, #1\n\t"
+        "udiv	r4, %[d1], r6\n\t"
+        "lsl	r8, r4, #16\n\t"
+        "umull	r4, r5, %[div], r8\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r5, %[d1], r6\n\t"
+        "lsl	r4, r5, #16\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "lsl	r4, %[d1], #16\n\t"
+        "orr	r4, r4, %[d0], lsr #16\n\t"
+        "udiv	r4, r4, r6\n\t"
+        "add	r8, r8, r4\n\t"
+        "umull	r4, r5, %[div], r4\n\t"
+        "subs	%[d0], %[d0], r4\n\t"
+        "sbc	%[d1], %[d1], r5\n\t"
+        "udiv	r4, %[d0], %[div]\n\t"
+        "add	r8, r8, r4\n\t"
+        "mov	%[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * Used with an all-zero or all-one mask to conditionally select a
+ * value without branching.
+ *
+ * r  A single precision integer (result).
+ * a  A single precision integer (input).
+ * m  Mask to AND against each digit.
+ */
+static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int w = 0;
+
+    while (w < 8) {
+        r[w] = a[w] & m;
+        w++;
+    }
+#else
+    /* Unrolled for speed when code size is not constrained. */
+    r[0] = a[0] & m; r[1] = a[1] & m;
+    r[2] = a[2] & m; r[3] = a[3] & m;
+    r[4] = a[4] & m; r[5] = a[5] & m;
+    r[6] = a[6] & m; r[7] = a[7] & m;
+#endif
+}
+
+/* Divide a by d and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * Schoolbook long division one word at a time. Because
+ * div_256_word_8() can over-estimate the quotient digit by one, each
+ * step conditionally adds d back (twice, via masked adds) to correct
+ * the remainder without branching on secret data.
+ *
+ * a  Number to be divided.
+ * d  Number to divide with (divisor).
+ * m  Multiplier result (unused; pass NULL).
+ * r  Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[16], t2[9];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[7];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
+    for (i=7; i>=0; i--) {
+        /* Estimate next quotient digit from the top two words. */
+        r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
+
+        sp_256_mul_d_8(t2, d, r1);
+        t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
+        t1[8 + i] -= t2[8];
+        /* Correct a possible over-estimate: masked add-back of d. */
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+        sp_256_mask_8(t2, d, t1[8 + i]);
+        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
+    }
+
+    /* Final conditional subtract so 0 <= r < d. */
+    r1 = sp_256_cmp_8(t1, d) >= 0;
+    sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Convenience wrapper over sp_256_div_8() that discards the quotient.
+ *
+ * r  A single precision number that is the reduced result.
+ * a  A single precision number that is to be reduced.
+ * m  A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_8(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve, little-endian 32-bit words.
+ * Used as the exponent for Fermat inversion modulo the order. */
+static const uint32_t p256_order_minus_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve, little-endian 32-bit
+ * words. The fixed high half is handled by an explicit addition chain. */
+static const uint32_t p256_order_low[4] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two numbers mod the order of P256 curve. (r = a * b mod order)
+ *
+ * Operands are expected in Montgomery form; the product is reduced
+ * with the order-specific Montgomery reduction.
+ *
+ * r  Result of the multiplication.
+ * a  First operand of the multiplication.
+ * b  Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_8(r, a, b);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * Operand is expected in Montgomery form; the square is reduced with
+ * the order-specific Montgomery reduction.
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ */
+static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_8(r, a);
+    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r  Result of the squaring.
+ * a  Number to square.
+ * n  Number of times to square (must be >= 1).
+ */
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
+{
+    int i;
+
+    sp_256_mont_sqr_order_8(r, a);
+    for (i=1; i<n; i++) {
+        sp_256_mont_sqr_order_8(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * Fermat inversion: r = a^(order-2) mod order. The small build walks
+ * the full exponent bit by bit (square-and-multiply); the default
+ * build uses a fixed addition chain exploiting the repeated patterns
+ * in the P-256 order, which needs fewer multiplications.
+ *
+ * r   Inverse result.
+ * a   Number to invert (Montgomery form).
+ * td  Temporary data (at least 2*8 digits; 6*8 in the non-small build).
+ */
+static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Plain left-to-right square-and-multiply over order-2. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 8);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t, t);
+        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 8U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 8;
+    sp_digit* t3 = td + 4 * 8;
+    int i;
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_8(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_8(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_8(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_8(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_8(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_8(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_8(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_8(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_8(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_8(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6
+     * Remaining low-half bits come from p256_order_low, processed with
+     * square-and-multiply. */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    sp_256_mont_mul_order_8(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_8(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_8(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_8(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_8(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * Retries with a fresh random k (up to SP_ECC_MAX_SIG_GEN times) when
+ * the produced s is zero. All secret intermediates are wiped before
+ * returning.
+ *
+ * hash     Hash to sign.
+ * hashLen  Length of the hash data.
+ * rng      Random number generator.
+ * priv     Private part of key - scalar.
+ * rm       First part of result as an mp_int.
+ * sm       Second part of result as an mp_int.
+ * km       Optional caller-supplied nonce k; zeroed after use. When
+ *          NULL or zero, a random k is generated.
+ * heap     Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+                    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*8];
+    sp_digit xd[2*8];
+    sp_digit kd[2*8];
+    sp_digit rd[2*8];
+    sp_digit td[3 * 2*8];
+    sp_point_256 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_256* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_256_point_new_8(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 8;
+        x = d + 2 * 8;
+        k = d + 4 * 8;
+        r = d + 6 * 8;
+        tmp = d + 8 * 8;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        /* s and kInv share storage with e and k: by the time they are
+         * written, the aliased values are no longer needed. */
+        s = e;
+        kInv = k;
+
+        if (hashLen > 32U) {
+            hashLen = 32U;
+        }
+
+        sp_256_from_bin(e, 8, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 8, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_256_ecc_gen_k_8(rng, k);
+        }
+        else {
+            sp_256_from_mp(k, 8, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
+            sp_256_norm_8(r);
+            c = sp_256_cmp_8(r, p256_order);
+            sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_256_mul_8(k, k, p256_norm_order);
+            err = sp_256_mod_8(k, k, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(k);
+            /* kInv = 1/k mod order */
+            sp_256_mont_inv_order_8(kInv, k, tmp);
+            sp_256_norm_8(kInv);
+
+            /* s = r * x + e */
+            sp_256_mul_8(x, x, r);
+            err = sp_256_mod_8(x, x, p256_order);
+        }
+        if (err == MP_OKAY) {
+            sp_256_norm_8(x);
+            carry = sp_256_add_8(s, e, x);
+            sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
+            sp_256_norm_8(s);
+            c = sp_256_cmp_8(s, p256_order);
+            sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+            sp_256_norm_8(s);
+
+            /* s = s * k^-1 mod order */
+            sp_256_mont_mul_order_8(s, s, kInv);
+            sp_256_norm_8(s);
+
+            /* Check that signature is usable. */
+            if (sp_256_iszero_8(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        /* Wipe the whole allocation, including the tmp scratch which
+         * held data derived from k during the inversion. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 8);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
+#endif
+    sp_256_point_free_8(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of the public key point.
+ * pY Y ordinate of the public key point.
+ * pZ Z ordinate of the public key point.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Result of verification: 1 when the signature is valid, 0 otherwise.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit u1d[2*8];
+ sp_digit u2d[2*8];
+ sp_digit sd[2*8];
+ sp_digit tmpd[2*8 * 5];
+ sp_point_256 p1d;
+ sp_point_256 p2d;
+#endif
+ sp_digit* u1 = NULL; /* e/s mod order; later reused as r.z'.z'. */
+ sp_digit* u2 = NULL; /* r/s mod order; later reused to reload r. */
+ sp_digit* s = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* p1; /* u1.G */
+ sp_point_256* p2 = NULL; /* u2.Q, then the sum u1.G + u2.Q. */
+ sp_digit carry;
+ int32_t c;
+ int err;
+
+ err = sp_256_point_new_8(heap, p1d, p1);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, p2d, p2);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ /* 16 * 8 digits: u1, u2, s (2*8 each) and tmp (2*8 * 5). */
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ u1 = d + 0 * 8;
+ u2 = d + 2 * 8;
+ s = d + 4 * 8;
+ tmp = d + 6 * 8;
+#else
+ u1 = u1d;
+ u2 = u2d;
+ s = sd;
+ tmp = tmpd;
+#endif
+
+ /* Truncate hash to the size of the order (256 bits). */
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(u1, 8, hash, (int)hashLen);
+ sp_256_from_mp(u2, 8, r);
+ sp_256_from_mp(s, 8, sm);
+ sp_256_from_mp(p2->x, 8, pX);
+ sp_256_from_mp(p2->y, 8, pY);
+ sp_256_from_mp(p2->z, 8, pZ);
+
+ /* Convert s to Montgomery form (mod order). */
+ {
+ sp_256_mul_8(s, s, p256_norm_order);
+ }
+ err = sp_256_mod_8(s, s, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_8(s);
+ /* u1 = e/s mod order, u2 = r/s mod order. */
+ {
+ sp_256_mont_inv_order_8(s, s, tmp);
+ sp_256_mont_mul_order_8(u1, u1, s);
+ sp_256_mont_mul_order_8(u2, u2, s);
+ }
+
+ err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* p1 = u1.G + u2.Q */
+ {
+ sp_256_proj_point_add_8(p1, p1, p2, tmp);
+ if (sp_256_iszero_8(p1->z)) {
+ /* Add returned a zero Z - addends were equal or inverses. */
+ if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
+ /* Equal points: result is the double of p2. */
+ sp_256_proj_point_dbl_8(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ p1->x[4] = 0;
+ p1->x[5] = 0;
+ p1->x[6] = 0;
+ p1->x[7] = 0;
+ XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ }
+ }
+ }
+
+ /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+ /* Reload r and convert to Montgomery form. */
+ sp_256_from_mp(u2, 8, r);
+ err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+ }
+
+ if (err == MP_OKAY) {
+ /* u1 = r.z'.z' mod prime */
+ sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
+ *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+ if (*res == 0) {
+ /* x may have been reduced mod order: retry with r + order. */
+ /* Reload r and add order. */
+ sp_256_from_mp(u2, 8, r);
+ carry = sp_256_add_8(u2, u2, p256_order);
+ /* Carry means result is greater than mod and is not valid. */
+ if (carry == 0) {
+ sp_256_norm_8(u2);
+
+ /* Compare with mod and if greater or equal then not valid. */
+ c = sp_256_cmp_8(u2, p256_mod);
+ if (c < 0) {
+ /* Convert to Montogomery form */
+ err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
+ if (err == MP_OKAY) {
+ /* u1 = (r + 1*order).z'.z' mod prime */
+ sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
+ p256_mp_mod);
+ *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
+ }
+ }
+ }
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_256_point_free_8(p1, 0, heap);
+ sp_256_point_free_8(p2, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * Evaluates y^2 - x^3 + 3.x (mod p) and compares the result to b,
+ * i.e. checks the P-256 short Weierstrass equation y^2 = x^3 - 3.x + b.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit t1d[2*8];
+ sp_digit t2d[2*8];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 8;
+ t2 = d + 2 * 8;
+#else
+ (void)heap;
+
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ /* t1 = y^2 mod p */
+ sp_256_sqr_8(t1, point->y);
+ (void)sp_256_mod_8(t1, t1, p256_mod);
+ /* t2 = x^3 mod p */
+ sp_256_sqr_8(t2, point->x);
+ (void)sp_256_mod_8(t2, t2, p256_mod);
+ sp_256_mul_8(t2, t2, point->x);
+ (void)sp_256_mod_8(t2, t2, p256_mod);
+ /* t1 = y^2 - x^3 (negate then add to stay non-negative) */
+ (void)sp_256_sub_8(t2, p256_mod, t2);
+ sp_256_mont_add_8(t1, t1, t2, p256_mod);
+
+ /* t1 = y^2 - x^3 + 3.x */
+ sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_8(t1, t1, point->x, p256_mod);
+
+ /* On the curve exactly when t1 == b. */
+ if (sp_256_cmp_8(t1, p256_b) != 0) {
+ err = MP_VAL;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * Public wrapper: loads the mp_int ordinates into an sp_point_256 with
+ * Z = 1 and delegates to sp_256_ecc_is_point_8().
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 pubd;
+#endif
+ sp_point_256* pub;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_8(NULL, pubd, pub);
+ if (err == MP_OKAY) {
+ sp_256_from_mp(pub->x, 8, pX);
+ sp_256_from_mp(pub->y, 8, pY);
+ /* Affine point: Z = 1. */
+ sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+
+ err = sp_256_ecc_is_point_8(pub, NULL);
+ }
+
+ sp_256_point_free_8(pub, 0, NULL);
+
+ return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[8];
+ sp_point_256 pubd;
+ sp_point_256 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_256* pub; /* Candidate public key point. */
+ sp_point_256* p = NULL; /* Scratch point for the multiplications. */
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_8(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ priv = privd;
+#endif
+
+ sp_256_from_mp(pub->x, 8, pX);
+ sp_256_from_mp(pub->y, 8, pY);
+ /* Affine point: Z = 1. */
+ sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
+ sp_256_from_mp(priv, 8, privm);
+
+ /* Check point at infinity. */
+ if ((sp_256_iszero_8(pub->x) != 0) &&
+ (sp_256_iszero_8(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y */
+ if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
+ sp_256_cmp_8(pub->y, p256_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_256_ecc_is_point_8(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order = infinity */
+ err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_256_iszero_8(p->x) == 0) ||
+ (sp_256_iszero_8(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private = point */
+ err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_256_cmp_8(p->x, pub->x) != 0 ||
+ sp_256_cmp_8(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, heap);
+ sp_256_point_free_8(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* qX, mp_int* qY, mp_int* qZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 8 * 5];
+ sp_point_256 pd;
+ sp_point_256 qd;
+#endif
+ /* Initialize to NULL: if point allocation fails below, tmp is never
+ * assigned and the cleanup XFREE would otherwise act on an
+ * indeterminate pointer. */
+ sp_digit* tmp = NULL;
+ sp_point_256* p;
+ sp_point_256* q = NULL;
+ int err;
+
+ err = sp_256_point_new_8(NULL, pd, p);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_8(NULL, qd, q);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 8, pX);
+ sp_256_from_mp(p->y, 8, pY);
+ sp_256_from_mp(p->z, 8, pZ);
+ sp_256_from_mp(q->x, 8, qX);
+ sp_256_from_mp(q->y, 8, qY);
+ sp_256_from_mp(q->z, 8, qZ);
+
+ sp_256_proj_point_add_8(p, p, q, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(q, 0, NULL);
+ sp_256_point_free_8(p, 0, NULL);
+
+ return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 8 * 2];
+ sp_point_256 pd;
+#endif
+ /* Initialize to NULL: if point allocation fails below, tmp is never
+ * assigned and the cleanup XFREE would otherwise act on an
+ * indeterminate pointer. */
+ sp_digit* tmp = NULL;
+ sp_point_256* p;
+ int err;
+
+ err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 8, pX);
+ sp_256_from_mp(p->y, 8, pY);
+ sp_256_from_mp(p->z, 8, pZ);
+
+ sp_256_proj_point_dbl_8(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, NULL);
+
+ return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 8 * 4];
+ sp_point_256 pd;
+#endif
+ /* Initialize to NULL: if point allocation fails below, tmp is never
+ * assigned and the cleanup XFREE would otherwise act on an
+ * indeterminate pointer. */
+ sp_digit* tmp = NULL;
+ sp_point_256* p;
+ int err;
+
+ err = sp_256_point_new_8(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 8, pX);
+ sp_256_from_mp(p->y, 8, pY);
+ sp_256_from_mp(p->z, 8, pZ);
+
+ sp_256_map_8(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, pX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, pY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, pZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_8(p, 0, NULL);
+
+ return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * Since p256 = 3 (mod 4) the root is y ^ ((p+1)/4); the addition chain
+ * below computes that fixed exponent. Works on Montgomery-form values.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mont_sqrt_8(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit t1d[2 * 8];
+ sp_digit t2d[2 * 8];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 8;
+ t2 = d + 2 * 8;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ {
+ /* t2 = y ^ 0x2 */
+ sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0x3 */
+ sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xc */
+ sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xf */
+ sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xf0 */
+ sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xff */
+ sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xff00 */
+ sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffff */
+ sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xffff0000 */
+ sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff */
+ sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000000 */
+ sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001 */
+ sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+ sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+ sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
+ /* Final 94 squarings complete the (p+1)/4 exponent. */
+ sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod);
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * Computes y = sqrt(x^3 - 3.x + b) mod p and selects the root whose
+ * parity matches 'odd'.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit xd[2 * 8];
+ sp_digit yd[2 * 8];
+#endif
+ sp_digit* x = NULL;
+ sp_digit* y = NULL;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ x = d + 0 * 8;
+ y = d + 2 * 8;
+#else
+ x = xd;
+ y = yd;
+#endif
+
+ /* Convert x to Montgomery form. */
+ sp_256_from_mp(x, 8, xm);
+ err = sp_256_mod_mul_norm_8(x, x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ /* y = x^3 */
+ {
+ sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
+ }
+ /* y = x^3 - 3x */
+ sp_256_mont_sub_8(y, y, x, p256_mod);
+ sp_256_mont_sub_8(y, y, x, p256_mod);
+ sp_256_mont_sub_8(y, y, x, p256_mod);
+ /* y = x^3 - 3x + b (x is reused to hold b in Montgomery form) */
+ err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ sp_256_mont_add_8(y, y, x, p256_mod);
+ /* y = sqrt(x^3 - 3x + b) */
+ err = sp_256_mont_sqrt_8(y);
+ }
+ if (err == MP_OKAY) {
+ /* Leave Montgomery form before inspecting the low bit. */
+ XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
+ sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
+ /* Pick the root with the requested parity: the other root is p - y. */
+ if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+ sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
+ }
+
+ err = sp_256_to_mp(y, ym);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+ sp_digit x[2 * 12];
+ sp_digit y[2 * 12];
+ sp_digit z[2 * 12];
+ int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[12] = {
+ 0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+/* The Montogmery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[12] = {
+ 0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+/* The Montogmery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x00000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[12] = {
+ 0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[12] = {
+ 0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[12] = {
+ 0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montogmery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0xe88fdc45;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+ /* X ordinate */
+ {
+ 0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
+ 0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Y ordinate */
+ {
+ 0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
+ 0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Z ordinate */
+ {
+ 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+ 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* infinity */
+ 0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+static const sp_digit p384_b[12] = {
+ 0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
+ 0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
+};
+#endif
+
+/* Provide a point for use: either allocate one or hand back the supplied
+ * stack point, depending on build configuration.
+ *
+ * heap Heap to use for allocation (ignored in the static configuration).
+ * sp Caller's stack point (ignored in the malloc configuration).
+ * p Receives the point to use.
+ * returns MEMORY_E when allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+ int ret = MP_OKAY;
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ (void)sp;
+ *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+ *p = sp;
+#endif
+ if (*p == NULL) {
+ ret = MEMORY_E;
+ }
+ return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
+#endif
+
+
+/* Release a point obtained from sp_384_point_new_12(), optionally clearing
+ * its data first (used when the point held secret values).
+ *
+ * p Point to free (may be NULL in the malloc configuration).
+ * clear Non-zero to zeroize the point data before freeing.
+ * heap Heap the point was allocated from.
+ */
+static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+ if (p != NULL) {
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+/* Clear point data if requested. */
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+#endif
+ (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * Fast reduction specialised to the P-384 prime: each output word is a
+ * fixed signed combination of input words (coefficients listed in the
+ * comments), followed by carry propagation and a single conditional
+ * fold of the top carry. Casts go via uint64_t so intermediate wrap-around
+ * is well defined; the int64_t store preserves the sign for the
+ * arithmetic right shifts during carry propagation.
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ int64_t* t;
+#else
+ int64_t t[12];
+#endif
+ int64_t o;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+ /* 1 0 0 0 0 0 0 0 1 1 0 -1 */
+ t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11];
+ /* -1 1 0 0 0 0 0 0 -1 0 1 1 */
+ t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11];
+ /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */
+ t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11];
+ /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */
+ t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11];
+ /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */
+ t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] - 2 * (uint64_t)a[11];
+ /* 0 1 1 0 -1 1 0 0 0 1 2 1 */
+ t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11];
+ /* 0 0 1 1 0 -1 1 0 0 0 1 2 */
+ t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11];
+ /* 0 0 0 1 1 0 -1 1 0 0 0 1 */
+ t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11];
+ /* 0 0 0 0 1 1 0 -1 1 0 0 0 */
+ t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8];
+ /* 0 0 0 0 0 1 1 0 -1 1 0 0 */
+ t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9];
+ /* 0 0 0 0 0 0 1 1 0 -1 1 0 */
+ t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10];
+ /* 0 0 0 0 0 0 0 1 1 0 -1 1 */
+ t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11];
+
+ /* Propagate carries into 32-bit words. */
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+ t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+ t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+ t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+ /* Fold the top carry back in (o * 2^384 mod p). */
+ o = t[11] >> 32; t[11] &= 0xffffffff;
+ t[0] += o;
+ t[1] -= o;
+ t[3] += o;
+ t[4] += o;
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+ t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+ t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+ t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+ r[0] = t[0];
+ r[1] = t[1];
+ r[2] = t[2];
+ r[3] = t[3];
+ r[4] = t[4];
+ r[5] = t[5];
+ r[6] = t[6];
+ r[7] = t[7];
+ r[8] = t[8];
+ r[9] = t[9];
+ r[10] = t[10];
+ r[11] = t[11];
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL)
+ XFREE(t, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+ return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * Repacks mp_digit words (DIGIT_BIT bits each) into 32-bit sp_digit
+ * words; excess result words are zeroed.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+ /* Same digit size - straight copy then zero the tail. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 32
+ /* Source digits are wider - split each across output words. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffff;
+ s = 32U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 32U) <= (word32)DIGIT_BIT) {
+ s += 32U;
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* Source digits are narrower - accumulate several per output word. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 32) {
+ r[j] &= 0xffffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 32 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * Ordinates are zeroed first so the upper (double-width) words are clean.
+ *
+ * p Point of type sp_point_384 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm)
+{
+ XMEMSET(p->x, 0, sizeof(p->x));
+ XMEMSET(p->y, 0, sizeof(p->y));
+ XMEMSET(p->z, 0, sizeof(p->z));
+ sp_384_from_mp(p->x, 12, pm->x);
+ sp_384_from_mp(p->y, 12, pm->y);
+ sp_384_from_mp(p->z, 12, pm->z);
+ p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Repacks 12 32-bit sp_digit words into mp_digit words of DIGIT_BIT bits.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ * returns MP_OKAY on success (mp_grow failure is propagated).
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 32
+ /* Same digit size - straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 12);
+ r->used = 12;
+ mp_clamp(r);
+#elif DIGIT_BIT < 32
+ /* Destination digits are narrower - split each source word. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 12; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 32) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 32 - s;
+ }
+ r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* Destination digits are wider - accumulate several source words. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 12; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 32 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 32 - s;
+ }
+ else {
+ s += 32;
+ }
+ }
+ r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * p Point of type sp_point_384.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm)
+{
+ int err;
+
+ err = sp_384_to_mp(p->x, pm->x);
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->y, pm->y);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->z, pm->z);
+ }
+
+ return err;
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Product-scanning (comba-style) multiply: for each output word offset k
+ * (held in r9, in bytes), all partial products a[i]*b[j] with i+j == k are
+ * accumulated into the 96-bit accumulator r3:r4:r5 before one word is
+ * stored. The result is built in a local buffer so r may alias a or b.
+ *
+ * Register roles: r12 = tmp, r10 = a, r11 = b, r14 = a + 48 (end of a),
+ * r9 = current output byte offset, r3/r4/r5 = accumulator.
+ *
+ * r A single precision integer (24 words, the 768-bit product).
+ * a A single precision integer (12 words).
+ * b A single precision integer (12 words).
+ */
+SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[12 * 2];
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r11, %[b]\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r6, r10\n\t"
+        "mov	r14, r6\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	%[b], r9\n\t"
+        "sub	%[b], %[b], %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	%[b], %[b], r11\n\t"
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [%[b]]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add	%[a], %[a], #4\n\t"
+        "sub	%[b], %[b], #4\n\t"
+        "cmp	%[a], r14\n\t"
+        "beq	3f\n\t"
+        "mov	r6, r9\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[b], r11\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* Copy out of the scratch buffer; safe even if r overlaps a or b. */
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * The loop always runs all 12 words and b is masked with m rather than
+ * branched on, so the memory access pattern is independent of m
+ * (constant-time with respect to the condition).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ * returns 0 on no borrow and -1 (all ones) when the subtract borrowed.
+ */
+SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #48\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "sbcs	r5, r5, r6\n\t"
+        "sbcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* The order reduction uses the same word-by-word Montgomery reduction. */
+#define sp_384_mont_reduce_order_12    sp_384_mont_reduce_12
+
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * Word-by-word Montgomery reduction: for each of the 12 low words, compute
+ * mu = a[i] * mp and add mu * m shifted up by i words, making the low word
+ * zero. The loop body processes two words of m per iteration (hence the
+ * r14 = a + 40 inner limit, with words 10 and 11 unrolled after it).
+ *
+ * Register roles: r9 = mp, r12 = m, r10 = cursor into a,
+ * r11 = a + 48 (outer-loop limit), r4/r5 = alternating carry words,
+ * ca = carry out of the top word.
+ *
+ * a   A single precision number to reduce in place.
+ * m   The single precision number representing the modulus.
+ * mp  The digit representing the negative inverse of m mod 2^n.
+ */
+SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_digit ca = 0;
+
+    __asm__ __volatile__ (
+        "mov	r9, %[mp]\n\t"
+        "mov	r12, %[m]\n\t"
+        "mov	r10, %[a]\n\t"
+        "mov	r4, #0\n\t"
+        "add	r11, r10, #48\n\t"
+        "\n1:\n\t"
+        /* mu = a[i] * mp */
+        "mov	%[mp], r9\n\t"
+        "ldr	%[a], [r10]\n\t"
+        "mul	%[mp], %[mp], %[a]\n\t"
+        "mov	%[m], r12\n\t"
+        "add	r14, r10, #40\n\t"
+        "\n2:\n\t"
+        /* a[i+j] += m[j] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+j+1] += m[j+1] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r4, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r4, r4, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r5, r5, %[a]\n\t"
+        "adc	r4, r4, #0\n\t"
+        "str	r5, [r10], #4\n\t"
+        "cmp	r10, r14\n\t"
+        "blt	2b\n\t"
+        /* a[i+10] += m[10] * mu */
+        "ldr	%[a], [r10]\n\t"
+        "mov	r5, #0\n\t"
+        /* Multiply m[j] and mu - Start */
+        "ldr	r8, [%[m]], #4\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	%[a], %[a], r6\n\t"
+        "adc	r5, r5, r8\n\t"
+        /* Multiply m[j] and mu - Done */
+        "adds	r4, r4, %[a]\n\t"
+        "adc	r5, r5, #0\n\t"
+        "str	r4, [r10], #4\n\t"
+        /* a[i+11] += m[11] * mu */
+        "mov	r4, %[ca]\n\t"
+        "mov	%[ca], #0\n\t"
+        /* Multiply m[11] and mu - Start */
+        "ldr	r8, [%[m]]\n\t"
+        "umull	r6, r8, %[mp], r8\n\t"
+        "adds	r5, r5, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        /* Multiply m[11] and mu - Done */
+        "ldr	r6, [r10]\n\t"
+        "ldr	r8, [r10, #4]\n\t"
+        "adds	r6, r6, r5\n\t"
+        "adcs	r8, r8, r4\n\t"
+        "adc	%[ca], %[ca], #0\n\t"
+        "str	r6, [r10]\n\t"
+        "str	r8, [r10, #4]\n\t"
+        /* Next word in a */
+        "sub	r10, r10, #40\n\t"
+        "cmp	r10, r11\n\t"
+        "blt	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "mov	%[m], r12\n\t"
+        : [ca] "+r" (ca), [a] "+r" (a)
+        : [m] "r" (m), [mp] "r" (mp)
+        : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
+    );
+
+    /* The asm advanced 'a' by 12 words, so a - 12 is the original pointer:
+     * write the reduced value over the low half, conditionally subtracting
+     * the modulus when the final carry (ca) is set. */
+    sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca);
+}
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Full 768-bit product followed by Montgomery reduction back to 384 bits.
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Product-scanning square: cross products a[i]*a[j] (i != j) are added
+ * twice into the 96-bit accumulator r3:r4:r5, while the diagonal square
+ * a[i]*a[i] is added once (label 4 inside the inner loop). The 24-word
+ * result is built in a 96-byte buffer carved out of the stack (r11 = sp)
+ * and copied to r at the end, so r may alias a.
+ *
+ * Register roles: r12 = r, r10 = a, r11 = stack result buffer,
+ * r9 = current output byte offset, r3/r4/r5 = accumulator.
+ *
+ * r A single precision integer (24 words, the 768-bit square).
+ * a A single precision integer (12 words).
+ */
+SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "mov	r5, #0\n\t"
+        "mov	r9, r3\n\t"
+        "mov	r12, %[r]\n\t"
+        "mov	r6, #96\n\t"
+        "neg	r6, r6\n\t"
+        "add	sp, sp, r6\n\t"
+        "mov	r11, sp\n\t"
+        "mov	r10, %[a]\n\t"
+        "\n1:\n\t"
+        "mov	%[r], #0\n\t"
+        "mov	r6, #44\n\t"
+        "mov	%[a], r9\n\t"
+        "subs	%[a], %[a], r6\n\t"
+        "sbc	r6, r6, r6\n\t"
+        "mvn	r6, r6\n\t"
+        "and	%[a], %[a], r6\n\t"
+        "mov	r2, r9\n\t"
+        "sub	r2, r2, %[a]\n\t"
+        "add	%[a], %[a], r10\n\t"
+        "add	r2, r2, r10\n\t"
+        "\n2:\n\t"
+        "cmp	r2, %[a]\n\t"
+        "beq	4f\n\t"
+        /* Multiply * 2: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "ldr	r8, [r2]\n\t"
+        "umull	r6, r8, r6, r8\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Multiply * 2: Done */
+        "bal	5f\n\t"
+        "\n4:\n\t"
+        /* Square: Start */
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r8, r6, r6\n\t"
+        "adds	r3, r3, r6\n\t"
+        "adcs	r4, r4, r8\n\t"
+        "adc	r5, r5, %[r]\n\t"
+        /* Square: Done */
+        "\n5:\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "sub	r2, r2, #4\n\t"
+        "mov	r6, #48\n\t"
+        "add	r6, r6, r10\n\t"
+        "cmp	%[a], r6\n\t"
+        "beq	3f\n\t"
+        "cmp	%[a], r2\n\t"
+        "bgt	3f\n\t"
+        "mov	r8, r9\n\t"
+        "add	r8, r8, r10\n\t"
+        "cmp	%[a], r8\n\t"
+        "ble	2b\n\t"
+        "\n3:\n\t"
+        "mov	%[r], r11\n\t"
+        "mov	r8, r9\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	r3, r4\n\t"
+        "mov	r4, r5\n\t"
+        "mov	r5, #0\n\t"
+        "add	r8, r8, #4\n\t"
+        "mov	r9, r8\n\t"
+        "mov	r6, #88\n\t"
+        "cmp	r8, r6\n\t"
+        "ble	1b\n\t"
+        "mov	%[a], r10\n\t"
+        "str	r3, [%[r], r8]\n\t"
+        "mov	%[r], r12\n\t"
+        "mov	%[a], r11\n\t"
+        "mov	r3, #92\n\t"
+        "\n4:\n\t"
+        "ldr	r6, [%[a], r3]\n\t"
+        "str	r6, [%[r], r3]\n\t"
+        "subs	r3, r3, #4\n\t"
+        "bge	4b\n\t"
+        "mov	r6, #96\n\t"
+        "add	sp, sp, r6\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
+    );
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_12(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times. (r = a ^ (2^n) mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square (must be >= 1; first square is unconditional).
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^32).
+ */
+static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    /* First squaring reads from a; the rest square r in place. */
+    sp_384_mont_sqr_12(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_12(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve: the exponent p - 2 used for Fermat inversion,
+ * little-endian 32-bit words. */
+static const uint32_t p384_mod_minus_2[12] = {
+    0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * Fermat inversion: r = a^(p-2) mod p. The small build does a plain
+ * square-and-multiply over the fixed exponent; the fast build uses a
+ * hand-built addition chain (the hex comments track the exponent bits
+ * accumulated so far).
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data (at least 2*12 digits for the small build,
+ *     10*12 digits for the addition-chain build).
+ */
+static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    /* Left-to-right binary exponentiation by p - 2 (bit 383 is known set
+     * and handled by the initial copy). */
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    sp_digit* t4 = td + 6 * 12;
+    sp_digit* t5 = td + 8 * 12;
+
+    /* 0x2 */
+    sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant word down, always touching all 12 words.
+ * r3 is an all-ones mask that is cleared once the first differing word has
+ * been seen, so later (less significant) words cannot affect the result
+ * without any data-dependent branching.
+ *
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+
+
+    __asm__ __volatile__ (
+        "mov	r3, #0\n\t"
+        "mvn	r3, r3\n\t"
+        "mov	r6, #44\n\t"
+        "\n1:\n\t"
+        "ldr	r8, [%[a], r6]\n\t"
+        "ldr	r5, [%[b], r6]\n\t"
+        "and	r8, r8, r3\n\t"
+        "and	r5, r5, r3\n\t"
+        "mov	r4, r8\n\t"
+        "subs	r8, r8, r5\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "add	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "subs	r5, r5, r4\n\t"
+        "sbc	r8, r8, r8\n\t"
+        "sub	%[r], %[r], r8\n\t"
+        "mvn	r8, r8\n\t"
+        "and	r3, r3, r8\n\t"
+        "sub	r6, r6, #4\n\t"
+        "cmp	r6, #0\n\t"
+        "bge	1b\n\t"
+        : [r] "+r" (r)
+        : [a] "r" (a), [b] "r" (b)
+        : "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return r;
+}
+
+/* Normalize the values in each word to 32.
+ *
+ * A no-op here: 32-bit words are already fully packed, so there are no
+ * carries to propagate. Kept so shared code can call it unconditionally.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_384_norm_12(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes (x, y) = (X / Z^2, Y / Z^3) and converts out of Montgomery form
+ * via the final reduction of each product.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data (at least 12*12 digits: 2*12 used directly
+ *    plus what sp_384_mont_inv_12 needs).
+ */
+static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    int32_t n;
+
+    /* t1 = 1/Z (Montgomery form). */
+    sp_384_mont_inv_12(t1, p->z, t + 2*12);
+
+    /* t2 = 1/Z^2, t1 = 1/Z^3. */
+    sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    /* Zero the upper half then reduce: takes the value out of Montgomery
+     * form (equivalent to multiplying by 1). */
+    XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_12(r->x, p384_mod);
+    sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_12(r->y, p384_mod);
+    sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_12(r->y);
+
+    /* Affine coordinates have Z = 1. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * Looping (size-optimized) variant: one word per iteration, the carry is
+ * re-injected at the top of each pass (r8 = -1, so adds c + r8 sets the
+ * carry flag exactly when c was 1).
+ *
+ * r A single precision integer (result).
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the carry out of the top word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "mov	r8, #0\n\t"
+        "add	r6, r6, #48\n\t"
+        "sub	r8, r8, #1\n\t"
+        "\n1:\n\t"
+        "adds	%[c], %[c], r8\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "adcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Add b to a into r. (r = a + b)
+ *
+ * Fully unrolled (speed-optimized) variant: two words per ldm/stm group,
+ * carry chained through adcs across all 12 words.
+ *
+ * r A single precision integer (result).
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the carry out of the top word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adds	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "ldm	%[a]!, {r4, r5}\n\t"
+        "ldm	%[b]!, {r6, r8}\n\t"
+        "adcs	r4, r4, r6\n\t"
+        "adcs	r5, r5, r8\n\t"
+        "stm	%[r]!, {r4, r5}\n\t"
+        "mov	%[c], #0\n\t"
+        "adc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * The conditional subtract is masked on the carry out, keeping the
+ * operation branch-free.
+ *
+ * r  Result of addition.
+ * a  First number to add in Montgomery form.
+ * b  Second number to add in Montgomery form.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, b);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r  Result of doubling.
+ * a  Number to double in Montgomery form.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, a);
+    /* Subtract m only when the add overflowed (mask is 0 or all-ones). */
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Implemented as two modular additions so each intermediate stays below m.
+ *
+ * r  Result of Tripling.
+ * a  Number to triple in Montgomery form.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_add_12(r, a, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+    o = sp_384_add_12(r, r, a);
+    sp_384_cond_sub_12(r, r, m, 0 - o);
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into r. (r = a - b)
+ *
+ * Looping (size-optimized) variant: the borrow from the previous word is
+ * re-injected via the subs at the top of each iteration.
+ *
+ * r A single precision integer (result).
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns 0 on no borrow and -1 (all ones) when the subtract borrowed.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r6, %[a]\n\t"
+        "add	r6, r6, #48\n\t"
+        "\n1:\n\t"
+        "mov	r5, #0\n\t"
+        "subs	r5, r5, %[c]\n\t"
+        "ldr	r4, [%[a]]\n\t"
+        "ldr	r5, [%[b]]\n\t"
+        "sbcs	r4, r4, r5\n\t"
+        "str	r4, [%[r]]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        "add	%[a], %[a], #4\n\t"
+        "add	%[b], %[b], #4\n\t"
+        "add	%[r], %[r], #4\n\t"
+        "cmp	%[a], r6\n\t"
+        "bne	1b\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into r. (r = a - b)
+ *
+ * Fully unrolled (speed-optimized) variant: two words per group, borrow
+ * chained through sbcs across all 12 words.
+ *
+ * r A single precision integer (result).
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns 0 on no borrow and -1 (all ones) when the subtract borrowed.
+ */
+SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldr	r4, [%[a], #0]\n\t"
+        "ldr	r5, [%[a], #4]\n\t"
+        "ldr	r6, [%[b], #0]\n\t"
+        "ldr	r8, [%[b], #4]\n\t"
+        "subs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #0]\n\t"
+        "str	r5, [%[r], #4]\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "ldr	r5, [%[a], #12]\n\t"
+        "ldr	r6, [%[b], #8]\n\t"
+        "ldr	r8, [%[b], #12]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "str	r5, [%[r], #12]\n\t"
+        "ldr	r4, [%[a], #16]\n\t"
+        "ldr	r5, [%[a], #20]\n\t"
+        "ldr	r6, [%[b], #16]\n\t"
+        "ldr	r8, [%[b], #20]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #16]\n\t"
+        "str	r5, [%[r], #20]\n\t"
+        "ldr	r4, [%[a], #24]\n\t"
+        "ldr	r5, [%[a], #28]\n\t"
+        "ldr	r6, [%[b], #24]\n\t"
+        "ldr	r8, [%[b], #28]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #24]\n\t"
+        "str	r5, [%[r], #28]\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "ldr	r5, [%[a], #36]\n\t"
+        "ldr	r6, [%[b], #32]\n\t"
+        "ldr	r8, [%[b], #36]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "str	r5, [%[r], #36]\n\t"
+        "ldr	r4, [%[a], #40]\n\t"
+        "ldr	r5, [%[a], #44]\n\t"
+        "ldr	r6, [%[b], #40]\n\t"
+        "ldr	r8, [%[b], #44]\n\t"
+        "sbcs	r4, r4, r6\n\t"
+        "sbcs	r5, r5, r8\n\t"
+        "str	r4, [%[r], #40]\n\t"
+        "str	r5, [%[r], #44]\n\t"
+        "sbc	%[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * Like sp_384_cond_sub_12 this always processes all 12 words, masking b
+ * instead of branching, so timing does not depend on m.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ * returns the carry out of the top word (0 or 1).
+ */
+SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        sp_digit m)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "mov	r5, #48\n\t"
+        "mov	r9, r5\n\t"
+        "mov	r8, #0\n\t"
+        "\n1:\n\t"
+        "ldr	r6, [%[b], r8]\n\t"
+        "and	r6, r6, %[m]\n\t"
+        "adds	r5, %[c], #-1\n\t"
+        "ldr	r5, [%[a], r8]\n\t"
+        "adcs	r5, r5, r6\n\t"
+        "mov	%[c], #0\n\t"
+        "adcs	%[c], %[c], %[c]\n\t"
+        "str	r5, [%[r], r8]\n\t"
+        "add	r8, r8, #4\n\t"
+        "cmp	r8, r9\n\t"
+        "blt	1b\n\t"
+        : [c] "+r" (c)
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
+        : "memory", "r5", "r6", "r8", "r9"
+    );
+
+    return c;
+}
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * When the subtract borrows, o is all ones and the modulus is added back;
+ * otherwise the add is a masked no-op (branch-free).
+ *
+ * r  Result of subtraction.
+ * a  Number to subtract from in Montgomery form.
+ * b  Number to subtract with in Montgomery form.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_sub_12(r, a, b);
+    sp_384_cond_add_12(r, r, m, o);
+}
+
+/* Shift the 12-word number right by one bit. (r = a >> 1)
+ *
+ * Unrolled: each word is shifted right and the low bit of the next word
+ * is or-ed in as its new top bit. The top word's shifted-out high bit is
+ * simply dropped (callers such as sp_384_div2_12 restore the carry).
+ *
+ * r  Result (may alias a; each word is read before it is overwritten).
+ * a  Number to shift.
+ */
+static void sp_384_rshift1_12(sp_digit* r, sp_digit* a)
+{
+    __asm__ __volatile__ (
+        "ldr	r2, [%[a]]\n\t"
+        "ldr	r3, [%[a], #4]\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #8]\n\t"
+        "str	r2, [%[r], #0]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r2, [%[a], #12]\n\t"
+        "str	r3, [%[r], #4]\n\t"
+        "lsl	r5, r2, #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r3, [%[a], #16]\n\t"
+        "str	r4, [%[r], #8]\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #20]\n\t"
+        "str	r2, [%[r], #12]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r2, [%[a], #24]\n\t"
+        "str	r3, [%[r], #16]\n\t"
+        "lsl	r5, r2, #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r3, [%[a], #28]\n\t"
+        "str	r4, [%[r], #20]\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #32]\n\t"
+        "str	r2, [%[r], #24]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "ldr	r2, [%[a], #36]\n\t"
+        "str	r3, [%[r], #28]\n\t"
+        "lsl	r5, r2, #31\n\t"
+        "lsr	r2, r2, #1\n\t"
+        "orr	r4, r4, r5\n\t"
+        "ldr	r3, [%[a], #40]\n\t"
+        "str	r4, [%[r], #32]\n\t"
+        "lsl	r5, r3, #31\n\t"
+        "lsr	r3, r3, #1\n\t"
+        "orr	r2, r2, r5\n\t"
+        "ldr	r4, [%[a], #44]\n\t"
+        "str	r2, [%[r], #36]\n\t"
+        "lsl	r5, r4, #31\n\t"
+        "lsr	r4, r4, #1\n\t"
+        "orr	r3, r3, r5\n\t"
+        "str	r3, [%[r], #40]\n\t"
+        "str	r4, [%[r], #44]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a)
+        : "memory", "r2", "r3", "r4", "r5"
+    );
+}
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * If a is odd, first add m (making the sum even) so the following shift
+ * is an exact halving mod m; the mask 0 - (a[0] & 1) makes this
+ * branch-free.
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1));
+    sp_384_rshift1_12(r, r);
+    /* Put the carry out of the conditional add back as the top bit. */
+    r[11] |= o << 31;
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * Jacobian doubling for a curve with a = -3 (the 3*(X-T1)*(X+T1) form of
+ * the slope relies on that). All field operations are in Montgomery form.
+ *
+ * r  Result of doubling point (may alias p).
+ * p  Point to double.
+ * t  Temporary ordinate data (at least 4*12 digits: t1 and t2).
+ */
+static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_12(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_12(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_12(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_12(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_12(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_12(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_12(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_12(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_12(y, y, t2, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * All 12 word XORs are or-ed together before the single comparison, so
+ * the time taken does not depend on where the numbers first differ.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b)
+{
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
+            (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
+            (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Standard Jacobian addition. The general formulas break down when
+ * p == +/-q, so that case is detected first and dispatched to doubling.
+ * Infinity inputs are handled by indexing the rp[]/ap[] arrays with the
+ * infinity flags instead of branching on them.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data (at least 10*12 digits: t1..t5).
+ */
+static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*12;
+    sp_digit* t3 = t + 4*12;
+    sp_digit* t4 = t + 6*12;
+    sp_digit* t5 = t + 8*12;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: t1 = -q->y, so the test catches both q == p and
+     * q == -p (same x and z, y equal or negated). */
+    (void)sp_384_sub_12(t1, p384_mod, q->y);
+    sp_384_norm_12(t1);
+    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_12(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        /* If either input is infinity, computed ordinates go to scratch
+         * and the other input is copied through below. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<12; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<12; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<12; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_12(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_12(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(x, x, t5, p384_mod);
+        sp_384_mont_dbl_12(t1, y, p384_mod);
+        sp_384_mont_sub_12(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_12(y, y, t5, p384_mod);
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Fixed 4-bit windowed multiplication: t[0..15] holds 0*G .. 15*G, then
+ * the 384-bit scalar is consumed 4 bits at a time from the top, with four
+ * doublings and one table addition per window. NOTE(review): the table
+ * index y is data-dependent, so lookups are not cache-access uniform -
+ * this is the "fast" (non-cache-hardened) variant.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by (12 32-bit words).
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[16];
+    sp_point_384 rtd;
+    sp_digit tmpd[2 * 12 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, rtd, rt);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[i] = i * g, computed as doublings of
+         * t[i/2] for even i and t[(i-1)/2] + t[i/2] additions for odd i. */
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod);
+        (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod);
+        t[1].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        /* Seed with the top 4 bits of the scalar (word 11), then walk
+         * down. n is the bit buffer, c the number of bits left in it. */
+        i = 10;
+        n = k[i+1] << 0;
+        c = 28;
+        y = n >> 28;
+        XMEMCPY(rt, &t[y], sizeof(sp_point_384));
+        n <<= 4;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                n |= k[i--];
+                c += 32;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+            sp_384_proj_point_dbl_12(rt, rt, tmp);
+
+            sp_384_proj_point_add_12(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_12(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    /* Zeroize before freeing: the table and scratch hold secret-dependent
+     * intermediate values. */
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point_384) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_384_point_free_12(rt, 1, heap);
+
+    return err;
+}
+
+/* A table entry for pre-computed points.
+ * Coordinates are affine and in Montgomery form (Z is implicitly 1);
+ * see sp_384_gen_stripe_table_12 / sp_384_proj_to_affine_12. */
+typedef struct sp_table_entry_384 {
+ sp_digit x[12]; /* X ordinate. */
+ sp_digit y[12]; /* Y ordinate. */
+} sp_table_entry_384;
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ * The doubling is performed in place: the result is written back into p.
+ *
+ * p Point to double; holds the result on return.
+ * n Number of times to double.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t)
+{
+ /* Five 2*12-digit temporaries carved out of the caller's scratch t. */
+ sp_digit* w = t;
+ sp_digit* a = t + 2*12;
+ sp_digit* b = t + 4*12;
+ sp_digit* t1 = t + 6*12;
+ sp_digit* t2 = t + 8*12;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ /* Work directly on the point's ordinates; p is updated in place. */
+ x = p->x;
+ y = p->y;
+ z = p->z;
+
+ /* Y = 2*Y */
+ sp_384_mont_dbl_12(y, y, p384_mod);
+ /* W = Z^4 */
+ sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod);
+
+ /* Small build: all n doublings run in the loop, with the W update
+ * conditionally skipped on the final pass. Non-small build: n-1
+ * doublings run in the loop and the final doubling is unrolled after
+ * it so the W update can be omitted entirely. */
+#ifndef WOLFSSL_SP_SMALL
+ while (--n > 0)
+#else
+ while (--n >= 0)
+#endif
+ {
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_12(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_12(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_12(t2, b, p384_mod);
+ sp_384_mont_sub_12(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+ /* t2 = Y^4 */
+ sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+ if (n != 0)
+#endif
+ {
+ /* W = W*Y^4 */
+ sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_12(y, b, x, p384_mod);
+ sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_12(y, y, p384_mod);
+ sp_384_mont_sub_12(y, y, t1, p384_mod);
+ }
+#ifndef WOLFSSL_SP_SMALL
+ /* Final (unrolled) doubling iteration: identical to the loop body but
+ * without the W update, which is not needed after the last pass. */
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_12(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_12(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_12(t2, b, p384_mod);
+ sp_384_mont_sub_12(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
+ /* t2 = Y^4 */
+ sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_12(y, b, x, p384_mod);
+ sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_12(y, y, p384_mod);
+ sp_384_mont_sub_12(y, y, t1, p384_mod);
+#endif
+ /* Y = Y/2 */
+ sp_384_div2_12(y, y, p384_mod);
+}
+
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert.
+ * t Temporary data.
+ */
+static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 12;
+ sp_digit* tmp = t + 4 * 12;
+
+ /* t1 = 1/Z (Montgomery form). */
+ sp_384_mont_inv_12(t1, a->z, tmp);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3. */
+ sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
+
+ /* X' = X/Z^2, Y' = Y/Z^3, Z' = 1 (Montgomery form of one). */
+ sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod);
+ XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
+ const sp_point_384* q, sp_digit* t)
+{
+ const sp_point_384* ap[2];
+ sp_point_384* rp[2];
+ /* Five 2*12-digit temporaries carved out of the caller's scratch t. */
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*12;
+ sp_digit* t3 = t + 4*12;
+ sp_digit* t4 = t + 6*12;
+ sp_digit* t5 = t + 8*12;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Check double */
+ /* t1 = -Y2 (mod p); doubling is needed when X and Z ordinates match
+ * and Y1 == Y2 or Y1 == -Y2. */
+ (void)sp_384_sub_12(t1, p384_mod, q->y);
+ sp_384_norm_12(t1);
+ if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
+ (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
+ sp_384_proj_point_dbl_12(r, p, t);
+ }
+ else {
+ /* rp[]/ap[] indexing handles infinity operands without branches:
+ * if p or q is infinity the arithmetic below is diverted into
+ * scratch (rp[1]) while r is copied from the other operand. */
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_384));
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<12; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<12; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<12; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* The q->z == 1 assumption removes the usual Z2 multiplications. */
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - X1 */
+ sp_384_mont_sub_12(t2, t2, x, p384_mod);
+ /* R = S2 - Y1 */
+ sp_384_mont_sub_12(t4, t4, y, p384_mod);
+ /* Z3 = H*Z1 */
+ sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
+ /* X3 = R^2 - H^3 - 2*X1*H^2 */
+ sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_12(x, t1, t5, p384_mod);
+ sp_384_mont_dbl_12(t1, t3, p384_mod);
+ sp_384_mont_sub_12(x, x, t1, p384_mod);
+ /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+ sp_384_mont_sub_12(t3, t3, x, p384_mod);
+ sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_12(y, t3, t5, p384_mod);
+ }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+ sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td, s1d, s2d;
+#endif
+ sp_point_384* t;
+ sp_point_384* s1 = NULL;
+ sp_point_384* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_12(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, s2d, s2);
+ }
+
+ /* t = a converted into Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_384_proj_to_affine_12(t, tmp);
+
+ XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(96*i) * a, stored in affine form. */
+ for (i=1; i<4; i++) {
+ sp_384_proj_point_dbl_n_12(t, 96, tmp);
+ sp_384_proj_to_affine_12(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Remaining entries: table[j] = table[2^i] + table[j - 2^i]. */
+ for (i=1; i<4; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+ sp_384_proj_to_affine_12(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_384_point_free_12(s2, 0, heap);
+ sp_384_point_free_12(s1, 0, heap);
+ sp_384_point_free_12( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply (unused; the pre-computed table stands in for it).
+ * table Pre-computed stripe table of multiples of the point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+ const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 rtd;
+ sp_point_384 pd;
+ sp_digit td[2 * 12 * 6];
+#endif
+ sp_point_384* rt;
+ sp_point_384* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_384_point_new_12(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+ XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+ /* First window index: one bit (the top bit) from each of the four
+ * 96-bit stripes of the scalar. */
+ y = 0;
+ for (j=0,x=95; j<4; j++,x+=96) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ /* For each remaining bit position: double once, then add the table
+ * entry selected by one bit from each stripe. */
+ for (i=94; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<4; j++,x+=96) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ sp_384_proj_point_dbl_12(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_384_map_12(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(p, 0, heap);
+ sp_384_point_free_12(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+ sp_digit x[12]; /* X ordinate of the cached point. */
+ sp_digit y[12]; /* Y ordinate of the cached point. */
+ sp_table_entry_384 table[16]; /* Stripe table built for the point. */
+ uint32_t cnt; /* Use count; least-used entry is evicted. */
+ int set; /* Non-zero when this entry holds a point. */
+} sp_cache_384_t;
+
+/* Cache of stripe tables for recently multiplied points. Thread-local when
+ * HAVE_THREAD_LS; otherwise serialized via sp_cache_384_lock below. */
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_384 = 0;
+ static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find the cache entry for the point, assigning a new entry (and evicting
+ * the least-used one when full) if the point is not already cached.
+ * NOTE(review): assumes the caller holds sp_cache_384_lock when thread-local
+ * storage is unavailable -- confirm against callers.
+ *
+ * g Point to look up.
+ * cache On return, points at the matching or newly assigned entry.
+ */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazy one-time initialization of the cache entries. */
+ if (sp_cache_384_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_384[i].set = 0;
+ }
+ sp_cache_384_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_384[i].set)
+ continue;
+
+ if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+ sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+ sp_cache_384[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_384_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_384[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_384_last) {
+ least = sp_cache_384[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_384[j].cnt < least) {
+ i = j;
+ least = sp_cache_384[i].cnt;
+ }
+ }
+ }
+
+ /* Claim the entry for this point; count restarts at 1. */
+ XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+ XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+ sp_cache_384[i].set = 1;
+ sp_cache_384[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_384[i];
+ sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ /* No fixed-point cache: always use the generic fast path. */
+ return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 12 * 7];
+ sp_cache_384_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Cache is shared between threads: initialize and take the lock. */
+ if (initCacheMutex_384 == 0) {
+ wc_InitMutex(&sp_cache_384_lock);
+ initCacheMutex_384 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_384_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_384(g, &cache);
+ /* Build the stripe table once the point has been seen twice.
+ * NOTE(review): the return value of sp_384_gen_stripe_table_12 is
+ * ignored; on failure the cached table may be incomplete -- confirm
+ * this is intended. */
+ if (cache->cnt == 2)
+ sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+ /* Until the table exists use the generic path; afterwards use the
+ * faster stripe multiplication. */
+ if (cache->cnt < 2) {
+ err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+ }
+ else {
+ err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#else
+#ifdef FP_ECC
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_384_gen_stripe_table_12(const sp_point_384* a,
+ sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td, s1d, s2d;
+#endif
+ sp_point_384* t;
+ sp_point_384* s1 = NULL;
+ sp_point_384* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_12(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, s2d, s2);
+ }
+
+ /* t = a converted into Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_384_proj_to_affine_12(t, tmp);
+
+ XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(48*i) * a, stored in affine form. */
+ for (i=1; i<8; i++) {
+ sp_384_proj_point_dbl_n_12(t, 48, tmp);
+ sp_384_proj_to_affine_12(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Remaining entries: table[j] = table[2^i] + table[j - 2^i]. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
+ sp_384_proj_to_affine_12(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_384_point_free_12(s2, 0, heap);
+ sp_384_point_free_12(s1, 0, heap);
+ sp_384_point_free_12( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply (unused; the pre-computed table stands in for it).
+ * table Pre-computed stripe table of multiples of the point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
+ const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 rtd;
+ sp_point_384 pd;
+ sp_digit td[2 * 12 * 6];
+#endif
+ sp_point_384* rt;
+ sp_point_384* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_384_point_new_12(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_12(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+ XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+ /* First window index: one bit (the top bit) from each of the eight
+ * 48-bit stripes of the scalar. */
+ y = 0;
+ for (j=0,x=47; j<8; j++,x+=48) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ /* For each remaining bit position: double once, then add the table
+ * entry selected by one bit from each stripe. */
+ for (i=46; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=48) {
+ y |= ((k[x / 32] >> (x % 32)) & 1) << j;
+ }
+
+ sp_384_proj_point_dbl_12(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_384_proj_point_add_qz1_12(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_384_map_12(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(p, 0, heap);
+ sp_384_point_free_12(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+typedef struct sp_cache_384_t {
+ sp_digit x[12]; /* X ordinate of the cached point. */
+ sp_digit y[12]; /* Y ordinate of the cached point. */
+ sp_table_entry_384 table[256]; /* Stripe table built for the point. */
+ uint32_t cnt; /* Use count; least-used entry is evicted. */
+ int set; /* Non-zero when this entry holds a point. */
+} sp_cache_384_t;
+
+/* Cache of stripe tables for recently multiplied points. Thread-local when
+ * HAVE_THREAD_LS; otherwise serialized via sp_cache_384_lock below. */
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_384_last = -1;
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_384 = 0;
+ static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find the cache entry for the point, assigning a new entry (and evicting
+ * the least-used one when full) if the point is not already cached.
+ * NOTE(review): assumes the caller holds sp_cache_384_lock when thread-local
+ * storage is unavailable -- confirm against callers.
+ *
+ * g Point to look up.
+ * cache On return, points at the matching or newly assigned entry.
+ */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazy one-time initialization of the cache entries. */
+ if (sp_cache_384_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_384[i].set = 0;
+ }
+ sp_cache_384_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_384[i].set)
+ continue;
+
+ if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
+ sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
+ sp_cache_384[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_384_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_384[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_384_last) {
+ least = sp_cache_384[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_384[j].cnt < least) {
+ i = j;
+ least = sp_cache_384[i].cnt;
+ }
+ }
+ }
+
+ /* Claim the entry for this point; count restarts at 1. */
+ XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+ XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+ sp_cache_384[i].set = 1;
+ sp_cache_384[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_384[i];
+ sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ /* No fixed-point cache: always use the generic fast path. */
+ return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 12 * 7];
+ sp_cache_384_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Cache is shared between threads: initialize and take the lock. */
+ if (initCacheMutex_384 == 0) {
+ wc_InitMutex(&sp_cache_384_lock);
+ initCacheMutex_384 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_384_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_384(g, &cache);
+ /* Build the stripe table once the point has been seen twice.
+ * NOTE(review): the return value of sp_384_gen_stripe_table_12 is
+ * ignored; on failure the cached table may be incomplete -- confirm
+ * this is intended. */
+ if (cache->cnt == 2)
+ sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+ /* Until the table exists use the generic path; afterwards use the
+ * faster stripe multiplication. */
+ if (cache->cnt < 2) {
+ err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
+ }
+ else {
+ err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[12];
+#endif
+ sp_point_384* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+
+ err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert the scalar and point into SP representation, multiply
+ * in place, then convert back. */
+ sp_384_from_mp(k, 12, km);
+ sp_384_point_from_ecc_point_12(point, gm);
+
+ err = sp_384_ecc_mulmod_12(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_to_ecc_point_12(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_12(point, 0, heap);
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Pre-computed stripe table for the P-384 base point (4 stripes of 96 bits):
+ * entry i is the sum of 2^(96*j)*G over each bit j set in i, stored as
+ * affine coordinates in Montgomery form; entry 0 is the point at infinity. */
+static const sp_table_entry_384 p384_table[16] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+ 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+ { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+ 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+ /* 2 */
+ { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+ 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+ { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+ 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+ /* 3 */
+ { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+ 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+ { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+ 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+ /* 4 */
+ { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+ 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+ { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+ 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+ /* 5 */
+ { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+ 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+ { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+ 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+ /* 6 */
+ { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+ 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+ { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+ 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+ /* 7 */
+ { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+ 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+ { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+ 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+ /* 8 */
+ { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+ 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+ { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+ 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+ /* 9 */
+ { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+ 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+ { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+ 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+ /* 10 */
+ { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+ 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+ { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+ 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+ /* 11 */
+ { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+ 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+ { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+ 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+ /* 12 */
+ { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+ 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+ { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+ 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+ /* 13 */
+ { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+ 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+ { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+ 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+ /* 14 */
+ { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+ 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+ { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+ 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+ /* 15 */
+ { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+ 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+ { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+ 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* Use the compile-time pre-computed stripe table for the base point. */
+ return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+ k, map, heap);
+}
+
+#else
+static const sp_table_entry_384 p384_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
+ 0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
+ { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
+ 0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
+ /* 2 */
+ { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4,
+ 0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c },
+ { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a,
+ 0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } },
+ /* 3 */
+ { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d,
+ 0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 },
+ { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a,
+ 0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } },
+ /* 4 */
+ { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
+ 0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
+ { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
+ 0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
+ /* 5 */
+ { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
+ 0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
+ { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
+ 0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
+ /* 6 */
+ { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18,
+ 0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 },
+ { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f,
+ 0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } },
+ /* 7 */
+ { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631,
+ 0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 },
+ { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6,
+ 0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } },
+ /* 8 */
+ { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826,
+ 0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 },
+ { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751,
+ 0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } },
+ /* 9 */
+ { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb,
+ 0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 },
+ { 0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed,
+ 0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } },
+ /* 10 */
+ { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf,
+ 0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 },
+ { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce,
+ 0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } },
+ /* 11 */
+ { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75,
+ 0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 },
+ { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498,
+ 0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } },
+ /* 12 */
+ { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0,
+ 0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 },
+ { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63,
+ 0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } },
+ /* 13 */
+ { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556,
+ 0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c },
+ { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc,
+ 0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } },
+ /* 14 */
+ { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161,
+ 0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 },
+ { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076,
+ 0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } },
+ /* 15 */
+ { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4,
+ 0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a },
+ { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8,
+ 0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } },
+ /* 16 */
+ { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
+ 0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
+ { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
+ 0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
+ /* 17 */
+ { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
+ 0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
+ { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
+ 0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
+ /* 18 */
+ { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3,
+ 0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf },
+ { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660,
+ 0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } },
+ /* 19 */
+ { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b,
+ 0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc },
+ { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4,
+ 0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } },
+ /* 20 */
+ { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
+ 0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
+ { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
+ 0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
+ /* 21 */
+ { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
+ 0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
+ { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
+ 0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
+ /* 22 */
+ { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928,
+ 0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 },
+ { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e,
+ 0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } },
+ /* 23 */
+ { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865,
+ 0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a },
+ { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35,
+ 0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } },
+ /* 24 */
+ { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521,
+ 0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc },
+ { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8,
+ 0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } },
+ /* 25 */
+ { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d,
+ 0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 },
+ { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278,
+ 0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } },
+ /* 26 */
+ { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0,
+ 0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 },
+ { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705,
+ 0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } },
+ /* 27 */
+ { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b,
+ 0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 },
+ { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac,
+ 0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } },
+ /* 28 */
+ { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2,
+ 0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 },
+ { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5,
+ 0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } },
+ /* 29 */
+ { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7,
+ 0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 },
+ { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0,
+ 0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } },
+ /* 30 */
+ { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56,
+ 0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 },
+ { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2,
+ 0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } },
+ /* 31 */
+ { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc,
+ 0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 },
+ { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f,
+ 0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } },
+ /* 32 */
+ { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3,
+ 0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a },
+ { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07,
+ 0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } },
+ /* 33 */
+ { { 0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4,
+ 0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 },
+ { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e,
+ 0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } },
+ /* 34 */
+ { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228,
+ 0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 },
+ { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8,
+ 0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } },
+ /* 35 */
+ { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f,
+ 0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 },
+ { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe,
+ 0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } },
+ /* 36 */
+ { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80,
+ 0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 },
+ { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924,
+ 0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } },
+ /* 37 */
+ { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645,
+ 0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 },
+ { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea,
+ 0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } },
+ /* 38 */
+ { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c,
+ 0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 },
+ { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd,
+ 0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } },
+ /* 39 */
+ { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a,
+ 0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 },
+ { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49,
+ 0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } },
+ /* 40 */
+ { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764,
+ 0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 },
+ { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c,
+ 0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } },
+ /* 41 */
+ { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472,
+ 0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b },
+ { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b,
+ 0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } },
+ /* 42 */
+ { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc,
+ 0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f },
+ { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d,
+ 0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } },
+ /* 43 */
+ { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790,
+ 0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 },
+ { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2,
+ 0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } },
+ /* 44 */
+ { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b,
+ 0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef },
+ { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd,
+ 0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } },
+ /* 45 */
+ { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9,
+ 0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 },
+ { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b,
+ 0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } },
+ /* 46 */
+ { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9,
+ 0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 },
+ { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967,
+ 0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } },
+ /* 47 */
+ { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12,
+ 0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc },
+ { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543,
+ 0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } },
+ /* 48 */
+ { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3,
+ 0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 },
+ { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963,
+ 0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } },
+ /* 49 */
+ { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f,
+ 0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb },
+ { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358,
+ 0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } },
+ /* 50 */
+ { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87,
+ 0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 },
+ { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1,
+ 0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } },
+ /* 51 */
+ { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18,
+ 0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 },
+ { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552,
+ 0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } },
+ /* 52 */
+ { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff,
+ 0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 },
+ { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e,
+ 0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } },
+ /* 53 */
+ { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f,
+ 0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 },
+ { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3,
+ 0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } },
+ /* 54 */
+ { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348,
+ 0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 },
+ { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419,
+ 0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } },
+ /* 55 */
+ { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485,
+ 0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 },
+ { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc,
+ 0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } },
+ /* 56 */
+ { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1,
+ 0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a },
+ { 0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528,
+ 0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } },
+ /* 57 */
+ { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405,
+ 0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 },
+ { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856,
+ 0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } },
+ /* 58 */
+ { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102,
+ 0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 },
+ { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967,
+ 0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } },
+ /* 59 */
+ { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c,
+ 0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 },
+ { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a,
+ 0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } },
+ /* 60 */
+ { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a,
+ 0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 },
+ { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41,
+ 0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } },
+ /* 61 */
+ { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b,
+ 0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 },
+ { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745,
+ 0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } },
+ /* 62 */
+ { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2,
+ 0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 },
+ { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb,
+ 0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } },
+ /* 63 */
+ { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2,
+ 0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 },
+ { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f,
+ 0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } },
+ /* 64 */
+ { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
+ 0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
+ { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
+ 0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
+ /* 65 */
+ { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
+ 0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
+ { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
+ 0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
+ /* 66 */
+ { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753,
+ 0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e },
+ { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac,
+ 0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } },
+ /* 67 */
+ { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c,
+ 0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb },
+ { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe,
+ 0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } },
+ /* 68 */
+ { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
+ 0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
+ { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
+ 0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
+ /* 69 */
+ { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
+ 0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
+ { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
+ 0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
+ /* 70 */
+ { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311,
+ 0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 },
+ { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5,
+ 0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } },
+ /* 71 */
+ { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43,
+ 0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 },
+ { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9,
+ 0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } },
+ /* 72 */
+ { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a,
+ 0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 },
+ { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601,
+ 0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } },
+ /* 73 */
+ { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806,
+ 0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc },
+ { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37,
+ 0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } },
+ /* 74 */
+ { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460,
+ 0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d },
+ { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b,
+ 0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } },
+ /* 75 */
+ { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a,
+ 0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 },
+ { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02,
+ 0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } },
+ /* 76 */
+ { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94,
+ 0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 },
+ { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101,
+ 0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } },
+ /* 77 */
+ { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2,
+ 0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 },
+ { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45,
+ 0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } },
+ /* 78 */
+ { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50,
+ 0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 },
+ { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e,
+ 0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } },
+ /* 79 */
+ { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d,
+ 0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 },
+ { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5,
+ 0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } },
+ /* 80 */
+ { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
+ 0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
+ { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
+ 0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
+ /* 81 */
+ { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
+ 0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
+ { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
+ 0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
+ /* 82 */
+ { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8,
+ 0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df },
+ { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d,
+ 0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } },
+ /* 83 */
+ { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b,
+ 0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 },
+ { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc,
+ 0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } },
+ /* 84 */
+ { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
+ 0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
+ { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
+ 0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
+ /* 85 */
+ { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
+ 0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
+ { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
+ 0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
+ /* 86 */
+ { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b,
+ 0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c },
+ { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930,
+ 0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } },
+ /* 87 */
+ { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd,
+ 0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 },
+ { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd,
+ 0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } },
+ /* 88 */
+ { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d,
+ 0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 },
+ { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378,
+ 0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } },
+ /* 89 */
+ { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8,
+ 0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 },
+ { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14,
+ 0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } },
+ /* 90 */
+ { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338,
+ 0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a },
+ { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8,
+ 0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } },
+ /* 91 */
+ { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3,
+ 0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c },
+ { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a,
+ 0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } },
+ /* 92 */
+ { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45,
+ 0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d },
+ { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967,
+ 0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } },
+ /* 93 */
+ { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f,
+ 0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 },
+ { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4,
+ 0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } },
+ /* 94 */
+ { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7,
+ 0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 },
+ { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5,
+ 0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } },
+ /* 95 */
+ { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41,
+ 0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 },
+ { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f,
+ 0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } },
+ /* 96 */
+ { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9,
+ 0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb },
+ { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1,
+ 0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } },
+ /* 97 */
+ { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3,
+ 0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 },
+ { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25,
+ 0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } },
+ /* 98 */
+ { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247,
+ 0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 },
+ { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7,
+ 0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } },
+ /* 99 */
+ { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283,
+ 0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e },
+ { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39,
+ 0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } },
+ /* 100 */
+ { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06,
+ 0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a },
+ { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062,
+ 0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } },
+ /* 101 */
+ { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642,
+ 0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f },
+ { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175,
+ 0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } },
+ /* 102 */
+ { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e,
+ 0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 },
+ { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02,
+ 0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } },
+ /* 103 */
+ { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414,
+ 0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 },
+ { 0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c,
+ 0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } },
+ /* 104 */
+ { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46,
+ 0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 },
+ { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d,
+ 0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } },
+ /* 105 */
+ { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b,
+ 0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 },
+ { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd,
+ 0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } },
+ /* 106 */
+ { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20,
+ 0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be },
+ { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d,
+ 0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } },
+ /* 107 */
+ { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4,
+ 0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 },
+ { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e,
+ 0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } },
+ /* 108 */
+ { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9,
+ 0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 },
+ { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570,
+ 0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } },
+ /* 109 */
+ { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2,
+ 0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 },
+ { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626,
+ 0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } },
+ /* 110 */
+ { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d,
+ 0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc },
+ { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12,
+ 0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } },
+ /* 111 */
+ { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965,
+ 0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 },
+ { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b,
+ 0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } },
+ /* 112 */
+ { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f,
+ 0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 },
+ { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749,
+ 0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } },
+ /* 113 */
+ { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70,
+ 0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea },
+ { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd,
+ 0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } },
+ /* 114 */
+ { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084,
+ 0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 },
+ { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58,
+ 0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } },
+ /* 115 */
+ { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f,
+ 0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e },
+ { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f,
+ 0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } },
+ /* 116 */
+ { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b,
+ 0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d },
+ { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659,
+ 0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } },
+ /* 117 */
+ { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907,
+ 0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb },
+ { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec,
+ 0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } },
+ /* 118 */
+ { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2,
+ 0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 },
+ { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347,
+ 0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } },
+ /* 119 */
+ { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a,
+ 0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 },
+ { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257,
+ 0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } },
+ /* 120 */
+ { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a,
+ 0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce },
+ { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc,
+ 0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } },
+ /* 121 */
+ { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d,
+ 0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e },
+ { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736,
+ 0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } },
+ /* 122 */
+ { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370,
+ 0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e },
+ { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262,
+ 0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } },
+ /* 123 */
+ { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7,
+ 0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 },
+ { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241,
+ 0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } },
+ /* 124 */
+ { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627,
+ 0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 },
+ { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f,
+ 0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } },
+ /* 125 */
+ { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397,
+ 0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 },
+ { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972,
+ 0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } },
+ /* 126 */
+ { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b,
+ 0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 },
+ { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454,
+ 0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } },
+ /* 127 */
+ { { 0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5,
+ 0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 },
+ { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4,
+ 0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } },
+ /* 128 */
+ { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878,
+ 0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 },
+ { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7,
+ 0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } },
+ /* 129 */
+ { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5,
+ 0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 },
+ { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02,
+ 0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } },
+ /* 130 */
+ { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187,
+ 0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b },
+ { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078,
+ 0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } },
+ /* 131 */
+ { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf,
+ 0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf },
+ { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be,
+ 0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } },
+ /* 132 */
+ { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8,
+ 0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 },
+ { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149,
+ 0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } },
+ /* 133 */
+ { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb,
+ 0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c },
+ { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64,
+ 0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } },
+ /* 134 */
+ { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259,
+ 0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 },
+ { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41,
+ 0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } },
+ /* 135 */
+ { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533,
+ 0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b },
+ { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40,
+ 0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } },
+ /* 136 */
+ { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6,
+ 0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d },
+ { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2,
+ 0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } },
+ /* 137 */
+ { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b,
+ 0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e },
+ { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc,
+ 0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } },
+ /* 138 */
+ { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090,
+ 0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 },
+ { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6,
+ 0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } },
+ /* 139 */
+ { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac,
+ 0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc },
+ { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7,
+ 0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } },
+ /* 140 */
+ { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40,
+ 0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 },
+ { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db,
+ 0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } },
+ /* 141 */
+ { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d,
+ 0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b },
+ { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13,
+ 0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } },
+ /* 142 */
+ { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb,
+ 0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 },
+ { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348,
+ 0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } },
+ /* 143 */
+ { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761,
+ 0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b },
+ { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04,
+ 0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } },
+ /* 144 */
+ { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7,
+ 0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a },
+ { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3,
+ 0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } },
+ /* 145 */
+ { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e,
+ 0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 },
+ { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276,
+ 0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } },
+ /* 146 */
+ { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2,
+ 0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 },
+ { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66,
+ 0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } },
+ /* 147 */
+ { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979,
+ 0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb },
+ { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918,
+ 0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } },
+ /* 148 */
+ { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df,
+ 0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e },
+ { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5,
+ 0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } },
+ /* 149 */
+ { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a,
+ 0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 },
+ { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2,
+ 0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } },
+ /* 150 */
+ { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82,
+ 0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba },
+ { 0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048,
+ 0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } },
+ /* 151 */
+ { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407,
+ 0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a },
+ { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44,
+ 0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } },
+ /* 152 */
+ { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc,
+ 0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 },
+ { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec,
+ 0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } },
+ /* 153 */
+ { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6,
+ 0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 },
+ { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada,
+ 0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } },
+ /* 154 */
+ { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0,
+ 0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c },
+ { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2,
+ 0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } },
+ /* 155 */
+ { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd,
+ 0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e },
+ { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32,
+ 0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } },
+ /* 156 */
+ { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f,
+ 0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 },
+ { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1,
+ 0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } },
+ /* 157 */
+ { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729,
+ 0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 },
+ { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508,
+ 0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } },
+ /* 158 */
+ { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b,
+ 0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 },
+ { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646,
+ 0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } },
+ /* 159 */
+ { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102,
+ 0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 },
+ { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39,
+ 0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } },
+ /* 160 */
+ { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64,
+ 0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 },
+ { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1,
+ 0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } },
+ /* 161 */
+ { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b,
+ 0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d },
+ { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954,
+ 0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } },
+ /* 162 */
+ { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5,
+ 0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 },
+ { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe,
+ 0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } },
+ /* 163 */
+ { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288,
+ 0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd },
+ { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792,
+ 0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } },
+ /* 164 */
+ { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce,
+ 0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e },
+ { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34,
+ 0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } },
+ /* 165 */
+ { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013,
+ 0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 },
+ { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c,
+ 0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } },
+ /* 166 */
+ { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a,
+ 0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f },
+ { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396,
+ 0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } },
+ /* 167 */
+ { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a,
+ 0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 },
+ { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc,
+ 0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } },
+ /* 168 */
+ { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e,
+ 0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 },
+ { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6,
+ 0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } },
+ /* 169 */
+ { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630,
+ 0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad },
+ { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246,
+ 0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } },
+ /* 170 */
+ { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5,
+ 0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d },
+ { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b,
+ 0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } },
+ /* 171 */
+ { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d,
+ 0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 },
+ { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8,
+ 0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } },
+ /* 172 */
+ { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba,
+ 0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 },
+ { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0,
+ 0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } },
+ /* 173 */
+ { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9,
+ 0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 },
+ { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9,
+ 0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } },
+ /* 174 */
+ { { 0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c,
+ 0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 },
+ { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d,
+ 0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } },
+ /* 175 */
+ { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc,
+ 0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 },
+ { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8,
+ 0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } },
+ /* 176 */
+ { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d,
+ 0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 },
+ { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7,
+ 0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } },
+ /* 177 */
+ { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d,
+ 0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 },
+ { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef,
+ 0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } },
+ /* 178 */
+ { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960,
+ 0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 },
+ { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596,
+ 0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } },
+ /* 179 */
+ { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c,
+ 0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef },
+ { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d,
+ 0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } },
+ /* 180 */
+ { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2,
+ 0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 },
+ { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8,
+ 0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } },
+ /* 181 */
+ { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a,
+ 0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c },
+ { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683,
+ 0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } },
+ /* 182 */
+ { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f,
+ 0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf },
+ { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4,
+ 0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } },
+ /* 183 */
+ { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63,
+ 0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f },
+ { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e,
+ 0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } },
+ /* 184 */
+ { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670,
+ 0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e },
+ { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1,
+ 0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } },
+ /* 185 */
+ { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45,
+ 0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba },
+ { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5,
+ 0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } },
+ /* 186 */
+ { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7,
+ 0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a },
+ { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21,
+ 0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } },
+ /* 187 */
+ { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2,
+ 0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 },
+ { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2,
+ 0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } },
+ /* 188 */
+ { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319,
+ 0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f },
+ { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860,
+ 0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } },
+ /* 189 */
+ { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de,
+ 0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 },
+ { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6,
+ 0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } },
+ /* 190 */
+ { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e,
+ 0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 },
+ { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd,
+ 0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } },
+ /* 191 */
+ { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13,
+ 0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e },
+ { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51,
+ 0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } },
+ /* 192 */
+ { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d,
+ 0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 },
+ { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb,
+ 0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } },
+ /* 193 */
+ { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a,
+ 0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd },
+ { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184,
+ 0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } },
+ /* 194 */
+ { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db,
+ 0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 },
+ { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145,
+ 0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } },
+ /* 195 */
+ { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1,
+ 0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d },
+ { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f,
+ 0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } },
+ /* 196 */
+ { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a,
+ 0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 },
+ { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568,
+ 0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } },
+ /* 197 */
+ { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612,
+ 0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f },
+ { 0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0,
+ 0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } },
+ /* 198 */
+ { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57,
+ 0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f },
+ { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6,
+ 0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } },
+ /* 199 */
+ { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d,
+ 0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 },
+ { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5,
+ 0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } },
+ /* 200 */
+ { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e,
+ 0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 },
+ { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9,
+ 0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } },
+ /* 201 */
+ { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800,
+ 0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e },
+ { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c,
+ 0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } },
+ /* 202 */
+ { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39,
+ 0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 },
+ { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c,
+ 0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } },
+ /* 203 */
+ { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139,
+ 0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 },
+ { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7,
+ 0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } },
+ /* 204 */
+ { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92,
+ 0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db },
+ { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2,
+ 0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } },
+ /* 205 */
+ { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc,
+ 0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 },
+ { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc,
+ 0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } },
+ /* 206 */
+ { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0,
+ 0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 },
+ { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a,
+ 0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } },
+ /* 207 */
+ { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73,
+ 0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf },
+ { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1,
+ 0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } },
+ /* 208 */
+ { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0,
+ 0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 },
+ { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d,
+ 0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } },
+ /* 209 */
+ { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6,
+ 0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 },
+ { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370,
+ 0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } },
+ /* 210 */
+ { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553,
+ 0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 },
+ { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806,
+ 0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } },
+ /* 211 */
+ { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6,
+ 0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e },
+ { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b,
+ 0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } },
+ /* 212 */
+ { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b,
+ 0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 },
+ { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314,
+ 0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } },
+ /* 213 */
+ { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c,
+ 0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 },
+ { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08,
+ 0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } },
+ /* 214 */
+ { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0,
+ 0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 },
+ { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180,
+ 0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } },
+ /* 215 */
+ { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d,
+ 0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f },
+ { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277,
+ 0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } },
+ /* 216 */
+ { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4,
+ 0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b },
+ { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072,
+ 0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } },
+ /* 217 */
+ { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44,
+ 0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 },
+ { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1,
+ 0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } },
+ /* 218 */
+ { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247,
+ 0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 },
+ { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a,
+ 0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } },
+ /* 219 */
+ { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361,
+ 0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 },
+ { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd,
+ 0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } },
+ /* 220 */
+ { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc,
+ 0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 },
+ { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5,
+ 0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } },
+ /* 221 */
+ { { 0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094,
+ 0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 },
+ { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf,
+ 0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } },
+ /* 222 */
+ { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b,
+ 0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 },
+ { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b,
+ 0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } },
+ /* 223 */
+ { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094,
+ 0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b },
+ { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d,
+ 0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } },
+ /* 224 */
+ { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61,
+ 0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a },
+ { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95,
+ 0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } },
+ /* 225 */
+ { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947,
+ 0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 },
+ { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24,
+ 0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } },
+ /* 226 */
+ { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e,
+ 0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 },
+ { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0,
+ 0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } },
+ /* 227 */
+ { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404,
+ 0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc },
+ { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73,
+ 0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } },
+ /* 228 */
+ { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d,
+ 0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f },
+ { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5,
+ 0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } },
+ /* 229 */
+ { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee,
+ 0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b },
+ { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb,
+ 0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } },
+ /* 230 */
+ { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf,
+ 0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd },
+ { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8,
+ 0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } },
+ /* 231 */
+ { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d,
+ 0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 },
+ { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052,
+ 0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } },
+ /* 232 */
+ { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d,
+ 0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 },
+ { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e,
+ 0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } },
+ /* 233 */
+ { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1,
+ 0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b },
+ { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea,
+ 0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } },
+ /* 234 */
+ { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e,
+ 0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 },
+ { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787,
+ 0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } },
+ /* 235 */
+ { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365,
+ 0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 },
+ { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73,
+ 0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } },
+ /* 236 */
+ { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54,
+ 0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 },
+ { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef,
+ 0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } },
+ /* 237 */
+ { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd,
+ 0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 },
+ { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc,
+ 0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } },
+ /* 238 */
+ { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175,
+ 0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 },
+ { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf,
+ 0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } },
+ /* 239 */
+ { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391,
+ 0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 },
+ { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55,
+ 0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } },
+ /* 240 */
+ { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190,
+ 0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 },
+ { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d,
+ 0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } },
+ /* 241 */
+ { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7,
+ 0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 },
+ { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1,
+ 0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } },
+ /* 242 */
+ { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d,
+ 0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de },
+ { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1,
+ 0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } },
+ /* 243 */
+ { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031,
+ 0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 },
+ { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10,
+ 0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } },
+ /* 244 */
+ { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a,
+ 0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 },
+ { 0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a,
+ 0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } },
+ /* 245 */
+ { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13,
+ 0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 },
+ { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc,
+ 0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } },
+ /* 246 */
+ { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996,
+ 0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 },
+ { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6,
+ 0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } },
+ /* 247 */
+ { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e,
+ 0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 },
+ { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2,
+ 0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } },
+ /* 248 */
+ { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b,
+ 0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d },
+ { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f,
+ 0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } },
+ /* 249 */
+ { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7,
+ 0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 },
+ { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190,
+ 0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } },
+ /* 250 */
+ { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e,
+ 0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f },
+ { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d,
+ 0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } },
+ /* 251 */
+ { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4,
+ 0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df },
+ { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8,
+ 0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } },
+ /* 252 */
+ { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6,
+ 0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 },
+ { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6,
+ 0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } },
+ /* 253 */
+ { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7,
+ 0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 },
+ { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb,
+ 0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } },
+ /* 254 */
+ { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80,
+ 0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba },
+ { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6,
+ 0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } },
+ /* 255 */
+ { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29,
+ 0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 },
+ { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6,
+ 0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Thin wrapper around the stripe multiplication that supplies the
+ * pre-computed table of base point multiples (p384_table) defined above.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
+                                       k, map, heap);
+}
+
+#endif
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    /* Stack buffers used when the heap-allocating build options are off. */
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* Room for the scalar as 12 32-bit digits (384 bits). */
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+            DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Convert the mp_int scalar into the fixed digit representation. */
+        sp_384_from_mp(k, 12, km);
+
+        err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Copy the result back into the caller's ecc_point. */
+        err = sp_384_point_to_ecc_point_12(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number is zero.
+ * Implementation is constant time: every digit is always read.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_12(const sp_digit* a)
+{
+    sp_digit acc = 0;
+    int i;
+
+    /* OR all twelve digits together; zero only when every digit is zero. */
+    for (i = 0; i < 12; i++) {
+        acc |= a[i];
+    }
+
+    return acc == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+/* Add 1 to a. (a = a + 1)
+ *
+ * Adds one to the least significant word and ripples the carry through
+ * all twelve 32-bit words of the 384-bit value.
+ *
+ * a A single precision integer.
+ */
+SP_NOINLINE static void sp_384_add_one_12(sp_digit* a)
+{
+    __asm__ __volatile__ (
+        /* a[0] += 1; the remaining words only absorb the carry (r2 = 0). */
+        "mov r2, #1\n\t"
+        "ldr r1, [%[a], #0]\n\t"
+        "adds r1, r1, r2\n\t"
+        "mov r2, #0\n\t"
+        "str r1, [%[a], #0]\n\t"
+        /* Propagate the carry word by word with adcs. */
+        "ldr r1, [%[a], #4]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #4]\n\t"
+        "ldr r1, [%[a], #8]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #8]\n\t"
+        "ldr r1, [%[a], #12]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #12]\n\t"
+        "ldr r1, [%[a], #16]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #16]\n\t"
+        "ldr r1, [%[a], #20]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #20]\n\t"
+        "ldr r1, [%[a], #24]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #24]\n\t"
+        "ldr r1, [%[a], #28]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #28]\n\t"
+        "ldr r1, [%[a], #32]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #32]\n\t"
+        "ldr r1, [%[a], #36]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #36]\n\t"
+        "ldr r1, [%[a], #40]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #40]\n\t"
+        "ldr r1, [%[a], #44]\n\t"
+        "adcs r1, r1, r2\n\t"
+        "str r1, [%[a], #44]\n\t"
+        :
+        : [a] "r" (a)
+        : "memory", "r1", "r2"
+    );
+}
+
+/* Read big endian unsigned byte array into r.
+ *
+ * Bytes are consumed from the end of the array (least significant byte
+ * first) and packed into 32-bit digits.  Any digits not filled from the
+ * input are zeroed.
+ *
+ * r A single precision integer.
+ * size Maximum number of digits to fill.
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;   /* Bit offset into the current digit. */
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        /* Place the byte at the current bit offset of digit j. */
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            /* Digit full: mask it off and start the next digit with the
+             * bits of a[i] that did not fit. */
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero any remaining high digits. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * Rejection sampling: random 384-bit candidates are drawn until one is
+ * below p384_order2 (presumably order - 2, defined earlier in this file);
+ * adding one then yields a value in the required range.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
+{
+    int err;
+    byte buf[48];
+
+    do {
+        err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+        if (err == 0) {
+            sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
+            if (sp_384_cmp_12(k, p384_order2) < 0) {
+                /* Accepted: shift the candidate up by one so k != 0. */
+                sp_384_add_one_12(k);
+                break;
+            }
+        }
+    }
+    while (err == 0);
+
+    return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384 inf;
+#endif
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_point_384* infinity;
+#endif
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, inf, infinity);
+    }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+            DYNAMIC_TYPE_ECC);
+        if (k == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    k = kd;
+#endif
+
+    /* Generate a random private scalar in 1..order-1. */
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_gen_k_12(rng, k);
+    }
+    /* Public key is k.G - always mapped back to affine. */
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+    }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    /* Validate: order * pub must be the point at infinity. */
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
+    }
+    if (err == MP_OKAY) {
+        /* Check the result of the order multiplication ('infinity'), not
+         * the public point: a valid public point always has non-zero
+         * coordinates, so testing 'point' here would reject every key. */
+        if ((sp_384_iszero_12(infinity->x) == 0) ||
+            (sp_384_iszero_12(infinity->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+#endif
+
+    /* NOTE(review): k holds the private scalar and is not zeroized before
+     * being freed - consider clearing it as the sign path does. */
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(k, priv);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_12(point, pub);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+    sp_384_point_free_12(infinity, 1, heap);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+
+#ifdef HAVE_ECC_DHE
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * Digits are emitted least significant first into the end of the buffer,
+ * so the most significant byte of the number ends up at a[0].
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+static void sp_384_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    j = 384 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<12 && j>=0; i++) {
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
+        if (j < 0) {
+            break;
+        }
+        /* Emit the remaining whole bytes of this digit. */
+        while (b < 32) {
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        /* Carry the bit offset over into the next digit. */
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv Scalar to multiply the point by.
+ * pub Point to multiply.
+ * out Buffer to hold X ordinate.
+ * outLen On entry, size of the buffer in bytes.
+ *        On exit, length of data in buffer in bytes.
+ * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is too small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+        word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[12];
+#endif
+    sp_point_384* point = NULL;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+
+    /* Output is always the full 48-byte X ordinate. */
+    if (*outLen < 48U) {
+        err = BUFFER_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p, point);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+            DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Shared point = priv * pub, mapped back to affine. */
+        sp_384_from_mp(k, 12, priv);
+        sp_384_point_from_ecc_point_12(point, pub);
+        err = sp_384_ecc_mulmod_12(point, point, k, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        sp_384_to_bin(point->x, out);
+        *outLen = 48;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(point, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Sub b from a into a. (a -= b)
+ *
+ * Loop form: two 32-bit words are subtracted per iteration, with the
+ * borrow carried between iterations through c.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the borrow out of the subtraction (0 or all ones).
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+    __asm__ __volatile__ (
+        "mov r8, %[a]\n\t"
+        "add r8, r8, #48\n\t"           /* r8 = one past the 12th word of a. */
+        "\n1:\n\t"
+        /* Turn the saved borrow (c is 0 or all ones) back into the carry
+         * flag for the sbcs instructions below. */
+        "mov r5, #0\n\t"
+        "subs r5, r5, %[c]\n\t"
+        "ldr r3, [%[a]]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b]]\n\t"
+        "ldr r6, [%[b], #4]\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "str r3, [%[a]]\n\t"
+        "str r4, [%[a], #4]\n\t"
+        /* Save the borrow for the next iteration: c = 0 or all ones. */
+        "sbc %[c], %[c], %[c]\n\t"
+        "add %[a], %[a], #8\n\t"
+        "add %[b], %[b], #8\n\t"
+        "cmp %[a], r8\n\t"
+        "bne 1b\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r8"
+    );
+
+    return c;
+}
+
+#else
+/* Sub b from a into a. (a -= b)
+ *
+ * Fully unrolled form: the first word pair uses subs to start the borrow
+ * chain; subsequent pairs use sbcs to continue it.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * returns the borrow out of the subtraction (0 or all ones).
+ */
+SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "subs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        "ldm %[a], {r3, r4}\n\t"
+        "ldm %[b]!, {r5, r6}\n\t"
+        "sbcs r3, r3, r5\n\t"
+        "sbcs r4, r4, r6\n\t"
+        "stm %[a]!, {r3, r4}\n\t"
+        /* Capture the final borrow: c = 0 or all ones. */
+        "sbc %[c], %[c], %[c]\n\t"
+        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6"
+    );
+
+    return c;
+}
+
+#endif /* WOLFSSL_SP_SMALL */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Writes 13 words of result: 12 product words plus the final carry word.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
+        sp_digit b)
+{
+    __asm__ __volatile__ (
+        "add r9, %[a], #48\n\t"         /* r9 = one past the 12th word of a. */
+        /* A[0] * B */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r5, r3, r6, %[b]\n\t"
+        "mov r4, #0\n\t"
+        "str r5, [%[r]], #4\n\t"
+        /* A[0] * B - Done */
+        "\n1:\n\t"
+        "mov r5, #0\n\t"
+        /* A[] * B: accumulate into the r3:r4:r5 running window. */
+        "ldr r6, [%[a]], #4\n\t"
+        "umull r6, r8, r6, %[b]\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r8\n\t"
+        "adc r5, r5, #0\n\t"
+        /* A[] * B - Done */
+        "str r3, [%[r]], #4\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        /* Store the final carry as the 13th result word. */
+        "str r3, [%[r]]\n\t"
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
+    );
+}
+
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * The quotient is estimated 16 bits at a time using a divisor rounded up
+ * to the next multiple of 2^16 (r6), which is why the result may be one
+ * too large.
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    sp_digit r = 0;
+
+    __asm__ __volatile__ (
+        /* r6 = (div >> 16) + 1: over-estimated 16-bit divisor. */
+        "lsr r6, %[div], #16\n\t"
+        "add r6, r6, #1\n\t"
+        /* First quotient estimate from the high half; subtract it off. */
+        "udiv r4, %[d1], r6\n\t"
+        "lsl r8, r4, #16\n\t"
+        "umull r4, r5, %[div], r8\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "udiv r5, %[d1], r6\n\t"
+        "lsl r4, r5, #16\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        /* Refine using the top 16 bits of the remaining d1|d0. */
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        "lsl r4, %[d1], #16\n\t"
+        "orr r4, r4, %[d0], lsr #16\n\t"
+        "udiv r4, r4, r6\n\t"
+        "add r8, r8, r4\n\t"
+        "umull r4, r5, %[div], r4\n\t"
+        "subs %[d0], %[d0], r4\n\t"
+        "sbc %[d1], %[d1], r5\n\t"
+        /* Final exact correction with the remaining low word. */
+        "udiv r4, %[d0], %[div]\n\t"
+        "add r8, r8, r4\n\t"
+        "mov %[r], r8\n\t"
+        : [r] "+r" (r)
+        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
+        : "r4", "r5", "r6", "r8"
+    );
+    return r;
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int j;
+
+    for (j = 0; j < 12; j++) {
+        r[j] = a[j] & m;
+    }
+#else
+    /* Unrolled, three digits per line. */
+    r[0]  = a[0]  & m; r[1]  = a[1]  & m; r[2]  = a[2]  & m;
+    r[3]  = a[3]  & m; r[4]  = a[4]  & m; r[5]  = a[5]  & m;
+    r[6]  = a[6]  & m; r[7]  = a[7]  & m; r[8]  = a[8]  & m;
+    r[9]  = a[9]  & m; r[10] = a[10] & m; r[11] = a[11] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[24], t2[13];
+    sp_digit div, r1;
+    int i;
+
+    (void)m;
+
+    div = d[11];
+    /* t1 = double-width working copy of a. */
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
+    for (i=11; i>=0; i--) {
+        /* Estimate one quotient digit (may be 1 too large). */
+        r1 = div_384_word_12(t1[12 + i], t1[12 + i - 1], div);
+
+        /* Subtract r1 * d from the partial remainder. */
+        sp_384_mul_d_12(t2, d, r1);
+        t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
+        t1[12 + i] -= t2[12];
+        /* If the estimate over-shot, t1[12 + i] is all ones: conditionally
+         * add d back (twice to cover the worst case). */
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+        sp_384_mask_12(t2, d, t1[12 + i]);
+        t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
+    }
+
+    /* Final conditional subtract when the remainder still >= d. */
+    r1 = sp_384_cmp_12(t1, d) >= 0;
+    sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Convenience wrapper around the division that discards the quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_384_div_12(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+static const uint32_t p384_order_minus_2[12] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
+    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve.
+ * The high 192 bits are all ones, so the inversion code handles them with
+ * a fixed squaring/multiplication chain and only needs this low half. */
+static const uint32_t p384_order_low[6] = {
+    0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
+
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * Operands and result are in Montgomery form with respect to the order.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_384_mul_12(r, a, b);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * Operand and result are in Montgomery form with respect to the order.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
+{
+    sp_384_sqr_12(r, a);
+    sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square a number mod the order of the P384 curve n times.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r Result of the squarings.
+ * a Number to square.
+ * n Number of times to square (at least one squaring is performed).
+ */
+static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
+{
+    sp_384_mont_sqr_order_12(r, a);
+    while (--n > 0) {
+        sp_384_mont_sqr_order_12(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * By Fermat's little theorem the inverse is a^(order-2) mod order; the
+ * exponentiation is performed with a fixed square-and-multiply schedule.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Plain left-to-right square-and-multiply over order-2. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 12);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_order_12(t, t);
+        if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 12U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 12;
+    sp_digit* t3 = td + 4 * 12;
+    int i;
+
+    /* The exponent's high 192 bits are all ones - build a^(2^192 - 1)
+     * first, then process the low 192 bits (p384_order_low) bit by bit. */
+    /* t = a^2 */
+    sp_384_mont_sqr_order_12(t, a);
+    /* t = a^3 = t * a */
+    sp_384_mont_mul_order_12(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_384_mont_sqr_n_order_12(t2, t, 2);
+    /* t = a^f = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_384_mont_sqr_n_order_12(t2, t, 4);
+    /* t = a^ff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_384_mont_sqr_n_order_12(t2, t, 8);
+    /* t3= a^ffff = t2 * t */
+    sp_384_mont_mul_order_12(t3, t2, t);
+    /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t3, 16);
+    /* t = a^ffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+    sp_384_mont_sqr_n_order_12(t2, t, 16);
+    /* t = a^ffffffffffff = t2 * t3 */
+    sp_384_mont_mul_order_12(t, t2, t3);
+    /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+    sp_384_mont_sqr_n_order_12(t2, t, 48);
+    /* t= a^ffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t, t2, t);
+    /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_order_12(t2, t, 96);
+    /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+    sp_384_mont_mul_order_12(t2, t2, t);
+    for (i=191; i>=1; i--) {
+        sp_384_mont_sqr_order_12(t2, t2);
+        if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_384_mont_mul_order_12(t2, t2, a);
+        }
+    }
+    /* Bit 0 of order-2 is 1: final square and multiply. */
+    sp_384_mont_sqr_order_12(t2, t2);
+    sp_384_mont_mul_order_12(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 384 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * km Scalar to use as k, or NULL/zero to generate a random k.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+        mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*12];
+    sp_digit xd[2*12];
+    sp_digit kd[2*12];
+    sp_digit rd[2*12];
+    sp_digit td[3 * 2*12];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int32_t c;
+    int i;
+
+    (void)heap;
+
+    err = sp_384_point_new_12(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* 7 double-width (2*12 digit) buffers: e, x, k, r and 3 for tmp. */
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
+            DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 12;
+        x = d + 2 * 12;
+        k = d + 4 * 12;
+        r = d + 6 * 12;
+        tmp = d + 8 * 12;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        /* s and kInv alias e and k - never live at the same time. */
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 12, hash, (int)hashLen);
+    }
+
+    /* Retry until a usable (non-zero) signature is produced. */
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 12, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_12(rng, k);
+        }
+        else {
+            /* Use the caller-supplied k once and destroy it. */
+            sp_384_from_mp(k, 12, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+            err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
+            sp_384_norm_12(r);
+            c = sp_384_cmp_12(r, p384_order);
+            sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(r);
+
+            /* Conv k to Montgomery form (mod order) */
+            sp_384_mul_12(k, k, p384_norm_order);
+            err = sp_384_mod_12(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(k);
+            /* kInv = 1/k mod order */
+            sp_384_mont_inv_order_12(kInv, k, tmp);
+            sp_384_norm_12(kInv);
+
+            /* s = r * x + e */
+            sp_384_mul_12(x, x, r);
+            err = sp_384_mod_12(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_12(x);
+            carry = sp_384_add_12(s, e, x);
+            sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
+            sp_384_norm_12(s);
+            c = sp_384_cmp_12(s, p384_order);
+            sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_12(s);
+
+            /* s = s * k^-1 mod order */
+            sp_384_mont_mul_order_12(s, s, kInv);
+            sp_384_norm_12(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_12(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    /* Retries exhausted without a usable signature. */
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        /* Zeroize the entire allocation including tmp; the previous size
+         * of 8 * 12 digits left the inversion temporaries uncleared. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 12);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
+#endif
+    sp_384_point_free_12(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 384)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash     Hash of the data to verify.
+ * hashLen  Length of the hash data.
+ * pX       X ordinate of the public key point.
+ * pY       Y ordinate of the public key point.
+ * pZ       Z ordinate of the public key point.
+ * r        First part of the signature as an mp_int.
+ * sm       Second part of the signature as an mp_int.
+ * res      Set to 1 when the signature verifies and 0 otherwise.
+ * heap     Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*12];
+    sp_digit u2d[2*12];
+    sp_digit sd[2*12];
+    sp_digit tmpd[2*12 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+
+    err = sp_384_point_new_12(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 12;
+        u2 = d + 2 * 12;
+        s = d + 4 * 12;
+        tmp = d + 6 * 12;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        /* Only the first 384 bits of the hash are used. */
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(u1, 12, hash, (int)hashLen);
+        sp_384_from_mp(u2, 12, r);
+        sp_384_from_mp(s, 12, sm);
+        sp_384_from_mp(p2->x, 12, pX);
+        sp_384_from_mp(p2->y, 12, pY);
+        sp_384_from_mp(p2->z, 12, pZ);
+
+        /* Convert s to Montgomery form (mod order). */
+        {
+            sp_384_mul_12(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_12(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_12(s);
+        /* u1 = e/s mod order, u2 = r/s mod order. */
+        {
+            sp_384_mont_inv_order_12(s, s, tmp);
+            sp_384_mont_mul_order_12(u1, u1, s);
+            sp_384_mont_mul_order_12(u2, u2, s);
+        }
+
+        err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        {
+            sp_384_proj_point_add_12(p1, p1, p2, tmp);
+            /* Handle the sum being the point at infinity (Z == 0). */
+            if (sp_384_iszero_12(p1->z)) {
+                if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
+                    sp_384_proj_point_dbl_12(p1, p2, tmp);
+                }
+                else {
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    p1->x[6] = 0;
+                    p1->x[7] = 0;
+                    p1->x[8] = 0;
+                    p1->x[9] = 0;
+                    p1->x[10] = 0;
+                    p1->x[11] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 12, r);
+        err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 12, r);
+            carry = sp_384_add_12(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_12(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_12(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
+                            p384_mp_mod);
+                        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_12(p1, 0, heap);
+    sp_384_point_free_12(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* buf = NULL;
+#else
+    sp_digit lhsd[2*12];
+    sp_digit rhsd[2*12];
+#endif
+    sp_digit* lhs;
+    sp_digit* rhs;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    buf = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (buf == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        lhs = buf + 0 * 12;
+        rhs = buf + 2 * 12;
+#else
+        (void)heap;
+
+        lhs = lhsd;
+        rhs = rhsd;
+#endif
+
+        /* lhs = y^2 mod p */
+        sp_384_sqr_12(lhs, point->y);
+        (void)sp_384_mod_12(lhs, lhs, p384_mod);
+        /* rhs = x^3 mod p */
+        sp_384_sqr_12(rhs, point->x);
+        (void)sp_384_mod_12(rhs, rhs, p384_mod);
+        sp_384_mul_12(rhs, rhs, point->x);
+        (void)sp_384_mod_12(rhs, rhs, p384_mod);
+        /* lhs = y^2 - x^3 mod p (negate then add). */
+        (void)sp_384_sub_12(rhs, p384_mod, rhs);
+        sp_384_mont_add_12(lhs, lhs, rhs, p384_mod);
+
+        /* lhs = y^2 - x^3 + 3.x; curve has a = -3. */
+        sp_384_mont_add_12(lhs, lhs, point->x, p384_mod);
+        sp_384_mont_add_12(lhs, lhs, point->x, p384_mod);
+        sp_384_mont_add_12(lhs, lhs, point->x, p384_mod);
+
+        /* On the curve exactly when y^2 - x^3 + 3.x == b. */
+        if (sp_384_cmp_12(lhs, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (buf != NULL) {
+        XFREE(buf, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 ptd;
+#endif
+    sp_point_384* pt;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(NULL, ptd, pt);
+    if (err == MP_OKAY) {
+        /* Load the affine ordinates and force Z to 1. */
+        sp_384_from_mp(pt->x, 12, pX);
+        sp_384_from_mp(pt->y, 12, pY);
+        sp_384_from_bin(pt->z, 12, one, (int)sizeof(one));
+
+        /* Delegate the curve-equation check. */
+        err = sp_384_ecc_is_point_12(pt, NULL);
+    }
+
+    sp_384_point_free_12(pt, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[12];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+
+    err = sp_384_point_new_12(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        /* Load the public point as affine (Z = 1) and the private scalar. */
+        sp_384_from_mp(pub->x, 12, pX);
+        sp_384_from_mp(pub->y, 12, pY);
+        sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 12, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_12(pub->x) != 0) &&
+            (sp_384_iszero_12(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_12(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_12(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+        err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_12(p->x) == 0) ||
+            (sp_384_iszero_12(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+        err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_12(p->x, pub->x) != 0 ||
+            sp_384_cmp_12(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, heap);
+    sp_384_point_free_12(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+    mp_int* qX, mp_int* qY, mp_int* qZ,
+    mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    /* Initialize to NULL: if point allocation fails before tmp is assigned
+     * (malloc build), the cleanup below would otherwise XFREE an
+     * uninitialized pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_12(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Load both input points and add in place: p = p + q. */
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+        sp_384_from_mp(q->x, 12, qX);
+        sp_384_from_mp(q->y, 12, qY);
+        sp_384_from_mp(q->z, 12, qZ);
+
+        sp_384_proj_point_add_12(p, p, q, tmp);
+    }
+
+    /* Copy the projective result out. */
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(q, 0, NULL);
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+    mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 2];
+    sp_point_384 pd;
+#endif
+    /* Initialize to NULL: if point allocation fails before tmp is assigned
+     * (malloc build), the cleanup below would otherwise XFREE an
+     * uninitialized pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Load the input point and double in place: p = 2.p. */
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+        sp_384_proj_point_dbl_12(p, p, tmp);
+    }
+
+    /* Copy the projective result out. */
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 12 * 6];
+    sp_point_384 pd;
+#endif
+    /* Initialize to NULL: if point allocation fails before tmp is assigned
+     * (malloc build), the cleanup below would otherwise XFREE an
+     * uninitialized pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_12(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        /* Load the projective point and map to affine (Z becomes 1). */
+        sp_384_from_mp(p->x, 12, pX);
+        sp_384_from_mp(p->y, 12, pY);
+        sp_384_from_mp(p->z, 12, pZ);
+
+        sp_384_map_12(p, p, tmp);
+    }
+
+    /* Write the affine result back over the inputs. */
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_12(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_12(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 12];
+    sp_digit t2d[2 * 12];
+    sp_digit t3d[2 * 12];
+    sp_digit t4d[2 * 12];
+    sp_digit t5d[2 * 12];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 12;
+        t2 = d + 2 * 12;
+        t3 = d + 4 * 12;
+        t4 = d + 6 * 12;
+        t5 = d + 8 * 12;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        /* Exponentiate by (p384 + 1) / 4 with a fixed addition chain;
+         * for a prime p = 3 (mod 4), y^((p+1)/4) is a square root of y. */
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 12];
+    sp_digit yd[2 * 12];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 12;
+        y = d + 2 * 12;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_384_from_mp(x, 12, xm);
+        /* Convert x to Montgomery form. */
+        err = sp_384_mod_mul_norm_12(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        sp_384_mont_sub_12(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b */
+        err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_12(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_12(y);
+    }
+    if (err == MP_OKAY) {
+        /* Reduce out of Montgomery form before checking parity. */
+        XMEMSET(y + 12, 0, 12U * sizeof(sp_digit));
+        sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod);
+        /* Negate y when its parity does not match the requested one. */
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_12(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_SP_ARM_CORTEX_M_ASM */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_dsp32.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_dsp32.c
new file mode 100644
index 000000000..ef95c06fb
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_dsp32.c
@@ -0,0 +1,4908 @@
+/* sp_cdsp_signed.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* from wolfcrypt/src/sp_c32.c */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_ECC)
+#ifdef WOLFSSL_DSP
+
+#include <wolfssl/wolfcrypt/sp.h>
+#include "remote.h"
+#include "hexagon_protos.h"
+#include "hexagon_types.h"
+
+#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) && (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Mask for address to obfuscate which of the two address will be used. */
+static const size_t addr_mask[2] = { 0, (size_t)-1 };
+#endif
+
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
+/* Point structure to use.
+ * Ordinates use 26-bit limbs with room for double-width products
+ * (2 * 10 digits); 128-byte alignment presumably suits the Hexagon DSP
+ * transfers this file targets - TODO confirm.
+ */
+typedef struct sp_point {
+    sp_digit x[2 * 10] __attribute__((aligned(128)));
+    sp_digit y[2 * 10] __attribute__((aligned(128)));
+    sp_digit z[2 * 10] __attribute__((aligned(128)));
+    int infinity;
+} sp_point;
+
+/* The modulus (prime) of the curve P256. */
+static const sp_digit p256_mod[10] __attribute__((aligned(128))) = {
+ 0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000,0x0000000,0x0000000,
+ 0x0000400,0x3ff0000,0x03fffff
+};
+#ifndef WOLFSSL_SP_SMALL
+/* The Montgomery normalizer for modulus of the curve P256. */
+static const sp_digit p256_norm_mod[10] __attribute__((aligned(128))) = {
+ 0x0000001,0x0000000,0x0000000,0x3fc0000,0x3ffffff,0x3ffffff,0x3ffffff,
+ 0x3fffbff,0x000ffff,0x0000000
+};
+#endif /* WOLFSSL_SP_SMALL */
+/* The Montgomery multiplier for modulus of the curve P256. */
+static const sp_digit p256_mp_mod __attribute__((aligned(128))) = 0x000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* The order of the curve P256. */
+static const sp_digit p256_order[10] __attribute__((aligned(128))) = {
+ 0x0632551,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
+ 0x00003ff,0x3ff0000,0x03fffff
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P256. */
+static const sp_digit p256_norm_order[10] __attribute__((aligned(128))) = {
+ 0x39cdaaf,0x18d4f40,0x217b0c4,0x14963a1,0x0431905,0x0000000,0x0000000,
+ 0x3fffc00,0x000ffff,0x0000000
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P256. */
+static const sp_digit p256_mp_order __attribute__((aligned(128))) = 0x200bc4f;
+#endif
+/* The base point of curve P256. */
+static const sp_point p256_base __attribute__((aligned(128))) = {
+ /* X ordinate */
+ {
+ 0x098c296,0x04e5176,0x33a0f4a,0x204b7ac,0x277037d,0x0e9103c,0x3ce6e56,
+ 0x1091fe2,0x1f2e12c,0x01ac5f4, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Y ordinate */
+ {
+ 0x3bf51f5,0x1901a0d,0x1ececbb,0x15dacc5,0x22bce33,0x303e785,0x27eb4a7,
+ 0x1fe6e3b,0x2e2fe1a,0x013f8d0, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Z ordinate */
+ {
+ 0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
+ 0x0000000,0x0000000,0x0000000, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* infinity */
+ 0
+};
+
+/* Obtain a point to work with: heap-allocated in small-stack builds,
+ * otherwise the caller-provided stack point.
+ *
+ * heap Heap to use for allocation (small-stack builds only).
+ * sp   Caller-provided point storage (ignored in small-stack builds).
+ * p    Out parameter: receives the point to use.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_ecc_point_new_ex(void* heap, sp_point* sp, sp_point** p)
+{
+    int ret = MP_OKAY;
+    (void)heap;
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    (void)sp;
+    *p = (sp_point*)XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC);
+#else
+    *p = sp;
+#endif
+    /* Check the point itself, not the out parameter: p is the address of a
+     * caller local and can never be NULL, so the previous (p == NULL) test
+     * silently ignored XMALLOC failure and led to a NULL dereference. */
+    if (*p == NULL) {
+        ret = MEMORY_E;
+    }
+    return ret;
+}
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+/* Allocate memory for point and return error. */
+#define sp_ecc_point_new(heap, sp, p) sp_ecc_point_new_ex((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_ecc_point_new(heap, sp, p) sp_ecc_point_new_ex((heap), &(sp), &(p))
+#endif
+
+
+/* Release a point obtained from sp_ecc_point_new, optionally scrubbing it.
+ *
+ * p     Point to dispose of (may be NULL in small-stack builds).
+ * clear Non-zero to zeroize the point data first.
+ * heap  Heap the point was allocated from (small-stack builds only).
+ */
+static void sp_ecc_point_free(sp_point* p, int clear, void* heap)
+{
+    (void)heap;
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    /* Heap-allocated point: scrub on request, then free. */
+    if (p == NULL) {
+        return;
+    }
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+    XFREE(p, heap, DYNAMIC_TYPE_ECC);
+#else
+    /* Stack-backed point: nothing to free, only scrub on request. */
+    if (clear != 0) {
+        XMEMSET(p, 0, sizeof(*p));
+    }
+#endif
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    int64_t* td;
+#else
+    int64_t td[8];
+    int64_t a32d[8];
+#endif
+    int64_t* t;
+    int64_t* a32;
+    int64_t o;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        t = td;
+        a32 = td + 8;
+#else
+        t = td;
+        a32 = a32d;
+#endif
+
+        /* Repack ten 26-bit limbs into eight 32-bit words. */
+        a32[0] = a[0];
+        a32[0] |= a[1] << 26U;
+        a32[0] &= 0xffffffffL;
+        a32[1] = (sp_digit)(a[1] >> 6);
+        a32[1] |= a[2] << 20U;
+        a32[1] &= 0xffffffffL;
+        a32[2] = (sp_digit)(a[2] >> 12);
+        a32[2] |= a[3] << 14U;
+        a32[2] &= 0xffffffffL;
+        a32[3] = (sp_digit)(a[3] >> 18);
+        a32[3] |= a[4] << 8U;
+        a32[3] &= 0xffffffffL;
+        a32[4] = (sp_digit)(a[4] >> 24);
+        a32[4] |= a[5] << 2U;
+        a32[4] |= a[6] << 28U;
+        a32[4] &= 0xffffffffL;
+        a32[5] = (sp_digit)(a[6] >> 4);
+        a32[5] |= a[7] << 22U;
+        a32[5] &= 0xffffffffL;
+        a32[6] = (sp_digit)(a[7] >> 10);
+        a32[6] |= a[8] << 16U;
+        a32[6] &= 0xffffffffL;
+        a32[7] = (sp_digit)(a[8] >> 16);
+        a32[7] |= a[9] << 10U;
+        a32[7] &= 0xffffffffL;
+
+        /* Apply the fixed reduction matrix for the P-256 normalizer; each
+         * comment row gives the word coefficients. */
+        /* 1 1 0 -1 -1 -1 -1 0 */
+        t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
+        /* 0 1 1 0 -1 -1 -1 -1 */
+        t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
+        /* 0 0 1 1 0 -1 -1 -1 */
+        t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
+        /* -1 -1 0 2 2 1 0 -1 */
+        t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
+        /* 0 -1 -1 0 2 2 1 0 */
+        t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
+        /* 0 0 -1 -1 0 2 2 1 */
+        t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
+        /* -1 -1 0 0 0 1 3 2 */
+        t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
+        /* 1 0 -1 -1 -1 -1 0 3 */
+        t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
+
+        /* Propagate carries, then fold the top carry o back in. */
+        t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+        t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+        t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+        t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+        t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+        t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+        t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+        o     = t[7] >> 32U; t[7] &= 0xffffffffL;
+        t[0] += o;
+        t[3] -= o;
+        t[6] -= o;
+        t[7] += o;
+        t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
+        t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
+        t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
+        t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
+        t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
+        t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
+        t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
+
+        /* Unpack the eight 32-bit words back into ten 26-bit limbs. */
+        r[0] = (sp_digit)(t[0]) & 0x3ffffffL;
+        r[1] = (sp_digit)(t[0] >> 26U);
+        r[1] |= t[1] << 6U;
+        r[1] &= 0x3ffffffL;
+        r[2] = (sp_digit)(t[1] >> 20U);
+        r[2] |= t[2] << 12U;
+        r[2] &= 0x3ffffffL;
+        r[3] = (sp_digit)(t[2] >> 14U);
+        r[3] |= t[3] << 18U;
+        r[3] &= 0x3ffffffL;
+        r[4] = (sp_digit)(t[3] >> 8U);
+        r[4] |= t[4] << 24U;
+        r[4] &= 0x3ffffffL;
+        r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL;
+        r[6] = (sp_digit)(t[4] >> 28U);
+        r[6] |= t[5] << 4U;
+        r[6] &= 0x3ffffffL;
+        r[7] = (sp_digit)(t[5] >> 22U);
+        r[7] |= t[6] << 10U;
+        r[7] &= 0x3ffffffL;
+        r[8] = (sp_digit)(t[6] >> 16U);
+        r[8] |= t[7] << 16U;
+        r[8] &= 0x3ffffffL;
+        r[9] = (sp_digit)(t[7] >> 10U);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Compare a with b in constant time.
+ *
+ * Scans from the most significant digit (index 9) downwards.  The mask
+ * (0 - (r == 0)) is all-ones only while no differing digit has been seen,
+ * so r retains the first (most significant) non-zero difference.  Every
+ * digit is always processed, keeping the running time independent of the
+ * compared values.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit r = 0;
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    /* Mask is zero after the first differing digit, so later (less
+     * significant) digits cannot overwrite the result. */
+    for (i=9; i>=0; i--) {
+        r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    }
+#else
+    /* Unrolled form of the loop above; most significant digit first. */
+    r |= (a[ 9] - b[ 9]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+    r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
+#endif /* WOLFSSL_SP_SMALL */
+
+    return r;
+}
+
+/* Normalize the values in each word to 26 bits.
+ *
+ * Propagates any excess above 26 bits in words 0..8 into the next word.
+ * The top word a[9] is deliberately not masked and therefore accumulates
+ * the overall carry; callers that need it reduced check a[9] >> 22 (see
+ * sp_256_mont_reduce_10).
+ *
+ * a Array of sp_digit to normalize.
+ */
+static void sp_256_norm_10(sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    for (i = 0; i < 9; i++) {
+        a[i+1] += a[i] >> 26;
+        a[i] &= 0x3ffffff;
+    }
+#else
+    /* Unrolled sequential carry chain; Q6_R_and_RR is the Hexagon
+     * 32-bit AND intrinsic.  Each step depends on the previous one,
+     * so the order must not change. */
+    a[1] += a[0] >> 26; a[0] = Q6_R_and_RR(a[0], 0x3ffffff);
+    a[2] += a[1] >> 26; a[1] = Q6_R_and_RR(a[1], 0x3ffffff);
+    a[3] += a[2] >> 26; a[2] = Q6_R_and_RR(a[2], 0x3ffffff);
+    a[4] += a[3] >> 26; a[3] = Q6_R_and_RR(a[3], 0x3ffffff);
+    a[5] += a[4] >> 26; a[4] = Q6_R_and_RR(a[4], 0x3ffffff);
+    a[6] += a[5] >> 26; a[5] = Q6_R_and_RR(a[5], 0x3ffffff);
+    a[7] += a[6] >> 26; a[6] = Q6_R_and_RR(a[6], 0x3ffffff);
+    a[8] += a[7] >> 26; a[7] = Q6_R_and_RR(a[7], 0x3ffffff);
+    a[9] += a[8] >> 26; a[8] = Q6_R_and_RR(a[8], 0x3ffffff);
+#endif
+}
+
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 (all ones) to subtract and 0 when not.
+ *
+ * Branch-free: b[i] & m is either b[i] or 0, so both outcomes execute
+ * the same instructions (constant time with respect to the condition).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] - (b[i] & m);
+    }
+#else
+    /* Unrolled; Q6_R_sub_RR/Q6_R_and_RR are Hexagon 32-bit SUB/AND
+     * intrinsics.  Digits are independent, no carries here. */
+    r[ 0] = Q6_R_sub_RR(a[ 0], Q6_R_and_RR(b[ 0], m));
+    r[ 1] = Q6_R_sub_RR(a[ 1], Q6_R_and_RR(b[ 1], m));
+    r[ 2] = Q6_R_sub_RR(a[ 2], Q6_R_and_RR(b[ 2], m));
+    r[ 3] = Q6_R_sub_RR(a[ 3], Q6_R_and_RR(b[ 3], m));
+    r[ 4] = Q6_R_sub_RR(a[ 4], Q6_R_and_RR(b[ 4], m));
+    r[ 5] = Q6_R_sub_RR(a[ 5], Q6_R_and_RR(b[ 5], m));
+    r[ 6] = Q6_R_sub_RR(a[ 6], Q6_R_and_RR(b[ 6], m));
+    r[ 7] = Q6_R_sub_RR(a[ 7], Q6_R_and_RR(b[ 7], m));
+    r[ 8] = Q6_R_sub_RR(a[ 8], Q6_R_and_RR(b[ 8], m));
+    r[ 9] = Q6_R_sub_RR(a[ 9], Q6_R_and_RR(b[ 9], m));
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#define sp_256_mont_reduce_order_10 sp_256_mont_reduce_10
+
+/* Mul a by scalar b and add into r. (r += a * b)
+ *
+ * Each 64-bit product of two 26-bit values fits well within int64_t,
+ * so no intermediate overflow occurs.  The final carry lands in r[10],
+ * so r must have at least 11 words (callers pass into a 20-word
+ * reduction buffer).
+ *
+ * r A single precision integer (at least 11 words, updated in place).
+ * a A single precision integer.
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        t += (tb * a[i]) + r[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[10] += t;
+#else
+    int64_t tb = b;
+    int64_t t[10];
+
+    /* Q6_P_mpy_RR: Hexagon 32x32 -> 64-bit multiply. */
+    t[ 0] = Q6_P_mpy_RR(tb, a[ 0]);
+    t[ 1] = Q6_P_mpy_RR(tb, a[ 1]);
+    t[ 2] = Q6_P_mpy_RR(tb, a[ 2]);
+    t[ 3] = Q6_P_mpy_RR(tb, a[ 3]);
+    t[ 4] = Q6_P_mpy_RR(tb, a[ 4]);
+    t[ 5] = Q6_P_mpy_RR(tb, a[ 5]);
+    t[ 6] = Q6_P_mpy_RR(tb, a[ 6]);
+    t[ 7] = Q6_P_mpy_RR(tb, a[ 7]);
+    t[ 8] = Q6_P_mpy_RR(tb, a[ 8]);
+    t[ 9] = Q6_P_mpy_RR(tb, a[ 9]);
+    /* Add each product's low 26 bits plus the previous product's high
+     * part.  Note: r words are NOT renormalized here; the caller
+     * (sp_256_mont_reduce_10) carries them forward. */
+    r[ 0] += (t[ 0] & 0x3ffffff);
+    r[ 1] += (t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
+    r[ 2] += (t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
+    r[ 3] += (t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
+    r[ 4] += (t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
+    r[ 5] += (t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
+    r[ 6] += (t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
+    r[ 7] += (t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
+    r[ 8] += (t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
+    r[ 9] += (t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
+    r[10] += t[ 9] >> 26;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Shift the result in the high 256 bits down to the bottom.
+ *
+ * With 26-bit words, bit 256 sits at bit 22 of word 9 (9 * 26 + 22 = 256).
+ * The high half therefore starts at a[9] >> 22, and each following word
+ * a[10..19] contributes shifted left by 4 bits (26 - 22).  The low 10
+ * words of r receive the shifted value and the upper 10 are zeroed.
+ * r and a may alias (callers pass the same buffer).
+ *
+ * r A single precision number (20 words).
+ * a A single precision number (20 words).
+ */
+static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+    sp_digit n, s;
+
+    /* n = bits gathered so far, s = current source word stream. */
+    s = a[10];
+    n = a[9] >> 22;
+    for (i = 0; i < 9; i++) {
+        n += (s & 0x3ffffff) << 4;
+        r[i] = n & 0x3ffffff;
+        n >>= 26;
+        s = a[11 + i] + (s >> 26);
+    }
+    n += s << 4;
+    r[9] = n;
+#else
+    sp_digit n, s;
+
+    /* Unrolled form of the loop above; strictly sequential. */
+    s = a[10]; n = a[9] >> 22;
+    n += (s & 0x3ffffff) << 4; r[ 0] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[11] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 1] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[12] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 2] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[13] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 3] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[14] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 4] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[15] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 5] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[16] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 6] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[17] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 7] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[18] + (s >> 26);
+    n += (s & 0x3ffffff) << 4; r[ 8] = Q6_R_and_RR(n, 0x3ffffff);
+    n >>= 26; s = a[19] + (s >> 26);
+    n += s << 4; r[ 9] = n;
+#endif /* WOLFSSL_SP_SMALL */
+    /* Clear the (now consumed) high half. */
+    XMEMSET(&r[10], 0, sizeof(*r) * 10U);
+}
+
+
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * For each of the 10 low words, a multiple of the modulus (mu * m) is
+ * added so that the word becomes zero, then the whole value is shifted
+ * down 256 bits and conditionally reduced below the modulus.  The final
+ * mu is masked to 22 bits (256 - 9 * 26) as only the bits below the
+ * 256-bit boundary need cancelling.
+ *
+ * When mp == 1 the modulus is known to be p256_mod (the prime), so that
+ * branch passes p256_mod directly; the mp != 1 branch handles other
+ * moduli such as the curve order.
+ *
+ * a A single precision number to reduce in place (20 words).
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
+{
+    sp_digit mu;
+
+
+    /* unrolled for loops due to unexpected behavior with -O optimizations */
+    if (mp != 1) {
+        mu = Q6_P_mpy_RR(a[0], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+0, m, mu);
+        a[0+1] += a[0] >> 26;
+
+        mu = Q6_P_mpy_RR(a[1], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+1, m, mu);
+        a[1+1] += a[1] >> 26;
+
+        mu = Q6_P_mpy_RR(a[2], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+2, m, mu);
+        a[2+1] += a[2] >> 26;
+
+        mu = Q6_P_mpy_RR(a[3], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+3, m, mu);
+        a[3+1] += a[3] >> 26;
+
+        mu = Q6_P_mpy_RR(a[4], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+4, m, mu);
+        a[4+1] += a[4] >> 26;
+
+        mu = Q6_P_mpy_RR(a[5], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+5, m, mu);
+        a[5+1] += a[5] >> 26;
+
+        mu = Q6_P_mpy_RR(a[6], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+6, m, mu);
+        a[6+1] += a[6] >> 26;
+
+        mu = Q6_P_mpy_RR(a[7], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+7, m, mu);
+        a[7+1] += a[7] >> 26;
+
+        mu = Q6_P_mpy_RR(a[8], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+8, m, mu);
+        a[8+1] += a[8] >> 26;
+
+        /* Final word: mask mu to 22 bits (up to the 256-bit boundary). */
+        mu = Q6_P_mpy_RR(a[9], mp) & 0x3fffffL;
+        sp_256_mul_add_10(a+9, m, mu);
+        a[9+1] += a[9] >> 26;
+        a[9] &= 0x3ffffff;
+    }
+    else {
+        /* mp == 1: modulus is the P-256 prime; mu is just the word. */
+        mu = Q6_P_mpy_RR(a[0], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+0, p256_mod, mu);
+        a[0+1] += a[0] >> 26;
+
+        mu = Q6_P_mpy_RR(a[1], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+1, p256_mod, mu);
+        a[1+1] += a[1] >> 26;
+
+        mu = Q6_P_mpy_RR(a[2], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+2, p256_mod, mu);
+        a[2+1] += a[2] >> 26;
+
+        mu = Q6_P_mpy_RR(a[3], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+3, p256_mod, mu);
+        a[3+1] += a[3] >> 26;
+
+        mu = Q6_P_mpy_RR(a[4], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+4, p256_mod, mu);
+        a[4+1] += a[4] >> 26;
+
+        mu = Q6_P_mpy_RR(a[5], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+5, p256_mod, mu);
+        a[5+1] += a[5] >> 26;
+
+        mu = Q6_P_mpy_RR(a[6], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+6, p256_mod, mu);
+        a[6+1] += a[6] >> 26;
+
+        mu = Q6_P_mpy_RR(a[7], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+7, p256_mod, mu);
+        a[7+1] += a[7] >> 26;
+
+        mu = Q6_P_mpy_RR(a[8], mp) & 0x3ffffff;
+        sp_256_mul_add_10(a+8, p256_mod, mu);
+        a[8+1] += a[8] >> 26;
+
+        /* Final word: mask mu to 22 bits (up to the 256-bit boundary). */
+        mu = Q6_P_mpy_RR(a[9], mp) & 0x3fffffL;
+        sp_256_mul_add_10(a+9, p256_mod, mu);
+        a[9+1] += a[9] >> 26;
+        a[9] &= 0x3ffffff;
+    }
+
+
+    /* Drop the (now zero) low 256 bits, then subtract the modulus once
+     * if any bits remain above bit 255, and renormalize to 26-bit words. */
+    sp_256_mont_shift_10(a, a);
+    sp_256_cond_sub_10(a, a, m, 0 - (((a[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(a);
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Schoolbook multiplication of two numbers held as 10 x 26-bit words,
+ * producing a 20-word result.  Each 64-bit column sum t{k} accumulates
+ * the products a[i]*b[j] with i + j == k; at most 10 products of 26-bit
+ * values (<= 10 * 2^52) fit comfortably in int64_t, so no intermediate
+ * overflow occurs.  The carry chain at the end renormalizes each column
+ * to a 26-bit result word.
+ *
+ * r A single precision integer (20 words written).
+ * a A single precision integer (10 words).
+ * b A single precision integer (10 words).
+ */
+SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+#if 1
+    /* Column sums: Q6_P_mpy_RR is the Hexagon 32x32 -> 64-bit multiply. */
+    int64_t t0 = Q6_P_mpy_RR(a[0], b[0]);
+    int64_t t1 = Q6_P_mpy_RR(a[0], b[1])
+            + Q6_P_mpy_RR(a[1], b[0]);
+    int64_t t2 = Q6_P_mpy_RR(a[0], b[2])
+            + Q6_P_mpy_RR(a[1], b[1])
+            + Q6_P_mpy_RR(a[2], b[0]);
+    int64_t t3 = Q6_P_mpy_RR(a[0], b[3])
+            + Q6_P_mpy_RR(a[1], b[2])
+            + Q6_P_mpy_RR(a[2], b[1])
+            + Q6_P_mpy_RR(a[3], b[0]);
+    int64_t t4 = Q6_P_mpy_RR(a[0], b[4])
+            + Q6_P_mpy_RR(a[1], b[3])
+            + Q6_P_mpy_RR(a[2], b[2])
+            + Q6_P_mpy_RR(a[3], b[1])
+            + Q6_P_mpy_RR(a[4], b[0]);
+    int64_t t5 = Q6_P_mpy_RR(a[0], b[5])
+            + Q6_P_mpy_RR(a[1], b[4])
+            + Q6_P_mpy_RR(a[2], b[3])
+            + Q6_P_mpy_RR(a[3], b[2])
+            + Q6_P_mpy_RR(a[4], b[1])
+            + Q6_P_mpy_RR(a[5], b[0]);
+    int64_t t6 = Q6_P_mpy_RR(a[0], b[6])
+            + Q6_P_mpy_RR(a[1], b[5])
+            + Q6_P_mpy_RR(a[2], b[4])
+            + Q6_P_mpy_RR(a[3], b[3])
+            + Q6_P_mpy_RR(a[4], b[2])
+            + Q6_P_mpy_RR(a[5], b[1])
+            + Q6_P_mpy_RR(a[6], b[0]);
+    int64_t t7 = Q6_P_mpy_RR(a[0], b[7])
+            + Q6_P_mpy_RR(a[1], b[6])
+            + Q6_P_mpy_RR(a[2], b[5])
+            + Q6_P_mpy_RR(a[3], b[4])
+            + Q6_P_mpy_RR(a[4], b[3])
+            + Q6_P_mpy_RR(a[5], b[2])
+            + Q6_P_mpy_RR(a[6], b[1])
+            + Q6_P_mpy_RR(a[7], b[0]);
+    int64_t t8 = Q6_P_mpy_RR(a[0], b[8])
+            + Q6_P_mpy_RR(a[1], b[7])
+            + Q6_P_mpy_RR(a[2], b[6])
+            + Q6_P_mpy_RR(a[3], b[5])
+            + Q6_P_mpy_RR(a[4], b[4])
+            + Q6_P_mpy_RR(a[5], b[3])
+            + Q6_P_mpy_RR(a[6], b[2])
+            + Q6_P_mpy_RR(a[7], b[1])
+            + Q6_P_mpy_RR(a[8], b[0]);
+    int64_t t9 = Q6_P_mpy_RR(a[0], b[9])
+            + Q6_P_mpy_RR(a[1], b[8])
+            + Q6_P_mpy_RR(a[2], b[7])
+            + Q6_P_mpy_RR(a[3], b[6])
+            + Q6_P_mpy_RR(a[4], b[5])
+            + Q6_P_mpy_RR(a[5], b[4])
+            + Q6_P_mpy_RR(a[6], b[3])
+            + Q6_P_mpy_RR(a[7], b[2])
+            + Q6_P_mpy_RR(a[8], b[1])
+            + Q6_P_mpy_RR(a[9], b[0]);
+    int64_t t10 = Q6_P_mpy_RR(a[1], b[9])
+            + Q6_P_mpy_RR(a[2], b[8])
+            + Q6_P_mpy_RR(a[3], b[7])
+            + Q6_P_mpy_RR(a[4], b[6])
+            + Q6_P_mpy_RR(a[5], b[5])
+            + Q6_P_mpy_RR(a[6], b[4])
+            + Q6_P_mpy_RR(a[7], b[3])
+            + Q6_P_mpy_RR(a[8], b[2])
+            + Q6_P_mpy_RR(a[9], b[1]);
+    int64_t t11 = Q6_P_mpy_RR(a[2], b[9])
+            + Q6_P_mpy_RR(a[3], b[8])
+            + Q6_P_mpy_RR(a[4], b[7])
+            + Q6_P_mpy_RR(a[5], b[6])
+            + Q6_P_mpy_RR(a[6], b[5])
+            + Q6_P_mpy_RR(a[7], b[4])
+            + Q6_P_mpy_RR(a[8], b[3])
+            + Q6_P_mpy_RR(a[9], b[2]);
+    int64_t t12 = Q6_P_mpy_RR(a[3], b[9])
+            + Q6_P_mpy_RR(a[4], b[8])
+            + Q6_P_mpy_RR(a[5], b[7])
+            + Q6_P_mpy_RR(a[6], b[6])
+            + Q6_P_mpy_RR(a[7], b[5])
+            + Q6_P_mpy_RR(a[8], b[4])
+            + Q6_P_mpy_RR(a[9], b[3]);
+    int64_t t13 = Q6_P_mpy_RR(a[4], b[9])
+            + Q6_P_mpy_RR(a[5], b[8])
+            + Q6_P_mpy_RR(a[6], b[7])
+            + Q6_P_mpy_RR(a[7], b[6])
+            + Q6_P_mpy_RR(a[8], b[5])
+            + Q6_P_mpy_RR(a[9], b[4]);
+    int64_t t14 = Q6_P_mpy_RR(a[5], b[9])
+            + Q6_P_mpy_RR(a[6], b[8])
+            + Q6_P_mpy_RR(a[7], b[7])
+            + Q6_P_mpy_RR(a[8], b[6])
+            + Q6_P_mpy_RR(a[9], b[5]);
+    int64_t t15 = Q6_P_mpy_RR(a[6], b[9])
+            + Q6_P_mpy_RR(a[7], b[8])
+            + Q6_P_mpy_RR(a[8], b[7])
+            + Q6_P_mpy_RR(a[9], b[6]);
+    int64_t t16 = Q6_P_mpy_RR(a[7], b[9])
+            + Q6_P_mpy_RR(a[8], b[8])
+            + Q6_P_mpy_RR(a[9], b[7]);
+    int64_t t17 = Q6_P_mpy_RR(a[8], b[9])
+            + Q6_P_mpy_RR(a[9], b[8]);
+    int64_t t18 = Q6_P_mpy_RR(a[9], b[9]);
+
+
+    /* Carry chain: renormalize each column to 26 bits, carrying the
+     * excess into the next column.  Strictly sequential. */
+    t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff;
+    t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff;
+    t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff;
+    t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff;
+    t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff;
+    t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff;
+    t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff;
+    t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff;
+    t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff;
+    t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff;
+    t11 += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12 += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13 += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14 += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15 += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16 += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17 += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18 += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    r[19] = (sp_digit)(t18 >> 26);
+    r[18] = t18 & 0x3ffffff;
+#endif
+#if 0
+    /* Testing speeds with using HVX_Vectors */
+    /* NOTE(review): disabled experimental variant.  It appears to split
+     * each 32x32 product into HVX even/odd half-word partial products
+     * (vmpye/vmpyo) recombined at bit 31 -- confirm against the Hexagon
+     * HVX manual before ever enabling. */
+    {
+        int64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18;
+        HVX_Vector av, splat;
+        HVX_Vector vlow, vhi;
+
+        av = Q6_V_vzero();
+        vlow = Q6_V_vzero();
+        vhi = Q6_V_vzero();
+
+        XMEMCPY((byte*)&av, (byte*)a, 40);
+
+        splat = Q6_V_vsplat_R(b[0]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        unsigned int* loi = (unsigned int*)&vlow;
+        int* hii = (int*)&vhi;
+
+        /* a[0] * b[0] */
+        t0 = loi[0] | ((int64_t)hii[0] << 31);
+
+        /* a[1] * b[0] */
+        t1 = loi[1] | ((int64_t)hii[1] << 31);
+
+        /* a[2] * b[0] */
+        t2 = loi[2] | ((int64_t)hii[2] << 31);
+
+        /* a[3] * b[0] */
+        t3 = loi[3] | ((int64_t)hii[3] << 31);
+
+        /* a[4] * b[0] */
+        t4 = loi[4] | ((int64_t)hii[4] << 31);
+
+        /* a[5] * b[0] */
+        t5 = loi[5] | ((int64_t)hii[5] << 31);
+
+        /* a[6] * b[0] */
+        t6 = loi[6] | ((int64_t)hii[6] << 31);
+
+        /* a[7] * b[0] */
+        t7 = loi[7] | ((int64_t)hii[7] << 31);
+
+        /* a[8] * b[0] */
+        t8 = loi[8] | ((int64_t)hii[8] << 31);
+
+        /* a[9] * b[0] */
+        t9 = loi[9] | ((int64_t)hii[9] << 31);
+
+        /* a[*] * b[1] */
+        splat = Q6_V_vsplat_R(b[1]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        loi = (unsigned int*)&vlow;
+        hii = (int*)&vhi;
+
+        /* a[0] * b[1] */
+        t1 += (loi[0] | ((int64_t)hii[0] << 31));
+
+        /* a[1] * b[1] */
+        t2 += (loi[1] | ((int64_t)hii[1] << 31));
+
+        /* a[2] * b[1] */
+        t3 += (loi[2] | ((int64_t)hii[2] << 31));
+
+        /* a[3] * b[1] */
+        t4 += (loi[3] | ((int64_t)hii[3] << 31));
+
+        /* a[4] * b[1] */
+        t5 += (loi[4] | ((int64_t)hii[4] << 31));
+
+        /* a[5] * b[1] */
+        t6 += (loi[5] | ((int64_t)hii[5] << 31));
+
+        /* a[6] * b[1] */
+        t7 += (loi[6] | ((int64_t)hii[6] << 31));
+
+        /* a[7] * b[1] */
+        t8 += (loi[7] | ((int64_t)hii[7] << 31));
+
+        /* a[8] * b[1] */
+        t9 += (loi[8] | ((int64_t)hii[8] << 31));
+
+        /* a[9] * b[1] */
+        t10 = (loi[9] | ((int64_t)hii[9] << 31));
+
+        /* a[*] * b[2] */
+        splat = Q6_V_vsplat_R(b[2]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        loi = (unsigned int*)&vlow;
+        hii = (int*)&vhi;
+
+
+        /* a[0] * b[2] */
+        t2 += (loi[0] | ((int64_t)hii[0] << 31));
+
+        /* a[1] * b[2] */
+        t3 += (loi[1] | ((int64_t)hii[1] << 31));
+
+        /* a[2] * b[2] */
+        t4 += (loi[2] | ((int64_t)hii[2] << 31));
+
+        /* a[3] * b[2] */
+        t5 += (loi[3] | ((int64_t)hii[3] << 31));
+
+        /* a[4] * b[2] */
+        t6 += (loi[4] | ((int64_t)hii[4] << 31));
+
+        /* a[5] * b[2] */
+        t7 += (loi[5] | ((int64_t)hii[5] << 31));
+
+        /* a[6] * b[2] */
+        t8 += (loi[6] | ((int64_t)hii[6] << 31));
+
+        /* a[7] * b[2] */
+        t9 += (loi[7] | ((int64_t)hii[7] << 31));
+
+        /* a[8] * b[2] */
+        t10 += (loi[8] | ((int64_t)hii[8] << 31));
+
+        /* a[9] * b[2] */
+        t11 = (loi[9] | ((int64_t)hii[9] << 31));
+
+
+        /* a[*] * b[3] */
+        splat = Q6_V_vsplat_R(b[3]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        loi = (unsigned int*)&vlow;
+        hii = (int*)&vhi;
+
+
+        /* a[0] * b[3] */
+        t3 += (loi[0] | ((int64_t)hii[0] << 31));
+
+        /* a[1] * b[3] */
+        t4 += (loi[1] | ((int64_t)hii[1] << 31));
+
+        /* a[2] * b[3] */
+        t5 += (loi[2] | ((int64_t)hii[2] << 31));
+
+        /* a[3] * b[3] */
+        t6 += (loi[3] | ((int64_t)hii[3] << 31));
+
+        /* a[4] * b[3] */
+        t7 += (loi[4] | ((int64_t)hii[4] << 31));
+
+        /* a[5] * b[3] */
+        t8 += (loi[5] | ((int64_t)hii[5] << 31));
+
+        /* a[6] * b[3] */
+        t9 += (loi[6] | ((int64_t)hii[6] << 31));
+
+        /* a[7] * b[3] */
+        t10 += (loi[7] | ((int64_t)hii[7] << 31));
+
+        /* a[8] * b[3] */
+        t11 += (loi[8] | ((int64_t)hii[8] << 31));
+
+        /* a[9] * b[3] */
+        t12 = (loi[9] | ((int64_t)hii[9] << 31));
+
+
+        /* a[*] * b[4] */
+        splat = Q6_V_vsplat_R(b[4]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        loi = (unsigned int*)&vlow;
+        hii = (int*)&vhi;
+
+
+        /* a[0] * b[4] */
+        t4 += (loi[0] | ((int64_t)hii[0] << 31));
+
+        /* a[1] * b[4] */
+        t5 += (loi[1] | ((int64_t)hii[1] << 31));
+
+        /* a[2] * b[4] */
+        t6 += (loi[2] | ((int64_t)hii[2] << 31));
+
+        /* a[3] * b[4] */
+        t7 += (loi[3] | ((int64_t)hii[3] << 31));
+
+        /* a[4] * b[4] */
+        t8 += (loi[4] | ((int64_t)hii[4] << 31));
+
+        /* a[5] * b[4] */
+        t9 += (loi[5] | ((int64_t)hii[5] << 31));
+
+        /* a[6] * b[4] */
+        t10 += (loi[6] | ((int64_t)hii[6] << 31));
+
+        /* a[7] * b[4] */
+        t11 += (loi[7] | ((int64_t)hii[7] << 31));
+
+        /* a[8] * b[4] */
+        t12 += (loi[8] | ((int64_t)hii[8] << 31));
+
+        /* a[9] * b[4] */
+        t13 = (loi[9] | ((int64_t)hii[9] << 31));
+
+
+        /* a[*] * b[5] */
+        splat = Q6_V_vsplat_R(b[5]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        loi = (unsigned int*)&vlow;
+        hii = (int*)&vhi;
+
+
+        /* a[0] * b[5] */
+        t5 += (loi[0] | ((int64_t)hii[0] << 31));
+
+        /* a[1] * b[5] */
+        t6 += (loi[1] | ((int64_t)hii[1] << 31));
+
+        /* a[2] * b[5] */
+        t7 += (loi[2] | ((int64_t)hii[2] << 31));
+
+        /* a[3] * b[5] */
+        t8 += (loi[3] | ((int64_t)hii[3] << 31));
+
+        /* a[4] * b[5] */
+        t9 += (loi[4] | ((int64_t)hii[4] << 31));
+
+        /* a[5] * b[5] */
+        t10 += (loi[5] | ((int64_t)hii[5] << 31));
+
+        /* a[6] * b[5] */
+        t11 += (loi[6] | ((int64_t)hii[6] << 31));
+
+        /* a[7] * b[5] */
+        t12 += (loi[7] | ((int64_t)hii[7] << 31));
+
+        /* a[8] * b[5] */
+        t13 += (loi[8] | ((int64_t)hii[8] << 31));
+
+        /* a[9] * b[5] */
+        t14 = (loi[9] | ((int64_t)hii[9] << 31));
+
+
+        /* a[*] * b[6] */
+        splat = Q6_V_vsplat_R(b[6]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        loi = (unsigned int*)&vlow;
+        hii = (int*)&vhi;
+
+
+        /* a[0] * b[6] */
+        t6 += (loi[0] | ((int64_t)hii[0] << 31));
+
+        /* a[1] * b[6] */
+        t7 += (loi[1] | ((int64_t)hii[1] << 31));
+
+        /* a[2] * b[6] */
+        t8 += (loi[2] | ((int64_t)hii[2] << 31));
+
+        /* a[3] * b[6] */
+        t9 += (loi[3] | ((int64_t)hii[3] << 31));
+
+        /* a[4] * b[6] */
+        t10 += (loi[4] | ((int64_t)hii[4] << 31));
+
+        /* a[5] * b[6] */
+        t11 += (loi[5] | ((int64_t)hii[5] << 31));
+
+        /* a[6] * b[6] */
+        t12 += (loi[6] | ((int64_t)hii[6] << 31));
+
+        /* a[7] * b[6] */
+        t13 += (loi[7] | ((int64_t)hii[7] << 31));
+
+        /* a[8] * b[6] */
+        t14 += (loi[8] | ((int64_t)hii[8] << 31));
+
+        /* a[9] * b[6] */
+        t15 = (loi[9] | ((int64_t)hii[9] << 31));
+
+
+
+        /* a[*] * b[7] */
+        splat = Q6_V_vsplat_R(b[7]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        loi = (unsigned int*)&vlow;
+        hii = (int*)&vhi;
+
+
+        /* a[0] * b[7] */
+        t7 += (loi[0] | ((int64_t)hii[0] << 31));
+
+        /* a[1] * b[7] */
+        t8 += (loi[1] | ((int64_t)hii[1] << 31));
+
+        /* a[2] * b[7] */
+        t9 += (loi[2] | ((int64_t)hii[2] << 31));
+
+        /* a[3] * b[7] */
+        t10 += (loi[3] | ((int64_t)hii[3] << 31));
+
+        /* a[4] * b[7] */
+        t11 += (loi[4] | ((int64_t)hii[4] << 31));
+
+        /* a[5] * b[7] */
+        t12 += (loi[5] | ((int64_t)hii[5] << 31));
+
+        /* a[6] * b[7] */
+        t13 += (loi[6] | ((int64_t)hii[6] << 31));
+
+        /* a[7] * b[7] */
+        t14 += (loi[7] | ((int64_t)hii[7] << 31));
+
+        /* a[8] * b[7] */
+        t15 += (loi[8] | ((int64_t)hii[8] << 31));
+
+        /* a[9] * b[7] */
+        t16 = (loi[9] | ((int64_t)hii[9] << 31));
+
+
+        /* a[*] * b[8] */
+        splat = Q6_V_vsplat_R(b[8]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        loi = (unsigned int*)&vlow;
+        hii = (int*)&vhi;
+
+
+        /* a[0] * b[8] */
+        t8 += (loi[0] | ((int64_t)hii[0] << 31));
+
+        /* a[1] * b[8] */
+        t9 += (loi[1] | ((int64_t)hii[1] << 31));
+
+        /* a[2] * b[8] */
+        t10 += (loi[2] | ((int64_t)hii[2] << 31));
+
+        /* a[3] * b[8] */
+        t11 += (loi[3] | ((int64_t)hii[3] << 31));
+
+        /* a[4] * b[8] */
+        t12 += (loi[4] | ((int64_t)hii[4] << 31));
+
+        /* a[5] * b[8] */
+        t13 += (loi[5] | ((int64_t)hii[5] << 31));
+
+        /* a[6] * b[8] */
+        t14 += (loi[6] | ((int64_t)hii[6] << 31));
+
+        /* a[7] * b[8] */
+        t15 += (loi[7] | ((int64_t)hii[7] << 31));
+
+        /* a[8] * b[8] */
+        t16 += (loi[8] | ((int64_t)hii[8] << 31));
+
+        /* a[9] * b[8] */
+        t17 = (loi[9] | ((int64_t)hii[9] << 31));
+
+
+        /* a[*] * b[9] */
+        splat = Q6_V_vsplat_R(b[9]);
+        vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
+        vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
+        vhi = Q6_Vw_vmpye_VwVuh(av, splat);
+        vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
+        loi = (unsigned int*)&vlow;
+        hii = (int*)&vhi;
+
+
+        /* a[0] * b[9] */
+        t9 += (loi[0] | ((int64_t)hii[0] << 31));
+
+        /* a[1] * b[9] */
+        t10 += (loi[1] | ((int64_t)hii[1] << 31));
+
+        /* a[2] * b[9] */
+        t11 += (loi[2] | ((int64_t)hii[2] << 31));
+
+        /* a[3] * b[9] */
+        t12 += (loi[3] | ((int64_t)hii[3] << 31));
+
+        /* a[4] * b[9] */
+        t13 += (loi[4] | ((int64_t)hii[4] << 31));
+
+        /* a[5] * b[9] */
+        t14 += (loi[5] | ((int64_t)hii[5] << 31));
+
+        /* a[6] * b[9] */
+        t15 += (loi[6] | ((int64_t)hii[6] << 31));
+
+        /* a[7] * b[9] */
+        t16 += (loi[7] | ((int64_t)hii[7] << 31));
+
+        /* a[8] * b[9] */
+        t17 += (loi[8] | ((int64_t)hii[8] << 31));
+
+        /* a[9] * b[9] */
+        t18 = (loi[9] | ((int64_t)hii[9] << 31));
+
+        t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff;
+        t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff;
+        t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff;
+        t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff;
+        t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff;
+        t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff;
+        t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff;
+        t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff;
+        t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff;
+        t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff;
+        t11 += t10 >> 26; r[10] = t10 & 0x3ffffff;
+        t12 += t11 >> 26; r[11] = t11 & 0x3ffffff;
+        t13 += t12 >> 26; r[12] = t12 & 0x3ffffff;
+        t14 += t13 >> 26; r[13] = t13 & 0x3ffffff;
+        t15 += t14 >> 26; r[14] = t14 & 0x3ffffff;
+        t16 += t15 >> 26; r[15] = t15 & 0x3ffffff;
+        t17 += t16 >> 26; r[16] = t16 & 0x3ffffff;
+        t18 += t17 >> 26; r[17] = t17 & 0x3ffffff;
+        r[19] = (sp_digit)(t18 >> 26);
+        r[18] = t18 & 0x3ffffff;
+    }
+#endif
+}
+
+
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * Forms the full 20-word product first, then Montgomery reduces it
+ * back to 256 bits; the call order is fixed.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_256_mul_10(r, a, b);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * Column-sum squaring: symmetric cross products a[i]*a[j] (i != j)
+ * appear once and are doubled, square terms a[i]*a[i] appear once.
+ * Each 64-bit column sum stays well within int64_t for 26-bit words;
+ * the carry chain renormalizes to 26-bit result words.
+ *
+ * r A single precision integer (20 words written).
+ * a A single precision integer (10 words).
+ */
+SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
+{
+    int64_t t0 = Q6_P_mpy_RR(a[0], a[0]);
+    int64_t t1 = Q6_P_mpy_RR(a[0], a[1]) * 2;
+    int64_t t2 = Q6_P_mpy_RR(a[0], a[2]) * 2
+            + Q6_P_mpy_RR(a[1], a[1]);
+    int64_t t3 = (Q6_P_mpy_RR(a[0], a[3])
+            + Q6_P_mpy_RR(a[1], a[2])) * 2;
+    int64_t t4 = (Q6_P_mpy_RR(a[ 0], a[ 4])
+            + Q6_P_mpy_RR(a[ 1], a[ 3])) * 2
+            + Q6_P_mpy_RR(a[ 2], a[ 2]);
+    int64_t t5 = (Q6_P_mpy_RR(a[ 0], a[ 5])
+            + Q6_P_mpy_RR(a[ 1], a[ 4])
+            + Q6_P_mpy_RR(a[ 2], a[ 3])) * 2;
+    int64_t t6 = (Q6_P_mpy_RR(a[ 0], a[ 6])
+            + Q6_P_mpy_RR(a[ 1], a[ 5])
+            + Q6_P_mpy_RR(a[ 2], a[ 4])) * 2
+            + Q6_P_mpy_RR(a[ 3], a[ 3]);
+    int64_t t7 = (Q6_P_mpy_RR(a[ 0], a[ 7])
+            + Q6_P_mpy_RR(a[ 1], a[ 6])
+            + Q6_P_mpy_RR(a[ 2], a[ 5])
+            + Q6_P_mpy_RR(a[ 3], a[ 4])) * 2;
+    int64_t t8 = (Q6_P_mpy_RR(a[ 0], a[ 8])
+            + Q6_P_mpy_RR(a[ 1], a[ 7])
+            + Q6_P_mpy_RR(a[ 2], a[ 6])
+            + Q6_P_mpy_RR(a[ 3], a[ 5])) * 2
+            + Q6_P_mpy_RR(a[ 4], a[ 4]);
+    int64_t t9 = (Q6_P_mpy_RR(a[ 0], a[ 9])
+            + Q6_P_mpy_RR(a[ 1], a[ 8])
+            + Q6_P_mpy_RR(a[ 2], a[ 7])
+            + Q6_P_mpy_RR(a[ 3], a[ 6])
+            + Q6_P_mpy_RR(a[ 4], a[ 5])) * 2;
+    int64_t t10 = (Q6_P_mpy_RR(a[ 1], a[ 9])
+            + Q6_P_mpy_RR(a[ 2], a[ 8])
+            + Q6_P_mpy_RR(a[ 3], a[ 7])
+            + Q6_P_mpy_RR(a[ 4], a[ 6])) * 2
+            + Q6_P_mpy_RR(a[ 5], a[ 5]);
+    int64_t t11 = (Q6_P_mpy_RR(a[ 2], a[ 9])
+            + Q6_P_mpy_RR(a[ 3], a[ 8])
+            + Q6_P_mpy_RR(a[ 4], a[ 7])
+            + Q6_P_mpy_RR(a[ 5], a[ 6])) * 2;
+    int64_t t12 = (Q6_P_mpy_RR(a[ 3], a[ 9])
+            + Q6_P_mpy_RR(a[ 4], a[ 8])
+            + Q6_P_mpy_RR(a[ 5], a[ 7])) * 2
+            + Q6_P_mpy_RR(a[ 6], a[ 6]);
+    int64_t t13 = (Q6_P_mpy_RR(a[ 4], a[ 9])
+            + Q6_P_mpy_RR(a[ 5], a[ 8])
+            + Q6_P_mpy_RR(a[ 6], a[ 7])) * 2;
+    int64_t t14 = (Q6_P_mpy_RR(a[ 5], a[ 9])
+            + Q6_P_mpy_RR(a[ 6], a[ 8])) * 2
+            + Q6_P_mpy_RR(a[ 7], a[ 7]);
+    int64_t t15 =( Q6_P_mpy_RR(a[ 6], a[ 9])
+            + Q6_P_mpy_RR(a[ 7], a[ 8])) * 2;
+    int64_t t16 = Q6_P_mpy_RR(a[ 7], a[ 9]) * 2
+            + Q6_P_mpy_RR(a[ 8], a[ 8]);
+    int64_t t17 = Q6_P_mpy_RR(a[ 8], a[ 9]) * 2;
+    int64_t t18 = Q6_P_mpy_RR(a[ 9], a[ 9]);
+
+    /* Carry chain: renormalize each column to 26 bits; sequential. */
+    t1 += t0 >> 26; r[ 0] = t0 & 0x3ffffff;
+    t2 += t1 >> 26; r[ 1] = t1 & 0x3ffffff;
+    t3 += t2 >> 26; r[ 2] = t2 & 0x3ffffff;
+    t4 += t3 >> 26; r[ 3] = t3 & 0x3ffffff;
+    t5 += t4 >> 26; r[ 4] = t4 & 0x3ffffff;
+    t6 += t5 >> 26; r[ 5] = t5 & 0x3ffffff;
+    t7 += t6 >> 26; r[ 6] = t6 & 0x3ffffff;
+    t8 += t7 >> 26; r[ 7] = t7 & 0x3ffffff;
+    t9 += t8 >> 26; r[ 8] = t8 & 0x3ffffff;
+    t10 += t9 >> 26; r[ 9] = t9 & 0x3ffffff;
+    t11 += t10 >> 26; r[10] = t10 & 0x3ffffff;
+    t12 += t11 >> 26; r[11] = t11 & 0x3ffffff;
+    t13 += t12 >> 26; r[12] = t12 & 0x3ffffff;
+    t14 += t13 >> 26; r[13] = t13 & 0x3ffffff;
+    t15 += t14 >> 26; r[14] = t14 & 0x3ffffff;
+    t16 += t15 >> 26; r[15] = t15 & 0x3ffffff;
+    t17 += t16 >> 26; r[16] = t16 & 0x3ffffff;
+    t18 += t17 >> 26; r[17] = t17 & 0x3ffffff;
+    r[19] = (sp_digit)(t18 >> 26);
+    r[18] = t18 & 0x3ffffff;
+}
+
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full 20-word square followed by Montgomery reduction back to
+ * 256 bits; the call order is fixed.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_256_sqr_10(r, a);
+    sp_256_mont_reduce_10(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times.
+ * (r = a ^ (2^n) mod m)
+ *
+ * Performs n successive Montgomery squarings: the first reads from a,
+ * the remaining n - 1 square r in place.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * n Number of times to square.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_10(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    /* First squaring moves a into r; the rest operate on r alone. */
+    sp_256_mont_sqr_10(r, a, m, mp);
+    while (--n > 0) {
+        sp_256_mont_sqr_10(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P256 curve: the exponent p - 2 used by the small-code
+ * inversion (a^(p-2) = a^-1 mod p), little-endian 32-bit words. */
+static const uint32_t p256_mod_2[8] = {
+    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
+    0x00000001U,0xffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ *
+ * Computes a^(p-2) mod p (Fermat's little theorem).  The small-code
+ * path scans the exponent p256_mod_2 bit by bit (square-and-multiply);
+ * the fast path uses a fixed addition chain with three temporaries
+ * carved out of td at offsets 0, 2*10 and 4*10.
+ * NOTE(review): td therefore needs at least 6*10 digits here -- confirm
+ * against the callers' temporary sizing.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_256_mont_inv_10(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    XMEMCPY(t, a, sizeof(sp_digit) * 10);
+    /* Left-to-right square-and-multiply over the bits of p - 2. */
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
+        if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))
+            sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 10);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + Q6_P_mpy_RR(2, 10);
+    sp_digit* t3 = td + Q6_P_mpy_RR(4, 10);
+
+    /* Fixed addition chain for the exponent p - 2; each step's comment
+     * gives the exponent (in hex) held by the destination. */
+    /* t = a^2 */
+    sp_256_mont_sqr_10(t, a, p256_mod, p256_mp_mod);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_10(t2, t, 2, p256_mod, p256_mp_mod);
+    /* t3= a^d = t2 * a */
+    sp_256_mont_mul_10(t3, t2, a, p256_mod, p256_mp_mod);
+    /* t = a^f = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^f0 = t ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_10(t2, t, 4, p256_mod, p256_mp_mod);
+    /* t3= a^fd = t2 * t3 */
+    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
+    /* t = a^ff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_10(t2, t, 8, p256_mod, p256_mp_mod);
+    /* t3= a^fffd = t2 * t3 */
+    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_10(t2, t, 16, p256_mod, p256_mp_mod);
+    /* t3= a^fffffffd = t2 * t3 */
+    sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000000 = t ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_10(t2, t, 32, p256_mod, p256_mp_mod);
+    /* t = a^ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000001 = t2 * a */
+    sp_256_mont_mul_10(t2, t2, a, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff000000010000000000000000000000000000000000000000
+     *   = t2 ^ 2 ^ 160 */
+    sp_256_mont_sqr_n_10(t2, t2, 160, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
+     *   = t2 * t */
+    sp_256_mont_mul_10(t2, t2, t, p256_mod, p256_mp_mod);
+    /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
+     *   = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_10(t2, t2, 32, p256_mod, p256_mp_mod);
+    /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
+     *   = t2 * t3 */
+    sp_256_mont_mul_10(r, t2, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+
+/* Map the Montgomery form projective co-ordinate point to an affine point.
+ *
+ * Computes x = X / Z^2 and y = Y / Z^3 and converts both out of
+ * Montgomery form (the extra sp_256_mont_reduce_10 on a value already
+ * below the modulus multiplies by R^-1).  The upper halves of the
+ * 20-word x/y buffers are zeroed before reduction because the reducer
+ * reads all 20 words.  Z of the result is set to the constant 1.
+ *
+ * r Resulting affine co-ordinate point.
+ * p Montgomery form projective co-ordinate point.
+ * t Temporary ordinate data (2*10 words here plus the inversion's
+ *   temporaries at t + 2*10).
+ */
+static void sp_256_map_10(sp_point* r, const sp_point* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + Q6_P_mpy_RR(2, 10);
+    int32_t n;
+
+    /* t1 = 1/Z (Montgomery form). */
+    sp_256_mont_inv_10(t1, p->z, t + 2*10);
+
+    /* t2 = 1/Z^2, t1 = 1/Z^3. */
+    sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    XMEMSET(r->x + 10, 0, sizeof(r->x) / 2U);
+    /* Convert out of Montgomery form. */
+    sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_10(r->x, p256_mod);
+    sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 10, 0, sizeof(r->y) / 2U);
+    /* Convert out of Montgomery form. */
+    sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_10(r->y, p256_mod);
+    sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r->y);
+
+    /* Affine Z is 1. */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+
+/* Add b to a into r. (r = a + b)
+ *
+ * Plain digit-wise addition with no carry propagation between words:
+ * presumably each sp_digit holds at most 26 significant bits in a 32-bit
+ * word (digit 9 holds 22 bits; see the r[9] >> 22 overflow tests below),
+ * so per-word sums cannot overflow - TODO confirm against the sp_digit
+ * typedef.  Callers normalise afterwards via sp_256_norm_10.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * returns 0 always.
+ */
+SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+#if 0
+    /* Reference implementation using Hexagon scalar intrinsics (disabled;
+     * the inline assembly below is used instead). */
+    r[ 0] = Q6_R_add_RR(a[0], b[0]);
+    r[ 1] = Q6_R_add_RR(a[1], b[1]);
+    r[ 2] = Q6_R_add_RR(a[2], b[2]);
+    r[ 3] = Q6_R_add_RR(a[3], b[3]);
+    r[ 4] = Q6_R_add_RR(a[4], b[4]);
+    r[ 5] = Q6_R_add_RR(a[5], b[5]);
+    r[ 6] = Q6_R_add_RR(a[6], b[6]);
+    r[ 7] = Q6_R_add_RR(a[7], b[7]);
+    r[ 8] = Q6_R_add_RR(a[8], b[8]);
+    r[ 9] = Q6_R_add_RR(a[9], b[9]);
+#endif
+#if 1
+    /* Hand-scheduled Hexagon packets: each { } packet pairs loads with the
+     * add of the previously loaded digits so loads, adds and stores
+     * dual-issue; r1-r26 are scratch (all listed as clobbers). */
+    __asm__ __volatile__ (
+        "{ r1 = memw(%[a]+#0) \n"
+        " r2 = memw(%[b]+#0) }\n"
+        "{ r3 = memw(%[a]+#4) \n"
+        " r19 = add(r1,r2) \n"
+        " r4 = memw(%[b]+#4) }\n"
+        "{ r5 = memw(%[a]+#8) \n"
+        " r20 = add(r3,r4) \n"
+        " r6 = memw(%[b]+#8) }\n"
+        "{ memw(%[r]+#0) = r19 }\n"
+        "{ r7 = memw(%[a]+#12) \n"
+        " r21 = add(r5,r6) \n"
+        " r8 = memw(%[b]+#12) }\n"
+        "{ memw(%[r]+#4) = r20 }\n"
+        "{ r9 = memw(%[a]+#16) \n"
+        " r22 = add(r7,r8) \n"
+        " r10 = memw(%[b]+#16) }\n"
+        "{ memw(%[r]+#8) = r21 }\n"
+        "{ r11 = memw(%[a]+#20) \n"
+        " r23 = add(r9,r10) \n"
+        " r12 = memw(%[b]+#20) }\n"
+        "{ memw(%[r]+#12) = r22 }\n"
+        "{ r13 = memw(%[a]+#24) \n"
+        " r24 = add(r11,r12) \n"
+        " r14 = memw(%[b]+#24) }\n"
+        "{ memw(%[r]+#16) = r23 }\n"
+        "{ r15 = memw(%[a]+#28) \n"
+        " r25 = add(r13,r14) \n"
+        " r16 = memw(%[b]+#28) }\n"
+        "{ memw(%[r]+#20) = r24 }\n"
+        "{ r17 = memw(%[a]+#32) \n"
+        " r26 = add(r15,r16) \n"
+        " r18 = memw(%[b]+#32) }\n"
+        "{ memw(%[r]+#24) = r25 }\n"
+        "{ r5 = memw(%[a]+#36) \n"
+        " r19 = add(r17,r18) \n"
+        " r6 = memw(%[b]+#36) }\n"
+        "{ memw(%[r]+#28) = r26 }\n"
+        "{ r20 = add(r5,r6) \n"
+        " memw(%[r]+#32) = r19 }\n"
+        "{ memw(%[r]+#36) = r20 }\n"
+        : [r] "+r" (r)
+        : [a] "r"(a), [b] "r"(b)
+        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26"
+    );
+#endif
+    return 0;
+}
+
+
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * After the raw add, any bits of r[9] above bit 21 (digit 9 carries the
+ * top 22 bits of the 256-bit value) indicate overflow past the modulus
+ * width; the modulus is then removed by a masked, constant-time
+ * conditional subtract.
+ *
+ * r   Result of addition.
+ * a   First number to add in Montgomery form.
+ * b   Second number to add in Montgomery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, b);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * Implemented as a self-add followed by a constant-time conditional
+ * subtract of the modulus when the result spills past 256 bits
+ * (signalled by r[9] >> 22).
+ *
+ * r   Result of doubling.
+ * a   Number to double in Montgomery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_dbl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * Two additions, each followed by its own overflow check and masked
+ * conditional subtract so the intermediate never exceeds one modulus
+ * width - both steps are constant time.
+ *
+ * r   Result of Tripling.
+ * a   Number to triple in Montgomery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    /* r = 2a mod m */
+    (void)sp_256_add_10(r, a, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+    /* r = r + a mod m */
+    (void)sp_256_add_10(r, r, a);
+    sp_256_norm_10(r);
+    sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_10(r);
+}
+
+/* Sub b from a into r. (r = a - b)
+ *
+ * Plain digit-wise subtraction with no borrow propagation: individual
+ * digits may go negative and are repaired later by normalisation /
+ * conditional add in the callers (see sp_256_mont_sub_10) - TODO confirm
+ * sp_digit is a signed type so negative digits are representable.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * returns 0 always.
+ */
+SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+#if 0
+    /* Reference implementation using Hexagon scalar intrinsics (disabled;
+     * the inline assembly below is used instead). */
+    r[ 0] = Q6_R_sub_RR(a[0], b[0]);
+    r[ 1] = Q6_R_sub_RR(a[1], b[1]);
+    r[ 2] = Q6_R_sub_RR(a[2], b[2]);
+    r[ 3] = Q6_R_sub_RR(a[3], b[3]);
+    r[ 4] = Q6_R_sub_RR(a[4], b[4]);
+    r[ 5] = Q6_R_sub_RR(a[5], b[5]);
+    r[ 6] = Q6_R_sub_RR(a[6], b[6]);
+    r[ 7] = Q6_R_sub_RR(a[7], b[7]);
+    r[ 8] = Q6_R_sub_RR(a[8], b[8]);
+    r[ 9] = Q6_R_sub_RR(a[9], b[9]);
+#endif
+#if 1
+    /* Hand-scheduled Hexagon packets mirroring sp_256_add_10: loads, subs
+     * and stores are paired to dual-issue; r1-r26 are scratch. */
+    __asm__ __volatile__ (
+        "{ r1 = memw(%[a]+#0) \n"
+        " r2 = memw(%[b]+#0) }\n"
+        "{ r3 = memw(%[a]+#4) \n"
+        " r19 = sub(r1,r2) \n"
+        " r4 = memw(%[b]+#4) }\n"
+        "{ r5 = memw(%[a]+#8) \n"
+        " r20 = sub(r3,r4) \n"
+        " r6 = memw(%[b]+#8) }\n"
+        "{ memw(%[r]+#0) = r19 }\n"
+        "{ r7 = memw(%[a]+#12) \n"
+        " r21 = sub(r5,r6) \n"
+        " r8 = memw(%[b]+#12) }\n"
+        "{ memw(%[r]+#4) = r20 }\n"
+        "{ r9 = memw(%[a]+#16) \n"
+        " r22 = sub(r7,r8) \n"
+        " r10 = memw(%[b]+#16) }\n"
+        "{ memw(%[r]+#8) = r21 }\n"
+        "{ r11 = memw(%[a]+#20) \n"
+        " r23 = sub(r9,r10) \n"
+        " r12 = memw(%[b]+#20) }\n"
+        "{ memw(%[r]+#12) = r22 }\n"
+        "{ r13 = memw(%[a]+#24) \n"
+        " r24 = sub(r11,r12) \n"
+        " r14 = memw(%[b]+#24) }\n"
+        "{ memw(%[r]+#16) = r23 }\n"
+        "{ r15 = memw(%[a]+#28) \n"
+        " r25 = sub(r13,r14) \n"
+        " r16 = memw(%[b]+#28) }\n"
+        "{ memw(%[r]+#20) = r24 }\n"
+        "{ r17 = memw(%[a]+#32) \n"
+        " r26 = sub(r15,r16) \n"
+        " r18 = memw(%[b]+#32) }\n"
+        "{ memw(%[r]+#24) = r25 }\n"
+        "{ r5 = memw(%[a]+#36) \n"
+        " r19 = sub(r17,r18) \n"
+        " r6 = memw(%[b]+#36) }\n"
+        "{ memw(%[r]+#28) = r26 }\n"
+        "{ r20 = sub(r5,r6) \n"
+        " memw(%[r]+#32) = r19 }\n"
+        "{ memw(%[r]+#36) = r20 }\n"
+        : [r] "+r" (r)
+        : [a] "r"(a), [b] "r"(b)
+        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26"
+    );
+#endif
+    return 0;
+}
+
+/* Conditionally add a and b using the mask m.
+ * m is -1 (all ones) to add and 0 when not.
+ *
+ * Branch-free: b is masked digit-by-digit, so the same instructions run
+ * regardless of the condition - keeps the operation constant time.
+ *
+ * r  A single precision number representing conditional add result.
+ * a  A single precision number to add with.
+ * b  A single precision number to add.
+ * m  Mask value to apply.
+ */
+static void sp_256_cond_add_10(sp_digit* r, const sp_digit* a,
+        const sp_digit* b, const sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        r[i] = a[i] + (b[i] & m);
+    }
+#else
+    /* Unrolled form using Hexagon scalar intrinsics. */
+    r[ 0] = Q6_R_add_RR(a[ 0], Q6_R_and_RR(b[ 0], m));
+    r[ 1] = Q6_R_add_RR(a[ 1], Q6_R_and_RR(b[ 1], m));
+    r[ 2] = Q6_R_add_RR(a[ 2], Q6_R_and_RR(b[ 2], m));
+    r[ 3] = Q6_R_add_RR(a[ 3], Q6_R_and_RR(b[ 3], m));
+    r[ 4] = Q6_R_add_RR(a[ 4], Q6_R_and_RR(b[ 4], m));
+    r[ 5] = Q6_R_add_RR(a[ 5], Q6_R_and_RR(b[ 5], m));
+    r[ 6] = Q6_R_add_RR(a[ 6], Q6_R_and_RR(b[ 6], m));
+    r[ 7] = Q6_R_add_RR(a[ 7], Q6_R_and_RR(b[ 7], m));
+    r[ 8] = Q6_R_add_RR(a[ 8], Q6_R_and_RR(b[ 8], m));
+    r[ 9] = Q6_R_add_RR(a[ 9], Q6_R_and_RR(b[ 9], m));
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * On underflow the top digit goes negative and r[9] >> 22 produces a
+ * non-zero mask that re-adds the modulus.  NOTE(review): this depends on
+ * sp_digit being signed so the shift sign-extends into an all-ones mask
+ * for sp_256_cond_add_10 - confirm against the sp_digit typedef.
+ *
+ * r   Result of subtraction.
+ * a   Number to subtract from in Montgomery form.
+ * b   Number to subtract with in Montgomery form.
+ * m   Modulus (prime).
+ */
+static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    (void)sp_256_sub_10(r, a, b);
+    sp_256_cond_add_10(r, r, m, r[9] >> 22);
+    sp_256_norm_10(r);
+}
+
+
+/* Shift number right one bit.
+ * Bottom bit is lost.
+ * (Original comment said "left"; the code shifts right.)
+ *
+ * Digits are 26 bits wide: bit 0 of digit i+1, moved up to bit 25,
+ * becomes the new top bit of digit i.
+ *
+ * r  Result of shift.
+ * a  Number to shift.
+ */
+SP_NOINLINE static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<9; i++) {
+        r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
+    }
+#else
+    /* Unrolled; (a[i+1] << 25) & 0x3ffffff isolates the incoming bit. */
+    r[0] = ((a[0] >> 1) | Q6_R_and_RR((a[1] << 25), 0x3ffffff));
+    r[1] = ((a[1] >> 1) | Q6_R_and_RR((a[2] << 25), 0x3ffffff));
+    r[2] = ((a[2] >> 1) | Q6_R_and_RR((a[3] << 25), 0x3ffffff));
+    r[3] = ((a[3] >> 1) | Q6_R_and_RR((a[4] << 25), 0x3ffffff));
+    r[4] = ((a[4] >> 1) | Q6_R_and_RR((a[5] << 25), 0x3ffffff));
+    r[5] = ((a[5] >> 1) | Q6_R_and_RR((a[6] << 25), 0x3ffffff));
+    r[6] = ((a[6] >> 1) | Q6_R_and_RR((a[7] << 25), 0x3ffffff));
+    r[7] = ((a[7] >> 1) | Q6_R_and_RR((a[8] << 25), 0x3ffffff));
+    r[8] = ((a[8] >> 1) | Q6_R_and_RR((a[9] << 25), 0x3ffffff));
+#endif
+    /* Top digit simply loses its bottom bit. */
+    r[9] = a[9] >> 1;
+}
+
+
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * If a is odd, the (odd) modulus is added first so a + m is even and the
+ * following right shift is exact.  The add uses a mask derived from the
+ * low bit, keeping the operation constant time.
+ *
+ * r  Result of division by 2.
+ * a  Number to divide.
+ * m  Modulus (prime).
+ */
+static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_256_cond_add_10(r, a, m, 0 - (a[0] & 1));
+    sp_256_norm_10(r);
+    sp_256_rshift1_10(r, r);
+}
+
+
+/* Double the Montgomery form projective point p.
+ *
+ * Jacobian-coordinate doubling.  The infinity case is handled without a
+ * data-dependent branch: rp[0] is the real result and rp[1] is scratch,
+ * and p->infinity indexes between them so a point at infinity writes its
+ * ordinates into scratch while r keeps the copied input.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_10(sp_point* r, const sp_point* p, sp_digit* t)
+{
+    sp_point* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* When infinity don't double point passed in - constant time. */
+    rp[0] = r;
+
+    /*lint allow cast to different type of pointer*/
+    rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
+    XMEMSET(rp[1], 0, sizeof(sp_point));
+    x = rp[p->infinity]->x;
+    y = rp[p->infinity]->y;
+    z = rp[p->infinity]->z;
+    /* Put point to double into result - good for infinity. */
+    if (r != p) {
+        for (i=0; i<10; i++) {
+            r->x[i] = p->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = p->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = p->z[i];
+        }
+        r->infinity = p->infinity;
+    }
+
+    /* NOTE: the step order below is load-bearing - each temporary is
+     * consumed before being overwritten.  Do not reorder. */
+    /* T1 = Z * Z */
+    sp_256_mont_sqr_10(t1, z, p256_mod, p256_mp_mod);
+    /* Z = Y * Z */
+    sp_256_mont_mul_10(z, y, z, p256_mod, p256_mp_mod);
+    /* Z = 2Z */
+    sp_256_mont_dbl_10(z, z, p256_mod);
+    /* T2 = X - T1 */
+    sp_256_mont_sub_10(t2, x, t1, p256_mod);
+    /* T1 = X + T1 */
+    sp_256_mont_add_10(t1, x, t1, p256_mod);
+    /* T2 = T1 * T2 */
+    sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod);
+    /* T1 = 3T2 */
+    sp_256_mont_tpl_10(t1, t2, p256_mod);
+    /* Y = 2Y */
+    sp_256_mont_dbl_10(y, y, p256_mod);
+    /* Y = Y * Y */
+    sp_256_mont_sqr_10(y, y, p256_mod, p256_mp_mod);
+    /* T2 = Y * Y */
+    sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
+    /* T2 = T2/2 */
+    sp_256_div2_10(t2, t2, p256_mod);
+    /* Y = Y * X */
+    sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
+    /* X = T1 * T1 */
+    sp_256_mont_mul_10(x, t1, t1, p256_mod, p256_mp_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_10(x, x, y, p256_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_10(x, x, y, p256_mod);
+    /* Y = Y - X */
+    sp_256_mont_sub_10(y, y, x, p256_mod);
+    /* Y = Y * T1 */
+    sp_256_mont_mul_10(y, y, t1, p256_mod, p256_mp_mod);
+    /* Y = Y - T2 */
+    sp_256_mont_sub_10(y, y, t2, p256_mod);
+
+}
+
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation: every digit is always examined and there
+ * are no data-dependent branches or early exits.
+ *
+ * a  First number to compare.
+ * b  Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit d = 0;
+    int i;
+
+    /* Accumulate the XOR of every digit pair; d is zero iff all match. */
+    for (i = 0; i < 10; i++) {
+        d |= a[i] ^ b[i];
+    }
+
+    return d == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Jacobian-coordinate addition.  Detects P == Q (or P == -Q) up front and
+ * falls back to doubling, since the general formulas degenerate there.
+ * Infinity inputs are routed through the rp[]/ap[] index tables rather
+ * than branches.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_10(sp_point* r, const sp_point* p, const sp_point* q,
+        sp_digit* t)
+{
+    const sp_point* ap[2];
+    sp_point* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    sp_digit* t3 = t + 4*10;
+    sp_digit* t4 = t + 6*10;
+    sp_digit* t5 = t + 8*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: t1 = -q->y; equal x and z plus y == +/-q->y => P == +/-Q. */
+    (void)sp_256_sub_10(t1, p256_mod, q->y);
+    sp_256_norm_10(t1);
+    if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
+        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_10(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point));
+        /* If either input is infinity, divert writes into scratch. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* r starts as a copy of whichever input is not at infinity. */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<10; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_10(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_10(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_10(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_10(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_10(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(x, x, t5, p256_mod);
+        sp_256_mont_dbl_10(t1, y, p256_mod);
+        sp_256_mont_sub_10(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_10(y, y, x, p256_mod);
+        sp_256_mont_mul_10(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(y, y, t5, p256_mod);
+    }
+}
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * Ladder-style double-and-add: for every scalar bit both an add and a
+ * double are performed, and the destination is chosen with masked pointer
+ * arithmetic (addr_mask) instead of a secret-dependent branch.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
+        int map, void* heap)
+{
+    sp_point* td;
+    sp_point* t[3];
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+    td = (sp_point*)XMALLOC(sizeof(sp_point) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (td == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+
+    if (err == MP_OKAY) {
+        XMEMSET(td, 0, sizeof(*td) * 3);
+
+        t[0] = &td[0];
+        t[1] = &td[1];
+        t[2] = &td[2];
+
+        /* t[0] = {0, 0, 1} * norm */
+        t[0]->infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_256_mod_mul_norm_10(t[1]->x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1]->y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1]->z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        /* Start at the top digit: 256 = 9*26 + 22, so digit 9 holds 22 bits. */
+        i = 9;
+        c = 22;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            /* y = current scalar bit (taken from bit 25 of the window). */
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_10(t[y^1], t[0], t[1], tmp);
+
+            /* Double whichever of t[0]/t[1] the bit selects, via masked
+             * address arithmetic to avoid a secret-dependent branch. */
+            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
+                                  ((size_t)t[1] & addr_mask[y])),
+                    sizeof(sp_point));
+            sp_256_proj_point_dbl_10(t[2], t[2], tmp);
+            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
+                            ((size_t)t[1] & addr_mask[y])), t[2],
+                    sizeof(sp_point));
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, t[0], sizeof(sp_point));
+        }
+    }
+
+    /* Zeroise secret-dependent intermediates before freeing.
+     * Fix: free against the heap hint the buffers were allocated from
+     * (previously NULL), matching the XMALLOC calls above and the other
+     * mulmod variants - matters for custom-allocator/static-memory builds. */
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (td != NULL) {
+        XMEMSET(td, 0, sizeof(sp_point) * 3);
+        XFREE(td, heap, DYNAMIC_TYPE_ECC);
+    }
+
+    return err;
+}
+
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * Cache-attack resistant variant: the point selected by each scalar bit is
+ * reached through masked address arithmetic (addr_mask), never through a
+ * secret-dependent branch or index.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point td[3];
+    sp_digit tmpd[2 * 10 * 5];
+#endif
+    sp_point* t;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+    (void)heap;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_point td[3];
+    t = (sp_point*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Fix: removed the invalid 't[0] = &td[0];' assignments.  t is an
+         * sp_point* (the base of an array of three points), so assigning an
+         * sp_point* into t[0] (an sp_point) is a constraint violation that
+         * fails to compile; t already addresses td or the allocation. */
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+    if (err == MP_OKAY)
+        err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+
+    if (err == MP_OKAY) {
+        /* Start at the top digit: 256 = 9*26 + 22, so digit 9 holds 22 bits. */
+        i = 9;
+        c = 22;
+        n = k[i--] << (26 - c);
+        for (; ; c--) {
+            if (c == 0) {
+                if (i == -1)
+                    break;
+
+                n = k[i--];
+                c = 26;
+            }
+
+            y = (n >> 25) & 1;
+            n <<= 1;
+
+            sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp);
+
+            /* Double the bit-selected point via masked addresses. */
+            XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                                   ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
+            sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
+            XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
+                            ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, &t[0], tmp);
+        }
+        else {
+            XMEMCPY(r, &t[0], sizeof(sp_point));
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point) * 3);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+
+    return err;
+}
+
+#else
+/* A table entry for pre-computed points (affine x/y in Montgomery form).
+ * 128-byte alignment - presumably to suit the Hexagon DSP's cache/vector
+ * alignment requirements; confirm before changing. */
+typedef struct sp_table_entry {
+    sp_digit x[10] __attribute__((aligned(128)));
+    sp_digit y[10] __attribute__((aligned(128)));
+} sp_table_entry;
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * Fixed-window (4-bit) method: precomputes t[i] = i*g for i in 0..15,
+ * then processes the 256-bit scalar four bits at a time with four
+ * doublings and one table add per window.  Table lookups are indexed by
+ * scalar bits, so this variant is NOT cache-attack resistant.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_fast_10(sp_point* r, const sp_point* g, const sp_digit* k,
+        int map, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point td[16];
+    sp_point rtd;
+    sp_digit tmpd[2 * 10 * 5];
+#endif
+    sp_point* t;
+    sp_point* rt;
+    sp_digit* tmp;
+    sp_digit n;
+    int i;
+    int c, y;
+    int err;
+
+    (void)heap;
+
+    err = sp_ecc_point_new(heap, rtd, rt);
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[i] = i * g (projective, Montgomery). */
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        (void)sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
+        (void)sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
+        (void)sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
+        t[1].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 2], &t[ 1], tmp);
+        t[ 2].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 3], &t[ 2], &t[ 1], tmp);
+        t[ 3].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 4], &t[ 2], tmp);
+        t[ 4].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 5], &t[ 3], &t[ 2], tmp);
+        t[ 5].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 6], &t[ 3], tmp);
+        t[ 6].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 7], &t[ 4], &t[ 3], tmp);
+        t[ 7].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[ 8], &t[ 4], tmp);
+        t[ 8].infinity = 0;
+        sp_256_proj_point_add_10(&t[ 9], &t[ 5], &t[ 4], tmp);
+        t[ 9].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[10], &t[ 5], tmp);
+        t[10].infinity = 0;
+        sp_256_proj_point_add_10(&t[11], &t[ 6], &t[ 5], tmp);
+        t[11].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[12], &t[ 6], tmp);
+        t[12].infinity = 0;
+        sp_256_proj_point_add_10(&t[13], &t[ 7], &t[ 6], tmp);
+        t[13].infinity = 0;
+        sp_256_proj_point_dbl_10(&t[14], &t[ 7], tmp);
+        t[14].infinity = 0;
+        sp_256_proj_point_add_10(&t[15], &t[ 8], &t[ 7], tmp);
+        t[15].infinity = 0;
+
+        /* Top window: digit 9 holds the top 22 bits of the scalar
+         * (256 = 9*26 + 22); consume its highest 4 bits first. */
+        i = 8;
+        n = k[i+1] << 6;
+        c = 18;
+        y = n >> 24;
+        XMEMCPY(rt, &t[y], sizeof(sp_point));
+        n <<= 8;
+        for (; i>=0 || c>=4; ) {
+            if (c < 4) {
+                /* Refill the bit buffer from the next lower digit. */
+                n |= k[i--] << (6 - c);
+                c += 26;
+            }
+            y = (n >> 28) & 0xf;
+            n <<= 4;
+            c -= 4;
+
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+            sp_256_proj_point_dbl_10(rt, rt, tmp);
+
+            sp_256_proj_point_add_10(rt, rt, &t[y], tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point));
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_point) * 16);
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmpd, sizeof(tmpd));
+    ForceZero(td, sizeof(td));
+#endif
+    sp_ecc_point_free(rt, 1, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+/* Double the Montgomery form projective point p a number of times.
+ *
+ * Repeated-doubling formulas that share W = Z^4 across iterations, making
+ * n consecutive doublings cheaper than n calls to the single-double
+ * routine.  Uses the same branch-free rp[] infinity handling as
+ * sp_256_proj_point_dbl_10.
+ *
+ * r  Result of repeated doubling of point.
+ * p  Point to double.
+ * n  Number of times to double
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_10(sp_point* r, const sp_point* p, int n,
+        sp_digit* t)
+{
+    sp_point* rp[2];
+    sp_digit* w = t;
+    sp_digit* a = t + 2*10;
+    sp_digit* b = t + 4*10;
+    sp_digit* t1 = t + 6*10;
+    sp_digit* t2 = t + 8*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    rp[0] = r;
+
+    /*lint allow cast to different type of pointer*/
+    rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
+    XMEMSET(rp[1], 0, sizeof(sp_point));
+    x = rp[p->infinity]->x;
+    y = rp[p->infinity]->y;
+    z = rp[p->infinity]->z;
+    if (r != p) {
+        for (i=0; i<10; i++) {
+            r->x[i] = p->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = p->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = p->z[i];
+        }
+        r->infinity = p->infinity;
+    }
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_10(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_10(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_10(w, w, p256_mod, p256_mp_mod);
+    while (n-- > 0) {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_10(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(b, t2, x, p256_mod, p256_mp_mod);
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_10(t1, b, p256_mod);
+        sp_256_mont_sub_10(x, x, t1, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod);
+        /* t2 = Y^4 */
+        sp_256_mont_sqr_10(t2, t2, p256_mod, p256_mp_mod);
+        if (n != 0) {
+            /* W = W*Y^4 (only needed while more doublings remain) */
+            sp_256_mont_mul_10(w, w, t2, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_10(y, b, x, p256_mod);
+        sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_10(y, y, p256_mod);
+        sp_256_mont_sub_10(y, y, t2, p256_mod);
+    }
+    /* Y = Y/2 (the loop tracks 2*Y) */
+    sp_256_div2_10(y, y, p256_mod);
+}
+
+#endif /* FP_ECC */
+
+
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * Mixed addition: with Z2 == 1 the U1/S1 scaling drops out, saving a
+ * square and several multiplications versus the general add.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add (must have Z == 1 in Montgomery form).
+ * t  Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_10(sp_point* r, const sp_point* p,
+        const sp_point* q, sp_digit* t)
+{
+    const sp_point* ap[2];
+    sp_point* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*10;
+    sp_digit* t3 = t + 4*10;
+    sp_digit* t4 = t + 6*10;
+    sp_digit* t5 = t + 8*10;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double: t1 = -q->y; P == +/-Q degenerates to a doubling. */
+    (void)sp_256_sub_10(t1, p256_mod, q->y);
+    sp_256_norm_10(t1);
+    if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
+        (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_10(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point));
+        /* Infinity inputs divert writes into scratch - no branch. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<10; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<10; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<10; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_10(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_10(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_10(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_10(t1, t3, p256_mod);
+        sp_256_mont_sub_10(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_10(t3, t3, x, p256_mod);
+        sp_256_mont_mul_10(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_10(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * X is scaled by 1/Z^2 and Y by 1/Z^3 (both in Montgomery form), then Z is
+ * set to Montgomery one (p256_norm_mod).
+ *
+ * a  Point to convert.
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_10(sp_point* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 10;
+    sp_digit* tmp = t + 4 * 10;
+
+    /* t1 = 1/Z */
+    sp_256_mont_inv_10(t1, a->z, tmp);
+
+    /* t2 = 1/Z^2, t1 = 1/Z^3 */
+    sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    sp_256_mont_mul_10(a->x, a->x, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_10(a->y, a->y, t1, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * Builds affine entries table[j] for j = 0..255 where each bit i of j
+ * selects (2^(32*i)) * a: table[1<<i] comes from 32 repeated doublings and
+ * the remaining entries from mixed additions of earlier entries.  Used by
+ * the stripe scalar-multiplication below.
+ *
+ * a      The base point.
+ * table  Place to store generated point data (256 entries).
+ * tmp    Temporary data.
+ * heap   Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_10(const sp_point* a,
+        sp_table_entry* table, sp_digit* tmp, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point td, s1d, s2d;
+#endif
+    sp_point* t;
+    sp_point* s1 = NULL;
+    sp_point* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_ecc_point_new(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_10(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_10(t, tmp);
+
+        /* s1/s2 are affine working points (Z = Montgomery one). */
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        /* Power-of-two entries: table[1<<i] = (2^(32*i)) * a. */
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_10(t, t, 32, tmp);
+            sp_256_proj_to_affine_10(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        /* Remaining entries: sums of a power-of-two entry and a smaller one. */
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_10(t, s1, s2, tmp);
+                sp_256_proj_to_affine_10(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_ecc_point_free(s2, 0, heap);
+    sp_ecc_point_free(s1, 0, heap);
+    sp_ecc_point_free( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * Comb/stripe method over a pre-computed table: the 256-bit scalar is
+ * read as 8 interleaved 32-bit stripes, and on each of 31 iterations one
+ * doubling plus one mixed add of table[y] is performed, where y packs one
+ * bit from each stripe.  Table lookups are indexed by scalar bits, so
+ * this is not cache-attack resistant on its own.
+ *
+ * r  Resulting point.
+ * k  Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_10(sp_point* r, const sp_point* g,
+        const sp_table_entry* table, const sp_digit* k, int map, void* heap)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_point rtd;
+    sp_point pd;
+    sp_digit td[2 * 10 * 5];
+#endif
+    sp_point* rt;
+    sp_point* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_ecc_point_new(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, pd, p);
+    }
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        /* Top iteration: gather bit 31 of each 32-bit stripe into y. */
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 26] >> (x % 26)) & 1) << j;
+            }
+
+            sp_256_proj_point_dbl_10(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_10(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_10(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point));
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(p, 0, heap);
+    sp_ecc_point_free(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef FP_ECC
+/* Maximum number of cached base points (overridable at build time). */
+#ifndef FP_ENTRIES
+    #define FP_ENTRIES 16
+#endif
+
+/* One fixed-point cache slot: the base point's ordinates plus its
+ * 256-entry stripe table, a use count for LRU-style eviction, and a
+ * valid flag.  128-byte alignment presumably matches the Hexagon DSP's
+ * cache/vector alignment - confirm before changing. */
+typedef struct sp_cache_t {
+    sp_digit x[10] __attribute__((aligned(128)));
+    sp_digit y[10] __attribute__((aligned(128)));
+    sp_table_entry table[256] __attribute__((aligned(128)));
+    uint32_t cnt;
+    int set;
+} sp_cache_t;
+
+/* Per-thread (when THREAD_LS is available) cache state. */
+static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_last = -1;
+static THREAD_LS_T int sp_cache_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    /* Without thread-local storage the cache is shared and guarded by
+     * sp_cache_lock (initialised lazily via initCacheMutex). */
+    static volatile int initCacheMutex = 0;
+    static wolfSSL_Mutex sp_cache_lock;
+#endif
+
+/* Find, or install, the cache slot for point g and return it.
+ * Lazily initialises the cache on first call. On a hit the slot's use
+ * count is bumped; on a miss the first unset slot after the last-used
+ * one is taken or, if every slot is in use, the least-used slot is
+ * evicted and reused. Callers in this file take sp_cache_lock before
+ * calling when HAVE_THREAD_LS is not defined (shared cache).
+ *
+ * g     Point to look up (matched on its x and y digits).
+ * cache Receives a pointer to the slot for g (always set on return).
+ */
+static void sp_ecc_get_cache(const sp_point* g, sp_cache_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* One-time lazy initialisation: mark every slot empty. */
+ if (sp_cache_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache[i].set = 0;
+ }
+ sp_cache_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache[i].set)
+ continue;
+
+ /* Bitwise '&' (not '&&'): both digit compares always execute. */
+ if (sp_256_cmp_equal_10(g->x, sp_cache[i].x) &
+ sp_256_cmp_equal_10(g->y, sp_cache[i].y)) {
+ sp_cache[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry, scanning from just after the last-used slot. */
+ i = (sp_cache_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used (no empty slot was found). */
+ if (i == sp_cache_last) {
+ least = sp_cache[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache[j].cnt < least) {
+ i = j;
+ least = sp_cache[i].cnt;
+ }
+ }
+ }
+
+ /* Install g in the chosen slot with a fresh use count. */
+ XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
+ XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
+ sp_cache[i].set = 1;
+ sp_cache[i].cnt = 1;
+ }
+
+ *cache = &sp_cache[i];
+ sp_cache_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point g of P256 by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * With FP_ECC enabled, a cache of per-point pre-computed stripe tables
+ * is consulted: early uses of a point go through the plain fast
+ * multiplier, the table is generated on the point's second use, and
+ * subsequent uses take the stripe-table path.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ /* No fixed-point cache compiled in: always use the fast multiplier. */
+ return sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 10 * 5]; /* scratch for stripe-table generation */
+ sp_cache_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Cache is shared between threads here: guard it with a mutex.
+ * NOTE(review): the check-then-set init of initCacheMutex is not
+ * itself race-free - confirm first use happens single-threaded. */
+ if (initCacheMutex == 0) {
+ wc_InitMutex(&sp_cache_lock);
+ initCacheMutex = 1;
+ }
+ if (wc_LockMutex(&sp_cache_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache(g, &cache);
+ /* Build the stripe table exactly once: on the second use. */
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_10(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ /* Table not built yet: plain (non-cached) multiply. */
+ err = sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
+ }
+ else {
+ /* Table available: use the pre-computed stripe multiply. */
+ err = sp_256_ecc_mulmod_stripe_10(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#endif
+
+#ifdef WOLFSSL_SP_SMALL
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * Small-code build: no pre-computed base-point table is compiled in,
+ * so this forwards to the generic point multiplier with the curve's
+ * base point p256_base.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* No pre-computed values. */
+ return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap);
+}
+
+#else
+static const sp_table_entry p256_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x0a9143c,0x1cc3506,0x360179e,0x3f17fb6,0x075ba95,0x1d88944,
+ 0x3b732b7,0x15719e7,0x376a537,0x0062417 },
+ { 0x295560a,0x094d5f3,0x245cddf,0x392e867,0x18b4ab8,0x3487cc9,
+ 0x288688d,0x176174b,0x3182588,0x0215c7f } },
+ /* 2 */
+ { { 0x147519a,0x2218090,0x32f0202,0x2b09acd,0x0d0981e,0x1e17af2,
+ 0x14a7caa,0x163a6a7,0x10ddbdf,0x03654f1 },
+ { 0x1590f8f,0x0d8733f,0x09179d6,0x1ad139b,0x372e962,0x0bad933,
+ 0x1961102,0x223cdff,0x37e9eb2,0x0218fae } },
+ /* 3 */
+ { { 0x0db6485,0x1ad88d7,0x2f97785,0x288bc28,0x3808f0e,0x3df8c02,
+ 0x28d9544,0x20280f9,0x055b5ff,0x00001d8 },
+ { 0x38d2010,0x13ae6e0,0x308a763,0x2ecc90d,0x254014f,0x10a9981,
+ 0x247d398,0x0fb8383,0x3613437,0x020c21d } },
+ /* 4 */
+ { { 0x2a0d2bb,0x08bf145,0x34994f9,0x1b06988,0x30d5cc1,0x1f18b22,
+ 0x01cf3a5,0x199fe49,0x161fd1b,0x00bd79a },
+ { 0x1a01797,0x171c2fd,0x21925c1,0x1358255,0x23d20b4,0x1c7f6d4,
+ 0x111b370,0x03dec12,0x1168d6f,0x03d923e } },
+ /* 5 */
+ { { 0x137bbbc,0x19a11f8,0x0bec9e5,0x27a29a8,0x3e43446,0x275cd18,
+ 0x0427617,0x00056c7,0x285133d,0x016af80 },
+ { 0x04c7dab,0x2a0df30,0x0c0792a,0x1310c98,0x3573d9f,0x239b30d,
+ 0x1315627,0x1ce0c32,0x25b6b6f,0x0252edc } },
+ /* 6 */
+ { { 0x20f141c,0x26d23dc,0x3c74bbf,0x334b7d6,0x06199b3,0x0441171,
+ 0x3f61294,0x313bf70,0x3cb2f7d,0x03375ae },
+ { 0x2f436fd,0x19c02fa,0x26becca,0x1b6e64c,0x26f647f,0x053c948,
+ 0x0fa7920,0x397d830,0x2bd4bda,0x028d86f } },
+ /* 7 */
+ { { 0x17c13c7,0x2895616,0x03e128a,0x17d42df,0x1c38d63,0x0f02747,
+ 0x039aecf,0x0a4b01c,0x209c4b5,0x02e84b2 },
+ { 0x1f91dfd,0x023e916,0x07fb9e4,0x19b3ba8,0x13af43b,0x35e02ca,
+ 0x0eb0899,0x3bd2c7b,0x19d701f,0x014faee } },
+ /* 8 */
+ { { 0x0e63d34,0x1fb8c6c,0x0fab4fe,0x1caa795,0x0f46005,0x179ed69,
+ 0x093334d,0x120c701,0x39206d5,0x021627e },
+ { 0x183553a,0x03d7319,0x09e5aa7,0x12b8959,0x2087909,0x0011194,
+ 0x1045071,0x0713f32,0x16d0254,0x03aec1a } },
+ /* 9 */
+ { { 0x01647c5,0x1b2856b,0x1799461,0x11f133d,0x0b8127d,0x1937eeb,
+ 0x266aa37,0x1f68f71,0x0cbd1b2,0x03aca08 },
+ { 0x287e008,0x1be361a,0x38f3940,0x276488d,0x2d87dfa,0x0333b2c,
+ 0x2d2e428,0x368755b,0x09b55a7,0x007ca0a } },
+ /* 10 */
+ { { 0x389da99,0x2a8300e,0x0022abb,0x27ae0a1,0x0a6f2d7,0x207017a,
+ 0x047862b,0x1358c9e,0x35905e5,0x00cde92 },
+ { 0x1f7794a,0x1d40348,0x3f613c6,0x2ddf5b5,0x0207005,0x133f5ba,
+ 0x1a37810,0x3ef5829,0x0d5f4c2,0x0035978 } },
+ /* 11 */
+ { { 0x1275d38,0x026efad,0x2358d9d,0x1142f82,0x14268a7,0x1cfac99,
+ 0x362ff49,0x288cbc1,0x24252f4,0x0308f68 },
+ { 0x394520c,0x06e13c2,0x178e5da,0x18ec16f,0x1096667,0x134a7a8,
+ 0x0dcb869,0x33fc4e9,0x38cc790,0x006778e } },
+ /* 12 */
+ { { 0x2c5fe04,0x29c5b09,0x1bdb183,0x02ceee8,0x03b28de,0x132dc4b,
+ 0x32c586a,0x32ff5d0,0x3d491fc,0x038d372 },
+ { 0x2a58403,0x2351aea,0x3a53b40,0x21a0ba5,0x39a6974,0x1aaaa2b,
+ 0x3901273,0x03dfe78,0x3447b4e,0x039d907 } },
+ /* 13 */
+ { { 0x364ba59,0x14e5077,0x02fc7d7,0x3b02c09,0x1d33f10,0x0560616,
+ 0x06dfc6a,0x15efd3c,0x357052a,0x01284b7 },
+ { 0x039dbd0,0x18ce3e5,0x3e1fbfa,0x352f794,0x0d3c24b,0x07c6cc5,
+ 0x1e4ffa2,0x3a91bf5,0x293bb5b,0x01abd6a } },
+ /* 14 */
+ { { 0x0c91999,0x02da644,0x0491da1,0x100a960,0x00a24b4,0x2330824,
+ 0x0094b4b,0x1004cf8,0x35a66a4,0x017f8d1 },
+ { 0x13e7b4b,0x232af7e,0x391ab0f,0x069f08f,0x3292b50,0x3479898,
+ 0x2889aec,0x2a4590b,0x308ecfe,0x02d5138 } },
+ /* 15 */
+ { { 0x2ddfdce,0x231ba45,0x39e6647,0x19be245,0x12c3291,0x35399f8,
+ 0x0d6e764,0x3082d3a,0x2bda6b0,0x0382dac },
+ { 0x37efb57,0x04b7cae,0x00070d3,0x379e431,0x01aac0d,0x1e6f251,
+ 0x0336ad6,0x0ddd3e4,0x3de25a6,0x01c7008 } },
+ /* 16 */
+ { { 0x3e20925,0x230912f,0x286762a,0x30e3f73,0x391c19a,0x34e1c18,
+ 0x16a5d5d,0x093d96a,0x3d421d3,0x0187561 },
+ { 0x37173ea,0x19ce8a8,0x0b65e87,0x0214dde,0x2238480,0x16ead0f,
+ 0x38441e0,0x3bef843,0x2124621,0x03e847f } },
+ /* 17 */
+ { { 0x0b19ffd,0x247cacb,0x3c231c8,0x16ec648,0x201ba8d,0x2b172a3,
+ 0x103d678,0x2fb72db,0x04c1f13,0x0161bac },
+ { 0x3e8ed09,0x171b949,0x2de20c3,0x0f06067,0x21e81a3,0x1b194be,
+ 0x0fd6c05,0x13c449e,0x0087086,0x006756b } },
+ /* 18 */
+ { { 0x09a4e1f,0x27d604c,0x00741e9,0x06fa49c,0x0ab7de7,0x3f4a348,
+ 0x25ef0be,0x158fc9a,0x33f7f9c,0x039f001 },
+ { 0x2f59f76,0x3598e83,0x30501f6,0x15083f2,0x0669b3b,0x29980b5,
+ 0x0c1f7a7,0x0f02b02,0x0fec65b,0x0382141 } },
+ /* 19 */
+ { { 0x031b3ca,0x23da368,0x2d66f09,0x27b9b69,0x06d1cab,0x13c91ba,
+ 0x3d81fa9,0x25ad16f,0x0825b09,0x01e3c06 },
+ { 0x225787f,0x3bf790e,0x2c9bb7e,0x0347732,0x28016f8,0x0d6ff0d,
+ 0x2a4877b,0x1d1e833,0x3b87e94,0x010e9dc } },
+ /* 20 */
+ { { 0x2b533d5,0x1ddcd34,0x1dc0625,0x3da86f7,0x3673b8a,0x1e7b0a4,
+ 0x3e7c9aa,0x19ac55d,0x251c3b2,0x02edb79 },
+ { 0x25259b3,0x24c0ead,0x3480e7e,0x34f40e9,0x3d6a0af,0x2cf3f09,
+ 0x2c83d19,0x2e66f16,0x19a5d18,0x0182d18 } },
+ /* 21 */
+ { { 0x2e5aa1c,0x28e3846,0x3658bd6,0x0ad279c,0x1b8b765,0x397e1fb,
+ 0x130014e,0x3ff342c,0x3b2aeeb,0x02743c9 },
+ { 0x2730a55,0x0918c5e,0x083aca9,0x0bf76ef,0x19c955b,0x300669c,
+ 0x01dfe0a,0x312341f,0x26d356e,0x0091295 } },
+ /* 22 */
+ { { 0x2cf1f96,0x00e52ba,0x271c6db,0x2a40930,0x19f2122,0x0b2f4ee,
+ 0x26ac1b8,0x3bda498,0x0873581,0x0117963 },
+ { 0x38f9dbc,0x3d1e768,0x2040d3f,0x11ba222,0x3a8aaf1,0x1b82fb5,
+ 0x1adfb24,0x2de9251,0x21cc1e4,0x0301038 } },
+ /* 23 */
+ { { 0x38117b6,0x2bc001b,0x1433847,0x3fdce8d,0x3651969,0x3651d7a,
+ 0x2b35761,0x1bb1d20,0x097682c,0x00737d7 },
+ { 0x1f04839,0x1dd6d04,0x16987db,0x3d12378,0x17dbeac,0x1c2cc86,
+ 0x121dd1b,0x3fcf6ca,0x1f8a92d,0x00119d5 } },
+ /* 24 */
+ { { 0x0e8ffcd,0x2b174af,0x1a82cc8,0x22cbf98,0x30d53c4,0x080b5b1,
+ 0x3161727,0x297cfdb,0x2113b83,0x0011b97 },
+ { 0x0007f01,0x23fd936,0x3183e7b,0x0496bd0,0x07fb1ef,0x178680f,
+ 0x1c5ea63,0x0016c11,0x2c3303d,0x01b8041 } },
+ /* 25 */
+ { { 0x0dd73b1,0x1cd6122,0x10d948c,0x23e657b,0x3767070,0x15a8aad,
+ 0x385ea8c,0x33c7ce0,0x0ede901,0x0110965 },
+ { 0x2d4b65b,0x2a8b244,0x0c37f8f,0x0ee5b24,0x394c234,0x3a5e347,
+ 0x26e4a15,0x39a3b4c,0x2514c2e,0x029e5be } },
+ /* 26 */
+ { { 0x23addd7,0x3ed8120,0x13b3359,0x20f959a,0x09e2a61,0x32fcf20,
+ 0x05b78e3,0x19ba7e2,0x1a9c697,0x0392b4b },
+ { 0x2048a61,0x3dfd0a3,0x19a0357,0x233024b,0x3082d19,0x00fb63b,
+ 0x3a1af4c,0x1450ff0,0x046c37b,0x0317a50 } },
+ /* 27 */
+ { { 0x3e75f9e,0x294e30a,0x3a78476,0x3a32c48,0x36fd1a9,0x0427012,
+ 0x1e4df0b,0x11d1f61,0x1afdb46,0x018ca0f },
+ { 0x2f2df15,0x0a33dee,0x27f4ce7,0x1542b66,0x3e592c4,0x20d2f30,
+ 0x3226ade,0x2a4e3ea,0x1ab1981,0x01a2f46 } },
+ /* 28 */
+ { { 0x087d659,0x3ab5446,0x305ac08,0x3d2cd64,0x33374d5,0x3f9d3f8,
+ 0x186981c,0x37f5a5a,0x2f53c6f,0x01254a4 },
+ { 0x2cec896,0x1e32786,0x04844a8,0x043b16d,0x3d964b2,0x1935829,
+ 0x16f7e26,0x1a0dd9a,0x30d2603,0x003b1d4 } },
+ /* 29 */
+ { { 0x12687bb,0x04e816b,0x21fa2da,0x1abccb8,0x3a1f83b,0x375181e,
+ 0x0f5ef51,0x0fc2ce4,0x3a66486,0x003d881 },
+ { 0x3138233,0x1f8eec3,0x2718bd6,0x1b09caa,0x2dd66b9,0x1bb222b,
+ 0x1004072,0x1b73e3b,0x07208ed,0x03fc36c } },
+ /* 30 */
+ { { 0x095d553,0x3e84053,0x0a8a749,0x3f575a0,0x3a44052,0x3ced59b,
+ 0x3b4317f,0x03a8c60,0x13c8874,0x00c4ed4 },
+ { 0x0d11549,0x0b8ab02,0x221cb40,0x02ed37b,0x2071ee1,0x1fc8c83,
+ 0x3987dd4,0x27e049a,0x0f986f1,0x00b4eaf } },
+ /* 31 */
+ { { 0x15581a2,0x2214060,0x11af4c2,0x1598c88,0x19a0a6d,0x32acba6,
+ 0x3a7a0f0,0x2337c66,0x210ded9,0x0300dbe },
+ { 0x1fbd009,0x3822eb0,0x181629a,0x2401b45,0x30b68b1,0x2e78363,
+ 0x2b32779,0x006530b,0x2c4b6d4,0x029aca8 } },
+ /* 32 */
+ { { 0x13549cf,0x0f943db,0x265ed43,0x1bfeb35,0x06f3369,0x3847f2d,
+ 0x1bfdacc,0x26181a5,0x252af7c,0x02043b8 },
+ { 0x159bb2c,0x143f85c,0x357b654,0x2f9d62c,0x2f7dfbe,0x1a7fa9c,
+ 0x057e74d,0x05d14ac,0x17a9273,0x035215c } },
+ /* 33 */
+ { { 0x0cb5a98,0x106a2bc,0x10bf117,0x24c7cc4,0x3d3da8f,0x2ce0ab7,
+ 0x14e2cba,0x1813866,0x1a72f9a,0x01a9811 },
+ { 0x2b2411d,0x3034fe8,0x16e0170,0x0f9443a,0x0be0eb8,0x2196cf3,
+ 0x0c9f738,0x15e40ef,0x0faf9e1,0x034f917 } },
+ /* 34 */
+ { { 0x03f7669,0x3da6efa,0x3d6bce1,0x209ca1d,0x109f8ae,0x09109e3,
+ 0x08ae543,0x3067255,0x1dee3c2,0x0081dd5 },
+ { 0x3ef1945,0x358765b,0x28c387b,0x3bec4b4,0x218813c,0x0b7d92a,
+ 0x3cd1d67,0x2c0367e,0x2e57154,0x0123717 } },
+ /* 35 */
+ { { 0x3e5a199,0x1e42ffd,0x0bb7123,0x33e6273,0x1e0efb8,0x294671e,
+ 0x3a2bfe0,0x3d11709,0x2eddff6,0x03cbec2 },
+ { 0x0b5025f,0x0255d7c,0x1f2241c,0x35d03ea,0x0550543,0x202fef4,
+ 0x23c8ad3,0x354963e,0x015db28,0x0284fa4 } },
+ /* 36 */
+ { { 0x2b65cbc,0x1e8d428,0x0226f9f,0x1c8a919,0x10b04b9,0x08fc1e8,
+ 0x1ce241e,0x149bc99,0x2b01497,0x00afc35 },
+ { 0x3216fb7,0x1374fd2,0x226ad3d,0x19fef76,0x0f7d7b8,0x1c21417,
+ 0x37b83f6,0x3a27eba,0x25a162f,0x010aa52 } },
+ /* 37 */
+ { { 0x2adf191,0x1ab42fa,0x28d7584,0x2409689,0x20f8a48,0x253707d,
+ 0x2030504,0x378f7a1,0x169c65e,0x00b0b76 },
+ { 0x3849c17,0x085c764,0x10dd6d0,0x2e87689,0x1460488,0x30e9521,
+ 0x10c7063,0x1b6f120,0x21f42c5,0x03d0dfe } },
+ /* 38 */
+ { { 0x20f7dab,0x035c512,0x29ac6aa,0x24c5ddb,0x20f0497,0x17ce5e1,
+ 0x00a050f,0x1eaa14b,0x3335470,0x02abd16 },
+ { 0x18d364a,0x0df0cf0,0x316585e,0x018f925,0x0d40b9b,0x17b1511,
+ 0x1716811,0x1caf3d0,0x10df4f2,0x0337d8c } },
+ /* 39 */
+ { { 0x2a8b7ef,0x0f188e3,0x2287747,0x06216f0,0x008e935,0x2f6a38d,
+ 0x1567722,0x0bfc906,0x0bada9e,0x03c3402 },
+ { 0x014d3b1,0x099c749,0x2a76291,0x216c067,0x3b37549,0x14ef2f6,
+ 0x21b96d4,0x1ee2d71,0x2f5ca88,0x016f570 } },
+ /* 40 */
+ { { 0x09a3154,0x3d1a7bd,0x2e9aef0,0x255b8ac,0x03e85a5,0x2a492a7,
+ 0x2aec1ea,0x11c6516,0x3c8a09e,0x02a84b7 },
+ { 0x1f69f1d,0x09c89d3,0x1e7326f,0x0b28bfd,0x0e0e4c8,0x1ea7751,
+ 0x18ce73b,0x2a406e7,0x273e48c,0x01b00db } },
+ /* 41 */
+ { { 0x36e3138,0x2b84a83,0x345a5cf,0x00096b4,0x16966ef,0x159caf1,
+ 0x13c64b4,0x2f89226,0x25896af,0x00a4bfd },
+ { 0x2213402,0x1435117,0x09fed52,0x09d0e4b,0x0f6580e,0x2871cba,
+ 0x3b397fd,0x1c9d825,0x090311b,0x0191383 } },
+ /* 42 */
+ { { 0x07153f0,0x1087869,0x18c9e1e,0x1e64810,0x2b86c3b,0x0175d9c,
+ 0x3dce877,0x269de4e,0x393cab7,0x03c96b9 },
+ { 0x1869d0c,0x06528db,0x02641f3,0x209261b,0x29d55c8,0x25ba517,
+ 0x3b5ea30,0x028f927,0x25313db,0x00e6e39 } },
+ /* 43 */
+ { { 0x2fd2e59,0x150802d,0x098f377,0x19a4957,0x135e2c0,0x38a95ce,
+ 0x1ab21a0,0x36c1b67,0x32f0f19,0x00e448b },
+ { 0x3cad53c,0x3387800,0x17e3cfb,0x03f9970,0x3225b2c,0x2a84e1d,
+ 0x3af1d29,0x3fe35ca,0x2f8ce80,0x0237a02 } },
+ /* 44 */
+ { { 0x07bbb76,0x3aa3648,0x2758afb,0x1f085e0,0x1921c7e,0x3010dac,
+ 0x22b74b1,0x230137e,0x1062e36,0x021c652 },
+ { 0x3993df5,0x24a2ee8,0x126ab5f,0x2d7cecf,0x0639d75,0x16d5414,
+ 0x1aa78a8,0x3f78404,0x26a5b74,0x03f0c57 } },
+ /* 45 */
+ { { 0x0d6ecfa,0x3f506ba,0x3f86561,0x3d86bb1,0x15f8c44,0x2491d07,
+ 0x052a7b4,0x2422261,0x3adee38,0x039b529 },
+ { 0x193c75d,0x14bb451,0x1162605,0x293749c,0x370a70d,0x2e8b1f6,
+ 0x2ede937,0x2b95f4a,0x39a9be2,0x00d77eb } },
+ /* 46 */
+ { { 0x2736636,0x15bf36a,0x2b7e6b9,0x25eb8b2,0x209f51d,0x3cd2659,
+ 0x10bf410,0x034afec,0x3d71c83,0x0076971 },
+ { 0x0ce6825,0x07920cf,0x3c3b5c4,0x23fe55c,0x015ad11,0x08c0dae,
+ 0x0552c7f,0x2e75a8a,0x0fddbf4,0x01c1df0 } },
+ /* 47 */
+ { { 0x2b9661c,0x0ffe351,0x3d71bf6,0x1ac34b3,0x3a1dfd3,0x211fe3d,
+ 0x33e140a,0x3f9100d,0x32ee50e,0x014ea18 },
+ { 0x16d8051,0x1bfda1a,0x068a097,0x2571d3d,0x1daec0c,0x39389af,
+ 0x194dc35,0x3f3058a,0x36d34e1,0x000a329 } },
+ /* 48 */
+ { { 0x09877ee,0x351f73f,0x0002d11,0x0420074,0x2c8b362,0x130982d,
+ 0x02c1175,0x3c11b40,0x0d86962,0x001305f },
+ { 0x0daddf5,0x2f4252c,0x15c06d9,0x1d49339,0x1bea235,0x0b680ed,
+ 0x3356e67,0x1d1d198,0x1e9fed9,0x03dee93 } },
+ /* 49 */
+ { { 0x3e1263f,0x2fe8d3a,0x3ce6d0d,0x0d5c6b9,0x3557637,0x0a9bd48,
+ 0x0405538,0x0710749,0x2005213,0x038c7e5 },
+ { 0x26b6ec6,0x2e485ba,0x3c44d1b,0x0b9cf0b,0x037a1d1,0x27428a5,
+ 0x0e7eac8,0x351ef04,0x259ce34,0x02a8e98 } },
+ /* 50 */
+ { { 0x2f3dcd3,0x3e77d4d,0x3360fbc,0x1434afd,0x36ceded,0x3d413d6,
+ 0x1710fad,0x36bb924,0x1627e79,0x008e637 },
+ { 0x109569e,0x1c168db,0x3769cf4,0x2ed4527,0x0ea0619,0x17d80d3,
+ 0x1c03773,0x18843fe,0x1b21c04,0x015c5fd } },
+ /* 51 */
+ { { 0x1dd895e,0x08a7248,0x04519fe,0x001030a,0x18e5185,0x358dfb3,
+ 0x13d2391,0x0a37be8,0x0560e3c,0x019828b },
+ { 0x27fcbd0,0x2a22bb5,0x30969cc,0x1e03aa7,0x1c84724,0x0ba4ad3,
+ 0x32f4817,0x0914cca,0x14c4f52,0x01893b9 } },
+ /* 52 */
+ { { 0x097eccc,0x1273936,0x00aa095,0x364fe62,0x04d49d1,0x10e9f08,
+ 0x3c24230,0x3ef01c8,0x2fb92bd,0x013ce4a },
+ { 0x1e44fd9,0x27e3e9f,0x2156696,0x3915ecc,0x0b66cfb,0x1a3af0f,
+ 0x2fa8033,0x0e6736c,0x177ccdb,0x0228f9e } },
+ /* 53 */
+ { { 0x2c4b125,0x06207c1,0x0a8cdde,0x003db8f,0x1ae34e3,0x31e84fa,
+ 0x2999de5,0x11013bd,0x02370c2,0x00e2234 },
+ { 0x0f91081,0x200d591,0x1504762,0x1857c05,0x23d9fcf,0x0cb34db,
+ 0x27edc86,0x08cd860,0x2471810,0x029798b } },
+ /* 54 */
+ { { 0x3acd6c8,0x097b8cb,0x3c661a8,0x15152f2,0x1699c63,0x237e64c,
+ 0x23edf79,0x16b7033,0x0e6466a,0x00b11da },
+ { 0x0a64bc9,0x1bfe324,0x1f5cb34,0x08391de,0x0630a60,0x3017a21,
+ 0x09d064b,0x14a8365,0x041f9e6,0x01ed799 } },
+ /* 55 */
+ { { 0x128444a,0x2508b07,0x2a39216,0x362f84d,0x2e996c5,0x2c31ff3,
+ 0x07afe5f,0x1d1288e,0x3cb0c8d,0x02e2bdc },
+ { 0x38b86fd,0x3a0ea8c,0x1cff5fd,0x1629629,0x3fee3f1,0x02b250c,
+ 0x2e8f6f2,0x0225727,0x15f7f3f,0x0280d8e } },
+ /* 56 */
+ { { 0x10f7770,0x0f1aee8,0x0e248c7,0x20684a8,0x3a6f16d,0x06f0ae7,
+ 0x0df6825,0x2d4cc40,0x301875f,0x012f8da },
+ { 0x3b56dbb,0x1821ba7,0x24f8922,0x22c1f9e,0x0306fef,0x1b54bc8,
+ 0x2ccc056,0x00303ba,0x2871bdc,0x0232f26 } },
+ /* 57 */
+ { { 0x0dac4ab,0x0625730,0x3112e13,0x101c4bf,0x3a874a4,0x2873b95,
+ 0x32ae7c6,0x0d7e18c,0x13e0c08,0x01139d5 },
+ { 0x334002d,0x00fffdd,0x025c6d5,0x22c2cd1,0x19d35cb,0x3a1ce2d,
+ 0x3702760,0x3f06257,0x03a5eb8,0x011c29a } },
+ /* 58 */
+ { { 0x0513482,0x1d87724,0x276a81b,0x0a807a4,0x3028720,0x339cc20,
+ 0x2441ee0,0x31bbf36,0x290c63d,0x0059041 },
+ { 0x106a2ed,0x0d2819b,0x100bf50,0x114626c,0x1dd4d77,0x2e08632,
+ 0x14ae72a,0x2ed3f64,0x1fd7abc,0x035cd1e } },
+ /* 59 */
+ { { 0x2d4c6e5,0x3bec596,0x104d7ed,0x23d6c1b,0x0262cf0,0x15d72c5,
+ 0x2d5bb18,0x199ac4b,0x1e30771,0x020591a },
+ { 0x21e291e,0x2e75e55,0x1661d7a,0x08b0778,0x3eb9daf,0x0d78144,
+ 0x1827eb1,0x0fe73d2,0x123f0dd,0x0028db7 } },
+ /* 60 */
+ { { 0x1d5533c,0x34cb1d0,0x228f098,0x27a1a11,0x17c5f5a,0x0d26f44,
+ 0x2228ade,0x2c460e6,0x3d6fdba,0x038cc77 },
+ { 0x3cc6ed8,0x02ada1a,0x260e510,0x2f7bde8,0x37160c3,0x33a1435,
+ 0x23d9a7b,0x0ce2641,0x02a492e,0x034ed1e } },
+ /* 61 */
+ { { 0x3821f90,0x26dba3c,0x3aada14,0x3b59bad,0x292edd9,0x2804c45,
+ 0x3669531,0x296f42e,0x35a4c86,0x01ca049 },
+ { 0x3ff47e5,0x2163df4,0x2441503,0x2f18405,0x15e1616,0x37f66ec,
+ 0x30f11a7,0x141658a,0x27ece14,0x00b018b } },
+ /* 62 */
+ { { 0x159ac2e,0x3e65bc0,0x2713a76,0x0db2f6c,0x3281e77,0x2391811,
+ 0x16d2880,0x1fbc4ab,0x1f92c4e,0x00a0a8d },
+ { 0x0ce5cd2,0x152c7b0,0x02299c3,0x3244de7,0x2cf99ef,0x3a0b047,
+ 0x2caf383,0x0aaf664,0x113554d,0x031c735 } },
+ /* 63 */
+ { { 0x1b578f4,0x177a702,0x3a7a488,0x1638ebf,0x31884e2,0x2460bc7,
+ 0x36b1b75,0x3ce8e3d,0x340cf47,0x03143d9 },
+ { 0x34b68ea,0x12b7ccd,0x1fe2a9c,0x08da659,0x0a406f3,0x1694c14,
+ 0x06a2228,0x16370be,0x3a72129,0x02e7b2c } },
+ /* 64 */
+ { { 0x0f8b16a,0x21043bd,0x266a56f,0x3fb11ec,0x197241a,0x36721f0,
+ 0x006b8e6,0x2ac6c29,0x202cd42,0x0200fcf },
+ { 0x0dbec69,0x0c26a01,0x105f7f0,0x3dceeeb,0x3a83b85,0x363865f,
+ 0x097273a,0x2b70718,0x00e5067,0x03025d1 } },
+ /* 65 */
+ { { 0x379ab34,0x295bcb0,0x38d1846,0x22e1077,0x3a8ee06,0x1db1a3b,
+ 0x3144591,0x07cc080,0x2d5915f,0x03c6bcc },
+ { 0x175bd50,0x0dd4c57,0x27bc99c,0x2ebdcbd,0x3837cff,0x235dc8f,
+ 0x13a4184,0x0722c18,0x130e2d4,0x008f43c } },
+ /* 66 */
+ { { 0x01500d9,0x2adbb7d,0x2da8857,0x397f2fa,0x10d890a,0x25c9654,
+ 0x3e86488,0x3eb754b,0x1d6c0a3,0x02c0a23 },
+ { 0x10bcb08,0x083cc19,0x2e16853,0x04da575,0x271af63,0x2626a9d,
+ 0x3520a7b,0x32348c7,0x24ff408,0x03ff4dc } },
+ /* 67 */
+ { { 0x058e6cb,0x1a3992d,0x1d28539,0x080c5e9,0x2992dad,0x2a9d7d5,
+ 0x14ae0b7,0x09b7ce0,0x34ad78c,0x03d5643 },
+ { 0x30ba55a,0x092f4f3,0x0bae0fc,0x12831de,0x20fc472,0x20ed9d2,
+ 0x29864f6,0x1288073,0x254f6f7,0x00635b6 } },
+ /* 68 */
+ { { 0x1be5a2b,0x0f88975,0x33c6ed9,0x20d64d3,0x06fe799,0x0989bff,
+ 0x1409262,0x085a90c,0x0d97990,0x0142eed },
+ { 0x17ec63e,0x06471b9,0x0db2378,0x1006077,0x265422c,0x08db83d,
+ 0x28099b0,0x1270d06,0x11801fe,0x00ac400 } },
+ /* 69 */
+ { { 0x3391593,0x22d7166,0x30fcfc6,0x2896609,0x3c385f5,0x066b72e,
+ 0x04f3aad,0x2b831c5,0x19983fb,0x0375562 },
+ { 0x0b82ff4,0x222e39d,0x34c993b,0x101c79c,0x2d2e03c,0x0f00c8a,
+ 0x3a9eaf4,0x1810669,0x151149d,0x039b931 } },
+ /* 70 */
+ { { 0x29af288,0x1956ec7,0x293155f,0x193deb6,0x1647e1a,0x2ca0839,
+ 0x297e4bc,0x15bfd0d,0x1b107ed,0x0147803 },
+ { 0x31c327e,0x05a6e1d,0x02ad43d,0x02d2a5b,0x129cdb2,0x37ad1de,
+ 0x3d51f53,0x245df01,0x2414982,0x0388bd0 } },
+ /* 71 */
+ { { 0x35f1abb,0x17a3d18,0x0874cd4,0x2d5a14e,0x17edc0c,0x16a00d3,
+ 0x072c1fb,0x1232725,0x33d52dc,0x03dc24d },
+ { 0x0af30d6,0x259aeea,0x369c401,0x12bc4de,0x295bf5f,0x0d8711f,
+ 0x26162a9,0x16c44e5,0x288e727,0x02f54b4 } },
+ /* 72 */
+ { { 0x05fa877,0x1571ea7,0x3d48ab1,0x1c9f4e8,0x017dad6,0x0f46276,
+ 0x343f9e7,0x1de990f,0x0e4c8aa,0x028343e },
+ { 0x094f92d,0x3abf633,0x1b3a0bb,0x2f83137,0x0d818c8,0x20bae85,
+ 0x0c65f8b,0x1a8008b,0x0c7946d,0x0295b1e } },
+ /* 73 */
+ { { 0x1d09529,0x08e46c3,0x1fcf296,0x298f6b7,0x1803e0e,0x2d6fd20,
+ 0x37351f5,0x0d9e8b1,0x1f8731a,0x0362fbf },
+ { 0x00157f4,0x06750bf,0x2650ab9,0x35ffb23,0x2f51cae,0x0b522c2,
+ 0x39cb400,0x191e337,0x0a5ce9f,0x021529a } },
+ /* 74 */
+ { { 0x3506ea5,0x17d9ed8,0x0d66dc3,0x22693f8,0x19286c4,0x3a57353,
+ 0x101d3bf,0x1aa54fc,0x20b9884,0x0172b3a },
+ { 0x0eac44d,0x37d8327,0x1c3aa90,0x3d0d534,0x23db29a,0x3576eaf,
+ 0x1d3de8a,0x3bea423,0x11235e4,0x039260b } },
+ /* 75 */
+ { { 0x34cd55e,0x01288b0,0x1132231,0x2cc9a03,0x358695b,0x3e87650,
+ 0x345afa1,0x01267ec,0x3f616b2,0x02011ad },
+ { 0x0e7d098,0x0d6078e,0x0b70b53,0x237d1bc,0x0d7f61e,0x132de31,
+ 0x1ea9ea4,0x2bd54c3,0x27b9082,0x03ac5f2 } },
+ /* 76 */
+ { { 0x2a145b9,0x06d661d,0x31ec175,0x03f06f1,0x3a5cf6b,0x249c56e,
+ 0x2035653,0x384c74f,0x0bafab5,0x0025ec0 },
+ { 0x25f69e1,0x1b23a55,0x1199aa6,0x16ad6f9,0x077e8f7,0x293f661,
+ 0x33ba11d,0x3327980,0x07bafdb,0x03e571d } },
+ /* 77 */
+ { { 0x2bae45e,0x3c074ef,0x2955558,0x3c312f1,0x2a8ebe9,0x2f193f1,
+ 0x3705b1d,0x360deba,0x01e566e,0x00d4498 },
+ { 0x21161cd,0x1bc787e,0x2f87933,0x3553197,0x1328ab8,0x093c879,
+ 0x17eee27,0x2adad1d,0x1236068,0x003be5c } },
+ /* 78 */
+ { { 0x0ca4226,0x2633dd5,0x2c8e025,0x0e3e190,0x05eede1,0x1a385e4,
+ 0x163f744,0x2f25522,0x1333b4f,0x03f05b6 },
+ { 0x3c800ca,0x1becc79,0x2daabe9,0x0c499e2,0x1138063,0x3fcfa2d,
+ 0x2244976,0x1e85cf5,0x2f1b95d,0x0053292 } },
+ /* 79 */
+ { { 0x12f81d5,0x1dc6eaf,0x11967a4,0x1a407df,0x31a5f9d,0x2b67241,
+ 0x18bef7c,0x08c7762,0x063f59c,0x01015ec },
+ { 0x1c05c0a,0x360bfa2,0x1f85bff,0x1bc7703,0x3e4911c,0x0d685b6,
+ 0x2fccaea,0x02c4cef,0x164f133,0x0070ed7 } },
+ /* 80 */
+ { { 0x0ec21fe,0x052ffa0,0x3e825fe,0x1ab0956,0x3f6ce11,0x3d29759,
+ 0x3c5a072,0x18ebe62,0x148db7e,0x03eb49c },
+ { 0x1ab05b3,0x02dab0a,0x1ae690c,0x0f13894,0x137a9a8,0x0aab79f,
+ 0x3dc875c,0x06a1029,0x1e39f0e,0x01dce1f } },
+ /* 81 */
+ { { 0x16c0dd7,0x3b31269,0x2c741e9,0x3611821,0x2a5cffc,0x1416bb3,
+ 0x3a1408f,0x311fa3d,0x1c0bef0,0x02cdee1 },
+ { 0x00e6a8f,0x1adb933,0x0f23359,0x2fdace2,0x2fd6d4b,0x0e73bd3,
+ 0x2453fac,0x0a356ae,0x2c8f9f6,0x02704d6 } },
+ /* 82 */
+ { { 0x0e35743,0x28c80a1,0x0def32a,0x2c6168f,0x1320d6a,0x37c6606,
+ 0x21b1761,0x2147ee0,0x21fc433,0x015c84d },
+ { 0x1fc9168,0x36cda9c,0x003c1f0,0x1cd7971,0x15f98ba,0x1ef363d,
+ 0x0ca87e3,0x046f7d9,0x3c9e6bb,0x0372eb0 } },
+ /* 83 */
+ { { 0x118cbe2,0x3665a11,0x304ef01,0x062727a,0x3d242fc,0x11ffbaf,
+ 0x3663c7e,0x1a189c9,0x09e2d62,0x02e3072 },
+ { 0x0e1d569,0x162f772,0x0cd051a,0x322df62,0x3563809,0x047cc7a,
+ 0x027fd9f,0x08b509b,0x3da2f94,0x01748ee } },
+ /* 84 */
+ { { 0x1c8f8be,0x31ca525,0x22bf0a1,0x200efcd,0x02961c4,0x3d8f52b,
+ 0x018403d,0x3a40279,0x1cb91ec,0x030427e },
+ { 0x0945705,0x0257416,0x05c0c2d,0x25b77ae,0x3b9083d,0x2901126,
+ 0x292b8d7,0x07b8611,0x04f2eee,0x026f0cd } },
+ /* 85 */
+ { { 0x2913074,0x2b8d590,0x02b10d5,0x09d2295,0x255491b,0x0c41cca,
+ 0x1ca665b,0x133051a,0x1525f1a,0x00a5647 },
+ { 0x04f983f,0x3d6daee,0x04e1e76,0x1067d7e,0x1be7eef,0x02ea862,
+ 0x00d4968,0x0ccb048,0x11f18ef,0x018dd95 } },
+ /* 86 */
+ { { 0x22976cc,0x17c5395,0x2c38bda,0x3983bc4,0x222bca3,0x332a614,
+ 0x3a30646,0x261eaef,0x1c808e2,0x02f6de7 },
+ { 0x306a772,0x32d7272,0x2dcefd2,0x2abf94d,0x038f475,0x30ad76e,
+ 0x23e0227,0x3052b0a,0x001add3,0x023ba18 } },
+ /* 87 */
+ { { 0x0ade873,0x25a6069,0x248ccbe,0x13713ee,0x17ee9aa,0x28152e9,
+ 0x2e28995,0x2a92cb3,0x17a6f77,0x024b947 },
+ { 0x190a34d,0x2ebea1c,0x1ed1948,0x16fdaf4,0x0d698f7,0x32bc451,
+ 0x0ee6e30,0x2aaab40,0x06f0a56,0x01460be } },
+ /* 88 */
+ { { 0x24cc99c,0x1884b1e,0x1ca1fba,0x1a0f9b6,0x2ff609b,0x2b26316,
+ 0x3b27cb5,0x29bc976,0x35d4073,0x024772a },
+ { 0x3575a70,0x1b30f57,0x07fa01b,0x0e5be36,0x20cb361,0x26605cd,
+ 0x1d4e8c8,0x13cac59,0x2db9797,0x005e833 } },
+ /* 89 */
+ { { 0x36c8d3a,0x1878a81,0x124b388,0x0e4843e,0x1701aad,0x0ea0d76,
+ 0x10eae41,0x37d0653,0x36c7f4c,0x00ba338 },
+ { 0x37a862b,0x1cf6ac0,0x08fa912,0x2dd8393,0x101ba9b,0x0eebcb7,
+ 0x2453883,0x1a3cfe5,0x2cb34f6,0x03d3331 } },
+ /* 90 */
+ { { 0x1f79687,0x3d4973c,0x281544e,0x2564bbe,0x17c5954,0x171e34a,
+ 0x231741a,0x3cf2784,0x0889a0d,0x02b036d },
+ { 0x301747f,0x3f1c477,0x1f1386b,0x163bc5f,0x1592b93,0x332daed,
+ 0x080e4f5,0x1d28b96,0x26194c9,0x0256992 } },
+ /* 91 */
+ { { 0x15a4c93,0x07bf6b0,0x114172c,0x1ce0961,0x140269b,0x1b2c2eb,
+ 0x0dfb1c1,0x019ddaa,0x0ba2921,0x008c795 },
+ { 0x2e6d2dc,0x37e45e2,0x2918a70,0x0fce444,0x34d6aa6,0x396dc88,
+ 0x27726b5,0x0c787d8,0x032d8a7,0x02ac2f8 } },
+ /* 92 */
+ { { 0x1131f2d,0x2b43a63,0x3101097,0x38cec13,0x0637f09,0x17a69d2,
+ 0x086196d,0x299e46b,0x0802cf6,0x03c6f32 },
+ { 0x0daacb4,0x1a4503a,0x100925c,0x15583d9,0x23c4e40,0x1de4de9,
+ 0x1cc8fc4,0x2c9c564,0x0695aeb,0x02145a5 } },
+ /* 93 */
+ { { 0x1dcf593,0x17050fc,0x3e3bde3,0x0a6c062,0x178202b,0x2f7674f,
+ 0x0dadc29,0x15763a7,0x1d2daad,0x023d9f6 },
+ { 0x081ea5f,0x045959d,0x190c841,0x3a78d31,0x0e7d2dd,0x1414fea,
+ 0x1d43f40,0x22d77ff,0x2b9c072,0x03e115c } },
+ /* 94 */
+ { { 0x3af71c9,0x29e9c65,0x25655e1,0x111e9cd,0x3a14494,0x3875418,
+ 0x34ae070,0x0b06686,0x310616b,0x03b7b89 },
+ { 0x1734121,0x00d3d44,0x29f0b2f,0x1552897,0x31cac6e,0x1030bb3,
+ 0x0148f3a,0x35fd237,0x29b44eb,0x027f49f } },
+ /* 95 */
+ { { 0x2e2cb16,0x1d962bd,0x19b63cc,0x0b3f964,0x3e3eb7d,0x1a35560,
+ 0x0c58161,0x3ce1d6a,0x3b6958f,0x029030b },
+ { 0x2dcc158,0x3b1583f,0x30568c9,0x31957c8,0x27ad804,0x28c1f84,
+ 0x3967049,0x37b3f64,0x3b87dc6,0x0266f26 } },
+ /* 96 */
+ { { 0x27dafc6,0x2548764,0x0d1984a,0x1a57027,0x252c1fb,0x24d9b77,
+ 0x1581a0f,0x1f99276,0x10ba16d,0x026af88 },
+ { 0x0915220,0x2be1292,0x16c6480,0x1a93760,0x2fa7317,0x1a07296,
+ 0x1539871,0x112c31f,0x25787f3,0x01e2070 } },
+ /* 97 */
+ { { 0x0bcf3ff,0x266d478,0x34f6933,0x31449fd,0x00d02cb,0x340765a,
+ 0x3465a2d,0x225023e,0x319a30e,0x00579b8 },
+ { 0x20e05f4,0x35b834f,0x0404646,0x3710d62,0x3fad7bd,0x13e1434,
+ 0x21c7d1c,0x1cb3af9,0x2cf1911,0x003957e } },
+ /* 98 */
+ { { 0x0787564,0x36601be,0x1ce67e9,0x084c7a1,0x21a3317,0x2067a35,
+ 0x0158cab,0x195ddac,0x1766fe9,0x035cf42 },
+ { 0x2b7206e,0x20d0947,0x3b42424,0x03f1862,0x0a51929,0x38c2948,
+ 0x0bb8595,0x2942d77,0x3748f15,0x0249428 } },
+ /* 99 */
+ { { 0x2577410,0x3c23e2f,0x28c6caf,0x00d41de,0x0fd408a,0x30298e9,
+ 0x363289e,0x2302fc7,0x082c1cc,0x01dd050 },
+ { 0x30991cd,0x103e9ba,0x029605a,0x19927f7,0x0c1ca08,0x0c93f50,
+ 0x28a3c7b,0x082e4e9,0x34d12eb,0x0232c13 } },
+ /* 100 */
+ { { 0x106171c,0x0b4155a,0x0c3fb1c,0x336c090,0x19073e9,0x2241a10,
+ 0x0e6b4fd,0x0ed476e,0x1ef4712,0x039390a },
+ { 0x0ec36f4,0x3754f0e,0x2a270b8,0x007fd2d,0x0f9d2dc,0x1e6a692,
+ 0x066e078,0x1954974,0x2ff3c6e,0x00def28 } },
+ /* 101 */
+ { { 0x3562470,0x0b8f1f7,0x0ac94cd,0x28b0259,0x244f272,0x031e4ef,
+ 0x2d5df98,0x2c8a9f1,0x2dc3002,0x016644f },
+ { 0x350592a,0x0e6a0d5,0x1e027a1,0x2039e0f,0x399e01d,0x2817593,
+ 0x0c0375e,0x3889b3e,0x24ab013,0x010de1b } },
+ /* 102 */
+ { { 0x256b5a6,0x0ac3b67,0x28f9ff3,0x29b67f1,0x30750d9,0x25e11a9,
+ 0x15e8455,0x279ebb0,0x298b7e7,0x0218e32 },
+ { 0x2fc24b2,0x2b82582,0x28f22f5,0x2bd36b3,0x305398e,0x3b2e9e3,
+ 0x365dd0a,0x29bc0ed,0x36a7b3a,0x007b374 } },
+ /* 103 */
+ { { 0x05ff2f3,0x2b3589b,0x29785d3,0x300a1ce,0x0a2d516,0x0844355,
+ 0x14c9fad,0x3ccb6b6,0x385d459,0x0361743 },
+ { 0x0b11da3,0x002e344,0x18c49f7,0x0c29e0c,0x1d2c22c,0x08237b3,
+ 0x2988f49,0x0f18955,0x1c3b4ed,0x02813c6 } },
+ /* 104 */
+ { { 0x17f93bd,0x249323b,0x11f6087,0x174e4bd,0x3cb64ac,0x086dc6b,
+ 0x2e330a8,0x142c1f2,0x2ea5c09,0x024acbb },
+ { 0x1b6e235,0x3132521,0x00f085a,0x2a4a4db,0x1ab2ca4,0x0142224,
+ 0x3aa6b3e,0x09db203,0x2215834,0x007b9e0 } },
+ /* 105 */
+ { { 0x23e79f7,0x28b8039,0x1906a60,0x2cbce67,0x1f590e7,0x181f027,
+ 0x21054a6,0x3854240,0x2d857a6,0x03cfcb3 },
+ { 0x10d9b55,0x1443cfc,0x2648200,0x2b36190,0x09d2fcf,0x22f439f,
+ 0x231aa7e,0x3884395,0x0543da3,0x003d5a9 } },
+ /* 106 */
+ { { 0x043e0df,0x06ffe84,0x3e6d5b2,0x3327001,0x26c74b6,0x12a145e,
+ 0x256ec0d,0x3898c69,0x3411969,0x02f63c5 },
+ { 0x2b7494a,0x2eee1af,0x38388a9,0x1bd17ce,0x21567d4,0x13969e6,
+ 0x3a12a7a,0x3e8277d,0x03530cc,0x00b4687 } },
+ /* 107 */
+ { { 0x06508da,0x38e04d4,0x15a7192,0x312875e,0x3336180,0x2a6512c,
+ 0x1b59497,0x2e91b37,0x25eb91f,0x02841e9 },
+ { 0x394d639,0x0747143,0x37d7e6d,0x1d62962,0x08b4af3,0x34df287,
+ 0x3c5584b,0x26bc869,0x20af87a,0x0060f5d } },
+ /* 108 */
+ { { 0x1de59a4,0x1a5c443,0x2f8729d,0x01c3a2f,0x0f1ad8d,0x3cbaf9e,
+ 0x1b49634,0x35d508a,0x39dc269,0x0075105 },
+ { 0x390d30e,0x37033e0,0x110cb32,0x14c37a0,0x20a3b27,0x2f00ce6,
+ 0x2f1dc52,0x34988c6,0x0c29606,0x01dc7e7 } },
+ /* 109 */
+ { { 0x1040739,0x24f9de1,0x2939999,0x2e6009a,0x244539d,0x17e3f09,
+ 0x00f6f2f,0x1c63b3d,0x2310362,0x019109e },
+ { 0x1428aa8,0x3cb61e1,0x09a84f4,0x0ffafed,0x07b7adc,0x08f406b,
+ 0x1b2c6df,0x035b480,0x3496ae9,0x012766d } },
+ /* 110 */
+ { { 0x35d1099,0x2362f10,0x1a08cc7,0x13a3a34,0x12adbcd,0x32da290,
+ 0x02e2a02,0x151140b,0x01b3f60,0x0240df6 },
+ { 0x34c7b61,0x2eb09c1,0x172e7cd,0x2ad5eff,0x2fe2031,0x25b54d4,
+ 0x0cec965,0x18e7187,0x26a7cc0,0x00230f7 } },
+ /* 111 */
+ { { 0x2d552ab,0x374083d,0x01f120f,0x2601736,0x156baff,0x04d44a4,
+ 0x3b7c3e9,0x1acbc1b,0x0424579,0x031a425 },
+ { 0x1231bd1,0x0eba710,0x020517b,0x21d7316,0x21eac6e,0x275a848,
+ 0x0837abf,0x0eb0082,0x302cafe,0x00fe8f6 } },
+ /* 112 */
+ { { 0x1058880,0x28f9941,0x03f2d75,0x3bd90e5,0x17da365,0x2ac9249,
+ 0x07861cf,0x023fd05,0x1b0fdb8,0x031712f },
+ { 0x272b56b,0x04f8d2c,0x043a735,0x25446e4,0x1c8327e,0x221125a,
+ 0x0ce37df,0x2dad7f6,0x39446c2,0x00b55b6 } },
+ /* 113 */
+ { { 0x346ac6b,0x05e0bff,0x2425246,0x0981e8b,0x1d19f79,0x2692378,
+ 0x3ea3c40,0x2e90beb,0x19de503,0x003d5af },
+ { 0x05cda49,0x353b44d,0x299d137,0x3f205bc,0x2821158,0x3ad0d00,
+ 0x06a54aa,0x2d7c79f,0x39d1173,0x01000ee } },
+ /* 114 */
+ { { 0x0803387,0x3a06268,0x14043b8,0x3d4e72f,0x1ece115,0x0a1dfc8,
+ 0x17208dd,0x0be790a,0x122a07f,0x014dd95 },
+ { 0x0a4182d,0x202886a,0x1f79a49,0x1e8c867,0x0a2bbd0,0x28668b5,
+ 0x0d0a2e1,0x115259d,0x3586c5d,0x01e815b } },
+ /* 115 */
+ { { 0x18a2a47,0x2c95627,0x2773646,0x1230f7c,0x15b5829,0x2fc354e,
+ 0x2c000ea,0x099d547,0x2f17a1a,0x01df520 },
+ { 0x3853948,0x06f6561,0x3feeb8a,0x2f5b3ef,0x3a6f817,0x01a0791,
+ 0x2ec0578,0x2c392ad,0x12b2b38,0x0104540 } },
+ /* 116 */
+ { { 0x1e28ced,0x0fc3d1b,0x2c473c7,0x1826c4f,0x21d5da7,0x39718e4,
+ 0x38ce9e6,0x0251986,0x172fbea,0x0337c11 },
+ { 0x053c3b0,0x0f162db,0x043c1cb,0x04111ee,0x297fe3c,0x32e5e03,
+ 0x2b8ae12,0x0c427ec,0x1da9738,0x03b9c0f } },
+ /* 117 */
+ { { 0x357e43a,0x054503f,0x11b8345,0x34ec6e0,0x2d44660,0x3d0ae61,
+ 0x3b5dff8,0x33884ac,0x09da162,0x00a82b6 },
+ { 0x3c277ba,0x129a51a,0x027664e,0x1530507,0x0c788c9,0x2afd89d,
+ 0x1aa64cc,0x1196450,0x367ac2b,0x0358b42 } },
+ /* 118 */
+ { { 0x0054ac4,0x1761ecb,0x378839c,0x167c9f7,0x2570058,0x0604a35,
+ 0x37cbf3b,0x0909bb7,0x3f2991c,0x02ce688 },
+ { 0x0b16ae5,0x212857c,0x351b952,0x2c684db,0x30c6a05,0x09c01e0,
+ 0x23c137f,0x1331475,0x092c067,0x0013b40 } },
+ /* 119 */
+ { { 0x2e90393,0x0617466,0x24e61f4,0x0a528f5,0x03047b4,0x2153f05,
+ 0x0001a69,0x30e1eb8,0x3c10177,0x0282a47 },
+ { 0x22c831e,0x28fc06b,0x3e16ff0,0x208adc9,0x0bb76ae,0x28c1d6d,
+ 0x12c8a15,0x031063c,0x1889ed2,0x002133e } },
+ /* 120 */
+ { { 0x0a6becf,0x14277bf,0x3328d98,0x201f7fe,0x12fceae,0x1de3a2e,
+ 0x0a15c44,0x3ddf976,0x1b273ab,0x0355e55 },
+ { 0x1b5d4f1,0x369e78c,0x3a1c210,0x12cf3e9,0x3aa52f0,0x309f082,
+ 0x112089d,0x107c753,0x24202d1,0x023853a } },
+ /* 121 */
+ { { 0x2897042,0x140d17c,0x2c4aeed,0x07d0d00,0x18d0533,0x22f7ec8,
+ 0x19c194c,0x3456323,0x2372aa4,0x0165f86 },
+ { 0x30bd68c,0x1fb06b3,0x0945032,0x372ac09,0x06d4be0,0x27f8fa1,
+ 0x1c8d7ac,0x137a96e,0x236199b,0x0328fc0 } },
+ /* 122 */
+ { { 0x170bd20,0x2842d58,0x1de7592,0x3c5b4fd,0x20ea897,0x12cab78,
+ 0x363ff14,0x01f928c,0x17e309c,0x02f79ff },
+ { 0x0f5432c,0x2edb4ae,0x044b516,0x32f810d,0x2210dc1,0x23e56d6,
+ 0x301e6ff,0x34660f6,0x10e0a7d,0x02d88eb } },
+ /* 123 */
+ { { 0x0c7b65b,0x2f59d58,0x2289a75,0x2408e92,0x1ab8c55,0x1ec99e5,
+ 0x220fd0d,0x04defe0,0x24658ec,0x035aa8b },
+ { 0x138bb85,0x2f002d4,0x295c10a,0x08760ce,0x28c31d1,0x1c0a8cb,
+ 0x0ff00b1,0x144eac9,0x2e02dcc,0x0044598 } },
+ /* 124 */
+ { { 0x3b42b87,0x050057b,0x0dff781,0x1c06db1,0x1bd9f5d,0x1f5f04a,
+ 0x2cccd7a,0x143e19b,0x1cb94b7,0x036cfb8 },
+ { 0x34837cf,0x3cf6c3c,0x0d4fb26,0x22ee55e,0x1e7eed1,0x315995f,
+ 0x2cdf937,0x1a96574,0x0425220,0x0221a99 } },
+ /* 125 */
+ { { 0x1b569ea,0x0d33ed9,0x19c13c2,0x107dc84,0x2200111,0x0569867,
+ 0x2dc85da,0x05ef22e,0x0eb018a,0x029c33d },
+ { 0x04a6a65,0x3e5eba3,0x378f224,0x09c04d0,0x036e5cf,0x3df8258,
+ 0x3a609e4,0x1eddef8,0x2abd174,0x02a91dc } },
+ /* 126 */
+ { { 0x2a60cc0,0x1d84c5e,0x115f676,0x1840da0,0x2c79163,0x2f06ed6,
+ 0x198bb4b,0x3e5d37b,0x1dc30fa,0x018469b },
+ { 0x15ee47a,0x1e32f30,0x16a530e,0x2093836,0x02e8962,0x3767b62,
+ 0x335adf3,0x27220db,0x2f81642,0x0173ffe } },
+ /* 127 */
+ { { 0x37a99cd,0x1533fe6,0x05a1c0d,0x27610f1,0x17bf3b9,0x0b1ce78,
+ 0x0a908f6,0x265300e,0x3237dc1,0x01b969a },
+ { 0x3a5db77,0x2d15382,0x0d63ef8,0x1feb3d8,0x0b7b880,0x19820de,
+ 0x11c0c67,0x2af3396,0x38d242d,0x0120688 } },
+ /* 128 */
+ { { 0x1d0b34a,0x05ef00d,0x00a7e34,0x1ae0c9f,0x1440b38,0x300d8b4,
+ 0x37262da,0x3e50e3e,0x14ce0cd,0x00b1044 },
+ { 0x195a0b1,0x173bc6b,0x03622ba,0x2a19f55,0x1c09b37,0x07921b2,
+ 0x16cdd20,0x24a5c9b,0x2bf42ff,0x00811de } },
+ /* 129 */
+ { { 0x0d65dbf,0x145cf06,0x1ad82f7,0x038ce7b,0x077bf94,0x33c4007,
+ 0x22d26bd,0x25ad9c0,0x09ac773,0x02b1990 },
+ { 0x2261cc3,0x2ecdbf1,0x3e908b0,0x3246439,0x0213f7b,0x1179b04,
+ 0x01cebaa,0x0be1595,0x175cc12,0x033a39a } },
+ /* 130 */
+ { { 0x00a67d2,0x086d06f,0x248a0f1,0x0291134,0x362d476,0x166d1cd,
+ 0x044f1d6,0x2d2a038,0x365250b,0x0023f78 },
+ { 0x08bf287,0x3b0f6a1,0x1d6eace,0x20b4cda,0x2c2a621,0x0912520,
+ 0x02dfdc9,0x1b35cd6,0x3d2565d,0x00bdf8b } },
+ /* 131 */
+ { { 0x3770fa7,0x2e4b6f0,0x03f9ae4,0x170de41,0x1095e8d,0x1dd845c,
+ 0x334e9d1,0x00ab953,0x12e9077,0x03196fa },
+ { 0x2fd0a40,0x228c0fd,0x384b275,0x38ef339,0x3e7d822,0x3e5d9ef,
+ 0x24f5854,0x0ece9eb,0x247d119,0x012ffe3 } },
+ /* 132 */
+ { { 0x0ff1480,0x07487c0,0x1b16cd4,0x1f41d53,0x22ab8fb,0x2f83cfa,
+ 0x01d2efb,0x259f6b2,0x2e65772,0x00f9392 },
+ { 0x05303e6,0x23cdb4f,0x23977e1,0x12e4898,0x03bd999,0x0c930f0,
+ 0x170e261,0x180a27b,0x2fd58ec,0x014e22b } },
+ /* 133 */
+ { { 0x25d7713,0x0c5fad7,0x09daad1,0x3b9d779,0x109b985,0x1d3ec98,
+ 0x35bc4fc,0x2f838cb,0x0d14f75,0x0173e42 },
+ { 0x2657b12,0x10d4423,0x19e6760,0x296e5bb,0x2bfd421,0x25c3330,
+ 0x29f51f8,0x0338838,0x24060f0,0x029a62e } },
+ /* 134 */
+ { { 0x3748fec,0x2c5a1bb,0x2cf973d,0x289fa74,0x3e6e755,0x38997bf,
+ 0x0b6544c,0x2b6358c,0x38a7aeb,0x02c50bb },
+ { 0x3d5770a,0x06be7c5,0x012fad3,0x19cb2cd,0x266af3b,0x3ccd677,
+ 0x160d1bd,0x141d5af,0x2965851,0x034625a } },
+ /* 135 */
+ { { 0x3c41c08,0x255eacc,0x22e1ec5,0x2b151a3,0x087de94,0x311cbdb,
+ 0x016b73a,0x368e462,0x20b7981,0x0099ec3 },
+ { 0x262b988,0x1539763,0x21e76e5,0x15445b4,0x1d8ddc7,0x34a9be6,
+ 0x10faf03,0x24e4d18,0x07aa111,0x02d538a } },
+ /* 136 */
+ { { 0x38a876b,0x048ad45,0x04b40a0,0x3fc2144,0x251ff96,0x13ca7dd,
+ 0x0b31ab1,0x3539814,0x28b5f87,0x0212aec },
+ { 0x270790a,0x350e7e0,0x346bd5e,0x276178f,0x22d6cb5,0x3078884,
+ 0x355c1b6,0x15901d7,0x3671765,0x03950db } },
+ /* 137 */
+ { { 0x286e8d5,0x2409788,0x13be53f,0x2d21911,0x0353c95,0x10238e8,
+ 0x32f5bde,0x3a67b60,0x28b5b9c,0x001013d },
+ { 0x381e8e5,0x0cef7a9,0x2f5bcad,0x06058f0,0x33cdf50,0x04672a8,
+ 0x1769600,0x31c055d,0x3df0ac1,0x00e9098 } },
+ /* 138 */
+ { { 0x2eb596d,0x197b326,0x12b4c29,0x39c08f2,0x101ea03,0x3804e58,
+ 0x04b4b62,0x28d9d1c,0x13f905e,0x0032a3f },
+ { 0x11b2b61,0x08e9095,0x0d06925,0x270e43f,0x21eb7a8,0x0e4a98f,
+ 0x31d2be0,0x030cf9f,0x2644ddb,0x025b728 } },
+ /* 139 */
+ { { 0x07510af,0x2ed0e8e,0x2a01203,0x2a2a68d,0x0846fea,0x3e540de,
+ 0x3a57702,0x1677348,0x2123aad,0x010d8f8 },
+ { 0x0246a47,0x0e871d0,0x124dca4,0x34b9577,0x2b362b8,0x363ebe5,
+ 0x3086045,0x26313e6,0x15cd8bb,0x0210384 } },
+ /* 140 */
+ { { 0x023e8a7,0x0817884,0x3a0bf12,0x3376371,0x3c808a8,0x18e9777,
+ 0x12a2721,0x35b538a,0x2bd30de,0x017835a },
+ { 0x0fc0f64,0x1c8709f,0x2d8807a,0x0743957,0x242eec0,0x347e76c,
+ 0x27bef91,0x289689a,0x0f42945,0x01f7a92 } },
+ /* 141 */
+ { { 0x1060a81,0x3dbc739,0x1615abd,0x1cbe3e5,0x3e79f9c,0x1ab09a2,
+ 0x136c540,0x05b473f,0x2beebfd,0x02af0a8 },
+ { 0x3e2eac7,0x19be474,0x04668ac,0x18f4b74,0x36f10ba,0x0a0b4c6,
+ 0x10e3770,0x3bf059e,0x3946c7e,0x013a8d4 } },
+ /* 142 */
+ { { 0x266309d,0x28be354,0x1a3eed8,0x3020651,0x10a51c6,0x1e31770,
+ 0x0af45a5,0x3ff0f3b,0x2891c94,0x00e9db9 },
+ { 0x17b0d0f,0x33a291f,0x0a5f9aa,0x25a3d61,0x2963ace,0x39a5fef,
+ 0x230c724,0x1919146,0x10a465e,0x02084a8 } },
+ /* 143 */
+ { { 0x3ab8caa,0x31870f3,0x2390ef7,0x2103850,0x218eb8e,0x3a5ccf2,
+ 0x1dff677,0x2c59334,0x371599c,0x02a9f2a },
+ { 0x0837bd1,0x3249cef,0x35d702f,0x3430dab,0x1c06407,0x108f692,
+ 0x221292f,0x05f0c5d,0x073fe06,0x01038e0 } },
+ /* 144 */
+ { { 0x3bf9b7c,0x2020929,0x30d0f4f,0x080fef8,0x3365d23,0x1f3e738,
+ 0x3e53209,0x1549afe,0x300b305,0x038d811 },
+ { 0x0c6c2c7,0x2e6445b,0x3ee64dc,0x022e932,0x0726837,0x0deb67b,
+ 0x1ed4346,0x3857f73,0x277a3de,0x01950b5 } },
+ /* 145 */
+ { { 0x36c377a,0x0adb41e,0x08be3f3,0x11e40d1,0x36cb038,0x036a2bd,
+ 0x3dd3a82,0x1bc875b,0x2ee09bb,0x02994d2 },
+ { 0x035facf,0x05e0344,0x07e630a,0x0ce772d,0x335e55a,0x111fce4,
+ 0x250fe1c,0x3bc89ba,0x32fdc9a,0x03cf2d9 } },
+ /* 146 */
+ { { 0x355fd83,0x1c67f8e,0x1d10eb3,0x1b21d77,0x0e0d7a4,0x173a9e1,
+ 0x2c9fa90,0x1c39cce,0x22eaae8,0x01f2bea },
+ { 0x153b338,0x0534107,0x26c69b8,0x283be1f,0x3e0acc0,0x059cac3,
+ 0x13d1081,0x148bbee,0x3c1b9bd,0x002aac4 } },
+ /* 147 */
+ { { 0x2681297,0x3389e34,0x146addc,0x2c6d425,0x2cb350e,0x1986abc,
+ 0x0431737,0x04ba4b7,0x2028470,0x012e469 },
+ { 0x2f8ddcf,0x3c4255c,0x1af4dcf,0x07a6a44,0x208ebf6,0x0dc90c3,
+ 0x34360ac,0x072ad23,0x0537232,0x01254d3 } },
+ /* 148 */
+ { { 0x07b7e9d,0x3df5c7c,0x116f83d,0x28c4f35,0x3a478ef,0x3011fb8,
+ 0x2f264b6,0x317b9e3,0x04fd65a,0x032bd1b },
+ { 0x2aa8266,0x3431de4,0x04bba04,0x19a44da,0x0edf454,0x392c5ac,
+ 0x265168a,0x1dc3d5b,0x25704c6,0x00533a7 } },
+ /* 149 */
+ { { 0x25e8f91,0x1178fa5,0x2492994,0x2eb2c3c,0x0d3aca1,0x0322828,
+ 0x1cc70f9,0x269c74c,0x0a53e4c,0x006edc2 },
+ { 0x18bdd7a,0x2a79a55,0x26b1d5c,0x0200628,0x0734a05,0x3273c7b,
+ 0x13aa714,0x0040ac2,0x2f2da30,0x03e7449 } },
+ /* 150 */
+ { { 0x3f9563e,0x2f29eab,0x14a0749,0x3fad264,0x1dd077a,0x3d7c59c,
+ 0x3a0311b,0x331a789,0x0b9729e,0x0201ebf },
+ { 0x1b08b77,0x2a4cdf2,0x3e387f8,0x21510f1,0x286c3a7,0x1dbf62e,
+ 0x3afa594,0x3363217,0x0d16568,0x01d46b7 } },
+ /* 151 */
+ { { 0x0715c0d,0x28e2d04,0x17f78ae,0x1c63dda,0x1d113ea,0x0fefc1b,
+ 0x1eab149,0x1d0fd99,0x0682537,0x00a7b11 },
+ { 0x10bebbc,0x11c672d,0x14223d9,0x2ff9141,0x1399ee5,0x34b7b6c,
+ 0x0d5b3a8,0x01df643,0x0e392a4,0x03fe4dc } },
+ /* 152 */
+ { { 0x2b75b65,0x0b5a6f1,0x11c559a,0x3549999,0x24188f8,0x37a75f4,
+ 0x29f33e3,0x34068a2,0x38ba2a9,0x025dd91 },
+ { 0x29af2c7,0x0988b64,0x0923885,0x1b539a4,0x1334f5d,0x226947a,
+ 0x2cc7e5a,0x20beb39,0x13fac2f,0x01d298c } },
+ /* 153 */
+ { { 0x35f079c,0x137f76d,0x2fbbb2f,0x254638d,0x185b07c,0x1f34db7,
+ 0x2cfcf0e,0x218f46d,0x2150ff4,0x02add6f },
+ { 0x33fc9b7,0x0d9f005,0x0fd081b,0x0834965,0x2b90a74,0x102448d,
+ 0x3dbf03c,0x167d857,0x02e0b44,0x013afab } },
+ /* 154 */
+ { { 0x09f2c53,0x317f9d7,0x1411eb6,0x0463aba,0x0d25220,0x256b176,
+ 0x087633f,0x2bff322,0x07b2c1b,0x037e662 },
+ { 0x10aaecb,0x23bb4a1,0x2272bb7,0x06c075a,0x09d4918,0x0736f2b,
+ 0x0dd511b,0x101625e,0x0a7779f,0x009ec10 } },
+ /* 155 */
+ { { 0x33b2eb2,0x0176dfd,0x2118904,0x022386c,0x2e0df85,0x2588c9f,
+ 0x1b71525,0x28fd540,0x137e4cf,0x02ce4f7 },
+ { 0x3d75165,0x0c39ecf,0x3554a12,0x30af34c,0x2d66344,0x3ded408,
+ 0x36f1be0,0x0d065b0,0x012d046,0x0025623 } },
+ /* 156 */
+ { { 0x2601c3b,0x1824fc0,0x335fe08,0x3e33d70,0x0fb0252,0x252bfca,
+ 0x1cf2808,0x1922e55,0x1a9db9f,0x020721e },
+ { 0x2f56c51,0x39a1f31,0x218c040,0x1a4fc5d,0x3fed471,0x0164d4e,
+ 0x388a419,0x06f1113,0x0f55fc1,0x03e8352 } },
+ /* 157 */
+ { { 0x1608e4d,0x3872778,0x022cbc6,0x044d60a,0x3010dda,0x15fb0b5,
+ 0x37ddc11,0x19f5bda,0x156b6a3,0x023a838 },
+ { 0x383b3b4,0x1380bc8,0x353ca35,0x250fc07,0x169966b,0x3780f29,
+ 0x36632b2,0x2d6b13f,0x124fa00,0x00fd6ae } },
+ /* 158 */
+ { { 0x1739efb,0x2ec3656,0x2c0d337,0x3d39faf,0x1c751b0,0x04699f4,
+ 0x252dd64,0x095b8b6,0x0872b74,0x022f1da },
+ { 0x2d3d253,0x38edca0,0x379fa5b,0x287d635,0x3a9f679,0x059d9ee,
+ 0x0ac168e,0x3cd3e87,0x19060fc,0x02ce1bc } },
+ /* 159 */
+ { { 0x3edcfc2,0x0f04d4b,0x2f0d31f,0x1898be2,0x25396bf,0x15ca230,
+ 0x02b4eae,0x2713668,0x0f71b06,0x0132d18 },
+ { 0x38095ea,0x1ed34d6,0x3603ae6,0x165bf01,0x192bbf8,0x1852859,
+ 0x075f66b,0x1488f85,0x10895ef,0x014b035 } },
+ /* 160 */
+ { { 0x1339848,0x3084385,0x0c8d231,0x3a1c1de,0x0e87a28,0x255b85c,
+ 0x1de6616,0x2702e74,0x1382bb0,0x012b0f2 },
+ { 0x198987d,0x381545a,0x34d619b,0x312b827,0x18b2376,0x28fe4cf,
+ 0x20b7651,0x017d077,0x0c7e397,0x00e0365 } },
+ /* 161 */
+ { { 0x1542e75,0x0d56aa0,0x39b701a,0x287b806,0x396c724,0x0935c21,
+ 0x3a29776,0x0debdac,0x171de26,0x00b38f8 },
+ { 0x1d5bc1a,0x3fad27d,0x22b5cfe,0x1f89ddf,0x0a65560,0x144dd5b,
+ 0x2aac2f9,0x139353f,0x0520b62,0x00b9b36 } },
+ /* 162 */
+ { { 0x031c31d,0x16552e3,0x1a0c368,0x0016fc8,0x168533d,0x171e7b2,
+ 0x17626e7,0x275502f,0x14742c6,0x03285dd },
+ { 0x2d2dbb2,0x3b6bffd,0x1d18cc6,0x2f45d2a,0x0fd0d8c,0x2915e3a,
+ 0x1e8793a,0x0b39a1d,0x3139cab,0x02a5da9 } },
+ /* 163 */
+ { { 0x3fb353d,0x147c6e4,0x3a720a6,0x22d5ff3,0x1d75cab,0x06c54a0,
+ 0x08cfa73,0x12666aa,0x3170a1f,0x021c829 },
+ { 0x13e1b90,0x3a34dda,0x1fc38c3,0x02c5bdb,0x2d345dc,0x14aa1d0,
+ 0x28d00ab,0x224f23a,0x329c769,0x025c67b } },
+ /* 164 */
+ { { 0x0e35909,0x3bb6356,0x0116820,0x370cf77,0x29366d8,0x3881409,
+ 0x3999d06,0x013075f,0x176e157,0x02941ca },
+ { 0x0e70b2e,0x28dfab1,0x2a8a002,0x15da242,0x084dcf6,0x116ca97,
+ 0x31bf186,0x1dc9735,0x09df7b7,0x0264e27 } },
+ /* 165 */
+ { { 0x2da7a4b,0x3023c9e,0x1366238,0x00ff4e2,0x03abe9d,0x19bd44b,
+ 0x272e897,0x20b91ad,0x2aa202c,0x02a2201 },
+ { 0x380184e,0x08112b4,0x0b85660,0x31049aa,0x3a8cb78,0x36113c5,
+ 0x1670c0a,0x373f9e7,0x3fb4738,0x00010ef } },
+ /* 166 */
+ { { 0x2d5192e,0x26d770d,0x32af8d5,0x34d1642,0x1acf885,0x05805e0,
+ 0x166d0a1,0x1219a0d,0x301ba6c,0x014bcfb },
+ { 0x2dcb64d,0x19cca83,0x379f398,0x08e01a0,0x10a482c,0x0103cc2,
+ 0x0be5fa7,0x1f9d45b,0x1899ef2,0x00ca5af } },
+ /* 167 */
+ { { 0x14d81d7,0x2aea251,0x1b3c476,0x3bd47ae,0x29eade7,0x0715e61,
+ 0x1a21cd8,0x1c7a586,0x2bfaee5,0x00ee43f },
+ { 0x096f7cb,0x0c08f95,0x1bc4939,0x361fed4,0x255be41,0x26fad73,
+ 0x31dd489,0x02c600f,0x29d9f81,0x01ba201 } },
+ /* 168 */
+ { { 0x03ea1db,0x1eac46d,0x1292ce3,0x2a54967,0x20a7ff1,0x3e13c61,
+ 0x1b02218,0x2b44e14,0x3eadefa,0x029c88a },
+ { 0x30a9144,0x31e3b0a,0x19c5a2a,0x147cbe9,0x05a0240,0x051f38e,
+ 0x11eca56,0x31a4247,0x123bc2a,0x02fa535 } },
+ /* 169 */
+ { { 0x3226ce7,0x1251782,0x0b7072f,0x11e59fa,0x2b8afd7,0x169b18f,
+ 0x2a46f18,0x31d9bb7,0x2fe9be8,0x01de0b7 },
+ { 0x1b38626,0x34aa90f,0x3ad1760,0x21ddbd9,0x3460ae7,0x1126736,
+ 0x1b86fc5,0x0b92cd0,0x167a289,0x000e0e1 } },
+ /* 170 */
+ { { 0x1ec1a0f,0x36bbf5e,0x1c972d8,0x3f73ace,0x13bbcd6,0x23d86a5,
+ 0x175ffc5,0x2d083d5,0x2c4adf7,0x036f661 },
+ { 0x1f39eb7,0x2a20505,0x176c81a,0x3d6e636,0x16ee2fc,0x3cbdc5f,
+ 0x25475dc,0x2ef4151,0x3c46860,0x0238934 } },
+ /* 171 */
+ { { 0x2587390,0x3639526,0x0588749,0x13c32fb,0x212bb19,0x09660f1,
+ 0x207da4b,0x2bf211b,0x1c4407b,0x01506a6 },
+ { 0x24c8842,0x105a498,0x05ffdb2,0x0ab61b0,0x26044c1,0x3dff3d8,
+ 0x1d14b44,0x0d74716,0x049f57d,0x030024b } },
+ /* 172 */
+ { { 0x32e61ef,0x31d70f7,0x35cad3c,0x320b86c,0x07e8841,0x027ca7d,
+ 0x2d30d19,0x2513718,0x2347286,0x01d7901 },
+ { 0x3c237d0,0x107f16e,0x01c9e7d,0x3c3b13c,0x0c9537b,0x20af54d,
+ 0x051a162,0x2161a47,0x258c784,0x016df2d } },
+ /* 173 */
+ { { 0x228ead1,0x29c2122,0x07f6964,0x023f4ed,0x1802dc5,0x19f96ce,
+ 0x24bfd17,0x25e866b,0x2ba8df0,0x01eb84f },
+ { 0x2dd384e,0x05bbe3a,0x3f06fd2,0x366dacb,0x30361a2,0x2f36d7c,
+ 0x0b98784,0x38ff481,0x074e2a8,0x01e1f60 } },
+ /* 174 */
+ { { 0x17fbb1c,0x0975add,0x1debc5e,0x2cb2880,0x3e47bdd,0x3488cff,
+ 0x15e9a36,0x2121129,0x0199ef2,0x017088a },
+ { 0x0315250,0x352a162,0x17c1773,0x0ae09c2,0x321b21a,0x3bd74cf,
+ 0x3c4ea1d,0x3cac2ad,0x3abbaf0,0x039174d } },
+ /* 175 */
+ { { 0x0511c8a,0x3c78d0a,0x2cd3d2d,0x322f729,0x3ebb229,0x09f0e69,
+ 0x0a71a76,0x2e74d5e,0x12284df,0x03b5ef0 },
+ { 0x3dea561,0x0a9b7e4,0x0ed1cf2,0x237523c,0x05443f1,0x2eb48fa,
+ 0x3861405,0x1b49f62,0x0c945ca,0x02ab25f } },
+ /* 176 */
+ { { 0x16bd00a,0x13a9d28,0x3cc1eb5,0x2b7d702,0x2d839e9,0x3e6ff01,
+ 0x2bb7f11,0x3713824,0x3b31163,0x00c63e5 },
+ { 0x30d7138,0x0316fb0,0x0220ecc,0x08eaf0c,0x244e8df,0x0088d81,
+ 0x37972fb,0x3fd34ae,0x2a19a84,0x03e907e } },
+ /* 177 */
+ { { 0x2642269,0x0b65d29,0x03bd440,0x33a6ede,0x3c81814,0x2507982,
+ 0x0d38e47,0x3a788e6,0x32c1d26,0x00e2eda },
+ { 0x2577f87,0x392895a,0x3e1cc64,0x14f7047,0x08b52d2,0x08a01ca,
+ 0x336abf6,0x00697fc,0x105ce76,0x0253742 } },
+ /* 178 */
+ { { 0x293f92a,0x33df737,0x3315156,0x32e26d7,0x0a01333,0x26579d4,
+ 0x004df9c,0x0aba409,0x067d25c,0x02481de },
+ { 0x3f39d44,0x1c78042,0x13d7e24,0x0825aed,0x35f2c90,0x3270f63,
+ 0x04b7b35,0x3ad4531,0x28bd29b,0x0207a10 } },
+ /* 179 */
+ { { 0x077199f,0x270aeb1,0x0dd96dd,0x3b9ad7b,0x28cb8ee,0x3903f43,
+ 0x37db3fe,0x292c62b,0x362dbbf,0x006e52a },
+ { 0x247f143,0x0362cf3,0x216344f,0x3f18fd1,0x351e623,0x31664e0,
+ 0x0f270fc,0x243bbc6,0x2280555,0x001a8e3 } },
+ /* 180 */
+ { { 0x3355b49,0x2c04e6c,0x399b2e5,0x182d3af,0x020e265,0x09a7cf7,
+ 0x0ffa6bd,0x353e302,0x02083d9,0x029ecdb },
+ { 0x33e8830,0x0570e86,0x1c0b64d,0x386a27e,0x0d5fcea,0x0b45a4c,
+ 0x2ee4a2e,0x0a8833f,0x2b4a282,0x02f9531 } },
+ /* 181 */
+ { { 0x191167c,0x36cf7e3,0x225ed6c,0x1e79e99,0x0517c3f,0x11ab1fd,
+ 0x05648f3,0x08aedc4,0x1abeae0,0x02fcc29 },
+ { 0x3828a68,0x1e16fa4,0x30368e7,0x0c9fcfb,0x25161c3,0x24851ac,
+ 0x1b5feb5,0x344eb84,0x0de2732,0x0347208 } },
+ /* 182 */
+ { { 0x038b363,0x384d1e4,0x2519043,0x151ac17,0x158c11f,0x009b2b4,
+ 0x257abe6,0x2368d3f,0x3ed68a1,0x02df45e },
+ { 0x29c2559,0x2962478,0x3d8444c,0x1d96fff,0x04f7a03,0x1391a52,
+ 0x0de4af7,0x3319126,0x15e6412,0x00e65ff } },
+ /* 183 */
+ { { 0x3d61507,0x1d1a0a2,0x0d2af20,0x354d299,0x329e132,0x2a28578,
+ 0x2ddfb08,0x04fa3ff,0x1293c6c,0x003bae2 },
+ { 0x3e259f8,0x1a68fa9,0x3e67e9b,0x39b44f9,0x1ce1db7,0x347e9a1,
+ 0x3318f6a,0x2dbbc9d,0x2f8c922,0x008a245 } },
+ /* 184 */
+ { { 0x212ab5b,0x2b896c2,0x0136959,0x07e55ef,0x0cc1117,0x05b8ac3,
+ 0x18429ed,0x025fa01,0x11d6e93,0x03b016b },
+ { 0x03f3708,0x2e96fab,0x1d77157,0x0d4c2d6,0x131baf9,0x0608d39,
+ 0x3552371,0x06cdd1e,0x1567ff1,0x01f4c50 } },
+ /* 185 */
+ { { 0x2dfefab,0x270173d,0x37077bd,0x1a372cd,0x1be2f22,0x28e2ee5,
+ 0x3ead973,0x35e8f94,0x2fc9bc1,0x03a7399 },
+ { 0x36a02a1,0x2855d9b,0x00ed75a,0x37d8398,0x138c087,0x233706e,
+ 0x147f346,0x01947e2,0x3017228,0x0365942 } },
+ /* 186 */
+ { { 0x2057e60,0x2d31296,0x25e4504,0x2fa37bc,0x1cbccc3,0x1f0732f,
+ 0x3532081,0x2de8a98,0x19a804e,0x005359a },
+ { 0x31f411a,0x2a10576,0x369c2c8,0x02fe035,0x109fbaf,0x30bddeb,
+ 0x1eef901,0x1662ad3,0x0410d43,0x01bd31a } },
+ /* 187 */
+ { { 0x2c24a96,0x1b7d3a5,0x19a3872,0x217f2f6,0x2534dbc,0x2cab8c2,
+ 0x066ef28,0x26aecf1,0x0fd6118,0x01310d4 },
+ { 0x055b8da,0x1fdc5be,0x38a1296,0x25118f0,0x341a423,0x2ba4cd0,
+ 0x3e1413e,0x062d70d,0x2425a31,0x029c9b4 } },
+ /* 188 */
+ { { 0x08c1086,0x1acfba5,0x22e1dae,0x0f72f4e,0x3f1de50,0x0f408bc,
+ 0x35ed3f0,0x3ce48fc,0x282cc6c,0x004d8e7 },
+ { 0x1afaa86,0x24e3ef3,0x22589ac,0x3ec9952,0x1f45bc5,0x14144ca,
+ 0x23b26e4,0x0d68c65,0x1e1c1a3,0x032a4d9 } },
+ /* 189 */
+ { { 0x03b2d20,0x16b1d53,0x241b361,0x05e4138,0x1742a54,0x32741c7,
+ 0x0521c4c,0x1ca96c2,0x034970b,0x02738a7 },
+ { 0x13e0ad6,0x207dcdb,0x034c8cc,0x27bcbe1,0x18060da,0x33a18b6,
+ 0x2d1d1a6,0x2be60d7,0x3d7ab42,0x012312a } },
+ /* 190 */
+ { { 0x0c7485a,0x06c3310,0x0dbfd22,0x2ef949d,0x0ead455,0x098f4ba,
+ 0x3c76989,0x0cf2d24,0x032f67b,0x01e005f },
+ { 0x30cb5ee,0x0d5da64,0x0ed2b9d,0x2503102,0x1c0d14e,0x1cbc693,
+ 0x37bf552,0x07013e2,0x054de5c,0x014f341 } },
+ /* 191 */
+ { { 0x128ccac,0x1617e97,0x346ebcd,0x158016d,0x25f823e,0x34048ea,
+ 0x39f0a1c,0x3ea3df1,0x1c1d3d7,0x03ba919 },
+ { 0x151803b,0x01967c1,0x2f70781,0x27df39a,0x06c0b59,0x24a239c,
+ 0x15a7702,0x2464d06,0x2a47ae6,0x006db90 } },
+ /* 192 */
+ { { 0x27d04c3,0x024df3d,0x38112e8,0x38a27ba,0x01e312b,0x0965358,
+ 0x35d8879,0x2f4f55a,0x214187f,0x0008936 },
+ { 0x05fe36f,0x2ee18c3,0x1f5f87a,0x1813bd4,0x0580f3c,0x0ed0a7b,
+ 0x0fb1bfb,0x3fcce59,0x2f042bf,0x01820e3 } },
+ /* 193 */
+ { { 0x20bbe99,0x32cbc9f,0x39ee432,0x3cc12a8,0x37bda44,0x3ea4e40,
+ 0x097c7a9,0x0590d7d,0x2022d33,0x018dbac },
+ { 0x3ae00aa,0x3439864,0x2d2ffcf,0x3f8c6b9,0x0875a00,0x3e4e407,
+ 0x3658a29,0x22eb3d0,0x2b63921,0x022113b } },
+ /* 194 */
+ { { 0x33bae58,0x05c749a,0x1f3e114,0x1c45f8e,0x27db3df,0x06a3ab6,
+ 0x37bc7f8,0x1e27b34,0x3dc51fb,0x009eea0 },
+ { 0x3f54de5,0x3d0e7fe,0x1a71a7d,0x02ed7f8,0x0727703,0x2ca5e92,
+ 0x2e8e35d,0x292ad0b,0x13487f3,0x02b6d8b } },
+ /* 195 */
+ { { 0x175df2a,0x05a28a8,0x32e99b1,0x13d8630,0x2082aa0,0x11ac245,
+ 0x24f2e71,0x322cb27,0x17675e7,0x02e643f },
+ { 0x1f37313,0x2765ad3,0x0789082,0x1e742d0,0x11c2055,0x2021dc4,
+ 0x09ae4a7,0x346359b,0x2f94d10,0x0205c1f } },
+ /* 196 */
+ { { 0x3d6ff96,0x1f2ac80,0x336097d,0x3f03610,0x35b851b,0x010b6d2,
+ 0x0823c4d,0x2a9709a,0x2ead5a8,0x00de4b6 },
+ { 0x01afa0b,0x0621965,0x3671528,0x1050b60,0x3f3e9e7,0x2f93829,
+ 0x0825275,0x006e85f,0x35e94b0,0x016af58 } },
+ /* 197 */
+ { { 0x2c4927c,0x3ea1382,0x0f23727,0x0d69f23,0x3e38860,0x2b72837,
+ 0x3cd5ea4,0x2d84292,0x321846a,0x016656f },
+ { 0x29dfa33,0x3e182e0,0x018be90,0x2ba563f,0x2caafe2,0x218c0d9,
+ 0x3baf447,0x1047a6c,0x0a2d483,0x01130cb } },
+ /* 198 */
+ { { 0x00ed80c,0x2a5fc79,0x0a82a74,0x2c4c74b,0x15f938c,0x30b5ab6,
+ 0x32124b7,0x295314f,0x2fb8082,0x007c858 },
+ { 0x20b173e,0x19f315c,0x12f97e4,0x198217c,0x040e8a6,0x3275977,
+ 0x2bc20e4,0x01f2633,0x02bc3e9,0x023c750 } },
+ /* 199 */
+ { { 0x3c4058a,0x24be73e,0x16704f5,0x2d8a4bd,0x3b15e14,0x3076315,
+ 0x1cfe37b,0x36fe715,0x343926e,0x02c6603 },
+ { 0x2c76b09,0x0cf824c,0x3f7898c,0x274cec1,0x11df527,0x18eed18,
+ 0x08ead48,0x23915bc,0x19b3744,0x00a0a2b } },
+ /* 200 */
+ { { 0x0cf4ac5,0x1c8b131,0x0afb696,0x0ff7799,0x2f5ac1a,0x022420c,
+ 0x11baa2e,0x2ce4015,0x1275a14,0x0125cfc },
+ { 0x22eac5d,0x360cd4c,0x3568e59,0x3d42f66,0x35e07ee,0x09620e4,
+ 0x36720fa,0x22b1eac,0x2d0db16,0x01b6b23 } },
+ /* 201 */
+ { { 0x1a835ef,0x1516bbb,0x2d51f7b,0x3487443,0x14aa113,0x0dd06c2,
+ 0x1a65e01,0x379300d,0x35920b9,0x012c8fb },
+ { 0x04c7341,0x2eda00f,0x3c37e82,0x1b4fd62,0x0d45770,0x1478fba,
+ 0x127863a,0x26939cd,0x134ddf4,0x01375c5 } },
+ /* 202 */
+ { { 0x1476cd9,0x1119ca5,0x325bbf9,0x0bf8c69,0x0648d07,0x312d9f8,
+ 0x01c8b8f,0x136ec51,0x0002f4a,0x03f4c5c },
+ { 0x195d0e1,0x10ffd22,0x29aa1cb,0x3443bdc,0x276e695,0x05e6260,
+ 0x15f9764,0x3cd9783,0x18c9569,0x0053eb1 } },
+ /* 203 */
+ { { 0x312ae18,0x280197c,0x3fc9ad9,0x303f324,0x251958d,0x29f4a11,
+ 0x2142408,0x3694366,0x25136ab,0x03b5f1d },
+ { 0x1d4abbc,0x1c3c689,0x13ea462,0x3cfc684,0x39b5dd8,0x2d4654b,
+ 0x09b0755,0x27d4f18,0x3f74d2e,0x03fbf2d } },
+ /* 204 */
+ { { 0x2119185,0x2525eae,0x1ba4bd0,0x0c2ab11,0x1d54e8c,0x294845e,
+ 0x2479dea,0x3602d24,0x17e87e0,0x0060069 },
+ { 0x0afffb0,0x34fe37f,0x1240073,0x02eb895,0x06cf33c,0x2d7f7ef,
+ 0x1d763b5,0x04191e0,0x11e1ead,0x027e3f0 } },
+ /* 205 */
+ { { 0x269544c,0x0e85c57,0x3813158,0x19fc12d,0x20eaf85,0x1e2930c,
+ 0x22a8fd2,0x1a6a478,0x09d3d3a,0x02a74e0 },
+ { 0x1a2da3b,0x30b0b16,0x0847936,0x3d86257,0x138ccbc,0x0f5421a,
+ 0x25244e6,0x23bdd79,0x1aee117,0x00c01ae } },
+ /* 206 */
+ { { 0x1eead28,0x07cac32,0x1fbc0bb,0x17627d3,0x17eef63,0x0b3a24e,
+ 0x0757fdb,0x3dd841d,0x3d745f8,0x002ae17 },
+ { 0x25b4549,0x29f24cf,0x2f21ecd,0x1725e48,0x04be2bb,0x10ee010,
+ 0x1a1274b,0x10b0898,0x27511e9,0x02c48b5 } },
+ /* 207 */
+ { { 0x2a5ae7a,0x181ef99,0x0be33be,0x3e9dab7,0x101e703,0x3adb971,
+ 0x1043014,0x2ebb2be,0x1c1097d,0x027d667 },
+ { 0x3f250ed,0x16dc603,0x20dc6d7,0x1d0d268,0x38eb915,0x02c89e8,
+ 0x1605a41,0x12de109,0x0e08a29,0x01f554a } },
+ /* 208 */
+ { { 0x0c26def,0x163d988,0x2d1ef0f,0x3a960ac,0x1025585,0x0738e20,
+ 0x27d79b0,0x05cc3ef,0x201303f,0x00a333a },
+ { 0x1644ba5,0x2af345e,0x30b8d1d,0x3a01bff,0x31fc643,0x1acf85e,
+ 0x0a76fc6,0x04efe98,0x348a1d0,0x03062eb } },
+ /* 209 */
+ { { 0x1c4216d,0x18e3217,0x02ac34e,0x19c8185,0x200c010,0x17d4192,
+ 0x13a1719,0x165af51,0x09db7a9,0x0277be0 },
+ { 0x3ab8d2c,0x2190b99,0x22b641e,0x0cd88de,0x3b42404,0x1310862,
+ 0x106a6d6,0x23395f5,0x0b06880,0x000d5fe } },
+ /* 210 */
+ { { 0x0d2cc88,0x36f9913,0x339d8e9,0x237c2e3,0x0cc61c2,0x34c2832,
+ 0x309874c,0x2621d28,0x2dd1b48,0x0392806 },
+ { 0x17cd8f9,0x07bab3d,0x0c482ed,0x0faf565,0x31b767d,0x2f4bde1,
+ 0x295c717,0x330c29c,0x179ce10,0x0119b5f } },
+ /* 211 */
+ { { 0x1ada2c7,0x0c624a7,0x227d47d,0x30e3e6a,0x14fa0a6,0x0829678,
+ 0x24fd288,0x2b46a43,0x122451e,0x0319ca9 },
+ { 0x186b655,0x01f3217,0x0af1306,0x0efe6b5,0x2f0235d,0x1c45ca9,
+ 0x2086805,0x1d44e66,0x0faf2a6,0x0178f59 } },
+ /* 212 */
+ { { 0x33b4416,0x10431e6,0x2d99aa6,0x217aac9,0x0cd8fcf,0x2d95a9d,
+ 0x3ff74ad,0x10bf17a,0x295eb8e,0x01b229e },
+ { 0x02a63bd,0x182e9ec,0x004710c,0x00e2e3c,0x06b2f23,0x04b642c,
+ 0x2c37383,0x32a4631,0x022ad82,0x00d22b9 } },
+ /* 213 */
+ { { 0x0cda2fb,0x1d198d7,0x26d27f4,0x286381c,0x022acca,0x24ac7c8,
+ 0x2df7824,0x0b4ba16,0x1e0d9ef,0x03041d3 },
+ { 0x29a65b3,0x0f3912b,0x151bfcf,0x2b0175c,0x0fd71e4,0x39aa5e2,
+ 0x311f50c,0x13ff351,0x3dbc9e5,0x03eeb7e } },
+ /* 214 */
+ { { 0x0a99363,0x0fc7348,0x2775171,0x23db3c8,0x2b91565,0x134d66c,
+ 0x0175cd2,0x1bf365a,0x2b48371,0x02dfe5d },
+ { 0x16dbf74,0x2389357,0x2f36575,0x3f5c70e,0x38d23ba,0x090f7f8,
+ 0x3477600,0x3201523,0x32ecafc,0x03d3506 } },
+ /* 215 */
+ { { 0x1abd48d,0x073ca3f,0x38a451f,0x0d8cb01,0x1ce81be,0x05c51ba,
+ 0x0e29741,0x03c41ab,0x0eae016,0x0060209 },
+ { 0x2e58358,0x1da62d9,0x2358038,0x14b39b2,0x1635687,0x39079b1,
+ 0x380e345,0x1b49608,0x23983cf,0x019f97d } },
+ /* 216 */
+ { { 0x34899ef,0x332e373,0x04c0f89,0x3c27aed,0x1949015,0x09663b2,
+ 0x2f9276b,0x07f1951,0x09a04c1,0x027fbde },
+ { 0x3d2a071,0x19fb3d4,0x1b096d3,0x1fe9146,0x3b10e1a,0x0478bbb,
+ 0x2b3fb06,0x1388329,0x181a99c,0x02f2030 } },
+ /* 217 */
+ { { 0x1eb82e6,0x14dbe39,0x3920972,0x31fd5b2,0x21a484f,0x02d7697,
+ 0x0e21715,0x37c431e,0x2629f8c,0x01249c3 },
+ { 0x26b50ad,0x26deefa,0x0ffc1a3,0x30688e2,0x39a0284,0x041c65e,
+ 0x03eb178,0x0bdfd50,0x2f96137,0x034bb94 } },
+ /* 218 */
+ { { 0x0e0362a,0x334a162,0x194dd37,0x29e3e97,0x2442fa8,0x10d2949,
+ 0x3836e5a,0x2dccebf,0x0bee5ab,0x037ed1e },
+ { 0x33eede6,0x3c739d9,0x2f04a91,0x350ad6c,0x3a5390a,0x14c368b,
+ 0x26f7bf5,0x11ce979,0x0b408df,0x0366850 } },
+ /* 219 */
+ { { 0x28ea498,0x0886d5b,0x2e090e0,0x0a4d58f,0x2623478,0x0d74ab7,
+ 0x2b83913,0x12c6b81,0x18d623f,0x01d8301 },
+ { 0x198aa79,0x26d6330,0x3a7f0b8,0x34bc1ea,0x2f74890,0x378955a,
+ 0x204110f,0x0102538,0x02d8f19,0x01c5066 } },
+ /* 220 */
+ { { 0x14b0f45,0x2838cd3,0x14e16f0,0x0e0e4aa,0x2d9280b,0x0f18757,
+ 0x3324c6b,0x1391ceb,0x1ce89d5,0x00ebe74 },
+ { 0x0930371,0x3de6048,0x3097fd8,0x1308705,0x3eda266,0x3108c26,
+ 0x1545dcd,0x1f7583a,0x1c37395,0x02c7e05 } },
+ /* 221 */
+ { { 0x1fec44a,0x2a9e3a2,0x0caf84f,0x11cf2a9,0x0c8c2ae,0x06da989,
+ 0x1c807dc,0x3c149a4,0x1141543,0x02906bb },
+ { 0x15ffe04,0x0d4e65f,0x2e20424,0x37d896d,0x18bacb2,0x1e05ddd,
+ 0x1660be8,0x183be17,0x1dd86fb,0x035ba70 } },
+ /* 222 */
+ { { 0x2853264,0x0ba5fb1,0x0a0b3aa,0x2df88c1,0x2771533,0x23aba6f,
+ 0x112bb7b,0x3e3086e,0x210ae9b,0x027271b },
+ { 0x030b74c,0x0269678,0x1e90a23,0x135a98c,0x24ed749,0x126de7c,
+ 0x344b23a,0x186da27,0x19640fa,0x0159af5 } },
+ /* 223 */
+ { { 0x18061f3,0x3004630,0x3c70066,0x34df20f,0x1190b25,0x1c9cc91,
+ 0x1fc8e02,0x0d17bc1,0x390f525,0x033cb1c },
+ { 0x0eb30cf,0x2f3ad04,0x303aa09,0x2e835dd,0x1cfd2eb,0x143fc95,
+ 0x02c43a1,0x025e7a1,0x3558aa2,0x000bd45 } },
+ /* 224 */
+ { { 0x1db7d07,0x3bde52b,0x1500396,0x1089115,0x20b4fc7,0x1e2a8f3,
+ 0x3f8eacc,0x365f7eb,0x1a5e8d4,0x0053a6b },
+ { 0x37079e2,0x120284b,0x000edaa,0x33792c2,0x145baa3,0x20e055f,
+ 0x365e2d7,0x26ba005,0x3ab8e9d,0x0282b53 } },
+ /* 225 */
+ { { 0x2653618,0x2dd8852,0x2a5f0bf,0x0f0c7aa,0x2187281,0x1252757,
+ 0x13e7374,0x3b47855,0x0b86e56,0x02f354c },
+ { 0x2e9c47b,0x2fa14cc,0x19ab169,0x3fad401,0x0dc2776,0x24afeed,
+ 0x3a97611,0x0d07736,0x3cf6979,0x02424a0 } },
+ /* 226 */
+ { { 0x2e81a13,0x000c91d,0x123967b,0x265885c,0x29bee1a,0x0cb8675,
+ 0x2d361bd,0x1526823,0x3c9ace1,0x00d7bad },
+ { 0x24e5bdc,0x02b969f,0x2c6e128,0x34edb3b,0x12dcd2c,0x3899af0,
+ 0x24224c6,0x3a1914b,0x0f4448a,0x026a2cb } },
+ /* 227 */
+ { { 0x1d03b59,0x1c6fc82,0x32abf64,0x28ed96b,0x1c90e62,0x2f57bb2,
+ 0x3ff168e,0x04de7fd,0x0f4d449,0x01af6d8 },
+ { 0x255bc30,0x2bfaf22,0x3fe0dad,0x0584025,0x1c79ead,0x3078ef7,
+ 0x2197414,0x022a50b,0x0fd94ba,0x0007b0f } },
+ /* 228 */
+ { { 0x09485c2,0x09dfaf7,0x10c7ba6,0x1e48bec,0x248cc9a,0x028a362,
+ 0x21d60f7,0x193d93d,0x1c04754,0x0346b2c },
+ { 0x2f36612,0x240ac49,0x0d8bd26,0x13b8186,0x259c3a4,0x020d5fb,
+ 0x38a8133,0x09b0937,0x39d4056,0x01f7341 } },
+ /* 229 */
+ { { 0x05a4b48,0x1f534fc,0x07725ce,0x148dc8c,0x2adcd29,0x04aa456,
+ 0x0f79718,0x066e346,0x189377d,0x002fd4d },
+ { 0x068ea73,0x336569b,0x184d35e,0x32a08e9,0x3c7f3bb,0x11ce9c8,
+ 0x3674c6f,0x21bf27e,0x0d9e166,0x034a2f9 } },
+ /* 230 */
+ { { 0x0fa8e4b,0x2e6418e,0x18fc5d2,0x1ba24ff,0x0559f18,0x0dbedbf,
+ 0x2de2aa4,0x22338e9,0x3aa510f,0x035d801 },
+ { 0x23a4988,0x02aad94,0x02732d1,0x111d374,0x0b455cf,0x0d01c9e,
+ 0x067082a,0x2ec05fd,0x368b303,0x03cad4b } },
+ /* 231 */
+ { { 0x035b4ca,0x1fabea6,0x1cbc0d5,0x3f2ed9a,0x02d2232,0x1990c66,
+ 0x2eb680c,0x3b4ea3b,0x18ecc5a,0x03636fa },
+ { 0x1a02709,0x26f8ff1,0x1fa8cba,0x397d6e8,0x230be68,0x043aa14,
+ 0x3d43cdf,0x25c17fa,0x3a3ee55,0x0380564 } },
+ /* 232 */
+ { { 0x275a0a6,0x16bd43a,0x0033d3e,0x2b15e16,0x2512226,0x005d901,
+ 0x26d50fd,0x3bc19bf,0x3b1aeb8,0x02bfb01 },
+ { 0x0bb0a31,0x26559e0,0x1aae7fb,0x330dcc2,0x16f1af3,0x06afce2,
+ 0x13a15a0,0x2ff7645,0x3546e2d,0x029c6e4 } },
+ /* 233 */
+ { { 0x0f593d2,0x384b806,0x122bbf8,0x0a281e0,0x1d1a904,0x2e93cab,
+ 0x0505db0,0x08f6454,0x05c6285,0x014e880 },
+ { 0x3f2b935,0x22d8e79,0x161a07c,0x16b060a,0x02bff97,0x146328b,
+ 0x3ceea77,0x238f61a,0x19b3d58,0x02fd1f4 } },
+ /* 234 */
+ { { 0x17665d5,0x259e9f7,0x0de5672,0x15cbcbd,0x34e3030,0x035240f,
+ 0x0005ae8,0x286d851,0x07f39c9,0x000070b },
+ { 0x1efc6d6,0x2a0051a,0x2724143,0x2a9ef1e,0x0c810bd,0x1e05429,
+ 0x25670ba,0x2e66d7d,0x0e786ff,0x03f6b7e } },
+ /* 235 */
+ { { 0x3c00785,0x232e23f,0x2b67fd3,0x244ed23,0x077fa75,0x3cda3ef,
+ 0x14d055b,0x0f25011,0x24d5aa4,0x00ea0e3 },
+ { 0x297bb9a,0x198ca4f,0x14d9561,0x18d1076,0x39eb933,0x2b6caa0,
+ 0x1591a60,0x0768d45,0x257873e,0x00f36e0 } },
+ /* 236 */
+ { { 0x1e77eab,0x0502a5f,0x0109137,0x0350592,0x3f7e1c5,0x3ac7437,
+ 0x2dcad2c,0x1fee9d8,0x089f1f5,0x0169833 },
+ { 0x0d45673,0x0d8e090,0x065580b,0x065644f,0x11b82be,0x3592dd0,
+ 0x3284b8d,0x23f0015,0x16fdbfd,0x0248bfd } },
+ /* 237 */
+ { { 0x1a129a1,0x1977bb2,0x0e041b2,0x15f30a1,0x0a5b1ce,0x3afef8f,
+ 0x380c46c,0x3358810,0x27df6c5,0x01ca466 },
+ { 0x3b90f9a,0x3d14ea3,0x031b298,0x02e2390,0x2d719c0,0x25bc615,
+ 0x2c0e777,0x0226b8c,0x3803624,0x0179e45 } },
+ /* 238 */
+ { { 0x363cdfb,0x1bb155f,0x24fd5c1,0x1c7c72b,0x28e6a35,0x18165f2,
+ 0x226bea5,0x0beaff3,0x371e24c,0x0138294 },
+ { 0x1765357,0x29034e9,0x22b4276,0x11035ce,0x23c89af,0x074468c,
+ 0x3370ae4,0x013bae3,0x018d566,0x03d7fde } },
+ /* 239 */
+ { { 0x209df21,0x0f8ff86,0x0e47fbf,0x23b99ba,0x126d5d2,0x2722405,
+ 0x16bd0a2,0x1799082,0x0e9533f,0x039077c },
+ { 0x3ba9e3f,0x3f6902c,0x1895305,0x3ac9813,0x3f2340c,0x3c0d9f1,
+ 0x26e1927,0x0557c21,0x16eac4f,0x023b75f } },
+ /* 240 */
+ { { 0x3fc8ff3,0x0770382,0x342fc9a,0x0afa4db,0x314efd8,0x328e07b,
+ 0x016f7cc,0x3ba599c,0x1caed8a,0x0050cb0 },
+ { 0x0b23c26,0x2120a5c,0x3273ec6,0x1cc1cd6,0x2a64fe8,0x2bbc3d6,
+ 0x09f6e5e,0x34b1b8e,0x00b5ac8,0x032bbd2 } },
+ /* 241 */
+ { { 0x1315922,0x1725e1d,0x0ca5524,0x1c4c18f,0x3d82951,0x193bcb2,
+ 0x0e60d0b,0x388dbcf,0x37e8efa,0x0342e85 },
+ { 0x1b3af60,0x26ba3ec,0x220e53a,0x394f4b6,0x01a796a,0x3e7bbca,
+ 0x163605d,0x2b85807,0x17c1c54,0x03cc725 } },
+ /* 242 */
+ { { 0x1cc4597,0x1635492,0x2028c0f,0x2c2eb82,0x2dc5015,0x0d2a052,
+ 0x05fc557,0x1f0ebbf,0x0cb96e1,0x0004d01 },
+ { 0x1a824bf,0x3896172,0x2ed7b29,0x178007a,0x0d59318,0x07bda2b,
+ 0x2ee6826,0x0f9b235,0x04b9193,0x01bcddf } },
+ /* 243 */
+ { { 0x0333fd2,0x0eeb46a,0x15b89f9,0x00968aa,0x2a89302,0x2bdd6b3,
+ 0x1e5037e,0x2541884,0x24ed2d0,0x01b6e8f },
+ { 0x04399cd,0x3be6334,0x3adea48,0x1bb9adc,0x31811c6,0x05fb2bc,
+ 0x360752c,0x3d29dcb,0x3423bec,0x03c4f3c } },
+ /* 244 */
+ { { 0x119e2eb,0x2e7b02a,0x0f68cee,0x257d8b0,0x183a9a1,0x2ae88a6,
+ 0x3a3bb67,0x2eb4f3e,0x1a9274b,0x0320fea },
+ { 0x2fa1ce0,0x346c2d8,0x2fbf0d7,0x3d4d063,0x0e58b60,0x09c1bc1,
+ 0x28ef9e5,0x09a0efe,0x0f45d70,0x02d275c } },
+ /* 245 */
+ { { 0x2d5513b,0x31d443e,0x1e2d914,0x3b2c5d4,0x105f32e,0x27ee756,
+ 0x050418d,0x3c73db6,0x1bb0c30,0x01673eb },
+ { 0x1cb7fd6,0x1eb08d5,0x26a3e16,0x2e20810,0x0249367,0x029e219,
+ 0x2ec58c9,0x12d9fab,0x362354a,0x016eafc } },
+ /* 246 */
+ { { 0x2424865,0x260747b,0x177f37c,0x1e3cb95,0x08b0028,0x2783016,
+ 0x2970f1b,0x323c1c0,0x2a79026,0x0186231 },
+ { 0x0f244da,0x26866f4,0x087306f,0x173ec20,0x31ecced,0x3c84d8d,
+ 0x070f9b9,0x2e764d5,0x075df50,0x0264ff9 } },
+ /* 247 */
+ { { 0x32c3609,0x0c737e6,0x14ea68e,0x300b11b,0x184eb19,0x29dd440,
+ 0x09ec1a9,0x185adeb,0x0664c80,0x0207dd9 },
+ { 0x1fbe978,0x30a969d,0x33561d7,0x34fc60e,0x36743fe,0x00774af,
+ 0x0d1f045,0x018360e,0x12a5fe9,0x01592a0 } },
+ /* 248 */
+ { { 0x2817d1d,0x2993d3e,0x2e0f7a5,0x112faa0,0x255f968,0x355fe6a,
+ 0x3f5a0fc,0x075b2d7,0x3cf00e5,0x0089afc },
+ { 0x32833cf,0x06a7e4b,0x09a8d6d,0x1693d3e,0x320a0a3,0x3cfdfdd,
+ 0x136c498,0x1e0d845,0x347ff25,0x01a1de7 } },
+ /* 249 */
+ { { 0x3043d08,0x030705c,0x20fa79b,0x1d07f00,0x0a54467,0x29b49b4,
+ 0x367e289,0x0b82f4d,0x0d1eb09,0x025ef2c },
+ { 0x32ed3c3,0x1baaa3c,0x3c482ab,0x146ca06,0x3c8a4f1,0x3e85e3c,
+ 0x1bf4f3b,0x1195534,0x3e80a78,0x02a1cbf } },
+ /* 250 */
+ { { 0x32b2086,0x2de4d68,0x3486b1a,0x03a0583,0x2e1eb71,0x2dab9af,
+ 0x10cd913,0x28daa6f,0x3fcb732,0x000a04a },
+ { 0x3605318,0x3f5f2b3,0x2d1da63,0x143f7f5,0x1646e5d,0x040b586,
+ 0x1683982,0x25abe87,0x0c9fe53,0x001ce47 } },
+ /* 251 */
+ { { 0x380d02b,0x055fc22,0x3f7fc50,0x3458a1d,0x26b8333,0x23550ab,
+ 0x0a1af87,0x0a821eb,0x2dc7e6d,0x00d574a },
+ { 0x07386e1,0x3ccd68a,0x3275b41,0x253e390,0x2fd272a,0x1e6627a,
+ 0x2ca2cde,0x0e9e4a1,0x1e37c2a,0x00f70ac } },
+ /* 252 */
+ { { 0x0581352,0x2748701,0x02bed68,0x094dd9e,0x30a00c8,0x3fb5c07,
+ 0x3bd5909,0x211ac80,0x1103ccd,0x0311e1a },
+ { 0x0c768ed,0x29dc209,0x36575db,0x009a107,0x272feea,0x2b33383,
+ 0x313ed56,0x134c9cc,0x168d5bb,0x033310a } },
+ /* 253 */
+ { { 0x17620b9,0x143784f,0x256a94e,0x229664a,0x1d89a5c,0x1d521f2,
+ 0x0076406,0x1c73f70,0x342aa48,0x03851fa },
+ { 0x0f3ae46,0x2ad3bab,0x0fbe274,0x3ed40d4,0x2fd4936,0x232103a,
+ 0x2afe474,0x25b8f7c,0x047080e,0x008e6b0 } },
+ /* 254 */
+ { { 0x3fee8d4,0x347cd4a,0x0fec481,0x33fe9ec,0x0ce80b5,0x33a6bcf,
+ 0x1c4c9e2,0x3967441,0x1a3f5f7,0x03157e8 },
+ { 0x257c227,0x1bc53a0,0x200b318,0x0fcd0af,0x2c5b165,0x2a413ec,
+ 0x2fc998a,0x2da6426,0x19cd4f4,0x0025336 } },
+ /* 255 */
+ { { 0x303beba,0x2072135,0x32918a9,0x140cb3a,0x08631d1,0x0ef527b,
+ 0x05f2c9e,0x2b4ce91,0x0b642ab,0x02e428c },
+ { 0x0a5abf9,0x15013ed,0x3603b46,0x30dd76d,0x3004750,0x28d7627,
+ 0x1a42ccc,0x093ddbe,0x39a1b79,0x00067e2 } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine co-ordinates.
+ *
+ * Thin wrapper: delegates to the stripe-method scalar multiplication using
+ * the precomputed table of base-point multiples (p256_table, defined above).
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point* r, const sp_digit* k,
+ int map, void* heap)
+{
+ return sp_256_ecc_mulmod_stripe_10(r, &p256_base, p256_table,
+ k, map, heap);
+}
+
+#endif
+
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* Multiply a by scalar b into r. (r = a * b)
+ *
+ * Input a has 10 limbs of 26 bits; the product needs 11 limbs, with the
+ * final carry stored in r[10].
+ *
+ * r A single precision integer (11 limbs written).
+ * a A single precision integer (10 limbs read).
+ * b A scalar.
+ */
+SP_NOINLINE static void sp_256_mul_d_10(sp_digit* r, const sp_digit* a,
+    sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int64_t tb = b;
+    int64_t t = 0;
+    int i;
+
+    /* Schoolbook multiply with running 64-bit carry. */
+    for (i = 0; i < 10; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x3ffffff;
+        t >>= 26;
+    }
+    r[10] = (sp_digit)t;
+#else
+    /* Unrolled path using Hexagon (QDSP6) multiply/and intrinsics. */
+    int64_t tb = b;
+    int64_t t[10];
+
+    t[ 0] = Q6_P_mpy_RR(tb, a[0]);
+    t[ 1] = Q6_P_mpy_RR(tb, a[1]);
+    t[ 2] = Q6_P_mpy_RR(tb, a[2]);
+    t[ 3] = Q6_P_mpy_RR(tb, a[3]);
+    t[ 4] = Q6_P_mpy_RR(tb, a[4]);
+    t[ 5] = Q6_P_mpy_RR(tb, a[5]);
+    t[ 6] = Q6_P_mpy_RR(tb, a[6]);
+    t[ 7] = Q6_P_mpy_RR(tb, a[7]);
+    t[ 8] = Q6_P_mpy_RR(tb, a[8]);
+    t[ 9] = Q6_P_mpy_RR(tb, a[9]);
+    /* Each result limb is (carry from previous product) + (low 26 bits).
+     * Note: r[i] may temporarily exceed 26 bits here; callers normalize. */
+    r[ 0] = Q6_R_and_RR(t[ 0], 0x3ffffff);
+    r[ 1] = (sp_digit)(t[ 0] >> 26) + Q6_R_and_RR(t[ 1], 0x3ffffff);
+    r[ 2] = (sp_digit)(t[ 1] >> 26) + Q6_R_and_RR(t[ 2], 0x3ffffff);
+    r[ 3] = (sp_digit)(t[ 2] >> 26) + Q6_R_and_RR(t[ 3], 0x3ffffff);
+    r[ 4] = (sp_digit)(t[ 3] >> 26) + Q6_R_and_RR(t[ 4], 0x3ffffff);
+    r[ 5] = (sp_digit)(t[ 4] >> 26) + Q6_R_and_RR(t[ 5], 0x3ffffff);
+    r[ 6] = (sp_digit)(t[ 5] >> 26) + Q6_R_and_RR(t[ 6], 0x3ffffff);
+    r[ 7] = (sp_digit)(t[ 6] >> 26) + Q6_R_and_RR(t[ 7], 0x3ffffff);
+    r[ 8] = (sp_digit)(t[ 7] >> 26) + Q6_R_and_RR(t[ 8], 0x3ffffff);
+    r[ 9] = (sp_digit)(t[ 8] >> 26) + Q6_R_and_RR(t[ 9], 0x3ffffff);
+    r[10] = (sp_digit)(t[ 9] >> 26);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef WOLFSSL_SP_DIV_32
+/* Estimate the quotient digit of (d1:d0) / div using a shifted
+ * approximation, then correct the estimate against the true divisor.
+ *
+ * Fixes: the parameter was named 'dv', which was redeclared as a local
+ * (a compile error) while the body referenced an undeclared 'div'. The
+ * parameter is 'div' (the real divisor); 'dv' is the local 14-bit
+ * approximation, and both correction steps must use 'div'.
+ *
+ * d1  High digit of the dividend (26 bits).
+ * d0  Low digit of the dividend (26 bits).
+ * div Divisor digit (26 bits).
+ * returns the quotient digit.
+ */
+static WC_INLINE sp_digit sp_256_div_word_10(sp_digit d1, sp_digit d0,
+    sp_digit div)
+{
+    sp_digit d, r, t, dv;
+    int64_t t0, t1;
+
+    /* dv has 14 bits. */
+    dv = (div >> 12) + 1;
+    /* All 26 bits from d1 and top 5 bits from d0. */
+    d = (d1 << 5) | (d0 >> 21);
+    r = d / dv;
+    d -= r * dv;
+    /* Up to 17 bits in r */
+    /* Next 9 bits from d0. */
+    d <<= 9;
+    r <<= 9;
+    d |= (d0 >> 12) & ((1 << 9) - 1);
+    t = d / dv;
+    d -= t * dv;
+    r += t;
+    /* Up to 26 bits in r */
+
+    /* Handle rounding error with dv - top part */
+    t0 = ((int64_t)d1 << 26) + d0;
+    t1 = (int64_t)r * div;
+    t1 = t0 - t1;
+    t = (sp_digit)(t1 >> 12) / dv;
+    r += t;
+
+    /* Handle rounding error with dv - bottom 32 bits */
+    t1 = (sp_digit)t0 - (r * div);
+    t = (sp_digit)t1 / div;
+    r += t;
+
+    return r;
+}
+#endif /* WOLFSSL_SP_DIV_32 */
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided (2*10 limbs).
+ * d Number to divide with (10 limbs).
+ * m Multiplier result (unused).
+ * r Remainder from the division.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_256_div_10(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    int i;
+    /* Fix: d1 was previously declared only when WOLFSSL_SP_DIV_32 was
+     * undefined, but it is also used in the final estimation step below,
+     * breaking the WOLFSSL_SP_DIV_32 build. Declare it unconditionally. */
+    int64_t d1;
+    sp_digit dv, r1;
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_digit* td;
+#else
+    sp_digit t1d[20], t2d[10 + 1];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+    (void)m;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        t1 = td;
+        t2 = td + 2 * 10;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        dv = d[9];
+        XMEMCPY(t1, a, sizeof(*t1) * 2U * 10U);
+        /* One quotient-digit estimate per iteration, high limb downwards. */
+        for (i=9; i>=0; i--) {
+            t1[10 + i] += t1[10 + i - 1] >> 26;
+            t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff);
+#ifndef WOLFSSL_SP_DIV_32
+            d1 = t1[10 + i];
+            d1 <<= 26;
+            d1 += t1[10 + i - 1];
+            r1 = (sp_digit)(d1 / dv);
+#else
+            r1 = sp_256_div_word_10(t1[10 + i], t1[10 + i - 1], dv);
+#endif
+
+            /* Subtract r1 * d; the estimate may be one too large, in which
+             * case the result goes negative and is corrected by adding back
+             * a multiple of d below. */
+            sp_256_mul_d_10(t2, d, r1);
+            (void)sp_256_sub_10(&t1[i], &t1[i], t2);
+            t1[10 + i] -= t2[10];
+            t1[10 + i] += t1[10 + i - 1] >> 26;
+            t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff);
+            r1 = (((-t1[10 + i]) << 26) - t1[10 + i - 1]) / dv;
+            r1++;
+            sp_256_mul_d_10(t2, d, r1);
+            (void)sp_256_add_10(&t1[i], &t1[i], t2);
+            t1[10 + i] += t1[10 + i - 1] >> 26;
+            t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff);
+        }
+        t1[10 - 1] += t1[10 - 2] >> 26;
+        t1[10 - 2] &= 0x3ffffff;
+        d1 = t1[10 - 1];
+        r1 = (sp_digit)(d1 / dv);
+
+        sp_256_mul_d_10(t2, d, r1);
+        (void)sp_256_sub_10(t1, t1, t2);
+        XMEMCPY(r, t1, sizeof(*r) * 2U * 10U);
+        for (i=0; i<8; i++) {
+            r[i+1] += r[i] >> 26;
+            r[i] &= 0x3ffffff;
+        }
+        /* If the top limb went negative, add the divisor back once. */
+        sp_256_cond_add_10(r, r, d, 0 - ((r[9] < 0) ?
+                    (sp_digit)1 : (sp_digit)0));
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Thin wrapper over sp_256_div_10 that discards the quotient.
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
+ */
+static int sp_256_mod_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_256_div_10(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P256 curve (order of the generator minus 2), as
+ * little-endian 32-bit words. Used as the exponent for Fermat inversion
+ * in sp_256_mont_inv_order_10. */
+static const uint32_t p256_order_2[8] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
+    0x00000000U,0xffffffffU
+};
+#else
+/* The low half of the order-2 of the P256 curve, little-endian 32-bit
+ * words; the high half is handled by a fixed addition chain. */
+static const uint32_t p256_order_low[4] = {
+    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
+ *
+ * Operands are in Montgomery form; a full multiply is followed by a
+ * Montgomery reduction against the order.
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+    sp_256_mul_10(r, a, b);
+    sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
+}
+
+/* Square number mod the order of P256 curve. (r = a * a mod order)
+ *
+ * Operand is in Montgomery form; square then Montgomery-reduce.
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_order_10(sp_digit* r, const sp_digit* a)
+{
+    sp_256_sqr_10(r, a);
+    sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Repeatedly square a number mod the order of the P256 curve.
+ * (r = a ^ (2 ^ n) mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of squarings to perform (at least one is always done).
+ */
+static void sp_256_mont_sqr_n_order_10(sp_digit* r, const sp_digit* a, int n)
+{
+    /* First squaring reads from a; the remaining n-1 square in place. */
+    sp_256_mont_sqr_order_10(r, a);
+    while (--n > 0) {
+        sp_256_mont_sqr_order_10(r, r);
+    }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
+ * (r = 1 / a mod order)
+ *
+ * Uses Fermat's little theorem: a^(order-2) mod order. The small build
+ * walks the bits of order-2 directly; the large build uses a fixed
+ * addition chain plus windows over p256_order_low for the low half.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data (at least 6*10 digits in the large build).
+ */
+static void sp_256_mont_inv_order_10(sp_digit* r, const sp_digit* a,
+        sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Left-to-right binary exponentiation by order-2. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 10);
+    for (i=254; i>=0; i--) {
+        sp_256_mont_sqr_order_10(t, t);
+        if ((p256_order_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t, t, a);
+        }
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 10U);
+#else
+    sp_digit* t = td;
+    sp_digit* t2 = td + 2 * 10;
+    sp_digit* t3 = td + 4 * 10;
+    int i;
+
+
+    /* t = a^2 */
+    sp_256_mont_sqr_order_10(t, a);
+    /* t = a^3 = t * a */
+    sp_256_mont_mul_order_10(t, t, a);
+    /* t2= a^c = t ^ 2 ^ 2 */
+    sp_256_mont_sqr_n_order_10(t2, t, 2);
+    /* t3= a^f = t2 * t */
+    sp_256_mont_mul_order_10(t3, t2, t);
+    /* t2= a^f0 = t3 ^ 2 ^ 4 */
+    sp_256_mont_sqr_n_order_10(t2, t3, 4);
+    /* t = a^ff = t2 * t3 */
+    sp_256_mont_mul_order_10(t, t2, t3);
+    /* t2= a^ff00 = t ^ 2 ^ 8 */
+    sp_256_mont_sqr_n_order_10(t2, t, 8);
+    /* t = a^ffff = t2 * t */
+    sp_256_mont_mul_order_10(t, t2, t);
+    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
+    sp_256_mont_sqr_n_order_10(t2, t, 16);
+    /* t = a^ffffffff = t2 * t */
+    sp_256_mont_mul_order_10(t, t2, t);
+    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
+    sp_256_mont_sqr_n_order_10(t2, t, 64);
+    /* t2= a^ffffffff00000000ffffffff = t2 * t */
+    sp_256_mont_mul_order_10(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
+    sp_256_mont_sqr_n_order_10(t2, t2, 32);
+    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
+    sp_256_mont_mul_order_10(t2, t2, t);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
+
+    /* Remaining low half of order-2: square-and-multiply over its bits. */
+    for (i=127; i>=112; i--) {
+        sp_256_mont_sqr_order_10(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
+    sp_256_mont_sqr_n_order_10(t2, t2, 4);
+    sp_256_mont_mul_order_10(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
+    for (i=107; i>=64; i--) {
+        sp_256_mont_sqr_order_10(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
+    sp_256_mont_sqr_n_order_10(t2, t2, 4);
+    sp_256_mont_mul_order_10(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
+    for (i=59; i>=32; i--) {
+        sp_256_mont_sqr_order_10(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
+    sp_256_mont_sqr_n_order_10(t2, t2, 4);
+    sp_256_mont_mul_order_10(t2, t2, t3);
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
+    for (i=27; i>=0; i--) {
+        sp_256_mont_sqr_order_10(t2, t2);
+        if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
+            sp_256_mont_mul_order_10(t2, t2, a);
+        }
+    }
+    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
+    sp_256_mont_sqr_n_order_10(t2, t2, 4);
+    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
+    sp_256_mont_mul_order_10(r, t2, t3);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_VERIFY
+
+
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * h       Remote DSP handle (unused).
+ * u1      Truncated hash e as sp digits; used as working storage.
+ * hashLen Length of the hash data (unused).
+ * r, s    Signature values as sp digits.
+ * x, y, z Public key point Q in projective Montgomery form.
+ * res     Set to 1 when the signature verifies, 0 otherwise.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ *
+ * NOTE(review): the WOLFSSL_SP_SMALL/WOLFSSL_SMALL_STACK configuration
+ * never allocates 'd' and still assigns u2/tmp from the static arrays,
+ * so this function only builds without those defines — confirm.
+ */
+int wolfSSL_DSP_ECC_Verify_256(remote_handle64 h, int32 *u1, int hashLen, int32* r, int rSz, int32* s, int sSz,
+    int32* x, int xSz, int32* y, int ySz, int32* z, int zSz, int* res)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_digit* d = NULL;
+#else
+    sp_digit u2d[2*10] __attribute__((aligned(128)));
+    sp_digit tmpd[2*10 * 5] __attribute__((aligned(128)));
+    sp_point p1d;
+    sp_point p2d;
+#endif
+    sp_digit* u2 = NULL;
+    sp_digit* tmp = NULL;
+    sp_point* p1;
+    sp_point* p2 = NULL;
+    sp_digit carry;
+    int32_t c;
+    int err;
+    void* heap = NULL;
+
+    (void)h;
+    (void)hashLen;
+
+    err = sp_ecc_point_new(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(heap, p2d, p2);
+    }
+
+    if (err == MP_OKAY) {
+        u2 = u2d;
+        tmp = tmpd;
+
+        XMEMCPY(u2, r, 40);
+        XMEMCPY(p2->x, x, 40);
+        XMEMCPY(p2->y, y, 40);
+        XMEMCPY(p2->z, z, 40);
+
+        /* Convert s to Montgomery form mod the order. */
+        sp_256_mul_10(s, s, p256_norm_order);
+        err = sp_256_mod_10(s, s, p256_order);
+    }
+    if (err == MP_OKAY) {
+        sp_256_norm_10(s);
+        {
+            /* u1 = e/s mod order, u2 = r/s mod order. */
+            sp_256_mont_inv_order_10(s, s, tmp);
+            sp_256_mont_mul_order_10(u1, u1, s);
+            sp_256_mont_mul_order_10(u2, u2, s);
+        }
+
+        err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        sp_256_proj_point_add_10(p1, p1, p2, tmp);
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        XMEMCPY(u2, r, 40);
+        err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_256_mont_sqr_10(p1->z, p1->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_10(u1, u2, p1->z, p256_mod, p256_mp_mod);
+        *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            XMEMCPY(u2, r, 40);
+            carry = sp_256_add_10(u2, u2, p256_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_256_norm_10(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_256_cmp_10(u2, p256_mod);
+                if (c < 0) {
+                    /* Convert to Montogomery form */
+                    err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_256_mont_mul_10(u1, u2, p1->z, p256_mod,
+                            p256_mp_mod);
+                        /* Fix: compare against u1 (the scaled value just
+                         * computed), not u2 — comparing u2 made the
+                         * r+order fallback check always wrong. */
+                        *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_ecc_point_free(p1, 0, heap);
+    sp_ecc_point_free(p2, 0, heap);
+
+    return err;
+}
+
+/** Free the Fixed Point cache.
+ * No-op in this build: no fixed-point cache state is kept here. */
+void wc_ecc_fp_free(void)
+{
+}
+
+
+/* FastRPC session open callback.
+ * The handle value is opaque to the RPC layer — any value works (even 0
+ * or a constant). A one-byte allocation is used as a marker so that
+ * wolfSSL_close() has something real to free.
+ */
+AEEResult wolfSSL_open(const char *uri, remote_handle64 *handle)
+{
+    *handle = (remote_handle64)malloc(1);
+    return 0;
+}
+
+/* FastRPC session close callback: release the marker allocated in
+ * wolfSSL_open(), if any. Always succeeds. */
+AEEResult wolfSSL_close(remote_handle64 handle)
+{
+    void* ptr = (void*)handle;
+
+    if (ptr != NULL) {
+        free(ptr);
+    }
+    return 0;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_digit tmpd[2 * 10 * 5];
+    sp_point pd;
+    sp_point qd;
+#endif
+    /* Fix: initialize to NULL so the cleanup "if (tmp != NULL) XFREE"
+     * does not read an uninitialized pointer when point allocation fails
+     * in the small-stack build. */
+    sp_digit* tmp = NULL;
+    sp_point* p;
+    sp_point* q = NULL;
+    int err;
+
+    err = sp_ecc_point_new(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_ecc_point_new(NULL, qd, q);
+    }
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 10, pX);
+        sp_256_from_mp(p->y, 10, pY);
+        sp_256_from_mp(p->z, 10, pZ);
+        sp_256_from_mp(q->x, 10, qX);
+        sp_256_from_mp(q->y, 10, qY);
+        sp_256_from_mp(q->z, 10, qZ);
+
+        sp_256_proj_point_add_10(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(q, 0, NULL);
+    sp_ecc_point_free(p, 0, NULL);
+
+    return err;
+}
+
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_digit tmpd[2 * 10 * 2];
+    sp_point pd;
+#endif
+    /* Fix: initialize to NULL so the cleanup "if (tmp != NULL) XFREE"
+     * does not read an uninitialized pointer when point allocation fails
+     * in the small-stack build. */
+    sp_digit* tmp = NULL;
+    sp_point* p;
+    int err;
+
+    err = sp_ecc_point_new(NULL, pd, p);
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 2, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 10, pX);
+        sp_256_from_mp(p->y, 10, pY);
+        sp_256_from_mp(p->z, 10, pZ);
+
+        sp_256_proj_point_dbl_10(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, rZ);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_digit tmpd[2 * 10 * 4];
+    sp_point pd;
+#endif
+    /* Fix: initialize to NULL so the cleanup "if (tmp != NULL) XFREE"
+     * does not read an uninitialized pointer when point allocation fails
+     * in the small-stack build. */
+    sp_digit* tmp = NULL;
+    sp_point* p;
+    int err;
+
+    err = sp_ecc_point_new(NULL, pd, p);
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 4, NULL,
+                                 DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_256_from_mp(p->x, 10, pX);
+        sp_256_from_mp(p->y, 10, pY);
+        sp_256_from_mp(p->z, 10, pZ);
+
+        sp_256_map_10(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_to_mp(p->z, pZ);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_ecc_point_free(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * For P-256, p ≡ 3 mod 4, so sqrt(y) = y ^ ((p + 1) / 4) mod p; the
+ * addition chain below computes exactly that exponent. Input and output
+ * are in Montgomery form.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mont_sqrt_10(sp_digit* y)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 10];
+    sp_digit t2d[2 * 10];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        t1 = d + 0 * 10;
+        t2 = d + 2 * 10;
+#else
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        {
+            /* t2 = y ^ 0x2 */
+            sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_256_mont_mul_10(t1, t2, y, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xc */
+            sp_256_mont_sqr_n_10(t2, t1, 2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xf0 */
+            sp_256_mont_sqr_n_10(t2, t1, 4, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xff */
+            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xff00 */
+            sp_256_mont_sqr_n_10(t2, t1, 8, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffff */
+            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t2 = y ^ 0xffff0000 */
+            sp_256_mont_sqr_n_10(t2, t1, 16, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff */
+            sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000000 */
+            sp_256_mont_sqr_n_10(t1, t1, 32, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001 */
+            sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+            sp_256_mont_sqr_n_10(t1, t1, 96, p256_mod, p256_mp_mod);
+            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+            sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
+            /* Final 94 squarings complete the (p + 1) / 4 exponent. */
+            sp_256_mont_sqr_n_10(y, t1, 94, p256_mod, p256_mp_mod);
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Uncompress the point given the X ordinate.
+ *
+ * Evaluates the curve equation y^2 = x^3 - 3x + b in Montgomery form,
+ * takes the modular square root, then picks the root whose parity
+ * matches 'odd'.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 10];
+    sp_digit yd[2 * 10];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        x = d + 0 * 10;
+        y = d + 2 * 10;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        sp_256_from_mp(x, 10, xm);
+        /* Convert x to Montgomery form. */
+        err = sp_256_mod_mul_norm_10(x, x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+        {
+            sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod);
+            sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_256_mont_sub_10(y, y, x, p256_mod);
+        sp_256_mont_sub_10(y, y, x, p256_mod);
+        sp_256_mont_sub_10(y, y, x, p256_mod);
+        /* y = x^3 - 3x + b (x is reused to hold b in Montgomery form) */
+        err = sp_256_mod_mul_norm_10(x, p256_b, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_256_mont_add_10(y, y, x, p256_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_256_mont_sqrt_10(y);
+    }
+    if (err == MP_OKAY) {
+        /* Leave Montgomery form, then fix the root's parity. */
+        XMEMSET(y + 10, 0, 10U * sizeof(sp_digit));
+        sp_256_mont_reduce_10(y, p256_mod, p256_mp_mod);
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_256_mont_sub_10(y, p256_mod, y, p256_mod);
+        }
+
+        err = sp_256_to_mp(y, ym);
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_DSP */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_int.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_int.c
new file mode 100644
index 000000000..0db891b98
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_int.c
@@ -0,0 +1,2203 @@
+/* sp_int.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+/* SP Build Options:
+ * WOLFSSL_HAVE_SP_RSA: Enable SP RSA support
+ * WOLFSSL_HAVE_SP_DH: Enable SP DH support
+ * WOLFSSL_HAVE_SP_ECC: Enable SP ECC support
+ * WOLFSSL_SP_MATH: Use only single precision math and algorithms it supports (no fastmath tfm.c or normal integer.c)
+ * WOLFSSL_SP_SMALL: Use smaller version of code and avoid large stack variables
+ * WOLFSSL_SP_NO_MALLOC: Always use stack, no heap XMALLOC/XFREE allowed
+ * WOLFSSL_SP_NO_3072: Disable RSA/DH 3072-bit support
+ * WOLFSSL_SP_NO_2048: Disable RSA/DH 2048-bit support
+ * WOLFSSL_SP_4096: Enable RSA/RH 4096-bit support
+ * WOLFSSL_SP_384 Enable ECC 384-bit SECP384R1 support
+ * WOLFSSL_SP_NO_256 Disable ECC 256-bit SECP256R1 support
+ * WOLFSSL_SP_CACHE_RESISTANT Enable cache resistantant code
+ * WOLFSSL_SP_ASM Enable assembly speedups (detect platform)
+ * WOLFSSL_SP_X86_64_ASM Enable Intel x86 assembly speedups like AVX/AVX2
+ * WOLFSSL_SP_ARM32_ASM Enable Aarch32 assembly speedups
+ * WOLFSSL_SP_ARM64_ASM Enable Aarch64 assembly speedups
+ * WOLFSSL_SP_ARM_CORTEX_M_ASM Enable Cortex-M assembly speedups
+ * WOLFSSL_SP_ARM_THUMB_ASM Enable ARM Thumb assembly speedups (used with -mthumb)
+ */
+
+#ifdef WOLFSSL_SP_MATH
+
+#include <wolfssl/wolfcrypt/sp_int.h>
+
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
+
+WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
+ sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
+ sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
+ sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
+ sp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
+ sp_int* res);
+
+#endif
+
+/* Return the number of digits in use in the big number, or 0 when the
+ * pointer is NULL. */
+int sp_get_digit_count(sp_int *a)
+{
+    return (a == NULL) ? 0 : a->used;
+}
+
+/* Initialize the big number to be zero.
+ *
+ * Only resets the bookkeeping fields; the digit array is not cleared
+ * (used == 0 makes its contents irrelevant). Assumes a is non-NULL.
+ *
+ * a SP integer.
+ * returns MP_OKAY always.
+ */
+int sp_init(sp_int* a)
+{
+    a->used = 0;
+    a->size = SP_INT_DIGITS;
+
+    return MP_OKAY;
+}
+
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC))
+/* Initialize up to six big numbers to be zero.
+ * NULL arguments are skipped.
+ *
+ * a..f SP integers (any may be NULL).
+ * returns MP_OKAY always.
+ */
+int sp_init_multi(sp_int* a, sp_int* b, sp_int* c, sp_int* d, sp_int* e,
+                  sp_int* f)
+{
+    sp_int* nums[6];
+    int i;
+
+    nums[0] = a;
+    nums[1] = b;
+    nums[2] = c;
+    nums[3] = d;
+    nums[4] = e;
+    nums[5] = f;
+
+    for (i = 0; i < 6; i++) {
+        if (nums[i] != NULL) {
+            nums[i]->used = 0;
+            nums[i]->size = SP_INT_DIGITS;
+        }
+    }
+
+    return MP_OKAY;
+}
+#endif
+
+/* Clear the data from the big number and set to zero.
+ * Only the digits in use are zeroed. NULL is tolerated.
+ *
+ * a SP integer.
+ */
+void sp_clear(sp_int* a)
+{
+    int i;
+
+    if (a == NULL) {
+        return;
+    }
+    for (i = a->used - 1; i >= 0; i--) {
+        a->dp[i] = 0;
+    }
+    a->used = 0;
+}
+
+/* Calculate the number of 8-bit values required to represent the big number.
+ *
+ * a SP integer.
+ * returns the count (bit count rounded up to whole bytes).
+ */
+int sp_unsigned_bin_size(sp_int* a)
+{
+    return (sp_count_bits(a) + 7) / 8;
+}
+
+/* Convert a number as an array of bytes in big-endian format to a big number.
+ *
+ * a SP integer.
+ * in Array of bytes.
+ * inSz Number of data bytes in array.
+ * returns MP_VAL when the number is too big to fit in an SP and
+           MP_OKAY otherwise.
+ */
+int sp_read_unsigned_bin(sp_int* a, const byte* in, int inSz)
+{
+    int err = MP_OKAY;
+    int i, j = 0, k;
+
+    if (inSz > SP_INT_DIGITS * (int)sizeof(a->dp[0])) {
+        err = MP_VAL;
+    }
+
+    if (err == MP_OKAY) {
+        /* Consume whole words from the least significant end. */
+        for (i = inSz-1; i >= (SP_WORD_SIZE/8); i -= (SP_WORD_SIZE/8), j++) {
+            a->dp[j] = (((sp_int_digit)in[i-0]) << (0*8))
+                     | (((sp_int_digit)in[i-1]) << (1*8))
+                     | (((sp_int_digit)in[i-2]) << (2*8))
+                     | (((sp_int_digit)in[i-3]) << (3*8));
+        #if SP_WORD_SIZE == 64
+            a->dp[j] |= (((sp_int_digit)in[i-4]) << (4*8))
+                     |  (((sp_int_digit)in[i-5]) << (5*8))
+                     |  (((sp_int_digit)in[i-6]) << (6*8))
+                     |  (((sp_int_digit)in[i-7]) << (7*8));
+        #endif
+        }
+        if (i >= 0) {
+            /* Remaining partial word (1..SP_WORD_SIZE/8 bytes). */
+            a->dp[j] = 0;
+            for (k = 0; k <= i; k++) {
+                a->dp[j] <<= 8;
+                a->dp[j] |= in[k];
+            }
+            a->used = j + 1;
+        }
+        else {
+            /* Fix: inSz == 0 previously left used == 1 with dp[0] never
+             * written; an empty input is the value zero. */
+            a->used = j;
+        }
+    }
+
+    sp_clamp(a);
+
+    return err;
+}
+
+#ifdef HAVE_ECC
+/* Convert a number as string in big-endian format to a big number.
+ * Only supports base-16 (hexadecimal).
+ * Negative values not supported.
+ *
+ * a SP integer.
+ * in NUL terminated string.
+ * radix Number of values in a digit.
+ * returns BAD_FUNC_ARG when radix not supported or value is negative, MP_VAL
+ * when a character is not valid and MP_OKAY otherwise.
+ */
+int sp_read_radix(sp_int* a, const char* in, int radix)
+{
+    int err = MP_OKAY;
+    int i, j = 0, k = 0;
+    char ch;
+
+    if ((radix != 16) || (*in == '-')) {
+        err = BAD_FUNC_ARG;
+    }
+
+    /* Skip leading zeros. NOTE(review): this runs even when err is
+     * already set; harmless since the parse loop is skipped then. */
+    while (*in == '0') {
+        in++;
+    }
+
+    if (err == MP_OKAY) {
+        a->dp[0] = 0;
+        /* Parse from the least significant nibble: j is the bit offset
+         * within digit k. */
+        for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
+            ch = in[i];
+            if (ch >= '0' && ch <= '9')
+                ch -= '0';
+            else if (ch >= 'A' && ch <= 'F')
+                ch -= 'A' - 10;
+            else if (ch >= 'a' && ch <= 'f')
+                ch -= 'a' - 10;
+            else {
+                err = MP_VAL;
+                break;
+            }
+
+            a->dp[k] |= ((sp_int_digit)ch) << j;
+            j += 4;
+            /* Capacity check: fail before moving past the last digit. */
+            if (k >= SP_INT_DIGITS - 1) {
+                err = MP_VAL;
+                break;
+            }
+            /* NOTE(review): assumes DIGIT_BIT == SP_WORD_SIZE so the
+             * mask below wraps j back to 0 — confirm. */
+            if (j == DIGIT_BIT)
+                a->dp[++k] = 0;
+            j &= SP_WORD_SIZE - 1;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        a->used = k + 1;
+        if (a->dp[k] == 0)
+            a->used--;
+
+        /* Zero the untouched upper digits. */
+        for (k++; k < a->size; k++)
+            a->dp[k] = 0;
+
+        sp_clamp(a);
+    }
+
+    return err;
+}
+#endif
+
+/* Compare two big numbers.
+ *
+ * A number with more digits in use is larger; with equal digit counts the
+ * digits are compared from most significant down.
+ *
+ * a SP integer.
+ * b SP integer.
+ * returns MP_GT if a is greater than b, MP_LT if a is less than b and MP_EQ
+ * when a equals b.
+ */
+int sp_cmp(sp_int* a, sp_int* b)
+{
+    int i;
+
+    if (a->used != b->used) {
+        return (a->used > b->used) ? MP_GT : MP_LT;
+    }
+    for (i = a->used - 1; i >= 0; i--) {
+        if (a->dp[i] != b->dp[i]) {
+            return (a->dp[i] > b->dp[i]) ? MP_GT : MP_LT;
+        }
+    }
+    return MP_EQ;
+}
+
+/* Count the number of bits in the big number.
+ *
+ * a SP integer.
+ * returns the number of bits (0 for the value zero).
+ */
+int sp_count_bits(sp_int* a)
+{
+    int r = 0;
+    sp_int_digit d;
+
+    /* Find the highest non-zero digit. */
+    r = a->used - 1;
+    while (r >= 0 && a->dp[r] == 0)
+        r--;
+    if (r < 0)
+        r = 0;
+    else {
+        d = a->dp[r];
+        r *= SP_WORD_SIZE;
+        if (d >= (1L << (SP_WORD_SIZE / 2))) {
+            /* Top half set: count down from the word's top bit. */
+            r += SP_WORD_SIZE;
+            while ((d & (1UL << (SP_WORD_SIZE - 1))) == 0) {
+                r--;
+                d <<= 1;
+            }
+        }
+        else {
+            /* Small top digit: count bits upwards. */
+            while (d != 0) {
+                r++;
+                d >>= 1;
+            }
+        }
+    }
+
+    return r;
+}
+
+/* Determine if the most significant byte of the encoded big number has the
+ * top bit set.
+ *
+ * a SP integer.
+ * returns 1 when the top bit is set and 0 otherwise.
+ */
+int sp_leading_bit(sp_int* a)
+{
+    sp_int_digit d;
+
+    if (a->used == 0) {
+        return 0;
+    }
+    /* Reduce the top digit to its most significant non-zero byte. */
+    d = a->dp[a->used - 1];
+    while (d > (sp_int_digit)0xff) {
+        d >>= 8;
+    }
+    return (int)(d >> 7);
+}
+
+#if !defined(NO_DH) || defined(HAVE_ECC) || defined(WC_RSA_BLINDING) || \
+ !defined(WOLFSSL_RSA_VERIFY_ONLY)
+/* Convert the big number to an array of bytes in big-endian format.
+ * The array must be large enough for encoded number - use mp_unsigned_bin_size
+ * to calculate the number of bytes required.
+ *
+ * a SP integer.
+ * out Array to put encoding into (exactly sp_unsigned_bin_size(a) bytes).
+ * returns MP_OKAY always.
+ */
+int sp_to_unsigned_bin(sp_int* a, byte* out)
+{
+    int i, j, b;
+    sp_int_digit d;
+
+    /* j walks the output backwards from the least significant byte. */
+    j = sp_unsigned_bin_size(a) - 1;
+    for (i=0; j>=0; i++) {
+        d = a->dp[i];
+        for (b = 0; b < SP_WORD_SIZE / 8; b++) {
+            out[j] = d;  /* implicit truncation to the low byte */
+            if (--j < 0) {
+                break;
+            }
+            d >>= 8;
+        }
+    }
+
+    return MP_OKAY;
+}
+#endif
+
+/* Convert the big number to an array of bytes in big-endian format.
+ * The array must be large enough for encoded number - use mp_unsigned_bin_size
+ * to calculate the number of bytes required.
+ * Front-pads the output array with zeros make number the size of the array.
+ *
+ * a     SP integer.
+ * out   Array to put encoding into.
+ * outSz Size of the array.
+ * returns MP_OKAY always.
+ */
+int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz)
+{
+    int i, j, b;
+    sp_int_digit d;
+
+    j = outSz - 1;
+    for (i=0; j>=0; i++) {
+        /* Never read past the digits in use: the original indexed dp[i]
+         * unconditionally, so an output larger than the number leaked
+         * whatever stale data sat above a->used into the padding. */
+        d = (i < a->used) ? a->dp[i] : 0;
+        for (b = 0; b < SP_WORD_SIZE; b += 8) {
+            out[j--] = (byte)(d >> b);
+            if (j < 0)
+                break;
+        }
+    }
+
+    return MP_OKAY;
+}
+
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC))
+/* Ensure the data in the big number is zeroed.
+ *
+ * Uses ForceZero so the compiler cannot optimize the wipe away.
+ * NOTE(review): only the a->used digits are wiped; digits above 'used' that
+ * once held sensitive data are left untouched -- confirm callers rely only
+ * on clamped values.
+ *
+ * a  SP integer.
+ */
+void sp_forcezero(sp_int* a)
+{
+    ForceZero(a->dp, a->used * sizeof(sp_int_digit));
+    a->used = 0;
+}
+#endif
+
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC))
+/* Copy value of big number a into r.
+ *
+ * a  SP integer to copy from.
+ * r  SP integer to copy into.
+ * returns MP_OKAY always.
+ */
+int sp_copy(sp_int* a, sp_int* r)
+{
+    /* Copying onto itself is a no-op. */
+    if (a == r) {
+        return MP_OKAY;
+    }
+    /* Only the digits in use need copying. */
+    XMEMCPY(r->dp, a->dp, a->used * sizeof(sp_int_digit));
+    r->used = a->used;
+    return MP_OKAY;
+}
+
+/* Initialize "a" and then copy the value of "b" into it. */
+int sp_init_copy (sp_int * a, sp_int * b)
+{
+    int ret = sp_init(a);
+
+    if (ret == MP_OKAY) {
+        ret = sp_copy(b, a);
+        if (ret != MP_OKAY) {
+            sp_clear(a);
+        }
+    }
+    return ret;
+}
+#endif
+
+/* Set the big number to be the value of the digit.
+ *
+ * a  SP integer.
+ * d  Digit to be set.
+ * returns MP_OKAY always.
+ */
+int sp_set(sp_int* a, sp_int_digit d)
+{
+    a->dp[0] = d;
+    /* Zero is represented with no digits in use. */
+    a->used = (d != 0);
+    return MP_OKAY;
+}
+
+/* Recalculate the number of digits used.
+ *
+ * a  SP integer.
+ */
+void sp_clamp(sp_int* a)
+{
+    int top = a->used;
+
+    /* Drop most-significant digits that are zero. */
+    while (top > 0 && a->dp[top - 1] == 0) {
+        top--;
+    }
+    a->used = top;
+}
+
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || (!defined(NO_DH) || defined(HAVE_ECC))
+/* Grow big number to be able to hold l digits.
+ * This function does nothing as the number of digits is fixed.
+ *
+ * a  SP integer.
+ * l  Number of digits.
+ * returns MP_MEM if the number of digits requested is more than available and
+ * MP_OKAY otherwise.
+ */
+int sp_grow(sp_int* a, int l)
+{
+    /* Storage is a fixed array of a->size digits; only validate the
+     * request, no allocation happens. */
+    return (l > a->size) ? MP_MEM : MP_OKAY;
+}
+
+/* Sub a one digit number from the big number.
+ *
+ * NOTE(review): no underflow handling -- assumes a >= d; for a == 0 and
+ * d > 0 the result wraps. Confirm callers guarantee the precondition.
+ *
+ * a  SP integer.
+ * d  Digit to subtract.
+ * r  SP integer - result.
+ * returns MP_OKAY always.
+ */
+int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
+{
+    int i = 0;
+    sp_int_digit t;
+
+    r->used = a->used;
+    t = a->dp[0] - d;
+    /* Unsigned wrap-around detects a borrow out of the low digit. */
+    if (t > a->dp[0]) {
+        /* Propagate the borrow; it stops at the first digit that does not
+         * itself wrap to all-ones. */
+        for (++i; i < a->used; i++) {
+            r->dp[i] = a->dp[i] - 1;
+            if (r->dp[i] != (sp_int_digit)-1)
+                break;
+        }
+    }
+    r->dp[0] = t;
+    /* When writing to a different number, copy the untouched high digits. */
+    if (r != a) {
+        for (++i; i < a->used; i++)
+            r->dp[i] = a->dp[i];
+    }
+    sp_clamp(r);
+
+    return MP_OKAY;
+}
+#endif
+
+/* Compare a one digit number with a big number.
+ *
+ * a  SP integer.
+ * d  Digit to compare with.
+ * returns MP_GT if a is greater than d, MP_LT if a is less than d and MP_EQ
+ * when a equals d.
+ */
+int sp_cmp_d(sp_int *a, sp_int_digit d)
+{
+    int rel;
+
+    if (a->used == 0) {
+        /* a is zero: equal when d is zero, otherwise less. */
+        rel = (d == 0) ? MP_EQ : MP_LT;
+    }
+    else if (a->used > 1) {
+        /* Anything with more than one digit in use exceeds any digit. */
+        rel = MP_GT;
+    }
+    else if (a->dp[0] > d) {
+        rel = MP_GT;
+    }
+    else if (a->dp[0] < d) {
+        rel = MP_LT;
+    }
+    else {
+        rel = MP_EQ;
+    }
+
+    return rel;
+}
+
+#if !defined(NO_DH) || defined(HAVE_ECC) || !defined(WOLFSSL_RSA_VERIFY_ONLY)
+/* Left shift the number by number of bits.
+ * Bits may be larger than the word size.
+ *
+ * NOTE(review): writes a->dp[a->used] as the new top digit, so the caller
+ * must guarantee used < size -- confirm all call sites.
+ *
+ * a  SP integer.
+ * n  Number of bits to shift.
+ * returns MP_OKAY always.
+ */
+static int sp_lshb(sp_int* a, int n)
+{
+    int i;
+
+    /* Whole-digit part of the shift is a digit move. */
+    if (n >= SP_WORD_SIZE) {
+        sp_lshd(a, n / SP_WORD_SIZE);
+        n %= SP_WORD_SIZE;
+    }
+
+    /* Remaining sub-digit shift: each digit keeps its low bits shifted up
+     * and receives the top bits of the digit below. */
+    if (n != 0) {
+        a->dp[a->used] = 0;
+        for (i = a->used - 1; i >= 0; i--) {
+            a->dp[i+1] |= a->dp[i] >> (SP_WORD_SIZE - n);
+            a->dp[i] = a->dp[i] << n;
+        }
+        /* Count the spill digit only if bits actually landed in it. */
+        if (a->dp[a->used] != 0)
+            a->used++;
+    }
+
+    return MP_OKAY;
+}
+
+/* Subtract two large numbers into result: r = a - b
+ * a must be greater than b.
+ *
+ * a  SP integer.
+ * b  SP integer.
+ * r  SP integer.
+ * returns MP_OKAY always.
+ */
+int sp_sub(sp_int* a, sp_int* b, sp_int* r)
+{
+    int i;
+    sp_int_digit c = 0;
+    sp_int_digit t;
+
+    /* Subtract the overlapping digits, tracking the borrow c (0 or 1). */
+    for (i = 0; i < a->used && i < b->used; i++) {
+        t = a->dp[i] - b->dp[i] - c;
+        /* Unsigned wrap detects the borrow; with an incoming borrow the
+         * result equal to a->dp[i] also means b->dp[i]+c wrapped, hence >=. */
+        if (c == 0)
+            c = t > a->dp[i];
+        else
+            c = t >= a->dp[i];
+        r->dp[i] = t;
+    }
+    /* Propagate any remaining borrow through a's upper digits. */
+    for (; i < a->used; i++) {
+        r->dp[i] = a->dp[i] - c;
+        c &= (r->dp[i] == (sp_int_digit)-1);
+    }
+    r->used = i;
+    sp_clamp(r);
+
+    return MP_OKAY;
+}
+
+/* Shift a right by n bits into r: r = a >> n
+ * n may be zero or larger than the word size.
+ *
+ * The previous version shifted digits by n/SP_WORD_SIZE in the wrong
+ * direction and performed single shifts of n >= SP_WORD_SIZE bits
+ * (undefined behaviour) -- reachable from sp_prime_miller_rabin_ex when
+ * sp_cnt_lsb returns a full digit or more.
+ *
+ * a  SP integer operand.
+ * n  Number of bits to shift.
+ * r  SP integer result.
+ */
+void sp_rshb(sp_int* a, int n, sp_int* r)
+{
+    int i = 0;
+    int j = n / SP_WORD_SIZE;   /* whole digits dropped from the bottom */
+
+    n %= SP_WORD_SIZE;          /* remaining bit shift: 0..SP_WORD_SIZE-1 */
+
+    if (j >= a->used) {
+        /* Every digit shifted out: the result is zero. */
+        r->dp[0] = 0;
+        r->used = 0;
+    }
+    else if (n == 0) {
+        /* Pure digit move; a shift by SP_WORD_SIZE - 0 would be undefined. */
+        for (; j < a->used; i++, j++)
+            r->dp[i] = a->dp[j];
+        r->used = i;
+        sp_clamp(r);
+    }
+    else {
+        /* Each result digit takes its own bits shifted down plus the low
+         * bits of the digit above. */
+        for (; j < a->used - 1; i++, j++)
+            r->dp[i] = (a->dp[j] >> n) | (a->dp[j+1] << (SP_WORD_SIZE - n));
+        r->dp[i] = a->dp[j] >> n;
+        r->used = i + 1;
+        sp_clamp(r);
+    }
+}
+
+/* Multiply a by digit n and put result into r shifting up o digits.
+ * r = (a * n) << (o * SP_WORD_SIZE)
+ *
+ * a  SP integer to be multiplied.
+ * n  Number to multiply by.
+ * r  SP integer result.
+ * o  Number of digits to move result up by.
+ */
+static void _sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r, int o)
+{
+    int i;
+    sp_int_word t = 0;
+
+    /* Zero the low digits the shift skips over. */
+    for (i = 0; i < o; i++)
+        r->dp[i] = 0;
+
+    /* Accumulate in a double-width word: low half is the result digit,
+     * high half carries into the next iteration. */
+    for (i = 0; i < a->used; i++) {
+        t += (sp_int_word)n * a->dp[i];
+        r->dp[i + o] = (sp_int_digit)t;
+        t >>= SP_WORD_SIZE;
+    }
+
+    /* Final carry becomes the (possibly zero) top digit; clamp removes it. */
+    r->dp[i+o] = (sp_int_digit)t;
+    r->used = i+o+1;
+    sp_clamp(r);
+}
+
+/* Divide a by d and return the quotient in r and the remainder in rem.
+ * r = a / d; rem = a % d
+ *
+ * Schoolbook division: normalize so the divisor's top digit is large,
+ * estimate each quotient digit from the top one or two dividend digits,
+ * then correct the estimate by trial multiplication.
+ *
+ * a    SP integer to be divided.
+ * d    SP integer to divide by.
+ * r    SP integer of quotient.
+ * rem  SP integer of remainder.
+ * returns MP_VAL when d is 0, MP_MEM when dynamic memory allocation fails and
+ * MP_OKAY otherwise.
+ */
+static int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem)
+{
+    int err = MP_OKAY;
+    int ret;
+    int done = 0;
+    int i;
+    int s;
+#ifndef WOLFSSL_SP_DIV_32
+    sp_int_word w = 0;
+#endif
+    sp_int_digit dt;
+    sp_int_digit t;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* sa = NULL;
+    sp_int* sd;
+    sp_int* tr;
+    sp_int* trial;
+#else
+    sp_int sa[1];
+    sp_int sd[1];
+    sp_int tr[1];
+    sp_int trial[1];
+#endif
+
+    if (sp_iszero(d))
+        err = MP_VAL;
+
+    /* NOTE(review): sp_cmp runs even when d was zero above; harmless since
+     * 'done' paths are not taken with err set, but worth confirming. */
+    ret = sp_cmp(a, d);
+    /* a < d: quotient 0, remainder a. */
+    if (ret == MP_LT) {
+        if (rem != NULL) {
+            sp_copy(a, rem);
+        }
+        if (r != NULL) {
+            sp_set(r, 0);
+        }
+        done = 1;
+    }
+    /* a == d: quotient 1, remainder 0. */
+    else if (ret == MP_EQ) {
+        if (rem != NULL) {
+            sp_set(rem, 0);
+        }
+        if (r != NULL) {
+            sp_set(r, 1);
+        }
+        done = 1;
+    }
+    else if (sp_count_bits(a) == sp_count_bits(d)) {
+        /* a is greater than d but same bit length: quotient must be 1. */
+        if (rem != NULL) {
+            sp_sub(a, d, rem);
+        }
+        if (r != NULL) {
+            sp_set(r, 1);
+        }
+        done = 1;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (!done && err == MP_OKAY) {
+        sa = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT);
+        if (sa == NULL) {
+            err = MP_MEM;
+        }
+    }
+#endif
+
+    if (!done && err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        sd    = &sa[1];
+        tr    = &sa[2];
+        trial = &sa[3];
+#endif
+
+        sp_init(sa);
+        sp_init(sd);
+        sp_init(tr);
+        sp_init(trial);
+
+        /* Normalize: shift both so the divisor fills its top digit, which
+         * keeps the quotient-digit estimates accurate. */
+        s = sp_count_bits(d);
+        s = SP_WORD_SIZE - (s % SP_WORD_SIZE);
+        sp_copy(a, sa);
+        if (s != SP_WORD_SIZE) {
+            sp_lshb(sa, s);
+            sp_copy(d, sd);
+            sp_lshb(sd, s);
+            d = sd;
+        }
+
+        tr->used = sa->used - d->used + 1;
+        sp_clear(tr);
+        tr->used = sa->used - d->used + 1;
+        dt = d->dp[d->used-1];
+#ifndef WOLFSSL_SP_DIV_32
+        for (i = sa->used - 1; i >= d->used; ) {
+            /* Estimate the next quotient digit from the top two dividend
+             * digits divided by the divisor's top digit. */
+            if (sa->dp[i] > dt) {
+                t = (sp_int_digit)-1;
+            }
+            else {
+                w = ((sp_int_word)sa->dp[i] << SP_WORD_SIZE) | sa->dp[i-1];
+                w /= dt;
+                if (w > (sp_int_digit)-1) {
+                    t = (sp_int_digit)-1;
+                }
+                else {
+                    t = (sp_int_digit)w;
+                }
+            }
+
+            if (t > 0) {
+                /* The estimate may be high: back off until trial <= sa. */
+                _sp_mul_d(d, t, trial, i - d->used);
+                while (sp_cmp(trial, sa) == MP_GT) {
+                    t--;
+                    _sp_mul_d(d, t, trial, i - d->used);
+                }
+                sp_sub(sa, trial, sa);
+                tr->dp[i - d->used] += t;
+                if (tr->dp[i - d->used] < t)
+                    tr->dp[i + 1 - d->used]++;
+            }
+            /* Subtraction shrank sa; restart at its new top digit. */
+            i = sa->used - 1;
+        }
+#else
+        {
+            /* 32-bit-safe variant: estimate using half-digit arithmetic. */
+            sp_int_digit div = (dt >> (SP_WORD_SIZE / 2)) + 1;
+            for (i = sa->used - 1; i >= d->used; ) {
+                t = sa->dp[i] / div;
+                if ((t > 0) && (t << (SP_WORD_SIZE / 2) == 0))
+                    t = (sp_int_digit)-1;
+                t <<= SP_WORD_SIZE / 2;
+                if (t == 0) {
+                   t = sa->dp[i] << (SP_WORD_SIZE / 2);
+                   t += sa->dp[i-1] >> (SP_WORD_SIZE / 2);
+                   t /= div;
+                }
+
+                if (t > 0) {
+                    _sp_mul_d(d, t, trial, i - d->used);
+                    while (sp_cmp(trial, sa) == MP_GT) {
+                        t--;
+                        _sp_mul_d(d, t, trial, i - d->used);
+                    }
+                    sp_sub(sa, trial, sa);
+                    tr->dp[i - d->used] += t;
+                    if (tr->dp[i - d->used] < t)
+                        tr->dp[i + 1 - d->used]++;
+                }
+                i = sa->used - 1;
+            }
+
+            /* The conservative estimates can leave sa >= d; finish off. */
+            while (sp_cmp(sa, d) != MP_LT) {
+                sp_sub(sa, d, sa);
+                sp_add_d(tr, 1, tr);
+            }
+        }
+#endif
+
+        sp_clamp(tr);
+
+        if (rem != NULL) {
+            /* Undo the normalization shift on the remainder. */
+            if (s != SP_WORD_SIZE)
+                sp_rshb(sa, s, sa);
+            sp_copy(sa, rem);
+        }
+        if (r != NULL)
+            sp_copy(tr, r);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (sa != NULL)
+        XFREE(sa, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+
+#ifndef FREESCALE_LTC_TFM
+/* Calculate the remainder of dividing a by m: r = a mod m.
+ * Thin wrapper over sp_div that discards the quotient.
+ *
+ * a  SP integer.
+ * m  SP integer.
+ * r  SP integer.
+ * returns MP_VAL when m is 0 and MP_OKAY otherwise.
+ */
+int sp_mod(sp_int* a, sp_int* m, sp_int* r)
+{
+    return sp_div(a, m, NULL, r);
+}
+#endif
+#endif
+
+/* Clear all data in the big number and sets value to zero.
+ *
+ * a  SP integer.
+ */
+void sp_zero(sp_int* a)
+{
+    a->used = 0;
+    /* Wipe the whole digit array, not just the digits in use. */
+    XMEMSET(a->dp, 0, sizeof(*a->dp) * a->size);
+}
+
+/* Add a one digit number to the big number.
+ *
+ * NOTE(review): when a->used == 0 this reads a->dp[0]; it relies on zero
+ * values keeping dp[0] == 0 -- confirm that invariant holds everywhere.
+ *
+ * a  SP integer.
+ * d  Digit to add.
+ * r  SP integer - result.
+ * returns MP_OKAY always.
+ */
+int sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
+{
+    int i = 0;
+
+    r->used = a->used;
+    if (a->used == 0) {
+        r->used = 1;
+    }
+    r->dp[0] = a->dp[0] + d;
+    /* Unsigned wrap-around detects a carry out of the low digit. */
+    if (r->dp[i] < a->dp[i]) {
+        /* Propagate the carry; it stops at the first digit that does not
+         * wrap to zero. */
+        for (; i < a->used; i++) {
+            r->dp[i] = a->dp[i] + 1;
+            if (r->dp[i] != 0)
+                break;
+        }
+
+        /* Carry out of the top digit extends the number by one digit. */
+        if (i == a->used) {
+            r->used++;
+            r->dp[i] = 1;
+        }
+    }
+    /* Copy the remaining, unaffected digits. */
+    for (; i < a->used; i++)
+        r->dp[i] = a->dp[i];
+
+    return MP_OKAY;
+}
+
+#if !defined(NO_DH) || defined(HAVE_ECC) || defined(WC_RSA_BLINDING) || \
+    !defined(WOLFSSL_RSA_VERIFY_ONLY)
+/* Left shift the big number by a number of digits.
+ * WIll chop off digits overflowing maximum size.
+ *
+ * a  SP integer.
+ * s  Number of digits to shift.
+ * returns MP_OKAY always.
+ */
+int sp_lshd(sp_int* a, int s)
+{
+    /* If the shift would overflow the fixed storage, drop the
+     * most-significant digits so exactly a->size digits remain. */
+    if (a->used + s > a->size)
+        a->used = a->size - s;
+
+    /* Move the kept digits up and zero-fill the vacated low digits. */
+    XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
+    a->used += s;
+    XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));
+    sp_clamp(a);
+
+    return MP_OKAY;
+}
+#endif
+
+#if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
+/* Add two large numbers into result: r = a + b
+ *
+ * NOTE(review): the final carry digit is written at index i == max(used);
+ * requires max(a->used, b->used) < size -- confirm callers ensure headroom.
+ *
+ * a  SP integer.
+ * b  SP integer.
+ * r  SP integer.
+ * returns MP_OKAY always.
+ */
+int sp_add(sp_int* a, sp_int* b, sp_int* r)
+{
+    int i;
+    sp_int_digit c = 0;
+    sp_int_digit t;
+
+    /* Add the overlapping digits, tracking the carry c (0 or 1). */
+    for (i = 0; i < a->used && i < b->used; i++) {
+        t = a->dp[i] + b->dp[i] + c;
+        /* Unsigned wrap detects the carry; with an incoming carry a result
+         * equal to a->dp[i] also means b->dp[i]+c wrapped, hence <=. */
+        if (c == 0)
+            c = t < a->dp[i];
+        else
+            c = t <= a->dp[i];
+        r->dp[i] = t;
+    }
+    /* Propagate the carry through whichever operand is longer. */
+    for (; i < a->used; i++) {
+        r->dp[i] = a->dp[i] + c;
+        c = (a->dp[i] != 0) && (r->dp[i] == 0);
+    }
+    for (; i < b->used; i++) {
+        r->dp[i] = b->dp[i] + c;
+        c = (b->dp[i] != 0) && (r->dp[i] == 0);
+    }
+    /* A final carry adds one more digit. */
+    r->dp[i] = c;
+    r->used = (int)(i + c);
+
+    return MP_OKAY;
+}
+#endif /* !NO_PWDBASED || WOLFSSL_KEY_GEN || !NO_DH */
+
+#ifndef NO_RSA
+/* Set a number into the big number.
+ *
+ * NOTE(review): the value is truncated if unsigned long is wider than
+ * sp_int_digit -- confirm callers only pass digit-sized values.
+ *
+ * a  SP integer.
+ * b  Value to set.
+ * returns MP_OKAY always.
+ */
+int sp_set_int(sp_int* a, unsigned long b)
+{
+    a->dp[0] = (sp_int_digit)b;
+    /* Zero is represented with no digits in use. */
+    a->used = (b != 0);
+
+    return MP_OKAY;
+}
+#endif /* !NO_RSA */
+
+#ifdef WC_MP_TO_RADIX
+/* Hex string characters. */
+static const char sp_hex_char[16] = {
+    '0', '1', '2', '3', '4', '5', '6', '7',
+    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+};
+
+/* Put the hex string version, big-endian, of a in str.
+ * No leading zero nibbles are emitted. The caller must provide a buffer
+ * large enough for the digits plus the NUL terminator.
+ *
+ * a    SP integer.
+ * str  Hex string is stored here.
+ * returns MP_OKAY always.
+ */
+int sp_tohex(sp_int* a, char* str)
+{
+    int i, j;
+
+    /* quick out if its zero */
+    if (sp_iszero(a) == MP_YES) {
+        *str++ = '0';
+        *str = '\0';
+    }
+    else {
+        /* Skip leading zero nibbles of the most significant digit. */
+        i = a->used - 1;
+        for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
+            if (((a->dp[i] >> j) & 0xf) != 0)
+                break;
+        }
+        for (; j >= 0; j -= 4)
+            *(str++) = sp_hex_char[(a->dp[i] >> j) & 0xf];
+        /* Remaining digits are emitted in full, nibble by nibble. */
+        for (--i; i >= 0; i--) {
+            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4)
+                *(str++) = sp_hex_char[(a->dp[i] >> j) & 0xf];
+        }
+        *str = '\0';
+    }
+
+    return MP_OKAY;
+}
+#endif /* WC_MP_TO_RADIX */
+
+#if defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) && !defined(WC_NO_RNG)
+/* Set a bit of a: a |= 1 << i
+ * The field 'used' is updated in a.
+ *
+ * a  SP integer to modify.
+ * i  Index of bit to set.
+ * returns MP_OKAY always.
+ */
+int sp_set_bit(sp_int* a, int i)
+{
+    int ret = MP_OKAY;
+    int w;
+
+    if ((a == NULL) || (i / SP_WORD_SIZE >= SP_INT_DIGITS)) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        w = i / SP_WORD_SIZE;                       /* digit holding the bit */
+        a->dp[w] |= (sp_int_digit)1 << (i % SP_WORD_SIZE);
+        /* Extend 'used' when the bit lands above the current top digit. */
+        if (a->used <= w) {
+            a->used = w + 1;
+        }
+    }
+    return ret;
+}
+
+/* Exponentiate 2 to the power of e: a = 2^e
+ * This is done by setting the 'e'th bit.
+ *
+ * a  SP integer.
+ * e  Exponent.
+ * returns MP_OKAY always.
+ */
+int sp_2expt(sp_int* a, int e)
+{
+    int err;
+
+    /* Start from zero; 2^e is then just bit e set. */
+    sp_zero(a);
+    err = sp_set_bit(a, e);
+    return err;
+}
+
+/* Generate a random prime for RSA only.
+ *
+ * r     SP integer
+ * len   Number of bytes to prime.
+ * rng   Random number generator.
+ * heap  Unused
+ * returns MP_OKAY on success and MP_VAL when length is not supported or random
+ * number generator fails.
+ */
+int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
+{
+    static const int USE_BBS = 1;
+    int   err = 0, type;
+    int   isPrime = MP_NO;
+
+    (void)heap;
+
+    /* get type: a negative length requests a Blum-Blum-Shub style prime
+     * (congruent to 3 mod 4, i.e. low two bits set). */
+    if (len < 0) {
+        type = USE_BBS;
+        len = -len;
+    }
+    else {
+        type = 0;
+    }
+
+#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
+    if (len == 32) {
+    }
+    else
+#endif
+    /* Generate RSA primes that are half the modulus length. */
+#ifndef WOLFSSL_SP_NO_3072
+    if (len != 128 && len != 192)
+#else
+    if (len != 128)
+#endif
+    {
+        err = MP_VAL;
+    }
+
+    /* NOTE(review): assumes len is a multiple of the digit size and that
+     * filling dp[] as raw bytes matches the digit layout (little-endian
+     * hosts) -- confirm for big-endian targets. Also runs even when err
+     * was set above; harmless as the loop below is skipped. */
+    r->used = len / (SP_WORD_SIZE / 8);
+
+    /* Assume the candidate is probably prime and then test until
+     * it is proven composite. */
+    while (err == 0 && isPrime == MP_NO) {
+#ifdef SHOW_GEN
+        printf(".");
+        fflush(stdout);
+#endif
+        /* generate value */
+        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, len);
+        if (err != 0) {
+            err = MP_VAL;
+            break;
+        }
+
+        /* munge bits: force the top two bits (full bit length and big
+         * enough product) and the low bit (odd; BBS also sets bit 1). */
+        ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
+        r->dp[0]             |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
+
+        /* test */
+        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
+         * of a 1024-bit candidate being a false positive, when it is our
+         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
+         * Using 8 because we've always used 8 */
+        sp_prime_is_prime_ex(r, 8, &isPrime, rng);
+    }
+
+    return err;
+}
+
+/* Multiply a by b and store in r: r = a * b
+ * Schoolbook multiplication: accumulates one shifted partial product per
+ * digit of b.
+ *
+ * a  SP integer to multiply.
+ * b  SP integer to multiply.
+ * r  SP integer result.
+ * returns MP_OKAY always.
+ */
+int sp_mul(sp_int* a, sp_int* b, sp_int* r)
+{
+    int err = MP_OKAY;
+    int i;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* t = NULL;
+    sp_int* tr;
+#else
+    sp_int t[1];
+    sp_int tr[1];
+#endif
+
+    /* The product needs a->used + b->used digits of storage. */
+    if (a->used + b->used > SP_INT_DIGITS)
+        err = MP_VAL;
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+        if (t == NULL)
+            err = MP_MEM;
+        else
+            tr = &t[1];
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        sp_init(t);
+        sp_init(tr);
+
+        /* tr accumulates (a * b->dp[i]) << (i * SP_WORD_SIZE). */
+        for (i = 0; i < b->used; i++) {
+            _sp_mul_d(a, b->dp[i], t, i);
+            sp_add(tr, t, tr);
+        }
+        /* Accumulate into a temporary so r may alias a or b. */
+        sp_copy(tr, r);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+/* Square a mod m and store in r: r = (a * a) mod m
+ *
+ * a  SP integer to square.
+ * m  SP integer modulus.
+ * r  SP integer result.
+ * returns MP_VAL when m is 0, MP_MEM when dynamic memory allocation fails,
+ * BAD_FUNC_ARG when a is to big and MP_OKAY otherwise.
+ */
+static int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r)
+{
+    int err = MP_OKAY;
+
+    /* The square needs twice the digits of a. */
+    if (a->used * 2 > SP_INT_DIGITS) {
+        err = MP_VAL;
+    }
+    if (err == MP_OKAY) {
+        err = sp_mul(a, a, r);
+        if (err == MP_OKAY) {
+            err = sp_mod(r, m, r);
+        }
+    }
+
+    return err;
+}
+
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_KEY_GEN)
+/* Multiply a by b mod m and store in r: r = (a * b) mod m
+ *
+ * a  SP integer to multiply.
+ * b  SP integer to multiply.
+ * m  SP integer modulus.
+ * r  SP integer result.
+ * returns MP_VAL when m is 0, MP_MEM when dynamic memory allocation fails and
+ * MP_OKAY otherwise.
+ */
+int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
+{
+    int err = MP_OKAY;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* t = NULL;
+#else
+    /* t is never sp_init'ed here: sp_mul fully writes it before sp_mod
+     * reads it, so that is safe as long as that ordering holds. */
+    sp_int t[1];
+#endif
+
+    if (a->used + b->used > SP_INT_DIGITS)
+        err = MP_VAL;
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+        if (t == NULL) {
+            err = MP_MEM;
+        }
+    }
+#endif
+    /* Full-width product first, then a single reduction. */
+    if (err == MP_OKAY) {
+        err = sp_mul(a, b, t);
+    }
+    if (err == MP_OKAY) {
+        err = sp_mod(t, m, r);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    return err;
+}
+#endif
+
+/* Calculate a modulo the digit d into r: r = a mod d
+ *
+ * a  SP integer to reduce.
+ * d  SP integer digit, modulus.
+ * r  SP integer digit, result.
+ * returns MP_VAL when d is 0 and MP_OKAY otherwise.
+ */
+static int sp_mod_d(sp_int* a, const sp_int_digit d, sp_int_digit* r)
+{
+    int err = MP_OKAY;
+    int i;
+    sp_int_word w = 0;
+    sp_int_digit t;
+
+    if (d == 0)
+        err = MP_VAL;
+
+    if (err == MP_OKAY) {
+        /* Long division top-down: after each step w holds the running
+         * remainder (< d), so shifting in the next digit never overflows
+         * the double-width word. */
+        for (i = a->used - 1; i >= 0; i--) {
+            w = (w << SP_WORD_SIZE) | a->dp[i];
+            t = (sp_int_digit)(w / d);
+            w -= (sp_int_word)t * d;
+        }
+
+        *r = (sp_int_digit)w;
+    }
+
+    return err;
+}
+
+/* Calculates the Greatest Common Denominator (GCD) of a and b into r.
+ * Euclid's algorithm: repeatedly replace (u, v) with (v, u mod v) until
+ * v reaches zero; the single-digit case is short-circuited via sp_mod_d.
+ *
+ * a  SP integer operand.
+ * b  SP integer operand.
+ * r  SP integer result.
+ * returns MP_MEM when dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_gcd(sp_int* a, sp_int* b, sp_int* r)
+{
+    int err = MP_OKAY;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* u = NULL;
+    sp_int* v;
+    sp_int* t;
+#else
+    sp_int u[1], v[1], t[1];
+#endif
+
+    /* gcd(0, x) == x by convention. */
+    if (sp_iszero(a))
+        sp_copy(b, r);
+    else if (sp_iszero(b))
+        sp_copy(a, r);
+    else {
+#ifdef WOLFSSL_SMALL_STACK
+        u = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+        if (u == NULL)
+            err = MP_MEM;
+        else {
+            v = &u[1];
+            t = &u[2];
+        }
+#endif
+
+        if (err == MP_OKAY) {
+            sp_init(u);
+            sp_init(v);
+            sp_init(t);
+
+            /* Order the operands so u >= v before the loop starts. */
+            if (sp_cmp(a, b) != MP_LT) {
+                sp_copy(b, u);
+                /* First iteration - u = a, v = b */
+                if (b->used == 1) {
+                    err = sp_mod_d(a, b->dp[0], &v->dp[0]);
+                    if (err == MP_OKAY)
+                        v->used = (v->dp[0] != 0);
+                }
+                else
+                    err = sp_mod(a, b, v);
+            }
+            else {
+                sp_copy(a, u);
+                /* First iteration - u = b, v = a */
+                if (a->used == 1) {
+                    err = sp_mod_d(b, a->dp[0], &v->dp[0]);
+                    if (err == MP_OKAY)
+                        v->used = (v->dp[0] != 0);
+                }
+                else
+                    err = sp_mod(b, a, v);
+            }
+        }
+
+        if (err == MP_OKAY) {
+            /* NOTE(review): sp_mod_d/sp_mod return codes are ignored inside
+             * this loop -- confirm they cannot fail here (v is non-zero). */
+            while (!sp_iszero(v)) {
+                if (v->used == 1) {
+                    sp_mod_d(u, v->dp[0], &t->dp[0]);
+                    t->used = (t->dp[0] != 0);
+                }
+                else
+                    sp_mod(u, v, t);
+                sp_copy(v, u);
+                sp_copy(t, v);
+            }
+            /* v == 0: u holds the GCD. */
+            sp_copy(u, r);
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (u != NULL)
+        XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+/* Divides a by 2 and stores in r: r = a >> 1
+ *
+ * a  SP integer to divide.
+ * r  SP integer result.
+ * returns MP_OKAY always.
+ */
+static int sp_div_2(sp_int* a, sp_int* r)
+{
+    int k;
+
+    /* Each digit takes its own bits shifted down plus the low bit of the
+     * digit above it. */
+    for (k = 0; k < a->used - 1; k++) {
+        r->dp[k] = (a->dp[k] >> 1) | (a->dp[k+1] << (SP_WORD_SIZE - 1));
+    }
+    r->dp[k] = a->dp[k] >> 1;
+    r->used = k + 1;
+    sp_clamp(r);
+
+    return MP_OKAY;
+}
+
+
+/* Calculates the multiplicative inverse in the field.
+ * Uses the binary extended Euclidean algorithm for odd moduli; an even
+ * modulus (with odd a) is handled by recursing with the roles swapped.
+ *
+ * a  SP integer to invert.
+ * m  SP integer that is the modulus of the field.
+ * r  SP integer result.
+ * returns MP_VAL when a or m is 0, MP_MEM when dynamic memory allocation fails
+ * and MP_OKAY otherwise.
+ */
+int sp_invmod(sp_int* a, sp_int* m, sp_int* r)
+{
+    int err = MP_OKAY;
+#ifdef WOLFSSL_SMALL_STACK
+    sp_int* u = NULL;
+    sp_int* v;
+    sp_int* b;
+    sp_int* c;
+#else
+    sp_int u[1], v[1], b[1], c[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    u = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT);
+    if (u == NULL) {
+        err = MP_MEM;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        v = &u[1];
+        b = &u[2];
+        c = &u[3];
+#endif
+        sp_init(v);
+
+        /* Reduce a below m first; v holds the reduced value. */
+        if (sp_cmp(a, m) != MP_LT) {
+            err = sp_mod(a, m, v);
+            a = v;
+        }
+    }
+
+    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
+    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
+        err = MP_VAL;
+    }
+    /* r*2*x != n*2*y + 1 -- both even means no inverse exists. */
+    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
+        err = MP_VAL;
+    }
+
+    /* 1*1 = 0*m + 1 */
+    if ((err == MP_OKAY) && sp_isone(a)) {
+        sp_set(r, 1);
+    }
+    else if (err != MP_OKAY) {
+    }
+    else if (sp_iseven(m)) {
+        /* a^-1 mod m = m + (1 - m*(m^-1 % a)) / a
+         *            = m - (m*(m^-1 % a) - 1) / a
+         * Valid here because a must be odd (both-even was rejected above).
+         */
+        err = sp_invmod(m, a, r);
+        if (err == MP_OKAY) {
+            err = sp_mul(r, m, r);
+        }
+        if (err == MP_OKAY) {
+            sp_sub_d(r, 1, r);
+            sp_div(r, a, r, NULL);
+            sp_sub(m, r, r);
+        }
+    }
+    else {
+        if (err == MP_OKAY) {
+            sp_init(u);
+            sp_init(b);
+            sp_init(c);
+
+            /* Invariants: b*a == u (mod m) and c*a == v (mod m). */
+            sp_copy(m, u);
+            sp_copy(a, v);
+            sp_zero(b);
+            sp_set(c, 1);
+
+            while (!sp_isone(v) && !sp_iszero(u)) {
+                /* Halve an even value; keep the coefficient integral by
+                 * adding the (odd) modulus first when needed. */
+                if (sp_iseven(u)) {
+                    sp_div_2(u, u);
+                    if (sp_isodd(b)) {
+                        sp_add(b, m, b);
+                    }
+                    sp_div_2(b, b);
+                }
+                else if (sp_iseven(v)) {
+                    sp_div_2(v, v);
+                    if (sp_isodd(c)) {
+                        sp_add(c, m, c);
+                    }
+                    sp_div_2(c, c);
+                }
+                /* Both odd: subtract the smaller pair from the larger. */
+                else if (sp_cmp(u, v) != MP_LT) {
+                    sp_sub(u, v, u);
+                    if (sp_cmp(b, c) == MP_LT) {
+                        sp_add(b, m, b);
+                    }
+                    sp_sub(b, c, b);
+                }
+                else {
+                    sp_sub(v, u, v);
+                    if (sp_cmp(c, b) == MP_LT) {
+                        sp_add(c, m, c);
+                    }
+                    sp_sub(c, b, c);
+                }
+            }
+            /* u reaching zero without v == 1 means gcd(a, m) != 1. */
+            if (sp_iszero(u)) {
+                err = MP_VAL;
+            }
+            else {
+                sp_copy(c, r);
+            }
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (u != NULL) {
+        XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
+    }
+#endif
+
+    return err;
+}
+
+/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
+ * Computed as (larger / gcd) * smaller, dividing first to keep the
+ * intermediate values small.
+ *
+ * a  SP integer operand.
+ * b  SP integer operand.
+ * r  SP integer result.
+ * returns MP_MEM when dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
+{
+    int     err = MP_OKAY;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int  t[2];
+#else
+    sp_int  *t = NULL;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+    if (t == NULL) {
+        err = MP_MEM;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        sp_init(&t[0]);
+        sp_init(&t[1]);
+        /* t[0] = gcd(a, b); t[1] = larger / gcd. */
+        err = sp_gcd(a, b, &t[0]);
+        if (err == MP_OKAY) {
+            if (sp_cmp(a, b) == MP_GT) {
+                err = sp_div(a, &t[0], &t[1], NULL);
+                if (err == MP_OKAY)
+                    err = sp_mul(b, &t[1], r);
+            }
+            else {
+                err = sp_div(b, &t[0], &t[1], NULL);
+                if (err == MP_OKAY)
+                    err = sp_mul(a, &t[1], r);
+            }
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    return err;
+}
+
+/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
+ * Dispatches to the fixed-size single-precision implementations when the
+ * modulus matches a supported size; otherwise (DH key-gen builds only)
+ * falls back to a generic square-and-multiply.
+ *
+ * b  SP integer base.
+ * e  SP integer exponent.
+ * m  SP integer modulus.
+ * r  SP integer result.
+ * returns MP_VAL when m is not 1024, 2048, 1536 or 3072 bits and otherwise
+ * MP_OKAY.
+ */
+int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
+{
+    int err = MP_OKAY;
+    int done = 0;
+    int mBits = sp_count_bits(m);
+    int bBits = sp_count_bits(b);
+    int eBits = sp_count_bits(e);
+
+    /* Trivial cases first: x mod 0 invalid, x mod 1 == 0, x^0 == 1,
+     * 0^e == 0 (e != 0 here). */
+    if (sp_iszero(m)) {
+        err = MP_VAL;
+    }
+    else if (sp_isone(m)) {
+        sp_set(r, 0);
+        done = 1;
+    }
+    else if (sp_iszero(e)) {
+        sp_set(r, 1);
+        done = 1;
+    }
+    else if (sp_iszero(b)) {
+        sp_set(r, 0);
+        done = 1;
+    }
+    else if (m->used * 2 > SP_INT_DIGITS) {
+        err = BAD_FUNC_ARG;
+    }
+
+    if (!done && (err == MP_OKAY)) {
+#ifndef WOLFSSL_SP_NO_2048
+        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
+            (eBits <= 1024)) {
+            err = sp_ModExp_1024(b, e, m, r);
+            done = 1;
+        }
+        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
+                 (eBits <= 2048)) {
+            err = sp_ModExp_2048(b, e, m, r);
+            done = 1;
+        }
+        else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
+            (eBits <= 1536)) {
+            err = sp_ModExp_1536(b, e, m, r);
+            done = 1;
+        }
+        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
+                 (eBits <= 3072)) {
+            err = sp_ModExp_3072(b, e, m, r);
+            done = 1;
+        }
+        else
+#endif
+        /* NOTE(review): this guard looks inverted -- the other sizes use
+         * #ifndef WOLFSSL_SP_NO_xxxx, but as written the 4096-bit path is
+         * compiled only when 4096 support is DISABLED. Confirm upstream. */
+#ifdef WOLFSSL_SP_NO_4096
+        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
+            (eBits <= 4096)) {
+            err = sp_ModExp_4096(b, e, m, r);
+            done = 1;
+        }
+        else
+#endif
+        {
+        }
+    }
+#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
+    if (!done && (err == MP_OKAY)) {
+        int i;
+
+    #ifdef WOLFSSL_SMALL_STACK
+        sp_int* t = NULL;
+    #else
+        sp_int t[1];
+    #endif
+
+    #ifdef WOLFSSL_SMALL_STACK
+        if (!done && (err == MP_OKAY)) {
+            t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+            if (t == NULL) {
+                err = MP_MEM;
+            }
+        }
+    #endif
+        if (!done && (err == MP_OKAY)) {
+            sp_init(t);
+
+            /* Reduce the base below m before exponentiating. */
+            if (sp_cmp(b, m) != MP_LT) {
+                err = sp_mod(b, m, t);
+                if (err == MP_OKAY && sp_iszero(t)) {
+                    sp_set(r, 0);
+                    done = 1;
+                }
+            }
+            else {
+                sp_copy(b, t);
+            }
+
+            if (!done && (err == MP_OKAY)) {
+                /* Left-to-right binary square-and-multiply, starting below
+                 * the (known set) top bit of e. Not constant time. */
+                for (i = eBits-2; err == MP_OKAY && i >= 0; i--) {
+                     err = sp_sqrmod(t, m, t);
+                     if (err == MP_OKAY && (e->dp[i / SP_WORD_SIZE] >>
+                                               (i % SP_WORD_SIZE)) & 1) {
+                         err = sp_mulmod(t, b, m, t);
+                     }
+                }
+            }
+        }
+        if (!done && (err == MP_OKAY)) {
+            sp_copy(t, r);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        if (t != NULL) {
+            XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+        }
+    #endif
+    }
+#else
+    if (!done && (err == MP_OKAY)) {
+        err = MP_VAL;
+    }
+#endif
+
+    (void)mBits;
+    (void)bBits;
+    (void)eBits;
+
+    return err;
+}
+
+
+/* Number of entries in array of number of least significant zero bits. */
+#define SP_LNZ_CNT      16
+/* Number of bits the array checks. */
+#define SP_LNZ_BITS     4
+/* Mask to apply to check with array. */
+#define SP_LNZ_MASK     0xf
+/* Number of least significant zero bits in first SP_LNZ_CNT numbers.
+ * Entry 0 is 4, meaning "all SP_LNZ_BITS bits zero, keep scanning". */
+static const int lnz[SP_LNZ_CNT] = {
+   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+/* Count the number of least significant zero bits.
+ *
+ * a  Number to check
+ * returns the count of least significant zero bits (0 when a is zero).
+ */
+static int sp_cnt_lsb(sp_int* a)
+{
+    int i, j;
+    int cnt = 0;
+    int bc = 0;
+
+    if (!sp_iszero(a)) {
+        /* Skip whole zero digits first. */
+        for (i = 0; i < a->used && a->dp[i] == 0; i++, cnt += SP_WORD_SIZE) {
+        }
+
+        /* Scan the first non-zero digit one nibble at a time; the table
+         * returns 4 while the nibble is all zeros. */
+        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
+            bc = lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
+            if (bc != 4) {
+                bc += cnt + j;
+                break;
+            }
+        }
+    }
+
+    return bc;
+}
+
+/* Miller-Rabin test of "a" to the base of "b" as described in
+ * HAC pp. 139 Algorithm 4.24
+ *
+ * Sets result to 0 if definitely composite or 1 if probably prime.
+ * Randomly the chance of error is no more than 1/4 and often
+ * very much lower.
+ *
+ * a       SP integer to check.
+ * b       SP integer small prime.
+ * result  Whether a is likely prime: MP_YES or MP_NO.
+ * n1      SP integer operand (scratch, set to a - 1).
+ * y       SP integer operand (scratch).
+ * r       SP integer operand (scratch, set to the odd part of a - 1).
+ * returns MP_VAL when a is not 1024, 2048, 1536 or 3072 and MP_OKAY otherwise.
+ */
+static int sp_prime_miller_rabin_ex(sp_int * a, sp_int * b, int *result,
+  sp_int *n1, sp_int *y, sp_int *r)
+{
+    int s, j;
+    int err = MP_OKAY;
+
+    /* default */
+    *result = MP_NO;
+
+    /* ensure b > 1 */
+    if (sp_cmp_d(b, 1) == MP_GT) {
+        /* get n1 = a - 1 */
+        sp_copy(a, n1);
+        sp_sub_d(n1, 1, n1);
+        /* set 2**s * r = n1 */
+        sp_copy(n1, r);
+
+        /* count the number of least significant bits
+         * which are zero
+         */
+        s = sp_cnt_lsb(r);
+
+        /* now divide n - 1 by 2**s */
+        sp_rshb(r, s, r);
+
+        /* compute y = b**r mod a */
+        sp_zero(y);
+
+        err = sp_exptmod(b, r, a, y);
+
+        if (err == MP_OKAY) {
+            /* probably prime until shown otherwise */
+            *result = MP_YES;
+
+            /* if y != 1 and y != n1 do */
+            if (sp_cmp_d(y, 1) != MP_EQ && sp_cmp(y, n1) != MP_EQ) {
+                j = 1;
+                /* while j <= s-1 and y != n1: keep squaring, looking for
+                 * the required -1 (== n1) before a premature 1 appears. */
+                while ((j <= (s - 1)) && sp_cmp(y, n1) != MP_EQ) {
+                    sp_sqrmod(y, a, y);
+
+                    /* if y == 1 then composite (non-trivial sqrt of 1) */
+                    if (sp_cmp_d(y, 1) == MP_EQ) {
+                        *result = MP_NO;
+                        break;
+                    }
+                    ++j;
+                }
+
+                /* if y != n1 then composite */
+                if (*result == MP_YES && sp_cmp(y, n1) != MP_EQ)
+                    *result = MP_NO;
+            }
+        }
+    }
+
+    return err;
+}
+
+/* Miller-Rabin test of "a" to the base of "b" as described in
+ * HAC pp. 139 Algorithm 4.24
+ *
+ * Allocates/initializes the three scratch integers and delegates to
+ * sp_prime_miller_rabin_ex.
+ *
+ * Sets result to 0 if definitely composite or 1 if probably prime.
+ * Randomly the chance of error is no more than 1/4 and often
+ * very much lower.
+ *
+ * a       SP integer to check.
+ * b       SP integer small prime.
+ * result  Whether a is likely prime: MP_YES or MP_NO.
+ * returns MP_MEM when dynamic memory allocation fails, MP_VAL when a is not
+ * 1024, 2048, 1536 or 3072 and MP_OKAY otherwise.
+ */
+static int sp_prime_miller_rabin(sp_int * a, sp_int * b, int *result)
+{
+    int err = MP_OKAY;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int  n1[1], y[1], r[1];
+#else
+    sp_int *n1 = NULL, *y, *r;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    n1 = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+    if (n1 == NULL)
+        err = MP_MEM;
+    else {
+        y = &n1[1];
+        r = &n1[2];
+    }
+#endif
+
+    if (err == MP_OKAY) {
+        sp_init(n1);
+        sp_init(y);
+        sp_init(r);
+
+        err = sp_prime_miller_rabin_ex(a, b, result, n1, y, r);
+
+        /* Clear scratch values: they are derived from the prime candidate. */
+        sp_clear(n1);
+        sp_clear(y);
+        sp_clear(r);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (n1 != NULL)
+        XFREE(n1, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+/* Number of pre-computed primes. First SP_PRIME_SIZE primes. */
+#define SP_PRIME_SIZE 256
+
+/* The first SP_PRIME_SIZE small primes - used for the equality check and trial division below. */
+static const sp_int_digit primes[SP_PRIME_SIZE] = {
+    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+
+    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+
+    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+
+    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
+};
+
+
+/* Check whether a is prime.
+ * Checks against a number of small primes and does t iterations of
+ * Miller-Rabin.
+ *
+ * a SP integer to check.
+ * t Number of iterations of Miller-Rabin to perform (1..SP_PRIME_SIZE).
+ * result MP_YES when prime.
+ * MP_NO when not prime.
+ * returns MP_VAL when t is out of range, MP_MEM when dynamic memory allocation
+ * fails and otherwise MP_OKAY.
+ */
+int sp_prime_is_prime(sp_int *a, int t, int* result)
+{
+    int err = MP_OKAY;
+    int i;
+    int haveRes = 0;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int b[1];
+#else
+    sp_int *b = NULL;
+#endif
+    sp_int_digit d;
+
+    if (t <= 0 || t > SP_PRIME_SIZE) {
+        *result = MP_NO;
+        err = MP_VAL;
+    }
+
+    if (sp_isone(a)) { /* 1 is neither prime nor composite */
+        *result = MP_NO;
+        return MP_OKAY;
+    }
+
+    if (err == MP_OKAY && a->used == 1) {
+        /* check against primes table */
+        for (i = 0; i < SP_PRIME_SIZE; i++) {
+            if (sp_cmp_d(a, primes[i]) == MP_EQ) {
+                *result = MP_YES;
+                haveRes = 1;
+                break;
+            }
+        }
+    }
+
+    if (err == MP_OKAY && !haveRes) {
+        /* do trial division */
+        for (i = 0; i < SP_PRIME_SIZE; i++) {
+            err = sp_mod_d(a, primes[i], &d);
+            if (err != MP_OKAY || d == 0) { /* divisible by a small prime => composite */
+                *result = MP_NO;
+                haveRes = 1;
+                break;
+            }
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY && !haveRes) {
+        b = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+        if (b == NULL)
+            err = MP_MEM;
+    }
+#endif
+
+    if (err == MP_OKAY && !haveRes) {
+        /* now do 't' miller rabins */
+        sp_init(b);
+        for (i = 0; i < t; i++) {
+            sp_set(b, primes[i]); /* base = i-th small prime */
+            err = sp_prime_miller_rabin(a, b, result);
+            if (err != MP_OKAY || *result == MP_NO)
+                break;
+        }
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (b != NULL)
+        XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+    return err;
+}
+
+/* Check whether a is prime.
+ * Checks against a number of small primes and does t iterations of
+ * Miller-Rabin with random bases.
+ *
+ * a SP integer to check.
+ * t Number of iterations of Miller-Rabin to perform.
+ * result MP_YES when prime.
+ * MP_NO when not prime.
+ * rng Random number generator.
+ * returns MP_VAL when a, result or rng is NULL, MP_MEM when dynamic memory
+ * allocation fails and otherwise MP_OKAY.
+ */
+int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng)
+{
+    int err = MP_OKAY;
+    int ret = MP_YES;
+    int haveRes = 0;
+    int i;
+#ifndef WC_NO_RNG
+    #ifndef WOLFSSL_SMALL_STACK
+    sp_int b[1], c[1], n1[1], y[1], r[1];
+    #else
+    sp_int *b = NULL, *c = NULL, *n1 = NULL, *y = NULL, *r = NULL;
+    #endif
+    word32 baseSz;
+#endif
+
+    if (a == NULL || result == NULL || rng == NULL)
+        err = MP_VAL;
+
+    if (sp_isone(a)) { /* 1 is neither prime nor composite */
+        *result = MP_NO;
+        return MP_OKAY;
+    }
+
+    if (err == MP_OKAY && a->used == 1) {
+        /* check against primes table */
+        for (i = 0; i < SP_PRIME_SIZE; i++) {
+            if (sp_cmp_d(a, primes[i]) == MP_EQ) {
+                ret = MP_YES;
+                haveRes = 1;
+                break;
+            }
+        }
+    }
+
+    if (err == MP_OKAY && !haveRes) {
+        sp_int_digit d;
+
+        /* do trial division */
+        for (i = 0; i < SP_PRIME_SIZE; i++) {
+            err = sp_mod_d(a, primes[i], &d);
+            if (err != MP_OKAY || d == 0) { /* divisible by a small prime => composite */
+                ret = MP_NO;
+                haveRes = 1;
+                break;
+            }
+        }
+    }
+
+#ifndef WC_NO_RNG
+    /* now do a miller rabin with up to t random numbers, this should
+     * give a (1/4)^t chance of a false prime. */
+    #ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY && !haveRes) {
+        b = (sp_int*)XMALLOC(sizeof(sp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
+        if (b == NULL) {
+            err = MP_MEM;
+        }
+        else {
+            c = &b[1]; n1 = &b[2]; y= &b[3]; r = &b[4];
+        }
+    }
+    #endif
+
+    if (err == MP_OKAY && !haveRes) {
+        sp_init(b);
+        sp_init(c);
+        sp_init(n1);
+        sp_init(y);
+        sp_init(r);
+
+        err = sp_sub_d(a, 2, c); /* c = a - 2: upper bound for random bases */
+    }
+
+    if (err == MP_OKAY && !haveRes) {
+        baseSz = (sp_count_bits(a) + 7) / 8; /* bytes needed to cover a */
+
+        while (t > 0) {
+            err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz); /* random candidate base */
+            if (err != MP_OKAY)
+                break;
+            b->used = a->used;
+
+            if (sp_cmp_d(b, 2) != MP_GT || sp_cmp(b, c) != MP_LT) /* need 2 < b < a-2 */
+                continue;
+
+            err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
+            if (err != MP_OKAY || ret == MP_NO)
+                break;
+
+            t--;
+        }
+
+        sp_clear(n1);
+        sp_clear(y);
+        sp_clear(r);
+        sp_clear(b);
+        sp_clear(c);
+    }
+
+    #ifdef WOLFSSL_SMALL_STACK
+    if (b != NULL)
+        XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+    #endif
+#else
+    (void)t;
+#endif /* !WC_NO_RNG */
+
+    *result = ret;
+    return err;
+}
+
+#ifndef NO_DH
+/* Exchange the values in a and b. (a <-> b)
+ *
+ * a First SP integer; on success holds b's original value.
+ * b Second SP integer; on success holds a's original value.
+ * returns MP_MEM when dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_exch(sp_int* a, sp_int* b)
+{
+    int err = MP_OKAY;
+#ifndef WOLFSSL_SMALL_STACK
+    sp_int t[1];
+#else
+    sp_int *t = NULL;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+    t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
+    if (t == NULL)
+        err = MP_MEM;
+#endif
+
+    if (err == MP_OKAY) {
+        /* structure copy swap via temporary */
+        *t = *a;
+        *a = *b;
+        *b = *t;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (t != NULL)
+        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+    /* Fix: propagate allocation failure. Previously MP_OKAY was returned
+     * unconditionally, so a failed XMALLOC silently skipped the swap while
+     * still reporting success to the caller. */
+    return err;
+}
+#endif
+#endif
+
+#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)
+/* Multiply a by digit n and put result into r. r = a * n
+ *
+ * a SP integer to be multiplied.
+ * n Digit to multiply by.
+ * r SP integer result.
+ * returns MP_OKAY always.
+ */
+int sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r)
+{
+    _sp_mul_d(a, n, r, 0); /* final arg 0: presumably digit offset into r - confirm against _sp_mul_d */
+    return MP_OKAY;
+}
+#endif
+
+/* Returns the run time settings.
+ *
+ * returns the settings value (the compile-time CTC_SETTINGS constant).
+ */
+word32 CheckRunTimeSettings(void)
+{
+    return CTC_SETTINGS;
+}
+
+#endif /* WOLFSSL_SP_MATH */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_x86_64.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_x86_64.c
new file mode 100644
index 000000000..3e49d2022
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_x86_64.c
@@ -0,0 +1,29555 @@
+/* sp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Implementation by Sean Parkinson. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
+ defined(WOLFSSL_HAVE_SP_ECC)
+
+#ifdef RSA_LOW_MEM
+#ifndef WOLFSSL_SP_SMALL
+#define WOLFSSL_SP_SMALL
+#endif
+#endif
+
+#include <wolfssl/wolfcrypt/sp.h>
+
+#ifdef WOLFSSL_SP_X86_64_ASM
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+extern void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n);
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of sp_digit words to fill in r (not bytes).
+ * a A multi-precision integer.
+ */
+static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 64
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffffffffffffl;
+        s = 64U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 64) {
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 64 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+extern void sp_2048_to_bin(sp_digit* r, byte* a);
+extern void sp_2048_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern void sp_2048_sqr_16(sp_digit* r, const sp_digit* a);
+extern void sp_2048_mul_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern void sp_2048_sqr_avx2_16(sp_digit* r, const sp_digit* a);
+extern sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b);
+extern sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b);
+
+extern sp_digit sp_2048_dbl_16(sp_digit* r, const sp_digit* a);
+extern void sp_2048_sqr_32(sp_digit* r, const sp_digit* a);
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_2048_mul_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#endif /* HAVE_INTEL_AVX2 */
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_2048_sqr_avx2_32(sp_digit* r, const sp_digit* a);
+#endif /* HAVE_INTEL_AVX2 */
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**64 */
+
+    /* rho = -1/m mod 2^64 (the Montgomery word base) */
+    *rho = -x;
+}
+
+extern void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b);
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+extern sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b);
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 1024 bits (16 64-bit digits), just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_16(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 16);
+
+    /* r = 2^n mod m */
+    sp_2048_sub_in_place_16(r, m); /* r = 0 - m = 2^1024 - m (mod 2^1024) */
+}
+
+extern sp_digit sp_2048_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_16(r, a, b);
+    sp_2048_mont_reduce_16(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_16(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_16(r, a);
+    sp_2048_mont_reduce_16(r, m, mp);
+}
+
+extern sp_digit sp_2048_cond_sub_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_2048_mul_d_16(sp_digit* r, const sp_digit* a, sp_digit b);
+extern void sp_2048_mul_d_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ */
+static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    register sp_digit r asm("rax");
+    __asm__ __volatile__ (
+        "divq %3"
+        : "=a" (r)
+        : "d" (d1), "a" (d0), "r" (div)
+        :
+    );
+    return r;
+}
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<16; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 16; i += 8) { /* unrolled 8 digits per iteration */
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+extern int64_t sp_2048_cmp_16(const sp_digit* a, const sp_digit* b);
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[32], t2[17];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[15];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 16);
+    r1 = sp_2048_cmp_16(&t1[16], d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_2048_cond_sub_avx2_16(&t1[16], &t1[16], d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_2048_cond_sub_16(&t1[16], &t1[16], d, (sp_digit)0 - r1);
+    for (i=15; i>=0; i--) {
+        r1 = div_2048_word_16(t1[16 + i], t1[16 + i - 1], div); /* estimate quotient digit */
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_2048_mul_d_avx2_16(t2, d, r1);
+        else
+#endif
+            sp_2048_mul_d_16(t2, d, r1);
+        t1[16 + i] += sp_2048_sub_in_place_16(&t1[i], t2);
+        t1[16 + i] -= t2[16];
+        sp_2048_mask_16(t2, d, t1[16 + i]); /* add back d when estimate was too large */
+        t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2);
+        sp_2048_mask_16(t2, d, t1[16 + i]);
+        t1[16 + i] += sp_2048_add_16(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_2048_cmp_16(t1, d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_2048_cond_sub_avx2_16(r, t1, d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_2048_cond_sub_16(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_16(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_16(a, m, NULL, r); /* NULL: quotient not needed */
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Uses a fixed 5-bit window with 31 pre-computed powers of a.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][32];
+    sp_digit rt[32];
+#else
+    sp_digit* t[32];
+    sp_digit* rt;
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 32, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 32;
+        rt = td + 1024;
+#endif
+        norm = t[0]; /* t[0] holds the Montgomery normalizer */
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_16(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 16);
+        if (reduceA) {
+            err = sp_2048_mod_16(t[1] + 16, a, m);
+            if (err == MP_OKAY)
+                err = sp_2048_mod_16(t[1], t[1], m);
+        }
+        else {
+            XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16);
+            err = sp_2048_mod_16(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* pre-compute t[i] = a^i in Montgomery form for i = 2..31 */
+        sp_2048_mont_sqr_16(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_16(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_16(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_16(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_16(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_16(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_16(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_16(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_16(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_16(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_16(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_16(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_16(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_16(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_16(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_16(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_16(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_16(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_16(t[20], t[10], m, mp);
+        sp_2048_mont_mul_16(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_16(t[22], t[11], m, mp);
+        sp_2048_mont_mul_16(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_16(t[24], t[12], m, mp);
+        sp_2048_mont_mul_16(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_16(t[26], t[13], m, mp);
+        sp_2048_mont_mul_16(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_16(t[28], t[14], m, mp);
+        sp_2048_mont_mul_16(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_16(t[30], t[15], m, mp);
+        sp_2048_mont_mul_16(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 64; /* index of exponent's top word */
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 5) == 0) { /* align first window to a 5-bit boundary */
+            c -= 5;
+        }
+        else {
+            c -= bits % 5;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
+        for (; i>=0 || c>=5; ) {
+            if (c >= 5) {
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+            else if (c == 0) {
+                n = e[i--];
+                y = (int)(n >> 59);
+                n <<= 5;
+                c = 59;
+            }
+            else { /* window spans two exponent words */
+                y = (int)(n >> 59);
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+
+            sp_2048_sqr_16(rt, r); /* 5 squarings then one multiply per window */
+            sp_2048_mont_reduce_16(rt, m, mp);
+            sp_2048_sqr_16(r, rt);
+            sp_2048_mont_reduce_16(r, m, mp);
+            sp_2048_sqr_16(rt, r);
+            sp_2048_mont_reduce_16(rt, m, mp);
+            sp_2048_sqr_16(r, rt);
+            sp_2048_mont_reduce_16(r, m, mp);
+            sp_2048_sqr_16(rt, r);
+            sp_2048_mont_reduce_16(rt, m, mp);
+
+            sp_2048_mul_16(r, rt, t[y]);
+            sp_2048_mont_reduce_16(r, m, mp);
+        }
+
+        XMEMSET(&r[16], 0, sizeof(sp_digit) * 16); /* clear top half, convert out of Montgomery form */
+        sp_2048_mont_reduce_16(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_16(r, m) >= 0);
+        sp_2048_cond_sub_16(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+
+extern void sp_2048_mont_reduce_avx2_16(sp_digit* a, const sp_digit* m, sp_digit mp);
+#ifdef HAVE_INTEL_AVX2
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m) AVX2 variant.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_avx2_16(r, a, b);
+    sp_2048_mont_reduce_avx2_16(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+/* Square the Montgomery form number. (r = a * a mod m) AVX2 variant.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_avx2_16(r, a);
+    sp_2048_mont_reduce_avx2_16(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * AVX2 variant of sp_2048_mod_exp_16: fixed 5-bit window, 31 pre-computed powers.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a must first be reduced modulo m.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][32];
+    sp_digit rt[32];
+#else
+    sp_digit* t[32];
+    sp_digit* rt;
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 32, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 32;
+        rt = td + 1024;
+#endif
+        norm = t[0]; /* t[0] holds the Montgomery normalizer */
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_16(norm, m);
+
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 16);
+        if (reduceA) {
+            err = sp_2048_mod_16(t[1] + 16, a, m);
+            if (err == MP_OKAY)
+                err = sp_2048_mod_16(t[1], t[1], m);
+        }
+        else {
+            XMEMCPY(t[1] + 16, a, sizeof(sp_digit) * 16);
+            err = sp_2048_mod_16(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* pre-compute t[i] = a^i in Montgomery form for i = 2..31 */
+        sp_2048_mont_sqr_avx2_16(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_avx2_16(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_avx2_16(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_avx2_16(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_avx2_16(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_avx2_16(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_avx2_16(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_avx2_16(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_avx2_16(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_avx2_16(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[20], t[10], m, mp);
+        sp_2048_mont_mul_avx2_16(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[22], t[11], m, mp);
+        sp_2048_mont_mul_avx2_16(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[24], t[12], m, mp);
+        sp_2048_mont_mul_avx2_16(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[26], t[13], m, mp);
+        sp_2048_mont_mul_avx2_16(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[28], t[14], m, mp);
+        sp_2048_mont_mul_avx2_16(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_avx2_16(t[30], t[15], m, mp);
+        sp_2048_mont_mul_avx2_16(t[31], t[16], t[15], m, mp);
+
+        i = (bits - 1) / 64; /* index of exponent's top word */
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 5) == 0) { /* align first window to a 5-bit boundary */
+            c -= 5;
+        }
+        else {
+            c -= bits % 5;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
+        for (; i>=0 || c>=5; ) {
+            if (c >= 5) {
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+            else if (c == 0) {
+                n = e[i--];
+                y = (int)(n >> 59);
+                n <<= 5;
+                c = 59;
+            }
+            else { /* window spans two exponent words */
+                y = (int)(n >> 59);
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+
+            sp_2048_sqr_avx2_16(rt, r); /* 5 squarings then one multiply per window */
+            sp_2048_mont_reduce_avx2_16(rt, m, mp);
+            sp_2048_sqr_avx2_16(r, rt);
+            sp_2048_mont_reduce_avx2_16(r, m, mp);
+            sp_2048_sqr_avx2_16(rt, r);
+            sp_2048_mont_reduce_avx2_16(rt, m, mp);
+            sp_2048_sqr_avx2_16(r, rt);
+            sp_2048_mont_reduce_avx2_16(r, m, mp);
+            sp_2048_sqr_avx2_16(rt, r);
+            sp_2048_mont_reduce_avx2_16(rt, m, mp);
+
+            sp_2048_mul_avx2_16(r, rt, t[y]);
+            sp_2048_mont_reduce_avx2_16(r, m, mp);
+        }
+
+        XMEMSET(&r[16], 0, sizeof(sp_digit) * 16); /* clear top half, convert out of Montgomery form */
+        sp_2048_mont_reduce_avx2_16(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_16(r, m) >= 0);
+        sp_2048_cond_sub_avx2_16(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+#endif /* HAVE_INTEL_AVX2 */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 2048 bits (32 64-bit digits), just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 32);
+
+    /* r = 2^n mod m */
+    sp_2048_sub_in_place_32(r, m); /* r = 0 - m = 2^2048 - m (mod 2^2048) */
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+extern sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_32(r, a, b);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_32(r, a);
+    sp_2048_mont_reduce_32(r, m, mp);
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+extern sp_digit sp_2048_cond_sub_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_2048_mul_d_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ */
+static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    register sp_digit r asm("rax");
+    __asm__ __volatile__ (
+        "divq %3"
+        : "=a" (r)
+        : "d" (d1), "a" (d0), "r" (div)
+        :
+    );
+    return r;
+}
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<32; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    int i;
+
+    for (i = 0; i < 32; i += 8) { /* unrolled 8 digits per iteration */
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+extern int64_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b);
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[64], t2[33];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[31];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
+    r1 = sp_2048_cmp_32(&t1[32], d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_2048_cond_sub_avx2_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
+    for (i=31; i>=0; i--) {
+        r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div); /* estimate quotient digit */
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_2048_mul_d_avx2_32(t2, d, r1);
+        else
+#endif
+            sp_2048_mul_d_32(t2, d, r1);
+        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
+        t1[32 + i] -= t2[32];
+        sp_2048_mask_32(t2, d, t1[32 + i]); /* add back d when estimate was too large */
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+        sp_2048_mask_32(t2, d, t1[32 + i]);
+        t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
+    }
+
+    r1 = sp_2048_cmp_32(t1, d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_2048_cond_sub_avx2_32(r, t1, d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_32(a, m, NULL, r); /* NULL: quotient not needed */
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+extern sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b);
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ * Variant with data-dependent (non constant-time) branches.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[64], t2[33];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[31];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
+    for (i = 31; i > 0; i--) { /* find first differing digit from the top */
+        if (t1[i + 32] != d[i])
+            break;
+    }
+    if (t1[i + 32] >= d[i]) { /* top half >= d: subtract d once */
+        sp_2048_sub_in_place_32(&t1[32], d);
+    }
+    for (i=31; i>=0; i--) {
+        r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div); /* estimate quotient digit */
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_2048_mul_d_avx2_32(t2, d, r1);
+        else
+#endif
+            sp_2048_mul_d_32(t2, d, r1);
+        t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
+        t1[32 + i] -= t2[32];
+        if (t1[32 + i] != 0) { /* add back d when estimate was too large */
+            t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d);
+            if (t1[32 + i] != 0)
+                t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], d);
+        }
+    }
+
+    for (i = 31; i > 0; i--) { /* final compare of remainder against d */
+        if (t1[i] != d[i])
+            break;
+    }
+    if (t1[i] >= d[i]) {
+        sp_2048_sub_32(r, t1, d);
+    }
+    else {
+        XMEMCPY(r, t1, sizeof(*t1) * 32);
+    }
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * Conditional (data-dependent timing) variant; see sp_2048_div_32_cond.
+ * The quotient is discarded (NULL passed for the multiplier result).
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_2048_div_32_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * Fixed 5-bit window exponentiation using Montgomery multiplication.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may be larger than m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    /* t[i] holds a^i in Montgomery form for each 5-bit window value. */
+    sp_digit t[32][64];
+    sp_digit rt[64];
+#else
+    sp_digit* t[32];
+    sp_digit* rt;
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 64;
+        rt = td + 2048;
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* t[1] = a converted to Montgomery form (a * 2^2048 mod m):
+         * place a in the top half and reduce. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
+        if (reduceA) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY)
+                err = sp_2048_mod_32(t[1], t[1], m);
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Pre-compute t[i] = a^i (Montgomery form) for i = 2..31. */
+        sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_32(t[20], t[10], m, mp);
+        sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_32(t[22], t[11], m, mp);
+        sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_32(t[24], t[12], m, mp);
+        sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_32(t[26], t[13], m, mp);
+        sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_32(t[28], t[14], m, mp);
+        sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_32(t[30], t[15], m, mp);
+        sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
+
+        /* Position at the top (possibly short) window of the exponent. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 5) == 0) {
+            c -= 5;
+        }
+        else {
+            c -= bits % 5;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        /* Consume one 5-bit window per iteration; c = bits left in n. */
+        for (; i>=0 || c>=5; ) {
+            if (c >= 5) {
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+            else if (c == 0) {
+                n = e[i--];
+                y = (int)(n >> 59);
+                n <<= 5;
+                c = 59;
+            }
+            else {
+                /* Window straddles two exponent words. */
+                y = (int)(n >> 59);
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+
+            /* r = r^32 via five square/Montgomery-reduce steps. */
+            sp_2048_sqr_32(rt, r);
+            sp_2048_mont_reduce_32(rt, m, mp);
+            sp_2048_sqr_32(r, rt);
+            sp_2048_mont_reduce_32(r, m, mp);
+            sp_2048_sqr_32(rt, r);
+            sp_2048_mont_reduce_32(rt, m, mp);
+            sp_2048_sqr_32(r, rt);
+            sp_2048_mont_reduce_32(r, m, mp);
+            sp_2048_sqr_32(rt, r);
+            sp_2048_mont_reduce_32(rt, m, mp);
+
+            /* Multiply in the pre-computed window value. */
+            sp_2048_mul_32(r, rt, t[y]);
+            sp_2048_mont_reduce_32(r, m, mp);
+        }
+
+        /* Convert back from Montgomery form and fully reduce below m. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+extern void sp_2048_mont_reduce_avx2_32(sp_digit* a, const sp_digit* m, sp_digit mp);
+#ifdef HAVE_INTEL_AVX2
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * AVX2 variant: full product followed by Montgomery reduction.
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_mul_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_2048_mul_avx2_32(r, a, b);
+    sp_2048_mont_reduce_avx2_32(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * AVX2 variant: full square followed by Montgomery reduction.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_2048_mont_sqr_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_2048_sqr_avx2_32(r, a);
+    sp_2048_mont_reduce_avx2_32(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
+#ifdef HAVE_INTEL_AVX2
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ *
+ * AVX2 variant of sp_2048_mod_exp_32: same fixed 5-bit window algorithm
+ * with AVX2 (BMI2/ADX) multiply, square and reduce primitives.
+ *
+ * r A single precision number that is the result of the operation.
+ * a A single precision number being exponentiated.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * reduceA Non-zero when a may be larger than m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    /* t[i] holds a^i in Montgomery form for each 5-bit window value. */
+    sp_digit t[32][64];
+    sp_digit rt[64];
+#else
+    sp_digit* t[32];
+    sp_digit* rt;
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 64, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 64;
+        rt = td + 2048;
+#endif
+        norm = t[0];
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* t[1] = a converted to Montgomery form (a * 2^2048 mod m). */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
+        if (reduceA) {
+            err = sp_2048_mod_32(t[1] + 32, a, m);
+            if (err == MP_OKAY)
+                err = sp_2048_mod_32(t[1], t[1], m);
+        }
+        else {
+            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Pre-compute t[i] = a^i (Montgomery form) for i = 2..31. */
+        sp_2048_mont_sqr_avx2_32(t[ 2], t[ 1], m, mp);
+        sp_2048_mont_mul_avx2_32(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[ 4], t[ 2], m, mp);
+        sp_2048_mont_mul_avx2_32(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[ 6], t[ 3], m, mp);
+        sp_2048_mont_mul_avx2_32(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[ 8], t[ 4], m, mp);
+        sp_2048_mont_mul_avx2_32(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[10], t[ 5], m, mp);
+        sp_2048_mont_mul_avx2_32(t[11], t[ 6], t[ 5], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[12], t[ 6], m, mp);
+        sp_2048_mont_mul_avx2_32(t[13], t[ 7], t[ 6], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[14], t[ 7], m, mp);
+        sp_2048_mont_mul_avx2_32(t[15], t[ 8], t[ 7], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[16], t[ 8], m, mp);
+        sp_2048_mont_mul_avx2_32(t[17], t[ 9], t[ 8], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[18], t[ 9], m, mp);
+        sp_2048_mont_mul_avx2_32(t[19], t[10], t[ 9], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[20], t[10], m, mp);
+        sp_2048_mont_mul_avx2_32(t[21], t[11], t[10], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[22], t[11], m, mp);
+        sp_2048_mont_mul_avx2_32(t[23], t[12], t[11], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[24], t[12], m, mp);
+        sp_2048_mont_mul_avx2_32(t[25], t[13], t[12], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[26], t[13], m, mp);
+        sp_2048_mont_mul_avx2_32(t[27], t[14], t[13], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[28], t[14], m, mp);
+        sp_2048_mont_mul_avx2_32(t[29], t[15], t[14], m, mp);
+        sp_2048_mont_sqr_avx2_32(t[30], t[15], m, mp);
+        sp_2048_mont_mul_avx2_32(t[31], t[16], t[15], m, mp);
+
+        /* Position at the top (possibly short) window of the exponent. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 5) == 0) {
+            c -= 5;
+        }
+        else {
+            c -= bits % 5;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
+        /* Consume one 5-bit window per iteration; c = bits left in n. */
+        for (; i>=0 || c>=5; ) {
+            if (c >= 5) {
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+            else if (c == 0) {
+                n = e[i--];
+                y = (int)(n >> 59);
+                n <<= 5;
+                c = 59;
+            }
+            else {
+                /* Window straddles two exponent words. */
+                y = (int)(n >> 59);
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+
+            /* r = r^32 via five square/Montgomery-reduce steps. */
+            sp_2048_sqr_avx2_32(rt, r);
+            sp_2048_mont_reduce_avx2_32(rt, m, mp);
+            sp_2048_sqr_avx2_32(r, rt);
+            sp_2048_mont_reduce_avx2_32(r, m, mp);
+            sp_2048_sqr_avx2_32(rt, r);
+            sp_2048_mont_reduce_avx2_32(rt, m, mp);
+            sp_2048_sqr_avx2_32(r, rt);
+            sp_2048_mont_reduce_avx2_32(r, m, mp);
+            sp_2048_sqr_avx2_32(rt, r);
+            sp_2048_mont_reduce_avx2_32(rt, m, mp);
+
+            /* Multiply in the pre-computed window value. */
+            sp_2048_mul_avx2_32(r, rt, t[y]);
+            sp_2048_mont_reduce_avx2_32(r, m, mp);
+        }
+
+        /* Convert back from Montgomery form and fully reduce below m. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+        sp_2048_mont_reduce_avx2_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_avx2_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * Fast path for e == 3 (square then multiply); otherwise a simple
+ * left-to-right square-and-multiply in Montgomery form.  Public values
+ * only, so the conditional (non-constant-time) reduction is used.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit ad[64], md[32], rd[64];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* a;
+    sp_digit *ah;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit e = 0;
+    int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 256 ||
+                                           mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 5, NULL,
+                                               DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d;
+        r = a + 32 * 2;
+        m = r + 32 * 2;
+        ah = a + 32;
+    }
+#else
+    a = ad;
+    m = md;
+    r = rd;
+    ah = a + 32;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_2048_from_bin(ah, 32, in, inLen);
+#if DIGIT_BIT >= 64
+        e = em->dp[0];
+#else
+        /* Exponent may span two mp_int digits when DIGIT_BIT < 64. */
+        e = em->dp[0];
+        if (em->used > 1)
+            e |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+#endif
+        if (e == 0)
+            err = MP_EXPTMOD_E;
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(m, 32, mm);
+
+        if (e == 0x3) {
+            /* e == 3: r = ((a^2 mod m) * a) mod m. */
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+                if (err == MP_OKAY) {
+                    sp_2048_sqr_avx2_32(r, ah);
+                    err = sp_2048_mod_32_cond(r, r, m);
+                }
+                if (err == MP_OKAY) {
+                    sp_2048_mul_avx2_32(r, ah, r);
+                    err = sp_2048_mod_32_cond(r, r, m);
+                }
+            }
+            else
+#endif
+            {
+                if (err == MP_OKAY) {
+                    sp_2048_sqr_32(r, ah);
+                    err = sp_2048_mod_32_cond(r, r, m);
+                }
+                if (err == MP_OKAY) {
+                    sp_2048_mul_32(r, ah, r);
+                    err = sp_2048_mod_32_cond(r, r, m);
+                }
+            }
+        }
+        else {
+            int i;
+            sp_digit mp;
+
+            sp_2048_mont_setup(m, &mp);
+
+            /* Convert to Montgomery form: a = (in * 2^2048) mod m,
+             * with in already in the top half (ah). */
+            XMEMSET(a, 0, sizeof(sp_digit) * 32);
+            err = sp_2048_mod_32_cond(a, a, m);
+
+            if (err == MP_OKAY) {
+                /* Find the highest set bit of the exponent. */
+                for (i=63; i>=0; i--) {
+                    if (e >> i) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 32);
+                /* Left-to-right square-and-multiply over the exponent bits. */
+#ifdef HAVE_INTEL_AVX2
+                if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+                    for (i--; i>=0; i--) {
+                        sp_2048_mont_sqr_avx2_32(r, r, m, mp);
+                        if (((e >> i) & 1) == 1) {
+                            sp_2048_mont_mul_avx2_32(r, r, a, m, mp);
+                        }
+                    }
+                    XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+                    sp_2048_mont_reduce_avx2_32(r, m, mp);
+                }
+                else
+#endif
+                {
+                    for (i--; i>=0; i--) {
+                        sp_2048_mont_sqr_32(r, r, m, mp);
+                        if (((e >> i) & 1) == 1) {
+                            sp_2048_mont_mul_32(r, r, a, m, mp);
+                        }
+                    }
+                    XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+                    sp_2048_mont_reduce_32(r, m, mp);
+                }
+
+                /* Final subtraction if r >= m. */
+                for (i = 31; i > 0; i--) {
+                    if (r[i] != m[i])
+                        break;
+                }
+                if (r[i] >= m[i])
+                    sp_2048_sub_in_place_32(r, m);
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+#endif
+
+    return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation.
+ *
+ * Non-CRT variant: computes in^dm mod mm directly; the prime/CRT
+ * parameters are accepted but unused.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_digit a[64], d[32], m[32];
+#else
+    sp_digit* d = NULL;
+    sp_digit* a;
+    sp_digit* m;
+#endif
+    sp_digit* r;
+    int err = MP_OKAY;
+
+    (void)pm;
+    (void)qm;
+    (void)dpm;
+    (void)dqm;
+    (void)qim;
+
+    if (*outLen < 256U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(dm) > 2048) {
+            err = MP_READ_E;
+        }
+        if (inLen > 256U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 2048) {
+            err = MP_READ_E;
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 4, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+        a = d + 32;
+        m = a + 64;
+#endif
+        r = a;
+
+        sp_2048_from_bin(a, 32, in, inLen);
+        sp_2048_from_mp(d, 32, dm);
+        sp_2048_from_mp(m, 32, mm);
+        err = sp_2048_mod_exp_32(r, a, d, 2048, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    /* Zeroise the private exponent before returning. */
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL) {
+        XMEMSET(d, 0, sizeof(sp_digit) * 32);
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(d, 0, sizeof(sp_digit) * 32);
+#endif
+
+    return err;
+}
+
+#else
+extern sp_digit sp_2048_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern sp_digit sp_2048_cond_add_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+/* RSA private key operation.
+ *
+ * CRT variant: computes tmpa = in^dp mod p and tmpb = in^dq mod q,
+ * then recombines with Garner's formula using qi = q^-1 mod p.
+ * dm and mm are unused here.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
+    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit ad[32 * 2];
+    /* dp, dq and qi share dpd - they are live at disjoint times. */
+    sp_digit pd[16], qd[16], dpd[16];
+    sp_digit tmpad[32], tmpbd[32];
+#else
+    sp_digit* t = NULL;
+#endif
+    sp_digit* a;
+    sp_digit* p;
+    sp_digit* q;
+    sp_digit* dp;
+    sp_digit* dq;
+    sp_digit* qi;
+    sp_digit* tmpa;
+    sp_digit* tmpb;
+    sp_digit* r;
+    sp_digit c;
+    int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)dm;
+    (void)mm;
+
+    if (*outLen < 256)
+        err = MP_TO_E;
+    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
+        err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 11, NULL,
+                                                              DYNAMIC_TYPE_RSA);
+        if (t == NULL)
+            err = MEMORY_E;
+    }
+    if (err == MP_OKAY) {
+        a = t;
+        p = a + 32 * 2;
+        q = p + 16;
+        qi = dq = dp = q + 16;
+        tmpa = qi + 16;
+        tmpb = tmpa + 32;
+
+        r = t + 32;
+    }
+#else
+    r = a = ad;
+    p = pd;
+    q = qd;
+    qi = dq = dp = dpd;
+    tmpa = tmpad;
+    tmpb = tmpbd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_2048_from_bin(a, 32, in, inLen);
+        sp_2048_from_mp(p, 16, pm);
+        sp_2048_from_mp(q, 16, qm);
+        sp_2048_from_mp(dp, 16, dpm);
+
+        /* tmpa = in^dp mod p. */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_2048_mod_exp_avx2_16(tmpa, a, dp, 1024, p, 1);
+        else
+#endif
+            err = sp_2048_mod_exp_16(tmpa, a, dp, 1024, p, 1);
+    }
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(dq, 16, dqm);
+        /* tmpb = in^dq mod q. */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_2048_mod_exp_avx2_16(tmpb, a, dq, 1024, q, 1);
+        else
+#endif
+            err = sp_2048_mod_exp_16(tmpb, a, dq, 1024, q, 1);
+    }
+
+    if (err == MP_OKAY) {
+        /* tmpa = (tmpa - tmpb) mod p, adding p back up to twice on borrow. */
+        c = sp_2048_sub_in_place_16(tmpa, tmpb);
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            c += sp_2048_cond_add_avx2_16(tmpa, tmpa, p, c);
+            sp_2048_cond_add_avx2_16(tmpa, tmpa, p, c);
+        }
+        else
+#endif
+        {
+            c += sp_2048_cond_add_16(tmpa, tmpa, p, c);
+            sp_2048_cond_add_16(tmpa, tmpa, p, c);
+        }
+
+        /* tmpa = (tmpa * qi) mod p. */
+        sp_2048_from_mp(qi, 16, qim);
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            sp_2048_mul_avx2_16(tmpa, tmpa, qi);
+        }
+        else
+#endif
+        {
+            sp_2048_mul_16(tmpa, tmpa, qi);
+        }
+        err = sp_2048_mod_16(tmpa, tmpa, p);
+    }
+
+    if (err == MP_OKAY) {
+        /* r = tmpb + q * tmpa (Garner recombination). */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            sp_2048_mul_avx2_16(tmpa, q, tmpa);
+        }
+        else
+#endif
+        {
+            sp_2048_mul_16(tmpa, q, tmpa);
+        }
+        XMEMSET(&tmpb[16], 0, sizeof(sp_digit) * 16);
+        sp_2048_add_32(r, tmpb, tmpa);
+
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+    }
+
+    /* Zeroise all secret intermediates before returning. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XMEMSET(t, 0, sizeof(sp_digit) * 16 * 11);
+        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+    }
+#else
+    XMEMSET(tmpad, 0, sizeof(tmpad));
+    XMEMSET(tmpbd, 0, sizeof(tmpbd));
+    XMEMSET(pd, 0, sizeof(pd));
+    XMEMSET(qd, 0, sizeof(qd));
+    XMEMSET(dpd, 0, sizeof(dpd));
+#endif
+
+    return err;
+}
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Repacks 32 64-bit sp_digits into mp_int digits of DIGIT_BIT bits.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ * returns MP_OKAY on success; otherwise the error from mp_grow.
+ */
+static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+        /* Same digit size: straight copy. */
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 32);
+        r->used = 32;
+        mp_clamp(r);
+#elif DIGIT_BIT < 64
+        /* Smaller mp digits: split each 64-bit word across several. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 32; i++) {
+            r->dp[j] |= (mp_digit)(a[i] << s);
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = (mp_digit)(a[i] >> s);
+            while (s + DIGIT_BIT <= 64) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) {
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = (mp_digit)(a[i] >> s);
+                }
+            }
+            s = 64 - s;
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        /* Larger mp digits: accumulate several 64-bit words per digit. */
+        int i, j = 0, s = 0;
+
+        r->dp[0] = 0;
+        for (i = 0; i < 32; i++) {
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 64 >= DIGIT_BIT) {
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 64 - s;
+            }
+            else {
+                s += 64;
+            }
+        }
+        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * Dispatches to the AVX2 implementation when BMI2 and ADX are available.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[64], e[32], m[32];
+    sp_digit* r = b;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 2048 || expBits > 2048 ||
+                                                   mp_count_bits(mod) != 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 32, base);
+        sp_2048_from_mp(e, 32, exp);
+        sp_2048_from_mp(m, 32, mod);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_2048_mod_exp_avx2_32(r, b, e, expBits, m, 0);
+        else
+#endif
+            err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_2048_to_mp(r, res);
+    }
+
+    /* Zeroise the exponent copy before returning. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+#ifdef HAVE_FFDHE_2048
+extern void sp_2048_lshift_32(sp_digit* r, const sp_digit* a, int n);
+#ifdef HAVE_INTEL_AVX2
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * AVX2 variant.  Because the base is 2, each 6-bit window step is a
+ * left shift of the running result followed by a single-word reduction,
+ * avoiding the pre-computed table of a general base.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_avx2_32(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[64];
+    sp_digit td[33];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp = td + 64;
+#else
+        norm = nd;
+        tmp = td;
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* Position at the top (possibly short) 6-bit window. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 6) == 0) {
+            c -= 6;
+        }
+        else {
+            c -= bits % 6;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        /* r = 2^y in Montgomery form. */
+        sp_2048_lshift_32(r, norm, y);
+        for (; i>=0 || c>=6; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = (int)(n >> 58);
+                n <<= 6;
+                c = 58;
+            }
+            else if (c < 6) {
+                /* Window straddles two exponent words. */
+                y = (int)(n >> 58);
+                n = e[i--];
+                c = 6 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 58) & 0x3f;
+                n <<= 6;
+                c -= 6;
+            }
+
+            /* r = r^64 via six Montgomery squarings. */
+            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
+            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
+            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
+            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
+            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
+            sp_2048_mont_sqr_avx2_32(r, r, m, mp);
+
+            /* r = (r << y) mod m: fold the overflow word back in. */
+            sp_2048_lshift_32(r, r, y);
+            sp_2048_mul_d_avx2_32(tmp, norm, r[32]);
+            r[32] = 0;
+            o = sp_2048_add_32(r, r, tmp);
+            sp_2048_cond_sub_avx2_32(r, r, m, (sp_digit)0 - o);
+        }
+
+        /* Convert back from Montgomery form and fully reduce below m. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+        sp_2048_mont_reduce_avx2_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_avx2_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+#endif /* HAVE_INTEL_AVX2 */
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * Base-2 special case: each 6-bit window step is a left shift of the
+ * running result followed by a single-word reduction, avoiding the
+ * pre-computed table of a general base.
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[64];
+    sp_digit td[33];
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp = td + 64;
+#else
+        norm = nd;
+        tmp = td;
+#endif
+
+        sp_2048_mont_setup(m, &mp);
+        sp_2048_mont_norm_32(norm, m);
+
+        /* Position at the top (possibly short) 6-bit window. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 6) == 0) {
+            c -= 6;
+        }
+        else {
+            c -= bits % 6;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        /* r = 2^y in Montgomery form. */
+        sp_2048_lshift_32(r, norm, y);
+        for (; i>=0 || c>=6; ) {
+            if (c == 0) {
+                n = e[i--];
+                y = (int)(n >> 58);
+                n <<= 6;
+                c = 58;
+            }
+            else if (c < 6) {
+                /* Window straddles two exponent words. */
+                y = (int)(n >> 58);
+                n = e[i--];
+                c = 6 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+            else {
+                y = (n >> 58) & 0x3f;
+                n <<= 6;
+                c -= 6;
+            }
+
+            /* r = r^64 via six Montgomery squarings. */
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+            sp_2048_mont_sqr_32(r, r, m, mp);
+
+            /* r = (r << y) mod m: fold the overflow word back in. */
+            sp_2048_lshift_32(r, r, y);
+            sp_2048_mul_d_32(tmp, norm, r[32]);
+            r[32] = 0;
+            o = sp_2048_add_32(r, r, tmp);
+            sp_2048_cond_sub_32(r, r, m, (sp_digit)0 - o);
+        }
+
+        /* Convert back from Montgomery form and fully reduce below m. */
+        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
+        sp_2048_mont_reduce_32(r, m, mp);
+
+        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
+        sp_2048_cond_sub_32(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+
+#endif /* HAVE_FFDHE_2048 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * Takes the base-2 fast path when the base is 2 and the modulus' top
+ * word is all ones (FFDHE-shaped modulus); otherwise general mod-exp.
+ * Leading zero bytes are stripped from the output.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 256 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[64], e[32], m[32];
+    sp_digit* r = b;
+    word32 i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    if (mp_count_bits(base) > 2048 || expLen > 256 ||
+                                                   mp_count_bits(mod) != 2048) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 32, base);
+        sp_2048_from_bin(e, 32, exp, expLen);
+        sp_2048_from_mp(m, 32, mod);
+
+    #ifdef HAVE_FFDHE_2048
+        /* Base-2 fast path for FFDHE-shaped moduli. */
+        if (base->used == 1 && base->dp[0] == 2 && m[31] == (sp_digit)-1) {
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                err = sp_2048_mod_exp_2_avx2_32(r, e, expLen * 8, m);
+            else
+#endif
+                err = sp_2048_mod_exp_2_32(r, e, expLen * 8, m);
+        }
+        else
+    #endif
+        {
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                err = sp_2048_mod_exp_avx2_32(r, b, e, expLen * 8, m, 0);
+            else
+#endif
+                err = sp_2048_mod_exp_32(r, b, e, expLen * 8, m, 0);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_to_bin(r, out);
+        *outLen = 256;
+        /* Strip leading zero bytes from the big-endian output. */
+        for (i=0; i<256 && out[i] == 0; i++) {
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen);
+    }
+
+    /* Zeroise the exponent copy before returning. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+#endif
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * 1024-bit operation built on the 16-digit halves of the 2048-bit code.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[32], e[16], m[16];
+    sp_digit* r = b;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024 || expBits > 1024 ||
+                                                   mp_count_bits(mod) != 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 16, base);
+        sp_2048_from_mp(e, 16, exp);
+        sp_2048_from_mp(m, 16, mod);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_2048_mod_exp_avx2_16(r, b, e, expBits, m, 0);
+        else
+#endif
+            err = sp_2048_mod_exp_16(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* Clear the unused top half before converting back. */
+        XMEMSET(r + 16, 0, sizeof(*r) * 16);
+        err = sp_2048_to_mp(r, res);
+    }
+
+    /* Zeroise the exponent copy before returning. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_2048 */
+
+#ifndef WOLFSSL_SP_NO_3072
+extern void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n);
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * Repacks mp_int digits of DIGIT_BIT bits into 64-bit sp_digits,
+ * zero padding the remainder of the output.
+ *
+ * r A single precision integer.
+ * size Maximum number of digits (sp_digit words) to convert.
+ * a A multi-precision integer.
+ */
+static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+    /* Same digit size: straight copy then zero pad. */
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 64
+    /* Larger mp digits: split each across several 64-bit words. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffffffffffffl;
+        s = 64U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) {
+            s += 64U;
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* Smaller mp digits: accumulate several per 64-bit word. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 64) {
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 64 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+extern void sp_3072_to_bin(sp_digit* r, byte* a);
+extern void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern void sp_3072_sqr_12(sp_digit* r, const sp_digit* a);
+extern void sp_3072_mul_avx2_12(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern void sp_3072_sqr_avx2_12(sp_digit* r, const sp_digit* a);
+extern sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b);
+extern sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern void sp_3072_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b);
+
+extern sp_digit sp_3072_dbl_12(sp_digit* r, const sp_digit* a);
+extern void sp_3072_sqr_24(sp_digit* r, const sp_digit* a);
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_3072_mul_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#endif /* HAVE_INTEL_AVX2 */
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_3072_sqr_avx2_24(sp_digit* r, const sp_digit* a);
+#endif /* HAVE_INTEL_AVX2 */
+
+extern sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b);
+extern sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b);
+
+extern sp_digit sp_3072_dbl_24(sp_digit* r, const sp_digit* a);
+extern void sp_3072_sqr_48(sp_digit* r, const sp_digit* a);
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_3072_mul_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#endif /* HAVE_INTEL_AVX2 */
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_3072_sqr_avx2_48(sp_digit* r, const sp_digit* a);
+#endif /* HAVE_INTEL_AVX2 */
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ * Uses Hensel/Newton lifting: each multiply step doubles the number of
+ * correct low bits of the inverse, from 4 bits up to the full 64-bit word.
+ * a[0] must be odd (always true for an RSA/DH modulus).
+ *
+ * a   A single precision number (the modulus).
+ * rho Bottom word of the Montgomery multiplier -1/a mod 2^64.
+ */
+static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+    sp_digit x, b;
+
+    b = a[0];
+    x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+    x *= 2 - b * x; /* here x*a==1 mod 2**64 */
+
+    /* rho = -1/m mod b (negate to get the Montgomery constant) */
+    *rho = -x;
+}
+
+extern void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b);
+#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * This is the 24-word (1536-bit) variant used for CRT halves: m must be
+ * 1536 bits with its top bit set, so 2^1536 mod m is simply 2^1536 - m,
+ * i.e. 0 - m in 24-word arithmetic.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_24(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 24);
+
+    /* r = 2^n mod m */
+    sp_3072_sub_in_place_24(r, m);
+}
+
+extern sp_digit sp_3072_cond_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^64, from sp_3072_mont_setup).
+ */
+static void sp_3072_mont_mul_24(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_24(r, a, b);
+    sp_3072_mont_reduce_24(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^64, from sp_3072_mont_setup).
+ */
+static void sp_3072_mont_sqr_24(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_24(r, a);
+    sp_3072_mont_reduce_24(r, m, mp);
+}
+
+extern sp_digit sp_3072_cond_sub_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_3072_mul_d_24(sp_digit* r, const sp_digit* a, sp_digit b);
+extern void sp_3072_mul_d_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ * Uses the x86-64 divq instruction: rdx:rax / operand, quotient in rax.
+ * Caller must ensure d1 < div, otherwise the quotient would not fit in
+ * 64 bits and divq raises #DE.
+ *
+ * d1  The high order half of the number to divide.
+ * d0  The low order half of the number to divide.
+ * div The divisor.
+ * returns the 64-bit quotient of the division.
+ */
+static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    register sp_digit r asm("rax");
+    __asm__ __volatile__ (
+        "divq %3"
+        : "=a" (r)
+        : "d" (d1), "a" (d0), "r" (div)
+        :
+    );
+    return r;
+}
+/* AND m into each word of a and store in r.
+ * m is an all-ones or all-zeros mask, so this is a branch-free
+ * (constant-time) conditional copy/zero of a 24-word number.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<24; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    /* Deliberately unrolled 8-wide for speed when code size is not a
+     * concern. */
+    int i;
+
+    for (i = 0; i < 24; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+extern int64_t sp_3072_cmp_24(const sp_digit* a, const sp_digit* b);
+/* Divide a by d and put the remainder into r (m*d + r = a).
+ * The quotient m is not calculated as it is not needed at this time.
+ * Schoolbook long division, one 64-bit quotient word per iteration,
+ * written branch-free (mask-based corrections) to stay constant-time.
+ *
+ * a Number to be divided: 48 words (double width).
+ * d Divisor: 24 words.
+ * m Multiplier (quotient) result - unused, pass NULL.
+ * r Remainder from the division: 24 words.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[48], t2[25];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[23];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 24);
+    /* Pre-subtract d once if the high half is already >= d. */
+    r1 = sp_3072_cmp_24(&t1[24], d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_3072_cond_sub_avx2_24(&t1[24], &t1[24], d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_3072_cond_sub_24(&t1[24], &t1[24], d, (sp_digit)0 - r1);
+    for (i=23; i>=0; i--) {
+        /* Estimate the next quotient word from the top two words; the
+         * estimate may be low by at most 2. */
+        r1 = div_3072_word_24(t1[24 + i], t1[24 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_3072_mul_d_avx2_24(t2, d, r1);
+        else
+#endif
+            sp_3072_mul_d_24(t2, d, r1);
+        t1[24 + i] += sp_3072_sub_in_place_24(&t1[i], t2);
+        t1[24 + i] -= t2[24];
+        /* Two masked add-backs correct any over-subtraction without
+         * branching on secret data. */
+        sp_3072_mask_24(t2, d, t1[24 + i]);
+        t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2);
+        sp_3072_mask_24(t2, d, t1[24 + i]);
+        t1[24 + i] += sp_3072_add_24(&t1[i], &t1[i], t2);
+    }
+
+    /* Final conditional subtraction leaves r fully reduced below d. */
+    r1 = sp_3072_cmp_24(t1, d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_3072_cond_sub_avx2_24(r, t1, d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_3072_cond_sub_24(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Thin wrapper over sp_3072_div_24 that discards the quotient.
+ *
+ * r A single precision number that is the reduced result (24 words).
+ * a A single precision number that is to be reduced (48 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_24(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_24(a, m, NULL, r);
+}
+
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Fixed 5-bit window exponentiation: precomputes a table of 32 powers of
+ * a in Montgomery form, then for each window performs 5 Montgomery
+ * squarings followed by one table multiplication.
+ *
+ * r       A single precision number that is the result of the operation.
+ * a       A single precision number being exponentiated.
+ * e       A single precision number that is the exponent.
+ * bits    The number of bits in the exponent.
+ * m       A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][48];
+    sp_digit rt[48];
+#else
+    sp_digit* t[32];
+    sp_digit* rt;
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* 32 table entries of 48 words plus one 48-word scratch (rt). */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 48, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 48;
+        rt = td + 1536;
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_24(norm, m);
+
+        /* t[1] = a converted to Montgomery form: (a * 2^1536) mod m,
+         * computed by placing a in the high half and reducing. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 24);
+        if (reduceA) {
+            err = sp_3072_mod_24(t[1] + 24, a, m);
+            if (err == MP_OKAY)
+                err = sp_3072_mod_24(t[1], t[1], m);
+        }
+        else {
+            XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24);
+            err = sp_3072_mod_24(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[i] = a^i in Montgomery form. */
+        sp_3072_mont_sqr_24(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_24(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_24(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_24(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_24(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_24(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_24(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_24(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_24(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_24(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_24(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_24(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_24(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_24(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_24(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_24(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_24(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_24(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_24(t[20], t[10], m, mp);
+        sp_3072_mont_mul_24(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_24(t[22], t[11], m, mp);
+        sp_3072_mont_mul_24(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_24(t[24], t[12], m, mp);
+        sp_3072_mont_mul_24(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_24(t[26], t[13], m, mp);
+        sp_3072_mont_mul_24(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_24(t[28], t[14], m, mp);
+        sp_3072_mont_mul_24(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_24(t[30], t[15], m, mp);
+        sp_3072_mont_mul_24(t[31], t[16], t[15], m, mp);
+
+        /* Extract the top (possibly short) window to seed r.
+         * n = current exponent word, c = unread bits remaining in n. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 5) == 0) {
+            c -= 5;
+        }
+        else {
+            c -= bits % 5;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
+        /* Consume the remaining exponent 5 bits at a time. */
+        for (; i>=0 || c>=5; ) {
+            if (c >= 5) {
+                /* Window entirely within the current word. */
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+            else if (c == 0) {
+                /* Current word exhausted; fetch the next one. */
+                n = e[i--];
+                y = (int)(n >> 59);
+                n <<= 5;
+                c = 59;
+            }
+            else {
+                /* Window straddles a word boundary. */
+                y = (int)(n >> 59);
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+
+            /* Five Montgomery squarings... */
+            sp_3072_sqr_24(rt, r);
+            sp_3072_mont_reduce_24(rt, m, mp);
+            sp_3072_sqr_24(r, rt);
+            sp_3072_mont_reduce_24(r, m, mp);
+            sp_3072_sqr_24(rt, r);
+            sp_3072_mont_reduce_24(rt, m, mp);
+            sp_3072_sqr_24(r, rt);
+            sp_3072_mont_reduce_24(r, m, mp);
+            sp_3072_sqr_24(rt, r);
+            sp_3072_mont_reduce_24(rt, m, mp);
+
+            /* ...then multiply in the table entry for this window. */
+            sp_3072_mul_24(r, rt, t[y]);
+            sp_3072_mont_reduce_24(r, m, mp);
+        }
+
+        /* Convert out of Montgomery form (reduce with high half zero). */
+        XMEMSET(&r[24], 0, sizeof(sp_digit) * 24);
+        sp_3072_mont_reduce_24(r, m, mp);
+
+        /* Constant-time final subtraction if r >= m. */
+        mask = 0 - (sp_3072_cmp_24(r, m) >= 0);
+        sp_3072_cond_sub_24(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+
+extern void sp_3072_mont_reduce_avx2_24(sp_digit* a, const sp_digit* m, sp_digit mp);
+#ifdef HAVE_INTEL_AVX2
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m) - AVX2/BMI2/ADX accelerated variant.
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_3072_mont_mul_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_avx2_24(r, a, b);
+    sp_3072_mont_reduce_avx2_24(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+/* Square the Montgomery form number. (r = a * a mod m)
+ * AVX2/BMI2/ADX accelerated variant.
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_3072_mont_sqr_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_avx2_24(r, a);
+    sp_3072_mont_reduce_avx2_24(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * AVX2/BMI2/ADX variant of sp_3072_mod_exp_24: identical fixed 5-bit
+ * window algorithm, using the AVX2 multiply/square/reduce primitives.
+ *
+ * r       A single precision number that is the result of the operation.
+ * a       A single precision number being exponentiated.
+ * e       A single precision number that is the exponent.
+ * bits    The number of bits in the exponent.
+ * m       A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][48];
+    sp_digit rt[48];
+#else
+    sp_digit* t[32];
+    sp_digit* rt;
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* 32 table entries of 48 words plus one 48-word scratch (rt). */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 48, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 48;
+        rt = td + 1536;
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_24(norm, m);
+
+        /* t[1] = a in Montgomery form: (a * 2^1536) mod m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 24);
+        if (reduceA) {
+            err = sp_3072_mod_24(t[1] + 24, a, m);
+            if (err == MP_OKAY)
+                err = sp_3072_mod_24(t[1], t[1], m);
+        }
+        else {
+            XMEMCPY(t[1] + 24, a, sizeof(sp_digit) * 24);
+            err = sp_3072_mod_24(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[i] = a^i in Montgomery form. */
+        sp_3072_mont_sqr_avx2_24(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_avx2_24(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_avx2_24(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_avx2_24(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_avx2_24(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_avx2_24(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_avx2_24(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_avx2_24(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_avx2_24(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_avx2_24(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[20], t[10], m, mp);
+        sp_3072_mont_mul_avx2_24(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[22], t[11], m, mp);
+        sp_3072_mont_mul_avx2_24(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[24], t[12], m, mp);
+        sp_3072_mont_mul_avx2_24(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[26], t[13], m, mp);
+        sp_3072_mont_mul_avx2_24(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[28], t[14], m, mp);
+        sp_3072_mont_mul_avx2_24(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_avx2_24(t[30], t[15], m, mp);
+        sp_3072_mont_mul_avx2_24(t[31], t[16], t[15], m, mp);
+
+        /* Extract the top (possibly short) window to seed r.
+         * n = current exponent word, c = unread bits remaining in n. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 5) == 0) {
+            c -= 5;
+        }
+        else {
+            c -= bits % 5;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
+        /* Consume the remaining exponent 5 bits at a time. */
+        for (; i>=0 || c>=5; ) {
+            if (c >= 5) {
+                /* Window entirely within the current word. */
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+            else if (c == 0) {
+                /* Current word exhausted; fetch the next one. */
+                n = e[i--];
+                y = (int)(n >> 59);
+                n <<= 5;
+                c = 59;
+            }
+            else {
+                /* Window straddles a word boundary. */
+                y = (int)(n >> 59);
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+
+            /* Five Montgomery squarings... */
+            sp_3072_sqr_avx2_24(rt, r);
+            sp_3072_mont_reduce_avx2_24(rt, m, mp);
+            sp_3072_sqr_avx2_24(r, rt);
+            sp_3072_mont_reduce_avx2_24(r, m, mp);
+            sp_3072_sqr_avx2_24(rt, r);
+            sp_3072_mont_reduce_avx2_24(rt, m, mp);
+            sp_3072_sqr_avx2_24(r, rt);
+            sp_3072_mont_reduce_avx2_24(r, m, mp);
+            sp_3072_sqr_avx2_24(rt, r);
+            sp_3072_mont_reduce_avx2_24(rt, m, mp);
+
+            /* ...then multiply in the table entry for this window. */
+            sp_3072_mul_avx2_24(r, rt, t[y]);
+            sp_3072_mont_reduce_avx2_24(r, m, mp);
+        }
+
+        /* Convert out of Montgomery form (reduce with high half zero). */
+        XMEMSET(&r[24], 0, sizeof(sp_digit) * 24);
+        sp_3072_mont_reduce_avx2_24(r, m, mp);
+
+        /* Constant-time final subtraction if r >= m. */
+        mask = 0 - (sp_3072_cmp_24(r, m) >= 0);
+        sp_3072_cond_sub_avx2_24(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+#endif /* HAVE_INTEL_AVX2 */
+
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 3072 bits with its top bit set, 2^3072 mod m is simply
+ * 2^3072 - m, i.e. 0 - m in 48-word arithmetic - just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
+{
+    XMEMSET(r, 0, sizeof(sp_digit) * 48);
+
+    /* r = 2^n mod m */
+    sp_3072_sub_in_place_48(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+extern sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_48(r, a, b);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_48(r, a);
+    sp_3072_mont_reduce_48(r, m, mp);
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+extern sp_digit sp_3072_cond_sub_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_3072_mul_d_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ * Uses the x86-64 divq instruction: rdx:rax / operand, quotient in rax.
+ * Caller must ensure d1 < div, otherwise the quotient would not fit in
+ * 64 bits and divq raises #DE.
+ *
+ * d1  The high order half of the number to divide.
+ * d0  The low order half of the number to divide.
+ * div The divisor.
+ * returns the 64-bit quotient of the division.
+ */
+static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
+        sp_digit div)
+{
+    register sp_digit r asm("rax");
+    __asm__ __volatile__ (
+        "divq %3"
+        : "=a" (r)
+        : "d" (d1), "a" (d0), "r" (div)
+        :
+    );
+    return r;
+}
+/* AND m into each word of a and store in r.
+ * m is an all-ones or all-zeros mask, so this is a branch-free
+ * (constant-time) conditional copy/zero of a 48-word number.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i;
+
+    for (i=0; i<48; i++) {
+        r[i] = a[i] & m;
+    }
+#else
+    /* Deliberately unrolled 8-wide for speed when code size is not a
+     * concern. */
+    int i;
+
+    for (i = 0; i < 48; i += 8) {
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+extern int64_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b);
+/* Divide a by d and put the remainder into r (m*d + r = a).
+ * The quotient m is not calculated as it is not needed at this time.
+ * Schoolbook long division, one 64-bit quotient word per iteration,
+ * written branch-free (mask-based corrections) to stay constant-time.
+ *
+ * a Number to be divided: 96 words (double width).
+ * d Divisor: 48 words.
+ * m Multiplier (quotient) result - unused, pass NULL.
+ * r Remainder from the division: 48 words.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[96], t2[49];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[47];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+    /* Pre-subtract d once if the high half is already >= d. */
+    r1 = sp_3072_cmp_48(&t1[48], d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_3072_cond_sub_avx2_48(&t1[48], &t1[48], d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_3072_cond_sub_48(&t1[48], &t1[48], d, (sp_digit)0 - r1);
+    for (i=47; i>=0; i--) {
+        /* Estimate the next quotient word from the top two words; the
+         * estimate may be low by at most 2. */
+        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_3072_mul_d_avx2_48(t2, d, r1);
+        else
+#endif
+            sp_3072_mul_d_48(t2, d, r1);
+        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+        t1[48 + i] -= t2[48];
+        /* Two masked add-backs correct any over-subtraction without
+         * branching on secret data. */
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+        sp_3072_mask_48(t2, d, t1[48 + i]);
+        t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
+    }
+
+    /* Final conditional subtraction leaves r fully reduced below d. */
+    r1 = sp_3072_cmp_48(t1, d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+        sp_3072_cond_sub_avx2_48(r, t1, d, (sp_digit)0 - r1);
+    else
+#endif
+        sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Thin wrapper over sp_3072_div_48 that discards the quotient.
+ *
+ * r A single precision number that is the reduced result (48 words).
+ * a A single precision number that is to be reduced (96 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_48(a, m, NULL, r);
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+extern sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b);
+/* Divide a by d and put the remainder into r (m*d + r = a).
+ * The quotient m is not calculated as it is not needed at this time.
+ * "_cond" variant: uses data-dependent branches instead of masked
+ * operations, so it is faster but NOT constant-time - only for use on
+ * public values.
+ *
+ * a Number to be divided: 96 words (double width).
+ * d Divisor: 48 words.
+ * m Multiplier (quotient) result - unused, pass NULL.
+ * r Remainder from the division: 48 words.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+        sp_digit* r)
+{
+    sp_digit t1[96], t2[49];
+    sp_digit div, r1;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)m;
+
+    div = d[47];
+    XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
+    /* Compare high half with d via the most significant differing word;
+     * pre-subtract once if high half >= d. */
+    for (i = 47; i > 0; i--) {
+        if (t1[i + 48] != d[i])
+            break;
+    }
+    if (t1[i + 48] >= d[i]) {
+        sp_3072_sub_in_place_48(&t1[48], d);
+    }
+    for (i=47; i>=0; i--) {
+        /* Estimate the next quotient word from the top two words; the
+         * estimate may be low by at most 2. */
+        r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_3072_mul_d_avx2_48(t2, d, r1);
+        else
+#endif
+            sp_3072_mul_d_48(t2, d, r1);
+        t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
+        t1[48 + i] -= t2[48];
+        /* Branching correction: add d back up to twice if the estimate
+         * over-shot (top word went negative / non-zero borrow). */
+        if (t1[48 + i] != 0) {
+            t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
+            if (t1[48 + i] != 0)
+                t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], d);
+        }
+    }
+
+    /* Final reduction: subtract d once more if the remainder >= d. */
+    for (i = 47; i > 0; i--) {
+        if (t1[i] != d[i])
+            break;
+    }
+    if (t1[i] >= d[i]) {
+        sp_3072_sub_48(r, t1, d);
+    }
+    else {
+        XMEMCPY(r, t1, sizeof(*t1) * 48);
+    }
+
+    return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ * Non-constant-time variant - only for use on public values.
+ *
+ * r A single precision number that is the reduced result (48 words).
+ * a A single precision number that is to be reduced (96 words).
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    return sp_3072_div_48_cond(a, m, NULL, r);
+}
+
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * Fixed 5-bit window exponentiation: precomputes a table of 32 powers of
+ * a in Montgomery form, then for each window performs 5 Montgomery
+ * squarings followed by one table multiplication.
+ *
+ * r       A single precision number that is the result of the operation.
+ * a       A single precision number being exponentiated.
+ * e       A single precision number that is the exponent.
+ * bits    The number of bits in the exponent.
+ * m       A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][96];
+    sp_digit rt[96];
+#else
+    sp_digit* t[32];
+    sp_digit* rt;
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* 32 table entries of 96 words plus one 96-word scratch (rt). */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 96, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 96;
+        rt = td + 3072;
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        /* t[1] = a converted to Montgomery form: (a * 2^3072) mod m,
+         * computed by placing a in the high half and reducing. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
+        if (reduceA) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY)
+                err = sp_3072_mod_48(t[1], t[1], m);
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[i] = a^i in Montgomery form. */
+        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_48(t[20], t[10], m, mp);
+        sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_48(t[22], t[11], m, mp);
+        sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_48(t[24], t[12], m, mp);
+        sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_48(t[26], t[13], m, mp);
+        sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_48(t[28], t[14], m, mp);
+        sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_48(t[30], t[15], m, mp);
+        sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
+
+        /* Extract the top (possibly short) window to seed r.
+         * n = current exponent word, c = unread bits remaining in n. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 5) == 0) {
+            c -= 5;
+        }
+        else {
+            c -= bits % 5;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        /* Consume the remaining exponent 5 bits at a time. */
+        for (; i>=0 || c>=5; ) {
+            if (c >= 5) {
+                /* Window entirely within the current word. */
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+            else if (c == 0) {
+                /* Current word exhausted; fetch the next one. */
+                n = e[i--];
+                y = (int)(n >> 59);
+                n <<= 5;
+                c = 59;
+            }
+            else {
+                /* Window straddles a word boundary. */
+                y = (int)(n >> 59);
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+
+            /* Five Montgomery squarings... */
+            sp_3072_sqr_48(rt, r);
+            sp_3072_mont_reduce_48(rt, m, mp);
+            sp_3072_sqr_48(r, rt);
+            sp_3072_mont_reduce_48(r, m, mp);
+            sp_3072_sqr_48(rt, r);
+            sp_3072_mont_reduce_48(rt, m, mp);
+            sp_3072_sqr_48(r, rt);
+            sp_3072_mont_reduce_48(r, m, mp);
+            sp_3072_sqr_48(rt, r);
+            sp_3072_mont_reduce_48(rt, m, mp);
+
+            /* ...then multiply in the table entry for this window. */
+            sp_3072_mul_48(r, rt, t[y]);
+            sp_3072_mont_reduce_48(r, m, mp);
+        }
+
+        /* Convert out of Montgomery form (reduce with high half zero). */
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+        sp_3072_mont_reduce_48(r, m, mp);
+
+        /* Constant-time final subtraction if r >= m. */
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+extern void sp_3072_mont_reduce_avx2_48(sp_digit* a, const sp_digit* m, sp_digit mp);
+#ifdef HAVE_INTEL_AVX2
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m) - AVX2/BMI2/ADX accelerated variant.
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_3072_mont_mul_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_3072_mul_avx2_48(r, a, b);
+    sp_3072_mont_reduce_avx2_48(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+/* Square the Montgomery form number. (r = a * a mod m)
+ * AVX2/BMI2/ADX accelerated variant.
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier (-1/m mod 2^64).
+ */
+static void sp_3072_mont_sqr_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_3072_sqr_avx2_48(r, a);
+    sp_3072_mont_reduce_avx2_48(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
+#ifdef HAVE_INTEL_AVX2
+/* Modular exponentiate a to the e mod m. (r = a^e mod m)
+ * AVX2/BMI2/ADX variant of sp_3072_mod_exp_48: identical fixed 5-bit
+ * window algorithm, using the AVX2 multiply/square/reduce primitives.
+ *
+ * r       A single precision number that is the result of the operation.
+ * a       A single precision number being exponentiated.
+ * e       A single precision number that is the exponent.
+ * bits    The number of bits in the exponent.
+ * m       A single precision number that is the modulus.
+ * reduceA Non-zero when a may be >= m and must be reduced first.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
+        int bits, const sp_digit* m, int reduceA)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit t[32][96];
+    sp_digit rt[96];
+#else
+    sp_digit* t[32];
+    sp_digit* rt;
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit mp = 1;
+    sp_digit n;
+    sp_digit mask;
+    int i;
+    int c, y;
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* 32 table entries of 96 words plus one 96-word scratch (rt). */
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 96, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER);
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        for (i=0; i<32; i++)
+            t[i] = td + i * 96;
+        rt = td + 3072;
+#endif
+        norm = t[0];
+
+        sp_3072_mont_setup(m, &mp);
+        sp_3072_mont_norm_48(norm, m);
+
+        /* t[1] = a in Montgomery form: (a * 2^3072) mod m. */
+        XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
+        if (reduceA) {
+            err = sp_3072_mod_48(t[1] + 48, a, m);
+            if (err == MP_OKAY)
+                err = sp_3072_mod_48(t[1], t[1], m);
+        }
+        else {
+            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
+            err = sp_3072_mod_48(t[1], t[1], m);
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Build the window table: t[i] = a^i in Montgomery form. */
+        sp_3072_mont_sqr_avx2_48(t[ 2], t[ 1], m, mp);
+        sp_3072_mont_mul_avx2_48(t[ 3], t[ 2], t[ 1], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[ 4], t[ 2], m, mp);
+        sp_3072_mont_mul_avx2_48(t[ 5], t[ 3], t[ 2], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[ 6], t[ 3], m, mp);
+        sp_3072_mont_mul_avx2_48(t[ 7], t[ 4], t[ 3], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[ 8], t[ 4], m, mp);
+        sp_3072_mont_mul_avx2_48(t[ 9], t[ 5], t[ 4], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[10], t[ 5], m, mp);
+        sp_3072_mont_mul_avx2_48(t[11], t[ 6], t[ 5], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[12], t[ 6], m, mp);
+        sp_3072_mont_mul_avx2_48(t[13], t[ 7], t[ 6], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[14], t[ 7], m, mp);
+        sp_3072_mont_mul_avx2_48(t[15], t[ 8], t[ 7], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[16], t[ 8], m, mp);
+        sp_3072_mont_mul_avx2_48(t[17], t[ 9], t[ 8], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[18], t[ 9], m, mp);
+        sp_3072_mont_mul_avx2_48(t[19], t[10], t[ 9], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[20], t[10], m, mp);
+        sp_3072_mont_mul_avx2_48(t[21], t[11], t[10], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[22], t[11], m, mp);
+        sp_3072_mont_mul_avx2_48(t[23], t[12], t[11], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[24], t[12], m, mp);
+        sp_3072_mont_mul_avx2_48(t[25], t[13], t[12], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[26], t[13], m, mp);
+        sp_3072_mont_mul_avx2_48(t[27], t[14], t[13], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[28], t[14], m, mp);
+        sp_3072_mont_mul_avx2_48(t[29], t[15], t[14], m, mp);
+        sp_3072_mont_sqr_avx2_48(t[30], t[15], m, mp);
+        sp_3072_mont_mul_avx2_48(t[31], t[16], t[15], m, mp);
+
+        /* Extract the top (possibly short) window to seed r.
+         * n = current exponent word, c = unread bits remaining in n. */
+        i = (bits - 1) / 64;
+        n = e[i--];
+        c = bits & 63;
+        if (c == 0) {
+            c = 64;
+        }
+        if ((bits % 5) == 0) {
+            c -= 5;
+        }
+        else {
+            c -= bits % 5;
+        }
+        y = (int)(n >> c);
+        n <<= 64 - c;
+        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
+        /* Consume the remaining exponent 5 bits at a time. */
+        for (; i>=0 || c>=5; ) {
+            if (c >= 5) {
+                /* Window entirely within the current word. */
+                y = (n >> 59) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+            else if (c == 0) {
+                /* Current word exhausted; fetch the next one. */
+                n = e[i--];
+                y = (int)(n >> 59);
+                n <<= 5;
+                c = 59;
+            }
+            else {
+                /* Window straddles a word boundary. */
+                y = (int)(n >> 59);
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (64 - c);
+                n <<= c;
+                c = 64 - c;
+            }
+
+            /* Five Montgomery squarings... */
+            sp_3072_sqr_avx2_48(rt, r);
+            sp_3072_mont_reduce_avx2_48(rt, m, mp);
+            sp_3072_sqr_avx2_48(r, rt);
+            sp_3072_mont_reduce_avx2_48(r, m, mp);
+            sp_3072_sqr_avx2_48(rt, r);
+            sp_3072_mont_reduce_avx2_48(rt, m, mp);
+            sp_3072_sqr_avx2_48(r, rt);
+            sp_3072_mont_reduce_avx2_48(r, m, mp);
+            sp_3072_sqr_avx2_48(rt, r);
+            sp_3072_mont_reduce_avx2_48(rt, m, mp);
+
+            /* ...then multiply in the table entry for this window. */
+            sp_3072_mul_avx2_48(r, rt, t[y]);
+            sp_3072_mont_reduce_avx2_48(r, m, mp);
+        }
+
+        /* Convert out of Montgomery form (reduce with high half zero). */
+        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+        sp_3072_mont_reduce_avx2_48(r, m, mp);
+
+        /* Constant-time final subtraction if r >= m. */
+        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+        sp_3072_cond_sub_avx2_48(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL)
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+    return err;
+}
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
+/* RSA public key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * em Public exponent.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit ad[96], md[48], rd[96];
+#else
+ sp_digit* d = NULL;
+#endif
+ sp_digit* a;
+ sp_digit *ah;
+ sp_digit* m;
+ sp_digit* r;
+ sp_digit e = 0;
+ int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ /* Public exponent must fit one 64-bit word; modulus must be exactly
+ * 3072 bits; output buffer must hold the full 384-byte result. */
+ if (*outLen < 384)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 384 ||
+ mp_count_bits(mm) != 3072))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ /* One allocation split as: a[96] | r[96] | m[48] (48 * 5 digits). */
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 5, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL)
+ err = MEMORY_E;
+ }
+
+ if (err == MP_OKAY) {
+ a = d;
+ r = a + 48 * 2;
+ m = r + 48 * 2;
+ ah = a + 48;
+ }
+#else
+ a = ad;
+ m = md;
+ r = rd;
+ ah = a + 48;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Base goes into the top half of a (ah). */
+ sp_3072_from_bin(ah, 48, in, inLen);
+#if DIGIT_BIT >= 64
+ e = em->dp[0];
+#else
+ e = em->dp[0];
+ if (em->used > 1)
+ e |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+#endif
+ if (e == 0)
+ err = MP_EXPTMOD_E;
+ }
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(m, 48, mm);
+
+ /* Fast path for the common exponent 3: r = a^2 * a mod m. */
+ if (e == 0x3) {
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ if (err == MP_OKAY) {
+ sp_3072_sqr_avx2_48(r, ah);
+ err = sp_3072_mod_48_cond(r, r, m);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_mul_avx2_48(r, ah, r);
+ err = sp_3072_mod_48_cond(r, r, m);
+ }
+ }
+ else
+#endif
+ {
+ if (err == MP_OKAY) {
+ sp_3072_sqr_48(r, ah);
+ err = sp_3072_mod_48_cond(r, r, m);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_mul_48(r, ah, r);
+ err = sp_3072_mod_48_cond(r, r, m);
+ }
+ }
+ }
+ else {
+ /* General case: left-to-right square-and-multiply in Montgomery
+ * form (exponent is public, so data-dependent branches are OK). */
+ int i;
+ sp_digit mp;
+
+ sp_3072_mont_setup(m, &mp);
+
+ /* Convert to Montgomery form. Low half of a is zeroed while ah
+ * holds the input, so a = in * 2^3072; reducing mod m yields the
+ * Montgomery representation. */
+ XMEMSET(a, 0, sizeof(sp_digit) * 48);
+ err = sp_3072_mod_48_cond(a, a, m);
+
+ if (err == MP_OKAY) {
+ /* Find the highest set bit of the exponent. */
+ for (i=63; i>=0; i--) {
+ if (e >> i) {
+ break;
+ }
+ }
+
+ XMEMCPY(r, a, sizeof(sp_digit) * 48);
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ for (i--; i>=0; i--) {
+ sp_3072_mont_sqr_avx2_48(r, r, m, mp);
+ if (((e >> i) & 1) == 1) {
+ sp_3072_mont_mul_avx2_48(r, r, a, m, mp);
+ }
+ }
+ XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+ sp_3072_mont_reduce_avx2_48(r, m, mp);
+ }
+ else
+#endif
+ {
+ for (i--; i>=0; i--) {
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ if (((e >> i) & 1) == 1) {
+ sp_3072_mont_mul_48(r, r, a, m, mp);
+ }
+ }
+ XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+ sp_3072_mont_reduce_48(r, m, mp);
+ }
+
+ /* After the final reduction r may still be >= m: compare from
+ * the top word down and subtract m once if needed. */
+ for (i = 47; i > 0; i--) {
+ if (r[i] != m[i])
+ break;
+ }
+ if (r[i] >= m[i])
+ sp_3072_sub_in_place_48(r, m);
+ }
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+#endif
+
+ return err;
+}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+ sp_digit a[96], d[48], m[48];
+#else
+ sp_digit* d = NULL;
+ sp_digit* a;
+ sp_digit* m;
+#endif
+ sp_digit* r;
+ int err = MP_OKAY;
+
+ /* This variant uses the private exponent directly (no CRT), so the
+ * prime/CRT parameters are unused. */
+ (void)pm;
+ (void)qm;
+ (void)dpm;
+ (void)dqm;
+ (void)qim;
+
+ if (*outLen < 384U) {
+ err = MP_TO_E;
+ }
+ if (err == MP_OKAY) {
+ if (mp_count_bits(dm) > 3072) {
+ err = MP_READ_E;
+ }
+ if (inLen > 384U) {
+ err = MP_READ_E;
+ }
+ if (mp_count_bits(mm) != 3072) {
+ err = MP_READ_E;
+ }
+ }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+ if (err == MP_OKAY) {
+ /* One allocation split as: d[48] | a[96] | m[48] (48 * 4 digits). */
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 4, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+ a = d + 48;
+ m = a + 96;
+#endif
+ /* Result is written over the (double-width) base buffer. */
+ r = a;
+
+ sp_3072_from_bin(a, 48, in, inLen);
+ sp_3072_from_mp(d, 48, dm);
+ sp_3072_from_mp(m, 48, mm);
+ err = sp_3072_mod_exp_48(r, a, d, 3072, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+ /* Zeroize the private exponent before releasing it. */
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 48);
+ XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+ }
+#else
+ XMEMSET(d, 0, sizeof(sp_digit) * 48);
+#endif
+
+ return err;
+}
+
+#else
+extern sp_digit sp_3072_cond_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern sp_digit sp_3072_cond_add_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+/* RSA private key operation.
+ *
+ * in Array of bytes representing the number to exponentiate, base.
+ * inLen Number of bytes in base.
+ * dm Private exponent.
+ * pm First prime.
+ * qm Second prime.
+ * dpm First prime's CRT exponent.
+ * dqm Second prime's CRT exponent.
+ * qim Inverse of second prime mod p.
+ * mm Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */
+int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
+ mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
+ byte* out, word32* outLen)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit ad[48 * 2];
+ sp_digit pd[24], qd[24], dpd[24];
+ sp_digit tmpad[48], tmpbd[48];
+#else
+ sp_digit* t = NULL;
+#endif
+ sp_digit* a;
+ sp_digit* p;
+ sp_digit* q;
+ sp_digit* dp;
+ sp_digit* dq;
+ sp_digit* qi;
+ sp_digit* tmpa;
+ sp_digit* tmpb;
+ sp_digit* r;
+ sp_digit c;
+ int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ /* CRT variant: d and the full modulus are not used directly. */
+ (void)dm;
+ (void)mm;
+
+ if (*outLen < 384)
+ err = MP_TO_E;
+ if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
+ err = MP_READ_E;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 24 * 11, NULL,
+ DYNAMIC_TYPE_RSA);
+ if (t == NULL)
+ err = MEMORY_E;
+ }
+ if (err == MP_OKAY) {
+ a = t;
+ p = a + 48 * 2;
+ q = p + 24;
+ /* dp, dq and qi are needed one at a time, so they share a buffer. */
+ qi = dq = dp = q + 24;
+ tmpa = qi + 24;
+ tmpb = tmpa + 48;
+
+ r = t + 48;
+ }
+#else
+ r = a = ad;
+ p = pd;
+ q = qd;
+ qi = dq = dp = dpd;
+ tmpa = tmpad;
+ tmpb = tmpbd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_3072_from_bin(a, 48, in, inLen);
+ sp_3072_from_mp(p, 24, pm);
+ sp_3072_from_mp(q, 24, qm);
+ sp_3072_from_mp(dp, 24, dpm);
+
+ /* tmpa = in^dp mod p */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_3072_mod_exp_avx2_24(tmpa, a, dp, 1536, p, 1);
+ else
+#endif
+ err = sp_3072_mod_exp_24(tmpa, a, dp, 1536, p, 1);
+ }
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(dq, 24, dqm);
+ /* tmpb = in^dq mod q */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_3072_mod_exp_avx2_24(tmpb, a, dq, 1536, q, 1);
+ else
+#endif
+ err = sp_3072_mod_exp_24(tmpb, a, dq, 1536, q, 1);
+ }
+
+ if (err == MP_OKAY) {
+ /* Garner recombination: tmpa = (tmpa - tmpb) mod p. Two conditional
+ * adds of p cover the at-most-two borrows from the subtraction. */
+ c = sp_3072_sub_in_place_24(tmpa, tmpb);
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ c += sp_3072_cond_add_avx2_24(tmpa, tmpa, p, c);
+ sp_3072_cond_add_avx2_24(tmpa, tmpa, p, c);
+ }
+ else
+#endif
+ {
+ c += sp_3072_cond_add_24(tmpa, tmpa, p, c);
+ sp_3072_cond_add_24(tmpa, tmpa, p, c);
+ }
+
+ /* tmpa = tmpa * qInv mod p */
+ sp_3072_from_mp(qi, 24, qim);
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ sp_3072_mul_avx2_24(tmpa, tmpa, qi);
+ }
+ else
+#endif
+ {
+ sp_3072_mul_24(tmpa, tmpa, qi);
+ }
+ err = sp_3072_mod_24(tmpa, tmpa, p);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = tmpb + q * tmpa : combine the two half-size results. */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ sp_3072_mul_avx2_24(tmpa, q, tmpa);
+ }
+ else
+#endif
+ {
+ sp_3072_mul_24(tmpa, q, tmpa);
+ }
+ XMEMSET(&tmpb[24], 0, sizeof(sp_digit) * 24);
+ sp_3072_add_48(r, tmpb, tmpa);
+
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ }
+
+ /* Zeroize all buffers that held private values. */
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XMEMSET(t, 0, sizeof(sp_digit) * 24 * 11);
+ XFREE(t, NULL, DYNAMIC_TYPE_RSA);
+ }
+#else
+ XMEMSET(tmpad, 0, sizeof(tmpad));
+ XMEMSET(tmpbd, 0, sizeof(tmpbd));
+ XMEMSET(pd, 0, sizeof(pd));
+ XMEMSET(qd, 0, sizeof(qd));
+ XMEMSET(dpd, 0, sizeof(dpd));
+#endif
+
+ return err;
+}
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+ /* mp_digit and sp_digit have the same width: straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 48);
+ r->used = 48;
+ mp_clamp(r);
+#elif DIGIT_BIT < 64
+ /* Repack each 64-bit sp_digit into several narrower mp_digits.
+ * s tracks the bit offset consumed from the current a[i]. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 48; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 64) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 64 - s;
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp_digit wider than 64 bits: accumulate sp_digits into it. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 48; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 64 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 64 - s;
+ }
+ else {
+ s += 64;
+ }
+ }
+ r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ /* b is double-width (96) because mod_exp uses it for intermediates;
+ * the result aliases it. */
+ sp_digit b[96], e[48], m[48];
+ sp_digit* r = b;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 3072 || expBits > 3072 ||
+ mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 48, base);
+ sp_3072_from_mp(e, 48, exp);
+ sp_3072_from_mp(m, 48, mod);
+
+ /* Use the BMI2/ADX implementation when the CPU supports it. */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_3072_mod_exp_avx2_48(r, b, e, expBits, m, 0);
+ else
+#endif
+ err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_3072_to_mp(r, res);
+ }
+
+ /* Zeroize the exponent copy. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+#ifdef HAVE_FFDHE_3072
+extern void sp_3072_lshift_48(sp_digit* r, const sp_digit* a, int n);
+#ifdef HAVE_INTEL_AVX2
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_avx2_48(sp_digit* r, const sp_digit* e, int bits,
+ const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit nd[96];
+ /* One extra digit: mul_d produces a 49-digit product. */
+ sp_digit td[49];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* 145 = 96 (norm) + 49 (tmp). */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 96;
+#else
+ norm = nd;
+ tmp = td;
+#endif
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_48(norm, m);
+
+ /* Locate the top (possibly short) 6-bit window of the exponent. */
+ i = (bits - 1) / 64;
+ n = e[i--];
+ c = bits & 63;
+ if (c == 0) {
+ c = 64;
+ }
+ if ((bits % 6) == 0) {
+ c -= 6;
+ }
+ else {
+ c -= bits % 6;
+ }
+ y = (int)(n >> c);
+ n <<= 64 - c;
+ /* Base is 2, so "multiply by 2^y" is just a left shift of the
+ * Montgomery form of 1 (norm). */
+ sp_3072_lshift_48(r, norm, y);
+ for (; i>=0 || c>=6; ) {
+ if (c == 0) {
+ n = e[i--];
+ y = (int)(n >> 58);
+ n <<= 6;
+ c = 58;
+ }
+ else if (c < 6) {
+ /* Window spans a word boundary: stitch bits together. */
+ y = (int)(n >> 58);
+ n = e[i--];
+ c = 6 - c;
+ y |= n >> (64 - c);
+ n <<= c;
+ c = 64 - c;
+ }
+ else {
+ y = (n >> 58) & 0x3f;
+ n <<= 6;
+ c -= 6;
+ }
+
+ /* Six squarings consume six exponent bits... */
+ sp_3072_mont_sqr_avx2_48(r, r, m, mp);
+ sp_3072_mont_sqr_avx2_48(r, r, m, mp);
+ sp_3072_mont_sqr_avx2_48(r, r, m, mp);
+ sp_3072_mont_sqr_avx2_48(r, r, m, mp);
+ sp_3072_mont_sqr_avx2_48(r, r, m, mp);
+ sp_3072_mont_sqr_avx2_48(r, r, m, mp);
+
+ /* ...then multiply by 2^y via shift; fold the overflow word
+ * r[48] back in using norm = 2^3072 mod m. */
+ sp_3072_lshift_48(r, r, y);
+ sp_3072_mul_d_avx2_48(tmp, norm, r[48]);
+ r[48] = 0;
+ o = sp_3072_add_48(r, r, tmp);
+ sp_3072_cond_sub_avx2_48(r, r, m, (sp_digit)0 - o);
+ }
+
+ /* Convert out of Montgomery form and do the final conditional
+ * subtraction in constant time. */
+ XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+ sp_3072_mont_reduce_avx2_48(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+ sp_3072_cond_sub_avx2_48(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL)
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return err;
+}
+#endif /* HAVE_INTEL_AVX2 */
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ *
+ * r A single precision number that is the result of the operation.
+ * e A single precision number that is the exponent.
+ * bits The number of bits in the exponent.
+ * m A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits,
+ const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ sp_digit nd[96];
+ /* One extra digit: mul_d produces a 49-digit product. */
+ sp_digit td[49];
+#else
+ sp_digit* td;
+#endif
+ sp_digit* norm;
+ sp_digit* tmp;
+ sp_digit mp = 1;
+ sp_digit n, o;
+ sp_digit mask;
+ int i;
+ int c, y;
+ int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ /* 145 = 96 (norm) + 49 (tmp). */
+ td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ norm = td;
+ tmp = td + 96;
+#else
+ norm = nd;
+ tmp = td;
+#endif
+
+ sp_3072_mont_setup(m, &mp);
+ sp_3072_mont_norm_48(norm, m);
+
+ /* Locate the top (possibly short) 6-bit window of the exponent. */
+ i = (bits - 1) / 64;
+ n = e[i--];
+ c = bits & 63;
+ if (c == 0) {
+ c = 64;
+ }
+ if ((bits % 6) == 0) {
+ c -= 6;
+ }
+ else {
+ c -= bits % 6;
+ }
+ y = (int)(n >> c);
+ n <<= 64 - c;
+ /* Base is 2, so "multiply by 2^y" is just a left shift of the
+ * Montgomery form of 1 (norm). */
+ sp_3072_lshift_48(r, norm, y);
+ for (; i>=0 || c>=6; ) {
+ if (c == 0) {
+ n = e[i--];
+ y = (int)(n >> 58);
+ n <<= 6;
+ c = 58;
+ }
+ else if (c < 6) {
+ /* Window spans a word boundary: stitch bits together. */
+ y = (int)(n >> 58);
+ n = e[i--];
+ c = 6 - c;
+ y |= n >> (64 - c);
+ n <<= c;
+ c = 64 - c;
+ }
+ else {
+ y = (n >> 58) & 0x3f;
+ n <<= 6;
+ c -= 6;
+ }
+
+ /* Six squarings consume six exponent bits... */
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+ sp_3072_mont_sqr_48(r, r, m, mp);
+
+ /* ...then multiply by 2^y via shift; fold the overflow word
+ * r[48] back in using norm = 2^3072 mod m. */
+ sp_3072_lshift_48(r, r, y);
+ sp_3072_mul_d_48(tmp, norm, r[48]);
+ r[48] = 0;
+ o = sp_3072_add_48(r, r, tmp);
+ sp_3072_cond_sub_48(r, r, m, (sp_digit)0 - o);
+ }
+
+ /* Convert out of Montgomery form and do the final conditional
+ * subtraction in constant time. */
+ XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
+ sp_3072_mont_reduce_48(r, m, mp);
+
+ mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
+ sp_3072_cond_sub_48(r, r, m, mask);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ if (td != NULL)
+ XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return err;
+}
+
+#endif /* HAVE_FFDHE_3072 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base.
+ * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent.
+ * mod Modulus.
+ * out Buffer to hold big-endian bytes of exponentiation result.
+ * Must be at least 384 bytes long.
+ * outLen Length, in bytes, of exponentiation result.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
+ mp_int* mod, byte* out, word32* outLen)
+{
+ int err = MP_OKAY;
+ /* b is double-width (96) because mod_exp uses it for intermediates;
+ * the result aliases it. */
+ sp_digit b[96], e[48], m[48];
+ sp_digit* r = b;
+ word32 i;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ if (mp_count_bits(base) > 3072 || expLen > 384 ||
+ mp_count_bits(mod) != 3072) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 48, base);
+ sp_3072_from_bin(e, 48, exp, expLen);
+ sp_3072_from_mp(m, 48, mod);
+
+ #ifdef HAVE_FFDHE_3072
+ /* Base 2 with a modulus whose top word is all ones is taken to be an
+ * FFDHE prime: use the faster 2^e implementation. */
+ if (base->used == 1 && base->dp[0] == 2 && m[47] == (sp_digit)-1) {
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_3072_mod_exp_2_avx2_48(r, e, expLen * 8, m);
+ else
+#endif
+ err = sp_3072_mod_exp_2_48(r, e, expLen * 8, m);
+ }
+ else
+ #endif
+ {
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_3072_mod_exp_avx2_48(r, b, e, expLen * 8, m, 0);
+ else
+#endif
+ err = sp_3072_mod_exp_48(r, b, e, expLen * 8, m, 0);
+ }
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_to_bin(r, out);
+ *outLen = 384;
+ /* DH output convention: strip leading zero bytes. */
+ for (i=0; i<384 && out[i] == 0; i++) {
+ }
+ *outLen -= i;
+ XMEMMOVE(out, out + i, *outLen);
+ }
+
+ /* Zeroize the (private) exponent copy. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+#endif
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ /* Half-size (1536-bit) operation using the 24-digit 3072 helpers. */
+ sp_digit b[48], e[24], m[24];
+ sp_digit* r = b;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 1536 || expBits > 1536 ||
+ mp_count_bits(mod) != 1536) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ sp_3072_from_mp(b, 24, base);
+ sp_3072_from_mp(e, 24, exp);
+ sp_3072_from_mp(m, 24, mod);
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_3072_mod_exp_avx2_24(r, b, e, expBits, m, 0);
+ else
+#endif
+ err = sp_3072_mod_exp_24(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ /* Zero the top half so sp_3072_to_mp (48 digits) reads clean data. */
+ XMEMSET(r + 24, 0, sizeof(*r) * 24);
+ err = sp_3072_to_mp(r, res);
+ }
+
+ /* Zeroize the exponent copy. */
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* !WOLFSSL_SP_NO_3072 */
+
+#ifdef WOLFSSL_SP_4096
+extern void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n);
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+ /* mp_digit and sp_digit have the same width: copy then zero-pad. */
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 64
+ /* mp_digit wider than 64 bits: split each across several sp_digits.
+ * s tracks the bit offset consumed from the current a->dp[i]. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffffffffffffl;
+ s = 64U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 64U) <= (word32)DIGIT_BIT) {
+ s += 64U;
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp_digit narrower than 64 bits: pack several into each sp_digit. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 64) {
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 64 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+extern void sp_4096_to_bin(sp_digit* r, byte* a);
+extern sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b);
+extern sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b);
+
+extern sp_digit sp_2048_dbl_32(sp_digit* r, const sp_digit* a);
+extern void sp_4096_sqr_64(sp_digit* r, const sp_digit* a);
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_4096_mul_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#endif /* HAVE_INTEL_AVX2 */
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_4096_sqr_avx2_64(sp_digit* r, const sp_digit* a);
+#endif /* HAVE_INTEL_AVX2 */
+
+/* Calculate the bottom digit of -1/a mod 2^n.
+ *
+ * a A single precision number.
+ * rho Bottom word of inverse.
+ */
+static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
+{
+ sp_digit inv;
+ sp_digit m0;
+
+ m0 = a[0];
+ /* Seed with an inverse valid mod 2^4 (m0 is odd). */
+ inv = (((m0 + 2) & 4) << 1) + m0;
+ /* Newton iteration doubles the number of correct low bits each step. */
+ inv *= 2 - m0 * inv; /* inverse mod 2^8 */
+ inv *= 2 - m0 * inv; /* inverse mod 2^16 */
+ inv *= 2 - m0 * inv; /* inverse mod 2^32 */
+ inv *= 2 - m0 * inv; /* inverse mod 2^64 */
+
+ /* rho = -1/m mod 2^64 */
+ *rho = -inv;
+}
+
+extern void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b);
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+/* r = 2^n mod m where n is the number of bits to reduce by.
+ * Given m must be 4096 bits, just need to subtract.
+ *
+ * r A single precision number.
+ * m A single precision number.
+ */
+static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m)
+{
+ XMEMSET(r, 0, sizeof(sp_digit) * 64);
+
+ /* r = 2^n mod m : 0 - m in 4096 bits equals 2^4096 - m. */
+ sp_4096_sub_in_place_64(r, m);
+}
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+extern sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ /* Full 128-digit product, then Montgomery reduce back to 64 digits. */
+ sp_4096_mul_64(r, a, b);
+ sp_4096_mont_reduce_64(r, m, mp);
+}
+
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_4096_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
+ sp_digit mp)
+{
+ /* Full 128-digit square, then Montgomery reduce back to 64 digits. */
+ sp_4096_sqr_64(r, a);
+ sp_4096_mont_reduce_64(r, m, mp);
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+extern sp_digit sp_4096_cond_sub_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_4096_mul_d_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ */
+static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0,
+ sp_digit div)
+{
+ /* divq divides rdx:rax by its operand: quotient -> rax, remainder -> rdx.
+ * Caller must ensure d1 < div so the quotient fits in 64 bits (no #DE).
+ * NOTE(review): divq also writes the remainder into rdx, which is only
+ * declared as an input here -- appears to rely on the compiler not
+ * reusing rdx afterwards; confirm against upstream. */
+ register sp_digit r asm("rax");
+ __asm__ __volatile__ (
+ "divq %3"
+ : "=a" (r)
+ : "d" (d1), "a" (d0), "r" (div)
+ :
+ );
+ return r;
+}
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int j;
+
+ /* Simple word-by-word mask. */
+ for (j = 0; j < 64; j++) {
+ r[j] = a[j] & m;
+ }
+#else
+ int j;
+
+ /* Mask eight words per iteration. */
+ for (j = 0; j < 64; j += 8) {
+ r[j + 0] = a[j + 0] & m;
+ r[j + 1] = a[j + 1] & m;
+ r[j + 2] = a[j + 2] & m;
+ r[j + 3] = a[j + 3] & m;
+ r[j + 4] = a[j + 4] & m;
+ r[j + 5] = a[j + 5] & m;
+ r[j + 6] = a[j + 6] & m;
+ r[j + 7] = a[j + 7] & m;
+ }
+#endif
+}
+
+extern int64_t sp_4096_cmp_64(const sp_digit* a, const sp_digit* b);
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ /* t1 holds the double-width working copy of a; t2 holds d * r1. */
+ sp_digit t1[128], t2[65];
+ sp_digit div, r1;
+ int i;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ (void)m;
+
+ div = d[63];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+ /* Ensure the top half is below d before the word-by-word division. */
+ r1 = sp_4096_cmp_64(&t1[64], d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_4096_cond_sub_avx2_64(&t1[64], &t1[64], d, (sp_digit)0 - r1);
+ else
+#endif
+ sp_4096_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1);
+ for (i=63; i>=0; i--) {
+ /* Estimate the next quotient word from the top two words. */
+ r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_4096_mul_d_avx2_64(t2, d, r1);
+ else
+#endif
+ sp_4096_mul_d_64(t2, d, r1);
+ t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2);
+ t1[64 + i] -= t2[64];
+ /* The estimate can be high by at most two: masked add-backs correct
+ * it without data-dependent branching. */
+ sp_4096_mask_64(t2, d, t1[64 + i]);
+ t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2);
+ sp_4096_mask_64(t2, d, t1[64 + i]);
+ t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2);
+ }
+
+ /* Final conditional subtraction leaves the remainder in r. */
+ r1 = sp_4096_cmp_64(t1, d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_4096_cond_sub_avx2_64(r, t1, d, (sp_digit)0 - r1);
+ else
+#endif
+ sp_4096_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Remainder-only wrapper over the constant-time division. */
+ return sp_4096_div_64(a, m, NULL, r);
+}
+
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+extern sp_digit sp_4096_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b);
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Number to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ /* "_cond" variant: uses data-dependent branches, so it is faster but
+ * only suitable for public values. t1 = working copy; t2 = d * r1. */
+ sp_digit t1[128], t2[65];
+ sp_digit div, r1;
+ int i;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ (void)m;
+
+ div = d[63];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
+ /* If the top half >= d, subtract d once (branching compare). */
+ for (i = 63; i > 0; i--) {
+ if (t1[i + 64] != d[i])
+ break;
+ }
+ if (t1[i + 64] >= d[i]) {
+ sp_4096_sub_in_place_64(&t1[64], d);
+ }
+ for (i=63; i>=0; i--) {
+ /* Estimate the next quotient word from the top two words. */
+ r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_4096_mul_d_avx2_64(t2, d, r1);
+ else
+#endif
+ sp_4096_mul_d_64(t2, d, r1);
+ t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2);
+ t1[64 + i] -= t2[64];
+ /* Estimate may be high by at most two: branched add-backs. */
+ if (t1[64 + i] != 0) {
+ t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d);
+ if (t1[64 + i] != 0)
+ t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d);
+ }
+ }
+
+ /* Final reduction: subtract d once if the remainder is still >= d. */
+ for (i = 63; i > 0; i--) {
+ if (t1[i] != d[i])
+ break;
+ }
+ if (t1[i] >= d[i]) {
+ sp_4096_sub_64(r, t1, d);
+ }
+ else {
+ XMEMCPY(r, t1, sizeof(*t1) * 64);
+ }
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_4096_div_64_cond(a, m, NULL, r);
+}
+
#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Fixed 5-bit window method: 32 powers of a are pre-computed in Montgomery
 * form and the exponent is consumed 5 bits at a time.
 *
 * r A single precision number that is the result of the operation.
 * a A single precision number being exponentiated.
 * e A single precision number that is the exponent.
 * bits The number of bits in the exponent.
 * m A single precision number that is the modulus.
 * reduceA Non-zero when a may not be less than m and must be reduced first.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[32][128];
    sp_digit rt[128];
#else
    sp_digit* t[32];
    sp_digit* rt;
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;    /* Montgomery multiplier, set by sp_4096_mont_setup(). */
    sp_digit n;         /* Exponent word currently being consumed. */
    sp_digit mask;
    int i;
    int c, y;           /* c = unread bits left in n; y = current window value. */
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    /* Single allocation: 32 table entries of 128 digits plus rt. */
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 128, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        for (i=0; i<32; i++)
            t[i] = td + i * 128;
        rt = td + 4096;     /* rt follows the 32 * 128 digit table. */
#endif
        norm = t[0];

        sp_4096_mont_setup(m, &mp);
        sp_4096_mont_norm_64(norm, m);

        /* Convert a to Montgomery form: t[1] = (a << 4096) mod m. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 64);
        if (reduceA) {
            /* Reduce a below m first, then shift-and-reduce. */
            err = sp_4096_mod_64(t[1] + 64, a, m);
            if (err == MP_OKAY)
                err = sp_4096_mod_64(t[1], t[1], m);
        }
        else {
            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
            err = sp_4096_mod_64(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Pre-compute window table: t[i] = a^i in Montgomery form. */
        sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp);
        sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
        sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp);
        sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
        sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp);
        sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
        sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp);
        sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
        sp_4096_mont_sqr_64(t[10], t[ 5], m, mp);
        sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
        sp_4096_mont_sqr_64(t[12], t[ 6], m, mp);
        sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
        sp_4096_mont_sqr_64(t[14], t[ 7], m, mp);
        sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
        sp_4096_mont_sqr_64(t[16], t[ 8], m, mp);
        sp_4096_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
        sp_4096_mont_sqr_64(t[18], t[ 9], m, mp);
        sp_4096_mont_mul_64(t[19], t[10], t[ 9], m, mp);
        sp_4096_mont_sqr_64(t[20], t[10], m, mp);
        sp_4096_mont_mul_64(t[21], t[11], t[10], m, mp);
        sp_4096_mont_sqr_64(t[22], t[11], m, mp);
        sp_4096_mont_mul_64(t[23], t[12], t[11], m, mp);
        sp_4096_mont_sqr_64(t[24], t[12], m, mp);
        sp_4096_mont_mul_64(t[25], t[13], t[12], m, mp);
        sp_4096_mont_sqr_64(t[26], t[13], m, mp);
        sp_4096_mont_mul_64(t[27], t[14], t[13], m, mp);
        sp_4096_mont_sqr_64(t[28], t[14], m, mp);
        sp_4096_mont_mul_64(t[29], t[15], t[14], m, mp);
        sp_4096_mont_sqr_64(t[30], t[15], m, mp);
        sp_4096_mont_mul_64(t[31], t[16], t[15], m, mp);

        /* Position at the most significant bits of the exponent; the first
         * (possibly partial) window is sized so later windows are full. */
        i = (bits - 1) / 64;
        n = e[i--];
        c = bits & 63;
        if (c == 0) {
            c = 64;
        }
        if ((bits % 5) == 0) {
            c -= 5;
        }
        else {
            c -= bits % 5;
        }
        y = (int)(n >> c);
        n <<= 64 - c;
        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
        for (; i>=0 || c>=5; ) {
            /* Extract the next 5-bit window value into y. */
            if (c >= 5) {
                y = (n >> 59) & 0x1f;
                n <<= 5;
                c -= 5;
            }
            else if (c == 0) {
                n = e[i--];
                y = (int)(n >> 59);
                n <<= 5;
                c = 59;
            }
            else {
                /* Window straddles two exponent words. */
                y = (int)(n >> 59);
                n = e[i--];
                c = 5 - c;
                y |= n >> (64 - c);
                n <<= c;
                c = 64 - c;
            }

            /* Square five times - one per bit of the window. */
            sp_4096_sqr_64(rt, r);
            sp_4096_mont_reduce_64(rt, m, mp);
            sp_4096_sqr_64(r, rt);
            sp_4096_mont_reduce_64(r, m, mp);
            sp_4096_sqr_64(rt, r);
            sp_4096_mont_reduce_64(rt, m, mp);
            sp_4096_sqr_64(r, rt);
            sp_4096_mont_reduce_64(r, m, mp);
            sp_4096_sqr_64(rt, r);
            sp_4096_mont_reduce_64(rt, m, mp);

            /* Multiply in the pre-computed power for this window. */
            sp_4096_mul_64(r, rt, t[y]);
            sp_4096_mont_reduce_64(r, m, mp);
        }

        /* Convert back out of Montgomery form. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
        sp_4096_mont_reduce_64(r, m, mp);

        /* Constant-time conditional subtract when r >= m. */
        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
        sp_4096_cond_sub_64(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL)
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return err;
}
#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
extern void sp_4096_mont_reduce_avx2_64(sp_digit* a, const sp_digit* m, sp_digit mp);
#ifdef HAVE_INTEL_AVX2
/* Multiply two Montgomery form numbers mod the modulus (prime).
 * (r = a * b mod m)
 *
 * r Result of multiplication.
 * a First number to multiply in Montgomery form.
 * b Second number to multiply in Montgomery form.
 * m Modulus (prime).
 * mp Montgomery multiplier.
 */
static void sp_4096_mont_mul_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
        const sp_digit* m, sp_digit mp)
{
    /* Double-width product, then Montgomery reduction (AVX2 assembly). */
    sp_4096_mul_avx2_64(r, a, b);
    sp_4096_mont_reduce_avx2_64(r, m, mp);
}

#endif /* HAVE_INTEL_AVX2 */
#ifdef HAVE_INTEL_AVX2
/* Square the Montgomery form number. (r = a * a mod m)
 *
 * r Result of squaring.
 * a Number to square in Montgomery form.
 * m Modulus (prime).
 * mp Montgomery multiplier.
 */
static void sp_4096_mont_sqr_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    /* Double-width square, then Montgomery reduction (AVX2 assembly). */
    sp_4096_sqr_avx2_64(r, a);
    sp_4096_mont_reduce_avx2_64(r, m, mp);
}

#endif /* HAVE_INTEL_AVX2 */
#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
#ifdef HAVE_INTEL_AVX2
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * AVX2 variant of sp_4096_mod_exp_64: same fixed 5-bit window algorithm,
 * but all multiplies/squares/reductions use the AVX2 assembly routines.
 *
 * r A single precision number that is the result of the operation.
 * a A single precision number being exponentiated.
 * e A single precision number that is the exponent.
 * bits The number of bits in the exponent.
 * m A single precision number that is the modulus.
 * reduceA Non-zero when a may not be less than m and must be reduced first.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_4096_mod_exp_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[32][128];
    sp_digit rt[128];
#else
    sp_digit* t[32];
    sp_digit* rt;
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;    /* Montgomery multiplier, set by sp_4096_mont_setup(). */
    sp_digit n;         /* Exponent word currently being consumed. */
    sp_digit mask;
    int i;
    int c, y;           /* c = unread bits left in n; y = current window value. */
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    /* Single allocation: 32 table entries of 128 digits plus rt. */
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 33 * 128, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        for (i=0; i<32; i++)
            t[i] = td + i * 128;
        rt = td + 4096;     /* rt follows the 32 * 128 digit table. */
#endif
        norm = t[0];

        sp_4096_mont_setup(m, &mp);
        sp_4096_mont_norm_64(norm, m);

        /* Convert a to Montgomery form: t[1] = (a << 4096) mod m. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 64);
        if (reduceA) {
            /* Reduce a below m first, then shift-and-reduce. */
            err = sp_4096_mod_64(t[1] + 64, a, m);
            if (err == MP_OKAY)
                err = sp_4096_mod_64(t[1], t[1], m);
        }
        else {
            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
            err = sp_4096_mod_64(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Pre-compute window table: t[i] = a^i in Montgomery form. */
        sp_4096_mont_sqr_avx2_64(t[ 2], t[ 1], m, mp);
        sp_4096_mont_mul_avx2_64(t[ 3], t[ 2], t[ 1], m, mp);
        sp_4096_mont_sqr_avx2_64(t[ 4], t[ 2], m, mp);
        sp_4096_mont_mul_avx2_64(t[ 5], t[ 3], t[ 2], m, mp);
        sp_4096_mont_sqr_avx2_64(t[ 6], t[ 3], m, mp);
        sp_4096_mont_mul_avx2_64(t[ 7], t[ 4], t[ 3], m, mp);
        sp_4096_mont_sqr_avx2_64(t[ 8], t[ 4], m, mp);
        sp_4096_mont_mul_avx2_64(t[ 9], t[ 5], t[ 4], m, mp);
        sp_4096_mont_sqr_avx2_64(t[10], t[ 5], m, mp);
        sp_4096_mont_mul_avx2_64(t[11], t[ 6], t[ 5], m, mp);
        sp_4096_mont_sqr_avx2_64(t[12], t[ 6], m, mp);
        sp_4096_mont_mul_avx2_64(t[13], t[ 7], t[ 6], m, mp);
        sp_4096_mont_sqr_avx2_64(t[14], t[ 7], m, mp);
        sp_4096_mont_mul_avx2_64(t[15], t[ 8], t[ 7], m, mp);
        sp_4096_mont_sqr_avx2_64(t[16], t[ 8], m, mp);
        sp_4096_mont_mul_avx2_64(t[17], t[ 9], t[ 8], m, mp);
        sp_4096_mont_sqr_avx2_64(t[18], t[ 9], m, mp);
        sp_4096_mont_mul_avx2_64(t[19], t[10], t[ 9], m, mp);
        sp_4096_mont_sqr_avx2_64(t[20], t[10], m, mp);
        sp_4096_mont_mul_avx2_64(t[21], t[11], t[10], m, mp);
        sp_4096_mont_sqr_avx2_64(t[22], t[11], m, mp);
        sp_4096_mont_mul_avx2_64(t[23], t[12], t[11], m, mp);
        sp_4096_mont_sqr_avx2_64(t[24], t[12], m, mp);
        sp_4096_mont_mul_avx2_64(t[25], t[13], t[12], m, mp);
        sp_4096_mont_sqr_avx2_64(t[26], t[13], m, mp);
        sp_4096_mont_mul_avx2_64(t[27], t[14], t[13], m, mp);
        sp_4096_mont_sqr_avx2_64(t[28], t[14], m, mp);
        sp_4096_mont_mul_avx2_64(t[29], t[15], t[14], m, mp);
        sp_4096_mont_sqr_avx2_64(t[30], t[15], m, mp);
        sp_4096_mont_mul_avx2_64(t[31], t[16], t[15], m, mp);

        /* Position at the most significant bits of the exponent; the first
         * (possibly partial) window is sized so later windows are full. */
        i = (bits - 1) / 64;
        n = e[i--];
        c = bits & 63;
        if (c == 0) {
            c = 64;
        }
        if ((bits % 5) == 0) {
            c -= 5;
        }
        else {
            c -= bits % 5;
        }
        y = (int)(n >> c);
        n <<= 64 - c;
        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
        for (; i>=0 || c>=5; ) {
            /* Extract the next 5-bit window value into y. */
            if (c >= 5) {
                y = (n >> 59) & 0x1f;
                n <<= 5;
                c -= 5;
            }
            else if (c == 0) {
                n = e[i--];
                y = (int)(n >> 59);
                n <<= 5;
                c = 59;
            }
            else {
                /* Window straddles two exponent words. */
                y = (int)(n >> 59);
                n = e[i--];
                c = 5 - c;
                y |= n >> (64 - c);
                n <<= c;
                c = 64 - c;
            }

            /* Square five times - one per bit of the window. */
            sp_4096_sqr_avx2_64(rt, r);
            sp_4096_mont_reduce_avx2_64(rt, m, mp);
            sp_4096_sqr_avx2_64(r, rt);
            sp_4096_mont_reduce_avx2_64(r, m, mp);
            sp_4096_sqr_avx2_64(rt, r);
            sp_4096_mont_reduce_avx2_64(rt, m, mp);
            sp_4096_sqr_avx2_64(r, rt);
            sp_4096_mont_reduce_avx2_64(r, m, mp);
            sp_4096_sqr_avx2_64(rt, r);
            sp_4096_mont_reduce_avx2_64(rt, m, mp);

            /* Multiply in the pre-computed power for this window. */
            sp_4096_mul_avx2_64(r, rt, t[y]);
            sp_4096_mont_reduce_avx2_64(r, m, mp);
        }

        /* Convert back out of Montgomery form. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
        sp_4096_mont_reduce_avx2_64(r, m, mp);

        /* Constant-time conditional subtract when r >= m. */
        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
        sp_4096_cond_sub_avx2_64(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL)
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return err;
}
#endif /* HAVE_INTEL_AVX2 */
#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
+
+#ifdef WOLFSSL_HAVE_SP_RSA
/* RSA public key operation.
 *
 * in Array of bytes representing the number to exponentiate, base.
 * inLen Number of bytes in base.
 * em Public exponent.
 * mm Modulus.
 * out Buffer to hold big-endian bytes of exponentiation result.
 * Must be at least 512 bytes long.
 * outLen Number of bytes in result.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an array is too long and MEMORY_E when dynamic memory allocation fails.
 */
int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
    byte* out, word32* outLen)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit ad[128], md[64], rd[128];
#else
    sp_digit* d = NULL;
#endif
    sp_digit* a;
    sp_digit *ah;       /* Top half of a: holds the raw input value. */
    sp_digit* m;
    sp_digit* r;
    sp_digit e = 0;     /* Public exponent; must fit in one digit (<= 64 bits). */
    int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    /* Validate output size, exponent size, input size and modulus size. */
    if (*outLen < 512)
        err = MP_TO_E;
    if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 512 ||
                                                 mp_count_bits(mm) != 4096))
        err = MP_READ_E;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
                               DYNAMIC_TYPE_RSA);
        if (d == NULL)
            err = MEMORY_E;
    }

    if (err == MP_OKAY) {
        /* Carve a (128), r (128) and m (64) out of the one allocation. */
        a = d;
        r = a + 64 * 2;
        m = r + 64 * 2;
        ah = a + 64;
    }
#else
    a = ad;
    m = md;
    r = rd;
    ah = a + 64;
#endif

    if (err == MP_OKAY) {
        sp_4096_from_bin(ah, 64, in, inLen);
        /* Extract the public exponent into a single digit. */
#if DIGIT_BIT >= 64
        e = em->dp[0];
#else
        e = em->dp[0];
        if (em->used > 1)
            e |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
#endif
        if (e == 0)
            err = MP_EXPTMOD_E;
    }
    if (err == MP_OKAY) {
        sp_4096_from_mp(m, 64, mm);

        if (e == 0x3) {
            /* Common exponent 3: r = in^2 mod m; r = in * r mod m. */
#ifdef HAVE_INTEL_AVX2
            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
                if (err == MP_OKAY) {
                    sp_4096_sqr_avx2_64(r, ah);
                    err = sp_4096_mod_64_cond(r, r, m);
                }
                if (err == MP_OKAY) {
                    sp_4096_mul_avx2_64(r, ah, r);
                    err = sp_4096_mod_64_cond(r, r, m);
                }
            }
            else
#endif
            {
                if (err == MP_OKAY) {
                    sp_4096_sqr_64(r, ah);
                    err = sp_4096_mod_64_cond(r, r, m);
                }
                if (err == MP_OKAY) {
                    sp_4096_mul_64(r, ah, r);
                    err = sp_4096_mod_64_cond(r, r, m);
                }
            }
        }
        else {
            int i;
            sp_digit mp;

            sp_4096_mont_setup(m, &mp);

            /* Convert to Montgomery form: a = (in << 4096) mod m
             * (low half zeroed; the input already sits in ah = a + 64). */
            XMEMSET(a, 0, sizeof(sp_digit) * 64);
            err = sp_4096_mod_64_cond(a, a, m);

            if (err == MP_OKAY) {
                /* Find the highest set bit of the exponent. */
                for (i=63; i>=0; i--) {
                    if (e >> i) {
                        break;
                    }
                }

                /* Left-to-right binary square-and-multiply over e. */
                XMEMCPY(r, a, sizeof(sp_digit) * 64);
#ifdef HAVE_INTEL_AVX2
                if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
                    for (i--; i>=0; i--) {
                        sp_4096_mont_sqr_avx2_64(r, r, m, mp);
                        if (((e >> i) & 1) == 1) {
                            sp_4096_mont_mul_avx2_64(r, r, a, m, mp);
                        }
                    }
                    XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
                    sp_4096_mont_reduce_avx2_64(r, m, mp);
                }
                else
#endif
                {
                    for (i--; i>=0; i--) {
                        sp_4096_mont_sqr_64(r, r, m, mp);
                        if (((e >> i) & 1) == 1) {
                            sp_4096_mont_mul_64(r, r, a, m, mp);
                        }
                    }
                    XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
                    sp_4096_mont_reduce_64(r, m, mp);
                }

                /* Final conditional subtraction so r < m. */
                for (i = 63; i > 0; i--) {
                    if (r[i] != m[i])
                        break;
                }
                if (r[i] >= m[i])
                    sp_4096_sub_in_place_64(r, m);
            }
        }
    }

    if (err == MP_OKAY) {
        sp_4096_to_bin(r, out);
        *outLen = 512;
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (d != NULL)
        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
#endif

    return err;
}
+
+#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
/* RSA private key operation.
 *
 * Non-CRT variant: exponentiates directly with the full private exponent d.
 * The CRT parameters (pm, qm, dpm, dqm, qim) are accepted but unused here.
 *
 * in Array of bytes representing the number to exponentiate, base.
 * inLen Number of bytes in base.
 * dm Private exponent.
 * pm First prime.
 * qm Second prime.
 * dpm First prime's CRT exponent.
 * dqm Second prime's CRT exponent.
 * qim Inverse of second prime mod p.
 * mm Modulus.
 * out Buffer to hold big-endian bytes of exponentiation result.
 * Must be at least 512 bytes long.
 * outLen Number of bytes in result.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an array is too long and MEMORY_E when dynamic memory allocation fails.
 */
int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
    byte* out, word32* outLen)
{
#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
    sp_digit a[128], d[64], m[64];
#else
    sp_digit* d = NULL;
    sp_digit* a;
    sp_digit* m;
#endif
    sp_digit* r;
    int err = MP_OKAY;

    (void)pm;
    (void)qm;
    (void)dpm;
    (void)dqm;
    (void)qim;

    /* Validate output buffer, exponent, input and modulus sizes. */
    if (*outLen < 512U) {
        err = MP_TO_E;
    }
    if (err == MP_OKAY) {
        if (mp_count_bits(dm) > 4096) {
            err = MP_READ_E;
        }
        if (inLen > 512U) {
            err = MP_READ_E;
        }
        if (mp_count_bits(mm) != 4096) {
            err = MP_READ_E;
        }
    }

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (err == MP_OKAY) {
        /* One allocation holding d (64), a (128) and m (64). */
        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
                               DYNAMIC_TYPE_RSA);
        if (d == NULL) {
            err = MEMORY_E;
        }
    }
#endif

    if (err == MP_OKAY) {
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
        a = d + 64;
        m = a + 128;
#endif
        r = a;      /* Result is written over the (double-width) input. */

        sp_4096_from_bin(a, 64, in, inLen);
        sp_4096_from_mp(d, 64, dm);
        sp_4096_from_mp(m, 64, mm);
        err = sp_4096_mod_exp_64(r, a, d, 4096, m, 0);
    }

    if (err == MP_OKAY) {
        sp_4096_to_bin(r, out);
        *outLen = 512;
    }

    /* Zeroize the private exponent copy before releasing it. */
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (d != NULL) {
        XMEMSET(d, 0, sizeof(sp_digit) * 64);
        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
    }
#else
    XMEMSET(d, 0, sizeof(sp_digit) * 64);
#endif

    return err;
}
+
+#else
extern sp_digit sp_4096_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
extern sp_digit sp_4096_cond_add_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
/* RSA private key operation.
 *
 * CRT variant: computes in^dp mod p and in^dq mod q with half-size (2048-bit)
 * exponentiations, then recombines with Garner's formula. The full exponent
 * dm and modulus mm are unused except for the size check on mm.
 *
 * in Array of bytes representing the number to exponentiate, base.
 * inLen Number of bytes in base.
 * dm Private exponent.
 * pm First prime.
 * qm Second prime.
 * dpm First prime's CRT exponent.
 * dqm Second prime's CRT exponent.
 * qim Inverse of second prime mod p.
 * mm Modulus.
 * out Buffer to hold big-endian bytes of exponentiation result.
 * Must be at least 512 bytes long.
 * outLen Number of bytes in result.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an array is too long and MEMORY_E when dynamic memory allocation fails.
 */
int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
    byte* out, word32* outLen)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit ad[64 * 2];
    sp_digit pd[32], qd[32], dpd[32];
    sp_digit tmpad[64], tmpbd[64];
#else
    sp_digit* t = NULL;
#endif
    sp_digit* a;
    sp_digit* p;
    sp_digit* q;
    sp_digit* dp;       /* dp, dq and qi share one buffer - used sequentially. */
    sp_digit* dq;
    sp_digit* qi;
    sp_digit* tmpa;     /* in^dp mod p, then the Garner coefficient. */
    sp_digit* tmpb;     /* in^dq mod q. */
    sp_digit* r;
    sp_digit c;
    int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    (void)dm;
    (void)mm;

    /* Validate output buffer, input and modulus sizes. */
    if (*outLen < 512)
        err = MP_TO_E;
    if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
        err = MP_READ_E;

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        /* 32 * 11 digits: a (128) + p (32) + q (32) + dp/dq/qi (32 shared)
         * + tmpa (64) + tmpb (64). */
        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
                               DYNAMIC_TYPE_RSA);
        if (t == NULL)
            err = MEMORY_E;
    }
    if (err == MP_OKAY) {
        a = t;
        p = a + 64 * 2;
        q = p + 32;
        qi = dq = dp = q + 32;
        tmpa = qi + 32;
        tmpb = tmpa + 64;

        r = t + 64;     /* Result aliases the top half of a. */
    }
#else
    r = a = ad;
    p = pd;
    q = qd;
    qi = dq = dp = dpd;
    tmpa = tmpad;
    tmpb = tmpbd;
#endif

    if (err == MP_OKAY) {
        sp_4096_from_bin(a, 64, in, inLen);
        sp_4096_from_mp(p, 32, pm);
        sp_4096_from_mp(q, 32, qm);
        sp_4096_from_mp(dp, 32, dpm);

        /* tmpa = in^dp mod p. */
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_2048_mod_exp_avx2_32(tmpa, a, dp, 2048, p, 1);
        else
#endif
            err = sp_2048_mod_exp_32(tmpa, a, dp, 2048, p, 1);
    }
    if (err == MP_OKAY) {
        /* tmpb = in^dq mod q. */
        sp_4096_from_mp(dq, 32, dqm);
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_2048_mod_exp_avx2_32(tmpb, a, dq, 2048, q, 1);
        else
#endif
            err = sp_2048_mod_exp_32(tmpb, a, dq, 2048, q, 1);
    }

    if (err == MP_OKAY) {
        /* Garner recombination: tmpa = (tmpa - tmpb) mod p. The two
         * conditional adds handle a possible borrow in constant time. */
        c = sp_2048_sub_in_place_32(tmpa, tmpb);
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
            c += sp_4096_cond_add_avx2_32(tmpa, tmpa, p, c);
            sp_4096_cond_add_avx2_32(tmpa, tmpa, p, c);
        }
        else
#endif
        {
            c += sp_4096_cond_add_32(tmpa, tmpa, p, c);
            sp_4096_cond_add_32(tmpa, tmpa, p, c);
        }

        /* tmpa = tmpa * qinv mod p. */
        sp_2048_from_mp(qi, 32, qim);
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
            sp_2048_mul_avx2_32(tmpa, tmpa, qi);
        }
        else
#endif
        {
            sp_2048_mul_32(tmpa, tmpa, qi);
        }
        err = sp_2048_mod_32(tmpa, tmpa, p);
    }

    if (err == MP_OKAY) {
        /* r = tmpb + q * tmpa. */
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
            sp_2048_mul_avx2_32(tmpa, q, tmpa);
        }
        else
#endif
        {
            sp_2048_mul_32(tmpa, q, tmpa);
        }
        XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
        sp_4096_add_64(r, tmpb, tmpa);

        sp_4096_to_bin(r, out);
        *outLen = 512;
    }

    /* Zeroize all buffers that held private key material. */
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
    }
#else
    XMEMSET(tmpad, 0, sizeof(tmpad));
    XMEMSET(tmpbd, 0, sizeof(tmpbd));
    XMEMSET(pd, 0, sizeof(pd));
    XMEMSET(qd, 0, sizeof(qd));
    XMEMSET(dpd, 0, sizeof(dpd));
#endif

    return err;
}
+#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
+#endif /* WOLFSSL_HAVE_SP_RSA */
+#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
+ !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Convert an array of sp_digit to an mp_int.
 *
 * Repacks the 64 x 64-bit sp_digit words into mp_digit words of whatever
 * size DIGIT_BIT configures, then clamps leading zeros.
 *
 * a A single precision integer.
 * r A multi-precision integer.
 */
static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 64
        /* Same digit size: straight copy. */
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
        r->used = 64;
        mp_clamp(r);
#elif DIGIT_BIT < 64
        /* Smaller mp digits: each sp digit spills across several mp digits.
         * s tracks the bit offset within the current sp digit. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 64; i++) {
            r->dp[j] |= (mp_digit)(a[i] << s);
            r->dp[j] &= (1L << DIGIT_BIT) - 1;
            s = DIGIT_BIT - s;
            r->dp[++j] = (mp_digit)(a[i] >> s);
            while (s + DIGIT_BIT <= 64) {
                s += DIGIT_BIT;
                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = (mp_digit)(a[i] >> s);
                }
            }
            s = 64 - s;
        }
        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        /* Larger mp digits: several sp digits pack into each mp digit. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 64; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 64 >= DIGIT_BIT) {
    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                r->dp[j] &= (1L << DIGIT_BIT) - 1;
    #endif
                s = DIGIT_BIT - s;
                r->dp[++j] = a[i] >> s;
                s = 64 - s;
            }
            else {
                s += 64;
            }
        }
        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#endif
    }

    return err;
}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base Base. MP integer.
+ * exp Exponent. MP integer.
+ * mod Modulus. MP integer.
+ * res Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+ int err = MP_OKAY;
+ sp_digit b[128], e[64], m[64];
+ sp_digit* r = b;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+ int expBits = mp_count_bits(exp);
+
+ if (mp_count_bits(base) > 4096 || expBits > 4096 ||
+ mp_count_bits(mod) != 4096) {
+ err = MP_READ_E;
+ }
+
+ if (err == MP_OKAY) {
+ sp_4096_from_mp(b, 64, base);
+ sp_4096_from_mp(e, 64, exp);
+ sp_4096_from_mp(m, 64, mod);
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_4096_mod_exp_avx2_64(r, b, e, expBits, m, 0);
+ else
+#endif
+ err = sp_4096_mod_exp_64(r, b, e, expBits, m, 0);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_4096_to_mp(r, res);
+ }
+
+ XMEMSET(e, 0, sizeof(e));
+
+ return err;
+}
+
+#ifdef WOLFSSL_HAVE_SP_DH
+#ifdef HAVE_FFDHE_4096
+extern void sp_4096_lshift_64(sp_digit* r, const sp_digit* a, int n);
+#ifdef HAVE_INTEL_AVX2
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
 *
 * Base-2 special case: instead of multiplying by table entries, each 6-bit
 * window is applied as a left shift, with the overflow digit folded back in
 * via the Montgomery normalizer.
 *
 * r A single precision number that is the result of the operation.
 * e A single precision number that is the exponent.
 * bits The number of bits in the exponent.
 * m A single precision number that is the modulus.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_4096_mod_exp_2_avx2_64(sp_digit* r, const sp_digit* e, int bits,
        const sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit nd[128];
    sp_digit td[65];
#else
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit* tmp;
    sp_digit mp = 1;    /* Montgomery multiplier, set by sp_4096_mont_setup(). */
    sp_digit n, o;      /* n: current exponent word; o: overflow carry. */
    sp_digit mask;
    int i;
    int c, y;           /* c = unread bits left in n; y = current window value. */
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    /* 193 digits: norm (128) + tmp (65). */
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        norm = td;
        tmp = td + 128;
#else
        norm = nd;
        tmp = td;
#endif

        sp_4096_mont_setup(m, &mp);
        sp_4096_mont_norm_64(norm, m);

        /* Position at the most significant bits; size the first (possibly
         * partial) 6-bit window so the remaining windows are full. */
        i = (bits - 1) / 64;
        n = e[i--];
        c = bits & 63;
        if (c == 0) {
            c = 64;
        }
        if ((bits % 6) == 0) {
            c -= 6;
        }
        else {
            c -= bits % 6;
        }
        y = (int)(n >> c);
        n <<= 64 - c;
        /* r = 2^y in Montgomery form (normalizer shifted left by y). */
        sp_4096_lshift_64(r, norm, y);
        for (; i>=0 || c>=6; ) {
            /* Extract the next 6-bit window value into y. */
            if (c == 0) {
                n = e[i--];
                y = (int)(n >> 58);
                n <<= 6;
                c = 58;
            }
            else if (c < 6) {
                /* Window straddles two exponent words. */
                y = (int)(n >> 58);
                n = e[i--];
                c = 6 - c;
                y |= n >> (64 - c);
                n <<= c;
                c = 64 - c;
            }
            else {
                y = (n >> 58) & 0x3f;
                n <<= 6;
                c -= 6;
            }

            /* Square six times - one per bit of the window. */
            sp_4096_mont_sqr_avx2_64(r, r, m, mp);
            sp_4096_mont_sqr_avx2_64(r, r, m, mp);
            sp_4096_mont_sqr_avx2_64(r, r, m, mp);
            sp_4096_mont_sqr_avx2_64(r, r, m, mp);
            sp_4096_mont_sqr_avx2_64(r, r, m, mp);
            sp_4096_mont_sqr_avx2_64(r, r, m, mp);

            /* Multiply by 2^y as a shift; fold the overflow digit back in
             * using the normalizer (2^4096 mod m). */
            sp_4096_lshift_64(r, r, y);
            sp_4096_mul_d_avx2_64(tmp, norm, r[64]);
            r[64] = 0;
            o = sp_4096_add_64(r, r, tmp);
            sp_4096_cond_sub_avx2_64(r, r, m, (sp_digit)0 - o);
        }

        /* Convert back out of Montgomery form. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
        sp_4096_mont_reduce_avx2_64(r, m, mp);

        /* Constant-time conditional subtract when r >= m. */
        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
        sp_4096_cond_sub_avx2_64(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL)
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return err;
}
+#endif /* HAVE_INTEL_AVX2 */
+
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
 *
 * Base-2 special case (generic x86-64 path): each 6-bit window is applied
 * as a left shift, with the overflow digit folded back in via the
 * Montgomery normalizer.
 *
 * r A single precision number that is the result of the operation.
 * e A single precision number that is the exponent.
 * bits The number of bits in the exponent.
 * m A single precision number that is the modulus.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
        const sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit nd[128];
    sp_digit td[65];
#else
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit* tmp;
    sp_digit mp = 1;    /* Montgomery multiplier, set by sp_4096_mont_setup(). */
    sp_digit n, o;      /* n: current exponent word; o: overflow carry. */
    sp_digit mask;
    int i;
    int c, y;           /* c = unread bits left in n; y = current window value. */
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    /* 193 digits: norm (128) + tmp (65). */
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        norm = td;
        tmp = td + 128;
#else
        norm = nd;
        tmp = td;
#endif

        sp_4096_mont_setup(m, &mp);
        sp_4096_mont_norm_64(norm, m);

        /* Position at the most significant bits; size the first (possibly
         * partial) 6-bit window so the remaining windows are full. */
        i = (bits - 1) / 64;
        n = e[i--];
        c = bits & 63;
        if (c == 0) {
            c = 64;
        }
        if ((bits % 6) == 0) {
            c -= 6;
        }
        else {
            c -= bits % 6;
        }
        y = (int)(n >> c);
        n <<= 64 - c;
        /* r = 2^y in Montgomery form (normalizer shifted left by y). */
        sp_4096_lshift_64(r, norm, y);
        for (; i>=0 || c>=6; ) {
            /* Extract the next 6-bit window value into y. */
            if (c == 0) {
                n = e[i--];
                y = (int)(n >> 58);
                n <<= 6;
                c = 58;
            }
            else if (c < 6) {
                /* Window straddles two exponent words. */
                y = (int)(n >> 58);
                n = e[i--];
                c = 6 - c;
                y |= n >> (64 - c);
                n <<= c;
                c = 64 - c;
            }
            else {
                y = (n >> 58) & 0x3f;
                n <<= 6;
                c -= 6;
            }

            /* Square six times - one per bit of the window. */
            sp_4096_mont_sqr_64(r, r, m, mp);
            sp_4096_mont_sqr_64(r, r, m, mp);
            sp_4096_mont_sqr_64(r, r, m, mp);
            sp_4096_mont_sqr_64(r, r, m, mp);
            sp_4096_mont_sqr_64(r, r, m, mp);
            sp_4096_mont_sqr_64(r, r, m, mp);

            /* Multiply by 2^y as a shift; fold the overflow digit back in
             * using the normalizer (2^4096 mod m). */
            sp_4096_lshift_64(r, r, y);
            sp_4096_mul_d_64(tmp, norm, r[64]);
            r[64] = 0;
            o = sp_4096_add_64(r, r, tmp);
            sp_4096_cond_sub_64(r, r, m, (sp_digit)0 - o);
        }

        /* Convert back out of Montgomery form. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
        sp_4096_mont_reduce_64(r, m, mp);

        /* Constant-time conditional subtract when r >= m. */
        mask = 0 - (sp_4096_cmp_64(r, m) >= 0);
        sp_4096_cond_sub_64(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL)
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return err;
}
+
+#endif /* HAVE_FFDHE_4096 */
+
/* Perform the modular exponentiation for Diffie-Hellman.
 *
 * base Base.
 * exp Array of bytes that is the exponent.
 * expLen Length of data, in bytes, in exponent.
 * mod Modulus.
 * out Buffer to hold big-endian bytes of exponentiation result.
 * Must be at least 512 bytes long.
 * outLen Length, in bytes, of exponentiation result.
 * returns 0 on success, MP_READ_E if there are too many bytes in an array
 * and MEMORY_E if memory allocation fails.
 */
int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
    mp_int* mod, byte* out, word32* outLen)
{
    int err = MP_OKAY;
    sp_digit b[128], e[64], m[64];
    sp_digit* r = b;    /* Result shares the double-width base buffer. */
    word32 i;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    /* Operands must fit; the modulus must be exactly 4096 bits. */
    if (mp_count_bits(base) > 4096 || expLen > 512 ||
                                                 mp_count_bits(mod) != 4096) {
        err = MP_READ_E;
    }

    if (err == MP_OKAY) {
        sp_4096_from_mp(b, 64, base);
        sp_4096_from_bin(e, 64, exp, expLen);
        sp_4096_from_mp(m, 64, mod);

    #ifdef HAVE_FFDHE_4096
        /* Fast path for base 2 with an FFDHE-style modulus (top word all
         * ones): exponentiation by shifting. */
        if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1) {
#ifdef HAVE_INTEL_AVX2
            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
                err = sp_4096_mod_exp_2_avx2_64(r, e, expLen * 8, m);
            else
#endif
                err = sp_4096_mod_exp_2_64(r, e, expLen * 8, m);
        }
        else
    #endif
        {
#ifdef HAVE_INTEL_AVX2
            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
                err = sp_4096_mod_exp_avx2_64(r, b, e, expLen * 8, m, 0);
            else
#endif
                err = sp_4096_mod_exp_64(r, b, e, expLen * 8, m, 0);
        }
    }

    if (err == MP_OKAY) {
        sp_4096_to_bin(r, out);
        *outLen = 512;
        /* Strip leading zero bytes from the big-endian output. */
        for (i=0; i<512 && out[i] == 0; i++) {
        }
        *outLen -= i;
        XMEMMOVE(out, out + i, *outLen);
    }

    /* Zeroize the local copy of the exponent. */
    XMEMSET(e, 0, sizeof(e));

    return err;
}
+#endif
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+#ifdef WOLFSSL_HAVE_SP_ECC
+#ifndef WOLFSSL_SP_NO_256
+
/* Point structure to use.
 * Coordinates are in projective form; each is double width (2 * 4 digits)
 * so it can hold an unreduced multiplication result. */
typedef struct sp_point_256 {
    /* X ordinate of point. */
    sp_digit x[2 * 4];
    /* Y ordinate of point. */
    sp_digit y[2 * 4];
    /* Z ordinate of point. */
    sp_digit z[2 * 4];
    /* Indicates point is at infinity. */
    int infinity;
} sp_point_256;
+
/* The modulus (prime) of the curve P256. */
static const sp_digit p256_mod[4] = {
    0xffffffffffffffffL,0x00000000ffffffffL,0x0000000000000000L,
    0xffffffff00000001L
};
/* The Montgomery normalizer for modulus of the curve P256. */
static const sp_digit p256_norm_mod[4] = {
    0x0000000000000001L,0xffffffff00000000L,0xffffffffffffffffL,
    0x00000000fffffffeL
};
/* The Montgomery multiplier for modulus of the curve P256. */
static const sp_digit p256_mp_mod = 0x0000000000000001;
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                            defined(HAVE_ECC_VERIFY)
/* The order of the curve P256. */
static const sp_digit p256_order[4] = {
    0xf3b9cac2fc632551L,0xbce6faada7179e84L,0xffffffffffffffffL,
    0xffffffff00000000L
};
#endif
/* The order of the curve P256 minus 2. */
static const sp_digit p256_order2[4] = {
    0xf3b9cac2fc63254fL,0xbce6faada7179e84L,0xffffffffffffffffL,
    0xffffffff00000000L
};
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery normalizer for order of the curve P256. */
static const sp_digit p256_norm_order[4] = {
    0x0c46353d039cdaafL,0x4319055258e8617bL,0x0000000000000000L,
    0x00000000ffffffffL
};
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery multiplier for order of the curve P256. */
static const sp_digit p256_mp_order = 0xccd1c8aaee00bc4fL;
#endif
#ifdef WOLFSSL_SP_SMALL
/* The base point of curve P256. */
static const sp_point_256 p256_base = {
    /* X ordinate */
    {
        0xf4a13945d898c296L,0x77037d812deb33a0L,0xf8bce6e563a440f2L,
        0x6b17d1f2e12c4247L,
        0L, 0L, 0L, 0L
    },
    /* Y ordinate */
    {
        0xcbb6406837bf51f5L,0x2bce33576b315eceL,0x8ee7eb4a7c0f9e16L,
        0x4fe342e2fe1a7f9bL,
        0L, 0L, 0L, 0L
    },
    /* Z ordinate */
    {
        0x0000000000000001L,0x0000000000000000L,0x0000000000000000L,
        0x0000000000000000L,
        0L, 0L, 0L, 0L
    },
    /* infinity */
    0
};
#endif /* WOLFSSL_SP_SMALL */
#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
/* The b coefficient of the curve equation for P256. */
static const sp_digit p256_b[4] = {
    0x3bce3c3e27d2604bL,0x651d06b0cc53b0f6L,0xb3ebbd55769886bcL,
    0x5ac635d8aa3a93e7L
};
#endif
+
+/* Obtain a point to work with: either allocate one from the heap or hand
+ * back the caller-supplied stack object, depending on build configuration.
+ *
+ * heap Heap hint passed to XMALLOC (may be NULL).
+ * sp   Caller-provided point storage (used only in non-malloc builds).
+ * p    Out parameter receiving the usable point.
+ * returns MEMORY_E when allocation fails, otherwise MP_OKAY.
+ */
+static int sp_256_point_new_ex_4(void* heap, sp_point_256* sp, sp_point_256** p)
+{
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ /* Heap build: the stack storage parameter is unused. */
+ (void)sp;
+ *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
+#else
+ /* Stack build: use the storage the caller provided. */
+ *p = sp;
+#endif
+ return (*p == NULL) ? MEMORY_E : MP_OKAY;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error.
+ * 'sp' is ignored in this configuration; the point comes from the heap. */
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), NULL, &(p))
+#else
+/* Set pointer to the caller's stack data and return no error. */
+#define sp_256_point_new_4(heap, sp, p) sp_256_point_new_ex_4((heap), &(sp), &(p))
+#endif
+
+
+/* Release a point obtained via sp_256_point_new_4.
+ *
+ * p     Point to dispose of (may be NULL in heap builds).
+ * clear When non-zero, zeroize the point data first.
+ * heap  Heap hint used at allocation time.
+ */
+static void sp_256_point_free_4(sp_point_256* p, int clear, void* heap)
+{
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ /* Heap build: zeroize on request, then free. */
+ if (p == NULL)
+ return;
+ if (clear)
+ XMEMSET(p, 0, sizeof(*p));
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
+#else
+ /* Stack build: nothing to free; just zeroize on request. */
+ if (clear)
+ XMEMSET(p, 0, sizeof(*p));
+#endif
+}
+
+/* Multiply a number by the Montgomery normalizer mod modulus (prime).
+ * Converts 'a' into Montgomery form for the fixed P256 prime. The
+ * coefficient rows listed before each t[i] are the 32-bit limb weights
+ * of the normalizer product; because the prime is hard-wired, the
+ * modulus argument is ignored.
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime). Unused.
+ * returns MP_OKAY always.
+ */
+static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ int64_t t[8];
+ int64_t a32[8];
+ int64_t o;
+
+ (void)m;
+
+ /* Split the four 64-bit input words into eight 32-bit limbs. */
+ a32[0] = a[0] & 0xffffffff;
+ a32[1] = a[0] >> 32;
+ a32[2] = a[1] & 0xffffffff;
+ a32[3] = a[1] >> 32;
+ a32[4] = a[2] & 0xffffffff;
+ a32[5] = a[2] >> 32;
+ a32[6] = a[3] & 0xffffffff;
+ a32[7] = a[3] >> 32;
+
+ /* 1 1 0 -1 -1 -1 -1 0 */
+ t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
+ /* 0 1 1 0 -1 -1 -1 -1 */
+ t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
+ /* 0 0 1 1 0 -1 -1 -1 */
+ t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
+ /* -1 -1 0 2 2 1 0 -1 */
+ t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
+ /* 0 -1 -1 0 2 2 1 0 */
+ t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
+ /* 0 0 -1 -1 0 2 2 1 */
+ t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
+ /* -1 -1 0 0 0 1 3 2 */
+ t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
+ /* 1 0 -1 -1 -1 -1 0 3 */
+ t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
+
+ /* Propagate carries/borrows across the 32-bit limbs. */
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ /* Fold the top overflow back in using
+  * 2^256 == 2^224 - 2^192 - 2^96 + 1 (mod p256). */
+ o = t[7] >> 32; t[7] &= 0xffffffff;
+ t[0] += o;
+ t[3] -= o;
+ t[6] -= o;
+ t[7] += o;
+ /* Second carry pass after the fold. */
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ /* Recombine 32-bit limbs into four 64-bit result words. */
+ r[0] = (t[1] << 32) | t[0];
+ r[1] = (t[3] << 32) | t[2];
+ r[2] = (t[5] << 32) | t[4];
+ r[3] = (t[7] << 32) | t[6];
+
+ return MP_OKAY;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ * The three compile-time branches handle the mp_int digit size being
+ * equal to, larger than, or smaller than the 64-bit sp_digit.
+ *
+ * r A single precision integer (output, 'size' words).
+ * size Maximum number of words to write; excess words are zeroed.
+ * a A multi-precision integer (input).
+ */
+static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+ int j;
+
+ /* Same digit width: straight copy, then zero-fill the tail. */
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 64
+ /* mp_int digits are wider: split each into several 64-bit words.
+  * 's' tracks the bit offset consumed from the current mp digit. */
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffffffffffffl;
+ s = 64U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 64U) <= (word32)DIGIT_BIT) {
+ s += 64U;
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ /* mp_int digits are narrower: pack several into each 64-bit word.
+  * 's' is the bit position within the current output word. */
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 64) {
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 64 - s;
+ if (s == DIGIT_BIT) {
+ /* Digit ended exactly on the word boundary. */
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ /* Carry the digit's high bits into the next word. */
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_256.
+ * Each ordinate buffer is zero-filled before the (up to) 4-word value is
+ * copied in, and the point is marked as not being at infinity.
+ *
+ * p Point of type sp_point_256 (result).
+ * pm Point of type ecc_point (input).
+ */
+static void sp_256_point_from_ecc_point_4(sp_point_256* p, const ecc_point* pm)
+{
+ XMEMSET(p->x, 0, sizeof(p->x));
+ sp_256_from_mp(p->x, 4, pm->x);
+ XMEMSET(p->y, 0, sizeof(p->y));
+ sp_256_from_mp(p->y, 4, pm->y);
+ XMEMSET(p->z, 0, sizeof(p->z));
+ sp_256_from_mp(p->z, 4, pm->z);
+ p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ * The three compile-time branches handle the mp_int digit size being
+ * equal to, smaller than, or larger than the 64-bit sp_digit.
+ *
+ * a A single precision integer (4 words, 256 bits).
+ * r A multi-precision integer (result).
+ * returns MP_OKAY on success, or the error from mp_grow.
+ */
+static int sp_256_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ /* Ensure the mp_int has room for 256 bits before writing. */
+ err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+ /* Same digit width: straight copy. */
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 4);
+ r->used = 4;
+ mp_clamp(r);
+#elif DIGIT_BIT < 64
+ /* mp_int digits are narrower: split each 64-bit word across
+  * several mp digits. 's' is the bit offset consumed so far. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 4; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 64) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 64 - s;
+ }
+ r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ /* mp_int digits are wider: pack the 64-bit words into them. */
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 4; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 64 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 64 - s;
+ }
+ else {
+ s += 64;
+ }
+ }
+ r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Convert a point of type sp_point_256 to type ecc_point.
+ * Converts the ordinates in order x, y, z and stops at the first error.
+ *
+ * p Point of type sp_point_256 (input).
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_256_point_to_ecc_point_4(const sp_point_256* p, ecc_point* pm)
+{
+ int err = sp_256_to_mp(p->x, pm->x);
+
+ if (err != MP_OKAY) {
+ return err;
+ }
+ err = sp_256_to_mp(p->y, pm->y);
+ if (err != MP_OKAY) {
+ return err;
+ }
+ return sp_256_to_mp(p->z, pm->z);
+}
+
+extern void sp_256_cond_copy_4(sp_digit* r, const sp_digit* a, sp_digit m);
+extern void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp);
+extern void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp);
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Repeatedly square a Montgomery form number: r = a ^ (2^n) mod m.
+ *
+ * r Result of the repeated squaring.
+ * a Number to square, in Montgomery form.
+ * n Number of squarings to perform (callers pass n >= 1).
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_4(sp_digit* r, const sp_digit* a, int n,
+ const sp_digit* m, sp_digit mp)
+{
+ /* First squaring reads from 'a'; the remaining n-1 square 'r' in place. */
+ sp_256_mont_sqr_4(r, a, m, mp);
+ while (--n > 0) {
+ sp_256_mont_sqr_4(r, r, m, mp);
+ }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* The P256 modulus minus 2: the Fermat exponent p - 2 used by the
+ * small-code Montgomery inversion loop below. */
+static const uint64_t p256_mod_minus_2[4] = {
+ 0xfffffffffffffffdU,0x00000000ffffffffU,0x0000000000000000U,
+ 0xffffffff00000001U
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ * Uses Fermat's little theorem: r = a^(p-2) mod p. The small build does a
+ * plain left-to-right square-and-multiply over the bits of p - 2; the
+ * normal build uses a fixed addition chain (intermediate exponents noted
+ * in the comments).
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data (several 4-word scratch values; the large-code path
+ *    uses three buffers at td, td + 8 and td + 16 -- size requirement
+ *    presumably at least 6*4 digits, confirm against callers).
+ */
+static void sp_256_mont_inv_4(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ XMEMCPY(t, a, sizeof(sp_digit) * 4);
+ /* Square-and-multiply from bit 254 down (bit 255 of p-2 handled by the
+  * initial copy of 'a'). */
+ for (i=254; i>=0; i--) {
+ sp_256_mont_sqr_4(t, t, p256_mod, p256_mp_mod);
+ if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+ sp_256_mont_mul_4(t, t, a, p256_mod, p256_mp_mod);
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 4);
+#else
+ sp_digit* t1 = td;
+ sp_digit* t2 = td + 2 * 4;
+ sp_digit* t3 = td + 4 * 4;
+ /* 0x2 */
+ sp_256_mont_sqr_4(t1, a, p256_mod, p256_mp_mod);
+ /* 0x3 */
+ sp_256_mont_mul_4(t2, t1, a, p256_mod, p256_mp_mod);
+ /* 0xc */
+ sp_256_mont_sqr_n_4(t1, t2, 2, p256_mod, p256_mp_mod);
+ /* 0xd */
+ sp_256_mont_mul_4(t3, t1, a, p256_mod, p256_mp_mod);
+ /* 0xf */
+ sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xf0 */
+ sp_256_mont_sqr_n_4(t1, t2, 4, p256_mod, p256_mp_mod);
+ /* 0xfd */
+ sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xff */
+ sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xff00 */
+ sp_256_mont_sqr_n_4(t1, t2, 8, p256_mod, p256_mp_mod);
+ /* 0xfffd */
+ sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xffff */
+ sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffff0000 */
+ sp_256_mont_sqr_n_4(t1, t2, 16, p256_mod, p256_mp_mod);
+ /* 0xfffffffd */
+ sp_256_mont_mul_4(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff */
+ sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000000 */
+ sp_256_mont_sqr_n_4(t1, t2, 32, p256_mod, p256_mp_mod);
+ /* 0xffffffffffffffff */
+ sp_256_mont_mul_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001 */
+ sp_256_mont_mul_4(r, t1, a, p256_mod, p256_mp_mod);
+ /* 0xffffffff000000010000000000000000000000000000000000000000 */
+ sp_256_mont_sqr_n_4(r, r, 160, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+ sp_256_mont_mul_4(r, r, t2, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+ sp_256_mont_sqr_n_4(r, r, 32, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+ sp_256_mont_mul_4(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+extern int64_t sp_256_cmp_4(const sp_digit* a, const sp_digit* b);
+/* Normalize the values in each word to 64.
+ *
+ * Expands to nothing: with full 64-bit limbs there is no partial-word
+ * carry to propagate, so normalization is a no-op here.
+ *
+ * a Array of sp_digit to normalize.
+ */
+#define sp_256_norm_4(a)
+
+extern sp_digit sp_256_cond_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b);
+/* Reduction modulo the order reuses the prime-modulus reduction routine in
+ * this 4-word implementation. */
+#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*4;
+ int64_t n;
+
+ sp_256_mont_inv_4(t1, p->z, t + 2*4);
+
+ sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod);
+
+ /* x /= z^2 */
+ sp_256_mont_mul_4(r->x, p->x, t2, p256_mod, p256_mp_mod);
+ XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U);
+ sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod);
+ /* Reduce x to less than modulus */
+ n = sp_256_cmp_4(r->x, p256_mod);
+ sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_256_norm_4(r->x);
+
+ /* y /= z^3 */
+ sp_256_mont_mul_4(r->y, p->y, t1, p256_mod, p256_mp_mod);
+ XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U);
+ sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod);
+ /* Reduce y to less than modulus */
+ n = sp_256_cmp_4(r->y, p256_mod);
+ sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+ (sp_digit)1 : (sp_digit)0));
+ sp_256_norm_4(r->y);
+
+ XMEMSET(r->z, 0, sizeof(r->z));
+ r->z[0] = 1;
+
+}
+
+extern void sp_256_mont_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m);
+/* NOTE(review): 'r' is the result yet is declared const in this prototype --
+ * looks like a benign declaration inconsistency; confirm against the asm
+ * implementation. */
+extern void sp_256_mont_dbl_4(const sp_digit* r, const sp_digit* a, const sp_digit* m);
+extern void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m);
+extern void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m);
+extern void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m);
+/* Double the Montgomery form projective point p.
+ * Jacobian doubling; intermediate values tracked in the step comments.
+ *
+ * r Result of doubling point (may alias p).
+ * p Point to double.
+ * t Temporary ordinate data (at least 4*4 digits: two 2*4-word values).
+ */
+static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*4;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = r->x;
+ y = r->y;
+ z = r->z;
+ /* Put infinity into result. */
+ if (r != p) {
+ r->infinity = p->infinity;
+ }
+
+ /* T1 = Z * Z */
+ sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod);
+ /* Z = Y * Z */
+ sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod);
+ /* Z = 2Z */
+ sp_256_mont_dbl_4(z, z, p256_mod);
+ /* T2 = X - T1 */
+ sp_256_mont_sub_4(t2, p->x, t1, p256_mod);
+ /* T1 = X + T1 */
+ sp_256_mont_add_4(t1, p->x, t1, p256_mod);
+ /* T2 = T1 * T2 */
+ sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod);
+ /* T1 = 3T2 */
+ sp_256_mont_tpl_4(t1, t2, p256_mod);
+ /* Y = 2Y */
+ sp_256_mont_dbl_4(y, p->y, p256_mod);
+ /* Y = Y * Y */
+ sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod);
+ /* T2 = Y * Y */
+ sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
+ /* T2 = T2/2 */
+ sp_256_div2_4(t2, t2, p256_mod);
+ /* Y = Y * X */
+ sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod);
+ /* X = T1 * T1 */
+ sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod);
+ /* X = X - Y */
+ sp_256_mont_sub_4(x, x, y, p256_mod);
+ /* X = X - Y */
+ sp_256_mont_sub_4(x, x, y, p256_mod);
+ /* Y = Y - X */
+ sp_256_mont_sub_4(y, y, x, p256_mod);
+ /* Y = Y * T1 */
+ sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod);
+ /* Y = Y - T2 */
+ sp_256_mont_sub_4(y, y, t2, p256_mod);
+}
+
+/* Double the Montgomery form projective point p a number of times, in place.
+ * (The original header documented an 'r' parameter; the result is written
+ * back into p.)
+ *
+ * p Point to double repeatedly; receives the result.
+ * n Number of times to double.
+ * t Temporary ordinate data (five 2*4-word scratch values).
+ */
+static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int n, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*4;
+ sp_digit* b = t + 4*4;
+ sp_digit* t1 = t + 6*4;
+ sp_digit* t2 = t + 8*4;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = p->x;
+ y = p->y;
+ z = p->z;
+
+ /* Y = 2*Y */
+ sp_256_mont_dbl_4(y, y, p256_mod);
+ /* W = Z^4 */
+ sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
+
+ /* Small build loops n times; normal build peels the last iteration
+  * below so the final W update can be skipped. */
+#ifndef WOLFSSL_SP_SMALL
+ while (--n > 0)
+#else
+ while (--n >= 0)
+#endif
+ {
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_4(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod);
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_4(t2, b, p256_mod);
+ sp_256_mont_sub_4(x, x, t2, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
+ /* t2 = Y^4 */
+ sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+ if (n != 0)
+#endif
+ {
+ /* W = W*Y^4 */
+ sp_256_mont_mul_4(w, w, t1, p256_mod, p256_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_4(y, b, x, p256_mod);
+ sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_4(y, y, p256_mod);
+ sp_256_mont_sub_4(y, y, t1, p256_mod);
+ }
+#ifndef WOLFSSL_SP_SMALL
+ /* Final (peeled) iteration: same as above, without updating W. */
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_4(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_4(t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(b, t1, x, p256_mod, p256_mp_mod);
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_4(t2, b, p256_mod);
+ sp_256_mont_sub_4(x, x, t2, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod);
+ /* t2 = Y^4 */
+ sp_256_mont_sqr_4(t1, t1, p256_mod, p256_mp_mod);
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_4(y, b, x, p256_mod);
+ sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_4(y, y, p256_mod);
+ sp_256_mont_sub_4(y, y, t1, p256_mod);
+#endif
+ /* Y = Y/2 (undo the initial Y doubling). */
+ sp_256_div2_4(y, y, p256_mod);
+}
+
+/* Compare two 4-word numbers for equality.
+ * Constant time implementation: limb differences are accumulated with
+ * XOR/OR so there are no data-dependent branches.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_256_cmp_equal_4(const sp_digit* a, const sp_digit* b)
+{
+ sp_digit d;
+
+ d  = a[0] ^ b[0];
+ d |= a[1] ^ b[1];
+ d |= a[2] ^ b[2];
+ d |= a[3] ^ b[3];
+ return d == 0;
+}
+
+/* Add two Montgomery form projective points.
+ * Falls back to doubling when p and q represent the same (or negated)
+ * point. Points at infinity are handled branch-lite via the rp[]/ap[]
+ * pointer tables indexed by the infinity flags.
+ *
+ * r Result of addition (may alias p or q).
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (five 2*4-word values; also reused as a
+ *   scratch sp_point_256 for the infinity case).
+ */
+static void sp_256_proj_point_add_4(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+ sp_digit* t)
+{
+ const sp_point_256* ap[2];
+ sp_point_256* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*4;
+ sp_digit* t3 = t + 4*4;
+ sp_digit* t4 = t + 6*4;
+ sp_digit* t5 = t + 8*4;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Ensure only the first point is the same as the result. */
+ if (q == r) {
+ const sp_point_256* a = p;
+ p = q;
+ q = a;
+ }
+
+ /* Check double: t1 = -q->y; double when x and z match and y is +/-q->y. */
+ (void)sp_256_sub_4(t1, p256_mod, q->y);
+ sp_256_norm_4(t1);
+ if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+ (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
+ sp_256_proj_point_dbl_4(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_256));
+ /* If either input is infinity, computed ordinates go to scratch. */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ /* r starts as a copy of p (or q when p is infinity). */
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<4; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<4; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<4; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U1 = X1*Z2^2 */
+ sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod);
+ /* U2 = X2*Z1^2 */
+ sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+ /* S1 = Y1*Z2^3 */
+ sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+ /* H = U2 - U1 */
+ sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+ /* R = S2 - S1 */
+ sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+ /* Z3 = H*Z1*Z2 */
+ sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+ /* X3 = R^2 - H^3 - 2*U1*H^2 */
+ sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(x, x, t5, p256_mod);
+ sp_256_mont_dbl_4(t1, y, p256_mod);
+ sp_256_mont_sub_4(x, x, t1, p256_mod);
+ /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+ sp_256_mont_sub_4(y, y, x, p256_mod);
+ sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(y, y, t5, p256_mod);
+ }
+}
+
+/* Double the Montgomery form projective point p a number of times, storing
+ * every intermediate doubling into the table r.
+ *
+ * r Table of points; the i-th doubling is stored at r[(1<<i)*m].
+ * p Point to double.
+ * n Number of times to double.
+ * m Stride multiplier selecting which table slots receive the results.
+ * t Temporary ordinate data (five 2*4-word scratch values).
+ */
+static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, const sp_point_256* p,
+ int n, int m, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*4;
+ sp_digit* b = t + 4*4;
+ sp_digit* t1 = t + 6*4;
+ sp_digit* t2 = t + 8*4;
+ sp_digit* x = r[2*m].x;
+ /* NOTE(review): y deliberately aliases the FINAL table entry so the last
+  * halving below lands there -- intermediate y values are written
+  * explicitly each iteration; confirm against the generator if modifying. */
+ sp_digit* y = r[(1<<n)*m].y;
+ sp_digit* z = r[2*m].z;
+ int i;
+
+ for (i=0; i<4; i++) {
+ x[i] = p->x[i];
+ }
+ for (i=0; i<4; i++) {
+ y[i] = p->y[i];
+ }
+ for (i=0; i<4; i++) {
+ z[i] = p->z[i];
+ }
+
+ /* Y = 2*Y */
+ sp_256_mont_dbl_4(y, y, p256_mod);
+ /* W = Z^4 */
+ sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod);
+ for (i=1; i<=n; i++) {
+ /* A = 3*(X^2 - W) */
+ sp_256_mont_sqr_4(t1, x, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(t1, t1, w, p256_mod);
+ sp_256_mont_tpl_4(a, t1, p256_mod);
+ /* B = X*Y^2 */
+ sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(b, t2, x, p256_mod, p256_mp_mod);
+ x = r[(1<<i)*m].x;
+ /* X = A^2 - 2B */
+ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_4(t1, b, p256_mod);
+ sp_256_mont_sub_4(x, x, t1, p256_mod);
+ /* Z = Z*Y */
+ sp_256_mont_mul_4(r[(1<<i)*m].z, z, y, p256_mod, p256_mp_mod);
+ z = r[(1<<i)*m].z;
+ /* t2 = Y^4 */
+ sp_256_mont_sqr_4(t2, t2, p256_mod, p256_mp_mod);
+ if (i != n) {
+ /* W = W*Y^4 */
+ sp_256_mont_mul_4(w, w, t2, p256_mod, p256_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_256_mont_sub_4(y, b, x, p256_mod);
+ sp_256_mont_mul_4(y, y, a, p256_mod, p256_mp_mod);
+ sp_256_mont_dbl_4(y, y, p256_mod);
+ sp_256_mont_sub_4(y, y, t2, p256_mod);
+
+ /* Y = Y/2 */
+ sp_256_div2_4(r[(1<<i)*m].y, y, p256_mod);
+ r[(1<<i)*m].infinity = 0;
+ }
+}
+
+/* Add two Montgomery form projective points, producing both the sum and
+ * the difference in one pass (shares the expensive H/U1/S1 work).
+ * Assumes neither input is infinity and p != +/-q (callers build window
+ * tables from distinct non-zero points).
+ *
+ * ra Result of addition (p + q).
+ * rs Result of subtraction (p - q).
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (six 2*4-word scratch values).
+ */
+static void sp_256_proj_point_add_sub_4(sp_point_256* ra, sp_point_256* rs,
+ const sp_point_256* p, const sp_point_256* q, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*4;
+ sp_digit* t3 = t + 4*4;
+ sp_digit* t4 = t + 6*4;
+ sp_digit* t5 = t + 8*4;
+ sp_digit* t6 = t + 10*4;
+ sp_digit* x = ra->x;
+ sp_digit* y = ra->y;
+ sp_digit* z = ra->z;
+ sp_digit* xs = rs->x;
+ sp_digit* ys = rs->y;
+ sp_digit* zs = rs->z;
+
+
+ /* Copy only the low (4-word) halves of the double-width ordinates. */
+ XMEMCPY(x, p->x, sizeof(p->x) / 2);
+ XMEMCPY(y, p->y, sizeof(p->y) / 2);
+ XMEMCPY(z, p->z, sizeof(p->z) / 2);
+ ra->infinity = 0;
+ rs->infinity = 0;
+
+ /* U1 = X1*Z2^2 */
+ sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t1, t1, x, p256_mod, p256_mp_mod);
+ /* U2 = X2*Z1^2 */
+ sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+ /* S1 = Y1*Z2^3 */
+ sp_256_mont_mul_4(t3, t3, y, p256_mod, p256_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+ /* H = U2 - U1 */
+ sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+ /* RS = S2 + S1 */
+ sp_256_mont_add_4(t6, t4, t3, p256_mod);
+ /* R = S2 - S1 */
+ sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+ /* Z3 = H*Z1*Z2 */
+ /* ZS = H*Z1*Z2 */
+ sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+ XMEMCPY(zs, z, sizeof(p->z)/2);
+ /* X3 = R^2 - H^3 - 2*U1*H^2 */
+ /* XS = RS^2 - H^3 - 2*U1*H^2 */
+ sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(xs, t6, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(x, x, t5, p256_mod);
+ sp_256_mont_sub_4(xs, xs, t5, p256_mod);
+ sp_256_mont_dbl_4(t1, y, p256_mod);
+ sp_256_mont_sub_4(x, x, t1, p256_mod);
+ sp_256_mont_sub_4(xs, xs, t1, p256_mod);
+ /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+ sp_256_mont_sub_4(ys, y, xs, p256_mod);
+ sp_256_mont_sub_4(y, y, x, p256_mod);
+ sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod);
+ /* t6 = -RS */
+ sp_256_sub_4(t6, p256_mod, t6);
+ sp_256_mont_mul_4(ys, ys, t6, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod);
+ sp_256_mont_sub_4(y, y, t5, p256_mod);
+ sp_256_mont_sub_4(ys, ys, t5, p256_mod);
+}
+
+/* Structure used to describe recoding of scalar multiplication.
+ * One entry per 6-bit window of the scalar. */
+typedef struct ecc_recode_256 {
+ /* Index into pre-computation table. */
+ uint8_t i;
+ /* Use the negative of the point. */
+ uint8_t neg;
+} ecc_recode_256;
+
+/* The index into pre-computation table to use.
+ * Maps a window value y (0..65, including a possible carry) to a table
+ * index in 0..32: values above 32 map back down symmetrically, with the
+ * corresponding entry of recode_neg_4_6 marking the point as negated. */
+static const uint8_t recode_index_4_6[66] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
+ 0, 1,
+};
+
+/* Whether to negate y-ordinate (1 for window values 32..63). */
+static const uint8_t recode_neg_4_6[66] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ * Splits the 256-bit scalar into 43 windows of 6 bits (43*6 = 258 >= 256),
+ * converting each to a signed-digit form via the recode tables so that a
+ * negated table point can be used instead of large indices.
+ *
+ * k Scalar to multiply by (4 x 64-bit words).
+ * v Vector of 43 operations to perform (output).
+ */
+static void sp_256_ecc_recode_6_4(const sp_digit* k, ecc_recode_256* v)
+{
+ int i, j;
+ uint8_t y;
+ int carry = 0;
+ int o;
+ sp_digit n;
+
+ /* j: current scalar word; n: remaining bits of it; o: bits consumed. */
+ j = 0;
+ n = k[j];
+ o = 0;
+ for (i=0; i<43; i++) {
+ y = n;
+ if (o + 6 < 64) {
+ /* Window lies entirely within the current word. */
+ y &= 0x3f;
+ n >>= 6;
+ o += 6;
+ }
+ else if (o + 6 == 64) {
+ /* Window ends exactly at the word boundary. */
+ n >>= 6;
+ if (++j < 4)
+ n = k[j];
+ o = 0;
+ }
+ else if (++j < 4) {
+ /* Window straddles two words: combine low bits of the next. */
+ n = k[j];
+ y |= (n << (64 - o)) & 0x3f;
+ o -= 58;
+ n >>= o;
+ }
+
+ /* Apply carry from the previous window's signed recoding. */
+ y += carry;
+ v[i].i = recode_index_4_6[y];
+ v[i].neg = recode_neg_4_6[y];
+ /* A negated window borrows 1 from the next (y==64 propagates too). */
+ carry = (y >> 6) + v[i].neg;
+ }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Window method (6-bit signed windows): builds a table of 1g..32g plus the
+ * infinity entry, recodes the scalar with sp_256_ecc_recode_6_4, then
+ * processes windows most-significant first with 6 doublings per window.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_win_add_sub_4(sp_point_256* r, const sp_point_256* g,
+ const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td[33];
+ sp_point_256 rtd, pd;
+ sp_digit tmpd[2 * 4 * 6];
+#endif
+ sp_point_256* t;
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* tmp;
+ sp_digit* negy;
+ int i;
+ ecc_recode_256 v[43];
+ int err;
+
+ (void)heap;
+
+ err = sp_256_point_new_4(heap, rtd, rt);
+ if (err == MP_OKAY)
+ err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 33, heap, DYNAMIC_TYPE_ECC);
+ if (t == NULL)
+ err = MEMORY_E;
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#else
+ t = td;
+ tmp = tmpd;
+#endif
+
+
+ if (err == MP_OKAY) {
+ /* t[0] = {0, 0, 1} * norm */
+ XMEMSET(&t[0], 0, sizeof(t[0]));
+ t[0].infinity = 1;
+ /* t[1] = {g->x, g->y, g->z} * norm (convert to Montgomery form) */
+ err = sp_256_mod_mul_norm_4(t[1].x, g->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t[1].y, g->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t[1].z, g->z, p256_mod);
+ }
+
+ if (err == MP_OKAY) {
+ t[1].infinity = 0;
+ /* t[2] ... t[32]: doublings fill powers of two, add_sub fills the
+  * odd neighbours (t[2k+1] and t[2k-1]) in one call each. */
+ sp_256_proj_point_dbl_n_store_4(t, &t[ 1], 5, 1, tmp);
+ sp_256_proj_point_add_4(&t[ 3], &t[ 2], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[ 6], &t[ 3], tmp);
+ sp_256_proj_point_add_sub_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[10], &t[ 5], tmp);
+ sp_256_proj_point_add_sub_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[12], &t[ 6], tmp);
+ sp_256_proj_point_dbl_4(&t[14], &t[ 7], tmp);
+ sp_256_proj_point_add_sub_4(&t[15], &t[13], &t[14], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[18], &t[ 9], tmp);
+ sp_256_proj_point_add_sub_4(&t[19], &t[17], &t[18], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[20], &t[10], tmp);
+ sp_256_proj_point_dbl_4(&t[22], &t[11], tmp);
+ sp_256_proj_point_add_sub_4(&t[23], &t[21], &t[22], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[24], &t[12], tmp);
+ sp_256_proj_point_dbl_4(&t[26], &t[13], tmp);
+ sp_256_proj_point_add_sub_4(&t[27], &t[25], &t[26], &t[ 1], tmp);
+ sp_256_proj_point_dbl_4(&t[28], &t[14], tmp);
+ sp_256_proj_point_dbl_4(&t[30], &t[15], tmp);
+ sp_256_proj_point_add_sub_4(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+ /* t[0] is all zero, so its y doubles as the negation scratch. */
+ negy = t[0].y;
+
+ sp_256_ecc_recode_6_4(k, v);
+
+ /* Start from the most significant window; for each subsequent window
+  * double 6 times then add the (conditionally negated) table point. */
+ i = 42;
+ XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_256));
+ for (--i; i>=0; i--) {
+ sp_256_proj_point_dbl_n_4(rt, 6, tmp);
+
+ XMEMCPY(p, &t[v[i].i], sizeof(sp_point_256));
+ sp_256_sub_4(negy, p256_mod, p->y);
+ sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
+ sp_256_proj_point_add_4(rt, rt, p, tmp);
+ }
+
+ if (map != 0) {
+ sp_256_map_4(r, rt, tmp);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL)
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ if (tmp != NULL)
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_256_point_free_4(p, 0, heap);
+ sp_256_point_free_4(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_256_mont_mul_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp);
+extern void sp_256_mont_sqr_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp);
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Repeatedly square a Montgomery form number using AVX2:
+ * r = a ^ (2^n) mod m.
+ *
+ * r Result of the repeated squaring.
+ * a Number to square, in Montgomery form.
+ * n Number of squarings to perform (callers pass n >= 1).
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_256_mont_sqr_n_avx2_4(sp_digit* r, const sp_digit* a, int n,
+ const sp_digit* m, sp_digit mp)
+{
+ /* First squaring reads from 'a'; the remaining n-1 square 'r' in place. */
+ sp_256_mont_sqr_avx2_4(r, a, m, mp);
+ while (--n > 0) {
+ sp_256_mont_sqr_avx2_4(r, r, m, mp);
+ }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P256 curve. (r = 1 / a mod m)
+ * AVX2 variant; mirrors sp_256_mont_inv_4 (Fermat: r = a^(p-2) mod p) but
+ * dispatches to the AVX2 multiply/square primitives.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data (three 4-word scratch buffers in the large-code path).
+ */
+static void sp_256_mont_inv_avx2_4(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ XMEMCPY(t, a, sizeof(sp_digit) * 4);
+ /* Square-and-multiply over the bits of p - 2. */
+ for (i=254; i>=0; i--) {
+ sp_256_mont_sqr_avx2_4(t, t, p256_mod, p256_mp_mod);
+ if (p256_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+ sp_256_mont_mul_avx2_4(t, t, a, p256_mod, p256_mp_mod);
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 4);
+#else
+ sp_digit* t1 = td;
+ sp_digit* t2 = td + 2 * 4;
+ sp_digit* t3 = td + 4 * 4;
+ /* Fixed addition chain; comments give the exponent reached. */
+ /* 0x2 */
+ sp_256_mont_sqr_avx2_4(t1, a, p256_mod, p256_mp_mod);
+ /* 0x3 */
+ sp_256_mont_mul_avx2_4(t2, t1, a, p256_mod, p256_mp_mod);
+ /* 0xc */
+ sp_256_mont_sqr_n_avx2_4(t1, t2, 2, p256_mod, p256_mp_mod);
+ /* 0xd */
+ sp_256_mont_mul_avx2_4(t3, t1, a, p256_mod, p256_mp_mod);
+ /* 0xf */
+ sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xf0 */
+ sp_256_mont_sqr_n_avx2_4(t1, t2, 4, p256_mod, p256_mp_mod);
+ /* 0xfd */
+ sp_256_mont_mul_avx2_4(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xff */
+ sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xff00 */
+ sp_256_mont_sqr_n_avx2_4(t1, t2, 8, p256_mod, p256_mp_mod);
+ /* 0xfffd */
+ sp_256_mont_mul_avx2_4(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xffff */
+ sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffff0000 */
+ sp_256_mont_sqr_n_avx2_4(t1, t2, 16, p256_mod, p256_mp_mod);
+ /* 0xfffffffd */
+ sp_256_mont_mul_avx2_4(t3, t3, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff */
+ sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000000 */
+ sp_256_mont_sqr_n_avx2_4(t1, t2, 32, p256_mod, p256_mp_mod);
+ /* 0xffffffffffffffff */
+ sp_256_mont_mul_avx2_4(t2, t2, t1, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001 */
+ sp_256_mont_mul_avx2_4(r, t1, a, p256_mod, p256_mp_mod);
+ /* 0xffffffff000000010000000000000000000000000000000000000000 */
+ sp_256_mont_sqr_n_avx2_4(r, r, 160, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
+ sp_256_mont_mul_avx2_4(r, r, t2, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
+ sp_256_mont_sqr_n_avx2_4(r, r, 32, p256_mod, p256_mp_mod);
+ /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
+ sp_256_mont_mul_avx2_4(r, r, t3, p256_mod, p256_mp_mod);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ * Affine X = X/Z^2, Y = Y/Z^3 with the result taken out of Montgomery form.
+ *
+ * r Resulting affine coordinate point.
+ * p Montgomery form projective coordinate point.
+ * t Temporary ordinate data.
+ */
+static void sp_256_map_avx2_4(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    int64_t n;
+
+    /* t1 = 1/Z; then t2 = 1/Z^2 and t1 = 1/Z^3. */
+    sp_256_mont_inv_avx2_4(t1, p->z, t + 2*4);
+
+    sp_256_mont_sqr_avx2_4(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* x /= z^2 */
+    sp_256_mont_mul_avx2_4(r->x, p->x, t2, p256_mod, p256_mp_mod);
+    /* Ordinate buffers are double width; clear the upper scratch half. */
+    XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U);
+    sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_256_cmp_4(r->x, p256_mod);
+    sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_4(r->x);
+
+    /* y /= z^3 */
+    sp_256_mont_mul_avx2_4(r->y, p->y, t1, p256_mod, p256_mp_mod);
+    XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U);
+    sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_256_cmp_4(r->y, p256_mod);
+    sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_256_norm_4(r->y);
+
+    /* Affine point: Z = 1 (not in Montgomery form). */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * r Result of doubling point.
+ * p Point to double.
+ * t Temporary ordinate data (at least 4*4 digits; two 2*4-digit temps).
+ */
+static void sp_256_proj_point_dbl_avx2_4(sp_point_256* r, const sp_point_256* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Put infinity into result. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_256_mont_sqr_avx2_4(t1, p->z, p256_mod, p256_mp_mod);
+    /* Z = Y * Z */
+    sp_256_mont_mul_avx2_4(z, p->y, p->z, p256_mod, p256_mp_mod);
+    /* Z = 2Z */
+    sp_256_mont_dbl_4(z, z, p256_mod);
+    /* T2 = X - T1 */
+    sp_256_mont_sub_4(t2, p->x, t1, p256_mod);
+    /* T1 = X + T1 */
+    sp_256_mont_add_4(t1, p->x, t1, p256_mod);
+    /* T2 = T1 * T2 */
+    sp_256_mont_mul_avx2_4(t2, t1, t2, p256_mod, p256_mp_mod);
+    /* T1 = 3T2 */
+    sp_256_mont_tpl_4(t1, t2, p256_mod);
+    /* Y = 2Y */
+    sp_256_mont_dbl_4(y, p->y, p256_mod);
+    /* Y = Y * Y */
+    sp_256_mont_sqr_avx2_4(y, y, p256_mod, p256_mp_mod);
+    /* T2 = Y * Y */
+    sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
+    /* T2 = T2/2 */
+    sp_256_div2_4(t2, t2, p256_mod);
+    /* Y = Y * X */
+    sp_256_mont_mul_avx2_4(y, y, p->x, p256_mod, p256_mp_mod);
+    /* X = T1 * T1 */
+    sp_256_mont_sqr_avx2_4(x, t1, p256_mod, p256_mp_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_4(x, x, y, p256_mod);
+    /* X = X - Y */
+    sp_256_mont_sub_4(x, x, y, p256_mod);
+    /* Y = Y - X */
+    sp_256_mont_sub_4(y, y, x, p256_mod);
+    /* Y = Y * T1 */
+    sp_256_mont_mul_avx2_4(y, y, t1, p256_mod, p256_mp_mod);
+    /* Y = Y - T2 */
+    sp_256_mont_sub_4(y, y, t2, p256_mod);
+}
+
+/* Double the Montgomery form projective point p a number of times.
+ * The point is doubled in place.
+ *
+ * p Point to double; on return holds the result.
+ * n Number of times to double
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_avx2_4(sp_point_256* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*4;
+    sp_digit* b = t + 4*4;
+    sp_digit* t1 = t + 6*4;
+    sp_digit* t2 = t + 8*4;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_4(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_avx2_4(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_avx2_4(w, w, p256_mod, p256_mp_mod);
+
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_avx2_4(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_4(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_avx2_4(t1, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(b, t1, x, p256_mod, p256_mp_mod);
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_4(t2, b, p256_mod);
+        sp_256_mont_sub_4(x, x, t2, p256_mod);
+        /* t1 = Y^4 */
+        /* Z = Z*Y */
+        sp_256_mont_mul_avx2_4(z, z, y, p256_mod, p256_mp_mod);
+        /* t1 = Y^4 (t1 held Y^2 from the B computation above) */
+        sp_256_mont_sqr_avx2_4(t1, t1, p256_mod, p256_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_avx2_4(w, w, t1, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_4(y, b, x, p256_mod);
+        sp_256_mont_mul_avx2_4(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_4(y, y, p256_mod);
+        sp_256_mont_sub_4(y, y, t1, p256_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* Last iteration unrolled so the W update can be skipped. */
+    /* A = 3*(X^2 - W) */
+    sp_256_mont_sqr_avx2_4(t1, x, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(t1, t1, w, p256_mod);
+    sp_256_mont_tpl_4(a, t1, p256_mod);
+    /* B = X*Y^2 */
+    sp_256_mont_sqr_avx2_4(t1, y, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(b, t1, x, p256_mod, p256_mp_mod);
+    /* X = A^2 - 2B */
+    sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_4(t2, b, p256_mod);
+    sp_256_mont_sub_4(x, x, t2, p256_mod);
+    /* Z = Z*Y */
+    sp_256_mont_mul_avx2_4(z, z, y, p256_mod, p256_mp_mod);
+    /* t1 = Y^4 */
+    sp_256_mont_sqr_avx2_4(t1, t1, p256_mod, p256_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_256_mont_sub_4(y, b, x, p256_mod);
+    sp_256_mont_mul_avx2_4(y, y, a, p256_mod, p256_mp_mod);
+    sp_256_mont_dbl_4(y, y, p256_mod);
+    sp_256_mont_sub_4(y, y, t1, p256_mod);
+#endif
+    /* Y = Y/2 */
+    sp_256_div2_4(y, y, p256_mod);
+}
+
+/* Add two Montgomery form projective points.
+ * When p and q represent the same point (P == Q or P == -Q congruent check),
+ * addition formulas degenerate, so doubling is performed instead.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (at least 10*4 digits; five 2*4-digit temps).
+ */
+static void sp_256_proj_point_add_avx2_4(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
+        sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* t3 = t + 4*4;
+    sp_digit* t4 = t + 6*4;
+    sp_digit* t5 = t + 8*4;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_256* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: t1 = -q->y; equal X and Z with Y = +/-q->y means P == Q. */
+    (void)sp_256_sub_4(t1, p256_mod, q->y);
+    sp_256_norm_4(t1);
+    if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
+        /* Use the AVX2 doubling variant to match the rest of this routine. */
+        sp_256_proj_point_dbl_avx2_4(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* If p is infinity, the result is q (copied below); ordinate work is
+         * redirected into scratch so it cannot corrupt the copy. */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<4; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<4; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<4; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_256_mont_sqr_avx2_4(t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t1, t1, x, p256_mod, p256_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_avx2_4(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_256_mont_mul_avx2_4(t3, t3, y, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - U1 */
+        sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+        /* R = S2 - S1 */
+        sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(x, x, t5, p256_mod);
+        sp_256_mont_dbl_4(t1, y, p256_mod);
+        sp_256_mont_sub_4(x, x, t1, p256_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_256_mont_sub_4(y, y, x, p256_mod);
+        sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(y, y, t5, p256_mod);
+    }
+}
+
+/* Double the Montgomery form projective point p a number of times, storing
+ * each intermediate power-of-two multiple into the table r.
+ *
+ * r Table of points receiving the results (entries r[(1<<i)*m]).
+ * p Point to double.
+ * n Number of times to double
+ * m Index stride of the table entries to store into.
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_dbl_n_store_avx2_4(sp_point_256* r, const sp_point_256* p,
+        int n, int m, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*4;
+    sp_digit* b = t + 4*4;
+    sp_digit* t1 = t + 6*4;
+    sp_digit* t2 = t + 8*4;
+    /* Working Y lives in the last entry's storage; the halved Y for each
+     * step is written into r[(1<<i)*m].y inside the loop below. */
+    sp_digit* x = r[2*m].x;
+    sp_digit* y = r[(1<<n)*m].y;
+    sp_digit* z = r[2*m].z;
+    int i;
+
+    for (i=0; i<4; i++) {
+        x[i] = p->x[i];
+    }
+    for (i=0; i<4; i++) {
+        y[i] = p->y[i];
+    }
+    for (i=0; i<4; i++) {
+        z[i] = p->z[i];
+    }
+
+    /* Y = 2*Y */
+    sp_256_mont_dbl_4(y, y, p256_mod);
+    /* W = Z^4 */
+    sp_256_mont_sqr_avx2_4(w, z, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_avx2_4(w, w, p256_mod, p256_mp_mod);
+    for (i=1; i<=n; i++) {
+        /* A = 3*(X^2 - W) */
+        sp_256_mont_sqr_avx2_4(t1, x, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(t1, t1, w, p256_mod);
+        sp_256_mont_tpl_4(a, t1, p256_mod);
+        /* B = X*Y^2 */
+        sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(b, t2, x, p256_mod, p256_mp_mod);
+        x = r[(1<<i)*m].x;
+        /* X = A^2 - 2B */
+        sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_4(t1, b, p256_mod);
+        sp_256_mont_sub_4(x, x, t1, p256_mod);
+        /* Z = Z*Y */
+        sp_256_mont_mul_avx2_4(r[(1<<i)*m].z, z, y, p256_mod, p256_mp_mod);
+        z = r[(1<<i)*m].z;
+        /* t2 = Y^4 */
+        sp_256_mont_sqr_avx2_4(t2, t2, p256_mod, p256_mp_mod);
+        if (i != n) {
+            /* W = W*Y^4 */
+            sp_256_mont_mul_avx2_4(w, w, t2, p256_mod, p256_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_256_mont_sub_4(y, b, x, p256_mod);
+        sp_256_mont_mul_avx2_4(y, y, a, p256_mod, p256_mp_mod);
+        sp_256_mont_dbl_4(y, y, p256_mod);
+        sp_256_mont_sub_4(y, y, t2, p256_mod);
+
+        /* Y = Y/2 */
+        sp_256_div2_4(r[(1<<i)*m].y, y, p256_mod);
+        r[(1<<i)*m].infinity = 0;
+    }
+}
+
+/* Add two Montgomery form projective points, producing both the sum and the
+ * difference in one pass (P+Q and P-Q share most intermediate values).
+ *
+ * ra Result of addition.
+ * rs Result of subtraction.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (at least 12*4 digits; six 2*4-digit temps).
+ */
+static void sp_256_proj_point_add_sub_avx2_4(sp_point_256* ra, sp_point_256* rs,
+        const sp_point_256* p, const sp_point_256* q, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* t3 = t + 4*4;
+    sp_digit* t4 = t + 6*4;
+    sp_digit* t5 = t + 8*4;
+    sp_digit* t6 = t + 10*4;
+    sp_digit* x = ra->x;
+    sp_digit* y = ra->y;
+    sp_digit* z = ra->z;
+    sp_digit* xs = rs->x;
+    sp_digit* ys = rs->y;
+    sp_digit* zs = rs->z;
+
+
+    XMEMCPY(x, p->x, sizeof(p->x) / 2);
+    XMEMCPY(y, p->y, sizeof(p->y) / 2);
+    XMEMCPY(z, p->z, sizeof(p->z) / 2);
+    ra->infinity = 0;
+    rs->infinity = 0;
+
+    /* U1 = X1*Z2^2 */
+    sp_256_mont_sqr_avx2_4(t1, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t3, t1, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t1, t1, x, p256_mod, p256_mp_mod);
+    /* U2 = X2*Z1^2 */
+    sp_256_mont_sqr_avx2_4(t2, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t4, t2, z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+    /* S1 = Y1*Z2^3 */
+    sp_256_mont_mul_avx2_4(t3, t3, y, p256_mod, p256_mp_mod);
+    /* S2 = Y2*Z1^3 */
+    sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+    /* H = U2 - U1 */
+    sp_256_mont_sub_4(t2, t2, t1, p256_mod);
+    /* RS = S2 + S1 */
+    sp_256_mont_add_4(t6, t4, t3, p256_mod);
+    /* R = S2 - S1 */
+    sp_256_mont_sub_4(t4, t4, t3, p256_mod);
+    /* Z3 = H*Z1*Z2 */
+    /* ZS = H*Z1*Z2 */
+    sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(z, z, t2, p256_mod, p256_mp_mod);
+    XMEMCPY(zs, z, sizeof(p->z)/2);
+    /* X3 = R^2 - H^3 - 2*U1*H^2 */
+    /* XS = RS^2 - H^3 - 2*U1*H^2 */
+    sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_avx2_4(xs, t6, p256_mod, p256_mp_mod);
+    sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(x, x, t5, p256_mod);
+    sp_256_mont_sub_4(xs, xs, t5, p256_mod);
+    sp_256_mont_dbl_4(t1, y, p256_mod);
+    sp_256_mont_sub_4(x, x, t1, p256_mod);
+    sp_256_mont_sub_4(xs, xs, t1, p256_mod);
+    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+    sp_256_mont_sub_4(ys, y, xs, p256_mod);
+    sp_256_mont_sub_4(y, y, x, p256_mod);
+    sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod);
+    /* Negate RS: t6 = p - t6. */
+    sp_256_sub_4(t6, p256_mod, t6);
+    sp_256_mont_mul_avx2_4(ys, ys, t6, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, p256_mp_mod);
+    sp_256_mont_sub_4(y, y, t5, p256_mod);
+    sp_256_mont_sub_4(ys, ys, t5, p256_mod);
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Uses a signed window method: the scalar is recoded into 43 signed 6-bit
+ * windows (sp_256_ecc_recode_6_4) against a table of odd multiples t[0..32].
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_win_add_sub_avx2_4(sp_point_256* r, const sp_point_256* g,
+        const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td[33];
+    sp_point_256 rtd, pd;
+    sp_digit tmpd[2 * 4 * 6];
+#endif
+    sp_point_256* t;
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* tmp;
+    sp_digit* negy;
+    int i;
+    ecc_recode_256 v[43];
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 33, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_256_mod_mul_norm_4(t[1].x, g->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t[1].y, g->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t[1].z, g->z, p256_mod);
+    }
+
+    if (err == MP_OKAY) {
+        t[1].infinity = 0;
+        /* t[2] ... t[32]: built from doublings of t[1] plus paired
+         * add/sub steps to fill in the remaining multiples. */
+        sp_256_proj_point_dbl_n_store_avx2_4(t, &t[ 1], 5, 1, tmp);
+        sp_256_proj_point_add_avx2_4(&t[ 3], &t[ 2], &t[ 1], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[ 6], &t[ 3], tmp);
+        sp_256_proj_point_add_sub_avx2_4(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[10], &t[ 5], tmp);
+        sp_256_proj_point_add_sub_avx2_4(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[12], &t[ 6], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[14], &t[ 7], tmp);
+        sp_256_proj_point_add_sub_avx2_4(&t[15], &t[13], &t[14], &t[ 1], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[18], &t[ 9], tmp);
+        sp_256_proj_point_add_sub_avx2_4(&t[19], &t[17], &t[18], &t[ 1], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[20], &t[10], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[22], &t[11], tmp);
+        sp_256_proj_point_add_sub_avx2_4(&t[23], &t[21], &t[22], &t[ 1], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[24], &t[12], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[26], &t[13], tmp);
+        sp_256_proj_point_add_sub_avx2_4(&t[27], &t[25], &t[26], &t[ 1], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[28], &t[14], tmp);
+        sp_256_proj_point_dbl_avx2_4(&t[30], &t[15], tmp);
+        sp_256_proj_point_add_sub_avx2_4(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+        negy = t[0].y;
+
+        sp_256_ecc_recode_6_4(k, v);
+
+        /* Process windows from most significant down: 6 doublings then a
+         * conditionally-negated table addition per window. */
+        i = 42;
+        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_256));
+        for (--i; i>=0; i--) {
+            sp_256_proj_point_dbl_n_avx2_4(rt, 6, tmp);
+
+            XMEMCPY(p, &t[v[i].i], sizeof(sp_point_256));
+            sp_256_sub_4(negy, p256_mod, p->y);
+            sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
+            sp_256_proj_point_add_avx2_4(rt, rt, p, tmp);
+        }
+
+        if (map != 0) {
+            sp_256_map_avx2_4(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL)
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    if (tmp != NULL)
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    return err;
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+/* A table entry for pre-computed points (affine, Z implicitly 1). */
+typedef struct sp_table_entry_256 {
+    sp_digit x[4];    /* X ordinate of point. */
+    sp_digit y[4];    /* Y ordinate of point. */
+} sp_table_entry_256;
+
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+#endif /* FP_ECC || WOLFSSL_SP_SMALL */
+/* Add two Montgomery form projective points. The second point is assumed to
+ * have a Z ordinate of one (affine form), which saves several operations.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add (Z == 1).
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* t3 = t + 4*4;
+    sp_digit* t4 = t + 6*4;
+    sp_digit* t5 = t + 8*4;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double */
+    (void)sp_256_sub_4(t1, p256_mod, q->y);
+    sp_256_norm_4(t1);
+    if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
+        sp_256_proj_point_dbl_4(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* If p is infinity the result is q; redirect scratch accordingly. */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<4; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<4; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<4; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_4(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_4(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_4(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_4(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_4(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_4(t1, t3, p256_mod);
+        sp_256_mont_sub_4(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_4(t3, t3, x, p256_mod);
+        sp_256_mont_mul_4(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_4(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert (converted in place).
+ * t Temporary data.
+ */
+static void sp_256_proj_to_affine_4(sp_point_256* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 4;
+    sp_digit* tmp = t + 4 * 4;
+
+    /* t1 = 1/Z; then t2 = 1/Z^2 and t1 = 1/Z^3. */
+    sp_256_mont_inv_4(t1, a->z, tmp);
+
+    sp_256_mont_sqr_4(t2, t1, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(t1, t2, t1, p256_mod, p256_mp_mod);
+
+    /* X = X/Z^2, Y = Y/Z^3, Z = 1 (in Montgomery form). */
+    sp_256_mont_mul_4(a->x, a->x, t2, p256_mod, p256_mp_mod);
+    sp_256_mont_mul_4(a->y, a->y, t1, p256_mod, p256_mp_mod);
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ * table[2^i] holds 2^(32*i) * a; other entries are sums of those stripes,
+ * for use with the 32-bit-stripe scalar multiplication.
+ *
+ * a The base point.
+ * table Place to store generated point data (256 affine entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_gen_stripe_table_4(const sp_point_256* a,
+        sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 td, s1d, s2d;
+#endif
+    sp_point_256* t;
+    sp_point_256* s1 = NULL;
+    sp_point_256* s2 = NULL;
+    int i, j;
+    int err;
+
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, td, t);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s1d, s1);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, s2d, s2);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->x, a->x, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->y, a->y, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_256_mod_mul_norm_4(t->z, a->z, p256_mod);
+    }
+    if (err == MP_OKAY) {
+        t->infinity = 0;
+        sp_256_proj_to_affine_4(t, tmp);
+
+        XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s1->infinity = 0;
+        XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+        s2->infinity = 0;
+
+        /* table[0] = {0, 0, infinity} */
+        XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+        /* table[1] = Affine version of 'a' in Montgomery form */
+        XMEMCPY(table[1].x, t->x, sizeof(table->x));
+        XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+        /* Stripe bases: table[2^i] = 2^(32*i) * a via 32 doublings each. */
+        for (i=1; i<8; i++) {
+            sp_256_proj_point_dbl_n_4(t, 32, tmp);
+            sp_256_proj_to_affine_4(t, tmp);
+            XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+            XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+        }
+
+        /* Remaining entries: table[j] = table[2^i] + table[j - 2^i]. */
+        for (i=1; i<8; i++) {
+            XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+            XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+            for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+                XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+                XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+                sp_256_proj_point_add_qz1_4(t, s1, s2, tmp);
+                sp_256_proj_to_affine_4(t, tmp);
+                XMEMCPY(table[j].x, t->x, sizeof(table->x));
+                XMEMCPY(table[j].y, t->y, sizeof(table->y));
+            }
+        }
+    }
+
+    sp_256_point_free_4(s2, 0, heap);
+    sp_256_point_free_4(s1, 0, heap);
+    sp_256_point_free_4( t, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC */
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * Uses a pre-computed stripe table: one bit is taken from each of eight
+ * 32-bit stripes of the scalar to index the table per iteration.
+ *
+ * r Resulting point.
+ * g Point to multiply (unused; the table holds its multiples).
+ * table Pre-computed table of affine multiples of g.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_4(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit td[2 * 4 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_256_point_new_4(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        /* y = table index built from bit 31 of each 32-bit stripe. */
+        y = 0;
+        for (j=0,x=31; j<8; j++,x+=32) {
+            y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        for (i=30; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=32) {
+                y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+            }
+
+            /* Double accumulator, then add the indexed affine entry. */
+            sp_256_proj_point_dbl_4(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_256_proj_point_add_qz1_4(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_256_map_4(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    return err;
+}
+
+#endif /* FP_ECC || WOLFSSL_SP_SMALL */
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry pairing a point with its generated stripe table. */
+typedef struct sp_cache_256_t {
+    sp_digit x[4];                  /* X ordinate of cached point. */
+    sp_digit y[4];                  /* Y ordinate of cached point. */
+    sp_table_entry_256 table[256];  /* Pre-computed stripe table. */
+    uint32_t cnt;                   /* Use count for LRU-style eviction. */
+    int set;                        /* Non-zero when entry is in use. */
+} sp_cache_256_t;
+
+/* Cache of stripe tables; thread-local when HAVE_THREAD_LS is defined. */
+static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
+static THREAD_LS_T int sp_cache_256_last = -1;
+static THREAD_LS_T int sp_cache_256_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+    static volatile int initCacheMutex_256 = 0;
+    static wolfSSL_Mutex sp_cache_256_lock;
+#endif
+
+/* Find or assign the cache entry for the given point. Lazily initializes
+ * the cache on first use; evicts the least-used entry when full. When
+ * HAVE_THREAD_LS is not defined the caller must hold sp_cache_256_lock.
+ *
+ * g     Point to look up (matched on x and y ordinates).
+ * cache Receives a pointer to the matching or newly-assigned entry.
+ */
+static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
+{
+    int i, j;
+    uint32_t least;
+
+    if (sp_cache_256_inited == 0) {
+        for (i=0; i<FP_ENTRIES; i++) {
+            sp_cache_256[i].set = 0;
+        }
+        sp_cache_256_inited = 1;
+    }
+
+    /* Compare point with those in cache. */
+    for (i=0; i<FP_ENTRIES; i++) {
+        if (!sp_cache_256[i].set)
+            continue;
+
+        if (sp_256_cmp_equal_4(g->x, sp_cache_256[i].x) &
+                sp_256_cmp_equal_4(g->y, sp_cache_256[i].y)) {
+            sp_cache_256[i].cnt++;
+            break;
+        }
+    }
+
+    /* No match. */
+    if (i == FP_ENTRIES) {
+        /* Find empty entry. */
+        i = (sp_cache_256_last + 1) % FP_ENTRIES;
+        for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
+            if (!sp_cache_256[i].set) {
+                break;
+            }
+        }
+
+        /* Evict least used. */
+        if (i == sp_cache_256_last) {
+            least = sp_cache_256[0].cnt;
+            for (j=1; j<FP_ENTRIES; j++) {
+                if (sp_cache_256[j].cnt < least) {
+                    i = j;
+                    least = sp_cache_256[i].cnt;
+                }
+            }
+        }
+
+        /* Claim the slot for this point; table is generated by the caller
+         * once the use count shows the point is reused. */
+        XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
+        XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
+        sp_cache_256[i].set = 1;
+        sp_cache_256[i].cnt = 1;
+    }
+
+    *cache = &sp_cache_256[i];
+    sp_cache_256_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ * With FP_ECC, a stripe table is generated and cached on the point's second
+ * use; first use falls back to the windowed add/sub method.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap);
+#else
+    sp_digit tmp[2 * 4 * 5];
+    sp_cache_256_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    if (initCacheMutex_256 == 0) {
+         wc_InitMutex(&sp_cache_256_lock);
+         initCacheMutex_256 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_256_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_256(g, &cache);
+        /* Generate the table exactly once, on the second use. */
+        if (cache->cnt == 2)
+            sp_256_gen_stripe_table_4(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+        if (cache->cnt < 2) {
+            err = sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, heap);
+        }
+        else {
+            err = sp_256_ecc_mulmod_stripe_4(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#ifdef HAVE_INTEL_AVX2
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+#endif /* FP_ECC || WOLFSSL_SP_SMALL */
+/* Add two Montgomery form projective points. The second point is assumed to
+ * have a Z ordinate of one (affine form), which saves several operations.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add (Z == 1).
+ * t Temporary ordinate data.
+ */
+static void sp_256_proj_point_add_qz1_avx2_4(sp_point_256* r, const sp_point_256* p,
+        const sp_point_256* q, sp_digit* t)
+{
+    const sp_point_256* ap[2];
+    sp_point_256* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*4;
+    sp_digit* t3 = t + 4*4;
+    sp_digit* t4 = t + 6*4;
+    sp_digit* t5 = t + 8*4;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Check double: t1 = -q->y; equal X and Z with Y = +/-q->y means P == Q. */
+    (void)sp_256_sub_4(t1, p256_mod, q->y);
+    sp_256_norm_4(t1);
+    if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) &
+        (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) {
+        /* Use the AVX2 doubling variant to match the rest of this routine. */
+        sp_256_proj_point_dbl_avx2_4(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_256));
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* If p is infinity the result is q; redirect scratch accordingly. */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<4; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<4; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<4; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U2 = X2*Z1^2 */
+        sp_256_mont_sqr_avx2_4(t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t4, t2, z, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod);
+        /* H = U2 - X1 */
+        sp_256_mont_sub_4(t2, t2, x, p256_mod);
+        /* R = S2 - Y1 */
+        sp_256_mont_sub_4(t4, t4, y, p256_mod);
+        /* Z3 = H*Z1 */
+        sp_256_mont_mul_avx2_4(z, z, t2, p256_mod, p256_mp_mod);
+        /* X3 = R^2 - H^3 - 2*X1*H^2 */
+        sp_256_mont_sqr_avx2_4(t1, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t3, x, t5, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(x, t1, t5, p256_mod);
+        sp_256_mont_dbl_4(t1, t3, p256_mod);
+        sp_256_mont_sub_4(x, x, t1, p256_mod);
+        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+        sp_256_mont_sub_4(t3, t3, x, p256_mod);
+        sp_256_mont_mul_avx2_4(t3, t3, t4, p256_mod, p256_mp_mod);
+        sp_256_mont_mul_avx2_4(t5, t5, y, p256_mod, p256_mp_mod);
+        sp_256_mont_sub_4(y, t3, t5, p256_mod);
+    }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a Point to convert.
+ * t Temporary data.
+ */
+static void sp_256_proj_to_affine_avx2_4(sp_point_256* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 4;
+ sp_digit* tmp = t + 4 * 4;
+
+ /* t1 = 1/Z mod p (all values remain in Montgomery form). */
+ sp_256_mont_inv_avx2_4(t1, a->z, tmp);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3. */
+ sp_256_mont_sqr_avx2_4(t2, t1, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_avx2_4(t1, t2, t1, p256_mod, p256_mp_mod);
+
+ /* X' = X/Z^2, Y' = Y/Z^3; then set Z to one (Montgomery form). */
+ sp_256_mont_mul_avx2_4(a->x, a->x, t2, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_avx2_4(a->y, a->y, t1, p256_mod, p256_mp_mod);
+ XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * a The base point.
+ * table Place to store generated point data.
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ */
+static int sp_256_gen_stripe_table_avx2_4(const sp_point_256* a,
+ sp_table_entry_256* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 td, s1d, s2d;
+#endif
+ sp_point_256* t;
+ sp_point_256* s1 = NULL;
+ sp_point_256* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ /* Working point t plus two scratch points s1/s2 for the additions. */
+ err = sp_256_point_new_4(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_4(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_4(heap, s2d, s2);
+ }
+
+ /* Convert the ordinates of 'a' into Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t->x, a->x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t->y, a->y, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_mod_mul_norm_4(t->z, a->z, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ /* Reduce to affine so table entries only need x and y. */
+ sp_256_proj_to_affine_avx2_4(t, tmp);
+
+ /* s1/s2 always have Z = 1 (Montgomery one) for the qz1 addition. */
+ XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(32*i) * a, stored as affine Montgomery values. */
+ for (i=1; i<8; i++) {
+ sp_256_proj_point_dbl_n_avx2_4(t, 32, tmp);
+ sp_256_proj_to_affine_avx2_4(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Remaining entries: table[j] = table[2^i] + table[j - 2^i]. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_256_proj_point_add_qz1_avx2_4(t, s1, s2, tmp);
+ sp_256_proj_to_affine_avx2_4(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_256_point_free_4(s2, 0, heap);
+ sp_256_point_free_4(s1, 0, heap);
+ sp_256_point_free_4( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+#if defined(FP_ECC) || defined(WOLFSSL_SP_SMALL)
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply (unused; represented by the pre-computed table).
+ * table Pre-computed stripe table of multiples of g.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_stripe_avx2_4(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit td[2 * 4 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_256_point_new_4(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_4(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Table entries are affine, so both points start with Z = 1. */
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ /* Build the 8-bit table index from bit 31 of each 32-bit stripe
+ * of the scalar k (k is 4 x 64-bit digits). */
+ y = 0;
+ for (j=0,x=31; j<8; j++,x+=32) {
+ y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ /* Index 0 selects the point at infinity entry. */
+ rt->infinity = !y;
+ /* For stripe bit positions 30..0: double, then add the entry
+ * selected by that bit from each of the 8 stripes. */
+ for (i=30; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=32) {
+ y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+ }
+
+ sp_256_proj_point_dbl_avx2_4(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_256_proj_point_add_qz1_avx2_4(rt, rt, p, t);
+ }
+
+ /* Optionally map the projective result back to affine. */
+ if (map != 0) {
+ sp_256_map_avx2_4(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_4(p, 0, heap);
+ sp_256_point_free_4(rt, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC || WOLFSSL_SP_SMALL */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_avx2_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ /* No fixed-point cache: always use the window add/sub method. */
+ return sp_256_ecc_mulmod_win_add_sub_avx2_4(r, g, k, map, heap);
+#else
+ sp_digit tmp[2 * 4 * 5];
+ sp_cache_256_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ /* Cache is shared between threads - serialize access. */
+ if (initCacheMutex_256 == 0) {
+ wc_InitMutex(&sp_cache_256_lock);
+ initCacheMutex_256 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_256_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_256(g, &cache);
+ /* On the second use of this base point, build its stripe table. */
+ if (cache->cnt == 2)
+ sp_256_gen_stripe_table_avx2_4(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_256_lock);
+#endif /* HAVE_THREAD_LS */
+
+ /* Not cached enough yet: generic method; otherwise use the table. */
+ if (cache->cnt < 2) {
+ err = sp_256_ecc_mulmod_win_add_sub_avx2_4(r, g, k, map, heap);
+ }
+ else {
+ err = sp_256_ecc_mulmod_stripe_avx2_4(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * gm Point to multiply.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+ void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 p;
+ sp_digit kd[4];
+#endif
+ sp_point_256* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ /* Small/heap build: allocate the 4-digit scalar buffer. */
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert the mp_int scalar and ecc_point into SP representation. */
+ sp_256_from_mp(k, 4, km);
+ sp_256_point_from_ecc_point_4(point, gm);
+
+#ifdef HAVE_INTEL_AVX2
+ /* AVX2 path needs both BMI2 (mulx) and ADX (adcx/adox). */
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_256_ecc_mulmod_avx2_4(point, point, k, map, heap);
+ else
+#endif
+ err = sp_256_ecc_mulmod_4(point, point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Copy the SP result back into the caller's ecc_point. */
+ err = sp_256_point_to_ecc_point_4(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_4(point, 0, heap);
+
+ return err;
+}
+
+#ifdef WOLFSSL_SP_SMALL
+static const sp_table_entry_256 p256_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L,
+ 0x18905f76a53755c6L },
+ { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L,
+ 0x8571ff1825885d85L } },
+ /* 2 */
+ { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L,
+ 0xd953c50ddbdf58e9L },
+ { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL,
+ 0x863ebb7e9eb288f3L } },
+ /* 3 */
+ { { 0x7856b6235cdb6485L,0x808f0ea22f0a2f97L,0x3e68d9544f7e300bL,
+ 0x00076055b5ff80a0L },
+ { 0x7634eb9b838d2010L,0x54014fbb3243708aL,0xe0e47d39842a6606L,
+ 0x8308776134373ee0L } },
+ /* 4 */
+ { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL,
+ 0x2f5e6961fd1b667fL },
+ { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L,
+ 0xf648f9168d6f0f7bL } },
+ /* 5 */
+ { { 0x9e566847e137bbbcL,0xe434469e8a6a0becL,0xb1c4276179d73463L,
+ 0x5abe0285133d0015L },
+ { 0x92aa837cc04c7dabL,0x573d9f4c43260c07L,0x0c93156278e6cc37L,
+ 0x94bb725b6b6f7383L } },
+ /* 6 */
+ { { 0xbbf9b48f720f141cL,0x6199b3cd2df5bc74L,0xdc3f6129411045c4L,
+ 0xcdd6bbcb2f7dc4efL },
+ { 0xcca6700beaf436fdL,0x6f647f6db99326beL,0x0c0fa792014f2522L,
+ 0xa361bebd4bdae5f6L } },
+ /* 7 */
+ { { 0x28aa2558597c13c7L,0xc38d635f50b7c3e1L,0x07039aecf3c09d1dL,
+ 0xba12ca09c4b5292cL },
+ { 0x9e408fa459f91dfdL,0x3af43b66ceea07fbL,0x1eceb0899d780b29L,
+ 0x53ebb99d701fef4bL } },
+ /* 8 */
+ { { 0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L,
+ 0x8589fb9206d54831L },
+ { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L,
+ 0xebb0696d02541c4fL } },
+ /* 9 */
+ { { 0x4616ca15ac1647c5L,0xb8127d47c4cf5799L,0xdc666aa3764dfbacL,
+ 0xeb2820cbd1b27da3L },
+ { 0x9406f8d86a87e008L,0xd87dfa9d922378f3L,0x56ed2e4280ccecb2L,
+ 0x1f28289b55a7da1dL } },
+ /* 10 */
+ { { 0xabbaa0c03b89da99L,0xa6f2d79eb8284022L,0x27847862b81c05e8L,
+ 0x337a4b5905e54d63L },
+ { 0x3c67500d21f7794aL,0x207005b77d6d7f61L,0x0a5a378104cfd6e8L,
+ 0x0d65e0d5f4c2fbd6L } },
+ /* 11 */
+ { { 0xd9d09bbeb5275d38L,0x4268a7450be0a358L,0xf0762ff4973eb265L,
+ 0xc23da24252f4a232L },
+ { 0x5da1b84f0b94520cL,0x09666763b05bd78eL,0x3a4dcb8694d29ea1L,
+ 0x19de3b8cc790cff1L } },
+ /* 12 */
+ { { 0x183a716c26c5fe04L,0x3b28de0b3bba1bdbL,0x7432c586a4cb712cL,
+ 0xe34dcbd491fccbfdL },
+ { 0xb408d46baaa58403L,0x9a69748682e97a53L,0x9e39012736aaa8afL,
+ 0xe7641f447b4e0f7fL } },
+ /* 13 */
+ { { 0x7d753941df64ba59L,0xd33f10ec0b0242fcL,0x4f06dfc6a1581859L,
+ 0x4a12df57052a57bfL },
+ { 0xbfa6338f9439dbd0L,0xd3c24bd4bde53e1fL,0xfd5e4ffa21f1b314L,
+ 0x6af5aa93bb5bea46L } },
+ /* 14 */
+ { { 0xda10b69910c91999L,0x0a24b4402a580491L,0x3e0094b4b8cc2090L,
+ 0x5fe3475a66a44013L },
+ { 0xb0f8cabdf93e7b4bL,0x292b501a7c23f91aL,0x42e889aecd1e6263L,
+ 0xb544e308ecfea916L } },
+ /* 15 */
+ { { 0x6478c6e916ddfdceL,0x2c329166f89179e6L,0x4e8d6e764d4e67e1L,
+ 0xe0b6b2bda6b0c20bL },
+ { 0x0d312df2bb7efb57L,0x1aac0dde790c4007L,0xf90336ad679bc944L,
+ 0x71c023de25a63774L } },
+ /* 16 */
+ { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L,
+ 0x61d587d421d324f6L },
+ { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL,
+ 0xfa11fe124621efbeL } },
+ /* 17 */
+ { { 0x1c891f2b2cb19ffdL,0x01ba8d5bb1923c23L,0xb6d03d678ac5ca8eL,
+ 0x586eb04c1f13bedcL },
+ { 0x0c35c6e527e8ed09L,0x1e81a33c1819ede2L,0x278fd6c056c652faL,
+ 0x19d5ac0870864f11L } },
+ /* 18 */
+ { { 0x1e99f581309a4e1fL,0xab7de71be9270074L,0x26a5ef0befd28d20L,
+ 0xe7c0073f7f9c563fL },
+ { 0x1f6d663a0ef59f76L,0x669b3b5420fcb050L,0xc08c1f7a7a6602d4L,
+ 0xe08504fec65b3c0aL } },
+ /* 19 */
+ { { 0xf098f68da031b3caL,0x6d1cab9ee6da6d66L,0x5bfd81fa94f246e8L,
+ 0x78f018825b0996b4L },
+ { 0xb7eefde43a25787fL,0x8016f80d1dccac9bL,0x0cea4877b35bfc36L,
+ 0x43a773b87e94747aL } },
+ /* 20 */
+ { { 0x62577734d2b533d5L,0x673b8af6a1bdddc0L,0x577e7c9aa79ec293L,
+ 0xbb6de651c3b266b1L },
+ { 0xe7e9303ab65259b3L,0xd6a0afd3d03a7480L,0xc5ac83d19b3cfc27L,
+ 0x60b4619a5d18b99bL } },
+ /* 21 */
+ { { 0xbd6a38e11ae5aa1cL,0xb8b7652b49e73658L,0x0b130014ee5f87edL,
+ 0x9d0f27b2aeebffcdL },
+ { 0xca9246317a730a55L,0x9c955b2fddbbc83aL,0x07c1dfe0ac019a71L,
+ 0x244a566d356ec48dL } },
+ /* 22 */
+ { { 0x6db0394aeacf1f96L,0x9f2122a9024c271cL,0x2626ac1b82cbd3b9L,
+ 0x45e58c873581ef69L },
+ { 0xd3ff479da38f9dbcL,0xa8aaf146e888a040L,0x945adfb246e0bed7L,
+ 0xc040e21cc1e4b7a4L } },
+ /* 23 */
+ { { 0x847af0006f8117b6L,0x651969ff73a35433L,0x482b35761d9475ebL,
+ 0x1cdf5c97682c6ec7L },
+ { 0x7db775b411f04839L,0x7dbeacf448de1698L,0xb2921dd1b70b3219L,
+ 0x046755f8a92dff3dL } },
+ /* 24 */
+ { { 0xcc8ac5d2bce8ffcdL,0x0d53c48b2fe61a82L,0xf6f161727202d6c7L,
+ 0x046e5e113b83a5f3L },
+ { 0xe7b8ff64d8007f01L,0x7fb1ef125af43183L,0x045c5ea635e1a03cL,
+ 0x6e0106c3303d005bL } },
+ /* 25 */
+ { { 0x48c7358488dd73b1L,0x7670708f995ed0d9L,0x38385ea8c56a2ab7L,
+ 0x442594ede901cf1fL },
+ { 0xf8faa2c912d4b65bL,0x94c2343b96c90c37L,0xd326e4a15e978d1fL,
+ 0xa796fa514c2ee68eL } },
+ /* 26 */
+ { { 0x359fb604823addd7L,0x9e2a6183e56693b3L,0xf885b78e3cbf3c80L,
+ 0xe4ad2da9c69766e9L },
+ { 0x357f7f428e048a61L,0x082d198cc092d9a0L,0xfc3a1af4c03ed8efL,
+ 0xc5e94046c37b5143L } },
+ /* 27 */
+ { { 0x476a538c2be75f9eL,0x6fd1a9e8cb123a78L,0xd85e4df0b109c04bL,
+ 0x63283dafdb464747L },
+ { 0xce728cf7baf2df15L,0xe592c4550ad9a7f4L,0xfab226ade834bcc3L,
+ 0x68bd19ab1981a938L } },
+ /* 28 */
+ { { 0xc08ead511887d659L,0x3374d5f4b359305aL,0x96986981cfe74fe3L,
+ 0x495292f53c6fdfd6L },
+ { 0x4a878c9e1acec896L,0xd964b210ec5b4484L,0x6696f7e2664d60a7L,
+ 0x0ec7530d26036837L } },
+ /* 29 */
+ { { 0x2da13a05ad2687bbL,0xa1f83b6af32e21faL,0x390f5ef51dd4607bL,
+ 0x0f6207a664863f0bL },
+ { 0xbd67e3bb0f138233L,0xdd66b96c272aa718L,0x8ed0040726ec88aeL,
+ 0xff0db07208ed6dcfL } },
+ /* 30 */
+ { { 0x749fa1014c95d553L,0xa44052fd5d680a8aL,0x183b4317ff3b566fL,
+ 0x313b513c88740ea3L },
+ { 0xb402e2ac08d11549L,0x071ee10bb4dee21cL,0x26b987dd47f2320eL,
+ 0x2d3abcf986f19f81L } },
+ /* 31 */
+ { { 0x4c288501815581a2L,0x9a0a6d56632211afL,0x19ba7a0f0cab2e99L,
+ 0xc036fa10ded98cdfL },
+ { 0x29ae08bac1fbd009L,0x0b68b19006d15816L,0xc2eb32779b9e0d8fL,
+ 0xa6b2a2c4b6d40194L } },
+ /* 32 */
+ { { 0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L,
+ 0x810ee252af7c9860L },
+ { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L,
+ 0xd485717a92731745L } },
+ /* 33 */
+ { { 0x11741a8af0cb5a98L,0xd3da8f931f3110bfL,0x1994e2cbab382adfL,
+ 0x6a6045a72f9a604eL },
+ { 0x170c0d3fa2b2411dL,0xbe0eb83e510e96e0L,0x3bcc9f738865b3ccL,
+ 0xd3e45cfaf9e15790L } },
+ /* 34 */
+ { { 0xce1f69bbe83f7669L,0x09f8ae8272877d6bL,0x9548ae543244278dL,
+ 0x207755dee3c2c19cL },
+ { 0x87bd61d96fef1945L,0x18813cefb12d28c3L,0x9fbcd1d672df64aaL,
+ 0x48dc5ee57154b00dL } },
+ /* 35 */
+ { { 0x123790bff7e5a199L,0xe0efb8cf989ccbb7L,0xc27a2bfe0a519c79L,
+ 0xf2fb0aeddff6f445L },
+ { 0x41c09575f0b5025fL,0x550543d740fa9f22L,0x8fa3c8ad380bfbd0L,
+ 0xa13e9015db28d525L } },
+ /* 36 */
+ { { 0xf9f7a350a2b65cbcL,0x0b04b9722a464226L,0x265ce241e23f07a1L,
+ 0x2bf0d6b01497526fL },
+ { 0xd3d4dd3f4b216fb7L,0xf7d7b867fbdda26aL,0xaeb7b83f6708505cL,
+ 0x42a94a5a162fe89fL } },
+ /* 37 */
+ { { 0x5846ad0beaadf191L,0x0f8a489025a268d7L,0xe8603050494dc1f6L,
+ 0x2c2dd969c65ede3dL },
+ { 0x6d02171d93849c17L,0x460488ba1da250ddL,0x4810c7063c3a5485L,
+ 0xf437fa1f42c56dbcL } },
+ /* 38 */
+ { { 0x6aa0d7144a0f7dabL,0x0f0497931776e9acL,0x52c0a050f5f39786L,
+ 0xaaf45b3354707aa8L },
+ { 0x85e37c33c18d364aL,0xd40b9b063e497165L,0xf417168115ec5444L,
+ 0xcdf6310df4f272bcL } },
+ /* 39 */
+ { { 0x7473c6238ea8b7efL,0x08e9351885bc2287L,0x419567722bda8e34L,
+ 0xf0d008bada9e2ff2L },
+ { 0x2912671d2414d3b1L,0xb3754985b019ea76L,0x5c61b96d453bcbdbL,
+ 0x5bd5c2f5ca887b8bL } },
+ /* 40 */
+ { { 0xef0f469ef49a3154L,0x3e85a5956e2b2e9aL,0x45aaec1eaa924a9cL,
+ 0xaa12dfc8a09e4719L },
+ { 0x26f272274df69f1dL,0xe0e4c82ca2ff5e73L,0xb9d8ce73b7a9dd44L,
+ 0x6c036e73e48ca901L } },
+ /* 41 */
+ { { 0x5cfae12a0f6e3138L,0x6966ef0025ad345aL,0x8993c64b45672bc5L,
+ 0x292ff65896afbe24L },
+ { 0xd5250d445e213402L,0xf6580e274392c9feL,0x097b397fda1c72e8L,
+ 0x644e0c90311b7276L } },
+ /* 42 */
+ { { 0xe1e421e1a47153f0L,0xb86c3b79920418c9L,0x93bdce87705d7672L,
+ 0xf25ae793cab79a77L },
+ { 0x1f3194a36d869d0cL,0x9d55c8824986c264L,0x49fb5ea3096e945eL,
+ 0x39b8e65313db0a3eL } },
+ /* 43 */
+ { { 0x37754200b6fd2e59L,0x35e2c0669255c98fL,0xd9dab21a0e2a5739L,
+ 0x39122f2f0f19db06L },
+ { 0xcfbce1e003cad53cL,0x225b2c0fe65c17e3L,0x72baf1d29aa13877L,
+ 0x8de80af8ce80ff8dL } },
+ /* 44 */
+ { { 0xafbea8d9207bbb76L,0x921c7e7c21782758L,0xdfa2b74b1c0436b1L,
+ 0x871949062e368c04L },
+ { 0xb5f928bba3993df5L,0x639d75b5f3b3d26aL,0x011aa78a85b55050L,
+ 0xfc315e6a5b74fde1L } },
+ /* 45 */
+ { { 0x561fd41ae8d6ecfaL,0x5f8c44f61aec7f86L,0x98452a7b4924741dL,
+ 0xe6d4a7adee389088L },
+ { 0x60552ed14593c75dL,0x70a70da4dd271162L,0xd2aede937ba2c7dbL,
+ 0x35dfaf9a9be2ae57L } },
+ /* 46 */
+ { { 0x6b956fcdaa736636L,0x09f51d97ae2cab7eL,0xfb10bf410f349966L,
+ 0x1da5c7d71c830d2bL },
+ { 0x5c41e4833cce6825L,0x15ad118ff9573c3bL,0xa28552c7f23036b8L,
+ 0x7077c0fddbf4b9d6L } },
+ /* 47 */
+ { { 0xbf63ff8d46b9661cL,0xa1dfd36b0d2cfd71L,0x0373e140a847f8f7L,
+ 0x53a8632ee50efe44L },
+ { 0x0976ff68696d8051L,0xdaec0c95c74f468aL,0x62994dc35e4e26bdL,
+ 0x028ca76d34e1fcc1L } },
+ /* 48 */
+ { { 0xd11d47dcfc9877eeL,0xc8b36210801d0002L,0xd002c11754c260b6L,
+ 0x04c17cd86962f046L },
+ { 0x6d9bd094b0daddf5L,0xbea2357524ce55c0L,0x663356e672da03b5L,
+ 0xf7ba4de9fed97474L } },
+ /* 49 */
+ { { 0xd0dbfa34ebe1263fL,0x5576373571ae7ce6L,0xd244055382a6f523L,
+ 0xe31f960052131c41L },
+ { 0xd1bb9216ea6b6ec6L,0x37a1d12e73c2fc44L,0xc10e7eac89d0a294L,
+ 0xaa3a6259ce34d47bL } },
+ /* 50 */
+ { { 0xfbcf9df536f3dcd3L,0x6ceded50d2bf7360L,0x491710fadf504f5bL,
+ 0x2398dd627e79daeeL },
+ { 0xcf4705a36d09569eL,0xea0619bb5149f769L,0xff9c037735f6034cL,
+ 0x5717f5b21c046210L } },
+ /* 51 */
+ { { 0x9fe229c921dd895eL,0x8e51850040c28451L,0xfa13d2391d637ecdL,
+ 0x660a2c560e3c28deL },
+ { 0x9cca88aed67fcbd0L,0xc84724780ea9f096L,0x32b2f48172e92b4dL,
+ 0x624ee54c4f522453L } },
+ /* 52 */
+ { { 0x09549ce4d897ecccL,0x4d49d1d93f9880aaL,0x723c2423043a7c20L,
+ 0x4f392afb92bdfbc0L },
+ { 0x6969f8fa7de44fd9L,0xb66cfbe457b32156L,0xdb2fa803368ebc3cL,
+ 0x8a3e7977ccdb399cL } },
+ /* 53 */
+ { { 0xdde1881f06c4b125L,0xae34e300f6e3ca8cL,0xef6999de5c7a13e9L,
+ 0x3888d02370c24404L },
+ { 0x7628035644f91081L,0x3d9fcf615f015504L,0x1827edc8632cd36eL,
+ 0xa5e62e4718102336L } },
+ /* 54 */
+ { { 0x1a825ee32facd6c8L,0x699c635454bcbc66L,0x0ce3edf798df9931L,
+ 0x2c4768e6466a5adcL },
+ { 0xb346ff8c90a64bc9L,0x630a6020e4779f5cL,0xd949d064bc05e884L,
+ 0x7b5e6441f9e652a0L } },
+ /* 55 */
+ { { 0x2169422c1d28444aL,0xe996c5d8be136a39L,0x2387afe5fb0c7fceL,
+ 0xb8af73cb0c8d744aL },
+ { 0x5fde83aa338b86fdL,0xfee3f158a58a5cffL,0xc9ee8f6f20ac9433L,
+ 0xa036395f7f3f0895L } },
+ /* 56 */
+ { { 0x8c73c6bba10f7770L,0xa6f16d81a12a0e24L,0x100df68251bc2b9fL,
+ 0x4be36b01875fb533L },
+ { 0x9226086e9fb56dbbL,0x306fef8b07e7a4f8L,0xeeaccc0566d52f20L,
+ 0x8cbc9a871bdc00c0L } },
+ /* 57 */
+ { { 0xe131895cc0dac4abL,0xa874a440712ff112L,0x6332ae7c6a1cee57L,
+ 0x44e7553e0c0835f8L },
+ { 0x6d503fff7734002dL,0x9d35cb8b0b34425cL,0x95f702760e8738b5L,
+ 0x470a683a5eb8fc18L } },
+ /* 58 */
+ { { 0x81b761dc90513482L,0x0287202a01e9276aL,0xcda441ee0ce73083L,
+ 0x16410690c63dc6efL },
+ { 0xf5034a066d06a2edL,0xdd4d7745189b100bL,0xd914ae72ab8218c9L,
+ 0xd73479fd7abcbb4fL } },
+ /* 59 */
+ { { 0x7edefb165ad4c6e5L,0x262cf08f5b06d04dL,0x12ed5bb18575cb14L,
+ 0x816469e30771666bL },
+ { 0xd7ab9d79561e291eL,0xeb9daf22c1de1661L,0xf49827eb135e0513L,
+ 0x0a36dd23f0dd3f9cL } },
+ /* 60 */
+ { { 0x098d32c741d5533cL,0x7c5f5a9e8684628fL,0x39a228ade349bd11L,
+ 0xe331dfd6fdbab118L },
+ { 0x5100ab686bcc6ed8L,0x7160c3bdef7a260eL,0x9063d9a7bce850d7L,
+ 0xd3b4782a492e3389L } },
+ /* 61 */
+ { { 0xa149b6e8f3821f90L,0x92edd9ed66eb7aadL,0x0bb669531a013116L,
+ 0x7281275a4c86a5bdL },
+ { 0x503858f7d3ff47e5L,0x5e1616bc61016441L,0x62b0f11a7dfd9bb1L,
+ 0x2c062e7ece145059L } },
+ /* 62 */
+ { { 0xa76f996f0159ac2eL,0x281e7736cbdb2713L,0x2ad6d28808e46047L,
+ 0x282a35f92c4e7ef1L },
+ { 0x9c354b1ec0ce5cd2L,0xcf99efc91379c229L,0x992caf383e82c11eL,
+ 0xc71cd513554d2abdL } },
+ /* 63 */
+ { { 0x4885de9c09b578f4L,0x1884e258e3affa7aL,0x8f76b1b759182f1fL,
+ 0xc50f6740cf47f3a3L },
+ { 0xa9c4adf3374b68eaL,0xa406f32369965fe2L,0x2f86a22285a53050L,
+ 0xb9ecb3a7212958dcL } },
+ /* 64 */
+ { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L,
+ 0x803f3e02cd42ab1bL },
+ { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL,
+ 0xc097440e5067adc1L } },
+ /* 65 */
+ { { 0x846a56f2c379ab34L,0xa8ee068b841df8d1L,0x20314459176c68efL,
+ 0xf1af32d5915f1f30L },
+ { 0x99c375315d75bd50L,0x837cffbaf72f67bcL,0x0613a41848d7723fL,
+ 0x23d0f130e2d41c8bL } },
+ /* 66 */
+ { { 0x857ab6edf41500d9L,0x0d890ae5fcbeada8L,0x52fe864889725951L,
+ 0xb0288dd6c0a3faddL },
+ { 0x85320f30650bcb08L,0x71af6313695d6e16L,0x31f520a7b989aa76L,
+ 0xffd3724ff408c8d2L } },
+ /* 67 */
+ { { 0x53968e64b458e6cbL,0x992dad20317a5d28L,0x3814ae0b7aa75f56L,
+ 0xf5590f4ad78c26dfL },
+ { 0x0fc24bd3cf0ba55aL,0x0fc4724a0c778baeL,0x1ce9864f683b674aL,
+ 0x18d6da54f6f74a20L } },
+ /* 68 */
+ { { 0xed93e225d5be5a2bL,0x6fe799835934f3c6L,0x4314092622626ffcL,
+ 0x50bbb4d97990216aL },
+ { 0x378191c6e57ec63eL,0x65422c40181dcdb2L,0x41a8099b0236e0f6L,
+ 0x2b10011801fe49c3L } },
+ /* 69 */
+ { { 0xfc68b5c59b391593L,0xc385f5a2598270fcL,0x7144f3aad19adcbbL,
+ 0xdd55899983fbae0cL },
+ { 0x93b88b8e74b82ff4L,0xd2e03c4071e734c9L,0x9a7a9eaf43c0322aL,
+ 0xe6e4c551149d6041L } },
+ /* 70 */
+ { { 0x55f655bb1e9af288L,0x647e1a64f7ada931L,0x43697e4bcb2820e5L,
+ 0x51e00db107ed56ffL },
+ { 0x43d169b8771c327eL,0x29cdb20b4a96c2adL,0xc07d51f53deb4779L,
+ 0xe22f424149829177L } },
+ /* 71 */
+ { { 0xcd45e8f4635f1abbL,0x7edc0cb568538874L,0xc9472c1fb5a8034dL,
+ 0xf709373d52dc48c9L },
+ { 0x401966bba8af30d6L,0x95bf5f4af137b69cL,0x3966162a9361c47eL,
+ 0xbd52d288e7275b11L } },
+ /* 72 */
+ { { 0xab155c7a9c5fa877L,0x17dad6727d3a3d48L,0x43f43f9e73d189d8L,
+ 0xa0d0f8e4c8aa77a6L },
+ { 0x0bbeafd8cc94f92dL,0xd818c8be0c4ddb3aL,0x22cc65f8b82eba14L,
+ 0xa56c78c7946d6a00L } },
+ /* 73 */
+ { { 0x2962391b0dd09529L,0x803e0ea63daddfcfL,0x2c77351f5b5bf481L,
+ 0xd8befdf8731a367aL },
+ { 0xab919d42fc0157f4L,0xf51caed7fec8e650L,0xcdf9cb4002d48b0aL,
+ 0x854a68a5ce9f6478L } },
+ /* 74 */
+ { { 0xdc35f67b63506ea5L,0x9286c489a4fe0d66L,0x3f101d3bfe95cd4dL,
+ 0x5cacea0b98846a95L },
+ { 0xa90df60c9ceac44dL,0x3db29af4354d1c3aL,0x08dd3de8ad5dbabeL,
+ 0xe4982d1235e4efa9L } },
+ /* 75 */
+ { { 0x23104a22c34cd55eL,0x58695bb32680d132L,0xfb345afa1fa1d943L,
+ 0x8046b7f616b20499L },
+ { 0xb533581e38e7d098L,0xd7f61e8df46f0b70L,0x30dea9ea44cb78c4L,
+ 0xeb17ca7b9082af55L } },
+ /* 76 */
+ { { 0x1751b59876a145b9L,0xa5cf6b0fc1bc71ecL,0xd3e03565392715bbL,
+ 0x097b00bafab5e131L },
+ { 0xaa66c8e9565f69e1L,0x77e8f75ab5be5199L,0x6033ba11da4fd984L,
+ 0xf95c747bafdbcc9eL } },
+ /* 77 */
+ { { 0x558f01d3bebae45eL,0xa8ebe9f0c4bc6955L,0xaeb705b1dbc64fc6L,
+ 0x3512601e566ed837L },
+ { 0x9336f1e1fa1161cdL,0x328ab8d54c65ef87L,0x4757eee2724f21e5L,
+ 0x0ef971236068ab6bL } },
+ /* 78 */
+ { { 0x02598cf754ca4226L,0x5eede138f8642c8eL,0x48963f74468e1790L,
+ 0xfc16d9333b4fbc95L },
+ { 0xbe96fb31e7c800caL,0x138063312678adaaL,0x3d6244976ff3e8b5L,
+ 0x14ca4af1b95d7a17L } },
+ /* 79 */
+ { { 0x7a4771babd2f81d5L,0x1a5f9d6901f7d196L,0xd898bef7cad9c907L,
+ 0x4057b063f59c231dL },
+ { 0xbffd82fe89c05c0aL,0xe4911c6f1dc0df85L,0x3befccaea35a16dbL,
+ 0x1c3b5d64f1330b13L } },
+ /* 80 */
+ { { 0x5fe14bfe80ec21feL,0xf6ce116ac255be82L,0x98bc5a072f4a5d67L,
+ 0xfad27148db7e63afL },
+ { 0x90c0b6ac29ab05b3L,0x37a9a83c4e251ae6L,0x0a7dc875c2aade7dL,
+ 0x77387de39f0e1a84L } },
+ /* 81 */
+ { { 0x1e9ecc49a56c0dd7L,0xa5cffcd846086c74L,0x8f7a1408f505aeceL,
+ 0xb37b85c0bef0c47eL },
+ { 0x3596b6e4cc0e6a8fL,0xfd6d4bbf6b388f23L,0xaba453fac39cef4eL,
+ 0x9c135ac8f9f628d5L } },
+ /* 82 */
+ { { 0x32aa320284e35743L,0x320d6ab185a3cdefL,0xb821b1761df19819L,
+ 0x5721361fc433851fL },
+ { 0x1f0db36a71fc9168L,0x5f98ba735e5c403cL,0xf64ca87e37bcd8f5L,
+ 0xdcbac3c9e6bb11bdL } },
+ /* 83 */
+ { { 0xf01d99684518cbe2L,0xd242fc189c9eb04eL,0x727663c7e47feebfL,
+ 0xb8c1c89e2d626862L },
+ { 0x51a58bddc8e1d569L,0x563809c8b7d88cd0L,0x26c27fd9f11f31ebL,
+ 0x5d23bbda2f9422d4L } },
+ /* 84 */
+ { { 0x0a1c729495c8f8beL,0x2961c4803bf362bfL,0x9e418403df63d4acL,
+ 0xc109f9cb91ece900L },
+ { 0xc2d095d058945705L,0xb9083d96ddeb85c0L,0x84692b8d7a40449bL,
+ 0x9bc3344f2eee1ee1L } },
+ /* 85 */
+ { { 0x0d5ae35642913074L,0x55491b2748a542b1L,0x469ca665b310732aL,
+ 0x29591d525f1a4cc1L },
+ { 0xe76f5b6bb84f983fL,0xbe7eef419f5f84e1L,0x1200d49680baa189L,
+ 0x6376551f18ef332cL } },
+ /* 86 */
+ { { 0xbda5f14e562976ccL,0x22bca3e60ef12c38L,0xbbfa30646cca9852L,
+ 0xbdb79dc808e2987aL },
+ { 0xfd2cb5c9cb06a772L,0x38f475aafe536dceL,0xc2a3e0227c2b5db8L,
+ 0x8ee86001add3c14aL } },
+ /* 87 */
+ { { 0xcbe96981a4ade873L,0x7ee9aa4dc4fba48cL,0x2cee28995a054ba5L,
+ 0x92e51d7a6f77aa4bL },
+ { 0x948bafa87190a34dL,0xd698f75bf6bd1ed1L,0xd00ee6e30caf1144L,
+ 0x5182f86f0a56aaaaL } },
+ /* 88 */
+ { { 0xfba6212c7a4cc99cL,0xff609b683e6d9ca1L,0x5dbb27cb5ac98c5aL,
+ 0x91dcab5d4073a6f2L },
+ { 0x01b6cc3d5f575a70L,0x0cb361396f8d87faL,0x165d4e8c89981736L,
+ 0x17a0cedb97974f2bL } },
+ /* 89 */
+ { { 0x38861e2a076c8d3aL,0x701aad39210f924bL,0x94d0eae413a835d9L,
+ 0x2e8ce36c7f4cdf41L },
+ { 0x91273dab037a862bL,0x01ba9bb760e4c8faL,0xf964538833baf2ddL,
+ 0xf4ccc6cb34f668f3L } },
+ /* 90 */
+ { { 0x44ef525cf1f79687L,0x7c59549592efa815L,0xe1231741a5c78d29L,
+ 0xac0db4889a0df3c9L },
+ { 0x86bfc711df01747fL,0x592b9358ef17df13L,0xe5880e4f5ccb6bb5L,
+ 0x95a64a6194c974a2L } },
+ /* 91 */
+ { { 0x72c1efdac15a4c93L,0x40269b7382585141L,0x6a8dfb1c16cb0badL,
+ 0x231e54ba29210677L },
+ { 0xa70df9178ae6d2dcL,0x4d6aa63f39112918L,0xf627726b5e5b7223L,
+ 0xab0be032d8a731e1L } },
+ /* 92 */
+ { { 0x097ad0e98d131f2dL,0x637f09e33b04f101L,0x1ac86196d5e9a748L,
+ 0xf1bcc8802cf6a679L },
+ { 0x25c69140e8daacb4L,0x3c4e405560f65009L,0x591cc8fc477937a6L,
+ 0x851694695aebb271L } },
+ /* 93 */
+ { { 0xde35c143f1dcf593L,0x78202b29b018be3bL,0xe9cdadc29bdd9d3dL,
+ 0x8f67d9d2daad55d8L },
+ { 0x841116567481ea5fL,0xe7d2dde9e34c590cL,0xffdd43f405053fa8L,
+ 0xf84572b9c0728b5dL } },
+ /* 94 */
+ { { 0x5e1a7a7197af71c9L,0xa14494447a736565L,0xa1b4ae070e1d5063L,
+ 0xedee2710616b2c19L },
+ { 0xb2f034f511734121L,0x1cac6e554a25e9f0L,0x8dc148f3a40c2ecfL,
+ 0x9fd27e9b44ebd7f4L } },
+ /* 95 */
+ { { 0x3cc7658af6e2cb16L,0xe3eb7d2cfe5919b6L,0x5a8c5816168d5583L,
+ 0xa40c2fb6958ff387L },
+ { 0x8c9ec560fedcc158L,0x7ad804c655f23056L,0xd93967049a307e12L,
+ 0x99bc9bb87dc6decfL } },
+ /* 96 */
+ { { 0x84a9521d927dafc6L,0x52c1fb695c09cd19L,0x9d9581a0f9366ddeL,
+ 0x9abe210ba16d7e64L },
+ { 0x480af84a48915220L,0xfa73176a4dd816c6L,0xc7d539871681ca5aL,
+ 0x7881c25787f344b0L } },
+ /* 97 */
+ { { 0x93399b51e0bcf3ffL,0x0d02cbc5127f74f6L,0x8fb465a2dd01d968L,
+ 0x15e6e319a30e8940L },
+ { 0x646d6e0d3e0e05f4L,0xfad7bddc43588404L,0xbe61c7d1c4f850d3L,
+ 0x0e55facf191172ceL } },
+ /* 98 */
+ { { 0x7e9d9806f8787564L,0x1a33172131e85ce6L,0x6b0158cab819e8d6L,
+ 0xd73d09766fe96577L },
+ { 0x424834251eb7206eL,0xa519290fc618bb42L,0x5dcbb8595e30a520L,
+ 0x9250a3748f15a50bL } },
+ /* 99 */
+ { { 0xcaff08f8be577410L,0xfd408a035077a8c6L,0xf1f63289ec0a63a4L,
+ 0x77414082c1cc8c0bL },
+ { 0x05a40fa6eb0991cdL,0xc1ca086649fdc296L,0x3a68a3c7b324fd40L,
+ 0x8cb04f4d12eb20b9L } },
+ /* 100 */
+ { { 0xb1c2d0556906171cL,0x9073e9cdb0240c3fL,0xdb8e6b4fd8906841L,
+ 0xe4e429ef47123b51L },
+ { 0x0b8dd53c38ec36f4L,0xf9d2dc01ff4b6a27L,0x5d066e07879a9a48L,
+ 0x37bca2ff3c6e6552L } },
+ /* 101 */
+ { { 0x4cd2e3c7df562470L,0x44f272a2c0964ac9L,0x7c6d5df980c793beL,
+ 0x59913edc3002b22aL },
+ { 0x7a139a835750592aL,0x99e01d80e783de02L,0xcf8c0375ea05d64fL,
+ 0x43786e4ab013e226L } },
+ /* 102 */
+ { { 0xff32b0ed9e56b5a6L,0x0750d9a6d9fc68f9L,0xec15e845597846a7L,
+ 0x8638ca98b7e79e7aL },
+ { 0x2f5ae0960afc24b2L,0x05398eaf4dace8f2L,0x3b765dd0aecba78fL,
+ 0x1ecdd36a7b3aa6f0L } },
+ /* 103 */
+ { { 0x5d3acd626c5ff2f3L,0xa2d516c02873a978L,0xad94c9fad2110d54L,
+ 0xd85d0f85d459f32dL },
+ { 0x9f700b8d10b11da3L,0xd2c22c30a78318c4L,0x556988f49208decdL,
+ 0xa04f19c3b4ed3c62L } },
+ /* 104 */
+ { { 0x087924c8ed7f93bdL,0xcb64ac5d392f51f6L,0x7cae330a821b71afL,
+ 0x92b2eeea5c0950b0L },
+ { 0x85ac4c9485b6e235L,0xab2ca4a92936c0f0L,0x80faa6b3e0508891L,
+ 0x1ee782215834276cL } },
+ /* 105 */
+ { { 0xa60a2e00e63e79f7L,0xf590e7b2f399d906L,0x9021054a6607c09dL,
+ 0xf3f2ced857a6e150L },
+ { 0x200510f3f10d9b55L,0x9d2fcfacd8642648L,0xe5631aa7e8bd0e7cL,
+ 0x0f56a4543da3e210L } },
+ /* 106 */
+ { { 0x5b21bffa1043e0dfL,0x6c74b6cc9c007e6dL,0x1a656ec0d4a8517aL,
+ 0xbd8f17411969e263L },
+ { 0x8a9bbb86beb7494aL,0x1567d46f45f3b838L,0xdf7a12a7a4e5a79aL,
+ 0x2d1a1c3530ccfa09L } },
+ /* 107 */
+ { { 0x192e3813506508daL,0x336180c4a1d795a7L,0xcddb59497a9944b3L,
+ 0xa107a65eb91fba46L },
+ { 0xe6d1d1c50f94d639L,0x8b4af3758a58b7d7L,0x1a7c5584bd37ca1cL,
+ 0x183d760af87a9af2L } },
+ /* 108 */
+ { { 0x29d697110dde59a4L,0xf1ad8d070e8bef87L,0x229b49634f2ebe78L,
+ 0x1d44179dc269d754L },
+ { 0xb32dc0cf8390d30eL,0x0a3b27530de8110cL,0x31af1dc52bc0339aL,
+ 0x771f9cc29606d262L } },
+ /* 109 */
+ { { 0x99993e7785040739L,0x44539db98026a939L,0xcf40f6f2f5f8fc26L,
+ 0x64427a310362718eL },
+ { 0x4f4f2d8785428aa8L,0x7b7adc3febfb49a8L,0x201b2c6df23d01acL,
+ 0x49d9b7496ae90d6dL } },
+ /* 110 */
+ { { 0xcc78d8bc435d1099L,0x2adbcd4e8e8d1a08L,0x02c2e2a02cb68a41L,
+ 0x9037d81b3f605445L },
+ { 0x7cdbac27074c7b61L,0xfe2031ab57bfd72eL,0x61ccec96596d5352L,
+ 0x08c3de6a7cc0639cL } },
+ /* 111 */
+ { { 0x20fdd020f6d552abL,0x56baff9805cd81f1L,0x06fb7c3e91351291L,
+ 0xc690944245796b2fL },
+ { 0x17b3ae9c41231bd1L,0x1eac6e875cc58205L,0x208837abf9d6a122L,
+ 0x3fa3db02cafe3ac0L } },
+ /* 112 */
+ { { 0xd75a3e6505058880L,0x7da365ef643943f2L,0x4147861cfab24925L,
+ 0xc5c4bdb0fdb808ffL },
+ { 0x73513e34b272b56bL,0xc8327e9511b9043aL,0xfd8ce37df8844969L,
+ 0x2d56db9446c2b6b5L } },
+ /* 113 */
+ { { 0x2461782fff46ac6bL,0xd19f792607a2e425L,0xfafea3c409a48de1L,
+ 0x0f56bd9de503ba42L },
+ { 0x137d4ed1345cda49L,0x821158fc816f299dL,0xe7c6a54aaeb43402L,
+ 0x4003bb9d1173b5f1L } },
+ /* 114 */
+ { { 0x3b8e8189a0803387L,0xece115f539cbd404L,0x4297208dd2877f21L,
+ 0x53765522a07f2f9eL },
+ { 0xa4980a21a8a4182dL,0xa2bbd07a3219df79L,0x674d0a2e1a19a2d4L,
+ 0x7a056f586c5d4549L } },
+ /* 115 */
+ { { 0x646b25589d8a2a47L,0x5b582948c3df2773L,0x51ec000eabf0d539L,
+ 0x77d482f17a1a2675L },
+ { 0xb8a1bd9587853948L,0xa6f817bd6cfbffeeL,0xab6ec05780681e47L,
+ 0x4115012b2b38b0e4L } },
+ /* 116 */
+ { { 0x3c73f0f46de28cedL,0x1d5da7609b13ec47L,0x61b8ce9e6e5c6392L,
+ 0xcdf04572fbea0946L },
+ { 0x1cb3c58b6c53c3b0L,0x97fe3c10447b843cL,0xfb2b8ae12cb9780eL,
+ 0xee703dda97383109L } },
+ /* 117 */
+ { { 0x34515140ff57e43aL,0xd44660d3b1b811b8L,0x2b3b5dff8f42b986L,
+ 0x2a0ad89da162ce21L },
+ { 0x64e4a6946bc277baL,0xc788c954c141c276L,0x141aa64ccabf6274L,
+ 0xd62d0b67ac2b4659L } },
+ /* 118 */
+ { { 0x39c5d87b2c054ac4L,0x57005859f27df788L,0xedf7cbf3b18128d6L,
+ 0xb39a23f2991c2426L },
+ { 0x95284a15f0b16ae5L,0x0c6a05b1a136f51bL,0x1d63c137f2700783L,
+ 0x04ed0092c0674cc5L } },
+ /* 119 */
+ { { 0x1f4185d19ae90393L,0x3047b4294a3d64e6L,0xae0001a69854fc14L,
+ 0xa0a91fc10177c387L },
+ { 0xff0a3f01ae2c831eL,0xbb76ae822b727e16L,0x8f12c8a15a3075b4L,
+ 0x084cf9889ed20c41L } },
+ /* 120 */
+ { { 0xd98509defca6becfL,0x2fceae807dffb328L,0x5d8a15c44778e8b9L,
+ 0xd57955b273abf77eL },
+ { 0x210da79e31b5d4f1L,0xaa52f04b3cfa7a1cL,0xd4d12089dc27c20bL,
+ 0x8e14ea4202d141f1L } },
+ /* 121 */
+ { { 0xeed50345f2897042L,0x8d05331f43402c4aL,0xc8d9c194c8bdfb21L,
+ 0x597e1a372aa4d158L },
+ { 0x0327ec1acf0bd68cL,0x6d4be0dcab024945L,0x5b9c8d7ac9fe3e84L,
+ 0xca3f0236199b4deaL } },
+ /* 122 */
+ { { 0x592a10b56170bd20L,0x0ea897f16d3f5de7L,0xa3363ff144b2ade2L,
+ 0xbde7fd7e309c07e4L },
+ { 0x516bb6d2b8f5432cL,0x210dc1cbe043444bL,0x3db01e6ff8f95b5aL,
+ 0xb623ad0e0a7dd198L } },
+ /* 123 */
+ { { 0xa75bd67560c7b65bL,0xab8c559023a4a289L,0xf8220fd0d7b26795L,
+ 0xd6aa2e4658ec137bL },
+ { 0x10abc00b5138bb85L,0x8c31d121d833a95cL,0xb24ff00b1702a32eL,
+ 0x111662e02dcc513aL } },
+ /* 124 */
+ { { 0x78114015efb42b87L,0xbd9f5d701b6c4dffL,0x66ecccd7a7d7c129L,
+ 0xdb3ee1cb94b750f8L },
+ { 0xb26f3db0f34837cfL,0xe7eed18bb9578d4fL,0x5d2cdf937c56657dL,
+ 0x886a644252206a59L } },
+ /* 125 */
+ { { 0x3c234cfb65b569eaL,0x20011141f72119c1L,0x8badc85da15a619eL,
+ 0xa70cf4eb018a17bcL },
+ { 0x224f97ae8c4a6a65L,0x36e5cf270134378fL,0xbe3a609e4f7e0960L,
+ 0xaa4772abd1747b77L } },
+ /* 126 */
+ { { 0x676761317aa60cc0L,0xc79163610368115fL,0xded98bb4bbc1bb5aL,
+ 0x611a6ddc30faf974L },
+ { 0x30e78cbcc15ee47aL,0x2e8962824e0d96a5L,0x36f35adf3dd9ed88L,
+ 0x5cfffaf816429c88L } },
+ /* 127 */
+ { { 0xc0d54cff9b7a99cdL,0x7bf3b99d843c45a1L,0x038a908f62c739e1L,
+ 0x6e5a6b237dc1994cL },
+ { 0xef8b454e0ba5db77L,0xb7b8807facf60d63L,0xe591c0c676608378L,
+ 0x481a238d242dabccL } },
+ /* 128 */
+ { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L,
+ 0x2c41114ce0cdf943L },
+ { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L,
+ 0x20477abf42ff9297L } },
+ /* 129 */
+ { { 0x2f75173c18d65dbfL,0x77bf940e339edad8L,0x7022d26bdcf1001cL,
+ 0xac66409ac77396b6L },
+ { 0x8b0bb36fc6261cc3L,0x213f7bc9190e7e90L,0x6541cebaa45e6c10L,
+ 0xce8e6975cc122f85L } },
+ /* 130 */
+ { { 0x0f121b41bc0a67d2L,0x62d4760a444d248aL,0x0e044f1d659b4737L,
+ 0x08fde365250bb4a8L },
+ { 0xaceec3da848bf287L,0xc2a62182d3369d6eL,0x3582dfdc92449482L,
+ 0x2f7e2fd2565d6cd7L } },
+ /* 131 */
+ { { 0xae4b92dbc3770fa7L,0x095e8d5c379043f9L,0x54f34e9d17761171L,
+ 0xc65be92e907702aeL },
+ { 0x2758a303f6fd0a40L,0xe7d822e3bcce784bL,0x7ae4f5854f9767bfL,
+ 0x4bff8e47d1193b3aL } },
+ /* 132 */
+ { { 0xcd41d21f00ff1480L,0x2ab8fb7d0754db16L,0xac81d2efbbe0f3eaL,
+ 0x3e4e4ae65772967dL },
+ { 0x7e18f36d3c5303e6L,0x3bd9994b92262397L,0x9ed70e261324c3c0L,
+ 0x5388aefd58ec6028L } },
+ /* 133 */
+ { { 0xad1317eb5e5d7713L,0x09b985ee75de49daL,0x32f5bc4fc74fb261L,
+ 0x5cf908d14f75be0eL },
+ { 0x760435108e657b12L,0xbfd421a5b96ed9e6L,0x0e29f51f8970ccc2L,
+ 0xa698ba4060f00ce2L } },
+ /* 134 */
+ { { 0x73db1686ef748fecL,0xe6e755a27e9d2cf9L,0x630b6544ce265effL,
+ 0xb142ef8a7aebad8dL },
+ { 0xad31af9f17d5770aL,0x66af3b672cb3412fL,0x6bd60d1bdf3359deL,
+ 0xd1896a9658515075L } },
+ /* 135 */
+ { { 0xec5957ab33c41c08L,0x87de94ac5468e2e1L,0x18816b73ac472f6cL,
+ 0x267b0e0b7981da39L },
+ { 0x6e554e5d8e62b988L,0xd8ddc755116d21e7L,0x4610faf03d2a6f99L,
+ 0xb54e287aa1119393L } },
+ /* 136 */
+ { { 0x0a0122b5178a876bL,0x51ff96ff085104b4L,0x050b31ab14f29f76L,
+ 0x84abb28b5f87d4e6L },
+ { 0xd5ed439f8270790aL,0x2d6cb59d85e3f46bL,0x75f55c1b6c1e2212L,
+ 0xe5436f6717655640L } },
+ /* 137 */
+ { { 0x53f9025e2286e8d5L,0x353c95b4864453beL,0xd832f5bde408e3a0L,
+ 0x0404f68b5b9ce99eL },
+ { 0xcad33bdea781e8e5L,0x3cdf5018163c2f5bL,0x575769600119caa3L,
+ 0x3a4263df0ac1c701L } },
+ /* 138 */
+ { { 0xc2965ecc9aeb596dL,0x01ea03e7023c92b4L,0x4704b4b62e013961L,
+ 0x0ca8fd3f905ea367L },
+ { 0x92523a42551b2b61L,0x1eb7a89c390fcd06L,0xe7f1d2be0392a63eL,
+ 0x96dca2644ddb0c33L } },
+ /* 139 */
+ { { 0x203bb43a387510afL,0x846feaa8a9a36a01L,0xd23a57702f950378L,
+ 0x4363e2123aad59dcL },
+ { 0xca43a1c740246a47L,0xb362b8d2e55dd24dL,0xf9b086045d8faf96L,
+ 0x840e115cd8bb98c4L } },
+ /* 140 */
+ { { 0xf12205e21023e8a7L,0xc808a8cdd8dc7a0bL,0xe292a272163a5ddfL,
+ 0x5e0d6abd30ded6d4L },
+ { 0x07a721c27cfc0f64L,0x42eec01d0e55ed88L,0x26a7bef91d1f9db2L,
+ 0x7dea48f42945a25aL } },
+ /* 141 */
+ { { 0xabdf6f1ce5060a81L,0xe79f9c72f8f95615L,0xcfd36c5406ac268bL,
+ 0xabc2a2beebfd16d1L },
+ { 0x8ac66f91d3e2eac7L,0x6f10ba63d2dd0466L,0x6790e3770282d31bL,
+ 0x4ea353946c7eefc1L } },
+ /* 142 */
+ { { 0xed8a2f8d5266309dL,0x0a51c6c081945a3eL,0xcecaf45a578c5dc1L,
+ 0x3a76e6891c94ffc3L },
+ { 0x9aace8a47d7b0d0fL,0x963ace968f584a5fL,0x51a30c724e697fbeL,
+ 0x8212a10a465e6464L } },
+ /* 143 */
+ { { 0xef7c61c3cfab8caaL,0x18eb8e840e142390L,0xcd1dff677e9733caL,
+ 0xaa7cab71599cb164L },
+ { 0x02fc9273bc837bd1L,0xc06407d0c36af5d7L,0x17621292f423da49L,
+ 0x40e38073fe0617c3L } },
+ /* 144 */
+ { { 0xf4f80824a7bf9b7cL,0x365d23203fbe30d0L,0xbfbe532097cf9ce3L,
+ 0xe3604700b3055526L },
+ { 0x4dcb99116cc6c2c7L,0x72683708ba4cbee6L,0xdcded434637ad9ecL,
+ 0x6542d677a3dee15fL } },
+ /* 145 */
+ { { 0x3f32b6d07b6c377aL,0x6cb03847903448beL,0xd6fdd3a820da8af7L,
+ 0xa6534aee09bb6f21L },
+ { 0x30a1780d1035facfL,0x35e55a339dcb47e6L,0x6ea50fe1c447f393L,
+ 0xf3cb672fdc9aef22L } },
+ /* 146 */
+ { { 0xeb3719fe3b55fd83L,0xe0d7a46c875ddd10L,0x33ac9fa905cea784L,
+ 0x7cafaa2eaae870e7L },
+ { 0x9b814d041d53b338L,0xe0acc0a0ef87e6c6L,0xfb93d10811672b0fL,
+ 0x0aab13c1b9bd522eL } },
+ /* 147 */
+ { { 0xddcce278d2681297L,0xcb350eb1b509546aL,0x2dc431737661aaf2L,
+ 0x4b91a602847012e9L },
+ { 0xdcff109572f8ddcfL,0x08ebf61e9a911af4L,0x48f4360ac372430eL,
+ 0x49534c5372321cabL } },
+ /* 148 */
+ { { 0x83df7d71f07b7e9dL,0xa478efa313cd516fL,0x78ef264b6c047ee3L,
+ 0xcaf46c4fd65ac5eeL },
+ { 0xa04d0c7792aa8266L,0xedf45466913684bbL,0x56e65168ae4b16b0L,
+ 0x14ce9e5704c6770fL } },
+ /* 149 */
+ { { 0x99445e3e965e8f91L,0xd3aca1bacb0f2492L,0xd31cc70f90c8a0a0L,
+ 0x1bb708a53e4c9a71L },
+ { 0xd5ca9e69558bdd7aL,0x734a0508018a26b1L,0xb093aa714c9cf1ecL,
+ 0xf9d126f2da300102L } },
+ /* 150 */
+ { { 0x749bca7aaff9563eL,0xdd077afeb49914a0L,0xe27a0311bf5f1671L,
+ 0x807afcb9729ecc69L },
+ { 0x7f8a9337c9b08b77L,0x86c3a785443c7e38L,0x85fafa59476fd8baL,
+ 0x751adcd16568cd8cL } },
+ /* 151 */
+ { { 0x8aea38b410715c0dL,0xd113ea718f7697f7L,0x665eab1493fbf06dL,
+ 0x29ec44682537743fL },
+ { 0x3d94719cb50bebbcL,0x399ee5bfe4505422L,0x90cd5b3a8d2dedb1L,
+ 0xff9370e392a4077dL } },
+ /* 152 */
+ { { 0x59a2d69bc6b75b65L,0x4188f8d5266651c5L,0x28a9f33e3de9d7d2L,
+ 0x9776478ba2a9d01aL },
+ { 0x8852622d929af2c7L,0x334f5d6d4e690923L,0xce6cc7e5a89a51e9L,
+ 0x74a6313fac2f82faL } },
+ /* 153 */
+ { { 0xb2f4dfddb75f079cL,0x85b07c9518e36fbbL,0x1b6cfcf0e7cd36ddL,
+ 0xab75be150ff4863dL },
+ { 0x81b367c0173fc9b7L,0xb90a7420d2594fd0L,0x15fdbf03c4091236L,
+ 0x4ebeac2e0b4459f6L } },
+ /* 154 */
+ { { 0xeb6c5fe75c9f2c53L,0xd25220118eae9411L,0xc8887633f95ac5d8L,
+ 0xdf99887b2c1baffcL },
+ { 0xbb78eed2850aaecbL,0x9d49181b01d6a272L,0x978dd511b1cdbcacL,
+ 0x27b040a7779f4058L } },
+ /* 155 */
+ { { 0x90405db7f73b2eb2L,0xe0df85088e1b2118L,0x501b71525962327eL,
+ 0xb393dd37e4cfa3f5L },
+ { 0xa1230e7b3fd75165L,0xd66344c2bcd33554L,0x6c36f1be0f7b5022L,
+ 0x09588c12d0463419L } },
+ /* 156 */
+ { { 0xe086093f02601c3bL,0xfb0252f8cf5c335fL,0x955cf280894aff28L,
+ 0x81c879a9db9f648bL },
+ { 0x040e687cc6f56c51L,0xfed471693f17618cL,0x44f88a419059353bL,
+ 0xfa0d48f55fc11bc4L } },
+ /* 157 */
+ { { 0xbc6e1c9de1608e4dL,0x010dda113582822cL,0xf6b7ddc1157ec2d7L,
+ 0x8ea0e156b6a367d6L },
+ { 0xa354e02f2383b3b4L,0x69966b943f01f53cL,0x4ff6632b2de03ca5L,
+ 0x3f5ab924fa00b5acL } },
+ /* 158 */
+ { { 0x337bb0d959739efbL,0xc751b0f4e7ebec0dL,0x2da52dd6411a67d1L,
+ 0x8bc768872b74256eL },
+ { 0xa5be3b7282d3d253L,0xa9f679a1f58d779fL,0xa1cac168e16767bbL,
+ 0xb386f19060fcf34fL } },
+ /* 159 */
+ { { 0x31f3c1352fedcfc2L,0x5396bf6262f8af0dL,0x9a02b4eae57288c2L,
+ 0x4cb460f71b069c4dL },
+ { 0xae67b4d35b8095eaL,0x92bbf8596fc07603L,0xe1475f66b614a165L,
+ 0x52c0d50895ef5223L } },
+ /* 160 */
+ { { 0x231c210e15339848L,0xe87a28e870778c8dL,0x9d1de6616956e170L,
+ 0x4ac3c9382bb09c0bL },
+ { 0x19be05516998987dL,0x8b2376c4ae09f4d6L,0x1de0b7651a3f933dL,
+ 0x380d94c7e39705f4L } },
+ /* 161 */
+ { { 0x01a355aa81542e75L,0x96c724a1ee01b9b7L,0x6b3a2977624d7087L,
+ 0x2ce3e171de2637afL },
+ { 0xcfefeb49f5d5bc1aL,0xa655607e2777e2b5L,0x4feaac2f9513756cL,
+ 0x2e6cd8520b624e4dL } },
+ /* 162 */
+ { { 0x3685954b8c31c31dL,0x68533d005bf21a0cL,0x0bd7626e75c79ec9L,
+ 0xca17754742c69d54L },
+ { 0xcc6edafff6d2dbb2L,0xfd0d8cbd174a9d18L,0x875e8793aa4578e8L,
+ 0xa976a7139cab2ce6L } },
+ /* 163 */
+ { { 0x0a651f1b93fb353dL,0xd75cab8b57fcfa72L,0xaa88cfa731b15281L,
+ 0x8720a7170a1f4999L },
+ { 0x8c3e8d37693e1b90L,0xd345dc0b16f6dfc3L,0x8ea8d00ab52a8742L,
+ 0x9719ef29c769893cL } },
+ /* 164 */
+ { { 0x820eed8d58e35909L,0x9366d8dc33ddc116L,0xd7f999d06e205026L,
+ 0xa5072976e15704c1L },
+ { 0x002a37eac4e70b2eL,0x84dcf6576890aa8aL,0xcd71bf18645b2a5cL,
+ 0x99389c9df7b77725L } },
+ /* 165 */
+ { { 0x238c08f27ada7a4bL,0x3abe9d03fd389366L,0x6b672e89766f512cL,
+ 0xa88806aa202c82e4L },
+ { 0x6602044ad380184eL,0xa8cb78c4126a8b85L,0x79d670c0ad844f17L,
+ 0x0043bffb4738dcfeL } },
+ /* 166 */
+ { { 0x8d59b5dc36d5192eL,0xacf885d34590b2afL,0x83566d0a11601781L,
+ 0x52f3ef01ba6c4866L },
+ { 0x3986732a0edcb64dL,0x0a482c238068379fL,0x16cbe5fa7040f309L,
+ 0x3296bd899ef27e75L } },
+ /* 167 */
+ { { 0x476aba89454d81d7L,0x9eade7ef51eb9b3cL,0x619a21cd81c57986L,
+ 0x3b90febfaee571e9L },
+ { 0x9393023e5496f7cbL,0x55be41d87fb51bc4L,0x03f1dd4899beb5ceL,
+ 0x6e88069d9f810b18L } },
+ /* 168 */
+ { { 0xce37ab11b43ea1dbL,0x0a7ff1a95259d292L,0x851b02218f84f186L,
+ 0xa7222beadefaad13L },
+ { 0xa2ac78ec2b0a9144L,0x5a024051f2fa59c5L,0x91d1eca56147ce38L,
+ 0xbe94d523bc2ac690L } },
+ /* 169 */
+ { { 0x72f4945e0b226ce7L,0xb8afd747967e8b70L,0xedea46f185a6c63eL,
+ 0x7782defe9be8c766L },
+ { 0x760d2aa43db38626L,0x460ae78776f67ad1L,0x341b86fc54499cdbL,
+ 0x03838567a2892e4bL } },
+ /* 170 */
+ { { 0x2d8daefd79ec1a0fL,0x3bbcd6fdceb39c97L,0xf5575ffc58f61a95L,
+ 0xdbd986c4adf7b420L },
+ { 0x81aa881415f39eb7L,0x6ee2fcf5b98d976cL,0x5465475dcf2f717dL,
+ 0x8e24d3c46860bbd0L } },
+ /* 171 */
+ { { 0x749d8e549a587390L,0x12bb194f0cbec588L,0x46e07da4b25983c6L,
+ 0x541a99c4407bafc8L },
+ { 0xdb241692624c8842L,0x6044c12ad86c05ffL,0xc59d14b44f7fcf62L,
+ 0xc0092c49f57d35d1L } },
+ /* 172 */
+ { { 0xd3cc75c3df2e61efL,0x7e8841c82e1b35caL,0xc62d30d1909f29f4L,
+ 0x75e406347286944dL },
+ { 0xe7d41fc5bbc237d0L,0xc9537bf0ec4f01c9L,0x91c51a16282bd534L,
+ 0x5b7cb658c7848586L } },
+ /* 173 */
+ { { 0x964a70848a28ead1L,0x802dc508fd3b47f6L,0x9ae4bfd1767e5b39L,
+ 0x7ae13eba8df097a1L },
+ { 0xfd216ef8eadd384eL,0x0361a2d9b6b2ff06L,0x204b98784bcdb5f3L,
+ 0x787d8074e2a8e3fdL } },
+ /* 174 */
+ { { 0xc5e25d6b757fbb1cL,0xe47bddb2ca201debL,0x4a55e9a36d2233ffL,
+ 0x5c2228199ef28484L },
+ { 0x773d4a8588315250L,0x21b21a2b827097c1L,0xab7c4ea1def5d33fL,
+ 0xe45d37abbaf0f2b0L } },
+ /* 175 */
+ { { 0xd2df1e3428511c8aL,0xebb229c8bdca6cd3L,0x578a71a7627c39a7L,
+ 0xed7bc12284dfb9d3L },
+ { 0xcf22a6df93dea561L,0x5443f18dd48f0ed1L,0xd8b861405bad23e8L,
+ 0xaac97cc945ca6d27L } },
+ /* 176 */
+ { { 0xeb54ea74a16bd00aL,0xd839e9adf5c0bcc1L,0x092bb7f11f9bfc06L,
+ 0x318f97b31163dc4eL },
+ { 0xecc0c5bec30d7138L,0x44e8df23abc30220L,0x2bb7972fb0223606L,
+ 0xfa41faa19a84ff4dL } },
+ /* 177 */
+ { { 0x4402d974a6642269L,0xc81814ce9bb783bdL,0x398d38e47941e60bL,
+ 0x38bb6b2c1d26e9e2L },
+ { 0xc64e4a256a577f87L,0x8b52d253dc11fe1cL,0xff336abf62280728L,
+ 0x94dd0905ce7601a5L } },
+ /* 178 */
+ { { 0x156cf7dcde93f92aL,0xa01333cb89b5f315L,0x02404df9c995e750L,
+ 0x92077867d25c2ae9L },
+ { 0xe2471e010bf39d44L,0x5f2c902096bb53d7L,0x4c44b7b35c9c3d8fL,
+ 0x81e8428bd29beb51L } },
+ /* 179 */
+ { { 0x6dd9c2bac477199fL,0x8cb8eeee6b5ecdd9L,0x8af7db3fee40fd0eL,
+ 0x1b94ab62dbbfa4b1L },
+ { 0x44f0d8b3ce47f143L,0x51e623fc63f46163L,0xf18f270fcc599383L,
+ 0x06a38e28055590eeL } },
+ /* 180 */
+ { { 0x2e5b0139b3355b49L,0x20e26560b4ebf99bL,0xc08ffa6bd269f3dcL,
+ 0xa7b36c2083d9d4f8L },
+ { 0x64d15c3a1b3e8830L,0xd5fceae1a89f9c0bL,0xcfeee4a2e2d16930L,
+ 0xbe54c6b4a2822a20L } },
+ /* 181 */
+ { { 0xd6cdb3df8d91167cL,0x517c3f79e7a6625eL,0x7105648f346ac7f4L,
+ 0xbf30a5abeae022bbL },
+ { 0x8e7785be93828a68L,0x5161c3327f3ef036L,0xe11b5feb592146b2L,
+ 0xd1c820de2732d13aL } },
+ /* 182 */
+ { { 0x043e13479038b363L,0x58c11f546b05e519L,0x4fe57abe6026cad1L,
+ 0xb7d17bed68a18da3L },
+ { 0x44ca5891e29c2559L,0x4f7a03765bfffd84L,0x498de4af74e46948L,
+ 0x3997fd5e6412cc64L } },
+ /* 183 */
+ { { 0xf20746828bd61507L,0x29e132d534a64d2aL,0xffeddfb08a8a15e3L,
+ 0x0eeb89293c6c13e8L },
+ { 0xe9b69a3ea7e259f8L,0xce1db7e6d13e7e67L,0x277318f6ad1fa685L,
+ 0x228916f8c922b6efL } },
+ /* 184 */
+ { { 0x959ae25b0a12ab5bL,0xcc11171f957bc136L,0x8058429ed16e2b0cL,
+ 0xec05ad1d6e93097eL },
+ { 0x157ba5beac3f3708L,0x31baf93530b59d77L,0x47b55237118234e5L,
+ 0x7d3141567ff11b37L } },
+ /* 185 */
+ { { 0x7bd9c05cf6dfefabL,0xbe2f2268dcb37707L,0xe53ead973a38bb95L,
+ 0xe9ce66fc9bc1d7a3L },
+ { 0x75aa15766f6a02a1L,0x38c087df60e600edL,0xf8947f3468cdc1b9L,
+ 0xd9650b0172280651L } },
+ /* 186 */
+ { { 0x504b4c4a5a057e60L,0xcbccc3be8def25e4L,0xa635320817c1ccbdL,
+ 0x14d6699a804eb7a2L },
+ { 0x2c8a8415db1f411aL,0x09fbaf0bf80d769cL,0xb4deef901c2f77adL,
+ 0x6f4c68410d43598aL } },
+ /* 187 */
+ { { 0x8726df4e96c24a96L,0x534dbc85fcbd99a3L,0x3c466ef28b2ae30aL,
+ 0x4c4350fd61189abbL },
+ { 0x2967f716f855b8daL,0x41a42394463c38a1L,0xc37e1413eae93343L,
+ 0xa726d2425a3118b5L } },
+ /* 188 */
+ { { 0xdae6b3ee948c1086L,0xf1de503dcbd3a2e1L,0x3f35ed3f03d022f3L,
+ 0x13639e82cc6cf392L },
+ { 0x9ac938fbcdafaa86L,0xf45bc5fb2654a258L,0x1963b26e45051329L,
+ 0xca9365e1c1a335a3L } },
+ /* 189 */
+ { { 0x3615ac754c3b2d20L,0x742a5417904e241bL,0xb08521c4cc9d071dL,
+ 0x9ce29c34970b72a5L },
+ { 0x8cc81f736d3e0ad6L,0x8060da9ef2f8434cL,0x35ed1d1a6ce862d9L,
+ 0x48c4abd7ab42af98L } },
+ /* 190 */
+ { { 0xd221b0cc40c7485aL,0xead455bbe5274dbfL,0x493c76989263d2e8L,
+ 0x78017c32f67b33cbL },
+ { 0xb9d35769930cb5eeL,0xc0d14e940c408ed2L,0xf8b7bf55272f1a4dL,
+ 0x53cd0454de5c1c04L } },
+ /* 191 */
+ { { 0xbcd585fa5d28ccacL,0x5f823e56005b746eL,0x7c79f0a1cd0123aaL,
+ 0xeea465c1d3d7fa8fL },
+ { 0x7810659f0551803bL,0x6c0b599f7ce6af70L,0x4195a77029288e70L,
+ 0x1b6e42a47ae69193L } },
+ /* 192 */
+ { { 0x2e80937cf67d04c3L,0x1e312be289eeb811L,0x56b5d88792594d60L,
+ 0x0224da14187fbd3dL },
+ { 0x87abb8630c5fe36fL,0x580f3c604ef51f5fL,0x964fb1bfb3b429ecL,
+ 0x60838ef042bfff33L } },
+ /* 193 */
+ { { 0x432cb2f27e0bbe99L,0x7bda44f304aa39eeL,0x5f497c7a9fa93903L,
+ 0x636eb2022d331643L },
+ { 0xfcfd0e6193ae00aaL,0x875a00fe31ae6d2fL,0xf43658a29f93901cL,
+ 0x8844eeb639218bacL } },
+ /* 194 */
+ { { 0x114171d26b3bae58L,0x7db3df7117e39f3eL,0xcd37bc7f81a8eadaL,
+ 0x27ba83dc51fb789eL },
+ { 0xa7df439ffbf54de5L,0x7277030bb5fe1a71L,0x42ee8e35db297a48L,
+ 0xadb62d3487f3a4abL } },
+ /* 195 */
+ { { 0x9b1168a2a175df2aL,0x082aa04f618c32e9L,0xc9e4f2e7146b0916L,
+ 0xb990fd7675e7c8b2L },
+ { 0x0829d96b4df37313L,0x1c205579d0b40789L,0x66c9ae4a78087711L,
+ 0x81707ef94d10d18dL } },
+ /* 196 */
+ { { 0x97d7cab203d6ff96L,0x5b851bfc0d843360L,0x268823c4d042db4bL,
+ 0x3792daead5a8aa5cL },
+ { 0x52818865941afa0bL,0xf3e9e74142d83671L,0x17c825275be4e0a7L,
+ 0x5abd635e94b001baL } },
+ /* 197 */
+ { { 0x727fa84e0ac4927cL,0xe3886035a7c8cf23L,0xa4bcd5ea4adca0dfL,
+ 0x5995bf21846ab610L },
+ { 0xe90f860b829dfa33L,0xcaafe2ae958fc18bL,0x9b3baf4478630366L,
+ 0x44c32ca2d483411eL } },
+ /* 198 */
+ { { 0xa74a97f1e40ed80cL,0x5f938cb131d2ca82L,0x53f2124b7c2d6ad9L,
+ 0x1f2162fb8082a54cL },
+ { 0x7e467cc5720b173eL,0x40e8a666085f12f9L,0x8cebc20e4c9d65dcL,
+ 0x8f1d402bc3e907c9L } },
+ /* 199 */
+ { { 0x4f592f9cfbc4058aL,0xb15e14b6292f5670L,0xc55cfe37bc1d8c57L,
+ 0xb1980f43926edbf9L },
+ { 0x98c33e0932c76b09L,0x1df5279d33b07f78L,0x6f08ead4863bb461L,
+ 0x2828ad9b37448e45L } },
+ /* 200 */
+ { { 0x696722c4c4cf4ac5L,0xf5ac1a3fdde64afbL,0x0551baa2e0890832L,
+ 0x4973f1275a14b390L },
+ { 0xe59d8335322eac5dL,0x5e07eef50bd9b568L,0xab36720fa2588393L,
+ 0x6dac8ed0db168ac7L } },
+ /* 201 */
+ { { 0xf7b545aeeda835efL,0x4aa113d21d10ed51L,0x035a65e013741b09L,
+ 0x4b23ef5920b9de4cL },
+ { 0xe82bb6803c4c7341L,0xd457706d3f58bc37L,0x73527863a51e3ee8L,
+ 0x4dd71534ddf49a4eL } },
+ /* 202 */
+ { { 0xbf94467295476cd9L,0x648d072fe31a725bL,0x1441c8b8fc4b67e0L,
+ 0xfd3170002f4a4dbbL },
+ { 0x1cb43ff48995d0e1L,0x76e695d10ef729aaL,0xe0d5f97641798982L,
+ 0x14fac58c9569f365L } },
+ /* 203 */
+ { { 0xad9a0065f312ae18L,0x51958dc0fcc93fc9L,0xd9a142408a7d2846L,
+ 0xed7c765136abda50L },
+ { 0x46270f1a25d4abbcL,0x9b5dd8f3f1a113eaL,0xc609b0755b51952fL,
+ 0xfefcb7f74d2e9f53L } },
+ /* 204 */
+ { { 0xbd09497aba119185L,0xd54e8c30aac45ba4L,0x492479deaa521179L,
+ 0x1801a57e87e0d80bL },
+ { 0x073d3f8dfcafffb0L,0x6cf33c0bae255240L,0x781d763b5b5fdfbcL,
+ 0x9f8fc11e1ead1064L } },
+ /* 205 */
+ { { 0x1583a1715e69544cL,0x0eaf8567f04b7813L,0x1e22a8fd278a4c32L,
+ 0xa9d3809d3d3a69a9L },
+ { 0x936c2c2c59a2da3bL,0x38ccbcf61895c847L,0x5e65244e63d50869L,
+ 0x3006b9aee1178ef7L } },
+ /* 206 */
+ { { 0x0bb1f2b0c9eead28L,0x7eef635d89f4dfbcL,0x074757fdb2ce8939L,
+ 0x0ab85fd745f8f761L },
+ { 0xecda7c933e5b4549L,0x4be2bb5c97922f21L,0x261a1274b43b8040L,
+ 0xb122d67511e942c2L } },
+ /* 207 */
+ { { 0x3be607be66a5ae7aL,0x01e703fa76adcbe3L,0xaf9043014eb6e5c5L,
+ 0x9f599dc1097dbaecL },
+ { 0x6d75b7180ff250edL,0x8eb91574349a20dcL,0x425605a410b227a3L,
+ 0x7d5528e08a294b78L } },
+ /* 208 */
+ { { 0xf0f58f6620c26defL,0x025585ea582b2d1eL,0xfbe7d79b01ce3881L,
+ 0x28ccea01303f1730L },
+ { 0xd1dabcd179644ba5L,0x1fc643e806fff0b8L,0xa60a76fc66b3e17bL,
+ 0xc18baf48a1d013bfL } },
+ /* 209 */
+ { { 0x34e638c85dc4216dL,0x00c01067206142acL,0xd453a17195f5064aL,
+ 0x9def809db7a9596bL },
+ { 0x41e8642e67ab8d2cL,0xb42404336237a2b6L,0x7d506a6d64c4218bL,
+ 0x0357f8b068808ce5L } },
+ /* 210 */
+ { { 0x8e9dbe644cd2cc88L,0xcc61c28df0b8f39dL,0x4a309874cd30a0c8L,
+ 0xe4a01add1b489887L },
+ { 0x2ed1eeacf57cd8f9L,0x1b767d3ebd594c48L,0xa7295c717bd2f787L,
+ 0x466d7d79ce10cc30L } },
+ /* 211 */
+ { { 0x47d318929dada2c7L,0x4fa0a6c38f9aa27dL,0x90e4fd28820a59e1L,
+ 0xc672a522451ead1aL },
+ { 0x30607cc85d86b655L,0xf0235d3bf9ad4af1L,0x99a08680571172a6L,
+ 0x5e3d64faf2a67513L } },
+ /* 212 */
+ { { 0xaa6410c79b3b4416L,0xcd8fcf85eab26d99L,0x5ebff74adb656a74L,
+ 0x6c8a7a95eb8e42fcL },
+ { 0x10c60ba7b02a63bdL,0x6b2f23038b8f0047L,0x8c6c3738312d90b0L,
+ 0x348ae422ad82ca91L } },
+ /* 213 */
+ { { 0x7f4746635ccda2fbL,0x22accaa18e0726d2L,0x85adf782492b1f20L,
+ 0xc1074de0d9ef2d2eL },
+ { 0xfcf3ce44ae9a65b3L,0xfd71e4ac05d7151bL,0xd4711f50ce6a9788L,
+ 0xfbadfbdbc9e54ffcL } },
+ /* 214 */
+ { { 0x1713f1cd20a99363L,0xb915658f6cf22775L,0x968175cd24d359b2L,
+ 0xb7f976b483716fcdL },
+ { 0x5758e24d5d6dbf74L,0x8d23bafd71c3af36L,0x48f477600243dfe3L,
+ 0xf4d41b2ecafcc805L } },
+ /* 215 */
+ { { 0x51f1cf28fdabd48dL,0xce81be3632c078a4L,0x6ace2974117146e9L,
+ 0x180824eae0160f10L },
+ { 0x0387698b66e58358L,0x63568752ce6ca358L,0x82380e345e41e6c5L,
+ 0x67e5f63983cf6d25L } },
+ /* 216 */
+ { { 0xf89ccb8dcf4899efL,0x949015f09ebb44c0L,0x546f9276b2598ec9L,
+ 0x9fef789a04c11fc6L },
+ { 0x6d367ecf53d2a071L,0xb10e1a7fa4519b09L,0xca6b3fb0611e2eefL,
+ 0xbc80c181a99c4e20L } },
+ /* 217 */
+ { { 0x972536f8e5eb82e6L,0x1a484fc7f56cb920L,0xc78e217150b5da5eL,
+ 0x49270e629f8cdf10L },
+ { 0x1a39b7bbea6b50adL,0x9a0284c1a2388ffcL,0x5403eb178107197bL,
+ 0xd2ee52f961372f7fL } },
+ /* 218 */
+ { { 0xd37cd28588e0362aL,0x442fa8a78fa5d94dL,0xaff836e5a434a526L,
+ 0xdfb478bee5abb733L },
+ { 0xa91f1ce7673eede6L,0xa5390ad42b5b2f04L,0x5e66f7bf5530da2fL,
+ 0xd9a140b408df473aL } },
+ /* 219 */
+ { { 0x0e0221b56e8ea498L,0x623478293563ee09L,0xe06b8391335d2adeL,
+ 0x760c058d623f4b1aL },
+ { 0x0b89b58cc198aa79L,0xf74890d2f07aba7fL,0x4e204110fde2556aL,
+ 0x7141982d8f190409L } },
+ /* 220 */
+ { { 0x6f0a0e334d4b0f45L,0xd9280b38392a94e1L,0x3af324c6b3c61d5eL,
+ 0x3af9d1ce89d54e47L },
+ { 0xfd8f798120930371L,0xeda2664c21c17097L,0x0e9545dcdc42309bL,
+ 0xb1f815c373957dd6L } },
+ /* 221 */
+ { { 0x84faa78e89fec44aL,0xc8c2ae473caa4cafL,0x691c807dc1b6a624L,
+ 0xa41aed141543f052L },
+ { 0x424353997d5ffe04L,0x8bacb2df625b6e20L,0x85d660be87817775L,
+ 0xd6e9c1dd86fb60efL } },
+ /* 222 */
+ { { 0x3aa2e97ec6853264L,0x771533b7e2304a0bL,0x1b912bb7b8eae9beL,
+ 0x9c9c6e10ae9bf8c2L },
+ { 0xa2309a59e030b74cL,0x4ed7494d6a631e90L,0x89f44b23a49b79f2L,
+ 0x566bd59640fa61b6L } },
+ /* 223 */
+ { { 0x066c0118c18061f3L,0x190b25d37c83fc70L,0xf05fc8e027273245L,
+ 0xcf2c7390f525345eL },
+ { 0xa09bceb410eb30cfL,0xcfd2ebba0d77703aL,0xe842c43a150ff255L,
+ 0x02f517558aa20979L } },
+ /* 224 */
+ { { 0x396ef794addb7d07L,0x0b4fc74224455500L,0xfaff8eacc78aa3ceL,
+ 0x14e9ada5e8d4d97dL },
+ { 0xdaa480a12f7079e2L,0x45baa3cde4b0800eL,0x01765e2d7838157dL,
+ 0xa0ad4fab8e9d9ae8L } },
+ /* 225 */
+ { { 0x0bfb76214a653618L,0x1872813c31eaaa5fL,0x1553e73744949d5eL,
+ 0xbcd530b86e56ed1eL },
+ { 0x169be85332e9c47bL,0xdc2776feb50059abL,0xcdba9761192bfbb4L,
+ 0x909283cf6979341dL } },
+ /* 226 */
+ { { 0x67b0032476e81a13L,0x9bee1a9962171239L,0x08ed361bd32e19d6L,
+ 0x35eeb7c9ace1549aL },
+ { 0x1280ae5a7e4e5bdcL,0x2dcd2cd3b6ceec6eL,0x52e4224c6e266bc1L,
+ 0x9a8b2cf4448ae864L } },
+ /* 227 */
+ { { 0xf6471bf209d03b59L,0xc90e62a3b65af2abL,0xff7ff168ebd5eec9L,
+ 0x6bdb60f4d4491379L },
+ { 0xdadafebc8a55bc30L,0xc79ead1610097fe0L,0x42e197414c1e3bddL,
+ 0x01ec3cfd94ba08a9L } },
+ /* 228 */
+ { { 0xba6277ebdc9485c2L,0x48cc9a7922fb10c7L,0x4f61d60f70a28d8aL,
+ 0xd1acb1c0475464f6L },
+ { 0xd26902b126f36612L,0x59c3a44ee0618d8bL,0x4df8a813308357eeL,
+ 0x7dcd079d405626c2L } },
+ /* 229 */
+ { { 0x5ce7d4d3f05a4b48L,0xadcd295237230772L,0xd18f7971812a915aL,
+ 0x0bf53589377d19b8L },
+ { 0x35ecd95a6c68ea73L,0xc7f3bbca823a584dL,0x9fb674c6f473a723L,
+ 0xd28be4d9e16686fcL } },
+ /* 230 */
+ { { 0x5d2b990638fa8e4bL,0x559f186e893fd8fcL,0x3a6de2aa436fb6fcL,
+ 0xd76007aa510f88ceL },
+ { 0x2d10aab6523a4988L,0xb455cf4474dd0273L,0x7f467082a3407278L,
+ 0xf2b52f68b303bb01L } },
+ /* 231 */
+ { { 0x0d57eafa9835b4caL,0x2d2232fcbb669cbcL,0x8eeeb680c6643198L,
+ 0xd8dbe98ecc5aed3aL },
+ { 0xcba9be3fc5a02709L,0x30be68e5f5ba1fa8L,0xfebd43cdf10ea852L,
+ 0xe01593a3ee559705L } },
+ /* 232 */
+ { { 0xd3e5af50ea75a0a6L,0x512226ac57858033L,0x6fe6d50fd0176406L,
+ 0xafec07b1aeb8ef06L },
+ { 0x7fb9956780bb0a31L,0x6f1af3cc37309aaeL,0x9153a15a01abf389L,
+ 0xa71b93546e2dbfddL } },
+ /* 233 */
+ { { 0xbf8e12e018f593d2L,0xd1a90428a078122bL,0x150505db0ba4f2adL,
+ 0x53a2005c628523d9L },
+ { 0x07c8b639e7f2b935L,0x2bff975ac182961aL,0x86bceea77518ca2cL,
+ 0xbf47d19b3d588e3dL } },
+ /* 234 */
+ { { 0x672967a7dd7665d5L,0x4e3030572f2f4de5L,0x144005ae80d4903fL,
+ 0x001c2c7f39c9a1b6L },
+ { 0x143a801469efc6d6L,0xc810bdaa7bc7a724L,0x5f65670ba78150a4L,
+ 0xfdadf8e786ffb99bL } },
+ /* 235 */
+ { { 0xfd38cb88ffc00785L,0x77fa75913b48eb67L,0x0454d055bf368fbcL,
+ 0x3a838e4d5aa43c94L },
+ { 0x561663293e97bb9aL,0x9eb93363441d94d9L,0x515591a60adb2a83L,
+ 0x3cdb8257873e1da3L } },
+ /* 236 */
+ { { 0x137140a97de77eabL,0xf7e1c50d41648109L,0x762dcad2ceb1d0dfL,
+ 0x5a60cc89f1f57fbaL },
+ { 0x80b3638240d45673L,0x1b82be195913c655L,0x057284b8dd64b741L,
+ 0x922ff56fdbfd8fc0L } },
+ /* 237 */
+ { { 0x1b265deec9a129a1L,0xa5b1ce57cc284e04L,0x04380c46cebfbe3cL,
+ 0x72919a7df6c5cd62L },
+ { 0x298f453a8fb90f9aL,0xd719c00b88e4031bL,0xe32c0e77796f1856L,
+ 0x5e7917803624089aL } },
+ /* 238 */
+ { { 0x5c16ec557f63cdfbL,0x8e6a3571f1cae4fdL,0xfce26bea560597caL,
+ 0x4e0a5371e24c2fabL },
+ { 0x276a40d3a5765357L,0x3c89af440d73a2b4L,0xb8f370ae41d11a32L,
+ 0xf5ff7818d56604eeL } },
+ /* 239 */
+ { { 0xfbf3e3fe1a09df21L,0x26d5d28ee66e8e47L,0x2096bd0a29c89015L,
+ 0xe41df0e9533f5e64L },
+ { 0x305fda40b3ba9e3fL,0xf2340ceb2604d895L,0x0866e1927f0367c7L,
+ 0x8edd7d6eac4f155fL } },
+ /* 240 */
+ { { 0xc9a1dc0e0bfc8ff3L,0x14efd82be936f42fL,0x67016f7ccca381efL,
+ 0x1432c1caed8aee96L },
+ { 0xec68482970b23c26L,0xa64fe8730735b273L,0xe389f6e5eaef0f5aL,
+ 0xcaef480b5ac8d2c6L } },
+ /* 241 */
+ { { 0x5245c97875315922L,0xd82951713063cca5L,0xf3ce60d0b64ef2cbL,
+ 0xd0ba177e8efae236L },
+ { 0x53a9ae8fb1b3af60L,0x1a796ae53d2da20eL,0x01d63605df9eef28L,
+ 0xf31c957c1c54ae16L } },
+ /* 242 */
+ { { 0xc0f58d5249cc4597L,0xdc5015b0bae0a028L,0xefc5fc55734a814aL,
+ 0x013404cb96e17c3aL },
+ { 0xb29e2585c9a824bfL,0xd593185e001eaed7L,0x8d6ee68261ef68acL,
+ 0x6f377c4b91933e6cL } },
+ /* 243 */
+ { { 0x9f93bad1a8333fd2L,0xa89302025a2a95b8L,0x211e5037eaf75aceL,
+ 0x6dba3e4ed2d09506L },
+ { 0xa48ef98cd04399cdL,0x1811c66ee6b73adeL,0x72f60752c17ecaf3L,
+ 0xf13cf3423becf4a7L } },
+ /* 244 */
+ { { 0xceeb9ec0a919e2ebL,0x83a9a195f62c0f68L,0xcfba3bb67aba2299L,
+ 0xc83fa9a9274bbad3L },
+ { 0x0d7d1b0b62fa1ce0L,0xe58b60f53418efbfL,0xbfa8ef9e52706f04L,
+ 0xb49d70f45d702683L } },
+ /* 245 */
+ { { 0x914c7510fad5513bL,0x05f32eecb1751e2dL,0x6d850418d9fb9d59L,
+ 0x59cfadbb0c30f1cfL },
+ { 0xe167ac2355cb7fd6L,0x249367b8820426a3L,0xeaeec58c90a78864L,
+ 0x5babf362354a4b67L } },
+ /* 246 */
+ { { 0x37c981d1ee424865L,0x8b002878f2e5577fL,0x702970f1b9e0c058L,
+ 0x6188c6a79026c8f0L },
+ { 0x06f9a19bd0f244daL,0x1ecced5cfb080873L,0x35470f9b9f213637L,
+ 0x993fe475df50b9d9L } },
+ /* 247 */
+ { { 0x68e31cdf9b2c3609L,0x84eb19c02c46d4eaL,0x7ac9ec1a9a775101L,
+ 0x81f764664c80616bL },
+ { 0x1d7c2a5a75fbe978L,0x6743fed3f183b356L,0x838d1f04501dd2bfL,
+ 0x564a812a5fe9060dL } },
+ /* 248 */
+ { { 0x7a5a64f4fa817d1dL,0x55f96844bea82e0fL,0xb5ff5a0fcd57f9aaL,
+ 0x226bf3cf00e51d6cL },
+ { 0xd6d1a9f92f2833cfL,0x20a0a35a4f4f89a8L,0x11536c498f3f7f77L,
+ 0x68779f47ff257836L } },
+ /* 249 */
+ { { 0x79b0c1c173043d08L,0xa54467741fc020faL,0xd3767e289a6d26d0L,
+ 0x97bcb0d1eb092e0bL },
+ { 0x2ab6eaa8f32ed3c3L,0xc8a4f151b281bc48L,0x4d1bf4f3bfa178f3L,
+ 0xa872ffe80a784655L } },
+ /* 250 */
+ { { 0xb1ab7935a32b2086L,0xe1eb710e8160f486L,0x9bd0cd913b6ae6beL,
+ 0x02812bfcb732a36aL },
+ { 0xa63fd7cacf605318L,0x646e5d50fdfd6d1dL,0xa1d683982102d619L,
+ 0x07391cc9fe5396afL } },
+ /* 251 */
+ { { 0xc50157f08b80d02bL,0x6b8333d162877f7fL,0x7aca1af878d542aeL,
+ 0x355d2adc7e6d2a08L },
+ { 0xb41f335a287386e1L,0xfd272a94f8e43275L,0x286ca2cde79989eaL,
+ 0x3dc2b1e37c2a3a79L } },
+ /* 252 */
+ { { 0xd689d21c04581352L,0x0a00c825376782beL,0x203bd5909fed701fL,
+ 0xc47869103ccd846bL },
+ { 0x5dba770824c768edL,0x72feea026841f657L,0x73313ed56accce0eL,
+ 0xccc42968d5bb4d32L } },
+ /* 253 */
+ { { 0x94e50de13d7620b9L,0xd89a5c8a5992a56aL,0xdc007640675487c9L,
+ 0xe147eb42aa4871cfL },
+ { 0x274ab4eeacf3ae46L,0xfd4936fb50350fbeL,0xdf2afe4748c840eaL,
+ 0x239ac047080e96e3L } },
+ /* 254 */
+ { { 0x481d1f352bfee8d4L,0xce80b5cffa7b0fecL,0x105c4c9e2ce9af3cL,
+ 0xc55fa1a3f5f7e59dL },
+ { 0x3186f14e8257c227L,0xc5b1653f342be00bL,0x09afc998aa904fb2L,
+ 0x094cd99cd4f4b699L } },
+ /* 255 */
+ { { 0x8a981c84d703bebaL,0x8631d15032ceb291L,0xa445f2c9e3bd49ecL,
+ 0xb90a30b642abad33L },
+ { 0xb465404fb4a5abf9L,0x004750c375db7603L,0x6f9a42ccca35d89fL,
+ 0x019f8b9a1b7924f7L } },
+};
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * When map is non-zero the result is converted to affine coordinates.
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* All multiples of the base point are pre-computed in p256_table,
+     * so the generic striped multiplication routine does the work. */
+    int err;
+
+    err = sp_256_ecc_mulmod_stripe_4(r, &p256_base, p256_table, k, map,
+        heap);
+    return err;
+}
+
+#ifdef HAVE_INTEL_AVX2
+/* Multiply the base point of P256 by the scalar using the AVX2
+ * implementation and return the result.  When map is non-zero the
+ * result is converted to affine coordinates.
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_avx2_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* Same pre-computed table as the non-AVX2 path; only the striped
+     * multiplication core differs. */
+    int err;
+
+    err = sp_256_ecc_mulmod_stripe_avx2_4(r, &p256_base, p256_table, k,
+        map, heap);
+    return err;
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#else /* WOLFSSL_SP_SMALL */
+/* The index into pre-computation table to use.
+ *
+ * Maps a carry-adjusted 7-bit window value y to a pre-computation table
+ * index: y itself for y in [0..64], and 128 - y for y in [65..127]
+ * (those windows use the negated point instead -- see recode_neg_4_7).
+ * Entries 128 and 129 cover the values reachable after the carry from
+ * the previous window has been added in sp_256_ecc_recode_7_4. */
+static const uint8_t recode_index_4_7[130] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
+    48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
+     0,  1,
+};
+
+/* Whether to negate y-ordinate.
+ *
+ * 1 for window values in [64..127], which recode_index_4_7 folds back
+ * into the table range via 128 - y and which therefore require the
+ * point to be negated (subtracted); 0 otherwise.  Indexed with the
+ * same carry-adjusted window value as recode_index_4_7. */
+static const uint8_t recode_neg_4_7[130] = {
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     0,  0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * The 256-bit scalar (four 64-bit words) is split into 37 signed 7-bit
+ * windows.  Each raw window value, plus the borrow carried in from the
+ * previous window, is mapped through recode_index_4_7/recode_neg_4_7 so
+ * that values above 64 become a table index in [0..64] with the point
+ * negated instead, propagating a borrow to the next window.
+ *
+ * k Scalar to multiply by.
+ * v Vector of operations to perform.
+ */
+static void sp_256_ecc_recode_7_4(const sp_digit* k, ecc_recode_256* v)
+{
+    int i, j;
+    uint8_t y;
+    int carry = 0;              /* Borrow from the previous window. */
+    int o;                      /* Bit offset into the current word n. */
+    sp_digit n;
+
+    j = 0;
+    n = k[j];
+    o = 0;
+    for (i=0; i<37; i++) {
+        y = n;
+        if (o + 7 < 64) {
+            /* Window lies entirely within the current word. */
+            y &= 0x7f;
+            n >>= 7;
+            o += 7;
+        }
+        else if (o + 7 == 64) {
+            /* Window ends exactly on the word boundary; only 7 bits
+             * remained in n, so y already holds them all. */
+            n >>= 7;
+            if (++j < 4)
+                n = k[j];
+            o = 0;
+        }
+        else if (++j < 4) {
+            /* Window straddles a word boundary: combine the remaining
+             * (64 - o) bits of the old word with the low bits of the
+             * next word. */
+            n = k[j];
+            y |= (n << (64 - o)) & 0x7f;
+            o -= 57;            /* = 7 - (64 - o): bits consumed from new word. */
+            n >>= o;
+        }
+
+        /* Fold in the borrow, then recode to a table index and sign. */
+        y += carry;
+        v[i].i = recode_index_4_7[y];
+        v[i].neg = recode_neg_4_7[y];
+        /* y == 128 (bit 7 set) or a negated window each produce a
+         * borrow for the next window. */
+        carry = (y >> 7) + v[i].neg;
+    }
+}
+
+static const sp_table_entry_256 p256_table[2405] = {
+ /* 0 << 0 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 0 */
+ { { 0x79e730d418a9143cL,0x75ba95fc5fedb601L,0x79fb732b77622510L,
+ 0x18905f76a53755c6L },
+ { 0xddf25357ce95560aL,0x8b4ab8e4ba19e45cL,0xd2e88688dd21f325L,
+ 0x8571ff1825885d85L } },
+ /* 2 << 0 */
+ { { 0x850046d410ddd64dL,0xaa6ae3c1a433827dL,0x732205038d1490d9L,
+ 0xf6bb32e43dcf3a3bL },
+ { 0x2f3648d361bee1a5L,0x152cd7cbeb236ff8L,0x19a8fb0e92042dbeL,
+ 0x78c577510a5b8a3bL } },
+ /* 3 << 0 */
+ { { 0xffac3f904eebc127L,0xb027f84a087d81fbL,0x66ad77dd87cbbc98L,
+ 0x26936a3fb6ff747eL },
+ { 0xb04c5c1fc983a7ebL,0x583e47ad0861fe1aL,0x788208311a2ee98eL,
+ 0xd5f06a29e587cc07L } },
+ /* 4 << 0 */
+ { { 0x74b0b50d46918dccL,0x4650a6edc623c173L,0x0cdaacace8100af2L,
+ 0x577362f541b0176bL },
+ { 0x2d96f24ce4cbaba6L,0x17628471fad6f447L,0x6b6c36dee5ddd22eL,
+ 0x84b14c394c5ab863L } },
+ /* 5 << 0 */
+ { { 0xbe1b8aaec45c61f5L,0x90ec649a94b9537dL,0x941cb5aad076c20cL,
+ 0xc9079605890523c8L },
+ { 0xeb309b4ae7ba4f10L,0x73c568efe5eb882bL,0x3540a9877e7a1f68L,
+ 0x73a076bb2dd1e916L } },
+ /* 6 << 0 */
+ { { 0x403947373e77664aL,0x55ae744f346cee3eL,0xd50a961a5b17a3adL,
+ 0x13074b5954213673L },
+ { 0x93d36220d377e44bL,0x299c2b53adff14b5L,0xf424d44cef639f11L,
+ 0xa4c9916d4a07f75fL } },
+ /* 7 << 0 */
+ { { 0x0746354ea0173b4fL,0x2bd20213d23c00f7L,0xf43eaab50c23bb08L,
+ 0x13ba5119c3123e03L },
+ { 0x2847d0303f5b9d4dL,0x6742f2f25da67bddL,0xef933bdc77c94195L,
+ 0xeaedd9156e240867L } },
+ /* 8 << 0 */
+ { { 0x27f14cd19499a78fL,0x462ab5c56f9b3455L,0x8f90f02af02cfc6bL,
+ 0xb763891eb265230dL },
+ { 0xf59da3a9532d4977L,0x21e3327dcf9eba15L,0x123c7b84be60bbf0L,
+ 0x56ec12f27706df76L } },
+ /* 9 << 0 */
+ { { 0x75c96e8f264e20e8L,0xabe6bfed59a7a841L,0x2cc09c0444c8eb00L,
+ 0xe05b3080f0c4e16bL },
+ { 0x1eb7777aa45f3314L,0x56af7bedce5d45e3L,0x2b6e019a88b12f1aL,
+ 0x086659cdfd835f9bL } },
+ /* 10 << 0 */
+ { { 0x2c18dbd19dc21ec8L,0x98f9868a0fcf8139L,0x737d2cd648250b49L,
+ 0xcc61c94724b3428fL },
+ { 0x0c2b407880dd9e76L,0xc43a8991383fbe08L,0x5f7d2d65779be5d2L,
+ 0x78719a54eb3b4ab5L } },
+ /* 11 << 0 */
+ { { 0xea7d260a6245e404L,0x9de407956e7fdfe0L,0x1ff3a4158dac1ab5L,
+ 0x3e7090f1649c9073L },
+ { 0x1a7685612b944e88L,0x250f939ee57f61c8L,0x0c0daa891ead643dL,
+ 0x68930023e125b88eL } },
+ /* 12 << 0 */
+ { { 0x04b71aa7d2697768L,0xabdedef5ca345a33L,0x2409d29dee37385eL,
+ 0x4ee1df77cb83e156L },
+ { 0x0cac12d91cbb5b43L,0x170ed2f6ca895637L,0x28228cfa8ade6d66L,
+ 0x7ff57c9553238acaL } },
+ /* 13 << 0 */
+ { { 0xccc425634b2ed709L,0x0e356769856fd30dL,0xbcbcd43f559e9811L,
+ 0x738477ac5395b759L },
+ { 0x35752b90c00ee17fL,0x68748390742ed2e3L,0x7cd06422bd1f5bc1L,
+ 0xfbc08769c9e7b797L } },
+ /* 14 << 0 */
+ { { 0xa242a35bb0cf664aL,0x126e48f77f9707e3L,0x1717bf54c6832660L,
+ 0xfaae7332fd12c72eL },
+ { 0x27b52db7995d586bL,0xbe29569e832237c2L,0xe8e4193e2a65e7dbL,
+ 0x152706dc2eaa1bbbL } },
+ /* 15 << 0 */
+ { { 0x72bcd8b7bc60055bL,0x03cc23ee56e27e4bL,0xee337424e4819370L,
+ 0xe2aa0e430ad3da09L },
+ { 0x40b8524f6383c45dL,0xd766355442a41b25L,0x64efa6de778a4797L,
+ 0x2042170a7079adf4L } },
+ /* 16 << 0 */
+ { { 0x808b0b650bc6fb80L,0x5882e0753ffe2e6bL,0xd5ef2f7c2c83f549L,
+ 0x54d63c809103b723L },
+ { 0xf2f11bd652a23f9bL,0x3670c3194b0b6587L,0x55c4623bb1580e9eL,
+ 0x64edf7b201efe220L } },
+ /* 17 << 0 */
+ { { 0x97091dcbd53c5c9dL,0xf17624b6ac0a177bL,0xb0f139752cfe2dffL,
+ 0xc1a35c0a6c7a574eL },
+ { 0x227d314693e79987L,0x0575bf30e89cb80eL,0x2f4e247f0d1883bbL,
+ 0xebd512263274c3d0L } },
+ /* 18 << 0 */
+ { { 0x5f3e51c856ada97aL,0x4afc964d8f8b403eL,0xa6f247ab412e2979L,
+ 0x675abd1b6f80ebdaL },
+ { 0x66a2bd725e485a1dL,0x4b2a5caf8f4f0b3cL,0x2626927f1b847bbaL,
+ 0x6c6fc7d90502394dL } },
+ /* 19 << 0 */
+ { { 0xfea912baa5659ae8L,0x68363aba25e1a16eL,0xb8842277752c41acL,
+ 0xfe545c282897c3fcL },
+ { 0x2d36e9e7dc4c696bL,0x5806244afba977c5L,0x85665e9be39508c1L,
+ 0xf720ee256d12597bL } },
+ /* 20 << 0 */
+ { { 0x8a979129d2337a31L,0x5916868f0f862bdcL,0x048099d95dd283baL,
+ 0xe2d1eeb6fe5bfb4eL },
+ { 0x82ef1c417884005dL,0xa2d4ec17ffffcbaeL,0x9161c53f8aa95e66L,
+ 0x5ee104e1c5fee0d0L } },
+ /* 21 << 0 */
+ { { 0x562e4cecc135b208L,0x74e1b2654783f47dL,0x6d2a506c5a3f3b30L,
+ 0xecead9f4c16762fcL },
+ { 0xf29dd4b2e286e5b9L,0x1b0fadc083bb3c61L,0x7a75023e7fac29a4L,
+ 0xc086d5f1c9477fa3L } },
+ /* 22 << 0 */
+ { { 0x0fc611352f6f3076L,0xc99ffa23e3912a9aL,0x6a0b0685d2f8ba3dL,
+ 0xfdc777e8e93358a4L },
+ { 0x94a787bb35415f04L,0x640c2d6a4d23fea4L,0x9de917da153a35b5L,
+ 0x793e8d075d5cd074L } },
+ /* 23 << 0 */
+ { { 0xf4f876532de45068L,0x37c7a7e89e2e1f6eL,0xd0825fa2a3584069L,
+ 0xaf2cea7c1727bf42L },
+ { 0x0360a4fb9e4785a9L,0xe5fda49c27299f4aL,0x48068e1371ac2f71L,
+ 0x83d0687b9077666fL } },
+ /* 24 << 0 */
+ { { 0x6d3883b215d02819L,0x6d0d755040dd9a35L,0x61d7cbf91d2b469fL,
+ 0xf97b232f2efc3115L },
+ { 0xa551d750b24bcbc7L,0x11ea494988a1e356L,0x7669f03193cb7501L,
+ 0x595dc55eca737b8aL } },
+ /* 25 << 0 */
+ { { 0xa4a319acd837879fL,0x6fc1b49eed6b67b0L,0xe395993332f1f3afL,
+ 0x966742eb65432a2eL },
+ { 0x4b8dc9feb4966228L,0x96cc631243f43950L,0x12068859c9b731eeL,
+ 0x7b948dc356f79968L } },
+ /* 26 << 0 */
+ { { 0x61e4ad32ed1f8008L,0xe6c9267ad8b17538L,0x1ac7c5eb857ff6fbL,
+ 0x994baaa855f2fb10L },
+ { 0x84cf14e11d248018L,0x5a39898b628ac508L,0x14fde97b5fa944f5L,
+ 0xed178030d12e5ac7L } },
+ /* 27 << 0 */
+ { { 0x042c2af497e2feb4L,0xd36a42d7aebf7313L,0x49d2c9eb084ffdd7L,
+ 0x9f8aa54b2ef7c76aL },
+ { 0x9200b7ba09895e70L,0x3bd0c66fddb7fb58L,0x2d97d10878eb4cbbL,
+ 0x2d431068d84bde31L } },
+ /* 28 << 0 */
+ { { 0x4b523eb7172ccd1fL,0x7323cb2830a6a892L,0x97082ec0cfe153ebL,
+ 0xe97f6b6af2aadb97L },
+ { 0x1d3d393ed1a83da1L,0xa6a7f9c7804b2a68L,0x4a688b482d0cb71eL,
+ 0xa9b4cc5f40585278L } },
+ /* 29 << 0 */
+ { { 0x5e5db46acb66e132L,0xf1be963a0d925880L,0x944a70270317b9e2L,
+ 0xe266f95948603d48L },
+ { 0x98db66735c208899L,0x90472447a2fb18a3L,0x8a966939777c619fL,
+ 0x3798142a2a3be21bL } },
+ /* 30 << 0 */
+ { { 0xb4241cb13298b343L,0xa3a14e49b44f65a1L,0xc5f4d6cd3ac77acdL,
+ 0xd0288cb552b6fc3cL },
+ { 0xd5cc8c2f1c040abcL,0xb675511e06bf9b4aL,0xd667da379b3aa441L,
+ 0x460d45ce51601f72L } },
+ /* 31 << 0 */
+ { { 0xe2f73c696755ff89L,0xdd3cf7e7473017e6L,0x8ef5689d3cf7600dL,
+ 0x948dc4f8b1fc87b4L },
+ { 0xd9e9fe814ea53299L,0x2d921ca298eb6028L,0xfaecedfd0c9803fcL,
+ 0xf38ae8914d7b4745L } },
+ /* 32 << 0 */
+ { { 0xd8c5fccfc5e3a3d8L,0xbefd904c4079dfbfL,0xbc6d6a58fead0197L,
+ 0x39227077695532a4L },
+ { 0x09e23e6ddbef42f5L,0x7e449b64480a9908L,0x7b969c1aad9a2e40L,
+ 0x6231d7929591c2a4L } },
+ /* 33 << 0 */
+ { { 0x871514560f664534L,0x85ceae7c4b68f103L,0xac09c4ae65578ab9L,
+ 0x33ec6868f044b10cL },
+ { 0x6ac4832b3a8ec1f1L,0x5509d1285847d5efL,0xf909604f763f1574L,
+ 0xb16c4303c32f63c4L } },
+ /* 34 << 0 */
+ { { 0xb6ab20147ca23cd3L,0xcaa7a5c6a391849dL,0x5b0673a375678d94L,
+ 0xc982ddd4dd303e64L },
+ { 0xfd7b000b5db6f971L,0xbba2cb1f6f876f92L,0xc77332a33c569426L,
+ 0xa159100c570d74f8L } },
+ /* 35 << 0 */
+ { { 0xfd16847fdec67ef5L,0x742ee464233e76b7L,0x0b8e4134efc2b4c8L,
+ 0xca640b8642a3e521L },
+ { 0x653a01908ceb6aa9L,0x313c300c547852d5L,0x24e4ab126b237af7L,
+ 0x2ba901628bb47af8L } },
+ /* 36 << 0 */
+ { { 0x3d5e58d6a8219bb7L,0xc691d0bd1b06c57fL,0x0ae4cb10d257576eL,
+ 0x3569656cd54a3dc3L },
+ { 0xe5ebaebd94cda03aL,0x934e82d3162bfe13L,0x450ac0bae251a0c6L,
+ 0x480b9e11dd6da526L } },
+ /* 37 << 0 */
+ { { 0x00467bc58cce08b5L,0xb636458c7f178d55L,0xc5748baea677d806L,
+ 0x2763a387dfa394ebL },
+ { 0xa12b448a7d3cebb6L,0xe7adda3e6f20d850L,0xf63ebce51558462cL,
+ 0x58b36143620088a8L } },
+ /* 38 << 0 */
+ { { 0x8a2cc3ca4d63c0eeL,0x512331170fe948ceL,0x7463fd85222ef33bL,
+ 0xadf0c7dc7c603d6cL },
+ { 0x0ec32d3bfe7765e5L,0xccaab359bf380409L,0xbdaa84d68e59319cL,
+ 0xd9a4c2809c80c34dL } },
+ /* 39 << 0 */
+ { { 0xa9d89488a059c142L,0x6f5ae714ff0b9346L,0x068f237d16fb3664L,
+ 0x5853e4c4363186acL },
+ { 0xe2d87d2363c52f98L,0x2ec4a76681828876L,0x47b864fae14e7b1cL,
+ 0x0c0bc0e569192408L } },
+ /* 40 << 0 */
+ { { 0xe4d7681db82e9f3eL,0x83200f0bdf25e13cL,0x8909984c66f27280L,
+ 0x462d7b0075f73227L },
+ { 0xd90ba188f2651798L,0x74c6e18c36ab1c34L,0xab256ea35ef54359L,
+ 0x03466612d1aa702fL } },
+ /* 41 << 0 */
+ { { 0x624d60492ed22e91L,0x6fdfe0b56f072822L,0xeeca111539ce2271L,
+ 0x98100a4fdb01614fL },
+ { 0xb6b0daa2a35c628fL,0xb6f94d2ec87e9a47L,0xc67732591d57d9ceL,
+ 0xf70bfeec03884a7bL } },
+ /* 42 << 0 */
+ { { 0x5fb35ccfed2bad01L,0xa155cbe31da6a5c7L,0xc2e2594c30a92f8fL,
+ 0x649c89ce5bfafe43L },
+ { 0xd158667de9ff257aL,0x9b359611f32c50aeL,0x4b00b20b906014cfL,
+ 0xf3a8cfe389bc7d3dL } },
+ /* 43 << 0 */
+ { { 0x4ff23ffd248a7d06L,0x80c5bfb4878873faL,0xb7d9ad9005745981L,
+ 0x179c85db3db01994L },
+ { 0xba41b06261a6966cL,0x4d82d052eadce5a8L,0x9e91cd3ba5e6a318L,
+ 0x47795f4f95b2dda0L } },
+ /* 44 << 0 */
+ { { 0xecfd7c1fd55a897cL,0x009194abb29110fbL,0x5f0e2046e381d3b0L,
+ 0x5f3425f6a98dd291L },
+ { 0xbfa06687730d50daL,0x0423446c4b083b7fL,0x397a247dd69d3417L,
+ 0xeb629f90387ba42aL } },
+ /* 45 << 0 */
+ { { 0x1ee426ccd5cd79bfL,0x0032940b946c6e18L,0x1b1e8ae057477f58L,
+ 0xe94f7d346d823278L },
+ { 0xc747cb96782ba21aL,0xc5254469f72b33a5L,0x772ef6dec7f80c81L,
+ 0xd73acbfe2cd9e6b5L } },
+ /* 46 << 0 */
+ { { 0x4075b5b149ee90d9L,0x785c339aa06e9ebaL,0xa1030d5babf825e0L,
+ 0xcec684c3a42931dcL },
+ { 0x42ab62c9c1586e63L,0x45431d665ab43f2bL,0x57c8b2c055f7835dL,
+ 0x033da338c1b7f865L } },
+ /* 47 << 0 */
+ { { 0x283c7513caa76097L,0x0a624fa936c83906L,0x6b20afec715af2c7L,
+ 0x4b969974eba78bfdL },
+ { 0x220755ccd921d60eL,0x9b944e107baeca13L,0x04819d515ded93d4L,
+ 0x9bbff86e6dddfd27L } },
+ /* 48 << 0 */
+ { { 0x6b34413077adc612L,0xa7496529bbd803a0L,0x1a1baaa76d8805bdL,
+ 0xc8403902470343adL },
+ { 0x39f59f66175adff1L,0x0b26d7fbb7d8c5b7L,0xa875f5ce529d75e3L,
+ 0x85efc7e941325cc2L } },
+ /* 49 << 0 */
+ { { 0x21950b421ff6acd3L,0xffe7048453dc6909L,0xff4cd0b228766127L,
+ 0xabdbe6084fb7db2bL },
+ { 0x837c92285e1109e8L,0x26147d27f4645b5aL,0x4d78f592f7818ed8L,
+ 0xd394077ef247fa36L } },
+ /* 50 << 0 */
+ { { 0x0fb9c2d0488c171aL,0xa78bfbaa13685278L,0xedfbe268d5b1fa6aL,
+ 0x0dceb8db2b7eaba7L },
+ { 0xbf9e80899ae2b710L,0xefde7ae6a4449c96L,0x43b7716bcc143a46L,
+ 0xd7d34194c3628c13L } },
+ /* 51 << 0 */
+ { { 0x508cec1c3b3f64c9L,0xe20bc0ba1e5edf3fL,0xda1deb852f4318d4L,
+ 0xd20ebe0d5c3fa443L },
+ { 0x370b4ea773241ea3L,0x61f1511c5e1a5f65L,0x99a5e23d82681c62L,
+ 0xd731e383a2f54c2dL } },
+ /* 52 << 0 */
+ { { 0x2692f36e83445904L,0x2e0ec469af45f9c0L,0x905a3201c67528b7L,
+ 0x88f77f34d0e5e542L },
+ { 0xf67a8d295864687cL,0x23b92eae22df3562L,0x5c27014b9bbec39eL,
+ 0x7ef2f2269c0f0f8dL } },
+ /* 53 << 0 */
+ { { 0x97359638546c4d8dL,0x5f9c3fc492f24679L,0x912e8beda8c8acd9L,
+ 0xec3a318d306634b0L },
+ { 0x80167f41c31cb264L,0x3db82f6f522113f2L,0xb155bcd2dcafe197L,
+ 0xfba1da5943465283L } },
+ /* 54 << 0 */
+ { { 0xa0425b8eb212cf53L,0x4f2e512ef8557c5fL,0xc1286ff925c4d56cL,
+ 0xbb8a0feaee26c851L },
+ { 0xc28f70d2e7d6107eL,0x7ee0c444e76265aaL,0x3df277a41d1936b1L,
+ 0x1a556e3fea9595ebL } },
+ /* 55 << 0 */
+ { { 0x258bbbf9e7305683L,0x31eea5bf07ef5be6L,0x0deb0e4a46c814c1L,
+ 0x5cee8449a7b730ddL },
+ { 0xeab495c5a0182bdeL,0xee759f879e27a6b4L,0xc2cf6a6880e518caL,
+ 0x25e8013ff14cf3f4L } },
+ /* 56 << 0 */
+ { { 0x8fc441407e8d7a14L,0xbb1ff3ca9556f36aL,0x6a84438514600044L,
+ 0xba3f0c4a7451ae63L },
+ { 0xdfcac25b1f9af32aL,0x01e0db86b1f2214bL,0x4e9a5bc2a4b596acL,
+ 0x83927681026c2c08L } },
+ /* 57 << 0 */
+ { { 0x3ec832e77acaca28L,0x1bfeea57c7385b29L,0x068212e3fd1eaf38L,
+ 0xc13298306acf8cccL },
+ { 0xb909f2db2aac9e59L,0x5748060db661782aL,0xc5ab2632c79b7a01L,
+ 0xda44c6c600017626L } },
+ /* 58 << 0 */
+ { { 0xf26c00e8a7ea82f0L,0x99cac80de4299aafL,0xd66fe3b67ed78be1L,
+ 0x305f725f648d02cdL },
+ { 0x33ed1bc4623fb21bL,0xfa70533e7a6319adL,0x17ab562dbe5ffb3eL,
+ 0x0637499456674741L } },
+ /* 59 << 0 */
+ { { 0x69d44ed65c46aa8eL,0x2100d5d3a8d063d1L,0xcb9727eaa2d17c36L,
+ 0x4c2bab1b8add53b7L },
+ { 0xa084e90c15426704L,0x778afcd3a837ebeaL,0x6651f7017ce477f8L,
+ 0xa062499846fb7a8bL } },
+ /* 60 << 0 */
+ { { 0xdc1e6828ed8a6e19L,0x33fc23364189d9c7L,0x026f8fe2671c39bcL,
+ 0xd40c4ccdbc6f9915L },
+ { 0xafa135bbf80e75caL,0x12c651a022adff2cL,0xc40a04bd4f51ad96L,
+ 0x04820109bbe4e832L } },
+ /* 61 << 0 */
+ { { 0x3667eb1a7f4c04ccL,0x59556621a9404f84L,0x71cdf6537eceb50aL,
+ 0x994a44a69b8335faL },
+ { 0xd7faf819dbeb9b69L,0x473c5680eed4350dL,0xb6658466da44bba2L,
+ 0x0d1bc780872bdbf3L } },
+ /* 62 << 0 */
+ { { 0xe535f175a1962f91L,0x6ed7e061ed58f5a7L,0x177aa4c02089a233L,
+ 0x0dbcb03ae539b413L },
+ { 0xe3dc424ebb32e38eL,0x6472e5ef6806701eL,0xdd47ff98814be9eeL,
+ 0x6b60cfff35ace009L } },
+ /* 63 << 0 */
+ { { 0xb8d3d9319ff91fe5L,0x039c4800f0518eedL,0x95c376329182cb26L,
+ 0x0763a43482fc568dL },
+ { 0x707c04d5383e76baL,0xac98b930824e8197L,0x92bf7c8f91230de0L,
+ 0x90876a0140959b70L } },
+ /* 64 << 0 */
+ { { 0xdb6d96f305968b80L,0x380a0913089f73b9L,0x7da70b83c2c61e01L,
+ 0x95fb8394569b38c7L },
+ { 0x9a3c651280edfe2fL,0x8f726bb98faeaf82L,0x8010a4a078424bf8L,
+ 0x296720440e844970L } },
+ /* 0 << 7 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 7 */
+ { { 0x63c5cb817a2ad62aL,0x7ef2b6b9ac62ff54L,0x3749bba4b3ad9db5L,
+ 0xad311f2c46d5a617L },
+ { 0xb77a8087c2ff3b6dL,0xb46feaf3367834ffL,0xf8aa266d75d6b138L,
+ 0xfa38d320ec008188L } },
+ /* 2 << 7 */
+ { { 0x486d8ffa696946fcL,0x50fbc6d8b9cba56dL,0x7e3d423e90f35a15L,
+ 0x7c3da195c0dd962cL },
+ { 0xe673fdb03cfd5d8bL,0x0704b7c2889dfca5L,0xf6ce581ff52305aaL,
+ 0x399d49eb914d5e53L } },
+ /* 3 << 7 */
+ { { 0x380a496d6ec293cdL,0x733dbda78e7051f5L,0x037e388db849140aL,
+ 0xee4b32b05946dbf6L },
+ { 0xb1c4fda9cae368d1L,0x5001a7b0fdb0b2f3L,0x6df593742e3ac46eL,
+ 0x4af675f239b3e656L } },
+ /* 4 << 7 */
+ { { 0x44e3811039949296L,0x5b63827b361db1b5L,0x3e5323ed206eaff5L,
+ 0x942370d2c21f4290L },
+ { 0xf2caaf2ee0d985a1L,0x192cc64b7239846dL,0x7c0b8f47ae6312f8L,
+ 0x7dc61f9196620108L } },
+ /* 5 << 7 */
+ { { 0xb830fb5bc2da7de9L,0xd0e643df0ff8d3beL,0x31ee77ba188a9641L,
+ 0x4e8aa3aabcf6d502L },
+ { 0xf9fb65329a49110fL,0xd18317f62dd6b220L,0x7e3ced4152c3ea5aL,
+ 0x0d296a147d579c4aL } },
+ /* 6 << 7 */
+ { { 0x35d6a53eed4c3717L,0x9f8240cf3d0ed2a3L,0x8c0d4d05e5543aa5L,
+ 0x45d5bbfbdd33b4b4L },
+ { 0xfa04cc73137fd28eL,0x862ac6efc73b3ffdL,0x403ff9f531f51ef2L,
+ 0x34d5e0fcbc73f5a2L } },
+ /* 7 << 7 */
+ { { 0xf252682008913f4fL,0xea20ed61eac93d95L,0x51ed38b46ca6b26cL,
+ 0x8662dcbcea4327b0L },
+ { 0x6daf295c725d2aaaL,0xbad2752f8e52dcdaL,0x2210e7210b17daccL,
+ 0xa37f7912d51e8232L } },
+ /* 8 << 7 */
+ { { 0x4f7081e144cc3addL,0xd5ffa1d687be82cfL,0x89890b6c0edd6472L,
+ 0xada26e1a3ed17863L },
+ { 0x276f271563483caaL,0xe6924cd92f6077fdL,0x05a7fe980a466e3cL,
+ 0xf1c794b0b1902d1fL } },
+ /* 9 << 7 */
+ { { 0xe521368882a8042cL,0xd931cfafcd278298L,0x069a0ae0f597a740L,
+ 0x0adbb3f3eb59107cL },
+ { 0x983e951e5eaa8eb8L,0xe663a8b511b48e78L,0x1631cc0d8a03f2c5L,
+ 0x7577c11e11e271e2L } },
+ /* 10 << 7 */
+ { { 0x33b2385c08369a90L,0x2990c59b190eb4f8L,0x819a6145c68eac80L,
+ 0x7a786d622ec4a014L },
+ { 0x33faadbe20ac3a8dL,0x31a217815aba2d30L,0x209d2742dba4f565L,
+ 0xdb2ce9e355aa0fbbL } },
+ /* 11 << 7 */
+ { { 0x8cef334b168984dfL,0xe81dce1733879638L,0xf6e6949c263720f0L,
+ 0x5c56feaff593cbecL },
+ { 0x8bff5601fde58c84L,0x74e241172eccb314L,0xbcf01b614c9a8a78L,
+ 0xa233e35e544c9868L } },
+ /* 12 << 7 */
+ { { 0xb3156bf38bd7aff1L,0x1b5ee4cb1d81b146L,0x7ba1ac41d628a915L,
+ 0x8f3a8f9cfd89699eL },
+ { 0x7329b9c9a0748be7L,0x1d391c95a92e621fL,0xe51e6b214d10a837L,
+ 0xd255f53a4947b435L } },
+ /* 13 << 7 */
+ { { 0x07669e04f1788ee3L,0xc14f27afa86938a2L,0x8b47a334e93a01c0L,
+ 0xff627438d9366808L },
+ { 0x7a0985d8ca2a5965L,0x3d9a5542d6e9b9b3L,0xc23eb80b4cf972e8L,
+ 0x5c1c33bb4fdf72fdL } },
+ /* 14 << 7 */
+ { { 0x0c4a58d474a86108L,0xf8048a8fee4c5d90L,0xe3c7c924e86d4c80L,
+ 0x28c889de056a1e60L },
+ { 0x57e2662eb214a040L,0xe8c48e9837e10347L,0x8774286280ac748aL,
+ 0xf1c24022186b06f2L } },
+ /* 15 << 7 */
+ { { 0xac2dd4c35f74040aL,0x409aeb71fceac957L,0x4fbad78255c4ec23L,
+ 0xb359ed618a7b76ecL },
+ { 0x12744926ed6f4a60L,0xe21e8d7f4b912de3L,0xe2575a59fc705a59L,
+ 0x72f1d4deed2dbc0eL } },
+ /* 16 << 7 */
+ { { 0x3d2b24b9eb7926b8L,0xbff88cb3cdbe5509L,0xd0f399afe4dd640bL,
+ 0x3c5fe1302f76ed45L },
+ { 0x6f3562f43764fb3dL,0x7b5af3183151b62dL,0xd5bd0bc7d79ce5f3L,
+ 0xfdaf6b20ec66890fL } },
+ /* 17 << 7 */
+ { { 0x735c67ec6063540cL,0x50b259c2e5f9cb8fL,0xb8734f9a3f99c6abL,
+ 0xf8cc13d5a3a7bc85L },
+ { 0x80c1b305c5217659L,0xfe5364d44ec12a54L,0xbd87045e681345feL,
+ 0x7f8efeb1582f897fL } },
+ /* 18 << 7 */
+ { { 0xe8cbf1e5d5923359L,0xdb0cea9d539b9fb0L,0x0c5b34cf49859b98L,
+ 0x5e583c56a4403cc6L },
+ { 0x11fc1a2dd48185b7L,0xc93fbc7e6e521787L,0x47e7a05805105b8bL,
+ 0x7b4d4d58db8260c8L } },
+ /* 19 << 7 */
+ { { 0xe33930b046eb842aL,0x8e844a9a7bdae56dL,0x34ef3a9e13f7fdfcL,
+ 0xb3768f82636ca176L },
+ { 0x2821f4e04e09e61cL,0x414dc3a1a0c7cddcL,0xd537943754945fcdL,
+ 0x151b6eefb3555ff1L } },
+ /* 20 << 7 */
+ { { 0xb31bd6136339c083L,0x39ff8155dfb64701L,0x7c3388d2e29604abL,
+ 0x1e19084ba6b10442L },
+ { 0x17cf54c0eccd47efL,0x896933854a5dfb30L,0x69d023fb47daf9f6L,
+ 0x9222840b7d91d959L } },
+ /* 21 << 7 */
+ { { 0x439108f5803bac62L,0x0b7dd91d379bd45fL,0xd651e827ca63c581L,
+ 0x5c5d75f6509c104fL },
+ { 0x7d5fc7381f2dc308L,0x20faa7bfd98454beL,0x95374beea517b031L,
+ 0xf036b9b1642692acL } },
+ /* 22 << 7 */
+ { { 0xc510610939842194L,0xb7e2353e49d05295L,0xfc8c1d5cefb42ee0L,
+ 0xe04884eb08ce811cL },
+ { 0xf1f75d817419f40eL,0x5b0ac162a995c241L,0x120921bbc4c55646L,
+ 0x713520c28d33cf97L } },
+ /* 23 << 7 */
+ { { 0xb4a65a5ce98c5100L,0x6cec871d2ddd0f5aL,0x251f0b7f9ba2e78bL,
+ 0x224a8434ce3a2a5fL },
+ { 0x26827f6125f5c46fL,0x6a22bedc48545ec0L,0x25ae5fa0b1bb5cdcL,
+ 0xd693682ffcb9b98fL } },
+ /* 24 << 7 */
+ { { 0x32027fe891e5d7d3L,0xf14b7d1773a07678L,0xf88497b3c0dfdd61L,
+ 0xf7c2eec02a8c4f48L },
+ { 0xaa5573f43756e621L,0xc013a2401825b948L,0x1c03b34563878572L,
+ 0xa0472bea653a4184L } },
+ /* 25 << 7 */
+ { { 0xf4222e270ac69a80L,0x34096d25f51e54f6L,0x00a648cb8fffa591L,
+ 0x4e87acdc69b6527fL },
+ { 0x0575e037e285ccb4L,0x188089e450ddcf52L,0xaa96c9a8870ff719L,
+ 0x74a56cd81fc7e369L } },
+ /* 26 << 7 */
+ { { 0x41d04ee21726931aL,0x0bbbb2c83660ecfdL,0xa6ef6de524818e18L,
+ 0xe421cc51e7d57887L },
+ { 0xf127d208bea87be6L,0x16a475d3b1cdd682L,0x9db1b684439b63f7L,
+ 0x5359b3dbf0f113b6L } },
+ /* 27 << 7 */
+ { { 0xdfccf1de8bf06e31L,0x1fdf8f44dd383901L,0x10775cad5017e7d2L,
+ 0xdfc3a59758d11eefL },
+ { 0x6ec9c8a0b1ecff10L,0xee6ed6cc28400549L,0xb5ad7bae1b4f8d73L,
+ 0x61b4f11de00aaab9L } },
+ /* 28 << 7 */
+ { { 0x7b32d69bd4eff2d7L,0x88ae67714288b60fL,0x159461b437a1e723L,
+ 0x1f3d4789570aae8cL },
+ { 0x869118c07f9871daL,0x35fbda78f635e278L,0x738f3641e1541dacL,
+ 0x6794b13ac0dae45fL } },
+ /* 29 << 7 */
+ { { 0x065064ac09cc0917L,0x27c53729c68540fdL,0x0d2d4c8eef227671L,
+ 0xd23a9f80a1785a04L },
+ { 0x98c5952852650359L,0xfa09ad0174a1acadL,0x082d5a290b55bf5cL,
+ 0xa40f1c67419b8084L } },
+ /* 30 << 7 */
+ { { 0x3a5c752edcc18770L,0x4baf1f2f8825c3a5L,0xebd63f7421b153edL,
+ 0xa2383e47b2f64723L },
+ { 0xe7bf620a2646d19aL,0x56cb44ec03c83ffdL,0xaf7267c94f6be9f1L,
+ 0x8b2dfd7bc06bb5e9L } },
+ /* 31 << 7 */
+ { { 0xb87072f2a672c5c7L,0xeacb11c80d53c5e2L,0x22dac29dff435932L,
+ 0x37bdb99d4408693cL },
+ { 0xf6e62fb62899c20fL,0x3535d512447ece24L,0xfbdc6b88ff577ce3L,
+ 0x726693bd190575f2L } },
+ /* 32 << 7 */
+ { { 0x6772b0e5ab4b35a2L,0x1d8b6001f5eeaacfL,0x728f7ce4795b9580L,
+ 0x4a20ed2a41fb81daL },
+ { 0x9f685cd44fec01e6L,0x3ed7ddcca7ff50adL,0x460fd2640c2d97fdL,
+ 0x3a241426eb82f4f9L } },
+ /* 33 << 7 */
+ { { 0x17d1df2c6a8ea820L,0xb2b50d3bf22cc254L,0x03856cbab7291426L,
+ 0x87fd26ae04f5ee39L },
+ { 0x9cb696cc02bee4baL,0x5312180406820fd6L,0xa5dfc2690212e985L,
+ 0x666f7ffa160f9a09L } },
+ /* 34 << 7 */
+ { { 0xc503cd33bccd9617L,0x365dede4ba7730a3L,0x798c63555ddb0786L,
+ 0xa6c3200efc9cd3bcL },
+ { 0x060ffb2ce5e35efdL,0x99a4e25b5555a1c1L,0x11d95375f70b3751L,
+ 0x0a57354a160e1bf6L } },
+ /* 35 << 7 */
+ { { 0xecb3ae4bf8e4b065L,0x07a834c42e53022bL,0x1cd300b38692ed96L,
+ 0x16a6f79261ee14ecL },
+ { 0x8f1063c66a8649edL,0xfbcdfcfe869f3e14L,0x2cfb97c100a7b3ecL,
+ 0xcea49b3c7130c2f1L } },
+ /* 36 << 7 */
+ { { 0x462d044fe9d96488L,0x4b53d52e8182a0c1L,0x84b6ddd30391e9e9L,
+ 0x80ab7b48b1741a09L },
+ { 0xec0e15d427d3317fL,0x8dfc1ddb1a64671eL,0x93cc5d5fd49c5b92L,
+ 0xc995d53d3674a331L } },
+ /* 37 << 7 */
+ { { 0x302e41ec090090aeL,0x2278a0ccedb06830L,0x1d025932fbc99690L,
+ 0x0c32fbd2b80d68daL },
+ { 0xd79146daf341a6c1L,0xae0ba1391bef68a0L,0xc6b8a5638d774b3aL,
+ 0x1cf307bd880ba4d7L } },
+ /* 38 << 7 */
+ { { 0xc033bdc719803511L,0xa9f97b3b8888c3beL,0x3d68aebc85c6d05eL,
+ 0xc3b88a9d193919ebL },
+ { 0x2d300748c48b0ee3L,0x7506bc7c07a746c1L,0xfc48437c6e6d57f3L,
+ 0x5bd71587cfeaa91aL } },
+ /* 39 << 7 */
+ { { 0xa4ed0408c1bc5225L,0xd0b946db2719226dL,0x109ecd62758d2d43L,
+ 0x75c8485a2751759bL },
+ { 0xb0b75f499ce4177aL,0x4fa61a1e79c10c3dL,0xc062d300a167fcd7L,
+ 0x4df3874c750f0fa8L } },
+ /* 40 << 7 */
+ { { 0x29ae2cf983dfedc9L,0xf84371348d87631aL,0xaf5717117429c8d2L,
+ 0x18d15867146d9272L },
+ { 0x83053ecf69769bb7L,0xc55eb856c479ab82L,0x5ef7791c21b0f4b2L,
+ 0xaa5956ba3d491525L } },
+ /* 41 << 7 */
+ { { 0x407a96c29fe20ebaL,0xf27168bbe52a5ad3L,0x43b60ab3bf1d9d89L,
+ 0xe45c51ef710e727aL },
+ { 0xdfca5276099b4221L,0x8dc6407c2557a159L,0x0ead833591035895L,
+ 0x0a9db9579c55dc32L } },
+ /* 42 << 7 */
+ { { 0xe40736d3df61bc76L,0x13a619c03f778cdbL,0x6dd921a4c56ea28fL,
+ 0x76a524332fa647b4L },
+ { 0x23591891ac5bdc5dL,0xff4a1a72bac7dc01L,0x9905e26162df8453L,
+ 0x3ac045dfe63b265fL } },
+ /* 43 << 7 */
+ { { 0x8a3f341bad53dba7L,0x8ec269cc837b625aL,0xd71a27823ae31189L,
+ 0x8fb4f9a355e96120L },
+ { 0x804af823ff9875cfL,0x23224f575d442a9bL,0x1c4d3b9eecc62679L,
+ 0x91da22fba0e7ddb1L } },
+ /* 44 << 7 */
+ { { 0xa370324d6c04a661L,0x9710d3b65e376d17L,0xed8c98f03044e357L,
+ 0xc364ebbe6422701cL },
+ { 0x347f5d517733d61cL,0xd55644b9cea826c3L,0x80c6e0ad55a25548L,
+ 0x0aa7641d844220a7L } },
+ /* 45 << 7 */
+ { { 0x1438ec8131810660L,0x9dfa6507de4b4043L,0x10b515d8cc3e0273L,
+ 0x1b6066dd28d8cfb2L },
+ { 0xd3b045919c9efebdL,0x425d4bdfa21c1ff4L,0x5fe5af19d57607d3L,
+ 0xbbf773f754481084L } },
+ /* 46 << 7 */
+ { { 0x8435bd6994b03ed1L,0xd9ad1de3634cc546L,0x2cf423fc00e420caL,
+ 0xeed26d80a03096ddL },
+ { 0xd7f60be7a4db09d2L,0xf47f569d960622f7L,0xe5925fd77296c729L,
+ 0xeff2db2626ca2715L } },
+ /* 47 << 7 */
+ { { 0xa6fcd014b913e759L,0x53da47868ff4de93L,0x14616d79c32068e1L,
+ 0xb187d664ccdf352eL },
+ { 0xf7afb6501dc90b59L,0x8170e9437daa1b26L,0xc8e3bdd8700c0a84L,
+ 0x6e8d345f6482bdfaL } },
+ /* 48 << 7 */
+ { { 0x84cfbfa1c5c5ea50L,0xd3baf14c67960681L,0x263984030dd50942L,
+ 0xe4b7839c4716a663L },
+ { 0xd5f1f794e7de6dc0L,0x5cd0f4d4622aa7ceL,0x5295f3f159acfeecL,
+ 0x8d933552953e0607L } },
+ /* 49 << 7 */
+ { { 0xc7db8ec5776c5722L,0xdc467e622b5f290cL,0xd4297e704ff425a9L,
+ 0x4be924c10cf7bb72L },
+ { 0x0d5dc5aea1892131L,0x8bf8a8e3a705c992L,0x73a0b0647a305ac5L,
+ 0x00c9ca4e9a8c77a8L } },
+ /* 50 << 7 */
+ { { 0x5dfee80f83774bddL,0x6313160285734485L,0xa1b524ae914a69a9L,
+ 0xebc2ffafd4e300d7L },
+ { 0x52c93db77cfa46a5L,0x71e6161f21653b50L,0x3574fc57a4bc580aL,
+ 0xc09015dde1bc1253L } },
+ /* 51 << 7 */
+ { { 0x4b7b47b2d174d7aaL,0x4072d8e8f3a15d04L,0xeeb7d47fd6fa07edL,
+ 0x6f2b9ff9edbdafb1L },
+ { 0x18c516153760fe8aL,0x7a96e6bff06c6c13L,0x4d7a04100ea2d071L,
+ 0xa1914e9b0be2a5ceL } },
+ /* 52 << 7 */
+ { { 0x5726e357d8a3c5cfL,0x1197ecc32abb2b13L,0x6c0d7f7f31ae88ddL,
+ 0x15b20d1afdbb3efeL },
+ { 0xcd06aa2670584039L,0x2277c969a7dc9747L,0xbca695877855d815L,
+ 0x899ea2385188b32aL } },
+ /* 53 << 7 */
+ { { 0x37d9228b760c1c9dL,0xc7efbb119b5c18daL,0x7f0d1bc819f6dbc5L,
+ 0x4875384b07e6905bL },
+ { 0xc7c50baa3ba8cd86L,0xb0ce40fbc2905de0L,0x708406737a231952L,
+ 0xa912a262cf43de26L } },
+ /* 54 << 7 */
+ { { 0x9c38ddcceb5b76c1L,0x746f528526fc0ab4L,0x52a63a50d62c269fL,
+ 0x60049c5599458621L },
+ { 0xe7f48f823c2f7c9eL,0x6bd99043917d5cf3L,0xeb1317a88701f469L,
+ 0xbd3fe2ed9a449fe0L } },
+ /* 55 << 7 */
+ { { 0x421e79ca12ef3d36L,0x9ee3c36c3e7ea5deL,0xe48198b5cdff36f7L,
+ 0xaff4f967c6b82228L },
+ { 0x15e19dd0c47adb7eL,0x45699b23032e7dfaL,0x40680c8b1fae026aL,
+ 0x5a347a48550dbf4dL } },
+ /* 56 << 7 */
+ { { 0xe652533b3cef0d7dL,0xd94f7b182bbb4381L,0x838752be0e80f500L,
+ 0x8e6e24889e9c9bfbL },
+ { 0xc975169716caca6aL,0x866c49d838531ad9L,0xc917e2397151ade1L,
+ 0x2d016ec16037c407L } },
+ /* 57 << 7 */
+ { { 0xa407ccc900eac3f9L,0x835f6280e2ed4748L,0xcc54c3471cc98e0dL,
+ 0x0e969937dcb572ebL },
+ { 0x1b16c8e88f30c9cbL,0xa606ae75373c4661L,0x47aa689b35502cabL,
+ 0xf89014ae4d9bb64fL } },
+ /* 58 << 7 */
+ { { 0x202f6a9c31c71f7bL,0x01f95aa3296ffe5cL,0x5fc0601453cec3a3L,
+ 0xeb9912375f498a45L },
+ { 0xae9a935e5d91ba87L,0xc6ac62810b564a19L,0x8a8fe81c3bd44e69L,
+ 0x7c8b467f9dd11d45L } },
+ /* 59 << 7 */
+ { { 0xf772251fea5b8e69L,0xaeecb3bdc5b75fbcL,0x1aca3331887ff0e5L,
+ 0xbe5d49ff19f0a131L },
+ { 0x582c13aae5c8646fL,0xdbaa12e820e19980L,0x8f40f31af7abbd94L,
+ 0x1f13f5a81dfc7663L } },
+ /* 60 << 7 */
+ { { 0x5d81f1eeaceb4fc0L,0x362560025e6f0f42L,0x4b67d6d7751370c8L,
+ 0x2608b69803e80589L },
+ { 0xcfc0d2fc05268301L,0xa6943d3940309212L,0x192a90c21fd0e1c2L,
+ 0xb209f11337f1dc76L } },
+ /* 61 << 7 */
+ { { 0xefcc5e0697bf1298L,0xcbdb6730219d639eL,0xd009c116b81e8c6fL,
+ 0xa3ffdde31a7ce2e5L },
+ { 0xc53fbaaaa914d3baL,0x836d500f88df85eeL,0xd98dc71b66ee0751L,
+ 0x5a3d7005714516fdL } },
+ /* 62 << 7 */
+ { { 0x21d3634d39eedbbaL,0x35cd2e680455a46dL,0xc8cafe65f9d7eb0cL,
+ 0xbda3ce9e00cefb3eL },
+ { 0xddc17a602c9cf7a4L,0x01572ee47bcb8773L,0xa92b2b018c7548dfL,
+ 0x732fd309a84600e3L } },
+ /* 63 << 7 */
+ { { 0xe22109c716543a40L,0x9acafd36fede3c6cL,0xfb2068526824e614L,
+ 0x2a4544a9da25dca0L },
+ { 0x2598526291d60b06L,0x281b7be928753545L,0xec667b1a90f13b27L,
+ 0x33a83aff940e2eb4L } },
+ /* 64 << 7 */
+ { { 0x80009862d5d721d5L,0x0c3357a35bd3a182L,0x27f3a83b7aa2cda4L,
+ 0xb58ae74ef6f83085L },
+ { 0x2a911a812e6dad6bL,0xde286051f43d6c5bL,0x4bdccc41f996c4d8L,
+ 0xe7312ec00ae1e24eL } },
+ /* 0 << 14 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 14 */
+ { { 0xf8d112e76e6485b3L,0x4d3e24db771c52f8L,0x48e3ee41684a2f6dL,
+ 0x7161957d21d95551L },
+ { 0x19631283cdb12a6cL,0xbf3fa8822e50e164L,0xf6254b633166cc73L,
+ 0x3aefa7aeaee8cc38L } },
+ /* 2 << 14 */
+ { { 0x79b0fe623b36f9fdL,0x26543b23fde19fc0L,0x136e64a0958482efL,
+ 0x23f637719b095825L },
+ { 0x14cfd596b6a1142eL,0x5ea6aac6335aac0bL,0x86a0e8bdf3081dd5L,
+ 0x5fb89d79003dc12aL } },
+ /* 3 << 14 */
+ { { 0xf615c33af72e34d4L,0x0bd9ea40110eec35L,0x1c12bc5bc1dea34eL,
+ 0x686584c949ae4699L },
+ { 0x13ad95d38c97b942L,0x4609561a4e5c7562L,0x9e94a4aef2737f89L,
+ 0xf57594c6371c78b6L } },
+ /* 4 << 14 */
+ { { 0x0f0165fce3779ee3L,0xe00e7f9dbd495d9eL,0x1fa4efa220284e7aL,
+ 0x4564bade47ac6219L },
+ { 0x90e6312ac4708e8eL,0x4f5725fba71e9adfL,0xe95f55ae3d684b9fL,
+ 0x47f7ccb11e94b415L } },
+ /* 5 << 14 */
+ { { 0x7322851b8d946581L,0xf0d13133bdf4a012L,0xa3510f696584dae0L,
+ 0x03a7c1713c9f6c6dL },
+ { 0x5be97f38e475381aL,0xca1ba42285823334L,0xf83cc5c70be17ddaL,
+ 0x158b14940b918c0fL } },
+ /* 6 << 14 */
+ { { 0xda3a77e5522e6b69L,0x69c908c3bbcd6c18L,0x1f1b9e48d924fd56L,
+ 0x37c64e36aa4bb3f7L },
+ { 0x5a4fdbdfee478d7dL,0xba75c8bc0193f7a0L,0x84bc1e8456cd16dfL,
+ 0x1fb08f0846fad151L } },
+ /* 7 << 14 */
+ { { 0x8a7cabf9842e9f30L,0xa331d4bf5eab83afL,0xd272cfba017f2a6aL,
+ 0x27560abc83aba0e3L },
+ { 0x94b833870e3a6b75L,0x25c6aea26b9f50f5L,0x803d691db5fdf6d0L,
+ 0x03b77509e6333514L } },
+ /* 8 << 14 */
+ { { 0x3617890361a341c1L,0x3604dc600cfd6142L,0x022295eb8533316cL,
+ 0x3dbde4ac44af2922L },
+ { 0x898afc5d1c7eef69L,0x58896805d14f4fa1L,0x05002160203c21caL,
+ 0x6f0d1f3040ef730bL } },
+ /* 9 << 14 */
+ { { 0x8e8c44d4196224f8L,0x75a4ab95374d079dL,0x79085ecc7d48f123L,
+ 0x56f04d311bf65ad8L },
+ { 0xe220bf1cbda602b2L,0x73ee1742f9612c69L,0x76008fc8084fd06bL,
+ 0x4000ef9ff11380d1L } },
+ /* 10 << 14 */
+ { { 0x48201b4b12cfe297L,0x3eee129c292f74e5L,0xe1fe114ec9e874e8L,
+ 0x899b055c92c5fc41L },
+ { 0x4e477a643a39c8cfL,0x82f09efe78963cc9L,0x6fd3fd8fd333f863L,
+ 0x85132b2adc949c63L } },
+ /* 11 << 14 */
+ { { 0x7e06a3ab516eb17bL,0x73bec06fd2c7372bL,0xe4f74f55ba896da6L,
+ 0xbb4afef88e9eb40fL },
+ { 0x2d75bec8e61d66b0L,0x02bda4b4ef29300bL,0x8bbaa8de026baa5aL,
+ 0xff54befda07f4440L } },
+ /* 12 << 14 */
+ { { 0xbd9b8b1dbe7a2af3L,0xec51caa94fb74a72L,0xb9937a4b63879697L,
+ 0x7c9a9d20ec2687d5L },
+ { 0x1773e44f6ef5f014L,0x8abcf412e90c6900L,0x387bd0228142161eL,
+ 0x50393755fcb6ff2aL } },
+ /* 13 << 14 */
+ { { 0x9813fd56ed6def63L,0x53cf64827d53106cL,0x991a35bd431f7ac1L,
+ 0xf1e274dd63e65fafL },
+ { 0xf63ffa3c44cc7880L,0x411a426b7c256981L,0xb698b9fd93a420e0L,
+ 0x89fdddc0ae53f8feL } },
+ /* 14 << 14 */
+ { { 0x766e072232398baaL,0x205fee425cfca031L,0xa49f53417a029cf2L,
+ 0xa88c68b84023890dL },
+ { 0xbc2750417337aaa8L,0x9ed364ad0eb384f4L,0xe0816f8529aba92fL,
+ 0x2e9e194104e38a88L } },
+ /* 15 << 14 */
+ { { 0x57eef44a3dafd2d5L,0x35d1fae597ed98d8L,0x50628c092307f9b1L,
+ 0x09d84aaed6cba5c6L },
+ { 0x67071bc788aaa691L,0x2dea57a9afe6cb03L,0xdfe11bb43d78ac01L,
+ 0x7286418c7fd7aa51L } },
+ /* 16 << 14 */
+ { { 0xfabf770977f7195aL,0x8ec86167adeb838fL,0xea1285a8bb4f012dL,
+ 0xd68835039a3eab3fL },
+ { 0xee5d24f8309004c2L,0xa96e4b7613ffe95eL,0x0cdffe12bd223ea4L,
+ 0x8f5c2ee5b6739a53L } },
+ /* 17 << 14 */
+ { { 0x5cb4aaa5dd968198L,0xfa131c5272413a6cL,0x53d46a909536d903L,
+ 0xb270f0d348606d8eL },
+ { 0x518c7564a053a3bcL,0x088254b71a86caefL,0xb3ba8cb40ab5efd0L,
+ 0x5c59900e4605945dL } },
+ /* 18 << 14 */
+ { { 0xecace1dda1887395L,0x40960f36932a65deL,0x9611ff5c3aa95529L,
+ 0xc58215b07c1e5a36L },
+ { 0xd48c9b58f0e1a524L,0xb406856bf590dfb8L,0xc7605e049cd95662L,
+ 0x0dd036eea33ecf82L } },
+ /* 19 << 14 */
+ { { 0xa50171acc33156b3L,0xf09d24ea4a80172eL,0x4e1f72c676dc8eefL,
+ 0xe60caadc5e3d44eeL },
+ { 0x006ef8a6979b1d8fL,0x60908a1c97788d26L,0x6e08f95b266feec0L,
+ 0x618427c222e8c94eL } },
+ /* 20 << 14 */
+ { { 0x3d61333959145a65L,0xcd9bc368fa406337L,0x82d11be32d8a52a0L,
+ 0xf6877b2797a1c590L },
+ { 0x837a819bf5cbdb25L,0x2a4fd1d8de090249L,0x622a7de774990e5fL,
+ 0x840fa5a07945511bL } },
+ /* 21 << 14 */
+ { { 0x30b974be6558842dL,0x70df8c6417f3d0a6L,0x7c8035207542e46dL,
+ 0x7251fe7fe4ecc823L },
+ { 0xe59134cb5e9aac9aL,0x11bb0934f0045d71L,0x53e5d9b5dbcb1d4eL,
+ 0x8d97a90592defc91L } },
+ /* 22 << 14 */
+ { { 0xfe2893277946d3f9L,0xe132bd2407472273L,0xeeeb510c1eb6ae86L,
+ 0x777708c5f0595067L },
+ { 0x18e2c8cd1297029eL,0x2c61095cbbf9305eL,0xe466c2586b85d6d9L,
+ 0x8ac06c36da1ea530L } },
+ /* 23 << 14 */
+ { { 0xa365dc39a1304668L,0xe4a9c88507f89606L,0x65a4898facc7228dL,
+ 0x3e2347ff84ca8303L },
+ { 0xa5f6fb77ea7d23a3L,0x2fac257d672a71cdL,0x6908bef87e6a44d3L,
+ 0x8ff87566891d3d7aL } },
+ /* 24 << 14 */
+ { { 0xe58e90b36b0cf82eL,0x6438d2462615b5e7L,0x07b1f8fc669c145aL,
+ 0xb0d8b2da36f1e1cbL },
+ { 0x54d5dadbd9184c4dL,0x3dbb18d5f93d9976L,0x0a3e0f56d1147d47L,
+ 0x2afa8c8da0a48609L } },
+ /* 25 << 14 */
+ { { 0x275353e8bc36742cL,0x898f427eeea0ed90L,0x26f4947e3e477b00L,
+ 0x8ad8848a308741e3L },
+ { 0x6c703c38d74a2a46L,0x5e3e05a99ba17ba2L,0xc1fa6f664ab9a9e4L,
+ 0x474a2d9a3841d6ecL } },
+ /* 26 << 14 */
+ { { 0x871239ad653ae326L,0x14bcf72aa74cbb43L,0x8737650e20d4c083L,
+ 0x3df86536110ed4afL },
+ { 0xd2d86fe7b53ca555L,0x688cb00dabd5d538L,0xcf81bda31ad38468L,
+ 0x7ccfe3ccf01167b6L } },
+ /* 27 << 14 */
+ { { 0xcf4f47e06c4c1fe6L,0x557e1f1a298bbb79L,0xf93b974f30d45a14L,
+ 0x174a1d2d0baf97c4L },
+ { 0x7a003b30c51fbf53L,0xd8940991ee68b225L,0x5b0aa7b71c0f4173L,
+ 0x975797c9a20a7153L } },
+ /* 28 << 14 */
+ { { 0x26e08c07e3533d77L,0xd7222e6a2e341c99L,0x9d60ec3d8d2dc4edL,
+ 0xbdfe0d8f7c476cf8L },
+ { 0x1fe59ab61d056605L,0xa9ea9df686a8551fL,0x8489941e47fb8d8cL,
+ 0xfeb874eb4a7f1b10L } },
+ /* 29 << 14 */
+ { { 0xfe5fea867ee0d98fL,0x201ad34bdbf61864L,0x45d8fe4737c031d4L,
+ 0xd5f49fae795f0822L },
+ { 0xdb0fb291c7f4a40cL,0x2e69d9c1730ddd92L,0x754e105449d76987L,
+ 0x8a24911d7662db87L } },
+ /* 30 << 14 */
+ { { 0x61fc181060a71676L,0xe852d1a8f66a8ad1L,0x172bbd656417231eL,
+ 0x0d6de7bd3babb11fL },
+ { 0x6fde6f88c8e347f8L,0x1c5875479bd99cc3L,0x78e54ed034076950L,
+ 0x97f0f334796e83baL } },
+ /* 31 << 14 */
+ { { 0xe4dbe1ce4924867aL,0xbd5f51b060b84917L,0x375300403cb09a79L,
+ 0xdb3fe0f8ff1743d8L },
+ { 0xed7894d8556fa9dbL,0xfa26216923412fbfL,0x563be0dbba7b9291L,
+ 0x6ca8b8c00c9fb234L } },
+ /* 32 << 14 */
+ { { 0xed406aa9bd763802L,0xc21486a065303da1L,0x61ae291ec7e62ec4L,
+ 0x622a0492df99333eL },
+ { 0x7fd80c9dbb7a8ee0L,0xdc2ed3bc6c01aedbL,0x35c35a1208be74ecL,
+ 0xd540cb1a469f671fL } },
+ /* 33 << 14 */
+ { { 0xd16ced4ecf84f6c7L,0x8561fb9c2d090f43L,0x7e693d796f239db4L,
+ 0xa736f92877bd0d94L },
+ { 0x07b4d9292c1950eeL,0xda17754356dc11b3L,0xa5dfbbaa7a6a878eL,
+ 0x1c70cb294decb08aL } },
+ /* 34 << 14 */
+ { { 0xfba28c8b6f0f7c50L,0xa8eba2b8854dcc6dL,0x5ff8e89a36b78642L,
+ 0x070c1c8ef6873adfL },
+ { 0xbbd3c3716484d2e4L,0xfb78318f0d414129L,0x2621a39c6ad93b0bL,
+ 0x979d74c2a9e917f7L } },
+ /* 35 << 14 */
+ { { 0xfc19564761fb0428L,0x4d78954abee624d4L,0xb94896e0b8ae86fdL,
+ 0x6667ac0cc91c8b13L },
+ { 0x9f18051243bcf832L,0xfbadf8b7a0010137L,0xc69b4089b3ba8aa7L,
+ 0xfac4bacde687ce85L } },
+ /* 36 << 14 */
+ { { 0x9164088d977eab40L,0x51f4c5b62760b390L,0xd238238f340dd553L,
+ 0x358566c3db1d31c9L },
+ { 0x3a5ad69e5068f5ffL,0xf31435fcdaff6b06L,0xae549a5bd6debff0L,
+ 0x59e5f0b775e01331L } },
+ /* 37 << 14 */
+ { { 0x5d492fb898559acfL,0x96018c2e4db79b50L,0x55f4a48f609f66aaL,
+ 0x1943b3af4900a14fL },
+ { 0xc22496df15a40d39L,0xb2a446844c20f7c5L,0x76a35afa3b98404cL,
+ 0xbec75725ff5d1b77L } },
+ /* 38 << 14 */
+ { { 0xb67aa163bea06444L,0x27e95bb2f724b6f2L,0x3c20e3e9d238c8abL,
+ 0x1213754eddd6ae17L },
+ { 0x8c431020716e0f74L,0x6679c82effc095c2L,0x2eb3adf4d0ac2932L,
+ 0x2cc970d301bb7a76L } },
+ /* 39 << 14 */
+ { { 0x70c71f2f740f0e66L,0x545c616b2b6b23ccL,0x4528cfcbb40a8bd7L,
+ 0xff8396332ab27722L },
+ { 0x049127d9025ac99aL,0xd314d4a02b63e33bL,0xc8c310e728d84519L,
+ 0x0fcb8983b3bc84baL } },
+ /* 40 << 14 */
+ { { 0x2cc5226138634818L,0x501814f4b44c2e0bL,0xf7e181aa54dfdba3L,
+ 0xcfd58ff0e759718cL },
+ { 0xf90cdb14d3b507a8L,0x57bd478ec50bdad8L,0x29c197e250e5f9aaL,
+ 0x4db6eef8e40bc855L } },
+ /* 41 << 14 */
+ { { 0x2cc8f21ad1fc0654L,0xc71cc96381269d73L,0xecfbb204077f49f9L,
+ 0xdde92571ca56b793L },
+ { 0x9abed6a3f97ad8f7L,0xe6c19d3f924de3bdL,0x8dce92f4a140a800L,
+ 0x85f44d1e1337af07L } },
+ /* 42 << 14 */
+ { { 0x5953c08b09d64c52L,0xa1b5e49ff5df9749L,0x336a8fb852735f7dL,
+ 0xb332b6db9add676bL },
+ { 0x558b88a0b4511aa4L,0x09788752dbd5cc55L,0x16b43b9cd8cd52bdL,
+ 0x7f0bc5a0c2a2696bL } },
+ /* 43 << 14 */
+ { { 0x146e12d4c11f61efL,0x9ce107543a83e79eL,0x08ec73d96cbfca15L,
+ 0x09ff29ad5b49653fL },
+ { 0xe31b72bde7da946eL,0xebf9eb3bee80a4f2L,0xd1aabd0817598ce4L,
+ 0x18b5fef453f37e80L } },
+ /* 44 << 14 */
+ { { 0xd5d5cdd35958cd79L,0x3580a1b51d373114L,0xa36e4c91fa935726L,
+ 0xa38c534def20d760L },
+ { 0x7088e40a2ff5845bL,0xe5bb40bdbd78177fL,0x4f06a7a8857f9920L,
+ 0xe3cc3e50e968f05dL } },
+ /* 45 << 14 */
+ { { 0x1d68b7fee5682d26L,0x5206f76faec7f87cL,0x41110530041951abL,
+ 0x58ec52c1d4b5a71aL },
+ { 0xf3488f990f75cf9aL,0xf411951fba82d0d5L,0x27ee75be618895abL,
+ 0xeae060d46d8aab14L } },
+ /* 46 << 14 */
+ { { 0x9ae1df737fb54dc2L,0x1f3e391b25963649L,0x242ec32afe055081L,
+ 0x5bd450ef8491c9bdL },
+ { 0x367efc67981eb389L,0xed7e19283a0550d5L,0x362e776bab3ce75cL,
+ 0xe890e3081f24c523L } },
+ /* 47 << 14 */
+ { { 0xb961b682feccef76L,0x8b8e11f58bba6d92L,0x8f2ccc4c2b2375c4L,
+ 0x0d7f7a52e2f86cfaL },
+ { 0xfd94d30a9efe5633L,0x2d8d246b5451f934L,0x2234c6e3244e6a00L,
+ 0xde2b5b0dddec8c50L } },
+ /* 48 << 14 */
+ { { 0x2ce53c5abf776f5bL,0x6f72407160357b05L,0xb259371771bf3f7aL,
+ 0x87d2501c440c4a9fL },
+ { 0x440552e187b05340L,0xb7bf7cc821624c32L,0x4155a6ce22facddbL,
+ 0x5a4228cb889837efL } },
+ /* 49 << 14 */
+ { { 0xef87d6d6fd4fd671L,0xa233687ec2daa10eL,0x7562224403c0eb96L,
+ 0x7632d1848bf19be6L },
+ { 0x05d0f8e940735ff4L,0x3a3e6e13c00931f1L,0x31ccde6adafe3f18L,
+ 0xf381366acfe51207L } },
+ /* 50 << 14 */
+ { { 0x24c222a960167d92L,0x62f9d6f87529f18cL,0x412397c00353b114L,
+ 0x334d89dcef808043L },
+ { 0xd9ec63ba2a4383ceL,0xcec8e9375cf92ba0L,0xfb8b4288c8be74c0L,
+ 0x67d6912f105d4391L } },
+ /* 51 << 14 */
+ { { 0x7b996c461b913149L,0x36aae2ef3a4e02daL,0xb68aa003972de594L,
+ 0x284ec70d4ec6d545L },
+ { 0xf3d2b2d061391d54L,0x69c5d5d6fe114e92L,0xbe0f00b5b4482dffL,
+ 0xe1596fa5f5bf33c5L } },
+ /* 52 << 14 */
+ { { 0x10595b5696a71cbaL,0x944938b2fdcadeb7L,0xa282da4cfccd8471L,
+ 0x98ec05f30d37bfe1L },
+ { 0xe171ce1b0698304aL,0x2d69144421bdf79bL,0xd0cd3b741b21dec1L,
+ 0x712ecd8b16a15f71L } },
+ /* 53 << 14 */
+ { { 0x8d4c00a700fd56e1L,0x02ec9692f9527c18L,0x21c449374a3e42e1L,
+ 0x9176fbab1392ae0aL },
+ { 0x8726f1ba44b7b618L,0xb4d7aae9f1de491cL,0xf91df7b907b582c0L,
+ 0x7e116c30ef60aa3aL } },
+ /* 54 << 14 */
+ { { 0x99270f81466265d7L,0xb15b6fe24df7adf0L,0xfe33b2d3f9738f7fL,
+ 0x48553ab9d6d70f95L },
+ { 0x2cc72ac8c21e94dbL,0x795ac38dbdc0bbeeL,0x0a1be4492e40478fL,
+ 0x81bd3394052bde55L } },
+ /* 55 << 14 */
+ { { 0x63c8dbe956b3c4f2L,0x017a99cf904177ccL,0x947bbddb4d010fc1L,
+ 0xacf9b00bbb2c9b21L },
+ { 0x2970bc8d47173611L,0x1a4cbe08ac7d756fL,0x06d9f4aa67d541a2L,
+ 0xa3e8b68959c2cf44L } },
+ /* 56 << 14 */
+ { { 0xaad066da4d88f1ddL,0xc604f1657ad35deaL,0x7edc07204478ca67L,
+ 0xa10dfae0ba02ce06L },
+ { 0xeceb1c76af36f4e4L,0x994b2292af3f8f48L,0xbf9ed77b77c8a68cL,
+ 0x74f544ea51744c9dL } },
+ /* 57 << 14 */
+ { { 0x82d05bb98113a757L,0x4ef2d2b48a9885e4L,0x1e332be51aa7865fL,
+ 0x22b76b18290d1a52L },
+ { 0x308a231044351683L,0x9d861896a3f22840L,0x5959ddcd841ed947L,
+ 0x0def0c94154b73bfL } },
+ /* 58 << 14 */
+ { { 0xf01054174c7c15e0L,0x539bfb023a277c32L,0xe699268ef9dccf5fL,
+ 0x9f5796a50247a3bdL },
+ { 0x8b839de84f157269L,0xc825c1e57a30196bL,0x6ef0aabcdc8a5a91L,
+ 0xf4a8ce6c498b7fe6L } },
+ /* 59 << 14 */
+ { { 0x1cce35a770cbac78L,0x83488e9bf6b23958L,0x0341a070d76cb011L,
+ 0xda6c9d06ae1b2658L },
+ { 0xb701fb30dd648c52L,0x994ca02c52fb9fd1L,0x069331176f563086L,
+ 0x3d2b810017856babL } },
+ /* 60 << 14 */
+ { { 0xe89f48c85963a46eL,0x658ab875a99e61c7L,0x6e296f874b8517b4L,
+ 0x36c4fcdcfc1bc656L },
+ { 0xde5227a1a3906defL,0x9fe95f5762418945L,0x20c91e81fdd96cdeL,
+ 0x5adbe47eda4480deL } },
+ /* 61 << 14 */
+ { { 0xa009370f396de2b6L,0x98583d4bf0ecc7bdL,0xf44f6b57e51d0672L,
+ 0x03d6b078556b1984L },
+ { 0x27dbdd93b0b64912L,0x9b3a343415687b09L,0x0dba646151ec20a9L,
+ 0xec93db7fff28187cL } },
+ /* 62 << 14 */
+ { { 0x00ff8c2466e48bddL,0x2514f2f911ccd78eL,0xeba11f4fe1250603L,
+ 0x8a22cd41243fa156L },
+ { 0xa4e58df4b283e4c6L,0x78c298598b39783fL,0x5235aee2a5259809L,
+ 0xc16284b50e0227ddL } },
+ /* 63 << 14 */
+ { { 0xa5f579161338830dL,0x6d4b8a6bd2123fcaL,0x236ea68af9c546f8L,
+ 0xc1d36873fa608d36L },
+ { 0xcd76e4958d436d13L,0xd4d9c2218fb080afL,0x665c1728e8ad3fb5L,
+ 0xcf1ebe4db3d572e0L } },
+ /* 64 << 14 */
+ { { 0xa7a8746a584c5e20L,0x267e4ea1b9dc7035L,0x593a15cfb9548c9bL,
+ 0x5e6e21354bd012f3L },
+ { 0xdf31cc6a8c8f936eL,0x8af84d04b5c241dcL,0x63990a6f345efb86L,
+ 0x6fef4e61b9b962cbL } },
+ /* 0 << 21 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 21 */
+ { { 0xf6368f0925722608L,0x131260db131cf5c6L,0x40eb353bfab4f7acL,
+ 0x85c7888037eee829L },
+ { 0x4c1581ffc3bdf24eL,0x5bff75cbf5c3c5a8L,0x35e8c83fa14e6f40L,
+ 0xb81d1c0f0295e0caL } },
+ /* 2 << 21 */
+ { { 0xfcde7cc8f43a730fL,0xe89b6f3c33ab590eL,0xc823f529ad03240bL,
+ 0x82b79afe98bea5dbL },
+ { 0x568f2856962fe5deL,0x0c590adb60c591f3L,0x1fc74a144a28a858L,
+ 0x3b662498b3203f4cL } },
+ /* 3 << 21 */
+ { { 0x91e3cf0d6c39765aL,0xa2db3acdac3cca0bL,0x288f2f08cb953b50L,
+ 0x2414582ccf43cf1aL },
+ { 0x8dec8bbc60eee9a8L,0x54c79f02729aa042L,0xd81cd5ec6532f5d5L,
+ 0xa672303acf82e15fL } },
+ /* 4 << 21 */
+ { { 0x376aafa8719c0563L,0xcd8ad2dcbc5fc79fL,0x303fdb9fcb750cd3L,
+ 0x14ff052f4418b08eL },
+ { 0xf75084cf3e2d6520L,0x7ebdf0f8144ed509L,0xf43bf0f2d3f25b98L,
+ 0x86ad71cfa354d837L } },
+ /* 5 << 21 */
+ { { 0xb827fe9226f43572L,0xdfd3ab5b5d824758L,0x315dd23a539094c1L,
+ 0x85c0e37a66623d68L },
+ { 0x575c79727be19ae0L,0x616a3396df0d36b5L,0xa1ebb3c826b1ff7eL,
+ 0x635b9485140ad453L } },
+ /* 6 << 21 */
+ { { 0x92bf3cdada430c0bL,0x4702850e3a96dac6L,0xc91cf0a515ac326aL,
+ 0x95de4f49ab8c25e4L },
+ { 0xb01bad09e265c17cL,0x24e45464087b3881L,0xd43e583ce1fac5caL,
+ 0xe17cb3186ead97a6L } },
+ /* 7 << 21 */
+ { { 0x6cc3924374dcec46L,0x33cfc02d54c2b73fL,0x82917844f26cd99cL,
+ 0x8819dd95d1773f89L },
+ { 0x09572aa60871f427L,0x8e0cf365f6f01c34L,0x7fa52988bff1f5afL,
+ 0x4eb357eae75e8e50L } },
+ /* 8 << 21 */
+ { { 0xd9d0c8c4868af75dL,0xd7325cff45c8c7eaL,0xab471996cc81ecb0L,
+ 0xff5d55f3611824edL },
+ { 0xbe3145411977a0eeL,0x5085c4c5722038c6L,0x2d5335bff94bb495L,
+ 0x894ad8a6c8e2a082L } },
+ /* 9 << 21 */
+ { { 0x5c3e2341ada35438L,0xf4a9fc89049b8c4eL,0xbeeb355a9f17cf34L,
+ 0x3f311e0e6c91fe10L },
+ { 0xc2d2003892ab9891L,0x257bdcc13e8ce9a9L,0x1b2d978988c53beeL,
+ 0x927ce89acdba143aL } },
+ /* 10 << 21 */
+ { { 0xb0a32cca523db280L,0x5c889f8a50d43783L,0x503e04b34897d16fL,
+ 0x8cdb6e7808f5f2e8L },
+ { 0x6ab91cf0179c8e74L,0xd8874e5248211d60L,0xf948d4d5ea851200L,
+ 0x4076d41ee6f9840aL } },
+ /* 11 << 21 */
+ { { 0xc20e263c47b517eaL,0x79a448fd30685e5eL,0xe55f6f78f90631a0L,
+ 0x88a790b1a79e6346L },
+ { 0x62160c7d80969fe8L,0x54f92fd441491bb9L,0xa6645c235c957526L,
+ 0xf44cc5aebea3ce7bL } },
+ /* 12 << 21 */
+ { { 0xf76283278b1e68b7L,0xc731ad7a303f29d3L,0xfe5a9ca957d03ecbL,
+ 0x96c0d50c41bc97a7L },
+ { 0xc4669fe79b4f7f24L,0xfdd781d83d9967efL,0x7892c7c35d2c208dL,
+ 0x8bf64f7cae545cb3L } },
+ /* 13 << 21 */
+ { { 0xc01f862c467be912L,0xf4c85ee9c73d30ccL,0x1fa6f4be6ab83ec7L,
+ 0xa07a3c1c4e3e3cf9L },
+ { 0x87f8ef450c00beb3L,0x30e2c2b3000d4c3eL,0x1aa00b94fe08bf5bL,
+ 0x32c133aa9224ef52L } },
+ /* 14 << 21 */
+ { { 0x38df16bb32e5685dL,0x68a9e06958e6f544L,0x495aaff7cdc5ebc6L,
+ 0xf894a645378b135fL },
+ { 0xf316350a09e27ecfL,0xeced201e58f7179dL,0x2eec273ce97861baL,
+ 0x47ec2caed693be2eL } },
+ /* 15 << 21 */
+ { { 0xfa4c97c4f68367ceL,0xe4f47d0bbe5a5755L,0x17de815db298a979L,
+ 0xd7eca659c177dc7dL },
+ { 0x20fdbb7149ded0a3L,0x4cb2aad4fb34d3c5L,0x2cf31d2860858a33L,
+ 0x3b6873efa24aa40fL } },
+ /* 16 << 21 */
+ { { 0x540234b22c11bb37L,0x2d0366dded4c74a3L,0xf9a968daeec5f25dL,
+ 0x3660106867b63142L },
+ { 0x07cd6d2c68d7b6d4L,0xa8f74f090c842942L,0xe27514047768b1eeL,
+ 0x4b5f7e89fe62aee4L } },
+ /* 17 << 21 */
+ { { 0xc6a7717789070d26L,0xa1f28e4edd1c8bc7L,0xea5f4f06469e1f17L,
+ 0x78fc242afbdb78e0L },
+ { 0xc9c7c5928b0588f1L,0xb6b7a0fd1535921eL,0xcc5bdb91bde5ae35L,
+ 0xb42c485e12ff1864L } },
+ /* 18 << 21 */
+ { { 0xa1113e13dbab98aaL,0xde9d469ba17b1024L,0x23f48b37c0462d3aL,
+ 0x3752e5377c5c078dL },
+ { 0xe3a86add15544eb9L,0xf013aea780fba279L,0x8b5bb76cf22001b5L,
+ 0xe617ba14f02891abL } },
+ /* 19 << 21 */
+ { { 0xd39182a6936219d3L,0x5ce1f194ae51cb19L,0xc78f8598bf07a74cL,
+ 0x6d7158f222cbf1bcL },
+ { 0x3b846b21e300ce18L,0x35fba6302d11275dL,0x5fe25c36a0239b9bL,
+ 0xd8beb35ddf05d940L } },
+ /* 20 << 21 */
+ { { 0x4db02bb01f7e320dL,0x0641c3646da320eaL,0x6d95fa5d821389a3L,
+ 0x926997488fcd8e3dL },
+ { 0x316fef17ceb6c143L,0x67fcb841d933762bL,0xbb837e35118b17f8L,
+ 0x4b92552f9fd24821L } },
+ /* 21 << 21 */
+ { { 0xae6bc70e46aca793L,0x1cf0b0e4e579311bL,0x8dc631be5802f716L,
+ 0x099bdc6fbddbee4dL },
+ { 0xcc352bb20caf8b05L,0xf74d505a72d63df2L,0xb9876d4b91c4f408L,
+ 0x1ce184739e229b2dL } },
+ /* 22 << 21 */
+ { { 0x4950759783abdb4aL,0x850fbcb6dee84b18L,0x6325236e609e67dcL,
+ 0x04d831d99336c6d8L },
+ { 0x8deaae3bfa12d45dL,0xe425f8ce4746e246L,0x8004c17524f5f31eL,
+ 0xaca16d8fad62c3b7L } },
+ /* 23 << 21 */
+ { { 0x0dc15a6a9152f934L,0xf1235e5ded0e12c1L,0xc33c06ecda477dacL,
+ 0x76be8732b2ea0006L },
+ { 0xcf3f78310c0cd313L,0x3c524553a614260dL,0x31a756f8cab22d15L,
+ 0x03ee10d177827a20L } },
+ /* 24 << 21 */
+ { { 0xd1e059b21994ef20L,0x2a653b69638ae318L,0x70d5eb582f699010L,
+ 0x279739f709f5f84aL },
+ { 0x5da4663c8b799336L,0xfdfdf14d203c37ebL,0x32d8a9dca1dbfb2dL,
+ 0xab40cff077d48f9bL } },
+ /* 25 << 21 */
+ { { 0xc018b383d20b42d5L,0xf9a810ef9f78845fL,0x40af3753bdba9df0L,
+ 0xb90bdcfc131dfdf9L },
+ { 0x18720591f01ab782L,0xc823f2116af12a88L,0xa51b80f30dc14401L,
+ 0xde248f77fb2dfbe3L } },
+ /* 26 << 21 */
+ { { 0xef5a44e50cafe751L,0x73997c9cd4dcd221L,0x32fd86d1de854024L,
+ 0xd5b53adca09b84bbL },
+ { 0x008d7a11dcedd8d1L,0x406bd1c874b32c84L,0x5d4472ff05dde8b1L,
+ 0x2e25f2cdfce2b32fL } },
+ /* 27 << 21 */
+ { { 0xbec0dd5e29dfc254L,0x4455fcf62b98b267L,0x0b4d43a5c72df2adL,
+ 0xea70e6be48a75397L },
+ { 0x2aad61695820f3bfL,0xf410d2dd9e37f68fL,0x70fb7dba7be5ac83L,
+ 0x636bb64536ec3eecL } },
+ /* 28 << 21 */
+ { { 0x27104ea39754e21cL,0xbc87a3e68d63c373L,0x483351d74109db9aL,
+ 0x0fa724e360134da7L },
+ { 0x9ff44c29b0720b16L,0x2dd0cf1306aceeadL,0x5942758ce26929a6L,
+ 0x96c5db92b766a92bL } },
+ /* 29 << 21 */
+ { { 0xcec7d4c05f18395eL,0xd3f227441f80d032L,0x7a68b37acb86075bL,
+ 0x074764ddafef92dbL },
+ { 0xded1e9507bc7f389L,0xc580c850b9756460L,0xaeeec2a47da48157L,
+ 0x3f0b4e7f82c587b3L } },
+ /* 30 << 21 */
+ { { 0x231c6de8a9f19c53L,0x5717bd736974e34eL,0xd9e1d216f1508fa9L,
+ 0x9f112361dadaa124L },
+ { 0x80145e31823b7348L,0x4dd8f0d5ac634069L,0xe3d82fc72297c258L,
+ 0x276fcfee9cee7431L } },
+ /* 31 << 21 */
+ { { 0x8eb61b5e2bc0aea9L,0x4f668fd5de329431L,0x03a32ab138e4b87eL,
+ 0xe137451773d0ef0bL },
+ { 0x1a46f7e6853ac983L,0xc3bdf42e68e78a57L,0xacf207852ea96dd1L,
+ 0xa10649b9f1638460L } },
+ /* 32 << 21 */
+ { { 0xf2369f0b879fbbedL,0x0ff0ae86da9d1869L,0x5251d75956766f45L,
+ 0x4984d8c02be8d0fcL },
+ { 0x7ecc95a6d21008f0L,0x29bd54a03a1a1c49L,0xab9828c5d26c50f3L,
+ 0x32c0087c51d0d251L } },
+ /* 33 << 21 */
+ { { 0x9bac3ce60c1cdb26L,0xcd94d947557ca205L,0x1b1bd5989db1fdcdL,
+ 0x0eda0108a3d8b149L },
+ { 0x9506661056152fccL,0xc2f037e6e7192b33L,0xdeffb41ac92e05a4L,
+ 0x1105f6c2c2f6c62eL } },
+ /* 34 << 21 */
+ { { 0x68e735008733913cL,0xcce861633f3adc40L,0xf407a94238a278e9L,
+ 0xd13c1b9d2ab21292L },
+ { 0x93ed7ec71c74cf5cL,0x8887dc48f1a4c1b4L,0x3830ff304b3a11f1L,
+ 0x358c5a3c58937cb6L } },
+ /* 35 << 21 */
+ { { 0x027dc40489022829L,0x40e939773b798f79L,0x90ad333738be6eadL,
+ 0x9c23f6bcf34c0a5dL },
+ { 0xd1711a35fbffd8bbL,0x60fcfb491949d3ddL,0x09c8ef4b7825d93aL,
+ 0x24233cffa0a8c968L } },
+ /* 36 << 21 */
+ { { 0x67ade46ce6d982afL,0xebb6bf3ee7544d7cL,0xd6b9ba763d8bd087L,
+ 0x46fe382d4dc61280L },
+ { 0xbd39a7e8b5bdbd75L,0xab381331b8f228feL,0x0709a77cce1c4300L,
+ 0x6a247e56f337ceacL } },
+ /* 37 << 21 */
+ { { 0x8f34f21b636288beL,0x9dfdca74c8a7c305L,0x6decfd1bea919e04L,
+ 0xcdf2688d8e1991f8L },
+ { 0xe607df44d0f8a67eL,0xd985df4b0b58d010L,0x57f834c50c24f8f4L,
+ 0xe976ef56a0bf01aeL } },
+ /* 38 << 21 */
+ { { 0x536395aca1c32373L,0x351027aa734c0a13L,0xd2f1b5d65e6bd5bcL,
+ 0x2b539e24223debedL },
+ { 0xd4994cec0eaa1d71L,0x2a83381d661dcf65L,0x5f1aed2f7b54c740L,
+ 0x0bea3fa5d6dda5eeL } },
+ /* 39 << 21 */
+ { { 0x9d4fb68436cc6134L,0x8eb9bbf3c0a443ddL,0xfc500e2e383b7d2aL,
+ 0x7aad621c5b775257L },
+ { 0x69284d740a8f7cc0L,0xe820c2ce07562d65L,0xbf9531b9499758eeL,
+ 0x73e95ca56ee0cc2dL } },
+ /* 40 << 21 */
+ { { 0xf61790abfbaf50a5L,0xdf55e76b684e0750L,0xec516da7f176b005L,
+ 0x575553bb7a2dddc7L },
+ { 0x37c87ca3553afa73L,0x315f3ffc4d55c251L,0xe846442aaf3e5d35L,
+ 0x61b911496495ff28L } },
+ /* 41 << 21 */
+ { { 0x23cc95d3fa326dc3L,0x1df4da1f18fc2ceaL,0x24bf9adcd0a37d59L,
+ 0xb6710053320d6e1eL },
+ { 0x96f9667e618344d1L,0xcc7ce042a06445afL,0xa02d8514d68dbc3aL,
+ 0x4ea109e4280b5a5bL } },
+ /* 42 << 21 */
+ { { 0x5741a7acb40961bfL,0x4ada59376aa56bfaL,0x7feb914502b765d1L,
+ 0x561e97bee6ad1582L },
+ { 0xbbc4a5b6da3982f5L,0x0c2659edb546f468L,0xb8e7e6aa59612d20L,
+ 0xd83dfe20ac19e8e0L } },
+ /* 43 << 21 */
+ { { 0x8530c45fb835398cL,0x6106a8bfb38a41c2L,0x21e8f9a635f5dcdbL,
+ 0x39707137cae498edL },
+ { 0x70c23834d8249f00L,0x9f14b58fab2537a0L,0xd043c3655f61c0c2L,
+ 0xdc5926d609a194a7L } },
+ /* 44 << 21 */
+ { { 0xddec03398e77738aL,0xd07a63effba46426L,0x2e58e79cee7f6e86L,
+ 0xe59b0459ff32d241L },
+ { 0xc5ec84e520fa0338L,0x97939ac8eaff5aceL,0x0310a4e3b4a38313L,
+ 0x9115fba28f9d9885L } },
+ /* 45 << 21 */
+ { { 0x8dd710c25fadf8c3L,0x66be38a2ce19c0e2L,0xd42a279c4cfe5022L,
+ 0x597bb5300e24e1b8L },
+ { 0x3cde86b7c153ca7fL,0xa8d30fb3707d63bdL,0xac905f92bd60d21eL,
+ 0x98e7ffb67b9a54abL } },
+ /* 46 << 21 */
+ { { 0xd7147df8e9726a30L,0xb5e216ffafce3533L,0xb550b7992ff1ec40L,
+ 0x6b613b87a1e953fdL },
+ { 0x87b88dba792d5610L,0x2ee1270aa190fbe1L,0x02f4e2dc2ef581daL,
+ 0x016530e4eff82a95L } },
+ /* 47 << 21 */
+ { { 0xcbb93dfd8fd6ee89L,0x16d3d98646848fffL,0x600eff241da47adfL,
+ 0x1b9754a00ad47a71L },
+ { 0x8f9266df70c33b98L,0xaadc87aedf34186eL,0x0d2ce8e14ad24132L,
+ 0x8a47cbfc19946ebaL } },
+ /* 48 << 21 */
+ { { 0x47feeb6662b5f3afL,0xcefab5610abb3734L,0x449de60e19f35cb1L,
+ 0x39f8db14157f0eb9L },
+ { 0xffaecc5b3c61bfd6L,0xa5a4d41d41216703L,0x7f8fabed224e1cc2L,
+ 0x0d5a8186871ad953L } },
+ /* 49 << 21 */
+ { { 0xf10774f7d22da9a9L,0x45b8a678cc8a9b0dL,0xd9c2e722bdc32cffL,
+ 0xbf71b5f5337202a5L },
+ { 0x95c57f2f69fc4db9L,0xb6dad34c765d01e1L,0x7e0bd13fcb904635L,
+ 0x61751253763a588cL } },
+ /* 50 << 21 */
+ { { 0xd85c299781af2c2dL,0xc0f7d9c481b9d7daL,0x838a34ae08533e8dL,
+ 0x15c4cb08311d8311L },
+ { 0x97f832858e121e14L,0xeea7dc1e85000a5fL,0x0c6059b65d256274L,
+ 0xec9beaceb95075c0L } },
+ /* 51 << 21 */
+ { { 0x173daad71df97828L,0xbf851cb5a8937877L,0xb083c59401646f3cL,
+ 0x3bad30cf50c6d352L },
+ { 0xfeb2b202496bbceaL,0x3cf9fd4f18a1e8baL,0xd26de7ff1c066029L,
+ 0x39c81e9e4e9ed4f8L } },
+ /* 52 << 21 */
+ { { 0xd8be0cb97b390d35L,0x01df2bbd964aab27L,0x3e8c1a65c3ef64f8L,
+ 0x567291d1716ed1ddL },
+ { 0x95499c6c5f5406d3L,0x71fdda395ba8e23fL,0xcfeb320ed5096eceL,
+ 0xbe7ba92bca66dd16L } },
+ /* 53 << 21 */
+ { { 0x4608d36bc6fb5a7dL,0xe3eea15a6d2dd0e0L,0x75b0a3eb8f97a36aL,
+ 0xf59814cc1c83de1eL },
+ { 0x56c9c5b01c33c23fL,0xa96c1da46faa4136L,0x46bf2074de316551L,
+ 0x3b866e7b1f756c8fL } },
+ /* 54 << 21 */
+ { { 0x727727d81495ed6bL,0xb2394243b682dce7L,0x8ab8454e758610f3L,
+ 0xc243ce84857d72a4L },
+ { 0x7b320d71dbbf370fL,0xff9afa3778e0f7caL,0x0119d1e0ea7b523fL,
+ 0xb997f8cb058c7d42L } },
+ /* 55 << 21 */
+ { { 0x285bcd2a37bbb184L,0x51dcec49a45d1fa6L,0x6ade3b64e29634cbL,
+ 0x080c94a726b86ef1L },
+ { 0xba583db12283fbe3L,0x902bddc85a9315edL,0x07c1ccb386964becL,
+ 0x78f4eacfb6258301L } },
+ /* 56 << 21 */
+ { { 0x4bdf3a4956f90823L,0xba0f5080741d777bL,0x091d71c3f38bf760L,
+ 0x9633d50f9b625b02L },
+ { 0x03ecb743b8c9de61L,0xb47512545de74720L,0x9f9defc974ce1cb2L,
+ 0x774a4f6a00bd32efL } },
+ /* 57 << 21 */
+ { { 0xaca385f773848f22L,0x53dad716f3f8558eL,0xab7b34b093c471f9L,
+ 0xf530e06919644bc7L },
+ { 0x3d9fb1ffdd59d31aL,0x4382e0df08daa795L,0x165c6f4bd5cc88d7L,
+ 0xeaa392d54a18c900L } },
+ /* 58 << 21 */
+ { { 0x94203c67648024eeL,0x188763f28c2fabcdL,0xa80f87acbbaec835L,
+ 0x632c96e0f29d8d54L },
+ { 0x29b0a60e4c00a95eL,0x2ef17f40e011e9faL,0xf6c0e1d115b77223L,
+ 0xaaec2c6214b04e32L } },
+ /* 59 << 21 */
+ { { 0xd35688d83d84e58cL,0x2af5094c958571dbL,0x4fff7e19760682a6L,
+ 0x4cb27077e39a407cL },
+ { 0x0f59c5474ff0e321L,0x169f34a61b34c8ffL,0x2bff109652bc1ba7L,
+ 0xa25423b783583544L } },
+ /* 60 << 21 */
+ { { 0x5d55d5d50ac8b782L,0xff6622ec2db3c892L,0x48fce7416b8bb642L,
+ 0x31d6998c69d7e3dcL },
+ { 0xdbaf8004cadcaed0L,0x801b0142d81d053cL,0x94b189fc59630ec6L,
+ 0x120e9934af762c8eL } },
+ /* 61 << 21 */
+ { { 0x53a29aa4fdc6a404L,0x19d8e01ea1909948L,0x3cfcabf1d7e89681L,
+ 0x3321a50d4e132d37L },
+ { 0xd0496863e9a86111L,0x8c0cde6106a3bc65L,0xaf866c49fc9f8eefL,
+ 0x2066350eff7f5141L } },
+ /* 62 << 21 */
+ { { 0x4f8a4689e56ddfbdL,0xea1b0c07fe32983aL,0x2b317462873cb8cbL,
+ 0x658deddc2d93229fL },
+ { 0x65efaf4d0f64ef58L,0xfe43287d730cc7a8L,0xaebc0c723d047d70L,
+ 0x92efa539d92d26c9L } },
+ /* 63 << 21 */
+ { { 0x06e7845794b56526L,0x415cb80f0961002dL,0x89e5c56576dcb10fL,
+ 0x8bbb6982ff9259feL },
+ { 0x4fe8795b9abc2668L,0xb5d4f5341e678fb1L,0x6601f3be7b7da2b9L,
+ 0x98da59e2a13d6805L } },
+ /* 64 << 21 */
+ { { 0x190d8ea601799a52L,0xa20cec41b86d2952L,0x3062ffb27fff2a7cL,
+ 0x741b32e579f19d37L },
+ { 0xf80d81814eb57d47L,0x7a2d0ed416aef06bL,0x09735fb01cecb588L,
+ 0x1641caaac6061f5bL } },
+ /* 0 << 28 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 28 */
+ { { 0x7f99824f20151427L,0x206828b692430206L,0xaa9097d7e1112357L,
+ 0xacf9a2f209e414ecL },
+ { 0xdbdac9da27915356L,0x7e0734b7001efee3L,0x54fab5bbd2b288e2L,
+ 0x4c630fc4f62dd09cL } },
+ /* 2 << 28 */
+ { { 0x8537107a1ac2703bL,0xb49258d86bc857b5L,0x57df14debcdaccd1L,
+ 0x24ab68d7c4ae8529L },
+ { 0x7ed8b5d4734e59d0L,0x5f8740c8c495cc80L,0x84aedd5a291db9b3L,
+ 0x80b360f84fb995beL } },
+ /* 3 << 28 */
+ { { 0xae915f5d5fa067d1L,0x4134b57f9668960cL,0xbd3656d6a48edaacL,
+ 0xdac1e3e4fc1d7436L },
+ { 0x674ff869d81fbb26L,0x449ed3ecb26c33d4L,0x85138705d94203e8L,
+ 0xccde538bbeeb6f4aL } },
+ /* 4 << 28 */
+ { { 0x55d5c68da61a76faL,0x598b441dca1554dcL,0xd39923b9773b279cL,
+ 0x33331d3c36bf9efcL },
+ { 0x2d4c848e298de399L,0xcfdb8e77a1a27f56L,0x94c855ea57b8ab70L,
+ 0xdcdb9dae6f7879baL } },
+ /* 5 << 28 */
+ { { 0x7bdff8c2019f2a59L,0xb3ce5bb3cb4fbc74L,0xea907f688a9173ddL,
+ 0x6cd3d0d395a75439L },
+ { 0x92ecc4d6efed021cL,0x09a9f9b06a77339aL,0x87ca6b157188c64aL,
+ 0x10c2996844899158L } },
+ /* 6 << 28 */
+ { { 0x5859a229ed6e82efL,0x16f338e365ebaf4eL,0x0cd313875ead67aeL,
+ 0x1c73d22854ef0bb4L },
+ { 0x4cb5513174a5c8c7L,0x01cd29707f69ad6aL,0xa04d00dde966f87eL,
+ 0xd96fe4470b7b0321L } },
+ /* 7 << 28 */
+ { { 0x342ac06e88fbd381L,0x02cd4a845c35a493L,0xe8fa89de54f1bbcdL,
+ 0x341d63672575ed4cL },
+ { 0xebe357fbd238202bL,0x600b4d1aa984ead9L,0xc35c9f4452436ea0L,
+ 0x96fe0a39a370751bL } },
+ /* 8 << 28 */
+ { { 0x4c4f07367f636a38L,0x9f943fb70e76d5cbL,0xb03510baa8b68b8bL,
+ 0xc246780a9ed07a1fL },
+ { 0x3c0514156d549fc2L,0xc2953f31607781caL,0x955e2c69d8d95413L,
+ 0xb300fadc7bd282e3L } },
+ /* 9 << 28 */
+ { { 0x81fe7b5087e9189fL,0xdb17375cf42dda27L,0x22f7d896cf0a5904L,
+ 0xa0e57c5aebe348e6L },
+ { 0xa61011d3f40e3c80L,0xb11893218db705c5L,0x4ed9309e50fedec3L,
+ 0xdcf14a104d6d5c1dL } },
+ /* 10 << 28 */
+ { { 0x056c265b55691342L,0xe8e0850491049dc7L,0x131329f5c9bae20aL,
+ 0x96c8b3e8d9dccdb4L },
+ { 0x8c5ff838fb4ee6b4L,0xfc5a9aeb41e8ccf0L,0x7417b764fae050c6L,
+ 0x0953c3d700452080L } },
+ /* 11 << 28 */
+ { { 0x2137268238dfe7e8L,0xea417e152bb79d4bL,0x59641f1c76e7cf2dL,
+ 0x271e3059ea0bcfccL },
+ { 0x624c7dfd7253ecbdL,0x2f552e254fca6186L,0xcbf84ecd4d866e9cL,
+ 0x73967709f68d4610L } },
+ /* 12 << 28 */
+ { { 0xa14b1163c27901b4L,0xfd9236e0899b8bf3L,0x42b091eccbc6da0aL,
+ 0xbb1dac6f5ad1d297L },
+ { 0x80e61d53a91cf76eL,0x4110a412d31f1ee7L,0x2d87c3ba13efcf77L,
+ 0x1f374bb4df450d76L } },
+ /* 13 << 28 */
+ { { 0x5e78e2f20d188dabL,0xe3968ed0f4b885efL,0x46c0568e7314570fL,
+ 0x3161633801170521L },
+ { 0x18e1e7e24f0c8afeL,0x4caa75ffdeea78daL,0x82db67f27c5d8a51L,
+ 0x36a44d866f505370L } },
+ /* 14 << 28 */
+ { { 0xd72c5bda0333974fL,0x5db516ae27a70146L,0x34705281210ef921L,
+ 0xbff17a8f0c9c38e5L },
+ { 0x78f4814e12476da1L,0xc1e1661333c16980L,0x9e5b386f424d4bcaL,
+ 0x4c274e87c85740deL } },
+ /* 15 << 28 */
+ { { 0xb6a9b88d6c2f5226L,0x14d1b944550d7ca8L,0x580c85fc1fc41709L,
+ 0xc1da368b54c6d519L },
+ { 0x2b0785ced5113cf7L,0x0670f6335a34708fL,0x46e2376715cc3f88L,
+ 0x1b480cfa50c72c8fL } },
+ /* 16 << 28 */
+ { { 0x202886024147519aL,0xd0981eac26b372f0L,0xa9d4a7caa785ebc8L,
+ 0xd953c50ddbdf58e9L },
+ { 0x9d6361ccfd590f8fL,0x72e9626b44e6c917L,0x7fd9611022eb64cfL,
+ 0x863ebb7e9eb288f3L } },
+ /* 17 << 28 */
+ { { 0x6e6ab7616aca8ee7L,0x97d10b39d7b40358L,0x1687d3771e5feb0dL,
+ 0xc83e50e48265a27aL },
+ { 0x8f75a9fec954b313L,0xcc2e8f47310d1f61L,0xf5ba81c56557d0e0L,
+ 0x25f9680c3eaf6207L } },
+ /* 18 << 28 */
+ { { 0xf95c66094354080bL,0x5225bfa57bf2fe1cL,0xc5c004e25c7d98faL,
+ 0x3561bf1c019aaf60L },
+ { 0x5e6f9f17ba151474L,0xdec2f934b04f6ecaL,0x64e368a1269acb1eL,
+ 0x1332d9e40cdda493L } },
+ /* 19 << 28 */
+ { { 0x60d6cf69df23de05L,0x66d17da2009339a0L,0x9fcac9850a693923L,
+ 0xbcf057fced7c6a6dL },
+ { 0xc3c5c8c5f0b5662cL,0x25318dd8dcba4f24L,0x60e8cb75082b69ffL,
+ 0x7c23b3ee1e728c01L } },
+ /* 20 << 28 */
+ { { 0x15e10a0a097e4403L,0xcb3d0a8619854665L,0x88d8e211d67d4826L,
+ 0xb39af66e0b9d2839L },
+ { 0xa5f94588bd475ca8L,0xe06b7966c077b80bL,0xfedb1485da27c26cL,
+ 0xd290d33afe0fd5e0L } },
+ /* 21 << 28 */
+ { { 0xa40bcc47f34fb0faL,0xb4760cc81fb1ab09L,0x8fca0993a273bfe3L,
+ 0x13e4fe07f70b213cL },
+ { 0x3bcdb992fdb05163L,0x8c484b110c2b19b6L,0x1acb815faaf2e3e2L,
+ 0xc6905935b89ff1b4L } },
+ /* 22 << 28 */
+ { { 0xb2ad6f9d586e74e1L,0x488883ad67b80484L,0x758aa2c7369c3ddbL,
+ 0x8ab74e699f9afd31L },
+ { 0x10fc2d285e21beb1L,0x3484518a318c42f9L,0x377427dc53cf40c3L,
+ 0x9de0781a391bc1d9L } },
+ /* 23 << 28 */
+ { { 0x8faee858693807e1L,0xa38653274e81ccc7L,0x02c30ff26f835b84L,
+ 0xb604437b0d3d38d4L },
+ { 0xb3fc8a985ca1823dL,0xb82f7ec903be0324L,0xee36d761cf684a33L,
+ 0x5a01df0e9f29bf7dL } },
+ /* 24 << 28 */
+ { { 0x686202f31306583dL,0x05b10da0437c622eL,0xbf9aaa0f076a7bc8L,
+ 0x25e94efb8f8f4e43L },
+ { 0x8a35c9b7fa3dc26dL,0xe0e5fb9396ff03c5L,0xa77e3843ebc394ceL,
+ 0xcede65958361de60L } },
+ /* 25 << 28 */
+ { { 0xd27c22f6a1993545L,0xab01cc3624d671baL,0x63fa2877a169c28eL,
+ 0x925ef9042eb08376L },
+ { 0x3b2fa3cf53aa0b32L,0xb27beb5b71c49d7aL,0xb60e1834d105e27fL,
+ 0xd60897884f68570dL } },
+ /* 26 << 28 */
+ { { 0x23094ce0d6fbc2acL,0x738037a1815ff551L,0xda73b1bb6bef119cL,
+ 0xdcf6c430eef506baL },
+ { 0x00e4fe7be3ef104aL,0xebdd9a2c0a065628L,0x853a81c38792043eL,
+ 0x22ad6eceb3b59108L } },
+ /* 27 << 28 */
+ { { 0x9fb813c039cd297dL,0x8ec7e16e05bda5d9L,0x2834797c0d104b96L,
+ 0xcc11a2e77c511510L },
+ { 0x96ca5a5396ee6380L,0x054c8655cea38742L,0xb5946852d54dfa7dL,
+ 0x97c422e71f4ab207L } },
+ /* 28 << 28 */
+ { { 0xbf9075090c22b540L,0x2cde42aab7c267d4L,0xba18f9ed5ab0d693L,
+ 0x3ba62aa66e4660d9L },
+ { 0xb24bf97bab9ea96aL,0x5d039642e3b60e32L,0x4e6a45067c4d9bd5L,
+ 0x666c5b9e7ed4a6a4L } },
+ /* 29 << 28 */
+ { { 0xfa3fdcd98edbd7ccL,0x4660bb87c6ccd753L,0x9ae9082021e6b64fL,
+ 0x8a56a713b36bfb3fL },
+ { 0xabfce0965726d47fL,0x9eed01b20b1a9a7fL,0x30e9cad44eb74a37L,
+ 0x7b2524cc53e9666dL } },
+ /* 30 << 28 */
+ { { 0x6a29683b8f4b002fL,0xc2200d7a41f4fc20L,0xcf3af47a3a338accL,
+ 0x6539a4fbe7128975L },
+ { 0xcec31c14c33c7fcfL,0x7eb6799bc7be322bL,0x119ef4e96646f623L,
+ 0x7b7a26a554d7299bL } },
+ /* 31 << 28 */
+ { { 0xcb37f08d403f46f2L,0x94b8fc431a0ec0c7L,0xbb8514e3c332142fL,
+ 0xf3ed2c33e80d2a7aL },
+ { 0x8d2080afb639126cL,0xf7b6be60e3553adeL,0x3950aa9f1c7e2b09L,
+ 0x847ff9586410f02bL } },
+ /* 32 << 28 */
+ { { 0x877b7cf5678a31b0L,0xd50301ae3998b620L,0x734257c5c00fb396L,
+ 0xf9fb18a004e672a6L },
+ { 0xff8bd8ebe8758851L,0x1e64e4c65d99ba44L,0x4b8eaedf7dfd93b7L,
+ 0xba2f2a9804e76b8cL } },
+ /* 33 << 28 */
+ { { 0x7d790cbae8053433L,0xc8e725a03d2c9585L,0x58c5c476cdd8f5edL,
+ 0xd106b952efa9fe1dL },
+ { 0x3c5c775b0eff13a9L,0x242442bae057b930L,0xe9f458d4c9b70cbdL,
+ 0x69b71448a3cdb89aL } },
+ /* 34 << 28 */
+ { { 0x41ee46f60e2ed742L,0x573f104540067493L,0xb1e154ff9d54c304L,
+ 0x2ad0436a8d3a7502L },
+ { 0xee4aaa2d431a8121L,0xcd38b3ab886f11edL,0x57d49ea6034a0eb7L,
+ 0xd2b773bdf7e85e58L } },
+ /* 35 << 28 */
+ { { 0x4a559ac49b5c1f14L,0xc444be1a3e54df2bL,0x13aad704eda41891L,
+ 0xcd927bec5eb5c788L },
+ { 0xeb3c8516e48c8a34L,0x1b7ac8124b546669L,0x1815f896594df8ecL,
+ 0x87c6a79c79227865L } },
+ /* 36 << 28 */
+ { { 0xae02a2f09b56ddbdL,0x1339b5ac8a2f1cf3L,0xf2b569c7839dff0dL,
+ 0xb0b9e864fee9a43dL },
+ { 0x4ff8ca4177bb064eL,0x145a2812fd249f63L,0x3ab7beacf86f689aL,
+ 0x9bafec2701d35f5eL } },
+ /* 37 << 28 */
+ { { 0x28054c654265aa91L,0xa4b18304035efe42L,0x6887b0e69639dec7L,
+ 0xf4b8f6ad3d52aea5L },
+ { 0xfb9293cc971a8a13L,0x3f159e5d4c934d07L,0x2c50e9b109acbc29L,
+ 0x08eb65e67154d129L } },
+ /* 38 << 28 */
+ { { 0x4feff58930b75c3eL,0x0bb82fe294491c93L,0xd8ac377a89af62bbL,
+ 0xd7b514909685e49fL },
+ { 0xabca9a7b04497f19L,0x1b35ed0a1a7ad13fL,0x6b601e213ec86ed6L,
+ 0xda91fcb9ce0c76f1L } },
+ /* 39 << 28 */
+ { { 0x9e28507bd7ab27e1L,0x7c19a55563945b7bL,0x6b43f0a1aafc9827L,
+ 0x443b4fbd3aa55b91L },
+ { 0x962b2e656962c88fL,0x139da8d4ce0db0caL,0xb93f05dd1b8d6c4fL,
+ 0x779cdff7180b9824L } },
+ /* 40 << 28 */
+ { { 0xbba23fddae57c7b7L,0x345342f21b932522L,0xfd9c80fe556d4aa3L,
+ 0xa03907ba6525bb61L },
+ { 0x38b010e1ff218933L,0xc066b654aa52117bL,0x8e14192094f2e6eaL,
+ 0x66a27dca0d32f2b2L } },
+ /* 41 << 28 */
+ { { 0x69c7f993048b3717L,0xbf5a989ab178ae1cL,0x49fa9058564f1d6bL,
+ 0x27ec6e15d31fde4eL },
+ { 0x4cce03737276e7fcL,0x64086d7989d6bf02L,0x5a72f0464ccdd979L,
+ 0x909c356647775631L } },
+ /* 42 << 28 */
+ { { 0x1c07bc6b75dd7125L,0xb4c6bc9787a0428dL,0x507ece52fdeb6b9dL,
+ 0xfca56512b2c95432L },
+ { 0x15d97181d0e8bd06L,0x384dd317c6bb46eaL,0x5441ea203952b624L,
+ 0xbcf70dee4e7dc2fbL } },
+ /* 43 << 28 */
+ { { 0x372b016e6628e8c3L,0x07a0d667b60a7522L,0xcf05751b0a344ee2L,
+ 0x0ec09a48118bdeecL },
+ { 0x6e4b3d4ed83dce46L,0x43a6316d99d2fc6eL,0xa99d898956cf044cL,
+ 0x7c7f4454ae3e5fb7L } },
+ /* 44 << 28 */
+ { { 0xb2e6b121fbabbe92L,0x281850fbe1330076L,0x093581ec97890015L,
+ 0x69b1dded75ff77f5L },
+ { 0x7cf0b18fab105105L,0x953ced31a89ccfefL,0x3151f85feb914009L,
+ 0x3c9f1b8788ed48adL } },
+ /* 45 << 28 */
+ { { 0xc9aba1a14a7eadcbL,0x928e7501522e71cfL,0xeaede7273a2e4f83L,
+ 0x467e10d11ce3bbd3L },
+ { 0xf3442ac3b955dcf0L,0xba96307dd3d5e527L,0xf763a10efd77f474L,
+ 0x5d744bd06a6e1ff0L } },
+ /* 46 << 28 */
+ { { 0xd287282aa777899eL,0xe20eda8fd03f3cdeL,0x6a7e75bb50b07d31L,
+ 0x0b7e2a946f379de4L },
+ { 0x31cb64ad19f593cfL,0x7b1a9e4f1e76ef1dL,0xe18c9c9db62d609cL,
+ 0x439bad6de779a650L } },
+ /* 47 << 28 */
+ { { 0x219d9066e032f144L,0x1db632b8e8b2ec6aL,0xff0d0fd4fda12f78L,
+ 0x56fb4c2d2a25d265L },
+ { 0x5f4e2ee1255a03f1L,0x61cd6af2e96af176L,0xe0317ba8d068bc97L,
+ 0x927d6bab264b988eL } },
+ /* 48 << 28 */
+ { { 0xa18f07e0e90fb21eL,0x00fd2b80bba7fca1L,0x20387f2795cd67b5L,
+ 0x5b89a4e7d39707f7L },
+ { 0x8f83ad3f894407ceL,0xa0025b946c226132L,0xc79563c7f906c13bL,
+ 0x5f548f314e7bb025L } },
+ /* 49 << 28 */
+ { { 0x2b4c6b8feac6d113L,0xa67e3f9c0e813c76L,0x3982717c3fe1f4b9L,
+ 0x5886581926d8050eL },
+ { 0x99f3640cf7f06f20L,0xdc6102162a66ebc2L,0x52f2c175767a1e08L,
+ 0x05660e1a5999871bL } },
+ /* 50 << 28 */
+ { { 0x6b0f17626d3c4693L,0xf0e7d62737ed7beaL,0xc51758c7b75b226dL,
+ 0x40a886281f91613bL },
+ { 0x889dbaa7bbb38ce0L,0xe0404b65bddcad81L,0xfebccd3a8bc9671fL,
+ 0xfbf9a357ee1f5375L } },
+ /* 51 << 28 */
+ { { 0x5dc169b028f33398L,0xb07ec11d72e90f65L,0xae7f3b4afaab1eb1L,
+ 0xd970195e5f17538aL },
+ { 0x52b05cbe0181e640L,0xf5debd622643313dL,0x761481545df31f82L,
+ 0x23e03b333a9e13c5L } },
+ /* 52 << 28 */
+ { { 0xff7589494fde0c1fL,0xbf8a1abee5b6ec20L,0x702278fb87e1db6cL,
+ 0xc447ad7a35ed658fL },
+ { 0x48d4aa3803d0ccf2L,0x80acb338819a7c03L,0x9bc7c89e6e17ceccL,
+ 0x46736b8b03be1d82L } },
+ /* 53 << 28 */
+ { { 0xd65d7b60c0432f96L,0xddebe7a3deb5442fL,0x79a253077dff69a2L,
+ 0x37a56d9402cf3122L },
+ { 0x8bab8aedf2350d0aL,0x13c3f276037b0d9aL,0xc664957c44c65caeL,
+ 0x88b44089c2e71a88L } },
+ /* 54 << 28 */
+ { { 0xdb88e5a35cb02664L,0x5d4c0bf18686c72eL,0xea3d9b62a682d53eL,
+ 0x9b605ef40b2ad431L },
+ { 0x71bac202c69645d0L,0xa115f03a6a1b66e7L,0xfe2c563a158f4dc4L,
+ 0xf715b3a04d12a78cL } },
+ /* 55 << 28 */
+ { { 0x8f7f0a48d413213aL,0x2035806dc04becdbL,0xecd34a995d8587f5L,
+ 0x4d8c30799f6d3a71L },
+ { 0x1b2a2a678d95a8f6L,0xc58c9d7df2110d0dL,0xdeee81d5cf8fba3fL,
+ 0xa42be3c00c7cdf68L } },
+ /* 56 << 28 */
+ { { 0x2126f742d43b5eaaL,0x054a0766dfa59b85L,0x9d0d5e36126bfd45L,
+ 0xa1f8fbd7384f8a8fL },
+ { 0x317680f5d563fcccL,0x48ca5055f280a928L,0xe00b81b227b578cfL,
+ 0x10aad9182994a514L } },
+ /* 57 << 28 */
+ { { 0xd9e07b62b7bdc953L,0x9f0f6ff25bc086ddL,0x09d1ccff655eee77L,
+ 0x45475f795bef7df1L },
+ { 0x3faa28fa86f702ccL,0x92e609050f021f07L,0xe9e629687f8fa8c6L,
+ 0xbd71419af036ea2cL } },
+ /* 58 << 28 */
+ { { 0x171ee1cc6028da9aL,0x5352fe1ac251f573L,0xf8ff236e3fa997f4L,
+ 0xd831b6c9a5749d5fL },
+ { 0x7c872e1de350e2c2L,0xc56240d91e0ce403L,0xf9deb0776974f5cbL,
+ 0x7d50ba87961c3728L } },
+ /* 59 << 28 */
+ { { 0xd6f894265a3a2518L,0xcf817799c6303d43L,0x510a0471619e5696L,
+ 0xab049ff63a5e307bL },
+ { 0xe4cdf9b0feb13ec7L,0xd5e971179d8ff90cL,0xf6f64d069afa96afL,
+ 0x00d0bf5e9d2012a2L } },
+ /* 60 << 28 */
+ { { 0xe63f301f358bcdc0L,0x07689e990a9d47f8L,0x1f689e2f4f43d43aL,
+ 0x4d542a1690920904L },
+ { 0xaea293d59ca0a707L,0xd061fe458ac68065L,0x1033bf1b0090008cL,
+ 0x29749558c08a6db6L } },
+ /* 61 << 28 */
+ { { 0x74b5fc59c1d5d034L,0xf712e9f667e215e0L,0xfd520cbd860200e6L,
+ 0x0229acb43ea22588L },
+ { 0x9cd1e14cfff0c82eL,0x87684b6259c69e73L,0xda85e61c96ccb989L,
+ 0x2d5dbb02a3d06493L } },
+ /* 62 << 28 */
+ { { 0xf22ad33ae86b173cL,0xe8e41ea5a79ff0e3L,0x01d2d725dd0d0c10L,
+ 0x31f39088032d28f9L },
+ { 0x7b3f71e17829839eL,0x0cf691b44502ae58L,0xef658dbdbefc6115L,
+ 0xa5cd6ee5b3ab5314L } },
+ /* 63 << 28 */
+ { { 0x206c8d7b5f1d2347L,0x794645ba4cc2253aL,0xd517d8ff58389e08L,
+ 0x4fa20dee9f847288L },
+ { 0xeba072d8d797770aL,0x7360c91dbf429e26L,0x7200a3b380af8279L,
+ 0x6a1c915082dadce3L } },
+ /* 64 << 28 */
+ { { 0x0ee6d3a7c35d8794L,0x042e65580356bae5L,0x9f59698d643322fdL,
+ 0x9379ae1550a61967L },
+ { 0x64b9ae62fcc9981eL,0xaed3d6316d2934c6L,0x2454b3025e4e65ebL,
+ 0xab09f647f9950428L } },
+ /* 0 << 35 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 35 */
+ { { 0xb2083a1222248accL,0x1f6ec0ef3264e366L,0x5659b7045afdee28L,
+ 0x7a823a40e6430bb5L },
+ { 0x24592a04e1900a79L,0xcde09d4ac9ee6576L,0x52b6463f4b5ea54aL,
+ 0x1efe9ed3d3ca65a7L } },
+ /* 2 << 35 */
+ { { 0xe27a6dbe305406ddL,0x8eb7dc7fdd5d1957L,0xf54a6876387d4d8fL,
+ 0x9c479409c7762de4L },
+ { 0xbe4d5b5d99b30778L,0x25380c566e793682L,0x602d37f3dac740e3L,
+ 0x140deabe1566e4aeL } },
+ /* 3 << 35 */
+ { { 0x4481d067afd32acfL,0xd8f0fccae1f71ccfL,0xd208dd0cb596f2daL,
+ 0xd049d7309aad93f9L },
+ { 0xc79f263d42ab580eL,0x09411bb123f707b4L,0x8cfde1ff835e0edaL,
+ 0x7270749090f03402L } },
+ /* 4 << 35 */
+ { { 0xeaee6126c49a861eL,0x024f3b65e14f0d06L,0x51a3f1e8c69bfc17L,
+ 0xc3c3a8e9a7686381L },
+ { 0x3400752cb103d4c8L,0x02bc46139218b36bL,0xc67f75eb7651504aL,
+ 0xd6848b56d02aebfaL } },
+ /* 5 << 35 */
+ { { 0xbd9802e6c30fa92bL,0x5a70d96d9a552784L,0x9085c4ea3f83169bL,
+ 0xfa9423bb06908228L },
+ { 0x2ffebe12fe97a5b9L,0x85da604971b99118L,0x9cbc2f7f63178846L,
+ 0xfd96bc709153218eL } },
+ /* 6 << 35 */
+ { { 0x958381db1782269bL,0xae34bf792597e550L,0xbb5c60645f385153L,
+ 0x6f0e96afe3088048L },
+ { 0xbf6a021577884456L,0xb3b5688c69310ea7L,0x17c9429504fad2deL,
+ 0xe020f0e517896d4dL } },
+ /* 7 << 35 */
+ { { 0x730ba0ab0976505fL,0x567f6813095e2ec5L,0x470620106331ab71L,
+ 0x72cfa97741d22b9fL },
+ { 0x33e55ead8a2373daL,0xa8d0d5f47ba45a68L,0xba1d8f9c03029d15L,
+ 0x8f34f1ccfc55b9f3L } },
+ /* 8 << 35 */
+ { { 0xcca4428dbbe5a1a9L,0x8187fd5f3126bd67L,0x0036973a48105826L,
+ 0xa39b6663b8bd61a0L },
+ { 0x6d42deef2d65a808L,0x4969044f94636b19L,0xf611ee47dd5d564cL,
+ 0x7b2f3a49d2873077L } },
+ /* 9 << 35 */
+ { { 0x94157d45300eb294L,0x2b2a656e169c1494L,0xc000dd76d3a47aa9L,
+ 0xa2864e4fa6243ea4L },
+ { 0x82716c47db89842eL,0x12dfd7d761479fb7L,0x3b9a2c56e0b2f6dcL,
+ 0x46be862ad7f85d67L } },
+ /* 10 << 35 */
+ { { 0x03b0d8dd0f82b214L,0x460c34f9f103cbc6L,0xf32e5c0318d79e19L,
+ 0x8b8888baa84117f8L },
+ { 0x8f3c37dcc0722677L,0x10d21be91c1c0f27L,0xd47c8468e0f7a0c6L,
+ 0x9bf02213adecc0e0L } },
+ /* 11 << 35 */
+ { { 0x0baa7d1242b48b99L,0x1bcb665d48424096L,0x8b847cd6ebfb5cfbL,
+ 0x87c2ae569ad4d10dL },
+ { 0xf1cbb1220de36726L,0xe7043c683fdfbd21L,0x4bd0826a4e79d460L,
+ 0x11f5e5984bd1a2cbL } },
+ /* 12 << 35 */
+ { { 0x97554160b7fe7b6eL,0x7d16189a400a3fb2L,0xd73e9beae328ca1eL,
+ 0x0dd04b97e793d8ccL },
+ { 0xa9c83c9b506db8ccL,0x5cd47aaecf38814cL,0x26fc430db64b45e6L,
+ 0x079b5499d818ea84L } },
+ /* 13 << 35 */
+ { { 0xebb01102c1c24a3bL,0xca24e5681c161c1aL,0x103eea6936f00a4aL,
+ 0x9ad76ee876176c7bL },
+ { 0x97451fc2538e0ff7L,0x94f898096604b3b0L,0x6311436e3249cfd7L,
+ 0x27b4a7bd41224f69L } },
+ /* 14 << 35 */
+ { { 0x03b5d21ae0ac2941L,0x279b0254c2d31937L,0x3307c052cac992d0L,
+ 0x6aa7cb92efa8b1f3L },
+ { 0x5a1825800d37c7a5L,0x13380c37342d5422L,0x92ac2d66d5d2ef92L,
+ 0x035a70c9030c63c6L } },
+ /* 15 << 35 */
+ { { 0xc16025dd4ce4f152L,0x1f419a71f9df7c06L,0x6d5b221491e4bb14L,
+ 0xfc43c6cc839fb4ceL },
+ { 0x49f06591925d6b2dL,0x4b37d9d362186598L,0x8c54a971d01b1629L,
+ 0xe1a9c29f51d50e05L } },
+ /* 16 << 35 */
+ { { 0x5109b78571ba1861L,0x48b22d5cd0c8f93dL,0xe8fa84a78633bb93L,
+ 0x53fba6ba5aebbd08L },
+ { 0x7ff27df3e5eea7d8L,0x521c879668ca7158L,0xb9d5133bce6f1a05L,
+ 0x2d50cd53fd0ebee4L } },
+ /* 17 << 35 */
+ { { 0xc82115d6c5a3ef16L,0x993eff9dba079221L,0xe4da2c5e4b5da81cL,
+ 0x9a89dbdb8033fd85L },
+ { 0x60819ebf2b892891L,0x53902b215d14a4d5L,0x6ac35051d7fda421L,
+ 0xcc6ab88561c83284L } },
+ /* 18 << 35 */
+ { { 0x14eba133f74cff17L,0x240aaa03ecb813f2L,0xcfbb65406f665beeL,
+ 0x084b1fe4a425ad73L },
+ { 0x009d5d16d081f6a6L,0x35304fe8eef82c90L,0xf20346d5aa9eaa22L,
+ 0x0ada9f07ac1c91e3L } },
+ /* 19 << 35 */
+ { { 0xa6e21678968a6144L,0x54c1f77c07b31a1eL,0xd6bb787e5781fbe1L,
+ 0x61bd2ee0e31f1c4aL },
+ { 0xf25aa1e9781105fcL,0x9cf2971f7b2f8e80L,0x26d15412cdff919bL,
+ 0x01db4ebe34bc896eL } },
+ /* 20 << 35 */
+ { { 0x7d9b3e23b40df1cfL,0x5933737394e971b4L,0xbf57bd14669cf921L,
+ 0x865daedf0c1a1064L },
+ { 0x3eb70bd383279125L,0xbc3d5b9f34ecdaabL,0x91e3ed7e5f755cafL,
+ 0x49699f54d41e6f02L } },
+ /* 21 << 35 */
+ { { 0x185770e1d4a7a15bL,0x08f3587aeaac87e7L,0x352018db473133eaL,
+ 0x674ce71904fd30fcL },
+ { 0x7b8d9835088b3e0eL,0x7a0356a95d0d47a1L,0x9d9e76596474a3c4L,
+ 0x61ea48a7ff66966cL } },
+ /* 22 << 35 */
+ { { 0x304177580f3e4834L,0xfdbb21c217a9afcbL,0x756fa17f2f9a67b3L,
+ 0x2a6b2421a245c1a8L },
+ { 0x64be27944af02291L,0xade465c62a5804feL,0x8dffbd39a6f08fd7L,
+ 0xc4efa84caa14403bL } },
+ /* 23 << 35 */
+ { { 0xa1b91b2a442b0f5cL,0xb748e317cf997736L,0x8d1b62bfcee90e16L,
+ 0x907ae2710b2078c0L },
+ { 0xdf31534b0c9bcdddL,0x043fb05439adce83L,0x99031043d826846aL,
+ 0x61a9c0d6b144f393L } },
+ /* 24 << 35 */
+ { { 0xdab4804647718427L,0xdf17ff9b6e830f8bL,0x408d7ee8e49a1347L,
+ 0x6ac71e2391c1d4aeL },
+ { 0xc8cbb9fd1defd73cL,0x19840657bbbbfec5L,0x39db1cb59e7ef8eaL,
+ 0x78aa829664105f30L } },
+ /* 25 << 35 */
+ { { 0xa3d9b7f0a3738c29L,0x0a2f235abc3250a3L,0x55e506f6445e4cafL,
+ 0x0974f73d33475f7aL },
+ { 0xd37dbba35ba2f5a8L,0x542c6e636af40066L,0x26d99b53c5d73e2cL,
+ 0x06060d7d6c3ca33eL } },
+ /* 26 << 35 */
+ { { 0xcdbef1c2065fef4aL,0x77e60f7dfd5b92e3L,0xd7c549f026708350L,
+ 0x201b3ad034f121bfL },
+ { 0x5fcac2a10334fc14L,0x8a9a9e09344552f6L,0x7dd8a1d397653082L,
+ 0x5fc0738f79d4f289L } },
+ /* 27 << 35 */
+ { { 0x787d244d17d2d8c3L,0xeffc634570830684L,0x5ddb96dde4f73ae5L,
+ 0x8efb14b1172549a5L },
+ { 0x6eb73eee2245ae7aL,0xbca4061eea11f13eL,0xb577421d30b01f5dL,
+ 0xaa688b24782e152cL } },
+ /* 28 << 35 */
+ { { 0x67608e71bd3502baL,0x4ef41f24b4de75a0L,0xb08dde5efd6125e5L,
+ 0xde484825a409543fL },
+ { 0x1f198d9865cc2295L,0x428a37716e0edfa2L,0x4f9697a2adf35fc7L,
+ 0x01a43c79f7cac3c7L } },
+ /* 29 << 35 */
+ { { 0xb05d70590fd3659aL,0x8927f30cbb7f2d9aL,0x4023d1ac8cf984d3L,
+ 0x32125ed302897a45L },
+ { 0xfb572dad3d414205L,0x73000ef2e3fa82a9L,0x4c0868e9f10a5581L,
+ 0x5b61fc676b0b3ca5L } },
+ /* 30 << 35 */
+ { { 0xc1258d5b7cae440cL,0x21c08b41402b7531L,0xf61a8955de932321L,
+ 0x3568faf82d1408afL },
+ { 0x71b15e999ecf965bL,0xf14ed248e917276fL,0xc6f4caa1820cf9e2L,
+ 0x681b20b218d83c7eL } },
+ /* 31 << 35 */
+ { { 0x6cde738dc6c01120L,0x71db0813ae70e0dbL,0x95fc064474afe18cL,
+ 0x34619053129e2be7L },
+ { 0x80615ceadb2a3b15L,0x0a49a19edb4c7073L,0x0e1b84c88fd2d367L,
+ 0xd74bf462033fb8aaL } },
+ /* 32 << 35 */
+ { { 0x889f6d65533ef217L,0x7158c7e4c3ca2e87L,0xfb670dfbdc2b4167L,
+ 0x75910a01844c257fL },
+ { 0xf336bf07cf88577dL,0x22245250e45e2aceL,0x2ed92e8d7ca23d85L,
+ 0x29f8be4c2b812f58L } },
+ /* 33 << 35 */
+ { { 0xdd9ebaa7076fe12bL,0x3f2400cbae1537f9L,0x1aa9352817bdfb46L,
+ 0xc0f9843067883b41L },
+ { 0x5590ede10170911dL,0x7562f5bb34d4b17fL,0xe1fa1df21826b8d2L,
+ 0xb40b796a6bd80d59L } },
+ /* 34 << 35 */
+ { { 0xd65bf1973467ba92L,0x8c9b46dbf70954b0L,0x97c8a0f30e78f15dL,
+ 0xa8f3a69a85a4c961L },
+ { 0x4242660f61e4ce9bL,0xbf06aab36ea6790cL,0xc6706f8eec986416L,
+ 0x9e56dec19a9fc225L } },
+ /* 35 << 35 */
+ { { 0x527c46f49a9898d9L,0xd799e77b5633cdefL,0x24eacc167d9e4297L,
+ 0xabb61cea6b1cb734L },
+ { 0xbee2e8a7f778443cL,0x3bb42bf129de2fe6L,0xcbed86a13003bb6fL,
+ 0xd3918e6cd781cdf6L } },
+ /* 36 << 35 */
+ { { 0x4bee32719a5103f1L,0x5243efc6f50eac06L,0xb8e122cb6adcc119L,
+ 0x1b7faa84c0b80a08L },
+ { 0x32c3d1bd6dfcd08cL,0x129dec4e0be427deL,0x98ab679c1d263c83L,
+ 0xafc83cb7cef64effL } },
+ /* 37 << 35 */
+ { { 0x85eb60882fa6be76L,0x892585fb1328cbfeL,0xc154d3edcf618ddaL,
+ 0xc44f601b3abaf26eL },
+ { 0x7bf57d0b2be1fdfdL,0xa833bd2d21137feeL,0x9353af362db591a8L,
+ 0xc76f26dc5562a056L } },
+ /* 38 << 35 */
+ { { 0x1d87e47d3fdf5a51L,0x7afb5f9355c9cab0L,0x91bbf58f89e0586eL,
+ 0x7c72c0180d843709L },
+ { 0xa9a5aafb99b5c3dcL,0xa48a0f1d3844aeb0L,0x7178b7ddb667e482L,
+ 0x453985e96e23a59aL } },
+ /* 39 << 35 */
+ { { 0x4a54c86001b25dd8L,0x0dd37f48fb897c8aL,0x5f8aa6100ea90cd9L,
+ 0xc8892c6816d5830dL },
+ { 0xeb4befc0ef514ca5L,0x478eb679e72c9ee6L,0x9bca20dadbc40d5fL,
+ 0xf015de21dde4f64aL } },
+ /* 40 << 35 */
+ { { 0xaa6a4de0eaf4b8a5L,0x68cfd9ca4bc60e32L,0x668a4b017fd15e70L,
+ 0xd9f0694af27dc09dL },
+ { 0xf6c3cad5ba708bcdL,0x5cd2ba695bb95c2aL,0xaa28c1d333c0a58fL,
+ 0x23e274e3abc77870L } },
+ /* 41 << 35 */
+ { { 0x44c3692ddfd20a4aL,0x091c5fd381a66653L,0x6c0bb69109a0757dL,
+ 0x9072e8b9667343eaL },
+ { 0x31d40eb080848becL,0x95bd480a79fd36ccL,0x01a77c6165ed43f5L,
+ 0xafccd1272e0d40bfL } },
+ /* 42 << 35 */
+ { { 0xeccfc82d1cc1884bL,0xc85ac2015d4753b4L,0xc7a6caac658e099fL,
+ 0xcf46369e04b27390L },
+ { 0xe2e7d049506467eaL,0x481b63a237cdecccL,0x4029abd8ed80143aL,
+ 0x28bfe3c7bcb00b88L } },
+ /* 43 << 35 */
+ { { 0x3bec10090643d84aL,0x885f3668abd11041L,0xdb02432cf83a34d6L,
+ 0x32f7b360719ceebeL },
+ { 0xf06c7837dad1fe7aL,0x60a157a95441a0b0L,0x704970e9e2d47550L,
+ 0xcd2bd553271b9020L } },
+ /* 44 << 35 */
+ { { 0xff57f82f33e24a0bL,0x9cbee23ff2565079L,0x16353427eb5f5825L,
+ 0x276feec4e948d662L },
+ { 0xd1b62bc6da10032bL,0x718351ddf0e72a53L,0x934520762420e7baL,
+ 0x96368fff3a00118dL } },
+ /* 45 << 35 */
+ { { 0x00ce2d26150a49e4L,0x0c28b6363f04706bL,0xbad65a4658b196d0L,
+ 0x6c8455fcec9f8b7cL },
+ { 0xe90c895f2d71867eL,0x5c0be31bedf9f38cL,0x2a37a15ed8f6ec04L,
+ 0x239639e78cd85251L } },
+ /* 46 << 35 */
+ { { 0xd89753159c7c4c6bL,0x603aa3c0d7409af7L,0xb8d53d0c007132fbL,
+ 0x68d12af7a6849238L },
+ { 0xbe0607e7bf5d9279L,0x9aa50055aada74ceL,0xe81079cbba7e8ccbL,
+ 0x610c71d1a5f4ff5eL } },
+ /* 47 << 35 */
+ { { 0x9e2ee1a75aa07093L,0xca84004ba75da47cL,0x074d39513de75401L,
+ 0xf938f756bb311592L },
+ { 0x9619761800a43421L,0x39a2536207bc78c8L,0x278f710a0a171276L,
+ 0xb28446ea8d1a8f08L } },
+ /* 48 << 35 */
+ { { 0x184781bfe3b6a661L,0x7751cb1de6d279f7L,0xf8ff95d6c59eb662L,
+ 0x186d90b758d3dea7L },
+ { 0x0e4bb6c1dfb4f754L,0x5c5cf56b2b2801dcL,0xc561e4521f54564dL,
+ 0xb4fb8c60f0dd7f13L } },
+ /* 49 << 35 */
+ { { 0xf884963033ff98c7L,0x9619fffacf17769cL,0xf8090bf61bfdd80aL,
+ 0x14d9a149422cfe63L },
+ { 0xb354c3606f6df9eaL,0xdbcf770d218f17eaL,0x207db7c879eb3480L,
+ 0x213dbda8559b6a26L } },
+ /* 50 << 35 */
+ { { 0xac4c200b29fc81b3L,0xebc3e09f171d87c1L,0x917995301481aa9eL,
+ 0x051b92e192e114faL },
+ { 0xdf8f92e9ecb5537fL,0x44b1b2cc290c7483L,0xa711455a2adeb016L,
+ 0x964b685681a10c2cL } },
+ /* 51 << 35 */
+ { { 0x4f159d99cec03623L,0x05532225ef3271eaL,0xb231bea3c5ee4849L,
+ 0x57a54f507094f103L },
+ { 0x3e2d421d9598b352L,0xe865a49c67412ab4L,0xd2998a251cc3a912L,
+ 0x5d0928080c74d65dL } },
+ /* 52 << 35 */
+ { { 0x73f459084088567aL,0xeb6b280e1f214a61L,0x8c9adc34caf0c13dL,
+ 0x39d12938f561fb80L },
+ { 0xb2dc3a5ebc6edfb4L,0x7485b1b1fe4d210eL,0x062e0400e186ae72L,
+ 0x91e32d5c6eeb3b88L } },
+ /* 53 << 35 */
+ { { 0x6df574d74be59224L,0xebc88ccc716d55f3L,0x26c2e6d0cad6ed33L,
+ 0xc6e21e7d0d3e8b10L },
+ { 0x2cc5840e5bcc36bbL,0x9292445e7da74f69L,0x8be8d3214e5193a8L,
+ 0x3ec236298df06413L } },
+ /* 54 << 35 */
+ { { 0xc7e9ae85b134defaL,0x6073b1d01bb2d475L,0xb9ad615e2863c00dL,
+ 0x9e29493d525f4ac4L },
+ { 0xc32b1dea4e9acf4fL,0x3e1f01c8a50db88dL,0xb05d70ea04da916cL,
+ 0x714b0d0ad865803eL } },
+ /* 55 << 35 */
+ { { 0x4bd493fc9920cb5eL,0x5b44b1f792c7a3acL,0xa2a77293bcec9235L,
+ 0x5ee06e87cd378553L },
+ { 0xceff8173da621607L,0x2bb03e4c99f5d290L,0x2945106aa6f734acL,
+ 0xb5056604d25c4732L } },
+ /* 56 << 35 */
+ { { 0x5945920ce079afeeL,0x686e17a06789831fL,0x5966bee8b74a5ae5L,
+ 0x38a673a21e258d46L },
+ { 0xbd1cc1f283141c95L,0x3b2ecf4f0e96e486L,0xcd3aa89674e5fc78L,
+ 0x415ec10c2482fa7aL } },
+ /* 57 << 35 */
+ { { 0x1523441980503380L,0x513d917ad314b392L,0xb0b52f4e63caecaeL,
+ 0x07bf22ad2dc7780bL },
+ { 0xe761e8a1e4306839L,0x1b3be9625dd7feaaL,0x4fe728de74c778f1L,
+ 0xf1fa0bda5e0070f6L } },
+ /* 58 << 35 */
+ { { 0x85205a316ec3f510L,0x2c7e4a14d2980475L,0xde3c19c06f30ebfdL,
+ 0xdb1c1f38d4b7e644L },
+ { 0xfe291a755dce364aL,0xb7b22a3c058f5be3L,0x2cd2c30237fea38cL,
+ 0x2930967a2e17be17L } },
+ /* 59 << 35 */
+ { { 0x87f009de0c061c65L,0xcb014aacedc6ed44L,0x49bd1cb43bafb1ebL,
+ 0x81bd8b5c282d3688L },
+ { 0x1cdab87ef01a17afL,0x21f37ac4e710063bL,0x5a6c567642fc8193L,
+ 0xf4753e7056a6015cL } },
+ /* 60 << 35 */
+ { { 0x020f795ea15b0a44L,0x8f37c8d78958a958L,0x63b7e89ba4b675b5L,
+ 0xb4fb0c0c0fc31aeaL },
+ { 0xed95e639a7ff1f2eL,0x9880f5a3619614fbL,0xdeb6ff02947151abL,
+ 0x5bc5118ca868dcdbL } },
+ /* 61 << 35 */
+ { { 0xd8da20554c20cea5L,0xcac2776e14c4d69aL,0xcccb22c1622d599bL,
+ 0xa4ddb65368a9bb50L },
+ { 0x2c4ff1511b4941b4L,0xe1ff19b46efba588L,0x35034363c48345e0L,
+ 0x45542e3d1e29dfc4L } },
+ /* 62 << 35 */
+ { { 0xf197cb91349f7aedL,0x3b2b5a008fca8420L,0x7c175ee823aaf6d8L,
+ 0x54dcf42135af32b6L },
+ { 0x0ba1430727d6561eL,0x879d5ee4d175b1e2L,0xc7c4367399807db5L,
+ 0x77a544559cd55bcdL } },
+ /* 63 << 35 */
+ { { 0xe6c2ff130105c072L,0x18f7a99f8dda7da4L,0x4c3018200e2d35c1L,
+ 0x06a53ca0d9cc6c82L },
+ { 0xaa21cc1ef1aa1d9eL,0x324143344a75b1e8L,0x2a6d13280ebe9fdcL,
+ 0x16bd173f98a4755aL } },
+ /* 64 << 35 */
+ { { 0xfbb9b2452133ffd9L,0x39a8b2f1830f1a20L,0x484bc97dd5a1f52aL,
+ 0xd6aebf56a40eddf8L },
+ { 0x32257acb76ccdac6L,0xaf4d36ec1586ff27L,0x8eaa8863f8de7dd1L,
+ 0x0045d5cf88647c16L } },
+ /* 0 << 42 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 42 */
+ { { 0xa6f3d574c005979dL,0xc2072b426a40e350L,0xfca5c1568de2ecf9L,
+ 0xa8c8bf5ba515344eL },
+ { 0x97aee555114df14aL,0xd4374a4dfdc5ec6bL,0x754cc28f2ca85418L,
+ 0x71cb9e27d3c41f78L } },
+ /* 2 << 42 */
+ { { 0x8910507903605c39L,0xf0843d9ea142c96cL,0xf374493416923684L,
+ 0x732caa2ffa0a2893L },
+ { 0xb2e8c27061160170L,0xc32788cc437fbaa3L,0x39cd818ea6eda3acL,
+ 0xe2e942399e2b2e07L } },
+ /* 3 << 42 */
+ { { 0x6967d39b0260e52aL,0xd42585cc90653325L,0x0d9bd60521ca7954L,
+ 0x4fa2087781ed57b3L },
+ { 0x60c1eff8e34a0bbeL,0x56b0040c84f6ef64L,0x28be2b24b1af8483L,
+ 0xb2278163f5531614L } },
+ /* 4 << 42 */
+ { { 0x8df275455922ac1cL,0xa7b3ef5ca52b3f63L,0x8e77b21471de57c4L,
+ 0x31682c10834c008bL },
+ { 0xc76824f04bd55d31L,0xb6d1c08617b61c71L,0x31db0903c2a5089dL,
+ 0x9c092172184e5d3fL } },
+ /* 5 << 42 */
+ { { 0xdd7ced5bc00cc638L,0x1a2015eb61278fc2L,0x2e8e52886a37f8d6L,
+ 0xc457786fe79933adL },
+ { 0xb3fe4cce2c51211aL,0xad9b10b224c20498L,0x90d87a4fd28db5e5L,
+ 0x698cd1053aca2fc3L } },
+ /* 6 << 42 */
+ { { 0x4f112d07e91b536dL,0xceb982f29eba09d6L,0x3c157b2c197c396fL,
+ 0xe23c2d417b66eb24L },
+ { 0x480c57d93f330d37L,0xb3a4c8a179108debL,0x702388decb199ce5L,
+ 0x0b019211b944a8d4L } },
+ /* 7 << 42 */
+ { { 0x24f2a692840bb336L,0x7c353bdca669fa7bL,0xda20d6fcdec9c300L,
+ 0x625fbe2fa13a4f17L },
+ { 0xa2b1b61adbc17328L,0x008965bfa9515621L,0x49690939c620ff46L,
+ 0x182dd27d8717e91cL } },
+ /* 8 << 42 */
+ { { 0x5ace5035ea6c3997L,0x54259aaac2610befL,0xef18bb3f3c80dd39L,
+ 0x6910b95b5fc3fa39L },
+ { 0xfce2f51043e09aeeL,0xced56c9fa7675665L,0x10e265acd872db61L,
+ 0x6982812eae9fce69L } },
+ /* 9 << 42 */
+ { { 0x29be11c6ce800998L,0x72bb1752b90360d9L,0x2c1931975a4ad590L,
+ 0x2ba2f5489fc1dbc0L },
+ { 0x7fe4eebbe490ebe0L,0x12a0a4cd7fae11c0L,0x7197cf81e903ba37L,
+ 0xcf7d4aa8de1c6dd8L } },
+ /* 10 << 42 */
+ { { 0x92af6bf43fd5684cL,0x2b26eecf80360aa1L,0xbd960f3000546a82L,
+ 0x407b3c43f59ad8feL },
+ { 0x86cae5fe249c82baL,0x9e0faec72463744cL,0x87f551e894916272L,
+ 0x033f93446ceb0615L } },
+ /* 11 << 42 */
+ { { 0x1e5eb0d18be82e84L,0x89967f0e7a582fefL,0xbcf687d5a6e921faL,
+ 0xdfee4cf3d37a09baL },
+ { 0x94f06965b493c465L,0x638b9a1c7635c030L,0x7666786466f05e9fL,
+ 0xccaf6808c04da725L } },
+ /* 12 << 42 */
+ { { 0xca2eb690768fccfcL,0xf402d37db835b362L,0x0efac0d0e2fdfcceL,
+ 0xefc9cdefb638d990L },
+ { 0x2af12b72d1669a8bL,0x33c536bc5774ccbdL,0x30b21909fb34870eL,
+ 0xc38fa2f77df25acaL } },
+ /* 13 << 42 */
+ { { 0x74c5f02bbf81f3f5L,0x0525a5aeaf7e4581L,0x88d2aaba433c54aeL,
+ 0xed9775db806a56c5L },
+ { 0xd320738ac0edb37dL,0x25fdb6ee66cc1f51L,0xac661d1710600d76L,
+ 0x931ec1f3bdd1ed76L } },
+ /* 14 << 42 */
+ { { 0x65c11d6219ee43f1L,0x5cd57c3e60829d97L,0xd26c91a3984be6e8L,
+ 0xf08d93098b0c53bdL },
+ { 0x94bc9e5bc016e4eaL,0xd391683911d43d2bL,0x886c5ad773701155L,
+ 0xe037762620b00715L } },
+ /* 15 << 42 */
+ { { 0x7f01c9ecaa80ba59L,0x3083411a68538e51L,0x970370f1e88128afL,
+ 0x625cc3db91dec14bL },
+ { 0xfef9666c01ac3107L,0xb2a8d577d5057ac3L,0xb0f2629992be5df7L,
+ 0xf579c8e500353924L } },
+ /* 16 << 42 */
+ { { 0xb8fa3d931341ed7aL,0x4223272ca7b59d49L,0x3dcb194783b8c4a4L,
+ 0x4e413c01ed1302e4L },
+ { 0x6d999127e17e44ceL,0xee86bf7533b3adfbL,0xf6902fe625aa96caL,
+ 0xb73540e4e5aae47dL } },
+ /* 17 << 42 */
+ { { 0x32801d7b1b4a158cL,0xe571c99e27e2a369L,0x40cb76c010d9f197L,
+ 0xc308c2893167c0aeL },
+ { 0xa6ef9dd3eb7958f2L,0xa7226dfc300879b1L,0x6cd0b3627edf0636L,
+ 0x4efbce6c7bc37eedL } },
+ /* 18 << 42 */
+ { { 0x75f92a058d699021L,0x586d4c79772566e3L,0x378ca5f1761ad23aL,
+ 0x650d86fc1465a8acL },
+ { 0x7a4ed457842ba251L,0x6b65e3e642234933L,0xaf1543b731aad657L,
+ 0xa4cefe98cbfec369L } },
+ /* 19 << 42 */
+ { { 0xb587da909f47befbL,0x6562e9fb41312d13L,0xa691ea59eff1cefeL,
+ 0xcc30477a05fc4cf6L },
+ { 0xa16324610b0ffd3dL,0xa1f16f3b5b355956L,0x5b148d534224ec24L,
+ 0xdc834e7bf977012aL } },
+ /* 20 << 42 */
+ { { 0x7bfc5e75b2c69dbcL,0x3aa77a2903c3da6cL,0xde0df03cca910271L,
+ 0xcbd5ca4a7806dc55L },
+ { 0xe1ca58076db476cbL,0xfde15d625f37a31eL,0xf49af520f41af416L,
+ 0x96c5c5b17d342db5L } },
+ /* 21 << 42 */
+ { { 0x155c43b7eb4ceb9bL,0x2e9930104e77371aL,0x1d2987da675d43afL,
+ 0xef2bc1c08599fd72L },
+ { 0x96894b7b9342f6b2L,0x201eadf27c8e71f0L,0xf3479d9f4a1f3efcL,
+ 0xe0f8a742702a9704L } },
+ /* 22 << 42 */
+ { { 0xeafd44b6b3eba40cL,0xf9739f29c1c1e0d0L,0x0091471a619d505eL,
+ 0xc15f9c969d7c263eL },
+ { 0x5be4728583afbe33L,0xa3b6d6af04f1e092L,0xe76526b9751a9d11L,
+ 0x2ec5b26d9a4ae4d2L } },
+ /* 23 << 42 */
+ { { 0xeb66f4d902f6fb8dL,0x4063c56196912164L,0xeb7050c180ef3000L,
+ 0x288d1c33eaa5b3f0L },
+ { 0xe87c68d607806fd8L,0xb2f7f9d54bbbf50fL,0x25972f3aac8d6627L,
+ 0xf854777410e8c13bL } },
+ /* 24 << 42 */
+ { { 0xcc50ef6c872b4a60L,0xab2a34a44613521bL,0x39c5c190983e15d1L,
+ 0x61dde5df59905512L },
+ { 0xe417f6219f2275f3L,0x0750c8b6451d894bL,0x75b04ab978b0bdaaL,
+ 0x3bfd9fd4458589bdL } },
+ /* 25 << 42 */
+ { { 0xf1013e30ee9120b6L,0x2b51af9323a4743eL,0xea96ffae48d14d9eL,
+ 0x71dc0dbe698a1d32L },
+ { 0x914962d20180cca4L,0x1ae60677c3568963L,0x8cf227b1437bc444L,
+ 0xc650c83bc9962c7aL } },
+ /* 26 << 42 */
+ { { 0x23c2c7ddfe7ccfc4L,0xf925c89d1b929d48L,0x4460f74b06783c33L,
+ 0xac2c8d49a590475aL },
+ { 0xfb40b407b807bba0L,0x9d1e362d69ff8f3aL,0xa33e9681cbef64a4L,
+ 0x67ece5fa332fb4b2L } },
+ /* 27 << 42 */
+ { { 0x6900a99b739f10e3L,0xc3341ca9ff525925L,0xee18a626a9e2d041L,
+ 0xa5a8368529580dddL },
+ { 0xf3470c819d7de3cdL,0xedf025862062cf9cL,0xf43522fac010edb0L,
+ 0x3031413513a4b1aeL } },
+ /* 28 << 42 */
+ { { 0xc792e02adb22b94bL,0x993d8ae9a1eaa45bL,0x8aad6cd3cd1e1c63L,
+ 0x89529ca7c5ce688aL },
+ { 0x2ccee3aae572a253L,0xe02b643802a21efbL,0xa7091b6ec9430358L,
+ 0x06d1b1fa9d7db504L } },
+ /* 29 << 42 */
+ { { 0x58846d32c4744733L,0x40517c71379f9e34L,0x2f65655f130ef6caL,
+ 0x526e4488f1f3503fL },
+ { 0x8467bd177ee4a976L,0x1d9dc913921363d1L,0xd8d24c33b069e041L,
+ 0x5eb5da0a2cdf7f51L } },
+ /* 30 << 42 */
+ { { 0x1c0f3cb1197b994fL,0x3c95a6c52843eae9L,0x7766ffc9a6097ea5L,
+ 0x7bea4093d723b867L },
+ { 0xb48e1f734db378f9L,0x70025b00e37b77acL,0x943dc8e7af24ad46L,
+ 0xb98a15ac16d00a85L } },
+ /* 31 << 42 */
+ { { 0x3adc38ba2743b004L,0xb1c7f4f7334415eeL,0xea43df8f1e62d05aL,
+ 0x326189059d76a3b6L },
+ { 0x2fbd0bb5a23a0f46L,0x5bc971db6a01918cL,0x7801d94ab4743f94L,
+ 0xb94df65e676ae22bL } },
+ /* 32 << 42 */
+ { { 0xaafcbfabaf95894cL,0x7b9bdc07276b2241L,0xeaf983625bdda48bL,
+ 0x5977faf2a3fcb4dfL },
+ { 0xbed042ef052c4b5bL,0x9fe87f71067591f0L,0xc89c73ca22f24ec7L,
+ 0x7d37fa9ee64a9f1bL } },
+ /* 33 << 42 */
+ { { 0x2710841a15562627L,0x2c01a613c243b034L,0x1d135c562bc68609L,
+ 0xc2ca17158b03f1f6L },
+ { 0xc9966c2d3eb81d82L,0xc02abf4a8f6df13eL,0x77b34bd78f72b43bL,
+ 0xaff6218f360c82b0L } },
+ /* 34 << 42 */
+ { { 0x0aa5726c8d55b9d2L,0xdc0adbe999e9bffbL,0x9097549cefb9e72aL,
+ 0x167557129dfb3111L },
+ { 0xdd8bf984f26847f9L,0xbcb8e387dfb30cb7L,0xc1fd32a75171ef9cL,
+ 0x977f3fc7389b363fL } },
+ /* 35 << 42 */
+ { { 0x116eaf2bf4babda0L,0xfeab68bdf7113c8eL,0xd1e3f064b7def526L,
+ 0x1ac30885e0b3fa02L },
+ { 0x1c5a6e7b40142d9dL,0x839b560330921c0bL,0x48f301fa36a116a3L,
+ 0x380e1107cfd9ee6dL } },
+ /* 36 << 42 */
+ { { 0x7945ead858854be1L,0x4111c12ecbd4d49dL,0xece3b1ec3a29c2efL,
+ 0x6356d4048d3616f5L },
+ { 0x9f0d6a8f594d320eL,0x0989316df651ccd2L,0x6c32117a0f8fdde4L,
+ 0x9abe5cc5a26a9bbcL } },
+ /* 37 << 42 */
+ { { 0xcff560fb9723f671L,0x21b2a12d7f3d593cL,0xe4cb18da24ba0696L,
+ 0x186e2220c3543384L },
+ { 0x722f64e088312c29L,0x94282a9917dc7752L,0x62467bbf5a85ee89L,
+ 0xf435c650f10076a0L } },
+ /* 38 << 42 */
+ { { 0xc9ff153943b3a50bL,0x7132130c1a53efbcL,0x31bfe063f7b0c5b7L,
+ 0xb0179a7d4ea994ccL },
+ { 0x12d064b3c85f455bL,0x472593288f6e0062L,0xf64e590bb875d6d9L,
+ 0x22dd6225ad92bcc7L } },
+ /* 39 << 42 */
+ { { 0xb658038eb9c3bd6dL,0x00cdb0d6fbba27c8L,0x0c6813371062c45dL,
+ 0xd8515b8c2d33407dL },
+ { 0xcb8f699e8cbb5ecfL,0x8c4347f8c608d7d8L,0x2c11850abb3e00dbL,
+ 0x20a8dafdecb49d19L } },
+ /* 40 << 42 */
+ { { 0xbd78148045ee2f40L,0x75e354af416b60cfL,0xde0b58a18d49a8c4L,
+ 0xe40e94e2fa359536L },
+ { 0xbd4fa59f62accd76L,0x05cf466a8c762837L,0xb5abda99448c277bL,
+ 0x5a9e01bf48b13740L } },
+ /* 41 << 42 */
+ { { 0x9d457798326aad8dL,0xbdef4954c396f7e7L,0x6fb274a2c253e292L,
+ 0x2800bf0a1cfe53e7L },
+ { 0x22426d3144438fd4L,0xef2339235e259f9aL,0x4188503c03f66264L,
+ 0x9e5e7f137f9fdfabL } },
+ /* 42 << 42 */
+ { { 0x565eb76c5fcc1abaL,0xea63254859b5bff8L,0x5587c087aab6d3faL,
+ 0x92b639ea6ce39c1bL },
+ { 0x0706e782953b135cL,0x7308912e425268efL,0x599e92c7090e7469L,
+ 0x83b90f529bc35e75L } },
+ /* 43 << 42 */
+ { { 0x4750b3d0244975b3L,0xf3a4435811965d72L,0x179c67749c8dc751L,
+ 0xff18cdfed23d9ff0L },
+ { 0xc40138332028e247L,0x96e280e2f3bfbc79L,0xf60417bdd0880a84L,
+ 0x263c9f3d2a568151L } },
+ /* 44 << 42 */
+ { { 0x36be15b32d2ce811L,0x846dc0c2f8291d21L,0x5cfa0ecb789fcfdbL,
+ 0x45a0beedd7535b9aL },
+ { 0xec8e9f0796d69af1L,0x31a7c5b8599ab6dcL,0xd36d45eff9e2e09fL,
+ 0x3cf49ef1dcee954bL } },
+ /* 45 << 42 */
+ { { 0x6be34cf3086cff9bL,0x88dbd49139a3360fL,0x1e96b8cc0dbfbd1dL,
+ 0xc1e5f7bfcb7e2552L },
+ { 0x0547b21428819d98L,0xc770dd9c7aea9dcbL,0xaef0d4c7041d68c8L,
+ 0xcc2b981813cb9ba8L } },
+ /* 46 << 42 */
+ { { 0x7fc7bc76fe86c607L,0x6b7b9337502a9a95L,0x1948dc27d14dab63L,
+ 0x249dd198dae047beL },
+ { 0xe8356584a981a202L,0x3531dd183a893387L,0x1be11f90c85c7209L,
+ 0x93d2fe1ee2a52b5aL } },
+ /* 47 << 42 */
+ { { 0x8225bfe2ec6d6b97L,0x9cf6d6f4bd0aa5deL,0x911459cb54779f5fL,
+ 0x5649cddb86aeb1f3L },
+ { 0x321335793f26ce5aL,0xc289a102550f431eL,0x559dcfda73b84c6fL,
+ 0x84973819ee3ac4d7L } },
+ /* 48 << 42 */
+ { { 0xb51e55e6f2606a82L,0xe25f706190f2fb57L,0xacef6c2ab1a4e37cL,
+ 0x864e359d5dcf2706L },
+ { 0x479e6b187ce57316L,0x2cab25003a96b23dL,0xed4898628ef16df7L,
+ 0x2056538cef3758b5L } },
+ /* 49 << 42 */
+ { { 0xa7df865ef15d3101L,0x80c5533a61b553d7L,0x366e19974ed14294L,
+ 0x6620741fb3c0bcd6L },
+ { 0x21d1d9c4edc45418L,0x005b859ec1cc4a9dL,0xdf01f630a1c462f0L,
+ 0x15d06cf3f26820c7L } },
+ /* 50 << 42 */
+ { { 0x9f7f24ee3484be47L,0x2ff33e964a0c902fL,0x00bdf4575a0bc453L,
+ 0x2378dfaf1aa238dbL },
+ { 0x272420ec856720f2L,0x2ad9d95b96797291L,0xd1242cc6768a1558L,
+ 0x2e287f8b5cc86aa8L } },
+ /* 51 << 42 */
+ { { 0x796873d0990cecaaL,0xade55f81675d4080L,0x2645eea321f0cd84L,
+ 0x7a1efa0fb4e17d02L },
+ { 0xf6858420037cc061L,0x682e05f0d5d43e12L,0x59c3699427218710L,
+ 0x85cbba4d3f7cd2fcL } },
+ /* 52 << 42 */
+ { { 0x726f97297a3cd22aL,0x9f8cd5dc4a628397L,0x17b93ab9c23165edL,
+ 0xff5f5dbf122823d4L },
+ { 0xc1e4e4b5654a446dL,0xd1a9496f677257baL,0x6387ba94de766a56L,
+ 0x23608bc8521ec74aL } },
+ /* 53 << 42 */
+ { { 0x16a522d76688c4d4L,0x9d6b428207373abdL,0xa62f07acb42efaa3L,
+ 0xf73e00f7e3b90180L },
+ { 0x36175fec49421c3eL,0xc4e44f9b3dcf2678L,0x76df436b7220f09fL,
+ 0x172755fb3aa8b6cfL } },
+ /* 54 << 42 */
+ { { 0xbab89d57446139ccL,0x0a0a6e025fe0208fL,0xcdbb63e211e5d399L,
+ 0x33ecaa12a8977f0bL },
+ { 0x59598b21f7c42664L,0xb3e91b32ab65d08aL,0x035822eef4502526L,
+ 0x1dcf0176720a82a9L } },
+ /* 55 << 42 */
+ { { 0x50f8598f3d589e02L,0xdf0478ffb1d63d2cL,0x8b8068bd1571cd07L,
+ 0x30c3aa4fd79670cdL },
+ { 0x25e8fd4b941ade7fL,0x3d1debdc32790011L,0x65b6dcbd3a3f9ff0L,
+ 0x282736a4793de69cL } },
+ /* 56 << 42 */
+ { { 0xef69a0c3d41d3bd3L,0xb533b8c907a26bdeL,0xe2801d97db2edf9fL,
+ 0xdc4a8269e1877af0L },
+ { 0x6c1c58513d590dbeL,0x84632f6bee4e9357L,0xd36d36b779b33374L,
+ 0xb46833e39bbca2e6L } },
+ /* 57 << 42 */
+ { { 0x37893913f7fc0586L,0x385315f766bf4719L,0x72c56293b31855dcL,
+ 0xd1416d4e849061feL },
+ { 0xbeb3ab7851047213L,0x447f6e61f040c996L,0xd06d310d638b1d0cL,
+ 0xe28a413fbad1522eL } },
+ /* 58 << 42 */
+ { { 0x685a76cb82003f86L,0x610d07f70bcdbca3L,0x6ff660219ca4c455L,
+ 0x7df39b87cea10eecL },
+ { 0xb9255f96e22db218L,0x8cc6d9eb08a34c44L,0xcd4ffb86859f9276L,
+ 0x8fa15eb250d07335L } },
+ /* 59 << 42 */
+ { { 0xdf553845cf2c24b5L,0x89f66a9f52f9c3baL,0x8f22b5b9e4a7ceb3L,
+ 0xaffef8090e134686L },
+ { 0x3e53e1c68eb8fac2L,0x93c1e4eb28aec98eL,0xb6b91ec532a43bcbL,
+ 0x2dbfa947b2d74a51L } },
+ /* 60 << 42 */
+ { { 0xe065d190ca84bad7L,0xfb13919fad58e65cL,0x3c41718bf1cb6e31L,
+ 0x688969f006d05c3fL },
+ { 0xd4f94ce721264d45L,0xfdfb65e97367532bL,0x5b1be8b10945a39dL,
+ 0x229f789c2b8baf3bL } },
+ /* 61 << 42 */
+ { { 0xd8f41f3e6f49f15dL,0x678ce828907f0792L,0xc69ace82fca6e867L,
+ 0x106451aed01dcc89L },
+ { 0x1bb4f7f019fc32d2L,0x64633dfcb00c52d2L,0x8f13549aad9ea445L,
+ 0x99a3bf50fb323705L } },
+ /* 62 << 42 */
+ { { 0x0c9625a2534d4dbcL,0x45b8f1d1c2a2fea3L,0x76ec21a1a530fc1aL,
+ 0x4bac9c2a9e5bd734L },
+ { 0x5996d76a7b4e3587L,0x0045cdee1182d9e3L,0x1aee24b91207f13dL,
+ 0x66452e9797345a41L } },
+ /* 63 << 42 */
+ { { 0x16e5b0549f950cd0L,0x9cc72fb1d7fdd075L,0x6edd61e766249663L,
+ 0xde4caa4df043cccbL },
+ { 0x11b1f57a55c7ac17L,0x779cbd441a85e24dL,0x78030f86e46081e7L,
+ 0xfd4a60328e20f643L } },
+ /* 64 << 42 */
+ { { 0xcc7a64880a750c0fL,0x39bacfe34e548e83L,0x3d418c760c110f05L,
+ 0x3e4daa4cb1f11588L },
+ { 0x2733e7b55ffc69ffL,0x46f147bc92053127L,0x885b2434d722df94L,
+ 0x6a444f65e6fc6b7cL } },
+ /* 0 << 49 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 49 */
+ { { 0x7a1a465ac3f16ea8L,0x115a461db2f1d11cL,0x4767dd956c68a172L,
+ 0x3392f2ebd13a4698L },
+ { 0xc7a99ccde526cdc7L,0x8e537fdc22292b81L,0x76d8cf69a6d39198L,
+ 0xffc5ff432446852dL } },
+ /* 2 << 49 */
+ { { 0x97b14f7ea90567e6L,0x513257b7b6ae5cb7L,0x85454a3c9f10903dL,
+ 0xd8d2c9ad69bc3724L },
+ { 0x38da93246b29cb44L,0xb540a21d77c8cbacL,0x9bbfe43501918e42L,
+ 0xfffa707a56c3614eL } },
+ /* 3 << 49 */
+ { { 0x0ce4e3f1d4e353b7L,0x062d8a14ef46b0a0L,0x6408d5ab574b73fdL,
+ 0xbc41d1c9d3273ffdL },
+ { 0x3538e1e76be77800L,0x71fe8b37c5655031L,0x1cd916216b9b331aL,
+ 0xad825d0bbb388f73L } },
+ /* 4 << 49 */
+ { { 0x56c2e05b1cb76219L,0x0ec0bf9171567e7eL,0xe7076f8661c4c910L,
+ 0xd67b085bbabc04d9L },
+ { 0x9fb904595e93a96aL,0x7526c1eafbdc249aL,0x0d44d367ecdd0bb7L,
+ 0x953999179dc0d695L } },
+ /* 5 << 49 */
+ { { 0x61360ee99e240d18L,0x057cdcacb4b94466L,0xe7667cd12fe5325cL,
+ 0x1fa297b521974e3bL },
+ { 0xfa4081e7db083d76L,0x31993be6f206bd15L,0x8949269b14c19f8cL,
+ 0x21468d72a9d92357L } },
+ /* 6 << 49 */
+ { { 0x2ccbc583a4c506ecL,0x957ed188d1acfe97L,0x8baed83312f1aea2L,
+ 0xef2a6cb48325362dL },
+ { 0x130dde428e195c43L,0xc842025a0e6050c6L,0x2da972a708686a5dL,
+ 0xb52999a1e508b4a8L } },
+ /* 7 << 49 */
+ { { 0xd9f090b910a5a8bdL,0xca91d249096864daL,0x8e6a93be3f67dbc1L,
+ 0xacae6fbaf5f4764cL },
+ { 0x1563c6e0d21411a0L,0x28fa787fda0a4ad8L,0xd524491c908c8030L,
+ 0x1257ba0e4c795f07L } },
+ /* 8 << 49 */
+ { { 0x83f49167ceca9754L,0x426d2cf64b7939a0L,0x2555e355723fd0bfL,
+ 0xa96e6d06c4f144e2L },
+ { 0x4768a8dd87880e61L,0x15543815e508e4d5L,0x09d7e772b1b65e15L,
+ 0x63439dd6ac302fa0L } },
+ /* 9 << 49 */
+ { { 0xb93f802fc14e35c2L,0x71735b7c4341333cL,0x03a2510416d4f362L,
+ 0x3f4d069bbf433c8eL },
+ { 0x0d83ae01f78f5a7cL,0x50a8ffbe7c4eed07L,0xc74f890676e10f83L,
+ 0x7d0809669ddaf8e1L } },
+ /* 10 << 49 */
+ { { 0xb11df8e1698e04ccL,0x877be203169005c8L,0x32749e8c4f3c6179L,
+ 0x2dbc9d0a7853fc05L },
+ { 0x187d4f939454d937L,0xe682ce9db4800e1bL,0xa9129ad8165e68e8L,
+ 0x0fe29735be7f785bL } },
+ /* 11 << 49 */
+ { { 0x5303f40c5b9e02b7L,0xa37c969235ee04e8L,0x5f46cc2034d6632bL,
+ 0x55ef72b296ac545bL },
+ { 0xabec5c1f7b91b062L,0x0a79e1c7bb33e821L,0xbb04b4283a9f4117L,
+ 0x0de1f28ffd2a475aL } },
+ /* 12 << 49 */
+ { { 0x31019ccf3a4434b4L,0xa34581111a7954dcL,0xa9dac80de34972a7L,
+ 0xb043d05474f6b8ddL },
+ { 0x021c319e11137b1aL,0x00a754ceed5cc03fL,0x0aa2c794cbea5ad4L,
+ 0x093e67f470c015b6L } },
+ /* 13 << 49 */
+ { { 0x72cdfee9c97e3f6bL,0xc10bcab4b6da7461L,0x3b02d2fcb59806b9L,
+ 0x85185e89a1de6f47L },
+ { 0x39e6931f0eb6c4d4L,0x4d4440bdd4fa5b04L,0x5418786e34be7eb8L,
+ 0x6380e5219d7259bcL } },
+ /* 14 << 49 */
+ { { 0x20ac0351d598d710L,0x272c4166cb3a4da4L,0xdb82fe1aca71de1fL,
+ 0x746e79f2d8f54b0fL },
+ { 0x6e7fc7364b573e9bL,0x75d03f46fd4b5040L,0x5c1cc36d0b98d87bL,
+ 0x513ba3f11f472da1L } },
+ /* 15 << 49 */
+ { { 0x79d0af26abb177ddL,0xf82ab5687891d564L,0x2b6768a972232173L,
+ 0xefbb3bb08c1f6619L },
+ { 0xb29c11dba6d18358L,0x519e2797b0916d3aL,0xd4dc18f09188e290L,
+ 0x648e86e398b0ca7fL } },
+ /* 16 << 49 */
+ { { 0x859d3145983c38b5L,0xb14f176c637abc8bL,0x2793fb9dcaff7be6L,
+ 0xebe5a55f35a66a5aL },
+ { 0x7cec1dcd9f87dc59L,0x7c595cd3fbdbf560L,0x5b543b2226eb3257L,
+ 0x69080646c4c935fdL } },
+ /* 17 << 49 */
+ { { 0x7f2e440381e9ede3L,0x243c3894caf6df0aL,0x7c605bb11c073b11L,
+ 0xcd06a541ba6a4a62L },
+ { 0x2916894949d4e2e5L,0x33649d074af66880L,0xbfc0c885e9a85035L,
+ 0xb4e52113fc410f4bL } },
+ /* 18 << 49 */
+ { { 0xdca3b70678a6513bL,0x92ea4a2a9edb1943L,0x02642216db6e2dd8L,
+ 0x9b45d0b49fd57894L },
+ { 0x114e70dbc69d11aeL,0x1477dd194c57595fL,0xbc2208b4ec77c272L,
+ 0x95c5b4d7db68f59cL } },
+ /* 19 << 49 */
+ { { 0xb8c4fc6342e532b7L,0x386ba4229ae35290L,0xfb5dda42d201ecbcL,
+ 0x2353dc8ba0e38fd6L },
+ { 0x9a0b85ea68f7e978L,0x96ec56822ad6d11fL,0x5e279d6ce5f6886dL,
+ 0xd3fe03cd3cb1914dL } },
+ /* 20 << 49 */
+ { { 0xfe541fa47ea67c77L,0x952bd2afe3ea810cL,0x791fef568d01d374L,
+ 0xa3a1c6210f11336eL },
+ { 0x5ad0d5a9c7ec6d79L,0xff7038af3225c342L,0x003c6689bc69601bL,
+ 0x25059bc745e8747dL } },
+ /* 21 << 49 */
+ { { 0xfa4965b2f2086fbfL,0xf6840ea686916078L,0xd7ac762070081d6cL,
+ 0xe600da31b5328645L },
+ { 0x01916f63529b8a80L,0xe80e48582d7d6f3eL,0x29eb0fe8d664ca7cL,
+ 0xf017637be7b43b0cL } },
+ /* 22 << 49 */
+ { { 0x9a75c80676cb2566L,0x8f76acb1b24892d9L,0x7ae7b9cc1f08fe45L,
+ 0x19ef73296a4907d8L },
+ { 0x2db4ab715f228bf0L,0xf3cdea39817032d7L,0x0b1f482edcabe3c0L,
+ 0x3baf76b4bb86325cL } },
+ /* 23 << 49 */
+ { { 0xd49065e010089465L,0x3bab5d298e77c596L,0x7636c3a6193dbd95L,
+ 0xdef5d294b246e499L },
+ { 0xb22c58b9286b2475L,0xa0b93939cd80862bL,0x3002c83af0992388L,
+ 0x6de01f9beacbe14cL } },
+ /* 24 << 49 */
+ { { 0x6aac688eadd70482L,0x708de92a7b4a4e8aL,0x75b6dd73758a6eefL,
+ 0xea4bf352725b3c43L },
+ { 0x10041f2c87912868L,0xb1b1be95ef09297aL,0x19ae23c5a9f3860aL,
+ 0xc4f0f839515dcf4bL } },
+ /* 25 << 49 */
+ { { 0x3c7ecca397f6306aL,0x744c44ae68a3a4b0L,0x69cd13a0b3a1d8a2L,
+ 0x7cad0a1e5256b578L },
+ { 0xea653fcd33791d9eL,0x9cc2a05d74b2e05fL,0x73b391dcfd7affa2L,
+ 0xddb7091eb6b05442L } },
+ /* 26 << 49 */
+ { { 0xc71e27bf8538a5c6L,0x195c63dd89abff17L,0xfd3152851b71e3daL,
+ 0x9cbdfda7fa680fa0L },
+ { 0x9db876ca849d7eabL,0xebe2764b3c273271L,0x663357e3f208dceaL,
+ 0x8c5bd833565b1b70L } },
+ /* 27 << 49 */
+ { { 0xccc3b4f59837fc0dL,0x9b641ba8a79cf00fL,0x7428243ddfdf3990L,
+ 0x83a594c4020786b1L },
+ { 0xb712451a526c4502L,0x9d39438e6adb3f93L,0xfdb261e3e9ff0ccdL,
+ 0x80344e3ce07af4c3L } },
+ /* 28 << 49 */
+ { { 0x75900d7c2fa4f126L,0x08a3b8655c99a232L,0x2478b6bfdb25e0c3L,
+ 0x482cc2c271db2edfL },
+ { 0x37df7e645f321bb8L,0x8a93821b9a8005b4L,0x3fa2f10ccc8c1958L,
+ 0x0d3322182c269d0aL } },
+ /* 29 << 49 */
+ { { 0x20ab8119e246b0e6L,0xb39781e4d349fd17L,0xd293231eb31aa100L,
+ 0x4b779c97bb032168L },
+ { 0x4b3f19e1c8470500L,0x45b7efe90c4c869dL,0xdb84f38aa1a6bbccL,
+ 0x3b59cb15b2fddbc1L } },
+ /* 30 << 49 */
+ { { 0xba5514df3fd165e8L,0x499fd6a9061f8811L,0x72cd1fe0bfef9f00L,
+ 0x120a4bb979ad7e8aL },
+ { 0xf2ffd0955f4a5ac5L,0xcfd174f195a7a2f0L,0xd42301ba9d17baf1L,
+ 0xd2fa487a77f22089L } },
+ /* 31 << 49 */
+ { { 0x9cb09efeb1dc77e1L,0xe956693921c99682L,0x8c5469016c6067bbL,
+ 0xfd37857461c24456L },
+ { 0x2b6a6cbe81796b33L,0x62d550f658e87f8bL,0x1b763e1c7f1b01b4L,
+ 0x4b93cfea1b1b5e12L } },
+ /* 32 << 49 */
+ { { 0xb93452381d531696L,0x57201c0088cdde69L,0xdde922519a86afc7L,
+ 0xe3043895bd35cea8L },
+ { 0x7608c1e18555970dL,0x8267dfa92535935eL,0xd4c60a57322ea38bL,
+ 0xe0bf7977804ef8b5L } },
+ /* 33 << 49 */
+ { { 0x1a0dab28c06fece4L,0xd405991e94e7b49dL,0xc542b6d2706dab28L,
+ 0xcb228da3a91618fbL },
+ { 0x224e4164107d1ceaL,0xeb9fdab3d0f5d8f1L,0xc02ba3860d6e41cdL,
+ 0x676a72c59b1f7146L } },
+ /* 34 << 49 */
+ { { 0xffd6dd984d6cb00bL,0xcef9c5cade2e8d7cL,0xa1bbf5d7641c7936L,
+ 0x1b95b230ee8f772eL },
+ { 0xf765a92ee8ac25b1L,0xceb04cfc3a18b7c6L,0x27944cef0acc8966L,
+ 0xcbb3c957434c1004L } },
+ /* 35 << 49 */
+ { { 0x9c9971a1a43ff93cL,0x5bc2db17a1e358a9L,0x45b4862ea8d9bc82L,
+ 0x70ebfbfb2201e052L },
+ { 0xafdf64c792871591L,0xea5bcae6b42d0219L,0xde536c552ad8f03cL,
+ 0xcd6c3f4da76aa33cL } },
+ /* 36 << 49 */
+ { { 0xbeb5f6230bca6de3L,0xdd20dd99b1e706fdL,0x90b3ff9dac9059d4L,
+ 0x2d7b29027ccccc4eL },
+ { 0x8a090a59ce98840fL,0xa5d947e08410680aL,0x49ae346a923379a5L,
+ 0x7dbc84f9b28a3156L } },
+ /* 37 << 49 */
+ { { 0xfd40d91654a1aff2L,0xabf318ba3a78fb9bL,0x50152ed83029f95eL,
+ 0x9fc1dd77c58ad7faL },
+ { 0x5fa5791513595c17L,0xb95046688f62b3a9L,0x907b5b24ff3055b0L,
+ 0x2e995e359a84f125L } },
+ /* 38 << 49 */
+ { { 0x87dacf697e9bbcfbL,0x95d0c1d6e86d96e3L,0x65726e3c2d95a75cL,
+ 0x2c3c9001acd27f21L },
+ { 0x1deab5616c973f57L,0x108b7e2ca5221643L,0x5fee9859c4ef79d4L,
+ 0xbd62b88a40d4b8c6L } },
+ /* 39 << 49 */
+ { { 0xb4dd29c4197c75d6L,0x266a6df2b7076febL,0x9512d0ea4bf2df11L,
+ 0x1320c24f6b0cc9ecL },
+ { 0x6bb1e0e101a59596L,0x8317c5bbeff9aaacL,0x65bb405e385aa6c9L,
+ 0x613439c18f07988fL } },
+ /* 40 << 49 */
+ { { 0xd730049f16a66e91L,0xe97f2820fa1b0e0dL,0x4131e003304c28eaL,
+ 0x820ab732526bac62L },
+ { 0xb2ac9ef928714423L,0x54ecfffaadb10cb2L,0x8781476ef886a4ccL,
+ 0x4b2c87b5db2f8d49L } },
+ /* 41 << 49 */
+ { { 0xe857cd200a44295dL,0x707d7d2158c6b044L,0xae8521f9f596757cL,
+ 0x87448f0367b2b714L },
+ { 0x13a9bc455ebcd58dL,0x79bcced99122d3c1L,0x3c6442479e076642L,
+ 0x0cf227782df4767dL } },
+ /* 42 << 49 */
+ { { 0x5e61aee471d444b6L,0x211236bfc5084a1dL,0x7e15bc9a4fd3eaf6L,
+ 0x68df2c34ab622bf5L },
+ { 0x9e674f0f59bf4f36L,0xf883669bd7f34d73L,0xc48ac1b831497b1dL,
+ 0x323b925d5106703bL } },
+ /* 43 << 49 */
+ { { 0x22156f4274082008L,0xeffc521ac8482bcbL,0x5c6831bf12173479L,
+ 0xcaa2528fc4739490L },
+ { 0x84d2102a8f1b3c4dL,0xcf64dfc12d9bec0dL,0x433febad78a546efL,
+ 0x1f621ec37b73cef1L } },
+ /* 44 << 49 */
+ { { 0x6aecd62737338615L,0x162082ab01d8edf6L,0x833a811919e86b66L,
+ 0x6023a251d299b5dbL },
+ { 0xf5bb0c3abbf04b89L,0x6735eb69ae749a44L,0xd0e058c54713de3bL,
+ 0xfdf2593e2c3d4ccdL } },
+ /* 45 << 49 */
+ { { 0x1b8f414efdd23667L,0xdd52aacafa2015eeL,0x3e31b517bd9625ffL,
+ 0x5ec9322d8db5918cL },
+ { 0xbc73ac85a96f5294L,0x82aa5bf361a0666aL,0x49755810bf08ac42L,
+ 0xd21cdfd5891cedfcL } },
+ /* 46 << 49 */
+ { { 0x918cb57b67f8be10L,0x365d1a7c56ffa726L,0x2435c5046532de93L,
+ 0xc0fc5e102674cd02L },
+ { 0x6e51fcf89cbbb142L,0x1d436e5aafc50692L,0x766bffff3fbcae22L,
+ 0x3148c2fdfd55d3b8L } },
+ /* 47 << 49 */
+ { { 0x52c7fdc9233222faL,0x89ff1092e419fb6bL,0x3cd6db9925254977L,
+ 0x2e85a1611cf12ca7L },
+ { 0xadd2547cdc810bc9L,0xea3f458f9d257c22L,0x642c1fbe27d6b19bL,
+ 0xed07e6b5140481a6L } },
+ /* 48 << 49 */
+ { { 0x6ada1d4286d2e0f8L,0xe59201220e8a9fd5L,0x02c936af708c1b49L,
+ 0x60f30fee2b4bfaffL },
+ { 0x6637ad06858e6a61L,0xce4c77673fd374d0L,0x39d54b2d7188defbL,
+ 0xa8c9d250f56a6b66L } },
+ /* 49 << 49 */
+ { { 0x58fc0f5eb24fe1dcL,0x9eaf9dee6b73f24cL,0xa90d588b33650705L,
+ 0xde5b62c5af2ec729L },
+ { 0x5c72cfaed3c2b36eL,0x868c19d5034435daL,0x88605f93e17ee145L,
+ 0xaa60c4ee77a5d5b1L } },
+ /* 50 << 49 */
+ { { 0xbcf5bfd23b60c472L,0xaf4ef13ceb1d3049L,0x373f44fce13895c9L,
+ 0xf29b382f0cbc9822L },
+ { 0x1bfcb85373efaef6L,0xcf56ac9ca8c96f40L,0xd7adf1097a191e24L,
+ 0x98035f44bf8a8dc2L } },
+ /* 51 << 49 */
+ { { 0xf40a71b91e750c84L,0xc57f7b0c5dc6c469L,0x49a0e79c6fbc19c1L,
+ 0x6b0f5889a48ebdb8L },
+ { 0x5d3fd084a07c4e9fL,0xc3830111ab27de14L,0x0e4929fe33e08dccL,
+ 0xf4a5ad2440bb73a3L } },
+ /* 52 << 49 */
+ { { 0xde86c2bf490f97caL,0x288f09c667a1ce18L,0x364bb8861844478dL,
+ 0x7840fa42ceedb040L },
+ { 0x1269fdd25a631b37L,0x94761f1ea47c8b7dL,0xfc0c2e17481c6266L,
+ 0x85e16ea23daa5fa7L } },
+ /* 53 << 49 */
+ { { 0xccd8603392491048L,0x0c2f6963f4d402d7L,0x6336f7dfdf6a865cL,
+ 0x0a2a463cb5c02a87L },
+ { 0xb0e29be7bf2f12eeL,0xf0a2200266bad988L,0x27f87e039123c1d7L,
+ 0x21669c55328a8c98L } },
+ /* 54 << 49 */
+ { { 0x186b980392f14529L,0xd3d056cc63954df3L,0x2f03fd58175a46f6L,
+ 0x63e34ebe11558558L },
+ { 0xe13fedee5b80cfa5L,0xe872a120d401dbd1L,0x52657616e8a9d667L,
+ 0xbc8da4b6e08d6693L } },
+ /* 55 << 49 */
+ { { 0x370fb9bb1b703e75L,0x6773b186d4338363L,0x18dad378ecef7bffL,
+ 0xaac787ed995677daL },
+ { 0x4801ea8b0437164bL,0xf430ad2073fe795eL,0xb164154d8ee5eb73L,
+ 0x0884ecd8108f7c0eL } },
+ /* 56 << 49 */
+ { { 0x0e6ec0965f520698L,0x640631fe44f7b8d9L,0x92fd34fca35a68b9L,
+ 0x9c5a4b664d40cf4eL },
+ { 0x949454bf80b6783dL,0x80e701fe3a320a10L,0x8d1a564a1a0a39b2L,
+ 0x1436d53d320587dbL } },
+ /* 57 << 49 */
+ { { 0xf5096e6d6556c362L,0xbc23a3c0e2455d7eL,0x3a7aee54807230f9L,
+ 0x9ba1cfa622ae82fdL },
+ { 0x833a057a99c5d706L,0x8be85f4b842315c9L,0xd083179a66a72f12L,
+ 0x2fc77d5dcdcc73cdL } },
+ /* 58 << 49 */
+ { { 0x22b88a805616ee30L,0xfb09548fe7ab1083L,0x8ad6ab0d511270cdL,
+ 0x61f6c57a6924d9abL },
+ { 0xa0f7bf7290aecb08L,0x849f87c90df784a4L,0x27c79c15cfaf1d03L,
+ 0xbbf9f675c463faceL } },
+ /* 59 << 49 */
+ { { 0x91502c65765ba543L,0x18ce3cac42ea60ddL,0xe5cee6ac6e43ecb3L,
+ 0x63e4e91068f2aeebL },
+ { 0x26234fa3c85932eeL,0x96883e8b4c90c44dL,0x29b9e738a18a50f6L,
+ 0xbfc62b2a3f0420dfL } },
+ /* 60 << 49 */
+ { { 0xd22a7d906d3e1fa9L,0x17115618fe05b8a3L,0x2a0c9926bb2b9c01L,
+ 0xc739fcc6e07e76a2L },
+ { 0x540e9157165e439aL,0x06353a626a9063d8L,0x84d9559461e927a3L,
+ 0x013b9b26e2e0be7fL } },
+ /* 61 << 49 */
+ { { 0x4feaec3b973497f1L,0x15c0f94e093ebc2dL,0x6af5f22733af0583L,
+ 0x0c2af206c61f3340L },
+ { 0xd25dbdf14457397cL,0x2e8ed017cabcbae0L,0xe3010938c2815306L,
+ 0xbaa99337e8c6cd68L } },
+ /* 62 << 49 */
+ { { 0x085131823b0ec7deL,0x1e1b822b58df05dfL,0x5c14842fa5c3b683L,
+ 0x98fe977e3eba34ceL },
+ { 0xfd2316c20d5e8873L,0xe48d839abd0d427dL,0x495b2218623fc961L,
+ 0x24ee56e7b46fba5eL } },
+ /* 63 << 49 */
+ { { 0x9184a55b91e4de58L,0xa7488ca5dfdea288L,0xa723862ea8dcc943L,
+ 0x92d762b2849dc0fcL },
+ { 0x3c444a12091ff4a9L,0x581113fa0cada274L,0xb9de0a4530d8eae2L,
+ 0x5e0fcd85df6b41eaL } },
+ /* 64 << 49 */
+ { { 0x6233ea68c094dbb5L,0xb77d062ed968d410L,0x3e719bbc58b3002dL,
+ 0x68e7dd3d3dc49d58L },
+ { 0x8d825740013a5e58L,0x213117473c9e3c1bL,0x0cb0a2a77c99b6abL,
+ 0x5c48a3b3c2f888f2L } },
+ /* 0 << 56 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 56 */
+ { { 0xc7913e91991724f3L,0x5eda799c39cbd686L,0xddb595c763d4fc1eL,
+ 0x6b63b80bac4fed54L },
+ { 0x6ea0fc697e5fb516L,0x737708bad0f1c964L,0x9628745f11a92ca5L,
+ 0x61f379589a86967aL } },
+ /* 2 << 56 */
+ { { 0x9af39b2caa665072L,0x78322fa4efd324efL,0x3d153394c327bd31L,
+ 0x81d5f2713129dab0L },
+ { 0xc72e0c42f48027f5L,0xaa40cdbc8536e717L,0xf45a657a2d369d0fL,
+ 0xb03bbfc4ea7f74e6L } },
+ /* 3 << 56 */
+ { { 0x46a8c4180d738dedL,0x6f1a5bb0e0de5729L,0xf10230b98ba81675L,
+ 0x32c6f30c112b33d4L },
+ { 0x7559129dd8fffb62L,0x6a281b47b459bf05L,0x77c1bd3afa3b6776L,
+ 0x0709b3807829973aL } },
+ /* 4 << 56 */
+ { { 0x8c26b232a3326505L,0x38d69272ee1d41bfL,0x0459453effe32afaL,
+ 0xce8143ad7cb3ea87L },
+ { 0x932ec1fa7e6ab666L,0x6cd2d23022286264L,0x459a46fe6736f8edL,
+ 0x50bf0d009eca85bbL } },
+ /* 5 << 56 */
+ { { 0x0b825852877a21ecL,0x300414a70f537a94L,0x3f1cba4021a9a6a2L,
+ 0x50824eee76943c00L },
+ { 0xa0dbfcecf83cba5dL,0xf953814893b4f3c0L,0x6174416248f24dd7L,
+ 0x5322d64de4fb09ddL } },
+ /* 6 << 56 */
+ { { 0x574473843d9325f3L,0xa9bef2d0f371cb84L,0x77d2188ba61e36c5L,
+ 0xbbd6a7d7c602df72L },
+ { 0xba3aa9028f61bc0bL,0xf49085ed6ed0b6a1L,0x8bc625d6ae6e8298L,
+ 0x832b0b1da2e9c01dL } },
+ /* 7 << 56 */
+ { { 0xa337c447f1f0ced1L,0x800cc7939492dd2bL,0x4b93151dbea08efaL,
+ 0x820cf3f8de0a741eL },
+ { 0xff1982dc1c0f7d13L,0xef92196084dde6caL,0x1ad7d97245f96ee3L,
+ 0x319c8dbe29dea0c7L } },
+ /* 8 << 56 */
+ { { 0xd3ea38717b82b99bL,0x75922d4d470eb624L,0x8f66ec543b95d466L,
+ 0x66e673ccbee1e346L },
+ { 0x6afe67c4b5f2b89aL,0x3de9c1e6290e5cd3L,0x8c278bb6310a2adaL,
+ 0x420fa3840bdb323bL } },
+ /* 9 << 56 */
+ { { 0x0ae1d63b0eb919b0L,0xd74ee51da74b9620L,0x395458d0a674290cL,
+ 0x324c930f4620a510L },
+ { 0x2d1f4d19fbac27d4L,0x4086e8ca9bedeeacL,0x0cdd211b9b679ab8L,
+ 0x5970167d7090fec4L } },
+ /* 10 << 56 */
+ { { 0x3420f2c9faf1fc63L,0x616d333a328c8bb4L,0x7d65364c57f1fe4aL,
+ 0x9343e87755e5c73aL },
+ { 0x5795176be970e78cL,0xa36ccebf60533627L,0xfc7c738009cdfc1bL,
+ 0xb39a2afeb3fec326L } },
+ /* 11 << 56 */
+ { { 0xb7ff1ba16224408aL,0xcc856e92247cfc5eL,0x01f102e7c18bc493L,
+ 0x4613ab742091c727L },
+ { 0xaa25e89cc420bf2bL,0x00a5317690337ec2L,0xd2be9f437d025fc7L,
+ 0x3316fb856e6fe3dcL } },
+ /* 12 << 56 */
+ { { 0x27520af59ac50814L,0xfdf95e789a8e4223L,0xb7e7df2a56bec5a0L,
+ 0xf7022f7ddf159e5dL },
+ { 0x93eeeab1cac1fe8fL,0x8040188c37451168L,0x7ee8aa8ad967dce6L,
+ 0xfa0e79e73abc9299L } },
+ /* 13 << 56 */
+ { { 0x67332cfc2064cfd1L,0x339c31deb0651934L,0x719b28d52a3bcbeaL,
+ 0xee74c82b9d6ae5c6L },
+ { 0x0927d05ebaf28ee6L,0x82cecf2c9d719028L,0x0b0d353eddb30289L,
+ 0xfe4bb977fddb2e29L } },
+ /* 14 << 56 */
+ { { 0xbb5bb990640bfd9eL,0xd226e27782f62108L,0x4bf0098502ffdd56L,
+ 0x7756758a2ca1b1b5L },
+ { 0xc32b62a35285fe91L,0xedbc546a8c9cd140L,0x1e47a013af5cb008L,
+ 0xbca7e720073ce8f2L } },
+ /* 15 << 56 */
+ { { 0xe10b2ab817a91caeL,0xb89aab6508e27f63L,0x7b3074a7dba3ddf9L,
+ 0x1c20ce09330c2972L },
+ { 0x6b9917b45fcf7e33L,0xe6793743945ceb42L,0x18fc22155c633d19L,
+ 0xad1adb3cc7485474L } },
+ /* 16 << 56 */
+ { { 0x646f96796424c49bL,0xf888dfe867c241c9L,0xe12d4b9324f68b49L,
+ 0x9a6b62d8a571df20L },
+ { 0x81b4b26d179483cbL,0x666f96329511fae2L,0xd281b3e4d53aa51fL,
+ 0x7f96a7657f3dbd16L } },
+ /* 17 << 56 */
+ { { 0xa7f8b5bf074a30ceL,0xd7f52107005a32e6L,0x6f9e090750237ed4L,
+ 0x2f21da478096fa2bL },
+ { 0xf3e19cb4eec863a0L,0xd18f77fd9527620aL,0x9505c81c407c1cf8L,
+ 0x9998db4e1b6ec284L } },
+ /* 18 << 56 */
+ { { 0x7e3389e5c247d44dL,0x125071413f4f3d80L,0xd4ba01104a78a6c7L,
+ 0x312874a0767720beL },
+ { 0xded059a675944370L,0xd6123d903b2c0bddL,0xa56b717b51c108e3L,
+ 0x9bb7940e070623e9L } },
+ /* 19 << 56 */
+ { { 0x794e2d5984ac066cL,0xf5954a92e68c69a0L,0x28c524584fd99dccL,
+ 0x60e639fcb1012517L },
+ { 0xc2e601257de79248L,0xe9ef6404f12fc6d7L,0x4c4f28082a3b5d32L,
+ 0x865ad32ec768eb8aL } },
+ /* 20 << 56 */
+ { { 0xac02331b13fb70b6L,0x037b44c195599b27L,0x1a860fc460bd082cL,
+ 0xa2e25745c980cd01L },
+ { 0xee3387a81da0263eL,0x931bfb952d10f3d6L,0x5b687270a1f24a32L,
+ 0xf140e65dca494b86L } },
+ /* 21 << 56 */
+ { { 0x4f4ddf91b2f1ac7aL,0xf99eaabb760fee27L,0x57f4008a49c228e5L,
+ 0x090be4401cf713bbL },
+ { 0xac91fbe45004f022L,0xd838c2c2569e1af6L,0xd6c7d20b0f1daaa5L,
+ 0xaa063ac11bbb02c0L } },
+ /* 22 << 56 */
+ { { 0x0938a42259558a78L,0x5343c6698435da2fL,0x96f67b18034410dcL,
+ 0x7cc1e42484510804L },
+ { 0x86a1543f16dfbb7dL,0x921fa9425b5bd592L,0x9dcccb6eb33dd03cL,
+ 0x8581ddd9b843f51eL } },
+ /* 23 << 56 */
+ { { 0x54935fcb81d73c9eL,0x6d07e9790a5e97abL,0x4dc7b30acf3a6babL,
+ 0x147ab1f3170bee11L },
+ { 0x0aaf8e3d9fafdee4L,0xfab3dbcb538a8b95L,0x405df4b36ef13871L,
+ 0xf1f4e9cb088d5a49L } },
+ /* 24 << 56 */
+ { { 0x9bcd24d366b33f1dL,0x3b97b8205ce445c0L,0xe2926549ba93ff61L,
+ 0xd9c341ce4dafe616L },
+ { 0xfb30a76e16efb6f3L,0xdf24b8ca605b953cL,0x8bd52afec2fffb9fL,
+ 0xbbac5ff7e19d0b96L } },
+ /* 25 << 56 */
+ { { 0x43c01b87459afccdL,0x6bd45143b7432652L,0x8473453055b5d78eL,
+ 0x81088fdb1554ba7dL },
+ { 0xada0a52c1e269375L,0xf9f037c42dc5ec10L,0xc066060794bfbc11L,
+ 0xc0a630bbc9c40d2fL } },
+ /* 26 << 56 */
+ { { 0x5efc797eab64c31eL,0xffdb1dab74507144L,0xf61242871ca6790cL,
+ 0xe9609d81e69bf1bfL },
+ { 0xdb89859500d24fc9L,0x9c750333e51fb417L,0x51830a91fef7bbdeL,
+ 0x0ce67dc8945f585cL } },
+ /* 27 << 56 */
+ { { 0x9a730ed44763eb50L,0x24a0e221c1ab0d66L,0x643b6393648748f3L,
+ 0x1982daa16d3c6291L },
+ { 0x6f00a9f78bbc5549L,0x7a1783e17f36384eL,0xe8346323de977f50L,
+ 0x91ab688db245502aL } },
+ /* 28 << 56 */
+ { { 0x331ab6b56d0bdd66L,0x0a6ef32e64b71229L,0x1028150efe7c352fL,
+ 0x27e04350ce7b39d3L },
+ { 0x2a3c8acdc1070c82L,0xfb2034d380c9feefL,0x2d729621709f3729L,
+ 0x8df290bf62cb4549L } },
+ /* 29 << 56 */
+ { { 0x02f99f33fc2e4326L,0x3b30076d5eddf032L,0xbb21f8cf0c652fb5L,
+ 0x314fb49eed91cf7bL },
+ { 0xa013eca52f700750L,0x2b9e3c23712a4575L,0xe5355557af30fbb0L,
+ 0x1ada35167c77e771L } },
+ /* 30 << 56 */
+ { { 0x45f6ecb27b135670L,0xe85d19df7cfc202eL,0x0f1b50c758d1be9fL,
+ 0x5ebf2c0aead2e344L },
+ { 0x1531fe4eabc199c9L,0xc703259256bab0aeL,0x16ab2e486c1fec54L,
+ 0x0f87fda804280188L } },
+ /* 31 << 56 */
+ { { 0xdc9f46fc609e4a74L,0x2a44a143ba667f91L,0xbc3d8b95b4d83436L,
+ 0xa01e4bd0c7bd2958L },
+ { 0x7b18293273483c90L,0xa79c6aa1a7c7b598L,0xbf3983c6eaaac07eL,
+ 0x8f18181e96e0d4e6L } },
+ /* 32 << 56 */
+ { { 0x8553d37c051af62bL,0xe9a998eb0bf94496L,0xe0844f9fb0d59aa1L,
+ 0x983fd558e6afb813L },
+ { 0x9670c0ca65d69804L,0x732b22de6ea5ff2dL,0xd7640ba95fd8623bL,
+ 0x9f619163a6351782L } },
+ /* 33 << 56 */
+ { { 0x0bfc27eeacee5043L,0xae419e732eb10f02L,0x19c028d18943fb05L,
+ 0x71f01cf7ff13aa2aL },
+ { 0x7790737e8887a132L,0x6751330966318410L,0x9819e8a37ddb795eL,
+ 0xfecb8ef5dad100b2L } },
+ /* 34 << 56 */
+ { { 0x59f74a223021926aL,0xb7c28a496f9b4c1cL,0xed1a733f912ad0abL,
+ 0x42a910af01a5659cL },
+ { 0x3842c6e07bd68cabL,0x2b57fa3876d70ac8L,0x8a6707a83c53aaebL,
+ 0x62c1c51065b4db18L } },
+ /* 35 << 56 */
+ { { 0x8de2c1fbb2d09dc7L,0xc3dfed12266bd23bL,0x927d039bd5b27db6L,
+ 0x2fb2f0f1103243daL },
+ { 0xf855a07b80be7399L,0xed9327ce1f9f27a8L,0xa0bd99c7729bdef7L,
+ 0x2b67125e28250d88L } },
+ /* 36 << 56 */
+ { { 0x784b26e88670ced7L,0xe3dfe41fc31bd3b4L,0x9e353a06bcc85cbcL,
+ 0x302e290960178a9dL },
+ { 0x860abf11a6eac16eL,0x76447000aa2b3aacL,0x46ff9d19850afdabL,
+ 0x35bdd6a5fdb2d4c1L } },
+ /* 37 << 56 */
+ { { 0xe82594b07e5c9ce9L,0x0f379e5320af346eL,0x608b31e3bc65ad4aL,
+ 0x710c6b12267c4826L },
+ { 0x51c966f971954cf1L,0xb1cec7930d0aa215L,0x1f15598986bd23a8L,
+ 0xae2ff99cf9452e86L } },
+ /* 38 << 56 */
+ { { 0xd8dd953c340ceaa2L,0x263552752e2e9333L,0x15d4e5f98586f06dL,
+ 0xd6bf94a8f7cab546L },
+ { 0x33c59a0ab76a9af0L,0x52740ab3ba095af7L,0xc444de8a24389ca0L,
+ 0xcc6f9863706da0cbL } },
+ /* 39 << 56 */
+ { { 0xb5a741a76b2515cfL,0x71c416019585c749L,0x78350d4fe683de97L,
+ 0x31d6152463d0b5f5L },
+ { 0x7a0cc5e1fbce090bL,0xaac927edfbcb2a5bL,0xe920de4920d84c35L,
+ 0x8c06a0b622b4de26L } },
+ /* 40 << 56 */
+ { { 0xd34dd58bafe7ddf3L,0x55851fedc1e6e55bL,0xd1395616960696e7L,
+ 0x940304b25f22705fL },
+ { 0x6f43f861b0a2a860L,0xcf1212820e7cc981L,0x121862120ab64a96L,
+ 0x09215b9ab789383cL } },
+ /* 41 << 56 */
+ { { 0x311eb30537387c09L,0xc5832fcef03ee760L,0x30358f5832f7ea19L,
+ 0xe01d3c3491d53551L },
+ { 0x1ca5ee41da48ea80L,0x34e71e8ecf4fa4c1L,0x312abd257af1e1c7L,
+ 0xe3afcdeb2153f4a5L } },
+ /* 42 << 56 */
+ { { 0x9d5c84d700235e9aL,0x0308d3f48c4c836fL,0xc0a66b0489332de5L,
+ 0x610dd39989e566efL },
+ { 0xf8eea460d1ac1635L,0x84cbb3fb20a2c0dfL,0x40afb488e74a48c5L,
+ 0x29738198d326b150L } },
+ /* 43 << 56 */
+ { { 0x2a17747fa6d74081L,0x60ea4c0555a26214L,0x53514bb41f88c5feL,
+ 0xedd645677e83426cL },
+ { 0xd5d6cbec96460b25L,0xa12fd0ce68dc115eL,0xc5bc3ed2697840eaL,
+ 0x969876a8a6331e31L } },
+ /* 44 << 56 */
+ { { 0x60c36217472ff580L,0xf42297054ad41393L,0x4bd99ef0a03b8b92L,
+ 0x501c7317c144f4f6L },
+ { 0x159009b318464945L,0x6d5e594c74c5c6beL,0x2d587011321a3660L,
+ 0xd1e184b13898d022L } },
+ /* 45 << 56 */
+ { { 0x5ba047524c6a7e04L,0x47fa1e2b45550b65L,0x9419daf048c0a9a5L,
+ 0x663629537c243236L },
+ { 0xcd0744b15cb12a88L,0x561b6f9a2b646188L,0x599415a566c2c0c0L,
+ 0xbe3f08590f83f09aL } },
+ /* 46 << 56 */
+ { { 0x9141c5beb92041b8L,0x01ae38c726477d0dL,0xca8b71f3d12c7a94L,
+ 0xfab5b31f765c70dbL },
+ { 0x76ae7492487443e9L,0x8595a310990d1349L,0xf8dbeda87d460a37L,
+ 0x7f7ad0821e45a38fL } },
+ /* 47 << 56 */
+ { { 0xed1d4db61059705aL,0xa3dd492ae6b9c697L,0x4b92ee3a6eb38bd5L,
+ 0xbab2609d67cc0bb7L },
+ { 0x7fc4fe896e70ee82L,0xeff2c56e13e6b7e3L,0x9b18959e34d26fcaL,
+ 0x2517ab66889d6b45L } },
+ /* 48 << 56 */
+ { { 0xf167b4e0bdefdd4fL,0x69958465f366e401L,0x5aa368aba73bbec0L,
+ 0x121487097b240c21L },
+ { 0x378c323318969006L,0xcb4d73cee1fe53d1L,0x5f50a80e130c4361L,
+ 0xd67f59517ef5212bL } },
+ /* 49 << 56 */
+ { { 0xf145e21e9e70c72eL,0xb2e52e295566d2fbL,0x44eaba4a032397f5L,
+ 0x5e56937b7e31a7deL },
+ { 0x68dcf517456c61e1L,0xbc2e954aa8b0a388L,0xe3552fa760a8b755L,
+ 0x03442dae73ad0cdeL } },
+ /* 50 << 56 */
+ { { 0x37ffe747ceb26210L,0x983545e8787baef9L,0x8b8c853586a3de31L,
+ 0xc621dbcbfacd46dbL },
+ { 0x82e442e959266fbbL,0xa3514c37339d471cL,0x3a11b77162cdad96L,
+ 0xf0cb3b3cecf9bdf0L } },
+ /* 51 << 56 */
+ { { 0x3fcbdbce478e2135L,0x7547b5cfbda35342L,0xa97e81f18a677af6L,
+ 0xc8c2bf8328817987L },
+ { 0xdf07eaaf45580985L,0xc68d1f05c93b45cbL,0x106aa2fec77b4cacL,
+ 0x4c1d8afc04a7ae86L } },
+ /* 52 << 56 */
+ { { 0xdb41c3fd9eb45ab2L,0x5b234b5bd4b22e74L,0xda253decf215958aL,
+ 0x67e0606ea04edfa0L },
+ { 0xabbbf070ef751b11L,0xf352f175f6f06dceL,0xdfc4b6af6839f6b4L,
+ 0x53ddf9a89959848eL } },
+ /* 53 << 56 */
+ { { 0xda49c379c21520b0L,0x90864ff0dbd5d1b6L,0x2f055d235f49c7f7L,
+ 0xe51e4e6aa796b2d8L },
+ { 0xc361a67f5c9dc340L,0x5ad53c37bca7c620L,0xda1d658832c756d0L,
+ 0xad60d9118bb67e13L } },
+ /* 54 << 56 */
+ { { 0xd6c47bdf0eeec8c6L,0x4a27fec1078a1821L,0x081f7415c3099524L,
+ 0x8effdf0b82cd8060L },
+ { 0xdb70ec1c65842df8L,0x8821b358d319a901L,0x72ee56eede42b529L,
+ 0x5bb39592236e4286L } },
+ /* 55 << 56 */
+ { { 0xd1183316fd6f7140L,0xf9fadb5bbd8e81f7L,0x701d5e0c5a02d962L,
+ 0xfdee4dbf1b601324L },
+ { 0xbed1740735d7620eL,0x04e3c2c3f48c0012L,0x9ee29da73455449aL,
+ 0x562cdef491a836c4L } },
+ /* 56 << 56 */
+ { { 0x8f682a5f47701097L,0x617125d8ff88d0c2L,0x948fda2457bb86ddL,
+ 0x348abb8f289f7286L },
+ { 0xeb10eab599d94bbdL,0xd51ba28e4684d160L,0xabe0e51c30c8f41aL,
+ 0x66588b4513254f4aL } },
+ /* 57 << 56 */
+ { { 0x147ebf01fad097a5L,0x49883ea8610e815dL,0xe44d60ba8a11de56L,
+ 0xa970de6e827a7a6dL },
+ { 0x2be414245e17fc19L,0xd833c65701214057L,0x1375813b363e723fL,
+ 0x6820bb88e6a52e9bL } },
+ /* 58 << 56 */
+ { { 0x7e7f6970d875d56aL,0xd6a0a9ac51fbf6bfL,0x54ba8790a3083c12L,
+ 0xebaeb23d6ae7eb64L },
+ { 0xa8685c3ab99a907aL,0xf1e74550026bf40bL,0x7b73a027c802cd9eL,
+ 0x9a8a927c4fef4635L } },
+ /* 59 << 56 */
+ { { 0xe1b6f60c08191224L,0xc4126ebbde4ec091L,0xe1dff4dc4ae38d84L,
+ 0xde3f57db4f2ef985L },
+ { 0x34964337d446a1ddL,0x7bf217a0859e77f6L,0x8ff105278e1d13f5L,
+ 0xa304ef0374eeae27L } },
+ /* 60 << 56 */
+ { { 0xfc6f5e47d19dfa5aL,0xdb007de37fad982bL,0x28205ad1613715f5L,
+ 0x251e67297889529eL },
+ { 0x727051841ae98e78L,0xf818537d271cac32L,0xc8a15b7eb7f410f5L,
+ 0xc474356f81f62393L } },
+ /* 61 << 56 */
+ { { 0x92dbdc5ac242316bL,0xabe060acdbf4aff5L,0x6e8c38fe909a8ec6L,
+ 0x43e514e56116cb94L },
+ { 0x2078fa3807d784f9L,0x1161a880f4b5b357L,0x5283ce7913adea3dL,
+ 0x0756c3e6cc6a910bL } },
+ /* 62 << 56 */
+ { { 0x60bcfe01aaa79697L,0x04a73b2956391db1L,0xdd8dad47189b45a0L,
+ 0xbfac0dd048d5b8d9L },
+ { 0x34ab3af57d3d2ec2L,0x6fa2fc2d207bd3afL,0x9ff4009266550dedL,
+ 0x719b3e871fd5b913L } },
+ /* 63 << 56 */
+ { { 0xa573a4966d17fbc7L,0x0cd1a70a73d2b24eL,0x34e2c5cab2676937L,
+ 0xe7050b06bf669f21L },
+ { 0xfbe948b61ede9046L,0xa053005197662659L,0x58cbd4edf10124c5L,
+ 0xde2646e4dd6c06c8L } },
+ /* 64 << 56 */
+ { { 0x332f81088cad38c0L,0x471b7e906bd68ae2L,0x56ac3fb20d8e27a3L,
+ 0xb54660db136b4b0dL },
+ { 0x123a1e11a6fd8de4L,0x44dbffeaa37799efL,0x4540b977ce6ac17cL,
+ 0x495173a8af60acefL } },
+ /* 0 << 63 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 63 */
+ { { 0x9ebb284d391c2a82L,0xbcdd4863158308e8L,0x006f16ec83f1edcaL,
+ 0xa13e2c37695dc6c8L },
+ { 0x2ab756f04a057a87L,0xa8765500a6b48f98L,0x4252face68651c44L,
+ 0xa52b540be1765e02L } },
+ /* 2 << 63 */
+ { { 0x4f922fc516a0d2bbL,0x0d5cc16c1a623499L,0x9241cf3a57c62c8bL,
+ 0x2f5e6961fd1b667fL },
+ { 0x5c15c70bf5a01797L,0x3d20b44d60956192L,0x04911b37071fdb52L,
+ 0xf648f9168d6f0f7bL } },
+ /* 3 << 63 */
+ { { 0x6dc1acafe60b7cf7L,0x25860a5084a9d869L,0x56fc6f09e7ba8ac4L,
+ 0x828c5bd06148d29eL },
+ { 0xac6b435edc55ae5fL,0xa527f56cc0117411L,0x94d5045efd24342cL,
+ 0x2c4c0a3570b67c0dL } },
+ /* 4 << 63 */
+ { { 0x027cc8b8fac61d9aL,0x7d25e062e3c6fe8aL,0xe08805bfe5bff503L,
+ 0x13271e6c6ff632f7L },
+ { 0x55dca6c0232f76a5L,0x8957c32d701ef426L,0xee728bcba10a5178L,
+ 0x5ea60411b62c5173L } },
+ /* 5 << 63 */
+ { { 0xfc4e964ed0b8892bL,0x9ea176839301bb74L,0x6265c5aefcc48626L,
+ 0xe60cf82ebb3e9102L },
+ { 0x57adf797d4df5531L,0x235b59a18deeefe2L,0x60adcf583f306eb1L,
+ 0x105c27533d09492dL } },
+ /* 6 << 63 */
+ { { 0x4090914bb5def996L,0x1cb69c83233dd1e7L,0xc1e9c1d39b3d5e76L,
+ 0x1f3338edfccf6012L },
+ { 0xb1e95d0d2f5378a8L,0xacf4c2c72f00cd21L,0x6e984240eb5fe290L,
+ 0xd66c038d248088aeL } },
+ /* 7 << 63 */
+ { { 0x804d264af94d70cfL,0xbdb802ef7314bf7eL,0x8fb54de24333ed02L,
+ 0x740461e0285635d9L },
+ { 0x4113b2c8365e9383L,0xea762c833fdef652L,0x4eec6e2e47b956c1L,
+ 0xa3d814be65620fa4L } },
+ /* 8 << 63 */
+ { { 0x9ad5462bb4d8bc50L,0x181c0b16a9195770L,0xebd4fe1c78412a68L,
+ 0xae0341bcc0dff48cL },
+ { 0xb6bc45cf7003e866L,0xf11a6dea8a24a41bL,0x5407151ad04c24c2L,
+ 0x62c9d27dda5b7b68L } },
+ /* 9 << 63 */
+ { { 0x2e96423588cceff6L,0x8594c54f8b07ed69L,0x1578e73cc84d0d0dL,
+ 0x7b4e1055ff532868L },
+ { 0xa348c0d5b5ec995aL,0xbf4b9d5514289a54L,0x9ba155a658fbd777L,
+ 0x186ed7a81a84491dL } },
+ /* 10 << 63 */
+ { { 0xd4992b30614c0900L,0xda98d121bd00c24bL,0x7f534dc87ec4bfa1L,
+ 0x4a5ff67437dc34bcL },
+ { 0x68c196b81d7ea1d7L,0x38cf289380a6d208L,0xfd56cd09e3cbbd6eL,
+ 0xec72e27e4205a5b6L } },
+ /* 11 << 63 */
+ { { 0x15ea68f5a44f77f7L,0x7aa5f9fdb43c52bcL,0x86ff676f94f0e609L,
+ 0xa4cde9632e2d432bL },
+ { 0x8cafa0c0eee470afL,0x84137d0e8a3f5ec8L,0xebb40411faa31231L,
+ 0xa239c13f6f7f7ccfL } },
+ /* 12 << 63 */
+ { { 0x32865719a8afd30bL,0x867983288a826dceL,0xdf04e891c4a8fbe0L,
+ 0xbb6b6e1bebf56ad3L },
+ { 0x0a695b11471f1ff0L,0xd76c3389be15baf0L,0x018edb95be96c43eL,
+ 0xf2beaaf490794158L } },
+ /* 13 << 63 */
+ { { 0x152db09ec3076a27L,0x5e82908ee416545dL,0xa2c41272356d6f2eL,
+ 0xdc9c964231fd74e1L },
+ { 0x66ceb88d519bf615L,0xe29ecd7605a2274eL,0x3a0473c4bf5e2fa0L,
+ 0x6b6eb67164284e67L } },
+ /* 14 << 63 */
+ { { 0xe8b97932b88756ddL,0xed4e8652f17e3e61L,0xc2dd14993ee1c4a4L,
+ 0xc0aaee17597f8c0eL },
+ { 0x15c4edb96c168af3L,0x6563c7bfb39ae875L,0xadfadb6f20adb436L,
+ 0xad55e8c99a042ac0L } },
+ /* 15 << 63 */
+ { { 0x975a1ed8b76da1f5L,0x10dfa466a58acb94L,0x8dd7f7e3ac060282L,
+ 0x6813e66a572a051eL },
+ { 0xb4ccae1e350cb901L,0xb653d65650cb7822L,0x42484710dfab3b87L,
+ 0xcd7ee5379b670fd0L } },
+ /* 16 << 63 */
+ { { 0x0a50b12e523b8bf6L,0x8009eb5b8f910c1bL,0xf535af824a167588L,
+ 0x0f835f9cfb2a2abdL },
+ { 0xf59b29312afceb62L,0xc797df2a169d383fL,0xeb3f5fb066ac02b0L,
+ 0x029d4c6fdaa2d0caL } },
+ /* 17 << 63 */
+ { { 0xd4059bc1afab4bc5L,0x833f5c6f56783247L,0xb53466308d2d3605L,
+ 0x83387891d34d8433L },
+ { 0xd973b30fadd9419aL,0xbcca1099afe3fce8L,0x081783150809aac6L,
+ 0x01b7f21a540f0f11L } },
+ /* 18 << 63 */
+ { { 0x65c29219909523c8L,0xa62f648fa3a1c741L,0x88598d4f60c9e55aL,
+ 0xbce9141b0e4f347aL },
+ { 0x9af97d8435f9b988L,0x0210da62320475b6L,0x3c076e229191476cL,
+ 0x7520dbd944fc7834L } },
+ /* 19 << 63 */
+ { { 0x6a6b2cfec1ab1bbdL,0xef8a65bedc650938L,0x72855540805d7bc4L,
+ 0xda389396ed11fdfdL },
+ { 0xa9d5bd3674660876L,0x11d67c54b45dff35L,0x6af7d148a4f5da94L,
+ 0xbb8d4c3fc0bbeb31L } },
+ /* 20 << 63 */
+ { { 0x87a7ebd1e0a1b12aL,0x1e4ef88d770ba95fL,0x8c33345cdc2ae9cbL,
+ 0xcecf127601cc8403L },
+ { 0x687c012e1b39b80fL,0xfd90d0ad35c33ba4L,0xa3ef5a675c9661c2L,
+ 0x368fc88ee017429eL } },
+ /* 21 << 63 */
+ { { 0xd30c6761196a2fa2L,0x931b9817bd5b312eL,0xba01000c72f54a31L,
+ 0xa203d2c866eaa541L },
+ { 0xf2abdee098939db3L,0xe37d6c2c3e606c02L,0xf2921574521ff643L,
+ 0x2781b3c4d7e2fca3L } },
+ /* 22 << 63 */
+ { { 0x664300b07850ec06L,0xac5a38b97d3a10cfL,0x9233188de34ab39dL,
+ 0xe77057e45072cbb9L },
+ { 0xbcf0c042b59e78dfL,0x4cfc91e81d97de52L,0x4661a26c3ee0ca4aL,
+ 0x5620a4c1fb8507bcL } },
+ /* 23 << 63 */
+ { { 0x4b44d4aa049f842cL,0xceabc5d51540e82bL,0x306710fd15c6f156L,
+ 0xbe5ae52b63db1d72L },
+ { 0x06f1e7e6334957f1L,0x57e388f031144a70L,0xfb69bb2fdf96447bL,
+ 0x0f78ebd373e38a12L } },
+ /* 24 << 63 */
+ { { 0xb82226052b7ce542L,0xe6d4ce997472bde1L,0x53e16ebe09d2f4daL,
+ 0x180ff42e53b92b2eL },
+ { 0xc59bcc022c34a1c6L,0x3803d6f9422c46c2L,0x18aff74f5c14a8a2L,
+ 0x55aebf8010a08b28L } },
+ /* 25 << 63 */
+ { { 0x66097d587135593fL,0x32e6eff72be570cdL,0x584e6a102a8c860dL,
+ 0xcd185890a2eb4163L },
+ { 0x7ceae99d6d97e134L,0xd42c6b70dd8447ceL,0x59ddbb4ab8c50273L,
+ 0x03c612df3cf34e1eL } },
+ /* 26 << 63 */
+ { { 0x84b9ca1504b6c5a0L,0x35216f3918f0e3a3L,0x3ec2d2bcbd986c00L,
+ 0x8bf546d9d19228feL },
+ { 0xd1c655a44cd623c3L,0x366ce718502b8e5aL,0x2cfc84b4eea0bfe7L,
+ 0xe01d5ceecf443e8eL } },
+ /* 27 << 63 */
+ { { 0x8ec045d9036520f8L,0xdfb3c3d192d40e98L,0x0bac4ccecc559a04L,
+ 0x35eccae5240ea6b1L },
+ { 0x180b32dbf8a5a0acL,0x547972a5eb699700L,0xa3765801ca26bca0L,
+ 0x57e09d0ea647f25aL } },
+ /* 28 << 63 */
+ { { 0xb956970e2fdd23ccL,0xb80288bc5682e971L,0xe6e6d91e9ae86ebcL,
+ 0x0564c83f8c9f1939L },
+ { 0x551932a239560368L,0xe893752b049c28e2L,0x0b03cee5a6a158c3L,
+ 0xe12d656b04964263L } },
+ /* 29 << 63 */
+ { { 0x4b47554e63e3bc1dL,0xc719b6a245044ff7L,0x4f24d30ae48daa07L,
+ 0xa3f37556c8c1edc3L },
+ { 0x9a47bf760700d360L,0xbb1a1824822ae4e2L,0x22e275a389f1fb4cL,
+ 0x72b1aa239968c5f5L } },
+ /* 30 << 63 */
+ { { 0xa75feacabe063f64L,0x9b392f43bce47a09L,0xd42415091ad07acaL,
+ 0x4b0c591b8d26cd0fL },
+ { 0x2d42ddfd92f1169aL,0x63aeb1ac4cbf2392L,0x1de9e8770691a2afL,
+ 0xebe79af7d98021daL } },
+ /* 31 << 63 */
+ { { 0xcfdf2a4e40e50acfL,0xf0a98ad7af01d665L,0xefb640bf1831be1fL,
+ 0x6fe8bd2f80e9ada0L },
+ { 0x94c103a16cafbc91L,0x170f87598308e08cL,0x5de2d2ab9780ff4fL,
+ 0x666466bc45b201f2L } },
+ /* 32 << 63 */
+ { { 0x58af2010f5b343bcL,0x0f2e400af2f142feL,0x3483bfdea85f4bdfL,
+ 0xf0b1d09303bfeaa9L },
+ { 0x2ea01b95c7081603L,0xe943e4c93dba1097L,0x47be92adb438f3a6L,
+ 0x00bb7742e5bf6636L } },
+ /* 33 << 63 */
+ { { 0x136b7083824297b4L,0x9d0e55805584455fL,0xab48cedcf1c7d69eL,
+ 0x53a9e4812a256e76L },
+ { 0x0402b0e065eb2413L,0xdadbbb848fc407a7L,0xa65cd5a48d7f5492L,
+ 0x21d4429374bae294L } },
+ /* 34 << 63 */
+ { { 0x66917ce63b5f1cc4L,0x37ae52eace872e62L,0xbb087b722905f244L,
+ 0x120770861e6af74fL },
+ { 0x4b644e491058edeaL,0x827510e3b638ca1dL,0x8cf2b7046038591cL,
+ 0xffc8b47afe635063L } },
+ /* 35 << 63 */
+ { { 0x3ae220e61b4d5e63L,0xbd8647429d961b4bL,0x610c107e9bd16bedL,
+ 0x4270352a1127147bL },
+ { 0x7d17ffe664cfc50eL,0x50dee01a1e36cb42L,0x068a762235dc5f9aL,
+ 0x9a08d536df53f62cL } },
+ /* 36 << 63 */
+ { { 0x4ed714576be5f7deL,0xd93006f8c2263c9eL,0xe073694ccacacb36L,
+ 0x2ff7a5b43ae118abL },
+ { 0x3cce53f1cd871236L,0xf156a39dc2aa6d52L,0x9cc5f271b198d76dL,
+ 0xbc615b6f81383d39L } },
+ /* 37 << 63 */
+ { { 0xa54538e8de3eee6bL,0x58c77538ab910d91L,0x31e5bdbc58d278bdL,
+ 0x3cde4adfb963acaeL },
+ { 0xb1881fd25302169cL,0x8ca60fa0a989ed8bL,0xa1999458ff96a0eeL,
+ 0xc1141f03ac6c283dL } },
+ /* 38 << 63 */
+ { { 0x7677408d6dfafed3L,0x33a0165339661588L,0x3c9c15ec0b726fa0L,
+ 0x090cfd936c9b56daL },
+ { 0xe34f4baea3c40af5L,0x3469eadbd21129f1L,0xcc51674a1e207ce8L,
+ 0x1e293b24c83b1ef9L } },
+ /* 39 << 63 */
+ { { 0x17173d131e6c0bb4L,0x1900469590776d35L,0xe7980e346de6f922L,
+ 0x873554cbf4dd9a22L },
+ { 0x0316c627cbf18a51L,0x4d93651b3032c081L,0x207f27713946834dL,
+ 0x2c08d7b430cdbf80L } },
+ /* 40 << 63 */
+ { { 0x137a4fb486df2a61L,0xa1ed9c07ecf7b4a2L,0xb2e460e27bd042ffL,
+ 0xb7f5e2fa5f62f5ecL },
+ { 0x7aa6ec6bcc2423b7L,0x75ce0a7fba63eea7L,0x67a45fb1f250a6e1L,
+ 0x93bc919ce53cdc9fL } },
+ /* 41 << 63 */
+ { { 0x9271f56f871942dfL,0x2372ff6f7859ad66L,0x5f4c2b9633cb1a78L,
+ 0xe3e291015838aa83L },
+ { 0xa7ed1611e4e8110cL,0x2a2d70d5330198ceL,0xbdf132e86720efe0L,
+ 0xe61a896266a471bfL } },
+ /* 42 << 63 */
+ { { 0x796d3a85825808bdL,0x51dc3cb73fd6e902L,0x643c768a916219d1L,
+ 0x36cd7685a2ad7d32L },
+ { 0xe3db9d05b22922a4L,0x6494c87edba29660L,0xf0ac91dfbcd2ebc7L,
+ 0x4deb57a045107f8dL } },
+ /* 43 << 63 */
+ { { 0x42271f59c3d12a73L,0x5f71687ca5c2c51dL,0xcb1f50c605797bcbL,
+ 0x29ed0ed9d6d34eb0L },
+ { 0xe5fe5b474683c2ebL,0x4956eeb597447c46L,0x5b163a4371207167L,
+ 0x93fa2fed0248c5efL } },
+ /* 44 << 63 */
+ { { 0x67930af231f63950L,0xa77797c114caa2c9L,0x526e80ee27ac7e62L,
+ 0xe1e6e62658b28aecL },
+ { 0x636178b0b3c9fef0L,0xaf7752e06d5f90beL,0x94ecaf18eece51cfL,
+ 0x2864d0edca806e1fL } },
+ /* 45 << 63 */
+ { { 0x6de2e38397c69134L,0x5a42c316eb291293L,0xc77792196a60bae0L,
+ 0xa24de3466b7599d1L },
+ { 0x49d374aab75d4941L,0x989005862d501ff0L,0x9f16d40eeb7974cfL,
+ 0x1033860bcdd8c115L } },
+ /* 46 << 63 */
+ { { 0xb6c69ac82094cec3L,0x9976fb88403b770cL,0x1dea026c4859590dL,
+ 0xb6acbb468562d1fdL },
+ { 0x7cd6c46144569d85L,0xc3190a3697f0891dL,0xc6f5319548d5a17dL,
+ 0x7d919966d749abc8L } },
+ /* 47 << 63 */
+ { { 0x65104837dd1c8a20L,0x7e5410c82f683419L,0x958c3ca8be94022eL,
+ 0x605c31976145dac2L },
+ { 0x3fc0750101683d54L,0x1d7127c5595b1234L,0x10b8f87c9481277fL,
+ 0x677db2a8e65a1adbL } },
+ /* 48 << 63 */
+ { { 0xec2fccaaddce3345L,0x2a6811b7012a4350L,0x96760ff1ac598bdcL,
+ 0x054d652ad1bf4128L },
+ { 0x0a1151d492a21005L,0xad7f397133110fdfL,0x8c95928c1960100fL,
+ 0x6c91c8257bf03362L } },
+ /* 49 << 63 */
+ { { 0xc8c8b2a2ce309f06L,0xfdb27b59ca27204bL,0xd223eaa50848e32eL,
+ 0xb93e4b2ee7bfaf1eL },
+ { 0xc5308ae644aa3dedL,0x317a666ac015d573L,0xc888ce231a979707L,
+ 0xf141c1e60d5c4958L } },
+ /* 50 << 63 */
+ { { 0xb53b7de561906373L,0x858dbadeeb999595L,0x8cbb47b2a59e5c36L,
+ 0x660318b3dcf4e842L },
+ { 0xbd161ccd12ba4b7aL,0xf399daabf8c8282aL,0x1587633aeeb2130dL,
+ 0xa465311ada38dd7dL } },
+ /* 51 << 63 */
+ { { 0x5f75eec864d3779bL,0x3c5d0476ad64c171L,0x874103712a914428L,
+ 0x8096a89190e2fc29L },
+ { 0xd3d2ae9d23b3ebc2L,0x90bdd6dba580cfd6L,0x52dbb7f3c5b01f6cL,
+ 0xe68eded4e102a2dcL } },
+ /* 52 << 63 */
+ { { 0x17785b7799eb6df0L,0x26c3cc517386b779L,0x345ed9886417a48eL,
+ 0xe990b4e407d6ef31L },
+ { 0x0f456b7e2586abbaL,0x239ca6a559c96e9aL,0xe327459ce2eb4206L,
+ 0x3a4c3313a002b90aL } },
+ /* 53 << 63 */
+ { { 0x2a114806f6a3f6fbL,0xad5cad2f85c251ddL,0x92c1f613f5a784d3L,
+ 0xec7bfacf349766d5L },
+ { 0x04b3cd333e23cb3bL,0x3979fe84c5a64b2dL,0x192e27207e589106L,
+ 0xa60c43d1a15b527fL } },
+ /* 54 << 63 */
+ { { 0x2dae9082be7cf3a6L,0xcc86ba92bc967274L,0xf28a2ce8aea0a8a9L,
+ 0x404ca6d96ee988b3L },
+ { 0xfd7e9c5d005921b8L,0xf56297f144e79bf9L,0xa163b4600d75ddc2L,
+ 0x30b23616a1f2be87L } },
+ /* 55 << 63 */
+ { { 0x4b070d21bfe50e2bL,0x7ef8cfd0e1bfede1L,0xadba00112aac4ae0L,
+ 0x2a3e7d01b9ebd033L },
+ { 0x995277ece38d9d1cL,0xb500249e9c5d2de3L,0x8912b820f13ca8c9L,
+ 0xc8798114877793afL } },
+ /* 56 << 63 */
+ { { 0x19e6125dec3f1decL,0x07b1f040911178daL,0xd93ededa904a6738L,
+ 0x55187a5a0bebedcdL },
+ { 0xf7d04722eb329d41L,0xf449099ef170b391L,0xfd317a69ca99f828L,
+ 0x50c3db2b34a4976dL } },
+ /* 57 << 63 */
+ { { 0xe9ba77843757b392L,0x326caefdaa3ca05aL,0x78e5293bf1e593d4L,
+ 0x7842a9370d98fd13L },
+ { 0xe694bf965f96b10dL,0x373a9df606a8cd05L,0x997d1e51e8f0c7fcL,
+ 0x1d01979063fd972eL } },
+ /* 58 << 63 */
+ { { 0x0064d8585499fb32L,0x7b67bad977a8aeb7L,0x1d3eb9772d08eec5L,
+ 0x5fc047a6cbabae1dL },
+ { 0x0577d159e54a64bbL,0x8862201bc43497e4L,0xad6b4e282ce0608dL,
+ 0x8b687b7d0b167aacL } },
+ /* 59 << 63 */
+ { { 0x6ed4d3678b2ecfa9L,0x24dfe62da90c3c38L,0xa1862e103fe5c42bL,
+ 0x1ca73dcad5732a9fL },
+ { 0x35f038b776bb87adL,0x674976abf242b81fL,0x4f2bde7eb0fd90cdL,
+ 0x6efc172ea7fdf092L } },
+ /* 60 << 63 */
+ { { 0x3806b69b92222f1fL,0x5a2459ca6cf7ae70L,0x6789f69ca85217eeL,
+ 0x5f232b5ee3dc85acL },
+ { 0x660e3ec548e9e516L,0x124b4e473197eb31L,0x10a0cb13aafcca23L,
+ 0x7bd63ba48213224fL } },
+ /* 61 << 63 */
+ { { 0xaffad7cc290a7f4fL,0x6b409c9e0286b461L,0x58ab809fffa407afL,
+ 0xc3122eedc68ac073L },
+ { 0x17bf9e504ef24d7eL,0x5d9297943e2a5811L,0x519bc86702902e01L,
+ 0x76bba5da39c8a851L } },
+ /* 62 << 63 */
+ { { 0xe9f9669cda94951eL,0x4b6af58d66b8d418L,0xfa32107417d426a4L,
+ 0xc78e66a99dde6027L },
+ { 0x0516c0834a53b964L,0xfc659d38ff602330L,0x0ab55e5c58c5c897L,
+ 0x985099b2838bc5dfL } },
+ /* 63 << 63 */
+ { { 0x061d9efcc52fc238L,0x712b27286ac1da3fL,0xfb6581499283fe08L,
+ 0x4954ac94b8aaa2f7L },
+ { 0x85c0ada47fb2e74fL,0xee8ba98eb89926b0L,0xe4f9d37d23d1af5bL,
+ 0x14ccdbf9ba9b015eL } },
+ /* 64 << 63 */
+ { { 0xb674481b7bfe7178L,0x4e1debae65405868L,0x061b2821c48c867dL,
+ 0x69c15b35513b30eaL },
+ { 0x3b4a166636871088L,0xe5e29f5d1220b1ffL,0x4b82bb35233d9f4dL,
+ 0x4e07633318cdc675L } },
+ /* 0 << 70 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 70 */
+ { { 0x0d53f5c7a3e6fcedL,0xe8cbbdd5f45fbdebL,0xf85c01df13339a70L,
+ 0x0ff71880142ceb81L },
+ { 0x4c4e8774bd70437aL,0x5fb32891ba0bda6aL,0x1cdbebd2f18bd26eL,
+ 0x2f9526f103a9d522L } },
+ /* 2 << 70 */
+ { { 0x40ce305192c4d684L,0x8b04d7257612efcdL,0xb9dcda366f9cae20L,
+ 0x0edc4d24f058856cL },
+ { 0x64f2e6bf85427900L,0x3de81295dc09dfeaL,0xd41b4487379bf26cL,
+ 0x50b62c6d6df135a9L } },
+ /* 3 << 70 */
+ { { 0xd4f8e3b4c72dfe67L,0xc416b0f690e19fdfL,0x18b9098d4c13bd35L,
+ 0xac11118a15b8cb9eL },
+ { 0xf598a318f0062841L,0xbfe0602f89f356f4L,0x7ae3637e30177a0cL,
+ 0x3409774761136537L } },
+ /* 4 << 70 */
+ { { 0x0db2fb5ed005832aL,0x5f5efd3b91042e4fL,0x8c4ffdc6ed70f8caL,
+ 0xe4645d0bb52da9ccL },
+ { 0x9596f58bc9001d1fL,0x52c8f0bc4e117205L,0xfd4aa0d2e398a084L,
+ 0x815bfe3a104f49deL } },
+ /* 5 << 70 */
+ { { 0x97e5443f23885e5fL,0xf72f8f99e8433aabL,0xbd00b154e4d4e604L,
+ 0xd0b35e6ae5e173ffL },
+ { 0x57b2a0489164722dL,0x3e3c665b88761ec8L,0x6bdd13973da83832L,
+ 0x3c8b1a1e73dafe3bL } },
+ /* 6 << 70 */
+ { { 0x4497ace654317cacL,0xbe600ab9521771b3L,0xb42e409eb0dfe8b8L,
+ 0x386a67d73942310fL },
+ { 0x25548d8d4431cc28L,0xa7cff142985dc524L,0x4d60f5a193c4be32L,
+ 0x83ebd5c8d071c6e1L } },
+ /* 7 << 70 */
+ { { 0xba3a80a7b1fd2b0bL,0x9b3ad3965bec33e8L,0xb3868d6179743fb3L,
+ 0xcfd169fcfdb462faL },
+ { 0xd3b499d79ce0a6afL,0x55dc1cf1e42d3ff8L,0x04fb9e6cc6c3e1b2L,
+ 0x47e6961d6f69a474L } },
+ /* 8 << 70 */
+ { { 0x54eb3acce548b37bL,0xb38e754284d40549L,0x8c3daa517b341b4fL,
+ 0x2f6928ec690bf7faL },
+ { 0x0496b32386ce6c41L,0x01be1c5510adadcdL,0xc04e67e74bb5faf9L,
+ 0x3cbaf678e15c9985L } },
+ /* 9 << 70 */
+ { { 0x8cd1214550ca4247L,0xba1aa47ae7dd30aaL,0x2f81ddf1e58fee24L,
+ 0x03452936eec9b0e8L },
+ { 0x8bdc3b81243aea96L,0x9a2919af15c3d0e5L,0x9ea640ec10948361L,
+ 0x5ac86d5b6e0bcccfL } },
+ /* 10 << 70 */
+ { { 0xf892d918c36cf440L,0xaed3e837c939719cL,0xb07b08d2c0218b64L,
+ 0x6f1bcbbace9790ddL },
+ { 0x4a84d6ed60919b8eL,0xd89007918ac1f9ebL,0xf84941aa0dd5daefL,
+ 0xb22fe40a67fd62c5L } },
+ /* 11 << 70 */
+ { { 0x97e15ba2157f2db3L,0xbda2fc8f8e28ca9cL,0x5d050da437b9f454L,
+ 0x3d57eb572379d72eL },
+ { 0xe9b5eba2fb5ee997L,0x01648ca2e11538caL,0x32bb76f6f6327974L,
+ 0x338f14b8ff3f4bb7L } },
+ /* 12 << 70 */
+ { { 0x524d226ad7ab9a2dL,0x9c00090d7dfae958L,0x0ba5f5398751d8c2L,
+ 0x8afcbcdd3ab8262dL },
+ { 0x57392729e99d043bL,0xef51263baebc943aL,0x9feace9320862935L,
+ 0x639efc03b06c817bL } },
+ /* 13 << 70 */
+ { { 0x1fe054b366b4be7aL,0x3f25a9de84a37a1eL,0xf39ef1ad78d75cd9L,
+ 0xd7b58f495062c1b5L },
+ { 0x6f74f9a9ff563436L,0xf718ff29e8af51e7L,0x5234d31315e97fecL,
+ 0xb6a8e2b1292f1c0aL } },
+ /* 14 << 70 */
+ { { 0xa7f53aa8327720c1L,0x956ca322ba092cc8L,0x8f03d64a28746c4dL,
+ 0x51fe178266d0d392L },
+ { 0xd19b34db3c832c80L,0x60dccc5c6da2e3b4L,0x245dd62e0a104cccL,
+ 0xa7ab1de1620b21fdL } },
+ /* 15 << 70 */
+ { { 0xb293ae0b3893d123L,0xf7b75783b15ee71cL,0x5aa3c61442a9468bL,
+ 0xd686123cdb15d744L },
+ { 0x8c616891a7ab4116L,0x6fcd72c8a4e6a459L,0xac21911077e5fad7L,
+ 0xfb6a20e7704fa46bL } },
+ /* 16 << 70 */
+ { { 0xe839be7d341d81dcL,0xcddb688932148379L,0xda6211a1f7026eadL,
+ 0xf3b2575ff4d1cc5eL },
+ { 0x40cfc8f6a7a73ae6L,0x83879a5e61d5b483L,0xc5acb1ed41a50ebcL,
+ 0x59a60cc83c07d8faL } },
+ /* 17 << 70 */
+ { { 0x1b73bdceb1876262L,0x2b0d79f012af4ee9L,0x8bcf3b0bd46e1d07L,
+ 0x17d6af9de45d152fL },
+ { 0x735204616d736451L,0x43cbbd9756b0bf5aL,0xb0833a5bd5999b9dL,
+ 0x702614f0eb72e398L } },
+ /* 18 << 70 */
+ { { 0x0aadf01a59c3e9f8L,0x40200e77ce6b3d16L,0xda22bdd3deddafadL,
+ 0x76dedaf4310d72e1L },
+ { 0x49ef807c4bc2e88fL,0x6ba81291146dd5a5L,0xa1a4077a7d8d59e9L,
+ 0x87b6a2e7802db349L } },
+ /* 19 << 70 */
+ { { 0xd56799971b4e598eL,0xf499ef1f06fe4b1dL,0x3978d3aefcb267c5L,
+ 0xb582b557235786d0L },
+ { 0x32b3b2ca1715cb07L,0x4c3de6a28480241dL,0x63b5ffedcb571ecdL,
+ 0xeaf53900ed2fe9a9L } },
+ /* 20 << 70 */
+ { { 0xdec98d4ac3b81990L,0x1cb837229e0cc8feL,0xfe0b0491d2b427b9L,
+ 0x0f2386ace983a66cL },
+ { 0x930c4d1eb3291213L,0xa2f82b2e59a62ae4L,0x77233853f93e89e3L,
+ 0x7f8063ac11777c7fL } },
+ /* 21 << 70 */
+ { { 0xff0eb56759ad2877L,0x6f4546429865c754L,0xe6fe701a236e9a84L,
+ 0xc586ef1606e40fc3L },
+ { 0x3f62b6e024bafad9L,0xc8b42bd264da906aL,0xc98e1eb4da3276a0L,
+ 0x30d0e5fc06cbf852L } },
+ /* 22 << 70 */
+ { { 0x1b6b2ae1e8b4dfd4L,0xd754d5c78301cbacL,0x66097629112a39acL,
+ 0xf86b599993ba4ab9L },
+ { 0x26c9dea799f9d581L,0x0473b1a8c2fafeaaL,0x1469af553b2505a5L,
+ 0x227d16d7d6a43323L } },
+ /* 23 << 70 */
+ { { 0x3316f73cad3d97f9L,0x52bf3bb51f137455L,0x953eafeb09954e7cL,
+ 0xa721dfeddd732411L },
+ { 0xb4929821141d4579L,0x3411321caa3bd435L,0xafb355aa17fa6015L,
+ 0xb4e7ef4a18e42f0eL } },
+ /* 24 << 70 */
+ { { 0x604ac97c59371000L,0xe1c48c707f759c18L,0x3f62ecc5a5db6b65L,
+ 0x0a78b17338a21495L },
+ { 0x6be1819dbcc8ad94L,0x70dc04f6d89c3400L,0x462557b4a6b4840aL,
+ 0x544c6ade60bd21c0L } },
+ /* 25 << 70 */
+ { { 0x6a00f24e907a544bL,0xa7520dcb313da210L,0xfe939b7511e4994bL,
+ 0x918b6ba6bc275d70L },
+ { 0xd3e5e0fc644be892L,0x707a9816fdaf6c42L,0x60145567f15c13feL,
+ 0x4818ebaae130a54aL } },
+ /* 26 << 70 */
+ { { 0x28aad3ad58d2f767L,0xdc5267fdd7e7c773L,0x4919cc88c3afcc98L,
+ 0xaa2e6ab02db8cd4bL },
+ { 0xd46fec04d0c63eaaL,0xa1cb92c519ffa832L,0x678dd178e43a631fL,
+ 0xfb5ae1cd3dc788b3L } },
+ /* 27 << 70 */
+ { { 0x68b4fb906e77de04L,0x7992bcf0f06dbb97L,0x896e6a13c417c01dL,
+ 0x8d96332cb956be01L },
+ { 0x902fc93a413aa2b9L,0x99a4d915fc98c8a5L,0x52c29407565f1137L,
+ 0x4072690f21e4f281L } },
+ /* 28 << 70 */
+ { { 0x36e607cf02ff6072L,0xa47d2ca98ad98cdcL,0xbf471d1ef5f56609L,
+ 0xbcf86623f264ada0L },
+ { 0xb70c0687aa9e5cb6L,0xc98124f217401c6cL,0x8189635fd4a61435L,
+ 0xd28fb8afa9d98ea6L } },
+ /* 29 << 70 */
+ { { 0xb9a67c2a40c251f8L,0x88cd5d87a2da44beL,0x437deb96e09b5423L,
+ 0x150467db64287dc1L },
+ { 0xe161debbcdabb839L,0xa79e9742f1839a3eL,0xbb8dd3c2652d202bL,
+ 0x7b3e67f7e9f97d96L } },
+ /* 30 << 70 */
+ { { 0x5aa5d78fb1cb6ac9L,0xffa13e8eca1d0d45L,0x369295dd2ba5bf95L,
+ 0xd68bd1f839aff05eL },
+ { 0xaf0d86f926d783f2L,0x543a59b3fc3aafc1L,0x3fcf81d27b7da97cL,
+ 0xc990a056d25dee46L } },
+ /* 31 << 70 */
+ { { 0x3e6775b8519cce2cL,0xfc9af71fae13d863L,0x774a4a6f47c1605cL,
+ 0x46ba42452fd205e8L },
+ { 0xa06feea4d3fd524dL,0x1e7246416de1acc2L,0xf53816f1334e2b42L,
+ 0x49e5918e922f0024L } },
+ /* 32 << 70 */
+ { { 0x439530b665c7322dL,0xcf12cc01b3c1b3fbL,0xc70b01860172f685L,
+ 0xb915ee221b58391dL },
+ { 0x9afdf03ba317db24L,0x87dec65917b8ffc4L,0x7f46597be4d3d050L,
+ 0x80a1c1ed006500e7L } },
+ /* 33 << 70 */
+ { { 0x84902a9678bf030eL,0xfb5e9c9a50560148L,0x6dae0a9263362426L,
+ 0xdcaeecf4a9e30c40L },
+ { 0xc0d887bb518d0c6bL,0x99181152cb985b9dL,0xad186898ef7bc381L,
+ 0x18168ffb9ee46201L } },
+ /* 34 << 70 */
+ { { 0x9a04cdaa2502753cL,0xbb279e2651407c41L,0xeacb03aaf23564e5L,
+ 0x1833658271e61016L },
+ { 0x8684b8c4eb809877L,0xb336e18dea0e672eL,0xefb601f034ee5867L,
+ 0x2733edbe1341cfd1L } },
+ /* 35 << 70 */
+ { { 0xb15e809a26025c3cL,0xe6e981a69350df88L,0x923762378502fd8eL,
+ 0x4791f2160c12be9bL },
+ { 0xb725678925f02425L,0xec8631947a974443L,0x7c0ce882fb41cc52L,
+ 0xc266ff7ef25c07f2L } },
+ /* 36 << 70 */
+ { { 0x3d4da8c3017025f3L,0xefcf628cfb9579b4L,0x5c4d00161f3716ecL,
+ 0x9c27ebc46801116eL },
+ { 0x5eba0ea11da1767eL,0xfe15145247004c57L,0x3ace6df68c2373b7L,
+ 0x75c3dffe5dbc37acL } },
+ /* 37 << 70 */
+ { { 0x3dc32a73ddc925fcL,0xb679c8412f65ee0bL,0x715a3295451cbfebL,
+ 0xd9889768f76e9a29L },
+ { 0xec20ce7fb28ad247L,0xe99146c400894d79L,0x71457d7c9f5e3ea7L,
+ 0x097b266238030031L } },
+ /* 38 << 70 */
+ { { 0xdb7f6ae6cf9f82a8L,0x319decb9438f473aL,0xa63ab386283856c3L,
+ 0x13e3172fb06a361bL },
+ { 0x2959f8dc7d5a006cL,0x2dbc27c675fba752L,0xc1227ab287c22c9eL,
+ 0x06f61f7571a268b2L } },
+ /* 39 << 70 */
+ { { 0x1b6bb97104779ce2L,0xaca838120aadcb1dL,0x297ae0bcaeaab2d5L,
+ 0xa5c14ee75bfb9f13L },
+ { 0xaa00c583f17a62c7L,0x39eb962c173759f6L,0x1eeba1d486c9a88fL,
+ 0x0ab6c37adf016c5eL } },
+ /* 40 << 70 */
+ { { 0xa2a147dba28a0749L,0x246c20d6ee519165L,0x5068d1b1d3810715L,
+ 0xb1e7018c748160b9L },
+ { 0x03f5b1faf380ff62L,0xef7fb1ddf3cb2c1eL,0xeab539a8fc91a7daL,
+ 0x83ddb707f3f9b561L } },
+ /* 41 << 70 */
+ { { 0xc550e211fe7df7a4L,0xa7cd07f2063f6f40L,0xb0de36352976879cL,
+ 0xb5f83f85e55741daL },
+ { 0x4ea9d25ef3d8ac3dL,0x6fe2066f62819f02L,0x4ab2b9c2cef4a564L,
+ 0x1e155d965ffa2de3L } },
+ /* 42 << 70 */
+ { { 0x0eb0a19bc3a72d00L,0x4037665b8513c31bL,0x2fb2b6bf04c64637L,
+ 0x45c34d6e08cdc639L },
+ { 0x56f1e10ff01fd796L,0x4dfb8101fe3667b8L,0xe0eda2539021d0c0L,
+ 0x7a94e9ff8a06c6abL } },
+ /* 43 << 70 */
+ { { 0x2d3bb0d9bb9aa882L,0xea20e4e5ec05fd10L,0xed7eeb5f1a1ca64eL,
+ 0x2fa6b43cc6327cbdL },
+ { 0xb577e3cf3aa91121L,0x8c6bd5ea3a34079bL,0xd7e5ba3960e02fc0L,
+ 0xf16dd2c390141bf8L } },
+ /* 44 << 70 */
+ { { 0xb57276d980101b98L,0x760883fdb82f0f66L,0x89d7de754bc3eff3L,
+ 0x03b606435dc2ab40L },
+ { 0xcd6e53dfe05beeacL,0xf2f1e862bc3325cdL,0xdd0f7921774f03c3L,
+ 0x97ca72214552cc1bL } },
+ /* 45 << 70 */
+ { { 0x5a0d6afe1cd19f72L,0xa20915dcf183fbebL,0x9fda4b40832c403cL,
+ 0x32738eddbe425442L },
+ { 0x469a1df6b5eccf1aL,0x4b5aff4228bbe1f0L,0x31359d7f570dfc93L,
+ 0xa18be235f0088628L } },
+ /* 46 << 70 */
+ { { 0xa5b30fbab00ed3a9L,0x34c6137473cdf8beL,0x2c5c5f46abc56797L,
+ 0x5cecf93db82a8ae2L },
+ { 0x7d3dbe41a968fbf0L,0xd23d45831a5c7f3dL,0xf28f69a0c087a9c7L,
+ 0xc2d75471474471caL } },
+ /* 47 << 70 */
+ { { 0x36ec9f4a4eb732ecL,0x6c943bbdb1ca6bedL,0xd64535e1f2457892L,
+ 0x8b84a8eaf7e2ac06L },
+ { 0xe0936cd32499dd5fL,0x12053d7e0ed04e57L,0x4bdd0076e4305d9dL,
+ 0x34a527b91f67f0a2L } },
+ /* 48 << 70 */
+ { { 0xe79a4af09cec46eaL,0xb15347a1658b9bc7L,0x6bd2796f35af2f75L,
+ 0xac9579904051c435L },
+ { 0x2669dda3c33a655dL,0x5d503c2e88514aa3L,0xdfa113373753dd41L,
+ 0x3f0546730b754f78L } },
+ /* 49 << 70 */
+ { { 0xbf185677496125bdL,0xfb0023c83775006cL,0xfa0f072f3a037899L,
+ 0x4222b6eb0e4aea57L },
+ { 0x3dde5e767866d25aL,0xb6eb04f84837aa6fL,0x5315591a2cf1cdb8L,
+ 0x6dfb4f412d4e683cL } },
+ /* 50 << 70 */
+ { { 0x7e923ea448ee1f3aL,0x9604d9f705a2afd5L,0xbe1d4a3340ea4948L,
+ 0x5b45f1f4b44cbd2fL },
+ { 0x5faf83764acc757eL,0xa7cf9ab863d68ff7L,0x8ad62f69df0e404bL,
+ 0xd65f33c212bdafdfL } },
+ /* 51 << 70 */
+ { { 0xc365de15a377b14eL,0x6bf5463b8e39f60cL,0x62030d2d2ce68148L,
+ 0xd95867efe6f843a8L },
+ { 0xd39a0244ef5ab017L,0x0bd2d8c14ab55d12L,0xc9503db341639169L,
+ 0x2d4e25b0f7660c8aL } },
+ /* 52 << 70 */
+ { { 0x760cb3b5e224c5d7L,0xfa3baf8c68616919L,0x9fbca1138d142552L,
+ 0x1ab18bf17669ebf5L },
+ { 0x55e6f53e9bdf25ddL,0x04cc0bf3cb6cd154L,0x595bef4995e89080L,
+ 0xfe9459a8104a9ac1L } },
+ /* 53 << 70 */
+ { { 0xad2d89cacce9bb32L,0xddea65e1f7de8285L,0x62ed8c35b351bd4bL,
+ 0x4150ff360c0e19a7L },
+ { 0x86e3c801345f4e47L,0x3bf21f71203a266cL,0x7ae110d4855b1f13L,
+ 0x5d6aaf6a07262517L } },
+ /* 54 << 70 */
+ { { 0x1e0f12e1813d28f1L,0x6000e11d7ad7a523L,0xc7d8deefc744a17bL,
+ 0x1e990b4814c05a00L },
+ { 0x68fddaee93e976d5L,0x696241d146610d63L,0xb204e7c3893dda88L,
+ 0x8bccfa656a3a6946L } },
+ /* 55 << 70 */
+ { { 0xb59425b4c5cd1411L,0x701b4042ff3658b1L,0xe3e56bca4784cf93L,
+ 0x27de5f158fe68d60L },
+ { 0x4ab9cfcef8d53f19L,0xddb10311a40a730dL,0x6fa73cd14eee0a8aL,
+ 0xfd5487485249719dL } },
+ /* 56 << 70 */
+ { { 0x49d66316a8123ef0L,0x73c32db4e7f95438L,0x2e2ed2090d9e7854L,
+ 0xf98a93299d9f0507L },
+ { 0xc5d33cf60c6aa20aL,0x9a32ba1475279bb2L,0x7e3202cb774a7307L,
+ 0x64ed4bc4e8c42dbdL } },
+ /* 57 << 70 */
+ { { 0xc20f1a06d4caed0dL,0xb8021407171d22b3L,0xd426ca04d13268d7L,
+ 0x9237700725f4d126L },
+ { 0x4204cbc371f21a85L,0x18461b7af82369baL,0xc0c07d313fc858f9L,
+ 0x5deb5a50e2bab569L } },
+ /* 58 << 70 */
+ { { 0xd5959d46d5eea89eL,0xfdff842408437f4bL,0xf21071e43cfe254fL,
+ 0x7241769695468321L },
+ { 0x5d8288b9102cae3eL,0x2d143e3df1965dffL,0x00c9a376a078d847L,
+ 0x6fc0da3126028731L } },
+ /* 59 << 70 */
+ { { 0xa2baeadfe45083a2L,0x66bc72185e5b4bcdL,0x2c826442d04b8e7fL,
+ 0xc19f54516c4b586bL },
+ { 0x60182c495b7eeed5L,0xd9954ecd7aa9dfa1L,0xa403a8ecc73884adL,
+ 0x7fb17de29bb39041L } },
+ /* 60 << 70 */
+ { { 0x694b64c5abb020e8L,0x3d18c18419c4eec7L,0x9c4673ef1c4793e5L,
+ 0xc7b8aeb5056092e6L },
+ { 0x3aa1ca43f0f8c16bL,0x224ed5ecd679b2f6L,0x0d56eeaf55a205c9L,
+ 0xbfe115ba4b8e028bL } },
+ /* 61 << 70 */
+ { { 0x97e608493927f4feL,0xf91fbf94759aa7c5L,0x985af7696be90a51L,
+ 0xc1277b7878ccb823L },
+ { 0x395b656ee7a75952L,0x00df7de0928da5f5L,0x09c231754ca4454fL,
+ 0x4ec971f47aa2d3c1L } },
+ /* 62 << 70 */
+ { { 0x45c3c507e75d9cccL,0x63b7be8a3dc90306L,0x37e09c665db44bdcL,
+ 0x50d60da16841c6a2L },
+ { 0x6f9b65ee08df1b12L,0x387348797ff089dfL,0x9c331a663fe8013dL,
+ 0x017f5de95f42fcc8L } },
+ /* 63 << 70 */
+ { { 0x43077866e8e57567L,0xc9f781cef9fcdb18L,0x38131dda9b12e174L,
+ 0x25d84aa38a03752aL },
+ { 0x45e09e094d0c0ce2L,0x1564008b92bebba5L,0xf7e8ad31a87284c7L,
+ 0xb7c4b46c97e7bbaaL } },
+ /* 64 << 70 */
+ { { 0x3e22a7b397acf4ecL,0x0426c4005ea8b640L,0x5e3295a64e969285L,
+ 0x22aabc59a6a45670L },
+ { 0xb929714c5f5942bcL,0x9a6168bdfa3182edL,0x2216a665104152baL,
+ 0x46908d03b6926368L } },
+ /* 0 << 77 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 77 */
+ { { 0xa9f5d8745a1251fbL,0x967747a8c72725c7L,0x195c33e531ffe89eL,
+ 0x609d210fe964935eL },
+ { 0xcafd6ca82fe12227L,0xaf9b5b960426469dL,0x2e9ee04c5693183cL,
+ 0x1084a333c8146fefL } },
+ /* 2 << 77 */
+ { { 0x96649933aed1d1f7L,0x566eaff350563090L,0x345057f0ad2e39cfL,
+ 0x148ff65b1f832124L },
+ { 0x042e89d4cf94cf0dL,0x319bec84520c58b3L,0x2a2676265361aa0dL,
+ 0xc86fa3028fbc87adL } },
+ /* 3 << 77 */
+ { { 0xfc83d2ab5c8b06d5L,0xb1a785a2fe4eac46L,0xb99315bc846f7779L,
+ 0xcf31d816ef9ea505L },
+ { 0x2391fe6a15d7dc85L,0x2f132b04b4016b33L,0x29547fe3181cb4c7L,
+ 0xdb66d8a6650155a1L } },
+ /* 4 << 77 */
+ { { 0x6b66d7e1adc1696fL,0x98ebe5930acd72d0L,0x65f24550cc1b7435L,
+ 0xce231393b4b9a5ecL },
+ { 0x234a22d4db067df9L,0x98dda095caff9b00L,0x1bbc75a06100c9c1L,
+ 0x1560a9c8939cf695L } },
+ /* 5 << 77 */
+ { { 0xcf006d3e99e0925fL,0x2dd74a966322375aL,0xc58b446ab56af5baL,
+ 0x50292683e0b9b4f1L },
+ { 0xe2c34cb41aeaffa3L,0x8b17203f9b9587c1L,0x6d559207ead1350cL,
+ 0x2b66a215fb7f9604L } },
+ /* 6 << 77 */
+ { { 0x0850325efe51bf74L,0x9c4f579e5e460094L,0x5c87b92a76da2f25L,
+ 0x889de4e06febef33L },
+ { 0x6900ec06646083ceL,0xbe2a0335bfe12773L,0xadd1da35c5344110L,
+ 0x757568b7b802cd20L } },
+ /* 7 << 77 */
+ { { 0x7555977900f7e6c8L,0x38e8b94f0facd2f0L,0xfea1f3af03fde375L,
+ 0x5e11a1d875881dfcL },
+ { 0xb3a6b02ec1e2f2efL,0x193d2bbbc605a6c5L,0x325ffeee339a0b2dL,
+ 0x27b6a7249e0c8846L } },
+ /* 8 << 77 */
+ { { 0xe4050f1cf1c367caL,0x9bc85a9bc90fbc7dL,0xa373c4a2e1a11032L,
+ 0xb64232b7ad0393a9L },
+ { 0xf5577eb0167dad29L,0x1604f30194b78ab2L,0x0baa94afe829348bL,
+ 0x77fbd8dd41654342L } },
+ /* 9 << 77 */
+ { { 0xdab50ea5b964e39aL,0xd4c29e3cd0d3c76eL,0x80dae67c56d11964L,
+ 0x7307a8bfe5ffcc2fL },
+ { 0x65bbc1aa91708c3bL,0xa151e62c28bf0eebL,0x6cb533816fa34db7L,
+ 0x5139e05ca29403a8L } },
+ /* 10 << 77 */
+ { { 0x6ff651b494a7cd2eL,0x5671ffd10699336cL,0x6f5fd2cc979a896aL,
+ 0x11e893a8d8148cefL },
+ { 0x988906a165cf7b10L,0x81b67178c50d8485L,0x7c0deb358a35b3deL,
+ 0x423ac855c1d29799L } },
+ /* 11 << 77 */
+ { { 0xaf580d87dac50b74L,0x28b2b89f5869734cL,0x99a3b936874e28fbL,
+ 0xbb2c919025f3f73aL },
+ { 0x199f691884a9d5b7L,0x7ebe23257e770374L,0xf442e1070738efe2L,
+ 0xcf9f3f56cf9082d2L } },
+ /* 12 << 77 */
+ { { 0x719f69e109618708L,0xcc9e8364c183f9b1L,0xec203a95366a21afL,
+ 0x6aec5d6d068b141fL },
+ { 0xee2df78a994f04e9L,0xb39ccae8271245b0L,0xb875a4a997e43f4fL,
+ 0x507dfe11db2cea98L } },
+ /* 13 << 77 */
+ { { 0x4fbf81cb489b03e9L,0xdb86ec5b6ec414faL,0xfad444f9f51b3ae5L,
+ 0xca7d33d61914e3feL },
+ { 0xa9c32f5c0ae6c4d0L,0xa9ca1d1e73969568L,0x98043c311aa7467eL,
+ 0xe832e75ce21b5ac6L } },
+ /* 14 << 77 */
+ { { 0x314b7aea5232123dL,0x08307c8c65ae86dbL,0x06e7165caa4668edL,
+ 0xb170458bb4d3ec39L },
+ { 0x4d2e3ec6c19bb986L,0xc5f34846ae0304edL,0x917695a06c9f9722L,
+ 0x6c7f73174cab1c0aL } },
+ /* 15 << 77 */
+ { { 0x6295940e9d6d2e8bL,0xd318b8c1549f7c97L,0x2245320497713885L,
+ 0x468d834ba8a440feL },
+ { 0xd81fe5b2bfba796eL,0x152364db6d71f116L,0xbb8c7c59b5b66e53L,
+ 0x0b12c61b2641a192L } },
+ /* 16 << 77 */
+ { { 0x31f14802fcf0a7fdL,0x42fd07895488b01eL,0x71d78d6d9952b498L,
+ 0x8eb572d907ac5201L },
+ { 0xe0a2a44c4d194a88L,0xd2b63fd9ba017e66L,0x78efc6c8f888aefcL,
+ 0xb76f6bda4a881a11L } },
+ /* 17 << 77 */
+ { { 0x187f314bb46c2397L,0x004cf5665ded2819L,0xa9ea570438764d34L,
+ 0xbba4521778084709L },
+ { 0x064745711171121eL,0xad7b7eb1e7c9b671L,0xdacfbc40730f7507L,
+ 0x178cd8c6c7ad7bd1L } },
+ /* 18 << 77 */
+ { { 0xbf0be101b2a67238L,0x3556d367af9c14f2L,0x104b7831a5662075L,
+ 0x58ca59bb79d9e60aL },
+ { 0x4bc45392a569a73bL,0x517a52e85698f6c9L,0x85643da5aeadd755L,
+ 0x1aed0cd52a581b84L } },
+ /* 19 << 77 */
+ { { 0xb9b4ff8480af1372L,0x244c3113f1ba5d1fL,0x2a5dacbef5f98d31L,
+ 0x2c3323e84375bc2aL },
+ { 0x17a3ab4a5594b1ddL,0xa1928bfbceb4797eL,0xe83af245e4886a19L,
+ 0x8979d54672b5a74aL } },
+ /* 20 << 77 */
+ { { 0xa0f726bc19f9e967L,0xd9d03152e8fbbf4eL,0xcfd6f51db7707d40L,
+ 0x633084d963f6e6e0L },
+ { 0xedcd9cdc55667eafL,0x73b7f92b2e44d56fL,0xfb2e39b64e962b14L,
+ 0x7d408f6ef671fcbfL } },
+ /* 21 << 77 */
+ { { 0xcc634ddc164a89bbL,0x74a42bb23ef3bd05L,0x1280dbb2428decbbL,
+ 0x6103f6bb402c8596L },
+ { 0xfa2bf581355a5752L,0x562f96a800946674L,0x4e4ca16d6da0223bL,
+ 0xfe47819f28d3aa25L } },
+ /* 22 << 77 */
+ { { 0x9eea3075f8dfcf8aL,0xa284f0aa95669825L,0xb3fca250867d3fd8L,
+ 0x20757b5f269d691eL },
+ { 0xf2c2402093b8a5deL,0xd3f93359ebc06da6L,0x1178293eb2739c33L,
+ 0xd2a3e770bcd686e5L } },
+ /* 23 << 77 */
+ { { 0xa76f49f4cd941534L,0x0d37406be3c71c0eL,0x172d93973b97f7e3L,
+ 0xec17e239bd7fd0deL },
+ { 0xe32905516f496ba2L,0x6a69317236ad50e7L,0xc4e539a283e7eff5L,
+ 0x752737e718e1b4cfL } },
+ /* 24 << 77 */
+ { { 0xa2f7932c68af43eeL,0x5502468e703d00bdL,0xe5dc978f2fb061f5L,
+ 0xc9a1904a28c815adL },
+ { 0xd3af538d470c56a4L,0x159abc5f193d8cedL,0x2a37245f20108ef3L,
+ 0xfa17081e223f7178L } },
+ /* 25 << 77 */
+ { { 0x27b0fb2b10c8c0f5L,0x2102c3ea40650547L,0x594564df8ac3bfa7L,
+ 0x98102033509dad96L },
+ { 0x6989643ff1d18a13L,0x35eebd91d7fc5af0L,0x078d096afaeaafd8L,
+ 0xb7a89341def3de98L } },
+ /* 26 << 77 */
+ { { 0x2a206e8decf2a73aL,0x066a63978e551994L,0x3a6a088ab98d53a2L,
+ 0x0ce7c67c2d1124aaL },
+ { 0x48cec671759a113cL,0xe3b373d34f6f67faL,0x5455d479fd36727bL,
+ 0xe5a428eea13c0d81L } },
+ /* 27 << 77 */
+ { { 0xb853dbc81c86682bL,0xb78d2727b8d02b2aL,0xaaf69bed8ebc329aL,
+ 0xdb6b40b3293b2148L },
+ { 0xe42ea77db8c4961fL,0xb1a12f7c20e5e0abL,0xa0ec527479e8b05eL,
+ 0x68027391fab60a80L } },
+ /* 28 << 77 */
+ { { 0x6bfeea5f16b1bd5eL,0xf957e4204de30ad3L,0xcbaf664e6a353b9eL,
+ 0x5c87331226d14febL },
+ { 0x4e87f98cb65f57cbL,0xdb60a6215e0cdd41L,0x67c16865a6881440L,
+ 0x1093ef1a46ab52aaL } },
+ /* 29 << 77 */
+ { { 0xc095afb53f4ece64L,0x6a6bb02e7604551aL,0x55d44b4e0b26b8cdL,
+ 0xe5f9a999f971268aL },
+ { 0xc08ec42511a7de84L,0x83568095fda469ddL,0x737bfba16c6c90a2L,
+ 0x1cb9c4a0be229831L } },
+ /* 30 << 77 */
+ { { 0x93bccbbabb2eec64L,0xa0c23b64da03adbeL,0x5f7aa00ae0e86ac4L,
+ 0x470b941efc1401e6L },
+ { 0x5ad8d6799df43574L,0x4ccfb8a90f65d810L,0x1bce80e3aa7fbd81L,
+ 0x273291ad9508d20aL } },
+ /* 31 << 77 */
+ { { 0xf5c4b46b42a92806L,0x810684eca86ab44aL,0x4591640bca0bc9f8L,
+ 0xb5efcdfc5c4b6054L },
+ { 0x16fc89076e9edd12L,0xe29d0b50d4d792f9L,0xa45fd01c9b03116dL,
+ 0x85035235c81765a4L } },
+ /* 32 << 77 */
+ { { 0x1fe2a9b2b4b4b67cL,0xc1d10df0e8020604L,0x9d64abfcbc8058d8L,
+ 0x8943b9b2712a0fbbL },
+ { 0x90eed9143b3def04L,0x85ab3aa24ce775ffL,0x605fd4ca7bbc9040L,
+ 0x8b34a564e2c75dfbL } },
+ /* 33 << 77 */
+ { { 0x41ffc94a10358560L,0x2d8a50729e5c28aaL,0xe915a0fc4cc7eb15L,
+ 0xe9efab058f6d0f5dL },
+ { 0xdbab47a9d19e9b91L,0x8cfed7450276154cL,0x154357ae2cfede0dL,
+ 0x520630df19f5a4efL } },
+ /* 34 << 77 */
+ { { 0x25759f7ce382360fL,0xb6db05c988bf5857L,0x2917d61d6c58d46cL,
+ 0x14f8e491fd20cb7aL },
+ { 0xb68a727a11c20340L,0x0386f86faf7ccbb6L,0x5c8bc6ccfee09a20L,
+ 0x7d76ff4abb7eea35L } },
+ /* 35 << 77 */
+ { { 0xa7bdebe7db15be7aL,0x67a08054d89f0302L,0x56bf0ea9c1193364L,
+ 0xc824446762837ebeL },
+ { 0x32bd8e8b20d841b8L,0x127a0548dbb8a54fL,0x83dd4ca663b20236L,
+ 0x87714718203491faL } },
+ /* 36 << 77 */
+ { { 0x4dabcaaaaa8a5288L,0x91cc0c8aaf23a1c9L,0x34c72c6a3f220e0cL,
+ 0xbcc20bdf1232144aL },
+ { 0x6e2f42daa20ede1bL,0xc441f00c74a00515L,0xbf46a5b6734b8c4bL,
+ 0x574095037b56c9a4L } },
+ /* 37 << 77 */
+ { { 0x9f735261e4585d45L,0x9231faed6734e642L,0x1158a176be70ee6cL,
+ 0x35f1068d7c3501bfL },
+ { 0x6beef900a2d26115L,0x649406f2ef0afee3L,0x3f43a60abc2420a1L,
+ 0x509002a7d5aee4acL } },
+ /* 38 << 77 */
+ { { 0xb46836a53ff3571bL,0x24f98b78837927c1L,0x6254256a4533c716L,
+ 0xf27abb0bd07ee196L },
+ { 0xd7cf64fc5c6d5bfdL,0x6915c751f0cd7a77L,0xd9f590128798f534L,
+ 0x772b0da8f81d8b5fL } },
+ /* 39 << 77 */
+ { { 0x1244260c2e03fa69L,0x36cf0e3a3be1a374L,0x6e7c1633ef06b960L,
+ 0xa71a4c55671f90f6L },
+ { 0x7a94125133c673dbL,0xc0bea51073e8c131L,0x61a8a699d4f6c734L,
+ 0x25e78c88341ed001L } },
+ /* 40 << 77 */
+ { { 0x5c18acf88e2f7d90L,0xfdbf33d777be32cdL,0x0a085cd7d2eb5ee9L,
+ 0x2d702cfbb3201115L },
+ { 0xb6e0ebdb85c88ce8L,0x23a3ce3c1e01d617L,0x3041618e567333acL,
+ 0x9dd0fd8f157edb6bL } },
+ /* 41 << 77 */
+ { { 0x27f74702b57872b8L,0x2ef26b4f657d5fe1L,0x95426f0a57cf3d40L,
+ 0x847e2ad165a6067aL },
+ { 0xd474d9a009996a74L,0x16a56acd2a26115cL,0x02a615c3d16f4d43L,
+ 0xcc3fc965aadb85b7L } },
+ /* 42 << 77 */
+ { { 0x386bda73ce07d1b0L,0xd82910c258ad4178L,0x124f82cfcd2617f4L,
+ 0xcc2f5e8def691770L },
+ { 0x82702550b8c30cccL,0x7b856aea1a8e575aL,0xbb822fefb1ab9459L,
+ 0x085928bcec24e38eL } },
+ /* 43 << 77 */
+ { { 0x5d0402ecba8f4b4dL,0xc07cd4ba00b4d58bL,0x5d8dffd529227e7aL,
+ 0x61d44d0c31bf386fL },
+ { 0xe486dc2b135e6f4dL,0x680962ebe79410efL,0xa61bd343f10088b5L,
+ 0x6aa76076e2e28686L } },
+ /* 44 << 77 */
+ { { 0x80463d118fb98871L,0xcb26f5c3bbc76affL,0xd4ab8eddfbe03614L,
+ 0xc8eb579bc0cf2deeL },
+ { 0xcc004c15c93bae41L,0x46fbae5d3aeca3b2L,0x671235cf0f1e9ab1L,
+ 0xadfba9349ec285c1L } },
+ /* 45 << 77 */
+ { { 0x88ded013f216c980L,0xc8ac4fb8f79e0bc1L,0xa29b89c6fb97a237L,
+ 0xb697b7809922d8e7L },
+ { 0x3142c639ddb945b5L,0x447b06c7e094c3a9L,0xcdcb364272266c90L,
+ 0x633aad08a9385046L } },
+ /* 46 << 77 */
+ { { 0xa36c936bb57c6477L,0x871f8b64e94dbcc6L,0x28d0fb62a591a67bL,
+ 0x9d40e081c1d926f5L },
+ { 0x3111eaf6f2d84b5aL,0x228993f9a565b644L,0x0ccbf5922c83188bL,
+ 0xf87b30ab3df3e197L } },
+ /* 47 << 77 */
+ { { 0xb8658b317642bca8L,0x1a032d7f52800f17L,0x051dcae579bf9445L,
+ 0xeba6b8ee54a2e253L },
+ { 0x5c8b9cadd4485692L,0x84bda40e8986e9beL,0xd16d16a42f0db448L,
+ 0x8ec80050a14d4188L } },
+ /* 48 << 77 */
+ { { 0xb2b2610798fa7aaaL,0x41209ee4f073aa4eL,0xf1570359f2d6b19bL,
+ 0xcbe6868cfc577cafL },
+ { 0x186c4bdc32c04dd3L,0xa6c35faecfeee397L,0xb4a1b312f086c0cfL,
+ 0xe0a5ccc6d9461fe2L } },
+ /* 49 << 77 */
+ { { 0xc32278aa1536189fL,0x1126c55fba6df571L,0x0f71a602b194560eL,
+ 0x8b2d7405324bd6e1L },
+ { 0x8481939e3738be71L,0xb5090b1a1a4d97a9L,0x116c65a3f05ba915L,
+ 0x21863ad3aae448aaL } },
+ /* 50 << 77 */
+ { { 0xd24e2679a7aae5d3L,0x7076013d0de5c1c4L,0x2d50f8babb05b629L,
+ 0x73c1abe26e66efbbL },
+ { 0xefd4b422f2488af7L,0xe4105d02663ba575L,0x7eb60a8b53a69457L,
+ 0x62210008c945973bL } },
+ /* 51 << 77 */
+ { { 0xfb25547877a50ec6L,0xbf0392f70a37a72cL,0xa0a7a19c4be18e7aL,
+ 0x90d8ea1625b1e0afL },
+ { 0x7582a293ef953f57L,0x90a64d05bdc5465aL,0xca79c497e2510717L,
+ 0x560dbb7c18cb641fL } },
+ /* 52 << 77 */
+ { { 0x1d8e32864b66abfbL,0xd26f52e559030900L,0x1ee3f6435584941aL,
+ 0x6d3b3730569f5958L },
+ { 0x9ff2a62f4789dba5L,0x91fcb81572b5c9b7L,0xf446cb7d6c8f9a0eL,
+ 0x48f625c139b7ecb5L } },
+ /* 53 << 77 */
+ { { 0xbabae8011c6219b8L,0xe7a562d928ac2f23L,0xe1b4873226e20588L,
+ 0x06ee1cad775af051L },
+ { 0xda29ae43faff79f7L,0xc141a412652ee9e0L,0x1e127f6f195f4bd0L,
+ 0x29c6ab4f072f34f8L } },
+ /* 54 << 77 */
+ { { 0x7b7c147730448112L,0x82b51af1e4a38656L,0x2bf2028a2f315010L,
+ 0xc9a4a01f6ea88cd4L },
+ { 0xf63e95d8257e5818L,0xdd8efa10b4519b16L,0xed8973e00da910bfL,
+ 0xed49d0775c0fe4a9L } },
+ /* 55 << 77 */
+ { { 0xac3aac5eb7caee1eL,0x1033898da7f4da57L,0x42145c0e5c6669b9L,
+ 0x42daa688c1aa2aa0L },
+ { 0x629cc15c1a1d885aL,0x25572ec0f4b76817L,0x8312e4359c8f8f28L,
+ 0x8107f8cd81965490L } },
+ /* 56 << 77 */
+ { { 0x516ff3a36fa6110cL,0x74fb1eb1fb93561fL,0x6c0c90478457522bL,
+ 0xcfd321046bb8bdc6L },
+ { 0x2d6884a2cc80ad57L,0x7c27fc3586a9b637L,0x3461baedadf4e8cdL,
+ 0x1d56251a617242f0L } },
+ /* 57 << 77 */
+ { { 0x0b80d209c955bef4L,0xdf02cad206adb047L,0xf0d7cb915ec74feeL,
+ 0xd25033751111ba44L },
+ { 0x9671755edf53cb36L,0x54dcb6123368551bL,0x66d69aacc8a025a4L,
+ 0x6be946c6e77ef445L } },
+ /* 58 << 77 */
+ { { 0x719946d1a995e094L,0x65e848f6e51e04d8L,0xe62f33006a1e3113L,
+ 0x1541c7c1501de503L },
+ { 0x4daac9faf4acfadeL,0x0e58589744cd0b71L,0x544fd8690a51cd77L,
+ 0x60fc20ed0031016dL } },
+ /* 59 << 77 */
+ { { 0x58b404eca4276867L,0x46f6c3cc34f34993L,0x477ca007c636e5bdL,
+ 0x8018f5e57c458b47L },
+ { 0xa1202270e47b668fL,0xcef48ccdee14f203L,0x23f98bae62ff9b4dL,
+ 0x55acc035c589edddL } },
+ /* 60 << 77 */
+ { { 0x3fe712af64db4444L,0x19e9d634becdd480L,0xe08bc047a930978aL,
+ 0x2dbf24eca1280733L },
+ { 0x3c0ae38c2cd706b2L,0x5b012a5b359017b9L,0x3943c38c72e0f5aeL,
+ 0x786167ea57176fa3L } },
+ /* 61 << 77 */
+ { { 0xe5f9897d594881dcL,0x6b5efad8cfb820c1L,0xb2179093d55018deL,
+ 0x39ad7d320bac56ceL },
+ { 0xb55122e02cfc0e81L,0x117c4661f6d89daaL,0x362d01e1cb64fa09L,
+ 0x6a309b4e3e9c4dddL } },
+ /* 62 << 77 */
+ { { 0xfa979fb7abea49b1L,0xb4b1d27d10e2c6c5L,0xbd61c2c423afde7aL,
+ 0xeb6614f89786d358L },
+ { 0x4a5d816b7f6f7459L,0xe431a44f09360e7bL,0x8c27a032c309914cL,
+ 0xcea5d68acaede3d8L } },
+ /* 63 << 77 */
+ { { 0x3668f6653a0a3f95L,0x893694167ceba27bL,0x89981fade4728fe9L,
+ 0x7102c8a08a093562L },
+ { 0xbb80310e235d21c8L,0x505e55d1befb7f7bL,0xa0a9081112958a67L,
+ 0xd67e106a4d851fefL } },
+ /* 64 << 77 */
+ { { 0xb84011a9431dd80eL,0xeb7c7cca73306cd9L,0x20fadd29d1b3b730L,
+ 0x83858b5bfe37b3d3L },
+ { 0xbf4cd193b6251d5cL,0x1cca1fd31352d952L,0xc66157a490fbc051L,
+ 0x7990a63889b98636L } },
+ /* 0 << 84 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 84 */
+ { { 0xe5aa692a87dec0e1L,0x010ded8df7b39d00L,0x7b1b80c854cfa0b5L,
+ 0x66beb876a0f8ea28L },
+ { 0x50d7f5313476cd0eL,0xa63d0e65b08d3949L,0x1a09eea953479fc6L,
+ 0x82ae9891f499e742L } },
+ /* 2 << 84 */
+ { { 0xab58b9105ca7d866L,0x582967e23adb3b34L,0x89ae4447cceac0bcL,
+ 0x919c667c7bf56af5L },
+ { 0x9aec17b160f5dcd7L,0xec697b9fddcaadbcL,0x0b98f341463467f5L,
+ 0xb187f1f7a967132fL } },
+ /* 3 << 84 */
+ { { 0x90fe7a1d214aeb18L,0x1506af3c741432f7L,0xbb5565f9e591a0c4L,
+ 0x10d41a77b44f1bc3L },
+ { 0xa09d65e4a84bde96L,0x42f060d8f20a6a1cL,0x652a3bfdf27f9ce7L,
+ 0xb6bdb65c3b3d739fL } },
+ /* 4 << 84 */
+ { { 0xeb5ddcb6ec7fae9fL,0x995f2714efb66e5aL,0xdee95d8e69445d52L,
+ 0x1b6c2d4609e27620L },
+ { 0x32621c318129d716L,0xb03909f10958c1aaL,0x8c468ef91af4af63L,
+ 0x162c429ffba5cdf6L } },
+ /* 5 << 84 */
+ { { 0x2f682343753b9371L,0x29cab45a5f1f9cd7L,0x571623abb245db96L,
+ 0xc507db093fd79999L },
+ { 0x4e2ef652af036c32L,0x86f0cc7805018e5cL,0xc10a73d4ab8be350L,
+ 0x6519b3977e826327L } },
+ /* 6 << 84 */
+ { { 0xe8cb5eef9c053df7L,0x8de25b37b300ea6fL,0xdb03fa92c849cffbL,
+ 0x242e43a7e84169bbL },
+ { 0xe4fa51f4dd6f958eL,0x6925a77ff4445a8dL,0xe6e72a50e90d8949L,
+ 0xc66648e32b1f6390L } },
+ /* 7 << 84 */
+ { { 0xb2ab1957173e460cL,0x1bbbce7530704590L,0xc0a90dbddb1c7162L,
+ 0x505e399e15cdd65dL },
+ { 0x68434dcb57797ab7L,0x60ad35ba6a2ca8e8L,0x4bfdb1e0de3336c1L,
+ 0xbbef99ebd8b39015L } },
+ /* 8 << 84 */
+ { { 0x6c3b96f31711ebecL,0x2da40f1fce98fdc4L,0xb99774d357b4411fL,
+ 0x87c8bdf415b65bb6L },
+ { 0xda3a89e3c2eef12dL,0xde95bb9b3c7471f3L,0x600f225bd812c594L,
+ 0x54907c5d2b75a56bL } },
+ /* 9 << 84 */
+ { { 0xa93cc5f08db60e35L,0x743e3cd6fa833319L,0x7dad5c41f81683c9L,
+ 0x70c1e7d99c34107eL },
+ { 0x0edc4a39a6be0907L,0x36d4703586d0b7d3L,0x8c76da03272bfa60L,
+ 0x0b4a07ea0f08a414L } },
+ /* 10 << 84 */
+ { { 0x699e4d2945c1dd53L,0xcadc5898231debb5L,0xdf49fcc7a77f00e0L,
+ 0x93057bbfa73e5a0eL },
+ { 0x2f8b7ecd027a4cd1L,0x114734b3c614011aL,0xe7a01db767677c68L,
+ 0x89d9be5e7e273f4fL } },
+ /* 11 << 84 */
+ { { 0xd225cb2e089808efL,0xf1f7a27dd59e4107L,0x53afc7618211b9c9L,
+ 0x0361bc67e6819159L },
+ { 0x2a865d0b7f071426L,0x6a3c1810e7072567L,0x3e3bca1e0d6bcabdL,
+ 0xa1b02bc1408591bcL } },
+ /* 12 << 84 */
+ { { 0xe0deee5931fba239L,0xf47424d398bd91d1L,0x0f8886f4071a3c1dL,
+ 0x3f7d41e8a819233bL },
+ { 0x708623c2cf6eb998L,0x86bb49af609a287fL,0x942bb24963c90762L,
+ 0x0ef6eea555a9654bL } },
+ /* 13 << 84 */
+ { { 0x5f6d2d7236f5defeL,0xfa9922dc56f99176L,0x6c8c5ecef78ce0c7L,
+ 0x7b44589dbe09b55eL },
+ { 0xe11b3bca9ea83770L,0xd7fa2c7f2ab71547L,0x2a3dd6fa2a1ddcc0L,
+ 0x09acb4305a7b7707L } },
+ /* 14 << 84 */
+ { { 0x4add4a2e649d4e57L,0xcd53a2b01917526eL,0xc526233020b44ac4L,
+ 0x4028746abaa2c31dL },
+ { 0x5131839064291d4cL,0xbf48f151ee5ad909L,0xcce57f597b185681L,
+ 0x7c3ac1b04854d442L } },
+ /* 15 << 84 */
+ { { 0x65587dc3c093c171L,0xae7acb2424f42b65L,0x5a338adb955996cbL,
+ 0xc8e656756051f91bL },
+ { 0x66711fba28b8d0b1L,0x15d74137b6c10a90L,0x70cdd7eb3a232a80L,
+ 0xc9e2f07f6191ed24L } },
+ /* 16 << 84 */
+ { { 0xa80d1db6f79588c0L,0xfa52fc69b55768ccL,0x0b4df1ae7f54438aL,
+ 0x0cadd1a7f9b46a4fL },
+ { 0xb40ea6b31803dd6fL,0x488e4fa555eaae35L,0x9f047d55382e4e16L,
+ 0xc9b5b7e02f6e0c98L } },
+ /* 17 << 84 */
+ { { 0x6b1bd2d395762649L,0xa9604ee7c7aea3f6L,0x3646ff276dc6f896L,
+ 0x9bf0e7f52860bad1L },
+ { 0x2d92c8217cb44b92L,0xa2f5ce63aea9c182L,0xd0a2afb19154a5fdL,
+ 0x482e474c95801da6L } },
+ /* 18 << 84 */
+ { { 0xc19972d0b611c24bL,0x1d468e6560a8f351L,0xeb7580697bcf6421L,
+ 0xec9dd0ee88fbc491L },
+ { 0x5b59d2bf956c2e32L,0x73dc6864dcddf94eL,0xfd5e2321bcee7665L,
+ 0xa7b4f8ef5e9a06c4L } },
+ /* 19 << 84 */
+ { { 0xfba918dd7280f855L,0xbbaac2608baec688L,0xa3b3f00f33400f42L,
+ 0x3d2dba2966f2e6e4L },
+ { 0xb6f71a9498509375L,0x8f33031fcea423ccL,0x009b8dd04807e6fbL,
+ 0x5163cfe55cdb954cL } },
+ /* 20 << 84 */
+ { { 0x03cc8f17cf41c6e8L,0xf1f03c2a037b925cL,0xc39c19cc66d2427cL,
+ 0x823d24ba7b6c18e4L },
+ { 0x32ef9013901f0b4fL,0x684360f1f8941c2eL,0x0ebaff522c28092eL,
+ 0x7891e4e3256c932fL } },
+ /* 21 << 84 */
+ { { 0x51264319ac445e3dL,0x553432e78ea74381L,0xe6eeaa6967e9c50aL,
+ 0x27ced28462e628c7L },
+ { 0x3f96d3757a4afa57L,0xde0a14c3e484c150L,0x364a24eb38bd9923L,
+ 0x1df18da0e5177422L } },
+ /* 22 << 84 */
+ { { 0x174e8f82d8d38a9bL,0x2e97c600e7de1391L,0xc5709850a1c175ddL,
+ 0x969041a032ae5035L },
+ { 0xcbfd533b76a2086bL,0xd6bba71bd7c2e8feL,0xb2d58ee6099dfb67L,
+ 0x3a8b342d064a85d9L } },
+ /* 23 << 84 */
+ { { 0x3bc07649522f9be3L,0x690c075bdf1f49a8L,0x80e1aee83854ec42L,
+ 0x2a7dbf4417689dc7L },
+ { 0xc004fc0e3faf4078L,0xb2f02e9edf11862cL,0xf10a5e0fa0a1b7b3L,
+ 0x30aca6238936ec80L } },
+ /* 24 << 84 */
+ { { 0xf83cbf0502f40d9aL,0x4681c4682c318a4dL,0x985756180e9c2674L,
+ 0xbe79d0461847092eL },
+ { 0xaf1e480a78bd01e0L,0x6dd359e472a51db9L,0x62ce3821e3afbab6L,
+ 0xc5cee5b617733199L } },
+ /* 25 << 84 */
+ { { 0xe08b30d46ffd9fbbL,0x6e5bc69936c610b7L,0xf343cff29ce262cfL,
+ 0xca2e4e3568b914c1L },
+ { 0x011d64c016de36c5L,0xe0b10fdd42e2b829L,0x789429816685aaf8L,
+ 0xe7511708230ede97L } },
+ /* 26 << 84 */
+ { { 0x671ed8fc3b922bf8L,0xe4d8c0a04c29b133L,0x87eb12393b6e99c4L,
+ 0xaff3974c8793bebaL },
+ { 0x037494052c18df9bL,0xc5c3a29391007139L,0x6a77234fe37a0b95L,
+ 0x02c29a21b661c96bL } },
+ /* 27 << 84 */
+ { { 0xc3aaf1d6141ecf61L,0x9195509e3bb22f53L,0x2959740422d51357L,
+ 0x1b083822537bed60L },
+ { 0xcd7d6e35e07289f0L,0x1f94c48c6dd86effL,0xc8bb1f82eb0f9cfaL,
+ 0x9ee0b7e61b2eb97dL } },
+ /* 28 << 84 */
+ { { 0x5a52fe2e34d74e31L,0xa352c3103bf79ab6L,0x97ff6c5aabfeeb8fL,
+ 0xbfbe8feff5c97305L },
+ { 0xd6081ce6a7904608L,0x1f812f3ac4fca249L,0x9b24bc9ab9e5e200L,
+ 0x91022c6738012ee8L } },
+ /* 29 << 84 */
+ { { 0xe83d9c5d30a713a1L,0x4876e3f084ef0f93L,0xc9777029c1fbf928L,
+ 0xef7a6bb3bce7d2a4L },
+ { 0xb8067228dfa2a659L,0xd5cd3398d877a48fL,0xbea4fd8f025d0f3fL,
+ 0xd67d2e352eae7c2bL } },
+ /* 30 << 84 */
+ { { 0x184de7d7cc5f4394L,0xb5551b5c4536e142L,0x2e89b212d34aa60aL,
+ 0x14a96feaf50051d5L },
+ { 0x4e21ef740d12bb0bL,0xc522f02060b9677eL,0x8b12e4672df7731dL,
+ 0x39f803827b326d31L } },
+ /* 31 << 84 */
+ { { 0xdfb8630c39024a94L,0xaacb96a897319452L,0xd68a3961eda3867cL,
+ 0x0c58e2b077c4ffcaL },
+ { 0x3d545d634da919faL,0xef79b69af15e2289L,0x54bc3d3d808bab10L,
+ 0xc8ab300745f82c37L } },
+ /* 32 << 84 */
+ { { 0xc12738b67c4a658aL,0xb3c4763940e72182L,0x3b77be468798e44fL,
+ 0xdc047df217a7f85fL },
+ { 0x2439d4c55e59d92dL,0xcedca475e8e64d8dL,0xa724cd0d87ca9b16L,
+ 0x35e4fd59a5540dfeL } },
+ /* 33 << 84 */
+ { { 0xf8c1ff18e4bcf6b1L,0x856d6285295018faL,0x433f665c3263c949L,
+ 0xa6a76dd6a1f21409L },
+ { 0x17d32334cc7b4f79L,0xa1d0312206720e4aL,0xadb6661d81d9bed5L,
+ 0xf0d6fb0211db15d1L } },
+ /* 34 << 84 */
+ { { 0x7fd11ad51fb747d2L,0xab50f9593033762bL,0x2a7e711bfbefaf5aL,
+ 0xc73932783fef2bbfL },
+ { 0xe29fa2440df6f9beL,0x9092757b71efd215L,0xee60e3114f3d6fd9L,
+ 0x338542d40acfb78bL } },
+ /* 35 << 84 */
+ { { 0x44a23f0838961a0fL,0x1426eade986987caL,0x36e6ee2e4a863cc6L,
+ 0x48059420628b8b79L },
+ { 0x30303ad87396e1deL,0x5c8bdc4838c5aad1L,0x3e40e11f5c8f5066L,
+ 0xabd6e7688d246bbdL } },
+ /* 36 << 84 */
+ { { 0x68aa40bb23330a01L,0xd23f5ee4c34eafa0L,0x3bbee3155de02c21L,
+ 0x18dd4397d1d8dd06L },
+ { 0x3ba1939a122d7b44L,0xe6d3b40aa33870d6L,0x8e620f701c4fe3f8L,
+ 0xf6bba1a5d3a50cbfL } },
+ /* 37 << 84 */
+ { { 0x4a78bde5cfc0aee0L,0x847edc46c08c50bdL,0xbaa2439cad63c9b2L,
+ 0xceb4a72810fc2acbL },
+ { 0xa419e40e26da033dL,0x6cc3889d03e02683L,0x1cd28559fdccf725L,
+ 0x0fd7e0f18d13d208L } },
+ /* 38 << 84 */
+ { { 0x01b9733b1f0df9d4L,0x8cc2c5f3a2b5e4f3L,0x43053bfa3a304fd4L,
+ 0x8e87665c0a9f1aa7L },
+ { 0x087f29ecd73dc965L,0x15ace4553e9023dbL,0x2370e3092bce28b4L,
+ 0xf9723442b6b1e84aL } },
+ /* 39 << 84 */
+ { { 0xbeee662eb72d9f26L,0xb19396def0e47109L,0x85b1fa73e13289d0L,
+ 0x436cf77e54e58e32L },
+ { 0x0ec833b3e990ef77L,0x7373e3ed1b11fc25L,0xbe0eda870fc332ceL,
+ 0xced049708d7ea856L } },
+ /* 40 << 84 */
+ { { 0xf85ff7857e977ca0L,0xb66ee8dadfdd5d2bL,0xf5e37950905af461L,
+ 0x587b9090966d487cL },
+ { 0x6a198a1b32ba0127L,0xa7720e07141615acL,0xa23f3499996ef2f2L,
+ 0xef5f64b4470bcb3dL } },
+ /* 41 << 84 */
+ { { 0xa526a96292b8c559L,0x0c14aac069740a0fL,0x0d41a9e3a6bdc0a5L,
+ 0x97d521069c48aef4L },
+ { 0xcf16bd303e7c253bL,0xcc834b1a47fdedc1L,0x7362c6e5373aab2eL,
+ 0x264ed85ec5f590ffL } },
+ /* 42 << 84 */
+ { { 0x7a46d9c066d41870L,0xa50c20b14787ba09L,0x185e7e51e3d44635L,
+ 0xb3b3e08031e2d8dcL },
+ { 0xbed1e558a179e9d9L,0x2daa3f7974a76781L,0x4372baf23a40864fL,
+ 0x46900c544fe75cb5L } },
+ /* 43 << 84 */
+ { { 0xb95f171ef76765d0L,0x4ad726d295c87502L,0x2ec769da4d7c99bdL,
+ 0x5e2ddd19c36cdfa8L },
+ { 0xc22117fca93e6deaL,0xe8a2583b93771123L,0xbe2f6089fa08a3a2L,
+ 0x4809d5ed8f0e1112L } },
+ /* 44 << 84 */
+ { { 0x3b414aa3da7a095eL,0x9049acf126f5aaddL,0x78d46a4d6be8b84aL,
+ 0xd66b1963b732b9b3L },
+ { 0x5c2ac2a0de6e9555L,0xcf52d098b5bd8770L,0x15a15fa60fd28921L,
+ 0x56ccb81e8b27536dL } },
+ /* 45 << 84 */
+ { { 0x0f0d8ab89f4ccbb8L,0xed5f44d2db221729L,0x4314198800bed10cL,
+ 0xc94348a41d735b8bL },
+ { 0x79f3e9c429ef8479L,0x4c13a4e3614c693fL,0x32c9af568e143a14L,
+ 0xbc517799e29ac5c4L } },
+ /* 46 << 84 */
+ { { 0x05e179922774856fL,0x6e52fb056c1bf55fL,0xaeda4225e4f19e16L,
+ 0x70f4728aaf5ccb26L },
+ { 0x5d2118d1b2947f22L,0xc827ea16281d6fb9L,0x8412328d8cf0eabdL,
+ 0x45ee9fb203ef9dcfL } },
+ /* 47 << 84 */
+ { { 0x8e700421bb937d63L,0xdf8ff2d5cc4b37a6L,0xa4c0d5b25ced7b68L,
+ 0x6537c1efc7308f59L },
+ { 0x25ce6a263b37f8e8L,0x170e9a9bdeebc6ceL,0xdd0379528728d72cL,
+ 0x445b0e55850154bcL } },
+ /* 48 << 84 */
+ { { 0x4b7d0e0683a7337bL,0x1e3416d4ffecf249L,0x24840eff66a2b71fL,
+ 0xd0d9a50ab37cc26dL },
+ { 0xe21981506fe28ef7L,0x3cc5ef1623324c7fL,0x220f3455769b5263L,
+ 0xe2ade2f1a10bf475L } },
+ /* 49 << 84 */
+ { { 0x28cd20fa458d3671L,0x1549722c2dc4847bL,0x6dd01e55591941e3L,
+ 0x0e6fbcea27128ccbL },
+ { 0xae1a1e6b3bef0262L,0xfa8c472c8f54e103L,0x7539c0a872c052ecL,
+ 0xd7b273695a3490e9L } },
+ /* 50 << 84 */
+ { { 0x143fe1f171684349L,0x36b4722e32e19b97L,0xdc05922790980affL,
+ 0x175c9c889e13d674L },
+ { 0xa7de5b226e6bfdb1L,0x5ea5b7b2bedb4b46L,0xd5570191d34a6e44L,
+ 0xfcf60d2ea24ff7e6L } },
+ /* 51 << 84 */
+ { { 0x614a392d677819e1L,0x7be74c7eaa5a29e8L,0xab50fece63c85f3fL,
+ 0xaca2e2a946cab337L },
+ { 0x7f700388122a6fe3L,0xdb69f703882a04a8L,0x9a77935dcf7aed57L,
+ 0xdf16207c8d91c86fL } },
+ /* 52 << 84 */
+ { { 0x2fca49ab63ed9998L,0xa3125c44a77ddf96L,0x05dd8a8624344072L,
+ 0xa023dda2fec3fb56L },
+ { 0x421b41fc0c743032L,0x4f2120c15e438639L,0xfb7cae51c83c1b07L,
+ 0xb2370caacac2171aL } },
+ /* 53 << 84 */
+ { { 0x2eb2d9626cc820fbL,0x59feee5cb85a44bfL,0x94620fca5b6598f0L,
+ 0x6b922cae7e314051L },
+ { 0xff8745ad106bed4eL,0x546e71f5dfa1e9abL,0x935c1e481ec29487L,
+ 0x9509216c4d936530L } },
+ /* 54 << 84 */
+ { { 0xc7ca306785c9a2dbL,0xd6ae51526be8606fL,0x09dbcae6e14c651dL,
+ 0xc9536e239bc32f96L },
+ { 0xa90535a934521b03L,0xf39c526c878756ffL,0x383172ec8aedf03cL,
+ 0x20a8075eefe0c034L } },
+ /* 55 << 84 */
+ { { 0xf22f9c6264026422L,0x8dd1078024b9d076L,0x944c742a3bef2950L,
+ 0x55b9502e88a2b00bL },
+ { 0xa59e14b486a09817L,0xa39dd3ac47bb4071L,0x55137f663be0592fL,
+ 0x07fcafd4c9e63f5bL } },
+ /* 56 << 84 */
+ { { 0x963652ee346eb226L,0x7dfab085ec2facb7L,0x273bf2b8691add26L,
+ 0x30d74540f2b46c44L },
+ { 0x05e8e73ef2c2d065L,0xff9b8a00d42eeac9L,0x2fcbd20597209d22L,
+ 0xeb740ffade14ea2cL } },
+ /* 57 << 84 */
+ { { 0xc71ff913a8aef518L,0x7bfc74bbfff4cfa2L,0x1716680cb6b36048L,
+ 0x121b2cce9ef79af1L },
+ { 0xbff3c836a01eb3d3L,0x50eb1c6a5f79077bL,0xa48c32d6a004bbcfL,
+ 0x47a593167d64f61dL } },
+ /* 58 << 84 */
+ { { 0x6068147f93102016L,0x12c5f65494d12576L,0xefb071a7c9bc6b91L,
+ 0x7c2da0c56e23ea95L },
+ { 0xf4fd45b6d4a1dd5dL,0x3e7ad9b69122b13cL,0x342ca118e6f57a48L,
+ 0x1c2e94a706f8288fL } },
+ /* 59 << 84 */
+ { { 0x99e68f075a97d231L,0x7c80de974d838758L,0xbce0f5d005872727L,
+ 0xbe5d95c219c4d016L },
+ { 0x921d5cb19c2492eeL,0x42192dc1404d6fb3L,0x4c84dcd132f988d3L,
+ 0xde26d61fa17b8e85L } },
+ /* 60 << 84 */
+ { { 0xc466dcb6137c7408L,0x9a38d7b636a266daL,0x7ef5cb0683bebf1bL,
+ 0xe5cdcbbf0fd014e3L },
+ { 0x30aa376df65965a0L,0x60fe88c2ebb3e95eL,0x33fd0b6166ee6f20L,
+ 0x8827dcdb3f41f0a0L } },
+ /* 61 << 84 */
+ { { 0xbf8a9d240c56c690L,0x40265dadddb7641dL,0x522b05bf3a6b662bL,
+ 0x466d1dfeb1478c9bL },
+ { 0xaa6169621484469bL,0x0db6054902df8f9fL,0xc37bca023cb8bf51L,
+ 0x5effe34621371ce8L } },
+ /* 62 << 84 */
+ { { 0xe8f65264ff112c32L,0x8a9c736d7b971fb2L,0xa4f194707b75080dL,
+ 0xfc3f2c5a8839c59bL },
+ { 0x1d6c777e5aeb49c2L,0xf3db034dda1addfeL,0xd76fee5a5535affcL,
+ 0x0853ac70b92251fdL } },
+ /* 63 << 84 */
+ { { 0x37e3d5948b2a29d5L,0x28f1f4574de00ddbL,0x8083c1b5f42c328bL,
+ 0xd8ef1d8fe493c73bL },
+ { 0x96fb626041dc61bdL,0xf74e8a9d27ee2f8aL,0x7c605a802c946a5dL,
+ 0xeed48d653839ccfdL } },
+ /* 64 << 84 */
+ { { 0x9894344f3a29467aL,0xde81e949c51eba6dL,0xdaea066ba5e5c2f2L,
+ 0x3fc8a61408c8c7b3L },
+ { 0x7adff88f06d0de9fL,0xbbc11cf53b75ce0aL,0x9fbb7accfbbc87d5L,
+ 0xa1458e267badfde2L } },
+ /* 0 << 91 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 91 */
+ { { 0x1cb43668e039c256L,0x5f26fb8b7c17fd5dL,0xeee426af79aa062bL,
+ 0x072002d0d78fbf04L },
+ { 0x4c9ca237e84fb7e3L,0xb401d8a10c82133dL,0xaaa525926d7e4181L,
+ 0xe943083373dbb152L } },
+ /* 2 << 91 */
+ { { 0xf92dda31be24319aL,0x03f7d28be095a8e7L,0xa52fe84098782185L,
+ 0x276ddafe29c24dbcL },
+ { 0x80cd54961d7a64ebL,0xe43608897f1dbe42L,0x2f81a8778438d2d5L,
+ 0x7e4d52a885169036L } },
+ /* 3 << 91 */
+ { { 0x19e3d5b11d59715dL,0xc7eaa762d788983eL,0xe5a730b0abf1f248L,
+ 0xfbab8084fae3fd83L },
+ { 0x65e50d2153765b2fL,0xbdd4e083fa127f3dL,0x9cf3c074397b1b10L,
+ 0x59f8090cb1b59fd3L } },
+ /* 4 << 91 */
+ { { 0x7b15fd9d615faa8fL,0x8fa1eb40968554edL,0x7bb4447e7aa44882L,
+ 0x2bb2d0d1029fff32L },
+ { 0x075e2a646caa6d2fL,0x8eb879de22e7351bL,0xbcd5624e9a506c62L,
+ 0x218eaef0a87e24dcL } },
+ /* 5 << 91 */
+ { { 0x37e5684744ddfa35L,0x9ccfc5c5dab3f747L,0x9ac1df3f1ee96cf4L,
+ 0x0c0571a13b480b8fL },
+ { 0x2fbeb3d54b3a7b3cL,0x35c036695dcdbb99L,0x52a0f5dcb2415b3aL,
+ 0xd57759b44413ed9aL } },
+ /* 6 << 91 */
+ { { 0x1fe647d83d30a2c5L,0x0857f77ef78a81dcL,0x11d5a334131a4a9bL,
+ 0xc0a94af929d393f5L },
+ { 0xbc3a5c0bdaa6ec1aL,0xba9fe49388d2d7edL,0xbb4335b4bb614797L,
+ 0x991c4d6872f83533L } },
+ /* 7 << 91 */
+ { { 0x53258c28d2f01cb3L,0x93d6eaa3d75db0b1L,0x419a2b0de87d0db4L,
+ 0xa1e48f03d8fe8493L },
+ { 0xf747faf6c508b23aL,0xf137571a35d53549L,0x9f5e58e2fcf9b838L,
+ 0xc7186ceea7fd3cf5L } },
+ /* 8 << 91 */
+ { { 0x77b868cee978a1d3L,0xe3a68b337ab92d04L,0x5102979487a5b862L,
+ 0x5f0606c33a61d41dL },
+ { 0x2814be276f9326f1L,0x2f521c14c6fe3c2eL,0x17464d7dacdf7351L,
+ 0x10f5f9d3777f7e44L } },
+ /* 9 << 91 */
+ { { 0xce8e616b269fb37dL,0xaaf738047de62de5L,0xaba111754fdd4153L,
+ 0x515759ba3770b49bL },
+ { 0x8b09ebf8aa423a61L,0x592245a1cd41fb92L,0x1cba8ec19b4c8936L,
+ 0xa87e91e3af36710eL } },
+ /* 10 << 91 */
+ { { 0x1fd84ce43d34a2e3L,0xee3759ceb43b5d61L,0x895bc78c619186c7L,
+ 0xf19c3809cbb9725aL },
+ { 0xc0be21aade744b1fL,0xa7d222b060f8056bL,0x74be6157b23efe11L,
+ 0x6fab2b4f0cd68253L } },
+ /* 11 << 91 */
+ { { 0xad33ea5f4bf1d725L,0x9c1d8ee24f6c950fL,0x544ee78aa377af06L,
+ 0x54f489bb94a113e1L },
+ { 0x8f11d634992fb7e8L,0x0169a7aaa2a44347L,0x1d49d4af95020e00L,
+ 0x95945722e08e120bL } },
+ /* 12 << 91 */
+ { { 0xb6e33878a4d32282L,0xe36e029d48020ae7L,0xe05847fb37a9b750L,
+ 0xf876812cb29e3819L },
+ { 0x84ad138ed23a17f0L,0x6d7b4480f0b3950eL,0xdfa8aef42fd67ae0L,
+ 0x8d3eea2452333af6L } },
+ /* 13 << 91 */
+ { { 0x0d052075b15d5accL,0xc6d9c79fbd815bc4L,0x8dcafd88dfa36cf2L,
+ 0x908ccbe238aa9070L },
+ { 0x638722c4ba35afceL,0x5a3da8b0fd6abf0bL,0x2dce252cc9c335c1L,
+ 0x84e7f0de65aa799bL } },
+ /* 14 << 91 */
+ { { 0x2101a522b99a72cbL,0x06de6e6787618016L,0x5ff8c7cde6f3653eL,
+ 0x0a821ab5c7a6754aL },
+ { 0x7e3fa52b7cb0b5a2L,0xa7fb121cc9048790L,0x1a72502006ce053aL,
+ 0xb490a31f04e929b0L } },
+ /* 15 << 91 */
+ { { 0xe17be47d62dd61adL,0x781a961c6be01371L,0x1063bfd3dae3cbbaL,
+ 0x356474067f73c9baL },
+ { 0xf50e957b2736a129L,0xa6313702ed13f256L,0x9436ee653a19fcc5L,
+ 0xcf2bdb29e7a4c8b6L } },
+ /* 16 << 91 */
+ { { 0xb06b1244c5f95cd8L,0xda8c8af0f4ab95f4L,0x1bae59c2b9e5836dL,
+ 0x07d51e7e3acffffcL },
+ { 0x01e15e6ac2ccbcdaL,0x3bc1923f8528c3e0L,0x43324577a49fead4L,
+ 0x61a1b8842aa7a711L } },
+ /* 17 << 91 */
+ { { 0xf9a86e08700230efL,0x0af585a1bd19adf8L,0x7645f361f55ad8f2L,
+ 0x6e67622346c3614cL },
+ { 0x23cb257c4e774d3fL,0x82a38513ac102d1bL,0x9bcddd887b126aa5L,
+ 0xe716998beefd3ee4L } },
+ /* 18 << 91 */
+ { { 0x4239d571fb167583L,0xdd011c78d16c8f8aL,0x271c289569a27519L,
+ 0x9ce0a3b7d2d64b6aL },
+ { 0x8c977289d5ec6738L,0xa3b49f9a8840ef6bL,0x808c14c99a453419L,
+ 0x5c00295b0cf0a2d5L } },
+ /* 19 << 91 */
+ { { 0x524414fb1d4bcc76L,0xb07691d2459a88f1L,0x77f43263f70d110fL,
+ 0x64ada5e0b7abf9f3L },
+ { 0xafd0f94e5b544cf5L,0xb4a13a15fd2713feL,0xb99b7d6e250c74f4L,
+ 0x097f2f7320324e45L } },
+ /* 20 << 91 */
+ { { 0x994b37d8affa8208L,0xc3c31b0bdc29aafcL,0x3da746517a3a607fL,
+ 0xd8e1b8c1fe6955d6L },
+ { 0x716e1815c8418682L,0x541d487f7dc91d97L,0x48a04669c6996982L,
+ 0xf39cab1583a6502eL } },
+ /* 21 << 91 */
+ { { 0x025801a0e68db055L,0xf3569758ba3338d5L,0xb0c8c0aaee2afa84L,
+ 0x4f6985d3fb6562d1L },
+ { 0x351f1f15132ed17aL,0x510ed0b4c04365feL,0xa3f98138e5b1f066L,
+ 0xbc9d95d632df03dcL } },
+ /* 22 << 91 */
+ { { 0xa83ccf6e19abd09eL,0x0b4097c14ff17edbL,0x58a5c478d64a06ceL,
+ 0x2ddcc3fd544a58fdL },
+ { 0xd449503d9e8153b8L,0x3324fd027774179bL,0xaf5d47c8dbd9120cL,
+ 0xeb86016234fa94dbL } },
+ /* 23 << 91 */
+ { { 0x5817bdd1972f07f4L,0xe5579e2ed27bbcebL,0x86847a1f5f11e5a6L,
+ 0xb39ed2557c3cf048L },
+ { 0xe1076417a2f62e55L,0x6b9ab38f1bcf82a2L,0x4bb7c3197aeb29f9L,
+ 0xf6d17da317227a46L } },
+ /* 24 << 91 */
+ { { 0xab53ddbd0f968c00L,0xa03da7ec000c880bL,0x7b2396246a9ad24dL,
+ 0x612c040101ec60d0L },
+ { 0x70d10493109f5df1L,0xfbda403080af7550L,0x30b93f95c6b9a9b3L,
+ 0x0c74ec71007d9418L } },
+ /* 25 << 91 */
+ { { 0x941755646edb951fL,0x5f4a9d787f22c282L,0xb7870895b38d1196L,
+ 0xbc593df3a228ce7cL },
+ { 0xc78c5bd46af3641aL,0x7802200b3d9b3dccL,0x0dc73f328be33304L,
+ 0x847ed87d61ffb79aL } },
+ /* 26 << 91 */
+ { { 0xf85c974e6d671192L,0x1e14100ade16f60fL,0x45cb0d5a95c38797L,
+ 0x18923bba9b022da4L },
+ { 0xef2be899bbe7e86eL,0x4a1510ee216067bfL,0xd98c815484d5ce3eL,
+ 0x1af777f0f92a2b90L } },
+ /* 27 << 91 */
+ { { 0x9fbcb4004ef65724L,0x3e04a4c93c0ca6feL,0xfb3e2cb555002994L,
+ 0x1f3a93c55363ecabL },
+ { 0x1fe00efe3923555bL,0x744bedd91e1751eaL,0x3fb2db596ab69357L,
+ 0x8dbd7365f5e6618bL } },
+ /* 28 << 91 */
+ { { 0x99d53099df1ea40eL,0xb3f24a0b57d61e64L,0xd088a198596eb812L,
+ 0x22c8361b5762940bL },
+ { 0x66f01f97f9c0d95cL,0x884611728e43cdaeL,0x11599a7fb72b15c3L,
+ 0x135a7536420d95ccL } },
+ /* 29 << 91 */
+ { { 0x2dcdf0f75f7ae2f6L,0x15fc6e1dd7fa6da2L,0x81ca829ad1d441b6L,
+ 0x84c10cf804a106b6L },
+ { 0xa9b26c95a73fbbd0L,0x7f24e0cb4d8f6ee8L,0x48b459371e25a043L,
+ 0xf8a74fca036f3dfeL } },
+ /* 30 << 91 */
+ { { 0x1ed46585c9f84296L,0x7fbaa8fb3bc278b0L,0xa8e96cd46c4fcbd0L,
+ 0x940a120273b60a5fL },
+ { 0x34aae12055a4aec8L,0x550e9a74dbd742f0L,0x794456d7228c68abL,
+ 0x492f8868a4e25ec6L } },
+ /* 31 << 91 */
+ { { 0x682915adb2d8f398L,0xf13b51cc5b84c953L,0xcda90ab85bb917d6L,
+ 0x4b6155604ea3dee1L },
+ { 0x578b4e850a52c1c8L,0xeab1a69520b75fc4L,0x60c14f3caa0bb3c6L,
+ 0x220f448ab8216094L } },
+ /* 32 << 91 */
+ { { 0x4fe7ee31b0e63d34L,0xf4600572a9e54fabL,0xc0493334d5e7b5a4L,
+ 0x8589fb9206d54831L },
+ { 0xaa70f5cc6583553aL,0x0879094ae25649e5L,0xcc90450710044652L,
+ 0xebb0696d02541c4fL } },
+ /* 33 << 91 */
+ { { 0x5a171fdeb9718710L,0x38f1bed8f374a9f5L,0xc8c582e1ba39bdc1L,
+ 0xfc457b0a908cc0ceL },
+ { 0x9a187fd4883841e2L,0x8ec25b3938725381L,0x2553ed0596f84395L,
+ 0x095c76616f6c6897L } },
+ /* 34 << 91 */
+ { { 0x917ac85c4bdc5610L,0xb2885fe4179eb301L,0x5fc655478b78bdccL,
+ 0x4a9fc893e59e4699L },
+ { 0xbb7ff0cd3ce299afL,0x195be9b3adf38b20L,0x6a929c87d38ddb8fL,
+ 0x55fcc99cb21a51b9L } },
+ /* 35 << 91 */
+ { { 0x2b695b4c721a4593L,0xed1e9a15768eaac2L,0xfb63d71c7489f914L,
+ 0xf98ba31c78118910L },
+ { 0x802913739b128eb4L,0x7801214ed448af4aL,0xdbd2e22b55418dd3L,
+ 0xeffb3c0dd3998242L } },
+ /* 36 << 91 */
+ { { 0xdfa6077cc7bf3827L,0xf2165bcb47f8238fL,0xfe37cf688564d554L,
+ 0xe5f825c40a81fb98L },
+ { 0x43cc4f67ffed4d6fL,0xbc609578b50a34b0L,0x8aa8fcf95041faf1L,
+ 0x5659f053651773b6L } },
+ /* 37 << 91 */
+ { { 0xe87582c36044d63bL,0xa60894090cdb0ca0L,0x8c993e0fbfb2bcf6L,
+ 0xfc64a71945985cfcL },
+ { 0x15c4da8083dbedbaL,0x804ae1122be67df7L,0xda4c9658a23defdeL,
+ 0x12002ddd5156e0d3L } },
+ /* 38 << 91 */
+ { { 0xe68eae895dd21b96L,0x8b99f28bcf44624dL,0x0ae008081ec8897aL,
+ 0xdd0a93036712f76eL },
+ { 0x962375224e233de4L,0x192445b12b36a8a5L,0xabf9ff74023993d9L,
+ 0x21f37bf42aad4a8fL } },
+ /* 39 << 91 */
+ { { 0x340a4349f8bd2bbdL,0x1d902cd94868195dL,0x3d27bbf1e5fdb6f1L,
+ 0x7a5ab088124f9f1cL },
+ { 0xc466ab06f7a09e03L,0x2f8a197731f2c123L,0xda355dc7041b6657L,
+ 0xcb840d128ece2a7cL } },
+ /* 40 << 91 */
+ { { 0xb600ad9f7db32675L,0x78fea13307a06f1bL,0x5d032269b31f6094L,
+ 0x07753ef583ec37aaL },
+ { 0x03485aed9c0bea78L,0x41bb3989bc3f4524L,0x09403761697f726dL,
+ 0x6109beb3df394820L } },
+ /* 41 << 91 */
+ { { 0x804111ea3b6d1145L,0xb6271ea9a8582654L,0x619615e624e66562L,
+ 0xa2554945d7b6ad9cL },
+ { 0xd9c4985e99bfe35fL,0x9770ccc07b51cdf6L,0x7c32701392881832L,
+ 0x8777d45f286b26d1L } },
+ /* 42 << 91 */
+ { { 0x9bbeda22d847999dL,0x03aa33b6c3525d32L,0x4b7b96d428a959a1L,
+ 0xbb3786e531e5d234L },
+ { 0xaeb5d3ce6961f247L,0x20aa85af02f93d3fL,0x9cd1ad3dd7a7ae4fL,
+ 0xbf6688f0781adaa8L } },
+ /* 43 << 91 */
+ { { 0xb1b40e867469ceadL,0x1904c524309fca48L,0x9b7312af4b54bbc7L,
+ 0xbe24bf8f593affa2L },
+ { 0xbe5e0790bd98764bL,0xa0f45f17a26e299eL,0x4af0d2c26b8fe4c7L,
+ 0xef170db18ae8a3e6L } },
+ /* 44 << 91 */
+ { { 0x0e8d61a029e0ccc1L,0xcd53e87e60ad36caL,0x328c6623c8173822L,
+ 0x7ee1767da496be55L },
+ { 0x89f13259648945afL,0x9e45a5fd25c8009cL,0xaf2febd91f61ab8cL,
+ 0x43f6bc868a275385L } },
+ /* 45 << 91 */
+ { { 0x87792348f2142e79L,0x17d89259c6e6238aL,0x7536d2f64a839d9bL,
+ 0x1f428fce76a1fbdcL },
+ { 0x1c1096010db06dfeL,0xbfc16bc150a3a3ccL,0xf9cbd9ec9b30f41bL,
+ 0x5b5da0d600138cceL } },
+ /* 46 << 91 */
+ { { 0xec1d0a4856ef96a7L,0xb47eb848982bf842L,0x66deae32ec3f700dL,
+ 0x4e43c42caa1181e0L },
+ { 0xa1d72a31d1a4aa2aL,0x440d4668c004f3ceL,0x0d6a2d3b45fe8a7aL,
+ 0x820e52e2fb128365L } },
+ /* 47 << 91 */
+ { { 0x29ac5fcf25e51b09L,0x180cd2bf2023d159L,0xa9892171a1ebf90eL,
+ 0xf97c4c877c132181L },
+ { 0x9f1dc724c03dbb7eL,0xae043765018cbbe4L,0xfb0b2a360767d153L,
+ 0xa8e2f4d6249cbaebL } },
+ /* 48 << 91 */
+ { { 0x172a5247d95ea168L,0x1758fada2970764aL,0xac803a511d978169L,
+ 0x299cfe2ede77e01bL },
+ { 0x652a1e17b0a98927L,0x2e26e1d120014495L,0x7ae0af9f7175b56aL,
+ 0xc2e22a80d64b9f95L } },
+ /* 49 << 91 */
+ { { 0x4d0ff9fbd90a060aL,0x496a27dbbaf38085L,0x32305401da776bcfL,
+ 0xb8cdcef6725f209eL },
+ { 0x61ba0f37436a0bbaL,0x263fa10876860049L,0x92beb98eda3542cfL,
+ 0xa2d4d14ad5849538L } },
+ /* 50 << 91 */
+ { { 0x989b9d6812e9a1bcL,0x61d9075c5f6e3268L,0x352c6aa999ace638L,
+ 0xde4e4a55920f43ffL },
+ { 0xe5e4144ad673c017L,0x667417ae6f6e05eaL,0x613416aedcd1bd56L,
+ 0x5eb3620186693711L } },
+ /* 51 << 91 */
+ { { 0x2d7bc5043a1aa914L,0x175a129976dc5975L,0xe900e0f23fc8125cL,
+ 0x569ef68c11198875L },
+ { 0x9012db6363a113b4L,0xe3bd3f5698835766L,0xa5c94a5276412deaL,
+ 0xad9e2a09aa735e5cL } },
+ /* 52 << 91 */
+ { { 0x405a984c508b65e9L,0xbde4a1d16df1a0d1L,0x1a9433a1dfba80daL,
+ 0xe9192ff99440ad2eL },
+ { 0x9f6496965099fe92L,0x25ddb65c0b27a54aL,0x178279ddc590da61L,
+ 0x5479a999fbde681aL } },
+ /* 53 << 91 */
+ { { 0xd0e84e05013fe162L,0xbe11dc92632d471bL,0xdf0b0c45fc0e089fL,
+ 0x04fb15b04c144025L },
+ { 0xa61d5fc213c99927L,0xa033e9e03de2eb35L,0xf8185d5cb8dacbb4L,
+ 0x9a88e2658644549dL } },
+ /* 54 << 91 */
+ { { 0xf717af6254671ff6L,0x4bd4241b5fa58603L,0x06fba40be67773c0L,
+ 0xc1d933d26a2847e9L },
+ { 0xf4f5acf3689e2c70L,0x92aab0e746bafd31L,0x798d76aa3473f6e5L,
+ 0xcc6641db93141934L } },
+ /* 55 << 91 */
+ { { 0xcae27757d31e535eL,0x04cc43b687c2ee11L,0x8d1f96752e029ffaL,
+ 0xc2150672e4cc7a2cL },
+ { 0x3b03c1e08d68b013L,0xa9d6816fedf298f3L,0x1bfbb529a2804464L,
+ 0x95a52fae5db22125L } },
+ /* 56 << 91 */
+ { { 0x55b321600e1cb64eL,0x004828f67e7fc9feL,0x13394b821bb0fb93L,
+ 0xb6293a2d35f1a920L },
+ { 0xde35ef21d145d2d9L,0xbe6225b3bb8fa603L,0x00fc8f6b32cf252dL,
+ 0xa28e52e6117cf8c2L } },
+ /* 57 << 91 */
+ { { 0x9d1dc89b4c371e6dL,0xcebe067536ef0f28L,0x5de05d09a4292f81L,
+ 0xa8303593353e3083L },
+ { 0xa1715b0a7e37a9bbL,0x8c56f61e2b8faec3L,0x5250743133c9b102L,
+ 0x0130cefca44431f0L } },
+ /* 58 << 91 */
+ { { 0x56039fa0bd865cfbL,0x4b03e578bc5f1dd7L,0x40edf2e4babe7224L,
+ 0xc752496d3a1988f6L },
+ { 0xd1572d3b564beb6bL,0x0db1d11039a1c608L,0x568d193416f60126L,
+ 0x05ae9668f354af33L } },
+ /* 59 << 91 */
+ { { 0x19de6d37c92544f2L,0xcc084353a35837d5L,0xcbb6869c1a514eceL,
+ 0xb633e7282e1d1066L },
+ { 0xf15dd69f936c581cL,0x96e7b8ce7439c4f9L,0x5e676f482e448a5bL,
+ 0xb2ca7d5bfd916bbbL } },
+ /* 60 << 91 */
+ { { 0xd55a2541f5024025L,0x47bc5769e4c2d937L,0x7d31b92a0362189fL,
+ 0x83f3086eef7816f9L },
+ { 0xf9f46d94b587579aL,0xec2d22d830e76c5fL,0x27d57461b000ffcfL,
+ 0xbb7e65f9364ffc2cL } },
+ /* 61 << 91 */
+ { { 0x7c7c94776652a220L,0x61618f89d696c981L,0x5021701d89effff3L,
+ 0xf2c8ff8e7c314163L },
+ { 0x2da413ad8efb4d3eL,0x937b5adfce176d95L,0x22867d342a67d51cL,
+ 0x262b9b1018eb3ac9L } },
+ /* 62 << 91 */
+ { { 0x4e314fe4c43ff28bL,0x764766276a664e7aL,0x3e90e40bb7a565c2L,
+ 0x8588993ac1acf831L },
+ { 0xd7b501d68f938829L,0x996627ee3edd7d4cL,0x37d44a6290cd34c7L,
+ 0xa8327499f3833e8dL } },
+ /* 63 << 91 */
+ { { 0x2e18917d4bf50353L,0x85dd726b556765fbL,0x54fe65d693d5ab66L,
+ 0x3ddbaced915c25feL },
+ { 0xa799d9a412f22e85L,0xe2a248676d06f6bcL,0xf4f1ee5643ca1637L,
+ 0xfda2828b61ece30aL } },
+ /* 64 << 91 */
+ { { 0x758c1a3ea2dee7a6L,0xdcde2f3c734b2284L,0xaba445d24eaba6adL,
+ 0x35aaf66876cee0a7L },
+ { 0x7e0b04a9e5aa049aL,0xe74083ad91103e84L,0xbeb183ce40afecc3L,
+ 0x6b89de9fea043f7aL } },
+ /* 0 << 98 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 98 */
+ { { 0x0e299d23fe67ba66L,0x9145076093cf2f34L,0xf45b5ea997fcf913L,
+ 0x5be008438bd7dddaL },
+ { 0x358c3e05d53ff04dL,0xbf7ccdc35de91ef7L,0xad684dbfb69ec1a0L,
+ 0x367e7cf2801fd997L } },
+ /* 2 << 98 */
+ { { 0x0ca1f3b7b0dc8595L,0x27de46089f1d9f2eL,0x1af3bf39badd82a7L,
+ 0x79356a7965862448L },
+ { 0xc0602345f5f9a052L,0x1a8b0f89139a42f9L,0xb53eee42844d40fcL,
+ 0x93b0bfe54e5b6368L } },
+ /* 3 << 98 */
+ { { 0x5434dd02c024789cL,0x90dca9ea41b57bfcL,0x8aa898e2243398dfL,
+ 0xf607c834894a94bbL },
+ { 0xbb07be97c2c99b76L,0x6576ba6718c29302L,0x3d79efcce703a88cL,
+ 0xf259ced7b6a0d106L } },
+ /* 4 << 98 */
+ { { 0x0f893a5dc8de610bL,0xe8c515fb67e223ceL,0x7774bfa64ead6dc5L,
+ 0x89d20f95925c728fL },
+ { 0x7a1e0966098583ceL,0xa2eedb9493f2a7d7L,0x1b2820974c304d4aL,
+ 0x0842e3dac077282dL } },
+ /* 5 << 98 */
+ { { 0xe4d972a33b9e2d7bL,0x7cc60b27c48218ffL,0x8fc7083884149d91L,
+ 0x5c04346f2f461eccL },
+ { 0xebe9fdf2614650a9L,0x5e35b537c1f666acL,0x645613d188babc83L,
+ 0x88cace3ac5e1c93eL } },
+ /* 6 << 98 */
+ { { 0x209ca3753de92e23L,0xccb03cc85fbbb6e3L,0xccb90f03d7b1487eL,
+ 0xfa9c2a38c710941fL },
+ { 0x756c38236724ceedL,0x3a902258192d0323L,0xb150e519ea5e038eL,
+ 0xdcba2865c7427591L } },
+ /* 7 << 98 */
+ { { 0xe549237f78890732L,0xc443bef953fcb4d9L,0x9884d8a6eb3480d6L,
+ 0x8a35b6a13048b186L },
+ { 0xb4e4471665e9a90aL,0x45bf380d653006c0L,0x8f3f820d4fe9ae3bL,
+ 0x244a35a0979a3b71L } },
+ /* 8 << 98 */
+ { { 0xa1010e9d74cd06ffL,0x9c17c7dfaca3eeacL,0x74c86cd38063aa2bL,
+ 0x8595c4b3734614ffL },
+ { 0xa3de00ca990f62ccL,0xd9bed213ca0c3be5L,0x7886078adf8ce9f5L,
+ 0xddb27ce35cd44444L } },
+ /* 9 << 98 */
+ { { 0xed374a6658926dddL,0x138b2d49908015b8L,0x886c6579de1f7ab8L,
+ 0x888b9aa0c3020b7aL },
+ { 0xd3ec034e3a96e355L,0xba65b0b8f30fbe9aL,0x064c8e50ff21367aL,
+ 0x1f508ea40b04b46eL } },
+ /* 10 << 98 */
+ { { 0x98561a49747c866cL,0xbbb1e5fe0518a062L,0x20ff4e8becdc3608L,
+ 0x7f55cded20184027L },
+ { 0x8d73ec95f38c85f0L,0x5b589fdf8bc3b8c3L,0xbe95dd980f12b66fL,
+ 0xf5bd1a090e338e01L } },
+ /* 11 << 98 */
+ { { 0x65163ae55e915918L,0x6158d6d986f8a46bL,0x8466b538eeebf99cL,
+ 0xca8761f6bca477efL },
+ { 0xaf3449c29ebbc601L,0xef3b0f41e0c3ae2fL,0xaa6c577d5de63752L,
+ 0xe916660164682a51L } },
+ /* 12 << 98 */
+ { { 0x5a3097befc15aa1eL,0x40d12548b54b0745L,0x5bad4706519a5f12L,
+ 0xed03f717a439dee6L },
+ { 0x0794bb6c4a02c499L,0xf725083dcffe71d2L,0x2cad75190f3adcafL,
+ 0x7f68ea1c43729310L } },
+ /* 13 << 98 */
+ { { 0xe747c8c7b7ffd977L,0xec104c3580761a22L,0x8395ebaf5a3ffb83L,
+ 0xfb3261f4e4b63db7L },
+ { 0x53544960d883e544L,0x13520d708cc2eeb8L,0x08f6337bd3d65f99L,
+ 0x83997db2781cf95bL } },
+ /* 14 << 98 */
+ { { 0xce6ff1060dbd2c01L,0x4f8eea6b1f9ce934L,0x546f7c4b0e993921L,
+ 0x6236a3245e753fc7L },
+ { 0x65a41f84a16022e9L,0x0c18d87843d1dbb2L,0x73c556402d4cef9cL,
+ 0xa042810870444c74L } },
+ /* 15 << 98 */
+ { { 0x68e4f15e9afdfb3cL,0x49a561435bdfb6dfL,0xa9bc1bd45f823d97L,
+ 0xbceb5970ea111c2aL },
+ { 0x366b455fb269bbc4L,0x7cd85e1ee9bc5d62L,0xc743c41c4f18b086L,
+ 0xa4b4099095294fb9L } },
+ /* 16 << 98 */
+ { { 0x9c7c581d26ee8382L,0xcf17dcc5359d638eL,0xee8273abb728ae3dL,
+ 0x1d112926f821f047L },
+ { 0x1149847750491a74L,0x687fa761fde0dfb9L,0x2c2580227ea435abL,
+ 0x6b8bdb9491ce7e3fL } },
+ /* 17 << 98 */
+ { { 0x4c5b5dc93bf834aaL,0x043718194f6c7e4bL,0xc284e00a3736bcadL,
+ 0x0d88111821ae8f8dL },
+ { 0xf9cf0f82f48c8e33L,0xa11fd075a1bf40dbL,0xdceab0dedc2733e5L,
+ 0xc560a8b58e986bd7L } },
+ /* 18 << 98 */
+ { { 0x48dd1fe23929d097L,0x3885b29092f188f1L,0x0f2ae613da6fcdacL,
+ 0x9054303eb662a46cL },
+ { 0xb6871e440738042aL,0x98e6a977bdaf6449L,0xd8bc0650d1c9df1bL,
+ 0xef3d645136e098f9L } },
+ /* 19 << 98 */
+ { { 0x03fbae82b6d72d28L,0x77ca9db1f5d84080L,0x8a112cffa58efc1cL,
+ 0x518d761cc564cb4aL },
+ { 0x69b5740ef0d1b5ceL,0x717039cce9eb1785L,0x3fe29f9022f53382L,
+ 0x8e54ba566bc7c95cL } },
+ /* 20 << 98 */
+ { { 0x9c806d8af7f91d0fL,0x3b61b0f1a82a5728L,0x4640032d94d76754L,
+ 0x273eb5de47d834c6L },
+ { 0x2988abf77b4e4d53L,0xb7ce66bfde401777L,0x9fba6b32715071b3L,
+ 0x82413c24ad3a1a98L } },
+ /* 21 << 98 */
+ { { 0x5b7fc8c4e0e8ad93L,0xb5679aee5fab868dL,0xb1f9d2fa2b3946f3L,
+ 0x458897dc5685b50aL },
+ { 0x1e98c93089d0caf3L,0x39564c5f78642e92L,0x1b77729a0dbdaf18L,
+ 0xf9170722579e82e6L } },
+ /* 22 << 98 */
+ { { 0x680c0317e4515fa5L,0xf85cff84fb0c790fL,0xc7a82aab6d2e0765L,
+ 0x7446bca935c82b32L },
+ { 0x5de607aa6d63184fL,0x7c1a46a8262803a6L,0xd218313daebe8035L,
+ 0x92113ffdc73c51f8L } },
+ /* 23 << 98 */
+ { { 0x4b38e08312e7e46cL,0x69d0a37a56126bd5L,0xfb3f324b73c07e04L,
+ 0xa0c22f678fda7267L },
+ { 0x8f2c00514d2c7d8fL,0xbc45ced3cbe2cae5L,0xe1c6cf07a8f0f277L,
+ 0xbc3923121eb99a98L } },
+ /* 24 << 98 */
+ { { 0x75537b7e3cc8ac85L,0x8d725f57dd02753bL,0xfd05ff64b737df2fL,
+ 0x55fe8712f6d2531dL },
+ { 0x57ce04a96ab6b01cL,0x69a02a897cd93724L,0x4f82ac35cf86699bL,
+ 0x8242d3ad9cb4b232L } },
+ /* 25 << 98 */
+ { { 0x713d0f65d62105e5L,0xbb222bfa2d29be61L,0xf2f9a79e6cfbef09L,
+ 0xfc24d8d3d5d6782fL },
+ { 0x5db77085d4129967L,0xdb81c3ccdc3c2a43L,0x9d655fc005d8d9a3L,
+ 0x3f5d057a54298026L } },
+ /* 26 << 98 */
+ { { 0x1157f56d88c54694L,0xb26baba59b09573eL,0x2cab03b022adffd1L,
+ 0x60a412c8dd69f383L },
+ { 0xed76e98b54b25039L,0xd4ee67d3687e714dL,0x877396487b00b594L,
+ 0xce419775c9ef709bL } },
+ /* 27 << 98 */
+ { { 0x40f76f851c203a40L,0x30d352d6eafd8f91L,0xaf196d3d95578dd2L,
+ 0xea4bb3d777cc3f3dL },
+ { 0x42a5bd03b98e782bL,0xac958c400624920dL,0xb838134cfc56fcc8L,
+ 0x86ec4ccf89572e5eL } },
+ /* 28 << 98 */
+ { { 0x69c435269be47be0L,0x323b7dd8cb28fea1L,0xfa5538ba3a6c67e5L,
+ 0xef921d701d378e46L },
+ { 0xf92961fc3c4b880eL,0x3f6f914e98940a67L,0xa990eb0afef0ff39L,
+ 0xa6c2920ff0eeff9cL } },
+ /* 29 << 98 */
+ { { 0xca80416651b8d9a3L,0x42531bc90ffb0db1L,0x72ce4718aa82e7ceL,
+ 0x6e199913df574741L },
+ { 0xd5f1b13dd5d36946L,0x8255dc65f68f0194L,0xdc9df4cd8710d230L,
+ 0x3453c20f138c1988L } },
+ /* 30 << 98 */
+ { { 0x9af98dc089a6ef01L,0x4dbcc3f09857df85L,0x348056015c1ad924L,
+ 0x40448da5d0493046L },
+ { 0xf629926d4ee343e2L,0x6343f1bd90e8a301L,0xefc9349140815b3fL,
+ 0xf882a423de8f66fbL } },
+ /* 31 << 98 */
+ { { 0x3a12d5f4e7db9f57L,0x7dfba38a3c384c27L,0x7a904bfd6fc660b1L,
+ 0xeb6c5db32773b21cL },
+ { 0xc350ee661cdfe049L,0x9baac0ce44540f29L,0xbc57b6aba5ec6aadL,
+ 0x167ce8c30a7c1baaL } },
+ /* 32 << 98 */
+ { { 0xb23a03a553fb2b56L,0x6ce141e74e057f78L,0x796525c389e490d9L,
+ 0x0bc95725a31a7e75L },
+ { 0x1ec567911220fd06L,0x716e3a3c408b0bd6L,0x31cd6bf7e8ebeba9L,
+ 0xa7326ca6bee6b670L } },
+ /* 33 << 98 */
+ { { 0x3d9f851ccd090c43L,0x561e8f13f12c3988L,0x50490b6a904b7be4L,
+ 0x61690ce10410737bL },
+ { 0x299e9a370f009052L,0x258758f0f026092eL,0x9fa255f3fdfcdc0fL,
+ 0xdbc9fb1fc0e1bcd2L } },
+ /* 34 << 98 */
+ { { 0x35f9dd6e24651840L,0xdca45a84a5c59abcL,0x103d396fecca4938L,
+ 0x4532da0ab97b3f29L },
+ { 0xc4135ea51999a6bfL,0x3aa9505a5e6bf2eeL,0xf77cef063f5be093L,
+ 0x97d1a0f8a943152eL } },
+ /* 35 << 98 */
+ { { 0x2cb0ebba2e1c21ddL,0xf41b29fc2c6797c4L,0xc6e17321b300101fL,
+ 0x4422b0e9d0d79a89L },
+ { 0x49e4901c92f1bfc4L,0x06ab1f8fe1e10ed9L,0x84d35577db2926b8L,
+ 0xca349d39356e8ec2L } },
+ /* 36 << 98 */
+ { { 0x70b63d32343bf1a9L,0x8fd3bd2837d1a6b1L,0x0454879c316865b4L,
+ 0xee959ff6c458efa2L },
+ { 0x0461dcf89706dc3fL,0x737db0e2164e4b2eL,0x092626802f8843c8L,
+ 0x54498bbc7745e6f6L } },
+ /* 37 << 98 */
+ { { 0x359473faa29e24afL,0xfcc3c45470aa87a1L,0xfd2c4bf500573aceL,
+ 0xb65b514e28dd1965L },
+ { 0xe46ae7cf2193e393L,0x60e9a4e1f5444d97L,0xe7594e9600ff38edL,
+ 0x43d84d2f0a0e0f02L } },
+ /* 38 << 98 */
+ { { 0x8b6db141ee398a21L,0xb88a56aee3bcc5beL,0x0a1aa52f373460eaL,
+ 0x20da1a56160bb19bL },
+ { 0xfb54999d65bf0384L,0x71a14d245d5a180eL,0xbc44db7b21737b04L,
+ 0xd84fcb1801dd8e92L } },
+ /* 39 << 98 */
+ { { 0x80de937bfa44b479L,0x535054995c98fd4fL,0x1edb12ab28f08727L,
+ 0x4c58b582a5f3ef53L },
+ { 0xbfb236d88327f246L,0xc3a3bfaa4d7df320L,0xecd96c59b96024f2L,
+ 0xfc293a537f4e0433L } },
+ /* 40 << 98 */
+ { { 0x5341352b5acf6e10L,0xc50343fdafe652c3L,0x4af3792d18577a7fL,
+ 0xe1a4c617af16823dL },
+ { 0x9b26d0cd33425d0aL,0x306399ed9b7bc47fL,0x2a792f33706bb20bL,
+ 0x3121961498111055L } },
+ /* 41 << 98 */
+ { { 0x864ec06487f5d28bL,0x11392d91962277fdL,0xb5aa7942bb6aed5fL,
+ 0x080094dc47e799d9L },
+ { 0x4afa588c208ba19bL,0xd3e7570f8512f284L,0xcbae64e602f5799aL,
+ 0xdeebe7ef514b9492L } },
+ /* 42 << 98 */
+ { { 0x30300f98e5c298ffL,0x17f561be3678361fL,0xf52ff31298cb9a16L,
+ 0x6233c3bc5562d490L },
+ { 0x7bfa15a192e3a2cbL,0x961bcfd1e6365119L,0x3bdd29bf2c8c53b1L,
+ 0x739704df822844baL } },
+ /* 43 << 98 */
+ { { 0x7dacfb587e7b754bL,0x23360791a806c9b9L,0xe7eb88c923504452L,
+ 0x2983e996852c1783L },
+ { 0xdd4ae529958d881dL,0x026bae03262c7b3cL,0x3a6f9193960b52d1L,
+ 0xd0980f9092696cfbL } },
+ /* 44 << 98 */
+ { { 0x4c1f428cd5f30851L,0x94dfed272a4f6630L,0x4df53772fc5d48a4L,
+ 0xdd2d5a2f933260ceL },
+ { 0x574115bdd44cc7a5L,0x4ba6b20dbd12533aL,0x30e93cb8243057c9L,
+ 0x794c486a14de320eL } },
+ /* 45 << 98 */
+ { { 0xe925d4cef21496e4L,0xf951d198ec696331L,0x9810e2de3e8d812fL,
+ 0xd0a47259389294abL },
+ { 0x513ba2b50e3bab66L,0x462caff5abad306fL,0xe2dc6d59af04c49eL,
+ 0x1aeb8750e0b84b0bL } },
+ /* 46 << 98 */
+ { { 0xc034f12f2f7d0ca2L,0x6d2e8128e06acf2fL,0x801f4f8321facc2fL,
+ 0xa1170c03f40ef607L },
+ { 0xfe0a1d4f7805a99cL,0xbde56a36cc26aba5L,0x5b1629d035531f40L,
+ 0xac212c2b9afa6108L } },
+ /* 47 << 98 */
+ { { 0x30a06bf315697be5L,0x6f0545dc2c63c7c1L,0x5d8cb8427ccdadafL,
+ 0xd52e379bac7015bbL },
+ { 0xc4f56147f462c23eL,0xd44a429846bc24b0L,0xbc73d23ae2856d4fL,
+ 0x61cedd8c0832bcdfL } },
+ /* 48 << 98 */
+ { { 0x6095355699f241d7L,0xee4adbd7001a349dL,0x0b35bf6aaa89e491L,
+ 0x7f0076f4136f7546L },
+ { 0xd19a18ba9264da3dL,0x6eb2d2cd62a7a28bL,0xcdba941f8761c971L,
+ 0x1550518ba3be4a5dL } },
+ /* 49 << 98 */
+ { { 0xd0e8e2f057d0b70cL,0xeea8612ecd133ba3L,0x814670f044416aecL,
+ 0x424db6c330775061L },
+ { 0xd96039d116213fd1L,0xc61e7fa518a3478fL,0xa805bdcccb0c5021L,
+ 0xbdd6f3a80cc616ddL } },
+ /* 50 << 98 */
+ { { 0x060096675d97f7e2L,0x31db0fc1af0bf4b6L,0x23680ed45491627aL,
+ 0xb99a3c667d741fb1L },
+ { 0xe9bb5f5536b1ff92L,0x29738577512b388dL,0xdb8a2ce750fcf263L,
+ 0x385346d46c4f7b47L } },
+ /* 51 << 98 */
+ { { 0xbe86c5ef31631f9eL,0xbf91da2103a57a29L,0xc3b1f7967b23f821L,
+ 0x0f7d00d2770db354L },
+ { 0x8ffc6c3bd8fe79daL,0xcc5e8c40d525c996L,0x4640991dcfff632aL,
+ 0x64d97e8c67112528L } },
+ /* 52 << 98 */
+ { { 0xc232d97302f1cd1eL,0xce87eacb1dd212a4L,0x6e4c8c73e69802f7L,
+ 0x12ef02901fffddbdL },
+ { 0x941ec74e1bcea6e2L,0xd0b540243cb92cbbL,0x809fb9d47e8f9d05L,
+ 0x3bf16159f2992aaeL } },
+ /* 53 << 98 */
+ { { 0xad40f279f8a7a838L,0x11aea63105615660L,0xbf52e6f1a01f6fa1L,
+ 0xef0469953dc2aec9L },
+ { 0x785dbec9d8080711L,0xe1aec60a9fdedf76L,0xece797b5fa21c126L,
+ 0xc66e898f05e52732L } },
+ /* 54 << 98 */
+ { { 0x39bb69c408811fdbL,0x8bfe1ef82fc7f082L,0xc8e7a393174f4138L,
+ 0xfba8ad1dd58d1f98L },
+ { 0xbc21d0cebfd2fd5bL,0x0b839a826ee60d61L,0xaacf7658afd22253L,
+ 0xb526bed8aae396b3L } },
+ /* 55 << 98 */
+ { { 0xccc1bbc238564464L,0x9e3ff9478c45bc73L,0xcde9bca358188a78L,
+ 0x138b8ee0d73bf8f7L },
+ { 0x5c7e234c4123c489L,0x66e69368fa643297L,0x0629eeee39a15fa3L,
+ 0x95fab881a9e2a927L } },
+ /* 56 << 98 */
+ { { 0xb2497007eafbb1e1L,0xd75c9ce6e75b7a93L,0x3558352defb68d78L,
+ 0xa2f26699223f6396L },
+ { 0xeb911ecfe469b17aL,0x62545779e72d3ec2L,0x8ea47de782cb113fL,
+ 0xebe4b0864e1fa98dL } },
+ /* 57 << 98 */
+ { { 0xec2d5ed78cdfedb1L,0xa535c077fe211a74L,0x9678109b11d244c5L,
+ 0xf17c8bfbbe299a76L },
+ { 0xb651412efb11fbc4L,0xea0b548294ab3f65L,0xd8dffd950cf78243L,
+ 0x2e719e57ce0361d4L } },
+ /* 58 << 98 */
+ { { 0x9007f085304ddc5bL,0x095e8c6d4daba2eaL,0x5a33cdb43f9d28a9L,
+ 0x85b95cd8e2283003L },
+ { 0xbcd6c819b9744733L,0x29c5f538fc7f5783L,0x6c49b2fad59038e4L,
+ 0x68349cc13bbe1018L } },
+ /* 59 << 98 */
+ { { 0xcc490c1d21830ee5L,0x36f9c4eee9bfa297L,0x58fd729448de1a94L,
+ 0xaadb13a84e8f2cdcL },
+ { 0x515eaaa081313dbaL,0xc76bb468c2152dd8L,0x357f8d75a653dbf8L,
+ 0xe4d8c4d1b14ac143L } },
+ /* 60 << 98 */
+ { { 0xbdb8e675b055cb40L,0x898f8e7b977b5167L,0xecc65651b82fb863L,
+ 0x565448146d88f01fL },
+ { 0xb0928e95263a75a9L,0xcfb6836f1a22fcdaL,0x651d14db3f3bd37cL,
+ 0x1d3837fbb6ad4664L } },
+ /* 61 << 98 */
+ { { 0x7c5fb538ff4f94abL,0x7243c7126d7fb8f2L,0xef13d60ca85c5287L,
+ 0x18cfb7c74bb8dd1bL },
+ { 0x82f9bfe672908219L,0x35c4592b9d5144abL,0x52734f379cf4b42fL,
+ 0x6bac55e78c60ddc4L } },
+ /* 62 << 98 */
+ { { 0xb5cd811e94dea0f6L,0x259ecae4e18cc1a3L,0x6a0e836e15e660f8L,
+ 0x6c639ea60e02bff2L },
+ { 0x8721b8cb7e1026fdL,0x9e73b50b63261942L,0xb8c7097477f01da3L,
+ 0x1839e6a68268f57fL } },
+ /* 63 << 98 */
+ { { 0x571b94155150b805L,0x1892389ef92c7097L,0x8d69c18e4a084b95L,
+ 0x7014c512be5b495cL },
+ { 0x4780db361b07523cL,0x2f6219ce2c1c64faL,0xc38b81b0602c105aL,
+ 0xab4f4f205dc8e360L } },
+ /* 64 << 98 */
+ { { 0x20d3c982cf7d62d2L,0x1f36e29d23ba8150L,0x48ae0bf092763f9eL,
+ 0x7a527e6b1d3a7007L },
+ { 0xb4a89097581a85e3L,0x1f1a520fdc158be5L,0xf98db37d167d726eL,
+ 0x8802786e1113e862L } },
+ /* 0 << 105 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 105 */
+ { { 0xefb2149e36f09ab0L,0x03f163ca4a10bb5bL,0xd029704506e20998L,
+ 0x56f0af001b5a3babL },
+ { 0x7af4cfec70880e0dL,0x7332a66fbe3d913fL,0x32e6c84a7eceb4bdL,
+ 0xedc4a79a9c228f55L } },
+ /* 2 << 105 */
+ { { 0xc37c7dd0c55c4496L,0xa6a9635725bbabd2L,0x5b7e63f2add7f363L,
+ 0x9dce37822e73f1dfL },
+ { 0xe1e5a16ab2b91f71L,0xe44898235ba0163cL,0xf2759c32f6e515adL,
+ 0xa5e2f1f88615eecfL } },
+ /* 3 << 105 */
+ { { 0x74519be7abded551L,0x03d358b8c8b74410L,0x4d00b10b0e10d9a9L,
+ 0x6392b0b128da52b7L },
+ { 0x6744a2980b75c904L,0xc305b0aea8f7f96cL,0x042e421d182cf932L,
+ 0xf6fc5d509e4636caL } },
+ /* 4 << 105 */
+ { { 0x795847c9d64cc78cL,0x6c50621b9b6cb27bL,0x07099bf8df8022abL,
+ 0x48f862ebc04eda1dL },
+ { 0xd12732ede1603c16L,0x19a80e0f5c9a9450L,0xe2257f54b429b4fcL,
+ 0x66d3b2c645460515L } },
+ /* 5 << 105 */
+ { { 0x6ca4f87e822e37beL,0x73f237b4253bda4eL,0xf747f3a241190aebL,
+ 0xf06fa36f804cf284L },
+ { 0x0a6bbb6efc621c12L,0x5d624b6440b80ec6L,0x4b0724257ba556f3L,
+ 0x7fa0c3543e2d20a8L } },
+ /* 6 << 105 */
+ { { 0xe921fa31e3229d41L,0xa929c65294531bd4L,0x84156027a6d38209L,
+ 0xf3d69f736bdb97bdL },
+ { 0x8906d19a16833631L,0x68a34c2e03d51be3L,0xcb59583b0e511cd8L,
+ 0x99ce6bfdfdc132a8L } },
+ /* 7 << 105 */
+ { { 0x3facdaaaffcdb463L,0x658bbc1a34a38b08L,0x12a801f8f1a9078dL,
+ 0x1567bcf96ab855deL },
+ { 0xe08498e03572359bL,0xcf0353e58659e68bL,0xbb86e9c87d23807cL,
+ 0xbc08728d2198e8a2L } },
+ /* 8 << 105 */
+ { { 0x8de2b7bc453cadd6L,0x203900a7bc0bc1f8L,0xbcd86e47a6abd3afL,
+ 0x911cac128502effbL },
+ { 0x2d550242ec965469L,0x0e9f769229e0017eL,0x633f078f65979885L,
+ 0xfb87d4494cf751efL } },
+ /* 9 << 105 */
+ { { 0xe1790e4bfc25419aL,0x364672034bff3cfdL,0xc8db638625b6e83fL,
+ 0x6cc69f236cad6fd2L },
+ { 0x0219e45a6bc68bb9L,0xe43d79b6297f7334L,0x7d445368465dc97cL,
+ 0x4b9eea322a0b949aL } },
+ /* 10 << 105 */
+ { { 0x1b96c6ba6102d021L,0xeaafac782f4461eaL,0xd4b85c41c49f19a8L,
+ 0x275c28e4cf538875L },
+ { 0x35451a9ddd2e54e0L,0x6991adb50605618bL,0x5b8b4bcd7b36cd24L,
+ 0x372a4f8c56f37216L } },
+ /* 11 << 105 */
+ { { 0xc890bd73a6a5da60L,0x6f083da0dc4c9ff0L,0xf4e14d94f0536e57L,
+ 0xf9ee1edaaaec8243L },
+ { 0x571241ec8bdcf8e7L,0xa5db82710b041e26L,0x9a0b9a99e3fff040L,
+ 0xcaaf21dd7c271202L } },
+ /* 12 << 105 */
+ { { 0xb4e2b2e14f0dd2e8L,0xe77e7c4f0a377ac7L,0x69202c3f0d7a2198L,
+ 0xf759b7ff28200eb8L },
+ { 0xc87526eddcfe314eL,0xeb84c52453d5cf99L,0xb1b52ace515138b6L,
+ 0x5aa7ff8c23fca3f4L } },
+ /* 13 << 105 */
+ { { 0xff0b13c3b9791a26L,0x960022dacdd58b16L,0xdbd55c9257aad2deL,
+ 0x3baaaaa3f30fe619L },
+ { 0x9a4b23460d881efdL,0x506416c046325e2aL,0x91381e76035c18d4L,
+ 0xb3bb68bef27817b0L } },
+ /* 14 << 105 */
+ { { 0x15bfb8bf5116f937L,0x7c64a586c1268943L,0x71e25cc38419a2c8L,
+ 0x9fd6b0c48335f463L },
+ { 0x4bf0ba3ce8ee0e0eL,0x6f6fba60298c21faL,0x57d57b39ae66bee0L,
+ 0x292d513022672544L } },
+ /* 15 << 105 */
+ { { 0xf451105dbab093b3L,0x012f59b902839986L,0x8a9158023474a89cL,
+ 0x048c919c2de03e97L },
+ { 0xc476a2b591071cd5L,0x791ed89a034970a5L,0x89bd9042e1b7994bL,
+ 0x8eaf5179a1057ffdL } },
+ /* 16 << 105 */
+ { { 0x6066e2a2d551ee10L,0x87a8f1d8727e09a6L,0x00d08bab2c01148dL,
+ 0x6da8e4f1424f33feL },
+ { 0x466d17f0cf9a4e71L,0xff5020103bf5cb19L,0xdccf97d8d062ecc0L,
+ 0x80c0d9af81d80ac4L } },
+ /* 17 << 105 */
+ { { 0xe87771d8033f2876L,0xb0186ec67d5cc3dbL,0x58e8bb803bc9bc1dL,
+ 0x4d1395cc6f6ef60eL },
+ { 0xa73c62d6186244a0L,0x918e5f23110a5b53L,0xed4878ca741b7eabL,
+ 0x3038d71adbe03e51L } },
+ /* 18 << 105 */
+ { { 0x840204b7a93c3246L,0x21ab6069a0b9b4cdL,0xf5fa6e2bb1d64218L,
+ 0x1de6ad0ef3d56191L },
+ { 0x570aaa88ff1929c7L,0xc6df4c6b640e87b5L,0xde8a74f2c65f0cccL,
+ 0x8b972fd5e6f6cc01L } },
+ /* 19 << 105 */
+ { { 0x3fff36b60b846531L,0xba7e45e610a5e475L,0x84a1d10e4145b6c5L,
+ 0xf1f7f91a5e046d9dL },
+ { 0x0317a69244de90d7L,0x951a1d4af199c15eL,0x91f78046c9d73debL,
+ 0x74c82828fab8224fL } },
+ /* 20 << 105 */
+ { { 0xaa6778fce7560b90L,0xb4073e61a7e824ceL,0xff0d693cd642eba8L,
+ 0x7ce2e57a5dccef38L },
+ { 0x89c2c7891df1ad46L,0x83a06922098346fdL,0x2d715d72da2fc177L,
+ 0x7b6dd71d85b6cf1dL } },
+ /* 21 << 105 */
+ { { 0xc60a6d0a73fa9cb0L,0xedd3992e328bf5a9L,0xc380ddd0832c8c82L,
+ 0xd182d410a2a0bf50L },
+ { 0x7d9d7438d9a528dbL,0xe8b1a0e9caf53994L,0xddd6e5fe0e19987cL,
+ 0xacb8df03190b059dL } },
+ /* 22 << 105 */
+ { { 0x53703a328300129fL,0x1f63766268c43bfdL,0xbcbd191300e54051L,
+ 0x812fcc627bf5a8c5L },
+ { 0x3f969d5f29fb85daL,0x72f4e00a694759e8L,0x426b6e52790726b7L,
+ 0x617bbc873bdbb209L } },
+ /* 23 << 105 */
+ { { 0x511f8bb997aee317L,0x812a4096e81536a8L,0x137dfe593ac09b9bL,
+ 0x0682238fba8c9a7aL },
+ { 0x7072ead6aeccb4bdL,0x6a34e9aa692ba633L,0xc82eaec26fff9d33L,
+ 0xfb7535121d4d2b62L } },
+ /* 24 << 105 */
+ { { 0x1a0445ff1d7aadabL,0x65d38260d5f6a67cL,0x6e62fb0891cfb26fL,
+ 0xef1e0fa55c7d91d6L },
+ { 0x47e7c7ba33db72cdL,0x017cbc09fa7c74b2L,0x3c931590f50a503cL,
+ 0xcac54f60616baa42L } },
+ /* 25 << 105 */
+ { { 0x9b6cd380b2369f0fL,0x97d3a70d23c76151L,0x5f9dd6fc9862a9c6L,
+ 0x044c4ab212312f51L },
+ { 0x035ea0fd834a2ddcL,0x49e6b862cc7b826dL,0xb03d688362fce490L,
+ 0x62f2497ab37e36e9L } },
+ /* 26 << 105 */
+ { { 0x04b005b6c6458293L,0x36bb5276e8d10af7L,0xacf2dc138ee617b8L,
+ 0x470d2d35b004b3d4L },
+ { 0x06790832feeb1b77L,0x2bb75c3985657f9cL,0xd70bd4edc0f60004L,
+ 0xfe797ecc219b018bL } },
+ /* 27 << 105 */
+ { { 0x9b5bec2a753aebccL,0xdaf9f3dcc939eca5L,0xd6bc6833d095ad09L,
+ 0x98abdd51daa4d2fcL },
+ { 0xd9840a318d168be5L,0xcf7c10e02325a23cL,0xa5c02aa07e6ecfafL,
+ 0x2462e7e6b5bfdf18L } },
+ /* 28 << 105 */
+ { { 0xab2d8a8ba0cc3f12L,0x68dd485dbc672a29L,0x72039752596f2cd3L,
+ 0x5d3eea67a0cf3d8dL },
+ { 0x810a1a81e6602671L,0x8f144a4014026c0cL,0xbc753a6d76b50f85L,
+ 0xc4dc21e8645cd4a4L } },
+ /* 29 << 105 */
+ { { 0xc5262dea521d0378L,0x802b8e0e05011c6fL,0x1ba19cbb0b4c19eaL,
+ 0x21db64b5ebf0aaecL },
+ { 0x1f394ee970342f9dL,0x93a10aee1bc44a14L,0xa7eed31b3efd0baaL,
+ 0x6e7c824e1d154e65L } },
+ /* 30 << 105 */
+ { { 0xee23fa819966e7eeL,0x64ec4aa805b7920dL,0x2d44462d2d90aad4L,
+ 0xf44dd195df277ad5L },
+ { 0x8d6471f1bb46b6a1L,0x1e65d313fd885090L,0x33a800f513a977b4L,
+ 0xaca9d7210797e1efL } },
+ /* 31 << 105 */
+ { { 0x9a5a85a0fcff6a17L,0x9970a3f31eca7ceeL,0xbb9f0d6bc9504be3L,
+ 0xe0c504beadd24ee2L },
+ { 0x7e09d95677fcc2f4L,0xef1a522765bb5fc4L,0x145d4fb18b9286aaL,
+ 0x66fd0c5d6649028bL } },
+ /* 32 << 105 */
+ { { 0x98857ceb1bf4581cL,0xe635e186aca7b166L,0x278ddd22659722acL,
+ 0xa0903c4c1db68007L },
+ { 0x366e458948f21402L,0x31b49c14b96abda2L,0x329c4b09e0403190L,
+ 0x97197ca3d29f43feL } },
+ /* 33 << 105 */
+ { { 0x8073dd1e274983d8L,0xda1a3bde55717c8fL,0xfd3d4da20361f9d1L,
+ 0x1332d0814c7de1ceL },
+ { 0x9b7ef7a3aa6d0e10L,0x17db2e73f54f1c4aL,0xaf3dffae4cd35567L,
+ 0xaaa2f406e56f4e71L } },
+ /* 34 << 105 */
+ { { 0x8966759e7ace3fc7L,0x9594eacf45a8d8c6L,0x8de3bd8b91834e0eL,
+ 0xafe4ca53548c0421L },
+ { 0xfdd7e856e6ee81c6L,0x8f671beb6b891a3aL,0xf7a58f2bfae63829L,
+ 0x9ab186fb9c11ac9fL } },
+ /* 35 << 105 */
+ { { 0x8d6eb36910b5be76L,0x046b7739fb040bcdL,0xccb4529fcb73de88L,
+ 0x1df0fefccf26be03L },
+ { 0xad7757a6bcfcd027L,0xa8786c75bb3165caL,0xe9db1e347e99a4d9L,
+ 0x99ee86dfb06c504bL } },
+ /* 36 << 105 */
+ { { 0x5b7c2dddc15c9f0aL,0xdf87a7344295989eL,0x59ece47c03d08fdaL,
+ 0xb074d3ddad5fc702L },
+ { 0x2040790351a03776L,0x2bb1f77b2a608007L,0x25c58f4fe1153185L,
+ 0xe6df62f6766e6447L } },
+ /* 37 << 105 */
+ { { 0xefb3d1beed51275aL,0x5de47dc72f0f483fL,0x7932d98e97c2bedfL,
+ 0xd5c119270219f8a1L },
+ { 0x9d751200a73a294eL,0x5f88434a9dc20172L,0xd28d9fd3a26f506aL,
+ 0xa890cd319d1dcd48L } },
+ /* 38 << 105 */
+ { { 0x0aebaec170f4d3b4L,0xfd1a13690ffc8d00L,0xb9d9c24057d57838L,
+ 0x45929d2668bac361L },
+ { 0x5a2cd06025b15ca6L,0x4b3c83e16e474446L,0x1aac7578ee1e5134L,
+ 0xa418f5d6c91e2f41L } },
+ /* 39 << 105 */
+ { { 0x6936fc8a213ed68bL,0x860ae7ed510a5224L,0x63660335def09b53L,
+ 0x641b2897cd79c98dL },
+ { 0x29bd38e101110f35L,0x79c26f42648b1937L,0x64dae5199d9164f4L,
+ 0xd85a23100265c273L } },
+ /* 40 << 105 */
+ { { 0x7173dd5d4b07e2b1L,0xd144c4cb8d9ea221L,0xe8b04ea41105ab14L,
+ 0x92dda542fe80d8f1L },
+ { 0xe9982fa8cf03dce6L,0x8b5ea9651a22cffcL,0xf7f4ea7f3fad88c4L,
+ 0x62db773e6a5ba95cL } },
+ /* 41 << 105 */
+ { { 0xd20f02fb93f24567L,0xfd46c69a315257caL,0x0ac74cc78bcab987L,
+ 0x46f31c015ceca2f5L },
+ { 0x40aedb59888b219eL,0xe50ecc37e1fccd02L,0x1bcd9dad911f816cL,
+ 0x583cc1ec8db9b00cL } },
+ /* 42 << 105 */
+ { { 0xf3cd2e66a483bf11L,0xfa08a6f5b1b2c169L,0xf375e2454be9fa28L,
+ 0x99a7ffec5b6d011fL },
+ { 0x6a3ebddbc4ae62daL,0x6cea00ae374aef5dL,0xab5fb98d9d4d05bcL,
+ 0x7cba1423d560f252L } },
+ /* 43 << 105 */
+ { { 0x49b2cc21208490deL,0x1ca66ec3bcfb2879L,0x7f1166b71b6fb16fL,
+ 0xfff63e0865fe5db3L },
+ { 0xb8345abe8b2610beL,0xb732ed8039de3df4L,0x0e24ed50211c32b4L,
+ 0xd10d8a69848ff27dL } },
+ /* 44 << 105 */
+ { { 0xc1074398ed4de248L,0xd7cedace10488927L,0xa4aa6bf885673e13L,
+ 0xb46bae916daf30afL },
+ { 0x07088472fcef7ad8L,0x61151608d4b35e97L,0xbcfe8f26dde29986L,
+ 0xeb84c4c7d5a34c79L } },
+ /* 45 << 105 */
+ { { 0xc1eec55c164e1214L,0x891be86da147bb03L,0x9fab4d100ba96835L,
+ 0xbf01e9b8a5c1ae9fL },
+ { 0x6b4de139b186ebc0L,0xd5c74c2685b91bcaL,0x5086a99cc2d93854L,
+ 0xeed62a7ba7a9dfbcL } },
+ /* 46 << 105 */
+ { { 0x8778ed6f76b7618aL,0xbff750a503b66062L,0x4cb7be22b65186dbL,
+ 0x369dfbf0cc3a6d13L },
+ { 0xc7dab26c7191a321L,0x9edac3f940ed718eL,0xbc142b36d0cfd183L,
+ 0xc8af82f67c991693L } },
+ /* 47 << 105 */
+ { { 0xb3d1e4d897ce0b2aL,0xe6d7c87fc3a55cdfL,0x35846b9568b81afeL,
+ 0x018d12afd3c239d8L },
+ { 0x2b2c620801206e15L,0xe0e42453a3b882c6L,0x854470a3a50162d5L,
+ 0x081574787017a62aL } },
+ /* 48 << 105 */
+ { { 0x18bd3fb4820357c7L,0x992039ae6f1458adL,0x9a1df3c525b44aa1L,
+ 0x2d780357ed3d5281L },
+ { 0x58cf7e4dc77ad4d4L,0xd49a7998f9df4fc4L,0x4465a8b51d71205eL,
+ 0xa0ee0ea6649254aaL } },
+ /* 49 << 105 */
+ { { 0x4b5eeecfab7bd771L,0x6c87307335c262b9L,0xdc5bd6483c9d61e7L,
+ 0x233d6d54321460d2L },
+ { 0xd20c5626fc195bccL,0x2544595804d78b63L,0xe03fcb3d17ec8ef3L,
+ 0x54b690d146b8f781L } },
+ /* 50 << 105 */
+ { { 0x82fa2c8a21230646L,0xf51aabb9084f418cL,0xff4fbec11a30ba43L,
+ 0x6a5acf73743c9df7L },
+ { 0x1da2b357d635b4d5L,0xc3de68ddecd5c1daL,0xa689080bd61af0ddL,
+ 0xdea5938ad665bf99L } },
+ /* 51 << 105 */
+ { { 0x0231d71afe637294L,0x01968aa6a5a81cd8L,0x11252d50048e63b5L,
+ 0xc446bc526ca007e9L },
+ { 0xef8c50a696d6134bL,0x9361fbf59e09a05cL,0xf17f85a6dca3291aL,
+ 0xb178d548ff251a21L } },
+ /* 52 << 105 */
+ { { 0x87f6374ba4df3915L,0x566ce1bf2fd5d608L,0x425cba4d7de35102L,
+ 0x6b745f8f58c5d5e2L },
+ { 0x88402af663122edfL,0x3190f9ed3b989a89L,0x4ad3d387ebba3156L,
+ 0xef385ad9c7c469a5L } },
+ /* 53 << 105 */
+ { { 0xb08281de3f642c29L,0x20be0888910ffb88L,0xf353dd4ad5292546L,
+ 0x3f1627de8377a262L },
+ { 0xa5faa013eefcd638L,0x8f3bf62674cc77c3L,0x32618f65a348f55eL,
+ 0x5787c0dc9fefeb9eL } },
+ /* 54 << 105 */
+ { { 0xf1673aa2d9a23e44L,0x88dfa9934e10690dL,0x1ced1b362bf91108L,
+ 0x9193ceca3af48649L },
+ { 0xfb34327d2d738fc5L,0x6697b037975fee6cL,0x2f485da0c04079a5L,
+ 0x2cdf57352feaa1acL } },
+ /* 55 << 105 */
+ { { 0x76944420bd55659eL,0x7973e32b4376090cL,0x86bb4fe1163b591aL,
+ 0x10441aedc196f0caL },
+ { 0x3b431f4a045ad915L,0x6c11b437a4afacb1L,0x30b0c7db71fdbbd8L,
+ 0xb642931feda65acdL } },
+ /* 56 << 105 */
+ { { 0x4baae6e89c92b235L,0xa73bbd0e6b3993a1L,0xd06d60ec693dd031L,
+ 0x03cab91b7156881cL },
+ { 0xd615862f1db3574bL,0x485b018564bb061aL,0x27434988a0181e06L,
+ 0x2cd61ad4c1c0c757L } },
+ /* 57 << 105 */
+ { { 0x3effed5a2ff9f403L,0x8dc98d8b62239029L,0x2206021e1f17b70dL,
+ 0xafbec0cabf510015L },
+ { 0x9fed716480130dfaL,0x306dc2b58a02dcf5L,0x48f06620feb10fc0L,
+ 0x78d1e1d55a57cf51L } },
+ /* 58 << 105 */
+ { { 0xadef8c5a192ef710L,0x88afbd4b3b7431f9L,0x7e1f740764250c9eL,
+ 0x6e31318db58bec07L },
+ { 0xfd4fc4b824f89b4eL,0x65a5dd8848c36a2aL,0x4f1eccfff024baa7L,
+ 0x22a21cf2cba94650L } },
+ /* 59 << 105 */
+ { { 0x95d29dee42a554f7L,0x828983a5002ec4baL,0x8112a1f78badb73dL,
+ 0x79ea8897a27c1839L },
+ { 0x8969a5a7d065fd83L,0xf49af791b262a0bcL,0xfcdea8b6af2b5127L,
+ 0x10e913e1564c2dbcL } },
+ /* 60 << 105 */
+ { { 0x51239d14bc21ef51L,0xe51c3ceb4ce57292L,0x795ff06847bbcc3bL,
+ 0x86b46e1ebd7e11e6L },
+ { 0x0ea6ba2380041ef4L,0xd72fe5056262342eL,0x8abc6dfd31d294d4L,
+ 0xbbe017a21278c2c9L } },
+ /* 61 << 105 */
+ { { 0xb1fcfa09b389328aL,0x322fbc62d01771b5L,0x04c0d06360b045bfL,
+ 0xdb652edc10e52d01L },
+ { 0x50ef932c03ec6627L,0xde1b3b2dc1ee50e3L,0x5ab7bdc5dc37a90dL,
+ 0xfea6721331e33a96L } },
+ /* 62 << 105 */
+ { { 0x6482b5cb4f2999aaL,0x38476cc6b8cbf0ddL,0x93ebfacb173405bbL,
+ 0x15cdafe7e52369ecL },
+ { 0xd42d5ba4d935b7dbL,0x648b60041c99a4cdL,0x785101bda3b5545bL,
+ 0x4bf2c38a9dd67fafL } },
+ /* 63 << 105 */
+ { { 0xb1aadc634442449cL,0xe0e9921a33ad4fb8L,0x5c552313aa686d82L,
+ 0xdee635fa465d866cL },
+ { 0xbc3c224a18ee6e8aL,0xeed748a6ed42e02fL,0xe70f930ad474cd08L,
+ 0x774ea6ecfff24adfL } },
+ /* 64 << 105 */
+ { { 0x03e2de1cf3480d4aL,0xf0d8edc7bc8acf1aL,0xf23e330368295a9cL,
+ 0xfadd5f68c546a97dL },
+ { 0x895597ad96f8acb1L,0xbddd49d5671bdae2L,0x16fcd52821dd43f4L,
+ 0xa5a454126619141aL } },
+ /* 0 << 112 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 112 */
+ { { 0x8ce9b6bfc360e25aL,0xe6425195075a1a78L,0x9dc756a8481732f4L,
+ 0x83c0440f5432b57aL },
+ { 0xc670b3f1d720281fL,0x2205910ed135e051L,0xded14b0edb052be7L,
+ 0x697b3d27c568ea39L } },
+ /* 2 << 112 */
+ { { 0x2e599b9afb3ff9edL,0x28c2e0ab17f6515cL,0x1cbee4fd474da449L,
+ 0x071279a44f364452L },
+ { 0x97abff6601fbe855L,0x3ee394e85fda51c4L,0x190385f667597c0bL,
+ 0x6e9fccc6a27ee34bL } },
+ /* 3 << 112 */
+ { { 0x0b89de9314092ebbL,0xf17256bd428e240cL,0xcf89a7f393d2f064L,
+ 0x4f57841ee1ed3b14L },
+ { 0x4ee14405e708d855L,0x856aae7203f1c3d0L,0xc8e5424fbdd7eed5L,
+ 0x3333e4ef73ab4270L } },
+ /* 4 << 112 */
+ { { 0x3bc77adedda492f8L,0xc11a3aea78297205L,0x5e89a3e734931b4cL,
+ 0x17512e2e9f5694bbL },
+ { 0x5dc349f3177bf8b6L,0x232ea4ba08c7ff3eL,0x9c4f9d16f511145dL,
+ 0xccf109a333b379c3L } },
+ /* 5 << 112 */
+ { { 0xe75e7a88a1f25897L,0x7ac6961fa1b5d4d8L,0xe3e1077308f3ed5cL,
+ 0x208a54ec0a892dfbL },
+ { 0xbe826e1978660710L,0x0cf70a97237df2c8L,0x418a7340ed704da5L,
+ 0xa3eeb9a908ca33fdL } },
+ /* 6 << 112 */
+ { { 0x49d96233169bca96L,0x04d286d42da6aafbL,0xc09606eca0c2fa94L,
+ 0x8869d0d523ff0fb3L },
+ { 0xa99937e5d0150d65L,0xa92e2503240c14c9L,0x656bf945108e2d49L,
+ 0x152a733aa2f59e2bL } },
+ /* 7 << 112 */
+ { { 0xb4323d588434a920L,0xc0af8e93622103c5L,0x667518ef938dbf9aL,
+ 0xa184307383a9cdf2L },
+ { 0x350a94aa5447ab80L,0xe5e5a325c75a3d61L,0x74ba507f68411a9eL,
+ 0x10581fc1594f70c5L } },
+ /* 8 << 112 */
+ { { 0x60e2857080eb24a9L,0x7bedfb4d488e0cfdL,0x721ebbd7c259cdb8L,
+ 0x0b0da855bc6390a9L },
+ { 0x2b4d04dbde314c70L,0xcdbf1fbc6c32e846L,0x33833eabb162fc9eL,
+ 0x9939b48bb0dd3ab7L } },
+ /* 9 << 112 */
+ { { 0x5aaa98a7cb0c9c8cL,0x75105f3081c4375cL,0xceee50575ef1c90fL,
+ 0xb31e065fc23a17bfL },
+ { 0x5364d275d4b6d45aL,0xd363f3ad62ec8996L,0xb5d212394391c65bL,
+ 0x84564765ebb41b47L } },
+ /* 10 << 112 */
+ { { 0x20d18ecc37107c78L,0xacff3b6b570c2a66L,0x22f975d99bd0d845L,
+ 0xef0a0c46ba178fa0L },
+ { 0x1a41965176b6028eL,0xc49ec674248612d4L,0x5b6ac4f27338af55L,
+ 0x06145e627bee5a36L } },
+ /* 11 << 112 */
+ { { 0x33e95d07e75746b5L,0x1c1e1f6dc40c78beL,0x967833ef222ff8e2L,
+ 0x4bedcf6ab49180adL },
+ { 0x6b37e9c13d7a4c8aL,0x2748887c6ddfe760L,0xf7055123aa3a5bbcL,
+ 0x954ff2257bbb8e74L } },
+ /* 12 << 112 */
+ { { 0xc42b8ab197c3dfb9L,0x55a549b0cf168154L,0xad6748e7c1b50692L,
+ 0x2775780f6fc5cbcbL },
+ { 0x4eab80b8e1c9d7c8L,0x8c69dae13fdbcd56L,0x47e6b4fb9969eaceL,
+ 0x002f1085a705cb5aL } },
+ /* 13 << 112 */
+ { { 0x4e23ca446d3fea55L,0xb4ae9c86f4810568L,0x47bfb91b2a62f27dL,
+ 0x60deb4c9d9bac28cL },
+ { 0xa892d8947de6c34cL,0x4ee682594494587dL,0x914ee14e1a3f8a5bL,
+ 0xbb113eaa28700385L } },
+ /* 14 << 112 */
+ { { 0x81ca03b92115b4c9L,0x7c163d388908cad1L,0xc912a118aa18179aL,
+ 0xe09ed750886e3081L },
+ { 0xa676e3fa26f516caL,0x753cacf78e732f91L,0x51592aea833da8b4L,
+ 0xc626f42f4cbea8aaL } },
+ /* 15 << 112 */
+ { { 0xef9dc899a7b56eafL,0x00c0e52c34ef7316L,0x5b1e4e24fe818a86L,
+ 0x9d31e20dc538be47L },
+ { 0x22eb932d3ed68974L,0xe44bbc087c4e87c4L,0x4121086e0dde9aefL,
+ 0x8e6b9cff134f4345L } },
+ /* 16 << 112 */
+ { { 0x96892c1f711b0eb9L,0xb905f2c8780ab954L,0xace26309a20792dbL,
+ 0xec8ac9b30684e126L },
+ { 0x486ad8b6b40a2447L,0x60121fc19fe3fb24L,0x5626fccf1a8e3b3fL,
+ 0x4e5686226ad1f394L } },
+ /* 17 << 112 */
+ { { 0xda7aae0d196aa5a1L,0xe0df8c771041b5fbL,0x451465d926b318b7L,
+ 0xc29b6e557ab136e9L },
+ { 0x2c2ab48b71148463L,0xb5738de364454a76L,0x54ccf9a05a03abe4L,
+ 0x377c02960427d58eL } },
+ /* 18 << 112 */
+ { { 0x73f5f0b92bb39c1fL,0x14373f2ce608d8c5L,0xdcbfd31400fbb805L,
+ 0xdf18fb2083afdcfbL },
+ { 0x81a57f4242b3523fL,0xe958532d87f650fbL,0xaa8dc8b68b0a7d7cL,
+ 0x1b75dfb7150166beL } },
+ /* 19 << 112 */
+ { { 0x90e4f7c92d7d1413L,0x67e2d6b59834f597L,0x4fd4f4f9a808c3e8L,
+ 0xaf8237e0d5281ec1L },
+ { 0x25ab5fdc84687ceeL,0xc5ded6b1a5b26c09L,0x8e4a5aecc8ea7650L,
+ 0x23b73e5c14cc417fL } },
+ /* 20 << 112 */
+ { { 0x2bfb43183037bf52L,0xb61e6db578c725d7L,0x8efd4060bbb3e5d7L,
+ 0x2e014701dbac488eL },
+ { 0xac75cf9a360aa449L,0xb70cfd0579634d08L,0xa591536dfffb15efL,
+ 0xb2c37582d07c106cL } },
+ /* 21 << 112 */
+ { { 0xb4293fdcf50225f9L,0xc52e175cb0e12b03L,0xf649c3bad0a8bf64L,
+ 0x745a8fefeb8ae3c6L },
+ { 0x30d7e5a358321bc3L,0xb1732be70bc4df48L,0x1f217993e9ea5058L,
+ 0xf7a71cde3e4fd745L } },
+ /* 22 << 112 */
+ { { 0x86cc533e894c5bbbL,0x6915c7d969d83082L,0xa6aa2d055815c244L,
+ 0xaeeee59249b22ce5L },
+ { 0x89e39d1378135486L,0x3a275c1f16b76f2fL,0xdb6bcc1be036e8f5L,
+ 0x4df69b215e4709f5L } },
+ /* 23 << 112 */
+ { { 0xa188b2502d0f39aaL,0x622118bb15a85947L,0x2ebf520ffde0f4faL,
+ 0xa40e9f294860e539L },
+ { 0x7b6a51eb22b57f0fL,0x849a33b97e80644aL,0x50e5d16f1cf095feL,
+ 0xd754b54eec55f002L } },
+ /* 24 << 112 */
+ { { 0x5cfbbb22236f4a98L,0x0b0c59e9066800bbL,0x4ac69a8f5a9a7774L,
+ 0x2b33f804d6bec948L },
+ { 0xb372929532e6c466L,0x68956d0f4e599c73L,0xa47a249f155c31ccL,
+ 0x24d80f0de1ce284eL } },
+ /* 25 << 112 */
+ { { 0xcd821dfb988baf01L,0xe6331a7ddbb16647L,0x1eb8ad33094cb960L,
+ 0x593cca38c91bbca5L },
+ { 0x384aac8d26567456L,0x40fa0309c04b6490L,0x97834cd6dab6c8f6L,
+ 0x68a7318d3f91e55fL } },
+ /* 26 << 112 */
+ { { 0xa00fd04efc4d3157L,0xb56f8ab22bf3bdeaL,0x014f56484fa57172L,
+ 0x948c5860450abdb3L },
+ { 0x342b5df00ebd4f08L,0x3e5168cd0e82938eL,0x7aedc1ceb0df5dd0L,
+ 0x6bbbc6d9e5732516L } },
+ /* 27 << 112 */
+ { { 0xc7bfd486605daaa6L,0x46fd72b7bb9a6c9eL,0xe4847fb1a124fb89L,
+ 0x75959cbda2d8ffbcL },
+ { 0x42579f65c8a588eeL,0x368c92e6b80b499dL,0xea4ef6cd999a5df1L,
+ 0xaa73bb7f936fe604L } },
+ /* 28 << 112 */
+ { { 0xf347a70d6457d188L,0x86eda86b8b7a388bL,0xb7cdff060ccd6013L,
+ 0xbeb1b6c7d0053fb2L },
+ { 0x0b02238799240a9fL,0x1bbb384f776189b2L,0x8695e71e9066193aL,
+ 0x2eb5009706ffac7eL } },
+ /* 29 << 112 */
+ { { 0x0654a9c04a7d2caaL,0x6f3fb3d1a5aaa290L,0x835db041ff476e8fL,
+ 0x540b8b0bc42295e4L },
+ { 0xa5c73ac905e214f5L,0x9a74075a56a0b638L,0x2e4b1090ce9e680bL,
+ 0x57a5b4796b8d9afaL } },
+ /* 30 << 112 */
+ { { 0x0dca48e726bfe65cL,0x097e391c7290c307L,0x683c462e6669e72eL,
+ 0xf505be1e062559acL },
+ { 0x5fbe3ea1e3a3035aL,0x6431ebf69cd50da8L,0xfd169d5c1f6407f2L,
+ 0x8d838a9560fce6b8L } },
+ /* 31 << 112 */
+ { { 0x2a2bfa7f650006f0L,0xdfd7dad350c0fbb2L,0x92452495ccf9ad96L,
+ 0x183bf494d95635f9L },
+ { 0x02d5df434a7bd989L,0x505385cca5431095L,0xdd98e67dfd43f53eL,
+ 0xd61e1a6c500c34a9L } },
+ /* 32 << 112 */
+ { { 0x5a4b46c64a8a3d62L,0x8469c4d0247743d2L,0x2bb3a13d88f7e433L,
+ 0x62b23a1001be5849L },
+ { 0xe83596b4a63d1a4cL,0x454e7fea7d183f3eL,0x643fce6117afb01cL,
+ 0x4e65e5e61c4c3638L } },
+ /* 33 << 112 */
+ { { 0x41d85ea1ef74c45bL,0x2cfbfa66ae328506L,0x98b078f53ada7da9L,
+ 0xd985fe37ec752fbbL },
+ { 0xeece68fe5a0148b4L,0x6f9a55c72d78136dL,0x232dccc4d2b729ceL,
+ 0xa27e0dfd90aafbc4L } },
+ /* 34 << 112 */
+ { { 0x9647445212b4603eL,0xa876c5516b706d14L,0xdf145fcf69a9d412L,
+ 0xe2ab75b72d479c34L },
+ { 0x12df9a761a23ff97L,0xc61389925d359d10L,0x6e51c7aefa835f22L,
+ 0x69a79cb1c0fcc4d9L } },
+ /* 35 << 112 */
+ { { 0xf57f350d594cc7e1L,0x3079ca633350ab79L,0x226fb6149aff594aL,
+ 0x35afec026d59a62bL },
+ { 0x9bee46f406ed2c6eL,0x58da17357d939a57L,0x44c504028fd1797eL,
+ 0xd8853e7c5ccea6caL } },
+ /* 36 << 112 */
+ { { 0x4065508da35fcd5fL,0x8965df8c495ccaebL,0x0f2da85012e1a962L,
+ 0xee471b94c1cf1cc4L },
+ { 0xcef19bc80a08fb75L,0x704958f581de3591L,0x2867f8b23aef4f88L,
+ 0x8d749384ea9f9a5fL } },
+ /* 37 << 112 */
+ { { 0x1b3855378c9049f4L,0x5be948f37b92d8b6L,0xd96f725db6e2bd6bL,
+ 0x37a222bc958c454dL },
+ { 0xe7c61abb8809bf61L,0x46f07fbc1346f18dL,0xfb567a7ae87c0d1cL,
+ 0x84a461c87ef3d07aL } },
+ /* 38 << 112 */
+ { { 0x0a5adce6d9278d98L,0x24d948139dfc73e1L,0x4f3528b6054321c3L,
+ 0x2e03fdde692ea706L },
+ { 0x10e6061947b533c0L,0x1a8bc73f2ca3c055L,0xae58d4b21bb62b8fL,
+ 0xb2045a73584a24e3L } },
+ /* 39 << 112 */
+ { { 0x3ab3d5afbd76e195L,0x478dd1ad6938a810L,0x6ffab3936ee3d5cbL,
+ 0xdfb693db22b361e4L },
+ { 0xf969449651dbf1a7L,0xcab4b4ef08a2e762L,0xe8c92f25d39bba9aL,
+ 0x850e61bcf1464d96L } },
+ /* 40 << 112 */
+ { { 0xb7e830e3dc09508bL,0xfaf6d2cf74317655L,0x72606cebdf690355L,
+ 0x48bb92b3d0c3ded6L },
+ { 0x65b754845c7cf892L,0xf6cd7ac9d5d5f01fL,0xc2c30a5996401d69L,
+ 0x91268650ed921878L } },
+ /* 41 << 112 */
+ { { 0x380bf913b78c558fL,0x43c0baebc8afdaa9L,0x377f61d554f169d3L,
+ 0xf8da07e3ae5ff20bL },
+ { 0xb676c49da8a90ea8L,0x81c1ff2b83a29b21L,0x383297ac2ad8d276L,
+ 0x3001122fba89f982L } },
+ /* 42 << 112 */
+ { { 0xe1d794be6718e448L,0x246c14827c3e6e13L,0x56646ef85d26b5efL,
+ 0x80f5091e88069cddL },
+ { 0xc5992e2f724bdd38L,0x02e915b48471e8c7L,0x96ff320a0d0ff2a9L,
+ 0xbf8864874384d1a0L } },
+ /* 43 << 112 */
+ { { 0xbbe1e6a6c93f72d6L,0xd5f75d12cad800eaL,0xfa40a09fe7acf117L,
+ 0x32c8cdd57581a355L },
+ { 0x742219927023c499L,0xa8afe5d738ec3901L,0x5691afcba90e83f0L,
+ 0x41bcaa030b8f8eacL } },
+ /* 44 << 112 */
+ { { 0xe38b5ff98d2668d5L,0x0715281a7ad81965L,0x1bc8fc7c03c6ce11L,
+ 0xcbbee6e28b650436L },
+ { 0x06b00fe80cdb9808L,0x17d6e066fe3ed315L,0x2e9d38c64d0b5018L,
+ 0xab8bfd56844dcaefL } },
+ /* 45 << 112 */
+ { { 0x42894a59513aed8bL,0xf77f3b6d314bd07aL,0xbbdecb8f8e42b582L,
+ 0xf10e2fa8d2390fe6L },
+ { 0xefb9502262a2f201L,0x4d59ea5050ee32b0L,0xd87f77286da789a8L,
+ 0xcf98a2cff79492c4L } },
+ /* 46 << 112 */
+ { { 0xf9577239720943c2L,0xba044cf53990b9d0L,0x5aa8e82395f2884aL,
+ 0x834de6ed0278a0afL },
+ { 0xc8e1ee9a5f25bd12L,0x9259ceaa6f7ab271L,0x7e6d97a277d00b76L,
+ 0x5c0c6eeaa437832aL } },
+ /* 47 << 112 */
+ { { 0x5232c20f5606b81dL,0xabd7b3750d991ee5L,0x4d2bfe358632d951L,
+ 0x78f8514698ed9364L },
+ { 0x951873f0f30c3282L,0x0da8ac80a789230bL,0x3ac7789c5398967fL,
+ 0xa69b8f7fbdda0fb5L } },
+ /* 48 << 112 */
+ { { 0xe5db77176add8545L,0x1b71cb6672c49b66L,0xd856073968421d77L,
+ 0x03840fe883e3afeaL },
+ { 0xb391dad51ec69977L,0xae243fb9307f6726L,0xc88ac87be8ca160cL,
+ 0x5174cced4ce355f4L } },
+ /* 49 << 112 */
+ { { 0x98a35966e58ba37dL,0xfdcc8da27817335dL,0x5b75283083fbc7bfL,
+ 0x68e419d4d9c96984L },
+ { 0x409a39f402a40380L,0x88940faf1fe977bcL,0xc640a94b8f8edea6L,
+ 0x1e22cd17ed11547dL } },
+ /* 50 << 112 */
+ { { 0xe28568ce59ffc3e2L,0x60aa1b55c1dee4e7L,0xc67497c8837cb363L,
+ 0x06fb438a105a2bf2L },
+ { 0x30357ec4500d8e20L,0x1ad9095d0670db10L,0x7f589a05c73b7cfdL,
+ 0xf544607d880d6d28L } },
+ /* 51 << 112 */
+ { { 0x17ba93b1a20ef103L,0xad8591306ba6577bL,0x65c91cf66fa214a0L,
+ 0xd7d49c6c27990da5L },
+ { 0xecd9ec8d20bb569dL,0xbd4b2502eeffbc33L,0x2056ca5a6bed0467L,
+ 0x7916a1f75b63728cL } },
+ /* 52 << 112 */
+ { { 0xd4f9497d53a4f566L,0x8973466497b56810L,0xf8e1da740494a621L,
+ 0x82546a938d011c68L },
+ { 0x1f3acb19c61ac162L,0x52f8fa9cabad0d3eL,0x15356523b4b7ea43L,
+ 0x5a16ad61ae608125L } },
+ /* 53 << 112 */
+ { { 0xb0bcb87f4faed184L,0x5f236b1d5029f45fL,0xd42c76070bc6b1fcL,
+ 0xc644324e68aefce3L },
+ { 0x8e191d595c5d8446L,0xc020807713ae1979L,0xadcaee553ba59cc7L,
+ 0x20ed6d6ba2cb81baL } },
+ /* 54 << 112 */
+ { { 0x0952ba19b6efcffcL,0x60f12d6897c0b87cL,0x4ee2c7c49caa30bcL,
+ 0x767238b797fbff4eL },
+ { 0xebc73921501b5d92L,0x3279e3dfc2a37737L,0x9fc12bc86d197543L,
+ 0xfa94dc6f0a40db4eL } },
+ /* 55 << 112 */
+ { { 0x7392b41a530ccbbdL,0x87c82146ea823525L,0xa52f984c05d98d0cL,
+ 0x2ae57d735ef6974cL },
+ { 0x9377f7bf3042a6ddL,0xb1a007c019647a64L,0xfaa9079a0cca9767L,
+ 0x3d81a25bf68f72d5L } },
+ /* 56 << 112 */
+ { { 0x752067f8ff81578eL,0x786221509045447dL,0xc0c22fcf0505aa6fL,
+ 0x1030f0a66bed1c77L },
+ { 0x31f29f151f0bd739L,0x2d7989c7e6debe85L,0x5c070e728e677e98L,
+ 0x0a817bd306e81fd5L } },
+ /* 57 << 112 */
+ { { 0xc110d830b0f2ac95L,0x48d0995aab20e64eL,0x0f3e00e17729cd9aL,
+ 0x2a570c20dd556946L },
+ { 0x912dbcfd4e86214dL,0x2d014ee2cf615498L,0x55e2b1e63530d76eL,
+ 0xc5135ae4fd0fd6d1L } },
+ /* 58 << 112 */
+ { { 0x0066273ad4f3049fL,0xbb8e9893e7087477L,0x2dba1ddb14c6e5fdL,
+ 0xdba3788651f57e6cL },
+ { 0x5aaee0a65a72f2cfL,0x1208bfbf7bea5642L,0xf5c6aa3b67872c37L,
+ 0xd726e08343f93224L } },
+ /* 59 << 112 */
+ { { 0x1854daa5061f1658L,0xc0016df1df0cd2b3L,0xc2a3f23e833d50deL,
+ 0x73b681d2bbbd3017L },
+ { 0x2f046dc43ac343c0L,0x9c847e7d85716421L,0xe1e13c910917eed4L,
+ 0x3fc9eebd63a1b9c6L } },
+ /* 60 << 112 */
+ { { 0x0f816a727fe02299L,0x6335ccc2294f3319L,0x3820179f4745c5beL,
+ 0xe647b782922f066eL },
+ { 0xc22e49de02cafb8aL,0x299bc2fffcc2ecccL,0x9a8feea26e0e8282L,
+ 0xa627278bfe893205L } },
+ /* 61 << 112 */
+ { { 0xa7e197337933e47bL,0xf4ff6b132e766402L,0xa4d8be0a98440d9fL,
+ 0x658f5c2f38938808L },
+ { 0x90b75677c95b3b3eL,0xfa0442693137b6ffL,0x077b039b43c47c29L,
+ 0xcca95dd38a6445b2L } },
+ /* 62 << 112 */
+ { { 0x0b498ba42333fc4cL,0x274f8e68f736a1b1L,0x6ca348fd5f1d4b2eL,
+ 0x24d3be78a8f10199L },
+ { 0x8535f858ca14f530L,0xa6e7f1635b982e51L,0x847c851236e1bf62L,
+ 0xf6a7c58e03448418L } },
+ /* 63 << 112 */
+ { { 0x583f3703f9374ab6L,0x864f91956e564145L,0x33bc3f4822526d50L,
+ 0x9f323c801262a496L },
+ { 0xaa97a7ae3f046a9aL,0x70da183edf8a039aL,0x5b68f71c52aa0ba6L,
+ 0x9be0fe5121459c2dL } },
+ /* 64 << 112 */
+ { { 0xc1e17eb6cbc613e5L,0x33131d55497ea61cL,0x2f69d39eaf7eded5L,
+ 0x73c2f434de6af11bL },
+ { 0x4ca52493a4a375faL,0x5f06787cb833c5c2L,0x814e091f3e6e71cfL,
+ 0x76451f578b746666L } },
+ /* 0 << 119 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 119 */
+ { { 0x80f9bdef694db7e0L,0xedca8787b9fcddc6L,0x51981c3403b8dce1L,
+ 0x4274dcf170e10ba1L },
+ { 0xf72743b86def6d1aL,0xd25b1670ebdb1866L,0xc4491e8c050c6f58L,
+ 0x2be2b2ab87fbd7f5L } },
+ /* 2 << 119 */
+ { { 0x3e0e5c9dd111f8ecL,0xbcc33f8db7c4e760L,0x702f9a91bd392a51L,
+ 0x7da4a795c132e92dL },
+ { 0x1a0b0ae30bb1151bL,0x54febac802e32251L,0xea3a5082694e9e78L,
+ 0xe58ffec1e4fe40b8L } },
+ /* 3 << 119 */
+ { { 0xf85592fcd1e0cf9eL,0xdea75f0dc0e7b2e8L,0xc04215cfc135584eL,
+ 0x174fc7272f57092aL },
+ { 0xe7277877eb930beaL,0x504caccb5eb02a5aL,0xf9fe08f7f5241b9bL,
+ 0xe7fb62f48d5ca954L } },
+ /* 4 << 119 */
+ { { 0xfbb8349d29c4120bL,0x9f94391fc0d0d915L,0xc4074fa75410ba51L,
+ 0xa66adbf6150a5911L },
+ { 0xc164543c34bfca38L,0xe0f27560b9e1ccfcL,0x99da0f53e820219cL,
+ 0xe8234498c6b4997aL } },
+ /* 5 << 119 */
+ { { 0xcfb88b769d4c5423L,0x9e56eb10b0521c49L,0x418e0b5ebe8700a1L,
+ 0x00cbaad6f93cb58aL },
+ { 0xe923fbded92a5e67L,0xca4979ac1f347f11L,0x89162d856bc0585bL,
+ 0xdd6254afac3c70e3L } },
+ /* 6 << 119 */
+ { { 0x7b23c513516e19e4L,0x56e2e847c5c4d593L,0x9f727d735ce71ef6L,
+ 0x5b6304a6f79a44c5L },
+ { 0x6638a7363ab7e433L,0x1adea470fe742f83L,0xe054b8545b7fc19fL,
+ 0xf935381aba1d0698L } },
+ /* 7 << 119 */
+ { { 0x546eab2d799e9a74L,0x96239e0ea949f729L,0xca274c6b7090055aL,
+ 0x835142c39020c9b0L },
+ { 0xa405667aa2e8807fL,0x29f2c0851aa3d39eL,0xcc555d6442fc72f5L,
+ 0xe856e0e7fbeacb3cL } },
+ /* 8 << 119 */
+ { { 0xb5504f9d918e4936L,0x65035ef6b2513982L,0x0553a0c26f4d9cb9L,
+ 0x6cb10d56bea85509L },
+ { 0x48d957b7a242da11L,0x16a4d3dd672b7268L,0x3d7e637c8502a96bL,
+ 0x27c7032b730d463bL } },
+ /* 9 << 119 */
+ { { 0xbdc02b18e4136a14L,0xbacf969d678e32bfL,0xc98d89a3dd9c3c03L,
+ 0x7b92420a23becc4fL },
+ { 0xd4b41f78c64d565cL,0x9f969d0010f28295L,0xec7f7f76b13d051aL,
+ 0x08945e1ea92da585L } },
+ /* 10 << 119 */
+ { { 0x55366b7d5846426fL,0xe7d09e89247d441dL,0x510b404d736fbf48L,
+ 0x7fa003d0e784bd7dL },
+ { 0x25f7614f17fd9596L,0x49e0e0a135cb98dbL,0x2c65957b2e83a76aL,
+ 0x5d40da8dcddbe0f8L } },
+ /* 11 << 119 */
+ { { 0xf2b8c405050bad24L,0x8918426dc2aa4823L,0x2aeab3dda38365a7L,
+ 0x720317177c91b690L },
+ { 0x8b00d69960a94120L,0x478a255de99eaeecL,0xbf656a5f6f60aafdL,
+ 0xdfd7cb755dee77b3L } },
+ /* 12 << 119 */
+ { { 0x37f68bb4a595939dL,0x0355647928740217L,0x8e740e7c84ad7612L,
+ 0xd89bc8439044695fL },
+ { 0xf7f3da5d85a9184dL,0x562563bb9fc0b074L,0x06d2e6aaf88a888eL,
+ 0x612d8643161fbe7cL } },
+ /* 13 << 119 */
+ { { 0x465edba7f64085e7L,0xb230f30429aa8511L,0x53388426cda2d188L,
+ 0x908857354b666649L },
+ { 0x6f02ff9a652f54f6L,0x65c822945fae2bf0L,0x7816ade062f5eee3L,
+ 0xdcdbdf43fcc56d70L } },
+ /* 14 << 119 */
+ { { 0x9fb3bba354530bb2L,0xbde3ef77cb0869eaL,0x89bc90460b431163L,
+ 0x4d03d7d2e4819a35L },
+ { 0x33ae4f9e43b6a782L,0x216db3079c88a686L,0x91dd88e000ffedd9L,
+ 0xb280da9f12bd4840L } },
+ /* 15 << 119 */
+ { { 0x32a7cb8a1635e741L,0xfe14008a78be02a7L,0x3fafb3341b7ae030L,
+ 0x7fd508e75add0ce9L },
+ { 0x72c83219d607ad51L,0x0f229c0a8d40964aL,0x1be2c3361c878da2L,
+ 0xe0c96742eab2ab86L } },
+ /* 16 << 119 */
+ { { 0x458f86913e538cd7L,0xa7001f6c8e08ad53L,0x52b8c6e6bf5d15ffL,
+ 0x548234a4011215ddL },
+ { 0xff5a9d2d3d5b4045L,0xb0ffeeb64a904190L,0x55a3aca448607f8bL,
+ 0x8cbd665c30a0672aL } },
+ /* 17 << 119 */
+ { { 0x87f834e042583068L,0x02da2aebf3f6e683L,0x6b763e5d05c12248L,
+ 0x7230378f65a8aefcL },
+ { 0x93bd80b571e8e5caL,0x53ab041cb3b62524L,0x1b8605136c9c552eL,
+ 0xe84d402cd5524e66L } },
+ /* 18 << 119 */
+ { { 0xa37f3573f37f5937L,0xeb0f6c7dd1e4fca5L,0x2965a554ac8ab0fcL,
+ 0x17fbf56c274676acL },
+ { 0x2e2f6bd9acf7d720L,0x41fc8f8810224766L,0x517a14b385d53befL,
+ 0xdae327a57d76a7d1L } },
+ /* 19 << 119 */
+ { { 0x6ad0a065c4818267L,0x33aa189b37c1bbc1L,0x64970b5227392a92L,
+ 0x21699a1c2d1535eaL },
+ { 0xcd20779cc2d7a7fdL,0xe318605999c83cf2L,0x9b69440b72c0b8c7L,
+ 0xa81497d77b9e0e4dL } },
+ /* 20 << 119 */
+ { { 0x515d5c891f5f82dcL,0x9a7f67d76361079eL,0xa8da81e311a35330L,
+ 0xe44990c44b18be1bL },
+ { 0xc7d5ed95af103e59L,0xece8aba78dac9261L,0xbe82b0999394b8d3L,
+ 0x6830f09a16adfe83L } },
+ /* 21 << 119 */
+ { { 0x250a29b488172d01L,0x8b20bd65caff9e02L,0xb8a7661ee8a6329aL,
+ 0x4520304dd3fce920L },
+ { 0xae45da1f2b47f7efL,0xe07f52885bffc540L,0xf79970093464f874L,
+ 0x2244c2cda6fa1f38L } },
+ /* 22 << 119 */
+ { { 0x43c41ac194d7d9b1L,0x5bafdd82c82e7f17L,0xdf0614c15fda0fcaL,
+ 0x74b043a7a8ae37adL },
+ { 0x3ba6afa19e71734cL,0x15d5437e9c450f2eL,0x4a5883fe67e242b1L,
+ 0x5143bdc22c1953c2L } },
+ /* 23 << 119 */
+ { { 0x542b8b53fc5e8920L,0x363bf9a89a9cee08L,0x02375f10c3486e08L,
+ 0x2037543b8c5e70d2L },
+ { 0x7109bccc625640b4L,0xcbc1051e8bc62c3bL,0xf8455fed803f26eaL,
+ 0x6badceabeb372424L } },
+ /* 24 << 119 */
+ { { 0xa2a9ce7c6b53f5f9L,0x642465951b176d99L,0xb1298d36b95c081bL,
+ 0x53505bb81d9a9ee6L },
+ { 0x3f6f9e61f2ba70b0L,0xd07e16c98afad453L,0x9f1694bbe7eb4a6aL,
+ 0xdfebced93cb0bc8eL } },
+ /* 25 << 119 */
+ { { 0x92d3dcdc53868c8bL,0x174311a2386107a6L,0x4109e07c689b4e64L,
+ 0x30e4587f2df3dcb6L },
+ { 0x841aea310811b3b2L,0x6144d41d0cce43eaL,0x464c45812a9a7803L,
+ 0xd03d371f3e158930L } },
+ /* 26 << 119 */
+ { { 0xc676d7f2b1f3390bL,0x9f7a1b8ca5b61272L,0x4ebebfc9c2e127a9L,
+ 0x4602500c5dd997bfL },
+ { 0x7f09771c4711230fL,0x058eb37c020f09c1L,0xab693d4bfee5e38bL,
+ 0x9289eb1f4653cbc0L } },
+ /* 27 << 119 */
+ { { 0xbecf46abd51b9cf5L,0xd2aa9c029f0121afL,0x36aaf7d2e90dc274L,
+ 0x909e4ea048b95a3cL },
+ { 0xe6b704966f32dbdbL,0x672188a08b030b3eL,0xeeffe5b3cfb617e2L,
+ 0x87e947de7c82709eL } },
+ /* 28 << 119 */
+ { { 0xa44d2b391770f5a7L,0xe4d4d7910e44eb82L,0x42e69d1e3f69712aL,
+ 0xbf11c4d6ac6a820eL },
+ { 0xb5e7f3e542c4224cL,0xd6b4e81c449d941cL,0x5d72bd165450e878L,
+ 0x6a61e28aee25ac54L } },
+ /* 29 << 119 */
+ { { 0x33272094e6f1cd95L,0x7512f30d0d18673fL,0x32f7a4ca5afc1464L,
+ 0x2f0956566bbb977bL },
+ { 0x586f47caa8226200L,0x02c868ad1ac07369L,0x4ef2b845c613acbeL,
+ 0x43d7563e0386054cL } },
+ /* 30 << 119 */
+ { { 0x54da9dc7ab952578L,0xb5423df226e84d0bL,0xa8b64eeb9b872042L,
+ 0xac2057825990f6dfL },
+ { 0x4ff696eb21f4c77aL,0x1a79c3e4aab273afL,0x29bc922e9436b3f1L,
+ 0xff807ef8d6d9a27aL } },
+ /* 31 << 119 */
+ { { 0x82acea3d778f22a0L,0xfb10b2e85b5e7469L,0xc0b169802818ee7dL,
+ 0x011afff4c91c1a2fL },
+ { 0x95a6d126ad124418L,0x31c081a5e72e295fL,0x36bb283af2f4db75L,
+ 0xd115540f7acef462L } },
+ /* 32 << 119 */
+ { { 0xc7f3a8f833f6746cL,0x21e46f65fea990caL,0x915fd5c5caddb0a9L,
+ 0xbd41f01678614555L },
+ { 0x346f4434426ffb58L,0x8055943614dbc204L,0xf3dd20fe5a969b7fL,
+ 0x9d59e956e899a39aL } },
+ /* 33 << 119 */
+ { { 0xf1b0971c8ad4cf4bL,0x034488602ffb8fb8L,0xf071ac3c65340ba4L,
+ 0x408d0596b27fd758L },
+ { 0xe7c78ea498c364b0L,0xa4aac4a5051e8ab5L,0xb9e1d560485d9002L,
+ 0x9acd518a88844455L } },
+ /* 34 << 119 */
+ { { 0xe4ca688fd06f56c0L,0xa48af70ddf027972L,0x691f0f045e9a609dL,
+ 0xa9dd82cdee61270eL },
+ { 0x8903ca63a0ef18d3L,0x9fb7ee353d6ca3bdL,0xa7b4a09cabf47d03L,
+ 0x4cdada011c67de8eL } },
+ /* 35 << 119 */
+ { { 0x520037499355a244L,0xe77fd2b64f2151a9L,0x695d6cf666b4efcbL,
+ 0xc5a0cacfda2cfe25L },
+ { 0x104efe5cef811865L,0xf52813e89ea5cc3dL,0x855683dc40b58dbcL,
+ 0x0338ecde175fcb11L } },
+ /* 36 << 119 */
+ { { 0xf9a0563774921592L,0xb4f1261db9bb9d31L,0x551429b74e9c5459L,
+ 0xbe182e6f6ea71f53L },
+ { 0xd3a3b07cdfc50573L,0x9ba1afda62be8d44L,0x9bcfd2cb52ab65d3L,
+ 0xdf11d547a9571802L } },
+ /* 37 << 119 */
+ { { 0x099403ee02a2404aL,0x497406f421088a71L,0x994794095004ae71L,
+ 0xbdb42078a812c362L },
+ { 0x2b72a30fd8828442L,0x283add27fcb5ed1cL,0xf7c0e20066a40015L,
+ 0x3e3be64108b295efL } },
+ /* 38 << 119 */
+ { { 0xac127dc1e038a675L,0x729deff38c5c6320L,0xb7df8fd4a90d2c53L,
+ 0x9b74b0ec681e7cd3L },
+ { 0x5cb5a623dab407e5L,0xcdbd361576b340c6L,0xa184415a7d28392cL,
+ 0xc184c1d8e96f7830L } },
+ /* 39 << 119 */
+ { { 0xc3204f1981d3a80fL,0xfde0c841c8e02432L,0x78203b3e8149e0c1L,
+ 0x5904bdbb08053a73L },
+ { 0x30fc1dd1101b6805L,0x43c223bc49aa6d49L,0x9ed671417a174087L,
+ 0x311469a0d5997008L } },
+ /* 40 << 119 */
+ { { 0xb189b6845e43fc61L,0xf3282375e0d3ab57L,0x4fa34b67b1181da8L,
+ 0x621ed0b299ee52b8L },
+ { 0x9b178de1ad990676L,0xd51de67b56d54065L,0x2a2c27c47538c201L,
+ 0x33856ec838a40f5cL } },
+ /* 41 << 119 */
+ { { 0x2522fc15be6cdcdeL,0x1e603f339f0c6f89L,0x7994edc3103e30a6L,
+ 0x033a00db220c853eL },
+ { 0xd3cfa409f7bb7fd7L,0x70f8781e462d18f6L,0xbbd82980687fe295L,
+ 0x6eef4c32595669f3L } },
+ /* 42 << 119 */
+ { { 0x86a9303b2f7e85c3L,0x5fce462171988f9bL,0x5b935bf6c138acb5L,
+ 0x30ea7d6725661212L },
+ { 0xef1eb5f4e51ab9a2L,0x0587c98aae067c78L,0xb3ce1b3c77ca9ca6L,
+ 0x2a553d4d54b5f057L } },
+ /* 43 << 119 */
+ { { 0xc78982364da29ec2L,0xdbdd5d13b9c57316L,0xc57d6e6b2cd80d47L,
+ 0x80b460cffe9e7391L },
+ { 0x98648cabf963c31eL,0x67f9f633cc4d32fdL,0x0af42a9dfdf7c687L,
+ 0x55f292a30b015ea7L } },
+ /* 44 << 119 */
+ { { 0x89e468b2cd21ab3dL,0xe504f022c393d392L,0xab21e1d4a5013af9L,
+ 0xe3283f78c2c28acbL },
+ { 0xf38b35f6226bf99fL,0xe83542740e291e69L,0x61673a15b20c162dL,
+ 0xc101dc75b04fbdbeL } },
+ /* 45 << 119 */
+ { { 0x8323b4c2255bd617L,0x6c9696936c2a9154L,0xc6e6586062679387L,
+ 0x8e01db0cb8c88e23L },
+ { 0x33c42873893a5559L,0x7630f04b47a3e149L,0xb5d80805ddcf35f8L,
+ 0x582ca08077dfe732L } },
+ /* 46 << 119 */
+ { { 0x2c7156e10b1894a0L,0x92034001d81c68c0L,0xed225d00c8b115b5L,
+ 0x237f9c2283b907f2L },
+ { 0x0ea2f32f4470e2c0L,0xb725f7c158be4e95L,0x0f1dcafab1ae5463L,
+ 0x59ed51871ba2fc04L } },
+ /* 47 << 119 */
+ { { 0xf6e0f316d0115d4dL,0x5180b12fd3691599L,0x157e32c9527f0a41L,
+ 0x7b0b081da8e0ecc0L },
+ { 0x6dbaaa8abf4f0dd0L,0x99b289c74d252696L,0x79b7755edbf864feL,
+ 0x6974e2b176cad3abL } },
+ /* 48 << 119 */
+ { { 0x35dbbee206ddd657L,0xe7cbdd112ff3a96dL,0x88381968076be758L,
+ 0x2d737e7208c91f5dL },
+ { 0x5f83ab6286ec3776L,0x98aa649d945fa7a1L,0xf477ec3772ef0933L,
+ 0x66f52b1e098c17b1L } },
+ /* 49 << 119 */
+ { { 0x9eec58fbd803738bL,0x91aaade7e4e86aa4L,0x6b1ae617a5b51492L,
+ 0x63272121bbc45974L },
+ { 0x7e0e28f0862c5129L,0x0a8f79a93321a4a0L,0xe26d16645041c88fL,
+ 0x0571b80553233e3aL } },
+ /* 50 << 119 */
+ { { 0xd1b0ccdec9520711L,0x55a9e4ed3c8b84bfL,0x9426bd39a1fef314L,
+ 0x4f5f638e6eb93f2bL },
+ { 0xba2a1ed32bf9341bL,0xd63c13214d42d5a9L,0xd2964a89316dc7c5L,
+ 0xd1759606ca511851L } },
+ /* 51 << 119 */
+ { { 0xd8a9201ff9e6ed35L,0xb7b5ee456736925aL,0x0a83fbbc99581af7L,
+ 0x3076bc4064eeb051L },
+ { 0x5511c98c02dec312L,0x270de898238dcb78L,0x2cf4cf9c539c08c9L,
+ 0xa70cb65e38d3b06eL } },
+ /* 52 << 119 */
+ { { 0xb12ec10ecfe57bbdL,0x82c7b65635a0c2b5L,0xddc7d5cd161c67bdL,
+ 0xe32e8985ae3a32ccL },
+ { 0x7aba9444d11a5529L,0xe964ed022427fa1aL,0x1528392d24a1770aL,
+ 0xa152ce2c12c72fcdL } },
+ /* 53 << 119 */
+ { { 0x714553a48ec07649L,0x18b4c290459dd453L,0xea32b7147b64b110L,
+ 0xb871bfa52e6f07a2L },
+ { 0xb67112e59e2e3c9bL,0xfbf250e544aa90f6L,0xf77aedb8bd539006L,
+ 0x3b0cdf9ad172a66fL } },
+ /* 54 << 119 */
+ { { 0xedf69feaf8c51187L,0x05bb67ec741e4da7L,0x47df0f3208114345L,
+ 0x56facb07bb9792b1L },
+ { 0xf3e007e98f6229e4L,0x62d103f4526fba0fL,0x4f33bef7b0339d79L,
+ 0x9841357bb59bfec1L } },
+ /* 55 << 119 */
+ { { 0xfa8dbb59c34e6705L,0xc3c7180b7fdaa84cL,0xf95872fca4108537L,
+ 0x8750cc3b932a3e5aL },
+ { 0xb61cc69db7275d7dL,0xffa0168b2e59b2e9L,0xca032abc6ecbb493L,
+ 0x1d86dbd32c9082d8L } },
+ /* 56 << 119 */
+ { { 0xae1e0b67e28ef5baL,0x2c9a4699cb18e169L,0x0ecd0e331e6bbd20L,
+ 0x571b360eaf5e81d2L },
+ { 0xcd9fea58101c1d45L,0x6651788e18880452L,0xa99726351f8dd446L,
+ 0x44bed022e37281d0L } },
+ /* 57 << 119 */
+ { { 0x094b2b2d33da525dL,0xf193678e13144fd8L,0xb8ab5ba4f4c1061dL,
+ 0x4343b5fadccbe0f4L },
+ { 0xa870237163812713L,0x47bf6d2df7611d93L,0x46729b8cbd21e1d7L,
+ 0x7484d4e0d629e77dL } },
+ /* 58 << 119 */
+ { { 0x830e6eea60dbac1fL,0x23d8c484da06a2f7L,0x896714b050ca535bL,
+ 0xdc8d3644ebd97a9bL },
+ { 0x106ef9fab12177b4L,0xf79bf464534d5d9cL,0x2537a349a6ab360bL,
+ 0xc7c54253a00c744fL } },
+ /* 59 << 119 */
+ { { 0xb3c7a047e5911a76L,0x61ffa5c8647f1ee7L,0x15aed36f8f56ab42L,
+ 0x6a0d41b0a3ff9ac9L },
+ { 0x68f469f5cc30d357L,0xbe9adf816b72be96L,0x1cd926fe903ad461L,
+ 0x7e89e38fcaca441bL } },
+ /* 60 << 119 */
+ { { 0xf0f82de5facf69d4L,0x363b7e764775344cL,0x6894f312b2e36d04L,
+ 0x3c6cb4fe11d1c9a5L },
+ { 0x85d9c3394008e1f2L,0x5e9a85ea249f326cL,0xdc35c60a678c5e06L,
+ 0xc08b944f9f86fba9L } },
+ /* 61 << 119 */
+ { { 0xde40c02c89f71f0fL,0xad8f3e31ff3da3c0L,0x3ea5096b42125dedL,
+ 0x13879cbfa7379183L },
+ { 0x6f4714a56b306a0bL,0x359c2ea667646c5eL,0xfacf894307726368L,
+ 0x07a5893565ff431eL } },
+ /* 62 << 119 */
+ { { 0x24d661d168754ab0L,0x801fce1d6f429a76L,0xc068a85fa58ce769L,
+ 0xedc35c545d5eca2bL },
+ { 0xea31276fa3f660d1L,0xa0184ebeb8fc7167L,0x0f20f21a1d8db0aeL,
+ 0xd96d095f56c35e12L } },
+ /* 63 << 119 */
+ { { 0xedf402b5f8c2a25bL,0x1bb772b9059204b6L,0x50cbeae219b4e34cL,
+ 0x93109d803fa0845aL },
+ { 0x54f7ccf78ef59fb5L,0x3b438fe288070963L,0x9e28c65931f3ba9bL,
+ 0x9cc31b46ead9da92L } },
+ /* 64 << 119 */
+ { { 0x3c2f0ba9b733aa5fL,0xdece47cbf05af235L,0xf8e3f715a2ac82a5L,
+ 0xc97ba6412203f18aL },
+ { 0xc3af550409c11060L,0x56ea2c0546af512dL,0xfac28daff3f28146L,
+ 0x87fab43a959ef494L } },
+ /* 0 << 126 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 126 */
+ { { 0x09891641d4c5105fL,0x1ae80f8e6d7fbd65L,0x9d67225fbee6bdb0L,
+ 0x3b433b597fc4d860L },
+ { 0x44e66db693e85638L,0xf7b59252e3e9862fL,0xdb785157665c32ecL,
+ 0x702fefd7ae362f50L } },
+ /* 2 << 126 */
+ { { 0x3754475d0fefb0c3L,0xd48fb56b46d7c35dL,0xa070b633363798a4L,
+ 0xae89f3d28fdb98e6L },
+ { 0x970b89c86363d14cL,0x8981752167abd27dL,0x9bf7d47444d5a021L,
+ 0xb3083bafcac72aeeL } },
+ /* 3 << 126 */
+ { { 0x389741debe949a44L,0x638e9388546a4fa5L,0x3fe6419ca0047bdcL,
+ 0x7047f648aaea57caL },
+ { 0x54e48a9041fbab17L,0xda8e0b28576bdba2L,0xe807eebcc72afddcL,
+ 0x07d3336df42577bfL } },
+ /* 4 << 126 */
+ { { 0x62a8c244bfe20925L,0x91c19ac38fdce867L,0x5a96a5d5dd387063L,
+ 0x61d587d421d324f6L },
+ { 0xe87673a2a37173eaL,0x2384800853778b65L,0x10f8441e05bab43eL,
+ 0xfa11fe124621efbeL } },
+ /* 5 << 126 */
+ { { 0x047b772e81685d7bL,0x23f27d81bf34a976L,0xc27608e2915f48efL,
+ 0x3b0b43faa521d5c3L },
+ { 0x7613fb2663ca7284L,0x7f5729b41d4db837L,0x87b14898583b526bL,
+ 0x00b732a6bbadd3d1L } },
+ /* 6 << 126 */
+ { { 0x8e02f4262048e396L,0x436b50b6383d9de4L,0xf78d3481471e85adL,
+ 0x8b01ea6ad005c8d6L },
+ { 0xd3c7afee97015c07L,0x46cdf1a94e3ba2aeL,0x7a42e50183d3a1d2L,
+ 0xd54b5268b541dff4L } },
+ /* 7 << 126 */
+ { { 0x3f24cf304e23e9bcL,0x4387f816126e3624L,0x26a46a033b0b6d61L,
+ 0xaf1bc8458b2d777cL },
+ { 0x25c401ba527de79cL,0x0e1346d44261bbb6L,0x4b96c44b287b4bc7L,
+ 0x658493c75254562fL } },
+ /* 8 << 126 */
+ { { 0x23f949feb8a24a20L,0x17ebfed1f52ca53fL,0x9b691bbebcfb4853L,
+ 0x5617ff6b6278a05dL },
+ { 0x241b34c5e3c99ebdL,0xfc64242e1784156aL,0x4206482f695d67dfL,
+ 0xb967ce0eee27c011L } },
+ /* 9 << 126 */
+ { { 0x65db375121c80b5dL,0x2e7a563ca31ecca0L,0xe56ffc4e5238a07eL,
+ 0x3d6c296632ced854L },
+ { 0xe99d7d1aaf70b885L,0xafc3bad92d686459L,0x9c78bf460cc8ba5bL,
+ 0x5a43951918955aa3L } },
+ /* 10 << 126 */
+ { { 0xf8b517a85fe4e314L,0xe60234d0fcb8906fL,0xffe542acf2061b23L,
+ 0x287e191f6b4cb59cL },
+ { 0x21857ddc09d877d8L,0x1c23478c14678941L,0xbbf0c056b6e05ea4L,
+ 0x82da4b53b01594feL } },
+ /* 11 << 126 */
+ { { 0xf7526791fadb8608L,0x049e832d7b74cdf6L,0xa43581ccc2b90a34L,
+ 0x73639eb89360b10cL },
+ { 0x4fba331fe1e4a71bL,0x6ffd6b938072f919L,0x6e53271c65679032L,
+ 0x67206444f14272ceL } },
+ /* 12 << 126 */
+ { { 0xc0f734a3b2335834L,0x9526205a90ef6860L,0xcb8be71704e2bb0dL,
+ 0x2418871e02f383faL },
+ { 0xd71776814082c157L,0xcc914ad029c20073L,0xf186c1ebe587e728L,
+ 0x6fdb3c2261bcd5fdL } },
+ /* 13 << 126 */
+ { { 0x30d014a6f2f9f8e9L,0x963ece234fec49d2L,0x862025c59605a8d9L,
+ 0x3987444519f8929aL },
+ { 0x01b6ff6512bf476aL,0x598a64d809cf7d91L,0xd7ec774993be56caL,
+ 0x10899785cbb33615L } },
+ /* 14 << 126 */
+ { { 0xb8a092fd02eee3adL,0xa86b3d3530145270L,0x323d98c68512b675L,
+ 0x4b8bc78562ebb40fL },
+ { 0x7d301f54413f9cdeL,0xa5e4fb4f2bab5664L,0x1d2b252d1cbfec23L,
+ 0xfcd576bbe177120dL } },
+ /* 15 << 126 */
+ { { 0x04427d3e83731a34L,0x2bb9028eed836e8eL,0xb36acff8b612ca7cL,
+ 0xb88fe5efd3d9c73aL },
+ { 0xbe2a6bc6edea4eb3L,0x43b93133488eec77L,0xf41ff566b17106e1L,
+ 0x469e9172654efa32L } },
+ /* 16 << 126 */
+ { { 0xb4480f0441c23fa3L,0xb4712eb0c1989a2eL,0x3ccbba0f93a29ca7L,
+ 0x6e205c14d619428cL },
+ { 0x90db7957b3641686L,0x0432691d45ac8b4eL,0x07a759acf64e0350L,
+ 0x0514d89c9c972517L } },
+ /* 17 << 126 */
+ { { 0x1701147fa8e67fc3L,0x9e2e0b8bab2085beL,0xd5651824ac284e57L,
+ 0x890d432574893664L },
+ { 0x8a7c5e6ec55e68a3L,0xbf12e90b4339c85aL,0x31846b85f922b655L,
+ 0x9a54ce4d0bf4d700L } },
+ /* 18 << 126 */
+ { { 0xd7f4e83af1a14295L,0x916f955cb285d4f9L,0xe57bb0e099ffdabaL,
+ 0x28a43034eab0d152L },
+ { 0x0a36ffa2b8a9cef8L,0x5517407eb9ec051aL,0x9c796096ea68e672L,
+ 0x853db5fbfb3c77fbL } },
+ /* 19 << 126 */
+ { { 0x21474ba9e864a51aL,0x6c2676996e8a1b8bL,0x7c82362694120a28L,
+ 0xe61e9a488383a5dbL },
+ { 0x7dd750039f84216dL,0xab020d07ad43cd85L,0x9437ae48da12c659L,
+ 0x6449c2ebe65452adL } },
+ /* 20 << 126 */
+ { { 0xcc7c4c1c2cf9d7c1L,0x1320886aee95e5abL,0xbb7b9056beae170cL,
+ 0xc8a5b250dbc0d662L },
+ { 0x4ed81432c11d2303L,0x7da669121f03769fL,0x3ac7a5fd84539828L,
+ 0x14dada943bccdd02L } },
+ /* 21 << 126 */
+ { { 0x8b84c3217ef6b0d1L,0x52a9477a7c933f22L,0x5ef6728afd440b82L,
+ 0x5c3bd8596ce4bd5eL },
+ { 0x918b80f5f22c2d3eL,0x368d5040b7bb6cc5L,0xb66142a12695a11cL,
+ 0x60ac583aeb19ea70L } },
+ /* 22 << 126 */
+ { { 0x317cbb980eab2437L,0x8cc08c555e2654c8L,0xfe2d6520e6d8307fL,
+ 0xe9f147f357428993L },
+ { 0x5f9c7d14d2fd6cf1L,0xa3ecd0642d4fcbb0L,0xad83fef08e7341f7L,
+ 0x643f23a03a63115cL } },
+ /* 23 << 126 */
+ { { 0xd38a78abe65ab743L,0xbf7c75b135edc89cL,0x3dd8752e530df568L,
+ 0xf85c4a76e308c682L },
+ { 0x4c9955b2e68acf37L,0xa544df3dab32af85L,0x4b8ec3f5a25cf493L,
+ 0x4d8f27641a622febL } },
+ /* 24 << 126 */
+ { { 0x7bb4f7aaf0dcbc49L,0x7de551f970bbb45bL,0xcfd0f3e49f2ca2e5L,
+ 0xece587091f5c76efL },
+ { 0x32920edd167d79aeL,0x039df8a2fa7d7ec1L,0xf46206c0bb30af91L,
+ 0x1ff5e2f522676b59L } },
+ /* 25 << 126 */
+ { { 0x11f4a0396ea51d66L,0x506c1445807d7a26L,0x60da5705755a9b24L,
+ 0x8fc8cc321f1a319eL },
+ { 0x83642d4d9433d67dL,0x7fa5cb8f6a7dd296L,0x576591db9b7bde07L,
+ 0x13173d25419716fbL } },
+ /* 26 << 126 */
+ { { 0xea30599dd5b340ffL,0xfc6b5297b0fe76c5L,0x1c6968c8ab8f5adcL,
+ 0xf723c7f5901c928dL },
+ { 0x4203c3219773d402L,0xdf7c6aa31b51dd47L,0x3d49e37a552be23cL,
+ 0x57febee80b5a6e87L } },
+ /* 27 << 126 */
+ { { 0xc5ecbee47bd8e739L,0x79d44994ae63bf75L,0x168bd00f38fb8923L,
+ 0x75d48ee4d0533130L },
+ { 0x554f77aadb5cdf33L,0x3396e8963c696769L,0x2fdddbf2d3fd674eL,
+ 0xbbb8f6ee99d0e3e5L } },
+ /* 28 << 126 */
+ { { 0x51b90651cbae2f70L,0xefc4bc0593aaa8ebL,0x8ecd8689dd1df499L,
+ 0x1aee99a822f367a5L },
+ { 0x95d485b9ae8274c5L,0x6c14d4457d30b39cL,0xbafea90bbcc1ef81L,
+ 0x7c5f317aa459a2edL } },
+ /* 29 << 126 */
+ { { 0x012110754ef44227L,0xa17bed6edc20f496L,0x0cdfe424819853cdL,
+ 0x13793298f71e2ce7L },
+ { 0x3c1f3078dbbe307bL,0x6dd1c20e76ee9936L,0x23ee4b57423caa20L,
+ 0x4ac3793b8efb840eL } },
+ /* 30 << 126 */
+ { { 0x934438ebed1f8ca0L,0x3e5466584ebb25a2L,0xc415af0ec069896fL,
+ 0xc13eddb09a5aa43dL },
+ { 0x7a04204fd49eb8f6L,0xd0d5bdfcd74f1670L,0x3697e28656fc0558L,
+ 0x1020737101cebadeL } },
+ /* 31 << 126 */
+ { { 0x5f87e6900647a82bL,0x908e0ed48f40054fL,0xa9f633d479853803L,
+ 0x8ed13c9a4a28b252L },
+ { 0x3e2ef6761f460f64L,0x53930b9b36d06336L,0x347073ac8fc4979bL,
+ 0x84380e0e5ecd5597L } },
+ /* 32 << 126 */
+ { { 0xe3b22c6bc4fe3c39L,0xba4a81536c7bebdfL,0xf23ab6b725693459L,
+ 0x53bc377014922b11L },
+ { 0x4645c8ab5afc60dbL,0xaa02235520b9f2a3L,0x52a2954cce0fc507L,
+ 0x8c2731bb7ce1c2e7L } },
+ /* 33 << 126 */
+ { { 0xf39608ab18a0339dL,0xac7a658d3735436cL,0xb22c2b07cd992b4fL,
+ 0x4e83daecf40dcfd4L },
+ { 0x8a34c7be2f39ea3eL,0xef0c005fb0a56d2eL,0x62731f6a6edd8038L,
+ 0x5721d7404e3cb075L } },
+ /* 34 << 126 */
+ { { 0x1ea41511fbeeee1bL,0xd1ef5e73ef1d0c05L,0x42feefd173c07d35L,
+ 0xe530a00a8a329493L },
+ { 0x5d55b7fef15ebfb0L,0x549de03cd322491aL,0xf7b5f602745b3237L,
+ 0x3632a3a21ab6e2b6L } },
+ /* 35 << 126 */
+ { { 0x0d3bba890ef59f78L,0x0dfc6443c9e52b9aL,0x1dc7969972631447L,
+ 0xef033917b3be20b1L },
+ { 0x0c92735db1383948L,0xc1fc29a2c0dd7d7dL,0x6485b697403ed068L,
+ 0x13bfaab3aac93bdcL } },
+ /* 36 << 126 */
+ { { 0x410dc6a90deeaf52L,0xb003fb024c641c15L,0x1384978c5bc504c4L,
+ 0x37640487864a6a77L },
+ { 0x05991bc6222a77daL,0x62260a575e47eb11L,0xc7af6613f21b432cL,
+ 0x22f3acc9ab4953e9L } },
+ /* 37 << 126 */
+ { { 0x529349228e41d155L,0x4d0245683ac059efL,0xb02017554d884411L,
+ 0xce8055cfa59a178fL },
+ { 0xcd77d1aff6204549L,0xa0a00a3ec7066759L,0x471071ef0272c229L,
+ 0x009bcf6bd3c4b6b0L } },
+ /* 38 << 126 */
+ { { 0x2a2638a822305177L,0xd51d59df41645bbfL,0xa81142fdc0a7a3c0L,
+ 0xa17eca6d4c7063eeL },
+ { 0x0bb887ed60d9dcecL,0xd6d28e5120ad2455L,0xebed6308a67102baL,
+ 0x042c31148bffa408L } },
+ /* 39 << 126 */
+ { { 0xfd099ac58aa68e30L,0x7a6a3d7c1483513eL,0xffcc6b75ba2d8f0cL,
+ 0x54dacf961e78b954L },
+ { 0xf645696fa4a9af89L,0x3a41194006ac98ecL,0x41b8b3f622a67a20L,
+ 0x2d0b1e0f99dec626L } },
+ /* 40 << 126 */
+ { { 0x27c8919240be34e8L,0xc7162b3791907f35L,0x90188ec1a956702bL,
+ 0xca132f7ddf93769cL },
+ { 0x3ece44f90e2025b4L,0x67aaec690c62f14cL,0xad74141822e3cc11L,
+ 0xcf9b75c37ff9a50eL } },
+ /* 41 << 126 */
+ { { 0x02fa2b164d348272L,0xbd99d61a9959d56dL,0xbc4f19db18762916L,
+ 0xcc7cce5049c1ac80L },
+ { 0x4d59ebaad846bd83L,0x8775a9dca9202849L,0x07ec4ae16e1f4ca9L,
+ 0x27eb5875ba893f11L } },
+ /* 42 << 126 */
+ { { 0x00284d51662cc565L,0x82353a6b0db4138dL,0xd9c7aaaaaa32a594L,
+ 0xf5528b5ea5669c47L },
+ { 0xf32202312f23c5ffL,0xe3e8147a6affa3a1L,0xfb423d5c202ddda0L,
+ 0x3d6414ac6b871bd4L } },
+ /* 43 << 126 */
+ { { 0x586f82e1a51a168aL,0xb712c67148ae5448L,0x9a2e4bd176233eb8L,
+ 0x0188223a78811ca9L },
+ { 0x553c5e21f7c18de1L,0x7682e451b27bb286L,0x3ed036b30e51e929L,
+ 0xf487211bec9cb34fL } },
+ /* 44 << 126 */
+ { { 0x0d0942770c24efc8L,0x0349fd04bef737a4L,0x6d1c9dd2514cdd28L,
+ 0x29c135ff30da9521L },
+ { 0xea6e4508f78b0b6fL,0x176f5dd2678c143cL,0x081484184be21e65L,
+ 0x27f7525ce7df38c4L } },
+ /* 45 << 126 */
+ { { 0x1fb70e09748ab1a4L,0x9cba50a05efe4433L,0x7846c7a615f75af2L,
+ 0x2a7c2c575ee73ea8L },
+ { 0x42e566a43f0a449aL,0x45474c3bad90fc3dL,0x7447be3d8b61d057L,
+ 0x3e9d1cf13a4ec092L } },
+ /* 46 << 126 */
+ { { 0x1603e453f380a6e6L,0x0b86e4319b1437c2L,0x7a4173f2ef29610aL,
+ 0x8fa729a7f03d57f7L },
+ { 0x3e186f6e6c9c217eL,0xbe1d307991919524L,0x92a62a70153d4fb1L,
+ 0x32ed3e34d68c2f71L } },
+ /* 47 << 126 */
+ { { 0xd785027f9eb1a8b7L,0xbc37eb77c5b22fe8L,0x466b34f0b9d6a191L,
+ 0x008a89af9a05f816L },
+ { 0x19b028fb7d42c10aL,0x7fe8c92f49b3f6b8L,0x58907cc0a5a0ade3L,
+ 0xb3154f51559d1a7cL } },
+ /* 48 << 126 */
+ { { 0x5066efb6d9790ed6L,0xa77a0cbca6aa793bL,0x1a915f3c223e042eL,
+ 0x1c5def0469c5874bL },
+ { 0x0e83007873b6c1daL,0x55cf85d2fcd8557aL,0x0f7c7c760460f3b1L,
+ 0x87052acb46e58063L } },
+ /* 49 << 126 */
+ { { 0x09212b80907eae66L,0x3cb068e04d721c89L,0xa87941aedd45ac1cL,
+ 0xde8d5c0d0daa0dbbL },
+ { 0xda421fdce3502e6eL,0xc89442014d89a084L,0x7307ba5ef0c24bfbL,
+ 0xda212beb20bde0efL } },
+ /* 50 << 126 */
+ { { 0xea2da24bf82ce682L,0x058d381607f71fe4L,0x35a024625ffad8deL,
+ 0xcd7b05dcaadcefabL },
+ { 0xd442f8ed1d9f54ecL,0x8be3d618b2d3b5caL,0xe2220ed0e06b2ce2L,
+ 0x82699a5f1b0da4c0L } },
+ /* 51 << 126 */
+ { { 0x3ff106f571c0c3a7L,0x8f580f5a0d34180cL,0x4ebb120e22d7d375L,
+ 0x5e5782cce9513675L },
+ { 0x2275580c99c82a70L,0xe8359fbf15ea8c4cL,0x53b48db87b415e70L,
+ 0xaacf2240100c6014L } },
+ /* 52 << 126 */
+ { { 0x9faaccf5e4652f1dL,0xbd6fdd2ad56157b2L,0xa4f4fb1f6261ec50L,
+ 0x244e55ad476bcd52L },
+ { 0x881c9305047d320bL,0x1ca983d56181263fL,0x354e9a44278fb8eeL,
+ 0xad2dbc0f396e4964L } },
+ /* 53 << 126 */
+ { { 0x723f3aa29268b3deL,0x0d1ca29ae6e0609aL,0x794866aa6cf44252L,
+ 0x0b59f3e301af87edL },
+ { 0xe234e5ff7f4a6c51L,0xa8768fd261dc2f7eL,0xdafc73320a94d81fL,
+ 0xd7f8428206938ce1L } },
+ /* 54 << 126 */
+ { { 0xae0b3c0e0546063eL,0x7fbadcb25d61abc6L,0xd5d7a2c9369ac400L,
+ 0xa5978d09ae67d10cL },
+ { 0x290f211e4f85eaacL,0xe61e2ad1facac681L,0xae125225388384cdL,
+ 0xa7fb68e9ccfde30fL } },
+ /* 55 << 126 */
+ { { 0x7a59b9363daed4c2L,0x80a9aa402606f789L,0xb40c1ea5f6a6d90aL,
+ 0x948364d3514d5885L },
+ { 0x062ebc6070985182L,0xa6db5b0e33310895L,0x64a12175e329c2f5L,
+ 0xc5f25bd290ea237eL } },
+ /* 56 << 126 */
+ { { 0x7915c5242d0a4c23L,0xeb5d26e46bb3cc52L,0x369a9116c09e2c92L,
+ 0x0c527f92cf182cf8L },
+ { 0x9e5919382aede0acL,0xb29222086cc34939L,0x3c9d896299a34361L,
+ 0x3c81836dc1905fe6L } },
+ /* 57 << 126 */
+ { { 0x4bfeb57fa001ec5aL,0xe993f5bba0dc5dbaL,0x47884109724a1380L,
+ 0x8a0369ab32fe9a04L },
+ { 0xea068d608c927db8L,0xbf5f37cf94655741L,0x47d402a204b6c7eaL,
+ 0x4551c2956af259cbL } },
+ /* 58 << 126 */
+ { { 0x698b71e7ed77ee8bL,0xbddf7bd0f309d5c7L,0x6201c22c34e780caL,
+ 0xab04f7d84c295ef4L },
+ { 0x1c9472944313a8ceL,0xe532e4ac92ca4cfeL,0x89738f80d0a7a97aL,
+ 0xec088c88a580fd5bL } },
+ /* 59 << 126 */
+ { { 0x612b1ecc42ce9e51L,0x8f9840fdb25fdd2aL,0x3cda78c001e7f839L,
+ 0x546b3d3aece05480L },
+ { 0x271719a980d30916L,0x45497107584c20c4L,0xaf8f94785bc78608L,
+ 0x28c7d484277e2a4cL } },
+ /* 60 << 126 */
+ { { 0xfce0176788a2ffe4L,0xdc506a3528e169a5L,0x0ea108617af9c93aL,
+ 0x1ed2436103fa0e08L },
+ { 0x96eaaa92a3d694e7L,0xc0f43b4def50bc74L,0xce6aa58c64114db4L,
+ 0x8218e8ea7c000fd4L } },
+ /* 61 << 126 */
+ { { 0xac815dfb185f8844L,0xcd7e90cb1557abfbL,0x23d16655afbfecdfL,
+ 0x80f3271f085cac4aL },
+ { 0x7fc39aa7d0e62f47L,0x88d519d1460a48e5L,0x59559ac4d28f101eL,
+ 0x7981d9e9ca9ae816L } },
+ /* 62 << 126 */
+ { { 0x5c38652c9ac38203L,0x86eaf87f57657fe5L,0x568fc472e21f5416L,
+ 0x2afff39ce7e597b5L },
+ { 0x3adbbb07256d4eabL,0x225986928285ab89L,0x35f8112a041caefeL,
+ 0x95df02e3a5064c8bL } },
+ /* 63 << 126 */
+ { { 0x4d63356ec7004bf3L,0x230a08f4db83c7deL,0xca27b2708709a7b7L,
+ 0x0d1c4cc4cb9abd2dL },
+ { 0x8a0bc66e7550fee8L,0x369cd4c79cf7247eL,0x75562e8492b5b7e7L,
+ 0x8fed0da05802af7bL } },
+ /* 64 << 126 */
+ { { 0x6a7091c2e48fb889L,0x26882c137b8a9d06L,0xa24986631b82a0e2L,
+ 0x844ed7363518152dL },
+ { 0x282f476fd86e27c7L,0xa04edaca04afefdcL,0x8b256ebc6119e34dL,
+ 0x56a413e90787d78bL } },
+ /* 0 << 133 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 133 */
+ { { 0x82ee061d5a74be50L,0xe41781c4dea16ff5L,0xe0b0c81e99bfc8a2L,
+ 0x624f4d690b547e2dL },
+ { 0x3a83545dbdcc9ae4L,0x2573dbb6409b1e8eL,0x482960c4a6c93539L,
+ 0xf01059ad5ae18798L } },
+ /* 2 << 133 */
+ { { 0x715c9f973112795fL,0xe8244437984e6ee1L,0x55cb4858ecb66bcdL,
+ 0x7c136735abaffbeeL },
+ { 0x546615955dbec38eL,0x51c0782c388ad153L,0x9ba4c53ac6e0952fL,
+ 0x27e6782a1b21dfa8L } },
+ /* 3 << 133 */
+ { { 0x682f903d4ed2dbc2L,0x0eba59c87c3b2d83L,0x8e9dc84d9c7e9335L,
+ 0x5f9b21b00eb226d7L },
+ { 0xe33bd394af267baeL,0xaa86cc25be2e15aeL,0x4f0bf67d6a8ec500L,
+ 0x5846aa44f9630658L } },
+ /* 4 << 133 */
+ { { 0xfeb09740e2c2bf15L,0x627a2205a9e99704L,0xec8d73d0c2fbc565L,
+ 0x223eed8fc20c8de8L },
+ { 0x1ee32583a8363b49L,0x1a0b6cb9c9c2b0a6L,0x49f7c3d290dbc85cL,
+ 0xa8dfbb971ef4c1acL } },
+ /* 5 << 133 */
+ { { 0xafb34d4c65c7c2abL,0x1d4610e7e2c5ea84L,0x893f6d1b973c4ab5L,
+ 0xa3cdd7e9945ba5c4L },
+ { 0x60514983064417eeL,0x1459b23cad6bdf2bL,0x23b2c3415cf726c3L,
+ 0x3a82963532d6354aL } },
+ /* 6 << 133 */
+ { { 0x294f901fab192c18L,0xec5fcbfe7030164fL,0xe2e2fcb7e2246ba6L,
+ 0x1e7c88b3221a1a0cL },
+ { 0x72c7dd93c92d88c5L,0x41c2148e1106fb59L,0x547dd4f5a0f60f14L,
+ 0xed9b52b263960f31L } },
+ /* 7 << 133 */
+ { { 0x6c8349ebb0a5b358L,0xb154c5c29e7e2ed6L,0xcad5eccfeda462dbL,
+ 0xf2d6dbe42de66b69L },
+ { 0x426aedf38665e5b2L,0x488a85137b7f5723L,0x15cc43b38bcbb386L,
+ 0x27ad0af3d791d879L } },
+ /* 8 << 133 */
+ { { 0xc16c236e846e364fL,0x7f33527cdea50ca0L,0xc48107750926b86dL,
+ 0x6c2a36090598e70cL },
+ { 0xa6755e52f024e924L,0xe0fa07a49db4afcaL,0x15c3ce7d66831790L,
+ 0x5b4ef350a6cbb0d6L } },
+ /* 9 << 133 */
+ { { 0x2c4aafc4b6205969L,0x42563f02f6c7854fL,0x016aced51d983b48L,
+ 0xfeb356d899949755L },
+ { 0x8c2a2c81d1a39bd7L,0x8f44340fe6934ae9L,0x148cf91c447904daL,
+ 0x7340185f0f51a926L } },
+ /* 10 << 133 */
+ { { 0x2f8f00fb7409ab46L,0x057e78e680e289b2L,0x03e5022ca888e5d1L,
+ 0x3c87111a9dede4e2L },
+ { 0x5b9b0e1c7809460bL,0xe751c85271c9abc7L,0x8b944e28c7cc1dc9L,
+ 0x4f201ffa1d3cfa08L } },
+ /* 11 << 133 */
+ { { 0x02fc905c3e6721ceL,0xd52d70dad0b3674cL,0x5dc2e5ca18810da4L,
+ 0xa984b2735c69dd99L },
+ { 0x63b9252784de5ca4L,0x2f1c9872c852dec4L,0x18b03593c2e3de09L,
+ 0x19d70b019813dc2fL } },
+ /* 12 << 133 */
+ { { 0x42806b2da6dc1d29L,0xd3030009f871e144L,0xa1feb333aaf49276L,
+ 0xb5583b9ec70bc04bL },
+ { 0x1db0be7895695f20L,0xfc84181189d012b5L,0x6409f27205f61643L,
+ 0x40d34174d5883128L } },
+ /* 13 << 133 */
+ { { 0xd79196f567419833L,0x6059e252863b7b08L,0x84da18171c56700cL,
+ 0x5758ee56b28d3ec4L },
+ { 0x7da2771d013b0ea6L,0xfddf524b54c5e9b9L,0x7df4faf824305d80L,
+ 0x58f5c1bf3a97763fL } },
+ /* 14 << 133 */
+ { { 0xa5af37f17c696042L,0xd4cba22c4a2538deL,0x211cb9959ea42600L,
+ 0xcd105f417b069889L },
+ { 0xb1e1cf19ddb81e74L,0x472f2d895157b8caL,0x086fb008ee9db885L,
+ 0x365cd5700f26d131L } },
+ /* 15 << 133 */
+ { { 0x284b02bba2be7053L,0xdcbbf7c67ab9a6d6L,0x4425559c20f7a530L,
+ 0x961f2dfa188767c8L },
+ { 0xe2fd943570dc80c4L,0x104d6b63f0784120L,0x7f592bc153567122L,
+ 0xf6bc1246f688ad77L } },
+ /* 16 << 133 */
+ { { 0x05214c050f15dde9L,0xa47a76a80d5f2b82L,0xbb254d3062e82b62L,
+ 0x11a05fe03ec955eeL },
+ { 0x7eaff46e9d529b36L,0x55ab13018f9e3df6L,0xc463e37199317698L,
+ 0xfd251438ccda47adL } },
+ /* 17 << 133 */
+ { { 0xca9c354723d695eaL,0x48ce626e16e589b5L,0x6b5b64c7b187d086L,
+ 0xd02e1794b2207948L },
+ { 0x8b58e98f7198111dL,0x90ca6305dcf9c3ccL,0x5691fe72f34089b0L,
+ 0x60941af1fc7c80ffL } },
+ /* 18 << 133 */
+ { { 0xa09bc0a222eb51e5L,0xc0bb7244aa9cf09aL,0x36a8077f80159f06L,
+ 0x8b5c989edddc560eL },
+ { 0x19d2f316512e1f43L,0x02eac554ad08ff62L,0x012ab84c07d20b4eL,
+ 0x37d1e115d6d4e4e1L } },
+ /* 19 << 133 */
+ { { 0xb6443e1aab7b19a8L,0xf08d067edef8cd45L,0x63adf3e9685e03daL,
+ 0xcf15a10e4792b916L },
+ { 0xf44bcce5b738a425L,0xebe131d59636b2fdL,0x940688417850d605L,
+ 0x09684eaab40d749dL } },
+ /* 20 << 133 */
+ { { 0x8c3c669c72ba075bL,0x89f78b55ba469015L,0x5706aade3e9f8ba8L,
+ 0x6d8bd565b32d7ed7L },
+ { 0x25f4e63b805f08d6L,0x7f48200dc3bcc1b5L,0x4e801968b025d847L,
+ 0x74afac0487cbe0a8L } },
+ /* 21 << 133 */
+ { { 0x43ed2c2b7e63d690L,0xefb6bbf00223cdb8L,0x4fec3cae2884d3feL,
+ 0x065ecce6d75e25a4L },
+ { 0x6c2294ce69f79071L,0x0d9a8e5f044b8666L,0x5009f23817b69d8fL,
+ 0x3c29f8fec5dfdaf7L } },
+ /* 22 << 133 */
+ { { 0x9067528febae68c4L,0x5b38563230c5ba21L,0x540df1191fdd1aecL,
+ 0xcf37825bcfba4c78L },
+ { 0x77eff980beb11454L,0x40a1a99160c1b066L,0xe8018980f889a1c7L,
+ 0xb9c52ae976c24be0L } },
+ /* 23 << 133 */
+ { { 0x05fbbcce45650ef4L,0xae000f108aa29ac7L,0x884b71724f04c470L,
+ 0x7cd4fde219bb5c25L },
+ { 0x6477b22ae8840869L,0xa88688595fbd0686L,0xf23cc02e1116dfbaL,
+ 0x76cd563fd87d7776L } },
+ /* 24 << 133 */
+ { { 0xe2a37598a9d82abfL,0x5f188ccbe6c170f5L,0x816822005066b087L,
+ 0xda22c212c7155adaL },
+ { 0x151e5d3afbddb479L,0x4b606b846d715b99L,0x4a73b54bf997cb2eL,
+ 0x9a1bfe433ecd8b66L } },
+ /* 25 << 133 */
+ { { 0x1c3128092a67d48aL,0xcd6a671e031fa9e2L,0xbec3312a0e43a34aL,
+ 0x1d93563955ef47d3L },
+ { 0x5ea024898fea73eaL,0x8247b364a035afb2L,0xb58300a65265b54cL,
+ 0x3286662f722c7148L } },
+ /* 26 << 133 */
+ { { 0xb77fd76bb4ec4c20L,0xf0a12fa70f3fe3fdL,0xf845bbf541d8c7e8L,
+ 0xe4d969ca5ec10aa8L },
+ { 0x4c0053b743e232a3L,0xdc7a3fac37f8a45aL,0x3c4261c520d81c8fL,
+ 0xfd4b3453b00eab00L } },
+ /* 27 << 133 */
+ { { 0x76d48f86d36e3062L,0x626c5277a143ff02L,0x538174deaf76f42eL,
+ 0x2267aa866407ceacL },
+ { 0xfad7635172e572d5L,0xab861af7ba7330ebL,0xa0a1c8c7418d8657L,
+ 0x988821cb20289a52L } },
+ /* 28 << 133 */
+ { { 0x79732522cccc18adL,0xaadf3f8df1a6e027L,0xf7382c9317c2354dL,
+ 0x5ce1680cd818b689L },
+ { 0x359ebbfcd9ecbee9L,0x4330689c1cae62acL,0xb55ce5b4c51ac38aL,
+ 0x7921dfeafe238ee8L } },
+ /* 29 << 133 */
+ { { 0x3972bef8271d1ca5L,0x3e423bc7e8aabd18L,0x57b09f3f44a3e5e3L,
+ 0x5da886ae7b444d66L },
+ { 0x68206634a9964375L,0x356a2fa3699cd0ffL,0xaf0faa24dba515e9L,
+ 0x536e1f5cb321d79aL } },
+ /* 30 << 133 */
+ { { 0xd3b9913a5c04e4eaL,0xd549dcfed6f11513L,0xee227bf579fd1d94L,
+ 0x9f35afeeb43f2c67L },
+ { 0xd2638d24f1314f53L,0x62baf948cabcd822L,0x5542de294ef48db0L,
+ 0xb3eb6a04fc5f6bb2L } },
+ /* 31 << 133 */
+ { { 0x23c110ae1208e16aL,0x1a4d15b5f8363e24L,0x30716844164be00bL,
+ 0xa8e24824f6f4690dL },
+ { 0x548773a290b170cfL,0xa1bef33142f191f4L,0x70f418d09247aa97L,
+ 0xea06028e48be9147L } },
+ /* 32 << 133 */
+ { { 0xe13122f3dbfb894eL,0xbe9b79f6ce274b18L,0x85a49de5ca58aadfL,
+ 0x2495775811487351L },
+ { 0x111def61bb939099L,0x1d6a974a26d13694L,0x4474b4ced3fc253bL,
+ 0x3a1485e64c5db15eL } },
+ /* 33 << 133 */
+ { { 0xe79667b4147c15b4L,0xe34f553b7bc61301L,0x032b80f817094381L,
+ 0x55d8bafd723eaa21L },
+ { 0x5a987995f1c0e74eL,0x5a9b292eebba289cL,0x413cd4b2eb4c8251L,
+ 0x98b5d243d162db0aL } },
+ /* 34 << 133 */
+ { { 0xbb47bf6668342520L,0x08d68949baa862d1L,0x11f349c7e906abcdL,
+ 0x454ce985ed7bf00eL },
+ { 0xacab5c9eb55b803bL,0xb03468ea31e3c16dL,0x5c24213dd273bf12L,
+ 0x211538eb71587887L } },
+ /* 35 << 133 */
+ { { 0x198e4a2f731dea2dL,0xd5856cf274ed7b2aL,0x86a632eb13a664feL,
+ 0x932cd909bda41291L },
+ { 0x850e95d4c0c4ddc0L,0xc0f422f8347fc2c9L,0xe68cbec486076bcbL,
+ 0xf9e7c0c0cd6cd286L } },
+ /* 36 << 133 */
+ { { 0x65994ddb0f5f27caL,0xe85461fba80d59ffL,0xff05481a66601023L,
+ 0xc665427afc9ebbfbL },
+ { 0xb0571a697587fd52L,0x935289f88d49efceL,0x61becc60ea420688L,
+ 0xb22639d913a786afL } },
+ /* 37 << 133 */
+ { { 0x1a8e6220361ecf90L,0x001f23e025506463L,0xe4ae9b5d0a5c2b79L,
+ 0xebc9cdadd8149db5L },
+ { 0xb33164a1934aa728L,0x750eb00eae9b60f3L,0x5a91615b9b9cfbfdL,
+ 0x97015cbfef45f7f6L } },
+ /* 38 << 133 */
+ { { 0xb462c4a5bf5151dfL,0x21adcc41b07118f2L,0xd60c545b043fa42cL,
+ 0xfc21aa54e96be1abL },
+ { 0xe84bc32f4e51ea80L,0x3dae45f0259b5d8dL,0xbb73c7ebc38f1b5eL,
+ 0xe405a74ae8ae617dL } },
+ /* 39 << 133 */
+ { { 0xbb1ae9c69f1c56bdL,0x8c176b9849f196a4L,0xc448f3116875092bL,
+ 0xb5afe3de9f976033L },
+ { 0xa8dafd49145813e5L,0x687fc4d9e2b34226L,0xf2dfc92d4c7ff57fL,
+ 0x004e3fc1401f1b46L } },
+ /* 40 << 133 */
+ { { 0x5afddab61430c9abL,0x0bdd41d32238e997L,0xf0947430418042aeL,
+ 0x71f9addacdddc4cbL },
+ { 0x7090c016c52dd907L,0xd9bdf44d29e2047fL,0xe6f1fe801b1011a6L,
+ 0xb63accbcd9acdc78L } },
+ /* 41 << 133 */
+ { { 0xcfc7e2351272a95bL,0x0c667717a6276ac8L,0x3c0d3709e2d7eef7L,
+ 0x5add2b069a685b3eL },
+ { 0x363ad32d14ea5d65L,0xf8e01f068d7dd506L,0xc9ea221375b4aac6L,
+ 0xed2a2bf90d353466L } },
+ /* 42 << 133 */
+ { { 0x439d79b5e9d3a7c3L,0x8e0ee5a681b7f34bL,0xcf3dacf51dc4ba75L,
+ 0x1d3d1773eb3310c7L },
+ { 0xa8e671127747ae83L,0x31f43160197d6b40L,0x0521cceecd961400L,
+ 0x67246f11f6535768L } },
+ /* 43 << 133 */
+ { { 0x702fcc5aef0c3133L,0x247cc45d7e16693bL,0xfd484e49c729b749L,
+ 0x522cef7db218320fL },
+ { 0xe56ef40559ab93b3L,0x225fba119f181071L,0x33bd659515330ed0L,
+ 0xc4be69d51ddb32f7L } },
+ /* 44 << 133 */
+ { { 0x264c76680448087cL,0xac30903f71432daeL,0x3851b26600f9bf47L,
+ 0x400ed3116cdd6d03L },
+ { 0x045e79fef8fd2424L,0xfdfd974afa6da98bL,0x45c9f6410c1e673aL,
+ 0x76f2e7335b2c5168L } },
+ /* 45 << 133 */
+ { { 0x1adaebb52a601753L,0xb286514cc57c2d49L,0xd87696701e0bfd24L,
+ 0x950c547e04478922L },
+ { 0xd1d41969e5d32bfeL,0x30bc1472750d6c3eL,0x8f3679fee0e27f3aL,
+ 0x8f64a7dca4a6ee0cL } },
+ /* 46 << 133 */
+ { { 0x2fe59937633dfb1fL,0xea82c395977f2547L,0xcbdfdf1a661ea646L,
+ 0xc7ccc591b9085451L },
+ { 0x8217796281761e13L,0xda57596f9196885cL,0xbc17e84928ffbd70L,
+ 0x1e6e0a412671d36fL } },
+ /* 47 << 133 */
+ { { 0x61ae872c4152fcf5L,0x441c87b09e77e754L,0xd0799dd5a34dff09L,
+ 0x766b4e4488a6b171L },
+ { 0xdc06a51211f1c792L,0xea02ae934be35c3eL,0xe5ca4d6de90c469eL,
+ 0x4df4368e56e4ff5cL } },
+ /* 48 << 133 */
+ { { 0x7817acab4baef62eL,0x9f5a2202a85b91e8L,0x9666ebe66ce57610L,
+ 0x32ad31f3f73bfe03L },
+ { 0x628330a425bcf4d6L,0xea950593515056e6L,0x59811c89e1332156L,
+ 0xc89cf1fe8c11b2d7L } },
+ /* 49 << 133 */
+ { { 0x75b6391304e60cc0L,0xce811e8d4625d375L,0x030e43fc2d26e562L,
+ 0xfbb30b4b608d36a0L },
+ { 0x634ff82c48528118L,0x7c6fe085cd285911L,0x7f2830c099358f28L,
+ 0x2e60a95e665e6c09L } },
+ /* 50 << 133 */
+ { { 0x08407d3d9b785dbfL,0x530889aba759bce7L,0xf228e0e652f61239L,
+ 0x2b6d14616879be3cL },
+ { 0xe6902c0451a7bbf7L,0x30ad99f076f24a64L,0x66d9317a98bc6da0L,
+ 0xf4f877f3cb596ac0L } },
+ /* 51 << 133 */
+ { { 0xb05ff62d4c44f119L,0x4555f536e9b77416L,0xc7c0d0598caed63bL,
+ 0x0cd2b7cec358b2a9L },
+ { 0x3f33287b46945fa3L,0xf8785b20d67c8791L,0xc54a7a619637bd08L,
+ 0x54d4598c18be79d7L } },
+ /* 52 << 133 */
+ { { 0x889e5acbc46d7ce1L,0x9a515bb78b085877L,0xfac1a03d0b7a5050L,
+ 0x7d3e738af2926035L },
+ { 0x861cc2ce2a6cb0ebL,0x6f2e29558f7adc79L,0x61c4d45133016376L,
+ 0xd9fd2c805ad59090L } },
+ /* 53 << 133 */
+ { { 0xe5a83738b2b836a1L,0x855b41a07c0d6622L,0x186fe3177cc19af1L,
+ 0x6465c1fffdd99acbL },
+ { 0x46e5c23f6974b99eL,0x75a7cf8ba2717cbeL,0x4d2ebc3f062be658L,
+ 0x094b44475f209c98L } },
+ /* 54 << 133 */
+ { { 0x4af285edb940cb5aL,0x6706d7927cc82f10L,0xc8c8776c030526faL,
+ 0xfa8e6f76a0da9140L },
+ { 0x77ea9d34591ee4f0L,0x5f46e33740274166L,0x1bdf98bbea671457L,
+ 0xd7c08b46862a1fe2L } },
+ /* 55 << 133 */
+ { { 0x46cc303c1c08ad63L,0x995434404c845e7bL,0x1b8fbdb548f36bf7L,
+ 0x5b82c3928c8273a7L },
+ { 0x08f712c4928435d5L,0x071cf0f179330380L,0xc74c2d24a8da054aL,
+ 0xcb0e720143c46b5cL } },
+ /* 56 << 133 */
+ { { 0x0ad7337ac0b7eff3L,0x8552225ec5e48b3cL,0xe6f78b0c73f13a5fL,
+ 0x5e70062e82349cbeL },
+ { 0x6b8d5048e7073969L,0x392d2a29c33cb3d2L,0xee4f727c4ecaa20fL,
+ 0xa068c99e2ccde707L } },
+ /* 57 << 133 */
+ { { 0xfcd5651fb87a2913L,0xea3e3c153cc252f0L,0x777d92df3b6cd3e4L,
+ 0x7a414143c5a732e7L },
+ { 0xa895951aa71ff493L,0xfe980c92bbd37cf6L,0x45bd5e64decfeeffL,
+ 0x910dc2a9a44c43e9L } },
+ /* 58 << 133 */
+ { { 0xcb403f26cca9f54dL,0x928bbdfb9303f6dbL,0x3c37951ea9eee67cL,
+ 0x3bd61a52f79961c3L },
+ { 0x09a238e6395c9a79L,0x6940ca2d61eb352dL,0x7d1e5c5ec1875631L,
+ 0x1e19742c1e1b20d1L } },
+ /* 59 << 133 */
+ { { 0x4633d90823fc2e6eL,0xa76e29a908959149L,0x61069d9c84ed7da5L,
+ 0x0baa11cf5dbcad51L },
+ { 0xd01eec64961849daL,0x93b75f1faf3d8c28L,0x57bc4f9f1ca2ee44L,
+ 0x5a26322d00e00558L } },
+ /* 60 << 133 */
+ { { 0x1888d65861a023efL,0x1d72aab4b9e5246eL,0xa9a26348e5563ec0L,
+ 0xa0971963c3439a43L },
+ { 0x567dd54badb9b5b7L,0x73fac1a1c45a524bL,0x8fe97ef7fe38e608L,
+ 0x608748d23f384f48L } },
+ /* 61 << 133 */
+ { { 0xb0571794c486094fL,0x869254a38bf3a8d6L,0x148a8dd1310b0e25L,
+ 0x99ab9f3f9aa3f7d8L },
+ { 0x0927c68a6706c02eL,0x22b5e76c69790e6cL,0x6c3252606c71376cL,
+ 0x53a5769009ef6657L } },
+ /* 62 << 133 */
+ { { 0x8d63f852edffcf3aL,0xb4d2ed043c0a6f55L,0xdb3aa8de12519b9eL,
+ 0x5d38e9c41e0a569aL },
+ { 0x871528bf303747e2L,0xa208e77cf5b5c18dL,0x9d129c88ca6bf923L,
+ 0xbcbf197fbf02839fL } },
+ /* 63 << 133 */
+ { { 0x9b9bf03027323194L,0x3b055a8b339ca59dL,0xb46b23120f669520L,
+ 0x19789f1f497e5f24L },
+ { 0x9c499468aaf01801L,0x72ee11908b69d59cL,0x8bd39595acf4c079L,
+ 0x3ee11ece8e0cd048L } },
+ /* 64 << 133 */
+ { { 0xebde86ec1ed66f18L,0x225d906bd61fce43L,0x5cab07d6e8bed74dL,
+ 0x16e4617f27855ab7L },
+ { 0x6568aaddb2fbc3ddL,0xedb5484f8aeddf5bL,0x878f20e86dcf2fadL,
+ 0x3516497c615f5699L } },
+ /* 0 << 140 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 140 */
+ { { 0xef0a3fecfa181e69L,0x9ea02f8130d69a98L,0xb2e9cf8e66eab95dL,
+ 0x520f2beb24720021L },
+ { 0x621c540a1df84361L,0x1203772171fa6d5dL,0x6e3c7b510ff5f6ffL,
+ 0x817a069babb2bef3L } },
+ /* 2 << 140 */
+ { { 0x83572fb6b294cda6L,0x6ce9bf75b9039f34L,0x20e012f0095cbb21L,
+ 0xa0aecc1bd063f0daL },
+ { 0x57c21c3af02909e5L,0xc7d59ecf48ce9cdcL,0x2732b8448ae336f8L,
+ 0x056e37233f4f85f4L } },
+ /* 3 << 140 */
+ { { 0x8a10b53189e800caL,0x50fe0c17145208fdL,0x9e43c0d3b714ba37L,
+ 0x427d200e34189accL },
+ { 0x05dee24fe616e2c0L,0x9c25f4c8ee1854c1L,0x4d3222a58f342a73L,
+ 0x0807804fa027c952L } },
+ /* 4 << 140 */
+ { { 0xc222653a4f0d56f3L,0x961e4047ca28b805L,0x2c03f8b04a73434bL,
+ 0x4c966787ab712a19L },
+ { 0xcc196c42864fee42L,0xc1be93da5b0ece5cL,0xa87d9f22c131c159L,
+ 0x2bb6d593dce45655L } },
+ /* 5 << 140 */
+ { { 0x22c49ec9b809b7ceL,0x8a41486be2c72c2cL,0x813b9420fea0bf36L,
+ 0xb3d36ee9a66dac69L },
+ { 0x6fddc08a328cc987L,0x0a3bcd2c3a326461L,0x7103c49dd810dbbaL,
+ 0xf9d81a284b78a4c4L } },
+ /* 6 << 140 */
+ { { 0x3de865ade4d55941L,0xdedafa5e30384087L,0x6f414abb4ef18b9bL,
+ 0x9ee9ea42faee5268L },
+ { 0x260faa1637a55a4aL,0xeb19a514015f93b9L,0x51d7ebd29e9c3598L,
+ 0x523fc56d1932178eL } },
+ /* 7 << 140 */
+ { { 0x501d070cb98fe684L,0xd60fbe9a124a1458L,0xa45761c892bc6b3fL,
+ 0xf5384858fe6f27cbL },
+ { 0x4b0271f7b59e763bL,0x3d4606a95b5a8e5eL,0x1eda5d9b05a48292L,
+ 0xda7731d0e6fec446L } },
+ /* 8 << 140 */
+ { { 0xa3e3369390d45871L,0xe976404006166d8dL,0xb5c3368289a90403L,
+ 0x4bd1798372f1d637L },
+ { 0xa616679ed5d2c53aL,0x5ec4bcd8fdcf3b87L,0xae6d7613b66a694eL,
+ 0x7460fc76e3fc27e5L } },
+ /* 9 << 140 */
+ { { 0x70469b8295caabeeL,0xde024ca5889501e3L,0x6bdadc06076ed265L,
+ 0x0cb1236b5a0ef8b2L },
+ { 0x4065ddbf0972ebf9L,0xf1dd387522aca432L,0xa88b97cf744aff76L,
+ 0xd1359afdfe8e3d24L } },
+ /* 10 << 140 */
+ { { 0x52a3ba2b91502cf3L,0x2c3832a8084db75dL,0x04a12dddde30b1c9L,
+ 0x7802eabce31fd60cL },
+ { 0x33707327a37fddabL,0x65d6f2abfaafa973L,0x3525c5b811e6f91aL,
+ 0x76aeb0c95f46530bL } },
+ /* 11 << 140 */
+ { { 0xe8815ff62f93a675L,0xa6ec968405f48679L,0x6dcbb556358ae884L,
+ 0x0af61472e19e3873L },
+ { 0x72334372a5f696beL,0xc65e57ea6f22fb70L,0x268da30c946cea90L,
+ 0x136a8a8765681b2aL } },
+ /* 12 << 140 */
+ { { 0xad5e81dc0f9f44d4L,0xf09a69602c46585aL,0xd1649164c447d1b1L,
+ 0x3b4b36c8879dc8b1L },
+ { 0x20d4177b3b6b234cL,0x096a25051730d9d0L,0x0611b9b8ef80531dL,
+ 0xba904b3b64bb495dL } },
+ /* 13 << 140 */
+ { { 0x1192d9d493a3147aL,0x9f30a5dc9a565545L,0x90b1f9cb6ef07212L,
+ 0x299585460d87fc13L },
+ { 0xd3323effc17db9baL,0xcb18548ccb1644a8L,0x18a306d44f49ffbcL,
+ 0x28d658f14c2e8684L } },
+ /* 14 << 140 */
+ { { 0x44ba60cda99f8c71L,0x67b7abdb4bf742ffL,0x66310f9c914b3f99L,
+ 0xae430a32f412c161L },
+ { 0x1e6776d388ace52fL,0x4bc0fa2452d7067dL,0x03c286aa8f07cd1bL,
+ 0x4cb8f38ca985b2c1L } },
+ /* 15 << 140 */
+ { { 0x83ccbe808c3bff36L,0x005a0bd25263e575L,0x460d7dda259bdcd1L,
+ 0x4a1c5642fa5cab6bL },
+ { 0x2b7bdbb99fe4fc88L,0x09418e28cc97bbb5L,0xd8274fb4a12321aeL,
+ 0xb137007d5c87b64eL } },
+ /* 16 << 140 */
+ { { 0x80531fe1c63c4962L,0x50541e89981fdb25L,0xdc1291a1fd4c2b6bL,
+ 0xc0693a17a6df4fcaL },
+ { 0xb2c4604e0117f203L,0x245f19630a99b8d0L,0xaedc20aac6212c44L,
+ 0xb1ed4e56520f52a8L } },
+ /* 17 << 140 */
+ { { 0xfe48f575f8547be3L,0x0a7033cda9e45f98L,0x4b45d3a918c50100L,
+ 0xb2a6cd6aa61d41daL },
+ { 0x60bbb4f557933c6bL,0xa7538ebd2b0d7ffcL,0x9ea3ab8d8cd626b6L,
+ 0x8273a4843601625aL } },
+ /* 18 << 140 */
+ { { 0x888598450168e508L,0x8cbc9bb299a94abdL,0x713ac792fab0a671L,
+ 0xa3995b196c9ebffcL },
+ { 0xe711668e1239e152L,0x56892558bbb8dff4L,0x8bfc7dabdbf17963L,
+ 0x5b59fe5ab3de1253L } },
+ /* 19 << 140 */
+ { { 0x7e3320eb34a9f7aeL,0xe5e8cf72d751efe4L,0x7ea003bcd9be2f37L,
+ 0xc0f551a0b6c08ef7L },
+ { 0x56606268038f6725L,0x1dd38e356d92d3b6L,0x07dfce7cc3cbd686L,
+ 0x4e549e04651c5da8L } },
+ /* 20 << 140 */
+ { { 0x4058f93b08b19340L,0xc2fae6f4cac6d89dL,0x4bad8a8c8f159cc7L,
+ 0x0ddba4b3cb0b601cL },
+ { 0xda4fc7b51dd95f8cL,0x1d163cd7cea5c255L,0x30707d06274a8c4cL,
+ 0x79d9e0082802e9ceL } },
+ /* 21 << 140 */
+ { { 0x02a29ebfe6ddd505L,0x37064e74b50bed1aL,0x3f6bae65a7327d57L,
+ 0x3846f5f1f83920bcL },
+ { 0x87c3749160df1b9bL,0x4cfb28952d1da29fL,0x10a478ca4ed1743cL,
+ 0x390c60303edd47c6L } },
+ /* 22 << 140 */
+ { { 0x8f3e53128c0a78deL,0xccd02bda1e85df70L,0xd6c75c03a61b6582L,
+ 0x0762921cfc0eebd1L },
+ { 0xd34d0823d85010c0L,0xd73aaacb0044cf1fL,0xfb4159bba3b5e78aL,
+ 0x2287c7f7e5826f3fL } },
+ /* 23 << 140 */
+ { { 0x4aeaf742580b1a01L,0xf080415d60423b79L,0xe12622cda7dea144L,
+ 0x49ea499659d62472L },
+ { 0xb42991ef571f3913L,0x0610f214f5b25a8aL,0x47adc58530b79e8fL,
+ 0xf90e3df607a065a2L } },
+ /* 24 << 140 */
+ { { 0x5d0a5deb43e2e034L,0x53fb5a34444024aaL,0xa8628c686b0c9f7fL,
+ 0x9c69c29cac563656L },
+ { 0x5a231febbace47b6L,0xbdce02899ea5a2ecL,0x05da1fac9463853eL,
+ 0x96812c52509e78aaL } },
+ /* 25 << 140 */
+ { { 0xd3fb577157151692L,0xeb2721f8d98e1c44L,0xc050608732399be1L,
+ 0xda5a5511d979d8b8L },
+ { 0x737ed55dc6f56780L,0xe20d30040dc7a7f4L,0x02ce7301f5941a03L,
+ 0x91ef5215ed30f83aL } },
+ /* 26 << 140 */
+ { { 0x28727fc14092d85fL,0x72d223c65c49e41aL,0xa7cf30a2ba6a4d81L,
+ 0x7c086209b030d87dL },
+ { 0x04844c7dfc588b09L,0x728cd4995874bbb0L,0xcc1281eee84c0495L,
+ 0x0769b5baec31958fL } },
+ /* 27 << 140 */
+ { { 0x665c228bf99c2471L,0xf2d8a11b191eb110L,0x4594f494d36d7024L,
+ 0x482ded8bcdcb25a1L },
+ { 0xc958a9d8dadd4885L,0x7004477ef1d2b547L,0x0a45f6ef2a0af550L,
+ 0x4fc739d62f8d6351L } },
+ /* 28 << 140 */
+ { { 0x75cdaf27786f08a9L,0x8700bb2642c2737fL,0x855a71411c4e2670L,
+ 0x810188c115076fefL },
+ { 0xc251d0c9abcd3297L,0xae4c8967f48108ebL,0xbd146de718ceed30L,
+ 0xf9d4f07ac986bcedL } },
+ /* 29 << 140 */
+ { { 0x5ad98ed583fa1e08L,0x7780d33ebeabd1fbL,0xe330513c903b1196L,
+ 0xba11de9ea47bc8c4L },
+ { 0x684334da02c2d064L,0x7ecf360da48de23bL,0x57a1b4740a9089d8L,
+ 0xf28fa439ff36734cL } },
+ /* 30 << 140 */
+ { { 0xf2a482cbea4570b3L,0xee65d68ba5ebcee9L,0x988d0036b9694cd5L,
+ 0x53edd0e937885d32L },
+ { 0xe37e3307beb9bc6dL,0xe9abb9079f5c6768L,0x4396ccd551f2160fL,
+ 0x2500888c47336da6L } },
+ /* 31 << 140 */
+ { { 0x383f9ed9926fce43L,0x809dd1c704da2930L,0x30f6f5968a4cb227L,
+ 0x0d700c7f73a56b38L },
+ { 0x1825ea33ab64a065L,0xaab9b7351338df80L,0x1516100d9b63f57fL,
+ 0x2574395a27a6a634L } },
+ /* 32 << 140 */
+ { { 0xb5560fb6700a1acdL,0xe823fd73fd999681L,0xda915d1f6cb4e1baL,
+ 0x0d0301186ebe00a3L },
+ { 0x744fb0c989fca8cdL,0x970d01dbf9da0e0bL,0x0ad8c5647931d76fL,
+ 0xb15737bff659b96aL } },
+ /* 33 << 140 */
+ { { 0xdc9933e8a8b484e7L,0xb2fdbdf97a26dec7L,0x2349e9a49f1f0136L,
+ 0x7860368e70fddddbL },
+ { 0xd93d2c1cf9ad3e18L,0x6d6c5f17689f4e79L,0x7a544d91b24ff1b6L,
+ 0x3e12a5ebfe16cd8cL } },
+ /* 34 << 140 */
+ { { 0x543574e9a56b872fL,0xa1ad550cfcf68ea2L,0x689e37d23f560ef7L,
+ 0x8c54b9cac9d47a8bL },
+ { 0x46d40a4a088ac342L,0xec450c7c1576c6d0L,0xb589e31c1f9689e9L,
+ 0xdacf2602b8781718L } },
+ /* 35 << 140 */
+ { { 0xa89237c6c8cb6b42L,0x1326fc93b96ef381L,0x55d56c6db5f07825L,
+ 0xacba2eea7449e22dL },
+ { 0x74e0887a633c3000L,0xcb6cd172d7cbcf71L,0x309e81dec36cf1beL,
+ 0x07a18a6d60ae399bL } },
+ /* 36 << 140 */
+ { { 0xb36c26799edce57eL,0x52b892f4df001d41L,0xd884ae5d16a1f2c6L,
+ 0x9b329424efcc370aL },
+ { 0x3120daf2bd2e21dfL,0x55298d2d02470a99L,0x0b78af6ca05db32eL,
+ 0x5c76a331601f5636L } },
+ /* 37 << 140 */
+ { { 0xaae861fff8a4f29cL,0x70dc9240d68f8d49L,0x960e649f81b1321cL,
+ 0x3d2c801b8792e4ceL },
+ { 0xf479f77242521876L,0x0bed93bc416c79b1L,0xa67fbc05263e5bc9L,
+ 0x01e8e630521db049L } },
+ /* 38 << 140 */
+ { { 0x76f26738c6f3431eL,0xe609cb02e3267541L,0xb10cff2d818c877cL,
+ 0x1f0e75ce786a13cbL },
+ { 0xf4fdca641158544dL,0x5d777e896cb71ed0L,0x3c233737a9aa4755L,
+ 0x7b453192e527ab40L } },
+ /* 39 << 140 */
+ { { 0xdb59f68839f05ffeL,0x8f4f4be06d82574eL,0xcce3450cee292d1bL,
+ 0xaa448a1261ccd086L },
+ { 0xabce91b3f7914967L,0x4537f09b1908a5edL,0xa812421ef51042e7L,
+ 0xfaf5cebcec0b3a34L } },
+ /* 40 << 140 */
+ { { 0x730ffd874ca6b39aL,0x70fb72ed02efd342L,0xeb4735f9d75c8edbL,
+ 0xc11f2157c278aa51L },
+ { 0xc459f635bf3bfebfL,0x3a1ff0b46bd9601fL,0xc9d12823c420cb73L,
+ 0x3e9af3e23c2915a3L } },
+ /* 41 << 140 */
+ { { 0xe0c82c72b41c3440L,0x175239e5e3039a5fL,0xe1084b8a558795a3L,
+ 0x328d0a1dd01e5c60L },
+ { 0x0a495f2ed3788a04L,0x25d8ff1666c11a9fL,0xf5155f059ed692d6L,
+ 0x954fa1074f425fe4L } },
+ /* 42 << 140 */
+ { { 0xd16aabf2e98aaa99L,0x90cd8ba096b0f88aL,0x957f4782c154026aL,
+ 0x54ee073452af56d2L },
+ { 0xbcf89e5445b4147aL,0x3d102f219a52816cL,0x6808517e39b62e77L,
+ 0x92e2542169169ad8L } },
+ /* 43 << 140 */
+ { { 0xd721d871bb608558L,0x60e4ebaef6d4ff9bL,0x0ba1081941f2763eL,
+ 0xca2e45be51ee3247L },
+ { 0x66d172ec2bfd7a5fL,0x528a8f2f74d0b12dL,0xe17f1e38dabe70dcL,
+ 0x1d5d73169f93983cL } },
+ /* 44 << 140 */
+ { { 0x51b2184adf423e31L,0xcb417291aedb1a10L,0x2054ca93625bcab9L,
+ 0x54396860a98998f0L },
+ { 0x4e53f6c4a54ae57eL,0x0ffeb590ee648e9dL,0xfbbdaadc6afaf6bcL,
+ 0xf88ae796aa3bfb8aL } },
+ /* 45 << 140 */
+ { { 0x209f1d44d2359ed9L,0xac68dd03f3544ce2L,0xf378da47fd51e569L,
+ 0xe1abd8602cc80097L },
+ { 0x23ca18d9343b6e3aL,0x480797e8b40a1baeL,0xd1f0c717533f3e67L,
+ 0x4489697006e6cdfcL } },
+ /* 46 << 140 */
+ { { 0x8ca2105552a82e8dL,0xb2caf78578460cdcL,0x4c1b7b62e9037178L,
+ 0xefc09d2cdb514b58L },
+ { 0x5f2df9ee9113be5cL,0x2fbda78fb3f9271cL,0xe09a81af8f83fc54L,
+ 0x06b138668afb5141L } },
+ /* 47 << 140 */
+ { { 0x38f6480f43e3865dL,0x72dd77a81ddf47d9L,0xf2a8e9714c205ff7L,
+ 0x46d449d89d088ad8L },
+ { 0x926619ea185d706fL,0xe47e02ebc7dd7f62L,0xe7f120a78cbc2031L,
+ 0xc18bef00998d4ac9L } },
+ /* 48 << 140 */
+ { { 0x18f37a9c6bdf22daL,0xefbc432f90dc82dfL,0xc52cef8e5d703651L,
+ 0x82887ba0d99881a5L },
+ { 0x7cec9ddab920ec1dL,0xd0d7e8c3ec3e8d3bL,0x445bc3954ca88747L,
+ 0xedeaa2e09fd53535L } },
+ /* 49 << 140 */
+ { { 0x461b1d936cc87475L,0xd92a52e26d2383bdL,0xfabccb59d7903546L,
+ 0x6111a7613d14b112L },
+ { 0x0ae584feb3d5f612L,0x5ea69b8d60e828ecL,0x6c07898554087030L,
+ 0x649cab04ac4821feL } },
+ /* 50 << 140 */
+ { { 0x25ecedcf8bdce214L,0xb5622f7286af7361L,0x0e1227aa7038b9e2L,
+ 0xd0efb273ac20fa77L },
+ { 0x817ff88b79df975bL,0x856bf2861999503eL,0xb4d5351f5038ec46L,
+ 0x740a52c5fc42af6eL } },
+ /* 51 << 140 */
+ { { 0x2e38bb152cbb1a3fL,0xc3eb99fe17a83429L,0xca4fcbf1dd66bb74L,
+ 0x880784d6cde5e8fcL },
+ { 0xddc84c1cb4e7a0beL,0x8780510dbd15a72fL,0x44bcf1af81ec30e1L,
+ 0x141e50a80a61073eL } },
+ /* 52 << 140 */
+ { { 0x0d95571847be87aeL,0x68a61417f76a4372L,0xf57e7e87c607c3d3L,
+ 0x043afaf85252f332L },
+ { 0xcc14e1211552a4d2L,0xb6dee692bb4d4ab4L,0xb6ab74c8a03816a4L,
+ 0x84001ae46f394a29L } },
+ /* 53 << 140 */
+ { { 0x5bed8344d795fb45L,0x57326e7db79f55a5L,0xc9533ce04accdffcL,
+ 0x53473caf3993fa04L },
+ { 0x7906eb93a13df4c8L,0xa73e51f697cbe46fL,0xd1ab3ae10ae4ccf8L,
+ 0x256145088a5b3dbcL } },
+ /* 54 << 140 */
+ { { 0x61eff96211a71b27L,0xdf71412b6bb7fa39L,0xb31ba6b82bd7f3efL,
+ 0xb0b9c41569180d29L },
+ { 0xeec14552014cdde5L,0x702c624b227b4bbbL,0x2b15e8c2d3e988f3L,
+ 0xee3bcc6da4f7fd04L } },
+ /* 55 << 140 */
+ { { 0x9d00822a42ac6c85L,0x2db0cea61df9f2b7L,0xd7cad2ab42de1e58L,
+ 0x346ed5262d6fbb61L },
+ { 0xb39629951a2faf09L,0x2fa8a5807c25612eL,0x30ae04da7cf56490L,
+ 0x756629080eea3961L } },
+ /* 56 << 140 */
+ { { 0x3609f5c53d080847L,0xcb081d395241d4f6L,0xb4fb381077961a63L,
+ 0xc20c59842abb66fcL },
+ { 0x3d40aa7cf902f245L,0x9cb127364e536b1eL,0x5eda24da99b3134fL,
+ 0xafbd9c695cd011afL } },
+ /* 57 << 140 */
+ { { 0x9a16e30ac7088c7dL,0x5ab657103207389fL,0x1b09547fe7407a53L,
+ 0x2322f9d74fdc6eabL },
+ { 0xc0f2f22d7430de4dL,0x19382696e68ca9a9L,0x17f1eff1918e5868L,
+ 0xe3b5b635586f4204L } },
+ /* 58 << 140 */
+ { { 0x146ef9803fbc4341L,0x359f2c805b5eed4eL,0x9f35744e7482e41dL,
+ 0x9a9ac3ecf3b224c2L },
+ { 0x9161a6fe91fc50aeL,0x89ccc66bc613fa7cL,0x89268b14c732f15aL,
+ 0x7cd6f4e2b467ed03L } },
+ /* 59 << 140 */
+ { { 0xfbf79869ce56b40eL,0xf93e094cc02dde98L,0xefe0c3a8edee2cd7L,
+ 0x90f3ffc0b268fd42L },
+ { 0x81a7fd5608241aedL,0x95ab7ad800b1afe8L,0x401270563e310d52L,
+ 0xd3ffdeb109d9fc43L } },
+ /* 60 << 140 */
+ { { 0xc8f85c91d11a8594L,0x2e74d25831cf6db8L,0x829c7ca302b5dfd0L,
+ 0xe389cfbe69143c86L },
+ { 0xd01b6405941768d8L,0x4510399503bf825dL,0xcc4ee16656cd17e2L,
+ 0xbea3c283ba037e79L } },
+ /* 61 << 140 */
+ { { 0x4e1ac06ed9a47520L,0xfbfe18aaaf852404L,0x5615f8e28087648aL,
+ 0x7301e47eb9d150d9L },
+ { 0x79f9f9ddb299b977L,0x76697a7ba5b78314L,0x10d674687d7c90e7L,
+ 0x7afffe03937210b5L } },
+ /* 62 << 140 */
+ { { 0x5aef3e4b28c22ceeL,0xefb0ecd809fd55aeL,0x4cea71320d2a5d6aL,
+ 0x9cfb5fa101db6357L },
+ { 0x395e0b57f36e1ac5L,0x008fa9ad36cafb7dL,0x8f6cdf705308c4dbL,
+ 0x51527a3795ed2477L } },
+ /* 63 << 140 */
+ { { 0xba0dee305bd21311L,0x6ed41b22909c90d7L,0xc5f6b7587c8696d3L,
+ 0x0db8eaa83ce83a80L },
+ { 0xd297fe37b24b4b6fL,0xfe58afe8522d1f0dL,0x973587368c98dbd9L,
+ 0x6bc226ca9454a527L } },
+ /* 64 << 140 */
+ { { 0xa12b384ece53c2d0L,0x779d897d5e4606daL,0xa53e47b073ec12b0L,
+ 0x462dbbba5756f1adL },
+ { 0x69fe09f2cafe37b6L,0x273d1ebfecce2e17L,0x8ac1d5383cf607fdL,
+ 0x8035f7ff12e10c25L } },
+ /* 0 << 147 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 147 */
+ { { 0x854d34c77e6c5520L,0xc27df9efdcb9ea58L,0x405f2369d686666dL,
+ 0x29d1febf0417aa85L },
+ { 0x9846819e93470afeL,0x3e6a9669e2a27f9eL,0x24d008a2e31e6504L,
+ 0xdba7cecf9cb7680aL } },
+ /* 2 << 147 */
+ { { 0xecaff541338d6e43L,0x56f7dd734541d5ccL,0xb5d426de96bc88caL,
+ 0x48d94f6b9ed3a2c3L },
+ { 0x6354a3bb2ef8279cL,0xd575465b0b1867f2L,0xef99b0ff95225151L,
+ 0xf3e19d88f94500d8L } },
+ /* 3 << 147 */
+ { { 0x92a83268e32dd620L,0x913ec99f627849a2L,0xedd8fdfa2c378882L,
+ 0xaf96f33eee6f8cfeL },
+ { 0xc06737e5dc3fa8a5L,0x236bb531b0b03a1dL,0x33e59f2989f037b0L,
+ 0x13f9b5a7d9a12a53L } },
+ /* 4 << 147 */
+ { { 0x0d0df6ce51efb310L,0xcb5b2eb4958df5beL,0xd6459e2936158e59L,
+ 0x82aae2b91466e336L },
+ { 0xfb658a39411aa636L,0x7152ecc5d4c0a933L,0xf10c758a49f026b7L,
+ 0xf4837f97cb09311fL } },
+ /* 5 << 147 */
+ { { 0xddfb02c4c753c45fL,0x18ca81b6f9c840feL,0x846fd09ab0f8a3e6L,
+ 0xb1162adde7733dbcL },
+ { 0x7070ad20236e3ab6L,0xf88cdaf5b2a56326L,0x05fc8719997cbc7aL,
+ 0x442cd4524b665272L } },
+ /* 6 << 147 */
+ { { 0x7807f364b71698f5L,0x6ba418d29f7b605eL,0xfd20b00fa03b2cbbL,
+ 0x883eca37da54386fL },
+ { 0xff0be43ff3437f24L,0xe910b432a48bb33cL,0x4963a128329df765L,
+ 0xac1dd556be2fe6f7L } },
+ /* 7 << 147 */
+ { { 0x557610f924a0a3fcL,0x38e17bf4e881c3f9L,0x6ba84fafed0dac99L,
+ 0xd4a222c359eeb918L },
+ { 0xc79c1dbe13f542b6L,0x1fc65e0de425d457L,0xeffb754f1debb779L,
+ 0x638d8fd09e08af60L } },
+ /* 8 << 147 */
+ { { 0x994f523a626332d5L,0x7bc388335561bb44L,0x005ed4b03d845ea2L,
+ 0xd39d3ee1c2a1f08aL },
+ { 0x6561fdd3e7676b0dL,0x620e35fffb706017L,0x36ce424ff264f9a8L,
+ 0xc4c3419fda2681f7L } },
+ /* 9 << 147 */
+ { { 0xfb6afd2f69beb6e8L,0x3a50b9936d700d03L,0xc840b2ad0c83a14fL,
+ 0x573207be54085befL },
+ { 0x5af882e309fe7e5bL,0x957678a43b40a7e1L,0x172d4bdd543056e2L,
+ 0x9c1b26b40df13c0aL } },
+ /* 10 << 147 */
+ { { 0x1c30861cf405ff06L,0xebac86bd486e828bL,0xe791a971636933fcL,
+ 0x50e7c2be7aeee947L },
+ { 0xc3d4a095fa90d767L,0xae60eb7be670ab7bL,0x17633a64397b056dL,
+ 0x93a21f33105012aaL } },
+ /* 11 << 147 */
+ { { 0x663c370babb88643L,0x91df36d722e21599L,0x183ba8358b761671L,
+ 0x381eea1d728f3bf1L },
+ { 0xb9b2f1ba39966e6cL,0x7c464a28e7295492L,0x0fd5f70a09b26b7fL,
+ 0xa9aba1f9fbe009dfL } },
+ /* 12 << 147 */
+ { { 0x857c1f22369b87adL,0x3c00e5d932fca556L,0x1ad74cab90b06466L,
+ 0xa7112386550faaf2L },
+ { 0x7435e1986d9bd5f5L,0x2dcc7e3859c3463fL,0xdc7df748ca7bd4b2L,
+ 0x13cd4c089dec2f31L } },
+ /* 13 << 147 */
+ { { 0x0d3b5df8e3237710L,0x0dadb26ecbd2f7b0L,0x9f5966abe4aa082bL,
+ 0x666ec8de350e966eL },
+ { 0x1bfd1ed5ee524216L,0xcd93c59b41dab0b6L,0x658a8435d186d6baL,
+ 0x1b7d34d2159d1195L } },
+ /* 14 << 147 */
+ { { 0x5936e46022caf46bL,0x6a45dd8f9a96fe4fL,0xf7925434b98f474eL,
+ 0x414104120053ef15L },
+ { 0x71cf8d1241de97bfL,0xb8547b61bd80bef4L,0xb47d3970c4db0037L,
+ 0xf1bcd328fef20dffL } },
+ /* 15 << 147 */
+ { { 0x31a92e0910caad67L,0x1f5919605531a1e1L,0x3bb852e05f4fc840L,
+ 0x63e297ca93a72c6cL },
+ { 0x3c2b0b2e49abad67L,0x6ec405fced3db0d9L,0xdc14a5307fef1d40L,
+ 0xccd19846280896fcL } },
+ /* 16 << 147 */
+ { { 0x00f831769bb81648L,0xd69eb485653120d0L,0xd17d75f44ccabc62L,
+ 0x34a07f82b749fcb1L },
+ { 0x2c3af787bbfb5554L,0xb06ed4d062e283f8L,0x5722889fa19213a0L,
+ 0x162b085edcf3c7b4L } },
+ /* 17 << 147 */
+ { { 0xbcaecb31e0dd3ecaL,0xc6237fbce52f13a5L,0xcc2b6b0327bac297L,
+ 0x2ae1cac5b917f54aL },
+ { 0x474807d47845ae4fL,0xfec7dd92ce5972e0L,0xc3bd25411d7915bbL,
+ 0x66f85dc4d94907caL } },
+ /* 18 << 147 */
+ { { 0xd981b888bdbcf0caL,0xd75f5da6df279e9fL,0x128bbf247054e934L,
+ 0x3c6ff6e581db134bL },
+ { 0x795b7cf4047d26e4L,0xf370f7b85049ec37L,0xc6712d4dced945afL,
+ 0xdf30b5ec095642bcL } },
+ /* 19 << 147 */
+ { { 0x9b034c624896246eL,0x5652c016ee90bbd1L,0xeb38636f87fedb73L,
+ 0x5e32f8470135a613L },
+ { 0x0703b312cf933c83L,0xd05bb76e1a7f47e6L,0x825e4f0c949c2415L,
+ 0x569e56227250d6f8L } },
+ /* 20 << 147 */
+ { { 0xbbe9eb3a6568013eL,0x8dbd203f22f243fcL,0x9dbd7694b342734aL,
+ 0x8f6d12f846afa984L },
+ { 0xb98610a2c9eade29L,0xbab4f32347dd0f18L,0x5779737b671c0d46L,
+ 0x10b6a7c6d3e0a42aL } },
+ /* 21 << 147 */
+ { { 0xfb19ddf33035b41cL,0xd336343f99c45895L,0x61fe493854c857e5L,
+ 0xc4d506beae4e57d5L },
+ { 0x3cd8c8cbbbc33f75L,0x7281f08a9262c77dL,0x083f4ea6f11a2823L,
+ 0x8895041e9fba2e33L } },
+ /* 22 << 147 */
+ { { 0xfcdfea499c438edfL,0x7678dcc391edba44L,0xf07b3b87e2ba50f0L,
+ 0xc13888ef43948c1bL },
+ { 0xc2135ad41140af42L,0x8e5104f3926ed1a7L,0xf24430cb88f6695fL,
+ 0x0ce0637b6d73c120L } },
+ /* 23 << 147 */
+ { { 0xb2db01e6fe631e8fL,0x1c5563d7d7bdd24bL,0x8daea3ba369ad44fL,
+ 0x000c81b68187a9f9L },
+ { 0x5f48a951aae1fd9aL,0xe35626c78d5aed8aL,0x209527630498c622L,
+ 0x76d17634773aa504L } },
+ /* 24 << 147 */
+ { { 0x36d90ddaeb300f7aL,0x9dcf7dfcedb5e801L,0x645cb26874d5244cL,
+ 0xa127ee79348e3aa2L },
+ { 0x488acc53575f1dbbL,0x95037e8580e6161eL,0x57e59283292650d0L,
+ 0xabe67d9914938216L } },
+ /* 25 << 147 */
+ { { 0x3c7f944b3f8e1065L,0xed908cb6330e8924L,0x08ee8fd56f530136L,
+ 0x2227b7d5d7ffc169L },
+ { 0x4f55c893b5cd6dd5L,0x82225e11a62796e8L,0x5c6cead1cb18e12cL,
+ 0x4381ae0c84f5a51aL } },
+ /* 26 << 147 */
+ { { 0x345913d37fafa4c8L,0x3d9180820491aac0L,0x9347871f3e69264cL,
+ 0xbea9dd3cb4f4f0cdL },
+ { 0xbda5d0673eadd3e7L,0x0033c1b80573bcd8L,0x255893795da2486cL,
+ 0xcb89ee5b86abbee7L } },
+ /* 27 << 147 */
+ { { 0x8fe0a8f322532e5dL,0xb6410ff0727dfc4cL,0x619b9d58226726dbL,
+ 0x5ec256697a2b2dc7L },
+ { 0xaf4d2e064c3beb01L,0x852123d07acea556L,0x0e9470faf783487aL,
+ 0x75a7ea045664b3ebL } },
+ /* 28 << 147 */
+ { { 0x4ad78f356798e4baL,0x9214e6e5c7d0e091L,0xc420b488b1290403L,
+ 0x64049e0afc295749L },
+ { 0x03ef5af13ae9841fL,0xdbe4ca19b0b662a6L,0x46845c5ffa453458L,
+ 0xf8dabf1910b66722L } },
+ /* 29 << 147 */
+ { { 0xb650f0aacce2793bL,0x71db851ec5ec47c1L,0x3eb78f3e3b234fa9L,
+ 0xb0c60f35fc0106ceL },
+ { 0x05427121774eadbdL,0x25367fafce323863L,0x7541b5c9cd086976L,
+ 0x4ff069e2dc507ad1L } },
+ /* 30 << 147 */
+ { { 0x741452568776e667L,0x6e76142cb23c6bb5L,0xdbf307121b3a8a87L,
+ 0x60e7363e98450836L },
+ { 0x5741450eb7366d80L,0xe4ee14ca4837dbdfL,0xa765eb9b69d4316fL,
+ 0x04548dca8ef43825L } },
+ /* 31 << 147 */
+ { { 0x9c9f4e4c5ae888ebL,0x733abb5156e9ac99L,0xdaad3c20ba6ac029L,
+ 0x9b8dd3d32ba3e38eL },
+ { 0xa9bb4c920bc5d11aL,0xf20127a79c5f88a3L,0x4f52b06e161d3cb8L,
+ 0x26c1ff096afaf0a6L } },
+ /* 32 << 147 */
+ { { 0x32670d2f7189e71fL,0xc64387485ecf91e7L,0x15758e57db757a21L,
+ 0x427d09f8290a9ce5L },
+ { 0x846a308f38384a7aL,0xaac3acb4b0732b99L,0x9e94100917845819L,
+ 0x95cba111a7ce5e03L } },
+ /* 33 << 147 */
+ { { 0x6f3d4f7fb00009c4L,0xb8396c278ff28b5fL,0xb1a9ae431c97975dL,
+ 0x9d7ba8afe5d9fed5L },
+ { 0x338cf09f34f485b6L,0xbc0ddacc64122516L,0xa450da1205d471feL,
+ 0x4c3a6250628dd8c9L } },
+ /* 34 << 147 */
+ { { 0x69c7d103d1295837L,0xa2893e503807eb2fL,0xd6e1e1debdb41491L,
+ 0xc630745b5e138235L },
+ { 0xc892109e48661ae1L,0x8d17e7ebea2b2674L,0x00ec0f87c328d6b5L,
+ 0x6d858645f079ff9eL } },
+ /* 35 << 147 */
+ { { 0x6cdf243e19115eadL,0x1ce1393e4bac4fcfL,0x2c960ed09c29f25bL,
+ 0x59be4d8e9d388a05L },
+ { 0x0d46e06cd0def72bL,0xb923db5de0342748L,0xf7d3aacd936d4a3dL,
+ 0x558519cc0b0b099eL } },
+ /* 36 << 147 */
+ { { 0x3ea8ebf8827097efL,0x259353dbd054f55dL,0x84c89abc6d2ed089L,
+ 0x5c548b698e096a7cL },
+ { 0xd587f616994b995dL,0x4d1531f6a5845601L,0x792ab31e451fd9f0L,
+ 0xc8b57bb265adf6caL } },
+ /* 37 << 147 */
+ { { 0x68440fcb1cd5ad73L,0xb9c860e66144da4fL,0x2ab286aa8462beb8L,
+ 0xcc6b8fffef46797fL },
+ { 0xac820da420c8a471L,0x69ae05a177ff7fafL,0xb9163f39bfb5da77L,
+ 0xbd03e5902c73ab7aL } },
+ /* 38 << 147 */
+ { { 0x7e862b5eb2940d9eL,0x3c663d864b9af564L,0xd8309031bde3033dL,
+ 0x298231b2d42c5bc6L },
+ { 0x42090d2c552ad093L,0xa4799d1cff854695L,0x0a88b5d6d31f0d00L,
+ 0xf8b40825a2f26b46L } },
+ /* 39 << 147 */
+ { { 0xec29b1edf1bd7218L,0xd491c53b4b24c86eL,0xd2fe588f3395ea65L,
+ 0x6f3764f74456ef15L },
+ { 0xdb43116dcdc34800L,0xcdbcd456c1e33955L,0xefdb554074ab286bL,
+ 0x948c7a51d18c5d7cL } },
+ /* 40 << 147 */
+ { { 0xeb81aa377378058eL,0x41c746a104411154L,0xa10c73bcfb828ac7L,
+ 0x6439be919d972b29L },
+ { 0x4bf3b4b043a2fbadL,0x39e6dadf82b5e840L,0x4f7164086397bd4cL,
+ 0x0f7de5687f1eeccbL } },
+ /* 41 << 147 */
+ { { 0x5865c5a1d2ffbfc1L,0xf74211fa4ccb6451L,0x66368a88c0b32558L,
+ 0x5b539dc29ad7812eL },
+ { 0x579483d02f3af6f6L,0x5213207899934eceL,0x50b9650fdcc9e983L,
+ 0xca989ec9aee42b8aL } },
+ /* 42 << 147 */
+ { { 0x6a44c829d6f62f99L,0x8f06a3094c2a7c0cL,0x4ea2b3a098a0cb0aL,
+ 0x5c547b70beee8364L },
+ { 0x461d40e1682afe11L,0x9e0fc77a7b41c0a8L,0x79e4aefde20d5d36L,
+ 0x2916e52032dd9f63L } },
+ /* 43 << 147 */
+ { { 0xf59e52e83f883fafL,0x396f96392b868d35L,0xc902a9df4ca19881L,
+ 0x0fc96822db2401a6L },
+ { 0x4123758766f1c68dL,0x10fc6de3fb476c0dL,0xf8b6b579841f5d90L,
+ 0x2ba8446cfa24f44aL } },
+ /* 44 << 147 */
+ { { 0xa237b920ef4a9975L,0x60bb60042330435fL,0xd6f4ab5acfb7e7b5L,
+ 0xb2ac509783435391L },
+ { 0xf036ee2fb0d1ea67L,0xae779a6a74c56230L,0x59bff8c8ab838ae6L,
+ 0xcd83ca999b38e6f0L } },
+ /* 45 << 147 */
+ { { 0xbb27bef5e33deed3L,0xe6356f6f001892a8L,0xbf3be6cc7adfbd3eL,
+ 0xaecbc81c33d1ac9dL },
+ { 0xe4feb909e6e861dcL,0x90a247a453f5f801L,0x01c50acb27346e57L,
+ 0xce29242e461acc1bL } },
+ /* 46 << 147 */
+ { { 0x04dd214a2f998a91L,0x271ee9b1d4baf27bL,0x7e3027d1e8c26722L,
+ 0x21d1645c1820dce5L },
+ { 0x086f242c7501779cL,0xf0061407fa0e8009L,0xf23ce47760187129L,
+ 0x05bbdedb0fde9bd0L } },
+ /* 47 << 147 */
+ { { 0x682f483225d98473L,0xf207fe855c658427L,0xb6fdd7ba4166ffa1L,
+ 0x0c3140569eed799dL },
+ { 0x0db8048f4107e28fL,0x74ed387141216840L,0x74489f8f56a3c06eL,
+ 0x1e1c005b12777134L } },
+ /* 48 << 147 */
+ { { 0xdb332a73f37ec3c3L,0xc65259bddd59eba0L,0x2291709cdb4d3257L,
+ 0x9a793b25bd389390L },
+ { 0xf39fe34be43756f0L,0x2f76bdce9afb56c9L,0x9f37867a61208b27L,
+ 0xea1d4307089972c3L } },
+ /* 49 << 147 */
+ { { 0x8c5953308bdf623aL,0x5f5accda8441fb7dL,0xfafa941832ddfd95L,
+ 0x6ad40c5a0fde9be7L },
+ { 0x43faba89aeca8709L,0xc64a7cf12c248a9dL,0x1662025272637a76L,
+ 0xaee1c79122b8d1bbL } },
+ /* 50 << 147 */
+ { { 0xf0f798fd21a843b2L,0x56e4ed4d8d005cb1L,0x355f77801f0d8abeL,
+ 0x197b04cf34522326L },
+ { 0x41f9b31ffd42c13fL,0x5ef7feb2b40f933dL,0x27326f425d60bad4L,
+ 0x027ecdb28c92cf89L } },
+ /* 51 << 147 */
+ { { 0x04aae4d14e3352feL,0x08414d2f73591b90L,0x5ed6124eb7da7d60L,
+ 0xb985b9314d13d4ecL },
+ { 0xa592d3ab96bf36f9L,0x012dbed5bbdf51dfL,0xa57963c0df6c177dL,
+ 0x010ec86987ca29cfL } },
+ /* 52 << 147 */
+ { { 0xba1700f6bf926dffL,0x7c9fdbd1f4bf6bc2L,0xdc18dc8f64da11f5L,
+ 0xa6074b7ad938ae75L },
+ { 0x14270066e84f44a4L,0x99998d38d27b954eL,0xc1be8ab2b4f38e9aL,
+ 0x8bb55bbf15c01016L } },
+ /* 53 << 147 */
+ { { 0xf73472b40ea2ab30L,0xd365a340f73d68ddL,0xc01a716819c2e1ebL,
+ 0x32f49e3734061719L },
+ { 0xb73c57f101d8b4d6L,0x03c8423c26b47700L,0x321d0bc8a4d8826aL,
+ 0x6004213c4bc0e638L } },
+ /* 54 << 147 */
+ { { 0xf78c64a1c1c06681L,0x16e0a16fef018e50L,0x31cbdf91db42b2b3L,
+ 0xf8f4ffcee0d36f58L },
+ { 0xcdcc71cd4cc5e3e0L,0xd55c7cfaa129e3e0L,0xccdb6ba00fb2cbf1L,
+ 0x6aba0005c4bce3cbL } },
+ /* 55 << 147 */
+ { { 0x501cdb30d232cfc4L,0x9ddcf12ed58a3cefL,0x02d2cf9c87e09149L,
+ 0xdc5d7ec72c976257L },
+ { 0x6447986e0b50d7ddL,0x88fdbaf7807f112aL,0x58c9822ab00ae9f6L,
+ 0x6abfb9506d3d27e0L } },
+ /* 56 << 147 */
+ { { 0xd0a744878a429f4fL,0x0649712bdb516609L,0xb826ba57e769b5dfL,
+ 0x82335df21fc7aaf2L },
+ { 0x2389f0675c93d995L,0x59ac367a68677be6L,0xa77985ff21d9951bL,
+ 0x038956fb85011cceL } },
+ /* 57 << 147 */
+ { { 0x608e48cbbb734e37L,0xc08c0bf22be5b26fL,0x17bbdd3bf9b1a0d9L,
+ 0xeac7d89810483319L },
+ { 0xc95c4bafbc1a6deaL,0xfdd0e2bf172aafdbL,0x40373cbc8235c41aL,
+ 0x14303f21fb6f41d5L } },
+ /* 58 << 147 */
+ { { 0xba0636210408f237L,0xcad3b09aecd2d1edL,0x4667855a52abb6a2L,
+ 0xba9157dcaa8b417bL },
+ { 0xfe7f35074f013efbL,0x1b112c4baa38c4a2L,0xa1406a609ba64345L,
+ 0xe53cba336993c80bL } },
+ /* 59 << 147 */
+ { { 0x45466063ded40d23L,0x3d5f1f4d54908e25L,0x9ebefe62403c3c31L,
+ 0x274ea0b50672a624L },
+ { 0xff818d99451d1b71L,0x80e826438f79cf79L,0xa165df1373ce37f5L,
+ 0xa744ef4ffe3a21fdL } },
+ /* 60 << 147 */
+ { { 0x73f1e7f5cf551396L,0xc616898e868c676bL,0x671c28c78c442c36L,
+ 0xcfe5e5585e0a317dL },
+ { 0x1242d8187051f476L,0x56fad2a614f03442L,0x262068bc0a44d0f6L,
+ 0xdfa2cd6ece6edf4eL } },
+ /* 61 << 147 */
+ { { 0x0f43813ad15d1517L,0x61214cb2377d44f5L,0xd399aa29c639b35fL,
+ 0x42136d7154c51c19L },
+ { 0x9774711b08417221L,0x0a5546b352545a57L,0x80624c411150582dL,
+ 0x9ec5c418fbc555bcL } },
+ /* 62 << 147 */
+ { { 0x2c87dcad771849f1L,0xb0c932c501d7bf6fL,0x6aa5cd3e89116eb2L,
+ 0xd378c25a51ca7bd3L },
+ { 0xc612a0da9e6e3e31L,0x0417a54db68ad5d0L,0x00451e4a22c6edb8L,
+ 0x9fbfe019b42827ceL } },
+ /* 63 << 147 */
+ { { 0x2fa92505ba9384a2L,0x21b8596e64ad69c1L,0x8f4fcc49983b35a6L,
+ 0xde09376072754672L },
+ { 0x2f14ccc8f7bffe6dL,0x27566bff5d94263dL,0xb5b4e9c62df3ec30L,
+ 0x94f1d7d53e6ea6baL } },
+ /* 64 << 147 */
+ { { 0x97b7851aaaca5e9bL,0x518aa52156713b97L,0x3357e8c7150a61f6L,
+ 0x7842e7e2ec2c2b69L },
+ { 0x8dffaf656868a548L,0xd963bd82e068fc81L,0x64da5c8b65917733L,
+ 0x927090ff7b247328L } },
+ /* 0 << 154 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 154 */
+ { { 0x214bc9a7d298c241L,0xe3b697ba56807cfdL,0xef1c78024564eadbL,
+ 0xdde8cdcfb48149c5L },
+ { 0x946bf0a75a4d2604L,0x27154d7f6c1538afL,0x95cc9230de5b1fccL,
+ 0xd88519e966864f82L } },
+ /* 2 << 154 */
+ { { 0xb828dd1a7cb1282cL,0xa08d7626be46973aL,0x6baf8d40e708d6b2L,
+ 0x72571fa14daeb3f3L },
+ { 0x85b1732ff22dfd98L,0x87ab01a70087108dL,0xaaaafea85988207aL,
+ 0xccc832f869f00755L } },
+ /* 3 << 154 */
+ { { 0x964d950e36ff3bf0L,0x8ad20f6ff0b34638L,0x4d9177b3b5d7585fL,
+ 0xcf839760ef3f019fL },
+ { 0x582fc5b38288c545L,0x2f8e4e9b13116bd1L,0xf91e1b2f332120efL,
+ 0xcf5687242a17dd23L } },
+ /* 4 << 154 */
+ { { 0x488f1185ca8d9d1aL,0xadf2c77dd987ded2L,0x5f3039f060c46124L,
+ 0xe5d70b7571e095f4L },
+ { 0x82d586506260e70fL,0x39d75ea7f750d105L,0x8cf3d0b175bac364L,
+ 0xf3a7564d21d01329L } },
+ /* 5 << 154 */
+ { { 0x182f04cd2f52d2a7L,0x4fde149ae2df565aL,0xb80c5eeca79fb2f7L,
+ 0xab491d7b22ddc897L },
+ { 0x99d76c18c6312c7fL,0xca0d5f3d6aa41a57L,0x71207325d15363a0L,
+ 0xe82aa265beb252c2L } },
+ /* 6 << 154 */
+ { { 0x94ab4700ec3128c2L,0x6c76d8628e383f49L,0xdc36b150c03024ebL,
+ 0xfb43947753daac69L },
+ { 0xfc68764a8dc79623L,0x5b86995db440fbb2L,0xd66879bfccc5ee0dL,
+ 0x0522894295aa8bd3L } },
+ /* 7 << 154 */
+ { { 0xb51a40a51e6a75c1L,0x24327c760ea7d817L,0x0663018207774597L,
+ 0xd6fdbec397fa7164L },
+ { 0x20c99dfb13c90f48L,0xd6ac5273686ef263L,0xc6a50bdcfef64eebL,
+ 0xcd87b28186fdfc32L } },
+ /* 8 << 154 */
+ { { 0xb24aa43e3fcd3efcL,0xdd26c034b8088e9aL,0xa5ef4dc9bd3d46eaL,
+ 0xa2f99d588a4c6a6fL },
+ { 0xddabd3552f1da46cL,0x72c3f8ce1afacdd1L,0xd90c4eee92d40578L,
+ 0xd28bb41fca623b94L } },
+ /* 9 << 154 */
+ { { 0x50fc0711745edc11L,0x9dd9ad7d3dc87558L,0xce6931fbb49d1e64L,
+ 0x6c77a0a2c98bd0f9L },
+ { 0x62b9a6296baf7cb1L,0xcf065f91ccf72d22L,0x7203cce979639071L,
+ 0x09ae4885f9cb732fL } },
+ /* 10 << 154 */
+ { { 0x5e7c3becee8314f3L,0x1c068aeddbea298fL,0x08d381f17c80acecL,
+ 0x03b56be8e330495bL },
+ { 0xaeffb8f29222882dL,0x95ff38f6c4af8bf7L,0x50e32d351fc57d8cL,
+ 0x6635be5217b444f0L } },
+ /* 11 << 154 */
+ { { 0x04d15276a5177900L,0x4e1dbb47f6858752L,0x5b475622c615796cL,
+ 0xa6fa0387691867bfL },
+ { 0xed7f5d562844c6d0L,0xc633cf9b03a2477dL,0xf6be5c402d3721d6L,
+ 0xaf312eb7e9fd68e6L } },
+ /* 12 << 154 */
+ { { 0x242792d2e7417ce1L,0xff42bc71970ee7f5L,0x1ff4dc6d5c67a41eL,
+ 0x77709b7b20882a58L },
+ { 0x3554731dbe217f2cL,0x2af2a8cd5bb72177L,0x58eee769591dd059L,
+ 0xbb2930c94bba6477L } },
+ /* 13 << 154 */
+ { { 0x863ee0477d930cfcL,0x4c262ad1396fd1f4L,0xf4765bc8039af7e1L,
+ 0x2519834b5ba104f6L },
+ { 0x7cd61b4cd105f961L,0xa5415da5d63bca54L,0x778280a088a1f17cL,
+ 0xc49689492329512cL } },
+ /* 14 << 154 */
+ { { 0x174a9126cecdaa7aL,0xfc8c7e0e0b13247bL,0x29c110d23484c1c4L,
+ 0xf8eb8757831dfc3bL },
+ { 0x022f0212c0067452L,0x3f6f69ee7b9b926cL,0x09032da0ef42daf4L,
+ 0x79f00ade83f80de4L } },
+ /* 15 << 154 */
+ { { 0x6210db7181236c97L,0x74f7685b3ee0781fL,0x4df7da7ba3e41372L,
+ 0x2aae38b1b1a1553eL },
+ { 0x1688e222f6dd9d1bL,0x576954485b8b6487L,0x478d21274b2edeaaL,
+ 0xb2818fa51e85956aL } },
+ /* 16 << 154 */
+ { { 0x1e6adddaf176f2c0L,0x01ca4604e2572658L,0x0a404ded85342ffbL,
+ 0x8cf60f96441838d6L },
+ { 0x9bbc691cc9071c4aL,0xfd58874434442803L,0x97101c85809c0d81L,
+ 0xa7fb754c8c456f7fL } },
+ /* 17 << 154 */
+ { { 0xc95f3c5cd51805e1L,0xab4ccd39b299dca8L,0x3e03d20b47eaf500L,
+ 0xfa3165c1d7b80893L },
+ { 0x005e8b54e160e552L,0xdc4972ba9019d11fL,0x21a6972e0c9a4a7aL,
+ 0xa52c258f37840fd7L } },
+ /* 18 << 154 */
+ { { 0xf8559ff4c1e99d81L,0x08e1a7d6a3c617c0L,0xb398fd43248c6ba7L,
+ 0x6ffedd91d1283794L },
+ { 0x8a6a59d2d629d208L,0xa9d141d53490530eL,0x42f6fc1838505989L,
+ 0x09bf250d479d94eeL } },
+ /* 19 << 154 */
+ { { 0x223ad3b1b3822790L,0x6c5926c093b8971cL,0x609efc7e75f7fa62L,
+ 0x45d66a6d1ec2d989L },
+ { 0x4422d663987d2792L,0x4a73caad3eb31d2bL,0xf06c2ac1a32cb9e6L,
+ 0xd9445c5f91aeba84L } },
+ /* 20 << 154 */
+ { { 0x6af7a1d5af71013fL,0xe68216e50bedc946L,0xf4cba30bd27370a0L,
+ 0x7981afbf870421ccL },
+ { 0x02496a679449f0e1L,0x86cfc4be0a47edaeL,0x3073c936b1feca22L,
+ 0xf569461203f8f8fbL } },
+ /* 21 << 154 */
+ { { 0xd063b723901515eaL,0x4c6c77a5749cf038L,0x6361e360ab9e5059L,
+ 0x596cf171a76a37c0L },
+ { 0x800f53fa6530ae7aL,0x0f5e631e0792a7a6L,0x5cc29c24efdb81c9L,
+ 0xa269e8683f9c40baL } },
+ /* 22 << 154 */
+ { { 0xec14f9e12cb7191eL,0x78ea1bd8e5b08ea6L,0x3c65aa9b46332bb9L,
+ 0x84cc22b3bf80ce25L },
+ { 0x0098e9e9d49d5bf1L,0xcd4ec1c619087da4L,0x3c9d07c5aef6e357L,
+ 0x839a02689f8f64b8L } },
+ /* 23 << 154 */
+ { { 0xc5e9eb62c6d8607fL,0x759689f56aa995e4L,0x70464669bbb48317L,
+ 0x921474bfe402417dL },
+ { 0xcabe135b2a354c8cL,0xd51e52d2812fa4b5L,0xec74109653311fe8L,
+ 0x4f774535b864514bL } },
+ /* 24 << 154 */
+ { { 0xbcadd6715bde48f8L,0xc97038732189bc7dL,0x5d45299ec709ee8aL,
+ 0xd1287ee2845aaff8L },
+ { 0x7d1f8874db1dbf1fL,0xea46588b990c88d6L,0x60ba649a84368313L,
+ 0xd5fdcbce60d543aeL } },
+ /* 25 << 154 */
+ { { 0x90b46d43810d5ab0L,0x6739d8f904d7e5ccL,0x021c1a580d337c33L,
+ 0x00a6116268e67c40L },
+ { 0x95ef413b379f0a1fL,0xfe126605e9e2ab95L,0x67578b852f5f199cL,
+ 0xf5c003292cb84913L } },
+ /* 26 << 154 */
+ { { 0xf795643037577dd8L,0x83b82af429c5fe88L,0x9c1bea26cdbdc132L,
+ 0x589fa0869c04339eL },
+ { 0x033e9538b13799dfL,0x85fa8b21d295d034L,0xdf17f73fbd9ddccaL,
+ 0xf32bd122ddb66334L } },
+ /* 27 << 154 */
+ { { 0x55ef88a7858b044cL,0x1f0d69c25aa9e397L,0x55fd9cc340d85559L,
+ 0xc774df727785ddb2L },
+ { 0x5dcce9f6d3bd2e1cL,0xeb30da20a85dfed0L,0x5ed7f5bbd3ed09c4L,
+ 0x7d42a35c82a9c1bdL } },
+ /* 28 << 154 */
+ { { 0xcf3de9959890272dL,0x75f3432a3e713a10L,0x5e13479fe28227b8L,
+ 0xb8561ea9fefacdc8L },
+ { 0xa6a297a08332aafdL,0x9b0d8bb573809b62L,0xd2fa1cfd0c63036fL,
+ 0x7a16eb55bd64bda8L } },
+ /* 29 << 154 */
+ { { 0x3f5cf5f678e62ddcL,0x2267c45407fd752bL,0x5e361b6b5e437bbeL,
+ 0x95c595018354e075L },
+ { 0xec725f85f2b254d9L,0x844b617d2cb52b4eL,0xed8554f5cf425fb5L,
+ 0xab67703e2af9f312L } },
+ /* 30 << 154 */
+ { { 0x4cc34ec13cf48283L,0xb09daa259c8a705eL,0xd1e9d0d05b7d4f84L,
+ 0x4df6ef64db38929dL },
+ { 0xe16b0763aa21ba46L,0xc6b1d178a293f8fbL,0x0ff5b602d520aabfL,
+ 0x94d671bdc339397aL } },
+ /* 31 << 154 */
+ { { 0x7c7d98cf4f5792faL,0x7c5e0d6711215261L,0x9b19a631a7c5a6d4L,
+ 0xc8511a627a45274dL },
+ { 0x0c16621ca5a60d99L,0xf7fbab88cf5e48cbL,0xab1e6ca2f7ddee08L,
+ 0x83bd08cee7867f3cL } },
+ /* 32 << 154 */
+ { { 0xf7e48e8a2ac13e27L,0x4494f6df4eb1a9f5L,0xedbf84eb981f0a62L,
+ 0x49badc32536438f0L },
+ { 0x50bea541004f7571L,0xbac67d10df1c94eeL,0x253d73a1b727bc31L,
+ 0xb3d01cf230686e28L } },
+ /* 33 << 154 */
+ { { 0x51b77b1b55fd0b8bL,0xa099d183feec3173L,0x202b1fb7670e72b7L,
+ 0xadc88b33a8e1635fL },
+ { 0x34e8216af989d905L,0xc2e68d2029b58d01L,0x11f81c926fe55a93L,
+ 0x15f1462a8f296f40L } },
+ /* 34 << 154 */
+ { { 0x1915d375ea3d62f2L,0xa17765a301c8977dL,0x7559710ae47b26f6L,
+ 0xe0bd29c8535077a5L },
+ { 0x615f976d08d84858L,0x370dfe8569ced5c1L,0xbbc7503ca734fa56L,
+ 0xfbb9f1ec91ac4574L } },
+ /* 35 << 154 */
+ { { 0x95d7ec53060dd7efL,0xeef2dacd6e657979L,0x54511af3e2a08235L,
+ 0x1e324aa41f4aea3dL },
+ { 0x550e7e71e6e67671L,0xbccd5190bf52faf7L,0xf880d316223cc62aL,
+ 0x0d402c7e2b32eb5dL } },
+ /* 36 << 154 */
+ { { 0xa40bc039306a5a3bL,0x4e0a41fd96783a1bL,0xa1e8d39a0253cdd4L,
+ 0x6480be26c7388638L },
+ { 0xee365e1d2285f382L,0x188d8d8fec0b5c36L,0x34ef1a481f0f4d82L,
+ 0x1a8f43e1a487d29aL } },
+ /* 37 << 154 */
+ { { 0x8168226d77aefb3aL,0xf69a751e1e72c253L,0x8e04359ae9594df1L,
+ 0x475ffd7dd14c0467L },
+ { 0xb5a2c2b13844e95cL,0x85caf647dd12ef94L,0x1ecd2a9ff1063d00L,
+ 0x1dd2e22923843311L } },
+ /* 38 << 154 */
+ { { 0x38f0e09d73d17244L,0x3ede77468fc653f1L,0xae4459f5dc20e21cL,
+ 0x00db2ffa6a8599eaL },
+ { 0x11682c3930cfd905L,0x4934d074a5c112a6L,0xbdf063c5568bfe95L,
+ 0x779a440a016c441aL } },
+ /* 39 << 154 */
+ { { 0x0c23f21897d6fbdcL,0xd3a5cd87e0776aacL,0xcee37f72d712e8dbL,
+ 0xfb28c70d26f74e8dL },
+ { 0xffe0c728b61301a0L,0xa6282168d3724354L,0x7ff4cb00768ffedcL,
+ 0xc51b308803b02de9L } },
+ /* 40 << 154 */
+ { { 0xa5a8147c3902dda5L,0x35d2f706fe6973b4L,0x5ac2efcfc257457eL,
+ 0x933f48d48700611bL },
+ { 0xc365af884912beb2L,0x7f5a4de6162edf94L,0xc646ba7c0c32f34bL,
+ 0x632c6af3b2091074L } },
+ /* 41 << 154 */
+ { { 0x58d4f2e3753e43a9L,0x70e1d21724d4e23fL,0xb24bf729afede6a6L,
+ 0x7f4a94d8710c8b60L },
+ { 0xaad90a968d4faa6aL,0xd9ed0b32b066b690L,0x52fcd37b78b6dbfdL,
+ 0x0b64615e8bd2b431L } },
+ /* 42 << 154 */
+ { { 0x228e2048cfb9fad5L,0xbeaa386d240b76bdL,0x2d6681c890dad7bcL,
+ 0x3e553fc306d38f5eL },
+ { 0xf27cdb9b9d5f9750L,0x3e85c52ad28c5b0eL,0x190795af5247c39bL,
+ 0x547831ebbddd6828L } },
+ /* 43 << 154 */
+ { { 0xf327a2274a82f424L,0x36919c787e47f89dL,0xe478391943c7392cL,
+ 0xf101b9aa2316fefeL },
+ { 0xbcdc9e9c1c5009d2L,0xfb55ea139cd18345L,0xf5b5e231a3ce77c7L,
+ 0xde6b4527d2f2cb3dL } },
+ /* 44 << 154 */
+ { { 0x10f6a3339bb26f5fL,0x1e85db8e044d85b6L,0xc3697a0894197e54L,
+ 0x65e18cc0a7cb4ea8L },
+ { 0xa38c4f50a471fe6eL,0xf031747a2f13439cL,0x53c4a6bac007318bL,
+ 0xa8da3ee51deccb3dL } },
+ /* 45 << 154 */
+ { { 0x0555b31c558216b1L,0x90c7810c2f79e6c2L,0x9b669f4dfe8eed3cL,
+ 0x70398ec8e0fac126L },
+ { 0xa96a449ef701b235L,0x0ceecdb3eb94f395L,0x285fc368d0cb7431L,
+ 0x0d37bb5216a18c64L } },
+ /* 46 << 154 */
+ { { 0x05110d38b880d2ddL,0xa60f177b65930d57L,0x7da34a67f36235f5L,
+ 0x47f5e17c183816b9L },
+ { 0xc7664b57db394af4L,0x39ba215d7036f789L,0x46d2ca0e2f27b472L,
+ 0xc42647eef73a84b7L } },
+ /* 47 << 154 */
+ { { 0x44bc754564488f1dL,0xaa922708f4cf85d5L,0x721a01d553e4df63L,
+ 0x649c0c515db46cedL },
+ { 0x6bf0d64e3cffcb6cL,0xe3bf93fe50f71d96L,0x75044558bcc194a0L,
+ 0x16ae33726afdc554L } },
+ /* 48 << 154 */
+ { { 0xbfc01adf5ca48f3fL,0x64352f06e22a9b84L,0xcee54da1c1099e4aL,
+ 0xbbda54e8fa1b89c0L },
+ { 0x166a3df56f6e55fbL,0x1ca44a2420176f88L,0x936afd88dfb7b5ffL,
+ 0xe34c24378611d4a0L } },
+ /* 49 << 154 */
+ { { 0x7effbb7586142103L,0x6704ba1b1f34fc4dL,0x7c2a468f10c1b122L,
+ 0x36b3a6108c6aace9L },
+ { 0xabfcc0a775a0d050L,0x066f91973ce33e32L,0xce905ef429fe09beL,
+ 0x89ee25baa8376351L } },
+ /* 50 << 154 */
+ { { 0x2a3ede22fd29dc76L,0x7fd32ed936f17260L,0x0cadcf68284b4126L,
+ 0x63422f08a7951fc8L },
+ { 0x562b24f40807e199L,0xfe9ce5d122ad4490L,0xc2f51b100db2b1b4L,
+ 0xeb3613ffe4541d0dL } },
+ /* 51 << 154 */
+ { { 0xbd2c4a052680813bL,0x527aa55d561b08d6L,0xa9f8a40ea7205558L,
+ 0xe3eea56f243d0becL },
+ { 0x7b853817a0ff58b3L,0xb67d3f651a69e627L,0x0b76bbb9a869b5d6L,
+ 0xa3afeb82546723edL } },
+ /* 52 << 154 */
+ { { 0x5f24416d3e554892L,0x8413b53d430e2a45L,0x99c56aee9032a2a0L,
+ 0x09432bf6eec367b1L },
+ { 0x552850c6daf0ecc1L,0x49ebce555bc92048L,0xdfb66ba654811307L,
+ 0x1b84f7976f298597L } },
+ /* 53 << 154 */
+ { { 0x795904818d1d7a0dL,0xd9fabe033a6fa556L,0xa40f9c59ba9e5d35L,
+ 0xcb1771c1f6247577L },
+ { 0x542a47cae9a6312bL,0xa34b3560552dd8c5L,0xfdf94de00d794716L,
+ 0xd46124a99c623094L } },
+ /* 54 << 154 */
+ { { 0x56b7435d68afe8b4L,0x27f205406c0d8ea1L,0x12b77e1473186898L,
+ 0xdbc3dd467479490fL },
+ { 0x951a9842c03b0c05L,0x8b1b3bb37921bc96L,0xa573b3462b202e0aL,
+ 0x77e4665d47254d56L } },
+ /* 55 << 154 */
+ { { 0x08b70dfcd23e3984L,0xab86e8bcebd14236L,0xaa3e07f857114ba7L,
+ 0x5ac71689ab0ef4f2L },
+ { 0x88fca3840139d9afL,0x72733f8876644af0L,0xf122f72a65d74f4aL,
+ 0x13931577a5626c7aL } },
+ /* 56 << 154 */
+ { { 0xd5b5d9eb70f8d5a4L,0x375adde7d7bbb228L,0x31e88b860c1c0b32L,
+ 0xd1f568c4173edbaaL },
+ { 0x1592fc835459df02L,0x2beac0fb0fcd9a7eL,0xb0a6fdb81b473b0aL,
+ 0xe3224c6f0fe8fc48L } },
+ /* 57 << 154 */
+ { { 0x680bd00ee87edf5bL,0x30385f0220e77cf5L,0xe9ab98c04d42d1b2L,
+ 0x72d191d2d3816d77L },
+ { 0x1564daca0917d9e5L,0x394eab591f8fed7fL,0xa209aa8d7fbb3896L,
+ 0x5564f3b9be6ac98eL } },
+ /* 58 << 154 */
+ { { 0xead21d05d73654efL,0x68d1a9c413d78d74L,0x61e017086d4973a0L,
+ 0x83da350046e6d32aL },
+ { 0x6a3dfca468ae0118L,0xa1b9a4c9d02da069L,0x0b2ff9c7ebab8302L,
+ 0x98af07c3944ba436L } },
+ /* 59 << 154 */
+ { { 0x85997326995f0f9fL,0x467fade071b58bc6L,0x47e4495abd625a2bL,
+ 0xfdd2d01d33c3b8cdL },
+ { 0x2c38ae28c693f9faL,0x48622329348f7999L,0x97bf738e2161f583L,
+ 0x15ee2fa7565e8cc9L } },
+ /* 60 << 154 */
+ { { 0xa1a5c8455777e189L,0xcc10bee0456f2829L,0x8ad95c56da762bd5L,
+ 0x152e2214e9d91da8L },
+ { 0x975b0e727cb23c74L,0xfd5d7670a90c66dfL,0xb5b5b8ad225ffc53L,
+ 0xab6dff73faded2aeL } },
+ /* 61 << 154 */
+ { { 0xebd567816f4cbe9dL,0x0ed8b2496a574bd7L,0x41c246fe81a881faL,
+ 0x91564805c3db9c70L },
+ { 0xd7c12b085b862809L,0x1facd1f155858d7bL,0x7693747caf09e92aL,
+ 0x3b69dcba189a425fL } },
+ /* 62 << 154 */
+ { { 0x0be28e9f967365efL,0x57300eb2e801f5c9L,0x93b8ac6ad583352fL,
+ 0xa2cf1f89cd05b2b7L },
+ { 0x7c0c9b744dcc40ccL,0xfee38c45ada523fbL,0xb49a4dec1099cc4dL,
+ 0x325c377f69f069c6L } },
+ /* 63 << 154 */
+ { { 0xe12458ce476cc9ffL,0x580e0b6cc6d4cb63L,0xd561c8b79072289bL,
+ 0x0377f264a619e6daL },
+ { 0x2668536288e591a5L,0xa453a7bd7523ca2bL,0x8a9536d2c1df4533L,
+ 0xc8e50f2fbe972f79L } },
+ /* 64 << 154 */
+ { { 0xd433e50f6d3549cfL,0x6f33696ffacd665eL,0x695bfdacce11fcb4L,
+ 0x810ee252af7c9860L },
+ { 0x65450fe17159bb2cL,0xf7dfbebe758b357bL,0x2b057e74d69fea72L,
+ 0xd485717a92731745L } },
+ /* 0 << 161 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 161 */
+ { { 0x896c42e8ee36860cL,0xdaf04dfd4113c22dL,0x1adbb7b744104213L,
+ 0xe5fd5fa11fd394eaL },
+ { 0x68235d941a4e0551L,0x6772cfbe18d10151L,0x276071e309984523L,
+ 0xe4e879de5a56ba98L } },
+ /* 2 << 161 */
+ { { 0xaaafafb0285b9491L,0x01a0be881e4c705eL,0xff1d4f5d2ad9caabL,
+ 0x6e349a4ac37a233fL },
+ { 0xcf1c12464a1c6a16L,0xd99e6b6629383260L,0xea3d43665f6d5471L,
+ 0x36974d04ff8cc89bL } },
+ /* 3 << 161 */
+ { { 0xc26c49a1cfe89d80L,0xb42c026dda9c8371L,0xca6c013adad066d2L,
+ 0xfb8f722856a4f3eeL },
+ { 0x08b579ecd850935bL,0x34c1a74cd631e1b3L,0xcb5fe596ac198534L,
+ 0x39ff21f6e1f24f25L } },
+ /* 4 << 161 */
+ { { 0x27f29e148f929057L,0x7a64ae06c0c853dfL,0x256cd18358e9c5ceL,
+ 0x9d9cce82ded092a5L },
+ { 0xcc6e59796e93b7c7L,0xe1e4709231bb9e27L,0xb70b3083aa9e29a0L,
+ 0xbf181a753785e644L } },
+ /* 5 << 161 */
+ { { 0xf53f2c658ead09f7L,0x1335e1d59780d14dL,0x69cc20e0cd1b66bcL,
+ 0x9b670a37bbe0bfc8L },
+ { 0xce53dc8128efbeedL,0x0c74e77c8326a6e5L,0x3604e0d2b88e9a63L,
+ 0xbab38fca13dc2248L } },
+ /* 6 << 161 */
+ { { 0x8ed6e8c85c0a3f1eL,0xbcad24927c87c37fL,0xfdfb62bb9ee3b78dL,
+ 0xeba8e477cbceba46L },
+ { 0x37d38cb0eeaede4bL,0x0bc498e87976deb6L,0xb2944c046b6147fbL,
+ 0x8b123f35f71f9609L } },
+ /* 7 << 161 */
+ { { 0xa155dcc7de79dc24L,0xf1168a32558f69cdL,0xbac215950d1850dfL,
+ 0x15c8295bb204c848L },
+ { 0xf661aa367d8184ffL,0xc396228e30447bdbL,0x11cd5143bde4a59eL,
+ 0xe3a26e3b6beab5e6L } },
+ /* 8 << 161 */
+ { { 0xd3b3a13f1402b9d0L,0x573441c32c7bc863L,0x4b301ec4578c3e6eL,
+ 0xc26fc9c40adaf57eL },
+ { 0x96e71bfd7493cea3L,0xd05d4b3f1af81456L,0xdaca2a8a6a8c608fL,
+ 0x53ef07f60725b276L } },
+ /* 9 << 161 */
+ { { 0x07a5fbd27824fc56L,0x3467521813289077L,0x5bf69fd5e0c48349L,
+ 0xa613ddd3b6aa7875L },
+ { 0x7f78c19c5450d866L,0x46f4409c8f84a481L,0x9f1d192890fce239L,
+ 0x016c4168b2ce44b9L } },
+ /* 10 << 161 */
+ { { 0xbae023f0c7435978L,0xb152c88820e30e19L,0x9c241645e3fa6fafL,
+ 0x735d95c184823e60L },
+ { 0x0319757303955317L,0x0b4b02a9f03b4995L,0x076bf55970274600L,
+ 0x32c5cc53aaf57508L } },
+ /* 11 << 161 */
+ { { 0xe8af6d1f60624129L,0xb7bc5d649a5e2b5eL,0x3814b0485f082d72L,
+ 0x76f267f2ce19677aL },
+ { 0x626c630fb36eed93L,0x55230cd73bf56803L,0x78837949ce2736a0L,
+ 0x0d792d60aa6c55f1L } },
+ /* 12 << 161 */
+ { { 0x0318dbfdd5c7c5d2L,0xb38f8da7072b342dL,0x3569bddc7b8de38aL,
+ 0xf25b5887a1c94842L },
+ { 0xb2d5b2842946ad60L,0x854f29ade9d1707eL,0xaa5159dc2c6a4509L,
+ 0x899f94c057189837L } },
+ /* 13 << 161 */
+ { { 0xcf6adc51f4a55b03L,0x261762de35e3b2d5L,0x4cc4301204827b51L,
+ 0xcd22a113c6021442L },
+ { 0xce2fd61a247c9569L,0x59a50973d152becaL,0x6c835a1163a716d4L,
+ 0xc26455ed187dedcfL } },
+ /* 14 << 161 */
+ { { 0x27f536e049ce89e7L,0x18908539cc890cb5L,0x308909abd83c2aa1L,
+ 0xecd3142b1ab73bd3L },
+ { 0x6a85bf59b3f5ab84L,0x3c320a68f2bea4c6L,0xad8dc5386da4541fL,
+ 0xeaf34eb0b7c41186L } },
+ /* 15 << 161 */
+ { { 0x1c780129977c97c4L,0x5ff9beebc57eb9faL,0xa24d0524c822c478L,
+ 0xfd8eec2a461cd415L },
+ { 0xfbde194ef027458cL,0xb4ff53191d1be115L,0x63f874d94866d6f4L,
+ 0x35c75015b21ad0c9L } },
+ /* 16 << 161 */
+ { { 0xa6b5c9d646ac49d2L,0x42c77c0b83137aa9L,0x24d000fc68225a38L,
+ 0x0f63cfc82fe1e907L },
+ { 0x22d1b01bc6441f95L,0x7d38f719ec8e448fL,0x9b33fa5f787fb1baL,
+ 0x94dcfda1190158dfL } },
+ /* 17 << 161 */
+ { { 0xc47cb3395f6d4a09L,0x6b4f355cee52b826L,0x3d100f5df51b930aL,
+ 0xf4512fac9f668f69L },
+ { 0x546781d5206c4c74L,0xd021d4d4cb4d2e48L,0x494a54c2ca085c2dL,
+ 0xf1dbaca4520850a8L } },
+ /* 18 << 161 */
+ { { 0x63c79326490a1acaL,0xcb64dd9c41526b02L,0xbb772591a2979258L,
+ 0x3f58297048d97846L },
+ { 0xd66b70d17c213ba7L,0xc28febb5e8a0ced4L,0x6b911831c10338c1L,
+ 0x0d54e389bf0126f3L } },
+ /* 19 << 161 */
+ { { 0x7048d4604af206eeL,0x786c88f677e97cb9L,0xd4375ae1ac64802eL,
+ 0x469bcfe1d53ec11cL },
+ { 0xfc9b340d47062230L,0xe743bb57c5b4a3acL,0xfe00b4aa59ef45acL,
+ 0x29a4ef2359edf188L } },
+ /* 20 << 161 */
+ { { 0x40242efeb483689bL,0x2575d3f6513ac262L,0xf30037c80ca6db72L,
+ 0xc9fcce8298864be2L },
+ { 0x84a112ff0149362dL,0x95e575821c4ae971L,0x1fa4b1a8945cf86cL,
+ 0x4525a7340b024a2fL } },
+ /* 21 << 161 */
+ { { 0xe76c8b628f338360L,0x483ff59328edf32bL,0x67e8e90a298b1aecL,
+ 0x9caab338736d9a21L },
+ { 0x5c09d2fd66892709L,0x2496b4dcb55a1d41L,0x93f5fb1ae24a4394L,
+ 0x08c750496fa8f6c1L } },
+ /* 22 << 161 */
+ { { 0xcaead1c2c905d85fL,0xe9d7f7900733ae57L,0x24c9a65cf07cdd94L,
+ 0x7389359ca4b55931L },
+ { 0xf58709b7367e45f7L,0x1f203067cb7e7adcL,0x82444bffc7b72818L,
+ 0x07303b35baac8033L } },
+ /* 23 << 161 */
+ { { 0x1e1ee4e4d13b7ea1L,0xe6489b24e0e74180L,0xa5f2c6107e70ef70L,
+ 0xa1655412bdd10894L },
+ { 0x555ebefb7af4194eL,0x533c1c3c8e89bd9cL,0x735b9b5789895856L,
+ 0x15fb3cd2567f5c15L } },
+ /* 24 << 161 */
+ { { 0x057fed45526f09fdL,0xe8a4f10c8128240aL,0x9332efc4ff2bfd8dL,
+ 0x214e77a0bd35aa31L },
+ { 0x32896d7314faa40eL,0x767867ec01e5f186L,0xc9adf8f117a1813eL,
+ 0xcb6cda7854741795L } },
+ /* 25 << 161 */
+ { { 0xb7521b6d349d51aaL,0xf56b5a9ee3c7b8e9L,0xc6f1e5c932a096dfL,
+ 0x083667c4a3635024L },
+ { 0x365ea13518087f2fL,0xf1b8eaacd136e45dL,0xc8a0e48473aec989L,
+ 0xd75a324b142c9259L } },
+ /* 26 << 161 */
+ { { 0xb7b4d00101dae185L,0x45434e0b9b7a94bcL,0xf54339affbd8cb0bL,
+ 0xdcc4569ee98ef49eL },
+ { 0x7789318a09a51299L,0x81b4d206b2b025d8L,0xf64aa418fae85792L,
+ 0x3e50258facd7baf7L } },
+ /* 27 << 161 */
+ { { 0xdce84cdb2996864bL,0xa2e670891f485fa4L,0xb28b2bb6534c6a5aL,
+ 0x31a7ec6bc94b9d39L },
+ { 0x1d217766d6bc20daL,0x4acdb5ec86761190L,0x6872632873701063L,
+ 0x4d24ee7c2128c29bL } },
+ /* 28 << 161 */
+ { { 0xc072ebd3a19fd868L,0x612e481cdb8ddd3bL,0xb4e1d7541a64d852L,
+ 0x00ef95acc4c6c4abL },
+ { 0x1536d2edaa0a6c46L,0x6129408643774790L,0x54af25e8343fda10L,
+ 0x9ff9d98dfd25d6f2L } },
+ /* 29 << 161 */
+ { { 0x0746af7c468b8835L,0x977a31cb730ecea7L,0xa5096b80c2cf4a81L,
+ 0xaa9868336458c37aL },
+ { 0x6af29bf3a6bd9d34L,0x6a62fe9b33c5d854L,0x50e6c304b7133b5eL,
+ 0x04b601597d6e6848L } },
+ /* 30 << 161 */
+ { { 0x4cd296df5579bea4L,0x10e35ac85ceedaf1L,0x04c4c5fde3bcc5b1L,
+ 0x95f9ee8a89412cf9L },
+ { 0x2c9459ee82b6eb0fL,0x2e84576595c2aaddL,0x774a84aed327fcfeL,
+ 0xd8c937220368d476L } },
+ /* 31 << 161 */
+ { { 0x0dbd5748f83e8a3bL,0xa579aa968d2495f3L,0x535996a0ae496e9bL,
+ 0x07afbfe9b7f9bcc2L },
+ { 0x3ac1dc6d5b7bd293L,0x3b592cff7022323dL,0xba0deb989c0a3e76L,
+ 0x18e78e9f4b197acbL } },
+ /* 32 << 161 */
+ { { 0x211cde10296c36efL,0x7ee8967282c4da77L,0xb617d270a57836daL,
+ 0xf0cd9c319cb7560bL },
+ { 0x01fdcbf7e455fe90L,0x3fb53cbb7e7334f3L,0x781e2ea44e7de4ecL,
+ 0x8adab3ad0b384fd0L } },
+ /* 33 << 161 */
+ { { 0x129eee2f53d64829L,0x7a471e17a261492bL,0xe4f9adb9e4cb4a2cL,
+ 0x3d359f6f97ba2c2dL },
+ { 0x346c67860aacd697L,0x92b444c375c2f8a8L,0xc79fa117d85df44eL,
+ 0x56782372398ddf31L } },
+ /* 34 << 161 */
+ { { 0x60e690f2bbbab3b8L,0x4851f8ae8b04816bL,0xc72046ab9c92e4d2L,
+ 0x518c74a17cf3136bL },
+ { 0xff4eb50af9877d4cL,0x14578d90a919cabbL,0x8218f8c4ac5eb2b6L,
+ 0xa3ccc547542016e4L } },
+ /* 35 << 161 */
+ { { 0x025bf48e327f8349L,0xf3e97346f43cb641L,0xdc2bafdf500f1085L,
+ 0x571678762f063055L },
+ { 0x5bd914b9411925a6L,0x7c078d48a1123de5L,0xee6bf835182b165dL,
+ 0xb11b5e5bba519727L } },
+ /* 36 << 161 */
+ { { 0xe33ea76c1eea7b85L,0x2352b46192d4f85eL,0xf101d334afe115bbL,
+ 0xfabc1294889175a3L },
+ { 0x7f6bcdc05233f925L,0xe0a802dbe77fec55L,0xbdb47b758069b659L,
+ 0x1c5e12def98fbd74L } },
+ /* 37 << 161 */
+ { { 0x869c58c64b8457eeL,0xa5360f694f7ea9f7L,0xe576c09ff460b38fL,
+ 0x6b70d54822b7fb36L },
+ { 0x3fd237f13bfae315L,0x33797852cbdff369L,0x97df25f525b516f9L,
+ 0x46f388f2ba38ad2dL } },
+ /* 38 << 161 */
+ { { 0x656c465889d8ddbbL,0x8830b26e70f38ee8L,0x4320fd5cde1212b0L,
+ 0xc34f30cfe4a2edb2L },
+ { 0xabb131a356ab64b8L,0x7f77f0ccd99c5d26L,0x66856a37bf981d94L,
+ 0x19e76d09738bd76eL } },
+ /* 39 << 161 */
+ { { 0xe76c8ac396238f39L,0xc0a482bea830b366L,0xb7b8eaff0b4eb499L,
+ 0x8ecd83bc4bfb4865L },
+ { 0x971b2cb7a2f3776fL,0xb42176a4f4b88adfL,0xb9617df5be1fa446L,
+ 0x8b32d508cd031bd2L } },
+ /* 40 << 161 */
+ { { 0x1c6bd47d53b618c0L,0xc424f46c6a227923L,0x7303ffdedd92d964L,
+ 0xe971287871b5abf2L },
+ { 0x8f48a632f815561dL,0x85f48ff5d3c055d1L,0x222a14277525684fL,
+ 0xd0d841a067360cc3L } },
+ /* 41 << 161 */
+ { { 0x4245a9260b9267c6L,0xc78913f1cf07f863L,0xaa844c8e4d0d9e24L,
+ 0xa42ad5223d5f9017L },
+ { 0xbd371749a2c989d5L,0x928292dfe1f5e78eL,0x493b383e0a1ea6daL,
+ 0x5136fd8d13aee529L } },
+ /* 42 << 161 */
+ { { 0x860c44b1f2c34a99L,0x3b00aca4bf5855acL,0xabf6aaa0faaf37beL,
+ 0x65f436822a53ec08L },
+ { 0x1d9a5801a11b12e1L,0x78a7ab2ce20ed475L,0x0de1067e9a41e0d5L,
+ 0x30473f5f305023eaL } },
+ /* 43 << 161 */
+ { { 0xdd3ae09d169c7d97L,0x5cd5baa4cfaef9cdL,0x5cd7440b65a44803L,
+ 0xdc13966a47f364deL },
+ { 0x077b2be82b8357c1L,0x0cb1b4c5e9d57c2aL,0x7a4ceb3205ff363eL,
+ 0xf310fa4dca35a9efL } },
+ /* 44 << 161 */
+ { { 0xdbb7b352f97f68c6L,0x0c773b500b02cf58L,0xea2e48213c1f96d9L,
+ 0xffb357b0eee01815L },
+ { 0xb9c924cde0f28039L,0x0b36c95a46a3fbe4L,0x1faaaea45e46db6cL,
+ 0xcae575c31928aaffL } },
+ /* 45 << 161 */
+ { { 0x7f671302a70dab86L,0xfcbd12a971c58cfcL,0xcbef9acfbee0cb92L,
+ 0x573da0b9f8c1b583L },
+ { 0x4752fcfe0d41d550L,0xe7eec0e32155cffeL,0x0fc39fcb545ae248L,
+ 0x522cb8d18065f44eL } },
+ /* 46 << 161 */
+ { { 0x263c962a70cbb96cL,0xe034362abcd124a9L,0xf120db283c2ae58dL,
+ 0xb9a38d49fef6d507L },
+ { 0xb1fd2a821ff140fdL,0xbd162f3020aee7e0L,0x4e17a5d4cb251949L,
+ 0x2aebcb834f7e1c3dL } },
+ /* 47 << 161 */
+ { { 0x608eb25f937b0527L,0xf42e1e47eb7d9997L,0xeba699c4b8a53a29L,
+ 0x1f921c71e091b536L },
+ { 0xcce29e7b5b26bbd5L,0x7a8ef5ed3b61a680L,0xe5ef8043ba1f1c7eL,
+ 0x16ea821718158ddaL } },
+ /* 48 << 161 */
+ { { 0x01778a2b599ff0f9L,0x68a923d78104fc6bL,0x5bfa44dfda694ff3L,
+ 0x4f7199dbf7667f12L },
+ { 0xc06d8ff6e46f2a79L,0x08b5deade9f8131dL,0x02519a59abb4ce7cL,
+ 0xc4f710bcb42aec3eL } },
+ /* 49 << 161 */
+ { { 0x3d77b05778bde41aL,0x6474bf80b4186b5aL,0x048b3f6788c65741L,
+ 0xc64519de03c7c154L },
+ { 0xdf0738460edfcc4fL,0x319aa73748f1aa6bL,0x8b9f8a02ca909f77L,
+ 0x902581397580bfefL } },
+ /* 50 << 161 */
+ { { 0xd8bfd3cac0c22719L,0xc60209e4c9ca151eL,0x7a744ab5d9a1a69cL,
+ 0x6de5048b14937f8fL },
+ { 0x171938d8e115ac04L,0x7df709401c6b16d2L,0xa6aeb6637f8e94e7L,
+ 0xc130388e2a2cf094L } },
+ /* 51 << 161 */
+ { { 0x1850be8477f54e6eL,0x9f258a7265d60fe5L,0xff7ff0c06c9146d6L,
+ 0x039aaf90e63a830bL },
+ { 0x38f27a739460342fL,0x4703148c3f795f8aL,0x1bb5467b9681a97eL,
+ 0x00931ba5ecaeb594L } },
+ /* 52 << 161 */
+ { { 0xcdb6719d786f337cL,0xd9c01cd2e704397dL,0x0f4a3f20555c2fefL,
+ 0x004525097c0af223L },
+ { 0x54a5804784db8e76L,0x3bacf1aa93c8aa06L,0x11ca957cf7919422L,
+ 0x5064105378cdaa40L } },
+ /* 53 << 161 */
+ { { 0x7a3038749f7144aeL,0x170c963f43d4acfdL,0x5e14814958ddd3efL,
+ 0xa7bde5829e72dba8L },
+ { 0x0769da8b6fa68750L,0xfa64e532572e0249L,0xfcaadf9d2619ad31L,
+ 0x87882daaa7b349cdL } },
+ /* 54 << 161 */
+ { { 0x9f6eb7316c67a775L,0xcb10471aefc5d0b1L,0xb433750ce1b806b2L,
+ 0x19c5714d57b1ae7eL },
+ { 0xc0dc8b7bed03fd3fL,0xdd03344f31bc194eL,0xa66c52a78c6320b5L,
+ 0x8bc82ce3d0b6fd93L } },
+ /* 55 << 161 */
+ { { 0xf8e13501b35f1341L,0xe53156dd25a43e42L,0xd3adf27e4daeb85cL,
+ 0xb81d8379bbeddeb5L },
+ { 0x1b0b546e2e435867L,0x9020eb94eba5dd60L,0x37d911618210cb9dL,
+ 0x4c596b315c91f1cfL } },
+ /* 56 << 161 */
+ { { 0xb228a90f0e0b040dL,0xbaf02d8245ff897fL,0x2aac79e600fa6122L,
+ 0x248288178e36f557L },
+ { 0xb9521d31113ec356L,0x9e48861e15eff1f8L,0x2aa1d412e0d41715L,
+ 0x71f8620353f131b8L } },
+ /* 57 << 161 */
+ { { 0xf60da8da3fd19408L,0x4aa716dc278d9d99L,0x394531f7a8c51c90L,
+ 0xb560b0e8f59db51cL },
+ { 0xa28fc992fa34bdadL,0xf024fa149cd4f8bdL,0x5cf530f723a9d0d3L,
+ 0x615ca193e28c9b56L } },
+ /* 58 << 161 */
+ { { 0x6d2a483d6f73c51eL,0xa4cb2412ea0dc2ddL,0x50663c411eb917ffL,
+ 0x3d3a74cfeade299eL },
+ { 0x29b3990f4a7a9202L,0xa9bccf59a7b15c3dL,0x66a3ccdca5df9208L,
+ 0x48027c1443f2f929L } },
+ /* 59 << 161 */
+ { { 0xd385377c40b557f0L,0xe001c366cd684660L,0x1b18ed6be2183a27L,
+ 0x879738d863210329L },
+ { 0xa687c74bbda94882L,0xd1bbcc48a684b299L,0xaf6f1112863b3724L,
+ 0x6943d1b42c8ce9f8L } },
+ /* 60 << 161 */
+ { { 0xe044a3bb098cafb4L,0x27ed231060d48cafL,0x542b56753a31b84dL,
+ 0xcbf3dd50fcddbed7L },
+ { 0x25031f1641b1d830L,0xa7ec851dcb0c1e27L,0xac1c8fe0b5ae75dbL,
+ 0xb24c755708c52120L } },
+ /* 61 << 161 */
+ { { 0x57f811dc1d4636c3L,0xf8436526681a9939L,0x1f6bc6d99c81adb3L,
+ 0x840f8ac35b7d80d4L },
+ { 0x731a9811f4387f1aL,0x7c501cd3b5156880L,0xa5ca4a07dfe68867L,
+ 0xf123d8f05fcea120L } },
+ /* 62 << 161 */
+ { { 0x1fbb0e71d607039eL,0x2b70e215cd3a4546L,0x32d2f01d53324091L,
+ 0xb796ff08180ab19bL },
+ { 0x32d87a863c57c4aaL,0x2aed9cafb7c49a27L,0x9fb35eac31630d98L,
+ 0x338e8cdf5c3e20a3L } },
+ /* 63 << 161 */
+ { { 0x80f1618266cde8dbL,0x4e1599802d72fd36L,0xd7b8f13b9b6e5072L,
+ 0xf52139073b7b5dc1L },
+ { 0x4d431f1d8ce4396eL,0x37a1a680a7ed2142L,0xbf375696d01aaf6bL,
+ 0xaa1c0c54e63aab66L } },
+ /* 64 << 161 */
+ { { 0x3014368b4ed80940L,0x67e6d0567a6fceddL,0x7c208c49ca97579fL,
+ 0xfe3d7a81a23597f6L },
+ { 0x5e2032027e096ae2L,0xb1f3e1e724b39366L,0x26da26f32fdcdffcL,
+ 0x79422f1d6097be83L } },
+ /* 0 << 168 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 168 */
+ { { 0x263a2cfb9db3b381L,0x9c3a2deed4df0a4bL,0x728d06e97d04e61fL,
+ 0x8b1adfbc42449325L },
+ { 0x6ec1d9397e053a1bL,0xee2be5c766daf707L,0x80ba1e14810ac7abL,
+ 0xdd2ae778f530f174L } },
+ /* 2 << 168 */
+ { { 0x0435d97a205b9d8bL,0x6eb8f064056756d4L,0xd5e88a8bb6f8210eL,
+ 0x070ef12dec9fd9eaL },
+ { 0x4d8495053bcc876aL,0x12a75338a7404ce3L,0xd22b49e1b8a1db5eL,
+ 0xec1f205114bfa5adL } },
+ /* 3 << 168 */
+ { { 0xadbaeb79b6828f36L,0x9d7a025801bd5b9eL,0xeda01e0d1e844b0cL,
+ 0x4b625175887edfc9L },
+ { 0x14109fdd9669b621L,0x88a2ca56f6f87b98L,0xfe2eb788170df6bcL,
+ 0x0cea06f4ffa473f9L } },
+ /* 4 << 168 */
+ { { 0x43ed81b5c4e83d33L,0xd9f358795efd488bL,0x164a620f9deb4d0fL,
+ 0xc6927bdbac6a7394L },
+ { 0x45c28df79f9e0f03L,0x2868661efcd7e1a9L,0x7cf4e8d0ffa348f1L,
+ 0x6bd4c284398538e0L } },
+ /* 5 << 168 */
+ { { 0x2618a091289a8619L,0xef796e606671b173L,0x664e46e59090c632L,
+ 0xa38062d41e66f8fbL },
+ { 0x6c744a200573274eL,0xd07b67e4a9271394L,0x391223b26bdc0e20L,
+ 0xbe2d93f1eb0a05a7L } },
+ /* 6 << 168 */
+ { { 0xf23e2e533f36d141L,0xe84bb3d44dfca442L,0xb804a48d6b7c023aL,
+ 0x1e16a8fa76431c3bL },
+ { 0x1b5452adddd472e0L,0x7d405ee70d1ee127L,0x50fc6f1dffa27599L,
+ 0x351ac53cbf391b35L } },
+ /* 7 << 168 */
+ { { 0x7efa14b84444896bL,0x64974d2ff94027fbL,0xefdcd0e8de84487dL,
+ 0x8c45b2602b48989bL },
+ { 0xa8fcbbc2d8463487L,0xd1b2b3f73fbc476cL,0x21d005b7c8f443c0L,
+ 0x518f2e6740c0139cL } },
+ /* 8 << 168 */
+ { { 0x56036e8c06d75fc1L,0x2dcf7bb73249a89fL,0x81dd1d3de245e7ddL,
+ 0xf578dc4bebd6e2a7L },
+ { 0x4c028903df2ce7a0L,0xaee362889c39afacL,0xdc847c31146404abL,
+ 0x6304c0d8a4e97818L } },
+ /* 9 << 168 */
+ { { 0xae51dca2a91f6791L,0x2abe41909baa9efcL,0xd9d2e2f4559c7ac1L,
+ 0xe82f4b51fc9f773aL },
+ { 0xa77130274073e81cL,0xc0276facfbb596fcL,0x1d819fc9a684f70cL,
+ 0x29b47fddc9f7b1e0L } },
+ /* 10 << 168 */
+ { { 0x358de103459b1940L,0xec881c595b013e93L,0x51574c9349532ad3L,
+ 0x2db1d445b37b46deL },
+ { 0xc6445b87df239fd8L,0xc718af75151d24eeL,0xaea1c4a4f43c6259L,
+ 0x40c0e5d770be02f7L } },
+ /* 11 << 168 */
+ { { 0x6a4590f4721b33f2L,0x2124f1fbfedf04eaL,0xf8e53cde9745efe7L,
+ 0xe7e1043265f046d9L },
+ { 0xc3fca28ee4d0c7e6L,0x847e339a87253b1bL,0x9b5953483743e643L,
+ 0xcb6a0a0b4fd12fc5L } },
+ /* 12 << 168 */
+ { { 0xfb6836c327d02dccL,0x5ad009827a68bcc2L,0x1b24b44c005e912dL,
+ 0xcc83d20f811fdcfeL },
+ { 0x36527ec1666fba0cL,0x6994819714754635L,0xfcdcb1a8556da9c2L,
+ 0xa593426781a732b2L } },
+ /* 13 << 168 */
+ { { 0xec1214eda714181dL,0x609ac13b6067b341L,0xff4b4c97a545df1fL,
+ 0xa124050134d2076bL },
+ { 0x6efa0c231409ca97L,0x254cc1a820638c43L,0xd4e363afdcfb46cdL,
+ 0x62c2adc303942a27L } },
+ /* 14 << 168 */
+ { { 0xc67b9df056e46483L,0xa55abb2063736356L,0xab93c098c551bc52L,
+ 0x382b49f9b15fe64bL },
+ { 0x9ec221ad4dff8d47L,0x79caf615437df4d6L,0x5f13dc64bb456509L,
+ 0xe4c589d9191f0714L } },
+ /* 15 << 168 */
+ { { 0x27b6a8ab3fd40e09L,0xe455842e77313ea9L,0x8b51d1e21f55988bL,
+ 0x5716dd73062bbbfcL },
+ { 0x633c11e54e8bf3deL,0x9a0e77b61b85be3bL,0x565107290911cca6L,
+ 0x27e76495efa6590fL } },
+ /* 16 << 168 */
+ { { 0xe4ac8b33070d3aabL,0x2643672b9a2cd5e5L,0x52eff79b1cfc9173L,
+ 0x665ca49b90a7c13fL },
+ { 0x5a8dda59b3efb998L,0x8a5b922d052f1341L,0xae9ebbab3cf9a530L,
+ 0x35986e7bf56da4d7L } },
+ /* 17 << 168 */
+ { { 0x3a636b5cff3513ccL,0xbb0cf8ba3198f7ddL,0xb8d4052241f16f86L,
+ 0x760575d8de13a7bfL },
+ { 0x36f74e169f7aa181L,0x163a3ecff509ed1cL,0x6aead61f3c40a491L,
+ 0x158c95fcdfe8fcaaL } },
+ /* 18 << 168 */
+ { { 0xa3991b6e13cda46fL,0x79482415342faed0L,0xf3ba5bde666b5970L,
+ 0x1d52e6bcb26ab6ddL },
+ { 0x768ba1e78608dd3dL,0x4930db2aea076586L,0xd9575714e7dc1afaL,
+ 0x1fc7bf7df7c58817L } },
+ /* 19 << 168 */
+ { { 0x6b47accdd9eee96cL,0x0ca277fbe58cec37L,0x113fe413e702c42aL,
+ 0xdd1764eec47cbe51L },
+ { 0x041e7cde7b3ed739L,0x50cb74595ce9e1c0L,0x355685132925b212L,
+ 0x7cff95c4001b081cL } },
+ /* 20 << 168 */
+ { { 0x63ee4cbd8088b454L,0xdb7f32f79a9e0c8aL,0xb377d4186b2447cbL,
+ 0xe3e982aad370219bL },
+ { 0x06ccc1e4c2a2a593L,0x72c368650773f24fL,0xa13b4da795859423L,
+ 0x8bbf1d3375040c8fL } },
+ /* 21 << 168 */
+ { { 0x726f0973da50c991L,0x48afcd5b822d6ee2L,0xe5fc718b20fd7771L,
+ 0xb9e8e77dfd0807a1L },
+ { 0x7f5e0f4499a7703dL,0x6972930e618e36f3L,0x2b7c77b823807bbeL,
+ 0xe5b82405cb27ff50L } },
+ /* 22 << 168 */
+ { { 0xba8b8be3bd379062L,0xd64b7a1d2dce4a92L,0x040a73c5b2952e37L,
+ 0x0a9e252ed438aecaL },
+ { 0xdd43956bc39d3bcbL,0x1a31ca00b32b2d63L,0xd67133b85c417a18L,
+ 0xd08e47902ef442c8L } },
+ /* 23 << 168 */
+ { { 0x98cb1ae9255c0980L,0x4bd863812b4a739fL,0x5a5c31e11e4a45a1L,
+ 0x1e5d55fe9cb0db2fL },
+ { 0x74661b068ff5cc29L,0x026b389f0eb8a4f4L,0x536b21a458848c24L,
+ 0x2e5bf8ec81dc72b0L } },
+ /* 24 << 168 */
+ { { 0x03c187d0ad886aacL,0x5c16878ab771b645L,0xb07dfc6fc74045abL,
+ 0x2c6360bf7800caedL },
+ { 0x24295bb5b9c972a3L,0xc9e6f88e7c9a6dbaL,0x90ffbf2492a79aa6L,
+ 0xde29d50a41c26ac2L } },
+ /* 25 << 168 */
+ { { 0x9f0af483d309cbe6L,0x5b020d8ae0bced4fL,0x606e986db38023e3L,
+ 0xad8f2c9d1abc6933L },
+ { 0x19292e1de7400e93L,0xfe3e18a952be5e4dL,0xe8e9771d2e0680bfL,
+ 0x8c5bec98c54db063L } },
+ /* 26 << 168 */
+ { { 0x2af9662a74a55d1fL,0xe3fbf28f046f66d8L,0xa3a72ab4d4dc4794L,
+ 0x09779f455c7c2dd8L },
+ { 0xd893bdafc3d19d8dL,0xd5a7509457d6a6dfL,0x8cf8fef9952e6255L,
+ 0x3da67cfbda9a8affL } },
+ /* 27 << 168 */
+ { { 0x4c23f62a2c160dcdL,0x34e6c5e38f90eaefL,0x35865519a9a65d5aL,
+ 0x07c48aae8fd38a3dL },
+ { 0xb7e7aeda50068527L,0x2c09ef231c90936aL,0x31ecfeb6e879324cL,
+ 0xa0871f6bfb0ec938L } },
+ /* 28 << 168 */
+ { { 0xb1f0fb68d84d835dL,0xc90caf39861dc1e6L,0x12e5b0467594f8d7L,
+ 0x26897ae265012b92L },
+ { 0xbcf68a08a4d6755dL,0x403ee41c0991fbdaL,0x733e343e3bbf17e8L,
+ 0xd2c7980d679b3d65L } },
+ /* 29 << 168 */
+ { { 0x33056232d2e11305L,0x966be492f3c07a6fL,0x6a8878ffbb15509dL,
+ 0xff2211010a9b59a4L },
+ { 0x6c9f564aabe30129L,0xc6f2c940336e64cfL,0x0fe752628b0c8022L,
+ 0xbe0267e96ae8db87L } },
+ /* 30 << 168 */
+ { { 0x22e192f193bc042bL,0xf085b534b237c458L,0xa0d192bd832c4168L,
+ 0x7a76e9e3bdf6271dL },
+ { 0x52a882fab88911b5L,0xc85345e4b4db0eb5L,0xa3be02a681a7c3ffL,
+ 0x51889c8cf0ec0469L } },
+ /* 31 << 168 */
+ { { 0x9d031369a5e829e5L,0xcbb4c6fc1607aa41L,0x75ac59a6241d84c1L,
+ 0xc043f2bf8829e0eeL },
+ { 0x82a38f758ea5e185L,0x8bda40b9d87cbd9fL,0x9e65e75e2d8fc601L,
+ 0x3d515f74a35690b3L } },
+ /* 32 << 168 */
+ { { 0x534acf4fda79e5acL,0x68b83b3a8630215fL,0x5c748b2ed085756eL,
+ 0xb0317258e5d37cb2L },
+ { 0x6735841ac5ccc2c4L,0x7d7dc96b3d9d5069L,0xa147e410fd1754bdL,
+ 0x65296e94d399ddd5L } },
+ /* 33 << 168 */
+ { { 0xf6b5b2d0bc8fa5bcL,0x8a5ead67500c277bL,0x214625e6dfa08a5dL,
+ 0x51fdfedc959cf047L },
+ { 0x6bc9430b289fca32L,0xe36ff0cf9d9bdc3fL,0x2fe187cb58ea0edeL,
+ 0xed66af205a900b3fL } },
+ /* 34 << 168 */
+ { { 0x00e0968b5fa9f4d6L,0x2d4066ce37a362e7L,0xa99a9748bd07e772L,
+ 0x710989c006a4f1d0L },
+ { 0xd5dedf35ce40cbd8L,0xab55c5f01743293dL,0x766f11448aa24e2cL,
+ 0x94d874f8605fbcb4L } },
+ /* 35 << 168 */
+ { { 0xa365f0e8a518001bL,0xee605eb69d04ef0fL,0x5a3915cdba8d4d25L,
+ 0x44c0e1b8b5113472L },
+ { 0xcbb024e88b6740dcL,0x89087a53ee1d4f0cL,0xa88fa05c1fc4e372L,
+ 0x8bf395cbaf8b3af2L } },
+ /* 36 << 168 */
+ { { 0x1e71c9a1deb8568bL,0xa35daea080fb3d32L,0xe8b6f2662cf8fb81L,
+ 0x6d51afe89490696aL },
+ { 0x81beac6e51803a19L,0xe3d24b7f86219080L,0x727cfd9ddf6f463cL,
+ 0x8c6865ca72284ee8L } },
+ /* 37 << 168 */
+ { { 0x32c88b7db743f4efL,0x3793909be7d11dceL,0xd398f9222ff2ebe8L,
+ 0x2c70ca44e5e49796L },
+ { 0xdf4d9929cb1131b1L,0x7826f29825888e79L,0x4d3a112cf1d8740aL,
+ 0x00384cb6270afa8bL } },
+ /* 38 << 168 */
+ { { 0xcb64125b3ab48095L,0x3451c25662d05106L,0xd73d577da4955845L,
+ 0x39570c16bf9f4433L },
+ { 0xd7dfaad3adecf263L,0xf1c3d8d1dc76e102L,0x5e774a5854c6a836L,
+ 0xdad4b6723e92d47bL } },
+ /* 39 << 168 */
+ { { 0xbe7e990ff0d796a0L,0x5fc62478df0e8b02L,0x8aae8bf4030c00adL,
+ 0x3d2db93b9004ba0fL },
+ { 0xe48c8a79d85d5ddcL,0xe907caa76bb07f34L,0x58db343aa39eaed5L,
+ 0x0ea6e007adaf5724L } },
+ /* 40 << 168 */
+ { { 0xe00df169d23233f3L,0x3e32279677cb637fL,0x1f897c0e1da0cf6cL,
+ 0xa651f5d831d6bbddL },
+ { 0xdd61af191a230c76L,0xbd527272cdaa5e4aL,0xca753636d0abcd7eL,
+ 0x78bdd37c370bd8dcL } },
+ /* 41 << 168 */
+ { { 0xc23916c217cd93feL,0x65b97a4ddadce6e2L,0xe04ed4eb174e42f8L,
+ 0x1491ccaabb21480aL },
+ { 0x145a828023196332L,0x3c3862d7587b479aL,0x9f4a88a301dcd0edL,
+ 0x4da2b7ef3ea12f1fL } },
+ /* 42 << 168 */
+ { { 0xf8e7ae33b126e48eL,0x404a0b32f494e237L,0x9beac474c55acadbL,
+ 0x4ee5cf3bcbec9fd9L },
+ { 0x336b33b97df3c8c3L,0xbd905fe3b76808fdL,0x8f436981aa45c16aL,
+ 0x255c5bfa3dd27b62L } },
+ /* 43 << 168 */
+ { { 0x71965cbfc3dd9b4dL,0xce23edbffc068a87L,0xb78d4725745b029bL,
+ 0x74610713cefdd9bdL },
+ { 0x7116f75f1266bf52L,0x0204672218e49bb6L,0xdf43df9f3d6f19e3L,
+ 0xef1bc7d0e685cb2fL } },
+ /* 44 << 168 */
+ { { 0xcddb27c17078c432L,0xe1961b9cb77fedb7L,0x1edc2f5cc2290570L,
+ 0x2c3fefca19cbd886L },
+ { 0xcf880a36c2af389aL,0x96c610fdbda71ceaL,0xf03977a932aa8463L,
+ 0x8eb7763f8586d90aL } },
+ /* 45 << 168 */
+ { { 0x3f3424542a296e77L,0xc871868342837a35L,0x7dc710906a09c731L,
+ 0x54778ffb51b816dbL },
+ { 0x6b33bfecaf06defdL,0xfe3c105f8592b70bL,0xf937fda461da6114L,
+ 0x3c13e6514c266ad7L } },
+ /* 46 << 168 */
+ { { 0xe363a829855938e8L,0x2eeb5d9e9de54b72L,0xbeb93b0e20ccfab9L,
+ 0x3dffbb5f25e61a25L },
+ { 0x7f655e431acc093dL,0x0cb6cc3d3964ce61L,0x6ab283a1e5e9b460L,
+ 0x55d787c5a1c7e72dL } },
+ /* 47 << 168 */
+ { { 0x4d2efd47deadbf02L,0x11e80219ac459068L,0x810c762671f311f0L,
+ 0xfa17ef8d4ab6ef53L },
+ { 0xaf47fd2593e43bffL,0x5cb5ff3f0be40632L,0x546871068ee61da3L,
+ 0x7764196eb08afd0fL } },
+ /* 48 << 168 */
+ { { 0x831ab3edf0290a8fL,0xcae81966cb47c387L,0xaad7dece184efb4fL,
+ 0xdcfc53b34749110eL },
+ { 0x6698f23c4cb632f9L,0xc42a1ad6b91f8067L,0xb116a81d6284180aL,
+ 0xebedf5f8e901326fL } },
+ /* 49 << 168 */
+ { { 0xf2274c9f97e3e044L,0x4201852011d09fc9L,0x56a65f17d18e6e23L,
+ 0x2ea61e2a352b683cL },
+ { 0x27d291bc575eaa94L,0x9e7bc721b8ff522dL,0x5f7268bfa7f04d6fL,
+ 0x5868c73faba41748L } },
+ /* 50 << 168 */
+ { { 0x9f85c2db7be0eeadL,0x511e7842ff719135L,0x5a06b1e9c5ea90d7L,
+ 0x0c19e28326fab631L },
+ { 0x8af8f0cfe9206c55L,0x89389cb43553c06aL,0x39dbed97f65f8004L,
+ 0x0621b037c508991dL } },
+ /* 51 << 168 */
+ { { 0x1c52e63596e78cc4L,0x5385c8b20c06b4a8L,0xd84ddfdbb0e87d03L,
+ 0xc49dfb66934bafadL },
+ { 0x7071e17059f70772L,0x3a073a843a1db56bL,0x034949033b8af190L,
+ 0x7d882de3d32920f0L } },
+ /* 52 << 168 */
+ { { 0x91633f0ab2cf8940L,0x72b0b1786f948f51L,0x2d28dc30782653c8L,
+ 0x88829849db903a05L },
+ { 0xb8095d0c6a19d2bbL,0x4b9e7f0c86f782cbL,0x7af739882d907064L,
+ 0xd12be0fe8b32643cL } },
+ /* 53 << 168 */
+ { { 0x358ed23d0e165dc3L,0x3d47ce624e2378ceL,0x7e2bb0b9feb8a087L,
+ 0x3246e8aee29e10b9L },
+ { 0x459f4ec703ce2b4dL,0xe9b4ca1bbbc077cfL,0x2613b4f20e9940c1L,
+ 0xfc598bb9047d1eb1L } },
+ /* 54 << 168 */
+ { { 0x9744c62b45036099L,0xa9dee742167c65d8L,0x0c511525dabe1943L,
+ 0xda11055493c6c624L },
+ { 0xae00a52c651a3be2L,0xcda5111d884449a6L,0x063c06f4ff33bed1L,
+ 0x73baaf9a0d3d76b4L } },
+ /* 55 << 168 */
+ { { 0x52fb0c9d7fc63668L,0x6886c9dd0c039cdeL,0x602bd59955b22351L,
+ 0xb00cab02360c7c13L },
+ { 0x8cb616bc81b69442L,0x41486700b55c3ceeL,0x71093281f49ba278L,
+ 0xad956d9c64a50710L } },
+ /* 56 << 168 */
+ { { 0x9561f28b638a7e81L,0x54155cdf5980ddc3L,0xb2db4a96d26f247aL,
+ 0x9d774e4e4787d100L },
+ { 0x1a9e6e2e078637d2L,0x1c363e2d5e0ae06aL,0x7493483ee9cfa354L,
+ 0x76843cb37f74b98dL } },
+ /* 57 << 168 */
+ { { 0xbaca6591d4b66947L,0xb452ce9804460a8cL,0x6830d24643768f55L,
+ 0xf4197ed87dff12dfL },
+ { 0x6521b472400dd0f7L,0x59f5ca8f4b1e7093L,0x6feff11b080338aeL,
+ 0x0ada31f6a29ca3c6L } },
+ /* 58 << 168 */
+ { { 0x24794eb694a2c215L,0xd83a43ab05a57ab4L,0x264a543a2a6f89feL,
+ 0x2c2a3868dd5ec7c2L },
+ { 0xd33739408439d9b2L,0x715ea6720acd1f11L,0x42c1d235e7e6cc19L,
+ 0x81ce6e96b990585cL } },
+ /* 59 << 168 */
+ { { 0x04e5dfe0d809c7bdL,0xd7b2580c8f1050abL,0x6d91ad78d8a4176fL,
+ 0x0af556ee4e2e897cL },
+ { 0x162a8b73921de0acL,0x52ac9c227ea78400L,0xee2a4eeaefce2174L,
+ 0xbe61844e6d637f79L } },
+ /* 60 << 168 */
+ { { 0x0491f1bc789a283bL,0x72d3ac3d880836f4L,0xaa1c5ea388e5402dL,
+ 0x1b192421d5cc473dL },
+ { 0x5c0b99989dc84cacL,0xb0a8482d9c6e75b8L,0x639961d03a191ce2L,
+ 0xda3bc8656d837930L } },
+ /* 61 << 168 */
+ { { 0xca990653056e6f8fL,0x84861c4164d133a7L,0x8b403276746abe40L,
+ 0xb7b4d51aebf8e303L },
+ { 0x05b43211220a255dL,0xc997152c02419e6eL,0x76ff47b6630c2feaL,
+ 0x50518677281fdadeL } },
+ /* 62 << 168 */
+ { { 0x3283b8bacf902b0bL,0x8d4b4eb537db303bL,0xcc89f42d755011bcL,
+ 0xb43d74bbdd09d19bL },
+ { 0x65746bc98adba350L,0x364eaf8cb51c1927L,0x13c7659610ad72ecL,
+ 0x30045121f8d40c20L } },
+ /* 63 << 168 */
+ { { 0x6d2d99b7ea7b979bL,0xcd78cd74e6fb3bcdL,0x11e45a9e86cffbfeL,
+ 0x78a61cf4637024f6L },
+ { 0xd06bc8723d502295L,0xf1376854458cb288L,0xb9db26a1342f8586L,
+ 0xf33effcf4beee09eL } },
+ /* 64 << 168 */
+ { { 0xd7e0c4cdb30cfb3aL,0x6d09b8c16c9db4c8L,0x40ba1a4207c8d9dfL,
+ 0x6fd495f71c52c66dL },
+ { 0xfb0e169f275264daL,0x80c2b746e57d8362L,0xedd987f749ad7222L,
+ 0xfdc229af4398ec7bL } },
+ /* 0 << 175 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 175 */
+ { { 0xb0d1ed8452666a58L,0x4bcb6e00e6a9c3c2L,0x3c57411c26906408L,
+ 0xcfc2075513556400L },
+ { 0xa08b1c505294dba3L,0xa30ba2868b7dd31eL,0xd70ba90e991eca74L,
+ 0x094e142ce762c2b9L } },
+ /* 2 << 175 */
+ { { 0xb81d783e979f3925L,0x1efd130aaf4c89a7L,0x525c2144fd1bf7faL,
+ 0x4b2969041b265a9eL },
+ { 0xed8e9634b9db65b6L,0x35c82e3203599d8aL,0xdaa7a54f403563f3L,
+ 0x9df088ad022c38abL } },
+ /* 3 << 175 */
+ { { 0xe5cfb066bb3fd30aL,0x429169daeff0354eL,0x809cf8523524e36cL,
+ 0x136f4fb30155be1dL },
+ { 0x4826af011fbba712L,0x6ef0f0b4506ba1a1L,0xd9928b3177aea73eL,
+ 0xe2bf6af25eaa244eL } },
+ /* 4 << 175 */
+ { { 0x8d084f124237b64bL,0x688ebe99e3ecfd07L,0x57b8a70cf6845dd8L,
+ 0x808fc59c5da4a325L },
+ { 0xa9032b2ba3585862L,0xb66825d5edf29386L,0xb5a5a8db431ec29bL,
+ 0xbb143a983a1e8dc8L } },
+ /* 5 << 175 */
+ { { 0x35ee94ce12ae381bL,0x3a7f176c86ccda90L,0xc63a657e4606eacaL,
+ 0x9ae5a38043cd04dfL },
+ { 0x9bec8d15ed251b46L,0x1f5d6d30caca5e64L,0x347b3b359ff20f07L,
+ 0x4d65f034f7e4b286L } },
+ /* 6 << 175 */
+ { { 0x9e93ba24f111661eL,0xedced484b105eb04L,0x96dc9ba1f424b578L,
+ 0xbf8f66b7e83e9069L },
+ { 0x872d4df4d7ed8216L,0xbf07f3778e2cbecfL,0x4281d89998e73754L,
+ 0xfec85fbb8aab8708L } },
+ /* 7 << 175 */
+ { { 0x9a3c0deea5ba5b0bL,0xe6a116ce42d05299L,0xae9775fee9b02d42L,
+ 0x72b05200a1545cb6L },
+ { 0xbc506f7d31a3b4eaL,0xe58930788bbd9b32L,0xc8bc5f37e4b12a97L,
+ 0x6b000c064a73b671L } },
+ /* 8 << 175 */
+ { { 0x13b5bf22765fa7d0L,0x59805bf01d6a5370L,0x67a5e29d4280db98L,
+ 0x4f53916f776b1ce3L },
+ { 0x714ff61f33ddf626L,0x4206238ea085d103L,0x1c50d4b7e5809ee3L,
+ 0x999f450d85f8eb1dL } },
+ /* 9 << 175 */
+ { { 0x658a6051e4c79e9bL,0x1394cb73c66a9feaL,0x27f31ed5c6be7b23L,
+ 0xf4c88f365aa6f8feL },
+ { 0x0fb0721f4aaa499eL,0x68b3a7d5e3fb2a6bL,0xa788097d3a92851dL,
+ 0x060e7f8ae96f4913L } },
+ /* 10 << 175 */
+ { { 0x82eebe731a3a93bcL,0x42bbf465a21adc1aL,0xc10b6fa4ef030efdL,
+ 0x247aa4c787b097bbL },
+ { 0x8b8dc632f60c77daL,0x6ffbc26ac223523eL,0xa4f6ff11344579cfL,
+ 0x5825653c980250f6L } },
+ /* 11 << 175 */
+ { { 0xb2dd097ebc1aa2b9L,0x0788939337a0333aL,0x1cf55e7137a0db38L,
+ 0x2648487f792c1613L },
+ { 0xdad013363fcef261L,0x6239c81d0eabf129L,0x8ee761de9d276be2L,
+ 0x406a7a341eda6ad3L } },
+ /* 12 << 175 */
+ { { 0x4bf367ba4a493b31L,0x54f20a529bf7f026L,0xb696e0629795914bL,
+ 0xcddab96d8bf236acL },
+ { 0x4ff2c70aed25ea13L,0xfa1d09eb81cbbbe7L,0x88fc8c87468544c5L,
+ 0x847a670d696b3317L } },
+ /* 13 << 175 */
+ { { 0xf133421e64bcb626L,0xaea638c826dee0b5L,0xd6e7680bb310346cL,
+ 0xe06f4097d5d4ced3L },
+ { 0x099614527512a30bL,0xf3d867fde589a59aL,0x2e73254f52d0c180L,
+ 0x9063d8a3333c74acL } },
+ /* 14 << 175 */
+ { { 0xeda6c595d314e7bcL,0x2ee7464b467899edL,0x1cef423c0a1ed5d3L,
+ 0x217e76ea69cc7613L },
+ { 0x27ccce1fe7cda917L,0x12d8016b8a893f16L,0xbcd6de849fc74f6bL,
+ 0xfa5817e2f3144e61L } },
+ /* 15 << 175 */
+ { { 0x1f3541640821ee4cL,0x1583eab40bc61992L,0x7490caf61d72879fL,
+ 0x998ad9f3f76ae7b2L },
+ { 0x1e181950a41157f7L,0xa9d7e1e6e8da3a7eL,0x963784eb8426b95fL,
+ 0x0ee4ed6e542e2a10L } },
+ /* 16 << 175 */
+ { { 0xb79d4cc5ac751e7bL,0x93f96472fd4211bdL,0x8c72d3d2c8de4fc6L,
+ 0x7b69cbf5df44f064L },
+ { 0x3da90ca2f4bf94e1L,0x1a5325f8f12894e2L,0x0a437f6c7917d60bL,
+ 0x9be7048696c9cb5dL } },
+ /* 17 << 175 */
+ { { 0xb4d880bfe1dc5c05L,0xd738addaeebeeb57L,0x6f0119d3df0fe6a3L,
+ 0x5c686e5566eaaf5aL },
+ { 0x9cb10b50dfd0b7ecL,0xbdd0264b6a497c21L,0xfc0935148c546c96L,
+ 0x58a947fa79dbf42aL } },
+ /* 18 << 175 */
+ { { 0xc0b48d4e49ccd6d7L,0xff8fb02c88bd5580L,0xc75235e907d473b2L,
+ 0x4fab1ac5a2188af3L },
+ { 0x030fa3bc97576ec0L,0xe8c946e80b7e7d2fL,0x40a5c9cc70305600L,
+ 0x6d8260a9c8b013b4L } },
+ /* 19 << 175 */
+ { { 0x0368304f70bba85cL,0xad090da1a4a0d311L,0x7170e8702415eec1L,
+ 0xbfba35fe8461ea47L },
+ { 0x6279019ac1e91938L,0xa47638f31afc415fL,0x36c65cbbbcba0e0fL,
+ 0x02160efb034e2c48L } },
+ /* 20 << 175 */
+ { { 0xe6c51073615cd9e4L,0x498ec047f1243c06L,0x3e5a8809b17b3d8cL,
+ 0x5cd99e610cc565f1L },
+ { 0x81e312df7851dafeL,0xf156f5baa79061e2L,0x80d62b71880c590eL,
+ 0xbec9746f0a39faa1L } },
+ /* 21 << 175 */
+ { { 0x1d98a9c1c8ed1f7aL,0x09e43bb5a81d5ff2L,0xd5f00f680da0794aL,
+ 0x412050d9661aa836L },
+ { 0xa89f7c4e90747e40L,0x6dc05ebbb62a3686L,0xdf4de847308e3353L,
+ 0x53868fbb9fb53bb9L } },
+ /* 22 << 175 */
+ { { 0x2b09d2c3cfdcf7ddL,0x41a9fce3723fcab4L,0x73d905f707f57ca3L,
+ 0x080f9fb1ac8e1555L },
+ { 0x7c088e849ba7a531L,0x07d35586ed9a147fL,0x602846abaf48c336L,
+ 0x7320fd320ccf0e79L } },
+ /* 23 << 175 */
+ { { 0xaa780798b18bd1ffL,0x52c2e300afdd2905L,0xf27ea3d6434267cdL,
+ 0x8b96d16d15605b5fL },
+ { 0x7bb310494b45706bL,0xe7f58b8e743d25f8L,0xe9b5e45b87f30076L,
+ 0xd19448d65d053d5aL } },
+ /* 24 << 175 */
+ { { 0x1ecc8cb9d3210a04L,0x6bc7d463dafb5269L,0x3e59b10a67c3489fL,
+ 0x1769788c65641e1bL },
+ { 0x8a53b82dbd6cb838L,0x7066d6e6236d5f22L,0x03aa1c616908536eL,
+ 0xc971da0d66ae9809L } },
+ /* 25 << 175 */
+ { { 0x01b3a86bc49a2facL,0x3b8420c03092e77aL,0x020573007d6fb556L,
+ 0x6941b2a1bff40a87L },
+ { 0x140b63080658ff2aL,0x878043633424ab36L,0x0253bd515751e299L,
+ 0xc75bcd76449c3e3aL } },
+ /* 26 << 175 */
+ { { 0x92eb40907f8f875dL,0x9c9d754e56c26bbfL,0x158cea618110bbe7L,
+ 0x62a6b802745f91eaL },
+ { 0xa79c41aac6e7394bL,0x445b6a83ad57ef10L,0x0c5277eb6ea6f40cL,
+ 0x319fe96b88633365L } },
+ /* 27 << 175 */
+ { { 0x0b0fc61f385f63cbL,0x41250c8422bdd127L,0x67d153f109e942c2L,
+ 0x60920d08c021ad5dL },
+ { 0x229f5746724d81a5L,0xb7ffb8925bba3299L,0x518c51a1de413032L,
+ 0x2a9bfe773c2fd94cL } },
+ /* 28 << 175 */
+ { { 0xcbcde2393191f4fdL,0x43093e16d3d6ada1L,0x184579f358769606L,
+ 0x2c94a8b3d236625cL },
+ { 0x6922b9c05c437d8eL,0x3d4ae423d8d9f3c8L,0xf72c31c12e7090a2L,
+ 0x4ac3f5f3d76a55bdL } },
+ /* 29 << 175 */
+ { { 0x342508fc6b6af991L,0x0d5271001b5cebbdL,0xb84740d0dd440dd7L,
+ 0x748ef841780162fdL },
+ { 0xa8dbfe0edfc6fafbL,0xeadfdf05f7300f27L,0x7d06555ffeba4ec9L,
+ 0x12c56f839e25fa97L } },
+ /* 30 << 175 */
+ { { 0x77f84203d39b8c34L,0xed8b1be63125eddbL,0x5bbf2441f6e39dc5L,
+ 0xb00f6ee66a5d678aL },
+ { 0xba456ecf57d0ea99L,0xdcae0f5817e06c43L,0x01643de40f5b4baaL,
+ 0x2c324341d161b9beL } },
+ /* 31 << 175 */
+ { { 0x80177f55e126d468L,0xed325f1f76748e09L,0x6116004acfa9bdc2L,
+ 0x2d8607e63a9fb468L },
+ { 0x0e573e276009d660L,0x3a525d2e8d10c5a1L,0xd26cb45c3b9009a0L,
+ 0xb6b0cdc0de9d7448L } },
+ /* 32 << 175 */
+ { { 0x949c9976e1337c26L,0x6faadebdd73d68e5L,0x9e158614f1b768d9L,
+ 0x22dfa5579cc4f069L },
+ { 0xccd6da17be93c6d6L,0x24866c61a504f5b9L,0x2121353c8d694da1L,
+ 0x1c6ca5800140b8c6L } },
+ /* 33 << 175 */
+ { { 0xc245ad8ce964021eL,0xb83bffba032b82b3L,0xfaa220c647ef9898L,
+ 0x7e8d3ac6982c948aL },
+ { 0x1faa2091bc2d124aL,0xbd54c3dd05b15ff4L,0x386bf3abc87c6fb7L,
+ 0xfb2b0563fdeb6f66L } },
+ /* 34 << 175 */
+ { { 0x4e77c5575b45afb4L,0xe9ded649efb8912dL,0x7ec9bbf542f6e557L,
+ 0x2570dfff62671f00L },
+ { 0x2b3bfb7888e084bdL,0xa024b238f37fe5b4L,0x44e7dc0495649aeeL,
+ 0x498ca2555e7ec1d8L } },
+ /* 35 << 175 */
+ { { 0x3bc766eaaaa07e86L,0x0db6facbf3608586L,0xbadd2549bdc259c8L,
+ 0x95af3c6e041c649fL },
+ { 0xb36a928c02e30afbL,0x9b5356ad008a88b8L,0x4b67a5f1cf1d9e9dL,
+ 0xc6542e47a5d8d8ceL } },
+ /* 36 << 175 */
+ { { 0x73061fe87adfb6ccL,0xcc826fd398678141L,0x00e758b13c80515aL,
+ 0x6afe324741485083L },
+ { 0x0fcb08b9b6ae8a75L,0xb8cf388d4acf51e1L,0x344a55606961b9d6L,
+ 0x1a6778b86a97fd0cL } },
+ /* 37 << 175 */
+ { { 0xd840fdc1ecc4c7e3L,0xde9fe47d16db68ccL,0xe95f89dea3e216aaL,
+ 0x84f1a6a49594a8beL },
+ { 0x7ddc7d725a7b162bL,0xc5cfda19adc817a3L,0x80a5d35078b58d46L,
+ 0x93365b1382978f19L } },
+ /* 38 << 175 */
+ { { 0x2e44d22526a1fc90L,0x0d6d10d24d70705dL,0xd94b6b10d70c45f4L,
+ 0x0f201022b216c079L },
+ { 0xcec966c5658fde41L,0xa8d2bc7d7e27601dL,0xbfcce3e1ff230be7L,
+ 0x3394ff6b0033ffb5L } },
+ /* 39 << 175 */
+ { { 0xd890c5098132c9afL,0xaac4b0eb361e7868L,0x5194ded3e82d15aaL,
+ 0x4550bd2e23ae6b7dL },
+ { 0x3fda318eea5399d4L,0xd989bffa91638b80L,0x5ea124d0a14aa12dL,
+ 0x1fb1b8993667b944L } },
+ /* 40 << 175 */
+ { { 0x95ec796944c44d6aL,0x91df144a57e86137L,0x915fd62073adac44L,
+ 0x8f01732d59a83801L },
+ { 0xec579d253aa0a633L,0x06de5e7cc9d6d59cL,0xc132f958b1ef8010L,
+ 0x29476f96e65c1a02L } },
+ /* 41 << 175 */
+ { { 0x336a77c0d34c3565L,0xef1105b21b9f1e9eL,0x63e6d08bf9e08002L,
+ 0x9aff2f21c613809eL },
+ { 0xb5754f853a80e75dL,0xde71853e6bbda681L,0x86f041df8197fd7aL,
+ 0x8b332e08127817faL } },
+ /* 42 << 175 */
+ { { 0x05d99be8b9c20cdaL,0x89f7aad5d5cd0c98L,0x7ef936fe5bb94183L,
+ 0x92ca0753b05cd7f2L },
+ { 0x9d65db1174a1e035L,0x02628cc813eaea92L,0xf2d9e24249e4fbf2L,
+ 0x94fdfd9be384f8b7L } },
+ /* 43 << 175 */
+ { { 0x65f5605463428c6bL,0x2f7205b290b409a5L,0xf778bb78ff45ae11L,
+ 0xa13045bec5ee53b2L },
+ { 0xe00a14ff03ef77feL,0x689cd59fffef8befL,0x3578f0ed1e9ade22L,
+ 0xe99f3ec06268b6a8L } },
+ /* 44 << 175 */
+ { { 0xa2057d91ea1b3c3eL,0x2d1a7053b8823a4aL,0xabbb336a2cca451eL,
+ 0xcd2466e32218bb5dL },
+ { 0x3ac1f42fc8cb762dL,0x7e312aae7690211fL,0xebb9bd7345d07450L,
+ 0x207c4b8246c2213fL } },
+ /* 45 << 175 */
+ { { 0x99d425c1375913ecL,0x94e45e9667908220L,0xc08f3087cd67dbf6L,
+ 0xa5670fbec0887056L },
+ { 0x6717b64a66f5b8fcL,0xd5a56aea786fec28L,0xa8c3f55fc0ff4952L,
+ 0xa77fefae457ac49bL } },
+ /* 46 << 175 */
+ { { 0x29882d7c98379d44L,0xd000bdfb509edc8aL,0xc6f95979e66fe464L,
+ 0x504a6115fa61bde0L },
+ { 0x56b3b871effea31aL,0x2d3de26df0c21a54L,0x21dbff31834753bfL,
+ 0xe67ecf4969269d86L } },
+ /* 47 << 175 */
+ { { 0x7a176952151fe690L,0x035158047f2adb5fL,0xee794b15d1b62a8dL,
+ 0xf004ceecaae454e6L },
+ { 0x0897ea7cf0386facL,0x3b62ff12d1fca751L,0x154181df1b7a04ecL,
+ 0x2008e04afb5847ecL } },
+ /* 48 << 175 */
+ { { 0xd147148e41dbd772L,0x2b419f7322942654L,0x669f30d3e9c544f7L,
+ 0x52a2c223c8540149L },
+ { 0x5da9ee14634dfb02L,0x5f074ff0f47869f3L,0x74ee878da3933accL,
+ 0xe65106514fe35ed1L } },
+ /* 49 << 175 */
+ { { 0xb3eb9482f1012e7aL,0x51013cc0a8a566aeL,0xdd5e924347c00d3bL,
+ 0x7fde089d946bb0e5L },
+ { 0x030754fec731b4b3L,0x12a136a499fda062L,0x7c1064b85a1a35bcL,
+ 0xbf1f5763446c84efL } },
+ /* 50 << 175 */
+ { { 0xed29a56da16d4b34L,0x7fba9d09dca21c4fL,0x66d7ac006d8de486L,
+ 0x6006198773a2a5e1L },
+ { 0x8b400f869da28ff0L,0x3133f70843c4599cL,0x9911c9b8ee28cb0dL,
+ 0xcd7e28748e0af61dL } },
+ /* 51 << 175 */
+ { { 0x5a85f0f272ed91fcL,0x85214f319cd4a373L,0x881fe5be1925253cL,
+ 0xd8dc98e091e8bc76L },
+ { 0x7120affe585cc3a2L,0x724952ed735bf97aL,0x5581e7dc3eb34581L,
+ 0x5cbff4f2e52ee57dL } },
+ /* 52 << 175 */
+ { { 0x8d320a0e87d8cc7bL,0x9beaa7f3f1d280d0L,0x7a0b95719beec704L,
+ 0x9126332e5b7f0057L },
+ { 0x01fbc1b48ed3bd6dL,0x35bb2c12d945eb24L,0x6404694e9a8ae255L,
+ 0xb6092eec8d6abfb3L } },
+ /* 53 << 175 */
+ { { 0x4d76143fcc058865L,0x7b0a5af26e249922L,0x8aef94406a50d353L,
+ 0xe11e4bcc64f0e07aL },
+ { 0x4472993aa14a90faL,0x7706e20cba0c51d4L,0xf403292f1532672dL,
+ 0x52573bfa21829382L } },
+ /* 54 << 175 */
+ { { 0x6a7bb6a93b5bdb83L,0x08da65c0a4a72318L,0xc58d22aa63eb065fL,
+ 0x1717596c1b15d685L },
+ { 0x112df0d0b266d88bL,0xf688ae975941945aL,0x487386e37c292cacL,
+ 0x42f3b50d57d6985cL } },
+ /* 55 << 175 */
+ { { 0x6da4f9986a90fc34L,0xc8f257d365ca8a8dL,0xc2feabca6951f762L,
+ 0xe1bc81d074c323acL },
+ { 0x1bc68f67251a2a12L,0x10d86587be8a70dcL,0xd648af7ff0f84d2eL,
+ 0xf0aa9ebc6a43ac92L } },
+ /* 56 << 175 */
+ { { 0x69e3be0427596893L,0xb6bb02a645bf452bL,0x0875c11af4c698c8L,
+ 0x6652b5c7bece3794L },
+ { 0x7b3755fd4f5c0499L,0x6ea16558b5532b38L,0xd1c69889a2e96ef7L,
+ 0x9c773c3a61ed8f48L } },
+ /* 57 << 175 */
+ { { 0x2b653a409b323abcL,0xe26605e1f0e1d791L,0x45d410644a87157aL,
+ 0x8f9a78b7cbbce616L },
+ { 0xcf1e44aac407edddL,0x81ddd1d8a35b964fL,0x473e339efd083999L,
+ 0x6c94bdde8e796802L } },
+ /* 58 << 175 */
+ { { 0x5a304ada8545d185L,0x82ae44ea738bb8cbL,0x628a35e3df87e10eL,
+ 0xd3624f3da15b9fe3L },
+ { 0xcc44209b14be4254L,0x7d0efcbcbdbc2ea5L,0x1f60336204c37bbeL,
+ 0x21f363f556a5852cL } },
+ /* 59 << 175 */
+ { { 0xa1503d1ca8501550L,0x2251e0e1d8ab10bbL,0xde129c966961c51cL,
+ 0x1f7246a481910f68L },
+ { 0x2eb744ee5f2591f2L,0x3c47d33f5e627157L,0x4d6d62c922f3bd68L,
+ 0x6120a64bcb8df856L } },
+ /* 60 << 175 */
+ { { 0x3a9ac6c07b5d07dfL,0xa92b95587ef39783L,0xe128a134ab3a9b4fL,
+ 0x41c18807b1252f05L },
+ { 0xfc7ed08980ba9b1cL,0xac8dc6dec532a9ddL,0xbf829cef55246809L,
+ 0x101b784f5b4ee80fL } },
+ /* 61 << 175 */
+ { { 0xc09945bbb6f11603L,0x57b09dbe41d2801eL,0xfba5202fa97534a8L,
+ 0x7fd8ae5fc17b9614L },
+ { 0xa50ba66678308435L,0x9572f77cd3868c4dL,0x0cef7bfd2dd7aab0L,
+ 0xe7958e082c7c79ffL } },
+ /* 62 << 175 */
+ { { 0x81262e4225346689L,0x716da290b07c7004L,0x35f911eab7950ee3L,
+ 0x6fd72969261d21b5L },
+ { 0x5238980308b640d3L,0x5b0026ee887f12a1L,0x20e21660742e9311L,
+ 0x0ef6d5415ff77ff7L } },
+ /* 63 << 175 */
+ { { 0x969127f0f9c41135L,0xf21d60c968a64993L,0x656e5d0ce541875cL,
+ 0xf1e0f84ea1d3c233L },
+ { 0x9bcca35906002d60L,0xbe2da60c06191552L,0x5da8bbae61181ec3L,
+ 0x9f04b82365806f19L } },
+ /* 64 << 175 */
+ { { 0xf1604a7dd4b79bb8L,0xaee806fb52c878c8L,0x34144f118d47b8e8L,
+ 0x72edf52b949f9054L },
+ { 0xebfca84e2127015aL,0x9051d0c09cb7cef3L,0x86e8fe58296deec8L,
+ 0x33b2818841010d74L } },
+ /* 0 << 182 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 182 */
+ { { 0x01079383171b445fL,0x9bcf21e38131ad4cL,0x8cdfe205c93987e8L,
+ 0xe63f4152c92e8c8fL },
+ { 0x729462a930add43dL,0x62ebb143c980f05aL,0x4f3954e53b06e968L,
+ 0xfe1d75ad242cf6b1L } },
+ /* 2 << 182 */
+ { { 0x5f95c6c7af8685c8L,0xd4c1c8ce2f8f01aaL,0xc44bbe322574692aL,
+ 0xb8003478d4a4a068L },
+ { 0x7c8fc6e52eca3cdbL,0xea1db16bec04d399L,0xb05bc82e8f2bc5cfL,
+ 0x763d517ff44793d2L } },
+ /* 3 << 182 */
+ { { 0x4451c1b808bd98d0L,0x644b1cd46575f240L,0x6907eb337375d270L,
+ 0x56c8bebdfa2286bdL },
+ { 0xc713d2acc4632b46L,0x17da427aafd60242L,0x313065b7c95c7546L,
+ 0xf8239898bf17a3deL } },
+ /* 4 << 182 */
+ { { 0xf3b7963f4c830320L,0x842c7aa0903203e3L,0xaf22ca0ae7327afbL,
+ 0x38e13092967609b6L },
+ { 0x73b8fb62757558f1L,0x3cc3e831f7eca8c1L,0xe4174474f6331627L,
+ 0xa77989cac3c40234L } },
+ /* 5 << 182 */
+ { { 0xe5fd17a144a081e0L,0xd797fb7db70e296aL,0x2b472b30481f719cL,
+ 0x0e632a98fe6f8c52L },
+ { 0x89ccd116c5f0c284L,0xf51088af2d987c62L,0x2a2bccda4c2de6cfL,
+ 0x810f9efef679f0f9L } },
+ /* 6 << 182 */
+ { { 0xb0f394b97ffe4b3eL,0x0b691d21e5fa5d21L,0xb0bd77479dfbbc75L,
+ 0xd2830fdafaf78b00L },
+ { 0xf78c249c52434f57L,0x4b1f754598096dabL,0x73bf6f948ff8c0b3L,
+ 0x34aef03d454e134cL } },
+ /* 7 << 182 */
+ { { 0xf8d151f4b7ac7ec5L,0xd6ceb95ae50da7d5L,0xa1b492b0dc3a0eb8L,
+ 0x75157b69b3dd2863L },
+ { 0xe2c4c74ec5413d62L,0xbe329ff7bc5fc4c7L,0x835a2aea60fa9ddaL,
+ 0xf117f5ad7445cb87L } },
+ /* 8 << 182 */
+ { { 0xae8317f4b0166f7aL,0xfbd3e3f7ceec74e6L,0xfdb516ace0874bfdL,
+ 0x3d846019c681f3a3L },
+ { 0x0b12ee5c7c1620b0L,0xba68b4dd2b63c501L,0xac03cd326668c51eL,
+ 0x2a6279f74e0bcb5bL } },
+ /* 9 << 182 */
+ { { 0x17bd69b06ae85c10L,0x729469791dfdd3a6L,0xd9a032682c078becL,
+ 0x41c6a658bfd68a52L },
+ { 0xcdea10240e023900L,0xbaeec121b10d144dL,0x5a600e74058ab8dcL,
+ 0x1333af21bb89ccddL } },
+ /* 10 << 182 */
+ { { 0xdf25eae03aaba1f1L,0x2cada16e3b7144cfL,0x657ee27d71ab98bcL,
+ 0x99088b4c7a6fc96eL },
+ { 0x05d5c0a03549dbd4L,0x42cbdf8ff158c3acL,0x3fb6b3b087edd685L,
+ 0x22071cf686f064d0L } },
+ /* 11 << 182 */
+ { { 0xd2d6721fff2811e5L,0xdb81b703fe7fae8cL,0x3cfb74efd3f1f7bbL,
+ 0x0cdbcd7616cdeb5dL },
+ { 0x4f39642a566a808cL,0x02b74454340064d6L,0xfabbadca0528fa6fL,
+ 0xe4c3074cd3fc0bb6L } },
+ /* 12 << 182 */
+ { { 0xb32cb8b0b796d219L,0xc3e95f4f34741dd9L,0x8721212568edf6f5L,
+ 0x7a03aee4a2b9cb8eL },
+ { 0x0cd3c376f53a89aaL,0x0d8af9b1948a28dcL,0xcf86a3f4902ab04fL,
+ 0x8aacb62a7f42002dL } },
+ /* 13 << 182 */
+ { { 0x106985ebf62ffd52L,0xe670b54e5797bf10L,0x4b405209c5e30aefL,
+ 0x12c97a204365b5e9L },
+ { 0x104646ce1fe32093L,0x13cb4ff63907a8c9L,0x8b9f30d1d46e726bL,
+ 0xe1985e21aba0f499L } },
+ /* 14 << 182 */
+ { { 0xc573dea910a230cdL,0x24f46a93cd30f947L,0xf2623fcfabe2010aL,
+ 0x3f278cb273f00e4fL },
+ { 0xed55c67d50b920ebL,0xf1cb9a2d8e760571L,0x7c50d1090895b709L,
+ 0x4207cf07190d4369L } },
+ /* 15 << 182 */
+ { { 0x3b027e81c4127fe1L,0xa9f8b9ad3ae9c566L,0x5ab10851acbfbba5L,
+ 0xa747d648569556f5L },
+ { 0xcc172b5c2ba97bf7L,0x15e0f77dbcfa3324L,0xa345b7977686279dL,
+ 0x5a723480e38003d3L } },
+ /* 16 << 182 */
+ { { 0xfd8e139f8f5fcda8L,0xf3e558c4bdee5bfdL,0xd76cbaf4e33f9f77L,
+ 0x3a4c97a471771969L },
+ { 0xda27e84bf6dce6a7L,0xff373d9613e6c2d1L,0xf115193cd759a6e9L,
+ 0x3f9b702563d2262cL } },
+ /* 17 << 182 */
+ { { 0xd9764a31317cd062L,0x30779d8e199f8332L,0xd807410616b11b0bL,
+ 0x7917ab9f78aeaed8L },
+ { 0xb67a9cbe28fb1d8eL,0x2e313563136eda33L,0x010b7069a371a86cL,
+ 0x44d90fa26744e6b7L } },
+ /* 18 << 182 */
+ { { 0x68190867d6b3e243L,0x9fe6cd9d59048c48L,0xb900b02895731538L,
+ 0xa012062f32cae04fL },
+ { 0x8107c8bc9399d082L,0x47e8c54a41df12e2L,0x14ba5117b6ef3f73L,
+ 0x22260bea81362f0bL } },
+ /* 19 << 182 */
+ { { 0x90ea261e1a18cc20L,0x2192999f2321d636L,0xef64d314e311b6a0L,
+ 0xd7401e4c3b54a1f5L },
+ { 0x190199836fbca2baL,0x46ad32938fbffc4bL,0xa142d3f63786bf40L,
+ 0xeb5cbc26b67039fcL } },
+ /* 20 << 182 */
+ { { 0x9cb0ae6c252bd479L,0x05e0f88a12b5848fL,0x78f6d2b2a5c97663L,
+ 0x6f6e149bc162225cL },
+ { 0xe602235cde601a89L,0xd17bbe98f373be1fL,0xcaf49a5ba8471827L,
+ 0x7e1a0a8518aaa116L } },
+ /* 21 << 182 */
+ { { 0x6c833196270580c3L,0x1e233839f1c98a14L,0x67b2f7b4ae34e0a5L,
+ 0x47ac8745d8ce7289L },
+ { 0x2b74779a100dd467L,0x274a43374ee50d09L,0x603dcf1383608bc9L,
+ 0xcd9da6c3c89e8388L } },
+ /* 22 << 182 */
+ { { 0x2660199f355116acL,0xcc38bb59b6d18eedL,0x3075f31f2f4bc071L,
+ 0x9774457f265dc57eL },
+ { 0x06a6a9c8c6db88bbL,0x6429d07f4ec98e04L,0x8d05e57b05ecaa8bL,
+ 0x20f140b17872ea7bL } },
+ /* 23 << 182 */
+ { { 0xdf8c0f09ca494693L,0x48d3a020f252e909L,0x4c5c29af57b14b12L,
+ 0x7e6fa37dbf47ad1cL },
+ { 0x66e7b50649a0c938L,0xb72c0d486be5f41fL,0x6a6242b8b2359412L,
+ 0xcd35c7748e859480L } },
+ /* 24 << 182 */
+ { { 0x12536fea87baa627L,0x58c1fec1f72aa680L,0x6c29b637601e5dc9L,
+ 0x9e3c3c1cde9e01b9L },
+ { 0xefc8127b2bcfe0b0L,0x351071022a12f50dL,0x6ccd6cb14879b397L,
+ 0xf792f804f8a82f21L } },
+ /* 25 << 182 */
+ { { 0x509d4804a9b46402L,0xedddf85dc10f0850L,0x928410dc4b6208aaL,
+ 0xf6229c46391012dcL },
+ { 0xc5a7c41e7727b9b6L,0x289e4e4baa444842L,0x049ba1d9e9a947eaL,
+ 0x44f9e47f83c8debcL } },
+ /* 26 << 182 */
+ { { 0xfa77a1fe611f8b8eL,0xfd2e416af518f427L,0xc5fffa70114ebac3L,
+ 0xfe57c4e95d89697bL },
+ { 0xfdd053acb1aaf613L,0x31df210fea585a45L,0x318cc10e24985034L,
+ 0x1a38efd15f1d6130L } },
+ /* 27 << 182 */
+ { { 0xbf86f2370b1e9e21L,0xb258514d1dbe88aaL,0x1e38a58890c1baf9L,
+ 0x2936a01ebdb9b692L },
+ { 0xd576de986dd5b20cL,0xb586bf7170f98ecfL,0xcccf0f12c42d2fd7L,
+ 0x8717e61cfb35bd7bL } },
+ /* 28 << 182 */
+ { { 0x8b1e572235e6fc06L,0x3477728f0b3e13d5L,0x150c294daa8a7372L,
+ 0xc0291d433bfa528aL },
+ { 0xc6c8bc67cec5a196L,0xdeeb31e45c2e8a7cL,0xba93e244fb6e1c51L,
+ 0xb9f8b71b2e28e156L } },
+ /* 29 << 182 */
+ { { 0xce65a287968a2ab9L,0xe3c5ce6946bbcb1fL,0xf8c835b9e7ae3f30L,
+ 0x16bbee26ff72b82bL },
+ { 0x665e2017fd42cd22L,0x1e139970f8b1d2a0L,0x125cda2979204932L,
+ 0x7aee94a549c3bee5L } },
+ /* 30 << 182 */
+ { { 0x68c7016089821a66L,0xf7c376788f981669L,0xd90829fc48cc3645L,
+ 0x346af049d70addfcL },
+ { 0x2057b232370bf29cL,0xf90c73ce42e650eeL,0xe03386eaa126ab90L,
+ 0x0e266e7e975a087bL } },
+ /* 31 << 182 */
+ { { 0x80578eb90fca65d9L,0x7e2989ea16af45b8L,0x7438212dcac75a4eL,
+ 0x38c7ca394fef36b8L },
+ { 0x8650c494d402676aL,0x26ab5a66f72c7c48L,0x4e6cb426ce3a464eL,
+ 0xf8f998962b72f841L } },
+ /* 32 << 182 */
+ { { 0x8c3184911a335cc8L,0x563459ba6a5913e4L,0x1b920d61c7b32919L,
+ 0x805ab8b6a02425adL },
+ { 0x2ac512da8d006086L,0x6ca4846abcf5c0fdL,0xafea51d8ac2138d7L,
+ 0xcb647545344cd443L } },
+ /* 33 << 182 */
+ { { 0x0429ee8fbd7d9040L,0xee66a2de819b9c96L,0x54f9ec25dea7d744L,
+ 0x2ffea642671721bbL },
+ { 0x4f19dbd1114344eaL,0x04304536fd0dbc8bL,0x014b50aa29ec7f91L,
+ 0xb5fc22febb06014dL } },
+ /* 34 << 182 */
+ { { 0x60d963a91ee682e0L,0xdf48abc0fe85c727L,0x0cadba132e707c2dL,
+ 0xde608d3aa645aeffL },
+ { 0x05f1c28bedafd883L,0x3c362edebd94de1fL,0x8dd0629d13593e41L,
+ 0x0a5e736f766d6eafL } },
+ /* 35 << 182 */
+ { { 0xbfa92311f68cf9d1L,0xa4f9ef87c1797556L,0x10d75a1f5601c209L,
+ 0x651c374c09b07361L },
+ { 0x49950b5888b5ceadL,0x0ef000586fa9dbaaL,0xf51ddc264e15f33aL,
+ 0x1f8b5ca62ef46140L } },
+ /* 36 << 182 */
+ { { 0x343ac0a3ee9523f0L,0xbb75eab2975ea978L,0x1bccf332107387f4L,
+ 0x790f92599ab0062eL },
+ { 0xf1a363ad1e4f6a5fL,0x06e08b8462519a50L,0x609151877265f1eeL,
+ 0x6a80ca3493ae985eL } },
+ /* 37 << 182 */
+ { { 0x81b29768aaba4864L,0xb13cabf28d52a7d6L,0xb5c363488ead03f1L,
+ 0xc932ad9581c7c1c0L },
+ { 0x5452708ecae1e27bL,0x9dac42691b0df648L,0x233e3f0cdfcdb8bcL,
+ 0xe6ceccdfec540174L } },
+ /* 38 << 182 */
+ { { 0xbd0d845e95081181L,0xcc8a7920699355d5L,0x111c0f6dc3b375a8L,
+ 0xfd95bc6bfd51e0dcL },
+ { 0x4a106a266888523aL,0x4d142bd6cb01a06dL,0x79bfd289adb9b397L,
+ 0x0bdbfb94e9863914L } },
+ /* 39 << 182 */
+ { { 0x29d8a2291660f6a6L,0x7f6abcd6551c042dL,0x13039deb0ac3ffe8L,
+ 0xa01be628ec8523fbL },
+ { 0x6ea341030ca1c328L,0xc74114bdb903928eL,0x8aa4ff4e9e9144b0L,
+ 0x7064091f7f9a4b17L } },
+ /* 40 << 182 */
+ { { 0xa3f4f521e447f2c4L,0x81b8da7a604291f0L,0xd680bc467d5926deL,
+ 0x84f21fd534a1202fL },
+ { 0x1d1e31814e9df3d8L,0x1ca4861a39ab8d34L,0x809ddeec5b19aa4aL,
+ 0x59f72f7e4d329366L } },
+ /* 41 << 182 */
+ { { 0xa2f93f41386d5087L,0x40bf739cdd67d64fL,0xb449420566702158L,
+ 0xc33c65be73b1e178L },
+ { 0xcdcd657c38ca6153L,0x97f4519adc791976L,0xcc7c7f29cd6e1f39L,
+ 0x38de9cfb7e3c3932L } },
+ /* 42 << 182 */
+ { { 0xe448eba37b793f85L,0xe9f8dbf9f067e914L,0xc0390266f114ae87L,
+ 0x39ed75a7cd6a8e2aL },
+ { 0xadb148487ffba390L,0x67f8cb8b6af9bc09L,0x322c38489c7476dbL,
+ 0xa320fecf52a538d6L } },
+ /* 43 << 182 */
+ { { 0xe0493002b2aced2bL,0xdfba1809616bd430L,0x531c4644c331be70L,
+ 0xbc04d32e90d2e450L },
+ { 0x1805a0d10f9f142dL,0x2c44a0c547ee5a23L,0x31875a433989b4e3L,
+ 0x6b1949fd0c063481L } },
+ /* 44 << 182 */
+ { { 0x2dfb9e08be0f4492L,0x3ff0da03e9d5e517L,0x03dbe9a1f79466a8L,
+ 0x0b87bcd015ea9932L },
+ { 0xeb64fc83ab1f58abL,0x6d9598da817edc8aL,0x699cff661d3b67e5L,
+ 0x645c0f2992635853L } },
+ /* 45 << 182 */
+ { { 0x253cdd82eabaf21cL,0x82b9602a2241659eL,0x2cae07ec2d9f7091L,
+ 0xbe4c720c8b48cd9bL },
+ { 0x6ce5bc036f08d6c9L,0x36e8a997af10bf40L,0x83422d213e10ff12L,
+ 0x7b26d3ebbcc12494L } },
+ /* 46 << 182 */
+ { { 0xb240d2d0c9469ad6L,0xc4a11b4d30afa05bL,0x4b604acedd6ba286L,
+ 0x184866003ee2864cL },
+ { 0x5869d6ba8d9ce5beL,0x0d8f68c5ff4bfb0dL,0xb69f210b5700cf73L,
+ 0x61f6653a6d37c135L } },
+ /* 47 << 182 */
+ { { 0xff3d432b5aff5a48L,0x0d81c4b972ba3a69L,0xee879ae9fa1899efL,
+ 0xbac7e2a02d6acafdL },
+ { 0xd6d93f6c1c664399L,0x4c288de15bcb135dL,0x83031dab9dab7cbfL,
+ 0xfe23feb03abbf5f0L } },
+ /* 48 << 182 */
+ { { 0x9f1b2466cdedca85L,0x140bb7101a09538cL,0xac8ae8515e11115dL,
+ 0x0d63ff676f03f59eL },
+ { 0x755e55517d234afbL,0x61c2db4e7e208fc1L,0xaa9859cef28a4b5dL,
+ 0xbdd6d4fc34af030fL } },
+ /* 49 << 182 */
+ { { 0xd1c4a26d3be01cb1L,0x9ba14ffc243aa07cL,0xf95cd3a9b2503502L,
+ 0xe379bc067d2a93abL },
+ { 0x3efc18e9d4ca8d68L,0x083558ec80bb412aL,0xd903b9409645a968L,
+ 0xa499f0b69ba6054fL } },
+ /* 50 << 182 */
+ { { 0x208b573cb8349abeL,0x3baab3e530b4fc1cL,0x87e978bacb524990L,
+ 0x3524194eccdf0e80L },
+ { 0x627117257d4bcc42L,0xe90a3d9bb90109baL,0x3b1bdd571323e1e0L,
+ 0xb78e9bd55eae1599L } },
+ /* 51 << 182 */
+ { { 0x0794b7469e03d278L,0x80178605d70e6297L,0x171792f899c97855L,
+ 0x11b393eef5a86b5cL },
+ { 0x48ef6582d8884f27L,0xbd44737abf19ba5fL,0x8698de4ca42062c6L,
+ 0x8975eb8061ce9c54L } },
+ /* 52 << 182 */
+ { { 0xd50e57c7d7fe71f3L,0x15342190bc97ce38L,0x51bda2de4df07b63L,
+ 0xba12aeae200eb87dL },
+ { 0xabe135d2a9b4f8f6L,0x04619d65fad6d99cL,0x4a6683a77994937cL,
+ 0x7a778c8b6f94f09aL } },
+ /* 53 << 182 */
+ { { 0x8c50862320a71b89L,0x241a2aed1c229165L,0x352be595aaf83a99L,
+ 0x9fbfee7f1562bac8L },
+ { 0xeaf658b95c4017e3L,0x1dc7f9e015120b86L,0xd84f13dd4c034d6fL,
+ 0x283dd737eaea3038L } },
+ /* 54 << 182 */
+ { { 0x197f2609cd85d6a2L,0x6ebbc345fae60177L,0xb80f031b4e12fedeL,
+ 0xde55d0c207a2186bL },
+ { 0x1fb3e37f24dcdd5aL,0x8d602da57ed191fbL,0x108fb05676023e0dL,
+ 0x70178c71459c20c0L } },
+ /* 55 << 182 */
+ { { 0xfad5a3863fe54cf0L,0xa4a3ec4f02bbb475L,0x1aa5ec20919d94d7L,
+ 0x5d3b63b5a81e4ab3L },
+ { 0x7fa733d85ad3d2afL,0xfbc586ddd1ac7a37L,0x282925de40779614L,
+ 0xfe0ffffbe74a242aL } },
+ /* 56 << 182 */
+ { { 0x3f39e67f906151e5L,0xcea27f5f55e10649L,0xdca1d4e1c17cf7b7L,
+ 0x0c326d122fe2362dL },
+ { 0x05f7ac337dd35df3L,0x0c3b7639c396dbdfL,0x0912f5ac03b7db1cL,
+ 0x9dea4b705c9ed4a9L } },
+ /* 57 << 182 */
+ { { 0x475e6e53aae3f639L,0xfaba0e7cfc278bacL,0x16f9e2219490375fL,
+ 0xaebf9746a5a7ed0aL },
+ { 0x45f9af3ff41ad5d6L,0x03c4623cb2e99224L,0x82c5bb5cb3cf56aaL,
+ 0x6431181934567ed3L } },
+ /* 58 << 182 */
+ { { 0xec57f2118be489acL,0x2821895db9a1104bL,0x610dc8756064e007L,
+ 0x8e526f3f5b20d0feL },
+ { 0x6e71ca775b645aeeL,0x3d1dcb9f800e10ffL,0x36b51162189cf6deL,
+ 0x2c5a3e306bb17353L } },
+ /* 59 << 182 */
+ { { 0xc186cd3e2a6c6fbfL,0xa74516fa4bf97906L,0x5b4b8f4b279d6901L,
+ 0x0c4e57b42b573743L },
+ { 0x75fdb229b6e386b6L,0xb46793fd99deac27L,0xeeec47eacf712629L,
+ 0xe965f3c4cbc3b2ddL } },
+ /* 60 << 182 */
+ { { 0x8dd1fb83425c6559L,0x7fc00ee60af06fdaL,0xe98c922533d956dfL,
+ 0x0f1ef3354fbdc8a2L },
+ { 0x2abb5145b79b8ea2L,0x40fd2945bdbff288L,0x6a814ac4d7185db7L,
+ 0xc4329d6fc084609aL } },
+ /* 61 << 182 */
+ { { 0xc9ba7b52ed1be45dL,0x891dd20de4cd2c74L,0x5a4d4a7f824139b1L,
+ 0x66c17716b873c710L },
+ { 0x5e5bc1412843c4e0L,0xd5ac4817b97eb5bfL,0xc0f8af54450c95c7L,
+ 0xc91b3fa0318406c5L } },
+ /* 62 << 182 */
+ { { 0x360c340aab9d97f8L,0xfb57bd0790a2d611L,0x4339ae3ca6a6f7e5L,
+ 0x9c1fcd2a2feb8a10L },
+ { 0x972bcca9c7ea7432L,0x1b0b924c308076f6L,0x80b2814a2a5b4ca5L,
+ 0x2f78f55b61ef3b29L } },
+ /* 63 << 182 */
+ { { 0xf838744ac18a414fL,0xc611eaae903d0a86L,0x94dabc162a453f55L,
+ 0xe6f2e3da14efb279L },
+ { 0x5b7a60179320dc3cL,0x692e382f8df6b5a4L,0x3f5e15e02d40fa90L,
+ 0xc87883ae643dd318L } },
+ /* 64 << 182 */
+ { { 0x511053e453544774L,0x834d0ecc3adba2bcL,0x4215d7f7bae371f5L,
+ 0xfcfd57bf6c8663bcL },
+ { 0xded2383dd6901b1dL,0x3b49fbb4b5587dc3L,0xfd44a08d07625f62L,
+ 0x3ee4d65b9de9b762L } },
+ /* 0 << 189 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 189 */
+ { { 0x64e5137d0d63d1faL,0x658fc05202a9d89fL,0x4889487450436309L,
+ 0xe9ae30f8d598da61L },
+ { 0x2ed710d1818baf91L,0xe27e9e068b6a0c20L,0x1e28dcfb1c1a6b44L,
+ 0x883acb64d6ac57dcL } },
+ /* 2 << 189 */
+ { { 0x8735728dc2c6ff70L,0x79d6122fc5dc2235L,0x23f5d00319e277f9L,
+ 0x7ee84e25dded8cc7L },
+ { 0x91a8afb063cd880aL,0x3f3ea7c63574af60L,0x0cfcdc8402de7f42L,
+ 0x62d0792fb31aa152L } },
+ /* 3 << 189 */
+ { { 0x8e1b4e438a5807ceL,0xad283893e4109a7eL,0xc30cc9cbafd59ddaL,
+ 0xf65f36c63d8d8093L },
+ { 0xdf31469ea60d32b2L,0xee93df4b3e8191c8L,0x9c1017c5355bdeb5L,
+ 0xd26231858616aa28L } },
+ /* 4 << 189 */
+ { { 0xb02c83f9dec31a21L,0x988c8b236ad9d573L,0x53e983aea57be365L,
+ 0xe968734d646f834eL },
+ { 0x9137ea8f5da6309bL,0x10f3a624c1f1ce16L,0x782a9ea2ca440921L,
+ 0xdf94739e5b46f1b5L } },
+ /* 5 << 189 */
+ { { 0x9f9be006cce85c9bL,0x360e70d6a4c7c2d3L,0x2cd5beeaaefa1e60L,
+ 0x64cf63c08c3d2b6dL },
+ { 0xfb107fa3e1cf6f90L,0xb7e937c6d5e044e6L,0x74e8ca78ce34db9fL,
+ 0x4f8b36c13e210bd0L } },
+ /* 6 << 189 */
+ { { 0x1df165a434a35ea8L,0x3418e0f74d4412f6L,0x5af1f8af518836c3L,
+ 0x42ceef4d130e1965L },
+ { 0x5560ca0b543a1957L,0xc33761e5886cb123L,0x66624b1ffe98ed30L,
+ 0xf772f4bf1090997dL } },
+ /* 7 << 189 */
+ { { 0xf4e540bb4885d410L,0x7287f8109ba5f8d7L,0x22d0d865de98dfb1L,
+ 0x49ff51a1bcfbb8a3L },
+ { 0xb6b6fa536bc3012eL,0x3d31fd72170d541dL,0x8018724f4b0f4966L,
+ 0x79e7399f87dbde07L } },
+ /* 8 << 189 */
+ { { 0x56f8410ef4f8b16aL,0x97241afec47b266aL,0x0a406b8e6d9c87c1L,
+ 0x803f3e02cd42ab1bL },
+ { 0x7f0309a804dbec69L,0xa83b85f73bbad05fL,0xc6097273ad8e197fL,
+ 0xc097440e5067adc1L } },
+ /* 9 << 189 */
+ { { 0x730eafb63524ff16L,0xd7f9b51e823fc6ceL,0x27bd0d32443e4ac0L,
+ 0x40c59ad94d66f217L },
+ { 0x6c33136f17c387a4L,0x5043b8d5eb86804dL,0x74970312675a73c9L,
+ 0x838fdb31f16669b6L } },
+ /* 10 << 189 */
+ { { 0xc507b6dd418e7dddL,0x39888d93472f19d6L,0x7eae26be0c27eb4dL,
+ 0x17b53ed3fbabb884L },
+ { 0xfc27021b2b01ae4fL,0x88462e87cf488682L,0xbee096ec215e2d87L,
+ 0xeb2fea9ad242e29bL } },
+ /* 11 << 189 */
+ { { 0x5d985b5fb821fc28L,0x89d2e197dc1e2ad2L,0x55b566b89030ba62L,
+ 0xe3fd41b54f41b1c6L },
+ { 0xb738ac2eb9a96d61L,0x7f8567ca369443f4L,0x8698622df803a440L,
+ 0x2b5862368fe2f4dcL } },
+ /* 12 << 189 */
+ { { 0xbbcc00c756b95bceL,0x5ec03906616da680L,0x79162ee672214252L,
+ 0x43132b6386a892d2L },
+ { 0x4bdd3ff22f3263bfL,0xd5b3733c9cd0a142L,0x592eaa8244415ccbL,
+ 0x663e89248d5474eaL } },
+ /* 13 << 189 */
+ { { 0x8058a25e5236344eL,0x82e8df9dbda76ee6L,0xdcf6efd811cc3d22L,
+ 0x00089cda3b4ab529L },
+ { 0x91d3a071bd38a3dbL,0x4ea97fc0ef72b925L,0x0c9fc15bea3edf75L,
+ 0x5a6297cda4348ed3L } },
+ /* 14 << 189 */
+ { { 0x0d38ab35ce7c42d4L,0x9fd493ef82feab10L,0x46056b6d82111b45L,
+ 0xda11dae173efc5c3L },
+ { 0xdc7402785545a7fbL,0xbdb2601c40d507e6L,0x121dfeeb7066fa58L,
+ 0x214369a839ae8c2aL } },
+ /* 15 << 189 */
+ { { 0x195709cb06e0956cL,0x4c9d254f010cd34bL,0xf51e13f70471a532L,
+ 0xe19d67911e73054dL },
+ { 0xf702a628db5c7be3L,0xc7141218b24dde05L,0xdc18233cf29b2e2eL,
+ 0x3a6bd1e885342dbaL } },
+ /* 16 << 189 */
+ { { 0x3f747fa0b311898cL,0xe2a272e4cd0eac65L,0x4bba5851f914d0bcL,
+ 0x7a1a9660c4a43ee3L },
+ { 0xe5a367cea1c8cde9L,0x9d958ba97271abe3L,0xf3ff7eb63d1615cdL,
+ 0xa2280dcef5ae20b0L } },
+ /* 17 << 189 */
+ { { 0x56dba5c1cf640147L,0xea5a2e3d5e83d118L,0x04cd6b6dda24c511L,
+ 0x1c0f4671e854d214L },
+ { 0x91a6b7a969565381L,0xdc966240decf1f5bL,0x1b22d21cfcf5d009L,
+ 0x2a05f6419021dbd5L } },
+ /* 18 << 189 */
+ { { 0x8c0ed566d4312483L,0x5179a95d643e216fL,0xcc185fec17044493L,
+ 0xb306333954991a21L },
+ { 0xd801ecdb0081a726L,0x0149b0c64fa89bbbL,0xafe9065a4391b6b9L,
+ 0xedc92786d633f3a3L } },
+ /* 19 << 189 */
+ { { 0xe408c24aae6a8e13L,0x85833fde9f3897abL,0x43800e7ed81a0715L,
+ 0xde08e346b44ffc5fL },
+ { 0x7094184ccdeff2e0L,0x49f9387b165eaed1L,0x635d6129777c468aL,
+ 0x8c0dcfd1538c2dd8L } },
+ /* 20 << 189 */
+ { { 0xd6d9d9e37a6a308bL,0x623758304c2767d3L,0x874a8bc6f38cbeb6L,
+ 0xd94d3f1accb6fd9eL },
+ { 0x92a9735bba21f248L,0x272ad0e56cd1efb0L,0x7437b69c05b03284L,
+ 0xe7f047026948c225L } },
+ /* 21 << 189 */
+ { { 0x8a56c04acba2ececL,0x0c181270e3a73e41L,0x6cb34e9d03e93725L,
+ 0xf77c8713496521a9L },
+ { 0x94569183fa7f9f90L,0xf2e7aa4c8c9707adL,0xced2c9ba26c1c9a3L,
+ 0x9109fe9640197507L } },
+ /* 22 << 189 */
+ { { 0x9ae868a9e9adfe1cL,0x3984403d314e39bbL,0xb5875720f2fe378fL,
+ 0x33f901e0ba44a628L },
+ { 0xea1125fe3652438cL,0xae9ec4e69dd1f20bL,0x1e740d9ebebf7fbdL,
+ 0x6dbd3ddc42dbe79cL } },
+ /* 23 << 189 */
+ { { 0x62082aecedd36776L,0xf612c478e9859039L,0xa493b201032f7065L,
+ 0xebd4d8f24ff9b211L },
+ { 0x3f23a0aaaac4cb32L,0xea3aadb715ed4005L,0xacf17ea4afa27e63L,
+ 0x56125c1ac11fd66cL } },
+ /* 24 << 189 */
+ { { 0x266344a43794f8dcL,0xdcca923a483c5c36L,0x2d6b6bbf3f9d10a0L,
+ 0xb320c5ca81d9bdf3L },
+ { 0x620e28ff47b50a95L,0x933e3b01cef03371L,0xf081bf8599100153L,
+ 0x183be9a0c3a8c8d6L } },
+ /* 25 << 189 */
+ { { 0x4e3ddc5ad6bbe24dL,0xc6c7463053843795L,0x78193dd765ec2d4cL,
+ 0xb8df26cccd3c89b2L },
+ { 0x98dbe3995a483f8dL,0x72d8a9577dd3313aL,0x65087294ab0bd375L,
+ 0xfcd892487c259d16L } },
+ /* 26 << 189 */
+ { { 0x8a9443d77613aa81L,0x8010080085fe6584L,0x70fc4dbc7fb10288L,
+ 0xf58280d3e86beee8L },
+ { 0x14fdd82f7c978c38L,0xdf1204c10de44d7bL,0xa08a1c844160252fL,
+ 0x591554cac17646a5L } },
+ /* 27 << 189 */
+ { { 0x214a37d6a05bd525L,0x48d5f09b07957b3cL,0x0247cdcbd7109bc9L,
+ 0x40f9e4bb30599ce7L },
+ { 0xc325fa03f46ad2ecL,0x00f766cfc3e3f9eeL,0xab556668d43a4577L,
+ 0x68d30a613ee03b93L } },
+ /* 28 << 189 */
+ { { 0x7ddc81ea77b46a08L,0xcf5a6477c7480699L,0x43a8cb346633f683L,
+ 0x1b867e6b92363c60L },
+ { 0x439211141f60558eL,0xcdbcdd632f41450eL,0x7fc04601cc630e8bL,
+ 0xea7c66d597038b43L } },
+ /* 29 << 189 */
+ { { 0x7259b8a504e99fd8L,0x98a8dd124785549aL,0x0e459a7c840552e1L,
+ 0xcdfcf4d04bb0909eL },
+ { 0x34a86db253758da7L,0xe643bb83eac997e1L,0x96400bd7530c5b7eL,
+ 0x9f97af87b41c8b52L } },
+ /* 30 << 189 */
+ { { 0x34fc8820fbeee3f9L,0x93e5349049091afdL,0x764b9be59a31f35cL,
+ 0x71f3786457e3d924L },
+ { 0x02fb34e0943aa75eL,0xa18c9c58ab8ff6e4L,0x080f31b133cf0d19L,
+ 0x5c9682db083518a7L } },
+ /* 31 << 189 */
+ { { 0x873d4ca6b709c3deL,0x64a842623575b8f0L,0x6275da1f020154bbL,
+ 0x97678caad17cf1abL },
+ { 0x8779795f951a95c3L,0xdd35b16350fccc08L,0x3270962733d8f031L,
+ 0x3c5ab10a498dd85cL } },
+ /* 32 << 189 */
+ { { 0xb6c185c341dca566L,0x7de7fedad8622aa3L,0x99e84d92901b6dfbL,
+ 0x30a02b0e7c4ad288L },
+ { 0xc7c81daa2fd3cf36L,0xd1319547df89e59fL,0xb2be8184cd496733L,
+ 0xd5f449eb93d3412bL } },
+ /* 33 << 189 */
+ { { 0x7ea41b1b25fe531dL,0xf97974326a1d5646L,0x86067f722bde501aL,
+ 0xf91481c00c85e89cL },
+ { 0xca8ee465f8b05bc6L,0x1844e1cf02e83cdaL,0xca82114ab4dbe33bL,
+ 0x0f9f87694eabfde2L } },
+ /* 34 << 189 */
+ { { 0x4936b1c038b27fe2L,0x63b6359baba402dfL,0x40c0ea2f656bdbabL,
+ 0x9c992a896580c39cL },
+ { 0x600e8f152a60aed1L,0xeb089ca4e0bf49dfL,0x9c233d7d2d42d99aL,
+ 0x648d3f954c6bc2faL } },
+ /* 35 << 189 */
+ { { 0xdcc383a8e1add3f3L,0xf42c0c6a4f64a348L,0x2abd176f0030dbdbL,
+ 0x4de501a37d6c215eL },
+ { 0x4a107c1f4b9a64bcL,0xa77f0ad32496cd59L,0xfb78ac627688dffbL,
+ 0x7025a2ca67937d8eL } },
+ /* 36 << 189 */
+ { { 0xfde8b2d1d1a8f4e7L,0xf5b3da477354927cL,0xe48606a3d9205735L,
+ 0xac477cc6e177b917L },
+ { 0xfb1f73d2a883239aL,0xe12572f6cc8b8357L,0x9d355e9cfb1f4f86L,
+ 0x89b795f8d9f3ec6eL } },
+ /* 37 << 189 */
+ { { 0x27be56f1b54398dcL,0x1890efd73fedeed5L,0x62f77f1f9c6d0140L,
+ 0x7ef0e314596f0ee4L },
+ { 0x50ca6631cc61dab3L,0x4a39801df4866e4fL,0x66c8d032ae363b39L,
+ 0x22c591e52ead66aaL } },
+ /* 38 << 189 */
+ { { 0x954ba308de02a53eL,0x2a6c060fd389f357L,0xe6cfcde8fbf40b66L,
+ 0x8e02fc56c6340ce1L },
+ { 0xe495779573adb4baL,0x7b86122ca7b03805L,0x63f835120c8e6fa6L,
+ 0x83660ea0057d7804L } },
+ /* 39 << 189 */
+ { { 0xbad7910521ba473cL,0xb6c50beeded5389dL,0xee2caf4daa7c9bc0L,
+ 0xd97b8de48c4e98a7L },
+ { 0xa9f63e70ab3bbddbL,0x3898aabf2597815aL,0x7659af89ac15b3d9L,
+ 0xedf7725b703ce784L } },
+ /* 40 << 189 */
+ { { 0x25470fabe085116bL,0x04a4337587285310L,0x4e39187ee2bfd52fL,
+ 0x36166b447d9ebc74L },
+ { 0x92ad433cfd4b322cL,0x726aa817ba79ab51L,0xf96eacd8c1db15ebL,
+ 0xfaf71e910476be63L } },
+ /* 41 << 189 */
+ { { 0xdd69a640641fad98L,0xb799591829622559L,0x03c6daa5de4199dcL,
+ 0x92cadc97ad545eb4L },
+ { 0x1028238b256534e4L,0x73e80ce68595409aL,0x690d4c66d05dc59bL,
+ 0xc95f7b8f981dee80L } },
+ /* 42 << 189 */
+ { { 0xf4337014d856ac25L,0x441bd9ddac524dcaL,0x640b3d855f0499f5L,
+ 0x39cf84a9d5fda182L },
+ { 0x04e7b055b2aa95a0L,0x29e33f0a0ddf1860L,0x082e74b5423f6b43L,
+ 0x217edeb90aaa2b0fL } },
+ /* 43 << 189 */
+ { { 0x58b83f3583cbea55L,0xc485ee4dbc185d70L,0x833ff03b1e5f6992L,
+ 0xb5b9b9cccf0c0dd5L },
+ { 0x7caaee8e4e9e8a50L,0x462e907b6269dafdL,0x6ed5cee9fbe791c6L,
+ 0x68ca3259ed430790L } },
+ /* 44 << 189 */
+ { { 0x2b72bdf213b5ba88L,0x60294c8a35ef0ac4L,0x9c3230ed19b99b08L,
+ 0x560fff176c2589aaL },
+ { 0x552b8487d6770374L,0xa373202d9a56f685L,0xd3e7f90745f175d9L,
+ 0x3c2f315fd080d810L } },
+ /* 45 << 189 */
+ { { 0x1130e9dd7b9520e8L,0xc078f9e20af037b5L,0x38cd2ec71e9c104cL,
+ 0x0f684368c472fe92L },
+ { 0xd3f1b5ed6247e7efL,0xb32d33a9396dfe21L,0x46f59cf44a9aa2c2L,
+ 0x69cd5168ff0f7e41L } },
+ /* 46 << 189 */
+ { { 0x3f59da0f4b3234daL,0xcf0b0235b4579ebeL,0x6d1cbb256d2476c7L,
+ 0x4f0837e69dc30f08L },
+ { 0x9a4075bb906f6e98L,0x253bb434c761e7d1L,0xde2e645f6e73af10L,
+ 0xb89a40600c5f131cL } },
+ /* 47 << 189 */
+ { { 0xd12840c5b8cc037fL,0x3d093a5b7405bb47L,0x6202c253206348b8L,
+ 0xbf5d57fcc55a3ca7L },
+ { 0x89f6c90c8c3bef48L,0x23ac76235a0a960aL,0xdfbd3d6b552b42abL,
+ 0x3ef22458132061f6L } },
+ /* 48 << 189 */
+ { { 0xd74e9bdac97e6516L,0x88779360c230f49eL,0xa6ec1de31e74ea49L,
+ 0x581dcee53fb645a2L },
+ { 0xbaef23918f483f14L,0x6d2dddfcd137d13bL,0x54cde50ed2743a42L,
+ 0x89a34fc5e4d97e67L } },
+ /* 49 << 189 */
+ { { 0x13f1f5b312e08ce5L,0xa80540b8a7f0b2caL,0x854bcf7701982805L,
+ 0xb8653ffd233bea04L },
+ { 0x8e7b878702b0b4c9L,0x2675261f9acb170aL,0x061a9d90930c14e5L,
+ 0xb59b30e0def0abeaL } },
+ /* 50 << 189 */
+ { { 0x1dc19ea60200ec7dL,0xb6f4a3f90bce132bL,0xb8d5de90f13e27e0L,
+ 0xbaee5ef01fade16fL },
+ { 0x6f406aaae4c6cf38L,0xab4cfe06d1369815L,0x0dcffe87efd550c6L,
+ 0x9d4f59c775ff7d39L } },
+ /* 51 << 189 */
+ { { 0xb02553b151deb6adL,0x812399a4b1877749L,0xce90f71fca6006e1L,
+ 0xc32363a6b02b6e77L },
+ { 0x02284fbedc36c64dL,0x86c81e31a7e1ae61L,0x2576c7e5b909d94aL,
+ 0x8b6f7d02818b2bb0L } },
+ /* 52 << 189 */
+ { { 0xeca3ed0756faa38aL,0xa3790e6c9305bb54L,0xd784eeda7bc73061L,
+ 0xbd56d3696dd50614L },
+ { 0xd6575949229a8aa9L,0xdcca8f474595ec28L,0x814305c106ab4fe6L,
+ 0xc8c3976824f43f16L } },
+ /* 53 << 189 */
+ { { 0xe2a45f36523f2b36L,0x995c6493920d93bbL,0xf8afdab790f1632bL,
+ 0x79ebbecd1c295954L },
+ { 0xc7bb3ddb79592f48L,0x67216a7b5f88e998L,0xd91f098bbc01193eL,
+ 0xf7d928a5b1db83fcL } },
+ /* 54 << 189 */
+ { { 0x55e38417e991f600L,0x2a91113e2981a934L,0xcbc9d64806b13bdeL,
+ 0xb011b6ac0755ff44L },
+ { 0x6f4cb518045ec613L,0x522d2d31c2f5930aL,0x5acae1af382e65deL,
+ 0x5764306727bc966fL } },
+ /* 55 << 189 */
+ { { 0x5e12705d1c7193f0L,0xf0f32f473be8858eL,0x785c3d7d96c6dfc7L,
+ 0xd75b4a20bf31795dL },
+ { 0x91acf17b342659d4L,0xe596ea3444f0378fL,0x4515708fce52129dL,
+ 0x17387e1e79f2f585L } },
+ /* 56 << 189 */
+ { { 0x72cfd2e949dee168L,0x1ae052233e2af239L,0x009e75be1d94066aL,
+ 0x6cca31c738abf413L },
+ { 0xb50bd61d9bc49908L,0x4a9b4a8cf5e2bc1eL,0xeb6cc5f7946f83acL,
+ 0x27da93fcebffab28L } },
+ /* 57 << 189 */
+ { { 0xea314c964821c8c5L,0x8de49deda83c15f4L,0x7a64cf207af33004L,
+ 0x45f1bfebc9627e10L },
+ { 0x878b062654b9df60L,0x5e4fdc3ca95c0b33L,0xe54a37cac2035d8eL,
+ 0x9087cda980f20b8cL } },
+ /* 58 << 189 */
+ { { 0x36f61c238319ade4L,0x766f287ade8cfdf8L,0x48821948346f3705L,
+ 0x49a7b85316e4f4a2L },
+ { 0xb9b3f8a75cedadfdL,0x8f5628158db2a815L,0xc0b7d55401f68f95L,
+ 0x12971e27688a208eL } },
+ /* 59 << 189 */
+ { { 0xc9f8b696d0ff34fcL,0x20824de21222718cL,0x7213cf9f0c95284dL,
+ 0xe2ad741bdc158240L },
+ { 0x0ee3a6df54043ccfL,0x16ff479bd84412b3L,0xf6c74ee0dfc98af0L,
+ 0xa78a169f52fcd2fbL } },
+ /* 60 << 189 */
+ { { 0xd8ae874699c930e9L,0x1d33e85849e117a5L,0x7581fcb46624759fL,
+ 0xde50644f5bedc01dL },
+ { 0xbeec5d00caf3155eL,0x672d66acbc73e75fL,0x86b9d8c6270b01dbL,
+ 0xd249ef8350f55b79L } },
+ /* 61 << 189 */
+ { { 0x6131d6d473978fe3L,0xcc4e4542754b00a1L,0x4e05df0557dfcfe9L,
+ 0x94b29cdd51ef6bf0L },
+ { 0xe4530cff9bc7edf2L,0x8ac236fdd3da65f3L,0x0faf7d5fc8eb0b48L,
+ 0x4d2de14c660eb039L } },
+ /* 62 << 189 */
+ { { 0xc006bba760430e54L,0x10a2d0d6da3289abL,0x9c037a5dd7979c59L,
+ 0x04d1f3d3a116d944L },
+ { 0x9ff224738a0983cdL,0x28e25b38c883cabbL,0xe968dba547a58995L,
+ 0x2c80b505774eebdfL } },
+ /* 63 << 189 */
+ { { 0xee763b714a953bebL,0x502e223f1642e7f6L,0x6fe4b64161d5e722L,
+ 0x9d37c5b0dbef5316L },
+ { 0x0115ed70f8330bc7L,0x139850e675a72789L,0x27d7faecffceccc2L,
+ 0x3016a8604fd9f7f6L } },
+ /* 64 << 189 */
+ { { 0xc492ec644cd8f64cL,0x58a2d790279d7b51L,0x0ced1fc51fc75256L,
+ 0x3e658aed8f433017L },
+ { 0x0b61942e05da59ebL,0xba3d60a30ddc3722L,0x7c311cd1742e7f87L,
+ 0x6473ffeef6b01b6eL } },
+ /* 0 << 196 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 196 */
+ { { 0x8303604f692ac542L,0xf079ffe1227b91d3L,0x19f63e6315aaf9bdL,
+ 0xf99ee565f1f344fbL },
+ { 0x8a1d661fd6219199L,0x8c883bc6d48ce41cL,0x1065118f3c74d904L,
+ 0x713889ee0faf8b1bL } },
+ /* 2 << 196 */
+ { { 0x972b3f8f81a1b3beL,0x4f3ce145ce2764a0L,0xe2d0f1cc28c4f5f7L,
+ 0xdeee0c0dc7f3985bL },
+ { 0x7df4adc0d39e25c3L,0x40619820c467a080L,0x440ebc9361cf5a58L,
+ 0x527729a6422ad600L } },
+ /* 3 << 196 */
+ { { 0xca6c0937b1b76ba6L,0x1a2eab854d2026dcL,0xb1715e1519d9ae0aL,
+ 0xf1ad9199bac4a026L },
+ { 0x35b3dfb807ea7b0eL,0xedf5496f3ed9eb89L,0x8932e5ff2d6d08abL,
+ 0xf314874e25bd2731L } },
+ /* 4 << 196 */
+ { { 0xefb26a753f73f449L,0x1d1c94f88d44fc79L,0x49f0fbc53bc0dc4dL,
+ 0xb747ea0b3698a0d0L },
+ { 0x5218c3fe228d291eL,0x35b804b543c129d6L,0xfac859b8d1acc516L,
+ 0x6c10697d95d6e668L } },
+ /* 5 << 196 */
+ { { 0xc38e438f0876fd4eL,0x45f0c30783d2f383L,0x203cc2ecb10934cbL,
+ 0x6a8f24392c9d46eeL },
+ { 0xf16b431b65ccde7bL,0x41e2cd1827e76a6fL,0xb9c8cf8f4e3484d7L,
+ 0x64426efd8315244aL } },
+ /* 6 << 196 */
+ { { 0x1c0a8e44fc94dea3L,0x34c8cdbfdad6a0b0L,0x919c384004113cefL,
+ 0xfd32fba415490ffaL },
+ { 0x58d190f6795dcfb7L,0xfef01b0383588bafL,0x9e6d1d63ca1fc1c0L,
+ 0x53173f96f0a41ac9L } },
+ /* 7 << 196 */
+ { { 0x2b1d402aba16f73bL,0x2fb310148cf9b9fcL,0x2d51e60e446ef7bfL,
+ 0xc731021bb91e1745L },
+ { 0x9d3b47244fee99d4L,0x4bca48b6fac5c1eaL,0x70f5f514bbea9af7L,
+ 0x751f55a5974c283aL } },
+ /* 8 << 196 */
+ { { 0x6e30251acb452fdbL,0x31ee696550f30650L,0xb0b3e508933548d9L,
+ 0xb8949a4ff4b0ef5bL },
+ { 0x208b83263c88f3bdL,0xab147c30db1d9989L,0xed6515fd44d4df03L,
+ 0x17a12f75e72eb0c5L } },
+ /* 9 << 196 */
+ { { 0x3b59796d36cf69dbL,0x1219eee956670c18L,0xfe3341f77a070d8eL,
+ 0x9b70130ba327f90cL },
+ { 0x36a324620ae18e0eL,0x2021a62346c0a638L,0x251b5817c62eb0d4L,
+ 0x87bfbcdf4c762293L } },
+ /* 10 << 196 */
+ { { 0xf78ab505cdd61d64L,0x8c7a53fcc8c18857L,0xa653ce6f16147515L,
+ 0x9c923aa5ea7d52d5L },
+ { 0xc24709cb5c18871fL,0x7d53bec873b3cc74L,0x59264afffdd1d4c4L,
+ 0x5555917e240da582L } },
+ /* 11 << 196 */
+ { { 0xcae8bbda548f5a0eL,0x1910eaba3bbfbbe1L,0xae5796857677afc3L,
+ 0x49ea61f173ff0b5cL },
+ { 0x786554784f7c3922L,0x95d337cd20c68eefL,0x68f1e1e5df779ab9L,
+ 0x14b491b0b5cf69a8L } },
+ /* 12 << 196 */
+ { { 0x7a6cbbe028e3fe89L,0xe7e1fee4c5aac0ebL,0x7f47eda5697e5140L,
+ 0x4f450137b454921fL },
+ { 0xdb625f8495cd8185L,0x74be0ba1cdb2e583L,0xaee4fd7cdd5e6de4L,
+ 0x4251437de8101739L } },
+ /* 13 << 196 */
+ { { 0x686d72a0ac620366L,0x4be3fb9cb6d59344L,0x6e8b44e7a1eb75b9L,
+ 0x84e39da391a5c10cL },
+ { 0x37cc1490b38f0409L,0x029519432c2ade82L,0x9b6887831190a2d8L,
+ 0x25627d14231182baL } },
+ /* 14 << 196 */
+ { { 0x6eb550aa658a6d87L,0x1405aaa7cf9c7325L,0xd147142e5c8748c9L,
+ 0x7f637e4f53ede0e0L },
+ { 0xf8ca277614ffad2cL,0xe58fb1bdbafb6791L,0x17158c23bf8f93fcL,
+ 0x7f15b3730a4a4655L } },
+ /* 15 << 196 */
+ { { 0x39d4add2d842ca72L,0xa71e43913ed96305L,0x5bb09cbe6700be14L,
+ 0x68d69d54d8befcf6L },
+ { 0xa45f536737183bcfL,0x7152b7bb3370dff7L,0xcf887baabf12525bL,
+ 0xe7ac7bddd6d1e3cdL } },
+ /* 16 << 196 */
+ { { 0x25914f7881fdad90L,0xcf638f560d2cf6abL,0xb90bc03fcc054de5L,
+ 0x932811a718b06350L },
+ { 0x2f00b3309bbd11ffL,0x76108a6fb4044974L,0x801bb9e0a851d266L,
+ 0x0dd099bebf8990c1L } },
+ /* 17 << 196 */
+ { { 0x58c5aaaaabe32986L,0x0fe9dd2a50d59c27L,0x84951ff48d307305L,
+ 0x6c23f82986529b78L },
+ { 0x50bb22180b136a79L,0x7e2174de77a20996L,0x6f00a4b9c0bb4da6L,
+ 0x89a25a17efdde8daL } },
+ /* 18 << 196 */
+ { { 0xf728a27ec11ee01dL,0xf900553ae5f10dfbL,0x189a83c802ec893cL,
+ 0x3ca5bdc123f66d77L },
+ { 0x9878153797eada9fL,0x59c50ab310256230L,0x346042d9323c69b3L,
+ 0x1b715a6d2c460449L } },
+ /* 19 << 196 */
+ { { 0xa41dd4766ae06e0bL,0xcdd7888e9d42e25fL,0x0f395f7456b25a20L,
+ 0xeadfe0ae8700e27eL },
+ { 0xb09d52a969950093L,0x3525d9cb327f8d40L,0xb8235a9467df886aL,
+ 0x77e4b0dd035faec2L } },
+ /* 20 << 196 */
+ { { 0x115eb20a517d7061L,0x77fe34336c2df683L,0x6870ddc7cdc6fc67L,
+ 0xb16105880b87de83L },
+ { 0x343584cad9c4ddbeL,0xb3164f1c3d754be2L,0x0731ed3ac1e6c894L,
+ 0x26327dec4f6b904cL } },
+ /* 21 << 196 */
+ { { 0x9d49c6de97b5cd32L,0x40835daeb5eceecdL,0xc66350edd9ded7feL,
+ 0x8aeebb5c7a678804L },
+ { 0x51d42fb75b8ee9ecL,0xd7a17bdd8e3ca118L,0x40d7511a2ef4400eL,
+ 0xc48990ac875a66f4L } },
+ /* 22 << 196 */
+ { { 0x8de07d2a2199e347L,0xbee755562a39e051L,0x56918786916e51dcL,
+ 0xeb1913134a2d89ecL },
+ { 0x6679610d37d341edL,0x434fbb4156d51c2bL,0xe54b7ee7d7492dbaL,
+ 0xaa33a79a59021493L } },
+ /* 23 << 196 */
+ { { 0x49fc5054e4bd6d3dL,0x09540f045ab551d0L,0x8acc90854942d3a6L,
+ 0x231af02f2d28323bL },
+ { 0x93458cac0992c163L,0x1fef8e71888e3bb4L,0x27578da5be8c268cL,
+ 0xcc8be792e805ec00L } },
+ /* 24 << 196 */
+ { { 0x29267baec61c3855L,0xebff429d58c1fd3bL,0x22d886c08c0b93b8L,
+ 0xca5e00b22ddb8953L },
+ { 0xcf330117c3fed8b7L,0xd49ac6fa819c01f6L,0x6ddaa6bd3c0fbd54L,
+ 0x917430688049a2cfL } },
+ /* 25 << 196 */
+ { { 0xd67f981eaff2ef81L,0xc3654d352818ae80L,0x81d050441b2aa892L,
+ 0x2db067bf3d099328L },
+ { 0xe7c79e86703dcc97L,0xe66f9b37e133e215L,0xcdf119a6e39a7a5cL,
+ 0x47c60de3876f1b61L } },
+ /* 26 << 196 */
+ { { 0x6e405939d860f1b2L,0x3e9a1dbcf5ed4d4aL,0x3f23619ec9b6bcbdL,
+ 0x5ee790cf734e4497L },
+ { 0xf0a834b15bdaf9bbL,0x02cedda74ca295f0L,0x4619aa2bcb8e378cL,
+ 0xe5613244cc987ea4L } },
+ /* 27 << 196 */
+ { { 0x0bc022cc76b23a50L,0x4a2793ad0a6c21ceL,0x3832878089cac3f5L,
+ 0x29176f1bcba26d56L },
+ { 0x062961874f6f59ebL,0x86e9bca98bdc658eL,0x2ca9c4d357e30402L,
+ 0x5438b216516a09bbL } },
+ /* 28 << 196 */
+ { { 0x0a6a063c7672765aL,0x37a3ce640547b9bfL,0x42c099c898b1a633L,
+ 0xb5ab800d05ee6961L },
+ { 0xf1963f5911a5acd6L,0xbaee615746201063L,0x36d9a649a596210aL,
+ 0xaed043631ba7138cL } },
+ /* 29 << 196 */
+ { { 0xcf817d1ca4a82b76L,0x5586960ef3806be9L,0x7ab67c8909dc6bb5L,
+ 0x52ace7a0114fe7ebL },
+ { 0xcd987618cbbc9b70L,0x4f06fd5a604ca5e1L,0x90af14ca6dbde133L,
+ 0x1afe4322948a3264L } },
+ /* 30 << 196 */
+ { { 0xa70d2ca6c44b2c6cL,0xab7267990ef87dfeL,0x310f64dc2e696377L,
+ 0x49b42e684c8126a0L },
+ { 0x0ea444c3cea0b176L,0x53a8ddf7cb269182L,0xf3e674ebbbba9dcbL,
+ 0x0d2878a8d8669d33L } },
+ /* 31 << 196 */
+ { { 0x04b935d5d019b6a3L,0xbb5cf88e406f1e46L,0xa1912d165b57c111L,
+ 0x9803fc2119ebfd78L },
+ { 0x4f231c9ec07764a9L,0xd93286eeb75bd055L,0x83a9457d8ee6c9deL,
+ 0x046959156087ec90L } },
+ /* 32 << 196 */
+ { { 0x14c6dd8a58d6cd46L,0x9cb633b58e6634d2L,0xc1305047f81bc328L,
+ 0x12ede0e226a177e5L },
+ { 0x332cca62065a6f4fL,0xc3a47ecd67be487bL,0x741eb1870f47ed1cL,
+ 0x99e66e58e7598b14L } },
+ /* 33 << 196 */
+ { { 0x6f0544ca63d0ff12L,0xe5efc784b610a05fL,0xf72917b17cad7b47L,
+ 0x3ff6ea20f2cac0c0L },
+ { 0xcc23791bf21db8b7L,0x7dac70b1d7d93565L,0x682cda1d694bdaadL,
+ 0xeb88bb8c1023516dL } },
+ /* 34 << 196 */
+ { { 0xc4c634b4dfdbeb1bL,0x22f5ca72b4ee4deaL,0x1045a368e6524821L,
+ 0xed9e8a3f052b18b2L },
+ { 0x9b7f2cb1b961f49aL,0x7fee2ec17b009670L,0x350d875422507a6dL,
+ 0x561bd7114db55f1dL } },
+ /* 35 << 196 */
+ { { 0x4c189ccc320bbcafL,0x568434cfdf1de48cL,0x6af1b00e0fa8f128L,
+ 0xf0ba9d028907583cL },
+ { 0x735a400432ff9f60L,0x3dd8e4b6c25dcf33L,0xf2230f1642c74cefL,
+ 0xd8117623013fa8adL } },
+ /* 36 << 196 */
+ { { 0x36822876f51fe76eL,0x8a6811cc11d62589L,0xc3fc7e6546225718L,
+ 0xb7df2c9fc82fdbcdL },
+ { 0x3b1d4e52dd7b205bL,0xb695947847a2e414L,0x05e4d793efa91148L,
+ 0xb47ed446fd2e9675L } },
+ /* 37 << 196 */
+ { { 0x1a7098b904c9d9bfL,0x661e28811b793048L,0xb1a16966b01ee461L,
+ 0xbc5213082954746fL },
+ { 0xc909a0fc2477de50L,0xd80bb41c7dbd51efL,0xa85be7ec53294905L,
+ 0x6d465b1883958f97L } },
+ /* 38 << 196 */
+ { { 0x16f6f330fb6840fdL,0xfaaeb2143401e6c8L,0xaf83d30fccb5b4f8L,
+ 0x22885739266dec4bL },
+ { 0x51b4367c7bc467dfL,0x926562e3d842d27aL,0xdfcb66140fea14a6L,
+ 0xeb394daef2734cd9L } },
+ /* 39 << 196 */
+ { { 0x3eeae5d211c0be98L,0xb1e6ed11814e8165L,0x191086bce52bce1cL,
+ 0x14b74cc6a75a04daL },
+ { 0x63cf11868c060985L,0x071047de2dbd7f7cL,0x4e433b8bce0942caL,
+ 0xecbac447d8fec61dL } },
+ /* 40 << 196 */
+ { { 0x8f0ed0e2ebf3232fL,0xfff80f9ec52a2eddL,0xad9ab43375b55fdbL,
+ 0x73ca7820e42e0c11L },
+ { 0x6dace0a0e6251b46L,0x89bc6b5c4c0d932dL,0x3438cd77095da19aL,
+ 0x2f24a9398d48bdfbL } },
+ /* 41 << 196 */
+ { { 0x99b47e46766561b7L,0x736600e60ed0322aL,0x06a47cb1638e1865L,
+ 0x927c1c2dcb136000L },
+ { 0x295423370cc5df69L,0x99b37c0209d649a9L,0xc5f0043c6aefdb27L,
+ 0x6cdd99871be95c27L } },
+ /* 42 << 196 */
+ { { 0x69850931390420d2L,0x299c40ac0983efa4L,0x3a05e778af39aeadL,
+ 0x8427440843a45193L },
+ { 0x6bcd0fb991a711a0L,0x461592c89f52ab17L,0xb49302b4da3c6ed6L,
+ 0xc51fddc7330d7067L } },
+ /* 43 << 196 */
+ { { 0x94babeb6da50d531L,0x521b840da6a7b9daL,0x5305151e404bdc89L,
+ 0x1bcde201d0d07449L },
+ { 0xf427a78b3b76a59aL,0xf84841ce07791a1bL,0xebd314bebf91ed1cL,
+ 0x8e61d34cbf172943L } },
+ /* 44 << 196 */
+ { { 0x1d5dc4515541b892L,0xb186ee41fc9d9e54L,0x9d9f345ed5bf610dL,
+ 0x3e7ba65df6acca9fL },
+ { 0x9dda787aa8369486L,0x09f9dab78eb5ba53L,0x5afb2033d6481bc3L,
+ 0x76f4ce30afa62104L } },
+ /* 45 << 196 */
+ { { 0xa8fa00cff4f066b5L,0x89ab5143461dafc2L,0x44339ed7a3389998L,
+ 0x2ff862f1bc214903L },
+ { 0x2c88f985b05556e3L,0xcd96058e3467081eL,0x7d6a4176edc637eaL,
+ 0xe1743d0936a5acdcL } },
+ /* 46 << 196 */
+ { { 0x66fd72e27eb37726L,0xf7fa264e1481a037L,0x9fbd3bde45f4aa79L,
+ 0xed1e0147767c3e22L },
+ { 0x7621f97982e7abe2L,0x19eedc7245f633f8L,0xe69b155e6137bf3aL,
+ 0xa0ad13ce414ee94eL } },
+ /* 47 << 196 */
+ { { 0x93e3d5241c0e651aL,0xab1a6e2a02ce227eL,0xe7af17974ab27ecaL,
+ 0x245446debd444f39L },
+ { 0x59e22a2156c07613L,0x43deafcef4275498L,0x10834ccb67fd0946L,
+ 0xa75841e547406edfL } },
+ /* 48 << 196 */
+ { { 0xebd6a6777b0ac93dL,0xa6e37b0d78f5e0d7L,0x2516c09676f5492bL,
+ 0x1e4bf8889ac05f3aL },
+ { 0xcdb42ce04df0ba2bL,0x935d5cfd5062341bL,0x8a30333382acac20L,
+ 0x429438c45198b00eL } },
+ /* 49 << 196 */
+ { { 0x1d083bc9049d33faL,0x58b82dda946f67ffL,0xac3e2db867a1d6a3L,
+ 0x62e6bead1798aac8L },
+ { 0xfc85980fde46c58cL,0xa7f6937969c8d7beL,0x23557927837b35ecL,
+ 0x06a933d8e0790c0cL } },
+ /* 50 << 196 */
+ { { 0x827c0e9b077ff55dL,0x53977798bb26e680L,0x595308741d9cb54fL,
+ 0xcca3f4494aac53efL },
+ { 0x11dc5c87a07eda0fL,0xc138bccffd6400c8L,0x549680d313e5da72L,
+ 0xc93eed824540617eL } },
+ /* 51 << 196 */
+ { { 0xfd3db1574d0b75c0L,0x9716eb426386075bL,0x0639605c817b2c16L,
+ 0x09915109f1e4f201L },
+ { 0x35c9a9285cca6c3bL,0xb25f7d1a3505c900L,0xeb9f7d20630480c4L,
+ 0xc3c7b8c62a1a501cL } },
+ /* 52 << 196 */
+ { { 0x3f99183c5a1f8e24L,0xfdb118fa9dd255f0L,0xb9b18b90c27f62a6L,
+ 0xe8f732f7396ec191L },
+ { 0x524a2d910be786abL,0x5d32adef0ac5a0f5L,0x9b53d4d69725f694L,
+ 0x032a76c60510ba89L } },
+ /* 53 << 196 */
+ { { 0x840391a3ebeb1544L,0x44b7b88c3ed73ac3L,0xd24bae7a256cb8b3L,
+ 0x7ceb151ae394cb12L },
+ { 0xbd6b66d05bc1e6a8L,0xec70cecb090f07bfL,0x270644ed7d937589L,
+ 0xee9e1a3d5f1dccfeL } },
+ /* 54 << 196 */
+ { { 0xb0d40a84745b98d2L,0xda429a212556ed40L,0xf676eced85148cb9L,
+ 0x5a22d40cded18936L },
+ { 0x3bc4b9e570e8a4ceL,0xbfd1445b9eae0379L,0xf23f2c0c1a0bd47eL,
+ 0xa9c0bb31e1845531L } },
+ /* 55 << 196 */
+ { { 0x9ddc4d600a4c3f6bL,0xbdfaad792c15ef44L,0xce55a2367f484accL,
+ 0x08653ca7055b1f15L },
+ { 0x2efa8724538873a3L,0x09299e5dace1c7e7L,0x07afab66ade332baL,
+ 0x9be1fdf692dd71b7L } },
+ /* 56 << 196 */
+ { { 0xa49b5d595758b11cL,0x0b852893c8654f40L,0xb63ef6f452379447L,
+ 0xd4957d29105e690cL },
+ { 0x7d484363646559b0L,0xf4a8273c49788a8eL,0xee406cb834ce54a9L,
+ 0x1e1c260ff86fda9bL } },
+ /* 57 << 196 */
+ { { 0xe150e228cf6a4a81L,0x1fa3b6a31b488772L,0x1e6ff110c5a9c15bL,
+ 0xc6133b918ad6aa47L },
+ { 0x8ac5d55c9dffa978L,0xba1d1c1d5f3965f2L,0xf969f4e07732b52fL,
+ 0xfceecdb5a5172a07L } },
+ /* 58 << 196 */
+ { { 0xb0120a5f10f2b8f5L,0xc83a6cdf5c4c2f63L,0x4d47a491f8f9c213L,
+ 0xd9e1cce5d3f1bbd5L },
+ { 0x0d91bc7caba7e372L,0xfcdc74c8dfd1a2dbL,0x05efa800374618e5L,
+ 0x1121696915a7925eL } },
+ /* 59 << 196 */
+ { { 0xd4c89823f6021c5dL,0x880d5e84eff14423L,0x6523bc5a6dcd1396L,
+ 0xd1acfdfc113c978bL },
+ { 0xb0c164e8bbb66840L,0xf7f4301e72b58459L,0xc29ad4a6a638e8ecL,
+ 0xf5ab896146b78699L } },
+ /* 60 << 196 */
+ { { 0x9dbd79740e954750L,0x0121de8864f9d2c6L,0x2e597b42d985232eL,
+ 0x55b6c3c553451777L },
+ { 0xbb53e547519cb9fbL,0xf134019f8428600dL,0x5a473176e081791aL,
+ 0x2f3e226335fb0c08L } },
+ /* 61 << 196 */
+ { { 0xb28c301773d273b0L,0xccd210767721ef9aL,0x054cc292b650dc39L,
+ 0x662246de6188045eL },
+ { 0x904b52fa6b83c0d1L,0xa72df26797e9cd46L,0x886b43cd899725e4L,
+ 0x2b651688d849ff22L } },
+ /* 62 << 196 */
+ { { 0x60479b7902f34533L,0x5e354c140c77c148L,0xb4bb7581a8537c78L,
+ 0x188043d7efe1495fL },
+ { 0x9ba12f428c1d5026L,0x2e0c8a2693d4aaabL,0xbdba7b8baa57c450L,
+ 0x140c9ad69bbdafefL } },
+ /* 63 << 196 */
+ { { 0x2067aa4225ac0f18L,0xf7b1295b04d1fbf3L,0x14829111a4b04824L,
+ 0x2ce3f19233bd5e91L },
+ { 0x9c7a1d558f2e1b72L,0xfe932286302aa243L,0x497ca7b4d4be9554L,
+ 0xb8e821b8e0547a6eL } },
+ /* 64 << 196 */
+ { { 0xfb2838be67e573e0L,0x05891db94084c44bL,0x9131137396c1c2c5L,
+ 0x6aebfa3fd958444bL },
+ { 0xac9cdce9e56e55c1L,0x7148ced32caa46d0L,0x2e10c7efb61fe8ebL,
+ 0x9fd835daff97cf4dL } },
+ /* 0 << 203 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 203 */
+ { { 0xa36da109081e9387L,0xfb9780d78c935828L,0xd5940332e540b015L,
+ 0xc9d7b51be0f466faL },
+ { 0xfaadcd41d6d9f671L,0xba6c1e28b1a2ac17L,0x066a7833ed201e5fL,
+ 0x19d99719f90f462bL } },
+ /* 2 << 203 */
+ { { 0xf431f462060b5f61L,0xa56f46b47bd057c2L,0x348dca6c47e1bf65L,
+ 0x9a38783e41bcf1ffL },
+ { 0x7a5d33a9da710718L,0x5a7799872e0aeaf6L,0xca87314d2d29d187L,
+ 0xfa0edc3ec687d733L } },
+ /* 3 << 203 */
+ { { 0x9df336216a31e09bL,0xde89e44dc1350e35L,0x292148714ca0cf52L,
+ 0xdf3796720b88a538L },
+ { 0xc92a510a2591d61bL,0x79aa87d7585b447bL,0xf67db604e5287f77L,
+ 0x1697c8bf5efe7a80L } },
+ /* 4 << 203 */
+ { { 0x1c894849cb198ac7L,0xa884a93d0f264665L,0x2da964ef9b200678L,
+ 0x3c351b87009834e6L },
+ { 0xafb2ef9fe2c4b44bL,0x580f6c473326790cL,0xb84805210b02264aL,
+ 0x8ba6f9e242a194e2L } },
+ /* 5 << 203 */
+ { { 0xfc87975f8fb54738L,0x3516078827c3ead3L,0x834116d2b74a085aL,
+ 0x53c99a73a62fe996L },
+ { 0x87585be05b81c51bL,0x925bafa8be0852b7L,0x76a4fafda84d19a7L,
+ 0x39a45982585206d4L } },
+ /* 6 << 203 */
+ { { 0x499b6ab65eb03c0eL,0xf19b795472bc3fdeL,0xa86b5b9c6e3a80d2L,
+ 0xe43775086d42819fL },
+ { 0xc1663650bb3ee8a3L,0x75eb14fcb132075fL,0xa8ccc9067ad834f6L,
+ 0xea6a2474e6e92ffdL } },
+ /* 7 << 203 */
+ { { 0x9d72fd950f8d6758L,0xcb84e101408c07ddL,0xb9114bfda5e23221L,
+ 0x358b5fe2e94e742cL },
+ { 0x1c0577ec95f40e75L,0xf01554513d73f3d6L,0x9d55cd67bd1b9b66L,
+ 0x63e86e78af8d63c7L } },
+ /* 8 << 203 */
+ { { 0x39d934abd3c095f1L,0x04b261bee4b76d71L,0x1d2e6970e73e6984L,
+ 0x879fb23b5e5fcb11L },
+ { 0x11506c72dfd75490L,0x3a97d08561bcf1c1L,0x43201d82bf5e7007L,
+ 0x7f0ac52f798232a7L } },
+ /* 9 << 203 */
+ { { 0x2715cbc46eb564d4L,0x8d6c752c9e570e29L,0xf80247c89ef5fd5dL,
+ 0xc3c66b46d53eb514L },
+ { 0x9666b4010f87de56L,0xce62c06fc6c603b5L,0xae7b4c607e4fc942L,
+ 0x38ac0b77663a9c19L } },
+ /* 10 << 203 */
+ { { 0xcb4d20ee4b049136L,0x8b63bf12356a4613L,0x1221aef670e08128L,
+ 0xe62d8c514acb6b16L },
+ { 0x71f64a67379e7896L,0xb25237a2cafd7fa5L,0xf077bd983841ba6aL,
+ 0xc4ac02443cd16e7eL } },
+ /* 11 << 203 */
+ { { 0x548ba86921fea4caL,0xd36d0817f3dfdac1L,0x09d8d71ff4685fafL,
+ 0x8eff66bec52c459aL },
+ { 0x182faee70b57235eL,0xee3c39b10106712bL,0x5107331fc0fcdcb0L,
+ 0x669fb9dca51054baL } },
+ /* 12 << 203 */
+ { { 0xb25101fb319d7682L,0xb02931290a982feeL,0x51c1c9b90261b344L,
+ 0x0e008c5bbfd371faL },
+ { 0xd866dd1c0278ca33L,0x666f76a6e5aa53b1L,0xe5cfb7796013a2cfL,
+ 0x1d3a1aada3521836L } },
+ /* 13 << 203 */
+ { { 0xcedd253173faa485L,0xc8ee6c4fc0a76878L,0xddbccfc92a11667dL,
+ 0x1a418ea91c2f695aL },
+ { 0xdb11bd9251f73971L,0x3e4b3c82da2ed89fL,0x9a44f3f4e73e0319L,
+ 0xd1e3de0f303431afL } },
+ /* 14 << 203 */
+ { { 0x3c5604ff50f75f9cL,0x1d8eddf37e752b22L,0x0ef074dd3c9a1118L,
+ 0xd0ffc172ccb86d7bL },
+ { 0xabd1ece3037d90f2L,0xe3f307d66055856cL,0x422f93287e4c6dafL,
+ 0x902aac66334879a0L } },
+ /* 15 << 203 */
+ { { 0xb6a1e7bf94cdfadeL,0x6c97e1ed7fc6d634L,0x662ad24da2fb63f8L,
+ 0xf81be1b9a5928405L },
+ { 0x86d765e4d14b4206L,0xbecc2e0e8fa0db65L,0xa28838e0b17fc76cL,
+ 0xe49a602ae37cf24eL } },
+ /* 16 << 203 */
+ { { 0x76b4131a567193ecL,0xaf3c305ae5f6e70bL,0x9587bd39031eebddL,
+ 0x5709def871bbe831L },
+ { 0x570599830eb2b669L,0x4d80ce1b875b7029L,0x838a7da80364ac16L,
+ 0x2f431d23be1c83abL } },
+ /* 17 << 203 */
+ { { 0xe56812a6f9294dd3L,0xb448d01f9b4b0d77L,0xf3ae606104e8305cL,
+ 0x2bead64594d8c63eL },
+ { 0x0a85434d84fd8b07L,0x537b983ff7a9dee5L,0xedcc5f18ef55bd85L,
+ 0x2041af6221c6cf8bL } },
+ /* 18 << 203 */
+ { { 0x8e52874cb940c71eL,0x211935a9db5f4b3aL,0x94350492301b1dc3L,
+ 0x33d2646d29958620L },
+ { 0x16b0d64bef911404L,0x9d1f25ea9a3c5ef4L,0x20f200eb4a352c78L,
+ 0x43929f2c4bd0b428L } },
+ /* 19 << 203 */
+ { { 0xa5656667c7196e29L,0x7992c2f09391be48L,0xaaa97cbd9ee0cd6eL,
+ 0x51b0310c3dc8c9bfL },
+ { 0x237f8acfdd9f22cbL,0xbb1d81a1b585d584L,0x8d5d85f58c416388L,
+ 0x0d6e5a5a42fe474fL } },
+ /* 20 << 203 */
+ { { 0xe781276638235d4eL,0x1c62bd67496e3298L,0x8378660c3f175bc8L,
+ 0x4d04e18917afdd4dL },
+ { 0x32a8160185a8068cL,0xdb58e4e192b29a85L,0xe8a65b86c70d8a3bL,
+ 0x5f0e6f4e98a0403bL } },
+ /* 21 << 203 */
+ { { 0x0812968469ed2370L,0x34dc30bd0871ee26L,0x3a5ce9487c9c5b05L,
+ 0x7d487b8043a90c87L },
+ { 0x4089ba37dd0e7179L,0x45f80191b4041811L,0x1c3e105898747ba5L,
+ 0x98c4e13a6e1ae592L } },
+ /* 22 << 203 */
+ { { 0xd44636e6e82c9f9eL,0x711db87cc33a1043L,0x6f431263aa8aec05L,
+ 0x43ff120d2744a4aaL },
+ { 0xd3bd892fae77779bL,0xf0fe0cc98cdc9f82L,0xca5f7fe6f1c5b1bcL,
+ 0xcc63a68244929a72L } },
+ /* 23 << 203 */
+ { { 0xc7eaba0c09dbe19aL,0x2f3585ad6b5c73c2L,0x8ab8924b0ae50c30L,
+ 0x17fcd27a638b30baL },
+ { 0xaf414d3410b3d5a5L,0x09c107d22a9accf1L,0x15dac49f946a6242L,
+ 0xaec3df2ad707d642L } },
+ /* 24 << 203 */
+ { { 0x2c2492b73f894ae0L,0xf59df3e5b75f18ceL,0x7cb740d28f53cad0L,
+ 0x3eb585fbc4f01294L },
+ { 0x17da0c8632c7f717L,0xeb8c795baf943f4cL,0x4ee23fb5f67c51d2L,
+ 0xef18757568889949L } },
+ /* 25 << 203 */
+ { { 0xa6b4bdb20389168bL,0xc4ecd258ea577d03L,0x3a63782b55743082L,
+ 0x6f678f4cc72f08cdL },
+ { 0x553511cf65e58dd8L,0xd53b4e3ed402c0cdL,0x37de3e29a037c14cL,
+ 0x86b6c516c05712aaL } },
+ /* 26 << 203 */
+ { { 0x2834da3eb38dff6fL,0xbe012c52ea636be8L,0x292d238c61dd37f8L,
+ 0x0e54523f8f8142dbL },
+ { 0xe31eb436036a05d8L,0x83e3cdff1e93c0ffL,0x3fd2fe0f50821ddfL,
+ 0xc8e19b0dff9eb33bL } },
+ /* 27 << 203 */
+ { { 0xc8cc943fb569a5feL,0xad0090d4d4342d75L,0x82090b4bcaeca000L,
+ 0xca39687f1bd410ebL },
+ { 0xe7bb0df765959d77L,0x39d782189c964999L,0xd87f62e8b2415451L,
+ 0xe5efb774bed76108L } },
+ /* 28 << 203 */
+ { { 0x3ea011a4e822f0d0L,0xbc647ad15a8704f8L,0xbb315b3550c6820fL,
+ 0x863dec3db7e76becL },
+ { 0x01ff5d3af017bfc7L,0x20054439976b8229L,0x067fca370bbd0d3bL,
+ 0xf63dde647f5e3d0fL } },
+ /* 29 << 203 */
+ { { 0x22dbefb32a4c94e9L,0xafbff0fe96f8278aL,0x80aea0b13503793dL,
+ 0xb22380295f06cd29L },
+ { 0x65703e578ec3fecaL,0x06c38314393e7053L,0xa0b751eb7c6734c4L,
+ 0xd2e8a435c59f0f1eL } },
+ /* 30 << 203 */
+ { { 0x147d90525e9ca895L,0x2f4dd31e972072dfL,0xa16fda8ee6c6755cL,
+ 0xc66826ffcf196558L },
+ { 0x1f1a76a30cf43895L,0xa9d604e083c3097bL,0xe190830966390e0eL,
+ 0xa50bf753b3c85effL } },
+ /* 31 << 203 */
+ { { 0x0696bddef6a70251L,0x548b801b3c6ab16aL,0x37fcf704a4d08762L,
+ 0x090b3defdff76c4eL },
+ { 0x87e8cb8969cb9158L,0x44a90744995ece43L,0xf85395f40ad9fbf5L,
+ 0x49b0f6c54fb0c82dL } },
+ /* 32 << 203 */
+ { { 0x75d9bc15adf7cccfL,0x81a3e5d6dfa1e1b0L,0x8c39e444249bc17eL,
+ 0xf37dccb28ea7fd43L },
+ { 0xda654873907fba12L,0x35daa6da4a372904L,0x0564cfc66283a6c5L,
+ 0xd09fa4f64a9395bfL } },
+ /* 33 << 203 */
+ { { 0x688e9ec9aeb19a36L,0xd913f1cec7bfbfb4L,0x797b9a3c61c2faa6L,
+ 0x2f979bec6a0a9c12L },
+ { 0xb5969d0f359679ecL,0xebcf523d079b0460L,0xfd6b000810fab870L,
+ 0x3f2edcda9373a39cL } },
+ /* 34 << 203 */
+ { { 0x0d64f9a76f568431L,0xf848c27c02f8898cL,0xf418ade1260b5bd5L,
+ 0xc1f3e3236973dee8L },
+ { 0x46e9319c26c185ddL,0x6d85b7d8546f0ac4L,0x427965f2247f9d57L,
+ 0xb519b636b0035f48L } },
+ /* 35 << 203 */
+ { { 0x6b6163a9ab87d59cL,0xff9f58c339caaa11L,0x4ac39cde3177387bL,
+ 0x5f6557c2873e77f9L },
+ { 0x6750400636a83041L,0x9b1c96ca75ef196cL,0xf34283deb08c7940L,
+ 0x7ea096441128c316L } },
+ /* 36 << 203 */
+ { { 0xb510b3b56aa39dffL,0x59b43da29f8e4d8cL,0xa8ce31fd9e4c4b9fL,
+ 0x0e20be26c1303c01L },
+ { 0x18187182e8ee47c9L,0xd9687cdb7db98101L,0x7a520e4da1e14ff6L,
+ 0x429808ba8836d572L } },
+ /* 37 << 203 */
+ { { 0xa37ca60d4944b663L,0xf901f7a9a3f91ae5L,0xe4e3e76e9e36e3b1L,
+ 0x9aa219cf29d93250L },
+ { 0x347fe275056a2512L,0xa4d643d9de65d95cL,0x9669d396699fc3edL,
+ 0xb598dee2cf8c6bbeL } },
+ /* 38 << 203 */
+ { { 0x682ac1e5dda9e5c6L,0x4e0d3c72caa9fc95L,0x17faaade772bea44L,
+ 0x5ef8428cab0009c8L },
+ { 0xcc4ce47a460ff016L,0xda6d12bf725281cbL,0x44c678480223aad2L,
+ 0x6e342afa36256e28L } },
+ /* 39 << 203 */
+ { { 0x1400bb0b93a37c04L,0x62b1bc9bdd10bd96L,0x7251adeb0dac46b7L,
+ 0x7d33b92e7be4ef51L },
+ { 0x28b2a94be61fa29aL,0x4b2be13f06422233L,0x36d6d062330d8d37L,
+ 0x5ef80e1eb28ca005L } },
+ /* 40 << 203 */
+ { { 0x174d46996d16768eL,0x9fc4ff6a628bf217L,0x77705a94154e490dL,
+ 0x9d96dd288d2d997aL },
+ { 0x77e2d9d8ce5d72c4L,0x9d06c5a4c11c714fL,0x02aa513679e4a03eL,
+ 0x1386b3c2030ff28bL } },
+ /* 41 << 203 */
+ { { 0xfe82e8a6fb283f61L,0x7df203e5f3abc3fbL,0xeec7c3513a4d3622L,
+ 0xf7d17dbfdf762761L },
+ { 0xc3956e44522055f0L,0xde3012db8fa748dbL,0xca9fcb63bf1dcc14L,
+ 0xa56d9dcfbe4e2f3aL } },
+ /* 42 << 203 */
+ { { 0xb86186b68bcec9c2L,0x7cf24df9680b9f06L,0xc46b45eac0d29281L,
+ 0xfff42bc507b10e12L },
+ { 0x12263c404d289427L,0x3d5f1899b4848ec4L,0x11f97010d040800cL,
+ 0xb4c5f529300feb20L } },
+ /* 43 << 203 */
+ { { 0xcc543f8fde94fdcbL,0xe96af739c7c2f05eL,0xaa5e0036882692e1L,
+ 0x09c75b68950d4ae9L },
+ { 0x62f63df2b5932a7aL,0x2658252ede0979adL,0x2a19343fb5e69631L,
+ 0x718c7501525b666bL } },
+ /* 44 << 203 */
+ { { 0x26a42d69ea40dc3aL,0xdc84ad22aecc018fL,0x25c36c7b3270f04aL,
+ 0x46ba6d4750fa72edL },
+ { 0x6c37d1c593e58a8eL,0xa2394731120c088cL,0xc3be4263cb6e86daL,
+ 0x2c417d367126d038L } },
+ /* 45 << 203 */
+ { { 0x5b70f9c58b6f8efaL,0x671a2faa37718536L,0xd3ced3c6b539c92bL,
+ 0xe56f1bd9a31203c2L },
+ { 0x8b096ec49ff3c8ebL,0x2deae43243491ceaL,0x2465c6eb17943794L,
+ 0x5d267e6620586843L } },
+ /* 46 << 203 */
+ { { 0x9d3d116db07159d0L,0xae07a67fc1896210L,0x8fc84d87bb961579L,
+ 0x30009e491c1f8dd6L },
+ { 0x8a8caf22e3132819L,0xcffa197cf23ab4ffL,0x58103a44205dd687L,
+ 0x57b796c30ded67a2L } },
+ /* 47 << 203 */
+ { { 0x0b9c3a6ca1779ad7L,0xa33cfe2e357c09c5L,0x2ea293153db4a57eL,
+ 0x919596958ebeb52eL },
+ { 0x118db9a6e546c879L,0x8e996df46295c8d6L,0xdd99048455ec806bL,
+ 0x24f291ca165c1035L } },
+ /* 48 << 203 */
+ { { 0xcca523bb440e2229L,0x324673a273ef4d04L,0xaf3adf343e11ec39L,
+ 0x6136d7f1dc5968d3L },
+ { 0x7a7b2899b053a927L,0x3eaa2661ae067ecdL,0x8549b9c802779cd9L,
+ 0x061d7940c53385eaL } },
+ /* 49 << 203 */
+ { { 0x3e0ba883f06d18bdL,0x4ba6de53b2700843L,0xb966b668591a9e4dL,
+ 0x93f675677f4fa0edL },
+ { 0x5a02711b4347237bL,0xbc041e2fe794608eL,0x55af10f570f73d8cL,
+ 0xd2d4d4f7bb7564f7L } },
+ /* 50 << 203 */
+ { { 0xd7d27a89b3e93ce7L,0xf7b5a8755d3a2c1bL,0xb29e68a0255b218aL,
+ 0xb533837e8af76754L },
+ { 0xd1b05a73579fab2eL,0xb41055a1ecd74385L,0xb2369274445e9115L,
+ 0x2972a7c4f520274eL } },
+ /* 51 << 203 */
+ { { 0x6c08334ef678e68aL,0x4e4160f099b057edL,0x3cfe11b852ccb69aL,
+ 0x2fd1823a21c8f772L },
+ { 0xdf7f072f3298f055L,0x8c0566f9fec74a6eL,0xe549e0195bb4d041L,
+ 0x7c3930ba9208d850L } },
+ /* 52 << 203 */
+ { { 0xe07141fcaaa2902bL,0x539ad799e4f69ad3L,0xa6453f94813f9ffdL,
+ 0xc58d3c48375bc2f7L },
+ { 0xb3326fad5dc64e96L,0x3aafcaa9b240e354L,0x1d1b0903aca1e7a9L,
+ 0x4ceb97671211b8a0L } },
+ /* 53 << 203 */
+ { { 0xeca83e49e32a858eL,0x4c32892eae907badL,0xd5b42ab62eb9b494L,
+ 0x7fde3ee21eabae1bL },
+ { 0x13b5ab09caf54957L,0xbfb028bee5f5d5d5L,0x928a06502003e2c0L,
+ 0x90793aac67476843L } },
+ /* 54 << 203 */
+ { { 0x5e942e79c81710a0L,0x557e4a3627ccadd4L,0x72a2bc564bcf6d0cL,
+ 0x09ee5f4326d7b80cL },
+ { 0x6b70dbe9d4292f19L,0x56f74c2663f16b18L,0xc23db0f735fbb42aL,
+ 0xb606bdf66ae10040L } },
+ /* 55 << 203 */
+ { { 0x1eb15d4d044573acL,0x7dc3cf86556b0ba4L,0x97af9a33c60df6f7L,
+ 0x0b1ef85ca716ce8cL },
+ { 0x2922f884c96958beL,0x7c32fa9435690963L,0x2d7f667ceaa00061L,
+ 0xeaaf7c173547365cL } },
+ /* 56 << 203 */
+ { { 0x1eb4de4687032d58L,0xc54f3d835e2c79e0L,0x07818df45d04ef23L,
+ 0x55faa9c8673d41b4L },
+ { 0xced64f6f89b95355L,0x4860d2eab7415c84L,0x5fdb9bd2050ebad3L,
+ 0xdb53e0cc6685a5bfL } },
+ /* 57 << 203 */
+ { { 0xb830c0319feb6593L,0xdd87f3106accff17L,0x2303ebab9f555c10L,
+ 0x94603695287e7065L },
+ { 0xf88311c32e83358cL,0x508dd9b4eefb0178L,0x7ca237062dba8652L,
+ 0x62aac5a30047abe5L } },
+ /* 58 << 203 */
+ { { 0x9a61d2a08b1ea7b3L,0xd495ab63ae8b1485L,0x38740f8487052f99L,
+ 0x178ebe5bb2974eeaL },
+ { 0x030bbcca5b36d17fL,0xb5e4cce3aaf86eeaL,0xb51a022068f8e9e0L,
+ 0xa434879609eb3e75L } },
+ /* 59 << 203 */
+ { { 0xbe592309eef1a752L,0x5d7162d76f2aa1edL,0xaebfb5ed0f007dd2L,
+ 0x255e14b2c89edd22L },
+ { 0xba85e0720303b697L,0xc5d17e25f05720ffL,0x02b58d6e5128ebb6L,
+ 0x2c80242dd754e113L } },
+ /* 60 << 203 */
+ { { 0x919fca5fabfae1caL,0x937afaac1a21459bL,0x9e0ca91c1f66a4d2L,
+ 0x194cc7f323ec1331L },
+ { 0xad25143a8aa11690L,0xbe40ad8d09b59e08L,0x37d60d9be750860aL,
+ 0x6c53b008c6bf434cL } },
+ /* 61 << 203 */
+ { { 0xb572415d1356eb80L,0xb8bf9da39578ded8L,0x22658e365e8fb38bL,
+ 0x9b70ce225af8cb22L },
+ { 0x7c00018a829a8180L,0x84329f93b81ed295L,0x7c343ea25f3cea83L,
+ 0x38f8655f67586536L } },
+ /* 62 << 203 */
+ { { 0xa661a0d01d3ec517L,0x98744652512321aeL,0x084ca591eca92598L,
+ 0xa9bb9dc91dcb3febL },
+ { 0x14c5435578b4c240L,0x5ed62a3b610cafdcL,0x07512f371b38846bL,
+ 0x571bb70ab0e38161L } },
+ /* 63 << 203 */
+ { { 0xb556b95b2da705d2L,0x3ef8ada6b1a08f98L,0x85302ca7ddecfbe5L,
+ 0x0e530573943105cdL },
+ { 0x60554d5521a9255dL,0x63a32fa1f2f3802aL,0x35c8c5b0cd477875L,
+ 0x97f458ea6ad42da1L } },
+ /* 64 << 203 */
+ { { 0x832d7080eb6b242dL,0xd30bd0233b71e246L,0x7027991bbe31139dL,
+ 0x68797e91462e4e53L },
+ { 0x423fe20a6b4e185aL,0x82f2c67e42d9b707L,0x25c817684cf7811bL,
+ 0xbd53005e045bb95dL } },
+ /* 0 << 210 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 210 */
+ { { 0xe5f649be9d8e68fdL,0xdb0f05331b044320L,0xf6fde9b3e0c33398L,
+ 0x92f4209b66c8cfaeL },
+ { 0xe9d1afcc1a739d4bL,0x09aea75fa28ab8deL,0x14375fb5eac6f1d0L,
+ 0x6420b560708f7aa5L } },
+ /* 2 << 210 */
+ { { 0x9eae499c6254dc41L,0x7e2939247a837e7eL,0x74aec08c090524a7L,
+ 0xf82b92198d6f55f2L },
+ { 0x493c962e1402cec5L,0x9f17ca17fa2f30e7L,0xbcd783e8e9b879cbL,
+ 0xea3d8c145a6f145fL } },
+ /* 3 << 210 */
+ { { 0xdede15e75e0dee6eL,0x74f24872dc628aa2L,0xd3e9c4fe7861bb93L,
+ 0x56d4822a6187b2e0L },
+ { 0xb66417cfc59826f9L,0xca2609692408169eL,0xedf69d06c79ef885L,
+ 0x00031f8adc7d138fL } },
+ /* 4 << 210 */
+ { { 0x103c46e60ebcf726L,0x4482b8316231470eL,0x6f6dfaca487c2109L,
+ 0x2e0ace9762e666efL },
+ { 0x3246a9d31f8d1f42L,0x1b1e83f1574944d2L,0x13dfa63aa57f334bL,
+ 0x0cf8daed9f025d81L } },
+ /* 5 << 210 */
+ { { 0x30d78ea800ee11c1L,0xeb053cd4b5e3dd75L,0x9b65b13ed58c43c5L,
+ 0xc3ad49bdbd151663L },
+ { 0x99fd8e41b6427990L,0x12cf15bd707eae1eL,0x29ad4f1b1aabb71eL,
+ 0x5143e74d07545d0eL } },
+ /* 6 << 210 */
+ { { 0x30266336c88bdee1L,0x25f293065876767cL,0x9c078571c6731996L,
+ 0xc88690b2ed552951L },
+ { 0x274f2c2d852705b4L,0xb0bf8d444e09552dL,0x7628beeb986575d1L,
+ 0x407be2387f864651L } },
+ /* 7 << 210 */
+ { { 0x0e5e3049a639fc6bL,0xe75c35d986003625L,0x0cf35bd85dcc1646L,
+ 0x8bcaced26c26273aL },
+ { 0xe22ecf1db5536742L,0x013dd8971a9e068bL,0x17f411cb8a7909c5L,
+ 0x5757ac98861dd506L } },
+ /* 8 << 210 */
+ { { 0x85de1f0d1e935abbL,0xdefd10b4154de37aL,0xb8d9e392369cebb5L,
+ 0x54d5ef9b761324beL },
+ { 0x4d6341ba74f17e26L,0xc0a0e3c878c1dde4L,0xa6d7758187d918fdL,
+ 0x6687601502ca3a13L } },
+ /* 9 << 210 */
+ { { 0xc7313e9cf36658f0L,0xc433ef1c71f8057eL,0x853262461b6a835aL,
+ 0xc8f053987c86394cL },
+ { 0xff398cdfe983c4a1L,0xbf5e816203b7b931L,0x93193c46b7b9045bL,
+ 0x1e4ebf5da4a6e46bL } },
+ /* 10 << 210 */
+ { { 0xf9942a6043a24fe7L,0x29c1191effb3492bL,0x9f662449902fde05L,
+ 0xc792a7ac6713c32dL },
+ { 0x2fd88ad8b737982cL,0x7e3a0319a21e60e3L,0x09b0de447383591aL,
+ 0x6df141ee8310a456L } },
+ /* 11 << 210 */
+ { { 0xaec1a039e6d6f471L,0x14b2ba0f1198d12eL,0xebc1a1603aeee5acL,
+ 0x401f4836e0b964ceL },
+ { 0x2ee437964fd03f66L,0x3fdb4e49dd8f3f12L,0x6ef267f629380f18L,
+ 0x3e8e96708da64d16L } },
+ /* 12 << 210 */
+ { { 0xbc19180c207674f1L,0x112e09a733ae8fdbL,0x996675546aaeb71eL,
+ 0x79432af1e101b1c7L },
+ { 0xd5eb558fde2ddec6L,0x81392d1f5357753fL,0xa7a76b973ae1158aL,
+ 0x416fbbff4a899991L } },
+ /* 13 << 210 */
+ { { 0x9e65fdfd0d4a9dcfL,0x7bc29e48944ddf12L,0xbc1a92d93c856866L,
+ 0x273c69056e98dfe2L },
+ { 0x69fce418cdfaa6b8L,0x606bd8235061c69fL,0x42d495a06af75e27L,
+ 0x8ed3d5056d873a1fL } },
+ /* 14 << 210 */
+ { { 0xaf5528416ab25b6aL,0xc6c0ffc72b1a4523L,0xab18827b21c99e03L,
+ 0x060e86489034691bL },
+ { 0x5207f90f93c7f398L,0x9f4a96cb82f8d10bL,0xdd71cd793ad0f9e3L,
+ 0x84f435d2fc3a54f5L } },
+ /* 15 << 210 */
+ { { 0x4b03c55b8e33787fL,0xef42f975a6384673L,0xff7304f75051b9f0L,
+ 0x18aca1dc741c87c2L },
+ { 0x56f120a72d4bfe80L,0xfd823b3d053e732cL,0x11bccfe47537ca16L,
+ 0xdf6c9c741b5a996bL } },
+ /* 16 << 210 */
+ { { 0xee7332c7904fc3faL,0x14a23f45c7e3636aL,0xc38659c3f091d9aaL,
+ 0x4a995e5db12d8540L },
+ { 0x20a53becf3a5598aL,0x56534b17b1eaa995L,0x9ed3dca4bf04e03cL,
+ 0x716c563ad8d56268L } },
+ /* 17 << 210 */
+ { { 0x27ba77a41d6178e7L,0xe4c80c4068a1ff8eL,0x750110990a13f63dL,
+ 0x7bf33521a61d46f3L },
+ { 0x0aff218e10b365bbL,0x810218040fd7ea75L,0x05a3fd8aa4b3a925L,
+ 0xb829e75f9b3db4e6L } },
+ /* 18 << 210 */
+ { { 0x6bdc75a54d53e5fbL,0x04a5dc02d52717e3L,0x86af502fe9a42ec2L,
+ 0x8867e8fb2630e382L },
+ { 0xbf845c6ebec9889bL,0x54f491f2cb47c98dL,0xa3091fba790c2a12L,
+ 0xd7f6fd78c20f708bL } },
+ /* 19 << 210 */
+ { { 0xa569ac30acde5e17L,0xd0f996d06852b4d7L,0xe51d4bb54609ae54L,
+ 0x3fa37d170daed061L },
+ { 0x62a8868434b8fb41L,0x99a2acbd9efb64f1L,0xb75c1a5e6448e1f2L,
+ 0xfa99951a42b5a069L } },
+ /* 20 << 210 */
+ { { 0x6d956e892f3b26e7L,0xf4709860da875247L,0x3ad151792482dda3L,
+ 0xd64110e3017d82f0L },
+ { 0x14928d2cfad414e4L,0x2b155f582ed02b24L,0x481a141bcb821bf1L,
+ 0x12e3c7704f81f5daL } },
+ /* 21 << 210 */
+ { { 0xe49c5de59fff8381L,0x110532325bbec894L,0xa0d051cc454d88c4L,
+ 0x4f6db89c1f8e531bL },
+ { 0x34fe3fd6ca563a44L,0x7f5c221558da8ab9L,0x8445016d9474f0a1L,
+ 0x17d34d61cb7d8a0aL } },
+ /* 22 << 210 */
+ { { 0x8e9d39101c474019L,0xcaff2629d52ceefbL,0xf9cf3e32c1622c2bL,
+ 0xd4b95e3ce9071a05L },
+ { 0xfbbca61f1594438cL,0x1eb6e6a604aadedfL,0x853027f468e14940L,
+ 0x221d322adfabda9cL } },
+ /* 23 << 210 */
+ { { 0xed8ea9f6b7cb179aL,0xdc7b764db7934dccL,0xfcb139405e09180dL,
+ 0x6629a6bfb47dc2ddL },
+ { 0xbfc55e4e9f5a915eL,0xb1db9d376204441eL,0xf82d68cf930c5f53L,
+ 0x17d3a142cbb605b1L } },
+ /* 24 << 210 */
+ { { 0xdd5944ea308780f2L,0xdc8de7613845f5e4L,0x6beaba7d7624d7a3L,
+ 0x1e709afd304df11eL },
+ { 0x9536437602170456L,0xbf204b3ac8f94b64L,0x4e53af7c5680ca68L,
+ 0x0526074ae0c67574L } },
+ /* 25 << 210 */
+ { { 0x95d8cef8ecd92af6L,0xe6b9fa7a6cd1745aL,0x3d546d3da325c3e4L,
+ 0x1f57691d9ae93aaeL },
+ { 0xe891f3fe9d2e1a33L,0xd430093fac063d35L,0xeda59b125513a327L,
+ 0xdc2134f35536f18fL } },
+ /* 26 << 210 */
+ { { 0xaa51fe2c5c210286L,0x3f68aaee1cab658cL,0x5a23a00bf9357292L,
+ 0x9a626f397efdabedL },
+ { 0xfe2b3bf3199d78e3L,0xb7a2af7771bbc345L,0x3d19827a1e59802cL,
+ 0x823bbc15b487a51cL } },
+ /* 27 << 210 */
+ { { 0x856139f299d0a422L,0x9ac3df65f456c6fbL,0xaddf65c6701f8bd6L,
+ 0x149f321e3758df87L },
+ { 0xb1ecf714721b7ebaL,0xe17df09831a3312aL,0xdb2fd6ecd5c4d581L,
+ 0xfd02996f8fcea1b3L } },
+ /* 28 << 210 */
+ { { 0xe29fa63e7882f14fL,0xc9f6dc3507c6cadcL,0x46f22d6fb882bed0L,
+ 0x1a45755bd118e52cL },
+ { 0x9f2c7c277c4608cfL,0x7ccbdf32568012c2L,0xfcb0aedd61729b0eL,
+ 0x7ca2ca9ef7d75dbfL } },
+ /* 29 << 210 */
+ { { 0xf58fecb16f640f62L,0xe274b92b39f51946L,0x7f4dfc046288af44L,
+ 0x0a91f32aeac329e5L },
+ { 0x43ad274bd6aaba31L,0x719a16400f6884f9L,0x685d29f6daf91e20L,
+ 0x5ec1cc3327e49d52L } },
+ /* 30 << 210 */
+ { { 0x38f4de963b54a059L,0x0e0015e5efbcfdb3L,0x177d23d94dbb8da6L,
+ 0x98724aa297a617adL },
+ { 0x30f0885bfdb6558eL,0xf9f7a28ac7899a96L,0xd2ae8ac8872dc112L,
+ 0xfa0642ca73c3c459L } },
+ /* 31 << 210 */
+ { { 0x15296981e7dfc8d6L,0x67cd44501fb5b94aL,0x0ec71cf10eddfd37L,
+ 0xc7e5eeb39a8eddc7L },
+ { 0x02ac8e3d81d95028L,0x0088f17270b0e35dL,0xec041fabe1881fe3L,
+ 0x62cf71b8d99e7faaL } },
+ /* 32 << 210 */
+ { { 0x5043dea7e0f222c2L,0x309d42ac72e65142L,0x94fe9ddd9216cd30L,
+ 0xd6539c7d0f87feecL },
+ { 0x03c5a57c432ac7d7L,0x72692cf0327fda10L,0xec28c85f280698deL,
+ 0x2331fb467ec283b1L } },
+ /* 33 << 210 */
+ { { 0xd34bfa322867e633L,0x78709a820a9cc815L,0xb7fe6964875e2fa5L,
+ 0x25cc064f9e98bfb5L },
+ { 0x9eb0151c493a65c5L,0x5fb5d94153182464L,0x69e6f130f04618e2L,
+ 0xa8ecec22f89c8ab6L } },
+ /* 34 << 210 */
+ { { 0xcd6ac88bb96209bdL,0x65fa8cdbb3e1c9e0L,0xa47d22f54a8d8eacL,
+ 0x83895cdf8d33f963L },
+ { 0xa8adca59b56cd3d1L,0x10c8350bdaf38232L,0x2b161fb3a5080a9fL,
+ 0xbe7f5c643af65b3aL } },
+ /* 35 << 210 */
+ { { 0x2c75403997403a11L,0x94626cf7121b96afL,0x431de7c46a983ec2L,
+ 0x3780dd3a52cc3df7L },
+ { 0xe28a0e462baf8e3bL,0xabe68aad51d299aeL,0x603eb8f9647a2408L,
+ 0x14c61ed65c750981L } },
+ /* 36 << 210 */
+ { { 0x88b34414c53352e7L,0x5a34889c1337d46eL,0x612c1560f95f2bc8L,
+ 0x8a3f8441d4807a3aL },
+ { 0x680d9e975224da68L,0x60cd6e88c3eb00e9L,0x3875a98e9a6bc375L,
+ 0xdc80f9244fd554c2L } },
+ /* 37 << 210 */
+ { { 0x6c4b34156ac77407L,0xa1e5ea8f25420681L,0x541bfa144607a458L,
+ 0x5dbc7e7a96d7fbf9L },
+ { 0x646a851b31590a47L,0x039e85ba15ee6df8L,0xd19fa231d7b43fc0L,
+ 0x84bc8be8299a0e04L } },
+ /* 38 << 210 */
+ { { 0x2b9d2936f20df03aL,0x240543828608d472L,0x76b6ba049149202aL,
+ 0xb21c38313670e7b7L },
+ { 0xddd93059d6fdee10L,0x9da47ad378488e71L,0x99cc1dfda0fcfb25L,
+ 0x42abde1064696954L } },
+ /* 39 << 210 */
+ { { 0x14cc15fc17eab9feL,0xd6e863e4d3e70972L,0x29a7765c6432112cL,
+ 0x886600015b0774d8L },
+ { 0x3729175a2c088eaeL,0x13afbcae8230b8d4L,0x44768151915f4379L,
+ 0xf086431ad8d22812L } },
+ /* 40 << 210 */
+ { { 0x37461955c298b974L,0x905fb5f0f8711e04L,0x787abf3afe969d18L,
+ 0x392167c26f6a494eL },
+ { 0xfc7a0d2d28c511daL,0xf127c7dcb66a262dL,0xf9c4bb95fd63fdf0L,
+ 0x900165893913ef46L } },
+ /* 41 << 210 */
+ { { 0x74d2a73c11aa600dL,0x2f5379bd9fb5ab52L,0xe49e53a47fb70068L,
+ 0x68dd39e5404aa9a7L },
+ { 0xb9b0cf572ecaa9c3L,0xba0e103be824826bL,0x60c2198b4631a3c4L,
+ 0xc5ff84abfa8966a2L } },
+ /* 42 << 210 */
+ { { 0x2d6ebe22ac95aff8L,0x1c9bb6dbb5a46d09L,0x419062da53ee4f8dL,
+ 0x7b9042d0bb97efefL },
+ { 0x0f87f080830cf6bdL,0x4861d19a6ec8a6c6L,0xd3a0daa1202f01aaL,
+ 0xb0111674f25afbd5L } },
+ /* 43 << 210 */
+ { { 0x6d00d6cf1afb20d9L,0x1369500040671bc5L,0x913ab0dc2485ea9bL,
+ 0x1f2bed069eef61acL },
+ { 0x850c82176d799e20L,0x93415f373271c2deL,0x5afb06e96c4f5910L,
+ 0x688a52dfc4e9e421L } },
+ /* 44 << 210 */
+ { { 0x30495ba3e2a9a6dbL,0x4601303d58f9268bL,0xbe3b0dad7eb0f04fL,
+ 0x4ea472504456936dL },
+ { 0x8caf8798d33fd3e7L,0x1ccd8a89eb433708L,0x9effe3e887fd50adL,
+ 0xbe240a566b29c4dfL } },
+ /* 45 << 210 */
+ { { 0xec4ffd98ca0e7ebdL,0xf586783ae748616eL,0xa5b00d8fc77baa99L,
+ 0x0acada29b4f34c9cL },
+ { 0x36dad67d0fe723acL,0x1d8e53a539c36c1eL,0xe4dd342d1f4bea41L,
+ 0x64fd5e35ebc9e4e0L } },
+ /* 46 << 210 */
+ { { 0x96f01f9057908805L,0xb5b9ea3d5ed480ddL,0x366c5dc23efd2dd0L,
+ 0xed2fe3056e9dfa27L },
+ { 0x4575e8926e9197e2L,0x11719c09ab502a5dL,0x264c7bece81f213fL,
+ 0x741b924155f5c457L } },
+ /* 47 << 210 */
+ { { 0x78ac7b6849a5f4f4L,0xf91d70a29fc45b7dL,0x39b05544b0f5f355L,
+ 0x11f06bceeef930d9L },
+ { 0xdb84d25d038d05e1L,0x04838ee5bacc1d51L,0x9da3ce869e8ee00bL,
+ 0xc3412057c36eda1fL } },
+ /* 48 << 210 */
+ { { 0xae80b91364d9c2f4L,0x7468bac3a010a8ffL,0xdfd2003737359d41L,
+ 0x1a0f5ab815efeaccL },
+ { 0x7c25ad2f659d0ce0L,0x4011bcbb6785cff1L,0x128b99127e2192c7L,
+ 0xa549d8e113ccb0e8L } },
+ /* 49 << 210 */
+ { { 0x805588d8c85438b1L,0x5680332dbc25cb27L,0xdcd1bc961a4bfdf4L,
+ 0x779ff428706f6566L },
+ { 0x8bbee998f059987aL,0xf6ce8cf2cc686de7L,0xf8ad3c4a953cfdb2L,
+ 0xd1d426d92205da36L } },
+ /* 50 << 210 */
+ { { 0xb3c0f13fc781a241L,0x3e89360ed75362a8L,0xccd05863c8a91184L,
+ 0x9bd0c9b7efa8a7f4L },
+ { 0x97ee4d538a912a4bL,0xde5e15f8bcf518fdL,0x6a055bf8c467e1e0L,
+ 0x10be4b4b1587e256L } },
+ /* 51 << 210 */
+ { { 0xd90c14f2668621c9L,0xd5518f51ab9c92c1L,0x8e6a0100d6d47b3cL,
+ 0xcbe980dd66716175L },
+ { 0x500d3f10ddd83683L,0x3b6cb35d99cac73cL,0x53730c8b6083d550L,
+ 0xcf159767df0a1987L } },
+ /* 52 << 210 */
+ { { 0x84bfcf5343ad73b3L,0x1b528c204f035a94L,0x4294edf733eeac69L,
+ 0xb6283e83817f3240L },
+ { 0xc3fdc9590a5f25b1L,0xefaf8aa55844ee22L,0xde269ba5dbdde4deL,
+ 0xe3347160c56133bfL } },
+ /* 53 << 210 */
+ { { 0xc11842198d9ea9f8L,0x090de5dbf3fc1ab5L,0x404c37b10bf22cdaL,
+ 0x7de20ec8f5618894L },
+ { 0x754c588eecdaecabL,0x6ca4b0ed88342743L,0x76f08bddf4a938ecL,
+ 0xd182de8991493ccbL } },
+ /* 54 << 210 */
+ { { 0xd652c53ec8a4186aL,0xb3e878db946d8e33L,0x088453c05f37663cL,
+ 0x5cd9daaab407748bL },
+ { 0xa1f5197f586d5e72L,0x47500be8c443ca59L,0x78ef35b2e2652424L,
+ 0x09c5d26f6dd7767dL } },
+ /* 55 << 210 */
+ { { 0x7175a79aa74d3f7bL,0x0428fd8dcf5ea459L,0x511cb97ca5d1746dL,
+ 0x36363939e71d1278L },
+ { 0xcf2df95510350bf4L,0xb381743960aae782L,0xa748c0e43e688809L,
+ 0x98021fbfd7a5a006L } },
+ /* 56 << 210 */
+ { { 0x9076a70c0e367a98L,0xbea1bc150f62b7c2L,0x2645a68c30fe0343L,
+ 0xacaffa78699dc14fL },
+ { 0xf4469964457bf9c4L,0x0db6407b0d2ead83L,0x68d56cadb2c6f3ebL,
+ 0x3b512e73f376356cL } },
+ /* 57 << 210 */
+ { { 0xe43b0e1ffce10408L,0x89ddc0035a5e257dL,0xb0ae0d120362e5b3L,
+ 0x07f983c7b0519161L },
+ { 0xc2e94d155d5231e7L,0xcff22aed0b4f9513L,0xb02588dd6ad0b0b5L,
+ 0xb967d1ac11d0dcd5L } },
+ /* 58 << 210 */
+ { { 0x8dac6bc6cf777b6cL,0x0062bdbd4c6d1959L,0x53da71b50ef5cc85L,
+ 0x07012c7d4006f14fL },
+ { 0x4617f962ac47800dL,0x53365f2bc102ed75L,0xb422efcb4ab8c9d3L,
+ 0x195cb26b34af31c9L } },
+ /* 59 << 210 */
+ { { 0x3a926e2905f2c4ceL,0xbd2bdecb9856966cL,0x5d16ab3a85527015L,
+ 0x9f81609e4486c231L },
+ { 0xd8b96b2cda350002L,0xbd054690fa1b7d36L,0xdc90ebf5e71d79bcL,
+ 0xf241b6f908964e4eL } },
+ /* 60 << 210 */
+ { { 0x7c8386432fe3cd4cL,0xe0f33acbb4bc633cL,0xb4a9ecec3d139f1fL,
+ 0x05ce69cddc4a1f49L },
+ { 0xa19d1b16f5f98aafL,0x45bb71d66f23e0efL,0x33789fcd46cdfdd3L,
+ 0x9b8e2978cee040caL } },
+ /* 61 << 210 */
+ { { 0x9c69b246ae0a6828L,0xba533d247078d5aaL,0x7a2e42c07bb4fbdbL,
+ 0xcfb4879a7035385cL },
+ { 0x8c3dd30b3281705bL,0x7e361c6c404fe081L,0x7b21649c3f604edfL,
+ 0x5dbf6a3fe52ffe47L } },
+ /* 62 << 210 */
+ { { 0xc41b7c234b54d9bfL,0x1374e6813511c3d9L,0x1863bf16c1b2b758L,
+ 0x90e785071e9e6a96L },
+ { 0xab4bf98d5d86f174L,0xd74e0bd385e96fe4L,0x8afde39fcac5d344L,
+ 0x90946dbcbd91b847L } },
+ /* 63 << 210 */
+ { { 0xf5b42358fe1a838cL,0x05aae6c5620ac9d8L,0x8e193bd8a1ce5a0bL,
+ 0x8f7105714dabfd72L },
+ { 0x8d8fdd48182caaacL,0x8c4aeefa040745cfL,0x73c6c30af3b93e6dL,
+ 0x991241f316f42011L } },
+ /* 64 << 210 */
+ { { 0xa0158eeae457a477L,0xd19857dbee6ddc05L,0xb326522418c41671L,
+ 0x3ffdfc7e3c2c0d58L },
+ { 0x3a3a525426ee7cdaL,0x341b0869df02c3a8L,0xa023bf42723bbfc8L,
+ 0x3d15002a14452691L } },
+ /* 0 << 217 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 217 */
+ { { 0x5ef7324c85edfa30L,0x2597655487d4f3daL,0x352f5bc0dcb50c86L,
+ 0x8f6927b04832a96cL },
+ { 0xd08ee1ba55f2f94cL,0x6a996f99344b45faL,0xe133cb8da8aa455dL,
+ 0x5d0721ec758dc1f7L } },
+ /* 2 << 217 */
+ { { 0x6ba7a92079e5fb67L,0xe1331feb70aa725eL,0x5080ccf57df5d837L,
+ 0xe4cae01d7ff72e21L },
+ { 0xd9243ee60412a77dL,0x06ff7cacdf449025L,0xbe75f7cd23ef5a31L,
+ 0xbc9578220ddef7a8L } },
+ /* 3 << 217 */
+ { { 0x8cf7230cb0ce1c55L,0x5b534d050bbfb607L,0xee1ef1130e16363bL,
+ 0x27e0aa7ab4999e82L },
+ { 0xce1dac2d79362c41L,0x67920c9091bb6cb0L,0x1e648d632223df24L,
+ 0x0f7d9eefe32e8f28L } },
+ /* 4 << 217 */
+ { { 0x6943f39afa833834L,0x22951722a6328562L,0x81d63dd54170fc10L,
+ 0x9f5fa58faecc2e6dL },
+ { 0xb66c8725e77d9a3bL,0x11235cea6384ebe0L,0x06a8c1185845e24aL,
+ 0x0137b286ebd093b1L } },
+ /* 5 << 217 */
+ { { 0xc589e1ce44ace150L,0xe0f8d3d94381e97cL,0x59e99b1162c5a4b8L,
+ 0x90d262f7fd0ec9f9L },
+ { 0xfbc854c9283e13c9L,0x2d04fde7aedc7085L,0x057d776547dcbecbL,
+ 0x8dbdf5919a76fa5fL } },
+ /* 6 << 217 */
+ { { 0xd01506950de1e578L,0x2e1463e7e9f72bc6L,0xffa684411b39eca5L,
+ 0x673c85307c037f2fL },
+ { 0xd0d6a600747f91daL,0xb08d43e1c9cb78e9L,0x0fc0c64427b5cef5L,
+ 0x5c1d160aa60a2fd6L } },
+ /* 7 << 217 */
+ { { 0xf98cae5328c8e13bL,0x375f10c4b2eddcd1L,0xd4eb8b7f5cce06adL,
+ 0xb4669f4580a2e1efL },
+ { 0xd593f9d05bbd8699L,0x5528a4c9e7976d13L,0x3923e0951c7e28d3L,
+ 0xb92937903f6bb577L } },
+ /* 8 << 217 */
+ { { 0xdb567d6ac42bd6d2L,0x6df86468bb1f96aeL,0x0efe5b1a4843b28eL,
+ 0x961bbb056379b240L },
+ { 0xb6caf5f070a6a26bL,0x70686c0d328e6e39L,0x80da06cf895fc8d3L,
+ 0x804d8810b363fdc9L } },
+ /* 9 << 217 */
+ { { 0xbe22877b207f1670L,0x9b0dd1884e615291L,0x625ae8dc97a3c2bfL,
+ 0x08584ef7439b86e8L },
+ { 0xde7190a5dcd898ffL,0x26286c402058ee3dL,0x3db0b2175f87b1c1L,
+ 0xcc334771102a6db5L } },
+ /* 10 << 217 */
+ { { 0xd99de9542f770fb1L,0x97c1c6204cd7535eL,0xd3b6c4483f09cefcL,
+ 0xd725af155a63b4f8L },
+ { 0x0c95d24fc01e20ecL,0xdfd374949ae7121fL,0x7d6ddb72ec77b7ecL,
+ 0xfe079d3b0353a4aeL } },
+ /* 11 << 217 */
+ { { 0x3066e70a2e6ac8d2L,0x9c6b5a43106e5c05L,0x52d3c6f5ede59b8cL,
+ 0x30d6a5c3fccec9aeL },
+ { 0xedec7c224fc0a9efL,0x190ff08395c16cedL,0xbe12ec8f94de0fdeL,
+ 0x0d131ab8852d3433L } },
+ /* 12 << 217 */
+ { { 0x42ace07e85701291L,0x94793ed9194061a8L,0x30e83ed6d7f4a485L,
+ 0x9eec7269f9eeff4dL },
+ { 0x90acba590c9d8005L,0x5feca4581e79b9d1L,0x8fbe54271d506a1eL,
+ 0xa32b2c8e2439cfa7L } },
+ /* 13 << 217 */
+ { { 0x1671c17373dd0b4eL,0x37a2821444a054c6L,0x81760a1b4e8b53f1L,
+ 0xa6c04224f9f93b9eL },
+ { 0x18784b34cf671e3cL,0x81bbecd2cda9b994L,0x38831979b2ab3848L,
+ 0xef54feb7f2e03c2dL } },
+ /* 14 << 217 */
+ { { 0xcf197ca7fb8088faL,0x014272474ddc96c5L,0xa2d2550a30777176L,
+ 0x534698984d0cf71dL },
+ { 0x6ce937b83a2aaac6L,0xe9f91dc35af38d9bL,0x2598ad83c8bf2899L,
+ 0x8e706ac9b5536c16L } },
+ /* 15 << 217 */
+ { { 0x40dc7495f688dc98L,0x26490cd7124c4afcL,0xe651ec841f18775cL,
+ 0x393ea6c3b4fdaf4aL },
+ { 0x1e1f33437f338e0dL,0x39fb832b6053e7b5L,0x46e702da619e14d5L,
+ 0x859cacd1cdeef6e0L } },
+ /* 16 << 217 */
+ { { 0x63b99ce74462007dL,0xb8ab48a54cb5f5b7L,0x9ec673d2f55edde7L,
+ 0xd1567f748cfaefdaL },
+ { 0x46381b6b0887bcecL,0x694497cee178f3c2L,0x5e6525e31e6266cbL,
+ 0x5931de26697d6413L } },
+ /* 17 << 217 */
+ { { 0x87f8df7c0e58d493L,0xb1ae5ed058b73f12L,0xc368f784dea0c34dL,
+ 0x9bd0a120859a91a0L },
+ { 0xb00d88b7cc863c68L,0x3a1cc11e3d1f4d65L,0xea38e0e70aa85593L,
+ 0x37f13e987dc4aee8L } },
+ /* 18 << 217 */
+ { { 0x10d38667bc947badL,0x738e07ce2a36ee2eL,0xc93470cdc577fcacL,
+ 0xdee1b6162782470dL },
+ { 0x36a25e672e793d12L,0xd6aa6caee0f186daL,0x474d0fd980e07af7L,
+ 0xf7cdc47dba8a5cd4L } },
+ /* 19 << 217 */
+ { { 0x28af6d9dab15247fL,0x7c789c10493a537fL,0x7ac9b11023a334e7L,
+ 0x0236ac0912c9c277L },
+ { 0xa7e5bd251d7a5144L,0x098b9c2af13ec4ecL,0x3639dacad3f0abcaL,
+ 0x642da81aa23960f9L } },
+ /* 20 << 217 */
+ { { 0x7d2e5c054f7269b1L,0xfcf30777e287c385L,0x10edc84ff2a46f21L,
+ 0x354417574f43fa36L },
+ { 0xf1327899fd703431L,0xa438d7a616dd587aL,0x65c34c57e9c8352dL,
+ 0xa728edab5cc5a24eL } },
+ /* 21 << 217 */
+ { { 0xaed78abc42531689L,0x0a51a0e8010963efL,0x5776fa0ad717d9b3L,
+ 0xf356c2397dd3428bL },
+ { 0x29903fff8d3a3dacL,0x409597fa3d94491fL,0x4cd7a5ffbf4a56a4L,
+ 0xe50964748adab462L } },
+ /* 22 << 217 */
+ { { 0xa97b51265c3427b0L,0x6401405cd282c9bdL,0x3629f8d7222c5c45L,
+ 0xb1c02c16e8d50aedL },
+ { 0xbea2ed75d9635bc9L,0x226790c76e24552fL,0x3c33f2a365f1d066L,
+ 0x2a43463e6dfccc2eL } },
+ /* 23 << 217 */
+ { { 0x8cc3453adb483761L,0xe7cc608565d5672bL,0x277ed6cbde3efc87L,
+ 0x19f2f36869234eafL },
+ { 0x9aaf43175c0b800bL,0x1f1e7c898b6da6e2L,0x6cfb4715b94ec75eL,
+ 0xd590dd5f453118c2L } },
+ /* 24 << 217 */
+ { { 0x14e49da11f17a34cL,0x5420ab39235a1456L,0xb76372412f50363bL,
+ 0x7b15d623c3fabb6eL },
+ { 0xa0ef40b1e274e49cL,0x5cf5074496b1860aL,0xd6583fbf66afe5a4L,
+ 0x44240510f47e3e9aL } },
+ /* 25 << 217 */
+ { { 0x9925434311b2d595L,0xf1367499eec8df57L,0x3cb12c613e73dd05L,
+ 0xd248c0337dac102aL },
+ { 0xcf154f13a77739f5L,0xbf4288cb23d2af42L,0xaa64c9b632e4a1cfL,
+ 0xee8c07a8c8a208f3L } },
+ /* 26 << 217 */
+ { { 0xe10d49996fe8393fL,0x0f809a3fe91f3a32L,0x61096d1c802f63c8L,
+ 0x289e146257750d3dL },
+ { 0xed06167e9889feeaL,0xd5c9c0e2e0993909L,0x46fca0d856508ac6L,
+ 0x918260474f1b8e83L } },
+ /* 27 << 217 */
+ { { 0x4f2c877a9a4a2751L,0x71bd0072cae6feadL,0x38df8dcc06aa1941L,
+ 0x5a074b4c63beeaa8L },
+ { 0xd6d65934c1cec8edL,0xa6ecb49eaabc03bdL,0xaade91c2de8a8415L,
+ 0xcfb0efdf691136e0L } },
+ /* 28 << 217 */
+ { { 0x11af45ee23ab3495L,0xa132df880b77463dL,0x8923c15c815d06f4L,
+ 0xc3ceb3f50d61a436L },
+ { 0xaf52291de88fb1daL,0xea0579741da12179L,0xb0d7218cd2fef720L,
+ 0x6c0899c98e1d8845L } },
+ /* 29 << 217 */
+ { { 0x98157504752ddad7L,0xd60bd74fa1a68a97L,0x7047a3a9f658fb99L,
+ 0x1f5d86d65f8511e4L },
+ { 0xb8a4bc424b5a6d88L,0x69eb2c331abefa7dL,0x95bf39e813c9c510L,
+ 0xf571960ad48aab43L } },
+ /* 30 << 217 */
+ { { 0x7e8cfbcf704e23c6L,0xc71b7d2228aaa65bL,0xa041b2bd245e3c83L,
+ 0x69b98834d21854ffL },
+ { 0x89d227a3963bfeecL,0x99947aaade7da7cbL,0x1d9ee9dbee68a9b1L,
+ 0x0a08f003698ec368L } },
+ /* 31 << 217 */
+ { { 0xe9ea409478ef2487L,0xc8d2d41502cfec26L,0xc52f9a6eb7dcf328L,
+ 0x0ed489e385b6a937L },
+ { 0x9b94986bbef3366eL,0x0de59c70edddddb8L,0xffdb748ceadddbe2L,
+ 0x9b9784bb8266ea40L } },
+ /* 32 << 217 */
+ { { 0x142b55021a93507aL,0xb4cd11878d3c06cfL,0xdf70e76a91ec3f40L,
+ 0x484e81ad4e7553c2L },
+ { 0x830f87b5272e9d6eL,0xea1c93e5c6ff514aL,0x67cc2adcc4192a8eL,
+ 0xc77e27e242f4535aL } },
+ /* 33 << 217 */
+ { { 0x9cdbab36d2b713c5L,0x86274ea0cf7b0cd3L,0x784680f309af826bL,
+ 0xbfcc837a0c72dea3L },
+ { 0xa8bdfe9dd6529b73L,0x708aa22863a88002L,0x6c7a9a54c91d45b9L,
+ 0xdf1a38bbfd004f56L } },
+ /* 34 << 217 */
+ { { 0x2e8c9a26b8bad853L,0x2d52cea33723eae7L,0x054d6d8156ca2830L,
+ 0xa3317d149a8dc411L },
+ { 0xa08662fefd4ddedaL,0xed2a153ab55d792bL,0x7035c16abfc6e944L,
+ 0xb6bc583400171cf3L } },
+ /* 35 << 217 */
+ { { 0xe27152b383d102b6L,0xfe695a470646b848L,0xa5bb09d8916e6d37L,
+ 0xb4269d640d17015eL },
+ { 0x8d8156a10a1d2285L,0xfeef6c5146d26d72L,0x9dac57c84c5434a7L,
+ 0x0282e5be59d39e31L } },
+ /* 36 << 217 */
+ { { 0xedfff181721c486dL,0x301baf10bc58824eL,0x8136a6aa00570031L,
+ 0x55aaf78c1cddde68L },
+ { 0x2682937159c63952L,0x3a3bd2748bc25bafL,0xecdf8657b7e52dc3L,
+ 0x2dd8c087fd78e6c8L } },
+ /* 37 << 217 */
+ { { 0x20553274f5531461L,0x8b4a12815d95499bL,0xe2c8763a1a80f9d2L,
+ 0xd1dbe32b4ddec758L },
+ { 0xaf12210d30c34169L,0xba74a95378baa533L,0x3d133c6ea438f254L,
+ 0xa431531a201bef5bL } },
+ /* 38 << 217 */
+ { { 0x15295e22f669d7ecL,0xca374f64357fb515L,0x8a8406ffeaa3fdb3L,
+ 0x106ae448df3f2da8L },
+ { 0x8f9b0a9033c8e9a1L,0x234645e271ad5885L,0x3d0832241c0aed14L,
+ 0xf10a7d3e7a942d46L } },
+ /* 39 << 217 */
+ { { 0x7c11deee40d5c9beL,0xb2bae7ffba84ed98L,0x93e97139aad58dddL,
+ 0x3d8727963f6d1fa3L },
+ { 0x483aca818569ff13L,0x8b89a5fb9a600f72L,0x4cbc27c3c06f2b86L,
+ 0x2213071363ad9c0bL } },
+ /* 40 << 217 */
+ { { 0xb5358b1e48ac2840L,0x18311294ecba9477L,0xda58f990a6946b43L,
+ 0x3098baf99ab41819L },
+ { 0x66c4c1584198da52L,0xab4fc17c146bfd1bL,0x2f0a4c3cbf36a908L,
+ 0x2ae9e34b58cf7838L } },
+ /* 41 << 217 */
+ { { 0xf411529e3fa11b1fL,0x21e43677974af2b4L,0x7c20958ec230793bL,
+ 0x710ea88516e840f3L },
+ { 0xfc0b21fcc5dc67cfL,0x08d5164788405718L,0xd955c21fcfe49eb7L,
+ 0x9722a5d556dd4a1fL } },
+ /* 42 << 217 */
+ { { 0xc9ef50e2c861baa5L,0xc0c21a5d9505ac3eL,0xaf6b9a338b7c063fL,
+ 0xc63703392f4779c1L },
+ { 0x22df99c7638167c3L,0xfe6ffe76795db30cL,0x2b822d33a4854989L,
+ 0xfef031dd30563aa5L } },
+ /* 43 << 217 */
+ { { 0x16b09f82d57c667fL,0xc70312cecc0b76f1L,0xbf04a9e6c9118aecL,
+ 0x82fcb4193409d133L },
+ { 0x1a8ab385ab45d44dL,0xfba07222617b83a3L,0xb05f50dd58e81b52L,
+ 0x1d8db55321ce5affL } },
+ /* 44 << 217 */
+ { { 0x3097b8d4e344a873L,0x7d8d116dfe36d53eL,0x6db22f587875e750L,
+ 0x2dc5e37343e144eaL },
+ { 0xc05f32e6e799eb95L,0xe9e5f4df6899e6ecL,0xbdc3bd681fab23d5L,
+ 0xb72b8ab773af60e6L } },
+ /* 45 << 217 */
+ { { 0x8db27ae02cecc84aL,0x600016d87bdb871cL,0x42a44b13d7c46f58L,
+ 0xb8919727c3a77d39L },
+ { 0xcfc6bbbddafd6088L,0x1a7401466bd20d39L,0x8c747abd98c41072L,
+ 0x4c91e765bdf68ea1L } },
+ /* 46 << 217 */
+ { { 0x7c95e5ca08819a78L,0xcf48b729c9587921L,0x091c7c5fdebbcc7dL,
+ 0x6f287404f0e05149L },
+ { 0xf83b5ac226cd44ecL,0x88ae32a6cfea250eL,0x6ac5047a1d06ebc5L,
+ 0xc7e550b4d434f781L } },
+ /* 47 << 217 */
+ { { 0x61ab1cf25c727bd2L,0x2e4badb11cf915b0L,0x1b4dadecf69d3920L,
+ 0xe61b1ca6f14c1dfeL },
+ { 0x90b479ccbd6bd51fL,0x8024e4018045ec30L,0xcab29ca325ef0e62L,
+ 0x4f2e941649e4ebc0L } },
+ /* 48 << 217 */
+ { { 0x45eb40ec0ccced58L,0x25cd4b9c0da44f98L,0x43e06458871812c6L,
+ 0x99f80d5516cef651L },
+ { 0x571340c9ce6dc153L,0x138d5117d8665521L,0xacdb45bc4e07014dL,
+ 0x2f34bb3884b60b91L } },
+ /* 49 << 217 */
+ { { 0xf44a4fd22ae8921eL,0xb039288e892ba1e2L,0x9da50174b1c180b2L,
+ 0x6b70ab661693dc87L },
+ { 0x7e9babc9e7057481L,0x4581ddef9c80dc41L,0x0c890da951294682L,
+ 0x0b5629d33f4736e5L } },
+ /* 50 << 217 */
+ { { 0x2340c79eb06f5b41L,0xa42e84ce4e243469L,0xf9a20135045a71a9L,
+ 0xefbfb415d27b6fb6L },
+ { 0x25ebea239d33cd6fL,0x9caedb88aa6c0af8L,0x53dc7e9ad9ce6f96L,
+ 0x3897f9fd51e0b15aL } },
+ /* 51 << 217 */
+ { { 0xf51cb1f88e5d788eL,0x1aec7ba8e1d490eeL,0x265991e0cc58cb3cL,
+ 0x9f306e8c9fc3ad31L },
+ { 0x5fed006e5040a0acL,0xca9d5043fb476f2eL,0xa19c06e8beea7a23L,
+ 0xd28658010edabb63L } },
+ /* 52 << 217 */
+ { { 0xdb92293f6967469aL,0x2894d8398d8a8ed8L,0x87c9e406bbc77122L,
+ 0x8671c6f12ea3a26aL },
+ { 0xe42df8d6d7de9853L,0x2e3ce346b1f2bcc7L,0xda601dfc899d50cfL,
+ 0xbfc913defb1b598fL } },
+ /* 53 << 217 */
+ { { 0x81c4909fe61f7908L,0x192e304f9bbc7b29L,0xc3ed8738c104b338L,
+ 0xedbe9e47783f5d61L },
+ { 0x0c06e9be2db30660L,0xda3e613fc0eb7d8eL,0xd8fa3e97322e096eL,
+ 0xfebd91e8d336e247L } },
+ /* 54 << 217 */
+ { { 0x8f13ccc4df655a49L,0xa9e00dfc5eb20210L,0x84631d0fc656b6eaL,
+ 0x93a058cdd8c0d947L },
+ { 0x6846904a67bd3448L,0x4a3d4e1af394fd5cL,0xc102c1a5db225f52L,
+ 0xe3455bbafc4f5e9aL } },
+ /* 55 << 217 */
+ { { 0x6b36985b4b9ad1ceL,0xa98185365bb7f793L,0x6c25e1d048b1a416L,
+ 0x1381dd533c81bee7L },
+ { 0xd2a30d617a4a7620L,0xc841292639b8944cL,0x3c1c6fbe7a97c33aL,
+ 0x941e541d938664e7L } },
+ /* 56 << 217 */
+ { { 0x417499e84a34f239L,0x15fdb83cb90402d5L,0xb75f46bf433aa832L,
+ 0xb61e15af63215db1L },
+ { 0xaabe59d4a127f89aL,0x5d541e0c07e816daL,0xaaba0659a618b692L,
+ 0x5532773317266026L } },
+ /* 57 << 217 */
+ { { 0xaf53a0fc95f57552L,0x329476506cacb0c9L,0x253ff58dc821be01L,
+ 0xb0309531a06f1146L },
+ { 0x59bbbdf505c2e54dL,0x158f27ad26e8dd22L,0xcc5b7ffb397e1e53L,
+ 0xae03f65b7fc1e50dL } },
+ /* 58 << 217 */
+ { { 0xa9784ebd9c95f0f9L,0x5ed9deb224640771L,0x31244af7035561c4L,
+ 0x87332f3a7ee857deL },
+ { 0x09e16e9e2b9e0d88L,0x52d910f456a06049L,0x507ed477a9592f48L,
+ 0x85cb917b2365d678L } },
+ /* 59 << 217 */
+ { { 0xf8511c934c8998d1L,0x2186a3f1730ea58fL,0x50189626b2029db0L,
+ 0x9137a6d902ceb75aL },
+ { 0x2fe17f37748bc82cL,0x87c2e93180469f8cL,0x850f71cdbf891aa2L,
+ 0x0ca1b89b75ec3d8dL } },
+ /* 60 << 217 */
+ { { 0x516c43aa5e1cd3cdL,0x893978089a887c28L,0x0059c699ddea1f9fL,
+ 0x7737d6fa8e6868f7L },
+ { 0x6d93746a60f1524bL,0x36985e55ba052aa7L,0x41b1d322ed923ea5L,
+ 0x3429759f25852a11L } },
+ /* 61 << 217 */
+ { { 0xbeca6ec3092e9f41L,0x3a238c6662256bbdL,0xd82958ea70ad487dL,
+ 0x4ac8aaf965610d93L },
+ { 0x3fa101b15e4ccab0L,0x9bf430f29de14bfbL,0xa10f5cc66531899dL,
+ 0x590005fbea8ce17dL } },
+ /* 62 << 217 */
+ { { 0xc437912f24544cb6L,0x9987b71ad79ac2e3L,0x13e3d9ddc058a212L,
+ 0x00075aacd2de9606L },
+ { 0x80ab508b6cac8369L,0x87842be7f54f6c89L,0xa7ad663d6bc532a4L,
+ 0x67813de778a91bc8L } },
+ /* 63 << 217 */
+ { { 0x5dcb61cec3427239L,0x5f3c7cf0c56934d9L,0xc079e0fbe3191591L,
+ 0xe40896bdb01aada7L },
+ { 0x8d4667910492d25fL,0x8aeb30c9e7408276L,0xe94374959287aaccL,
+ 0x23d4708d79fe03d4L } },
+ /* 64 << 217 */
+ { { 0x8cda9cf2d0c05199L,0x502fbc22fae78454L,0xc0bda9dff572a182L,
+ 0x5f9b71b86158b372L },
+ { 0xe0f33a592b82dd07L,0x763027359523032eL,0x7fe1a721c4505a32L,
+ 0x7b6e3e82f796409fL } },
+ /* 0 << 224 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 224 */
+ { { 0xe3417bc035d0b34aL,0x440b386b8327c0a7L,0x8fb7262dac0362d1L,
+ 0x2c41114ce0cdf943L },
+ { 0x2ba5cef1ad95a0b1L,0xc09b37a867d54362L,0x26d6cdd201e486c9L,
+ 0x20477abf42ff9297L } },
+ /* 2 << 224 */
+ { { 0xa004dcb3292a9287L,0xddc15cf677b092c7L,0x083a8464806c0605L,
+ 0x4a68df703db997b0L },
+ { 0x9c134e4505bf7dd0L,0xa4e63d398ccf7f8cL,0xa6e6517f41b5f8afL,
+ 0xaa8b9342ad7bc1ccL } },
+ /* 3 << 224 */
+ { { 0x126f35b51e706ad9L,0xb99cebb4c3a9ebdfL,0xa75389afbf608d90L,
+ 0x76113c4fc6c89858L },
+ { 0x80de8eb097e2b5aaL,0x7e1022cc63b91304L,0x3bdab6056ccc066cL,
+ 0x33cbb144b2edf900L } },
+ /* 4 << 224 */
+ { { 0xc41764717af715d2L,0xe2f7f594d0134a96L,0x2c1873efa41ec956L,
+ 0xe4e7b4f677821304L },
+ { 0xe5c8ff9788d5374aL,0x2b915e6380823d5bL,0xea6bc755b2ee8fe2L,
+ 0x6657624ce7112651L } },
+ /* 5 << 224 */
+ { { 0x157af101dace5acaL,0xc4fdbcf211a6a267L,0xdaddf340c49c8609L,
+ 0x97e49f52e9604a65L },
+ { 0x9be8e790937e2ad5L,0x846e2508326e17f1L,0x3f38007a0bbbc0dcL,
+ 0xcf03603fb11e16d6L } },
+ /* 6 << 224 */
+ { { 0xd6f800e07442f1d5L,0x475607d166e0e3abL,0x82807f16b7c64047L,
+ 0x8858e1e3a749883dL },
+ { 0x5859120b8231ee10L,0x1b80e7eb638a1eceL,0xcb72525ac6aa73a4L,
+ 0xa7cdea3d844423acL } },
+ /* 7 << 224 */
+ { { 0x5ed0c007f8ae7c38L,0x6db07a5c3d740192L,0xbe5e9c2a5fe36db3L,
+ 0xd5b9d57a76e95046L },
+ { 0x54ac32e78eba20f2L,0xef11ca8f71b9a352L,0x305e373eff98a658L,
+ 0xffe5a100823eb667L } },
+ /* 8 << 224 */
+ { { 0x57477b11e51732d2L,0xdfd6eb282538fc0eL,0x5c43b0cc3b39eec5L,
+ 0x6af12778cb36cc57L },
+ { 0x70b0852d06c425aeL,0x6df92f8c5c221b9bL,0x6c8d4f9ece826d9cL,
+ 0xf59aba7bb49359c3L } },
+ /* 9 << 224 */
+ { { 0x5c8ed8d5da64309dL,0x61a6de5691b30704L,0xd6b52f6a2f9b5808L,
+ 0x0eee419498c958a7L },
+ { 0xcddd9aab771e4caaL,0x83965dfd78bc21beL,0x02affce3b3b504f5L,
+ 0x30847a21561c8291L } },
+ /* 10 << 224 */
+ { { 0xd2eb2cf152bfda05L,0xe0e4c4e96197b98cL,0x1d35076cf8a1726fL,
+ 0x6c06085b2db11e3dL },
+ { 0x15c0c4d74463ba14L,0x9d292f830030238cL,0x1311ee8b3727536dL,
+ 0xfeea86efbeaedc1eL } },
+ /* 11 << 224 */
+ { { 0xb9d18cd366131e2eL,0xf31d974f80fe2682L,0xb6e49e0fe4160289L,
+ 0x7c48ec0b08e92799L },
+ { 0x818111d8d1989aa7L,0xb34fa0aaebf926f9L,0xdb5fe2f5a245474aL,
+ 0xf80a6ebb3c7ca756L } },
+ /* 12 << 224 */
+ { { 0xa7f96054afa05dd8L,0x26dfcf21fcaf119eL,0xe20ef2e30564bb59L,
+ 0xef4dca5061cb02b8L },
+ { 0xcda7838a65d30672L,0x8b08d534fd657e86L,0x4c5b439546d595c8L,
+ 0x39b58725425cb836L } },
+ /* 13 << 224 */
+ { { 0x8ea610593de9abe3L,0x404348819cdc03beL,0x9b261245cfedce8cL,
+ 0x78c318b4cf5234a1L },
+ { 0x510bcf16fde24c99L,0x2a77cb75a2c2ff5dL,0x9c895c2b27960fb4L,
+ 0xd30ce975b0eda42bL } },
+ /* 14 << 224 */
+ { { 0xfda853931a62cc26L,0x23c69b9650c0e052L,0xa227df15bfc633f3L,
+ 0x2ac788481bae7d48L },
+ { 0x487878f9187d073dL,0x6c2be919967f807dL,0x765861d8336e6d8fL,
+ 0x88b8974cce528a43L } },
+ /* 15 << 224 */
+ { { 0x09521177ff57d051L,0x2ff38037fb6a1961L,0xfc0aba74a3d76ad4L,
+ 0x7c76480325a7ec17L },
+ { 0x7532d75f48879bc8L,0xea7eacc058ce6bc1L,0xc82176b48e896c16L,
+ 0x9a30e0b22c750fedL } },
+ /* 16 << 224 */
+ { { 0xc37e2c2e421d3aa4L,0xf926407ce84fa840L,0x18abc03d1454e41cL,
+ 0x26605ecd3f7af644L },
+ { 0x242341a6d6a5eabfL,0x1edb84f4216b668eL,0xd836edb804010102L,
+ 0x5b337ce7945e1d8cL } },
+ /* 17 << 224 */
+ { { 0xd2075c77c055dc14L,0x2a0ffa2581d89cdfL,0x8ce815ea6ffdcbafL,
+ 0xa3428878fb648867L },
+ { 0x277699cf884655fbL,0xfa5b5bd6364d3e41L,0x01f680c6441e1cb7L,
+ 0x3fd61e66b70a7d67L } },
+ /* 18 << 224 */
+ { { 0x666ba2dccc78cf66L,0xb30181746fdbff77L,0x8d4dd0db168d4668L,
+ 0x259455d01dab3a2aL },
+ { 0xf58564c5cde3acecL,0x7714192513adb276L,0x527d725d8a303f65L,
+ 0x55deb6c9e6f38f7bL } },
+ /* 19 << 224 */
+ { { 0xfd5bb657b1fa70fbL,0xfa07f50fd8073a00L,0xf72e3aa7bca02500L,
+ 0xf68f895d9975740dL },
+ { 0x301120605cae2a6aL,0x01bd721802874842L,0x3d4238917ce47bd3L,
+ 0xa66663c1789544f6L } },
+ /* 20 << 224 */
+ { { 0x864d05d73272d838L,0xe22924f9fa6295c5L,0x8189593f6c2fda32L,
+ 0x330d7189b184b544L },
+ { 0x79efa62cbde1f714L,0x35771c94e5cb1a63L,0x2f4826b8641c8332L,
+ 0x00a894fbc8cee854L } },
+ /* 21 << 224 */
+ { { 0xb4b9a39b36194d40L,0xe857a7c577612601L,0xf4209dd24ecf2f58L,
+ 0x82b9e66d5a033487L },
+ { 0xc1e36934e4e8b9ddL,0xd2372c9da42377d7L,0x51dc94c70e3ae43bL,
+ 0x4c57761e04474f6fL } },
+ /* 22 << 224 */
+ { { 0xdcdacd0a1058a318L,0x369cf3f578053a9aL,0xc6c3de5031c68de2L,
+ 0x4653a5763c4b6d9fL },
+ { 0x1688dd5aaa4e5c97L,0x5be80aa1b7ab3c74L,0x70cefe7cbc65c283L,
+ 0x57f95f1306867091L } },
+ /* 23 << 224 */
+ { { 0xa39114e24415503bL,0xc08ff7c64cbb17e9L,0x1eff674dd7dec966L,
+ 0x6d4690af53376f63L },
+ { 0xff6fe32eea74237bL,0xc436d17ecd57508eL,0x15aa28e1edcc40feL,
+ 0x0d769c04581bbb44L } },
+ /* 24 << 224 */
+ { { 0xc240b6de34eaacdaL,0xd9e116e82ba0f1deL,0xcbe45ec779438e55L,
+ 0x91787c9d96f752d7L },
+ { 0x897f532bf129ac2fL,0xd307b7c85a36e22cL,0x91940675749fb8f3L,
+ 0xd14f95d0157fdb28L } },
+ /* 25 << 224 */
+ { { 0xfe51d0296ae55043L,0x8931e98f44a87de1L,0xe57f1cc609e4fee2L,
+ 0x0d063b674e072d92L },
+ { 0x70a998b9ed0e4316L,0xe74a736b306aca46L,0xecf0fbf24fda97c7L,
+ 0xa40f65cb3e178d93L } },
+ /* 26 << 224 */
+ { { 0x1625360416df4285L,0xb0c9babbd0c56ae2L,0x73032b19cfc5cfc3L,
+ 0xe497e5c309752056L },
+ { 0x12096bb4164bda96L,0x1ee42419a0b74da1L,0x8fc36243403826baL,
+ 0x0c8f0069dc09e660L } },
+ /* 27 << 224 */
+ { { 0x8667e981c27253c9L,0x05a6aefb92b36a45L,0xa62c4b369cb7bb46L,
+ 0x8394f37511f7027bL },
+ { 0x747bc79c5f109d0fL,0xcad88a765b8cc60aL,0x80c5a66b58f09e68L,
+ 0xe753d451f6127eacL } },
+ /* 28 << 224 */
+ { { 0xc44b74a15b0ec6f5L,0x47989fe45289b2b8L,0x745f848458d6fc73L,
+ 0xec362a6ff61c70abL },
+ { 0x070c98a7b3a8ad41L,0x73a20fc07b63db51L,0xed2c2173f44c35f4L,
+ 0x8a56149d9acc9dcaL } },
+ /* 29 << 224 */
+ { { 0x98f178819ac6e0f4L,0x360fdeafa413b5edL,0x0625b8f4a300b0fdL,
+ 0xf1f4d76a5b3222d3L },
+ { 0x9d6f5109587f76b8L,0x8b4ee08d2317fdb5L,0x88089bb78c68b095L,
+ 0x95570e9a5808d9b9L } },
+ /* 30 << 224 */
+ { { 0xa395c36f35d33ae7L,0x200ea12350bb5a94L,0x20c789bd0bafe84bL,
+ 0x243ef52d0919276aL },
+ { 0x3934c577e23ae233L,0xb93807afa460d1ecL,0xb72a53b1f8fa76a4L,
+ 0xd8914cb0c3ca4491L } },
+ /* 31 << 224 */
+ { { 0x2e1284943fb42622L,0x3b2700ac500907d5L,0xf370fb091a95ec63L,
+ 0xf8f30be231b6dfbdL },
+ { 0xf2b2f8d269e55f15L,0x1fead851cc1323e9L,0xfa366010d9e5eef6L,
+ 0x64d487b0e316107eL } },
+ /* 32 << 224 */
+ { { 0x4c076b86d23ddc82L,0x03fd344c7e0143f0L,0xa95362ff317af2c5L,
+ 0x0add3db7e18b7a4fL },
+ { 0x9c673e3f8260e01bL,0xfbeb49e554a1cc91L,0x91351bf292f2e433L,
+ 0xc755e7ec851141ebL } },
+ /* 33 << 224 */
+ { { 0xc9a9513929607745L,0x0ca07420a26f2b28L,0xcb2790e74bc6f9ddL,
+ 0x345bbb58adcaffc0L },
+ { 0xc65ea38cbe0f27a2L,0x67c24d7c641fcb56L,0x2c25f0a7a9e2c757L,
+ 0x93f5cdb016f16c49L } },
+ /* 34 << 224 */
+ { { 0x2ca5a9d7c5ee30a1L,0xd1593635b909b729L,0x804ce9f3dadeff48L,
+ 0xec464751b07c30c3L },
+ { 0x89d65ff39e49af6aL,0xf2d6238a6f3d01bcL,0x1095561e0bced843L,
+ 0x51789e12c8a13fd8L } },
+ /* 35 << 224 */
+ { { 0xd633f929763231dfL,0x46df9f7de7cbddefL,0x01c889c0cb265da8L,
+ 0xfce1ad10af4336d2L },
+ { 0x8d110df6fc6a0a7eL,0xdd431b986da425dcL,0xcdc4aeab1834aabeL,
+ 0x84deb1248439b7fcL } },
+ /* 36 << 224 */
+ { { 0x8796f1693c2a5998L,0x9b9247b47947190dL,0x55b9d9a511597014L,
+ 0x7e9dd70d7b1566eeL },
+ { 0x94ad78f7cbcd5e64L,0x0359ac179bd4c032L,0x3b11baaf7cc222aeL,
+ 0xa6a6e284ba78e812L } },
+ /* 37 << 224 */
+ { { 0x8392053f24cea1a0L,0xc97bce4a33621491L,0x7eb1db3435399ee9L,
+ 0x473f78efece81ad1L },
+ { 0x41d72fe0f63d3d0dL,0xe620b880afab62fcL,0x92096bc993158383L,
+ 0x41a213578f896f6cL } },
+ /* 38 << 224 */
+ { { 0x1b5ee2fac7dcfcabL,0x650acfde9546e007L,0xc081b749b1b02e07L,
+ 0xda9e41a0f9eca03dL },
+ { 0x013ba727175a54abL,0xca0cd190ea5d8d10L,0x85ea52c095fd96a9L,
+ 0x2c591b9fbc5c3940L } },
+ /* 39 << 224 */
+ { { 0x6fb4d4e42bad4d5fL,0xfa4c3590fef0059bL,0x6a10218af5122294L,
+ 0x9a78a81aa85751d1L },
+ { 0x04f20579a98e84e7L,0xfe1242c04997e5b5L,0xe77a273bca21e1e4L,
+ 0xfcc8b1ef9411939dL } },
+ /* 40 << 224 */
+ { { 0xe20ea30292d0487aL,0x1442dbec294b91feL,0x1f7a4afebb6b0e8fL,
+ 0x1700ef746889c318L },
+ { 0xf5bbffc370f1fc62L,0x3b31d4b669c79ccaL,0xe8bc2aaba7f6340dL,
+ 0xb0b08ab4a725e10aL } },
+ /* 41 << 224 */
+ { { 0x44f05701ae340050L,0xba4b30161cf0c569L,0x5aa29f83fbe19a51L,
+ 0x1b9ed428b71d752eL },
+ { 0x1666e54eeb4819f5L,0x616cdfed9e18b75bL,0x112ed5be3ee27b0bL,
+ 0xfbf2831944c7de4dL } },
+ /* 42 << 224 */
+ { { 0xd685ec85e0e60d84L,0x68037e301db7ee78L,0x5b65bdcd003c4d6eL,
+ 0x33e7363a93e29a6aL },
+ { 0x995b3a6108d0756cL,0xd727f85c2faf134bL,0xfac6edf71d337823L,
+ 0x99b9aa500439b8b4L } },
+ /* 43 << 224 */
+ { { 0x722eb104e2b4e075L,0x49987295437c4926L,0xb1e4c0e446a9b82dL,
+ 0xd0cb319757a006f5L },
+ { 0xf3de0f7dd7808c56L,0xb5c54d8f51f89772L,0x500a114aadbd31aaL,
+ 0x9afaaaa6295f6cabL } },
+ /* 44 << 224 */
+ { { 0x94705e2104cf667aL,0xfc2a811b9d3935d7L,0x560b02806d09267cL,
+ 0xf19ed119f780e53bL },
+ { 0xf0227c09067b6269L,0x967b85335caef599L,0x155b924368efeebcL,
+ 0xcd6d34f5c497bae6L } },
+ /* 45 << 224 */
+ { { 0x1dd8d5d36cceb370L,0x2aeac579a78d7bf9L,0x5d65017d70b67a62L,
+ 0x70c8e44f17c53f67L },
+ { 0xd1fc095086a34d09L,0xe0fca256e7134907L,0xe24fa29c80fdd315L,
+ 0x2c4acd03d87499adL } },
+ /* 46 << 224 */
+ { { 0xbaaf75173b5a9ba6L,0xb9cbe1f612e51a51L,0xd88edae35e154897L,
+ 0xe4309c3c77b66ca0L },
+ { 0xf5555805f67f3746L,0x85fc37baa36401ffL,0xdf86e2cad9499a53L,
+ 0x6270b2a3ecbc955bL } },
+ /* 47 << 224 */
+ { { 0xafae64f5974ad33bL,0x04d85977fe7b2df1L,0x2a3db3ff4ab03f73L,
+ 0x0b87878a8702740aL },
+ { 0x6d263f015a061732L,0xc25430cea32a1901L,0xf7ebab3ddb155018L,
+ 0x3a86f69363a9b78eL } },
+ /* 48 << 224 */
+ { { 0x349ae368da9f3804L,0x470f07fea164349cL,0xd52f4cc98562baa5L,
+ 0xc74a9e862b290df3L },
+ { 0xd3a1aa3543471a24L,0x239446beb8194511L,0xbec2dd0081dcd44dL,
+ 0xca3d7f0fc42ac82dL } },
+ /* 49 << 224 */
+ { { 0x1f3db085fdaf4520L,0xbb6d3e804549daf2L,0xf5969d8a19ad5c42L,
+ 0x7052b13ddbfd1511L },
+ { 0x11890d1b682b9060L,0xa71d3883ac34452cL,0xa438055b783805b4L,
+ 0x432412774725b23eL } },
+ /* 50 << 224 */
+ { { 0xf20cf96e4901bbedL,0x6419c710f432a2bbL,0x57a0fbb9dfa9cd7dL,
+ 0x589111e400daa249L },
+ { 0x19809a337b60554eL,0xea5f8887ede283a4L,0x2d713802503bfd35L,
+ 0x151bb0af585d2a53L } },
+ /* 51 << 224 */
+ { { 0x40b08f7443b30ca8L,0xe10b5bbad9934583L,0xe8a546d6b51110adL,
+ 0x1dd50e6628e0b6c5L },
+ { 0x292e9d54cff2b821L,0x3882555d47281760L,0x134838f83724d6e3L,
+ 0xf2c679e022ddcda1L } },
+ /* 52 << 224 */
+ { { 0x40ee88156d2a5768L,0x7f227bd21c1e7e2dL,0x487ba134d04ff443L,
+ 0x76e2ff3dc614e54bL },
+ { 0x36b88d6fa3177ec7L,0xbf731d512328fff5L,0x758caea249ba158eL,
+ 0x5ab8ff4c02938188L } },
+ /* 53 << 224 */
+ { { 0x33e1605635edc56dL,0x5a69d3497e940d79L,0x6c4fd00103866dcbL,
+ 0x20a38f574893cdefL },
+ { 0xfbf3e790fac3a15bL,0x6ed7ea2e7a4f8e6bL,0xa663eb4fbc3aca86L,
+ 0x22061ea5080d53f7L } },
+ /* 54 << 224 */
+ { { 0x2480dfe6f546783fL,0xd38bc6da5a0a641eL,0xfb093cd12ede8965L,
+ 0x89654db4acb455cfL },
+ { 0x413cbf9a26e1adeeL,0x291f3764373294d4L,0x00797257648083feL,
+ 0x25f504d3208cc341L } },
+ /* 55 << 224 */
+ { { 0x635a8e5ec3a0ee43L,0x70aaebca679898ffL,0x9ee9f5475dc63d56L,
+ 0xce987966ffb34d00L },
+ { 0xf9f86b195e26310aL,0x9e435484382a8ca8L,0x253bcb81c2352fe4L,
+ 0xa4eac8b04474b571L } },
+ /* 56 << 224 */
+ { { 0xc1b97512c1ad8cf8L,0x193b4e9e99e0b697L,0x939d271601e85df0L,
+ 0x4fb265b3cd44eafdL },
+ { 0x321e7dcde51e1ae2L,0x8e3a8ca6e3d8b096L,0x8de46cb052604998L,
+ 0x91099ad839072aa7L } },
+ /* 57 << 224 */
+ { { 0x2617f91c93aa96b8L,0x0fc8716b7fca2e13L,0xa7106f5e95328723L,
+ 0xd1c9c40b262e6522L },
+ { 0xb9bafe8642b7c094L,0x1873439d1543c021L,0xe1baa5de5cbefd5dL,
+ 0xa363fc5e521e8affL } },
+ /* 58 << 224 */
+ { { 0xefe6320df862eaacL,0x14419c6322c647dcL,0x0e06707c4e46d428L,
+ 0xcb6c834f4a178f8fL },
+ { 0x0f993a45d30f917cL,0xd4c4b0499879afeeL,0xb6142a1e70500063L,
+ 0x7c9b41c3a5d9d605L } },
+ /* 59 << 224 */
+ { { 0xbc00fc2f2f8ba2c7L,0x0966eb2f7c67aa28L,0x13f7b5165a786972L,
+ 0x3bfb75578a2fbba0L },
+ { 0x131c4f235a2b9620L,0xbff3ed276faf46beL,0x9b4473d17e172323L,
+ 0x421e8878339f6246L } },
+ /* 60 << 224 */
+ { { 0x0fa8587a25a41632L,0xc0814124a35b6c93L,0x2b18a9f559ebb8dbL,
+ 0x264e335776edb29cL },
+ { 0xaf245ccdc87c51e2L,0x16b3015b501e6214L,0xbb31c5600a3882ceL,
+ 0x6961bb94fec11e04L } },
+ /* 61 << 224 */
+ { { 0x3b825b8deff7a3a0L,0xbec33738b1df7326L,0x68ad747c99604a1fL,
+ 0xd154c9349a3bd499L },
+ { 0xac33506f1cc7a906L,0x73bb53926c560e8fL,0x6428fcbe263e3944L,
+ 0xc11828d51c387434L } },
+ /* 62 << 224 */
+ { { 0x3cd04be13e4b12ffL,0xc3aad9f92d88667cL,0xc52ddcf8248120cfL,
+ 0x985a892e2a389532L },
+ { 0xfbb4b21b3bb85fa0L,0xf95375e08dfc6269L,0xfb4fb06c7ee2aceaL,
+ 0x6785426e309c4d1fL } },
+ /* 63 << 224 */
+ { { 0x659b17c8d8ceb147L,0x9b649eeeb70a5554L,0x6b7fa0b5ac6bc634L,
+ 0xd99fe2c71d6e732fL },
+ { 0x30e6e7628d3abba2L,0x18fee6e7a797b799L,0x5c9d360dc696464dL,
+ 0xe3baeb4827bfde12L } },
+ /* 64 << 224 */
+ { { 0x2bf5db47f23206d5L,0x2f6d34201d260152L,0x17b876533f8ff89aL,
+ 0x5157c30c378fa458L },
+ { 0x7517c5c52d4fb936L,0xef22f7ace6518cdcL,0xdeb483e6bf847a64L,
+ 0xf508455892e0fa89L } },
+ /* 0 << 231 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 231 */
+ { { 0xab9659d8df7304d4L,0xb71bcf1bff210e8eL,0xa9a2438bd73fbd60L,
+ 0x4595cd1f5d11b4deL },
+ { 0x9c0d329a4835859dL,0x4a0f0d2d7dbb6e56L,0xc6038e5edf928a4eL,
+ 0xc94296218f5ad154L } },
+ /* 2 << 231 */
+ { { 0x91213462f23f2d92L,0x6cab71bd60b94078L,0x6bdd0a63176cde20L,
+ 0x54c9b20cee4d54bcL },
+ { 0x3cd2d8aa9f2ac02fL,0x03f8e617206eedb0L,0xc7f68e1693086434L,
+ 0x831469c592dd3db9L } },
+ /* 3 << 231 */
+ { { 0x8521df248f981354L,0x587e23ec3588a259L,0xcbedf281d7a0992cL,
+ 0x06930a5538961407L },
+ { 0x09320debbe5bbe21L,0xa7ffa5b52491817fL,0xe6c8b4d909065160L,
+ 0xac4f3992fff6d2a9L } },
+ /* 4 << 231 */
+ { { 0x7aa7a1583ae9c1bdL,0xe0af6d98e37ce240L,0xe54342d928ab38b4L,
+ 0xe8b750070a1c98caL },
+ { 0xefce86afe02358f2L,0x31b8b856ea921228L,0x052a19120a1c67fcL,
+ 0xb4069ea4e3aead59L } },
+ /* 5 << 231 */
+ { { 0x3232d6e27fa03cb3L,0xdb938e5b0fdd7d88L,0x04c1d2cd2ccbfc5dL,
+ 0xd2f45c12af3a580fL },
+ { 0x592620b57883e614L,0x5fd27e68be7c5f26L,0x139e45a91567e1e3L,
+ 0x2cc71d2d44d8aaafL } },
+ /* 6 << 231 */
+ { { 0x4a9090cde36d0757L,0xf722d7b1d9a29382L,0xfb7fb04c04b48ddfL,
+ 0x628ad2a7ebe16f43L },
+ { 0xcd3fbfb520226040L,0x6c34ecb15104b6c4L,0x30c0754ec903c188L,
+ 0xec336b082d23cab0L } },
+ /* 7 << 231 */
+ { { 0x473d62a21e206ee5L,0xf1e274808c49a633L,0x87ab956ce9f6b2c3L,
+ 0x61830b4862b606eaL },
+ { 0x67cd6846e78e815fL,0xfe40139f4c02082aL,0x52bbbfcb952ec365L,
+ 0x74c116426b9836abL } },
+ /* 8 << 231 */
+ { { 0x9f51439e558df019L,0x230da4baac712b27L,0x518919e355185a24L,
+ 0x4dcefcdd84b78f50L },
+ { 0xa7d90fb2a47d4c5aL,0x55ac9abfb30e009eL,0xfd2fc35974eed273L,
+ 0xb72d824cdbea8fafL } },
+ /* 9 << 231 */
+ { { 0xce721a744513e2caL,0x0b41861238240b2cL,0x05199968d5baa450L,
+ 0xeb1757ed2b0e8c25L },
+ { 0x6ebc3e283dfac6d5L,0xb2431e2e48a237f5L,0x2acb5e2352f61499L,
+ 0x5558a2a7e06c936bL } },
+ /* 10 << 231 */
+ { { 0xd213f923cbb13d1bL,0x98799f425bfb9bfeL,0x1ae8ddc9701144a9L,
+ 0x0b8b3bb64c5595eeL },
+ { 0x0ea9ef2e3ecebb21L,0x17cb6c4b3671f9a7L,0x47ef464f726f1d1fL,
+ 0x171b94846943a276L } },
+ /* 11 << 231 */
+ { { 0x51a4ae2d7ef0329cL,0x0850922291c4402aL,0x64a61d35afd45bbcL,
+ 0x38f096fe3035a851L },
+ { 0xc7468b74a1dec027L,0xe8cf10e74fc7dcbaL,0xea35ff40f4a06353L,
+ 0x0b4c0dfa8b77dd66L } },
+ /* 12 << 231 */
+ { { 0x779b8552de7e5c19L,0xfab28609c1c0256cL,0x64f58eeeabd4743dL,
+ 0x4e8ef8387b6cc93bL },
+ { 0xee650d264cb1bf3dL,0x4c1f9d0973dedf61L,0xaef7c9d7bfb70cedL,
+ 0x1ec0507e1641de1eL } },
+ /* 13 << 231 */
+ { { 0xcd7e5cc7cde45079L,0xde173c9a516ac9e4L,0x517a8494c170315cL,
+ 0x438fd90591d8e8fbL },
+ { 0x5145c506c7d9630bL,0x6457a87bf47d4d75L,0xd31646bf0d9a80e8L,
+ 0x453add2bcef3aabeL } },
+ /* 14 << 231 */
+ { { 0xc9941109a607419dL,0xfaa71e62bb6bca80L,0x34158c1307c431f3L,
+ 0x594abebc992bc47aL },
+ { 0x6dfea691eb78399fL,0x48aafb353f42cba4L,0xedcd65af077c04f0L,
+ 0x1a29a366e884491aL } },
+ /* 15 << 231 */
+ { { 0x023a40e51c21f2bfL,0xf99a513ca5057aeeL,0xa3fe7e25bcab072eL,
+ 0x8568d2e140e32bcfL },
+ { 0x904594ebd3f69d9fL,0x181a973307affab1L,0xe4d68d76b6e330f4L,
+ 0x87a6dafbc75a7fc1L } },
+ /* 16 << 231 */
+ { { 0x549db2b5ef7d9289L,0x2480d4a8197f015aL,0x61d5590bc40493b6L,
+ 0x3a55b52e6f780331L },
+ { 0x40eb8115309eadb0L,0xdea7de5a92e5c625L,0x64d631f0cc6a3d5aL,
+ 0x9d5e9d7c93e8dd61L } },
+ /* 17 << 231 */
+ { { 0xf297bef5206d3ffcL,0x23d5e0337d808bd4L,0x4a4f6912d24cf5baL,
+ 0xe4d8163b09cdaa8aL },
+ { 0x0e0de9efd3082e8eL,0x4fe1246c0192f360L,0x1f9001504b8eee0aL,
+ 0x5219da81f1da391bL } },
+ /* 18 << 231 */
+ { { 0x7bf6a5c1f7ea25aaL,0xd165e6bffbb07d5fL,0xe353936189e78671L,
+ 0xa3fcac892bac4219L },
+ { 0xdfab6fd4f0baa8abL,0x5a4adac1e2c1c2e5L,0x6cd75e3140d85849L,
+ 0xce263fea19b39181L } },
+ /* 19 << 231 */
+ { { 0xcb6803d307032c72L,0x7f40d5ce790968c8L,0xa6de86bddce978f0L,
+ 0x25547c4f368f751cL },
+ { 0xb1e685fd65fb2a9eL,0xce69336f1eb9179cL,0xb15d1c2712504442L,
+ 0xb7df465cb911a06bL } },
+ /* 20 << 231 */
+ { { 0xb8d804a3315980cdL,0x693bc492fa3bebf7L,0x3578aeee2253c504L,
+ 0x158de498cd2474a2L },
+ { 0x1331f5c7cfda8368L,0xd2d7bbb378d7177eL,0xdf61133af3c1e46eL,
+ 0x5836ce7dd30e7be8L } },
+ /* 21 << 231 */
+ { { 0x83084f1994f834cbL,0xd35653d4429ed782L,0xa542f16f59e58243L,
+ 0xc2b52f650470a22dL },
+ { 0xe3b6221b18f23d96L,0xcb05abac3f5252b4L,0xca00938b87d61402L,
+ 0x2f186cdd411933e4L } },
+ /* 22 << 231 */
+ { { 0xe042ece59a29a5c5L,0xb19b3c073b6c8402L,0xc97667c719d92684L,
+ 0xb5624622ebc66372L },
+ { 0x0cb96e653c04fa02L,0x83a7176c8eaa39aaL,0x2033561deaa1633fL,
+ 0x45a9d0864533df73L } },
+ /* 23 << 231 */
+ { { 0xe0542c1d3dc090bcL,0x82c996efaa59c167L,0xe3f735e80ee7fc4dL,
+ 0x7b1793937c35db79L },
+ { 0xb6419e25f8c5dbfdL,0x4d9d7a1e1f327b04L,0x979f6f9b298dfca8L,
+ 0xc7c5dff18de9366aL } },
+ /* 24 << 231 */
+ { { 0x1b7a588d04c82bddL,0x68005534f8319dfdL,0xde8a55b5d8eb9580L,
+ 0x5ea886da8d5bca81L },
+ { 0xe8530a01252a0b4dL,0x1bffb4fe35eaa0a1L,0x2ad828b1d8e99563L,
+ 0x7de96ef595f9cd87L } },
+ /* 25 << 231 */
+ { { 0x4abb2d0cd77d970cL,0x03cfb933d33ef9cbL,0xb0547c018b211fe9L,
+ 0x2fe64809a56ed1c6L },
+ { 0xcb7d5624c2ac98ccL,0x2a1372c01a393e33L,0xc8d1ec1c29660521L,
+ 0xf3d31b04b37ac3e9L } },
+ /* 26 << 231 */
+ { { 0xa29ae9df5ece6e7cL,0x0603ac8f0facfb55L,0xcfe85b7adda233a5L,
+ 0xe618919fbd75f0b8L },
+ { 0xf555a3d299bf1603L,0x1f43afc9f184255aL,0xdcdaf341319a3e02L,
+ 0xd3b117ef03903a39L } },
+ /* 27 << 231 */
+ { { 0xe095da1365d1d131L,0x86f16367c37ad03eL,0x5f37389e462cd8ddL,
+ 0xc103fa04d67a60e6L },
+ { 0x57c34344f4b478f0L,0xce91edd8e117c98dL,0x001777b0231fc12eL,
+ 0x11ae47f2b207bccbL } },
+ /* 28 << 231 */
+ { { 0xd983cf8d20f8a242L,0x7aff5b1df22e1ad8L,0x68fd11d07fc4feb3L,
+ 0x5d53ae90b0f1c3e1L },
+ { 0x50fb7905ec041803L,0x85e3c97714404888L,0x0e67faedac628d8fL,
+ 0x2e8651506668532cL } },
+ /* 29 << 231 */
+ { { 0x15acaaa46a67a6b0L,0xf4cdee25b25cec41L,0x49ee565ae4c6701eL,
+ 0x2a04ca66fc7d63d8L },
+ { 0xeb105018ef0543fbL,0xf709a4f5d1b0d81dL,0x5b906ee62915d333L,
+ 0xf4a8741296f1f0abL } },
+ /* 30 << 231 */
+ { { 0xb6b82fa74d82f4c2L,0x90725a606804efb3L,0xbc82ec46adc3425eL,
+ 0xb7b805812787843eL },
+ { 0xdf46d91cdd1fc74cL,0xdc1c62cbe783a6c4L,0x59d1b9f31a04cbbaL,
+ 0xd87f6f7295e40764L } },
+ /* 31 << 231 */
+ { { 0x02b4cfc1317f4a76L,0x8d2703eb91036bceL,0x98206cc6a5e72a56L,
+ 0x57be9ed1cf53fb0fL },
+ { 0x09374571ef0b17acL,0x74b2655ed9181b38L,0xc8f80ea889935d0eL,
+ 0xc0d9e94291529936L } },
+ /* 32 << 231 */
+ { { 0x196860411e84e0e5L,0xa5db84d3aea34c93L,0xf9d5bb197073a732L,
+ 0xb8d2fe566bcfd7c0L },
+ { 0x45775f36f3eb82faL,0x8cb20cccfdff8b58L,0x1659b65f8374c110L,
+ 0xb8b4a422330c789aL } },
+ /* 33 << 231 */
+ { { 0x75e3c3ea6fe8208bL,0xbd74b9e4286e78feL,0x0be2e81bd7d93a1aL,
+ 0x7ed06e27dd0a5aaeL },
+ { 0x721f5a586be8b800L,0x428299d1d846db28L,0x95cb8e6b5be88ed3L,
+ 0xc3186b231c034e11L } },
+ /* 34 << 231 */
+ { { 0xa6312c9e8977d99bL,0xbe94433183f531e7L,0x8232c0c218d3b1d4L,
+ 0x617aae8be1247b73L },
+ { 0x40153fc4282aec3bL,0xc6063d2ff7b8f823L,0x68f10e583304f94cL,
+ 0x31efae74ee676346L } },
+ /* 35 << 231 */
+ { { 0xbadb6c6d40a9b97cL,0x14702c634f666256L,0xdeb954f15184b2e3L,
+ 0x5184a52694b6ca40L },
+ { 0xfff05337003c32eaL,0x5aa374dd205974c7L,0x9a7638544b0dd71aL,
+ 0x459cd27fdeb947ecL } },
+ /* 36 << 231 */
+ { { 0xa6e28161459c2b92L,0x2f020fa875ee8ef5L,0xb132ec2d30b06310L,
+ 0xc3e15899bc6a4530L },
+ { 0xdc5f53feaa3f451aL,0x3a3c7f23c2d9acacL,0x2ec2f8926b27e58bL,
+ 0x68466ee7d742799fL } },
+ /* 37 << 231 */
+ { { 0x98324dd41fa26613L,0xa2dc6dabbdc29d63L,0xf9675faad712d657L,
+ 0x813994be21fd8d15L },
+ { 0x5ccbb722fd4f7553L,0x5135ff8bf3a36b20L,0x44be28af69559df5L,
+ 0x40b65bed9d41bf30L } },
+ /* 38 << 231 */
+ { { 0xd98bf2a43734e520L,0x5e3abbe3209bdcbaL,0x77c76553bc945b35L,
+ 0x5331c093c6ef14aaL },
+ { 0x518ffe2976b60c80L,0x2285593b7ace16f8L,0xab1f64ccbe2b9784L,
+ 0xe8f2c0d9ab2421b6L } },
+ /* 39 << 231 */
+ { { 0x617d7174c1df065cL,0xafeeb5ab5f6578faL,0x16ff1329263b54a8L,
+ 0x45c55808c990dce3L },
+ { 0x42eab6c0ecc8c177L,0x799ea9b55982ecaaL,0xf65da244b607ef8eL,
+ 0x8ab226ce32a3fc2cL } },
+ /* 40 << 231 */
+ { { 0x745741e57ea973dcL,0x5c00ca7020888f2eL,0x7cdce3cf45fd9cf1L,
+ 0x8a741ef15507f872L },
+ { 0x47c51c2f196b4cecL,0x70d08e43c97ea618L,0x930da15c15b18a2bL,
+ 0x33b6c6782f610514L } },
+ /* 41 << 231 */
+ { { 0xc662e4f807ac9794L,0x1eccf050ba06cb79L,0x1ff08623e7d954e5L,
+ 0x6ef2c5fb24cf71c3L },
+ { 0xb2c063d267978453L,0xa0cf37961d654af8L,0x7cb242ea7ebdaa37L,
+ 0x206e0b10b86747e0L } },
+ /* 42 << 231 */
+ { { 0x481dae5fd5ecfefcL,0x07084fd8c2bff8fcL,0x8040a01aea324596L,
+ 0x4c646980d4de4036L },
+ { 0x9eb8ab4ed65abfc3L,0xe01cb91f13541ec7L,0x8f029adbfd695012L,
+ 0x9ae284833c7569ecL } },
+ /* 43 << 231 */
+ { { 0xa5614c9ea66d80a1L,0x680a3e4475f5f911L,0x0c07b14dceba4fc1L,
+ 0x891c285ba13071c1L },
+ { 0xcac67ceb799ece3cL,0x29b910a941e07e27L,0x66bdb409f2e43123L,
+ 0x06f8b1377ac9ecbeL } },
+ /* 44 << 231 */
+ { { 0x5981fafd38547090L,0x19ab8b9f85e3415dL,0xfc28c194c7e31b27L,
+ 0x843be0aa6fbcbb42L },
+ { 0xf3b1ed43a6db836cL,0x2a1330e401a45c05L,0x4f19f3c595c1a377L,
+ 0xa85f39d044b5ee33L } },
+ /* 45 << 231 */
+ { { 0x3da18e6d4ae52834L,0x5a403b397423dcb0L,0xbb555e0af2374aefL,
+ 0x2ad599c41e8ca111L },
+ { 0x1b3a2fb9014b3bf8L,0x73092684f66d5007L,0x079f1426c4340102L,
+ 0x1827cf818fddf4deL } },
+ /* 46 << 231 */
+ { { 0xc83605f6f10ff927L,0xd387145123739fc6L,0x6d163450cac1c2ccL,
+ 0x6b521296a2ec1ac5L },
+ { 0x0606c4f96e3cb4a5L,0xe47d3f41778abff7L,0x425a8d5ebe8e3a45L,
+ 0x53ea9e97a6102160L } },
+ /* 47 << 231 */
+ { { 0x477a106e39cbb688L,0x532401d2f3386d32L,0x8e564f64b1b9b421L,
+ 0xca9b838881dad33fL },
+ { 0xb1422b4e2093913eL,0x533d2f9269bc8112L,0x3fa017beebe7b2c7L,
+ 0xb2767c4acaf197c6L } },
+ /* 48 << 231 */
+ { { 0xc925ff87aedbae9fL,0x7daf0eb936880a54L,0x9284ddf59c4d0e71L,
+ 0x1581cf93316f8cf5L },
+ { 0x3eeca8873ac1f452L,0xb417fce9fb6aeffeL,0xa5918046eefb8dc3L,
+ 0x73d318ac02209400L } },
+ /* 49 << 231 */
+ { { 0xe800400f728693e5L,0xe87d814b339927edL,0x93e94d3b57ea9910L,
+ 0xff8a35b62245fb69L },
+ { 0x043853d77f200d34L,0x470f1e680f653ce1L,0x81ac05bd59a06379L,
+ 0xa14052c203930c29L } },
+ /* 50 << 231 */
+ { { 0x6b72fab526bc2797L,0x13670d1699f16771L,0x001700521e3e48d1L,
+ 0x978fe401b7adf678L },
+ { 0x55ecfb92d41c5dd4L,0x5ff8e247c7b27da5L,0xe7518272013fb606L,
+ 0x5768d7e52f547a3cL } },
+ /* 51 << 231 */
+ { { 0xbb24eaa360017a5fL,0x6b18e6e49c64ce9bL,0xc225c655103dde07L,
+ 0xfc3672ae7592f7eaL },
+ { 0x9606ad77d06283a1L,0x542fc650e4d59d99L,0xabb57c492a40e7c2L,
+ 0xac948f13a8db9f55L } },
+ /* 52 << 231 */
+ { { 0x6d4c9682b04465c3L,0xe3d062fa6468bd15L,0xa51729ac5f318d7eL,
+ 0x1fc87df69eb6fc95L },
+ { 0x63d146a80591f652L,0xa861b8f7589621aaL,0x59f5f15ace31348cL,
+ 0x8f663391440da6daL } },
+ /* 53 << 231 */
+ { { 0xcfa778acb591ffa3L,0x027ca9c54cdfebceL,0xbe8e05a5444ea6b3L,
+ 0x8aab4e69a78d8254L },
+ { 0x2437f04fb474d6b8L,0x6597ffd4045b3855L,0xbb0aea4eca47ecaaL,
+ 0x568aae8385c7ebfcL } },
+ /* 54 << 231 */
+ { { 0x0e966e64c73b2383L,0x49eb3447d17d8762L,0xde1078218da05dabL,
+ 0x443d8baa016b7236L },
+ { 0x163b63a5ea7610d6L,0xe47e4185ce1ca979L,0xae648b6580baa132L,
+ 0xebf53de20e0d5b64L } },
+ /* 55 << 231 */
+ { { 0x8d3bfcb4d3c8c1caL,0x0d914ef35d04b309L,0x55ef64153de7d395L,
+ 0xbde1666f26b850e8L },
+ { 0xdbe1ca6ed449ab19L,0x8902b322e89a2672L,0xb1674b7edacb7a53L,
+ 0x8e9faf6ef52523ffL } },
+ /* 56 << 231 */
+ { { 0x6ba535da9a85788bL,0xd21f03aebd0626d4L,0x099f8c47e873dc64L,
+ 0xcda8564d018ec97eL },
+ { 0x3e8d7a5cde92c68cL,0x78e035a173323cc4L,0x3ef26275f880ff7cL,
+ 0xa4ee3dff273eedaaL } },
+ /* 57 << 231 */
+ { { 0x58823507af4e18f8L,0x967ec9b50672f328L,0x9ded19d9559d3186L,
+ 0x5e2ab3de6cdce39cL },
+ { 0xabad6e4d11c226dfL,0xf9783f4387723014L,0x9a49a0cf1a885719L,
+ 0xfc0c1a5a90da9dbfL } },
+ /* 58 << 231 */
+ { { 0x8bbaec49571d92acL,0x569e85fe4692517fL,0x8333b014a14ea4afL,
+ 0x32f2a62f12e5c5adL },
+ { 0x98c2ce3a06d89b85L,0xb90741aa2ff77a08L,0x2530defc01f795a2L,
+ 0xd6e5ba0b84b3c199L } },
+ /* 59 << 231 */
+ { { 0x7d8e845112e4c936L,0xae419f7dbd0be17bL,0xa583fc8c22262bc9L,
+ 0x6b842ac791bfe2bdL },
+ { 0x33cef4e9440d6827L,0x5f69f4deef81fb14L,0xf16cf6f6234fbb92L,
+ 0x76ae3fc3d9e7e158L } },
+ /* 60 << 231 */
+ { { 0x4e89f6c2e9740b33L,0x677bc85d4962d6a1L,0x6c6d8a7f68d10d15L,
+ 0x5f9a72240257b1cdL },
+ { 0x7096b9164ad85961L,0x5f8c47f7e657ab4aL,0xde57d7d0f7461d7eL,
+ 0x7eb6094d80ce5ee2L } },
+ /* 61 << 231 */
+ { { 0x0b1e1dfd34190547L,0x8a394f43f05dd150L,0x0a9eb24d97df44e6L,
+ 0x78ca06bf87675719L },
+ { 0x6f0b34626ffeec22L,0x9d91bcea36cdd8fbL,0xac83363ca105be47L,
+ 0x81ba76c1069710e3L } },
+ /* 62 << 231 */
+ { { 0x3d1b24cb28c682c6L,0x27f252288612575bL,0xb587c779e8e66e98L,
+ 0x7b0c03e9405eb1feL },
+ { 0xfdf0d03015b548e7L,0xa8be76e038b36af7L,0x4cdab04a4f310c40L,
+ 0x6287223ef47ecaecL } },
+ /* 63 << 231 */
+ { { 0x678e60558b399320L,0x61fe3fa6c01e4646L,0xc482866b03261a5eL,
+ 0xdfcf45b85c2f244aL },
+ { 0x8fab9a512f684b43L,0xf796c654c7220a66L,0x1d90707ef5afa58fL,
+ 0x2c421d974fdbe0deL } },
+ /* 64 << 231 */
+ { { 0xc4f4cda3af2ebc2fL,0xa0af843dcb4efe24L,0x53b857c19ccd10b1L,
+ 0xddc9d1eb914d3e04L },
+ { 0x7bdec8bb62771debL,0x829277aa91c5aa81L,0x7af18dd6832391aeL,
+ 0x1740f316c71a84caL } },
+ /* 0 << 238 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 238 */
+ { { 0x8928e99aeeaf8c49L,0xee7aa73d6e24d728L,0x4c5007c2e72b156cL,
+ 0x5fcf57c5ed408a1dL },
+ { 0x9f719e39b6057604L,0x7d343c01c2868bbfL,0x2cca254b7e103e2dL,
+ 0xe6eb38a9f131bea2L } },
+ /* 2 << 238 */
+ { { 0xb33e624f8be762b4L,0x2a9ee4d1058e3413L,0x968e636967d805faL,
+ 0x9848949b7db8bfd7L },
+ { 0x5308d7e5d23a8417L,0x892f3b1df3e29da5L,0xc95c139e3dee471fL,
+ 0x8631594dd757e089L } },
+ /* 3 << 238 */
+ { { 0xe0c82a3cde918dccL,0x2e7b599426fdcf4bL,0x82c5024932cb1b2dL,
+ 0xea613a9d7657ae07L },
+ { 0xc2eb5f6cf1fdc9f7L,0xb6eae8b8879fe682L,0x253dfee0591cbc7fL,
+ 0x000da7133e1290e6L } },
+ /* 4 << 238 */
+ { { 0x1083e2ea1f095615L,0x0a28ad7714e68c33L,0x6bfc02523d8818beL,
+ 0xb585113af35850cdL },
+ { 0x7d935f0b30df8aa1L,0xaddda07c4ab7e3acL,0x92c34299552f00cbL,
+ 0xc33ed1de2909df6cL } },
+ /* 5 << 238 */
+ { { 0x22c2195d80e87766L,0x9e99e6d89ddf4ac0L,0x09642e4e65e74934L,
+ 0x2610ffa2ff1ff241L },
+ { 0x4d1d47d4751c8159L,0x697b4985af3a9363L,0x0318ca4687477c33L,
+ 0xa90cb5659441eff3L } },
+ /* 6 << 238 */
+ { { 0x58bb384836f024cbL,0x85be1f7736016168L,0x6c59587cdc7e07f1L,
+ 0x191be071af1d8f02L },
+ { 0xbf169fa5cca5e55cL,0x3864ba3cf7d04eacL,0x915e367f8d7d05dbL,
+ 0xb48a876da6549e5dL } },
+ /* 7 << 238 */
+ { { 0xef89c656580e40a2L,0xf194ed8c728068bcL,0x74528045a47990c9L,
+ 0xf53fc7d75e1a4649L },
+ { 0xbec5ae9b78593e7dL,0x2cac4ee341db65d7L,0xa8c1eb2404a3d39bL,
+ 0x53b7d63403f8f3efL } },
+ /* 8 << 238 */
+ { { 0x2dc40d483e07113cL,0x6e4a5d397d8b63aeL,0x5582a94b79684c2bL,
+ 0x932b33d4622da26cL },
+ { 0xf534f6510dbbf08dL,0x211d07c964c23a52L,0x0eeece0fee5bdc9bL,
+ 0xdf178168f7015558L } },
+ /* 9 << 238 */
+ { { 0xd42946350a712229L,0x93cbe44809273f8cL,0x00b095ef8f13bc83L,
+ 0xbb7419728798978cL },
+ { 0x9d7309a256dbe6e7L,0xe578ec565a5d39ecL,0x3961151b851f9a31L,
+ 0x2da7715de5709eb4L } },
+ /* 10 << 238 */
+ { { 0x867f301753dfabf0L,0x728d2078b8e39259L,0x5c75a0cd815d9958L,
+ 0xf84867a616603be1L },
+ { 0xc865b13d70e35b1cL,0x0241446819b03e2cL,0xe46041daac1f3121L,
+ 0x7c9017ad6f028a7cL } },
+ /* 11 << 238 */
+ { { 0xabc96de90a482873L,0x4265d6b1b77e54d4L,0x68c38e79a57d88e7L,
+ 0xd461d7669ce82de3L },
+ { 0x817a9ec564a7e489L,0xcc5675cda0def5f2L,0x9a00e785985d494eL,
+ 0xc626833f1b03514aL } },
+ /* 12 << 238 */
+ { { 0xabe7905a83cdd60eL,0x50602fb5a1170184L,0x689886cdb023642aL,
+ 0xd568d090a6e1fb00L },
+ { 0x5b1922c70259217fL,0x93831cd9c43141e4L,0xdfca35870c95f86eL,
+ 0xdec2057a568ae828L } },
+ /* 13 << 238 */
+ { { 0xc44ea599f98a759aL,0x55a0a7a2f7c23c1dL,0xd5ffb6e694c4f687L,
+ 0x3563cce212848478L },
+ { 0x812b3517e7b1fbe1L,0x8a7dc9794f7338e0L,0x211ecee952d048dbL,
+ 0x2eea4056c86ea3b8L } },
+ /* 14 << 238 */
+ { { 0xd8cb68a7ba772b34L,0xe16ed3415f4e2541L,0x9b32f6a60fec14dbL,
+ 0xeee376f7391698beL },
+ { 0xe9a7aa1783674c02L,0x65832f975843022aL,0x29f3a8da5ba4990fL,
+ 0x79a59c3afb8e3216L } },
+ /* 15 << 238 */
+ { { 0x9cdc4d2ebd19bb16L,0xc6c7cfd0b3262d86L,0xd4ce14d0969c0b47L,
+ 0x1fa352b713e56128L },
+ { 0x383d55b8973db6d3L,0x71836850e8e5b7bfL,0xc7714596e6bb571fL,
+ 0x259df31f2d5b2dd2L } },
+ /* 16 << 238 */
+ { { 0x568f8925913cc16dL,0x18bc5b6de1a26f5aL,0xdfa413bef5f499aeL,
+ 0xf8835decc3f0ae84L },
+ { 0xb6e60bd865a40ab0L,0x65596439194b377eL,0xbcd8562592084a69L,
+ 0x5ce433b94f23ede0L } },
+ /* 17 << 238 */
+ { { 0xe8e8f04f6ad65143L,0x11511827d6e14af6L,0x3d390a108295c0c7L,
+ 0x71e29ee4621eba16L },
+ { 0xa588fc0963717b46L,0x02be02fee06ad4a2L,0x931558c604c22b22L,
+ 0xbb4d4bd612f3c849L } },
+ /* 18 << 238 */
+ { { 0x54a4f49620efd662L,0x92ba6d20c5952d14L,0x2db8ea1ecc9784c2L,
+ 0x81cc10ca4b353644L },
+ { 0x40b570ad4b4d7f6cL,0x5c9f1d9684a1dcd2L,0x01379f813147e797L,
+ 0xe5c6097b2bd499f5L } },
+ /* 19 << 238 */
+ { { 0x40dcafa6328e5e20L,0xf7b5244a54815550L,0xb9a4f11847bfc978L,
+ 0x0ea0e79fd25825b1L },
+ { 0xa50f96eb646c7ecfL,0xeb811493446dea9dL,0x2af04677dfabcf69L,
+ 0xbe3a068fc713f6e8L } },
+ /* 20 << 238 */
+ { { 0x860d523d42e06189L,0xbf0779414e3aff13L,0x0b616dcac1b20650L,
+ 0xe66dd6d12131300dL },
+ { 0xd4a0fd67ff99abdeL,0xc9903550c7aac50dL,0x022ecf8b7c46b2d7L,
+ 0x3333b1e83abf92afL } },
+ /* 21 << 238 */
+ { { 0x11cc113c6c491c14L,0x0597668880dd3f88L,0xf5b4d9e729d932edL,
+ 0xe982aad8a2c38b6dL },
+ { 0x6f9253478be0dcf0L,0x700080ae65ca53f2L,0xd8131156443ca77fL,
+ 0xe92d6942ec51f984L } },
+ /* 22 << 238 */
+ { { 0xd2a08af885dfe9aeL,0xd825d9a54d2a86caL,0x2c53988d39dff020L,
+ 0xf38b135a430cdc40L },
+ { 0x0c918ae062a7150bL,0xf31fd8de0c340e9bL,0xafa0e7ae4dbbf02eL,
+ 0x5847fb2a5eba6239L } },
+ /* 23 << 238 */
+ { { 0x6b1647dcdccbac8bL,0xb642aa7806f485c8L,0x873f37657038ecdfL,
+ 0x2ce5e865fa49d3feL },
+ { 0xea223788c98c4400L,0x8104a8cdf1fa5279L,0xbcf7cc7a06becfd7L,
+ 0x49424316c8f974aeL } },
+ /* 24 << 238 */
+ { { 0xc0da65e784d6365dL,0xbcb7443f8f759fb8L,0x35c712b17ae81930L,
+ 0x80428dff4c6e08abL },
+ { 0xf19dafefa4faf843L,0xced8538dffa9855fL,0x20ac409cbe3ac7ceL,
+ 0x358c1fb6882da71eL } },
+ /* 25 << 238 */
+ { { 0xafa9c0e5fd349961L,0x2b2cfa518421c2fcL,0x2a80db17f3a28d38L,
+ 0xa8aba5395d138e7eL },
+ { 0x52012d1d6e96eb8dL,0x65d8dea0cbaf9622L,0x57735447b264f56cL,
+ 0xbeebef3f1b6c8da2L } },
+ /* 26 << 238 */
+ { { 0xfc346d98ce785254L,0xd50e8d72bb64a161L,0xc03567c749794addL,
+ 0x15a76065752c7ef6L },
+ { 0x59f3a222961f23d6L,0x378e443873ecc0b0L,0xc74be4345a82fde4L,
+ 0xae509af2d8b9cf34L } },
+ /* 27 << 238 */
+ { { 0x4a61ee46577f44a1L,0xe09b748cb611deebL,0xc0481b2cf5f7b884L,
+ 0x3562667861acfa6bL },
+ { 0x37f4c518bf8d21e6L,0x22d96531b205a76dL,0x37fb85e1954073c0L,
+ 0xbceafe4f65b3a567L } },
+ /* 28 << 238 */
+ { { 0xefecdef7be42a582L,0xd3fc608065046be6L,0xc9af13c809e8dba9L,
+ 0x1e6c9847641491ffL },
+ { 0x3b574925d30c31f7L,0xb7eb72baac2a2122L,0x776a0dacef0859e7L,
+ 0x06fec31421900942L } },
+ /* 29 << 238 */
+ { { 0x2464bc10f8c22049L,0x9bfbcce7875ebf69L,0xd7a88e2a4336326bL,
+ 0xda05261c5bc2acfaL },
+ { 0xc29f5bdceba7efc8L,0x471237ca25dbbf2eL,0xa72773f22975f127L,
+ 0xdc744e8e04d0b326L } },
+ /* 30 << 238 */
+ { { 0x38a7ed16a56edb73L,0x64357e372c007e70L,0xa167d15b5080b400L,
+ 0x07b4116423de4be1L },
+ { 0xb2d91e3274c89883L,0x3c1628212882e7edL,0xad6b36ba7503e482L,
+ 0x48434e8e0ea34331L } },
+ /* 31 << 238 */
+ { { 0x79f4f24f2c7ae0b9L,0xc46fbf811939b44aL,0x76fefae856595eb1L,
+ 0x417b66abcd5f29c7L },
+ { 0x5f2332b2c5ceec20L,0xd69661ffe1a1cae2L,0x5ede7e529b0286e6L,
+ 0x9d062529e276b993L } },
+ /* 32 << 238 */
+ { { 0x324794b07e50122bL,0xdd744f8b4af07ca5L,0x30a12f08d63fc97bL,
+ 0x39650f1a76626d9dL },
+ { 0x101b47f71fa38477L,0x3d815f19d4dc124fL,0x1569ae95b26eb58aL,
+ 0xc3cde18895fb1887L } },
+ /* 33 << 238 */
+ { { 0x54e9f37bf9539a48L,0xb0100e067408c1a5L,0x821d9811ea580cbbL,
+ 0x8af52d3586e50c56L },
+ { 0xdfbd9d47dbbf698bL,0x2961a1ea03dc1c73L,0x203d38f8e76a5df8L,
+ 0x08a53a686def707aL } },
+ /* 34 << 238 */
+ { { 0x26eefb481bee45d4L,0xb3cee3463c688036L,0x463c5315c42f2469L,
+ 0x19d84d2e81378162L },
+ { 0x22d7c3c51c4d349fL,0x65965844163d59c5L,0xcf198c56b8abceaeL,
+ 0x6fb1fb1b628559d5L } },
+ /* 35 << 238 */
+ { { 0x8bbffd0607bf8fe3L,0x46259c583467734bL,0xd8953cea35f7f0d3L,
+ 0x1f0bece2d65b0ff1L },
+ { 0xf7d5b4b3f3c72914L,0x29e8ea953cb53389L,0x4a365626836b6d46L,
+ 0xe849f910ea174fdeL } },
+ /* 36 << 238 */
+ { { 0x7ec62fbbf4737f21L,0xd8dba5ab6209f5acL,0x24b5d7a9a5f9adbeL,
+ 0x707d28f7a61dc768L },
+ { 0x7711460bcaa999eaL,0xba7b174d1c92e4ccL,0x3c4bab6618d4bf2dL,
+ 0xb8f0c980eb8bd279L } },
+ /* 37 << 238 */
+ { { 0x024bea9a324b4737L,0xfba9e42332a83bcaL,0x6e635643a232dcedL,
+ 0x996193672571c8baL },
+ { 0xe8c9f35754b7032bL,0xf936b3ba2442d54aL,0x2263f0f08290c65aL,
+ 0x48989780ee2c7fdbL } },
+ /* 38 << 238 */
+ { { 0xadc5d55a13d4f95eL,0x737cff85ad9b8500L,0x271c557b8a73f43dL,
+ 0xbed617a4e18bc476L },
+ { 0x662454017dfd8ab2L,0xae7b89ae3a2870aaL,0x1b555f5323a7e545L,
+ 0x6791e247be057e4cL } },
+ /* 39 << 238 */
+ { { 0x860136ad324fa34dL,0xea1114474cbeae28L,0x023a4270bedd3299L,
+ 0x3d5c3a7fc1c35c34L },
+ { 0xb0f6db678d0412d2L,0xd92625e2fcdc6b9aL,0x92ae5ccc4e28a982L,
+ 0xea251c3647a3ce7eL } },
+ /* 40 << 238 */
+ { { 0x9d658932790691bfL,0xed61058906b736aeL,0x712c2f04c0d63b6eL,
+ 0x5cf06fd5c63d488fL },
+ { 0x97363facd9588e41L,0x1f9bf7622b93257eL,0xa9d1ffc4667acaceL,
+ 0x1cf4a1aa0a061ecfL } },
+ /* 41 << 238 */
+ { { 0x40e48a49dc1818d0L,0x0643ff39a3621ab0L,0x5768640ce39ef639L,
+ 0x1fc099ea04d86854L },
+ { 0x9130b9c3eccd28fdL,0xd743cbd27eec54abL,0x052b146fe5b475b6L,
+ 0x058d9a82900a7d1fL } },
+ /* 42 << 238 */
+ { { 0x65e0229291262b72L,0x96f924f9bb0edf03L,0x5cfa59c8fe206842L,
+ 0xf60370045eafa720L },
+ { 0x5f30699e18d7dd96L,0x381e8782cbab2495L,0x91669b46dd8be949L,
+ 0xb40606f526aae8efL } },
+ /* 43 << 238 */
+ { { 0x2812b839fc6751a4L,0x16196214fba800efL,0x4398d5ca4c1a2875L,
+ 0x720c00ee653d8349L },
+ { 0xc2699eb0d820007cL,0x880ee660a39b5825L,0x70694694471f6984L,
+ 0xf7d16ea8e3dda99aL } },
+ /* 44 << 238 */
+ { { 0x28d675b2c0519a23L,0x9ebf94fe4f6952e3L,0xf28bb767a2294a8aL,
+ 0x85512b4dfe0af3f5L },
+ { 0x18958ba899b16a0dL,0x95c2430cba7548a7L,0xb30d1b10a16be615L,
+ 0xe3ebbb9785bfb74cL } },
+ /* 45 << 238 */
+ { { 0xa3273cfe18549fdbL,0xf6e200bf4fcdb792L,0x54a76e1883aba56cL,
+ 0x73ec66f689ef6aa2L },
+ { 0x8d17add7d1b9a305L,0xa959c5b9b7ae1b9dL,0x886435226bcc094aL,
+ 0xcc5616c4d7d429b9L } },
+ /* 46 << 238 */
+ { { 0xa6dada01e6a33f7cL,0xc6217a079d4e70adL,0xd619a81809c15b7cL,
+ 0xea06b3290e80c854L },
+ { 0x174811cea5f5e7b9L,0x66dfc310787c65f4L,0x4ea7bd693316ab54L,
+ 0xc12c4acb1dcc0f70L } },
+ /* 47 << 238 */
+ { { 0xe4308d1a1e407dd9L,0xe8a3587c91afa997L,0xea296c12ab77b7a5L,
+ 0xb5ad49e4673c0d52L },
+ { 0x40f9b2b27006085aL,0xa88ff34087bf6ec2L,0x978603b14e3066a6L,
+ 0xb3f99fc2b5e486e2L } },
+ /* 48 << 238 */
+ { { 0x07b53f5eb2e63645L,0xbe57e54784c84232L,0xd779c2167214d5cfL,
+ 0x617969cd029a3acaL },
+ { 0xd17668cd8a7017a0L,0x77b4d19abe9b7ee8L,0x58fd0e939c161776L,
+ 0xa8c4f4efd5968a72L } },
+ /* 49 << 238 */
+ { { 0x296071cc67b3de77L,0xae3c0b8e634f7905L,0x67e440c28a7100c9L,
+ 0xbb8c3c1beb4b9b42L },
+ { 0x6d71e8eac51b3583L,0x7591f5af9525e642L,0xf73a2f7b13f509f3L,
+ 0x618487aa5619ac9bL } },
+ /* 50 << 238 */
+ { { 0x3a72e5f79d61718aL,0x00413bcc7592d28cL,0x7d9b11d3963c35cfL,
+ 0x77623bcfb90a46edL },
+ { 0xdeef273bdcdd2a50L,0x4a741f9b0601846eL,0x33b89e510ec6e929L,
+ 0xcb02319f8b7f22cdL } },
+ /* 51 << 238 */
+ { { 0xbbe1500d084bae24L,0x2f0ae8d7343d2693L,0xacffb5f27cdef811L,
+ 0xaa0c030a263fb94fL },
+ { 0x6eef0d61a0f442deL,0xf92e181727b139d3L,0x1ae6deb70ad8bc28L,
+ 0xa89e38dcc0514130L } },
+ /* 52 << 238 */
+ { { 0x81eeb865d2fdca23L,0x5a15ee08cc8ef895L,0x768fa10a01905614L,
+ 0xeff5b8ef880ee19bL },
+ { 0xf0c0cabbcb1c8a0eL,0x2e1ee9cdb8c838f9L,0x0587d8b88a4a14c0L,
+ 0xf6f278962ff698e5L } },
+ /* 53 << 238 */
+ { { 0xed38ef1c89ee6256L,0xf44ee1fe6b353b45L,0x9115c0c770e903b3L,
+ 0xc78ec0a1818f31dfL },
+ { 0x6c003324b7dccbc6L,0xd96dd1f3163bbc25L,0x33aa82dd5cedd805L,
+ 0x123aae4f7f7eb2f1L } },
+ /* 54 << 238 */
+ { { 0x1723fcf5a26262cdL,0x1f7f4d5d0060ebd5L,0xf19c5c01b2eaa3afL,
+ 0x2ccb9b149790accfL },
+ { 0x1f9c1cad52324aa6L,0x632005267247df54L,0x5732fe42bac96f82L,
+ 0x52fe771f01a1c384L } },
+ /* 55 << 238 */
+ { { 0x546ca13db1001684L,0xb56b4eeea1709f75L,0x266545a9d5db8672L,
+ 0xed971c901e8f3cfbL },
+ { 0x4e7d8691e3a07b29L,0x7570d9ece4b696b9L,0xdc5fa0677bc7e9aeL,
+ 0x68b44cafc82c4844L } },
+ /* 56 << 238 */
+ { { 0x519d34b3bf44da80L,0x283834f95ab32e66L,0x6e6087976278a000L,
+ 0x1e62960e627312f6L },
+ { 0x9b87b27be6901c55L,0x80e7853824fdbc1fL,0xbbbc09512facc27dL,
+ 0x06394239ac143b5aL } },
+ /* 57 << 238 */
+ { { 0x35bb4a40376c1944L,0x7cb6269463da1511L,0xafd29161b7148a3bL,
+ 0xa6f9d9ed4e2ea2eeL },
+ { 0x15dc2ca2880dd212L,0x903c3813a61139a9L,0x2aa7b46d6c0f8785L,
+ 0x36ce2871901c60ffL } },
+ /* 58 << 238 */
+ { { 0xc683b028e10d9c12L,0x7573baa2032f33d3L,0x87a9b1f667a31b58L,
+ 0xfd3ed11af4ffae12L },
+ { 0x83dcaa9a0cb2748eL,0x8239f0185d6fdf16L,0xba67b49c72753941L,
+ 0x2beec455c321cb36L } },
+ /* 59 << 238 */
+ { { 0x880156063f8b84ceL,0x764170838d38c86fL,0x054f1ca7598953ddL,
+ 0xc939e1104e8e7429L },
+ { 0x9b1ac2b35a914f2fL,0x39e35ed3e74b8f9cL,0xd0debdb2781b2fb0L,
+ 0x1585638f2d997ba2L } },
+ /* 60 << 238 */
+ { { 0x9c4b646e9e2fce99L,0x68a210811e80857fL,0x06d54e443643b52aL,
+ 0xde8d6d630d8eb843L },
+ { 0x7032156342146a0aL,0x8ba826f25eaa3622L,0x227a58bd86138787L,
+ 0x43b6c03c10281d37L } },
+ /* 61 << 238 */
+ { { 0x6326afbbb54dde39L,0x744e5e8adb6f2d5fL,0x48b2a99acff158e1L,
+ 0xa93c8fa0ef87918fL },
+ { 0x2182f956de058c5cL,0x216235d2936f9e7aL,0xace0c0dbd2e31e67L,
+ 0xc96449bff23ac3e7L } },
+ /* 62 << 238 */
+ { { 0x7e9a2874170693bdL,0xa28e14fda45e6335L,0x5757f6b356427344L,
+ 0x822e4556acf8edf9L },
+ { 0x2b7a6ee2e6a285cdL,0x5866f211a9df3af0L,0x40dde2ddf845b844L,
+ 0x986c3726110e5e49L } },
+ /* 63 << 238 */
+ { { 0x73680c2af7172277L,0x57b94f0f0cccb244L,0xbdff72672d438ca7L,
+ 0xbad1ce11cf4663fdL },
+ { 0x9813ed9dd8f71caeL,0xf43272a6961fdaa6L,0xbeff0119bd6d1637L,
+ 0xfebc4f9130361978L } },
+ /* 64 << 238 */
+ { { 0x02b37a952f41deffL,0x0e44a59ae63b89b7L,0x673257dc143ff951L,
+ 0x19c02205d752baf4L },
+ { 0x46c23069c4b7d692L,0x2e6392c3fd1502acL,0x6057b1a21b220846L,
+ 0xe51ff9460c1b5b63L } },
+ /* 0 << 245 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 245 */
+ { { 0x6e85cb51566c5c43L,0xcff9c9193597f046L,0x9354e90c4994d94aL,
+ 0xe0a393322147927dL },
+ { 0x8427fac10dc1eb2bL,0x88cfd8c22ff319faL,0xe2d4e68401965274L,
+ 0xfa2e067d67aaa746L } },
+ /* 2 << 245 */
+ { { 0xb6d92a7f3e5f9f11L,0x9afe153ad6cb3b8eL,0x4d1a6dd7ddf800bdL,
+ 0xf6c13cc0caf17e19L },
+ { 0x15f6c58e325fc3eeL,0x71095400a31dc3b2L,0x168e7c07afa3d3e7L,
+ 0x3f8417a194c7ae2dL } },
+ /* 3 << 245 */
+ { { 0xec234772813b230dL,0x634d0f5f17344427L,0x11548ab1d77fc56aL,
+ 0x7fab1750ce06af77L },
+ { 0xb62c10a74f7c4f83L,0xa7d2edc4220a67d9L,0x1c404170921209a0L,
+ 0x0b9815a0face59f0L } },
+ /* 4 << 245 */
+ { { 0x2842589b319540c3L,0x18490f59a283d6f8L,0xa2731f84daae9fcbL,
+ 0x3db6d960c3683ba0L },
+ { 0xc85c63bb14611069L,0xb19436af0788bf05L,0x905459df347460d2L,
+ 0x73f6e094e11a7db1L } },
+ /* 5 << 245 */
+ { { 0xdc7f938eb6357f37L,0xc5d00f792bd8aa62L,0xc878dcb92ca979fcL,
+ 0x37e83ed9eb023a99L },
+ { 0x6b23e2731560bf3dL,0x1086e4591d0fae61L,0x782483169a9414bdL,
+ 0x1b956bc0f0ea9ea1L } },
+ /* 6 << 245 */
+ { { 0x7b85bb91c31b9c38L,0x0c5aa90b48ef57b5L,0xdedeb169af3bab6fL,
+ 0xe610ad732d373685L },
+ { 0xf13870df02ba8e15L,0x0337edb68ca7f771L,0xe4acf747b62c036cL,
+ 0xd921d576b6b94e81L } },
+ /* 7 << 245 */
+ { { 0xdbc864392c422f7aL,0xfb635362ed348898L,0x83084668c45bfcd1L,
+ 0xc357c9e32b315e11L },
+ { 0xb173b5405b2e5b8cL,0x7e946931e102b9a4L,0x17c890eb7b0fb199L,
+ 0xec225a83d61b662bL } },
+ /* 8 << 245 */
+ { { 0xf306a3c8ee3c76cbL,0x3cf11623d32a1f6eL,0xe6d5ab646863e956L,
+ 0x3b8a4cbe5c005c26L },
+ { 0xdcd529a59ce6bb27L,0xc4afaa5204d4b16fL,0xb0624a267923798dL,
+ 0x85e56df66b307fabL } },
+ /* 9 << 245 */
+ { { 0x0281893c2bf29698L,0x91fc19a4d7ce7603L,0x75a5dca3ad9a558fL,
+ 0x40ceb3fa4d50bf77L },
+ { 0x1baf6060bc9ba369L,0x927e1037597888c2L,0xd936bf1986a34c07L,
+ 0xd4cf10c1c34ae980L } },
+ /* 10 << 245 */
+ { { 0x3a3e5334859dd614L,0x9c475b5b18d0c8eeL,0x63080d1f07cd51d5L,
+ 0xc9c0d0a6b88b4326L },
+ { 0x1ac98691c234296fL,0x2a0a83a494887fb6L,0x565114270cea9cf2L,
+ 0x5230a6e8a24802f5L } },
+ /* 11 << 245 */
+ { { 0xf7a2bf0f72e3d5c1L,0x377174464f21439eL,0xfedcbf259ce30334L,
+ 0xe0030a787ce202f9L },
+ { 0x6f2d9ebf1202e9caL,0xe79dde6c75e6e591L,0xf52072aff1dac4f8L,
+ 0x6c8d087ebb9b404dL } },
+ /* 12 << 245 */
+ { { 0xad0fc73dbce913afL,0x909e587b458a07cbL,0x1300da84d4f00c8aL,
+ 0x425cd048b54466acL },
+ { 0xb59cb9be90e9d8bfL,0x991616db3e431b0eL,0xd3aa117a531aecffL,
+ 0x91af92d359f4dc3bL } },
+ /* 13 << 245 */
+ { { 0x9b1ec292e93fda29L,0x76bb6c17e97d91bcL,0x7509d95faface1e6L,
+ 0x3653fe47be855ae3L },
+ { 0x73180b280f680e75L,0x75eefd1beeb6c26cL,0xa4cdf29fb66d4236L,
+ 0x2d70a9976b5821d8L } },
+ /* 14 << 245 */
+ { { 0x7a3ee20720445c36L,0x71d1ac8259877174L,0x0fc539f7949f73e9L,
+ 0xd05cf3d7982e3081L },
+ { 0x8758e20b7b1c7129L,0xffadcc20569e61f2L,0xb05d3a2f59544c2dL,
+ 0xbe16f5c19fff5e53L } },
+ /* 15 << 245 */
+ { { 0x73cf65b8aad58135L,0x622c2119037aa5beL,0x79373b3f646fd6a0L,
+ 0x0e029db50d3978cfL },
+ { 0x8bdfc43794fba037L,0xaefbd687620797a6L,0x3fa5382bbd30d38eL,
+ 0x7627cfbf585d7464L } },
+ /* 16 << 245 */
+ { { 0xb2330fef4e4ca463L,0xbcef72873566cc63L,0xd161d2cacf780900L,
+ 0x135dc5395b54827dL },
+ { 0x638f052e27bf1bc6L,0x10a224f007dfa06cL,0xe973586d6d3321daL,
+ 0x8b0c573826152c8fL } },
+ /* 17 << 245 */
+ { { 0x07ef4f2a34606074L,0x80fe7fe8a0f7047aL,0x3d1a8152e1a0e306L,
+ 0x32cf43d888da5222L },
+ { 0xbf89a95f5f02ffe6L,0x3d9eb9a4806ad3eaL,0x012c17bb79c8e55eL,
+ 0xfdcd1a7499c81dacL } },
+ /* 18 << 245 */
+ { { 0x7043178bb9556098L,0x4090a1df801c3886L,0x759800ff9b67b912L,
+ 0x3e5c0304232620c8L },
+ { 0x4b9d3c4b70dceecaL,0xbb2d3c15181f648eL,0xf981d8376e33345cL,
+ 0xb626289b0cf2297aL } },
+ /* 19 << 245 */
+ { { 0x766ac6598baebdcfL,0x1a28ae0975df01e5L,0xb71283da375876d8L,
+ 0x4865a96d607b9800L },
+ { 0x25dd1bcd237936b2L,0x332f4f4b60417494L,0xd0923d68370a2147L,
+ 0x497f5dfbdc842203L } },
+ /* 20 << 245 */
+ { { 0x9dc74cbd32be5e0fL,0x7475bcb717a01375L,0x438477c950d872b1L,
+ 0xcec67879ffe1d63dL },
+ { 0x9b006014d8578c70L,0xc9ad99a878bb6b8bL,0x6799008e11fb3806L,
+ 0xcfe81435cd44cab3L } },
+ /* 21 << 245 */
+ { { 0xa2ee15822f4fb344L,0xb8823450483fa6ebL,0x622d323d652c7749L,
+ 0xd8474a98beb0a15bL },
+ { 0xe43c154d5d1c00d0L,0x7fd581d90e3e7aacL,0x2b44c6192525ddf8L,
+ 0x67a033ebb8ae9739L } },
+ /* 22 << 245 */
+ { { 0x113ffec19ef2d2e4L,0x1bf6767ed5a0ea7fL,0x57fff75e03714c0aL,
+ 0xa23c422e0a23e9eeL },
+ { 0xdd5f6b2d540f83afL,0xc2c2c27e55ea46a7L,0xeb6b4246672a1208L,
+ 0xd13599f7ae634f7aL } },
+ /* 23 << 245 */
+ { { 0xcf914b5cd7b32c6eL,0x61a5a640eaf61814L,0x8dc3df8b208a1bbbL,
+ 0xef627fd6b6d79aa5L },
+ { 0x44232ffcc4c86bc8L,0xe6f9231b061539feL,0x1d04f25a958b9533L,
+ 0x180cf93449e8c885L } },
+ /* 24 << 245 */
+ { { 0x896895959884aaf7L,0xb1959be307b348a6L,0x96250e573c147c87L,
+ 0xae0efb3add0c61f8L },
+ { 0xed00745eca8c325eL,0x3c911696ecff3f70L,0x73acbc65319ad41dL,
+ 0x7b01a020f0b1c7efL } },
+ /* 25 << 245 */
+ { { 0xea32b29363a1483fL,0x89eabe717a248f96L,0x9c6231d3343157e5L,
+ 0x93a375e5df3c546dL },
+ { 0xe76e93436a2afe69L,0xc4f89100e166c88eL,0x248efd0d4f872093L,
+ 0xae0eb3ea8fe0ea61L } },
+ /* 26 << 245 */
+ { { 0xaf89790d9d79046eL,0x4d650f2d6cee0976L,0xa3935d9a43071ecaL,
+ 0x66fcd2c9283b0bfeL },
+ { 0x0e665eb5696605f1L,0xe77e5d07a54cd38dL,0x90ee050a43d950cfL,
+ 0x86ddebdad32e69b5L } },
+ /* 27 << 245 */
+ { { 0x6ad94a3dfddf7415L,0xf7fa13093f6e8d5aL,0xc4831d1de9957f75L,
+ 0x7de28501d5817447L },
+ { 0x6f1d70789e2aeb6bL,0xba2b9ff4f67a53c2L,0x36963767df9defc3L,
+ 0x479deed30d38022cL } },
+ /* 28 << 245 */
+ { { 0xd2edb89b3a8631e8L,0x8de855de7a213746L,0xb2056cb7b00c5f11L,
+ 0xdeaefbd02c9b85e4L },
+ { 0x03f39a8dd150892dL,0x37b84686218b7985L,0x36296dd8b7375f1aL,
+ 0x472cd4b1b78e898eL } },
+ /* 29 << 245 */
+ { { 0x15dff651e9f05de9L,0xd40450692ce98ba9L,0x8466a7ae9b38024cL,
+ 0xb910e700e5a6b5efL },
+ { 0xae1c56eab3aa8f0dL,0xbab2a5077eee74a6L,0x0dca11e24b4c4620L,
+ 0xfd896e2e4c47d1f4L } },
+ /* 30 << 245 */
+ { { 0xeb45ae53308fbd93L,0x46cd5a2e02c36fdaL,0x6a3d4e90baa48385L,
+ 0xdd55e62e9dbe9960L },
+ { 0xa1406aa02a81ede7L,0x6860dd14f9274ea7L,0xcfdcb0c280414f86L,
+ 0xff410b1022f94327L } },
+ /* 31 << 245 */
+ { { 0x5a33cc3849ad467bL,0xefb48b6c0a7335f1L,0x14fb54a4b153a360L,
+ 0x604aa9d2b52469ccL },
+ { 0x5e9dc486754e48e9L,0x693cb45537471e8eL,0xfb2fd7cd8d3b37b6L,
+ 0x63345e16cf09ff07L } },
+ /* 32 << 245 */
+ { { 0x9910ba6b23a5d896L,0x1fe19e357fe4364eL,0x6e1da8c39a33c677L,
+ 0x15b4488b29fd9fd0L },
+ { 0x1f4392541a1f22bfL,0x920a8a70ab8163e8L,0x3fd1b24907e5658eL,
+ 0xf2c4f79cb6ec839bL } },
+ /* 33 << 245 */
+ { { 0x1abbc3d04aa38d1bL,0x3b0db35cb5d9510eL,0x1754ac783e60dec0L,
+ 0x53272fd7ea099b33L },
+ { 0x5fb0494f07a8e107L,0x4a89e1376a8191faL,0xa113b7f63c4ad544L,
+ 0x88a2e9096cb9897bL } },
+ /* 34 << 245 */
+ { { 0x17d55de3b44a3f84L,0xacb2f34417c6c690L,0x3208816810232390L,
+ 0xf2e8a61f6c733bf7L },
+ { 0xa774aab69c2d7652L,0xfb5307e3ed95c5bcL,0xa05c73c24981f110L,
+ 0x1baae31ca39458c9L } },
+ /* 35 << 245 */
+ { { 0x1def185bcbea62e7L,0xe8ac9eaeeaf63059L,0x098a8cfd9921851cL,
+ 0xd959c3f13abe2f5bL },
+ { 0xa4f1952520e40ae5L,0x320789e307a24aa1L,0x259e69277392b2bcL,
+ 0x58f6c6671918668bL } },
+ /* 36 << 245 */
+ { { 0xce1db2bbc55d2d8bL,0x41d58bb7f4f6ca56L,0x7650b6808f877614L,
+ 0x905e16baf4c349edL },
+ { 0xed415140f661acacL,0x3b8784f0cb2270afL,0x3bc280ac8a402cbaL,
+ 0xd53f71460937921aL } },
+ /* 37 << 245 */
+ { { 0xc03c8ee5e5681e83L,0x62126105f6ac9e4aL,0x9503a53f936b1a38L,
+ 0x3d45e2d4782fecbdL },
+ { 0x69a5c43976e8ae98L,0xb53b2eebbfb4b00eL,0xf167471272386c89L,
+ 0x30ca34a24268bce4L } },
+ /* 38 << 245 */
+ { { 0x7f1ed86c78341730L,0x8ef5beb8b525e248L,0xbbc489fdb74fbf38L,
+ 0x38a92a0e91a0b382L },
+ { 0x7a77ba3f22433ccfL,0xde8362d6a29f05a9L,0x7f6a30ea61189afcL,
+ 0x693b550559ef114fL } },
+ /* 39 << 245 */
+ { { 0x50266bc0cd1797a1L,0xea17b47ef4b7af2dL,0xd6c4025c3df9483eL,
+ 0x8cbb9d9fa37b18c9L },
+ { 0x91cbfd9c4d8424cfL,0xdb7048f1ab1c3506L,0x9eaf641f028206a3L,
+ 0xf986f3f925bdf6ceL } },
+ /* 40 << 245 */
+ { { 0x262143b5224c08dcL,0x2bbb09b481b50c91L,0xc16ed709aca8c84fL,
+ 0xa6210d9db2850ca8L },
+ { 0x6d8df67a09cb54d6L,0x91eef6e0500919a4L,0x90f613810f132857L,
+ 0x9acede47f8d5028bL } },
+ /* 41 << 245 */
+ { { 0x844d1b7190b771c3L,0x563b71e4ba6426beL,0x2efa2e83bdb802ffL,
+ 0x3410cbabab5b4a41L },
+ { 0x555b2d2630da84ddL,0xd0711ae9ee1cc29aL,0xcf3e8c602f547792L,
+ 0x03d7d5dedc678b35L } },
+ /* 42 << 245 */
+ { { 0x071a2fa8ced806b8L,0x222e6134697f1478L,0xdc16fd5dabfcdbbfL,
+ 0x44912ebf121b53b8L },
+ { 0xac9436742496c27cL,0x8ea3176c1ffc26b0L,0xb6e224ac13debf2cL,
+ 0x524cc235f372a832L } },
+ /* 43 << 245 */
+ { { 0xd706e1d89f6f1b18L,0x2552f00544cce35bL,0x8c8326c2a88e31fcL,
+ 0xb5468b2cf9552047L },
+ { 0xce683e883ff90f2bL,0x77947bdf2f0a5423L,0xd0a1b28bed56e328L,
+ 0xaee35253c20134acL } },
+ /* 44 << 245 */
+ { { 0x7e98367d3567962fL,0x379ed61f8188bffbL,0x73bba348faf130a1L,
+ 0x6c1f75e1904ed734L },
+ { 0x189566423b4a79fcL,0xf20bc83d54ef4493L,0x836d425d9111eca1L,
+ 0xe5b5c318009a8dcfL } },
+ /* 45 << 245 */
+ { { 0x3360b25d13221bc5L,0x707baad26b3eeaf7L,0xd7279ed8743a95a1L,
+ 0x7450a875969e809fL },
+ { 0x32b6bd53e5d0338fL,0x1e77f7af2b883bbcL,0x90da12cc1063ecd0L,
+ 0xe2697b58c315be47L } },
+ /* 46 << 245 */
+ { { 0x2771a5bdda85d534L,0x53e78c1fff980eeaL,0xadf1cf84900385e7L,
+ 0x7d3b14f6c9387b62L },
+ { 0x170e74b0cb8f2bd2L,0x2d50b486827fa993L,0xcdbe8c9af6f32babL,
+ 0x55e906b0c3b93ab8L } },
+ /* 47 << 245 */
+ { { 0x747f22fc8fe280d1L,0xcd8e0de5b2e114abL,0x5ab7dbebe10b68b0L,
+ 0x9dc63a9ca480d4b2L },
+ { 0x78d4bc3b4be1495fL,0x25eb3db89359122dL,0x3f8ac05b0809cbdcL,
+ 0xbf4187bbd37c702fL } },
+ /* 48 << 245 */
+ { { 0x84cea0691416a6a5L,0x8f860c7943ef881cL,0x41311f8a38038a5dL,
+ 0xe78c2ec0fc612067L },
+ { 0x494d2e815ad73581L,0xb4cc9e0059604097L,0xff558aecf3612cbaL,
+ 0x35beef7a9e36c39eL } },
+ /* 49 << 245 */
+ { { 0x1845c7cfdbcf41b9L,0x5703662aaea997c0L,0x8b925afee402f6d8L,
+ 0xd0a1b1ae4dd72162L },
+ { 0x9f47b37503c41c4bL,0xa023829b0391d042L,0x5f5045c3503b8b0aL,
+ 0x123c268898c010e5L } },
+ /* 50 << 245 */
+ { { 0x324ec0cc36ba06eeL,0xface31153dd2cc0cL,0xb364f3bef333e91fL,
+ 0xef8aff7328e832b0L },
+ { 0x1e9bad042d05841bL,0x42f0e3df356a21e2L,0xa3270bcb4add627eL,
+ 0xb09a8158d322e711L } },
+ /* 51 << 245 */
+ { { 0x86e326a10fee104aL,0xad7788f83703f65dL,0x7e76543047bc4833L,
+ 0x6cee582b2b9b893aL },
+ { 0x9cd2a167e8f55a7bL,0xefbee3c6d9e4190dL,0x33ee7185d40c2e9dL,
+ 0x844cc9c5a380b548L } },
+ /* 52 << 245 */
+ { { 0x323f8ecd66926e04L,0x0001e38f8110c1baL,0x8dbcac12fc6a7f07L,
+ 0xd65e1d580cec0827L },
+ { 0xd2cd4141be76ca2dL,0x7895cf5ce892f33aL,0x956d230d367139d2L,
+ 0xa91abd3ed012c4c1L } },
+ /* 53 << 245 */
+ { { 0x34fa488387eb36bfL,0xc5f07102914b8fb4L,0x90f0e579adb9c95fL,
+ 0xfe6ea8cb28888195L },
+ { 0x7b9b5065edfa9284L,0x6c510bd22b8c8d65L,0xd7b8ebefcbe8aafdL,
+ 0xedb3af9896b1da07L } },
+ /* 54 << 245 */
+ { { 0x28ff779d6295d426L,0x0c4f6ac73fa3ad7bL,0xec44d0548b8e2604L,
+ 0x9b32a66d8b0050e1L },
+ { 0x1f943366f0476ce2L,0x7554d953a602c7b4L,0xbe35aca6524f2809L,
+ 0xb6881229fd4edbeaL } },
+ /* 55 << 245 */
+ { { 0xe8cd0c8f508efb63L,0x9eb5b5c86abcefc7L,0xf5621f5fb441ab4fL,
+ 0x79e6c046b76a2b22L },
+ { 0x74a4792ce37a1f69L,0xcbd252cb03542b60L,0x785f65d5b3c20bd3L,
+ 0x8dea61434fabc60cL } },
+ /* 56 << 245 */
+ { { 0x45e21446de673629L,0x57f7aa1e703c2d21L,0xa0e99b7f98c868c7L,
+ 0x4e42f66d8b641676L },
+ { 0x602884dc91077896L,0xa0d690cfc2c9885bL,0xfeb4da333b9a5187L,
+ 0x5f789598153c87eeL } },
+ /* 57 << 245 */
+ { { 0x2192dd4752b16dbaL,0xdeefc0e63524c1b1L,0x465ea76ee4383693L,
+ 0x79401711361b8d98L },
+ { 0xa5f9ace9f21a15cbL,0x73d26163efee9aebL,0xcca844b3e677016cL,
+ 0x6c122b0757eaee06L } },
+ /* 58 << 245 */
+ { { 0xb782dce715f09690L,0x508b9b122dfc0fc9L,0x9015ab4b65d89fc6L,
+ 0x5e79dab7d6d5bb0fL },
+ { 0x64f021f06c775aa2L,0xdf09d8cc37c7eca1L,0x9a761367ef2fa506L,
+ 0xed4ca4765b81eec6L } },
+ /* 59 << 245 */
+ { { 0x262ede3610bbb8b5L,0x0737ce830641ada3L,0x4c94288ae9831cccL,
+ 0x487fc1ce8065e635L },
+ { 0xb13d7ab3b8bb3659L,0xdea5df3e855e4120L,0xb9a1857385eb0244L,
+ 0x1a1b8ea3a7cfe0a3L } },
+ /* 60 << 245 */
+ { { 0x3b83711967b0867cL,0x8d5e0d089d364520L,0x52dccc1ed930f0e3L,
+ 0xefbbcec7bf20bbafL },
+ { 0x99cffcab0263ad10L,0xd8199e6dfcd18f8aL,0x64e2773fe9f10617L,
+ 0x0079e8e108704848L } },
+ /* 61 << 245 */
+ { { 0x1169989f8a342283L,0x8097799ca83012e6L,0xece966cb8a6a9001L,
+ 0x93b3afef072ac7fcL },
+ { 0xe6893a2a2db3d5baL,0x263dc46289bf4fdcL,0x8852dfc9e0396673L,
+ 0x7ac708953af362b6L } },
+ /* 62 << 245 */
+ { { 0xbb9cce4d5c2f342bL,0xbf80907ab52d7aaeL,0x97f3d3cd2161bcd0L,
+ 0xb25b08340962744dL },
+ { 0xc5b18ea56c3a1ddaL,0xfe4ec7eb06c92317L,0xb787b890ad1c4afeL,
+ 0xdccd9a920ede801aL } },
+ /* 63 << 245 */
+ { { 0x9ac6dddadb58da1fL,0x22bbc12fb8cae6eeL,0xc6f8bced815c4a43L,
+ 0x8105a92cf96480c7L },
+ { 0x0dc3dbf37a859d51L,0xe3ec7ce63041196bL,0xd9f64b250d1067c9L,
+ 0xf23213213d1f8dd8L } },
+ /* 64 << 245 */
+ { { 0x8b5c619c76497ee8L,0x5d2b0ac6c717370eL,0x98204cb64fcf68e1L,
+ 0x0bdec21162bc6792L },
+ { 0x6973ccefa63b1011L,0xf9e3fa97e0de1ac5L,0x5efb693e3d0e0c8bL,
+ 0x037248e9d2d4fcb4L } },
+ /* 0 << 252 */
+ { { 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 << 252 */
+ { { 0x80802dc91ec34f9eL,0xd8772d3533810603L,0x3f06d66c530cb4f3L,
+ 0x7be5ed0dc475c129L },
+ { 0xcb9e3c1931e82b10L,0xc63d2857c9ff6b4cL,0xb92118c692a1b45eL,
+ 0x0aec44147285bbcaL } },
+ /* 2 << 252 */
+ { { 0xfc189ae71e29a3efL,0xcbe906f04c93302eL,0xd0107914ceaae10eL,
+ 0xb7a23f34b68e19f8L },
+ { 0xe9d875c2efd2119dL,0x03198c6efcadc9c8L,0x65591bf64da17113L,
+ 0x3cf0bbf83d443038L } },
+ /* 3 << 252 */
+ { { 0xae485bb72b724759L,0x945353e1b2d4c63aL,0x82159d07de7d6f2cL,
+ 0x389caef34ec5b109L },
+ { 0x4a8ebb53db65ef14L,0x2dc2cb7edd99de43L,0x816fa3ed83f2405fL,
+ 0x73429bb9c14208a3L } },
+ /* 4 << 252 */
+ { { 0xb618d590b01e6e27L,0x047e2ccde180b2dcL,0xd1b299b504aea4a9L,
+ 0x412c9e1e9fa403a4L },
+ { 0x88d28a3679407552L,0x49c50136f332b8e3L,0x3a1b6fcce668de19L,
+ 0x178851bc75122b97L } },
+ /* 5 << 252 */
+ { { 0xb1e13752fb85fa4cL,0xd61257ce383c8ce9L,0xd43da670d2f74daeL,
+ 0xa35aa23fbf846bbbL },
+ { 0x5e74235d4421fc83L,0xf6df8ee0c363473bL,0x34d7f52a3c4aa158L,
+ 0x50d05aab9bc6d22eL } },
+ /* 6 << 252 */
+ { { 0x8c56e735a64785f4L,0xbc56637b5f29cd07L,0x53b2bb803ee35067L,
+ 0x50235a0fdc919270L },
+ { 0x191ab6d8f2c4aa65L,0xc34758318396023bL,0x80400ba5f0f805baL,
+ 0x8881065b5ec0f80fL } },
+ /* 7 << 252 */
+ { { 0xc370e522cc1b5e83L,0xde2d4ad1860b8bfbL,0xad364df067b256dfL,
+ 0x8f12502ee0138997L },
+ { 0x503fa0dc7783920aL,0xe80014adc0bc866aL,0x3f89b744d3064ba6L,
+ 0x03511dcdcba5dba5L } },
+ /* 8 << 252 */
+ { { 0x197dd46d95a7b1a2L,0x9c4e7ad63c6341fbL,0x426eca29484c2eceL,
+ 0x9211e489de7f4f8aL },
+ { 0x14997f6ec78ef1f4L,0x2b2c091006574586L,0x17286a6e1c3eede8L,
+ 0x25f92e470f60e018L } },
+ /* 9 << 252 */
+ { { 0x805c564631890a36L,0x703ef60057feea5bL,0x389f747caf3c3030L,
+ 0xe0e5daeb54dd3739L },
+ { 0xfe24a4c3c9c9f155L,0x7e4bf176b5393962L,0x37183de2af20bf29L,
+ 0x4a1bd7b5f95a8c3bL } },
+ /* 10 << 252 */
+ { { 0xa83b969946191d3dL,0x281fc8dd7b87f257L,0xb18e2c1354107588L,
+ 0x6372def79b2bafe8L },
+ { 0xdaf4bb480d8972caL,0x3f2dd4b756167a3fL,0x1eace32d84310cf4L,
+ 0xe3bcefafe42700aaL } },
+ /* 11 << 252 */
+ { { 0x5fe5691ed785e73dL,0xa5db5ab62ea60467L,0x02e23d41dfc6514aL,
+ 0x35e8048ee03c3665L },
+ { 0x3f8b118f1adaa0f8L,0x28ec3b4584ce1a5aL,0xe8cacc6e2c6646b8L,
+ 0x1343d185dbd0e40fL } },
+ /* 12 << 252 */
+ { { 0xe5d7f844caaa358cL,0x1a1db7e49924182aL,0xd64cd42d9c875d9aL,
+ 0xb37b515f042eeec8L },
+ { 0x4d4dd4097b165fbeL,0xfc322ed9e206eff3L,0x7dee410259b7e17eL,
+ 0x55a481c08236ca00L } },
+ /* 13 << 252 */
+ { { 0x8c885312c23fc975L,0x1571580605d6297bL,0xa078868ef78edd39L,
+ 0x956b31e003c45e52L },
+ { 0x470275d5ff7b33a6L,0xc8d5dc3a0c7e673fL,0x419227b47e2f2598L,
+ 0x8b37b6344c14a975L } },
+ /* 14 << 252 */
+ { { 0xd0667ed68b11888cL,0x5e0e8c3e803e25dcL,0x34e5d0dcb987a24aL,
+ 0x9f40ac3bae920323L },
+ { 0x5463de9534e0f63aL,0xa128bf926b6328f9L,0x491ccd7cda64f1b7L,
+ 0x7ef1ec27c47bde35L } },
+ /* 15 << 252 */
+ { { 0xa857240fa36a2737L,0x35dc136663621bc1L,0x7a3a6453d4fb6897L,
+ 0x80f1a439c929319dL },
+ { 0xfc18274bf8cb0ba0L,0xb0b537668078c5ebL,0xfb0d49241e01d0efL,
+ 0x50d7c67d372ab09cL } },
+ /* 16 << 252 */
+ { { 0xb4e370af3aeac968L,0xe4f7fee9c4b63266L,0xb4acd4c2e3ac5664L,
+ 0xf8910bd2ceb38cbfL },
+ { 0x1c3ae50cc9c0726eL,0x15309569d97b40bfL,0x70884b7ffd5a5a1bL,
+ 0x3890896aef8314cdL } },
+ /* 17 << 252 */
+ { { 0x58e1515ca5618c93L,0xe665432b77d942d1L,0xb32181bfb6f767a8L,
+ 0x753794e83a604110L },
+ { 0x09afeb7ce8c0dbccL,0x31e02613598673a3L,0x5d98e5577d46db00L,
+ 0xfc21fb8c9d985b28L } },
+ /* 18 << 252 */
+ { { 0xc9040116b0843e0bL,0x53b1b3a869b04531L,0xdd1649f085d7d830L,
+ 0xbb3bcc87cb7427e8L },
+ { 0x77261100c93dce83L,0x7e79da61a1922a2aL,0x587a2b02f3149ce8L,
+ 0x147e1384de92ec83L } },
+ /* 19 << 252 */
+ { { 0x484c83d3af077f30L,0xea78f8440658b53aL,0x912076c2027aec53L,
+ 0xf34714e393c8177dL },
+ { 0x37ef5d15c2376c84L,0x8315b6593d1aa783L,0x3a75c484ef852a90L,
+ 0x0ba0c58a16086bd4L } },
+ /* 20 << 252 */
+ { { 0x29688d7a529a6d48L,0x9c7f250dc2f19203L,0x123042fb682e2df9L,
+ 0x2b7587e7ad8121bcL },
+ { 0x30fc0233e0182a65L,0xb82ecf87e3e1128aL,0x7168286193fb098fL,
+ 0x043e21ae85e9e6a7L } },
+ /* 21 << 252 */
+ { { 0xab5b49d666c834eaL,0x3be43e1847414287L,0xf40fb859219a2a47L,
+ 0x0e6559e9cc58df3cL },
+ { 0xfe1dfe8e0c6615b4L,0x14abc8fd56459d70L,0x7be0fa8e05de0386L,
+ 0x8e63ef68e9035c7cL } },
+ /* 22 << 252 */
+ { { 0x116401b453b31e91L,0x0cba7ad44436b4d8L,0x9151f9a0107afd66L,
+ 0xafaca8d01f0ee4c4L },
+ { 0x75fe5c1d9ee9761cL,0x3497a16bf0c0588fL,0x3ee2bebd0304804cL,
+ 0xa8fb9a60c2c990b9L } },
+ /* 23 << 252 */
+ { { 0xd14d32fe39251114L,0x36bf25bccac73366L,0xc9562c66dba7495cL,
+ 0x324d301b46ad348bL },
+ { 0x9f46620cd670407eL,0x0ea8d4f1e3733a01L,0xd396d532b0c324e0L,
+ 0x5b211a0e03c317cdL } },
+ /* 24 << 252 */
+ { { 0x090d7d205ffe7b37L,0x3b7f3efb1747d2daL,0xa2cb525fb54fc519L,
+ 0x6e220932f66a971eL },
+ { 0xddc160dfb486d440L,0x7fcfec463fe13465L,0x83da7e4e76e4c151L,
+ 0xd6fa48a1d8d302b5L } },
+ /* 25 << 252 */
+ { { 0xc6304f265872cd88L,0x806c1d3c278b90a1L,0x3553e725caf0bc1cL,
+ 0xff59e603bb9d8d5cL },
+ { 0xa4550f327a0b85ddL,0xdec5720a93ecc217L,0x0b88b74169d62213L,
+ 0x7212f2455b365955L } },
+ /* 26 << 252 */
+ { { 0x20764111b5cae787L,0x13cb7f581dfd3124L,0x2dca77da1175aefbL,
+ 0xeb75466bffaae775L },
+ { 0x74d76f3bdb6cff32L,0x7440f37a61fcda9aL,0x1bb3ac92b525028bL,
+ 0x20fbf8f7a1975f29L } },
+ /* 27 << 252 */
+ { { 0x982692e1df83097fL,0x28738f6c554b0800L,0xdc703717a2ce2f2fL,
+ 0x7913b93c40814194L },
+ { 0x049245931fe89636L,0x7b98443ff78834a6L,0x11c6ab015114a5a1L,
+ 0x60deb383ffba5f4cL } },
+ /* 28 << 252 */
+ { { 0x4caa54c601a982e6L,0x1dd35e113491cd26L,0x973c315f7cbd6b05L,
+ 0xcab0077552494724L },
+ { 0x04659b1f6565e15aL,0xbf30f5298c8fb026L,0xfc21641ba8a0de37L,
+ 0xe9c7a366fa5e5114L } },
+ /* 29 << 252 */
+ { { 0xdb849ca552f03ad8L,0xc7e8dbe9024e35c0L,0xa1a2bbaccfc3c789L,
+ 0xbf733e7d9c26f262L },
+ { 0x882ffbf5b8444823L,0xb7224e886bf8483bL,0x53023b8b65bef640L,
+ 0xaabfec91d4d5f8cdL } },
+ /* 30 << 252 */
+ { { 0xa40e1510079ea1bdL,0x1ad9addcd05d5d26L,0xdb3f2eab13e68d4fL,
+ 0x1cff1ae2640f803fL },
+ { 0xe0e7b749d4cee117L,0x8e9f275b4036d909L,0xce34e31d8f4d4c38L,
+ 0x22b37f69d75130fcL } },
+ /* 31 << 252 */
+ { { 0x83e0f1fdb4014604L,0xa8ce991989415078L,0x82375b7541792efeL,
+ 0x4f59bf5c97d4515bL },
+ { 0xac4f324f923a277dL,0xd9bc9b7d650f3406L,0xc6fa87d18a39bc51L,
+ 0x825885305ccc108fL } },
+ /* 32 << 252 */
+ { { 0x5ced3c9f82e4c634L,0x8efb83143a4464f8L,0xe706381b7a1dca25L,
+ 0x6cd15a3c5a2a412bL },
+ { 0x9347a8fdbfcd8fb5L,0x31db2eef6e54cd22L,0xc4aeb11ef8d8932fL,
+ 0x11e7c1ed344411afL } },
+ /* 33 << 252 */
+ { { 0x2653050cdc9a151eL,0x9edbfc083bb0a859L,0x926c81c7fd5691e7L,
+ 0x9c1b23426f39019aL },
+ { 0x64a81c8b7f8474b9L,0x90657c0701761819L,0x390b333155e0375aL,
+ 0xc676c626b6ebc47dL } },
+ /* 34 << 252 */
+ { { 0x51623247b7d6dee8L,0x0948d92779659313L,0x99700161e9ab35edL,
+ 0x06cc32b48ddde408L },
+ { 0x6f2fd664061ef338L,0x1606fa02c202e9edL,0x55388bc1929ba99bL,
+ 0xc4428c5e1e81df69L } },
+ /* 35 << 252 */
+ { { 0xce2028aef91b0b2aL,0xce870a23f03dfd3fL,0x66ec2c870affe8edL,
+ 0xb205fb46284d0c00L },
+ { 0xbf5dffe744cefa48L,0xb6fc37a8a19876d7L,0xbecfa84c08b72863L,
+ 0xd7205ff52576374fL } },
+ /* 36 << 252 */
+ { { 0x80330d328887de41L,0x5de0df0c869ea534L,0x13f427533c56ea17L,
+ 0xeb1f6069452b1a78L },
+ { 0x50474396e30ea15cL,0x575816a1c1494125L,0xbe1ce55bfe6bb38fL,
+ 0xb901a94896ae30f7L } },
+ /* 37 << 252 */
+ { { 0xe5af0f08d8fc3548L,0x5010b5d0d73bfd08L,0x993d288053fe655aL,
+ 0x99f2630b1c1309fdL },
+ { 0xd8677bafb4e3b76fL,0x14e51ddcb840784bL,0x326c750cbf0092ceL,
+ 0xc83d306bf528320fL } },
+ /* 38 << 252 */
+ { { 0xc445671577d4715cL,0xd30019f96b703235L,0x207ccb2ed669e986L,
+ 0x57c824aff6dbfc28L },
+ { 0xf0eb532fd8f92a23L,0x4a557fd49bb98fd2L,0xa57acea7c1e6199aL,
+ 0x0c6638208b94b1edL } },
+ /* 39 << 252 */
+ { { 0x9b42be8ff83a9266L,0xc7741c970101bd45L,0x95770c1107bd9cebL,
+ 0x1f50250a8b2e0744L },
+ { 0xf762eec81477b654L,0xc65b900e15efe59aL,0x88c961489546a897L,
+ 0x7e8025b3c30b4d7cL } },
+ /* 40 << 252 */
+ { { 0xae4065ef12045cf9L,0x6fcb2caf9ccce8bdL,0x1fa0ba4ef2cf6525L,
+ 0xf683125dcb72c312L },
+ { 0xa01da4eae312410eL,0x67e286776cd8e830L,0xabd9575298fb3f07L,
+ 0x05f11e11eef649a5L } },
+ /* 41 << 252 */
+ { { 0xba47faef9d3472c2L,0x3adff697c77d1345L,0x4761fa04dd15afeeL,
+ 0x64f1f61ab9e69462L },
+ { 0xfa691fab9bfb9093L,0x3df8ae8fa1133dfeL,0xcd5f896758cc710dL,
+ 0xfbb88d5016c7fe79L } },
+ /* 42 << 252 */
+ { { 0x8e011b4ce88c50d1L,0x7532e807a8771c4fL,0x64c78a48e2278ee4L,
+ 0x0b283e833845072aL },
+ { 0x98a6f29149e69274L,0xb96e96681868b21cL,0x38f0adc2b1a8908eL,
+ 0x90afcff71feb829dL } },
+ /* 43 << 252 */
+ { { 0x9915a383210b0856L,0xa5a80602def04889L,0x800e9af97c64d509L,
+ 0x81382d0bb8996f6fL },
+ { 0x490eba5381927e27L,0x46c63b324af50182L,0x784c5fd9d3ad62ceL,
+ 0xe4fa1870f8ae8736L } },
+ /* 44 << 252 */
+ { { 0x4ec9d0bcd7466b25L,0x84ddbe1adb235c65L,0x5e2645ee163c1688L,
+ 0x570bd00e00eba747L },
+ { 0xfa51b629128bfa0fL,0x92fce1bd6c1d3b68L,0x3e7361dcb66778b1L,
+ 0x9c7d249d5561d2bbL } },
+ /* 45 << 252 */
+ { { 0xa40b28bf0bbc6229L,0x1c83c05edfd91497L,0x5f9f5154f083df05L,
+ 0xbac38b3ceee66c9dL },
+ { 0xf71db7e3ec0dfcfdL,0xf2ecda8e8b0a8416L,0x52fddd867812aa66L,
+ 0x2896ef104e6f4272L } },
+ /* 46 << 252 */
+ { { 0xff27186a0fe9a745L,0x08249fcd49ca70dbL,0x7425a2e6441cac49L,
+ 0xf4a0885aece5ff57L },
+ { 0x6e2cb7317d7ead58L,0xf96cf7d61898d104L,0xafe67c9d4f2c9a89L,
+ 0x89895a501c7bf5bcL } },
+ /* 47 << 252 */
+ { { 0xdc7cb8e5573cecfaL,0x66497eaed15f03e6L,0x6bc0de693f084420L,
+ 0x323b9b36acd532b0L },
+ { 0xcfed390a0115a3c1L,0x9414c40b2d65ca0eL,0x641406bd2f530c78L,
+ 0x29369a44833438f2L } },
+ /* 48 << 252 */
+ { { 0x996884f5903fa271L,0xe6da0fd2b9da921eL,0xa6f2f2695db01e54L,
+ 0x1ee3e9bd6876214eL },
+ { 0xa26e181ce27a9497L,0x36d254e48e215e04L,0x42f32a6c252cabcaL,
+ 0x9948148780b57614L } },
+ /* 49 << 252 */
+ { { 0x4c4dfe6940d9cae1L,0x0586958011a10f09L,0xca287b573491b64bL,
+ 0x77862d5d3fd4a53bL },
+ { 0xbf94856e50349126L,0x2be30bd171c5268fL,0x10393f19cbb650a6L,
+ 0x639531fe778cf9fdL } },
+ /* 50 << 252 */
+ { { 0x02556a11b2935359L,0xda38aa96af8c126eL,0x47dbe6c20960167fL,
+ 0x37bbabb6501901cdL },
+ { 0xb6e979e02c947778L,0xd69a51757a1a1dc6L,0xc3ed50959d9faf0cL,
+ 0x4dd9c0961d5fa5f0L } },
+ /* 51 << 252 */
+ { { 0xa0c4304d64f16ea8L,0x8b1cac167e718623L,0x0b5765467c67f03eL,
+ 0x559cf5adcbd88c01L },
+ { 0x074877bb0e2af19aL,0x1f717ec1a1228c92L,0x70bcb800326e8920L,
+ 0xec6e2c5c4f312804L } },
+ /* 52 << 252 */
+ { { 0x426aea7d3fca4752L,0xf12c09492211f62aL,0x24beecd87be7b6b5L,
+ 0xb77eaf4c36d7a27dL },
+ { 0x154c2781fda78fd3L,0x848a83b0264eeabeL,0x81287ef04ffe2bc4L,
+ 0x7b6d88c6b6b6fc2aL } },
+ /* 53 << 252 */
+ { { 0x805fb947ce417d99L,0x4b93dcc38b916cc4L,0x72e65bb321273323L,
+ 0xbcc1badd6ea9886eL },
+ { 0x0e2230114bc5ee85L,0xa561be74c18ee1e4L,0x762fd2d4a6bcf1f1L,
+ 0x50e6a5a495231489L } },
+ /* 54 << 252 */
+ { { 0xca96001fa00b500bL,0x5c098cfc5d7dcdf5L,0xa64e2d2e8c446a85L,
+ 0xbae9bcf1971f3c62L },
+ { 0x4ec226838435a2c5L,0x8ceaed6c4bad4643L,0xe9f8fb47ccccf4e3L,
+ 0xbd4f3fa41ce3b21eL } },
+ /* 55 << 252 */
+ { { 0xd79fb110a3db3292L,0xe28a37dab536c66aL,0x279ce87b8e49e6a9L,
+ 0x70ccfe8dfdcec8e3L },
+ { 0x2193e4e03ba464b2L,0x0f39d60eaca9a398L,0x7d7932aff82c12abL,
+ 0xd8ff50ed91e7e0f7L } },
+ /* 56 << 252 */
+ { { 0xea961058fa28a7e0L,0xc726cf250bf5ec74L,0xe74d55c8db229666L,
+ 0x0bd9abbfa57f5799L },
+ { 0x7479ef074dfc47b3L,0xd9c65fc30c52f91dL,0x8e0283fe36a8bde2L,
+ 0xa32a8b5e7d4b7280L } },
+ /* 57 << 252 */
+ { { 0x6a677c6112e83233L,0x0fbb3512dcc9bf28L,0x562e8ea50d780f61L,
+ 0x0db8b22b1dc4e89cL },
+ { 0x0a6fd1fb89be0144L,0x8c77d246ca57113bL,0x4639075dff09c91cL,
+ 0x5b47b17f5060824cL } },
+ /* 58 << 252 */
+ { { 0x58aea2b016287b52L,0xa1343520d0cd8eb0L,0x6148b4d0c5d58573L,
+ 0xdd2b6170291c68aeL },
+ { 0xa61b39291da3b3b7L,0x5f946d7908c4ac10L,0x4105d4a57217d583L,
+ 0x5061da3d25e6de5eL } },
+ /* 59 << 252 */
+ { { 0x3113940dec1b4991L,0xf12195e136f485aeL,0xa7507fb2731a2ee0L,
+ 0x95057a8e6e9e196eL },
+ { 0xa3c2c9112e130136L,0x97dfbb3633c60d15L,0xcaf3c581b300ee2bL,
+ 0x77f25d90f4bac8b8L } },
+ /* 60 << 252 */
+ { { 0xdb1c4f986d840cd6L,0x471d62c0e634288cL,0x8ec2f85ecec8a161L,
+ 0x41f37cbcfa6f4ae2L },
+ { 0x6793a20f4b709985L,0x7a7bd33befa8985bL,0x2c6a3fbd938e6446L,
+ 0x190426192a8d47c1L } },
+ /* 61 << 252 */
+ { { 0x16848667cc36975fL,0x02acf1689d5f1dfbL,0x62d41ad4613baa94L,
+ 0xb56fbb929f684670L },
+ { 0xce610d0de9e40569L,0x7b99c65f35489fefL,0x0c88ad1b3df18b97L,
+ 0x81b7d9be5d0e9edbL } },
+ /* 62 << 252 */
+ { { 0xd85218c0c716cc0aL,0xf4b5ff9085691c49L,0xa4fd666bce356ac6L,
+ 0x17c728954b327a7aL },
+ { 0xf93d5085da6be7deL,0xff71530e3301d34eL,0x4cd96442d8f448e8L,
+ 0x9283d3312ed18ffaL } },
+ /* 63 << 252 */
+ { { 0x4d33dd992a849870L,0xa716964b41576335L,0xff5e3a9b179be0e5L,
+ 0x5b9d6b1b83b13632L },
+ { 0x3b8bd7d4a52f313bL,0xc9dd95a0637a4660L,0x300359620b3e218fL,
+ 0xce1481a3c7b28a3cL } },
+ /* 64 << 252 */
+ { { 0xab41b43a43228d83L,0x24ae1c304ad63f99L,0x8e525f1a46a51229L,
+ 0x14af860fcd26d2b4L },
+ { 0xd6baef613f714aa1L,0xf51865adeb78795eL,0xd3e21fcee6a9d694L,
+ 0x82ceb1dd8a37b527L } },
+};
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * g     Point to multiply (unused - the pre-computed table is used instead).
+ * table Pre-computed table of multiples of the point, 65 entries per window
+ *       (entry 0 is the point at infinity).
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_add_only_4(sp_point_256* r, const sp_point_256* g,
+        const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_256 rtd;
+    sp_point_256 pd;
+    sp_digit tmpd[2 * 4 * 5];
+#endif
+    sp_point_256* rt;
+    sp_point_256* p = NULL;
+    sp_digit* tmp;
+    sp_digit* negy;
+    int i;
+    /* Scalar recoded into 37 signed windows (see sp_256_ecc_recode_7_4). */
+    ecc_recode_256 v[37];
+    int err;
+
+    (void)g;
+    (void)heap;
+
+    err = sp_256_point_new_4(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    tmp = tmpd;
+#endif
+    /* negy aliases the start of the scratch area; only used after the
+     * err == MP_OKAY check below, so a failed allocation is safe. */
+    negy = tmp;
+
+    if (err == MP_OKAY) {
+        sp_256_ecc_recode_7_4(k, v);
+
+        XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+        XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+        /* Seed the accumulator from the top window, then add one table
+         * entry per remaining window. Each window selects one of 65
+         * pre-computed multiples; index 0 means the point at infinity. */
+        i = 36;
+        XMEMCPY(rt->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+        XMEMCPY(rt->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+        rt->infinity = !v[i].i;
+        for (--i; i>=0; i--) {
+            XMEMCPY(p->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+            XMEMCPY(p->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+            p->infinity = !v[i].i;
+            /* Conditionally negate the y ordinate when the recoded window
+             * digit is negative: -P = (x, p - y). */
+            sp_256_sub_4(negy, p256_mod, p->y);
+            sp_256_cond_copy_4(p->y, negy, 0 - v[i].neg);
+            sp_256_proj_point_add_qz1_4(rt, rt, p, tmp);
+        }
+        if (map != 0) {
+            sp_256_map_4(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_256));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 4 * 5);
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    ForceZero(tmp, sizeof(sp_digit) * 2 * 4 * 5);
+#endif
+    sp_256_point_free_4(p, 0, heap);
+    sp_256_point_free_4(rt, 0, heap);
+
+    /* Propagate any allocation failure. Returning MP_OKAY unconditionally
+     * (as before) let callers read an unwritten result point after a
+     * MEMORY_E, contradicting the documented contract. */
+    return err;
+}
+
+/* Compute the scalar multiple k*G of the P256 base point G.
+ * When map is non-zero the result is converted to affine coordinates.
+ *
+ * r    Resulting point.
+ * k    Scalar to multiply the base point by.
+ * map  Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_4(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* The pre-computed table of base point multiples stands in for the
+     * point argument, so NULL is passed for g. */
+    return sp_256_ecc_mulmod_add_only_4(r, NULL, p256_table, k, map, heap);
+}
+
+#ifdef HAVE_INTEL_AVX2
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_add_only_avx2_4(sp_point_256* r, const sp_point_256* g,
+ const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 rtd;
+ sp_point_256 pd;
+ sp_digit tmpd[2 * 4 * 5];
+#endif
+ sp_point_256* rt;
+ sp_point_256* p = NULL;
+ sp_digit* tmp;
+ sp_digit* negy;
+ int i;
+ ecc_recode_256 v[37];
+ int err;
+
+ (void)g;
+ (void)heap;
+
+ err = sp_256_point_new_4(heap, rtd, rt);
+ if (err == MP_OKAY)
+ err = sp_256_point_new_4(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#else
+ tmp = tmpd;
+#endif
+ negy = tmp;
+
+ if (err == MP_OKAY) {
+ sp_256_ecc_recode_7_4(k, v);
+
+ XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
+ XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
+
+ i = 36;
+ XMEMCPY(rt->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+ XMEMCPY(rt->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+ rt->infinity = !v[i].i;
+ for (--i; i>=0; i--) {
+ XMEMCPY(p->x, table[i * 65 + v[i].i].x, sizeof(table->x));
+ XMEMCPY(p->y, table[i * 65 + v[i].i].y, sizeof(table->y));
+ p->infinity = !v[i].i;
+ sp_256_sub_4(negy, p256_mod, p->y);
+ sp_256_cond_copy_4(p->y, negy, 0 - v[i].neg);
+ sp_256_proj_point_add_qz1_avx2_4(rt, rt, p, tmp);
+ }
+ if (map != 0) {
+ sp_256_map_avx2_4(r, rt, tmp);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_256));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 4 * 5);
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ ForceZero(tmp, sizeof(sp_digit) * 2 * 4 * 5);
+#endif
+ sp_256_point_free_4(p, 0, heap);
+ sp_256_point_free_4(rt, 0, heap);
+
+ return MP_OKAY;
+}
+
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_avx2_4(sp_point_256* r, const sp_digit* k,
+ int map, void* heap)
+{
+ return sp_256_ecc_mulmod_add_only_avx2_4(r, NULL, p256_table,
+ k, map, heap);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* WOLFSSL_SP_SMALL */
/* Multiply the base point of P256 by the scalar and return the result.
 * If map is true then convert result to affine coordinates.
 *
 * km Scalar to multiply by.
 * r Resulting point.
 * map Indicates whether to convert result to affine.
 * heap Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_256 p;
    sp_digit kd[4];
#endif
    sp_point_256* point;
    sp_digit* k = NULL;
    int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    err = sp_256_point_new_4(heap, p, point);
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Small/stack-constrained build: scalar buffer comes from the heap. */
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
            DYNAMIC_TYPE_ECC);
        if (k == NULL) {
            err = MEMORY_E;
        }
    }
#else
    /* Otherwise use the stack buffer. */
    k = kd;
#endif
    if (err == MP_OKAY) {
        /* Convert the mp_int scalar into 4 64-bit digits. */
        sp_256_from_mp(k, 4, km);

#ifdef HAVE_INTEL_AVX2
        /* Prefer the BMI2/ADX implementation when the CPU supports it. */
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_256_ecc_mulmod_base_avx2_4(point, k, map, heap);
        else
#endif
            err = sp_256_ecc_mulmod_base_4(point, k, map, heap);
    }
    if (err == MP_OKAY) {
        /* Convert the internal point representation back to an ecc_point. */
        err = sp_256_point_to_ecc_point_4(point, r);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (k != NULL) {
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    }
#endif
    sp_256_point_free_4(point, 0, heap);

    return err;
}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_256_iszero_4(const sp_digit* a)
+{
+ return (a[0] | a[1] | a[2] | a[3]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+extern void sp_256_add_one_4(sp_digit* a);
+extern void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n);
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_256_ecc_gen_k_4(WC_RNG* rng, sp_digit* k)
+{
+ int err;
+ byte buf[32];
+
+ do {
+ err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+ if (err == 0) {
+ sp_256_from_bin(k, 4, buf, (int)sizeof(buf));
+ if (sp_256_cmp_4(k, p256_order2) < 0) {
+ sp_256_add_one_4(k);
+ break;
+ }
+ }
+ }
+ while (err == 0);
+
+ return err;
+}
+
/* Makes a random EC key pair.
 *
 * rng Random number generator.
 * priv Generated private value.
 * pub Generated public point.
 * heap Heap to use for allocation.
 * returns ECC_INF_E when the point does not have the correct order, RNG
 * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_256 p;
    sp_digit kd[4];
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_256 inf;
#endif
#endif
    sp_point_256* point;
    sp_digit* k = NULL;
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_256* infinity;
#endif
    int err;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    (void)heap;

    err = sp_256_point_new_4(heap, p, point);
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    if (err == MP_OKAY) {
        err = sp_256_point_new_4(heap, inf, infinity);
    }
#endif
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
            DYNAMIC_TYPE_ECC);
        if (k == NULL) {
            err = MEMORY_E;
        }
    }
#else
    k = kd;
#endif

    /* Private key: random scalar in [1, order-1]. */
    if (err == MP_OKAY) {
        err = sp_256_ecc_gen_k_4(rng, k);
    }
    /* Public key: point = k * G. */
    if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, NULL);
        else
#endif
            err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
    }

#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    /* Validation: order * point must be the point at infinity. */
    if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
            err = sp_256_ecc_mulmod_avx2_4(infinity, point, p256_order, 1,
                NULL);
        }
        else
#endif
            err = sp_256_ecc_mulmod_4(infinity, point, p256_order, 1, NULL);
    }
    if (err == MP_OKAY) {
        /* Public key itself must not be the point at infinity. */
        if ((sp_256_iszero_4(point->x) == 0) || (sp_256_iszero_4(point->y) == 0)) {
            err = ECC_INF_E;
        }
    }
#endif

    if (err == MP_OKAY) {
        err = sp_256_to_mp(k, priv);
    }
    if (err == MP_OKAY) {
        err = sp_256_point_to_ecc_point_4(point, pub);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (k != NULL) {
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    }
#endif
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_256_point_free_4(infinity, 1, heap);
#endif
    sp_256_point_free_4(point, 1, heap);

    return err;
}
+
+#ifdef HAVE_ECC_DHE
+extern void sp_256_to_bin(sp_digit* r, byte* a);
/* Multiply the point by the scalar and serialize the X ordinate.
 * The number is 0 padded to maximum size on output.
 *
 * priv Scalar to multiply the point by.
 * pub Point to multiply.
 * out Buffer to hold X ordinate.
 * outLen On entry, size of the buffer in bytes.
 * On exit, length of data in buffer in bytes.
 * heap Heap to use for allocation.
 * returns BUFFER_E if the buffer is too small for output size,
 * MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
    word32* outLen, void* heap)
{
#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_256 p;
    sp_digit kd[4];
#endif
    sp_point_256* point = NULL;
    sp_digit* k = NULL;
    int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    /* Output buffer must fit a full 256-bit X ordinate. */
    if (*outLen < 32U) {
        err = BUFFER_E;
    }

    if (err == MP_OKAY) {
        err = sp_256_point_new_4(heap, p, point);
    }
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
            DYNAMIC_TYPE_ECC);
        if (k == NULL)
            err = MEMORY_E;
    }
#else
    k = kd;
#endif

    if (err == MP_OKAY) {
        sp_256_from_mp(k, 4, priv);
        sp_256_point_from_ecc_point_4(point, pub);
        /* Shared point: point = priv * pub (in-place; mapped to affine). */
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_256_ecc_mulmod_avx2_4(point, point, k, 1, heap);
        else
#endif
            err = sp_256_ecc_mulmod_4(point, point, k, 1, heap);
    }
    if (err == MP_OKAY) {
        /* Serialize the affine X ordinate as the shared secret. */
        sp_256_to_bin(point->x, out);
        *outLen = 32;
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (k != NULL) {
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    }
#endif
    sp_256_point_free_4(point, 0, heap);

    return err;
}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern sp_digit sp_256_add_4(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#ifdef HAVE_INTEL_AVX2
+extern void sp_256_mul_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#endif /* HAVE_INTEL_AVX2 */
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern sp_digit sp_256_sub_in_place_4(sp_digit* a, const sp_digit* b);
+extern sp_digit sp_256_cond_sub_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_256_mul_d_4(sp_digit* r, const sp_digit* a, sp_digit b);
+extern void sp_256_mul_d_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit b);
/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * Uses the x86-64 DIVQ instruction: RDX:RAX / operand with the quotient
 * left in RAX. The caller must ensure d1 < div so the quotient fits in
 * 64 bits (otherwise the CPU raises a divide error).
 *
 * d1 The high order half of the number to divide.
 * d0 The low order half of the number to divide.
 * div The divisor.
 * returns the result of the division.
 */
static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0,
        sp_digit div)
{
    register sp_digit r asm("rax");
    __asm__ __volatile__ (
        "divq %3"
        : "=a" (r)
        : "d" (d1), "a" (d0), "r" (div)
        :
    );
    return r;
}
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_256_mask_4(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<4; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ r[0] = a[0] & m;
+ r[1] = a[1] & m;
+ r[2] = a[2] & m;
+ r[3] = a[3] & m;
+#endif
+}
+
/* Divide d in a and put remainder into r (m*d + r = a)
 * m is not calculated as it is not needed at this time.
 *
 * Schoolbook long division, one 64-bit digit of quotient estimate per
 * iteration, with conditional corrections so the remainder stays in range.
 *
 * a Number to be divided.
 * d Number to divide with.
 * m Multiplier result (unused - always pass NULL).
 * r Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    sp_digit t1[8], t2[5];
    sp_digit div, r1;
    int i;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    (void)m;

    /* Top digit of the divisor is used for the quotient estimate. */
    div = d[3];
    XMEMCPY(t1, a, sizeof(*t1) * 2 * 4);
    /* Ensure the high half of the working value is below the divisor. */
    r1 = sp_256_cmp_4(&t1[4], d) >= 0;
#ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
        sp_256_cond_sub_avx2_4(&t1[4], &t1[4], d, (sp_digit)0 - r1);
    else
#endif
        sp_256_cond_sub_4(&t1[4], &t1[4], d, (sp_digit)0 - r1);
    for (i=3; i>=0; i--) {
        /* Estimate the next quotient digit from the top two digits. */
        r1 = div_256_word_4(t1[4 + i], t1[4 + i - 1], div);

#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            sp_256_mul_d_avx2_4(t2, d, r1);
        else
#endif
            sp_256_mul_d_4(t2, d, r1);
        /* Subtract estimate * divisor; the estimate may be up to 2 too
         * large, so add the divisor back (masked) at most twice. */
        t1[4 + i] += sp_256_sub_in_place_4(&t1[i], t2);
        t1[4 + i] -= t2[4];
        sp_256_mask_4(t2, d, t1[4 + i]);
        t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2);
        sp_256_mask_4(t2, d, t1[4 + i]);
        t1[4 + i] += sp_256_add_4(&t1[i], &t1[i], t2);
    }

    /* Final conditional subtraction leaves the remainder in [0, d). */
    r1 = sp_256_cmp_4(t1, d) >= 0;
#ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
        sp_256_cond_sub_avx2_4(r, t1, d, (sp_digit)0 - r1);
    else
#endif
        sp_256_cond_sub_4(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ return sp_256_div_4(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern void sp_256_sqr_4(sp_digit* r, const sp_digit* a);
#ifdef WOLFSSL_SP_SMALL
/* Order-2 for the P256 curve.
 * Exponent used for modular inversion: 1/a = a^(order-2) mod order. */
static const uint64_t p256_order_minus_2[4] = {
    0xf3b9cac2fc63254fU,0xbce6faada7179e84U,0xffffffffffffffffU,
    0xffffffff00000000U
};
#else
/* The low half of the order-2 of the P256 curve.
 * Only the low 128 bits are needed; the high half is handled by the
 * fixed addition chain in sp_256_mont_inv_order_4(). */
static const uint64_t p256_order_low[2] = {
    0xf3b9cac2fc63254fU,0xbce6faada7179e84U
};
#endif /* WOLFSSL_SP_SMALL */
+
/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
 *
 * Operands are multiplied then Montgomery-reduced with respect to the
 * curve order.
 *
 * r Result of the multiplication.
 * a First operand of the multiplication.
 * b Second operand of the multiplication.
 */
static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    sp_256_mul_4(r, a, b);
    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}
+
/* Square number mod the order of P256 curve. (r = a * a mod order)
 *
 * The square is Montgomery-reduced with respect to the curve order.
 *
 * r Result of the squaring.
 * a Number to square.
 */
static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a)
{
    sp_256_sqr_4(r, a);
    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n)
+{
+ int i;
+
+ sp_256_mont_sqr_order_4(r, a);
+ for (i=1; i<n; i++) {
+ sp_256_mont_sqr_order_4(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
 * (r = 1 / a mod order)
 *
 * Computed as a^(order-2) mod order (Fermat's little theorem) using a
 * fixed sequence of squarings and multiplications, so the operation
 * pattern does not depend on the value of a.
 *
 * r Inverse result.
 * a Number to invert.
 * td Temporary data (3 double-width values in the non-small build).
 */
static void sp_256_mont_inv_order_4(sp_digit* r, const sp_digit* a,
        sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
    int i;

    XMEMCPY(t, a, sizeof(sp_digit) * 4);
    /* Plain left-to-right square-and-multiply over the bits of order-2. */
    for (i=254; i>=0; i--) {
        sp_256_mont_sqr_order_4(t, t);
        if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_4(t, t, a);
        }
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 4U);
#else
    sp_digit* t = td;
    sp_digit* t2 = td + 2 * 4;
    sp_digit* t3 = td + 4 * 4;
    int i;

    /* t = a^2 */
    sp_256_mont_sqr_order_4(t, a);
    /* t = a^3 = t * a */
    sp_256_mont_mul_order_4(t, t, a);
    /* t2= a^c = t ^ 2 ^ 2 */
    sp_256_mont_sqr_n_order_4(t2, t, 2);
    /* t3= a^f = t2 * t */
    sp_256_mont_mul_order_4(t3, t2, t);
    /* t2= a^f0 = t3 ^ 2 ^ 4 */
    sp_256_mont_sqr_n_order_4(t2, t3, 4);
    /* t = a^ff = t2 * t3 */
    sp_256_mont_mul_order_4(t, t2, t3);
    /* t2= a^ff00 = t ^ 2 ^ 8  (result goes to t2, not t3) */
    sp_256_mont_sqr_n_order_4(t2, t, 8);
    /* t = a^ffff = t2 * t */
    sp_256_mont_mul_order_4(t, t2, t);
    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
    sp_256_mont_sqr_n_order_4(t2, t, 16);
    /* t = a^ffffffff = t2 * t */
    sp_256_mont_mul_order_4(t, t2, t);
    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
    sp_256_mont_sqr_n_order_4(t2, t, 64);
    /* t2= a^ffffffff00000000ffffffff = t2 * t */
    sp_256_mont_mul_order_4(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
    sp_256_mont_sqr_n_order_4(t2, t2, 32);
    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
    sp_256_mont_mul_order_4(t2, t2, t);
    /* Low half: square-and-multiply over the bits of p256_order_low. */
    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
    for (i=127; i>=112; i--) {
        sp_256_mont_sqr_order_4(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_4(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
    sp_256_mont_sqr_n_order_4(t2, t2, 4);
    sp_256_mont_mul_order_4(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
    for (i=107; i>=64; i--) {
        sp_256_mont_sqr_order_4(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_4(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
    sp_256_mont_sqr_n_order_4(t2, t2, 4);
    sp_256_mont_mul_order_4(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
    for (i=59; i>=32; i--) {
        sp_256_mont_sqr_order_4(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_4(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
    sp_256_mont_sqr_n_order_4(t2, t2, 4);
    sp_256_mont_mul_order_4(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
    for (i=27; i>=0; i--) {
        sp_256_mont_sqr_order_4(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_4(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
    sp_256_mont_sqr_n_order_4(t2, t2, 4);
    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
    sp_256_mont_mul_order_4(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}
+
+#ifdef HAVE_INTEL_AVX2
+extern void sp_256_sqr_avx2_4(sp_digit* r, const sp_digit* a);
+#define sp_256_mont_reduce_order_avx2_4 sp_256_mont_reduce_avx2_4
+
+extern void sp_256_mont_reduce_avx2_4(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_256_mont_mul_order_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ sp_256_mul_avx2_4(r, a, b);
+ sp_256_mont_reduce_order_avx2_4(r, p256_order, p256_mp_order);
+}
+
/* Square number mod the order of P256 curve. (r = a * a mod order)
 *
 * AVX2 (BMI2/ADX) variant: the square is Montgomery-reduced with
 * respect to the curve order.
 *
 * r Result of the squaring.
 * a Number to square.
 */
static void sp_256_mont_sqr_order_avx2_4(sp_digit* r, const sp_digit* a)
{
    sp_256_sqr_avx2_4(r, a);
    sp_256_mont_reduce_order_avx2_4(r, p256_order, p256_mp_order);
}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P256 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_256_mont_sqr_n_order_avx2_4(sp_digit* r, const sp_digit* a, int n)
+{
+ int i;
+
+ sp_256_mont_sqr_order_avx2_4(r, a);
+ for (i=1; i<n; i++) {
+ sp_256_mont_sqr_order_avx2_4(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
 * (r = 1 / a mod order)
 *
 * AVX2 (BMI2/ADX) variant. Computed as a^(order-2) mod order (Fermat's
 * little theorem) using a fixed sequence of squarings and multiplications.
 *
 * r Inverse result.
 * a Number to invert.
 * td Temporary data (3 double-width values in the non-small build).
 */
static void sp_256_mont_inv_order_avx2_4(sp_digit* r, const sp_digit* a,
        sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
    int i;

    XMEMCPY(t, a, sizeof(sp_digit) * 4);
    /* Plain left-to-right square-and-multiply over the bits of order-2. */
    for (i=254; i>=0; i--) {
        sp_256_mont_sqr_order_avx2_4(t, t);
        if ((p256_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_avx2_4(t, t, a);
        }
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 4U);
#else
    sp_digit* t = td;
    sp_digit* t2 = td + 2 * 4;
    sp_digit* t3 = td + 4 * 4;
    int i;

    /* t = a^2 */
    sp_256_mont_sqr_order_avx2_4(t, a);
    /* t = a^3 = t * a */
    sp_256_mont_mul_order_avx2_4(t, t, a);
    /* t2= a^c = t ^ 2 ^ 2 */
    sp_256_mont_sqr_n_order_avx2_4(t2, t, 2);
    /* t3= a^f = t2 * t */
    sp_256_mont_mul_order_avx2_4(t3, t2, t);
    /* t2= a^f0 = t3 ^ 2 ^ 4 */
    sp_256_mont_sqr_n_order_avx2_4(t2, t3, 4);
    /* t = a^ff = t2 * t3 */
    sp_256_mont_mul_order_avx2_4(t, t2, t3);
    /* t2= a^ff00 = t ^ 2 ^ 8  (result goes to t2, not t3) */
    sp_256_mont_sqr_n_order_avx2_4(t2, t, 8);
    /* t = a^ffff = t2 * t */
    sp_256_mont_mul_order_avx2_4(t, t2, t);
    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
    sp_256_mont_sqr_n_order_avx2_4(t2, t, 16);
    /* t = a^ffffffff = t2 * t */
    sp_256_mont_mul_order_avx2_4(t, t2, t);
    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
    sp_256_mont_sqr_n_order_avx2_4(t2, t, 64);
    /* t2= a^ffffffff00000000ffffffff = t2 * t */
    sp_256_mont_mul_order_avx2_4(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 32);
    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
    sp_256_mont_mul_order_avx2_4(t2, t2, t);
    /* Low half: square-and-multiply over the bits of p256_order_low. */
    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
    for (i=127; i>=112; i--) {
        sp_256_mont_sqr_order_avx2_4(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_avx2_4(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
    for (i=107; i>=64; i--) {
        sp_256_mont_sqr_order_avx2_4(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_avx2_4(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
    for (i=59; i>=32; i--) {
        sp_256_mont_sqr_order_avx2_4(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_avx2_4(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
    sp_256_mont_mul_order_avx2_4(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
    for (i=27; i>=0; i--) {
        sp_256_mont_sqr_order_avx2_4(t2, t2);
        if (((sp_digit)p256_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
            sp_256_mont_mul_order_avx2_4(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
    sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
    sp_256_mont_mul_order_avx2_4(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}
+
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 256 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Sirst part of result as an mp_int.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+ mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit ed[2*4];
+ sp_digit xd[2*4];
+ sp_digit kd[2*4];
+ sp_digit rd[2*4];
+ sp_digit td[3 * 2*4];
+ sp_point_256 p;
+#endif
+ sp_digit* e = NULL;
+ sp_digit* x = NULL;
+ sp_digit* k = NULL;
+ sp_digit* r = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* point = NULL;
+ sp_digit carry;
+ sp_digit* s = NULL;
+ sp_digit* kInv = NULL;
+ int err = MP_OKAY;
+ int64_t c;
+ int i;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ (void)heap;
+
+ err = sp_256_point_new_4(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 4, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ e = d + 0 * 4;
+ x = d + 2 * 4;
+ k = d + 4 * 4;
+ r = d + 6 * 4;
+ tmp = d + 8 * 4;
+#else
+ e = ed;
+ x = xd;
+ k = kd;
+ r = rd;
+ tmp = td;
+#endif
+ s = e;
+ kInv = k;
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(e, 4, hash, (int)hashLen);
+ }
+
+ for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+ sp_256_from_mp(x, 4, priv);
+
+ /* New random point. */
+ if (km == NULL || mp_iszero(km)) {
+ err = sp_256_ecc_gen_k_4(rng, k);
+ }
+ else {
+ sp_256_from_mp(k, 4, km);
+ mp_zero(km);
+ }
+ if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, heap);
+ else
+#endif
+ err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
+ }
+
+ if (err == MP_OKAY) {
+ /* r = point->x mod order */
+ XMEMCPY(r, point->x, sizeof(sp_digit) * 4U);
+ sp_256_norm_4(r);
+ c = sp_256_cmp_4(r, p256_order);
+ sp_256_cond_sub_4(r, r, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_4(r);
+
+ /* Conv k to Montgomery form (mod order) */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_256_mul_avx2_4(k, k, p256_norm_order);
+ else
+#endif
+ sp_256_mul_4(k, k, p256_norm_order);
+ err = sp_256_mod_4(k, k, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_4(k);
+ /* kInv = 1/k mod order */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_256_mont_inv_order_avx2_4(kInv, k, tmp);
+ else
+#endif
+ sp_256_mont_inv_order_4(kInv, k, tmp);
+ sp_256_norm_4(kInv);
+
+ /* s = r * x + e */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_256_mul_avx2_4(x, x, r);
+ else
+#endif
+ sp_256_mul_4(x, x, r);
+ err = sp_256_mod_4(x, x, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_4(x);
+ carry = sp_256_add_4(s, e, x);
+ sp_256_cond_sub_4(s, s, p256_order, 0 - carry);
+ sp_256_norm_4(s);
+ c = sp_256_cmp_4(s, p256_order);
+ sp_256_cond_sub_4(s, s, p256_order, 0L - (sp_digit)(c >= 0));
+ sp_256_norm_4(s);
+
+ /* s = s * k^-1 mod order */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_256_mont_mul_order_avx2_4(s, s, kInv);
+ else
+#endif
+ sp_256_mont_mul_order_4(s, s, kInv);
+ sp_256_norm_4(s);
+
+ /* Check that signature is usable. */
+ if (sp_256_iszero_4(s) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (i == 0) {
+ err = RNG_FAILURE_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(r, rm);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(s, sm);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XMEMSET(d, 0, sizeof(sp_digit) * 8 * 4);
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+ XMEMSET(e, 0, sizeof(sp_digit) * 2U * 4U);
+ XMEMSET(x, 0, sizeof(sp_digit) * 2U * 4U);
+ XMEMSET(k, 0, sizeof(sp_digit) * 2U * 4U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 4U);
+ XMEMSET(r, 0, sizeof(sp_digit) * 2U * 4U);
+ XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 4U);
+#endif
+ sp_256_point_free_4(point, 1, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
 * hash    Hash to verify.
 * hashLen Length of the hash data.
 * pX      X ordinate of the public key point.
 * pY      Y ordinate of the public key point.
 * pZ      Z ordinate of the public key point.
 * r       First part of the signature as an mp_int.
 * sm      Second part of the signature as an mp_int.
 * res     Set to 1 when the signature verifies, 0 otherwise.
 * heap    Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
+int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit u1d[2*4];
+ sp_digit u2d[2*4];
+ sp_digit sd[2*4];
+ sp_digit tmpd[2*4 * 5];
+ sp_point_256 p1d;
+ sp_point_256 p2d;
+#endif
+ sp_digit* u1 = NULL;
+ sp_digit* u2 = NULL;
+ sp_digit* s = NULL;
+ sp_digit* tmp = NULL;
+ sp_point_256* p1;
+ sp_point_256* p2 = NULL;
+ sp_digit carry;
+ int64_t c;
+ int err;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ err = sp_256_point_new_4(heap, p1d, p1);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_4(heap, p2d, p2);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 4, heap,
+ DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ u1 = d + 0 * 4;
+ u2 = d + 2 * 4;
+ s = d + 4 * 4;
+ tmp = d + 6 * 4;
+#else
+ u1 = u1d;
+ u2 = u2d;
+ s = sd;
+ tmp = tmpd;
+#endif
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ sp_256_from_bin(u1, 4, hash, (int)hashLen);
+ sp_256_from_mp(u2, 4, r);
+ sp_256_from_mp(s, 4, sm);
+ sp_256_from_mp(p2->x, 4, pX);
+ sp_256_from_mp(p2->y, 4, pY);
+ sp_256_from_mp(p2->z, 4, pZ);
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ sp_256_mul_avx2_4(s, s, p256_norm_order);
+ }
+ else
+#endif
+ {
+ sp_256_mul_4(s, s, p256_norm_order);
+ }
+ err = sp_256_mod_4(s, s, p256_order);
+ }
+ if (err == MP_OKAY) {
+ sp_256_norm_4(s);
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ sp_256_mont_inv_order_avx2_4(s, s, tmp);
+ sp_256_mont_mul_order_avx2_4(u1, u1, s);
+ sp_256_mont_mul_order_avx2_4(u2, u2, s);
+ }
+ else
+#endif
+ {
+ sp_256_mont_inv_order_4(s, s, tmp);
+ sp_256_mont_mul_order_4(u1, u1, s);
+ sp_256_mont_mul_order_4(u2, u2, s);
+ }
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_256_ecc_mulmod_base_avx2_4(p1, u1, 0, heap);
+ else
+#endif
+ err = sp_256_ecc_mulmod_base_4(p1, u1, 0, heap);
+ }
+ if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_256_ecc_mulmod_avx2_4(p2, p2, u2, 0, heap);
+ else
+#endif
+ err = sp_256_ecc_mulmod_4(p2, p2, u2, 0, heap);
+ }
+
+ if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ sp_256_proj_point_add_avx2_4(p1, p1, p2, tmp);
+ if (sp_256_iszero_4(p1->z)) {
+ if (sp_256_iszero_4(p1->x) && sp_256_iszero_4(p1->y)) {
+ sp_256_proj_point_dbl_avx2_4(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ }
+ }
+ }
+ else
+#endif
+ {
+ sp_256_proj_point_add_4(p1, p1, p2, tmp);
+ if (sp_256_iszero_4(p1->z)) {
+ if (sp_256_iszero_4(p1->x) && sp_256_iszero_4(p1->y)) {
+ sp_256_proj_point_dbl_4(p1, p2, tmp);
+ }
+ else {
+ /* Y ordinate is not used from here - don't set. */
+ p1->x[0] = 0;
+ p1->x[1] = 0;
+ p1->x[2] = 0;
+ p1->x[3] = 0;
+ XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
+ }
+ }
+ }
+
+ /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+ /* Reload r and convert to Montgomery form. */
+ sp_256_from_mp(u2, 4, r);
+ err = sp_256_mod_mul_norm_4(u2, u2, p256_mod);
+ }
+
+ if (err == MP_OKAY) {
+ /* u1 = r.z'.z' mod prime */
+ sp_256_mont_sqr_4(p1->z, p1->z, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(u1, u2, p1->z, p256_mod, p256_mp_mod);
+ *res = (int)(sp_256_cmp_4(p1->x, u1) == 0);
+ if (*res == 0) {
+ /* Reload r and add order. */
+ sp_256_from_mp(u2, 4, r);
+ carry = sp_256_add_4(u2, u2, p256_order);
+ /* Carry means result is greater than mod and is not valid. */
+ if (carry == 0) {
+ sp_256_norm_4(u2);
+
+ /* Compare with mod and if greater or equal then not valid. */
+ c = sp_256_cmp_4(u2, p256_mod);
+ if (c < 0) {
+ /* Convert to Montogomery form */
+ err = sp_256_mod_mul_norm_4(u2, u2, p256_mod);
+ if (err == MP_OKAY) {
+ /* u1 = (r + 1*order).z'.z' mod prime */
+ sp_256_mont_mul_4(u1, u2, p1->z, p256_mod,
+ p256_mp_mod);
+ *res = (int)(sp_256_cmp_4(p1->x, u1) == 0);
+ }
+ }
+ }
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL)
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_256_point_free_4(p1, 0, heap);
+ sp_256_point_free_4(p2, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_256_ecc_is_point_4(sp_point_256* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d = NULL;
+#else
+ sp_digit t1d[2*4];
+ sp_digit t2d[2*4];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, heap, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 4;
+ t2 = d + 2 * 4;
+#else
+ (void)heap;
+
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+ /* Evaluate t1 = y^2 - x^3 + 3x (mod p); the point satisfies the
+ * curve equation y^2 = x^3 - 3x + b exactly when t1 == b. */
+ sp_256_sqr_4(t1, point->y);
+ (void)sp_256_mod_4(t1, t1, p256_mod);
+ sp_256_sqr_4(t2, point->x);
+ (void)sp_256_mod_4(t2, t2, p256_mod);
+ sp_256_mul_4(t2, t2, point->x);
+ (void)sp_256_mod_4(t2, t2, p256_mod);
+ (void)sp_256_sub_4(t2, p256_mod, t2);
+ sp_256_mont_add_4(t1, t1, t2, p256_mod);
+
+ sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+ sp_256_mont_add_4(t1, t1, point->x, p256_mod);
+
+ if (sp_256_cmp_4(t1, p256_b) != 0) {
+ err = MP_VAL;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_256 pubd;
+#endif
+ sp_point_256* pub;
+ byte one[1] = { 1 };
+ int err;
+
+ err = sp_256_point_new_4(NULL, pubd, pub);
+ if (err == MP_OKAY) {
+ /* Z = 1: treat the (x, y) pair as an affine point. */
+ sp_256_from_mp(pub->x, 4, pX);
+ sp_256_from_mp(pub->y, 4, pY);
+ sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
+
+ err = sp_256_ecc_is_point_4(pub, NULL);
+ }
+
+ sp_256_point_free_4(pub, 0, NULL);
+
+ return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit privd[4];
+ sp_point_256 pubd;
+ sp_point_256 pd;
+#endif
+ sp_digit* priv = NULL;
+ sp_point_256* pub;
+ sp_point_256* p = NULL;
+ byte one[1] = { 1 };
+ int err;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ err = sp_256_point_new_4(heap, pubd, pub);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_4(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4, heap,
+ DYNAMIC_TYPE_ECC);
+ if (priv == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ priv = privd;
+#endif
+
+ sp_256_from_mp(pub->x, 4, pX);
+ sp_256_from_mp(pub->y, 4, pY);
+ sp_256_from_bin(pub->z, 4, one, (int)sizeof(one));
+ sp_256_from_mp(priv, 4, privm);
+
+ /* Check point at infinity. */
+ if ((sp_256_iszero_4(pub->x) != 0) &&
+ (sp_256_iszero_4(pub->y) != 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check range of X and Y */
+ if (sp_256_cmp_4(pub->x, p256_mod) >= 0 ||
+ sp_256_cmp_4(pub->y, p256_mod) >= 0) {
+ err = ECC_OUT_OF_RANGE_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Check point is on curve */
+ err = sp_256_ecc_is_point_4(pub, heap);
+ }
+
+ if (err == MP_OKAY) {
+ /* Point * order = infinity */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_256_ecc_mulmod_avx2_4(p, pub, p256_order, 1, heap);
+ else
+#endif
+ err = sp_256_ecc_mulmod_4(p, pub, p256_order, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is infinity */
+ if ((sp_256_iszero_4(p->x) == 0) ||
+ (sp_256_iszero_4(p->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+
+ if (err == MP_OKAY) {
+ /* Base * private = point */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_256_ecc_mulmod_base_avx2_4(p, priv, 1, heap);
+ else
+#endif
+ err = sp_256_ecc_mulmod_base_4(p, priv, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Check result is public key */
+ if (sp_256_cmp_4(p->x, pub->x) != 0 ||
+ sp_256_cmp_4(p->y, pub->y) != 0) {
+ err = ECC_PRIV_KEY_E;
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (priv != NULL) {
+ XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_4(p, 0, heap);
+ sp_256_point_free_4(pub, 0, heap);
+
+ return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * Note: the result is left in projective (Jacobian) form; it is not mapped
+ * back to affine coordinates.
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* qX, mp_int* qY, mp_int* qZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 4 * 5];
+ sp_point_256 pd;
+ sp_point_256 qd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ sp_point_256* q = NULL;
+ int err;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ err = sp_256_point_new_4(NULL, pd, p);
+ if (err == MP_OKAY) {
+ err = sp_256_point_new_4(NULL, qd, q);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 5, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 4, pX);
+ sp_256_from_mp(p->y, 4, pY);
+ sp_256_from_mp(p->z, 4, pZ);
+ sp_256_from_mp(q->x, 4, qX);
+ sp_256_from_mp(q->y, 4, qY);
+ sp_256_from_mp(q->z, 4, qZ);
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_256_proj_point_add_avx2_4(p, p, q, tmp);
+ else
+#endif
+ sp_256_proj_point_add_4(p, p, q, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_4(q, 0, NULL);
+ sp_256_point_free_4(p, 0, NULL);
+
+ return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * Note: the result is left in projective (Jacobian) form; it is not mapped
+ * back to affine coordinates.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
+ mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 4 * 2];
+ sp_point_256 pd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ int err;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ err = sp_256_point_new_4(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 2, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 4, pX);
+ sp_256_from_mp(p->y, 4, pY);
+ sp_256_from_mp(p->z, 4, pZ);
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_256_proj_point_dbl_avx2_4(p, p, tmp);
+ else
+#endif
+ sp_256_proj_point_dbl_4(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, rX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, rY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, rZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_4(p, 0, NULL);
+
+ return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit tmpd[2 * 4 * 4];
+ sp_point_256 pd;
+#endif
+ sp_digit* tmp;
+ sp_point_256* p;
+ int err;
+
+ err = sp_256_point_new_4(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 4 * 4, NULL,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ tmp = tmpd;
+#endif
+ if (err == MP_OKAY) {
+ sp_256_from_mp(p->x, 4, pX);
+ sp_256_from_mp(p->y, 4, pY);
+ sp_256_from_mp(p->z, 4, pZ);
+
+ /* Convert (X, Y, Z) to affine form; writes results back into p. */
+ sp_256_map_4(p, p, tmp);
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->x, pX);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->y, pY);
+ }
+ if (err == MP_OKAY) {
+ err = sp_256_to_mp(p->z, pZ);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (tmp != NULL) {
+ XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_256_point_free_4(p, 0, NULL);
+
+ return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * The commented exponent chain below builds y ^ ((p256 + 1) / 4) using a
+ * fixed addition chain (valid square-root method since p256 = 3 mod 4 --
+ * NOTE(review): exponent value matches the chain's running comments).
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_256_mont_sqrt_4(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit t1d[2 * 4];
+ sp_digit t2d[2 * 4];
+#endif
+ sp_digit* t1;
+ sp_digit* t2;
+ int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t1 = d + 0 * 4;
+ t2 = d + 2 * 4;
+#else
+ t1 = t1d;
+ t2 = t2d;
+#endif
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ /* t2 = y ^ 0x2 */
+ sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0x3 */
+ sp_256_mont_mul_avx2_4(t1, t2, y, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xc */
+ sp_256_mont_sqr_n_avx2_4(t2, t1, 2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xf */
+ sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xf0 */
+ sp_256_mont_sqr_n_avx2_4(t2, t1, 4, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xff */
+ sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xff00 */
+ sp_256_mont_sqr_n_avx2_4(t2, t1, 8, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffff */
+ sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xffff0000 */
+ sp_256_mont_sqr_n_avx2_4(t2, t1, 16, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff */
+ sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000000 */
+ sp_256_mont_sqr_n_avx2_4(t1, t1, 32, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001 */
+ sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+ sp_256_mont_sqr_n_avx2_4(t1, t1, 96, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+ sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_n_avx2_4(y, t1, 94, p256_mod, p256_mp_mod);
+ }
+ else
+#endif
+ {
+ /* t2 = y ^ 0x2 */
+ sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0x3 */
+ sp_256_mont_mul_4(t1, t2, y, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xc */
+ sp_256_mont_sqr_n_4(t2, t1, 2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xf */
+ sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xf0 */
+ sp_256_mont_sqr_n_4(t2, t1, 4, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xff */
+ sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xff00 */
+ sp_256_mont_sqr_n_4(t2, t1, 8, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffff */
+ sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t2 = y ^ 0xffff0000 */
+ sp_256_mont_sqr_n_4(t2, t1, 16, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff */
+ sp_256_mont_mul_4(t1, t1, t2, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000000 */
+ sp_256_mont_sqr_n_4(t1, t1, 32, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001 */
+ sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
+ sp_256_mont_sqr_n_4(t1, t1, 96, p256_mod, p256_mp_mod);
+ /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
+ sp_256_mont_mul_4(t1, t1, y, p256_mod, p256_mp_mod);
+ sp_256_mont_sqr_n_4(y, t1, 94, p256_mod, p256_mp_mod);
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * Solves y = sqrt(x^3 - 3x + b) mod p and picks the root whose parity
+ * matches 'odd'.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ sp_digit* d;
+#else
+ sp_digit xd[2 * 4];
+ sp_digit yd[2 * 4];
+#endif
+ sp_digit* x = NULL;
+ sp_digit* y = NULL;
+ int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
+ if (d == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ x = d + 0 * 4;
+ y = d + 2 * 4;
+#else
+ x = xd;
+ y = yd;
+#endif
+
+ sp_256_from_mp(x, 4, xm);
+ err = sp_256_mod_mul_norm_4(x, x, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ /* y = x^3 */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ sp_256_mont_sqr_avx2_4(y, x, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_avx2_4(y, y, x, p256_mod, p256_mp_mod);
+ }
+ else
+#endif
+ {
+ sp_256_mont_sqr_4(y, x, p256_mod, p256_mp_mod);
+ sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod);
+ }
+ /* y = x^3 - 3x */
+ sp_256_mont_sub_4(y, y, x, p256_mod);
+ sp_256_mont_sub_4(y, y, x, p256_mod);
+ sp_256_mont_sub_4(y, y, x, p256_mod);
+ /* y = x^3 - 3x + b */
+ err = sp_256_mod_mul_norm_4(x, p256_b, p256_mod);
+ }
+ if (err == MP_OKAY) {
+ sp_256_mont_add_4(y, y, x, p256_mod);
+ /* y = sqrt(x^3 - 3x + b) */
+ err = sp_256_mont_sqrt_4(y);
+ }
+ if (err == MP_OKAY) {
+ /* Leave Montgomery form, then negate if the parity doesn't match. */
+ XMEMSET(y + 4, 0, 4U * sizeof(sp_digit));
+ sp_256_mont_reduce_4(y, p256_mod, p256_mp_mod);
+ if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+ sp_256_mont_sub_4(y, p256_mod, y, p256_mod);
+ }
+
+ err = sp_256_to_mp(y, ym);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (d != NULL) {
+ XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+ }
+#endif
+
+ return err;
+}
+#endif
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+
+/* Point structure to use. */
+typedef struct sp_point_384 {
+ sp_digit x[2 * 6];
+ sp_digit y[2 * 6];
+ sp_digit z[2 * 6];
+ int infinity;
+} sp_point_384;
+
+/* The modulus (prime) of the curve P384. */
+static const sp_digit p384_mod[6] = {
+ 0x00000000ffffffffL,0xffffffff00000000L,0xfffffffffffffffeL,
+ 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL
+};
+/* The Montgomery normalizer for modulus of the curve P384. */
+static const sp_digit p384_norm_mod[6] = {
+ 0xffffffff00000001L,0x00000000ffffffffL,0x0000000000000001L,
+ 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L
+};
+/* The Montgomery multiplier for modulus of the curve P384. */
+static sp_digit p384_mp_mod = 0x0000000100000001;
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* The order of the curve P384. */
+static const sp_digit p384_order[6] = {
+ 0xecec196accc52973L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL,
+ 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL
+};
+#endif
+/* The order of the curve P384 minus 2. */
+static const sp_digit p384_order2[6] = {
+ 0xecec196accc52971L,0x581a0db248b0a77aL,0xc7634d81f4372ddfL,
+ 0xffffffffffffffffL,0xffffffffffffffffL,0xffffffffffffffffL
+};
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery normalizer for order of the curve P384. */
+static const sp_digit p384_norm_order[6] = {
+ 0x1313e695333ad68dL,0xa7e5f24db74f5885L,0x389cb27e0bc8d220L,
+ 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L
+};
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+/* The Montgomery multiplier for order of the curve P384. */
+static sp_digit p384_mp_order = 0x6ed46089e88fdc45l;
+#endif
+/* The base point of curve P384. */
+static const sp_point_384 p384_base = {
+ /* X ordinate */
+ {
+ 0x3a545e3872760ab7L,0x5502f25dbf55296cL,0x59f741e082542a38L,
+ 0x6e1d3b628ba79b98L,0x8eb1c71ef320ad74L,0xaa87ca22be8b0537L,
+ 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Y ordinate */
+ {
+ 0x7a431d7c90ea0e5fL,0x0a60b1ce1d7e819dL,0xe9da3113b5f0b8c0L,
+ 0xf8f41dbd289a147cL,0x5d9e98bf9292dc29L,0x3617de4a96262c6fL,
+ 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* Z ordinate */
+ {
+ 0x0000000000000001L,0x0000000000000000L,0x0000000000000000L,
+ 0x0000000000000000L,0x0000000000000000L,0x0000000000000000L,
+ 0L, 0L, 0L, 0L, 0L, 0L
+ },
+ /* infinity */
+ 0
+};
+#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
+/* The b coefficient of the curve P384 (y^2 = x^3 - 3x + b). */
+static const sp_digit p384_b[6] = {
+ 0x2a85c8edd3ec2aefL,0xc656398d8a2ed19dL,0x0314088f5013875aL,
+ 0x181d9c6efe814112L,0x988e056be3f82d19L,0xb3312fa7e23ee7e4L
+};
+#endif
+
+/* Provide a P384 point: either allocate one from the heap or use the
+ * caller-supplied stack instance, depending on build configuration.
+ *
+ * heap Heap to use if dynamically allocating.
+ * sp Caller's stack point (used only in the non-malloc build).
+ * p Receives the point to use.
+ * returns MEMORY_E when allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_point_new_ex_6(void* heap, sp_point_384* sp, sp_point_384** p)
+{
+ int ret = MP_OKAY;
+ (void)heap;
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ (void)sp;
+ *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
+#else
+ *p = sp;
+#endif
+ if (*p == NULL) {
+ ret = MEMORY_E;
+ }
+ return ret;
+}
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* Allocate memory for point and return error. */
+#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), NULL, &(p))
+#else
+/* Set pointer to data and return no error. */
+#define sp_384_point_new_6(heap, sp, p) sp_384_point_new_ex_6((heap), &(sp), &(p))
+#endif
+
+
+/* Free a P384 point obtained from sp_384_point_new_6.
+ *
+ * p Point to free (may be NULL in the malloc build).
+ * clear Non-zero to zeroize the point data first.
+ * heap Heap the point was allocated from.
+ */
+static void sp_384_point_free_6(sp_point_384* p, int clear, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+/* If valid pointer then clear point data if requested and free data. */
+ if (p != NULL) {
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+ XFREE(p, heap, DYNAMIC_TYPE_ECC);
+ }
+#else
+/* Clear point data if requested. */
+ if (clear != 0) {
+ XMEMSET(p, 0, sizeof(*p));
+ }
+#endif
+ (void)heap;
+}
+
+/* Multiply a number by Montgomery normalizer mod modulus (prime).
+ *
+ * Works on 32-bit half-limbs: splits the six 64-bit digits into twelve
+ * 32-bit values, applies the fixed P384 reduction matrix (coefficients in
+ * the per-row comments), propagates carries twice, then repacks.
+ *
+ * r The resulting Montgomery form number.
+ * a The number to convert.
+ * m The modulus (prime).
+ * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mod_mul_norm_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ int64_t* td;
+#else
+ int64_t td[12];
+ int64_t a32d[12];
+#endif
+ int64_t* t;
+ int64_t* a32;
+ int64_t o;
+ int err = MP_OKAY;
+
+ (void)m;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
+ if (td == NULL) {
+ err = MEMORY_E;
+ }
+#endif
+
+ if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = td;
+ a32 = td + 12;
+#else
+ t = td;
+ a32 = a32d;
+#endif
+
+ a32[0] = a[0] & 0xffffffff;
+ a32[1] = a[0] >> 32;
+ a32[2] = a[1] & 0xffffffff;
+ a32[3] = a[1] >> 32;
+ a32[4] = a[2] & 0xffffffff;
+ a32[5] = a[2] >> 32;
+ a32[6] = a[3] & 0xffffffff;
+ a32[7] = a[3] >> 32;
+ a32[8] = a[4] & 0xffffffff;
+ a32[9] = a[4] >> 32;
+ a32[10] = a[5] & 0xffffffff;
+ a32[11] = a[5] >> 32;
+
+ /* 1 0 0 0 0 0 0 0 1 1 0 -1 */
+ t[0] = 0 + a32[0] + a32[8] + a32[9] - a32[11];
+ /* -1 1 0 0 0 0 0 0 -1 0 1 1 */
+ t[1] = 0 - a32[0] + a32[1] - a32[8] + a32[10] + a32[11];
+ /* 0 -1 1 0 0 0 0 0 0 -1 0 1 */
+ t[2] = 0 - a32[1] + a32[2] - a32[9] + a32[11];
+ /* 1 0 -1 1 0 0 0 0 1 1 -1 -1 */
+ t[3] = 0 + a32[0] - a32[2] + a32[3] + a32[8] + a32[9] - a32[10] - a32[11];
+ /* 1 1 0 -1 1 0 0 0 1 2 1 -2 */
+ t[4] = 0 + a32[0] + a32[1] - a32[3] + a32[4] + a32[8] + 2 * a32[9] + a32[10] - 2 * a32[11];
+ /* 0 1 1 0 -1 1 0 0 0 1 2 1 */
+ t[5] = 0 + a32[1] + a32[2] - a32[4] + a32[5] + a32[9] + 2 * a32[10] + a32[11];
+ /* 0 0 1 1 0 -1 1 0 0 0 1 2 */
+ t[6] = 0 + a32[2] + a32[3] - a32[5] + a32[6] + a32[10] + 2 * a32[11];
+ /* 0 0 0 1 1 0 -1 1 0 0 0 1 */
+ t[7] = 0 + a32[3] + a32[4] - a32[6] + a32[7] + a32[11];
+ /* 0 0 0 0 1 1 0 -1 1 0 0 0 */
+ t[8] = 0 + a32[4] + a32[5] - a32[7] + a32[8];
+ /* 0 0 0 0 0 1 1 0 -1 1 0 0 */
+ t[9] = 0 + a32[5] + a32[6] - a32[8] + a32[9];
+ /* 0 0 0 0 0 0 1 1 0 -1 1 0 */
+ t[10] = 0 + a32[6] + a32[7] - a32[9] + a32[10];
+ /* 0 0 0 0 0 0 0 1 1 0 -1 1 */
+ t[11] = 0 + a32[7] + a32[8] - a32[10] + a32[11];
+
+ /* First carry propagation pass over the 32-bit accumulators. */
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+ t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+ t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+ t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+ /* Fold the top overflow back in and propagate carries again. */
+ o = t[11] >> 32; t[11] &= 0xffffffff;
+ t[0] += o;
+ t[1] -= o;
+ t[3] += o;
+ t[4] += o;
+ t[1] += t[0] >> 32; t[0] &= 0xffffffff;
+ t[2] += t[1] >> 32; t[1] &= 0xffffffff;
+ t[3] += t[2] >> 32; t[2] &= 0xffffffff;
+ t[4] += t[3] >> 32; t[3] &= 0xffffffff;
+ t[5] += t[4] >> 32; t[4] &= 0xffffffff;
+ t[6] += t[5] >> 32; t[5] &= 0xffffffff;
+ t[7] += t[6] >> 32; t[6] &= 0xffffffff;
+ t[8] += t[7] >> 32; t[7] &= 0xffffffff;
+ t[9] += t[8] >> 32; t[8] &= 0xffffffff;
+ t[10] += t[9] >> 32; t[9] &= 0xffffffff;
+ t[11] += t[10] >> 32; t[10] &= 0xffffffff;
+
+ /* Repack twelve 32-bit values into six 64-bit digits. */
+ r[0] = (t[1] << 32) | t[0];
+ r[1] = (t[3] << 32) | t[2];
+ r[2] = (t[5] << 32) | t[4];
+ r[3] = (t[7] << 32) | t[6];
+ r[4] = (t[9] << 32) | t[8];
+ r[5] = (t[11] << 32) | t[10];
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (td != NULL)
+ XFREE(td, NULL, DYNAMIC_TYPE_ECC);
+#endif
+
+ return err;
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * Three compile-time cases, depending on how the mp_int digit size
+ * (DIGIT_BIT) compares with the 64-bit sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 64
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i] << s);
+ r[j] &= 0xffffffffffffffffl;
+ s = 64U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 64U) <= (word32)DIGIT_BIT) {
+ s += 64U;
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((sp_digit)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 64) {
+ r[j] &= 0xffffffffffffffffl;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 64 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Convert a point of type ecc_point to type sp_point_384.
+ *
+ * Clears the destination ordinates first so unused high digits are zero;
+ * the converted point is always marked as not-infinity.
+ *
+ * p Point of type sp_point_384 (result).
+ * pm Point of type ecc_point.
+ */
+static void sp_384_point_from_ecc_point_6(sp_point_384* p, const ecc_point* pm)
+{
+ XMEMSET(p->x, 0, sizeof(p->x));
+ XMEMSET(p->y, 0, sizeof(p->y));
+ XMEMSET(p->z, 0, sizeof(p->z));
+ sp_384_from_mp(p->x, 6, pm->x);
+ sp_384_from_mp(p->y, 6, pm->y);
+ sp_384_from_mp(p->z, 6, pm->z);
+ p->infinity = 0;
+}
+
+/* Convert an array of sp_digit to an mp_int.
+ *
+ * Grows the mp_int to hold 384 bits, then repacks the six 64-bit digits
+ * into mp_digit-sized words (three cases on DIGIT_BIT).
+ *
+ * a A single precision integer.
+ * r A multi-precision integer.
+ */
+static int sp_384_to_mp(const sp_digit* a, mp_int* r)
+{
+ int err;
+
+ err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
+ if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 64
+ XMEMCPY(r->dp, a, sizeof(sp_digit) * 6);
+ r->used = 6;
+ mp_clamp(r);
+#elif DIGIT_BIT < 64
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 6; i++) {
+ r->dp[j] |= (mp_digit)(a[i] << s);
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ s = DIGIT_BIT - s;
+ r->dp[++j] = (mp_digit)(a[i] >> s);
+ while (s + DIGIT_BIT <= 64) {
+ s += DIGIT_BIT;
+ r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+ if (s == SP_WORD_SIZE) {
+ r->dp[j] = 0;
+ }
+ else {
+ r->dp[j] = (mp_digit)(a[i] >> s);
+ }
+ }
+ s = 64 - s;
+ }
+ r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#else
+ int i, j = 0, s = 0;
+
+ r->dp[0] = 0;
+ for (i = 0; i < 6; i++) {
+ r->dp[j] |= ((mp_digit)a[i]) << s;
+ if (s + 64 >= DIGIT_BIT) {
+ #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+ r->dp[j] &= (1L << DIGIT_BIT) - 1;
+ #endif
+ s = DIGIT_BIT - s;
+ r->dp[++j] = a[i] >> s;
+ s = 64 - s;
+ }
+ else {
+ s += 64;
+ }
+ }
+ r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
+ mp_clamp(r);
+#endif
+ }
+
+ return err;
+}
+
+/* Convert a point of type sp_point_384 to type ecc_point.
+ *
+ * Copies each ordinate in turn; stops at the first conversion error.
+ *
+ * p Point of type sp_point_384.
+ * pm Point of type ecc_point (result).
+ * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
+ * MP_OKAY.
+ */
+static int sp_384_point_to_ecc_point_6(const sp_point_384* p, ecc_point* pm)
+{
+ int err;
+
+ err = sp_384_to_mp(p->x, pm->x);
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->y, pm->y);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(p->z, pm->z);
+ }
+
+ return err;
+}
+
+extern void sp_384_cond_copy_6(sp_digit* r, const sp_digit* a, sp_digit m);
+extern void sp_384_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern sp_digit sp_384_cond_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_384_mont_reduce_6(sp_digit* a, const sp_digit* m, sp_digit mp);
+extern void sp_384_mont_reduce_order_6(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+static void sp_384_mont_mul_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+ const sp_digit* m, sp_digit mp)
+{
+ sp_384_mul_6(r, a, b);
+ sp_384_mont_reduce_6(r, m, mp);
+}
+
+extern void sp_384_sqr_6(sp_digit* r, const sp_digit* a);
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * Full double-width square followed by Montgomery reduction.
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_6(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_6(r, a);
+    sp_384_mont_reduce_6(r, m, mp);
+}
+
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times.
+ * (r = a ^ (2 ^ n) mod m)
+ *
+ * Note: always squares at least once, even when n <= 1.
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_n_6(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_6(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_6(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+#ifdef WOLFSSL_SP_SMALL
+/* Mod-2 for the P384 curve: the exponent p - 2 used to compute the
+ * modular inverse via Fermat's little theorem (1/a = a^(p-2) mod p). */
+static const uint64_t p384_mod_minus_2[6] = {
+    0x00000000fffffffdU,0xffffffff00000000U,0xfffffffffffffffeU,
+    0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * Uses Fermat's little theorem: 1/a = a^(p-2) mod p.  The small build scans
+ * the exponent bit-by-bit; otherwise a fixed addition chain is used.
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_6(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Left-to-right square-and-multiply over the 383 bits of p - 2. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 6);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_6(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+            sp_384_mont_mul_6(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 6);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 6;
+    sp_digit* t3 = td + 4 * 6;
+    sp_digit* t4 = td + 6 * 6;
+    sp_digit* t5 = td + 8 * 6;
+
+    /* Fixed addition chain for a^(p-2); each comment gives the exponent
+     * accumulated so far. */
+    /* 0x2 */
+    sp_384_mont_sqr_6(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_6(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_6(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_6(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_6(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_6(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_6(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_6(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_6(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_6(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_6(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_6(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_6(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_6(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_6(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_6(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_6(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_6(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_6(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+extern int64_t sp_384_cmp_6(const sp_digit* a, const sp_digit* b);
+/* Normalize the values in each word to 64.
+ *
+ * With full 64-bit digits there is nothing to normalize, so this expands to
+ * nothing; it is kept so code is shared with builds using smaller digits.
+ *
+ * a  Array of sp_digit to normalize.
+ */
+#define sp_384_norm_6(a)
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * Computes 1/Z, then X/Z^2 and Y/Z^3, converting out of Montgomery form and
+ * fully reducing each ordinate below the modulus.
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    int64_t n;
+
+    sp_384_mont_inv_6(t1, p->z, t + 2*6);
+
+    /* t2 = 1/z^2, t1 = 1/z^3 */
+    sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_6(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    /* Clear the top half of the double-wide buffer, then reduce to take the
+     * value out of Montgomery form. */
+    XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_6(r->x, p384_mod);
+    sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_6(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_6(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_6(r->y, p384_mod);
+    sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_6(r->y);
+
+    /* Affine result: z = 1 */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+extern sp_digit sp_384_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b);
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r  Result of addition.
+ * a  First number to add in Montgomery form.
+ * b  Second number to add in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_384_mont_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    /* o = carry out of the add; subtract m when it overflowed (constant
+     * time via the all-ones/all-zeros mask 0 - o). */
+    o = sp_384_add_6(r, a, b);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+extern sp_digit sp_384_dbl_6(sp_digit* r, const sp_digit* a);
+/* Double a Montgomery form number (r = 2 * a % m).
+ *
+ * r  Result of doubling.
+ * a  Number to double in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_384_mont_dbl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    /* o = carry out; conditionally subtract m in constant time. */
+    o = sp_384_dbl_6(r, a);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+/* Triple a Montgomery form number (r = 3 * a % m).
+ *
+ * Implemented as a double followed by an add, reducing after each step.
+ *
+ * r  Result of tripling.
+ * a  Number to triple in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_384_mont_tpl_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+    sp_digit o;
+
+    o = sp_384_dbl_6(r, a);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+    o = sp_384_add_6(r, r, a);
+    sp_384_cond_sub_6(r, r, m, 0 - o);
+}
+
+extern sp_digit sp_384_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b);
+extern sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r  Result of subtraction.
+ * a  Number to subtract from in Montgomery form.
+ * b  Number to subtract with in Montgomery form.
+ * m  Modulus (prime).
+ */
+static void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m)
+{
+    sp_digit o;
+
+    /* o = borrow out; conditionally add m back in constant time. */
+    o = sp_384_sub_6(r, a, b);
+    sp_384_cond_add_6(r, r, m, o);
+}
+
+extern void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m);
+/* Double the Montgomery form projective point p.
+ *
+ * Standard Jacobian-coordinate doubling; the statement order is load-bearing
+ * since temporaries are reused (t1, t2) and r may alias p.
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Propagate the infinity flag when r is a different point. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_6(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_6(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_6(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_6(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_6(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_6(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_6(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_6(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_6(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_6(y, y, t2, p384_mod);
+}
+
+/* Double the Montgomery form projective point p a number of times, in place.
+ *
+ * p  Point to double; also receives the result.
+ * n  Number of times to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int n, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*6;
+    sp_digit* b = t + 4*6;
+    sp_digit* t1 = t + 6*6;
+    sp_digit* t2 = t + 8*6;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = p->x;
+    y = p->y;
+    z = p->z;
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_6(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod);
+
+    /* Small build: run all n iterations in the loop, guarding the W update
+     * on the last one.  Otherwise: peel the final iteration (below) so the
+     * unneeded W update is skipped without a branch in the loop. */
+#ifndef WOLFSSL_SP_SMALL
+    while (--n > 0)
+#else
+    while (--n >= 0)
+#endif
+    {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_6(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod);
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(t2, b, p384_mod);
+        sp_384_mont_sub_6(x, x, t2, p384_mod);
+        /* t1 = Y^4 */
+        sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod);
+        /* Z = Z*Y (above); t1 = Y^4 */
+        sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+        if (n != 0)
+#endif
+        {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_6(w, w, t1, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_6(y, b, x, p384_mod);
+        sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(y, y, p384_mod);
+        sp_384_mont_sub_6(y, y, t1, p384_mod);
+    }
+#ifndef WOLFSSL_SP_SMALL
+    /* Final (peeled) iteration: identical, but without the W update. */
+    /* A = 3*(X^2 - W) */
+    sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(t1, t1, w, p384_mod);
+    sp_384_mont_tpl_6(a, t1, p384_mod);
+    /* B = X*Y^2 */
+    sp_384_mont_sqr_6(t1, y, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(b, t1, x, p384_mod, p384_mp_mod);
+    /* X = A^2 - 2B */
+    sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_6(t2, b, p384_mod);
+    sp_384_mont_sub_6(x, x, t2, p384_mod);
+    /* Z = Z*Y */
+    sp_384_mont_mul_6(z, z, y, p384_mod, p384_mp_mod);
+    /* t1 = Y^4 */
+    sp_384_mont_sqr_6(t1, t1, p384_mod, p384_mp_mod);
+    /* y = 2*A*(B - X) - Y^4 */
+    sp_384_mont_sub_6(y, b, x, p384_mod);
+    sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+    sp_384_mont_dbl_6(y, y, p384_mod);
+    sp_384_mont_sub_6(y, y, t1, p384_mod);
+#endif
+    /* Y = Y/2 */
+    sp_384_div2_6(y, y, p384_mod);
+}
+
+/* Compare two numbers to determine if they are equal.
+ * Constant time implementation.
+ *
+ * ORs together the XOR of every word pair; the result is zero only when
+ * all six words match.  The trip count is fixed, so timing is independent
+ * of the data.
+ *
+ * a First number to compare.
+ * b Second number to compare.
+ * returns 1 when equal and 0 otherwise.
+ */
+static int sp_384_cmp_equal_6(const sp_digit* a, const sp_digit* b)
+{
+    sp_digit d = 0;
+    int i;
+
+    for (i = 0; i < 6; i++) {
+        d |= a[i] ^ b[i];
+    }
+
+    return d == 0;
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Falls back to point doubling when p and q share x and z and have equal or
+ * negated y.  Infinity inputs are handled by redirecting the computation
+ * into scratch space while the non-infinity operand is copied to r.
+ *
+ * r  Result of addition.
+ * p  First point to add.
+ * q  Second point to add.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_6(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+        sp_digit* t)
+{
+    const sp_point_384* ap[2];
+    sp_point_384* rp[2];
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* t3 = t + 4*6;
+    sp_digit* t4 = t + 6*6;
+    sp_digit* t5 = t + 8*6;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+    int i;
+
+    /* Ensure only the first point is the same as the result. */
+    if (q == r) {
+        const sp_point_384* a = p;
+        p = q;
+        q = a;
+    }
+
+    /* Check double: t1 = -q->y; compare ordinates. */
+    (void)sp_384_sub_6(t1, p384_mod, q->y);
+    sp_384_norm_6(t1);
+    if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) &
+        (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) {
+        sp_384_proj_point_dbl_6(r, p, t);
+    }
+    else {
+        rp[0] = r;
+
+        /*lint allow cast to different type of pointer*/
+        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+        XMEMSET(rp[1], 0, sizeof(sp_point_384));
+        /* Compute into scratch (rp[1]) when either input is infinity. */
+        x = rp[p->infinity | q->infinity]->x;
+        y = rp[p->infinity | q->infinity]->y;
+        z = rp[p->infinity | q->infinity]->z;
+
+        /* Copy the surviving operand to r (q when p is infinity). */
+        ap[0] = p;
+        ap[1] = q;
+        for (i=0; i<6; i++) {
+            r->x[i] = ap[p->infinity]->x[i];
+        }
+        for (i=0; i<6; i++) {
+            r->y[i] = ap[p->infinity]->y[i];
+        }
+        for (i=0; i<6; i++) {
+            r->z[i] = ap[p->infinity]->z[i];
+        }
+        r->infinity = ap[p->infinity]->infinity;
+
+        /* U1 = X1*Z2^2 */
+        sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod);
+        /* U2 = X2*Z1^2 */
+        sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+        /* S1 = Y1*Z2^3 */
+        sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod);
+        /* S2 = Y2*Z1^3 */
+        sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+        /* H = U2 - U1 */
+        sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+        /* R = S2 - S1 */
+        sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+        /* Z3 = H*Z1*Z2 */
+        sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+        /* X3 = R^2 - H^3 - 2*U1*H^2 */
+        sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(x, x, t5, p384_mod);
+        sp_384_mont_dbl_6(t1, y, p384_mod);
+        sp_384_mont_sub_6(x, x, t1, p384_mod);
+        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(y, y, t5, p384_mod);
+    }
+}
+
+/* Double the Montgomery form projective point p a number of times, storing
+ * each power-of-two multiple into the table r.
+ *
+ * r  Table of points; entry r[(1<<i)*m] receives the 2^i multiple of p.
+ * p  Point to double.
+ * n  Number of times to double.
+ * m  Stride multiplier applied to the table indices.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, const sp_point_384* p,
+        int n, int m, sp_digit* t)
+{
+    sp_digit* w = t;
+    sp_digit* a = t + 2*6;
+    sp_digit* b = t + 4*6;
+    sp_digit* t1 = t + 6*6;
+    sp_digit* t2 = t + 8*6;
+    sp_digit* x = r[2*m].x;
+    /* y accumulates in the last table entry; each earlier entry's y is
+     * written by the halving at the end of the loop. */
+    sp_digit* y = r[(1<<n)*m].y;
+    sp_digit* z = r[2*m].z;
+    int i;
+
+    for (i=0; i<6; i++) {
+        x[i] = p->x[i];
+    }
+    for (i=0; i<6; i++) {
+        y[i] = p->y[i];
+    }
+    for (i=0; i<6; i++) {
+        z[i] = p->z[i];
+    }
+
+    /* Y = 2*Y */
+    sp_384_mont_dbl_6(y, y, p384_mod);
+    /* W = Z^4 */
+    sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod);
+    for (i=1; i<=n; i++) {
+        /* A = 3*(X^2 - W) */
+        sp_384_mont_sqr_6(t1, x, p384_mod, p384_mp_mod);
+        sp_384_mont_sub_6(t1, t1, w, p384_mod);
+        sp_384_mont_tpl_6(a, t1, p384_mod);
+        /* B = X*Y^2 */
+        sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(b, t2, x, p384_mod, p384_mp_mod);
+        /* Switch x to the table slot for this power of two. */
+        x = r[(1<<i)*m].x;
+        /* X = A^2 - 2B */
+        sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(t1, b, p384_mod);
+        sp_384_mont_sub_6(x, x, t1, p384_mod);
+        /* Z = Z*Y */
+        sp_384_mont_mul_6(r[(1<<i)*m].z, z, y, p384_mod, p384_mp_mod);
+        z = r[(1<<i)*m].z;
+        /* t2 = Y^4 */
+        sp_384_mont_sqr_6(t2, t2, p384_mod, p384_mp_mod);
+        if (i != n) {
+            /* W = W*Y^4 */
+            sp_384_mont_mul_6(w, w, t2, p384_mod, p384_mp_mod);
+        }
+        /* y = 2*A*(B - X) - Y^4 */
+        sp_384_mont_sub_6(y, b, x, p384_mod);
+        sp_384_mont_mul_6(y, y, a, p384_mod, p384_mp_mod);
+        sp_384_mont_dbl_6(y, y, p384_mod);
+        sp_384_mont_sub_6(y, y, t2, p384_mod);
+
+        /* Y = Y/2 */
+        sp_384_div2_6(r[(1<<i)*m].y, y, p384_mod);
+        r[(1<<i)*m].infinity = 0;
+    }
+}
+
+/* Add two Montgomery form projective points.
+ *
+ * Computes ra = p + q and rs = p - q simultaneously, sharing the common
+ * intermediates (H, Z3, U1*H^2, H^3).
+ *
+ * ra  Result of addition.
+ * rs  Result of subtraction.
+ * p   First point to add.
+ * q   Second point to add.
+ * t   Temporary ordinate data.
+ */
+static void sp_384_proj_point_add_sub_6(sp_point_384* ra, sp_point_384* rs,
+        const sp_point_384* p, const sp_point_384* q, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* t3 = t + 4*6;
+    sp_digit* t4 = t + 6*6;
+    sp_digit* t5 = t + 8*6;
+    sp_digit* t6 = t + 10*6;
+    sp_digit* x = ra->x;
+    sp_digit* y = ra->y;
+    sp_digit* z = ra->z;
+    sp_digit* xs = rs->x;
+    sp_digit* ys = rs->y;
+    sp_digit* zs = rs->z;
+
+
+    XMEMCPY(x, p->x, sizeof(p->x) / 2);
+    XMEMCPY(y, p->y, sizeof(p->y) / 2);
+    XMEMCPY(z, p->z, sizeof(p->z) / 2);
+    ra->infinity = 0;
+    rs->infinity = 0;
+
+    /* U1 = X1*Z2^2 */
+    sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t1, t1, x, p384_mod, p384_mp_mod);
+    /* U2 = X2*Z1^2 */
+    sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+    /* S1 = Y1*Z2^3 */
+    sp_384_mont_mul_6(t3, t3, y, p384_mod, p384_mp_mod);
+    /* S2 = Y2*Z1^3 */
+    sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+    /* H = U2 - U1 */
+    sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+    /* RS = S2 + S1 */
+    sp_384_mont_add_6(t6, t4, t3, p384_mod);
+    /* R = S2 - S1 */
+    sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+    /* Z3 = H*Z1*Z2 */
+    /* ZS = H*Z1*Z2 */
+    sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+    XMEMCPY(zs, z, sizeof(p->z)/2);
+    /* X3 = R^2 - H^3 - 2*U1*H^2 */
+    /* XS = RS^2 - H^3 - 2*U1*H^2 */
+    sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(xs, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(x, x, t5, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t5, p384_mod);
+    sp_384_mont_dbl_6(t1, y, p384_mod);
+    sp_384_mont_sub_6(x, x, t1, p384_mod);
+    sp_384_mont_sub_6(xs, xs, t1, p384_mod);
+    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+    /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+    sp_384_mont_sub_6(ys, y, xs, p384_mod);
+    sp_384_mont_sub_6(y, y, x, p384_mod);
+    sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod);
+    /* t6 = -RS */
+    sp_384_sub_6(t6, p384_mod, t6);
+    sp_384_mont_mul_6(ys, ys, t6, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod);
+    sp_384_mont_sub_6(y, y, t5, p384_mod);
+    sp_384_mont_sub_6(ys, ys, t5, p384_mod);
+}
+
+/* Structure used to describe recoding of scalar multiplication.
+ * One entry per window of the recoded scalar. */
+typedef struct ecc_recode_384 {
+    /* Index into pre-computation table. */
+    uint8_t i;
+    /* Use the negative of the point. */
+    uint8_t neg;
+} ecc_recode_384;
+
+/* The index into pre-computation table to use.
+ * Window values 33..64 are mirrored to 64 - y; the point is negated
+ * instead (see recode_neg_6_6).  Entry 65 covers a carry into the final
+ * window. */
+static const uint8_t recode_index_6_6[66] = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,
+     0,  1,
+};
+
+/* Whether to negate y-ordinate.
+ * Set exactly for the mirrored window values 32..63 in recode_index_6_6. */
+static const uint8_t recode_neg_6_6[66] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0,
+};
+
+/* Recode the scalar for multiplication using pre-computed values and
+ * subtraction.
+ *
+ * The scalar is split into 65 windows of 6 bits.  Window values above 32
+ * are mapped to 64 - value with the point negated, carrying one into the
+ * next window (signed-digit recoding via the tables above).
+ *
+ * k  Scalar to multiply by.
+ * v  Vector of operations to perform.
+ */
+static void sp_384_ecc_recode_6_6(const sp_digit* k, ecc_recode_384* v)
+{
+    int i, j;
+    uint8_t y;
+    int carry = 0;
+    int o;
+    sp_digit n;
+
+    j = 0;
+    n = k[j];
+    o = 0;
+    for (i=0; i<65; i++) {
+        y = n;
+        if (o + 6 < 64) {
+            /* Window lies entirely within the current 64-bit word. */
+            y &= 0x3f;
+            n >>= 6;
+            o += 6;
+        }
+        else if (o + 6 == 64) {
+            /* Window ends exactly on a word boundary. */
+            n >>= 6;
+            if (++j < 6)
+                n = k[j];
+            o = 0;
+        }
+        else if (++j < 6) {
+            /* Window straddles two words: top bits come from the next
+             * word; o - 58 = 6 - (64 - o) bits consumed from it. */
+            n = k[j];
+            y |= (n << (64 - o)) & 0x3f;
+            o -= 58;
+            n >>= o;
+        }
+
+        y += carry;
+        v[i].i = recode_index_6_6[y];
+        v[i].neg = recode_neg_6_6[y];
+        carry = (y >> 6) + v[i].neg;
+    }
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Window method, width 6: the scalar is recoded into 65 signed base-64
+ * digits and a table of the multiples 0..32 of g is pre-computed; negative
+ * digits are handled by negating the y ordinate.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_win_add_sub_6(sp_point_384* r, const sp_point_384* g,
+        const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 td[33];
+    sp_point_384 rtd, pd;
+    sp_digit tmpd[2 * 6 * 6];
+#endif
+    sp_point_384* t;
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* tmp;
+    sp_digit* negy;
+    int i;
+    ecc_recode_384 v[65];
+    int err;
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, rtd, rt);
+    if (err == MP_OKAY)
+        err = sp_384_point_new_6(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 33, heap, DYNAMIC_TYPE_ECC);
+    if (t == NULL)
+        err = MEMORY_E;
+    tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+                             DYNAMIC_TYPE_ECC);
+    if (tmp == NULL)
+        err = MEMORY_E;
+#else
+    t = td;
+    tmp = tmpd;
+#endif
+
+
+    if (err == MP_OKAY) {
+        /* t[0] = {0, 0, 1} * norm */
+        XMEMSET(&t[0], 0, sizeof(t[0]));
+        t[0].infinity = 1;
+        /* t[1] = {g->x, g->y, g->z} * norm */
+        err = sp_384_mod_mul_norm_6(t[1].x, g->x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t[1].y, g->y, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_mod_mul_norm_6(t[1].z, g->z, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        t[1].infinity = 0;
+        /* t[2] ... t[32]: built from doublings and combined add/sub. */
+        sp_384_proj_point_dbl_n_store_6(t, &t[ 1], 5, 1, tmp);
+        sp_384_proj_point_add_6(&t[ 3], &t[ 2], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[ 6], &t[ 3], tmp);
+        sp_384_proj_point_add_sub_6(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[10], &t[ 5], tmp);
+        sp_384_proj_point_add_sub_6(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[12], &t[ 6], tmp);
+        sp_384_proj_point_dbl_6(&t[14], &t[ 7], tmp);
+        sp_384_proj_point_add_sub_6(&t[15], &t[13], &t[14], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[18], &t[ 9], tmp);
+        sp_384_proj_point_add_sub_6(&t[19], &t[17], &t[18], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[20], &t[10], tmp);
+        sp_384_proj_point_dbl_6(&t[22], &t[11], tmp);
+        sp_384_proj_point_add_sub_6(&t[23], &t[21], &t[22], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[24], &t[12], tmp);
+        sp_384_proj_point_dbl_6(&t[26], &t[13], tmp);
+        sp_384_proj_point_add_sub_6(&t[27], &t[25], &t[26], &t[ 1], tmp);
+        sp_384_proj_point_dbl_6(&t[28], &t[14], tmp);
+        sp_384_proj_point_dbl_6(&t[30], &t[15], tmp);
+        sp_384_proj_point_add_sub_6(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+        /* t[0] is the point at infinity; its y buffer is free scratch. */
+        negy = t[0].y;
+
+        sp_384_ecc_recode_6_6(k, v);
+
+        /* Start from the top window, then shift-and-add downwards. */
+        i = 64;
+        XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_384));
+        for (--i; i>=0; i--) {
+            /* Shift the accumulator up one window (6 doublings). */
+            sp_384_proj_point_dbl_n_6(rt, 6, tmp);
+
+            XMEMCPY(p, &t[v[i].i], sizeof(sp_point_384));
+            /* Negate the y ordinate when the recoded digit is negative. */
+            sp_384_sub_6(negy, p384_mod, p->y);
+            sp_384_cond_copy_6(p->y, negy, (sp_digit)0 - v[i].neg);
+            sp_384_proj_point_add_6(rt, rt, p, tmp);
+        }
+
+        if (map != 0) {
+            sp_384_map_6(r, rt, tmp);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL)
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    if (tmp != NULL)
+        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(rt, 0, heap);
+
+    return err;
+}
+
+#ifdef HAVE_INTEL_AVX2
+#ifdef HAVE_INTEL_AVX2
+extern void sp_384_mul_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* b);
+#define sp_384_mont_reduce_avx2_6 sp_384_mont_reduce_6
+extern void sp_384_mont_reduce_order_avx2_6(sp_digit* a, const sp_digit* m, sp_digit mp);
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * AVX2 variant of sp_384_mont_mul_6.
+ *
+ * r   Result of multiplication.
+ * a   First number to multiply in Montgomery form.
+ * b   Second number to multiply in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_mul_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* b,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mul_avx2_6(r, a, b);
+    sp_384_mont_reduce_avx2_6(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+extern void sp_384_sqr_avx2_6(sp_digit* r, const sp_digit* a);
+/* Square the Montgomery form number. (r = a * a mod m)
+ *
+ * AVX2 variant of sp_384_mont_sqr_6.
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* m,
+        sp_digit mp)
+{
+    sp_384_sqr_avx2_6(r, a);
+    sp_384_mont_reduce_avx2_6(r, m, mp);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
+/* Square the Montgomery form number a number of times.
+ * (r = a ^ (2 ^ n) mod m)
+ *
+ * AVX2 variant; always squares at least once, even when n <= 1.
+ *
+ * r   Result of squaring.
+ * a   Number to square in Montgomery form.
+ * n   Number of times to square.
+ * m   Modulus (prime).
+ * mp  Montgomery multiplier.
+ */
+static void sp_384_mont_sqr_n_avx2_6(sp_digit* r, const sp_digit* a, int n,
+        const sp_digit* m, sp_digit mp)
+{
+    sp_384_mont_sqr_avx2_6(r, a, m, mp);
+    for (; n > 1; n--) {
+        sp_384_mont_sqr_avx2_6(r, r, m, mp);
+    }
+}
+
+#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
+
+/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
+ * P384 curve. (r = 1 / a mod m)
+ *
+ * AVX2 variant of sp_384_mont_inv_6.  Uses Fermat's little theorem:
+ * 1/a = a^(p-2) mod p.  The small build scans the exponent bit-by-bit;
+ * otherwise a fixed addition chain is used.
+ *
+ * r   Inverse result.
+ * a   Number to invert.
+ * td  Temporary data.
+ */
+static void sp_384_mont_inv_avx2_6(sp_digit* r, const sp_digit* a, sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* t = td;
+    int i;
+
+    /* Left-to-right square-and-multiply over the 383 bits of p - 2. */
+    XMEMCPY(t, a, sizeof(sp_digit) * 6);
+    for (i=382; i>=0; i--) {
+        sp_384_mont_sqr_avx2_6(t, t, p384_mod, p384_mp_mod);
+        if (p384_mod_minus_2[i / 64] & ((sp_digit)1 << (i % 64)))
+            sp_384_mont_mul_avx2_6(t, t, a, p384_mod, p384_mp_mod);
+    }
+    XMEMCPY(r, t, sizeof(sp_digit) * 6);
+#else
+    sp_digit* t1 = td;
+    sp_digit* t2 = td + 2 * 6;
+    sp_digit* t3 = td + 4 * 6;
+    sp_digit* t4 = td + 6 * 6;
+    sp_digit* t5 = td + 8 * 6;
+
+    /* Fixed addition chain for a^(p-2); each comment gives the exponent
+     * accumulated so far. */
+    /* 0x2 */
+    sp_384_mont_sqr_avx2_6(t1, a, p384_mod, p384_mp_mod);
+    /* 0x3 */
+    sp_384_mont_mul_avx2_6(t5, t1, a, p384_mod, p384_mp_mod);
+    /* 0xc */
+    sp_384_mont_sqr_n_avx2_6(t1, t5, 2, p384_mod, p384_mp_mod);
+    /* 0xf */
+    sp_384_mont_mul_avx2_6(t2, t5, t1, p384_mod, p384_mp_mod);
+    /* 0x1e */
+    sp_384_mont_sqr_avx2_6(t1, t2, p384_mod, p384_mp_mod);
+    /* 0x1f */
+    sp_384_mont_mul_avx2_6(t4, t1, a, p384_mod, p384_mp_mod);
+    /* 0x3e0 */
+    sp_384_mont_sqr_n_avx2_6(t1, t4, 5, p384_mod, p384_mp_mod);
+    /* 0x3ff */
+    sp_384_mont_mul_avx2_6(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x7fe0 */
+    sp_384_mont_sqr_n_avx2_6(t1, t2, 5, p384_mod, p384_mp_mod);
+    /* 0x7fff */
+    sp_384_mont_mul_avx2_6(t4, t4, t1, p384_mod, p384_mp_mod);
+    /* 0x3fff8000 */
+    sp_384_mont_sqr_n_avx2_6(t1, t4, 15, p384_mod, p384_mp_mod);
+    /* 0x3fffffff */
+    sp_384_mont_mul_avx2_6(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffc */
+    sp_384_mont_sqr_n_avx2_6(t3, t2, 2, p384_mod, p384_mp_mod);
+    /* 0xfffffffd */
+    sp_384_mont_mul_avx2_6(r, t3, a, p384_mod, p384_mp_mod);
+    /* 0xffffffff */
+    sp_384_mont_mul_avx2_6(t3, t5, t3, p384_mod, p384_mp_mod);
+    /* 0xfffffffc0000000 */
+    sp_384_mont_sqr_n_avx2_6(t1, t2, 30, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff */
+    sp_384_mont_mul_avx2_6(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffff000000000000000 */
+    sp_384_mont_sqr_n_avx2_6(t1, t2, 60, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_avx2_6(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+    sp_384_mont_sqr_n_avx2_6(t1, t2, 120, p384_mod, p384_mp_mod);
+    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_avx2_6(t2, t2, t1, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+    sp_384_mont_sqr_n_avx2_6(t1, t2, 15, p384_mod, p384_mp_mod);
+    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+    sp_384_mont_mul_avx2_6(t2, t4, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
+    sp_384_mont_sqr_n_avx2_6(t1, t2, 33, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
+    sp_384_mont_mul_avx2_6(t2, t3, t1, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
+    sp_384_mont_sqr_n_avx2_6(t1, t2, 96, p384_mod, p384_mp_mod);
+    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
+    sp_384_mont_mul_avx2_6(r, r, t1, p384_mod, p384_mp_mod);
+
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+/* Map the Montgomery form projective coordinate point to an affine point.
+ *
+ * AVX2 variant of sp_384_map_6.  Computes 1/Z, then X/Z^2 and Y/Z^3,
+ * converting out of Montgomery form and fully reducing each ordinate.
+ * (sp_384_mont_reduce_avx2_6 is an alias of sp_384_mont_reduce_6, hence the
+ * plain reduce calls below.)
+ *
+ * r  Resulting affine coordinate point.
+ * p  Montgomery form projective coordinate point.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_map_avx2_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    int64_t n;
+
+    sp_384_mont_inv_avx2_6(t1, p->z, t + 2*6);
+
+    /* t2 = 1/z^2, t1 = 1/z^3 */
+    sp_384_mont_sqr_avx2_6(t2, t1, p384_mod, p384_mp_mod);
+    sp_384_mont_mul_avx2_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+    /* x /= z^2 */
+    sp_384_mont_mul_avx2_6(r->x, p->x, t2, p384_mod, p384_mp_mod);
+    /* Clear the top half of the double-wide buffer before reduction. */
+    XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U);
+    sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod);
+    /* Reduce x to less than modulus */
+    n = sp_384_cmp_6(r->x, p384_mod);
+    sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_6(r->x);
+
+    /* y /= z^3 */
+    sp_384_mont_mul_avx2_6(r->y, p->y, t1, p384_mod, p384_mp_mod);
+    XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U);
+    sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod);
+    /* Reduce y to less than modulus */
+    n = sp_384_cmp_6(r->y, p384_mod);
+    sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
+                (sp_digit)1 : (sp_digit)0));
+    sp_384_norm_6(r->y);
+
+    /* Affine result: z = 1 */
+    XMEMSET(r->z, 0, sizeof(r->z));
+    r->z[0] = 1;
+
+}
+
+/* Double the Montgomery form projective point p.
+ *
+ * AVX2 variant of sp_384_proj_point_dbl_6; identical Jacobian formulas with
+ * AVX2 multiply/square.  Statement order is load-bearing (t1/t2 reuse, r
+ * may alias p).
+ *
+ * r  Result of doubling point.
+ * p  Point to double.
+ * t  Temporary ordinate data.
+ */
+static void sp_384_proj_point_dbl_avx2_6(sp_point_384* r, const sp_point_384* p, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2*6;
+    sp_digit* x;
+    sp_digit* y;
+    sp_digit* z;
+
+    x = r->x;
+    y = r->y;
+    z = r->z;
+    /* Propagate the infinity flag when r is a different point. */
+    if (r != p) {
+        r->infinity = p->infinity;
+    }
+
+    /* T1 = Z * Z */
+    sp_384_mont_sqr_avx2_6(t1, p->z, p384_mod, p384_mp_mod);
+    /* Z = Y * Z */
+    sp_384_mont_mul_avx2_6(z, p->y, p->z, p384_mod, p384_mp_mod);
+    /* Z = 2Z */
+    sp_384_mont_dbl_6(z, z, p384_mod);
+    /* T2 = X - T1 */
+    sp_384_mont_sub_6(t2, p->x, t1, p384_mod);
+    /* T1 = X + T1 */
+    sp_384_mont_add_6(t1, p->x, t1, p384_mod);
+    /* T2 = T1 * T2 */
+    sp_384_mont_mul_avx2_6(t2, t1, t2, p384_mod, p384_mp_mod);
+    /* T1 = 3T2 */
+    sp_384_mont_tpl_6(t1, t2, p384_mod);
+    /* Y = 2Y */
+    sp_384_mont_dbl_6(y, p->y, p384_mod);
+    /* Y = Y * Y */
+    sp_384_mont_sqr_avx2_6(y, y, p384_mod, p384_mp_mod);
+    /* T2 = Y * Y */
+    sp_384_mont_sqr_avx2_6(t2, y, p384_mod, p384_mp_mod);
+    /* T2 = T2/2 */
+    sp_384_div2_6(t2, t2, p384_mod);
+    /* Y = Y * X */
+    sp_384_mont_mul_avx2_6(y, y, p->x, p384_mod, p384_mp_mod);
+    /* X = T1 * T1 */
+    sp_384_mont_sqr_avx2_6(x, t1, p384_mod, p384_mp_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_6(x, x, y, p384_mod);
+    /* X = X - Y */
+    sp_384_mont_sub_6(x, x, y, p384_mod);
+    /* Y = Y - X */
+    sp_384_mont_sub_6(y, y, x, p384_mod);
+    /* Y = Y * T1 */
+    sp_384_mont_mul_avx2_6(y, y, t1, p384_mod, p384_mp_mod);
+    /* Y = Y - T2 */
+    sp_384_mont_sub_6(y, y, t2, p384_mod);
+}
+
+/* Double the Montgomery form projective point p a number of times, in place.
+ *
+ * Uses the repeated-doubling optimisation that carries W = Z^4 across
+ * iterations. NOTE(review): unlike the single-doubling routine there is no
+ * separate result parameter - p is updated in place.
+ *
+ * p Point to double; also receives the result.
+ * n Number of times to double
+ * t Temporary ordinate data (five 2*6-word values used: w, a, b, t1, t2).
+ */
+static void sp_384_proj_point_dbl_n_avx2_6(sp_point_384* p, int n, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*6;
+ sp_digit* b = t + 4*6;
+ sp_digit* t1 = t + 6*6;
+ sp_digit* t2 = t + 8*6;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+
+ x = p->x;
+ y = p->y;
+ z = p->z;
+
+ /* Y = 2*Y */
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ /* W = Z^4 */
+ sp_384_mont_sqr_avx2_6(w, z, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_avx2_6(w, w, p384_mod, p384_mp_mod);
+
+ /* Non-small build: peel the last iteration out of the loop (below) so the
+ * final W update can be skipped without an in-loop branch. */
+#ifndef WOLFSSL_SP_SMALL
+ while (--n > 0)
+#else
+ while (--n >= 0)
+#endif
+ {
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_avx2_6(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_6(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_avx2_6(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_avx2_6(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(t2, b, p384_mod);
+ sp_384_mont_sub_6(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_avx2_6(z, z, y, p384_mod, p384_mp_mod);
+ /* t2 = Y^4 */
+ sp_384_mont_sqr_avx2_6(t1, t1, p384_mod, p384_mp_mod);
+#ifdef WOLFSSL_SP_SMALL
+ if (n != 0)
+#endif
+ {
+ /* W = W*Y^4 */
+ sp_384_mont_mul_avx2_6(w, w, t1, p384_mod, p384_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_6(y, b, x, p384_mod);
+ sp_384_mont_mul_avx2_6(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ sp_384_mont_sub_6(y, y, t1, p384_mod);
+ }
+#ifndef WOLFSSL_SP_SMALL
+ /* Last iteration, peeled: identical to the loop body but never updates W. */
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_avx2_6(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_6(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_avx2_6(t1, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(b, t1, x, p384_mod, p384_mp_mod);
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_avx2_6(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(t2, b, p384_mod);
+ sp_384_mont_sub_6(x, x, t2, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_avx2_6(z, z, y, p384_mod, p384_mp_mod);
+ /* t2 = Y^4 */
+ sp_384_mont_sqr_avx2_6(t1, t1, p384_mod, p384_mp_mod);
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_6(y, b, x, p384_mod);
+ sp_384_mont_mul_avx2_6(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ sp_384_mont_sub_6(y, y, t1, p384_mod);
+#endif
+ /* Y = Y/2 */
+ sp_384_div2_6(y, y, p384_mod);
+}
+
<source><![CDATA[]]></source>
+/* Add two Montgomery form projective points.
+ *
+ * Jacobian addition; ordinates are in Montgomery form. If p and q are the
+ * same point (same x/z with y or -y), falls back to doubling. If either
+ * input is infinity the other input is returned.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (five 2*6-word values; also reused as a
+ *   scratch point when an input is infinity).
+ */
+static void sp_384_proj_point_add_avx2_6(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
+ sp_digit* t)
+{
+ const sp_point_384* ap[2];
+ sp_point_384* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*6;
+ sp_digit* t3 = t + 4*6;
+ sp_digit* t4 = t + 6*6;
+ sp_digit* t5 = t + 8*6;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Ensure only the first point is the same as the result. */
+ if (q == r) {
+ const sp_point_384* a = p;
+ p = q;
+ q = a;
+ }
+
+ /* Check double: t1 = -q->y; equal x, z and (y == q.y or y == -q.y). */
+ (void)sp_384_sub_6(t1, p384_mod, q->y);
+ sp_384_norm_6(t1);
+ if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) &
+ (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) {
+ sp_384_proj_point_dbl_6(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_384));
+ /* If either input is infinity, compute into scratch (rp[1]) so the
+ * copy of the non-infinite input below is the visible result. */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ /* r = q when p is infinity, otherwise r = p. */
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<6; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<6; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<6; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U1 = X1*Z2^2 */
+ sp_384_mont_sqr_avx2_6(t1, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t1, t1, x, p384_mod, p384_mp_mod);
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_avx2_6(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S1 = Y1*Z2^3 */
+ sp_384_mont_mul_avx2_6(t3, t3, y, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - U1 */
+ sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+ /* R = S2 - S1 */
+ sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+ /* Z3 = H*Z1*Z2 */
+ sp_384_mont_mul_avx2_6(z, z, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(z, z, t2, p384_mod, p384_mp_mod);
+ /* X3 = R^2 - H^3 - 2*U1*H^2 */
+ sp_384_mont_sqr_avx2_6(x, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(y, t1, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(x, x, t5, p384_mod);
+ sp_384_mont_dbl_6(t1, y, p384_mod);
+ sp_384_mont_sub_6(x, x, t1, p384_mod);
+ /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+ sp_384_mont_sub_6(y, y, x, p384_mod);
+ sp_384_mont_mul_avx2_6(y, y, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t5, t5, t3, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(y, y, t5, p384_mod);
+ }
+}
+
+/* Double the Montgomery form projective point p a number of times, storing
+ * every intermediate doubling into a table of points.
+ *
+ * After the call, r[(1<<i)*m] holds 2^i * p for i = 1..n.
+ *
+ * r Table of points receiving the repeated doublings.
+ * p Point to double.
+ * n Number of times to double
+ * m Index stride: result of the i-th doubling is stored at r[(1<<i)*m].
+ * t Temporary ordinate data (five 2*6-word values: w, a, b, t1, t2).
+ */
+static void sp_384_proj_point_dbl_n_store_avx2_6(sp_point_384* r, const sp_point_384* p,
+ int n, int m, sp_digit* t)
+{
+ sp_digit* w = t;
+ sp_digit* a = t + 2*6;
+ sp_digit* b = t + 4*6;
+ sp_digit* t1 = t + 6*6;
+ sp_digit* t2 = t + 8*6;
+ sp_digit* x = r[2*m].x;
+ sp_digit* y = r[(1<<n)*m].y;
+ sp_digit* z = r[2*m].z;
+ int i;
+
+ /* Work in the first output slot (r[2*m]); y uses the last slot as
+ * scratch until each iteration's halved value is stored. */
+ for (i=0; i<6; i++) {
+ x[i] = p->x[i];
+ }
+ for (i=0; i<6; i++) {
+ y[i] = p->y[i];
+ }
+ for (i=0; i<6; i++) {
+ z[i] = p->z[i];
+ }
+
+ /* Y = 2*Y */
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ /* W = Z^4 */
+ sp_384_mont_sqr_avx2_6(w, z, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_avx2_6(w, w, p384_mod, p384_mp_mod);
+ for (i=1; i<=n; i++) {
+ /* A = 3*(X^2 - W) */
+ sp_384_mont_sqr_avx2_6(t1, x, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(t1, t1, w, p384_mod);
+ sp_384_mont_tpl_6(a, t1, p384_mod);
+ /* B = X*Y^2 */
+ sp_384_mont_sqr_avx2_6(t2, y, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(b, t2, x, p384_mod, p384_mp_mod);
+ x = r[(1<<i)*m].x;
+ /* X = A^2 - 2B */
+ sp_384_mont_sqr_avx2_6(x, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(t1, b, p384_mod);
+ sp_384_mont_sub_6(x, x, t1, p384_mod);
+ /* Z = Z*Y */
+ sp_384_mont_mul_avx2_6(r[(1<<i)*m].z, z, y, p384_mod, p384_mp_mod);
+ z = r[(1<<i)*m].z;
+ /* t2 = Y^4 */
+ sp_384_mont_sqr_avx2_6(t2, t2, p384_mod, p384_mp_mod);
+ if (i != n) {
+ /* W = W*Y^4 */
+ sp_384_mont_mul_avx2_6(w, w, t2, p384_mod, p384_mp_mod);
+ }
+ /* y = 2*A*(B - X) - Y^4 */
+ sp_384_mont_sub_6(y, b, x, p384_mod);
+ sp_384_mont_mul_avx2_6(y, y, a, p384_mod, p384_mp_mod);
+ sp_384_mont_dbl_6(y, y, p384_mod);
+ sp_384_mont_sub_6(y, y, t2, p384_mod);
+
+ /* Y = Y/2 */
+ sp_384_div2_6(r[(1<<i)*m].y, y, p384_mod);
+ r[(1<<i)*m].infinity = 0;
+ }
+}
+
+/* Add two Montgomery form projective points, producing both the sum and the
+ * difference (ra = p + q, rs = p - q) while sharing the common
+ * intermediates (H, U1*H^2, H^3).
+ *
+ * ra Result of addition.
+ * rs Result of subtraction.
+ * p First point to add.
+ * q Second point to add.
+ * t Temporary ordinate data (six 2*6-word values: t1..t6).
+ */
+static void sp_384_proj_point_add_sub_avx2_6(sp_point_384* ra, sp_point_384* rs,
+ const sp_point_384* p, const sp_point_384* q, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*6;
+ sp_digit* t3 = t + 4*6;
+ sp_digit* t4 = t + 6*6;
+ sp_digit* t5 = t + 8*6;
+ sp_digit* t6 = t + 10*6;
+ sp_digit* x = ra->x;
+ sp_digit* y = ra->y;
+ sp_digit* z = ra->z;
+ sp_digit* xs = rs->x;
+ sp_digit* ys = rs->y;
+ sp_digit* zs = rs->z;
+
+
+ /* Only the low 6 digits of each ordinate are in use. */
+ XMEMCPY(x, p->x, sizeof(p->x) / 2);
+ XMEMCPY(y, p->y, sizeof(p->y) / 2);
+ XMEMCPY(z, p->z, sizeof(p->z) / 2);
+ ra->infinity = 0;
+ rs->infinity = 0;
+
+ /* U1 = X1*Z2^2 */
+ sp_384_mont_sqr_avx2_6(t1, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t3, t1, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t1, t1, x, p384_mod, p384_mp_mod);
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_avx2_6(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S1 = Y1*Z2^3 */
+ sp_384_mont_mul_avx2_6(t3, t3, y, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - U1 */
+ sp_384_mont_sub_6(t2, t2, t1, p384_mod);
+ /* RS = S2 + S1 */
+ sp_384_mont_add_6(t6, t4, t3, p384_mod);
+ /* R = S2 - S1 */
+ sp_384_mont_sub_6(t4, t4, t3, p384_mod);
+ /* Z3 = H*Z1*Z2 */
+ /* ZS = H*Z1*Z2 */
+ sp_384_mont_mul_avx2_6(z, z, q->z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(z, z, t2, p384_mod, p384_mp_mod);
+ XMEMCPY(zs, z, sizeof(p->z)/2);
+ /* X3 = R^2 - H^3 - 2*U1*H^2 */
+ /* XS = RS^2 - H^3 - 2*U1*H^2 */
+ sp_384_mont_sqr_avx2_6(x, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_avx2_6(xs, t6, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(y, t1, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(x, x, t5, p384_mod);
+ sp_384_mont_sub_6(xs, xs, t5, p384_mod);
+ sp_384_mont_dbl_6(t1, y, p384_mod);
+ sp_384_mont_sub_6(x, x, t1, p384_mod);
+ sp_384_mont_sub_6(xs, xs, t1, p384_mod);
+ /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
+ /* YS = -RS*(U1*H^2 - XS) - S1*H^3 */
+ sp_384_mont_sub_6(ys, y, xs, p384_mod);
+ sp_384_mont_sub_6(y, y, x, p384_mod);
+ sp_384_mont_mul_avx2_6(y, y, t4, p384_mod, p384_mp_mod);
+ /* t6 = -RS */
+ sp_384_sub_6(t6, p384_mod, t6);
+ sp_384_mont_mul_avx2_6(ys, ys, t6, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t5, t5, t3, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(y, y, t5, p384_mod);
+ sp_384_mont_sub_6(ys, ys, t5, p384_mod);
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Windowed add/sub method: the scalar is recoded into 65 signed 6-bit
+ * windows and odd multiples 1g..32g are precomputed in t[]; negation of a
+ * table entry is done by a constant-time conditional copy of -y.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_win_add_sub_avx2_6(sp_point_384* r, const sp_point_384* g,
+ const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td[33];
+ sp_point_384 rtd, pd;
+ sp_digit tmpd[2 * 6 * 6];
+#endif
+ sp_point_384* t;
+ sp_point_384* rt;
+ sp_point_384* p = NULL;
+ sp_digit* tmp;
+ sp_digit* negy;
+ int i;
+ ecc_recode_384 v[65];
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_6(heap, rtd, rt);
+ if (err == MP_OKAY)
+ err = sp_384_point_new_6(heap, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 33, heap, DYNAMIC_TYPE_ECC);
+ if (t == NULL)
+ err = MEMORY_E;
+ tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (tmp == NULL)
+ err = MEMORY_E;
+#else
+ t = td;
+ tmp = tmpd;
+#endif
+
+
+ if (err == MP_OKAY) {
+ /* t[0] = {0, 0, 1} * norm */
+ XMEMSET(&t[0], 0, sizeof(t[0]));
+ t[0].infinity = 1;
+ /* t[1] = {g->x, g->y, g->z} * norm */
+ err = sp_384_mod_mul_norm_6(t[1].x, g->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t[1].y, g->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t[1].z, g->z, p384_mod);
+ }
+
+ if (err == MP_OKAY) {
+ t[1].infinity = 0;
+ /* t[2] ... t[32]: build the table with doublings and fused
+ * add/sub operations (t[2^i] by doubling, others by add/sub). */
+ sp_384_proj_point_dbl_n_store_avx2_6(t, &t[ 1], 5, 1, tmp);
+ sp_384_proj_point_add_avx2_6(&t[ 3], &t[ 2], &t[ 1], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[ 6], &t[ 3], tmp);
+ sp_384_proj_point_add_sub_avx2_6(&t[ 7], &t[ 5], &t[ 6], &t[ 1], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[10], &t[ 5], tmp);
+ sp_384_proj_point_add_sub_avx2_6(&t[11], &t[ 9], &t[10], &t[ 1], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[12], &t[ 6], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[14], &t[ 7], tmp);
+ sp_384_proj_point_add_sub_avx2_6(&t[15], &t[13], &t[14], &t[ 1], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[18], &t[ 9], tmp);
+ sp_384_proj_point_add_sub_avx2_6(&t[19], &t[17], &t[18], &t[ 1], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[20], &t[10], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[22], &t[11], tmp);
+ sp_384_proj_point_add_sub_avx2_6(&t[23], &t[21], &t[22], &t[ 1], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[24], &t[12], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[26], &t[13], tmp);
+ sp_384_proj_point_add_sub_avx2_6(&t[27], &t[25], &t[26], &t[ 1], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[28], &t[14], tmp);
+ sp_384_proj_point_dbl_avx2_6(&t[30], &t[15], tmp);
+ sp_384_proj_point_add_sub_avx2_6(&t[31], &t[29], &t[30], &t[ 1], tmp);
+
+ /* t[0].y is all-zero; reused as scratch for the negated y. */
+ negy = t[0].y;
+
+ /* Recode scalar into 65 signed windows of 6 bits. */
+ sp_384_ecc_recode_6_6(k, v);
+
+ i = 64;
+ XMEMCPY(rt, &t[v[i].i], sizeof(sp_point_384));
+ for (--i; i>=0; i--) {
+ /* 6 doublings per window, then add the (conditionally
+ * negated) table entry for this window. */
+ sp_384_proj_point_dbl_n_avx2_6(rt, 6, tmp);
+
+ XMEMCPY(p, &t[v[i].i], sizeof(sp_point_384));
+ sp_384_sub_6(negy, p384_mod, p->y);
+ sp_384_cond_copy_6(p->y, negy, (sp_digit)0 - v[i].neg);
+ sp_384_proj_point_add_avx2_6(rt, rt, p, tmp);
+ }
+
+ if (map != 0) {
+ sp_384_map_avx2_6(r, rt, tmp);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL)
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ if (tmp != NULL)
+ XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
+#endif
+ sp_384_point_free_6(p, 0, heap);
+ sp_384_point_free_6(rt, 0, heap);
+
+ return err;
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+/* A table entry for pre-computed points (affine, Montgomery form). */
+typedef struct sp_table_entry_384 {
+ /* X ordinate of the affine point, in Montgomery form. */
+ sp_digit x[6];
+ /* Y ordinate of the affine point, in Montgomery form. */
+ sp_digit y[6];
+} sp_table_entry_384;
+
+#ifdef FP_ECC
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * With Z2 == 1 the U1/S1 computations collapse to X1/Y1, saving the
+ * Z2^2/Z2^3 multiplications of the general addition.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add (assumed to have z == 1 in Montgomery form).
+ * t Temporary ordinate data (five 2*6-word values; also reused as a
+ *   scratch point when an input is infinity).
+ */
+static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p,
+ const sp_point_384* q, sp_digit* t)
+{
+ const sp_point_384* ap[2];
+ sp_point_384* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*6;
+ sp_digit* t3 = t + 4*6;
+ sp_digit* t4 = t + 6*6;
+ sp_digit* t5 = t + 8*6;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Check double: t1 = -q->y; equal x, z and (y == q.y or y == -q.y). */
+ (void)sp_384_sub_6(t1, p384_mod, q->y);
+ sp_384_norm_6(t1);
+ if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) &
+ (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) {
+ sp_384_proj_point_dbl_6(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_384));
+ /* If either input is infinity, compute into scratch (rp[1]). */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ /* r = q when p is infinity, otherwise r = p. */
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<6; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<6; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<6; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_6(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - X1 */
+ sp_384_mont_sub_6(t2, t2, x, p384_mod);
+ /* R = S2 - Y1 */
+ sp_384_mont_sub_6(t4, t4, y, p384_mod);
+ /* Z3 = H*Z1 */
+ sp_384_mont_mul_6(z, z, t2, p384_mod, p384_mp_mod);
+ /* X3 = R^2 - H^3 - 2*X1*H^2 */
+ sp_384_mont_sqr_6(t1, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t3, x, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(x, t1, t5, p384_mod);
+ sp_384_mont_dbl_6(t1, t3, p384_mod);
+ sp_384_mont_sub_6(x, x, t1, p384_mod);
+ /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+ sp_384_mont_sub_6(t3, t3, x, p384_mod);
+ sp_384_mont_mul_6(t3, t3, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t5, t5, y, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(y, t3, t5, p384_mod);
+ }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * Computes x = X/Z^2, y = Y/Z^3 (still in Montgomery form) and sets z to
+ * the Montgomery representation of 1 (p384_norm_mod).
+ *
+ * a Point to convert (modified in place).
+ * t Temporary data (two 2*6-word values plus scratch for the inversion).
+ */
+static void sp_384_proj_to_affine_6(sp_point_384* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 6;
+ sp_digit* tmp = t + 4 * 6;
+
+ /* t1 = 1/Z */
+ sp_384_mont_inv_6(t1, a->z, tmp);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3 */
+ sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+ sp_384_mont_mul_6(a->x, a->x, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_6(a->y, a->y, t1, p384_mod, p384_mp_mod);
+ XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * Builds a 256-entry stripe table: table[2^i] = 2^(48*i) * a for i = 0..7,
+ * and each other index is the sum of the points selected by its set bits.
+ * All stored points are affine, in Montgomery form.
+ *
+ * a The base point.
+ * table Place to store generated point data (256 entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_gen_stripe_table_6(const sp_point_384* a,
+ sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td, s1d, s2d;
+#endif
+ sp_point_384* t;
+ sp_point_384* s1 = NULL;
+ sp_point_384* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_6(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, s2d, s2);
+ }
+
+ /* Convert the base point into Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t->x, a->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t->y, a->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t->z, a->z, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_384_proj_to_affine_6(t, tmp);
+
+ XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(48*i) * a, affine. */
+ for (i=1; i<8; i++) {
+ sp_384_proj_point_dbl_n_6(t, 48, tmp);
+ sp_384_proj_to_affine_6(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Remaining entries: sum of power-of-two entries per set bit. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_384_proj_point_add_qz1_6(t, s1, s2, tmp);
+ sp_384_proj_to_affine_6(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_384_point_free_6(s2, 0, heap);
+ sp_384_point_free_6(s1, 0, heap);
+ sp_384_point_free_6( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * Stripe method: one bit is taken from each of 8 stripes of the scalar
+ * (48 bits apart) to index the 256-entry pre-computed table.
+ *
+ * r Resulting point.
+ * g Point the table was generated from (unused here; table encodes it).
+ * table Pre-computed stripe table for the point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_6(sp_point_384* r, const sp_point_384* g,
+ const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 rtd;
+ sp_point_384 pd;
+ sp_digit td[2 * 6 * 6];
+#endif
+ sp_point_384* rt;
+ sp_point_384* p = NULL;
+ sp_digit* t;
+ int i, j;
+ int y, x;
+ int err;
+
+ (void)g;
+ (void)heap;
+
+
+ err = sp_384_point_new_6(heap, rtd, rt);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, pd, p);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (t == NULL) {
+ err = MEMORY_E;
+ }
+#else
+ t = td;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Table entries are affine: z = 1 in Montgomery form. */
+ XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+ XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+ /* y = table index built from bit 47 of each 48-bit stripe. */
+ y = 0;
+ for (j=0,x=47; j<8; j++,x+=48) {
+ y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+ }
+ XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+ rt->infinity = !y;
+ /* For each remaining bit position: double, then add table entry. */
+ for (i=46; i>=0; i--) {
+ y = 0;
+ for (j=0,x=i; j<8; j++,x+=48) {
+ y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+ }
+
+ sp_384_proj_point_dbl_6(rt, rt, t);
+ XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+ XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+ p->infinity = !y;
+ sp_384_proj_point_add_qz1_6(rt, rt, p, t);
+ }
+
+ if (map != 0) {
+ sp_384_map_6(r, rt, t);
+ }
+ else {
+ XMEMCPY(r, rt, sizeof(sp_point_384));
+ }
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (t != NULL) {
+ XFREE(t, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_6(p, 0, heap);
+ sp_384_point_free_6(rt, 0, heap);
+
+ return err;
+}
+
+#ifdef FP_ECC
+#ifndef FP_ENTRIES
+ #define FP_ENTRIES 16
+#endif
+
+/* Cache entry: pre-computed stripe table for one fixed point. */
+typedef struct sp_cache_384_t {
+ /* X ordinate of the point the table was generated for. */
+ sp_digit x[6];
+ /* Y ordinate of the point the table was generated for. */
+ sp_digit y[6];
+ /* Pre-computed stripe table for the point. */
+ sp_table_entry_384 table[256];
+ /* Use count: drives eviction and table generation (built at cnt == 2). */
+ uint32_t cnt;
+ /* Non-zero when this entry holds a point (and possibly a table). */
+ int set;
+} sp_cache_384_t;
+
+/* Per-thread (when thread-local storage is available) cache of tables. */
+static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
+/* Index of the most recently assigned cache entry. */
+static THREAD_LS_T int sp_cache_384_last = -1;
+/* Non-zero once the cache entries have been cleared. */
+static THREAD_LS_T int sp_cache_384_inited = 0;
+
+#ifndef HAVE_THREAD_LS
+ static volatile int initCacheMutex_384 = 0;
+ static wolfSSL_Mutex sp_cache_384_lock;
+#endif
+
+/* Find (or create) the cache entry for point g.
+ *
+ * On a hit the entry's use count is incremented. On a miss an empty entry
+ * is claimed, or the least-used entry is evicted. NOTE(review): the caller
+ * is expected to hold sp_cache_384_lock when thread-local storage is not
+ * available - this function does no locking itself.
+ *
+ * g Point to look up (by x and y ordinates).
+ * cache Receives a pointer to the matching/assigned cache entry.
+ */
+static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
+{
+ int i, j;
+ uint32_t least;
+
+ /* Lazily clear all entries on first use. */
+ if (sp_cache_384_inited == 0) {
+ for (i=0; i<FP_ENTRIES; i++) {
+ sp_cache_384[i].set = 0;
+ }
+ sp_cache_384_inited = 1;
+ }
+
+ /* Compare point with those in cache. */
+ for (i=0; i<FP_ENTRIES; i++) {
+ if (!sp_cache_384[i].set)
+ continue;
+
+ if (sp_384_cmp_equal_6(g->x, sp_cache_384[i].x) &
+ sp_384_cmp_equal_6(g->y, sp_cache_384[i].y)) {
+ sp_cache_384[i].cnt++;
+ break;
+ }
+ }
+
+ /* No match. */
+ if (i == FP_ENTRIES) {
+ /* Find empty entry. */
+ i = (sp_cache_384_last + 1) % FP_ENTRIES;
+ for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
+ if (!sp_cache_384[i].set) {
+ break;
+ }
+ }
+
+ /* Evict least used. */
+ if (i == sp_cache_384_last) {
+ least = sp_cache_384[0].cnt;
+ for (j=1; j<FP_ENTRIES; j++) {
+ if (sp_cache_384[j].cnt < least) {
+ i = j;
+ least = sp_cache_384[i].cnt;
+ }
+ }
+ }
+
+ /* Store the point; the table is generated later by the caller. */
+ XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
+ XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
+ sp_cache_384[i].set = 1;
+ sp_cache_384[i].cnt = 1;
+ }
+
+ *cache = &sp_cache_384[i];
+ sp_cache_384_last = i;
+}
+#endif /* FP_ECC */
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * With FP_ECC: repeated use of the same point (cnt >= 2) triggers
+ * generation of a cached stripe table, after which the faster stripe
+ * method is used; otherwise the windowed add/sub method is used.
+ *
+ * r Resulting point.
+ * g Point to multiply.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails, BAD_MUTEX_E when the
+ * cache lock cannot be taken, and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+ int map, void* heap)
+{
+#ifndef FP_ECC
+ return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap);
+#else
+ /* Sized for sp_384_gen_stripe_table_6's temporary needs. */
+ sp_digit tmp[2 * 6 * 7];
+ sp_cache_384_t* cache;
+ int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+ if (initCacheMutex_384 == 0) {
+ wc_InitMutex(&sp_cache_384_lock);
+ initCacheMutex_384 = 1;
+ }
+ if (wc_LockMutex(&sp_cache_384_lock) != 0)
+ err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+ if (err == MP_OKAY) {
+ sp_ecc_get_cache_384(g, &cache);
+ /* Build the table on the second use of this point. */
+ if (cache->cnt == 2)
+ sp_384_gen_stripe_table_6(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+ wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+ if (cache->cnt < 2) {
+ err = sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, heap);
+ }
+ else {
+ err = sp_384_ecc_mulmod_stripe_6(r, g, cache->table, k,
+ map, heap);
+ }
+ }
+
+ return err;
+#endif
+}
+
+#ifdef HAVE_INTEL_AVX2
+#ifdef FP_ECC
+#endif /* FP_ECC */
+/* Add two Montgomery form projective points. The second point has a q value of
+ * one.
+ * Only the first point can be the same pointer as the result point.
+ *
+ * AVX2 variant of sp_384_proj_point_add_qz1_6: with Z2 == 1 the U1/S1
+ * computations collapse to X1/Y1.
+ *
+ * r Result of addition.
+ * p First point to add.
+ * q Second point to add (assumed to have z == 1 in Montgomery form).
+ * t Temporary ordinate data (five 2*6-word values; also reused as a
+ *   scratch point when an input is infinity).
+ */
+static void sp_384_proj_point_add_qz1_avx2_6(sp_point_384* r, const sp_point_384* p,
+ const sp_point_384* q, sp_digit* t)
+{
+ const sp_point_384* ap[2];
+ sp_point_384* rp[2];
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2*6;
+ sp_digit* t3 = t + 4*6;
+ sp_digit* t4 = t + 6*6;
+ sp_digit* t5 = t + 8*6;
+ sp_digit* x;
+ sp_digit* y;
+ sp_digit* z;
+ int i;
+
+ /* Check double: t1 = -q->y; equal x, z and (y == q.y or y == -q.y). */
+ (void)sp_384_sub_6(t1, p384_mod, q->y);
+ sp_384_norm_6(t1);
+ if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) &
+ (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) {
+ sp_384_proj_point_dbl_6(r, p, t);
+ }
+ else {
+ rp[0] = r;
+
+ /*lint allow cast to different type of pointer*/
+ rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
+ XMEMSET(rp[1], 0, sizeof(sp_point_384));
+ /* If either input is infinity, compute into scratch (rp[1]). */
+ x = rp[p->infinity | q->infinity]->x;
+ y = rp[p->infinity | q->infinity]->y;
+ z = rp[p->infinity | q->infinity]->z;
+
+ /* r = q when p is infinity, otherwise r = p. */
+ ap[0] = p;
+ ap[1] = q;
+ for (i=0; i<6; i++) {
+ r->x[i] = ap[p->infinity]->x[i];
+ }
+ for (i=0; i<6; i++) {
+ r->y[i] = ap[p->infinity]->y[i];
+ }
+ for (i=0; i<6; i++) {
+ r->z[i] = ap[p->infinity]->z[i];
+ }
+ r->infinity = ap[p->infinity]->infinity;
+
+ /* U2 = X2*Z1^2 */
+ sp_384_mont_sqr_avx2_6(t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t4, t2, z, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod);
+ /* S2 = Y2*Z1^3 */
+ sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod);
+ /* H = U2 - X1 */
+ sp_384_mont_sub_6(t2, t2, x, p384_mod);
+ /* R = S2 - Y1 */
+ sp_384_mont_sub_6(t4, t4, y, p384_mod);
+ /* Z3 = H*Z1 */
+ sp_384_mont_mul_avx2_6(z, z, t2, p384_mod, p384_mp_mod);
+ /* X3 = R^2 - H^3 - 2*X1*H^2 */
+ sp_384_mont_sqr_avx2_6(t1, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t3, x, t5, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(x, t1, t5, p384_mod);
+ sp_384_mont_dbl_6(t1, t3, p384_mod);
+ sp_384_mont_sub_6(x, x, t1, p384_mod);
+ /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
+ sp_384_mont_sub_6(t3, t3, x, p384_mod);
+ sp_384_mont_mul_avx2_6(t3, t3, t4, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t5, t5, y, p384_mod, p384_mp_mod);
+ sp_384_mont_sub_6(y, t3, t5, p384_mod);
+ }
+}
+
+#ifdef FP_ECC
+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * AVX2 variant: computes x = X/Z^2, y = Y/Z^3 (still in Montgomery form)
+ * and sets z to the Montgomery representation of 1 (p384_norm_mod).
+ *
+ * a Point to convert (modified in place).
+ * t Temporary data (two 2*6-word values plus scratch for the inversion).
+ */
+static void sp_384_proj_to_affine_avx2_6(sp_point_384* a, sp_digit* t)
+{
+ sp_digit* t1 = t;
+ sp_digit* t2 = t + 2 * 6;
+ sp_digit* tmp = t + 4 * 6;
+
+ /* t1 = 1/Z */
+ sp_384_mont_inv_avx2_6(t1, a->z, tmp);
+
+ /* t2 = 1/Z^2, t1 = 1/Z^3 */
+ sp_384_mont_sqr_avx2_6(t2, t1, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(t1, t2, t1, p384_mod, p384_mp_mod);
+
+ sp_384_mont_mul_avx2_6(a->x, a->x, t2, p384_mod, p384_mp_mod);
+ sp_384_mont_mul_avx2_6(a->y, a->y, t1, p384_mod, p384_mp_mod);
+ XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
+}
+
+/* Generate the pre-computed table of points for the base point.
+ *
+ * AVX2 variant: builds a 256-entry stripe table where
+ * table[2^i] = 2^(48*i) * a for i = 0..7, and each other index is the sum
+ * of the points selected by its set bits. Stored points are affine, in
+ * Montgomery form.
+ *
+ * a The base point.
+ * table Place to store generated point data (256 entries).
+ * tmp Temporary data.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_gen_stripe_table_avx2_6(const sp_point_384* a,
+ sp_table_entry_384* table, sp_digit* tmp, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 td, s1d, s2d;
+#endif
+ sp_point_384* t;
+ sp_point_384* s1 = NULL;
+ sp_point_384* s2 = NULL;
+ int i, j;
+ int err;
+
+ (void)heap;
+
+ err = sp_384_point_new_6(heap, td, t);
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, s1d, s1);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, s2d, s2);
+ }
+
+ /* Convert the base point into Montgomery form. */
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t->x, a->x, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t->y, a->y, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_mod_mul_norm_6(t->z, a->z, p384_mod);
+ }
+ if (err == MP_OKAY) {
+ t->infinity = 0;
+ sp_384_proj_to_affine_avx2_6(t, tmp);
+
+ XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s1->infinity = 0;
+ XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
+ s2->infinity = 0;
+
+ /* table[0] = {0, 0, infinity} */
+ XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
+ /* table[1] = Affine version of 'a' in Montgomery form */
+ XMEMCPY(table[1].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1].y, t->y, sizeof(table->y));
+
+ /* table[2^i] = 2^(48*i) * a, affine. */
+ for (i=1; i<8; i++) {
+ sp_384_proj_point_dbl_n_avx2_6(t, 48, tmp);
+ sp_384_proj_to_affine_avx2_6(t, tmp);
+ XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
+ XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
+ }
+
+ /* Remaining entries: sum of power-of-two entries per set bit. */
+ for (i=1; i<8; i++) {
+ XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
+ XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
+ for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
+ XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
+ XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
+ sp_384_proj_point_add_qz1_avx2_6(t, s1, s2, tmp);
+ sp_384_proj_to_affine_avx2_6(t, tmp);
+ XMEMCPY(table[j].x, t->x, sizeof(table->x));
+ XMEMCPY(table[j].y, t->y, sizeof(table->y));
+ }
+ }
+ }
+
+ sp_384_point_free_6(s2, 0, heap);
+ sp_384_point_free_6(s1, 0, heap);
+ sp_384_point_free_6( t, 0, heap);
+
+ return err;
+}
+
+#endif /* FP_ECC */
+/* Multiply a point by the scalar using a pre-computed stripe table and
+ * return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r      Resulting point.
+ * g      Point the table was built from (not used directly here; the table
+ *        already holds the required multiples).
+ * table  Pre-computed table of affine multiples of g
+ *        (see sp_384_gen_stripe_table_avx2_6).
+ * k      Scalar to multiply by.
+ * map    Indicates whether to convert result to affine.
+ * heap   Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_stripe_avx2_6(sp_point_384* r, const sp_point_384* g,
+        const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 rtd;
+    sp_point_384 pd;
+    sp_digit td[2 * 6 * 6];
+#endif
+    sp_point_384* rt;
+    sp_point_384* p = NULL;
+    sp_digit* t;
+    int i, j;
+    int y, x;
+    int err;
+
+    (void)g;
+    (void)heap;
+
+
+    err = sp_384_point_new_6(heap, rtd, rt);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, heap,
+                           DYNAMIC_TYPE_ECC);
+    if (t == NULL) {
+        err = MEMORY_E;
+    }
+#else
+    t = td;
+#endif
+
+    if (err == MP_OKAY) {
+        XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
+        XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
+
+        /* Gather bit 47 of each of the 8 48-bit stripes of the scalar to
+         * form the index of the first table entry. */
+        y = 0;
+        for (j=0,x=47; j<8; j++,x+=48) {
+            y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+        }
+        XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
+        XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
+        rt->infinity = !y;
+        /* For each remaining bit position: double, then add the table
+         * entry indexed by that bit of every stripe. */
+        for (i=46; i>=0; i--) {
+            y = 0;
+            for (j=0,x=i; j<8; j++,x+=48) {
+                y |= ((k[x / 64] >> (x % 64)) & 1) << j;
+            }
+
+            sp_384_proj_point_dbl_avx2_6(rt, rt, t);
+            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
+            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
+            p->infinity = !y;
+            sp_384_proj_point_add_qz1_avx2_6(rt, rt, p, t);
+        }
+
+        if (map != 0) {
+            sp_384_map_avx2_6(r, rt, t);
+        }
+        else {
+            XMEMCPY(r, rt, sizeof(sp_point_384));
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (t != NULL) {
+        XFREE(t, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(rt, 0, heap);
+
+    return err;
+}
+
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * With FP_ECC defined, a per-point cache is consulted: once the same point
+ * has been requested enough times (cnt == 2) a stripe table is generated
+ * and the stripe multiplication is used; otherwise the window add-sub
+ * multiplication is used.
+ *
+ * r     Resulting point.
+ * g     Point to multiply.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_avx2_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
+        int map, void* heap)
+{
+#ifndef FP_ECC
+    return sp_384_ecc_mulmod_win_add_sub_avx2_6(r, g, k, map, heap);
+#else
+    /* Scratch space for stripe-table generation. */
+    sp_digit tmp[2 * 6 * 7];
+    sp_cache_384_t* cache;
+    int err = MP_OKAY;
+
+#ifndef HAVE_THREAD_LS
+    /* NOTE(review): this check-then-init of the cache mutex is not atomic;
+     * presumably serialized by how callers first reach this path -- confirm. */
+    if (initCacheMutex_384 == 0) {
+         wc_InitMutex(&sp_cache_384_lock);
+         initCacheMutex_384 = 1;
+    }
+    if (wc_LockMutex(&sp_cache_384_lock) != 0)
+        err = BAD_MUTEX_E;
+#endif /* HAVE_THREAD_LS */
+
+    if (err == MP_OKAY) {
+        sp_ecc_get_cache_384(g, &cache);
+        /* NOTE(review): the return value of the table generation is ignored;
+         * on failure the cached table could be partially filled -- confirm
+         * this cannot happen or is handled upstream. */
+        if (cache->cnt == 2)
+            sp_384_gen_stripe_table_avx2_6(g, cache->table, tmp, heap);
+
+#ifndef HAVE_THREAD_LS
+        wc_UnLockMutex(&sp_cache_384_lock);
+#endif /* HAVE_THREAD_LS */
+
+        /* Table is only valid once cnt has reached 2. */
+        if (cache->cnt < 2) {
+            err = sp_384_ecc_mulmod_win_add_sub_avx2_6(r, g, k, map, heap);
+        }
+        else {
+            err = sp_384_ecc_mulmod_stripe_avx2_6(r, g, cache->table, k,
+                    map, heap);
+        }
+    }
+
+    return err;
+#endif
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+/* Multiply the point by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km    Scalar to multiply by.
+ * gm    Point to multiply.
+ * r     Resulting point.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
+        void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 p;
+    sp_digit kd[6];
+#endif
+    sp_point_384* point;
+    sp_digit* k = NULL;
+    int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                               DYNAMIC_TYPE_ECC);
+        if (k == NULL)
+            err = MEMORY_E;
+    }
+#else
+    k = kd;
+#endif
+    if (err == MP_OKAY) {
+        /* Load scalar and point into the internal 6-digit representation. */
+        sp_384_from_mp(k, 6, km);
+        sp_384_point_from_ecc_point_6(point, gm);
+
+#ifdef HAVE_INTEL_AVX2
+        /* Use the AVX2 implementation only when BMI2 and ADX are present. */
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_avx2_6(point, point, k, map, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_6(point, point, k, map, heap);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_point_to_ecc_point_6(point, r);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (k != NULL) {
+        XFREE(k, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(point, 0, heap);
+
+    return err;
+}
+
+static const sp_table_entry_384 p384_table[256] = {
+ /* 0 */
+ { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ /* 1 */
+ { { 0x3dd0756649c0b528L,0x20e378e2a0d6ce38L,0x879c3afc541b4d6eL,
+ 0x6454868459a30effL,0x812ff723614ede2bL,0x4d3aadc2299e1513L },
+ { 0x23043dad4b03a4feL,0xa1bfa8bf7bb4a9acL,0x8bade7562e83b050L,
+ 0xc6c3521968f4ffd9L,0xdd8002263969a840L,0x2b78abc25a15c5e9L } },
+ /* 2 */
+ { { 0x298647532b0c535bL,0x90dd695370506296L,0x038cd6b4216ab9acL,
+ 0x3df9b7b7be12d76aL,0x13f4d9785f347bdbL,0x222c5c9c13e94489L },
+ { 0x5f8e796f2680dc64L,0x120e7cb758352417L,0x254b5d8ad10740b8L,
+ 0xc38b8efb5337dee6L,0xf688c2e194f02247L,0x7b5c75f36c25bc4cL } },
+ /* 3 */
+ { { 0xe26a3cc39edffea5L,0x35bbfd1c37d7e9fcL,0xf0e7700d9bde3ef6L,
+ 0x0380eb471a538f5aL,0x2e9da8bb05bf9eb3L,0xdbb93c731a460c3eL },
+ { 0x37dba260f526b605L,0x95d4978efd785537L,0x24ed793aed72a04aL,
+ 0x2694837776005b1aL,0x99f557b99e681f82L,0xae5f9557d64954efL } },
+ /* 4 */
+ { { 0x24480c57f26feef9L,0xc31a26943a0e1240L,0x735002c3273e2bc7L,
+ 0x8c42e9c53ef1ed4cL,0x028babf67f4948e8L,0x6a502f438a978632L },
+ { 0xf5f13a46b74536feL,0x1d218babd8a9f0ebL,0x30f36bcc37232768L,
+ 0xc5317b31576e8c18L,0xef1d57a69bbcb766L,0x917c4930b3e3d4dcL } },
+ /* 5 */
+ { { 0x11426e2ee349ddd0L,0x9f117ef99b2fc250L,0xff36b480ec0174a6L,
+ 0x4f4bde7618458466L,0x2f2edb6d05806049L,0x8adc75d119dfca92L },
+ { 0xa619d097b7d5a7ceL,0x874275e5a34411e9L,0x5403e0470da4b4efL,
+ 0x2ebaafd977901d8fL,0x5e63ebcea747170fL,0x12a369447f9d8036L } },
+ /* 6 */
+ { { 0x28f9c07a4fc52870L,0xce0b37481a53a961L,0xd550fa180e1828d9L,
+ 0xa24abaf76adb225aL,0xd11ed0a56e58a348L,0xf3d811e6948acb62L },
+ { 0x8618dd774c61ed22L,0x0bb747f980b47c9dL,0x22bf796fde6b8559L,
+ 0xfdfd1c6d680a21e9L,0xc0db15772af2c9ddL,0xa09379e6c1e90f3dL } },
+ /* 7 */
+ { { 0x386c66efe085c629L,0x5fc2a461095bc89aL,0x1353d631203f4b41L,
+ 0x7ca1972b7e4bd8f5L,0xb077380aa7df8ce9L,0xd8a90389ee7e4ea3L },
+ { 0x1bc74dc7e7b14461L,0xdc2cb0140c9c4f78L,0x52b4b3a684ef0a10L,
+ 0xbde6ea5d20327fe2L,0xb71ec435660f9615L,0xeede5a04b8ad8173L } },
+ /* 8 */
+ { { 0x5584cbb3893b9a2dL,0x820c660b00850c5dL,0x4126d8267df2d43dL,
+ 0xdd5bbbf00109e801L,0x85b92ee338172f1cL,0x609d4f93f31430d9L },
+ { 0x1e059a07eadaf9d6L,0x70e6536c0f125fb0L,0xd6220751560f20e7L,
+ 0xa59489ae7aaf3a9aL,0x7b70e2f664bae14eL,0x0dd0370176d08249L } },
+ /* 9 */
+ { { 0x4cc13be88510521fL,0x87315ba9f724cc17L,0xb49d83bb353dc263L,
+ 0x8b677efe0c279257L,0x510a1c1cc93c9537L,0x33e30cd8a4702c99L },
+ { 0xf0ffc89d2208353fL,0x0170fa8dced42b2bL,0x090851ed26e2a5f5L,
+ 0x81276455ecb52c96L,0x0646c4e17fe1adf4L,0x513f047eb0868eabL } },
+ /* 10 */
+ { { 0xc07611f4df5bdf53L,0x45d331a758b11a6dL,0x58965daf1c4ee394L,
+ 0xba8bebe75a5878d1L,0xaecc0a1882dd3025L,0xcf2a3899a923eb8bL },
+ { 0xf98c9281d24fd048L,0x841bfb598bbb025dL,0xb8ddf8cec9ab9d53L,
+ 0x538a4cb67fef044eL,0x092ac21f23236662L,0xa919d3850b66f065L } },
+ /* 11 */
+ { { 0x3db03b4085d480d8L,0x8cd9f4791b287a7dL,0x8f24dc754a8f3baeL,
+ 0x482eb8003db41892L,0x38bf9eb39c56e0f5L,0x8b9773209a91dc6fL },
+ { 0xa31b05b27209cfc2L,0x4c49bf8505b2db70L,0x56462498d619527bL,
+ 0x3fe510391fac51baL,0xfb04f55eab4b8342L,0xc07c10dc04c6eabfL } },
+ /* 12 */
+ { { 0xad22fe4cdb32f048L,0x5f23bf91475ed6dfL,0xa50ce0c0aa66b6cbL,
+ 0xdf627a89f03405c0L,0x3674837df95e2d6aL,0x081c95b6ba42e64eL },
+ { 0xeba3e036e71d6cebL,0xb45bcccf6c6b0271L,0x67b47e630684701dL,
+ 0x60f8f942e712523fL,0x824234725cd47adcL,0x83027d7987649cbbL } },
+ /* 13 */
+ { { 0xb3929ea63615b0b8L,0xb41441fda54dac41L,0x8995d556b5b6a368L,
+ 0xa80d4529167ef05eL,0xf6bcb4a16d25a27fL,0x210d6a4c7bd55b68L },
+ { 0xf3804abb25351130L,0x1d2df699903e37ebL,0x5f201efc084c25c8L,
+ 0x31a28c87a1c68e91L,0x81dad253563f62a5L,0x5dd6de70d6c415d4L } },
+ /* 14 */
+ { { 0x29f470fd846612ceL,0x986f3eecda18d997L,0x6b84c1612f34af86L,
+ 0x5ef0a40846ddaf8bL,0x14405a00e49e795fL,0x5f491b16aa2f7a37L },
+ { 0xc7f07ae4db41b38dL,0xef7d119e18fbfcaaL,0x3a18e07614443b19L,
+ 0x4356841a79a19926L,0x91f4a91ce2226fbeL,0xdc77248c3cc88721L } },
+ /* 15 */
+ { { 0xd570ff1ae4b1ec9dL,0x21d23e0ee7eef706L,0x3cde40f4ca19e086L,
+ 0x7d6523c4cd4bb270L,0x16c1f06cbf13aa6cL,0x5aa7245ad14c4b60L },
+ { 0x37f8146744b74de8L,0x839e7a17620a934eL,0xf74d14e8de8b1aa1L,
+ 0x8789fa51f30d75e2L,0x09b24052c81c261eL,0x654e267833c565eeL } },
+ /* 16 */
+ { { 0x378205de2f9fbe67L,0xc4afcb837f728e44L,0xdbcec06c682e00f1L,
+ 0xf2a145c3114d5423L,0xa01d98747a52463eL,0xfc0935b17d717b0aL },
+ { 0x9653bc4fd4d01f95L,0x9aa83ea89560ad34L,0xf77943dcaf8e3f3fL,
+ 0x70774a10e86fe16eL,0x6b62e6f1bf9ffdcfL,0x8a72f39e588745c9L } },
+ /* 17 */
+ { { 0x73ade4da2341c342L,0xdd326e54ea704422L,0x336c7d983741cef3L,
+ 0x1eafa00d59e61549L,0xcd3ed892bd9a3efdL,0x03faf26cc5c6c7e4L },
+ { 0x087e2fcf3045f8acL,0x14a65532174f1e73L,0x2cf84f28fe0af9a7L,
+ 0xddfd7a842cdc935bL,0x4c0f117b6929c895L,0x356572d64c8bcfccL } },
+ /* 18 */
+ { { 0x7ecbac017d8c1bbaL,0x6058f9c390b0f3d5L,0xaee116e3f6197d0fL,
+ 0xc4dd70684033b128L,0xf084dba6c209b983L,0x97c7c2cf831dbc4aL },
+ { 0x2f4e61ddf96010e8L,0xd97e4e20529faa17L,0x4ee6666069d37f20L,
+ 0xccc139ed3d366d72L,0x690b6ee213488e0fL,0x7cad1dc5f3a6d533L } },
+ /* 19 */
+ { { 0x660a9a81da57a41fL,0xe74a0412ec0039b6L,0x42343c6b5e1dad15L,
+ 0x284f3ff546681d4cL,0xb51087f163749e89L,0x070f23cc6f9f2f13L },
+ { 0x542211da5d186e14L,0x84748f37fddb0dffL,0x41a3aab4db1f4180L,
+ 0x25ed667ba6402d0eL,0x2f2924a902f58355L,0x5844ee7cfa44a689L } },
+ /* 20 */
+ { { 0xfab086073f3b236fL,0x19e9d41d81e221daL,0xf3f6571e3927b428L,
+ 0x4348a9337550f1f6L,0x7167b996a85e62f0L,0x62d437597f5452bfL },
+ { 0xd85feb9ef2955926L,0x440a561f6df78353L,0x389668ec9ca36b59L,
+ 0x052bf1a1a22da016L,0xbdfbff72f6093254L,0x94e50f28e22209f3L } },
+ /* 21 */
+ { { 0x90b2e5b33062e8afL,0xa8572375e8a3d369L,0x3fe1b00b201db7b1L,
+ 0xe926def0ee651aa2L,0x6542c9beb9b10ad7L,0x098e309ba2fcbe74L },
+ { 0x779deeb3fff1d63fL,0x23d0e80a20bfd374L,0x8452bb3b8768f797L,
+ 0xcf75bb4d1f952856L,0x8fe6b40029ea3faaL,0x12bd3e4081373a53L } },
+ /* 22 */
+ { { 0xc023780d104cbba5L,0x6207e747fa35dd4cL,0x35c239281ca9b6a3L,
+ 0x4ff19be897987b10L,0xb8476bbf8022eee8L,0xaa0a4a14d3bbe74dL },
+ { 0x20f94331187d4543L,0x3215387079f6e066L,0x83b0f74eac7e82e1L,
+ 0xa7748ba2828f06abL,0xc5f0298ac26ef35fL,0x0f0c50708e9a7dbdL } },
+ /* 23 */
+ { { 0x0c5c244cdef029ddL,0x3dabc687850661b8L,0x9992b865fe11d981L,
+ 0xe9801b8f6274dbadL,0xe54e6319098da242L,0x9929a91a91a53d08L },
+ { 0x37bffd7235285887L,0xbc759425f1418102L,0x9280cc35fd2e6e20L,
+ 0x735c600cfbc42ee5L,0xb7ad28648837619aL,0xa3627231a778c57bL } },
+ /* 24 */
+ { { 0xae799b5c91361ed8L,0x47d71b756c63366cL,0x54cdd5211b265a6aL,
+ 0xe0215a5998d77b74L,0x4424d9b7bab29db0L,0x8b0ffacc7fd9e536L },
+ { 0x46d85d1237b5d9efL,0x5b106d62bfa91747L,0xed0479f85f99ba2dL,
+ 0x0e6f39231d104de4L,0x83a84c8425e8983fL,0xa9507e0af8105a70L } },
+ /* 25 */
+ { { 0xf6c68a6e14cf381cL,0xaf9d27bdc22e31ccL,0x23568d4daa8a5ccbL,
+ 0xe431eec0e338e4d2L,0xf1a828fe8f52ad1fL,0xdb6a0579e86acd80L },
+ { 0x2885672e4507832aL,0x73fc275f887e5289L,0x65f8027805610d08L,
+ 0x8d9b4554075ff5b0L,0x3a8e8fb109f712b5L,0x39f0ac862ebe9cf2L } },
+ /* 26 */
+ { { 0xd8fabf784c52edf5L,0xdcd737e5a589ae53L,0x94918bf0d791ab17L,
+ 0xb5fbd956bcff06c9L,0xf6d3032edca46d45L,0x2cdff7e141a3e486L },
+ { 0x6674b3ba61f47ec8L,0x8a882163eef84608L,0xa257c7054c687f90L,
+ 0xe30cb2edf6cdf227L,0x2c4c64ca7f6ea846L,0x186fa17ccc6bcd3cL } },
+ /* 27 */
+ { { 0x48a3f5361dfcb91eL,0x83595e13646d358aL,0xbd15827b91128798L,
+ 0x3ce612b82187757aL,0x873150a161bd7372L,0xf4684530b662f568L },
+ { 0x8833950b401896f6L,0xe11cb89a77f3e090L,0xb2f12cac48e7f4a5L,
+ 0x313dd769f606677eL,0xfdcf08b316579f93L,0x6429cec946b8f22bL } },
+ /* 28 */
+ { { 0x4984dd54bb75f9a4L,0x4aef06b929d3b570L,0xb5f84ca23d6e4c1eL,
+ 0x24c61c11b083ef35L,0xce4a7392392ca9ffL,0x865d65176730a800L },
+ { 0xca3dfe76722b4a2bL,0x12c04bf97b083e0eL,0x803ce5b51b86b8a5L,
+ 0x3fc7632d6a7e3e0cL,0xc89970c2c81adbe4L,0x3cbcd3ad120e16b1L } },
+ /* 29 */
+ { { 0xfbfb4cc7ec30ce93L,0x10ed6c7db72720a2L,0xec675bf747b55500L,
+ 0x90725903333ff7c3L,0xc7c3973e5075bfc0L,0xb049ecb007acf31bL },
+ { 0xb4076eaf4f58839cL,0x101896daa2b05e4fL,0x3f6033b0ab40c66eL,
+ 0x19ee9eebc8d864baL,0xeb6cf15547bf6d2aL,0x8e5a9663f826477dL } },
+ /* 30 */
+ { { 0x69e62fddf7fbd5e1L,0x38ecfe5476912b1dL,0x845a3d56d1da3bfbL,
+ 0x0494950e1c86f0d4L,0x83cadbf93bc36ce8L,0x41fce5724fccc8d1L },
+ { 0x05f939c28332c144L,0xb17f248b0871e46eL,0x3d8534e266e8aff6L,
+ 0x1d06f1dc3b85c629L,0xdb06a32ea3131b73L,0xf295184d8b3f64e5L } },
+ /* 31 */
+ { { 0xd9653ff736ddc103L,0x25f43e3795ef606fL,0x09e301fcfe06dce8L,
+ 0x85af234130b6eebfL,0x79b12b530ff56b20L,0x9b4fb499fe9a3c6bL },
+ { 0x0154f89251d27ac2L,0xd33167e356ca5389L,0x7828ec1fafc065a6L,
+ 0x0959a2587f746c9bL,0xb18f1be30c44f837L,0xa7946117c4132fdbL } },
+ /* 32 */
+ { { 0xc0426b775e3c647bL,0xbfcbd9398cf05348L,0x31d312e3172c0d3dL,
+ 0x5f49fde6ee754737L,0x895530f06da7ee61L,0xcf281b0ae8b3a5fbL },
+ { 0xfd14973541b8a543L,0x41a625a73080dd30L,0xe2baae07653908cfL,
+ 0xc3d01436ba02a278L,0xa0d0222e7b21b8f8L,0xfdc270e9d7ec1297L } },
+ /* 33 */
+ { { 0x00873c0cbc7f41d6L,0xd976113e1b7ad641L,0x2a536ff4238443fbL,
+ 0x030d00e241e62e45L,0x532e98675f545fc6L,0xcd0331088e91208cL },
+ { 0xd1a04c999797612cL,0xd4393e02eea674e2L,0xd56fa69ee19742a1L,
+ 0xdd2ab48085f0590eL,0xa5cefc5248a2243dL,0x48cc67b654383f41L } },
+ /* 34 */
+ { { 0x4e50430efc14ab48L,0x195b7f4f26706a74L,0x2fe8a228cc881ff6L,
+ 0xb1b968e2d945013dL,0x936aa5794b92162bL,0x4fb766b7364e754aL },
+ { 0x13f93bca31e1ff7fL,0x696eb5cace4f2691L,0xff754bf8a2b09e02L,
+ 0x58f13c9ce58e3ff8L,0xb757346f1678c0b0L,0xd54200dba86692b3L } },
+ /* 35 */
+ { { 0x9a030bbd6dda1265L,0xf7b4f3fce89718ddL,0xa6a4931f936065b8L,
+ 0xbce72d875f72241cL,0x6cbb51cb65775857L,0xc71618154e993675L },
+ { 0xe81a0f792ee32189L,0xef2fab26277dc0b2L,0x9e64f6feb71f469fL,
+ 0xb448ce33dfdaf859L,0x3f5c1c4cbe6b5df1L,0xfb8dfb001de45f7bL } },
+ /* 36 */
+ { { 0xc7345fa74d5bb921L,0x5c7e04be4d2b667eL,0x47ed3a80282d7a3eL,
+ 0x5c2777f87e47b2a4L,0x89b3b10008488e2eL,0x9aad77c2b2eb5b45L },
+ { 0xd681bca7daac34aeL,0x2452e4e526afb326L,0x0c88792441a1ee14L,
+ 0x743b04d4c2407adeL,0xcb5e999bfc17a2acL,0x4dca2f824a701a06L } },
+ /* 37 */
+ { { 0x68e31ca61127bc1aL,0xa3edd59b17ead3beL,0x67b6b645e25f5a15L,
+ 0x76221794a420e15eL,0x794fd83b4b1e872eL,0x7cab3f03b2dece1bL },
+ { 0x7119bf15ca9b3586L,0xa55459244d250bd7L,0x173633eacc6bcf24L,
+ 0x9bd308c2b1b6f884L,0x3bae06f5447d38c3L,0x54dcc135f341fe1cL } },
+ /* 38 */
+ { { 0x56d3598d943caf0dL,0xce044ea9225ff133L,0x9edf6a7c563fadeaL,
+ 0x632eb94473e8dc27L,0x814b467e3190dcabL,0x2d4f4f316dbb1e31L },
+ { 0x8d69811ca143b7caL,0x4ec1ac32de7cf950L,0x223ab5fd37b5fe82L,
+ 0xe82616e49390f1d9L,0xabff4b2075804610L,0x11b9be15875b08f0L } },
+ /* 39 */
+ { { 0x4ae31a3d3bbe682cL,0xbc7c5d2674eef2ddL,0x92afd10a3c47dd40L,
+ 0xec7e0a3bc14ab9e1L,0x6a6c3dd1b2e495e4L,0x085ee5e9309bcd85L },
+ { 0xf381a9088c2e67fdL,0x32083a80e261eaf2L,0x0fcd6a4996deee15L,
+ 0xe3b8fb035e524c79L,0x8dc360d91d5b08b9L,0x3a06e2c87f26719fL } },
+ /* 40 */
+ { { 0x5cd9f5a87237cac0L,0x93f0b59d43586794L,0x4384a764e94f6c4eL,
+ 0x8304ed2bb62782d3L,0x0b8db8b3cde06015L,0x4336dd535dbe190fL },
+ { 0x5744355392ab473aL,0x031c7275be5ed046L,0x3e78678c21909aa4L,
+ 0x4ab7e04f99202ddbL,0x2648d2066977e635L,0xd427d184093198beL } },
+ /* 41 */
+ { { 0x822848f50f9b5a31L,0xbb003468baadb62aL,0x233a04723357559cL,
+ 0x49ef688079aee843L,0xa89867a0aeb9e1e3L,0xc151931b1f6f9a55L },
+ { 0xd264eb0bad74251eL,0x37b9b2634abf295eL,0xb600921b04960d10L,
+ 0x0de53dbc4da77dc0L,0x01d9bab3d2b18697L,0xad54ec7af7156ddfL } },
+ /* 42 */
+ { { 0x8e74dc3579efdc58L,0x456bd3694ff68ddbL,0x724e74ccd32096a5L,
+ 0xe41cff42386783d0L,0xa04c7f217c70d8a4L,0x41199d2fe61a19a2L },
+ { 0xd389a3e029c05dd2L,0x535f2a6be7e3fda9L,0x26ecf72d7c2b4df8L,
+ 0x678275f4fe745294L,0x6319c9cc9d23f519L,0x1e05a02d88048fc4L } },
+ /* 43 */
+ { { 0x75cc8e2ed4d5ffe8L,0xf8bb4896dbea17f2L,0x35059790cee3cb4aL,
+ 0x4c06ee85a47c6165L,0xf98fff2592935d2fL,0x34c4a57232ffd7c7L },
+ { 0xc4b14806ea0376a2L,0x2ea5e7504f115e02L,0x532d76e21e55d7c0L,
+ 0x68dc9411f31044daL,0x9272e46571b77993L,0xadaa38bb93a8cfd5L } },
+ /* 44 */
+ { { 0x4bf0c7127d4ed72aL,0xda0e9264ba1f79a3L,0x48c0258bf4c39ea4L,
+ 0xa5394ed82a715138L,0x4af511cebf06c660L,0xfcebceefec5c37cdL },
+ { 0xf23b75aa779ae8c1L,0xdeff59ccad1e606eL,0xf3f526fd22755c82L,
+ 0x64c5ab44bb32cefdL,0xa96e11a2915bdefdL,0xab19746a1143813eL } },
+ /* 45 */
+ { { 0x43c78585ec837d7dL,0xca5b6fbcb8ee0ba4L,0x34e924d9d5dbb5eeL,
+ 0x3f4fa104bb4f1ca5L,0x15458b72398640f7L,0x4231faa9d7f407eaL },
+ { 0x53e0661ef96e6896L,0x554e4c69d03b0f9dL,0xd4fcb07b9c7858d1L,
+ 0x7e95279352cb04faL,0x5f5f15748974e7f7L,0x2e3fa5586b6d57c8L } },
+ /* 46 */
+ { { 0x42cd48036a9951a8L,0xa8b15b8842792ad0L,0x18e8bcf9abb29a73L,
+ 0xbfd9a092409933e8L,0x760a3594efb88dc4L,0x1441886340724458L },
+ { 0x162a56ee99caedc7L,0x8fb12ecd91d101c9L,0xea671967393202daL,
+ 0x1aac8c4aa4ccd796L,0x7db050361cf185a8L,0x0c9f86cd8cfd095aL } },
+ /* 47 */
+ { { 0x9a72814710b2a556L,0x767ca964327b70b2L,0x04ed9e125e3799b7L,
+ 0x6781d2dc22a3eb2aL,0x5bd116eb0d9450acL,0xeccac1fca7ebe08aL },
+ { 0xde68444fdc2d6e94L,0x3621f42935ecf21bL,0x14e2d54329e03a2cL,
+ 0x53e42cd57d3e7f0aL,0xbba26c0973ed00b9L,0x00297c39c57d2272L } },
+ /* 48 */
+ { { 0x3aaaab10b8243a7dL,0x6eeef93e8fa58c5bL,0xf866fca39ae7f764L,
+ 0x64105a2661ab04d3L,0xa3578d8a03945d66L,0xb08cd3e4791b848cL },
+ { 0x45edc5f8756d2411L,0xd4a790d9a755128cL,0xc2cf096349e5f6a0L,
+ 0xc66d267df649beaaL,0x3ce6d9688467039eL,0x50046c6b42f7816fL } },
+ /* 49 */
+ { { 0x92ae160266425043L,0x1ff66afdf08db890L,0x386f5a7f8f162ce5L,
+ 0x18d2dea0fcf5598fL,0x78372b3a1a8ca18eL,0xdf0d20eb8cd0e6f7L },
+ { 0x7edd5e1d75bb4045L,0x252a47ceb96d94b7L,0xbdb293582c626776L,
+ 0x853c394340dd1031L,0x9dc9becf7d5f47fdL,0x27c2302fbae4044aL } },
+ /* 50 */
+ { { 0x2d1d208a8f2d49ceL,0x0d91aa02162df0a2L,0x9c5cce8709a07f65L,
+ 0xdf07238b84339012L,0x5028e2c8419442cdL,0x2dcbd35872062abaL },
+ { 0xb5fbc3cbe4680967L,0x2a7bc6459f92d72cL,0x806c76e1116c369dL,
+ 0x5c50677a3177e8d8L,0x753739eb4569df57L,0x2d481ef636c3f40bL } },
+ /* 51 */
+ { { 0x1a2d39fdfea1103eL,0xeaae559295f81b17L,0xdbd0aa18f59b264aL,
+ 0x90c39c1acb592ee0L,0xdf62f80d9750cca3L,0xda4d8283df97cc6cL },
+ { 0x0a6dd3461e201067L,0x1531f85969fb1f6bL,0x4895e5521d60121fL,
+ 0x0b21aab04c041c91L,0x9d896c46bcc1ccf8L,0xd24da3b33141bde7L } },
+ /* 52 */
+ { { 0x575a053753b0a354L,0x392ff2f40c6ddcd8L,0x0b8e8cff56157b94L,
+ 0x073e57bd3b1b80d1L,0x2a75e0f03fedee15L,0x752380e4aa8e6f19L },
+ { 0x1f4e227c6558ffe9L,0x3a34861819ec5415L,0xab382d5ef7997085L,
+ 0x5e6deaffddc46ac2L,0xe5144078fc8d094cL,0xf674fe51f60e37c6L } },
+ /* 53 */
+ { { 0x6fb87ae5af63408fL,0xa39c36a9cd75a737L,0x7833313fcf4c618dL,
+ 0xfbcd4482f034c88dL,0x4469a76139b35288L,0x77a711c566b5d9c9L },
+ { 0x4a695dc7944f8d65L,0xe6da5f65161aaba8L,0x8654e9c324601669L,
+ 0xbc8b93f528ae7491L,0x5f1d1e838f5580d8L,0x8ccf9a1acea32cc8L } },
+ /* 54 */
+ { { 0x28ab110c7196fee2L,0x75799d63874c8945L,0xa262934829aedaddL,
+ 0x9714cc7b2be88ff4L,0xf71293cfd58d60d6L,0xda6b6cb332a564e9L },
+ { 0xf43fddb13dd821c2L,0xf2f2785f90dd323dL,0x91246419048489f8L,
+ 0x61660f26d24c6749L,0x961d9e8cc803c15cL,0x631c6158faadc4c9L } },
+ /* 55 */
+ { { 0xacf2ebe0fd752366L,0xb93c340e139be88bL,0x98f664850f20179eL,
+ 0x14820254ff1da785L,0x5278e2764f85c16eL,0xa246ee457aab1913L },
+ { 0x43861eb453763b33L,0xc49f03fc45c0bc0dL,0xafff16bcad6b1ea1L,
+ 0xce33908b6fd49c99L,0x5c51e9bff7fde8c3L,0x076a7a39ff142c5eL } },
+ /* 56 */
+ { { 0x04639dfe9e338d10L,0x8ee6996ff42b411bL,0x960461d1a875cef2L,
+ 0x1057b6d695b4d0baL,0x27639252a906e0bcL,0x2c19f09ae1c20f8aL },
+ { 0x5b8fc3f0eef4c43dL,0xe2e1b1a807a84aa9L,0x5f455528835d2bdbL,
+ 0x0f4aee4d207132ddL,0xe9f8338c3907f675L,0x7a874dc90e0531f0L } },
+ /* 57 */
+ { { 0x84b22d4597c27050L,0xbd0b8df759e70bf8L,0xb4d6740579738b9bL,
+ 0x47f4d5f5cd917c4fL,0x9099c4ce13ce6e33L,0x942bfd39521d0f8bL },
+ { 0x5028f0f6a43b566dL,0xaf6e866921bff7deL,0x83f6f856c44232cdL,
+ 0x65680579f915069aL,0xd12095a2ecfecb85L,0xcf7f06aedb01ba16L } },
+ /* 58 */
+ { { 0x0f56e3c48ef96c80L,0xd521f2b33ddb609cL,0x2be941027dc1450dL,
+ 0x2d21a07102a91fe2L,0x2e6f74fa1efa37deL,0x9a9a90b8156c28a1L },
+ { 0xc54ea9ea9dc7dfcbL,0xc74e66fc2c2c1d62L,0x9f23f96749d3e067L,
+ 0x1c7c3a4654dd38adL,0xc70058845946cee3L,0x8985636845cc045dL } },
+ /* 59 */
+ { { 0x29da7cd4fce73946L,0x8f697db523168563L,0x8e235e9ccba92ec6L,
+ 0x55d4655f9f91d3eaL,0xf3689f23aa50a6cdL,0xdcf21c2621e6a1a0L },
+ { 0xcffbc82e61b818bfL,0xc74a2f96da47a243L,0x234e980a8bc1a0cfL,
+ 0xf35fd6b57929cb6dL,0x81468e12efe17d6cL,0xddea6ae558b2dafbL } },
+ /* 60 */
+ { { 0x294de8877e787b2eL,0x258acc1f39a9310dL,0x92d9714aac14265dL,
+ 0x18b5591c708b48a0L,0x27cc6bb0e1abbf71L,0xc0581fa3568307b9L },
+ { 0x9e0f58a3f24d4d58L,0xfebe9bb8e0ce2327L,0x91fd6a419d1be702L,
+ 0x9a7d8a45facac993L,0xabc0a08c9e50d66dL,0x02c342f706498201L } },
+ /* 61 */
+ { { 0xccd71407157bdbc2L,0x72fa89c6ad0e1605L,0xb1d3da2bb92a015fL,
+ 0x8ad9e7cda0a3fe56L,0x160edcbd24f06737L,0x79d4db3361275be6L },
+ { 0xd3d31fd95f3497c4L,0x8cafeaee04192fb0L,0xe13ca74513a50af3L,
+ 0x188261678c85aae5L,0xce06cea89eb556ffL,0x2eef1995bdb549f3L } },
+ /* 62 */
+ { { 0x8ed7d3eb50596edcL,0xaa359362905243a2L,0xa212c2c2a4b6d02bL,
+ 0x611fd727c4fbec68L,0x8a0b8ff7b84f733dL,0xd85a6b905f0daf0eL },
+ { 0x60e899f5d4091cf7L,0x4fef2b672eff2768L,0xc1f195cb10c33964L,
+ 0x8275d36993626a8fL,0xc77904f40d6c840aL,0x88d8b7fd7a868acdL } },
+ /* 63 */
+ { { 0x85f237237bd98425L,0xd4463992c70b154eL,0xcbb00ee296687a2eL,
+ 0x905fdbf7c83214fdL,0x2019d29313593684L,0x0428c393ef51218eL },
+ { 0x40c7623f981e909aL,0x925133857be192daL,0x48fe480f4010907eL,
+ 0xdd7a187c3120b459L,0xc9d7702da1fd8f3cL,0x66e4753be358efc5L } },
+ /* 64 */
+ { { 0x070d34e116973cf4L,0x20aee08b7e4f34f7L,0x269af9b95eb8ad29L,
+ 0xdde0a036a6a45ddaL,0xa18b528e63df41e0L,0x03cc71b2a260df2aL },
+ { 0x24a6770aa06b1dd7L,0x5bfa9c119d2675d3L,0x73c1e2a196844432L,
+ 0x3660558d131a6cf0L,0xb0289c832ee79454L,0xa6aefb01c6d8ddcdL } },
+ /* 65 */
+ { { 0xba1464b401ab5245L,0x9b8d0b6dc48d93ffL,0x939867dc93ad272cL,
+ 0xbebe085eae9fdc77L,0x73ae5103894ea8bdL,0x740fc89a39ac22e1L },
+ { 0x5e28b0a328e23b23L,0x2352722ee13104d0L,0xf4667a18b0a2640dL,
+ 0xac74a72e49bb37c3L,0x79f734f0e81e183aL,0xbffe5b6c3fd9c0ebL } },
+ /* 66 */
+ { { 0xb1a358f5c6a2123fL,0x927b2d95fe28df6dL,0x89702753f199d2f9L,
+ 0x0a73754c1a3f82dcL,0x063d029d777affe1L,0x5439817edae6d34dL },
+ { 0xf7979eef6b8b83c4L,0x615cb2149d945682L,0x8f0e4facc5e57eaeL,
+ 0x042b89b8113047ddL,0x888356dc93f36508L,0xbf008d185fd1f32fL } },
+ /* 67 */
+ { { 0x8012aa244e8068dbL,0xc72cc641a5729a47L,0x3c33df2c43f0691dL,
+ 0xfa0573471d92145fL,0xaefc0f2fb97f7946L,0x813d75cb2f8121bfL },
+ { 0x05613c724383bba6L,0xa924ce70a4224b3fL,0xe59cecbe5f2179a6L,
+ 0x78e2e8aa79f62b61L,0x3ac2cc3b53ad8079L,0x55518d71d8f4fa96L } },
+ /* 68 */
+ { { 0x03cf292200623f3bL,0x095c71115f29ebffL,0x42d7224780aa6823L,
+ 0x044c7ba17458c0b0L,0xca62f7ef0959ec20L,0x40ae2ab7f8ca929fL },
+ { 0xb8c5377aa927b102L,0x398a86a0dc031771L,0x04908f9dc216a406L,
+ 0xb423a73a918d3300L,0x634b0ff1e0b94739L,0xe29de7252d69f697L } },
+ /* 69 */
+ { { 0x744d14008435af04L,0x5f255b1dfec192daL,0x1f17dc12336dc542L,
+ 0x5c90c2a7636a68a8L,0x960c9eb77704ca1eL,0x9de8cf1e6fb3d65aL },
+ { 0xc60fee0d511d3d06L,0x466e2313f9eb52c7L,0x743c0f5f206b0914L,
+ 0x42f55bac2191aa4dL,0xcefc7c8fffebdbc2L,0xd4fa6081e6e8ed1cL } },
+ /* 70 */
+ { { 0xb5e405d3b0ab9645L,0xaeec7f98d5f1f711L,0x8ad42311585c2a6eL,
+ 0x045acb9e512c6944L,0xae106c4ea90db1c6L,0xb89f33d5898e6563L },
+ { 0x43b07cd97fed2ce4L,0xf9934e17dd815b20L,0x6778d4d50a81a349L,
+ 0x9e616ade52918061L,0xfa06db06d7e67112L,0x1da23cf188488091L } },
+ /* 71 */
+ { { 0x821c46b342f2c4b5L,0x931513ef66059e47L,0x7030ae4366f50cd1L,
+ 0x43b536c943e7b127L,0x006258cf5fca5360L,0xe4e3ee796b557abfL },
+ { 0xbb6b390024c8b22fL,0x2eb5e2c1fcbf1054L,0x937b18c9567492afL,
+ 0xf09432e4acf53957L,0x585f5a9d1dbf3a56L,0xf86751fdbe0887cfL } },
+ /* 72 */
+ { { 0x157399cb9d10e0b2L,0x1c0d595660dc51b7L,0x1d496b8a1f583090L,
+ 0x6658bc2688590484L,0x88c08ab703213f28L,0x8d2e0f737ae58de4L },
+ { 0x9b79bc95486cfee6L,0x036a26c7e9e5bc57L,0x1ad03601cd8ae97aL,
+ 0x06907f87ff3a0494L,0x078f4bbf2c7eb584L,0xe3731bf57e8d0a5aL } },
+ /* 73 */
+ { { 0x72f2282be1cd0abeL,0xd4f9015e87efefa2L,0x9d1898066c3834bdL,
+ 0x9c8cdcc1b8a29cedL,0x0601b9f4fee82ebcL,0x371052bc7206a756L },
+ { 0x76fa109246f32562L,0xdaad534c17351bb4L,0xc3d64c37b3636bb5L,
+ 0x038a8c5145d54e00L,0x301e618032c09e7cL,0x9764eae795735151L } },
+ /* 74 */
+ { { 0x8791b19fcbd5256aL,0x4007e0f26ca13a3bL,0x03b794604cf06904L,
+ 0xb18a9c22b6c17589L,0xa1cb7d7d81d45908L,0x6e13fa9d21bb68f1L },
+ { 0x47183c62a71e6e16L,0x5cf0ef8ee18749edL,0x2c9c7f9b2e5ed409L,
+ 0x042eeacce6e117e1L,0xb86d481613fb5a7fL,0xea1cf0edc9e5feb1L } },
+ /* 75 */
+ { { 0x6e6573c9cea4cc9bL,0x5417961dafcec8f3L,0x804bf02aa438b6f6L,
+ 0xb894b03cdcd4ea88L,0xd0f807e93799571fL,0x3466a7f5862156e8L },
+ { 0x51e59acd56515664L,0x55b0f93ca3c5eb0bL,0x84a06b026a4279dbL,
+ 0x5c850579c5fae08eL,0xcf07b8dba663a1a2L,0x49a36bbcf46ffc8dL } },
+ /* 76 */
+ { { 0xe47f5acc46d93106L,0x65b7ade0aa897c9cL,0x37cf4c9412d7e4beL,
+ 0xa2ae9b80d4b2caa9L,0x5e7ce09ce60357a3L,0x29f77667c8ecd5f9L },
+ { 0xdf6868f5a8a0b1c5L,0x240858cf62978ad8L,0x0f7ac101dc0002a1L,
+ 0x1d28a9d7ffe9aa05L,0x744984d65b962c97L,0xa8a7c00b3d28c8b2L } },
+ /* 77 */
+ { { 0x7c58a852ae11a338L,0xa78613f1d1af96e7L,0x7e9767d25355cc73L,
+ 0x6ba37009792a2de6L,0x7d60f618124386b2L,0xab09b53111157674L },
+ { 0x95a0484198eb9dd0L,0xe6c17acc15070328L,0xafc6da45489c6e49L,
+ 0xab45a60abb211530L,0xc58d65927d7ea933L,0xa3ef3c65095642c6L } },
+ /* 78 */
+ { { 0x89d420e9df010879L,0x9d25255d39576179L,0x9cdefd50e39513b6L,
+ 0xe4efe45bd5d1c313L,0xc0149de73f7af771L,0x55a6b4f4340ab06bL },
+ { 0xf1325251ebeaf771L,0x2ab44128878d4288L,0xfcd5832e18e05afeL,
+ 0xef52a348cc1fb62bL,0x2bd08274c1c4792aL,0x345c5846877c6dc7L } },
+ /* 79 */
+ { { 0xde15ceb0bea65e90L,0x0987f72b2416d99cL,0x44db578dfd863decL,
+ 0xf617b74bac6a3578L,0x9e62bd7adb48e999L,0x877cae61eab1a1beL },
+ { 0x23adddaa3a358610L,0x2fc4d6d1325e2b07L,0x897198f51585754eL,
+ 0xf741852cb392b584L,0x9927804cb55f7de1L,0xe9e6c4ed1aa8efaeL } },
+ /* 80 */
+ { { 0x867db63998683186L,0xfb5cf424ddcc4ea9L,0xcc9a7ffed4f0e7bdL,
+ 0x7c57f71c7a779f7eL,0x90774079d6b25ef2L,0x90eae903b4081680L },
+ { 0xdf2aae5e0ee1fcebL,0x3ff1da24e86c1a1fL,0x80f587d6ca193edfL,
+ 0xa5695523dc9b9d6aL,0x7b84090085920303L,0x1efa4dfcba6dbdefL } },
+ /* 81 */
+ { { 0xfbd838f9e0540015L,0x2c323946c39077dcL,0x8b1fb9e6ad619124L,
+ 0x9612440c0ca62ea8L,0x9ad9b52c2dbe00ffL,0xf52abaa1ae197643L },
+ { 0xd0e898942cac32adL,0xdfb79e4262a98f91L,0x65452ecf276f55cbL,
+ 0xdb1ac0d27ad23e12L,0xf68c5f6ade4986f0L,0x389ac37b82ce327dL } },
+ /* 82 */
+ { { 0x511188b4f8e60f5bL,0x7fe6701548aa2adaL,0xdb333cb8381abca2L,
+ 0xb15e6d9ddaf3fc97L,0x4b24f6eb36aabc03L,0xc59789df72a748b4L },
+ { 0x26fcb8a529cf5279L,0x7a3c6bfc01ad9a6cL,0x866cf88d4b8bac9bL,
+ 0xf4c899899c80d041L,0xf0a0424170add148L,0x5a02f47945d81a41L } },
+ /* 83 */
+ { { 0xfa5c877cc1c90202L,0xd099d440f8ac7570L,0x428a5b1bd17881f7L,
+ 0x61e267db5b2501d7L,0xf889bf04f2e4465bL,0x4da3ae0876aa4cb8L },
+ { 0x3ef0fe26e3e66861L,0x5e7729533318b86dL,0xc3c35fbc747396dfL,
+ 0x5115a29c439ffd37L,0xbfc4bd97b2d70374L,0x088630ea56246b9dL } },
+ /* 84 */
+ { { 0xcd96866db8a9e8c9L,0xa11963b85bb8091eL,0xc7f90d53045b3cd2L,
+ 0x755a72b580f36504L,0x46f8b39921d3751cL,0x4bffdc9153c193deL },
+ { 0xcd15c049b89554e7L,0x353c6754f7a26be6L,0x79602370bd41d970L,
+ 0xde16470b12b176c0L,0x56ba117540c8809dL,0xe2db35c3e435fb1eL } },
+ /* 85 */
+ { { 0xd71e4aab6328e33fL,0x5486782baf8136d1L,0x07a4995f86d57231L,
+ 0xf1f0a5bd1651a968L,0xa5dc5b2476803b6dL,0x5c587cbc42dda935L },
+ { 0x2b6cdb32bae8b4c0L,0x66d1598bb1331138L,0x4a23b2d25d7e9614L,
+ 0x93e402a674a8c05dL,0x45ac94e6da7ce82eL,0xeb9f8281e463d465L } },
+ /* 86 */
+ { { 0x34e0f9d1fecf5b9bL,0xa115b12bf206966aL,0x5591cf3b1eaa0534L,
+ 0x5f0293cbfb1558f9L,0x1c8507a41bc703a5L,0x92e6b81c862c1f81L },
+ { 0xcc9ebc66cdaf24e3L,0x68917ecd72fcfc70L,0x6dc9a9308157ba48L,
+ 0x5d425c08b06ab2b2L,0x362f8ce736e929c4L,0x09f6f57c62e89324L } },
+ /* 87 */
+ { { 0x1c7d6b78d29375fbL,0xfabd851ee35d1157L,0xf6f62dcd4243ea47L,
+ 0x1dd924608fe30b0fL,0x08166dfaffc6e709L,0xc6c4c6930881e6a7L },
+ { 0x20368f87d6a53fb0L,0x38718e9f9eb4d1f9L,0x03f08acdafd7e790L,
+ 0x0835eb4472fe2a1cL,0x7e05090388076e5dL,0x538f765ea638e731L } },
+ /* 88 */
+ { { 0x0e0249d9c2663b4bL,0xe700ab5b47cd38ddL,0xb192559d2c46559fL,
+ 0x8f9f74a84bcde66dL,0xad1615233e2aced5L,0xc155c0473dd03a5bL },
+ { 0x346a87993be454ebL,0x66ee94db83b7dccdL,0x1f6d8378ab9d2abeL,
+ 0x4a396dd27733f355L,0x419bd40af53553c2L,0xd0ead98d731dd943L } },
+ /* 89 */
+ { { 0x908e0b0eec142408L,0x98943cb94114b310L,0x03dbf7d81742b1d7L,
+ 0xd270df6b693412f4L,0xc50654948f69e20cL,0xa76a90c3697e43a1L },
+ { 0xe0fa33844624825aL,0x82e48c0b8acc34c2L,0x7b24bd14e9a14f2bL,
+ 0x4f5dd5e24db30803L,0x0c77a9e7932da0a3L,0x20db90f274c653dcL } },
+ /* 90 */
+ { { 0x261179b70e6c5fd9L,0xf8bec1236c982eeaL,0x47683338d4957b7eL,
+ 0xcc47e6640a72f66aL,0xbd54bf6a1bad9350L,0xdfbf4c6af454e95aL },
+ { 0x3f7a7afa6907f4faL,0x7311fae0865ca735L,0x24737ab82a496adaL,
+ 0x13e425f115feb79bL,0xe9e97c50a1b93c21L,0xb26b6eac4ddd3eb5L } },
+ /* 91 */
+ { { 0x81cab9f52a2e5f2bL,0xf93caf29bf385ac4L,0xf4bf35c3c909963aL,
+ 0x081e730074c9143cL,0x3ea57fa8c281b4c5L,0xe497905c9b340741L },
+ { 0xf556dd8a55ab3cfbL,0xd444b96b518db6adL,0x34f5425a5ef4b955L,
+ 0xdda7a3acecd26aa3L,0xb57da11bda655e97L,0x02da3effc2024c70L } },
+ /* 92 */
+ { { 0xe24b00366481d0d9L,0x3740dbe5818fdfe2L,0xc1fc1f45190fda00L,
+ 0x329c92803cf27fdeL,0x7435cb536934f43eL,0x2b505a5d7884e8feL },
+ { 0x6cfcc6a6711adcc9L,0xf034325c531e21e1L,0xa2f4a9679b2a8a99L,
+ 0x9d5f38423c21bdffL,0xb25c781131b57d66L,0xdb5344d80b8093b9L } },
+ /* 93 */
+ { { 0x0d72e667ae50a2f5L,0x9b7f8d8ae4a861d1L,0xa129f70f330df1cbL,
+ 0xe90aa5d7e04fefc3L,0xff561ecbe72c3ae1L,0x0d8fb428cdb955faL },
+ { 0xd2235f73d7663784L,0xc05baec67e2c456aL,0xe5c292e42adbfcccL,
+ 0x4fd17988efb110d5L,0x27e57734d19d49f3L,0x188ac4ce84f679feL } },
+ /* 94 */
+ { { 0x7ee344cfa796c53eL,0xbbf6074d0868009bL,0x1f1594f7474a1295L,
+ 0x66776edcac11632dL,0x1862278b04e2fa5aL,0x52665cf2c854a89aL },
+ { 0x7e3764648104ab58L,0x167759137204fd6dL,0x86ca06a544ea1199L,
+ 0xaa3f765b1c9240ddL,0x5f8501a924746149L,0x7b982e30dcd251d7L } },
+ /* 95 */
+ { { 0xe44e9efcc15f3060L,0x5ad62f2ea87ebbe6L,0x36499d41c79500d4L,
+ 0xa66d6dc0336fa9d1L,0xf8afc4955afd3b1fL,0x1d8ccb24e5c9822bL },
+ { 0x4031422b79d7584bL,0xc54a0580ea3f20ddL,0x3f837c8f958468c5L,
+ 0x3d82f110fbea7735L,0x679a87787dffe2fcL,0x48eba63b20704803L } },
+ /* 96 */
+ { { 0x89b10d41df46e2f6L,0x13ab57f819514367L,0x067372b91d469c87L,
+ 0x0c195afa4f6c5798L,0xea43a12a272c9acfL,0x9dadd8cb678abdacL },
+ { 0xcce56c6be182579aL,0x86febadb2d26c2d8L,0x1c668ee12a44745cL,
+ 0x580acd8698dc047aL,0x5a2b79cc51b9ec2dL,0x007da6084054f6a0L } },
+ /* 97 */
+ { { 0x9e3ca35217b00dd0L,0x046779cb0e81a7a6L,0xb999fef3d482d871L,
+ 0xe6f38134d9233fbcL,0x112c3001f48cd0e0L,0x934e75763c6c66aeL },
+ { 0xb44d4fc3d73234dcL,0xfcae2062864eafc1L,0x843afe2526bef21aL,
+ 0x61355107f3b75fdfL,0x8367a5aa794c2e6bL,0x3d2629b18548a372L } },
+ /* 98 */
+ { { 0x6230618f437cfaf8L,0x5b8742cb2032c299L,0x949f72472293643aL,
+ 0xb8040f1a09464f79L,0x049462d24f254143L,0xabd6b522366c7e76L },
+ { 0x119b392bd5338f55L,0x1a80a9ce01495a0cL,0xf3118ca7f8d7537eL,
+ 0xb715adc26bf4b762L,0x24506165a8482b6cL,0xd958d7c696a7c84dL } },
+ /* 99 */
+ { { 0x9ad8aa87bdc21f31L,0xadb3cab48063e58cL,0xefd86283b07dd7b8L,
+ 0xc7b9b7621be7c6b4L,0x2ef58741015582deL,0xc970c52e299addf3L },
+ { 0x78f02e2a22f24d66L,0xefec1d1074cc100aL,0xaf2a6a3909316e1aL,
+ 0xce7c22055849dd49L,0x9c1fe75c96bffc4cL,0xcad98fd27ba06ec0L } },
+ /* 100 */
+ { { 0xed76e2d0b648b73eL,0xa9f92ce51cfd285eL,0xa8c86c062ed13de1L,
+ 0x1d3a574ea5191a93L,0x385cdf8b1ad1b8bfL,0xbbecc28a47d2cfe3L },
+ { 0x98d326c069cec548L,0x4f5bc1ddf240a0b2L,0x241a706229057236L,
+ 0x0fc6e9c5c68294a4L,0x4d04838ba319f17aL,0x8b612cf19ffc1c6fL } },
+ /* 101 */
+ { { 0x9bb0b5014c3830ebL,0x3d08f83c8ee0d0c5L,0xa4a6264279ba9389L,
+ 0x5d5d40449cbc2914L,0xae9eb83e074c46f0L,0x63bb758f74ead7d6L },
+ { 0x1c40d2eac6bb29e0L,0x95aa2d874b02f41eL,0x9298917553cb199aL,
+ 0xdd91bafe51584f6dL,0x3715efb931a1aaecL,0xc1b6ae5b46780f9eL } },
+ /* 102 */
+ { { 0xcded3e4b42772f41L,0x3a700d5d3bcb79d1L,0x4430d50e80feee60L,
+ 0x444ef1fcf5e5d4bbL,0xc660194fe6e358ffL,0xe68a2f326a91b43cL },
+ { 0x5842775c977fe4d2L,0x78fdef5c7e2a41ebL,0x5f3bec02ff8df00eL,
+ 0xf4b840cd5852525dL,0x0870483a4e6988bdL,0x39499e39cc64b837L } },
+ /* 103 */
+ { { 0xfc05de80b08df5feL,0x0c12957c63ba0362L,0xea379414d5cf1428L,
+ 0xc559132a54ef6216L,0x33d5f12fb9e65cf8L,0x09c602781695d663L },
+ { 0x3ac1ced461f7a2fbL,0xdd838444d4f5eeb8L,0x82a38c6c8318fcadL,
+ 0x315be2e5e9f1a864L,0x317b5771442daf47L,0x81b5904a95aa5f9eL } },
+ /* 104 */
+ { { 0x6b6b1c508b21d232L,0x87f3dbc08c2cba75L,0xa7e74b46ae9f0fafL,
+ 0x036a0985bb7b8079L,0x4f185b908d974a25L,0x5aa7cef0d9af5ec9L },
+ { 0xe0566a7057dcfffcL,0x6ea311dab8453225L,0x72ea1a8d23368aa9L,
+ 0xed9b208348cd552dL,0xb987967cc80ea435L,0xad735c756c104173L } },
+ /* 105 */
+ { { 0xaea85ab3cee76ef4L,0x44997444af1d2b93L,0x0851929beacb923fL,
+ 0xb080b59051e3bc0cL,0xc4ee1d8659be68a2L,0xf00de21964b26cdaL },
+ { 0x8d7fb5c0f2e90d4dL,0x00e219a777d9ec64L,0xc4e6febd5d1c491cL,
+ 0x080e37541a8f4585L,0x4a9b86c848d2af9cL,0x2ed70db6b6679851L } },
+ /* 106 */
+ { { 0xaee44116586f25cbL,0xf7b6861fa0fcf70fL,0x55d2cd2018a350e8L,
+ 0x861bf3e592dc286fL,0x9ab18ffa6226aba7L,0xd15827bea9857b03L },
+ { 0x26c1f54792e6acefL,0x422c63c8ac1fbac3L,0xa2d8760dfcbfd71dL,
+ 0x35f6a539b2511224L,0xbaa88fa1048d1a21L,0x49f1abe9ebf999dbL } },
+ /* 107 */
+ { { 0x16f9f4f4f7492b73L,0xcf28ec1ecb392b1aL,0x45b130d469ca6ffcL,
+ 0x28ba8d40b72efa58L,0xace987c75ca066f5L,0x3e3992464ad022ebL },
+ { 0x63a2d84e752555bbL,0xaaa93b4a9c2ae394L,0xcd80424ec89539caL,
+ 0x6d6b5a6daa119a99L,0xbd50334c379f2629L,0x899e925eef3cc7d3L } },
+ /* 108 */
+ { { 0xb7ff3651bf825dc4L,0x0f741cc440b9c462L,0x771ff5a95cc4fb5bL,
+ 0xcb9e9c9b47fd56feL,0xbdf053db5626c0d3L,0xa97ce675f7e14098L },
+ { 0x68afe5a36c934f5eL,0x6cd5e148ccefc46fL,0xc7758570d7a88586L,
+ 0x49978f5edd558d40L,0xa1d5088a64ae00c1L,0x58f2a720f1d65bb2L } },
+ /* 109 */
+ { { 0x66fdda4a3e4daedbL,0x38318c1265d1b052L,0x28d910a24c4bbf5cL,
+ 0x762fe5c478a9cd14L,0x08e5ebaad2cc0aeeL,0xd2cdf257ca0c654cL },
+ { 0x48f7c58b08b717d2L,0x3807184a386cd07aL,0x3240f626ae7d0112L,
+ 0x03e9361bc43917b0L,0xf261a87620aea018L,0x53f556a47e1e6372L } },
+ /* 110 */
+ { { 0xc84cee562f512a90L,0x24b3c0041b0ea9f1L,0x0ee15d2de26cc1eaL,
+ 0xd848762cf0c9ef7dL,0x1026e9c5d5341435L,0x8f5b73dcfdb16b31L },
+ { 0x1f69bef2d2c75d95L,0x8d33d581be064ddaL,0x8c024c1257ed35e6L,
+ 0xf8d435f9c309c281L,0xfd295061d6960193L,0x66618d78e9e49541L } },
+ /* 111 */
+ { { 0x571cfd458ce382deL,0x175806eede900ddeL,0x6184996534aba3b5L,
+ 0xe899778ade7aec95L,0xe8f00f6eff4aa97fL,0xae971cb5010b0c6dL },
+ { 0x1827eebc3af788f1L,0xd46229ffe413fe2dL,0x8a15455b4741c9b4L,
+ 0x5f02e690f8e424ebL,0x40a1202edae87712L,0x49b3bda264944f6dL } },
+ /* 112 */
+ { { 0xd63c6067035b2d69L,0xb507150d6bed91b0L,0x1f35f82f7afb39b2L,
+ 0xb9bd9c0116012b66L,0x00d97960ed0a5f50L,0xed7054512716f7c9L },
+ { 0x1576eff4127abdb4L,0x6850d698f01e701cL,0x9fa7d7493fc87e2fL,
+ 0x0b6bcc6fb0ce3e48L,0xf4fbe1f5f7d8c1c0L,0xcf75230e02719cc6L } },
+ /* 113 */
+ { { 0x6761d6c2722d94edL,0xd1ec3f213718820eL,0x65a40b7025d0e7c6L,
+ 0xd67f830ebaf3cf31L,0x633b3807b93ea430L,0x17faa0ea0bc96c69L },
+ { 0xe6bf3482df866b98L,0x205c1ee9a9db52d4L,0x51ef9bbdff9ab869L,
+ 0x3863dad175eeb985L,0xef216c3bd3cf442aL,0x3fb228e3f9c8e321L } },
+ /* 114 */
+ { { 0x94f9b70c0760ac07L,0xf3c9ccae9d79bf4dL,0x73cea084c5ffc83dL,
+ 0xef50f943dc49c38eL,0xf467a2aebc9e7330L,0x5ee534b644ea7fbaL },
+ { 0x20cb627203609e7fL,0x0984435562fdc9f0L,0xaf5c8e580f1457f7L,
+ 0xd1f50a6cb4b25941L,0x77cb247c2ec82395L,0xa5f3e1e5da3dca33L } },
+ /* 115 */
+ { { 0x023489d67d85fa94L,0x0ba405372db9ce47L,0x0fdf7a1faed7aad1L,
+ 0xa57b0d739a4ccb40L,0x48fcec995b18967cL,0xf30b5b6eb7274d24L },
+ { 0x7ccb4773c81c5338L,0xb85639e6a3ed6bd0L,0x7d9df95f1d56eadaL,
+ 0xe256d57f0a1607adL,0x6da7ffdc957574d6L,0x65f8404601c7a8c4L } },
+ /* 116 */
+ { { 0x8d45d0cbcba1e7f1L,0xef0a08c002b55f64L,0x771ca31b17e19892L,
+ 0xe1843ecb4885907eL,0x67797ebc364ce16aL,0x816d2b2d8df4b338L },
+ { 0xe870b0e539aa8671L,0x9f0db3e4c102b5f5L,0x342966591720c697L,
+ 0x0ad4c89e613c0d2aL,0x1af900b2418ddd61L,0xe087ca72d336e20eL } },
+ /* 117 */
+ { { 0x222831ffaba10079L,0x0dc5f87b6d64fff2L,0x445479073e8cb330L,
+ 0xe815aaa2702a33fbL,0x338d6b2e5fba3215L,0x0f7535cb79f549c8L },
+ { 0x471ecd972ee95923L,0x1e868b37c6d1c09fL,0x2bc7b8ecc666ef4eL,
+ 0xf5416589808a4bfcL,0xf23e9ee23fbc4d2eL,0x4357236c2d75125bL } },
+ /* 118 */
+ { { 0xfe176d95ba9cdb1bL,0x45a1ca012f82791eL,0x97654af24de4cca2L,
+ 0xbdbf9d0e5cc4bcb9L,0xf6a7df50ad97ac0aL,0xc52112b061359fd6L },
+ { 0x696d9ce34f05eae3L,0x903adc02e943ac2bL,0xa90753470848be17L,
+ 0x1e20f1702a3973e5L,0xe1aacc1c6feb67e9L,0x2ca0ac32e16bc6b9L } },
+ /* 119 */
+ { { 0xffea12e4ef871eb5L,0x94c2f25da8bf0a7aL,0x4d1e4c2a78134eaaL,
+ 0x11ed16fb0360fb10L,0x4029b6db85fc11beL,0x5e9f7ab7f4d390faL },
+ { 0x5076d72f30646612L,0xa0afed1ddda1d0d8L,0x2902225785a1d103L,
+ 0xcb499e174e276bcdL,0x16d1da7151246c3dL,0xc72d56d3589a0443L } },
+ /* 120 */
+ { { 0xdf5ffc74dae5bb45L,0x99068c4a261bd6dcL,0xdc0afa7aaa98ec7bL,
+ 0xedd2ee00f121e96dL,0x163cc7be1414045cL,0xb0b1bbce335af50eL },
+ { 0xd440d78501a06293L,0xcdebab7c6552e644L,0x48cb8dbc8c757e46L,
+ 0x81f9cf783cabe3cbL,0xddd02611b123f59aL,0x3dc7b88eeeb3784dL } },
+ /* 121 */
+ { { 0xe1b8d398c4741456L,0xa9dfa9026032a121L,0x1cbfc86d1263245bL,
+ 0xf411c7625244718cL,0x96521d5405b0fc54L,0x1afab46edbaa4985L },
+ { 0xa75902ba8674b4adL,0x486b43ad5ad87d12L,0x72b1c73636e0d099L,
+ 0x39890e07bb6cd6d6L,0x8128999c59bace4eL,0xd8da430b7b535e33L } },
+ /* 122 */
+ { { 0x39f65642c6b75791L,0x050947a621806bfbL,0x0ca3e3701362ef84L,
+ 0x9bc60aed8c3d2391L,0x9b488671732e1ddcL,0x12d10d9ea98ee077L },
+ { 0xb6f2822d3651b7dcL,0x6345a5ba80abd138L,0x62033262472d3c84L,
+ 0xd54a1d40acc57527L,0x6ea46b3a424447cbL,0x5bc410572fb1a496L } },
+ /* 123 */
+ { { 0xe70c57a3a751cd0eL,0x190d8419eba3c7d6L,0xb1c3bee79d47d55aL,
+ 0xda941266f912c6d8L,0x12e9aacc407a6ad6L,0xd6ce5f116e838911L },
+ { 0x063ca97b70e1f2ceL,0xa3e47c728213d434L,0xa016e24184df810aL,
+ 0x688ad7b0dfd881a4L,0xa37d99fca89bf0adL,0xd8e3f339a23c2d23L } },
+ /* 124 */
+ { { 0xbdf53163750bed6fL,0x808abc3283e68b0aL,0x85a366275bb08a33L,
+ 0xf72a3a0f6b0e4abeL,0xf7716d19faf0c6adL,0x22dcc0205379b25fL },
+ { 0x7400bf8df9a56e11L,0x6cb8bad756a47f21L,0x7c97176f7a6eb644L,
+ 0xe8fd84f7d1f5b646L,0x98320a9444ddb054L,0x07071ba31dde86f5L } },
+ /* 125 */
+ { { 0x6fdfa0e598f8fcb9L,0x89cec8e094d0d70cL,0xa0899397106d20a8L,
+ 0x915bfb9aba8acc9cL,0x1370c94b5507e01cL,0x83246a608a821ffbL },
+ { 0xa8273a9fbe3c378fL,0x7e54478935a25be9L,0x6cfa49724dd929d7L,
+ 0x987fed9d365bd878L,0x4982ac945c29a7aeL,0x4589a5d75ddd7ec5L } },
+ /* 126 */
+ { { 0x9fabb174a95540a9L,0x7cfb886f0162c5b0L,0x17be766bea3dee18L,
+ 0xff7da41fe88e624cL,0xad0b71eb8b919c38L,0x86a522e0f31ff9a9L },
+ { 0xbc8e6f72868bc259L,0x6130c6383ccef9e4L,0x09f1f4549a466555L,
+ 0x8e6c0f0919b2bfb4L,0x945c46c90ca7bb22L,0xacd871684dafb67bL } },
+ /* 127 */
+ { { 0x090c72ca10c53841L,0xc20ae01b55a4fcedL,0x03f7ebd5e10234adL,
+ 0xb3f42a6a85892064L,0xbdbc30c0b4a14722L,0x971bc4378ca124ccL },
+ { 0x6f79f46d517ff2ffL,0x6a9c96e2ecba947bL,0x5e79f2f462925122L,
+ 0x30a96bb16a4e91f1L,0x1147c9232d4c72daL,0x65bc311f5811e4dfL } },
+ /* 128 */
+ { { 0x87c7dd7d139b3239L,0x8b57824e4d833baeL,0xbcbc48789fff0015L,
+ 0x8ffcef8b909eaf1aL,0x9905f4eef1443a78L,0x020dd4a2e15cbfedL },
+ { 0xca2969eca306d695L,0xdf940cadb93caf60L,0x67f7fab787ea6e39L,
+ 0x0d0ee10ff98c4fe5L,0xc646879ac19cb91eL,0x4b4ea50c7d1d7ab4L } },
+ /* 129 */
+ { { 0x19e409457a0db57eL,0xe6017cad9a8c9702L,0xdbf739e51be5cff9L,
+ 0x3646b3cda7a938a2L,0x0451108568350dfcL,0xad3bd6f356e098b5L },
+ { 0x935ebabfee2e3e3eL,0xfbd01702473926cbL,0x7c735b029e9fb5aaL,
+ 0xc52a1b852e3feff0L,0x9199abd3046b405aL,0xe306fcec39039971L } },
+ /* 130 */
+ { { 0xd6d9aec823e4712cL,0x7ca8376cc3c198eeL,0xe6d8318731bebd8aL,
+ 0xed57aff3d88bfef3L,0x72a645eecf44edc7L,0xd4e63d0b5cbb1517L },
+ { 0x98ce7a1cceee0ecfL,0x8f0126335383ee8eL,0x3b879078a6b455e8L,
+ 0xcbcd3d96c7658c06L,0x721d6fe70783336aL,0xf21a72635a677136L } },
+ /* 131 */
+ { { 0x19d8b3cd9586ba11L,0xd9e0aeb28a5c0480L,0xe4261dbf2230ef5cL,
+ 0x095a9dee02e6bf09L,0x8963723c80dc7784L,0x5c97dbaf145157b1L },
+ { 0x97e744344bc4503eL,0x0fb1cb3185a6b370L,0x3e8df2becd205d4bL,
+ 0x497dd1bcf8f765daL,0x92ef95c76c988a1aL,0x3f924baa64dc4cfaL } },
+ /* 132 */
+ { { 0x6bf1b8dd7268b448L,0xd4c28ba1efd79b94L,0x2fa1f8c8e4e3551fL,
+ 0x769e3ad45c9187a9L,0x28843b4d40326c0dL,0xfefc809450d5d669L },
+ { 0x30c85bfd90339366L,0x4eeb56f15ccf6c3aL,0x0e72b14928ccd1dcL,
+ 0x73ee85b5f2ce978eL,0xcdeb2bf33165bb23L,0x8106c9234e410abfL } },
+ /* 133 */
+ { { 0xc8df01617d02f4eeL,0x8a78154718e21225L,0x4ea895eb6acf9e40L,
+ 0x8b000cb56e5a633dL,0xf31d86d57e981ffbL,0xf5c8029c4475bc32L },
+ { 0x764561ce1b568973L,0x2f809b81a62996ecL,0x9e513d64da085408L,
+ 0xc27d815de61ce309L,0x0da6ff99272999e0L,0xbd284779fead73f7L } },
+ /* 134 */
+ { { 0x6033c2f99b1cdf2bL,0x2a99cf06bc5fa151L,0x7d27d25912177b3bL,
+ 0xb1f15273c4485483L,0x5fd57d81102e2297L,0x3d43e017c7f6acb7L },
+ { 0x41a8bb0b3a70eb28L,0x67de2d8e3e80b06bL,0x09245a4170c28de5L,
+ 0xad7dbcb1a7b26023L,0x70b08a352cbc6c1eL,0xb504fb669b33041fL } },
+ /* 135 */
+ { { 0xa8e85ab5f97a27c2L,0x6ac5ec8bc10a011bL,0x55745533ffbcf161L,
+ 0x01780e8565790a60L,0xe451bf8599ee75b0L,0x8907a63b39c29881L },
+ { 0x76d46738260189edL,0x284a443647bd35cbL,0xd74e8c4020cab61eL,
+ 0x6264bf8c416cf20aL,0xfa5a6c955fd820ceL,0xfa7154d0f24bb5fcL } },
+ /* 136 */
+ { { 0x18482cec9b3f5034L,0x962d445acd9e68fdL,0x266fb1d695746f23L,
+ 0xc66ade5a58c94a4bL,0xdbbda826ed68a5b6L,0x05664a4d7ab0d6aeL },
+ { 0xbcd4fe51025e32fcL,0x61a5aebfa96df252L,0xd88a07e231592a31L,
+ 0x5d9d94de98905517L,0x96bb40105fd440e7L,0x1b0c47a2e807db4cL } },
+ /* 137 */
+ { { 0x5c2a6ac808223878L,0xba08c269e65a5558L,0xd22b1b9b9bbc27fdL,
+ 0x919171bf72b9607dL,0x9ab455f9e588dc58L,0x6d54916e23662d93L },
+ { 0x8da8e9383b1de0c1L,0xa84d186a804f278fL,0xbf4988ccd3461695L,
+ 0xf5eae3bee10eb0cbL,0x1ff8b68fbf2a66edL,0xa68daf67c305b570L } },
+ /* 138 */
+ { { 0xc1004cff44b2e045L,0x91b5e1364b1c05d4L,0x53ae409088a48a07L,
+ 0x73fb2995ea11bb1aL,0x320485703d93a4eaL,0xcce45de83bfc8a5fL },
+ { 0xaff4a97ec2b3106eL,0x9069c630b6848b4fL,0xeda837a6ed76241cL,
+ 0x8a0daf136cc3f6cfL,0x199d049d3da018a8L,0xf867c6b1d9093ba3L } },
+ /* 139 */
+ { { 0xe4d42a5656527296L,0xae26c73dce71178dL,0x70a0adac6c251664L,
+ 0x813483ae5dc0ae1dL,0x7574eacddaab2dafL,0xc56b52dcc2d55f4fL },
+ { 0x872bc16795f32923L,0x4be175815bdd2a89L,0x9b57f1e7a7699f00L,
+ 0x5fcd9c723ac2de02L,0x83af3ba192377739L,0xa64d4e2bfc50b97fL } },
+ /* 140 */
+ { { 0x2172dae20e552b40L,0x62f49725d34d52e8L,0x7930ee4007958f98L,
+ 0x56da2a90751fdd74L,0xf1192834f53e48c3L,0x34d2ac268e53c343L },
+ { 0x1073c21813111286L,0x201dac14da9d9827L,0xec2c29dbee95d378L,
+ 0x9316f1191f3ee0b1L,0x7890c9f0544ce71cL,0xd77138af27612127L } },
+ /* 141 */
+ { { 0x78045e6d3b4ad1cdL,0xcd86b94e4aa49bc1L,0x57e51f1dfd677a16L,
+ 0xd9290935fa613697L,0x7a3f959334f4d893L,0x8c9c248b5d5fcf9bL },
+ { 0x9f23a4826f70d4e9L,0x1727345463190ae9L,0x4bdd7c135b081a48L,
+ 0x1e2de38928d65271L,0x0bbaaa25e5841d1fL,0xc4c18a79746772e5L } },
+ /* 142 */
+ { { 0x10ee2681593375acL,0x4f3288be7dd5e113L,0x9a97b2fb240f3538L,
+ 0xfa11089f1de6b1e2L,0x516da5621351bc58L,0x573b61192dfa85b5L },
+ { 0x89e966836cba7df5L,0xf299be158c28ab40L,0xe91c9348ad43fcbfL,
+ 0xe9bbc7cc9a1cefb3L,0xc8add876738b2775L,0x6e3b1f2e775eaa01L } },
+ /* 143 */
+ { { 0x0365a888b677788bL,0x634ae8c43fd6173cL,0x304987619e498dbeL,
+ 0x08c43e6dc8f779abL,0x068ae3844c09aca9L,0x2380c70b2018d170L },
+ { 0xcf77fbc3a297c5ecL,0xdacbc853ca457948L,0x3690de04336bec7eL,
+ 0x26bbac6414eec461L,0xd1c23c7e1f713abfL,0xf08bbfcde6fd569eL } },
+ /* 144 */
+ { { 0x5f8163f484770ee3L,0x0e0c7f94744a1706L,0x9c8f05f7e1b2d46dL,
+ 0x417eafe7d01fd99aL,0x2ba15df511440e5bL,0xdc5c552a91a6fbcfL },
+ { 0x86271d74a270f721L,0x32c0a075a004485bL,0x9d1a87e38defa075L,
+ 0xb590a7acbf0d20feL,0x430c41c28feda1f5L,0x454d287958f6ec24L } },
+ /* 145 */
+ { { 0x52b7a6357c525435L,0x3d9ef57f37c4bdbcL,0x2bb93e9edffcc475L,
+ 0xf7b8ba987710f3beL,0x42ee86da21b727deL,0x55ac3f192e490d01L },
+ { 0x487e3a6ec0c1c390L,0x036fb345446cde7bL,0x089eb276496ae951L,
+ 0xedfed4d971ed1234L,0x661b0dd5900f0b46L,0x11bd6f1b8582f0d3L } },
+ /* 146 */
+ { { 0x5cf9350f076bc9d1L,0x15d903becf3cd2c3L,0x21cfc8c225af031cL,
+ 0xe0ad32488b1cc657L,0xdd9fb96370014e87L,0xf0f3a5a1297f1658L },
+ { 0xbb908fbaf1f703aaL,0x2f9cc4202f6760baL,0x00ceec6666a38b51L,
+ 0x4deda33005d645daL,0xb9cf5c72f7de3394L,0xaeef65021ad4c906L } },
+ /* 147 */
+ { { 0x0583c8b17a19045dL,0xae7c3102d052824cL,0x2a234979ff6cfa58L,
+ 0xfe9dffc962c733c0L,0x3a7fa2509c0c4b09L,0x516437bb4fe21805L },
+ { 0x9454e3d5c2a23ddbL,0x0726d887289c104eL,0x8977d9184fd15243L,
+ 0xc559e73f6d7790baL,0x8fd3e87d465af85fL,0xa2615c745feee46bL } },
+ /* 148 */
+ { { 0xc8d607a84335167dL,0x8b42d804e0f5c887L,0x5f9f13df398d11f9L,
+ 0x5aaa508720740c67L,0x83da9a6aa3d9234bL,0xbd3a5c4e2a54bad1L },
+ { 0xdd13914c2db0f658L,0x29dcb66e5a3f373aL,0xbfd62df55245a72bL,
+ 0x19d1802391e40847L,0xd9df74dbb136b1aeL,0x72a06b6b3f93bc5bL } },
+ /* 149 */
+ { { 0x6da19ec3ad19d96fL,0xb342daa4fb2a4099L,0x0e61633a662271eaL,
+ 0x3bcece81ce8c054bL,0x7cc8e0618bd62dc6L,0xae189e19ee578d8bL },
+ { 0x73e7a25ddced1eedL,0xc1257f0a7875d3abL,0x2cb2d5a21cfef026L,
+ 0xd98ef39bb1fdf61cL,0xcd8e6f6924e83e6cL,0xd71e7076c7b7088bL } },
+ /* 150 */
+ { { 0x339368309d4245bfL,0x22d962172ac2953bL,0xb3bf5a8256c3c3cdL,
+ 0x50c9be910d0699e8L,0xec0944638f366459L,0x6c056dba513b7c35L },
+ { 0x687a6a83045ab0e3L,0x8d40b57f445c9295L,0x0f345048a16f5954L,
+ 0x64b5c6393d8f0a87L,0x106353a29f71c5e2L,0xdd58b475874f0dd4L } },
+ /* 151 */
+ { { 0x67ec084f62230c72L,0xf14f6cca481385e3L,0xf58bb4074cda7774L,
+ 0xe15011b1aa2dbb6bL,0xd488369d0c035ab1L,0xef83c24a8245f2fdL },
+ { 0xfb57328f9fdc2538L,0x79808293191fe46aL,0xe28f5c4432ede548L,
+ 0x1b3cda99ea1a022cL,0x39e639b73df2ec7fL,0x77b6272b760e9a18L } },
+ /* 152 */
+ { { 0x2b1d51bda65d56d5L,0x3a9b71f97ea696e0L,0x95250ecc9904f4c4L,
+ 0x8bc4d6ebe75774b7L,0x0e343f8aeaeeb9aaL,0xc473c1d1930e04cbL },
+ { 0x282321b1064cd8aeL,0xf4b4371e5562221cL,0xc1cc81ecd1bf1221L,
+ 0xa52a07a9e2c8082fL,0x350d8e59ba64a958L,0x29e4f3de6fb32c9aL } },
+ /* 153 */
+ { { 0x0aa9d56cba89aaa5L,0xf0208ac0c4c6059eL,0x7400d9c6bd6ddca4L,
+ 0xb384e475f2c2f74aL,0x4c1061fcb1562dd3L,0x3924e2482e153b8dL },
+ { 0xf38b8d98849808abL,0x29bf3260a491aa36L,0x85159ada88220edeL,
+ 0x8b47915bbe5bc422L,0xa934d72ed7300967L,0xc4f303982e515d0dL } },
+ /* 154 */
+ { { 0xe3e9ee421b1de38bL,0xa124e25a42636760L,0x90bf73c090165b1aL,
+ 0x21802a34146434c5L,0x54aa83f22e1fa109L,0x1d4bd03ced9c51e9L },
+ { 0xc2d96a38798751e6L,0xed27235f8c3507f5L,0xb5fb80e2c8c24f88L,
+ 0xf873eefad37f4f78L,0x7229fd74f224ba96L,0x9dcd91999edd7149L } },
+ /* 155 */
+ { { 0xee9f81a64e94f22aL,0xe5609892f71ec341L,0x6c818ddda998284eL,
+ 0x9fd472953b54b098L,0x47a6ac030e8a7cc9L,0xde684e5eb207a382L },
+ { 0x4bdd1ecd2b6b956bL,0x09084414f01b3583L,0xe2f80b3255233b14L,
+ 0x5a0fec54ef5ebc5eL,0x74cf25e6bf8b29a2L,0x1c757fa07f29e014L } },
+ /* 156 */
+ { { 0x1bcb5c4aeb0fdfe4L,0xd7c649b3f0899367L,0xaef68e3f05bc083bL,
+ 0x57a06e46a78aa607L,0xa2136ecc21223a44L,0x89bd648452f5a50bL },
+ { 0x724411b94455f15aL,0x23dfa97008a9c0fdL,0x7b0da4d16db63befL,
+ 0x6f8a7ec1fb162443L,0xc1ac9ceee98284fbL,0x085a582b33566022L } },
+ /* 157 */
+ { { 0x15cb61f9ec1f138aL,0x11c9a230668f0c28L,0xac829729df93f38fL,
+ 0xcef256984048848dL,0x3f686da02bba8fbfL,0xed5fea78111c619aL },
+ { 0x9b4f73bcd6d1c833L,0x5095160686e7bf80L,0xa2a73508042b1d51L,
+ 0x9ef6ea495fb89ec2L,0xf1008ce95ef8b892L,0x78a7e6849ae8568bL } },
+ /* 158 */
+ { { 0x3fe83a7c10470cd8L,0x92734682f86df000L,0xb5dac06bda9409b5L,
+ 0x1e7a966094939c5fL,0xdec6c1505cc116dcL,0x1a52b40866bac8ccL },
+ { 0x5303a3656e864045L,0x45eae72a9139efc1L,0x83bec6466f31d54fL,
+ 0x2fb4a86f6e958a6dL,0x6760718e4ff44030L,0x008117e3e91ae0dfL } },
+ /* 159 */
+ { { 0x5d5833ba384310a2L,0xbdfb4edc1fd6c9fcL,0xb9a4f102849c4fb8L,
+ 0xe5fb239a581c1e1fL,0xba44b2e7d0a9746dL,0x78f7b7683bd942b9L },
+ { 0x076c8ca1c87607aeL,0x82b23c2ed5caaa7eL,0x6a581f392763e461L,
+ 0xca8a5e4a3886df11L,0xc87e90cf264e7f22L,0x04f74870215cfcfcL } },
+ /* 160 */
+ { { 0x5285d116141d161cL,0x67cd2e0e93c4ed17L,0x12c62a647c36187eL,
+ 0xf5329539ed2584caL,0xc4c777c442fbbd69L,0x107de7761bdfc50aL },
+ { 0x9976dcc5e96beebdL,0xbe2aff95a865a151L,0x0e0a9da19d8872afL,
+ 0x5e357a3da63c17ccL,0xd31fdfd8e15cc67cL,0xc44bbefd7970c6d8L } },
+ /* 161 */
+ { { 0x703f83e24c0c62f1L,0x9b1e28ee4e195572L,0x6a82858bfe26ccedL,
+ 0xd381c84bc43638faL,0x94f72867a5ba43d8L,0x3b4a783d10b82743L },
+ { 0xee1ad7b57576451eL,0xc3d0b59714b6b5c8L,0x3dc30954fcacc1b8L,
+ 0x55df110e472c9d7bL,0x97c86ed702f8a328L,0xd043341388dc098fL } },
+ /* 162 */
+ { { 0x1a60d1522ca8f2feL,0x61640948491bd41fL,0x6dae29a558dfe035L,
+ 0x9a615bea278e4863L,0xbbdb44779ad7c8e5L,0x1c7066302ceac2fcL },
+ { 0x5e2b54c699699b4bL,0xb509ca6d239e17e8L,0x728165feea063a82L,
+ 0x6b5e609db6a22e02L,0x12813905b26ee1dfL,0x07b9f722439491faL } },
+ /* 163 */
+ { { 0x1592ec1448ff4e49L,0x3e4e9f176d644129L,0x7acf82881156acc0L,
+ 0x5aa34ba8bb092b0bL,0xcd0f90227d38393dL,0x416724ddea4f8187L },
+ { 0x3c4e641cc0139e73L,0xe0fe46cf91e4d87dL,0xedb3c792cab61f8aL,
+ 0x4cb46de4d3868753L,0xe449c21d20f1098aL,0x5e5fd059f5b8ea6eL } },
+ /* 164 */
+ { { 0x7fcadd4675856031L,0x89c7a4cdeaf2fbd0L,0x1af523ce7a87c480L,
+ 0xe5fc109561d9ae90L,0x3fb5864fbcdb95f5L,0xbeb5188ebb5b2c7dL },
+ { 0x3d1563c33ae65825L,0x116854c40e57d641L,0x11f73d341942ebd3L,
+ 0x24dc5904c06955b3L,0x8a0d4c83995a0a62L,0xfb26b86d5d577b7dL } },
+ /* 165 */
+ { { 0xc53108e7c686ae17L,0x9090d739d1c1da56L,0x4583b0139aec50aeL,
+ 0xdd9a088ba49a6ab2L,0x28192eeaf382f850L,0xcc8df756f5fe910eL },
+ { 0x877823a39cab7630L,0x64984a9afb8e7fc1L,0x5448ef9c364bfc16L,
+ 0xbbb4f871c44e2a9aL,0x901a41ab435c95e9L,0xc6c23e5faaa50a06L } },
+ /* 166 */
+ { { 0xb78016c19034d8ddL,0x856bb44b0b13e79bL,0x85c6409ab3241a05L,
+ 0x8d2fe19a2d78ed21L,0xdcc7c26d726eddf2L,0x3ccaff5f25104f04L },
+ { 0x397d7edc6b21f843L,0xda88e4dde975de4cL,0x5273d3964f5ab69eL,
+ 0x537680e39aae6cc0L,0xf749cce53e6f9461L,0x021ddbd9957bffd3L } },
+ /* 167 */
+ { { 0x7b64585f777233cfL,0xfe6771f60942a6f0L,0x636aba7adfe6eef0L,
+ 0x63bbeb5686038029L,0xacee5842de8fcf36L,0x48d9aa99d4a20524L },
+ { 0xcff7a74c0da5e57aL,0xc232593ce549d6c9L,0x68504bccf0f2287bL,
+ 0x6d7d098dbc8360b5L,0xeac5f1495b402f41L,0x61936f11b87d1bf1L } },
+ /* 168 */
+ { { 0xaa9da167b8153a9dL,0xa49fe3ac9e83ecf0L,0x14c18f8e1b661384L,
+ 0x61c24dab38434de1L,0x3d973c3a283dae96L,0xc99baa0182754fc9L },
+ { 0x477d198f4c26b1e3L,0x12e8e186a7516202L,0x386e52f6362addfaL,
+ 0x31e8f695c3962853L,0xdec2af136aaedb60L,0xfcfdb4c629cf74acL } },
+ /* 169 */
+ { { 0x6b3ee958cca40298L,0xc3878153f2f5d195L,0x0c565630ed2eae5bL,
+ 0xd089b37e3a697cf2L,0xc2ed2ac7ad5029eaL,0x7e5cdfad0f0dda6aL },
+ { 0xf98426dfd9b86202L,0xed1960b14335e054L,0x1fdb02463f14639eL,
+ 0x17f709c30db6c670L,0xbfc687ae773421e1L,0x13fefc4a26c1a8acL } },
+ /* 170 */
+ { { 0xe361a1987ffa0a5fL,0xf4b26102c63fe109L,0x264acbc56c74e111L,
+ 0x4af445fa77abebafL,0x448c4fdd24cddb75L,0x0b13157d44506eeaL },
+ { 0x22a6b15972e9993dL,0x2c3c57e485e5ecbeL,0xa673560bfd83e1a1L,
+ 0x6be23f82c3b8c83bL,0x40b13a9640bbe38eL,0x66eea033ad17399bL } },
+ /* 171 */
+ { { 0x49fc6e95b4c6c693L,0xefc735de36af7d38L,0xe053343d35fe42fcL,
+ 0xf0aa427c6a9ab7c3L,0xc79f04364a0fcb24L,0x1628724393ebbc50L },
+ { 0x5c3d6bd016927e1eL,0x40158ed2673b984cL,0xa7f86fc84cd48b9aL,
+ 0x1643eda660ea282dL,0x45b393eae2a1beedL,0x664c839e19571a94L } },
+ /* 172 */
+ { { 0x5774575027eeaf94L,0x2875c925ea99e1e7L,0xc127e7ba5086adeaL,
+ 0x765252a086fe424fL,0x1143cc6c2b6c0281L,0xc9bb2989d671312dL },
+ { 0x880c337c51acb0a5L,0xa3710915d3c60f78L,0x496113c09262b6edL,
+ 0x5d25d9f89ce48182L,0x53b6ad72b3813586L,0x0ea3bebc4c0e159cL } },
+ /* 173 */
+ { { 0xcaba450ac5e49beaL,0x684e54157c05da59L,0xa2e9cab9de7ac36cL,
+ 0x4ca79b5f2e6f957bL,0xef7b024709b817b1L,0xeb3049907d89df0fL },
+ { 0x508f730746fe5096L,0x695810e82e04eaafL,0x88ef1bd93512f76cL,
+ 0x776613513ebca06bL,0xf7d4863accf158b7L,0xb2a81e4494ee57daL } },
+ /* 174 */
+ { { 0xff288e5b6d53e6baL,0xa90de1a914484ea2L,0x2fadb60ced33c8ecL,
+ 0x579d6ef328b66a40L,0x4f2dd6ddec24372dL,0xe9e33fc91d66ec7dL },
+ { 0x110899d2039eab6eL,0xa31a667a3e97bb5eL,0x6200166dcfdce68eL,
+ 0xbe83ebae5137d54bL,0x085f7d874800acdfL,0xcf4ab1330c6f8c86L } },
+ /* 175 */
+ { { 0x03f65845931e08fbL,0x6438551e1506e2c0L,0x5791f0dc9c36961fL,
+ 0x68107b29e3dcc916L,0x83242374f495d2caL,0xd8cfb6636ee5895bL },
+ { 0x525e0f16a0349b1bL,0x33cd2c6c4a0fab86L,0x46c12ee82af8dda9L,
+ 0x7cc424ba71e97ad3L,0x69766ddf37621eb0L,0x95565f56a5f0d390L } },
+ /* 176 */
+ { { 0xe0e7bbf21a0f5e94L,0xf771e1151d82d327L,0x10033e3dceb111faL,
+ 0xd269744dd3426638L,0xbdf2d9da00d01ef6L,0x1cb80c71a049ceafL },
+ { 0x17f183289e21c677L,0x6452af0519c8f98bL,0x35b9c5f780b67997L,
+ 0x5c2e1cbe40f8f3d4L,0x43f9165666d667caL,0x9faaa059cf9d6e79L } },
+ /* 177 */
+ { { 0x8ad246180a078fe6L,0xf6cc73e6464fd1ddL,0x4d2ce34dc3e37448L,
+ 0x624950c5e3271b5fL,0x62910f5eefc5af72L,0x8b585bf8aa132bc6L },
+ { 0x11723985a839327fL,0x34e2d27d4aac252fL,0x402f59ef6296cc4eL,
+ 0x00ae055c47053de9L,0xfc22a97228b4f09bL,0xa9e86264fa0c180eL } },
+ /* 178 */
+ { { 0x0b7b6224bc310eccL,0x8a1a74f167fa14edL,0x87dd09607214395cL,
+ 0xdf1b3d09f5c91128L,0x39ff23c686b264a8L,0xdc2d49d03e58d4c5L },
+ { 0x2152b7d3a9d6f501L,0xf4c32e24c04094f7L,0xc6366596d938990fL,
+ 0x084d078f94fb207fL,0xfd99f1d7328594cbL,0x36defa64cb2d96b3L } },
+ /* 179 */
+ { { 0x4619b78113ed7cbeL,0x95e500159784bd0eL,0x2a32251c2c7705feL,
+ 0xa376af995f0dd083L,0x55425c6c0361a45bL,0x812d2cef1f291e7bL },
+ { 0xccf581a05fd94972L,0x26e20e39e56dc383L,0x0093685d63dbfbf0L,
+ 0x1fc164cc36b8c575L,0xb9c5ab81390ef5e7L,0x40086beb26908c66L } },
+ /* 180 */
+ { { 0xe5e54f7937e3c115L,0x69b8ee8cc1445a8aL,0x79aedff2b7659709L,
+ 0xe288e1631b46fbe6L,0xdb4844f0d18d7bb7L,0xe0ea23d048aa6424L },
+ { 0x714c0e4ef3d80a73L,0x87a0aa9e3bd64f98L,0x8844b8a82ec63080L,
+ 0xe0ac9c30255d81a3L,0x86151237455397fcL,0x0b9794642f820155L } },
+ /* 181 */
+ { { 0x127a255a4ae03080L,0x232306b4580a89fbL,0x04e8cd6a6416f539L,
+ 0xaeb70dee13b02a0eL,0xa3038cf84c09684aL,0xa710ec3c28e433eeL },
+ { 0x77a72567681b1f7dL,0x86fbce952fc28170L,0xd3408683f5735ac8L,
+ 0x3a324e2a6bd68e93L,0x7ec74353c027d155L,0xab60354cd4427177L } },
+ /* 182 */
+ { { 0x32a5342aef4c209dL,0x2ba7527408d62704L,0x4bb4af6fc825d5feL,
+ 0x1c3919ced28e7ff1L,0x1dfc2fdcde0340f6L,0xc6580baf29f33ba9L },
+ { 0xae121e7541d442cbL,0x4c7727fd3a4724e4L,0xe556d6a4524f3474L,
+ 0x87e13cc7785642a2L,0x182efbb1a17845fdL,0xdcec0cf14e144857L } },
+ /* 183 */
+ { { 0x1cb89541e9539819L,0xc8cb3b4f9d94dbf1L,0x1d353f63417da578L,
+ 0xb7a697fb8053a09eL,0x8d841731c35d8b78L,0x85748d6fb656a7a9L },
+ { 0x1fd03947c1859c5dL,0x6ce965c1535d22a2L,0x1966a13e0ca3aadcL,
+ 0x9802e41d4fb14effL,0xa9048cbb76dd3fcdL,0x89b182b5e9455bbaL } },
+ /* 184 */
+ { { 0xd777ad6a43360710L,0x841287ef55e9936bL,0xbaf5c67004a21b24L,
+ 0xf2c0725f35ad86f1L,0x338fa650c707e72eL,0x2bf8ed2ed8883e52L },
+ { 0xb0212cf4b56e0d6aL,0x50537e126843290cL,0xd8b184a198b3dc6fL,
+ 0xd2be9a350210b722L,0x407406db559781eeL,0x5a78d5910bc18534L } },
+ /* 185 */
+ { { 0x4d57aa2ad748b02cL,0xbe5b3451a12b3b95L,0xadca7a4564711258L,
+ 0x597e091a322153dbL,0xf327100632eb1eabL,0xbd9adcba2873f301L },
+ { 0xd1dc79d138543f7fL,0x00022092921b1fefL,0x86db3ef51e5df8edL,
+ 0x888cae049e6b944aL,0x71bd29ec791a32b4L,0xd3516206a6d1c13eL } },
+ /* 186 */
+ { { 0x2ef6b95255924f43L,0xd2f401ae4f9de8d5L,0xfc73e8d7adc68042L,
+ 0x627ea70c0d9d1bb4L,0xc3bb3e3ebbf35679L,0x7e8a254ad882dee4L },
+ { 0x08906f50b5924407L,0xf14a0e61a1ad444aL,0xaa0efa2165f3738eL,
+ 0xd60c7dd6ae71f161L,0x9e8390faf175894dL,0xd115cd20149f4c00L } },
+ /* 187 */
+ { { 0x2f2e2c1da52abf77L,0xc2a0dca554232568L,0xed423ea254966dccL,
+ 0xe48c93c7cd0dd039L,0x1e54a225176405c7L,0x1efb5b1670d58f2eL },
+ { 0xa751f9d994fb1471L,0xfdb31e1f67d2941dL,0xa6c74eb253733698L,
+ 0xd3155d1189a0f64aL,0x4414cfe4a4b8d2b6L,0x8d5a4be8f7a8e9e3L } },
+ /* 188 */
+ { { 0x5c96b4d452669e98L,0x4547f9228fd42a03L,0xcf5c1319d285174eL,
+ 0x805cd1ae064bffa0L,0x50e8bc4f246d27e7L,0xf89ef98fd5781e11L },
+ { 0xb4ff95f6dee0b63fL,0xad850047222663a4L,0x026918604d23ce9cL,
+ 0x3e5309ce50019f59L,0x27e6f72269a508aeL,0xe9376652267ba52cL } },
+ /* 189 */
+ { { 0xa04d289cc0368708L,0xc458872f5e306e1dL,0x76fa23de33112feaL,
+ 0x718e39746efde42eL,0xf0c98cdc1d206091L,0x5fa3ca6214a71987L },
+ { 0xeee8188bdcaa9f2aL,0x312cc732589a860dL,0xf9808dd6c63aeb1fL,
+ 0x70fd43db4ea62b53L,0x2c2bfe34890b6e97L,0x105f863cfa426aa6L } },
+ /* 190 */
+ { { 0x0b29795db38059adL,0x5686b77e90647ea0L,0xeff0470edb473a3eL,
+ 0x278d2340f9b6d1e2L,0xebbff95bbd594ec7L,0xf4b72334d3a7f23dL },
+ { 0x2a285980a5a83f0bL,0x0786c41a9716a8b3L,0x138901bd22511812L,
+ 0xd1b55221e2fede6eL,0x0806e264df4eb590L,0x6c4c897e762e462eL } },
+ /* 191 */
+ { { 0xd10b905fb4b41d9dL,0x826ca4664523a65bL,0x535bbd13b699fa37L,
+ 0x5b9933d773bc8f90L,0x9332d61fcd2118adL,0x158c693ed4a65fd0L },
+ { 0x4ddfb2a8e6806e63L,0xe31ed3ecb5de651bL,0xf9460e51819bc69aL,
+ 0x6229c0d62c76b1f8L,0xbb78f231901970a3L,0x31f3820f9cee72b8L } },
+ /* 192 */
+ { { 0xe931caf2c09e1c72L,0x0715f29812990cf4L,0x33aad81d943262d8L,
+ 0x5d292b7a73048d3fL,0xb152aaa4dc7415f6L,0xc3d10fd90fd19587L },
+ { 0xf76b35c575ddadd0L,0x9f5f4a511e7b694cL,0x2f1ab7ebc0663025L,
+ 0x01c9cc87920260b0L,0xc4b1f61a05d39da6L,0x6dcd76c4eb4a9c4eL } },
+ /* 193 */
+ { { 0x0ba0916ffdc83f01L,0x354c8b449553e4f9L,0xa6cc511affc5e622L,
+ 0xb954726ae95be787L,0xcb04811575b41a62L,0xfa2ae6cdebfde989L },
+ { 0x6376bbc70f24659aL,0x13a999fd4c289c43L,0xc7134184ec9abd8bL,
+ 0x28c02bf6a789ab04L,0xff841ebcd3e526ecL,0x442b191e640893a8L } },
+ /* 194 */
+ { { 0x4cac6c62fa2b6e20L,0x97f29e9bf6d69861L,0x228ab1dbbc96d12dL,
+ 0x6eb913275e8e108dL,0xd4b3d4d140771245L,0x61b20623ca8a803aL },
+ { 0x2c2f3b41a6a560b1L,0x879e1d403859fcf4L,0x7cdb5145024dbfc3L,
+ 0x55d08f153bfa5315L,0x2f57d773aa93823aL,0xa97f259cc6a2c9a2L } },
+ /* 195 */
+ { { 0xc306317be58edbbbL,0x25ade51c79dfdf13L,0x6b5beaf116d83dd6L,
+ 0xe8038a441dd8f925L,0x7f00143cb2a87b6bL,0xa885d00df5b438deL },
+ { 0xe9f76790cf9e48bdL,0xf0bdf9f0a5162768L,0x0436709fad7b57cbL,
+ 0x7e151c12f7c15db7L,0x3514f0225d90ee3bL,0x2e84e8032c361a8dL } },
+ /* 196 */
+ { { 0x2277607d563ec8d8L,0xa661811fe3934cb7L,0x3ca72e7af58fd5deL,
+ 0x7989da0462294c6aL,0x88b3708bf6bbefe9L,0x0d524cf753ed7c82L },
+ { 0x69f699ca2f30c073L,0xf0fa264b9dc1dcf3L,0x44ca456805f0aaf6L,
+ 0x0f5b23c7d19b9bafL,0x39193f41eabd1107L,0x9e3e10ad2a7c9b83L } },
+ /* 197 */
+ { { 0xa90824f0d4ae972fL,0x43eef02bc6e846e7L,0x7e46061229d2160aL,
+ 0x29a178acfe604e91L,0x23056f044eb184b2L,0x4fcad55feb54cdf4L },
+ { 0xa0ff96f3ae728d15L,0x8a2680c6c6a00331L,0x5f84cae07ee52556L,
+ 0x5e462c3ac5a65dadL,0x5d2b81dfe2d23f4fL,0x6e47301bc5b1eb07L } },
+ /* 198 */
+ { { 0x77411d68af8219b9L,0xcb883ce651b1907aL,0x25c87e57101383b5L,
+ 0x9c7d9859982f970dL,0xaa6abca5118305d2L,0x725fed2f9013a5dbL },
+ { 0x487cdbafababd109L,0xc0f8cf5687586528L,0xa02591e68ad58254L,
+ 0xc071b1d1debbd526L,0x927dfe8b961e7e31L,0x55f895f99263dfe1L } },
+ /* 199 */
+ { { 0xf899b00db175645bL,0x51f3a627b65b4b92L,0xa2f3ac8db67399efL,
+ 0xe717867fe400bc20L,0x42cc90201967b952L,0x3d5967513ecd1de1L },
+ { 0xd41ebcdedb979775L,0x99ba61bc6a2e7e88L,0x039149a5321504f2L,
+ 0xe7dc231427ba2fadL,0x9f556308b57d8368L,0x2b6d16c957da80a7L } },
+ /* 200 */
+ { { 0x84af5e76279ad982L,0x9bb4c92d9c8b81a6L,0xd79ad44e0e698e67L,
+ 0xe8be9048265fc167L,0xf135f7e60c3a4cccL,0xa0a10d38b8863a33L },
+ { 0xe197247cd386efd9L,0x0eefd3f9b52346c2L,0xc22415f978607bc8L,
+ 0xa2a8f862508674ceL,0xa72ad09ec8c9d607L,0xcd9f0ede50fa764fL } },
+ /* 201 */
+ { { 0x063391c7d1a46d4dL,0x2df51c119eb01693L,0xc5849800849e83deL,
+ 0x48fd09aa8ad08382L,0xa405d873aa742736L,0xee49e61ee1f9600cL },
+ { 0xd76676be48c76f73L,0xd9c100f601274b2aL,0x110bb67c83f8718dL,
+ 0xec85a42002fc0d73L,0xc0449e1e744656adL,0x28ce737637d9939bL } },
+ /* 202 */
+ { { 0x97e9af7244544ac7L,0xf2c658d5ba010426L,0x732dec39fb3adfbdL,
+ 0xd12faf91a2df0b07L,0x8ac267252171e208L,0xf820cdc85b24fa54L },
+ { 0x307a6eea94f4cf77L,0x18c783d2944a33c6L,0x4b939d4c0b741ac5L,
+ 0x1d7acd153ffbb6e4L,0x06a248587a255e44L,0x14fbc494ce336d50L } },
+ /* 203 */
+ { { 0x9b920c0c51584e3cL,0xc7733c59f7e54027L,0xe24ce13988422bbeL,
+ 0x11ada812523bd6abL,0xde068800b88e6defL,0x7b872671fe8c582dL },
+ { 0x4e746f287de53510L,0x492f8b99f7971968L,0x1ec80bc77d928ac2L,
+ 0xb3913e48432eb1b5L,0xad08486632028f6eL,0x122bb8358fc2f38bL } },
+ /* 204 */
+ { { 0x0a9f3b1e3b0b29c3L,0x837b64324fa44151L,0xb9905c9217b28ea7L,
+ 0xf39bc93798451750L,0xcd383c24ce8b6da1L,0x299f57db010620b2L },
+ { 0x7b6ac39658afdce3L,0xa15206b33d05ef47L,0xa0ae37e2b9bb02ffL,
+ 0x107760ab9db3964cL,0xe29de9a067954beaL,0x446a1ad8431c3f82L } },
+ /* 205 */
+ { { 0xc6fecea05c6b8195L,0xd744a7c5f49e71b9L,0xa8e96acc177a7ae7L,
+ 0x1a05746c358773a7L,0xa416214637567369L,0xaa0217f787d1c971L },
+ { 0x61e9d15877fd3226L,0x0f6f2304e4f600beL,0xa9c4cebc7a6dff07L,
+ 0xd15afa0109f12a24L,0x2bbadb228c863ee9L,0xa28290e4e5eb8c78L } },
+ /* 206 */
+ { { 0x55b87fa03e9de330L,0x12b26066195c145bL,0xe08536e0a920bef0L,
+ 0x7bff6f2c4d195adcL,0x7f319e9d945f4187L,0xf9848863f892ce47L },
+ { 0xd0efc1d34fe37657L,0x3c58de825cf0e45aL,0x626ad21a8b0ccbbeL,
+ 0xd2a31208af952fc5L,0x81791995eb437357L,0x5f19d30f98e95d4fL } },
+ /* 207 */
+ { { 0x72e83d9a0e6865bbL,0x22f5af3bf63456a6L,0x409e9c73463c8d9eL,
+ 0x40e9e578dfe6970eL,0x876b6efa711b91caL,0x895512cf942625a3L },
+ { 0x84c8eda8cb4e462bL,0x84c0154a4412e7c8L,0x04325db1ceb7b71fL,
+ 0x1537dde366f70877L,0xf3a093991992b9acL,0xa7316606d498ae77L } },
+ /* 208 */
+ { { 0x13990d2fcad260f5L,0x76c3be29eec0e8c0L,0x7dc5bee00f7bd7d5L,
+ 0x9be167d2efebda4bL,0xcce3dde69122b87eL,0x75a28b0982b5415cL },
+ { 0xf6810bcde84607a6L,0xc6d581286f4dbf0dL,0xfead577d1b4dafebL,
+ 0x9bc440b2066b28ebL,0x53f1da978b17e84bL,0x0459504bcda9a575L } },
+ /* 209 */
+ { { 0x13e39a02329e5836L,0x2c9e7d51f717269dL,0xc5ac58d6f26c963bL,
+ 0x3b0c6c4379967bf5L,0x60bbea3f55908d9dL,0xd84811e7f07c9ad1L },
+ { 0xfe7609a75bd20e4aL,0xe4325dd20a70baa8L,0x3711f370b3600386L,
+ 0x97f9562fd0924302L,0x040dc0c34acc4436L,0xfd6d725cde79cdd4L } },
+ /* 210 */
+ { { 0xb3efd0e3cf13eafbL,0x21009cbb5aa0ae5fL,0xe480c55379022279L,
+ 0x755cf334b2fc9a6dL,0x8564a5bf07096ae7L,0xddd649d0bd238139L },
+ { 0xd0de10b18a045041L,0x6e05b413c957d572L,0x5c5ff8064e0fb25cL,
+ 0xd933179b641162fbL,0x42d48485e57439f9L,0x70c5bd0a8a8d72aaL } },
+ /* 211 */
+ { { 0xa767173897bdf646L,0xaa1485b4ab329f7cL,0xce3e11d6f8f25fdfL,
+ 0x76a3fc7ec6221824L,0x045f281ff3924740L,0x24557d4e96d13a9aL },
+ { 0x875c804bdd4c27cdL,0x11c5f0f40f5c7feaL,0xac8c880bdc55ff7eL,
+ 0x2acddec51103f101L,0x38341a21f99faa89L,0xc7b67a2cce9d6b57L } },
+ /* 212 */
+ { { 0x9a0d724f8e357586L,0x1d7f4ff5df648da0L,0x9c3e6c9bfdee62a5L,
+ 0x0499cef00389b372L,0xe904050d98eab879L,0xe8eef1b66c051617L },
+ { 0xebf5bfebc37e3ca9L,0x7c5e946da4e0b91dL,0x790973142c4bea28L,
+ 0x81f6c109ee67b2b7L,0xaf237d9bdafc5edeL,0xd2e602012abb04c7L } },
+ /* 213 */
+ { { 0x6156060c8a4f57bfL,0xf9758696ff11182aL,0x8336773c6296ef00L,
+ 0x9c054bceff666899L,0xd6a11611719cd11cL,0x9824a641dbe1acfaL },
+ { 0x0b7b7a5fba89fd01L,0xf8d3b809889f79d8L,0xc5e1ea08f578285cL,
+ 0x7ac74536ae6d8288L,0x5d37a2007521ef5fL,0x5ecc4184b260a25dL } },
+ /* 214 */
+ { { 0xddcebb19a708c8d3L,0xe63ed04fc63f81ecL,0xd045f5a011873f95L,
+ 0x3b5ad54479f276d5L,0x81272a3d425ae5b3L,0x8bfeb50110ce1605L },
+ { 0x4233809c888228bfL,0x4bd82acfb2aff7dfL,0x9c68f1800cbd4a7fL,
+ 0xfcd771246b44323dL,0x60c0fcf6891db957L,0xcfbb4d8904da8f7fL } },
+ /* 215 */
+ { { 0x9a6a5df93b26139aL,0x3e076a83b2cc7eb8L,0x47a8e82d5a964bcdL,
+ 0x8a4e2a39b9278d6bL,0x93506c98e4443549L,0x06497a8ff1e0d566L },
+ { 0x3dee8d992b1efa05L,0x2da63ca845393e33L,0xa4af7277cf0579adL,
+ 0xaf4b46393236d8eaL,0x6ccad95b32b617f5L,0xce76d8b8b88bb124L } },
+ /* 216 */
+ { { 0x63d2537a083843dcL,0x89eb35141e4153b4L,0x5175ebc4ea9afc94L,
+ 0x7a6525808ed1aed7L,0x67295611d85e8297L,0x8dd2d68bb584b73dL },
+ { 0x237139e60133c3a4L,0x9de838ab4bd278eaL,0xe829b072c062fcd9L,
+ 0x70730d4f63ba8706L,0x6080483fd3cd05ecL,0x872ab5b80c85f84dL } },
+ /* 217 */
+ { { 0xfc0776d3999d4d49L,0xa3eb59deec3f45e7L,0xbc990e440dae1fc1L,
+ 0x33596b1ea15371ffL,0xd447dcb29bc7ab25L,0xcd5b63e935979582L },
+ { 0xae3366fa77d1ff11L,0x59f28f05edee6903L,0x6f43fed1a4433bf2L,
+ 0x15409c9bdf9ce00eL,0x21b5cdedaca9c5dcL,0xf9f3359582d7bdb4L } },
+ /* 218 */
+ { { 0x959443789422c792L,0x239ea923c958b8bfL,0x4b61a247df076541L,
+ 0x4d29ce85bb9fc544L,0x9a692a670b424559L,0x6e0ca5a00e486900L },
+ { 0x6b79a78285b3beceL,0x41f35e39c61f9892L,0xff82099aae747f82L,
+ 0x58c8ae3fd0ca59d6L,0x4ac930e299406b5fL,0x2ce04eb99df24243L } },
+ /* 219 */
+ { { 0x4366b9941ac37b82L,0xff0c728d25b04d83L,0x1f55136119c47b7cL,
+ 0xdbf2d5edbeff13e7L,0xf78efd51e12a683dL,0x82cd85b9989cf9c4L },
+ { 0xe23c6db6e0cb5d37L,0x818aeebd72ee1a15L,0x8212aafd28771b14L,
+ 0x7bc221d91def817dL,0xdac403a29445c51fL,0x711b051712c3746bL } },
+ /* 220 */
+ { { 0x0ed9ed485ea99eccL,0xf799500db8cab5e1L,0xa8ec87dcb570cbdcL,
+ 0x52cfb2c2d35dfaecL,0x8d31fae26e4d80a4L,0xe6a37dc9dcdeabe5L },
+ { 0x5d365a341deca452L,0x09a5f8a50d68b44eL,0x59238ea5a60744b1L,
+ 0xf2fedc0dbb4249e9L,0xe395c74ea909b2e3L,0xe156d1a539388250L } },
+ /* 221 */
+ { { 0xd796b3d047181ae9L,0xbaf44ba844197808L,0xe693309434cf3facL,
+ 0x41aa6adec3bd5c46L,0x4fda75d8eed947c6L,0xacd9d4129ea5a525L },
+ { 0x65cc55a3d430301bL,0x3c9a5bcf7b52ea49L,0x22d319cf159507f0L,
+ 0x2ee0b9b5de74a8ddL,0x20c26a1e877ac2b6L,0x387d73da92e7c314L } },
+ /* 222 */
+ { { 0x13c4833e8cd3fdacL,0x76fcd473332e5b8eL,0xff671b4be2fe1fd3L,
+ 0x4d734e8b5d98d8ecL,0xb1ead3c6514bbc11L,0xd14ca8587b390494L },
+ { 0x95a443af5d2d37e9L,0x73c6ea7300464622L,0xa44aeb4b15755044L,
+ 0xba3f8575fab58feeL,0x9779dbc9dc680a6fL,0xe1ee5f5a7b37ddfcL } },
+ /* 223 */
+ { { 0xcd0b464812d29f46L,0x93295b0b0ed53137L,0xbfe2609480bef6c9L,
+ 0xa656578854248b00L,0x69c43fca80e7f9c4L,0x2190837bbe141ea1L },
+ { 0x875e159aa1b26cfbL,0x90ca9f877affe852L,0x15e6550d92ca598eL,
+ 0xe3e0945d1938ad11L,0xef7636bb366ef937L,0xb6034d0bb39869e5L } },
+ /* 224 */
+ { { 0x4d255e3026d8356eL,0xf83666edd314626fL,0x421ddf61d0c8ed64L,
+ 0x96e473c526677b61L,0xdad4af7e9e9b18b3L,0xfceffd4aa9393f75L },
+ { 0x843138a111c731d5L,0x05bcb3a1b2f141d9L,0x20e1fa95617b7671L,
+ 0xbefce81288ccec7bL,0x582073dc90f1b568L,0xf572261a1f055cb7L } },
+ /* 225 */
+ { { 0xf314827736973088L,0xc008e70886a9f980L,0x1b795947e046c261L,
+ 0xdf1e6a7dca76bca0L,0xabafd88671acddf0L,0xff7054d91364d8f4L },
+ { 0x2cf63547e2260594L,0x468a5372d73b277eL,0xc7419e24ef9bd35eL,
+ 0x2b4a1c2024043cc3L,0xa28f047a890b39cdL,0xdca2cea146f9a2e3L } },
+ /* 226 */
+ { { 0xab78873653277538L,0xa734e225cf697738L,0x66ee1d1e6b22e2c1L,
+ 0x2c615389ebe1d212L,0xf36cad4002bb0766L,0x120885c33e64f207L },
+ { 0x59e77d5690fbfec2L,0xf9e781aad7a574aeL,0x801410b05d045e53L,
+ 0xd3b5f0aaa91b5f0eL,0xb3d1df007fbb3521L,0x11c4b33ec72bee9aL } },
+ /* 227 */
+ { { 0xd32b983283c3a7f3L,0x8083abcf88d8a354L,0xdeb1640450f4ec5aL,
+ 0x18d747f0641e2907L,0x4e8978aef1bbf03eL,0x932447dc88a0cd89L },
+ { 0x561e0febcf3d5897L,0xfc3a682f13600e6dL,0xc78b9d73d16a6b73L,
+ 0xe713feded29bf580L,0x0a22522308d69e5cL,0x3a924a571ff7fda4L } },
+ /* 228 */
+ { { 0xfb64554cb4093beeL,0xa6d65a25a58c6ec0L,0x4126994d43d0ed37L,
+ 0xa5689a5155152d44L,0xb8e5ea8c284caa8dL,0x33f05d4fd1f25538L },
+ { 0xe0fdfe091b615d6eL,0x2ded7e8f705507daL,0xdd5631e517bbcc80L,
+ 0x4f87453e267fd11fL,0xc6da723fff89d62dL,0x55cbcae2e3cda21dL } },
+ /* 229 */
+ { { 0x336bc94e6b4e84f3L,0x728630314ef72c35L,0x6d85fdeeeeb57f99L,
+ 0x7f4e3272a42ece1bL,0x7f86cbb536f0320aL,0xf09b6a2b923331e6L },
+ { 0x21d3ecf156778435L,0x2977ba998323b2d2L,0x6a1b57fb1704bc0fL,
+ 0xd777cf8b389f048aL,0x9ce2174fac6b42cdL,0x404e2bff09e6c55aL } },
+ /* 230 */
+ { { 0x9b9b135e204c5ddbL,0x9dbfe0443eff550eL,0x35eab4bfec3be0f6L,
+ 0x8b4c3f0d0a43e56fL,0x4c1c66730e73f9b3L,0x92ed38bd2c78c905L },
+ { 0xc7003f6aa386e27cL,0xb9c4f46faced8507L,0xea024ec859df5464L,
+ 0x4af96152429572eaL,0x279cd5e2e1fc1194L,0xaa376a03281e358cL } },
+ /* 231 */
+ { { 0x078592233cdbc95cL,0xaae1aa6aef2e337aL,0xc040108d472a8544L,
+ 0x80c853e68d037b7dL,0xd221315c8c7eee24L,0x195d38568ee47752L },
+ { 0xd4b1ba03dacd7fbeL,0x4b5ac61ed3e0c52bL,0x68d3c0526aab7b52L,
+ 0xf0d7248c660e3feaL,0xafdb3f893145efb4L,0xa73fd9a38f40936dL } },
+ /* 232 */
+ { { 0x891b9ef3bb1b17ceL,0x14023667c6127f31L,0x12b2e58d305521fdL,
+ 0x3a47e449e3508088L,0xe49fc84bff751507L,0x4023f7225310d16eL },
+ { 0xa608e5edb73399faL,0xf12632d8d532aa3eL,0x13a2758e845e8415L,
+ 0xae4b6f851fc2d861L,0x3879f5b1339d02f2L,0x446d22a680d99ebdL } },
+ /* 233 */
+ { { 0x0f5023024be164f1L,0x8d09d2d688b81920L,0x514056f1984aceffL,
+ 0xa5c4ddf075e9e80dL,0x38cb47e6df496a93L,0x899e1d6b38df6bf7L },
+ { 0x69e87e88b59eb2a6L,0x280d9d639b47f38bL,0x599411ea3654e955L,
+ 0xcf8dd4fd969aa581L,0xff5c2baf530742a7L,0xa43915361a373085L } },
+ /* 234 */
+ { { 0x6ace72a3a8a4bdd2L,0xc656cdd1b68ef702L,0xd4a33e7e90c4dad8L,
+ 0x4aece08a9d951c50L,0xea8005ae085d68e6L,0xfdd7a7d76f7502b8L },
+ { 0xce6fb0a698d6fa45L,0x228f86721104eb8cL,0xd23d8787da09d7dcL,
+ 0x5521428b2ae93065L,0x95faba3dea56c366L,0xedbe50390a88aca5L } },
+ /* 235 */
+ { { 0xd64da0adbfb26c82L,0xe5d70b3c952c2f9cL,0xf5e8f365f7e77f68L,
+ 0x7234e00208f2d695L,0xfaf900eed12e7be6L,0x27dc69344acf734eL },
+ { 0x80e4ff5ec260a46aL,0x7da5ebce2dc31c28L,0x485c5d73ca69f552L,
+ 0xcdfb6b2969cc84c2L,0x031c5afeed6d4ecaL,0xc7bbf4c822247637L } },
+ /* 236 */
+ { { 0x9d5b72c749fe01b2L,0x34785186793a91b8L,0xa3ba3c54cf460438L,
+ 0x73e8e43d3ab21b6fL,0x50cde8e0be57b8abL,0x6488b3a7dd204264L },
+ { 0xa9e398b3dddc4582L,0x1698c1a95bec46feL,0x7f1446ef156d3843L,
+ 0x3fd25dd8770329a2L,0x05b1221a2c710668L,0x65b2dc2aa72ee6cfL } },
+ /* 237 */
+ { { 0x21a885f7cd021d63L,0x3f344b15fea61f08L,0xad5ba6ddc5cf73e6L,
+ 0x154d0d8f227a8b23L,0x9b74373cdc559311L,0x4feab71598620fa1L },
+ { 0x5098938e7d9ec924L,0x84d54a5e6d47e550L,0x1a2d1bdc1b617506L,
+ 0x99fe1782615868a4L,0x171da7803005a924L,0xa70bf5ed7d8f79b6L } },
+ /* 238 */
+ { { 0x0bc1250dfe2216c5L,0x2c37e2507601b351L,0xb6300175d6f06b7eL,
+ 0x4dde8ca18bfeb9b7L,0x4f210432b82f843dL,0x8d70e2f9b1ac0afdL },
+ { 0x25c73b78aae91abbL,0x0230dca3863028f2L,0x8b923ecfe5cf30b7L,
+ 0xed754ec25506f265L,0x8e41b88c729a5e39L,0xee67cec2babf889bL } },
+ /* 239 */
+ { { 0xe183acf51be46c65L,0x9789538fe7565d7aL,0x87873391d9627b4eL,
+ 0xbf4ac4c19f1d9187L,0x5db99f634691f5c8L,0xa68df80374a1fb98L },
+ { 0x3c448ed1bf92b5faL,0xa098c8413e0bdc32L,0x8e74cd5579bf016cL,
+ 0x5df0d09c115e244dL,0x9418ad013410b66eL,0x8b6124cb17a02130L } },
+ /* 240 */
+ { { 0x425ec3afc26e3392L,0xc07f8470a1722e00L,0xdcc28190e2356b43L,
+ 0x4ed97dffb1ef59a6L,0xc22b3ad1c63028c1L,0x070723c268c18988L },
+ { 0x70da302f4cf49e7dL,0xc5e87c933f12a522L,0x74acdd1d18594148L,
+ 0xad5f73abca74124cL,0xe72e4a3ed69fd478L,0x615938687b117cc3L } },
+ /* 241 */
+ { { 0x7b7b9577a9aa0486L,0x6e41fb35a063d557L,0xb017d5c7da9047d7L,
+ 0x8c74828068a87ba9L,0xab45fa5cdf08ad93L,0xcd9fb2174c288a28L },
+ { 0x595446425747843dL,0x34d64c6ca56111e3L,0x12e47ea14bfce8d5L,
+ 0x17740e056169267fL,0x5c49438eeed03fb5L,0x9da30add4fc3f513L } },
+ /* 242 */
+ { { 0xc4e85282ccfa5200L,0x2707608f6a19b13dL,0xdcb9a53df5726e2fL,
+ 0x612407c9e9427de5L,0x3e5a17e1d54d582aL,0xb99877de655ae118L },
+ { 0x6f0e972b015254deL,0x92a56db1f0a6f7c5L,0xd297e4e1a656f8b2L,
+ 0x99fe0052ad981983L,0xd3652d2f07cfed84L,0xc784352e843c1738L } },
+ /* 243 */
+ { { 0x6ee90af07e9b2d8aL,0xac8d701857cf1964L,0xf6ed903171f28efcL,
+ 0x7f70d5a96812b20eL,0x27b557f4f1c61eeeL,0xf1c9bd57c6263758L },
+ { 0x5cf7d0142a1a6194L,0xdd614e0b1890ab84L,0x3ef9de100e93c2a6L,
+ 0xf98cf575e0cd91c5L,0x504ec0c614befc32L,0xd0513a666279d68cL } },
+ /* 244 */
+ { { 0xa8eadbada859fb6aL,0xcf8346e7db283666L,0x7b35e61a3e22e355L,
+ 0x293ece2c99639c6bL,0xfa0162e256f241c8L,0xd2e6c7b9bf7a1ddaL },
+ { 0xd0de625340075e63L,0x2405aa61f9ec8286L,0x2237830a8fe45494L,
+ 0x4fd01ac7364e9c8cL,0x4d9c3d21904ba750L,0xd589be14af1b520bL } },
+ /* 245 */
+ { { 0x13576a4f4662e53bL,0x35ec2f51f9077676L,0x66297d1397c0af97L,
+ 0xed3201fe9e598b58L,0x49bc752a5e70f604L,0xb54af535bb12d951L },
+ { 0x36ea4c2b212c1c76L,0x18f5bbc7eb250dfdL,0xa0d466cc9a0a1a46L,
+ 0x52564da4dac2d917L,0x206559f48e95fab5L,0x7487c1909ca67a33L } },
+ /* 246 */
+ { { 0x75abfe37dde98e9cL,0x99b90b262a411199L,0x1b410996dcdb1f7cL,
+ 0xab346f118b3b5675L,0x04852193f1f8ae1eL,0x1ec4d2276b8b98c1L },
+ { 0xba3bc92645452baaL,0x387d1858acc4a572L,0x9478eff6e51f171eL,
+ 0xf357077d931e1c00L,0xffee77cde54c8ca8L,0xfb4892ff551dc9a4L } },
+ /* 247 */
+ { { 0x5b1bdad02db8dff8L,0xd462f4fd5a2285a2L,0x1d6aad8eda00b461L,
+ 0x43fbefcf41306d1bL,0x428e86f36a13fe19L,0xc8b2f11817f89404L },
+ { 0x762528aaf0d51afbL,0xa3e2fea4549b1d06L,0x86fad8f2ea3ddf66L,
+ 0x0d9ccc4b4fbdd206L,0xcde97d4cc189ff5aL,0xc36793d6199f19a6L } },
+ /* 248 */
+ { { 0xea38909b51b85197L,0xffb17dd0b4c92895L,0x0eb0878b1ddb3f3fL,
+ 0xb05d28ffc57cf0f2L,0xd8bde2e71abd57e2L,0x7f2be28dc40c1b20L },
+ { 0x6554dca2299a2d48L,0x5130ba2e8377982dL,0x8863205f1071971aL,
+ 0x15ee62827cf2825dL,0xd4b6c57f03748f2bL,0xa9e3f4da430385a0L } },
+ /* 249 */
+ { { 0x33eb7cec83fbc9c6L,0x24a311c74541777eL,0xc81377f74f0767fcL,
+ 0x12adae364ab702daL,0xb7fcb6db2a779696L,0x4a6fb28401cea6adL },
+ { 0x5e8b1d2acdfc73deL,0xd0efae8d1b02fd32L,0x3f99c190d81d8519L,
+ 0x3c18f7fafc808971L,0x41f713e751b7ae7bL,0x0a4b3435f07fc3f8L } },
+ /* 250 */
+ { { 0x7dda3c4c019b7d2eL,0x631c8d1ad4dc4b89L,0x5489cd6e1cdb313cL,
+ 0xd44aed104c07bb06L,0x8f97e13a75f000d1L,0x0e9ee64fdda5df4dL },
+ { 0xeaa99f3b3e346910L,0x622f6921fa294ad7L,0x22aaa20d0d0b2fe9L,
+ 0x4fed2f991e5881baL,0x9af3b2d6c1571802L,0x919e67a8dc7ee17cL } },
+ /* 251 */
+ { { 0xc724fe4c76250533L,0x8a2080e57d817ef8L,0xa2afb0f4172c9751L,
+ 0x9b10cdeb17c0702eL,0xbf3975e3c9b7e3e9L,0x206117df1cd0cdc5L },
+ { 0xfb049e61be05ebd5L,0xeb0bb55c16c782c0L,0x13a331b8ab7fed09L,
+ 0xf6c58b1d632863f0L,0x6264ef6e4d3b6195L,0x92c51b639a53f116L } },
+ /* 252 */
+ { { 0xa57c7bc8288b364dL,0x4a562e087b41e5c4L,0x699d21c6698a9a11L,
+ 0xa4ed9581f3f849b9L,0xa223eef39eb726baL,0x13159c23cc2884f9L },
+ { 0x73931e583a3f4963L,0x965003890ada6a81L,0x3ee8a1c65ab2950bL,
+ 0xeedf4949775fab52L,0x63d652e14f2671b6L,0xfed4491c3c4e2f55L } },
+ /* 253 */
+ { { 0x335eadc3f4eb453eL,0x5ff74b63cadd1a5bL,0x6933d0d75d84a91aL,
+ 0x9ca3eeb9b49ba337L,0x1f6faccec04c15b8L,0x4ef19326dc09a7e4L },
+ { 0x53d2d3243dca3233L,0x0ee40590a2259d4bL,0x18c22edb5546f002L,
+ 0x9242980109ea6b71L,0xaada0addb0e91e61L,0x5fe53ef499963c50L } },
+ /* 254 */
+ { { 0x372dd06b90c28c65L,0x1765242c119ce47dL,0xc041fb806b22fc82L,
+ 0x667edf07b0a7ccc1L,0xc79599e71261beceL,0xbc69d9ba19cff22aL },
+ { 0x009d77cd13c06819L,0x635a66aee282b79dL,0x4edac4a6225b1be8L,
+ 0x57d4f4e4524008f9L,0xee299ac5b056af84L,0xcc38444c3a0bc386L } },
+ /* 255 */
+ { { 0x490643b1cd4c2356L,0x740a4851750547beL,0x643eaf29d4944c04L,
+ 0xba572479299a98a0L,0x48b29f16ee05fdf9L,0x33fb4f61089b2d7bL },
+ { 0x86704902a950f955L,0x97e1034dfedc3ddfL,0x211320b605fbb6a2L,
+ 0x23d7b93f432299bbL,0x1fe1a0578590e4a3L,0x8e1d0586f58c0ce6L } },
+};
+
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_6(sp_point_384* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* Use the stripe method with the precomputed base-point multiples in
+ * p384_table instead of a generic point multiply. */
+ return sp_384_ecc_mulmod_stripe_6(r, &p384_base, p384_table,
+ k, map, heap);
+}
+
+#ifdef HAVE_INTEL_AVX2
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r Resulting point.
+ * k Scalar to multiply by.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_avx2_6(sp_point_384* r, const sp_digit* k,
+ int map, void* heap)
+{
+ /* Same stripe method as sp_384_ecc_mulmod_base_6 but using the
+ * AVX2 (BMI2/ADX) field arithmetic. */
+ return sp_384_ecc_mulmod_stripe_avx2_6(r, &p384_base, p384_table,
+ k, map, heap);
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * km Scalar to multiply by.
+ * r Resulting point.
+ * map Indicates whether to convert result to affine.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
+{
+ /* Small-stack builds allocate the scalar from the heap; otherwise the
+ * point and scalar digits live on the stack. */
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[6];
+#endif
+ sp_point_384* point;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+ if (err == MP_OKAY) {
+ /* Convert the mp_int scalar into six 64-bit digits. */
+ sp_384_from_mp(k, 6, km);
+
+ /* Dispatch to the BMI2/ADX implementation when the CPU has it. */
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_384_ecc_mulmod_base_avx2_6(point, k, map, heap);
+ else
+#endif
+ err = sp_384_ecc_mulmod_base_6(point, k, map, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Copy the internal point representation back into the caller's
+ * ecc_point. */
+ err = sp_384_point_to_ecc_point_6(point, r);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_6(point, 0, heap);
+
+ return err;
+}
+
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
+ defined(HAVE_ECC_VERIFY)
+/* Returns 1 if the number of zero.
+ * Implementation is constant time.
+ *
+ * a Number to check.
+ * returns 1 if the number is zero and 0 otherwise.
+ */
+static int sp_384_iszero_6(const sp_digit* a)
+{
+ /* OR-reduce all six digits so no early exit leaks which digit was
+ * non-zero. */
+ return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0;
+}
+
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+extern void sp_384_add_one_6(sp_digit* a);
+extern void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n);
+/* Generates a scalar that is in the range 1..order-1.
+ *
+ * rng Random number generator.
+ * k Scalar value.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+static int sp_384_ecc_gen_k_6(WC_RNG* rng, sp_digit* k)
+{
+ int err;
+ byte buf[48];                       /* 48 bytes = 384 bits of randomness. */
+
+ /* Rejection sampling: draw 384 random bits and retry until the value
+ * is below order-2, then add one so the result lies in 1..order-1. */
+ do {
+ err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
+ if (err == 0) {
+ sp_384_from_bin(k, 6, buf, (int)sizeof(buf));
+ if (sp_384_cmp_6(k, p384_order2) < 0) {
+ sp_384_add_one_6(k);
+ break;
+ }
+ }
+ }
+ while (err == 0);
+
+ return err;
+}
+
+/* Makes a random EC key pair.
+ *
+ * rng Random number generator.
+ * priv Generated private value.
+ * pub Generated public point.
+ * heap Heap to use for allocation.
+ * returns ECC_INF_E when the point does not have the correct order, RNG
+ * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[6];
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_384 inf;
+#endif
+#endif
+ sp_point_384* point;
+ sp_digit* k = NULL;
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_point_384* infinity;
+#endif
+ int err;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ (void)heap;
+
+ err = sp_384_point_new_6(heap, p, point);
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, inf, infinity);
+ }
+#endif
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL) {
+ err = MEMORY_E;
+ }
+ }
+#else
+ k = kd;
+#endif
+
+ /* Private key: random scalar in 1..order-1. */
+ if (err == MP_OKAY) {
+ err = sp_384_ecc_gen_k_6(rng, k);
+ }
+ /* Public key: k times the base point, mapped to affine (map = 1). */
+ if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_384_ecc_mulmod_base_avx2_6(point, k, 1, NULL);
+ else
+#endif
+ err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL);
+ }
+
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ /* Optional keygen validation: order * pub must yield the point at
+ * infinity, which this representation encodes as x == 0 and y == 0. */
+ if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+ err = sp_384_ecc_mulmod_avx2_6(infinity, point, p384_order, 1,
+ NULL);
+ }
+ else
+#endif
+ err = sp_384_ecc_mulmod_6(infinity, point, p384_order, 1, NULL);
+ }
+ if (err == MP_OKAY) {
+ if ((sp_384_iszero_6(point->x) == 0) || (sp_384_iszero_6(point->y) == 0)) {
+ err = ECC_INF_E;
+ }
+ }
+#endif
+
+ /* Export the results into the caller's mp_int/ecc_point. */
+ if (err == MP_OKAY) {
+ err = sp_384_to_mp(k, priv);
+ }
+ if (err == MP_OKAY) {
+ err = sp_384_point_to_ecc_point_6(point, pub);
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
+ sp_384_point_free_6(infinity, 1, heap);
+#endif
+ sp_384_point_free_6(point, 1, heap);
+
+ return err;
+}
+
+#ifdef HAVE_ECC_DHE
+extern void sp_384_to_bin(sp_digit* r, byte* a);
+/* Multiply the point by the scalar and serialize the X ordinate.
+ * The number is 0 padded to maximum size on output.
+ *
+ * priv Scalar to multiply the point by.
+ * pub Point to multiply.
+ * out Buffer to hold X ordinate.
+ * outLen On entry, size of the buffer in bytes.
+ * On exit, length of data in buffer in bytes.
+ * heap Heap to use for allocation.
+ * returns BUFFER_E if the buffer is to small for output size,
+ * MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
+ word32* outLen, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+ sp_point_384 p;
+ sp_digit kd[6];
+#endif
+ sp_point_384* point = NULL;
+ sp_digit* k = NULL;
+ int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ /* The serialized X ordinate is always 48 bytes (384 bits). */
+ if (*outLen < 48U) {
+ err = BUFFER_E;
+ }
+
+ if (err == MP_OKAY) {
+ err = sp_384_point_new_6(heap, p, point);
+ }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (err == MP_OKAY) {
+ k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+ DYNAMIC_TYPE_ECC);
+ if (k == NULL)
+ err = MEMORY_E;
+ }
+#else
+ k = kd;
+#endif
+
+ if (err == MP_OKAY) {
+ /* Shared secret point: priv * pub, mapped to affine (map = 1). */
+ sp_384_from_mp(k, 6, priv);
+ sp_384_point_from_ecc_point_6(point, pub);
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ err = sp_384_ecc_mulmod_avx2_6(point, point, k, 1, heap);
+ else
+#endif
+ err = sp_384_ecc_mulmod_6(point, point, k, 1, heap);
+ }
+ if (err == MP_OKAY) {
+ /* Only the X ordinate is the ECDH output. */
+ sp_384_to_bin(point->x, out);
+ *outLen = 48;
+ }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+ if (k != NULL) {
+ XFREE(k, heap, DYNAMIC_TYPE_ECC);
+ }
+#endif
+ sp_384_point_free_6(point, 0, heap);
+
+ return err;
+}
+#endif /* HAVE_ECC_DHE */
+
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef HAVE_INTEL_AVX2
+#endif /* HAVE_INTEL_AVX2 */
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+extern sp_digit sp_384_sub_in_place_6(sp_digit* a, const sp_digit* b);
+extern sp_digit sp_384_cond_sub_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m);
+extern void sp_384_mul_d_6(sp_digit* r, const sp_digit* a, sp_digit b);
+extern void sp_384_mul_d_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit b);
+/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The dividend.
+ * returns the result of the division.
+ */
+static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0,
+ sp_digit div)
+{
+ /* x86-64 DIVQ divides the 128-bit value RDX:RAX by the operand,
+ * leaving the quotient in RAX and the remainder in RDX, and sets
+ * arithmetic flags to undefined values.  d0 is placed in RAX ("a"),
+ * d1 in RDX, and the quotient is read back from RAX.
+ *
+ * Fix over the previous version: DIVQ overwrites RDX (remainder) and
+ * the flags, but RDX was declared as a plain input and nothing was
+ * clobbered, so the compiler could assume RDX still held d1 and the
+ * flags were preserved across the asm.  Declare RDX as an in/out
+ * operand ("+d") and add a "cc" clobber to make that explicit.
+ *
+ * NOTE(review): DIV raises #DE when the quotient does not fit in 64
+ * bits; callers must guarantee d1 < div. */
+ register sp_digit r asm("rax");
+ __asm__ __volatile__ (
+ "divq %3"
+ : "=a" (r), "+d" (d1)
+ : "a" (d0), "r" (div)
+ : "cc"
+ );
+ return r;
+}
+/* AND m into each word of a and store in r.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * m Mask to AND against each digit.
+ */
+static void sp_384_mask_6(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+ /* m is an all-ones or all-zeros mask; the result is either a copy of a
+ * or zero, computed without branching. */
+#ifdef WOLFSSL_SP_SMALL
+ int i;
+
+ for (i=0; i<6; i++) {
+ r[i] = a[i] & m;
+ }
+#else
+ /* Unrolled for the non-small build. */
+ r[0] = a[0] & m;
+ r[1] = a[1] & m;
+ r[2] = a[2] & m;
+ r[3] = a[3] & m;
+ r[4] = a[4] & m;
+ r[5] = a[5] & m;
+#endif
+}
+
+/* Divide d in a and put remainder into r (m*d + r = a)
+ * m is not calculated as it is not needed at this time.
+ *
+ * a Nmber to be divided.
+ * d Number to divide with.
+ * m Multiplier result.
+ * r Remainder from the division.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit* m,
+ sp_digit* r)
+{
+ sp_digit t1[12], t2[7];  /* t1: working copy of a (double width); t2: d * quotient estimate. */
+ sp_digit div, r1;
+ int i;
+#ifdef HAVE_INTEL_AVX2
+ word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+ (void)m;
+
+ /* Schoolbook division using the top divisor word to estimate each
+ * quotient word. */
+ div = d[5];
+ XMEMCPY(t1, a, sizeof(*t1) * 2 * 6);
+ /* Pre-reduce the top half so the first estimate cannot overflow. */
+ r1 = sp_384_cmp_6(&t1[6], d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_384_cond_sub_avx2_6(&t1[6], &t1[6], d, (sp_digit)0 - r1);
+ else
+#endif
+ sp_384_cond_sub_6(&t1[6], &t1[6], d, (sp_digit)0 - r1);
+ for (i=5; i>=0; i--) {
+ /* Estimate quotient word from the top two remaining words. */
+ r1 = div_384_word_6(t1[6 + i], t1[6 + i - 1], div);
+
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_384_mul_d_avx2_6(t2, d, r1);
+ else
+#endif
+ sp_384_mul_d_6(t2, d, r1);
+ /* Subtract estimate * d; if the estimate was too large the two
+ * masked add-backs below correct it (mask is all-ones only when
+ * the subtraction underflowed). */
+ t1[6 + i] += sp_384_sub_in_place_6(&t1[i], t2);
+ t1[6 + i] -= t2[6];
+ sp_384_mask_6(t2, d, t1[6 + i]);
+ t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+ sp_384_mask_6(t2, d, t1[6 + i]);
+ t1[6 + i] += sp_384_add_6(&t1[i], &t1[i], t2);
+ }
+
+ /* Final conditional subtract leaves the remainder in 0..d-1. */
+ r1 = sp_384_cmp_6(t1, d) >= 0;
+#ifdef HAVE_INTEL_AVX2
+ if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+ sp_384_cond_sub_avx2_6(r, t1, d, (sp_digit)0 - r1);
+ else
+#endif
+ sp_384_cond_sub_6(r, t1, d, (sp_digit)0 - r1);
+
+ return MP_OKAY;
+}
+
+/* Reduce a modulo m into r. (r = a mod m)
+ *
+ * r A single precision number that is the reduced result.
+ * a A single precision number that is to be reduced.
+ * m A single precision number that is the modulus to reduce with.
+ * returns MP_OKAY indicating success.
+ */
+static WC_INLINE int sp_384_mod_6(sp_digit* r, const sp_digit* a, const sp_digit* m)
+{
+ /* Division with a discarded quotient (NULL) yields the remainder. */
+ return sp_384_div_6(a, m, NULL, r);
+}
+
+#endif
+#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#ifdef WOLFSSL_SP_SMALL
+/* Order-2 for the P384 curve. */
+/* Digits are stored least-significant first. */
+static const uint64_t p384_order_minus_2[6] = {
+ 0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU,
+ 0xffffffffffffffffU,0xffffffffffffffffU,0xffffffffffffffffU
+};
+#else
+/* The low half of the order-2 of the P384 curve. */
+/* Only the low 192 bits are stored; the high 192 bits are all ones and
+ * are handled by the fixed addition chain in sp_384_mont_inv_order_6. */
+static const uint64_t p384_order_low[3] = {
+ 0xecec196accc52971U,0x581a0db248b0a77aU,0xc7634d81f4372ddfU
+
+};
+#endif /* WOLFSSL_SP_SMALL */
+
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ /* Full double-width product followed by Montgomery reduction modulo
+ * the group order (not the field prime). */
+ sp_384_mul_6(r, a, b);
+ sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_order_6(sp_digit* r, const sp_digit* a)
+{
+ /* Dedicated squaring is used instead of sp_384_mul_6(r, a, a). */
+ sp_384_sqr_6(r, a);
+ sp_384_mont_reduce_order_6(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of repeated squarings (i.e. r = a ^ (2^n) mod order).
+ */
+static void sp_384_mont_sqr_n_order_6(sp_digit* r, const sp_digit* a, int n)
+{
+ int i;
+
+ /* First squaring reads a; the remaining n-1 square r in place. */
+ sp_384_mont_sqr_order_6(r, a);
+ for (i=1; i<n; i++) {
+ sp_384_mont_sqr_order_6(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * Uses Fermat's little theorem: a^-1 = a^(order-2) mod order, since the
+ * order is prime.  The exponent bits are public so branching on them does
+ * not leak secret data.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_384_mont_inv_order_6(sp_digit* r, const sp_digit* a,
+ sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ /* Left-to-right square-and-multiply over bits 382..0 of order-2;
+ * initializing t = a accounts for the top bit (383). */
+ XMEMCPY(t, a, sizeof(sp_digit) * 6);
+ for (i=382; i>=0; i--) {
+ sp_384_mont_sqr_order_6(t, t);
+ if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+ sp_384_mont_mul_order_6(t, t, a);
+ }
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 6U);
+#else
+ /* Fixed addition chain: build a^(2^192 - 1) for the all-ones high half
+ * of order-2, then scan the low 192 bits (p384_order_low). */
+ sp_digit* t = td;
+ sp_digit* t2 = td + 2 * 6;
+ sp_digit* t3 = td + 4 * 6;
+ int i;
+
+ /* t = a^2 */
+ sp_384_mont_sqr_order_6(t, a);
+ /* t = a^3 = t * a */
+ sp_384_mont_mul_order_6(t, t, a);
+ /* t2= a^c = t ^ 2 ^ 2 */
+ sp_384_mont_sqr_n_order_6(t2, t, 2);
+ /* t = a^f = t2 * t */
+ sp_384_mont_mul_order_6(t, t2, t);
+ /* t2= a^f0 = t ^ 2 ^ 4 */
+ sp_384_mont_sqr_n_order_6(t2, t, 4);
+ /* t = a^ff = t2 * t */
+ sp_384_mont_mul_order_6(t, t2, t);
+ /* t2= a^ff00 = t ^ 2 ^ 8 */
+ sp_384_mont_sqr_n_order_6(t2, t, 8);
+ /* t3= a^ffff = t2 * t */
+ sp_384_mont_mul_order_6(t3, t2, t);
+ /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+ sp_384_mont_sqr_n_order_6(t2, t3, 16);
+ /* t = a^ffffffff = t2 * t3 */
+ sp_384_mont_mul_order_6(t, t2, t3);
+ /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+ sp_384_mont_sqr_n_order_6(t2, t, 16);
+ /* t = a^ffffffffffff = t2 * t3 */
+ sp_384_mont_mul_order_6(t, t2, t3);
+ /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+ sp_384_mont_sqr_n_order_6(t2, t, 48);
+ /* t= a^fffffffffffffffffffffffff = t2 * t */
+ sp_384_mont_mul_order_6(t, t2, t);
+ /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+ sp_384_mont_sqr_n_order_6(t2, t, 96);
+ /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+ sp_384_mont_mul_order_6(t2, t2, t);
+ /* Square-and-multiply over bits 191..1 of the low half of order-2. */
+ for (i=191; i>=1; i--) {
+ sp_384_mont_sqr_order_6(t2, t2);
+ if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+ sp_384_mont_mul_order_6(t2, t2, a);
+ }
+ }
+ /* Bit 0 of order-2 is 1: final square and multiply by a. */
+ sp_384_mont_sqr_order_6(t2, t2);
+ sp_384_mont_mul_order_6(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifdef HAVE_INTEL_AVX2
+/* Multiply two number mod the order of P384 curve. (r = a * b mod order)
+ *
+ * r Result of the multiplication.
+ * a First operand of the multiplication.
+ * b Second operand of the multiplication.
+ */
+static void sp_384_mont_mul_order_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* b)
+{
+ /* BMI2/ADX variant of sp_384_mont_mul_order_6. */
+ sp_384_mul_avx2_6(r, a, b);
+ sp_384_mont_reduce_order_avx2_6(r, p384_order, p384_mp_order);
+}
+
+/* Square number mod the order of P384 curve. (r = a * a mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ */
+static void sp_384_mont_sqr_order_avx2_6(sp_digit* r, const sp_digit* a)
+{
+ /* BMI2/ADX variant of sp_384_mont_sqr_order_6. */
+ sp_384_sqr_avx2_6(r, a);
+ sp_384_mont_reduce_order_avx2_6(r, p384_order, p384_mp_order);
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Square number mod the order of P384 curve a number of times.
+ * (r = a ^ n mod order)
+ *
+ * r Result of the squaring.
+ * a Number to square.
+ * n Number of repeated squarings (i.e. r = a ^ (2^n) mod order).
+ */
+static void sp_384_mont_sqr_n_order_avx2_6(sp_digit* r, const sp_digit* a, int n)
+{
+ int i;
+
+ /* First squaring reads a; the remaining n-1 square r in place. */
+ sp_384_mont_sqr_order_avx2_6(r, a);
+ for (i=1; i<n; i++) {
+ sp_384_mont_sqr_order_avx2_6(r, r);
+ }
+}
+#endif /* !WOLFSSL_SP_SMALL */
+
+/* Invert the number, in Montgomery form, modulo the order of the P384 curve.
+ * (r = 1 / a mod order)
+ *
+ * BMI2/ADX variant of sp_384_mont_inv_order_6; same Fermat inversion
+ * (a^-1 = a^(order-2) mod order) and same addition chain, using the
+ * AVX2 order arithmetic.
+ *
+ * r Inverse result.
+ * a Number to invert.
+ * td Temporary data.
+ */
+static void sp_384_mont_inv_order_avx2_6(sp_digit* r, const sp_digit* a,
+ sp_digit* td)
+{
+#ifdef WOLFSSL_SP_SMALL
+ sp_digit* t = td;
+ int i;
+
+ /* Left-to-right square-and-multiply over bits 382..0 of order-2;
+ * initializing t = a accounts for the top bit (383). */
+ XMEMCPY(t, a, sizeof(sp_digit) * 6);
+ for (i=382; i>=0; i--) {
+ sp_384_mont_sqr_order_avx2_6(t, t);
+ if ((p384_order_minus_2[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+ sp_384_mont_mul_order_avx2_6(t, t, a);
+ }
+ }
+ XMEMCPY(r, t, sizeof(sp_digit) * 6U);
+#else
+ /* Fixed addition chain: build a^(2^192 - 1) for the all-ones high half
+ * of order-2, then scan the low 192 bits (p384_order_low). */
+ sp_digit* t = td;
+ sp_digit* t2 = td + 2 * 6;
+ sp_digit* t3 = td + 4 * 6;
+ int i;
+
+ /* t = a^2 */
+ sp_384_mont_sqr_order_avx2_6(t, a);
+ /* t = a^3 = t * a */
+ sp_384_mont_mul_order_avx2_6(t, t, a);
+ /* t2= a^c = t ^ 2 ^ 2 */
+ sp_384_mont_sqr_n_order_avx2_6(t2, t, 2);
+ /* t = a^f = t2 * t */
+ sp_384_mont_mul_order_avx2_6(t, t2, t);
+ /* t2= a^f0 = t ^ 2 ^ 4 */
+ sp_384_mont_sqr_n_order_avx2_6(t2, t, 4);
+ /* t = a^ff = t2 * t */
+ sp_384_mont_mul_order_avx2_6(t, t2, t);
+ /* t2= a^ff00 = t ^ 2 ^ 8 */
+ sp_384_mont_sqr_n_order_avx2_6(t2, t, 8);
+ /* t3= a^ffff = t2 * t */
+ sp_384_mont_mul_order_avx2_6(t3, t2, t);
+ /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
+ sp_384_mont_sqr_n_order_avx2_6(t2, t3, 16);
+ /* t = a^ffffffff = t2 * t3 */
+ sp_384_mont_mul_order_avx2_6(t, t2, t3);
+ /* t2= a^ffffffff0000 = t ^ 2 ^ 16 */
+ sp_384_mont_sqr_n_order_avx2_6(t2, t, 16);
+ /* t = a^ffffffffffff = t2 * t3 */
+ sp_384_mont_mul_order_avx2_6(t, t2, t3);
+ /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48 */
+ sp_384_mont_sqr_n_order_avx2_6(t2, t, 48);
+ /* t= a^fffffffffffffffffffffffff = t2 * t */
+ sp_384_mont_mul_order_avx2_6(t, t2, t);
+ /* t2= a^ffffffffffffffffffffffff000000000000000000000000 */
+ sp_384_mont_sqr_n_order_avx2_6(t2, t, 96);
+ /* t2= a^ffffffffffffffffffffffffffffffffffffffffffffffff = t2 * t */
+ sp_384_mont_mul_order_avx2_6(t2, t2, t);
+ /* Square-and-multiply over bits 191..1 of the low half of order-2. */
+ for (i=191; i>=1; i--) {
+ sp_384_mont_sqr_order_avx2_6(t2, t2);
+ if (((sp_digit)p384_order_low[i / 64] & ((sp_int_digit)1 << (i % 64))) != 0) {
+ sp_384_mont_mul_order_avx2_6(t2, t2, a);
+ }
+ }
+ /* Bit 0 of order-2 is 1: final square and multiply by a. */
+ sp_384_mont_sqr_order_avx2_6(t2, t2);
+ sp_384_mont_mul_order_avx2_6(r, t2, a);
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#ifdef HAVE_ECC_SIGN
+#ifndef SP_ECC_MAX_SIG_GEN
+#define SP_ECC_MAX_SIG_GEN 64
+#endif
+
+/* Sign the hash using the private key.
+ * e = [hash, 384 bits] from binary
+ * r = (k.G)->x mod order
+ * s = (r * x + e) / k mod order
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Sirst part of result as an mp_int.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
+    mp_int* rm, mp_int* sm, mp_int* km, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit ed[2*6];
+    sp_digit xd[2*6];
+    sp_digit kd[2*6];
+    sp_digit rd[2*6];
+    sp_digit td[3 * 2*6];
+    sp_point_384 p;
+#endif
+    sp_digit* e = NULL;
+    sp_digit* x = NULL;
+    sp_digit* k = NULL;
+    sp_digit* r = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* point = NULL;
+    sp_digit carry;
+    sp_digit* s = NULL;
+    sp_digit* kInv = NULL;
+    int err = MP_OKAY;
+    int64_t c;
+    int i;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    (void)heap;
+
+    err = sp_384_point_new_6(heap, p, point);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        /* One allocation for all working values: e, x, k, r (2*6 digits
+         * each) and tmp (3 * 2*6 digits) => 7 * 2 * 6 digits in total. */
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        e = d + 0 * 6;
+        x = d + 2 * 6;
+        k = d + 4 * 6;
+        r = d + 6 * 6;
+        tmp = d + 8 * 6;
+#else
+        e = ed;
+        x = xd;
+        k = kd;
+        r = rd;
+        tmp = td;
+#endif
+        /* s and kInv reuse the e and k buffers - e and k are no longer
+         * needed once s = (e + r*x)/k is being formed. */
+        s = e;
+        kInv = k;
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        sp_384_from_bin(e, 6, hash, (int)hashLen);
+    }
+
+    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_384_from_mp(x, 6, priv);
+
+        /* New random point. */
+        if (km == NULL || mp_iszero(km)) {
+            err = sp_384_ecc_gen_k_6(rng, k);
+        }
+        else {
+            sp_384_from_mp(k, 6, km);
+            mp_zero(km);
+        }
+        if (err == MP_OKAY) {
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                err = sp_384_ecc_mulmod_base_avx2_6(point, k, 1, heap);
+            else
+#endif
+                err = sp_384_ecc_mulmod_base_6(point, k, 1, NULL);
+        }
+
+        if (err == MP_OKAY) {
+            /* r = point->x mod order */
+            XMEMCPY(r, point->x, sizeof(sp_digit) * 6U);
+            sp_384_norm_6(r);
+            c = sp_384_cmp_6(r, p384_order);
+            sp_384_cond_sub_6(r, r, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_6(r);
+
+            /* Conv k to Montgomery form (mod order) */
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                sp_384_mul_avx2_6(k, k, p384_norm_order);
+            else
+#endif
+                sp_384_mul_6(k, k, p384_norm_order);
+            err = sp_384_mod_6(k, k, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_6(k);
+            /* kInv = 1/k mod order */
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                sp_384_mont_inv_order_avx2_6(kInv, k, tmp);
+            else
+#endif
+                sp_384_mont_inv_order_6(kInv, k, tmp);
+            sp_384_norm_6(kInv);
+
+            /* s = r * x + e */
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                sp_384_mul_avx2_6(x, x, r);
+            else
+#endif
+                sp_384_mul_6(x, x, r);
+            err = sp_384_mod_6(x, x, p384_order);
+        }
+        if (err == MP_OKAY) {
+            sp_384_norm_6(x);
+            carry = sp_384_add_6(s, e, x);
+            sp_384_cond_sub_6(s, s, p384_order, 0 - carry);
+            sp_384_norm_6(s);
+            c = sp_384_cmp_6(s, p384_order);
+            sp_384_cond_sub_6(s, s, p384_order, 0L - (sp_digit)(c >= 0));
+            sp_384_norm_6(s);
+
+            /* s = s * k^-1 mod order */
+#ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+                sp_384_mont_mul_order_avx2_6(s, s, kInv);
+            else
+#endif
+                sp_384_mont_mul_order_6(s, s, kInv);
+            sp_384_norm_6(s);
+
+            /* Check that signature is usable. */
+            if (sp_384_iszero_6(s) == 0) {
+                break;
+            }
+        }
+    }
+
+    /* Loop exhausted without producing a non-zero s. */
+    if (i == 0) {
+        err = RNG_FAILURE_E;
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(r, rm);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(s, sm);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        /* Zeroize the whole allocation (7 * 2 * 6 digits) - e, x, k, r and
+         * the tmp scratch all held values derived from private data. */
+        XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 6);
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#else
+    XMEMSET(e, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(x, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(k, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(r, 0, sizeof(sp_digit) * 2U * 6U);
+    XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 6U);
+#endif
+    sp_384_point_free_6(point, 1, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_SIGN */
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 384)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 384 bits.
+ *
+ * hash Hash to verify.
+ * hashLen Length of the hash data.
+ * pX X ordinate of public EC point.
+ * pY Y ordinate of public EC point.
+ * pZ Z ordinate of public EC point.
+ * r First part of the signature as an mp_int.
+ * sm Second part of the signature as an mp_int.
+ * res Result of verification: 1 when the signature matches, 0 otherwise.
+ * heap Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
+    mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit u1d[2*6];
+    sp_digit u2d[2*6];
+    sp_digit sd[2*6];
+    sp_digit tmpd[2*6 * 5];
+    sp_point_384 p1d;
+    sp_point_384 p2d;
+#endif
+    sp_digit* u1 = NULL;
+    sp_digit* u2 = NULL;
+    sp_digit* s = NULL;
+    sp_digit* tmp = NULL;
+    sp_point_384* p1;
+    sp_point_384* p2 = NULL;
+    sp_digit carry;
+    int64_t c;
+    int err;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_384_point_new_6(heap, p1d, p1);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, p2d, p2);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        u1 = d + 0 * 6;
+        u2 = d + 2 * 6;
+        s = d + 4 * 6;
+        tmp = d + 6 * 6;
+#else
+        u1 = u1d;
+        u2 = u2d;
+        s = sd;
+        tmp = tmpd;
+#endif
+
+        if (hashLen > 48U) {
+            hashLen = 48U;
+        }
+
+        /* u1 = hash, u2 = r, s = signature s, p2 = public key point Q. */
+        sp_384_from_bin(u1, 6, hash, (int)hashLen);
+        sp_384_from_mp(u2, 6, r);
+        sp_384_from_mp(s, 6, sm);
+        sp_384_from_mp(p2->x, 6, pX);
+        sp_384_from_mp(p2->y, 6, pY);
+        sp_384_from_mp(p2->z, 6, pZ);
+
+        /* Convert s to Montgomery form (mod order). */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            sp_384_mul_avx2_6(s, s, p384_norm_order);
+        }
+        else
+#endif
+        {
+            sp_384_mul_6(s, s, p384_norm_order);
+        }
+        err = sp_384_mod_6(s, s, p384_order);
+    }
+    if (err == MP_OKAY) {
+        sp_384_norm_6(s);
+        /* u1 = e/s mod order, u2 = r/s mod order. */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            sp_384_mont_inv_order_avx2_6(s, s, tmp);
+            sp_384_mont_mul_order_avx2_6(u1, u1, s);
+            sp_384_mont_mul_order_avx2_6(u2, u2, s);
+        }
+        else
+#endif
+        {
+            sp_384_mont_inv_order_6(s, s, tmp);
+            sp_384_mont_mul_order_6(u1, u1, s);
+            sp_384_mont_mul_order_6(u2, u2, s);
+        }
+
+        /* p1 = u1.G */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_base_avx2_6(p1, u1, 0, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_base_6(p1, u1, 0, heap);
+    }
+    if (err == MP_OKAY) {
+        /* p2 = u2.Q */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_avx2_6(p2, p2, u2, 0, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_6(p2, p2, u2, 0, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* p1 = u1.G + u2.Q. The projective add does not handle P == Q or a
+         * result at infinity; both cases show up as a zero Z ordinate and
+         * are patched up below. */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            sp_384_proj_point_add_avx2_6(p1, p1, p2, tmp);
+            if (sp_384_iszero_6(p1->z)) {
+                if (sp_384_iszero_6(p1->x) && sp_384_iszero_6(p1->y)) {
+                    /* p1 == p2: use the doubling formula instead. */
+                    sp_384_proj_point_dbl_avx2_6(p1, p2, tmp);
+                }
+                else {
+                    /* Result is the point at infinity. */
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+        else
+#endif
+        {
+            sp_384_proj_point_add_6(p1, p1, p2, tmp);
+            if (sp_384_iszero_6(p1->z)) {
+                if (sp_384_iszero_6(p1->x) && sp_384_iszero_6(p1->y)) {
+                    /* p1 == p2: use the doubling formula instead. */
+                    sp_384_proj_point_dbl_6(p1, p2, tmp);
+                }
+                else {
+                    /* Result is the point at infinity. */
+                    /* Y ordinate is not used from here - don't set. */
+                    p1->x[0] = 0;
+                    p1->x[1] = 0;
+                    p1->x[2] = 0;
+                    p1->x[3] = 0;
+                    p1->x[4] = 0;
+                    p1->x[5] = 0;
+                    XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
+                }
+            }
+        }
+
+        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
+        /* Reload r and convert to Montgomery form. */
+        sp_384_from_mp(u2, 6, r);
+        err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+    }
+
+    if (err == MP_OKAY) {
+        /* u1 = r.z'.z' mod prime */
+        sp_384_mont_sqr_6(p1->z, p1->z, p384_mod, p384_mp_mod);
+        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod, p384_mp_mod);
+        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+        if (*res == 0) {
+            /* Reload r and add order. */
+            sp_384_from_mp(u2, 6, r);
+            carry = sp_384_add_6(u2, u2, p384_order);
+            /* Carry means result is greater than mod and is not valid. */
+            if (carry == 0) {
+                sp_384_norm_6(u2);
+
+                /* Compare with mod and if greater or equal then not valid. */
+                c = sp_384_cmp_6(u2, p384_mod);
+                if (c < 0) {
+                    /* Convert to Montgomery form */
+                    err = sp_384_mod_mul_norm_6(u2, u2, p384_mod);
+                    if (err == MP_OKAY) {
+                        /* u1 = (r + 1*order).z'.z' mod prime */
+                        sp_384_mont_mul_6(u1, u2, p1->z, p384_mod,
+                                                                  p384_mp_mod);
+                        *res = (int)(sp_384_cmp_6(p1->x, u1) == 0);
+                    }
+                }
+            }
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL)
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+#endif
+    sp_384_point_free_6(p1, 0, heap);
+    sp_384_point_free_6(p2, 0, heap);
+
+    return err;
+}
+#endif /* HAVE_ECC_VERIFY */
+
+#ifdef HAVE_ECC_CHECK_KEY
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * point EC point.
+ * heap Heap to use if dynamically allocating.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+static int sp_384_ecc_is_point_6(sp_point_384* point, void* heap)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d = NULL;
+#else
+    sp_digit t1d[2*6];
+    sp_digit t2d[2*6];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    int err = MP_OKAY;
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 4, heap, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 6;
+        t2 = d + 2 * 6;
+#else
+        (void)heap;
+
+        t1 = t1d;
+        t2 = t2d;
+#endif
+
+        /* t1 = y^2 mod p */
+        sp_384_sqr_6(t1, point->y);
+        (void)sp_384_mod_6(t1, t1, p384_mod);
+        /* t2 = x^3 mod p */
+        sp_384_sqr_6(t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        sp_384_mul_6(t2, t2, point->x);
+        (void)sp_384_mod_6(t2, t2, p384_mod);
+        /* t1 = y^2 - x^3 mod p (subtract via the additive inverse). */
+        (void)sp_384_sub_6(t2, p384_mod, t2);
+        sp_384_mont_add_6(t1, t1, t2, p384_mod);
+
+        /* t1 = y^2 - x^3 + 3.x mod p (curve has a = -3). */
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+        sp_384_mont_add_6(t1, t1, point->x, p384_mod);
+
+        /* On the curve iff y^2 - x^3 + 3.x == b mod p. */
+        if (sp_384_cmp_6(t1, p384_b) != 0) {
+            err = MP_VAL;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+/* Check that the x and y ordinates are a valid point on the curve.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve and MP_OKAY otherwise.
+ */
+int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_point_384 ptd;
+#endif
+    sp_point_384* pt;
+    byte oneBin[1] = { 1 };
+    int err;
+
+    /* Build an affine point (Z = 1) from the ordinates and hand it to the
+     * curve-equation check. */
+    err = sp_384_point_new_6(NULL, ptd, pt);
+    if (err == MP_OKAY) {
+        sp_384_from_mp(pt->x, 6, pX);
+        sp_384_from_mp(pt->y, 6, pY);
+        sp_384_from_bin(pt->z, 6, oneBin, (int)sizeof(oneBin));
+
+        err = sp_384_ecc_is_point_6(pt, NULL);
+    }
+
+    sp_384_point_free_6(pt, 0, NULL);
+
+    return err;
+}
+
+/* Check that the private scalar generates the EC point (px, py), the point is
+ * on the curve and the point has the correct order.
+ *
+ * pX X ordinate of EC point.
+ * pY Y ordinate of EC point.
+ * privm Private scalar that generates EC point.
+ * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
+ * not on the curve, ECC_INF_E if the point does not have the correct order,
+ * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
+ * MP_OKAY otherwise.
+ */
+int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit privd[6];
+    sp_point_384 pubd;
+    sp_point_384 pd;
+#endif
+    sp_digit* priv = NULL;
+    sp_point_384* pub;
+    sp_point_384* p = NULL;
+    byte one[1] = { 1 };
+    int err;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_384_point_new_6(heap, pubd, pub);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(heap, pd, p);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6, heap,
+                                                              DYNAMIC_TYPE_ECC);
+        if (priv == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+        priv = privd;
+#endif
+
+        /* NOTE(review): privm is dereferenced unconditionally - callers are
+         * expected to pass a non-NULL private scalar. */
+        sp_384_from_mp(pub->x, 6, pX);
+        sp_384_from_mp(pub->y, 6, pY);
+        sp_384_from_bin(pub->z, 6, one, (int)sizeof(one));
+        sp_384_from_mp(priv, 6, privm);
+
+        /* Check point at infinity. */
+        if ((sp_384_iszero_6(pub->x) != 0) &&
+            (sp_384_iszero_6(pub->y) != 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check range of X and Y */
+        if (sp_384_cmp_6(pub->x, p384_mod) >= 0 ||
+            sp_384_cmp_6(pub->y, p384_mod) >= 0) {
+            err = ECC_OUT_OF_RANGE_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Check point is on curve */
+        err = sp_384_ecc_is_point_6(pub, heap);
+    }
+
+    if (err == MP_OKAY) {
+        /* Point * order = infinity */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_avx2_6(p, pub, p384_order, 1, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_6(p, pub, p384_order, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is infinity */
+        if ((sp_384_iszero_6(p->x) == 0) ||
+            (sp_384_iszero_6(p->y) == 0)) {
+            err = ECC_INF_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        /* Base * private = point */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_384_ecc_mulmod_base_avx2_6(p, priv, 1, heap);
+        else
+#endif
+            err = sp_384_ecc_mulmod_base_6(p, priv, 1, heap);
+    }
+    if (err == MP_OKAY) {
+        /* Check result is public key */
+        if (sp_384_cmp_6(p->x, pub->x) != 0 ||
+            sp_384_cmp_6(p->y, pub->y) != 0) {
+            err = ECC_PRIV_KEY_E;
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (priv != NULL) {
+        XFREE(priv, heap, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, heap);
+    sp_384_point_free_6(pub, 0, heap);
+
+    return err;
+}
+#endif
+#ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
+/* Add two projective EC points together.
+ * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
+ *
+ * pX First EC point's X ordinate.
+ * pY First EC point's Y ordinate.
+ * pZ First EC point's Z ordinate.
+ * qX Second EC point's X ordinate.
+ * qY Second EC point's Y ordinate.
+ * qZ Second EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* qX, mp_int* qY, mp_int* qZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 6 * 5];
+    sp_point_384 pd;
+    sp_point_384 qd;
+#endif
+    /* Initialize to NULL: in the dynamic-memory build tmp is only assigned
+     * after the point allocations succeed, but the cleanup below tests it
+     * unconditionally - an uninitialized read would be undefined behaviour
+     * and could free a garbage pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_384* p;
+    sp_point_384* q = NULL;
+    int err;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_384_point_new_6(NULL, pd, p);
+    if (err == MP_OKAY) {
+        err = sp_384_point_new_6(NULL, qd, q);
+    }
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 5, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 6, pX);
+        sp_384_from_mp(p->y, 6, pY);
+        sp_384_from_mp(p->z, 6, pZ);
+        sp_384_from_mp(q->x, 6, qX);
+        sp_384_from_mp(q->y, 6, qY);
+        sp_384_from_mp(q->z, 6, qZ);
+
+        /* p = p + q in projective coordinates. */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_384_proj_point_add_avx2_6(p, p, q, tmp);
+        else
+#endif
+            sp_384_proj_point_add_6(p, p, q, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(q, 0, NULL);
+    sp_384_point_free_6(p, 0, NULL);
+
+    return err;
+}
+
+/* Double a projective EC point.
+ * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * rX Resultant EC point's X ordinate.
+ * rY Resultant EC point's Y ordinate.
+ * rZ Resultant EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
+                              mp_int* rX, mp_int* rY, mp_int* rZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 6 * 2];
+    sp_point_384 pd;
+#endif
+    /* Initialize to NULL: in the dynamic-memory build tmp is only assigned
+     * after the point allocation succeeds, but the cleanup below tests it
+     * unconditionally - reading it uninitialized would be undefined
+     * behaviour and could free a garbage pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_384* p;
+    int err;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+    err = sp_384_point_new_6(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 2, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 6, pX);
+        sp_384_from_mp(p->y, 6, pY);
+        sp_384_from_mp(p->z, 6, pZ);
+
+        /* p = 2.p in projective coordinates. */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            sp_384_proj_point_dbl_avx2_6(p, p, tmp);
+        else
+#endif
+            sp_384_proj_point_dbl_6(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, rX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, rY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, rZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, NULL);
+
+    return err;
+}
+
+/* Map a projective EC point to affine in place.
+ * pZ will be one.
+ *
+ * pX EC point's X ordinate.
+ * pY EC point's Y ordinate.
+ * pZ EC point's Z ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
+{
+#if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit tmpd[2 * 6 * 6];
+    sp_point_384 pd;
+#endif
+    /* Initialize to NULL: in the dynamic-memory build tmp is only assigned
+     * after the point allocation succeeds, but the cleanup below tests it
+     * unconditionally - an uninitialized read would be undefined behaviour
+     * and could free a garbage pointer. */
+    sp_digit* tmp = NULL;
+    sp_point_384* p;
+    int err;
+
+    err = sp_384_point_new_6(NULL, pd, p);
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (err == MP_OKAY) {
+        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 6 * 6, NULL,
+                                                              DYNAMIC_TYPE_ECC);
+        if (tmp == NULL) {
+            err = MEMORY_E;
+        }
+    }
+#else
+    tmp = tmpd;
+#endif
+    if (err == MP_OKAY) {
+        sp_384_from_mp(p->x, 6, pX);
+        sp_384_from_mp(p->y, 6, pY);
+        sp_384_from_mp(p->z, 6, pZ);
+
+        /* Map to affine coordinates; Z becomes 1. */
+        sp_384_map_6(p, p, tmp);
+    }
+
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->x, pX);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->y, pY);
+    }
+    if (err == MP_OKAY) {
+        err = sp_384_to_mp(p->z, pZ);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (tmp != NULL) {
+        XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+    sp_384_point_free_6(p, 0, NULL);
+
+    return err;
+}
+#endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
+#ifdef HAVE_COMP_KEY
+/* Find the square root of a number mod the prime of the curve.
+ *
+ * y The number to operate on and the result.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+static int sp_384_mont_sqrt_6(sp_digit* y)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit t1d[2 * 6];
+    sp_digit t2d[2 * 6];
+    sp_digit t3d[2 * 6];
+    sp_digit t4d[2 * 6];
+    sp_digit t5d[2 * 6];
+#endif
+    sp_digit* t1;
+    sp_digit* t2;
+    sp_digit* t3;
+    sp_digit* t4;
+    sp_digit* t5;
+    int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 6, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        t1 = d + 0 * 6;
+        t2 = d + 2 * 6;
+        t3 = d + 4 * 6;
+        t4 = d + 6 * 6;
+        t5 = d + 8 * 6;
+#else
+        t1 = t1d;
+        t2 = t2d;
+        t3 = t3d;
+        t4 = t4d;
+        t5 = t5d;
+#endif
+
+        /* Fixed square-and-multiply chain raising y to the final exponent
+         * shown in the last step - presumably (p384 + 1) / 4, valid because
+         * p384 = 3 mod 4; TODO confirm against the curve parameters. */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_avx2_6(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_avx2_6(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_avx2_6(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_avx2_6(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_avx2_6(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_avx2_6(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_avx2_6(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_avx2_6(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_avx2_6(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_avx2_6(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_avx2_6(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_avx2_6(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_avx2_6(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_avx2_6(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_avx2_6(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_avx2_6(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_avx2_6(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_avx2_6(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_avx2_6(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_avx2_6(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_avx2_6(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_avx2_6(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_avx2_6(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_avx2_6(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_avx2_6(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_avx2_6(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_avx2_6(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+        else
+#endif
+        {
+            /* t2 = y ^ 0x2 */
+            sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3 */
+            sp_384_mont_mul_6(t1, t2, y, p384_mod, p384_mp_mod);
+            /* t5 = y ^ 0xc */
+            sp_384_mont_sqr_n_6(t5, t1, 2, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xf */
+            sp_384_mont_mul_6(t1, t1, t5, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x1e */
+            sp_384_mont_sqr_6(t2, t1, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x1f */
+            sp_384_mont_mul_6(t3, t2, y, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3e0 */
+            sp_384_mont_sqr_n_6(t2, t3, 5, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3ff */
+            sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fe0 */
+            sp_384_mont_sqr_n_6(t2, t1, 5, p384_mod, p384_mp_mod);
+            /* t3 = y ^ 0x7fff */
+            sp_384_mont_mul_6(t3, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fff800 */
+            sp_384_mont_sqr_n_6(t2, t3, 15, p384_mod, p384_mp_mod);
+            /* t4 = y ^ 0x3ffffff */
+            sp_384_mont_mul_6(t4, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffc000000 */
+            sp_384_mont_sqr_n_6(t2, t4, 30, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffff */
+            sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffff000000000000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 60, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 120, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_6(t1, t1, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
+            sp_384_mont_sqr_n_6(t2, t1, 15, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
+            sp_384_mont_mul_6(t1, t3, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 31, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
+            sp_384_mont_mul_6(t1, t4, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
+            sp_384_mont_sqr_n_6(t2, t1, 4, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
+            sp_384_mont_mul_6(t1, t5, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
+            sp_384_mont_sqr_n_6(t2, t1, 62, p384_mod, p384_mp_mod);
+            /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
+            sp_384_mont_mul_6(t1, y, t2, p384_mod, p384_mp_mod);
+            /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
+            sp_384_mont_sqr_n_6(y, t1, 30, p384_mod, p384_mp_mod);
+        }
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+
+
+/* Uncompress the point given the X ordinate.
+ *
+ * xm X ordinate.
+ * odd Whether the Y ordinate is odd.
+ * ym Calculated Y ordinate.
+ * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
+ */
+int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
+{
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    sp_digit* d;
+#else
+    sp_digit xd[2 * 6];
+    sp_digit yd[2 * 6];
+#endif
+    sp_digit* x = NULL;
+    sp_digit* y = NULL;
+    int err = MP_OKAY;
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 6, NULL, DYNAMIC_TYPE_ECC);
+    if (d == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+        x = d + 0 * 6;
+        y = d + 2 * 6;
+#else
+        x = xd;
+        y = yd;
+#endif
+
+        /* x into Montgomery form; all arithmetic below stays in that form. */
+        sp_384_from_mp(x, 6, xm);
+        err = sp_384_mod_mul_norm_6(x, x, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        /* y = x^3 */
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
+            sp_384_mont_sqr_avx2_6(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_avx2_6(y, y, x, p384_mod, p384_mp_mod);
+        }
+        else
+#endif
+        {
+            sp_384_mont_sqr_6(y, x, p384_mod, p384_mp_mod);
+            sp_384_mont_mul_6(y, y, x, p384_mod, p384_mp_mod);
+        }
+        /* y = x^3 - 3x */
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        sp_384_mont_sub_6(y, y, x, p384_mod);
+        /* y = x^3 - 3x + b (x is reused to hold b in Montgomery form). */
+        err = sp_384_mod_mul_norm_6(x, p384_b, p384_mod);
+    }
+    if (err == MP_OKAY) {
+        sp_384_mont_add_6(y, y, x, p384_mod);
+        /* y = sqrt(x^3 - 3x + b) */
+        err = sp_384_mont_sqrt_6(y);
+    }
+    if (err == MP_OKAY) {
+        /* Reduce y out of Montgomery form back to a normal value. */
+        XMEMSET(y + 6, 0, 6U * sizeof(sp_digit));
+        sp_384_mont_reduce_6(y, p384_mod, p384_mp_mod);
+        /* Pick the root whose parity matches the requested one. */
+        if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
+            sp_384_mont_sub_6(y, p384_mod, y, p384_mod);
+        }
+
+        err = sp_384_to_mp(y, ym);
+    }
+
+#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_ECC);
+    }
+#endif
+
+    return err;
+}
+#endif
+#endif /* WOLFSSL_SP_384 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_SP_X86_64_ASM */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_x86_64_asm.S b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_x86_64_asm.S
new file mode 100644
index 000000000..c6941f1f0
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/sp_x86_64_asm.S
@@ -0,0 +1,41830 @@
+/* sp_x86_64_asm
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#define HAVE_INTEL_AVX2
+#ifndef WOLFSSL_SP_NO_2048
+#ifndef WOLFSSL_SP_NO_2048
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+#ifndef __APPLE__
+.globl sp_2048_from_bin
+.type sp_2048_from_bin,@function
+.align 16
+sp_2048_from_bin:
+#else
+.globl _sp_2048_from_bin
+.p2align 4
+_sp_2048_from_bin:
+#endif /* __APPLE__ */
+    # System V AMD64 ABI:
+    #   rdi = r    output number, fixed 256 bytes (32 x 64-bit words)
+    #   rsi = size (unused here; the output length is hard-wired to 256)
+    #   rdx = a    input byte array, big endian
+    #   rcx = n    number of bytes in a
+    # Clobbers: rax, rcx, rdx, rdi, r8-r11, flags.
+    # movbeq is the MOVBE instruction (load with byte swap); presumably
+    # this path is only built for CPUs with the MOVBE feature - TODO
+    # confirm the build-time gating outside this file.
+    movq %rdx, %r9
+    movq %rdi, %r10
+    addq %rcx, %r9              # r9 = one past the last input byte
+    addq $256, %r10             # r10 = one past the last output word
+    xorq %r11, %r11             # r11 = 0, reused for the zero fill below
+    jmp L_2048_from_bin_64_end
+    # Main loop: consume 64 input bytes per pass from the tail of a
+    # (the least significant bytes) into ascending output words.
+L_2048_from_bin_64_start:
+    subq $64, %r9
+    movbeq 56(%r9), %rax
+    movbeq 48(%r9), %r8
+    movq %rax, (%rdi)
+    movq %r8, 8(%rdi)
+    movbeq 40(%r9), %rax
+    movbeq 32(%r9), %r8
+    movq %rax, 16(%rdi)
+    movq %r8, 24(%rdi)
+    movbeq 24(%r9), %rax
+    movbeq 16(%r9), %r8
+    movq %rax, 32(%rdi)
+    movq %r8, 40(%rdi)
+    movbeq 8(%r9), %rax
+    movbeq (%r9), %r8
+    movq %rax, 48(%rdi)
+    movq %r8, 56(%rdi)
+    addq $64, %rdi
+    subq $64, %rcx
+L_2048_from_bin_64_end:
+    cmpq $63, %rcx
+    jg L_2048_from_bin_64_start # while at least 64 input bytes remain
+    jmp L_2048_from_bin_8_end
+    # One whole 8-byte word per pass for the remaining full words.
+L_2048_from_bin_8_start:
+    subq $8, %r9
+    movbeq (%r9), %rax
+    movq %rax, (%rdi)
+    addq $8, %rdi
+    subq $8, %rcx
+L_2048_from_bin_8_end:
+    cmpq $7, %rcx
+    jg L_2048_from_bin_8_start  # while at least 8 input bytes remain
+    cmpq %r11, %rcx
+    je L_2048_from_bin_hi_end   # no partial word left over
+    # Assemble the final (most significant) partial word byte by byte,
+    # reading forward from the start of a.  Only al is ever written and
+    # rax starts at zero, so the high 56 bits of rax stay clear.
+    movq %r11, %r8
+    movq %r11, %rax
+L_2048_from_bin_hi_start:
+    movb (%rdx), %al            # next (more significant) input byte
+    shlq $8, %r8
+    incq %rdx
+    addq %rax, %r8
+    decq %rcx
+    jg L_2048_from_bin_hi_start
+    movq %r8, (%rdi)
+    addq $8, %rdi
+L_2048_from_bin_hi_end:
+    # Zero-fill any output words the input did not cover, up to r + 256.
+    cmpq %r10, %rdi
+    je L_2048_from_bin_zero_end
+L_2048_from_bin_zero_start:
+    movq %r11, (%rdi)
+    addq $8, %rdi
+    cmpq %r10, %rdi
+    jl L_2048_from_bin_zero_start  # NOTE(review): signed jl on a pointer
+                                   # compare; jb is the strictly correct
+                                   # form (safe here for canonical
+                                   # user-space addresses)
+L_2048_from_bin_zero_end:
+    repz retq                   # "rep ret": legacy AMD branch-predictor idiom
+#ifndef __APPLE__
+.size sp_2048_from_bin,.-sp_2048_from_bin
+#endif /* __APPLE__ */
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 256
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+#ifndef __APPLE__
+.globl sp_2048_to_bin
+.type sp_2048_to_bin,@function
+.align 16
+sp_2048_to_bin:
+#else
+.globl _sp_2048_to_bin
+.p2align 4
+_sp_2048_to_bin:
+#endif /* __APPLE__ */
+    # System V AMD64 ABI:
+    #   rdi = r  input number (32 x 64-bit little-endian words, 256 bytes)
+    #   rsi = a  output byte array; always writes exactly 256 bytes
+    # Clobbers: rax, rdx, flags.  Leaf function, no stack use.
+    # Fully unrolled: reads words from the most significant (offset 248)
+    # down to the least significant, byte-swapping each with MOVBE so the
+    # output is big endian.
+    movbeq 248(%rdi), %rdx
+    movbeq 240(%rdi), %rax
+    movq %rdx, (%rsi)
+    movq %rax, 8(%rsi)
+    movbeq 232(%rdi), %rdx
+    movbeq 224(%rdi), %rax
+    movq %rdx, 16(%rsi)
+    movq %rax, 24(%rsi)
+    movbeq 216(%rdi), %rdx
+    movbeq 208(%rdi), %rax
+    movq %rdx, 32(%rsi)
+    movq %rax, 40(%rsi)
+    movbeq 200(%rdi), %rdx
+    movbeq 192(%rdi), %rax
+    movq %rdx, 48(%rsi)
+    movq %rax, 56(%rsi)
+    movbeq 184(%rdi), %rdx
+    movbeq 176(%rdi), %rax
+    movq %rdx, 64(%rsi)
+    movq %rax, 72(%rsi)
+    movbeq 168(%rdi), %rdx
+    movbeq 160(%rdi), %rax
+    movq %rdx, 80(%rsi)
+    movq %rax, 88(%rsi)
+    movbeq 152(%rdi), %rdx
+    movbeq 144(%rdi), %rax
+    movq %rdx, 96(%rsi)
+    movq %rax, 104(%rsi)
+    movbeq 136(%rdi), %rdx
+    movbeq 128(%rdi), %rax
+    movq %rdx, 112(%rsi)
+    movq %rax, 120(%rsi)
+    movbeq 120(%rdi), %rdx
+    movbeq 112(%rdi), %rax
+    movq %rdx, 128(%rsi)
+    movq %rax, 136(%rsi)
+    movbeq 104(%rdi), %rdx
+    movbeq 96(%rdi), %rax
+    movq %rdx, 144(%rsi)
+    movq %rax, 152(%rsi)
+    movbeq 88(%rdi), %rdx
+    movbeq 80(%rdi), %rax
+    movq %rdx, 160(%rsi)
+    movq %rax, 168(%rsi)
+    movbeq 72(%rdi), %rdx
+    movbeq 64(%rdi), %rax
+    movq %rdx, 176(%rsi)
+    movq %rax, 184(%rsi)
+    movbeq 56(%rdi), %rdx
+    movbeq 48(%rdi), %rax
+    movq %rdx, 192(%rsi)
+    movq %rax, 200(%rsi)
+    movbeq 40(%rdi), %rdx
+    movbeq 32(%rdi), %rax
+    movq %rdx, 208(%rsi)
+    movq %rax, 216(%rsi)
+    movbeq 24(%rdi), %rdx
+    movbeq 16(%rdi), %rax
+    movq %rdx, 224(%rsi)
+    movq %rax, 232(%rsi)
+    movbeq 8(%rdi), %rdx
+    movbeq (%rdi), %rax
+    movq %rdx, 240(%rsi)
+    movq %rax, 248(%rsi)
+    repz retq                   # "rep ret": legacy AMD branch-predictor idiom
+#ifndef __APPLE__
+.size sp_2048_to_bin,.-sp_2048_to_bin
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  A single precision integer: 32-word (2048-bit) product, in rdi.
+ * a  A single precision integer: 16 words (1024 bits), in rsi.
+ * b  A single precision integer: 16 words (1024 bits), in rdx
+ *    (moved to rcx on entry so rdx is free for mulq).
+ *
+ * Fully unrolled schoolbook multiply using mulq/addq/adcq carry chains.
+ * The low 16 result words are accumulated in a 128-byte stack scratch
+ * area and only copied to r at the end, so r may overlap a or b.
+ */
+#ifndef __APPLE__
+.globl sp_2048_mul_16
+.type sp_2048_mul_16,@function
+.align 16
+sp_2048_mul_16:
+#else
+.globl _sp_2048_mul_16
+.p2align 4
+_sp_2048_mul_16:
+#endif /* __APPLE__ */
+ movq %rdx, %rcx
+ subq $128, %rsp
+ # A[0] * B[0]
+ movq (%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ movq %rax, (%rsp)
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[0]
+ movq (%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 8(%rsp)
+ # A[0] * B[2]
+ movq 16(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[1]
+ movq 8(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[0]
+ movq (%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 16(%rsp)
+ # A[0] * B[3]
+ movq 24(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[2]
+ movq 16(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[1]
+ movq 8(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[0]
+ movq (%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 24(%rsp)
+ # A[0] * B[4]
+ movq 32(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[3]
+ movq 24(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[2]
+ movq 16(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[1]
+ movq 8(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[0]
+ movq (%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 32(%rsp)
+ # A[0] * B[5]
+ movq 40(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[4]
+ movq 32(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[3]
+ movq 24(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[2]
+ movq 16(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[1]
+ movq 8(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[0]
+ movq (%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 40(%rsp)
+ # A[0] * B[6]
+ movq 48(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[5]
+ movq 40(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[4]
+ movq 32(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[3]
+ movq 24(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B[2]
+ movq 16(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[1]
+ movq 8(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[6] * B[0]
+ movq (%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 48(%rsp)
+ # A[0] * B[7]
+ movq 56(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[6]
+ movq 48(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[5]
+ movq 40(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[4]
+ movq 32(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[3]
+ movq 24(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B[2]
+ movq 16(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[6] * B[1]
+ movq 8(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[7] * B[0]
+ movq (%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 56(%rsp)
+ # A[0] * B[8]
+ movq 64(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[7]
+ movq 56(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[6]
+ movq 48(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[5]
+ movq 40(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[4]
+ movq 32(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[3]
+ movq 24(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B[2]
+ movq 16(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[7] * B[1]
+ movq 8(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * B[0]
+ movq (%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 64(%rsp)
+ # A[0] * B[9]
+ movq 72(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[8]
+ movq 64(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[7]
+ movq 56(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[6]
+ movq 48(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B[5]
+ movq 40(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[4]
+ movq 32(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[6] * B[3]
+ movq 24(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B[2]
+ movq 16(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[8] * B[1]
+ movq 8(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[9] * B[0]
+ movq (%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 72(%rsp)
+ # A[0] * B[10]
+ movq 80(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[9]
+ movq 72(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[8]
+ movq 64(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[7]
+ movq 56(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[6]
+ movq 48(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B[5]
+ movq 40(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[6] * B[4]
+ movq 32(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[7] * B[3]
+ movq 24(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B[2]
+ movq 16(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[9] * B[1]
+ movq 8(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[10] * B[0]
+ movq (%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 80(%rsp)
+ # A[0] * B[11]
+ movq 88(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[10]
+ movq 80(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[9]
+ movq 72(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[8]
+ movq 64(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[7]
+ movq 56(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[6]
+ movq 48(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B[5]
+ movq 40(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[7] * B[4]
+ movq 32(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * B[3]
+ movq 24(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B[2]
+ movq 16(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[10] * B[1]
+ movq 8(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[11] * B[0]
+ movq (%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 88(%rsp)
+ # A[0] * B[12]
+ movq 96(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[11]
+ movq 88(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[10]
+ movq 80(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[9]
+ movq 72(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B[8]
+ movq 64(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[7]
+ movq 56(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[6] * B[6]
+ movq 48(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B[5]
+ movq 40(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[8] * B[4]
+ movq 32(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[9] * B[3]
+ movq 24(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B[2]
+ movq 16(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[11] * B[1]
+ movq 8(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[12] * B[0]
+ movq (%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 96(%rsp)
+ # A[0] * B[13]
+ movq 104(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[12]
+ movq 96(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[11]
+ movq 88(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[10]
+ movq 80(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[9]
+ movq 72(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B[8]
+ movq 64(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[6] * B[7]
+ movq 56(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[7] * B[6]
+ movq 48(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B[5]
+ movq 40(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[9] * B[4]
+ movq 32(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[10] * B[3]
+ movq 24(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B[2]
+ movq 16(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[12] * B[1]
+ movq 8(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[13] * B[0]
+ movq (%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 104(%rsp)
+ # A[0] * B[14]
+ movq 112(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[13]
+ movq 104(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[12]
+ movq 96(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[11]
+ movq 88(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[10]
+ movq 80(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[9]
+ movq 72(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B[8]
+ movq 64(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[7] * B[7]
+ movq 56(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * B[6]
+ movq 48(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B[5]
+ movq 40(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[10] * B[4]
+ movq 32(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[11] * B[3]
+ movq 24(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[12] * B[2]
+ movq 16(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[13] * B[1]
+ movq 8(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[14] * B[0]
+ movq (%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 112(%rsp)
+ # A[0] * B[15]
+ movq 120(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[14]
+ movq 112(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[13]
+ movq 104(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[12]
+ movq 96(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B[11]
+ movq 88(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[10]
+ movq 80(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[6] * B[9]
+ movq 72(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B[8]
+ movq 64(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[8] * B[7]
+ movq 56(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[9] * B[6]
+ movq 48(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B[5]
+ movq 40(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[11] * B[4]
+ movq 32(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[12] * B[3]
+ movq 24(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[13] * B[2]
+ movq 16(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[14] * B[1]
+ movq 8(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[15] * B[0]
+ movq (%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 120(%rsp)
+ # A[1] * B[15]
+ movq 120(%rcx), %rax
+ mulq 8(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[14]
+ movq 112(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[13]
+ movq 104(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[12]
+ movq 96(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B[11]
+ movq 88(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[6] * B[10]
+ movq 80(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[7] * B[9]
+ movq 72(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B[8]
+ movq 64(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[9] * B[7]
+ movq 56(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[10] * B[6]
+ movq 48(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B[5]
+ movq 40(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[12] * B[4]
+ movq 32(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[13] * B[3]
+ movq 24(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[14] * B[2]
+ movq 16(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[15] * B[1]
+ movq 8(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 128(%rdi)
+ # A[2] * B[15]
+ movq 120(%rcx), %rax
+ mulq 16(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[14]
+ movq 112(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[13]
+ movq 104(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[12]
+ movq 96(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B[11]
+ movq 88(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[7] * B[10]
+ movq 80(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * B[9]
+ movq 72(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B[8]
+ movq 64(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[10] * B[7]
+ movq 56(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[11] * B[6]
+ movq 48(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[12] * B[5]
+ movq 40(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[13] * B[4]
+ movq 32(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[14] * B[3]
+ movq 24(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[15] * B[2]
+ movq 16(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 136(%rdi)
+ # A[3] * B[15]
+ movq 120(%rcx), %rax
+ mulq 24(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B[14]
+ movq 112(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[13]
+ movq 104(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[6] * B[12]
+ movq 96(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B[11]
+ movq 88(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[8] * B[10]
+ movq 80(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[9] * B[9]
+ movq 72(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B[8]
+ movq 64(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[11] * B[7]
+ movq 56(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[12] * B[6]
+ movq 48(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[13] * B[5]
+ movq 40(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[14] * B[4]
+ movq 32(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[15] * B[3]
+ movq 24(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 144(%rdi)
+ # A[4] * B[15]
+ movq 120(%rcx), %rax
+ mulq 32(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B[14]
+ movq 112(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[6] * B[13]
+ movq 104(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[7] * B[12]
+ movq 96(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B[11]
+ movq 88(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[9] * B[10]
+ movq 80(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[10] * B[9]
+ movq 72(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B[8]
+ movq 64(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[12] * B[7]
+ movq 56(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[13] * B[6]
+ movq 48(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[14] * B[5]
+ movq 40(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[15] * B[4]
+ movq 32(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 152(%rdi)
+ # A[5] * B[15]
+ movq 120(%rcx), %rax
+ mulq 40(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B[14]
+ movq 112(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[7] * B[13]
+ movq 104(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * B[12]
+ movq 96(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B[11]
+ movq 88(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[10] * B[10]
+ movq 80(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[11] * B[9]
+ movq 72(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[12] * B[8]
+ movq 64(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[13] * B[7]
+ movq 56(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[14] * B[6]
+ movq 48(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[15] * B[5]
+ movq 40(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 160(%rdi)
+ # A[6] * B[15]
+ movq 120(%rcx), %rax
+ mulq 48(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B[14]
+ movq 112(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[8] * B[13]
+ movq 104(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[9] * B[12]
+ movq 96(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B[11]
+ movq 88(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[11] * B[10]
+ movq 80(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[12] * B[9]
+ movq 72(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[13] * B[8]
+ movq 64(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[14] * B[7]
+ movq 56(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[15] * B[6]
+ movq 48(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 168(%rdi)
+ # A[7] * B[15]
+ movq 120(%rcx), %rax
+ mulq 56(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B[14]
+ movq 112(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[9] * B[13]
+ movq 104(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[10] * B[12]
+ movq 96(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B[11]
+ movq 88(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[12] * B[10]
+ movq 80(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[13] * B[9]
+ movq 72(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[14] * B[8]
+ movq 64(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[15] * B[7]
+ movq 56(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 176(%rdi)
+ # A[8] * B[15]
+ movq 120(%rcx), %rax
+ mulq 64(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B[14]
+ movq 112(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[10] * B[13]
+ movq 104(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[11] * B[12]
+ movq 96(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[12] * B[11]
+ movq 88(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[13] * B[10]
+ movq 80(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[14] * B[9]
+ movq 72(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[15] * B[8]
+ movq 64(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 184(%rdi)
+ # A[9] * B[15]
+ movq 120(%rcx), %rax
+ mulq 72(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B[14]
+ movq 112(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[11] * B[13]
+ movq 104(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[12] * B[12]
+ movq 96(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[13] * B[11]
+ movq 88(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[14] * B[10]
+ movq 80(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[15] * B[9]
+ movq 72(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 192(%rdi)
+ # A[10] * B[15]
+ movq 120(%rcx), %rax
+ mulq 80(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B[14]
+ movq 112(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[12] * B[13]
+ movq 104(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[13] * B[12]
+ movq 96(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[14] * B[11]
+ movq 88(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[15] * B[10]
+ movq 80(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 200(%rdi)
+ # A[11] * B[15]
+ movq 120(%rcx), %rax
+ mulq 88(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[12] * B[14]
+ movq 112(%rcx), %rax
+ mulq 96(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[13] * B[13]
+ movq 104(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[14] * B[12]
+ movq 96(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[15] * B[11]
+ movq 88(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 208(%rdi)
+ # A[12] * B[15]
+ movq 120(%rcx), %rax
+ mulq 96(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[13] * B[14]
+ movq 112(%rcx), %rax
+ mulq 104(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[14] * B[13]
+ movq 104(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[15] * B[12]
+ movq 96(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 216(%rdi)
+ # A[13] * B[15]
+ movq 120(%rcx), %rax
+ mulq 104(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[14] * B[14]
+ movq 112(%rcx), %rax
+ mulq 112(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[15] * B[13]
+ movq 104(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 224(%rdi)
+ # A[14] * B[15]
+ movq 120(%rcx), %rax
+ mulq 112(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[15] * B[14]
+ movq 112(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 232(%rdi)
+ # A[15] * B[15]
+ movq 120(%rcx), %rax
+ mulq 120(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ movq %r8, 240(%rdi)
+ movq %r9, 248(%rdi)
+ movq (%rsp), %rax
+ movq 8(%rsp), %rdx
+ movq 16(%rsp), %r8
+ movq 24(%rsp), %r9
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ movq 32(%rsp), %rax
+ movq 40(%rsp), %rdx
+ movq 48(%rsp), %r8
+ movq 56(%rsp), %r9
+ movq %rax, 32(%rdi)
+ movq %rdx, 40(%rdi)
+ movq %r8, 48(%rdi)
+ movq %r9, 56(%rdi)
+ movq 64(%rsp), %rax
+ movq 72(%rsp), %rdx
+ movq 80(%rsp), %r8
+ movq 88(%rsp), %r9
+ movq %rax, 64(%rdi)
+ movq %rdx, 72(%rdi)
+ movq %r8, 80(%rdi)
+ movq %r9, 88(%rdi)
+ movq 96(%rsp), %rax
+ movq 104(%rsp), %rdx
+ movq 112(%rsp), %r8
+ movq 120(%rsp), %r9
+ movq %rax, 96(%rdi)
+ movq %rdx, 104(%rdi)
+ movq %r8, 112(%rdi)
+ movq %r9, 120(%rdi)
+ addq $128, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_mul_16,.-sp_2048_mul_16
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * Schoolbook squaring of a 16-limb (2048-bit) value using 64x64->128 bit
+ * mulq, exploiting symmetry: each cross product A[i]*A[j] with i != j is
+ * computed once and added twice (or the partial sum is doubled with an
+ * add/adc shift chain) before the diagonal term A[i]*A[i] is added.
+ *
+ * SysV AMD64 ABI:
+ *   rdi = r  (result, 32 limbs / 4096 bits)
+ *   rsi = a  (input, 16 limbs / 2048 bits)
+ * The low 16 result limbs are accumulated in a 128-byte stack buffer and
+ * copied to r at the end, so r may alias a; the high 16 limbs are written
+ * directly to r. r8/r9/rcx rotate as a 3-limb accumulator window;
+ * r10:r11:r12 collect the cross-product sum before doubling. r12 is
+ * callee-saved and is preserved with push/pop.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_sqr_16
+.type sp_2048_sqr_16,@function
+.align 16
+sp_2048_sqr_16:
+#else
+.globl _sp_2048_sqr_16
+.p2align 4
+_sp_2048_sqr_16:
+#endif /* __APPLE__ */
+ push %r12
+ subq $128, %rsp
+ # Low 16 limbs accumulate on the stack; high 16 limbs go straight to r.
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ xorq %r9, %r9
+ movq %rax, (%rsp)
+ movq %rdx, %r8
+ # A[0] * A[1]
+ movq 8(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 8(%rsp)
+ # A[0] * A[2]
+ movq 16(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ movq %r9, 16(%rsp)
+ # A[0] * A[3]
+ movq 24(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * A[2]
+ movq 16(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %rcx, 24(%rsp)
+ # A[0] * A[4]
+ movq 32(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[1] * A[3]
+ movq 24(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 32(%rsp)
+ # A[0] * A[5]
+ movq 40(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[4]
+ movq 32(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[3]
+ movq 24(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # double cross-product sum (each A[i]*A[j], i<j, occurs twice in a square)
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 40(%rsp)
+ # A[0] * A[6]
+ movq 48(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[5]
+ movq 40(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[4]
+ movq 32(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 48(%rsp)
+ # A[0] * A[7]
+ movq 56(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[6]
+ movq 48(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[5]
+ movq 40(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[4]
+ movq 32(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 56(%rsp)
+ # A[0] * A[8]
+ movq 64(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[7]
+ movq 56(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[6]
+ movq 48(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[5]
+ movq 40(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[4]
+ movq 32(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 64(%rsp)
+ # A[0] * A[9]
+ movq 72(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[8]
+ movq 64(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[7]
+ movq 56(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[6]
+ movq 48(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[5]
+ movq 40(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 72(%rsp)
+ # A[0] * A[10]
+ movq 80(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[9]
+ movq 72(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[8]
+ movq 64(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[7]
+ movq 56(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[6]
+ movq 48(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[5]
+ movq 40(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 80(%rsp)
+ # A[0] * A[11]
+ movq 88(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[10]
+ movq 80(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[9]
+ movq 72(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[8]
+ movq 64(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[7]
+ movq 56(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[6]
+ movq 48(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 88(%rsp)
+ # A[0] * A[12]
+ movq 96(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[11]
+ movq 88(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[10]
+ movq 80(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[9]
+ movq 72(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[8]
+ movq 64(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[7]
+ movq 56(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[6]
+ movq 48(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 96(%rsp)
+ # A[0] * A[13]
+ movq 104(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[12]
+ movq 96(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[11]
+ movq 88(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[10]
+ movq 80(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[9]
+ movq 72(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[8]
+ movq 64(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[7]
+ movq 56(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 104(%rsp)
+ # A[0] * A[14]
+ movq 112(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[13]
+ movq 104(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[12]
+ movq 96(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[11]
+ movq 88(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[10]
+ movq 80(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[9]
+ movq 72(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[8]
+ movq 64(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[7]
+ movq 56(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 112(%rsp)
+ # A[0] * A[15]
+ movq 120(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[14]
+ movq 112(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[13]
+ movq 104(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[12]
+ movq 96(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[11]
+ movq 88(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[10]
+ movq 80(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[9]
+ movq 72(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[8]
+ movq 64(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 120(%rsp)
+ # High half of the result is written directly to r from here on.
+ # A[1] * A[15]
+ movq 120(%rsi), %rax
+ mulq 8(%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[2] * A[14]
+ movq 112(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[13]
+ movq 104(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[12]
+ movq 96(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[11]
+ movq 88(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[10]
+ movq 80(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[9]
+ movq 72(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[8] * A[8]
+ movq 64(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 128(%rdi)
+ # A[2] * A[15]
+ movq 120(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[3] * A[14]
+ movq 112(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[13]
+ movq 104(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[12]
+ movq 96(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[11]
+ movq 88(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[10]
+ movq 80(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[8] * A[9]
+ movq 72(%rsi), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 136(%rdi)
+ # A[3] * A[15]
+ movq 120(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[4] * A[14]
+ movq 112(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[13]
+ movq 104(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[12]
+ movq 96(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[11]
+ movq 88(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[8] * A[10]
+ movq 80(%rsi), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[9] * A[9]
+ movq 72(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 144(%rdi)
+ # A[4] * A[15]
+ movq 120(%rsi), %rax
+ mulq 32(%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[5] * A[14]
+ movq 112(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[13]
+ movq 104(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[12]
+ movq 96(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[8] * A[11]
+ movq 88(%rsi), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[9] * A[10]
+ movq 80(%rsi), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 152(%rdi)
+ # A[5] * A[15]
+ movq 120(%rsi), %rax
+ mulq 40(%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[6] * A[14]
+ movq 112(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[13]
+ movq 104(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[8] * A[12]
+ movq 96(%rsi), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[9] * A[11]
+ movq 88(%rsi), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[10] * A[10]
+ movq 80(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 160(%rdi)
+ # A[6] * A[15]
+ movq 120(%rsi), %rax
+ mulq 48(%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[7] * A[14]
+ movq 112(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[8] * A[13]
+ movq 104(%rsi), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[9] * A[12]
+ movq 96(%rsi), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[10] * A[11]
+ movq 88(%rsi), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 168(%rdi)
+ # A[7] * A[15]
+ movq 120(%rsi), %rax
+ mulq 56(%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[8] * A[14]
+ movq 112(%rsi), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[9] * A[13]
+ movq 104(%rsi), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[10] * A[12]
+ movq 96(%rsi), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[11] * A[11]
+ movq 88(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 176(%rdi)
+ # A[8] * A[15]
+ movq 120(%rsi), %rax
+ mulq 64(%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[9] * A[14]
+ movq 112(%rsi), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[10] * A[13]
+ movq 104(%rsi), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[11] * A[12]
+ movq 96(%rsi), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 184(%rdi)
+ # A[9] * A[15]
+ movq 120(%rsi), %rax
+ mulq 72(%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[10] * A[14]
+ movq 112(%rsi), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[11] * A[13]
+ movq 104(%rsi), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[12] * A[12]
+ movq 96(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 192(%rdi)
+ # A[10] * A[15]
+ movq 120(%rsi), %rax
+ mulq 80(%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[11] * A[14]
+ movq 112(%rsi), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[12] * A[13]
+ movq 104(%rsi), %rax
+ mulq 96(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 200(%rdi)
+ # Few products remain: cross products are now added twice directly
+ # instead of being summed and doubled.
+ # A[11] * A[15]
+ movq 120(%rsi), %rax
+ mulq 88(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ # A[12] * A[14]
+ movq 112(%rsi), %rax
+ mulq 96(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ # A[13] * A[13]
+ movq 104(%rsi), %rax
+ mulq %rax
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ movq %r9, 208(%rdi)
+ # A[12] * A[15]
+ movq 120(%rsi), %rax
+ mulq 96(%rsi)
+ xorq %r9, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[13] * A[14]
+ movq 112(%rsi), %rax
+ mulq 104(%rsi)
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %rcx, 216(%rdi)
+ # A[13] * A[15]
+ movq 120(%rsi), %rax
+ mulq 104(%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[14] * A[14]
+ movq 112(%rsi), %rax
+ mulq %rax
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 224(%rdi)
+ # A[14] * A[15]
+ movq 120(%rsi), %rax
+ mulq 112(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ movq %r9, 232(%rdi)
+ # A[15] * A[15]
+ movq 120(%rsi), %rax
+ mulq %rax
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ movq %rcx, 240(%rdi)
+ movq %r8, 248(%rdi)
+ # Copy the low 16 limbs from the stack buffer into the result.
+ movq (%rsp), %rax
+ movq 8(%rsp), %rdx
+ movq 16(%rsp), %r10
+ movq 24(%rsp), %r11
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 32(%rsp), %rax
+ movq 40(%rsp), %rdx
+ movq 48(%rsp), %r10
+ movq 56(%rsp), %r11
+ movq %rax, 32(%rdi)
+ movq %rdx, 40(%rdi)
+ movq %r10, 48(%rdi)
+ movq %r11, 56(%rdi)
+ movq 64(%rsp), %rax
+ movq 72(%rsp), %rdx
+ movq 80(%rsp), %r10
+ movq 88(%rsp), %r11
+ movq %rax, 64(%rdi)
+ movq %rdx, 72(%rdi)
+ movq %r10, 80(%rdi)
+ movq %r11, 88(%rdi)
+ movq 96(%rsp), %rax
+ movq 104(%rsp), %rdx
+ movq 112(%rsp), %r10
+ movq 120(%rsp), %r11
+ movq %rax, 96(%rdi)
+ movq %rdx, 104(%rdi)
+ movq %r10, 112(%rdi)
+ movq %r11, 120(%rdi)
+ addq $128, %rsp
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_sqr_16,.-sp_2048_sqr_16
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r Result of multiplication.
+ * a First number to multiply.
+ * b Second number to multiply.
+ */
+#ifndef __APPLE__
+.globl sp_2048_mul_avx2_16
+.type sp_2048_mul_avx2_16,@function
+.align 16
+sp_2048_mul_avx2_16:
+#else
+.globl _sp_2048_mul_avx2_16
+.p2align 4
+_sp_2048_mul_avx2_16:
+#endif /* __APPLE__ */
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ movq %rdx, %rbp
+ subq $128, %rsp
+ cmpq %rdi, %rsi
+ movq %rsp, %rbx
+ cmovne %rdi, %rbx
+ cmpq %rdi, %rbp
+ cmove %rsp, %rbx
+ xorq %r14, %r14
+ movq (%rsi), %rdx
+ # A[0] * B[0]
+ mulx (%rbp), %r8, %r9
+ # A[0] * B[1]
+ mulx 8(%rbp), %rax, %r10
+ movq %r8, (%rbx)
+ adcxq %rax, %r9
+ # A[0] * B[2]
+ mulx 16(%rbp), %rax, %r11
+ movq %r9, 8(%rbx)
+ adcxq %rax, %r10
+ # A[0] * B[3]
+ mulx 24(%rbp), %rax, %r12
+ movq %r10, 16(%rbx)
+ adcxq %rax, %r11
+ movq %r11, 24(%rbx)
+ # A[0] * B[4]
+ mulx 32(%rbp), %rax, %r8
+ adcxq %rax, %r12
+ # A[0] * B[5]
+ mulx 40(%rbp), %rax, %r9
+ movq %r12, 32(%rbx)
+ adcxq %rax, %r8
+ # A[0] * B[6]
+ mulx 48(%rbp), %rax, %r10
+ movq %r8, 40(%rbx)
+ adcxq %rax, %r9
+ # A[0] * B[7]
+ mulx 56(%rbp), %rax, %r11
+ movq %r9, 48(%rbx)
+ adcxq %rax, %r10
+ movq %r10, 56(%rbx)
+ # A[0] * B[8]
+ mulx 64(%rbp), %rax, %r12
+ adcxq %rax, %r11
+ # A[0] * B[9]
+ mulx 72(%rbp), %rax, %r8
+ movq %r11, 64(%rbx)
+ adcxq %rax, %r12
+ # A[0] * B[10]
+ mulx 80(%rbp), %rax, %r9
+ movq %r12, 72(%rbx)
+ adcxq %rax, %r8
+ # A[0] * B[11]
+ mulx 88(%rbp), %rax, %r10
+ movq %r8, 80(%rbx)
+ adcxq %rax, %r9
+ movq %r9, 88(%rbx)
+ # A[0] * B[12]
+ mulx 96(%rbp), %rax, %r11
+ adcxq %rax, %r10
+ # A[0] * B[13]
+ mulx 104(%rbp), %rax, %r12
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ # A[0] * B[14]
+ mulx 112(%rbp), %rax, %r8
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ # A[0] * B[15]
+ mulx 120(%rbp), %rax, %r9
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adcxq %r14, %r9
+ movq %r14, %r13
+ adcxq %r14, %r13
+ movq %r8, 120(%rbx)
+ movq %r9, 128(%rdi)
+ movq 8(%rsi), %rdx
+ movq 8(%rbx), %r9
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %r11
+ movq 32(%rbx), %r12
+ movq 40(%rbx), %r8
+ # A[1] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[1] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r9, 8(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[1] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r10, 16(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[1] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r11, 24(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ movq %r12, 32(%rbx)
+ movq 48(%rbx), %r9
+ movq 56(%rbx), %r10
+ movq 64(%rbx), %r11
+ movq 72(%rbx), %r12
+ # A[1] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[1] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r8, 40(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[1] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r9, 48(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[1] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r10, 56(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ movq %r11, 64(%rbx)
+ movq 80(%rbx), %r8
+ movq 88(%rbx), %r9
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ # A[1] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[1] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r12, 72(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[1] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r8, 80(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[1] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r9, 88(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 96(%rbx)
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ # A[1] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[1] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[1] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[1] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ movq %r14, %r10
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ adcxq %r13, %r10
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r9, 128(%rdi)
+ movq %r10, 136(%rdi)
+ movq 16(%rsi), %rdx
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %r11
+ movq 32(%rbx), %r12
+ movq 40(%rbx), %r8
+ movq 48(%rbx), %r9
+ # A[2] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[2] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r10, 16(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r11, 24(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[2] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r12, 32(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 40(%rbx)
+ movq 56(%rbx), %r10
+ movq 64(%rbx), %r11
+ movq 72(%rbx), %r12
+ movq 80(%rbx), %r8
+ # A[2] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[2] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r9, 48(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[2] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r10, 56(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[2] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r11, 64(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ movq %r12, 72(%rbx)
+ movq 88(%rbx), %r9
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ # A[2] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[2] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r8, 80(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[2] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r9, 88(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[2] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ movq %r11, 104(%rbx)
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ # A[2] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[2] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[2] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[2] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ movq %r14, %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ adcxq %r13, %r11
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r10, 136(%rdi)
+ movq %r11, 144(%rdi)
+ movq 24(%rsi), %rdx
+ movq 24(%rbx), %r11
+ movq 32(%rbx), %r12
+ movq 40(%rbx), %r8
+ movq 48(%rbx), %r9
+ movq 56(%rbx), %r10
+ # A[3] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[3] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r11, 24(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[3] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r12, 32(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[3] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r8, 40(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 48(%rbx)
+ movq 64(%rbx), %r11
+ movq 72(%rbx), %r12
+ movq 80(%rbx), %r8
+ movq 88(%rbx), %r9
+ # A[3] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[3] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r10, 56(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[3] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r11, 64(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[3] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r12, 72(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 80(%rbx)
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ # A[3] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[3] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r9, 88(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[3] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[3] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ movq %r12, 112(%rbx)
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ # A[3] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[3] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[3] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[3] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ movq %r14, %r12
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ adcxq %r13, %r12
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r11, 144(%rdi)
+ movq %r12, 152(%rdi)
+ movq 32(%rsi), %rdx
+ movq 32(%rbx), %r12
+ movq 40(%rbx), %r8
+ movq 48(%rbx), %r9
+ movq 56(%rbx), %r10
+ movq 64(%rbx), %r11
+ # A[4] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[4] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r12, 32(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[4] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r8, 40(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[4] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r9, 48(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 56(%rbx)
+ movq 72(%rbx), %r12
+ movq 80(%rbx), %r8
+ movq 88(%rbx), %r9
+ movq 96(%rbx), %r10
+ # A[4] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[4] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r11, 64(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[4] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r12, 72(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[4] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r8, 80(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 88(%rbx)
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ # A[4] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[4] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[4] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[4] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 120(%rbx)
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ movq 152(%rdi), %r12
+ # A[4] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[4] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[4] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[4] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r11, 144(%rdi)
+ movq %r14, %r8
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ adcxq %r13, %r8
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r12, 152(%rdi)
+ movq %r8, 160(%rdi)
+ movq 40(%rsi), %rdx
+ movq 40(%rbx), %r8
+ movq 48(%rbx), %r9
+ movq 56(%rbx), %r10
+ movq 64(%rbx), %r11
+ movq 72(%rbx), %r12
+ # A[5] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[5] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r8, 40(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[5] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r9, 48(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[5] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r10, 56(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ movq %r11, 64(%rbx)
+ movq 80(%rbx), %r8
+ movq 88(%rbx), %r9
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ # A[5] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[5] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r12, 72(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[5] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r8, 80(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[5] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r9, 88(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 96(%rbx)
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ # A[5] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[5] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[5] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[5] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 128(%rdi)
+ movq 144(%rdi), %r11
+ movq 152(%rdi), %r12
+ movq 160(%rdi), %r8
+ # A[5] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[5] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[5] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r11, 144(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[5] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r12, 152(%rdi)
+ movq %r14, %r9
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ adcxq %r13, %r9
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r8, 160(%rdi)
+ movq %r9, 168(%rdi)
+ movq 48(%rsi), %rdx
+ movq 48(%rbx), %r9
+ movq 56(%rbx), %r10
+ movq 64(%rbx), %r11
+ movq 72(%rbx), %r12
+ movq 80(%rbx), %r8
+ # A[6] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[6] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r9, 48(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[6] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r10, 56(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[6] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r11, 64(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ movq %r12, 72(%rbx)
+ movq 88(%rbx), %r9
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ # A[6] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[6] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r8, 80(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[6] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r9, 88(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[6] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ movq %r11, 104(%rbx)
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ # A[6] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[6] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[6] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[6] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 136(%rdi)
+ movq 152(%rdi), %r12
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ # A[6] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[6] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r11, 144(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[6] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r12, 152(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[6] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r8, 160(%rdi)
+ movq %r14, %r10
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ adcxq %r13, %r10
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r9, 168(%rdi)
+ movq %r10, 176(%rdi)
+ movq 56(%rsi), %rdx
+ movq 56(%rbx), %r10
+ movq 64(%rbx), %r11
+ movq 72(%rbx), %r12
+ movq 80(%rbx), %r8
+ movq 88(%rbx), %r9
+ # A[7] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[7] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r10, 56(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[7] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r11, 64(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[7] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r12, 72(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 80(%rbx)
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ # A[7] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[7] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r9, 88(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[7] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[7] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ movq %r12, 112(%rbx)
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ movq 152(%rdi), %r12
+ # A[7] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[7] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[7] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[7] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ movq %r11, 144(%rdi)
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ movq 176(%rdi), %r10
+ # A[7] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[7] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r12, 152(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[7] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r8, 160(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[7] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r9, 168(%rdi)
+ movq %r14, %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ adcxq %r13, %r11
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r10, 176(%rdi)
+ movq %r11, 184(%rdi)
+ movq 64(%rsi), %rdx
+ movq 64(%rbx), %r11
+ movq 72(%rbx), %r12
+ movq 80(%rbx), %r8
+ movq 88(%rbx), %r9
+ movq 96(%rbx), %r10
+ # A[8] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[8] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r11, 64(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[8] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r12, 72(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[8] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r8, 80(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 88(%rbx)
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ # A[8] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[8] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[8] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[8] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 120(%rbx)
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ movq 152(%rdi), %r12
+ movq 160(%rdi), %r8
+ # A[8] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[8] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[8] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[8] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r11, 144(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ movq %r12, 152(%rdi)
+ movq 168(%rdi), %r9
+ movq 176(%rdi), %r10
+ movq 184(%rdi), %r11
+ # A[8] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[8] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r8, 160(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[8] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r9, 168(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[8] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r10, 176(%rdi)
+ movq %r14, %r12
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ adcxq %r13, %r12
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r11, 184(%rdi)
+ movq %r12, 192(%rdi)
+ movq 72(%rsi), %rdx
+ movq 72(%rbx), %r12
+ movq 80(%rbx), %r8
+ movq 88(%rbx), %r9
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ # A[9] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[9] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r12, 72(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[9] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r8, 80(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[9] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r9, 88(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 96(%rbx)
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ # A[9] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[9] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[9] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[9] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 128(%rdi)
+ movq 144(%rdi), %r11
+ movq 152(%rdi), %r12
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ # A[9] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[9] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[9] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r11, 144(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[9] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r12, 152(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 160(%rdi)
+ movq 176(%rdi), %r10
+ movq 184(%rdi), %r11
+ movq 192(%rdi), %r12
+ # A[9] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[9] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r9, 168(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[9] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r10, 176(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[9] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r11, 184(%rdi)
+ movq %r14, %r8
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ adcxq %r13, %r8
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r12, 192(%rdi)
+ movq %r8, 200(%rdi)
+ movq 80(%rsi), %rdx
+ movq 80(%rbx), %r8
+ movq 88(%rbx), %r9
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ # A[10] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[10] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r8, 80(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[10] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r9, 88(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[10] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ movq %r11, 104(%rbx)
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ # A[10] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[10] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[10] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[10] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 136(%rdi)
+ movq 152(%rdi), %r12
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ movq 176(%rdi), %r10
+ # A[10] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[10] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r11, 144(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[10] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r12, 152(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[10] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r8, 160(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 168(%rdi)
+ movq 184(%rdi), %r11
+ movq 192(%rdi), %r12
+ movq 200(%rdi), %r8
+ # A[10] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[10] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r10, 176(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[10] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r11, 184(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[10] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r12, 192(%rdi)
+ movq %r14, %r9
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ adcxq %r13, %r9
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r8, 200(%rdi)
+ movq %r9, 208(%rdi)
+ movq 88(%rsi), %rdx
+ movq 88(%rbx), %r9
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ # A[11] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[11] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r9, 88(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[11] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[11] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ movq %r12, 112(%rbx)
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ movq 152(%rdi), %r12
+ # A[11] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[11] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[11] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[11] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ movq %r11, 144(%rdi)
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ movq 176(%rdi), %r10
+ movq 184(%rdi), %r11
+ # A[11] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[11] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r12, 152(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[11] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r8, 160(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[11] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r9, 168(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 176(%rdi)
+ movq 192(%rdi), %r12
+ movq 200(%rdi), %r8
+ movq 208(%rdi), %r9
+ # A[11] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[11] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r11, 184(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[11] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r12, 192(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[11] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r8, 200(%rdi)
+ movq %r14, %r10
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ adcxq %r13, %r10
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r9, 208(%rdi)
+ movq %r10, 216(%rdi)
+ movq 96(%rsi), %rdx
+ movq 96(%rbx), %r10
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ # A[12] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[12] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r10, 96(%rbx)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[12] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[12] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 120(%rbx)
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ movq 152(%rdi), %r12
+ movq 160(%rdi), %r8
+ # A[12] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[12] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[12] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[12] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r11, 144(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ movq %r12, 152(%rdi)
+ movq 168(%rdi), %r9
+ movq 176(%rdi), %r10
+ movq 184(%rdi), %r11
+ movq 192(%rdi), %r12
+ # A[12] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[12] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r8, 160(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[12] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r9, 168(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[12] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r10, 176(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ movq %r11, 184(%rdi)
+ movq 200(%rdi), %r8
+ movq 208(%rdi), %r9
+ movq 216(%rdi), %r10
+ # A[12] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[12] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r12, 192(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[12] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r8, 200(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[12] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r9, 208(%rdi)
+ movq %r14, %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ adcxq %r13, %r11
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r10, 216(%rdi)
+ movq %r11, 224(%rdi)
+ movq 104(%rsi), %rdx
+ movq 104(%rbx), %r11
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ # A[13] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[13] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r11, 104(%rbx)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[13] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[13] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 128(%rdi)
+ movq 144(%rdi), %r11
+ movq 152(%rdi), %r12
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ # A[13] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[13] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[13] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r11, 144(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[13] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r12, 152(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 160(%rdi)
+ movq 176(%rdi), %r10
+ movq 184(%rdi), %r11
+ movq 192(%rdi), %r12
+ movq 200(%rdi), %r8
+ # A[13] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[13] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r9, 168(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[13] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r10, 176(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[13] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r11, 184(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ movq %r12, 192(%rdi)
+ movq 208(%rdi), %r9
+ movq 216(%rdi), %r10
+ movq 224(%rdi), %r11
+ # A[13] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[13] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r8, 200(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[13] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r9, 208(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[13] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r10, 216(%rdi)
+ movq %r14, %r12
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ adcxq %r13, %r12
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r11, 224(%rdi)
+ movq %r12, 232(%rdi)
+ movq 112(%rsi), %rdx
+ movq 112(%rbx), %r12
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ # A[14] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[14] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r12, 112(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[14] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[14] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 136(%rdi)
+ movq 152(%rdi), %r12
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ movq 176(%rdi), %r10
+ # A[14] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[14] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r11, 144(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[14] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r12, 152(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[14] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r8, 160(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 168(%rdi)
+ movq 184(%rdi), %r11
+ movq 192(%rdi), %r12
+ movq 200(%rdi), %r8
+ movq 208(%rdi), %r9
+ # A[14] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[14] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r10, 176(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[14] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r11, 184(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[14] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r12, 192(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 200(%rdi)
+ movq 216(%rdi), %r10
+ movq 224(%rdi), %r11
+ movq 232(%rdi), %r12
+ # A[14] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[14] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r9, 208(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[14] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r10, 216(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[14] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r11, 224(%rdi)
+ movq %r14, %r8
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ adcxq %r13, %r8
+ movq %r14, %r13
+ adoxq %r14, %r13
+ adcxq %r14, %r13
+ movq %r12, 232(%rdi)
+ movq %r8, 240(%rdi)
+ movq 120(%rsi), %rdx
+ movq 120(%rbx), %r8
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r11
+ movq 152(%rdi), %r12
+ # A[15] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[15] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r8, 120(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[15] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[15] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ movq %r11, 144(%rdi)
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ movq 176(%rdi), %r10
+ movq 184(%rdi), %r11
+ # A[15] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[15] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r12, 152(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[15] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ movq %r8, 160(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[15] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r9, 168(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 176(%rdi)
+ movq 192(%rdi), %r12
+ movq 200(%rdi), %r8
+ movq 208(%rdi), %r9
+ movq 216(%rdi), %r10
+ # A[15] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[15] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r11, 184(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[15] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ movq %r12, 192(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[15] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r8, 200(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 208(%rdi)
+ movq 224(%rdi), %r11
+ movq 232(%rdi), %r12
+ movq 240(%rdi), %r8
+ # A[15] * B[12]
+ mulx 96(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[15] * B[13]
+ mulx 104(%rbp), %rax, %rcx
+ movq %r10, 216(%rdi)
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[15] * B[14]
+ mulx 112(%rbp), %rax, %rcx
+ movq %r11, 224(%rdi)
+ adcxq %rax, %r12
+ adoxq %rcx, %r8
+ # A[15] * B[15]
+ mulx 120(%rbp), %rax, %rcx
+ movq %r12, 232(%rdi)
+ movq %r14, %r9
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ adcxq %r13, %r9
+ movq %r8, 240(%rdi)
+ movq %r9, 248(%rdi)
+ cmpq %rdi, %rsi
+ je L_start_2048_mul_avx2_16
+ cmpq %rdi, %rbp
+ jne L_end_2048_mul_avx2_16
+L_start_2048_mul_avx2_16:
+ vmovdqu (%rbx), %xmm0
+ vmovups %xmm0, (%rdi)
+ vmovdqu 16(%rbx), %xmm0
+ vmovups %xmm0, 16(%rdi)
+ vmovdqu 32(%rbx), %xmm0
+ vmovups %xmm0, 32(%rdi)
+ vmovdqu 48(%rbx), %xmm0
+ vmovups %xmm0, 48(%rdi)
+ vmovdqu 64(%rbx), %xmm0
+ vmovups %xmm0, 64(%rdi)
+ vmovdqu 80(%rbx), %xmm0
+ vmovups %xmm0, 80(%rdi)
+ vmovdqu 96(%rbx), %xmm0
+ vmovups %xmm0, 96(%rdi)
+ vmovdqu 112(%rbx), %xmm0
+ vmovups %xmm0, 112(%rdi)
+L_end_2048_mul_avx2_16:
+ addq $128, %rsp
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_mul_avx2_16,.-sp_2048_mul_avx2_16
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  Result of the squaring: a single precision integer of 16 64-bit words
+ *    (2048 bits), written through %rdi.
+ * a  The number to square: a single precision integer of 16 64-bit words
+ *    (2048 bits), read from %rsi.
+ */
+#ifndef __APPLE__
+.globl sp_2048_sqr_avx2_16
+.type sp_2048_sqr_avx2_16,@function
+.align 16
+sp_2048_sqr_avx2_16:
+#else
+.globl _sp_2048_sqr_avx2_16
+.p2align 4
+_sp_2048_sqr_avx2_16:
+#endif /* __APPLE__ */
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ subq $128, %rsp
+ cmpq %rdi, %rsi
+ movq %rsp, %rbp
+ cmovne %rdi, %rbp
+ xorq %r11, %r11
+ # Diagonal 1
+ xorq %r10, %r10
+ # A[1] x A[0]
+ movq (%rsi), %rdx
+ mulxq 8(%rsi), %r8, %r9
+ # A[2] x A[0]
+ mulxq 16(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 8(%rbp)
+ movq %r9, 16(%rbp)
+ movq %r11, %r8
+ movq %r11, %r9
+ # A[3] x A[0]
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[4] x A[0]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 24(%rbp)
+ movq %r8, 32(%rbp)
+ movq %r11, %r10
+ movq %r11, %r8
+ # A[5] x A[0]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[6] x A[0]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 40(%rbp)
+ movq %r10, 48(%rbp)
+ movq %r11, %r9
+ movq %r11, %r10
+ # A[7] x A[0]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[8] x A[0]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 56(%rbp)
+ movq %r9, 64(%rbp)
+ movq %r11, %r8
+ movq %r11, %r9
+ # A[9] x A[0]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[10] x A[0]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 72(%rbp)
+ movq %r8, 80(%rbp)
+ movq %r11, %r10
+ movq %r11, %r8
+ # A[11] x A[0]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[12] x A[0]
+ mulxq 96(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 88(%rbp)
+ movq %r10, %r13
+ movq %r11, %r9
+ movq %r11, %r10
+ # A[13] x A[0]
+ mulxq 104(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[14] x A[0]
+ mulxq 112(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, %r14
+ movq %r9, %r15
+ movq %r11, %r8
+ # A[15] x A[0]
+ mulxq 120(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, %rbx
+ # Carry
+ adcxq %r11, %r8
+ movq %r11, %r12
+ adcxq %r11, %r12
+ adoxq %r11, %r12
+ movq %r8, 128(%rdi)
+ # Diagonal 2
+ movq 24(%rbp), %r8
+ movq 32(%rbp), %r9
+ movq 40(%rbp), %r10
+ # A[2] x A[1]
+ movq 8(%rsi), %rdx
+ mulxq 16(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[3] x A[1]
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 24(%rbp)
+ movq %r9, 32(%rbp)
+ movq 48(%rbp), %r8
+ movq 56(%rbp), %r9
+ # A[4] x A[1]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[5] x A[1]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 40(%rbp)
+ movq %r8, 48(%rbp)
+ movq 64(%rbp), %r10
+ movq 72(%rbp), %r8
+ # A[6] x A[1]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[7] x A[1]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 56(%rbp)
+ movq %r10, 64(%rbp)
+ movq 80(%rbp), %r9
+ movq 88(%rbp), %r10
+ # A[8] x A[1]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[9] x A[1]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 72(%rbp)
+ movq %r9, 80(%rbp)
+ # No load %r13 - %r8
+ # No load %r14 - %r9
+ # A[10] x A[1]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r13
+ # A[11] x A[1]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r13
+ adoxq %rcx, %r14
+ movq %r10, 88(%rbp)
+ # No store %r13
+ # No load %r15 - %r10
+ # No load %rbx - %r8
+ # A[12] x A[1]
+ mulxq 96(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # A[13] x A[1]
+ mulxq 104(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r14
+ # No store %r15
+ movq 128(%rdi), %r9
+ movq %r11, %r10
+ # A[14] x A[1]
+ mulxq 112(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r9
+ # A[15] x A[1]
+ mulxq 120(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # No store %rbx
+ movq %r9, 128(%rdi)
+ movq %r11, %r8
+ # A[15] x A[2]
+ movq 16(%rsi), %rdx
+ mulxq 120(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 136(%rdi)
+ # Carry
+ adcxq %r12, %r8
+ movq %r11, %r12
+ adcxq %r11, %r12
+ adoxq %r11, %r12
+ movq %r8, 144(%rdi)
+ # Diagonal 3
+ movq 40(%rbp), %r8
+ movq 48(%rbp), %r9
+ movq 56(%rbp), %r10
+ # A[3] x A[2]
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[4] x A[2]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 40(%rbp)
+ movq %r9, 48(%rbp)
+ movq 64(%rbp), %r8
+ movq 72(%rbp), %r9
+ # A[5] x A[2]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[6] x A[2]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 56(%rbp)
+ movq %r8, 64(%rbp)
+ movq 80(%rbp), %r10
+ movq 88(%rbp), %r8
+ # A[7] x A[2]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[8] x A[2]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 72(%rbp)
+ movq %r10, 80(%rbp)
+ # No load %r13 - %r9
+ # No load %r14 - %r10
+ # A[9] x A[2]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r13
+ # A[10] x A[2]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r13
+ adoxq %rcx, %r14
+ movq %r8, 88(%rbp)
+ # No store %r13
+ # No load %r15 - %r8
+ # No load %rbx - %r9
+ # A[11] x A[2]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # A[12] x A[2]
+ mulxq 96(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r14
+ # No store %r15
+ movq 128(%rdi), %r10
+ movq 136(%rdi), %r8
+ # A[13] x A[2]
+ mulxq 104(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r10
+ # A[14] x A[2]
+ mulxq 112(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # No store %rbx
+ movq %r10, 128(%rdi)
+ movq 144(%rdi), %r9
+ movq %r11, %r10
+ # A[14] x A[3]
+ movq 112(%rsi), %rdx
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[14] x A[4]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 136(%rdi)
+ movq %r9, 144(%rdi)
+ movq %r11, %r8
+ # A[14] x A[5]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 152(%rdi)
+ # Carry
+ adcxq %r12, %r8
+ movq %r11, %r12
+ adcxq %r11, %r12
+ adoxq %r11, %r12
+ movq %r8, 160(%rdi)
+ # Diagonal 4
+ movq 56(%rbp), %r8
+ movq 64(%rbp), %r9
+ movq 72(%rbp), %r10
+ # A[4] x A[3]
+ movq 24(%rsi), %rdx
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[5] x A[3]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 56(%rbp)
+ movq %r9, 64(%rbp)
+ movq 80(%rbp), %r8
+ movq 88(%rbp), %r9
+ # A[6] x A[3]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[7] x A[3]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 72(%rbp)
+ movq %r8, 80(%rbp)
+ # No load %r13 - %r10
+ # No load %r14 - %r8
+ # A[8] x A[3]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r13
+ # A[9] x A[3]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r13
+ adoxq %rcx, %r14
+ movq %r9, 88(%rbp)
+ # No store %r13
+ # No load %r15 - %r9
+ # No load %rbx - %r10
+ # A[10] x A[3]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # A[11] x A[3]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r14
+ # No store %r15
+ movq 128(%rdi), %r8
+ movq 136(%rdi), %r9
+ # A[12] x A[3]
+ mulxq 96(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r8
+ # A[13] x A[3]
+ mulxq 104(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # No store %rbx
+ movq %r8, 128(%rdi)
+ movq 144(%rdi), %r10
+ movq 152(%rdi), %r8
+ # A[13] x A[4]
+ movq 104(%rsi), %rdx
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[13] x A[5]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 136(%rdi)
+ movq %r10, 144(%rdi)
+ movq 160(%rdi), %r9
+ movq %r11, %r10
+ # A[13] x A[6]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[13] x A[7]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 152(%rdi)
+ movq %r9, 160(%rdi)
+ movq %r11, %r8
+ # A[13] x A[8]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 168(%rdi)
+ # Carry
+ adcxq %r12, %r8
+ movq %r11, %r12
+ adcxq %r11, %r12
+ adoxq %r11, %r12
+ movq %r8, 176(%rdi)
+ # Diagonal 5
+ movq 72(%rbp), %r8
+ movq 80(%rbp), %r9
+ movq 88(%rbp), %r10
+ # A[5] x A[4]
+ movq 32(%rsi), %rdx
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[6] x A[4]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 72(%rbp)
+ movq %r9, 80(%rbp)
+ # No load %r13 - %r8
+ # No load %r14 - %r9
+ # A[7] x A[4]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r13
+ # A[8] x A[4]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r13
+ adoxq %rcx, %r14
+ movq %r10, 88(%rbp)
+ # No store %r13
+ # No load %r15 - %r10
+ # No load %rbx - %r8
+ # A[9] x A[4]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # A[10] x A[4]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r14
+ # No store %r15
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ # A[11] x A[4]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r9
+ # A[12] x A[4]
+ mulxq 96(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # No store %rbx
+ movq %r9, 128(%rdi)
+ movq 144(%rdi), %r8
+ movq 152(%rdi), %r9
+ # A[12] x A[5]
+ movq 96(%rsi), %rdx
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[12] x A[6]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 136(%rdi)
+ movq %r8, 144(%rdi)
+ movq 160(%rdi), %r10
+ movq 168(%rdi), %r8
+ # A[12] x A[7]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[12] x A[8]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 152(%rdi)
+ movq %r10, 160(%rdi)
+ movq 176(%rdi), %r9
+ movq %r11, %r10
+ # A[12] x A[9]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[12] x A[10]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 168(%rdi)
+ movq %r9, 176(%rdi)
+ movq %r11, %r8
+ # A[12] x A[11]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 184(%rdi)
+ # Carry
+ adcxq %r12, %r8
+ movq %r11, %r12
+ adcxq %r11, %r12
+ adoxq %r11, %r12
+ movq %r8, 192(%rdi)
+ # Diagonal 6
+ movq 88(%rbp), %r8
+ # No load %r13 - %r9
+ # No load %r14 - %r10
+ # A[6] x A[5]
+ movq 40(%rsi), %rdx
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r13
+ # A[7] x A[5]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r13
+ adoxq %rcx, %r14
+ movq %r8, 88(%rbp)
+ # No store %r13
+ # No load %r15 - %r8
+ # No load %rbx - %r9
+ # A[8] x A[5]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # A[9] x A[5]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r14
+ # No store %r15
+ movq 128(%rdi), %r10
+ movq 136(%rdi), %r8
+ # A[10] x A[5]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r10
+ # A[11] x A[5]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # No store %rbx
+ movq %r10, 128(%rdi)
+ movq 144(%rdi), %r9
+ movq 152(%rdi), %r10
+ # A[11] x A[6]
+ movq 88(%rsi), %rdx
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[11] x A[7]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 136(%rdi)
+ movq %r9, 144(%rdi)
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ # A[11] x A[8]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[11] x A[9]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 152(%rdi)
+ movq %r8, 160(%rdi)
+ movq 176(%rdi), %r10
+ movq 184(%rdi), %r8
+ # A[11] x A[10]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[13] x A[9]
+ movq 104(%rsi), %rdx
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 168(%rdi)
+ movq %r10, 176(%rdi)
+ movq 192(%rdi), %r9
+ movq %r11, %r10
+ # A[13] x A[10]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[13] x A[11]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 184(%rdi)
+ movq %r9, 192(%rdi)
+ movq %r11, %r8
+ # A[13] x A[12]
+ mulxq 96(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 200(%rdi)
+ # Carry
+ adcxq %r12, %r8
+ movq %r11, %r12
+ adcxq %r11, %r12
+ adoxq %r11, %r12
+ movq %r8, 208(%rdi)
+ # Diagonal 7
+ # No load %r14 - %r8
+ # No load %r15 - %r9
+ # No load %rbx - %r10
+ # A[7] x A[6]
+ movq 48(%rsi), %rdx
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # A[8] x A[6]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r14
+ # No store %r15
+ movq 128(%rdi), %r8
+ movq 136(%rdi), %r9
+ # A[9] x A[6]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r8
+ # A[10] x A[6]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # No store %rbx
+ movq %r8, 128(%rdi)
+ movq 144(%rdi), %r10
+ movq 152(%rdi), %r8
+ # A[10] x A[7]
+ movq 80(%rsi), %rdx
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[10] x A[8]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 136(%rdi)
+ movq %r10, 144(%rdi)
+ movq 160(%rdi), %r9
+ movq 168(%rdi), %r10
+ # A[10] x A[9]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[14] x A[6]
+ movq 112(%rsi), %rdx
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 152(%rdi)
+ movq %r9, 160(%rdi)
+ movq 176(%rdi), %r8
+ movq 184(%rdi), %r9
+ # A[14] x A[7]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[14] x A[8]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 168(%rdi)
+ movq %r8, 176(%rdi)
+ movq 192(%rdi), %r10
+ movq 200(%rdi), %r8
+ # A[14] x A[9]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[14] x A[10]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 184(%rdi)
+ movq %r10, 192(%rdi)
+ movq 208(%rdi), %r9
+ movq %r11, %r10
+ # A[14] x A[11]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[14] x A[12]
+ mulxq 96(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 200(%rdi)
+ movq %r9, 208(%rdi)
+ movq %r11, %r8
+ # A[14] x A[13]
+ mulxq 104(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 216(%rdi)
+ # Carry
+ adcxq %r12, %r8
+ movq %r11, %r12
+ adcxq %r11, %r12
+ adoxq %r11, %r12
+ movq %r8, 224(%rdi)
+ # Diagonal 8
+ # No load %rbx - %r8
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ # A[8] x A[7]
+ movq 56(%rsi), %rdx
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r9
+ # A[9] x A[7]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # No store %rbx
+ movq %r9, 128(%rdi)
+ movq 144(%rdi), %r8
+ movq 152(%rdi), %r9
+ # A[9] x A[8]
+ movq 64(%rsi), %rdx
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[15] x A[3]
+ movq 120(%rsi), %rdx
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 136(%rdi)
+ movq %r8, 144(%rdi)
+ movq 160(%rdi), %r10
+ movq 168(%rdi), %r8
+ # A[15] x A[4]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[15] x A[5]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 152(%rdi)
+ movq %r10, 160(%rdi)
+ movq 176(%rdi), %r9
+ movq 184(%rdi), %r10
+ # A[15] x A[6]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[15] x A[7]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 168(%rdi)
+ movq %r9, 176(%rdi)
+ movq 192(%rdi), %r8
+ movq 200(%rdi), %r9
+ # A[15] x A[8]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[15] x A[9]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r10, 184(%rdi)
+ movq %r8, 192(%rdi)
+ movq 208(%rdi), %r10
+ movq 216(%rdi), %r8
+ # A[15] x A[10]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[15] x A[11]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r9, 200(%rdi)
+ movq %r10, 208(%rdi)
+ movq 224(%rdi), %r9
+ movq %r11, %r10
+ # A[15] x A[12]
+ mulxq 96(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[15] x A[13]
+ mulxq 104(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r8, 216(%rdi)
+ movq %r9, 224(%rdi)
+ movq %r11, %r8
+ # A[15] x A[14]
+ mulxq 112(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 232(%rdi)
+ # Carry
+ adcxq %r12, %r8
+ movq %r11, %r12
+ adcxq %r11, %r12
+ adoxq %r11, %r12
+ movq %r8, 240(%rdi)
+ movq %r12, 248(%rdi)
+ # Double and Add in A[i] x A[i]
+ movq 8(%rbp), %r9
+ # A[0] x A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ movq %rax, (%rbp)
+ adoxq %r9, %r9
+ adcxq %rcx, %r9
+ movq %r9, 8(%rbp)
+ movq 16(%rbp), %r8
+ movq 24(%rbp), %r9
+ # A[1] x A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 16(%rbp)
+ movq %r9, 24(%rbp)
+ movq 32(%rbp), %r8
+ movq 40(%rbp), %r9
+ # A[2] x A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 32(%rbp)
+ movq %r9, 40(%rbp)
+ movq 48(%rbp), %r8
+ movq 56(%rbp), %r9
+ # A[3] x A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 48(%rbp)
+ movq %r9, 56(%rbp)
+ movq 64(%rbp), %r8
+ movq 72(%rbp), %r9
+ # A[4] x A[4]
+ movq 32(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 64(%rbp)
+ movq %r9, 72(%rbp)
+ movq 80(%rbp), %r8
+ movq 88(%rbp), %r9
+ # A[5] x A[5]
+ movq 40(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 80(%rbp)
+ movq %r9, 88(%rbp)
+ # A[6] x A[6]
+ movq 48(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r13, %r13
+ adoxq %r14, %r14
+ adcxq %rax, %r13
+ adcxq %rcx, %r14
+ # A[7] x A[7]
+ movq 56(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r15, %r15
+ adoxq %rbx, %rbx
+ adcxq %rax, %r15
+ adcxq %rcx, %rbx
+ movq 128(%rdi), %r8
+ movq 136(%rdi), %r9
+ # A[8] x A[8]
+ movq 64(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 128(%rdi)
+ movq %r9, 136(%rdi)
+ movq 144(%rdi), %r8
+ movq 152(%rdi), %r9
+ # A[9] x A[9]
+ movq 72(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 144(%rdi)
+ movq %r9, 152(%rdi)
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ # A[10] x A[10]
+ movq 80(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 160(%rdi)
+ movq %r9, 168(%rdi)
+ movq 176(%rdi), %r8
+ movq 184(%rdi), %r9
+ # A[11] x A[11]
+ movq 88(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 176(%rdi)
+ movq %r9, 184(%rdi)
+ movq 192(%rdi), %r8
+ movq 200(%rdi), %r9
+ # A[12] x A[12]
+ movq 96(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 192(%rdi)
+ movq %r9, 200(%rdi)
+ movq 208(%rdi), %r8
+ movq 216(%rdi), %r9
+ # A[13] x A[13]
+ movq 104(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 208(%rdi)
+ movq %r9, 216(%rdi)
+ movq 224(%rdi), %r8
+ movq 232(%rdi), %r9
+ # A[14] x A[14]
+ movq 112(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 224(%rdi)
+ movq %r9, 232(%rdi)
+ movq 240(%rdi), %r8
+ movq 248(%rdi), %r9
+ # A[15] x A[15]
+ movq 120(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 240(%rdi)
+ movq %r9, 248(%rdi)
+ movq %r13, 96(%rdi)
+ movq %r14, 104(%rdi)
+ movq %r15, 112(%rdi)
+ movq %rbx, 120(%rdi)
+ cmpq %rdi, %rsi
+ jne L_end_2048_sqr_avx2_16
+ vmovdqu (%rbp), %xmm0
+ vmovups %xmm0, (%rdi)
+ vmovdqu 16(%rbp), %xmm0
+ vmovups %xmm0, 16(%rdi)
+ vmovdqu 32(%rbp), %xmm0
+ vmovups %xmm0, 32(%rdi)
+ vmovdqu 48(%rbp), %xmm0
+ vmovups %xmm0, 48(%rdi)
+ vmovdqu 64(%rbp), %xmm0
+ vmovups %xmm0, 64(%rdi)
+ vmovdqu 80(%rbp), %xmm0
+ vmovups %xmm0, 80(%rdi)
+L_end_2048_sqr_avx2_16:
+ addq $128, %rsp
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_sqr_avx2_16,.-sp_2048_sqr_avx2_16
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Add b to a into r. (r = a + b)
+ *
+ * System V AMD64: rdi = r, rsi = a, rdx = b.
+ * Operands are 16 x 64-bit limbs (1024 bits), little-endian limb order.
+ * Returns the final carry (0 or 1) in rax.
+ *
+ * The add/adc chain is flag-critical: loads and stores are interleaved
+ * between the adcq steps precisely because movq does not modify CF.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_add_16
+.type sp_2048_add_16,@function
+.align 16
+sp_2048_add_16:
+#else
+.globl _sp_2048_add_16
+.p2align 4
+_sp_2048_add_16:
+#endif /* __APPLE__ */
+ # Add
+ movq (%rsi), %rcx
+ xorq %rax, %rax # rax = carry accumulator; also clears CF
+ addq (%rdx), %rcx # limb 0: plain add starts the carry chain
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ adcq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ adcq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ adcq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ adcq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ adcq 40(%rdx), %r8
+ movq 48(%rsi), %rcx
+ movq %r8, 40(%rdi)
+ adcq 48(%rdx), %rcx
+ movq 56(%rsi), %r8
+ movq %rcx, 48(%rdi)
+ adcq 56(%rdx), %r8
+ movq 64(%rsi), %rcx
+ movq %r8, 56(%rdi)
+ adcq 64(%rdx), %rcx
+ movq 72(%rsi), %r8
+ movq %rcx, 64(%rdi)
+ adcq 72(%rdx), %r8
+ movq 80(%rsi), %rcx
+ movq %r8, 72(%rdi)
+ adcq 80(%rdx), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%rdi)
+ adcq 88(%rdx), %r8
+ movq 96(%rsi), %rcx
+ movq %r8, 88(%rdi)
+ adcq 96(%rdx), %rcx
+ movq 104(%rsi), %r8
+ movq %rcx, 96(%rdi)
+ adcq 104(%rdx), %r8
+ movq 112(%rsi), %rcx
+ movq %r8, 104(%rdi)
+ adcq 112(%rdx), %rcx
+ movq 120(%rsi), %r8
+ movq %rcx, 112(%rdi)
+ adcq 120(%rdx), %r8
+ movq %r8, 120(%rdi)
+ adcq $0, %rax # rax = final CF (0 or 1)
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_add_16,.-sp_2048_add_16
+#endif /* __APPLE__ */
+/* Sub b from a into a. (a -= b)
+ *
+ * System V AMD64: rdi = a (in/out), rsi = b.
+ * Operands are 32 x 64-bit limbs (2048 bits), little-endian limb order.
+ * Returns 0 in rax when there is no final borrow, -1 (all ones) when
+ * the subtraction borrows (rax = 0 - CF via the trailing sbbq).
+ *
+ * The sub/sbb chain is flag-critical: loads and stores are interleaved
+ * between the sbbq steps precisely because movq does not modify CF.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_sub_in_place_32
+.type sp_2048_sub_in_place_32,@function
+.align 16
+sp_2048_sub_in_place_32:
+#else
+.globl _sp_2048_sub_in_place_32
+.p2align 4
+_sp_2048_sub_in_place_32:
+#endif /* __APPLE__ */
+ movq (%rdi), %rdx
+ xorq %rax, %rax # rax = borrow accumulator; also clears CF
+ subq (%rsi), %rdx # limb 0: plain sub starts the borrow chain
+ movq 8(%rdi), %rcx
+ movq %rdx, (%rdi)
+ sbbq 8(%rsi), %rcx
+ movq 16(%rdi), %rdx
+ movq %rcx, 8(%rdi)
+ sbbq 16(%rsi), %rdx
+ movq 24(%rdi), %rcx
+ movq %rdx, 16(%rdi)
+ sbbq 24(%rsi), %rcx
+ movq 32(%rdi), %rdx
+ movq %rcx, 24(%rdi)
+ sbbq 32(%rsi), %rdx
+ movq 40(%rdi), %rcx
+ movq %rdx, 32(%rdi)
+ sbbq 40(%rsi), %rcx
+ movq 48(%rdi), %rdx
+ movq %rcx, 40(%rdi)
+ sbbq 48(%rsi), %rdx
+ movq 56(%rdi), %rcx
+ movq %rdx, 48(%rdi)
+ sbbq 56(%rsi), %rcx
+ movq 64(%rdi), %rdx
+ movq %rcx, 56(%rdi)
+ sbbq 64(%rsi), %rdx
+ movq 72(%rdi), %rcx
+ movq %rdx, 64(%rdi)
+ sbbq 72(%rsi), %rcx
+ movq 80(%rdi), %rdx
+ movq %rcx, 72(%rdi)
+ sbbq 80(%rsi), %rdx
+ movq 88(%rdi), %rcx
+ movq %rdx, 80(%rdi)
+ sbbq 88(%rsi), %rcx
+ movq 96(%rdi), %rdx
+ movq %rcx, 88(%rdi)
+ sbbq 96(%rsi), %rdx
+ movq 104(%rdi), %rcx
+ movq %rdx, 96(%rdi)
+ sbbq 104(%rsi), %rcx
+ movq 112(%rdi), %rdx
+ movq %rcx, 104(%rdi)
+ sbbq 112(%rsi), %rdx
+ movq 120(%rdi), %rcx
+ movq %rdx, 112(%rdi)
+ sbbq 120(%rsi), %rcx
+ movq 128(%rdi), %rdx
+ movq %rcx, 120(%rdi)
+ sbbq 128(%rsi), %rdx
+ movq 136(%rdi), %rcx
+ movq %rdx, 128(%rdi)
+ sbbq 136(%rsi), %rcx
+ movq 144(%rdi), %rdx
+ movq %rcx, 136(%rdi)
+ sbbq 144(%rsi), %rdx
+ movq 152(%rdi), %rcx
+ movq %rdx, 144(%rdi)
+ sbbq 152(%rsi), %rcx
+ movq 160(%rdi), %rdx
+ movq %rcx, 152(%rdi)
+ sbbq 160(%rsi), %rdx
+ movq 168(%rdi), %rcx
+ movq %rdx, 160(%rdi)
+ sbbq 168(%rsi), %rcx
+ movq 176(%rdi), %rdx
+ movq %rcx, 168(%rdi)
+ sbbq 176(%rsi), %rdx
+ movq 184(%rdi), %rcx
+ movq %rdx, 176(%rdi)
+ sbbq 184(%rsi), %rcx
+ movq 192(%rdi), %rdx
+ movq %rcx, 184(%rdi)
+ sbbq 192(%rsi), %rdx
+ movq 200(%rdi), %rcx
+ movq %rdx, 192(%rdi)
+ sbbq 200(%rsi), %rcx
+ movq 208(%rdi), %rdx
+ movq %rcx, 200(%rdi)
+ sbbq 208(%rsi), %rdx
+ movq 216(%rdi), %rcx
+ movq %rdx, 208(%rdi)
+ sbbq 216(%rsi), %rcx
+ movq 224(%rdi), %rdx
+ movq %rcx, 216(%rdi)
+ sbbq 224(%rsi), %rdx
+ movq 232(%rdi), %rcx
+ movq %rdx, 224(%rdi)
+ sbbq 232(%rsi), %rcx
+ movq 240(%rdi), %rdx
+ movq %rcx, 232(%rdi)
+ sbbq 240(%rsi), %rdx
+ movq 248(%rdi), %rcx
+ movq %rdx, 240(%rdi)
+ sbbq 248(%rsi), %rcx
+ movq %rcx, 248(%rdi)
+ sbbq $0, %rax # rax = -CF: 0 = no borrow, -1 = borrow
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_sub_in_place_32,.-sp_2048_sub_in_place_32
+#endif /* __APPLE__ */
+/* Add b to a into r. (r = a + b)
+ *
+ * System V AMD64: rdi = r, rsi = a, rdx = b.
+ * Operands are 32 x 64-bit limbs (2048 bits), little-endian limb order.
+ * Returns the final carry (0 or 1) in rax.
+ *
+ * The add/adc chain is flag-critical: loads and stores are interleaved
+ * between the adcq steps precisely because movq does not modify CF.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_add_32
+.type sp_2048_add_32,@function
+.align 16
+sp_2048_add_32:
+#else
+.globl _sp_2048_add_32
+.p2align 4
+_sp_2048_add_32:
+#endif /* __APPLE__ */
+ # Add
+ movq (%rsi), %rcx
+ xorq %rax, %rax # rax = carry accumulator; also clears CF
+ addq (%rdx), %rcx # limb 0: plain add starts the carry chain
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ adcq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ adcq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ adcq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ adcq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ adcq 40(%rdx), %r8
+ movq 48(%rsi), %rcx
+ movq %r8, 40(%rdi)
+ adcq 48(%rdx), %rcx
+ movq 56(%rsi), %r8
+ movq %rcx, 48(%rdi)
+ adcq 56(%rdx), %r8
+ movq 64(%rsi), %rcx
+ movq %r8, 56(%rdi)
+ adcq 64(%rdx), %rcx
+ movq 72(%rsi), %r8
+ movq %rcx, 64(%rdi)
+ adcq 72(%rdx), %r8
+ movq 80(%rsi), %rcx
+ movq %r8, 72(%rdi)
+ adcq 80(%rdx), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%rdi)
+ adcq 88(%rdx), %r8
+ movq 96(%rsi), %rcx
+ movq %r8, 88(%rdi)
+ adcq 96(%rdx), %rcx
+ movq 104(%rsi), %r8
+ movq %rcx, 96(%rdi)
+ adcq 104(%rdx), %r8
+ movq 112(%rsi), %rcx
+ movq %r8, 104(%rdi)
+ adcq 112(%rdx), %rcx
+ movq 120(%rsi), %r8
+ movq %rcx, 112(%rdi)
+ adcq 120(%rdx), %r8
+ movq 128(%rsi), %rcx
+ movq %r8, 120(%rdi)
+ adcq 128(%rdx), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%rdi)
+ adcq 136(%rdx), %r8
+ movq 144(%rsi), %rcx
+ movq %r8, 136(%rdi)
+ adcq 144(%rdx), %rcx
+ movq 152(%rsi), %r8
+ movq %rcx, 144(%rdi)
+ adcq 152(%rdx), %r8
+ movq 160(%rsi), %rcx
+ movq %r8, 152(%rdi)
+ adcq 160(%rdx), %rcx
+ movq 168(%rsi), %r8
+ movq %rcx, 160(%rdi)
+ adcq 168(%rdx), %r8
+ movq 176(%rsi), %rcx
+ movq %r8, 168(%rdi)
+ adcq 176(%rdx), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%rdi)
+ adcq 184(%rdx), %r8
+ movq 192(%rsi), %rcx
+ movq %r8, 184(%rdi)
+ adcq 192(%rdx), %rcx
+ movq 200(%rsi), %r8
+ movq %rcx, 192(%rdi)
+ adcq 200(%rdx), %r8
+ movq 208(%rsi), %rcx
+ movq %r8, 200(%rdi)
+ adcq 208(%rdx), %rcx
+ movq 216(%rsi), %r8
+ movq %rcx, 208(%rdi)
+ adcq 216(%rdx), %r8
+ movq 224(%rsi), %rcx
+ movq %r8, 216(%rdi)
+ adcq 224(%rdx), %rcx
+ movq 232(%rsi), %r8
+ movq %rcx, 224(%rdi)
+ adcq 232(%rdx), %r8
+ movq 240(%rsi), %rcx
+ movq %r8, 232(%rdi)
+ adcq 240(%rdx), %rcx
+ movq 248(%rsi), %r8
+ movq %rcx, 240(%rdi)
+ adcq 248(%rdx), %r8
+ movq %r8, 248(%rdi)
+ adcq $0, %rax # rax = final CF (0 or 1)
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_add_32,.-sp_2048_add_32
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_mul_32
+.type sp_2048_mul_32,@function
+.align 16
+sp_2048_mul_32:
+#else
+.globl _sp_2048_mul_32
+.p2align 4
+_sp_2048_mul_32:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ subq $808, %rsp
+ movq %rdi, 768(%rsp)
+ movq %rsi, 776(%rsp)
+ movq %rdx, 784(%rsp)
+ leaq 512(%rsp), %r10
+ leaq 128(%rsi), %r12
+ # Add
+ movq (%rsi), %rax
+ xorq %r13, %r13
+ addq (%r12), %rax
+ movq 8(%rsi), %rcx
+ movq %rax, (%r10)
+ adcq 8(%r12), %rcx
+ movq 16(%rsi), %r8
+ movq %rcx, 8(%r10)
+ adcq 16(%r12), %r8
+ movq 24(%rsi), %rax
+ movq %r8, 16(%r10)
+ adcq 24(%r12), %rax
+ movq 32(%rsi), %rcx
+ movq %rax, 24(%r10)
+ adcq 32(%r12), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%r10)
+ adcq 40(%r12), %r8
+ movq 48(%rsi), %rax
+ movq %r8, 40(%r10)
+ adcq 48(%r12), %rax
+ movq 56(%rsi), %rcx
+ movq %rax, 48(%r10)
+ adcq 56(%r12), %rcx
+ movq 64(%rsi), %r8
+ movq %rcx, 56(%r10)
+ adcq 64(%r12), %r8
+ movq 72(%rsi), %rax
+ movq %r8, 64(%r10)
+ adcq 72(%r12), %rax
+ movq 80(%rsi), %rcx
+ movq %rax, 72(%r10)
+ adcq 80(%r12), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%r10)
+ adcq 88(%r12), %r8
+ movq 96(%rsi), %rax
+ movq %r8, 88(%r10)
+ adcq 96(%r12), %rax
+ movq 104(%rsi), %rcx
+ movq %rax, 96(%r10)
+ adcq 104(%r12), %rcx
+ movq 112(%rsi), %r8
+ movq %rcx, 104(%r10)
+ adcq 112(%r12), %r8
+ movq 120(%rsi), %rax
+ movq %r8, 112(%r10)
+ adcq 120(%r12), %rax
+ movq %rax, 120(%r10)
+ adcq $0, %r13
+ movq %r13, 792(%rsp)
+ leaq 640(%rsp), %r11
+ leaq 128(%rdx), %r12
+ # Add
+ movq (%rdx), %rax
+ xorq %r14, %r14
+ addq (%r12), %rax
+ movq 8(%rdx), %rcx
+ movq %rax, (%r11)
+ adcq 8(%r12), %rcx
+ movq 16(%rdx), %r8
+ movq %rcx, 8(%r11)
+ adcq 16(%r12), %r8
+ movq 24(%rdx), %rax
+ movq %r8, 16(%r11)
+ adcq 24(%r12), %rax
+ movq 32(%rdx), %rcx
+ movq %rax, 24(%r11)
+ adcq 32(%r12), %rcx
+ movq 40(%rdx), %r8
+ movq %rcx, 32(%r11)
+ adcq 40(%r12), %r8
+ movq 48(%rdx), %rax
+ movq %r8, 40(%r11)
+ adcq 48(%r12), %rax
+ movq 56(%rdx), %rcx
+ movq %rax, 48(%r11)
+ adcq 56(%r12), %rcx
+ movq 64(%rdx), %r8
+ movq %rcx, 56(%r11)
+ adcq 64(%r12), %r8
+ movq 72(%rdx), %rax
+ movq %r8, 64(%r11)
+ adcq 72(%r12), %rax
+ movq 80(%rdx), %rcx
+ movq %rax, 72(%r11)
+ adcq 80(%r12), %rcx
+ movq 88(%rdx), %r8
+ movq %rcx, 80(%r11)
+ adcq 88(%r12), %r8
+ movq 96(%rdx), %rax
+ movq %r8, 88(%r11)
+ adcq 96(%r12), %rax
+ movq 104(%rdx), %rcx
+ movq %rax, 96(%r11)
+ adcq 104(%r12), %rcx
+ movq 112(%rdx), %r8
+ movq %rcx, 104(%r11)
+ adcq 112(%r12), %r8
+ movq 120(%rdx), %rax
+ movq %r8, 112(%r11)
+ adcq 120(%r12), %rax
+ movq %rax, 120(%r11)
+ adcq $0, %r14
+ movq %r14, 800(%rsp)
+ movq %r11, %rdx
+ movq %r10, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_2048_mul_16@plt
+#else
+ callq _sp_2048_mul_16
+#endif /* __APPLE__ */
+ movq 784(%rsp), %rdx
+ movq 776(%rsp), %rsi
+ leaq 256(%rsp), %rdi
+ addq $128, %rdx
+ addq $128, %rsi
+#ifndef __APPLE__
+ callq sp_2048_mul_16@plt
+#else
+ callq _sp_2048_mul_16
+#endif /* __APPLE__ */
+ movq 784(%rsp), %rdx
+ movq 776(%rsp), %rsi
+ movq 768(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_2048_mul_16@plt
+#else
+ callq _sp_2048_mul_16
+#endif /* __APPLE__ */
+ movq 792(%rsp), %r13
+ movq 800(%rsp), %r14
+ movq 768(%rsp), %r15
+ movq %r13, %r9
+ leaq 512(%rsp), %r10
+ leaq 640(%rsp), %r11
+ andq %r14, %r9
+ negq %r13
+ negq %r14
+ addq $256, %r15
+ movq (%r10), %rax
+ movq (%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, (%r10)
+ movq %rcx, (%r11)
+ movq 8(%r10), %rax
+ movq 8(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 8(%r10)
+ movq %rcx, 8(%r11)
+ movq 16(%r10), %rax
+ movq 16(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 16(%r10)
+ movq %rcx, 16(%r11)
+ movq 24(%r10), %rax
+ movq 24(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 24(%r10)
+ movq %rcx, 24(%r11)
+ movq 32(%r10), %rax
+ movq 32(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 32(%r10)
+ movq %rcx, 32(%r11)
+ movq 40(%r10), %rax
+ movq 40(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 40(%r10)
+ movq %rcx, 40(%r11)
+ movq 48(%r10), %rax
+ movq 48(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 48(%r10)
+ movq %rcx, 48(%r11)
+ movq 56(%r10), %rax
+ movq 56(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 56(%r10)
+ movq %rcx, 56(%r11)
+ movq 64(%r10), %rax
+ movq 64(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 64(%r10)
+ movq %rcx, 64(%r11)
+ movq 72(%r10), %rax
+ movq 72(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 72(%r10)
+ movq %rcx, 72(%r11)
+ movq 80(%r10), %rax
+ movq 80(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 80(%r10)
+ movq %rcx, 80(%r11)
+ movq 88(%r10), %rax
+ movq 88(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 88(%r10)
+ movq %rcx, 88(%r11)
+ movq 96(%r10), %rax
+ movq 96(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 96(%r10)
+ movq %rcx, 96(%r11)
+ movq 104(%r10), %rax
+ movq 104(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 104(%r10)
+ movq %rcx, 104(%r11)
+ movq 112(%r10), %rax
+ movq 112(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 112(%r10)
+ movq %rcx, 112(%r11)
+ movq 120(%r10), %rax
+ movq 120(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 120(%r10)
+ movq %rcx, 120(%r11)
+ movq (%r10), %rax
+ addq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r11), %rax
+ movq %rax, 120(%r15)
+ adcq $0, %r9
+ leaq 256(%rsp), %r11
+ movq %rsp, %r10
+ movq (%r10), %rax
+ subq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%r11), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%r11), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%r11), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%r11), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%r11), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%r11), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%r11), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%r11), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%r11), %rcx
+ movq %rcx, 248(%r10)
+ sbbq $0, %r9
+ movq (%r10), %rax
+ subq (%rdi), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%rdi), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%rdi), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%rdi), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%rdi), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%rdi), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%rdi), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%rdi), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%rdi), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%rdi), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%rdi), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%rdi), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%rdi), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%rdi), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%rdi), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%rdi), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%rdi), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%rdi), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%rdi), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%rdi), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%rdi), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%rdi), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%rdi), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%rdi), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%rdi), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%rdi), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%rdi), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%rdi), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%rdi), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%rdi), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%rdi), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%rdi), %rcx
+ movq %rcx, 248(%r10)
+ sbbq $0, %r9
+ subq $128, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r10), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r10), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r10), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r10), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r10), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r10), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r10), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r10), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r10), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r10), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r10), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r10), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r10), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r10), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r10), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r10), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r10), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r10), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r10), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r10), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r10), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r10), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r10), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r10), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r10), %rax
+ movq 200(%r15), %rcx
+ movq %rax, 192(%r15)
+ adcq 200(%r10), %rcx
+ movq 208(%r15), %r8
+ movq %rcx, 200(%r15)
+ adcq 208(%r10), %r8
+ movq 216(%r15), %rax
+ movq %r8, 208(%r15)
+ adcq 216(%r10), %rax
+ movq 224(%r15), %rcx
+ movq %rax, 216(%r15)
+ adcq 224(%r10), %rcx
+ movq 232(%r15), %r8
+ movq %rcx, 224(%r15)
+ adcq 232(%r10), %r8
+ movq 240(%r15), %rax
+ movq %r8, 232(%r15)
+ adcq 240(%r10), %rax
+ movq 248(%r15), %rcx
+ movq %rax, 240(%r15)
+ adcq 248(%r10), %rcx
+ movq %rcx, 248(%r15)
+ adcq $0, %r9
+ movq %r9, 384(%rdi)
+ addq $128, %r15
+ # Add
+ movq (%r15), %rax
+ xorq %r9, %r9
+ addq (%r11), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r11), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r11), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r11), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r11), %rcx
+ movq %rcx, 128(%r15)
+ adcq $0, %r9
+ # Add to zero
+ movq 136(%r11), %rax
+ adcq $0, %rax
+ movq 144(%r11), %rcx
+ movq %rax, 136(%r15)
+ adcq $0, %rcx
+ movq 152(%r11), %r8
+ movq %rcx, 144(%r15)
+ adcq $0, %r8
+ movq 160(%r11), %rax
+ movq %r8, 152(%r15)
+ adcq $0, %rax
+ movq 168(%r11), %rcx
+ movq %rax, 160(%r15)
+ adcq $0, %rcx
+ movq 176(%r11), %r8
+ movq %rcx, 168(%r15)
+ adcq $0, %r8
+ movq 184(%r11), %rax
+ movq %r8, 176(%r15)
+ adcq $0, %rax
+ movq 192(%r11), %rcx
+ movq %rax, 184(%r15)
+ adcq $0, %rcx
+ movq 200(%r11), %r8
+ movq %rcx, 192(%r15)
+ adcq $0, %r8
+ movq 208(%r11), %rax
+ movq %r8, 200(%r15)
+ adcq $0, %rax
+ movq 216(%r11), %rcx
+ movq %rax, 208(%r15)
+ adcq $0, %rcx
+ movq 224(%r11), %r8
+ movq %rcx, 216(%r15)
+ adcq $0, %r8
+ movq 232(%r11), %rax
+ movq %r8, 224(%r15)
+ adcq $0, %rax
+ movq 240(%r11), %rcx
+ movq %rax, 232(%r15)
+ adcq $0, %rcx
+ movq 248(%r11), %r8
+ movq %rcx, 240(%r15)
+ adcq $0, %r8
+ movq %r8, 248(%r15)
+ addq $808, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_mul_32,.-sp_2048_mul_32
+#endif /* __APPLE__ */
+/* Add a to a into r. (r = a + a)
+ *
+ * Doubles a 16-word (1024-bit) integer with one rippling carry chain:
+ * the first word uses addq, the rest adcq.  Loads of the next word and
+ * stores of the previous word are interleaved between the adcs; neither
+ * movq touches the carry flag, so the chain stays intact.
+ *
+ * r   [%rdi]  A single precision integer (16 words) - result.
+ * a   [%rsi]  A single precision integer (16 words) - input.
+ *
+ * Returns the carry out of the top word in %rax.
+ */
+#ifndef __APPLE__
+.globl sp_2048_dbl_16
+.type sp_2048_dbl_16,@function
+.align 16
+sp_2048_dbl_16:
+#else
+.globl _sp_2048_dbl_16
+.p2align 4
+_sp_2048_dbl_16:
+#endif /* __APPLE__ */
+ movq (%rsi), %rdx
+ xorq %rax, %rax
+ addq %rdx, %rdx
+ movq 8(%rsi), %rcx
+ movq %rdx, (%rdi)
+ adcq %rcx, %rcx
+ movq 16(%rsi), %rdx
+ movq %rcx, 8(%rdi)
+ adcq %rdx, %rdx
+ movq 24(%rsi), %rcx
+ movq %rdx, 16(%rdi)
+ adcq %rcx, %rcx
+ movq 32(%rsi), %rdx
+ movq %rcx, 24(%rdi)
+ adcq %rdx, %rdx
+ movq 40(%rsi), %rcx
+ movq %rdx, 32(%rdi)
+ adcq %rcx, %rcx
+ movq 48(%rsi), %rdx
+ movq %rcx, 40(%rdi)
+ adcq %rdx, %rdx
+ movq 56(%rsi), %rcx
+ movq %rdx, 48(%rdi)
+ adcq %rcx, %rcx
+ movq 64(%rsi), %rdx
+ movq %rcx, 56(%rdi)
+ adcq %rdx, %rdx
+ movq 72(%rsi), %rcx
+ movq %rdx, 64(%rdi)
+ adcq %rcx, %rcx
+ movq 80(%rsi), %rdx
+ movq %rcx, 72(%rdi)
+ adcq %rdx, %rdx
+ movq 88(%rsi), %rcx
+ movq %rdx, 80(%rdi)
+ adcq %rcx, %rcx
+ movq 96(%rsi), %rdx
+ movq %rcx, 88(%rdi)
+ adcq %rdx, %rdx
+ movq 104(%rsi), %rcx
+ movq %rdx, 96(%rdi)
+ adcq %rcx, %rcx
+ movq 112(%rsi), %rdx
+ movq %rcx, 104(%rdi)
+ adcq %rdx, %rdx
+ movq 120(%rsi), %rcx
+ movq %rdx, 112(%rdi)
+ adcq %rcx, %rcx
+ movq %rcx, 120(%rdi)
+ # Return the final carry out of the top word in %rax (zeroed above).
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_dbl_16,.-sp_2048_dbl_16
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * One-level Karatsuba squaring of a 32-word (2048-bit) integer into a
+ * 64-word result.  With a = aH*2^1024 + aL:
+ *   a^2 = aL^2 + ((aL+aH)^2 - aL^2 - aH^2)*2^1024 + aH^2*2^2048
+ * Stack layout (664 bytes):
+ *   0(%rsp)..255    : (aL+aH)^2, later reduced to the middle term
+ *   256(%rsp)..511  : aH^2
+ *   512(%rsp)..639  : aL + aH (16-word sum)
+ *   640(%rsp)       : saved r, 648(%rsp) : saved a, 656(%rsp) : sum carry
+ * The long mov/adc/sbb ladders are single carry chains; the interleaved
+ * movq loads/stores do not affect flags and must not be reordered.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_sqr_32
+.type sp_2048_sqr_32,@function
+.align 16
+sp_2048_sqr_32:
+#else
+.globl _sp_2048_sqr_32
+.p2align 4
+_sp_2048_sqr_32:
+#endif /* __APPLE__ */
+ subq $664, %rsp
+ movq %rdi, 640(%rsp)
+ movq %rsi, 648(%rsp)
+ leaq 512(%rsp), %r8
+ leaq 128(%rsi), %r9
+ # Add
+ # 512(%rsp) = aL + aH; carry out kept in %rcx, saved at 656(%rsp).
+ movq (%rsi), %rdx
+ xorq %rcx, %rcx
+ addq (%r9), %rdx
+ movq 8(%rsi), %rax
+ movq %rdx, (%r8)
+ adcq 8(%r9), %rax
+ movq 16(%rsi), %rdx
+ movq %rax, 8(%r8)
+ adcq 16(%r9), %rdx
+ movq 24(%rsi), %rax
+ movq %rdx, 16(%r8)
+ adcq 24(%r9), %rax
+ movq 32(%rsi), %rdx
+ movq %rax, 24(%r8)
+ adcq 32(%r9), %rdx
+ movq 40(%rsi), %rax
+ movq %rdx, 32(%r8)
+ adcq 40(%r9), %rax
+ movq 48(%rsi), %rdx
+ movq %rax, 40(%r8)
+ adcq 48(%r9), %rdx
+ movq 56(%rsi), %rax
+ movq %rdx, 48(%r8)
+ adcq 56(%r9), %rax
+ movq 64(%rsi), %rdx
+ movq %rax, 56(%r8)
+ adcq 64(%r9), %rdx
+ movq 72(%rsi), %rax
+ movq %rdx, 64(%r8)
+ adcq 72(%r9), %rax
+ movq 80(%rsi), %rdx
+ movq %rax, 72(%r8)
+ adcq 80(%r9), %rdx
+ movq 88(%rsi), %rax
+ movq %rdx, 80(%r8)
+ adcq 88(%r9), %rax
+ movq 96(%rsi), %rdx
+ movq %rax, 88(%r8)
+ adcq 96(%r9), %rdx
+ movq 104(%rsi), %rax
+ movq %rdx, 96(%r8)
+ adcq 104(%r9), %rax
+ movq 112(%rsi), %rdx
+ movq %rax, 104(%r8)
+ adcq 112(%r9), %rdx
+ movq 120(%rsi), %rax
+ movq %rdx, 112(%r8)
+ adcq 120(%r9), %rax
+ movq %rax, 120(%r8)
+ adcq $0, %rcx
+ movq %rcx, 656(%rsp)
+ # (%rsp)[0..31] = (aL + aH)^2
+ movq %r8, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_2048_sqr_16@plt
+#else
+ callq _sp_2048_sqr_16
+#endif /* __APPLE__ */
+ # 256(%rsp)[0..31] = aH^2
+ movq 648(%rsp), %rsi
+ leaq 256(%rsp), %rdi
+ addq $128, %rsi
+#ifndef __APPLE__
+ callq sp_2048_sqr_16@plt
+#else
+ callq _sp_2048_sqr_16
+#endif /* __APPLE__ */
+ # r[0..31] = aL^2
+ movq 648(%rsp), %rsi
+ movq 640(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_2048_sqr_16@plt
+#else
+ callq _sp_2048_sqr_16
+#endif /* __APPLE__ */
+ # Carry fix-up for the dropped top bit of (aL+aH):
+ # %r10 = -(sum carry), a 0/all-ones mask; copy the masked sum words
+ # into r[32..47] and then double them (2 * carry * (aL+aH) * 2^2048).
+ # %rcx accumulates the running top carry from here on.
+ movq 656(%rsp), %r10
+ leaq 512(%rsp), %r8
+ movq %r10, %rcx
+ negq %r10
+ movq (%r8), %rdx
+ movq 8(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 256(%rdi)
+ movq %rax, 264(%rdi)
+ movq 16(%r8), %rdx
+ movq 24(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 272(%rdi)
+ movq %rax, 280(%rdi)
+ movq 32(%r8), %rdx
+ movq 40(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 288(%rdi)
+ movq %rax, 296(%rdi)
+ movq 48(%r8), %rdx
+ movq 56(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 304(%rdi)
+ movq %rax, 312(%rdi)
+ movq 64(%r8), %rdx
+ movq 72(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 320(%rdi)
+ movq %rax, 328(%rdi)
+ movq 80(%r8), %rdx
+ movq 88(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 336(%rdi)
+ movq %rax, 344(%rdi)
+ movq 96(%r8), %rdx
+ movq 104(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 352(%rdi)
+ movq %rax, 360(%rdi)
+ movq 112(%r8), %rdx
+ movq 120(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 368(%rdi)
+ movq %rax, 376(%rdi)
+ # Double r[32..47] in place (shift-left-by-one via add/adc chain).
+ movq 256(%rdi), %rdx
+ addq %rdx, %rdx
+ movq 264(%rdi), %rax
+ movq %rdx, 256(%rdi)
+ adcq %rax, %rax
+ movq 272(%rdi), %rdx
+ movq %rax, 264(%rdi)
+ adcq %rdx, %rdx
+ movq 280(%rdi), %rax
+ movq %rdx, 272(%rdi)
+ adcq %rax, %rax
+ movq 288(%rdi), %rdx
+ movq %rax, 280(%rdi)
+ adcq %rdx, %rdx
+ movq 296(%rdi), %rax
+ movq %rdx, 288(%rdi)
+ adcq %rax, %rax
+ movq 304(%rdi), %rdx
+ movq %rax, 296(%rdi)
+ adcq %rdx, %rdx
+ movq 312(%rdi), %rax
+ movq %rdx, 304(%rdi)
+ adcq %rax, %rax
+ movq 320(%rdi), %rdx
+ movq %rax, 312(%rdi)
+ adcq %rdx, %rdx
+ movq 328(%rdi), %rax
+ movq %rdx, 320(%rdi)
+ adcq %rax, %rax
+ movq 336(%rdi), %rdx
+ movq %rax, 328(%rdi)
+ adcq %rdx, %rdx
+ movq 344(%rdi), %rax
+ movq %rdx, 336(%rdi)
+ adcq %rax, %rax
+ movq 352(%rdi), %rdx
+ movq %rax, 344(%rdi)
+ adcq %rdx, %rdx
+ movq 360(%rdi), %rax
+ movq %rdx, 352(%rdi)
+ adcq %rax, %rax
+ movq 368(%rdi), %rdx
+ movq %rax, 360(%rdi)
+ adcq %rdx, %rdx
+ movq 376(%rdi), %rax
+ movq %rdx, 368(%rdi)
+ adcq %rax, %rax
+ movq %rax, 376(%rdi)
+ adcq $0, %rcx
+ # (%rsp) -= aH^2  (32-word borrow chain; borrow folded into %rcx)
+ leaq 256(%rsp), %rsi
+ movq %rsp, %r8
+ movq (%r8), %rdx
+ subq (%rsi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rsi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rsi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rsi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rsi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rsi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rsi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rsi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rsi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rsi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rsi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rsi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rsi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rsi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rsi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rsi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rsi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rsi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rsi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rsi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rsi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rsi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rsi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rsi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rsi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rsi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rsi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rsi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rsi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rsi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rsi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rsi), %rax
+ movq %rax, 248(%r8)
+ sbbq $0, %rcx
+ # (%rsp) -= aL^2, leaving the middle term 2*aL*aH at (%rsp)
+ movq (%r8), %rdx
+ subq (%rdi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rdi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rdi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rdi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rdi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rdi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rdi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rdi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rdi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rdi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rdi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rdi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rdi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rdi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rdi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rdi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rdi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rdi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rdi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rdi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rdi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rdi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rdi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rdi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rdi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rdi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rdi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rdi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rdi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rdi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rdi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rdi), %rax
+ movq %rax, 248(%r8)
+ sbbq $0, %rcx
+ # Add in place
+ # r[16..47] += middle term; running carry lands in r[48].
+ movq 128(%rdi), %rdx
+ addq (%r8), %rdx
+ movq 136(%rdi), %rax
+ movq %rdx, 128(%rdi)
+ adcq 8(%r8), %rax
+ movq 144(%rdi), %rdx
+ movq %rax, 136(%rdi)
+ adcq 16(%r8), %rdx
+ movq 152(%rdi), %rax
+ movq %rdx, 144(%rdi)
+ adcq 24(%r8), %rax
+ movq 160(%rdi), %rdx
+ movq %rax, 152(%rdi)
+ adcq 32(%r8), %rdx
+ movq 168(%rdi), %rax
+ movq %rdx, 160(%rdi)
+ adcq 40(%r8), %rax
+ movq 176(%rdi), %rdx
+ movq %rax, 168(%rdi)
+ adcq 48(%r8), %rdx
+ movq 184(%rdi), %rax
+ movq %rdx, 176(%rdi)
+ adcq 56(%r8), %rax
+ movq 192(%rdi), %rdx
+ movq %rax, 184(%rdi)
+ adcq 64(%r8), %rdx
+ movq 200(%rdi), %rax
+ movq %rdx, 192(%rdi)
+ adcq 72(%r8), %rax
+ movq 208(%rdi), %rdx
+ movq %rax, 200(%rdi)
+ adcq 80(%r8), %rdx
+ movq 216(%rdi), %rax
+ movq %rdx, 208(%rdi)
+ adcq 88(%r8), %rax
+ movq 224(%rdi), %rdx
+ movq %rax, 216(%rdi)
+ adcq 96(%r8), %rdx
+ movq 232(%rdi), %rax
+ movq %rdx, 224(%rdi)
+ adcq 104(%r8), %rax
+ movq 240(%rdi), %rdx
+ movq %rax, 232(%rdi)
+ adcq 112(%r8), %rdx
+ movq 248(%rdi), %rax
+ movq %rdx, 240(%rdi)
+ adcq 120(%r8), %rax
+ movq 256(%rdi), %rdx
+ movq %rax, 248(%rdi)
+ adcq 128(%r8), %rdx
+ movq 264(%rdi), %rax
+ movq %rdx, 256(%rdi)
+ adcq 136(%r8), %rax
+ movq 272(%rdi), %rdx
+ movq %rax, 264(%rdi)
+ adcq 144(%r8), %rdx
+ movq 280(%rdi), %rax
+ movq %rdx, 272(%rdi)
+ adcq 152(%r8), %rax
+ movq 288(%rdi), %rdx
+ movq %rax, 280(%rdi)
+ adcq 160(%r8), %rdx
+ movq 296(%rdi), %rax
+ movq %rdx, 288(%rdi)
+ adcq 168(%r8), %rax
+ movq 304(%rdi), %rdx
+ movq %rax, 296(%rdi)
+ adcq 176(%r8), %rdx
+ movq 312(%rdi), %rax
+ movq %rdx, 304(%rdi)
+ adcq 184(%r8), %rax
+ movq 320(%rdi), %rdx
+ movq %rax, 312(%rdi)
+ adcq 192(%r8), %rdx
+ movq 328(%rdi), %rax
+ movq %rdx, 320(%rdi)
+ adcq 200(%r8), %rax
+ movq 336(%rdi), %rdx
+ movq %rax, 328(%rdi)
+ adcq 208(%r8), %rdx
+ movq 344(%rdi), %rax
+ movq %rdx, 336(%rdi)
+ adcq 216(%r8), %rax
+ movq 352(%rdi), %rdx
+ movq %rax, 344(%rdi)
+ adcq 224(%r8), %rdx
+ movq 360(%rdi), %rax
+ movq %rdx, 352(%rdi)
+ adcq 232(%r8), %rax
+ movq 368(%rdi), %rdx
+ movq %rax, 360(%rdi)
+ adcq 240(%r8), %rdx
+ movq 376(%rdi), %rax
+ movq %rdx, 368(%rdi)
+ adcq 248(%r8), %rax
+ movq %rax, 376(%rdi)
+ adcq $0, %rcx
+ movq %rcx, 384(%rdi)
+ # Add in place
+ # r[32..48] += low half of aH^2 (still at 256(%rsp), via %rsi).
+ movq 256(%rdi), %rdx
+ xorq %rcx, %rcx
+ addq (%rsi), %rdx
+ movq 264(%rdi), %rax
+ movq %rdx, 256(%rdi)
+ adcq 8(%rsi), %rax
+ movq 272(%rdi), %rdx
+ movq %rax, 264(%rdi)
+ adcq 16(%rsi), %rdx
+ movq 280(%rdi), %rax
+ movq %rdx, 272(%rdi)
+ adcq 24(%rsi), %rax
+ movq 288(%rdi), %rdx
+ movq %rax, 280(%rdi)
+ adcq 32(%rsi), %rdx
+ movq 296(%rdi), %rax
+ movq %rdx, 288(%rdi)
+ adcq 40(%rsi), %rax
+ movq 304(%rdi), %rdx
+ movq %rax, 296(%rdi)
+ adcq 48(%rsi), %rdx
+ movq 312(%rdi), %rax
+ movq %rdx, 304(%rdi)
+ adcq 56(%rsi), %rax
+ movq 320(%rdi), %rdx
+ movq %rax, 312(%rdi)
+ adcq 64(%rsi), %rdx
+ movq 328(%rdi), %rax
+ movq %rdx, 320(%rdi)
+ adcq 72(%rsi), %rax
+ movq 336(%rdi), %rdx
+ movq %rax, 328(%rdi)
+ adcq 80(%rsi), %rdx
+ movq 344(%rdi), %rax
+ movq %rdx, 336(%rdi)
+ adcq 88(%rsi), %rax
+ movq 352(%rdi), %rdx
+ movq %rax, 344(%rdi)
+ adcq 96(%rsi), %rdx
+ movq 360(%rdi), %rax
+ movq %rdx, 352(%rdi)
+ adcq 104(%rsi), %rax
+ movq 368(%rdi), %rdx
+ movq %rax, 360(%rdi)
+ adcq 112(%rsi), %rdx
+ movq 376(%rdi), %rax
+ movq %rdx, 368(%rdi)
+ adcq 120(%rsi), %rax
+ movq 384(%rdi), %rdx
+ movq %rax, 376(%rdi)
+ adcq 128(%rsi), %rdx
+ movq %rdx, 384(%rdi)
+ adcq $0, %rcx
+ # Add to zero
+ # Propagate the carry through the high half of aH^2 into r[49..63].
+ movq 136(%rsi), %rdx
+ adcq $0, %rdx
+ movq 144(%rsi), %rax
+ movq %rdx, 392(%rdi)
+ adcq $0, %rax
+ movq 152(%rsi), %rdx
+ movq %rax, 400(%rdi)
+ adcq $0, %rdx
+ movq 160(%rsi), %rax
+ movq %rdx, 408(%rdi)
+ adcq $0, %rax
+ movq 168(%rsi), %rdx
+ movq %rax, 416(%rdi)
+ adcq $0, %rdx
+ movq 176(%rsi), %rax
+ movq %rdx, 424(%rdi)
+ adcq $0, %rax
+ movq 184(%rsi), %rdx
+ movq %rax, 432(%rdi)
+ adcq $0, %rdx
+ movq 192(%rsi), %rax
+ movq %rdx, 440(%rdi)
+ adcq $0, %rax
+ movq 200(%rsi), %rdx
+ movq %rax, 448(%rdi)
+ adcq $0, %rdx
+ movq 208(%rsi), %rax
+ movq %rdx, 456(%rdi)
+ adcq $0, %rax
+ movq 216(%rsi), %rdx
+ movq %rax, 464(%rdi)
+ adcq $0, %rdx
+ movq 224(%rsi), %rax
+ movq %rdx, 472(%rdi)
+ adcq $0, %rax
+ movq 232(%rsi), %rdx
+ movq %rax, 480(%rdi)
+ adcq $0, %rdx
+ movq 240(%rsi), %rax
+ movq %rdx, 488(%rdi)
+ adcq $0, %rax
+ movq 248(%rsi), %rdx
+ movq %rax, 496(%rdi)
+ adcq $0, %rdx
+ movq %rdx, 504(%rdi)
+ addq $664, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_sqr_32,.-sp_2048_sqr_32
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One-level Karatsuba multiply of two 32-word (2048-bit) integers into
+ * a 64-word result, delegating the three 16x16-word products to
+ * sp_2048_mul_avx2_16.  With a = aH*2^1024 + aL, b = bH*2^1024 + bL:
+ *   a*b = aL*bL + ((aL+aH)*(bL+bH) - aL*bL - aH*bH)*2^1024 + aH*bH*2^2048
+ * Stack layout (808 bytes):
+ *   0(%rsp)..255   : (aL+aH)*(bL+bH), later reduced to the middle term
+ *   256(%rsp)..511 : aH*bH
+ *   512(%rsp)..639 : aL+aH, 640(%rsp)..767 : bL+bH
+ *   768/776/784    : saved r, a, b; 792/800 : sum carries ca, cb
+ * Requires BMI2: pextq with a 0/all-ones mask is used as a branch-free
+ * conditional select (mask = -carry) for the sum-carry fix-up terms.
+ * The adc/sbb ladders are single carry chains; instruction order is
+ * flag-critical and must not be changed.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_mul_avx2_32
+.type sp_2048_mul_avx2_32,@function
+.align 16
+sp_2048_mul_avx2_32:
+#else
+.globl _sp_2048_mul_avx2_32
+.p2align 4
+_sp_2048_mul_avx2_32:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ subq $808, %rsp
+ movq %rdi, 768(%rsp)
+ movq %rsi, 776(%rsp)
+ movq %rdx, 784(%rsp)
+ leaq 512(%rsp), %r10
+ leaq 128(%rsi), %r12
+ # Add
+ # 512(%rsp) = aL + aH; carry ca kept in %r13, saved at 792(%rsp).
+ movq (%rsi), %rax
+ xorq %r13, %r13
+ addq (%r12), %rax
+ movq 8(%rsi), %rcx
+ movq %rax, (%r10)
+ adcq 8(%r12), %rcx
+ movq 16(%rsi), %r8
+ movq %rcx, 8(%r10)
+ adcq 16(%r12), %r8
+ movq 24(%rsi), %rax
+ movq %r8, 16(%r10)
+ adcq 24(%r12), %rax
+ movq 32(%rsi), %rcx
+ movq %rax, 24(%r10)
+ adcq 32(%r12), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%r10)
+ adcq 40(%r12), %r8
+ movq 48(%rsi), %rax
+ movq %r8, 40(%r10)
+ adcq 48(%r12), %rax
+ movq 56(%rsi), %rcx
+ movq %rax, 48(%r10)
+ adcq 56(%r12), %rcx
+ movq 64(%rsi), %r8
+ movq %rcx, 56(%r10)
+ adcq 64(%r12), %r8
+ movq 72(%rsi), %rax
+ movq %r8, 64(%r10)
+ adcq 72(%r12), %rax
+ movq 80(%rsi), %rcx
+ movq %rax, 72(%r10)
+ adcq 80(%r12), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%r10)
+ adcq 88(%r12), %r8
+ movq 96(%rsi), %rax
+ movq %r8, 88(%r10)
+ adcq 96(%r12), %rax
+ movq 104(%rsi), %rcx
+ movq %rax, 96(%r10)
+ adcq 104(%r12), %rcx
+ movq 112(%rsi), %r8
+ movq %rcx, 104(%r10)
+ adcq 112(%r12), %r8
+ movq 120(%rsi), %rax
+ movq %r8, 112(%r10)
+ adcq 120(%r12), %rax
+ movq %rax, 120(%r10)
+ adcq $0, %r13
+ movq %r13, 792(%rsp)
+ leaq 640(%rsp), %r11
+ leaq 128(%rdx), %r12
+ # Add
+ # 640(%rsp) = bL + bH; carry cb kept in %r14, saved at 800(%rsp).
+ movq (%rdx), %rax
+ xorq %r14, %r14
+ addq (%r12), %rax
+ movq 8(%rdx), %rcx
+ movq %rax, (%r11)
+ adcq 8(%r12), %rcx
+ movq 16(%rdx), %r8
+ movq %rcx, 8(%r11)
+ adcq 16(%r12), %r8
+ movq 24(%rdx), %rax
+ movq %r8, 16(%r11)
+ adcq 24(%r12), %rax
+ movq 32(%rdx), %rcx
+ movq %rax, 24(%r11)
+ adcq 32(%r12), %rcx
+ movq 40(%rdx), %r8
+ movq %rcx, 32(%r11)
+ adcq 40(%r12), %r8
+ movq 48(%rdx), %rax
+ movq %r8, 40(%r11)
+ adcq 48(%r12), %rax
+ movq 56(%rdx), %rcx
+ movq %rax, 48(%r11)
+ adcq 56(%r12), %rcx
+ movq 64(%rdx), %r8
+ movq %rcx, 56(%r11)
+ adcq 64(%r12), %r8
+ movq 72(%rdx), %rax
+ movq %r8, 64(%r11)
+ adcq 72(%r12), %rax
+ movq 80(%rdx), %rcx
+ movq %rax, 72(%r11)
+ adcq 80(%r12), %rcx
+ movq 88(%rdx), %r8
+ movq %rcx, 80(%r11)
+ adcq 88(%r12), %r8
+ movq 96(%rdx), %rax
+ movq %r8, 88(%r11)
+ adcq 96(%r12), %rax
+ movq 104(%rdx), %rcx
+ movq %rax, 96(%r11)
+ adcq 104(%r12), %rcx
+ movq 112(%rdx), %r8
+ movq %rcx, 104(%r11)
+ adcq 112(%r12), %r8
+ movq 120(%rdx), %rax
+ movq %r8, 112(%r11)
+ adcq 120(%r12), %rax
+ movq %rax, 120(%r11)
+ adcq $0, %r14
+ movq %r14, 800(%rsp)
+ # (%rsp)[0..31] = (aL+aH) * (bL+bH)
+ movq %r11, %rdx
+ movq %r10, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_2048_mul_avx2_16@plt
+#else
+ callq _sp_2048_mul_avx2_16
+#endif /* __APPLE__ */
+ # 256(%rsp)[0..31] = aH * bH
+ movq 784(%rsp), %rdx
+ movq 776(%rsp), %rsi
+ leaq 256(%rsp), %rdi
+ addq $128, %rdx
+ addq $128, %rsi
+#ifndef __APPLE__
+ callq sp_2048_mul_avx2_16@plt
+#else
+ callq _sp_2048_mul_avx2_16
+#endif /* __APPLE__ */
+ # r[0..31] = aL * bL
+ movq 784(%rsp), %rdx
+ movq 776(%rsp), %rsi
+ movq 768(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_2048_mul_avx2_16@plt
+#else
+ callq _sp_2048_mul_avx2_16
+#endif /* __APPLE__ */
+ # Sum-carry fix-up: r[32..47] = cb*(aL+aH) + ca*(bL+bH), with
+ # %r9 = ca & cb as the initial top carry.  %r13/%r14 are negated to
+ # 0/all-ones masks; pextq with an all-ones mask copies the source,
+ # with a zero mask yields 0 (branch-free conditional select).
+ movq 792(%rsp), %r13
+ movq 800(%rsp), %r14
+ movq 768(%rsp), %r15
+ movq %r13, %r9
+ leaq 512(%rsp), %r10
+ leaq 640(%rsp), %r11
+ andq %r14, %r9
+ negq %r13
+ negq %r14
+ addq $256, %r15
+ movq (%r10), %rax
+ movq (%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ addq %rcx, %rax
+ movq 8(%r10), %rcx
+ movq 8(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, (%r15)
+ adcq %r8, %rcx
+ movq 16(%r10), %r8
+ movq 16(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 8(%r15)
+ adcq %rax, %r8
+ movq 24(%r10), %rax
+ movq 24(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 16(%r15)
+ adcq %rcx, %rax
+ movq 32(%r10), %rcx
+ movq 32(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 24(%r15)
+ adcq %r8, %rcx
+ movq 40(%r10), %r8
+ movq 40(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 32(%r15)
+ adcq %rax, %r8
+ movq 48(%r10), %rax
+ movq 48(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 40(%r15)
+ adcq %rcx, %rax
+ movq 56(%r10), %rcx
+ movq 56(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 48(%r15)
+ adcq %r8, %rcx
+ movq 64(%r10), %r8
+ movq 64(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 56(%r15)
+ adcq %rax, %r8
+ movq 72(%r10), %rax
+ movq 72(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 64(%r15)
+ adcq %rcx, %rax
+ movq 80(%r10), %rcx
+ movq 80(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 72(%r15)
+ adcq %r8, %rcx
+ movq 88(%r10), %r8
+ movq 88(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 80(%r15)
+ adcq %rax, %r8
+ movq 96(%r10), %rax
+ movq 96(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 88(%r15)
+ adcq %rcx, %rax
+ movq 104(%r10), %rcx
+ movq 104(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 96(%r15)
+ adcq %r8, %rcx
+ movq 112(%r10), %r8
+ movq 112(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 104(%r15)
+ adcq %rax, %r8
+ movq 120(%r10), %rax
+ movq 120(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 112(%r15)
+ adcq %rcx, %rax
+ movq %rax, 120(%r15)
+ adcq $0, %r9
+ # (%rsp) -= aH*bH  (32-word borrow chain; borrow folded into %r9)
+ leaq 256(%rsp), %r11
+ movq %rsp, %r10
+ movq (%r10), %rax
+ subq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%r11), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%r11), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%r11), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%r11), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%r11), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%r11), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%r11), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%r11), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%r11), %rcx
+ movq %rcx, 248(%r10)
+ sbbq $0, %r9
+ # (%rsp) -= aL*bL, leaving the middle Karatsuba term at (%rsp)
+ movq (%r10), %rax
+ subq (%rdi), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%rdi), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%rdi), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%rdi), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%rdi), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%rdi), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%rdi), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%rdi), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%rdi), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%rdi), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%rdi), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%rdi), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%rdi), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%rdi), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%rdi), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%rdi), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%rdi), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%rdi), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%rdi), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%rdi), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%rdi), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%rdi), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%rdi), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%rdi), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%rdi), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%rdi), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%rdi), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%rdi), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%rdi), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%rdi), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%rdi), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%rdi), %rcx
+ movq %rcx, 248(%r10)
+ sbbq $0, %r9
+ subq $128, %r15
+ # Add
+ # r[16..47] += middle term; running carry stored at r[48].
+ movq (%r15), %rax
+ addq (%r10), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r10), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r10), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r10), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r10), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r10), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r10), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r10), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r10), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r10), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r10), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r10), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r10), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r10), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r10), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r10), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r10), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r10), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r10), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r10), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r10), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r10), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r10), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r10), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r10), %rax
+ movq 200(%r15), %rcx
+ movq %rax, 192(%r15)
+ adcq 200(%r10), %rcx
+ movq 208(%r15), %r8
+ movq %rcx, 200(%r15)
+ adcq 208(%r10), %r8
+ movq 216(%r15), %rax
+ movq %r8, 208(%r15)
+ adcq 216(%r10), %rax
+ movq 224(%r15), %rcx
+ movq %rax, 216(%r15)
+ adcq 224(%r10), %rcx
+ movq 232(%r15), %r8
+ movq %rcx, 224(%r15)
+ adcq 232(%r10), %r8
+ movq 240(%r15), %rax
+ movq %r8, 232(%r15)
+ adcq 240(%r10), %rax
+ movq 248(%r15), %rcx
+ movq %rax, 240(%r15)
+ adcq 248(%r10), %rcx
+ movq %rcx, 248(%r15)
+ adcq $0, %r9
+ movq %r9, 384(%rdi)
+ addq $128, %r15
+ # Add
+ # r[32..48] += low half of aH*bH (at 256(%rsp), via %r11).
+ movq (%r15), %rax
+ xorq %r9, %r9
+ addq (%r11), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r11), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r11), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r11), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r11), %rcx
+ movq %rcx, 128(%r15)
+ adcq $0, %r9
+ # Add to zero
+ # Propagate the carry through the high half of aH*bH into r[49..63].
+ movq 136(%r11), %rax
+ adcq $0, %rax
+ movq 144(%r11), %rcx
+ movq %rax, 136(%r15)
+ adcq $0, %rcx
+ movq 152(%r11), %r8
+ movq %rcx, 144(%r15)
+ adcq $0, %r8
+ movq 160(%r11), %rax
+ movq %r8, 152(%r15)
+ adcq $0, %rax
+ movq 168(%r11), %rcx
+ movq %rax, 160(%r15)
+ adcq $0, %rcx
+ movq 176(%r11), %r8
+ movq %rcx, 168(%r15)
+ adcq $0, %r8
+ movq 184(%r11), %rax
+ movq %r8, 176(%r15)
+ adcq $0, %rax
+ movq 192(%r11), %rcx
+ movq %rax, 184(%r15)
+ adcq $0, %rcx
+ movq 200(%r11), %r8
+ movq %rcx, 192(%r15)
+ adcq $0, %r8
+ movq 208(%r11), %rax
+ movq %r8, 200(%r15)
+ adcq $0, %rax
+ movq 216(%r11), %rcx
+ movq %rax, 208(%r15)
+ adcq $0, %rcx
+ movq 224(%r11), %r8
+ movq %rcx, 216(%r15)
+ adcq $0, %r8
+ movq 232(%r11), %rax
+ movq %r8, 224(%r15)
+ adcq $0, %rax
+ movq 240(%r11), %rcx
+ movq %rax, 232(%r15)
+ adcq $0, %rcx
+ movq 248(%r11), %r8
+ movq %rcx, 240(%r15)
+ adcq $0, %r8
+ movq %r8, 248(%r15)
+ addq $808, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_mul_avx2_32,.-sp_2048_mul_avx2_32
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * Karatsuba-style squaring of a 32-digit (2048-bit) number using three
+ * 16-digit AVX2 squarings.  With a = a1*2^1024 + a0 it computes
+ * (a0+a1)^2, a1^2 and a0^2 and recombines them into the 64-digit result.
+ *
+ * SysV AMD64: rdi = r (64 x 64-bit words), rsi = a (32 x 64-bit words).
+ * Stack frame (664 bytes):
+ *     0(%rsp)   z1 scratch = (a0+a1)^2   (32 words)
+ *   256(%rsp)   z2 = a1^2                (32 words)
+ *   512(%rsp)   a0 + a1                  (16 words)
+ *   640(%rsp)   saved r,  648(%rsp) saved a,  656(%rsp) carry of a0+a1
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_sqr_avx2_32
+.type sp_2048_sqr_avx2_32,@function
+.align 16
+sp_2048_sqr_avx2_32:
+#else
+.globl _sp_2048_sqr_avx2_32
+.p2align 4
+_sp_2048_sqr_avx2_32:
+#endif /* __APPLE__ */
+        subq $664, %rsp
+        movq %rdi, 640(%rsp)
+        movq %rsi, 648(%rsp)
+        leaq 512(%rsp), %r8
+        leaq 128(%rsi), %r9
+        # Add
+        # 512(%rsp) = a0 + a1; the final carry is kept in %rcx.
+        movq (%rsi), %rdx
+        xorq %rcx, %rcx
+        addq (%r9), %rdx
+        movq 8(%rsi), %rax
+        movq %rdx, (%r8)
+        adcq 8(%r9), %rax
+        movq 16(%rsi), %rdx
+        movq %rax, 8(%r8)
+        adcq 16(%r9), %rdx
+        movq 24(%rsi), %rax
+        movq %rdx, 16(%r8)
+        adcq 24(%r9), %rax
+        movq 32(%rsi), %rdx
+        movq %rax, 24(%r8)
+        adcq 32(%r9), %rdx
+        movq 40(%rsi), %rax
+        movq %rdx, 32(%r8)
+        adcq 40(%r9), %rax
+        movq 48(%rsi), %rdx
+        movq %rax, 40(%r8)
+        adcq 48(%r9), %rdx
+        movq 56(%rsi), %rax
+        movq %rdx, 48(%r8)
+        adcq 56(%r9), %rax
+        movq 64(%rsi), %rdx
+        movq %rax, 56(%r8)
+        adcq 64(%r9), %rdx
+        movq 72(%rsi), %rax
+        movq %rdx, 64(%r8)
+        adcq 72(%r9), %rax
+        movq 80(%rsi), %rdx
+        movq %rax, 72(%r8)
+        adcq 80(%r9), %rdx
+        movq 88(%rsi), %rax
+        movq %rdx, 80(%r8)
+        adcq 88(%r9), %rax
+        movq 96(%rsi), %rdx
+        movq %rax, 88(%r8)
+        adcq 96(%r9), %rdx
+        movq 104(%rsi), %rax
+        movq %rdx, 96(%r8)
+        adcq 104(%r9), %rax
+        movq 112(%rsi), %rdx
+        movq %rax, 104(%r8)
+        adcq 112(%r9), %rdx
+        movq 120(%rsi), %rax
+        movq %rdx, 112(%r8)
+        adcq 120(%r9), %rax
+        movq %rax, 120(%r8)
+        adcq $0, %rcx
+        movq %rcx, 656(%rsp)
+        # z1 = (a0 + a1)^2 -> 0(%rsp)
+        movq %r8, %rsi
+        movq %rsp, %rdi
+#ifndef __APPLE__
+        callq sp_2048_sqr_avx2_16@plt
+#else
+        callq _sp_2048_sqr_avx2_16
+#endif /* __APPLE__ */
+        # z2 = a1^2 -> 256(%rsp)
+        movq 648(%rsp), %rsi
+        leaq 256(%rsp), %rdi
+        addq $128, %rsi
+#ifndef __APPLE__
+        callq sp_2048_sqr_avx2_16@plt
+#else
+        callq _sp_2048_sqr_avx2_16
+#endif /* __APPLE__ */
+        # z0 = a0^2 -> low half of r
+        movq 648(%rsp), %rsi
+        movq 640(%rsp), %rdi
+#ifndef __APPLE__
+        callq sp_2048_sqr_avx2_16@plt
+#else
+        callq _sp_2048_sqr_avx2_16
+#endif /* __APPLE__ */
+        # r[32..47] = 2*(a0+a1), but only when the a0+a1 addition carried:
+        # %r10 = -carry is an all-ones/zero mask, and pextq with an all-ones
+        # mask is the identity (zero mask extracts nothing), so each word is
+        # selected or zeroed before being doubled through the carry chain.
+        movq 656(%rsp), %r10
+        leaq 512(%rsp), %r8
+        movq %r10, %rcx
+        negq %r10
+        movq (%r8), %rdx
+        pextq %r10, %rdx, %rdx
+        addq %rdx, %rdx
+        movq 8(%r8), %rax
+        movq %rdx, 256(%rdi)
+        pextq %r10, %rax, %rax
+        adcq %rax, %rax
+        movq 16(%r8), %rdx
+        movq %rax, 264(%rdi)
+        pextq %r10, %rdx, %rdx
+        adcq %rdx, %rdx
+        movq 24(%r8), %rax
+        movq %rdx, 272(%rdi)
+        pextq %r10, %rax, %rax
+        adcq %rax, %rax
+        movq 32(%r8), %rdx
+        movq %rax, 280(%rdi)
+        pextq %r10, %rdx, %rdx
+        adcq %rdx, %rdx
+        movq 40(%r8), %rax
+        movq %rdx, 288(%rdi)
+        pextq %r10, %rax, %rax
+        adcq %rax, %rax
+        movq 48(%r8), %rdx
+        movq %rax, 296(%rdi)
+        pextq %r10, %rdx, %rdx
+        adcq %rdx, %rdx
+        movq 56(%r8), %rax
+        movq %rdx, 304(%rdi)
+        pextq %r10, %rax, %rax
+        adcq %rax, %rax
+        movq 64(%r8), %rdx
+        movq %rax, 312(%rdi)
+        pextq %r10, %rdx, %rdx
+        adcq %rdx, %rdx
+        movq 72(%r8), %rax
+        movq %rdx, 320(%rdi)
+        pextq %r10, %rax, %rax
+        adcq %rax, %rax
+        movq 80(%r8), %rdx
+        movq %rax, 328(%rdi)
+        pextq %r10, %rdx, %rdx
+        adcq %rdx, %rdx
+        movq 88(%r8), %rax
+        movq %rdx, 336(%rdi)
+        pextq %r10, %rax, %rax
+        adcq %rax, %rax
+        movq 96(%r8), %rdx
+        movq %rax, 344(%rdi)
+        pextq %r10, %rdx, %rdx
+        adcq %rdx, %rdx
+        movq 104(%r8), %rax
+        movq %rdx, 352(%rdi)
+        pextq %r10, %rax, %rax
+        adcq %rax, %rax
+        movq 112(%r8), %rdx
+        movq %rax, 360(%rdi)
+        pextq %r10, %rdx, %rdx
+        adcq %rdx, %rdx
+        movq 120(%r8), %rax
+        movq %rdx, 368(%rdi)
+        pextq %r10, %rax, %rax
+        adcq %rax, %rax
+        movq %rax, 376(%rdi)
+        adcq $0, %rcx
+        # z1 -= z2 (in place at 0(%rsp)); running borrow folded into %rcx.
+        leaq 256(%rsp), %rsi
+        movq %rsp, %r8
+        movq (%r8), %rdx
+        subq (%rsi), %rdx
+        movq 8(%r8), %rax
+        movq %rdx, (%r8)
+        sbbq 8(%rsi), %rax
+        movq 16(%r8), %rdx
+        movq %rax, 8(%r8)
+        sbbq 16(%rsi), %rdx
+        movq 24(%r8), %rax
+        movq %rdx, 16(%r8)
+        sbbq 24(%rsi), %rax
+        movq 32(%r8), %rdx
+        movq %rax, 24(%r8)
+        sbbq 32(%rsi), %rdx
+        movq 40(%r8), %rax
+        movq %rdx, 32(%r8)
+        sbbq 40(%rsi), %rax
+        movq 48(%r8), %rdx
+        movq %rax, 40(%r8)
+        sbbq 48(%rsi), %rdx
+        movq 56(%r8), %rax
+        movq %rdx, 48(%r8)
+        sbbq 56(%rsi), %rax
+        movq 64(%r8), %rdx
+        movq %rax, 56(%r8)
+        sbbq 64(%rsi), %rdx
+        movq 72(%r8), %rax
+        movq %rdx, 64(%r8)
+        sbbq 72(%rsi), %rax
+        movq 80(%r8), %rdx
+        movq %rax, 72(%r8)
+        sbbq 80(%rsi), %rdx
+        movq 88(%r8), %rax
+        movq %rdx, 80(%r8)
+        sbbq 88(%rsi), %rax
+        movq 96(%r8), %rdx
+        movq %rax, 88(%r8)
+        sbbq 96(%rsi), %rdx
+        movq 104(%r8), %rax
+        movq %rdx, 96(%r8)
+        sbbq 104(%rsi), %rax
+        movq 112(%r8), %rdx
+        movq %rax, 104(%r8)
+        sbbq 112(%rsi), %rdx
+        movq 120(%r8), %rax
+        movq %rdx, 112(%r8)
+        sbbq 120(%rsi), %rax
+        movq 128(%r8), %rdx
+        movq %rax, 120(%r8)
+        sbbq 128(%rsi), %rdx
+        movq 136(%r8), %rax
+        movq %rdx, 128(%r8)
+        sbbq 136(%rsi), %rax
+        movq 144(%r8), %rdx
+        movq %rax, 136(%r8)
+        sbbq 144(%rsi), %rdx
+        movq 152(%r8), %rax
+        movq %rdx, 144(%r8)
+        sbbq 152(%rsi), %rax
+        movq 160(%r8), %rdx
+        movq %rax, 152(%r8)
+        sbbq 160(%rsi), %rdx
+        movq 168(%r8), %rax
+        movq %rdx, 160(%r8)
+        sbbq 168(%rsi), %rax
+        movq 176(%r8), %rdx
+        movq %rax, 168(%r8)
+        sbbq 176(%rsi), %rdx
+        movq 184(%r8), %rax
+        movq %rdx, 176(%r8)
+        sbbq 184(%rsi), %rax
+        movq 192(%r8), %rdx
+        movq %rax, 184(%r8)
+        sbbq 192(%rsi), %rdx
+        movq 200(%r8), %rax
+        movq %rdx, 192(%r8)
+        sbbq 200(%rsi), %rax
+        movq 208(%r8), %rdx
+        movq %rax, 200(%r8)
+        sbbq 208(%rsi), %rdx
+        movq 216(%r8), %rax
+        movq %rdx, 208(%r8)
+        sbbq 216(%rsi), %rax
+        movq 224(%r8), %rdx
+        movq %rax, 216(%r8)
+        sbbq 224(%rsi), %rdx
+        movq 232(%r8), %rax
+        movq %rdx, 224(%r8)
+        sbbq 232(%rsi), %rax
+        movq 240(%r8), %rdx
+        movq %rax, 232(%r8)
+        sbbq 240(%rsi), %rdx
+        movq 248(%r8), %rax
+        movq %rdx, 240(%r8)
+        sbbq 248(%rsi), %rax
+        movq %rax, 248(%r8)
+        sbbq $0, %rcx
+        # z1 -= z0 (r currently holds a0^2 in its low 32 words).
+        movq (%r8), %rdx
+        subq (%rdi), %rdx
+        movq 8(%r8), %rax
+        movq %rdx, (%r8)
+        sbbq 8(%rdi), %rax
+        movq 16(%r8), %rdx
+        movq %rax, 8(%r8)
+        sbbq 16(%rdi), %rdx
+        movq 24(%r8), %rax
+        movq %rdx, 16(%r8)
+        sbbq 24(%rdi), %rax
+        movq 32(%r8), %rdx
+        movq %rax, 24(%r8)
+        sbbq 32(%rdi), %rdx
+        movq 40(%r8), %rax
+        movq %rdx, 32(%r8)
+        sbbq 40(%rdi), %rax
+        movq 48(%r8), %rdx
+        movq %rax, 40(%r8)
+        sbbq 48(%rdi), %rdx
+        movq 56(%r8), %rax
+        movq %rdx, 48(%r8)
+        sbbq 56(%rdi), %rax
+        movq 64(%r8), %rdx
+        movq %rax, 56(%r8)
+        sbbq 64(%rdi), %rdx
+        movq 72(%r8), %rax
+        movq %rdx, 64(%r8)
+        sbbq 72(%rdi), %rax
+        movq 80(%r8), %rdx
+        movq %rax, 72(%r8)
+        sbbq 80(%rdi), %rdx
+        movq 88(%r8), %rax
+        movq %rdx, 80(%r8)
+        sbbq 88(%rdi), %rax
+        movq 96(%r8), %rdx
+        movq %rax, 88(%r8)
+        sbbq 96(%rdi), %rdx
+        movq 104(%r8), %rax
+        movq %rdx, 96(%r8)
+        sbbq 104(%rdi), %rax
+        movq 112(%r8), %rdx
+        movq %rax, 104(%r8)
+        sbbq 112(%rdi), %rdx
+        movq 120(%r8), %rax
+        movq %rdx, 112(%r8)
+        sbbq 120(%rdi), %rax
+        movq 128(%r8), %rdx
+        movq %rax, 120(%r8)
+        sbbq 128(%rdi), %rdx
+        movq 136(%r8), %rax
+        movq %rdx, 128(%r8)
+        sbbq 136(%rdi), %rax
+        movq 144(%r8), %rdx
+        movq %rax, 136(%r8)
+        sbbq 144(%rdi), %rdx
+        movq 152(%r8), %rax
+        movq %rdx, 144(%r8)
+        sbbq 152(%rdi), %rax
+        movq 160(%r8), %rdx
+        movq %rax, 152(%r8)
+        sbbq 160(%rdi), %rdx
+        movq 168(%r8), %rax
+        movq %rdx, 160(%r8)
+        sbbq 168(%rdi), %rax
+        movq 176(%r8), %rdx
+        movq %rax, 168(%r8)
+        sbbq 176(%rdi), %rdx
+        movq 184(%r8), %rax
+        movq %rdx, 176(%r8)
+        sbbq 184(%rdi), %rax
+        movq 192(%r8), %rdx
+        movq %rax, 184(%r8)
+        sbbq 192(%rdi), %rdx
+        movq 200(%r8), %rax
+        movq %rdx, 192(%r8)
+        sbbq 200(%rdi), %rax
+        movq 208(%r8), %rdx
+        movq %rax, 200(%r8)
+        sbbq 208(%rdi), %rdx
+        movq 216(%r8), %rax
+        movq %rdx, 208(%r8)
+        sbbq 216(%rdi), %rax
+        movq 224(%r8), %rdx
+        movq %rax, 216(%r8)
+        sbbq 224(%rdi), %rdx
+        movq 232(%r8), %rax
+        movq %rdx, 224(%r8)
+        sbbq 232(%rdi), %rax
+        movq 240(%r8), %rdx
+        movq %rax, 232(%r8)
+        sbbq 240(%rdi), %rdx
+        movq 248(%r8), %rax
+        movq %rdx, 240(%r8)
+        sbbq 248(%rdi), %rax
+        movq %rax, 248(%r8)
+        sbbq $0, %rcx
+        # Add in place
+        # r[16..47] += z1; the accumulated carry/borrow lands in r[48].
+        movq 128(%rdi), %rdx
+        addq (%r8), %rdx
+        movq 136(%rdi), %rax
+        movq %rdx, 128(%rdi)
+        adcq 8(%r8), %rax
+        movq 144(%rdi), %rdx
+        movq %rax, 136(%rdi)
+        adcq 16(%r8), %rdx
+        movq 152(%rdi), %rax
+        movq %rdx, 144(%rdi)
+        adcq 24(%r8), %rax
+        movq 160(%rdi), %rdx
+        movq %rax, 152(%rdi)
+        adcq 32(%r8), %rdx
+        movq 168(%rdi), %rax
+        movq %rdx, 160(%rdi)
+        adcq 40(%r8), %rax
+        movq 176(%rdi), %rdx
+        movq %rax, 168(%rdi)
+        adcq 48(%r8), %rdx
+        movq 184(%rdi), %rax
+        movq %rdx, 176(%rdi)
+        adcq 56(%r8), %rax
+        movq 192(%rdi), %rdx
+        movq %rax, 184(%rdi)
+        adcq 64(%r8), %rdx
+        movq 200(%rdi), %rax
+        movq %rdx, 192(%rdi)
+        adcq 72(%r8), %rax
+        movq 208(%rdi), %rdx
+        movq %rax, 200(%rdi)
+        adcq 80(%r8), %rdx
+        movq 216(%rdi), %rax
+        movq %rdx, 208(%rdi)
+        adcq 88(%r8), %rax
+        movq 224(%rdi), %rdx
+        movq %rax, 216(%rdi)
+        adcq 96(%r8), %rdx
+        movq 232(%rdi), %rax
+        movq %rdx, 224(%rdi)
+        adcq 104(%r8), %rax
+        movq 240(%rdi), %rdx
+        movq %rax, 232(%rdi)
+        adcq 112(%r8), %rdx
+        movq 248(%rdi), %rax
+        movq %rdx, 240(%rdi)
+        adcq 120(%r8), %rax
+        movq 256(%rdi), %rdx
+        movq %rax, 248(%rdi)
+        adcq 128(%r8), %rdx
+        movq 264(%rdi), %rax
+        movq %rdx, 256(%rdi)
+        adcq 136(%r8), %rax
+        movq 272(%rdi), %rdx
+        movq %rax, 264(%rdi)
+        adcq 144(%r8), %rdx
+        movq 280(%rdi), %rax
+        movq %rdx, 272(%rdi)
+        adcq 152(%r8), %rax
+        movq 288(%rdi), %rdx
+        movq %rax, 280(%rdi)
+        adcq 160(%r8), %rdx
+        movq 296(%rdi), %rax
+        movq %rdx, 288(%rdi)
+        adcq 168(%r8), %rax
+        movq 304(%rdi), %rdx
+        movq %rax, 296(%rdi)
+        adcq 176(%r8), %rdx
+        movq 312(%rdi), %rax
+        movq %rdx, 304(%rdi)
+        adcq 184(%r8), %rax
+        movq 320(%rdi), %rdx
+        movq %rax, 312(%rdi)
+        adcq 192(%r8), %rdx
+        movq 328(%rdi), %rax
+        movq %rdx, 320(%rdi)
+        adcq 200(%r8), %rax
+        movq 336(%rdi), %rdx
+        movq %rax, 328(%rdi)
+        adcq 208(%r8), %rdx
+        movq 344(%rdi), %rax
+        movq %rdx, 336(%rdi)
+        adcq 216(%r8), %rax
+        movq 352(%rdi), %rdx
+        movq %rax, 344(%rdi)
+        adcq 224(%r8), %rdx
+        movq 360(%rdi), %rax
+        movq %rdx, 352(%rdi)
+        adcq 232(%r8), %rax
+        movq 368(%rdi), %rdx
+        movq %rax, 360(%rdi)
+        adcq 240(%r8), %rdx
+        movq 376(%rdi), %rax
+        movq %rdx, 368(%rdi)
+        adcq 248(%r8), %rax
+        movq %rax, 376(%rdi)
+        adcq $0, %rcx
+        movq %rcx, 384(%rdi)
+        # Add in place
+        # r[32..48] += z2[0..16] (low half of a1^2 plus its first carry word).
+        movq 256(%rdi), %rdx
+        xorq %rcx, %rcx
+        addq (%rsi), %rdx
+        movq 264(%rdi), %rax
+        movq %rdx, 256(%rdi)
+        adcq 8(%rsi), %rax
+        movq 272(%rdi), %rdx
+        movq %rax, 264(%rdi)
+        adcq 16(%rsi), %rdx
+        movq 280(%rdi), %rax
+        movq %rdx, 272(%rdi)
+        adcq 24(%rsi), %rax
+        movq 288(%rdi), %rdx
+        movq %rax, 280(%rdi)
+        adcq 32(%rsi), %rdx
+        movq 296(%rdi), %rax
+        movq %rdx, 288(%rdi)
+        adcq 40(%rsi), %rax
+        movq 304(%rdi), %rdx
+        movq %rax, 296(%rdi)
+        adcq 48(%rsi), %rdx
+        movq 312(%rdi), %rax
+        movq %rdx, 304(%rdi)
+        adcq 56(%rsi), %rax
+        movq 320(%rdi), %rdx
+        movq %rax, 312(%rdi)
+        adcq 64(%rsi), %rdx
+        movq 328(%rdi), %rax
+        movq %rdx, 320(%rdi)
+        adcq 72(%rsi), %rax
+        movq 336(%rdi), %rdx
+        movq %rax, 328(%rdi)
+        adcq 80(%rsi), %rdx
+        movq 344(%rdi), %rax
+        movq %rdx, 336(%rdi)
+        adcq 88(%rsi), %rax
+        movq 352(%rdi), %rdx
+        movq %rax, 344(%rdi)
+        adcq 96(%rsi), %rdx
+        movq 360(%rdi), %rax
+        movq %rdx, 352(%rdi)
+        adcq 104(%rsi), %rax
+        movq 368(%rdi), %rdx
+        movq %rax, 360(%rdi)
+        adcq 112(%rsi), %rdx
+        movq 376(%rdi), %rax
+        movq %rdx, 368(%rdi)
+        adcq 120(%rsi), %rax
+        movq 384(%rdi), %rdx
+        movq %rax, 376(%rdi)
+        adcq 128(%rsi), %rdx
+        movq %rdx, 384(%rdi)
+        adcq $0, %rcx
+        # Add to zero
+        # Propagate the remaining carry through z2[17..31] into r[49..63].
+        movq 136(%rsi), %rdx
+        adcq $0, %rdx
+        movq 144(%rsi), %rax
+        movq %rdx, 392(%rdi)
+        adcq $0, %rax
+        movq 152(%rsi), %rdx
+        movq %rax, 400(%rdi)
+        adcq $0, %rdx
+        movq 160(%rsi), %rax
+        movq %rdx, 408(%rdi)
+        adcq $0, %rax
+        movq 168(%rsi), %rdx
+        movq %rax, 416(%rdi)
+        adcq $0, %rdx
+        movq 176(%rsi), %rax
+        movq %rdx, 424(%rdi)
+        adcq $0, %rax
+        movq 184(%rsi), %rdx
+        movq %rax, 432(%rdi)
+        adcq $0, %rdx
+        movq 192(%rsi), %rax
+        movq %rdx, 440(%rdi)
+        adcq $0, %rax
+        movq 200(%rsi), %rdx
+        movq %rax, 448(%rdi)
+        adcq $0, %rdx
+        movq 208(%rsi), %rax
+        movq %rdx, 456(%rdi)
+        adcq $0, %rax
+        movq 216(%rsi), %rdx
+        movq %rax, 464(%rdi)
+        adcq $0, %rdx
+        movq 224(%rsi), %rax
+        movq %rdx, 472(%rdi)
+        adcq $0, %rax
+        movq 232(%rsi), %rdx
+        movq %rax, 480(%rdi)
+        adcq $0, %rdx
+        movq 240(%rsi), %rax
+        movq %rdx, 488(%rdi)
+        adcq $0, %rax
+        movq 248(%rsi), %rdx
+        movq %rax, 496(%rdi)
+        adcq $0, %rdx
+        movq %rdx, 504(%rdi)
+        addq $664, %rsp
+        repz retq
+#ifndef __APPLE__
+.size sp_2048_sqr_avx2_32,.-sp_2048_sqr_avx2_32
+#endif /* __APPLE__ */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Schoolbook multiply of a 32-digit number by one 64-bit digit.
+ * SysV AMD64: rdi = r (33 words written, r[32] holds the top carry),
+ * rsi = a (32 words), rdx = b.  Uses a rolling three-register
+ * accumulator (r8/r9/r10): each step stores the finished low word and
+ * zeroes the register that becomes the new high word.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_2048_mul_d_32
+.type sp_2048_mul_d_32,@function
+.align 16
+sp_2048_mul_d_32:
+#else
+.globl _sp_2048_mul_d_32
+.p2align 4
+_sp_2048_mul_d_32:
+#endif /* __APPLE__ */
+        # b is kept in %rcx since one-operand mulq clobbers %rdx:%rax.
+        movq %rdx, %rcx
+        # A[0] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq (%rsi)
+        movq %rax, %r8
+        movq %rdx, %r9
+        movq %r8, (%rdi)
+        # A[1] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 8(%rsi)
+        addq %rax, %r9
+        movq %r9, 8(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[2] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 16(%rsi)
+        addq %rax, %r10
+        movq %r10, 16(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[3] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 24(%rsi)
+        addq %rax, %r8
+        movq %r8, 24(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[4] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 32(%rsi)
+        addq %rax, %r9
+        movq %r9, 32(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[5] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 40(%rsi)
+        addq %rax, %r10
+        movq %r10, 40(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[6] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 48(%rsi)
+        addq %rax, %r8
+        movq %r8, 48(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[7] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 56(%rsi)
+        addq %rax, %r9
+        movq %r9, 56(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[8] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 64(%rsi)
+        addq %rax, %r10
+        movq %r10, 64(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[9] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 72(%rsi)
+        addq %rax, %r8
+        movq %r8, 72(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[10] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 80(%rsi)
+        addq %rax, %r9
+        movq %r9, 80(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[11] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 88(%rsi)
+        addq %rax, %r10
+        movq %r10, 88(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[12] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 96(%rsi)
+        addq %rax, %r8
+        movq %r8, 96(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[13] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 104(%rsi)
+        addq %rax, %r9
+        movq %r9, 104(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[14] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 112(%rsi)
+        addq %rax, %r10
+        movq %r10, 112(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[15] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 120(%rsi)
+        addq %rax, %r8
+        movq %r8, 120(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[16] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 128(%rsi)
+        addq %rax, %r9
+        movq %r9, 128(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[17] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 136(%rsi)
+        addq %rax, %r10
+        movq %r10, 136(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[18] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 144(%rsi)
+        addq %rax, %r8
+        movq %r8, 144(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[19] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 152(%rsi)
+        addq %rax, %r9
+        movq %r9, 152(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[20] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 160(%rsi)
+        addq %rax, %r10
+        movq %r10, 160(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[21] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 168(%rsi)
+        addq %rax, %r8
+        movq %r8, 168(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[22] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 176(%rsi)
+        addq %rax, %r9
+        movq %r9, 176(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[23] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 184(%rsi)
+        addq %rax, %r10
+        movq %r10, 184(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[24] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 192(%rsi)
+        addq %rax, %r8
+        movq %r8, 192(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[25] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 200(%rsi)
+        addq %rax, %r9
+        movq %r9, 200(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[26] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 208(%rsi)
+        addq %rax, %r10
+        movq %r10, 208(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[27] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 216(%rsi)
+        addq %rax, %r8
+        movq %r8, 216(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[28] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 224(%rsi)
+        addq %rax, %r9
+        movq %r9, 224(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[29] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 232(%rsi)
+        addq %rax, %r10
+        movq %r10, 232(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[30] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 240(%rsi)
+        addq %rax, %r8
+        movq %r8, 240(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[31] * B
+        # Last digit: no further carry-out register needed.
+        movq %rcx, %rax
+        mulq 248(%rsi)
+        addq %rax, %r9
+        adcq %rdx, %r10
+        movq %r9, 248(%rdi)
+        movq %r10, 256(%rdi)
+        repz retq
+#ifndef __APPLE__
+.size sp_2048_mul_d_32,.-sp_2048_mul_d_32
+#endif /* __APPLE__ */
+/* Sub b from a into a. (a -= b)
+ *
+ * SysV AMD64: rdi = a (16 words, updated in place), rsi = b (16 words).
+ * Returns the borrow in rax: 0 when no borrow, -1 (all ones) when the
+ * subtraction underflowed (rax = 0 - CF via the final sbbq).
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_sub_in_place_16
+.type sp_2048_sub_in_place_16,@function
+.align 16
+sp_2048_sub_in_place_16:
+#else
+.globl _sp_2048_sub_in_place_16
+.p2align 4
+_sp_2048_sub_in_place_16:
+#endif /* __APPLE__ */
+        # Loads/stores are interleaved with the sbbq chain; only movq is
+        # used between borrows because movq does not modify CF.
+        movq (%rdi), %rdx
+        xorq %rax, %rax
+        subq (%rsi), %rdx
+        movq 8(%rdi), %rcx
+        movq %rdx, (%rdi)
+        sbbq 8(%rsi), %rcx
+        movq 16(%rdi), %rdx
+        movq %rcx, 8(%rdi)
+        sbbq 16(%rsi), %rdx
+        movq 24(%rdi), %rcx
+        movq %rdx, 16(%rdi)
+        sbbq 24(%rsi), %rcx
+        movq 32(%rdi), %rdx
+        movq %rcx, 24(%rdi)
+        sbbq 32(%rsi), %rdx
+        movq 40(%rdi), %rcx
+        movq %rdx, 32(%rdi)
+        sbbq 40(%rsi), %rcx
+        movq 48(%rdi), %rdx
+        movq %rcx, 40(%rdi)
+        sbbq 48(%rsi), %rdx
+        movq 56(%rdi), %rcx
+        movq %rdx, 48(%rdi)
+        sbbq 56(%rsi), %rcx
+        movq 64(%rdi), %rdx
+        movq %rcx, 56(%rdi)
+        sbbq 64(%rsi), %rdx
+        movq 72(%rdi), %rcx
+        movq %rdx, 64(%rdi)
+        sbbq 72(%rsi), %rcx
+        movq 80(%rdi), %rdx
+        movq %rcx, 72(%rdi)
+        sbbq 80(%rsi), %rdx
+        movq 88(%rdi), %rcx
+        movq %rdx, 80(%rdi)
+        sbbq 88(%rsi), %rcx
+        movq 96(%rdi), %rdx
+        movq %rcx, 88(%rdi)
+        sbbq 96(%rsi), %rdx
+        movq 104(%rdi), %rcx
+        movq %rdx, 96(%rdi)
+        sbbq 104(%rsi), %rcx
+        movq 112(%rdi), %rdx
+        movq %rcx, 104(%rdi)
+        sbbq 112(%rsi), %rdx
+        movq 120(%rdi), %rcx
+        movq %rdx, 112(%rdi)
+        sbbq 120(%rsi), %rcx
+        movq %rcx, 120(%rdi)
+        sbbq $0, %rax
+        repz retq
+#ifndef __APPLE__
+.size sp_2048_sub_in_place_16,.-sp_2048_sub_in_place_16
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant-time: b is first AND-masked with m into a 128-byte stack
+ * buffer, then the (possibly zeroed) buffer is subtracted from a, so
+ * the same instruction sequence runs for both mask values.
+ * SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = m.
+ * Returns the borrow in rax (0 or -1).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_2048_cond_sub_16
+.type sp_2048_cond_sub_16,@function
+.align 16
+sp_2048_cond_sub_16:
+#else
+.globl _sp_2048_cond_sub_16
+.p2align 4
+_sp_2048_cond_sub_16:
+#endif /* __APPLE__ */
+        subq $128, %rsp
+        movq $0, %rax
+        # Stage masked b (b & m) into the stack buffer, two words at a time.
+        movq (%rdx), %r8
+        movq 8(%rdx), %r9
+        andq %rcx, %r8
+        andq %rcx, %r9
+        movq %r8, (%rsp)
+        movq %r9, 8(%rsp)
+        movq 16(%rdx), %r8
+        movq 24(%rdx), %r9
+        andq %rcx, %r8
+        andq %rcx, %r9
+        movq %r8, 16(%rsp)
+        movq %r9, 24(%rsp)
+        movq 32(%rdx), %r8
+        movq 40(%rdx), %r9
+        andq %rcx, %r8
+        andq %rcx, %r9
+        movq %r8, 32(%rsp)
+        movq %r9, 40(%rsp)
+        movq 48(%rdx), %r8
+        movq 56(%rdx), %r9
+        andq %rcx, %r8
+        andq %rcx, %r9
+        movq %r8, 48(%rsp)
+        movq %r9, 56(%rsp)
+        movq 64(%rdx), %r8
+        movq 72(%rdx), %r9
+        andq %rcx, %r8
+        andq %rcx, %r9
+        movq %r8, 64(%rsp)
+        movq %r9, 72(%rsp)
+        movq 80(%rdx), %r8
+        movq 88(%rdx), %r9
+        andq %rcx, %r8
+        andq %rcx, %r9
+        movq %r8, 80(%rsp)
+        movq %r9, 88(%rsp)
+        movq 96(%rdx), %r8
+        movq 104(%rdx), %r9
+        andq %rcx, %r8
+        andq %rcx, %r9
+        movq %r8, 96(%rsp)
+        movq %r9, 104(%rsp)
+        movq 112(%rdx), %r8
+        movq 120(%rdx), %r9
+        andq %rcx, %r8
+        andq %rcx, %r9
+        movq %r8, 112(%rsp)
+        movq %r9, 120(%rsp)
+        # r = a - (b & m); borrow chain runs through all 16 words.
+        movq (%rsi), %r8
+        movq (%rsp), %rdx
+        subq %rdx, %r8
+        movq 8(%rsi), %r9
+        movq 8(%rsp), %rdx
+        sbbq %rdx, %r9
+        movq %r8, (%rdi)
+        movq 16(%rsi), %r8
+        movq 16(%rsp), %rdx
+        sbbq %rdx, %r8
+        movq %r9, 8(%rdi)
+        movq 24(%rsi), %r9
+        movq 24(%rsp), %rdx
+        sbbq %rdx, %r9
+        movq %r8, 16(%rdi)
+        movq 32(%rsi), %r8
+        movq 32(%rsp), %rdx
+        sbbq %rdx, %r8
+        movq %r9, 24(%rdi)
+        movq 40(%rsi), %r9
+        movq 40(%rsp), %rdx
+        sbbq %rdx, %r9
+        movq %r8, 32(%rdi)
+        movq 48(%rsi), %r8
+        movq 48(%rsp), %rdx
+        sbbq %rdx, %r8
+        movq %r9, 40(%rdi)
+        movq 56(%rsi), %r9
+        movq 56(%rsp), %rdx
+        sbbq %rdx, %r9
+        movq %r8, 48(%rdi)
+        movq 64(%rsi), %r8
+        movq 64(%rsp), %rdx
+        sbbq %rdx, %r8
+        movq %r9, 56(%rdi)
+        movq 72(%rsi), %r9
+        movq 72(%rsp), %rdx
+        sbbq %rdx, %r9
+        movq %r8, 64(%rdi)
+        movq 80(%rsi), %r8
+        movq 80(%rsp), %rdx
+        sbbq %rdx, %r8
+        movq %r9, 72(%rdi)
+        movq 88(%rsi), %r9
+        movq 88(%rsp), %rdx
+        sbbq %rdx, %r9
+        movq %r8, 80(%rdi)
+        movq 96(%rsi), %r8
+        movq 96(%rsp), %rdx
+        sbbq %rdx, %r8
+        movq %r9, 88(%rdi)
+        movq 104(%rsi), %r9
+        movq 104(%rsp), %rdx
+        sbbq %rdx, %r9
+        movq %r8, 96(%rdi)
+        movq 112(%rsi), %r8
+        movq 112(%rsp), %rdx
+        sbbq %rdx, %r8
+        movq %r9, 104(%rdi)
+        movq 120(%rsi), %r9
+        movq 120(%rsp), %rdx
+        sbbq %rdx, %r9
+        movq %r8, 112(%rdi)
+        movq %r9, 120(%rdi)
+        sbbq $0, %rax
+        addq $128, %rsp
+        repz retq
+#ifndef __APPLE__
+.size sp_2048_cond_sub_16,.-sp_2048_cond_sub_16
+#endif /* __APPLE__ */
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * SysV AMD64: rdi = a (32-word value reduced in place), rsi = m
+ * (16-word modulus), rdx = mp.  For each of the 16 iterations a
+ * multiple of m (mu = a[i] * mp) is added so that a[i] becomes zero;
+ * rdi advances one word per iteration.  r13/r14 cache a[i] and a[i+1]
+ * across iterations to avoid reloading; r15 accumulates the overflow
+ * out of the top word.  Finally the modulus is conditionally
+ * subtracted (mask = -overflow) via sp_2048_cond_sub_16.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_2048_mont_reduce_16
+.type sp_2048_mont_reduce_16,@function
+.align 16
+sp_2048_mont_reduce_16:
+#else
+.globl _sp_2048_mont_reduce_16
+.p2align 4
+_sp_2048_mont_reduce_16:
+#endif /* __APPLE__ */
+        push %r12
+        push %r13
+        push %r14
+        push %r15
+        movq %rdx, %rcx
+        xorq %r15, %r15
+        # i = 16
+        movq $16, %r8
+        movq (%rdi), %r13
+        movq 8(%rdi), %r14
+L_mont_loop_16:
+        # mu = a[i] * mp
+        movq %r13, %r11
+        imulq %rcx, %r11
+        # a[i+0] += m[0] * mu
+        movq %r11, %rax
+        xorq %r10, %r10
+        mulq (%rsi)
+        addq %rax, %r13
+        adcq %rdx, %r10
+        # a[i+1] += m[1] * mu
+        movq %r11, %rax
+        xorq %r9, %r9
+        mulq 8(%rsi)
+        movq %r14, %r13
+        addq %rax, %r13
+        adcq %rdx, %r9
+        addq %r10, %r13
+        adcq $0, %r9
+        # a[i+2] += m[2] * mu
+        movq %r11, %rax
+        xorq %r10, %r10
+        mulq 16(%rsi)
+        movq 16(%rdi), %r14
+        addq %rax, %r14
+        adcq %rdx, %r10
+        addq %r9, %r14
+        adcq $0, %r10
+        # a[i+3] += m[3] * mu
+        movq %r11, %rax
+        xorq %r9, %r9
+        mulq 24(%rsi)
+        movq 24(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r9
+        addq %r10, %r12
+        movq %r12, 24(%rdi)
+        adcq $0, %r9
+        # a[i+4] += m[4] * mu
+        movq %r11, %rax
+        xorq %r10, %r10
+        mulq 32(%rsi)
+        movq 32(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r10
+        addq %r9, %r12
+        movq %r12, 32(%rdi)
+        adcq $0, %r10
+        # a[i+5] += m[5] * mu
+        movq %r11, %rax
+        xorq %r9, %r9
+        mulq 40(%rsi)
+        movq 40(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r9
+        addq %r10, %r12
+        movq %r12, 40(%rdi)
+        adcq $0, %r9
+        # a[i+6] += m[6] * mu
+        movq %r11, %rax
+        xorq %r10, %r10
+        mulq 48(%rsi)
+        movq 48(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r10
+        addq %r9, %r12
+        movq %r12, 48(%rdi)
+        adcq $0, %r10
+        # a[i+7] += m[7] * mu
+        movq %r11, %rax
+        xorq %r9, %r9
+        mulq 56(%rsi)
+        movq 56(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r9
+        addq %r10, %r12
+        movq %r12, 56(%rdi)
+        adcq $0, %r9
+        # a[i+8] += m[8] * mu
+        movq %r11, %rax
+        xorq %r10, %r10
+        mulq 64(%rsi)
+        movq 64(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r10
+        addq %r9, %r12
+        movq %r12, 64(%rdi)
+        adcq $0, %r10
+        # a[i+9] += m[9] * mu
+        movq %r11, %rax
+        xorq %r9, %r9
+        mulq 72(%rsi)
+        movq 72(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r9
+        addq %r10, %r12
+        movq %r12, 72(%rdi)
+        adcq $0, %r9
+        # a[i+10] += m[10] * mu
+        movq %r11, %rax
+        xorq %r10, %r10
+        mulq 80(%rsi)
+        movq 80(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r10
+        addq %r9, %r12
+        movq %r12, 80(%rdi)
+        adcq $0, %r10
+        # a[i+11] += m[11] * mu
+        movq %r11, %rax
+        xorq %r9, %r9
+        mulq 88(%rsi)
+        movq 88(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r9
+        addq %r10, %r12
+        movq %r12, 88(%rdi)
+        adcq $0, %r9
+        # a[i+12] += m[12] * mu
+        movq %r11, %rax
+        xorq %r10, %r10
+        mulq 96(%rsi)
+        movq 96(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r10
+        addq %r9, %r12
+        movq %r12, 96(%rdi)
+        adcq $0, %r10
+        # a[i+13] += m[13] * mu
+        movq %r11, %rax
+        xorq %r9, %r9
+        mulq 104(%rsi)
+        movq 104(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r9
+        addq %r10, %r12
+        movq %r12, 104(%rdi)
+        adcq $0, %r9
+        # a[i+14] += m[14] * mu
+        movq %r11, %rax
+        xorq %r10, %r10
+        mulq 112(%rsi)
+        movq 112(%rdi), %r12
+        addq %rax, %r12
+        adcq %rdx, %r10
+        addq %r9, %r12
+        movq %r12, 112(%rdi)
+        adcq $0, %r10
+        # a[i+15] += m[15] * mu
+        # Top of the column: fold the carry into a[i+16] (memory operand)
+        # and collect any overflow beyond it in r15.
+        movq %r11, %rax
+        mulq 120(%rsi)
+        movq 120(%rdi), %r12
+        addq %rax, %r10
+        adcq %r15, %rdx
+        movq $0, %r15
+        adcq $0, %r15
+        addq %r10, %r12
+        movq %r12, 120(%rdi)
+        adcq %rdx, 128(%rdi)
+        adcq $0, %r15
+        # i -= 1
+        addq $8, %rdi
+        decq %r8
+        jnz L_mont_loop_16
+        # Flush the cached a[i]/a[i+1] words back to memory.
+        movq %r13, (%rdi)
+        movq %r14, 8(%rdi)
+        # Conditionally subtract m: mask = -overflow; rdi has advanced by
+        # 128 bytes, so rsi = a+128 (reduced top half) and subq restores
+        # the original a pointer as the destination.
+        negq %r15
+        movq %r15, %rcx
+        movq %rsi, %rdx
+        movq %rdi, %rsi
+        subq $128, %rdi
+#ifndef __APPLE__
+        callq sp_2048_cond_sub_16@plt
+#else
+        callq _sp_2048_cond_sub_16
+#endif /* __APPLE__ */
+        pop %r15
+        pop %r14
+        pop %r13
+        pop %r12
+        repz retq
+#ifndef __APPLE__
+.size sp_2048_mont_reduce_16,.-sp_2048_mont_reduce_16
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * BMI2 variant: instead of staging a masked copy of b on the stack,
+ * each word of b is masked in a register with pextq (mask of all ones
+ * selects the word unchanged; mask of zero yields zero) and subtracted
+ * directly, keeping the sequence constant-time with no stack buffer.
+ * SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = m.
+ * Returns the borrow in rax (0 or -1).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_2048_cond_sub_avx2_16
+.type sp_2048_cond_sub_avx2_16,@function
+.align 16
+sp_2048_cond_sub_avx2_16:
+#else
+.globl _sp_2048_cond_sub_avx2_16
+.p2align 4
+_sp_2048_cond_sub_avx2_16:
+#endif /* __APPLE__ */
+        movq $0, %rax
+        # r8/r9/r10 rotate roles (masked b word / a word / store) so the
+        # sbbq borrow chain is never broken by a flag-modifying instruction.
+        movq (%rdx), %r10
+        movq (%rsi), %r8
+        pextq %rcx, %r10, %r10
+        subq %r10, %r8
+        movq 8(%rdx), %r10
+        movq 8(%rsi), %r9
+        pextq %rcx, %r10, %r10
+        movq %r8, (%rdi)
+        sbbq %r10, %r9
+        movq 16(%rdx), %r8
+        movq 16(%rsi), %r10
+        pextq %rcx, %r8, %r8
+        movq %r9, 8(%rdi)
+        sbbq %r8, %r10
+        movq 24(%rdx), %r9
+        movq 24(%rsi), %r8
+        pextq %rcx, %r9, %r9
+        movq %r10, 16(%rdi)
+        sbbq %r9, %r8
+        movq 32(%rdx), %r10
+        movq 32(%rsi), %r9
+        pextq %rcx, %r10, %r10
+        movq %r8, 24(%rdi)
+        sbbq %r10, %r9
+        movq 40(%rdx), %r8
+        movq 40(%rsi), %r10
+        pextq %rcx, %r8, %r8
+        movq %r9, 32(%rdi)
+        sbbq %r8, %r10
+        movq 48(%rdx), %r9
+        movq 48(%rsi), %r8
+        pextq %rcx, %r9, %r9
+        movq %r10, 40(%rdi)
+        sbbq %r9, %r8
+        movq 56(%rdx), %r10
+        movq 56(%rsi), %r9
+        pextq %rcx, %r10, %r10
+        movq %r8, 48(%rdi)
+        sbbq %r10, %r9
+        movq 64(%rdx), %r8
+        movq 64(%rsi), %r10
+        pextq %rcx, %r8, %r8
+        movq %r9, 56(%rdi)
+        sbbq %r8, %r10
+        movq 72(%rdx), %r9
+        movq 72(%rsi), %r8
+        pextq %rcx, %r9, %r9
+        movq %r10, 64(%rdi)
+        sbbq %r9, %r8
+        movq 80(%rdx), %r10
+        movq 80(%rsi), %r9
+        pextq %rcx, %r10, %r10
+        movq %r8, 72(%rdi)
+        sbbq %r10, %r9
+        movq 88(%rdx), %r8
+        movq 88(%rsi), %r10
+        pextq %rcx, %r8, %r8
+        movq %r9, 80(%rdi)
+        sbbq %r8, %r10
+        movq 96(%rdx), %r9
+        movq 96(%rsi), %r8
+        pextq %rcx, %r9, %r9
+        movq %r10, 88(%rdi)
+        sbbq %r9, %r8
+        movq 104(%rdx), %r10
+        movq 104(%rsi), %r9
+        pextq %rcx, %r10, %r10
+        movq %r8, 96(%rdi)
+        sbbq %r10, %r9
+        movq 112(%rdx), %r8
+        movq 112(%rsi), %r10
+        pextq %rcx, %r8, %r8
+        movq %r9, 104(%rdi)
+        sbbq %r8, %r10
+        movq 120(%rdx), %r9
+        movq 120(%rsi), %r8
+        pextq %rcx, %r9, %r9
+        movq %r10, 112(%rdi)
+        sbbq %r9, %r8
+        movq %r8, 120(%rdi)
+        sbbq $0, %rax
+        repz retq
+#ifndef __APPLE__
+.size sp_2048_cond_sub_avx2_16,.-sp_2048_cond_sub_avx2_16
+#endif /* __APPLE__ */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * Schoolbook multiply of a 16-digit number by one 64-bit digit.
+ * SysV AMD64: rdi = r (17 words written, r[16] holds the top carry),
+ * rsi = a (16 words), rdx = b.  Uses the same rolling r8/r9/r10
+ * accumulator pattern as sp_2048_mul_d_32.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_2048_mul_d_16
+.type sp_2048_mul_d_16,@function
+.align 16
+sp_2048_mul_d_16:
+#else
+.globl _sp_2048_mul_d_16
+.p2align 4
+_sp_2048_mul_d_16:
+#endif /* __APPLE__ */
+        # b is kept in %rcx since one-operand mulq clobbers %rdx:%rax.
+        movq %rdx, %rcx
+        # A[0] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq (%rsi)
+        movq %rax, %r8
+        movq %rdx, %r9
+        movq %r8, (%rdi)
+        # A[1] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 8(%rsi)
+        addq %rax, %r9
+        movq %r9, 8(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[2] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 16(%rsi)
+        addq %rax, %r10
+        movq %r10, 16(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[3] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 24(%rsi)
+        addq %rax, %r8
+        movq %r8, 24(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[4] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 32(%rsi)
+        addq %rax, %r9
+        movq %r9, 32(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[5] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 40(%rsi)
+        addq %rax, %r10
+        movq %r10, 40(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[6] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 48(%rsi)
+        addq %rax, %r8
+        movq %r8, 48(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[7] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 56(%rsi)
+        addq %rax, %r9
+        movq %r9, 56(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[8] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 64(%rsi)
+        addq %rax, %r10
+        movq %r10, 64(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[9] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 72(%rsi)
+        addq %rax, %r8
+        movq %r8, 72(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[10] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 80(%rsi)
+        addq %rax, %r9
+        movq %r9, 80(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[11] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 88(%rsi)
+        addq %rax, %r10
+        movq %r10, 88(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[12] * B
+        movq %rcx, %rax
+        xorq %r10, %r10
+        mulq 96(%rsi)
+        addq %rax, %r8
+        movq %r8, 96(%rdi)
+        adcq %rdx, %r9
+        adcq $0, %r10
+        # A[13] * B
+        movq %rcx, %rax
+        xorq %r8, %r8
+        mulq 104(%rsi)
+        addq %rax, %r9
+        movq %r9, 104(%rdi)
+        adcq %rdx, %r10
+        adcq $0, %r8
+        # A[14] * B
+        movq %rcx, %rax
+        xorq %r9, %r9
+        mulq 112(%rsi)
+        addq %rax, %r10
+        movq %r10, 112(%rdi)
+        adcq %rdx, %r8
+        adcq $0, %r9
+        # A[15] * B
+        # Last digit: no further carry-out register needed.
+        movq %rcx, %rax
+        mulq 120(%rsi)
+        addq %rax, %r8
+        adcq %rdx, %r9
+        movq %r8, 120(%rdi)
+        movq %r9, 128(%rdi)
+        repz retq
+#ifndef __APPLE__
+.size sp_2048_mul_d_16,.-sp_2048_mul_d_16
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * BMI2/ADX variant: b is held in rdx so mulxq can use it implicitly
+ * without touching flags, while adcxq (CF chain) and adoxq (OF chain)
+ * carry the low and high partial products independently.  r11 stays
+ * zero throughout and seeds each fresh accumulator.
+ * SysV AMD64: rdi = r (17 words, r[16] gets the top carry), rsi = a,
+ * rdx = b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_2048_mul_d_avx2_16
+.type sp_2048_mul_d_avx2_16,@function
+.align 16
+sp_2048_mul_d_avx2_16:
+#else
+.globl _sp_2048_mul_d_avx2_16
+.p2align 4
+_sp_2048_mul_d_avx2_16:
+#endif /* __APPLE__ */
+        movq %rdx, %rax
+        # A[0] * B
+        movq %rax, %rdx
+        # xorq also clears CF and OF, starting both carry chains at zero.
+        xorq %r11, %r11
+        mulxq (%rsi), %r9, %r10
+        movq %r9, (%rdi)
+        # A[1] * B
+        mulxq 8(%rsi), %rcx, %r8
+        movq %r11, %r9
+        adcxq %rcx, %r10
+        movq %r10, 8(%rdi)
+        adoxq %r8, %r9
+        # A[2] * B
+        mulxq 16(%rsi), %rcx, %r8
+        movq %r11, %r10
+        adcxq %rcx, %r9
+        movq %r9, 16(%rdi)
+        adoxq %r8, %r10
+        # A[3] * B
+        mulxq 24(%rsi), %rcx, %r8
+        movq %r11, %r9
+        adcxq %rcx, %r10
+        movq %r10, 24(%rdi)
+        adoxq %r8, %r9
+        # A[4] * B
+        mulxq 32(%rsi), %rcx, %r8
+        movq %r11, %r10
+        adcxq %rcx, %r9
+        movq %r9, 32(%rdi)
+        adoxq %r8, %r10
+        # A[5] * B
+        mulxq 40(%rsi), %rcx, %r8
+        movq %r11, %r9
+        adcxq %rcx, %r10
+        movq %r10, 40(%rdi)
+        adoxq %r8, %r9
+        # A[6] * B
+        mulxq 48(%rsi), %rcx, %r8
+        movq %r11, %r10
+        adcxq %rcx, %r9
+        movq %r9, 48(%rdi)
+        adoxq %r8, %r10
+        # A[7] * B
+        mulxq 56(%rsi), %rcx, %r8
+        movq %r11, %r9
+        adcxq %rcx, %r10
+        movq %r10, 56(%rdi)
+        adoxq %r8, %r9
+        # A[8] * B
+        mulxq 64(%rsi), %rcx, %r8
+        movq %r11, %r10
+        adcxq %rcx, %r9
+        movq %r9, 64(%rdi)
+        adoxq %r8, %r10
+        # A[9] * B
+        mulxq 72(%rsi), %rcx, %r8
+        movq %r11, %r9
+        adcxq %rcx, %r10
+        movq %r10, 72(%rdi)
+        adoxq %r8, %r9
+        # A[10] * B
+        mulxq 80(%rsi), %rcx, %r8
+        movq %r11, %r10
+        adcxq %rcx, %r9
+        movq %r9, 80(%rdi)
+        adoxq %r8, %r10
+        # A[11] * B
+        mulxq 88(%rsi), %rcx, %r8
+        movq %r11, %r9
+        adcxq %rcx, %r10
+        movq %r10, 88(%rdi)
+        adoxq %r8, %r9
+        # A[12] * B
+        mulxq 96(%rsi), %rcx, %r8
+        movq %r11, %r10
+        adcxq %rcx, %r9
+        movq %r9, 96(%rdi)
+        adoxq %r8, %r10
+        # A[13] * B
+        mulxq 104(%rsi), %rcx, %r8
+        movq %r11, %r9
+        adcxq %rcx, %r10
+        movq %r10, 104(%rdi)
+        adoxq %r8, %r9
+        # A[14] * B
+        mulxq 112(%rsi), %rcx, %r8
+        movq %r11, %r10
+        adcxq %rcx, %r9
+        movq %r9, 112(%rdi)
+        adoxq %r8, %r10
+        # A[15] * B
+        # Final digit: merge the OF chain into the CF chain before storing.
+        mulxq 120(%rsi), %rcx, %r8
+        movq %r11, %r9
+        adcxq %rcx, %r10
+        adoxq %r8, %r9
+        adcxq %r11, %r9
+        movq %r10, 120(%rdi)
+        movq %r9, 128(%rdi)
+        repz retq
+#ifndef __APPLE__
+.size sp_2048_mul_d_avx2_16,.-sp_2048_mul_d_avx2_16
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+ # SysV AMD64: rdi = a, rsi = b (16 limbs each); result in rax.
+ # Constant time: walks from the most-significant limb (offset 120) down,
+ # always executing the same instruction stream. rdx is an all-ones mask
+ # that cmovnz clears at the first differing limb, so every lower limb is
+ # masked to 0 and cannot change the verdict. cmova/cmovc pick +1/-1 for
+ # the first difference; the final xor maps the all-equal case
+ # (rax = -1, rdx = -1) to 0. Clobbers: rcx, rdx, r8-r10, flags.
+#ifndef __APPLE__
+.globl sp_2048_cmp_16
+.type sp_2048_cmp_16,@function
+.align 16
+sp_2048_cmp_16:
+#else
+.globl _sp_2048_cmp_16
+.p2align 4
+_sp_2048_cmp_16:
+#endif /* __APPLE__ */
+ # rcx = 0, rdx = mask (-1 while limbs equal so far), rax = -1, r8 = +1.
+ xorq %rcx, %rcx
+ movq $-1, %rdx
+ movq $-1, %rax
+ movq $1, %r8
+ movq 120(%rdi), %r9
+ movq 120(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 112(%rdi), %r9
+ movq 112(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 104(%rdi), %r9
+ movq 104(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 96(%rdi), %r9
+ movq 96(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 88(%rdi), %r9
+ movq 88(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 80(%rdi), %r9
+ movq 80(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 72(%rdi), %r9
+ movq 72(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 64(%rdi), %r9
+ movq 64(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 56(%rdi), %r9
+ movq 56(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 48(%rdi), %r9
+ movq 48(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 40(%rdi), %r9
+ movq 40(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 32(%rdi), %r9
+ movq 32(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 24(%rdi), %r9
+ movq 24(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 16(%rdi), %r9
+ movq 16(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 8(%rdi), %r9
+ movq 8(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq (%rdi), %r9
+ movq (%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ # Equal inputs leave rax = rdx = -1, so rax ^ rdx = 0; any difference
+ # cleared rdx to 0 first, leaving rax's +1/-1 verdict intact.
+ xorq %rdx, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_cmp_16,.-sp_2048_cmp_16
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+ # SysV AMD64: rdi = a (reduced in place), rsi = m (16 limbs), rdx = mp.
+ # Register roles in the loop: r8 = mp, r9 = i (counts 16 down by 2, so
+ # each iteration retires two rows), r13 = cached a[i] / running mu input,
+ # r14 = carry propagated into the row's top word, r12 = constant 0 used
+ # to reset both ADX carry chains, rax/rcx = mulx product low/high.
+ # rdi is pre-advanced by 64 so a row's limbs sit at -56(%rdi)..64(%rdi).
+ # Callee-saved r12-r14 are pushed/popped. Clobbers: rax, rcx, rdx,
+ # r8-r11, flags.
+#ifndef __APPLE__
+.globl sp_2048_mont_reduce_avx2_16
+.type sp_2048_mont_reduce_avx2_16,@function
+.align 16
+sp_2048_mont_reduce_avx2_16:
+#else
+.globl _sp_2048_mont_reduce_avx2_16
+.p2align 4
+_sp_2048_mont_reduce_avx2_16:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ movq %rdx, %r8
+ xorq %r14, %r14
+ # i = 16
+ movq $16, %r9
+ movq (%rdi), %r13
+ addq $64, %rdi
+ xorq %r12, %r12
+ # Loop body is two unrolled Montgomery rows (a += m * mu at i and i+1).
+L_mont_loop_avx2_16:
+ # mu = a[i] * mp
+ movq %r13, %rdx
+ movq %r13, %r10
+ imulq %r8, %rdx
+ # xor clears CF and OF, priming the adcx/adox chains for this row.
+ xorq %r12, %r12
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rax, %rcx
+ movq -56(%rdi), %r13
+ adcxq %rax, %r10
+ adoxq %rcx, %r13
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rax, %rcx
+ movq -48(%rdi), %r10
+ adcxq %rax, %r13
+ adoxq %rcx, %r10
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rax, %rcx
+ movq -40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -48(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rax, %rcx
+ movq -32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -40(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rax, %rcx
+ movq -24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -32(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rax, %rcx
+ movq -16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -24(%rdi)
+ # a[i+6] += m[6] * mu
+ mulxq 48(%rsi), %rax, %rcx
+ movq -8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -16(%rdi)
+ # a[i+7] += m[7] * mu
+ mulxq 56(%rsi), %rax, %rcx
+ movq (%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -8(%rdi)
+ # a[i+8] += m[8] * mu
+ mulxq 64(%rsi), %rax, %rcx
+ movq 8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, (%rdi)
+ # a[i+9] += m[9] * mu
+ mulxq 72(%rsi), %rax, %rcx
+ movq 16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 8(%rdi)
+ # a[i+10] += m[10] * mu
+ mulxq 80(%rsi), %rax, %rcx
+ movq 24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 16(%rdi)
+ # a[i+11] += m[11] * mu
+ mulxq 88(%rsi), %rax, %rcx
+ movq 32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 24(%rdi)
+ # a[i+12] += m[12] * mu
+ mulxq 96(%rsi), %rax, %rcx
+ movq 40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 32(%rdi)
+ # a[i+13] += m[13] * mu
+ mulxq 104(%rsi), %rax, %rcx
+ movq 48(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 40(%rdi)
+ # a[i+14] += m[14] * mu
+ mulxq 112(%rsi), %rax, %rcx
+ movq 56(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 48(%rdi)
+ # a[i+15] += m[15] * mu
+ mulxq 120(%rsi), %rax, %rcx
+ movq 64(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 56(%rdi)
+ adcxq %r14, %r10
+ movq %r10, 64(%rdi)
+ # Capture both residual carry flags (OF, CF) into r14 for the next row.
+ movq %r12, %r14
+ adoxq %r12, %r14
+ adcxq %r12, %r14
+ # mu = a[i] * mp
+ movq %r13, %rdx
+ movq %r13, %r10
+ imulq %r8, %rdx
+ xorq %r12, %r12
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rax, %rcx
+ movq -48(%rdi), %r13
+ adcxq %rax, %r10
+ adoxq %rcx, %r13
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rax, %rcx
+ movq -40(%rdi), %r10
+ adcxq %rax, %r13
+ adoxq %rcx, %r10
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rax, %rcx
+ movq -32(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -40(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rax, %rcx
+ movq -24(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -32(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rax, %rcx
+ movq -16(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -24(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rax, %rcx
+ movq -8(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -16(%rdi)
+ # a[i+6] += m[6] * mu
+ mulxq 48(%rsi), %rax, %rcx
+ movq (%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -8(%rdi)
+ # a[i+7] += m[7] * mu
+ mulxq 56(%rsi), %rax, %rcx
+ movq 8(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, (%rdi)
+ # a[i+8] += m[8] * mu
+ mulxq 64(%rsi), %rax, %rcx
+ movq 16(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 8(%rdi)
+ # a[i+9] += m[9] * mu
+ mulxq 72(%rsi), %rax, %rcx
+ movq 24(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 16(%rdi)
+ # a[i+10] += m[10] * mu
+ mulxq 80(%rsi), %rax, %rcx
+ movq 32(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 24(%rdi)
+ # a[i+11] += m[11] * mu
+ mulxq 88(%rsi), %rax, %rcx
+ movq 40(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 32(%rdi)
+ # a[i+12] += m[12] * mu
+ mulxq 96(%rsi), %rax, %rcx
+ movq 48(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 40(%rdi)
+ # a[i+13] += m[13] * mu
+ mulxq 104(%rsi), %rax, %rcx
+ movq 56(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 48(%rdi)
+ # a[i+14] += m[14] * mu
+ mulxq 112(%rsi), %rax, %rcx
+ movq 64(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 56(%rdi)
+ # a[i+15] += m[15] * mu
+ mulxq 120(%rsi), %rax, %rcx
+ movq 72(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 64(%rdi)
+ adcxq %r14, %r10
+ movq %r10, 72(%rdi)
+ movq %r12, %r14
+ adoxq %r12, %r14
+ adcxq %r12, %r14
+ # a += 2
+ addq $16, %rdi
+ # i -= 2
+ subq $2, %r9
+ jnz L_mont_loop_avx2_16
+ # Final step: constant-time conditional subtract of m. r14 (0 or 1,
+ # the final carry) is negated into an all-ones/zero mask; pextq with
+ # that mask yields m[j] or 0, so the sbb chain subtracts m only when
+ # the reduction overflowed. r8 = upper half of a, rdi = output.
+ subq $64, %rdi
+ negq %r14
+ movq %rdi, %r8
+ subq $128, %rdi
+ movq (%rsi), %rcx
+ movq %r13, %rdx
+ pextq %r14, %rcx, %rcx
+ subq %rcx, %rdx
+ movq 8(%rsi), %rcx
+ movq 8(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, (%rdi)
+ sbbq %rcx, %rax
+ movq 16(%rsi), %rdx
+ movq 16(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 8(%rdi)
+ sbbq %rdx, %rcx
+ movq 24(%rsi), %rax
+ movq 24(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 16(%rdi)
+ sbbq %rax, %rdx
+ movq 32(%rsi), %rcx
+ movq 32(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 24(%rdi)
+ sbbq %rcx, %rax
+ movq 40(%rsi), %rdx
+ movq 40(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 32(%rdi)
+ sbbq %rdx, %rcx
+ movq 48(%rsi), %rax
+ movq 48(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 40(%rdi)
+ sbbq %rax, %rdx
+ movq 56(%rsi), %rcx
+ movq 56(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 48(%rdi)
+ sbbq %rcx, %rax
+ movq 64(%rsi), %rdx
+ movq 64(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 56(%rdi)
+ sbbq %rdx, %rcx
+ movq 72(%rsi), %rax
+ movq 72(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 64(%rdi)
+ sbbq %rax, %rdx
+ movq 80(%rsi), %rcx
+ movq 80(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 72(%rdi)
+ sbbq %rcx, %rax
+ movq 88(%rsi), %rdx
+ movq 88(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 80(%rdi)
+ sbbq %rdx, %rcx
+ movq 96(%rsi), %rax
+ movq 96(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 88(%rdi)
+ sbbq %rax, %rdx
+ movq 104(%rsi), %rcx
+ movq 104(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 96(%rdi)
+ sbbq %rcx, %rax
+ movq 112(%rsi), %rdx
+ movq 112(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 104(%rdi)
+ sbbq %rdx, %rcx
+ movq 120(%rsi), %rax
+ movq 120(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 112(%rdi)
+ sbbq %rax, %rdx
+ movq %rdx, 120(%rdi)
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_mont_reduce_avx2_16,.-sp_2048_mont_reduce_avx2_16
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b (32 limbs each), rcx = mask m.
+ # Returns the final borrow in rax (0, or -1 when the subtract borrowed).
+ # Constant time: phase 1 writes (b[j] & m) into a 256-byte stack buffer,
+ # phase 2 computes r = a - buffer with one unbroken sub/sbb chain, so
+ # the same work is done whether m is 0 or -1.
+ # Clobbers: rdx, r8, r9, flags. Leaf; 256 bytes of stack, no red zone.
+#ifndef __APPLE__
+.globl sp_2048_cond_sub_32
+.type sp_2048_cond_sub_32,@function
+.align 16
+sp_2048_cond_sub_32:
+#else
+.globl _sp_2048_cond_sub_32
+.p2align 4
+_sp_2048_cond_sub_32:
+#endif /* __APPLE__ */
+ subq $256, %rsp
+ movq $0, %rax
+ # Phase 1: masked copy of b onto the stack, two limbs per group.
+ movq (%rdx), %r8
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 48(%rdx), %r8
+ movq 56(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 48(%rsp)
+ movq %r9, 56(%rsp)
+ movq 64(%rdx), %r8
+ movq 72(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq 80(%rdx), %r8
+ movq 88(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 80(%rsp)
+ movq %r9, 88(%rsp)
+ movq 96(%rdx), %r8
+ movq 104(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq 112(%rdx), %r8
+ movq 120(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 112(%rsp)
+ movq %r9, 120(%rsp)
+ movq 128(%rdx), %r8
+ movq 136(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 128(%rsp)
+ movq %r9, 136(%rsp)
+ movq 144(%rdx), %r8
+ movq 152(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 144(%rsp)
+ movq %r9, 152(%rsp)
+ movq 160(%rdx), %r8
+ movq 168(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 160(%rsp)
+ movq %r9, 168(%rsp)
+ movq 176(%rdx), %r8
+ movq 184(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 176(%rsp)
+ movq %r9, 184(%rsp)
+ movq 192(%rdx), %r8
+ movq 200(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 192(%rsp)
+ movq %r9, 200(%rsp)
+ movq 208(%rdx), %r8
+ movq 216(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 208(%rsp)
+ movq %r9, 216(%rsp)
+ movq 224(%rdx), %r8
+ movq 232(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 224(%rsp)
+ movq %r9, 232(%rsp)
+ movq 240(%rdx), %r8
+ movq 248(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 240(%rsp)
+ movq %r9, 248(%rsp)
+ # Phase 2: r = a - masked_b; the sub/sbb chain must not be broken, so
+ # only mov (which preserves flags) is interleaved with the stores.
+ movq (%rsi), %r8
+ movq (%rsp), %rdx
+ subq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, (%rdi)
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq 48(%rsi), %r8
+ movq 48(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 40(%rdi)
+ movq 56(%rsi), %r9
+ movq 56(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 48(%rdi)
+ movq 64(%rsi), %r8
+ movq 64(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 56(%rdi)
+ movq 72(%rsi), %r9
+ movq 72(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 64(%rdi)
+ movq 80(%rsi), %r8
+ movq 80(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 72(%rdi)
+ movq 88(%rsi), %r9
+ movq 88(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 80(%rdi)
+ movq 96(%rsi), %r8
+ movq 96(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 88(%rdi)
+ movq 104(%rsi), %r9
+ movq 104(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rsi), %r8
+ movq 112(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rsi), %r9
+ movq 120(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 112(%rdi)
+ movq 128(%rsi), %r8
+ movq 128(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 120(%rdi)
+ movq 136(%rsi), %r9
+ movq 136(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 128(%rdi)
+ movq 144(%rsi), %r8
+ movq 144(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 136(%rdi)
+ movq 152(%rsi), %r9
+ movq 152(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rsi), %r8
+ movq 160(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 152(%rdi)
+ movq 168(%rsi), %r9
+ movq 168(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 160(%rdi)
+ movq 176(%rsi), %r8
+ movq 176(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 168(%rdi)
+ movq 184(%rsi), %r9
+ movq 184(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 176(%rdi)
+ movq 192(%rsi), %r8
+ movq 192(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 184(%rdi)
+ movq 200(%rsi), %r9
+ movq 200(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 192(%rdi)
+ movq 208(%rsi), %r8
+ movq 208(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 200(%rdi)
+ movq 216(%rsi), %r9
+ movq 216(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 208(%rdi)
+ movq 224(%rsi), %r8
+ movq 224(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 216(%rdi)
+ movq 232(%rsi), %r9
+ movq 232(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 224(%rdi)
+ movq 240(%rsi), %r8
+ movq 240(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 232(%rdi)
+ movq 248(%rsi), %r9
+ movq 248(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 240(%rdi)
+ movq %r9, 248(%rdi)
+ # rax = 0 - borrow: 0 if no borrow, -1 if the chain borrowed out.
+ sbbq $0, %rax
+ addq $256, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_cond_sub_32,.-sp_2048_cond_sub_32
+#endif /* __APPLE__ */
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+ # SysV AMD64: rdi = a (reduced in place, 64 limbs read), rsi = m
+ # (32 limbs), rdx = mp. Generic (non-AVX2) path using mulq.
+ # Register roles: rcx = mp, r8 = loop counter i (32 rows, one per
+ # iteration), r11 = mu, r13/r14 = cached a[i]/a[i+1] (kept in registers
+ # across rows so they are not reloaded), r15 = carry out of the row's
+ # top word, r9/r10 = alternating carry registers, r12 = current limb.
+ # Callee-saved r12-r15 are pushed/popped; ends with a call to
+ # sp_2048_cond_sub_32 for the constant-time final subtraction.
+#ifndef __APPLE__
+.globl sp_2048_mont_reduce_32
+.type sp_2048_mont_reduce_32,@function
+.align 16
+sp_2048_mont_reduce_32:
+#else
+.globl _sp_2048_mont_reduce_32
+.p2align 4
+_sp_2048_mont_reduce_32:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq %rdx, %rcx
+ xorq %r15, %r15
+ # i = 32
+ movq $32, %r8
+ movq (%rdi), %r13
+ movq 8(%rdi), %r14
+L_mont_loop_32:
+ # mu = a[i] * mp
+ movq %r13, %r11
+ imulq %rcx, %r11
+ # a[i+0] += m[0] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r10
+ # a[i+1] += m[1] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 8(%rsi)
+ movq %r14, %r13
+ addq %rax, %r13
+ adcq %rdx, %r9
+ addq %r10, %r13
+ adcq $0, %r9
+ # a[i+2] += m[2] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 16(%rsi)
+ movq 16(%rdi), %r14
+ addq %rax, %r14
+ adcq %rdx, %r10
+ addq %r9, %r14
+ adcq $0, %r10
+ # a[i+3] += m[3] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 24(%rsi)
+ movq 24(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 24(%rdi)
+ adcq $0, %r9
+ # a[i+4] += m[4] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 32(%rsi)
+ movq 32(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 32(%rdi)
+ adcq $0, %r10
+ # a[i+5] += m[5] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 40(%rsi)
+ movq 40(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 40(%rdi)
+ adcq $0, %r9
+ # a[i+6] += m[6] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 48(%rsi)
+ movq 48(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 48(%rdi)
+ adcq $0, %r10
+ # a[i+7] += m[7] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 56(%rsi)
+ movq 56(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 56(%rdi)
+ adcq $0, %r9
+ # a[i+8] += m[8] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 64(%rsi)
+ movq 64(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 64(%rdi)
+ adcq $0, %r10
+ # a[i+9] += m[9] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 72(%rsi)
+ movq 72(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 72(%rdi)
+ adcq $0, %r9
+ # a[i+10] += m[10] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 80(%rsi)
+ movq 80(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 80(%rdi)
+ adcq $0, %r10
+ # a[i+11] += m[11] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 88(%rsi)
+ movq 88(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 88(%rdi)
+ adcq $0, %r9
+ # a[i+12] += m[12] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 96(%rsi)
+ movq 96(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 96(%rdi)
+ adcq $0, %r10
+ # a[i+13] += m[13] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 104(%rsi)
+ movq 104(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 104(%rdi)
+ adcq $0, %r9
+ # a[i+14] += m[14] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 112(%rsi)
+ movq 112(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 112(%rdi)
+ adcq $0, %r10
+ # a[i+15] += m[15] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 120(%rsi)
+ movq 120(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 120(%rdi)
+ adcq $0, %r9
+ # a[i+16] += m[16] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 128(%rsi)
+ movq 128(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 128(%rdi)
+ adcq $0, %r10
+ # a[i+17] += m[17] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 136(%rsi)
+ movq 136(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 136(%rdi)
+ adcq $0, %r9
+ # a[i+18] += m[18] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 144(%rsi)
+ movq 144(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 144(%rdi)
+ adcq $0, %r10
+ # a[i+19] += m[19] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 152(%rsi)
+ movq 152(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 152(%rdi)
+ adcq $0, %r9
+ # a[i+20] += m[20] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 160(%rsi)
+ movq 160(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 160(%rdi)
+ adcq $0, %r10
+ # a[i+21] += m[21] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 168(%rsi)
+ movq 168(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 168(%rdi)
+ adcq $0, %r9
+ # a[i+22] += m[22] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 176(%rsi)
+ movq 176(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 176(%rdi)
+ adcq $0, %r10
+ # a[i+23] += m[23] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 184(%rsi)
+ movq 184(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 184(%rdi)
+ adcq $0, %r9
+ # a[i+24] += m[24] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 192(%rsi)
+ movq 192(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 192(%rdi)
+ adcq $0, %r10
+ # a[i+25] += m[25] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 200(%rsi)
+ movq 200(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 200(%rdi)
+ adcq $0, %r9
+ # a[i+26] += m[26] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 208(%rsi)
+ movq 208(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 208(%rdi)
+ adcq $0, %r10
+ # a[i+27] += m[27] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 216(%rsi)
+ movq 216(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 216(%rdi)
+ adcq $0, %r9
+ # a[i+28] += m[28] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 224(%rsi)
+ movq 224(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 224(%rdi)
+ adcq $0, %r10
+ # a[i+29] += m[29] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 232(%rsi)
+ movq 232(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 232(%rdi)
+ adcq $0, %r9
+ # a[i+30] += m[30] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 240(%rsi)
+ movq 240(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 240(%rdi)
+ adcq $0, %r10
+ # a[i+31] += m[31] * mu
+ movq %r11, %rax
+ mulq 248(%rsi)
+ movq 248(%rdi), %r12
+ addq %rax, %r10
+ # Fold the saved carry r15 into the high product word, then rebuild
+ # r15 from the carries out of the top two limbs of this row.
+ adcq %r15, %rdx
+ movq $0, %r15
+ adcq $0, %r15
+ addq %r10, %r12
+ movq %r12, 248(%rdi)
+ adcq %rdx, 256(%rdi)
+ adcq $0, %r15
+ # i -= 1
+ addq $8, %rdi
+ decq %r8
+ jnz L_mont_loop_32
+ movq %r13, (%rdi)
+ movq %r14, 8(%rdi)
+ # Tail: mask = -carry (0 or -1); re-wire registers for
+ # sp_2048_cond_sub_32(r = a, a = a + 256, b = m, m = mask) so the
+ # final subtraction of m happens in constant time.
+ negq %r15
+ movq %r15, %rcx
+ movq %rsi, %rdx
+ movq %rdi, %rsi
+ subq $256, %rdi
+#ifndef __APPLE__
+ callq sp_2048_cond_sub_32@plt
+#else
+ callq _sp_2048_cond_sub_32
+#endif /* __APPLE__ */
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_mont_reduce_32,.-sp_2048_mont_reduce_32
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b (32 limbs each), rcx = mask m.
+ # Returns the final borrow in rax (0 or -1). Same contract as
+ # sp_2048_cond_sub_32 but uses BMI2 pextq with the all-ones/zero mask
+ # to select b[j] or 0 inline, avoiding the stack buffer. The sub/sbb
+ # chain is unbroken (pextq, mov do not change flags between sbbq's...
+ # note pextq itself is placed before each sbbq and mov preserves flags).
+ # NOTE(review): this function needs BMI2 (pextq) but no HAVE_INTEL_AVX2
+ # guard is visible around it in this hunk — confirm the guard exists in
+ # the full file.
+ # Clobbers: r8, r9, r10, flags. Leaf, no stack use.
+#ifndef __APPLE__
+.globl sp_2048_cond_sub_avx2_32
+.type sp_2048_cond_sub_avx2_32,@function
+.align 16
+sp_2048_cond_sub_avx2_32:
+#else
+.globl _sp_2048_cond_sub_avx2_32
+.p2align 4
+_sp_2048_cond_sub_avx2_32:
+#endif /* __APPLE__ */
+ movq $0, %rax
+ movq (%rdx), %r10
+ movq (%rsi), %r8
+ pextq %rcx, %r10, %r10
+ subq %r10, %r8
+ movq 8(%rdx), %r10
+ movq 8(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, (%rdi)
+ sbbq %r10, %r9
+ movq 16(%rdx), %r8
+ movq 16(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 8(%rdi)
+ sbbq %r8, %r10
+ movq 24(%rdx), %r9
+ movq 24(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 16(%rdi)
+ sbbq %r9, %r8
+ movq 32(%rdx), %r10
+ movq 32(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 24(%rdi)
+ sbbq %r10, %r9
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 32(%rdi)
+ sbbq %r8, %r10
+ movq 48(%rdx), %r9
+ movq 48(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 40(%rdi)
+ sbbq %r9, %r8
+ movq 56(%rdx), %r10
+ movq 56(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 48(%rdi)
+ sbbq %r10, %r9
+ movq 64(%rdx), %r8
+ movq 64(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 56(%rdi)
+ sbbq %r8, %r10
+ movq 72(%rdx), %r9
+ movq 72(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 64(%rdi)
+ sbbq %r9, %r8
+ movq 80(%rdx), %r10
+ movq 80(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 72(%rdi)
+ sbbq %r10, %r9
+ movq 88(%rdx), %r8
+ movq 88(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 80(%rdi)
+ sbbq %r8, %r10
+ movq 96(%rdx), %r9
+ movq 96(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 88(%rdi)
+ sbbq %r9, %r8
+ movq 104(%rdx), %r10
+ movq 104(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 96(%rdi)
+ sbbq %r10, %r9
+ movq 112(%rdx), %r8
+ movq 112(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 104(%rdi)
+ sbbq %r8, %r10
+ movq 120(%rdx), %r9
+ movq 120(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 112(%rdi)
+ sbbq %r9, %r8
+ movq 128(%rdx), %r10
+ movq 128(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 120(%rdi)
+ sbbq %r10, %r9
+ movq 136(%rdx), %r8
+ movq 136(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 128(%rdi)
+ sbbq %r8, %r10
+ movq 144(%rdx), %r9
+ movq 144(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 136(%rdi)
+ sbbq %r9, %r8
+ movq 152(%rdx), %r10
+ movq 152(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 144(%rdi)
+ sbbq %r10, %r9
+ movq 160(%rdx), %r8
+ movq 160(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 152(%rdi)
+ sbbq %r8, %r10
+ movq 168(%rdx), %r9
+ movq 168(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 160(%rdi)
+ sbbq %r9, %r8
+ movq 176(%rdx), %r10
+ movq 176(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 168(%rdi)
+ sbbq %r10, %r9
+ movq 184(%rdx), %r8
+ movq 184(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 176(%rdi)
+ sbbq %r8, %r10
+ movq 192(%rdx), %r9
+ movq 192(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 184(%rdi)
+ sbbq %r9, %r8
+ movq 200(%rdx), %r10
+ movq 200(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 192(%rdi)
+ sbbq %r10, %r9
+ movq 208(%rdx), %r8
+ movq 208(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 200(%rdi)
+ sbbq %r8, %r10
+ movq 216(%rdx), %r9
+ movq 216(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 208(%rdi)
+ sbbq %r9, %r8
+ movq 224(%rdx), %r10
+ movq 224(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 216(%rdi)
+ sbbq %r10, %r9
+ movq 232(%rdx), %r8
+ movq 232(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 224(%rdi)
+ sbbq %r8, %r10
+ movq 240(%rdx), %r9
+ movq 240(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 232(%rdi)
+ sbbq %r9, %r8
+ movq 248(%rdx), %r10
+ movq 248(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 240(%rdi)
+ sbbq %r10, %r9
+ movq %r9, 248(%rdi)
+ # rax = 0 - borrow: 0 if no borrow, -1 if the chain borrowed out.
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_cond_sub_avx2_32,.-sp_2048_cond_sub_avx2_32
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+ # SysV AMD64: rdi = r (receives 33 limbs), rsi = a (32 limbs), rdx = b.
+ # 32-limb version of sp_2048_mul_d_avx2_16: BMI2 MULX with b held in
+ # rdx, ADX ADCX/ADOX running two independent carry chains (CF and OF).
+ # Clobbers: rax, rcx, rdx, r8-r11, flags. No stack use (leaf).
+#ifndef __APPLE__
+.globl sp_2048_mul_d_avx2_32
+.type sp_2048_mul_d_avx2_32,@function
+.align 16
+sp_2048_mul_d_avx2_32:
+#else
+.globl _sp_2048_mul_d_avx2_32
+.p2align 4
+_sp_2048_mul_d_avx2_32:
+#endif /* __APPLE__ */
+ movq %rdx, %rax
+ # A[0] * B
+ movq %rax, %rdx
+ # r11 stays 0 throughout; the xor also clears CF and OF to prime the
+ # adcx/adox carry chains.
+ xorq %r11, %r11
+ mulxq (%rsi), %r9, %r10
+ movq %r9, (%rdi)
+ # A[1] * B
+ mulxq 8(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 8(%rdi)
+ adoxq %r8, %r9
+ # A[2] * B
+ mulxq 16(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 16(%rdi)
+ adoxq %r8, %r10
+ # A[3] * B
+ mulxq 24(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 24(%rdi)
+ adoxq %r8, %r9
+ # A[4] * B
+ mulxq 32(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 32(%rdi)
+ adoxq %r8, %r10
+ # A[5] * B
+ mulxq 40(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 40(%rdi)
+ adoxq %r8, %r9
+ # A[6] * B
+ mulxq 48(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 48(%rdi)
+ adoxq %r8, %r10
+ # A[7] * B
+ mulxq 56(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 56(%rdi)
+ adoxq %r8, %r9
+ # A[8] * B
+ mulxq 64(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 64(%rdi)
+ adoxq %r8, %r10
+ # A[9] * B
+ mulxq 72(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 72(%rdi)
+ adoxq %r8, %r9
+ # A[10] * B
+ mulxq 80(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 80(%rdi)
+ adoxq %r8, %r10
+ # A[11] * B
+ mulxq 88(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 88(%rdi)
+ adoxq %r8, %r9
+ # A[12] * B
+ mulxq 96(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 96(%rdi)
+ adoxq %r8, %r10
+ # A[13] * B
+ mulxq 104(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 104(%rdi)
+ adoxq %r8, %r9
+ # A[14] * B
+ mulxq 112(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 112(%rdi)
+ adoxq %r8, %r10
+ # A[15] * B
+ mulxq 120(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 120(%rdi)
+ adoxq %r8, %r9
+ # A[16] * B
+ mulxq 128(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 128(%rdi)
+ adoxq %r8, %r10
+ # A[17] * B
+ mulxq 136(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 136(%rdi)
+ adoxq %r8, %r9
+ # A[18] * B
+ mulxq 144(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 144(%rdi)
+ adoxq %r8, %r10
+ # A[19] * B
+ mulxq 152(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 152(%rdi)
+ adoxq %r8, %r9
+ # A[20] * B
+ mulxq 160(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 160(%rdi)
+ adoxq %r8, %r10
+ # A[21] * B
+ mulxq 168(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 168(%rdi)
+ adoxq %r8, %r9
+ # A[22] * B
+ mulxq 176(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 176(%rdi)
+ adoxq %r8, %r10
+ # A[23] * B
+ mulxq 184(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 184(%rdi)
+ adoxq %r8, %r9
+ # A[24] * B
+ mulxq 192(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 192(%rdi)
+ adoxq %r8, %r10
+ # A[25] * B
+ mulxq 200(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 200(%rdi)
+ adoxq %r8, %r9
+ # A[26] * B
+ mulxq 208(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 208(%rdi)
+ adoxq %r8, %r10
+ # A[27] * B
+ mulxq 216(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 216(%rdi)
+ adoxq %r8, %r9
+ # A[28] * B
+ mulxq 224(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 224(%rdi)
+ adoxq %r8, %r10
+ # A[29] * B
+ mulxq 232(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 232(%rdi)
+ adoxq %r8, %r9
+ # A[30] * B
+ mulxq 240(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 240(%rdi)
+ adoxq %r8, %r10
+ # A[31] * B
+ mulxq 248(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ # Fold both carry chains (OF then CF) into the top limb r[32].
+ adoxq %r8, %r9
+ adcxq %r11, %r9
+ movq %r10, 248(%rdi)
+ movq %r9, 256(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_mul_d_avx2_32,.-sp_2048_mul_d_avx2_32
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+#ifndef __APPLE__
+.globl sp_2048_cmp_32
+.type sp_2048_cmp_32,@function
+.align 16
+sp_2048_cmp_32:
+#else
+.globl _sp_2048_cmp_32
+.p2align 4
+_sp_2048_cmp_32:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = a, rsi = b.  Result returned in rax.
+ # Register roles:
+ #   rcx = 0   (moved into the mask to clear it once limbs differ)
+ #   rdx = "still equal" mask: -1 while all limbs seen so far are equal,
+ #         0 from the first differing limb onward
+ #   rax = result accumulator, combined with the mask at the end
+ #   r8  = +1  (moved into rax when a > b at the deciding limb)
+ xorq %rcx, %rcx
+ movq $-1, %rdx
+ movq $-1, %rax
+ movq $1, %r8
+ # Limbs are scanned from most significant (offset 248) down to least
+ # significant.  Both limbs are ANDed with the mask before comparing, so
+ # after the first difference every later iteration compares 0 with 0 and
+ # leaves rax unchanged.  All 32 iterations always execute, with no data-
+ # dependent branches, keeping the timing constant.
+ # Per limb: cmova sets rax=+1 if a>b, cmovc sets rax=-1 if a<b, and
+ # cmovnz clears the mask once a difference has been seen.
+ movq 248(%rdi), %r9
+ movq 248(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 240(%rdi), %r9
+ movq 240(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 232(%rdi), %r9
+ movq 232(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 224(%rdi), %r9
+ movq 224(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 216(%rdi), %r9
+ movq 216(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 208(%rdi), %r9
+ movq 208(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 200(%rdi), %r9
+ movq 200(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 192(%rdi), %r9
+ movq 192(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 184(%rdi), %r9
+ movq 184(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 176(%rdi), %r9
+ movq 176(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 168(%rdi), %r9
+ movq 168(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 160(%rdi), %r9
+ movq 160(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 152(%rdi), %r9
+ movq 152(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 144(%rdi), %r9
+ movq 144(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 136(%rdi), %r9
+ movq 136(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 128(%rdi), %r9
+ movq 128(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 120(%rdi), %r9
+ movq 120(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 112(%rdi), %r9
+ movq 112(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 104(%rdi), %r9
+ movq 104(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 96(%rdi), %r9
+ movq 96(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 88(%rdi), %r9
+ movq 88(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 80(%rdi), %r9
+ movq 80(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 72(%rdi), %r9
+ movq 72(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 64(%rdi), %r9
+ movq 64(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 56(%rdi), %r9
+ movq 56(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 48(%rdi), %r9
+ movq 48(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 40(%rdi), %r9
+ movq 40(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 32(%rdi), %r9
+ movq 32(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 24(%rdi), %r9
+ movq 24(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 16(%rdi), %r9
+ movq 16(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 8(%rdi), %r9
+ movq 8(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq (%rdi), %r9
+ movq (%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ # If every limb was equal both rax and the mask are still -1, so
+ # rax ^ rdx = 0.  Otherwise the mask is 0 and rax (+1 or -1) survives.
+ xorq %rdx, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_cmp_32,.-sp_2048_cmp_32
+#endif /* __APPLE__ */
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_sub_32
+.type sp_2048_sub_32,@function
+.align 16
+sp_2048_sub_32:
+#else
+.globl _sp_2048_sub_32
+.p2align 4
+_sp_2048_sub_32:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b.
+ # 32-limb subtract with a single sub/sbb borrow chain; loads and stores
+ # are interleaved (rcx and r8 alternate as the working limb) and none of
+ # the interleaved movq instructions disturb the carry flag.
+ # Returns in rax: 0 when no borrow, -1 (all ones) when b > a.
+ movq (%rsi), %rcx
+ xorq %rax, %rax
+ subq (%rdx), %rcx
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ sbbq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ sbbq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ sbbq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ sbbq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ sbbq 40(%rdx), %r8
+ movq 48(%rsi), %rcx
+ movq %r8, 40(%rdi)
+ sbbq 48(%rdx), %rcx
+ movq 56(%rsi), %r8
+ movq %rcx, 48(%rdi)
+ sbbq 56(%rdx), %r8
+ movq 64(%rsi), %rcx
+ movq %r8, 56(%rdi)
+ sbbq 64(%rdx), %rcx
+ movq 72(%rsi), %r8
+ movq %rcx, 64(%rdi)
+ sbbq 72(%rdx), %r8
+ movq 80(%rsi), %rcx
+ movq %r8, 72(%rdi)
+ sbbq 80(%rdx), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%rdi)
+ sbbq 88(%rdx), %r8
+ movq 96(%rsi), %rcx
+ movq %r8, 88(%rdi)
+ sbbq 96(%rdx), %rcx
+ movq 104(%rsi), %r8
+ movq %rcx, 96(%rdi)
+ sbbq 104(%rdx), %r8
+ movq 112(%rsi), %rcx
+ movq %r8, 104(%rdi)
+ sbbq 112(%rdx), %rcx
+ movq 120(%rsi), %r8
+ movq %rcx, 112(%rdi)
+ sbbq 120(%rdx), %r8
+ movq 128(%rsi), %rcx
+ movq %r8, 120(%rdi)
+ sbbq 128(%rdx), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%rdi)
+ sbbq 136(%rdx), %r8
+ movq 144(%rsi), %rcx
+ movq %r8, 136(%rdi)
+ sbbq 144(%rdx), %rcx
+ movq 152(%rsi), %r8
+ movq %rcx, 144(%rdi)
+ sbbq 152(%rdx), %r8
+ movq 160(%rsi), %rcx
+ movq %r8, 152(%rdi)
+ sbbq 160(%rdx), %rcx
+ movq 168(%rsi), %r8
+ movq %rcx, 160(%rdi)
+ sbbq 168(%rdx), %r8
+ movq 176(%rsi), %rcx
+ movq %r8, 168(%rdi)
+ sbbq 176(%rdx), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%rdi)
+ sbbq 184(%rdx), %r8
+ movq 192(%rsi), %rcx
+ movq %r8, 184(%rdi)
+ sbbq 192(%rdx), %rcx
+ movq 200(%rsi), %r8
+ movq %rcx, 192(%rdi)
+ sbbq 200(%rdx), %r8
+ movq 208(%rsi), %rcx
+ movq %r8, 200(%rdi)
+ sbbq 208(%rdx), %rcx
+ movq 216(%rsi), %r8
+ movq %rcx, 208(%rdi)
+ sbbq 216(%rdx), %r8
+ movq 224(%rsi), %rcx
+ movq %r8, 216(%rdi)
+ sbbq 224(%rdx), %rcx
+ movq 232(%rsi), %r8
+ movq %rcx, 224(%rdi)
+ sbbq 232(%rdx), %r8
+ movq 240(%rsi), %rcx
+ movq %r8, 232(%rdi)
+ sbbq 240(%rdx), %rcx
+ movq 248(%rsi), %r8
+ movq %rcx, 240(%rdi)
+ sbbq 248(%rdx), %r8
+ movq %r8, 248(%rdi)
+ # rax = 0 - borrow: propagate the final borrow into the return value.
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_sub_32,.-sp_2048_sub_32
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Reduce the number back to 2048 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_2048_mont_reduce_avx2_32
+.type sp_2048_mont_reduce_avx2_32,@function
+.align 16
+sp_2048_mont_reduce_avx2_32:
+#else
+.globl _sp_2048_mont_reduce_avx2_32
+.p2align 4
+_sp_2048_mont_reduce_avx2_32:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = a, rsi = m, rdx = mp.  Requires BMI2/ADX
+ # (mulx/adcx/adox/pext).  Uses two independent carry chains: adcx
+ # consumes CF, adox consumes OF, letting the multiply-accumulate run
+ # with two carries in flight per iteration.
+ # Register roles:
+ #   r8  = mp (rdx is needed as the implicit mulx multiplier)
+ #   r9  = loop counter i (counts 32 rows down to 0)
+ #   r13 = cached a[i] for the current row
+ #   r14 = carry propagated from row to row
+ #   r12 = constant 0, used to fold CF/OF into r14
+ push %r12
+ push %r13
+ push %r14
+ movq %rdx, %r8
+ xorq %r14, %r14
+ # i = 32
+ movq $32, %r9
+ movq (%rdi), %r13
+ # Bias the a pointer by 16 limbs so the 32 limbs touched by one row
+ # span -120(%rdi) .. 128(%rdi).
+ addq $128, %rdi
+ xorq %r12, %r12
+L_mont_loop_avx2_32:
+ # mu = a[i] * mp
+ movq %r13, %rdx
+ movq %r13, %r10
+ imulq %r8, %rdx
+ xorq %r12, %r12
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rax, %rcx
+ movq -120(%rdi), %r13
+ adcxq %rax, %r10
+ adoxq %rcx, %r13
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rax, %rcx
+ movq -112(%rdi), %r10
+ adcxq %rax, %r13
+ adoxq %rcx, %r10
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rax, %rcx
+ movq -104(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -112(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rax, %rcx
+ movq -96(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -104(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rax, %rcx
+ movq -88(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -96(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rax, %rcx
+ movq -80(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -88(%rdi)
+ # a[i+6] += m[6] * mu
+ mulxq 48(%rsi), %rax, %rcx
+ movq -72(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -80(%rdi)
+ # a[i+7] += m[7] * mu
+ mulxq 56(%rsi), %rax, %rcx
+ movq -64(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -72(%rdi)
+ # a[i+8] += m[8] * mu
+ mulxq 64(%rsi), %rax, %rcx
+ movq -56(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -64(%rdi)
+ # a[i+9] += m[9] * mu
+ mulxq 72(%rsi), %rax, %rcx
+ movq -48(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -56(%rdi)
+ # a[i+10] += m[10] * mu
+ mulxq 80(%rsi), %rax, %rcx
+ movq -40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -48(%rdi)
+ # a[i+11] += m[11] * mu
+ mulxq 88(%rsi), %rax, %rcx
+ movq -32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -40(%rdi)
+ # a[i+12] += m[12] * mu
+ mulxq 96(%rsi), %rax, %rcx
+ movq -24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -32(%rdi)
+ # a[i+13] += m[13] * mu
+ mulxq 104(%rsi), %rax, %rcx
+ movq -16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -24(%rdi)
+ # a[i+14] += m[14] * mu
+ mulxq 112(%rsi), %rax, %rcx
+ movq -8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -16(%rdi)
+ # a[i+15] += m[15] * mu
+ mulxq 120(%rsi), %rax, %rcx
+ movq (%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -8(%rdi)
+ # a[i+16] += m[16] * mu
+ mulxq 128(%rsi), %rax, %rcx
+ movq 8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, (%rdi)
+ # a[i+17] += m[17] * mu
+ mulxq 136(%rsi), %rax, %rcx
+ movq 16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 8(%rdi)
+ # a[i+18] += m[18] * mu
+ mulxq 144(%rsi), %rax, %rcx
+ movq 24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 16(%rdi)
+ # a[i+19] += m[19] * mu
+ mulxq 152(%rsi), %rax, %rcx
+ movq 32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 24(%rdi)
+ # a[i+20] += m[20] * mu
+ mulxq 160(%rsi), %rax, %rcx
+ movq 40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 32(%rdi)
+ # a[i+21] += m[21] * mu
+ mulxq 168(%rsi), %rax, %rcx
+ movq 48(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 40(%rdi)
+ # a[i+22] += m[22] * mu
+ mulxq 176(%rsi), %rax, %rcx
+ movq 56(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 48(%rdi)
+ # a[i+23] += m[23] * mu
+ mulxq 184(%rsi), %rax, %rcx
+ movq 64(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 56(%rdi)
+ # a[i+24] += m[24] * mu
+ mulxq 192(%rsi), %rax, %rcx
+ movq 72(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 64(%rdi)
+ # a[i+25] += m[25] * mu
+ mulxq 200(%rsi), %rax, %rcx
+ movq 80(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 72(%rdi)
+ # a[i+26] += m[26] * mu
+ mulxq 208(%rsi), %rax, %rcx
+ movq 88(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 80(%rdi)
+ # a[i+27] += m[27] * mu
+ mulxq 216(%rsi), %rax, %rcx
+ movq 96(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 88(%rdi)
+ # a[i+28] += m[28] * mu
+ mulxq 224(%rsi), %rax, %rcx
+ movq 104(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 96(%rdi)
+ # a[i+29] += m[29] * mu
+ mulxq 232(%rsi), %rax, %rcx
+ movq 112(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 104(%rdi)
+ # a[i+30] += m[30] * mu
+ mulxq 240(%rsi), %rax, %rcx
+ movq 120(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 112(%rdi)
+ # a[i+31] += m[31] * mu
+ mulxq 248(%rsi), %rax, %rcx
+ movq 128(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 120(%rdi)
+ # Add last row's carry (r14), then fold both carry flags into r14
+ # for the next row (r12 is 0, so r14 = CF + OF).
+ adcxq %r14, %r10
+ movq %r10, 128(%rdi)
+ movq %r12, %r14
+ adoxq %r12, %r14
+ adcxq %r12, %r14
+ # a += 1
+ addq $8, %rdi
+ # i -= 1
+ subq $1, %r9
+ jnz L_mont_loop_avx2_32
+ # Final conditional subtract: r14 holds the reduction overflow (0/1);
+ # negq turns it into an all-ones/zero mask.  pextq with an all-ones
+ # mask copies the limb unchanged and with a zero mask yields 0, so the
+ # top half a[32..63] - (mask & m) is computed branch-free and written
+ # to the result limbs a[0..31].  r8 points at the top half.
+ subq $128, %rdi
+ negq %r14
+ movq %rdi, %r8
+ subq $256, %rdi
+ movq (%rsi), %rcx
+ movq %r13, %rdx
+ pextq %r14, %rcx, %rcx
+ subq %rcx, %rdx
+ movq 8(%rsi), %rcx
+ movq 8(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, (%rdi)
+ sbbq %rcx, %rax
+ movq 16(%rsi), %rdx
+ movq 16(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 8(%rdi)
+ sbbq %rdx, %rcx
+ movq 24(%rsi), %rax
+ movq 24(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 16(%rdi)
+ sbbq %rax, %rdx
+ movq 32(%rsi), %rcx
+ movq 32(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 24(%rdi)
+ sbbq %rcx, %rax
+ movq 40(%rsi), %rdx
+ movq 40(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 32(%rdi)
+ sbbq %rdx, %rcx
+ movq 48(%rsi), %rax
+ movq 48(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 40(%rdi)
+ sbbq %rax, %rdx
+ movq 56(%rsi), %rcx
+ movq 56(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 48(%rdi)
+ sbbq %rcx, %rax
+ movq 64(%rsi), %rdx
+ movq 64(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 56(%rdi)
+ sbbq %rdx, %rcx
+ movq 72(%rsi), %rax
+ movq 72(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 64(%rdi)
+ sbbq %rax, %rdx
+ movq 80(%rsi), %rcx
+ movq 80(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 72(%rdi)
+ sbbq %rcx, %rax
+ movq 88(%rsi), %rdx
+ movq 88(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 80(%rdi)
+ sbbq %rdx, %rcx
+ movq 96(%rsi), %rax
+ movq 96(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 88(%rdi)
+ sbbq %rax, %rdx
+ movq 104(%rsi), %rcx
+ movq 104(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 96(%rdi)
+ sbbq %rcx, %rax
+ movq 112(%rsi), %rdx
+ movq 112(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 104(%rdi)
+ sbbq %rdx, %rcx
+ movq 120(%rsi), %rax
+ movq 120(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 112(%rdi)
+ sbbq %rax, %rdx
+ movq 128(%rsi), %rcx
+ movq 128(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 120(%rdi)
+ sbbq %rcx, %rax
+ movq 136(%rsi), %rdx
+ movq 136(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 128(%rdi)
+ sbbq %rdx, %rcx
+ movq 144(%rsi), %rax
+ movq 144(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 136(%rdi)
+ sbbq %rax, %rdx
+ movq 152(%rsi), %rcx
+ movq 152(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 144(%rdi)
+ sbbq %rcx, %rax
+ movq 160(%rsi), %rdx
+ movq 160(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 152(%rdi)
+ sbbq %rdx, %rcx
+ movq 168(%rsi), %rax
+ movq 168(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 160(%rdi)
+ sbbq %rax, %rdx
+ movq 176(%rsi), %rcx
+ movq 176(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 168(%rdi)
+ sbbq %rcx, %rax
+ movq 184(%rsi), %rdx
+ movq 184(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 176(%rdi)
+ sbbq %rdx, %rcx
+ movq 192(%rsi), %rax
+ movq 192(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 184(%rdi)
+ sbbq %rax, %rdx
+ movq 200(%rsi), %rcx
+ movq 200(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 192(%rdi)
+ sbbq %rcx, %rax
+ movq 208(%rsi), %rdx
+ movq 208(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 200(%rdi)
+ sbbq %rdx, %rcx
+ movq 216(%rsi), %rax
+ movq 216(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 208(%rdi)
+ sbbq %rax, %rdx
+ movq 224(%rsi), %rcx
+ movq 224(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 216(%rdi)
+ sbbq %rcx, %rax
+ movq 232(%rsi), %rdx
+ movq 232(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 224(%rdi)
+ sbbq %rdx, %rcx
+ movq 240(%rsi), %rax
+ movq 240(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 232(%rdi)
+ sbbq %rax, %rdx
+ movq 248(%rsi), %rcx
+ movq 248(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 240(%rdi)
+ sbbq %rcx, %rax
+ movq %rax, 248(%rdi)
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_mont_reduce_avx2_32,.-sp_2048_mont_reduce_avx2_32
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_2048_cond_add_16
+.type sp_2048_cond_add_16,@function
+.align 16
+sp_2048_cond_add_16:
+#else
+.globl _sp_2048_cond_add_16
+.p2align 4
+_sp_2048_cond_add_16:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = m.  Returns the carry
+ # out of the 16-limb addition in rax (0 or 1).
+ # Branch-free: (b & m) is first staged into a 128-byte stack buffer,
+ # then added to a with an unbroken adc carry chain.  rdx is reused as
+ # scratch during the add phase once b has been fully copied.
+ subq $128, %rsp
+ movq $0, %rax
+ movq (%rdx), %r8
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 48(%rdx), %r8
+ movq 56(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 48(%rsp)
+ movq %r9, 56(%rsp)
+ movq 64(%rdx), %r8
+ movq 72(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq 80(%rdx), %r8
+ movq 88(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 80(%rsp)
+ movq %r9, 88(%rsp)
+ movq 96(%rdx), %r8
+ movq 104(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq 112(%rdx), %r8
+ movq 120(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 112(%rsp)
+ movq %r9, 120(%rsp)
+ # r = a + (b & m), carry chained across all 16 limbs.
+ movq (%rsi), %r8
+ movq (%rsp), %rdx
+ addq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, (%rdi)
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq 48(%rsi), %r8
+ movq 48(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 40(%rdi)
+ movq 56(%rsi), %r9
+ movq 56(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 48(%rdi)
+ movq 64(%rsi), %r8
+ movq 64(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 56(%rdi)
+ movq 72(%rsi), %r9
+ movq 72(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 64(%rdi)
+ movq 80(%rsi), %r8
+ movq 80(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 72(%rdi)
+ movq 88(%rsi), %r9
+ movq 88(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 80(%rdi)
+ movq 96(%rsi), %r8
+ movq 96(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 88(%rdi)
+ movq 104(%rsi), %r9
+ movq 104(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rsi), %r8
+ movq 112(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rsi), %r9
+ movq 120(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 112(%rdi)
+ movq %r9, 120(%rdi)
+ # rax = final carry out.
+ adcq $0, %rax
+ addq $128, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_cond_add_16,.-sp_2048_cond_add_16
+#endif /* __APPLE__ */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_2048_cond_add_avx2_16
+.type sp_2048_cond_add_avx2_16,@function
+.align 16
+sp_2048_cond_add_avx2_16:
+#else
+.globl _sp_2048_cond_add_avx2_16
+.p2align 4
+_sp_2048_cond_add_avx2_16:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = m.  Returns the carry
+ # out of the 16-limb addition in rax (0 or 1).  Requires BMI2.
+ # Same contract as sp_2048_cond_add_16 but masks each b limb in-flight
+ # with pextq (mask -1 copies the limb, mask 0 yields 0) instead of
+ # staging (b & m) on the stack; pextq does not affect flags, so the
+ # adc carry chain stays intact.  r8/r9/r10 rotate as working limbs.
+ movq $0, %rax
+ movq (%rdx), %r10
+ movq (%rsi), %r8
+ pextq %rcx, %r10, %r10
+ addq %r10, %r8
+ movq 8(%rdx), %r10
+ movq 8(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, (%rdi)
+ adcq %r10, %r9
+ movq 16(%rdx), %r8
+ movq 16(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 8(%rdi)
+ adcq %r8, %r10
+ movq 24(%rdx), %r9
+ movq 24(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 16(%rdi)
+ adcq %r9, %r8
+ movq 32(%rdx), %r10
+ movq 32(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 24(%rdi)
+ adcq %r10, %r9
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 32(%rdi)
+ adcq %r8, %r10
+ movq 48(%rdx), %r9
+ movq 48(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 40(%rdi)
+ adcq %r9, %r8
+ movq 56(%rdx), %r10
+ movq 56(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 48(%rdi)
+ adcq %r10, %r9
+ movq 64(%rdx), %r8
+ movq 64(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 56(%rdi)
+ adcq %r8, %r10
+ movq 72(%rdx), %r9
+ movq 72(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 64(%rdi)
+ adcq %r9, %r8
+ movq 80(%rdx), %r10
+ movq 80(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 72(%rdi)
+ adcq %r10, %r9
+ movq 88(%rdx), %r8
+ movq 88(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 80(%rdi)
+ adcq %r8, %r10
+ movq 96(%rdx), %r9
+ movq 96(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 88(%rdi)
+ adcq %r9, %r8
+ movq 104(%rdx), %r10
+ movq 104(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 96(%rdi)
+ adcq %r10, %r9
+ movq 112(%rdx), %r8
+ movq 112(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 104(%rdi)
+ adcq %r8, %r10
+ movq 120(%rdx), %r9
+ movq 120(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 112(%rdi)
+ adcq %r9, %r8
+ movq %r8, 120(%rdi)
+ # rax = final carry out.
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_cond_add_avx2_16,.-sp_2048_cond_add_avx2_16
+#endif /* __APPLE__ */
+/* Shift number left by n bit. (r = a << n)
+ *
+ * r Result of left shift by n.
+ * a Number to shift.
+ * n Amount to shift.
+ */
+#ifndef __APPLE__
+.globl sp_2048_lshift_32
+.type sp_2048_lshift_32,@function
+.align 16
+sp_2048_lshift_32:
+#else
+.globl _sp_2048_lshift_32
+.p2align 4
+_sp_2048_lshift_32:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = n (shift count, used via cl).
+ # Shifts the 32-limb (2048-bit) number left by n bits and writes 33
+ # limbs: r[32] (offset 256) receives the bits shifted out of the top.
+ # Works from the most significant limbs downward in groups of four
+ # using shldq, which pulls in bits from the next-lower limb; the
+ # lowest limb uses a plain shlq.  r11/r9 alternately carry the
+ # boundary limb between groups.
+ movq %rdx, %rcx
+ movq $0, %r10
+ movq 216(%rsi), %r11
+ movq 224(%rsi), %rdx
+ movq 232(%rsi), %rax
+ movq 240(%rsi), %r8
+ movq 248(%rsi), %r9
+ shldq %cl, %r9, %r10
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 224(%rdi)
+ movq %rax, 232(%rdi)
+ movq %r8, 240(%rdi)
+ movq %r9, 248(%rdi)
+ movq %r10, 256(%rdi)
+ movq 184(%rsi), %r9
+ movq 192(%rsi), %rdx
+ movq 200(%rsi), %rax
+ movq 208(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 192(%rdi)
+ movq %rax, 200(%rdi)
+ movq %r8, 208(%rdi)
+ movq %r11, 216(%rdi)
+ movq 152(%rsi), %r11
+ movq 160(%rsi), %rdx
+ movq 168(%rsi), %rax
+ movq 176(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 160(%rdi)
+ movq %rax, 168(%rdi)
+ movq %r8, 176(%rdi)
+ movq %r9, 184(%rdi)
+ movq 120(%rsi), %r9
+ movq 128(%rsi), %rdx
+ movq 136(%rsi), %rax
+ movq 144(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 128(%rdi)
+ movq %rax, 136(%rdi)
+ movq %r8, 144(%rdi)
+ movq %r11, 152(%rdi)
+ movq 88(%rsi), %r11
+ movq 96(%rsi), %rdx
+ movq 104(%rsi), %rax
+ movq 112(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 96(%rdi)
+ movq %rax, 104(%rdi)
+ movq %r8, 112(%rdi)
+ movq %r9, 120(%rdi)
+ movq 56(%rsi), %r9
+ movq 64(%rsi), %rdx
+ movq 72(%rsi), %rax
+ movq 80(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 64(%rdi)
+ movq %rax, 72(%rdi)
+ movq %r8, 80(%rdi)
+ movq %r11, 88(%rdi)
+ movq 24(%rsi), %r11
+ movq 32(%rsi), %rdx
+ movq 40(%rsi), %rax
+ movq 48(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 32(%rdi)
+ movq %rax, 40(%rdi)
+ movq %r8, 48(%rdi)
+ movq %r9, 56(%rdi)
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shlq %cl, %rdx
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r11, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_lshift_32,.-sp_2048_lshift_32
+#endif /* __APPLE__ */
+#endif /* !WOLFSSL_SP_NO_2048 */
+#endif /* !WOLFSSL_SP_NO_2048 */
+#ifndef WOLFSSL_SP_NO_3072
+#ifndef WOLFSSL_SP_NO_3072
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+#ifndef __APPLE__
+.globl sp_3072_from_bin
+.type sp_3072_from_bin,@function
+.align 16
+sp_3072_from_bin:
+#else
+.globl _sp_3072_from_bin
+.p2align 4
+_sp_3072_from_bin:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = size, rdx = a, rcx = n.
+ # Register roles:
+ #   r9  = a + n, walked backwards (the last input byte is the least
+ #         significant, so limbs are filled from r[0] upward)
+ #   r10 = r + 384, the end of the 48-limb output
+ #   r11 = constant 0 (zero-fill value and loop-variable seed)
+ # movbeq performs the big-endian to little-endian byte swap on load.
+ # NOTE(review): rsi (size) is not referenced below; output length
+ # appears fixed at 384 bytes — confirm against callers.
+ movq %rdx, %r9
+ movq %rdi, %r10
+ addq %rcx, %r9
+ addq $384, %r10
+ xorq %r11, %r11
+ jmp L_3072_from_bin_64_end
+L_3072_from_bin_64_start:
+ # Convert 64 input bytes (8 limbs) per iteration while n >= 64.
+ subq $64, %r9
+ movbeq 56(%r9), %rax
+ movbeq 48(%r9), %r8
+ movq %rax, (%rdi)
+ movq %r8, 8(%rdi)
+ movbeq 40(%r9), %rax
+ movbeq 32(%r9), %r8
+ movq %rax, 16(%rdi)
+ movq %r8, 24(%rdi)
+ movbeq 24(%r9), %rax
+ movbeq 16(%r9), %r8
+ movq %rax, 32(%rdi)
+ movq %r8, 40(%rdi)
+ movbeq 8(%r9), %rax
+ movbeq (%r9), %r8
+ movq %rax, 48(%rdi)
+ movq %r8, 56(%rdi)
+ addq $64, %rdi
+ subq $64, %rcx
+L_3072_from_bin_64_end:
+ cmpq $63, %rcx
+ jg L_3072_from_bin_64_start
+ jmp L_3072_from_bin_8_end
+L_3072_from_bin_8_start:
+ # Convert one full 8-byte limb per iteration while n >= 8.
+ subq $8, %r9
+ movbeq (%r9), %rax
+ movq %rax, (%rdi)
+ addq $8, %rdi
+ subq $8, %rcx
+L_3072_from_bin_8_end:
+ cmpq $7, %rcx
+ jg L_3072_from_bin_8_start
+ cmpq %r11, %rcx
+ je L_3072_from_bin_hi_end
+ movq %r11, %r8
+ movq %r11, %rax
+L_3072_from_bin_hi_start:
+ # Assemble the leftover high-order bytes (n mod 8, read forward from
+ # the start of the array via rdx) into one final limb in r8.
+ movb (%rdx), %al
+ shlq $8, %r8
+ incq %rdx
+ addq %rax, %r8
+ decq %rcx
+ jg L_3072_from_bin_hi_start
+ movq %r8, (%rdi)
+ addq $8, %rdi
+L_3072_from_bin_hi_end:
+ cmpq %r10, %rdi
+ je L_3072_from_bin_zero_end
+L_3072_from_bin_zero_start:
+ # Zero-fill the remaining limbs up to r + 384.
+ movq %r11, (%rdi)
+ addq $8, %rdi
+ cmpq %r10, %rdi
+ jl L_3072_from_bin_zero_start
+L_3072_from_bin_zero_end:
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_from_bin,.-sp_3072_from_bin
+#endif /* __APPLE__ */
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 384
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+#ifndef __APPLE__
+.globl sp_3072_to_bin
+.type sp_3072_to_bin,@function
+.align 16
+sp_3072_to_bin:
+#else
+.globl _sp_3072_to_bin
+.p2align 4
+_sp_3072_to_bin:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r (the number, 48 limbs), rsi = a (output bytes).
+ # Straight-line: reads the limbs from most significant (offset 376)
+ # down to least and stores them forward through the output buffer.
+ # movbeq performs the little-endian to big-endian byte swap on load,
+ # two limbs (16 bytes) per group.
+ movbeq 376(%rdi), %rdx
+ movbeq 368(%rdi), %rax
+ movq %rdx, (%rsi)
+ movq %rax, 8(%rsi)
+ movbeq 360(%rdi), %rdx
+ movbeq 352(%rdi), %rax
+ movq %rdx, 16(%rsi)
+ movq %rax, 24(%rsi)
+ movbeq 344(%rdi), %rdx
+ movbeq 336(%rdi), %rax
+ movq %rdx, 32(%rsi)
+ movq %rax, 40(%rsi)
+ movbeq 328(%rdi), %rdx
+ movbeq 320(%rdi), %rax
+ movq %rdx, 48(%rsi)
+ movq %rax, 56(%rsi)
+ movbeq 312(%rdi), %rdx
+ movbeq 304(%rdi), %rax
+ movq %rdx, 64(%rsi)
+ movq %rax, 72(%rsi)
+ movbeq 296(%rdi), %rdx
+ movbeq 288(%rdi), %rax
+ movq %rdx, 80(%rsi)
+ movq %rax, 88(%rsi)
+ movbeq 280(%rdi), %rdx
+ movbeq 272(%rdi), %rax
+ movq %rdx, 96(%rsi)
+ movq %rax, 104(%rsi)
+ movbeq 264(%rdi), %rdx
+ movbeq 256(%rdi), %rax
+ movq %rdx, 112(%rsi)
+ movq %rax, 120(%rsi)
+ movbeq 248(%rdi), %rdx
+ movbeq 240(%rdi), %rax
+ movq %rdx, 128(%rsi)
+ movq %rax, 136(%rsi)
+ movbeq 232(%rdi), %rdx
+ movbeq 224(%rdi), %rax
+ movq %rdx, 144(%rsi)
+ movq %rax, 152(%rsi)
+ movbeq 216(%rdi), %rdx
+ movbeq 208(%rdi), %rax
+ movq %rdx, 160(%rsi)
+ movq %rax, 168(%rsi)
+ movbeq 200(%rdi), %rdx
+ movbeq 192(%rdi), %rax
+ movq %rdx, 176(%rsi)
+ movq %rax, 184(%rsi)
+ movbeq 184(%rdi), %rdx
+ movbeq 176(%rdi), %rax
+ movq %rdx, 192(%rsi)
+ movq %rax, 200(%rsi)
+ movbeq 168(%rdi), %rdx
+ movbeq 160(%rdi), %rax
+ movq %rdx, 208(%rsi)
+ movq %rax, 216(%rsi)
+ movbeq 152(%rdi), %rdx
+ movbeq 144(%rdi), %rax
+ movq %rdx, 224(%rsi)
+ movq %rax, 232(%rsi)
+ movbeq 136(%rdi), %rdx
+ movbeq 128(%rdi), %rax
+ movq %rdx, 240(%rsi)
+ movq %rax, 248(%rsi)
+ movbeq 120(%rdi), %rdx
+ movbeq 112(%rdi), %rax
+ movq %rdx, 256(%rsi)
+ movq %rax, 264(%rsi)
+ movbeq 104(%rdi), %rdx
+ movbeq 96(%rdi), %rax
+ movq %rdx, 272(%rsi)
+ movq %rax, 280(%rsi)
+ movbeq 88(%rdi), %rdx
+ movbeq 80(%rdi), %rax
+ movq %rdx, 288(%rsi)
+ movq %rax, 296(%rsi)
+ movbeq 72(%rdi), %rdx
+ movbeq 64(%rdi), %rax
+ movq %rdx, 304(%rsi)
+ movq %rax, 312(%rsi)
+ movbeq 56(%rdi), %rdx
+ movbeq 48(%rdi), %rax
+ movq %rdx, 320(%rsi)
+ movq %rax, 328(%rsi)
+ movbeq 40(%rdi), %rdx
+ movbeq 32(%rdi), %rax
+ movq %rdx, 336(%rsi)
+ movq %rax, 344(%rsi)
+ movbeq 24(%rdi), %rdx
+ movbeq 16(%rdi), %rax
+ movq %rdx, 352(%rsi)
+ movq %rax, 360(%rsi)
+ movbeq 8(%rdi), %rdx
+ movbeq (%rdi), %rax
+ movq %rdx, 368(%rsi)
+ movq %rax, 376(%rsi)
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_to_bin,.-sp_3072_to_bin
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ *
+ * System V AMD64 ABI: rdi = r (24-word result), rsi = a (12 words),
+ * rdx = b (12 words; moved to rcx because mulq overwrites rdx).
+ * Fully unrolled schoolbook multiply: for each result word k, all
+ * partial products A[i]*B[j] with i+j == k are summed through a
+ * rotating three-register carry chain (r8/r9/r10 cycle through the
+ * low/mid/high accumulator roles; "adcq $0" captures the carry-out).
+ * The low 12 result words are staged in a 96-byte stack scratch area
+ * and copied out at the end; until then only r[12..23] (offsets
+ * 96..184 from rdi) are written, so r may alias a or b.
+ * Clobbers: rax, rcx, rdx, r8-r10, flags. No callee-saved registers
+ * are used, so no prologue beyond the stack adjustment is needed.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_12
+.type sp_3072_mul_12,@function
+.align 16
+sp_3072_mul_12:
+#else
+.globl _sp_3072_mul_12
+.p2align 4
+_sp_3072_mul_12:
+#endif /* __APPLE__ */
+ # Keep b in rcx: mulq writes its 128-bit product into rdx:rax.
+ movq %rdx, %rcx
+ # 96-byte scratch for the low 12 result words (allows r to alias a/b).
+ subq $96, %rsp
+ # A[0] * B[0]
+ movq (%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ movq %rax, (%rsp)
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[0]
+ movq (%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 8(%rsp)
+ # A[0] * B[2]
+ movq 16(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[1]
+ movq 8(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[0]
+ movq (%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 16(%rsp)
+ # A[0] * B[3]
+ movq 24(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[2]
+ movq 16(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[1]
+ movq 8(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[0]
+ movq (%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 24(%rsp)
+ # A[0] * B[4]
+ movq 32(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[3]
+ movq 24(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[2]
+ movq 16(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[1]
+ movq 8(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[0]
+ movq (%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 32(%rsp)
+ # A[0] * B[5]
+ movq 40(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[4]
+ movq 32(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[3]
+ movq 24(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[2]
+ movq 16(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[1]
+ movq 8(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[0]
+ movq (%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 40(%rsp)
+ # A[0] * B[6]
+ movq 48(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[5]
+ movq 40(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[4]
+ movq 32(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[3]
+ movq 24(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B[2]
+ movq 16(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[1]
+ movq 8(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[6] * B[0]
+ movq (%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 48(%rsp)
+ # A[0] * B[7]
+ movq 56(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[6]
+ movq 48(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[5]
+ movq 40(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[4]
+ movq 32(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[3]
+ movq 24(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B[2]
+ movq 16(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[6] * B[1]
+ movq 8(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[7] * B[0]
+ movq (%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 56(%rsp)
+ # A[0] * B[8]
+ movq 64(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[7]
+ movq 56(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[6]
+ movq 48(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[5]
+ movq 40(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[4]
+ movq 32(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[3]
+ movq 24(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B[2]
+ movq 16(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[7] * B[1]
+ movq 8(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * B[0]
+ movq (%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 64(%rsp)
+ # A[0] * B[9]
+ movq 72(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[8]
+ movq 64(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[7]
+ movq 56(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[6]
+ movq 48(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B[5]
+ movq 40(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[4]
+ movq 32(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[6] * B[3]
+ movq 24(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B[2]
+ movq 16(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[8] * B[1]
+ movq 8(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[9] * B[0]
+ movq (%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 72(%rsp)
+ # A[0] * B[10]
+ movq 80(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[9]
+ movq 72(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[8]
+ movq 64(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[7]
+ movq 56(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[6]
+ movq 48(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B[5]
+ movq 40(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[6] * B[4]
+ movq 32(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[7] * B[3]
+ movq 24(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B[2]
+ movq 16(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[9] * B[1]
+ movq 8(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[10] * B[0]
+ movq (%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 80(%rsp)
+ # A[0] * B[11]
+ movq 88(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[10]
+ movq 80(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[9]
+ movq 72(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[8]
+ movq 64(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[7]
+ movq 56(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[6]
+ movq 48(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B[5]
+ movq 40(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[7] * B[4]
+ movq 32(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * B[3]
+ movq 24(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B[2]
+ movq 16(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[10] * B[1]
+ movq 8(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[11] * B[0]
+ movq (%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 88(%rsp)
+ # From here on columns 12..23 store straight into r (offsets >= 96),
+ # which cannot overlap a[0..11] or b[0..11] even if r aliases them.
+ # A[1] * B[11]
+ movq 88(%rcx), %rax
+ mulq 8(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[10]
+ movq 80(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[9]
+ movq 72(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B[8]
+ movq 64(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[7]
+ movq 56(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[6] * B[6]
+ movq 48(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B[5]
+ movq 40(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[8] * B[4]
+ movq 32(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[9] * B[3]
+ movq 24(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B[2]
+ movq 16(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[11] * B[1]
+ movq 8(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 96(%rdi)
+ # A[2] * B[11]
+ movq 88(%rcx), %rax
+ mulq 16(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[10]
+ movq 80(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[9]
+ movq 72(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B[8]
+ movq 64(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[6] * B[7]
+ movq 56(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[7] * B[6]
+ movq 48(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B[5]
+ movq 40(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[9] * B[4]
+ movq 32(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[10] * B[3]
+ movq 24(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B[2]
+ movq 16(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 104(%rdi)
+ # A[3] * B[11]
+ movq 88(%rcx), %rax
+ mulq 24(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[10]
+ movq 80(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[9]
+ movq 72(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B[8]
+ movq 64(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[7] * B[7]
+ movq 56(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * B[6]
+ movq 48(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B[5]
+ movq 40(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[10] * B[4]
+ movq 32(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[11] * B[3]
+ movq 24(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 112(%rdi)
+ # A[4] * B[11]
+ movq 88(%rcx), %rax
+ mulq 32(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[10]
+ movq 80(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[6] * B[9]
+ movq 72(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B[8]
+ movq 64(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[8] * B[7]
+ movq 56(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[9] * B[6]
+ movq 48(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B[5]
+ movq 40(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[11] * B[4]
+ movq 32(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 120(%rdi)
+ # A[5] * B[11]
+ movq 88(%rcx), %rax
+ mulq 40(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[6] * B[10]
+ movq 80(%rcx), %rax
+ mulq 48(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[7] * B[9]
+ movq 72(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B[8]
+ movq 64(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[9] * B[7]
+ movq 56(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[10] * B[6]
+ movq 48(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B[5]
+ movq 40(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 128(%rdi)
+ # A[6] * B[11]
+ movq 88(%rcx), %rax
+ mulq 48(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[7] * B[10]
+ movq 80(%rcx), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * B[9]
+ movq 72(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B[8]
+ movq 64(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[10] * B[7]
+ movq 56(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[11] * B[6]
+ movq 48(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 136(%rdi)
+ # A[7] * B[11]
+ movq 88(%rcx), %rax
+ mulq 56(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[8] * B[10]
+ movq 80(%rcx), %rax
+ mulq 64(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[9] * B[9]
+ movq 72(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B[8]
+ movq 64(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[11] * B[7]
+ movq 56(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 144(%rdi)
+ # A[8] * B[11]
+ movq 88(%rcx), %rax
+ mulq 64(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[9] * B[10]
+ movq 80(%rcx), %rax
+ mulq 72(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[10] * B[9]
+ movq 72(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B[8]
+ movq 64(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 152(%rdi)
+ # A[9] * B[11]
+ movq 88(%rcx), %rax
+ mulq 72(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[10] * B[10]
+ movq 80(%rcx), %rax
+ mulq 80(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[11] * B[9]
+ movq 72(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 160(%rdi)
+ # A[10] * B[11]
+ movq 88(%rcx), %rax
+ mulq 80(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[11] * B[10]
+ movq 80(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 168(%rdi)
+ # A[11] * B[11]
+ movq 88(%rcx), %rax
+ mulq 88(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ movq %r9, 176(%rdi)
+ movq %r10, 184(%rdi)
+ # Copy the 12 low result words from the stack scratch into r[0..11].
+ movq (%rsp), %rax
+ movq 8(%rsp), %rdx
+ movq 16(%rsp), %r8
+ movq 24(%rsp), %r9
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ movq 32(%rsp), %rax
+ movq 40(%rsp), %rdx
+ movq 48(%rsp), %r8
+ movq 56(%rsp), %r9
+ movq %rax, 32(%rdi)
+ movq %rdx, 40(%rdi)
+ movq %r8, 48(%rdi)
+ movq %r9, 56(%rdi)
+ movq 64(%rsp), %rax
+ movq 72(%rsp), %rdx
+ movq 80(%rsp), %r8
+ movq 88(%rsp), %r9
+ movq %rax, 64(%rdi)
+ movq %rdx, 72(%rdi)
+ movq %r8, 80(%rdi)
+ movq %r9, 88(%rdi)
+ addq $96, %rsp
+ # rep-prefixed ret: two-byte return, friendly to AMD branch predictors.
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_12,.-sp_3072_mul_12
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ *
+ * System V AMD64 ABI: rdi = r (24-word result), rsi = a (12 words).
+ * Fully unrolled schoolbook squaring. Each off-diagonal product
+ * A[i]*A[j] (i != j) contributes twice: early/late columns add the
+ * product into the carry chain twice, while the middle columns first
+ * collect the cross products in r10:r11:r12, double that triple with
+ * an add/adc chain, and then fold it into the running accumulators.
+ * Diagonal products A[i]*A[i] are added once. The accumulator roles
+ * rotate through r8/r9/rcx.
+ * r12 is callee-saved under this ABI, hence the push/pop around the
+ * body. The low 12 result words are staged in a 96-byte stack scratch
+ * area and copied out at the end; until then only r[12..23] (offsets
+ * >= 96 from rdi) are written, so r may alias a.
+ * Clobbers: rax, rcx, rdx, r8-r11, flags.
+ */
+#ifndef __APPLE__
+.globl sp_3072_sqr_12
+.type sp_3072_sqr_12,@function
+.align 16
+sp_3072_sqr_12:
+#else
+.globl _sp_3072_sqr_12
+.p2align 4
+_sp_3072_sqr_12:
+#endif /* __APPLE__ */
+ # r12 is callee-saved: preserve it for the doubling scratch triple.
+ push %r12
+ # 96-byte scratch for the low 12 result words (allows r to alias a).
+ subq $96, %rsp
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ xorq %r9, %r9
+ movq %rax, (%rsp)
+ movq %rdx, %r8
+ # A[0] * A[1]
+ movq 8(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # cross product counted twice: add rdx:rax a second time
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 8(%rsp)
+ # A[0] * A[2]
+ movq 16(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ movq %r9, 16(%rsp)
+ # A[0] * A[3]
+ movq 24(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * A[2]
+ movq 16(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %rcx, 24(%rsp)
+ # A[0] * A[4]
+ movq 32(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[1] * A[3]
+ movq 24(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 32(%rsp)
+ # Wider columns: collect cross products in r10:r11:r12, double the
+ # triple once, then fold it into the running accumulators.
+ # A[0] * A[5]
+ movq 40(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[4]
+ movq 32(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[3]
+ movq 24(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 40(%rsp)
+ # A[0] * A[6]
+ movq 48(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[5]
+ movq 40(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[4]
+ movq 32(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 48(%rsp)
+ # A[0] * A[7]
+ movq 56(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[6]
+ movq 48(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[5]
+ movq 40(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[4]
+ movq 32(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 56(%rsp)
+ # A[0] * A[8]
+ movq 64(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[7]
+ movq 56(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[6]
+ movq 48(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[5]
+ movq 40(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[4]
+ movq 32(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 64(%rsp)
+ # A[0] * A[9]
+ movq 72(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[8]
+ movq 64(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[7]
+ movq 56(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[6]
+ movq 48(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[5]
+ movq 40(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 72(%rsp)
+ # A[0] * A[10]
+ movq 80(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[9]
+ movq 72(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[8]
+ movq 64(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[7]
+ movq 56(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[6]
+ movq 48(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[5]
+ movq 40(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 80(%rsp)
+ # A[0] * A[11]
+ movq 88(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[10]
+ movq 80(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[9]
+ movq 72(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[8]
+ movq 64(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[7]
+ movq 56(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[6]
+ movq 48(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 88(%rsp)
+ # Columns 12..23 store straight into r (offsets >= 96), which cannot
+ # overlap a[0..11] even when r aliases a.
+ # A[1] * A[11]
+ movq 88(%rsi), %rax
+ mulq 8(%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[2] * A[10]
+ movq 80(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[3] * A[9]
+ movq 72(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[8]
+ movq 64(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[7]
+ movq 56(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[6]
+ movq 48(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 96(%rdi)
+ # A[2] * A[11]
+ movq 88(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[3] * A[10]
+ movq 80(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[4] * A[9]
+ movq 72(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[8]
+ movq 64(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[7]
+ movq 56(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 104(%rdi)
+ # A[3] * A[11]
+ movq 88(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[4] * A[10]
+ movq 80(%rsi), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[5] * A[9]
+ movq 72(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[8]
+ movq 64(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[7]
+ movq 56(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 112(%rdi)
+ # A[4] * A[11]
+ movq 88(%rsi), %rax
+ mulq 32(%rsi)
+ xorq %r9, %r9
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[5] * A[10]
+ movq 80(%rsi), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[6] * A[9]
+ movq 72(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[8]
+ movq 64(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %rcx
+ adcq %r11, %r8
+ adcq %r12, %r9
+ movq %rcx, 120(%rdi)
+ # A[5] * A[11]
+ movq 88(%rsi), %rax
+ mulq 40(%rsi)
+ xorq %rcx, %rcx
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[6] * A[10]
+ movq 80(%rsi), %rax
+ mulq 48(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[7] * A[9]
+ movq 72(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[8] * A[8]
+ movq 64(%rsi), %rax
+ mulq %rax
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r8
+ adcq %r11, %r9
+ adcq %r12, %rcx
+ movq %r8, 128(%rdi)
+ # A[6] * A[11]
+ movq 88(%rsi), %rax
+ mulq 48(%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[7] * A[10]
+ movq 80(%rsi), %rax
+ mulq 56(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[8] * A[9]
+ movq 72(%rsi), %rax
+ mulq 64(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 136(%rdi)
+ # Narrow tail columns: back to adding each cross product twice.
+ # A[7] * A[11]
+ movq 88(%rsi), %rax
+ mulq 56(%rsi)
+ xorq %r9, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[8] * A[10]
+ movq 80(%rsi), %rax
+ mulq 64(%rsi)
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * A[9]
+ movq 72(%rsi), %rax
+ mulq %rax
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %rcx, 144(%rdi)
+ # A[8] * A[11]
+ movq 88(%rsi), %rax
+ mulq 64(%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[9] * A[10]
+ movq 80(%rsi), %rax
+ mulq 72(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 152(%rdi)
+ # A[9] * A[11]
+ movq 88(%rsi), %rax
+ mulq 72(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ # A[10] * A[10]
+ movq 80(%rsi), %rax
+ mulq %rax
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ movq %r9, 160(%rdi)
+ # A[10] * A[11]
+ movq 88(%rsi), %rax
+ mulq 80(%rsi)
+ xorq %r9, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %rcx, 168(%rdi)
+ # A[11] * A[11]
+ movq 88(%rsi), %rax
+ mulq %rax
+ addq %rax, %r8
+ adcq %rdx, %r9
+ movq %r8, 176(%rdi)
+ movq %r9, 184(%rdi)
+ # Copy the 12 low result words from the stack scratch into r[0..11].
+ movq (%rsp), %rax
+ movq 8(%rsp), %rdx
+ movq 16(%rsp), %r10
+ movq 24(%rsp), %r11
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 32(%rsp), %rax
+ movq 40(%rsp), %rdx
+ movq 48(%rsp), %r10
+ movq 56(%rsp), %r11
+ movq %rax, 32(%rdi)
+ movq %rdx, 40(%rdi)
+ movq %r10, 48(%rdi)
+ movq %r11, 56(%rdi)
+ movq 64(%rsp), %rax
+ movq 72(%rsp), %rdx
+ movq 80(%rsp), %r10
+ movq 88(%rsp), %r11
+ movq %rax, 64(%rdi)
+ movq %rdx, 72(%rdi)
+ movq %r10, 80(%rdi)
+ movq %r11, 88(%rdi)
+ addq $96, %rsp
+ pop %r12
+ # rep-prefixed ret: two-byte return, friendly to AMD branch predictors.
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_sqr_12,.-sp_3072_sqr_12
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r Result of multiplication.
+ * a First number to multiply.
+ * b Second number to multiply.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_avx2_12
+.type sp_3072_mul_avx2_12,@function
+.align 16
+sp_3072_mul_avx2_12:
+#else
+.globl _sp_3072_mul_avx2_12
+.p2align 4
+_sp_3072_mul_avx2_12:
+#endif /* __APPLE__ */
+ push %rbx
+ push %rbp
+ push %r12
+ movq %rdx, %rbp
+ subq $96, %rsp
+ cmpq %rdi, %rsi
+ movq %rsp, %rbx
+ cmovne %rdi, %rbx
+ cmpq %rdi, %rbp
+ cmove %rsp, %rbx
+ xorq %r12, %r12
+ movq (%rsi), %rdx
+ # A[0] * B[0]
+ mulx (%rbp), %r8, %r9
+ # A[0] * B[1]
+ mulx 8(%rbp), %rax, %r10
+ movq %r8, (%rbx)
+ adcxq %rax, %r9
+ movq %r9, 8(%rbx)
+ # A[0] * B[2]
+ mulx 16(%rbp), %rax, %r8
+ adcxq %rax, %r10
+ # A[0] * B[3]
+ mulx 24(%rbp), %rax, %r9
+ movq %r10, 16(%rbx)
+ adcxq %rax, %r8
+ movq %r8, 24(%rbx)
+ # A[0] * B[4]
+ mulx 32(%rbp), %rax, %r10
+ adcxq %rax, %r9
+ # A[0] * B[5]
+ mulx 40(%rbp), %rax, %r8
+ movq %r9, 32(%rbx)
+ adcxq %rax, %r10
+ movq %r10, 40(%rbx)
+ # A[0] * B[6]
+ mulx 48(%rbp), %rax, %r9
+ adcxq %rax, %r8
+ # A[0] * B[7]
+ mulx 56(%rbp), %rax, %r10
+ movq %r8, 48(%rbx)
+ adcxq %rax, %r9
+ movq %r9, 56(%rbx)
+ # A[0] * B[8]
+ mulx 64(%rbp), %rax, %r8
+ adcxq %rax, %r10
+ # A[0] * B[9]
+ mulx 72(%rbp), %rax, %r9
+ movq %r10, 64(%rbx)
+ adcxq %rax, %r8
+ movq %r8, 72(%rbx)
+ # A[0] * B[10]
+ mulx 80(%rbp), %rax, %r10
+ adcxq %rax, %r9
+ # A[0] * B[11]
+ mulx 88(%rbp), %rax, %r8
+ movq %r9, 80(%rbx)
+ adcxq %rax, %r10
+ adcxq %r12, %r8
+ movq %r12, %r11
+ adcxq %r12, %r11
+ movq %r10, 88(%rbx)
+ movq %r8, 96(%rdi)
+ movq 8(%rsi), %rdx
+ movq 8(%rbx), %r9
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %r8
+ # A[1] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[1] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r9, 8(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 16(%rbx)
+ movq 32(%rbx), %r9
+ movq 40(%rbx), %r10
+ # A[1] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[1] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r8, 24(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 32(%rbx)
+ movq 48(%rbx), %r8
+ movq 56(%rbx), %r9
+ # A[1] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[1] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r10, 40(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 48(%rbx)
+ movq 64(%rbx), %r10
+ movq 72(%rbx), %r8
+ # A[1] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[1] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r9, 56(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 64(%rbx)
+ movq 80(%rbx), %r9
+ movq 88(%rbx), %r10
+ # A[1] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[1] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r8, 72(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 80(%rbx)
+ movq 96(%rdi), %r8
+ # A[1] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[1] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r10, 88(%rbx)
+ movq %r12, %r9
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ adcxq %r11, %r9
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r8, 96(%rdi)
+ movq %r9, 104(%rdi)
+ movq 16(%rsi), %rdx
+ movq 16(%rbx), %r10
+ movq 24(%rbx), %r8
+ movq 32(%rbx), %r9
+ # A[2] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[2] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r10, 16(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 24(%rbx)
+ movq 40(%rbx), %r10
+ movq 48(%rbx), %r8
+ # A[2] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[2] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r9, 32(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 40(%rbx)
+ movq 56(%rbx), %r9
+ movq 64(%rbx), %r10
+ # A[2] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[2] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r8, 48(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 56(%rbx)
+ movq 72(%rbx), %r8
+ movq 80(%rbx), %r9
+ # A[2] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[2] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r10, 64(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 72(%rbx)
+ movq 88(%rbx), %r10
+ movq 96(%rdi), %r8
+ # A[2] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[2] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r9, 80(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 88(%rbx)
+ movq 104(%rdi), %r9
+ # A[2] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[2] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r8, 96(%rdi)
+ movq %r12, %r10
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ adcxq %r11, %r10
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r9, 104(%rdi)
+ movq %r10, 112(%rdi)
+ movq 24(%rsi), %rdx
+ movq 24(%rbx), %r8
+ movq 32(%rbx), %r9
+ movq 40(%rbx), %r10
+ # A[3] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[3] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r8, 24(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 32(%rbx)
+ movq 48(%rbx), %r8
+ movq 56(%rbx), %r9
+ # A[3] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[3] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r10, 40(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 48(%rbx)
+ movq 64(%rbx), %r10
+ movq 72(%rbx), %r8
+ # A[3] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[3] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r9, 56(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 64(%rbx)
+ movq 80(%rbx), %r9
+ movq 88(%rbx), %r10
+ # A[3] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[3] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r8, 72(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 80(%rbx)
+ movq 96(%rdi), %r8
+ movq 104(%rdi), %r9
+ # A[3] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[3] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r10, 88(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rdi), %r10
+ # A[3] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[3] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r9, 104(%rdi)
+ movq %r12, %r8
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ adcxq %r11, %r8
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r10, 112(%rdi)
+ movq %r8, 120(%rdi)
+ movq 32(%rsi), %rdx
+ movq 32(%rbx), %r9
+ movq 40(%rbx), %r10
+ movq 48(%rbx), %r8
+ # A[4] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[4] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r9, 32(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 40(%rbx)
+ movq 56(%rbx), %r9
+ movq 64(%rbx), %r10
+ # A[4] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[4] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r8, 48(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 56(%rbx)
+ movq 72(%rbx), %r8
+ movq 80(%rbx), %r9
+ # A[4] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[4] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r10, 64(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 72(%rbx)
+ movq 88(%rbx), %r10
+ movq 96(%rdi), %r8
+ # A[4] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[4] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r9, 80(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 88(%rbx)
+ movq 104(%rdi), %r9
+ movq 112(%rdi), %r10
+ # A[4] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[4] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r8, 96(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 104(%rdi)
+ movq 120(%rdi), %r8
+ # A[4] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[4] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r10, 112(%rdi)
+ movq %r12, %r9
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ adcxq %r11, %r9
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r8, 120(%rdi)
+ movq %r9, 128(%rdi)
+ movq 40(%rsi), %rdx
+ movq 40(%rbx), %r10
+ movq 48(%rbx), %r8
+ movq 56(%rbx), %r9
+ # A[5] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[5] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r10, 40(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 48(%rbx)
+ movq 64(%rbx), %r10
+ movq 72(%rbx), %r8
+ # A[5] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[5] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r9, 56(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 64(%rbx)
+ movq 80(%rbx), %r9
+ movq 88(%rbx), %r10
+ # A[5] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[5] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r8, 72(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 80(%rbx)
+ movq 96(%rdi), %r8
+ movq 104(%rdi), %r9
+ # A[5] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[5] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r10, 88(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rdi), %r10
+ movq 120(%rdi), %r8
+ # A[5] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[5] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r9, 104(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 112(%rdi)
+ movq 128(%rdi), %r9
+ # A[5] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[5] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r8, 120(%rdi)
+ movq %r12, %r10
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ adcxq %r11, %r10
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r9, 128(%rdi)
+ movq %r10, 136(%rdi)
+ movq 48(%rsi), %rdx
+ movq 48(%rbx), %r8
+ movq 56(%rbx), %r9
+ movq 64(%rbx), %r10
+ # A[6] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[6] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r8, 48(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 56(%rbx)
+ movq 72(%rbx), %r8
+ movq 80(%rbx), %r9
+ # A[6] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[6] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r10, 64(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 72(%rbx)
+ movq 88(%rbx), %r10
+ movq 96(%rdi), %r8
+ # A[6] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[6] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r9, 80(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 88(%rbx)
+ movq 104(%rdi), %r9
+ movq 112(%rdi), %r10
+ # A[6] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[6] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r8, 96(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 104(%rdi)
+ movq 120(%rdi), %r8
+ movq 128(%rdi), %r9
+ # A[6] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[6] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r10, 112(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 120(%rdi)
+ movq 136(%rdi), %r10
+ # A[6] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[6] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ movq %r12, %r8
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ adcxq %r11, %r8
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r10, 136(%rdi)
+ movq %r8, 144(%rdi)
+ movq 56(%rsi), %rdx
+ movq 56(%rbx), %r9
+ movq 64(%rbx), %r10
+ movq 72(%rbx), %r8
+ # A[7] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[7] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r9, 56(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 64(%rbx)
+ movq 80(%rbx), %r9
+ movq 88(%rbx), %r10
+ # A[7] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[7] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r8, 72(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 80(%rbx)
+ movq 96(%rdi), %r8
+ movq 104(%rdi), %r9
+ # A[7] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[7] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r10, 88(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rdi), %r10
+ movq 120(%rdi), %r8
+ # A[7] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[7] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r9, 104(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 112(%rdi)
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ # A[7] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[7] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r8, 120(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 128(%rdi)
+ movq 144(%rdi), %r8
+ # A[7] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[7] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ movq %r12, %r9
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ adcxq %r11, %r9
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r8, 144(%rdi)
+ movq %r9, 152(%rdi)
+ movq 64(%rsi), %rdx
+ movq 64(%rbx), %r10
+ movq 72(%rbx), %r8
+ movq 80(%rbx), %r9
+ # A[8] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[8] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r10, 64(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 72(%rbx)
+ movq 88(%rbx), %r10
+ movq 96(%rdi), %r8
+ # A[8] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[8] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r9, 80(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 88(%rbx)
+ movq 104(%rdi), %r9
+ movq 112(%rdi), %r10
+ # A[8] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[8] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r8, 96(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 104(%rdi)
+ movq 120(%rdi), %r8
+ movq 128(%rdi), %r9
+ # A[8] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[8] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r10, 112(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 120(%rdi)
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r8
+ # A[8] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[8] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 136(%rdi)
+ movq 152(%rdi), %r9
+ # A[8] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[8] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r8, 144(%rdi)
+ movq %r12, %r10
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ adcxq %r11, %r10
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r9, 152(%rdi)
+ movq %r10, 160(%rdi)
+ movq 72(%rsi), %rdx
+ movq 72(%rbx), %r8
+ movq 80(%rbx), %r9
+ movq 88(%rbx), %r10
+ # A[9] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[9] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r8, 72(%rbx)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 80(%rbx)
+ movq 96(%rdi), %r8
+ movq 104(%rdi), %r9
+ # A[9] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[9] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r10, 88(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rdi), %r10
+ movq 120(%rdi), %r8
+ # A[9] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[9] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r9, 104(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 112(%rdi)
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ # A[9] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[9] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r8, 120(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 128(%rdi)
+ movq 144(%rdi), %r8
+ movq 152(%rdi), %r9
+ # A[9] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[9] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rdi), %r10
+ # A[9] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[9] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r9, 152(%rdi)
+ movq %r12, %r8
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ adcxq %r11, %r8
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r10, 160(%rdi)
+ movq %r8, 168(%rdi)
+ movq 80(%rsi), %rdx
+ movq 80(%rbx), %r9
+ movq 88(%rbx), %r10
+ movq 96(%rdi), %r8
+ # A[10] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[10] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r9, 80(%rbx)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 88(%rbx)
+ movq 104(%rdi), %r9
+ movq 112(%rdi), %r10
+ # A[10] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[10] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r8, 96(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 104(%rdi)
+ movq 120(%rdi), %r8
+ movq 128(%rdi), %r9
+ # A[10] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[10] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r10, 112(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 120(%rdi)
+ movq 136(%rdi), %r10
+ movq 144(%rdi), %r8
+ # A[10] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[10] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r9, 128(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 136(%rdi)
+ movq 152(%rdi), %r9
+ movq 160(%rdi), %r10
+ # A[10] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[10] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r8, 144(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 152(%rdi)
+ movq 168(%rdi), %r8
+ # A[10] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[10] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r10, 160(%rdi)
+ movq %r12, %r9
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ adcxq %r11, %r9
+ movq %r12, %r11
+ adoxq %r12, %r11
+ adcxq %r12, %r11
+ movq %r8, 168(%rdi)
+ movq %r9, 176(%rdi)
+ movq 88(%rsi), %rdx
+ movq 88(%rbx), %r10
+ movq 96(%rdi), %r8
+ movq 104(%rdi), %r9
+ # A[11] * B[0]
+ mulx (%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[11] * B[1]
+ mulx 8(%rbp), %rax, %rcx
+ movq %r10, 88(%rbx)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rdi), %r10
+ movq 120(%rdi), %r8
+ # A[11] * B[2]
+ mulx 16(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[11] * B[3]
+ mulx 24(%rbp), %rax, %rcx
+ movq %r9, 104(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 112(%rdi)
+ movq 128(%rdi), %r9
+ movq 136(%rdi), %r10
+ # A[11] * B[4]
+ mulx 32(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[11] * B[5]
+ mulx 40(%rbp), %rax, %rcx
+ movq %r8, 120(%rdi)
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ movq %r9, 128(%rdi)
+ movq 144(%rdi), %r8
+ movq 152(%rdi), %r9
+ # A[11] * B[6]
+ mulx 48(%rbp), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ # A[11] * B[7]
+ mulx 56(%rbp), %rax, %rcx
+ movq %r10, 136(%rdi)
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rdi), %r10
+ movq 168(%rdi), %r8
+ # A[11] * B[8]
+ mulx 64(%rbp), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ # A[11] * B[9]
+ mulx 72(%rbp), %rax, %rcx
+ movq %r9, 152(%rdi)
+ adcxq %rax, %r10
+ adoxq %rcx, %r8
+ movq %r10, 160(%rdi)
+ movq 176(%rdi), %r9
+ # A[11] * B[10]
+ mulx 80(%rbp), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # A[11] * B[11]
+ mulx 88(%rbp), %rax, %rcx
+ movq %r8, 168(%rdi)
+ movq %r12, %r10
+ adcxq %rax, %r9
+ adoxq %rcx, %r10
+ adcxq %r11, %r10
+ movq %r9, 176(%rdi)
+ movq %r10, 184(%rdi)
+ cmpq %rdi, %rsi
+ je L_start_3072_mul_avx2_12
+ cmpq %rdi, %rbp
+ jne L_end_3072_mul_avx2_12
+L_start_3072_mul_avx2_12:
+ vmovdqu (%rbx), %xmm0
+ vmovups %xmm0, (%rdi)
+ vmovdqu 16(%rbx), %xmm0
+ vmovups %xmm0, 16(%rdi)
+ vmovdqu 32(%rbx), %xmm0
+ vmovups %xmm0, 32(%rdi)
+ vmovdqu 48(%rbx), %xmm0
+ vmovups %xmm0, 48(%rdi)
+ vmovdqu 64(%rbx), %xmm0
+ vmovups %xmm0, 64(%rdi)
+ vmovdqu 80(%rbx), %xmm0
+ vmovups %xmm0, 80(%rdi)
+L_end_3072_mul_avx2_12:
+ addq $96, %rsp
+ pop %r12
+ pop %rbp
+ pop %rbx
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_avx2_12,.-sp_3072_mul_avx2_12
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+#ifdef HAVE_INTEL_AVX2
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer. Result, 24 64-bit words (rdi).
+ * a A single precision integer. Input, 12 64-bit words (rsi).
+ *
+ * System V AMD64 ABI: rdi = r, rsi = a; no return value.
+ * Requires BMI2 (mulx) and ADX (adcx/adox) in addition to the AVX
+ * vmovdqu/vmovups moves used for the final scratch-to-result copy.
+ * Callee-saved rbp, r12-r15 and rbx are saved and restored; 96 bytes
+ * of stack scratch hold the low result words for the r == a case.
+ *
+ * Structure: off-diagonal products A[i]*A[j] (i > j) are accumulated
+ * in six "diagonal" passes using two independent carry chains
+ * (adcx consumes CF, adox consumes OF), then the whole partial sum
+ * is doubled and the diagonal squares A[i]*A[i] are added in.
+ * NOTE(review): instruction order is load-bearing here - CF and OF
+ * stay live across long stretches; do not reorder.
+ */
+#ifndef __APPLE__
+.globl sp_3072_sqr_avx2_12
+.type sp_3072_sqr_avx2_12,@function
+.align 16
+sp_3072_sqr_avx2_12:
+#else
+.globl _sp_3072_sqr_avx2_12
+.p2align 4
+_sp_3072_sqr_avx2_12:
+#endif /* __APPLE__ */
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ subq $96, %rsp
+ # rbp = buffer for result words 0..11: the stack scratch when r
+ # aliases a (so a's words are not clobbered while still being read),
+ # otherwise r itself. The final copy below undoes the aliased case.
+ cmpq %rdi, %rsi
+ movq %rsp, %rbp
+ cmovne %rdi, %rbp
+ # r10 is kept at zero for the whole function (used to seed fresh
+ # accumulators and to collapse the CF/OF carries).
+ xorq %r10, %r10
+ # Diagonal 1
+ # A[1] x A[0]
+ movq (%rsi), %rdx
+ mulxq 8(%rsi), %r8, %r9
+ movq %r8, 8(%rbp)
+ movq %r10, %r8
+ # A[2] x A[0]
+ mulxq 16(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 16(%rbp)
+ movq %r10, %r9
+ # A[3] x A[0]
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 24(%rbp)
+ movq %r10, %r8
+ # A[4] x A[0]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 32(%rbp)
+ movq %r10, %r9
+ # A[5] x A[0]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 40(%rbp)
+ movq %r10, %r8
+ # A[6] x A[0]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 48(%rbp)
+ movq %r10, %r9
+ # A[7] x A[0]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ # Result words 7..11 are kept in r12, r13, r14, r15, rbx instead of
+ # memory until the end of the function (stored at 56..88(%rdi)).
+ movq %r8, %r12
+ movq %r10, %r8
+ # A[8] x A[0]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, %r13
+ movq %r10, %r9
+ # A[9] x A[0]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, %r14
+ movq %r10, %r8
+ # A[10] x A[0]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, %r15
+ movq %r10, %r9
+ # A[11] x A[0]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, %rbx
+ # Carry
+ adcxq %r10, %r9
+ movq %r10, %r11
+ adcxq %r10, %r11
+ adoxq %r10, %r11
+ movq %r9, 96(%rdi)
+ # Diagonal 2
+ movq 24(%rbp), %r9
+ movq 32(%rbp), %r8
+ # A[2] x A[1]
+ movq 8(%rsi), %rdx
+ mulxq 16(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 24(%rbp)
+ movq 40(%rbp), %r9
+ # A[3] x A[1]
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 32(%rbp)
+ movq 48(%rbp), %r8
+ # A[4] x A[1]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 40(%rbp)
+ # No load %r12 - %r9
+ # A[5] x A[1]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r12
+ movq %r8, 48(%rbp)
+ # No load %r13 - %r8
+ # A[6] x A[1]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r13
+ # No store %r12
+ # No load %r14 - %r9
+ # A[7] x A[1]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r13
+ adoxq %rcx, %r14
+ # No store %r13
+ # No load %r15 - %r8
+ # A[8] x A[1]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # No store %r14
+ # No load %rbx - %r9
+ # A[9] x A[1]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r15
+ movq 96(%rdi), %r8
+ # A[10] x A[1]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r8
+ # No store %rbx
+ movq %r10, %r9
+ # A[11] x A[1]
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 96(%rdi)
+ movq %r10, %r8
+ # A[11] x A[2]
+ movq 16(%rsi), %rdx
+ mulxq 88(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 104(%rdi)
+ # Carry
+ adcxq %r11, %r8
+ movq %r10, %r11
+ adcxq %r10, %r11
+ adoxq %r10, %r11
+ movq %r8, 112(%rdi)
+ # Diagonal 3
+ movq 40(%rbp), %r8
+ movq 48(%rbp), %r9
+ # A[3] x A[2]
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 40(%rbp)
+ # No load %r12 - %r8
+ # A[4] x A[2]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r12
+ movq %r9, 48(%rbp)
+ # No load %r13 - %r9
+ # A[5] x A[2]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r13
+ # No store %r12
+ # No load %r14 - %r8
+ # A[6] x A[2]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r13
+ adoxq %rcx, %r14
+ # No store %r13
+ # No load %r15 - %r9
+ # A[7] x A[2]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # No store %r14
+ # No load %rbx - %r8
+ # A[8] x A[2]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r15
+ movq 96(%rdi), %r9
+ # A[9] x A[2]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r9
+ # No store %rbx
+ movq 104(%rdi), %r8
+ # A[10] x A[2]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 96(%rdi)
+ movq 112(%rdi), %r9
+ # A[10] x A[3]
+ movq 80(%rsi), %rdx
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 104(%rdi)
+ movq %r10, %r8
+ # A[10] x A[4]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 112(%rdi)
+ movq %r10, %r9
+ # A[10] x A[5]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 120(%rdi)
+ # Carry
+ adcxq %r11, %r9
+ movq %r10, %r11
+ adcxq %r10, %r11
+ adoxq %r10, %r11
+ movq %r9, 128(%rdi)
+ # Diagonal 4
+ # No load %r12 - %r9
+ # No load %r13 - %r8
+ # A[4] x A[3]
+ movq 24(%rsi), %rdx
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r13
+ # No store %r12
+ # No load %r14 - %r9
+ # A[5] x A[3]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r13
+ adoxq %rcx, %r14
+ # No store %r13
+ # No load %r15 - %r8
+ # A[6] x A[3]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # No store %r14
+ # No load %rbx - %r9
+ # A[7] x A[3]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r15
+ movq 96(%rdi), %r8
+ # A[8] x A[3]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r8
+ # No store %rbx
+ movq 104(%rdi), %r9
+ # A[9] x A[3]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rdi), %r8
+ # A[9] x A[4]
+ movq 72(%rsi), %rdx
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rdi), %r9
+ # A[9] x A[5]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 112(%rdi)
+ movq 128(%rdi), %r8
+ # A[9] x A[6]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 120(%rdi)
+ movq %r10, %r9
+ # A[9] x A[7]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 128(%rdi)
+ movq %r10, %r8
+ # A[9] x A[8]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 136(%rdi)
+ # Carry
+ adcxq %r11, %r8
+ movq %r10, %r11
+ adcxq %r10, %r11
+ adoxq %r10, %r11
+ movq %r8, 144(%rdi)
+ # Diagonal 5
+ # No load %r14 - %r8
+ # No load %r15 - %r9
+ # A[5] x A[4]
+ movq 32(%rsi), %rdx
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # No store %r14
+ # No load %rbx - %r8
+ # A[6] x A[4]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r15
+ adoxq %rcx, %rbx
+ # No store %r15
+ movq 96(%rdi), %r9
+ # A[7] x A[4]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r9
+ # No store %rbx
+ movq 104(%rdi), %r8
+ # A[8] x A[4]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 96(%rdi)
+ movq 112(%rdi), %r9
+ # A[8] x A[5]
+ movq 64(%rsi), %rdx
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 104(%rdi)
+ movq 120(%rdi), %r8
+ # A[8] x A[6]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 112(%rdi)
+ movq 128(%rdi), %r9
+ # A[8] x A[7]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 120(%rdi)
+ movq 136(%rdi), %r8
+ # A[10] x A[6]
+ movq 80(%rsi), %rdx
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 128(%rdi)
+ movq 144(%rdi), %r9
+ # A[10] x A[7]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 136(%rdi)
+ movq %r10, %r8
+ # A[10] x A[8]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 144(%rdi)
+ movq %r10, %r9
+ # A[10] x A[9]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 152(%rdi)
+ # Carry
+ adcxq %r11, %r9
+ movq %r10, %r11
+ adcxq %r10, %r11
+ adoxq %r10, %r11
+ movq %r9, 160(%rdi)
+ # Diagonal 6
+ # No load %rbx - %r9
+ movq 96(%rdi), %r8
+ # A[6] x A[5]
+ movq 40(%rsi), %rdx
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %rbx
+ adoxq %rcx, %r8
+ # No store %rbx
+ movq 104(%rdi), %r9
+ # A[7] x A[5]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rdi), %r8
+ # A[7] x A[6]
+ movq 48(%rsi), %rdx
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rdi), %r9
+ # A[11] x A[3]
+ movq 88(%rsi), %rdx
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 112(%rdi)
+ movq 128(%rdi), %r8
+ # A[11] x A[4]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 120(%rdi)
+ movq 136(%rdi), %r9
+ # A[11] x A[5]
+ mulxq 40(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 128(%rdi)
+ movq 144(%rdi), %r8
+ # A[11] x A[6]
+ mulxq 48(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 136(%rdi)
+ movq 152(%rdi), %r9
+ # A[11] x A[7]
+ mulxq 56(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rdi), %r8
+ # A[11] x A[8]
+ mulxq 64(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 152(%rdi)
+ movq %r10, %r9
+ # A[11] x A[9]
+ mulxq 72(%rsi), %rax, %rcx
+ adcxq %rax, %r8
+ adoxq %rcx, %r9
+ movq %r8, 160(%rdi)
+ movq %r10, %r8
+ # A[11] x A[10]
+ mulxq 80(%rsi), %rax, %rcx
+ adcxq %rax, %r9
+ adoxq %rcx, %r8
+ movq %r9, 168(%rdi)
+ # Carry
+ adcxq %r11, %r8
+ movq %r10, %r11
+ adcxq %r10, %r11
+ adoxq %r10, %r11
+ movq %r8, 176(%rdi)
+ movq %r11, 184(%rdi)
+ # Double and Add in A[i] x A[i]
+ # Each pair of partial-sum words is doubled via the OF chain
+ # (adoxq reg,reg) while the square's low/high halves are added via
+ # the CF chain (adcxq) - both chains run uninterrupted to the end.
+ movq 8(%rbp), %r9
+ # A[0] x A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ movq %rax, (%rbp)
+ adoxq %r9, %r9
+ adcxq %rcx, %r9
+ movq %r9, 8(%rbp)
+ movq 16(%rbp), %r8
+ movq 24(%rbp), %r9
+ # A[1] x A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 16(%rbp)
+ movq %r9, 24(%rbp)
+ movq 32(%rbp), %r8
+ movq 40(%rbp), %r9
+ # A[2] x A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 32(%rbp)
+ movq %r9, 40(%rbp)
+ movq 48(%rbp), %r8
+ # A[3] x A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r12, %r12
+ adcxq %rax, %r8
+ adcxq %rcx, %r12
+ movq %r8, 48(%rbp)
+ # A[4] x A[4]
+ movq 32(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r13, %r13
+ adoxq %r14, %r14
+ adcxq %rax, %r13
+ adcxq %rcx, %r14
+ # A[5] x A[5]
+ movq 40(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r15, %r15
+ adoxq %rbx, %rbx
+ adcxq %rax, %r15
+ adcxq %rcx, %rbx
+ movq 96(%rdi), %r8
+ movq 104(%rdi), %r9
+ # A[6] x A[6]
+ movq 48(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 96(%rdi)
+ movq %r9, 104(%rdi)
+ movq 112(%rdi), %r8
+ movq 120(%rdi), %r9
+ # A[7] x A[7]
+ movq 56(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 112(%rdi)
+ movq %r9, 120(%rdi)
+ movq 128(%rdi), %r8
+ movq 136(%rdi), %r9
+ # A[8] x A[8]
+ movq 64(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 128(%rdi)
+ movq %r9, 136(%rdi)
+ movq 144(%rdi), %r8
+ movq 152(%rdi), %r9
+ # A[9] x A[9]
+ movq 72(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 144(%rdi)
+ movq %r9, 152(%rdi)
+ movq 160(%rdi), %r8
+ movq 168(%rdi), %r9
+ # A[10] x A[10]
+ movq 80(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 160(%rdi)
+ movq %r9, 168(%rdi)
+ movq 176(%rdi), %r8
+ movq 184(%rdi), %r9
+ # A[11] x A[11]
+ movq 88(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r8, %r8
+ adoxq %r9, %r9
+ adcxq %rax, %r8
+ adcxq %rcx, %r9
+ movq %r8, 176(%rdi)
+ movq %r9, 184(%rdi)
+ # Flush the register-resident result words 7..11.
+ movq %r12, 56(%rdi)
+ movq %r13, 64(%rdi)
+ movq %r14, 72(%rdi)
+ movq %r15, 80(%rdi)
+ movq %rbx, 88(%rdi)
+ # If r == a the low words 0..6 are still in the stack scratch
+ # (rbp == rsp); copy the 56 bytes into r. Otherwise skip.
+ cmpq %rdi, %rsi
+ jne L_end_3072_sqr_avx2_12
+ vmovdqu (%rbp), %xmm0
+ vmovups %xmm0, (%rdi)
+ vmovdqu 16(%rbp), %xmm0
+ vmovups %xmm0, 16(%rdi)
+ vmovdqu 32(%rbp), %xmm0
+ vmovups %xmm0, 32(%rdi)
+ movq 48(%rbp), %rax
+ movq %rax, 48(%rdi)
+L_end_3072_sqr_avx2_12:
+ addq $96, %rsp
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_sqr_avx2_12,.-sp_3072_sqr_avx2_12
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer. Result, 12 64-bit words (rdi).
+ * a A single precision integer. First operand, 12 words (rsi).
+ * b A single precision integer. Second operand, 12 words (rdx).
+ *
+ * Returns the carry out of the most significant word (0 or 1) in rax.
+ * Leaf function: no stack use, no callee-saved registers touched.
+ * NOTE(review): the add/adc chain keeps CF live across all the
+ * interleaved movs - instruction order must not change.
+ */
+#ifndef __APPLE__
+.globl sp_3072_add_12
+.type sp_3072_add_12,@function
+.align 16
+sp_3072_add_12:
+#else
+.globl _sp_3072_add_12
+.p2align 4
+_sp_3072_add_12:
+#endif /* __APPLE__ */
+ # Add
+ movq (%rsi), %rcx
+ # Zero the carry accumulator before the chain starts (also clears CF).
+ xorq %rax, %rax
+ addq (%rdx), %rcx
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ adcq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ adcq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ adcq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ adcq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ adcq 40(%rdx), %r8
+ movq 48(%rsi), %rcx
+ movq %r8, 40(%rdi)
+ adcq 48(%rdx), %rcx
+ movq 56(%rsi), %r8
+ movq %rcx, 48(%rdi)
+ adcq 56(%rdx), %r8
+ movq 64(%rsi), %rcx
+ movq %r8, 56(%rdi)
+ adcq 64(%rdx), %rcx
+ movq 72(%rsi), %r8
+ movq %rcx, 64(%rdi)
+ adcq 72(%rdx), %r8
+ movq 80(%rsi), %rcx
+ movq %r8, 72(%rdi)
+ adcq 80(%rdx), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%rdi)
+ adcq 88(%rdx), %r8
+ movq %r8, 88(%rdi)
+ # Materialise the final carry flag as the return value.
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_add_12,.-sp_3072_add_12
+#endif /* __APPLE__ */
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result, 24 64-bit words (rdi),
+ *   updated in place.
+ * b A single precision integer, 24 words (rsi).
+ *
+ * Returns 0 in rax when no borrow occurs, or -1 (all ones, via
+ * sbbq $0 from a zeroed rax) when the subtraction borrows out of the
+ * most significant word.
+ * Leaf function: no stack use, no callee-saved registers touched.
+ * NOTE(review): the sub/sbb chain keeps CF live across the
+ * interleaved movs - instruction order must not change.
+ */
+#ifndef __APPLE__
+.globl sp_3072_sub_in_place_24
+.type sp_3072_sub_in_place_24,@function
+.align 16
+sp_3072_sub_in_place_24:
+#else
+.globl _sp_3072_sub_in_place_24
+.p2align 4
+_sp_3072_sub_in_place_24:
+#endif /* __APPLE__ */
+ movq (%rdi), %rdx
+ # Zero the borrow accumulator before the chain starts (also clears CF).
+ xorq %rax, %rax
+ subq (%rsi), %rdx
+ movq 8(%rdi), %rcx
+ movq %rdx, (%rdi)
+ sbbq 8(%rsi), %rcx
+ movq 16(%rdi), %rdx
+ movq %rcx, 8(%rdi)
+ sbbq 16(%rsi), %rdx
+ movq 24(%rdi), %rcx
+ movq %rdx, 16(%rdi)
+ sbbq 24(%rsi), %rcx
+ movq 32(%rdi), %rdx
+ movq %rcx, 24(%rdi)
+ sbbq 32(%rsi), %rdx
+ movq 40(%rdi), %rcx
+ movq %rdx, 32(%rdi)
+ sbbq 40(%rsi), %rcx
+ movq 48(%rdi), %rdx
+ movq %rcx, 40(%rdi)
+ sbbq 48(%rsi), %rdx
+ movq 56(%rdi), %rcx
+ movq %rdx, 48(%rdi)
+ sbbq 56(%rsi), %rcx
+ movq 64(%rdi), %rdx
+ movq %rcx, 56(%rdi)
+ sbbq 64(%rsi), %rdx
+ movq 72(%rdi), %rcx
+ movq %rdx, 64(%rdi)
+ sbbq 72(%rsi), %rcx
+ movq 80(%rdi), %rdx
+ movq %rcx, 72(%rdi)
+ sbbq 80(%rsi), %rdx
+ movq 88(%rdi), %rcx
+ movq %rdx, 80(%rdi)
+ sbbq 88(%rsi), %rcx
+ movq 96(%rdi), %rdx
+ movq %rcx, 88(%rdi)
+ sbbq 96(%rsi), %rdx
+ movq 104(%rdi), %rcx
+ movq %rdx, 96(%rdi)
+ sbbq 104(%rsi), %rcx
+ movq 112(%rdi), %rdx
+ movq %rcx, 104(%rdi)
+ sbbq 112(%rsi), %rdx
+ movq 120(%rdi), %rcx
+ movq %rdx, 112(%rdi)
+ sbbq 120(%rsi), %rcx
+ movq 128(%rdi), %rdx
+ movq %rcx, 120(%rdi)
+ sbbq 128(%rsi), %rdx
+ movq 136(%rdi), %rcx
+ movq %rdx, 128(%rdi)
+ sbbq 136(%rsi), %rcx
+ movq 144(%rdi), %rdx
+ movq %rcx, 136(%rdi)
+ sbbq 144(%rsi), %rdx
+ movq 152(%rdi), %rcx
+ movq %rdx, 144(%rdi)
+ sbbq 152(%rsi), %rcx
+ movq 160(%rdi), %rdx
+ movq %rcx, 152(%rdi)
+ sbbq 160(%rsi), %rdx
+ movq 168(%rdi), %rcx
+ movq %rdx, 160(%rdi)
+ sbbq 168(%rsi), %rcx
+ movq 176(%rdi), %rdx
+ movq %rcx, 168(%rdi)
+ sbbq 176(%rsi), %rdx
+ movq 184(%rdi), %rcx
+ movq %rdx, 176(%rdi)
+ sbbq 184(%rsi), %rcx
+ movq %rcx, 184(%rdi)
+ # rax = 0 - borrow: 0 on success, -1 (mask) when b > a.
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_sub_in_place_24,.-sp_3072_sub_in_place_24
+#endif /* __APPLE__ */
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer. Result, 24 64-bit words (rdi).
+ * a A single precision integer. First operand, 24 words (rsi).
+ * b A single precision integer. Second operand, 24 words (rdx).
+ *
+ * Returns the carry out of the most significant word (0 or 1) in rax.
+ * Same structure as sp_3072_add_12, extended to 24 words.
+ * Leaf function: no stack use, no callee-saved registers touched.
+ * NOTE(review): the add/adc chain keeps CF live across the
+ * interleaved movs - instruction order must not change.
+ */
+#ifndef __APPLE__
+.globl sp_3072_add_24
+.type sp_3072_add_24,@function
+.align 16
+sp_3072_add_24:
+#else
+.globl _sp_3072_add_24
+.p2align 4
+_sp_3072_add_24:
+#endif /* __APPLE__ */
+ # Add
+ movq (%rsi), %rcx
+ # Zero the carry accumulator before the chain starts (also clears CF).
+ xorq %rax, %rax
+ addq (%rdx), %rcx
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ adcq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ adcq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ adcq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ adcq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ adcq 40(%rdx), %r8
+ movq 48(%rsi), %rcx
+ movq %r8, 40(%rdi)
+ adcq 48(%rdx), %rcx
+ movq 56(%rsi), %r8
+ movq %rcx, 48(%rdi)
+ adcq 56(%rdx), %r8
+ movq 64(%rsi), %rcx
+ movq %r8, 56(%rdi)
+ adcq 64(%rdx), %rcx
+ movq 72(%rsi), %r8
+ movq %rcx, 64(%rdi)
+ adcq 72(%rdx), %r8
+ movq 80(%rsi), %rcx
+ movq %r8, 72(%rdi)
+ adcq 80(%rdx), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%rdi)
+ adcq 88(%rdx), %r8
+ movq 96(%rsi), %rcx
+ movq %r8, 88(%rdi)
+ adcq 96(%rdx), %rcx
+ movq 104(%rsi), %r8
+ movq %rcx, 96(%rdi)
+ adcq 104(%rdx), %r8
+ movq 112(%rsi), %rcx
+ movq %r8, 104(%rdi)
+ adcq 112(%rdx), %rcx
+ movq 120(%rsi), %r8
+ movq %rcx, 112(%rdi)
+ adcq 120(%rdx), %r8
+ movq 128(%rsi), %rcx
+ movq %r8, 120(%rdi)
+ adcq 128(%rdx), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%rdi)
+ adcq 136(%rdx), %r8
+ movq 144(%rsi), %rcx
+ movq %r8, 136(%rdi)
+ adcq 144(%rdx), %rcx
+ movq 152(%rsi), %r8
+ movq %rcx, 144(%rdi)
+ adcq 152(%rdx), %r8
+ movq 160(%rsi), %rcx
+ movq %r8, 152(%rdi)
+ adcq 160(%rdx), %rcx
+ movq 168(%rsi), %r8
+ movq %rcx, 160(%rdi)
+ adcq 168(%rdx), %r8
+ movq 176(%rsi), %rcx
+ movq %r8, 168(%rdi)
+ adcq 176(%rdx), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%rdi)
+ adcq 184(%rdx), %r8
+ movq %r8, 184(%rdi)
+ # Materialise the final carry flag as the return value.
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_add_24,.-sp_3072_add_24
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * SysV AMD64 ABI:  rdi = r (48 limbs out), rsi = a, rdx = b (24 limbs each).
+ *
+ * One level of Karatsuba over sp_3072_mul_12:
+ *   z0 = a0*b0, z2 = a1*b1, z1' = (a0+a1)*(b0+b1), then
+ *   r  = z0 + (z1' - z0 - z2)<<768 + z2<<1536.
+ * The (a0+a1)/(b0+b1) sums can overflow one bit; the overflow bits are kept
+ * in r13/r14 and folded back in branch-free via full-limb AND masks, so the
+ * whole routine is constant-time.
+ *
+ * Stack frame (616 bytes):
+ *   0(%rsp)   : z1' (24 limbs, product of the half-length sums)
+ *   192(%rsp) : z2 (24 limbs)
+ *   384(%rsp) : a0+a1 (12 limbs)     480(%rsp): b0+b1 (12 limbs)
+ *   576/584/592(%rsp): saved rdi/rsi/rdx (caller-saved, live across calls)
+ *   600/608(%rsp)    : carry bits of the two half-sums
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_24
+.type sp_3072_mul_24,@function
+.align 16
+sp_3072_mul_24:
+#else
+.globl _sp_3072_mul_24
+.p2align 4
+_sp_3072_mul_24:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ subq $616, %rsp
+ movq %rdi, 576(%rsp)
+ movq %rsi, 584(%rsp)
+ movq %rdx, 592(%rsp)
+ # a0 + a1 -> 384(%rsp); carry bit -> r13.
+ leaq 384(%rsp), %r10
+ leaq 96(%rsi), %r12
+ # Add
+ movq (%rsi), %rax
+ xorq %r13, %r13
+ addq (%r12), %rax
+ movq 8(%rsi), %rcx
+ movq %rax, (%r10)
+ adcq 8(%r12), %rcx
+ movq 16(%rsi), %r8
+ movq %rcx, 8(%r10)
+ adcq 16(%r12), %r8
+ movq 24(%rsi), %rax
+ movq %r8, 16(%r10)
+ adcq 24(%r12), %rax
+ movq 32(%rsi), %rcx
+ movq %rax, 24(%r10)
+ adcq 32(%r12), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%r10)
+ adcq 40(%r12), %r8
+ movq 48(%rsi), %rax
+ movq %r8, 40(%r10)
+ adcq 48(%r12), %rax
+ movq 56(%rsi), %rcx
+ movq %rax, 48(%r10)
+ adcq 56(%r12), %rcx
+ movq 64(%rsi), %r8
+ movq %rcx, 56(%r10)
+ adcq 64(%r12), %r8
+ movq 72(%rsi), %rax
+ movq %r8, 64(%r10)
+ adcq 72(%r12), %rax
+ movq 80(%rsi), %rcx
+ movq %rax, 72(%r10)
+ adcq 80(%r12), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%r10)
+ adcq 88(%r12), %r8
+ movq %r8, 88(%r10)
+ adcq $0, %r13
+ movq %r13, 600(%rsp)
+ # b0 + b1 -> 480(%rsp); carry bit -> r14.
+ leaq 480(%rsp), %r11
+ leaq 96(%rdx), %r12
+ # Add
+ movq (%rdx), %rax
+ xorq %r14, %r14
+ addq (%r12), %rax
+ movq 8(%rdx), %rcx
+ movq %rax, (%r11)
+ adcq 8(%r12), %rcx
+ movq 16(%rdx), %r8
+ movq %rcx, 8(%r11)
+ adcq 16(%r12), %r8
+ movq 24(%rdx), %rax
+ movq %r8, 16(%r11)
+ adcq 24(%r12), %rax
+ movq 32(%rdx), %rcx
+ movq %rax, 24(%r11)
+ adcq 32(%r12), %rcx
+ movq 40(%rdx), %r8
+ movq %rcx, 32(%r11)
+ adcq 40(%r12), %r8
+ movq 48(%rdx), %rax
+ movq %r8, 40(%r11)
+ adcq 48(%r12), %rax
+ movq 56(%rdx), %rcx
+ movq %rax, 48(%r11)
+ adcq 56(%r12), %rcx
+ movq 64(%rdx), %r8
+ movq %rcx, 56(%r11)
+ adcq 64(%r12), %r8
+ movq 72(%rdx), %rax
+ movq %r8, 64(%r11)
+ adcq 72(%r12), %rax
+ movq 80(%rdx), %rcx
+ movq %rax, 72(%r11)
+ adcq 80(%r12), %rcx
+ movq 88(%rdx), %r8
+ movq %rcx, 80(%r11)
+ adcq 88(%r12), %r8
+ movq %r8, 88(%r11)
+ adcq $0, %r14
+ movq %r14, 608(%rsp)
+ # z1' = (a0+a1)*(b0+b1) -> 0(%rsp)  (low 12x12 product only).
+ movq %r11, %rdx
+ movq %r10, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_3072_mul_12@plt
+#else
+ callq _sp_3072_mul_12
+#endif /* __APPLE__ */
+ # z2 = a1 * b1 -> 192(%rsp).
+ movq 592(%rsp), %rdx
+ movq 584(%rsp), %rsi
+ leaq 192(%rsp), %rdi
+ addq $96, %rdx
+ addq $96, %rsi
+#ifndef __APPLE__
+ callq sp_3072_mul_12@plt
+#else
+ callq _sp_3072_mul_12
+#endif /* __APPLE__ */
+ # z0 = a0 * b0 -> r (low half of the result, in place).
+ movq 592(%rsp), %rdx
+ movq 584(%rsp), %rsi
+ movq 576(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_3072_mul_12@plt
+#else
+ callq _sp_3072_mul_12
+#endif /* __APPLE__ */
+ # Overflow fixup: r13/r14 hold the half-sum carries (0 or 1);
+ # after negq they become full-limb masks (0 or ~0).
+ # r9 = r13 & r14 is the carry-product bit for the top limb.
+ movq 600(%rsp), %r13
+ movq 608(%rsp), %r14
+ movq 576(%rsp), %r15
+ movq %r13, %r9
+ leaq 384(%rsp), %r10
+ leaq 480(%rsp), %r11
+ andq %r14, %r9
+ negq %r13
+ negq %r14
+ addq $192, %r15
+ # Mask the saved half-sums: keep (a0+a1) only if b carried, and
+ # (b0+b1) only if a carried (cross terms of the overflow bits).
+ movq (%r10), %rax
+ movq (%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, (%r10)
+ movq %rcx, (%r11)
+ movq 8(%r10), %rax
+ movq 8(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 8(%r10)
+ movq %rcx, 8(%r11)
+ movq 16(%r10), %rax
+ movq 16(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 16(%r10)
+ movq %rcx, 16(%r11)
+ movq 24(%r10), %rax
+ movq 24(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 24(%r10)
+ movq %rcx, 24(%r11)
+ movq 32(%r10), %rax
+ movq 32(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 32(%r10)
+ movq %rcx, 32(%r11)
+ movq 40(%r10), %rax
+ movq 40(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 40(%r10)
+ movq %rcx, 40(%r11)
+ movq 48(%r10), %rax
+ movq 48(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 48(%r10)
+ movq %rcx, 48(%r11)
+ movq 56(%r10), %rax
+ movq 56(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 56(%r10)
+ movq %rcx, 56(%r11)
+ movq 64(%r10), %rax
+ movq 64(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 64(%r10)
+ movq %rcx, 64(%r11)
+ movq 72(%r10), %rax
+ movq 72(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 72(%r10)
+ movq %rcx, 72(%r11)
+ movq 80(%r10), %rax
+ movq 80(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 80(%r10)
+ movq %rcx, 80(%r11)
+ movq 88(%r10), %rax
+ movq 88(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 88(%r10)
+ movq %rcx, 88(%r11)
+ # r[24..35] = masked(a0+a1) + masked(b0+b1); top carry joins r9.
+ movq (%r10), %rax
+ addq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq %r8, 88(%r15)
+ adcq $0, %r9
+ # z1' -= z2  (0(%rsp) -= 192(%rsp)); borrow propagates into r9.
+ leaq 192(%rsp), %r11
+ movq %rsp, %r10
+ movq (%r10), %rax
+ subq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%r11), %r8
+ movq %r8, 184(%r10)
+ sbbq $0, %r9
+ # z1' -= z0 (z0 lives at r, pointed to by rdi).
+ # NOTE(review): relies on sp_3072_mul_12 not clobbering %rdi across the
+ # last call above — holds for this generated file; confirm if mul_12 changes.
+ movq (%r10), %rax
+ subq (%rdi), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%rdi), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%rdi), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%rdi), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%rdi), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%rdi), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%rdi), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%rdi), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%rdi), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%rdi), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%rdi), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%rdi), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%rdi), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%rdi), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%rdi), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%rdi), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%rdi), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%rdi), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%rdi), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%rdi), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%rdi), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%rdi), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%rdi), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%rdi), %r8
+ movq %r8, 184(%r10)
+ sbbq $0, %r9
+ # r[12..35] += (z1' - z0 - z2), i.e. the middle term shifted by 768 bits.
+ subq $96, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r10), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r10), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r10), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r10), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r10), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r10), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r10), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r10), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r10), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r10), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r10), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r10), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r10), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r10), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r10), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r10), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r10), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r10), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r10), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r10), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r10), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r10), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r10), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r10), %r8
+ movq %r8, 184(%r15)
+ adcq $0, %r9
+ # Park the accumulated carry at r[36] before the final z2 addition.
+ movq %r9, 288(%rdi)
+ addq $96, %r15
+ # r[24..] += z2; carry ripples through r[36..47] via "add to zero".
+ # Add
+ movq (%r15), %rax
+ addq (%r11), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq %rax, 96(%r15)
+ # Add to zero
+ movq 104(%r11), %rax
+ adcq $0, %rax
+ movq 112(%r11), %rcx
+ movq %rax, 104(%r15)
+ adcq $0, %rcx
+ movq 120(%r11), %r8
+ movq %rcx, 112(%r15)
+ adcq $0, %r8
+ movq 128(%r11), %rax
+ movq %r8, 120(%r15)
+ adcq $0, %rax
+ movq 136(%r11), %rcx
+ movq %rax, 128(%r15)
+ adcq $0, %rcx
+ movq 144(%r11), %r8
+ movq %rcx, 136(%r15)
+ adcq $0, %r8
+ movq 152(%r11), %rax
+ movq %r8, 144(%r15)
+ adcq $0, %rax
+ movq 160(%r11), %rcx
+ movq %rax, 152(%r15)
+ adcq $0, %rcx
+ movq 168(%r11), %r8
+ movq %rcx, 160(%r15)
+ adcq $0, %r8
+ movq 176(%r11), %rax
+ movq %r8, 168(%r15)
+ adcq $0, %rax
+ movq 184(%r11), %rcx
+ movq %rax, 176(%r15)
+ adcq $0, %rcx
+ movq %rcx, 184(%r15)
+ addq $616, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_24,.-sp_3072_mul_24
+#endif /* __APPLE__ */
+/* Add a to a into r. (r = a + a)
+ *
+ * SysV AMD64 ABI:  rdi = r, rsi = a; 12 x 64-bit limbs.
+ * Returns the carry out of the top limb (0 or 1) in rax.
+ *
+ * Branch-free doubling: each limb is added to itself (reg+reg, so the
+ * adcq chain doubles with carry), constant-time by construction.
+ */
+#ifndef __APPLE__
+.globl sp_3072_dbl_12
+.type sp_3072_dbl_12,@function
+.align 16
+sp_3072_dbl_12:
+#else
+.globl _sp_3072_dbl_12
+.p2align 4
+_sp_3072_dbl_12:
+#endif /* __APPLE__ */
+ movq (%rsi), %rdx
+ # Zero the carry accumulator; addq below starts the carry chain.
+ xorq %rax, %rax
+ addq %rdx, %rdx
+ movq 8(%rsi), %rcx
+ movq %rdx, (%rdi)
+ adcq %rcx, %rcx
+ movq 16(%rsi), %rdx
+ movq %rcx, 8(%rdi)
+ adcq %rdx, %rdx
+ movq 24(%rsi), %rcx
+ movq %rdx, 16(%rdi)
+ adcq %rcx, %rcx
+ movq 32(%rsi), %rdx
+ movq %rcx, 24(%rdi)
+ adcq %rdx, %rdx
+ movq 40(%rsi), %rcx
+ movq %rdx, 32(%rdi)
+ adcq %rcx, %rcx
+ movq 48(%rsi), %rdx
+ movq %rcx, 40(%rdi)
+ adcq %rdx, %rdx
+ movq 56(%rsi), %rcx
+ movq %rdx, 48(%rdi)
+ adcq %rcx, %rcx
+ movq 64(%rsi), %rdx
+ movq %rcx, 56(%rdi)
+ adcq %rdx, %rdx
+ movq 72(%rsi), %rcx
+ movq %rdx, 64(%rdi)
+ adcq %rcx, %rcx
+ movq 80(%rsi), %rdx
+ movq %rcx, 72(%rdi)
+ adcq %rdx, %rdx
+ movq 88(%rsi), %rcx
+ movq %rdx, 80(%rdi)
+ adcq %rcx, %rcx
+ movq %rcx, 88(%rdi)
+ # Return the final carry in rax.
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_dbl_12,.-sp_3072_dbl_12
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * SysV AMD64 ABI:  rdi = r (48 limbs out), rsi = a (24 limbs).
+ *
+ * One level of Karatsuba squaring over sp_3072_sqr_12:
+ *   z0 = a0^2, z2 = a1^2, z1' = (a0+a1)^2, then
+ *   r  = z0 + (z1' - z0 - z2)<<768 + z2<<1536.
+ * The (a0+a1) sum may carry one bit; that bit (496(%rsp)) is folded back
+ * branch-free via an AND mask and an in-place doubling, keeping the whole
+ * routine constant-time.
+ *
+ * Stack frame (504 bytes):
+ *   0(%rsp)   : z1' = (a0+a1)^2 (24 limbs)
+ *   192(%rsp) : z2 = a1^2 (24 limbs)
+ *   384(%rsp) : a0+a1 (12 limbs)
+ *   480/488(%rsp): saved rdi/rsi     496(%rsp): carry bit of a0+a1
+ */
+#ifndef __APPLE__
+.globl sp_3072_sqr_24
+.type sp_3072_sqr_24,@function
+.align 16
+sp_3072_sqr_24:
+#else
+.globl _sp_3072_sqr_24
+.p2align 4
+_sp_3072_sqr_24:
+#endif /* __APPLE__ */
+ subq $504, %rsp
+ movq %rdi, 480(%rsp)
+ movq %rsi, 488(%rsp)
+ # a0 + a1 -> 384(%rsp); carry bit -> rcx, then 496(%rsp).
+ leaq 384(%rsp), %r8
+ leaq 96(%rsi), %r9
+ # Add
+ movq (%rsi), %rdx
+ xorq %rcx, %rcx
+ addq (%r9), %rdx
+ movq 8(%rsi), %rax
+ movq %rdx, (%r8)
+ adcq 8(%r9), %rax
+ movq 16(%rsi), %rdx
+ movq %rax, 8(%r8)
+ adcq 16(%r9), %rdx
+ movq 24(%rsi), %rax
+ movq %rdx, 16(%r8)
+ adcq 24(%r9), %rax
+ movq 32(%rsi), %rdx
+ movq %rax, 24(%r8)
+ adcq 32(%r9), %rdx
+ movq 40(%rsi), %rax
+ movq %rdx, 32(%r8)
+ adcq 40(%r9), %rax
+ movq 48(%rsi), %rdx
+ movq %rax, 40(%r8)
+ adcq 48(%r9), %rdx
+ movq 56(%rsi), %rax
+ movq %rdx, 48(%r8)
+ adcq 56(%r9), %rax
+ movq 64(%rsi), %rdx
+ movq %rax, 56(%r8)
+ adcq 64(%r9), %rdx
+ movq 72(%rsi), %rax
+ movq %rdx, 64(%r8)
+ adcq 72(%r9), %rax
+ movq 80(%rsi), %rdx
+ movq %rax, 72(%r8)
+ adcq 80(%r9), %rdx
+ movq 88(%rsi), %rax
+ movq %rdx, 80(%r8)
+ adcq 88(%r9), %rax
+ movq %rax, 88(%r8)
+ adcq $0, %rcx
+ movq %rcx, 496(%rsp)
+ # z1' = (a0+a1)^2 -> 0(%rsp).
+ movq %r8, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_3072_sqr_12@plt
+#else
+ callq _sp_3072_sqr_12
+#endif /* __APPLE__ */
+ # z2 = a1^2 -> 192(%rsp).
+ movq 488(%rsp), %rsi
+ leaq 192(%rsp), %rdi
+ addq $96, %rsi
+#ifndef __APPLE__
+ callq sp_3072_sqr_12@plt
+#else
+ callq _sp_3072_sqr_12
+#endif /* __APPLE__ */
+ # z0 = a0^2 -> r (low half of the result, in place).
+ movq 488(%rsp), %rsi
+ movq 480(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_3072_sqr_12@plt
+#else
+ callq _sp_3072_sqr_12
+#endif /* __APPLE__ */
+ # Overflow fixup: if (a0+a1) carried, the true square gains an extra
+ # 2*(a0+a1)<<768 + 1<<1536 term.  negq turns the carry bit into a
+ # full-limb mask; the masked sum is copied then doubled in place.
+ movq 496(%rsp), %r10
+ movq %rdi, %r9
+ leaq 384(%rsp), %r8
+ movq %r10, %rcx
+ negq %r10
+ addq $192, %r9
+ movq (%r8), %rdx
+ movq 8(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, (%r9)
+ movq %rax, 8(%r9)
+ movq 16(%r8), %rdx
+ movq 24(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 16(%r9)
+ movq %rax, 24(%r9)
+ movq 32(%r8), %rdx
+ movq 40(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 32(%r9)
+ movq %rax, 40(%r9)
+ movq 48(%r8), %rdx
+ movq 56(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 48(%r9)
+ movq %rax, 56(%r9)
+ movq 64(%r8), %rdx
+ movq 72(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 64(%r9)
+ movq %rax, 72(%r9)
+ movq 80(%r8), %rdx
+ movq 88(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 80(%r9)
+ movq %rax, 88(%r9)
+ # Double the masked sum in place: r[24..35] = 2*mask*(a0+a1).
+ movq (%r9), %rdx
+ addq %rdx, %rdx
+ movq 8(%r9), %rax
+ movq %rdx, (%r9)
+ adcq %rax, %rax
+ movq 16(%r9), %rdx
+ movq %rax, 8(%r9)
+ adcq %rdx, %rdx
+ movq 24(%r9), %rax
+ movq %rdx, 16(%r9)
+ adcq %rax, %rax
+ movq 32(%r9), %rdx
+ movq %rax, 24(%r9)
+ adcq %rdx, %rdx
+ movq 40(%r9), %rax
+ movq %rdx, 32(%r9)
+ adcq %rax, %rax
+ movq 48(%r9), %rdx
+ movq %rax, 40(%r9)
+ adcq %rdx, %rdx
+ movq 56(%r9), %rax
+ movq %rdx, 48(%r9)
+ adcq %rax, %rax
+ movq 64(%r9), %rdx
+ movq %rax, 56(%r9)
+ adcq %rdx, %rdx
+ movq 72(%r9), %rax
+ movq %rdx, 64(%r9)
+ adcq %rax, %rax
+ movq 80(%r9), %rdx
+ movq %rax, 72(%r9)
+ adcq %rdx, %rdx
+ movq 88(%r9), %rax
+ movq %rdx, 80(%r9)
+ adcq %rax, %rax
+ movq %rax, 88(%r9)
+ # rcx accumulates the top bit (carry-squared term plus doubling carry).
+ adcq $0, %rcx
+ # z1' -= z2  (0(%rsp) -= 192(%rsp)); borrow folds into rcx.
+ leaq 192(%rsp), %rsi
+ movq %rsp, %r8
+ movq (%r8), %rdx
+ subq (%rsi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rsi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rsi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rsi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rsi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rsi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rsi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rsi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rsi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rsi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rsi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rsi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rsi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rsi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rsi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rsi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rsi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rsi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rsi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rsi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rsi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rsi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rsi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rsi), %rax
+ movq %rax, 184(%r8)
+ sbbq $0, %rcx
+ # z1' -= z0 (z0 lives at r, pointed to by rdi).
+ # NOTE(review): relies on sp_3072_sqr_12 not clobbering %rdi across the
+ # last call above — holds for this generated file; confirm if sqr_12 changes.
+ movq (%r8), %rdx
+ subq (%rdi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rdi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rdi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rdi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rdi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rdi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rdi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rdi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rdi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rdi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rdi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rdi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rdi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rdi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rdi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rdi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rdi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rdi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rdi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rdi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rdi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rdi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rdi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rdi), %rax
+ movq %rax, 184(%r8)
+ sbbq $0, %rcx
+ # r[12..35] += (z1' - z0 - z2): the middle term shifted by 768 bits.
+ subq $96, %r9
+ # Add in place
+ movq (%r9), %rdx
+ addq (%r8), %rdx
+ movq 8(%r9), %rax
+ movq %rdx, (%r9)
+ adcq 8(%r8), %rax
+ movq 16(%r9), %rdx
+ movq %rax, 8(%r9)
+ adcq 16(%r8), %rdx
+ movq 24(%r9), %rax
+ movq %rdx, 16(%r9)
+ adcq 24(%r8), %rax
+ movq 32(%r9), %rdx
+ movq %rax, 24(%r9)
+ adcq 32(%r8), %rdx
+ movq 40(%r9), %rax
+ movq %rdx, 32(%r9)
+ adcq 40(%r8), %rax
+ movq 48(%r9), %rdx
+ movq %rax, 40(%r9)
+ adcq 48(%r8), %rdx
+ movq 56(%r9), %rax
+ movq %rdx, 48(%r9)
+ adcq 56(%r8), %rax
+ movq 64(%r9), %rdx
+ movq %rax, 56(%r9)
+ adcq 64(%r8), %rdx
+ movq 72(%r9), %rax
+ movq %rdx, 64(%r9)
+ adcq 72(%r8), %rax
+ movq 80(%r9), %rdx
+ movq %rax, 72(%r9)
+ adcq 80(%r8), %rdx
+ movq 88(%r9), %rax
+ movq %rdx, 80(%r9)
+ adcq 88(%r8), %rax
+ movq 96(%r9), %rdx
+ movq %rax, 88(%r9)
+ adcq 96(%r8), %rdx
+ movq 104(%r9), %rax
+ movq %rdx, 96(%r9)
+ adcq 104(%r8), %rax
+ movq 112(%r9), %rdx
+ movq %rax, 104(%r9)
+ adcq 112(%r8), %rdx
+ movq 120(%r9), %rax
+ movq %rdx, 112(%r9)
+ adcq 120(%r8), %rax
+ movq 128(%r9), %rdx
+ movq %rax, 120(%r9)
+ adcq 128(%r8), %rdx
+ movq 136(%r9), %rax
+ movq %rdx, 128(%r9)
+ adcq 136(%r8), %rax
+ movq 144(%r9), %rdx
+ movq %rax, 136(%r9)
+ adcq 144(%r8), %rdx
+ movq 152(%r9), %rax
+ movq %rdx, 144(%r9)
+ adcq 152(%r8), %rax
+ movq 160(%r9), %rdx
+ movq %rax, 152(%r9)
+ adcq 160(%r8), %rdx
+ movq 168(%r9), %rax
+ movq %rdx, 160(%r9)
+ adcq 168(%r8), %rax
+ movq 176(%r9), %rdx
+ movq %rax, 168(%r9)
+ adcq 176(%r8), %rdx
+ movq 184(%r9), %rax
+ movq %rdx, 176(%r9)
+ adcq 184(%r8), %rax
+ movq %rax, 184(%r9)
+ adcq $0, %rcx
+ # Park the accumulated carry at r[36] before adding z2.
+ movq %rcx, 288(%rdi)
+ # r[24..] += z2 (rsi still points at 192(%rsp)); then ripple the carry
+ # through r[37..47] via "add to zero".
+ # Add in place
+ movq 96(%r9), %rdx
+ addq (%rsi), %rdx
+ movq 104(%r9), %rax
+ movq %rdx, 96(%r9)
+ adcq 8(%rsi), %rax
+ movq 112(%r9), %rdx
+ movq %rax, 104(%r9)
+ adcq 16(%rsi), %rdx
+ movq 120(%r9), %rax
+ movq %rdx, 112(%r9)
+ adcq 24(%rsi), %rax
+ movq 128(%r9), %rdx
+ movq %rax, 120(%r9)
+ adcq 32(%rsi), %rdx
+ movq 136(%r9), %rax
+ movq %rdx, 128(%r9)
+ adcq 40(%rsi), %rax
+ movq 144(%r9), %rdx
+ movq %rax, 136(%r9)
+ adcq 48(%rsi), %rdx
+ movq 152(%r9), %rax
+ movq %rdx, 144(%r9)
+ adcq 56(%rsi), %rax
+ movq 160(%r9), %rdx
+ movq %rax, 152(%r9)
+ adcq 64(%rsi), %rdx
+ movq 168(%r9), %rax
+ movq %rdx, 160(%r9)
+ adcq 72(%rsi), %rax
+ movq 176(%r9), %rdx
+ movq %rax, 168(%r9)
+ adcq 80(%rsi), %rdx
+ movq 184(%r9), %rax
+ movq %rdx, 176(%r9)
+ adcq 88(%rsi), %rax
+ movq 192(%r9), %rdx
+ movq %rax, 184(%r9)
+ adcq 96(%rsi), %rdx
+ movq %rdx, 192(%r9)
+ # Add to zero
+ movq 104(%rsi), %rdx
+ adcq $0, %rdx
+ movq 112(%rsi), %rax
+ movq %rdx, 200(%r9)
+ adcq $0, %rax
+ movq 120(%rsi), %rdx
+ movq %rax, 208(%r9)
+ adcq $0, %rdx
+ movq 128(%rsi), %rax
+ movq %rdx, 216(%r9)
+ adcq $0, %rax
+ movq 136(%rsi), %rdx
+ movq %rax, 224(%r9)
+ adcq $0, %rdx
+ movq 144(%rsi), %rax
+ movq %rdx, 232(%r9)
+ adcq $0, %rax
+ movq 152(%rsi), %rdx
+ movq %rax, 240(%r9)
+ adcq $0, %rdx
+ movq 160(%rsi), %rax
+ movq %rdx, 248(%r9)
+ adcq $0, %rax
+ movq 168(%rsi), %rdx
+ movq %rax, 256(%r9)
+ adcq $0, %rdx
+ movq 176(%rsi), %rax
+ movq %rdx, 264(%r9)
+ adcq $0, %rax
+ movq 184(%rsi), %rdx
+ movq %rax, 272(%r9)
+ adcq $0, %rdx
+ movq %rdx, 280(%r9)
+ addq $504, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_sqr_24,.-sp_3072_sqr_24
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_avx2_24
+.type sp_3072_mul_avx2_24,@function
+.align 16
+sp_3072_mul_avx2_24:
+#else
+.globl _sp_3072_mul_avx2_24
+.p2align 4
+_sp_3072_mul_avx2_24:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ subq $616, %rsp
+ movq %rdi, 576(%rsp)
+ movq %rsi, 584(%rsp)
+ movq %rdx, 592(%rsp)
+ leaq 384(%rsp), %r10
+ leaq 96(%rsi), %r12
+ # Add
+ movq (%rsi), %rax
+ xorq %r13, %r13
+ addq (%r12), %rax
+ movq 8(%rsi), %rcx
+ movq %rax, (%r10)
+ adcq 8(%r12), %rcx
+ movq 16(%rsi), %r8
+ movq %rcx, 8(%r10)
+ adcq 16(%r12), %r8
+ movq 24(%rsi), %rax
+ movq %r8, 16(%r10)
+ adcq 24(%r12), %rax
+ movq 32(%rsi), %rcx
+ movq %rax, 24(%r10)
+ adcq 32(%r12), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%r10)
+ adcq 40(%r12), %r8
+ movq 48(%rsi), %rax
+ movq %r8, 40(%r10)
+ adcq 48(%r12), %rax
+ movq 56(%rsi), %rcx
+ movq %rax, 48(%r10)
+ adcq 56(%r12), %rcx
+ movq 64(%rsi), %r8
+ movq %rcx, 56(%r10)
+ adcq 64(%r12), %r8
+ movq 72(%rsi), %rax
+ movq %r8, 64(%r10)
+ adcq 72(%r12), %rax
+ movq 80(%rsi), %rcx
+ movq %rax, 72(%r10)
+ adcq 80(%r12), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%r10)
+ adcq 88(%r12), %r8
+ movq %r8, 88(%r10)
+ adcq $0, %r13
+ movq %r13, 600(%rsp)
+ leaq 480(%rsp), %r11
+ leaq 96(%rdx), %r12
+ # Add
+ movq (%rdx), %rax
+ xorq %r14, %r14
+ addq (%r12), %rax
+ movq 8(%rdx), %rcx
+ movq %rax, (%r11)
+ adcq 8(%r12), %rcx
+ movq 16(%rdx), %r8
+ movq %rcx, 8(%r11)
+ adcq 16(%r12), %r8
+ movq 24(%rdx), %rax
+ movq %r8, 16(%r11)
+ adcq 24(%r12), %rax
+ movq 32(%rdx), %rcx
+ movq %rax, 24(%r11)
+ adcq 32(%r12), %rcx
+ movq 40(%rdx), %r8
+ movq %rcx, 32(%r11)
+ adcq 40(%r12), %r8
+ movq 48(%rdx), %rax
+ movq %r8, 40(%r11)
+ adcq 48(%r12), %rax
+ movq 56(%rdx), %rcx
+ movq %rax, 48(%r11)
+ adcq 56(%r12), %rcx
+ movq 64(%rdx), %r8
+ movq %rcx, 56(%r11)
+ adcq 64(%r12), %r8
+ movq 72(%rdx), %rax
+ movq %r8, 64(%r11)
+ adcq 72(%r12), %rax
+ movq 80(%rdx), %rcx
+ movq %rax, 72(%r11)
+ adcq 80(%r12), %rcx
+ movq 88(%rdx), %r8
+ movq %rcx, 80(%r11)
+ adcq 88(%r12), %r8
+ movq %r8, 88(%r11)
+ adcq $0, %r14
+ movq %r14, 608(%rsp)
+ movq %r11, %rdx
+ movq %r10, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_3072_mul_avx2_12@plt
+#else
+ callq _sp_3072_mul_avx2_12
+#endif /* __APPLE__ */
+ movq 592(%rsp), %rdx
+ movq 584(%rsp), %rsi
+ leaq 192(%rsp), %rdi
+ addq $96, %rdx
+ addq $96, %rsi
+#ifndef __APPLE__
+ callq sp_3072_mul_avx2_12@plt
+#else
+ callq _sp_3072_mul_avx2_12
+#endif /* __APPLE__ */
+ movq 592(%rsp), %rdx
+ movq 584(%rsp), %rsi
+ movq 576(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_3072_mul_avx2_12@plt
+#else
+ callq _sp_3072_mul_avx2_12
+#endif /* __APPLE__ */
+ movq 600(%rsp), %r13
+ movq 608(%rsp), %r14
+ movq 576(%rsp), %r15
+ movq %r13, %r9
+ leaq 384(%rsp), %r10
+ leaq 480(%rsp), %r11
+ andq %r14, %r9
+ negq %r13
+ negq %r14
+ addq $192, %r15
+ movq (%r10), %rax
+ movq (%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ addq %rcx, %rax
+ movq 8(%r10), %rcx
+ movq 8(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, (%r15)
+ adcq %r8, %rcx
+ movq 16(%r10), %r8
+ movq 16(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 8(%r15)
+ adcq %rax, %r8
+ movq 24(%r10), %rax
+ movq 24(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 16(%r15)
+ adcq %rcx, %rax
+ movq 32(%r10), %rcx
+ movq 32(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 24(%r15)
+ adcq %r8, %rcx
+ movq 40(%r10), %r8
+ movq 40(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 32(%r15)
+ adcq %rax, %r8
+ movq 48(%r10), %rax
+ movq 48(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 40(%r15)
+ adcq %rcx, %rax
+ movq 56(%r10), %rcx
+ movq 56(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 48(%r15)
+ adcq %r8, %rcx
+ movq 64(%r10), %r8
+ movq 64(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 56(%r15)
+ adcq %rax, %r8
+ movq 72(%r10), %rax
+ movq 72(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 64(%r15)
+ adcq %rcx, %rax
+ movq 80(%r10), %rcx
+ movq 80(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 72(%r15)
+ adcq %r8, %rcx
+ movq 88(%r10), %r8
+ movq 88(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 80(%r15)
+ adcq %rax, %r8
+ movq %r8, 88(%r15)
+ adcq $0, %r9
+ leaq 192(%rsp), %r11
+ movq %rsp, %r10
+ movq (%r10), %rax
+ subq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%r11), %r8
+ movq %r8, 184(%r10)
+ sbbq $0, %r9
+ movq (%r10), %rax
+ subq (%rdi), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%rdi), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%rdi), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%rdi), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%rdi), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%rdi), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%rdi), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%rdi), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%rdi), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%rdi), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%rdi), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%rdi), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%rdi), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%rdi), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%rdi), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%rdi), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%rdi), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%rdi), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%rdi), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%rdi), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%rdi), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%rdi), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%rdi), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%rdi), %r8
+ movq %r8, 184(%r10)
+ sbbq $0, %r9
+ subq $96, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r10), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r10), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r10), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r10), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r10), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r10), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r10), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r10), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r10), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r10), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r10), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r10), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r10), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r10), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r10), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r10), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r10), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r10), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r10), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r10), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r10), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r10), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r10), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r10), %r8
+ movq %r8, 184(%r15)
+ adcq $0, %r9
+ movq %r9, 288(%rdi)
+ addq $96, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r11), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq %rax, 96(%r15)
+ # Add to zero
+ movq 104(%r11), %rax
+ adcq $0, %rax
+ movq 112(%r11), %rcx
+ movq %rax, 104(%r15)
+ adcq $0, %rcx
+ movq 120(%r11), %r8
+ movq %rcx, 112(%r15)
+ adcq $0, %r8
+ movq 128(%r11), %rax
+ movq %r8, 120(%r15)
+ adcq $0, %rax
+ movq 136(%r11), %rcx
+ movq %rax, 128(%r15)
+ adcq $0, %rcx
+ movq 144(%r11), %r8
+ movq %rcx, 136(%r15)
+ adcq $0, %r8
+ movq 152(%r11), %rax
+ movq %r8, 144(%r15)
+ adcq $0, %rax
+ movq 160(%r11), %rcx
+ movq %rax, 152(%r15)
+ adcq $0, %rcx
+ movq 168(%r11), %r8
+ movq %rcx, 160(%r15)
+ adcq $0, %r8
+ movq 176(%r11), %rax
+ movq %r8, 168(%r15)
+ adcq $0, %rax
+ movq 184(%r11), %rcx
+ movq %rax, 176(%r15)
+ adcq $0, %rcx
+ movq %rcx, 184(%r15)
+ addq $616, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_avx2_24,.-sp_3072_mul_avx2_24
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ *
+ * Karatsuba squaring of a 24-word (1536-bit) value via three calls to
+ * sp_3072_sqr_avx2_12 on 12-word halves a0 (low) and a1 (high):
+ *   z0 = a0^2, z2 = a1^2, z1 = (a0 + a1)^2
+ *   r  = z0 + (z1 - z0 - z2)*2^768 + z2*2^1536
+ * SysV AMD64: rdi = r (48 words out), rsi = a (24 words in).
+ * Stack layout (504 bytes): rsp+0..191 = z1, rsp+192..383 = z2,
+ * rsp+384..479 = t (low 12 words of a0+a1), rsp+480 = saved rdi,
+ * rsp+488 = saved rsi, rsp+496 = carry out of a0+a1.
+ * Straight-line and branch-free; the a0+a1 carry is folded in with a
+ * pextq mask rather than a branch.
+ */
+#ifndef __APPLE__
+.globl sp_3072_sqr_avx2_24
+.type sp_3072_sqr_avx2_24,@function
+.align 16
+sp_3072_sqr_avx2_24:
+#else
+.globl _sp_3072_sqr_avx2_24
+.p2align 4
+_sp_3072_sqr_avx2_24:
+#endif /* __APPLE__ */
+ subq $504, %rsp
+ # Save r and a; rdi/rsi are repurposed for the helper calls below.
+ movq %rdi, 480(%rsp)
+ movq %rsi, 488(%rsp)
+ leaq 384(%rsp), %r8
+ leaq 96(%rsi), %r9
+ # t (r8) = a[0..11] + a[12..23]; carry out accumulates in rcx.
+ # Add
+ movq (%rsi), %rdx
+ xorq %rcx, %rcx
+ addq (%r9), %rdx
+ movq 8(%rsi), %rax
+ movq %rdx, (%r8)
+ adcq 8(%r9), %rax
+ movq 16(%rsi), %rdx
+ movq %rax, 8(%r8)
+ adcq 16(%r9), %rdx
+ movq 24(%rsi), %rax
+ movq %rdx, 16(%r8)
+ adcq 24(%r9), %rax
+ movq 32(%rsi), %rdx
+ movq %rax, 24(%r8)
+ adcq 32(%r9), %rdx
+ movq 40(%rsi), %rax
+ movq %rdx, 32(%r8)
+ adcq 40(%r9), %rax
+ movq 48(%rsi), %rdx
+ movq %rax, 40(%r8)
+ adcq 48(%r9), %rdx
+ movq 56(%rsi), %rax
+ movq %rdx, 48(%r8)
+ adcq 56(%r9), %rax
+ movq 64(%rsi), %rdx
+ movq %rax, 56(%r8)
+ adcq 64(%r9), %rdx
+ movq 72(%rsi), %rax
+ movq %rdx, 64(%r8)
+ adcq 72(%r9), %rax
+ movq 80(%rsi), %rdx
+ movq %rax, 72(%r8)
+ adcq 80(%r9), %rdx
+ movq 88(%rsi), %rax
+ movq %rdx, 80(%r8)
+ adcq 88(%r9), %rax
+ movq %rax, 88(%r8)
+ adcq $0, %rcx
+ # Save the a0+a1 carry bit for the masked fix-up after the calls.
+ movq %rcx, 496(%rsp)
+ # z1 (rsp+0..191) = t^2
+ movq %r8, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_3072_sqr_avx2_12@plt
+#else
+ callq _sp_3072_sqr_avx2_12
+#endif /* __APPLE__ */
+ # z2 (rsp+192..383) = a1^2
+ movq 488(%rsp), %rsi
+ leaq 192(%rsp), %rdi
+ addq $96, %rsi
+#ifndef __APPLE__
+ callq sp_3072_sqr_avx2_12@plt
+#else
+ callq _sp_3072_sqr_avx2_12
+#endif /* __APPLE__ */
+ # z0 (r[0..23]) = a0^2
+ movq 488(%rsp), %rsi
+ movq 480(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_3072_sqr_avx2_12@plt
+#else
+ callq _sp_3072_sqr_avx2_12
+#endif /* __APPLE__ */
+ # Carry fix-up: r10 = -(carry) = all-ones or zero mask; rcx keeps the
+ # carry value.  r[24..35] (r9 = rdi+192) = 2 * (t AND mask): pextq with
+ # an all-ones mask copies the word, with a zero mask yields 0 - this
+ # selects t or 0 without a branch; the addq/adcq chain doubles it.
+ movq 496(%rsp), %r10
+ movq %rdi, %r9
+ leaq 384(%rsp), %r8
+ movq %r10, %rcx
+ negq %r10
+ addq $192, %r9
+ movq (%r8), %rdx
+ pextq %r10, %rdx, %rdx
+ addq %rdx, %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq %rax, 88(%r9)
+ adcq $0, %rcx
+ # z1 -= z2 (rsi = z2, r8 = z1); the borrow folds into rcx.
+ leaq 192(%rsp), %rsi
+ movq %rsp, %r8
+ movq (%r8), %rdx
+ subq (%rsi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rsi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rsi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rsi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rsi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rsi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rsi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rsi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rsi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rsi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rsi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rsi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rsi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rsi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rsi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rsi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rsi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rsi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rsi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rsi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rsi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rsi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rsi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rsi), %rax
+ movq %rax, 184(%r8)
+ sbbq $0, %rcx
+ # z1 -= z0 (rdi = z0 = low half of r).
+ movq (%r8), %rdx
+ subq (%rdi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rdi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rdi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rdi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rdi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rdi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rdi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rdi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rdi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rdi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rdi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rdi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rdi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rdi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rdi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rdi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rdi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rdi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rdi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rdi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rdi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rdi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rdi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rdi), %rax
+ movq %rax, 184(%r8)
+ sbbq $0, %rcx
+ # r9 = rdi+96: add the 24-word middle term z1 into r[12..35];
+ # the carry (in rcx) lands in r[36] = 288(%rdi).
+ subq $96, %r9
+ # Add in place
+ movq (%r9), %rdx
+ addq (%r8), %rdx
+ movq 8(%r9), %rax
+ movq %rdx, (%r9)
+ adcq 8(%r8), %rax
+ movq 16(%r9), %rdx
+ movq %rax, 8(%r9)
+ adcq 16(%r8), %rdx
+ movq 24(%r9), %rax
+ movq %rdx, 16(%r9)
+ adcq 24(%r8), %rax
+ movq 32(%r9), %rdx
+ movq %rax, 24(%r9)
+ adcq 32(%r8), %rdx
+ movq 40(%r9), %rax
+ movq %rdx, 32(%r9)
+ adcq 40(%r8), %rax
+ movq 48(%r9), %rdx
+ movq %rax, 40(%r9)
+ adcq 48(%r8), %rdx
+ movq 56(%r9), %rax
+ movq %rdx, 48(%r9)
+ adcq 56(%r8), %rax
+ movq 64(%r9), %rdx
+ movq %rax, 56(%r9)
+ adcq 64(%r8), %rdx
+ movq 72(%r9), %rax
+ movq %rdx, 64(%r9)
+ adcq 72(%r8), %rax
+ movq 80(%r9), %rdx
+ movq %rax, 72(%r9)
+ adcq 80(%r8), %rdx
+ movq 88(%r9), %rax
+ movq %rdx, 80(%r9)
+ adcq 88(%r8), %rax
+ movq 96(%r9), %rdx
+ movq %rax, 88(%r9)
+ adcq 96(%r8), %rdx
+ movq 104(%r9), %rax
+ movq %rdx, 96(%r9)
+ adcq 104(%r8), %rax
+ movq 112(%r9), %rdx
+ movq %rax, 104(%r9)
+ adcq 112(%r8), %rdx
+ movq 120(%r9), %rax
+ movq %rdx, 112(%r9)
+ adcq 120(%r8), %rax
+ movq 128(%r9), %rdx
+ movq %rax, 120(%r9)
+ adcq 128(%r8), %rdx
+ movq 136(%r9), %rax
+ movq %rdx, 128(%r9)
+ adcq 136(%r8), %rax
+ movq 144(%r9), %rdx
+ movq %rax, 136(%r9)
+ adcq 144(%r8), %rdx
+ movq 152(%r9), %rax
+ movq %rdx, 144(%r9)
+ adcq 152(%r8), %rax
+ movq 160(%r9), %rdx
+ movq %rax, 152(%r9)
+ adcq 160(%r8), %rdx
+ movq 168(%r9), %rax
+ movq %rdx, 160(%r9)
+ adcq 168(%r8), %rax
+ movq 176(%r9), %rdx
+ movq %rax, 168(%r9)
+ adcq 176(%r8), %rdx
+ movq 184(%r9), %rax
+ movq %rdx, 176(%r9)
+ adcq 184(%r8), %rax
+ movq %rax, 184(%r9)
+ adcq $0, %rcx
+ movq %rcx, 288(%rdi)
+ # r[24..36] += z2[0..12] (rsi still points at z2).
+ # Add in place
+ movq 96(%r9), %rdx
+ addq (%rsi), %rdx
+ movq 104(%r9), %rax
+ movq %rdx, 96(%r9)
+ adcq 8(%rsi), %rax
+ movq 112(%r9), %rdx
+ movq %rax, 104(%r9)
+ adcq 16(%rsi), %rdx
+ movq 120(%r9), %rax
+ movq %rdx, 112(%r9)
+ adcq 24(%rsi), %rax
+ movq 128(%r9), %rdx
+ movq %rax, 120(%r9)
+ adcq 32(%rsi), %rdx
+ movq 136(%r9), %rax
+ movq %rdx, 128(%r9)
+ adcq 40(%rsi), %rax
+ movq 144(%r9), %rdx
+ movq %rax, 136(%r9)
+ adcq 48(%rsi), %rdx
+ movq 152(%r9), %rax
+ movq %rdx, 144(%r9)
+ adcq 56(%rsi), %rax
+ movq 160(%r9), %rdx
+ movq %rax, 152(%r9)
+ adcq 64(%rsi), %rdx
+ movq 168(%r9), %rax
+ movq %rdx, 160(%r9)
+ adcq 72(%rsi), %rax
+ movq 176(%r9), %rdx
+ movq %rax, 168(%r9)
+ adcq 80(%rsi), %rdx
+ movq 184(%r9), %rax
+ movq %rdx, 176(%r9)
+ adcq 88(%rsi), %rax
+ movq 192(%r9), %rdx
+ movq %rax, 184(%r9)
+ adcq 96(%rsi), %rdx
+ movq %rdx, 192(%r9)
+ # Copy z2[13..23] + carry into r[37..47] (nothing to add there).
+ # Add to zero
+ movq 104(%rsi), %rdx
+ adcq $0, %rdx
+ movq 112(%rsi), %rax
+ movq %rdx, 200(%r9)
+ adcq $0, %rax
+ movq 120(%rsi), %rdx
+ movq %rax, 208(%r9)
+ adcq $0, %rdx
+ movq 128(%rsi), %rax
+ movq %rdx, 216(%r9)
+ adcq $0, %rax
+ movq 136(%rsi), %rdx
+ movq %rax, 224(%r9)
+ adcq $0, %rdx
+ movq 144(%rsi), %rax
+ movq %rdx, 232(%r9)
+ adcq $0, %rax
+ movq 152(%rsi), %rdx
+ movq %rax, 240(%r9)
+ adcq $0, %rdx
+ movq 160(%rsi), %rax
+ movq %rdx, 248(%r9)
+ adcq $0, %rax
+ movq 168(%rsi), %rdx
+ movq %rax, 256(%r9)
+ adcq $0, %rdx
+ movq 176(%rsi), %rax
+ movq %rdx, 264(%r9)
+ adcq $0, %rax
+ movq 184(%rsi), %rdx
+ movq %rax, 272(%r9)
+ adcq $0, %rdx
+ movq %rdx, 280(%r9)
+ addq $504, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_sqr_avx2_24,.-sp_3072_sqr_avx2_24
+#endif /* __APPLE__ */
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ *
+ * SysV AMD64: rdi = a, rsi = b; 48 64-bit words each.
+ * Fully unrolled, branch-free sub/sbb borrow chain; uses no stack and
+ * no callee-saved registers.  Returns the borrow out in rax:
+ * 0 if no borrow, -1 (all ones) if a < b.
+ */
+#ifndef __APPLE__
+.globl sp_3072_sub_in_place_48
+.type sp_3072_sub_in_place_48,@function
+.align 16
+sp_3072_sub_in_place_48:
+#else
+.globl _sp_3072_sub_in_place_48
+.p2align 4
+_sp_3072_sub_in_place_48:
+#endif /* __APPLE__ */
+ # Loads and stores are interleaved one word ahead so the carry flag
+ # threads unbroken through the sbbq chain (movq does not touch flags).
+ movq (%rdi), %rdx
+ xorq %rax, %rax
+ subq (%rsi), %rdx
+ movq 8(%rdi), %rcx
+ movq %rdx, (%rdi)
+ sbbq 8(%rsi), %rcx
+ movq 16(%rdi), %rdx
+ movq %rcx, 8(%rdi)
+ sbbq 16(%rsi), %rdx
+ movq 24(%rdi), %rcx
+ movq %rdx, 16(%rdi)
+ sbbq 24(%rsi), %rcx
+ movq 32(%rdi), %rdx
+ movq %rcx, 24(%rdi)
+ sbbq 32(%rsi), %rdx
+ movq 40(%rdi), %rcx
+ movq %rdx, 32(%rdi)
+ sbbq 40(%rsi), %rcx
+ movq 48(%rdi), %rdx
+ movq %rcx, 40(%rdi)
+ sbbq 48(%rsi), %rdx
+ movq 56(%rdi), %rcx
+ movq %rdx, 48(%rdi)
+ sbbq 56(%rsi), %rcx
+ movq 64(%rdi), %rdx
+ movq %rcx, 56(%rdi)
+ sbbq 64(%rsi), %rdx
+ movq 72(%rdi), %rcx
+ movq %rdx, 64(%rdi)
+ sbbq 72(%rsi), %rcx
+ movq 80(%rdi), %rdx
+ movq %rcx, 72(%rdi)
+ sbbq 80(%rsi), %rdx
+ movq 88(%rdi), %rcx
+ movq %rdx, 80(%rdi)
+ sbbq 88(%rsi), %rcx
+ movq 96(%rdi), %rdx
+ movq %rcx, 88(%rdi)
+ sbbq 96(%rsi), %rdx
+ movq 104(%rdi), %rcx
+ movq %rdx, 96(%rdi)
+ sbbq 104(%rsi), %rcx
+ movq 112(%rdi), %rdx
+ movq %rcx, 104(%rdi)
+ sbbq 112(%rsi), %rdx
+ movq 120(%rdi), %rcx
+ movq %rdx, 112(%rdi)
+ sbbq 120(%rsi), %rcx
+ movq 128(%rdi), %rdx
+ movq %rcx, 120(%rdi)
+ sbbq 128(%rsi), %rdx
+ movq 136(%rdi), %rcx
+ movq %rdx, 128(%rdi)
+ sbbq 136(%rsi), %rcx
+ movq 144(%rdi), %rdx
+ movq %rcx, 136(%rdi)
+ sbbq 144(%rsi), %rdx
+ movq 152(%rdi), %rcx
+ movq %rdx, 144(%rdi)
+ sbbq 152(%rsi), %rcx
+ movq 160(%rdi), %rdx
+ movq %rcx, 152(%rdi)
+ sbbq 160(%rsi), %rdx
+ movq 168(%rdi), %rcx
+ movq %rdx, 160(%rdi)
+ sbbq 168(%rsi), %rcx
+ movq 176(%rdi), %rdx
+ movq %rcx, 168(%rdi)
+ sbbq 176(%rsi), %rdx
+ movq 184(%rdi), %rcx
+ movq %rdx, 176(%rdi)
+ sbbq 184(%rsi), %rcx
+ movq 192(%rdi), %rdx
+ movq %rcx, 184(%rdi)
+ sbbq 192(%rsi), %rdx
+ movq 200(%rdi), %rcx
+ movq %rdx, 192(%rdi)
+ sbbq 200(%rsi), %rcx
+ movq 208(%rdi), %rdx
+ movq %rcx, 200(%rdi)
+ sbbq 208(%rsi), %rdx
+ movq 216(%rdi), %rcx
+ movq %rdx, 208(%rdi)
+ sbbq 216(%rsi), %rcx
+ movq 224(%rdi), %rdx
+ movq %rcx, 216(%rdi)
+ sbbq 224(%rsi), %rdx
+ movq 232(%rdi), %rcx
+ movq %rdx, 224(%rdi)
+ sbbq 232(%rsi), %rcx
+ movq 240(%rdi), %rdx
+ movq %rcx, 232(%rdi)
+ sbbq 240(%rsi), %rdx
+ movq 248(%rdi), %rcx
+ movq %rdx, 240(%rdi)
+ sbbq 248(%rsi), %rcx
+ movq 256(%rdi), %rdx
+ movq %rcx, 248(%rdi)
+ sbbq 256(%rsi), %rdx
+ movq 264(%rdi), %rcx
+ movq %rdx, 256(%rdi)
+ sbbq 264(%rsi), %rcx
+ movq 272(%rdi), %rdx
+ movq %rcx, 264(%rdi)
+ sbbq 272(%rsi), %rdx
+ movq 280(%rdi), %rcx
+ movq %rdx, 272(%rdi)
+ sbbq 280(%rsi), %rcx
+ movq 288(%rdi), %rdx
+ movq %rcx, 280(%rdi)
+ sbbq 288(%rsi), %rdx
+ movq 296(%rdi), %rcx
+ movq %rdx, 288(%rdi)
+ sbbq 296(%rsi), %rcx
+ movq 304(%rdi), %rdx
+ movq %rcx, 296(%rdi)
+ sbbq 304(%rsi), %rdx
+ movq 312(%rdi), %rcx
+ movq %rdx, 304(%rdi)
+ sbbq 312(%rsi), %rcx
+ movq 320(%rdi), %rdx
+ movq %rcx, 312(%rdi)
+ sbbq 320(%rsi), %rdx
+ movq 328(%rdi), %rcx
+ movq %rdx, 320(%rdi)
+ sbbq 328(%rsi), %rcx
+ movq 336(%rdi), %rdx
+ movq %rcx, 328(%rdi)
+ sbbq 336(%rsi), %rdx
+ movq 344(%rdi), %rcx
+ movq %rdx, 336(%rdi)
+ sbbq 344(%rsi), %rcx
+ movq 352(%rdi), %rdx
+ movq %rcx, 344(%rdi)
+ sbbq 352(%rsi), %rdx
+ movq 360(%rdi), %rcx
+ movq %rdx, 352(%rdi)
+ sbbq 360(%rsi), %rcx
+ movq 368(%rdi), %rdx
+ movq %rcx, 360(%rdi)
+ sbbq 368(%rsi), %rdx
+ movq 376(%rdi), %rcx
+ movq %rdx, 368(%rdi)
+ sbbq 376(%rsi), %rcx
+ movq %rcx, 376(%rdi)
+ # rax = 0 - borrow: 0 on no borrow, -1 when b > a.
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_sub_in_place_48,.-sp_3072_sub_in_place_48
+#endif /* __APPLE__ */
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ *
+ * SysV AMD64: rdi = r, rsi = a, rdx = b; 48 64-bit words each.
+ * Fully unrolled, branch-free add/adc carry chain; uses no stack and
+ * no callee-saved registers.  Returns the carry out (0 or 1) in rax.
+ */
+#ifndef __APPLE__
+.globl sp_3072_add_48
+.type sp_3072_add_48,@function
+.align 16
+sp_3072_add_48:
+#else
+.globl _sp_3072_add_48
+.p2align 4
+_sp_3072_add_48:
+#endif /* __APPLE__ */
+ # Loads and stores are interleaved one word ahead so the carry flag
+ # threads unbroken through the adcq chain (movq does not touch flags).
+ # Add
+ movq (%rsi), %rcx
+ xorq %rax, %rax
+ addq (%rdx), %rcx
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ adcq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ adcq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ adcq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ adcq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ adcq 40(%rdx), %r8
+ movq 48(%rsi), %rcx
+ movq %r8, 40(%rdi)
+ adcq 48(%rdx), %rcx
+ movq 56(%rsi), %r8
+ movq %rcx, 48(%rdi)
+ adcq 56(%rdx), %r8
+ movq 64(%rsi), %rcx
+ movq %r8, 56(%rdi)
+ adcq 64(%rdx), %rcx
+ movq 72(%rsi), %r8
+ movq %rcx, 64(%rdi)
+ adcq 72(%rdx), %r8
+ movq 80(%rsi), %rcx
+ movq %r8, 72(%rdi)
+ adcq 80(%rdx), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%rdi)
+ adcq 88(%rdx), %r8
+ movq 96(%rsi), %rcx
+ movq %r8, 88(%rdi)
+ adcq 96(%rdx), %rcx
+ movq 104(%rsi), %r8
+ movq %rcx, 96(%rdi)
+ adcq 104(%rdx), %r8
+ movq 112(%rsi), %rcx
+ movq %r8, 104(%rdi)
+ adcq 112(%rdx), %rcx
+ movq 120(%rsi), %r8
+ movq %rcx, 112(%rdi)
+ adcq 120(%rdx), %r8
+ movq 128(%rsi), %rcx
+ movq %r8, 120(%rdi)
+ adcq 128(%rdx), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%rdi)
+ adcq 136(%rdx), %r8
+ movq 144(%rsi), %rcx
+ movq %r8, 136(%rdi)
+ adcq 144(%rdx), %rcx
+ movq 152(%rsi), %r8
+ movq %rcx, 144(%rdi)
+ adcq 152(%rdx), %r8
+ movq 160(%rsi), %rcx
+ movq %r8, 152(%rdi)
+ adcq 160(%rdx), %rcx
+ movq 168(%rsi), %r8
+ movq %rcx, 160(%rdi)
+ adcq 168(%rdx), %r8
+ movq 176(%rsi), %rcx
+ movq %r8, 168(%rdi)
+ adcq 176(%rdx), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%rdi)
+ adcq 184(%rdx), %r8
+ movq 192(%rsi), %rcx
+ movq %r8, 184(%rdi)
+ adcq 192(%rdx), %rcx
+ movq 200(%rsi), %r8
+ movq %rcx, 192(%rdi)
+ adcq 200(%rdx), %r8
+ movq 208(%rsi), %rcx
+ movq %r8, 200(%rdi)
+ adcq 208(%rdx), %rcx
+ movq 216(%rsi), %r8
+ movq %rcx, 208(%rdi)
+ adcq 216(%rdx), %r8
+ movq 224(%rsi), %rcx
+ movq %r8, 216(%rdi)
+ adcq 224(%rdx), %rcx
+ movq 232(%rsi), %r8
+ movq %rcx, 224(%rdi)
+ adcq 232(%rdx), %r8
+ movq 240(%rsi), %rcx
+ movq %r8, 232(%rdi)
+ adcq 240(%rdx), %rcx
+ movq 248(%rsi), %r8
+ movq %rcx, 240(%rdi)
+ adcq 248(%rdx), %r8
+ movq 256(%rsi), %rcx
+ movq %r8, 248(%rdi)
+ adcq 256(%rdx), %rcx
+ movq 264(%rsi), %r8
+ movq %rcx, 256(%rdi)
+ adcq 264(%rdx), %r8
+ movq 272(%rsi), %rcx
+ movq %r8, 264(%rdi)
+ adcq 272(%rdx), %rcx
+ movq 280(%rsi), %r8
+ movq %rcx, 272(%rdi)
+ adcq 280(%rdx), %r8
+ movq 288(%rsi), %rcx
+ movq %r8, 280(%rdi)
+ adcq 288(%rdx), %rcx
+ movq 296(%rsi), %r8
+ movq %rcx, 288(%rdi)
+ adcq 296(%rdx), %r8
+ movq 304(%rsi), %rcx
+ movq %r8, 296(%rdi)
+ adcq 304(%rdx), %rcx
+ movq 312(%rsi), %r8
+ movq %rcx, 304(%rdi)
+ adcq 312(%rdx), %r8
+ movq 320(%rsi), %rcx
+ movq %r8, 312(%rdi)
+ adcq 320(%rdx), %rcx
+ movq 328(%rsi), %r8
+ movq %rcx, 320(%rdi)
+ adcq 328(%rdx), %r8
+ movq 336(%rsi), %rcx
+ movq %r8, 328(%rdi)
+ adcq 336(%rdx), %rcx
+ movq 344(%rsi), %r8
+ movq %rcx, 336(%rdi)
+ adcq 344(%rdx), %r8
+ movq 352(%rsi), %rcx
+ movq %r8, 344(%rdi)
+ adcq 352(%rdx), %rcx
+ movq 360(%rsi), %r8
+ movq %rcx, 352(%rdi)
+ adcq 360(%rdx), %r8
+ movq 368(%rsi), %rcx
+ movq %r8, 360(%rdi)
+ adcq 368(%rdx), %rcx
+ movq 376(%rsi), %r8
+ movq %rcx, 368(%rdi)
+ adcq 376(%rdx), %r8
+ movq %r8, 376(%rdi)
+ # rax = carry out of the 48-word addition (0 or 1).
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_add_48,.-sp_3072_add_48
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_48
+.type sp_3072_mul_48,@function
+.align 16
+sp_3072_mul_48:
+#else
+.globl _sp_3072_mul_48
+.p2align 4
+_sp_3072_mul_48:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ subq $1192, %rsp
+ movq %rdi, 1152(%rsp)
+ movq %rsi, 1160(%rsp)
+ movq %rdx, 1168(%rsp)
+ leaq 768(%rsp), %r10
+ leaq 192(%rsi), %r12
+ # Add
+ movq (%rsi), %rax
+ xorq %r13, %r13
+ addq (%r12), %rax
+ movq 8(%rsi), %rcx
+ movq %rax, (%r10)
+ adcq 8(%r12), %rcx
+ movq 16(%rsi), %r8
+ movq %rcx, 8(%r10)
+ adcq 16(%r12), %r8
+ movq 24(%rsi), %rax
+ movq %r8, 16(%r10)
+ adcq 24(%r12), %rax
+ movq 32(%rsi), %rcx
+ movq %rax, 24(%r10)
+ adcq 32(%r12), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%r10)
+ adcq 40(%r12), %r8
+ movq 48(%rsi), %rax
+ movq %r8, 40(%r10)
+ adcq 48(%r12), %rax
+ movq 56(%rsi), %rcx
+ movq %rax, 48(%r10)
+ adcq 56(%r12), %rcx
+ movq 64(%rsi), %r8
+ movq %rcx, 56(%r10)
+ adcq 64(%r12), %r8
+ movq 72(%rsi), %rax
+ movq %r8, 64(%r10)
+ adcq 72(%r12), %rax
+ movq 80(%rsi), %rcx
+ movq %rax, 72(%r10)
+ adcq 80(%r12), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%r10)
+ adcq 88(%r12), %r8
+ movq 96(%rsi), %rax
+ movq %r8, 88(%r10)
+ adcq 96(%r12), %rax
+ movq 104(%rsi), %rcx
+ movq %rax, 96(%r10)
+ adcq 104(%r12), %rcx
+ movq 112(%rsi), %r8
+ movq %rcx, 104(%r10)
+ adcq 112(%r12), %r8
+ movq 120(%rsi), %rax
+ movq %r8, 112(%r10)
+ adcq 120(%r12), %rax
+ movq 128(%rsi), %rcx
+ movq %rax, 120(%r10)
+ adcq 128(%r12), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%r10)
+ adcq 136(%r12), %r8
+ movq 144(%rsi), %rax
+ movq %r8, 136(%r10)
+ adcq 144(%r12), %rax
+ movq 152(%rsi), %rcx
+ movq %rax, 144(%r10)
+ adcq 152(%r12), %rcx
+ movq 160(%rsi), %r8
+ movq %rcx, 152(%r10)
+ adcq 160(%r12), %r8
+ movq 168(%rsi), %rax
+ movq %r8, 160(%r10)
+ adcq 168(%r12), %rax
+ movq 176(%rsi), %rcx
+ movq %rax, 168(%r10)
+ adcq 176(%r12), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%r10)
+ adcq 184(%r12), %r8
+ movq %r8, 184(%r10)
+ adcq $0, %r13
+ movq %r13, 1176(%rsp)
+ leaq 960(%rsp), %r11
+ leaq 192(%rdx), %r12
+ # Add
+ movq (%rdx), %rax
+ xorq %r14, %r14
+ addq (%r12), %rax
+ movq 8(%rdx), %rcx
+ movq %rax, (%r11)
+ adcq 8(%r12), %rcx
+ movq 16(%rdx), %r8
+ movq %rcx, 8(%r11)
+ adcq 16(%r12), %r8
+ movq 24(%rdx), %rax
+ movq %r8, 16(%r11)
+ adcq 24(%r12), %rax
+ movq 32(%rdx), %rcx
+ movq %rax, 24(%r11)
+ adcq 32(%r12), %rcx
+ movq 40(%rdx), %r8
+ movq %rcx, 32(%r11)
+ adcq 40(%r12), %r8
+ movq 48(%rdx), %rax
+ movq %r8, 40(%r11)
+ adcq 48(%r12), %rax
+ movq 56(%rdx), %rcx
+ movq %rax, 48(%r11)
+ adcq 56(%r12), %rcx
+ movq 64(%rdx), %r8
+ movq %rcx, 56(%r11)
+ adcq 64(%r12), %r8
+ movq 72(%rdx), %rax
+ movq %r8, 64(%r11)
+ adcq 72(%r12), %rax
+ movq 80(%rdx), %rcx
+ movq %rax, 72(%r11)
+ adcq 80(%r12), %rcx
+ movq 88(%rdx), %r8
+ movq %rcx, 80(%r11)
+ adcq 88(%r12), %r8
+ movq 96(%rdx), %rax
+ movq %r8, 88(%r11)
+ adcq 96(%r12), %rax
+ movq 104(%rdx), %rcx
+ movq %rax, 96(%r11)
+ adcq 104(%r12), %rcx
+ movq 112(%rdx), %r8
+ movq %rcx, 104(%r11)
+ adcq 112(%r12), %r8
+ movq 120(%rdx), %rax
+ movq %r8, 112(%r11)
+ adcq 120(%r12), %rax
+ movq 128(%rdx), %rcx
+ movq %rax, 120(%r11)
+ adcq 128(%r12), %rcx
+ movq 136(%rdx), %r8
+ movq %rcx, 128(%r11)
+ adcq 136(%r12), %r8
+ movq 144(%rdx), %rax
+ movq %r8, 136(%r11)
+ adcq 144(%r12), %rax
+ movq 152(%rdx), %rcx
+ movq %rax, 144(%r11)
+ adcq 152(%r12), %rcx
+ movq 160(%rdx), %r8
+ movq %rcx, 152(%r11)
+ adcq 160(%r12), %r8
+ movq 168(%rdx), %rax
+ movq %r8, 160(%r11)
+ adcq 168(%r12), %rax
+ movq 176(%rdx), %rcx
+ movq %rax, 168(%r11)
+ adcq 176(%r12), %rcx
+ movq 184(%rdx), %r8
+ movq %rcx, 176(%r11)
+ adcq 184(%r12), %r8
+ movq %r8, 184(%r11)
+ adcq $0, %r14
+ movq %r14, 1184(%rsp)
+ movq %r11, %rdx
+ movq %r10, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_3072_mul_24@plt
+#else
+ callq _sp_3072_mul_24
+#endif /* __APPLE__ */
+ movq 1168(%rsp), %rdx
+ movq 1160(%rsp), %rsi
+ leaq 384(%rsp), %rdi
+ addq $192, %rdx
+ addq $192, %rsi
+#ifndef __APPLE__
+ callq sp_3072_mul_24@plt
+#else
+ callq _sp_3072_mul_24
+#endif /* __APPLE__ */
+ movq 1168(%rsp), %rdx
+ movq 1160(%rsp), %rsi
+ movq 1152(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_3072_mul_24@plt
+#else
+ callq _sp_3072_mul_24
+#endif /* __APPLE__ */
+ movq 1176(%rsp), %r13
+ movq 1184(%rsp), %r14
+ movq 1152(%rsp), %r15
+ movq %r13, %r9
+ leaq 768(%rsp), %r10
+ leaq 960(%rsp), %r11
+ andq %r14, %r9
+ negq %r13
+ negq %r14
+ addq $384, %r15
+ movq (%r10), %rax
+ movq (%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, (%r10)
+ movq %rcx, (%r11)
+ movq 8(%r10), %rax
+ movq 8(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 8(%r10)
+ movq %rcx, 8(%r11)
+ movq 16(%r10), %rax
+ movq 16(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 16(%r10)
+ movq %rcx, 16(%r11)
+ movq 24(%r10), %rax
+ movq 24(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 24(%r10)
+ movq %rcx, 24(%r11)
+ movq 32(%r10), %rax
+ movq 32(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 32(%r10)
+ movq %rcx, 32(%r11)
+ movq 40(%r10), %rax
+ movq 40(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 40(%r10)
+ movq %rcx, 40(%r11)
+ movq 48(%r10), %rax
+ movq 48(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 48(%r10)
+ movq %rcx, 48(%r11)
+ movq 56(%r10), %rax
+ movq 56(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 56(%r10)
+ movq %rcx, 56(%r11)
+ movq 64(%r10), %rax
+ movq 64(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 64(%r10)
+ movq %rcx, 64(%r11)
+ movq 72(%r10), %rax
+ movq 72(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 72(%r10)
+ movq %rcx, 72(%r11)
+ movq 80(%r10), %rax
+ movq 80(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 80(%r10)
+ movq %rcx, 80(%r11)
+ movq 88(%r10), %rax
+ movq 88(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 88(%r10)
+ movq %rcx, 88(%r11)
+ movq 96(%r10), %rax
+ movq 96(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 96(%r10)
+ movq %rcx, 96(%r11)
+ movq 104(%r10), %rax
+ movq 104(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 104(%r10)
+ movq %rcx, 104(%r11)
+ movq 112(%r10), %rax
+ movq 112(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 112(%r10)
+ movq %rcx, 112(%r11)
+ movq 120(%r10), %rax
+ movq 120(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 120(%r10)
+ movq %rcx, 120(%r11)
+ movq 128(%r10), %rax
+ movq 128(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 128(%r10)
+ movq %rcx, 128(%r11)
+ movq 136(%r10), %rax
+ movq 136(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 136(%r10)
+ movq %rcx, 136(%r11)
+ movq 144(%r10), %rax
+ movq 144(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 144(%r10)
+ movq %rcx, 144(%r11)
+ movq 152(%r10), %rax
+ movq 152(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 152(%r10)
+ movq %rcx, 152(%r11)
+ movq 160(%r10), %rax
+ movq 160(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 160(%r10)
+ movq %rcx, 160(%r11)
+ movq 168(%r10), %rax
+ movq 168(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 168(%r10)
+ movq %rcx, 168(%r11)
+ movq 176(%r10), %rax
+ movq 176(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 176(%r10)
+ movq %rcx, 176(%r11)
+ movq 184(%r10), %rax
+ movq 184(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 184(%r10)
+ movq %rcx, 184(%r11)
+ movq (%r10), %rax
+ addq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r11), %r8
+ movq %r8, 184(%r15)
+ adcq $0, %r9
+ leaq 384(%rsp), %r11
+ movq %rsp, %r10
+ movq (%r10), %rax
+ subq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%r11), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%r11), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%r11), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%r11), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%r11), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%r11), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%r11), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%r11), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%r11), %rcx
+ movq 256(%r10), %r8
+ movq %rcx, 248(%r10)
+ sbbq 256(%r11), %r8
+ movq 264(%r10), %rax
+ movq %r8, 256(%r10)
+ sbbq 264(%r11), %rax
+ movq 272(%r10), %rcx
+ movq %rax, 264(%r10)
+ sbbq 272(%r11), %rcx
+ movq 280(%r10), %r8
+ movq %rcx, 272(%r10)
+ sbbq 280(%r11), %r8
+ movq 288(%r10), %rax
+ movq %r8, 280(%r10)
+ sbbq 288(%r11), %rax
+ movq 296(%r10), %rcx
+ movq %rax, 288(%r10)
+ sbbq 296(%r11), %rcx
+ movq 304(%r10), %r8
+ movq %rcx, 296(%r10)
+ sbbq 304(%r11), %r8
+ movq 312(%r10), %rax
+ movq %r8, 304(%r10)
+ sbbq 312(%r11), %rax
+ movq 320(%r10), %rcx
+ movq %rax, 312(%r10)
+ sbbq 320(%r11), %rcx
+ movq 328(%r10), %r8
+ movq %rcx, 320(%r10)
+ sbbq 328(%r11), %r8
+ movq 336(%r10), %rax
+ movq %r8, 328(%r10)
+ sbbq 336(%r11), %rax
+ movq 344(%r10), %rcx
+ movq %rax, 336(%r10)
+ sbbq 344(%r11), %rcx
+ movq 352(%r10), %r8
+ movq %rcx, 344(%r10)
+ sbbq 352(%r11), %r8
+ movq 360(%r10), %rax
+ movq %r8, 352(%r10)
+ sbbq 360(%r11), %rax
+ movq 368(%r10), %rcx
+ movq %rax, 360(%r10)
+ sbbq 368(%r11), %rcx
+ movq 376(%r10), %r8
+ movq %rcx, 368(%r10)
+ sbbq 376(%r11), %r8
+ movq %r8, 376(%r10)
+ sbbq $0, %r9
+ movq (%r10), %rax
+ subq (%rdi), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%rdi), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%rdi), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%rdi), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%rdi), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%rdi), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%rdi), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%rdi), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%rdi), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%rdi), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%rdi), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%rdi), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%rdi), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%rdi), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%rdi), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%rdi), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%rdi), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%rdi), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%rdi), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%rdi), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%rdi), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%rdi), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%rdi), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%rdi), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%rdi), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%rdi), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%rdi), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%rdi), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%rdi), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%rdi), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%rdi), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%rdi), %rcx
+ movq 256(%r10), %r8
+ movq %rcx, 248(%r10)
+ sbbq 256(%rdi), %r8
+ movq 264(%r10), %rax
+ movq %r8, 256(%r10)
+ sbbq 264(%rdi), %rax
+ movq 272(%r10), %rcx
+ movq %rax, 264(%r10)
+ sbbq 272(%rdi), %rcx
+ movq 280(%r10), %r8
+ movq %rcx, 272(%r10)
+ sbbq 280(%rdi), %r8
+ movq 288(%r10), %rax
+ movq %r8, 280(%r10)
+ sbbq 288(%rdi), %rax
+ movq 296(%r10), %rcx
+ movq %rax, 288(%r10)
+ sbbq 296(%rdi), %rcx
+ movq 304(%r10), %r8
+ movq %rcx, 296(%r10)
+ sbbq 304(%rdi), %r8
+ movq 312(%r10), %rax
+ movq %r8, 304(%r10)
+ sbbq 312(%rdi), %rax
+ movq 320(%r10), %rcx
+ movq %rax, 312(%r10)
+ sbbq 320(%rdi), %rcx
+ movq 328(%r10), %r8
+ movq %rcx, 320(%r10)
+ sbbq 328(%rdi), %r8
+ movq 336(%r10), %rax
+ movq %r8, 328(%r10)
+ sbbq 336(%rdi), %rax
+ movq 344(%r10), %rcx
+ movq %rax, 336(%r10)
+ sbbq 344(%rdi), %rcx
+ movq 352(%r10), %r8
+ movq %rcx, 344(%r10)
+ sbbq 352(%rdi), %r8
+ movq 360(%r10), %rax
+ movq %r8, 352(%r10)
+ sbbq 360(%rdi), %rax
+ movq 368(%r10), %rcx
+ movq %rax, 360(%r10)
+ sbbq 368(%rdi), %rcx
+ movq 376(%r10), %r8
+ movq %rcx, 368(%r10)
+ sbbq 376(%rdi), %r8
+ movq %r8, 376(%r10)
+ sbbq $0, %r9
+ subq $192, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r10), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r10), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r10), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r10), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r10), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r10), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r10), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r10), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r10), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r10), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r10), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r10), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r10), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r10), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r10), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r10), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r10), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r10), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r10), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r10), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r10), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r10), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r10), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r10), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r10), %rax
+ movq 200(%r15), %rcx
+ movq %rax, 192(%r15)
+ adcq 200(%r10), %rcx
+ movq 208(%r15), %r8
+ movq %rcx, 200(%r15)
+ adcq 208(%r10), %r8
+ movq 216(%r15), %rax
+ movq %r8, 208(%r15)
+ adcq 216(%r10), %rax
+ movq 224(%r15), %rcx
+ movq %rax, 216(%r15)
+ adcq 224(%r10), %rcx
+ movq 232(%r15), %r8
+ movq %rcx, 224(%r15)
+ adcq 232(%r10), %r8
+ movq 240(%r15), %rax
+ movq %r8, 232(%r15)
+ adcq 240(%r10), %rax
+ movq 248(%r15), %rcx
+ movq %rax, 240(%r15)
+ adcq 248(%r10), %rcx
+ movq 256(%r15), %r8
+ movq %rcx, 248(%r15)
+ adcq 256(%r10), %r8
+ movq 264(%r15), %rax
+ movq %r8, 256(%r15)
+ adcq 264(%r10), %rax
+ movq 272(%r15), %rcx
+ movq %rax, 264(%r15)
+ adcq 272(%r10), %rcx
+ movq 280(%r15), %r8
+ movq %rcx, 272(%r15)
+ adcq 280(%r10), %r8
+ movq 288(%r15), %rax
+ movq %r8, 280(%r15)
+ adcq 288(%r10), %rax
+ movq 296(%r15), %rcx
+ movq %rax, 288(%r15)
+ adcq 296(%r10), %rcx
+ movq 304(%r15), %r8
+ movq %rcx, 296(%r15)
+ adcq 304(%r10), %r8
+ movq 312(%r15), %rax
+ movq %r8, 304(%r15)
+ adcq 312(%r10), %rax
+ movq 320(%r15), %rcx
+ movq %rax, 312(%r15)
+ adcq 320(%r10), %rcx
+ movq 328(%r15), %r8
+ movq %rcx, 320(%r15)
+ adcq 328(%r10), %r8
+ movq 336(%r15), %rax
+ movq %r8, 328(%r15)
+ adcq 336(%r10), %rax
+ movq 344(%r15), %rcx
+ movq %rax, 336(%r15)
+ adcq 344(%r10), %rcx
+ movq 352(%r15), %r8
+ movq %rcx, 344(%r15)
+ adcq 352(%r10), %r8
+ movq 360(%r15), %rax
+ movq %r8, 352(%r15)
+ adcq 360(%r10), %rax
+ movq 368(%r15), %rcx
+ movq %rax, 360(%r15)
+ adcq 368(%r10), %rcx
+ movq 376(%r15), %r8
+ movq %rcx, 368(%r15)
+ adcq 376(%r10), %r8
+ movq %r8, 376(%r15)
+ adcq $0, %r9
+ movq %r9, 576(%rdi)
+ addq $192, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r11), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r11), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r11), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r11), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r11), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r11), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r11), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r11), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r11), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r11), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r11), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r11), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r11), %rax
+ movq %rax, 192(%r15)
+ # Add to zero
+ movq 200(%r11), %rax
+ adcq $0, %rax
+ movq 208(%r11), %rcx
+ movq %rax, 200(%r15)
+ adcq $0, %rcx
+ movq 216(%r11), %r8
+ movq %rcx, 208(%r15)
+ adcq $0, %r8
+ movq 224(%r11), %rax
+ movq %r8, 216(%r15)
+ adcq $0, %rax
+ movq 232(%r11), %rcx
+ movq %rax, 224(%r15)
+ adcq $0, %rcx
+ movq 240(%r11), %r8
+ movq %rcx, 232(%r15)
+ adcq $0, %r8
+ movq 248(%r11), %rax
+ movq %r8, 240(%r15)
+ adcq $0, %rax
+ movq 256(%r11), %rcx
+ movq %rax, 248(%r15)
+ adcq $0, %rcx
+ movq 264(%r11), %r8
+ movq %rcx, 256(%r15)
+ adcq $0, %r8
+ movq 272(%r11), %rax
+ movq %r8, 264(%r15)
+ adcq $0, %rax
+ movq 280(%r11), %rcx
+ movq %rax, 272(%r15)
+ adcq $0, %rcx
+ movq 288(%r11), %r8
+ movq %rcx, 280(%r15)
+ adcq $0, %r8
+ movq 296(%r11), %rax
+ movq %r8, 288(%r15)
+ adcq $0, %rax
+ movq 304(%r11), %rcx
+ movq %rax, 296(%r15)
+ adcq $0, %rcx
+ movq 312(%r11), %r8
+ movq %rcx, 304(%r15)
+ adcq $0, %r8
+ movq 320(%r11), %rax
+ movq %r8, 312(%r15)
+ adcq $0, %rax
+ movq 328(%r11), %rcx
+ movq %rax, 320(%r15)
+ adcq $0, %rcx
+ movq 336(%r11), %r8
+ movq %rcx, 328(%r15)
+ adcq $0, %r8
+ movq 344(%r11), %rax
+ movq %r8, 336(%r15)
+ adcq $0, %rax
+ movq 352(%r11), %rcx
+ movq %rax, 344(%r15)
+ adcq $0, %rcx
+ movq 360(%r11), %r8
+ movq %rcx, 352(%r15)
+ adcq $0, %r8
+ movq 368(%r11), %rax
+ movq %r8, 360(%r15)
+ adcq $0, %rax
+ movq 376(%r11), %rcx
+ movq %rax, 368(%r15)
+ adcq $0, %rcx
+ movq %rcx, 376(%r15)
+ addq $1192, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_48,.-sp_3072_mul_48
+#endif /* __APPLE__ */
+/* Add a to a into r. (r = a + a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_3072_dbl_24
+.type sp_3072_dbl_24,@function
+.align 16
+sp_3072_dbl_24:
+#else
+.globl _sp_3072_dbl_24
+.p2align 4
+_sp_3072_dbl_24:
+#endif /* __APPLE__ */
+ # SysV AMD64 ABI: rdi = r (24-word output), rsi = a (24-word input).
+ # Doubles a 24x64-bit little-endian integer with one add/adc chain and
+ # returns the final carry (0 or 1) in rax.
+ # Clobbers: rax, rcx, rdx (all caller-saved); leaf function, no stack use.
+ # NOTE(review): the load for word k+1 is issued before the store of word k
+ # (presumably to overlap memory latency); nothing between the adcq steps
+ # may touch the carry flag.
+ movq (%rsi), %rdx
+ xorq %rax, %rax
+ addq %rdx, %rdx
+ movq 8(%rsi), %rcx
+ movq %rdx, (%rdi)
+ adcq %rcx, %rcx
+ movq 16(%rsi), %rdx
+ movq %rcx, 8(%rdi)
+ adcq %rdx, %rdx
+ movq 24(%rsi), %rcx
+ movq %rdx, 16(%rdi)
+ adcq %rcx, %rcx
+ movq 32(%rsi), %rdx
+ movq %rcx, 24(%rdi)
+ adcq %rdx, %rdx
+ movq 40(%rsi), %rcx
+ movq %rdx, 32(%rdi)
+ adcq %rcx, %rcx
+ movq 48(%rsi), %rdx
+ movq %rcx, 40(%rdi)
+ adcq %rdx, %rdx
+ movq 56(%rsi), %rcx
+ movq %rdx, 48(%rdi)
+ adcq %rcx, %rcx
+ movq 64(%rsi), %rdx
+ movq %rcx, 56(%rdi)
+ adcq %rdx, %rdx
+ movq 72(%rsi), %rcx
+ movq %rdx, 64(%rdi)
+ adcq %rcx, %rcx
+ movq 80(%rsi), %rdx
+ movq %rcx, 72(%rdi)
+ adcq %rdx, %rdx
+ movq 88(%rsi), %rcx
+ movq %rdx, 80(%rdi)
+ adcq %rcx, %rcx
+ movq 96(%rsi), %rdx
+ movq %rcx, 88(%rdi)
+ adcq %rdx, %rdx
+ movq 104(%rsi), %rcx
+ movq %rdx, 96(%rdi)
+ adcq %rcx, %rcx
+ movq 112(%rsi), %rdx
+ movq %rcx, 104(%rdi)
+ adcq %rdx, %rdx
+ movq 120(%rsi), %rcx
+ movq %rdx, 112(%rdi)
+ adcq %rcx, %rcx
+ movq 128(%rsi), %rdx
+ movq %rcx, 120(%rdi)
+ adcq %rdx, %rdx
+ movq 136(%rsi), %rcx
+ movq %rdx, 128(%rdi)
+ adcq %rcx, %rcx
+ movq 144(%rsi), %rdx
+ movq %rcx, 136(%rdi)
+ adcq %rdx, %rdx
+ movq 152(%rsi), %rcx
+ movq %rdx, 144(%rdi)
+ adcq %rcx, %rcx
+ movq 160(%rsi), %rdx
+ movq %rcx, 152(%rdi)
+ adcq %rdx, %rdx
+ movq 168(%rsi), %rcx
+ movq %rdx, 160(%rdi)
+ adcq %rcx, %rcx
+ movq 176(%rsi), %rdx
+ movq %rcx, 168(%rdi)
+ adcq %rdx, %rdx
+ movq 184(%rsi), %rcx
+ movq %rdx, 176(%rdi)
+ adcq %rcx, %rcx
+ movq %rcx, 184(%rdi)
+ # Return the carry out of the top word in rax.
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_dbl_24,.-sp_3072_dbl_24
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_3072_sqr_48
+.type sp_3072_sqr_48,@function
+.align 16
+sp_3072_sqr_48:
+#else
+.globl _sp_3072_sqr_48
+.p2align 4
+_sp_3072_sqr_48:
+#endif /* __APPLE__ */
+ # Karatsuba squaring of a 48-word (3072-bit) number.
+ # SysV AMD64 ABI: rdi = r (96-word result), rsi = a (48 words).
+ # Splits a = ah*2^1536 + al (24 words each) and makes three calls to
+ # sp_3072_sqr_24 (assumed to be the 24-word squaring routine defined
+ # elsewhere in this file -- TODO confirm):
+ #   z1 = (al+ah)^2, z2 = ah^2, z0 = al^2, then r = combination below.
+ # Stack frame (984 bytes; entry rsp%16==8, so rsp stays 16-byte aligned
+ # at each callq):
+ #   0..376(%rsp)    z1 scratch (48 words)
+ #   384..760(%rsp)  z2 = ah^2 (48 words)
+ #   768..952(%rsp)  al + ah (24 words)
+ #   960(%rsp) saved r, 968(%rsp) saved a, 976(%rsp) carry of al+ah
+ subq $984, %rsp
+ movq %rdi, 960(%rsp)
+ movq %rsi, 968(%rsp)
+ leaq 768(%rsp), %r8
+ leaq 192(%rsi), %r9
+ # r8 = &(al+ah) buffer, r9 = ah; compute al + ah (24 words) into 768(%rsp).
+ # Add
+ movq (%rsi), %rdx
+ xorq %rcx, %rcx
+ addq (%r9), %rdx
+ movq 8(%rsi), %rax
+ movq %rdx, (%r8)
+ adcq 8(%r9), %rax
+ movq 16(%rsi), %rdx
+ movq %rax, 8(%r8)
+ adcq 16(%r9), %rdx
+ movq 24(%rsi), %rax
+ movq %rdx, 16(%r8)
+ adcq 24(%r9), %rax
+ movq 32(%rsi), %rdx
+ movq %rax, 24(%r8)
+ adcq 32(%r9), %rdx
+ movq 40(%rsi), %rax
+ movq %rdx, 32(%r8)
+ adcq 40(%r9), %rax
+ movq 48(%rsi), %rdx
+ movq %rax, 40(%r8)
+ adcq 48(%r9), %rdx
+ movq 56(%rsi), %rax
+ movq %rdx, 48(%r8)
+ adcq 56(%r9), %rax
+ movq 64(%rsi), %rdx
+ movq %rax, 56(%r8)
+ adcq 64(%r9), %rdx
+ movq 72(%rsi), %rax
+ movq %rdx, 64(%r8)
+ adcq 72(%r9), %rax
+ movq 80(%rsi), %rdx
+ movq %rax, 72(%r8)
+ adcq 80(%r9), %rdx
+ movq 88(%rsi), %rax
+ movq %rdx, 80(%r8)
+ adcq 88(%r9), %rax
+ movq 96(%rsi), %rdx
+ movq %rax, 88(%r8)
+ adcq 96(%r9), %rdx
+ movq 104(%rsi), %rax
+ movq %rdx, 96(%r8)
+ adcq 104(%r9), %rax
+ movq 112(%rsi), %rdx
+ movq %rax, 104(%r8)
+ adcq 112(%r9), %rdx
+ movq 120(%rsi), %rax
+ movq %rdx, 112(%r8)
+ adcq 120(%r9), %rax
+ movq 128(%rsi), %rdx
+ movq %rax, 120(%r8)
+ adcq 128(%r9), %rdx
+ movq 136(%rsi), %rax
+ movq %rdx, 128(%r8)
+ adcq 136(%r9), %rax
+ movq 144(%rsi), %rdx
+ movq %rax, 136(%r8)
+ adcq 144(%r9), %rdx
+ movq 152(%rsi), %rax
+ movq %rdx, 144(%r8)
+ adcq 152(%r9), %rax
+ movq 160(%rsi), %rdx
+ movq %rax, 152(%r8)
+ adcq 160(%r9), %rdx
+ movq 168(%rsi), %rax
+ movq %rdx, 160(%r8)
+ adcq 168(%r9), %rax
+ movq 176(%rsi), %rdx
+ movq %rax, 168(%r8)
+ adcq 176(%r9), %rdx
+ movq 184(%rsi), %rax
+ movq %rdx, 176(%r8)
+ adcq 184(%r9), %rax
+ movq %rax, 184(%r8)
+ adcq $0, %rcx
+ # Save the carry out of al+ah; it is applied after the recursive squares.
+ movq %rcx, 976(%rsp)
+ # z1 = (al+ah)^2 -> 0(%rsp)
+ movq %r8, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_3072_sqr_24@plt
+#else
+ callq _sp_3072_sqr_24
+#endif /* __APPLE__ */
+ # z2 = ah^2 -> 384(%rsp)
+ movq 968(%rsp), %rsi
+ leaq 384(%rsp), %rdi
+ addq $192, %rsi
+#ifndef __APPLE__
+ callq sp_3072_sqr_24@plt
+#else
+ callq _sp_3072_sqr_24
+#endif /* __APPLE__ */
+ # z0 = al^2 -> r (low half of the result)
+ movq 968(%rsp), %rsi
+ movq 960(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_3072_sqr_24@plt
+#else
+ callq _sp_3072_sqr_24
+#endif /* __APPLE__ */
+ movq 976(%rsp), %r10
+ movq %rdi, %r9
+ leaq 768(%rsp), %r8
+ movq %r10, %rcx
+ # rcx keeps the al+ah carry; r10 becomes 0 or all-ones mask from it.
+ negq %r10
+ addq $384, %r9
+ # Copy (al+ah) AND mask into r[word 48..71], then double it in place:
+ # this accounts for the dropped top bit of (al+ah) in z1
+ # (NOTE(review): standard Karatsuba carry fix-up -- verify against the
+ # matching C implementation if in doubt).
+ movq (%r8), %rdx
+ movq 8(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, (%r9)
+ movq %rax, 8(%r9)
+ movq 16(%r8), %rdx
+ movq 24(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 16(%r9)
+ movq %rax, 24(%r9)
+ movq 32(%r8), %rdx
+ movq 40(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 32(%r9)
+ movq %rax, 40(%r9)
+ movq 48(%r8), %rdx
+ movq 56(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 48(%r9)
+ movq %rax, 56(%r9)
+ movq 64(%r8), %rdx
+ movq 72(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 64(%r9)
+ movq %rax, 72(%r9)
+ movq 80(%r8), %rdx
+ movq 88(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 80(%r9)
+ movq %rax, 88(%r9)
+ movq 96(%r8), %rdx
+ movq 104(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 96(%r9)
+ movq %rax, 104(%r9)
+ movq 112(%r8), %rdx
+ movq 120(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 112(%r9)
+ movq %rax, 120(%r9)
+ movq 128(%r8), %rdx
+ movq 136(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 128(%r9)
+ movq %rax, 136(%r9)
+ movq 144(%r8), %rdx
+ movq 152(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 144(%r9)
+ movq %rax, 152(%r9)
+ movq 160(%r8), %rdx
+ movq 168(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 160(%r9)
+ movq %rax, 168(%r9)
+ movq 176(%r8), %rdx
+ movq 184(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 176(%r9)
+ movq %rax, 184(%r9)
+ # Double the masked copy in place; overflow folds into rcx.
+ movq (%r9), %rdx
+ addq %rdx, %rdx
+ movq 8(%r9), %rax
+ movq %rdx, (%r9)
+ adcq %rax, %rax
+ movq 16(%r9), %rdx
+ movq %rax, 8(%r9)
+ adcq %rdx, %rdx
+ movq 24(%r9), %rax
+ movq %rdx, 16(%r9)
+ adcq %rax, %rax
+ movq 32(%r9), %rdx
+ movq %rax, 24(%r9)
+ adcq %rdx, %rdx
+ movq 40(%r9), %rax
+ movq %rdx, 32(%r9)
+ adcq %rax, %rax
+ movq 48(%r9), %rdx
+ movq %rax, 40(%r9)
+ adcq %rdx, %rdx
+ movq 56(%r9), %rax
+ movq %rdx, 48(%r9)
+ adcq %rax, %rax
+ movq 64(%r9), %rdx
+ movq %rax, 56(%r9)
+ adcq %rdx, %rdx
+ movq 72(%r9), %rax
+ movq %rdx, 64(%r9)
+ adcq %rax, %rax
+ movq 80(%r9), %rdx
+ movq %rax, 72(%r9)
+ adcq %rdx, %rdx
+ movq 88(%r9), %rax
+ movq %rdx, 80(%r9)
+ adcq %rax, %rax
+ movq 96(%r9), %rdx
+ movq %rax, 88(%r9)
+ adcq %rdx, %rdx
+ movq 104(%r9), %rax
+ movq %rdx, 96(%r9)
+ adcq %rax, %rax
+ movq 112(%r9), %rdx
+ movq %rax, 104(%r9)
+ adcq %rdx, %rdx
+ movq 120(%r9), %rax
+ movq %rdx, 112(%r9)
+ adcq %rax, %rax
+ movq 128(%r9), %rdx
+ movq %rax, 120(%r9)
+ adcq %rdx, %rdx
+ movq 136(%r9), %rax
+ movq %rdx, 128(%r9)
+ adcq %rax, %rax
+ movq 144(%r9), %rdx
+ movq %rax, 136(%r9)
+ adcq %rdx, %rdx
+ movq 152(%r9), %rax
+ movq %rdx, 144(%r9)
+ adcq %rax, %rax
+ movq 160(%r9), %rdx
+ movq %rax, 152(%r9)
+ adcq %rdx, %rdx
+ movq 168(%r9), %rax
+ movq %rdx, 160(%r9)
+ adcq %rax, %rax
+ movq 176(%r9), %rdx
+ movq %rax, 168(%r9)
+ adcq %rdx, %rdx
+ movq 184(%r9), %rax
+ movq %rdx, 176(%r9)
+ adcq %rax, %rax
+ movq %rax, 184(%r9)
+ adcq $0, %rcx
+ # z1 -= z2 (48 words, rsi = &z2, r8 = &z1); borrow folds into rcx.
+ leaq 384(%rsp), %rsi
+ movq %rsp, %r8
+ movq (%r8), %rdx
+ subq (%rsi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rsi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rsi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rsi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rsi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rsi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rsi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rsi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rsi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rsi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rsi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rsi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rsi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rsi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rsi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rsi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rsi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rsi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rsi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rsi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rsi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rsi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rsi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rsi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rsi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rsi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rsi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rsi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rsi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rsi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rsi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rsi), %rax
+ movq 256(%r8), %rdx
+ movq %rax, 248(%r8)
+ sbbq 256(%rsi), %rdx
+ movq 264(%r8), %rax
+ movq %rdx, 256(%r8)
+ sbbq 264(%rsi), %rax
+ movq 272(%r8), %rdx
+ movq %rax, 264(%r8)
+ sbbq 272(%rsi), %rdx
+ movq 280(%r8), %rax
+ movq %rdx, 272(%r8)
+ sbbq 280(%rsi), %rax
+ movq 288(%r8), %rdx
+ movq %rax, 280(%r8)
+ sbbq 288(%rsi), %rdx
+ movq 296(%r8), %rax
+ movq %rdx, 288(%r8)
+ sbbq 296(%rsi), %rax
+ movq 304(%r8), %rdx
+ movq %rax, 296(%r8)
+ sbbq 304(%rsi), %rdx
+ movq 312(%r8), %rax
+ movq %rdx, 304(%r8)
+ sbbq 312(%rsi), %rax
+ movq 320(%r8), %rdx
+ movq %rax, 312(%r8)
+ sbbq 320(%rsi), %rdx
+ movq 328(%r8), %rax
+ movq %rdx, 320(%r8)
+ sbbq 328(%rsi), %rax
+ movq 336(%r8), %rdx
+ movq %rax, 328(%r8)
+ sbbq 336(%rsi), %rdx
+ movq 344(%r8), %rax
+ movq %rdx, 336(%r8)
+ sbbq 344(%rsi), %rax
+ movq 352(%r8), %rdx
+ movq %rax, 344(%r8)
+ sbbq 352(%rsi), %rdx
+ movq 360(%r8), %rax
+ movq %rdx, 352(%r8)
+ sbbq 360(%rsi), %rax
+ movq 368(%r8), %rdx
+ movq %rax, 360(%r8)
+ sbbq 368(%rsi), %rdx
+ movq 376(%r8), %rax
+ movq %rdx, 368(%r8)
+ sbbq 376(%rsi), %rax
+ movq %rax, 376(%r8)
+ sbbq $0, %rcx
+ # z1 -= z0 (48 words, rdi = &z0); borrow folds into rcx.
+ movq (%r8), %rdx
+ subq (%rdi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rdi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rdi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rdi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rdi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rdi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rdi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rdi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rdi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rdi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rdi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rdi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rdi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rdi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rdi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rdi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rdi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rdi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rdi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rdi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rdi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rdi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rdi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rdi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rdi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rdi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rdi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rdi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rdi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rdi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rdi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rdi), %rax
+ movq 256(%r8), %rdx
+ movq %rax, 248(%r8)
+ sbbq 256(%rdi), %rdx
+ movq 264(%r8), %rax
+ movq %rdx, 256(%r8)
+ sbbq 264(%rdi), %rax
+ movq 272(%r8), %rdx
+ movq %rax, 264(%r8)
+ sbbq 272(%rdi), %rdx
+ movq 280(%r8), %rax
+ movq %rdx, 272(%r8)
+ sbbq 280(%rdi), %rax
+ movq 288(%r8), %rdx
+ movq %rax, 280(%r8)
+ sbbq 288(%rdi), %rdx
+ movq 296(%r8), %rax
+ movq %rdx, 288(%r8)
+ sbbq 296(%rdi), %rax
+ movq 304(%r8), %rdx
+ movq %rax, 296(%r8)
+ sbbq 304(%rdi), %rdx
+ movq 312(%r8), %rax
+ movq %rdx, 304(%r8)
+ sbbq 312(%rdi), %rax
+ movq 320(%r8), %rdx
+ movq %rax, 312(%r8)
+ sbbq 320(%rdi), %rdx
+ movq 328(%r8), %rax
+ movq %rdx, 320(%r8)
+ sbbq 328(%rdi), %rax
+ movq 336(%r8), %rdx
+ movq %rax, 328(%r8)
+ sbbq 336(%rdi), %rdx
+ movq 344(%r8), %rax
+ movq %rdx, 336(%r8)
+ sbbq 344(%rdi), %rax
+ movq 352(%r8), %rdx
+ movq %rax, 344(%r8)
+ sbbq 352(%rdi), %rdx
+ movq 360(%r8), %rax
+ movq %rdx, 352(%r8)
+ sbbq 360(%rdi), %rax
+ movq 368(%r8), %rdx
+ movq %rax, 360(%r8)
+ sbbq 368(%rdi), %rdx
+ movq 376(%r8), %rax
+ movq %rdx, 368(%r8)
+ sbbq 376(%rdi), %rax
+ movq %rax, 376(%r8)
+ sbbq $0, %rcx
+ # r9 = r + 192: add the corrected z1 into r[word 24..71] in place.
+ subq $192, %r9
+ # Add in place
+ movq (%r9), %rdx
+ addq (%r8), %rdx
+ movq 8(%r9), %rax
+ movq %rdx, (%r9)
+ adcq 8(%r8), %rax
+ movq 16(%r9), %rdx
+ movq %rax, 8(%r9)
+ adcq 16(%r8), %rdx
+ movq 24(%r9), %rax
+ movq %rdx, 16(%r9)
+ adcq 24(%r8), %rax
+ movq 32(%r9), %rdx
+ movq %rax, 24(%r9)
+ adcq 32(%r8), %rdx
+ movq 40(%r9), %rax
+ movq %rdx, 32(%r9)
+ adcq 40(%r8), %rax
+ movq 48(%r9), %rdx
+ movq %rax, 40(%r9)
+ adcq 48(%r8), %rdx
+ movq 56(%r9), %rax
+ movq %rdx, 48(%r9)
+ adcq 56(%r8), %rax
+ movq 64(%r9), %rdx
+ movq %rax, 56(%r9)
+ adcq 64(%r8), %rdx
+ movq 72(%r9), %rax
+ movq %rdx, 64(%r9)
+ adcq 72(%r8), %rax
+ movq 80(%r9), %rdx
+ movq %rax, 72(%r9)
+ adcq 80(%r8), %rdx
+ movq 88(%r9), %rax
+ movq %rdx, 80(%r9)
+ adcq 88(%r8), %rax
+ movq 96(%r9), %rdx
+ movq %rax, 88(%r9)
+ adcq 96(%r8), %rdx
+ movq 104(%r9), %rax
+ movq %rdx, 96(%r9)
+ adcq 104(%r8), %rax
+ movq 112(%r9), %rdx
+ movq %rax, 104(%r9)
+ adcq 112(%r8), %rdx
+ movq 120(%r9), %rax
+ movq %rdx, 112(%r9)
+ adcq 120(%r8), %rax
+ movq 128(%r9), %rdx
+ movq %rax, 120(%r9)
+ adcq 128(%r8), %rdx
+ movq 136(%r9), %rax
+ movq %rdx, 128(%r9)
+ adcq 136(%r8), %rax
+ movq 144(%r9), %rdx
+ movq %rax, 136(%r9)
+ adcq 144(%r8), %rdx
+ movq 152(%r9), %rax
+ movq %rdx, 144(%r9)
+ adcq 152(%r8), %rax
+ movq 160(%r9), %rdx
+ movq %rax, 152(%r9)
+ adcq 160(%r8), %rdx
+ movq 168(%r9), %rax
+ movq %rdx, 160(%r9)
+ adcq 168(%r8), %rax
+ movq 176(%r9), %rdx
+ movq %rax, 168(%r9)
+ adcq 176(%r8), %rdx
+ movq 184(%r9), %rax
+ movq %rdx, 176(%r9)
+ adcq 184(%r8), %rax
+ movq 192(%r9), %rdx
+ movq %rax, 184(%r9)
+ adcq 192(%r8), %rdx
+ movq 200(%r9), %rax
+ movq %rdx, 192(%r9)
+ adcq 200(%r8), %rax
+ movq 208(%r9), %rdx
+ movq %rax, 200(%r9)
+ adcq 208(%r8), %rdx
+ movq 216(%r9), %rax
+ movq %rdx, 208(%r9)
+ adcq 216(%r8), %rax
+ movq 224(%r9), %rdx
+ movq %rax, 216(%r9)
+ adcq 224(%r8), %rdx
+ movq 232(%r9), %rax
+ movq %rdx, 224(%r9)
+ adcq 232(%r8), %rax
+ movq 240(%r9), %rdx
+ movq %rax, 232(%r9)
+ adcq 240(%r8), %rdx
+ movq 248(%r9), %rax
+ movq %rdx, 240(%r9)
+ adcq 248(%r8), %rax
+ movq 256(%r9), %rdx
+ movq %rax, 248(%r9)
+ adcq 256(%r8), %rdx
+ movq 264(%r9), %rax
+ movq %rdx, 256(%r9)
+ adcq 264(%r8), %rax
+ movq 272(%r9), %rdx
+ movq %rax, 264(%r9)
+ adcq 272(%r8), %rdx
+ movq 280(%r9), %rax
+ movq %rdx, 272(%r9)
+ adcq 280(%r8), %rax
+ movq 288(%r9), %rdx
+ movq %rax, 280(%r9)
+ adcq 288(%r8), %rdx
+ movq 296(%r9), %rax
+ movq %rdx, 288(%r9)
+ adcq 296(%r8), %rax
+ movq 304(%r9), %rdx
+ movq %rax, 296(%r9)
+ adcq 304(%r8), %rdx
+ movq 312(%r9), %rax
+ movq %rdx, 304(%r9)
+ adcq 312(%r8), %rax
+ movq 320(%r9), %rdx
+ movq %rax, 312(%r9)
+ adcq 320(%r8), %rdx
+ movq 328(%r9), %rax
+ movq %rdx, 320(%r9)
+ adcq 328(%r8), %rax
+ movq 336(%r9), %rdx
+ movq %rax, 328(%r9)
+ adcq 336(%r8), %rdx
+ movq 344(%r9), %rax
+ movq %rdx, 336(%r9)
+ adcq 344(%r8), %rax
+ movq 352(%r9), %rdx
+ movq %rax, 344(%r9)
+ adcq 352(%r8), %rdx
+ movq 360(%r9), %rax
+ movq %rdx, 352(%r9)
+ adcq 360(%r8), %rax
+ movq 368(%r9), %rdx
+ movq %rax, 360(%r9)
+ adcq 368(%r8), %rdx
+ movq 376(%r9), %rax
+ movq %rdx, 368(%r9)
+ adcq 376(%r8), %rax
+ movq %rax, 376(%r9)
+ adcq $0, %rcx
+ # Park the running carry at r[72]; 576(%rdi) == 384(%r9) since
+ # r9 = rdi+192, so it is read back by the add just below.
+ movq %rcx, 576(%rdi)
+ # Add z2[0..24] into r[48..72] in place (rsi still points at z2).
+ # Add in place
+ movq 192(%r9), %rdx
+ addq (%rsi), %rdx
+ movq 200(%r9), %rax
+ movq %rdx, 192(%r9)
+ adcq 8(%rsi), %rax
+ movq 208(%r9), %rdx
+ movq %rax, 200(%r9)
+ adcq 16(%rsi), %rdx
+ movq 216(%r9), %rax
+ movq %rdx, 208(%r9)
+ adcq 24(%rsi), %rax
+ movq 224(%r9), %rdx
+ movq %rax, 216(%r9)
+ adcq 32(%rsi), %rdx
+ movq 232(%r9), %rax
+ movq %rdx, 224(%r9)
+ adcq 40(%rsi), %rax
+ movq 240(%r9), %rdx
+ movq %rax, 232(%r9)
+ adcq 48(%rsi), %rdx
+ movq 248(%r9), %rax
+ movq %rdx, 240(%r9)
+ adcq 56(%rsi), %rax
+ movq 256(%r9), %rdx
+ movq %rax, 248(%r9)
+ adcq 64(%rsi), %rdx
+ movq 264(%r9), %rax
+ movq %rdx, 256(%r9)
+ adcq 72(%rsi), %rax
+ movq 272(%r9), %rdx
+ movq %rax, 264(%r9)
+ adcq 80(%rsi), %rdx
+ movq 280(%r9), %rax
+ movq %rdx, 272(%r9)
+ adcq 88(%rsi), %rax
+ movq 288(%r9), %rdx
+ movq %rax, 280(%r9)
+ adcq 96(%rsi), %rdx
+ movq 296(%r9), %rax
+ movq %rdx, 288(%r9)
+ adcq 104(%rsi), %rax
+ movq 304(%r9), %rdx
+ movq %rax, 296(%r9)
+ adcq 112(%rsi), %rdx
+ movq 312(%r9), %rax
+ movq %rdx, 304(%r9)
+ adcq 120(%rsi), %rax
+ movq 320(%r9), %rdx
+ movq %rax, 312(%r9)
+ adcq 128(%rsi), %rdx
+ movq 328(%r9), %rax
+ movq %rdx, 320(%r9)
+ adcq 136(%rsi), %rax
+ movq 336(%r9), %rdx
+ movq %rax, 328(%r9)
+ adcq 144(%rsi), %rdx
+ movq 344(%r9), %rax
+ movq %rdx, 336(%r9)
+ adcq 152(%rsi), %rax
+ movq 352(%r9), %rdx
+ movq %rax, 344(%r9)
+ adcq 160(%rsi), %rdx
+ movq 360(%r9), %rax
+ movq %rdx, 352(%r9)
+ adcq 168(%rsi), %rax
+ movq 368(%r9), %rdx
+ movq %rax, 360(%r9)
+ adcq 176(%rsi), %rdx
+ movq 376(%r9), %rax
+ movq %rdx, 368(%r9)
+ adcq 184(%rsi), %rax
+ movq 384(%r9), %rdx
+ movq %rax, 376(%r9)
+ adcq 192(%rsi), %rdx
+ movq %rdx, 384(%r9)
+ # Propagate the carry through z2[25..47] into r[73..95].
+ # Add to zero
+ movq 200(%rsi), %rdx
+ adcq $0, %rdx
+ movq 208(%rsi), %rax
+ movq %rdx, 392(%r9)
+ adcq $0, %rax
+ movq 216(%rsi), %rdx
+ movq %rax, 400(%r9)
+ adcq $0, %rdx
+ movq 224(%rsi), %rax
+ movq %rdx, 408(%r9)
+ adcq $0, %rax
+ movq 232(%rsi), %rdx
+ movq %rax, 416(%r9)
+ adcq $0, %rdx
+ movq 240(%rsi), %rax
+ movq %rdx, 424(%r9)
+ adcq $0, %rax
+ movq 248(%rsi), %rdx
+ movq %rax, 432(%r9)
+ adcq $0, %rdx
+ movq 256(%rsi), %rax
+ movq %rdx, 440(%r9)
+ adcq $0, %rax
+ movq 264(%rsi), %rdx
+ movq %rax, 448(%r9)
+ adcq $0, %rdx
+ movq 272(%rsi), %rax
+ movq %rdx, 456(%r9)
+ adcq $0, %rax
+ movq 280(%rsi), %rdx
+ movq %rax, 464(%r9)
+ adcq $0, %rdx
+ movq 288(%rsi), %rax
+ movq %rdx, 472(%r9)
+ adcq $0, %rax
+ movq 296(%rsi), %rdx
+ movq %rax, 480(%r9)
+ adcq $0, %rdx
+ movq 304(%rsi), %rax
+ movq %rdx, 488(%r9)
+ adcq $0, %rax
+ movq 312(%rsi), %rdx
+ movq %rax, 496(%r9)
+ adcq $0, %rdx
+ movq 320(%rsi), %rax
+ movq %rdx, 504(%r9)
+ adcq $0, %rax
+ movq 328(%rsi), %rdx
+ movq %rax, 512(%r9)
+ adcq $0, %rdx
+ movq 336(%rsi), %rax
+ movq %rdx, 520(%r9)
+ adcq $0, %rax
+ movq 344(%rsi), %rdx
+ movq %rax, 528(%r9)
+ adcq $0, %rdx
+ movq 352(%rsi), %rax
+ movq %rdx, 536(%r9)
+ adcq $0, %rax
+ movq 360(%rsi), %rdx
+ movq %rax, 544(%r9)
+ adcq $0, %rdx
+ movq 368(%rsi), %rax
+ movq %rdx, 552(%r9)
+ adcq $0, %rax
+ movq 376(%rsi), %rdx
+ movq %rax, 560(%r9)
+ adcq $0, %rdx
+ movq %rdx, 568(%r9)
+ addq $984, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_sqr_48,.-sp_3072_sqr_48
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * One level of Karatsuba: the 48-word (3072-bit) product is assembled
+ * from three 24-word products computed by sp_3072_mul_avx2_24:
+ *   z0 = a_lo * b_lo                 -> r[0..47]
+ *   z2 = a_hi * b_hi                 -> 384(%rsp)
+ *   z1 = (a_lo+a_hi) * (b_lo+b_hi)   -> 0(%rsp); then z1 -= z2, z1 -= z0
+ * The carry bits of the two half-sums are folded in with pextq masks
+ * (mask is 0 or all-ones), i.e. branch-free selection with no
+ * data-dependent control flow.
+ *
+ * SysV AMD64 / Mach-O: rdi = r, rsi = a, rdx = b.
+ * Callee-saved r12-r15 are pushed/popped; rax, rcx, r8-r11 are scratch.
+ * Stack locals (1192 bytes):
+ *   0(%rsp)    z1 (48 words)          768(%rsp)  a_lo+a_hi (24 words)
+ *   384(%rsp)  z2 (48 words)          960(%rsp)  b_lo+b_hi (24 words)
+ *   1152/1160/1168(%rsp) saved r/a/b  1176/1184(%rsp) half-sum carries
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_avx2_48
+.type sp_3072_mul_avx2_48,@function
+.align 16
+sp_3072_mul_avx2_48:
+#else
+.globl _sp_3072_mul_avx2_48
+.p2align 4
+_sp_3072_mul_avx2_48:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ subq $1192, %rsp
+ # Save r, a, b: rdi/rsi/rdx are clobbered by the helper calls below.
+ movq %rdi, 1152(%rsp)
+ movq %rsi, 1160(%rsp)
+ movq %rdx, 1168(%rsp)
+ # sum_a = a_lo + a_hi (24 words) -> 768(%rsp); carry out -> r13
+ leaq 768(%rsp), %r10
+ leaq 192(%rsi), %r12
+ # Add
+ movq (%rsi), %rax
+ xorq %r13, %r13
+ addq (%r12), %rax
+ movq 8(%rsi), %rcx
+ movq %rax, (%r10)
+ adcq 8(%r12), %rcx
+ movq 16(%rsi), %r8
+ movq %rcx, 8(%r10)
+ adcq 16(%r12), %r8
+ movq 24(%rsi), %rax
+ movq %r8, 16(%r10)
+ adcq 24(%r12), %rax
+ movq 32(%rsi), %rcx
+ movq %rax, 24(%r10)
+ adcq 32(%r12), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%r10)
+ adcq 40(%r12), %r8
+ movq 48(%rsi), %rax
+ movq %r8, 40(%r10)
+ adcq 48(%r12), %rax
+ movq 56(%rsi), %rcx
+ movq %rax, 48(%r10)
+ adcq 56(%r12), %rcx
+ movq 64(%rsi), %r8
+ movq %rcx, 56(%r10)
+ adcq 64(%r12), %r8
+ movq 72(%rsi), %rax
+ movq %r8, 64(%r10)
+ adcq 72(%r12), %rax
+ movq 80(%rsi), %rcx
+ movq %rax, 72(%r10)
+ adcq 80(%r12), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%r10)
+ adcq 88(%r12), %r8
+ movq 96(%rsi), %rax
+ movq %r8, 88(%r10)
+ adcq 96(%r12), %rax
+ movq 104(%rsi), %rcx
+ movq %rax, 96(%r10)
+ adcq 104(%r12), %rcx
+ movq 112(%rsi), %r8
+ movq %rcx, 104(%r10)
+ adcq 112(%r12), %r8
+ movq 120(%rsi), %rax
+ movq %r8, 112(%r10)
+ adcq 120(%r12), %rax
+ movq 128(%rsi), %rcx
+ movq %rax, 120(%r10)
+ adcq 128(%r12), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%r10)
+ adcq 136(%r12), %r8
+ movq 144(%rsi), %rax
+ movq %r8, 136(%r10)
+ adcq 144(%r12), %rax
+ movq 152(%rsi), %rcx
+ movq %rax, 144(%r10)
+ adcq 152(%r12), %rcx
+ movq 160(%rsi), %r8
+ movq %rcx, 152(%r10)
+ adcq 160(%r12), %r8
+ movq 168(%rsi), %rax
+ movq %r8, 160(%r10)
+ adcq 168(%r12), %rax
+ movq 176(%rsi), %rcx
+ movq %rax, 168(%r10)
+ adcq 176(%r12), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%r10)
+ adcq 184(%r12), %r8
+ movq %r8, 184(%r10)
+ adcq $0, %r13
+ movq %r13, 1176(%rsp)
+ # sum_b = b_lo + b_hi (24 words) -> 960(%rsp); carry out -> r14
+ leaq 960(%rsp), %r11
+ leaq 192(%rdx), %r12
+ # Add
+ movq (%rdx), %rax
+ xorq %r14, %r14
+ addq (%r12), %rax
+ movq 8(%rdx), %rcx
+ movq %rax, (%r11)
+ adcq 8(%r12), %rcx
+ movq 16(%rdx), %r8
+ movq %rcx, 8(%r11)
+ adcq 16(%r12), %r8
+ movq 24(%rdx), %rax
+ movq %r8, 16(%r11)
+ adcq 24(%r12), %rax
+ movq 32(%rdx), %rcx
+ movq %rax, 24(%r11)
+ adcq 32(%r12), %rcx
+ movq 40(%rdx), %r8
+ movq %rcx, 32(%r11)
+ adcq 40(%r12), %r8
+ movq 48(%rdx), %rax
+ movq %r8, 40(%r11)
+ adcq 48(%r12), %rax
+ movq 56(%rdx), %rcx
+ movq %rax, 48(%r11)
+ adcq 56(%r12), %rcx
+ movq 64(%rdx), %r8
+ movq %rcx, 56(%r11)
+ adcq 64(%r12), %r8
+ movq 72(%rdx), %rax
+ movq %r8, 64(%r11)
+ adcq 72(%r12), %rax
+ movq 80(%rdx), %rcx
+ movq %rax, 72(%r11)
+ adcq 80(%r12), %rcx
+ movq 88(%rdx), %r8
+ movq %rcx, 80(%r11)
+ adcq 88(%r12), %r8
+ movq 96(%rdx), %rax
+ movq %r8, 88(%r11)
+ adcq 96(%r12), %rax
+ movq 104(%rdx), %rcx
+ movq %rax, 96(%r11)
+ adcq 104(%r12), %rcx
+ movq 112(%rdx), %r8
+ movq %rcx, 104(%r11)
+ adcq 112(%r12), %r8
+ movq 120(%rdx), %rax
+ movq %r8, 112(%r11)
+ adcq 120(%r12), %rax
+ movq 128(%rdx), %rcx
+ movq %rax, 120(%r11)
+ adcq 128(%r12), %rcx
+ movq 136(%rdx), %r8
+ movq %rcx, 128(%r11)
+ adcq 136(%r12), %r8
+ movq 144(%rdx), %rax
+ movq %r8, 136(%r11)
+ adcq 144(%r12), %rax
+ movq 152(%rdx), %rcx
+ movq %rax, 144(%r11)
+ adcq 152(%r12), %rcx
+ movq 160(%rdx), %r8
+ movq %rcx, 152(%r11)
+ adcq 160(%r12), %r8
+ movq 168(%rdx), %rax
+ movq %r8, 160(%r11)
+ adcq 168(%r12), %rax
+ movq 176(%rdx), %rcx
+ movq %rax, 168(%r11)
+ adcq 176(%r12), %rcx
+ movq 184(%rdx), %r8
+ movq %rcx, 176(%r11)
+ adcq 184(%r12), %r8
+ movq %r8, 184(%r11)
+ adcq $0, %r14
+ movq %r14, 1184(%rsp)
+ # z1 = sum_a * sum_b -> 0(%rsp)
+ movq %r11, %rdx
+ movq %r10, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_3072_mul_avx2_24@plt
+#else
+ callq _sp_3072_mul_avx2_24
+#endif /* __APPLE__ */
+ # z2 = a_hi * b_hi -> 384(%rsp)
+ movq 1168(%rsp), %rdx
+ movq 1160(%rsp), %rsi
+ leaq 384(%rsp), %rdi
+ addq $192, %rdx
+ addq $192, %rsi
+#ifndef __APPLE__
+ callq sp_3072_mul_avx2_24@plt
+#else
+ callq _sp_3072_mul_avx2_24
+#endif /* __APPLE__ */
+ # z0 = a_lo * b_lo -> r[0..47]
+ movq 1168(%rsp), %rdx
+ movq 1160(%rsp), %rsi
+ movq 1152(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_3072_mul_avx2_24@plt
+#else
+ callq _sp_3072_mul_avx2_24
+#endif /* __APPLE__ */
+ # Fold the half-sum carries into r[48..] (r15 = r + 384 bytes).
+ # negq turns each 0/1 carry into a 0/all-ones mask; pextq with that
+ # mask passes the word through or zeroes it -- branch-free select.
+ # r9 = carry_a AND carry_b is the extra top word of the cross term.
+ movq 1176(%rsp), %r13
+ movq 1184(%rsp), %r14
+ movq 1152(%rsp), %r15
+ movq %r13, %r9
+ leaq 768(%rsp), %r10
+ leaq 960(%rsp), %r11
+ andq %r14, %r9
+ negq %r13
+ negq %r14
+ addq $384, %r15
+ movq (%r10), %rax
+ movq (%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ addq %rcx, %rax
+ movq 8(%r10), %rcx
+ movq 8(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, (%r15)
+ adcq %r8, %rcx
+ movq 16(%r10), %r8
+ movq 16(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 8(%r15)
+ adcq %rax, %r8
+ movq 24(%r10), %rax
+ movq 24(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 16(%r15)
+ adcq %rcx, %rax
+ movq 32(%r10), %rcx
+ movq 32(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 24(%r15)
+ adcq %r8, %rcx
+ movq 40(%r10), %r8
+ movq 40(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 32(%r15)
+ adcq %rax, %r8
+ movq 48(%r10), %rax
+ movq 48(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 40(%r15)
+ adcq %rcx, %rax
+ movq 56(%r10), %rcx
+ movq 56(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 48(%r15)
+ adcq %r8, %rcx
+ movq 64(%r10), %r8
+ movq 64(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 56(%r15)
+ adcq %rax, %r8
+ movq 72(%r10), %rax
+ movq 72(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 64(%r15)
+ adcq %rcx, %rax
+ movq 80(%r10), %rcx
+ movq 80(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 72(%r15)
+ adcq %r8, %rcx
+ movq 88(%r10), %r8
+ movq 88(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 80(%r15)
+ adcq %rax, %r8
+ movq 96(%r10), %rax
+ movq 96(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 88(%r15)
+ adcq %rcx, %rax
+ movq 104(%r10), %rcx
+ movq 104(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 96(%r15)
+ adcq %r8, %rcx
+ movq 112(%r10), %r8
+ movq 112(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 104(%r15)
+ adcq %rax, %r8
+ movq 120(%r10), %rax
+ movq 120(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 112(%r15)
+ adcq %rcx, %rax
+ movq 128(%r10), %rcx
+ movq 128(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 120(%r15)
+ adcq %r8, %rcx
+ movq 136(%r10), %r8
+ movq 136(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 128(%r15)
+ adcq %rax, %r8
+ movq 144(%r10), %rax
+ movq 144(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 136(%r15)
+ adcq %rcx, %rax
+ movq 152(%r10), %rcx
+ movq 152(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 144(%r15)
+ adcq %r8, %rcx
+ movq 160(%r10), %r8
+ movq 160(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 152(%r15)
+ adcq %rax, %r8
+ movq 168(%r10), %rax
+ movq 168(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 160(%r15)
+ adcq %rcx, %rax
+ movq 176(%r10), %rcx
+ movq 176(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 168(%r15)
+ adcq %r8, %rcx
+ movq 184(%r10), %r8
+ movq 184(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 176(%r15)
+ adcq %rax, %r8
+ movq %r8, 184(%r15)
+ adcq $0, %r9
+ # z1 -= z2 (48 words, in place at 0(%rsp)); borrow folds into r9
+ leaq 384(%rsp), %r11
+ movq %rsp, %r10
+ movq (%r10), %rax
+ subq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%r11), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%r11), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%r11), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%r11), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%r11), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%r11), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%r11), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%r11), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%r11), %rcx
+ movq 256(%r10), %r8
+ movq %rcx, 248(%r10)
+ sbbq 256(%r11), %r8
+ movq 264(%r10), %rax
+ movq %r8, 256(%r10)
+ sbbq 264(%r11), %rax
+ movq 272(%r10), %rcx
+ movq %rax, 264(%r10)
+ sbbq 272(%r11), %rcx
+ movq 280(%r10), %r8
+ movq %rcx, 272(%r10)
+ sbbq 280(%r11), %r8
+ movq 288(%r10), %rax
+ movq %r8, 280(%r10)
+ sbbq 288(%r11), %rax
+ movq 296(%r10), %rcx
+ movq %rax, 288(%r10)
+ sbbq 296(%r11), %rcx
+ movq 304(%r10), %r8
+ movq %rcx, 296(%r10)
+ sbbq 304(%r11), %r8
+ movq 312(%r10), %rax
+ movq %r8, 304(%r10)
+ sbbq 312(%r11), %rax
+ movq 320(%r10), %rcx
+ movq %rax, 312(%r10)
+ sbbq 320(%r11), %rcx
+ movq 328(%r10), %r8
+ movq %rcx, 320(%r10)
+ sbbq 328(%r11), %r8
+ movq 336(%r10), %rax
+ movq %r8, 328(%r10)
+ sbbq 336(%r11), %rax
+ movq 344(%r10), %rcx
+ movq %rax, 336(%r10)
+ sbbq 344(%r11), %rcx
+ movq 352(%r10), %r8
+ movq %rcx, 344(%r10)
+ sbbq 352(%r11), %r8
+ movq 360(%r10), %rax
+ movq %r8, 352(%r10)
+ sbbq 360(%r11), %rax
+ movq 368(%r10), %rcx
+ movq %rax, 360(%r10)
+ sbbq 368(%r11), %rcx
+ movq 376(%r10), %r8
+ movq %rcx, 368(%r10)
+ sbbq 376(%r11), %r8
+ movq %r8, 376(%r10)
+ sbbq $0, %r9
+ # z1 -= z0 (z0 is at rdi = r[0..47]); r9 now holds the net middle carry
+ movq (%r10), %rax
+ subq (%rdi), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%rdi), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%rdi), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%rdi), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%rdi), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%rdi), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%rdi), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%rdi), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%rdi), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%rdi), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%rdi), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%rdi), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%rdi), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%rdi), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%rdi), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%rdi), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%rdi), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%rdi), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%rdi), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%rdi), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%rdi), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%rdi), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%rdi), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%rdi), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%rdi), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%rdi), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%rdi), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%rdi), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%rdi), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%rdi), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%rdi), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%rdi), %rcx
+ movq 256(%r10), %r8
+ movq %rcx, 248(%r10)
+ sbbq 256(%rdi), %r8
+ movq 264(%r10), %rax
+ movq %r8, 256(%r10)
+ sbbq 264(%rdi), %rax
+ movq 272(%r10), %rcx
+ movq %rax, 264(%r10)
+ sbbq 272(%rdi), %rcx
+ movq 280(%r10), %r8
+ movq %rcx, 272(%r10)
+ sbbq 280(%rdi), %r8
+ movq 288(%r10), %rax
+ movq %r8, 280(%r10)
+ sbbq 288(%rdi), %rax
+ movq 296(%r10), %rcx
+ movq %rax, 288(%r10)
+ sbbq 296(%rdi), %rcx
+ movq 304(%r10), %r8
+ movq %rcx, 296(%r10)
+ sbbq 304(%rdi), %r8
+ movq 312(%r10), %rax
+ movq %r8, 304(%r10)
+ sbbq 312(%rdi), %rax
+ movq 320(%r10), %rcx
+ movq %rax, 312(%r10)
+ sbbq 320(%rdi), %rcx
+ movq 328(%r10), %r8
+ movq %rcx, 320(%r10)
+ sbbq 328(%rdi), %r8
+ movq 336(%r10), %rax
+ movq %r8, 328(%r10)
+ sbbq 336(%rdi), %rax
+ movq 344(%r10), %rcx
+ movq %rax, 336(%r10)
+ sbbq 344(%rdi), %rcx
+ movq 352(%r10), %r8
+ movq %rcx, 344(%r10)
+ sbbq 352(%rdi), %r8
+ movq 360(%r10), %rax
+ movq %r8, 352(%r10)
+ sbbq 360(%rdi), %rax
+ movq 368(%r10), %rcx
+ movq %rax, 360(%r10)
+ sbbq 368(%rdi), %rcx
+ movq 376(%r10), %r8
+ movq %rcx, 368(%r10)
+ sbbq 376(%rdi), %r8
+ movq %r8, 376(%r10)
+ sbbq $0, %r9
+ # r[24..71] += z1 (r15 = r + 192 bytes); top carry lands in r[72]
+ subq $192, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r10), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r10), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r10), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r10), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r10), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r10), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r10), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r10), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r10), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r10), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r10), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r10), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r10), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r10), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r10), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r10), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r10), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r10), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r10), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r10), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r10), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r10), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r10), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r10), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r10), %rax
+ movq 200(%r15), %rcx
+ movq %rax, 192(%r15)
+ adcq 200(%r10), %rcx
+ movq 208(%r15), %r8
+ movq %rcx, 200(%r15)
+ adcq 208(%r10), %r8
+ movq 216(%r15), %rax
+ movq %r8, 208(%r15)
+ adcq 216(%r10), %rax
+ movq 224(%r15), %rcx
+ movq %rax, 216(%r15)
+ adcq 224(%r10), %rcx
+ movq 232(%r15), %r8
+ movq %rcx, 224(%r15)
+ adcq 232(%r10), %r8
+ movq 240(%r15), %rax
+ movq %r8, 232(%r15)
+ adcq 240(%r10), %rax
+ movq 248(%r15), %rcx
+ movq %rax, 240(%r15)
+ adcq 248(%r10), %rcx
+ movq 256(%r15), %r8
+ movq %rcx, 248(%r15)
+ adcq 256(%r10), %r8
+ movq 264(%r15), %rax
+ movq %r8, 256(%r15)
+ adcq 264(%r10), %rax
+ movq 272(%r15), %rcx
+ movq %rax, 264(%r15)
+ adcq 272(%r10), %rcx
+ movq 280(%r15), %r8
+ movq %rcx, 272(%r15)
+ adcq 280(%r10), %r8
+ movq 288(%r15), %rax
+ movq %r8, 280(%r15)
+ adcq 288(%r10), %rax
+ movq 296(%r15), %rcx
+ movq %rax, 288(%r15)
+ adcq 296(%r10), %rcx
+ movq 304(%r15), %r8
+ movq %rcx, 296(%r15)
+ adcq 304(%r10), %r8
+ movq 312(%r15), %rax
+ movq %r8, 304(%r15)
+ adcq 312(%r10), %rax
+ movq 320(%r15), %rcx
+ movq %rax, 312(%r15)
+ adcq 320(%r10), %rcx
+ movq 328(%r15), %r8
+ movq %rcx, 320(%r15)
+ adcq 328(%r10), %r8
+ movq 336(%r15), %rax
+ movq %r8, 328(%r15)
+ adcq 336(%r10), %rax
+ movq 344(%r15), %rcx
+ movq %rax, 336(%r15)
+ adcq 344(%r10), %rcx
+ movq 352(%r15), %r8
+ movq %rcx, 344(%r15)
+ adcq 352(%r10), %r8
+ movq 360(%r15), %rax
+ movq %r8, 352(%r15)
+ adcq 360(%r10), %rax
+ movq 368(%r15), %rcx
+ movq %rax, 360(%r15)
+ adcq 368(%r10), %rcx
+ movq 376(%r15), %r8
+ movq %rcx, 368(%r15)
+ adcq 376(%r10), %r8
+ movq %r8, 376(%r15)
+ adcq $0, %r9
+ movq %r9, 576(%rdi)
+ # r[48..] += z2 (r11 = 384(%rsp)); first 25 words add, rest carry-only
+ addq $192, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r11), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r11), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r11), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r11), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r11), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r11), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r11), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r11), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r11), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r11), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r11), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r11), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r11), %rax
+ movq %rax, 192(%r15)
+ # Add to zero
+ movq 200(%r11), %rax
+ adcq $0, %rax
+ movq 208(%r11), %rcx
+ movq %rax, 200(%r15)
+ adcq $0, %rcx
+ movq 216(%r11), %r8
+ movq %rcx, 208(%r15)
+ adcq $0, %r8
+ movq 224(%r11), %rax
+ movq %r8, 216(%r15)
+ adcq $0, %rax
+ movq 232(%r11), %rcx
+ movq %rax, 224(%r15)
+ adcq $0, %rcx
+ movq 240(%r11), %r8
+ movq %rcx, 232(%r15)
+ adcq $0, %r8
+ movq 248(%r11), %rax
+ movq %r8, 240(%r15)
+ adcq $0, %rax
+ movq 256(%r11), %rcx
+ movq %rax, 248(%r15)
+ adcq $0, %rcx
+ movq 264(%r11), %r8
+ movq %rcx, 256(%r15)
+ adcq $0, %r8
+ movq 272(%r11), %rax
+ movq %r8, 264(%r15)
+ adcq $0, %rax
+ movq 280(%r11), %rcx
+ movq %rax, 272(%r15)
+ adcq $0, %rcx
+ movq 288(%r11), %r8
+ movq %rcx, 280(%r15)
+ adcq $0, %r8
+ movq 296(%r11), %rax
+ movq %r8, 288(%r15)
+ adcq $0, %rax
+ movq 304(%r11), %rcx
+ movq %rax, 296(%r15)
+ adcq $0, %rcx
+ movq 312(%r11), %r8
+ movq %rcx, 304(%r15)
+ adcq $0, %r8
+ movq 320(%r11), %rax
+ movq %r8, 312(%r15)
+ adcq $0, %rax
+ movq 328(%r11), %rcx
+ movq %rax, 320(%r15)
+ adcq $0, %rcx
+ movq 336(%r11), %r8
+ movq %rcx, 328(%r15)
+ adcq $0, %r8
+ movq 344(%r11), %rax
+ movq %r8, 336(%r15)
+ adcq $0, %rax
+ movq 352(%r11), %rcx
+ movq %rax, 344(%r15)
+ adcq $0, %rcx
+ movq 360(%r11), %r8
+ movq %rcx, 352(%r15)
+ adcq $0, %r8
+ movq 368(%r11), %rax
+ movq %r8, 360(%r15)
+ adcq $0, %rax
+ movq 376(%r11), %rcx
+ movq %rax, 368(%r15)
+ adcq $0, %rcx
+ movq %rcx, 376(%r15)
+ # Epilogue: release locals, restore callee-saved registers.
+ addq $1192, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_avx2_48,.-sp_3072_mul_avx2_48
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_3072_sqr_avx2_48
+.type sp_3072_sqr_avx2_48,@function
+.align 16
+sp_3072_sqr_avx2_48:
+#else
+.globl _sp_3072_sqr_avx2_48
+.p2align 4
+_sp_3072_sqr_avx2_48:
+#endif /* __APPLE__ */
+ subq $984, %rsp
+ movq %rdi, 960(%rsp)
+ movq %rsi, 968(%rsp)
+ leaq 768(%rsp), %r8
+ leaq 192(%rsi), %r9
+ # Add
+ movq (%rsi), %rdx
+ xorq %rcx, %rcx
+ addq (%r9), %rdx
+ movq 8(%rsi), %rax
+ movq %rdx, (%r8)
+ adcq 8(%r9), %rax
+ movq 16(%rsi), %rdx
+ movq %rax, 8(%r8)
+ adcq 16(%r9), %rdx
+ movq 24(%rsi), %rax
+ movq %rdx, 16(%r8)
+ adcq 24(%r9), %rax
+ movq 32(%rsi), %rdx
+ movq %rax, 24(%r8)
+ adcq 32(%r9), %rdx
+ movq 40(%rsi), %rax
+ movq %rdx, 32(%r8)
+ adcq 40(%r9), %rax
+ movq 48(%rsi), %rdx
+ movq %rax, 40(%r8)
+ adcq 48(%r9), %rdx
+ movq 56(%rsi), %rax
+ movq %rdx, 48(%r8)
+ adcq 56(%r9), %rax
+ movq 64(%rsi), %rdx
+ movq %rax, 56(%r8)
+ adcq 64(%r9), %rdx
+ movq 72(%rsi), %rax
+ movq %rdx, 64(%r8)
+ adcq 72(%r9), %rax
+ movq 80(%rsi), %rdx
+ movq %rax, 72(%r8)
+ adcq 80(%r9), %rdx
+ movq 88(%rsi), %rax
+ movq %rdx, 80(%r8)
+ adcq 88(%r9), %rax
+ movq 96(%rsi), %rdx
+ movq %rax, 88(%r8)
+ adcq 96(%r9), %rdx
+ movq 104(%rsi), %rax
+ movq %rdx, 96(%r8)
+ adcq 104(%r9), %rax
+ movq 112(%rsi), %rdx
+ movq %rax, 104(%r8)
+ adcq 112(%r9), %rdx
+ movq 120(%rsi), %rax
+ movq %rdx, 112(%r8)
+ adcq 120(%r9), %rax
+ movq 128(%rsi), %rdx
+ movq %rax, 120(%r8)
+ adcq 128(%r9), %rdx
+ movq 136(%rsi), %rax
+ movq %rdx, 128(%r8)
+ adcq 136(%r9), %rax
+ movq 144(%rsi), %rdx
+ movq %rax, 136(%r8)
+ adcq 144(%r9), %rdx
+ movq 152(%rsi), %rax
+ movq %rdx, 144(%r8)
+ adcq 152(%r9), %rax
+ movq 160(%rsi), %rdx
+ movq %rax, 152(%r8)
+ adcq 160(%r9), %rdx
+ movq 168(%rsi), %rax
+ movq %rdx, 160(%r8)
+ adcq 168(%r9), %rax
+ movq 176(%rsi), %rdx
+ movq %rax, 168(%r8)
+ adcq 176(%r9), %rdx
+ movq 184(%rsi), %rax
+ movq %rdx, 176(%r8)
+ adcq 184(%r9), %rax
+ movq %rax, 184(%r8)
+ adcq $0, %rcx
+ movq %rcx, 976(%rsp)
+ movq %r8, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_3072_sqr_avx2_24@plt
+#else
+ callq _sp_3072_sqr_avx2_24
+#endif /* __APPLE__ */
+ movq 968(%rsp), %rsi
+ leaq 384(%rsp), %rdi
+ addq $192, %rsi
+#ifndef __APPLE__
+ callq sp_3072_sqr_avx2_24@plt
+#else
+ callq _sp_3072_sqr_avx2_24
+#endif /* __APPLE__ */
+ movq 968(%rsp), %rsi
+ movq 960(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_3072_sqr_avx2_24@plt
+#else
+ callq _sp_3072_sqr_avx2_24
+#endif /* __APPLE__ */
+ movq 976(%rsp), %r10
+ movq %rdi, %r9
+ leaq 768(%rsp), %r8
+ movq %r10, %rcx
+ negq %r10
+ addq $384, %r9
+ movq (%r8), %rdx
+ pextq %r10, %rdx, %rdx
+ addq %rdx, %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r9)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r9)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq %rax, 184(%r9)
+ adcq $0, %rcx
+ leaq 384(%rsp), %rsi
+ movq %rsp, %r8
+ movq (%r8), %rdx
+ subq (%rsi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rsi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rsi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rsi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rsi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rsi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rsi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rsi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rsi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rsi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rsi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rsi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rsi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rsi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rsi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rsi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rsi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rsi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rsi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rsi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rsi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rsi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rsi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rsi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rsi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rsi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rsi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rsi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rsi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rsi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rsi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rsi), %rax
+ movq 256(%r8), %rdx
+ movq %rax, 248(%r8)
+ sbbq 256(%rsi), %rdx
+ movq 264(%r8), %rax
+ movq %rdx, 256(%r8)
+ sbbq 264(%rsi), %rax
+ movq 272(%r8), %rdx
+ movq %rax, 264(%r8)
+ sbbq 272(%rsi), %rdx
+ movq 280(%r8), %rax
+ movq %rdx, 272(%r8)
+ sbbq 280(%rsi), %rax
+ movq 288(%r8), %rdx
+ movq %rax, 280(%r8)
+ sbbq 288(%rsi), %rdx
+ movq 296(%r8), %rax
+ movq %rdx, 288(%r8)
+ sbbq 296(%rsi), %rax
+ movq 304(%r8), %rdx
+ movq %rax, 296(%r8)
+ sbbq 304(%rsi), %rdx
+ movq 312(%r8), %rax
+ movq %rdx, 304(%r8)
+ sbbq 312(%rsi), %rax
+ movq 320(%r8), %rdx
+ movq %rax, 312(%r8)
+ sbbq 320(%rsi), %rdx
+ movq 328(%r8), %rax
+ movq %rdx, 320(%r8)
+ sbbq 328(%rsi), %rax
+ movq 336(%r8), %rdx
+ movq %rax, 328(%r8)
+ sbbq 336(%rsi), %rdx
+ movq 344(%r8), %rax
+ movq %rdx, 336(%r8)
+ sbbq 344(%rsi), %rax
+ movq 352(%r8), %rdx
+ movq %rax, 344(%r8)
+ sbbq 352(%rsi), %rdx
+ movq 360(%r8), %rax
+ movq %rdx, 352(%r8)
+ sbbq 360(%rsi), %rax
+ movq 368(%r8), %rdx
+ movq %rax, 360(%r8)
+ sbbq 368(%rsi), %rdx
+ movq 376(%r8), %rax
+ movq %rdx, 368(%r8)
+ sbbq 376(%rsi), %rax
+ movq %rax, 376(%r8)
+ sbbq $0, %rcx
+ movq (%r8), %rdx
+ subq (%rdi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rdi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rdi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rdi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rdi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rdi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rdi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rdi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rdi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rdi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rdi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rdi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rdi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rdi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rdi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rdi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rdi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rdi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rdi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rdi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rdi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rdi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rdi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rdi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rdi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rdi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rdi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rdi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rdi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rdi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rdi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rdi), %rax
+ movq 256(%r8), %rdx
+ movq %rax, 248(%r8)
+ sbbq 256(%rdi), %rdx
+ movq 264(%r8), %rax
+ movq %rdx, 256(%r8)
+ sbbq 264(%rdi), %rax
+ movq 272(%r8), %rdx
+ movq %rax, 264(%r8)
+ sbbq 272(%rdi), %rdx
+ movq 280(%r8), %rax
+ movq %rdx, 272(%r8)
+ sbbq 280(%rdi), %rax
+ movq 288(%r8), %rdx
+ movq %rax, 280(%r8)
+ sbbq 288(%rdi), %rdx
+ movq 296(%r8), %rax
+ movq %rdx, 288(%r8)
+ sbbq 296(%rdi), %rax
+ movq 304(%r8), %rdx
+ movq %rax, 296(%r8)
+ sbbq 304(%rdi), %rdx
+ movq 312(%r8), %rax
+ movq %rdx, 304(%r8)
+ sbbq 312(%rdi), %rax
+ movq 320(%r8), %rdx
+ movq %rax, 312(%r8)
+ sbbq 320(%rdi), %rdx
+ movq 328(%r8), %rax
+ movq %rdx, 320(%r8)
+ sbbq 328(%rdi), %rax
+ movq 336(%r8), %rdx
+ movq %rax, 328(%r8)
+ sbbq 336(%rdi), %rdx
+ movq 344(%r8), %rax
+ movq %rdx, 336(%r8)
+ sbbq 344(%rdi), %rax
+ movq 352(%r8), %rdx
+ movq %rax, 344(%r8)
+ sbbq 352(%rdi), %rdx
+ movq 360(%r8), %rax
+ movq %rdx, 352(%r8)
+ sbbq 360(%rdi), %rax
+ movq 368(%r8), %rdx
+ movq %rax, 360(%r8)
+ sbbq 368(%rdi), %rdx
+ movq 376(%r8), %rax
+ movq %rdx, 368(%r8)
+ sbbq 376(%rdi), %rax
+ movq %rax, 376(%r8)
+ sbbq $0, %rcx
+ subq $192, %r9
+ # Add in place
+ movq (%r9), %rdx
+ addq (%r8), %rdx
+ movq 8(%r9), %rax
+ movq %rdx, (%r9)
+ adcq 8(%r8), %rax
+ movq 16(%r9), %rdx
+ movq %rax, 8(%r9)
+ adcq 16(%r8), %rdx
+ movq 24(%r9), %rax
+ movq %rdx, 16(%r9)
+ adcq 24(%r8), %rax
+ movq 32(%r9), %rdx
+ movq %rax, 24(%r9)
+ adcq 32(%r8), %rdx
+ movq 40(%r9), %rax
+ movq %rdx, 32(%r9)
+ adcq 40(%r8), %rax
+ movq 48(%r9), %rdx
+ movq %rax, 40(%r9)
+ adcq 48(%r8), %rdx
+ movq 56(%r9), %rax
+ movq %rdx, 48(%r9)
+ adcq 56(%r8), %rax
+ movq 64(%r9), %rdx
+ movq %rax, 56(%r9)
+ adcq 64(%r8), %rdx
+ movq 72(%r9), %rax
+ movq %rdx, 64(%r9)
+ adcq 72(%r8), %rax
+ movq 80(%r9), %rdx
+ movq %rax, 72(%r9)
+ adcq 80(%r8), %rdx
+ movq 88(%r9), %rax
+ movq %rdx, 80(%r9)
+ adcq 88(%r8), %rax
+ movq 96(%r9), %rdx
+ movq %rax, 88(%r9)
+ adcq 96(%r8), %rdx
+ movq 104(%r9), %rax
+ movq %rdx, 96(%r9)
+ adcq 104(%r8), %rax
+ movq 112(%r9), %rdx
+ movq %rax, 104(%r9)
+ adcq 112(%r8), %rdx
+ movq 120(%r9), %rax
+ movq %rdx, 112(%r9)
+ adcq 120(%r8), %rax
+ movq 128(%r9), %rdx
+ movq %rax, 120(%r9)
+ adcq 128(%r8), %rdx
+ movq 136(%r9), %rax
+ movq %rdx, 128(%r9)
+ adcq 136(%r8), %rax
+ movq 144(%r9), %rdx
+ movq %rax, 136(%r9)
+ adcq 144(%r8), %rdx
+ movq 152(%r9), %rax
+ movq %rdx, 144(%r9)
+ adcq 152(%r8), %rax
+ movq 160(%r9), %rdx
+ movq %rax, 152(%r9)
+ adcq 160(%r8), %rdx
+ movq 168(%r9), %rax
+ movq %rdx, 160(%r9)
+ adcq 168(%r8), %rax
+ movq 176(%r9), %rdx
+ movq %rax, 168(%r9)
+ adcq 176(%r8), %rdx
+ movq 184(%r9), %rax
+ movq %rdx, 176(%r9)
+ adcq 184(%r8), %rax
+ movq 192(%r9), %rdx
+ movq %rax, 184(%r9)
+ adcq 192(%r8), %rdx
+ movq 200(%r9), %rax
+ movq %rdx, 192(%r9)
+ adcq 200(%r8), %rax
+ movq 208(%r9), %rdx
+ movq %rax, 200(%r9)
+ adcq 208(%r8), %rdx
+ movq 216(%r9), %rax
+ movq %rdx, 208(%r9)
+ adcq 216(%r8), %rax
+ movq 224(%r9), %rdx
+ movq %rax, 216(%r9)
+ adcq 224(%r8), %rdx
+ movq 232(%r9), %rax
+ movq %rdx, 224(%r9)
+ adcq 232(%r8), %rax
+ movq 240(%r9), %rdx
+ movq %rax, 232(%r9)
+ adcq 240(%r8), %rdx
+ movq 248(%r9), %rax
+ movq %rdx, 240(%r9)
+ adcq 248(%r8), %rax
+ movq 256(%r9), %rdx
+ movq %rax, 248(%r9)
+ adcq 256(%r8), %rdx
+ movq 264(%r9), %rax
+ movq %rdx, 256(%r9)
+ adcq 264(%r8), %rax
+ movq 272(%r9), %rdx
+ movq %rax, 264(%r9)
+ adcq 272(%r8), %rdx
+ movq 280(%r9), %rax
+ movq %rdx, 272(%r9)
+ adcq 280(%r8), %rax
+ movq 288(%r9), %rdx
+ movq %rax, 280(%r9)
+ adcq 288(%r8), %rdx
+ movq 296(%r9), %rax
+ movq %rdx, 288(%r9)
+ adcq 296(%r8), %rax
+ movq 304(%r9), %rdx
+ movq %rax, 296(%r9)
+ adcq 304(%r8), %rdx
+ movq 312(%r9), %rax
+ movq %rdx, 304(%r9)
+ adcq 312(%r8), %rax
+ movq 320(%r9), %rdx
+ movq %rax, 312(%r9)
+ adcq 320(%r8), %rdx
+ movq 328(%r9), %rax
+ movq %rdx, 320(%r9)
+ adcq 328(%r8), %rax
+ movq 336(%r9), %rdx
+ movq %rax, 328(%r9)
+ adcq 336(%r8), %rdx
+ movq 344(%r9), %rax
+ movq %rdx, 336(%r9)
+ adcq 344(%r8), %rax
+ movq 352(%r9), %rdx
+ movq %rax, 344(%r9)
+ adcq 352(%r8), %rdx
+ movq 360(%r9), %rax
+ movq %rdx, 352(%r9)
+ adcq 360(%r8), %rax
+ movq 368(%r9), %rdx
+ movq %rax, 360(%r9)
+ adcq 368(%r8), %rdx
+ movq 376(%r9), %rax
+ movq %rdx, 368(%r9)
+ adcq 376(%r8), %rax
+ movq %rax, 376(%r9)
+ adcq $0, %rcx
+ movq %rcx, 576(%rdi)
+ # Add in place
+ movq 192(%r9), %rdx
+ addq (%rsi), %rdx
+ movq 200(%r9), %rax
+ movq %rdx, 192(%r9)
+ adcq 8(%rsi), %rax
+ movq 208(%r9), %rdx
+ movq %rax, 200(%r9)
+ adcq 16(%rsi), %rdx
+ movq 216(%r9), %rax
+ movq %rdx, 208(%r9)
+ adcq 24(%rsi), %rax
+ movq 224(%r9), %rdx
+ movq %rax, 216(%r9)
+ adcq 32(%rsi), %rdx
+ movq 232(%r9), %rax
+ movq %rdx, 224(%r9)
+ adcq 40(%rsi), %rax
+ movq 240(%r9), %rdx
+ movq %rax, 232(%r9)
+ adcq 48(%rsi), %rdx
+ movq 248(%r9), %rax
+ movq %rdx, 240(%r9)
+ adcq 56(%rsi), %rax
+ movq 256(%r9), %rdx
+ movq %rax, 248(%r9)
+ adcq 64(%rsi), %rdx
+ movq 264(%r9), %rax
+ movq %rdx, 256(%r9)
+ adcq 72(%rsi), %rax
+ movq 272(%r9), %rdx
+ movq %rax, 264(%r9)
+ adcq 80(%rsi), %rdx
+ movq 280(%r9), %rax
+ movq %rdx, 272(%r9)
+ adcq 88(%rsi), %rax
+ movq 288(%r9), %rdx
+ movq %rax, 280(%r9)
+ adcq 96(%rsi), %rdx
+ movq 296(%r9), %rax
+ movq %rdx, 288(%r9)
+ adcq 104(%rsi), %rax
+ movq 304(%r9), %rdx
+ movq %rax, 296(%r9)
+ adcq 112(%rsi), %rdx
+ movq 312(%r9), %rax
+ movq %rdx, 304(%r9)
+ adcq 120(%rsi), %rax
+ movq 320(%r9), %rdx
+ movq %rax, 312(%r9)
+ adcq 128(%rsi), %rdx
+ movq 328(%r9), %rax
+ movq %rdx, 320(%r9)
+ adcq 136(%rsi), %rax
+ movq 336(%r9), %rdx
+ movq %rax, 328(%r9)
+ adcq 144(%rsi), %rdx
+ movq 344(%r9), %rax
+ movq %rdx, 336(%r9)
+ adcq 152(%rsi), %rax
+ movq 352(%r9), %rdx
+ movq %rax, 344(%r9)
+ adcq 160(%rsi), %rdx
+ movq 360(%r9), %rax
+ movq %rdx, 352(%r9)
+ adcq 168(%rsi), %rax
+ movq 368(%r9), %rdx
+ movq %rax, 360(%r9)
+ adcq 176(%rsi), %rdx
+ movq 376(%r9), %rax
+ movq %rdx, 368(%r9)
+ adcq 184(%rsi), %rax
+ movq 384(%r9), %rdx
+ movq %rax, 376(%r9)
+ adcq 192(%rsi), %rdx
+ movq %rdx, 384(%r9)
+ # Add to zero
+ movq 200(%rsi), %rdx
+ adcq $0, %rdx
+ movq 208(%rsi), %rax
+ movq %rdx, 392(%r9)
+ adcq $0, %rax
+ movq 216(%rsi), %rdx
+ movq %rax, 400(%r9)
+ adcq $0, %rdx
+ movq 224(%rsi), %rax
+ movq %rdx, 408(%r9)
+ adcq $0, %rax
+ movq 232(%rsi), %rdx
+ movq %rax, 416(%r9)
+ adcq $0, %rdx
+ movq 240(%rsi), %rax
+ movq %rdx, 424(%r9)
+ adcq $0, %rax
+ movq 248(%rsi), %rdx
+ movq %rax, 432(%r9)
+ adcq $0, %rdx
+ movq 256(%rsi), %rax
+ movq %rdx, 440(%r9)
+ adcq $0, %rax
+ movq 264(%rsi), %rdx
+ movq %rax, 448(%r9)
+ adcq $0, %rdx
+ movq 272(%rsi), %rax
+ movq %rdx, 456(%r9)
+ adcq $0, %rax
+ movq 280(%rsi), %rdx
+ movq %rax, 464(%r9)
+ adcq $0, %rdx
+ movq 288(%rsi), %rax
+ movq %rdx, 472(%r9)
+ adcq $0, %rax
+ movq 296(%rsi), %rdx
+ movq %rax, 480(%r9)
+ adcq $0, %rdx
+ movq 304(%rsi), %rax
+ movq %rdx, 488(%r9)
+ adcq $0, %rax
+ movq 312(%rsi), %rdx
+ movq %rax, 496(%r9)
+ adcq $0, %rdx
+ movq 320(%rsi), %rax
+ movq %rdx, 504(%r9)
+ adcq $0, %rax
+ movq 328(%rsi), %rdx
+ movq %rax, 512(%r9)
+ adcq $0, %rdx
+ movq 336(%rsi), %rax
+ movq %rdx, 520(%r9)
+ adcq $0, %rax
+ movq 344(%rsi), %rdx
+ movq %rax, 528(%r9)
+ adcq $0, %rdx
+ movq 352(%rsi), %rax
+ movq %rdx, 536(%r9)
+ adcq $0, %rax
+ movq 360(%rsi), %rdx
+ movq %rax, 544(%r9)
+ adcq $0, %rdx
+ movq 368(%rsi), %rax
+ movq %rdx, 552(%r9)
+ adcq $0, %rax
+ movq 376(%rsi), %rdx
+ movq %rax, 560(%r9)
+ adcq $0, %rdx
+ movq %rdx, 568(%r9)
+ addq $984, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_sqr_avx2_48,.-sp_3072_sqr_avx2_48
+#endif /* __APPLE__ */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_d_48
+.type sp_3072_mul_d_48,@function
+.align 16
+sp_3072_mul_d_48:
+#else
+.globl _sp_3072_mul_d_48
+.p2align 4
+_sp_3072_mul_d_48:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r (49 x 64-bit words out), rsi = a (48 words), rdx = b.
+ # b is copied to rcx up front because one-operand mulq clobbers rdx with the
+ # product's high half. r8/r9/r10 rotate roles every limb: one holds the
+ # running low word, one the carry-in from the previous limb, and one is
+ # zeroed to collect the next carry. Leaf function - no stack use.
+ movq %rdx, %rcx
+ # A[0] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ movq %r8, (%rdi)
+ # A[1] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 8(%rsi)
+ addq %rax, %r9
+ movq %r9, 8(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 16(%rsi)
+ addq %rax, %r10
+ movq %r10, 16(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 24(%rsi)
+ addq %rax, %r8
+ movq %r8, 24(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 32(%rsi)
+ addq %rax, %r9
+ movq %r9, 32(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 40(%rsi)
+ addq %rax, %r10
+ movq %r10, 40(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 48(%rsi)
+ addq %rax, %r8
+ movq %r8, 48(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 56(%rsi)
+ addq %rax, %r9
+ movq %r9, 56(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 64(%rsi)
+ addq %rax, %r10
+ movq %r10, 64(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 72(%rsi)
+ addq %rax, %r8
+ movq %r8, 72(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 80(%rsi)
+ addq %rax, %r9
+ movq %r9, 80(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 88(%rsi)
+ addq %rax, %r10
+ movq %r10, 88(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[12] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 96(%rsi)
+ addq %rax, %r8
+ movq %r8, 96(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[13] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 104(%rsi)
+ addq %rax, %r9
+ movq %r9, 104(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[14] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 112(%rsi)
+ addq %rax, %r10
+ movq %r10, 112(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[15] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 120(%rsi)
+ addq %rax, %r8
+ movq %r8, 120(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[16] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 128(%rsi)
+ addq %rax, %r9
+ movq %r9, 128(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[17] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 136(%rsi)
+ addq %rax, %r10
+ movq %r10, 136(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[18] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 144(%rsi)
+ addq %rax, %r8
+ movq %r8, 144(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[19] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 152(%rsi)
+ addq %rax, %r9
+ movq %r9, 152(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[20] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 160(%rsi)
+ addq %rax, %r10
+ movq %r10, 160(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[21] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 168(%rsi)
+ addq %rax, %r8
+ movq %r8, 168(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[22] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 176(%rsi)
+ addq %rax, %r9
+ movq %r9, 176(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[23] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 184(%rsi)
+ addq %rax, %r10
+ movq %r10, 184(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[24] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 192(%rsi)
+ addq %rax, %r8
+ movq %r8, 192(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[25] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 200(%rsi)
+ addq %rax, %r9
+ movq %r9, 200(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[26] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 208(%rsi)
+ addq %rax, %r10
+ movq %r10, 208(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[27] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 216(%rsi)
+ addq %rax, %r8
+ movq %r8, 216(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[28] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 224(%rsi)
+ addq %rax, %r9
+ movq %r9, 224(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[29] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 232(%rsi)
+ addq %rax, %r10
+ movq %r10, 232(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[30] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 240(%rsi)
+ addq %rax, %r8
+ movq %r8, 240(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[31] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 248(%rsi)
+ addq %rax, %r9
+ movq %r9, 248(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[32] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 256(%rsi)
+ addq %rax, %r10
+ movq %r10, 256(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[33] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 264(%rsi)
+ addq %rax, %r8
+ movq %r8, 264(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[34] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 272(%rsi)
+ addq %rax, %r9
+ movq %r9, 272(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[35] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 280(%rsi)
+ addq %rax, %r10
+ movq %r10, 280(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[36] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 288(%rsi)
+ addq %rax, %r8
+ movq %r8, 288(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[37] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 296(%rsi)
+ addq %rax, %r9
+ movq %r9, 296(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[38] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 304(%rsi)
+ addq %rax, %r10
+ movq %r10, 304(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[39] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 312(%rsi)
+ addq %rax, %r8
+ movq %r8, 312(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[40] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 320(%rsi)
+ addq %rax, %r9
+ movq %r9, 320(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[41] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 328(%rsi)
+ addq %rax, %r10
+ movq %r10, 328(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[42] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 336(%rsi)
+ addq %rax, %r8
+ movq %r8, 336(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[43] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 344(%rsi)
+ addq %rax, %r9
+ movq %r9, 344(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[44] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 352(%rsi)
+ addq %rax, %r10
+ movq %r10, 352(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[45] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 360(%rsi)
+ addq %rax, %r8
+ movq %r8, 360(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[46] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 368(%rsi)
+ addq %rax, %r9
+ movq %r9, 368(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[47] * B
+ # Final limb: no further carry register is needed; the top carry word
+ # becomes r[48], so the result occupies 49 words.
+ movq %rcx, %rax
+ mulq 376(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ movq %r10, 376(%rdi)
+ movq %r8, 384(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_d_48,.-sp_3072_mul_d_48
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_3072_cond_sub_24
+.type sp_3072_cond_sub_24,@function
+.align 16
+sp_3072_cond_sub_24:
+#else
+.globl _sp_3072_cond_sub_24
+.p2align 4
+_sp_3072_cond_sub_24:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = m (all-ones or zero).
+ # Constant time: b AND m is first materialised in a 192-byte stack buffer
+ # (24 words), then unconditionally subtracted from a. Both paths execute
+ # the same instruction sequence regardless of m.
+ # Returns in rax: 0 on no borrow, -1 (all ones) if the subtract borrowed.
+ subq $192, %rsp
+ movq $0, %rax
+ # Phase 1: masked copy of b to the stack buffer, two words at a time.
+ movq (%rdx), %r8
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 48(%rdx), %r8
+ movq 56(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 48(%rsp)
+ movq %r9, 56(%rsp)
+ movq 64(%rdx), %r8
+ movq 72(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq 80(%rdx), %r8
+ movq 88(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 80(%rsp)
+ movq %r9, 88(%rsp)
+ movq 96(%rdx), %r8
+ movq 104(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq 112(%rdx), %r8
+ movq 120(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 112(%rsp)
+ movq %r9, 120(%rsp)
+ movq 128(%rdx), %r8
+ movq 136(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 128(%rsp)
+ movq %r9, 136(%rsp)
+ movq 144(%rdx), %r8
+ movq 152(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 144(%rsp)
+ movq %r9, 152(%rsp)
+ movq 160(%rdx), %r8
+ movq 168(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 160(%rsp)
+ movq %r9, 168(%rsp)
+ movq 176(%rdx), %r8
+ movq 184(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 176(%rsp)
+ movq %r9, 184(%rsp)
+ # Phase 2: r = a - buffer, one sub then 23 sbb to chain the borrow.
+ # Stores trail one limb behind the subtract so the borrow flag is never
+ # disturbed between sbb instructions (movq preserves flags).
+ movq (%rsi), %r8
+ movq (%rsp), %rdx
+ subq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, (%rdi)
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq 48(%rsi), %r8
+ movq 48(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 40(%rdi)
+ movq 56(%rsi), %r9
+ movq 56(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 48(%rdi)
+ movq 64(%rsi), %r8
+ movq 64(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 56(%rdi)
+ movq 72(%rsi), %r9
+ movq 72(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 64(%rdi)
+ movq 80(%rsi), %r8
+ movq 80(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 72(%rdi)
+ movq 88(%rsi), %r9
+ movq 88(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 80(%rdi)
+ movq 96(%rsi), %r8
+ movq 96(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 88(%rdi)
+ movq 104(%rsi), %r9
+ movq 104(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rsi), %r8
+ movq 112(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rsi), %r9
+ movq 120(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 112(%rdi)
+ movq 128(%rsi), %r8
+ movq 128(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 120(%rdi)
+ movq 136(%rsi), %r9
+ movq 136(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 128(%rdi)
+ movq 144(%rsi), %r8
+ movq 144(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 136(%rdi)
+ movq 152(%rsi), %r9
+ movq 152(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rsi), %r8
+ movq 160(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 152(%rdi)
+ movq 168(%rsi), %r9
+ movq 168(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 160(%rdi)
+ movq 176(%rsi), %r8
+ movq 176(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 168(%rdi)
+ movq 184(%rsi), %r9
+ movq 184(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 176(%rdi)
+ movq %r9, 184(%rdi)
+ # rax = 0 - borrow: 0 if no borrow, all-ones if the subtract underflowed.
+ sbbq $0, %rax
+ addq $192, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_cond_sub_24,.-sp_3072_cond_sub_24
+#endif /* __APPLE__ */
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mont_reduce_24
+.type sp_3072_mont_reduce_24,@function
+.align 16
+sp_3072_mont_reduce_24:
+#else
+.globl _sp_3072_mont_reduce_24
+.p2align 4
+_sp_3072_mont_reduce_24:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = a (in/out, walked forward one word per iteration),
+ # rsi = m, rdx = mp (moved to rcx since mulq clobbers rdx).
+ # Register roles inside the loop:
+ #   r8  = remaining iteration count (24 down to 1)
+ #   r11 = mu = a[i] * mp (mod 2^64)
+ #   r13, r14 = cached a[i], a[i+1] kept in registers across iterations
+ #   r12 = scratch for the current a[i+k] word
+ #   r9/r10 = alternating carry words for the add chain
+ #   r15 = top-word overflow accumulated across all iterations
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq %rdx, %rcx
+ xorq %r15, %r15
+ # i = 24
+ movq $24, %r8
+ movq (%rdi), %r13
+ movq 8(%rdi), %r14
+L_mont_loop_24:
+ # mu = a[i] * mp
+ movq %r13, %r11
+ imulq %rcx, %r11
+ # a[i+0] += m[0] * mu  (result is 0 mod 2^64 by choice of mu; only the
+ # carry in r10 matters, so a[i+0] is never stored back)
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r10
+ # a[i+1] += m[1] * mu  (kept in r13 as next iteration's a[i])
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 8(%rsi)
+ movq %r14, %r13
+ addq %rax, %r13
+ adcq %rdx, %r9
+ addq %r10, %r13
+ adcq $0, %r9
+ # a[i+2] += m[2] * mu  (kept in r14 as next iteration's a[i+1])
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 16(%rsi)
+ movq 16(%rdi), %r14
+ addq %rax, %r14
+ adcq %rdx, %r10
+ addq %r9, %r14
+ adcq $0, %r10
+ # a[i+3] += m[3] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 24(%rsi)
+ movq 24(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 24(%rdi)
+ adcq $0, %r9
+ # a[i+4] += m[4] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 32(%rsi)
+ movq 32(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 32(%rdi)
+ adcq $0, %r10
+ # a[i+5] += m[5] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 40(%rsi)
+ movq 40(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 40(%rdi)
+ adcq $0, %r9
+ # a[i+6] += m[6] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 48(%rsi)
+ movq 48(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 48(%rdi)
+ adcq $0, %r10
+ # a[i+7] += m[7] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 56(%rsi)
+ movq 56(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 56(%rdi)
+ adcq $0, %r9
+ # a[i+8] += m[8] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 64(%rsi)
+ movq 64(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 64(%rdi)
+ adcq $0, %r10
+ # a[i+9] += m[9] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 72(%rsi)
+ movq 72(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 72(%rdi)
+ adcq $0, %r9
+ # a[i+10] += m[10] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 80(%rsi)
+ movq 80(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 80(%rdi)
+ adcq $0, %r10
+ # a[i+11] += m[11] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 88(%rsi)
+ movq 88(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 88(%rdi)
+ adcq $0, %r9
+ # a[i+12] += m[12] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 96(%rsi)
+ movq 96(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 96(%rdi)
+ adcq $0, %r10
+ # a[i+13] += m[13] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 104(%rsi)
+ movq 104(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 104(%rdi)
+ adcq $0, %r9
+ # a[i+14] += m[14] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 112(%rsi)
+ movq 112(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 112(%rdi)
+ adcq $0, %r10
+ # a[i+15] += m[15] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 120(%rsi)
+ movq 120(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 120(%rdi)
+ adcq $0, %r9
+ # a[i+16] += m[16] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 128(%rsi)
+ movq 128(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 128(%rdi)
+ adcq $0, %r10
+ # a[i+17] += m[17] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 136(%rsi)
+ movq 136(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 136(%rdi)
+ adcq $0, %r9
+ # a[i+18] += m[18] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 144(%rsi)
+ movq 144(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 144(%rdi)
+ adcq $0, %r10
+ # a[i+19] += m[19] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 152(%rsi)
+ movq 152(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 152(%rdi)
+ adcq $0, %r9
+ # a[i+20] += m[20] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 160(%rsi)
+ movq 160(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 160(%rdi)
+ adcq $0, %r10
+ # a[i+21] += m[21] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 168(%rsi)
+ movq 168(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 168(%rdi)
+ adcq $0, %r9
+ # a[i+22] += m[22] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 176(%rsi)
+ movq 176(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 176(%rdi)
+ adcq $0, %r10
+ # a[i+23] += m[23] * mu
+ # Top limb: fold the saved overflow r15 into the high product word, then
+ # propagate into a[i+24] in memory and re-latch the new overflow in r15.
+ movq %r11, %rax
+ mulq 184(%rsi)
+ movq 184(%rdi), %r12
+ addq %rax, %r10
+ adcq %r15, %rdx
+ movq $0, %r15
+ adcq $0, %r15
+ addq %r10, %r12
+ movq %r12, 184(%rdi)
+ adcq %rdx, 192(%rdi)
+ adcq $0, %r15
+ # i -= 1
+ addq $8, %rdi
+ decq %r8
+ jnz L_mont_loop_24
+ # Flush the two cached words; rdi now points at a[24], the reduced result.
+ movq %r13, (%rdi)
+ movq %r14, 8(%rdi)
+ # Final conditional subtract of m when the reduction overflowed:
+ # mask rcx = -overflow, args become r = a (rdi), a = a+24 (rsi), b = m (rdx).
+ negq %r15
+ movq %r15, %rcx
+ movq %rsi, %rdx
+ movq %rdi, %rsi
+ subq $192, %rdi
+ # NOTE(review): after the four pushes rsp is 8 mod 16 at this call, not the
+ # ABI's 16-byte alignment; cond_sub_24 uses only GP moves so this appears
+ # benign - confirm against the generator if SSE is ever added there.
+#ifndef __APPLE__
+ callq sp_3072_cond_sub_24@plt
+#else
+ callq _sp_3072_cond_sub_24
+#endif /* __APPLE__ */
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mont_reduce_24,.-sp_3072_mont_reduce_24
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_3072_cond_sub_avx2_24
+.type sp_3072_cond_sub_avx2_24,@function
+.align 16
+sp_3072_cond_sub_avx2_24:
+#else
+.globl _sp_3072_cond_sub_avx2_24
+.p2align 4
+_sp_3072_cond_sub_avx2_24:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = m. BMI2 variant: no stack
+ # buffer. pextq with mask m acts as a flag-preserving conditional clear
+ # (m = all-ones keeps the word, m = 0 yields 0), so each b-word can be
+ # masked inside the sub/sbb borrow chain without touching the carry flag.
+ # Returns in rax: 0 on no borrow, all-ones if the subtract borrowed.
+ movq $0, %rax
+ movq (%rdx), %r10
+ movq (%rsi), %r8
+ pextq %rcx, %r10, %r10
+ subq %r10, %r8
+ movq 8(%rdx), %r10
+ movq 8(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, (%rdi)
+ sbbq %r10, %r9
+ movq 16(%rdx), %r8
+ movq 16(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 8(%rdi)
+ sbbq %r8, %r10
+ movq 24(%rdx), %r9
+ movq 24(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 16(%rdi)
+ sbbq %r9, %r8
+ movq 32(%rdx), %r10
+ movq 32(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 24(%rdi)
+ sbbq %r10, %r9
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 32(%rdi)
+ sbbq %r8, %r10
+ movq 48(%rdx), %r9
+ movq 48(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 40(%rdi)
+ sbbq %r9, %r8
+ movq 56(%rdx), %r10
+ movq 56(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 48(%rdi)
+ sbbq %r10, %r9
+ movq 64(%rdx), %r8
+ movq 64(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 56(%rdi)
+ sbbq %r8, %r10
+ movq 72(%rdx), %r9
+ movq 72(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 64(%rdi)
+ sbbq %r9, %r8
+ movq 80(%rdx), %r10
+ movq 80(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 72(%rdi)
+ sbbq %r10, %r9
+ movq 88(%rdx), %r8
+ movq 88(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 80(%rdi)
+ sbbq %r8, %r10
+ movq 96(%rdx), %r9
+ movq 96(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 88(%rdi)
+ sbbq %r9, %r8
+ movq 104(%rdx), %r10
+ movq 104(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 96(%rdi)
+ sbbq %r10, %r9
+ movq 112(%rdx), %r8
+ movq 112(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 104(%rdi)
+ sbbq %r8, %r10
+ movq 120(%rdx), %r9
+ movq 120(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 112(%rdi)
+ sbbq %r9, %r8
+ movq 128(%rdx), %r10
+ movq 128(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 120(%rdi)
+ sbbq %r10, %r9
+ movq 136(%rdx), %r8
+ movq 136(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 128(%rdi)
+ sbbq %r8, %r10
+ movq 144(%rdx), %r9
+ movq 144(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 136(%rdi)
+ sbbq %r9, %r8
+ movq 152(%rdx), %r10
+ movq 152(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 144(%rdi)
+ sbbq %r10, %r9
+ movq 160(%rdx), %r8
+ movq 160(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 152(%rdi)
+ sbbq %r8, %r10
+ movq 168(%rdx), %r9
+ movq 168(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 160(%rdi)
+ sbbq %r9, %r8
+ movq 176(%rdx), %r10
+ movq 176(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 168(%rdi)
+ sbbq %r10, %r9
+ movq 184(%rdx), %r8
+ movq 184(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 176(%rdi)
+ sbbq %r8, %r10
+ movq %r10, 184(%rdi)
+ # rax = 0 - borrow: 0 if no borrow, all-ones on underflow.
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_cond_sub_avx2_24,.-sp_3072_cond_sub_avx2_24
+#endif /* __APPLE__ */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_d_24
+.type sp_3072_mul_d_24,@function
+.align 16
+sp_3072_mul_d_24:
+#else
+.globl _sp_3072_mul_d_24
+.p2align 4
+_sp_3072_mul_d_24:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r (25 x 64-bit words out), rsi = a (24 words), rdx = b.
+ # Same rotating-carry scheme as sp_3072_mul_d_48, over 24 limbs: b lives in
+ # rcx (mulq clobbers rdx); r8/r9/r10 cycle as low word / carry / next carry.
+ movq %rdx, %rcx
+ # A[0] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ movq %r8, (%rdi)
+ # A[1] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 8(%rsi)
+ addq %rax, %r9
+ movq %r9, 8(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 16(%rsi)
+ addq %rax, %r10
+ movq %r10, 16(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 24(%rsi)
+ addq %rax, %r8
+ movq %r8, 24(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 32(%rsi)
+ addq %rax, %r9
+ movq %r9, 32(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 40(%rsi)
+ addq %rax, %r10
+ movq %r10, 40(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 48(%rsi)
+ addq %rax, %r8
+ movq %r8, 48(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 56(%rsi)
+ addq %rax, %r9
+ movq %r9, 56(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 64(%rsi)
+ addq %rax, %r10
+ movq %r10, 64(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 72(%rsi)
+ addq %rax, %r8
+ movq %r8, 72(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 80(%rsi)
+ addq %rax, %r9
+ movq %r9, 80(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 88(%rsi)
+ addq %rax, %r10
+ movq %r10, 88(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[12] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 96(%rsi)
+ addq %rax, %r8
+ movq %r8, 96(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[13] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 104(%rsi)
+ addq %rax, %r9
+ movq %r9, 104(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[14] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 112(%rsi)
+ addq %rax, %r10
+ movq %r10, 112(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[15] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 120(%rsi)
+ addq %rax, %r8
+ movq %r8, 120(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[16] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 128(%rsi)
+ addq %rax, %r9
+ movq %r9, 128(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[17] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 136(%rsi)
+ addq %rax, %r10
+ movq %r10, 136(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[18] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 144(%rsi)
+ addq %rax, %r8
+ movq %r8, 144(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[19] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 152(%rsi)
+ addq %rax, %r9
+ movq %r9, 152(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[20] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 160(%rsi)
+ addq %rax, %r10
+ movq %r10, 160(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[21] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 168(%rsi)
+ addq %rax, %r8
+ movq %r8, 168(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[22] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 176(%rsi)
+ addq %rax, %r9
+ movq %r9, 176(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[23] * B
+ # Final limb: top carry word becomes r[24]; result occupies 25 words.
+ movq %rcx, %rax
+ mulq 184(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ movq %r10, 184(%rdi)
+ movq %r8, 192(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_d_24,.-sp_3072_mul_d_24
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_d_avx2_24
+.type sp_3072_mul_d_avx2_24,@function
+.align 16
+sp_3072_mul_d_avx2_24:
+#else
+.globl _sp_3072_mul_d_avx2_24
+.p2align 4
+_sp_3072_mul_d_avx2_24:
+#endif /* __APPLE__ */
+ # BMI2/ADX variant: b is parked in rax then restored into rdx, which is
+ # mulx's implicit multiplicand. Two independent carry chains run at once:
+ # adcx propagates through CF and adox through OF, so the low and high
+ # product halves interleave without serialising on one flag.
+ # r11 stays zero throughout (used to reset the next accumulator via movq,
+ # which leaves both flag chains intact); rcx/r8 receive each mulx pair.
+ movq %rdx, %rax
+ # A[0] * B
+ movq %rax, %rdx
+ xorq %r11, %r11
+ mulxq (%rsi), %r9, %r10
+ movq %r9, (%rdi)
+ # A[1] * B
+ mulxq 8(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 8(%rdi)
+ adoxq %r8, %r9
+ # A[2] * B
+ mulxq 16(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 16(%rdi)
+ adoxq %r8, %r10
+ # A[3] * B
+ mulxq 24(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 24(%rdi)
+ adoxq %r8, %r9
+ # A[4] * B
+ mulxq 32(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 32(%rdi)
+ adoxq %r8, %r10
+ # A[5] * B
+ mulxq 40(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 40(%rdi)
+ adoxq %r8, %r9
+ # A[6] * B
+ mulxq 48(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 48(%rdi)
+ adoxq %r8, %r10
+ # A[7] * B
+ mulxq 56(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 56(%rdi)
+ adoxq %r8, %r9
+ # A[8] * B
+ mulxq 64(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 64(%rdi)
+ adoxq %r8, %r10
+ # A[9] * B
+ mulxq 72(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 72(%rdi)
+ adoxq %r8, %r9
+ # A[10] * B
+ mulxq 80(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 80(%rdi)
+ adoxq %r8, %r10
+ # A[11] * B
+ mulxq 88(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 88(%rdi)
+ adoxq %r8, %r9
+ # A[12] * B
+ mulxq 96(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 96(%rdi)
+ adoxq %r8, %r10
+ # A[13] * B
+ mulxq 104(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 104(%rdi)
+ adoxq %r8, %r9
+ # A[14] * B
+ mulxq 112(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 112(%rdi)
+ adoxq %r8, %r10
+ # A[15] * B
+ mulxq 120(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 120(%rdi)
+ adoxq %r8, %r9
+ # A[16] * B
+ mulxq 128(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 128(%rdi)
+ adoxq %r8, %r10
+ # A[17] * B
+ mulxq 136(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 136(%rdi)
+ adoxq %r8, %r9
+ # A[18] * B
+ mulxq 144(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 144(%rdi)
+ adoxq %r8, %r10
+ # A[19] * B
+ mulxq 152(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 152(%rdi)
+ adoxq %r8, %r9
+ # A[20] * B
+ mulxq 160(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 160(%rdi)
+ adoxq %r8, %r10
+ # A[21] * B
+ mulxq 168(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 168(%rdi)
+ adoxq %r8, %r9
+ # A[22] * B
+ mulxq 176(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 176(%rdi)
+ adoxq %r8, %r10
+ # A[23] * B
+ # Final limb: merge both carry chains - adox folds the OF chain into r9,
+ # then adcx with the zero register r11 folds in any remaining CF carry.
+ mulxq 184(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ adcxq %r11, %r9
+ movq %r10, 184(%rdi)
+ movq %r9, 192(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_d_avx2_24,.-sp_3072_mul_d_avx2_24
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+#ifndef __APPLE__
+.globl sp_3072_cmp_24
+.type sp_3072_cmp_24,@function
+.align 16
+sp_3072_cmp_24:
+#else
+.globl _sp_3072_cmp_24
+.p2align 4
+_sp_3072_cmp_24:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = a, rsi = b (24 little-endian 64-bit words each).
+ # rcx = constant 0, r8 = constant 1.
+ # rdx = "still equal" mask: -1 until the first differing word, then 0
+ # (cmovnz below clears it; the andq masking then forces every later
+ # word difference to 0, so no data-dependent timing is introduced).
+ # rax = result accumulator, starts at -1.
+ xorq %rcx, %rcx
+ movq $-1, %rdx
+ movq $-1, %rax
+ movq $1, %r8
+ # Walk from the most significant word (offset 184) down to word 0.
+ # Each group: mask both words, subtract, then cmova (a>b -> rax=1),
+ # cmovc (a<b -> rax=rdx, still -1 here), cmovnz (difference seen ->
+ # clear the mask). No branches anywhere.
+ movq 184(%rdi), %r9
+ movq 184(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 176(%rdi), %r9
+ movq 176(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 168(%rdi), %r9
+ movq 168(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 160(%rdi), %r9
+ movq 160(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 152(%rdi), %r9
+ movq 152(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 144(%rdi), %r9
+ movq 144(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 136(%rdi), %r9
+ movq 136(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 128(%rdi), %r9
+ movq 128(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 120(%rdi), %r9
+ movq 120(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 112(%rdi), %r9
+ movq 112(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 104(%rdi), %r9
+ movq 104(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 96(%rdi), %r9
+ movq 96(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 88(%rdi), %r9
+ movq 88(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 80(%rdi), %r9
+ movq 80(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 72(%rdi), %r9
+ movq 72(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 64(%rdi), %r9
+ movq 64(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 56(%rdi), %r9
+ movq 56(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 48(%rdi), %r9
+ movq 48(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 40(%rdi), %r9
+ movq 40(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 32(%rdi), %r9
+ movq 32(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 24(%rdi), %r9
+ movq 24(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 16(%rdi), %r9
+ movq 16(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 8(%rdi), %r9
+ movq 8(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq (%rdi), %r9
+ movq (%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ # If every word matched, rax and rdx are both still -1 and the xor
+ # yields 0; otherwise rdx is 0 and rax keeps its 1 / -1 verdict.
+ xorq %rdx, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_cmp_24,.-sp_3072_cmp_24
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mont_reduce_avx2_24
+.type sp_3072_mont_reduce_avx2_24,@function
+.align 16
+sp_3072_mont_reduce_avx2_24:
+#else
+.globl _sp_3072_mont_reduce_avx2_24
+.p2align 4
+_sp_3072_mont_reduce_avx2_24:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = a (reduced in place), rsi = m (24 words),
+ # rdx = mp, moved to r8 because mulx uses rdx as its implicit operand.
+ # r9 = loop counter i, r13 caches a[i] across iterations,
+ # r14 = carry into the word above the window, r12 stays 0 and is used
+ # to fold the adcx (CF) and adox (OF) chains into r14.
+ push %r12
+ push %r13
+ push %r14
+ movq %rdx, %r8
+ xorq %r14, %r14
+ # i = 24
+ movq $24, %r9
+ movq (%rdi), %r13
+ # Bias rdi by 96 so every access in the 24-word window below uses a
+ # signed-byte displacement (-88 .. 96).
+ addq $96, %rdi
+ xorq %r12, %r12
+L_mont_loop_avx2_24:
+ # mu = a[i] * mp
+ movq %r13, %rdx
+ movq %r13, %r10
+ imulq %r8, %rdx
+ xorq %r12, %r12
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rax, %rcx
+ movq -88(%rdi), %r13
+ adcxq %rax, %r10
+ adoxq %rcx, %r13
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rax, %rcx
+ movq -80(%rdi), %r10
+ adcxq %rax, %r13
+ adoxq %rcx, %r10
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rax, %rcx
+ movq -72(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -80(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rax, %rcx
+ movq -64(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -72(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rax, %rcx
+ movq -56(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -64(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rax, %rcx
+ movq -48(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -56(%rdi)
+ # a[i+6] += m[6] * mu
+ mulxq 48(%rsi), %rax, %rcx
+ movq -40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -48(%rdi)
+ # a[i+7] += m[7] * mu
+ mulxq 56(%rsi), %rax, %rcx
+ movq -32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -40(%rdi)
+ # a[i+8] += m[8] * mu
+ mulxq 64(%rsi), %rax, %rcx
+ movq -24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -32(%rdi)
+ # a[i+9] += m[9] * mu
+ mulxq 72(%rsi), %rax, %rcx
+ movq -16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -24(%rdi)
+ # a[i+10] += m[10] * mu
+ mulxq 80(%rsi), %rax, %rcx
+ movq -8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -16(%rdi)
+ # a[i+11] += m[11] * mu
+ mulxq 88(%rsi), %rax, %rcx
+ movq (%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -8(%rdi)
+ # a[i+12] += m[12] * mu
+ mulxq 96(%rsi), %rax, %rcx
+ movq 8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, (%rdi)
+ # a[i+13] += m[13] * mu
+ mulxq 104(%rsi), %rax, %rcx
+ movq 16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 8(%rdi)
+ # a[i+14] += m[14] * mu
+ mulxq 112(%rsi), %rax, %rcx
+ movq 24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 16(%rdi)
+ # a[i+15] += m[15] * mu
+ mulxq 120(%rsi), %rax, %rcx
+ movq 32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 24(%rdi)
+ # a[i+16] += m[16] * mu
+ mulxq 128(%rsi), %rax, %rcx
+ movq 40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 32(%rdi)
+ # a[i+17] += m[17] * mu
+ mulxq 136(%rsi), %rax, %rcx
+ movq 48(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 40(%rdi)
+ # a[i+18] += m[18] * mu
+ mulxq 144(%rsi), %rax, %rcx
+ movq 56(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 48(%rdi)
+ # a[i+19] += m[19] * mu
+ mulxq 152(%rsi), %rax, %rcx
+ movq 64(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 56(%rdi)
+ # a[i+20] += m[20] * mu
+ mulxq 160(%rsi), %rax, %rcx
+ movq 72(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 64(%rdi)
+ # a[i+21] += m[21] * mu
+ mulxq 168(%rsi), %rax, %rcx
+ movq 80(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 72(%rdi)
+ # a[i+22] += m[22] * mu
+ mulxq 176(%rsi), %rax, %rcx
+ movq 88(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 80(%rdi)
+ # a[i+23] += m[23] * mu
+ mulxq 184(%rsi), %rax, %rcx
+ movq 96(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 88(%rdi)
+ adcxq %r14, %r10
+ movq %r10, 96(%rdi)
+ # Fold both carry chains (CF from adcx, OF from adox) into the carry
+ # for the next iteration; r12 is 0 here.
+ movq %r12, %r14
+ adoxq %r12, %r14
+ adcxq %r12, %r14
+ # a += 1
+ addq $8, %rdi
+ # i -= 1
+ subq $1, %r9
+ jnz L_mont_loop_avx2_24
+ subq $96, %rdi
+ # Conditional subtract of m: r14 (0 or 1) is negated into an
+ # all-zeros / all-ones mask; pextq with that mask passes each word of m
+ # through unchanged or zeroes it, keeping the subtract branch-free.
+ # r8 points at the upper half of a (source words), rdi at the result
+ # area 192 bytes below it.
+ negq %r14
+ movq %rdi, %r8
+ subq $192, %rdi
+ movq (%rsi), %rcx
+ movq %r13, %rdx
+ pextq %r14, %rcx, %rcx
+ subq %rcx, %rdx
+ movq 8(%rsi), %rcx
+ movq 8(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, (%rdi)
+ sbbq %rcx, %rax
+ movq 16(%rsi), %rdx
+ movq 16(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 8(%rdi)
+ sbbq %rdx, %rcx
+ movq 24(%rsi), %rax
+ movq 24(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 16(%rdi)
+ sbbq %rax, %rdx
+ movq 32(%rsi), %rcx
+ movq 32(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 24(%rdi)
+ sbbq %rcx, %rax
+ movq 40(%rsi), %rdx
+ movq 40(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 32(%rdi)
+ sbbq %rdx, %rcx
+ movq 48(%rsi), %rax
+ movq 48(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 40(%rdi)
+ sbbq %rax, %rdx
+ movq 56(%rsi), %rcx
+ movq 56(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 48(%rdi)
+ sbbq %rcx, %rax
+ movq 64(%rsi), %rdx
+ movq 64(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 56(%rdi)
+ sbbq %rdx, %rcx
+ movq 72(%rsi), %rax
+ movq 72(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 64(%rdi)
+ sbbq %rax, %rdx
+ movq 80(%rsi), %rcx
+ movq 80(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 72(%rdi)
+ sbbq %rcx, %rax
+ movq 88(%rsi), %rdx
+ movq 88(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 80(%rdi)
+ sbbq %rdx, %rcx
+ movq 96(%rsi), %rax
+ movq 96(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 88(%rdi)
+ sbbq %rax, %rdx
+ movq 104(%rsi), %rcx
+ movq 104(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 96(%rdi)
+ sbbq %rcx, %rax
+ movq 112(%rsi), %rdx
+ movq 112(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 104(%rdi)
+ sbbq %rdx, %rcx
+ movq 120(%rsi), %rax
+ movq 120(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 112(%rdi)
+ sbbq %rax, %rdx
+ movq 128(%rsi), %rcx
+ movq 128(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 120(%rdi)
+ sbbq %rcx, %rax
+ movq 136(%rsi), %rdx
+ movq 136(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 128(%rdi)
+ sbbq %rdx, %rcx
+ movq 144(%rsi), %rax
+ movq 144(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 136(%rdi)
+ sbbq %rax, %rdx
+ movq 152(%rsi), %rcx
+ movq 152(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 144(%rdi)
+ sbbq %rcx, %rax
+ movq 160(%rsi), %rdx
+ movq 160(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 152(%rdi)
+ sbbq %rdx, %rcx
+ movq 168(%rsi), %rax
+ movq 168(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 160(%rdi)
+ sbbq %rax, %rdx
+ movq 176(%rsi), %rcx
+ movq 176(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 168(%rdi)
+ sbbq %rcx, %rax
+ movq 184(%rsi), %rdx
+ movq 184(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 176(%rdi)
+ sbbq %rdx, %rcx
+ movq %rcx, 184(%rdi)
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mont_reduce_avx2_24,.-sp_3072_mont_reduce_avx2_24
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_3072_cond_sub_48
+.type sp_3072_cond_sub_48,@function
+.align 16
+sp_3072_cond_sub_48:
+#else
+.globl _sp_3072_cond_sub_48
+.p2align 4
+_sp_3072_cond_sub_48:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = mask m (0 or -1).
+ # Stage 1: stage b & m into a 384-byte (48 word) stack buffer so that
+ # stage 2 can run one uninterrupted 48-word borrow chain against a.
+ subq $384, %rsp
+ movq $0, %rax
+ movq (%rdx), %r8
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 48(%rdx), %r8
+ movq 56(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 48(%rsp)
+ movq %r9, 56(%rsp)
+ movq 64(%rdx), %r8
+ movq 72(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq 80(%rdx), %r8
+ movq 88(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 80(%rsp)
+ movq %r9, 88(%rsp)
+ movq 96(%rdx), %r8
+ movq 104(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq 112(%rdx), %r8
+ movq 120(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 112(%rsp)
+ movq %r9, 120(%rsp)
+ movq 128(%rdx), %r8
+ movq 136(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 128(%rsp)
+ movq %r9, 136(%rsp)
+ movq 144(%rdx), %r8
+ movq 152(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 144(%rsp)
+ movq %r9, 152(%rsp)
+ movq 160(%rdx), %r8
+ movq 168(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 160(%rsp)
+ movq %r9, 168(%rsp)
+ movq 176(%rdx), %r8
+ movq 184(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 176(%rsp)
+ movq %r9, 184(%rsp)
+ movq 192(%rdx), %r8
+ movq 200(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 192(%rsp)
+ movq %r9, 200(%rsp)
+ movq 208(%rdx), %r8
+ movq 216(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 208(%rsp)
+ movq %r9, 216(%rsp)
+ movq 224(%rdx), %r8
+ movq 232(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 224(%rsp)
+ movq %r9, 232(%rsp)
+ movq 240(%rdx), %r8
+ movq 248(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 240(%rsp)
+ movq %r9, 248(%rsp)
+ movq 256(%rdx), %r8
+ movq 264(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 256(%rsp)
+ movq %r9, 264(%rsp)
+ movq 272(%rdx), %r8
+ movq 280(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 272(%rsp)
+ movq %r9, 280(%rsp)
+ movq 288(%rdx), %r8
+ movq 296(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 288(%rsp)
+ movq %r9, 296(%rsp)
+ movq 304(%rdx), %r8
+ movq 312(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 304(%rsp)
+ movq %r9, 312(%rsp)
+ movq 320(%rdx), %r8
+ movq 328(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 320(%rsp)
+ movq %r9, 328(%rsp)
+ movq 336(%rdx), %r8
+ movq 344(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 336(%rsp)
+ movq %r9, 344(%rsp)
+ movq 352(%rdx), %r8
+ movq 360(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 352(%rsp)
+ movq %r9, 360(%rsp)
+ movq 368(%rdx), %r8
+ movq 376(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 368(%rsp)
+ movq %r9, 376(%rsp)
+ # Stage 2: r = a - (b & m). The interleaved movq stores do not touch
+ # flags, so the sbbq borrow chain stays intact; the final sbbq leaves
+ # 0 (no borrow) or -1 (borrow) in rax as the return value.
+ movq (%rsi), %r8
+ movq (%rsp), %rdx
+ subq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, (%rdi)
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq 48(%rsi), %r8
+ movq 48(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 40(%rdi)
+ movq 56(%rsi), %r9
+ movq 56(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 48(%rdi)
+ movq 64(%rsi), %r8
+ movq 64(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 56(%rdi)
+ movq 72(%rsi), %r9
+ movq 72(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 64(%rdi)
+ movq 80(%rsi), %r8
+ movq 80(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 72(%rdi)
+ movq 88(%rsi), %r9
+ movq 88(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 80(%rdi)
+ movq 96(%rsi), %r8
+ movq 96(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 88(%rdi)
+ movq 104(%rsi), %r9
+ movq 104(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rsi), %r8
+ movq 112(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rsi), %r9
+ movq 120(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 112(%rdi)
+ movq 128(%rsi), %r8
+ movq 128(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 120(%rdi)
+ movq 136(%rsi), %r9
+ movq 136(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 128(%rdi)
+ movq 144(%rsi), %r8
+ movq 144(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 136(%rdi)
+ movq 152(%rsi), %r9
+ movq 152(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rsi), %r8
+ movq 160(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 152(%rdi)
+ movq 168(%rsi), %r9
+ movq 168(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 160(%rdi)
+ movq 176(%rsi), %r8
+ movq 176(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 168(%rdi)
+ movq 184(%rsi), %r9
+ movq 184(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 176(%rdi)
+ movq 192(%rsi), %r8
+ movq 192(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 184(%rdi)
+ movq 200(%rsi), %r9
+ movq 200(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 192(%rdi)
+ movq 208(%rsi), %r8
+ movq 208(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 200(%rdi)
+ movq 216(%rsi), %r9
+ movq 216(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 208(%rdi)
+ movq 224(%rsi), %r8
+ movq 224(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 216(%rdi)
+ movq 232(%rsi), %r9
+ movq 232(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 224(%rdi)
+ movq 240(%rsi), %r8
+ movq 240(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 232(%rdi)
+ movq 248(%rsi), %r9
+ movq 248(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 240(%rdi)
+ movq 256(%rsi), %r8
+ movq 256(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 248(%rdi)
+ movq 264(%rsi), %r9
+ movq 264(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 256(%rdi)
+ movq 272(%rsi), %r8
+ movq 272(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 264(%rdi)
+ movq 280(%rsi), %r9
+ movq 280(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 272(%rdi)
+ movq 288(%rsi), %r8
+ movq 288(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 280(%rdi)
+ movq 296(%rsi), %r9
+ movq 296(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 288(%rdi)
+ movq 304(%rsi), %r8
+ movq 304(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 296(%rdi)
+ movq 312(%rsi), %r9
+ movq 312(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 304(%rdi)
+ movq 320(%rsi), %r8
+ movq 320(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 312(%rdi)
+ movq 328(%rsi), %r9
+ movq 328(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 320(%rdi)
+ movq 336(%rsi), %r8
+ movq 336(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 328(%rdi)
+ movq 344(%rsi), %r9
+ movq 344(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 336(%rdi)
+ movq 352(%rsi), %r8
+ movq 352(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 344(%rdi)
+ movq 360(%rsi), %r9
+ movq 360(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 352(%rdi)
+ movq 368(%rsi), %r8
+ movq 368(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 360(%rdi)
+ movq 376(%rsi), %r9
+ movq 376(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 368(%rdi)
+ movq %r9, 376(%rdi)
+ sbbq $0, %rax
+ addq $384, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_cond_sub_48,.-sp_3072_cond_sub_48
+#endif /* __APPLE__ */
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mont_reduce_48
+.type sp_3072_mont_reduce_48,@function
+.align 16
+sp_3072_mont_reduce_48:
+#else
+.globl _sp_3072_mont_reduce_48
+.p2align 4
+_sp_3072_mont_reduce_48:
+#endif /* __APPLE__ */
+ # Non-ADX fallback (plain mulq) of the 48-word Montgomery reduction.
+ # SysV AMD64: rdi = a (reduced in place), rsi = m (48 words),
+ # rdx = mp, moved to rcx because mulq writes its product into rdx:rax.
+ # r8 = loop counter i, r11 = mu, r13/r14 cache a[i] and a[i+1] across
+ # iterations, r9/r10 alternate as the running carry,
+ # r15 = carry out of the word above the 48-word window.
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq %rdx, %rcx
+ xorq %r15, %r15
+ # i = 48
+ movq $48, %r8
+ movq (%rdi), %r13
+ movq 8(%rdi), %r14
+L_mont_loop_48:
+ # mu = a[i] * mp
+ movq %r13, %r11
+ imulq %rcx, %r11
+ # a[i+0] += m[0] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r10
+ # a[i+1] += m[1] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 8(%rsi)
+ movq %r14, %r13
+ addq %rax, %r13
+ adcq %rdx, %r9
+ addq %r10, %r13
+ adcq $0, %r9
+ # a[i+2] += m[2] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 16(%rsi)
+ movq 16(%rdi), %r14
+ addq %rax, %r14
+ adcq %rdx, %r10
+ addq %r9, %r14
+ adcq $0, %r10
+ # a[i+3] += m[3] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 24(%rsi)
+ movq 24(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 24(%rdi)
+ adcq $0, %r9
+ # a[i+4] += m[4] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 32(%rsi)
+ movq 32(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 32(%rdi)
+ adcq $0, %r10
+ # a[i+5] += m[5] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 40(%rsi)
+ movq 40(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 40(%rdi)
+ adcq $0, %r9
+ # a[i+6] += m[6] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 48(%rsi)
+ movq 48(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 48(%rdi)
+ adcq $0, %r10
+ # a[i+7] += m[7] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 56(%rsi)
+ movq 56(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 56(%rdi)
+ adcq $0, %r9
+ # a[i+8] += m[8] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 64(%rsi)
+ movq 64(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 64(%rdi)
+ adcq $0, %r10
+ # a[i+9] += m[9] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 72(%rsi)
+ movq 72(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 72(%rdi)
+ adcq $0, %r9
+ # a[i+10] += m[10] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 80(%rsi)
+ movq 80(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 80(%rdi)
+ adcq $0, %r10
+ # a[i+11] += m[11] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 88(%rsi)
+ movq 88(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 88(%rdi)
+ adcq $0, %r9
+ # a[i+12] += m[12] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 96(%rsi)
+ movq 96(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 96(%rdi)
+ adcq $0, %r10
+ # a[i+13] += m[13] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 104(%rsi)
+ movq 104(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 104(%rdi)
+ adcq $0, %r9
+ # a[i+14] += m[14] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 112(%rsi)
+ movq 112(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 112(%rdi)
+ adcq $0, %r10
+ # a[i+15] += m[15] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 120(%rsi)
+ movq 120(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 120(%rdi)
+ adcq $0, %r9
+ # a[i+16] += m[16] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 128(%rsi)
+ movq 128(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 128(%rdi)
+ adcq $0, %r10
+ # a[i+17] += m[17] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 136(%rsi)
+ movq 136(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 136(%rdi)
+ adcq $0, %r9
+ # a[i+18] += m[18] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 144(%rsi)
+ movq 144(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 144(%rdi)
+ adcq $0, %r10
+ # a[i+19] += m[19] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 152(%rsi)
+ movq 152(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 152(%rdi)
+ adcq $0, %r9
+ # a[i+20] += m[20] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 160(%rsi)
+ movq 160(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 160(%rdi)
+ adcq $0, %r10
+ # a[i+21] += m[21] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 168(%rsi)
+ movq 168(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 168(%rdi)
+ adcq $0, %r9
+ # a[i+22] += m[22] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 176(%rsi)
+ movq 176(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 176(%rdi)
+ adcq $0, %r10
+ # a[i+23] += m[23] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 184(%rsi)
+ movq 184(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 184(%rdi)
+ adcq $0, %r9
+ # a[i+24] += m[24] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 192(%rsi)
+ movq 192(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 192(%rdi)
+ adcq $0, %r10
+ # a[i+25] += m[25] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 200(%rsi)
+ movq 200(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 200(%rdi)
+ adcq $0, %r9
+ # a[i+26] += m[26] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 208(%rsi)
+ movq 208(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 208(%rdi)
+ adcq $0, %r10
+ # a[i+27] += m[27] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 216(%rsi)
+ movq 216(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 216(%rdi)
+ adcq $0, %r9
+ # a[i+28] += m[28] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 224(%rsi)
+ movq 224(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 224(%rdi)
+ adcq $0, %r10
+ # a[i+29] += m[29] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 232(%rsi)
+ movq 232(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 232(%rdi)
+ adcq $0, %r9
+ # a[i+30] += m[30] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 240(%rsi)
+ movq 240(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 240(%rdi)
+ adcq $0, %r10
+ # a[i+31] += m[31] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 248(%rsi)
+ movq 248(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 248(%rdi)
+ adcq $0, %r9
+ # a[i+32] += m[32] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 256(%rsi)
+ movq 256(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 256(%rdi)
+ adcq $0, %r10
+ # a[i+33] += m[33] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 264(%rsi)
+ movq 264(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 264(%rdi)
+ adcq $0, %r9
+ # a[i+34] += m[34] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 272(%rsi)
+ movq 272(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 272(%rdi)
+ adcq $0, %r10
+ # a[i+35] += m[35] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 280(%rsi)
+ movq 280(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 280(%rdi)
+ adcq $0, %r9
+ # a[i+36] += m[36] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 288(%rsi)
+ movq 288(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 288(%rdi)
+ adcq $0, %r10
+ # a[i+37] += m[37] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 296(%rsi)
+ movq 296(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 296(%rdi)
+ adcq $0, %r9
+ # a[i+38] += m[38] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 304(%rsi)
+ movq 304(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 304(%rdi)
+ adcq $0, %r10
+ # a[i+39] += m[39] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 312(%rsi)
+ movq 312(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 312(%rdi)
+ adcq $0, %r9
+ # a[i+40] += m[40] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 320(%rsi)
+ movq 320(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 320(%rdi)
+ adcq $0, %r10
+ # a[i+41] += m[41] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 328(%rsi)
+ movq 328(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 328(%rdi)
+ adcq $0, %r9
+ # a[i+42] += m[42] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 336(%rsi)
+ movq 336(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 336(%rdi)
+ adcq $0, %r10
+ # a[i+43] += m[43] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 344(%rsi)
+ movq 344(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 344(%rdi)
+ adcq $0, %r9
+ # a[i+44] += m[44] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 352(%rsi)
+ movq 352(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 352(%rdi)
+ adcq $0, %r10
+ # a[i+45] += m[45] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 360(%rsi)
+ movq 360(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 360(%rdi)
+ adcq $0, %r9
+ # a[i+46] += m[46] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 368(%rsi)
+ movq 368(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 368(%rdi)
+ adcq $0, %r10
+ # a[i+47] += m[47] * mu
+ movq %r11, %rax
+ mulq 376(%rsi)
+ movq 376(%rdi), %r12
+ addq %rax, %r10
+ adcq %r15, %rdx
+ movq $0, %r15
+ adcq $0, %r15
+ addq %r10, %r12
+ movq %r12, 376(%rdi)
+ adcq %rdx, 384(%rdi)
+ adcq $0, %r15
+ # i -= 1
+ addq $8, %rdi
+ decq %r8
+ jnz L_mont_loop_48
+ movq %r13, (%rdi)
+ movq %r14, 8(%rdi)
+ # r15 holds the final carry (0 or 1); negate it into a 0 / -1 mask and
+ # conditionally subtract m exactly once.
+ negq %r15
+ movq %r15, %rcx
+ movq %rsi, %rdx
+ movq %rdi, %rsi
+ subq $384, %rdi
+ # Tail call setup: sp_3072_cond_sub_48(r = a, a = a + 384, b = m,
+ # m = mask). NOTE(review): rsp is 8 mod 16 at this call (entry + 4
+ # pushes); benign only because sp_3072_cond_sub_48 performs no aligned
+ # SSE accesses — confirm if that helper ever changes.
+#ifndef __APPLE__
+ callq sp_3072_cond_sub_48@plt
+#else
+ callq _sp_3072_cond_sub_48
+#endif /* __APPLE__ */
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mont_reduce_48,.-sp_3072_mont_reduce_48
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_3072_cond_sub_avx2_48
+.type sp_3072_cond_sub_avx2_48,@function
+.align 16
+sp_3072_cond_sub_avx2_48:
+#else
+.globl _sp_3072_cond_sub_avx2_48
+.p2align 4
+_sp_3072_cond_sub_avx2_48:
+#endif /* __APPLE__ */
+ movq $0, %rax
+ movq (%rdx), %r10
+ movq (%rsi), %r8
+ pextq %rcx, %r10, %r10
+ subq %r10, %r8
+ movq 8(%rdx), %r10
+ movq 8(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, (%rdi)
+ sbbq %r10, %r9
+ movq 16(%rdx), %r8
+ movq 16(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 8(%rdi)
+ sbbq %r8, %r10
+ movq 24(%rdx), %r9
+ movq 24(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 16(%rdi)
+ sbbq %r9, %r8
+ movq 32(%rdx), %r10
+ movq 32(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 24(%rdi)
+ sbbq %r10, %r9
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 32(%rdi)
+ sbbq %r8, %r10
+ movq 48(%rdx), %r9
+ movq 48(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 40(%rdi)
+ sbbq %r9, %r8
+ movq 56(%rdx), %r10
+ movq 56(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 48(%rdi)
+ sbbq %r10, %r9
+ movq 64(%rdx), %r8
+ movq 64(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 56(%rdi)
+ sbbq %r8, %r10
+ movq 72(%rdx), %r9
+ movq 72(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 64(%rdi)
+ sbbq %r9, %r8
+ movq 80(%rdx), %r10
+ movq 80(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 72(%rdi)
+ sbbq %r10, %r9
+ movq 88(%rdx), %r8
+ movq 88(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 80(%rdi)
+ sbbq %r8, %r10
+ movq 96(%rdx), %r9
+ movq 96(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 88(%rdi)
+ sbbq %r9, %r8
+ movq 104(%rdx), %r10
+ movq 104(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 96(%rdi)
+ sbbq %r10, %r9
+ movq 112(%rdx), %r8
+ movq 112(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 104(%rdi)
+ sbbq %r8, %r10
+ movq 120(%rdx), %r9
+ movq 120(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 112(%rdi)
+ sbbq %r9, %r8
+ movq 128(%rdx), %r10
+ movq 128(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 120(%rdi)
+ sbbq %r10, %r9
+ movq 136(%rdx), %r8
+ movq 136(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 128(%rdi)
+ sbbq %r8, %r10
+ movq 144(%rdx), %r9
+ movq 144(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 136(%rdi)
+ sbbq %r9, %r8
+ movq 152(%rdx), %r10
+ movq 152(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 144(%rdi)
+ sbbq %r10, %r9
+ movq 160(%rdx), %r8
+ movq 160(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 152(%rdi)
+ sbbq %r8, %r10
+ movq 168(%rdx), %r9
+ movq 168(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 160(%rdi)
+ sbbq %r9, %r8
+ movq 176(%rdx), %r10
+ movq 176(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 168(%rdi)
+ sbbq %r10, %r9
+ movq 184(%rdx), %r8
+ movq 184(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 176(%rdi)
+ sbbq %r8, %r10
+ movq 192(%rdx), %r9
+ movq 192(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 184(%rdi)
+ sbbq %r9, %r8
+ movq 200(%rdx), %r10
+ movq 200(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 192(%rdi)
+ sbbq %r10, %r9
+ movq 208(%rdx), %r8
+ movq 208(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 200(%rdi)
+ sbbq %r8, %r10
+ movq 216(%rdx), %r9
+ movq 216(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 208(%rdi)
+ sbbq %r9, %r8
+ movq 224(%rdx), %r10
+ movq 224(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 216(%rdi)
+ sbbq %r10, %r9
+ movq 232(%rdx), %r8
+ movq 232(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 224(%rdi)
+ sbbq %r8, %r10
+ movq 240(%rdx), %r9
+ movq 240(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 232(%rdi)
+ sbbq %r9, %r8
+ movq 248(%rdx), %r10
+ movq 248(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 240(%rdi)
+ sbbq %r10, %r9
+ movq 256(%rdx), %r8
+ movq 256(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 248(%rdi)
+ sbbq %r8, %r10
+ movq 264(%rdx), %r9
+ movq 264(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 256(%rdi)
+ sbbq %r9, %r8
+ movq 272(%rdx), %r10
+ movq 272(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 264(%rdi)
+ sbbq %r10, %r9
+ movq 280(%rdx), %r8
+ movq 280(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 272(%rdi)
+ sbbq %r8, %r10
+ movq 288(%rdx), %r9
+ movq 288(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 280(%rdi)
+ sbbq %r9, %r8
+ movq 296(%rdx), %r10
+ movq 296(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 288(%rdi)
+ sbbq %r10, %r9
+ movq 304(%rdx), %r8
+ movq 304(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 296(%rdi)
+ sbbq %r8, %r10
+ movq 312(%rdx), %r9
+ movq 312(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 304(%rdi)
+ sbbq %r9, %r8
+ movq 320(%rdx), %r10
+ movq 320(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 312(%rdi)
+ sbbq %r10, %r9
+ movq 328(%rdx), %r8
+ movq 328(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 320(%rdi)
+ sbbq %r8, %r10
+ movq 336(%rdx), %r9
+ movq 336(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 328(%rdi)
+ sbbq %r9, %r8
+ movq 344(%rdx), %r10
+ movq 344(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 336(%rdi)
+ sbbq %r10, %r9
+ movq 352(%rdx), %r8
+ movq 352(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 344(%rdi)
+ sbbq %r8, %r10
+ movq 360(%rdx), %r9
+ movq 360(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 352(%rdi)
+ sbbq %r9, %r8
+ movq 368(%rdx), %r10
+ movq 368(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 360(%rdi)
+ sbbq %r10, %r9
+ movq 376(%rdx), %r8
+ movq 376(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 368(%rdi)
+ sbbq %r8, %r10
+ movq %r10, 376(%rdi)
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_cond_sub_avx2_48,.-sp_3072_cond_sub_avx2_48
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mul_d_avx2_48
+.type sp_3072_mul_d_avx2_48,@function
+.align 16
+sp_3072_mul_d_avx2_48:
+#else
+.globl _sp_3072_mul_d_avx2_48
+.p2align 4
+_sp_3072_mul_d_avx2_48:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r (49 words written), rsi = a (48 words), rdx = b.
+ # mulx takes its implicit multiplicand from rdx, so park b in rax first.
+ movq %rdx, %rax
+ # A[0] * B
+ movq %rax, %rdx
+ # r11 stays 0 for the whole routine: it seeds each fresh limb, and the
+ # xor also clears CF and OF for the adcx/adox chains below.
+ xorq %r11, %r11
+ mulxq (%rsi), %r9, %r10
+ movq %r9, (%rdi)
+ # Fully unrolled: r9/r10 alternate as the running high word. adcx folds
+ # the low product halves through CF; adox folds the high halves through OF.
+ # A[1] * B
+ mulxq 8(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 8(%rdi)
+ adoxq %r8, %r9
+ # A[2] * B
+ mulxq 16(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 16(%rdi)
+ adoxq %r8, %r10
+ # A[3] * B
+ mulxq 24(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 24(%rdi)
+ adoxq %r8, %r9
+ # A[4] * B
+ mulxq 32(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 32(%rdi)
+ adoxq %r8, %r10
+ # A[5] * B
+ mulxq 40(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 40(%rdi)
+ adoxq %r8, %r9
+ # A[6] * B
+ mulxq 48(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 48(%rdi)
+ adoxq %r8, %r10
+ # A[7] * B
+ mulxq 56(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 56(%rdi)
+ adoxq %r8, %r9
+ # A[8] * B
+ mulxq 64(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 64(%rdi)
+ adoxq %r8, %r10
+ # A[9] * B
+ mulxq 72(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 72(%rdi)
+ adoxq %r8, %r9
+ # A[10] * B
+ mulxq 80(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 80(%rdi)
+ adoxq %r8, %r10
+ # A[11] * B
+ mulxq 88(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 88(%rdi)
+ adoxq %r8, %r9
+ # A[12] * B
+ mulxq 96(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 96(%rdi)
+ adoxq %r8, %r10
+ # A[13] * B
+ mulxq 104(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 104(%rdi)
+ adoxq %r8, %r9
+ # A[14] * B
+ mulxq 112(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 112(%rdi)
+ adoxq %r8, %r10
+ # A[15] * B
+ mulxq 120(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 120(%rdi)
+ adoxq %r8, %r9
+ # A[16] * B
+ mulxq 128(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 128(%rdi)
+ adoxq %r8, %r10
+ # A[17] * B
+ mulxq 136(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 136(%rdi)
+ adoxq %r8, %r9
+ # A[18] * B
+ mulxq 144(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 144(%rdi)
+ adoxq %r8, %r10
+ # A[19] * B
+ mulxq 152(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 152(%rdi)
+ adoxq %r8, %r9
+ # A[20] * B
+ mulxq 160(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 160(%rdi)
+ adoxq %r8, %r10
+ # A[21] * B
+ mulxq 168(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 168(%rdi)
+ adoxq %r8, %r9
+ # A[22] * B
+ mulxq 176(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 176(%rdi)
+ adoxq %r8, %r10
+ # A[23] * B
+ mulxq 184(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 184(%rdi)
+ adoxq %r8, %r9
+ # A[24] * B
+ mulxq 192(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 192(%rdi)
+ adoxq %r8, %r10
+ # A[25] * B
+ mulxq 200(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 200(%rdi)
+ adoxq %r8, %r9
+ # A[26] * B
+ mulxq 208(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 208(%rdi)
+ adoxq %r8, %r10
+ # A[27] * B
+ mulxq 216(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 216(%rdi)
+ adoxq %r8, %r9
+ # A[28] * B
+ mulxq 224(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 224(%rdi)
+ adoxq %r8, %r10
+ # A[29] * B
+ mulxq 232(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 232(%rdi)
+ adoxq %r8, %r9
+ # A[30] * B
+ mulxq 240(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 240(%rdi)
+ adoxq %r8, %r10
+ # A[31] * B
+ mulxq 248(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 248(%rdi)
+ adoxq %r8, %r9
+ # A[32] * B
+ mulxq 256(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 256(%rdi)
+ adoxq %r8, %r10
+ # A[33] * B
+ mulxq 264(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 264(%rdi)
+ adoxq %r8, %r9
+ # A[34] * B
+ mulxq 272(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 272(%rdi)
+ adoxq %r8, %r10
+ # A[35] * B
+ mulxq 280(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 280(%rdi)
+ adoxq %r8, %r9
+ # A[36] * B
+ mulxq 288(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 288(%rdi)
+ adoxq %r8, %r10
+ # A[37] * B
+ mulxq 296(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 296(%rdi)
+ adoxq %r8, %r9
+ # A[38] * B
+ mulxq 304(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 304(%rdi)
+ adoxq %r8, %r10
+ # A[39] * B
+ mulxq 312(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 312(%rdi)
+ adoxq %r8, %r9
+ # A[40] * B
+ mulxq 320(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 320(%rdi)
+ adoxq %r8, %r10
+ # A[41] * B
+ mulxq 328(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 328(%rdi)
+ adoxq %r8, %r9
+ # A[42] * B
+ mulxq 336(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 336(%rdi)
+ adoxq %r8, %r10
+ # A[43] * B
+ mulxq 344(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 344(%rdi)
+ adoxq %r8, %r9
+ # A[44] * B
+ mulxq 352(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 352(%rdi)
+ adoxq %r8, %r10
+ # A[45] * B
+ mulxq 360(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 360(%rdi)
+ adoxq %r8, %r9
+ # A[46] * B
+ mulxq 368(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 368(%rdi)
+ adoxq %r8, %r10
+ # A[47] * B
+ mulxq 376(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ # Fold the final CF of the adcx chain into the top word (r11 is 0).
+ adcxq %r11, %r9
+ movq %r10, 376(%rdi)
+ # r[48] = final carry word of the 48x1 product.
+ movq %r9, 384(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mul_d_avx2_48,.-sp_3072_mul_d_avx2_48
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+#ifndef __APPLE__
+.globl sp_3072_cmp_48
+.type sp_3072_cmp_48,@function
+.align 16
+sp_3072_cmp_48:
+#else
+.globl _sp_3072_cmp_48
+.p2align 4
+_sp_3072_cmp_48:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = a, rsi = b. Constant time: all 48 limbs are always
+ # visited and only cmov (never a branch) depends on the data.
+ # rcx = 0, r8 = +1 (cmov sources).
+ # rdx = "still equal" mask: -1 until the first differing limb, then 0;
+ #       it also masks (andq) all later limbs so they cannot affect rax.
+ # rax = running result, initially -1.
+ xorq %rcx, %rcx
+ movq $-1, %rdx
+ movq $-1, %rax
+ movq $1, %r8
+ # Scan from the most-significant limb (a[47]) down to a[0].
+ # Per limb: rax = +1 if a[i] > b[i], rdx (= -1) if a[i] < b[i];
+ # a nonzero difference then zeroes the rdx mask.
+ movq 376(%rdi), %r9
+ movq 376(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 368(%rdi), %r9
+ movq 368(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 360(%rdi), %r9
+ movq 360(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 352(%rdi), %r9
+ movq 352(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 344(%rdi), %r9
+ movq 344(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 336(%rdi), %r9
+ movq 336(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 328(%rdi), %r9
+ movq 328(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 320(%rdi), %r9
+ movq 320(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 312(%rdi), %r9
+ movq 312(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 304(%rdi), %r9
+ movq 304(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 296(%rdi), %r9
+ movq 296(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 288(%rdi), %r9
+ movq 288(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 280(%rdi), %r9
+ movq 280(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 272(%rdi), %r9
+ movq 272(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 264(%rdi), %r9
+ movq 264(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 256(%rdi), %r9
+ movq 256(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 248(%rdi), %r9
+ movq 248(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 240(%rdi), %r9
+ movq 240(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 232(%rdi), %r9
+ movq 232(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 224(%rdi), %r9
+ movq 224(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 216(%rdi), %r9
+ movq 216(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 208(%rdi), %r9
+ movq 208(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 200(%rdi), %r9
+ movq 200(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 192(%rdi), %r9
+ movq 192(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 184(%rdi), %r9
+ movq 184(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 176(%rdi), %r9
+ movq 176(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 168(%rdi), %r9
+ movq 168(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 160(%rdi), %r9
+ movq 160(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 152(%rdi), %r9
+ movq 152(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 144(%rdi), %r9
+ movq 144(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 136(%rdi), %r9
+ movq 136(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 128(%rdi), %r9
+ movq 128(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 120(%rdi), %r9
+ movq 120(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 112(%rdi), %r9
+ movq 112(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 104(%rdi), %r9
+ movq 104(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 96(%rdi), %r9
+ movq 96(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 88(%rdi), %r9
+ movq 88(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 80(%rdi), %r9
+ movq 80(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 72(%rdi), %r9
+ movq 72(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 64(%rdi), %r9
+ movq 64(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 56(%rdi), %r9
+ movq 56(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 48(%rdi), %r9
+ movq 48(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 40(%rdi), %r9
+ movq 40(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 32(%rdi), %r9
+ movq 32(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 24(%rdi), %r9
+ movq 24(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 16(%rdi), %r9
+ movq 16(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 8(%rdi), %r9
+ movq 8(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq (%rdi), %r9
+ movq (%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ # If a == b, rax and rdx are both still -1 so the xor yields 0;
+ # otherwise rdx was zeroed at the first difference and rax (+1 or -1)
+ # passes through unchanged.
+ xorq %rdx, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_cmp_48,.-sp_3072_cmp_48
+#endif /* __APPLE__ */
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_3072_sub_48
+.type sp_3072_sub_48,@function
+.align 16
+sp_3072_sub_48:
+#else
+.globl _sp_3072_sub_48
+.p2align 4
+_sp_3072_sub_48:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b (48 words each).
+ # rcx and r8 alternate as the working limb so the next load can overlap
+ # the previous store without breaking the sub/sbb borrow chain.
+ # Returns rax = 0 - final borrow (0 or -1) via the trailing sbbq $0.
+ movq (%rsi), %rcx
+ xorq %rax, %rax
+ subq (%rdx), %rcx
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ sbbq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ sbbq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ sbbq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ sbbq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ sbbq 40(%rdx), %r8
+ movq 48(%rsi), %rcx
+ movq %r8, 40(%rdi)
+ sbbq 48(%rdx), %rcx
+ movq 56(%rsi), %r8
+ movq %rcx, 48(%rdi)
+ sbbq 56(%rdx), %r8
+ movq 64(%rsi), %rcx
+ movq %r8, 56(%rdi)
+ sbbq 64(%rdx), %rcx
+ movq 72(%rsi), %r8
+ movq %rcx, 64(%rdi)
+ sbbq 72(%rdx), %r8
+ movq 80(%rsi), %rcx
+ movq %r8, 72(%rdi)
+ sbbq 80(%rdx), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%rdi)
+ sbbq 88(%rdx), %r8
+ movq 96(%rsi), %rcx
+ movq %r8, 88(%rdi)
+ sbbq 96(%rdx), %rcx
+ movq 104(%rsi), %r8
+ movq %rcx, 96(%rdi)
+ sbbq 104(%rdx), %r8
+ movq 112(%rsi), %rcx
+ movq %r8, 104(%rdi)
+ sbbq 112(%rdx), %rcx
+ movq 120(%rsi), %r8
+ movq %rcx, 112(%rdi)
+ sbbq 120(%rdx), %r8
+ movq 128(%rsi), %rcx
+ movq %r8, 120(%rdi)
+ sbbq 128(%rdx), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%rdi)
+ sbbq 136(%rdx), %r8
+ movq 144(%rsi), %rcx
+ movq %r8, 136(%rdi)
+ sbbq 144(%rdx), %rcx
+ movq 152(%rsi), %r8
+ movq %rcx, 144(%rdi)
+ sbbq 152(%rdx), %r8
+ movq 160(%rsi), %rcx
+ movq %r8, 152(%rdi)
+ sbbq 160(%rdx), %rcx
+ movq 168(%rsi), %r8
+ movq %rcx, 160(%rdi)
+ sbbq 168(%rdx), %r8
+ movq 176(%rsi), %rcx
+ movq %r8, 168(%rdi)
+ sbbq 176(%rdx), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%rdi)
+ sbbq 184(%rdx), %r8
+ movq 192(%rsi), %rcx
+ movq %r8, 184(%rdi)
+ sbbq 192(%rdx), %rcx
+ movq 200(%rsi), %r8
+ movq %rcx, 192(%rdi)
+ sbbq 200(%rdx), %r8
+ movq 208(%rsi), %rcx
+ movq %r8, 200(%rdi)
+ sbbq 208(%rdx), %rcx
+ movq 216(%rsi), %r8
+ movq %rcx, 208(%rdi)
+ sbbq 216(%rdx), %r8
+ movq 224(%rsi), %rcx
+ movq %r8, 216(%rdi)
+ sbbq 224(%rdx), %rcx
+ movq 232(%rsi), %r8
+ movq %rcx, 224(%rdi)
+ sbbq 232(%rdx), %r8
+ movq 240(%rsi), %rcx
+ movq %r8, 232(%rdi)
+ sbbq 240(%rdx), %rcx
+ movq 248(%rsi), %r8
+ movq %rcx, 240(%rdi)
+ sbbq 248(%rdx), %r8
+ movq 256(%rsi), %rcx
+ movq %r8, 248(%rdi)
+ sbbq 256(%rdx), %rcx
+ movq 264(%rsi), %r8
+ movq %rcx, 256(%rdi)
+ sbbq 264(%rdx), %r8
+ movq 272(%rsi), %rcx
+ movq %r8, 264(%rdi)
+ sbbq 272(%rdx), %rcx
+ movq 280(%rsi), %r8
+ movq %rcx, 272(%rdi)
+ sbbq 280(%rdx), %r8
+ movq 288(%rsi), %rcx
+ movq %r8, 280(%rdi)
+ sbbq 288(%rdx), %rcx
+ movq 296(%rsi), %r8
+ movq %rcx, 288(%rdi)
+ sbbq 296(%rdx), %r8
+ movq 304(%rsi), %rcx
+ movq %r8, 296(%rdi)
+ sbbq 304(%rdx), %rcx
+ movq 312(%rsi), %r8
+ movq %rcx, 304(%rdi)
+ sbbq 312(%rdx), %r8
+ movq 320(%rsi), %rcx
+ movq %r8, 312(%rdi)
+ sbbq 320(%rdx), %rcx
+ movq 328(%rsi), %r8
+ movq %rcx, 320(%rdi)
+ sbbq 328(%rdx), %r8
+ movq 336(%rsi), %rcx
+ movq %r8, 328(%rdi)
+ sbbq 336(%rdx), %rcx
+ movq 344(%rsi), %r8
+ movq %rcx, 336(%rdi)
+ sbbq 344(%rdx), %r8
+ movq 352(%rsi), %rcx
+ movq %r8, 344(%rdi)
+ sbbq 352(%rdx), %rcx
+ movq 360(%rsi), %r8
+ movq %rcx, 352(%rdi)
+ sbbq 360(%rdx), %r8
+ movq 368(%rsi), %rcx
+ movq %r8, 360(%rdi)
+ sbbq 368(%rdx), %rcx
+ movq 376(%rsi), %r8
+ movq %rcx, 368(%rdi)
+ sbbq 376(%rdx), %r8
+ movq %r8, 376(%rdi)
+ # rax = 0 or -1: the borrow out of the whole subtraction.
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_sub_48,.-sp_3072_sub_48
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Reduce the number back to 3072 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_3072_mont_reduce_avx2_48
+.type sp_3072_mont_reduce_avx2_48,@function
+.align 16
+sp_3072_mont_reduce_avx2_48:
+#else
+.globl _sp_3072_mont_reduce_avx2_48
+.p2align 4
+_sp_3072_mont_reduce_avx2_48:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = a (reduced in place), rsi = m (48 words), rdx = mp.
+ push %r12
+ push %r13
+ push %r14
+ # mp goes to r8: rdx is consumed as the implicit mulx multiplicand.
+ movq %rdx, %r8
+ # r14 = carry propagated from one row into the next.
+ xorq %r14, %r14
+ # i = 48
+ movq $48, %r9
+ # Preload a[0] into r13; each row ends by preloading the next a[i].
+ movq (%rdi), %r13
+ # Bias the a pointer by 24 words; row offsets then span -184..192(%rdi).
+ addq $192, %rdi
+ xorq %r12, %r12
+L_mont_loop_avx2_48:
+ # mu = a[i] * mp
+ # r13 holds a[i] from the previous row's preload; r10 gets the working
+ # copy that m[0]*mu is folded into.
+ movq %r13, %rdx
+ movq %r13, %r10
+ imulq %r8, %rdx
+ # r12 = 0; the xor also clears CF/OF for this row's adcx/adox chains.
+ xorq %r12, %r12
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rax, %rcx
+ movq -184(%rdi), %r13
+ adcxq %rax, %r10
+ adoxq %rcx, %r13
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rax, %rcx
+ movq -176(%rdi), %r10
+ adcxq %rax, %r13
+ adoxq %rcx, %r10
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rax, %rcx
+ movq -168(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -176(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rax, %rcx
+ movq -160(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -168(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rax, %rcx
+ movq -152(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -160(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rax, %rcx
+ movq -144(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -152(%rdi)
+ # a[i+6] += m[6] * mu
+ mulxq 48(%rsi), %rax, %rcx
+ movq -136(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -144(%rdi)
+ # a[i+7] += m[7] * mu
+ mulxq 56(%rsi), %rax, %rcx
+ movq -128(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -136(%rdi)
+ # a[i+8] += m[8] * mu
+ mulxq 64(%rsi), %rax, %rcx
+ movq -120(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -128(%rdi)
+ # a[i+9] += m[9] * mu
+ mulxq 72(%rsi), %rax, %rcx
+ movq -112(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -120(%rdi)
+ # a[i+10] += m[10] * mu
+ mulxq 80(%rsi), %rax, %rcx
+ movq -104(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -112(%rdi)
+ # a[i+11] += m[11] * mu
+ mulxq 88(%rsi), %rax, %rcx
+ movq -96(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -104(%rdi)
+ # a[i+12] += m[12] * mu
+ mulxq 96(%rsi), %rax, %rcx
+ movq -88(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -96(%rdi)
+ # a[i+13] += m[13] * mu
+ mulxq 104(%rsi), %rax, %rcx
+ movq -80(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -88(%rdi)
+ # a[i+14] += m[14] * mu
+ mulxq 112(%rsi), %rax, %rcx
+ movq -72(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -80(%rdi)
+ # a[i+15] += m[15] * mu
+ mulxq 120(%rsi), %rax, %rcx
+ movq -64(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -72(%rdi)
+ # a[i+16] += m[16] * mu
+ mulxq 128(%rsi), %rax, %rcx
+ movq -56(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -64(%rdi)
+ # a[i+17] += m[17] * mu
+ mulxq 136(%rsi), %rax, %rcx
+ movq -48(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -56(%rdi)
+ # a[i+18] += m[18] * mu
+ mulxq 144(%rsi), %rax, %rcx
+ movq -40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -48(%rdi)
+ # a[i+19] += m[19] * mu
+ mulxq 152(%rsi), %rax, %rcx
+ movq -32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -40(%rdi)
+ # a[i+20] += m[20] * mu
+ mulxq 160(%rsi), %rax, %rcx
+ movq -24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -32(%rdi)
+ # a[i+21] += m[21] * mu
+ mulxq 168(%rsi), %rax, %rcx
+ movq -16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -24(%rdi)
+ # a[i+22] += m[22] * mu
+ mulxq 176(%rsi), %rax, %rcx
+ movq -8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -16(%rdi)
+ # a[i+23] += m[23] * mu
+ mulxq 184(%rsi), %rax, %rcx
+ movq (%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -8(%rdi)
+ # a[i+24] += m[24] * mu
+ mulxq 192(%rsi), %rax, %rcx
+ movq 8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, (%rdi)
+ # a[i+25] += m[25] * mu
+ mulxq 200(%rsi), %rax, %rcx
+ movq 16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 8(%rdi)
+ # a[i+26] += m[26] * mu
+ mulxq 208(%rsi), %rax, %rcx
+ movq 24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 16(%rdi)
+ # a[i+27] += m[27] * mu
+ mulxq 216(%rsi), %rax, %rcx
+ movq 32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 24(%rdi)
+ # a[i+28] += m[28] * mu
+ mulxq 224(%rsi), %rax, %rcx
+ movq 40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 32(%rdi)
+ # a[i+29] += m[29] * mu
+ mulxq 232(%rsi), %rax, %rcx
+ movq 48(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 40(%rdi)
+ # a[i+30] += m[30] * mu
+ mulxq 240(%rsi), %rax, %rcx
+ movq 56(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 48(%rdi)
+ # a[i+31] += m[31] * mu
+ mulxq 248(%rsi), %rax, %rcx
+ movq 64(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 56(%rdi)
+ # a[i+32] += m[32] * mu
+ mulxq 256(%rsi), %rax, %rcx
+ movq 72(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 64(%rdi)
+ # a[i+33] += m[33] * mu
+ mulxq 264(%rsi), %rax, %rcx
+ movq 80(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 72(%rdi)
+ # a[i+34] += m[34] * mu
+ mulxq 272(%rsi), %rax, %rcx
+ movq 88(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 80(%rdi)
+ # a[i+35] += m[35] * mu
+ mulxq 280(%rsi), %rax, %rcx
+ movq 96(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 88(%rdi)
+ # a[i+36] += m[36] * mu
+ mulxq 288(%rsi), %rax, %rcx
+ movq 104(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 96(%rdi)
+ # a[i+37] += m[37] * mu
+ mulxq 296(%rsi), %rax, %rcx
+ movq 112(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 104(%rdi)
+ # a[i+38] += m[38] * mu
+ mulxq 304(%rsi), %rax, %rcx
+ movq 120(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 112(%rdi)
+ # a[i+39] += m[39] * mu
+ mulxq 312(%rsi), %rax, %rcx
+ movq 128(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 120(%rdi)
+ # a[i+40] += m[40] * mu
+ mulxq 320(%rsi), %rax, %rcx
+ movq 136(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 128(%rdi)
+ # a[i+41] += m[41] * mu
+ mulxq 328(%rsi), %rax, %rcx
+ movq 144(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 136(%rdi)
+ # a[i+42] += m[42] * mu
+ mulxq 336(%rsi), %rax, %rcx
+ movq 152(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 144(%rdi)
+ # a[i+43] += m[43] * mu
+ mulxq 344(%rsi), %rax, %rcx
+ movq 160(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 152(%rdi)
+ # a[i+44] += m[44] * mu
+ mulxq 352(%rsi), %rax, %rcx
+ movq 168(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 160(%rdi)
+ # a[i+45] += m[45] * mu
+ mulxq 360(%rsi), %rax, %rcx
+ movq 176(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 168(%rdi)
+ # a[i+46] += m[46] * mu
+ mulxq 368(%rsi), %rax, %rcx
+ movq 184(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 176(%rdi)
+ # a[i+47] += m[47] * mu
+ mulxq 376(%rsi), %rax, %rcx
+ movq 192(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 184(%rdi)
+ # Add the carry r14 from the previous row into the top word.
+ adcxq %r14, %r10
+ movq %r10, 192(%rdi)
+ # Capture this row's final OF and CF carries into r14 (r12 is 0).
+ movq %r12, %r14
+ adoxq %r12, %r14
+ adcxq %r12, %r14
+ # a += 1
+ addq $8, %rdi
+ # i -= 1
+ subq $1, %r9
+ jnz L_mont_loop_avx2_48
+ # Undo the 24-word bias: after 48 rows rdi now points at a[48].
+ subq $192, %rdi
+ # r14 = 0 or -1: mask selecting the final conditional subtract of m.
+ negq %r14
+ # r8 = &a[48] (the reduced value's high half), rdi = &a[0] (result).
+ movq %rdi, %r8
+ subq $384, %rdi
+ # r = a[48..95] - (m & mask): pext with mask 0/-1 yields 0 or the whole
+ # limb of m, keeping the subtraction constant time; sbb chains the borrow.
+ movq (%rsi), %rcx
+ movq %r13, %rdx
+ pextq %r14, %rcx, %rcx
+ subq %rcx, %rdx
+ movq 8(%rsi), %rcx
+ movq 8(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, (%rdi)
+ sbbq %rcx, %rax
+ movq 16(%rsi), %rdx
+ movq 16(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 8(%rdi)
+ sbbq %rdx, %rcx
+ movq 24(%rsi), %rax
+ movq 24(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 16(%rdi)
+ sbbq %rax, %rdx
+ movq 32(%rsi), %rcx
+ movq 32(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 24(%rdi)
+ sbbq %rcx, %rax
+ movq 40(%rsi), %rdx
+ movq 40(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 32(%rdi)
+ sbbq %rdx, %rcx
+ movq 48(%rsi), %rax
+ movq 48(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 40(%rdi)
+ sbbq %rax, %rdx
+ movq 56(%rsi), %rcx
+ movq 56(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 48(%rdi)
+ sbbq %rcx, %rax
+ movq 64(%rsi), %rdx
+ movq 64(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 56(%rdi)
+ sbbq %rdx, %rcx
+ movq 72(%rsi), %rax
+ movq 72(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 64(%rdi)
+ sbbq %rax, %rdx
+ movq 80(%rsi), %rcx
+ movq 80(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 72(%rdi)
+ sbbq %rcx, %rax
+ movq 88(%rsi), %rdx
+ movq 88(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 80(%rdi)
+ sbbq %rdx, %rcx
+ movq 96(%rsi), %rax
+ movq 96(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 88(%rdi)
+ sbbq %rax, %rdx
+ movq 104(%rsi), %rcx
+ movq 104(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 96(%rdi)
+ sbbq %rcx, %rax
+ movq 112(%rsi), %rdx
+ movq 112(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 104(%rdi)
+ sbbq %rdx, %rcx
+ movq 120(%rsi), %rax
+ movq 120(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 112(%rdi)
+ sbbq %rax, %rdx
+ movq 128(%rsi), %rcx
+ movq 128(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 120(%rdi)
+ sbbq %rcx, %rax
+ movq 136(%rsi), %rdx
+ movq 136(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 128(%rdi)
+ sbbq %rdx, %rcx
+ movq 144(%rsi), %rax
+ movq 144(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 136(%rdi)
+ sbbq %rax, %rdx
+ movq 152(%rsi), %rcx
+ movq 152(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 144(%rdi)
+ sbbq %rcx, %rax
+ movq 160(%rsi), %rdx
+ movq 160(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 152(%rdi)
+ sbbq %rdx, %rcx
+ movq 168(%rsi), %rax
+ movq 168(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 160(%rdi)
+ sbbq %rax, %rdx
+ movq 176(%rsi), %rcx
+ movq 176(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 168(%rdi)
+ sbbq %rcx, %rax
+ movq 184(%rsi), %rdx
+ movq 184(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 176(%rdi)
+ sbbq %rdx, %rcx
+ movq 192(%rsi), %rax
+ movq 192(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 184(%rdi)
+ sbbq %rax, %rdx
+ movq 200(%rsi), %rcx
+ movq 200(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 192(%rdi)
+ sbbq %rcx, %rax
+ movq 208(%rsi), %rdx
+ movq 208(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 200(%rdi)
+ sbbq %rdx, %rcx
+ movq 216(%rsi), %rax
+ movq 216(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 208(%rdi)
+ sbbq %rax, %rdx
+ movq 224(%rsi), %rcx
+ movq 224(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 216(%rdi)
+ sbbq %rcx, %rax
+ movq 232(%rsi), %rdx
+ movq 232(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 224(%rdi)
+ sbbq %rdx, %rcx
+ movq 240(%rsi), %rax
+ movq 240(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 232(%rdi)
+ sbbq %rax, %rdx
+ movq 248(%rsi), %rcx
+ movq 248(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 240(%rdi)
+ sbbq %rcx, %rax
+ movq 256(%rsi), %rdx
+ movq 256(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 248(%rdi)
+ sbbq %rdx, %rcx
+ movq 264(%rsi), %rax
+ movq 264(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 256(%rdi)
+ sbbq %rax, %rdx
+ movq 272(%rsi), %rcx
+ movq 272(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 264(%rdi)
+ sbbq %rcx, %rax
+ movq 280(%rsi), %rdx
+ movq 280(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 272(%rdi)
+ sbbq %rdx, %rcx
+ movq 288(%rsi), %rax
+ movq 288(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 280(%rdi)
+ sbbq %rax, %rdx
+ movq 296(%rsi), %rcx
+ movq 296(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 288(%rdi)
+ sbbq %rcx, %rax
+ movq 304(%rsi), %rdx
+ movq 304(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 296(%rdi)
+ sbbq %rdx, %rcx
+ movq 312(%rsi), %rax
+ movq 312(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 304(%rdi)
+ sbbq %rax, %rdx
+ movq 320(%rsi), %rcx
+ movq 320(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 312(%rdi)
+ sbbq %rcx, %rax
+ movq 328(%rsi), %rdx
+ movq 328(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 320(%rdi)
+ sbbq %rdx, %rcx
+ movq 336(%rsi), %rax
+ movq 336(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 328(%rdi)
+ sbbq %rax, %rdx
+ movq 344(%rsi), %rcx
+ movq 344(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 336(%rdi)
+ sbbq %rcx, %rax
+ movq 352(%rsi), %rdx
+ movq 352(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 344(%rdi)
+ sbbq %rdx, %rcx
+ movq 360(%rsi), %rax
+ movq 360(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 352(%rdi)
+ sbbq %rax, %rdx
+ movq 368(%rsi), %rcx
+ movq 368(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 360(%rdi)
+ sbbq %rcx, %rax
+ movq 376(%rsi), %rdx
+ movq 376(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 368(%rdi)
+ sbbq %rdx, %rcx
+ movq %rcx, 376(%rdi)
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_mont_reduce_avx2_48,.-sp_3072_mont_reduce_avx2_48
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_3072_cond_add_24
+.type sp_3072_cond_add_24,@function
+.align 16
+sp_3072_cond_add_24:
+#else
+.globl _sp_3072_cond_add_24
+.p2align 4
+_sp_3072_cond_add_24:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r (result), rsi = a, rdx = b, rcx = m (mask: -1 or 0).
+ # Returns the carry out of the 24-word addition in rax (0 or 1).
+ # Stage 1: write (b & m) into a 192-byte stack scratch buffer.
+ subq $192, %rsp
+ movq $0, %rax
+ movq (%rdx), %r8
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 48(%rdx), %r8
+ movq 56(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 48(%rsp)
+ movq %r9, 56(%rsp)
+ movq 64(%rdx), %r8
+ movq 72(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq 80(%rdx), %r8
+ movq 88(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 80(%rsp)
+ movq %r9, 88(%rsp)
+ movq 96(%rdx), %r8
+ movq 104(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq 112(%rdx), %r8
+ movq 120(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 112(%rsp)
+ movq %r9, 120(%rsp)
+ movq 128(%rdx), %r8
+ movq 136(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 128(%rsp)
+ movq %r9, 136(%rsp)
+ movq 144(%rdx), %r8
+ movq 152(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 144(%rsp)
+ movq %r9, 152(%rsp)
+ movq 160(%rdx), %r8
+ movq 168(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 160(%rsp)
+ movq %r9, 168(%rsp)
+ movq 176(%rdx), %r8
+ movq 184(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 176(%rsp)
+ movq %r9, 184(%rsp)
+ # Stage 2: r = a + (b & m); carry chain is unbroken from addq to adcq $0.
+ movq (%rsi), %r8
+ movq (%rsp), %rdx
+ addq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, (%rdi)
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq 48(%rsi), %r8
+ movq 48(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 40(%rdi)
+ movq 56(%rsi), %r9
+ movq 56(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 48(%rdi)
+ movq 64(%rsi), %r8
+ movq 64(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 56(%rdi)
+ movq 72(%rsi), %r9
+ movq 72(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 64(%rdi)
+ movq 80(%rsi), %r8
+ movq 80(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 72(%rdi)
+ movq 88(%rsi), %r9
+ movq 88(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 80(%rdi)
+ movq 96(%rsi), %r8
+ movq 96(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 88(%rdi)
+ movq 104(%rsi), %r9
+ movq 104(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rsi), %r8
+ movq 112(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rsi), %r9
+ movq 120(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 112(%rdi)
+ movq 128(%rsi), %r8
+ movq 128(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 120(%rdi)
+ movq 136(%rsi), %r9
+ movq 136(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 128(%rdi)
+ movq 144(%rsi), %r8
+ movq 144(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 136(%rdi)
+ movq 152(%rsi), %r9
+ movq 152(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rsi), %r8
+ movq 160(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 152(%rdi)
+ movq 168(%rsi), %r9
+ movq 168(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 160(%rdi)
+ movq 176(%rsi), %r8
+ movq 176(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 168(%rdi)
+ movq 184(%rsi), %r9
+ movq 184(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 176(%rdi)
+ movq %r9, 184(%rdi)
+ # Capture final carry before the flag-clobbering stack restore.
+ adcq $0, %rax
+ addq $192, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_cond_add_24,.-sp_3072_cond_add_24
+#endif /* __APPLE__ */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_3072_cond_add_avx2_24
+.type sp_3072_cond_add_avx2_24,@function
+.align 16
+sp_3072_cond_add_avx2_24:
+#else
+.globl _sp_3072_cond_add_avx2_24
+.p2align 4
+_sp_3072_cond_add_avx2_24:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r (result), rsi = a, rdx = b, rcx = m (mask: -1 or 0).
+ # Returns the carry out of the 24-word addition in rax (0 or 1).
+ # BMI2 trick: pextq with mask m yields b[i] when m == -1 (identity extract)
+ # and 0 when m == 0 — masking b without disturbing the adcq carry chain.
+ movq $0, %rax
+ movq (%rdx), %r10
+ movq (%rsi), %r8
+ pextq %rcx, %r10, %r10
+ addq %r10, %r8
+ movq 8(%rdx), %r10
+ movq 8(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, (%rdi)
+ adcq %r10, %r9
+ movq 16(%rdx), %r8
+ movq 16(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 8(%rdi)
+ adcq %r8, %r10
+ movq 24(%rdx), %r9
+ movq 24(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 16(%rdi)
+ adcq %r9, %r8
+ movq 32(%rdx), %r10
+ movq 32(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 24(%rdi)
+ adcq %r10, %r9
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 32(%rdi)
+ adcq %r8, %r10
+ movq 48(%rdx), %r9
+ movq 48(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 40(%rdi)
+ adcq %r9, %r8
+ movq 56(%rdx), %r10
+ movq 56(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 48(%rdi)
+ adcq %r10, %r9
+ movq 64(%rdx), %r8
+ movq 64(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 56(%rdi)
+ adcq %r8, %r10
+ movq 72(%rdx), %r9
+ movq 72(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 64(%rdi)
+ adcq %r9, %r8
+ movq 80(%rdx), %r10
+ movq 80(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 72(%rdi)
+ adcq %r10, %r9
+ movq 88(%rdx), %r8
+ movq 88(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 80(%rdi)
+ adcq %r8, %r10
+ movq 96(%rdx), %r9
+ movq 96(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 88(%rdi)
+ adcq %r9, %r8
+ movq 104(%rdx), %r10
+ movq 104(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 96(%rdi)
+ adcq %r10, %r9
+ movq 112(%rdx), %r8
+ movq 112(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 104(%rdi)
+ adcq %r8, %r10
+ movq 120(%rdx), %r9
+ movq 120(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 112(%rdi)
+ adcq %r9, %r8
+ movq 128(%rdx), %r10
+ movq 128(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 120(%rdi)
+ adcq %r10, %r9
+ movq 136(%rdx), %r8
+ movq 136(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 128(%rdi)
+ adcq %r8, %r10
+ movq 144(%rdx), %r9
+ movq 144(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 136(%rdi)
+ adcq %r9, %r8
+ movq 152(%rdx), %r10
+ movq 152(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 144(%rdi)
+ adcq %r10, %r9
+ movq 160(%rdx), %r8
+ movq 160(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 152(%rdi)
+ adcq %r8, %r10
+ movq 168(%rdx), %r9
+ movq 168(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 160(%rdi)
+ adcq %r9, %r8
+ movq 176(%rdx), %r10
+ movq 176(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 168(%rdi)
+ adcq %r10, %r9
+ movq 184(%rdx), %r8
+ movq 184(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 176(%rdi)
+ adcq %r8, %r10
+ movq %r10, 184(%rdi)
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_cond_add_avx2_24,.-sp_3072_cond_add_avx2_24
+#endif /* __APPLE__ */
+/* Shift number left by n bits. (r = a << n)
+ *
+ * r Result of left shift by n.
+ * a Number to shift.
+ * n Amount to shift.
+ */
+#ifndef __APPLE__
+.globl sp_3072_lshift_48
+.type sp_3072_lshift_48,@function
+.align 16
+sp_3072_lshift_48:
+#else
+.globl _sp_3072_lshift_48
+.p2align 4
+_sp_3072_lshift_48:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = n (shift amount, moved to cl for shldq).
+ # Shifts the 48-word number left by n bits, writing 49 output words: the
+ # overflow word lands at 384(%rdi). Works top-down in 4-word groups; the
+ # lowest word loaded in each group carries into the next group via shldq.
+ movq %rdx, %rcx
+ movq $0, %r10
+ movq 344(%rsi), %r11
+ movq 352(%rsi), %rdx
+ movq 360(%rsi), %rax
+ movq 368(%rsi), %r8
+ movq 376(%rsi), %r9
+ shldq %cl, %r9, %r10
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 352(%rdi)
+ movq %rax, 360(%rdi)
+ movq %r8, 368(%rdi)
+ movq %r9, 376(%rdi)
+ movq %r10, 384(%rdi)
+ movq 312(%rsi), %r9
+ movq 320(%rsi), %rdx
+ movq 328(%rsi), %rax
+ movq 336(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 320(%rdi)
+ movq %rax, 328(%rdi)
+ movq %r8, 336(%rdi)
+ movq %r11, 344(%rdi)
+ movq 280(%rsi), %r11
+ movq 288(%rsi), %rdx
+ movq 296(%rsi), %rax
+ movq 304(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 288(%rdi)
+ movq %rax, 296(%rdi)
+ movq %r8, 304(%rdi)
+ movq %r9, 312(%rdi)
+ movq 248(%rsi), %r9
+ movq 256(%rsi), %rdx
+ movq 264(%rsi), %rax
+ movq 272(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 256(%rdi)
+ movq %rax, 264(%rdi)
+ movq %r8, 272(%rdi)
+ movq %r11, 280(%rdi)
+ movq 216(%rsi), %r11
+ movq 224(%rsi), %rdx
+ movq 232(%rsi), %rax
+ movq 240(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 224(%rdi)
+ movq %rax, 232(%rdi)
+ movq %r8, 240(%rdi)
+ movq %r9, 248(%rdi)
+ movq 184(%rsi), %r9
+ movq 192(%rsi), %rdx
+ movq 200(%rsi), %rax
+ movq 208(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 192(%rdi)
+ movq %rax, 200(%rdi)
+ movq %r8, 208(%rdi)
+ movq %r11, 216(%rdi)
+ movq 152(%rsi), %r11
+ movq 160(%rsi), %rdx
+ movq 168(%rsi), %rax
+ movq 176(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 160(%rdi)
+ movq %rax, 168(%rdi)
+ movq %r8, 176(%rdi)
+ movq %r9, 184(%rdi)
+ movq 120(%rsi), %r9
+ movq 128(%rsi), %rdx
+ movq 136(%rsi), %rax
+ movq 144(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 128(%rdi)
+ movq %rax, 136(%rdi)
+ movq %r8, 144(%rdi)
+ movq %r11, 152(%rdi)
+ movq 88(%rsi), %r11
+ movq 96(%rsi), %rdx
+ movq 104(%rsi), %rax
+ movq 112(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 96(%rdi)
+ movq %rax, 104(%rdi)
+ movq %r8, 112(%rdi)
+ movq %r9, 120(%rdi)
+ movq 56(%rsi), %r9
+ movq 64(%rsi), %rdx
+ movq 72(%rsi), %rax
+ movq 80(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 64(%rdi)
+ movq %rax, 72(%rdi)
+ movq %r8, 80(%rdi)
+ movq %r11, 88(%rdi)
+ movq 24(%rsi), %r11
+ movq 32(%rsi), %rdx
+ movq 40(%rsi), %rax
+ movq 48(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 32(%rdi)
+ movq %rax, 40(%rdi)
+ movq %r8, 48(%rdi)
+ movq %r9, 56(%rdi)
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ # Lowest word: plain shlq — zeros, not a neighbour word, shift in.
+ shlq %cl, %rdx
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r11, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_3072_lshift_48,.-sp_3072_lshift_48
+#endif /* __APPLE__ */
+#endif /* !WOLFSSL_SP_NO_3072 */
+#endif /* !WOLFSSL_SP_NO_3072 */
+#ifdef WOLFSSL_SP_4096
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+#ifndef __APPLE__
+.globl sp_4096_from_bin
+.type sp_4096_from_bin,@function
+.align 16
+sp_4096_from_bin:
+#else
+.globl _sp_4096_from_bin
+.p2align 4
+_sp_4096_from_bin:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r (output words), rsi = size (unused here), rdx = a
+ # (big-endian bytes), rcx = n (byte count).
+ # r9 walks backwards from the end of a; r10 = rdi + 512 marks the end of r;
+ # r11 stays zero (used for compares and zero-fill).
+ movq %rdx, %r9
+ movq %rdi, %r10
+ addq %rcx, %r9
+ addq $512, %r10
+ xorq %r11, %r11
+ jmp L_4096_from_bin_64_end
+L_4096_from_bin_64_start:
+ # Consume 64 bytes per iteration; movbeq byte-swaps big-endian on load.
+ subq $64, %r9
+ movbeq 56(%r9), %rax
+ movbeq 48(%r9), %r8
+ movq %rax, (%rdi)
+ movq %r8, 8(%rdi)
+ movbeq 40(%r9), %rax
+ movbeq 32(%r9), %r8
+ movq %rax, 16(%rdi)
+ movq %r8, 24(%rdi)
+ movbeq 24(%r9), %rax
+ movbeq 16(%r9), %r8
+ movq %rax, 32(%rdi)
+ movq %r8, 40(%rdi)
+ movbeq 8(%r9), %rax
+ movbeq (%r9), %r8
+ movq %rax, 48(%rdi)
+ movq %r8, 56(%rdi)
+ addq $64, %rdi
+ subq $64, %rcx
+L_4096_from_bin_64_end:
+ cmpq $63, %rcx
+ jg L_4096_from_bin_64_start
+ jmp L_4096_from_bin_8_end
+L_4096_from_bin_8_start:
+ # Consume remaining whole 8-byte words.
+ subq $8, %r9
+ movbeq (%r9), %rax
+ movq %rax, (%rdi)
+ addq $8, %rdi
+ subq $8, %rcx
+L_4096_from_bin_8_end:
+ cmpq $7, %rcx
+ jg L_4096_from_bin_8_start
+ cmpq %r11, %rcx
+ je L_4096_from_bin_hi_end
+ movq %r11, %r8
+ movq %r11, %rax
+L_4096_from_bin_hi_start:
+ # Fewer than 8 bytes remain at the start of a (rdx still points there);
+ # accumulate them big-endian into r8, one byte at a time.
+ movb (%rdx), %al
+ shlq $8, %r8
+ incq %rdx
+ addq %rax, %r8
+ decq %rcx
+ jg L_4096_from_bin_hi_start
+ movq %r8, (%rdi)
+ addq $8, %rdi
+L_4096_from_bin_hi_end:
+ cmpq %r10, %rdi
+ je L_4096_from_bin_zero_end
+L_4096_from_bin_zero_start:
+ # Zero-fill any untouched high words up to the 512-byte output size.
+ movq %r11, (%rdi)
+ addq $8, %rdi
+ cmpq %r10, %rdi
+ jl L_4096_from_bin_zero_start
+L_4096_from_bin_zero_end:
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_from_bin,.-sp_4096_from_bin
+#endif /* __APPLE__ */
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+#ifndef __APPLE__
+.globl sp_4096_to_bin
+.type sp_4096_to_bin,@function
+.align 16
+sp_4096_to_bin:
+#else
+.globl _sp_4096_to_bin
+.p2align 4
+_sp_4096_to_bin:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r (64 little-endian words), rsi = a (512-byte output).
+ # Straight-line: movbeq byte-swaps each word on load; words are emitted
+ # highest-first so the output is the big-endian encoding of r.
+ movbeq 504(%rdi), %rdx
+ movbeq 496(%rdi), %rax
+ movq %rdx, (%rsi)
+ movq %rax, 8(%rsi)
+ movbeq 488(%rdi), %rdx
+ movbeq 480(%rdi), %rax
+ movq %rdx, 16(%rsi)
+ movq %rax, 24(%rsi)
+ movbeq 472(%rdi), %rdx
+ movbeq 464(%rdi), %rax
+ movq %rdx, 32(%rsi)
+ movq %rax, 40(%rsi)
+ movbeq 456(%rdi), %rdx
+ movbeq 448(%rdi), %rax
+ movq %rdx, 48(%rsi)
+ movq %rax, 56(%rsi)
+ movbeq 440(%rdi), %rdx
+ movbeq 432(%rdi), %rax
+ movq %rdx, 64(%rsi)
+ movq %rax, 72(%rsi)
+ movbeq 424(%rdi), %rdx
+ movbeq 416(%rdi), %rax
+ movq %rdx, 80(%rsi)
+ movq %rax, 88(%rsi)
+ movbeq 408(%rdi), %rdx
+ movbeq 400(%rdi), %rax
+ movq %rdx, 96(%rsi)
+ movq %rax, 104(%rsi)
+ movbeq 392(%rdi), %rdx
+ movbeq 384(%rdi), %rax
+ movq %rdx, 112(%rsi)
+ movq %rax, 120(%rsi)
+ movbeq 376(%rdi), %rdx
+ movbeq 368(%rdi), %rax
+ movq %rdx, 128(%rsi)
+ movq %rax, 136(%rsi)
+ movbeq 360(%rdi), %rdx
+ movbeq 352(%rdi), %rax
+ movq %rdx, 144(%rsi)
+ movq %rax, 152(%rsi)
+ movbeq 344(%rdi), %rdx
+ movbeq 336(%rdi), %rax
+ movq %rdx, 160(%rsi)
+ movq %rax, 168(%rsi)
+ movbeq 328(%rdi), %rdx
+ movbeq 320(%rdi), %rax
+ movq %rdx, 176(%rsi)
+ movq %rax, 184(%rsi)
+ movbeq 312(%rdi), %rdx
+ movbeq 304(%rdi), %rax
+ movq %rdx, 192(%rsi)
+ movq %rax, 200(%rsi)
+ movbeq 296(%rdi), %rdx
+ movbeq 288(%rdi), %rax
+ movq %rdx, 208(%rsi)
+ movq %rax, 216(%rsi)
+ movbeq 280(%rdi), %rdx
+ movbeq 272(%rdi), %rax
+ movq %rdx, 224(%rsi)
+ movq %rax, 232(%rsi)
+ movbeq 264(%rdi), %rdx
+ movbeq 256(%rdi), %rax
+ movq %rdx, 240(%rsi)
+ movq %rax, 248(%rsi)
+ movbeq 248(%rdi), %rdx
+ movbeq 240(%rdi), %rax
+ movq %rdx, 256(%rsi)
+ movq %rax, 264(%rsi)
+ movbeq 232(%rdi), %rdx
+ movbeq 224(%rdi), %rax
+ movq %rdx, 272(%rsi)
+ movq %rax, 280(%rsi)
+ movbeq 216(%rdi), %rdx
+ movbeq 208(%rdi), %rax
+ movq %rdx, 288(%rsi)
+ movq %rax, 296(%rsi)
+ movbeq 200(%rdi), %rdx
+ movbeq 192(%rdi), %rax
+ movq %rdx, 304(%rsi)
+ movq %rax, 312(%rsi)
+ movbeq 184(%rdi), %rdx
+ movbeq 176(%rdi), %rax
+ movq %rdx, 320(%rsi)
+ movq %rax, 328(%rsi)
+ movbeq 168(%rdi), %rdx
+ movbeq 160(%rdi), %rax
+ movq %rdx, 336(%rsi)
+ movq %rax, 344(%rsi)
+ movbeq 152(%rdi), %rdx
+ movbeq 144(%rdi), %rax
+ movq %rdx, 352(%rsi)
+ movq %rax, 360(%rsi)
+ movbeq 136(%rdi), %rdx
+ movbeq 128(%rdi), %rax
+ movq %rdx, 368(%rsi)
+ movq %rax, 376(%rsi)
+ movbeq 120(%rdi), %rdx
+ movbeq 112(%rdi), %rax
+ movq %rdx, 384(%rsi)
+ movq %rax, 392(%rsi)
+ movbeq 104(%rdi), %rdx
+ movbeq 96(%rdi), %rax
+ movq %rdx, 400(%rsi)
+ movq %rax, 408(%rsi)
+ movbeq 88(%rdi), %rdx
+ movbeq 80(%rdi), %rax
+ movq %rdx, 416(%rsi)
+ movq %rax, 424(%rsi)
+ movbeq 72(%rdi), %rdx
+ movbeq 64(%rdi), %rax
+ movq %rdx, 432(%rsi)
+ movq %rax, 440(%rsi)
+ movbeq 56(%rdi), %rdx
+ movbeq 48(%rdi), %rax
+ movq %rdx, 448(%rsi)
+ movq %rax, 456(%rsi)
+ movbeq 40(%rdi), %rdx
+ movbeq 32(%rdi), %rax
+ movq %rdx, 464(%rsi)
+ movq %rax, 472(%rsi)
+ movbeq 24(%rdi), %rdx
+ movbeq 16(%rdi), %rax
+ movq %rdx, 480(%rsi)
+ movq %rax, 488(%rsi)
+ movbeq 8(%rdi), %rdx
+ movbeq (%rdi), %rax
+ movq %rdx, 496(%rsi)
+ movq %rax, 504(%rsi)
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_to_bin,.-sp_4096_to_bin
+#endif /* __APPLE__ */
+/* Sub b from a into a. (a -= b)
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_4096_sub_in_place_64
+.type sp_4096_sub_in_place_64,@function
+.align 16
+sp_4096_sub_in_place_64:
+#else
+.globl _sp_4096_sub_in_place_64
+.p2align 4
+_sp_4096_sub_in_place_64:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = a (minuend, updated in place), rsi = b (subtrahend).
+ # 64-word borrow chain; final sbbq $0 turns the borrow flag into
+ # rax = 0 (no borrow) or -1 (borrow) for the caller to use as a mask.
+ # Loads alternate rdx/rcx so each store overlaps the next subtract.
+ movq (%rdi), %rdx
+ xorq %rax, %rax
+ subq (%rsi), %rdx
+ movq 8(%rdi), %rcx
+ movq %rdx, (%rdi)
+ sbbq 8(%rsi), %rcx
+ movq 16(%rdi), %rdx
+ movq %rcx, 8(%rdi)
+ sbbq 16(%rsi), %rdx
+ movq 24(%rdi), %rcx
+ movq %rdx, 16(%rdi)
+ sbbq 24(%rsi), %rcx
+ movq 32(%rdi), %rdx
+ movq %rcx, 24(%rdi)
+ sbbq 32(%rsi), %rdx
+ movq 40(%rdi), %rcx
+ movq %rdx, 32(%rdi)
+ sbbq 40(%rsi), %rcx
+ movq 48(%rdi), %rdx
+ movq %rcx, 40(%rdi)
+ sbbq 48(%rsi), %rdx
+ movq 56(%rdi), %rcx
+ movq %rdx, 48(%rdi)
+ sbbq 56(%rsi), %rcx
+ movq 64(%rdi), %rdx
+ movq %rcx, 56(%rdi)
+ sbbq 64(%rsi), %rdx
+ movq 72(%rdi), %rcx
+ movq %rdx, 64(%rdi)
+ sbbq 72(%rsi), %rcx
+ movq 80(%rdi), %rdx
+ movq %rcx, 72(%rdi)
+ sbbq 80(%rsi), %rdx
+ movq 88(%rdi), %rcx
+ movq %rdx, 80(%rdi)
+ sbbq 88(%rsi), %rcx
+ movq 96(%rdi), %rdx
+ movq %rcx, 88(%rdi)
+ sbbq 96(%rsi), %rdx
+ movq 104(%rdi), %rcx
+ movq %rdx, 96(%rdi)
+ sbbq 104(%rsi), %rcx
+ movq 112(%rdi), %rdx
+ movq %rcx, 104(%rdi)
+ sbbq 112(%rsi), %rdx
+ movq 120(%rdi), %rcx
+ movq %rdx, 112(%rdi)
+ sbbq 120(%rsi), %rcx
+ movq 128(%rdi), %rdx
+ movq %rcx, 120(%rdi)
+ sbbq 128(%rsi), %rdx
+ movq 136(%rdi), %rcx
+ movq %rdx, 128(%rdi)
+ sbbq 136(%rsi), %rcx
+ movq 144(%rdi), %rdx
+ movq %rcx, 136(%rdi)
+ sbbq 144(%rsi), %rdx
+ movq 152(%rdi), %rcx
+ movq %rdx, 144(%rdi)
+ sbbq 152(%rsi), %rcx
+ movq 160(%rdi), %rdx
+ movq %rcx, 152(%rdi)
+ sbbq 160(%rsi), %rdx
+ movq 168(%rdi), %rcx
+ movq %rdx, 160(%rdi)
+ sbbq 168(%rsi), %rcx
+ movq 176(%rdi), %rdx
+ movq %rcx, 168(%rdi)
+ sbbq 176(%rsi), %rdx
+ movq 184(%rdi), %rcx
+ movq %rdx, 176(%rdi)
+ sbbq 184(%rsi), %rcx
+ movq 192(%rdi), %rdx
+ movq %rcx, 184(%rdi)
+ sbbq 192(%rsi), %rdx
+ movq 200(%rdi), %rcx
+ movq %rdx, 192(%rdi)
+ sbbq 200(%rsi), %rcx
+ movq 208(%rdi), %rdx
+ movq %rcx, 200(%rdi)
+ sbbq 208(%rsi), %rdx
+ movq 216(%rdi), %rcx
+ movq %rdx, 208(%rdi)
+ sbbq 216(%rsi), %rcx
+ movq 224(%rdi), %rdx
+ movq %rcx, 216(%rdi)
+ sbbq 224(%rsi), %rdx
+ movq 232(%rdi), %rcx
+ movq %rdx, 224(%rdi)
+ sbbq 232(%rsi), %rcx
+ movq 240(%rdi), %rdx
+ movq %rcx, 232(%rdi)
+ sbbq 240(%rsi), %rdx
+ movq 248(%rdi), %rcx
+ movq %rdx, 240(%rdi)
+ sbbq 248(%rsi), %rcx
+ movq 256(%rdi), %rdx
+ movq %rcx, 248(%rdi)
+ sbbq 256(%rsi), %rdx
+ movq 264(%rdi), %rcx
+ movq %rdx, 256(%rdi)
+ sbbq 264(%rsi), %rcx
+ movq 272(%rdi), %rdx
+ movq %rcx, 264(%rdi)
+ sbbq 272(%rsi), %rdx
+ movq 280(%rdi), %rcx
+ movq %rdx, 272(%rdi)
+ sbbq 280(%rsi), %rcx
+ movq 288(%rdi), %rdx
+ movq %rcx, 280(%rdi)
+ sbbq 288(%rsi), %rdx
+ movq 296(%rdi), %rcx
+ movq %rdx, 288(%rdi)
+ sbbq 296(%rsi), %rcx
+ movq 304(%rdi), %rdx
+ movq %rcx, 296(%rdi)
+ sbbq 304(%rsi), %rdx
+ movq 312(%rdi), %rcx
+ movq %rdx, 304(%rdi)
+ sbbq 312(%rsi), %rcx
+ movq 320(%rdi), %rdx
+ movq %rcx, 312(%rdi)
+ sbbq 320(%rsi), %rdx
+ movq 328(%rdi), %rcx
+ movq %rdx, 320(%rdi)
+ sbbq 328(%rsi), %rcx
+ movq 336(%rdi), %rdx
+ movq %rcx, 328(%rdi)
+ sbbq 336(%rsi), %rdx
+ movq 344(%rdi), %rcx
+ movq %rdx, 336(%rdi)
+ sbbq 344(%rsi), %rcx
+ movq 352(%rdi), %rdx
+ movq %rcx, 344(%rdi)
+ sbbq 352(%rsi), %rdx
+ movq 360(%rdi), %rcx
+ movq %rdx, 352(%rdi)
+ sbbq 360(%rsi), %rcx
+ movq 368(%rdi), %rdx
+ movq %rcx, 360(%rdi)
+ sbbq 368(%rsi), %rdx
+ movq 376(%rdi), %rcx
+ movq %rdx, 368(%rdi)
+ sbbq 376(%rsi), %rcx
+ movq 384(%rdi), %rdx
+ movq %rcx, 376(%rdi)
+ sbbq 384(%rsi), %rdx
+ movq 392(%rdi), %rcx
+ movq %rdx, 384(%rdi)
+ sbbq 392(%rsi), %rcx
+ movq 400(%rdi), %rdx
+ movq %rcx, 392(%rdi)
+ sbbq 400(%rsi), %rdx
+ movq 408(%rdi), %rcx
+ movq %rdx, 400(%rdi)
+ sbbq 408(%rsi), %rcx
+ movq 416(%rdi), %rdx
+ movq %rcx, 408(%rdi)
+ sbbq 416(%rsi), %rdx
+ movq 424(%rdi), %rcx
+ movq %rdx, 416(%rdi)
+ sbbq 424(%rsi), %rcx
+ movq 432(%rdi), %rdx
+ movq %rcx, 424(%rdi)
+ sbbq 432(%rsi), %rdx
+ movq 440(%rdi), %rcx
+ movq %rdx, 432(%rdi)
+ sbbq 440(%rsi), %rcx
+ movq 448(%rdi), %rdx
+ movq %rcx, 440(%rdi)
+ sbbq 448(%rsi), %rdx
+ movq 456(%rdi), %rcx
+ movq %rdx, 448(%rdi)
+ sbbq 456(%rsi), %rcx
+ movq 464(%rdi), %rdx
+ movq %rcx, 456(%rdi)
+ sbbq 464(%rsi), %rdx
+ movq 472(%rdi), %rcx
+ movq %rdx, 464(%rdi)
+ sbbq 472(%rsi), %rcx
+ movq 480(%rdi), %rdx
+ movq %rcx, 472(%rdi)
+ sbbq 480(%rsi), %rdx
+ movq 488(%rdi), %rcx
+ movq %rdx, 480(%rdi)
+ sbbq 488(%rsi), %rcx
+ movq 496(%rdi), %rdx
+ movq %rcx, 488(%rdi)
+ sbbq 496(%rsi), %rdx
+ movq 504(%rdi), %rcx
+ movq %rdx, 496(%rdi)
+ sbbq 504(%rsi), %rcx
+ movq %rcx, 504(%rdi)
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_sub_in_place_64,.-sp_4096_sub_in_place_64
+#endif /* __APPLE__ */
+/* Add b to a into r. (r = a + b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_4096_add_64
+.type sp_4096_add_64,@function
+.align 16
+sp_4096_add_64:
+#else
+.globl _sp_4096_add_64
+.p2align 4
+_sp_4096_add_64:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b. 64-word carry chain;
+ # returns the final carry (0 or 1) in rax via adcq $0.
+ # Loads alternate rcx/r8 so each store overlaps the next add.
+ # Add
+ movq (%rsi), %rcx
+ xorq %rax, %rax
+ addq (%rdx), %rcx
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ adcq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ adcq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ adcq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ adcq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ adcq 40(%rdx), %r8
+ movq 48(%rsi), %rcx
+ movq %r8, 40(%rdi)
+ adcq 48(%rdx), %rcx
+ movq 56(%rsi), %r8
+ movq %rcx, 48(%rdi)
+ adcq 56(%rdx), %r8
+ movq 64(%rsi), %rcx
+ movq %r8, 56(%rdi)
+ adcq 64(%rdx), %rcx
+ movq 72(%rsi), %r8
+ movq %rcx, 64(%rdi)
+ adcq 72(%rdx), %r8
+ movq 80(%rsi), %rcx
+ movq %r8, 72(%rdi)
+ adcq 80(%rdx), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%rdi)
+ adcq 88(%rdx), %r8
+ movq 96(%rsi), %rcx
+ movq %r8, 88(%rdi)
+ adcq 96(%rdx), %rcx
+ movq 104(%rsi), %r8
+ movq %rcx, 96(%rdi)
+ adcq 104(%rdx), %r8
+ movq 112(%rsi), %rcx
+ movq %r8, 104(%rdi)
+ adcq 112(%rdx), %rcx
+ movq 120(%rsi), %r8
+ movq %rcx, 112(%rdi)
+ adcq 120(%rdx), %r8
+ movq 128(%rsi), %rcx
+ movq %r8, 120(%rdi)
+ adcq 128(%rdx), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%rdi)
+ adcq 136(%rdx), %r8
+ movq 144(%rsi), %rcx
+ movq %r8, 136(%rdi)
+ adcq 144(%rdx), %rcx
+ movq 152(%rsi), %r8
+ movq %rcx, 144(%rdi)
+ adcq 152(%rdx), %r8
+ movq 160(%rsi), %rcx
+ movq %r8, 152(%rdi)
+ adcq 160(%rdx), %rcx
+ movq 168(%rsi), %r8
+ movq %rcx, 160(%rdi)
+ adcq 168(%rdx), %r8
+ movq 176(%rsi), %rcx
+ movq %r8, 168(%rdi)
+ adcq 176(%rdx), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%rdi)
+ adcq 184(%rdx), %r8
+ movq 192(%rsi), %rcx
+ movq %r8, 184(%rdi)
+ adcq 192(%rdx), %rcx
+ movq 200(%rsi), %r8
+ movq %rcx, 192(%rdi)
+ adcq 200(%rdx), %r8
+ movq 208(%rsi), %rcx
+ movq %r8, 200(%rdi)
+ adcq 208(%rdx), %rcx
+ movq 216(%rsi), %r8
+ movq %rcx, 208(%rdi)
+ adcq 216(%rdx), %r8
+ movq 224(%rsi), %rcx
+ movq %r8, 216(%rdi)
+ adcq 224(%rdx), %rcx
+ movq 232(%rsi), %r8
+ movq %rcx, 224(%rdi)
+ adcq 232(%rdx), %r8
+ movq 240(%rsi), %rcx
+ movq %r8, 232(%rdi)
+ adcq 240(%rdx), %rcx
+ movq 248(%rsi), %r8
+ movq %rcx, 240(%rdi)
+ adcq 248(%rdx), %r8
+ movq 256(%rsi), %rcx
+ movq %r8, 248(%rdi)
+ adcq 256(%rdx), %rcx
+ movq 264(%rsi), %r8
+ movq %rcx, 256(%rdi)
+ adcq 264(%rdx), %r8
+ movq 272(%rsi), %rcx
+ movq %r8, 264(%rdi)
+ adcq 272(%rdx), %rcx
+ movq 280(%rsi), %r8
+ movq %rcx, 272(%rdi)
+ adcq 280(%rdx), %r8
+ movq 288(%rsi), %rcx
+ movq %r8, 280(%rdi)
+ adcq 288(%rdx), %rcx
+ movq 296(%rsi), %r8
+ movq %rcx, 288(%rdi)
+ adcq 296(%rdx), %r8
+ movq 304(%rsi), %rcx
+ movq %r8, 296(%rdi)
+ adcq 304(%rdx), %rcx
+ movq 312(%rsi), %r8
+ movq %rcx, 304(%rdi)
+ adcq 312(%rdx), %r8
+ movq 320(%rsi), %rcx
+ movq %r8, 312(%rdi)
+ adcq 320(%rdx), %rcx
+ movq 328(%rsi), %r8
+ movq %rcx, 320(%rdi)
+ adcq 328(%rdx), %r8
+ movq 336(%rsi), %rcx
+ movq %r8, 328(%rdi)
+ adcq 336(%rdx), %rcx
+ movq 344(%rsi), %r8
+ movq %rcx, 336(%rdi)
+ adcq 344(%rdx), %r8
+ movq 352(%rsi), %rcx
+ movq %r8, 344(%rdi)
+ adcq 352(%rdx), %rcx
+ movq 360(%rsi), %r8
+ movq %rcx, 352(%rdi)
+ adcq 360(%rdx), %r8
+ movq 368(%rsi), %rcx
+ movq %r8, 360(%rdi)
+ adcq 368(%rdx), %rcx
+ movq 376(%rsi), %r8
+ movq %rcx, 368(%rdi)
+ adcq 376(%rdx), %r8
+ movq 384(%rsi), %rcx
+ movq %r8, 376(%rdi)
+ adcq 384(%rdx), %rcx
+ movq 392(%rsi), %r8
+ movq %rcx, 384(%rdi)
+ adcq 392(%rdx), %r8
+ movq 400(%rsi), %rcx
+ movq %r8, 392(%rdi)
+ adcq 400(%rdx), %rcx
+ movq 408(%rsi), %r8
+ movq %rcx, 400(%rdi)
+ adcq 408(%rdx), %r8
+ movq 416(%rsi), %rcx
+ movq %r8, 408(%rdi)
+ adcq 416(%rdx), %rcx
+ movq 424(%rsi), %r8
+ movq %rcx, 416(%rdi)
+ adcq 424(%rdx), %r8
+ movq 432(%rsi), %rcx
+ movq %r8, 424(%rdi)
+ adcq 432(%rdx), %rcx
+ movq 440(%rsi), %r8
+ movq %rcx, 432(%rdi)
+ adcq 440(%rdx), %r8
+ movq 448(%rsi), %rcx
+ movq %r8, 440(%rdi)
+ adcq 448(%rdx), %rcx
+ movq 456(%rsi), %r8
+ movq %rcx, 448(%rdi)
+ adcq 456(%rdx), %r8
+ movq 464(%rsi), %rcx
+ movq %r8, 456(%rdi)
+ adcq 464(%rdx), %rcx
+ movq 472(%rsi), %r8
+ movq %rcx, 464(%rdi)
+ adcq 472(%rdx), %r8
+ movq 480(%rsi), %rcx
+ movq %r8, 472(%rdi)
+ adcq 480(%rdx), %rcx
+ movq 488(%rsi), %r8
+ movq %rcx, 480(%rdi)
+ adcq 488(%rdx), %r8
+ movq 496(%rsi), %rcx
+ movq %r8, 488(%rdi)
+ adcq 496(%rdx), %rcx
+ movq 504(%rsi), %r8
+ movq %rcx, 496(%rdi)
+ adcq 504(%rdx), %r8
+ movq %r8, 504(%rdi)
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_add_64,.-sp_4096_add_64
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_4096_mul_64
+.type sp_4096_mul_64,@function
+.align 16
+sp_4096_mul_64:
+#else
+.globl _sp_4096_mul_64
+.p2align 4
+_sp_4096_mul_64:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ subq $1576, %rsp
+ movq %rdi, 1536(%rsp)
+ movq %rsi, 1544(%rsp)
+ movq %rdx, 1552(%rsp)
+ leaq 1024(%rsp), %r10
+ leaq 256(%rsi), %r12
+ # Add
+ movq (%rsi), %rax
+ xorq %r13, %r13
+ addq (%r12), %rax
+ movq 8(%rsi), %rcx
+ movq %rax, (%r10)
+ adcq 8(%r12), %rcx
+ movq 16(%rsi), %r8
+ movq %rcx, 8(%r10)
+ adcq 16(%r12), %r8
+ movq 24(%rsi), %rax
+ movq %r8, 16(%r10)
+ adcq 24(%r12), %rax
+ movq 32(%rsi), %rcx
+ movq %rax, 24(%r10)
+ adcq 32(%r12), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%r10)
+ adcq 40(%r12), %r8
+ movq 48(%rsi), %rax
+ movq %r8, 40(%r10)
+ adcq 48(%r12), %rax
+ movq 56(%rsi), %rcx
+ movq %rax, 48(%r10)
+ adcq 56(%r12), %rcx
+ movq 64(%rsi), %r8
+ movq %rcx, 56(%r10)
+ adcq 64(%r12), %r8
+ movq 72(%rsi), %rax
+ movq %r8, 64(%r10)
+ adcq 72(%r12), %rax
+ movq 80(%rsi), %rcx
+ movq %rax, 72(%r10)
+ adcq 80(%r12), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%r10)
+ adcq 88(%r12), %r8
+ movq 96(%rsi), %rax
+ movq %r8, 88(%r10)
+ adcq 96(%r12), %rax
+ movq 104(%rsi), %rcx
+ movq %rax, 96(%r10)
+ adcq 104(%r12), %rcx
+ movq 112(%rsi), %r8
+ movq %rcx, 104(%r10)
+ adcq 112(%r12), %r8
+ movq 120(%rsi), %rax
+ movq %r8, 112(%r10)
+ adcq 120(%r12), %rax
+ movq 128(%rsi), %rcx
+ movq %rax, 120(%r10)
+ adcq 128(%r12), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%r10)
+ adcq 136(%r12), %r8
+ movq 144(%rsi), %rax
+ movq %r8, 136(%r10)
+ adcq 144(%r12), %rax
+ movq 152(%rsi), %rcx
+ movq %rax, 144(%r10)
+ adcq 152(%r12), %rcx
+ movq 160(%rsi), %r8
+ movq %rcx, 152(%r10)
+ adcq 160(%r12), %r8
+ movq 168(%rsi), %rax
+ movq %r8, 160(%r10)
+ adcq 168(%r12), %rax
+ movq 176(%rsi), %rcx
+ movq %rax, 168(%r10)
+ adcq 176(%r12), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%r10)
+ adcq 184(%r12), %r8
+ movq 192(%rsi), %rax
+ movq %r8, 184(%r10)
+ adcq 192(%r12), %rax
+ movq 200(%rsi), %rcx
+ movq %rax, 192(%r10)
+ adcq 200(%r12), %rcx
+ movq 208(%rsi), %r8
+ movq %rcx, 200(%r10)
+ adcq 208(%r12), %r8
+ movq 216(%rsi), %rax
+ movq %r8, 208(%r10)
+ adcq 216(%r12), %rax
+ movq 224(%rsi), %rcx
+ movq %rax, 216(%r10)
+ adcq 224(%r12), %rcx
+ movq 232(%rsi), %r8
+ movq %rcx, 224(%r10)
+ adcq 232(%r12), %r8
+ movq 240(%rsi), %rax
+ movq %r8, 232(%r10)
+ adcq 240(%r12), %rax
+ movq 248(%rsi), %rcx
+ movq %rax, 240(%r10)
+ adcq 248(%r12), %rcx
+ movq %rcx, 248(%r10)
+ adcq $0, %r13
+ movq %r13, 1560(%rsp)
+ leaq 1280(%rsp), %r11
+ leaq 256(%rdx), %r12
+ # Add
+ movq (%rdx), %rax
+ xorq %r14, %r14
+ addq (%r12), %rax
+ movq 8(%rdx), %rcx
+ movq %rax, (%r11)
+ adcq 8(%r12), %rcx
+ movq 16(%rdx), %r8
+ movq %rcx, 8(%r11)
+ adcq 16(%r12), %r8
+ movq 24(%rdx), %rax
+ movq %r8, 16(%r11)
+ adcq 24(%r12), %rax
+ movq 32(%rdx), %rcx
+ movq %rax, 24(%r11)
+ adcq 32(%r12), %rcx
+ movq 40(%rdx), %r8
+ movq %rcx, 32(%r11)
+ adcq 40(%r12), %r8
+ movq 48(%rdx), %rax
+ movq %r8, 40(%r11)
+ adcq 48(%r12), %rax
+ movq 56(%rdx), %rcx
+ movq %rax, 48(%r11)
+ adcq 56(%r12), %rcx
+ movq 64(%rdx), %r8
+ movq %rcx, 56(%r11)
+ adcq 64(%r12), %r8
+ movq 72(%rdx), %rax
+ movq %r8, 64(%r11)
+ adcq 72(%r12), %rax
+ movq 80(%rdx), %rcx
+ movq %rax, 72(%r11)
+ adcq 80(%r12), %rcx
+ movq 88(%rdx), %r8
+ movq %rcx, 80(%r11)
+ adcq 88(%r12), %r8
+ movq 96(%rdx), %rax
+ movq %r8, 88(%r11)
+ adcq 96(%r12), %rax
+ movq 104(%rdx), %rcx
+ movq %rax, 96(%r11)
+ adcq 104(%r12), %rcx
+ movq 112(%rdx), %r8
+ movq %rcx, 104(%r11)
+ adcq 112(%r12), %r8
+ movq 120(%rdx), %rax
+ movq %r8, 112(%r11)
+ adcq 120(%r12), %rax
+ movq 128(%rdx), %rcx
+ movq %rax, 120(%r11)
+ adcq 128(%r12), %rcx
+ movq 136(%rdx), %r8
+ movq %rcx, 128(%r11)
+ adcq 136(%r12), %r8
+ movq 144(%rdx), %rax
+ movq %r8, 136(%r11)
+ adcq 144(%r12), %rax
+ movq 152(%rdx), %rcx
+ movq %rax, 144(%r11)
+ adcq 152(%r12), %rcx
+ movq 160(%rdx), %r8
+ movq %rcx, 152(%r11)
+ adcq 160(%r12), %r8
+ movq 168(%rdx), %rax
+ movq %r8, 160(%r11)
+ adcq 168(%r12), %rax
+ movq 176(%rdx), %rcx
+ movq %rax, 168(%r11)
+ adcq 176(%r12), %rcx
+ movq 184(%rdx), %r8
+ movq %rcx, 176(%r11)
+ adcq 184(%r12), %r8
+ movq 192(%rdx), %rax
+ movq %r8, 184(%r11)
+ adcq 192(%r12), %rax
+ movq 200(%rdx), %rcx
+ movq %rax, 192(%r11)
+ adcq 200(%r12), %rcx
+ movq 208(%rdx), %r8
+ movq %rcx, 200(%r11)
+ adcq 208(%r12), %r8
+ movq 216(%rdx), %rax
+ movq %r8, 208(%r11)
+ adcq 216(%r12), %rax
+ movq 224(%rdx), %rcx
+ movq %rax, 216(%r11)
+ adcq 224(%r12), %rcx
+ movq 232(%rdx), %r8
+ movq %rcx, 224(%r11)
+ adcq 232(%r12), %r8
+ movq 240(%rdx), %rax
+ movq %r8, 232(%r11)
+ adcq 240(%r12), %rax
+ movq 248(%rdx), %rcx
+ movq %rax, 240(%r11)
+ adcq 248(%r12), %rcx
+ movq %rcx, 248(%r11)
+ adcq $0, %r14
+ movq %r14, 1568(%rsp)
+ movq %r11, %rdx
+ movq %r10, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_2048_mul_32@plt
+#else
+ callq _sp_2048_mul_32
+#endif /* __APPLE__ */
+ movq 1552(%rsp), %rdx
+ movq 1544(%rsp), %rsi
+ leaq 512(%rsp), %rdi
+ addq $256, %rdx
+ addq $256, %rsi
+#ifndef __APPLE__
+ callq sp_2048_mul_32@plt
+#else
+ callq _sp_2048_mul_32
+#endif /* __APPLE__ */
+ movq 1552(%rsp), %rdx
+ movq 1544(%rsp), %rsi
+ movq 1536(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_2048_mul_32@plt
+#else
+ callq _sp_2048_mul_32
+#endif /* __APPLE__ */
+ movq 1560(%rsp), %r13
+ movq 1568(%rsp), %r14
+ movq 1536(%rsp), %r15
+ movq %r13, %r9
+ leaq 1024(%rsp), %r10
+ leaq 1280(%rsp), %r11
+ andq %r14, %r9
+ negq %r13
+ negq %r14
+ addq $512, %r15
+ movq (%r10), %rax
+ movq (%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, (%r10)
+ movq %rcx, (%r11)
+ movq 8(%r10), %rax
+ movq 8(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 8(%r10)
+ movq %rcx, 8(%r11)
+ movq 16(%r10), %rax
+ movq 16(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 16(%r10)
+ movq %rcx, 16(%r11)
+ movq 24(%r10), %rax
+ movq 24(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 24(%r10)
+ movq %rcx, 24(%r11)
+ movq 32(%r10), %rax
+ movq 32(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 32(%r10)
+ movq %rcx, 32(%r11)
+ movq 40(%r10), %rax
+ movq 40(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 40(%r10)
+ movq %rcx, 40(%r11)
+ movq 48(%r10), %rax
+ movq 48(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 48(%r10)
+ movq %rcx, 48(%r11)
+ movq 56(%r10), %rax
+ movq 56(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 56(%r10)
+ movq %rcx, 56(%r11)
+ movq 64(%r10), %rax
+ movq 64(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 64(%r10)
+ movq %rcx, 64(%r11)
+ movq 72(%r10), %rax
+ movq 72(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 72(%r10)
+ movq %rcx, 72(%r11)
+ movq 80(%r10), %rax
+ movq 80(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 80(%r10)
+ movq %rcx, 80(%r11)
+ movq 88(%r10), %rax
+ movq 88(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 88(%r10)
+ movq %rcx, 88(%r11)
+ movq 96(%r10), %rax
+ movq 96(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 96(%r10)
+ movq %rcx, 96(%r11)
+ movq 104(%r10), %rax
+ movq 104(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 104(%r10)
+ movq %rcx, 104(%r11)
+ movq 112(%r10), %rax
+ movq 112(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 112(%r10)
+ movq %rcx, 112(%r11)
+ movq 120(%r10), %rax
+ movq 120(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 120(%r10)
+ movq %rcx, 120(%r11)
+ movq 128(%r10), %rax
+ movq 128(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 128(%r10)
+ movq %rcx, 128(%r11)
+ movq 136(%r10), %rax
+ movq 136(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 136(%r10)
+ movq %rcx, 136(%r11)
+ movq 144(%r10), %rax
+ movq 144(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 144(%r10)
+ movq %rcx, 144(%r11)
+ movq 152(%r10), %rax
+ movq 152(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 152(%r10)
+ movq %rcx, 152(%r11)
+ movq 160(%r10), %rax
+ movq 160(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 160(%r10)
+ movq %rcx, 160(%r11)
+ movq 168(%r10), %rax
+ movq 168(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 168(%r10)
+ movq %rcx, 168(%r11)
+ movq 176(%r10), %rax
+ movq 176(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 176(%r10)
+ movq %rcx, 176(%r11)
+ movq 184(%r10), %rax
+ movq 184(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 184(%r10)
+ movq %rcx, 184(%r11)
+ movq 192(%r10), %rax
+ movq 192(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 192(%r10)
+ movq %rcx, 192(%r11)
+ movq 200(%r10), %rax
+ movq 200(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 200(%r10)
+ movq %rcx, 200(%r11)
+ movq 208(%r10), %rax
+ movq 208(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 208(%r10)
+ movq %rcx, 208(%r11)
+ movq 216(%r10), %rax
+ movq 216(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 216(%r10)
+ movq %rcx, 216(%r11)
+ movq 224(%r10), %rax
+ movq 224(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 224(%r10)
+ movq %rcx, 224(%r11)
+ movq 232(%r10), %rax
+ movq 232(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 232(%r10)
+ movq %rcx, 232(%r11)
+ movq 240(%r10), %rax
+ movq 240(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 240(%r10)
+ movq %rcx, 240(%r11)
+ movq 248(%r10), %rax
+ movq 248(%r11), %rcx
+ andq %r14, %rax
+ andq %r13, %rcx
+ movq %rax, 248(%r10)
+ movq %rcx, 248(%r11)
+ movq (%r10), %rax
+ addq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r11), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r11), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r15)
+ adcq 200(%r11), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r15)
+ adcq 208(%r11), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r15)
+ adcq 216(%r11), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r15)
+ adcq 224(%r11), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r15)
+ adcq 232(%r11), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r15)
+ adcq 240(%r11), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r15)
+ adcq 248(%r11), %rcx
+ movq %rcx, 248(%r15)
+ adcq $0, %r9
+ leaq 512(%rsp), %r11
+ movq %rsp, %r10
+ movq (%r10), %rax
+ subq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%r11), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%r11), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%r11), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%r11), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%r11), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%r11), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%r11), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%r11), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%r11), %rcx
+ movq 256(%r10), %r8
+ movq %rcx, 248(%r10)
+ sbbq 256(%r11), %r8
+ movq 264(%r10), %rax
+ movq %r8, 256(%r10)
+ sbbq 264(%r11), %rax
+ movq 272(%r10), %rcx
+ movq %rax, 264(%r10)
+ sbbq 272(%r11), %rcx
+ movq 280(%r10), %r8
+ movq %rcx, 272(%r10)
+ sbbq 280(%r11), %r8
+ movq 288(%r10), %rax
+ movq %r8, 280(%r10)
+ sbbq 288(%r11), %rax
+ movq 296(%r10), %rcx
+ movq %rax, 288(%r10)
+ sbbq 296(%r11), %rcx
+ movq 304(%r10), %r8
+ movq %rcx, 296(%r10)
+ sbbq 304(%r11), %r8
+ movq 312(%r10), %rax
+ movq %r8, 304(%r10)
+ sbbq 312(%r11), %rax
+ movq 320(%r10), %rcx
+ movq %rax, 312(%r10)
+ sbbq 320(%r11), %rcx
+ movq 328(%r10), %r8
+ movq %rcx, 320(%r10)
+ sbbq 328(%r11), %r8
+ movq 336(%r10), %rax
+ movq %r8, 328(%r10)
+ sbbq 336(%r11), %rax
+ movq 344(%r10), %rcx
+ movq %rax, 336(%r10)
+ sbbq 344(%r11), %rcx
+ movq 352(%r10), %r8
+ movq %rcx, 344(%r10)
+ sbbq 352(%r11), %r8
+ movq 360(%r10), %rax
+ movq %r8, 352(%r10)
+ sbbq 360(%r11), %rax
+ movq 368(%r10), %rcx
+ movq %rax, 360(%r10)
+ sbbq 368(%r11), %rcx
+ movq 376(%r10), %r8
+ movq %rcx, 368(%r10)
+ sbbq 376(%r11), %r8
+ movq 384(%r10), %rax
+ movq %r8, 376(%r10)
+ sbbq 384(%r11), %rax
+ movq 392(%r10), %rcx
+ movq %rax, 384(%r10)
+ sbbq 392(%r11), %rcx
+ movq 400(%r10), %r8
+ movq %rcx, 392(%r10)
+ sbbq 400(%r11), %r8
+ movq 408(%r10), %rax
+ movq %r8, 400(%r10)
+ sbbq 408(%r11), %rax
+ movq 416(%r10), %rcx
+ movq %rax, 408(%r10)
+ sbbq 416(%r11), %rcx
+ movq 424(%r10), %r8
+ movq %rcx, 416(%r10)
+ sbbq 424(%r11), %r8
+ movq 432(%r10), %rax
+ movq %r8, 424(%r10)
+ sbbq 432(%r11), %rax
+ movq 440(%r10), %rcx
+ movq %rax, 432(%r10)
+ sbbq 440(%r11), %rcx
+ movq 448(%r10), %r8
+ movq %rcx, 440(%r10)
+ sbbq 448(%r11), %r8
+ movq 456(%r10), %rax
+ movq %r8, 448(%r10)
+ sbbq 456(%r11), %rax
+ movq 464(%r10), %rcx
+ movq %rax, 456(%r10)
+ sbbq 464(%r11), %rcx
+ movq 472(%r10), %r8
+ movq %rcx, 464(%r10)
+ sbbq 472(%r11), %r8
+ movq 480(%r10), %rax
+ movq %r8, 472(%r10)
+ sbbq 480(%r11), %rax
+ movq 488(%r10), %rcx
+ movq %rax, 480(%r10)
+ sbbq 488(%r11), %rcx
+ movq 496(%r10), %r8
+ movq %rcx, 488(%r10)
+ sbbq 496(%r11), %r8
+ movq 504(%r10), %rax
+ movq %r8, 496(%r10)
+ sbbq 504(%r11), %rax
+ movq %rax, 504(%r10)
+ sbbq $0, %r9
+ movq (%r10), %rax
+ subq (%rdi), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%rdi), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%rdi), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%rdi), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%rdi), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%rdi), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%rdi), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%rdi), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%rdi), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%rdi), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%rdi), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%rdi), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%rdi), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%rdi), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%rdi), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%rdi), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%rdi), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%rdi), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%rdi), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%rdi), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%rdi), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%rdi), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%rdi), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%rdi), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%rdi), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%rdi), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%rdi), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%rdi), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%rdi), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%rdi), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%rdi), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%rdi), %rcx
+ movq 256(%r10), %r8
+ movq %rcx, 248(%r10)
+ sbbq 256(%rdi), %r8
+ movq 264(%r10), %rax
+ movq %r8, 256(%r10)
+ sbbq 264(%rdi), %rax
+ movq 272(%r10), %rcx
+ movq %rax, 264(%r10)
+ sbbq 272(%rdi), %rcx
+ movq 280(%r10), %r8
+ movq %rcx, 272(%r10)
+ sbbq 280(%rdi), %r8
+ movq 288(%r10), %rax
+ movq %r8, 280(%r10)
+ sbbq 288(%rdi), %rax
+ movq 296(%r10), %rcx
+ movq %rax, 288(%r10)
+ sbbq 296(%rdi), %rcx
+ movq 304(%r10), %r8
+ movq %rcx, 296(%r10)
+ sbbq 304(%rdi), %r8
+ movq 312(%r10), %rax
+ movq %r8, 304(%r10)
+ sbbq 312(%rdi), %rax
+ movq 320(%r10), %rcx
+ movq %rax, 312(%r10)
+ sbbq 320(%rdi), %rcx
+ movq 328(%r10), %r8
+ movq %rcx, 320(%r10)
+ sbbq 328(%rdi), %r8
+ movq 336(%r10), %rax
+ movq %r8, 328(%r10)
+ sbbq 336(%rdi), %rax
+ movq 344(%r10), %rcx
+ movq %rax, 336(%r10)
+ sbbq 344(%rdi), %rcx
+ movq 352(%r10), %r8
+ movq %rcx, 344(%r10)
+ sbbq 352(%rdi), %r8
+ movq 360(%r10), %rax
+ movq %r8, 352(%r10)
+ sbbq 360(%rdi), %rax
+ movq 368(%r10), %rcx
+ movq %rax, 360(%r10)
+ sbbq 368(%rdi), %rcx
+ movq 376(%r10), %r8
+ movq %rcx, 368(%r10)
+ sbbq 376(%rdi), %r8
+ movq 384(%r10), %rax
+ movq %r8, 376(%r10)
+ sbbq 384(%rdi), %rax
+ movq 392(%r10), %rcx
+ movq %rax, 384(%r10)
+ sbbq 392(%rdi), %rcx
+ movq 400(%r10), %r8
+ movq %rcx, 392(%r10)
+ sbbq 400(%rdi), %r8
+ movq 408(%r10), %rax
+ movq %r8, 400(%r10)
+ sbbq 408(%rdi), %rax
+ movq 416(%r10), %rcx
+ movq %rax, 408(%r10)
+ sbbq 416(%rdi), %rcx
+ movq 424(%r10), %r8
+ movq %rcx, 416(%r10)
+ sbbq 424(%rdi), %r8
+ movq 432(%r10), %rax
+ movq %r8, 424(%r10)
+ sbbq 432(%rdi), %rax
+ movq 440(%r10), %rcx
+ movq %rax, 432(%r10)
+ sbbq 440(%rdi), %rcx
+ movq 448(%r10), %r8
+ movq %rcx, 440(%r10)
+ sbbq 448(%rdi), %r8
+ movq 456(%r10), %rax
+ movq %r8, 448(%r10)
+ sbbq 456(%rdi), %rax
+ movq 464(%r10), %rcx
+ movq %rax, 456(%r10)
+ sbbq 464(%rdi), %rcx
+ movq 472(%r10), %r8
+ movq %rcx, 464(%r10)
+ sbbq 472(%rdi), %r8
+ movq 480(%r10), %rax
+ movq %r8, 472(%r10)
+ sbbq 480(%rdi), %rax
+ movq 488(%r10), %rcx
+ movq %rax, 480(%r10)
+ sbbq 488(%rdi), %rcx
+ movq 496(%r10), %r8
+ movq %rcx, 488(%r10)
+ sbbq 496(%rdi), %r8
+ movq 504(%r10), %rax
+ movq %r8, 496(%r10)
+ sbbq 504(%rdi), %rax
+ movq %rax, 504(%r10)
+ sbbq $0, %r9
+ subq $256, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r10), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r10), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r10), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r10), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r10), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r10), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r10), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r10), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r10), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r10), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r10), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r10), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r10), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r10), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r10), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r10), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r10), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r10), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r10), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r10), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r10), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r10), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r10), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r10), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r10), %rax
+ movq 200(%r15), %rcx
+ movq %rax, 192(%r15)
+ adcq 200(%r10), %rcx
+ movq 208(%r15), %r8
+ movq %rcx, 200(%r15)
+ adcq 208(%r10), %r8
+ movq 216(%r15), %rax
+ movq %r8, 208(%r15)
+ adcq 216(%r10), %rax
+ movq 224(%r15), %rcx
+ movq %rax, 216(%r15)
+ adcq 224(%r10), %rcx
+ movq 232(%r15), %r8
+ movq %rcx, 224(%r15)
+ adcq 232(%r10), %r8
+ movq 240(%r15), %rax
+ movq %r8, 232(%r15)
+ adcq 240(%r10), %rax
+ movq 248(%r15), %rcx
+ movq %rax, 240(%r15)
+ adcq 248(%r10), %rcx
+ movq 256(%r15), %r8
+ movq %rcx, 248(%r15)
+ adcq 256(%r10), %r8
+ movq 264(%r15), %rax
+ movq %r8, 256(%r15)
+ adcq 264(%r10), %rax
+ movq 272(%r15), %rcx
+ movq %rax, 264(%r15)
+ adcq 272(%r10), %rcx
+ movq 280(%r15), %r8
+ movq %rcx, 272(%r15)
+ adcq 280(%r10), %r8
+ movq 288(%r15), %rax
+ movq %r8, 280(%r15)
+ adcq 288(%r10), %rax
+ movq 296(%r15), %rcx
+ movq %rax, 288(%r15)
+ adcq 296(%r10), %rcx
+ movq 304(%r15), %r8
+ movq %rcx, 296(%r15)
+ adcq 304(%r10), %r8
+ movq 312(%r15), %rax
+ movq %r8, 304(%r15)
+ adcq 312(%r10), %rax
+ movq 320(%r15), %rcx
+ movq %rax, 312(%r15)
+ adcq 320(%r10), %rcx
+ movq 328(%r15), %r8
+ movq %rcx, 320(%r15)
+ adcq 328(%r10), %r8
+ movq 336(%r15), %rax
+ movq %r8, 328(%r15)
+ adcq 336(%r10), %rax
+ movq 344(%r15), %rcx
+ movq %rax, 336(%r15)
+ adcq 344(%r10), %rcx
+ movq 352(%r15), %r8
+ movq %rcx, 344(%r15)
+ adcq 352(%r10), %r8
+ movq 360(%r15), %rax
+ movq %r8, 352(%r15)
+ adcq 360(%r10), %rax
+ movq 368(%r15), %rcx
+ movq %rax, 360(%r15)
+ adcq 368(%r10), %rcx
+ movq 376(%r15), %r8
+ movq %rcx, 368(%r15)
+ adcq 376(%r10), %r8
+ movq 384(%r15), %rax
+ movq %r8, 376(%r15)
+ adcq 384(%r10), %rax
+ movq 392(%r15), %rcx
+ movq %rax, 384(%r15)
+ adcq 392(%r10), %rcx
+ movq 400(%r15), %r8
+ movq %rcx, 392(%r15)
+ adcq 400(%r10), %r8
+ movq 408(%r15), %rax
+ movq %r8, 400(%r15)
+ adcq 408(%r10), %rax
+ movq 416(%r15), %rcx
+ movq %rax, 408(%r15)
+ adcq 416(%r10), %rcx
+ movq 424(%r15), %r8
+ movq %rcx, 416(%r15)
+ adcq 424(%r10), %r8
+ movq 432(%r15), %rax
+ movq %r8, 424(%r15)
+ adcq 432(%r10), %rax
+ movq 440(%r15), %rcx
+ movq %rax, 432(%r15)
+ adcq 440(%r10), %rcx
+ movq 448(%r15), %r8
+ movq %rcx, 440(%r15)
+ adcq 448(%r10), %r8
+ movq 456(%r15), %rax
+ movq %r8, 448(%r15)
+ adcq 456(%r10), %rax
+ movq 464(%r15), %rcx
+ movq %rax, 456(%r15)
+ adcq 464(%r10), %rcx
+ movq 472(%r15), %r8
+ movq %rcx, 464(%r15)
+ adcq 472(%r10), %r8
+ movq 480(%r15), %rax
+ movq %r8, 472(%r15)
+ adcq 480(%r10), %rax
+ movq 488(%r15), %rcx
+ movq %rax, 480(%r15)
+ adcq 488(%r10), %rcx
+ movq 496(%r15), %r8
+ movq %rcx, 488(%r15)
+ adcq 496(%r10), %r8
+ movq 504(%r15), %rax
+ movq %r8, 496(%r15)
+ adcq 504(%r10), %rax
+ movq %rax, 504(%r15)
+ adcq $0, %r9
+ movq %r9, 768(%rdi)
+ addq $256, %r15
+ # Add
+ movq (%r15), %rax
+ xorq %r9, %r9
+ addq (%r11), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r11), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r11), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r11), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r11), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r11), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r11), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r11), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r11), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r11), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r11), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r11), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r11), %rax
+ movq 200(%r15), %rcx
+ movq %rax, 192(%r15)
+ adcq 200(%r11), %rcx
+ movq 208(%r15), %r8
+ movq %rcx, 200(%r15)
+ adcq 208(%r11), %r8
+ movq 216(%r15), %rax
+ movq %r8, 208(%r15)
+ adcq 216(%r11), %rax
+ movq 224(%r15), %rcx
+ movq %rax, 216(%r15)
+ adcq 224(%r11), %rcx
+ movq 232(%r15), %r8
+ movq %rcx, 224(%r15)
+ adcq 232(%r11), %r8
+ movq 240(%r15), %rax
+ movq %r8, 232(%r15)
+ adcq 240(%r11), %rax
+ movq 248(%r15), %rcx
+ movq %rax, 240(%r15)
+ adcq 248(%r11), %rcx
+ movq 256(%r15), %r8
+ movq %rcx, 248(%r15)
+ adcq 256(%r11), %r8
+ movq %r8, 256(%r15)
+ adcq $0, %r9
+ # Add to zero
+ movq 264(%r11), %rax
+ adcq $0, %rax
+ movq 272(%r11), %rcx
+ movq %rax, 264(%r15)
+ adcq $0, %rcx
+ movq 280(%r11), %r8
+ movq %rcx, 272(%r15)
+ adcq $0, %r8
+ movq 288(%r11), %rax
+ movq %r8, 280(%r15)
+ adcq $0, %rax
+ movq 296(%r11), %rcx
+ movq %rax, 288(%r15)
+ adcq $0, %rcx
+ movq 304(%r11), %r8
+ movq %rcx, 296(%r15)
+ adcq $0, %r8
+ movq 312(%r11), %rax
+ movq %r8, 304(%r15)
+ adcq $0, %rax
+ movq 320(%r11), %rcx
+ movq %rax, 312(%r15)
+ adcq $0, %rcx
+ movq 328(%r11), %r8
+ movq %rcx, 320(%r15)
+ adcq $0, %r8
+ movq 336(%r11), %rax
+ movq %r8, 328(%r15)
+ adcq $0, %rax
+ movq 344(%r11), %rcx
+ movq %rax, 336(%r15)
+ adcq $0, %rcx
+ movq 352(%r11), %r8
+ movq %rcx, 344(%r15)
+ adcq $0, %r8
+ movq 360(%r11), %rax
+ movq %r8, 352(%r15)
+ adcq $0, %rax
+ movq 368(%r11), %rcx
+ movq %rax, 360(%r15)
+ adcq $0, %rcx
+ movq 376(%r11), %r8
+ movq %rcx, 368(%r15)
+ adcq $0, %r8
+ movq 384(%r11), %rax
+ movq %r8, 376(%r15)
+ adcq $0, %rax
+ movq 392(%r11), %rcx
+ movq %rax, 384(%r15)
+ adcq $0, %rcx
+ movq 400(%r11), %r8
+ movq %rcx, 392(%r15)
+ adcq $0, %r8
+ movq 408(%r11), %rax
+ movq %r8, 400(%r15)
+ adcq $0, %rax
+ movq 416(%r11), %rcx
+ movq %rax, 408(%r15)
+ adcq $0, %rcx
+ movq 424(%r11), %r8
+ movq %rcx, 416(%r15)
+ adcq $0, %r8
+ movq 432(%r11), %rax
+ movq %r8, 424(%r15)
+ adcq $0, %rax
+ movq 440(%r11), %rcx
+ movq %rax, 432(%r15)
+ adcq $0, %rcx
+ movq 448(%r11), %r8
+ movq %rcx, 440(%r15)
+ adcq $0, %r8
+ movq 456(%r11), %rax
+ movq %r8, 448(%r15)
+ adcq $0, %rax
+ movq 464(%r11), %rcx
+ movq %rax, 456(%r15)
+ adcq $0, %rcx
+ movq 472(%r11), %r8
+ movq %rcx, 464(%r15)
+ adcq $0, %r8
+ movq 480(%r11), %rax
+ movq %r8, 472(%r15)
+ adcq $0, %rax
+ movq 488(%r11), %rcx
+ movq %rax, 480(%r15)
+ adcq $0, %rcx
+ movq 496(%r11), %r8
+ movq %rcx, 488(%r15)
+ adcq $0, %r8
+ movq 504(%r11), %rax
+ movq %r8, 496(%r15)
+ adcq $0, %rax
+ movq %rax, 504(%r15)
+ addq $1576, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_mul_64,.-sp_4096_mul_64
+#endif /* __APPLE__ */
+/* Add a to a into r. (r = a + a)
+ *
+ * Doubles a 2048-bit integer held as 32 little-endian 64-bit limbs.
+ * Fully unrolled add-with-carry chain; the carry out of the top limb
+ * is returned in %rax (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_2048_dbl_32
+.type sp_2048_dbl_32,@function
+.align 16
+sp_2048_dbl_32:
+#else
+.globl _sp_2048_dbl_32
+.p2align 4
+_sp_2048_dbl_32:
+#endif /* __APPLE__ */
+ # SysV AMD64: %rdi = r (out), %rsi = a (in); %rax = carry accumulator.
+ # Loads and stores are interleaved with the adcq chain, so CF must be
+ # preserved across every movq below (movq does not touch flags).
+ movq (%rsi), %rdx
+ xorq %rax, %rax
+ # limb 0: plain addq starts the carry chain; all later limbs use adcq.
+ addq %rdx, %rdx
+ movq 8(%rsi), %rcx
+ movq %rdx, (%rdi)
+ adcq %rcx, %rcx
+ movq 16(%rsi), %rdx
+ movq %rcx, 8(%rdi)
+ adcq %rdx, %rdx
+ movq 24(%rsi), %rcx
+ movq %rdx, 16(%rdi)
+ adcq %rcx, %rcx
+ movq 32(%rsi), %rdx
+ movq %rcx, 24(%rdi)
+ adcq %rdx, %rdx
+ movq 40(%rsi), %rcx
+ movq %rdx, 32(%rdi)
+ adcq %rcx, %rcx
+ movq 48(%rsi), %rdx
+ movq %rcx, 40(%rdi)
+ adcq %rdx, %rdx
+ movq 56(%rsi), %rcx
+ movq %rdx, 48(%rdi)
+ adcq %rcx, %rcx
+ movq 64(%rsi), %rdx
+ movq %rcx, 56(%rdi)
+ adcq %rdx, %rdx
+ movq 72(%rsi), %rcx
+ movq %rdx, 64(%rdi)
+ adcq %rcx, %rcx
+ movq 80(%rsi), %rdx
+ movq %rcx, 72(%rdi)
+ adcq %rdx, %rdx
+ movq 88(%rsi), %rcx
+ movq %rdx, 80(%rdi)
+ adcq %rcx, %rcx
+ movq 96(%rsi), %rdx
+ movq %rcx, 88(%rdi)
+ adcq %rdx, %rdx
+ movq 104(%rsi), %rcx
+ movq %rdx, 96(%rdi)
+ adcq %rcx, %rcx
+ movq 112(%rsi), %rdx
+ movq %rcx, 104(%rdi)
+ adcq %rdx, %rdx
+ movq 120(%rsi), %rcx
+ movq %rdx, 112(%rdi)
+ adcq %rcx, %rcx
+ movq 128(%rsi), %rdx
+ movq %rcx, 120(%rdi)
+ adcq %rdx, %rdx
+ movq 136(%rsi), %rcx
+ movq %rdx, 128(%rdi)
+ adcq %rcx, %rcx
+ movq 144(%rsi), %rdx
+ movq %rcx, 136(%rdi)
+ adcq %rdx, %rdx
+ movq 152(%rsi), %rcx
+ movq %rdx, 144(%rdi)
+ adcq %rcx, %rcx
+ movq 160(%rsi), %rdx
+ movq %rcx, 152(%rdi)
+ adcq %rdx, %rdx
+ movq 168(%rsi), %rcx
+ movq %rdx, 160(%rdi)
+ adcq %rcx, %rcx
+ movq 176(%rsi), %rdx
+ movq %rcx, 168(%rdi)
+ adcq %rdx, %rdx
+ movq 184(%rsi), %rcx
+ movq %rdx, 176(%rdi)
+ adcq %rcx, %rcx
+ movq 192(%rsi), %rdx
+ movq %rcx, 184(%rdi)
+ adcq %rdx, %rdx
+ movq 200(%rsi), %rcx
+ movq %rdx, 192(%rdi)
+ adcq %rcx, %rcx
+ movq 208(%rsi), %rdx
+ movq %rcx, 200(%rdi)
+ adcq %rdx, %rdx
+ movq 216(%rsi), %rcx
+ movq %rdx, 208(%rdi)
+ adcq %rcx, %rcx
+ movq 224(%rsi), %rdx
+ movq %rcx, 216(%rdi)
+ adcq %rdx, %rdx
+ movq 232(%rsi), %rcx
+ movq %rdx, 224(%rdi)
+ adcq %rcx, %rcx
+ movq 240(%rsi), %rdx
+ movq %rcx, 232(%rdi)
+ adcq %rdx, %rdx
+ movq 248(%rsi), %rcx
+ movq %rdx, 240(%rdi)
+ adcq %rcx, %rcx
+ movq %rcx, 248(%rdi)
+ # Fold the carry out of limb 31 into %rax (return value, 0 or 1).
+ adcq $0, %rax
+ # "rep ret": two-byte return, a known branch-predictor idiom on older
+ # AMD cores; behaves identically to a plain ret.
+ repz retq
+#ifndef __APPLE__
+.size sp_2048_dbl_32,.-sp_2048_dbl_32
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_4096_sqr_64
+.type sp_4096_sqr_64,@function
+.align 16
+sp_4096_sqr_64:
+#else
+.globl _sp_4096_sqr_64
+.p2align 4
+_sp_4096_sqr_64:
+#endif /* __APPLE__ */
+ subq $1304, %rsp
+ movq %rdi, 1280(%rsp)
+ movq %rsi, 1288(%rsp)
+ leaq 1024(%rsp), %r8
+ leaq 256(%rsi), %r9
+ # Add
+ movq (%rsi), %rdx
+ xorq %rcx, %rcx
+ addq (%r9), %rdx
+ movq 8(%rsi), %rax
+ movq %rdx, (%r8)
+ adcq 8(%r9), %rax
+ movq 16(%rsi), %rdx
+ movq %rax, 8(%r8)
+ adcq 16(%r9), %rdx
+ movq 24(%rsi), %rax
+ movq %rdx, 16(%r8)
+ adcq 24(%r9), %rax
+ movq 32(%rsi), %rdx
+ movq %rax, 24(%r8)
+ adcq 32(%r9), %rdx
+ movq 40(%rsi), %rax
+ movq %rdx, 32(%r8)
+ adcq 40(%r9), %rax
+ movq 48(%rsi), %rdx
+ movq %rax, 40(%r8)
+ adcq 48(%r9), %rdx
+ movq 56(%rsi), %rax
+ movq %rdx, 48(%r8)
+ adcq 56(%r9), %rax
+ movq 64(%rsi), %rdx
+ movq %rax, 56(%r8)
+ adcq 64(%r9), %rdx
+ movq 72(%rsi), %rax
+ movq %rdx, 64(%r8)
+ adcq 72(%r9), %rax
+ movq 80(%rsi), %rdx
+ movq %rax, 72(%r8)
+ adcq 80(%r9), %rdx
+ movq 88(%rsi), %rax
+ movq %rdx, 80(%r8)
+ adcq 88(%r9), %rax
+ movq 96(%rsi), %rdx
+ movq %rax, 88(%r8)
+ adcq 96(%r9), %rdx
+ movq 104(%rsi), %rax
+ movq %rdx, 96(%r8)
+ adcq 104(%r9), %rax
+ movq 112(%rsi), %rdx
+ movq %rax, 104(%r8)
+ adcq 112(%r9), %rdx
+ movq 120(%rsi), %rax
+ movq %rdx, 112(%r8)
+ adcq 120(%r9), %rax
+ movq 128(%rsi), %rdx
+ movq %rax, 120(%r8)
+ adcq 128(%r9), %rdx
+ movq 136(%rsi), %rax
+ movq %rdx, 128(%r8)
+ adcq 136(%r9), %rax
+ movq 144(%rsi), %rdx
+ movq %rax, 136(%r8)
+ adcq 144(%r9), %rdx
+ movq 152(%rsi), %rax
+ movq %rdx, 144(%r8)
+ adcq 152(%r9), %rax
+ movq 160(%rsi), %rdx
+ movq %rax, 152(%r8)
+ adcq 160(%r9), %rdx
+ movq 168(%rsi), %rax
+ movq %rdx, 160(%r8)
+ adcq 168(%r9), %rax
+ movq 176(%rsi), %rdx
+ movq %rax, 168(%r8)
+ adcq 176(%r9), %rdx
+ movq 184(%rsi), %rax
+ movq %rdx, 176(%r8)
+ adcq 184(%r9), %rax
+ movq 192(%rsi), %rdx
+ movq %rax, 184(%r8)
+ adcq 192(%r9), %rdx
+ movq 200(%rsi), %rax
+ movq %rdx, 192(%r8)
+ adcq 200(%r9), %rax
+ movq 208(%rsi), %rdx
+ movq %rax, 200(%r8)
+ adcq 208(%r9), %rdx
+ movq 216(%rsi), %rax
+ movq %rdx, 208(%r8)
+ adcq 216(%r9), %rax
+ movq 224(%rsi), %rdx
+ movq %rax, 216(%r8)
+ adcq 224(%r9), %rdx
+ movq 232(%rsi), %rax
+ movq %rdx, 224(%r8)
+ adcq 232(%r9), %rax
+ movq 240(%rsi), %rdx
+ movq %rax, 232(%r8)
+ adcq 240(%r9), %rdx
+ movq 248(%rsi), %rax
+ movq %rdx, 240(%r8)
+ adcq 248(%r9), %rax
+ movq %rax, 248(%r8)
+ adcq $0, %rcx
+ movq %rcx, 1296(%rsp)
+ movq %r8, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_2048_sqr_32@plt
+#else
+ callq _sp_2048_sqr_32
+#endif /* __APPLE__ */
+ movq 1288(%rsp), %rsi
+ leaq 512(%rsp), %rdi
+ addq $256, %rsi
+#ifndef __APPLE__
+ callq sp_2048_sqr_32@plt
+#else
+ callq _sp_2048_sqr_32
+#endif /* __APPLE__ */
+ movq 1288(%rsp), %rsi
+ movq 1280(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_2048_sqr_32@plt
+#else
+ callq _sp_2048_sqr_32
+#endif /* __APPLE__ */
+ movq 1296(%rsp), %r10
+ leaq 1024(%rsp), %r8
+ movq %r10, %rcx
+ negq %r10
+ movq (%r8), %rdx
+ movq 8(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 512(%rdi)
+ movq %rax, 520(%rdi)
+ movq 16(%r8), %rdx
+ movq 24(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 528(%rdi)
+ movq %rax, 536(%rdi)
+ movq 32(%r8), %rdx
+ movq 40(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 544(%rdi)
+ movq %rax, 552(%rdi)
+ movq 48(%r8), %rdx
+ movq 56(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 560(%rdi)
+ movq %rax, 568(%rdi)
+ movq 64(%r8), %rdx
+ movq 72(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 576(%rdi)
+ movq %rax, 584(%rdi)
+ movq 80(%r8), %rdx
+ movq 88(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 592(%rdi)
+ movq %rax, 600(%rdi)
+ movq 96(%r8), %rdx
+ movq 104(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 608(%rdi)
+ movq %rax, 616(%rdi)
+ movq 112(%r8), %rdx
+ movq 120(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 624(%rdi)
+ movq %rax, 632(%rdi)
+ movq 128(%r8), %rdx
+ movq 136(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 640(%rdi)
+ movq %rax, 648(%rdi)
+ movq 144(%r8), %rdx
+ movq 152(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 656(%rdi)
+ movq %rax, 664(%rdi)
+ movq 160(%r8), %rdx
+ movq 168(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 672(%rdi)
+ movq %rax, 680(%rdi)
+ movq 176(%r8), %rdx
+ movq 184(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 688(%rdi)
+ movq %rax, 696(%rdi)
+ movq 192(%r8), %rdx
+ movq 200(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 704(%rdi)
+ movq %rax, 712(%rdi)
+ movq 208(%r8), %rdx
+ movq 216(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 720(%rdi)
+ movq %rax, 728(%rdi)
+ movq 224(%r8), %rdx
+ movq 232(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 736(%rdi)
+ movq %rax, 744(%rdi)
+ movq 240(%r8), %rdx
+ movq 248(%r8), %rax
+ andq %r10, %rdx
+ andq %r10, %rax
+ movq %rdx, 752(%rdi)
+ movq %rax, 760(%rdi)
+ movq 512(%rdi), %rdx
+ addq %rdx, %rdx
+ movq 520(%rdi), %rax
+ movq %rdx, 512(%rdi)
+ adcq %rax, %rax
+ movq 528(%rdi), %rdx
+ movq %rax, 520(%rdi)
+ adcq %rdx, %rdx
+ movq 536(%rdi), %rax
+ movq %rdx, 528(%rdi)
+ adcq %rax, %rax
+ movq 544(%rdi), %rdx
+ movq %rax, 536(%rdi)
+ adcq %rdx, %rdx
+ movq 552(%rdi), %rax
+ movq %rdx, 544(%rdi)
+ adcq %rax, %rax
+ movq 560(%rdi), %rdx
+ movq %rax, 552(%rdi)
+ adcq %rdx, %rdx
+ movq 568(%rdi), %rax
+ movq %rdx, 560(%rdi)
+ adcq %rax, %rax
+ movq 576(%rdi), %rdx
+ movq %rax, 568(%rdi)
+ adcq %rdx, %rdx
+ movq 584(%rdi), %rax
+ movq %rdx, 576(%rdi)
+ adcq %rax, %rax
+ movq 592(%rdi), %rdx
+ movq %rax, 584(%rdi)
+ adcq %rdx, %rdx
+ movq 600(%rdi), %rax
+ movq %rdx, 592(%rdi)
+ adcq %rax, %rax
+ movq 608(%rdi), %rdx
+ movq %rax, 600(%rdi)
+ adcq %rdx, %rdx
+ movq 616(%rdi), %rax
+ movq %rdx, 608(%rdi)
+ adcq %rax, %rax
+ movq 624(%rdi), %rdx
+ movq %rax, 616(%rdi)
+ adcq %rdx, %rdx
+ movq 632(%rdi), %rax
+ movq %rdx, 624(%rdi)
+ adcq %rax, %rax
+ movq 640(%rdi), %rdx
+ movq %rax, 632(%rdi)
+ adcq %rdx, %rdx
+ movq 648(%rdi), %rax
+ movq %rdx, 640(%rdi)
+ adcq %rax, %rax
+ movq 656(%rdi), %rdx
+ movq %rax, 648(%rdi)
+ adcq %rdx, %rdx
+ movq 664(%rdi), %rax
+ movq %rdx, 656(%rdi)
+ adcq %rax, %rax
+ movq 672(%rdi), %rdx
+ movq %rax, 664(%rdi)
+ adcq %rdx, %rdx
+ movq 680(%rdi), %rax
+ movq %rdx, 672(%rdi)
+ adcq %rax, %rax
+ movq 688(%rdi), %rdx
+ movq %rax, 680(%rdi)
+ adcq %rdx, %rdx
+ movq 696(%rdi), %rax
+ movq %rdx, 688(%rdi)
+ adcq %rax, %rax
+ movq 704(%rdi), %rdx
+ movq %rax, 696(%rdi)
+ adcq %rdx, %rdx
+ movq 712(%rdi), %rax
+ movq %rdx, 704(%rdi)
+ adcq %rax, %rax
+ movq 720(%rdi), %rdx
+ movq %rax, 712(%rdi)
+ adcq %rdx, %rdx
+ movq 728(%rdi), %rax
+ movq %rdx, 720(%rdi)
+ adcq %rax, %rax
+ movq 736(%rdi), %rdx
+ movq %rax, 728(%rdi)
+ adcq %rdx, %rdx
+ movq 744(%rdi), %rax
+ movq %rdx, 736(%rdi)
+ adcq %rax, %rax
+ movq 752(%rdi), %rdx
+ movq %rax, 744(%rdi)
+ adcq %rdx, %rdx
+ movq 760(%rdi), %rax
+ movq %rdx, 752(%rdi)
+ adcq %rax, %rax
+ movq %rax, 760(%rdi)
+ adcq $0, %rcx
+ leaq 512(%rsp), %rsi
+ movq %rsp, %r8
+ movq (%r8), %rdx
+ subq (%rsi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rsi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rsi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rsi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rsi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rsi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rsi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rsi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rsi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rsi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rsi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rsi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rsi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rsi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rsi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rsi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rsi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rsi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rsi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rsi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rsi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rsi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rsi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rsi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rsi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rsi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rsi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rsi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rsi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rsi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rsi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rsi), %rax
+ movq 256(%r8), %rdx
+ movq %rax, 248(%r8)
+ sbbq 256(%rsi), %rdx
+ movq 264(%r8), %rax
+ movq %rdx, 256(%r8)
+ sbbq 264(%rsi), %rax
+ movq 272(%r8), %rdx
+ movq %rax, 264(%r8)
+ sbbq 272(%rsi), %rdx
+ movq 280(%r8), %rax
+ movq %rdx, 272(%r8)
+ sbbq 280(%rsi), %rax
+ movq 288(%r8), %rdx
+ movq %rax, 280(%r8)
+ sbbq 288(%rsi), %rdx
+ movq 296(%r8), %rax
+ movq %rdx, 288(%r8)
+ sbbq 296(%rsi), %rax
+ movq 304(%r8), %rdx
+ movq %rax, 296(%r8)
+ sbbq 304(%rsi), %rdx
+ movq 312(%r8), %rax
+ movq %rdx, 304(%r8)
+ sbbq 312(%rsi), %rax
+ movq 320(%r8), %rdx
+ movq %rax, 312(%r8)
+ sbbq 320(%rsi), %rdx
+ movq 328(%r8), %rax
+ movq %rdx, 320(%r8)
+ sbbq 328(%rsi), %rax
+ movq 336(%r8), %rdx
+ movq %rax, 328(%r8)
+ sbbq 336(%rsi), %rdx
+ movq 344(%r8), %rax
+ movq %rdx, 336(%r8)
+ sbbq 344(%rsi), %rax
+ movq 352(%r8), %rdx
+ movq %rax, 344(%r8)
+ sbbq 352(%rsi), %rdx
+ movq 360(%r8), %rax
+ movq %rdx, 352(%r8)
+ sbbq 360(%rsi), %rax
+ movq 368(%r8), %rdx
+ movq %rax, 360(%r8)
+ sbbq 368(%rsi), %rdx
+ movq 376(%r8), %rax
+ movq %rdx, 368(%r8)
+ sbbq 376(%rsi), %rax
+ movq 384(%r8), %rdx
+ movq %rax, 376(%r8)
+ sbbq 384(%rsi), %rdx
+ movq 392(%r8), %rax
+ movq %rdx, 384(%r8)
+ sbbq 392(%rsi), %rax
+ movq 400(%r8), %rdx
+ movq %rax, 392(%r8)
+ sbbq 400(%rsi), %rdx
+ movq 408(%r8), %rax
+ movq %rdx, 400(%r8)
+ sbbq 408(%rsi), %rax
+ movq 416(%r8), %rdx
+ movq %rax, 408(%r8)
+ sbbq 416(%rsi), %rdx
+ movq 424(%r8), %rax
+ movq %rdx, 416(%r8)
+ sbbq 424(%rsi), %rax
+ movq 432(%r8), %rdx
+ movq %rax, 424(%r8)
+ sbbq 432(%rsi), %rdx
+ movq 440(%r8), %rax
+ movq %rdx, 432(%r8)
+ sbbq 440(%rsi), %rax
+ movq 448(%r8), %rdx
+ movq %rax, 440(%r8)
+ sbbq 448(%rsi), %rdx
+ movq 456(%r8), %rax
+ movq %rdx, 448(%r8)
+ sbbq 456(%rsi), %rax
+ movq 464(%r8), %rdx
+ movq %rax, 456(%r8)
+ sbbq 464(%rsi), %rdx
+ movq 472(%r8), %rax
+ movq %rdx, 464(%r8)
+ sbbq 472(%rsi), %rax
+ movq 480(%r8), %rdx
+ movq %rax, 472(%r8)
+ sbbq 480(%rsi), %rdx
+ movq 488(%r8), %rax
+ movq %rdx, 480(%r8)
+ sbbq 488(%rsi), %rax
+ movq 496(%r8), %rdx
+ movq %rax, 488(%r8)
+ sbbq 496(%rsi), %rdx
+ movq 504(%r8), %rax
+ movq %rdx, 496(%r8)
+ sbbq 504(%rsi), %rax
+ movq %rax, 504(%r8)
+ sbbq $0, %rcx
+ movq (%r8), %rdx
+ subq (%rdi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rdi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rdi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rdi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rdi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rdi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rdi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rdi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rdi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rdi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rdi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rdi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rdi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rdi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rdi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rdi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rdi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rdi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rdi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rdi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rdi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rdi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rdi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rdi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rdi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rdi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rdi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rdi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rdi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rdi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rdi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rdi), %rax
+ movq 256(%r8), %rdx
+ movq %rax, 248(%r8)
+ sbbq 256(%rdi), %rdx
+ movq 264(%r8), %rax
+ movq %rdx, 256(%r8)
+ sbbq 264(%rdi), %rax
+ movq 272(%r8), %rdx
+ movq %rax, 264(%r8)
+ sbbq 272(%rdi), %rdx
+ movq 280(%r8), %rax
+ movq %rdx, 272(%r8)
+ sbbq 280(%rdi), %rax
+ movq 288(%r8), %rdx
+ movq %rax, 280(%r8)
+ sbbq 288(%rdi), %rdx
+ movq 296(%r8), %rax
+ movq %rdx, 288(%r8)
+ sbbq 296(%rdi), %rax
+ movq 304(%r8), %rdx
+ movq %rax, 296(%r8)
+ sbbq 304(%rdi), %rdx
+ movq 312(%r8), %rax
+ movq %rdx, 304(%r8)
+ sbbq 312(%rdi), %rax
+ movq 320(%r8), %rdx
+ movq %rax, 312(%r8)
+ sbbq 320(%rdi), %rdx
+ movq 328(%r8), %rax
+ movq %rdx, 320(%r8)
+ sbbq 328(%rdi), %rax
+ movq 336(%r8), %rdx
+ movq %rax, 328(%r8)
+ sbbq 336(%rdi), %rdx
+ movq 344(%r8), %rax
+ movq %rdx, 336(%r8)
+ sbbq 344(%rdi), %rax
+ movq 352(%r8), %rdx
+ movq %rax, 344(%r8)
+ sbbq 352(%rdi), %rdx
+ movq 360(%r8), %rax
+ movq %rdx, 352(%r8)
+ sbbq 360(%rdi), %rax
+ movq 368(%r8), %rdx
+ movq %rax, 360(%r8)
+ sbbq 368(%rdi), %rdx
+ movq 376(%r8), %rax
+ movq %rdx, 368(%r8)
+ sbbq 376(%rdi), %rax
+ movq 384(%r8), %rdx
+ movq %rax, 376(%r8)
+ sbbq 384(%rdi), %rdx
+ movq 392(%r8), %rax
+ movq %rdx, 384(%r8)
+ sbbq 392(%rdi), %rax
+ movq 400(%r8), %rdx
+ movq %rax, 392(%r8)
+ sbbq 400(%rdi), %rdx
+ movq 408(%r8), %rax
+ movq %rdx, 400(%r8)
+ sbbq 408(%rdi), %rax
+ movq 416(%r8), %rdx
+ movq %rax, 408(%r8)
+ sbbq 416(%rdi), %rdx
+ movq 424(%r8), %rax
+ movq %rdx, 416(%r8)
+ sbbq 424(%rdi), %rax
+ movq 432(%r8), %rdx
+ movq %rax, 424(%r8)
+ sbbq 432(%rdi), %rdx
+ movq 440(%r8), %rax
+ movq %rdx, 432(%r8)
+ sbbq 440(%rdi), %rax
+ movq 448(%r8), %rdx
+ movq %rax, 440(%r8)
+ sbbq 448(%rdi), %rdx
+ movq 456(%r8), %rax
+ movq %rdx, 448(%r8)
+ sbbq 456(%rdi), %rax
+ movq 464(%r8), %rdx
+ movq %rax, 456(%r8)
+ sbbq 464(%rdi), %rdx
+ movq 472(%r8), %rax
+ movq %rdx, 464(%r8)
+ sbbq 472(%rdi), %rax
+ movq 480(%r8), %rdx
+ movq %rax, 472(%r8)
+ sbbq 480(%rdi), %rdx
+ movq 488(%r8), %rax
+ movq %rdx, 480(%r8)
+ sbbq 488(%rdi), %rax
+ movq 496(%r8), %rdx
+ movq %rax, 488(%r8)
+ sbbq 496(%rdi), %rdx
+ movq 504(%r8), %rax
+ movq %rdx, 496(%r8)
+ sbbq 504(%rdi), %rax
+ movq %rax, 504(%r8)
+ sbbq $0, %rcx
+ # Add in place
+ movq 256(%rdi), %rdx
+ addq (%r8), %rdx
+ movq 264(%rdi), %rax
+ movq %rdx, 256(%rdi)
+ adcq 8(%r8), %rax
+ movq 272(%rdi), %rdx
+ movq %rax, 264(%rdi)
+ adcq 16(%r8), %rdx
+ movq 280(%rdi), %rax
+ movq %rdx, 272(%rdi)
+ adcq 24(%r8), %rax
+ movq 288(%rdi), %rdx
+ movq %rax, 280(%rdi)
+ adcq 32(%r8), %rdx
+ movq 296(%rdi), %rax
+ movq %rdx, 288(%rdi)
+ adcq 40(%r8), %rax
+ movq 304(%rdi), %rdx
+ movq %rax, 296(%rdi)
+ adcq 48(%r8), %rdx
+ movq 312(%rdi), %rax
+ movq %rdx, 304(%rdi)
+ adcq 56(%r8), %rax
+ movq 320(%rdi), %rdx
+ movq %rax, 312(%rdi)
+ adcq 64(%r8), %rdx
+ movq 328(%rdi), %rax
+ movq %rdx, 320(%rdi)
+ adcq 72(%r8), %rax
+ movq 336(%rdi), %rdx
+ movq %rax, 328(%rdi)
+ adcq 80(%r8), %rdx
+ movq 344(%rdi), %rax
+ movq %rdx, 336(%rdi)
+ adcq 88(%r8), %rax
+ movq 352(%rdi), %rdx
+ movq %rax, 344(%rdi)
+ adcq 96(%r8), %rdx
+ movq 360(%rdi), %rax
+ movq %rdx, 352(%rdi)
+ adcq 104(%r8), %rax
+ movq 368(%rdi), %rdx
+ movq %rax, 360(%rdi)
+ adcq 112(%r8), %rdx
+ movq 376(%rdi), %rax
+ movq %rdx, 368(%rdi)
+ adcq 120(%r8), %rax
+ movq 384(%rdi), %rdx
+ movq %rax, 376(%rdi)
+ adcq 128(%r8), %rdx
+ movq 392(%rdi), %rax
+ movq %rdx, 384(%rdi)
+ adcq 136(%r8), %rax
+ movq 400(%rdi), %rdx
+ movq %rax, 392(%rdi)
+ adcq 144(%r8), %rdx
+ movq 408(%rdi), %rax
+ movq %rdx, 400(%rdi)
+ adcq 152(%r8), %rax
+ movq 416(%rdi), %rdx
+ movq %rax, 408(%rdi)
+ adcq 160(%r8), %rdx
+ movq 424(%rdi), %rax
+ movq %rdx, 416(%rdi)
+ adcq 168(%r8), %rax
+ movq 432(%rdi), %rdx
+ movq %rax, 424(%rdi)
+ adcq 176(%r8), %rdx
+ movq 440(%rdi), %rax
+ movq %rdx, 432(%rdi)
+ adcq 184(%r8), %rax
+ movq 448(%rdi), %rdx
+ movq %rax, 440(%rdi)
+ adcq 192(%r8), %rdx
+ movq 456(%rdi), %rax
+ movq %rdx, 448(%rdi)
+ adcq 200(%r8), %rax
+ movq 464(%rdi), %rdx
+ movq %rax, 456(%rdi)
+ adcq 208(%r8), %rdx
+ movq 472(%rdi), %rax
+ movq %rdx, 464(%rdi)
+ adcq 216(%r8), %rax
+ movq 480(%rdi), %rdx
+ movq %rax, 472(%rdi)
+ adcq 224(%r8), %rdx
+ movq 488(%rdi), %rax
+ movq %rdx, 480(%rdi)
+ adcq 232(%r8), %rax
+ movq 496(%rdi), %rdx
+ movq %rax, 488(%rdi)
+ adcq 240(%r8), %rdx
+ movq 504(%rdi), %rax
+ movq %rdx, 496(%rdi)
+ adcq 248(%r8), %rax
+ movq 512(%rdi), %rdx
+ movq %rax, 504(%rdi)
+ adcq 256(%r8), %rdx
+ movq 520(%rdi), %rax
+ movq %rdx, 512(%rdi)
+ adcq 264(%r8), %rax
+ movq 528(%rdi), %rdx
+ movq %rax, 520(%rdi)
+ adcq 272(%r8), %rdx
+ movq 536(%rdi), %rax
+ movq %rdx, 528(%rdi)
+ adcq 280(%r8), %rax
+ movq 544(%rdi), %rdx
+ movq %rax, 536(%rdi)
+ adcq 288(%r8), %rdx
+ movq 552(%rdi), %rax
+ movq %rdx, 544(%rdi)
+ adcq 296(%r8), %rax
+ movq 560(%rdi), %rdx
+ movq %rax, 552(%rdi)
+ adcq 304(%r8), %rdx
+ movq 568(%rdi), %rax
+ movq %rdx, 560(%rdi)
+ adcq 312(%r8), %rax
+ movq 576(%rdi), %rdx
+ movq %rax, 568(%rdi)
+ adcq 320(%r8), %rdx
+ movq 584(%rdi), %rax
+ movq %rdx, 576(%rdi)
+ adcq 328(%r8), %rax
+ movq 592(%rdi), %rdx
+ movq %rax, 584(%rdi)
+ adcq 336(%r8), %rdx
+ movq 600(%rdi), %rax
+ movq %rdx, 592(%rdi)
+ adcq 344(%r8), %rax
+ movq 608(%rdi), %rdx
+ movq %rax, 600(%rdi)
+ adcq 352(%r8), %rdx
+ movq 616(%rdi), %rax
+ movq %rdx, 608(%rdi)
+ adcq 360(%r8), %rax
+ movq 624(%rdi), %rdx
+ movq %rax, 616(%rdi)
+ adcq 368(%r8), %rdx
+ movq 632(%rdi), %rax
+ movq %rdx, 624(%rdi)
+ adcq 376(%r8), %rax
+ movq 640(%rdi), %rdx
+ movq %rax, 632(%rdi)
+ adcq 384(%r8), %rdx
+ movq 648(%rdi), %rax
+ movq %rdx, 640(%rdi)
+ adcq 392(%r8), %rax
+ movq 656(%rdi), %rdx
+ movq %rax, 648(%rdi)
+ adcq 400(%r8), %rdx
+ movq 664(%rdi), %rax
+ movq %rdx, 656(%rdi)
+ adcq 408(%r8), %rax
+ movq 672(%rdi), %rdx
+ movq %rax, 664(%rdi)
+ adcq 416(%r8), %rdx
+ movq 680(%rdi), %rax
+ movq %rdx, 672(%rdi)
+ adcq 424(%r8), %rax
+ movq 688(%rdi), %rdx
+ movq %rax, 680(%rdi)
+ adcq 432(%r8), %rdx
+ movq 696(%rdi), %rax
+ movq %rdx, 688(%rdi)
+ adcq 440(%r8), %rax
+ movq 704(%rdi), %rdx
+ movq %rax, 696(%rdi)
+ adcq 448(%r8), %rdx
+ movq 712(%rdi), %rax
+ movq %rdx, 704(%rdi)
+ adcq 456(%r8), %rax
+ movq 720(%rdi), %rdx
+ movq %rax, 712(%rdi)
+ adcq 464(%r8), %rdx
+ movq 728(%rdi), %rax
+ movq %rdx, 720(%rdi)
+ adcq 472(%r8), %rax
+ movq 736(%rdi), %rdx
+ movq %rax, 728(%rdi)
+ adcq 480(%r8), %rdx
+ movq 744(%rdi), %rax
+ movq %rdx, 736(%rdi)
+ adcq 488(%r8), %rax
+ movq 752(%rdi), %rdx
+ movq %rax, 744(%rdi)
+ adcq 496(%r8), %rdx
+ movq 760(%rdi), %rax
+ movq %rdx, 752(%rdi)
+ adcq 504(%r8), %rax
+ movq %rax, 760(%rdi)
+ adcq $0, %rcx
+ movq %rcx, 768(%rdi)
+ # Add in place
+ movq 512(%rdi), %rdx
+ xorq %rcx, %rcx
+ addq (%rsi), %rdx
+ movq 520(%rdi), %rax
+ movq %rdx, 512(%rdi)
+ adcq 8(%rsi), %rax
+ movq 528(%rdi), %rdx
+ movq %rax, 520(%rdi)
+ adcq 16(%rsi), %rdx
+ movq 536(%rdi), %rax
+ movq %rdx, 528(%rdi)
+ adcq 24(%rsi), %rax
+ movq 544(%rdi), %rdx
+ movq %rax, 536(%rdi)
+ adcq 32(%rsi), %rdx
+ movq 552(%rdi), %rax
+ movq %rdx, 544(%rdi)
+ adcq 40(%rsi), %rax
+ movq 560(%rdi), %rdx
+ movq %rax, 552(%rdi)
+ adcq 48(%rsi), %rdx
+ movq 568(%rdi), %rax
+ movq %rdx, 560(%rdi)
+ adcq 56(%rsi), %rax
+ movq 576(%rdi), %rdx
+ movq %rax, 568(%rdi)
+ adcq 64(%rsi), %rdx
+ movq 584(%rdi), %rax
+ movq %rdx, 576(%rdi)
+ adcq 72(%rsi), %rax
+ movq 592(%rdi), %rdx
+ movq %rax, 584(%rdi)
+ adcq 80(%rsi), %rdx
+ movq 600(%rdi), %rax
+ movq %rdx, 592(%rdi)
+ adcq 88(%rsi), %rax
+ movq 608(%rdi), %rdx
+ movq %rax, 600(%rdi)
+ adcq 96(%rsi), %rdx
+ movq 616(%rdi), %rax
+ movq %rdx, 608(%rdi)
+ adcq 104(%rsi), %rax
+ movq 624(%rdi), %rdx
+ movq %rax, 616(%rdi)
+ adcq 112(%rsi), %rdx
+ movq 632(%rdi), %rax
+ movq %rdx, 624(%rdi)
+ adcq 120(%rsi), %rax
+ movq 640(%rdi), %rdx
+ movq %rax, 632(%rdi)
+ adcq 128(%rsi), %rdx
+ movq 648(%rdi), %rax
+ movq %rdx, 640(%rdi)
+ adcq 136(%rsi), %rax
+ movq 656(%rdi), %rdx
+ movq %rax, 648(%rdi)
+ adcq 144(%rsi), %rdx
+ movq 664(%rdi), %rax
+ movq %rdx, 656(%rdi)
+ adcq 152(%rsi), %rax
+ movq 672(%rdi), %rdx
+ movq %rax, 664(%rdi)
+ adcq 160(%rsi), %rdx
+ movq 680(%rdi), %rax
+ movq %rdx, 672(%rdi)
+ adcq 168(%rsi), %rax
+ movq 688(%rdi), %rdx
+ movq %rax, 680(%rdi)
+ adcq 176(%rsi), %rdx
+ movq 696(%rdi), %rax
+ movq %rdx, 688(%rdi)
+ adcq 184(%rsi), %rax
+ movq 704(%rdi), %rdx
+ movq %rax, 696(%rdi)
+ adcq 192(%rsi), %rdx
+ movq 712(%rdi), %rax
+ movq %rdx, 704(%rdi)
+ adcq 200(%rsi), %rax
+ movq 720(%rdi), %rdx
+ movq %rax, 712(%rdi)
+ adcq 208(%rsi), %rdx
+ movq 728(%rdi), %rax
+ movq %rdx, 720(%rdi)
+ adcq 216(%rsi), %rax
+ movq 736(%rdi), %rdx
+ movq %rax, 728(%rdi)
+ adcq 224(%rsi), %rdx
+ movq 744(%rdi), %rax
+ movq %rdx, 736(%rdi)
+ adcq 232(%rsi), %rax
+ movq 752(%rdi), %rdx
+ movq %rax, 744(%rdi)
+ adcq 240(%rsi), %rdx
+ movq 760(%rdi), %rax
+ movq %rdx, 752(%rdi)
+ adcq 248(%rsi), %rax
+ movq 768(%rdi), %rdx
+ movq %rax, 760(%rdi)
+ adcq 256(%rsi), %rdx
+ movq %rdx, 768(%rdi)
+ adcq $0, %rcx
+ # Add to zero
+ movq 264(%rsi), %rdx
+ adcq $0, %rdx
+ movq 272(%rsi), %rax
+ movq %rdx, 776(%rdi)
+ adcq $0, %rax
+ movq 280(%rsi), %rdx
+ movq %rax, 784(%rdi)
+ adcq $0, %rdx
+ movq 288(%rsi), %rax
+ movq %rdx, 792(%rdi)
+ adcq $0, %rax
+ movq 296(%rsi), %rdx
+ movq %rax, 800(%rdi)
+ adcq $0, %rdx
+ movq 304(%rsi), %rax
+ movq %rdx, 808(%rdi)
+ adcq $0, %rax
+ movq 312(%rsi), %rdx
+ movq %rax, 816(%rdi)
+ adcq $0, %rdx
+ movq 320(%rsi), %rax
+ movq %rdx, 824(%rdi)
+ adcq $0, %rax
+ movq 328(%rsi), %rdx
+ movq %rax, 832(%rdi)
+ adcq $0, %rdx
+ movq 336(%rsi), %rax
+ movq %rdx, 840(%rdi)
+ adcq $0, %rax
+ movq 344(%rsi), %rdx
+ movq %rax, 848(%rdi)
+ adcq $0, %rdx
+ movq 352(%rsi), %rax
+ movq %rdx, 856(%rdi)
+ adcq $0, %rax
+ movq 360(%rsi), %rdx
+ movq %rax, 864(%rdi)
+ adcq $0, %rdx
+ movq 368(%rsi), %rax
+ movq %rdx, 872(%rdi)
+ adcq $0, %rax
+ movq 376(%rsi), %rdx
+ movq %rax, 880(%rdi)
+ adcq $0, %rdx
+ movq 384(%rsi), %rax
+ movq %rdx, 888(%rdi)
+ adcq $0, %rax
+ movq 392(%rsi), %rdx
+ movq %rax, 896(%rdi)
+ adcq $0, %rdx
+ movq 400(%rsi), %rax
+ movq %rdx, 904(%rdi)
+ adcq $0, %rax
+ movq 408(%rsi), %rdx
+ movq %rax, 912(%rdi)
+ adcq $0, %rdx
+ movq 416(%rsi), %rax
+ movq %rdx, 920(%rdi)
+ adcq $0, %rax
+ movq 424(%rsi), %rdx
+ movq %rax, 928(%rdi)
+ adcq $0, %rdx
+ movq 432(%rsi), %rax
+ movq %rdx, 936(%rdi)
+ adcq $0, %rax
+ movq 440(%rsi), %rdx
+ movq %rax, 944(%rdi)
+ adcq $0, %rdx
+ movq 448(%rsi), %rax
+ movq %rdx, 952(%rdi)
+ adcq $0, %rax
+ movq 456(%rsi), %rdx
+ movq %rax, 960(%rdi)
+ adcq $0, %rdx
+ movq 464(%rsi), %rax
+ movq %rdx, 968(%rdi)
+ adcq $0, %rax
+ movq 472(%rsi), %rdx
+ movq %rax, 976(%rdi)
+ adcq $0, %rdx
+ movq 480(%rsi), %rax
+ movq %rdx, 984(%rdi)
+ adcq $0, %rax
+ movq 488(%rsi), %rdx
+ movq %rax, 992(%rdi)
+ adcq $0, %rdx
+ movq 496(%rsi), %rax
+ movq %rdx, 1000(%rdi)
+ adcq $0, %rax
+ movq 504(%rsi), %rdx
+ movq %rax, 1008(%rdi)
+ adcq $0, %rdx
+ movq %rdx, 1016(%rdi)
+ addq $1304, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_sqr_64,.-sp_4096_sqr_64
+#endif /* __APPLE__ */
+/* Multiply a and b into r using AVX2/BMI2 instructions. (r = a * b)
+ *
+ * r A single precision integer (the 8192-bit product).
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_4096_mul_avx2_64
+.type sp_4096_mul_avx2_64,@function
+.align 16
+sp_4096_mul_avx2_64:
+#else
+.globl _sp_4096_mul_avx2_64
+.p2align 4
+_sp_4096_mul_avx2_64:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ subq $1576, %rsp
+ movq %rdi, 1536(%rsp)
+ movq %rsi, 1544(%rsp)
+ movq %rdx, 1552(%rsp)
+ leaq 1024(%rsp), %r10
+ leaq 256(%rsi), %r12
+ # Add
+ movq (%rsi), %rax
+ xorq %r13, %r13
+ addq (%r12), %rax
+ movq 8(%rsi), %rcx
+ movq %rax, (%r10)
+ adcq 8(%r12), %rcx
+ movq 16(%rsi), %r8
+ movq %rcx, 8(%r10)
+ adcq 16(%r12), %r8
+ movq 24(%rsi), %rax
+ movq %r8, 16(%r10)
+ adcq 24(%r12), %rax
+ movq 32(%rsi), %rcx
+ movq %rax, 24(%r10)
+ adcq 32(%r12), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%r10)
+ adcq 40(%r12), %r8
+ movq 48(%rsi), %rax
+ movq %r8, 40(%r10)
+ adcq 48(%r12), %rax
+ movq 56(%rsi), %rcx
+ movq %rax, 48(%r10)
+ adcq 56(%r12), %rcx
+ movq 64(%rsi), %r8
+ movq %rcx, 56(%r10)
+ adcq 64(%r12), %r8
+ movq 72(%rsi), %rax
+ movq %r8, 64(%r10)
+ adcq 72(%r12), %rax
+ movq 80(%rsi), %rcx
+ movq %rax, 72(%r10)
+ adcq 80(%r12), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%r10)
+ adcq 88(%r12), %r8
+ movq 96(%rsi), %rax
+ movq %r8, 88(%r10)
+ adcq 96(%r12), %rax
+ movq 104(%rsi), %rcx
+ movq %rax, 96(%r10)
+ adcq 104(%r12), %rcx
+ movq 112(%rsi), %r8
+ movq %rcx, 104(%r10)
+ adcq 112(%r12), %r8
+ movq 120(%rsi), %rax
+ movq %r8, 112(%r10)
+ adcq 120(%r12), %rax
+ movq 128(%rsi), %rcx
+ movq %rax, 120(%r10)
+ adcq 128(%r12), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%r10)
+ adcq 136(%r12), %r8
+ movq 144(%rsi), %rax
+ movq %r8, 136(%r10)
+ adcq 144(%r12), %rax
+ movq 152(%rsi), %rcx
+ movq %rax, 144(%r10)
+ adcq 152(%r12), %rcx
+ movq 160(%rsi), %r8
+ movq %rcx, 152(%r10)
+ adcq 160(%r12), %r8
+ movq 168(%rsi), %rax
+ movq %r8, 160(%r10)
+ adcq 168(%r12), %rax
+ movq 176(%rsi), %rcx
+ movq %rax, 168(%r10)
+ adcq 176(%r12), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%r10)
+ adcq 184(%r12), %r8
+ movq 192(%rsi), %rax
+ movq %r8, 184(%r10)
+ adcq 192(%r12), %rax
+ movq 200(%rsi), %rcx
+ movq %rax, 192(%r10)
+ adcq 200(%r12), %rcx
+ movq 208(%rsi), %r8
+ movq %rcx, 200(%r10)
+ adcq 208(%r12), %r8
+ movq 216(%rsi), %rax
+ movq %r8, 208(%r10)
+ adcq 216(%r12), %rax
+ movq 224(%rsi), %rcx
+ movq %rax, 216(%r10)
+ adcq 224(%r12), %rcx
+ movq 232(%rsi), %r8
+ movq %rcx, 224(%r10)
+ adcq 232(%r12), %r8
+ movq 240(%rsi), %rax
+ movq %r8, 232(%r10)
+ adcq 240(%r12), %rax
+ movq 248(%rsi), %rcx
+ movq %rax, 240(%r10)
+ adcq 248(%r12), %rcx
+ movq %rcx, 248(%r10)
+ adcq $0, %r13
+ movq %r13, 1560(%rsp)
+ leaq 1280(%rsp), %r11
+ leaq 256(%rdx), %r12
+ # Add
+ movq (%rdx), %rax
+ xorq %r14, %r14
+ addq (%r12), %rax
+ movq 8(%rdx), %rcx
+ movq %rax, (%r11)
+ adcq 8(%r12), %rcx
+ movq 16(%rdx), %r8
+ movq %rcx, 8(%r11)
+ adcq 16(%r12), %r8
+ movq 24(%rdx), %rax
+ movq %r8, 16(%r11)
+ adcq 24(%r12), %rax
+ movq 32(%rdx), %rcx
+ movq %rax, 24(%r11)
+ adcq 32(%r12), %rcx
+ movq 40(%rdx), %r8
+ movq %rcx, 32(%r11)
+ adcq 40(%r12), %r8
+ movq 48(%rdx), %rax
+ movq %r8, 40(%r11)
+ adcq 48(%r12), %rax
+ movq 56(%rdx), %rcx
+ movq %rax, 48(%r11)
+ adcq 56(%r12), %rcx
+ movq 64(%rdx), %r8
+ movq %rcx, 56(%r11)
+ adcq 64(%r12), %r8
+ movq 72(%rdx), %rax
+ movq %r8, 64(%r11)
+ adcq 72(%r12), %rax
+ movq 80(%rdx), %rcx
+ movq %rax, 72(%r11)
+ adcq 80(%r12), %rcx
+ movq 88(%rdx), %r8
+ movq %rcx, 80(%r11)
+ adcq 88(%r12), %r8
+ movq 96(%rdx), %rax
+ movq %r8, 88(%r11)
+ adcq 96(%r12), %rax
+ movq 104(%rdx), %rcx
+ movq %rax, 96(%r11)
+ adcq 104(%r12), %rcx
+ movq 112(%rdx), %r8
+ movq %rcx, 104(%r11)
+ adcq 112(%r12), %r8
+ movq 120(%rdx), %rax
+ movq %r8, 112(%r11)
+ adcq 120(%r12), %rax
+ movq 128(%rdx), %rcx
+ movq %rax, 120(%r11)
+ adcq 128(%r12), %rcx
+ movq 136(%rdx), %r8
+ movq %rcx, 128(%r11)
+ adcq 136(%r12), %r8
+ movq 144(%rdx), %rax
+ movq %r8, 136(%r11)
+ adcq 144(%r12), %rax
+ movq 152(%rdx), %rcx
+ movq %rax, 144(%r11)
+ adcq 152(%r12), %rcx
+ movq 160(%rdx), %r8
+ movq %rcx, 152(%r11)
+ adcq 160(%r12), %r8
+ movq 168(%rdx), %rax
+ movq %r8, 160(%r11)
+ adcq 168(%r12), %rax
+ movq 176(%rdx), %rcx
+ movq %rax, 168(%r11)
+ adcq 176(%r12), %rcx
+ movq 184(%rdx), %r8
+ movq %rcx, 176(%r11)
+ adcq 184(%r12), %r8
+ movq 192(%rdx), %rax
+ movq %r8, 184(%r11)
+ adcq 192(%r12), %rax
+ movq 200(%rdx), %rcx
+ movq %rax, 192(%r11)
+ adcq 200(%r12), %rcx
+ movq 208(%rdx), %r8
+ movq %rcx, 200(%r11)
+ adcq 208(%r12), %r8
+ movq 216(%rdx), %rax
+ movq %r8, 208(%r11)
+ adcq 216(%r12), %rax
+ movq 224(%rdx), %rcx
+ movq %rax, 216(%r11)
+ adcq 224(%r12), %rcx
+ movq 232(%rdx), %r8
+ movq %rcx, 224(%r11)
+ adcq 232(%r12), %r8
+ movq 240(%rdx), %rax
+ movq %r8, 232(%r11)
+ adcq 240(%r12), %rax
+ movq 248(%rdx), %rcx
+ movq %rax, 240(%r11)
+ adcq 248(%r12), %rcx
+ movq %rcx, 248(%r11)
+ adcq $0, %r14
+ movq %r14, 1568(%rsp)
+ movq %r11, %rdx
+ movq %r10, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_2048_mul_avx2_32@plt
+#else
+ callq _sp_2048_mul_avx2_32
+#endif /* __APPLE__ */
+ movq 1552(%rsp), %rdx
+ movq 1544(%rsp), %rsi
+ leaq 512(%rsp), %rdi
+ addq $256, %rdx
+ addq $256, %rsi
+#ifndef __APPLE__
+ callq sp_2048_mul_avx2_32@plt
+#else
+ callq _sp_2048_mul_avx2_32
+#endif /* __APPLE__ */
+ movq 1552(%rsp), %rdx
+ movq 1544(%rsp), %rsi
+ movq 1536(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_2048_mul_avx2_32@plt
+#else
+ callq _sp_2048_mul_avx2_32
+#endif /* __APPLE__ */
+ movq 1560(%rsp), %r13
+ movq 1568(%rsp), %r14
+ movq 1536(%rsp), %r15
+ movq %r13, %r9
+ leaq 1024(%rsp), %r10
+ leaq 1280(%rsp), %r11
+ andq %r14, %r9
+ negq %r13
+ negq %r14
+ addq $512, %r15
+ movq (%r10), %rax
+ movq (%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ addq %rcx, %rax
+ movq 8(%r10), %rcx
+ movq 8(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, (%r15)
+ adcq %r8, %rcx
+ movq 16(%r10), %r8
+ movq 16(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 8(%r15)
+ adcq %rax, %r8
+ movq 24(%r10), %rax
+ movq 24(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 16(%r15)
+ adcq %rcx, %rax
+ movq 32(%r10), %rcx
+ movq 32(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 24(%r15)
+ adcq %r8, %rcx
+ movq 40(%r10), %r8
+ movq 40(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 32(%r15)
+ adcq %rax, %r8
+ movq 48(%r10), %rax
+ movq 48(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 40(%r15)
+ adcq %rcx, %rax
+ movq 56(%r10), %rcx
+ movq 56(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 48(%r15)
+ adcq %r8, %rcx
+ movq 64(%r10), %r8
+ movq 64(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 56(%r15)
+ adcq %rax, %r8
+ movq 72(%r10), %rax
+ movq 72(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 64(%r15)
+ adcq %rcx, %rax
+ movq 80(%r10), %rcx
+ movq 80(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 72(%r15)
+ adcq %r8, %rcx
+ movq 88(%r10), %r8
+ movq 88(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 80(%r15)
+ adcq %rax, %r8
+ movq 96(%r10), %rax
+ movq 96(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 88(%r15)
+ adcq %rcx, %rax
+ movq 104(%r10), %rcx
+ movq 104(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 96(%r15)
+ adcq %r8, %rcx
+ movq 112(%r10), %r8
+ movq 112(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 104(%r15)
+ adcq %rax, %r8
+ movq 120(%r10), %rax
+ movq 120(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 112(%r15)
+ adcq %rcx, %rax
+ movq 128(%r10), %rcx
+ movq 128(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 120(%r15)
+ adcq %r8, %rcx
+ movq 136(%r10), %r8
+ movq 136(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 128(%r15)
+ adcq %rax, %r8
+ movq 144(%r10), %rax
+ movq 144(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 136(%r15)
+ adcq %rcx, %rax
+ movq 152(%r10), %rcx
+ movq 152(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 144(%r15)
+ adcq %r8, %rcx
+ movq 160(%r10), %r8
+ movq 160(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 152(%r15)
+ adcq %rax, %r8
+ movq 168(%r10), %rax
+ movq 168(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 160(%r15)
+ adcq %rcx, %rax
+ movq 176(%r10), %rcx
+ movq 176(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 168(%r15)
+ adcq %r8, %rcx
+ movq 184(%r10), %r8
+ movq 184(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 176(%r15)
+ adcq %rax, %r8
+ movq 192(%r10), %rax
+ movq 192(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 184(%r15)
+ adcq %rcx, %rax
+ movq 200(%r10), %rcx
+ movq 200(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 192(%r15)
+ adcq %r8, %rcx
+ movq 208(%r10), %r8
+ movq 208(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 200(%r15)
+ adcq %rax, %r8
+ movq 216(%r10), %rax
+ movq 216(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 208(%r15)
+ adcq %rcx, %rax
+ movq 224(%r10), %rcx
+ movq 224(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 216(%r15)
+ adcq %r8, %rcx
+ movq 232(%r10), %r8
+ movq 232(%r11), %rax
+ pextq %r14, %r8, %r8
+ pextq %r13, %rax, %rax
+ movq %rcx, 224(%r15)
+ adcq %rax, %r8
+ movq 240(%r10), %rax
+ movq 240(%r11), %rcx
+ pextq %r14, %rax, %rax
+ pextq %r13, %rcx, %rcx
+ movq %r8, 232(%r15)
+ adcq %rcx, %rax
+ movq 248(%r10), %rcx
+ movq 248(%r11), %r8
+ pextq %r14, %rcx, %rcx
+ pextq %r13, %r8, %r8
+ movq %rax, 240(%r15)
+ adcq %r8, %rcx
+ movq %rcx, 248(%r15)
+ adcq $0, %r9
+ leaq 512(%rsp), %r11
+ movq %rsp, %r10
+ movq (%r10), %rax
+ subq (%r11), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%r11), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%r11), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%r11), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%r11), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%r11), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%r11), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%r11), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%r11), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%r11), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%r11), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%r11), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%r11), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%r11), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%r11), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%r11), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%r11), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%r11), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%r11), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%r11), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%r11), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%r11), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%r11), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%r11), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%r11), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%r11), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%r11), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%r11), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%r11), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%r11), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%r11), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%r11), %rcx
+ movq 256(%r10), %r8
+ movq %rcx, 248(%r10)
+ sbbq 256(%r11), %r8
+ movq 264(%r10), %rax
+ movq %r8, 256(%r10)
+ sbbq 264(%r11), %rax
+ movq 272(%r10), %rcx
+ movq %rax, 264(%r10)
+ sbbq 272(%r11), %rcx
+ movq 280(%r10), %r8
+ movq %rcx, 272(%r10)
+ sbbq 280(%r11), %r8
+ movq 288(%r10), %rax
+ movq %r8, 280(%r10)
+ sbbq 288(%r11), %rax
+ movq 296(%r10), %rcx
+ movq %rax, 288(%r10)
+ sbbq 296(%r11), %rcx
+ movq 304(%r10), %r8
+ movq %rcx, 296(%r10)
+ sbbq 304(%r11), %r8
+ movq 312(%r10), %rax
+ movq %r8, 304(%r10)
+ sbbq 312(%r11), %rax
+ movq 320(%r10), %rcx
+ movq %rax, 312(%r10)
+ sbbq 320(%r11), %rcx
+ movq 328(%r10), %r8
+ movq %rcx, 320(%r10)
+ sbbq 328(%r11), %r8
+ movq 336(%r10), %rax
+ movq %r8, 328(%r10)
+ sbbq 336(%r11), %rax
+ movq 344(%r10), %rcx
+ movq %rax, 336(%r10)
+ sbbq 344(%r11), %rcx
+ movq 352(%r10), %r8
+ movq %rcx, 344(%r10)
+ sbbq 352(%r11), %r8
+ movq 360(%r10), %rax
+ movq %r8, 352(%r10)
+ sbbq 360(%r11), %rax
+ movq 368(%r10), %rcx
+ movq %rax, 360(%r10)
+ sbbq 368(%r11), %rcx
+ movq 376(%r10), %r8
+ movq %rcx, 368(%r10)
+ sbbq 376(%r11), %r8
+ movq 384(%r10), %rax
+ movq %r8, 376(%r10)
+ sbbq 384(%r11), %rax
+ movq 392(%r10), %rcx
+ movq %rax, 384(%r10)
+ sbbq 392(%r11), %rcx
+ movq 400(%r10), %r8
+ movq %rcx, 392(%r10)
+ sbbq 400(%r11), %r8
+ movq 408(%r10), %rax
+ movq %r8, 400(%r10)
+ sbbq 408(%r11), %rax
+ movq 416(%r10), %rcx
+ movq %rax, 408(%r10)
+ sbbq 416(%r11), %rcx
+ movq 424(%r10), %r8
+ movq %rcx, 416(%r10)
+ sbbq 424(%r11), %r8
+ movq 432(%r10), %rax
+ movq %r8, 424(%r10)
+ sbbq 432(%r11), %rax
+ movq 440(%r10), %rcx
+ movq %rax, 432(%r10)
+ sbbq 440(%r11), %rcx
+ movq 448(%r10), %r8
+ movq %rcx, 440(%r10)
+ sbbq 448(%r11), %r8
+ movq 456(%r10), %rax
+ movq %r8, 448(%r10)
+ sbbq 456(%r11), %rax
+ movq 464(%r10), %rcx
+ movq %rax, 456(%r10)
+ sbbq 464(%r11), %rcx
+ movq 472(%r10), %r8
+ movq %rcx, 464(%r10)
+ sbbq 472(%r11), %r8
+ movq 480(%r10), %rax
+ movq %r8, 472(%r10)
+ sbbq 480(%r11), %rax
+ movq 488(%r10), %rcx
+ movq %rax, 480(%r10)
+ sbbq 488(%r11), %rcx
+ movq 496(%r10), %r8
+ movq %rcx, 488(%r10)
+ sbbq 496(%r11), %r8
+ movq 504(%r10), %rax
+ movq %r8, 496(%r10)
+ sbbq 504(%r11), %rax
+ movq %rax, 504(%r10)
+ sbbq $0, %r9
+ movq (%r10), %rax
+ subq (%rdi), %rax
+ movq 8(%r10), %rcx
+ movq %rax, (%r10)
+ sbbq 8(%rdi), %rcx
+ movq 16(%r10), %r8
+ movq %rcx, 8(%r10)
+ sbbq 16(%rdi), %r8
+ movq 24(%r10), %rax
+ movq %r8, 16(%r10)
+ sbbq 24(%rdi), %rax
+ movq 32(%r10), %rcx
+ movq %rax, 24(%r10)
+ sbbq 32(%rdi), %rcx
+ movq 40(%r10), %r8
+ movq %rcx, 32(%r10)
+ sbbq 40(%rdi), %r8
+ movq 48(%r10), %rax
+ movq %r8, 40(%r10)
+ sbbq 48(%rdi), %rax
+ movq 56(%r10), %rcx
+ movq %rax, 48(%r10)
+ sbbq 56(%rdi), %rcx
+ movq 64(%r10), %r8
+ movq %rcx, 56(%r10)
+ sbbq 64(%rdi), %r8
+ movq 72(%r10), %rax
+ movq %r8, 64(%r10)
+ sbbq 72(%rdi), %rax
+ movq 80(%r10), %rcx
+ movq %rax, 72(%r10)
+ sbbq 80(%rdi), %rcx
+ movq 88(%r10), %r8
+ movq %rcx, 80(%r10)
+ sbbq 88(%rdi), %r8
+ movq 96(%r10), %rax
+ movq %r8, 88(%r10)
+ sbbq 96(%rdi), %rax
+ movq 104(%r10), %rcx
+ movq %rax, 96(%r10)
+ sbbq 104(%rdi), %rcx
+ movq 112(%r10), %r8
+ movq %rcx, 104(%r10)
+ sbbq 112(%rdi), %r8
+ movq 120(%r10), %rax
+ movq %r8, 112(%r10)
+ sbbq 120(%rdi), %rax
+ movq 128(%r10), %rcx
+ movq %rax, 120(%r10)
+ sbbq 128(%rdi), %rcx
+ movq 136(%r10), %r8
+ movq %rcx, 128(%r10)
+ sbbq 136(%rdi), %r8
+ movq 144(%r10), %rax
+ movq %r8, 136(%r10)
+ sbbq 144(%rdi), %rax
+ movq 152(%r10), %rcx
+ movq %rax, 144(%r10)
+ sbbq 152(%rdi), %rcx
+ movq 160(%r10), %r8
+ movq %rcx, 152(%r10)
+ sbbq 160(%rdi), %r8
+ movq 168(%r10), %rax
+ movq %r8, 160(%r10)
+ sbbq 168(%rdi), %rax
+ movq 176(%r10), %rcx
+ movq %rax, 168(%r10)
+ sbbq 176(%rdi), %rcx
+ movq 184(%r10), %r8
+ movq %rcx, 176(%r10)
+ sbbq 184(%rdi), %r8
+ movq 192(%r10), %rax
+ movq %r8, 184(%r10)
+ sbbq 192(%rdi), %rax
+ movq 200(%r10), %rcx
+ movq %rax, 192(%r10)
+ sbbq 200(%rdi), %rcx
+ movq 208(%r10), %r8
+ movq %rcx, 200(%r10)
+ sbbq 208(%rdi), %r8
+ movq 216(%r10), %rax
+ movq %r8, 208(%r10)
+ sbbq 216(%rdi), %rax
+ movq 224(%r10), %rcx
+ movq %rax, 216(%r10)
+ sbbq 224(%rdi), %rcx
+ movq 232(%r10), %r8
+ movq %rcx, 224(%r10)
+ sbbq 232(%rdi), %r8
+ movq 240(%r10), %rax
+ movq %r8, 232(%r10)
+ sbbq 240(%rdi), %rax
+ movq 248(%r10), %rcx
+ movq %rax, 240(%r10)
+ sbbq 248(%rdi), %rcx
+ movq 256(%r10), %r8
+ movq %rcx, 248(%r10)
+ sbbq 256(%rdi), %r8
+ movq 264(%r10), %rax
+ movq %r8, 256(%r10)
+ sbbq 264(%rdi), %rax
+ movq 272(%r10), %rcx
+ movq %rax, 264(%r10)
+ sbbq 272(%rdi), %rcx
+ movq 280(%r10), %r8
+ movq %rcx, 272(%r10)
+ sbbq 280(%rdi), %r8
+ movq 288(%r10), %rax
+ movq %r8, 280(%r10)
+ sbbq 288(%rdi), %rax
+ movq 296(%r10), %rcx
+ movq %rax, 288(%r10)
+ sbbq 296(%rdi), %rcx
+ movq 304(%r10), %r8
+ movq %rcx, 296(%r10)
+ sbbq 304(%rdi), %r8
+ movq 312(%r10), %rax
+ movq %r8, 304(%r10)
+ sbbq 312(%rdi), %rax
+ movq 320(%r10), %rcx
+ movq %rax, 312(%r10)
+ sbbq 320(%rdi), %rcx
+ movq 328(%r10), %r8
+ movq %rcx, 320(%r10)
+ sbbq 328(%rdi), %r8
+ movq 336(%r10), %rax
+ movq %r8, 328(%r10)
+ sbbq 336(%rdi), %rax
+ movq 344(%r10), %rcx
+ movq %rax, 336(%r10)
+ sbbq 344(%rdi), %rcx
+ movq 352(%r10), %r8
+ movq %rcx, 344(%r10)
+ sbbq 352(%rdi), %r8
+ movq 360(%r10), %rax
+ movq %r8, 352(%r10)
+ sbbq 360(%rdi), %rax
+ movq 368(%r10), %rcx
+ movq %rax, 360(%r10)
+ sbbq 368(%rdi), %rcx
+ movq 376(%r10), %r8
+ movq %rcx, 368(%r10)
+ sbbq 376(%rdi), %r8
+ movq 384(%r10), %rax
+ movq %r8, 376(%r10)
+ sbbq 384(%rdi), %rax
+ movq 392(%r10), %rcx
+ movq %rax, 384(%r10)
+ sbbq 392(%rdi), %rcx
+ movq 400(%r10), %r8
+ movq %rcx, 392(%r10)
+ sbbq 400(%rdi), %r8
+ movq 408(%r10), %rax
+ movq %r8, 400(%r10)
+ sbbq 408(%rdi), %rax
+ movq 416(%r10), %rcx
+ movq %rax, 408(%r10)
+ sbbq 416(%rdi), %rcx
+ movq 424(%r10), %r8
+ movq %rcx, 416(%r10)
+ sbbq 424(%rdi), %r8
+ movq 432(%r10), %rax
+ movq %r8, 424(%r10)
+ sbbq 432(%rdi), %rax
+ movq 440(%r10), %rcx
+ movq %rax, 432(%r10)
+ sbbq 440(%rdi), %rcx
+ movq 448(%r10), %r8
+ movq %rcx, 440(%r10)
+ sbbq 448(%rdi), %r8
+ movq 456(%r10), %rax
+ movq %r8, 448(%r10)
+ sbbq 456(%rdi), %rax
+ movq 464(%r10), %rcx
+ movq %rax, 456(%r10)
+ sbbq 464(%rdi), %rcx
+ movq 472(%r10), %r8
+ movq %rcx, 464(%r10)
+ sbbq 472(%rdi), %r8
+ movq 480(%r10), %rax
+ movq %r8, 472(%r10)
+ sbbq 480(%rdi), %rax
+ movq 488(%r10), %rcx
+ movq %rax, 480(%r10)
+ sbbq 488(%rdi), %rcx
+ movq 496(%r10), %r8
+ movq %rcx, 488(%r10)
+ sbbq 496(%rdi), %r8
+ movq 504(%r10), %rax
+ movq %r8, 496(%r10)
+ sbbq 504(%rdi), %rax
+ movq %rax, 504(%r10)
+ sbbq $0, %r9
+ subq $256, %r15
+ # Add
+ movq (%r15), %rax
+ addq (%r10), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r10), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r10), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r10), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r10), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r10), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r10), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r10), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r10), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r10), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r10), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r10), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r10), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r10), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r10), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r10), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r10), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r10), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r10), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r10), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r10), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r10), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r10), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r10), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r10), %rax
+ movq 200(%r15), %rcx
+ movq %rax, 192(%r15)
+ adcq 200(%r10), %rcx
+ movq 208(%r15), %r8
+ movq %rcx, 200(%r15)
+ adcq 208(%r10), %r8
+ movq 216(%r15), %rax
+ movq %r8, 208(%r15)
+ adcq 216(%r10), %rax
+ movq 224(%r15), %rcx
+ movq %rax, 216(%r15)
+ adcq 224(%r10), %rcx
+ movq 232(%r15), %r8
+ movq %rcx, 224(%r15)
+ adcq 232(%r10), %r8
+ movq 240(%r15), %rax
+ movq %r8, 232(%r15)
+ adcq 240(%r10), %rax
+ movq 248(%r15), %rcx
+ movq %rax, 240(%r15)
+ adcq 248(%r10), %rcx
+ movq 256(%r15), %r8
+ movq %rcx, 248(%r15)
+ adcq 256(%r10), %r8
+ movq 264(%r15), %rax
+ movq %r8, 256(%r15)
+ adcq 264(%r10), %rax
+ movq 272(%r15), %rcx
+ movq %rax, 264(%r15)
+ adcq 272(%r10), %rcx
+ movq 280(%r15), %r8
+ movq %rcx, 272(%r15)
+ adcq 280(%r10), %r8
+ movq 288(%r15), %rax
+ movq %r8, 280(%r15)
+ adcq 288(%r10), %rax
+ movq 296(%r15), %rcx
+ movq %rax, 288(%r15)
+ adcq 296(%r10), %rcx
+ movq 304(%r15), %r8
+ movq %rcx, 296(%r15)
+ adcq 304(%r10), %r8
+ movq 312(%r15), %rax
+ movq %r8, 304(%r15)
+ adcq 312(%r10), %rax
+ movq 320(%r15), %rcx
+ movq %rax, 312(%r15)
+ adcq 320(%r10), %rcx
+ movq 328(%r15), %r8
+ movq %rcx, 320(%r15)
+ adcq 328(%r10), %r8
+ movq 336(%r15), %rax
+ movq %r8, 328(%r15)
+ adcq 336(%r10), %rax
+ movq 344(%r15), %rcx
+ movq %rax, 336(%r15)
+ adcq 344(%r10), %rcx
+ movq 352(%r15), %r8
+ movq %rcx, 344(%r15)
+ adcq 352(%r10), %r8
+ movq 360(%r15), %rax
+ movq %r8, 352(%r15)
+ adcq 360(%r10), %rax
+ movq 368(%r15), %rcx
+ movq %rax, 360(%r15)
+ adcq 368(%r10), %rcx
+ movq 376(%r15), %r8
+ movq %rcx, 368(%r15)
+ adcq 376(%r10), %r8
+ movq 384(%r15), %rax
+ movq %r8, 376(%r15)
+ adcq 384(%r10), %rax
+ movq 392(%r15), %rcx
+ movq %rax, 384(%r15)
+ adcq 392(%r10), %rcx
+ movq 400(%r15), %r8
+ movq %rcx, 392(%r15)
+ adcq 400(%r10), %r8
+ movq 408(%r15), %rax
+ movq %r8, 400(%r15)
+ adcq 408(%r10), %rax
+ movq 416(%r15), %rcx
+ movq %rax, 408(%r15)
+ adcq 416(%r10), %rcx
+ movq 424(%r15), %r8
+ movq %rcx, 416(%r15)
+ adcq 424(%r10), %r8
+ movq 432(%r15), %rax
+ movq %r8, 424(%r15)
+ adcq 432(%r10), %rax
+ movq 440(%r15), %rcx
+ movq %rax, 432(%r15)
+ adcq 440(%r10), %rcx
+ movq 448(%r15), %r8
+ movq %rcx, 440(%r15)
+ adcq 448(%r10), %r8
+ movq 456(%r15), %rax
+ movq %r8, 448(%r15)
+ adcq 456(%r10), %rax
+ movq 464(%r15), %rcx
+ movq %rax, 456(%r15)
+ adcq 464(%r10), %rcx
+ movq 472(%r15), %r8
+ movq %rcx, 464(%r15)
+ adcq 472(%r10), %r8
+ movq 480(%r15), %rax
+ movq %r8, 472(%r15)
+ adcq 480(%r10), %rax
+ movq 488(%r15), %rcx
+ movq %rax, 480(%r15)
+ adcq 488(%r10), %rcx
+ movq 496(%r15), %r8
+ movq %rcx, 488(%r15)
+ adcq 496(%r10), %r8
+ movq 504(%r15), %rax
+ movq %r8, 496(%r15)
+ adcq 504(%r10), %rax
+ movq %rax, 504(%r15)
+ adcq $0, %r9
+ movq %r9, 768(%rdi)
+ addq $256, %r15
+ # Add
+ movq (%r15), %rax
+ xorq %r9, %r9
+ addq (%r11), %rax
+ movq 8(%r15), %rcx
+ movq %rax, (%r15)
+ adcq 8(%r11), %rcx
+ movq 16(%r15), %r8
+ movq %rcx, 8(%r15)
+ adcq 16(%r11), %r8
+ movq 24(%r15), %rax
+ movq %r8, 16(%r15)
+ adcq 24(%r11), %rax
+ movq 32(%r15), %rcx
+ movq %rax, 24(%r15)
+ adcq 32(%r11), %rcx
+ movq 40(%r15), %r8
+ movq %rcx, 32(%r15)
+ adcq 40(%r11), %r8
+ movq 48(%r15), %rax
+ movq %r8, 40(%r15)
+ adcq 48(%r11), %rax
+ movq 56(%r15), %rcx
+ movq %rax, 48(%r15)
+ adcq 56(%r11), %rcx
+ movq 64(%r15), %r8
+ movq %rcx, 56(%r15)
+ adcq 64(%r11), %r8
+ movq 72(%r15), %rax
+ movq %r8, 64(%r15)
+ adcq 72(%r11), %rax
+ movq 80(%r15), %rcx
+ movq %rax, 72(%r15)
+ adcq 80(%r11), %rcx
+ movq 88(%r15), %r8
+ movq %rcx, 80(%r15)
+ adcq 88(%r11), %r8
+ movq 96(%r15), %rax
+ movq %r8, 88(%r15)
+ adcq 96(%r11), %rax
+ movq 104(%r15), %rcx
+ movq %rax, 96(%r15)
+ adcq 104(%r11), %rcx
+ movq 112(%r15), %r8
+ movq %rcx, 104(%r15)
+ adcq 112(%r11), %r8
+ movq 120(%r15), %rax
+ movq %r8, 112(%r15)
+ adcq 120(%r11), %rax
+ movq 128(%r15), %rcx
+ movq %rax, 120(%r15)
+ adcq 128(%r11), %rcx
+ movq 136(%r15), %r8
+ movq %rcx, 128(%r15)
+ adcq 136(%r11), %r8
+ movq 144(%r15), %rax
+ movq %r8, 136(%r15)
+ adcq 144(%r11), %rax
+ movq 152(%r15), %rcx
+ movq %rax, 144(%r15)
+ adcq 152(%r11), %rcx
+ movq 160(%r15), %r8
+ movq %rcx, 152(%r15)
+ adcq 160(%r11), %r8
+ movq 168(%r15), %rax
+ movq %r8, 160(%r15)
+ adcq 168(%r11), %rax
+ movq 176(%r15), %rcx
+ movq %rax, 168(%r15)
+ adcq 176(%r11), %rcx
+ movq 184(%r15), %r8
+ movq %rcx, 176(%r15)
+ adcq 184(%r11), %r8
+ movq 192(%r15), %rax
+ movq %r8, 184(%r15)
+ adcq 192(%r11), %rax
+ movq 200(%r15), %rcx
+ movq %rax, 192(%r15)
+ adcq 200(%r11), %rcx
+ movq 208(%r15), %r8
+ movq %rcx, 200(%r15)
+ adcq 208(%r11), %r8
+ movq 216(%r15), %rax
+ movq %r8, 208(%r15)
+ adcq 216(%r11), %rax
+ movq 224(%r15), %rcx
+ movq %rax, 216(%r15)
+ adcq 224(%r11), %rcx
+ movq 232(%r15), %r8
+ movq %rcx, 224(%r15)
+ adcq 232(%r11), %r8
+ movq 240(%r15), %rax
+ movq %r8, 232(%r15)
+ adcq 240(%r11), %rax
+ movq 248(%r15), %rcx
+ movq %rax, 240(%r15)
+ adcq 248(%r11), %rcx
+ movq 256(%r15), %r8
+ movq %rcx, 248(%r15)
+ adcq 256(%r11), %r8
+ movq %r8, 256(%r15)
+ adcq $0, %r9
+ # Add to zero
+ movq 264(%r11), %rax
+ adcq $0, %rax
+ movq 272(%r11), %rcx
+ movq %rax, 264(%r15)
+ adcq $0, %rcx
+ movq 280(%r11), %r8
+ movq %rcx, 272(%r15)
+ adcq $0, %r8
+ movq 288(%r11), %rax
+ movq %r8, 280(%r15)
+ adcq $0, %rax
+ movq 296(%r11), %rcx
+ movq %rax, 288(%r15)
+ adcq $0, %rcx
+ movq 304(%r11), %r8
+ movq %rcx, 296(%r15)
+ adcq $0, %r8
+ movq 312(%r11), %rax
+ movq %r8, 304(%r15)
+ adcq $0, %rax
+ movq 320(%r11), %rcx
+ movq %rax, 312(%r15)
+ adcq $0, %rcx
+ movq 328(%r11), %r8
+ movq %rcx, 320(%r15)
+ adcq $0, %r8
+ movq 336(%r11), %rax
+ movq %r8, 328(%r15)
+ adcq $0, %rax
+ movq 344(%r11), %rcx
+ movq %rax, 336(%r15)
+ adcq $0, %rcx
+ movq 352(%r11), %r8
+ movq %rcx, 344(%r15)
+ adcq $0, %r8
+ movq 360(%r11), %rax
+ movq %r8, 352(%r15)
+ adcq $0, %rax
+ movq 368(%r11), %rcx
+ movq %rax, 360(%r15)
+ adcq $0, %rcx
+ movq 376(%r11), %r8
+ movq %rcx, 368(%r15)
+ adcq $0, %r8
+ movq 384(%r11), %rax
+ movq %r8, 376(%r15)
+ adcq $0, %rax
+ movq 392(%r11), %rcx
+ movq %rax, 384(%r15)
+ adcq $0, %rcx
+ movq 400(%r11), %r8
+ movq %rcx, 392(%r15)
+ adcq $0, %r8
+ movq 408(%r11), %rax
+ movq %r8, 400(%r15)
+ adcq $0, %rax
+ movq 416(%r11), %rcx
+ movq %rax, 408(%r15)
+ adcq $0, %rcx
+ movq 424(%r11), %r8
+ movq %rcx, 416(%r15)
+ adcq $0, %r8
+ movq 432(%r11), %rax
+ movq %r8, 424(%r15)
+ adcq $0, %rax
+ movq 440(%r11), %rcx
+ movq %rax, 432(%r15)
+ adcq $0, %rcx
+ movq 448(%r11), %r8
+ movq %rcx, 440(%r15)
+ adcq $0, %r8
+ movq 456(%r11), %rax
+ movq %r8, 448(%r15)
+ adcq $0, %rax
+ movq 464(%r11), %rcx
+ movq %rax, 456(%r15)
+ adcq $0, %rcx
+ movq 472(%r11), %r8
+ movq %rcx, 464(%r15)
+ adcq $0, %r8
+ movq 480(%r11), %rax
+ movq %r8, 472(%r15)
+ adcq $0, %rax
+ movq 488(%r11), %rcx
+ movq %rax, 480(%r15)
+ adcq $0, %rcx
+ movq 496(%r11), %r8
+ movq %rcx, 488(%r15)
+ adcq $0, %r8
+ movq 504(%r11), %rax
+ movq %r8, 496(%r15)
+ adcq $0, %rax
+ movq %rax, 504(%r15)
+ addq $1576, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_mul_avx2_64,.-sp_4096_mul_avx2_64
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  Output: single precision integer receiving the square (double-width).
+ * a  Input: single precision integer to be squared.
+ */
+#ifndef __APPLE__
+.globl sp_4096_sqr_avx2_64
+.type sp_4096_sqr_avx2_64,@function
+.align 16
+sp_4096_sqr_avx2_64:
+#else
+.globl _sp_4096_sqr_avx2_64
+.p2align 4
+_sp_4096_sqr_avx2_64:
+#endif /* __APPLE__ */
+ subq $1304, %rsp
+ movq %rdi, 1280(%rsp)
+ movq %rsi, 1288(%rsp)
+ leaq 1024(%rsp), %r8
+ leaq 256(%rsi), %r9
+ # Add
+ movq (%rsi), %rdx
+ xorq %rcx, %rcx
+ addq (%r9), %rdx
+ movq 8(%rsi), %rax
+ movq %rdx, (%r8)
+ adcq 8(%r9), %rax
+ movq 16(%rsi), %rdx
+ movq %rax, 8(%r8)
+ adcq 16(%r9), %rdx
+ movq 24(%rsi), %rax
+ movq %rdx, 16(%r8)
+ adcq 24(%r9), %rax
+ movq 32(%rsi), %rdx
+ movq %rax, 24(%r8)
+ adcq 32(%r9), %rdx
+ movq 40(%rsi), %rax
+ movq %rdx, 32(%r8)
+ adcq 40(%r9), %rax
+ movq 48(%rsi), %rdx
+ movq %rax, 40(%r8)
+ adcq 48(%r9), %rdx
+ movq 56(%rsi), %rax
+ movq %rdx, 48(%r8)
+ adcq 56(%r9), %rax
+ movq 64(%rsi), %rdx
+ movq %rax, 56(%r8)
+ adcq 64(%r9), %rdx
+ movq 72(%rsi), %rax
+ movq %rdx, 64(%r8)
+ adcq 72(%r9), %rax
+ movq 80(%rsi), %rdx
+ movq %rax, 72(%r8)
+ adcq 80(%r9), %rdx
+ movq 88(%rsi), %rax
+ movq %rdx, 80(%r8)
+ adcq 88(%r9), %rax
+ movq 96(%rsi), %rdx
+ movq %rax, 88(%r8)
+ adcq 96(%r9), %rdx
+ movq 104(%rsi), %rax
+ movq %rdx, 96(%r8)
+ adcq 104(%r9), %rax
+ movq 112(%rsi), %rdx
+ movq %rax, 104(%r8)
+ adcq 112(%r9), %rdx
+ movq 120(%rsi), %rax
+ movq %rdx, 112(%r8)
+ adcq 120(%r9), %rax
+ movq 128(%rsi), %rdx
+ movq %rax, 120(%r8)
+ adcq 128(%r9), %rdx
+ movq 136(%rsi), %rax
+ movq %rdx, 128(%r8)
+ adcq 136(%r9), %rax
+ movq 144(%rsi), %rdx
+ movq %rax, 136(%r8)
+ adcq 144(%r9), %rdx
+ movq 152(%rsi), %rax
+ movq %rdx, 144(%r8)
+ adcq 152(%r9), %rax
+ movq 160(%rsi), %rdx
+ movq %rax, 152(%r8)
+ adcq 160(%r9), %rdx
+ movq 168(%rsi), %rax
+ movq %rdx, 160(%r8)
+ adcq 168(%r9), %rax
+ movq 176(%rsi), %rdx
+ movq %rax, 168(%r8)
+ adcq 176(%r9), %rdx
+ movq 184(%rsi), %rax
+ movq %rdx, 176(%r8)
+ adcq 184(%r9), %rax
+ movq 192(%rsi), %rdx
+ movq %rax, 184(%r8)
+ adcq 192(%r9), %rdx
+ movq 200(%rsi), %rax
+ movq %rdx, 192(%r8)
+ adcq 200(%r9), %rax
+ movq 208(%rsi), %rdx
+ movq %rax, 200(%r8)
+ adcq 208(%r9), %rdx
+ movq 216(%rsi), %rax
+ movq %rdx, 208(%r8)
+ adcq 216(%r9), %rax
+ movq 224(%rsi), %rdx
+ movq %rax, 216(%r8)
+ adcq 224(%r9), %rdx
+ movq 232(%rsi), %rax
+ movq %rdx, 224(%r8)
+ adcq 232(%r9), %rax
+ movq 240(%rsi), %rdx
+ movq %rax, 232(%r8)
+ adcq 240(%r9), %rdx
+ movq 248(%rsi), %rax
+ movq %rdx, 240(%r8)
+ adcq 248(%r9), %rax
+ movq %rax, 248(%r8)
+ adcq $0, %rcx
+ movq %rcx, 1296(%rsp)
+ movq %r8, %rsi
+ movq %rsp, %rdi
+#ifndef __APPLE__
+ callq sp_2048_sqr_avx2_32@plt
+#else
+ callq _sp_2048_sqr_avx2_32
+#endif /* __APPLE__ */
+ movq 1288(%rsp), %rsi
+ leaq 512(%rsp), %rdi
+ addq $256, %rsi
+#ifndef __APPLE__
+ callq sp_2048_sqr_avx2_32@plt
+#else
+ callq _sp_2048_sqr_avx2_32
+#endif /* __APPLE__ */
+ movq 1288(%rsp), %rsi
+ movq 1280(%rsp), %rdi
+#ifndef __APPLE__
+ callq sp_2048_sqr_avx2_32@plt
+#else
+ callq _sp_2048_sqr_avx2_32
+#endif /* __APPLE__ */
+ movq 1296(%rsp), %r10
+ leaq 1024(%rsp), %r8
+ movq %r10, %rcx
+ negq %r10
+ movq (%r8), %rdx
+ pextq %r10, %rdx, %rdx
+ addq %rdx, %rdx
+ movq 8(%r8), %rax
+ movq %rdx, 512(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 16(%r8), %rdx
+ movq %rax, 520(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 528(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 32(%r8), %rdx
+ movq %rax, 536(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 544(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 48(%r8), %rdx
+ movq %rax, 552(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 560(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 64(%r8), %rdx
+ movq %rax, 568(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 576(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 80(%r8), %rdx
+ movq %rax, 584(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 592(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 96(%r8), %rdx
+ movq %rax, 600(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 608(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 112(%r8), %rdx
+ movq %rax, 616(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 624(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 128(%r8), %rdx
+ movq %rax, 632(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 640(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 144(%r8), %rdx
+ movq %rax, 648(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 656(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 160(%r8), %rdx
+ movq %rax, 664(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 672(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 176(%r8), %rdx
+ movq %rax, 680(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 688(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 192(%r8), %rdx
+ movq %rax, 696(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 704(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 208(%r8), %rdx
+ movq %rax, 712(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 720(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 224(%r8), %rdx
+ movq %rax, 728(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 736(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq 240(%r8), %rdx
+ movq %rax, 744(%rdi)
+ pextq %r10, %rdx, %rdx
+ adcq %rdx, %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 752(%rdi)
+ pextq %r10, %rax, %rax
+ adcq %rax, %rax
+ movq %rax, 760(%rdi)
+ adcq $0, %rcx
+ leaq 512(%rsp), %rsi
+ movq %rsp, %r8
+ movq (%r8), %rdx
+ subq (%rsi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rsi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rsi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rsi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rsi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rsi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rsi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rsi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rsi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rsi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rsi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rsi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rsi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rsi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rsi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rsi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rsi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rsi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rsi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rsi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rsi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rsi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rsi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rsi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rsi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rsi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rsi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rsi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rsi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rsi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rsi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rsi), %rax
+ movq 256(%r8), %rdx
+ movq %rax, 248(%r8)
+ sbbq 256(%rsi), %rdx
+ movq 264(%r8), %rax
+ movq %rdx, 256(%r8)
+ sbbq 264(%rsi), %rax
+ movq 272(%r8), %rdx
+ movq %rax, 264(%r8)
+ sbbq 272(%rsi), %rdx
+ movq 280(%r8), %rax
+ movq %rdx, 272(%r8)
+ sbbq 280(%rsi), %rax
+ movq 288(%r8), %rdx
+ movq %rax, 280(%r8)
+ sbbq 288(%rsi), %rdx
+ movq 296(%r8), %rax
+ movq %rdx, 288(%r8)
+ sbbq 296(%rsi), %rax
+ movq 304(%r8), %rdx
+ movq %rax, 296(%r8)
+ sbbq 304(%rsi), %rdx
+ movq 312(%r8), %rax
+ movq %rdx, 304(%r8)
+ sbbq 312(%rsi), %rax
+ movq 320(%r8), %rdx
+ movq %rax, 312(%r8)
+ sbbq 320(%rsi), %rdx
+ movq 328(%r8), %rax
+ movq %rdx, 320(%r8)
+ sbbq 328(%rsi), %rax
+ movq 336(%r8), %rdx
+ movq %rax, 328(%r8)
+ sbbq 336(%rsi), %rdx
+ movq 344(%r8), %rax
+ movq %rdx, 336(%r8)
+ sbbq 344(%rsi), %rax
+ movq 352(%r8), %rdx
+ movq %rax, 344(%r8)
+ sbbq 352(%rsi), %rdx
+ movq 360(%r8), %rax
+ movq %rdx, 352(%r8)
+ sbbq 360(%rsi), %rax
+ movq 368(%r8), %rdx
+ movq %rax, 360(%r8)
+ sbbq 368(%rsi), %rdx
+ movq 376(%r8), %rax
+ movq %rdx, 368(%r8)
+ sbbq 376(%rsi), %rax
+ movq 384(%r8), %rdx
+ movq %rax, 376(%r8)
+ sbbq 384(%rsi), %rdx
+ movq 392(%r8), %rax
+ movq %rdx, 384(%r8)
+ sbbq 392(%rsi), %rax
+ movq 400(%r8), %rdx
+ movq %rax, 392(%r8)
+ sbbq 400(%rsi), %rdx
+ movq 408(%r8), %rax
+ movq %rdx, 400(%r8)
+ sbbq 408(%rsi), %rax
+ movq 416(%r8), %rdx
+ movq %rax, 408(%r8)
+ sbbq 416(%rsi), %rdx
+ movq 424(%r8), %rax
+ movq %rdx, 416(%r8)
+ sbbq 424(%rsi), %rax
+ movq 432(%r8), %rdx
+ movq %rax, 424(%r8)
+ sbbq 432(%rsi), %rdx
+ movq 440(%r8), %rax
+ movq %rdx, 432(%r8)
+ sbbq 440(%rsi), %rax
+ movq 448(%r8), %rdx
+ movq %rax, 440(%r8)
+ sbbq 448(%rsi), %rdx
+ movq 456(%r8), %rax
+ movq %rdx, 448(%r8)
+ sbbq 456(%rsi), %rax
+ movq 464(%r8), %rdx
+ movq %rax, 456(%r8)
+ sbbq 464(%rsi), %rdx
+ movq 472(%r8), %rax
+ movq %rdx, 464(%r8)
+ sbbq 472(%rsi), %rax
+ movq 480(%r8), %rdx
+ movq %rax, 472(%r8)
+ sbbq 480(%rsi), %rdx
+ movq 488(%r8), %rax
+ movq %rdx, 480(%r8)
+ sbbq 488(%rsi), %rax
+ movq 496(%r8), %rdx
+ movq %rax, 488(%r8)
+ sbbq 496(%rsi), %rdx
+ movq 504(%r8), %rax
+ movq %rdx, 496(%r8)
+ sbbq 504(%rsi), %rax
+ movq %rax, 504(%r8)
+ sbbq $0, %rcx
+ movq (%r8), %rdx
+ subq (%rdi), %rdx
+ movq 8(%r8), %rax
+ movq %rdx, (%r8)
+ sbbq 8(%rdi), %rax
+ movq 16(%r8), %rdx
+ movq %rax, 8(%r8)
+ sbbq 16(%rdi), %rdx
+ movq 24(%r8), %rax
+ movq %rdx, 16(%r8)
+ sbbq 24(%rdi), %rax
+ movq 32(%r8), %rdx
+ movq %rax, 24(%r8)
+ sbbq 32(%rdi), %rdx
+ movq 40(%r8), %rax
+ movq %rdx, 32(%r8)
+ sbbq 40(%rdi), %rax
+ movq 48(%r8), %rdx
+ movq %rax, 40(%r8)
+ sbbq 48(%rdi), %rdx
+ movq 56(%r8), %rax
+ movq %rdx, 48(%r8)
+ sbbq 56(%rdi), %rax
+ movq 64(%r8), %rdx
+ movq %rax, 56(%r8)
+ sbbq 64(%rdi), %rdx
+ movq 72(%r8), %rax
+ movq %rdx, 64(%r8)
+ sbbq 72(%rdi), %rax
+ movq 80(%r8), %rdx
+ movq %rax, 72(%r8)
+ sbbq 80(%rdi), %rdx
+ movq 88(%r8), %rax
+ movq %rdx, 80(%r8)
+ sbbq 88(%rdi), %rax
+ movq 96(%r8), %rdx
+ movq %rax, 88(%r8)
+ sbbq 96(%rdi), %rdx
+ movq 104(%r8), %rax
+ movq %rdx, 96(%r8)
+ sbbq 104(%rdi), %rax
+ movq 112(%r8), %rdx
+ movq %rax, 104(%r8)
+ sbbq 112(%rdi), %rdx
+ movq 120(%r8), %rax
+ movq %rdx, 112(%r8)
+ sbbq 120(%rdi), %rax
+ movq 128(%r8), %rdx
+ movq %rax, 120(%r8)
+ sbbq 128(%rdi), %rdx
+ movq 136(%r8), %rax
+ movq %rdx, 128(%r8)
+ sbbq 136(%rdi), %rax
+ movq 144(%r8), %rdx
+ movq %rax, 136(%r8)
+ sbbq 144(%rdi), %rdx
+ movq 152(%r8), %rax
+ movq %rdx, 144(%r8)
+ sbbq 152(%rdi), %rax
+ movq 160(%r8), %rdx
+ movq %rax, 152(%r8)
+ sbbq 160(%rdi), %rdx
+ movq 168(%r8), %rax
+ movq %rdx, 160(%r8)
+ sbbq 168(%rdi), %rax
+ movq 176(%r8), %rdx
+ movq %rax, 168(%r8)
+ sbbq 176(%rdi), %rdx
+ movq 184(%r8), %rax
+ movq %rdx, 176(%r8)
+ sbbq 184(%rdi), %rax
+ movq 192(%r8), %rdx
+ movq %rax, 184(%r8)
+ sbbq 192(%rdi), %rdx
+ movq 200(%r8), %rax
+ movq %rdx, 192(%r8)
+ sbbq 200(%rdi), %rax
+ movq 208(%r8), %rdx
+ movq %rax, 200(%r8)
+ sbbq 208(%rdi), %rdx
+ movq 216(%r8), %rax
+ movq %rdx, 208(%r8)
+ sbbq 216(%rdi), %rax
+ movq 224(%r8), %rdx
+ movq %rax, 216(%r8)
+ sbbq 224(%rdi), %rdx
+ movq 232(%r8), %rax
+ movq %rdx, 224(%r8)
+ sbbq 232(%rdi), %rax
+ movq 240(%r8), %rdx
+ movq %rax, 232(%r8)
+ sbbq 240(%rdi), %rdx
+ movq 248(%r8), %rax
+ movq %rdx, 240(%r8)
+ sbbq 248(%rdi), %rax
+ movq 256(%r8), %rdx
+ movq %rax, 248(%r8)
+ sbbq 256(%rdi), %rdx
+ movq 264(%r8), %rax
+ movq %rdx, 256(%r8)
+ sbbq 264(%rdi), %rax
+ movq 272(%r8), %rdx
+ movq %rax, 264(%r8)
+ sbbq 272(%rdi), %rdx
+ movq 280(%r8), %rax
+ movq %rdx, 272(%r8)
+ sbbq 280(%rdi), %rax
+ movq 288(%r8), %rdx
+ movq %rax, 280(%r8)
+ sbbq 288(%rdi), %rdx
+ movq 296(%r8), %rax
+ movq %rdx, 288(%r8)
+ sbbq 296(%rdi), %rax
+ movq 304(%r8), %rdx
+ movq %rax, 296(%r8)
+ sbbq 304(%rdi), %rdx
+ movq 312(%r8), %rax
+ movq %rdx, 304(%r8)
+ sbbq 312(%rdi), %rax
+ movq 320(%r8), %rdx
+ movq %rax, 312(%r8)
+ sbbq 320(%rdi), %rdx
+ movq 328(%r8), %rax
+ movq %rdx, 320(%r8)
+ sbbq 328(%rdi), %rax
+ movq 336(%r8), %rdx
+ movq %rax, 328(%r8)
+ sbbq 336(%rdi), %rdx
+ movq 344(%r8), %rax
+ movq %rdx, 336(%r8)
+ sbbq 344(%rdi), %rax
+ movq 352(%r8), %rdx
+ movq %rax, 344(%r8)
+ sbbq 352(%rdi), %rdx
+ movq 360(%r8), %rax
+ movq %rdx, 352(%r8)
+ sbbq 360(%rdi), %rax
+ movq 368(%r8), %rdx
+ movq %rax, 360(%r8)
+ sbbq 368(%rdi), %rdx
+ movq 376(%r8), %rax
+ movq %rdx, 368(%r8)
+ sbbq 376(%rdi), %rax
+ movq 384(%r8), %rdx
+ movq %rax, 376(%r8)
+ sbbq 384(%rdi), %rdx
+ movq 392(%r8), %rax
+ movq %rdx, 384(%r8)
+ sbbq 392(%rdi), %rax
+ movq 400(%r8), %rdx
+ movq %rax, 392(%r8)
+ sbbq 400(%rdi), %rdx
+ movq 408(%r8), %rax
+ movq %rdx, 400(%r8)
+ sbbq 408(%rdi), %rax
+ movq 416(%r8), %rdx
+ movq %rax, 408(%r8)
+ sbbq 416(%rdi), %rdx
+ movq 424(%r8), %rax
+ movq %rdx, 416(%r8)
+ sbbq 424(%rdi), %rax
+ movq 432(%r8), %rdx
+ movq %rax, 424(%r8)
+ sbbq 432(%rdi), %rdx
+ movq 440(%r8), %rax
+ movq %rdx, 432(%r8)
+ sbbq 440(%rdi), %rax
+ movq 448(%r8), %rdx
+ movq %rax, 440(%r8)
+ sbbq 448(%rdi), %rdx
+ movq 456(%r8), %rax
+ movq %rdx, 448(%r8)
+ sbbq 456(%rdi), %rax
+ movq 464(%r8), %rdx
+ movq %rax, 456(%r8)
+ sbbq 464(%rdi), %rdx
+ movq 472(%r8), %rax
+ movq %rdx, 464(%r8)
+ sbbq 472(%rdi), %rax
+ movq 480(%r8), %rdx
+ movq %rax, 472(%r8)
+ sbbq 480(%rdi), %rdx
+ movq 488(%r8), %rax
+ movq %rdx, 480(%r8)
+ sbbq 488(%rdi), %rax
+ movq 496(%r8), %rdx
+ movq %rax, 488(%r8)
+ sbbq 496(%rdi), %rdx
+ movq 504(%r8), %rax
+ movq %rdx, 496(%r8)
+ sbbq 504(%rdi), %rax
+ movq %rax, 504(%r8)
+ sbbq $0, %rcx
+ # Add in place
+ movq 256(%rdi), %rdx
+ addq (%r8), %rdx
+ movq 264(%rdi), %rax
+ movq %rdx, 256(%rdi)
+ adcq 8(%r8), %rax
+ movq 272(%rdi), %rdx
+ movq %rax, 264(%rdi)
+ adcq 16(%r8), %rdx
+ movq 280(%rdi), %rax
+ movq %rdx, 272(%rdi)
+ adcq 24(%r8), %rax
+ movq 288(%rdi), %rdx
+ movq %rax, 280(%rdi)
+ adcq 32(%r8), %rdx
+ movq 296(%rdi), %rax
+ movq %rdx, 288(%rdi)
+ adcq 40(%r8), %rax
+ movq 304(%rdi), %rdx
+ movq %rax, 296(%rdi)
+ adcq 48(%r8), %rdx
+ movq 312(%rdi), %rax
+ movq %rdx, 304(%rdi)
+ adcq 56(%r8), %rax
+ movq 320(%rdi), %rdx
+ movq %rax, 312(%rdi)
+ adcq 64(%r8), %rdx
+ movq 328(%rdi), %rax
+ movq %rdx, 320(%rdi)
+ adcq 72(%r8), %rax
+ movq 336(%rdi), %rdx
+ movq %rax, 328(%rdi)
+ adcq 80(%r8), %rdx
+ movq 344(%rdi), %rax
+ movq %rdx, 336(%rdi)
+ adcq 88(%r8), %rax
+ movq 352(%rdi), %rdx
+ movq %rax, 344(%rdi)
+ adcq 96(%r8), %rdx
+ movq 360(%rdi), %rax
+ movq %rdx, 352(%rdi)
+ adcq 104(%r8), %rax
+ movq 368(%rdi), %rdx
+ movq %rax, 360(%rdi)
+ adcq 112(%r8), %rdx
+ movq 376(%rdi), %rax
+ movq %rdx, 368(%rdi)
+ adcq 120(%r8), %rax
+ movq 384(%rdi), %rdx
+ movq %rax, 376(%rdi)
+ adcq 128(%r8), %rdx
+ movq 392(%rdi), %rax
+ movq %rdx, 384(%rdi)
+ adcq 136(%r8), %rax
+ movq 400(%rdi), %rdx
+ movq %rax, 392(%rdi)
+ adcq 144(%r8), %rdx
+ movq 408(%rdi), %rax
+ movq %rdx, 400(%rdi)
+ adcq 152(%r8), %rax
+ movq 416(%rdi), %rdx
+ movq %rax, 408(%rdi)
+ adcq 160(%r8), %rdx
+ movq 424(%rdi), %rax
+ movq %rdx, 416(%rdi)
+ adcq 168(%r8), %rax
+ movq 432(%rdi), %rdx
+ movq %rax, 424(%rdi)
+ adcq 176(%r8), %rdx
+ movq 440(%rdi), %rax
+ movq %rdx, 432(%rdi)
+ adcq 184(%r8), %rax
+ movq 448(%rdi), %rdx
+ movq %rax, 440(%rdi)
+ adcq 192(%r8), %rdx
+ movq 456(%rdi), %rax
+ movq %rdx, 448(%rdi)
+ adcq 200(%r8), %rax
+ movq 464(%rdi), %rdx
+ movq %rax, 456(%rdi)
+ adcq 208(%r8), %rdx
+ movq 472(%rdi), %rax
+ movq %rdx, 464(%rdi)
+ adcq 216(%r8), %rax
+ movq 480(%rdi), %rdx
+ movq %rax, 472(%rdi)
+ adcq 224(%r8), %rdx
+ movq 488(%rdi), %rax
+ movq %rdx, 480(%rdi)
+ adcq 232(%r8), %rax
+ movq 496(%rdi), %rdx
+ movq %rax, 488(%rdi)
+ adcq 240(%r8), %rdx
+ movq 504(%rdi), %rax
+ movq %rdx, 496(%rdi)
+ adcq 248(%r8), %rax
+ movq 512(%rdi), %rdx
+ movq %rax, 504(%rdi)
+ adcq 256(%r8), %rdx
+ movq 520(%rdi), %rax
+ movq %rdx, 512(%rdi)
+ adcq 264(%r8), %rax
+ movq 528(%rdi), %rdx
+ movq %rax, 520(%rdi)
+ adcq 272(%r8), %rdx
+ movq 536(%rdi), %rax
+ movq %rdx, 528(%rdi)
+ adcq 280(%r8), %rax
+ movq 544(%rdi), %rdx
+ movq %rax, 536(%rdi)
+ adcq 288(%r8), %rdx
+ movq 552(%rdi), %rax
+ movq %rdx, 544(%rdi)
+ adcq 296(%r8), %rax
+ movq 560(%rdi), %rdx
+ movq %rax, 552(%rdi)
+ adcq 304(%r8), %rdx
+ movq 568(%rdi), %rax
+ movq %rdx, 560(%rdi)
+ adcq 312(%r8), %rax
+ movq 576(%rdi), %rdx
+ movq %rax, 568(%rdi)
+ adcq 320(%r8), %rdx
+ movq 584(%rdi), %rax
+ movq %rdx, 576(%rdi)
+ adcq 328(%r8), %rax
+ movq 592(%rdi), %rdx
+ movq %rax, 584(%rdi)
+ adcq 336(%r8), %rdx
+ movq 600(%rdi), %rax
+ movq %rdx, 592(%rdi)
+ adcq 344(%r8), %rax
+ movq 608(%rdi), %rdx
+ movq %rax, 600(%rdi)
+ adcq 352(%r8), %rdx
+ movq 616(%rdi), %rax
+ movq %rdx, 608(%rdi)
+ adcq 360(%r8), %rax
+ movq 624(%rdi), %rdx
+ movq %rax, 616(%rdi)
+ adcq 368(%r8), %rdx
+ movq 632(%rdi), %rax
+ movq %rdx, 624(%rdi)
+ adcq 376(%r8), %rax
+ movq 640(%rdi), %rdx
+ movq %rax, 632(%rdi)
+ adcq 384(%r8), %rdx
+ movq 648(%rdi), %rax
+ movq %rdx, 640(%rdi)
+ adcq 392(%r8), %rax
+ movq 656(%rdi), %rdx
+ movq %rax, 648(%rdi)
+ adcq 400(%r8), %rdx
+ movq 664(%rdi), %rax
+ movq %rdx, 656(%rdi)
+ adcq 408(%r8), %rax
+ movq 672(%rdi), %rdx
+ movq %rax, 664(%rdi)
+ adcq 416(%r8), %rdx
+ movq 680(%rdi), %rax
+ movq %rdx, 672(%rdi)
+ adcq 424(%r8), %rax
+ movq 688(%rdi), %rdx
+ movq %rax, 680(%rdi)
+ adcq 432(%r8), %rdx
+ movq 696(%rdi), %rax
+ movq %rdx, 688(%rdi)
+ adcq 440(%r8), %rax
+ movq 704(%rdi), %rdx
+ movq %rax, 696(%rdi)
+ adcq 448(%r8), %rdx
+ movq 712(%rdi), %rax
+ movq %rdx, 704(%rdi)
+ adcq 456(%r8), %rax
+ movq 720(%rdi), %rdx
+ movq %rax, 712(%rdi)
+ adcq 464(%r8), %rdx
+ movq 728(%rdi), %rax
+ movq %rdx, 720(%rdi)
+ adcq 472(%r8), %rax
+ movq 736(%rdi), %rdx
+ movq %rax, 728(%rdi)
+ adcq 480(%r8), %rdx
+ movq 744(%rdi), %rax
+ movq %rdx, 736(%rdi)
+ adcq 488(%r8), %rax
+ movq 752(%rdi), %rdx
+ movq %rax, 744(%rdi)
+ adcq 496(%r8), %rdx
+ movq 760(%rdi), %rax
+ movq %rdx, 752(%rdi)
+ adcq 504(%r8), %rax
+ movq %rax, 760(%rdi)
+ adcq $0, %rcx
+ movq %rcx, 768(%rdi)
+ # Add in place
+ movq 512(%rdi), %rdx
+ xorq %rcx, %rcx
+ addq (%rsi), %rdx
+ movq 520(%rdi), %rax
+ movq %rdx, 512(%rdi)
+ adcq 8(%rsi), %rax
+ movq 528(%rdi), %rdx
+ movq %rax, 520(%rdi)
+ adcq 16(%rsi), %rdx
+ movq 536(%rdi), %rax
+ movq %rdx, 528(%rdi)
+ adcq 24(%rsi), %rax
+ movq 544(%rdi), %rdx
+ movq %rax, 536(%rdi)
+ adcq 32(%rsi), %rdx
+ movq 552(%rdi), %rax
+ movq %rdx, 544(%rdi)
+ adcq 40(%rsi), %rax
+ movq 560(%rdi), %rdx
+ movq %rax, 552(%rdi)
+ adcq 48(%rsi), %rdx
+ movq 568(%rdi), %rax
+ movq %rdx, 560(%rdi)
+ adcq 56(%rsi), %rax
+ movq 576(%rdi), %rdx
+ movq %rax, 568(%rdi)
+ adcq 64(%rsi), %rdx
+ movq 584(%rdi), %rax
+ movq %rdx, 576(%rdi)
+ adcq 72(%rsi), %rax
+ movq 592(%rdi), %rdx
+ movq %rax, 584(%rdi)
+ adcq 80(%rsi), %rdx
+ movq 600(%rdi), %rax
+ movq %rdx, 592(%rdi)
+ adcq 88(%rsi), %rax
+ movq 608(%rdi), %rdx
+ movq %rax, 600(%rdi)
+ adcq 96(%rsi), %rdx
+ movq 616(%rdi), %rax
+ movq %rdx, 608(%rdi)
+ adcq 104(%rsi), %rax
+ movq 624(%rdi), %rdx
+ movq %rax, 616(%rdi)
+ adcq 112(%rsi), %rdx
+ movq 632(%rdi), %rax
+ movq %rdx, 624(%rdi)
+ adcq 120(%rsi), %rax
+ movq 640(%rdi), %rdx
+ movq %rax, 632(%rdi)
+ adcq 128(%rsi), %rdx
+ movq 648(%rdi), %rax
+ movq %rdx, 640(%rdi)
+ adcq 136(%rsi), %rax
+ movq 656(%rdi), %rdx
+ movq %rax, 648(%rdi)
+ adcq 144(%rsi), %rdx
+ movq 664(%rdi), %rax
+ movq %rdx, 656(%rdi)
+ adcq 152(%rsi), %rax
+ movq 672(%rdi), %rdx
+ movq %rax, 664(%rdi)
+ adcq 160(%rsi), %rdx
+ movq 680(%rdi), %rax
+ movq %rdx, 672(%rdi)
+ adcq 168(%rsi), %rax
+ movq 688(%rdi), %rdx
+ movq %rax, 680(%rdi)
+ adcq 176(%rsi), %rdx
+ movq 696(%rdi), %rax
+ movq %rdx, 688(%rdi)
+ adcq 184(%rsi), %rax
+ movq 704(%rdi), %rdx
+ movq %rax, 696(%rdi)
+ adcq 192(%rsi), %rdx
+ movq 712(%rdi), %rax
+ movq %rdx, 704(%rdi)
+ adcq 200(%rsi), %rax
+ movq 720(%rdi), %rdx
+ movq %rax, 712(%rdi)
+ adcq 208(%rsi), %rdx
+ movq 728(%rdi), %rax
+ movq %rdx, 720(%rdi)
+ adcq 216(%rsi), %rax
+ movq 736(%rdi), %rdx
+ movq %rax, 728(%rdi)
+ adcq 224(%rsi), %rdx
+ movq 744(%rdi), %rax
+ movq %rdx, 736(%rdi)
+ adcq 232(%rsi), %rax
+ movq 752(%rdi), %rdx
+ movq %rax, 744(%rdi)
+ adcq 240(%rsi), %rdx
+ movq 760(%rdi), %rax
+ movq %rdx, 752(%rdi)
+ adcq 248(%rsi), %rax
+ movq 768(%rdi), %rdx
+ movq %rax, 760(%rdi)
+ adcq 256(%rsi), %rdx
+ movq %rdx, 768(%rdi)
+ adcq $0, %rcx
+ # Add to zero
+ movq 264(%rsi), %rdx
+ adcq $0, %rdx
+ movq 272(%rsi), %rax
+ movq %rdx, 776(%rdi)
+ adcq $0, %rax
+ movq 280(%rsi), %rdx
+ movq %rax, 784(%rdi)
+ adcq $0, %rdx
+ movq 288(%rsi), %rax
+ movq %rdx, 792(%rdi)
+ adcq $0, %rax
+ movq 296(%rsi), %rdx
+ movq %rax, 800(%rdi)
+ adcq $0, %rdx
+ movq 304(%rsi), %rax
+ movq %rdx, 808(%rdi)
+ adcq $0, %rax
+ movq 312(%rsi), %rdx
+ movq %rax, 816(%rdi)
+ adcq $0, %rdx
+ movq 320(%rsi), %rax
+ movq %rdx, 824(%rdi)
+ adcq $0, %rax
+ movq 328(%rsi), %rdx
+ movq %rax, 832(%rdi)
+ adcq $0, %rdx
+ movq 336(%rsi), %rax
+ movq %rdx, 840(%rdi)
+ adcq $0, %rax
+ movq 344(%rsi), %rdx
+ movq %rax, 848(%rdi)
+ adcq $0, %rdx
+ movq 352(%rsi), %rax
+ movq %rdx, 856(%rdi)
+ adcq $0, %rax
+ movq 360(%rsi), %rdx
+ movq %rax, 864(%rdi)
+ adcq $0, %rdx
+ movq 368(%rsi), %rax
+ movq %rdx, 872(%rdi)
+ adcq $0, %rax
+ movq 376(%rsi), %rdx
+ movq %rax, 880(%rdi)
+ adcq $0, %rdx
+ movq 384(%rsi), %rax
+ movq %rdx, 888(%rdi)
+ adcq $0, %rax
+ movq 392(%rsi), %rdx
+ movq %rax, 896(%rdi)
+ adcq $0, %rdx
+ movq 400(%rsi), %rax
+ movq %rdx, 904(%rdi)
+ adcq $0, %rax
+ movq 408(%rsi), %rdx
+ movq %rax, 912(%rdi)
+ adcq $0, %rdx
+ movq 416(%rsi), %rax
+ movq %rdx, 920(%rdi)
+ adcq $0, %rax
+ movq 424(%rsi), %rdx
+ movq %rax, 928(%rdi)
+ adcq $0, %rdx
+ movq 432(%rsi), %rax
+ movq %rdx, 936(%rdi)
+ adcq $0, %rax
+ movq 440(%rsi), %rdx
+ movq %rax, 944(%rdi)
+ adcq $0, %rdx
+ movq 448(%rsi), %rax
+ movq %rdx, 952(%rdi)
+ adcq $0, %rax
+ movq 456(%rsi), %rdx
+ movq %rax, 960(%rdi)
+ adcq $0, %rdx
+ movq 464(%rsi), %rax
+ movq %rdx, 968(%rdi)
+ adcq $0, %rax
+ movq 472(%rsi), %rdx
+ movq %rax, 976(%rdi)
+ adcq $0, %rdx
+ movq 480(%rsi), %rax
+ movq %rdx, 984(%rdi)
+ adcq $0, %rax
+ movq 488(%rsi), %rdx
+ movq %rax, 992(%rdi)
+ adcq $0, %rdx
+ movq 496(%rsi), %rax
+ movq %rdx, 1000(%rdi)
+ adcq $0, %rax
+ movq 504(%rsi), %rdx
+ movq %rax, 1008(%rdi)
+ adcq $0, %rdx
+ movq %rdx, 1016(%rdi)
+ addq $1304, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_sqr_avx2_64,.-sp_4096_sqr_avx2_64
+#endif /* __APPLE__ */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * C equivalent: void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a,
+ *                                     sp_digit b);
+ *
+ * r A single precision integer (65 limbs written: 64 product limbs + carry).
+ * a A single precision integer (64 limbs read).
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_4096_mul_d_64
+.type sp_4096_mul_d_64,@function
+.align 16
+sp_4096_mul_d_64:
+#else
+.globl _sp_4096_mul_d_64
+.p2align 4
+_sp_4096_mul_d_64:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b.
+ # b is copied to rcx first because mulq implicitly writes the product's
+ # high word into rdx, which would destroy the argument.
+ # r8/r9/r10 rotate (period 3) as a product/carry pipeline: one holds the
+ # limb being completed, one the incoming high-word carry, one is zeroed
+ # (xorq) to receive the next carry-out. No callee-saved regs are touched.
+ movq %rdx, %rcx
+ # A[0] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ movq %r8, (%rdi)
+ # A[1] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 8(%rsi)
+ addq %rax, %r9
+ movq %r9, 8(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 16(%rsi)
+ addq %rax, %r10
+ movq %r10, 16(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 24(%rsi)
+ addq %rax, %r8
+ movq %r8, 24(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 32(%rsi)
+ addq %rax, %r9
+ movq %r9, 32(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 40(%rsi)
+ addq %rax, %r10
+ movq %r10, 40(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 48(%rsi)
+ addq %rax, %r8
+ movq %r8, 48(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 56(%rsi)
+ addq %rax, %r9
+ movq %r9, 56(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 64(%rsi)
+ addq %rax, %r10
+ movq %r10, 64(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 72(%rsi)
+ addq %rax, %r8
+ movq %r8, 72(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 80(%rsi)
+ addq %rax, %r9
+ movq %r9, 80(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 88(%rsi)
+ addq %rax, %r10
+ movq %r10, 88(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[12] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 96(%rsi)
+ addq %rax, %r8
+ movq %r8, 96(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[13] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 104(%rsi)
+ addq %rax, %r9
+ movq %r9, 104(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[14] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 112(%rsi)
+ addq %rax, %r10
+ movq %r10, 112(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[15] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 120(%rsi)
+ addq %rax, %r8
+ movq %r8, 120(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[16] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 128(%rsi)
+ addq %rax, %r9
+ movq %r9, 128(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[17] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 136(%rsi)
+ addq %rax, %r10
+ movq %r10, 136(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[18] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 144(%rsi)
+ addq %rax, %r8
+ movq %r8, 144(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[19] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 152(%rsi)
+ addq %rax, %r9
+ movq %r9, 152(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[20] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 160(%rsi)
+ addq %rax, %r10
+ movq %r10, 160(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[21] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 168(%rsi)
+ addq %rax, %r8
+ movq %r8, 168(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[22] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 176(%rsi)
+ addq %rax, %r9
+ movq %r9, 176(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[23] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 184(%rsi)
+ addq %rax, %r10
+ movq %r10, 184(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[24] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 192(%rsi)
+ addq %rax, %r8
+ movq %r8, 192(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[25] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 200(%rsi)
+ addq %rax, %r9
+ movq %r9, 200(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[26] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 208(%rsi)
+ addq %rax, %r10
+ movq %r10, 208(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[27] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 216(%rsi)
+ addq %rax, %r8
+ movq %r8, 216(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[28] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 224(%rsi)
+ addq %rax, %r9
+ movq %r9, 224(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[29] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 232(%rsi)
+ addq %rax, %r10
+ movq %r10, 232(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[30] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 240(%rsi)
+ addq %rax, %r8
+ movq %r8, 240(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[31] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 248(%rsi)
+ addq %rax, %r9
+ movq %r9, 248(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[32] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 256(%rsi)
+ addq %rax, %r10
+ movq %r10, 256(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[33] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 264(%rsi)
+ addq %rax, %r8
+ movq %r8, 264(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[34] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 272(%rsi)
+ addq %rax, %r9
+ movq %r9, 272(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[35] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 280(%rsi)
+ addq %rax, %r10
+ movq %r10, 280(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[36] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 288(%rsi)
+ addq %rax, %r8
+ movq %r8, 288(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[37] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 296(%rsi)
+ addq %rax, %r9
+ movq %r9, 296(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[38] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 304(%rsi)
+ addq %rax, %r10
+ movq %r10, 304(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[39] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 312(%rsi)
+ addq %rax, %r8
+ movq %r8, 312(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[40] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 320(%rsi)
+ addq %rax, %r9
+ movq %r9, 320(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[41] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 328(%rsi)
+ addq %rax, %r10
+ movq %r10, 328(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[42] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 336(%rsi)
+ addq %rax, %r8
+ movq %r8, 336(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[43] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 344(%rsi)
+ addq %rax, %r9
+ movq %r9, 344(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[44] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 352(%rsi)
+ addq %rax, %r10
+ movq %r10, 352(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[45] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 360(%rsi)
+ addq %rax, %r8
+ movq %r8, 360(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[46] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 368(%rsi)
+ addq %rax, %r9
+ movq %r9, 368(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[47] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 376(%rsi)
+ addq %rax, %r10
+ movq %r10, 376(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[48] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 384(%rsi)
+ addq %rax, %r8
+ movq %r8, 384(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[49] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 392(%rsi)
+ addq %rax, %r9
+ movq %r9, 392(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[50] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 400(%rsi)
+ addq %rax, %r10
+ movq %r10, 400(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[51] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 408(%rsi)
+ addq %rax, %r8
+ movq %r8, 408(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[52] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 416(%rsi)
+ addq %rax, %r9
+ movq %r9, 416(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[53] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 424(%rsi)
+ addq %rax, %r10
+ movq %r10, 424(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[54] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 432(%rsi)
+ addq %rax, %r8
+ movq %r8, 432(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[55] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 440(%rsi)
+ addq %rax, %r9
+ movq %r9, 440(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[56] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 448(%rsi)
+ addq %rax, %r10
+ movq %r10, 448(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[57] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 456(%rsi)
+ addq %rax, %r8
+ movq %r8, 456(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[58] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 464(%rsi)
+ addq %rax, %r9
+ movq %r9, 464(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[59] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 472(%rsi)
+ addq %rax, %r10
+ movq %r10, 472(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[60] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 480(%rsi)
+ addq %rax, %r8
+ movq %r8, 480(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[61] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 488(%rsi)
+ addq %rax, %r9
+ movq %r9, 488(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[62] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 496(%rsi)
+ addq %rax, %r10
+ movq %r10, 496(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[63] * B
+ # Final limb: no carry-out register is needed; the product high word plus
+ # incoming carry becomes the 65th result limb r[64] at offset 512.
+ movq %rcx, %rax
+ mulq 504(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ movq %r8, 504(%rdi)
+ movq %r9, 512(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_mul_d_64,.-sp_4096_mul_d_64
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 (all ones) to subtract and 0 when not subtracting.
+ *
+ * C equivalent: sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a,
+ *                                            const sp_digit* b, sp_digit m);
+ * Returns the borrow out of the subtraction (0 or -1).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_4096_cond_sub_64
+.type sp_4096_cond_sub_64,@function
+.align 16
+sp_4096_cond_sub_64:
+#else
+.globl _sp_4096_cond_sub_64
+.p2align 4
+_sp_4096_cond_sub_64:
+#endif /* __APPLE__ */
+ # SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = mask m.
+ # Constant-time strategy: b is unconditionally ANDed with the mask into a
+ # 512-byte (64-limb) stack buffer, then r = a - (b & m) is always computed,
+ # so the instruction and memory-access trace is independent of m.
+ subq $512, %rsp
+ # rax accumulates the final borrow for the return value.
+ movq $0, %rax
+ # Stage 1: copy (b & m) into the stack buffer, two limbs at a time.
+ movq (%rdx), %r8
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 48(%rdx), %r8
+ movq 56(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 48(%rsp)
+ movq %r9, 56(%rsp)
+ movq 64(%rdx), %r8
+ movq 72(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq 80(%rdx), %r8
+ movq 88(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 80(%rsp)
+ movq %r9, 88(%rsp)
+ movq 96(%rdx), %r8
+ movq 104(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq 112(%rdx), %r8
+ movq 120(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 112(%rsp)
+ movq %r9, 120(%rsp)
+ movq 128(%rdx), %r8
+ movq 136(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 128(%rsp)
+ movq %r9, 136(%rsp)
+ movq 144(%rdx), %r8
+ movq 152(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 144(%rsp)
+ movq %r9, 152(%rsp)
+ movq 160(%rdx), %r8
+ movq 168(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 160(%rsp)
+ movq %r9, 168(%rsp)
+ movq 176(%rdx), %r8
+ movq 184(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 176(%rsp)
+ movq %r9, 184(%rsp)
+ movq 192(%rdx), %r8
+ movq 200(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 192(%rsp)
+ movq %r9, 200(%rsp)
+ movq 208(%rdx), %r8
+ movq 216(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 208(%rsp)
+ movq %r9, 216(%rsp)
+ movq 224(%rdx), %r8
+ movq 232(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 224(%rsp)
+ movq %r9, 232(%rsp)
+ movq 240(%rdx), %r8
+ movq 248(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 240(%rsp)
+ movq %r9, 248(%rsp)
+ movq 256(%rdx), %r8
+ movq 264(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 256(%rsp)
+ movq %r9, 264(%rsp)
+ movq 272(%rdx), %r8
+ movq 280(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 272(%rsp)
+ movq %r9, 280(%rsp)
+ movq 288(%rdx), %r8
+ movq 296(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 288(%rsp)
+ movq %r9, 296(%rsp)
+ movq 304(%rdx), %r8
+ movq 312(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 304(%rsp)
+ movq %r9, 312(%rsp)
+ movq 320(%rdx), %r8
+ movq 328(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 320(%rsp)
+ movq %r9, 328(%rsp)
+ movq 336(%rdx), %r8
+ movq 344(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 336(%rsp)
+ movq %r9, 344(%rsp)
+ movq 352(%rdx), %r8
+ movq 360(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 352(%rsp)
+ movq %r9, 360(%rsp)
+ movq 368(%rdx), %r8
+ movq 376(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 368(%rsp)
+ movq %r9, 376(%rsp)
+ movq 384(%rdx), %r8
+ movq 392(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 384(%rsp)
+ movq %r9, 392(%rsp)
+ movq 400(%rdx), %r8
+ movq 408(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 400(%rsp)
+ movq %r9, 408(%rsp)
+ movq 416(%rdx), %r8
+ movq 424(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 416(%rsp)
+ movq %r9, 424(%rsp)
+ movq 432(%rdx), %r8
+ movq 440(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 432(%rsp)
+ movq %r9, 440(%rsp)
+ movq 448(%rdx), %r8
+ movq 456(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 448(%rsp)
+ movq %r9, 456(%rsp)
+ movq 464(%rdx), %r8
+ movq 472(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 464(%rsp)
+ movq %r9, 472(%rsp)
+ movq 480(%rdx), %r8
+ movq 488(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 480(%rsp)
+ movq %r9, 488(%rsp)
+ movq 496(%rdx), %r8
+ movq 504(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 496(%rsp)
+ movq %r9, 504(%rsp)
+ # Stage 2: r = a - (b & m), a single 64-limb sub/sbb borrow chain.
+ # rdx is free to use as a scratch load register from here on; note that
+ # only movq/subq/sbbq appear inside the chain so CF is never clobbered.
+ movq (%rsi), %r8
+ movq (%rsp), %rdx
+ subq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, (%rdi)
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq 48(%rsi), %r8
+ movq 48(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 40(%rdi)
+ movq 56(%rsi), %r9
+ movq 56(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 48(%rdi)
+ movq 64(%rsi), %r8
+ movq 64(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 56(%rdi)
+ movq 72(%rsi), %r9
+ movq 72(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 64(%rdi)
+ movq 80(%rsi), %r8
+ movq 80(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 72(%rdi)
+ movq 88(%rsi), %r9
+ movq 88(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 80(%rdi)
+ movq 96(%rsi), %r8
+ movq 96(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 88(%rdi)
+ movq 104(%rsi), %r9
+ movq 104(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rsi), %r8
+ movq 112(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rsi), %r9
+ movq 120(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 112(%rdi)
+ movq 128(%rsi), %r8
+ movq 128(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 120(%rdi)
+ movq 136(%rsi), %r9
+ movq 136(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 128(%rdi)
+ movq 144(%rsi), %r8
+ movq 144(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 136(%rdi)
+ movq 152(%rsi), %r9
+ movq 152(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rsi), %r8
+ movq 160(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 152(%rdi)
+ movq 168(%rsi), %r9
+ movq 168(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 160(%rdi)
+ movq 176(%rsi), %r8
+ movq 176(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 168(%rdi)
+ movq 184(%rsi), %r9
+ movq 184(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 176(%rdi)
+ movq 192(%rsi), %r8
+ movq 192(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 184(%rdi)
+ movq 200(%rsi), %r9
+ movq 200(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 192(%rdi)
+ movq 208(%rsi), %r8
+ movq 208(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 200(%rdi)
+ movq 216(%rsi), %r9
+ movq 216(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 208(%rdi)
+ movq 224(%rsi), %r8
+ movq 224(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 216(%rdi)
+ movq 232(%rsi), %r9
+ movq 232(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 224(%rdi)
+ movq 240(%rsi), %r8
+ movq 240(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 232(%rdi)
+ movq 248(%rsi), %r9
+ movq 248(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 240(%rdi)
+ movq 256(%rsi), %r8
+ movq 256(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 248(%rdi)
+ movq 264(%rsi), %r9
+ movq 264(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 256(%rdi)
+ movq 272(%rsi), %r8
+ movq 272(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 264(%rdi)
+ movq 280(%rsi), %r9
+ movq 280(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 272(%rdi)
+ movq 288(%rsi), %r8
+ movq 288(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 280(%rdi)
+ movq 296(%rsi), %r9
+ movq 296(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 288(%rdi)
+ movq 304(%rsi), %r8
+ movq 304(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 296(%rdi)
+ movq 312(%rsi), %r9
+ movq 312(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 304(%rdi)
+ movq 320(%rsi), %r8
+ movq 320(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 312(%rdi)
+ movq 328(%rsi), %r9
+ movq 328(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 320(%rdi)
+ movq 336(%rsi), %r8
+ movq 336(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 328(%rdi)
+ movq 344(%rsi), %r9
+ movq 344(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 336(%rdi)
+ movq 352(%rsi), %r8
+ movq 352(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 344(%rdi)
+ movq 360(%rsi), %r9
+ movq 360(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 352(%rdi)
+ movq 368(%rsi), %r8
+ movq 368(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 360(%rdi)
+ movq 376(%rsi), %r9
+ movq 376(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 368(%rdi)
+ movq 384(%rsi), %r8
+ movq 384(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 376(%rdi)
+ movq 392(%rsi), %r9
+ movq 392(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 384(%rdi)
+ movq 400(%rsi), %r8
+ movq 400(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 392(%rdi)
+ movq 408(%rsi), %r9
+ movq 408(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 400(%rdi)
+ movq 416(%rsi), %r8
+ movq 416(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 408(%rdi)
+ movq 424(%rsi), %r9
+ movq 424(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 416(%rdi)
+ movq 432(%rsi), %r8
+ movq 432(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 424(%rdi)
+ movq 440(%rsi), %r9
+ movq 440(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 432(%rdi)
+ movq 448(%rsi), %r8
+ movq 448(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 440(%rdi)
+ movq 456(%rsi), %r9
+ movq 456(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 448(%rdi)
+ movq 464(%rsi), %r8
+ movq 464(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 456(%rdi)
+ movq 472(%rsi), %r9
+ movq 472(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 464(%rdi)
+ movq 480(%rsi), %r8
+ movq 480(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 472(%rdi)
+ movq 488(%rsi), %r9
+ movq 488(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 480(%rdi)
+ movq 496(%rsi), %r8
+ movq 496(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 488(%rdi)
+ movq 504(%rsi), %r9
+ movq 504(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 496(%rdi)
+ movq %r9, 504(%rdi)
+ # rax = 0 - CF: 0 if no borrow, -1 (all ones) if the subtract underflowed.
+ sbbq $0, %rax
+ addq $512, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_cond_sub_64,.-sp_4096_cond_sub_64
+#endif /* __APPLE__ */
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_4096_mont_reduce_64
+.type sp_4096_mont_reduce_64,@function
+.align 16
+sp_4096_mont_reduce_64:
+#else
+.globl _sp_4096_mont_reduce_64
+.p2align 4
+_sp_4096_mont_reduce_64:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq %rdx, %rcx
+ xorq %r15, %r15
+ # i = 64
+ movq $64, %r8
+ movq (%rdi), %r13
+ movq 8(%rdi), %r14
+L_mont_loop_64:
+ # mu = a[i] * mp
+ movq %r13, %r11
+ imulq %rcx, %r11
+ # a[i+0] += m[0] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r10
+ # a[i+1] += m[1] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 8(%rsi)
+ movq %r14, %r13
+ addq %rax, %r13
+ adcq %rdx, %r9
+ addq %r10, %r13
+ adcq $0, %r9
+ # a[i+2] += m[2] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 16(%rsi)
+ movq 16(%rdi), %r14
+ addq %rax, %r14
+ adcq %rdx, %r10
+ addq %r9, %r14
+ adcq $0, %r10
+ # a[i+3] += m[3] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 24(%rsi)
+ movq 24(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 24(%rdi)
+ adcq $0, %r9
+ # a[i+4] += m[4] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 32(%rsi)
+ movq 32(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 32(%rdi)
+ adcq $0, %r10
+ # a[i+5] += m[5] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 40(%rsi)
+ movq 40(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 40(%rdi)
+ adcq $0, %r9
+ # a[i+6] += m[6] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 48(%rsi)
+ movq 48(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 48(%rdi)
+ adcq $0, %r10
+ # a[i+7] += m[7] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 56(%rsi)
+ movq 56(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 56(%rdi)
+ adcq $0, %r9
+ # a[i+8] += m[8] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 64(%rsi)
+ movq 64(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 64(%rdi)
+ adcq $0, %r10
+ # a[i+9] += m[9] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 72(%rsi)
+ movq 72(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 72(%rdi)
+ adcq $0, %r9
+ # a[i+10] += m[10] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 80(%rsi)
+ movq 80(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 80(%rdi)
+ adcq $0, %r10
+ # a[i+11] += m[11] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 88(%rsi)
+ movq 88(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 88(%rdi)
+ adcq $0, %r9
+ # a[i+12] += m[12] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 96(%rsi)
+ movq 96(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 96(%rdi)
+ adcq $0, %r10
+ # a[i+13] += m[13] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 104(%rsi)
+ movq 104(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 104(%rdi)
+ adcq $0, %r9
+ # a[i+14] += m[14] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 112(%rsi)
+ movq 112(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 112(%rdi)
+ adcq $0, %r10
+ # a[i+15] += m[15] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 120(%rsi)
+ movq 120(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 120(%rdi)
+ adcq $0, %r9
+ # a[i+16] += m[16] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 128(%rsi)
+ movq 128(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 128(%rdi)
+ adcq $0, %r10
+ # a[i+17] += m[17] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 136(%rsi)
+ movq 136(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 136(%rdi)
+ adcq $0, %r9
+ # a[i+18] += m[18] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 144(%rsi)
+ movq 144(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 144(%rdi)
+ adcq $0, %r10
+ # a[i+19] += m[19] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 152(%rsi)
+ movq 152(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 152(%rdi)
+ adcq $0, %r9
+ # a[i+20] += m[20] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 160(%rsi)
+ movq 160(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 160(%rdi)
+ adcq $0, %r10
+ # a[i+21] += m[21] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 168(%rsi)
+ movq 168(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 168(%rdi)
+ adcq $0, %r9
+ # a[i+22] += m[22] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 176(%rsi)
+ movq 176(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 176(%rdi)
+ adcq $0, %r10
+ # a[i+23] += m[23] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 184(%rsi)
+ movq 184(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 184(%rdi)
+ adcq $0, %r9
+ # a[i+24] += m[24] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 192(%rsi)
+ movq 192(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 192(%rdi)
+ adcq $0, %r10
+ # a[i+25] += m[25] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 200(%rsi)
+ movq 200(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 200(%rdi)
+ adcq $0, %r9
+ # a[i+26] += m[26] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 208(%rsi)
+ movq 208(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 208(%rdi)
+ adcq $0, %r10
+ # a[i+27] += m[27] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 216(%rsi)
+ movq 216(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 216(%rdi)
+ adcq $0, %r9
+ # a[i+28] += m[28] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 224(%rsi)
+ movq 224(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 224(%rdi)
+ adcq $0, %r10
+ # a[i+29] += m[29] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 232(%rsi)
+ movq 232(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 232(%rdi)
+ adcq $0, %r9
+ # a[i+30] += m[30] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 240(%rsi)
+ movq 240(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 240(%rdi)
+ adcq $0, %r10
+ # a[i+31] += m[31] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 248(%rsi)
+ movq 248(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 248(%rdi)
+ adcq $0, %r9
+ # a[i+32] += m[32] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 256(%rsi)
+ movq 256(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 256(%rdi)
+ adcq $0, %r10
+ # a[i+33] += m[33] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 264(%rsi)
+ movq 264(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 264(%rdi)
+ adcq $0, %r9
+ # a[i+34] += m[34] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 272(%rsi)
+ movq 272(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 272(%rdi)
+ adcq $0, %r10
+ # a[i+35] += m[35] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 280(%rsi)
+ movq 280(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 280(%rdi)
+ adcq $0, %r9
+ # a[i+36] += m[36] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 288(%rsi)
+ movq 288(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 288(%rdi)
+ adcq $0, %r10
+ # a[i+37] += m[37] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 296(%rsi)
+ movq 296(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 296(%rdi)
+ adcq $0, %r9
+ # a[i+38] += m[38] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 304(%rsi)
+ movq 304(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 304(%rdi)
+ adcq $0, %r10
+ # a[i+39] += m[39] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 312(%rsi)
+ movq 312(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 312(%rdi)
+ adcq $0, %r9
+ # a[i+40] += m[40] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 320(%rsi)
+ movq 320(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 320(%rdi)
+ adcq $0, %r10
+ # a[i+41] += m[41] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 328(%rsi)
+ movq 328(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 328(%rdi)
+ adcq $0, %r9
+ # a[i+42] += m[42] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 336(%rsi)
+ movq 336(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 336(%rdi)
+ adcq $0, %r10
+ # a[i+43] += m[43] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 344(%rsi)
+ movq 344(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 344(%rdi)
+ adcq $0, %r9
+ # a[i+44] += m[44] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 352(%rsi)
+ movq 352(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 352(%rdi)
+ adcq $0, %r10
+ # a[i+45] += m[45] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 360(%rsi)
+ movq 360(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 360(%rdi)
+ adcq $0, %r9
+ # a[i+46] += m[46] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 368(%rsi)
+ movq 368(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 368(%rdi)
+ adcq $0, %r10
+ # a[i+47] += m[47] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 376(%rsi)
+ movq 376(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 376(%rdi)
+ adcq $0, %r9
+ # a[i+48] += m[48] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 384(%rsi)
+ movq 384(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 384(%rdi)
+ adcq $0, %r10
+ # a[i+49] += m[49] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 392(%rsi)
+ movq 392(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 392(%rdi)
+ adcq $0, %r9
+ # a[i+50] += m[50] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 400(%rsi)
+ movq 400(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 400(%rdi)
+ adcq $0, %r10
+ # a[i+51] += m[51] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 408(%rsi)
+ movq 408(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 408(%rdi)
+ adcq $0, %r9
+ # a[i+52] += m[52] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 416(%rsi)
+ movq 416(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 416(%rdi)
+ adcq $0, %r10
+ # a[i+53] += m[53] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 424(%rsi)
+ movq 424(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 424(%rdi)
+ adcq $0, %r9
+ # a[i+54] += m[54] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 432(%rsi)
+ movq 432(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 432(%rdi)
+ adcq $0, %r10
+ # a[i+55] += m[55] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 440(%rsi)
+ movq 440(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 440(%rdi)
+ adcq $0, %r9
+ # a[i+56] += m[56] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 448(%rsi)
+ movq 448(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 448(%rdi)
+ adcq $0, %r10
+ # a[i+57] += m[57] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 456(%rsi)
+ movq 456(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 456(%rdi)
+ adcq $0, %r9
+ # a[i+58] += m[58] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 464(%rsi)
+ movq 464(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 464(%rdi)
+ adcq $0, %r10
+ # a[i+59] += m[59] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 472(%rsi)
+ movq 472(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 472(%rdi)
+ adcq $0, %r9
+ # a[i+60] += m[60] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 480(%rsi)
+ movq 480(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 480(%rdi)
+ adcq $0, %r10
+ # a[i+61] += m[61] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 488(%rsi)
+ movq 488(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 488(%rdi)
+ adcq $0, %r9
+ # a[i+62] += m[62] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 496(%rsi)
+ movq 496(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 496(%rdi)
+ adcq $0, %r10
+ # a[i+63] += m[63] * mu
+ movq %r11, %rax
+ mulq 504(%rsi)
+ movq 504(%rdi), %r12
+ addq %rax, %r10
+ adcq %r15, %rdx
+ movq $0, %r15
+ adcq $0, %r15
+ addq %r10, %r12
+ movq %r12, 504(%rdi)
+ adcq %rdx, 512(%rdi)
+ adcq $0, %r15
+ # i -= 1
+ addq $8, %rdi
+ decq %r8
+ jnz L_mont_loop_64
+ movq %r13, (%rdi)
+ movq %r14, 8(%rdi)
+ negq %r15
+ movq %r15, %rcx
+ movq %rsi, %rdx
+ movq %rdi, %rsi
+ subq $512, %rdi
+#ifndef __APPLE__
+ callq sp_4096_cond_sub_64@plt
+#else
+ callq _sp_4096_cond_sub_64
+#endif /* __APPLE__ */
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_mont_reduce_64,.-sp_4096_mont_reduce_64
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * Constant time: the subtraction is always executed.  pextq with the
+ * mask in %rcx yields b[i] unchanged when m == -1 and 0 when m == 0,
+ * so no data-dependent branch or memory access occurs.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ *
+ * SysV ABI: rdi = r, rsi = a, rdx = b, rcx = m.
+ * Returns:  rax = -(final borrow), i.e. 0 or -1.
+ * Clobbers: r8, r9, r10, flags.
+ *
+ * NOTE(review): pextq requires BMI2, yet no HAVE_INTEL_AVX2 guard is
+ * visible around this routine in this hunk -- confirm the caller only
+ * dispatches here when BMI2/AVX2 support was detected.
+ */
+#ifndef __APPLE__
+.globl sp_4096_cond_sub_avx2_64
+.type sp_4096_cond_sub_avx2_64,@function
+.align 16
+sp_4096_cond_sub_avx2_64:
+#else
+.globl _sp_4096_cond_sub_avx2_64
+.p2align 4
+_sp_4096_cond_sub_avx2_64:
+#endif /* __APPLE__ */
+ # rax accumulates the return value; final borrow folded in at the end
+ movq $0, %rax
+ # Word 0: r[0] = a[0] - (b[0] & m).  The remaining 63 words repeat the
+ # same five-instruction pattern with the three scratch registers
+ # rotated (so each store of the previous result overlaps the next
+ # loads) and with sub replaced by sbb to propagate the borrow chain.
+ # Nothing between the sub below and the final sbb may clobber CF.
+ movq (%rdx), %r10
+ movq (%rsi), %r8
+ pextq %rcx, %r10, %r10
+ subq %r10, %r8
+ movq 8(%rdx), %r10
+ movq 8(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, (%rdi)
+ sbbq %r10, %r9
+ movq 16(%rdx), %r8
+ movq 16(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 8(%rdi)
+ sbbq %r8, %r10
+ movq 24(%rdx), %r9
+ movq 24(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 16(%rdi)
+ sbbq %r9, %r8
+ movq 32(%rdx), %r10
+ movq 32(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 24(%rdi)
+ sbbq %r10, %r9
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 32(%rdi)
+ sbbq %r8, %r10
+ movq 48(%rdx), %r9
+ movq 48(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 40(%rdi)
+ sbbq %r9, %r8
+ movq 56(%rdx), %r10
+ movq 56(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 48(%rdi)
+ sbbq %r10, %r9
+ movq 64(%rdx), %r8
+ movq 64(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 56(%rdi)
+ sbbq %r8, %r10
+ movq 72(%rdx), %r9
+ movq 72(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 64(%rdi)
+ sbbq %r9, %r8
+ movq 80(%rdx), %r10
+ movq 80(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 72(%rdi)
+ sbbq %r10, %r9
+ movq 88(%rdx), %r8
+ movq 88(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 80(%rdi)
+ sbbq %r8, %r10
+ movq 96(%rdx), %r9
+ movq 96(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 88(%rdi)
+ sbbq %r9, %r8
+ movq 104(%rdx), %r10
+ movq 104(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 96(%rdi)
+ sbbq %r10, %r9
+ movq 112(%rdx), %r8
+ movq 112(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 104(%rdi)
+ sbbq %r8, %r10
+ movq 120(%rdx), %r9
+ movq 120(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 112(%rdi)
+ sbbq %r9, %r8
+ movq 128(%rdx), %r10
+ movq 128(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 120(%rdi)
+ sbbq %r10, %r9
+ movq 136(%rdx), %r8
+ movq 136(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 128(%rdi)
+ sbbq %r8, %r10
+ movq 144(%rdx), %r9
+ movq 144(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 136(%rdi)
+ sbbq %r9, %r8
+ movq 152(%rdx), %r10
+ movq 152(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 144(%rdi)
+ sbbq %r10, %r9
+ movq 160(%rdx), %r8
+ movq 160(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 152(%rdi)
+ sbbq %r8, %r10
+ movq 168(%rdx), %r9
+ movq 168(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 160(%rdi)
+ sbbq %r9, %r8
+ movq 176(%rdx), %r10
+ movq 176(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 168(%rdi)
+ sbbq %r10, %r9
+ movq 184(%rdx), %r8
+ movq 184(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 176(%rdi)
+ sbbq %r8, %r10
+ movq 192(%rdx), %r9
+ movq 192(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 184(%rdi)
+ sbbq %r9, %r8
+ movq 200(%rdx), %r10
+ movq 200(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 192(%rdi)
+ sbbq %r10, %r9
+ movq 208(%rdx), %r8
+ movq 208(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 200(%rdi)
+ sbbq %r8, %r10
+ movq 216(%rdx), %r9
+ movq 216(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 208(%rdi)
+ sbbq %r9, %r8
+ movq 224(%rdx), %r10
+ movq 224(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 216(%rdi)
+ sbbq %r10, %r9
+ movq 232(%rdx), %r8
+ movq 232(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 224(%rdi)
+ sbbq %r8, %r10
+ movq 240(%rdx), %r9
+ movq 240(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 232(%rdi)
+ sbbq %r9, %r8
+ movq 248(%rdx), %r10
+ movq 248(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 240(%rdi)
+ sbbq %r10, %r9
+ movq 256(%rdx), %r8
+ movq 256(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 248(%rdi)
+ sbbq %r8, %r10
+ movq 264(%rdx), %r9
+ movq 264(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 256(%rdi)
+ sbbq %r9, %r8
+ movq 272(%rdx), %r10
+ movq 272(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 264(%rdi)
+ sbbq %r10, %r9
+ movq 280(%rdx), %r8
+ movq 280(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 272(%rdi)
+ sbbq %r8, %r10
+ movq 288(%rdx), %r9
+ movq 288(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 280(%rdi)
+ sbbq %r9, %r8
+ movq 296(%rdx), %r10
+ movq 296(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 288(%rdi)
+ sbbq %r10, %r9
+ movq 304(%rdx), %r8
+ movq 304(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 296(%rdi)
+ sbbq %r8, %r10
+ movq 312(%rdx), %r9
+ movq 312(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 304(%rdi)
+ sbbq %r9, %r8
+ movq 320(%rdx), %r10
+ movq 320(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 312(%rdi)
+ sbbq %r10, %r9
+ movq 328(%rdx), %r8
+ movq 328(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 320(%rdi)
+ sbbq %r8, %r10
+ movq 336(%rdx), %r9
+ movq 336(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 328(%rdi)
+ sbbq %r9, %r8
+ movq 344(%rdx), %r10
+ movq 344(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 336(%rdi)
+ sbbq %r10, %r9
+ movq 352(%rdx), %r8
+ movq 352(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 344(%rdi)
+ sbbq %r8, %r10
+ movq 360(%rdx), %r9
+ movq 360(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 352(%rdi)
+ sbbq %r9, %r8
+ movq 368(%rdx), %r10
+ movq 368(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 360(%rdi)
+ sbbq %r10, %r9
+ movq 376(%rdx), %r8
+ movq 376(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 368(%rdi)
+ sbbq %r8, %r10
+ movq 384(%rdx), %r9
+ movq 384(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 376(%rdi)
+ sbbq %r9, %r8
+ movq 392(%rdx), %r10
+ movq 392(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 384(%rdi)
+ sbbq %r10, %r9
+ movq 400(%rdx), %r8
+ movq 400(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 392(%rdi)
+ sbbq %r8, %r10
+ movq 408(%rdx), %r9
+ movq 408(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 400(%rdi)
+ sbbq %r9, %r8
+ movq 416(%rdx), %r10
+ movq 416(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 408(%rdi)
+ sbbq %r10, %r9
+ movq 424(%rdx), %r8
+ movq 424(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 416(%rdi)
+ sbbq %r8, %r10
+ movq 432(%rdx), %r9
+ movq 432(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 424(%rdi)
+ sbbq %r9, %r8
+ movq 440(%rdx), %r10
+ movq 440(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 432(%rdi)
+ sbbq %r10, %r9
+ movq 448(%rdx), %r8
+ movq 448(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 440(%rdi)
+ sbbq %r8, %r10
+ movq 456(%rdx), %r9
+ movq 456(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 448(%rdi)
+ sbbq %r9, %r8
+ movq 464(%rdx), %r10
+ movq 464(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 456(%rdi)
+ sbbq %r10, %r9
+ movq 472(%rdx), %r8
+ movq 472(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 464(%rdi)
+ sbbq %r8, %r10
+ movq 480(%rdx), %r9
+ movq 480(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 472(%rdi)
+ sbbq %r9, %r8
+ movq 488(%rdx), %r10
+ movq 488(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 480(%rdi)
+ sbbq %r10, %r9
+ movq 496(%rdx), %r8
+ movq 496(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 488(%rdi)
+ sbbq %r8, %r10
+ movq 504(%rdx), %r9
+ movq 504(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 496(%rdi)
+ sbbq %r9, %r8
+ movq %r8, 504(%rdi)
+ # rax = 0 - borrow: 0 if no borrow, -1 if a < (b & m)
+ sbbq $0, %rax
+ # two-byte "rep ret" return idiom
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_cond_sub_avx2_64,.-sp_4096_cond_sub_avx2_64
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * BMI2/ADX variant: mulx leaves flags untouched while adcx (CF chain)
+ * and adox (OF chain) carry two independent carry chains -- the low
+ * half of each partial product is added on the CF chain, the high half
+ * on the OF chain, and the chains are folded together only at the end.
+ * Statement order is therefore significant; do not reorder.
+ *
+ * r A single precision integer.  (rdi, 65 words: 64 product + carry)
+ * a A single precision integer.  (rsi, 64 words)
+ * b A single precision digit.    (rdx)
+ *
+ * Clobbers: rax, rcx, r8, r9, r10, r11, flags.
+ */
+#ifndef __APPLE__
+.globl sp_4096_mul_d_avx2_64
+.type sp_4096_mul_d_avx2_64,@function
+.align 16
+sp_4096_mul_d_avx2_64:
+#else
+.globl _sp_4096_mul_d_avx2_64
+.p2align 4
+_sp_4096_mul_d_avx2_64:
+#endif /* __APPLE__ */
+ # mulx takes its implicit multiplier from rdx; stash b in rax first
+ movq %rdx, %rax
+ # A[0] * B
+ movq %rax, %rdx
+ # r11 = 0 for the whole routine: clears CF/OF and seeds each new
+ # accumulator word so the adox high-half lands on a zeroed register
+ xorq %r11, %r11
+ mulxq (%rsi), %r9, %r10
+ movq %r9, (%rdi)
+ # Words 1..62 repeat this pattern with r9/r10 alternating as the
+ # accumulator: lo -> adcx into current word, hi -> adox into next.
+ # A[1] * B
+ mulxq 8(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 8(%rdi)
+ adoxq %r8, %r9
+ # A[2] * B
+ mulxq 16(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 16(%rdi)
+ adoxq %r8, %r10
+ # A[3] * B
+ mulxq 24(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 24(%rdi)
+ adoxq %r8, %r9
+ # A[4] * B
+ mulxq 32(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 32(%rdi)
+ adoxq %r8, %r10
+ # A[5] * B
+ mulxq 40(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 40(%rdi)
+ adoxq %r8, %r9
+ # A[6] * B
+ mulxq 48(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 48(%rdi)
+ adoxq %r8, %r10
+ # A[7] * B
+ mulxq 56(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 56(%rdi)
+ adoxq %r8, %r9
+ # A[8] * B
+ mulxq 64(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 64(%rdi)
+ adoxq %r8, %r10
+ # A[9] * B
+ mulxq 72(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 72(%rdi)
+ adoxq %r8, %r9
+ # A[10] * B
+ mulxq 80(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 80(%rdi)
+ adoxq %r8, %r10
+ # A[11] * B
+ mulxq 88(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 88(%rdi)
+ adoxq %r8, %r9
+ # A[12] * B
+ mulxq 96(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 96(%rdi)
+ adoxq %r8, %r10
+ # A[13] * B
+ mulxq 104(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 104(%rdi)
+ adoxq %r8, %r9
+ # A[14] * B
+ mulxq 112(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 112(%rdi)
+ adoxq %r8, %r10
+ # A[15] * B
+ mulxq 120(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 120(%rdi)
+ adoxq %r8, %r9
+ # A[16] * B
+ mulxq 128(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 128(%rdi)
+ adoxq %r8, %r10
+ # A[17] * B
+ mulxq 136(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 136(%rdi)
+ adoxq %r8, %r9
+ # A[18] * B
+ mulxq 144(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 144(%rdi)
+ adoxq %r8, %r10
+ # A[19] * B
+ mulxq 152(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 152(%rdi)
+ adoxq %r8, %r9
+ # A[20] * B
+ mulxq 160(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 160(%rdi)
+ adoxq %r8, %r10
+ # A[21] * B
+ mulxq 168(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 168(%rdi)
+ adoxq %r8, %r9
+ # A[22] * B
+ mulxq 176(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 176(%rdi)
+ adoxq %r8, %r10
+ # A[23] * B
+ mulxq 184(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 184(%rdi)
+ adoxq %r8, %r9
+ # A[24] * B
+ mulxq 192(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 192(%rdi)
+ adoxq %r8, %r10
+ # A[25] * B
+ mulxq 200(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 200(%rdi)
+ adoxq %r8, %r9
+ # A[26] * B
+ mulxq 208(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 208(%rdi)
+ adoxq %r8, %r10
+ # A[27] * B
+ mulxq 216(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 216(%rdi)
+ adoxq %r8, %r9
+ # A[28] * B
+ mulxq 224(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 224(%rdi)
+ adoxq %r8, %r10
+ # A[29] * B
+ mulxq 232(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 232(%rdi)
+ adoxq %r8, %r9
+ # A[30] * B
+ mulxq 240(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 240(%rdi)
+ adoxq %r8, %r10
+ # A[31] * B
+ mulxq 248(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 248(%rdi)
+ adoxq %r8, %r9
+ # A[32] * B
+ mulxq 256(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 256(%rdi)
+ adoxq %r8, %r10
+ # A[33] * B
+ mulxq 264(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 264(%rdi)
+ adoxq %r8, %r9
+ # A[34] * B
+ mulxq 272(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 272(%rdi)
+ adoxq %r8, %r10
+ # A[35] * B
+ mulxq 280(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 280(%rdi)
+ adoxq %r8, %r9
+ # A[36] * B
+ mulxq 288(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 288(%rdi)
+ adoxq %r8, %r10
+ # A[37] * B
+ mulxq 296(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 296(%rdi)
+ adoxq %r8, %r9
+ # A[38] * B
+ mulxq 304(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 304(%rdi)
+ adoxq %r8, %r10
+ # A[39] * B
+ mulxq 312(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 312(%rdi)
+ adoxq %r8, %r9
+ # A[40] * B
+ mulxq 320(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 320(%rdi)
+ adoxq %r8, %r10
+ # A[41] * B
+ mulxq 328(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 328(%rdi)
+ adoxq %r8, %r9
+ # A[42] * B
+ mulxq 336(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 336(%rdi)
+ adoxq %r8, %r10
+ # A[43] * B
+ mulxq 344(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 344(%rdi)
+ adoxq %r8, %r9
+ # A[44] * B
+ mulxq 352(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 352(%rdi)
+ adoxq %r8, %r10
+ # A[45] * B
+ mulxq 360(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 360(%rdi)
+ adoxq %r8, %r9
+ # A[46] * B
+ mulxq 368(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 368(%rdi)
+ adoxq %r8, %r10
+ # A[47] * B
+ mulxq 376(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 376(%rdi)
+ adoxq %r8, %r9
+ # A[48] * B
+ mulxq 384(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 384(%rdi)
+ adoxq %r8, %r10
+ # A[49] * B
+ mulxq 392(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 392(%rdi)
+ adoxq %r8, %r9
+ # A[50] * B
+ mulxq 400(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 400(%rdi)
+ adoxq %r8, %r10
+ # A[51] * B
+ mulxq 408(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 408(%rdi)
+ adoxq %r8, %r9
+ # A[52] * B
+ mulxq 416(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 416(%rdi)
+ adoxq %r8, %r10
+ # A[53] * B
+ mulxq 424(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 424(%rdi)
+ adoxq %r8, %r9
+ # A[54] * B
+ mulxq 432(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 432(%rdi)
+ adoxq %r8, %r10
+ # A[55] * B
+ mulxq 440(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 440(%rdi)
+ adoxq %r8, %r9
+ # A[56] * B
+ mulxq 448(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 448(%rdi)
+ adoxq %r8, %r10
+ # A[57] * B
+ mulxq 456(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 456(%rdi)
+ adoxq %r8, %r9
+ # A[58] * B
+ mulxq 464(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 464(%rdi)
+ adoxq %r8, %r10
+ # A[59] * B
+ mulxq 472(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 472(%rdi)
+ adoxq %r8, %r9
+ # A[60] * B
+ mulxq 480(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 480(%rdi)
+ adoxq %r8, %r10
+ # A[61] * B
+ mulxq 488(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 488(%rdi)
+ adoxq %r8, %r9
+ # A[62] * B
+ mulxq 496(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 496(%rdi)
+ adoxq %r8, %r10
+ # A[63] * B
+ mulxq 504(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ # fold both carry chains (OF then CF) into the final carry word r9
+ adoxq %r8, %r9
+ adcxq %r11, %r9
+ movq %r10, 504(%rdi)
+ # r[64] = carry out of the 64-word product
+ movq %r9, 512(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_mul_d_avx2_64,.-sp_4096_mul_d_avx2_64
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Compare a with b in constant time.
+ *
+ * Walks from the most significant word (offset 504) down to word 0.
+ * Every word is always examined: rdx holds a mask that is -1 while all
+ * higher words compared equal and is cleared to 0 at the first
+ * difference; once cleared, both operands of every later word are
+ * ANDed to 0, so no later cmov condition fires and the result cannot
+ * change -- no data-dependent branches.
+ *
+ * Register roles: rdi = a, rsi = b, rcx = 0, r8 = +1,
+ * rax = running result (seeded -1), rdx = "still equal" mask.
+ * The cmova / cmovc / cmovnz order inside each word is significant.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+#ifndef __APPLE__
+.globl sp_4096_cmp_64
+.type sp_4096_cmp_64,@function
+.align 16
+sp_4096_cmp_64:
+#else
+.globl _sp_4096_cmp_64
+.p2align 4
+_sp_4096_cmp_64:
+#endif /* __APPLE__ */
+ xorq %rcx, %rcx
+ movq $-1, %rdx
+ movq $-1, %rax
+ movq $1, %r8
+ # Per word: mask both operands, subtract, then
+ #   cmova  -> a[i] > b[i] (unsigned): result = +1
+ #   cmovc  -> a[i] < b[i]:            result = rdx (-1, not yet cleared)
+ #   cmovnz -> words differ:           clear mask so later words are inert
+ movq 504(%rdi), %r9
+ movq 504(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 496(%rdi), %r9
+ movq 496(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 488(%rdi), %r9
+ movq 488(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 480(%rdi), %r9
+ movq 480(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 472(%rdi), %r9
+ movq 472(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 464(%rdi), %r9
+ movq 464(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 456(%rdi), %r9
+ movq 456(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 448(%rdi), %r9
+ movq 448(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 440(%rdi), %r9
+ movq 440(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 432(%rdi), %r9
+ movq 432(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 424(%rdi), %r9
+ movq 424(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 416(%rdi), %r9
+ movq 416(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 408(%rdi), %r9
+ movq 408(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 400(%rdi), %r9
+ movq 400(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 392(%rdi), %r9
+ movq 392(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 384(%rdi), %r9
+ movq 384(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 376(%rdi), %r9
+ movq 376(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 368(%rdi), %r9
+ movq 368(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 360(%rdi), %r9
+ movq 360(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 352(%rdi), %r9
+ movq 352(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 344(%rdi), %r9
+ movq 344(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 336(%rdi), %r9
+ movq 336(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 328(%rdi), %r9
+ movq 328(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 320(%rdi), %r9
+ movq 320(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 312(%rdi), %r9
+ movq 312(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 304(%rdi), %r9
+ movq 304(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 296(%rdi), %r9
+ movq 296(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 288(%rdi), %r9
+ movq 288(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 280(%rdi), %r9
+ movq 280(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 272(%rdi), %r9
+ movq 272(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 264(%rdi), %r9
+ movq 264(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 256(%rdi), %r9
+ movq 256(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 248(%rdi), %r9
+ movq 248(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 240(%rdi), %r9
+ movq 240(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 232(%rdi), %r9
+ movq 232(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 224(%rdi), %r9
+ movq 224(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 216(%rdi), %r9
+ movq 216(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 208(%rdi), %r9
+ movq 208(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 200(%rdi), %r9
+ movq 200(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 192(%rdi), %r9
+ movq 192(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 184(%rdi), %r9
+ movq 184(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 176(%rdi), %r9
+ movq 176(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 168(%rdi), %r9
+ movq 168(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 160(%rdi), %r9
+ movq 160(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 152(%rdi), %r9
+ movq 152(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 144(%rdi), %r9
+ movq 144(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 136(%rdi), %r9
+ movq 136(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 128(%rdi), %r9
+ movq 128(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 120(%rdi), %r9
+ movq 120(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 112(%rdi), %r9
+ movq 112(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 104(%rdi), %r9
+ movq 104(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 96(%rdi), %r9
+ movq 96(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 88(%rdi), %r9
+ movq 88(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 80(%rdi), %r9
+ movq 80(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 72(%rdi), %r9
+ movq 72(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 64(%rdi), %r9
+ movq 64(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 56(%rdi), %r9
+ movq 56(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 48(%rdi), %r9
+ movq 48(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 40(%rdi), %r9
+ movq 40(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 32(%rdi), %r9
+ movq 32(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 24(%rdi), %r9
+ movq 24(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 16(%rdi), %r9
+ movq 16(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 8(%rdi), %r9
+ movq 8(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq (%rdi), %r9
+ movq (%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ # all equal: rax = -1 ^ -1 = 0; else mask is 0 and rax stays +/-1
+ xorq %rdx, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_cmp_64,.-sp_4096_cmp_64
+#endif /* __APPLE__ */
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ *
+ * SysV AMD64: rdi = r, rsi = a, rdx = b; operands are 64 limbs of
+ * 8 bytes (4096 bits). Returns in rax: 0 when no borrow occurred,
+ * -1 (all ones) when a < b (a full-width borrow mask).
+ */
+#ifndef __APPLE__
+.globl sp_4096_sub_64
+.type sp_4096_sub_64,@function
+.align 16
+sp_4096_sub_64:
+#else
+.globl _sp_4096_sub_64
+.p2align 4
+_sp_4096_sub_64:
+#endif /* __APPLE__ */
+ # Straight-line 64-limb subtract: sub on limb 0, then one unbroken
+ # sbb chain. Loads and stores are interleaved between the sub/sbb
+ # steps (mov does not touch flags, so CF is preserved along the chain).
+ movq (%rsi), %rcx
+ xorq %rax, %rax
+ subq (%rdx), %rcx
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ sbbq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ sbbq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ sbbq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ sbbq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ sbbq 40(%rdx), %r8
+ movq 48(%rsi), %rcx
+ movq %r8, 40(%rdi)
+ sbbq 48(%rdx), %rcx
+ movq 56(%rsi), %r8
+ movq %rcx, 48(%rdi)
+ sbbq 56(%rdx), %r8
+ movq 64(%rsi), %rcx
+ movq %r8, 56(%rdi)
+ sbbq 64(%rdx), %rcx
+ movq 72(%rsi), %r8
+ movq %rcx, 64(%rdi)
+ sbbq 72(%rdx), %r8
+ movq 80(%rsi), %rcx
+ movq %r8, 72(%rdi)
+ sbbq 80(%rdx), %rcx
+ movq 88(%rsi), %r8
+ movq %rcx, 80(%rdi)
+ sbbq 88(%rdx), %r8
+ movq 96(%rsi), %rcx
+ movq %r8, 88(%rdi)
+ sbbq 96(%rdx), %rcx
+ movq 104(%rsi), %r8
+ movq %rcx, 96(%rdi)
+ sbbq 104(%rdx), %r8
+ movq 112(%rsi), %rcx
+ movq %r8, 104(%rdi)
+ sbbq 112(%rdx), %rcx
+ movq 120(%rsi), %r8
+ movq %rcx, 112(%rdi)
+ sbbq 120(%rdx), %r8
+ movq 128(%rsi), %rcx
+ movq %r8, 120(%rdi)
+ sbbq 128(%rdx), %rcx
+ movq 136(%rsi), %r8
+ movq %rcx, 128(%rdi)
+ sbbq 136(%rdx), %r8
+ movq 144(%rsi), %rcx
+ movq %r8, 136(%rdi)
+ sbbq 144(%rdx), %rcx
+ movq 152(%rsi), %r8
+ movq %rcx, 144(%rdi)
+ sbbq 152(%rdx), %r8
+ movq 160(%rsi), %rcx
+ movq %r8, 152(%rdi)
+ sbbq 160(%rdx), %rcx
+ movq 168(%rsi), %r8
+ movq %rcx, 160(%rdi)
+ sbbq 168(%rdx), %r8
+ movq 176(%rsi), %rcx
+ movq %r8, 168(%rdi)
+ sbbq 176(%rdx), %rcx
+ movq 184(%rsi), %r8
+ movq %rcx, 176(%rdi)
+ sbbq 184(%rdx), %r8
+ movq 192(%rsi), %rcx
+ movq %r8, 184(%rdi)
+ sbbq 192(%rdx), %rcx
+ movq 200(%rsi), %r8
+ movq %rcx, 192(%rdi)
+ sbbq 200(%rdx), %r8
+ movq 208(%rsi), %rcx
+ movq %r8, 200(%rdi)
+ sbbq 208(%rdx), %rcx
+ movq 216(%rsi), %r8
+ movq %rcx, 208(%rdi)
+ sbbq 216(%rdx), %r8
+ movq 224(%rsi), %rcx
+ movq %r8, 216(%rdi)
+ sbbq 224(%rdx), %rcx
+ movq 232(%rsi), %r8
+ movq %rcx, 224(%rdi)
+ sbbq 232(%rdx), %r8
+ movq 240(%rsi), %rcx
+ movq %r8, 232(%rdi)
+ sbbq 240(%rdx), %rcx
+ movq 248(%rsi), %r8
+ movq %rcx, 240(%rdi)
+ sbbq 248(%rdx), %r8
+ movq 256(%rsi), %rcx
+ movq %r8, 248(%rdi)
+ sbbq 256(%rdx), %rcx
+ movq 264(%rsi), %r8
+ movq %rcx, 256(%rdi)
+ sbbq 264(%rdx), %r8
+ movq 272(%rsi), %rcx
+ movq %r8, 264(%rdi)
+ sbbq 272(%rdx), %rcx
+ movq 280(%rsi), %r8
+ movq %rcx, 272(%rdi)
+ sbbq 280(%rdx), %r8
+ movq 288(%rsi), %rcx
+ movq %r8, 280(%rdi)
+ sbbq 288(%rdx), %rcx
+ movq 296(%rsi), %r8
+ movq %rcx, 288(%rdi)
+ sbbq 296(%rdx), %r8
+ movq 304(%rsi), %rcx
+ movq %r8, 296(%rdi)
+ sbbq 304(%rdx), %rcx
+ movq 312(%rsi), %r8
+ movq %rcx, 304(%rdi)
+ sbbq 312(%rdx), %r8
+ movq 320(%rsi), %rcx
+ movq %r8, 312(%rdi)
+ sbbq 320(%rdx), %rcx
+ movq 328(%rsi), %r8
+ movq %rcx, 320(%rdi)
+ sbbq 328(%rdx), %r8
+ movq 336(%rsi), %rcx
+ movq %r8, 328(%rdi)
+ sbbq 336(%rdx), %rcx
+ movq 344(%rsi), %r8
+ movq %rcx, 336(%rdi)
+ sbbq 344(%rdx), %r8
+ movq 352(%rsi), %rcx
+ movq %r8, 344(%rdi)
+ sbbq 352(%rdx), %rcx
+ movq 360(%rsi), %r8
+ movq %rcx, 352(%rdi)
+ sbbq 360(%rdx), %r8
+ movq 368(%rsi), %rcx
+ movq %r8, 360(%rdi)
+ sbbq 368(%rdx), %rcx
+ movq 376(%rsi), %r8
+ movq %rcx, 368(%rdi)
+ sbbq 376(%rdx), %r8
+ movq 384(%rsi), %rcx
+ movq %r8, 376(%rdi)
+ sbbq 384(%rdx), %rcx
+ movq 392(%rsi), %r8
+ movq %rcx, 384(%rdi)
+ sbbq 392(%rdx), %r8
+ movq 400(%rsi), %rcx
+ movq %r8, 392(%rdi)
+ sbbq 400(%rdx), %rcx
+ movq 408(%rsi), %r8
+ movq %rcx, 400(%rdi)
+ sbbq 408(%rdx), %r8
+ movq 416(%rsi), %rcx
+ movq %r8, 408(%rdi)
+ sbbq 416(%rdx), %rcx
+ movq 424(%rsi), %r8
+ movq %rcx, 416(%rdi)
+ sbbq 424(%rdx), %r8
+ movq 432(%rsi), %rcx
+ movq %r8, 424(%rdi)
+ sbbq 432(%rdx), %rcx
+ movq 440(%rsi), %r8
+ movq %rcx, 432(%rdi)
+ sbbq 440(%rdx), %r8
+ movq 448(%rsi), %rcx
+ movq %r8, 440(%rdi)
+ sbbq 448(%rdx), %rcx
+ movq 456(%rsi), %r8
+ movq %rcx, 448(%rdi)
+ sbbq 456(%rdx), %r8
+ movq 464(%rsi), %rcx
+ movq %r8, 456(%rdi)
+ sbbq 464(%rdx), %rcx
+ movq 472(%rsi), %r8
+ movq %rcx, 464(%rdi)
+ sbbq 472(%rdx), %r8
+ movq 480(%rsi), %rcx
+ movq %r8, 472(%rdi)
+ sbbq 480(%rdx), %rcx
+ movq 488(%rsi), %r8
+ movq %rcx, 480(%rdi)
+ sbbq 488(%rdx), %r8
+ movq 496(%rsi), %rcx
+ movq %r8, 488(%rdi)
+ sbbq 496(%rdx), %rcx
+ movq 504(%rsi), %r8
+ movq %rcx, 496(%rdi)
+ sbbq 504(%rdx), %r8
+ movq %r8, 504(%rdi)
+ # rax = 0 - CF: 0 if no final borrow, -1 (all ones) otherwise.
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_sub_64,.-sp_4096_sub_64
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ *
+ * SysV AMD64: rdi = a (double-width, 128 limbs), rsi = m (64 limbs),
+ * rdx = mp (moved to r8). Requires BMI2 (mulx/pext) and ADX
+ * (adcx/adox): each row runs two independent carry chains (CF and OF)
+ * in parallel. Callee-saved r12-r14 are preserved via push/pop.
+ */
+#ifndef __APPLE__
+.globl sp_4096_mont_reduce_avx2_64
+.type sp_4096_mont_reduce_avx2_64,@function
+.align 16
+sp_4096_mont_reduce_avx2_64:
+#else
+.globl _sp_4096_mont_reduce_avx2_64
+.p2align 4
+_sp_4096_mont_reduce_avx2_64:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ movq %rdx, %r8
+ # r14 = carry propagated into the top limb of the next row
+ xorq %r14, %r14
+ # i = 64
+ movq $64, %r9
+ # r13 = a[i] (carried across iterations so it need not be reloaded)
+ movq (%rdi), %r13
+ # Bias rdi by +256 so displacements -248..256 span a[i+1]..a[i+64].
+ addq $256, %rdi
+ xorq %r12, %r12
+L_mont_loop_avx2_64:
+ # mu = a[i] * mp
+ movq %r13, %rdx
+ movq %r13, %r10
+ imulq %r8, %rdx
+ xorq %r12, %r12
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rax, %rcx
+ movq -248(%rdi), %r13
+ adcxq %rax, %r10
+ adoxq %rcx, %r13
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rax, %rcx
+ movq -240(%rdi), %r10
+ adcxq %rax, %r13
+ adoxq %rcx, %r10
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rax, %rcx
+ movq -232(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -240(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rax, %rcx
+ movq -224(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -232(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rax, %rcx
+ movq -216(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -224(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rax, %rcx
+ movq -208(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -216(%rdi)
+ # a[i+6] += m[6] * mu
+ mulxq 48(%rsi), %rax, %rcx
+ movq -200(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -208(%rdi)
+ # a[i+7] += m[7] * mu
+ mulxq 56(%rsi), %rax, %rcx
+ movq -192(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -200(%rdi)
+ # a[i+8] += m[8] * mu
+ mulxq 64(%rsi), %rax, %rcx
+ movq -184(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -192(%rdi)
+ # a[i+9] += m[9] * mu
+ mulxq 72(%rsi), %rax, %rcx
+ movq -176(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -184(%rdi)
+ # a[i+10] += m[10] * mu
+ mulxq 80(%rsi), %rax, %rcx
+ movq -168(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -176(%rdi)
+ # a[i+11] += m[11] * mu
+ mulxq 88(%rsi), %rax, %rcx
+ movq -160(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -168(%rdi)
+ # a[i+12] += m[12] * mu
+ mulxq 96(%rsi), %rax, %rcx
+ movq -152(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -160(%rdi)
+ # a[i+13] += m[13] * mu
+ mulxq 104(%rsi), %rax, %rcx
+ movq -144(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -152(%rdi)
+ # a[i+14] += m[14] * mu
+ mulxq 112(%rsi), %rax, %rcx
+ movq -136(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -144(%rdi)
+ # a[i+15] += m[15] * mu
+ mulxq 120(%rsi), %rax, %rcx
+ movq -128(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -136(%rdi)
+ # a[i+16] += m[16] * mu
+ mulxq 128(%rsi), %rax, %rcx
+ movq -120(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -128(%rdi)
+ # a[i+17] += m[17] * mu
+ mulxq 136(%rsi), %rax, %rcx
+ movq -112(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -120(%rdi)
+ # a[i+18] += m[18] * mu
+ mulxq 144(%rsi), %rax, %rcx
+ movq -104(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -112(%rdi)
+ # a[i+19] += m[19] * mu
+ mulxq 152(%rsi), %rax, %rcx
+ movq -96(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -104(%rdi)
+ # a[i+20] += m[20] * mu
+ mulxq 160(%rsi), %rax, %rcx
+ movq -88(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -96(%rdi)
+ # a[i+21] += m[21] * mu
+ mulxq 168(%rsi), %rax, %rcx
+ movq -80(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -88(%rdi)
+ # a[i+22] += m[22] * mu
+ mulxq 176(%rsi), %rax, %rcx
+ movq -72(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -80(%rdi)
+ # a[i+23] += m[23] * mu
+ mulxq 184(%rsi), %rax, %rcx
+ movq -64(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -72(%rdi)
+ # a[i+24] += m[24] * mu
+ mulxq 192(%rsi), %rax, %rcx
+ movq -56(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -64(%rdi)
+ # a[i+25] += m[25] * mu
+ mulxq 200(%rsi), %rax, %rcx
+ movq -48(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -56(%rdi)
+ # a[i+26] += m[26] * mu
+ mulxq 208(%rsi), %rax, %rcx
+ movq -40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -48(%rdi)
+ # a[i+27] += m[27] * mu
+ mulxq 216(%rsi), %rax, %rcx
+ movq -32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -40(%rdi)
+ # a[i+28] += m[28] * mu
+ mulxq 224(%rsi), %rax, %rcx
+ movq -24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -32(%rdi)
+ # a[i+29] += m[29] * mu
+ mulxq 232(%rsi), %rax, %rcx
+ movq -16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -24(%rdi)
+ # a[i+30] += m[30] * mu
+ mulxq 240(%rsi), %rax, %rcx
+ movq -8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, -16(%rdi)
+ # a[i+31] += m[31] * mu
+ mulxq 248(%rsi), %rax, %rcx
+ movq (%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, -8(%rdi)
+ # a[i+32] += m[32] * mu
+ mulxq 256(%rsi), %rax, %rcx
+ movq 8(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, (%rdi)
+ # a[i+33] += m[33] * mu
+ mulxq 264(%rsi), %rax, %rcx
+ movq 16(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 8(%rdi)
+ # a[i+34] += m[34] * mu
+ mulxq 272(%rsi), %rax, %rcx
+ movq 24(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 16(%rdi)
+ # a[i+35] += m[35] * mu
+ mulxq 280(%rsi), %rax, %rcx
+ movq 32(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 24(%rdi)
+ # a[i+36] += m[36] * mu
+ mulxq 288(%rsi), %rax, %rcx
+ movq 40(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 32(%rdi)
+ # a[i+37] += m[37] * mu
+ mulxq 296(%rsi), %rax, %rcx
+ movq 48(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 40(%rdi)
+ # a[i+38] += m[38] * mu
+ mulxq 304(%rsi), %rax, %rcx
+ movq 56(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 48(%rdi)
+ # a[i+39] += m[39] * mu
+ mulxq 312(%rsi), %rax, %rcx
+ movq 64(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 56(%rdi)
+ # a[i+40] += m[40] * mu
+ mulxq 320(%rsi), %rax, %rcx
+ movq 72(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 64(%rdi)
+ # a[i+41] += m[41] * mu
+ mulxq 328(%rsi), %rax, %rcx
+ movq 80(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 72(%rdi)
+ # a[i+42] += m[42] * mu
+ mulxq 336(%rsi), %rax, %rcx
+ movq 88(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 80(%rdi)
+ # a[i+43] += m[43] * mu
+ mulxq 344(%rsi), %rax, %rcx
+ movq 96(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 88(%rdi)
+ # a[i+44] += m[44] * mu
+ mulxq 352(%rsi), %rax, %rcx
+ movq 104(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 96(%rdi)
+ # a[i+45] += m[45] * mu
+ mulxq 360(%rsi), %rax, %rcx
+ movq 112(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 104(%rdi)
+ # a[i+46] += m[46] * mu
+ mulxq 368(%rsi), %rax, %rcx
+ movq 120(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 112(%rdi)
+ # a[i+47] += m[47] * mu
+ mulxq 376(%rsi), %rax, %rcx
+ movq 128(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 120(%rdi)
+ # a[i+48] += m[48] * mu
+ mulxq 384(%rsi), %rax, %rcx
+ movq 136(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 128(%rdi)
+ # a[i+49] += m[49] * mu
+ mulxq 392(%rsi), %rax, %rcx
+ movq 144(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 136(%rdi)
+ # a[i+50] += m[50] * mu
+ mulxq 400(%rsi), %rax, %rcx
+ movq 152(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 144(%rdi)
+ # a[i+51] += m[51] * mu
+ mulxq 408(%rsi), %rax, %rcx
+ movq 160(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 152(%rdi)
+ # a[i+52] += m[52] * mu
+ mulxq 416(%rsi), %rax, %rcx
+ movq 168(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 160(%rdi)
+ # a[i+53] += m[53] * mu
+ mulxq 424(%rsi), %rax, %rcx
+ movq 176(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 168(%rdi)
+ # a[i+54] += m[54] * mu
+ mulxq 432(%rsi), %rax, %rcx
+ movq 184(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 176(%rdi)
+ # a[i+55] += m[55] * mu
+ mulxq 440(%rsi), %rax, %rcx
+ movq 192(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 184(%rdi)
+ # a[i+56] += m[56] * mu
+ mulxq 448(%rsi), %rax, %rcx
+ movq 200(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 192(%rdi)
+ # a[i+57] += m[57] * mu
+ mulxq 456(%rsi), %rax, %rcx
+ movq 208(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 200(%rdi)
+ # a[i+58] += m[58] * mu
+ mulxq 464(%rsi), %rax, %rcx
+ movq 216(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 208(%rdi)
+ # a[i+59] += m[59] * mu
+ mulxq 472(%rsi), %rax, %rcx
+ movq 224(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 216(%rdi)
+ # a[i+60] += m[60] * mu
+ mulxq 480(%rsi), %rax, %rcx
+ movq 232(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 224(%rdi)
+ # a[i+61] += m[61] * mu
+ mulxq 488(%rsi), %rax, %rcx
+ movq 240(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 232(%rdi)
+ # a[i+62] += m[62] * mu
+ mulxq 496(%rsi), %rax, %rcx
+ movq 248(%rdi), %r11
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ movq %r10, 240(%rdi)
+ # a[i+63] += m[63] * mu
+ mulxq 504(%rsi), %rax, %rcx
+ movq 256(%rdi), %r10
+ adcxq %rax, %r11
+ adoxq %rcx, %r10
+ movq %r11, 248(%rdi)
+ # Add the carry saved from the previous row into the top limb.
+ adcxq %r14, %r10
+ movq %r10, 256(%rdi)
+ # Fold this row's OF and CF into r14 (r12 is always zero here).
+ movq %r12, %r14
+ adoxq %r12, %r14
+ adcxq %r12, %r14
+ # a += 1
+ addq $8, %rdi
+ # i -= 1
+ subq $1, %r9
+ jnz L_mont_loop_avx2_64
+ subq $256, %rdi
+ # Turn final carry into a mask: 0 -> 0, 1 -> all ones.
+ negq %r14
+ movq %rdi, %r8
+ # r8 = &a[64] (reduced upper half), rdi = &a[0] (output location).
+ subq $512, %rdi
+ # Constant-time final step: a[0..63] = a[64..127] - (m & mask).
+ # pextq with an all-ones mask copies the limb of m; with a zero
+ # mask it yields 0 - no data-dependent branch. r13 still holds a[64].
+ movq (%rsi), %rcx
+ movq %r13, %rdx
+ pextq %r14, %rcx, %rcx
+ subq %rcx, %rdx
+ movq 8(%rsi), %rcx
+ movq 8(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, (%rdi)
+ sbbq %rcx, %rax
+ movq 16(%rsi), %rdx
+ movq 16(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 8(%rdi)
+ sbbq %rdx, %rcx
+ movq 24(%rsi), %rax
+ movq 24(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 16(%rdi)
+ sbbq %rax, %rdx
+ movq 32(%rsi), %rcx
+ movq 32(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 24(%rdi)
+ sbbq %rcx, %rax
+ movq 40(%rsi), %rdx
+ movq 40(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 32(%rdi)
+ sbbq %rdx, %rcx
+ movq 48(%rsi), %rax
+ movq 48(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 40(%rdi)
+ sbbq %rax, %rdx
+ movq 56(%rsi), %rcx
+ movq 56(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 48(%rdi)
+ sbbq %rcx, %rax
+ movq 64(%rsi), %rdx
+ movq 64(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 56(%rdi)
+ sbbq %rdx, %rcx
+ movq 72(%rsi), %rax
+ movq 72(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 64(%rdi)
+ sbbq %rax, %rdx
+ movq 80(%rsi), %rcx
+ movq 80(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 72(%rdi)
+ sbbq %rcx, %rax
+ movq 88(%rsi), %rdx
+ movq 88(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 80(%rdi)
+ sbbq %rdx, %rcx
+ movq 96(%rsi), %rax
+ movq 96(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 88(%rdi)
+ sbbq %rax, %rdx
+ movq 104(%rsi), %rcx
+ movq 104(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 96(%rdi)
+ sbbq %rcx, %rax
+ movq 112(%rsi), %rdx
+ movq 112(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 104(%rdi)
+ sbbq %rdx, %rcx
+ movq 120(%rsi), %rax
+ movq 120(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 112(%rdi)
+ sbbq %rax, %rdx
+ movq 128(%rsi), %rcx
+ movq 128(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 120(%rdi)
+ sbbq %rcx, %rax
+ movq 136(%rsi), %rdx
+ movq 136(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 128(%rdi)
+ sbbq %rdx, %rcx
+ movq 144(%rsi), %rax
+ movq 144(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 136(%rdi)
+ sbbq %rax, %rdx
+ movq 152(%rsi), %rcx
+ movq 152(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 144(%rdi)
+ sbbq %rcx, %rax
+ movq 160(%rsi), %rdx
+ movq 160(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 152(%rdi)
+ sbbq %rdx, %rcx
+ movq 168(%rsi), %rax
+ movq 168(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 160(%rdi)
+ sbbq %rax, %rdx
+ movq 176(%rsi), %rcx
+ movq 176(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 168(%rdi)
+ sbbq %rcx, %rax
+ movq 184(%rsi), %rdx
+ movq 184(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 176(%rdi)
+ sbbq %rdx, %rcx
+ movq 192(%rsi), %rax
+ movq 192(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 184(%rdi)
+ sbbq %rax, %rdx
+ movq 200(%rsi), %rcx
+ movq 200(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 192(%rdi)
+ sbbq %rcx, %rax
+ movq 208(%rsi), %rdx
+ movq 208(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 200(%rdi)
+ sbbq %rdx, %rcx
+ movq 216(%rsi), %rax
+ movq 216(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 208(%rdi)
+ sbbq %rax, %rdx
+ movq 224(%rsi), %rcx
+ movq 224(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 216(%rdi)
+ sbbq %rcx, %rax
+ movq 232(%rsi), %rdx
+ movq 232(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 224(%rdi)
+ sbbq %rdx, %rcx
+ movq 240(%rsi), %rax
+ movq 240(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 232(%rdi)
+ sbbq %rax, %rdx
+ movq 248(%rsi), %rcx
+ movq 248(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 240(%rdi)
+ sbbq %rcx, %rax
+ movq 256(%rsi), %rdx
+ movq 256(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 248(%rdi)
+ sbbq %rdx, %rcx
+ movq 264(%rsi), %rax
+ movq 264(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 256(%rdi)
+ sbbq %rax, %rdx
+ movq 272(%rsi), %rcx
+ movq 272(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 264(%rdi)
+ sbbq %rcx, %rax
+ movq 280(%rsi), %rdx
+ movq 280(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 272(%rdi)
+ sbbq %rdx, %rcx
+ movq 288(%rsi), %rax
+ movq 288(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 280(%rdi)
+ sbbq %rax, %rdx
+ movq 296(%rsi), %rcx
+ movq 296(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 288(%rdi)
+ sbbq %rcx, %rax
+ movq 304(%rsi), %rdx
+ movq 304(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 296(%rdi)
+ sbbq %rdx, %rcx
+ movq 312(%rsi), %rax
+ movq 312(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 304(%rdi)
+ sbbq %rax, %rdx
+ movq 320(%rsi), %rcx
+ movq 320(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 312(%rdi)
+ sbbq %rcx, %rax
+ movq 328(%rsi), %rdx
+ movq 328(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 320(%rdi)
+ sbbq %rdx, %rcx
+ movq 336(%rsi), %rax
+ movq 336(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 328(%rdi)
+ sbbq %rax, %rdx
+ movq 344(%rsi), %rcx
+ movq 344(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 336(%rdi)
+ sbbq %rcx, %rax
+ movq 352(%rsi), %rdx
+ movq 352(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 344(%rdi)
+ sbbq %rdx, %rcx
+ movq 360(%rsi), %rax
+ movq 360(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 352(%rdi)
+ sbbq %rax, %rdx
+ movq 368(%rsi), %rcx
+ movq 368(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 360(%rdi)
+ sbbq %rcx, %rax
+ movq 376(%rsi), %rdx
+ movq 376(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 368(%rdi)
+ sbbq %rdx, %rcx
+ movq 384(%rsi), %rax
+ movq 384(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 376(%rdi)
+ sbbq %rax, %rdx
+ movq 392(%rsi), %rcx
+ movq 392(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 384(%rdi)
+ sbbq %rcx, %rax
+ movq 400(%rsi), %rdx
+ movq 400(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 392(%rdi)
+ sbbq %rdx, %rcx
+ movq 408(%rsi), %rax
+ movq 408(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 400(%rdi)
+ sbbq %rax, %rdx
+ movq 416(%rsi), %rcx
+ movq 416(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 408(%rdi)
+ sbbq %rcx, %rax
+ movq 424(%rsi), %rdx
+ movq 424(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 416(%rdi)
+ sbbq %rdx, %rcx
+ movq 432(%rsi), %rax
+ movq 432(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 424(%rdi)
+ sbbq %rax, %rdx
+ movq 440(%rsi), %rcx
+ movq 440(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 432(%rdi)
+ sbbq %rcx, %rax
+ movq 448(%rsi), %rdx
+ movq 448(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 440(%rdi)
+ sbbq %rdx, %rcx
+ movq 456(%rsi), %rax
+ movq 456(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 448(%rdi)
+ sbbq %rax, %rdx
+ movq 464(%rsi), %rcx
+ movq 464(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 456(%rdi)
+ sbbq %rcx, %rax
+ movq 472(%rsi), %rdx
+ movq 472(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 464(%rdi)
+ sbbq %rdx, %rcx
+ movq 480(%rsi), %rax
+ movq 480(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 472(%rdi)
+ sbbq %rax, %rdx
+ movq 488(%rsi), %rcx
+ movq 488(%r8), %rax
+ pextq %r14, %rcx, %rcx
+ movq %rdx, 480(%rdi)
+ sbbq %rcx, %rax
+ movq 496(%rsi), %rdx
+ movq 496(%r8), %rcx
+ pextq %r14, %rdx, %rdx
+ movq %rax, 488(%rdi)
+ sbbq %rdx, %rcx
+ movq 504(%rsi), %rax
+ movq 504(%r8), %rdx
+ pextq %r14, %rax, %rax
+ movq %rcx, 496(%rdi)
+ sbbq %rax, %rdx
+ movq %rdx, 504(%rdi)
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_mont_reduce_avx2_64,.-sp_4096_mont_reduce_avx2_64
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ *
+ * SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = m; 32 limbs of 8 bytes.
+ * Returns the carry out (0 or 1) in rax. Constant time: b is masked
+ * with m into a stack scratch buffer, then always added.
+ */
+#ifndef __APPLE__
+.globl sp_4096_cond_add_32
+.type sp_4096_cond_add_32,@function
+.align 16
+sp_4096_cond_add_32:
+#else
+.globl _sp_4096_cond_add_32
+.p2align 4
+_sp_4096_cond_add_32:
+#endif /* __APPLE__ */
+ # 256-byte scratch buffer on the stack for the masked copy of b.
+ subq $256, %rsp
+ movq $0, %rax
+ movq (%rdx), %r8
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 48(%rdx), %r8
+ movq 56(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 48(%rsp)
+ movq %r9, 56(%rsp)
+ movq 64(%rdx), %r8
+ movq 72(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq 80(%rdx), %r8
+ movq 88(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 80(%rsp)
+ movq %r9, 88(%rsp)
+ movq 96(%rdx), %r8
+ movq 104(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq 112(%rdx), %r8
+ movq 120(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 112(%rsp)
+ movq %r9, 120(%rsp)
+ movq 128(%rdx), %r8
+ movq 136(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 128(%rsp)
+ movq %r9, 136(%rsp)
+ movq 144(%rdx), %r8
+ movq 152(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 144(%rsp)
+ movq %r9, 152(%rsp)
+ movq 160(%rdx), %r8
+ movq 168(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 160(%rsp)
+ movq %r9, 168(%rsp)
+ movq 176(%rdx), %r8
+ movq 184(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 176(%rsp)
+ movq %r9, 184(%rsp)
+ movq 192(%rdx), %r8
+ movq 200(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 192(%rsp)
+ movq %r9, 200(%rsp)
+ movq 208(%rdx), %r8
+ movq 216(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 208(%rsp)
+ movq %r9, 216(%rsp)
+ movq 224(%rdx), %r8
+ movq 232(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 224(%rsp)
+ movq %r9, 232(%rsp)
+ movq 240(%rdx), %r8
+ movq 248(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 240(%rsp)
+ movq %r9, 248(%rsp)
+ # r = a + (b & m): 32-limb add with an unbroken carry chain.
+ movq (%rsi), %r8
+ movq (%rsp), %rdx
+ addq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, (%rdi)
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq 48(%rsi), %r8
+ movq 48(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 40(%rdi)
+ movq 56(%rsi), %r9
+ movq 56(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 48(%rdi)
+ movq 64(%rsi), %r8
+ movq 64(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 56(%rdi)
+ movq 72(%rsi), %r9
+ movq 72(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 64(%rdi)
+ movq 80(%rsi), %r8
+ movq 80(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 72(%rdi)
+ movq 88(%rsi), %r9
+ movq 88(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 80(%rdi)
+ movq 96(%rsi), %r8
+ movq 96(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 88(%rdi)
+ movq 104(%rsi), %r9
+ movq 104(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rsi), %r8
+ movq 112(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rsi), %r9
+ movq 120(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 112(%rdi)
+ movq 128(%rsi), %r8
+ movq 128(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 120(%rdi)
+ movq 136(%rsi), %r9
+ movq 136(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 128(%rdi)
+ movq 144(%rsi), %r8
+ movq 144(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 136(%rdi)
+ movq 152(%rsi), %r9
+ movq 152(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rsi), %r8
+ movq 160(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 152(%rdi)
+ movq 168(%rsi), %r9
+ movq 168(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 160(%rdi)
+ movq 176(%rsi), %r8
+ movq 176(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 168(%rdi)
+ movq 184(%rsi), %r9
+ movq 184(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 176(%rdi)
+ movq 192(%rsi), %r8
+ movq 192(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 184(%rdi)
+ movq 200(%rsi), %r9
+ movq 200(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 192(%rdi)
+ movq 208(%rsi), %r8
+ movq 208(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 200(%rdi)
+ movq 216(%rsi), %r9
+ movq 216(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 208(%rdi)
+ movq 224(%rsi), %r8
+ movq 224(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 216(%rdi)
+ movq 232(%rsi), %r9
+ movq 232(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 224(%rdi)
+ movq 240(%rsi), %r8
+ movq 240(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 232(%rdi)
+ movq 248(%rsi), %r9
+ movq 248(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 240(%rdi)
+ movq %r9, 248(%rdi)
+ # rax = carry out of the top limb (0 or 1).
+ adcq $0, %rax
+ addq $256, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_cond_add_32,.-sp_4096_cond_add_32
+#endif /* __APPLE__ */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ *
+ * SysV AMD64: rdi = r, rsi = a, rdx = b, rcx = m; 32 limbs of 8 bytes.
+ * Returns the carry out (0 or 1) in rax. BMI2 variant: pextq with an
+ * all-ones mask copies the limb of b, with a zero mask it yields 0,
+ * so no stack scratch buffer is needed (constant time, branch free).
+ */
+#ifndef __APPLE__
+.globl sp_4096_cond_add_avx2_32
+.type sp_4096_cond_add_avx2_32,@function
+.align 16
+sp_4096_cond_add_avx2_32:
+#else
+.globl _sp_4096_cond_add_avx2_32
+.p2align 4
+_sp_4096_cond_add_avx2_32:
+#endif /* __APPLE__ */
+ movq $0, %rax
+ movq (%rdx), %r10
+ movq (%rsi), %r8
+ pextq %rcx, %r10, %r10
+ addq %r10, %r8
+ movq 8(%rdx), %r10
+ movq 8(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, (%rdi)
+ adcq %r10, %r9
+ movq 16(%rdx), %r8
+ movq 16(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 8(%rdi)
+ adcq %r8, %r10
+ movq 24(%rdx), %r9
+ movq 24(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 16(%rdi)
+ adcq %r9, %r8
+ movq 32(%rdx), %r10
+ movq 32(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 24(%rdi)
+ adcq %r10, %r9
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 32(%rdi)
+ adcq %r8, %r10
+ movq 48(%rdx), %r9
+ movq 48(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 40(%rdi)
+ adcq %r9, %r8
+ movq 56(%rdx), %r10
+ movq 56(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 48(%rdi)
+ adcq %r10, %r9
+ movq 64(%rdx), %r8
+ movq 64(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 56(%rdi)
+ adcq %r8, %r10
+ movq 72(%rdx), %r9
+ movq 72(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 64(%rdi)
+ adcq %r9, %r8
+ movq 80(%rdx), %r10
+ movq 80(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 72(%rdi)
+ adcq %r10, %r9
+ movq 88(%rdx), %r8
+ movq 88(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 80(%rdi)
+ adcq %r8, %r10
+ movq 96(%rdx), %r9
+ movq 96(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 88(%rdi)
+ adcq %r9, %r8
+ movq 104(%rdx), %r10
+ movq 104(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 96(%rdi)
+ adcq %r10, %r9
+ movq 112(%rdx), %r8
+ movq 112(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 104(%rdi)
+ adcq %r8, %r10
+ movq 120(%rdx), %r9
+ movq 120(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 112(%rdi)
+ adcq %r9, %r8
+ movq 128(%rdx), %r10
+ movq 128(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 120(%rdi)
+ adcq %r10, %r9
+ movq 136(%rdx), %r8
+ movq 136(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 128(%rdi)
+ adcq %r8, %r10
+ movq 144(%rdx), %r9
+ movq 144(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 136(%rdi)
+ adcq %r9, %r8
+ movq 152(%rdx), %r10
+ movq 152(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 144(%rdi)
+ adcq %r10, %r9
+ movq 160(%rdx), %r8
+ movq 160(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 152(%rdi)
+ adcq %r8, %r10
+ movq 168(%rdx), %r9
+ movq 168(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 160(%rdi)
+ adcq %r9, %r8
+ movq 176(%rdx), %r10
+ movq 176(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 168(%rdi)
+ adcq %r10, %r9
+ movq 184(%rdx), %r8
+ movq 184(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 176(%rdi)
+ adcq %r8, %r10
+ movq 192(%rdx), %r9
+ movq 192(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 184(%rdi)
+ adcq %r9, %r8
+ movq 200(%rdx), %r10
+ movq 200(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 192(%rdi)
+ adcq %r10, %r9
+ movq 208(%rdx), %r8
+ movq 208(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 200(%rdi)
+ adcq %r8, %r10
+ movq 216(%rdx), %r9
+ movq 216(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 208(%rdi)
+ adcq %r9, %r8
+ movq 224(%rdx), %r10
+ movq 224(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 216(%rdi)
+ adcq %r10, %r9
+ movq 232(%rdx), %r8
+ movq 232(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 224(%rdi)
+ adcq %r8, %r10
+ movq 240(%rdx), %r9
+ movq 240(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 232(%rdi)
+ adcq %r9, %r8
+ movq 248(%rdx), %r10
+ movq 248(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 240(%rdi)
+ adcq %r10, %r9
+ movq %r9, 248(%rdi)
+ # rax = carry out of the top limb (0 or 1).
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_cond_add_avx2_32,.-sp_4096_cond_add_avx2_32
+#endif /* __APPLE__ */
+/* Shift number left by n bits. (r = a << n)
+ *
+ * r Result of left shift by n.
+ * a Number to shift.
+ * n Amount to shift.
+ */
+#ifndef __APPLE__
+.globl sp_4096_lshift_64
+.type sp_4096_lshift_64,@function
+.align 16
+sp_4096_lshift_64:
+#else
+.globl _sp_4096_lshift_64
+.p2align 4
+_sp_4096_lshift_64:
+#endif /* __APPLE__ */
+ # SysV: rdi = r (result, 65 words written), rsi = a (64 words), rdx = n.
+ # Shift count is moved to %cl because shldq takes its count in %cl;
+ # assumes 0 <= n <= 63. Processes from the most-significant words down,
+ # four words per group; shldq fills each word with bits from the
+ # next-lower word, and %r10 catches the bits shifted out of the top
+ # word (stored to r[64] at offset 512).
+ movq %rdx, %rcx
+ movq $0, %r10
+ movq 472(%rsi), %r11
+ movq 480(%rsi), %rdx
+ movq 488(%rsi), %rax
+ movq 496(%rsi), %r8
+ movq 504(%rsi), %r9
+ shldq %cl, %r9, %r10
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 480(%rdi)
+ movq %rax, 488(%rdi)
+ movq %r8, 496(%rdi)
+ movq %r9, 504(%rdi)
+ movq %r10, 512(%rdi)
+ # Each following group keeps the lowest word loaded in the previous
+ # group (alternating %r11 / %r9) so the carry bits flow between groups.
+ movq 440(%rsi), %r9
+ movq 448(%rsi), %rdx
+ movq 456(%rsi), %rax
+ movq 464(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 448(%rdi)
+ movq %rax, 456(%rdi)
+ movq %r8, 464(%rdi)
+ movq %r11, 472(%rdi)
+ movq 408(%rsi), %r11
+ movq 416(%rsi), %rdx
+ movq 424(%rsi), %rax
+ movq 432(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 416(%rdi)
+ movq %rax, 424(%rdi)
+ movq %r8, 432(%rdi)
+ movq %r9, 440(%rdi)
+ movq 376(%rsi), %r9
+ movq 384(%rsi), %rdx
+ movq 392(%rsi), %rax
+ movq 400(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 384(%rdi)
+ movq %rax, 392(%rdi)
+ movq %r8, 400(%rdi)
+ movq %r11, 408(%rdi)
+ movq 344(%rsi), %r11
+ movq 352(%rsi), %rdx
+ movq 360(%rsi), %rax
+ movq 368(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 352(%rdi)
+ movq %rax, 360(%rdi)
+ movq %r8, 368(%rdi)
+ movq %r9, 376(%rdi)
+ movq 312(%rsi), %r9
+ movq 320(%rsi), %rdx
+ movq 328(%rsi), %rax
+ movq 336(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 320(%rdi)
+ movq %rax, 328(%rdi)
+ movq %r8, 336(%rdi)
+ movq %r11, 344(%rdi)
+ movq 280(%rsi), %r11
+ movq 288(%rsi), %rdx
+ movq 296(%rsi), %rax
+ movq 304(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 288(%rdi)
+ movq %rax, 296(%rdi)
+ movq %r8, 304(%rdi)
+ movq %r9, 312(%rdi)
+ movq 248(%rsi), %r9
+ movq 256(%rsi), %rdx
+ movq 264(%rsi), %rax
+ movq 272(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 256(%rdi)
+ movq %rax, 264(%rdi)
+ movq %r8, 272(%rdi)
+ movq %r11, 280(%rdi)
+ movq 216(%rsi), %r11
+ movq 224(%rsi), %rdx
+ movq 232(%rsi), %rax
+ movq 240(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 224(%rdi)
+ movq %rax, 232(%rdi)
+ movq %r8, 240(%rdi)
+ movq %r9, 248(%rdi)
+ movq 184(%rsi), %r9
+ movq 192(%rsi), %rdx
+ movq 200(%rsi), %rax
+ movq 208(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 192(%rdi)
+ movq %rax, 200(%rdi)
+ movq %r8, 208(%rdi)
+ movq %r11, 216(%rdi)
+ movq 152(%rsi), %r11
+ movq 160(%rsi), %rdx
+ movq 168(%rsi), %rax
+ movq 176(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 160(%rdi)
+ movq %rax, 168(%rdi)
+ movq %r8, 176(%rdi)
+ movq %r9, 184(%rdi)
+ movq 120(%rsi), %r9
+ movq 128(%rsi), %rdx
+ movq 136(%rsi), %rax
+ movq 144(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 128(%rdi)
+ movq %rax, 136(%rdi)
+ movq %r8, 144(%rdi)
+ movq %r11, 152(%rdi)
+ movq 88(%rsi), %r11
+ movq 96(%rsi), %rdx
+ movq 104(%rsi), %rax
+ movq 112(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 96(%rdi)
+ movq %rax, 104(%rdi)
+ movq %r8, 112(%rdi)
+ movq %r9, 120(%rdi)
+ movq 56(%rsi), %r9
+ movq 64(%rsi), %rdx
+ movq 72(%rsi), %rax
+ movq 80(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r9, %rdx
+ movq %rdx, 64(%rdi)
+ movq %rax, 72(%rdi)
+ movq %r8, 80(%rdi)
+ movq %r11, 88(%rdi)
+ movq 24(%rsi), %r11
+ movq 32(%rsi), %rdx
+ movq 40(%rsi), %rax
+ movq 48(%rsi), %r8
+ shldq %cl, %r8, %r9
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shldq %cl, %r11, %rdx
+ movq %rdx, 32(%rdi)
+ movq %rax, 40(%rdi)
+ movq %r8, 48(%rdi)
+ movq %r9, 56(%rdi)
+ # Lowest group: the bottom word has no lower neighbour, so plain shlq
+ # fills with zeros.
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %r8
+ shldq %cl, %r8, %r11
+ shldq %cl, %rax, %r8
+ shldq %cl, %rdx, %rax
+ shlq %cl, %rdx
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r11, 24(%rdi)
+ repz retq
+ # NOTE(review): no ELF .size directive follows this function, unlike the
+ # other functions in this file - confirm intentional.
+#endif /* WOLFSSL_SP_4096 */
+#endif /* WOLFSSL_SP_4096 */
+#ifndef WOLFSSL_SP_NO_256
+/* Conditionally copy a into r using the mask m.
+ * m is -1 to copy and 0 when not.
+ *
+ * r A single precision number to copy over.
+ * a A single precision number to copy.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_256_cond_copy_4
+.type sp_256_cond_copy_4,@function
+.align 16
+sp_256_cond_copy_4:
+#else
+.globl _sp_256_cond_copy_4
+.p2align 4
+_sp_256_cond_copy_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r (4 words), rsi = a (4 words), rdx = m (mask: -1 copy, 0 keep).
+ # Constant time: computes r ^= (r ^ a) & m with no branch on the mask.
+ movq (%rdi), %rax
+ movq 8(%rdi), %rcx
+ movq 16(%rdi), %r8
+ movq 24(%rdi), %r9
+ xorq (%rsi), %rax
+ xorq 8(%rsi), %rcx
+ xorq 16(%rsi), %r8
+ xorq 24(%rsi), %r9
+ andq %rdx, %rax
+ andq %rdx, %rcx
+ andq %rdx, %r8
+ andq %rdx, %r9
+ xorq %rax, (%rdi)
+ xorq %rcx, 8(%rdi)
+ xorq %r8, 16(%rdi)
+ xorq %r9, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_cond_copy_4,.-sp_256_cond_copy_4
+#endif /* __APPLE__ */
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_mul_4
+.type sp_256_mont_mul_4,@function
+.align 16
+sp_256_mont_mul_4:
+#else
+.globl _sp_256_mont_mul_4
+.p2align 4
+_sp_256_mont_mul_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a, rdx = b (moved to r8 so rdx is free for mulq).
+ # Schoolbook 4x4 multiply producing an 8-word product in r9..r15,rbx,
+ # followed by a P-256-specific reduction that exploits the modulus
+ # structure (p = 2^256 - 2^224 + 2^192 + 2^96 - 1) instead of a generic
+ # Montgomery loop. Saves/restores callee-saved r12-r15, rbx.
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ movq %rdx, %r8
+ # A[0] * B[0]
+ movq (%r8), %rax
+ mulq (%rsi)
+ movq %rax, %r9
+ movq %rdx, %r10
+ # A[0] * B[1]
+ movq 8(%r8), %rax
+ mulq (%rsi)
+ xorq %r11, %r11
+ addq %rax, %r10
+ adcq %rdx, %r11
+ # A[1] * B[0]
+ movq (%r8), %rax
+ mulq 8(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[0] * B[2]
+ movq 16(%r8), %rax
+ mulq (%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[1] * B[1]
+ movq 8(%r8), %rax
+ mulq 8(%rsi)
+ xorq %r13, %r13
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0, %r13
+ # A[2] * B[0]
+ movq (%r8), %rax
+ mulq 16(%rsi)
+ addq %rax, %r11
+ adcq %rdx, %r12
+ adcq $0, %r13
+ # A[0] * B[3]
+ movq 24(%r8), %rax
+ mulq (%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0, %r14
+ # A[1] * B[2]
+ movq 16(%r8), %rax
+ mulq 8(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0, %r14
+ # A[2] * B[1]
+ movq 8(%r8), %rax
+ mulq 16(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0, %r14
+ # A[3] * B[0]
+ movq (%r8), %rax
+ mulq 24(%rsi)
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0, %r14
+ # A[1] * B[3]
+ movq 24(%r8), %rax
+ mulq 8(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0, %r15
+ # A[2] * B[2]
+ movq 16(%r8), %rax
+ mulq 16(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0, %r15
+ # A[3] * B[1]
+ movq 8(%r8), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ adcq $0, %r15
+ # A[2] * B[3]
+ movq 24(%r8), %rax
+ mulq 16(%rsi)
+ xorq %rbx, %rbx
+ addq %rax, %r14
+ adcq %rdx, %r15
+ adcq $0, %rbx
+ # A[3] * B[2]
+ movq 16(%r8), %rax
+ mulq 24(%rsi)
+ addq %rax, %r14
+ adcq %rdx, %r15
+ adcq $0, %rbx
+ # A[3] * B[3]
+ movq 24(%r8), %rax
+ mulq 24(%rsi)
+ addq %rax, %r15
+ adcq %rdx, %rbx
+ # Start Reduction
+ # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
+ # - a[0] << 32 << 192
+ # + (a[0] * 2) << 192
+ movq %r9, %rax
+ movq %r12, %rdx
+ addq %r9, %rdx
+ movq %r10, %rsi
+ addq %r9, %rdx
+ movq %r11, %r8
+ # a[0]-a[2] << 32
+ shlq $32, %r9
+ shldq $32, %rsi, %r11
+ shldq $32, %rax, %r10
+ # - a[0] << 32 << 192
+ subq %r9, %rdx
+ # + a[0]-a[2] << 32 << 64
+ addq %r9, %rsi
+ adcq %r10, %r8
+ adcq %r11, %rdx
+ # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
+ # a += mu << 256
+ xorq %r9, %r9
+ addq %rax, %r13
+ adcq %rsi, %r14
+ adcq %r8, %r15
+ adcq %rdx, %rbx
+ sbbq $0, %r9
+ # a += mu << 192
+ addq %rax, %r12
+ adcq %rsi, %r13
+ adcq %r8, %r14
+ adcq %rdx, %r15
+ adcq $0, %rbx
+ sbbq $0, %r9
+ # mu <<= 32
+ movq %rdx, %rcx
+ shldq $32, %r8, %rdx
+ shldq $32, %rsi, %r8
+ shldq $32, %rax, %rsi
+ shrq $32, %rcx
+ shlq $32, %rax
+ # a += (mu << 32) << 64
+ addq %r8, %r12
+ adcq %rdx, %r13
+ adcq %rcx, %r14
+ adcq $0, %r15
+ adcq $0, %rbx
+ sbbq $0, %r9
+ # a -= (mu << 32) << 192
+ subq %rax, %r12
+ sbbq %rsi, %r13
+ sbbq %r8, %r14
+ sbbq %rdx, %r15
+ sbbq %rcx, %rbx
+ adcq $0, %r9
+ # r9 = overflow mask (0 or -1); branch-free conditional subtract of p256.
+ movq $4294967295, %rax
+ movq $18446744069414584321, %rsi
+ # mask m and sub from result if overflow
+ # m[0] = -1 & mask = mask
+ andq %r9, %rax
+ # m[2] = 0 & mask = 0
+ andq %r9, %rsi
+ subq %r9, %r13
+ sbbq %rax, %r14
+ sbbq $0, %r15
+ sbbq %rsi, %rbx
+ movq %r13, (%rdi)
+ movq %r14, 8(%rdi)
+ movq %r15, 16(%rdi)
+ movq %rbx, 24(%rdi)
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_mul_4,.-sp_256_mont_mul_4
+#endif /* __APPLE__ */
+/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_sqr_4
+.type sp_256_mont_sqr_4,@function
+.align 16
+sp_256_mont_sqr_4:
+#else
+.globl _sp_256_mont_sqr_4
+.p2align 4
+_sp_256_mont_sqr_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a.
+ # Squares the 4-word value: off-diagonal products are summed once and
+ # doubled, then the diagonal squares are folded in, then the same
+ # P-256-specific reduction as sp_256_mont_mul_4 is applied.
+ # Fix: removed redundant self-move no-ops (movq %rax, %rax /
+ # movq %rdx, %rdx) after the diagonal mulq instructions - they had no
+ # effect on registers or flags.
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ # A[0] * A[1]
+ movq (%rsi), %rax
+ mulq 8(%rsi)
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[0] * A[2]
+ movq (%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r12, %r12
+ addq %rax, %r11
+ adcq %rdx, %r12
+ # A[0] * A[3]
+ movq (%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r13, %r13
+ addq %rax, %r12
+ adcq %rdx, %r13
+ # A[1] * A[2]
+ movq 8(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r14, %r14
+ addq %rax, %r12
+ adcq %rdx, %r13
+ adcq $0, %r14
+ # A[1] * A[3]
+ movq 8(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r14
+ # A[2] * A[3]
+ movq 16(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r15, %r15
+ addq %rax, %r14
+ adcq %rdx, %r15
+ # Double
+ xorq %rbx, %rbx
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ adcq %r13, %r13
+ adcq %r14, %r14
+ adcq %r15, %r15
+ adcq $0, %rbx
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ movq %rax, %r9
+ movq %rdx, %r8
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %r8, %r10
+ adcq %rax, %r11
+ adcq $0, %rdx
+ movq %rdx, %r8
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %r8, %r12
+ adcq %rax, %r13
+ adcq $0, %rdx
+ movq %rdx, %r8
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %r15
+ adcq %rdx, %rbx
+ addq %r8, %r14
+ adcq $0, %r15
+ adcq $0, %rbx
+ # Start Reduction
+ # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
+ # - a[0] << 32 << 192
+ # + (a[0] * 2) << 192
+ movq %r9, %rax
+ movq %r12, %rdx
+ addq %r9, %rdx
+ movq %r10, %rsi
+ addq %r9, %rdx
+ movq %r11, %r8
+ # a[0]-a[2] << 32
+ shlq $32, %r9
+ shldq $32, %rsi, %r11
+ shldq $32, %rax, %r10
+ # - a[0] << 32 << 192
+ subq %r9, %rdx
+ # + a[0]-a[2] << 32 << 64
+ addq %r9, %rsi
+ adcq %r10, %r8
+ adcq %r11, %rdx
+ # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
+ # a += mu << 256
+ xorq %r9, %r9
+ addq %rax, %r13
+ adcq %rsi, %r14
+ adcq %r8, %r15
+ adcq %rdx, %rbx
+ sbbq $0, %r9
+ # a += mu << 192
+ addq %rax, %r12
+ adcq %rsi, %r13
+ adcq %r8, %r14
+ adcq %rdx, %r15
+ adcq $0, %rbx
+ sbbq $0, %r9
+ # mu <<= 32
+ movq %rdx, %rcx
+ shldq $32, %r8, %rdx
+ shldq $32, %rsi, %r8
+ shldq $32, %rax, %rsi
+ shrq $32, %rcx
+ shlq $32, %rax
+ # a += (mu << 32) << 64
+ addq %r8, %r12
+ adcq %rdx, %r13
+ adcq %rcx, %r14
+ adcq $0, %r15
+ adcq $0, %rbx
+ sbbq $0, %r9
+ # a -= (mu << 32) << 192
+ subq %rax, %r12
+ sbbq %rsi, %r13
+ sbbq %r8, %r14
+ sbbq %rdx, %r15
+ sbbq %rcx, %rbx
+ adcq $0, %r9
+ # r9 = overflow mask (0 or -1); branch-free conditional subtract of p256.
+ movq $4294967295, %rax
+ movq $18446744069414584321, %rsi
+ # mask m and sub from result if overflow
+ # m[0] = -1 & mask = mask
+ andq %r9, %rax
+ # m[2] = 0 & mask = 0
+ andq %r9, %rsi
+ subq %r9, %r13
+ sbbq %rax, %r14
+ sbbq $0, %r15
+ sbbq %rsi, %rbx
+ movq %r13, (%rdi)
+ movq %r14, 8(%rdi)
+ movq %r15, 16(%rdi)
+ movq %rbx, 24(%rdi)
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_sqr_4,.-sp_256_mont_sqr_4
+#endif /* __APPLE__ */
+/* Compare a with b in constant time.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+#ifndef __APPLE__
+.globl sp_256_cmp_4
+.type sp_256_cmp_4,@function
+.align 16
+sp_256_cmp_4:
+#else
+.globl _sp_256_cmp_4
+.p2align 4
+_sp_256_cmp_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = a, rsi = b. Constant time, most-significant word first.
+ # rdx = "still equal" mask: -1 while all higher words matched, cleared to
+ #       0 (by cmovnz) once a difference is seen, which zeroes all later
+ #       word loads via the andq so no later cmov fires.
+ # rax = result: set to 1 (cmova) or -1 (cmovc) at the deciding word.
+ # Final xorq: if every word was equal, rax = -1 ^ rdx(-1) = 0.
+ xorq %rcx, %rcx
+ movq $-1, %rdx
+ movq $-1, %rax
+ movq $1, %r8
+ movq 24(%rdi), %r9
+ movq 24(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 16(%rdi), %r9
+ movq 16(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 8(%rdi), %r9
+ movq 8(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq (%rdi), %r9
+ movq (%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ xorq %rdx, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_256_cmp_4,.-sp_256_cmp_4
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_256_cond_sub_4
+.type sp_256_cond_sub_4,@function
+.align 16
+sp_256_cond_sub_4:
+#else
+.globl _sp_256_cond_sub_4
+.p2align 4
+_sp_256_cond_sub_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a, rdx = b, rcx = m (mask: -1 subtract, 0 keep).
+ # Constant time: b is masked with m, then subtracted unconditionally.
+ # Returns the resulting borrow in rax (0 or -1).
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq $0, %rax
+ movq (%rdx), %r12
+ movq 8(%rdx), %r13
+ movq 16(%rdx), %r14
+ movq 24(%rdx), %r15
+ andq %rcx, %r12
+ andq %rcx, %r13
+ andq %rcx, %r14
+ andq %rcx, %r15
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq %r12, %r8
+ sbbq %r13, %r9
+ sbbq %r14, %r10
+ sbbq %r15, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ sbbq $0, %rax
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_256_cond_sub_4,.-sp_256_cond_sub_4
+#endif /* __APPLE__ */
+/* Sub b from a into r. (r = a - b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_256_sub_4
+.type sp_256_sub_4,@function
+.align 16
+sp_256_sub_4:
+#else
+.globl _sp_256_sub_4
+.p2align 4
+_sp_256_sub_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a, rdx = b. r = a - b over 4 words.
+ # Returns the final borrow in rax (0 or -1).
+ xorq %rax, %rax
+ movq (%rsi), %rcx
+ movq 8(%rsi), %r8
+ movq 16(%rsi), %r9
+ movq 24(%rsi), %r10
+ subq (%rdx), %rcx
+ sbbq 8(%rdx), %r8
+ sbbq 16(%rdx), %r9
+ sbbq 24(%rdx), %r10
+ movq %rcx, (%rdi)
+ movq %r8, 8(%rdi)
+ movq %r9, 16(%rdi)
+ movq %r10, 24(%rdi)
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_256_sub_4,.-sp_256_sub_4
+#endif /* __APPLE__ */
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_reduce_4
+.type sp_256_mont_reduce_4,@function
+.align 16
+sp_256_mont_reduce_4:
+#else
+.globl _sp_256_mont_reduce_4
+.p2align 4
+_sp_256_mont_reduce_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = a (8 words, reduced in place), rsi = m, rdx = mp.
+ # Generic 4-word Montgomery reduction: four iterations, each cancels the
+ # lowest remaining word by adding mu*m. r13 walks the array; r14 carries
+ # the running carry out of the top word between iterations.
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq %rdx, %rcx
+ # i = 0
+ xorq %r14, %r14
+ movq $4, %r8
+ movq %rdi, %r13
+L_mont_loop_4:
+ # mu = a[i] * mp
+ movq (%r13), %r12
+ imulq %rcx, %r12
+ # a[i+0] += m[0] * mu
+ movq (%rsi), %rax
+ movq 8(%rsi), %r10
+ mulq %r12
+ movq (%r13), %r15
+ addq %rax, %r15
+ movq %rdx, %r9
+ movq %r15, (%r13)
+ adcq $0, %r9
+ # a[i+1] += m[1] * mu
+ movq %r10, %rax
+ mulq %r12
+ movq 16(%rsi), %r10
+ movq 8(%r13), %r15
+ addq %r9, %rax
+ movq %rdx, %r11
+ adcq $0, %r11
+ addq %rax, %r15
+ movq %r15, 8(%r13)
+ adcq $0, %r11
+ # a[i+2] += m[2] * mu
+ movq %r10, %rax
+ mulq %r12
+ movq 24(%rsi), %r10
+ movq 16(%r13), %r15
+ addq %r11, %rax
+ movq %rdx, %r9
+ adcq $0, %r9
+ addq %rax, %r15
+ movq %r15, 16(%r13)
+ adcq $0, %r9
+ # a[i+3] += m[3] * mu
+ movq %r10, %rax
+ mulq %r12
+ movq 24(%r13), %r15
+ addq %r9, %rax
+ adcq %r14, %rdx
+ movq $0, %r14
+ adcq $0, %r14
+ addq %rax, %r15
+ movq %r15, 24(%r13)
+ adcq %rdx, 32(%r13)
+ adcq $0, %r14
+ # i += 1
+ addq $8, %r13
+ decq %r8
+ jnz L_mont_loop_4
+ # rax = -(final carry): all-ones mask if the top half overflowed;
+ # branch-free conditional subtract of m from the top 4 words.
+ xorq %rax, %rax
+ movq 32(%rdi), %rdx
+ movq 40(%rdi), %r8
+ movq 48(%rdi), %r15
+ movq 56(%rdi), %r9
+ subq %r14, %rax
+ movq (%rsi), %r10
+ movq 8(%rsi), %r11
+ movq 16(%rsi), %r12
+ movq 24(%rsi), %r13
+ andq %rax, %r10
+ andq %rax, %r11
+ andq %rax, %r12
+ andq %rax, %r13
+ subq %r10, %rdx
+ sbbq %r11, %r8
+ sbbq %r12, %r15
+ sbbq %r13, %r9
+ # Reduced result is written to the low 4 words of a.
+ movq %rdx, (%rdi)
+ movq %r8, 8(%rdi)
+ movq %r15, 16(%rdi)
+ movq %r9, 24(%rdi)
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_reduce_4,.-sp_256_mont_reduce_4
+#endif /* __APPLE__ */
+/* Add two Montgomery form numbers (r = a + b % m).
+ *
+ * r Result of addition.
+ * a First number to add in Montgomery form.
+ * b Second number to add in Montgomery form.
+ * m Modulus (prime).
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_add_4
+.type sp_256_mont_add_4,@function
+.align 16
+sp_256_mont_add_4:
+#else
+.globl _sp_256_mont_add_4
+.p2align 4
+_sp_256_mont_add_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a, rdx = b. p256 is hard-coded:
+ # p[0] = -1, p[1] = 0xffffffff, p[2] = 0, p[3] = 0xffffffff00000001.
+ # Add, then conditionally subtract p once using rsi = -(carry) as a
+ # branch-free mask (constant time).
+ movq (%rsi), %rax
+ movq 8(%rsi), %rcx
+ movq 16(%rsi), %r8
+ movq 24(%rsi), %r9
+ movq $4294967295, %r10
+ movq $18446744069414584321, %r11
+ addq (%rdx), %rax
+ adcq 8(%rdx), %rcx
+ adcq 16(%rdx), %r8
+ movq $0, %rsi
+ adcq 24(%rdx), %r9
+ sbbq $0, %rsi
+ andq %rsi, %r10
+ andq %rsi, %r11
+ subq %rsi, %rax
+ sbbq %r10, %rcx
+ movq %rax, (%rdi)
+ sbbq $0, %r8
+ movq %rcx, 8(%rdi)
+ sbbq %r11, %r9
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_add_4,.-sp_256_mont_add_4
+#endif /* __APPLE__ */
+/* Double a Montgomery form number (r = a + a % m).
+ *
+ * r Result of doubling.
+ * a Number to double in Montgomery form.
+ * m Modulus (prime).
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_dbl_4
+.type sp_256_mont_dbl_4,@function
+.align 16
+sp_256_mont_dbl_4:
+#else
+.globl _sp_256_mont_dbl_4
+.p2align 4
+_sp_256_mont_dbl_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a. Doubles a, then conditionally subtracts
+ # hard-coded p256 using r11 = -(carry) as a branch-free mask.
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ movq $4294967295, %r9
+ movq $18446744069414584321, %r10
+ addq %rdx, %rdx
+ adcq %rax, %rax
+ adcq %rcx, %rcx
+ movq $0, %r11
+ adcq %r8, %r8
+ sbbq $0, %r11
+ andq %r11, %r9
+ andq %r11, %r10
+ subq %r11, %rdx
+ sbbq %r9, %rax
+ movq %rdx, (%rdi)
+ sbbq $0, %rcx
+ movq %rax, 8(%rdi)
+ sbbq %r10, %r8
+ movq %rcx, 16(%rdi)
+ movq %r8, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_dbl_4,.-sp_256_mont_dbl_4
+#endif /* __APPLE__ */
+/* Triple a Montgomery form number (r = a + a + a % m).
+ *
+ * r Result of tripling.
+ * a Number to triple in Montgomery form.
+ * m Modulus (prime).
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_tpl_4
+.type sp_256_mont_tpl_4,@function
+.align 16
+sp_256_mont_tpl_4:
+#else
+.globl _sp_256_mont_tpl_4
+.p2align 4
+_sp_256_mont_tpl_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a. Two rounds: double (with conditional subtract
+ # of hard-coded p256), then add a again (with another conditional
+ # subtract). Both corrections are branch-free masked subtracts.
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ movq $4294967295, %r9
+ movq $18446744069414584321, %r10
+ addq %rdx, %rdx
+ adcq %rax, %rax
+ adcq %rcx, %rcx
+ movq $0, %r11
+ adcq %r8, %r8
+ sbbq $0, %r11
+ andq %r11, %r9
+ andq %r11, %r10
+ subq %r11, %rdx
+ sbbq %r9, %rax
+ sbbq $0, %rcx
+ sbbq %r10, %r8
+ movq $4294967295, %r9
+ movq $18446744069414584321, %r10
+ addq (%rsi), %rdx
+ adcq 8(%rsi), %rax
+ adcq 16(%rsi), %rcx
+ movq $0, %r11
+ adcq 24(%rsi), %r8
+ sbbq $0, %r11
+ andq %r11, %r9
+ andq %r11, %r10
+ subq %r11, %rdx
+ sbbq %r9, %rax
+ movq %rdx, (%rdi)
+ sbbq $0, %rcx
+ movq %rax, 8(%rdi)
+ sbbq %r10, %r8
+ movq %rcx, 16(%rdi)
+ movq %r8, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_tpl_4,.-sp_256_mont_tpl_4
+#endif /* __APPLE__ */
+/* Subtract two Montgomery form numbers (r = a - b % m).
+ *
+ * r Result of subtraction.
+ * a Number to subtract from in Montgomery form.
+ * b Number to subtract with in Montgomery form.
+ * m Modulus (prime).
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_sub_4
+.type sp_256_mont_sub_4,@function
+.align 16
+sp_256_mont_sub_4:
+#else
+.globl _sp_256_mont_sub_4
+.p2align 4
+_sp_256_mont_sub_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a, rdx = b. Subtract, then conditionally add
+ # hard-coded p256 back using rsi = -(borrow) as a branch-free mask.
+ movq (%rsi), %rax
+ movq 8(%rsi), %rcx
+ movq 16(%rsi), %r8
+ movq 24(%rsi), %r9
+ movq $4294967295, %r10
+ movq $18446744069414584321, %r11
+ subq (%rdx), %rax
+ sbbq 8(%rdx), %rcx
+ sbbq 16(%rdx), %r8
+ movq $0, %rsi
+ sbbq 24(%rdx), %r9
+ sbbq $0, %rsi
+ andq %rsi, %r10
+ andq %rsi, %r11
+ addq %rsi, %rax
+ adcq %r10, %rcx
+ movq %rax, (%rdi)
+ adcq $0, %r8
+ movq %rcx, 8(%rdi)
+ adcq %r11, %r9
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_sub_4,.-sp_256_mont_sub_4
+#endif /* __APPLE__ */
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
<doc_update>
+#ifndef __APPLE__
+.globl sp_256_div2_4
+.type sp_256_div2_4,@function
+.align 16
+sp_256_div2_4:
+#else
+.globl _sp_256_div2_4
+.p2align 4
+_sp_256_div2_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a (the m argument in rdx is not read; p256 is
+ # hard-coded). If a is odd, add p so the value becomes even, then shift
+ # right by one; r11 catches the carry out of the addition and is shifted
+ # into the top word. Constant time: the add is masked by r11 = -(a & 1).
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rax
+ movq 16(%rsi), %rcx
+ movq 24(%rsi), %r8
+ movq $4294967295, %r9
+ movq $18446744069414584321, %r10
+ movq %rdx, %r11
+ andq $1, %r11
+ negq %r11
+ andq %r11, %r9
+ andq %r11, %r10
+ addq %r11, %rdx
+ adcq %r9, %rax
+ adcq $0, %rcx
+ adcq %r10, %r8
+ movq $0, %r11
+ adcq $0, %r11
+ shrdq $1, %rax, %rdx
+ shrdq $1, %rcx, %rax
+ shrdq $1, %r8, %rcx
+ shrdq $1, %r11, %r8
+ movq %rdx, (%rdi)
+ movq %rax, 8(%rdi)
+ movq %rcx, 16(%rdi)
+ movq %r8, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_div2_4,.-sp_256_div2_4
+#endif /* __APPLE__ */
+/* Multiply two Montgomery form numbers mod the modulus (prime).
+ * (r = a * b mod m)
+ *
+ * r Result of multiplication.
+ * a First number to multiply in Montgomery form.
+ * b Second number to multiply in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_mul_avx2_4
+.type sp_256_mont_mul_avx2_4,@function
+.align 16
+sp_256_mont_mul_avx2_4:
+#else
+.globl _sp_256_mont_mul_avx2_4
+.p2align 4
+_sp_256_mont_mul_avx2_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a, rdx = b (moved to rbp; mulxq uses rdx
+ # implicitly). Requires BMI2 (mulxq) and ADX (adcxq/adoxq): two
+ # independent carry chains (CF and OF) are interleaved, so the
+ # instruction order is significant. Product accumulates in r8-r15,
+ # then the same P-256-specific reduction as sp_256_mont_mul_4 runs.
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq %rdx, %rbp
+ # A[0] * B[0]
+ movq (%rbp), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rax, %rcx
+ xorq %r15, %r15
+ adcxq %rax, %r9
+ # A[1] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rcx, %r10
+ # A[0] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq (%rsi), %rax, %rcx
+ adoxq %rax, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rax, %r14
+ adoxq %rcx, %r10
+ adcxq %rax, %r11
+ # A[1] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 8(%rsi), %rax, %rcx
+ adcxq %r14, %r12
+ adoxq %rax, %r11
+ adcxq %r15, %r13
+ adoxq %rcx, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rax, %rcx
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rax, %r10
+ # A[1] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq 8(%rsi), %rdx, %rax
+ adcxq %rcx, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbp), %rdx
+ adoxq %rax, %r11
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r12
+ # A[2] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 16(%rsi), %rdx, %rax
+ adcxq %rcx, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbp), %rdx
+ adoxq %rax, %r13
+ mulxq 24(%rsi), %rax, %rcx
+ adoxq %r15, %r14
+ adcxq %rax, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rax
+ adcxq %rcx, %r15
+ xorq %rcx, %rcx
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq 24(%rsi), %rdx
+ adcxq %rax, %r12
+ mulxq (%rbp), %rbx, %rax
+ adoxq %rbx, %r11
+ adoxq %rax, %r12
+ # A[3] * B[2]
+ mulxq 16(%rbp), %rdx, %rax
+ adcxq %rdx, %r13
+ # A[2] * B[3]
+ movq 24(%rbp), %rdx
+ adcxq %rax, %r14
+ mulxq 16(%rsi), %rax, %rdx
+ adcxq %rcx, %r15
+ adoxq %rax, %r13
+ adoxq %rdx, %r14
+ adoxq %rcx, %r15
+ # Start Reduction
+ # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
+ # - a[0] << 32 << 192
+ # + (a[0] * 2) << 192
+ movq %r8, %rax
+ movq %r11, %rdx
+ addq %r8, %rdx
+ movq %r9, %rsi
+ addq %r8, %rdx
+ movq %r10, %rbp
+ # a[0]-a[2] << 32
+ shlq $32, %r8
+ shldq $32, %rsi, %r10
+ shldq $32, %rax, %r9
+ # - a[0] << 32 << 192
+ subq %r8, %rdx
+ # + a[0]-a[2] << 32 << 64
+ addq %r8, %rsi
+ adcq %r9, %rbp
+ adcq %r10, %rdx
+ # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
+ # a += mu << 256
+ xorq %r8, %r8
+ addq %rax, %r12
+ adcq %rsi, %r13
+ adcq %rbp, %r14
+ adcq %rdx, %r15
+ sbbq $0, %r8
+ # a += mu << 192
+ addq %rax, %r11
+ adcq %rsi, %r12
+ adcq %rbp, %r13
+ adcq %rdx, %r14
+ adcq $0, %r15
+ sbbq $0, %r8
+ # mu <<= 32
+ movq %rdx, %rcx
+ shldq $32, %rbp, %rdx
+ shldq $32, %rsi, %rbp
+ shldq $32, %rax, %rsi
+ shrq $32, %rcx
+ shlq $32, %rax
+ # a += (mu << 32) << 64
+ addq %rbp, %r11
+ adcq %rdx, %r12
+ adcq %rcx, %r13
+ adcq $0, %r14
+ adcq $0, %r15
+ sbbq $0, %r8
+ # a -= (mu << 32) << 192
+ subq %rax, %r11
+ sbbq %rsi, %r12
+ sbbq %rbp, %r13
+ sbbq %rdx, %r14
+ sbbq %rcx, %r15
+ adcq $0, %r8
+ # r8 = overflow mask (0 or -1); branch-free conditional subtract of p256.
+ movq $4294967295, %rax
+ movq $18446744069414584321, %rsi
+ # mask m and sub from result if overflow
+ # m[0] = -1 & mask = mask
+ andq %r8, %rax
+ # m[2] = 0 & mask = 0
+ andq %r8, %rsi
+ subq %r8, %r12
+ sbbq %rax, %r13
+ sbbq $0, %r14
+ sbbq %rsi, %r15
+ movq %r12, (%rdi)
+ movq %r13, 8(%rdi)
+ movq %r14, 16(%rdi)
+ movq %r15, 24(%rdi)
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_mul_avx2_4,.-sp_256_mont_mul_avx2_4
+#endif /* __APPLE__ */
+/* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ * m Modulus (prime).
+ * mp Montgomery multiplier.
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_sqr_avx2_4
+.type sp_256_mont_sqr_avx2_4,@function
+.align 16
+sp_256_mont_sqr_avx2_4:
+#else
+.globl _sp_256_mont_sqr_avx2_4
+.p2align 4
+_sp_256_mont_sqr_avx2_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = a. Requires BMI2 (mulxq) and ADX (adcxq/adoxq);
+ # the CF and OF carry chains are interleaved, so instruction order is
+ # significant. Off-diagonal products are doubled via the adcx chain
+ # while diagonal squares are folded in via the adox chain, then the
+ # P-256-specific reduction runs.
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ # A[0] * A[1]
+ movq (%rsi), %rdx
+ movq 16(%rsi), %r15
+ mulxq 8(%rsi), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rsi), %r11, %r12
+ # A[2] * A[1]
+ movq %r15, %rdx
+ mulxq 8(%rsi), %rcx, %rbx
+ # A[2] * A[3]
+ mulxq 24(%rsi), %r13, %r14
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ adoxq %rbx, %r12
+ # A[2] * A[0]
+ mulxq (%rsi), %rcx, %rbx
+ # A[1] * A[3]
+ movq 8(%rsi), %rdx
+ adoxq %r15, %r13
+ mulxq 24(%rsi), %rax, %r8
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ adcxq %rbx, %r11
+ adcxq %rax, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %r8, %rax
+ adcxq %r9, %r9
+ adcxq %r10, %r10
+ adoxq %rax, %r9
+ # A[1] * A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rcx, %rbx
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adcxq %r12, %r12
+ adoxq %rbx, %r11
+ adcxq %r13, %r13
+ adoxq %rax, %r12
+ adcxq %r14, %r14
+ # A[3] * A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rax, %rbx
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rax, %r14
+ adoxq %rbx, %r15
+ # Start Reduction
+ # mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
+ # - a[0] << 32 << 192
+ # + (a[0] * 2) << 192
+ movq %r8, %rax
+ movq %r11, %rdx
+ addq %r8, %rdx
+ movq %r9, %rsi
+ addq %r8, %rdx
+ movq %r10, %rcx
+ # a[0]-a[2] << 32
+ shlq $32, %r8
+ shldq $32, %rsi, %r10
+ shldq $32, %rax, %r9
+ # - a[0] << 32 << 192
+ subq %r8, %rdx
+ # + a[0]-a[2] << 32 << 64
+ addq %r8, %rsi
+ adcq %r9, %rcx
+ adcq %r10, %rdx
+ # a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
+ # a += mu << 256
+ xorq %r8, %r8
+ addq %rax, %r12
+ adcq %rsi, %r13
+ adcq %rcx, %r14
+ adcq %rdx, %r15
+ sbbq $0, %r8
+ # a += mu << 192
+ addq %rax, %r11
+ adcq %rsi, %r12
+ adcq %rcx, %r13
+ adcq %rdx, %r14
+ adcq $0, %r15
+ sbbq $0, %r8
+ # mu <<= 32
+ movq %rdx, %rbx
+ shldq $32, %rcx, %rdx
+ shldq $32, %rsi, %rcx
+ shldq $32, %rax, %rsi
+ shrq $32, %rbx
+ shlq $32, %rax
+ # a += (mu << 32) << 64
+ addq %rcx, %r11
+ adcq %rdx, %r12
+ adcq %rbx, %r13
+ adcq $0, %r14
+ adcq $0, %r15
+ sbbq $0, %r8
+ # a -= (mu << 32) << 192
+ subq %rax, %r11
+ sbbq %rsi, %r12
+ sbbq %rcx, %r13
+ sbbq %rdx, %r14
+ sbbq %rbx, %r15
+ adcq $0, %r8
+ # r8 = overflow mask (0 or -1); branch-free conditional subtract of p256.
+ movq $4294967295, %rax
+ movq $18446744069414584321, %rsi
+ # mask m and sub from result if overflow
+ # m[0] = -1 & mask = mask
+ andq %r8, %rax
+ # m[2] = 0 & mask = 0
+ andq %r8, %rsi
+ subq %r8, %r12
+ sbbq %rax, %r13
+ sbbq $0, %r14
+ sbbq %rsi, %r15
+ movq %r12, (%rdi)
+ movq %r13, 8(%rdi)
+ movq %r14, 16(%rdi)
+ movq %r15, 24(%rdi)
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_sqr_avx2_4,.-sp_256_mont_sqr_avx2_4
+#endif /* __APPLE__ */
+/* Add 1 to a. (a = a + 1)
+ *
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_256_add_one_4
+.type sp_256_add_one_4,@function
+.align 16
+sp_256_add_one_4:
+#else
+.globl _sp_256_add_one_4
+.p2align 4
+_sp_256_add_one_4:
+#endif /* __APPLE__ */
+ # SysV: rdi = a (4 words, incremented in place). Carry propagates across
+ # all four words; carry out of the top word is discarded.
+ addq $1, (%rdi)
+ adcq $0, 8(%rdi)
+ adcq $0, 16(%rdi)
+ adcq $0, 24(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_add_one_4,.-sp_256_add_one_4
+#endif /* __APPLE__ */
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+#ifndef __APPLE__
+.globl sp_256_from_bin
+.type sp_256_from_bin,@function
+.align 16
+sp_256_from_bin:
+#else
+.globl _sp_256_from_bin
+.p2align 4
+_sp_256_from_bin:
+#endif /* __APPLE__ */
+ # SysV: rdi = r, rsi = size, rdx = a, rcx = n.
+ # r9 = end of input (a + n); r10 = end of output (r + 32 - rsi is not
+ # read here; the output written is fixed at 32 bytes plus zero fill).
+ # Reads big-endian input from the end backwards into little-endian
+ # words. movbeq requires the MOVBE ISA extension.
+ movq %rdx, %r9
+ movq %rdi, %r10
+ addq %rcx, %r9
+ addq $32, %r10
+ xorq %r11, %r11
+ jmp L_256_from_bin_64_end
+L_256_from_bin_64_start:
+ # Consume 64-byte chunks from the tail of the input.
+ subq $64, %r9
+ movbeq 56(%r9), %rax
+ movbeq 48(%r9), %r8
+ movq %rax, (%rdi)
+ movq %r8, 8(%rdi)
+ movbeq 40(%r9), %rax
+ movbeq 32(%r9), %r8
+ movq %rax, 16(%rdi)
+ movq %r8, 24(%rdi)
+ movbeq 24(%r9), %rax
+ movbeq 16(%r9), %r8
+ movq %rax, 32(%rdi)
+ movq %r8, 40(%rdi)
+ movbeq 8(%r9), %rax
+ movbeq (%r9), %r8
+ movq %rax, 48(%rdi)
+ movq %r8, 56(%rdi)
+ addq $64, %rdi
+ subq $64, %rcx
+L_256_from_bin_64_end:
+ cmpq $63, %rcx
+ jg L_256_from_bin_64_start
+ jmp L_256_from_bin_8_end
+L_256_from_bin_8_start:
+ # Then 8-byte chunks.
+ subq $8, %r9
+ movbeq (%r9), %rax
+ movq %rax, (%rdi)
+ addq $8, %rdi
+ subq $8, %rcx
+L_256_from_bin_8_end:
+ cmpq $7, %rcx
+ jg L_256_from_bin_8_start
+ cmpq %r11, %rcx
+ je L_256_from_bin_hi_end
+ # Remaining 1-7 top bytes sit at the start of the big-endian input;
+ # accumulate them byte-by-byte into r8 (rax upper bits stay zero).
+ movq %r11, %r8
+ movq %r11, %rax
+L_256_from_bin_hi_start:
+ movb (%rdx), %al
+ shlq $8, %r8
+ incq %rdx
+ addq %rax, %r8
+ decq %rcx
+ jg L_256_from_bin_hi_start
+ movq %r8, (%rdi)
+ addq $8, %rdi
+L_256_from_bin_hi_end:
+ # Zero-fill any remaining output words up to r + 32.
+ cmpq %r10, %rdi
+ je L_256_from_bin_zero_end
+L_256_from_bin_zero_start:
+ movq %r11, (%rdi)
+ addq $8, %rdi
+ cmpq %r10, %rdi
+ jl L_256_from_bin_zero_start
+L_256_from_bin_zero_end:
+ repz retq
+#ifndef __APPLE__
+.size sp_256_from_bin,.-sp_256_from_bin
+#endif /* __APPLE__ */
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 32
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r (4 little-endian 64-bit words), %rsi = a (32-byte out).
+ * movbe byte-swaps on load, so storing the words in reverse order
+ * produces the big-endian encoding.
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+#ifndef __APPLE__
+.globl sp_256_to_bin
+.type sp_256_to_bin,@function
+.align 16
+sp_256_to_bin:
+#else
+.globl _sp_256_to_bin
+.p2align 4
+_sp_256_to_bin:
+#endif /* __APPLE__ */
+ movbeq 24(%rdi), %rdx
+ movbeq 16(%rdi), %rax
+ movq %rdx, (%rsi)
+ movq %rax, 8(%rsi)
+ movbeq 8(%rdi), %rdx
+ movbeq (%rdi), %rax
+ movq %rdx, 16(%rsi)
+ movq %rax, 24(%rsi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_to_bin,.-sp_256_to_bin
+#endif /* __APPLE__ */
+/* Add b to a into r. (r = a + b)
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r, %rsi = a, %rdx = b (4 words each).
+ * Out: %rax = carry out of the top word (0 or 1).
+ * Straight-line add-with-carry chain; no branches (constant time).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_256_add_4
+.type sp_256_add_4,@function
+.align 16
+sp_256_add_4:
+#else
+.globl _sp_256_add_4
+.p2align 4
+_sp_256_add_4:
+#endif /* __APPLE__ */
+ # Add
+ movq (%rsi), %rcx
+ xorq %rax, %rax
+ addq (%rdx), %rcx
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ adcq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ adcq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ adcq 24(%rdx), %r8
+ movq %r8, 24(%rdi)
+ # Capture the final carry as the return value.
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_256_add_4,.-sp_256_add_4
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r (8-word product), %rsi = a, %rdx = b (4 words each).
+ * Schoolbook 4x4 multiply using one-operand mulq, accumulating each
+ * product column into a rotating three-register window.  The low 4
+ * result words are staged on the stack and copied out last, so r may
+ * alias a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_256_mul_4
+.type sp_256_mul_4,@function
+.align 16
+sp_256_mul_4:
+#else
+.globl _sp_256_mul_4
+.p2align 4
+_sp_256_mul_4:
+#endif /* __APPLE__ */
+ # b -> %rcx: one-operand mulq writes %rdx:%rax.
+ movq %rdx, %rcx
+ subq $32, %rsp
+ # A[0] * B[0]
+ movq (%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ movq %rax, (%rsp)
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[0]
+ movq (%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 8(%rsp)
+ # A[0] * B[2]
+ movq 16(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[1]
+ movq 8(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[0]
+ movq (%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 16(%rsp)
+ # A[0] * B[3]
+ movq 24(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[2]
+ movq 16(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[1]
+ movq 8(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[0]
+ movq (%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 24(%rsp)
+ # A[1] * B[3]
+ movq 24(%rcx), %rax
+ mulq 8(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[2]
+ movq 16(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[1]
+ movq 8(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 32(%rdi)
+ # A[2] * B[3]
+ movq 24(%rcx), %rax
+ mulq 16(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[2]
+ movq 16(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 40(%rdi)
+ # A[3] * B[3]
+ movq 24(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ movq %r8, 48(%rdi)
+ movq %r9, 56(%rdi)
+ # Copy the staged low 4 result words out to r.
+ movq (%rsp), %rax
+ movq 8(%rsp), %rdx
+ movq 16(%rsp), %r8
+ movq 24(%rsp), %r9
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ addq $32, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mul_4,.-sp_256_mul_4
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r (8-word product), %rsi = a, %rdx = b (moved to %rbp,
+ *      since MULX takes its implicit multiplicand from %rdx).
+ * Uses MULX (BMI2) plus the two independent ADCX/ADOX (ADX) carry
+ * chains to accumulate partial products without serialising on a
+ * single carry flag.  Result registers %r8-%r15 hold product words
+ * 0-7.  NOTE(review): despite the "avx2" name, the body uses BMI2/ADX
+ * scalar instructions, not AVX2 vector ones.
+ *
+ * r Result of multiplication.
+ * a First number to multiply.
+ * b Second number to multiply.
+ */
+#ifndef __APPLE__
+.globl sp_256_mul_avx2_4
+.type sp_256_mul_avx2_4,@function
+.align 16
+sp_256_mul_avx2_4:
+#else
+.globl _sp_256_mul_avx2_4
+.p2align 4
+_sp_256_mul_avx2_4:
+#endif /* __APPLE__ */
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq %rdx, %rbp
+ # A[0] * B[0]
+ movq (%rbp), %rdx
+ mulxq (%rsi), %r8, %r9
+ # A[2] * B[0]
+ mulxq 16(%rsi), %r10, %r11
+ # A[1] * B[0]
+ mulxq 8(%rsi), %rax, %rcx
+ xorq %r15, %r15
+ adcxq %rax, %r9
+ # A[1] * B[3]
+ movq 24(%rbp), %rdx
+ mulxq 8(%rsi), %r12, %r13
+ adcxq %rcx, %r10
+ # A[0] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq (%rsi), %rax, %rcx
+ adoxq %rax, %r9
+ # A[2] * B[1]
+ mulxq 16(%rsi), %rax, %r14
+ adoxq %rcx, %r10
+ adcxq %rax, %r11
+ # A[1] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 8(%rsi), %rax, %rcx
+ adcxq %r14, %r12
+ adoxq %rax, %r11
+ adcxq %r15, %r13
+ adoxq %rcx, %r12
+ # A[0] * B[2]
+ mulxq (%rsi), %rax, %rcx
+ adoxq %r15, %r13
+ xorq %r14, %r14
+ adcxq %rax, %r10
+ # A[1] * B[1]
+ movq 8(%rbp), %rdx
+ mulxq 8(%rsi), %rdx, %rax
+ adcxq %rcx, %r11
+ adoxq %rdx, %r10
+ # A[3] * B[1]
+ movq 8(%rbp), %rdx
+ adoxq %rax, %r11
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r12
+ # A[2] * B[2]
+ movq 16(%rbp), %rdx
+ mulxq 16(%rsi), %rdx, %rax
+ adcxq %rcx, %r13
+ adoxq %rdx, %r12
+ # A[3] * B[3]
+ movq 24(%rbp), %rdx
+ adoxq %rax, %r13
+ mulxq 24(%rsi), %rax, %rcx
+ adoxq %r15, %r14
+ adcxq %rax, %r14
+ # A[0] * B[3]
+ mulxq (%rsi), %rdx, %rax
+ adcxq %rcx, %r15
+ xorq %rcx, %rcx
+ adcxq %rdx, %r11
+ # A[3] * B[0]
+ movq 24(%rsi), %rdx
+ adcxq %rax, %r12
+ mulxq (%rbp), %rbx, %rax
+ adoxq %rbx, %r11
+ adoxq %rax, %r12
+ # A[3] * B[2]
+ mulxq 16(%rbp), %rdx, %rax
+ adcxq %rdx, %r13
+ # A[2] * B[3]
+ movq 24(%rbp), %rdx
+ adcxq %rax, %r14
+ mulxq 16(%rsi), %rax, %rdx
+ adcxq %rcx, %r15
+ adoxq %rax, %r13
+ adoxq %rdx, %r14
+ adoxq %rcx, %r15
+ # Store product words 0-7.
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, 32(%rdi)
+ movq %r13, 40(%rdi)
+ movq %r14, 48(%rdi)
+ movq %r15, 56(%rdi)
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mul_avx2_4,.-sp_256_mul_avx2_4
+#endif /* __APPLE__ */
+/* Sub b from a into a. (a -= b)
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = a (modified in place), %rsi = b (4 words each).
+ * Out: %rax = 0 if no borrow, -1 (all ones) if the subtract borrowed.
+ * Branch-free subtract-with-borrow chain (constant time).
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_256_sub_in_place_4
+.type sp_256_sub_in_place_4,@function
+.align 16
+sp_256_sub_in_place_4:
+#else
+.globl _sp_256_sub_in_place_4
+.p2align 4
+_sp_256_sub_in_place_4:
+#endif /* __APPLE__ */
+ xorq %rax, %rax
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rcx
+ movq 16(%rsi), %r8
+ movq 24(%rsi), %r9
+ subq %rdx, (%rdi)
+ sbbq %rcx, 8(%rdi)
+ sbbq %r8, 16(%rdi)
+ sbbq %r9, 24(%rdi)
+ # %rax = 0 - borrow, i.e. a borrow mask.
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_256_sub_in_place_4,.-sp_256_sub_in_place_4
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not subtracting.
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r, %rsi = a, %rdx = b, %rcx = m (0 or all-ones mask).
+ * Out: %rax = 0 - borrow (0 or -1).
+ * b is masked with m and then subtracted unconditionally, so the same
+ * instruction sequence runs either way (constant time).
+ * NOTE(review): despite the "avx2" name only general-purpose
+ * instructions appear in the body.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_256_cond_sub_avx2_4
+.type sp_256_cond_sub_avx2_4,@function
+.align 16
+sp_256_cond_sub_avx2_4:
+#else
+.globl _sp_256_cond_sub_avx2_4
+.p2align 4
+_sp_256_cond_sub_avx2_4:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq $0, %rax
+ # Load b and mask it: %r12-%r15 = b & m.
+ movq (%rdx), %r12
+ movq 8(%rdx), %r13
+ movq 16(%rdx), %r14
+ movq 24(%rdx), %r15
+ andq %rcx, %r12
+ andq %rcx, %r13
+ andq %rcx, %r14
+ andq %rcx, %r15
+ movq (%rsi), %r8
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq %r12, %r8
+ sbbq %r13, %r9
+ sbbq %r14, %r10
+ sbbq %r15, %r11
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ sbbq $0, %rax
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_256_cond_sub_avx2_4,.-sp_256_cond_sub_avx2_4
+#endif /* __APPLE__ */
<br>
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r (5-word result, 40 bytes), %rsi = a (4 words),
+ *      %rdx = b (single digit, moved to %rcx because mulq uses %rdx).
+ * The final carry word is written to 32(%rdi).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_256_mul_d_4
+.type sp_256_mul_d_4,@function
+.align 16
+sp_256_mul_d_4:
+#else
+.globl _sp_256_mul_d_4
+.p2align 4
+_sp_256_mul_d_4:
+#endif /* __APPLE__ */
+ movq %rdx, %rcx
+ # A[0] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ movq %r8, (%rdi)
+ # A[1] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 8(%rsi)
+ addq %rax, %r9
+ movq %r9, 8(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 16(%rsi)
+ addq %rax, %r10
+ movq %r10, 16(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B
+ movq %rcx, %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ movq %r8, 24(%rdi)
+ movq %r9, 32(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mul_d_4,.-sp_256_mul_d_4
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r (5-word result, 40 bytes), %rsi = a (4 words),
+ *      %rdx = b (single digit; kept in %rax and reloaded into %rdx,
+ *      MULX's implicit multiplicand register).
+ * Uses MULX (BMI2) with ADCX/ADOX (ADX) to chain the carries.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_256_mul_d_avx2_4
+.type sp_256_mul_d_avx2_4,@function
+.align 16
+sp_256_mul_d_avx2_4:
+#else
+.globl _sp_256_mul_d_avx2_4
+.p2align 4
+_sp_256_mul_d_avx2_4:
+#endif /* __APPLE__ */
+ movq %rdx, %rax
+ # A[0] * B
+ movq %rax, %rdx
+ xorq %r11, %r11
+ mulxq (%rsi), %r9, %r10
+ movq %r9, (%rdi)
+ # A[1] * B
+ mulxq 8(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 8(%rdi)
+ adoxq %r8, %r9
+ # A[2] * B
+ mulxq 16(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 16(%rdi)
+ adoxq %r8, %r10
+ # A[3] * B
+ mulxq 24(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ adcxq %r11, %r9
+ movq %r10, 24(%rdi)
+ movq %r9, 32(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mul_d_avx2_4,.-sp_256_mul_d_avx2_4
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Square a and put result in r. (r = a * a)
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r (8-word result), %rsi = a (4 words).
+ * Schoolbook squaring: each off-diagonal product A[i]*A[j] (i != j) is
+ * added twice, diagonal squares once.  The low 4 result words are
+ * staged on the stack and copied out last, so r may alias a.
+ * Clobbers: %rax, %rcx, %rdx, %r8-%r11, flags.  No callee-saved
+ * registers are used (the original saved/restored an unused %r12).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_256_sqr_4
+.type sp_256_sqr_4,@function
+.align 16
+sp_256_sqr_4:
+#else
+.globl _sp_256_sqr_4
+.p2align 4
+_sp_256_sqr_4:
+#endif /* __APPLE__ */
+ subq $32, %rsp
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ xorq %r9, %r9
+ movq %rax, (%rsp)
+ movq %rdx, %r8
+ # A[0] * A[1] (added twice)
+ movq 8(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 8(%rsp)
+ # A[0] * A[2] (added twice)
+ movq 16(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ movq %r9, 16(%rsp)
+ # A[0] * A[3] (added twice)
+ movq 24(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * A[2] (added twice)
+ movq 16(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %rcx, 24(%rsp)
+ # A[1] * A[3] (added twice)
+ movq 24(%rsi), %rax
+ mulq 8(%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 32(%rdi)
+ # A[2] * A[3] (added twice)
+ movq 24(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ movq %r9, 40(%rdi)
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ movq %rcx, 48(%rdi)
+ movq %r8, 56(%rdi)
+ # Copy the staged low 4 result words out to r.
+ movq (%rsp), %rax
+ movq 8(%rsp), %rdx
+ movq 16(%rsp), %r10
+ movq 24(%rsp), %r11
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ addq $32, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_256_sqr_4,.-sp_256_sqr_4
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r (8-word result), %rsi = a (4 words).
+ * Uses MULX (BMI2) and ADCX/ADOX (ADX): first accumulates the
+ * off-diagonal products into %r9-%r14, then doubles them with an
+ * add-with-carry chain while folding in the diagonal squares.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ */
+#ifndef __APPLE__
+.globl sp_256_sqr_avx2_4
+.type sp_256_sqr_avx2_4,@function
+.align 16
+sp_256_sqr_avx2_4:
+#else
+.globl _sp_256_sqr_avx2_4
+.p2align 4
+_sp_256_sqr_avx2_4:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ # A[0] * A[1]
+ movq (%rsi), %rdx
+ movq 16(%rsi), %r15
+ mulxq 8(%rsi), %r9, %r10
+ # A[0] * A[3]
+ mulxq 24(%rsi), %r11, %r12
+ # A[2] * A[1]
+ movq %r15, %rdx
+ mulxq 8(%rsi), %rcx, %rbx
+ # A[2] * A[3]
+ mulxq 24(%rsi), %r13, %r14
+ xorq %r15, %r15
+ adoxq %rcx, %r11
+ adoxq %rbx, %r12
+ # A[2] * A[0]
+ mulxq (%rsi), %rcx, %rbx
+ # A[1] * A[3]
+ movq 8(%rsi), %rdx
+ adoxq %r15, %r13
+ mulxq 24(%rsi), %rax, %r8
+ adcxq %rcx, %r10
+ adoxq %r15, %r14
+ adcxq %rbx, %r11
+ adcxq %rax, %r12
+ adcxq %r8, %r13
+ adcxq %r15, %r14
+ # Double with Carry Flag
+ xorq %r15, %r15
+ # A[0] * A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %r8, %rax
+ adcxq %r9, %r9
+ adcxq %r10, %r10
+ adoxq %rax, %r9
+ # A[1] * A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rcx, %rbx
+ adcxq %r11, %r11
+ adoxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adcxq %r12, %r12
+ adoxq %rbx, %r11
+ adcxq %r13, %r13
+ adoxq %rax, %r12
+ adcxq %r14, %r14
+ # A[3] * A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rax, %rbx
+ adoxq %rcx, %r13
+ adcxq %r15, %r15
+ adoxq %rax, %r14
+ adoxq %rbx, %r15
+ # Store result words 0-7.
+ movq %r8, (%rdi)
+ movq %r9, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq %r12, 32(%rdi)
+ movq %r13, 40(%rdi)
+ movq %r14, 48(%rdi)
+ movq %r15, 56(%rdi)
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_256_sqr_avx2_4,.-sp_256_sqr_avx2_4
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Reduce the number back to 256 bits using Montgomery reduction.
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = a (8 words in, reduced 4-word result written to the low
+ *      4 words), %rsi = m, %rdx = mp (moved to %rax).
+ * One mu = a[i] * mp round per word, using MULX (BMI2) with ADCX/ADOX
+ * (ADX) dual carry chains; the final conditional subtract is done
+ * branch-free via the mask in %r11 (constant time).
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_256_mont_reduce_avx2_4
+.type sp_256_mont_reduce_avx2_4,@function
+.align 16
+sp_256_mont_reduce_avx2_4:
+#else
+.globl _sp_256_mont_reduce_avx2_4
+.p2align 4
+_sp_256_mont_reduce_avx2_4:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ movq %rdx, %rax
+ movq (%rdi), %r12
+ movq 8(%rdi), %r13
+ movq 16(%rdi), %r14
+ movq 24(%rdi), %r15
+ xorq %r11, %r11
+ xorq %r10, %r10
+ # a[0-4] += m[0-3] * mu = m[0-3] * (a[0] * mp)
+ movq 32(%rdi), %rbx
+ # mu = a[0] * mp
+ movq %r12, %rdx
+ mulxq %rax, %rdx, %rcx
+ # a[0] += m[0] * mu
+ mulx (%rsi), %r8, %r9
+ adcxq %r8, %r12
+ # a[1] += m[1] * mu
+ mulx 8(%rsi), %r8, %rcx
+ adoxq %r9, %r13
+ adcxq %r8, %r13
+ # a[2] += m[2] * mu
+ mulx 16(%rsi), %r8, %r9
+ adoxq %rcx, %r14
+ adcxq %r8, %r14
+ # a[3] += m[3] * mu
+ mulx 24(%rsi), %r8, %rcx
+ adoxq %r9, %r15
+ adcxq %r8, %r15
+ # a[4] += carry
+ adoxq %rcx, %rbx
+ adcxq %r10, %rbx
+ # carry
+ adoxq %r10, %r11
+ adcxq %r10, %r11
+ # a[1-5] += m[0-3] * mu = m[0-3] * (a[1] * mp)
+ movq 40(%rdi), %r12
+ # mu = a[1] * mp
+ movq %r13, %rdx
+ mulxq %rax, %rdx, %rcx
+ # a[1] += m[0] * mu
+ mulx (%rsi), %r8, %r9
+ adcxq %r8, %r13
+ # a[2] += m[1] * mu
+ mulx 8(%rsi), %r8, %rcx
+ adoxq %r9, %r14
+ adcxq %r8, %r14
+ # a[3] += m[2] * mu
+ mulx 16(%rsi), %r8, %r9
+ adoxq %rcx, %r15
+ adcxq %r8, %r15
+ # a[4] += m[3] * mu
+ mulx 24(%rsi), %r8, %rcx
+ adoxq %r9, %rbx
+ adcxq %r8, %rbx
+ # a[5] += carry
+ adoxq %rcx, %r12
+ adcxq %r11, %r12
+ movq %r10, %r11
+ # carry
+ adoxq %r10, %r11
+ adcxq %r10, %r11
+ # a[2-6] += m[0-3] * mu = m[0-3] * (a[2] * mp)
+ movq 48(%rdi), %r13
+ # mu = a[2] * mp
+ movq %r14, %rdx
+ mulxq %rax, %rdx, %rcx
+ # a[2] += m[0] * mu
+ mulx (%rsi), %r8, %r9
+ adcxq %r8, %r14
+ # a[3] += m[1] * mu
+ mulx 8(%rsi), %r8, %rcx
+ adoxq %r9, %r15
+ adcxq %r8, %r15
+ # a[4] += m[2] * mu
+ mulx 16(%rsi), %r8, %r9
+ adoxq %rcx, %rbx
+ adcxq %r8, %rbx
+ # a[5] += m[3] * mu
+ mulx 24(%rsi), %r8, %rcx
+ adoxq %r9, %r12
+ adcxq %r8, %r12
+ # a[6] += carry
+ adoxq %rcx, %r13
+ adcxq %r11, %r13
+ movq %r10, %r11
+ # carry
+ adoxq %r10, %r11
+ adcxq %r10, %r11
+ # a[3-7] += m[0-3] * mu = m[0-3] * (a[3] * mp)
+ movq 56(%rdi), %r14
+ # mu = a[3] * mp
+ movq %r15, %rdx
+ mulxq %rax, %rdx, %rcx
+ # a[3] += m[0] * mu
+ mulx (%rsi), %r8, %r9
+ adcxq %r8, %r15
+ # a[4] += m[1] * mu
+ mulx 8(%rsi), %r8, %rcx
+ adoxq %r9, %rbx
+ adcxq %r8, %rbx
+ # a[5] += m[2] * mu
+ mulx 16(%rsi), %r8, %r9
+ adoxq %rcx, %r12
+ adcxq %r8, %r12
+ # a[6] += m[3] * mu
+ mulx 24(%rsi), %r8, %rcx
+ adoxq %r9, %r13
+ adcxq %r8, %r13
+ # a[7] += carry
+ adoxq %rcx, %r14
+ adcxq %r11, %r14
+ movq %r10, %r11
+ # carry
+ adoxq %r10, %r11
+ adcxq %r10, %r11
+ # Subtract mod if carry
+ negq %r11
+ # Masked modulus words; the hard-coded constants match the NIST P-256
+ # group order (word 2 is all-ones, so the mask %r11 itself is
+ # subtracted for it).
+ movq $17562291160714782033, %r8
+ movq $13611842547513532036, %r9
+ movq $18446744069414584320, %rdx
+ andq %r11, %r8
+ andq %r11, %r9
+ andq %r11, %rdx
+ subq %r8, %rbx
+ sbbq %r9, %r12
+ sbbq %r11, %r13
+ sbbq %rdx, %r14
+ movq %rbx, (%rdi)
+ movq %r12, 8(%rdi)
+ movq %r13, 16(%rdi)
+ movq %r14, 24(%rdi)
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_256_mont_reduce_avx2_4,.-sp_256_mont_reduce_avx2_4
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* !WOLFSSL_SP_NO_256 */
+#ifdef WOLFSSL_SP_384
+/* Conditionally copy a into r using the mask m.
+ * m is -1 to copy and 0 when not.
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r (6 words, updated in place), %rsi = a, %rdx = m.
+ * Branch-free XOR-mask select: r ^= (r ^ a) & m, the same instruction
+ * stream for both mask values (constant time).
+ *
+ * r A single precision number to copy over.
+ * a A single precision number to copy.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_384_cond_copy_6
+.type sp_384_cond_copy_6,@function
+.align 16
+sp_384_cond_copy_6:
+#else
+.globl _sp_384_cond_copy_6
+.p2align 4
+_sp_384_cond_copy_6:
+#endif /* __APPLE__ */
+ movq (%rdi), %rax
+ movq 8(%rdi), %rcx
+ movq 16(%rdi), %r8
+ movq 24(%rdi), %r9
+ movq 32(%rdi), %r10
+ movq 40(%rdi), %r11
+ xorq (%rsi), %rax
+ xorq 8(%rsi), %rcx
+ xorq 16(%rsi), %r8
+ xorq 24(%rsi), %r9
+ xorq 32(%rsi), %r10
+ xorq 40(%rsi), %r11
+ andq %rdx, %rax
+ andq %rdx, %rcx
+ andq %rdx, %r8
+ andq %rdx, %r9
+ andq %rdx, %r10
+ andq %rdx, %r11
+ xorq %rax, (%rdi)
+ xorq %rcx, 8(%rdi)
+ xorq %r8, 16(%rdi)
+ xorq %r9, 24(%rdi)
+ xorq %r10, 32(%rdi)
+ xorq %r11, 40(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_384_cond_copy_6,.-sp_384_cond_copy_6
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r (12-word product), %rsi = a, %rdx = b (6 words each;
+ *      b is moved to %rcx since one-operand mulq writes %rdx:%rax).
+ * Schoolbook 6x6 multiply accumulating each product column in a
+ * rotating three-register window.  The low 6 result words are staged
+ * on the stack and copied out last, so r may alias a or b.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_384_mul_6
+.type sp_384_mul_6,@function
+.align 16
+sp_384_mul_6:
+#else
+.globl _sp_384_mul_6
+.p2align 4
+_sp_384_mul_6:
+#endif /* __APPLE__ */
+ movq %rdx, %rcx
+ subq $48, %rsp
+ # A[0] * B[0]
+ movq (%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ movq %rax, (%rsp)
+ movq %rdx, %r9
+ # A[0] * B[1]
+ movq 8(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[0]
+ movq (%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 8(%rsp)
+ # A[0] * B[2]
+ movq 16(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[1]
+ movq 8(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[0]
+ movq (%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 16(%rsp)
+ # A[0] * B[3]
+ movq 24(%rcx), %rax
+ mulq (%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[1] * B[2]
+ movq 16(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[1]
+ movq 8(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[0]
+ movq (%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 24(%rsp)
+ # A[0] * B[4]
+ movq 32(%rcx), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[1] * B[3]
+ movq 24(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B[2]
+ movq 16(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[1]
+ movq 8(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[0]
+ movq (%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 32(%rsp)
+ # A[0] * B[5]
+ movq 40(%rcx), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * B[4]
+ movq 32(%rcx), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * B[3]
+ movq 24(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B[2]
+ movq 16(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[1]
+ movq 8(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[0]
+ movq (%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 40(%rsp)
+ # A[1] * B[5]
+ movq 40(%rcx), %rax
+ mulq 8(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[2] * B[4]
+ movq 32(%rcx), %rax
+ mulq 16(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[3] * B[3]
+ movq 24(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B[2]
+ movq 16(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[1]
+ movq 8(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 48(%rdi)
+ # A[2] * B[5]
+ movq 40(%rcx), %rax
+ mulq 16(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[3] * B[4]
+ movq 32(%rcx), %rax
+ mulq 24(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[4] * B[3]
+ movq 24(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B[2]
+ movq 16(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ adcq $0, %r8
+ movq %r9, 56(%rdi)
+ # A[3] * B[5]
+ movq 40(%rcx), %rax
+ mulq 24(%rsi)
+ xorq %r9, %r9
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[4] * B[4]
+ movq 32(%rcx), %rax
+ mulq 32(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[5] * B[3]
+ movq 24(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %r10, 64(%rdi)
+ # A[4] * B[5]
+ movq 40(%rcx), %rax
+ mulq 32(%rsi)
+ xorq %r10, %r10
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[5] * B[4]
+ movq 32(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %r10
+ movq %r8, 72(%rdi)
+ # A[5] * B[5]
+ movq 40(%rcx), %rax
+ mulq 40(%rsi)
+ addq %rax, %r9
+ adcq %rdx, %r10
+ movq %r9, 80(%rdi)
+ movq %r10, 88(%rdi)
+ # Copy the staged low 6 result words out to r.
+ movq (%rsp), %rax
+ movq 8(%rsp), %rdx
+ movq 16(%rsp), %r8
+ movq 24(%rsp), %r9
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r8, 16(%rdi)
+ movq %r9, 24(%rdi)
+ movq 32(%rsp), %rax
+ movq 40(%rsp), %rdx
+ movq %rax, 32(%rdi)
+ movq %rdx, 40(%rdi)
+ addq $48, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_384_mul_6,.-sp_384_mul_6
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not subtracting.
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = r, %rsi = a, %rdx = b (6 words each), %rcx = m.
+ * Out: %rax = 0 - borrow (0 or -1).
+ * b is masked with m into a stack buffer, then subtracted
+ * unconditionally, so the instruction stream is identical for both
+ * mask values (constant time).
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_384_cond_sub_6
+.type sp_384_cond_sub_6,@function
+.align 16
+sp_384_cond_sub_6:
+#else
+.globl _sp_384_cond_sub_6
+.p2align 4
+_sp_384_cond_sub_6:
+#endif /* __APPLE__ */
+ subq $48, %rsp
+ movq $0, %rax
+ # Stage b & m on the stack.
+ movq (%rdx), %r8
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ # r = a - (b & m), propagating the borrow word to word.
+ movq (%rsi), %r8
+ movq (%rsp), %rdx
+ subq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, (%rdi)
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq %r9, 40(%rdi)
+ sbbq $0, %rax
+ addq $48, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_384_cond_sub_6,.-sp_384_cond_sub_6
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = a (12 words in, reduced 6-word result in the low words).
+ * The m (%rsi) and mp (%rdx) parameters are not referenced: both
+ * registers are immediately reused as scratch, and the reduction is
+ * specialised to the fixed modulus structure using shift-by-32
+ * combinations (the final masked subtract spells out the NIST P-384
+ * prime words).  Two words are reduced per round, three rounds total.
+ * NOTE(review): this non-AVX2 routine sits inside an
+ * HAVE_INTEL_AVX2 guard -- confirm against the generator whether that
+ * is intentional.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_384_mont_reduce_6
+.type sp_384_mont_reduce_6,@function
+.align 16
+sp_384_mont_reduce_6:
+#else
+.globl _sp_384_mont_reduce_6
+.p2align 4
+_sp_384_mont_reduce_6:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ push %rbp
+ movq (%rdi), %r11
+ movq 8(%rdi), %r12
+ movq 16(%rdi), %r13
+ movq 24(%rdi), %r14
+ movq 32(%rdi), %r15
+ movq 40(%rdi), %rsi
+ xorq %r10, %r10
+ # a[0-7] += m[0-5] * mu[0..1] = m[0-5] * (a[0..1] * mp)
+ movq 48(%rdi), %rbx
+ movq 56(%rdi), %rbp
+ movq %r11, %rdx
+ movq %r12, %rax
+ shldq $32, %rdx, %rax
+ shlq $32, %rdx
+ addq %r11, %rdx
+ adcq %r12, %rax
+ addq %r11, %rax
+ movq %rdx, %rcx
+ movq %rax, %r8
+ movq %rax, %r9
+ shldq $32, %rcx, %r8
+ shlq $32, %rcx
+ shrq $32, %r9
+ addq %rcx, %r11
+ adcq %r8, %r12
+ adcq %r9, %r13
+ adcq $0, %r14
+ adcq $0, %r15
+ adcq $0, %rsi
+ adcq %rdx, %rbx
+ adcq %rax, %rbp
+ adcq $0, %r10
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq %rax, %r9
+ movq $0, %rax
+ adcq $0, %rax
+ subq %r8, %r13
+ sbbq %r9, %r14
+ sbbq %rax, %r15
+ sbbq $0, %rsi
+ sbbq $0, %rbx
+ sbbq $0, %rbp
+ sbbq $0, %r10
+ # a[2-9] += m[0-5] * mu[0..1] = m[0-5] * (a[2..3] * mp)
+ movq 64(%rdi), %r11
+ movq 72(%rdi), %r12
+ movq %r13, %rdx
+ movq %r14, %rax
+ shldq $32, %rdx, %rax
+ shlq $32, %rdx
+ addq %r13, %rdx
+ adcq %r14, %rax
+ addq %r13, %rax
+ movq %rdx, %rcx
+ movq %rax, %r8
+ movq %rax, %r9
+ shldq $32, %rcx, %r8
+ shlq $32, %rcx
+ shrq $32, %r9
+ addq %r10, %r11
+ adcq $0, %r12
+ movq $0, %r10
+ adcq $0, %r10
+ addq %rcx, %r13
+ adcq %r8, %r14
+ adcq %r9, %r15
+ adcq $0, %rsi
+ adcq $0, %rbx
+ adcq $0, %rbp
+ adcq %rdx, %r11
+ adcq %rax, %r12
+ adcq $0, %r10
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq %rax, %r9
+ movq $0, %rax
+ adcq $0, %rax
+ subq %r8, %r15
+ sbbq %r9, %rsi
+ sbbq %rax, %rbx
+ sbbq $0, %rbp
+ sbbq $0, %r11
+ sbbq $0, %r12
+ sbbq $0, %r10
+ # a[4-11] += m[0-5] * mu[0..1] = m[0-5] * (a[4..5] * mp)
+ movq 80(%rdi), %r13
+ movq 88(%rdi), %r14
+ movq %r15, %rdx
+ movq %rsi, %rax
+ shldq $32, %rdx, %rax
+ shlq $32, %rdx
+ addq %r15, %rdx
+ adcq %rsi, %rax
+ addq %r15, %rax
+ movq %rdx, %rcx
+ movq %rax, %r8
+ movq %rax, %r9
+ shldq $32, %rcx, %r8
+ shlq $32, %rcx
+ shrq $32, %r9
+ addq %r10, %r13
+ adcq $0, %r14
+ movq $0, %r10
+ adcq $0, %r10
+ addq %rcx, %r15
+ adcq %r8, %rsi
+ adcq %r9, %rbx
+ adcq $0, %rbp
+ adcq $0, %r11
+ adcq $0, %r12
+ adcq %rdx, %r13
+ adcq %rax, %r14
+ adcq $0, %r10
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq %rax, %r9
+ movq $0, %rax
+ adcq $0, %rax
+ subq %r8, %rbx
+ sbbq %r9, %rbp
+ sbbq %rax, %r11
+ sbbq $0, %r12
+ sbbq $0, %r13
+ sbbq $0, %r14
+ sbbq $0, %r10
+ # Subtract mod if carry
+ negq %r10
+ # Build the masked P-384 prime words from the mask %r10:
+ # p[0] = mask >> 32, p[1] = mask << 32, p[2] = mask & ~1,
+ # p[3..5] = mask (all-ones when subtracting).
+ movq $18446744073709551614, %r9
+ movq %r10, %rcx
+ movq %r10, %r8
+ shrq $32, %rcx
+ shlq $32, %r8
+ andq %r10, %r9
+ subq %rcx, %rbx
+ sbbq %r8, %rbp
+ sbbq %r9, %r11
+ sbbq %r10, %r12
+ sbbq %r10, %r13
+ sbbq %r10, %r14
+ movq %rbx, (%rdi)
+ movq %rbp, 8(%rdi)
+ movq %r11, 16(%rdi)
+ movq %r12, 24(%rdi)
+ movq %r13, 32(%rdi)
+ movq %r14, 40(%rdi)
+ pop %rbp
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_384_mont_reduce_6,.-sp_384_mont_reduce_6
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * SysV AMD64, AT&T syntax.
+ * In:  %rdi = a (12 words in, reduced 6-word result in the low words),
+ *      %rsi = m (6 words), %rdx = mp (moved to %rcx).
+ * Word-serial Montgomery reduction: for each of 6 rounds,
+ * mu = a[i] * mp (low 64 bits via imulq) and a += mu * m shifted by i
+ * words.  %r13/%r14 cache a[i]/a[i+1] across rounds; %r15 carries the
+ * running top-word carry.  Finishes with a masked conditional subtract
+ * of m by tail-calling into sp_384_cond_sub_6.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_384_mont_reduce_order_6
+.type sp_384_mont_reduce_order_6,@function
+.align 16
+sp_384_mont_reduce_order_6:
+#else
+.globl _sp_384_mont_reduce_order_6
+.p2align 4
+_sp_384_mont_reduce_order_6:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq %rdx, %rcx
+ xorq %r15, %r15
+ # i = 6
+ movq $6, %r8
+ movq (%rdi), %r13
+ movq 8(%rdi), %r14
+L_mont_loop_order_6:
+ # mu = a[i] * mp
+ movq %r13, %r11
+ imulq %rcx, %r11
+ # a[i+0] += m[0] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r10
+ # a[i+1] += m[1] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 8(%rsi)
+ movq %r14, %r13
+ addq %rax, %r13
+ adcq %rdx, %r9
+ addq %r10, %r13
+ adcq $0, %r9
+ # a[i+2] += m[2] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 16(%rsi)
+ movq 16(%rdi), %r14
+ addq %rax, %r14
+ adcq %rdx, %r10
+ addq %r9, %r14
+ adcq $0, %r10
+ # a[i+3] += m[3] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 24(%rsi)
+ movq 24(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 24(%rdi)
+ adcq $0, %r9
+ # a[i+4] += m[4] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 32(%rsi)
+ movq 32(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 32(%rdi)
+ adcq $0, %r10
+ # a[i+5] += m[5] * mu
+ movq %r11, %rax
+ mulq 40(%rsi)
+ movq 40(%rdi), %r12
+ addq %rax, %r10
+ adcq %r15, %rdx
+ movq $0, %r15
+ adcq $0, %r15
+ addq %r10, %r12
+ movq %r12, 40(%rdi)
+ adcq %rdx, 48(%rdi)
+ adcq $0, %r15
+ # i -= 1
+ addq $8, %rdi
+ decq %r8
+ jnz L_mont_loop_order_6
+ movq %r13, (%rdi)
+ movq %r14, 8(%rdi)
+ # Set up sp_384_cond_sub_6(r = a, a = a + 48, b = m, m = -carry)
+ # and let its return fall back to our caller.
+ negq %r15
+ movq %r15, %rcx
+ movq %rsi, %rdx
+ movq %rdi, %rsi
+ subq $48, %rdi
+#ifndef __APPLE__
+ callq sp_384_cond_sub_6@plt
+#else
+ callq _sp_384_cond_sub_6
+#endif /* __APPLE__ */
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_384_mont_reduce_order_6,.-sp_384_mont_reduce_order_6
+#endif /* __APPLE__ */
+/* Square a and put result in r. (r = a * a)
+ *
+ * SysV AMD64 ABI: rdi = r (12 x 64-bit words out), rsi = a (6 words in).
+ * Schoolbook squaring with mulq: each cross product A[i]*A[j] (i < j) is
+ * accumulated twice and each square A[i]*A[i] once.  The low six result
+ * words are staged in a 48-byte stack buffer and copied to r last; direct
+ * stores to r happen only at offsets 48..88, so r may alias the 48-byte
+ * input a.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_384_sqr_6
+.type sp_384_sqr_6,@function
+.align 16
+sp_384_sqr_6:
+#else
+.globl _sp_384_sqr_6
+.p2align 4
+_sp_384_sqr_6:
+#endif /* __APPLE__ */
+ push %r12
+ subq $48, %rsp
+ # A[0] * A[0]
+ movq (%rsi), %rax
+ mulq %rax
+ xorq %r9, %r9
+ movq %rax, (%rsp)
+ movq %rdx, %r8
+ # A[0] * A[1]
+ movq 8(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 8(%rsp)
+ # A[0] * A[2]
+ movq 16(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ # A[1] * A[1]
+ movq 8(%rsi), %rax
+ mulq %rax
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ movq %r9, 16(%rsp)
+ # A[0] * A[3]
+ movq 24(%rsi), %rax
+ mulq (%rsi)
+ xorq %r9, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[1] * A[2]
+ movq 16(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %rcx, 24(%rsp)
+ # A[0] * A[4]
+ movq 32(%rsi), %rax
+ mulq (%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[1] * A[3]
+ movq 24(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[2] * A[2]
+ movq 16(%rsi), %rax
+ mulq %rax
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 32(%rsp)
+ # A[0] * A[5]
+ movq 40(%rsi), %rax
+ mulq (%rsi)
+ xorq %r8, %r8
+ xorq %r12, %r12
+ movq %rax, %r10
+ movq %rdx, %r11
+ # A[1] * A[4]
+ movq 32(%rsi), %rax
+ mulq 8(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # A[2] * A[3]
+ movq 24(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r11
+ adcq $0, %r12
+ # Double the three summed cross products at once, then fold them in
+ addq %r10, %r10
+ adcq %r11, %r11
+ adcq %r12, %r12
+ addq %r10, %r9
+ adcq %r11, %rcx
+ adcq %r12, %r8
+ movq %r9, 40(%rsp)
+ # A[1] * A[5]
+ movq 40(%rsi), %rax
+ mulq 8(%rsi)
+ xorq %r9, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[2] * A[4]
+ movq 32(%rsi), %rax
+ mulq 16(%rsi)
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * A[3]
+ movq 24(%rsi), %rax
+ mulq %rax
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %rcx, 48(%rdi)
+ # A[2] * A[5]
+ movq 40(%rsi), %rax
+ mulq 16(%rsi)
+ xorq %rcx, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ # A[3] * A[4]
+ movq 32(%rsi), %rax
+ mulq 24(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ addq %rax, %r8
+ adcq %rdx, %r9
+ adcq $0, %rcx
+ movq %r8, 56(%rdi)
+ # A[3] * A[5]
+ movq 40(%rsi), %rax
+ mulq 24(%rsi)
+ xorq %r8, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ # A[4] * A[4]
+ movq 32(%rsi), %rax
+ mulq %rax
+ addq %rax, %r9
+ adcq %rdx, %rcx
+ adcq $0, %r8
+ movq %r9, 64(%rdi)
+ # A[4] * A[5]
+ movq 40(%rsi), %rax
+ mulq 32(%rsi)
+ xorq %r9, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ addq %rax, %rcx
+ adcq %rdx, %r8
+ adcq $0, %r9
+ movq %rcx, 72(%rdi)
+ # A[5] * A[5]
+ movq 40(%rsi), %rax
+ mulq %rax
+ addq %rax, %r8
+ adcq %rdx, %r9
+ movq %r8, 80(%rdi)
+ movq %r9, 88(%rdi)
+ # Copy the staged low half of the result from the stack into r
+ movq (%rsp), %rax
+ movq 8(%rsp), %rdx
+ movq 16(%rsp), %r10
+ movq 24(%rsp), %r11
+ movq %rax, (%rdi)
+ movq %rdx, 8(%rdi)
+ movq %r10, 16(%rdi)
+ movq %r11, 24(%rdi)
+ movq 32(%rsp), %rax
+ movq 40(%rsp), %rdx
+ movq %rax, 32(%rdi)
+ movq %rdx, 40(%rdi)
+ addq $48, %rsp
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_384_sqr_6,.-sp_384_sqr_6
+#endif /* __APPLE__ */
+/* Compare a with b in constant time.
+ *
+ * SysV AMD64 ABI: rdi = a, rsi = b; result returned in rax.
+ * Words are scanned from most to least significant with no data-dependent
+ * branches.  rax starts at -1 and rdx holds an all-ones "still undecided"
+ * mask: on the first differing word pair, cmova/cmovc latch +1 or -1 into
+ * rax and cmovnz clears the mask, so later (less significant) words are
+ * masked to zero and cannot change the result.  The final xor of rax with
+ * the mask maps the all-equal case (-1 ^ -1) to 0.
+ *
+ * a A single precision integer.
+ * b A single precision integer.
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b
+ * respectively.
+ */
+#ifndef __APPLE__
+.globl sp_384_cmp_6
+.type sp_384_cmp_6,@function
+.align 16
+sp_384_cmp_6:
+#else
+.globl _sp_384_cmp_6
+.p2align 4
+_sp_384_cmp_6:
+#endif /* __APPLE__ */
+ xorq %rcx, %rcx
+ movq $-1, %rdx
+ movq $-1, %rax
+ movq $1, %r8
+ movq 40(%rdi), %r9
+ movq 40(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 32(%rdi), %r9
+ movq 32(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 24(%rdi), %r9
+ movq 24(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 16(%rdi), %r9
+ movq 16(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq 8(%rdi), %r9
+ movq 8(%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ movq (%rdi), %r9
+ movq (%rsi), %r10
+ andq %rdx, %r9
+ andq %rdx, %r10
+ subq %r10, %r9
+ cmova %r8, %rax
+ cmovc %rdx, %rax
+ cmovnz %rcx, %rdx
+ # All-equal: rax = -1 ^ -1 = 0; decided: mask is 0 so rax is +/-1
+ xorq %rdx, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_384_cmp_6,.-sp_384_cmp_6
+#endif /* __APPLE__ */
+/* Add b to a into r. (r = a + b)
+ *
+ * SysV AMD64 ABI: rdi = r, rsi = a, rdx = b (6 x 64-bit words each).
+ * Single rippled add/adc chain; the carry out of the most significant
+ * word is returned in rax (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_384_add_6
+.type sp_384_add_6,@function
+.align 16
+sp_384_add_6:
+#else
+.globl _sp_384_add_6
+.p2align 4
+_sp_384_add_6:
+#endif /* __APPLE__ */
+ # Add
+ movq (%rsi), %rcx
+ xorq %rax, %rax
+ addq (%rdx), %rcx
+ movq 8(%rsi), %r8
+ movq %rcx, (%rdi)
+ adcq 8(%rdx), %r8
+ movq 16(%rsi), %rcx
+ movq %r8, 8(%rdi)
+ adcq 16(%rdx), %rcx
+ movq 24(%rsi), %r8
+ movq %rcx, 16(%rdi)
+ adcq 24(%rdx), %r8
+ movq 32(%rsi), %rcx
+ movq %r8, 24(%rdi)
+ adcq 32(%rdx), %rcx
+ movq 40(%rsi), %r8
+ movq %rcx, 32(%rdi)
+ adcq 40(%rdx), %r8
+ movq %r8, 40(%rdi)
+ # Carry out of the top word -> return value
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_384_add_6,.-sp_384_add_6
+#endif /* __APPLE__ */
+/* Add a to a into r. (r = a + a)
+ *
+ * SysV AMD64 ABI: rdi = r, rsi = a (6 x 64-bit words each).
+ * Doubles each word via add/adc of the word with itself (a left shift
+ * by one bit across the whole number); the bit shifted out of the top
+ * word is returned in rax (0 or 1).
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_384_dbl_6
+.type sp_384_dbl_6,@function
+.align 16
+sp_384_dbl_6:
+#else
+.globl _sp_384_dbl_6
+.p2align 4
+_sp_384_dbl_6:
+#endif /* __APPLE__ */
+ movq (%rsi), %rdx
+ xorq %rax, %rax
+ addq %rdx, %rdx
+ movq 8(%rsi), %rcx
+ movq %rdx, (%rdi)
+ adcq %rcx, %rcx
+ movq 16(%rsi), %rdx
+ movq %rcx, 8(%rdi)
+ adcq %rdx, %rdx
+ movq 24(%rsi), %rcx
+ movq %rdx, 16(%rdi)
+ adcq %rcx, %rcx
+ movq 32(%rsi), %rdx
+ movq %rcx, 24(%rdi)
+ adcq %rdx, %rdx
+ movq 40(%rsi), %rcx
+ movq %rdx, 32(%rdi)
+ adcq %rcx, %rcx
+ movq %rcx, 40(%rdi)
+ # Carry out of the top word -> return value
+ adcq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_384_dbl_6,.-sp_384_dbl_6
+#endif /* __APPLE__ */
+/* Sub b from a into r. (r = a - b)
+ *
+ * SysV AMD64 ABI: rdi = r, rsi = a, rdx = b (6 x 64-bit words each).
+ * All of a is loaded first, then one sub/sbb chain; the final borrow is
+ * returned in rax as 0 (no borrow) or -1 (borrow), via sbb from zero.
+ * r12 is callee-saved and therefore pushed/popped.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_384_sub_6
+.type sp_384_sub_6,@function
+.align 16
+sp_384_sub_6:
+#else
+.globl _sp_384_sub_6
+.p2align 4
+_sp_384_sub_6:
+#endif /* __APPLE__ */
+ push %r12
+ xorq %rax, %rax
+ movq (%rsi), %rcx
+ movq 8(%rsi), %r8
+ movq 16(%rsi), %r9
+ movq 24(%rsi), %r10
+ movq 32(%rsi), %r11
+ movq 40(%rsi), %r12
+ subq (%rdx), %rcx
+ sbbq 8(%rdx), %r8
+ sbbq 16(%rdx), %r9
+ sbbq 24(%rdx), %r10
+ sbbq 32(%rdx), %r11
+ sbbq 40(%rdx), %r12
+ movq %rcx, (%rdi)
+ movq %r8, 8(%rdi)
+ movq %r9, 16(%rdi)
+ movq %r10, 24(%rdi)
+ movq %r11, 32(%rdi)
+ movq %r12, 40(%rdi)
+ # rax = 0 - borrow: 0 when no borrow, -1 (all ones) when borrow
+ sbbq $0, %rax
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_384_sub_6,.-sp_384_sub_6
+#endif /* __APPLE__ */
+/* Conditionally add a and b using the mask m.
+ * m is -1 to add and 0 when not.
+ *
+ * SysV AMD64 ABI: rdi = r, rsi = a, rdx = b, rcx = m.
+ * Constant time: b is first AND-masked with m into a 48-byte stack
+ * buffer (giving b or 0), then unconditionally added to a, so the same
+ * instruction sequence runs regardless of m.  The carry out of the top
+ * word is returned in rax (0 or 1).
+ *
+ * r A single precision number representing conditional add result.
+ * a A single precision number to add with.
+ * b A single precision number to add.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_384_cond_add_6
+.type sp_384_cond_add_6,@function
+.align 16
+sp_384_cond_add_6:
+#else
+.globl _sp_384_cond_add_6
+.p2align 4
+_sp_384_cond_add_6:
+#endif /* __APPLE__ */
+ subq $48, %rsp
+ movq $0, %rax
+ # Stage b & m on the stack
+ movq (%rdx), %r8
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ # r = a + (b & m)
+ movq (%rsi), %r8
+ movq (%rsp), %rdx
+ addq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, (%rdi)
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ adcq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ adcq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq %r9, 40(%rdi)
+ adcq $0, %rax
+ addq $48, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_384_cond_add_6,.-sp_384_cond_add_6
+#endif /* __APPLE__ */
+/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
+ *
+ * SysV AMD64 ABI: rdi = r, rsi = a, rdx = m.
+ * Constant time: r11 = -(a[0] & 1) is an all-ones mask when a is odd,
+ * zero when even.  m & mask is staged on the stack and added to a (so an
+ * odd a becomes the even value a + m), the carry out is kept in r10, and
+ * the 385-bit sum is shifted right one bit with a shrd chain, with r10
+ * supplying the top bit.
+ *
+ * r Result of division by 2.
+ * a Number to divide.
+ * m Modulus (prime).
+ */
+#ifndef __APPLE__
+.globl sp_384_div2_6
+.type sp_384_div2_6,@function
+.align 16
+sp_384_div2_6:
+#else
+.globl _sp_384_div2_6
+.p2align 4
+_sp_384_div2_6:
+#endif /* __APPLE__ */
+ subq $48, %rsp
+ movq (%rsi), %rax
+ movq %rax, %r11
+ andq $1, %r11
+ negq %r11
+ xorq %r10, %r10
+ # Stage m & mask on the stack
+ movq (%rdx), %r8
+ andq %r11, %r8
+ movq %r8, (%rsp)
+ movq 8(%rdx), %r8
+ andq %r11, %r8
+ movq %r8, 8(%rsp)
+ movq 16(%rdx), %r8
+ andq %r11, %r8
+ movq %r8, 16(%rsp)
+ movq 24(%rdx), %r8
+ andq %r11, %r8
+ movq %r8, 24(%rsp)
+ movq 32(%rdx), %r8
+ andq %r11, %r8
+ movq %r8, 32(%rsp)
+ movq 40(%rdx), %r8
+ andq %r11, %r8
+ movq %r8, 40(%rsp)
+ # stack = a + (m & mask); carry kept in r10
+ addq %rax, (%rsp)
+ movq 8(%rsi), %rax
+ adcq %rax, 8(%rsp)
+ movq 16(%rsi), %rax
+ adcq %rax, 16(%rsp)
+ movq 24(%rsi), %rax
+ adcq %rax, 24(%rsp)
+ movq 32(%rsi), %rax
+ adcq %rax, 32(%rsp)
+ movq 40(%rsi), %rax
+ adcq %rax, 40(%rsp)
+ adcq $0, %r10
+ # Shift the 385-bit sum right by one bit into r
+ movq (%rsp), %rax
+ movq 8(%rsp), %rcx
+ shrdq $1, %rcx, %rax
+ movq %rax, (%rdi)
+ movq 16(%rsp), %rax
+ shrdq $1, %rax, %rcx
+ movq %rcx, 8(%rdi)
+ movq 24(%rsp), %rcx
+ shrdq $1, %rcx, %rax
+ movq %rax, 16(%rdi)
+ movq 32(%rsp), %rax
+ shrdq $1, %rax, %rcx
+ movq %rcx, 24(%rdi)
+ movq 40(%rsp), %rcx
+ shrdq $1, %rcx, %rax
+ movq %rax, 32(%rdi)
+ shrdq $1, %r10, %rcx
+ movq %rcx, 40(%rdi)
+ addq $48, %rsp
+ repz retq
+#ifndef __APPLE__
+.size sp_384_div2_6,.-sp_384_div2_6
+#endif /* __APPLE__ */
+/* Multiply a and b into r. (r = a * b)
+ *
+ * SysV AMD64 ABI: rdi = r (12 words out), rsi = a, rdx = b (6 words each;
+ * b is moved to rax because mulx implicitly reads rdx).
+ * Uses BMI2 mulx with the ADX dual carry chains (adcx carries via CF,
+ * adox via OF) to pipeline the row-by-row product accumulation.
+ * The low five result words are staged on the stack; all stores to r
+ * happen after both inputs are fully read, so r may alias a or b.
+ * NOTE(review): unlike sp_384_mont_reduce_order_avx2_6 below, this
+ * BMI2/ADX routine is not wrapped in #ifdef HAVE_INTEL_AVX2 —
+ * presumably it is only called after a runtime CPUID check; confirm.
+ *
+ * r Result of multiplication.
+ * a First number to multiply.
+ * b Second number to multiply.
+ */
+#ifndef __APPLE__
+.globl sp_384_mul_avx2_6
+.type sp_384_mul_avx2_6,@function
+.align 16
+sp_384_mul_avx2_6:
+#else
+.globl _sp_384_mul_avx2_6
+.p2align 4
+_sp_384_mul_avx2_6:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ movq %rdx, %rax
+ subq $40, %rsp
+ # rbx stays zero throughout; xor also clears CF and OF for adcx/adox
+ xorq %rbx, %rbx
+ movq (%rsi), %rdx
+ # A[0] * B[0]
+ mulxq (%rax), %r9, %r10
+ # A[0] * B[1]
+ mulxq 8(%rax), %rcx, %r11
+ adcxq %rcx, %r10
+ # A[0] * B[2]
+ mulxq 16(%rax), %rcx, %r12
+ adcxq %rcx, %r11
+ # A[0] * B[3]
+ mulxq 24(%rax), %rcx, %r13
+ adcxq %rcx, %r12
+ # A[0] * B[4]
+ mulxq 32(%rax), %rcx, %r14
+ adcxq %rcx, %r13
+ # A[0] * B[5]
+ mulxq 40(%rax), %rcx, %r15
+ adcxq %rcx, %r14
+ adcxq %rbx, %r15
+ movq %r9, (%rsp)
+ movq $0, %r9
+ adcxq %rbx, %r9
+ xorq %rbx, %rbx
+ movq 8(%rsi), %rdx
+ # A[1] * B[0]
+ mulxq (%rax), %rcx, %r8
+ adcxq %rcx, %r10
+ adoxq %r8, %r11
+ # A[1] * B[1]
+ mulxq 8(%rax), %rcx, %r8
+ adcxq %rcx, %r11
+ adoxq %r8, %r12
+ # A[1] * B[2]
+ mulxq 16(%rax), %rcx, %r8
+ adcxq %rcx, %r12
+ adoxq %r8, %r13
+ # A[1] * B[3]
+ mulxq 24(%rax), %rcx, %r8
+ adcxq %rcx, %r13
+ adoxq %r8, %r14
+ # A[1] * B[4]
+ mulxq 32(%rax), %rcx, %r8
+ adcxq %rcx, %r14
+ adoxq %r8, %r15
+ # A[1] * B[5]
+ mulxq 40(%rax), %rcx, %r8
+ adcxq %rcx, %r15
+ adoxq %r8, %r9
+ adcxq %rbx, %r9
+ movq %r10, 8(%rsp)
+ movq $0, %r10
+ adcxq %rbx, %r10
+ adoxq %rbx, %r10
+ xorq %rbx, %rbx
+ movq 16(%rsi), %rdx
+ # A[2] * B[0]
+ mulxq (%rax), %rcx, %r8
+ adcxq %rcx, %r11
+ adoxq %r8, %r12
+ # A[2] * B[1]
+ mulxq 8(%rax), %rcx, %r8
+ adcxq %rcx, %r12
+ adoxq %r8, %r13
+ # A[2] * B[2]
+ mulxq 16(%rax), %rcx, %r8
+ adcxq %rcx, %r13
+ adoxq %r8, %r14
+ # A[2] * B[3]
+ mulxq 24(%rax), %rcx, %r8
+ adcxq %rcx, %r14
+ adoxq %r8, %r15
+ # A[2] * B[4]
+ mulxq 32(%rax), %rcx, %r8
+ adcxq %rcx, %r15
+ adoxq %r8, %r9
+ # A[2] * B[5]
+ mulxq 40(%rax), %rcx, %r8
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ adcxq %rbx, %r10
+ movq %r11, 16(%rsp)
+ movq $0, %r11
+ adcxq %rbx, %r11
+ adoxq %rbx, %r11
+ xorq %rbx, %rbx
+ movq 24(%rsi), %rdx
+ # A[3] * B[0]
+ mulxq (%rax), %rcx, %r8
+ adcxq %rcx, %r12
+ adoxq %r8, %r13
+ # A[3] * B[1]
+ mulxq 8(%rax), %rcx, %r8
+ adcxq %rcx, %r13
+ adoxq %r8, %r14
+ # A[3] * B[2]
+ mulxq 16(%rax), %rcx, %r8
+ adcxq %rcx, %r14
+ adoxq %r8, %r15
+ # A[3] * B[3]
+ mulxq 24(%rax), %rcx, %r8
+ adcxq %rcx, %r15
+ adoxq %r8, %r9
+ # A[3] * B[4]
+ mulxq 32(%rax), %rcx, %r8
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ # A[3] * B[5]
+ mulxq 40(%rax), %rcx, %r8
+ adcxq %rcx, %r10
+ adoxq %r8, %r11
+ adcxq %rbx, %r11
+ movq %r12, 24(%rsp)
+ movq $0, %r12
+ adcxq %rbx, %r12
+ adoxq %rbx, %r12
+ xorq %rbx, %rbx
+ movq 32(%rsi), %rdx
+ # A[4] * B[0]
+ mulxq (%rax), %rcx, %r8
+ adcxq %rcx, %r13
+ adoxq %r8, %r14
+ # A[4] * B[1]
+ mulxq 8(%rax), %rcx, %r8
+ adcxq %rcx, %r14
+ adoxq %r8, %r15
+ # A[4] * B[2]
+ mulxq 16(%rax), %rcx, %r8
+ adcxq %rcx, %r15
+ adoxq %r8, %r9
+ # A[4] * B[3]
+ mulxq 24(%rax), %rcx, %r8
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ # A[4] * B[4]
+ mulxq 32(%rax), %rcx, %r8
+ adcxq %rcx, %r10
+ adoxq %r8, %r11
+ # A[4] * B[5]
+ mulxq 40(%rax), %rcx, %r8
+ adcxq %rcx, %r11
+ adoxq %r8, %r12
+ adcxq %rbx, %r12
+ movq %r13, 32(%rsp)
+ movq 40(%rsi), %rdx
+ # A[5] * B[0]
+ mulxq (%rax), %rcx, %r8
+ adcxq %rcx, %r14
+ adoxq %r8, %r15
+ # A[5] * B[1]
+ mulxq 8(%rax), %rcx, %r8
+ adcxq %rcx, %r15
+ adoxq %r8, %r9
+ # A[5] * B[2]
+ mulxq 16(%rax), %rcx, %r8
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ # A[5] * B[3]
+ mulxq 24(%rax), %rcx, %r8
+ adcxq %rcx, %r10
+ adoxq %r8, %r11
+ # A[5] * B[4]
+ mulxq 32(%rax), %rcx, %r8
+ adcxq %rcx, %r11
+ adoxq %r8, %r12
+ # A[5] * B[5]
+ mulxq 40(%rax), %rcx, %r13
+ adcxq %rcx, %r12
+ adoxq %rbx, %r13
+ adcxq %rbx, %r13
+ # Store high words, then copy the staged low words from the stack
+ movq %r14, 40(%rdi)
+ movq %r15, 48(%rdi)
+ movq %r9, 56(%rdi)
+ movq %r10, 64(%rdi)
+ movq %r11, 72(%rdi)
+ movq %r12, 80(%rdi)
+ movq %r13, 88(%rdi)
+ movq (%rsp), %r9
+ movq 8(%rsp), %r10
+ movq 16(%rsp), %r11
+ movq 24(%rsp), %r12
+ movq 32(%rsp), %r13
+ movq %r9, (%rdi)
+ movq %r10, 8(%rdi)
+ movq %r11, 16(%rdi)
+ movq %r12, 24(%rdi)
+ movq %r13, 32(%rdi)
+ addq $40, %rsp
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_384_mul_avx2_6,.-sp_384_mul_avx2_6
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Reduce the number back to 384 bits using Montgomery reduction.
+ *
+ * SysV AMD64 ABI: rdi = a (12-word value, reduced in place), rsi = m,
+ * rdx = mp (moved to rax because mulx implicitly reads rdx).
+ * The six Montgomery iterations (mu = a[i]*mp; a += m*mu, shifted) are
+ * fully unrolled, using BMI2 mulx and the ADX adcx/adox dual carry
+ * chains; the L_mont_loop_order_avx2_6 label appears vestigial (no
+ * branch in this function targets it).  The final carry is turned into
+ * a 0/-1 mask and a conditional subtraction of m is done with pextq
+ * (mask of all ones selects m[i], mask of zero selects 0), writing the
+ * reduced 6-word result to the low words of a.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_384_mont_reduce_order_avx2_6
+.type sp_384_mont_reduce_order_avx2_6,@function
+.align 16
+sp_384_mont_reduce_order_avx2_6:
+#else
+.globl _sp_384_mont_reduce_order_avx2_6
+.p2align 4
+_sp_384_mont_reduce_order_avx2_6:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ movq %rdx, %rax
+ xorq %r13, %r13
+ movq (%rdi), %r12
+ xorq %r11, %r11
+L_mont_loop_order_avx2_6:
+ # mu = a[i] * mp
+ movq %r12, %rdx
+ movq %r12, %r9
+ imulq %rax, %rdx
+ xorq %r11, %r11
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rcx, %r8
+ movq 8(%rdi), %r12
+ adcxq %rcx, %r9
+ adoxq %r8, %r12
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rcx, %r8
+ movq 16(%rdi), %r9
+ adcxq %rcx, %r12
+ adoxq %r8, %r9
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rcx, %r8
+ movq 24(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 16(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rcx, %r8
+ movq 32(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 24(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rcx, %r8
+ movq 40(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 32(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rcx, %r8
+ movq 48(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 40(%rdi)
+ adcxq %r13, %r9
+ movq %r9, 48(%rdi)
+ movq %r11, %r13
+ adoxq %r11, %r13
+ adcxq %r11, %r13
+ # mu = a[i] * mp
+ movq %r12, %rdx
+ movq %r12, %r9
+ imulq %rax, %rdx
+ xorq %r11, %r11
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rcx, %r8
+ movq 16(%rdi), %r12
+ adcxq %rcx, %r9
+ adoxq %r8, %r12
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rcx, %r8
+ movq 24(%rdi), %r9
+ adcxq %rcx, %r12
+ adoxq %r8, %r9
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rcx, %r8
+ movq 32(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 24(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rcx, %r8
+ movq 40(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 32(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rcx, %r8
+ movq 48(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 40(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rcx, %r8
+ movq 56(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 48(%rdi)
+ adcxq %r13, %r9
+ movq %r9, 56(%rdi)
+ movq %r11, %r13
+ adoxq %r11, %r13
+ adcxq %r11, %r13
+ # mu = a[i] * mp
+ movq %r12, %rdx
+ movq %r12, %r9
+ imulq %rax, %rdx
+ xorq %r11, %r11
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rcx, %r8
+ movq 24(%rdi), %r12
+ adcxq %rcx, %r9
+ adoxq %r8, %r12
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rcx, %r8
+ movq 32(%rdi), %r9
+ adcxq %rcx, %r12
+ adoxq %r8, %r9
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rcx, %r8
+ movq 40(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 32(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rcx, %r8
+ movq 48(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 40(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rcx, %r8
+ movq 56(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 48(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rcx, %r8
+ movq 64(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 56(%rdi)
+ adcxq %r13, %r9
+ movq %r9, 64(%rdi)
+ movq %r11, %r13
+ adoxq %r11, %r13
+ adcxq %r11, %r13
+ # mu = a[i] * mp
+ movq %r12, %rdx
+ movq %r12, %r9
+ imulq %rax, %rdx
+ xorq %r11, %r11
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rcx, %r8
+ movq 32(%rdi), %r12
+ adcxq %rcx, %r9
+ adoxq %r8, %r12
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rcx, %r8
+ movq 40(%rdi), %r9
+ adcxq %rcx, %r12
+ adoxq %r8, %r9
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rcx, %r8
+ movq 48(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 40(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rcx, %r8
+ movq 56(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 48(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rcx, %r8
+ movq 64(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 56(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rcx, %r8
+ movq 72(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 64(%rdi)
+ adcxq %r13, %r9
+ movq %r9, 72(%rdi)
+ movq %r11, %r13
+ adoxq %r11, %r13
+ adcxq %r11, %r13
+ # mu = a[i] * mp
+ movq %r12, %rdx
+ movq %r12, %r9
+ imulq %rax, %rdx
+ xorq %r11, %r11
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rcx, %r8
+ movq 40(%rdi), %r12
+ adcxq %rcx, %r9
+ adoxq %r8, %r12
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rcx, %r8
+ movq 48(%rdi), %r9
+ adcxq %rcx, %r12
+ adoxq %r8, %r9
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rcx, %r8
+ movq 56(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 48(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rcx, %r8
+ movq 64(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 56(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rcx, %r8
+ movq 72(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 64(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rcx, %r8
+ movq 80(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 72(%rdi)
+ adcxq %r13, %r9
+ movq %r9, 80(%rdi)
+ movq %r11, %r13
+ adoxq %r11, %r13
+ adcxq %r11, %r13
+ # mu = a[i] * mp
+ movq %r12, %rdx
+ movq %r12, %r9
+ imulq %rax, %rdx
+ xorq %r11, %r11
+ # a[i+0] += m[0] * mu
+ mulxq (%rsi), %rcx, %r8
+ movq 48(%rdi), %r12
+ adcxq %rcx, %r9
+ adoxq %r8, %r12
+ # a[i+1] += m[1] * mu
+ mulxq 8(%rsi), %rcx, %r8
+ movq 56(%rdi), %r9
+ adcxq %rcx, %r12
+ adoxq %r8, %r9
+ # a[i+2] += m[2] * mu
+ mulxq 16(%rsi), %rcx, %r8
+ movq 64(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 56(%rdi)
+ # a[i+3] += m[3] * mu
+ mulxq 24(%rsi), %rcx, %r8
+ movq 72(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 64(%rdi)
+ # a[i+4] += m[4] * mu
+ mulxq 32(%rsi), %rcx, %r8
+ movq 80(%rdi), %r10
+ adcxq %rcx, %r9
+ adoxq %r8, %r10
+ movq %r9, 72(%rdi)
+ # a[i+5] += m[5] * mu
+ mulxq 40(%rsi), %rcx, %r8
+ movq 88(%rdi), %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ movq %r10, 80(%rdi)
+ adcxq %r13, %r9
+ movq %r9, 88(%rdi)
+ movq %r11, %r13
+ adoxq %r11, %r13
+ adcxq %r11, %r13
+ # Turn the final carry into a 0/-1 mask for the conditional subtract
+ negq %r13
+ movq %rdi, %rax
+ addq $48, %rdi
+ # r = a_high - (m & mask), via pextq with an all-ones/zero mask
+ movq (%rsi), %r8
+ movq %r12, %rdx
+ pextq %r13, %r8, %r8
+ subq %r8, %rdx
+ movq 8(%rsi), %r8
+ movq 8(%rdi), %rcx
+ pextq %r13, %r8, %r8
+ movq %rdx, (%rax)
+ sbbq %r8, %rcx
+ movq 16(%rsi), %rdx
+ movq 16(%rdi), %r8
+ pextq %r13, %rdx, %rdx
+ movq %rcx, 8(%rax)
+ sbbq %rdx, %r8
+ movq 24(%rsi), %rcx
+ movq 24(%rdi), %rdx
+ pextq %r13, %rcx, %rcx
+ movq %r8, 16(%rax)
+ sbbq %rcx, %rdx
+ movq 32(%rsi), %r8
+ movq 32(%rdi), %rcx
+ pextq %r13, %r8, %r8
+ movq %rdx, 24(%rax)
+ sbbq %r8, %rcx
+ movq 40(%rsi), %rdx
+ movq 40(%rdi), %r8
+ pextq %r13, %rdx, %rdx
+ movq %rcx, 32(%rax)
+ sbbq %rdx, %r8
+ movq %r8, 40(%rax)
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_384_mont_reduce_order_avx2_6,.-sp_384_mont_reduce_order_avx2_6
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+/* Square a and put result in r. (r = a * a)
+ *
+ * SysV AMD64 ABI: rdi = r (12 words out), rsi = a (6 words in).
+ * Diagonal squaring with BMI2 mulx and ADX adcx/adox: cross products
+ * A[i]*A[j] (i < j) are accumulated once per diagonal, doubled via
+ * adox of each word with itself, then the squares A[i]*A[i] are folded
+ * in.  The r pointer (rdi) is pushed at entry so rdi can serve as the
+ * zero/carry register; it is popped briefly to store the A[0]*A[0] low
+ * word and finally popped again for the remaining stores — so, per the
+ * visible store ordering, r is not written until after all of a has
+ * been read.
+ *
+ * r Result of squaring.
+ * a Number to square in Montgomery form.
+ */
+#ifndef __APPLE__
+.globl sp_384_sqr_avx2_6
+.type sp_384_sqr_avx2_6,@function
+.align 16
+sp_384_sqr_avx2_6:
+#else
+.globl _sp_384_sqr_avx2_6
+.p2align 4
+_sp_384_sqr_avx2_6:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rbx
+ push %rbp
+ push %rdi
+ # rdi is now free: keep it zero as the adcx/adox carry source
+ xorq %rdi, %rdi
+ movq (%rsi), %rdx
+ movq 8(%rsi), %r15
+ movq 16(%rsi), %rbx
+ movq 24(%rsi), %rbp
+ # Diagonal 0
+ # A[1] * A[0]
+ mulxq 8(%rsi), %r8, %r9
+ # A[2] * A[0]
+ mulxq 16(%rsi), %rax, %r10
+ adcxq %rax, %r9
+ # A[3] * A[0]
+ mulxq 24(%rsi), %rax, %r11
+ adcxq %rax, %r10
+ # A[4] * A[0]
+ mulxq 32(%rsi), %rax, %r12
+ adcxq %rax, %r11
+ # A[5] * A[0]
+ mulxq 40(%rsi), %rax, %r13
+ adcxq %rax, %r12
+ adcxq %rdi, %r13
+ # Diagonal 1
+ movq %r15, %rdx
+ # A[2] * A[1]
+ mulxq 16(%rsi), %rax, %rcx
+ adcxq %rax, %r10
+ adoxq %rcx, %r11
+ # A[3] * A[1]
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r11
+ adoxq %rcx, %r12
+ # A[4] * A[1]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r13
+ # A[5] * A[1]
+ mulxq 40(%rsi), %rax, %r14
+ adcxq %rax, %r13
+ adoxq %rdi, %r14
+ movq %rbx, %rdx
+ # A[5] * A[2]
+ mulxq 40(%rsi), %rax, %r15
+ adcxq %rax, %r14
+ adoxq %rdi, %r15
+ adcxq %rdi, %r15
+ adcxq %rdi, %rbx
+ # Diagonal 2
+ # A[3] * A[2]
+ mulxq 24(%rsi), %rax, %rcx
+ adcxq %rax, %r12
+ adoxq %rcx, %r13
+ # A[4] * A[2]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r13
+ adoxq %rcx, %r14
+ movq %rbp, %rdx
+ # A[4] * A[3]
+ mulxq 32(%rsi), %rax, %rcx
+ adcxq %rax, %r14
+ adoxq %rcx, %r15
+ # A[5] * A[3]
+ mulxq 40(%rsi), %rax, %rbx
+ adcxq %rax, %r15
+ adoxq %rdi, %rbx
+ movq 32(%rsi), %rdx
+ # A[5] * A[4]
+ mulxq 40(%rsi), %rax, %rbp
+ adcxq %rax, %rbx
+ adoxq %rdi, %rbp
+ adcxq %rdi, %rbp
+ adcxq %rdi, %rdi
+ # Doubling previous result as we add in square words results
+ # A[0] * A[0]
+ movq (%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ # Recover r briefly to store the lowest result word
+ pop %rdx
+ movq %rax, (%rdx)
+ adoxq %r8, %r8
+ push %rdx
+ adcxq %rcx, %r8
+ # A[1] * A[1]
+ movq 8(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r9, %r9
+ adcxq %rax, %r9
+ adoxq %r10, %r10
+ adcxq %rcx, %r10
+ # A[2] * A[2]
+ movq 16(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r11, %r11
+ adcxq %rax, %r11
+ adoxq %r12, %r12
+ adcxq %rcx, %r12
+ # A[3] * A[3]
+ movq 24(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r13, %r13
+ adcxq %rax, %r13
+ adoxq %r14, %r14
+ adcxq %rcx, %r14
+ # A[4] * A[4]
+ movq 32(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %r15, %r15
+ adcxq %rax, %r15
+ adoxq %rbx, %rbx
+ adcxq %rcx, %rbx
+ # A[5] * A[5]
+ movq 40(%rsi), %rdx
+ mulxq %rdx, %rax, %rcx
+ adoxq %rbp, %rbp
+ adcxq %rax, %rbp
+ adcxq %rdi, %rcx
+ movq $0, %rax
+ adoxq %rax, %rcx
+ # Restore the r pointer and store the remaining result words
+ pop %rdi
+ movq %r8, 8(%rdi)
+ movq %r9, 16(%rdi)
+ movq %r10, 24(%rdi)
+ movq %r11, 32(%rdi)
+ movq %r12, 40(%rdi)
+ movq %r13, 48(%rdi)
+ movq %r14, 56(%rdi)
+ movq %r15, 64(%rdi)
+ movq %rbx, 72(%rdi)
+ movq %rbp, 80(%rdi)
+ movq %rcx, 88(%rdi)
+ pop %rbp
+ pop %rbx
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_384_sqr_avx2_6,.-sp_384_sqr_avx2_6
+#endif /* __APPLE__ */
+/* Add 1 to a. (a = a + 1)
+ *
+ * SysV AMD64 ABI: rdi = a (6 x 64-bit words, modified in place).
+ * Increments in place with a rippled carry through all six words; any
+ * carry out of the top word is discarded (wraps mod 2^384).
+ *
+ * a A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_384_add_one_6
+.type sp_384_add_one_6,@function
+.align 16
+sp_384_add_one_6:
+#else
+.globl _sp_384_add_one_6
+.p2align 4
+_sp_384_add_one_6:
+#endif /* __APPLE__ */
+ addq $1, (%rdi)
+ adcq $0, 8(%rdi)
+ adcq $0, 16(%rdi)
+ adcq $0, 24(%rdi)
+ adcq $0, 32(%rdi)
+ adcq $0, 40(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_384_add_one_6,.-sp_384_add_one_6
+#endif /* __APPLE__ */
+/* Read big endian unsigned byte array into r.
+ *
+ * SysV AMD64 ABI: rdi = r, rsi = size, rdx = a, rcx = n.
+ * Walks the byte array backwards from a+n, loading whole 64-bit words
+ * with byte-swapping movbe (requires the MOVBE extension) — first in
+ * 64-byte chunks, then 8-byte words, then any 1..7 leftover bytes
+ * assembled a byte at a time — and zero-fills r up to r+48.
+ * NOTE(review): the size argument (rsi) is never referenced; the
+ * 48-byte (6-word) output length is hard-coded via r10, and the code
+ * presumably assumes n <= 48 — confirm against callers.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+#ifndef __APPLE__
+.globl sp_384_from_bin
+.type sp_384_from_bin,@function
+.align 16
+sp_384_from_bin:
+#else
+.globl _sp_384_from_bin
+.p2align 4
+_sp_384_from_bin:
+#endif /* __APPLE__ */
+ movq %rdx, %r9
+ movq %rdi, %r10
+ addq %rcx, %r9
+ addq $48, %r10
+ xorq %r11, %r11
+ jmp L_384_from_bin_64_end
+L_384_from_bin_64_start:
+ subq $64, %r9
+ movbeq 56(%r9), %rax
+ movbeq 48(%r9), %r8
+ movq %rax, (%rdi)
+ movq %r8, 8(%rdi)
+ movbeq 40(%r9), %rax
+ movbeq 32(%r9), %r8
+ movq %rax, 16(%rdi)
+ movq %r8, 24(%rdi)
+ movbeq 24(%r9), %rax
+ movbeq 16(%r9), %r8
+ movq %rax, 32(%rdi)
+ movq %r8, 40(%rdi)
+ movbeq 8(%r9), %rax
+ movbeq (%r9), %r8
+ movq %rax, 48(%rdi)
+ movq %r8, 56(%rdi)
+ addq $64, %rdi
+ subq $64, %rcx
+L_384_from_bin_64_end:
+ cmpq $63, %rcx
+ jg L_384_from_bin_64_start
+ jmp L_384_from_bin_8_end
+L_384_from_bin_8_start:
+ subq $8, %r9
+ movbeq (%r9), %rax
+ movq %rax, (%rdi)
+ addq $8, %rdi
+ subq $8, %rcx
+L_384_from_bin_8_end:
+ cmpq $7, %rcx
+ jg L_384_from_bin_8_start
+ cmpq %r11, %rcx
+ je L_384_from_bin_hi_end
+ movq %r11, %r8
+ movq %r11, %rax
+L_384_from_bin_hi_start:
+ # Fold the remaining 1..7 leading bytes into r8, big-endian
+ movb (%rdx), %al
+ shlq $8, %r8
+ incq %rdx
+ addq %rax, %r8
+ decq %rcx
+ jg L_384_from_bin_hi_start
+ movq %r8, (%rdi)
+ addq $8, %rdi
+L_384_from_bin_hi_end:
+ cmpq %r10, %rdi
+ je L_384_from_bin_zero_end
+L_384_from_bin_zero_start:
+ # Zero-fill the remaining words up to r+48
+ movq %r11, (%rdi)
+ addq $8, %rdi
+ cmpq %r10, %rdi
+ jl L_384_from_bin_zero_start
+L_384_from_bin_zero_end:
+ repz retq
+#ifndef __APPLE__
+.size sp_384_from_bin,.-sp_384_from_bin
+#endif /* __APPLE__ */
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 48
+ *
+ * SysV AMD64 ABI: rdi = r (6-word number, read), rsi = a (48-byte
+ * output buffer, written).  Words are stored most significant first,
+ * byte-swapped with movbe (requires the MOVBE extension).
+ *
+ * r A single precision integer.
+ * a Byte array.
+ */
+#ifndef __APPLE__
+.globl sp_384_to_bin
+.type sp_384_to_bin,@function
+.align 16
+sp_384_to_bin:
+#else
+.globl _sp_384_to_bin
+.p2align 4
+_sp_384_to_bin:
+#endif /* __APPLE__ */
+ movbeq 40(%rdi), %rdx
+ movbeq 32(%rdi), %rax
+ movq %rdx, (%rsi)
+ movq %rax, 8(%rsi)
+ movbeq 24(%rdi), %rdx
+ movbeq 16(%rdi), %rax
+ movq %rdx, 16(%rsi)
+ movq %rax, 24(%rsi)
+ movbeq 8(%rdi), %rdx
+ movbeq (%rdi), %rax
+ movq %rdx, 32(%rsi)
+ movq %rax, 40(%rsi)
+ repz retq
+#ifndef __APPLE__
+.size sp_384_to_bin,.-sp_384_to_bin
+#endif /* __APPLE__ */
+/* Sub b from a into a. (a -= b)
+ *
+ * SysV AMD64 ABI: rdi = a (modified in place), rsi = b.
+ * All of b is loaded first, then one sub/sbb chain on a's words in
+ * memory; the final borrow is returned in rax as 0 (no borrow) or -1
+ * (borrow), via sbb from zero.
+ *
+ * a A single precision integer and result.
+ * b A single precision integer.
+ */
+#ifndef __APPLE__
+.globl sp_384_sub_in_place_6
+.type sp_384_sub_in_place_6,@function
+.align 16
+sp_384_sub_in_place_6:
+#else
+.globl _sp_384_sub_in_place_6
+.p2align 4
+_sp_384_sub_in_place_6:
+#endif /* __APPLE__ */
+ xorq %rax, %rax
+ movq (%rsi), %rdx
+ movq 8(%rsi), %rcx
+ movq 16(%rsi), %r8
+ movq 24(%rsi), %r9
+ movq 32(%rsi), %r10
+ movq 40(%rsi), %r11
+ subq %rdx, (%rdi)
+ sbbq %rcx, 8(%rdi)
+ sbbq %r8, 16(%rdi)
+ sbbq %r9, 24(%rdi)
+ sbbq %r10, 32(%rdi)
+ sbbq %r11, 40(%rdi)
+ # rax = 0 - borrow: 0 when no borrow, -1 (all ones) when borrow
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_384_sub_in_place_6,.-sp_384_sub_in_place_6
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract and 0 when not copying.
+ *
+ * SysV AMD64 ABI: rdi = r, rsi = a, rdx = b, rcx = m.
+ * Constant time: each b word is masked with BMI2 pextq (an all-ones
+ * mask selects the whole word, a zero mask yields 0) and then
+ * unconditionally subtracted, so the instruction stream is identical
+ * for both mask values.  Returns the final borrow in rax as 0 or -1.
+ *
+ * r A single precision number representing condition subtract result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_384_cond_sub_avx2_6
+.type sp_384_cond_sub_avx2_6,@function
+.align 16
+sp_384_cond_sub_avx2_6:
+#else
+.globl _sp_384_cond_sub_avx2_6
+.p2align 4
+_sp_384_cond_sub_avx2_6:
+#endif /* __APPLE__ */
+ movq $0, %rax
+ movq (%rdx), %r10
+ movq (%rsi), %r8
+ pextq %rcx, %r10, %r10
+ subq %r10, %r8
+ movq 8(%rdx), %r10
+ movq 8(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, (%rdi)
+ sbbq %r10, %r9
+ movq 16(%rdx), %r8
+ movq 16(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 8(%rdi)
+ sbbq %r8, %r10
+ movq 24(%rdx), %r9
+ movq 24(%rsi), %r8
+ pextq %rcx, %r9, %r9
+ movq %r10, 16(%rdi)
+ sbbq %r9, %r8
+ movq 32(%rdx), %r10
+ movq 32(%rsi), %r9
+ pextq %rcx, %r10, %r10
+ movq %r8, 24(%rdi)
+ sbbq %r10, %r9
+ movq 40(%rdx), %r8
+ movq 40(%rsi), %r10
+ pextq %rcx, %r8, %r8
+ movq %r9, 32(%rdi)
+ sbbq %r8, %r10
+ movq %r10, 40(%rdi)
+ # rax = 0 - borrow: 0 when no borrow, -1 (all ones) when borrow
+ sbbq $0, %rax
+ repz retq
+#ifndef __APPLE__
+.size sp_384_cond_sub_avx2_6,.-sp_384_cond_sub_avx2_6
+#endif /* __APPLE__ */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * SysV AMD64 ABI: rdi = r (7 words out: 6-word product plus a top carry
+ * word at offset 48), rsi = a, rdx = b (copied to rcx because mulq
+ * clobbers rdx).  Uses mulq with r8/r9/r10 as a rotating three-word
+ * accumulator window.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_384_mul_d_6
+.type sp_384_mul_d_6,@function
+.align 16
+sp_384_mul_d_6:
+#else
+.globl _sp_384_mul_d_6
+.p2align 4
+_sp_384_mul_d_6:
+#endif /* __APPLE__ */
+ movq %rdx, %rcx
+ # A[0] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ movq %r8, (%rdi)
+ # A[1] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 8(%rsi)
+ addq %rax, %r9
+ movq %r9, 8(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 16(%rsi)
+ addq %rax, %r10
+ movq %r10, 16(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 24(%rsi)
+ addq %rax, %r8
+ movq %r8, 24(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 32(%rsi)
+ addq %rax, %r9
+ movq %r9, 32(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B
+ movq %rcx, %rax
+ mulq 40(%rsi)
+ addq %rax, %r10
+ adcq %rdx, %r8
+ movq %r10, 40(%rdi)
+ # Top carry word of the 6x1 product
+ movq %r8, 48(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_384_mul_d_6,.-sp_384_mul_d_6
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * SysV AMD64 ABI: rdi = r (7 words out: 6-word product plus a top carry
+ * word at offset 48), rsi = a, rdx = b (moved to rax then back to rdx,
+ * which mulx reads implicitly).  BMI2 mulx with ADX adcx/adox carry
+ * chains; r11 is kept zero as the carry source.
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_384_mul_d_avx2_6
+.type sp_384_mul_d_avx2_6,@function
+.align 16
+sp_384_mul_d_avx2_6:
+#else
+.globl _sp_384_mul_d_avx2_6
+.p2align 4
+_sp_384_mul_d_avx2_6:
+#endif /* __APPLE__ */
+ movq %rdx, %rax
+ # A[0] * B
+ movq %rax, %rdx
+ xorq %r11, %r11
+ mulxq (%rsi), %r9, %r10
+ movq %r9, (%rdi)
+ # A[1] * B
+ mulxq 8(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 8(%rdi)
+ adoxq %r8, %r9
+ # A[2] * B
+ mulxq 16(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 16(%rdi)
+ adoxq %r8, %r10
+ # A[3] * B
+ mulxq 24(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ movq %r10, 24(%rdi)
+ adoxq %r8, %r9
+ # A[4] * B
+ mulxq 32(%rsi), %rcx, %r8
+ movq %r11, %r10
+ adcxq %rcx, %r9
+ movq %r9, 32(%rdi)
+ adoxq %r8, %r10
+ # A[5] * B
+ mulxq 40(%rsi), %rcx, %r8
+ movq %r11, %r9
+ adcxq %rcx, %r10
+ adoxq %r8, %r9
+ adcxq %r11, %r9
+ movq %r10, 40(%rdi)
+ # Top carry word of the 6x1 product
+ movq %r9, 48(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_384_mul_d_avx2_6,.-sp_384_mul_d_avx2_6
+#endif /* __APPLE__ */
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* WOLFSSL_SP_384 */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/srp.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/srp.c
new file mode 100644
index 000000000..cf5eff19a
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/srp.c
@@ -0,0 +1,756 @@
+/* srp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFCRYPT_HAVE_SRP
+
+#include <wolfssl/wolfcrypt/srp.h>
+#include <wolfssl/wolfcrypt/random.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+/** Computes the session key using the Mask Generation Function 1. */
+static int wc_SrpSetKey(Srp* srp, byte* secret, word32 size);
+
+static int SrpHashInit(SrpHash* hash, SrpType type)
+{
+ hash->type = type;
+
+ switch (type) {
+ case SRP_TYPE_SHA:
+ #ifndef NO_SHA
+ return wc_InitSha(&hash->data.sha);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ case SRP_TYPE_SHA256:
+ #ifndef NO_SHA256
+ return wc_InitSha256(&hash->data.sha256);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ case SRP_TYPE_SHA384:
+ #ifdef WOLFSSL_SHA384
+ return wc_InitSha384(&hash->data.sha384);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ case SRP_TYPE_SHA512:
+ #ifdef WOLFSSL_SHA512
+ return wc_InitSha512(&hash->data.sha512);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ default:
+ return BAD_FUNC_ARG;
+ }
+}
+
+static int SrpHashUpdate(SrpHash* hash, const byte* data, word32 size)
+{
+ switch (hash->type) {
+ case SRP_TYPE_SHA:
+ #ifndef NO_SHA
+ return wc_ShaUpdate(&hash->data.sha, data, size);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ case SRP_TYPE_SHA256:
+ #ifndef NO_SHA256
+ return wc_Sha256Update(&hash->data.sha256, data, size);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ case SRP_TYPE_SHA384:
+ #ifdef WOLFSSL_SHA384
+ return wc_Sha384Update(&hash->data.sha384, data, size);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ case SRP_TYPE_SHA512:
+ #ifdef WOLFSSL_SHA512
+ return wc_Sha512Update(&hash->data.sha512, data, size);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ default:
+ return BAD_FUNC_ARG;
+ }
+}
+
+static int SrpHashFinal(SrpHash* hash, byte* digest)
+{
+ switch (hash->type) {
+ case SRP_TYPE_SHA:
+ #ifndef NO_SHA
+ return wc_ShaFinal(&hash->data.sha, digest);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ case SRP_TYPE_SHA256:
+ #ifndef NO_SHA256
+ return wc_Sha256Final(&hash->data.sha256, digest);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ case SRP_TYPE_SHA384:
+ #ifdef WOLFSSL_SHA384
+ return wc_Sha384Final(&hash->data.sha384, digest);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ case SRP_TYPE_SHA512:
+ #ifdef WOLFSSL_SHA512
+ return wc_Sha512Final(&hash->data.sha512, digest);
+ #else
+ return BAD_FUNC_ARG;
+ #endif
+
+ default:
+ return BAD_FUNC_ARG;
+ }
+}
+
+static word32 SrpHashSize(SrpType type)
+{
+ switch (type) {
+ case SRP_TYPE_SHA:
+ #ifndef NO_SHA
+ return WC_SHA_DIGEST_SIZE;
+ #else
+ return 0;
+ #endif
+
+ case SRP_TYPE_SHA256:
+ #ifndef NO_SHA256
+ return WC_SHA256_DIGEST_SIZE;
+ #else
+ return 0;
+ #endif
+
+ case SRP_TYPE_SHA384:
+ #ifdef WOLFSSL_SHA384
+ return WC_SHA384_DIGEST_SIZE;
+ #else
+ return 0;
+ #endif
+
+ case SRP_TYPE_SHA512:
+ #ifdef WOLFSSL_SHA512
+ return WC_SHA512_DIGEST_SIZE;
+ #else
+ return 0;
+ #endif
+
+ default:
+ return 0;
+ }
+}
+
+int wc_SrpInit(Srp* srp, SrpType type, SrpSide side)
+{
+ int r;
+
+ /* validating params */
+
+ if (!srp)
+ return BAD_FUNC_ARG;
+
+ if (side != SRP_CLIENT_SIDE && side != SRP_SERVER_SIDE)
+ return BAD_FUNC_ARG;
+
+ switch (type) {
+ case SRP_TYPE_SHA:
+ #ifdef NO_SHA
+ return NOT_COMPILED_IN;
+ #else
+ break; /* OK */
+ #endif
+
+ case SRP_TYPE_SHA256:
+ #ifdef NO_SHA256
+ return NOT_COMPILED_IN;
+ #else
+ break; /* OK */
+ #endif
+
+ case SRP_TYPE_SHA384:
+ #ifndef WOLFSSL_SHA384
+ return NOT_COMPILED_IN;
+ #else
+ break; /* OK */
+ #endif
+
+ case SRP_TYPE_SHA512:
+ #ifndef WOLFSSL_SHA512
+ return NOT_COMPILED_IN;
+ #else
+ break; /* OK */
+ #endif
+
+ default:
+ return BAD_FUNC_ARG;
+ }
+
+ /* initializing variables */
+
+ XMEMSET(srp, 0, sizeof(Srp));
+
+ if ((r = SrpHashInit(&srp->client_proof, type)) != 0)
+ return r;
+
+ if ((r = SrpHashInit(&srp->server_proof, type)) != 0)
+ return r;
+
+ if ((r = mp_init_multi(&srp->N, &srp->g, &srp->auth,
+ &srp->priv, 0, 0)) != 0)
+ return r;
+
+ srp->side = side; srp->type = type;
+ srp->salt = NULL; srp->saltSz = 0;
+ srp->user = NULL; srp->userSz = 0;
+ srp->key = NULL; srp->keySz = 0;
+
+ srp->keyGenFunc_cb = wc_SrpSetKey;
+
+ /* default heap hint to NULL or test value */
+#ifdef WOLFSSL_HEAP_TEST
+ srp->heap = (void*)WOLFSSL_HEAP_TEST;
+#else
+ srp->heap = NULL;
+#endif
+
+ return 0;
+}
+
+void wc_SrpTerm(Srp* srp)
+{
+ if (srp) {
+ mp_clear(&srp->N); mp_clear(&srp->g);
+ mp_clear(&srp->auth); mp_clear(&srp->priv);
+ if (srp->salt) {
+ ForceZero(srp->salt, srp->saltSz);
+ XFREE(srp->salt, srp->heap, DYNAMIC_TYPE_SRP);
+ }
+ if (srp->user) {
+ ForceZero(srp->user, srp->userSz);
+ XFREE(srp->user, srp->heap, DYNAMIC_TYPE_SRP);
+ }
+ if (srp->key) {
+ ForceZero(srp->key, srp->keySz);
+ XFREE(srp->key, srp->heap, DYNAMIC_TYPE_SRP);
+ }
+
+ ForceZero(srp, sizeof(Srp));
+ }
+}
+
+int wc_SrpSetUsername(Srp* srp, const byte* username, word32 size)
+{
+ if (!srp || !username)
+ return BAD_FUNC_ARG;
+
+ srp->user = (byte*)XMALLOC(size, srp->heap, DYNAMIC_TYPE_SRP);
+ if (srp->user == NULL)
+ return MEMORY_E;
+
+ srp->userSz = size;
+ XMEMCPY(srp->user, username, srp->userSz);
+
+ return 0;
+}
+
+int wc_SrpSetParams(Srp* srp, const byte* N, word32 nSz,
+ const byte* g, word32 gSz,
+ const byte* salt, word32 saltSz)
+{
+ SrpHash hash;
+ byte digest1[SRP_MAX_DIGEST_SIZE];
+ byte digest2[SRP_MAX_DIGEST_SIZE];
+ byte pad = 0;
+ int i, r;
+ int j = 0;
+
+ if (!srp || !N || !g || !salt || nSz < gSz)
+ return BAD_FUNC_ARG;
+
+ if (!srp->user)
+ return SRP_CALL_ORDER_E;
+
+ /* Set N */
+ if (mp_read_unsigned_bin(&srp->N, N, nSz) != MP_OKAY)
+ return MP_READ_E;
+
+ if (mp_count_bits(&srp->N) < SRP_MODULUS_MIN_BITS)
+ return BAD_FUNC_ARG;
+
+ /* Set g */
+ if (mp_read_unsigned_bin(&srp->g, g, gSz) != MP_OKAY)
+ return MP_READ_E;
+
+ if (mp_cmp(&srp->N, &srp->g) != MP_GT)
+ return BAD_FUNC_ARG;
+
+ /* Set salt */
+ if (srp->salt) {
+ ForceZero(srp->salt, srp->saltSz);
+ XFREE(srp->salt, srp->heap, DYNAMIC_TYPE_SRP);
+ }
+
+ srp->salt = (byte*)XMALLOC(saltSz, srp->heap, DYNAMIC_TYPE_SRP);
+ if (srp->salt == NULL)
+ return MEMORY_E;
+
+ XMEMCPY(srp->salt, salt, saltSz);
+ srp->saltSz = saltSz;
+
+ /* Set k = H(N, g) */
+ r = SrpHashInit(&hash, srp->type);
+ if (!r) r = SrpHashUpdate(&hash, (byte*) N, nSz);
+ for (i = 0; (word32)i < nSz - gSz; i++) {
+ if (!r) r = SrpHashUpdate(&hash, &pad, 1);
+ }
+ if (!r) r = SrpHashUpdate(&hash, (byte*) g, gSz);
+ if (!r) r = SrpHashFinal(&hash, srp->k);
+
+ /* update client proof */
+
+ /* digest1 = H(N) */
+ if (!r) r = SrpHashInit(&hash, srp->type);
+ if (!r) r = SrpHashUpdate(&hash, (byte*) N, nSz);
+ if (!r) r = SrpHashFinal(&hash, digest1);
+
+ /* digest2 = H(g) */
+ if (!r) r = SrpHashInit(&hash, srp->type);
+ if (!r) r = SrpHashUpdate(&hash, (byte*) g, gSz);
+ if (!r) r = SrpHashFinal(&hash, digest2);
+
+ /* digest1 = H(N) ^ H(g) */
+ if (r == 0) {
+ for (i = 0, j = SrpHashSize(srp->type); i < j; i++)
+ digest1[i] ^= digest2[i];
+ }
+
+ /* digest2 = H(user) */
+ if (!r) r = SrpHashInit(&hash, srp->type);
+ if (!r) r = SrpHashUpdate(&hash, srp->user, srp->userSz);
+ if (!r) r = SrpHashFinal(&hash, digest2);
+
+ /* client proof = H( H(N) ^ H(g) | H(user) | salt) */
+ if (!r) r = SrpHashUpdate(&srp->client_proof, digest1, j);
+ if (!r) r = SrpHashUpdate(&srp->client_proof, digest2, j);
+ if (!r) r = SrpHashUpdate(&srp->client_proof, salt, saltSz);
+
+ return r;
+}
+
+int wc_SrpSetPassword(Srp* srp, const byte* password, word32 size)
+{
+ SrpHash hash;
+ byte digest[SRP_MAX_DIGEST_SIZE];
+ word32 digestSz;
+ int r;
+
+ if (!srp || !password || srp->side != SRP_CLIENT_SIDE)
+ return BAD_FUNC_ARG;
+
+ if (!srp->salt)
+ return SRP_CALL_ORDER_E;
+
+ digestSz = SrpHashSize(srp->type);
+
+ /* digest = H(username | ':' | password) */
+ r = SrpHashInit(&hash, srp->type);
+ if (!r) r = SrpHashUpdate(&hash, srp->user, srp->userSz);
+ if (!r) r = SrpHashUpdate(&hash, (const byte*) ":", 1);
+ if (!r) r = SrpHashUpdate(&hash, password, size);
+ if (!r) r = SrpHashFinal(&hash, digest);
+
+ /* digest = H(salt | H(username | ':' | password)) */
+ if (!r) r = SrpHashInit(&hash, srp->type);
+ if (!r) r = SrpHashUpdate(&hash, srp->salt, srp->saltSz);
+ if (!r) r = SrpHashUpdate(&hash, digest, digestSz);
+ if (!r) r = SrpHashFinal(&hash, digest);
+
+ /* Set x (private key) */
+ if (!r) r = mp_read_unsigned_bin(&srp->auth, digest, digestSz);
+
+ ForceZero(digest, SRP_MAX_DIGEST_SIZE);
+
+ return r;
+}
+
+int wc_SrpGetVerifier(Srp* srp, byte* verifier, word32* size)
+{
+ mp_int v;
+ int r;
+
+ if (!srp || !verifier || !size || srp->side != SRP_CLIENT_SIDE)
+ return BAD_FUNC_ARG;
+
+ if (mp_iszero(&srp->auth) == MP_YES)
+ return SRP_CALL_ORDER_E;
+
+ r = mp_init(&v);
+ if (r != MP_OKAY)
+ return MP_INIT_E;
+
+ /* v = g ^ x % N */
+ if (!r) r = mp_exptmod(&srp->g, &srp->auth, &srp->N, &v);
+ if (!r) r = *size < (word32)mp_unsigned_bin_size(&v) ? BUFFER_E : MP_OKAY;
+ if (!r) r = mp_to_unsigned_bin(&v, verifier);
+ if (!r) *size = mp_unsigned_bin_size(&v);
+
+ mp_clear(&v);
+
+ return r;
+}
+
+int wc_SrpSetVerifier(Srp* srp, const byte* verifier, word32 size)
+{
+ if (!srp || !verifier || srp->side != SRP_SERVER_SIDE)
+ return BAD_FUNC_ARG;
+
+ return mp_read_unsigned_bin(&srp->auth, verifier, size);
+}
+
+int wc_SrpSetPrivate(Srp* srp, const byte* priv, word32 size)
+{
+ mp_int p;
+ int r;
+
+ if (!srp || !priv || !size)
+ return BAD_FUNC_ARG;
+
+ if (mp_iszero(&srp->auth) == MP_YES)
+ return SRP_CALL_ORDER_E;
+
+ r = mp_init(&p);
+ if (r != MP_OKAY)
+ return MP_INIT_E;
+ if (!r) r = mp_read_unsigned_bin(&p, priv, size);
+ if (!r) r = mp_mod(&p, &srp->N, &srp->priv);
+ if (!r) r = mp_iszero(&srp->priv) == MP_YES ? SRP_BAD_KEY_E : 0;
+
+ mp_clear(&p);
+
+ return r;
+}
+
+/** Generates random data using wolfcrypt RNG. */
+static int wc_SrpGenPrivate(Srp* srp, byte* priv, word32 size)
+{
+ WC_RNG rng;
+ int r = wc_InitRng(&rng);
+
+ if (!r) r = wc_RNG_GenerateBlock(&rng, priv, size);
+ if (!r) r = wc_SrpSetPrivate(srp, priv, size);
+ if (!r) wc_FreeRng(&rng);
+
+ return r;
+}
+
+int wc_SrpGetPublic(Srp* srp, byte* pub, word32* size)
+{
+ mp_int pubkey;
+ word32 modulusSz;
+ int r;
+
+ if (!srp || !pub || !size)
+ return BAD_FUNC_ARG;
+
+ if (mp_iszero(&srp->auth) == MP_YES)
+ return SRP_CALL_ORDER_E;
+
+ modulusSz = mp_unsigned_bin_size(&srp->N);
+ if (*size < modulusSz)
+ return BUFFER_E;
+
+ r = mp_init(&pubkey);
+ if (r != MP_OKAY)
+ return MP_INIT_E;
+
+ /* priv = random() */
+ if (mp_iszero(&srp->priv) == MP_YES)
+ r = wc_SrpGenPrivate(srp, pub, SRP_PRIVATE_KEY_MIN_BITS / 8);
+
+ /* client side: A = g ^ a % N */
+ if (srp->side == SRP_CLIENT_SIDE) {
+ if (!r) r = mp_exptmod(&srp->g, &srp->priv, &srp->N, &pubkey);
+
+ /* server side: B = (k * v + (g ^ b % N)) % N */
+ } else {
+ mp_int i, j;
+
+ if (mp_init_multi(&i, &j, 0, 0, 0, 0) == MP_OKAY) {
+ if (!r) r = mp_read_unsigned_bin(&i, srp->k,SrpHashSize(srp->type));
+ if (!r) r = mp_iszero(&i) == MP_YES ? SRP_BAD_KEY_E : 0;
+ if (!r) r = mp_exptmod(&srp->g, &srp->priv, &srp->N, &pubkey);
+ if (!r) r = mp_mulmod(&i, &srp->auth, &srp->N, &j);
+ if (!r) r = mp_add(&j, &pubkey, &i);
+ if (!r) r = mp_mod(&i, &srp->N, &pubkey);
+
+ mp_clear(&i); mp_clear(&j);
+ }
+ }
+
+ /* extract public key to buffer */
+ XMEMSET(pub, 0, modulusSz);
+ if (!r) r = mp_to_unsigned_bin(&pubkey, pub);
+ if (!r) *size = mp_unsigned_bin_size(&pubkey);
+ mp_clear(&pubkey);
+
+ return r;
+}
+
+static int wc_SrpSetKey(Srp* srp, byte* secret, word32 size)
+{
+ SrpHash hash;
+ byte digest[SRP_MAX_DIGEST_SIZE];
+ word32 i, j, digestSz = SrpHashSize(srp->type);
+ byte counter[4];
+ int r = BAD_FUNC_ARG;
+
+ XMEMSET(digest, 0, SRP_MAX_DIGEST_SIZE);
+
+ srp->key = (byte*)XMALLOC(2 * digestSz, srp->heap, DYNAMIC_TYPE_SRP);
+ if (srp->key == NULL)
+ return MEMORY_E;
+
+ srp->keySz = 2 * digestSz;
+
+ for (i = j = 0; j < srp->keySz; i++) {
+ counter[0] = (i >> 24) & 0xFF;
+ counter[1] = (i >> 16) & 0xFF;
+ counter[2] = (i >> 8) & 0xFF;
+ counter[3] = i & 0xFF;
+
+ r = SrpHashInit(&hash, srp->type);
+ if (!r) r = SrpHashUpdate(&hash, secret, size);
+ if (!r) r = SrpHashUpdate(&hash, counter, 4);
+
+ if (j + digestSz > srp->keySz) {
+ if (!r) r = SrpHashFinal(&hash, digest);
+ XMEMCPY(srp->key + j, digest, srp->keySz - j);
+ j = srp->keySz;
+ }
+ else {
+ if (!r) r = SrpHashFinal(&hash, srp->key + j);
+ j += digestSz;
+ }
+ }
+
+ ForceZero(digest, sizeof(digest));
+ ForceZero(&hash, sizeof(SrpHash));
+
+ return r;
+}
+
+int wc_SrpComputeKey(Srp* srp, byte* clientPubKey, word32 clientPubKeySz,
+ byte* serverPubKey, word32 serverPubKeySz)
+{
+ SrpHash hash;
+ byte *secret;
+ byte digest[SRP_MAX_DIGEST_SIZE];
+ word32 i, secretSz, digestSz;
+ mp_int u, s, temp1, temp2;
+ byte pad = 0;
+ int r;
+
+ /* validating params */
+
+ if (!srp || !clientPubKey || clientPubKeySz == 0
+ || !serverPubKey || serverPubKeySz == 0)
+ return BAD_FUNC_ARG;
+
+ if (mp_iszero(&srp->priv) == MP_YES)
+ return SRP_CALL_ORDER_E;
+
+ /* initializing variables */
+
+ if ((r = SrpHashInit(&hash, srp->type)) != 0)
+ return r;
+
+ digestSz = SrpHashSize(srp->type);
+ secretSz = mp_unsigned_bin_size(&srp->N);
+
+ if ((secret = (byte*)XMALLOC(secretSz, srp->heap, DYNAMIC_TYPE_SRP)) ==NULL)
+ return MEMORY_E;
+
+ if ((r = mp_init_multi(&u, &s, &temp1, &temp2, 0, 0)) != MP_OKAY) {
+ XFREE(secret, srp->heap, DYNAMIC_TYPE_SRP);
+ return r;
+ }
+
+ /* building u (random scrambling parameter) */
+
+ /* H(A) */
+ for (i = 0; !r && i < secretSz - clientPubKeySz; i++)
+ r = SrpHashUpdate(&hash, &pad, 1);
+ if (!r) r = SrpHashUpdate(&hash, clientPubKey, clientPubKeySz);
+
+ /* H(A | B) */
+ for (i = 0; !r && i < secretSz - serverPubKeySz; i++)
+ r = SrpHashUpdate(&hash, &pad, 1);
+ if (!r) r = SrpHashUpdate(&hash, serverPubKey, serverPubKeySz);
+
+ /* set u */
+ if (!r) r = SrpHashFinal(&hash, digest);
+ if (!r) r = mp_read_unsigned_bin(&u, digest, SrpHashSize(srp->type));
+
+ /* building s (secret) */
+
+ if (!r && srp->side == SRP_CLIENT_SIDE) {
+
+ /* temp1 = B - k * v; rejects k == 0, B == 0 and B >= N. */
+ r = mp_read_unsigned_bin(&temp1, srp->k, digestSz);
+ if (!r) r = mp_iszero(&temp1) == MP_YES ? SRP_BAD_KEY_E : 0;
+ if (!r) r = mp_exptmod(&srp->g, &srp->auth, &srp->N, &temp2);
+ if (!r) r = mp_mulmod(&temp1, &temp2, &srp->N, &s);
+ if (!r) r = mp_read_unsigned_bin(&temp2, serverPubKey, serverPubKeySz);
+ if (!r) r = mp_iszero(&temp2) == MP_YES ? SRP_BAD_KEY_E : 0;
+ if (!r) r = mp_cmp(&temp2, &srp->N) != MP_LT ? SRP_BAD_KEY_E : 0;
+ if (!r) r = mp_sub(&temp2, &s, &temp1);
+
+ /* temp2 = a + u * x */
+ if (!r) r = mp_mulmod(&u, &srp->auth, &srp->N, &s);
+ if (!r) r = mp_add(&srp->priv, &s, &temp2);
+
+ /* secret = temp1 ^ temp2 % N */
+ if (!r) r = mp_exptmod(&temp1, &temp2, &srp->N, &s);
+
+ } else if (!r && srp->side == SRP_SERVER_SIDE) {
+ /* temp1 = v ^ u % N */
+ r = mp_exptmod(&srp->auth, &u, &srp->N, &temp1);
+
+ /* temp2 = A * temp1 % N; rejects A == 0, A >= N */
+ if (!r) r = mp_read_unsigned_bin(&s, clientPubKey, clientPubKeySz);
+ if (!r) r = mp_iszero(&s) == MP_YES ? SRP_BAD_KEY_E : 0;
+ if (!r) r = mp_cmp(&s, &srp->N) != MP_LT ? SRP_BAD_KEY_E : 0;
+ if (!r) r = mp_mulmod(&s, &temp1, &srp->N, &temp2);
+
+ /* rejects A * v ^ u % N >= 1, A * v ^ u % N == -1 % N */
+ if (!r) r = mp_read_unsigned_bin(&temp1, (const byte*)"\001", 1);
+ if (!r) r = mp_cmp(&temp2, &temp1) != MP_GT ? SRP_BAD_KEY_E : 0;
+ if (!r) r = mp_sub(&srp->N, &temp1, &s);
+ if (!r) r = mp_cmp(&temp2, &s) == MP_EQ ? SRP_BAD_KEY_E : 0;
+
+ /* secret = temp2 * b % N */
+ if (!r) r = mp_exptmod(&temp2, &srp->priv, &srp->N, &s);
+ }
+
+ /* building session key from secret */
+
+ if (!r) r = mp_to_unsigned_bin(&s, secret);
+ if (!r) r = srp->keyGenFunc_cb(srp, secret, mp_unsigned_bin_size(&s));
+
+ /* updating client proof = H( H(N) ^ H(g) | H(user) | salt | A | B | K) */
+
+ if (!r) r = SrpHashUpdate(&srp->client_proof, clientPubKey, clientPubKeySz);
+ if (!r) r = SrpHashUpdate(&srp->client_proof, serverPubKey, serverPubKeySz);
+ if (!r) r = SrpHashUpdate(&srp->client_proof, srp->key, srp->keySz);
+
+ /* updating server proof = H(A) */
+
+ if (!r) r = SrpHashUpdate(&srp->server_proof, clientPubKey, clientPubKeySz);
+
+ XFREE(secret, srp->heap, DYNAMIC_TYPE_SRP);
+ mp_clear(&u); mp_clear(&s); mp_clear(&temp1); mp_clear(&temp2);
+
+ return r;
+}
+
+int wc_SrpGetProof(Srp* srp, byte* proof, word32* size)
+{
+ int r;
+
+ if (!srp || !proof || !size)
+ return BAD_FUNC_ARG;
+
+ if (*size < SrpHashSize(srp->type))
+ return BUFFER_E;
+
+ if ((r = SrpHashFinal(srp->side == SRP_CLIENT_SIDE
+ ? &srp->client_proof
+ : &srp->server_proof, proof)) != 0)
+ return r;
+
+ *size = SrpHashSize(srp->type);
+
+ if (srp->side == SRP_CLIENT_SIDE) {
+ /* server proof = H( A | client proof | K) */
+ if (!r) r = SrpHashUpdate(&srp->server_proof, proof, *size);
+ if (!r) r = SrpHashUpdate(&srp->server_proof, srp->key, srp->keySz);
+ }
+
+ return r;
+}
+
+int wc_SrpVerifyPeersProof(Srp* srp, byte* proof, word32 size)
+{
+ byte digest[SRP_MAX_DIGEST_SIZE];
+ int r;
+
+ if (!srp || !proof)
+ return BAD_FUNC_ARG;
+
+ if (size != SrpHashSize(srp->type))
+ return BUFFER_E;
+
+ r = SrpHashFinal(srp->side == SRP_CLIENT_SIDE ? &srp->server_proof
+ : &srp->client_proof, digest);
+
+ if (srp->side == SRP_SERVER_SIDE) {
+ /* server proof = H( A | client proof | K) */
+ if (!r) r = SrpHashUpdate(&srp->server_proof, proof, size);
+ if (!r) r = SrpHashUpdate(&srp->server_proof, srp->key, srp->keySz);
+ }
+
+ if (!r && XMEMCMP(proof, digest, size) != 0)
+ r = SRP_VERIFY_E;
+
+ return r;
+}
+
+#endif /* WOLFCRYPT_HAVE_SRP */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/tfm.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/tfm.c
index 2c1e86c31..61b31f0e1 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/tfm.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/tfm.c
@@ -1,8 +1,8 @@
/* tfm.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,18 +16,19 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
/*
* Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
* http://math.libtomcrypt.com
*/
/**
- * Edited by Moisés Guimarães (moisesguimaraesm@gmail.com)
- * to fit CyaSSL's needs.
+ * Edited by Moises Guimaraes (moises@wolfssl.com)
+ * to fit wolfSSL's needs.
*/
#ifdef HAVE_CONFIG_H
@@ -36,19 +37,62 @@
/* in case user set USE_FAST_MATH there */
#include <wolfssl/wolfcrypt/settings.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
#ifdef USE_FAST_MATH
+#include <wolfssl/wolfcrypt/random.h>
#include <wolfssl/wolfcrypt/tfm.h>
#include <wolfcrypt/src/asm.c> /* will define asm MACROS or C ones */
+#include <wolfssl/wolfcrypt/wolfmath.h> /* common functions */
+#if defined(FREESCALE_LTC_TFM)
+ #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
+#ifdef WOLFSSL_DEBUG_MATH
+ #include <stdio.h>
+#endif
+#ifdef USE_WINDOWS_API
+ #pragma warning(disable:4127)
+ /* Disables the warning:
+ * 4127: conditional expression is constant
+ * in this file.
+ */
+#endif
+
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+#ifdef __cplusplus
+ extern "C" {
+#endif
+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
+ mp_int* res);
+#ifdef __cplusplus
+ } /* extern "C" */
+#endif
+#endif
+
+
+#ifndef WOLFSSL_SP_MATH
/* math settings check */
word32 CheckRunTimeSettings(void)
{
return CTC_SETTINGS;
}
-
+#endif
/* math settings size check */
word32 CheckRunTimeFastMath(void)
@@ -92,12 +136,12 @@ void fp_add(fp_int *a, fp_int *b, fp_int *c)
void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
{
int x, y, oldused;
- register fp_word t;
+ fp_word t;
y = MAX(a->used, b->used);
oldused = MIN(c->used, FP_SIZE); /* help static analysis w/ largest size */
c->used = y;
-
+
t = 0;
for (x = 0; x < y; x++) {
t += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
@@ -110,6 +154,8 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
}
c->used = x;
+
+ /* zero any excess digits on the destination that we didn't write to */
for (; x < oldused; x++) {
c->dp[x] = 0;
}
@@ -171,6 +217,8 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
c->dp[x] = (fp_digit)t;
t = (t >> DIGIT_BIT)&1;
}
+
+ /* zero any excess digits on the destination that we didn't write to */
for (; x < oldused; x++) {
c->dp[x] = 0;
}
@@ -178,149 +226,166 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
}
/* c = a * b */
-void fp_mul(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul(fp_int *A, fp_int *B, fp_int *C)
{
- int y, yy;
+ int ret = 0;
+ int y, yy, oldused;
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+ ret = esp_mp_mul(A, B, C);
+ if(ret != -2) return ret;
+#endif
+
+ oldused = C->used;
y = MAX(A->used, B->used);
yy = MIN(A->used, B->used);
/* call generic if we're out of range */
if (y + yy > FP_SIZE) {
- fp_mul_comba(A, B, C);
- return ;
+ ret = fp_mul_comba(A, B, C);
+ goto clean;
}
/* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
- of the largest input. We also want to avoid doing excess mults if the
+ of the largest input. We also want to avoid doing excess mults if the
inputs are not close to the next power of two. That is, for example,
- if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
+ if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
*/
-#ifdef TFM_MUL3
+#if defined(TFM_MUL3) && FP_SIZE >= 6
if (y <= 3) {
- fp_mul_comba3(A,B,C);
- return;
+ ret = fp_mul_comba3(A,B,C);
+ goto clean;
}
#endif
-#ifdef TFM_MUL4
+#if defined(TFM_MUL4) && FP_SIZE >= 8
if (y == 4) {
- fp_mul_comba4(A,B,C);
- return;
+ ret = fp_mul_comba4(A,B,C);
+ goto clean;
}
#endif
-#ifdef TFM_MUL6
+#if defined(TFM_MUL6) && FP_SIZE >= 12
if (y <= 6) {
- fp_mul_comba6(A,B,C);
- return;
+ ret = fp_mul_comba6(A,B,C);
+ goto clean;
}
#endif
-#ifdef TFM_MUL7
+#if defined(TFM_MUL7) && FP_SIZE >= 14
if (y == 7) {
- fp_mul_comba7(A,B,C);
- return;
+ ret = fp_mul_comba7(A,B,C);
+ goto clean;
}
#endif
-#ifdef TFM_MUL8
+#if defined(TFM_MUL8) && FP_SIZE >= 16
if (y == 8) {
- fp_mul_comba8(A,B,C);
- return;
+ ret = fp_mul_comba8(A,B,C);
+ goto clean;
}
#endif
-#ifdef TFM_MUL9
+#if defined(TFM_MUL9) && FP_SIZE >= 18
if (y == 9) {
- fp_mul_comba9(A,B,C);
- return;
+ ret = fp_mul_comba9(A,B,C);
+ goto clean;
}
#endif
-#ifdef TFM_MUL12
+#if defined(TFM_MUL12) && FP_SIZE >= 24
if (y <= 12) {
- fp_mul_comba12(A,B,C);
- return;
+ ret = fp_mul_comba12(A,B,C);
+ goto clean;
}
#endif
-#ifdef TFM_MUL17
+#if defined(TFM_MUL17) && FP_SIZE >= 34
if (y <= 17) {
- fp_mul_comba17(A,B,C);
- return;
+ ret = fp_mul_comba17(A,B,C);
+ goto clean;
}
#endif
-#ifdef TFM_SMALL_SET
+#if defined(TFM_SMALL_SET) && FP_SIZE >= 32
if (y <= 16) {
- fp_mul_comba_small(A,B,C);
- return;
+ ret = fp_mul_comba_small(A,B,C);
+ goto clean;
}
-#endif
-#if defined(TFM_MUL20)
+#endif
+#if defined(TFM_MUL20) && FP_SIZE >= 40
if (y <= 20) {
- fp_mul_comba20(A,B,C);
- return;
+ ret = fp_mul_comba20(A,B,C);
+ goto clean;
}
#endif
-#if defined(TFM_MUL24)
+#if defined(TFM_MUL24) && FP_SIZE >= 48
if (yy >= 16 && y <= 24) {
- fp_mul_comba24(A,B,C);
- return;
+ ret = fp_mul_comba24(A,B,C);
+ goto clean;
}
#endif
-#if defined(TFM_MUL28)
+#if defined(TFM_MUL28) && FP_SIZE >= 56
if (yy >= 20 && y <= 28) {
- fp_mul_comba28(A,B,C);
- return;
+ ret = fp_mul_comba28(A,B,C);
+ goto clean;
}
#endif
-#if defined(TFM_MUL32)
+#if defined(TFM_MUL32) && FP_SIZE >= 64
if (yy >= 24 && y <= 32) {
- fp_mul_comba32(A,B,C);
- return;
+ ret = fp_mul_comba32(A,B,C);
+ goto clean;
}
#endif
-#if defined(TFM_MUL48)
+#if defined(TFM_MUL48) && FP_SIZE >= 96
if (yy >= 40 && y <= 48) {
- fp_mul_comba48(A,B,C);
- return;
+ ret = fp_mul_comba48(A,B,C);
+ goto clean;
}
-#endif
-#if defined(TFM_MUL64)
+#endif
+#if defined(TFM_MUL64) && FP_SIZE >= 128
if (yy >= 56 && y <= 64) {
- fp_mul_comba64(A,B,C);
- return;
+ ret = fp_mul_comba64(A,B,C);
+ goto clean;
}
#endif
- fp_mul_comba(A,B,C);
+ ret = fp_mul_comba(A,B,C);
+
+clean:
+ /* zero any excess digits on the destination that we didn't write to */
+ for (y = C->used; y >= 0 && y < oldused; y++) {
+ C->dp[y] = 0;
+ }
+
+ return ret;
}
void fp_mul_2(fp_int * a, fp_int * b)
{
int x, oldused;
-
+
oldused = b->used;
b->used = a->used;
{
- register fp_digit r, rr, *tmpa, *tmpb;
+ fp_digit r, rr, *tmpa, *tmpb;
/* alias for source */
tmpa = a->dp;
-
+
/* alias for dest */
tmpb = b->dp;
/* carry */
r = 0;
for (x = 0; x < a->used; x++) {
-
- /* get what will be the *next* carry bit from the
- * MSB of the current digit
+
+ /* get what will be the *next* carry bit from the
+ * MSB of the current digit
*/
rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
-
+
/* now shift up this digit, add in the carry [from the previous] */
*tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
-
- /* copy the carry that would be from the source
- * digit into the next iteration
+
+ /* copy the carry that would be from the source
+ * digit into the next iteration
*/
r = rr;
}
@@ -332,9 +397,7 @@ void fp_mul_2(fp_int * a, fp_int * b)
++(b->used);
}
- /* now zero any excess digits on the destination
- * that we didn't write to
- */
+ /* zero any excess digits on the destination that we didn't write to */
tmpb = b->dp + b->used;
for (x = b->used; x < oldused; x++) {
*tmpb++ = 0;
@@ -362,7 +425,10 @@ void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
c->dp[c->used++] = (fp_digit) w;
++x;
}
- for (; x < oldused; x++) {
+
+ /* zero any excess digits on the destination that we didn't write to */
+ /* also checking FP_SIZE here for static analysis */
+ for (; x < oldused && x < FP_SIZE; x++) {
c->dp[x] = 0;
}
fp_clamp(c);
@@ -385,7 +451,7 @@ void fp_mul_2d(fp_int *a, int b, fp_int *c)
/* shift the digits */
if (b != 0) {
- carry = 0;
+ carry = 0;
shift = DIGIT_BIT - b;
for (x = 0; x < c->used; x++) {
carrytmp = c->dp[x] >> shift;
@@ -403,24 +469,37 @@ void fp_mul_2d(fp_int *a, int b, fp_int *c)
/* generic PxQ multiplier */
#if defined(HAVE_INTEL_MULX)
-INLINE static void fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
+WC_INLINE static int fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
-{
+{
int ix, iy, iz, pa;
- fp_int tmp, *dst;
+ fp_int *dst;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int tmp[1];
+#else
+ fp_int *tmp;
+#endif
+
+ /* Variables used but not seen by cppcheck. */
+ (void)ix; (void)iy; (void)iz;
+
+#ifdef WOLFSSL_SMALL_STACK
+ tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (tmp == NULL)
+ return FP_MEM;
+#endif
/* get size of output and trim */
pa = A->used + B->used;
if (pa >= FP_SIZE) {
pa = FP_SIZE-1;
}
-
- if (A == C || B == C) {
- fp_init(&tmp);
- dst = &tmp;
- } else {
- fp_zero(C);
- dst = C;
+
+ /* Always take branch to use tmp variable. This avoids a cache attack for
+ * determining if C equals A */
+ if (1) {
+ fp_init(tmp);
+ dst = tmp;
}
TFM_INTEL_MUL_COMBA(A, B, dst) ;
@@ -428,45 +507,62 @@ INLINE static void fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
dst->used = pa;
dst->sign = A->sign ^ B->sign;
fp_clamp(dst);
- fp_copy(dst, C);
+ fp_copy(dst, C);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+ return FP_OKAY;
}
#endif
-void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
+int fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
{
+ int ret = 0;
int ix, iy, iz, tx, ty, pa;
fp_digit c0, c1, c2, *tmpx, *tmpy;
- fp_int tmp, *dst;
+ fp_int *dst;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int tmp[1];
+#else
+ fp_int *tmp;
+#endif
+
+ IF_HAVE_INTEL_MULX(ret = fp_mul_comba_mulx(A, B, C), return ret) ;
- IF_HAVE_INTEL_MULX(fp_mul_comba_mulx(A, B, C), return) ;
+#ifdef WOLFSSL_SMALL_STACK
+ tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (tmp == NULL)
+ return FP_MEM;
+#endif
COMBA_START;
COMBA_CLEAR;
-
+
/* get size of output and trim */
pa = A->used + B->used;
if (pa >= FP_SIZE) {
pa = FP_SIZE-1;
}
- if (A == C || B == C) {
- fp_init(&tmp);
- dst = &tmp;
- } else {
- fp_zero(C);
- dst = C;
+ /* Always take branch to use tmp variable. This avoids a cache attack for
+ * determining if C equals A */
+ if (1) {
+ fp_init(tmp);
+ dst = tmp;
}
for (ix = 0; ix < pa; ix++) {
/* get offsets into the two bignums */
- ty = MIN(ix, B->used-1);
+ ty = MIN(ix, (B->used > 0 ? B->used - 1 : 0));
tx = ix - ty;
/* setup temp aliases */
tmpx = A->dp + tx;
tmpy = B->dp + ty;
- /* this is the number of times the loop will iterrate, essentially its
+ /* this is the number of times the loop will iterate, essentially its
while (tx++ < a->used && ty-- >= 0) { ... }
*/
iy = MIN(A->used-tx, ty+1);
@@ -474,8 +570,9 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
/* execute loop */
COMBA_FORWARD;
for (iz = 0; iz < iy; ++iz) {
- /* TAO change COMBA_ADD back to MULADD */
- MULADD(*tmpx++, *tmpy--);
+ fp_digit _tmpx = *tmpx++;
+ fp_digit _tmpy = *tmpy--;
+ MULADD(_tmpx, _tmpy);
}
/* store term */
@@ -487,16 +584,28 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
dst->sign = A->sign ^ B->sign;
fp_clamp(dst);
fp_copy(dst, C);
+
+ /* Variables used but not seen by cppcheck. */
+ (void)c0; (void)c1; (void)c2;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return ret;
}
/* a/b => cb + d == a */
int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
{
- fp_int q, x, y, t1, t2;
int n, t, i, norm, neg;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int q[1], x[1], y[1], t1[1], t2[1];
+#else
+ fp_int *q, *x, *y, *t1, *t2;
+#endif
/* is divisor zero ? */
- if (fp_iszero (b) == 1) {
+ if (fp_iszero (b) == FP_YES) {
return FP_VAL;
}
@@ -504,131 +613,140 @@ int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
if (fp_cmp_mag (a, b) == FP_LT) {
if (d != NULL) {
fp_copy (a, d);
- }
+ }
if (c != NULL) {
fp_zero (c);
}
return FP_OKAY;
}
- fp_init(&q);
- q.used = a->used + 2;
+#ifdef WOLFSSL_SMALL_STACK
+ q = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
+ if (q == NULL) {
+ return FP_MEM;
+ }
+ x = &q[1]; y = &q[2]; t1 = &q[3]; t2 = &q[4];
+#endif
+
+ fp_init(q);
+ q->used = a->used + 2;
- fp_init(&t1);
- fp_init(&t2);
- fp_init_copy(&x, a);
- fp_init_copy(&y, b);
+ fp_init(t1);
+ fp_init(t2);
+ fp_init_copy(x, a);
+ fp_init_copy(y, b);
/* fix the sign */
neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
- x.sign = y.sign = FP_ZPOS;
+ x->sign = y->sign = FP_ZPOS;
/* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
- norm = fp_count_bits(&y) % DIGIT_BIT;
+ norm = fp_count_bits(y) % DIGIT_BIT;
if (norm < (int)(DIGIT_BIT-1)) {
norm = (DIGIT_BIT-1) - norm;
- fp_mul_2d (&x, norm, &x);
- fp_mul_2d (&y, norm, &y);
+ fp_mul_2d (x, norm, x);
+ fp_mul_2d (y, norm, y);
} else {
norm = 0;
}
/* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
- n = x.used - 1;
- t = y.used - 1;
+ n = x->used - 1;
+ t = y->used - 1;
/* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
- fp_lshd (&y, n - t); /* y = y*b**{n-t} */
+ fp_lshd (y, n - t); /* y = y*b**{n-t} */
- while (fp_cmp (&x, &y) != FP_LT) {
- ++(q.dp[n - t]);
- fp_sub (&x, &y, &x);
+ while (fp_cmp (x, y) != FP_LT) {
+ ++(q->dp[n - t]);
+ fp_sub (x, y, x);
}
/* reset y by shifting it back down */
- fp_rshd (&y, n - t);
+ fp_rshd (y, n - t);
/* step 3. for i from n down to (t + 1) */
for (i = n; i >= (t + 1); i--) {
- if (i > x.used) {
+ if (i > x->used) {
continue;
}
- /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
+ /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
* otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
- if (x.dp[i] == y.dp[t]) {
- q.dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
+ if (x->dp[i] == y->dp[t]) {
+ q->dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
} else {
fp_word tmp;
- tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
- tmp |= ((fp_word) x.dp[i - 1]);
- tmp /= ((fp_word)y.dp[t]);
- q.dp[i - t - 1] = (fp_digit) (tmp);
+ tmp = ((fp_word) x->dp[i]) << ((fp_word) DIGIT_BIT);
+ tmp |= ((fp_word) x->dp[i - 1]);
+ tmp /= ((fp_word)y->dp[t]);
+ q->dp[i - t - 1] = (fp_digit) (tmp);
}
- /* while (q{i-t-1} * (yt * b + y{t-1})) >
- xi * b**2 + xi-1 * b + xi-2
-
- do q{i-t-1} -= 1;
+ /* while (q{i-t-1} * (yt * b + y{t-1})) >
+ xi * b**2 + xi-1 * b + xi-2
+
+ do q{i-t-1} -= 1;
*/
- q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
+ q->dp[i - t - 1] = (q->dp[i - t - 1] + 1);
do {
- q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
+ q->dp[i - t - 1] = (q->dp[i - t - 1] - 1);
/* find left hand */
- fp_zero (&t1);
- t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
- t1.dp[1] = y.dp[t];
- t1.used = 2;
- fp_mul_d (&t1, q.dp[i - t - 1], &t1);
+ fp_zero (t1);
+ t1->dp[0] = (t - 1 < 0) ? 0 : y->dp[t - 1];
+ t1->dp[1] = y->dp[t];
+ t1->used = 2;
+ fp_mul_d (t1, q->dp[i - t - 1], t1);
/* find right hand */
- t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
- t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
- t2.dp[2] = x.dp[i];
- t2.used = 3;
- } while (fp_cmp_mag(&t1, &t2) == FP_GT);
+ t2->dp[0] = (i - 2 < 0) ? 0 : x->dp[i - 2];
+ t2->dp[1] = (i - 1 < 0) ? 0 : x->dp[i - 1];
+ t2->dp[2] = x->dp[i];
+ t2->used = 3;
+ } while (fp_cmp_mag(t1, t2) == FP_GT);
/* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
- fp_mul_d (&y, q.dp[i - t - 1], &t1);
- fp_lshd (&t1, i - t - 1);
- fp_sub (&x, &t1, &x);
+ fp_mul_d (y, q->dp[i - t - 1], t1);
+ fp_lshd (t1, i - t - 1);
+ fp_sub (x, t1, x);
/* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
- if (x.sign == FP_NEG) {
- fp_copy (&y, &t1);
- fp_lshd (&t1, i - t - 1);
- fp_add (&x, &t1, &x);
- q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
+ if (x->sign == FP_NEG) {
+ fp_copy (y, t1);
+ fp_lshd (t1, i - t - 1);
+ fp_add (x, t1, x);
+ q->dp[i - t - 1] = q->dp[i - t - 1] - 1;
}
}
- /* now q is the quotient and x is the remainder
- * [which we have to normalize]
+ /* now q is the quotient and x is the remainder
+ * [which we have to normalize]
*/
-
+
/* get sign before writing to c */
- x.sign = x.used == 0 ? FP_ZPOS : a->sign;
+ x->sign = x->used == 0 ? FP_ZPOS : a->sign;
if (c != NULL) {
- fp_clamp (&q);
- fp_copy (&q, c);
+ fp_clamp (q);
+ fp_copy (q, c);
c->sign = neg;
}
if (d != NULL) {
- fp_div_2d (&x, norm, &x, NULL);
+ fp_div_2d (x, norm, x, NULL);
-/* the following is a kludge, essentially we were seeing the right remainder but
- with excess digits that should have been zero
- */
- for (i = b->used; i < x.used; i++) {
- x.dp[i] = 0;
+ /* zero any excess digits on the destination that we didn't write to */
+ for (i = b->used; i < x->used; i++) {
+ x->dp[i] = 0;
}
- fp_clamp(&x);
- fp_copy (&x, d);
+ fp_clamp(x);
+ fp_copy (x, d);
}
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
return FP_OKAY;
}
@@ -640,7 +758,7 @@ void fp_div_2(fp_int * a, fp_int * b)
oldused = b->used;
b->used = a->used;
{
- register fp_digit r, rr, *tmpa, *tmpb;
+ fp_digit r, rr, *tmpa, *tmpb;
/* source alias */
tmpa = a->dp + b->used - 1;
@@ -661,7 +779,7 @@ void fp_div_2(fp_int * a, fp_int * b)
r = rr;
}
- /* zero excess digits */
+ /* zero any excess digits on the destination that we didn't write to */
tmpb = b->dp + b->used;
for (x = b->used; x < oldused; x++) {
*tmpb++ = 0;
@@ -675,7 +793,6 @@ void fp_div_2(fp_int * a, fp_int * b)
void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
{
int D;
- fp_int t;
/* if the shift count is <= 0 then we do no work */
if (b <= 0) {
@@ -686,11 +803,9 @@ void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
return;
}
- fp_init(&t);
-
- /* get the remainder */
- if (d != NULL) {
- fp_mod_2d (a, b, &t);
+ /* get the remainder before a is changed in calculating c */
+ if (a == c && d != NULL) {
+ fp_mod_2d (a, b, d);
}
/* copy */
@@ -706,28 +821,45 @@ void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
if (D != 0) {
fp_rshb(c, D);
}
- fp_clamp (c);
- if (d != NULL) {
- fp_copy (&t, d);
+
+ /* get the remainder if a is not changed in calculating c */
+ if (a != c && d != NULL) {
+ fp_mod_2d (a, b, d);
}
+
+ fp_clamp (c);
}
/* c = a mod b, 0 <= c < b */
int fp_mod(fp_int *a, fp_int *b, fp_int *c)
{
- fp_int t;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
int err;
- fp_init(&t);
- if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
- return err;
- }
- if (t.sign != b->sign) {
- fp_add(&t, b, c);
- } else {
- fp_copy(&t, c);
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init(t);
+ err = fp_div(a, b, NULL, t);
+ if (err == FP_OKAY) {
+ if (t->sign != b->sign) {
+ fp_add(t, b, c);
+ } else {
+ fp_copy(t, c);
+ }
}
- return FP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return err;
}
/* c = a mod 2**d */
@@ -743,7 +875,7 @@ void fp_mod_2d(fp_int *a, int b, fp_int *c)
/* get copy of input */
fp_copy(a, c);
-
+
/* if 2**d is larger than we just return */
if (b >= (DIGIT_BIT * a->used)) {
return;
@@ -760,218 +892,690 @@ void fp_mod_2d(fp_int *a, int b, fp_int *c)
static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
{
- fp_int x, y, u, v, A, B, C, D;
- int res;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int x[1], y[1], u[1], v[1], A[1], B[1], C[1], D[1];
+#else
+ fp_int *x, *y, *u, *v, *A, *B, *C, *D;
+#endif
+ int err;
/* b cannot be negative */
- if (b->sign == FP_NEG || fp_iszero(b) == 1) {
+ if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
return FP_VAL;
}
+ if (fp_iszero(a) == FP_YES) {
+ return FP_VAL;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ x = (fp_int*)XMALLOC(sizeof(fp_int) * 8, NULL, DYNAMIC_TYPE_BIGINT);
+ if (x == NULL) {
+ return FP_MEM;
+ }
+ y = &x[1]; u = &x[2]; v = &x[3]; A = &x[4]; B = &x[5]; C = &x[6]; D = &x[7];
+#endif
/* init temps */
- fp_init(&x); fp_init(&y);
- fp_init(&u); fp_init(&v);
- fp_init(&A); fp_init(&B);
- fp_init(&C); fp_init(&D);
+ fp_init(x); fp_init(y);
+ fp_init(u); fp_init(v);
+ fp_init(A); fp_init(B);
+ fp_init(C); fp_init(D);
/* x = a, y = b */
- if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
- return res;
+ if ((err = fp_mod(a, b, x)) != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return err;
}
- fp_copy(b, &y);
+ fp_copy(b, y);
/* 2. [modified] if x,y are both even then return an error! */
- if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
+ if (fp_iseven(x) == FP_YES && fp_iseven(y) == FP_YES) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
return FP_VAL;
}
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
- fp_copy (&x, &u);
- fp_copy (&y, &v);
- fp_set (&A, 1);
- fp_set (&D, 1);
+ fp_copy (x, u);
+ fp_copy (y, v);
+ fp_set (A, 1);
+ fp_set (D, 1);
top:
/* 4. while u is even do */
- while (fp_iseven (&u) == 1) {
+ while (fp_iseven (u) == FP_YES) {
/* 4.1 u = u/2 */
- fp_div_2 (&u, &u);
+ fp_div_2 (u, u);
/* 4.2 if A or B is odd then */
- if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
+ if (fp_isodd (A) == FP_YES || fp_isodd (B) == FP_YES) {
/* A = (A+y)/2, B = (B-x)/2 */
- fp_add (&A, &y, &A);
- fp_sub (&B, &x, &B);
+ fp_add (A, y, A);
+ fp_sub (B, x, B);
}
/* A = A/2, B = B/2 */
- fp_div_2 (&A, &A);
- fp_div_2 (&B, &B);
+ fp_div_2 (A, A);
+ fp_div_2 (B, B);
}
/* 5. while v is even do */
- while (fp_iseven (&v) == 1) {
+ while (fp_iseven (v) == FP_YES) {
/* 5.1 v = v/2 */
- fp_div_2 (&v, &v);
+ fp_div_2 (v, v);
/* 5.2 if C or D is odd then */
- if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
+ if (fp_isodd (C) == FP_YES || fp_isodd (D) == FP_YES) {
/* C = (C+y)/2, D = (D-x)/2 */
- fp_add (&C, &y, &C);
- fp_sub (&D, &x, &D);
+ fp_add (C, y, C);
+ fp_sub (D, x, D);
}
/* C = C/2, D = D/2 */
- fp_div_2 (&C, &C);
- fp_div_2 (&D, &D);
+ fp_div_2 (C, C);
+ fp_div_2 (D, D);
}
/* 6. if u >= v then */
- if (fp_cmp (&u, &v) != FP_LT) {
+ if (fp_cmp (u, v) != FP_LT) {
/* u = u - v, A = A - C, B = B - D */
- fp_sub (&u, &v, &u);
- fp_sub (&A, &C, &A);
- fp_sub (&B, &D, &B);
+ fp_sub (u, v, u);
+ fp_sub (A, C, A);
+ fp_sub (B, D, B);
} else {
/* v - v - u, C = C - A, D = D - B */
- fp_sub (&v, &u, &v);
- fp_sub (&C, &A, &C);
- fp_sub (&D, &B, &D);
+ fp_sub (v, u, v);
+ fp_sub (C, A, C);
+ fp_sub (D, B, D);
}
/* if not zero goto step 4 */
- if (fp_iszero (&u) == 0)
+ if (fp_iszero (u) == FP_NO)
goto top;
/* now a = C, b = D, gcd == g*v */
/* if v != 1 then there is no inverse */
- if (fp_cmp_d (&v, 1) != FP_EQ) {
+ if (fp_cmp_d (v, 1) != FP_EQ) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
return FP_VAL;
}
/* if its too low */
- while (fp_cmp_d(&C, 0) == FP_LT) {
- fp_add(&C, b, &C);
+ while (fp_cmp_d(C, 0) == FP_LT) {
+ fp_add(C, b, C);
}
-
+
/* too big */
- while (fp_cmp_mag(&C, b) != FP_LT) {
- fp_sub(&C, b, &C);
+ while (fp_cmp_mag(C, b) != FP_LT) {
+ fp_sub(C, b, C);
}
-
+
/* C is now the inverse */
- fp_copy(&C, c);
+ fp_copy(C, c);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
return FP_OKAY;
}
-
/* c = 1/a (mod b) for odd b only */
int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
{
- fp_int x, y, u, v, B, D;
- int neg, loop_check = 0;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int x[1], y[1], u[1], v[1], B[1], D[1];
+#else
+ fp_int *x, *y, *u, *v, *B, *D;
+#endif
+ int neg;
+ int err;
+
+ if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
+ return FP_VAL;
+ }
+
+ /* [modified] sanity check on "a" */
+ if (fp_iszero(a) == FP_YES) {
+ return FP_VAL; /* can not divide by 0 here */
+ }
/* 2. [modified] b must be odd */
- if (fp_iseven (b) == FP_YES) {
+ if (fp_iseven(b) == FP_YES) {
return fp_invmod_slow(a,b,c);
}
+#ifdef WOLFSSL_SMALL_STACK
+ x = (fp_int*)XMALLOC(sizeof(fp_int) * 6, NULL, DYNAMIC_TYPE_BIGINT);
+ if (x == NULL) {
+ return FP_MEM;
+ }
+ y = &x[1]; u = &x[2]; v = &x[3]; B = &x[4]; D = &x[5];
+#endif
+
/* init all our temps */
- fp_init(&x); fp_init(&y);
- fp_init(&u); fp_init(&v);
- fp_init(&B); fp_init(&D);
+ fp_init(x); fp_init(y);
+ fp_init(u); fp_init(v);
+ fp_init(B); fp_init(D);
+
+ if (fp_cmp(a, b) != MP_LT) {
+ err = mp_mod(a, b, y);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return err;
+ }
+ a = y;
+ }
+
+ if (fp_iszero(a) == FP_YES) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return FP_VAL;
+ }
/* x == modulus, y == value to invert */
- fp_copy(b, &x);
+ fp_copy(b, x);
/* we need y = |a| */
- fp_abs(a, &y);
+ fp_abs(a, y);
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
- fp_copy(&x, &u);
- fp_copy(&y, &v);
- fp_set (&D, 1);
+ fp_copy(x, u);
+ fp_copy(y, v);
+ fp_set (D, 1);
top:
/* 4. while u is even do */
- while (fp_iseven (&u) == FP_YES) {
+ while (fp_iseven (u) == FP_YES) {
/* 4.1 u = u/2 */
- fp_div_2 (&u, &u);
+ fp_div_2 (u, u);
/* 4.2 if B is odd then */
- if (fp_isodd (&B) == FP_YES) {
- fp_sub (&B, &x, &B);
+ if (fp_isodd (B) == FP_YES) {
+ fp_sub (B, x, B);
}
/* B = B/2 */
- fp_div_2 (&B, &B);
+ fp_div_2 (B, B);
}
/* 5. while v is even do */
- while (fp_iseven (&v) == FP_YES) {
+ while (fp_iseven (v) == FP_YES) {
/* 5.1 v = v/2 */
- fp_div_2 (&v, &v);
+ fp_div_2 (v, v);
/* 5.2 if D is odd then */
- if (fp_isodd (&D) == FP_YES) {
+ if (fp_isodd (D) == FP_YES) {
/* D = (D-x)/2 */
- fp_sub (&D, &x, &D);
+ fp_sub (D, x, D);
}
/* D = D/2 */
- fp_div_2 (&D, &D);
+ fp_div_2 (D, D);
}
/* 6. if u >= v then */
- if (fp_cmp (&u, &v) != FP_LT) {
+ if (fp_cmp (u, v) != FP_LT) {
/* u = u - v, B = B - D */
- fp_sub (&u, &v, &u);
- fp_sub (&B, &D, &B);
+ fp_sub (u, v, u);
+ fp_sub (B, D, B);
} else {
/* v - v - u, D = D - B */
- fp_sub (&v, &u, &v);
- fp_sub (&D, &B, &D);
+ fp_sub (v, u, v);
+ fp_sub (D, B, D);
}
/* if not zero goto step 4 */
- if (fp_iszero (&u) == FP_NO) {
- if (++loop_check > 1024) /* bad input */
- return FP_VAL;
+ if (fp_iszero (u) == FP_NO) {
goto top;
}
/* now a = C, b = D, gcd == g*v */
/* if v != 1 then there is no inverse */
- if (fp_cmp_d (&v, 1) != FP_EQ) {
+ if (fp_cmp_d (v, 1) != FP_EQ) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
return FP_VAL;
}
/* b is now the inverse */
neg = a->sign;
- while (D.sign == FP_NEG) {
- fp_add (&D, b, &D);
+ while (D->sign == FP_NEG) {
+ fp_add (D, b, D);
+ }
+ /* too big */
+ while (fp_cmp_mag(D, b) != FP_LT) {
+ fp_sub(D, b, D);
}
- fp_copy (&D, c);
+ fp_copy (D, c);
c->sign = neg;
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+
+#define CT_INV_MOD_PRE_CNT 8
+
+/* modulus (b) must be greater than 2 and a prime */
+int fp_invmod_mont_ct(fp_int *a, fp_int *b, fp_int *c, fp_digit mp)
+{
+ int i, j;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1], e[1];
+ fp_int pre[CT_INV_MOD_PRE_CNT];
+#else
+ fp_int* t;
+ fp_int* e;
+ fp_int* pre;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int) * (2 + CT_INV_MOD_PRE_CNT), NULL,
+ DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+ e = t + 1;
+ pre = t + 2;
+#endif
+
+ fp_init(t);
+ fp_init(e);
+
+ fp_init(&pre[0]);
+ fp_copy(a, &pre[0]);
+ for (i = 1; i < CT_INV_MOD_PRE_CNT; i++) {
+ fp_init(&pre[i]);
+ fp_sqr(&pre[i-1], &pre[i]);
+ fp_montgomery_reduce(&pre[i], b, mp);
+ fp_mul(&pre[i], a, &pre[i]);
+ fp_montgomery_reduce(&pre[i], b, mp);
+ }
+
+ fp_sub_d(b, 2, e);
+ /* Highest bit is always set. */
+ for (i = fp_count_bits(e)-2, j = 1; i >= 0; i--, j++) {
+ if (!fp_is_bit_set(e, i) || j == CT_INV_MOD_PRE_CNT)
+ break;
+ }
+ fp_copy(&pre[j-1], t);
+ for (j = 0; i >= 0; i--) {
+ int set = fp_is_bit_set(e, i);
+
+ if ((j == CT_INV_MOD_PRE_CNT) || (!set && j > 0)) {
+ fp_mul(t, &pre[j-1], t);
+ fp_montgomery_reduce(t, b, mp);
+ j = 0;
+ }
+ fp_sqr(t, t);
+ fp_montgomery_reduce(t, b, mp);
+ j += set;
+ }
+ if (j > 0) {
+ fp_mul(t, &pre[j-1], c);
+ fp_montgomery_reduce(c, b, mp);
+ }
+ else
+ fp_copy(t, c);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
return FP_OKAY;
}
/* d = a * b (mod c) */
int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
{
- fp_int tmp;
- fp_init(&tmp);
- fp_mul(a, b, &tmp);
- return fp_mod(&tmp, c, d);
+ int err;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init(t);
+ err = fp_mul(a, b, t);
+ if (err == FP_OKAY) {
+ #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ if (d->size < FP_SIZE) {
+ err = fp_mod(t, c, t);
+ fp_copy(t, d);
+ } else
+ #endif
+ {
+ err = fp_mod(t, c, d);
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return err;
+}
+
+/* d = a - b (mod c) */
+int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
+{
+ int err;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init(t);
+ fp_sub(a, b, t);
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ if (d->size < FP_SIZE) {
+ err = fp_mod(t, c, t);
+ fp_copy(t, d);
+ } else
+#endif
+ {
+ err = fp_mod(t, c, d);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return err;
+}
+
+/* d = a + b (mod c) */
+int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
+{
+ int err;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init(t);
+ fp_add(a, b, t);
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ if (d->size < FP_SIZE) {
+ err = fp_mod(t, c, t);
+ fp_copy(t, d);
+ } else
+#endif
+ {
+ err = fp_mod(t, c, d);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return err;
}
#ifdef TFM_TIMING_RESISTANT
-/* timing resistant montgomery ladder based exptmod
+#ifdef WC_RSA_NONBLOCK
+
+#ifdef WC_RSA_NONBLOCK_TIME
+ /* User can override the check-time at build-time using the
+ * FP_EXPTMOD_NB_CHECKTIME macro to define your own function */
+ #ifndef FP_EXPTMOD_NB_CHECKTIME
+ /* instruction count for each type of operation */
+ /* array lookup is using TFM_EXPTMOD_NB_* states */
+ static const word32 exptModNbInst[TFM_EXPTMOD_NB_COUNT] = {
+ #ifdef TFM_PPC32
+ #ifdef _DEBUG
+ 11098, 8701, 3971, 178394, 858093, 1040, 822, 178056, 181574, 90883, 184339, 236813
+ #else
+ 7050, 2554, 3187, 43178, 200422, 384, 275, 43024, 43550, 30450, 46270, 61376
+ #endif
+ #elif defined(TFM_X86_64)
+ #ifdef _DEBUG
+ 954, 2377, 858, 19027, 90840, 287, 407, 20140, 7874, 11385, 8005, 6151
+ #else
+ 765, 1007, 771, 5216, 34993, 248, 193, 4975, 4201, 3947, 4275, 3811
+ #endif
+ #else /* software only fast math */
+ #ifdef _DEBUG
+ 798, 2245, 802, 16657, 66920, 352, 186, 16997, 16145, 12789, 16742, 15006
+ #else
+ 775, 1084, 783, 4692, 37510, 207, 183, 4374, 4392, 3097, 4442, 4079
+ #endif
+ #endif
+ };
+
+ static int fp_exptmod_nb_checktime(exptModNb_t* nb)
+ {
+ word32 totalInst;
+
+ /* if no max time has been set then stop (do not block) */
+ if (nb->maxBlockInst == 0 || nb->state >= TFM_EXPTMOD_NB_COUNT) {
+ return TFM_EXPTMOD_NB_STOP;
+ }
+
+ /* if instruction table not set then use maxBlockInst as simple counter */
+ if (exptModNbInst[nb->state] == 0) {
+ if (++nb->totalInst < nb->maxBlockInst)
+ return TFM_EXPTMOD_NB_CONTINUE;
+
+ nb->totalInst = 0; /* reset counter */
+ return TFM_EXPTMOD_NB_STOP;
+ }
+
+ /* get total instruction count including next operation */
+ totalInst = nb->totalInst + exptModNbInst[nb->state];
+    /* if the next operation can be completed within the maximum then continue */
+ if (totalInst <= nb->maxBlockInst) {
+ return TFM_EXPTMOD_NB_CONTINUE;
+ }
+
+ return TFM_EXPTMOD_NB_STOP;
+ }
+ #define FP_EXPTMOD_NB_CHECKTIME(nb) fp_exptmod_nb_checktime((nb))
+ #endif /* !FP_EXPTMOD_NB_CHECKTIME */
+#endif /* WC_RSA_NONBLOCK_TIME */
+
+/* non-blocking version of timing resistant fp_exptmod function */
+/* supports cache resistance */
+int fp_exptmod_nb(exptModNb_t* nb, fp_int* G, fp_int* X, fp_int* P, fp_int* Y)
+{
+ int err, ret = FP_WOULDBLOCK;
+
+ if (nb == NULL)
+ return FP_VAL;
+
+#ifdef WC_RSA_NONBLOCK_TIME
+ nb->totalInst = 0;
+ do {
+ nb->totalInst += exptModNbInst[nb->state];
+#endif
+
+ switch (nb->state) {
+ case TFM_EXPTMOD_NB_INIT:
+ /* now setup montgomery */
+ if ((err = fp_montgomery_setup(P, &nb->mp)) != FP_OKAY) {
+ nb->state = TFM_EXPTMOD_NB_INIT;
+ return err;
+ }
+
+ /* init ints */
+ fp_init(&nb->R[0]);
+ fp_init(&nb->R[1]);
+ #ifndef WC_NO_CACHE_RESISTANT
+ fp_init(&nb->R[2]);
+ #endif
+ nb->state = TFM_EXPTMOD_NB_MONT;
+ break;
+
+ case TFM_EXPTMOD_NB_MONT:
+ /* mod m -> R[0] */
+ fp_montgomery_calc_normalization(&nb->R[0], P);
+
+ nb->state = TFM_EXPTMOD_NB_MONT_RED;
+ break;
+
+ case TFM_EXPTMOD_NB_MONT_RED:
+ /* reduce G -> R[1] */
+ if (fp_cmp_mag(P, G) != FP_GT) {
+ /* G > P so we reduce it first */
+ fp_mod(G, P, &nb->R[1]);
+ } else {
+ fp_copy(G, &nb->R[1]);
+ }
+
+ nb->state = TFM_EXPTMOD_NB_MONT_MUL;
+ break;
+
+ case TFM_EXPTMOD_NB_MONT_MUL:
+ /* G (R[1]) * m (R[0]) */
+ err = fp_mul(&nb->R[1], &nb->R[0], &nb->R[1]);
+ if (err != FP_OKAY) {
+ nb->state = TFM_EXPTMOD_NB_INIT;
+ return err;
+ }
+
+ nb->state = TFM_EXPTMOD_NB_MONT_MOD;
+ break;
- Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
+ case TFM_EXPTMOD_NB_MONT_MOD:
+ /* mod m */
+ err = fp_div(&nb->R[1], P, NULL, &nb->R[1]);
+ if (err != FP_OKAY) {
+ nb->state = TFM_EXPTMOD_NB_INIT;
+ return err;
+ }
+
+ nb->state = TFM_EXPTMOD_NB_MONT_MODCHK;
+ break;
+
+ case TFM_EXPTMOD_NB_MONT_MODCHK:
+ /* m matches sign of (G * R mod m) */
+ if (nb->R[1].sign != P->sign) {
+ fp_add(&nb->R[1], P, &nb->R[1]);
+ }
+
+ /* set initial mode and bit cnt */
+ nb->bitcnt = 1;
+ nb->buf = 0;
+ nb->digidx = X->used - 1;
+
+ nb->state = TFM_EXPTMOD_NB_NEXT;
+ break;
+
+ case TFM_EXPTMOD_NB_NEXT:
+ /* grab next digit as required */
+ if (--nb->bitcnt == 0) {
+ /* if nb->digidx == -1 we are out of digits so break */
+ if (nb->digidx == -1) {
+ nb->state = TFM_EXPTMOD_NB_RED;
+ break;
+ }
+ /* read next digit and reset nb->bitcnt */
+ nb->buf = X->dp[nb->digidx--];
+ nb->bitcnt = (int)DIGIT_BIT;
+ }
+
+ /* grab the next msb from the exponent */
+ nb->y = (int)(nb->buf >> (DIGIT_BIT - 1)) & 1;
+ nb->buf <<= (fp_digit)1;
+ nb->state = TFM_EXPTMOD_NB_MUL;
+ FALL_THROUGH;
+
+ case TFM_EXPTMOD_NB_MUL:
+ fp_mul(&nb->R[0], &nb->R[1], &nb->R[nb->y^1]);
+ nb->state = TFM_EXPTMOD_NB_MUL_RED;
+ break;
+
+ case TFM_EXPTMOD_NB_MUL_RED:
+ fp_montgomery_reduce(&nb->R[nb->y^1], P, nb->mp);
+ nb->state = TFM_EXPTMOD_NB_SQR;
+ break;
+
+ case TFM_EXPTMOD_NB_SQR:
+ #ifdef WC_NO_CACHE_RESISTANT
+ fp_sqr(&nb->R[nb->y], &nb->R[nb->y]);
+ #else
+ fp_copy((fp_int*) ( ((wolfssl_word)&nb->R[0] & wc_off_on_addr[nb->y^1]) +
+ ((wolfssl_word)&nb->R[1] & wc_off_on_addr[nb->y]) ),
+ &nb->R[2]);
+ fp_sqr(&nb->R[2], &nb->R[2]);
+ #endif /* WC_NO_CACHE_RESISTANT */
+
+ nb->state = TFM_EXPTMOD_NB_SQR_RED;
+ break;
+
+ case TFM_EXPTMOD_NB_SQR_RED:
+ #ifdef WC_NO_CACHE_RESISTANT
+ fp_montgomery_reduce(&nb->R[nb->y], P, nb->mp);
+ #else
+ fp_montgomery_reduce(&nb->R[2], P, nb->mp);
+ fp_copy(&nb->R[2],
+ (fp_int*) ( ((wolfssl_word)&nb->R[0] & wc_off_on_addr[nb->y^1]) +
+ ((wolfssl_word)&nb->R[1] & wc_off_on_addr[nb->y]) ) );
+ #endif /* WC_NO_CACHE_RESISTANT */
+
+ nb->state = TFM_EXPTMOD_NB_NEXT;
+ break;
+
+ case TFM_EXPTMOD_NB_RED:
+ /* final reduce */
+ fp_montgomery_reduce(&nb->R[0], P, nb->mp);
+ fp_copy(&nb->R[0], Y);
+
+ nb->state = TFM_EXPTMOD_NB_INIT;
+ ret = FP_OKAY;
+ break;
+ } /* switch */
+
+#ifdef WC_RSA_NONBLOCK_TIME
+ /* determine if maximum blocking time has been reached */
+ } while (ret == FP_WOULDBLOCK &&
+ FP_EXPTMOD_NB_CHECKTIME(nb) == TFM_EXPTMOD_NB_CONTINUE);
+#endif
+
+ return ret;
+}
+
+#endif /* WC_RSA_NONBLOCK */
+
+
+/* timing resistant montgomery ladder based exptmod
+ Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
+ Cryptographic Hardware and Embedded Systems, CHES 2002
*/
-static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
+static int _fp_exptmod_ct(fp_int * G, fp_int * X, int digits, fp_int * P,
+ fp_int * Y)
{
+#ifndef WOLFSSL_SMALL_STACK
+#ifdef WC_NO_CACHE_RESISTANT
fp_int R[2];
+#else
+ fp_int R[3]; /* need a temp for cache resistance */
+#endif
+#else
+ fp_int *R;
+#endif
fp_digit buf, mp;
int err, bitcnt, digidx, y;
@@ -980,9 +1584,21 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
return err;
}
- fp_init(&R[0]);
- fp_init(&R[1]);
-
+#ifdef WOLFSSL_SMALL_STACK
+#ifndef WC_NO_CACHE_RESISTANT
+ R = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+#else
+ R = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ if (R == NULL)
+ return FP_MEM;
+#endif
+ fp_init(&R[0]);
+ fp_init(&R[1]);
+#ifndef WC_NO_CACHE_RESISTANT
+ fp_init(&R[2]);
+#endif
+
/* now we need R mod m */
fp_montgomery_calc_normalization (&R[0], P);
@@ -998,11 +1614,11 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
/* for j = t-1 downto 0 do
r_!k = R0*R1; r_k = r_k^2
*/
-
+
/* set initial mode and bit cnt */
bitcnt = 1;
buf = 0;
- digidx = X->used - 1;
+ digidx = digits - 1;
for (;;) {
/* grab next digit as required */
@@ -1021,23 +1637,80 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
buf <<= (fp_digit)1;
/* do ops */
- fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
- fp_sqr(&R[y], &R[y]); fp_montgomery_reduce(&R[y], P, mp);
+ err = fp_mul(&R[0], &R[1], &R[y^1]);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return err;
+ }
+ err = fp_montgomery_reduce(&R[y^1], P, mp);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return err;
+ }
+
+#ifdef WC_NO_CACHE_RESISTANT
+ err = fp_sqr(&R[y], &R[y]);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return err;
+ }
+ err = fp_montgomery_reduce(&R[y], P, mp);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return err;
+ }
+#else
+ /* instead of using R[y] for sqr, which leaks key bit to cache monitor,
+ * use R[2] as temp, make sure address calc is constant, keep
+ * &R[0] and &R[1] in cache */
+ fp_copy((fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
+ ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ),
+ &R[2]);
+ err = fp_sqr(&R[2], &R[2]);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return err;
+ }
+ err = fp_montgomery_reduce(&R[2], P, mp);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return err;
+ }
+ fp_copy(&R[2],
+ (fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
+ ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ) );
+#endif /* WC_NO_CACHE_RESISTANT */
}
- fp_montgomery_reduce(&R[0], P, mp);
+ err = fp_montgomery_reduce(&R[0], P, mp);
fp_copy(&R[0], Y);
- return FP_OKAY;
-}
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return err;
+}
-#else
+#endif /* TFM_TIMING_RESISTANT */
-/* y = g**x (mod b)
+/* y = g**x (mod b)
* Some restrictions... x must be positive and < b
*/
-static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
+static int _fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
{
- fp_int M[64], res;
+ fp_int *res;
+ fp_int *M;
fp_digit buf, mp;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
@@ -1053,28 +1726,37 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
winsize = 5;
} else {
winsize = 6;
- }
-
- /* init M array */
- XMEMSET(M, 0, sizeof(M));
+ }
/* now setup montgomery */
if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
return err;
}
+ /* only allocate space for what's needed for window plus res */
+ M = (fp_int*)XMALLOC(sizeof(fp_int)*((1 << winsize) + 1), NULL,
+ DYNAMIC_TYPE_BIGINT);
+ if (M == NULL) {
+ return FP_MEM;
+ }
+ res = &M[1 << winsize];
+
+ /* init M array */
+ for(x = 0; x < (1 << winsize); x++)
+ fp_init(&M[x]);
+
/* setup result */
- fp_init(&res);
+ fp_init(res);
/* create M table
*
* The M table contains powers of the input base, e.g. M[x] = G^x mod P
*
- * The first half of the table is not computed though accept for M[0] and M[1]
+ * The first half of the table is not computed though except for M[0] and M[1]
*/
/* now we need R mod m */
- fp_montgomery_calc_normalization (&res, P);
+ fp_montgomery_calc_normalization (res, P);
/* now set M[1] to G * R mod m */
if (fp_cmp_mag(P, G) != FP_GT) {
@@ -1083,24 +1765,37 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
} else {
fp_copy(G, &M[1]);
}
- fp_mulmod (&M[1], &res, P, &M[1]);
+ fp_mulmod (&M[1], res, P, &M[1]);
- /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
+ /* compute the value at M[1<<(winsize-1)] by
+ * squaring M[1] (winsize-1) times */
fp_copy (&M[1], &M[1 << (winsize - 1)]);
for (x = 0; x < (winsize - 1); x++) {
fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
- fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
+ err = fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
}
/* create upper table */
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
- fp_mul(&M[x - 1], &M[1], &M[x]);
- fp_montgomery_reduce(&M[x], P, mp);
+ err = fp_mul(&M[x - 1], &M[1], &M[x]);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
+ err = fp_montgomery_reduce(&M[x], P, mp);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
}
/* set initial mode and bit cnt */
mode = 0;
- bitcnt = 1;
+ bitcnt = (x % DIGIT_BIT) + 1;
buf = 0;
digidx = X->used - 1;
bitcpy = 0;
@@ -1133,8 +1828,16 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
- fp_sqr(&res, &res);
- fp_montgomery_reduce(&res, P, mp);
+ err = fp_sqr(res, res);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
+          err = fp_montgomery_reduce(res, P, mp);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
continue;
}
@@ -1146,13 +1849,29 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
/* ok window is filled so square as required and multiply */
/* square first */
for (x = 0; x < winsize; x++) {
- fp_sqr(&res, &res);
- fp_montgomery_reduce(&res, P, mp);
+ err = fp_sqr(res, res);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
+ err = fp_montgomery_reduce(res, P, mp);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
}
/* then multiply */
- fp_mul(&res, &M[bitbuf], &res);
- fp_montgomery_reduce(&res, P, mp);
+ err = fp_mul(res, &M[bitbuf], res);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
+ err = fp_montgomery_reduce(res, P, mp);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
/* empty window and reset */
bitcpy = 0;
@@ -1165,15 +1884,31 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
if (mode == 2 && bitcpy > 0) {
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
- fp_sqr(&res, &res);
- fp_montgomery_reduce(&res, P, mp);
+ err = fp_sqr(res, res);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
+ err = fp_montgomery_reduce(res, P, mp);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
/* get next bit of the window */
bitbuf <<= 1;
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
- fp_mul(&res, &M[1], &res);
- fp_montgomery_reduce(&res, P, mp);
+ err = fp_mul(res, &M[1], res);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
+ err = fp_montgomery_reduce(res, P, mp);
+ if (err != FP_OKAY) {
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
+ }
}
}
}
@@ -1184,45 +1919,553 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
* to reduce one more time to cancel out the factor
* of R.
*/
- fp_montgomery_reduce(&res, P, mp);
+ err = fp_montgomery_reduce(res, P, mp);
/* swap res with Y */
- fp_copy (&res, Y);
- return FP_OKAY;
+ fp_copy (res, Y);
+
+ XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
+ return err;
}
+
+#ifdef TFM_TIMING_RESISTANT
+#if DIGIT_BIT <= 16
+ #define WINSIZE 2
+#elif DIGIT_BIT <= 32
+ #define WINSIZE 3
+#elif DIGIT_BIT <= 64
+ #define WINSIZE 4
+#elif DIGIT_BIT <= 128
+ #define WINSIZE 5
+#endif
+
+/* y = 2**x (mod b)
+ * Some restrictions... x must be positive and < b
+ */
+static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
+ fp_int * Y)
+{
+ fp_digit buf, mp;
+ int err, bitbuf, bitcpy, bitcnt, digidx, x, y;
+#ifdef WOLFSSL_SMALL_STACK
+ fp_int *res;
+ fp_int *tmp;
+#else
+ fp_int res[1];
+ fp_int tmp[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ res = (fp_int*)XMALLOC(2*sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (res == NULL) {
+ return FP_MEM;
+ }
+ tmp = &res[1];
#endif
+ /* now setup montgomery */
+ if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return err;
+ }
+
+ /* setup result */
+ fp_init(res);
+ fp_init(tmp);
+
+ fp_mul_2d(P, 1 << WINSIZE, tmp);
+
+ /* now we need R mod m */
+ fp_montgomery_calc_normalization(res, P);
+
+ /* Get the top bits left over after taking WINSIZE bits starting at the
+ * least-significant.
+ */
+ digidx = digits - 1;
+ bitcpy = (digits * DIGIT_BIT) % WINSIZE;
+ if (bitcpy > 0) {
+ bitcnt = (int)DIGIT_BIT - bitcpy;
+ buf = X->dp[digidx--];
+ bitbuf = (int)(buf >> bitcnt);
+ /* Multiply montgomery representation of 1 by 2 ^ top */
+ fp_mul_2d(res, bitbuf, res);
+ fp_add(res, tmp, res);
+ err = fp_mod(res, P, res);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+ /* Move out bits used */
+ buf <<= bitcpy;
+ bitcnt++;
+ }
+ else {
+ bitcnt = 1;
+ buf = 0;
+ }
+
+ /* empty window and reset */
+ bitbuf = 0;
+ bitcpy = 0;
+
+ for (;;) {
+ /* grab next digit as required */
+ if (--bitcnt == 0) {
+ /* if digidx == -1 we are out of digits so break */
+ if (digidx == -1) {
+ break;
+ }
+ /* read next digit and reset bitcnt */
+ buf = X->dp[digidx--];
+ bitcnt = (int)DIGIT_BIT;
+ }
+
+ /* grab the next msb from the exponent */
+ y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
+ buf <<= (fp_digit)1;
+ /* add bit to the window */
+ bitbuf |= (y << (WINSIZE - ++bitcpy));
+
+ if (bitcpy == WINSIZE) {
+ /* ok window is filled so square as required and multiply */
+ /* square first */
+ for (x = 0; x < WINSIZE; x++) {
+ err = fp_sqr(res, res);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+ err = fp_montgomery_reduce(res, P, mp);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+ }
+
+ /* then multiply by 2^bitbuf */
+ fp_mul_2d(res, bitbuf, res);
+ /* Add in value to make mod operation take same time */
+ fp_add(res, tmp, res);
+ err = fp_mod(res, P, res);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+
+ /* empty window and reset */
+ bitcpy = 0;
+ bitbuf = 0;
+ }
+ }
+
+ /* fixup result if Montgomery reduction is used
+ * recall that any value in a Montgomery system is
+ * actually multiplied by R mod n. So we have
+ * to reduce one more time to cancel out the factor
+ * of R.
+ */
+ err = fp_montgomery_reduce(res, P, mp);
+
+ /* swap res with Y */
+ fp_copy(res, Y);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return err;
+}
+
+#undef WINSIZE
+#else
+#if DIGIT_BIT < 16
+ #define WINSIZE 3
+#elif DIGIT_BIT < 32
+ #define WINSIZE 4
+#elif DIGIT_BIT < 64
+ #define WINSIZE 5
+#elif DIGIT_BIT < 128
+ #define WINSIZE 6
+#elif DIGIT_BIT == 128
+ #define WINSIZE 7
+#endif
+
+/* y = 2**x (mod b)
+ * Some restrictions... x must be positive and < b
+ */
+static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
+ fp_int * Y)
+{
+ fp_digit buf, mp;
+ int err, bitbuf, bitcpy, bitcnt, digidx, x, y;
+#ifdef WOLFSSL_SMALL_STACK
+ fp_int *res;
+#else
+ fp_int res[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ res = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (res == NULL) {
+ return FP_MEM;
+ }
+#endif
+
+ /* now setup montgomery */
+ if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
+ return err;
+ }
+
+ /* setup result */
+ fp_init(res);
+
+ /* now we need R mod m */
+ fp_montgomery_calc_normalization(res, P);
+
+ /* Get the top bits left over after taking WINSIZE bits starting at the
+ * least-significant.
+ */
+ digidx = digits - 1;
+ bitcpy = (digits * DIGIT_BIT) % WINSIZE;
+ if (bitcpy > 0) {
+ bitcnt = (int)DIGIT_BIT - bitcpy;
+ buf = X->dp[digidx--];
+ bitbuf = (int)(buf >> bitcnt);
+ /* Multiply montgomery representation of 1 by 2 ^ top */
+ fp_mul_2d(res, bitbuf, res);
+ err = fp_mod(res, P, res);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+ /* Move out bits used */
+ buf <<= bitcpy;
+ bitcnt++;
+ }
+ else {
+ bitcnt = 1;
+ buf = 0;
+ }
+
+ /* empty window and reset */
+ bitbuf = 0;
+ bitcpy = 0;
+
+ for (;;) {
+ /* grab next digit as required */
+ if (--bitcnt == 0) {
+ /* if digidx == -1 we are out of digits so break */
+ if (digidx == -1) {
+ break;
+ }
+ /* read next digit and reset bitcnt */
+ buf = X->dp[digidx--];
+ bitcnt = (int)DIGIT_BIT;
+ }
+
+ /* grab the next msb from the exponent */
+ y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
+ buf <<= (fp_digit)1;
+ /* add bit to the window */
+ bitbuf |= (y << (WINSIZE - ++bitcpy));
+
+ if (bitcpy == WINSIZE) {
+ /* ok window is filled so square as required and multiply */
+ /* square first */
+ for (x = 0; x < WINSIZE; x++) {
+ err = fp_sqr(res, res);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+ err = fp_montgomery_reduce(res, P, mp);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+ }
+
+ /* then multiply by 2^bitbuf */
+ fp_mul_2d(res, bitbuf, res);
+ err = fp_mod(res, P, res);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+
+ /* empty window and reset */
+ bitcpy = 0;
+ bitbuf = 0;
+ }
+ }
+
+ /* fixup result if Montgomery reduction is used
+ * recall that any value in a Montgomery system is
+ * actually multiplied by R mod n. So we have
+ * to reduce one more time to cancel out the factor
+ * of R.
+ */
+ err = fp_montgomery_reduce(res, P, mp);
+
+ /* swap res with Y */
+ fp_copy(res, Y);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return err;
+}
+
+#undef WINSIZE
+#endif
+
+
int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
{
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+ int x = fp_count_bits (X);
+#endif
+
+ /* handle modulus of zero and prevent overflows */
+ if (fp_iszero(P) || (P->used > (FP_SIZE/2))) {
+ return FP_VAL;
+ }
+ if (fp_isone(P)) {
+ fp_set(Y, 0);
+ return FP_OKAY;
+ }
+ if (fp_iszero(X)) {
+ fp_set(Y, 1);
+ return FP_OKAY;
+ }
+ if (fp_iszero(G)) {
+ fp_set(Y, 0);
+ return FP_OKAY;
+ }
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+ if(x > EPS_RSA_EXPT_XBTIS) {
+ return esp_mp_exptmod(G, X, x, P, Y);
+ }
+#endif
+
+ if (X->sign == FP_NEG) {
+#ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */
+ int err;
+ #ifndef WOLFSSL_SMALL_STACK
+ fp_int tmp[2];
+ #else
+ fp_int *tmp;
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+ if (tmp == NULL)
+ return FP_MEM;
+ #endif
+
+ /* yes, copy G and invmod it */
+ fp_init_copy(&tmp[0], G);
+ fp_init_copy(&tmp[1], P);
+ tmp[1].sign = FP_ZPOS;
+ err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
+ if (err == FP_OKAY) {
+ fp_copy(X, &tmp[1]);
+ tmp[1].sign = FP_ZPOS;
+#ifdef TFM_TIMING_RESISTANT
+ err = _fp_exptmod_ct(&tmp[0], &tmp[1], tmp[1].used, P, Y);
+#else
+ err = _fp_exptmod_nct(&tmp[0], &tmp[1], P, Y);
+#endif
+ if (P->sign == FP_NEG) {
+ fp_add(Y, P, Y);
+ }
+ }
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+#else
+ return FP_VAL;
+#endif
+ }
+ else if (G->used == 1 && G->dp[0] == 2) {
+ return _fp_exptmod_base_2(X, X->used, P, Y);
+ }
+ else {
+ /* Positive exponent so just exptmod */
+#ifdef TFM_TIMING_RESISTANT
+ return _fp_exptmod_ct(G, X, X->used, P, Y);
+#else
+ return _fp_exptmod_nct(G, X, P, Y);
+#endif
+ }
+}
+
+int fp_exptmod_ex(fp_int * G, fp_int * X, int digits, fp_int * P, fp_int * Y)
+{
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+ int x = fp_count_bits (X);
+#endif
+
+ if (fp_iszero(G)) {
+ fp_set(G, 0);
+ return FP_OKAY;
+ }
+
/* prevent overflows */
if (P->used > (FP_SIZE/2)) {
return FP_VAL;
}
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+ if(x > EPS_RSA_EXPT_XBTIS) {
+ return esp_mp_exptmod(G, X, x, P, Y);
+ }
+#endif
+
if (X->sign == FP_NEG) {
#ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */
int err;
- fp_int tmp;
+ #ifndef WOLFSSL_SMALL_STACK
+ fp_int tmp[2];
+ #else
+ fp_int *tmp;
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL)
+ return FP_MEM;
+ #endif
/* yes, copy G and invmod it */
- fp_copy(G, &tmp);
- if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
- return err;
+ fp_init_copy(&tmp[0], G);
+ fp_init_copy(&tmp[1], P);
+ tmp[1].sign = FP_ZPOS;
+ err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
+ if (err == FP_OKAY) {
+ X->sign = FP_ZPOS;
+#ifdef TFM_TIMING_RESISTANT
+ err = _fp_exptmod_ct(&tmp[0], X, digits, P, Y);
+#else
+ err = _fp_exptmod_nct(&tmp[0], X, P, Y);
+ (void)digits;
+#endif
+ if (X != Y) {
+ X->sign = FP_NEG;
+ }
+ if (P->sign == FP_NEG) {
+ fp_add(Y, P, Y);
+ }
}
- X->sign = FP_ZPOS;
- err = _fp_exptmod(&tmp, X, P, Y);
- if (X != Y) {
- X->sign = FP_NEG;
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return err;
+#else
+ return FP_VAL;
+#endif
+ }
+ else {
+ /* Positive exponent so just exptmod */
+#ifdef TFM_TIMING_RESISTANT
+ return _fp_exptmod_ct(G, X, digits, P, Y);
+#else
+ return _fp_exptmod_nct(G, X, P, Y);
+#endif
+ }
+}
+
+int fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
+{
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+ int x = fp_count_bits (X);
+#endif
+
+ if (fp_iszero(G)) {
+ fp_set(G, 0);
+ return FP_OKAY;
+ }
+
+ /* prevent overflows */
+ if (P->used > (FP_SIZE/2)) {
+ return FP_VAL;
+ }
+
+#if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+ if(x > EPS_RSA_EXPT_XBTIS) {
+ return esp_mp_exptmod(G, X, x, P, Y);
+ }
+#endif
+
+ if (X->sign == FP_NEG) {
+#ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */
+ int err;
+ #ifndef WOLFSSL_SMALL_STACK
+ fp_int tmp[2];
+ #else
+ fp_int *tmp;
+ #endif
+
+ #ifdef WOLFSSL_SMALL_STACK
+ tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL)
+ return FP_MEM;
+ #endif
+
+ /* yes, copy G and invmod it */
+ fp_init_copy(&tmp[0], G);
+ fp_init_copy(&tmp[1], P);
+ tmp[1].sign = FP_ZPOS;
+ err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
+ if (err == FP_OKAY) {
+ X->sign = FP_ZPOS;
+ err = _fp_exptmod_nct(&tmp[0], X, P, Y);
+ if (X != Y) {
+ X->sign = FP_NEG;
+ }
+ if (P->sign == FP_NEG) {
+ fp_add(Y, P, Y);
+ }
}
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
return err;
#else
return FP_VAL;
-#endif
+#endif
}
else {
/* Positive exponent so just exptmod */
- return _fp_exptmod(G, X, P, Y);
+ return _fp_exptmod_nct(G, X, P, Y);
}
}
@@ -1234,13 +2477,13 @@ void fp_2expt(fp_int *a, int b)
/* zero a as per default */
fp_zero (a);
- if (b < 0) {
+ if (b < 0) {
return;
}
z = b / DIGIT_BIT;
if (z >= FP_SIZE) {
- return;
+ return;
}
/* set the used count of where the bit will go */
@@ -1251,118 +2494,141 @@ void fp_2expt(fp_int *a, int b)
}
/* b = a*a */
-void fp_sqr(fp_int *A, fp_int *B)
+int fp_sqr(fp_int *A, fp_int *B)
{
- int y = A->used;
+ int err;
+ int y, oldused;
+
+ oldused = B->used;
+ y = A->used;
/* call generic if we're out of range */
if (y + y > FP_SIZE) {
- fp_sqr_comba(A, B);
- return ;
+ err = fp_sqr_comba(A, B);
+ goto clean;
}
-#if defined(TFM_SQR3)
+#if defined(TFM_SQR3) && FP_SIZE >= 6
if (y <= 3) {
- fp_sqr_comba3(A,B);
- return;
+ err = fp_sqr_comba3(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR4)
+#if defined(TFM_SQR4) && FP_SIZE >= 8
if (y == 4) {
- fp_sqr_comba4(A,B);
- return;
+ err = fp_sqr_comba4(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR6)
+#if defined(TFM_SQR6) && FP_SIZE >= 12
if (y <= 6) {
- fp_sqr_comba6(A,B);
- return;
+ err = fp_sqr_comba6(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR7)
+#if defined(TFM_SQR7) && FP_SIZE >= 14
if (y == 7) {
- fp_sqr_comba7(A,B);
- return;
+ err = fp_sqr_comba7(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR8)
+#if defined(TFM_SQR8) && FP_SIZE >= 16
if (y == 8) {
- fp_sqr_comba8(A,B);
- return;
+ err = fp_sqr_comba8(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR9)
+#if defined(TFM_SQR9) && FP_SIZE >= 18
if (y == 9) {
- fp_sqr_comba9(A,B);
- return;
+ err = fp_sqr_comba9(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR12)
+#if defined(TFM_SQR12) && FP_SIZE >= 24
if (y <= 12) {
- fp_sqr_comba12(A,B);
- return;
+ err = fp_sqr_comba12(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR17)
+#if defined(TFM_SQR17) && FP_SIZE >= 34
if (y <= 17) {
- fp_sqr_comba17(A,B);
- return;
+ err = fp_sqr_comba17(A,B);
+ goto clean;
}
#endif
#if defined(TFM_SMALL_SET)
if (y <= 16) {
- fp_sqr_comba_small(A,B);
- return;
+ err = fp_sqr_comba_small(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR20)
+#if defined(TFM_SQR20) && FP_SIZE >= 40
if (y <= 20) {
- fp_sqr_comba20(A,B);
- return;
+ err = fp_sqr_comba20(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR24)
+#if defined(TFM_SQR24) && FP_SIZE >= 48
if (y <= 24) {
- fp_sqr_comba24(A,B);
- return;
+ err = fp_sqr_comba24(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR28)
+#if defined(TFM_SQR28) && FP_SIZE >= 56
if (y <= 28) {
- fp_sqr_comba28(A,B);
- return;
+ err = fp_sqr_comba28(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR32)
+#if defined(TFM_SQR32) && FP_SIZE >= 64
if (y <= 32) {
- fp_sqr_comba32(A,B);
- return;
+ err = fp_sqr_comba32(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR48)
+#if defined(TFM_SQR48) && FP_SIZE >= 96
if (y <= 48) {
- fp_sqr_comba48(A,B);
- return;
+ err = fp_sqr_comba48(A,B);
+ goto clean;
}
#endif
-#if defined(TFM_SQR64)
+#if defined(TFM_SQR64) && FP_SIZE >= 128
if (y <= 64) {
- fp_sqr_comba64(A,B);
- return;
+ err = fp_sqr_comba64(A,B);
+ goto clean;
}
#endif
- fp_sqr_comba(A, B);
+ err = fp_sqr_comba(A, B);
+
+clean:
+ /* zero any excess digits on the destination that we didn't write to */
+ for (y = B->used; y >= 0 && y < oldused; y++) {
+ B->dp[y] = 0;
+ }
+
+ return err;
}
/* generic comba squarer */
-void fp_sqr_comba(fp_int *A, fp_int *B)
+int fp_sqr_comba(fp_int *A, fp_int *B)
{
int pa, ix, iz;
fp_digit c0, c1, c2;
- fp_int tmp, *dst;
#ifdef TFM_ISO
fp_word tt;
-#endif
+#endif
+ fp_int *dst;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int tmp[1];
+#else
+ fp_int *tmp;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (tmp == NULL)
+ return FP_MEM;
+#endif
/* get size of output and trim */
pa = A->used + A->used;
@@ -1375,14 +2641,14 @@ void fp_sqr_comba(fp_int *A, fp_int *B)
COMBA_CLEAR;
if (A == B) {
- fp_init(&tmp);
- dst = &tmp;
+ fp_init(tmp);
+ dst = tmp;
} else {
fp_zero(B);
dst = B;
}
- for (ix = 0; ix < pa; ix++) {
+ for (ix = 0; ix < pa; ix++) {
int tx, ty, iy;
fp_digit *tmpy, *tmpx;
@@ -1394,14 +2660,14 @@ void fp_sqr_comba(fp_int *A, fp_int *B)
tmpx = A->dp + tx;
tmpy = A->dp + ty;
- /* this is the number of times the loop will iterrate,
+ /* this is the number of times the loop will iterate,
while (tx++ < a->used && ty-- >= 0) { ... }
*/
iy = MIN(A->used-tx, ty+1);
- /* now for squaring tx can never equal ty
- * we halve the distance since they approach
- * at a rate of 2x and we have to round because
+ /* now for squaring tx can never equal ty
+ * we halve the distance since they approach
+ * at a rate of 2x and we have to round because
* odd cases need to be executed
*/
iy = MIN(iy, (ty-tx+1)>>1);
@@ -1432,6 +2698,17 @@ void fp_sqr_comba(fp_int *A, fp_int *B)
if (dst != B) {
fp_copy(dst, B);
}
+
+ /* Variables used but not seen by cppcheck. */
+ (void)c0; (void)c1; (void)c2;
+#ifdef TFM_ISO
+ (void)tt;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
}
int fp_cmp(fp_int *a, fp_int *b)
@@ -1454,6 +2731,10 @@ int fp_cmp(fp_int *a, fp_int *b)
/* compare against a single digit */
int fp_cmp_d(fp_int *a, fp_digit b)
{
+ /* special case for zero*/
+ if (a->used == 0 && b == 0)
+ return FP_EQ;
+
/* compare based on sign */
if ((b && a->used == 0) || a->sign == FP_NEG) {
return FP_LT;
@@ -1495,7 +2776,7 @@ int fp_cmp_mag(fp_int *a, fp_int *b)
return FP_EQ;
}
-/* setups the montgomery reduction */
+/* sets up the montgomery reduction */
int fp_montgomery_setup(fp_int *a, fp_digit *rho)
{
fp_digit x, b;
@@ -1562,39 +2843,46 @@ void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
#endif
#ifdef HAVE_INTEL_MULX
-static inline void innermul8_mulx(fp_digit *c_mulx, fp_digit *cy_mulx, fp_digit *tmpm, fp_digit mu)
+static WC_INLINE void innermul8_mulx(fp_digit *c_mulx, fp_digit *cy_mulx, fp_digit *tmpm, fp_digit mu)
{
- fp_digit _c0, _c1, _c2, _c3, _c4, _c5, _c6, _c7, cy ;
-
- cy = *cy_mulx ;
- _c0=c_mulx[0]; _c1=c_mulx[1]; _c2=c_mulx[2]; _c3=c_mulx[3]; _c4=c_mulx[4]; _c5=c_mulx[5]; _c6=c_mulx[6]; _c7=c_mulx[7];
+ fp_digit cy = *cy_mulx ;
INNERMUL8_MULX ;
- c_mulx[0]=_c0; c_mulx[1]=_c1; c_mulx[2]=_c2; c_mulx[3]=_c3; c_mulx[4]=_c4; c_mulx[5]=_c5; c_mulx[6]=_c6; c_mulx[7]=_c7;
*cy_mulx = cy ;
}
/* computes x/R == x (mod N) via Montgomery Reduction */
-static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
+static int fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
{
- fp_digit c[FP_SIZE], *_c, *tmpm, mu = 0;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit c[FP_SIZE+1];
+#else
+ fp_digit *c;
+#endif
+ fp_digit *_c, *tmpm, mu = 0;
int oldused, x, y, pa;
/* bail if too large */
if (m->used > (FP_SIZE/2)) {
(void)mu; /* shut up compiler */
- return;
+ return FP_OKAY;
}
#ifdef TFM_SMALL_MONT_SET
if (m->used <= 16) {
- fp_montgomery_reduce_small(a, m, mp);
- return;
+ return fp_montgomery_reduce_small(a, m, mp);
}
#endif
+#ifdef WOLFSSL_SMALL_STACK
+ /* only allocate space for what's needed for window plus res */
+ c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT);
+ if (c == NULL) {
+ return FP_MEM;
+ }
+#endif
/* now zero the buff */
- XMEMSET(c, 0, sizeof c);
+ XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1));
pa = m->used;
/* copy the input */
@@ -1625,7 +2913,7 @@ static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
PROPCARRY;
++_c;
}
- }
+ }
/* now copy out */
_c = c + pa;
@@ -1634,7 +2922,8 @@ static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
*tmpm++ = *_c++;
}
- for (; x < oldused; x++) {
+ /* zero any excess digits on the destination that we didn't write to */
+ for (; x < oldused; x++) {
*tmpm++ = 0;
}
@@ -1642,38 +2931,55 @@ static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
a->used = pa+1;
fp_clamp(a);
-
+
/* if A >= m then A = A - m */
if (fp_cmp_mag (a, m) != FP_LT) {
s_fp_sub (a, m, a);
}
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(c, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
}
#endif
/* computes x/R == x (mod N) via Montgomery Reduction */
-void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
+int fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
{
- fp_digit c[FP_SIZE], *_c, *tmpm, mu = 0;
- int oldused, x, y, pa;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_digit c[FP_SIZE+1];
+#else
+ fp_digit *c;
+#endif
+ fp_digit *_c, *tmpm, mu = 0;
+ int oldused, x, y, pa, err = 0;
- IF_HAVE_INTEL_MULX(fp_montgomery_reduce_mulx(a, m, mp), return) ;
+ IF_HAVE_INTEL_MULX(err = fp_montgomery_reduce_mulx(a, m, mp), return err) ;
+ (void)err;
/* bail if too large */
if (m->used > (FP_SIZE/2)) {
(void)mu; /* shut up compiler */
- return;
+ return FP_OKAY;
}
#ifdef TFM_SMALL_MONT_SET
if (m->used <= 16) {
- fp_montgomery_reduce_small(a, m, mp);
- return;
+ return fp_montgomery_reduce_small(a, m, mp);
}
#endif
+#ifdef WOLFSSL_SMALL_STACK
+ /* only allocate space for what's needed for window plus res */
+ c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT);
+ if (c == NULL) {
+ return FP_MEM;
+ }
+#endif
/* now zero the buff */
- XMEMSET(c, 0, sizeof c);
+ XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1));
pa = m->used;
/* copy the input */
@@ -1690,13 +2996,13 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
_c = c + x;
tmpm = m->dp;
y = 0;
- #if (defined(TFM_SSE2) || defined(TFM_X86_64))
+#if defined(INNERMUL8)
for (; y < (pa & ~7); y += 8) {
INNERMUL8 ;
_c += 8;
tmpm += 8;
}
- #endif
+#endif
for (; y < pa; y++) {
INNERMUL;
++_c;
@@ -1706,7 +3012,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
PROPCARRY;
++_c;
}
- }
+ }
/* now copy out */
_c = c + pa;
@@ -1715,7 +3021,8 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
*tmpm++ = *_c++;
}
- for (; x < oldused; x++) {
+ /* zero any excess digits on the destination that we didn't write to */
+ for (; x < oldused; x++) {
*tmpm++ = 0;
}
@@ -1723,21 +3030,40 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
a->used = pa+1;
fp_clamp(a);
-
+
/* if A >= m then A = A - m */
if (fp_cmp_mag (a, m) != FP_LT) {
s_fp_sub (a, m, a);
}
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(c, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
}
-void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
+void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c)
{
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ const word32 maxC = (a->size * sizeof(fp_digit));
+#else
+ const word32 maxC = (FP_SIZE * sizeof(fp_digit));
+#endif
+
/* zero the int */
fp_zero (a);
+ /* if input b excess max, then truncate */
+ if (c > 0 && (word32)c > maxC) {
+ int excess = (c - maxC);
+ c -= excess;
+ b += excess;
+ }
+
/* If we know the endianness of this architecture, and we're using
32-bit fp_digits, we can optimize this */
-#if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && defined(FP_32BIT)
+#if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && \
+ defined(FP_32BIT)
/* But not for both simultaneously */
#if defined(LITTLE_ENDIAN_ORDER) && defined(BIG_ENDIAN_ORDER)
#error Both LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER defined.
@@ -1745,11 +3071,6 @@ void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
{
unsigned char *pd = (unsigned char *)a->dp;
- if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
- int excess = c - (FP_SIZE * sizeof(fp_digit));
- c -= excess;
- b += excess;
- }
a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
/* read the bytes in */
#ifdef BIG_ENDIAN_ORDER
@@ -1757,12 +3078,12 @@ void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
/* Use Duff's device to unroll the loop. */
int idx = (c - 1) & ~3;
switch (c % 4) {
- case 0: do { pd[idx+0] = *b++;
- case 3: pd[idx+1] = *b++;
- case 2: pd[idx+2] = *b++;
- case 1: pd[idx+3] = *b++;
+ case 0: do { pd[idx+0] = *b++; // fallthrough
+ case 3: pd[idx+1] = *b++; // fallthrough
+ case 2: pd[idx+2] = *b++; // fallthrough
+ case 1: pd[idx+3] = *b++; // fallthrough
idx -= 4;
- } while ((c -= 4) > 0);
+ } while ((c -= 4) > 0);
}
}
#else
@@ -1776,25 +3097,108 @@ void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
for (; c > 0; c--) {
fp_mul_2d (a, 8, a);
a->dp[0] |= *b++;
- a->used += 1;
+
+ if (a->used == 0) {
+ a->used = 1;
+ }
}
#endif
fp_clamp (a);
}
-void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
+int fp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
+{
+#if DIGIT_BIT == 64 || DIGIT_BIT == 32
+ int i, j;
+ fp_digit n;
+
+ for (j=0,i=0; i<t->used-1; ) {
+ b[x++] = (unsigned char)(t->dp[i] >> j);
+ j += 8;
+ i += j == DIGIT_BIT;
+ j &= DIGIT_BIT - 1;
+ }
+ n = t->dp[i];
+ while (n != 0) {
+ b[x++] = (unsigned char)n;
+ n >>= 8;
+ }
+ return x;
+#else
+ while (fp_iszero (t) == FP_NO) {
+ b[x++] = (unsigned char) (t->dp[0] & 255);
+ fp_div_2d (t, 8, t, NULL);
+ }
+ return x;
+#endif
+}
+
+int fp_to_unsigned_bin(fp_int *a, unsigned char *b)
+{
+ int x;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init_copy(t, a);
+
+ x = fp_to_unsigned_bin_at_pos(0, t, b);
+ fp_reverse (b, x);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+
+int fp_to_unsigned_bin_len(fp_int *a, unsigned char *b, int c)
{
+#if DIGIT_BIT == 64 || DIGIT_BIT == 32
+ int i, j, x;
+
+ for (x=c-1,j=0,i=0; x >= 0; x--) {
+ b[x] = (unsigned char)(a->dp[i] >> j);
+ j += 8;
+ i += j == DIGIT_BIT;
+ j &= DIGIT_BIT - 1;
+ }
+
+ return FP_OKAY;
+#else
int x;
- fp_int t;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
- fp_init_copy(&t, a);
+ fp_init_copy(t, a);
- x = 0;
- while (fp_iszero (&t) == FP_NO) {
- b[x++] = (unsigned char) (t.dp[0] & 255);
- fp_div_2d (&t, 8, &t, NULL);
+ for (x = 0; x < c; x++) {
+ b[x] = (unsigned char) (t->dp[0] & 255);
+ fp_div_2d (t, 8, t, NULL);
}
fp_reverse (b, x);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+#endif
}
int fp_unsigned_bin_size(fp_int *a)
@@ -1810,6 +3214,77 @@ void fp_set(fp_int *a, fp_digit b)
a->used = a->dp[0] ? 1 : 0;
}
+
+#ifndef MP_SET_CHUNK_BITS
+ #define MP_SET_CHUNK_BITS 4
+#endif
+void fp_set_int(fp_int *a, unsigned long b)
+{
+ int x;
+
+ /* use direct fp_set if b is less than fp_digit max */
+ if (b < FP_DIGIT_MAX) {
+ fp_set (a, (fp_digit)b);
+ return;
+ }
+
+ fp_zero (a);
+
+ /* set chunk bits at a time */
+ for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
+ fp_mul_2d (a, MP_SET_CHUNK_BITS, a);
+
+ /* OR in the top bits of the source */
+ a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
+ ((1 << MP_SET_CHUNK_BITS) - 1);
+
+ /* shift the source up to the next chunk bits */
+ b <<= MP_SET_CHUNK_BITS;
+
+ /* ensure that digits are not clamped off */
+ a->used += 1;
+ }
+
+ /* clamp digits */
+ fp_clamp(a);
+}
+
+/* check if a bit is set */
+int fp_is_bit_set (fp_int *a, fp_digit b)
+{
+ fp_digit i;
+
+ if (b > FP_MAX_BITS)
+ return 0;
+ else
+ i = b/DIGIT_BIT;
+
+ if ((fp_digit)a->used < i)
+ return 0;
+
+ return (int)((a->dp[i] >> b%DIGIT_BIT) & (fp_digit)1);
+}
+
+/* set the b bit of a */
+int fp_set_bit (fp_int * a, fp_digit b)
+{
+ fp_digit i;
+
+ if (b > FP_MAX_BITS)
+ return 0;
+ else
+ i = b/DIGIT_BIT;
+
+ /* set the used count of where the bit will go if required */
+ if (a->used < (int)(i+1))
+ a->used = (int)(i+1);
+
+ /* put the single bit in its place */
+ a->dp[i] |= ((fp_digit)1) << (b % DIGIT_BIT);
+
+ return MP_OKAY;
+}
+
int fp_count_bits (fp_int * a)
{
int r;
@@ -1829,6 +3304,7 @@ int fp_count_bits (fp_int * a)
++r;
q >>= ((fp_digit) 1);
}
+
return r;
}
@@ -1853,36 +3329,38 @@ int fp_leading_bit(fp_int *a)
void fp_lshd(fp_int *a, int x)
{
- int y;
+ int y;
- /* move up and truncate as required */
- y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
+ /* move up and truncate as required */
+ y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
- /* store new size */
- a->used = y + 1;
+ /* store new size */
+ a->used = y + 1;
- /* move digits */
- for (; y >= x; y--) {
- a->dp[y] = a->dp[y-x];
- }
-
- /* zero lower digits */
- for (; y >= 0; y--) {
- a->dp[y] = 0;
- }
+ /* move digits */
+ for (; y >= x; y--) {
+ a->dp[y] = a->dp[y-x];
+ }
- /* clamp digits */
- fp_clamp(a);
+ /* zero lower digits */
+ for (; y >= 0; y--) {
+ a->dp[y] = 0;
+ }
+
+ /* clamp digits */
+ fp_clamp(a);
}
/* right shift by bit count */
void fp_rshb(fp_int *c, int x)
{
- register fp_digit *tmpc, mask, shift;
+ fp_digit *tmpc, mask, shift;
fp_digit r, rr;
fp_digit D = x;
+ if (fp_iszero(c)) return;
+
/* mask */
mask = (((fp_digit)1) << D) - 1;
@@ -1905,6 +3383,9 @@ void fp_rshb(fp_int *c, int x)
/* set the carry to the carry bits of the current word found above */
r = rr;
}
+
+ /* clamp digits */
+ fp_clamp(c);
}
@@ -1927,7 +3408,7 @@ void fp_rshd(fp_int *a, int x)
for (; y < a->used; y++) {
a->dp[y] = 0;
}
-
+
/* decrement count */
a->used -= x;
fp_clamp(a);
@@ -1952,16 +3433,40 @@ void fp_reverse (unsigned char *s, int len)
/* c = a - b */
-void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
+int fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
{
- fp_int tmp;
- fp_init(&tmp);
- fp_set(&tmp, b);
- fp_sub(a, &tmp, c);
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int tmp[1];
+#else
+ fp_int *tmp;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (tmp == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init(tmp);
+ fp_set(tmp, b);
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ if (c->size < FP_SIZE) {
+ fp_sub(a, tmp, tmp);
+ fp_copy(tmp, c);
+ } else
+#endif
+ {
+ fp_sub(a, tmp, c);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
}
-/* CyaSSL callers from normal lib */
+/* wolfSSL callers from normal lib */
/* init a new mp_int */
int mp_init (mp_int * a)
@@ -1971,30 +3476,92 @@ int mp_init (mp_int * a)
return MP_OKAY;
}
-#ifdef ALT_ECC_SIZE
void fp_init(fp_int *a)
{
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
a->size = FP_SIZE;
+#endif
+#ifdef HAVE_WOLF_BIGINT
+ wc_bigint_init(&a->raw);
+#endif
fp_zero(a);
}
void fp_zero(fp_int *a)
{
+ int size;
a->used = 0;
a->sign = FP_ZPOS;
- XMEMSET(a->dp, 0, a->size * sizeof(fp_digit));
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ size = a->size;
+#else
+ size = FP_SIZE;
+#endif
+ XMEMSET(a->dp, 0, size * sizeof(fp_digit));
}
+
+void fp_clear(fp_int *a)
+{
+ int size;
+ a->used = 0;
+ a->sign = FP_ZPOS;
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ size = a->size;
+#else
+ size = FP_SIZE;
+#endif
+ XMEMSET(a->dp, 0, size * sizeof(fp_digit));
+ fp_free(a);
+}
+
+void fp_forcezero (mp_int * a)
+{
+ int size;
+ a->used = 0;
+ a->sign = FP_ZPOS;
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ size = a->size;
+#else
+ size = FP_SIZE;
#endif
+ ForceZero(a->dp, size * sizeof(fp_digit));
+#ifdef HAVE_WOLF_BIGINT
+ wc_bigint_zero(&a->raw);
+#endif
+ fp_free(a);
+}
+
+void mp_forcezero (mp_int * a)
+{
+ fp_forcezero(a);
+}
+
+void fp_free(fp_int* a)
+{
+#ifdef HAVE_WOLF_BIGINT
+ wc_bigint_free(&a->raw);
+#else
+ (void)a;
+#endif
+}
/* clear one (frees) */
void mp_clear (mp_int * a)
{
- fp_zero(a);
+ if (a == NULL)
+ return;
+ fp_clear(a);
+}
+
+void mp_free(mp_int* a)
+{
+ fp_free(a);
}
/* handle up to 6 inits */
-int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e, mp_int* f)
+int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d,
+ mp_int* e, mp_int* f)
{
if (a)
fp_init(a);
@@ -2027,40 +3594,103 @@ int mp_sub (mp_int * a, mp_int * b, mp_int * c)
}
/* high level multiplication (handles sign) */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_mul(mp_int * a, mp_int * b, mp_int * c)
+#else
int mp_mul (mp_int * a, mp_int * b, mp_int * c)
+#endif
{
- fp_mul(a, b, c);
+ return fp_mul(a, b, c);
+}
+
+int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
+{
+ fp_mul_d(a, b, c);
return MP_OKAY;
}
/* d = a * b (mod c) */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+#else
int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
+#endif
+{
+ #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
+ !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
+ int A = fp_count_bits (a);
+ int B = fp_count_bits (b);
+
+ if( A >= ESP_RSA_MULM_BITS && B >= ESP_RSA_MULM_BITS)
+ return esp_mp_mulmod(a, b, c, d);
+ else
+ #endif
+ return fp_mulmod(a, b, c, d);
+}
+
+/* d = a - b (mod c) */
+int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
+{
+ return fp_submod(a, b, c, d);
+}
+
+/* d = a + b (mod c) */
+int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
{
- return fp_mulmod(a, b, c, d);
+ return fp_addmod(a, b, c, d);
}
/* c = a mod b, 0 <= c < b */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_mod (mp_int * a, mp_int * b, mp_int * c)
+#else
int mp_mod (mp_int * a, mp_int * b, mp_int * c)
+#endif
{
return fp_mod (a, b, c);
}
/* hac 14.61, pp608 */
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
+#else
int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
+#endif
{
return fp_invmod(a, b, c);
}
+/* hac 14.61, pp608 */
+int mp_invmod_mont_ct (mp_int * a, mp_int * b, mp_int * c, mp_digit mp)
+{
+ return fp_invmod_mont_ct(a, b, c, mp);
+}
+
/* this is a shell function that calls either the normal or Montgomery
* exptmod functions. Originally the call to the montgomery code was
- * embedded in the normal function but that wasted alot of stack space
+ * embedded in the normal function but that wasted a lot of stack space
* for nothing (since 99% of the time the Montgomery code would be called)
*/
+#if defined(FREESCALE_LTC_TFM)
+int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+#else
int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+#endif
{
return fp_exptmod(G, X, P, Y);
}
+int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y)
+{
+ return fp_exptmod_ex(G, X, digits, P, Y);
+}
+
+int mp_exptmod_nct (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+{
+ return fp_exptmod_nct(G, X, P, Y);
+}
+
+
/* compare two ints (signed)*/
int mp_cmp (mp_int * a, mp_int * b)
{
@@ -2079,35 +3709,85 @@ int mp_unsigned_bin_size (mp_int * a)
return fp_unsigned_bin_size(a);
}
+int mp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
+{
+ return fp_to_unsigned_bin_at_pos(x, t, b);
+}
+
/* store in unsigned [big endian] format */
int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
{
- fp_to_unsigned_bin(a,b);
- return MP_OKAY;
+ return fp_to_unsigned_bin(a,b);
}
+int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
+{
+ return fp_to_unsigned_bin_len(a, b, c);
+}
/* reads a unsigned char array, assumes the msb is stored first [big endian] */
int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
{
- fp_read_unsigned_bin(a, (unsigned char *)b, c);
+ fp_read_unsigned_bin(a, b, c);
return MP_OKAY;
}
int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
{
- fp_sub_d(a, b, c);
+ return fp_sub_d(a, b, c);
+}
+
+int mp_mul_2d(fp_int *a, int b, fp_int *c)
+{
+ fp_mul_2d(a, b, c);
+ return MP_OKAY;
+}
+
+int mp_2expt(fp_int* a, int b)
+{
+ fp_2expt(a, b);
return MP_OKAY;
}
+int mp_div(fp_int * a, fp_int * b, fp_int * c, fp_int * d)
+{
+ return fp_div(a, b, c, d);
+}
-#ifdef ALT_ECC_SIZE
-void fp_copy(fp_int *a, fp_int* b)
+int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d)
{
+ fp_div_2d(a, b, c, d);
+ return MP_OKAY;
+}
+
+void fp_copy(fp_int *a, fp_int *b)
+{
+ /* if source and destination are different */
if (a != b) {
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ /* verify a will fit in b */
+ if (b->size >= a->used) {
+ int x, oldused;
+ oldused = b->used;
+ b->used = a->used;
+ b->sign = a->sign;
+
+ XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit));
+
+ /* zero any excess digits on the destination that we didn't write to */
+ for (x = b->used; x >= 0 && x < oldused; x++) {
+ b->dp[x] = 0;
+ }
+ }
+ else {
+ /* TODO: Handle error case */
+ }
+#else
+ /* all dp's are same size, so do straight copy */
b->used = a->used;
b->sign = a->sign;
- XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit));
+ XMEMCPY(b->dp, a->dp, FP_SIZE * sizeof(fp_digit));
+#endif
}
}
@@ -2118,67 +3798,98 @@ void fp_init_copy(fp_int *a, fp_int* b)
fp_copy(b, a);
}
}
-#endif
-/* fast math conversion */
+/* fast math wrappers */
int mp_copy(fp_int* a, fp_int* b)
{
fp_copy(a, b);
return MP_OKAY;
}
-
-/* fast math conversion */
int mp_isodd(mp_int* a)
{
return fp_isodd(a);
}
-
-/* fast math conversion */
int mp_iszero(mp_int* a)
{
return fp_iszero(a);
}
-
-/* fast math conversion */
int mp_count_bits (mp_int* a)
{
return fp_count_bits(a);
}
-
int mp_leading_bit (mp_int* a)
{
return fp_leading_bit(a);
}
-
-/* fast math conversion */
void mp_rshb (mp_int* a, int x)
{
fp_rshb(a, x);
}
+void mp_rshd (mp_int* a, int x)
+{
+ fp_rshd(a, x);
+}
-/* fast math wrappers */
-int mp_set_int(fp_int *a, fp_digit b)
+int mp_set_int(mp_int *a, unsigned long b)
{
- fp_set(a, b);
+ fp_set_int(a, b);
return MP_OKAY;
}
+int mp_is_bit_set (mp_int *a, mp_digit b)
+{
+ return fp_is_bit_set(a, b);
+}
-#if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC)
+int mp_set_bit(mp_int *a, mp_digit b)
+{
+ return fp_set_bit(a, b);
+}
+
+#if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC) || !defined(NO_DH) || \
+ !defined(NO_DSA) || !defined(NO_RSA)
/* c = a * a (mod b) */
int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
{
- fp_int tmp;
- fp_init(&tmp);
- fp_sqr(a, &tmp);
- return fp_mod(&tmp, b, c);
+ int err;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init(t);
+ err = fp_sqr(a, t);
+ if (err == FP_OKAY) {
+ #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ if (c->size < FP_SIZE) {
+ err = fp_mod(t, b, t);
+ fp_copy(t, c);
+ }
+ else
+ #endif
+ {
+ err = fp_mod(t, b, c);
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return err;
}
/* fast math conversion */
@@ -2197,7 +3908,37 @@ int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
#endif /* WOLFSSL_KEYGEN || HAVE_ECC */
-#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
+#if defined(WC_MP_TO_RADIX) || !defined(NO_DH) || !defined(NO_DSA) || \
+ !defined(NO_RSA)
+
+#ifdef WOLFSSL_KEY_GEN
+/* swap the elements of two integers, for cases where you can't simply swap the
+ * mp_int pointers around
+ */
+static int fp_exch (fp_int * a, fp_int * b)
+{
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ *t = *a;
+ *a = *b;
+ *b = *t;
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+#endif
static const int lnz[16] = {
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
@@ -2210,12 +3951,12 @@ int fp_cnt_lsb(fp_int *a)
fp_digit q, qq;
/* easy out */
- if (fp_iszero(a) == 1) {
+ if (fp_iszero(a) == FP_YES) {
return 0;
}
/* scan lower digits until non-zero */
- for (x = 0; x < a->used && a->dp[x] == 0; x++);
+ for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
q = a->dp[x];
x *= DIGIT_BIT;
@@ -2231,30 +3972,32 @@ int fp_cnt_lsb(fp_int *a)
}
-
-
static int s_is_power_of_two(fp_digit b, int *p)
{
int x;
/* fast return if no power of two */
if ((b==0) || (b & (b-1))) {
- return 0;
+ return FP_NO;
}
for (x = 0; x < DIGIT_BIT; x++) {
if (b == (((fp_digit)1)<<x)) {
*p = x;
- return 1;
+ return FP_YES;
}
}
- return 0;
+ return FP_NO;
}
/* a/b => cb + d == a */
static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
{
- fp_int q;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int q[1];
+#else
+ fp_int *q;
+#endif
fp_word w;
fp_digit t;
int ix;
@@ -2265,7 +4008,7 @@ static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
}
/* quick outs */
- if (b == 1 || fp_iszero(a) == 1) {
+ if (b == 1 || fp_iszero(a) == FP_YES) {
if (d != NULL) {
*d = 0;
}
@@ -2276,7 +4019,7 @@ static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
}
/* power of two ? */
- if (s_is_power_of_two(b, &ix) == 1) {
+ if (s_is_power_of_two(b, &ix) == FP_YES) {
if (d != NULL) {
*d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
}
@@ -2286,33 +4029,45 @@ static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
return FP_OKAY;
}
- /* no easy answer [c'est la vie]. Just division */
- fp_init(&q);
-
- q.used = a->used;
- q.sign = a->sign;
+#ifdef WOLFSSL_SMALL_STACK
+ q = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (q == NULL)
+ return FP_MEM;
+#endif
+
+ fp_init(q);
+
+ if (c != NULL) {
+ q->used = a->used;
+ q->sign = a->sign;
+ }
+
w = 0;
for (ix = a->used - 1; ix >= 0; ix--) {
w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
-
+
if (w >= b) {
t = (fp_digit)(w / b);
w -= ((fp_word)t) * ((fp_word)b);
} else {
t = 0;
}
- q.dp[ix] = (fp_digit)t;
+ if (c != NULL)
+ q->dp[ix] = (fp_digit)t;
}
-
+
if (d != NULL) {
*d = (fp_digit)w;
}
-
+
if (c != NULL) {
- fp_clamp(&q);
- fp_copy(&q, c);
+ fp_clamp(q);
+ fp_copy(q, c);
}
-
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
return FP_OKAY;
}
@@ -2328,101 +4083,148 @@ int mp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
return fp_mod_d(a, b, c);
}
-#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */
-
-#ifdef WOLFSSL_KEY_GEN
-
-void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
-void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
-int fp_isprime(fp_int *a);
+#endif /* WC_MP_TO_RADIX || !NO_DH || !NO_DSA || !NO_RSA */
-int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
-{
- fp_gcd(a, b, c);
- return MP_OKAY;
-}
+#if !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA) || \
+ defined(WOLFSSL_KEY_GEN)
-int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
-{
- fp_lcm(a, b, c);
- return MP_OKAY;
-}
+static int fp_isprime_ex(fp_int *a, int t, int* result);
int mp_prime_is_prime(mp_int* a, int t, int* result)
{
- (void)t;
- *result = fp_isprime(a);
- return MP_OKAY;
+ return fp_isprime_ex(a, t, result);
}
-/* Miller-Rabin test of "a" to the base of "b" as described in
+/* Miller-Rabin test of "a" to the base of "b" as described in
* HAC pp. 139 Algorithm 4.24
*
* Sets result to 0 if definitely composite or 1 if probably prime.
- * Randomly the chance of error is no more than 1/4 and often
+ * Randomly the chance of error is no more than 1/4 and often
* very much lower.
*/
-static void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
+static int fp_prime_miller_rabin_ex(fp_int * a, fp_int * b, int *result,
+ fp_int *n1, fp_int *y, fp_int *r)
{
- fp_int n1, y, r;
- int s, j;
+ int s, j;
+ int err;
/* default */
*result = FP_NO;
/* ensure b > 1 */
if (fp_cmp_d(b, 1) != FP_GT) {
- return;
- }
+ return FP_OKAY;
+ }
/* get n1 = a - 1 */
- fp_init_copy(&n1, a);
- fp_sub_d(&n1, 1, &n1);
+ fp_copy(a, n1);
+ err = fp_sub_d(n1, 1, n1);
+ if (err != FP_OKAY) {
+ return err;
+ }
/* set 2**s * r = n1 */
- fp_init_copy(&r, &n1);
+ fp_copy(n1, r);
/* count the number of least significant bits
* which are zero
*/
- s = fp_cnt_lsb(&r);
+ s = fp_cnt_lsb(r);
/* now divide n - 1 by 2**s */
- fp_div_2d (&r, s, &r, NULL);
+ fp_div_2d (r, s, r, NULL);
/* compute y = b**r mod a */
- fp_init(&y);
- fp_exptmod(b, &r, a, &y);
+ fp_zero(y);
+#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
+ defined(WOLFSSL_HAVE_SP_DH)
+#ifndef WOLFSSL_SP_NO_2048
+ if (fp_count_bits(a) == 1024)
+ sp_ModExp_1024(b, r, a, y);
+ else if (fp_count_bits(a) == 2048)
+ sp_ModExp_2048(b, r, a, y);
+ else
+#endif
+#ifndef WOLFSSL_SP_NO_3072
+ if (fp_count_bits(a) == 1536)
+ sp_ModExp_1536(b, r, a, y);
+ else if (fp_count_bits(a) == 3072)
+ sp_ModExp_3072(b, r, a, y);
+ else
+#endif
+#ifdef WOLFSSL_SP_4096
+ if (fp_count_bits(a) == 4096)
+ sp_ModExp_4096(b, r, a, y);
+ else
+#endif
+#endif
+ fp_exptmod(b, r, a, y);
/* if y != 1 and y != n1 do */
- if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
+ if (fp_cmp_d (y, 1) != FP_EQ && fp_cmp (y, n1) != FP_EQ) {
j = 1;
/* while j <= s-1 and y != n1 */
- while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
- fp_sqrmod (&y, a, &y);
+ while ((j <= (s - 1)) && fp_cmp (y, n1) != FP_EQ) {
+ fp_sqrmod (y, a, y);
/* if y == 1 then composite */
- if (fp_cmp_d (&y, 1) == FP_EQ) {
- return;
+ if (fp_cmp_d (y, 1) == FP_EQ) {
+ return FP_OKAY;
}
++j;
}
/* if y != n1 then composite */
- if (fp_cmp (&y, &n1) != FP_EQ) {
- return;
+ if (fp_cmp (y, n1) != FP_EQ) {
+ return FP_OKAY;
}
}
/* probably prime now */
*result = FP_YES;
+
+ return FP_OKAY;
+}
+
+static int fp_prime_miller_rabin(fp_int * a, fp_int * b, int *result)
+{
+ int err;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int n1[1], y[1], r[1];
+#else
+ fp_int *n1, *y, *r;
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ n1 = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+ if (n1 == NULL) {
+ return FP_MEM;
+ }
+ y = &n1[1]; r = &n1[2];
+#endif
+
+ fp_init(n1);
+ fp_init(y);
+ fp_init(r);
+
+ err = fp_prime_miller_rabin_ex(a, b, result, n1, y, r);
+
+ fp_clear(n1);
+ fp_clear(y);
+ fp_clear(r);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(n1, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+
+ return err;
}
/* a few primes */
-static const fp_digit primes[256] = {
+static const fp_digit primes[FP_PRIME_SIZE] = {
0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
@@ -2460,104 +4262,427 @@ static const fp_digit primes[256] = {
0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
-int fp_isprime(fp_int *a)
+int fp_isprime_ex(fp_int *a, int t, int* result)
{
- fp_int b;
- fp_digit d = 0;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int b[1];
+#else
+ fp_int *b;
+#endif
+ fp_digit d;
int r, res;
+ if (t <= 0 || t > FP_PRIME_SIZE) {
+ *result = FP_NO;
+ return FP_VAL;
+ }
+
+ if (fp_isone(a)) {
+ *result = FP_NO;
+ return FP_OKAY;
+ }
+
+ /* check against primes table */
+ for (r = 0; r < FP_PRIME_SIZE; r++) {
+ if (fp_cmp_d(a, primes[r]) == FP_EQ) {
+ *result = FP_YES;
+ return FP_OKAY;
+ }
+ }
+
/* do trial division */
- for (r = 0; r < 256; r++) {
- fp_mod_d(a, primes[r], &d);
- if (d == 0) {
- return FP_NO;
+ for (r = 0; r < FP_PRIME_SIZE; r++) {
+ res = fp_mod_d(a, primes[r], &d);
+ if (res != MP_OKAY || d == 0) {
+ *result = FP_NO;
+ return FP_OKAY;
}
}
- /* now do 8 miller rabins */
- fp_init(&b);
- for (r = 0; r < 8; r++) {
- fp_set(&b, primes[r]);
- fp_prime_miller_rabin(a, &b, &res);
+#ifdef WOLFSSL_SMALL_STACK
+ b = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (b == NULL)
+ return FP_MEM;
+#endif
+ /* now do 't' miller rabins */
+ fp_init(b);
+ for (r = 0; r < t; r++) {
+ fp_set(b, primes[r]);
+ fp_prime_miller_rabin(a, b, &res);
if (res == FP_NO) {
- return FP_NO;
+ *result = FP_NO;
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return FP_OKAY;
}
}
- return FP_YES;
+ *result = FP_YES;
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+
+
+int mp_prime_is_prime_ex(mp_int* a, int t, int* result, WC_RNG* rng)
+{
+ int ret = FP_YES;
+ fp_digit d;
+ int i;
+
+ if (a == NULL || result == NULL || rng == NULL)
+ return FP_VAL;
+
+ if (fp_isone(a)) {
+ *result = FP_NO;
+ return FP_OKAY;
+ }
+
+ /* check against primes table */
+ for (i = 0; i < FP_PRIME_SIZE; i++) {
+ if (fp_cmp_d(a, primes[i]) == FP_EQ) {
+ *result = FP_YES;
+ return FP_OKAY;
+ }
+ }
+
+ /* do trial division */
+ for (i = 0; i < FP_PRIME_SIZE; i++) {
+ if (fp_mod_d(a, primes[i], &d) == MP_OKAY) {
+ if (d == 0) {
+ *result = FP_NO;
+ return FP_OKAY;
+ }
+ }
+ else
+ return FP_VAL;
+ }
+
+#ifndef WC_NO_RNG
+ /* now do a miller rabin with up to t random numbers, this should
+ * give a (1/4)^t chance of a false prime. */
+ {
+ #ifndef WOLFSSL_SMALL_STACK
+ fp_int b[1], c[1], n1[1], y[1], r[1];
+ byte base[FP_MAX_PRIME_SIZE];
+ #else
+ fp_int *b, *c, *n1, *y, *r;
+ byte* base;
+ #endif
+ word32 baseSz;
+ int err;
+
+ baseSz = fp_count_bits(a);
+ /* The base size is the number of bits / 8. One is added if the number
+ * of bits isn't an even 8. */
+ baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
+
+ #ifndef WOLFSSL_SMALL_STACK
+ if (baseSz > sizeof(base))
+ return FP_MEM;
+ #else
+ base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (base == NULL)
+ return FP_MEM;
+
+ b = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
+ if (b == NULL) {
+ return FP_MEM;
+ }
+ c = &b[1]; n1 = &b[2]; y= &b[3]; r = &b[4];
+ #endif
+
+ fp_init(b);
+ fp_init(c);
+ fp_init(n1);
+ fp_init(y);
+ fp_init(r);
+
+ err = fp_sub_d(a, 2, c);
+ if (err != FP_OKAY) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+ XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+ while (t > 0) {
+ if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+ XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return err;
+ }
+
+ fp_read_unsigned_bin(b, base, baseSz);
+ if (fp_cmp_d(b, 2) != FP_GT || fp_cmp(b, c) != FP_LT) {
+ continue;
+ }
+
+ fp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
+ if (ret == FP_NO)
+ break;
+ fp_zero(b);
+ t--;
+ }
+
+ fp_clear(n1);
+ fp_clear(y);
+ fp_clear(r);
+ fp_clear(b);
+ fp_clear(c);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
+ XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ }
+#else
+ (void)t;
+#endif /* !WC_NO_RNG */
+
+ *result = ret;
+ return FP_OKAY;
}
+#endif /* !NO_RSA || !NO_DSA || !NO_DH || WOLFSSL_KEY_GEN */
+#ifdef WOLFSSL_KEY_GEN
+
+static int fp_gcd(fp_int *a, fp_int *b, fp_int *c);
+static int fp_lcm(fp_int *a, fp_int *b, fp_int *c);
+static int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap);
+
+int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
+{
+ return fp_gcd(a, b, c);
+}
+
+
+int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
+{
+ return fp_lcm(a, b, c);
+}
+
+int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
+{
+ int err;
+
+ err = fp_randprime(N, len, rng, heap);
+ switch(err) {
+ case FP_VAL:
+ return MP_VAL;
+ case FP_MEM:
+ return MP_MEM;
+ default:
+ break;
+ }
+
+ return MP_OKAY;
+}
+
+int mp_exch (mp_int * a, mp_int * b)
+{
+ return fp_exch(a, b);
+}
+
+
+
+int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap)
+{
+ static const int USE_BBS = 1;
+ int err, type;
+ int isPrime = FP_YES;
+ /* Assume the candidate is probably prime and then test until
+ * it is proven composite. */
+ byte* buf;
+
+ (void)heap;
+
+ /* get type */
+ if (len < 0) {
+ type = USE_BBS;
+ len = -len;
+ } else {
+ type = 0;
+ }
+
+ /* allow sizes between 2 and 512 bytes for a prime size */
+ if (len < 2 || len > 512) {
+ return FP_VAL;
+ }
+
+ /* allocate buffer to work with */
+ buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ if (buf == NULL) {
+ return FP_MEM;
+ }
+ XMEMSET(buf, 0, len);
+
+ do {
+#ifdef SHOW_GEN
+ printf(".");
+ fflush(stdout);
+#endif
+ /* generate value */
+ err = wc_RNG_GenerateBlock(rng, buf, len);
+ if (err != 0) {
+ XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+ return FP_VAL;
+ }
+
+ /* munge bits */
+ buf[0] |= 0x80 | 0x40;
+ buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
+
+ /* load value */
+ fp_read_unsigned_bin(N, buf, len);
+
+ /* test */
+ /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
+ * of a 1024-bit candidate being a false positive, when it is our
+ * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
+ * Using 8 because we've always used 8 */
+ mp_prime_is_prime_ex(N, 8, &isPrime, rng);
+ } while (isPrime == FP_NO);
+
+ XMEMSET(buf, 0, len);
+ XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ return FP_OKAY;
+}
+
/* c = [a, b] */
-void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
+int fp_lcm(fp_int *a, fp_int *b, fp_int *c)
{
- fp_int t1, t2;
+ int err;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[2];
+#else
+ fp_int *t;
+#endif
- fp_init(&t1);
- fp_init(&t2);
- fp_gcd(a, b, &t1);
- if (fp_cmp_mag(a, b) == FP_GT) {
- fp_div(a, &t1, &t2, NULL);
- fp_mul(b, &t2, c);
- } else {
- fp_div(b, &t1, &t2, NULL);
- fp_mul(a, &t2, c);
- }
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL) {
+ return FP_MEM;
+ }
+#endif
+
+ fp_init(&t[0]);
+ fp_init(&t[1]);
+ err = fp_gcd(a, b, &t[0]);
+ if (err == FP_OKAY) {
+ if (fp_cmp_mag(a, b) == FP_GT) {
+ err = fp_div(a, &t[0], &t[1], NULL);
+ if (err == FP_OKAY)
+ err = fp_mul(b, &t[1], c);
+ } else {
+ err = fp_div(b, &t[0], &t[1], NULL);
+ if (err == FP_OKAY)
+ err = fp_mul(a, &t[1], c);
+ }
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return err;
}
/* c = (a, b) */
-void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
+int fp_gcd(fp_int *a, fp_int *b, fp_int *c)
{
- fp_int u, v, r;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int u[1], v[1], r[1];
+#else
+ fp_int *u, *v, *r;
+#endif
/* either zero than gcd is the largest */
- if (fp_iszero (a) == 1 && fp_iszero (b) == 0) {
+ if (fp_iszero (a) == FP_YES && fp_iszero (b) == FP_NO) {
fp_abs (b, c);
- return;
+ return FP_OKAY;
}
- if (fp_iszero (a) == 0 && fp_iszero (b) == 1) {
+ if (fp_iszero (a) == FP_NO && fp_iszero (b) == FP_YES) {
fp_abs (a, c);
- return;
+ return FP_OKAY;
}
/* optimized. At this point if a == 0 then
* b must equal zero too
*/
- if (fp_iszero (a) == 1) {
+ if (fp_iszero (a) == FP_YES) {
fp_zero(c);
- return;
+ return FP_OKAY;
}
+#ifdef WOLFSSL_SMALL_STACK
+ u = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
+ if (u == NULL) {
+ return FP_MEM;
+ }
+ v = &u[1]; r = &u[2];
+#endif
+
/* sort inputs */
if (fp_cmp_mag(a, b) != FP_LT) {
- fp_init_copy(&u, a);
- fp_init_copy(&v, b);
+ fp_init_copy(u, a);
+ fp_init_copy(v, b);
} else {
- fp_init_copy(&u, b);
- fp_init_copy(&v, a);
+ fp_init_copy(u, b);
+ fp_init_copy(v, a);
}
-
- fp_init(&r);
- while (fp_iszero(&v) == FP_NO) {
- fp_mod(&u, &v, &r);
- fp_copy(&v, &u);
- fp_copy(&r, &v);
+
+ u->sign = FP_ZPOS;
+ v->sign = FP_ZPOS;
+
+ fp_init(r);
+ while (fp_iszero(v) == FP_NO) {
+ fp_mod(u, v, r);
+ fp_copy(v, u);
+ fp_copy(r, v);
}
- fp_copy(&u, c);
+ fp_copy(u, c);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
}
#endif /* WOLFSSL_KEY_GEN */
-#if defined(HAVE_ECC) || !defined(NO_PWDBASED)
+#if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(OPENSSL_EXTRA) || \
+ defined(WC_RSA_BLINDING) || !defined(NO_DSA) || \
+ (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
/* c = a + b */
void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
{
+#ifndef WOLFSSL_SMALL_STACK
fp_int tmp;
fp_init(&tmp);
fp_set(&tmp, b);
- fp_add(a,&tmp,c);
+ fp_add(a, &tmp, c);
+#else
+ int i;
+ fp_word t = b;
+
+ fp_copy(a, c);
+ for (i = 0; t != 0 && i < FP_SIZE && i < c->used; i++) {
+ t += c->dp[i];
+ c->dp[i] = (fp_digit)t;
+ t >>= DIGIT_BIT;
+ }
+ if (i == c->used && i < FP_SIZE && t != 0) {
+ c->dp[i] = t;
+ c->used++;
+ }
+#endif
}
/* external compatibility */
@@ -2567,19 +4692,78 @@ int mp_add_d(fp_int *a, fp_digit b, fp_int *c)
return MP_OKAY;
}
-#endif /* HAVE_ECC || !NO_PWDBASED */
+#endif /* HAVE_ECC || !NO_PWDBASED || OPENSSL_EXTRA || WC_RSA_BLINDING ||
+ !NO_DSA || (!NO_RSA && !NO_RSA_BOUNDS_CHECK) */
-#ifdef HAVE_ECC
+#if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
+ defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
+ defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
/* chars used in radix conversions */
-static const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+static wcchar fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz+/";
+#endif
+
+#if !defined(NO_DSA) || defined(HAVE_ECC)
+#if DIGIT_BIT == 64 || DIGIT_BIT == 32
+static int fp_read_radix_16(fp_int *a, const char *str)
+{
+ int i, j, k, neg;
+ char ch;
+
+ /* if the leading digit is a
+ * minus set the sign to negative.
+ */
+ if (*str == '-') {
+ ++str;
+ neg = FP_NEG;
+ } else {
+ neg = FP_ZPOS;
+ }
+
+ j = 0;
+ k = 0;
+ for (i = (int)(XSTRLEN(str) - 1); i >= 0; i--) {
+ ch = str[i];
+ if (ch >= '0' && ch <= '9')
+ ch -= '0';
+ else if (ch >= 'A' && ch <= 'F')
+ ch -= 'A' - 10;
+ else if (ch >= 'a' && ch <= 'f')
+ ch -= 'a' - 10;
+ else
+ return FP_VAL;
+
+ a->dp[k] |= ((fp_digit)ch) << j;
+ j += 4;
+ k += j == DIGIT_BIT;
+ j &= DIGIT_BIT - 1;
+ }
+
+ a->used = k + 1;
+ fp_clamp(a);
+ /* set the sign only if a != 0 */
+ if (fp_iszero(a) != FP_YES) {
+ a->sign = neg;
+ }
+ return FP_OKAY;
+}
+#endif
static int fp_read_radix(fp_int *a, const char *str, int radix)
{
int y, neg;
char ch;
+ /* set the integer to the default of zero */
+ fp_zero (a);
+
+#if DIGIT_BIT == 64 || DIGIT_BIT == 32
+ if (radix == 16)
+ return fp_read_radix_16(a, str);
+#endif
+
/* make sure the radix is ok */
if (radix < 2 || radix > 64) {
return FP_VAL;
@@ -2595,16 +4779,13 @@ static int fp_read_radix(fp_int *a, const char *str, int radix)
neg = FP_ZPOS;
}
- /* set the integer to the default of zero */
- fp_zero (a);
-
/* process each digit of the string */
while (*str) {
- /* if the radix < 36 the conversion is case insensitive
+ /* if the radix <= 36 the conversion is case insensitive
* this allows numbers like 1AB and 1ab to represent the same value
* [e.g. in hex]
*/
- ch = (char) ((radix < 36) ? XTOUPPER((unsigned char)*str) : *str);
+ ch = (char)((radix <= 36) ? XTOUPPER((unsigned char)*str) : *str);
for (y = 0; y < 64; y++) {
if (ch == fp_s_rmap[y]) {
break;
@@ -2637,24 +4818,20 @@ int mp_read_radix(mp_int *a, const char *str, int radix)
return fp_read_radix(a, str, radix);
}
-/* fast math conversion */
-void mp_set(fp_int *a, fp_digit b)
-{
- fp_set(a,b);
-}
+#endif /* !defined(NO_DSA) || defined(HAVE_ECC) */
+
+#ifdef HAVE_ECC
/* fast math conversion */
int mp_sqr(fp_int *A, fp_int *B)
{
- fp_sqr(A, B);
- return MP_OKAY;
+ return fp_sqr(A, B);
}
-
+
/* fast math conversion */
int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
{
- fp_montgomery_reduce(a, m, mp);
- return MP_OKAY;
+ return fp_montgomery_reduce(a, m, mp);
}
@@ -2677,25 +4854,215 @@ int mp_init_copy(fp_int * a, fp_int * b)
return MP_OKAY;
}
-
#ifdef HAVE_COMP_KEY
int mp_cnt_lsb(fp_int* a)
{
- fp_cnt_lsb(a);
- return MP_OKAY;
+ return fp_cnt_lsb(a);
}
-int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d)
+#endif /* HAVE_COMP_KEY */
+
+#endif /* HAVE_ECC */
+
+#if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA) || \
+ defined(WOLFSSL_KEY_GEN)
+/* fast math conversion */
+int mp_set(fp_int *a, fp_digit b)
{
- fp_div_2d(a, b, c, d);
+ fp_set(a,b);
return MP_OKAY;
}
+#endif
-#endif /* HAVE_COMP_KEY */
+#ifdef WC_MP_TO_RADIX
+/* returns size of ASCII representation */
+int mp_radix_size (mp_int *a, int radix, int *size)
+{
+ int res, digs;
+ fp_digit d;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
-#endif /* HAVE_ECC */
+ *size = 0;
-#endif /* USE_FAST_MATH */
+ /* special case for binary */
+ if (radix == 2) {
+ *size = fp_count_bits (a) + (a->sign == FP_NEG ? 1 : 0) + 1;
+ return FP_YES;
+ }
+
+ /* make sure the radix is in range */
+ if (radix < 2 || radix > 64) {
+ return FP_VAL;
+ }
+
+ if (fp_iszero(a) == MP_YES) {
+ *size = 2;
+ return FP_OKAY;
+ }
+
+ /* digs is the digit count */
+ digs = 0;
+
+ /* if it's negative add one for the sign */
+ if (a->sign == FP_NEG) {
+ ++digs;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ /* init a copy of the input */
+ fp_init_copy (t, a);
+
+ /* force temp to positive */
+ t->sign = FP_ZPOS;
+
+ /* fetch out all of the digits */
+ while (fp_iszero (t) == FP_NO) {
+ if ((res = fp_div_d (t, (mp_digit) radix, t, &d)) != FP_OKAY) {
+ fp_zero (t);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return res;
+ }
+ ++digs;
+ }
+ fp_zero (t);
+
+ /* return digs + 1, the 1 is for the NULL byte that would be required. */
+ *size = digs + 1;
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+
+/* stores a bignum as a ASCII string in a given radix (2..64) */
+int mp_toradix (mp_int *a, char *str, int radix)
+{
+ int res, digs;
+ fp_digit d;
+ char *_s = str;
+#ifndef WOLFSSL_SMALL_STACK
+ fp_int t[1];
+#else
+ fp_int *t;
+#endif
+
+ /* check range of the radix */
+ if (radix < 2 || radix > 64) {
+ return FP_VAL;
+ }
+ /* quick out if its zero */
+ if (fp_iszero(a) == FP_YES) {
+ *str++ = '0';
+ *str = '\0';
+ return FP_OKAY;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
+ if (t == NULL)
+ return FP_MEM;
+#endif
+
+ /* init a copy of the input */
+ fp_init_copy (t, a);
+
+ /* if it is negative output a - */
+ if (t->sign == FP_NEG) {
+ ++_s;
+ *str++ = '-';
+ t->sign = FP_ZPOS;
+ }
+
+ digs = 0;
+ while (fp_iszero (t) == FP_NO) {
+ if ((res = fp_div_d (t, (fp_digit) radix, t, &d)) != FP_OKAY) {
+ fp_zero (t);
+ #ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+ #endif
+ return res;
+ }
+ *str++ = fp_s_rmap[d];
+ ++digs;
+ }
+#ifndef WC_DISABLE_RADIX_ZERO_PAD
+ /* For hexadecimal output, add zero padding when number of digits is odd */
+ if ((digs & 1) && (radix == 16)) {
+ *str++ = fp_s_rmap[0];
+ ++digs;
+ }
+#endif
+ /* reverse the digits of the string. In this case _s points
+ * to the first digit [excluding the sign] of the number]
+ */
+ fp_reverse ((unsigned char *)_s, digs);
+
+ /* append a NULL so the string is properly terminated */
+ *str = '\0';
+
+ fp_zero (t);
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
+#endif
+ return FP_OKAY;
+}
+
+#ifdef WOLFSSL_DEBUG_MATH
+void mp_dump(const char* desc, mp_int* a, byte verbose)
+{
+ char buffer[FP_SIZE * sizeof(fp_digit) * 2];
+ int size;
+
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+ size = a->size;
+#else
+ size = FP_SIZE;
+#endif
+
+ printf("%s: ptr=%p, used=%d, sign=%d, size=%d, fpd=%d\n",
+ desc, a, a->used, a->sign, size, (int)sizeof(fp_digit));
+
+ mp_tohex(a, buffer);
+ printf(" %s\n ", buffer);
+
+ if (verbose) {
+ int i;
+ for(i=0; i<size * (int)sizeof(fp_digit); i++) {
+ printf("%x ", *(((byte*)a->dp) + i));
+ }
+ printf("\n");
+ }
+}
+#endif /* WOLFSSL_DEBUG_MATH */
+
+#endif /* WC_MP_TO_RADIX */
+
+
+int mp_abs(mp_int* a, mp_int* b)
+{
+ fp_abs(a, b);
+ return FP_OKAY;
+}
+
+
+int mp_lshd (mp_int * a, int b)
+{
+ fp_lshd(a, b);
+ return FP_OKAY;
+}
+
+#endif /* USE_FAST_MATH */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_dsp.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_dsp.c
new file mode 100644
index 000000000..594ad0489
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_dsp.c
@@ -0,0 +1,327 @@
+/* wc_dsp.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if defined(WOLFSSL_DSP)
+#include "remote.h"
+#include "rpcmem.h"
+static wolfSSL_DSP_Handle_cb handle_function = NULL;
+static remote_handle64 defaultHandle;
+static wolfSSL_Mutex handle_mutex; /* mutex for access to single default handle */
+
+#define WOLFSSL_HANDLE_DONE 1
+#define WOLFSSL_HANDLE_GET 0
+
+/* callback function for setting the default handle in single threaded
+ * use cases */
+static int default_handle_cb(remote_handle64 *handle, int finished, void *ctx)
+{
+ (void)ctx;
+ if (finished == WOLFSSL_HANDLE_DONE) {
+ if (wc_UnLockMutex(&handle_mutex) != 0) {
+ WOLFSSL_MSG("Unlock handle mutex failed");
+ return -1;
+ }
+ }
+ else {
+ if (wc_LockMutex(&handle_mutex) != 0) {
+ WOLFSSL_MSG("Lock handle mutex failed");
+ return -1;
+ }
+ *handle = defaultHandle;
+ }
+ return 0;
+}
+
+
+/* Set global callback for getting handle to use
+ * return 0 on success */
+int wolfSSL_SetHandleCb(wolfSSL_DSP_Handle_cb in)
+{
+ handle_function = in;
+ return 0;
+}
+
+
+/* returns 1 if global handle callback is set and 0 if not */
+int wolfSSL_GetHandleCbSet()
+{
+ return (handle_function != NULL)? 1: 0;
+}
+
+
+/* Local function for setting up default handle
+ * returns 0 on success */
+int wolfSSL_InitHandle()
+{
+ char *sp_URI_value;
+ int ret;
+
+ sp_URI_value = wolfSSL_URI "&_dom=adsp";
+ ret = wolfSSL_open(sp_URI_value, &defaultHandle);
+ if (ret != 0) {
+ WOLFSSL_MSG("Unable to open aDSP?");
+ return -1;
+ }
+ wolfSSL_SetHandleCb(default_handle_cb);
+ ret = wc_InitMutex(&handle_mutex);
+ if (ret != 0) {
+ WOLFSSL_MSG("Unable to init handle mutex");
+ return -1;
+ }
+ return 0;
+}
+
+
+/* internal function that closes default handle and frees mutex */
+void wolfSSL_CleanupHandle()
+{
+ wolfSSL_close(defaultHandle);
+ wc_FreeMutex(&handle_mutex);
+}
+#if defined(WOLFSSL_HAVE_SP_ECC)
+
+/* ecc conversion from sp_c32.c */
+#include <wolfssl/wolfcrypt/sp.h>
+
+
+#ifndef WOLFSSL_SP_NO_256
+
+#ifdef HAVE_ECC_VERIFY
+/* Read big endian unsigned byte array into r.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a Byte array.
+ * n Number of bytes in array to read.
+ */
+static void int_256_from_bin(int32* r, int size, const byte* a, int n)
+{
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = n-1; i >= 0; i--) {
+ r[j] |= (((int32)a[i]) << s);
+ if (s >= 18U) {
+ r[j] &= 0x3ffffff;
+ s = 26U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ r[++j] = (int32)a[i] >> s;
+ s = 8U - s;
+ }
+ else {
+ s += 8U;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r A single precision integer.
+ * size Maximum number of bytes to convert
+ * a A multi-precision integer.
+ */
+static void int_256_from_mp(int32* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 26
+ int j;
+
+ XMEMCPY(r, a->dp, sizeof(int32) * a->used);
+
+ for (j = a->used; j < size; j++) {
+ r[j] = 0;
+ }
+#elif DIGIT_BIT > 26
+ int i, j = 0;
+ word32 s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((int32)a->dp[i] << s);
+ r[j] &= 0x3ffffff;
+ s = 26U - s;
+ if (j + 1 >= size) {
+ break;
+ }
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (int32)(a->dp[i] >> s); /*lint !e9033*/
+ while ((s + 26U) <= (word32)DIGIT_BIT) {
+ s += 26U;
+ r[j] &= 0x3ffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ if (s < (word32)DIGIT_BIT) {
+ /* lint allow cast of mismatch word32 and mp_digit */
+ r[++j] = (int32)(a->dp[i] >> s); /*lint !e9033*/
+ }
+ else {
+ r[++j] = 0L;
+ }
+ }
+ s = (word32)DIGIT_BIT - s;
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#else
+ int i, j = 0, s = 0;
+
+ r[0] = 0;
+ for (i = 0; i < a->used && j < size; i++) {
+ r[j] |= ((int32)a->dp[i]) << s;
+ if (s + DIGIT_BIT >= 26) {
+ r[j] &= 0x3ffffff;
+ if (j + 1 >= size) {
+ break;
+ }
+ s = 26 - s;
+ if (s == DIGIT_BIT) {
+ r[++j] = 0;
+ s = 0;
+ }
+ else {
+ r[++j] = a->dp[i] >> s;
+ s = DIGIT_BIT - s;
+ }
+ }
+ else {
+ s += DIGIT_BIT;
+ }
+ }
+
+ for (j++; j < size; j++) {
+ r[j] = 0;
+ }
+#endif
+}
+
+/* Verify the signature values with the hash and public key.
+ * e = Truncate(hash, 256)
+ * u1 = e/s mod order
+ * u2 = r/s mod order
+ * r == (u1.G + u2.Q)->x mod order
+ * Optimization: Leave point in projective form.
+ * (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
+ * (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
+ * The hash is truncated to the first 256 bits.
+ *
+ * hash Hash to sign.
+ * hashLen Length of the hash data.
+ * rng Random number generator.
+ * priv Private part of key - scalar.
+ * rm First part of result as an mp_int.
+ * sm Second part of result as an mp_int.
+ * heap Heap to use for allocation.
+ * returns RNG failures, MEMORY_E when memory allocation fails and
+ * MP_OKAY on success.
+ */
+int sp_dsp_ecc_verify_256(remote_handle64 handleIn, const byte* hash, word32 hashLen, mp_int* pX,
+ mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
+{
+ int ret;
+ remote_handle64 handle = handleIn;
+
+#if 0
+ /* calling to alloc memory on the ION using these settings slowed the performance down slightly */
+ int32 *x = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+ int32 *y = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+ int32 *z = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+ int32 *s = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+ int32 *u1 = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+ int32 *u2 = (int32*)rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 10*sizeof(int));
+#endif
+ int32 x[10] __attribute__((aligned(128)));
+ int32 y[10] __attribute__((aligned(128)));
+ int32 z[10] __attribute__((aligned(128)));
+ int32 s[10] __attribute__((aligned(128)));
+ int32 u1[10] __attribute__((aligned(128)));
+ int32 u2[10] __attribute__((aligned(128)));
+
+ if (hashLen > 32U) {
+ hashLen = 32U;
+ }
+
+ int_256_from_bin(u1, 10, hash, (int)hashLen);
+ int_256_from_mp(u2, 10, r);
+ int_256_from_mp(s, 10, sm);
+ int_256_from_mp(x, 10, pX);
+ int_256_from_mp(y, 10, pY);
+ int_256_from_mp(z, 10, pZ);
+
+ if (handle_function != NULL) {
+ handle_function(&handle, WOLFSSL_HANDLE_GET, NULL);
+ }
+
+ *res = 0;
+ ret = wolfSSL_DSP_ECC_Verify_256(handle, u1, 10, u2, 10, s, 10, x, 10, y, 10, z, 10, res);
+
+ if (handle_function != NULL) {
+ handle_function(&handle, WOLFSSL_HANDLE_DONE, NULL);
+ }
+#if 0
+ rpcmem_free(x);
+ rpcmem_free(y);
+ rpcmem_free(z);
+ rpcmem_free(s);
+ rpcmem_free(u1);
+ rpcmem_free(u2);
+#endif
+ return ret;
+}
+
+
+/* Used to assign a handle to an ecc_key structure.
+ * returns 0 on success */
+int wc_ecc_set_handle(ecc_key* key, remote_handle64 handle)
+{
+ if (key == NULL) {
+ return BAD_FUNC_ARG;
+ }
+ key->handle = handle;
+ return 0;
+}
+#endif /* HAVE_ECC_VERIFY */
+#endif /* !WOLFSSL_SP_NO_256 */
+#endif /* WOLFSSL_HAVE_SP_ECC */
+#endif /* WOLFSSL_DSP */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_encrypt.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_encrypt.c
new file mode 100644
index 000000000..39dbeec5a
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_encrypt.c
@@ -0,0 +1,660 @@
+/* wc_encrypt.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/des3.h>
+#include <wolfssl/wolfcrypt/hash.h>
+#include <wolfssl/wolfcrypt/arc4.h>
+#include <wolfssl/wolfcrypt/wc_encrypt.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/wolfcrypt/coding.h>
+#include <wolfssl/wolfcrypt/pwdbased.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
+#ifdef HAVE_AES_DECRYPT
+int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz,
+ const byte* key, word32 keySz, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Aes* aes = NULL;
+#else
+ Aes aes[1];
+#endif
+
+ if (out == NULL || in == NULL || key == NULL || iv == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ aes = (Aes*)XMALLOC(sizeof(Aes), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (aes == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_AesInit(aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesSetKey(aes, key, keySz, iv, AES_DECRYPTION);
+ if (ret == 0)
+ ret = wc_AesCbcDecrypt(aes, out, in, inSz);
+
+ wc_AesFree(aes);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(aes, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+#endif /* HAVE_AES_DECRYPT */
+
+int wc_AesCbcEncryptWithKey(byte* out, const byte* in, word32 inSz,
+ const byte* key, word32 keySz, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Aes* aes;
+#else
+ Aes aes[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ aes = (Aes*)XMALLOC(sizeof(Aes), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (aes == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_AesInit(aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesSetKey(aes, key, keySz, iv, AES_ENCRYPTION);
+ if (ret == 0)
+ ret = wc_AesCbcEncrypt(aes, out, in, inSz);
+
+ wc_AesFree(aes);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(aes, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+#endif /* !NO_AES && HAVE_AES_CBC */
+
+
+#if !defined(NO_DES3) && !defined(WOLFSSL_TI_CRYPT)
+int wc_Des_CbcEncryptWithKey(byte* out, const byte* in, word32 sz,
+ const byte* key, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Des* des;
+#else
+ Des des[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ des = (Des*)XMALLOC(sizeof(Des), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (des == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_Des_SetKey(des, key, iv, DES_ENCRYPTION);
+ if (ret == 0)
+ ret = wc_Des_CbcEncrypt(des, out, in, sz);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(des, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+int wc_Des_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
+ const byte* key, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Des* des;
+#else
+ Des des[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ des = (Des*)XMALLOC(sizeof(Des), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (des == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_Des_SetKey(des, key, iv, DES_DECRYPTION);
+ if (ret == 0)
+ ret = wc_Des_CbcDecrypt(des, out, in, sz);
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(des, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+
+int wc_Des3_CbcEncryptWithKey(byte* out, const byte* in, word32 sz,
+ const byte* key, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Des3* des3;
+#else
+ Des3 des3[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ des3 = (Des3*)XMALLOC(sizeof(Des3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (des3 == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_Des3Init(des3, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_Des3_SetKey(des3, key, iv, DES_ENCRYPTION);
+ if (ret == 0)
+ ret = wc_Des3_CbcEncrypt(des3, out, in, sz);
+ wc_Des3Free(des3);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(des3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+
+int wc_Des3_CbcDecryptWithKey(byte* out, const byte* in, word32 sz,
+ const byte* key, const byte* iv)
+{
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ Des3* des3;
+#else
+ Des3 des3[1];
+#endif
+
+#ifdef WOLFSSL_SMALL_STACK
+ des3 = (Des3*)XMALLOC(sizeof(Des3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (des3 == NULL)
+ return MEMORY_E;
+#endif
+
+ ret = wc_Des3Init(des3, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_Des3_SetKey(des3, key, iv, DES_DECRYPTION);
+ if (ret == 0)
+ ret = wc_Des3_CbcDecrypt(des3, out, in, sz);
+ wc_Des3Free(des3);
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(des3, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+#endif /* !NO_DES3 */
+
+
+#ifdef WOLFSSL_ENCRYPTED_KEYS
+
+int wc_BufferKeyDecrypt(EncryptedInfo* info, byte* der, word32 derSz,
+ const byte* password, int passwordSz, int hashType)
+{
+ int ret = NOT_COMPILED_IN;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* key = NULL;
+#else
+ byte key[WC_MAX_SYM_KEY_SIZE];
+#endif
+
+ (void)derSz;
+ (void)passwordSz;
+ (void)hashType;
+
+ if (der == NULL || password == NULL || info == NULL || info->keySz == 0) {
+ return BAD_FUNC_ARG;
+ }
+
+ /* use file's salt for key derivation, hex decode first */
+ if (Base16_Decode(info->iv, info->ivSz, info->iv, &info->ivSz) != 0) {
+ return BUFFER_E;
+ }
+ if (info->ivSz < PKCS5_SALT_SZ)
+ return BUFFER_E;
+
+#ifdef WOLFSSL_SMALL_STACK
+ key = (byte*)XMALLOC(WC_MAX_SYM_KEY_SIZE, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+ if (key == NULL) {
+ return MEMORY_E;
+ }
+#endif
+
+ (void)XMEMSET(key, 0, WC_MAX_SYM_KEY_SIZE);
+
+#ifndef NO_PWDBASED
+ if ((ret = wc_PBKDF1(key, password, passwordSz, info->iv, PKCS5_SALT_SZ, 1,
+ info->keySz, hashType)) != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+#endif
+ return ret;
+ }
+#endif
+
+#ifndef NO_DES3
+ if (info->cipherType == WC_CIPHER_DES)
+ ret = wc_Des_CbcDecryptWithKey(der, der, derSz, key, info->iv);
+ if (info->cipherType == WC_CIPHER_DES3)
+ ret = wc_Des3_CbcDecryptWithKey(der, der, derSz, key, info->iv);
+#endif /* NO_DES3 */
+#if !defined(NO_AES) && defined(HAVE_AES_CBC) && defined(HAVE_AES_DECRYPT)
+ if (info->cipherType == WC_CIPHER_AES_CBC)
+ ret = wc_AesCbcDecryptWithKey(der, der, derSz, key, info->keySz,
+ info->iv);
+#endif /* !NO_AES && HAVE_AES_CBC && HAVE_AES_DECRYPT */
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+#endif
+
+ return ret;
+}
+
+int wc_BufferKeyEncrypt(EncryptedInfo* info, byte* der, word32 derSz,
+ const byte* password, int passwordSz, int hashType)
+{
+ int ret = NOT_COMPILED_IN;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* key = NULL;
+#else
+ byte key[WC_MAX_SYM_KEY_SIZE];
+#endif
+
+ (void)derSz;
+ (void)passwordSz;
+ (void)hashType;
+
+ if (der == NULL || password == NULL || info == NULL || info->keySz == 0 ||
+ info->ivSz < PKCS5_SALT_SZ) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ key = (byte*)XMALLOC(WC_MAX_SYM_KEY_SIZE, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+ if (key == NULL) {
+ return MEMORY_E;
+ }
+#endif /* WOLFSSL_SMALL_STACK */
+
+ (void)XMEMSET(key, 0, WC_MAX_SYM_KEY_SIZE);
+
+#ifndef NO_PWDBASED
+ if ((ret = wc_PBKDF1(key, password, passwordSz, info->iv, PKCS5_SALT_SZ, 1,
+ info->keySz, hashType)) != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+#endif
+ return ret;
+ }
+#endif
+
+#ifndef NO_DES3
+ if (info->cipherType == WC_CIPHER_DES)
+ ret = wc_Des_CbcEncryptWithKey(der, der, derSz, key, info->iv);
+ if (info->cipherType == WC_CIPHER_DES3)
+ ret = wc_Des3_CbcEncryptWithKey(der, der, derSz, key, info->iv);
+#endif /* NO_DES3 */
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
+ if (info->cipherType == WC_CIPHER_AES_CBC)
+ ret = wc_AesCbcEncryptWithKey(der, der, derSz, key, info->keySz,
+ info->iv);
+#endif /* !NO_AES && HAVE_AES_CBC */
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_SYMMETRIC_KEY);
+#endif
+
+ return ret;
+}
+
+#endif /* WOLFSSL_ENCRYPTED_KEYS */
+
+
+#if !defined(NO_PWDBASED) && !defined(NO_ASN)
+
+#if defined(HAVE_PKCS8) || defined(HAVE_PKCS12)
+/* Decrypt/Encrypt input in place from parameters based on id
+ *
+ * returns a negative value on fail case
+ */
+int wc_CryptKey(const char* password, int passwordSz, byte* salt,
+ int saltSz, int iterations, int id, byte* input,
+ int length, int version, byte* cbcIv, int enc, int shaOid)
+{
+ int typeH;
+ int derivedLen = 0;
+ int ret = 0;
+#ifdef WOLFSSL_SMALL_STACK
+ byte* key;
+#else
+ byte key[MAX_KEY_SIZE];
+#endif
+
+ (void)input;
+ (void)length;
+ (void)enc;
+
+ WOLFSSL_ENTER("wc_CryptKey");
+
+ switch (id) {
+ #ifndef NO_DES3
+ #ifndef NO_MD5
+ case PBE_MD5_DES:
+ typeH = WC_MD5;
+ derivedLen = 16; /* may need iv for v1.5 */
+ break;
+ #endif
+ #ifndef NO_SHA
+ case PBE_SHA1_DES:
+ typeH = WC_SHA;
+ derivedLen = 16; /* may need iv for v1.5 */
+ break;
+
+ case PBE_SHA1_DES3:
+ switch(shaOid) {
+ case HMAC_SHA256_OID:
+ typeH = WC_SHA256;
+ derivedLen = 32;
+ break;
+ default:
+ typeH = WC_SHA;
+ derivedLen = 32; /* may need iv for v1.5 */
+ break;
+ }
+ break;
+ #endif /* !NO_SHA */
+ #endif /* !NO_DES3 */
+ #if !defined(NO_SHA) && !defined(NO_RC4)
+ case PBE_SHA1_RC4_128:
+ typeH = WC_SHA;
+ derivedLen = 16;
+ break;
+ #endif
+ #if defined(WOLFSSL_AES_256)
+ case PBE_AES256_CBC:
+ switch(shaOid) {
+ case HMAC_SHA256_OID:
+ typeH = WC_SHA256;
+ derivedLen = 32;
+ break;
+ #ifndef NO_SHA
+ default:
+ typeH = WC_SHA;
+ derivedLen = 32;
+ break;
+ #endif
+ }
+ break;
+ #endif /* WOLFSSL_AES_256 && !NO_SHA */
+ #if defined(WOLFSSL_AES_128)
+ case PBE_AES128_CBC:
+ switch(shaOid) {
+ case HMAC_SHA256_OID:
+ typeH = WC_SHA256;
+ derivedLen = 16;
+ break;
+ #ifndef NO_SHA
+ default:
+ typeH = WC_SHA;
+ derivedLen = 16;
+ break;
+ #endif
+ }
+ break;
+ #endif /* WOLFSSL_AES_128 && !NO_SHA */
+ default:
+ WOLFSSL_MSG("Unknown/Unsupported encrypt/decrypt id");
+ (void)shaOid;
+ return ALGO_ID_E;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ key = (byte*)XMALLOC(MAX_KEY_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (key == NULL)
+ return MEMORY_E;
+#endif
+
+ if (version == PKCS5v2)
+ ret = wc_PBKDF2(key, (byte*)password, passwordSz,
+ salt, saltSz, iterations, derivedLen, typeH);
+#ifndef NO_SHA
+ else if (version == PKCS5)
+ ret = wc_PBKDF1(key, (byte*)password, passwordSz,
+ salt, saltSz, iterations, derivedLen, typeH);
+#endif
+#ifdef HAVE_PKCS12
+ else if (version == PKCS12v1) {
+ int i, idx = 0;
+ byte unicodePasswd[MAX_UNICODE_SZ];
+
+ if ( (passwordSz * 2 + 2) > (int)sizeof(unicodePasswd)) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return UNICODE_SIZE_E;
+ }
+
+ for (i = 0; i < passwordSz; i++) {
+ unicodePasswd[idx++] = 0x00;
+ unicodePasswd[idx++] = (byte)password[i];
+ }
+ /* add trailing NULL */
+ unicodePasswd[idx++] = 0x00;
+ unicodePasswd[idx++] = 0x00;
+
+ ret = wc_PKCS12_PBKDF(key, unicodePasswd, idx, salt, saltSz,
+ iterations, derivedLen, typeH, 1);
+ if (id != PBE_SHA1_RC4_128)
+ ret += wc_PKCS12_PBKDF(cbcIv, unicodePasswd, idx, salt, saltSz,
+ iterations, 8, typeH, 2);
+ }
+#endif /* HAVE_PKCS12 */
+ else {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ WOLFSSL_MSG("Unknown/Unsupported PKCS version");
+ return ALGO_ID_E;
+ }
+
+ if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return ret;
+ }
+
+ switch (id) {
+#ifndef NO_DES3
+ #if !defined(NO_SHA) || !defined(NO_MD5)
+ case PBE_MD5_DES:
+ case PBE_SHA1_DES:
+ {
+ Des des;
+ byte* desIv = key + 8;
+
+ if (version == PKCS5v2 || version == PKCS12v1)
+ desIv = cbcIv;
+
+ if (enc) {
+ ret = wc_Des_SetKey(&des, key, desIv, DES_ENCRYPTION);
+ }
+ else {
+ ret = wc_Des_SetKey(&des, key, desIv, DES_DECRYPTION);
+ }
+ if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return ret;
+ }
+
+ if (enc) {
+ wc_Des_CbcEncrypt(&des, input, input, length);
+ }
+ else {
+ wc_Des_CbcDecrypt(&des, input, input, length);
+ }
+ break;
+ }
+ #endif /* !NO_SHA || !NO_MD5 */
+
+ #ifndef NO_SHA
+ case PBE_SHA1_DES3:
+ {
+ Des3 des;
+ byte* desIv = key + 24;
+
+ if (version == PKCS5v2 || version == PKCS12v1)
+ desIv = cbcIv;
+
+ ret = wc_Des3Init(&des, NULL, INVALID_DEVID);
+ if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return ret;
+ }
+ if (enc) {
+ ret = wc_Des3_SetKey(&des, key, desIv, DES_ENCRYPTION);
+ }
+ else {
+ ret = wc_Des3_SetKey(&des, key, desIv, DES_DECRYPTION);
+ }
+ if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return ret;
+ }
+ if (enc) {
+ ret = wc_Des3_CbcEncrypt(&des, input, input, length);
+ }
+ else {
+ ret = wc_Des3_CbcDecrypt(&des, input, input, length);
+ }
+ if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return ret;
+ }
+ break;
+ }
+ #endif /* !NO_SHA */
+#endif
+#if !defined(NO_RC4) && !defined(NO_SHA)
+ case PBE_SHA1_RC4_128:
+ {
+ Arc4 dec;
+
+ wc_Arc4SetKey(&dec, key, derivedLen);
+ wc_Arc4Process(&dec, input, input, length);
+ break;
+ }
+#endif
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
+ #ifdef WOLFSSL_AES_256
+ case PBE_AES256_CBC:
+ case PBE_AES128_CBC:
+ {
+ Aes aes;
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
+ if (enc) {
+ ret = wc_AesSetKey(&aes, key, derivedLen, cbcIv,
+ AES_ENCRYPTION);
+ }
+ else {
+ ret = wc_AesSetKey(&aes, key, derivedLen, cbcIv,
+ AES_DECRYPTION);
+ }
+ }
+ if (ret == 0) {
+ if (enc)
+ ret = wc_AesCbcEncrypt(&aes, input, input, length);
+ else
+ ret = wc_AesCbcDecrypt(&aes, input, input, length);
+ }
+ if (ret != 0) {
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return ret;
+ }
+ ForceZero(&aes, sizeof(Aes));
+ break;
+ }
+ #endif /* WOLFSSL_AES_256 */
+#endif /* !NO_AES && HAVE_AES_CBC */
+
+ default:
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ WOLFSSL_MSG("Unknown/Unsupported encrypt/decryption algorithm");
+ return ALGO_ID_E;
+ }
+
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+#endif /* HAVE_PKCS8 || HAVE_PKCS12 */
+#endif /* !NO_PWDBASED */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_pkcs11.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_pkcs11.c
new file mode 100644
index 000000000..cac0a0fcc
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_pkcs11.c
@@ -0,0 +1,2546 @@
+/* wc_pkcs11.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef HAVE_PKCS11
+
+#include <dlfcn.h>
+
+#include <wolfssl/wolfcrypt/wc_pkcs11.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/asn.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#ifndef NO_RSA
+ #include <wolfssl/wolfcrypt/rsa.h>
+#endif
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+#define MAX_EC_PARAM_LEN 16
+
+#if defined(NO_PKCS11_RSA) && !defined(NO_RSA)
+ #define NO_RSA
+#endif
+#if defined(NO_PKCS11_ECC) && defined(HAVE_ECC)
+ #undef HAVE_ECC
+#endif
+#if defined(NO_PKCS11_AES) && !defined(NO_AES)
+ #define NO_AES
+#endif
+#if defined(NO_PKCS11_AESGCM) && defined(HAVE_AESGCM)
+ #undef HAVE_AESGCM
+#endif
+#if defined(NO_PKCS11_AESCBC) && defined(HAVE_AES_CBC)
+ #undef HAVE_AES_CBC
+#endif
+#if defined(NO_PKCS11_HMAC) && !defined(NO_HMAC)
+ #define NO_HMAC
+#endif
+#if defined(NO_PKCS11_RNG) && !defined(WC_NO_RNG)
+ #define WC_NO_RNG
+#endif
+
+
+#if defined(HAVE_ECC) && !defined(NO_PKCS11_ECDH)
+static CK_BBOOL ckFalse = CK_FALSE;
+#endif
+#if !defined(NO_RSA) || defined(HAVE_ECC) || (!defined(NO_AES) && \
+ (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || !defined(NO_HMAC)
+static CK_BBOOL ckTrue = CK_TRUE;
+#endif
+
+#ifndef NO_RSA
+static CK_KEY_TYPE rsaKeyType = CKK_RSA;
+#endif
+#ifdef HAVE_ECC
+static CK_KEY_TYPE ecKeyType = CKK_EC;
+#endif
+#if !defined(NO_RSA) || defined(HAVE_ECC)
+static CK_OBJECT_CLASS pubKeyClass = CKO_PUBLIC_KEY;
+static CK_OBJECT_CLASS privKeyClass = CKO_PRIVATE_KEY;
+#endif
+#if (!defined(NO_AES) && (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || \
+ !defined(NO_HMAC) || (defined(HAVE_ECC) && !defined(NO_PKCS11_ECDH))
+static CK_OBJECT_CLASS secretKeyClass = CKO_SECRET_KEY;
+#endif
+
+/**
+ * Load library, get function list and initialize PKCS#11.
+ *
+ * @param dev [in] Device object.
+ * @param library [in] Library name including path.
+ * @return BAD_FUNC_ARG when dev or library are NULL pointers.
+ * BAD_PATH_ERROR when dynamic library cannot be opened.
+ * WC_INIT_E when the initialization PKCS#11 fails.
+ * WC_HW_E when unable to get PKCS#11 function list.
+ * 0 on success.
+ */
+int wc_Pkcs11_Initialize(Pkcs11Dev* dev, const char* library, void* heap)
+{
+ int ret = 0;
+ void* func;
+ CK_C_INITIALIZE_ARGS args;
+
+ if (dev == NULL || library == NULL)
+ ret = BAD_FUNC_ARG;
+
+ if (ret == 0) {
+ dev->heap = heap;
+ dev->dlHandle = dlopen(library, RTLD_NOW | RTLD_LOCAL);
+ if (dev->dlHandle == NULL) {
+ WOLFSSL_MSG(dlerror());
+ ret = BAD_PATH_ERROR;
+ }
+ }
+
+ if (ret == 0) {
+ dev->func = NULL;
+ func = dlsym(dev->dlHandle, "C_GetFunctionList");
+ if (func == NULL)
+ ret = WC_HW_E;
+ }
+ if (ret == 0) {
+ if (((CK_C_GetFunctionList)func)(&dev->func) != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+ if (ret == 0) {
+ XMEMSET(&args, 0x00, sizeof(args));
+ args.flags = CKF_OS_LOCKING_OK;
+ if (dev->func->C_Initialize(&args) != CKR_OK)
+ ret = WC_INIT_E;
+ }
+
+ if (ret != 0)
+ wc_Pkcs11_Finalize(dev);
+
+ return ret;
+}
+
+/**
+ * Close the Pkcs#11 library.
+ *
+ * @param dev [in] Device object.
+ */
+void wc_Pkcs11_Finalize(Pkcs11Dev* dev)
+{
+ if (dev != NULL && dev->dlHandle != NULL) {
+ if (dev->func != NULL) {
+ dev->func->C_Finalize(NULL);
+ dev->func = NULL;
+ }
+ dlclose(dev->dlHandle);
+ dev->dlHandle = NULL;
+ }
+}
+
+/**
+ * Set up a token for use.
+ *
+ * @param token [in] Token object.
+ * @param dev [in] PKCS#11 device object.
+ * @param slotId [in] Slot number of the token.<br>
+ * Passing -1 uses the first available slot.
+ * @param tokenName [in] Name of token to initialize.
+ * @param userPin [in] PIN to use to login as user.
+ * @param userPinSz [in] Number of bytes in PIN.
+ * @return BAD_FUNC_ARG when token, dev and/or tokenName is NULL.
+ * WC_INIT_E when initializing token fails.
+ * WC_HW_E when another PKCS#11 library call fails.
+ * -1 when no slot available.
+ * 0 on success.
+ */
+int wc_Pkcs11Token_Init(Pkcs11Token* token, Pkcs11Dev* dev, int slotId,
+ const char* tokenName, const unsigned char* userPin, int userPinSz)
+{
+ int ret = 0;
+ CK_RV rv;
+ CK_SLOT_ID* slot = NULL;
+ CK_ULONG slotCnt = 0;
+
+ if (token == NULL || dev == NULL || tokenName == NULL)
+ ret = BAD_FUNC_ARG;
+
+ if (ret == 0) {
+ if (slotId < 0) {
+ /* Use first available slot with a token. */
+ rv = dev->func->C_GetSlotList(CK_TRUE, NULL, &slotCnt);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ if (ret == 0) {
+ slot = (CK_SLOT_ID*)XMALLOC(slotCnt * sizeof(*slot), dev->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (slot == NULL)
+ ret = MEMORY_E;
+ }
+ if (ret == 0) {
+ rv = dev->func->C_GetSlotList(CK_TRUE, slot, &slotCnt);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ if (ret == 0) {
+ if (slotCnt > 0)
+ slotId = (int)slot[0];
+ else
+ ret = WC_HW_E;
+ }
+ }
+ }
+ if (ret == 0) {
+ token->func = dev->func;
+ token->slotId = (CK_SLOT_ID)slotId;
+ token->handle = NULL_PTR;
+ token->userPin = (CK_UTF8CHAR_PTR)userPin;
+ token->userPinSz = (CK_ULONG)userPinSz;
+ }
+
+ if (slot != NULL)
+ XFREE(slot, dev->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ return ret;
+}
+
+/**
+ * Finalize token.
+ * Closes all sessions on token.
+ *
+ * @param token [in] Token object.
+ */
+void wc_Pkcs11Token_Final(Pkcs11Token* token)
+{
+ if (token != NULL && token->func != NULL) {
+ token->func->C_CloseAllSessions(token->slotId);
+ token->handle = NULL_PTR;
+ ForceZero(token->userPin, (word32)token->userPinSz);
+ }
+}
+
+/**
+ * Open a session on a token.
+ *
+ * @param token [in] Token object.
+ * @param session [in] Session object.
+ * @param readWrite [in] Boolean indicating to open session for Read/Write.
+ * @return BAD_FUNC_ARG when token or session is NULL.
+ * WC_HW_E when opening the session fails.
+ * 0 on success.
+ */
+static int Pkcs11OpenSession(Pkcs11Token* token, Pkcs11Session* session,
+ int readWrite)
+{
+ int ret = 0;
+ CK_RV rv;
+
+ if (token == NULL || session == NULL)
+ ret = BAD_FUNC_ARG;
+
+ if (ret == 0) {
+ if (token->handle != NULL_PTR)
+ session->handle = token->handle;
+ else {
+ /* Create a new session. */
+ CK_FLAGS flags = CKF_SERIAL_SESSION;
+
+ if (readWrite)
+ flags |= CKF_RW_SESSION;
+
+ rv = token->func->C_OpenSession(token->slotId, flags,
+ (CK_VOID_PTR)NULL, (CK_NOTIFY)NULL,
+ &session->handle);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ if (ret == 0 && token->userPin != NULL) {
+ rv = token->func->C_Login(session->handle, CKU_USER,
+ token->userPin, token->userPinSz);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ }
+ }
+ if (ret == 0) {
+ session->func = token->func;
+ session->slotId = token->slotId;
+ }
+
+ return ret;
+}
+
+/**
+ * Close a session on a token.
+ * Won't close a session created externally.
+ *
+ * @param token [in] Token object.
+ * @param session [in] Session object.
+ */
+static void Pkcs11CloseSession(Pkcs11Token* token, Pkcs11Session* session)
+{
+ if (token != NULL && session != NULL && token->handle != session->handle) {
+ if (token->userPin != NULL)
+ session->func->C_Logout(session->handle);
+ session->func->C_CloseSession(session->handle);
+ }
+}
+
+/**
+ * Open a session on the token to be used for all operations.
+ *
+ * @param token     [in]  Token object.
+ * @param readWrite [in]  Boolean indicating to open session for Read/Write.
+ * @return  BAD_FUNC_ARG when token is NULL.
+ *          WC_HW_E when opening the session fails.
+ *          0 on success.
+ */
+int wc_Pkcs11Token_Open(Pkcs11Token* token, int readWrite)
+{
+    int ret = 0;
+    Pkcs11Session session;
+
+    if (token == NULL)
+        ret = BAD_FUNC_ARG;
+
+    if (ret == 0) {
+        ret = Pkcs11OpenSession(token, &session, readWrite);
+        /* Only publish the handle on success - on failure session.handle is
+         * not guaranteed to have been set by Pkcs11OpenSession. */
+        if (ret == 0)
+            token->handle = session.handle;
+    }
+
+    return ret;
+}
+
+/**
+ * Close the token's session.
+ * All objects, like keys, created in the session will be destroyed.
+ *
+ * @param token [in]  Token object.
+ */
+void wc_Pkcs11Token_Close(Pkcs11Token* token)
+{
+    Pkcs11Session session;
+
+    if (token != NULL) {
+        session.func = token->func;
+        session.handle = token->handle;
+        /* Clear the token handle first so that Pkcs11CloseSession sees the
+         * session as internally created and actually closes it. */
+        token->handle = NULL_PTR;
+        Pkcs11CloseSession(token, &session);
+    }
+}
+
+
+#if (!defined(NO_AES) && (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || \
+ !defined(NO_HMAC)
+/**
+ * Create a PKCS#11 object containing the secret key data.
+ *
+ * @param key     [out] Handle to the new secret key object.
+ * @param session [in]  Session object.
+ * @param keyType [in]  Type of secret key - e.g. CKK_AES.
+ * @param data    [in]  Secret key data.
+ * @param len     [in]  Length of key data in bytes.
+ * @param id      [in]  Identifier to set against the key (may be empty).
+ * @param idLen   [in]  Length of identifier.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11CreateSecretKey(CK_OBJECT_HANDLE* key, Pkcs11Session* session,
+                                 CK_KEY_TYPE keyType, unsigned char* data,
+                                 int len, unsigned char* id, int idLen)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS, &secretKeyClass, sizeof(secretKeyClass) },
+        { CKA_KEY_TYPE, &keyType, sizeof(keyType) },
+        { CKA_ENCRYPT, &ckTrue, sizeof(ckTrue) },
+        { CKA_VALUE, NULL, 0 },
+        { CKA_ID, id, (CK_ULONG)idLen }
+    };
+    /* CKA_ID entry is only included when an identifier is supplied. */
+    int keyTmplCnt = 4;
+
+    WOLFSSL_MSG("PKCS#11: Create Secret Key");
+
+    /* Set the secret key data. */
+    keyTemplate[3].pValue = data;
+    keyTemplate[3].ulValueLen = (CK_ULONG)len;
+
+    if (idLen > 0)
+        keyTmplCnt++;
+
+    /* Create an object containing key data for device to use. */
+    rv = session->func->C_CreateObject(session->handle, keyTemplate, keyTmplCnt,
+                                       key);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    return ret;
+}
+#endif
+
+#ifndef NO_RSA
+/**
+ * Create a PKCS#11 object containing the RSA private key data.
+ *
+ * @param privateKey [out] Handle to private key object.
+ * @param session    [in]  Session object.
+ * @param rsaKey     [in]  RSA key with private key data.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11CreateRsaPrivateKey(CK_OBJECT_HANDLE* privateKey,
+                                     Pkcs11Session* session,
+                                     RsaKey* rsaKey)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS, &privKeyClass, sizeof(privKeyClass) },
+        { CKA_KEY_TYPE, &rsaKeyType, sizeof(rsaKeyType) },
+        { CKA_DECRYPT, &ckTrue, sizeof(ckTrue) },
+        { CKA_MODULUS, NULL, 0 },
+        { CKA_PRIVATE_EXPONENT, NULL, 0 },
+        { CKA_PRIME_1, NULL, 0 },
+        { CKA_PRIME_2, NULL, 0 },
+        { CKA_EXPONENT_1, NULL, 0 },
+        { CKA_EXPONENT_2, NULL, 0 },
+        { CKA_COEFFICIENT, NULL, 0 },
+        { CKA_PUBLIC_EXPONENT, NULL, 0 }
+    };
+    CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    /* Set the modulus and private key data (all CRT components included). */
+    keyTemplate[ 3].pValue = rsaKey->n.raw.buf;
+    keyTemplate[ 3].ulValueLen = rsaKey->n.raw.len;
+    keyTemplate[ 4].pValue = rsaKey->d.raw.buf;
+    keyTemplate[ 4].ulValueLen = rsaKey->d.raw.len;
+    keyTemplate[ 5].pValue = rsaKey->p.raw.buf;
+    keyTemplate[ 5].ulValueLen = rsaKey->p.raw.len;
+    keyTemplate[ 6].pValue = rsaKey->q.raw.buf;
+    keyTemplate[ 6].ulValueLen = rsaKey->q.raw.len;
+    keyTemplate[ 7].pValue = rsaKey->dP.raw.buf;
+    keyTemplate[ 7].ulValueLen = rsaKey->dP.raw.len;
+    keyTemplate[ 8].pValue = rsaKey->dQ.raw.buf;
+    keyTemplate[ 8].ulValueLen = rsaKey->dQ.raw.len;
+    keyTemplate[ 9].pValue = rsaKey->u.raw.buf;
+    keyTemplate[ 9].ulValueLen = rsaKey->u.raw.len;
+    keyTemplate[10].pValue = rsaKey->e.raw.buf;
+    keyTemplate[10].ulValueLen = rsaKey->e.raw.len;
+
+    rv = session->func->C_CreateObject(session->handle, keyTemplate, keyTmplCnt,
+                                       privateKey);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    return ret;
+}
+#endif
+
+#ifdef HAVE_ECC
+/**
+ * Set the ECC parameters into the template.
+ * Writes a DER OBJECT IDENTIFIER (tag + length + curve OID) into the buffer
+ * already referenced by tmpl[idx].pValue.
+ *
+ * @param key  [in]  ECC key.
+ * @param tmpl [in]  PKCS#11 template.
+ * @param idx  [in]  Index of template to put parameters into.
+ * @return  NOT_COMPILED_IN when the EC parameters are not known.
+ *          0 on success.
+ */
+static int Pkcs11EccSetParams(ecc_key* key, CK_ATTRIBUTE* tmpl, int idx)
+{
+    int ret = 0;
+
+    if (key->dp != NULL && key->dp->oid != NULL) {
+        /* NOTE(review): assumes the destination buffer holds at least
+         * oidSz + 2 bytes (callers pass MAX_EC_PARAM_LEN buffers). */
+        unsigned char* derParams = tmpl[idx].pValue;
+        /* ASN.1 encoding: OBJ + ecc parameters OID */
+        tmpl[idx].ulValueLen = key->dp->oidSz + 2;
+        derParams[0] = ASN_OBJECT_ID;
+        derParams[1] = key->dp->oidSz;
+        XMEMCPY(derParams + 2, key->dp->oid, key->dp->oidSz);
+    }
+    else
+        ret = NOT_COMPILED_IN;
+
+    return ret;
+}
+
+/**
+ * Create a PKCS#11 object containing the ECC private key data.
+ *
+ * @param privateKey  [out] Handle to private key object.
+ * @param session     [in]  Session object.
+ * @param private_key [in]  ECC private key.
+ * @param operation   [in]  Cryptographic operation key is to be used for
+ *                          (e.g. CKA_DERIVE or CKA_SIGN).
+ * @return  NOT_COMPILED_IN when the EC parameters are not known.
+ *          WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11CreateEccPrivateKey(CK_OBJECT_HANDLE* privateKey,
+                                     Pkcs11Session* session,
+                                     ecc_key* private_key,
+                                     CK_ATTRIBUTE_TYPE operation)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_UTF8CHAR params[MAX_EC_PARAM_LEN];
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS, &privKeyClass, sizeof(privKeyClass) },
+        { CKA_KEY_TYPE, &ecKeyType, sizeof(ecKeyType) },
+        { operation, &ckTrue, sizeof(ckTrue) },
+        { CKA_EC_PARAMS, params, 0 },
+        { CKA_VALUE, NULL, 0 }
+    };
+    CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    /* Encode the curve OID into the CKA_EC_PARAMS attribute. */
+    ret = Pkcs11EccSetParams(private_key, keyTemplate, 3);
+    if (ret == 0) {
+        keyTemplate[4].pValue = private_key->k.raw.buf;
+        keyTemplate[4].ulValueLen = private_key->k.raw.len;
+
+        rv = session->func->C_CreateObject(session->handle, keyTemplate,
+                                           keyTmplCnt, privateKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    return ret;
+}
+#endif
+
+#if !defined(NO_RSA) || defined(HAVE_ECC) || (!defined(NO_AES) && \
+ (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || !defined(NO_HMAC)
+/**
+ * Check if mechanism is available in session on token.
+ *
+ * @param session [in]  Session object.
+ * @param mech    [in]  Mechanism to look for.
+ * @return  NOT_COMPILED_IN when mechanism not available.
+ *          0 when mechanism is available.
+ */
+static int Pkcs11MechAvail(Pkcs11Session* session, CK_MECHANISM_TYPE mech)
+{
+    CK_MECHANISM_INFO mechInfo;
+
+    /* Probe the slot for the mechanism; the info itself is discarded. */
+    if (session->func->C_GetMechanismInfo(session->slotId, mech,
+                                          &mechInfo) != CKR_OK) {
+        return NOT_COMPILED_IN;
+    }
+
+    return 0;
+}
+#endif
+
+#ifndef NO_HMAC
+/**
+ * Return the mechanism type and key type for the digest type when using HMAC.
+ *
+ * @param macType  [in]  Digest type - e.g. WC_SHA256.
+ * @param mechType [out] Mechanism type - e.g. CKM_SHA256_HMAC.
+ * @param keyType  [out] Key type - e.g. CKK_SHA256_HMAC.
+ * @return  NOT_COMPILED_IN if the digest algorithm isn't recognised.
+ *          0 otherwise.
+ */
+static int Pkcs11HmacTypes(int macType, int* mechType, int* keyType)
+{
+    int ret = 0;
+
+    switch (macType)
+    {
+    #ifndef NO_MD5
+        case WC_MD5:
+            *mechType = CKM_MD5_HMAC;
+            *keyType = CKK_MD5_HMAC;
+            break;
+    #endif
+    #ifndef NO_SHA
+        case WC_SHA:
+            *mechType = CKM_SHA_1_HMAC;
+            *keyType = CKK_SHA_1_HMAC;
+            break;
+    #endif
+    #ifdef WOLFSSL_SHA224
+        case WC_SHA224:
+            *mechType = CKM_SHA224_HMAC;
+            *keyType = CKK_SHA224_HMAC;
+            break;
+    #endif
+    #ifndef NO_SHA256
+        case WC_SHA256:
+            *mechType = CKM_SHA256_HMAC;
+            *keyType = CKK_SHA256_HMAC;
+            break;
+    #endif
+    #ifdef WOLFSSL_SHA384
+        case WC_SHA384:
+            *mechType = CKM_SHA384_HMAC;
+            *keyType = CKK_SHA384_HMAC;
+            break;
+    #endif
+    #ifdef WOLFSSL_SHA512
+        case WC_SHA512:
+            *mechType = CKM_SHA512_HMAC;
+            *keyType = CKK_SHA512_HMAC;
+            break;
+    #endif
+        default:
+            /* Digest compiled out or not supported over HMAC. */
+            ret = NOT_COMPILED_IN;
+            break;
+    }
+
+    return ret;
+}
+#endif
+
+/**
+ * Store the private key on the token in the session.
+ *
+ * @param token [in]  Token to store private key on.
+ * @param type  [in]  Key type - one of the PKCS11_KEY_TYPE_* values.
+ * @param clear [in]  Clear out the private data from software key.
+ * @param key   [in]  Key type specific object.
+ * @return  BAD_FUNC_ARG when token is NULL.
+ *          NOT_COMPILED_IN when mechanism not available.
+ *          WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+int wc_Pkcs11StoreKey(Pkcs11Token* token, int type, int clear, void* key)
+{
+    int ret = 0;
+    Pkcs11Session session;
+    CK_OBJECT_HANDLE privKey = NULL_PTR;
+
+    /* Key objects need a Read/Write session. */
+    ret = Pkcs11OpenSession(token, &session, 1);
+    if (ret == 0) {
+        switch (type) {
+    #if !defined(NO_AES) && defined(HAVE_AESGCM)
+            case PKCS11_KEY_TYPE_AES_GCM: {
+                Aes* aes = (Aes*)key;
+
+                ret = Pkcs11MechAvail(&session, CKM_AES_GCM);
+                if (ret == 0) {
+                    ret = Pkcs11CreateSecretKey(&privKey, &session, CKK_AES,
+                                                (unsigned char*)aes->devKey,
+                                                aes->keylen,
+                                                (unsigned char*)aes->id,
+                                                aes->idLen);
+                }
+                /* Only wipe the software copy once the token holds the key. */
+                if (ret == 0 && clear)
+                    ForceZero(aes->devKey, aes->keylen);
+                break;
+            }
+    #endif
+    #if !defined(NO_AES) && defined(HAVE_AES_CBC)
+            case PKCS11_KEY_TYPE_AES_CBC: {
+                Aes* aes = (Aes*)key;
+
+                ret = Pkcs11MechAvail(&session, CKM_AES_CBC);
+                if (ret == 0) {
+                    ret = Pkcs11CreateSecretKey(&privKey, &session, CKK_AES,
+                                                (unsigned char*)aes->devKey,
+                                                aes->keylen,
+                                                (unsigned char*)aes->id,
+                                                aes->idLen);
+                }
+                if (ret == 0 && clear)
+                    ForceZero(aes->devKey, aes->keylen);
+                break;
+            }
+    #endif
+    #ifndef NO_HMAC
+            case PKCS11_KEY_TYPE_HMAC: {
+                Hmac* hmac = (Hmac*)key;
+                int mechType;
+                int keyType;
+
+                ret = Pkcs11HmacTypes(hmac->macType, &mechType, &keyType);
+                if (ret == NOT_COMPILED_IN)
+                    break;
+
+                if (ret == 0)
+                    ret = Pkcs11MechAvail(&session, mechType);
+                if (ret == 0) {
+                    ret = Pkcs11CreateSecretKey(&privKey, &session, keyType,
+                                                (unsigned char*)hmac->keyRaw,
+                                                hmac->keyLen,
+                                                (unsigned char*)hmac->id,
+                                                hmac->idLen);
+                    /* Fall back to a generic secret when the token rejects
+                     * the HMAC-specific key type. */
+                    if (ret == WC_HW_E) {
+                        ret = Pkcs11CreateSecretKey(&privKey, &session,
+                                                   CKK_GENERIC_SECRET,
+                                                   (unsigned char*)hmac->keyRaw,
+                                                   hmac->keyLen,
+                                                   (unsigned char*)hmac->id,
+                                                   hmac->idLen);
+                    }
+                }
+                break;
+            }
+    #endif
+    #ifndef NO_RSA
+            case PKCS11_KEY_TYPE_RSA: {
+                RsaKey* rsaKey = (RsaKey*)key;
+
+                ret = Pkcs11MechAvail(&session, CKM_RSA_X_509);
+                if (ret == 0)
+                    ret = Pkcs11CreateRsaPrivateKey(&privKey, &session, rsaKey);
+                /* Wipe all private/CRT values from the software key. */
+                if (ret == 0 && clear) {
+                    mp_forcezero(&rsaKey->u);
+                    mp_forcezero(&rsaKey->dQ);
+                    mp_forcezero(&rsaKey->dP);
+                    mp_forcezero(&rsaKey->q);
+                    mp_forcezero(&rsaKey->p);
+                    mp_forcezero(&rsaKey->d);
+                }
+                break;
+            }
+    #endif
+    #ifdef HAVE_ECC
+            case PKCS11_KEY_TYPE_EC: {
+                ecc_key* eccKey = (ecc_key*)key;
+                int ret2 = NOT_COMPILED_IN;
+
+        #ifndef NO_PKCS11_ECDH
+                /* Try ECDH mechanism first. */
+                ret = Pkcs11MechAvail(&session, CKM_ECDH1_DERIVE);
+                if (ret == 0) {
+                    ret = Pkcs11CreateEccPrivateKey(&privKey, &session, eccKey,
+                                                    CKA_DERIVE);
+                }
+        #endif
+                if (ret == 0 || ret == NOT_COMPILED_IN) {
+                    /* Try ECDSA mechanism next. */
+                    ret2 = Pkcs11MechAvail(&session, CKM_ECDSA);
+                    if (ret2 == 0) {
+                        ret2 = Pkcs11CreateEccPrivateKey(&privKey, &session,
+                                                         eccKey, CKA_SIGN);
+                    }
+                    /* OK for this to fail if set for ECDH. */
+                    if (ret == NOT_COMPILED_IN)
+                        ret = ret2;
+                }
+                if (ret == 0 && clear)
+                    mp_forcezero(&eccKey->k);
+                break;
+            }
+    #endif
+            default:
+                ret = NOT_COMPILED_IN;
+                break;
+        }
+
+        Pkcs11CloseSession(token, &session);
+    }
+
+    /* Suppress unused warnings when all key types are compiled out. */
+    (void)privKey;
+    (void)clear;
+    (void)key;
+
+    return ret;
+}
+
+#if !defined(NO_RSA) || defined(HAVE_ECC) || (!defined(NO_AES) && \
+ (defined(HAVE_AESGCM) || defined(HAVE_AES_CBC))) || !defined(NO_HMAC)
+/**
+ * Find the PKCS#11 key object of the given class and key type that has the
+ * identifier (CKA_ID) specified.
+ *
+ * @param key      [out] Handle to key object.
+ * @param keyClass [in]  Public or private key class.
+ * @param keyType  [in]  Type of key.
+ * @param session  [in]  Session object.
+ * @param id       [in]  Identifier set against a key.
+ * @param idLen    [in]  Length of identifier.
+ * @return  WC_HW_E when a PKCS#11 library call fails or no matching object
+ *          is found.
+ *          0 on success.
+ */
+static int Pkcs11FindKeyById(CK_OBJECT_HANDLE* key, CK_OBJECT_CLASS keyClass,
+                             CK_KEY_TYPE keyType, Pkcs11Session* session,
+                             byte* id, int idLen)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_ULONG count;
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS, &keyClass, sizeof(keyClass) },
+        { CKA_KEY_TYPE, &keyType, sizeof(keyType) },
+        { CKA_ID, id, (CK_ULONG)idLen }
+    };
+    CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    WOLFSSL_MSG("PKCS#11: Find Key By Id");
+
+    rv = session->func->C_FindObjectsInit(session->handle, keyTemplate,
+                                          keyTmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    if (ret == 0) {
+        /* Take the first matching object only. */
+        rv = session->func->C_FindObjects(session->handle, key, 1, &count);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        rv = session->func->C_FindObjectsFinal(session->handle);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    /* No object with the id is an error too. */
+    if (ret == 0 && count == 0)
+        ret = WC_HW_E;
+
+    return ret;
+}
+#endif
+
+#ifndef NO_RSA
+/**
+ * Find the PKCS#11 object containing the RSA public or private key data with
+ * the modulus specified.
+ *
+ * @param key      [out] Handle to key object.
+ * @param keyClass [in]  Public or private key class.
+ * @param session  [in]  Session object.
+ * @param rsaKey   [in]  RSA key with modulus to search on.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11FindRsaKey(CK_OBJECT_HANDLE* key, CK_OBJECT_CLASS keyClass,
+                            Pkcs11Session* session, RsaKey* rsaKey)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_ULONG count;
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS, &keyClass, sizeof(keyClass) },
+        { CKA_KEY_TYPE, &rsaKeyType, sizeof(rsaKeyType) },
+        { CKA_MODULUS, NULL, 0 },
+    };
+    CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    /* Set the modulus. */
+    keyTemplate[2].pValue = rsaKey->n.raw.buf;
+    keyTemplate[2].ulValueLen = rsaKey->n.raw.len;
+
+    rv = session->func->C_FindObjectsInit(session->handle, keyTemplate,
+                                          keyTmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    if (ret == 0) {
+        /* Take the first matching object only. */
+        rv = session->func->C_FindObjects(session->handle, key, 1, &count);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        rv = session->func->C_FindObjectsFinal(session->handle);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    return ret;
+}
+
+/**
+ * Exponentiate the input with the public part of the RSA key.
+ * Used in public encrypt and decrypt.
+ *
+ * @param session [in]  Session object.
+ * @param info    [in]  Cryptographic operation data.
+ * @return  BAD_FUNC_ARG when the output length pointer is NULL.
+ *          WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11RsaPublic(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_MECHANISM mech;
+    CK_ULONG outLen;
+    CK_OBJECT_HANDLE publicKey = NULL_PTR;
+    int sessionKey = 0;
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS, &pubKeyClass, sizeof(pubKeyClass) },
+        { CKA_KEY_TYPE, &rsaKeyType, sizeof(rsaKeyType) },
+        { CKA_ENCRYPT, &ckTrue, sizeof(ckTrue) },
+        { CKA_MODULUS, NULL, 0 },
+        { CKA_PUBLIC_EXPONENT, NULL, 0 }
+    };
+    CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    WOLFSSL_MSG("PKCS#11: RSA Public Key Operation");
+
+    if (info->pk.rsa.outLen == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        /* A key with a public exponent present is loaded as a temporary
+         * session object; otherwise look the key up on the token by id. */
+        if ((sessionKey = !mp_iszero(&info->pk.rsa.key->e))) {
+            /* Set the modulus and public exponent data. */
+            keyTemplate[3].pValue = info->pk.rsa.key->n.raw.buf;
+            keyTemplate[3].ulValueLen = info->pk.rsa.key->n.raw.len;
+            keyTemplate[4].pValue = info->pk.rsa.key->e.raw.buf;
+            keyTemplate[4].ulValueLen = info->pk.rsa.key->e.raw.len;
+
+            /* Create an object containing public key data for device to use. */
+            rv = session->func->C_CreateObject(session->handle, keyTemplate,
+                                               keyTmplCnt, &publicKey);
+            if (rv != CKR_OK)
+                ret = WC_HW_E;
+        }
+        else {
+            ret = Pkcs11FindKeyById(&publicKey, CKO_PUBLIC_KEY, CKK_RSA,
+                                    session, info->pk.rsa.key->id,
+                                    info->pk.rsa.key->idLen);
+        }
+    }
+
+    if (ret == 0) {
+        /* Raw RSA encrypt/decrypt operation. */
+        mech.mechanism = CKM_RSA_X_509;
+        mech.ulParameterLen = 0;
+        mech.pParameter = NULL;
+
+        rv = session->func->C_EncryptInit(session->handle, &mech, publicKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = (CK_ULONG)*info->pk.rsa.outLen;
+        rv = session->func->C_Encrypt(session->handle,
+                (CK_BYTE_PTR)info->pk.rsa.in, info->pk.rsa.inLen,
+                info->pk.rsa.out, &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0)
+        *info->pk.rsa.outLen = (word32)outLen;
+
+    /* Temporary session objects are removed again. */
+    if (sessionKey)
+        session->func->C_DestroyObject(session->handle, publicKey);
+
+    return ret;
+}
+
+/**
+ * Exponentiate the input with the private part of the RSA key.
+ * Used in private encrypt and decrypt.
+ *
+ * @param session [in]  Session object.
+ * @param info    [in]  Cryptographic operation data.
+ * @return  BAD_FUNC_ARG when the output length pointer is NULL.
+ *          WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11RsaPrivate(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_MECHANISM mech;
+    CK_ULONG outLen;
+    CK_OBJECT_HANDLE privateKey = NULL_PTR;
+    int sessionKey = 0;
+
+    WOLFSSL_MSG("PKCS#11: RSA Private Key Operation");
+
+    if (info->pk.rsa.outLen == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    if (ret == 0) {
+        /* Key with private data present is loaded as a temporary session
+         * object; otherwise look it up on the token by id, then by modulus. */
+        if ((sessionKey = !mp_iszero(&info->pk.rsa.key->d))) {
+            ret = Pkcs11CreateRsaPrivateKey(&privateKey, session,
+                                            info->pk.rsa.key);
+        }
+        else if (info->pk.rsa.key->idLen > 0) {
+            ret = Pkcs11FindKeyById(&privateKey, CKO_PRIVATE_KEY, CKK_RSA,
+                                    session, info->pk.rsa.key->id,
+                                    info->pk.rsa.key->idLen);
+        }
+        else {
+            ret = Pkcs11FindRsaKey(&privateKey, CKO_PRIVATE_KEY, session,
+                                   info->pk.rsa.key);
+        }
+    }
+
+    if (ret == 0) {
+        /* Raw RSA encrypt/decrypt operation. */
+        mech.mechanism = CKM_RSA_X_509;
+        mech.ulParameterLen = 0;
+        mech.pParameter = NULL;
+
+        rv = session->func->C_DecryptInit(session->handle, &mech, privateKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        outLen = (CK_ULONG)*info->pk.rsa.outLen;
+        rv = session->func->C_Decrypt(session->handle,
+                (CK_BYTE_PTR)info->pk.rsa.in, info->pk.rsa.inLen,
+                info->pk.rsa.out, &outLen);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0)
+        *info->pk.rsa.outLen = (word32)outLen;
+
+    /* Temporary session objects are removed again. */
+    if (sessionKey)
+        session->func->C_DestroyObject(session->handle, privateKey);
+
+    return ret;
+}
+
+/**
+ * Perform an RSA operation.
+ * Dispatches to the public or private key operation based on the RSA type.
+ *
+ * @param session [in]  Session object.
+ * @param info    [in]  Cryptographic operation data.
+ * @return  NOT_COMPILED_IN when the raw RSA mechanism or the needed
+ *          encrypt/decrypt capability is not available.
+ *          WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11Rsa(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    CK_RV rv;
+    CK_MECHANISM_INFO mechInfo;
+
+    /* Check operation is supported. */
+    rv = session->func->C_GetMechanismInfo(session->slotId, CKM_RSA_X_509,
+                                           &mechInfo);
+    if (rv != CKR_OK)
+        ret = NOT_COMPILED_IN;
+
+    if (ret == 0) {
+        if (info->pk.rsa.type == RSA_PUBLIC_ENCRYPT ||
+                                    info->pk.rsa.type == RSA_PUBLIC_DECRYPT) {
+            if ((mechInfo.flags & CKF_ENCRYPT) == 0)
+                ret = NOT_COMPILED_IN;
+            else
+                ret = Pkcs11RsaPublic(session, info);
+        }
+        else if (info->pk.rsa.type == RSA_PRIVATE_ENCRYPT ||
+                                    info->pk.rsa.type == RSA_PRIVATE_DECRYPT) {
+            if ((mechInfo.flags & CKF_DECRYPT) == 0)
+                ret = NOT_COMPILED_IN;
+            else
+                ret = Pkcs11RsaPrivate(session, info);
+        }
+        else
+            ret = NOT_COMPILED_IN;
+    }
+
+    return ret;
+}
+
+#ifdef WOLFSSL_KEY_GEN
+/**
+ * Get the RSA public key data from the PKCS#11 object.
+ * Uses the standard two-phase C_GetAttributeValue pattern: the first call
+ * with NULL buffers returns the lengths, the second fills the buffers.
+ *
+ * @param key     [in]  RSA key to put the data into.
+ * @param session [in]  Session object.
+ * @param pubKey  [in]  Public key object.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11GetRsaPublicKey(RsaKey* key, Pkcs11Session* session,
+                                 CK_OBJECT_HANDLE pubKey)
+{
+    int ret = 0;
+    unsigned char* mod = NULL;
+    unsigned char* exp = NULL;
+    int modSz, expSz;
+    CK_ATTRIBUTE tmpl[] = {
+        { CKA_MODULUS, NULL_PTR, 0 },
+        { CKA_PUBLIC_EXPONENT, NULL_PTR, 0 }
+    };
+    CK_ULONG tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+    CK_RV rv;
+
+    /* First call gets the attribute lengths only. */
+    rv = session->func->C_GetAttributeValue(session->handle, pubKey, tmpl,
+                                            tmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    if (ret == 0) {
+        modSz = tmpl[0].ulValueLen;
+        expSz = tmpl[1].ulValueLen;
+        mod = (unsigned char*)XMALLOC(modSz, key->heap,
+                                      DYNAMIC_TYPE_TMP_BUFFER);
+        if (mod == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0) {
+        exp = (unsigned char*)XMALLOC(expSz, key->heap,
+                                      DYNAMIC_TYPE_TMP_BUFFER);
+        if (exp == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0) {
+        tmpl[0].pValue = mod;
+        tmpl[1].pValue = exp;
+
+        /* Second call retrieves the actual attribute data. */
+        rv = session->func->C_GetAttributeValue(session->handle, pubKey,
+                                                tmpl, tmplCnt);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0)
+        ret = wc_RsaPublicKeyDecodeRaw(mod, modSz, exp, expSz, key);
+
+    if (exp != NULL)
+        XFREE(exp, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (mod != NULL)
+        XFREE(mod, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return ret;
+}
+
+/**
+ * Perform an RSA key generation operation.
+ * The private key data stays on the device.
+ *
+ * @param session [in]  Session object.
+ * @param info    [in]  Cryptographic operation data.
+ * @return  NOT_COMPILED_IN when the key generation mechanism isn't available.
+ *          WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11RsaKeyGen(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    RsaKey* key = info->pk.rsakg.key;
+    CK_RV rv;
+    CK_ULONG bits = info->pk.rsakg.size;
+    CK_OBJECT_HANDLE pubKey = NULL_PTR, privKey = NULL_PTR;
+    CK_MECHANISM mech;
+    static CK_BYTE pub_exp[] = { 0x01, 0x00, 0x01, 0x00 };
+    CK_ATTRIBUTE pubKeyTmpl[] = {
+        { CKA_MODULUS_BITS, &bits, sizeof(bits) },
+        { CKA_ENCRYPT, &ckTrue, sizeof(ckTrue) },
+        { CKA_VERIFY, &ckTrue, sizeof(ckTrue) },
+        { CKA_PUBLIC_EXPONENT, &pub_exp, sizeof(pub_exp) }
+    };
+    CK_ULONG pubTmplCnt = sizeof(pubKeyTmpl)/sizeof(*pubKeyTmpl);
+    CK_ATTRIBUTE privKeyTmpl[] = {
+        {CKA_DECRYPT, &ckTrue, sizeof(ckTrue) },
+        {CKA_SIGN, &ckTrue, sizeof(ckTrue) },
+        {CKA_ID, NULL, 0 }
+    };
+    /* CKA_ID entry is only added when the key has an identifier. */
+    int privTmplCnt = 2;
+    int i;
+
+    ret = Pkcs11MechAvail(session, CKM_RSA_PKCS_KEY_PAIR_GEN);
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: RSA Key Generation Operation");
+
+        /* Most commonly used public exponent value (array initialized). */
+        if (info->pk.rsakg.e != WC_RSA_EXPONENT) {
+            /* Encode the requested exponent little-endian. */
+            for (i = 0; i < (int)sizeof(pub_exp); i++)
+                pub_exp[i] = (info->pk.rsakg.e >> (8 * i)) & 0xff;
+        }
+        /* Strip trailing zero bytes from the exponent encoding. */
+        for (i = (int)sizeof(pub_exp) - 1; pub_exp[i] == 0; i--) {
+        }
+        pubKeyTmpl[3].ulValueLen = i + 1;
+
+        if (key->idLen != 0) {
+            privKeyTmpl[privTmplCnt].pValue = key->id;
+            privKeyTmpl[privTmplCnt].ulValueLen = key->idLen;
+            privTmplCnt++;
+        }
+
+        mech.mechanism = CKM_RSA_PKCS_KEY_PAIR_GEN;
+        mech.ulParameterLen = 0;
+        mech.pParameter = NULL;
+
+        rv = session->func->C_GenerateKeyPair(session->handle, &mech,
+                                              pubKeyTmpl, pubTmplCnt,
+                                              privKeyTmpl, privTmplCnt,
+                                              &pubKey, &privKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ret == 0)
+        ret = Pkcs11GetRsaPublicKey(key, session, pubKey);
+
+    /* The public key object is no longer needed once its data is extracted.
+     * Ignore the destroy results so they cannot mask the operation result. */
+    if (pubKey != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, pubKey);
+    /* On failure, remove the generated private key from the token too;
+     * on success it stays on the device. */
+    if (ret != 0 && privKey != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, privKey);
+
+    return ret;
+}
+#endif /* WOLFSSL_KEY_GEN */
+#endif /* !NO_RSA */
+
+#ifdef HAVE_ECC
+/**
+ * Find the PKCS#11 object containing the ECC public or private key data that
+ * matches the curve parameters and, for public keys, the public point.
+ *
+ * @param key      [out] Handle to key object.
+ * @param keyClass [in]  Public or private key class.
+ * @param session  [in]  Session object.
+ * @param eccKey   [in]  ECC key with parameters.
+ * @return  NOT_COMPILED_IN when the EC parameters are not known.
+ *          WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11FindEccKey(CK_OBJECT_HANDLE* key, CK_OBJECT_CLASS keyClass,
+                            Pkcs11Session* session, ecc_key* eccKey)
+{
+    int ret = 0;
+    int i;
+    unsigned char* ecPoint = NULL;
+    word32 len = 0;
+    CK_RV rv;
+    CK_ULONG count;
+    CK_UTF8CHAR params[MAX_EC_PARAM_LEN];
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS, &keyClass, sizeof(keyClass) },
+        { CKA_KEY_TYPE, &ecKeyType, sizeof(ecKeyType) },
+        { CKA_EC_PARAMS, params, 0 },
+        { CKA_EC_POINT, NULL, 0 },
+    };
+    /* CKA_EC_POINT is only added to the search for public keys. */
+    CK_ULONG attrCnt = 3;
+
+    ret = Pkcs11EccSetParams(eccKey, keyTemplate, 2);
+    if (ret == 0 && keyClass == CKO_PUBLIC_KEY) {
+        /* ASN1 encoded: OCT + uncompressed point */
+        len = 3 + 1 + 2 * eccKey->dp->size;
+        ecPoint = (unsigned char*)XMALLOC(len, eccKey->heap, DYNAMIC_TYPE_ECC);
+        if (ecPoint == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0 && keyClass == CKO_PUBLIC_KEY) {
+        /* Build the OCTET_STRING header in front of the X9.63 point. */
+        len -= 3;
+        i = 0;
+        ecPoint[i++] = ASN_OCTET_STRING;
+        if (len >= ASN_LONG_LENGTH)
+            ecPoint[i++] = (ASN_LONG_LENGTH | 1);
+        ecPoint[i++] = len;
+        ret = wc_ecc_export_x963(eccKey, ecPoint + i, &len);
+    }
+    if (ret == 0 && keyClass == CKO_PUBLIC_KEY) {
+        keyTemplate[3].pValue = ecPoint;
+        keyTemplate[3].ulValueLen = len + i;
+        attrCnt++;
+    }
+    if (ret == 0) {
+        rv = session->func->C_FindObjectsInit(session->handle, keyTemplate,
+                                              attrCnt);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+    if (ret == 0) {
+        /* Take the first matching object only. */
+        rv = session->func->C_FindObjects(session->handle, key, 1, &count);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        rv = session->func->C_FindObjectsFinal(session->handle);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ecPoint != NULL)
+        XFREE(ecPoint, eccKey->heap, DYNAMIC_TYPE_ECC);
+
+    return ret;
+}
+
+/**
+ * Create a PKCS#11 object containing the ECC public key data.
+ * Encode the public key as an OCTET_STRING of the encoded point.
+ *
+ * @param publicKey  [out] Handle to public key object.
+ * @param session    [in]  Session object.
+ * @param public_key [in]  ECC public key.
+ * @param operation  [in]  Cryptographic operation key is to be used for.
+ * @return  NOT_COMPILED_IN when the EC parameters are not known.
+ *          WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          0 on success.
+ */
+static int Pkcs11CreateEccPublicKey(CK_OBJECT_HANDLE* publicKey,
+                                    Pkcs11Session* session,
+                                    ecc_key* public_key,
+                                    CK_ATTRIBUTE_TYPE operation)
+{
+    int ret = 0;
+    int i;
+    unsigned char* ecPoint = NULL;
+    word32 len;
+    CK_RV rv;
+    CK_UTF8CHAR params[MAX_EC_PARAM_LEN];
+    CK_ATTRIBUTE keyTemplate[] = {
+        { CKA_CLASS, &pubKeyClass, sizeof(pubKeyClass) },
+        { CKA_KEY_TYPE, &ecKeyType, sizeof(ecKeyType) },
+        { operation, &ckTrue, sizeof(ckTrue) },
+        { CKA_EC_PARAMS, params, 0 },
+        { CKA_EC_POINT, NULL, 0 }
+    };
+    CK_ULONG keyTmplCnt = sizeof(keyTemplate) / sizeof(*keyTemplate);
+
+    /* Encode the curve OID into the CKA_EC_PARAMS attribute. */
+    ret = Pkcs11EccSetParams(public_key, keyTemplate, 3);
+    if (ret == 0) {
+        /* ASN1 encoded: OCT + uncompressed point */
+        len = 3 + 1 + 2 * public_key->dp->size;
+        ecPoint = (unsigned char*)XMALLOC(len, public_key->heap,
+                                          DYNAMIC_TYPE_ECC);
+        if (ecPoint == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0) {
+        /* Build the OCTET_STRING header in front of the X9.63 point. */
+        len -= 3;
+        i = 0;
+        ecPoint[i++] = ASN_OCTET_STRING;
+        if (len >= ASN_LONG_LENGTH)
+            ecPoint[i++] = ASN_LONG_LENGTH | 1;
+        ecPoint[i++] = len;
+        ret = wc_ecc_export_x963(public_key, ecPoint + i, &len);
+    }
+    if (ret == 0) {
+        keyTemplate[4].pValue = ecPoint;
+        keyTemplate[4].ulValueLen = len + i;
+
+        rv = session->func->C_CreateObject(session->handle, keyTemplate,
+                                           keyTmplCnt, publicKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    if (ecPoint != NULL)
+        XFREE(ecPoint, public_key->heap, DYNAMIC_TYPE_ECC);
+
+    return ret;
+}
+
+#ifndef NO_PKCS11_EC_KEYGEN
+/**
+ * Gets the public key data from the PKCS#11 object and puts into the ECC key.
+ * The CKA_EC_POINT attribute is an ASN.1 OCTET_STRING wrapping the X9.63
+ * uncompressed point; the wrapper is validated and stepped over.
+ *
+ * @param key     [in]  ECC public key.
+ * @param session [in]  Session object.
+ * @param pubKey  [in]  ECC public key PKCS#11 object.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          MEMORY_E when a memory allocation fails.
+ *          ASN_PARSE_E when the point encoding is invalid.
+ *          0 on success.
+ */
+static int Pkcs11GetEccPublicKey(ecc_key* key, Pkcs11Session* session,
+                                 CK_OBJECT_HANDLE pubKey)
+{
+    int ret = 0;
+    word32 i = 0;
+    int curveIdx;
+    unsigned char* point = NULL;
+    int pointSz;
+    byte tag;
+    CK_RV rv;
+    CK_ATTRIBUTE tmpl[] = {
+        { CKA_EC_POINT, NULL_PTR, 0 },
+    };
+    CK_ULONG tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+
+    /* First call gets the attribute length only. */
+    rv = session->func->C_GetAttributeValue(session->handle, pubKey, tmpl,
+                                            tmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+
+    if (ret == 0) {
+        pointSz = (int)tmpl[0].ulValueLen;
+        point = (unsigned char*)XMALLOC(pointSz, key->heap, DYNAMIC_TYPE_ECC);
+        if (point == NULL)
+            ret = MEMORY_E;
+    }
+    if (ret == 0) {
+        tmpl[0].pValue = point;
+
+        /* Second call retrieves the actual point data. */
+        rv = session->func->C_GetAttributeValue(session->handle, pubKey,
+                                                tmpl, tmplCnt);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+    }
+
+    /* Make sure the data is big enough for ASN.1: OCT + uncompressed point */
+    if (ret == 0 && pointSz < key->dp->size * 2 + 1 + 2)
+        ret = ASN_PARSE_E;
+    /* Step over the OCTET_STRING wrapper. */
+    if (ret == 0 && GetASNTag(point, &i, &tag, pointSz) != 0)
+        ret = ASN_PARSE_E;
+    if (ret == 0 && tag != ASN_OCTET_STRING)
+        ret = ASN_PARSE_E;
+    /* Long-form length: only a single length byte is expected. */
+    if (ret == 0 && point[i] >= ASN_LONG_LENGTH) {
+        if (point[i++] != (ASN_LONG_LENGTH | 1))
+            ret = ASN_PARSE_E;
+        else if (pointSz < key->dp->size * 2 + 1 + 3)
+            ret = ASN_PARSE_E;
+    }
+    if (ret == 0 && point[i++] != key->dp->size * 2 + 1)
+        ret = ASN_PARSE_E;
+
+    if (ret == 0) {
+        curveIdx = wc_ecc_get_curve_idx(key->dp->id);
+        ret = wc_ecc_import_point_der(point + i, pointSz - i, curveIdx,
+                                      &key->pubkey);
+    }
+
+    if (point != NULL)
+        XFREE(point, key->heap, DYNAMIC_TYPE_ECC);
+
+    return ret;
+}
+
+/**
+ * Perform an ECC key generation operation.
+ * The private key data stays on the device.
+ *
+ * @param session [in]  Session object.
+ * @param info    [in]  Cryptographic operation data.
+ * @return  NOT_COMPILED_IN when the key generation mechanism or the EC
+ *          parameters are not available.
+ *          WC_HW_E when a PKCS#11 library call fails.
+ *          0 on success.
+ */
+static int Pkcs11EcKeyGen(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+    int ret = 0;
+    ecc_key* key = info->pk.eckg.key;
+    CK_RV rv;
+    CK_OBJECT_HANDLE pubKey = NULL_PTR, privKey = NULL_PTR;
+    CK_MECHANISM mech;
+    CK_UTF8CHAR params[MAX_EC_PARAM_LEN];
+    CK_ATTRIBUTE pubKeyTmpl[] = {
+        { CKA_EC_PARAMS, params, 0 },
+        { CKA_ENCRYPT, &ckTrue, sizeof(ckTrue) },
+        { CKA_VERIFY, &ckTrue, sizeof(ckTrue) },
+    };
+    int pubTmplCnt = sizeof(pubKeyTmpl)/sizeof(*pubKeyTmpl);
+    CK_ATTRIBUTE privKeyTmpl[] = {
+        { CKA_DECRYPT, &ckTrue, sizeof(ckTrue) },
+        { CKA_SIGN, &ckTrue, sizeof(ckTrue) },
+        { CKA_DERIVE, &ckTrue, sizeof(ckTrue) },
+        { CKA_ID, NULL, 0 },
+    };
+    /* CKA_ID entry is only added when the key has an identifier. */
+    int privTmplCnt = 3;
+
+    ret = Pkcs11MechAvail(session, CKM_EC_KEY_PAIR_GEN);
+    if (ret == 0) {
+        WOLFSSL_MSG("PKCS#11: EC Key Generation Operation");
+
+        /* Encode the curve OID into the CKA_EC_PARAMS attribute. */
+        ret = Pkcs11EccSetParams(key, pubKeyTmpl, 0);
+    }
+    if (ret == 0) {
+        if (key->idLen != 0) {
+            privKeyTmpl[privTmplCnt].pValue = key->id;
+            privKeyTmpl[privTmplCnt].ulValueLen = key->idLen;
+            privTmplCnt++;
+        }
+
+        mech.mechanism = CKM_EC_KEY_PAIR_GEN;
+        mech.ulParameterLen = 0;
+        mech.pParameter = NULL;
+
+        rv = session->func->C_GenerateKeyPair(session->handle, &mech,
+                                              pubKeyTmpl, pubTmplCnt,
+                                              privKeyTmpl, privTmplCnt,
+                                              &pubKey, &privKey);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;    /* was -1: use the file-wide hardware error */
+    }
+
+    if (ret == 0)
+        ret = Pkcs11GetEccPublicKey(key, session, pubKey);
+
+    /* Public key object is no longer needed once the point is extracted. */
+    if (pubKey != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, pubKey);
+    /* On failure, remove the generated private key from the token too;
+     * on success it stays on the device. */
+    if (ret != 0 && privKey != NULL_PTR)
+        session->func->C_DestroyObject(session->handle, privKey);
+
+    return ret;
+}
+#endif
+
+#ifndef NO_PKCS11_ECDH
+/**
+ * Extracts the secret key data from the PKCS#11 object.
+ *
+ * @param session [in]      Session object.
+ * @param secret  [in]      PKCS#11 object with the secret key data.
+ * @param out     [in]      Buffer to hold secret data.
+ * @param outLen  [in,out]  On in, length of buffer.
+ *                          On out, the length of data in buffer.
+ * @return  WC_HW_E when a PKCS#11 library call fails.
+ *          BUFFER_E when the buffer is too small for the secret.
+ *          0 on success.
+ */
+static int Pkcs11ExtractSecret(Pkcs11Session* session, CK_OBJECT_HANDLE secret,
+                               byte* out, word32* outLen)
+{
+    int ret = 0;
+    CK_ATTRIBUTE tmpl[] = {
+        {CKA_VALUE, NULL_PTR, 0}
+    };
+    CK_ULONG tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+    CK_RV rv;
+
+    /* First call gets the secret's length only. */
+    rv = session->func->C_GetAttributeValue(session->handle, secret, tmpl,
+                                            tmplCnt);
+    if (rv != CKR_OK)
+        ret = WC_HW_E;
+    if (ret == 0) {
+        if (tmpl[0].ulValueLen > *outLen)
+            ret = BUFFER_E;
+    }
+    if (ret == 0) {
+        tmpl[0].pValue = out;
+        /* Second call retrieves the secret data into the caller's buffer. */
+        rv = session->func->C_GetAttributeValue(session->handle, secret,
+                                                tmpl, tmplCnt);
+        if (rv != CKR_OK)
+            ret = WC_HW_E;
+        *outLen = (word32)tmpl[0].ulValueLen;
+    }
+
+    return ret;
+}
+
+/**
+ * Performs the ECDH secret generation operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * 0 on success.
+ */
+static int Pkcs11ECDH(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ int sessionKey = 0;
+ unsigned char* point = NULL;
+ word32 pointLen;
+ CK_RV rv;
+ CK_KEY_TYPE keyType = CKK_GENERIC_SECRET;
+ CK_MECHANISM mech;
+ CK_ECDH1_DERIVE_PARAMS params;
+ CK_OBJECT_HANDLE privateKey = NULL_PTR;
+ CK_OBJECT_HANDLE secret = CK_INVALID_HANDLE;
+ CK_ULONG secSz;
+ CK_ATTRIBUTE tmpl[] = {
+ { CKA_CLASS, &secretKeyClass, sizeof(secretKeyClass) },
+ { CKA_KEY_TYPE, &keyType, sizeof(keyType) },
+ { CKA_PRIVATE, &ckFalse, sizeof(ckFalse) },
+ { CKA_SENSITIVE, &ckFalse, sizeof(ckFalse) },
+ { CKA_EXTRACTABLE, &ckTrue, sizeof(ckTrue) },
+ { CKA_VALUE_LEN, &secSz, sizeof(secSz) }
+ };
+ CK_ULONG tmplCnt = sizeof(tmpl) / sizeof(*tmpl);
+
+ ret = Pkcs11MechAvail(session, CKM_ECDH1_DERIVE);
+ if (ret == 0 && info->pk.ecdh.outlen == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ if (ret == 0) {
+ WOLFSSL_MSG("PKCS#11: EC Key Derivation Operation");
+
+
+ if ((sessionKey = !mp_iszero(&info->pk.ecdh.private_key->k)))
+ ret = Pkcs11CreateEccPrivateKey(&privateKey, session,
+ info->pk.ecdh.private_key, CKA_DERIVE);
+ else if (info->pk.ecdh.private_key->idLen > 0) {
+ ret = Pkcs11FindKeyById(&privateKey, CKO_PRIVATE_KEY, CKK_EC,
+ session, info->pk.ecdh.private_key->id,
+ info->pk.ecdh.private_key->idLen);
+ }
+ else {
+ ret = Pkcs11FindEccKey(&privateKey, CKO_PRIVATE_KEY, session,
+ info->pk.ecdh.public_key);
+ }
+ }
+ if (ret == 0) {
+ ret = wc_ecc_export_x963(info->pk.ecdh.public_key, NULL, &pointLen);
+ if (ret == LENGTH_ONLY_E) {
+ point = (unsigned char*)XMALLOC(pointLen,
+ info->pk.ecdh.public_key->heap,
+ DYNAMIC_TYPE_ECC_BUFFER);
+ ret = wc_ecc_export_x963(info->pk.ecdh.public_key, point,
+ &pointLen);
+ }
+ }
+
+ if (ret == 0) {
+ secSz = *info->pk.ecdh.outlen;
+ if (secSz > (CK_ULONG)info->pk.ecdh.private_key->dp->size)
+ secSz = info->pk.ecdh.private_key->dp->size;
+
+ params.kdf = CKD_NULL;
+ params.pSharedData = NULL;
+ params.ulSharedDataLen = 0;
+ params.pPublicData = point;
+ params.ulPublicDataLen = pointLen;
+
+ mech.mechanism = CKM_ECDH1_DERIVE;
+ mech.ulParameterLen = sizeof(params);
+ mech.pParameter = &params;
+
+ rv = session->func->C_DeriveKey(session->handle, &mech, privateKey,
+ tmpl, tmplCnt, &secret);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+ if (ret == 0) {
+ ret = Pkcs11ExtractSecret(session, secret, info->pk.ecdh.out,
+ info->pk.ecdh.outlen);
+ }
+
+ if (sessionKey)
+ session->func->C_DestroyObject(session->handle, privateKey);
+
+ if (point != NULL)
+ XFREE(point, info->pk.ecdh.public_key->heap, DYNAMIC_TYPE_ECC_BUFFER);
+
+ return ret;
+}
+#endif
+
+/**
+ * Encode, in place, the ECDSA signature.
+ * Two fixed width values into ASN.1 DER encoded SEQ { INT, INT }
+ *
+ * @param sig [in,out] Signature data.
+ * @param sz [in] Size of original signature data.
+ * @return Length of the ASN.1 DER encoded signature.
+ */
+static word32 Pkcs11ECDSASig_Encode(byte* sig, word32 sz)
+{
+ word32 rHigh, sHigh, seqLen;
+ word32 rStart = 0, sStart = 0;
+ word32 sigSz, rSz, rLen, sSz, sLen;
+ word32 i;
+
+ /* Find first byte of data in r and s. */
+ while (rStart < sz - 1 && sig[rStart] == 0x00)
+ rStart++;
+ while (sStart < sz - 1 && sig[sz + sStart] == 0x00)
+ sStart++;
+ /* Check if 0 needs to be prepended to make integer a positive number. */
+ rHigh = sig[rStart] >> 7;
+ sHigh = sig[sz + sStart] >> 7;
+ /* Calculate length of integer to put into ASN.1 encoding. */
+ rLen = sz - rStart;
+ sLen = sz - sStart;
+ /* r and s: INT (2 bytes) + [ 0x00 ] + integer */
+ rSz = 2 + rHigh + rLen;
+ sSz = 2 + sHigh + sLen;
+ /* Calculate the complete ASN.1 DER encoded size. */
+ sigSz = rSz + sSz;
+ if (sigSz >= ASN_LONG_LENGTH)
+ seqLen = 3;
+ else
+ seqLen = 2;
+
+ /* Move s and then r integers into their final places. */
+ XMEMMOVE(sig + seqLen + rSz + (sSz - sLen), sig + sz + sStart, sLen);
+ XMEMMOVE(sig + seqLen + (rSz - rLen), sig + rStart, rLen);
+
+ /* Put the ASN.1 DER encoding around data. */
+ i = 0;
+ sig[i++] = ASN_CONSTRUCTED | ASN_SEQUENCE;
+ if (seqLen == 3)
+ sig[i++] = ASN_LONG_LENGTH | 0x01;
+ sig[i++] = sigSz;
+ sig[i++] = ASN_INTEGER;
+ sig[i++] = rHigh + (sz - rStart);
+ if (rHigh)
+ sig[i++] = 0x00;
+ i += sz - rStart;
+ sig[i++] = ASN_INTEGER;
+ sig[i++] = sHigh + (sz - sStart);
+ if (sHigh)
+ sig[i] = 0x00;
+
+ return seqLen + sigSz;
+}
+
+/**
+ * Decode the ECDSA signature.
+ * ASN.1 DER encoded SEQ { INT, INT } converted to two fixed width values.
+ *
+ * @param in [in] ASN.1 DER encoded signature.
+ * @param inSz [in] Size of ASN.1 signature.
+ * @param sig [in] Output buffer.
+ * @param sz [in] Size of output buffer.
+ * @return ASN_PARSE_E when the ASN.1 encoding is invalid.
+ * 0 on success.
+ */
+static int Pkcs11ECDSASig_Decode(const byte* in, word32 inSz, byte* sig,
+ word32 sz)
+{
+ int ret = 0;
+ word32 i = 0;
+ byte tag;
+ int len, seqLen = 2;
+
+ /* Make sure zeros in place when decoding short integers. */
+ XMEMSET(sig, 0, sz * 2);
+
+ /* Check min data for: SEQ + INT. */
+ if (inSz < 5)
+ ret = ASN_PARSE_E;
+ /* Check SEQ */
+ if (ret == 0 && in[i++] != (ASN_CONSTRUCTED | ASN_SEQUENCE))
+ ret = ASN_PARSE_E;
+ if (ret == 0 && in[i] >= ASN_LONG_LENGTH) {
+ if (in[i] != (ASN_LONG_LENGTH | 0x01))
+ ret = ASN_PARSE_E;
+ else {
+ i++;
+ seqLen++;
+ }
+ }
+ if (ret == 0 && in[i++] != inSz - seqLen)
+ ret = ASN_PARSE_E;
+
+ /* Check INT */
+ if (ret == 0 && GetASNTag(in, &i, &tag, inSz) != 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && tag != ASN_INTEGER)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && (len = in[i++]) > sz + 1)
+ ret = ASN_PARSE_E;
+ /* Check there is space for INT data */
+ if (ret == 0 && i + len > inSz)
+ ret = ASN_PARSE_E;
+ if (ret == 0) {
+ /* Skip leading zero */
+ if (in[i] == 0x00) {
+ i++;
+ len--;
+ }
+ /* Copy r into sig. */
+ XMEMCPY(sig + sz - len, in + i, len);
+ i += len;
+ }
+
+ /* Check min data for: INT. */
+ if (ret == 0 && i + 2 > inSz)
+ ret = ASN_PARSE_E;
+ /* Check INT */
+ if (ret == 0 && GetASNTag(in, &i, &tag, inSz) != 0)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && tag != ASN_INTEGER)
+ ret = ASN_PARSE_E;
+ if (ret == 0 && (len = in[i++]) > sz + 1)
+ ret = ASN_PARSE_E;
+ /* Check there is space for INT data */
+ if (ret == 0 && i + len > inSz)
+ ret = ASN_PARSE_E;
+ if (ret == 0) {
+ /* Skip leading zero */
+ if (in[i] == 0x00) {
+ i++;
+ len--;
+ }
+ /* Copy s into sig. */
+ XMEMCPY(sig + sz + sz - len, in + i, len);
+ }
+
+ return ret;
+}
+
+/**
+ * Get the parameters from the private key on the device.
+ *
+ * @param session [in] Session object.
+ * @param privKey [in] PKCS #11 object handle of private key.
+ * @param key [in] Ecc key to set parameters against.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * 0 on success.
+ */
+static int Pkcs11GetEccParams(Pkcs11Session* session, CK_OBJECT_HANDLE privKey,
+ ecc_key* key)
+{
+ int ret = 0;
+ int curveId;
+ CK_RV rv;
+ byte oid[16];
+ CK_ATTRIBUTE template[] = {
+ { CKA_EC_PARAMS, (CK_VOID_PTR)oid, sizeof(oid) }
+ };
+
+ rv = session->func->C_GetAttributeValue(session->handle, privKey, template,
+ 1);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ if (ret == 0) {
+ /* PKCS #11 wraps the OID in ASN.1 */
+ curveId = wc_ecc_get_curve_id_from_oid(oid + 2,
+ (word32)template[0].ulValueLen - 2);
+ if (curveId == ECC_CURVE_INVALID)
+ ret = WC_HW_E;
+ }
+ if (ret == 0)
+ ret = wc_ecc_set_curve(key, 0, curveId);
+
+ return ret;
+}
+
+/**
+ * Performs the ECDSA signing operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * 0 on success.
+ */
+static int Pkcs11ECDSA_Sign(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ int sessionKey = 0;
+ word32 sz;
+ CK_RV rv;
+ CK_ULONG outLen;
+ CK_MECHANISM mech;
+ CK_MECHANISM_INFO mechInfo;
+ CK_OBJECT_HANDLE privateKey = NULL_PTR;
+
+ /* Check operation is supported. */
+ rv = session->func->C_GetMechanismInfo(session->slotId, CKM_ECDSA,
+ &mechInfo);
+ if (rv != CKR_OK || (mechInfo.flags & CKF_SIGN) == 0)
+ ret = NOT_COMPILED_IN;
+
+ if (ret == 0 && info->pk.eccsign.outlen == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+ if (ret == 0) {
+ WOLFSSL_MSG("PKCS#11: EC Signing Operation");
+
+ if ((sessionKey = !mp_iszero(&info->pk.eccsign.key->k)))
+ ret = Pkcs11CreateEccPrivateKey(&privateKey, session,
+ info->pk.eccsign.key, CKA_SIGN);
+ else if (info->pk.eccsign.key->idLen > 0) {
+ ret = Pkcs11FindKeyById(&privateKey, CKO_PRIVATE_KEY, CKK_EC,
+ session, info->pk.eccsign.key->id,
+ info->pk.eccsign.key->idLen);
+ if (ret == 0 && info->pk.eccsign.key->dp == NULL) {
+ ret = Pkcs11GetEccParams(session, privateKey,
+ info->pk.eccsign.key);
+ }
+ }
+ else {
+ ret = Pkcs11FindEccKey(&privateKey, CKO_PRIVATE_KEY, session,
+ info->pk.eccsign.key);
+ }
+ }
+
+ if (ret == 0) {
+ sz = info->pk.eccsign.key->dp->size;
+ /* Maximum encoded size is two ordinates + 8 bytes of ASN.1. */
+ if (*info->pk.eccsign.outlen < (word32)wc_ecc_sig_size_calc(sz))
+ ret = BUFFER_E;
+ }
+
+ if (ret == 0) {
+ mech.mechanism = CKM_ECDSA;
+ mech.ulParameterLen = 0;
+ mech.pParameter = NULL;
+
+ rv = session->func->C_SignInit(session->handle, &mech, privateKey);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+ if (ret == 0) {
+ outLen = *info->pk.eccsign.outlen;
+ rv = session->func->C_Sign(session->handle,
+ (CK_BYTE_PTR)info->pk.eccsign.in,
+ info->pk.eccsign.inlen, info->pk.eccsign.out,
+ &outLen);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+ if (ret == 0) {
+ *info->pk.eccsign.outlen = Pkcs11ECDSASig_Encode(info->pk.eccsign.out,
+ sz);
+ }
+
+ if (sessionKey)
+ session->func->C_DestroyObject(session->handle, privateKey);
+
+ return ret;
+}
+
+/**
+ * Performs the ECDSA verification operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * MEMORY_E when a memory allocation fails.
+ * 0 on success.
+ */
+static int Pkcs11ECDSA_Verify(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ CK_RV rv;
+ CK_MECHANISM mech;
+ CK_MECHANISM_INFO mechInfo;
+ CK_OBJECT_HANDLE publicKey = NULL_PTR;
+ unsigned char* sig = NULL;
+ word32 sz = info->pk.eccverify.key->dp->size;
+
+ /* Check operation is supported. */
+ rv = session->func->C_GetMechanismInfo(session->slotId, CKM_ECDSA,
+ &mechInfo);
+ if (rv != CKR_OK || (mechInfo.flags & CKF_VERIFY) == 0)
+ ret = NOT_COMPILED_IN;
+
+ if (ret == 0 && info->pk.eccverify.res == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+
+ if (ret == 0) {
+ WOLFSSL_MSG("PKCS#11: EC Verification Operation");
+
+ ret = Pkcs11CreateEccPublicKey(&publicKey, session,
+ info->pk.eccverify.key, CKA_VERIFY);
+ }
+
+ if (ret == 0) {
+ sig = XMALLOC(sz * 2, info->pk.eccverify.key->heap,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (sig == NULL)
+ ret = MEMORY_E;
+ }
+
+ if (ret == 0) {
+ ret = Pkcs11ECDSASig_Decode(info->pk.eccverify.sig,
+ info->pk.eccverify.siglen, sig, sz);
+ }
+ if (ret == 0) {
+ mech.mechanism = CKM_ECDSA;
+ mech.ulParameterLen = 0;
+ mech.pParameter = NULL;
+
+ rv = session->func->C_VerifyInit(session->handle, &mech, publicKey);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+ if (ret == 0) {
+ *info->pk.eccverify.res = 0;
+ rv = session->func->C_Verify(session->handle,
+ (CK_BYTE_PTR)info->pk.eccverify.hash,
+ info->pk.eccverify.hashlen,
+ (CK_BYTE_PTR)sig, sz * 2);
+ if (rv == CKR_SIGNATURE_INVALID) {
+ }
+ else if (rv != CKR_OK)
+ ret = WC_HW_E;
+ else
+ *info->pk.eccverify.res = 1;
+ }
+
+ if (publicKey != NULL_PTR)
+ session->func->C_DestroyObject(session->handle, publicKey);
+
+ if (sig != NULL)
+ XFREE(sig, info->pk.eccverify.key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ return ret;
+}
+#endif
+
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+/**
+ * Performs the AES-GCM encryption operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * MEMORY_E when a memory allocation fails.
+ * 0 on success.
+ */
+static int Pkcs11AesGcmEncrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ CK_RV rv;
+ Aes* aes = info->cipher.aesgcm_enc.aes;
+ CK_GCM_PARAMS params;
+ CK_MECHANISM_INFO mechInfo;
+ CK_OBJECT_HANDLE key = NULL_PTR;
+ CK_MECHANISM mech;
+ CK_ULONG outLen;
+
+ /* Check operation is supported. */
+ rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_GCM,
+ &mechInfo);
+ if (rv != CKR_OK || (mechInfo.flags & CKF_ENCRYPT) == 0)
+ ret = NOT_COMPILED_IN;
+
+ if (ret == 0) {
+ WOLFSSL_MSG("PKCS#11: AES-GCM Encryption Operation");
+ }
+
+    /* Create a secret key object or find by id. */
+ if (ret == 0 && aes->idLen == 0) {
+ ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+ (unsigned char*)aes->devKey, aes->keylen,
+ NULL, 0);
+
+ }
+ else if (ret == 0) {
+ ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+ aes->idLen);
+ }
+
+ if (ret == 0) {
+ params.pIv = (CK_BYTE_PTR)info->cipher.aesgcm_enc.iv;
+ params.ulIvLen = info->cipher.aesgcm_enc.ivSz;
+ params.pAAD = (CK_BYTE_PTR)info->cipher.aesgcm_enc.authIn;
+ params.ulAADLen = info->cipher.aesgcm_enc.authInSz;
+ params.ulTagBits = info->cipher.aesgcm_enc.authTagSz * 8;
+
+ mech.mechanism = CKM_AES_GCM;
+ mech.ulParameterLen = sizeof(params);
+ mech.pParameter = &params;
+
+ rv = session->func->C_EncryptInit(session->handle, &mech, key);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ if (ret == 0) {
+ outLen = info->cipher.aesgcm_enc.sz;
+ rv = session->func->C_EncryptUpdate(session->handle,
+ (CK_BYTE_PTR)info->cipher.aesgcm_enc.in,
+ info->cipher.aesgcm_enc.sz,
+ info->cipher.aesgcm_enc.out,
+ &outLen);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ if (ret == 0) {
+ /* Authentication tag comes out in final block. */
+ outLen = info->cipher.aesgcm_enc.authTagSz;
+ rv = session->func->C_EncryptFinal(session->handle,
+ info->cipher.aesgcm_enc.authTag,
+ &outLen);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+ if (aes->idLen == 0 && key != NULL_PTR)
+ session->func->C_DestroyObject(session->handle, key);
+
+ return ret;
+}
+
+/**
+ * Performs the AES-GCM decryption operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * MEMORY_E when a memory allocation fails.
+ * 0 on success.
+ */
+static int Pkcs11AesGcmDecrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ CK_RV rv;
+ Aes* aes = info->cipher.aesgcm_enc.aes;
+ CK_GCM_PARAMS params;
+ CK_MECHANISM_INFO mechInfo;
+ CK_OBJECT_HANDLE key = NULL_PTR;
+ CK_MECHANISM mech;
+ CK_ULONG outLen;
+ word32 len;
+
+ /* Check operation is supported. */
+ rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_GCM,
+ &mechInfo);
+ if (rv != CKR_OK || (mechInfo.flags & CKF_DECRYPT) == 0)
+ ret = NOT_COMPILED_IN;
+
+ if (ret == 0) {
+ WOLFSSL_MSG("PKCS#11: AES-GCM Decryption Operation");
+ }
+
+    /* Create a secret key object or find by id. */
+ if (ret == 0 && aes->idLen == 0) {
+ ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+ (unsigned char*)aes->devKey, aes->keylen,
+ NULL, 0);
+ }
+ else if (ret == 0) {
+ ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+ aes->idLen);
+ }
+
+ if (ret == 0) {
+ params.pIv = (CK_BYTE_PTR)info->cipher.aesgcm_dec.iv;
+ params.ulIvLen = info->cipher.aesgcm_dec.ivSz;
+ params.pAAD = (CK_BYTE_PTR)info->cipher.aesgcm_dec.authIn;
+ params.ulAADLen = info->cipher.aesgcm_dec.authInSz;
+ params.ulTagBits = info->cipher.aesgcm_dec.authTagSz * 8;
+
+ mech.mechanism = CKM_AES_GCM;
+ mech.ulParameterLen = sizeof(params);
+ mech.pParameter = &params;
+
+ rv = session->func->C_DecryptInit(session->handle, &mech, key);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ if (ret == 0) {
+ outLen = len = info->cipher.aesgcm_dec.sz;
+ rv = session->func->C_DecryptUpdate(session->handle,
+ (CK_BYTE_PTR)info->cipher.aesgcm_dec.in,
+ info->cipher.aesgcm_dec.sz,
+ info->cipher.aesgcm_dec.out,
+ &outLen);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ if (ret == 0) {
+ /* Put authentication tag in as encrypted data. */
+ outLen = len = (len + info->cipher.aesgcm_dec.authTagSz -
+ (word32)outLen);
+ rv = session->func->C_DecryptUpdate(session->handle,
+ (CK_BYTE_PTR)info->cipher.aesgcm_dec.authTag,
+ info->cipher.aesgcm_dec.authTagSz,
+ info->cipher.aesgcm_dec.out,
+ &outLen);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ if (ret == 0) {
+ outLen = len = (len - (word32)outLen);
+ /* Decrypted data comes out now. */
+ rv = session->func->C_DecryptFinal(session->handle,
+ info->cipher.aesgcm_dec.out,
+ &outLen);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+ if (aes->idLen == 0 && key != NULL_PTR)
+ session->func->C_DestroyObject(session->handle, key);
+
+ return ret;
+}
+#endif
+
+#if !defined(NO_AES) && defined(HAVE_AES_CBC)
+/**
+ * Performs the AES-CBC encryption operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * MEMORY_E when a memory allocation fails.
+ * 0 on success.
+ */
+static int Pkcs11AesCbcEncrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ CK_RV rv;
+ Aes* aes = info->cipher.aescbc.aes;
+ CK_MECHANISM_INFO mechInfo;
+ CK_OBJECT_HANDLE key = NULL_PTR;
+ CK_MECHANISM mech;
+ CK_ULONG outLen;
+
+ /* Check operation is supported. */
+ rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_CBC,
+ &mechInfo);
+ if (rv != CKR_OK || (mechInfo.flags & CKF_ENCRYPT) == 0)
+ ret = NOT_COMPILED_IN;
+
+ if (ret == 0) {
+ WOLFSSL_MSG("PKCS#11: AES-CBC Encryption Operation");
+ }
+
+    /* Create a secret key object or find by id. */
+ if (ret == 0 && aes->idLen == 0) {
+ ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+ (unsigned char*)aes->devKey, aes->keylen,
+ NULL, 0);
+
+ }
+ else if (ret == 0) {
+ ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+ aes->idLen);
+ }
+
+ if (ret == 0) {
+ mech.mechanism = CKM_AES_CBC;
+ mech.ulParameterLen = AES_BLOCK_SIZE;
+ mech.pParameter = (CK_BYTE_PTR)info->cipher.aescbc.aes->reg;
+
+ rv = session->func->C_EncryptInit(session->handle, &mech, key);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ if (ret == 0) {
+ outLen = info->cipher.aescbc.sz;
+ rv = session->func->C_Encrypt(session->handle,
+ (CK_BYTE_PTR)info->cipher.aescbc.in,
+ info->cipher.aescbc.sz,
+ info->cipher.aescbc.out,
+ &outLen);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+ if (aes->idLen == 0 && key != NULL_PTR)
+ session->func->C_DestroyObject(session->handle, key);
+
+ return ret;
+}
+
+/**
+ * Performs the AES-CBC decryption operation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * MEMORY_E when a memory allocation fails.
+ * 0 on success.
+ */
+static int Pkcs11AesCbcDecrypt(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ CK_RV rv;
+ Aes* aes = info->cipher.aescbc.aes;
+ CK_MECHANISM_INFO mechInfo;
+ CK_OBJECT_HANDLE key = NULL_PTR;
+ CK_MECHANISM mech;
+ CK_ULONG outLen;
+
+ /* Check operation is supported. */
+ rv = session->func->C_GetMechanismInfo(session->slotId, CKM_AES_CBC,
+ &mechInfo);
+ if (rv != CKR_OK || (mechInfo.flags & CKF_DECRYPT) == 0)
+ ret = NOT_COMPILED_IN;
+
+ if (ret == 0) {
+ WOLFSSL_MSG("PKCS#11: AES-CBC Decryption Operation");
+ }
+
+    /* Create a secret key object or find by id. */
+ if (ret == 0 && aes->idLen == 0) {
+ ret = Pkcs11CreateSecretKey(&key, session, CKK_AES,
+ (unsigned char*)aes->devKey, aes->keylen,
+ NULL, 0);
+ }
+ else if (ret == 0) {
+ ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, CKK_AES, session, aes->id,
+ aes->idLen);
+ }
+
+ if (ret == 0) {
+ mech.mechanism = CKM_AES_CBC;
+ mech.ulParameterLen = AES_BLOCK_SIZE;
+ mech.pParameter = (CK_BYTE_PTR)info->cipher.aescbc.aes->reg;
+
+ rv = session->func->C_DecryptInit(session->handle, &mech, key);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ if (ret == 0) {
+ outLen = info->cipher.aescbc.sz;
+ rv = session->func->C_DecryptUpdate(session->handle,
+ (CK_BYTE_PTR)info->cipher.aescbc.in,
+ info->cipher.aescbc.sz,
+ info->cipher.aescbc.out,
+ &outLen);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+ if (aes->idLen == 0 && key != NULL_PTR)
+ session->func->C_DestroyObject(session->handle, key);
+
+ return ret;
+}
+#endif
+
+#ifndef NO_HMAC
+/**
+ * Updates or calculates the HMAC of the data.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * 0 on success.
+ */
+static int Pkcs11Hmac(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ CK_RV rv;
+ Hmac* hmac = info->hmac.hmac;
+ CK_MECHANISM_INFO mechInfo;
+ CK_OBJECT_HANDLE key = NULL_PTR;
+ CK_MECHANISM mech;
+ CK_ULONG outLen;
+ int mechType;
+ int keyType;
+
+ if (hmac->innerHashKeyed == WC_HMAC_INNER_HASH_KEYED_SW)
+ ret = NOT_COMPILED_IN;
+
+ if (ret == 0)
+ ret = Pkcs11HmacTypes(info->hmac.macType, &mechType, &keyType);
+ if (ret == 0) {
+ /* Check operation is supported. */
+ rv = session->func->C_GetMechanismInfo(session->slotId, mechType,
+ &mechInfo);
+ if (rv != CKR_OK || (mechInfo.flags & CKF_SIGN) == 0)
+ ret = NOT_COMPILED_IN;
+ }
+
+    /* Check whether the key has already been used to initialize. */
+ if (ret == 0 && !hmac->innerHashKeyed) {
+ WOLFSSL_MSG("PKCS#11: HMAC Init");
+
+ /* Check device supports key length. */
+ if (mechInfo.ulMaxKeySize > 0 &&
+ (hmac->keyLen < mechInfo.ulMinKeySize ||
+ hmac->keyLen > mechInfo.ulMaxKeySize)) {
+ WOLFSSL_MSG("PKCS#11: Key Length not supported");
+ ret = NOT_COMPILED_IN;
+ }
+
+ /* Create a private key object or find by id. */
+ if (ret == 0 && hmac->idLen == 0) {
+ ret = Pkcs11CreateSecretKey(&key, session, keyType,
+ (unsigned char*)hmac->keyRaw, hmac->keyLen,
+ NULL, 0);
+ if (ret == WC_HW_E) {
+ ret = Pkcs11CreateSecretKey(&key, session, CKK_GENERIC_SECRET,
+ (unsigned char*)hmac->keyRaw, hmac->keyLen,
+ NULL, 0);
+ }
+
+ }
+ else if (ret == 0) {
+ ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY, keyType, session,
+ hmac->id, hmac->idLen);
+ if (ret == WC_HW_E) {
+ ret = Pkcs11FindKeyById(&key, CKO_SECRET_KEY,
+ CKK_GENERIC_SECRET, session, hmac->id,
+ hmac->idLen);
+ }
+ }
+
+ /* Initialize HMAC operation */
+ if (ret == 0) {
+ mech.mechanism = mechType;
+ mech.ulParameterLen = 0;
+ mech.pParameter = NULL;
+
+ rv = session->func->C_SignInit(session->handle, &mech, key);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+
+        /* Don't initialize HMAC again if this succeeded */
+ if (ret == 0)
+ hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_DEV;
+ }
+ /* Update the HMAC if input data passed in. */
+ if (ret == 0 && info->hmac.inSz > 0) {
+ WOLFSSL_MSG("PKCS#11: HMAC Update");
+
+ rv = session->func->C_SignUpdate(session->handle,
+ (CK_BYTE_PTR)info->hmac.in,
+ info->hmac.inSz);
+ /* Some algorithm implementations only support C_Sign. */
+ if (rv == CKR_MECHANISM_INVALID) {
+ WOLFSSL_MSG("PKCS#11: HMAC Update/Final not supported");
+ ret = NOT_COMPILED_IN;
+ /* Allow software implementation to set key. */
+ hmac->innerHashKeyed = 0;
+ }
+ else if (rv != CKR_OK)
+ ret = WC_HW_E;
+ }
+ /* Calculate the HMAC result if output buffer specified. */
+ if (ret == 0 && info->hmac.digest != NULL) {
+ WOLFSSL_MSG("PKCS#11: HMAC Final");
+
+ outLen = WC_MAX_DIGEST_SIZE;
+ rv = session->func->C_SignFinal(session->handle,
+ (CK_BYTE_PTR)info->hmac.digest,
+ &outLen);
+ /* Some algorithm implementations only support C_Sign. */
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ else
+ hmac->innerHashKeyed = 0;
+ }
+
+ if (hmac->idLen == 0 && key != NULL_PTR)
+ session->func->C_DestroyObject(session->handle, key);
+
+ return ret;
+}
+#endif
+
+#ifndef WC_NO_RNG
+#ifndef HAVE_HASHDRBG
+/**
+ * Performs random number generation.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * 0 on success.
+ */
+static int Pkcs11RandomBlock(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ CK_RV rv;
+
+ rv = session->func->C_GenerateRandom(session->handle, info->rng.out,
+ info->rng.sz);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ return ret;
+}
+#endif
+
+/**
+ * Generates entropy (seed) data.
+ *
+ * @param session [in] Session object.
+ * @param info [in] Cryptographic operation data.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * 0 on success.
+ */
+static int Pkcs11RandomSeed(Pkcs11Session* session, wc_CryptoInfo* info)
+{
+ int ret = 0;
+ CK_RV rv;
+
+ rv = session->func->C_GenerateRandom(session->handle, info->seed.seed,
+ info->seed.sz);
+ if (rv != CKR_OK)
+ ret = WC_HW_E;
+ return ret;
+}
+#endif
+
+/**
+ * Perform a cryptographic operation using PKCS#11 device.
+ *
+ * @param devId [in] Device identifier.
+ * @param info [in] Cryptographic operation data.
+ * @param ctx [in] Context data for device - the token object.
+ * @return WC_HW_E when a PKCS#11 library call fails.
+ * 0 on success.
+ */
+int wc_Pkcs11_CryptoDevCb(int devId, wc_CryptoInfo* info, void* ctx)
+{
+ int ret = 0;
+ Pkcs11Token* token = (Pkcs11Token*)ctx;
+ Pkcs11Session session;
+ int readWrite = 0;
+
+ if (devId <= INVALID_DEVID || info == NULL || ctx == NULL)
+ ret = BAD_FUNC_ARG;
+
+ if (ret == 0) {
+ ret = Pkcs11OpenSession(token, &session, readWrite);
+ if (ret == 0) {
+ if (info->algo_type == WC_ALGO_TYPE_PK) {
+#if !defined(NO_RSA) || defined(HAVE_ECC)
+ switch (info->pk.type) {
+ #ifndef NO_RSA
+ case WC_PK_TYPE_RSA:
+ ret = Pkcs11Rsa(&session, info);
+ break;
+ #ifdef WOLFSSL_KEY_GEN
+ case WC_PK_TYPE_RSA_KEYGEN:
+ ret = Pkcs11RsaKeyGen(&session, info);
+ break;
+ #endif
+ #endif
+ #ifdef HAVE_ECC
+ #ifndef NO_PKCS11_EC_KEYGEN
+ case WC_PK_TYPE_EC_KEYGEN:
+ ret = Pkcs11EcKeyGen(&session, info);
+ break;
+ #endif
+ #ifndef NO_PKCS11_ECDH
+ case WC_PK_TYPE_ECDH:
+ ret = Pkcs11ECDH(&session, info);
+ break;
+ #endif
+ case WC_PK_TYPE_ECDSA_SIGN:
+ ret = Pkcs11ECDSA_Sign(&session, info);
+ break;
+ case WC_PK_TYPE_ECDSA_VERIFY:
+ ret = Pkcs11ECDSA_Verify(&session, info);
+ break;
+ #endif
+ default:
+ ret = NOT_COMPILED_IN;
+ break;
+ }
+#else
+ ret = NOT_COMPILED_IN;
+#endif /* !NO_RSA || HAVE_ECC */
+ }
+ else if (info->algo_type == WC_ALGO_TYPE_CIPHER) {
+ #ifndef NO_AES
+ switch (info->cipher.type) {
+ #ifdef HAVE_AESGCM
+ case WC_CIPHER_AES_GCM:
+ if (info->cipher.enc)
+ ret = Pkcs11AesGcmEncrypt(&session, info);
+ else
+ ret = Pkcs11AesGcmDecrypt(&session, info);
+ break;
+ #endif
+ #ifdef HAVE_AES_CBC
+ case WC_CIPHER_AES_CBC:
+ if (info->cipher.enc)
+ ret = Pkcs11AesCbcEncrypt(&session, info);
+ else
+ ret = Pkcs11AesCbcDecrypt(&session, info);
+ break;
+ #endif
+ }
+ #else
+ ret = NOT_COMPILED_IN;
+ #endif
+ }
+ else if (info->algo_type == WC_ALGO_TYPE_HMAC) {
+ #ifndef NO_HMAC
+ ret = Pkcs11Hmac(&session, info);
+ #else
+ ret = NOT_COMPILED_IN;
+ #endif
+ }
+ else if (info->algo_type == WC_ALGO_TYPE_RNG) {
+ #if !defined(WC_NO_RNG) && !defined(HAVE_HASHDRBG)
+ ret = Pkcs11RandomBlock(&session, info);
+ #else
+ ret = NOT_COMPILED_IN;
+ #endif
+ }
+ else if (info->algo_type == WC_ALGO_TYPE_SEED) {
+ #ifndef WC_NO_RNG
+ ret = Pkcs11RandomSeed(&session, info);
+ #else
+ ret = NOT_COMPILED_IN;
+ #endif
+ }
+ else
+ ret = NOT_COMPILED_IN;
+
+ Pkcs11CloseSession(token, &session);
+ }
+ }
+
+ return ret;
+}
+
+#endif /* HAVE_PKCS11 */
+
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_port.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_port.c
index 419033751..087807b71 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_port.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wc_port.c
@@ -1,8 +1,8 @@
/* port.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,9 +16,10 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -26,631 +27,2250 @@
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/types.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/wc_port.h>
+#ifdef HAVE_ECC
+ #include <wolfssl/wolfcrypt/ecc.h>
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+ #include <wolfssl/wolfcrypt/async.h>
+#endif
+
+/* IPP header files for library initialization */
+#ifdef HAVE_FAST_RSA
+ #include <ipp.h>
+ #include <ippcp.h>
+#endif
+
+#ifdef FREESCALE_LTC_TFM
+ #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
+#endif
+
+#if defined(WOLFSSL_ATMEL) || defined(WOLFSSL_ATECC508A)
+ #include <wolfssl/wolfcrypt/port/atmel/atmel.h>
+#endif
+#if defined(WOLFSSL_RENESAS_TSIP)
+ #include <wolfssl/wolfcrypt/port/Renesas/renesas-tsip-crypt.h>
+#endif
+#if defined(WOLFSSL_STSAFEA100)
+ #include <wolfssl/wolfcrypt/port/st/stsafe.h>
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+ #include <wolfssl/openssl/evp.h>
+#endif
+
+#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
+ #include <wolfssl/wolfcrypt/memory.h>
+ #include <wolfssl/wolfcrypt/mem_track.h>
+#endif
+
+#if defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG) || \
+ defined(WOLFSSL_IMX6_CAAM_BLOB)
+ #include <wolfssl/wolfcrypt/port/caam/wolfcaam.h>
+#endif
+
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+#endif
+
+#ifdef HAVE_INTEL_QA_SYNC
+ #include <wolfssl/wolfcrypt/port/intel/quickassist_sync.h>
+#endif
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+ #include <wolfssl/wolfcrypt/port/cavium/cavium_octeon_sync.h>
+#endif
+
+#ifdef WOLFSSL_SCE
+ #include "hal_data.h"
+#endif
+
+#if defined(WOLFSSL_DSP) && !defined(WOLFSSL_DSP_BUILD)
+ #include "rpcmem.h"
+#endif
#ifdef _MSC_VER
/* 4996 warning to use MS extensions e.g., strcpy_s instead of strncpy */
#pragma warning(disable: 4996)
#endif
+/* prevent multiple mutex initializations */
+static volatile int initRefCount = 0;
+
+/* Used to initialize state for wolfcrypt
+ return 0 on success
+ */
+int wolfCrypt_Init(void)
+{
+ int ret = 0;
+
+ if (initRefCount == 0) {
+ WOLFSSL_ENTER("wolfCrypt_Init");
+ #ifdef WOLFSSL_FORCE_MALLOC_FAIL_TEST
+ {
+ word32 rngMallocFail;
+ time_t seed = time(NULL);
+ srand((word32)seed);
+ rngMallocFail = rand() % 2000; /* max 2000 */
+ printf("\n--- RNG MALLOC FAIL AT %d---\n", rngMallocFail);
+ wolfSSL_SetMemFailCount(rngMallocFail);
+ }
+ #endif
-#ifdef SINGLE_THREADED
+ #ifdef WOLF_CRYPTO_CB
+ wc_CryptoCb_Init();
+ #endif
-int InitMutex(wolfSSL_Mutex* m)
-{
- (void)m;
- return 0;
-}
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_HardwareStart();
+ if (ret != 0) {
+ WOLFSSL_MSG("Async hardware start failed");
+ /* don't return failure, allow operation to continue */
+ }
+ #endif
+
+ #if defined(WOLFSSL_RENESAS_TSIP_CRYPT)
+ ret = tsip_Open( );
+ if( ret != TSIP_SUCCESS ) {
+ WOLFSSL_MSG("RENESAS TSIP Open failed");
+ /* not return 1 since WOLFSSL_SUCCESS=1*/
+ ret = -1;/* FATAL ERROR */
+ return ret;
+ }
+ #endif
+ #if defined(WOLFSSL_TRACK_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
+ ret = InitMemoryTracker();
+ if (ret != 0) {
+ WOLFSSL_MSG("InitMemoryTracker failed");
+ return ret;
+ }
+ #endif
+
+ #if WOLFSSL_CRYPT_HW_MUTEX
+ /* If crypto hardware mutex protection is enabled, then initialize it */
+ ret = wolfSSL_CryptHwMutexInit();
+ if (ret != 0) {
+ WOLFSSL_MSG("Hw crypt mutex init failed");
+ return ret;
+ }
+ #endif
+
+ /* if defined have fast RSA then initialize Intel IPP */
+ #ifdef HAVE_FAST_RSA
+ WOLFSSL_MSG("Attempting to use optimized IPP Library");
+ if ((ret = ippInit()) != ippStsNoErr) {
+ /* possible to get a CPU feature support status on optimized IPP
+ library but still use default library and see competitive speeds */
+ WOLFSSL_MSG("Warning when trying to set up optimization");
+ WOLFSSL_MSG(ippGetStatusString(ret));
+ WOLFSSL_MSG("Using default fast IPP library");
+ ret = 0;
+ (void)ret; /* suppress not read warning */
+ }
+ #endif
-int FreeMutex(wolfSSL_Mutex *m)
-{
- (void)m;
- return 0;
+ #if defined(FREESCALE_LTC_TFM) || defined(FREESCALE_LTC_ECC)
+ ret = ksdk_port_init();
+ if (ret != 0) {
+ WOLFSSL_MSG("KSDK port init failed");
+ return ret;
+ }
+ #endif
+
+ #if defined(WOLFSSL_ATMEL) || defined(WOLFSSL_ATECC508A)
+ ret = atmel_init();
+ if (ret != 0) {
+ WOLFSSL_MSG("CryptoAuthLib init failed");
+ return ret;
+ }
+ #endif
+ #if defined(WOLFSSL_CRYPTOCELL)
+ /* enable and initialize the ARM CryptoCell 3xx runtime library */
+ ret = cc310_Init();
+ if (ret != 0) {
+ WOLFSSL_MSG("CRYPTOCELL init failed");
+ return ret;
+ }
+ #endif
+ #if defined(WOLFSSL_STSAFEA100)
+ stsafe_interface_init();
+ #endif
+
+ #ifdef WOLFSSL_ARMASM
+ WOLFSSL_MSG("Using ARM hardware acceleration");
+ #endif
+
+ #ifdef WOLFSSL_AFALG
+ WOLFSSL_MSG("Using AF_ALG for crypto acceleration");
+ #endif
+
+ #if !defined(WOLFCRYPT_ONLY) && defined(OPENSSL_EXTRA)
+ wolfSSL_EVP_init();
+ #endif
+
+ #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+ if ((ret = wc_LoggingInit()) != 0) {
+ WOLFSSL_MSG("Error creating logging mutex");
+ return ret;
+ }
+ #endif
+
+#ifdef HAVE_ECC
+ #ifdef ECC_CACHE_CURVE
+ if ((ret = wc_ecc_curve_cache_init()) != 0) {
+ WOLFSSL_MSG("Error creating curve cache");
+ return ret;
+ }
+ #endif
+#endif
+
+#ifdef WOLFSSL_SCE
+ ret = (int)WOLFSSL_SCE_GSCE_HANDLE.p_api->open(
+ WOLFSSL_SCE_GSCE_HANDLE.p_ctrl, WOLFSSL_SCE_GSCE_HANDLE.p_cfg);
+ if (ret == SSP_ERR_CRYPTO_SCE_ALREADY_OPEN) {
+ WOLFSSL_MSG("SCE already open");
+ ret = 0;
+ }
+ if (ret != SSP_SUCCESS) {
+ WOLFSSL_MSG("Error opening SCE");
+ return -1; /* FATAL_ERROR */
+ }
+#endif
+
+#if defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG) || \
+ defined(WOLFSSL_IMX6_CAAM_BLOB)
+ if ((ret = wc_caamInit()) != 0) {
+ return ret;
+ }
+#endif
+
+#if defined(WOLFSSL_DSP) && !defined(WOLFSSL_DSP_BUILD)
+ if ((ret = wolfSSL_InitHandle()) != 0) {
+ return ret;
+ }
+ rpcmem_init();
+#endif
+ }
+ initRefCount++;
+
+ return ret;
}
-int LockMutex(wolfSSL_Mutex *m)
+/* return success value is the same as wolfCrypt_Init */
+int wolfCrypt_Cleanup(void)
{
- (void)m;
- return 0;
+ int ret = 0;
+
+ initRefCount--;
+ if (initRefCount < 0)
+ initRefCount = 0;
+
+ if (initRefCount == 0) {
+ WOLFSSL_ENTER("wolfCrypt_Cleanup");
+
+#ifdef HAVE_ECC
+ #ifdef FP_ECC
+ wc_ecc_fp_free();
+ #endif
+ #ifdef ECC_CACHE_CURVE
+ wc_ecc_curve_cache_free();
+ #endif
+#endif /* HAVE_ECC */
+
+ #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+ ret = wc_LoggingCleanup();
+ #endif
+
+ #if defined(WOLFSSL_TRACK_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
+ ShowMemoryTracker();
+ #endif
+
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ wolfAsync_HardwareStop();
+ #endif
+ #ifdef WOLFSSL_SCE
+ WOLFSSL_SCE_GSCE_HANDLE.p_api->close(WOLFSSL_SCE_GSCE_HANDLE.p_ctrl);
+ #endif
+ #if defined(WOLFSSL_IMX6_CAAM) || defined(WOLFSSL_IMX6_CAAM_RNG) || \
+ defined(WOLFSSL_IMX6_CAAM_BLOB)
+ wc_caamFree();
+ #endif
+ #if defined(WOLFSSL_CRYPTOCELL)
+ cc310_Free();
+ #endif
+ #if defined(WOLFSSL_RENESAS_TSIP_CRYPT)
+ tsip_Close();
+ #endif
+ #if defined(WOLFSSL_DSP) && !defined(WOLFSSL_DSP_BUILD)
+ rpcmem_deinit();
+ wolfSSL_CleanupHandle();
+ #endif
+ }
+
+ return ret;
}
+#if !defined(NO_FILESYSTEM) && !defined(NO_WOLFSSL_DIR) && \
+ !defined(WOLFSSL_NUCLEUS) && !defined(WOLFSSL_NUCLEUS_1_2)
-int UnLockMutex(wolfSSL_Mutex *m)
+/* File Handling Helpers */
+/* returns 0 if file found, WC_READDIR_NOFILE if no files or negative error */
+int wc_ReadDirFirst(ReadDirCtx* ctx, const char* path, char** name)
{
- (void)m;
- return 0;
-}
+ int ret = WC_READDIR_NOFILE; /* default to no files found */
+ int pathLen = 0;
+ int dnameLen = 0;
-#else /* MULTI_THREAD */
+ if (name)
+ *name = NULL;
- #if defined(FREERTOS)
+ if (ctx == NULL || path == NULL) {
+ return BAD_FUNC_ARG;
+ }
- int InitMutex(wolfSSL_Mutex* m)
- {
- int iReturn;
+ XMEMSET(ctx->name, 0, MAX_FILENAME_SZ);
+ pathLen = (int)XSTRLEN(path);
- *m = ( wolfSSL_Mutex ) xSemaphoreCreateMutex();
- if( *m != NULL )
- iReturn = 0;
- else
- iReturn = BAD_MUTEX_E;
+#ifdef USE_WINDOWS_API
+ if (pathLen > MAX_FILENAME_SZ - 3)
+ return BAD_PATH_ERROR;
- return iReturn;
- }
+ XSTRNCPY(ctx->name, path, MAX_FILENAME_SZ - 3);
+ XSTRNCPY(ctx->name + pathLen, "\\*", MAX_FILENAME_SZ - pathLen);
- int FreeMutex(wolfSSL_Mutex* m)
- {
- vSemaphoreDelete( *m );
+ ctx->hFind = FindFirstFileA(ctx->name, &ctx->FindFileData);
+ if (ctx->hFind == INVALID_HANDLE_VALUE) {
+ WOLFSSL_MSG("FindFirstFile for path verify locations failed");
+ return BAD_PATH_ERROR;
+ }
+
+ do {
+ if (!(ctx->FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ dnameLen = (int)XSTRLEN(ctx->FindFileData.cFileName);
+
+ if (pathLen + dnameLen + 2 > MAX_FILENAME_SZ) {
+ return BAD_PATH_ERROR;
+ }
+ XSTRNCPY(ctx->name, path, pathLen + 1);
+ ctx->name[pathLen] = '\\';
+ XSTRNCPY(ctx->name + pathLen + 1,
+ ctx->FindFileData.cFileName,
+ MAX_FILENAME_SZ - pathLen - 1);
+ if (name)
+ *name = ctx->name;
return 0;
}
-
- int LockMutex(wolfSSL_Mutex* m)
- {
- /* Assume an infinite block, or should there be zero block? */
- xSemaphoreTake( *m, portMAX_DELAY );
+ } while (FindNextFileA(ctx->hFind, &ctx->FindFileData));
+#elif defined(WOLFSSL_ZEPHYR)
+ if (fs_opendir(&ctx->dir, path) != 0) {
+ WOLFSSL_MSG("opendir path verify locations failed");
+ return BAD_PATH_ERROR;
+ }
+ ctx->dirp = &ctx->dir;
+
+ while ((fs_readdir(&ctx->dir, &ctx->entry)) != 0) {
+ dnameLen = (int)XSTRLEN(ctx->entry.name);
+
+ if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+ ret = BAD_PATH_ERROR;
+ break;
+ }
+ XSTRNCPY(ctx->name, path, pathLen + 1);
+ ctx->name[pathLen] = '/';
+
+ /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+ * of earlier check it is known that dnameLen is less than
+ * MAX_FILENAME_SZ - (pathLen + 2) so dnameLen +1 will fit */
+ XSTRNCPY(ctx->name + pathLen + 1, ctx->entry.name, dnameLen + 1);
+ if (fs_stat(ctx->name, &ctx->s) != 0) {
+ WOLFSSL_MSG("stat on name failed");
+ ret = BAD_PATH_ERROR;
+ break;
+ } else if (ctx->s.type == FS_DIR_ENTRY_FILE) {
+ if (name)
+ *name = ctx->name;
return 0;
}
-
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- xSemaphoreGive( *m );
+ }
+#elif defined(WOLFSSL_TELIT_M2MB)
+ ctx->dir = m2mb_fs_opendir((const CHAR*)path);
+ if (ctx->dir == NULL) {
+ WOLFSSL_MSG("opendir path verify locations failed");
+ return BAD_PATH_ERROR;
+ }
+
+ while ((ctx->entry = m2mb_fs_readdir(ctx->dir)) != NULL) {
+ dnameLen = (int)XSTRLEN(ctx->entry->d_name);
+
+ if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+ ret = BAD_PATH_ERROR;
+ break;
+ }
+ XSTRNCPY(ctx->name, path, pathLen + 1);
+ ctx->name[pathLen] = '/';
+
+ /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+ * of earlier check it is known that dnameLen is less than
+ * MAX_FILENAME_SZ - (pathLen + 2) so dnameLen +1 will fit */
+ XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1);
+
+ if (m2mb_fs_stat(ctx->name, &ctx->s) != 0) {
+ WOLFSSL_MSG("stat on name failed");
+ ret = BAD_PATH_ERROR;
+ break;
+ }
+ else if (ctx->s.st_mode & M2MB_S_IFREG) {
+ if (name)
+ *name = ctx->name;
+ return 0;
+ }
+ }
+#else
+ ctx->dir = opendir(path);
+ if (ctx->dir == NULL) {
+ WOLFSSL_MSG("opendir path verify locations failed");
+ return BAD_PATH_ERROR;
+ }
+
+ while ((ctx->entry = readdir(ctx->dir)) != NULL) {
+ dnameLen = (int)XSTRLEN(ctx->entry->d_name);
+
+ if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+ ret = BAD_PATH_ERROR;
+ break;
+ }
+ XSTRNCPY(ctx->name, path, pathLen + 1);
+ ctx->name[pathLen] = '/';
+
+ /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+ * of earlier check it is known that dnameLen is less than
+ * MAX_FILENAME_SZ - (pathLen + 2) so dnameLen +1 will fit */
+ XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1);
+ if (stat(ctx->name, &ctx->s) != 0) {
+ WOLFSSL_MSG("stat on name failed");
+ ret = BAD_PATH_ERROR;
+ break;
+ } else if (S_ISREG(ctx->s.st_mode)) {
+ if (name)
+ *name = ctx->name;
return 0;
}
+ }
+#endif
+ wc_ReadDirClose(ctx);
- #elif defined(WOLFSSL_SAFERTOS)
+ return ret;
+}
- int InitMutex(wolfSSL_Mutex* m)
- {
- vSemaphoreCreateBinary(m->mutexBuffer, m->mutex);
- if (m->mutex == NULL)
- return BAD_MUTEX_E;
+/* returns 0 if file found, WC_READDIR_NOFILE if no more files */
+int wc_ReadDirNext(ReadDirCtx* ctx, const char* path, char** name)
+{
+ int ret = WC_READDIR_NOFILE; /* default to no file found */
+ int pathLen = 0;
+ int dnameLen = 0;
+
+ if (name)
+ *name = NULL;
+ if (ctx == NULL || path == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ XMEMSET(ctx->name, 0, MAX_FILENAME_SZ);
+ pathLen = (int)XSTRLEN(path);
+
+#ifdef USE_WINDOWS_API
+ while (FindNextFileA(ctx->hFind, &ctx->FindFileData)) {
+ if (!(ctx->FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ dnameLen = (int)XSTRLEN(ctx->FindFileData.cFileName);
+
+ if (pathLen + dnameLen + 2 > MAX_FILENAME_SZ) {
+ return BAD_PATH_ERROR;
+ }
+ XSTRNCPY(ctx->name, path, pathLen + 1);
+ ctx->name[pathLen] = '\\';
+ XSTRNCPY(ctx->name + pathLen + 1,
+ ctx->FindFileData.cFileName,
+ MAX_FILENAME_SZ - pathLen - 1);
+ if (name)
+ *name = ctx->name;
return 0;
}
-
- int FreeMutex(wolfSSL_Mutex* m)
- {
- (void)m;
+ }
+#elif defined(WOLFSSL_ZEPHYR)
+ while ((fs_readdir(&ctx->dir, &ctx->entry)) != 0) {
+ dnameLen = (int)XSTRLEN(ctx->entry.name);
+
+ if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+ ret = BAD_PATH_ERROR;
+ break;
+ }
+ XSTRNCPY(ctx->name, path, pathLen + 1);
+ ctx->name[pathLen] = '/';
+ /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+ * of earlier check it is known that dnameLen is less than
+ * MAX_FILENAME_SZ - (pathLen + 2) so that dnameLen +1 will fit */
+ XSTRNCPY(ctx->name + pathLen + 1, ctx->entry.name, dnameLen + 1);
+
+ if (fs_stat(ctx->name, &ctx->s) != 0) {
+ WOLFSSL_MSG("stat on name failed");
+ ret = BAD_PATH_ERROR;
+ break;
+ } else if (ctx->s.type == FS_DIR_ENTRY_FILE) {
+ if (name)
+ *name = ctx->name;
return 0;
}
-
- int LockMutex(wolfSSL_Mutex* m)
- {
- /* Assume an infinite block */
- xSemaphoreTake(m->mutex, portMAX_DELAY);
+ }
+#elif defined(WOLFSSL_TELIT_M2MB)
+ while ((ctx->entry = m2mb_fs_readdir(ctx->dir)) != NULL) {
+ dnameLen = (int)XSTRLEN(ctx->entry->d_name);
+
+ if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+ ret = BAD_PATH_ERROR;
+ break;
+ }
+ XSTRNCPY(ctx->name, path, pathLen + 1);
+ ctx->name[pathLen] = '/';
+
+ /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+ * of earlier check it is known that dnameLen is less than
+ * MAX_FILENAME_SZ - (pathLen + 2) so dnameLen +1 will fit */
+ XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1);
+
+ if (m2mb_fs_stat(ctx->name, &ctx->s) != 0) {
+ WOLFSSL_MSG("stat on name failed");
+ ret = BAD_PATH_ERROR;
+ break;
+ }
+ else if (ctx->s.st_mode & M2MB_S_IFREG) {
+ if (name)
+ *name = ctx->name;
return 0;
}
-
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- xSemaphoreGive(m->mutex);
+ }
+#else
+ while ((ctx->entry = readdir(ctx->dir)) != NULL) {
+ dnameLen = (int)XSTRLEN(ctx->entry->d_name);
+
+ if (pathLen + dnameLen + 2 >= MAX_FILENAME_SZ) {
+ ret = BAD_PATH_ERROR;
+ break;
+ }
+ XSTRNCPY(ctx->name, path, pathLen + 1);
+ ctx->name[pathLen] = '/';
+ /* Use dnameLen + 1 for GCC 8 warnings of truncating d_name. Because
+ * of earlier check it is known that dnameLen is less than
+ * MAX_FILENAME_SZ - (pathLen + 2) so that dnameLen +1 will fit */
+ XSTRNCPY(ctx->name + pathLen + 1, ctx->entry->d_name, dnameLen + 1);
+
+ if (stat(ctx->name, &ctx->s) != 0) {
+ WOLFSSL_MSG("stat on name failed");
+ ret = BAD_PATH_ERROR;
+ break;
+ } else if (S_ISREG(ctx->s.st_mode)) {
+ if (name)
+ *name = ctx->name;
return 0;
}
+ }
+#endif
+ wc_ReadDirClose(ctx);
- #elif defined(USE_WINDOWS_API)
+ return ret;
+}
- int InitMutex(wolfSSL_Mutex* m)
- {
- InitializeCriticalSection(m);
- return 0;
- }
+void wc_ReadDirClose(ReadDirCtx* ctx)
+{
+ if (ctx == NULL) {
+ return;
+ }
+
+#ifdef USE_WINDOWS_API
+ if (ctx->hFind != INVALID_HANDLE_VALUE) {
+ FindClose(ctx->hFind);
+ ctx->hFind = INVALID_HANDLE_VALUE;
+ }
+#elif defined(WOLFSSL_ZEPHYR)
+ if (ctx->dirp) {
+ fs_closedir(ctx->dirp);
+ ctx->dirp = NULL;
+ }
+#elif defined(WOLFSSL_TELIT_M2MB)
+ if (ctx->dir) {
+ m2mb_fs_closedir(ctx->dir);
+ ctx->dir = NULL;
+ }
+#else
+ if (ctx->dir) {
+ closedir(ctx->dir);
+ ctx->dir = NULL;
+ }
+#endif
+}
+#endif /* !NO_FILESYSTEM && !NO_WOLFSSL_DIR */
- int FreeMutex(wolfSSL_Mutex* m)
- {
- DeleteCriticalSection(m);
- return 0;
+#if !defined(NO_FILESYSTEM) && defined(WOLFSSL_ZEPHYR)
+XFILE z_fs_open(const char* filename, const char* perm)
+{
+ XFILE file;
+
+ file = XMALLOC(sizeof(*file), NULL, DYNAMIC_TYPE_FILE);
+ if (file != NULL) {
+ if (fs_open(file, filename) != 0) {
+ XFREE(file, NULL, DYNAMIC_TYPE_FILE);
+ file = NULL;
}
+ }
+
+ return file;
+}
+int z_fs_close(XFILE file)
+{
+ int ret;
- int LockMutex(wolfSSL_Mutex* m)
- {
- EnterCriticalSection(m);
- return 0;
- }
+ if (file == NULL)
+ return -1;
+ ret = (fs_close(file) == 0) ? 0 : -1;
+ XFREE(file, NULL, DYNAMIC_TYPE_FILE);
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- LeaveCriticalSection(m);
- return 0;
- }
+ return ret;
+}
- #elif defined(WOLFSSL_PTHREADS)
+#endif /* !NO_FILESYSTEM && !WOLFSSL_ZEPHYR */
- int InitMutex(wolfSSL_Mutex* m)
- {
- if (pthread_mutex_init(m, 0) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
+
+wolfSSL_Mutex* wc_InitAndAllocMutex(void)
+{
+ wolfSSL_Mutex* m = (wolfSSL_Mutex*) XMALLOC(sizeof(wolfSSL_Mutex), NULL,
+ DYNAMIC_TYPE_MUTEX);
+ if (m != NULL) {
+ if (wc_InitMutex(m) != 0) {
+ WOLFSSL_MSG("Init Mutex failed");
+ XFREE(m, NULL, DYNAMIC_TYPE_MUTEX);
+ m = NULL;
}
+ }
+ else {
+ WOLFSSL_MSG("Memory error with Mutex allocation");
+ }
+ return m;
+}
- int FreeMutex(wolfSSL_Mutex* m)
- {
- if (pthread_mutex_destroy(m) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
+#ifdef USE_WOLF_STRTOK
+/* String token (delim) search. If str is null use nextp. */
+char* wc_strtok(char *str, const char *delim, char **nextp)
+{
+ char* ret;
+ int i, j;
+
+ /* Use next if str is NULL */
+ if (str == NULL && nextp)
+ str = *nextp;
+
+ /* verify str input */
+ if (str == NULL || *str == '\0')
+ return NULL;
+
+ /* match on entire delim */
+ for (i = 0; str[i]; i++) {
+ for (j = 0; delim[j]; j++) {
+ if (delim[j] == str[i])
+ break;
}
+ if (!delim[j])
+ break;
+ }
+ str += i;
+ /* if end of string, not found so return NULL */
+ if (*str == '\0')
+ return NULL;
+
+ ret = str;
+
+ /* match on first delim */
+ for (i = 0; str[i]; i++) {
+ for (j = 0; delim[j]; j++) {
+ if (delim[j] == str[i])
+ break;
+ }
+ if (delim[j] == str[i])
+ break;
+ }
+ str += i;
+ /* null terminate found string */
+ if (*str)
+ *str++ = '\0';
- int LockMutex(wolfSSL_Mutex* m)
- {
- if (pthread_mutex_lock(m) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
- }
+ /* return pointer to next */
+ if (nextp)
+ *nextp = str;
+ return ret;
+}
+#endif /* USE_WOLF_STRTOK */
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- if (pthread_mutex_unlock(m) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
+#ifdef USE_WOLF_STRSEP
+char* wc_strsep(char **stringp, const char *delim)
+{
+ char *s, *tok;
+ const char *spanp;
+
+ /* null check */
+ if (stringp == NULL || *stringp == NULL)
+ return NULL;
+
+ s = *stringp;
+ for (tok = s; *tok; ++tok) {
+ for (spanp = delim; *spanp; ++spanp) {
+ /* found delimiter */
+ if (*tok == *spanp) {
+ *tok = '\0'; /* replace delim with null term */
+ *stringp = tok + 1; /* return past delim */
+ return s;
+ }
}
+ }
- #elif defined(THREADX)
-
- int InitMutex(wolfSSL_Mutex* m)
- {
- if (tx_mutex_create(m, "wolfSSL Mutex", TX_NO_INHERIT) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
+ *stringp = NULL;
+ return s;
+}
+#endif /* USE_WOLF_STRSEP */
+
+#if WOLFSSL_CRYPT_HW_MUTEX
+/* Mutex for protection of cryptography hardware */
+static wolfSSL_Mutex wcCryptHwMutex;
+static int wcCryptHwMutexInit = 0;
+
+int wolfSSL_CryptHwMutexInit(void) {
+ int ret = 0;
+ if(wcCryptHwMutexInit == 0) {
+ ret = wc_InitMutex(&wcCryptHwMutex);
+ if(ret == 0) {
+ wcCryptHwMutexInit = 1;
}
+ }
+ return ret;
+}
+int wolfSSL_CryptHwMutexLock(void) {
+ int ret = BAD_MUTEX_E;
- int FreeMutex(wolfSSL_Mutex* m)
- {
- if (tx_mutex_delete(m) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
- }
+ /* Make sure HW Mutex has been initialized */
+ wolfSSL_CryptHwMutexInit();
+ if(wcCryptHwMutexInit) {
+ ret = wc_LockMutex(&wcCryptHwMutex);
+ }
+ return ret;
+}
- int LockMutex(wolfSSL_Mutex* m)
- {
- if (tx_mutex_get(m, TX_WAIT_FOREVER) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
+int wolfSSL_CryptHwMutexUnLock(void) {
+ int ret = BAD_MUTEX_E;
+
+ if(wcCryptHwMutexInit) {
+ ret = wc_UnLockMutex(&wcCryptHwMutex);
+ }
+ return ret;
+}
+#endif /* WOLFSSL_CRYPT_HW_MUTEX */
+
+
+/* ---------------------------------------------------------------------------*/
+/* Mutex Ports */
+/* ---------------------------------------------------------------------------*/
+#if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
+ static mutex_cb* compat_mutex_cb = NULL;
+
+ /* Function that locks or unlocks a mutex based on the flag passed in.
+ *
+ * flag lock or unlock i.e. CRYPTO_LOCK
+ * type the type of lock to unlock or lock
+ * file name of the file calling
+ * line the line number from file calling
+ */
+ int wc_LockMutex_ex(int flag, int type, const char* file, int line)
+ {
+ if (compat_mutex_cb != NULL) {
+ compat_mutex_cb(flag, type, file, line);
+ return 0;
+ }
+ else {
+ WOLFSSL_MSG("Mutex call back function not set. Call wc_SetMutexCb");
+ return BAD_STATE_E;
}
+ }
+
+
+ /* Set the callback function to use for locking/unlocking mutex
+ *
+ * cb callback function to use
+ */
+ int wc_SetMutexCb(mutex_cb* cb)
+ {
+ compat_mutex_cb = cb;
+ return 0;
+ }
+#endif /* defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER) */
+#ifdef SINGLE_THREADED
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ (void)m;
+ return 0;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex *m)
+ {
+ (void)m;
+ return 0;
+ }
+
+
+ int wc_LockMutex(wolfSSL_Mutex *m)
+ {
+ (void)m;
+ return 0;
+ }
+
+
+ int wc_UnLockMutex(wolfSSL_Mutex *m)
+ {
+ (void)m;
+ return 0;
+ }
+
+#elif defined(FREERTOS) || defined(FREERTOS_TCP) || \
+ defined(FREESCALE_FREE_RTOS)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ int iReturn;
+
+ *m = ( wolfSSL_Mutex ) xSemaphoreCreateMutex();
+ if( *m != NULL )
+ iReturn = 0;
+ else
+ iReturn = BAD_MUTEX_E;
+
+ return iReturn;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ vSemaphoreDelete( *m );
+ return 0;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ /* Assume an infinite block, or should there be zero block? */
+ xSemaphoreTake( *m, portMAX_DELAY );
+ return 0;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ xSemaphoreGive( *m );
+ return 0;
+ }
+
+#elif defined(WOLFSSL_SAFERTOS)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ vSemaphoreCreateBinary(m->mutexBuffer, m->mutex);
+ if (m->mutex == NULL)
+ return BAD_MUTEX_E;
+
+ return 0;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ (void)m;
+ return 0;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ /* Assume an infinite block */
+ xSemaphoreTake(m->mutex, portMAX_DELAY);
+ return 0;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ xSemaphoreGive(m->mutex);
+ return 0;
+ }
+
+#elif defined(USE_WINDOWS_API)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ InitializeCriticalSection(m);
+ return 0;
+ }
+
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ DeleteCriticalSection(m);
+ return 0;
+ }
+
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ EnterCriticalSection(m);
+ return 0;
+ }
+
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ LeaveCriticalSection(m);
+ return 0;
+ }
+
+#elif defined(WOLFSSL_PTHREADS)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ if (pthread_mutex_init(m, 0) == 0)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- if (tx_mutex_put(m) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
- }
- #elif defined(MICRIUM)
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ if (pthread_mutex_destroy(m) == 0)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
+
- int InitMutex(wolfSSL_Mutex* m)
- {
- #if (NET_SECURE_MGR_CFG_EN == DEF_ENABLED)
- if (NetSecure_OS_MutexCreate(m) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
- #else
- return 0;
- #endif
- }
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ if (pthread_mutex_lock(m) == 0)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
- int FreeMutex(wolfSSL_Mutex* m)
- {
- #if (NET_SECURE_MGR_CFG_EN == DEF_ENABLED)
- if (NetSecure_OS_FreeMutex(m) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
- #else
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ if (pthread_mutex_unlock(m) == 0)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
+
+#elif defined(WOLFSSL_VXWORKS)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ if (m) {
+ if ((*m = semMCreate(0)) != SEM_ID_NULL)
return 0;
- #endif
}
+ return BAD_MUTEX_E;
+ }
- int LockMutex(wolfSSL_Mutex* m)
- {
- #if (NET_SECURE_MGR_CFG_EN == DEF_ENABLED)
- if (NetSecure_OS_LockMutex(m) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
- #else
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ if (m) {
+ if (semDelete(*m) == OK)
return 0;
- #endif
}
+ return BAD_MUTEX_E;
+ }
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- #if (NET_SECURE_MGR_CFG_EN == DEF_ENABLED)
- if (NetSecure_OS_UnLockMutex(m) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
- #else
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ if (m) {
+ if (semTake(*m, WAIT_FOREVER) == OK)
return 0;
- #endif
-
}
+ return BAD_MUTEX_E;
+ }
- #elif defined(EBSNET)
- int InitMutex(wolfSSL_Mutex* m)
- {
- if (rtp_sig_mutex_alloc(m, "wolfSSL Mutex") == -1)
- return BAD_MUTEX_E;
- else
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ if (m) {
+ if (semGive(*m) == OK)
return 0;
}
+ return BAD_MUTEX_E;
+ }
- int FreeMutex(wolfSSL_Mutex* m)
- {
- rtp_sig_mutex_free(*m);
+#elif defined(THREADX)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ if (tx_mutex_create(m, "wolfSSL Mutex", TX_NO_INHERIT) == 0)
return 0;
- }
+ else
+ return BAD_MUTEX_E;
+ }
- int LockMutex(wolfSSL_Mutex* m)
- {
- if (rtp_sig_mutex_claim_timed(*m, RTIP_INF) == 0)
- return 0;
- else
- return BAD_MUTEX_E;
- }
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- rtp_sig_mutex_release(*m);
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ if (tx_mutex_delete(m) == 0)
return 0;
- }
+ else
+ return BAD_MUTEX_E;
+ }
- #elif defined(FREESCALE_MQX)
- int InitMutex(wolfSSL_Mutex* m)
- {
- if (_mutex_init(m, NULL) == MQX_EOK)
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ if (tx_mutex_get(m, TX_WAIT_FOREVER) == 0)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ if (tx_mutex_put(m) == 0)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
+
+#elif defined(WOLFSSL_DEOS)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ mutexStatus mutStat;
+ /*
+ The empty string "" denotes an anonymous mutex, so objects do not cause name collisions.
+ `protectWolfSSLTemp` in an XML configuration element template describing a mutex.
+ */
+ if (m) {
+ mutStat = createMutex("", "protectWolfSSLTemp", m);
+ if (mutStat == mutexSuccess)
return 0;
- else
- return BAD_MUTEX_E;
+ else{
+ WOLFSSL_MSG("wc_InitMutex failed");
+ return mutStat;
+ }
}
-
- int FreeMutex(wolfSSL_Mutex* m)
- {
- if (_mutex_destroy(m) == MQX_EOK)
+ return BAD_MUTEX_E;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ mutexStatus mutStat;
+ if (m) {
+ mutStat = deleteMutex(*m);
+ if (mutStat == mutexSuccess)
return 0;
- else
- return BAD_MUTEX_E;
+ else{
+ WOLFSSL_MSG("wc_FreeMutex failed");
+ return mutStat;
+ }
}
-
- int LockMutex(wolfSSL_Mutex* m)
- {
- if (_mutex_lock(m) == MQX_EOK)
+ return BAD_MUTEX_E;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ mutexStatus mutStat;
+ if (m) {
+ mutStat = lockMutex(*m);
+ if (mutStat == mutexSuccess)
return 0;
- else
- return BAD_MUTEX_E;
+ else{
+ WOLFSSL_MSG("wc_LockMutex failed");
+ return mutStat;
+ }
}
+ return BAD_MUTEX_E;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ mutexStatus mutStat;
+ if (m) {
+ mutStat = unlockMutex(*m);
+ if (mutStat== mutexSuccess)
+ return 0;
+ else{
+ WOLFSSL_MSG("wc_UnLockMutex failed");
+ return mutStat;
+ }
+ }
+ return BAD_MUTEX_E;
+ }
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- if (_mutex_unlock(m) == MQX_EOK)
+#elif defined(MICRIUM)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ OS_ERR err;
+
+ OSMutexCreate(m, "wolfSSL Mutex", &err);
+
+ if (err == OS_ERR_NONE)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ #if (OS_CFG_MUTEX_DEL_EN == DEF_ENABLED)
+ OS_ERR err;
+
+ OSMutexDel(m, OS_OPT_DEL_ALWAYS, &err);
+
+ if (err == OS_ERR_NONE)
return 0;
else
return BAD_MUTEX_E;
- }
-
- #elif defined (WOLFSSL_TIRTOS)
+ #else
+ return 0;
+ #endif
+ }
- int InitMutex(wolfSSL_Mutex* m)
- {
- Semaphore_Params params;
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ OS_ERR err;
- Semaphore_Params_init(&params);
- params.mode = Semaphore_Mode_BINARY;
+ OSMutexPend(m, 0, OS_OPT_PEND_BLOCKING, NULL, &err);
- *m = Semaphore_create(1, &params, NULL);
+ if (err == OS_ERR_NONE)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
- return 0;
- }
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ OS_ERR err;
- int FreeMutex(wolfSSL_Mutex* m)
- {
- Semaphore_delete(m);
+ OSMutexPost(m, OS_OPT_POST_NONE, &err);
+ if (err == OS_ERR_NONE)
return 0;
- }
+ else
+ return BAD_MUTEX_E;
+ }
- int LockMutex(wolfSSL_Mutex* m)
- {
- Semaphore_pend(*m, BIOS_WAIT_FOREVER);
+#elif defined(EBSNET)
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ if (rtp_sig_mutex_alloc(m, "wolfSSL Mutex") == -1)
+ return BAD_MUTEX_E;
+ else
return 0;
- }
+ }
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- Semaphore_post(*m);
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ rtp_sig_mutex_free(*m);
+ return 0;
+ }
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ if (rtp_sig_mutex_claim_timed(*m, RTIP_INF) == 0)
return 0;
- }
+ else
+ return BAD_MUTEX_E;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ rtp_sig_mutex_release(*m);
+ return 0;
+ }
+
+ int ebsnet_fseek(int a, long b, int c)
+ {
+ int retval;
+
+ retval = vf_lseek(a, b, c);
+ if (retval > 0)
+ retval = 0;
+ else
+ retval = -1;
+
+ return(retval);
+ }
+
+#elif defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ if (_mutex_init(m, NULL) == MQX_EOK)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
- #elif defined(WOLFSSL_uITRON4)
- #include "kernel.h"
- int InitMutex(wolfSSL_Mutex* m)
- {
- int iReturn;
- m->sem.sematr = TA_TFIFO ;
- m->sem.isemcnt = 1 ;
- m->sem.maxsem = 1 ;
- m->sem.name = NULL ;
-
- m->id = acre_sem(&m->sem);
- if( m->id != NULL )
- iReturn = 0;
- else
- iReturn = BAD_MUTEX_E;
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ if (_mutex_destroy(m) == MQX_EOK)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
- return iReturn;
- }
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ if (_mutex_lock(m) == MQX_EOK)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
- int FreeMutex(wolfSSL_Mutex* m)
- {
- del_sem( m->id );
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ if (_mutex_unlock(m) == MQX_EOK)
return 0;
+ else
+ return BAD_MUTEX_E;
+ }
+
+#elif defined(WOLFSSL_TIRTOS)
+ #include <xdc/runtime/Error.h>
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ Semaphore_Params params;
+ Error_Block eb;
+
+ Error_init(&eb);
+ Semaphore_Params_init(&params);
+ params.mode = Semaphore_Mode_BINARY;
+
+ *m = Semaphore_create(1, &params, &eb);
+ if (Error_check(&eb)) {
+ Error_raise(&eb, Error_E_generic, "Failed to Create the semaphore.",
+ NULL);
+ return BAD_MUTEX_E;
}
-
- int LockMutex(wolfSSL_Mutex* m)
- {
- wai_sem(m->id);
+ else
return 0;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ Semaphore_delete(m);
+
+ return 0;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ Semaphore_pend(*m, BIOS_WAIT_FOREVER);
+
+ return 0;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ Semaphore_post(*m);
+
+ return 0;
+ }
+
+#elif defined(WOLFSSL_uITRON4)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ int iReturn;
+ m->sem.sematr = TA_TFIFO;
+ m->sem.isemcnt = 1;
+ m->sem.maxsem = 1;
+ m->sem.name = NULL;
+
+ m->id = acre_sem(&m->sem);
+ if( m->id != E_OK )
+ iReturn = 0;
+ else
+ iReturn = BAD_MUTEX_E;
+
+ return iReturn;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ del_sem( m->id );
+ return 0;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ wai_sem(m->id);
+ return 0;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ sig_sem(m->id);
+ return 0;
+ }
+
+ /**** uITRON malloc/free ***/
+ static ID ID_wolfssl_MPOOL = 0;
+ static T_CMPL wolfssl_MPOOL = {TA_TFIFO, 0, NULL, "wolfSSL_MPOOL"};
+
+ int uITRON4_minit(size_t poolsz) {
+ ER ercd;
+ wolfssl_MPOOL.mplsz = poolsz;
+ ercd = acre_mpl(&wolfssl_MPOOL);
+ if (ercd > 0) {
+ ID_wolfssl_MPOOL = ercd;
+ return 0;
+ } else {
+ return -1;
}
-
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- sig_sem(m->id);
+ }
+
+ void *uITRON4_malloc(size_t sz) {
+ ER ercd;
+ void *p = NULL;
+ ercd = get_mpl(ID_wolfssl_MPOOL, sz, (VP)&p);
+ if (ercd == E_OK) {
+ return p;
+ } else {
return 0;
}
+ }
+
+ void *uITRON4_realloc(void *p, size_t sz) {
+ ER ercd;
+ void *newp;
+ if(p) {
+ ercd = get_mpl(ID_wolfssl_MPOOL, sz, (VP)&newp);
+ if (ercd == E_OK) {
+ XMEMCPY(newp, p, sz);
+ ercd = rel_mpl(ID_wolfssl_MPOOL, (VP)p);
+ if (ercd == E_OK) {
+ return newp;
+ }
+ }
+ }
+ return 0;
+ }
+
+ void uITRON4_free(void *p) {
+ ER ercd;
+ ercd = rel_mpl(ID_wolfssl_MPOOL, (VP)p);
+ if (ercd == E_OK) {
+ return;
+ } else {
+ return;
+ }
+ }
- /**** uITRON malloc/free ***/
- static ID ID_wolfssl_MPOOL = 0 ;
- static T_CMPL wolfssl_MPOOL = {TA_TFIFO, 0, NULL, "wolfSSL_MPOOL"};
+#elif defined(WOLFSSL_uTKERNEL2)
- int uITRON4_minit(size_t poolsz) {
- ER ercd;
- wolfssl_MPOOL.mplsz = poolsz ;
- ercd = acre_mpl(&wolfssl_MPOOL);
- if (ercd > 0) {
- ID_wolfssl_MPOOL = ercd;
- return 0;
- } else {
- return -1;
- }
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ int iReturn;
+ m->sem.sematr = TA_TFIFO;
+ m->sem.isemcnt = 1;
+ m->sem.maxsem = 1;
+
+ m->id = tk_cre_sem(&m->sem);
+ if( m->id != NULL )
+ iReturn = 0;
+ else
+ iReturn = BAD_MUTEX_E;
+
+ return iReturn;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ tk_del_sem(m->id);
+ return 0;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ tk_wai_sem(m->id, 1, TMO_FEVR);
+ return 0;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ tk_sig_sem(m->id, 1);
+ return 0;
+ }
+
+ /**** uT-Kernel malloc/free ***/
+ static ID ID_wolfssl_MPOOL = 0;
+ static T_CMPL wolfssl_MPOOL = {
+ NULL, /* Extended information */
+ TA_TFIFO, /* Memory pool attribute */
+ 0, /* Size of whole memory pool (byte) */
+ "wolfSSL" /* Object name (max 8-char) */
+ };
+
+ int uTKernel_init_mpool(unsigned int sz) {
+ ER ercd;
+ wolfssl_MPOOL.mplsz = sz;
+ ercd = tk_cre_mpl(&wolfssl_MPOOL);
+ if (ercd > 0) {
+ ID_wolfssl_MPOOL = ercd;
+ return 0;
+ } else {
+ return (int)ercd;
}
-
- void *uITRON4_malloc(size_t sz) {
- ER ercd;
- void *p ;
- ercd = get_mpl(ID_wolfssl_MPOOL, sz, (VP)&p);
- if (ercd == E_OK) {
- return p;
- } else {
- return 0 ;
- }
+ }
+
+ void *uTKernel_malloc(unsigned int sz) {
+ ER ercd;
+ void *p = NULL;
+ ercd = tk_get_mpl(ID_wolfssl_MPOOL, sz, (VP)&p, TMO_FEVR);
+ if (ercd == E_OK) {
+ return p;
+ } else {
+ return 0;
}
-
- void *uITRON4_realloc(void *p, size_t sz) {
- ER ercd;
- void *newp ;
- if(p) {
- ercd = get_mpl(ID_wolfssl_MPOOL, sz, (VP)&newp);
+ }
+
+ void *uTKernel_realloc(void *p, unsigned int sz) {
+ ER ercd;
+ void *newp;
+ if (p) {
+ ercd = tk_get_mpl(ID_wolfssl_MPOOL, sz, (VP)&newp, TMO_FEVR);
+ if (ercd == E_OK) {
+ XMEMCPY(newp, p, sz);
+ ercd = tk_rel_mpl(ID_wolfssl_MPOOL, (VP)p);
if (ercd == E_OK) {
- memcpy(newp, p, sz) ;
- ercd = rel_mpl(ID_wolfssl_MPOOL, (VP)p);
- if (ercd == E_OK) {
- return newp;
- }
+ return newp;
}
}
- return 0 ;
+ }
+ return 0;
+ }
+
+ void uTKernel_free(void *p) {
+ ER ercd;
+ ercd = tk_rel_mpl(ID_wolfssl_MPOOL, (VP)p);
+ if (ercd == E_OK) {
+ return;
+ } else {
+ return;
}
+ }
+
+#elif defined (WOLFSSL_FROSTED)
- void uITRON4_free(void *p) {
- ER ercd;
- ercd = rel_mpl(ID_wolfssl_MPOOL, (VP)p);
- if (ercd == E_OK) {
- return ;
- } else {
- return ;
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ *m = mutex_init();
+ if (*m)
+ return 0;
+ else
+ return -1;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ mutex_destroy(*m);
+ return(0);
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ mutex_lock(*m);
+ return 0;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ mutex_unlock(*m);
+ return 0;
+ }
+
+#elif defined(WOLFSSL_CMSIS_RTOS)
+
+ #define CMSIS_NMUTEX 10
+ osMutexDef(wolfSSL_mt0); osMutexDef(wolfSSL_mt1); osMutexDef(wolfSSL_mt2);
+ osMutexDef(wolfSSL_mt3); osMutexDef(wolfSSL_mt4); osMutexDef(wolfSSL_mt5);
+ osMutexDef(wolfSSL_mt6); osMutexDef(wolfSSL_mt7); osMutexDef(wolfSSL_mt8);
+ osMutexDef(wolfSSL_mt9);
+
+ static const osMutexDef_t *CMSIS_mutex[] = { osMutex(wolfSSL_mt0),
+ osMutex(wolfSSL_mt1), osMutex(wolfSSL_mt2), osMutex(wolfSSL_mt3),
+ osMutex(wolfSSL_mt4), osMutex(wolfSSL_mt5), osMutex(wolfSSL_mt6),
+ osMutex(wolfSSL_mt7), osMutex(wolfSSL_mt8), osMutex(wolfSSL_mt9) };
+
+ static osMutexId CMSIS_mutexID[CMSIS_NMUTEX] = {0};
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ int i;
+ for (i=0; i<CMSIS_NMUTEX; i++) {
+ if(CMSIS_mutexID[i] == 0) {
+ CMSIS_mutexID[i] = osMutexCreate(CMSIS_mutex[i]);
+ (*m) = CMSIS_mutexID[i];
+ return 0;
}
}
+ return -1;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ int i;
+ osMutexDelete (*m);
+ for (i=0; i<CMSIS_NMUTEX; i++) {
+ if(CMSIS_mutexID[i] == (*m)) {
+ CMSIS_mutexID[i] = 0;
+ return(0);
+ }
+ }
+ return(-1);
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ osMutexWait(*m, osWaitForever);
+ return(0);
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ osMutexRelease (*m);
+ return 0;
+ }
+
+#elif defined(WOLFSSL_CMSIS_RTOSv2)
+ int wc_InitMutex(wolfSSL_Mutex *m)
+ {
+ static const osMutexAttr_t attr = {
+ "wolfSSL_mutex", osMutexRecursive, NULL, 0};
+
+ if ((*m = osMutexNew(&attr)) != NULL)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
-#elif defined(WOLFSSL_uTKERNEL2)
- #include "tk/tkernel.h"
- int InitMutex(wolfSSL_Mutex* m)
- {
- int iReturn;
- m->sem.sematr = TA_TFIFO ;
- m->sem.isemcnt = 1 ;
- m->sem.maxsem = 1 ;
-
- m->id = tk_cre_sem(&m->sem);
- if( m->id != NULL )
- iReturn = 0;
- else
- iReturn = BAD_MUTEX_E;
+ int wc_FreeMutex(wolfSSL_Mutex *m)
+ {
+ if (osMutexDelete(*m) == osOK)
+ return 0;
+ else
+ return BAD_MUTEX_E;
+ }
- return iReturn;
- }
- int FreeMutex(wolfSSL_Mutex* m)
- {
- tk_del_sem( m->id );
+ int wc_LockMutex(wolfSSL_Mutex *m)
+ {
+ if (osMutexAcquire(*m, osWaitForever) == osOK)
return 0;
- }
+ else
+ return BAD_MUTEX_E;
+ }
- int LockMutex(wolfSSL_Mutex* m)
- {
- tk_wai_sem(m->id, 1, TMO_FEVR);
+ int wc_UnLockMutex(wolfSSL_Mutex *m)
+ {
+ if (osMutexRelease(*m) == osOK)
return 0;
+ else
+ return BAD_MUTEX_E;
+ }
+
+#elif defined(WOLFSSL_MDK_ARM)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ os_mut_init (m);
+ return 0;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ return(0);
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ os_mut_wait (m, 0xffff);
+ return(0);
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ os_mut_release (m);
+ return 0;
+ }
+
+#elif defined(INTIME_RTOS)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ int ret = 0;
+
+ if (m == NULL)
+ return BAD_FUNC_ARG;
+
+ *m = CreateRtSemaphore(
+ 1, /* initial unit count */
+ 1, /* maximum unit count */
+ PRIORITY_QUEUING /* creation flags: FIFO_QUEUING or PRIORITY_QUEUING */
+ );
+ if (*m == BAD_RTHANDLE) {
+ ret = GetLastRtError();
+ if (ret != E_OK)
+ ret = BAD_MUTEX_E;
}
-
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- tk_sig_sem(m->id, 1);
+ return ret;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ int ret = 0;
+ BOOLEAN del;
+
+ if (m == NULL)
+ return BAD_FUNC_ARG;
+
+ del = DeleteRtSemaphore(
+ *m /* handle for RT semaphore */
+ );
+ if (del != TRUE)
+ ret = BAD_MUTEX_E;
+
+ return ret;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ int ret = 0;
+ DWORD lck;
+
+ if (m == NULL)
+ return BAD_FUNC_ARG;
+
+ lck = WaitForRtSemaphore(
+ *m, /* handle for RT semaphore */
+ 1, /* number of units to wait for */
+ WAIT_FOREVER /* number of milliseconds to wait for units */
+ );
+ if (lck == WAIT_FAILED) {
+ ret = GetLastRtError();
+ if (ret != E_OK)
+ ret = BAD_MUTEX_E;
+ }
+ return ret;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ int ret = 0;
+ BOOLEAN rel;
+
+ if (m == NULL)
+ return BAD_FUNC_ARG;
+
+ rel = ReleaseRtSemaphore(
+ *m, /* handle for RT semaphore */
+ 1 /* number of units to release to semaphore */
+ );
+ if (rel != TRUE)
+ ret = BAD_MUTEX_E;
+
+ return ret;
+ }
+
+#elif defined(WOLFSSL_NUCLEUS_1_2)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ /* Call the Nucleus function to create the semaphore */
+ if (NU_Create_Semaphore(m, "WOLFSSL_MTX", 1,
+ NU_PRIORITY) == NU_SUCCESS) {
return 0;
}
- /**** uT-Kernel malloc/free ***/
- static ID ID_wolfssl_MPOOL = 0 ;
- static T_CMPL wolfssl_MPOOL =
- {(void *)NULL,
- TA_TFIFO , 0, "wolfSSL_MPOOL"};
+ return BAD_MUTEX_E;
+ }
- int uTKernel_init_mpool(unsigned int sz) {
- ER ercd;
- wolfssl_MPOOL.mplsz = sz ;
- ercd = tk_cre_mpl(&wolfssl_MPOOL);
- if (ercd > 0) {
- ID_wolfssl_MPOOL = ercd;
- return 0;
- } else {
- return -1;
- }
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ if (NU_Delete_Semaphore(m) == NU_SUCCESS)
+ return 0;
+
+ return BAD_MUTEX_E;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ /* passing suspend task option */
+ if (NU_Obtain_Semaphore(m, NU_SUSPEND) == NU_SUCCESS)
+ return 0;
+
+ return BAD_MUTEX_E;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ if (NU_Release_Semaphore(m) == NU_SUCCESS)
+ return 0;
+
+ return BAD_MUTEX_E;
+ }
+
+#elif defined(WOLFSSL_ZEPHYR)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ k_mutex_init(m);
+
+ return 0;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ return 0;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ int ret = 0;
+
+ if (k_mutex_lock(m, K_FOREVER) != 0)
+ ret = BAD_MUTEX_E;
+
+ return ret;
+ }
+
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ k_mutex_unlock(m);
+
+ return 0;
+ }
+
+#elif defined(WOLFSSL_TELIT_M2MB)
+
+ int wc_InitMutex(wolfSSL_Mutex* m)
+ {
+ M2MB_OS_RESULT_E osRes;
+ M2MB_OS_MTX_ATTR_HANDLE mtxAttrHandle;
+ UINT32 inheritVal = 1;
+
+ osRes = m2mb_os_mtx_setAttrItem(&mtxAttrHandle,
+ CMDS_ARGS(
+ M2MB_OS_MTX_SEL_CMD_CREATE_ATTR, NULL,
+ M2MB_OS_MTX_SEL_CMD_NAME, "wolfMtx",
+ M2MB_OS_MTX_SEL_CMD_INHERIT, inheritVal
+ )
+ );
+ if (osRes != M2MB_OS_SUCCESS) {
+ return BAD_MUTEX_E;
}
- void *uTKernel_malloc(unsigned int sz) {
- ER ercd;
- void *p ;
- ercd = tk_get_mpl(ID_wolfssl_MPOOL, sz, (VP)&p, TMO_FEVR);
- if (ercd == E_OK) {
- return p;
- } else {
- return 0 ;
- }
+ osRes = m2mb_os_mtx_init(m, &mtxAttrHandle);
+ if (osRes != M2MB_OS_SUCCESS) {
+ return BAD_MUTEX_E;
}
- void *uTKernel_realloc(void *p, unsigned int sz) {
- ER ercd;
- void *newp ;
- if(p) {
- ercd = tk_get_mpl(ID_wolfssl_MPOOL, sz, (VP)&newp, TMO_FEVR);
- if (ercd == E_OK) {
- memcpy(newp, p, sz) ;
- ercd = tk_rel_mpl(ID_wolfssl_MPOOL, (VP)p);
- if (ercd == E_OK) {
- return newp;
- }
- }
- }
- return 0 ;
+ return 0;
+ }
+
+ int wc_FreeMutex(wolfSSL_Mutex* m)
+ {
+ M2MB_OS_RESULT_E osRes;
+
+ if (m == NULL)
+ return BAD_MUTEX_E;
+
+ osRes = m2mb_os_mtx_deinit(*m);
+ if (osRes != M2MB_OS_SUCCESS) {
+ return BAD_MUTEX_E;
}
- void uTKernel_free(void *p) {
- ER ercd;
- ercd = tk_rel_mpl(ID_wolfssl_MPOOL, (VP)p);
- if (ercd == E_OK) {
- return ;
- } else {
- return ;
- }
+ return 0;
+ }
+
+ int wc_LockMutex(wolfSSL_Mutex* m)
+ {
+ M2MB_OS_RESULT_E osRes;
+
+ if (m == NULL)
+ return BAD_MUTEX_E;
+
+ osRes = m2mb_os_mtx_get(*m, M2MB_OS_WAIT_FOREVER);
+ if (osRes != M2MB_OS_SUCCESS) {
+ return BAD_MUTEX_E;
}
- #elif defined(WOLFSSL_MDK_ARM)|| defined(WOLFSSL_CMSIS_RTOS)
-
- #if defined(WOLFSSL_CMSIS_RTOS)
- #include "cmsis_os.h"
- #define CMSIS_NMUTEX 10
- osMutexDef(wolfSSL_mt0) ; osMutexDef(wolfSSL_mt1) ; osMutexDef(wolfSSL_mt2) ;
- osMutexDef(wolfSSL_mt3) ; osMutexDef(wolfSSL_mt4) ; osMutexDef(wolfSSL_mt5) ;
- osMutexDef(wolfSSL_mt6) ; osMutexDef(wolfSSL_mt7) ; osMutexDef(wolfSSL_mt8) ;
- osMutexDef(wolfSSL_mt9) ;
-
- static const osMutexDef_t *CMSIS_mutex[] = { osMutex(wolfSSL_mt0),
- osMutex(wolfSSL_mt1), osMutex(wolfSSL_mt2), osMutex(wolfSSL_mt3),
- osMutex(wolfSSL_mt4), osMutex(wolfSSL_mt5), osMutex(wolfSSL_mt6),
- osMutex(wolfSSL_mt7), osMutex(wolfSSL_mt8), osMutex(wolfSSL_mt9) } ;
-
- static osMutexId CMSIS_mutexID[CMSIS_NMUTEX] = {0} ;
-
- int InitMutex(wolfSSL_Mutex* m)
- {
- int i ;
- for (i=0; i<CMSIS_NMUTEX; i++) {
- if(CMSIS_mutexID[i] == 0) {
- CMSIS_mutexID[i] = osMutexCreate(CMSIS_mutex[i]) ;
- (*m) = CMSIS_mutexID[i] ;
- return 0 ;
- }
- }
- return -1 ;
- }
+ return 0;
+ }
- int FreeMutex(wolfSSL_Mutex* m)
- {
- int i ;
- osMutexDelete (*m) ;
- for (i=0; i<CMSIS_NMUTEX; i++) {
- if(CMSIS_mutexID[i] == (*m)) {
- CMSIS_mutexID[i] = 0 ;
- return(0) ;
- }
- }
- return(-1) ;
- }
-
- int LockMutex(wolfSSL_Mutex* m)
- {
- osMutexWait(*m, osWaitForever) ;
- return(0) ;
- }
+ int wc_UnLockMutex(wolfSSL_Mutex* m)
+ {
+ M2MB_OS_RESULT_E osRes;
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- osMutexRelease (*m);
- return 0;
- }
- #else
- int InitMutex(wolfSSL_Mutex* m)
- {
- os_mut_init (m);
- return 0;
+ if (m == NULL)
+ return BAD_MUTEX_E;
+
+ osRes = m2mb_os_mtx_put(*m);
+ if (osRes != M2MB_OS_SUCCESS) {
+ return BAD_MUTEX_E;
}
- int FreeMutex(wolfSSL_Mutex* m)
- {
- return(0) ;
+ return 0;
+ }
+
+#else
+ #warning No mutex handling defined
+
+#endif
+
+#ifndef NO_ASN_TIME
+#if defined(_WIN32_WCE)
+time_t windows_time(time_t* timer)
+{
+ SYSTEMTIME sysTime;
+ FILETIME fTime;
+ ULARGE_INTEGER intTime;
+ time_t localTime;
+
+ if (timer == NULL)
+ timer = &localTime;
+
+ GetSystemTime(&sysTime);
+ SystemTimeToFileTime(&sysTime, &fTime);
+
+ XMEMCPY(&intTime, &fTime, sizeof(FILETIME));
+ /* subtract EPOCH */
+ intTime.QuadPart -= 0x19db1ded53e8000;
+ /* to secs */
+ intTime.QuadPart /= 10000000;
+ *timer = (time_t)intTime.QuadPart;
+
+ return *timer;
+}
+#endif /* _WIN32_WCE */
+
+#if defined(WOLFSSL_APACHE_MYNEWT)
+#include "os/os_time.h"
+
+time_t mynewt_time(time_t* timer)
+{
+ time_t now;
+ struct os_timeval tv;
+ os_gettimeofday(&tv, NULL);
+ now = (time_t)tv.tv_sec;
+ if(timer != NULL) {
+ *timer = now;
+ }
+ return now;
+}
+#endif /* WOLFSSL_APACHE_MYNEWT */
+
+#if defined(WOLFSSL_GMTIME)
+struct tm* gmtime(const time_t* timer)
+{
+ #define YEAR0 1900
+ #define EPOCH_YEAR 1970
+ #define SECS_DAY (24L * 60L * 60L)
+ #define LEAPYEAR(year) (!((year) % 4) && (((year) % 100) || !((year) %400)))
+ #define YEARSIZE(year) (LEAPYEAR(year) ? 366 : 365)
+
+ static const int _ytab[2][12] =
+ {
+ {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
+ {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
+ };
+
+ static struct tm st_time;
+ struct tm* ret = &st_time;
+ time_t secs = *timer;
+ unsigned long dayclock, dayno;
+ int year = EPOCH_YEAR;
+
+ dayclock = (unsigned long)secs % SECS_DAY;
+ dayno = (unsigned long)secs / SECS_DAY;
+
+ ret->tm_sec = (int) dayclock % 60;
+ ret->tm_min = (int)(dayclock % 3600) / 60;
+ ret->tm_hour = (int) dayclock / 3600;
+ ret->tm_wday = (int) (dayno + 4) % 7; /* day 0 a Thursday */
+
+ while(dayno >= (unsigned long)YEARSIZE(year)) {
+ dayno -= YEARSIZE(year);
+ year++;
+ }
+
+ ret->tm_year = year - YEAR0;
+ ret->tm_yday = (int)dayno;
+ ret->tm_mon = 0;
+
+ while(dayno >= (unsigned long)_ytab[LEAPYEAR(year)][ret->tm_mon]) {
+ dayno -= _ytab[LEAPYEAR(year)][ret->tm_mon];
+ ret->tm_mon++;
+ }
+
+ ret->tm_mday = (int)++dayno;
+ ret->tm_isdst = 0;
+
+ return ret;
+}
+#endif /* WOLFSSL_GMTIME */
+
+
+#if defined(HAVE_RTP_SYS)
+#define YEAR0 1900
+
+struct tm* rtpsys_gmtime(const time_t* timer) /* has a gmtime() but hangs */
+{
+ static struct tm st_time;
+ struct tm* ret = &st_time;
+
+ DC_RTC_CALENDAR cal;
+ dc_rtc_time_get(&cal, TRUE);
+
+ ret->tm_year = cal.year - YEAR0; /* gm starts at 1900 */
+ ret->tm_mon = cal.month - 1; /* gm starts at 0 */
+ ret->tm_mday = cal.day;
+ ret->tm_hour = cal.hour;
+ ret->tm_min = cal.minute;
+ ret->tm_sec = cal.second;
+
+ return ret;
+}
+
+#endif /* HAVE_RTP_SYS */
+
+
+#if defined(MICROCHIP_TCPIP_V5) || defined(MICROCHIP_TCPIP)
+
+/*
+ * time() is just a stub in Microchip libraries. We need our own
+ * implementation. Use SNTP client to get seconds since epoch.
+ */
+time_t pic32_time(time_t* timer)
+{
+#ifdef MICROCHIP_TCPIP_V5
+ DWORD sec = 0;
+#else
+ uint32_t sec = 0;
+#endif
+ time_t localTime;
+
+ if (timer == NULL)
+ timer = &localTime;
+
+#ifdef MICROCHIP_MPLAB_HARMONY
+ sec = TCPIP_SNTP_UTCSecondsGet();
+#else
+ sec = SNTPGetUTCSeconds();
+#endif
+ *timer = (time_t) sec;
+
+ return *timer;
+}
+
+#endif /* MICROCHIP_TCPIP || MICROCHIP_TCPIP_V5 */
+
+#if defined(WOLFSSL_DEOS)
+
+time_t deos_time(time_t* timer)
+{
+ const uint32_t systemTickTimeInHz = 1000000 / systemTickInMicroseconds();
+ uint32_t *systemTickPtr = systemTickPointer();
+
+ if (timer != NULL)
+ *timer = *systemTickPtr/systemTickTimeInHz;
+
+ #if defined(CURRENT_UNIX_TIMESTAMP)
+ /* CURRENT_UNIX_TIMESTAMP is seconds since Jan 01 1970. (UTC) */
+ return (time_t) *systemTickPtr/systemTickTimeInHz + CURRENT_UNIX_TIMESTAMP;
+ #else
+ return (time_t) *systemTickPtr/systemTickTimeInHz;
+ #endif
+}
+#endif /* WOLFSSL_DEOS */
+
+#if defined(MICRIUM)
+
+time_t micrium_time(time_t* timer)
+{
+ CLK_TS_SEC sec;
+
+ Clk_GetTS_Unix(&sec);
+
+ if (timer != NULL)
+ *timer = sec;
+
+ return (time_t) sec;
+}
+
+#endif /* MICRIUM */
+
+#if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+
+time_t mqx_time(time_t* timer)
+{
+ time_t localTime;
+ TIME_STRUCT time_s;
+
+ if (timer == NULL)
+ timer = &localTime;
+
+ _time_get(&time_s);
+ *timer = (time_t) time_s.SECONDS;
+
+ return *timer;
+}
+
+#endif /* FREESCALE_MQX || FREESCALE_KSDK_MQX */
+
+
+#if defined(WOLFSSL_TIRTOS) && defined(USER_TIME)
+
+time_t XTIME(time_t * timer)
+{
+ time_t sec = 0;
+
+ sec = (time_t) Seconds_get();
+
+ if (timer != NULL)
+ *timer = sec;
+
+ return sec;
+}
+
+#endif /* WOLFSSL_TIRTOS */
+
+#if defined(WOLFSSL_XILINX)
+#include "xrtcpsu.h"
+
+time_t XTIME(time_t * timer)
+{
+ time_t sec = 0;
+ XRtcPsu_Config* con;
+ XRtcPsu rtc;
+
+ con = XRtcPsu_LookupConfig(XPAR_XRTCPSU_0_DEVICE_ID);
+ if (con != NULL) {
+ if (XRtcPsu_CfgInitialize(&rtc, con, con->BaseAddr) == XST_SUCCESS) {
+ sec = (time_t)XRtcPsu_GetCurrentTime(&rtc);
+ }
+ else {
+ WOLFSSL_MSG("Unable to initialize RTC");
}
+ }
- int LockMutex(wolfSSL_Mutex* m)
- {
- os_mut_wait (m, 0xffff);
- return(0) ;
+ if (timer != NULL)
+ *timer = sec;
+
+ return sec;
+}
+
+#endif /* WOLFSSL_XILINX */
+
+#if defined(WOLFSSL_ZEPHYR)
+
+time_t z_time(time_t * timer)
+{
+ struct timespec ts;
+
+ if (clock_gettime(CLOCK_REALTIME, &ts) == 0)
+ if (timer != NULL)
+ *timer = ts.tv_sec;
+
+ return ts.tv_sec;
+}
+
+#endif /* WOLFSSL_ZEPHYR */
+
+
+#if defined(WOLFSSL_WICED)
+ #ifndef WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME
+ #error Please define WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME at build time.
+ #endif /* WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME */
+
+time_t wiced_pseudo_unix_epoch_time(time_t * timer)
+{
+ time_t epoch_time;
+ /* The time() function return uptime on WICED platform. */
+ epoch_time = time(NULL) + WOLFSSL_WICED_PSEUDO_UNIX_EPOCH_TIME;
+
+ if (timer != NULL) {
+ *timer = epoch_time;
+ }
+ return epoch_time;
+}
+#endif /* WOLFSSL_WICED */
+
+#ifdef WOLFSSL_TELIT_M2MB
+ time_t m2mb_xtime(time_t * timer)
+ {
+ time_t myTime = 0;
+ INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+ if (fd != -1) {
+ M2MB_RTC_TIMEVAL_T timeval;
+
+ m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+
+ myTime = timeval.sec;
+
+ m2mb_rtc_close(fd);
}
+ return myTime;
+ }
+ #ifdef WOLFSSL_TLS13
+ time_t m2mb_xtime_ms(time_t * timer)
+ {
+ time_t myTime = 0;
+ INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+ if (fd != -1) {
+ M2MB_RTC_TIMEVAL_T timeval;
- int UnLockMutex(wolfSSL_Mutex* m)
- {
- os_mut_release (m);
+ m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+
+ myTime = timeval.sec + timeval.msec;
+
+ m2mb_rtc_close(fd);
+ }
+ return myTime;
+ }
+ #endif /* WOLFSSL_TLS13 */
+ #ifndef NO_CRYPT_BENCHMARK
+ double m2mb_xtime_bench(int reset)
+ {
+ double myTime = 0;
+ INT32 fd = m2mb_rtc_open("/dev/rtc0", 0);
+ if (fd != -1) {
+ M2MB_RTC_TIMEVAL_T timeval;
+
+ m2mb_rtc_ioctl(fd, M2MB_RTC_IOCTL_GET_TIMEVAL, &timeval);
+
+ myTime = (double)timeval.sec + ((double)timeval.msec / 1000);
+
+ m2mb_rtc_close(fd);
+ }
+ return myTime;
+ }
+ #endif /* !NO_CRYPT_BENCHMARK */
+#endif /* WOLFSSL_TELIT_M2MB */
+
+#endif /* !NO_ASN_TIME */
+
+#ifndef WOLFSSL_LEANPSK
+char* mystrnstr(const char* s1, const char* s2, unsigned int n)
+{
+ unsigned int s2_len = (unsigned int)XSTRLEN(s2);
+
+ if (s2_len == 0)
+ return (char*)s1;
+
+ while (n >= s2_len && s1[0]) {
+ if (s1[0] == s2[0])
+ if (XMEMCMP(s1, s2, s2_len) == 0)
+ return (char*)s1;
+ s1++;
+ n--;
+ }
+
+ return NULL;
+}
+#endif
+
+/* custom memory wrappers */
+#ifdef WOLFSSL_NUCLEUS_1_2
+
+ /* system memory pool */
+ extern NU_MEMORY_POOL System_Memory;
+
+ void* nucleus_malloc(unsigned long size, void* heap, int type)
+ {
+ STATUS status;
+ void* stack_ptr;
+
+ status = NU_Allocate_Memory(&System_Memory, &stack_ptr, size,
+ NU_NO_SUSPEND);
+ if (status == NU_SUCCESS) {
return 0;
+ } else {
+ return stack_ptr;
}
- #endif
- #endif /* USE_WINDOWS_API */
+ }
+
+ void* nucleus_realloc(void* ptr, unsigned long size, void* heap, int type)
+ {
+ DM_HEADER* old_header;
+ word32 old_size, copy_size;
+ void* new_mem;
+
+ /* if ptr is NULL, behave like malloc */
+ new_mem = nucleus_malloc(size, NULL, 0);
+ if (new_mem == 0 || ptr == 0) {
+ return new_mem;
+ }
+
+ /* calculate old memory block size */
+ /* mem pointers stored in block headers (ref dm_defs.h) */
+ old_header = (DM_HEADER*) ((byte*)ptr - DM_OVERHEAD);
+ old_size = (byte*)old_header->dm_next_memory - (byte*)ptr;
+
+ /* copy old to new */
+ if (old_size < size) {
+ copy_size = old_size;
+ } else {
+ copy_size = size;
+ }
+ XMEMCPY(new_mem, ptr, copy_size);
-#endif /* SINGLE_THREADED */
-
-#if defined(WOLFSSL_TI_CRYPT) || defined(WOLFSSL_TI_HASH)
+ /* free old */
+ nucleus_free(ptr, NULL, 0);
+
+ return new_mem;
+ }
+
+ void nucleus_free(void* ptr, void* heap, int type)
+ {
+ if (ptr != NULL)
+ NU_Deallocate_Memory(ptr);
+ }
+
+#endif /* WOLFSSL_NUCLEUS_1_2 */
+
+#if defined(WOLFSSL_TI_CRYPT) || defined(WOLFSSL_TI_HASH)
#include <wolfcrypt/src/port/ti/ti-ccm.c> /* initialize and Mutex for TI Crypt Engine */
#include <wolfcrypt/src/port/ti/ti-hash.c> /* md5, sha1, sha224, sha256 */
#endif
+
+#if defined(WOLFSSL_CRYPTOCELL)
+ #define WOLFSSL_CRYPTOCELL_C
+ #include <wolfcrypt/src/port/arm/cryptoCell.c> /* CC310, RTC and RNG */
+ #if !defined(NO_SHA256)
+ #define WOLFSSL_CRYPTOCELL_HASH_C
+ #include <wolfcrypt/src/port/arm/cryptoCellHash.c> /* sha256 */
+ #endif
+#endif
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfcrypt_first.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfcrypt_first.c
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfcrypt_first.c
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfcrypt_last.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfcrypt_last.c
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfcrypt_last.c
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfevent.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfevent.c
new file mode 100644
index 000000000..20848cddc
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfevent.c
@@ -0,0 +1,283 @@
+/* wolfevent.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+
+#ifdef HAVE_WOLF_EVENT
+
+#include <wolfssl/internal.h>
+#include <wolfssl/error-ssl.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+
+#include <wolfssl/wolfcrypt/wolfevent.h>
+
+
+int wolfEvent_Init(WOLF_EVENT* event, WOLF_EVENT_TYPE type, void* context)
+{
+ if (event == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (event->state == WOLF_EVENT_STATE_PENDING) {
+ WOLFSSL_MSG("Event already pending!");
+ return BAD_COND_E;
+ }
+
+ XMEMSET(event, 0, sizeof(WOLF_EVENT));
+ event->type = type;
+ event->context = context;
+
+ return 0;
+}
+
+int wolfEvent_Poll(WOLF_EVENT* event, WOLF_EVENT_FLAG flags)
+{
+ int ret = BAD_COND_E;
+
+ /* Check hardware */
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (event->type >= WOLF_EVENT_TYPE_ASYNC_FIRST &&
+ event->type <= WOLF_EVENT_TYPE_ASYNC_LAST)
+ {
+ ret = wolfAsync_EventPoll(event, flags);
+ }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+ return ret;
+}
+
+int wolfEventQueue_Init(WOLF_EVENT_QUEUE* queue)
+{
+ int ret = 0;
+
+ if (queue == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ XMEMSET(queue, 0, sizeof(WOLF_EVENT_QUEUE));
+#ifndef SINGLE_THREADED
+ ret = wc_InitMutex(&queue->lock);
+#endif
+ return ret;
+}
+
+
+int wolfEventQueue_Push(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
+{
+ int ret;
+
+ if (queue == NULL || event == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifndef SINGLE_THREADED
+ if ((ret = wc_LockMutex(&queue->lock)) != 0) {
+ return ret;
+ }
+#endif
+
+ ret = wolfEventQueue_Add(queue, event);
+
+#ifndef SINGLE_THREADED
+ wc_UnLockMutex(&queue->lock);
+#endif
+
+ return ret;
+}
+
+int wolfEventQueue_Pop(WOLF_EVENT_QUEUE* queue, WOLF_EVENT** event)
+{
+ int ret = 0;
+
+ if (queue == NULL || event == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifndef SINGLE_THREADED
+ /* In single threaded mode "event_queue.lock" doesn't exist */
+ if ((ret = wc_LockMutex(&queue->lock)) != 0) {
+ return ret;
+ }
+#endif
+
+ /* Pop first item off queue */
+ *event = queue->head;
+ ret = wolfEventQueue_Remove(queue, *event);
+
+#ifndef SINGLE_THREADED
+ wc_UnLockMutex(&queue->lock);
+#endif
+
+ return ret;
+}
+
+/* assumes queue is locked by caller */
+int wolfEventQueue_Add(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
+{
+ if (queue == NULL || event == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ event->next = NULL; /* added to end */
+ event->prev = NULL;
+ if (queue->tail == NULL) {
+ queue->head = event;
+ }
+ else {
+ queue->tail->next = event;
+ event->prev = queue->tail;
+ }
+ queue->tail = event; /* add to the end either way */
+ queue->count++;
+
+ return 0;
+}
+
+/* assumes queue is locked by caller */
+int wolfEventQueue_Remove(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
+{
+ int ret = 0;
+
+ if (queue == NULL || event == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (event == queue->head && event == queue->tail) {
+ queue->head = NULL;
+ queue->tail = NULL;
+ }
+ else if (event == queue->head) {
+ queue->head = event->next;
+ queue->head->prev = NULL;
+ }
+ else if (event == queue->tail) {
+ queue->tail = event->prev;
+ queue->tail->next = NULL;
+ }
+ else {
+ WOLF_EVENT* next = event->next;
+ WOLF_EVENT* prev = event->prev;
+ next->prev = prev;
+ prev->next = next;
+ }
+ queue->count--;
+
+ return ret;
+}
+
+int wolfEventQueue_Poll(WOLF_EVENT_QUEUE* queue, void* context_filter,
+ WOLF_EVENT** events, int maxEvents, WOLF_EVENT_FLAG flags, int* eventCount)
+{
+ WOLF_EVENT* event;
+ int ret = 0, count = 0;
+
+ if (queue == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifndef SINGLE_THREADED
+ /* In single threaded mode "event_queue.lock" doesn't exist */
+ if ((ret = wc_LockMutex(&queue->lock)) != 0) {
+ return ret;
+ }
+#endif
+
+ /* itterate event queue */
+ for (event = queue->head; event != NULL; event = event->next)
+ {
+ /* optional filter based on context */
+ if (context_filter == NULL || event->context == context_filter) {
+
+ /* poll event */
+ ret = wolfEvent_Poll(event, flags);
+ if (ret < 0) break; /* exit for */
+
+ /* If event is done then process */
+ if (event->state == WOLF_EVENT_STATE_DONE) {
+ /* remove from queue */
+ ret = wolfEventQueue_Remove(queue, event);
+ if (ret < 0) break; /* exit for */
+
+ /* return pointer in 'events' arg */
+ if (events) {
+ events[count] = event; /* return pointer */
+ }
+ count++;
+
+ /* check to make sure our event list isn't full */
+ if (events && count >= maxEvents) {
+ break; /* exit for */
+ }
+ }
+ }
+ }
+
+#ifndef SINGLE_THREADED
+ wc_UnLockMutex(&queue->lock);
+#endif
+
+ /* return number of properly populated events */
+ if (eventCount) {
+ *eventCount = count;
+ }
+
+ return ret;
+}
+
+int wolfEventQueue_Count(WOLF_EVENT_QUEUE* queue)
+{
+ int ret;
+
+ if (queue == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+#ifndef SINGLE_THREADED
+ /* In single threaded mode "event_queue.lock" doesn't exist */
+ if ((ret = wc_LockMutex(&queue->lock)) != 0) {
+ return ret;
+ }
+#endif
+
+ ret = queue->count;
+
+#ifndef SINGLE_THREADED
+ wc_UnLockMutex(&queue->lock);
+#endif
+
+ return ret;
+}
+
+void wolfEventQueue_Free(WOLF_EVENT_QUEUE* queue)
+{
+ if (queue) {
+ #ifndef SINGLE_THREADED
+ wc_FreeMutex(&queue->lock);
+ #endif
+ }
+}
+
+#endif /* HAVE_WOLF_EVENT */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfmath.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfmath.c
new file mode 100644
index 000000000..0c17a0a27
--- /dev/null
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/wolfmath.c
@@ -0,0 +1,381 @@
+/* wolfmath.c
+ *
+ * Copyright (C) 2006-2020 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+/* common functions for either math library */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+
+/* in case user set USE_FAST_MATH there */
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include <wolfssl/wolfcrypt/integer.h>
+
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/logging.h>
+
+#if defined(USE_FAST_MATH) || !defined(NO_BIG_INT)
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ #include <wolfssl/wolfcrypt/async.h>
+#endif
+
+#ifdef NO_INLINE
+ #include <wolfssl/wolfcrypt/misc.h>
+#else
+ #define WOLFSSL_MISC_INCLUDED
+ #include <wolfcrypt/src/misc.c>
+#endif
+
+
+#if !defined(WC_NO_CACHE_RESISTANT) && \
+ ((defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT)) || \
+ (defined(USE_FAST_MATH) && defined(TFM_TIMING_RESISTANT)))
+
+ /* all off / all on pointer addresses for constant calculations */
+ /* ecc.c uses same table */
+ const wolfssl_word wc_off_on_addr[2] =
+ {
+ #if defined(WC_64BIT_CPU)
+ W64LIT(0x0000000000000000),
+ W64LIT(0xffffffffffffffff)
+ #elif defined(WC_16BIT_CPU)
+ 0x0000U,
+ 0xffffU
+ #else
+ /* 32 bit */
+ 0x00000000U,
+ 0xffffffffU
+ #endif
+ };
+#endif
+
+
+#if !defined(WOLFSSL_SP_MATH)
+int get_digit_count(mp_int* a)
+{
+ if (a == NULL)
+ return 0;
+
+ return a->used;
+}
+#endif
+
+mp_digit get_digit(mp_int* a, int n)
+{
+ if (a == NULL)
+ return 0;
+
+ return (n >= a->used || n < 0) ? 0 : a->dp[n];
+}
+
+/* Conditionally copy a into b. Performed in constant time.
+ *
+ * a MP integer to copy.
+ * copy  On 1, copy a into b. On 0, leave b unchanged.
+ * b MP integer to copy into.
+ * returns BAD_FUNC_ARG when a or b is NULL, MEMORY_E when growing b fails and
+ * MP_OKAY otherwise.
+ */
+int mp_cond_copy(mp_int* a, int copy, mp_int* b)
+{
+ int err = MP_OKAY;
+ int i;
+ mp_digit mask = (mp_digit)0 - copy;
+
+ if (a == NULL || b == NULL)
+ err = BAD_FUNC_ARG;
+
+ /* Ensure b has enough space to copy a into */
+ if (err == MP_OKAY)
+ err = mp_grow(b, a->used + 1);
+ if (err == MP_OKAY) {
+        /* When mask is 0, b is unchanged.
+ * When mask all set, b ^ b ^ a = a
+ */
+        /* Conditionally copy all digits and then number of used digits.
+ * get_digit() returns 0 when index greater than available digit.
+ */
+ for (i = 0; i < a->used; i++) {
+ b->dp[i] ^= (get_digit(a, i) ^ get_digit(b, i)) & mask;
+ }
+ for (; i < b->used; i++) {
+ b->dp[i] ^= (get_digit(a, i) ^ get_digit(b, i)) & mask;
+ }
+ b->used ^= (a->used ^ b->used) & (int)mask;
+ }
+
+ return err;
+}
+
+#ifndef WC_NO_RNG
+int get_rand_digit(WC_RNG* rng, mp_digit* d)
+{
+ return wc_RNG_GenerateBlock(rng, (byte*)d, sizeof(mp_digit));
+}
+
+#ifdef WC_RSA_BLINDING
+int mp_rand(mp_int* a, int digits, WC_RNG* rng)
+{
+ int ret = 0;
+ int cnt = digits * sizeof(mp_digit);
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+ int i;
+#endif
+
+ if (rng == NULL) {
+ ret = MISSING_RNG_E;
+ }
+ else if (a == NULL) {
+ ret = BAD_FUNC_ARG;
+ }
+
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+ /* allocate space for digits */
+ if (ret == MP_OKAY) {
+ ret = mp_set_bit(a, digits * DIGIT_BIT - 1);
+ }
+#else
+#if defined(WOLFSSL_SP_MATH)
+ if ((ret == MP_OKAY) && (digits > SP_INT_DIGITS))
+#else
+ if ((ret == MP_OKAY) && (digits > FP_SIZE))
+#endif
+ {
+ ret = BAD_FUNC_ARG;
+ }
+ if (ret == MP_OKAY) {
+ a->used = digits;
+ }
+#endif
+ /* fill the data with random bytes */
+ if (ret == MP_OKAY) {
+ ret = wc_RNG_GenerateBlock(rng, (byte*)a->dp, cnt);
+ }
+ if (ret == MP_OKAY) {
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+ /* Mask down each digit to only bits used */
+ for (i = 0; i < a->used; i++) {
+ a->dp[i] &= MP_MASK;
+ }
+#endif
+ /* ensure top digit is not zero */
+ while ((ret == MP_OKAY) && (a->dp[a->used - 1] == 0)) {
+ ret = get_rand_digit(rng, &a->dp[a->used - 1]);
+#if !defined(USE_FAST_MATH) && !defined(WOLFSSL_SP_MATH)
+ a->dp[a->used - 1] &= MP_MASK;
+#endif
+ }
+ }
+
+ return ret;
+}
+#endif /* WC_RSA_BLINDING */
+#endif
+
+/* export an mp_int as unsigned char or hex string
+ * encType is WC_TYPE_UNSIGNED_BIN or WC_TYPE_HEX_STR
+ * return MP_OKAY on success */
+int wc_export_int(mp_int* mp, byte* buf, word32* len, word32 keySz,
+ int encType)
+{
+ int err;
+
+ if (mp == NULL)
+ return BAD_FUNC_ARG;
+
+ /* check buffer size */
+ if (*len < keySz) {
+ *len = keySz;
+ return BUFFER_E;
+ }
+
+ *len = keySz;
+ XMEMSET(buf, 0, *len);
+
+ if (encType == WC_TYPE_HEX_STR) {
+ #ifdef WC_MP_TO_RADIX
+ err = mp_tohex(mp, (char*)buf);
+ #else
+ err = NOT_COMPILED_IN;
+ #endif
+ }
+ else {
+ err = mp_to_unsigned_bin(mp, buf + (keySz - mp_unsigned_bin_size(mp)));
+ }
+
+ return err;
+}
+
+
+#ifdef HAVE_WOLF_BIGINT
+void wc_bigint_init(WC_BIGINT* a)
+{
+ if (a != NULL) {
+ a->buf = NULL;
+ a->len = 0;
+ a->heap = NULL;
+ }
+}
+
+int wc_bigint_alloc(WC_BIGINT* a, word32 sz)
+{
+ int err = MP_OKAY;
+
+ if (a == NULL)
+ return BAD_FUNC_ARG;
+
+ if (sz > 0) {
+ if (a->buf && sz > a->len) {
+ wc_bigint_free(a);
+ }
+ if (a->buf == NULL) {
+ a->buf = (byte*)XMALLOC(sz, a->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+ if (a->buf == NULL) {
+ err = MP_MEM;
+ }
+ }
+ else {
+ XMEMSET(a->buf, 0, sz);
+ }
+ }
+ a->len = sz;
+
+ return err;
+}
+
+/* assumes input is big endian format */
+int wc_bigint_from_unsigned_bin(WC_BIGINT* a, const byte* in, word32 inlen)
+{
+ int err;
+
+ if (a == NULL || in == NULL || inlen == 0)
+ return BAD_FUNC_ARG;
+
+ err = wc_bigint_alloc(a, inlen);
+ if (err == 0) {
+ XMEMCPY(a->buf, in, inlen);
+ }
+
+ return err;
+}
+
+int wc_bigint_to_unsigned_bin(WC_BIGINT* a, byte* out, word32* outlen)
+{
+ word32 sz;
+
+ if (a == NULL || out == NULL || outlen == NULL || *outlen == 0)
+ return BAD_FUNC_ARG;
+
+ /* trim to fit into output buffer */
+ sz = a->len;
+ if (a->len > *outlen) {
+ WOLFSSL_MSG("wc_bigint_export: Truncating output");
+ sz = *outlen;
+ }
+
+ if (a->buf) {
+ XMEMCPY(out, a->buf, sz);
+ }
+
+ *outlen = sz;
+
+ return MP_OKAY;
+}
+
+void wc_bigint_zero(WC_BIGINT* a)
+{
+ if (a && a->buf) {
+ ForceZero(a->buf, a->len);
+ }
+}
+
+void wc_bigint_free(WC_BIGINT* a)
+{
+ if (a) {
+ if (a->buf) {
+ XFREE(a->buf, a->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+ }
+ a->buf = NULL;
+ a->len = 0;
+ }
+}
+
+/* sz: make sure the buffer is at least that size and zero padded.
+ * A `sz == 0` will use the size of `src`.
+ * The calculated sz is stored into dst->len in `wc_bigint_alloc`.
+ */
+int wc_mp_to_bigint_sz(mp_int* src, WC_BIGINT* dst, word32 sz)
+{
+ int err;
+ word32 x, y;
+
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
+
+ /* get size of source */
+ x = mp_unsigned_bin_size(src);
+ if (sz < x)
+ sz = x;
+
+ /* make sure destination is allocated and large enough */
+ err = wc_bigint_alloc(dst, sz);
+ if (err == MP_OKAY) {
+
+ /* leading zero pad */
+ y = sz - x;
+ XMEMSET(dst->buf, 0, y);
+
+ /* export src as unsigned bin to destination buf */
+ err = mp_to_unsigned_bin(src, dst->buf + y);
+ }
+
+ return err;
+}
+
+int wc_mp_to_bigint(mp_int* src, WC_BIGINT* dst)
+{
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
+
+ return wc_mp_to_bigint_sz(src, dst, 0);
+}
+
+int wc_bigint_to_mp(WC_BIGINT* src, mp_int* dst)
+{
+ int err;
+
+ if (src == NULL || dst == NULL)
+ return BAD_FUNC_ARG;
+
+ if (src->buf == NULL)
+ return BAD_FUNC_ARG;
+
+ err = mp_read_unsigned_bin(dst, src->buf, src->len);
+ wc_bigint_free(src);
+
+ return err;
+}
+#endif /* HAVE_WOLF_BIGINT */
+
+#endif /* USE_FAST_MATH || !NO_BIG_INT */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/include.am b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/include.am
index 950d7c601..b2fc302c1 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/include.am
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/include.am
@@ -1,11 +1,18 @@
# vim:ft=automake
# All paths should be given relative to the root
+if BUILD_WOLFCRYPT_TESTS
+noinst_PROGRAMS+= wolfcrypt/test/testwolfcrypt
+if BUILD_CRYPTONLY
+check_PROGRAMS+= wolfcrypt/test/testwolfcrypt
+endif
noinst_PROGRAMS+= wolfcrypt/test/testwolfcrypt
wolfcrypt_test_testwolfcrypt_SOURCES = wolfcrypt/test/test.c
-wolfcrypt_test_testwolfcrypt_LDADD = src/libwolfssl.la
+wolfcrypt_test_testwolfcrypt_LDADD = src/libwolfssl.la $(LIB_STATIC_ADD)
wolfcrypt_test_testwolfcrypt_DEPENDENCIES = src/libwolfssl.la
noinst_HEADERS += wolfcrypt/test/test.h
+endif
EXTRA_DIST += wolfcrypt/test/test.sln
EXTRA_DIST += wolfcrypt/test/test.vcproj
+EXTRA_DIST += wolfcrypt/test/README.md
DISTCLEANFILES+= wolfcrypt/test/.libs/testwolfcrypt
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.c
index cf5dbe56e..399a29b75 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.c
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.c
@@ -1,8 +1,8 @@
/* test.c
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,22 +16,111 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
+#ifndef WOLFSSL_USER_SETTINGS
+ #include <wolfssl/options.h>
+#endif
#include <wolfssl/wolfcrypt/settings.h>
+#include <wolfssl/version.h>
+#include <wolfssl/wolfcrypt/wc_port.h>
+
+#ifndef NO_CRYPT_TEST
-#ifdef XMALLOC_USER
- #include <stdlib.h> /* we're using malloc / free direct here */
+/* only for stack size check */
+#ifdef HAVE_STACK_SIZE
+ #include <wolfssl/ssl.h>
+ #define err_sys err_sys_remap /* remap err_sys */
+ #include <wolfssl/test.h>
+ #undef err_sys
#endif
-#ifndef NO_CRYPT_TEST
+#ifdef USE_FLAT_TEST_H
+ #include "test.h"
+#else
+ #include "wolfcrypt/test/test.h"
+#endif
-#ifdef WOLFSSL_TEST_CERT
+/* printf mappings */
+#if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
+ #include <mqx.h>
+ #include <stdlib.h>
+ /* see wc_port.h for fio.h and nio.h includes */
+#elif defined(FREESCALE_KSDK_BM)
+ #include "fsl_debug_console.h"
+ #undef printf
+ #define printf PRINTF
+#elif defined(WOLFSSL_APACHE_MYNEWT)
+ #include <assert.h>
+ #include <string.h>
+ #include "sysinit/sysinit.h"
+ #include "os/os.h"
+ #ifdef ARCH_sim
+ #include "mcu/mcu_sim.h"
+ #endif
+ #include "os/os_time.h"
+#elif defined(WOLFSSL_ESPIDF)
+ #include <time.h>
+ #include <sys/time.h>
+#elif defined(WOLFSSL_ZEPHYR)
+ #include <stdio.h>
+
+ #define printf printk
+#elif defined(MICRIUM)
+ #include <bsp_ser.h>
+ void BSP_Ser_Printf (CPU_CHAR* format, ...);
+ #undef printf
+ #define printf BSP_Ser_Printf
+#elif defined(WOLFSSL_PB)
+ #include <stdarg.h>
+ int wolfssl_pb_print(const char*, ...);
+ #undef printf
+ #define printf wolfssl_pb_print
+#elif defined(WOLFSSL_TELIT_M2MB)
+ #include "wolfssl/wolfcrypt/wc_port.h" /* for m2mb headers */
+ #include "m2m_log.h" /* for M2M_LOG_INFO - not standard API */
+ /* remap printf */
+ #undef printf
+ #define printf M2M_LOG_INFO
+ /* OS requires occasional sleep() */
+ #ifndef TEST_SLEEP_MS
+ #define TEST_SLEEP_MS 50
+ #endif
+ #define TEST_SLEEP() m2mb_os_taskSleep(M2MB_OS_MS2TICKS(TEST_SLEEP_MS))
+ /* don't use file system for these tests, since ./certs dir isn't loaded */
+ #undef NO_FILESYSTEM
+ #define NO_FILESYSTEM
+#elif defined(THREADX) && !defined(WOLFSSL_WICED) && !defined(THREADX_NO_DC_PRINTF)
+ /* since just testing, use THREADX log printf instead */
+ int dc_log_printf(char*, ...);
+ #undef printf
+ #define printf dc_log_printf
+#else
+ #ifdef XMALLOC_USER
+ #include <stdlib.h> /* we're using malloc / free direct here */
+ #endif
+ #ifndef STRING_USER
+ #include <stdio.h>
+ #endif
+
+ /* enable way for customer to override test/bench printf */
+ #ifdef XPRINTF
+ #undef printf
+ #define printf XPRINTF
+ #endif
+#endif
+
+#include <wolfssl/wolfcrypt/memory.h>
+#include <wolfssl/wolfcrypt/wc_port.h>
+#include <wolfssl/wolfcrypt/logging.h>
+#include <wolfssl/wolfcrypt/types.h>
+#if defined(WOLFSSL_TEST_CERT) || defined(ASN_BER_TO_DER)
#include <wolfssl/wolfcrypt/asn.h>
#else
#include <wolfssl/wolfcrypt/asn_public.h>
@@ -43,16 +132,25 @@
#include <wolfssl/wolfcrypt/sha256.h>
#include <wolfssl/wolfcrypt/sha512.h>
#include <wolfssl/wolfcrypt/arc4.h>
-#include <wolfssl/wolfcrypt/random.h>
+#if defined(WC_NO_RNG)
+ #include <wolfssl/wolfcrypt/integer.h>
+#else
+ #include <wolfssl/wolfcrypt/random.h>
+#endif
#include <wolfssl/wolfcrypt/coding.h>
+#include <wolfssl/wolfcrypt/signature.h>
#include <wolfssl/wolfcrypt/rsa.h>
#include <wolfssl/wolfcrypt/des3.h>
#include <wolfssl/wolfcrypt/aes.h>
+#include <wolfssl/wolfcrypt/wc_encrypt.h>
+#include <wolfssl/wolfcrypt/cmac.h>
#include <wolfssl/wolfcrypt/poly1305.h>
#include <wolfssl/wolfcrypt/camellia.h>
#include <wolfssl/wolfcrypt/hmac.h>
#include <wolfssl/wolfcrypt/dh.h>
#include <wolfssl/wolfcrypt/dsa.h>
+#include <wolfssl/wolfcrypt/srp.h>
+#include <wolfssl/wolfcrypt/idea.h>
#include <wolfssl/wolfcrypt/hc128.h>
#include <wolfssl/wolfcrypt/rabbit.h>
#include <wolfssl/wolfcrypt/chacha.h>
@@ -69,9 +167,18 @@
#ifdef HAVE_ED25519
#include <wolfssl/wolfcrypt/ed25519.h>
#endif
-#ifdef HAVE_BLAKE2
+#ifdef HAVE_CURVE448
+ #include <wolfssl/wolfcrypt/curve448.h>
+#endif
+#ifdef HAVE_ED448
+ #include <wolfssl/wolfcrypt/ed448.h>
+#endif
+#if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
#include <wolfssl/wolfcrypt/blake2.h>
#endif
+#ifdef WOLFSSL_SHA3
+ #include <wolfssl/wolfcrypt/sha3.h>
+#endif
#ifdef HAVE_LIBZ
#include <wolfssl/wolfcrypt/compress.h>
#endif
@@ -81,6 +188,27 @@
#ifdef HAVE_FIPS
#include <wolfssl/wolfcrypt/fips_test.h>
#endif
+#ifdef HAVE_SELFTEST
+ #include <wolfssl/wolfcrypt/selftest.h>
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+ #include <wolfssl/wolfcrypt/async.h>
+#endif
+#if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+ #include <wolfssl/wolfcrypt/logging.h>
+#endif
+#ifdef WOLFSSL_IMX6_CAAM_BLOB
+ #include <wolfssl/wolfcrypt/port/caam/wolfcaam.h>
+#endif
+#ifdef WOLF_CRYPTO_CB
+ #include <wolfssl/wolfcrypt/cryptocb.h>
+ #ifdef HAVE_INTEL_QA_SYNC
+ #include <wolfssl/wolfcrypt/port/intel/quickassist_sync.h>
+ #endif
+ #ifdef HAVE_CAVIUM_OCTEON_SYNC
+ #include <wolfssl/wolfcrypt/port/cavium/cavium_octeon_sync.h>
+ #endif
+#endif
#ifdef _MSC_VER
/* 4996 warning to use MS extensions e.g., strcpy_s instead of strncpy */
@@ -88,59 +216,57 @@
#endif
#ifdef OPENSSL_EXTRA
+ #ifndef WOLFCRYPT_ONLY
#include <wolfssl/openssl/evp.h>
+ #endif
#include <wolfssl/openssl/rand.h>
#include <wolfssl/openssl/hmac.h>
+ #include <wolfssl/openssl/aes.h>
#include <wolfssl/openssl/des.h>
#endif
-
-#if defined(USE_CERT_BUFFERS_1024) || defined(USE_CERT_BUFFERS_2048) \
- || !defined(NO_DH)
- /* include test cert and key buffers for use with NO_FILESYSTEM */
- #if defined(WOLFSSL_MDK_ARM)
- #include "cert_data.h"
- /* use certs_test.c for initial data, so other
- commands can share the data. */
- #else
- #include <wolfssl/certs_test.h>
+#if defined(NO_FILESYSTEM)
+ #if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) && \
+ !defined(USE_CERT_BUFFERS_3072) && !defined(USE_CERT_BUFFERS_4096)
+ #define USE_CERT_BUFFERS_2048
+ #endif
+ #if !defined(USE_CERT_BUFFERS_256)
+ #define USE_CERT_BUFFERS_256
#endif
#endif
-#if defined(WOLFSSL_MDK_ARM)
- #include <stdio.h>
- #include <stdlib.h>
- extern FILE * wolfSSL_fopen(const char *fname, const char *mode) ;
- #define fopen wolfSSL_fopen
+#if defined(WOLFSSL_CERT_GEN) && (defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES))
+ #define ENABLE_ECC384_CERT_GEN_TEST
#endif
+#include <wolfssl/certs_test.h>
+
#ifdef HAVE_NTRU
- #include "ntru_crypto.h"
-#endif
-#ifdef HAVE_CAVIUM
- #include "cavium_sysdep.h"
- #include "cavium_common.h"
- #include "cavium_ioctl.h"
+ #include "libntruencrypt/ntru_crypto.h"
#endif
-#ifdef FREESCALE_MQX
- #include <mqx.h>
- #include <fio.h>
- #include <stdlib.h>
+#ifdef WOLFSSL_STATIC_MEMORY
+ static WOLFSSL_HEAP_HINT* HEAP_HINT;
#else
- #include <stdio.h>
-#endif
+ #define HEAP_HINT NULL
+#endif /* WOLFSSL_STATIC_MEMORY */
+/* these cases do not have intermediate hashing support */
+#if (defined(WOLFSSL_AFALG_XILINX_SHA3) && !defined(WOLFSSL_AFALG_HASH_KEEP)) \
+ && !defined(WOLFSSL_XILINX_CRYPT)
+ #define NO_INTM_HASH_TEST
+#endif
-#ifdef THREADX
- /* since just testing, use THREADX log printf instead */
- int dc_log_printf(char*, ...);
- #undef printf
- #define printf dc_log_printf
+#if defined(WOLFSSL_CERT_GEN) && defined(WOLFSSL_MULTI_ATTRIB)
+static void initDefaultName(void);
#endif
-#include "wolfcrypt/test/test.h"
+/* for async devices */
+static int devId = INVALID_DEVID;
+#ifdef HAVE_WNR
+ const char* wnrConfigFile = "wnr-example.conf";
+#endif
typedef struct testVector {
const char* input;
@@ -149,20 +275,30 @@ typedef struct testVector {
size_t outLen;
} testVector;
+int error_test(void);
+int base64_test(void);
+int base16_test(void);
+int asn_test(void);
int md2_test(void);
int md5_test(void);
int md4_test(void);
int sha_test(void);
+int sha224_test(void);
int sha256_test(void);
int sha512_test(void);
int sha384_test(void);
+int sha3_test(void);
+int shake256_test(void);
+int hash_test(void);
int hmac_md5_test(void);
int hmac_sha_test(void);
+int hmac_sha224_test(void);
int hmac_sha256_test(void);
int hmac_sha384_test(void);
int hmac_sha512_test(void);
-int hmac_blake2b_test(void);
+int hmac_sha3_test(void);
int hkdf_test(void);
+int x963kdf_test(void);
int arc4_test(void);
int hc128_test(void);
int rabbit_test(void);
@@ -171,26 +307,49 @@ int chacha20_poly1305_aead_test(void);
int des_test(void);
int des3_test(void);
int aes_test(void);
+int aes192_test(void);
+int aes256_test(void);
+int aesofb_test(void);
+int cmac_test(void);
int poly1305_test(void);
int aesgcm_test(void);
+int aesgcm_default_test(void);
int gmac_test(void);
int aesccm_test(void);
+int aeskeywrap_test(void);
int camellia_test(void);
+int rsa_no_pad_test(void);
int rsa_test(void);
int dh_test(void);
int dsa_test(void);
+int srp_test(void);
+#ifndef WC_NO_RNG
int random_test(void);
+#endif /* WC_NO_RNG */
int pwdbased_test(void);
int ripemd_test(void);
+#if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
int openssl_test(void); /* test mini api */
+
+int openssl_pkey_test(void);
+int openssl_pkey0_test(void);
+int openssl_pkey1_test(void);
+int openSSL_evpMD_test(void);
+int openssl_evpSig_test(void);
+#endif
+
int pbkdf1_test(void);
int pkcs12_test(void);
int pbkdf2_test(void);
+int scrypt_test(void);
#ifdef HAVE_ECC
int ecc_test(void);
#ifdef HAVE_ECC_ENCRYPT
int ecc_encrypt_test(void);
#endif
+ #ifdef USE_CERT_BUFFERS_256
+ int ecc_test_buffers(void);
+ #endif
#endif
#ifdef HAVE_CURVE25519
int curve25519_test(void);
@@ -198,36 +357,104 @@ int pbkdf2_test(void);
#ifdef HAVE_ED25519
int ed25519_test(void);
#endif
+#ifdef HAVE_CURVE448
+ int curve448_test(void);
+#endif
+#ifdef HAVE_ED448
+ int ed448_test(void);
+#endif
#ifdef HAVE_BLAKE2
int blake2b_test(void);
#endif
+#ifdef HAVE_BLAKE2S
+ int blake2s_test(void);
+#endif
#ifdef HAVE_LIBZ
int compress_test(void);
#endif
#ifdef HAVE_PKCS7
- int pkcs7enveloped_test(void);
+ #ifndef NO_PKCS7_ENCRYPTED_DATA
+ int pkcs7encrypted_test(void);
+ #endif
+ #if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+ int pkcs7compressed_test(void);
+ #endif
int pkcs7signed_test(void);
+ int pkcs7enveloped_test(void);
+ #if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+ int pkcs7authenveloped_test(void);
+ #endif
+ #ifndef NO_AES
+ int pkcs7callback_test(byte* cert, word32 certSz, byte* key,
+ word32 keySz);
+ #endif
+#endif
+#if !defined(NO_ASN_TIME) && !defined(NO_RSA) && defined(WOLFSSL_TEST_CERT) && \
+ !defined(NO_FILESYSTEM)
+int cert_test(void);
+#endif
+#if defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_TEST_CERT) && \
+ !defined(NO_FILESYSTEM)
+int certext_test(void);
+#endif
+#if defined(WOLFSSL_CERT_GEN_CACHE) && defined(WOLFSSL_TEST_CERT) && \
+ defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_CERT_GEN)
+int decodedCertCache_test(void);
+#endif
+#ifdef HAVE_IDEA
+int idea_test(void);
+#endif
+int memory_test(void);
+#ifdef HAVE_VALGRIND
+int mp_test(void);
+#endif
+#if defined(WOLFSSL_PUBLIC_MP) && defined(WOLFSSL_KEY_GEN)
+int prime_test(void);
+#endif
+#ifdef ASN_BER_TO_DER
+int berder_test(void);
+#endif
+int logging_test(void);
+int mutex_test(void);
+#if defined(USE_WOLFSSL_MEMORY) && !defined(FREERTOS)
+int memcb_test(void);
+#endif
+#ifdef WOLFSSL_IMX6_CAAM_BLOB
+int blob_test(void);
#endif
+#ifdef WOLF_CRYPTO_CB
+int cryptocb_test(void);
+#endif
+#ifdef WOLFSSL_CERT_PIV
+int certpiv_test(void);
+#endif
-/* General big buffer size for many tests. */
+/* General big buffer size for many tests. */
#define FOURK_BUF 4096
-static int err_sys(const char* msg, int es)
+#define ERROR_OUT(err, eLabel) { ret = (err); goto eLabel; }
+#ifdef HAVE_STACK_SIZE
+static THREAD_RETURN err_sys(const char* msg, int es)
+#else
+static int err_sys(const char* msg, int es)
+#endif
{
printf("%s error = %d\n", msg, es);
- return -1; /* error state */
+
+ EXIT_TEST(-1);
}
-/* func_args from test.h, so don't have to pull in other junk */
+#ifndef HAVE_STACK_SIZE
+/* func_args from test.h, so don't have to pull in other stuff */
typedef struct func_args {
int argc;
char** argv;
int return_code;
} func_args;
-
+#endif /* !HAVE_STACK_SIZE */
#ifdef HAVE_FIPS
@@ -245,90 +472,268 @@ static void myFipsCb(int ok, int err, const char* hash)
#endif /* HAVE_FIPS */
+#ifdef WOLFSSL_STATIC_MEMORY
+ #ifdef BENCH_EMBEDDED
+ static byte gTestMemory[14000];
+ #elif defined(WOLFSSL_CERT_EXT)
+ static byte gTestMemory[140000];
+ #elif defined(USE_FAST_MATH) && !defined(ALT_ECC_SIZE)
+ static byte gTestMemory[160000];
+ #else
+ static byte gTestMemory[80000];
+ #endif
+#endif
+
+#ifdef WOLFSSL_PB
+int wolfssl_pb_print(const char* msg, ...)
+{
+ int ret;
+ va_list args;
+ char tmpBuf[80];
+
+ va_start(args, msg);
+ ret = vsprint(tmpBuf, msg, args);
+ va_end(args);
+
+ fnDumpStringToSystemLog(tmpBuf);
+ return ret;
+}
+#endif /* WOLFSSL_PB */
+
+/* optional macro to add sleep between tests */
+#ifdef TEST_SLEEP
+ #include <stdarg.h> /* for var args */
+ static WC_INLINE void test_pass(const char* fmt, ...)
+ {
+ va_list args;
+ va_start(args, fmt);
+ printf(fmt, args);
+ va_end(args);
+ TEST_SLEEP();
+ }
+#else
+ /* redirect to printf */
+ #define test_pass printf
+ /* stub the sleep macro */
+ #define TEST_SLEEP()
+#endif
+
+#ifdef HAVE_STACK_SIZE
+THREAD_RETURN WOLFSSL_THREAD wolfcrypt_test(void* args)
+#else
int wolfcrypt_test(void* args)
+#endif
{
- int ret = 0;
+ int ret;
+
+ printf("------------------------------------------------------------------------------\n");
+ printf(" wolfSSL version %s\n", LIBWOLFSSL_VERSION_STRING);
+ printf("------------------------------------------------------------------------------\n");
+
+ if (args)
+ ((func_args*)args)->return_code = -1; /* error state */
+
+#ifdef WOLFSSL_STATIC_MEMORY
+ if (wc_LoadStaticMemory(&HEAP_HINT, gTestMemory, sizeof(gTestMemory),
+ WOLFMEM_GENERAL, 1) != 0) {
+ printf("unable to load static memory");
+ return(EXIT_FAILURE);
+ }
+#endif
+
+#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
+ wolfSSL_Debugging_ON();
+#endif
- ((func_args*)args)->return_code = -1; /* error state */
+#if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+ wc_SetLoggingHeap(HEAP_HINT);
+#endif
#ifdef HAVE_FIPS
wolfCrypt_SetCb_fips(myFipsCb);
#endif
#if !defined(NO_BIG_INT)
- if (CheckCtcSettings() != 1)
- return err_sys("Build vs runtime math mismatch\n", -1234);
+ if (CheckCtcSettings() != 1) {
+ printf("Sizeof mismatch (build) %x != (run) %x\n",
+ CTC_SETTINGS, CheckRunTimeSettings());
+ return err_sys("Build vs runtime math mismatch\n", -1000);
+ }
-#ifdef USE_FAST_MATH
+#if defined(USE_FAST_MATH) && \
+ (!defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC))
if (CheckFastMathSettings() != 1)
return err_sys("Build vs runtime fastmath FP_MAX_BITS mismatch\n",
- -1235);
+ -1001);
#endif /* USE_FAST_MATH */
#endif /* !NO_BIG_INT */
+#if defined(WOLFSSL_CERT_GEN) && defined(WOLFSSL_MULTI_ATTRIB)
+initDefaultName();
+#endif
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ ret = wolfAsync_DevOpen(&devId);
+ if (ret < 0) {
+ printf("Async device open failed\nRunning without async\n");
+ }
+#else
+ (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+#ifdef WOLF_CRYPTO_CB
+#ifdef HAVE_INTEL_QA_SYNC
+ devId = wc_CryptoCb_InitIntelQa();
+ if (INVALID_DEVID == devId) {
+ printf("Couldn't init the Intel QA\n");
+ }
+#endif
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+ devId = wc_CryptoCb_InitOcteon();
+ if (INVALID_DEVID == devId) {
+ printf("Couldn't init the Cavium Octeon\n");
+ }
+#endif
+#endif
+
+#ifdef HAVE_SELFTEST
+ if ( (ret = wolfCrypt_SelfTest()) != 0)
+ return err_sys("CAVP selftest failed!\n", ret);
+ else
+ test_pass("CAVP selftest passed!\n");
+#endif
+
+ if ( (ret = error_test()) != 0)
+ return err_sys("error test failed!\n", ret);
+ else
+ test_pass("error test passed!\n");
+
+ if ( (ret = memory_test()) != 0)
+ return err_sys("MEMORY test failed!\n", ret);
+ else
+ test_pass("MEMORY test passed!\n");
+
+#ifndef NO_CODING
+ if ( (ret = base64_test()) != 0)
+ return err_sys("base64 test failed!\n", ret);
+ else
+ test_pass("base64 test passed!\n");
+#ifdef WOLFSSL_BASE16
+ if ( (ret = base16_test()) != 0)
+ return err_sys("base16 test failed!\n", ret);
+ else
+ test_pass("base16 test passed!\n");
+#endif
+#endif /* !NO_CODING */
+
+#ifndef NO_ASN
+ if ( (ret = asn_test()) != 0)
+ return err_sys("asn test failed!\n", ret);
+ else
+ test_pass("asn test passed!\n");
+#endif
+
+#ifndef WC_NO_RNG
+ if ( (ret = random_test()) != 0)
+ return err_sys("RANDOM test failed!\n", ret);
+ else
+ test_pass("RANDOM test passed!\n");
+#endif /* WC_NO_RNG */
#ifndef NO_MD5
if ( (ret = md5_test()) != 0)
return err_sys("MD5 test failed!\n", ret);
else
- printf( "MD5 test passed!\n");
+ test_pass("MD5 test passed!\n");
#endif
#ifdef WOLFSSL_MD2
if ( (ret = md2_test()) != 0)
return err_sys("MD2 test failed!\n", ret);
else
- printf( "MD2 test passed!\n");
+ test_pass("MD2 test passed!\n");
#endif
#ifndef NO_MD4
if ( (ret = md4_test()) != 0)
return err_sys("MD4 test failed!\n", ret);
else
- printf( "MD4 test passed!\n");
+ test_pass("MD4 test passed!\n");
#endif
#ifndef NO_SHA
if ( (ret = sha_test()) != 0)
return err_sys("SHA test failed!\n", ret);
else
- printf( "SHA test passed!\n");
+ test_pass("SHA test passed!\n");
+#endif
+
+#ifdef WOLFSSL_SHA224
+ if ( (ret = sha224_test()) != 0)
+ return err_sys("SHA-224 test failed!\n", ret);
+ else
+ test_pass("SHA-224 test passed!\n");
#endif
#ifndef NO_SHA256
if ( (ret = sha256_test()) != 0)
return err_sys("SHA-256 test failed!\n", ret);
else
- printf( "SHA-256 test passed!\n");
+ test_pass("SHA-256 test passed!\n");
#endif
#ifdef WOLFSSL_SHA384
if ( (ret = sha384_test()) != 0)
return err_sys("SHA-384 test failed!\n", ret);
else
- printf( "SHA-384 test passed!\n");
+ test_pass("SHA-384 test passed!\n");
#endif
#ifdef WOLFSSL_SHA512
if ( (ret = sha512_test()) != 0)
return err_sys("SHA-512 test failed!\n", ret);
else
- printf( "SHA-512 test passed!\n");
+ test_pass("SHA-512 test passed!\n");
+#endif
+
+#ifdef WOLFSSL_SHA3
+ if ( (ret = sha3_test()) != 0)
+ return err_sys("SHA-3 test failed!\n", ret);
+ else
+ test_pass("SHA-3 test passed!\n");
+#endif
+
+#ifdef WOLFSSL_SHAKE256
+ if ( (ret = shake256_test()) != 0)
+ return err_sys("SHAKE256 test failed!\n", ret);
+ else
+ test_pass("SHAKE256 test passed!\n");
#endif
+ if ( (ret = hash_test()) != 0)
+ return err_sys("Hash test failed!\n", ret);
+ else
+ test_pass("Hash test passed!\n");
+
#ifdef WOLFSSL_RIPEMD
if ( (ret = ripemd_test()) != 0)
return err_sys("RIPEMD test failed!\n", ret);
else
- printf( "RIPEMD test passed!\n");
+ test_pass("RIPEMD test passed!\n");
#endif
#ifdef HAVE_BLAKE2
if ( (ret = blake2b_test()) != 0)
return err_sys("BLAKE2b test failed!\n", ret);
else
- printf( "BLAKE2b test passed!\n");
+ test_pass("BLAKE2b test passed!\n");
+#endif
+#ifdef HAVE_BLAKE2S
+ if ( (ret = blake2s_test()) != 0)
+ return err_sys("BLAKE2s test failed!\n", ret);
+ else
+ test_pass("BLAKE2s test passed!\n");
#endif
#ifndef NO_HMAC
@@ -336,134 +741,185 @@ int wolfcrypt_test(void* args)
if ( (ret = hmac_md5_test()) != 0)
return err_sys("HMAC-MD5 test failed!\n", ret);
else
- printf( "HMAC-MD5 test passed!\n");
+ test_pass("HMAC-MD5 test passed!\n");
#endif
#ifndef NO_SHA
if ( (ret = hmac_sha_test()) != 0)
return err_sys("HMAC-SHA test failed!\n", ret);
else
- printf( "HMAC-SHA test passed!\n");
+ test_pass("HMAC-SHA test passed!\n");
+ #endif
+
+ #ifdef WOLFSSL_SHA224
+ if ( (ret = hmac_sha224_test()) != 0)
+ return err_sys("HMAC-SHA224 test failed!\n", ret);
+ else
+ test_pass("HMAC-SHA224 test passed!\n");
#endif
#ifndef NO_SHA256
if ( (ret = hmac_sha256_test()) != 0)
return err_sys("HMAC-SHA256 test failed!\n", ret);
else
- printf( "HMAC-SHA256 test passed!\n");
+ test_pass("HMAC-SHA256 test passed!\n");
#endif
#ifdef WOLFSSL_SHA384
if ( (ret = hmac_sha384_test()) != 0)
return err_sys("HMAC-SHA384 test failed!\n", ret);
else
- printf( "HMAC-SHA384 test passed!\n");
+ test_pass("HMAC-SHA384 test passed!\n");
#endif
#ifdef WOLFSSL_SHA512
if ( (ret = hmac_sha512_test()) != 0)
return err_sys("HMAC-SHA512 test failed!\n", ret);
else
- printf( "HMAC-SHA512 test passed!\n");
+ test_pass("HMAC-SHA512 test passed!\n");
#endif
- #ifdef HAVE_BLAKE2
- if ( (ret = hmac_blake2b_test()) != 0)
- return err_sys("HMAC-BLAKE2 test failed!\n", ret);
+ #if !defined(NO_HMAC) && defined(WOLFSSL_SHA3) && \
+ !defined(WOLFSSL_NOSHA3_224) && !defined(WOLFSSL_NOSHA3_256) && \
+ !defined(WOLFSSL_NOSHA3_384) && !defined(WOLFSSL_NOSHA3_512)
+ if ( (ret = hmac_sha3_test()) != 0)
+ return err_sys("HMAC-SHA3 test failed!\n", ret);
else
- printf( "HMAC-BLAKE2 test passed!\n");
+ test_pass("HMAC-SHA3 test passed!\n");
#endif
#ifdef HAVE_HKDF
if ( (ret = hkdf_test()) != 0)
return err_sys("HMAC-KDF test failed!\n", ret);
else
- printf( "HMAC-KDF test passed!\n");
+ test_pass("HMAC-KDF test passed!\n");
#endif
+#endif /* !NO_HMAC */
+#if defined(HAVE_X963_KDF) && defined(HAVE_ECC)
+ if ( (ret = x963kdf_test()) != 0)
+ return err_sys("X963-KDF test failed!\n", ret);
+ else
+ test_pass("X963-KDF test passed!\n");
#endif
-#ifdef HAVE_AESGCM
+#if defined(HAVE_AESGCM) && defined(WOLFSSL_AES_128) && \
+ !defined(WOLFSSL_AFALG_XILINX_AES) && !defined(WOLFSSL_XILINX_CRYPT)
if ( (ret = gmac_test()) != 0)
- return err_sys("GMAC test passed!\n", ret);
+ return err_sys("GMAC test failed!\n", ret);
else
- printf( "GMAC test passed!\n");
+ test_pass("GMAC test passed!\n");
#endif
#ifndef NO_RC4
if ( (ret = arc4_test()) != 0)
return err_sys("ARC4 test failed!\n", ret);
else
- printf( "ARC4 test passed!\n");
+ test_pass("ARC4 test passed!\n");
#endif
#ifndef NO_HC128
if ( (ret = hc128_test()) != 0)
return err_sys("HC-128 test failed!\n", ret);
else
- printf( "HC-128 test passed!\n");
+ test_pass("HC-128 test passed!\n");
#endif
#ifndef NO_RABBIT
if ( (ret = rabbit_test()) != 0)
return err_sys("Rabbit test failed!\n", ret);
else
- printf( "Rabbit test passed!\n");
+ test_pass("Rabbit test passed!\n");
#endif
#ifdef HAVE_CHACHA
if ( (ret = chacha_test()) != 0)
return err_sys("Chacha test failed!\n", ret);
else
- printf( "Chacha test passed!\n");
+ test_pass("Chacha test passed!\n");
#endif
#ifdef HAVE_POLY1305
if ( (ret = poly1305_test()) != 0)
return err_sys("POLY1305 test failed!\n", ret);
else
- printf( "POLY1305 test passed!\n");
+ test_pass("POLY1305 test passed!\n");
#endif
#if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
if ( (ret = chacha20_poly1305_aead_test()) != 0)
return err_sys("ChaCha20-Poly1305 AEAD test failed!\n", ret);
else
- printf( "ChaCha20-Poly1305 AEAD test passed!\n");
+ test_pass("ChaCha20-Poly1305 AEAD test passed!\n");
#endif
#ifndef NO_DES3
if ( (ret = des_test()) != 0)
return err_sys("DES test failed!\n", ret);
else
- printf( "DES test passed!\n");
+ test_pass("DES test passed!\n");
#endif
#ifndef NO_DES3
if ( (ret = des3_test()) != 0)
return err_sys("DES3 test failed!\n", ret);
else
- printf( "DES3 test passed!\n");
+ test_pass("DES3 test passed!\n");
#endif
#ifndef NO_AES
if ( (ret = aes_test()) != 0)
return err_sys("AES test failed!\n", ret);
else
- printf( "AES test passed!\n");
+ test_pass("AES test passed!\n");
+
+#ifdef WOLFSSL_AES_192
+ if ( (ret = aes192_test()) != 0)
+ return err_sys("AES192 test failed!\n", ret);
+ else
+ test_pass("AES192 test passed!\n");
+#endif
+
+#ifdef WOLFSSL_AES_256
+ if ( (ret = aes256_test()) != 0)
+ return err_sys("AES256 test failed!\n", ret);
+ else
+ test_pass("AES256 test passed!\n");
+#endif
+
+#ifdef WOLFSSL_AES_OFB
+ if ( (ret = aesofb_test()) != 0)
+ return err_sys("AES-OFB test failed!\n", ret);
+ else
+ test_pass("AESOFB test passed!\n");
+#endif
#ifdef HAVE_AESGCM
+ #if !defined(WOLFSSL_AFALG) && !defined(WOLFSSL_DEVCRYPTO)
if ( (ret = aesgcm_test()) != 0)
return err_sys("AES-GCM test failed!\n", ret);
- else
- printf( "AES-GCM test passed!\n");
+ #endif
+ #if !defined(WOLFSSL_AFALG_XILINX_AES) && !defined(WOLFSSL_XILINX_CRYPT) && \
+ !(defined(WOLF_CRYPTO_CB) && \
+ (defined(HAVE_INTEL_QA_SYNC) || defined(HAVE_CAVIUM_OCTEON_SYNC)))
+ if ((ret = aesgcm_default_test()) != 0) {
+ return err_sys("AES-GCM test failed!\n", ret);
+ }
+ #endif
+ test_pass("AES-GCM test passed!\n");
#endif
-#ifdef HAVE_AESCCM
+#if defined(HAVE_AESCCM) && defined(WOLFSSL_AES_128)
if ( (ret = aesccm_test()) != 0)
return err_sys("AES-CCM test failed!\n", ret);
else
- printf( "AES-CCM test passed!\n");
+ test_pass("AES-CCM test passed!\n");
+#endif
+#ifdef HAVE_AES_KEYWRAP
+ if ( (ret = aeskeywrap_test()) != 0)
+ return err_sys("AES Key Wrap test failed!\n", ret);
+ else
+ test_pass("AES Key Wrap test passed!\n");
#endif
#endif
@@ -471,159 +927,675 @@ int wolfcrypt_test(void* args)
if ( (ret = camellia_test()) != 0)
return err_sys("CAMELLIA test failed!\n", ret);
else
- printf( "CAMELLIA test passed!\n");
+ test_pass("CAMELLIA test passed!\n");
#endif
- if ( (ret = random_test()) != 0)
- return err_sys("RANDOM test failed!\n", ret);
+#ifdef HAVE_IDEA
+ if ( (ret = idea_test()) != 0)
+ return err_sys("IDEA test failed!\n", ret);
else
- printf( "RANDOM test passed!\n");
+ test_pass("IDEA test passed!\n");
+#endif
#ifndef NO_RSA
+ #ifdef WC_RSA_NO_PADDING
+ if ( (ret = rsa_no_pad_test()) != 0)
+ return err_sys("RSA NOPAD test failed!\n", ret);
+ else
+ test_pass("RSA NOPAD test passed!\n");
+ #endif
if ( (ret = rsa_test()) != 0)
return err_sys("RSA test failed!\n", ret);
else
- printf( "RSA test passed!\n");
+ test_pass("RSA test passed!\n");
#endif
#ifndef NO_DH
if ( (ret = dh_test()) != 0)
return err_sys("DH test failed!\n", ret);
else
- printf( "DH test passed!\n");
+ test_pass("DH test passed!\n");
#endif
#ifndef NO_DSA
if ( (ret = dsa_test()) != 0)
return err_sys("DSA test failed!\n", ret);
else
- printf( "DSA test passed!\n");
+ test_pass("DSA test passed!\n");
+#endif
+
+#ifdef WOLFCRYPT_HAVE_SRP
+ if ( (ret = srp_test()) != 0)
+ return err_sys("SRP test failed!\n", ret);
+ else
+ test_pass("SRP test passed!\n");
#endif
#ifndef NO_PWDBASED
if ( (ret = pwdbased_test()) != 0)
return err_sys("PWDBASED test failed!\n", ret);
else
- printf( "PWDBASED test passed!\n");
+ test_pass("PWDBASED test passed!\n");
#endif
-#ifdef OPENSSL_EXTRA
+#if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
if ( (ret = openssl_test()) != 0)
return err_sys("OPENSSL test failed!\n", ret);
else
- printf( "OPENSSL test passed!\n");
+ test_pass("OPENSSL test passed!\n");
+
+ if ( (ret = openSSL_evpMD_test()) != 0)
+ return err_sys("OPENSSL (EVP MD) test failed!\n", ret);
+ else
+ test_pass("OPENSSL (EVP MD) passed!\n");
+
+ if ( (ret = openssl_pkey0_test()) != 0)
+ return err_sys("OPENSSL (PKEY0) test failed!\n", ret);
+ else
+ test_pass("OPENSSL (PKEY0) passed!\n");
+
+ if ( (ret = openssl_pkey1_test()) != 0)
+ return err_sys("OPENSSL (PKEY1) test failed!\n", ret);
+ else
+ test_pass("OPENSSL (PKEY1) passed!\n");
+
+ if ( (ret = openssl_evpSig_test()) != 0)
+ return err_sys("OPENSSL (EVP Sign/Verify) test failed!\n", ret);
+ else
+ test_pass("OPENSSL (EVP Sign/Verify) passed!\n");
+
#endif
#ifdef HAVE_ECC
if ( (ret = ecc_test()) != 0)
return err_sys("ECC test failed!\n", ret);
else
- printf( "ECC test passed!\n");
- #ifdef HAVE_ECC_ENCRYPT
+ test_pass("ECC test passed!\n");
+ #if defined(HAVE_ECC_ENCRYPT) && defined(WOLFSSL_AES_128)
if ( (ret = ecc_encrypt_test()) != 0)
return err_sys("ECC Enc test failed!\n", ret);
else
- printf( "ECC Enc test passed!\n");
+ test_pass("ECC Enc test passed!\n");
#endif
+ #ifdef USE_CERT_BUFFERS_256
+ if ( (ret = ecc_test_buffers()) != 0)
+ return err_sys("ECC buffer test failed!\n", ret);
+ else
+ test_pass("ECC buffer test passed!\n");
+ #endif
+#endif
+
+#if !defined(NO_ASN_TIME) && !defined(NO_RSA) && defined(WOLFSSL_TEST_CERT) && \
+ !defined(NO_FILESYSTEM)
+ if ( (ret = cert_test()) != 0)
+ return err_sys("CERT test failed!\n", ret);
+ else
+ test_pass("CERT test passed!\n");
+#endif
+
+#if defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_TEST_CERT) && \
+ !defined(NO_FILESYSTEM)
+ if ( (ret = certext_test()) != 0)
+ return err_sys("CERT EXT test failed!\n", ret);
+ else
+ test_pass("CERT EXT test passed!\n");
+#endif
+
+#if defined(WOLFSSL_CERT_GEN_CACHE) && defined(WOLFSSL_TEST_CERT) && \
+ defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_CERT_GEN)
+ if ( (ret = decodedCertCache_test()) != 0)
+ return err_sys("DECODED CERT CACHE test failed!\n", ret);
+ else
+ test_pass("DECODED CERT CACHE test passed!\n");
#endif
#ifdef HAVE_CURVE25519
if ( (ret = curve25519_test()) != 0)
return err_sys("CURVE25519 test failed!\n", ret);
else
- printf( "CURVE25519 test passed!\n");
+ test_pass("CURVE25519 test passed!\n");
#endif
#ifdef HAVE_ED25519
if ( (ret = ed25519_test()) != 0)
return err_sys("ED25519 test failed!\n", ret);
else
- printf( "ED25519 test passed!\n");
+ test_pass("ED25519 test passed!\n");
+#endif
+
+#ifdef HAVE_CURVE448
+ if ( (ret = curve448_test()) != 0)
+ return err_sys("CURVE448 test failed!\n", ret);
+ else
+ test_pass("CURVE448 test passed!\n");
+#endif
+
+#ifdef HAVE_ED448
+ if ( (ret = ed448_test()) != 0)
+ return err_sys("ED448 test failed!\n", ret);
+ else
+ test_pass("ED448 test passed!\n");
+#endif
+
+#if defined(WOLFSSL_CMAC) && !defined(NO_AES)
+ if ( (ret = cmac_test()) != 0)
+ return err_sys("CMAC test failed!\n", ret);
+ else
+ test_pass("CMAC test passed!\n");
#endif
#ifdef HAVE_LIBZ
if ( (ret = compress_test()) != 0)
return err_sys("COMPRESS test failed!\n", ret);
else
- printf( "COMPRESS test passed!\n");
+ test_pass("COMPRESS test passed!\n");
#endif
#ifdef HAVE_PKCS7
+ #ifndef NO_PKCS7_ENCRYPTED_DATA
+ if ( (ret = pkcs7encrypted_test()) != 0)
+ return err_sys("PKCS7encrypted test failed!\n", ret);
+ else
+ test_pass("PKCS7encrypted test passed!\n");
+ #endif
+ #if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+ if ( (ret = pkcs7compressed_test()) != 0)
+ return err_sys("PKCS7compressed test failed!\n", ret);
+ else
+ test_pass("PKCS7compressed test passed!\n");
+ #endif
+ if ( (ret = pkcs7signed_test()) != 0)
+ return err_sys("PKCS7signed test failed!\n", ret);
+ else
+ test_pass("PKCS7signed test passed!\n");
+
if ( (ret = pkcs7enveloped_test()) != 0)
- return err_sys("PKCS7enveloped test failed!\n", ret);
+ return err_sys("PKCS7enveloped test failed!\n", ret);
else
- printf( "PKCS7enveloped test passed!\n");
+ test_pass("PKCS7enveloped test passed!\n");
- if ( (ret = pkcs7signed_test()) != 0)
- return err_sys("PKCS7signed test failed!\n", ret);
+ #if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+ if ( (ret = pkcs7authenveloped_test()) != 0)
+ return err_sys("PKCS7authenveloped test failed!\n", ret);
+ else
+ test_pass("PKCS7authenveloped test passed!\n");
+ #endif
+#endif
+
+#ifdef HAVE_VALGRIND
+ if ( (ret = mp_test()) != 0)
+ return err_sys("mp test failed!\n", ret);
else
- printf( "PKCS7signed test passed!\n");
+ test_pass("mp test passed!\n");
#endif
- ((func_args*)args)->return_code = ret;
+#if defined(WOLFSSL_PUBLIC_MP) && defined(WOLFSSL_KEY_GEN)
+ if ( (ret = prime_test()) != 0)
+ return err_sys("prime test failed!\n", ret);
+ else
+ test_pass("prime test passed!\n");
+#endif
- return ret;
-}
+#if defined(ASN_BER_TO_DER) && \
+ (defined(WOLFSSL_TEST_CERT) || defined(OPENSSL_EXTRA) || \
+ defined(OPENSSL_EXTRA_X509_SMALL))
+ if ( (ret = berder_test()) != 0)
+ return err_sys("ber-der test failed!\n", ret);
+ else
+ test_pass("ber-der test passed!\n");
+#endif
+
+ if ( (ret = logging_test()) != 0)
+ return err_sys("logging test failed!\n", ret);
+ else
+ test_pass("logging test passed!\n");
+ if ( (ret = mutex_test()) != 0)
+ return err_sys("mutex test failed!\n", ret);
+ else
+ test_pass("mutex test passed!\n");
-#ifndef NO_MAIN_DRIVER
+#if defined(USE_WOLFSSL_MEMORY) && !defined(FREERTOS)
+ if ( (ret = memcb_test()) != 0)
+ return err_sys("memcb test failed!\n", ret);
+ else
+ test_pass("memcb test passed!\n");
+#endif
-#ifdef HAVE_CAVIUM
+#ifdef WOLFSSL_IMX6_CAAM_BLOB
+ if ( (ret = blob_test()) != 0)
+ return err_sys("blob test failed!\n", ret);
+ else
+ test_pass("blob test passed!\n");
+#endif
-static int OpenNitroxDevice(int dma_mode,int dev_id)
-{
- Csp1CoreAssignment core_assign;
- Uint32 device;
+#if defined(WOLF_CRYPTO_CB) && \
+ !(defined(HAVE_INTEL_QAT_SYNC) || defined(HAVE_CAVIUM_OCTEON_SYNC))
+ if ( (ret = cryptocb_test()) != 0)
+ return err_sys("crypto callback test failed!\n", ret);
+ else
+ test_pass("crypto callback test passed!\n");
+#endif
- if (CspInitialize(CAVIUM_DIRECT,CAVIUM_DEV_ID))
- return -1;
- if (Csp1GetDevType(&device))
- return -1;
- if (device != NPX_DEVICE) {
- if (ioctl(gpkpdev_hdlr[CAVIUM_DEV_ID], IOCTL_CSP1_GET_CORE_ASSIGNMENT,
- (Uint32 *)&core_assign)!= 0)
- return -1;
- }
- CspShutdown(CAVIUM_DEV_ID);
+#ifdef WOLFSSL_CERT_PIV
+ if ( (ret = certpiv_test()) != 0)
+ return err_sys("cert piv test failed!\n", ret);
+ else
+ test_pass("cert piv test passed!\n");
+#endif
- return CspInitialize(dma_mode, dev_id);
+#ifdef WOLF_CRYPTO_CB
+#ifdef HAVE_INTEL_QA_SYNC
+ wc_CryptoCb_CleanupIntelQa(&devId);
+#endif
+#ifdef HAVE_CAVIUM_OCTEON_SYNC
+ wc_CryptoCb_CleanupOcteon(&devId);
+#endif
+#endif
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ wolfAsync_DevClose(&devId);
+#endif
+
+ /* cleanup the thread if fixed point cache is enabled and have thread local */
+#if defined(HAVE_THREAD_LS) && defined(HAVE_ECC) && defined(FP_ECC)
+ wc_ecc_fp_free();
+#endif
+
+ if (args)
+ ((func_args*)args)->return_code = ret;
+
+ test_pass("Test complete\n");
+
+ EXIT_TEST(ret);
}
-#endif /* HAVE_CAVIUM */
- /* so overall tests can pull in test function */
+#ifndef NO_MAIN_DRIVER
+ /* so overall tests can pull in test function */
+#ifdef WOLFSSL_ESPIDF
+ void app_main( )
+#else
int main(int argc, char** argv)
+#endif
{
-
+ int ret;
func_args args;
+#ifdef WOLFSSL_ESPIDF
+ /* set dummy wallclock time. */
+ struct timeval utctime;
+ struct timezone tz;
+ utctime.tv_sec = 1521725159; /* dummy time: 2018-03-22T13:25:59+00:00 */
+ utctime.tv_usec = 0;
+ tz.tz_minuteswest = 0;
+ tz.tz_dsttime = 0;
+ settimeofday(&utctime, &tz);
+#endif
+#ifdef WOLFSSL_APACHE_MYNEWT
+ #ifdef ARCH_sim
+ mcu_sim_parse_args(argc, argv);
+ #endif
+ sysinit();
+
+ /* set dummy wallclock time. */
+ struct os_timeval utctime;
+ struct os_timezone tz;
+ utctime.tv_sec = 1521725159; /* dummy time: 2018-03-22T13:25:59+00:00 */
+ utctime.tv_usec = 0;
+ tz.tz_minuteswest = 0;
+ tz.tz_dsttime = 0;
+ os_settimeofday(&utctime, &tz);
+#endif
-
-#ifdef HAVE_CAVIUM
- int ret = OpenNitroxDevice(CAVIUM_DIRECT, CAVIUM_DEV_ID);
- if (ret != 0) {
- err_sys("Cavium OpenNitroxDevice failed", -1236);
- return -1236;
+#ifdef HAVE_WNR
+ if (wc_InitNetRandom(wnrConfigFile, NULL, 5000) != 0) {
+ err_sys("Whitewood netRandom global config failed", -1001);
+ return -1002;
}
-#endif /* HAVE_CAVIUM */
-
+#endif
+#ifndef WOLFSSL_ESPIDF
args.argc = argc;
args.argv = argv;
+#endif
+ if ((ret = wolfCrypt_Init()) != 0) {
+ printf("wolfCrypt_Init failed %d\n", ret);
+ err_sys("Error with wolfCrypt_Init!\n", -1003);
+ }
+ #ifdef HAVE_STACK_SIZE
+ StackSizeCheck(&args, wolfcrypt_test);
+ #else
wolfcrypt_test(&args);
+ #endif
-#ifdef HAVE_CAVIUM
- CspShutdown(CAVIUM_DEV_ID);
-#endif
+ if ((ret = wolfCrypt_Cleanup()) != 0) {
+ printf("wolfCrypt_Cleanup failed %d\n", ret);
+ err_sys("Error with wolfCrypt_Cleanup!\n", -1004);
+ }
+#ifdef HAVE_WNR
+ if (wc_FreeNetRandom() < 0)
+ err_sys("Failed to free netRandom context", -1005);
+#endif /* HAVE_WNR */
+#ifndef WOLFSSL_ESPIDF
return args.return_code;
+#endif
}
#endif /* NO_MAIN_DRIVER */
+/* helper to save DER, convert to PEM and save PEM */
+#if !defined(NO_ASN) && (!defined(NO_RSA) || defined(HAVE_ECC)) && \
+ (defined(WOLFSSL_KEY_GEN) || defined(WOLFSSL_CERT_GEN))
+
+#if !defined(NO_FILESYSTEM) && !defined(NO_WRITE_TEMP_FILES)
+#define SaveDerAndPem(d, dSz, p, pSz, fD, fP, pT, eB) _SaveDerAndPem(d, dSz, p, pSz, fD, fP, pT, eB)
+#else
+#define SaveDerAndPem(d, dSz, p, pSz, fD, fP, pT, eB) _SaveDerAndPem(d, dSz, p, pSz, NULL, NULL, pT, eB)
+#endif
+
+static int _SaveDerAndPem(const byte* der, int derSz,
+ byte* pem, int pemSz, const char* fileDer,
+ const char* filePem, int pemType, int errBase)
+{
+#if !defined(NO_FILESYSTEM) && !defined(NO_WRITE_TEMP_FILES)
+ int ret;
+ XFILE derFile;
+
+ derFile = XFOPEN(fileDer, "wb");
+ if (!derFile) {
+ return errBase + 0;
+ }
+ ret = (int)XFWRITE(der, 1, derSz, derFile);
+ XFCLOSE(derFile);
+ if (ret != derSz) {
+ return errBase + 1;
+ }
+#endif
+
+ if (pem && filePem) {
+ #if !defined(NO_FILESYSTEM) && !defined(NO_WRITE_TEMP_FILES)
+ XFILE pemFile;
+ #endif
+ #ifdef WOLFSSL_DER_TO_PEM
+ pemSz = wc_DerToPem(der, derSz, pem, pemSz, pemType);
+ if (pemSz < 0) {
+ return errBase + 2;
+ }
+ #endif
+ #if !defined(NO_FILESYSTEM) && !defined(NO_WRITE_TEMP_FILES)
+ pemFile = XFOPEN(filePem, "wb");
+ if (!pemFile) {
+ return errBase + 3;
+ }
+ ret = (int)XFWRITE(pem, 1, pemSz, pemFile);
+ XFCLOSE(pemFile);
+ if (ret != pemSz) {
+ return errBase + 4;
+ }
+ #endif
+ }
+
+ /* suppress unused variable warnings */
+ (void)filePem;
+ (void)fileDer;
+
+ return 0;
+}
+#endif /* WOLFSSL_KEY_GEN || WOLFSSL_CERT_GEN */
+
+int error_test(void)
+{
+ const char* errStr;
+ char out[WOLFSSL_MAX_ERROR_SZ];
+ const char* unknownStr = wc_GetErrorString(0);
+
+#ifdef NO_ERROR_STRINGS
+ /* Ensure a valid error code's string matches an invalid code's.
+ * The string is that error strings are not available.
+ */
+ errStr = wc_GetErrorString(OPEN_RAN_E);
+ wc_ErrorString(OPEN_RAN_E, out);
+ if (XSTRNCMP(errStr, unknownStr, XSTRLEN(unknownStr)) != 0)
+ return -1100;
+ if (XSTRNCMP(out, unknownStr, XSTRLEN(unknownStr)) != 0)
+ return -1101;
+#else
+ int i;
+ int j = 0;
+ /* Values that are not or no longer error codes. */
+ int missing[] = { -122, -123, -124, -127, -128, -129,
+ -163, -164, -165, -166, -167, -168, -169,
+ -179, -233,
+ 0 };
+
+ /* Check that all errors have a string and it's the same through the two
+ * APIs. Check that the values that are not errors map to the unknown
+ * string.
+ */
+ for (i = MAX_CODE_E-1; i >= WC_LAST_E; i--) {
+ errStr = wc_GetErrorString(i);
+ wc_ErrorString(i, out);
+
+ if (i != missing[j]) {
+ if (XSTRNCMP(errStr, unknownStr, XSTRLEN(unknownStr)) == 0)
+ return -1102;
+ if (XSTRNCMP(out, unknownStr, XSTRLEN(unknownStr)) == 0)
+ return -1103;
+ if (XSTRNCMP(errStr, out, XSTRLEN(errStr)) != 0)
+ return -1104;
+ if (XSTRLEN(errStr) >= WOLFSSL_MAX_ERROR_SZ)
+ return -1105;
+ }
+ else {
+ j++;
+ if (XSTRNCMP(errStr, unknownStr, XSTRLEN(unknownStr)) != 0)
+ return -1106;
+ if (XSTRNCMP(out, unknownStr, XSTRLEN(unknownStr)) != 0)
+ return -1107;
+ }
+ }
+
+ /* Check if the next possible value has been given a string. */
+ errStr = wc_GetErrorString(i);
+ wc_ErrorString(i, out);
+ if (XSTRNCMP(errStr, unknownStr, XSTRLEN(unknownStr)) != 0)
+ return -1108;
+ if (XSTRNCMP(out, unknownStr, XSTRLEN(unknownStr)) != 0)
+ return -1109;
+#endif
+
+ return 0;
+}
+
+#ifndef NO_CODING
+
+int base64_test(void)
+{
+ int ret;
+ const byte good[] = "A+Gd\0\0\0";
+ const byte goodEnd[] = "A+Gd \r\n";
+ byte out[128];
+ word32 outLen;
+#ifdef WOLFSSL_BASE64_ENCODE
+ byte data[3];
+ word32 dataLen;
+ byte longData[79] = { 0 };
+ const byte symbols[] = "+/A=";
+#endif
+ const byte badSmall[] = "AAA Gdj=";
+ const byte badLarge[] = "AAA~Gdj=";
+ const byte badEOL[] = "A+Gd AA";
+ int i;
+
+ /* Good Base64 encodings. */
+ outLen = sizeof(out);
+ ret = Base64_Decode(good, sizeof(good), out, &outLen);
+ if (ret != 0)
+ return -1200;
+ outLen = sizeof(out);
+ ret = Base64_Decode(goodEnd, sizeof(goodEnd), out, &outLen);
+ if (ret != 0)
+ return -1201;
+
+ /* Bad parameters. */
+ outLen = 1;
+ ret = Base64_Decode(good, sizeof(good), out, &outLen);
+ if (ret != BAD_FUNC_ARG)
+ return -1202;
+
+ outLen = sizeof(out);
+ ret = Base64_Decode(badEOL, sizeof(badEOL), out, &outLen);
+ if (ret != ASN_INPUT_E)
+ return -1203;
+ /* Bad character at each offset 0-3. */
+ for (i = 0; i < 4; i++) {
+ outLen = sizeof(out);
+ ret = Base64_Decode(badSmall + i, 4, out, &outLen);
+ if (ret != ASN_INPUT_E)
+ return -1204 - i;
+ ret = Base64_Decode(badLarge + i, 4, out, &outLen);
+ if (ret != ASN_INPUT_E)
+ return -1214 - i;
+ }
+
+#ifdef WOLFSSL_BASE64_ENCODE
+ /* Decode and encode all symbols - non-alphanumeric. */
+ dataLen = sizeof(data);
+ ret = Base64_Decode(symbols, sizeof(symbols), data, &dataLen);
+ if (ret != 0)
+ return -1224;
+ outLen = sizeof(out);
+ ret = Base64_Encode(data, dataLen, NULL, &outLen);
+ if (ret != LENGTH_ONLY_E)
+ return -1225;
+ outLen = sizeof(out);
+ ret = Base64_Encode(data, dataLen, out, &outLen);
+ if (ret != 0)
+ return -1226;
+ outLen = 7;
+ ret = Base64_EncodeEsc(data, dataLen, out, &outLen);
+ if (ret != BUFFER_E)
+ return -1227;
+ outLen = sizeof(out);
+ ret = Base64_EncodeEsc(data, dataLen, NULL, &outLen);
+ if (ret != LENGTH_ONLY_E)
+ return -1228;
+ outLen = sizeof(out);
+ ret = Base64_EncodeEsc(data, dataLen, out, &outLen);
+ if (ret != 0)
+ return -1229;
+ outLen = sizeof(out);
+ ret = Base64_Encode_NoNl(data, dataLen, out, &outLen);
+ if (ret != 0)
+ return -1230;
+
+ /* Data that results in an encoding longer than one line. */
+ outLen = sizeof(out);
+ dataLen = sizeof(longData);
+ ret = Base64_Encode(longData, dataLen, out, &outLen);
+ if (ret != 0)
+ return -1231;
+ outLen = sizeof(out);
+ ret = Base64_EncodeEsc(longData, dataLen, out, &outLen);
+ if (ret != 0)
+ return -1232;
+ outLen = sizeof(out);
+ ret = Base64_Encode_NoNl(longData, dataLen, out, &outLen);
+ if (ret != 0)
+ return -1233;
+#endif
+
+ return 0;
+}
+
+#ifdef WOLFSSL_BASE16
+int base16_test(void)
+{
+ int ret;
+ const byte testData[] = "SomeDataToEncode\n";
+ const byte encodedTestData[] = "536F6D6544617461546F456E636F64650A00";
+ byte encoded[40];
+ word32 encodedLen;
+ byte plain[40];
+ word32 len;
+
+ /* length returned includes null termination */
+ encodedLen = sizeof(encoded);
+ ret = Base16_Encode(testData, sizeof(testData), encoded, &encodedLen);
+ if (ret != 0)
+ return -1300;
+
+ len = (word32)XSTRLEN((char*)encoded);
+ if (len != encodedLen - 1)
+ return -1301;
+
+ len = sizeof(plain);
+ ret = Base16_Decode(encoded, encodedLen - 1, plain, &len);
+ if (ret != 0)
+ return -1302;
+
+ if (len != sizeof(testData) || XMEMCMP(testData, plain, len) != 0)
+ return -1303;
+
+ if (encodedLen != sizeof(encodedTestData) ||
+ XMEMCMP(encoded, encodedTestData, encodedLen) != 0) {
+ return -1304;
+ }
+
+ return 0;
+}
+#endif /* WOLFSSL_BASE16 */
+#endif /* !NO_CODING */
+
+#ifndef NO_ASN
+int asn_test(void)
+{
+ int ret;
+ /* ASN1 encoded date buffer */
+ const byte dateBuf[] = {0x17, 0x0d, 0x31, 0x36, 0x30, 0x38, 0x31, 0x31,
+ 0x32, 0x30, 0x30, 0x37, 0x33, 0x37, 0x5a};
+ byte format;
+ int length;
+ const byte* datePart;
+#ifndef NO_ASN_TIME
+ struct tm timearg;
+ time_t now;
+#endif
+
+ ret = wc_GetDateInfo(dateBuf, (int)sizeof(dateBuf), &datePart, &format,
+ &length);
+ if (ret != 0)
+ return -1400;
+
+#ifndef NO_ASN_TIME
+ /* Parameter Validation tests. */
+ if (wc_GetTime(NULL, sizeof(now)) != BAD_FUNC_ARG)
+ return -1401;
+ if (wc_GetTime(&now, 0) != BUFFER_E)
+ return -1402;
+
+ now = 0;
+ if (wc_GetTime(&now, sizeof(now)) != 0) {
+ return -1403;
+ }
+ if (now == 0) {
+ printf("RTC/Time not set!\n");
+ return -1404;
+ }
+
+ ret = wc_GetDateAsCalendarTime(datePart, length, format, &timearg);
+ if (ret != 0)
+ return -1405;
+#endif /* !NO_ASN_TIME */
+
+ return 0;
+}
+#endif /* !NO_ASN */
#ifdef WOLFSSL_MD2
-int md2_test()
+int md2_test(void)
{
Md2 md2;
byte hash[MD2_DIGEST_SIZE];
@@ -635,45 +1607,45 @@ int md2_test()
a.input = "";
a.output = "\x83\x50\xe5\xa3\xe2\x4c\x15\x3d\xf2\x27\x5c\x9f\x80\x69"
"\x27\x73";
- a.inLen = strlen(a.input);
+ a.inLen = XSTRLEN(a.input);
a.outLen = MD2_DIGEST_SIZE;
b.input = "a";
b.output = "\x32\xec\x01\xec\x4a\x6d\xac\x72\xc0\xab\x96\xfb\x34\xc0"
"\xb5\xd1";
- b.inLen = strlen(b.input);
+ b.inLen = XSTRLEN(b.input);
b.outLen = MD2_DIGEST_SIZE;
c.input = "abc";
c.output = "\xda\x85\x3b\x0d\x3f\x88\xd9\x9b\x30\x28\x3a\x69\xe6\xde"
"\xd6\xbb";
- c.inLen = strlen(c.input);
+ c.inLen = XSTRLEN(c.input);
c.outLen = MD2_DIGEST_SIZE;
d.input = "message digest";
d.output = "\xab\x4f\x49\x6b\xfb\x2a\x53\x0b\x21\x9f\xf3\x30\x31\xfe"
"\x06\xb0";
- d.inLen = strlen(d.input);
+ d.inLen = XSTRLEN(d.input);
d.outLen = MD2_DIGEST_SIZE;
e.input = "abcdefghijklmnopqrstuvwxyz";
e.output = "\x4e\x8d\xdf\xf3\x65\x02\x92\xab\x5a\x41\x08\xc3\xaa\x47"
"\x94\x0b";
- e.inLen = strlen(e.input);
+ e.inLen = XSTRLEN(e.input);
e.outLen = MD2_DIGEST_SIZE;
f.input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345"
"6789";
f.output = "\xda\x33\xde\xf2\xa4\x2d\xf1\x39\x75\x35\x28\x46\xc3\x03"
"\x38\xcd";
- f.inLen = strlen(f.input);
+ f.inLen = XSTRLEN(f.input);
f.outLen = MD2_DIGEST_SIZE;
g.input = "1234567890123456789012345678901234567890123456789012345678"
"9012345678901234567890";
g.output = "\xd5\x97\x6f\x79\xd8\x3d\x3a\x0d\xc9\x80\x6c\x3c\x66\xf3"
"\xef\xd8";
- g.inLen = strlen(g.input);
+ g.inLen = XSTRLEN(g.input);
g.outLen = MD2_DIGEST_SIZE;
test_md2[0] = a;
@@ -690,8 +1662,8 @@ int md2_test()
wc_Md2Update(&md2, (byte*)test_md2[i].input, (word32)test_md2[i].inLen);
wc_Md2Final(&md2, hash);
- if (memcmp(hash, test_md2[i].output, MD2_DIGEST_SIZE) != 0)
- return -155 - i;
+ if (XMEMCMP(hash, test_md2[i].output, MD2_DIGEST_SIZE) != 0)
+ return -1500 - i;
}
return 0;
@@ -701,62 +1673,126 @@ int md2_test()
#ifndef NO_MD5
int md5_test(void)
{
- Md5 md5;
- byte hash[MD5_DIGEST_SIZE];
-
- testVector a, b, c, d, e;
- testVector test_md5[5];
+ int ret = 0;
+ wc_Md5 md5, md5Copy;
+ byte hash[WC_MD5_DIGEST_SIZE];
+ byte hashcopy[WC_MD5_DIGEST_SIZE];
+ testVector a, b, c, d, e, f;
+ testVector test_md5[6];
int times = sizeof(test_md5) / sizeof(testVector), i;
- a.input = "abc";
- a.output = "\x90\x01\x50\x98\x3c\xd2\x4f\xb0\xd6\x96\x3f\x7d\x28\xe1\x7f"
+ a.input = "";
+ a.output = "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04\xe9\x80\x09\x98\xec\xf8\x42"
+ "\x7e";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_MD5_DIGEST_SIZE;
+
+ b.input = "abc";
+ b.output = "\x90\x01\x50\x98\x3c\xd2\x4f\xb0\xd6\x96\x3f\x7d\x28\xe1\x7f"
"\x72";
- a.inLen = strlen(a.input);
- a.outLen = MD5_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_MD5_DIGEST_SIZE;
- b.input = "message digest";
- b.output = "\xf9\x6b\x69\x7d\x7c\xb7\x93\x8d\x52\x5a\x2f\x31\xaa\xf1\x61"
+ c.input = "message digest";
+ c.output = "\xf9\x6b\x69\x7d\x7c\xb7\x93\x8d\x52\x5a\x2f\x31\xaa\xf1\x61"
"\xd0";
- b.inLen = strlen(b.input);
- b.outLen = MD5_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_MD5_DIGEST_SIZE;
- c.input = "abcdefghijklmnopqrstuvwxyz";
- c.output = "\xc3\xfc\xd3\xd7\x61\x92\xe4\x00\x7d\xfb\x49\x6c\xca\x67\xe1"
+ d.input = "abcdefghijklmnopqrstuvwxyz";
+ d.output = "\xc3\xfc\xd3\xd7\x61\x92\xe4\x00\x7d\xfb\x49\x6c\xca\x67\xe1"
"\x3b";
- c.inLen = strlen(c.input);
- c.outLen = MD5_DIGEST_SIZE;
+ d.inLen = XSTRLEN(d.input);
+ d.outLen = WC_MD5_DIGEST_SIZE;
- d.input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345"
+ e.input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345"
"6789";
- d.output = "\xd1\x74\xab\x98\xd2\x77\xd9\xf5\xa5\x61\x1c\x2c\x9f\x41\x9d"
+ e.output = "\xd1\x74\xab\x98\xd2\x77\xd9\xf5\xa5\x61\x1c\x2c\x9f\x41\x9d"
"\x9f";
- d.inLen = strlen(d.input);
- d.outLen = MD5_DIGEST_SIZE;
+ e.inLen = XSTRLEN(e.input);
+ e.outLen = WC_MD5_DIGEST_SIZE;
- e.input = "1234567890123456789012345678901234567890123456789012345678"
+ f.input = "1234567890123456789012345678901234567890123456789012345678"
"9012345678901234567890";
- e.output = "\x57\xed\xf4\xa2\x2b\xe3\xc9\x55\xac\x49\xda\x2e\x21\x07\xb6"
+ f.output = "\x57\xed\xf4\xa2\x2b\xe3\xc9\x55\xac\x49\xda\x2e\x21\x07\xb6"
"\x7a";
- e.inLen = strlen(e.input);
- e.outLen = MD5_DIGEST_SIZE;
+ f.inLen = XSTRLEN(f.input);
+ f.outLen = WC_MD5_DIGEST_SIZE;
test_md5[0] = a;
test_md5[1] = b;
test_md5[2] = c;
test_md5[3] = d;
test_md5[4] = e;
+ test_md5[5] = f;
- wc_InitMd5(&md5);
+ ret = wc_InitMd5_ex(&md5, HEAP_HINT, devId);
+ if (ret != 0)
+ return -1600;
+ ret = wc_InitMd5_ex(&md5Copy, HEAP_HINT, devId);
+ if (ret != 0) {
+ wc_Md5Free(&md5);
+ return -1601;
+ }
for (i = 0; i < times; ++i) {
- wc_Md5Update(&md5, (byte*)test_md5[i].input, (word32)test_md5[i].inLen);
- wc_Md5Final(&md5, hash);
+ ret = wc_Md5Update(&md5, (byte*)test_md5[i].input,
+ (word32)test_md5[i].inLen);
+ if (ret != 0)
+ ERROR_OUT(-1602 - i, exit);
+
+ ret = wc_Md5GetHash(&md5, hashcopy);
+ if (ret != 0)
+ ERROR_OUT(-1603 - i, exit);
+
+ ret = wc_Md5Copy(&md5, &md5Copy);
+ if (ret != 0)
+ ERROR_OUT(-1604 - i, exit);
+
+ ret = wc_Md5Final(&md5, hash);
+ if (ret != 0)
+ ERROR_OUT(-1605 - i, exit);
+
+ wc_Md5Free(&md5Copy);
- if (memcmp(hash, test_md5[i].output, MD5_DIGEST_SIZE) != 0)
- return -5 - i;
+ if (XMEMCMP(hash, test_md5[i].output, WC_MD5_DIGEST_SIZE) != 0)
+ ERROR_OUT(-1606 - i, exit);
+
+ if (XMEMCMP(hash, hashcopy, WC_MD5_DIGEST_SIZE) != 0)
+ ERROR_OUT(-1607 - i, exit);
}
- return 0;
+ /* BEGIN LARGE HASH TEST */ {
+ byte large_input[1024];
+ const char* large_digest =
+ "\x44\xd0\x88\xce\xf1\x36\xd1\x78\xe9\xc8\xba\x84\xc3\xfd\xf6\xca";
+
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+ times = 100;
+#ifdef WOLFSSL_PIC32MZ_HASH
+ wc_Md5SizeSet(&md5, times * sizeof(large_input));
+#endif
+ for (i = 0; i < times; ++i) {
+ ret = wc_Md5Update(&md5, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-1608, exit);
+ }
+ ret = wc_Md5Final(&md5, hash);
+ if (ret != 0)
+ ERROR_OUT(-1609, exit);
+ if (XMEMCMP(hash, large_digest, WC_MD5_DIGEST_SIZE) != 0)
+ ERROR_OUT(-1610, exit);
+ } /* END LARGE HASH TEST */
+
+exit:
+
+ wc_Md5Free(&md5);
+ wc_Md5Free(&md5Copy);
+
+ return ret;
}
#endif /* NO_MD5 */
@@ -775,45 +1811,45 @@ int md4_test(void)
a.input = "";
a.output = "\x31\xd6\xcf\xe0\xd1\x6a\xe9\x31\xb7\x3c\x59\xd7\xe0\xc0\x89"
"\xc0";
- a.inLen = strlen(a.input);
+ a.inLen = XSTRLEN(a.input);
a.outLen = MD4_DIGEST_SIZE;
b.input = "a";
b.output = "\xbd\xe5\x2c\xb3\x1d\xe3\x3e\x46\x24\x5e\x05\xfb\xdb\xd6\xfb"
"\x24";
- b.inLen = strlen(b.input);
+ b.inLen = XSTRLEN(b.input);
b.outLen = MD4_DIGEST_SIZE;
c.input = "abc";
c.output = "\xa4\x48\x01\x7a\xaf\x21\xd8\x52\x5f\xc1\x0a\xe8\x7a\xa6\x72"
"\x9d";
- c.inLen = strlen(c.input);
+ c.inLen = XSTRLEN(c.input);
c.outLen = MD4_DIGEST_SIZE;
d.input = "message digest";
d.output = "\xd9\x13\x0a\x81\x64\x54\x9f\xe8\x18\x87\x48\x06\xe1\xc7\x01"
"\x4b";
- d.inLen = strlen(d.input);
+ d.inLen = XSTRLEN(d.input);
d.outLen = MD4_DIGEST_SIZE;
e.input = "abcdefghijklmnopqrstuvwxyz";
e.output = "\xd7\x9e\x1c\x30\x8a\xa5\xbb\xcd\xee\xa8\xed\x63\xdf\x41\x2d"
"\xa9";
- e.inLen = strlen(e.input);
+ e.inLen = XSTRLEN(e.input);
e.outLen = MD4_DIGEST_SIZE;
f.input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345"
"6789";
f.output = "\x04\x3f\x85\x82\xf2\x41\xdb\x35\x1c\xe6\x27\xe1\x53\xe7\xf0"
"\xe4";
- f.inLen = strlen(f.input);
+ f.inLen = XSTRLEN(f.input);
f.outLen = MD4_DIGEST_SIZE;
g.input = "1234567890123456789012345678901234567890123456789012345678"
"9012345678901234567890";
g.output = "\xe3\x3b\x4d\xdc\x9c\x38\xf2\x19\x9c\x3e\x7b\x16\x4f\xcc\x05"
"\x36";
- g.inLen = strlen(g.input);
+ g.inLen = XSTRLEN(g.input);
g.outLen = MD4_DIGEST_SIZE;
test_md4[0] = a;
@@ -830,8 +1866,8 @@ int md4_test(void)
wc_Md4Update(&md4, (byte*)test_md4[i].input, (word32)test_md4[i].inLen);
wc_Md4Final(&md4, hash);
- if (memcmp(hash, test_md4[i].output, MD4_DIGEST_SIZE) != 0)
- return -205 - i;
+ if (XMEMCMP(hash, test_md4[i].output, MD4_DIGEST_SIZE) != 0)
+ return -1700 - i;
}
return 0;
@@ -843,59 +1879,125 @@ int md4_test(void)
int sha_test(void)
{
- Sha sha;
- byte hash[SHA_DIGEST_SIZE];
-
- testVector a, b, c, d;
- testVector test_sha[4];
- int ret;
+ int ret = 0;
+ wc_Sha sha, shaCopy;
+ byte hash[WC_SHA_DIGEST_SIZE];
+ byte hashcopy[WC_SHA_DIGEST_SIZE];
+ testVector a, b, c, d, e;
+ testVector test_sha[5];
int times = sizeof(test_sha) / sizeof(struct testVector), i;
- a.input = "abc";
- a.output = "\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E\x25\x71\x78\x50\xC2"
+ a.input = "";
+ a.output = "\xda\x39\xa3\xee\x5e\x6b\x4b\x0d\x32\x55\xbf\xef\x95\x60\x18"
+ "\x90\xaf\xd8\x07\x09";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA_DIGEST_SIZE;
+
+ b.input = "abc";
+ b.output = "\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E\x25\x71\x78\x50\xC2"
"\x6C\x9C\xD0\xD8\x9D";
- a.inLen = strlen(a.input);
- a.outLen = SHA_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA_DIGEST_SIZE;
- b.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
- b.output = "\x84\x98\x3E\x44\x1C\x3B\xD2\x6E\xBA\xAE\x4A\xA1\xF9\x51\x29"
+ c.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
+ c.output = "\x84\x98\x3E\x44\x1C\x3B\xD2\x6E\xBA\xAE\x4A\xA1\xF9\x51\x29"
"\xE5\xE5\x46\x70\xF1";
- b.inLen = strlen(b.input);
- b.outLen = SHA_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA_DIGEST_SIZE;
- c.input = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ d.input = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaa";
- c.output = "\x00\x98\xBA\x82\x4B\x5C\x16\x42\x7B\xD7\xA1\x12\x2A\x5A\x44"
+ d.output = "\x00\x98\xBA\x82\x4B\x5C\x16\x42\x7B\xD7\xA1\x12\x2A\x5A\x44"
"\x2A\x25\xEC\x64\x4D";
- c.inLen = strlen(c.input);
- c.outLen = SHA_DIGEST_SIZE;
+ d.inLen = XSTRLEN(d.input);
+ d.outLen = WC_SHA_DIGEST_SIZE;
- d.input = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ e.input = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaa";
- d.output = "\xAD\x5B\x3F\xDB\xCB\x52\x67\x78\xC2\x83\x9D\x2F\x15\x1E\xA7"
+ e.output = "\xAD\x5B\x3F\xDB\xCB\x52\x67\x78\xC2\x83\x9D\x2F\x15\x1E\xA7"
"\x53\x99\x5E\x26\xA0";
- d.inLen = strlen(d.input);
- d.outLen = SHA_DIGEST_SIZE;
+ e.inLen = XSTRLEN(e.input);
+ e.outLen = WC_SHA_DIGEST_SIZE;
test_sha[0] = a;
test_sha[1] = b;
test_sha[2] = c;
test_sha[3] = d;
+ test_sha[4] = e;
- ret = wc_InitSha(&sha);
+ ret = wc_InitSha_ex(&sha, HEAP_HINT, devId);
if (ret != 0)
- return -4001;
+ return -1800;
+ ret = wc_InitSha_ex(&shaCopy, HEAP_HINT, devId);
+ if (ret != 0) {
+ wc_ShaFree(&sha);
+ return -1801;
+ }
for (i = 0; i < times; ++i) {
- wc_ShaUpdate(&sha, (byte*)test_sha[i].input, (word32)test_sha[i].inLen);
- wc_ShaFinal(&sha, hash);
+ ret = wc_ShaUpdate(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
+ if (ret != 0)
+ ERROR_OUT(-1802 - i, exit);
+ ret = wc_ShaGetHash(&sha, hashcopy);
+ if (ret != 0)
+ ERROR_OUT(-1803 - i, exit);
+ ret = wc_ShaCopy(&sha, &shaCopy);
+ if (ret != 0)
+ ERROR_OUT(-1804 - i, exit);
+ ret = wc_ShaFinal(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-1805 - i, exit);
+ wc_ShaFree(&shaCopy);
- if (memcmp(hash, test_sha[i].output, SHA_DIGEST_SIZE) != 0)
- return -10 - i;
+ if (XMEMCMP(hash, test_sha[i].output, WC_SHA_DIGEST_SIZE) != 0)
+ ERROR_OUT(-1806 - i, exit);
+ if (XMEMCMP(hash, hashcopy, WC_SHA_DIGEST_SIZE) != 0)
+ ERROR_OUT(-1807 - i, exit);
}
- return 0;
+ /* BEGIN LARGE HASH TEST */ {
+ byte large_input[1024];
+#ifdef WOLFSSL_RENESAS_TSIP
+ const char* large_digest =
+ "\x1d\x6a\x5a\xf6\xe5\x7c\x86\xce\x7f\x7c\xaf\xd5\xdb\x08\xcd\x59"
+ "\x15\x8c\x6d\xb6";
+#else
+ const char* large_digest =
+ "\x8b\x77\x02\x48\x39\xe8\xdb\xd3\x9a\xf4\x05\x24\x66\x12\x2d\x9e"
+ "\xc5\xd9\x0a\xac";
+#endif
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+#ifdef WOLFSSL_RENESAS_TSIP
+ times = 20;
+#else
+ times = 100;
+#endif
+#ifdef WOLFSSL_PIC32MZ_HASH
+ wc_ShaSizeSet(&sha, times * sizeof(large_input));
+#endif
+ for (i = 0; i < times; ++i) {
+ ret = wc_ShaUpdate(&sha, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-1808, exit);
+ }
+ ret = wc_ShaFinal(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-1809, exit);
+ if (XMEMCMP(hash, large_digest, WC_SHA_DIGEST_SIZE) != 0)
+ ERROR_OUT(-1810, exit);
+ } /* END LARGE HASH TEST */
+
+exit:
+
+ wc_ShaFree(&sha);
+ wc_ShaFree(&shaCopy);
+
+ return ret;
}
#endif /* NO_SHA */
@@ -904,6 +2006,7 @@ int sha_test(void)
int ripemd_test(void)
{
RipeMd ripemd;
+ int ret;
byte hash[RIPEMD_DIGEST_SIZE];
testVector a, b, c, d;
@@ -913,26 +2016,26 @@ int ripemd_test(void)
a.input = "abc";
a.output = "\x8e\xb2\x08\xf7\xe0\x5d\x98\x7a\x9b\x04\x4a\x8e\x98\xc6"
"\xb0\x87\xf1\x5a\x0b\xfc";
- a.inLen = strlen(a.input);
+ a.inLen = XSTRLEN(a.input);
a.outLen = RIPEMD_DIGEST_SIZE;
b.input = "message digest";
b.output = "\x5d\x06\x89\xef\x49\xd2\xfa\xe5\x72\xb8\x81\xb1\x23\xa8"
"\x5f\xfa\x21\x59\x5f\x36";
- b.inLen = strlen(b.input);
+ b.inLen = XSTRLEN(b.input);
b.outLen = RIPEMD_DIGEST_SIZE;
c.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
c.output = "\x12\xa0\x53\x38\x4a\x9c\x0c\x88\xe4\x05\xa0\x6c\x27\xdc"
"\xf4\x9a\xda\x62\xeb\x2b";
- c.inLen = strlen(c.input);
+ c.inLen = XSTRLEN(c.input);
c.outLen = RIPEMD_DIGEST_SIZE;
d.input = "12345678901234567890123456789012345678901234567890123456"
"789012345678901234567890";
d.output = "\x9b\x75\x2e\x45\x57\x3d\x4b\x39\xf4\xdb\xd3\x32\x3c\xab"
"\x82\xbf\x63\x32\x6b\xfb";
- d.inLen = strlen(d.input);
+ d.inLen = XSTRLEN(d.input);
d.outLen = RIPEMD_DIGEST_SIZE;
test_ripemd[0] = a;
@@ -940,15 +2043,25 @@ int ripemd_test(void)
test_ripemd[2] = c;
test_ripemd[3] = d;
- wc_InitRipeMd(&ripemd);
+ ret = wc_InitRipeMd(&ripemd);
+ if (ret != 0) {
+ return -1900;
+ }
for (i = 0; i < times; ++i) {
- wc_RipeMdUpdate(&ripemd, (byte*)test_ripemd[i].input,
- (word32)test_ripemd[i].inLen);
- wc_RipeMdFinal(&ripemd, hash);
+ ret = wc_RipeMdUpdate(&ripemd, (byte*)test_ripemd[i].input,
+ (word32)test_ripemd[i].inLen);
+ if (ret != 0) {
+ return -1901 - i;
+ }
+
+ ret = wc_RipeMdFinal(&ripemd, hash);
+ if (ret != 0) {
+ return -1911 - i;
+ }
- if (memcmp(hash, test_ripemd[i].output, RIPEMD_DIGEST_SIZE) != 0)
- return -10 - i;
+ if (XMEMCMP(hash, test_ripemd[i].output, RIPEMD_DIGEST_SIZE) != 0)
+ return -1921 - i;
}
return 0;
@@ -959,9 +2072,9 @@ int ripemd_test(void)
#ifdef HAVE_BLAKE2
-#define BLAKE2_TESTS 3
+#define BLAKE2B_TESTS 3
-static const byte blake2b_vec[BLAKE2_TESTS][BLAKE2B_OUTBYTES] =
+static const byte blake2b_vec[BLAKE2B_TESTS][BLAKE2B_OUTBYTES] =
{
{
0x78, 0x6A, 0x02, 0xF7, 0x42, 0x01, 0x59, 0x03,
@@ -1007,21 +2120,21 @@ int blake2b_test(void)
for (i = 0; i < (int)sizeof(input); i++)
input[i] = (byte)i;
- for (i = 0; i < BLAKE2_TESTS; i++) {
+ for (i = 0; i < BLAKE2B_TESTS; i++) {
ret = wc_InitBlake2b(&b2b, 64);
if (ret != 0)
- return -4002;
+ return -2000 - i;
 ret = wc_Blake2bUpdate(&b2b, input, i);
 if (ret != 0)
- return -4003;
+ return -2010 - 1; /* NOTE(review): siblings use "- i" to encode the test index; "- 1" looks like a typo — confirm upstream */
ret = wc_Blake2bFinal(&b2b, digest, 64);
if (ret != 0)
- return -4004;
+ return -2020 - i;
- if (memcmp(digest, blake2b_vec[i], 64) != 0) {
- return -300 - i;
+ if (XMEMCMP(digest, blake2b_vec[i], 64) != 0) {
+ return -2030 - i;
}
}
@@ -1029,52 +2142,252 @@ int blake2b_test(void)
}
#endif /* HAVE_BLAKE2 */
+#ifdef HAVE_BLAKE2S
+
+
+#define BLAKE2S_TESTS 3
+
+static const byte blake2s_vec[BLAKE2S_TESTS][BLAKE2S_OUTBYTES] =
+{
+ {
+ 0x69, 0x21, 0x7a, 0x30, 0x79, 0x90, 0x80, 0x94,
+ 0xe1, 0x11, 0x21, 0xd0, 0x42, 0x35, 0x4a, 0x7c,
+ 0x1f, 0x55, 0xb6, 0x48, 0x2c, 0xa1, 0xa5, 0x1e,
+ 0x1b, 0x25, 0x0d, 0xfd, 0x1e, 0xd0, 0xee, 0xf9,
+ },
+ {
+ 0xe3, 0x4d, 0x74, 0xdb, 0xaf, 0x4f, 0xf4, 0xc6,
+ 0xab, 0xd8, 0x71, 0xcc, 0x22, 0x04, 0x51, 0xd2,
+ 0xea, 0x26, 0x48, 0x84, 0x6c, 0x77, 0x57, 0xfb,
+ 0xaa, 0xc8, 0x2f, 0xe5, 0x1a, 0xd6, 0x4b, 0xea,
+ },
+ {
+ 0xdd, 0xad, 0x9a, 0xb1, 0x5d, 0xac, 0x45, 0x49,
+ 0xba, 0x42, 0xf4, 0x9d, 0x26, 0x24, 0x96, 0xbe,
+ 0xf6, 0xc0, 0xba, 0xe1, 0xdd, 0x34, 0x2a, 0x88,
+ 0x08, 0xf8, 0xea, 0x26, 0x7c, 0x6e, 0x21, 0x0c,
+ }
+};
+
+
+
+int blake2s_test(void)
+{
+ Blake2s b2s;
+ byte digest[32];
+ byte input[64];
+ int i, ret;
+
+ for (i = 0; i < (int)sizeof(input); i++)
+ input[i] = (byte)i;
+
+ for (i = 0; i < BLAKE2S_TESTS; i++) {
+ ret = wc_InitBlake2s(&b2s, 32);
+ if (ret != 0)
+ return -2100 - i;
+
+ ret = wc_Blake2sUpdate(&b2s, input, i);
+ if (ret != 0)
+ return -2110 - i;
+
+ ret = wc_Blake2sFinal(&b2s, digest, 32);
+ if (ret != 0)
+ return -2120 - i;
+
+ if (XMEMCMP(digest, blake2s_vec[i], 32) != 0) {
+ return -2130 - i;
+ }
+ }
+
+ return 0;
+}
+#endif /* HAVE_BLAKE2S */
+
+
+#ifdef WOLFSSL_SHA224
+int sha224_test(void)
+{
+ wc_Sha224 sha, shaCopy;
+ byte hash[WC_SHA224_DIGEST_SIZE];
+ byte hashcopy[WC_SHA224_DIGEST_SIZE];
+ int ret = 0;
+
+ testVector a, b, c;
+ testVector test_sha[3];
+ int times = sizeof(test_sha) / sizeof(struct testVector), i;
+
+ a.input = "";
+ a.output = "\xd1\x4a\x02\x8c\x2a\x3a\x2b\xc9\x47\x61\x02\xbb\x28\x82\x34"
+ "\xc4\x15\xa2\xb0\x1f\x82\x8e\xa6\x2a\xc5\xb3\xe4\x2f";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA224_DIGEST_SIZE;
+
+ b.input = "abc";
+ b.output = "\x23\x09\x7d\x22\x34\x05\xd8\x22\x86\x42\xa4\x77\xbd\xa2\x55"
+ "\xb3\x2a\xad\xbc\xe4\xbd\xa0\xb3\xf7\xe3\x6c\x9d\xa7";
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA224_DIGEST_SIZE;
+
+ c.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
+ c.output = "\x75\x38\x8b\x16\x51\x27\x76\xcc\x5d\xba\x5d\xa1\xfd\x89\x01"
+ "\x50\xb0\xc6\x45\x5c\xb4\xf5\x8b\x19\x52\x52\x25\x25";
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA224_DIGEST_SIZE;
+
+ test_sha[0] = a;
+ test_sha[1] = b;
+ test_sha[2] = c;
+
+ ret = wc_InitSha224_ex(&sha, HEAP_HINT, devId);
+ if (ret != 0)
+ return -2200;
+ ret = wc_InitSha224_ex(&shaCopy, HEAP_HINT, devId);
+ if (ret != 0) {
+ wc_Sha224Free(&sha);
+ return -2201;
+ }
+
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha224Update(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
+ if (ret != 0)
+ ERROR_OUT(-2202 - i, exit);
+ ret = wc_Sha224GetHash(&sha, hashcopy);
+ if (ret != 0)
+ ERROR_OUT(-2203 - i, exit);
+ ret = wc_Sha224Copy(&sha, &shaCopy);
+ if (ret != 0)
+ ERROR_OUT(-2204 - i, exit);
+ ret = wc_Sha224Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2205 - i, exit);
+ wc_Sha224Free(&shaCopy);
+
+ if (XMEMCMP(hash, test_sha[i].output, WC_SHA224_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2206 - i, exit);
+ if (XMEMCMP(hash, hashcopy, WC_SHA224_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2207 - i, exit);
+ }
+
+exit:
+ wc_Sha224Free(&sha);
+ wc_Sha224Free(&shaCopy);
+
+ return ret;
+}
+#endif
+
#ifndef NO_SHA256
int sha256_test(void)
{
- Sha256 sha;
- byte hash[SHA256_DIGEST_SIZE];
+ wc_Sha256 sha, shaCopy;
+ byte hash[WC_SHA256_DIGEST_SIZE];
+ byte hashcopy[WC_SHA256_DIGEST_SIZE];
+ int ret = 0;
- testVector a, b;
- testVector test_sha[2];
- int ret;
+ testVector a, b, c;
+ testVector test_sha[3];
int times = sizeof(test_sha) / sizeof(struct testVector), i;
- a.input = "abc";
- a.output = "\xBA\x78\x16\xBF\x8F\x01\xCF\xEA\x41\x41\x40\xDE\x5D\xAE\x22"
+ a.input = "";
+ a.output = "\xe3\xb0\xc4\x42\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99\x6f\xb9"
+ "\x24\x27\xae\x41\xe4\x64\x9b\x93\x4c\xa4\x95\x99\x1b\x78\x52"
+ "\xb8\x55";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA256_DIGEST_SIZE;
+
+ b.input = "abc";
+ b.output = "\xBA\x78\x16\xBF\x8F\x01\xCF\xEA\x41\x41\x40\xDE\x5D\xAE\x22"
"\x23\xB0\x03\x61\xA3\x96\x17\x7A\x9C\xB4\x10\xFF\x61\xF2\x00"
"\x15\xAD";
- a.inLen = strlen(a.input);
- a.outLen = SHA256_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA256_DIGEST_SIZE;
- b.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
- b.output = "\x24\x8D\x6A\x61\xD2\x06\x38\xB8\xE5\xC0\x26\x93\x0C\x3E\x60"
+ c.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
+ c.output = "\x24\x8D\x6A\x61\xD2\x06\x38\xB8\xE5\xC0\x26\x93\x0C\x3E\x60"
"\x39\xA3\x3C\xE4\x59\x64\xFF\x21\x67\xF6\xEC\xED\xD4\x19\xDB"
"\x06\xC1";
- b.inLen = strlen(b.input);
- b.outLen = SHA256_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA256_DIGEST_SIZE;
test_sha[0] = a;
test_sha[1] = b;
+ test_sha[2] = c;
- ret = wc_InitSha256(&sha);
+ ret = wc_InitSha256_ex(&sha, HEAP_HINT, devId);
if (ret != 0)
- return -4005;
+ return -2300;
+ ret = wc_InitSha256_ex(&shaCopy, HEAP_HINT, devId);
+ if (ret != 0) {
+ wc_Sha256Free(&sha);
+ return -2301;
+ }
for (i = 0; i < times; ++i) {
- ret = wc_Sha256Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
+ ret = wc_Sha256Update(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
+ if (ret != 0) {
+ ERROR_OUT(-2302 - i, exit);
+ }
+ ret = wc_Sha256GetHash(&sha, hashcopy);
if (ret != 0)
- return -4006;
+ ERROR_OUT(-2303 - i, exit);
+ ret = wc_Sha256Copy(&sha, &shaCopy);
+ if (ret != 0)
+ ERROR_OUT(-2304 - i, exit);
ret = wc_Sha256Final(&sha, hash);
if (ret != 0)
- return -4007;
+ ERROR_OUT(-2305 - i, exit);
+ wc_Sha256Free(&shaCopy);
- if (memcmp(hash, test_sha[i].output, SHA256_DIGEST_SIZE) != 0)
- return -10 - i;
+ if (XMEMCMP(hash, test_sha[i].output, WC_SHA256_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2306 - i, exit);
+ if (XMEMCMP(hash, hashcopy, WC_SHA256_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2307 - i, exit);
}
- return 0;
+ /* BEGIN LARGE HASH TEST */ {
+ byte large_input[1024];
+#ifdef WOLFSSL_RENESAS_TSIP_CRYPT
+ const char* large_digest =
+ "\xa4\x75\x9e\x7a\xa2\x03\x38\x32\x88\x66\xa2\xea\x17\xea\xf8\xc7"
+ "\xfe\x4e\xc6\xbb\xe3\xbb\x71\xce\xe7\xdf\x7c\x04\x61\xb3\xc2\x2f";
+#else
+ const char* large_digest =
+ "\x27\x78\x3e\x87\x96\x3a\x4e\xfb\x68\x29\xb5\x31\xc9\xba\x57\xb4"
+ "\x4f\x45\x79\x7f\x67\x70\xbd\x63\x7f\xbf\x0d\x80\x7c\xbd\xba\xe0";
+#endif
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+#ifdef WOLFSSL_RENESAS_TSIP
+ times = 20;
+#else
+ times = 100;
+#endif
+#ifdef WOLFSSL_PIC32MZ_HASH
+ wc_Sha256SizeSet(&sha, times * sizeof(large_input));
+#endif
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha256Update(&sha, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-2308, exit);
+ }
+ ret = wc_Sha256Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2309, exit);
+ if (XMEMCMP(hash, large_digest, WC_SHA256_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2310, exit);
+ } /* END LARGE HASH TEST */
+
+exit:
+
+ wc_Sha256Free(&sha);
+ wc_Sha256Free(&shaCopy);
+
+ return ret;
}
#endif
@@ -1082,54 +2395,108 @@ int sha256_test(void)
#ifdef WOLFSSL_SHA512
int sha512_test(void)
{
- Sha512 sha;
- byte hash[SHA512_DIGEST_SIZE];
- int ret;
+ wc_Sha512 sha, shaCopy;
+ byte hash[WC_SHA512_DIGEST_SIZE];
+ byte hashcopy[WC_SHA512_DIGEST_SIZE];
+ int ret = 0;
- testVector a, b;
- testVector test_sha[2];
+ testVector a, b, c;
+ testVector test_sha[3];
int times = sizeof(test_sha) / sizeof(struct testVector), i;
- a.input = "abc";
- a.output = "\xdd\xaf\x35\xa1\x93\x61\x7a\xba\xcc\x41\x73\x49\xae\x20\x41"
+ a.input = "";
+ a.output = "\xcf\x83\xe1\x35\x7e\xef\xb8\xbd\xf1\x54\x28\x50\xd6\x6d\x80"
+ "\x07\xd6\x20\xe4\x05\x0b\x57\x15\xdc\x83\xf4\xa9\x21\xd3\x6c"
+ "\xe9\xce\x47\xd0\xd1\x3c\x5d\x85\xf2\xb0\xff\x83\x18\xd2\x87"
+ "\x7e\xec\x2f\x63\xb9\x31\xbd\x47\x41\x7a\x81\xa5\x38\x32\x7a"
+ "\xf9\x27\xda\x3e";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA512_DIGEST_SIZE;
+
+ b.input = "abc";
+ b.output = "\xdd\xaf\x35\xa1\x93\x61\x7a\xba\xcc\x41\x73\x49\xae\x20\x41"
"\x31\x12\xe6\xfa\x4e\x89\xa9\x7e\xa2\x0a\x9e\xee\xe6\x4b\x55"
"\xd3\x9a\x21\x92\x99\x2a\x27\x4f\xc1\xa8\x36\xba\x3c\x23\xa3"
"\xfe\xeb\xbd\x45\x4d\x44\x23\x64\x3c\xe8\x0e\x2a\x9a\xc9\x4f"
"\xa5\x4c\xa4\x9f";
- a.inLen = strlen(a.input);
- a.outLen = SHA512_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA512_DIGEST_SIZE;
- b.input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
+ c.input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
"jklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
- b.output = "\x8e\x95\x9b\x75\xda\xe3\x13\xda\x8c\xf4\xf7\x28\x14\xfc\x14"
+ c.output = "\x8e\x95\x9b\x75\xda\xe3\x13\xda\x8c\xf4\xf7\x28\x14\xfc\x14"
"\x3f\x8f\x77\x79\xc6\xeb\x9f\x7f\xa1\x72\x99\xae\xad\xb6\x88"
"\x90\x18\x50\x1d\x28\x9e\x49\x00\xf7\xe4\x33\x1b\x99\xde\xc4"
"\xb5\x43\x3a\xc7\xd3\x29\xee\xb6\xdd\x26\x54\x5e\x96\xe5\x5b"
"\x87\x4b\xe9\x09";
- b.inLen = strlen(b.input);
- b.outLen = SHA512_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA512_DIGEST_SIZE;
test_sha[0] = a;
test_sha[1] = b;
+ test_sha[2] = c;
- ret = wc_InitSha512(&sha);
+ ret = wc_InitSha512_ex(&sha, HEAP_HINT, devId);
if (ret != 0)
- return -4009;
+ return -2400;
+ ret = wc_InitSha512_ex(&shaCopy, HEAP_HINT, devId);
+ if (ret != 0) {
+ wc_Sha512Free(&sha);
+ return -2401;
+ }
for (i = 0; i < times; ++i) {
- ret = wc_Sha512Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
+ ret = wc_Sha512Update(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
if (ret != 0)
- return -4010;
-
+ ERROR_OUT(-2402 - i, exit);
+ ret = wc_Sha512GetHash(&sha, hashcopy);
+ if (ret != 0)
+ ERROR_OUT(-2403 - i, exit);
+ ret = wc_Sha512Copy(&sha, &shaCopy);
+ if (ret != 0)
+ ERROR_OUT(-2404 - i, exit);
ret = wc_Sha512Final(&sha, hash);
if (ret != 0)
- return -4011;
+ ERROR_OUT(-2405 - i, exit);
+ wc_Sha512Free(&shaCopy);
- if (memcmp(hash, test_sha[i].output, SHA512_DIGEST_SIZE) != 0)
- return -10 - i;
+ if (XMEMCMP(hash, test_sha[i].output, WC_SHA512_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2406 - i, exit);
+ if (XMEMCMP(hash, hashcopy, WC_SHA512_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2407 - i, exit);
}
- return 0;
+ /* BEGIN LARGE HASH TEST */ {
+ byte large_input[1024];
+ const char* large_digest =
+ "\x5a\x1f\x73\x90\xbd\x8c\xe4\x63\x54\xce\xa0\x9b\xef\x32\x78\x2d"
+ "\x2e\xe7\x0d\x5e\x2f\x9d\x15\x1b\xdd\x2d\xde\x65\x0c\x7b\xfa\x83"
+ "\x5e\x80\x02\x13\x84\xb8\x3f\xff\x71\x62\xb5\x09\x89\x63\xe1\xdc"
+ "\xa5\xdc\xfc\xfa\x9d\x1a\x4d\xc0\xfa\x3a\x14\xf6\x01\x51\x90\xa4";
+
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+ times = 100;
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha512Update(&sha, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-2408, exit);
+ }
+ ret = wc_Sha512Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2409, exit);
+ if (XMEMCMP(hash, large_digest, WC_SHA512_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2410, exit);
+ } /* END LARGE HASH TEST */
+
+exit:
+ wc_Sha512Free(&sha);
+ wc_Sha512Free(&shaCopy);
+
+ return ret;
}
#endif
@@ -1137,61 +2504,971 @@ int sha512_test(void)
#ifdef WOLFSSL_SHA384
int sha384_test(void)
{
- Sha384 sha;
- byte hash[SHA384_DIGEST_SIZE];
- int ret;
+ wc_Sha384 sha, shaCopy;
+ byte hash[WC_SHA384_DIGEST_SIZE];
+ byte hashcopy[WC_SHA384_DIGEST_SIZE];
+ int ret = 0;
- testVector a, b;
- testVector test_sha[2];
+ testVector a, b, c;
+ testVector test_sha[3];
int times = sizeof(test_sha) / sizeof(struct testVector), i;
- a.input = "abc";
- a.output = "\xcb\x00\x75\x3f\x45\xa3\x5e\x8b\xb5\xa0\x3d\x69\x9a\xc6\x50"
+ a.input = "";
+
+ a.output = "\x38\xb0\x60\xa7\x51\xac\x96\x38\x4c\xd9\x32\x7e\xb1\xb1\xe3"
+ "\x6a\x21\xfd\xb7\x11\x14\xbe\x07\x43\x4c\x0c\xc7\xbf\x63\xf6"
+ "\xe1\xda\x27\x4e\xde\xbf\xe7\x6f\x65\xfb\xd5\x1a\xd2\xf1\x48"
+ "\x98\xb9\x5b";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA384_DIGEST_SIZE;
+
+ b.input = "abc";
+ b.output = "\xcb\x00\x75\x3f\x45\xa3\x5e\x8b\xb5\xa0\x3d\x69\x9a\xc6\x50"
"\x07\x27\x2c\x32\xab\x0e\xde\xd1\x63\x1a\x8b\x60\x5a\x43\xff"
"\x5b\xed\x80\x86\x07\x2b\xa1\xe7\xcc\x23\x58\xba\xec\xa1\x34"
"\xc8\x25\xa7";
- a.inLen = strlen(a.input);
- a.outLen = SHA384_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA384_DIGEST_SIZE;
- b.input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
+ c.input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
"jklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
- b.output = "\x09\x33\x0c\x33\xf7\x11\x47\xe8\x3d\x19\x2f\xc7\x82\xcd\x1b"
+ c.output = "\x09\x33\x0c\x33\xf7\x11\x47\xe8\x3d\x19\x2f\xc7\x82\xcd\x1b"
"\x47\x53\x11\x1b\x17\x3b\x3b\x05\xd2\x2f\xa0\x80\x86\xe3\xb0"
"\xf7\x12\xfc\xc7\xc7\x1a\x55\x7e\x2d\xb9\x66\xc3\xe9\xfa\x91"
"\x74\x60\x39";
- b.inLen = strlen(b.input);
- b.outLen = SHA384_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA384_DIGEST_SIZE;
test_sha[0] = a;
test_sha[1] = b;
+ test_sha[2] = c;
- ret = wc_InitSha384(&sha);
+ ret = wc_InitSha384_ex(&sha, HEAP_HINT, devId);
if (ret != 0)
- return -4012;
+ return -2500;
+ ret = wc_InitSha384_ex(&shaCopy, HEAP_HINT, devId);
+ if (ret != 0) {
+ wc_Sha384Free(&sha);
+ return -2501;
+ }
for (i = 0; i < times; ++i) {
- ret = wc_Sha384Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
+ ret = wc_Sha384Update(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
if (ret != 0)
- return -4013;
-
+ ERROR_OUT(-2502 - i, exit);
+ ret = wc_Sha384GetHash(&sha, hashcopy);
+ if (ret != 0)
+ ERROR_OUT(-2503 - i, exit);
+ ret = wc_Sha384Copy(&sha, &shaCopy);
+ if (ret != 0)
+ ERROR_OUT(-2504 - i, exit);
ret = wc_Sha384Final(&sha, hash);
if (ret != 0)
- return -4014;
+ ERROR_OUT(-2505 - i, exit);
+ wc_Sha384Free(&shaCopy);
- if (memcmp(hash, test_sha[i].output, SHA384_DIGEST_SIZE) != 0)
- return -10 - i;
+ if (XMEMCMP(hash, test_sha[i].output, WC_SHA384_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2506 - i, exit);
+ if (XMEMCMP(hash, hashcopy, WC_SHA384_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2507 - i, exit);
}
- return 0;
+ /* BEGIN LARGE HASH TEST */ {
+ byte large_input[1024];
+ const char* large_digest =
+ "\x37\x01\xdb\xff\x1e\x40\x4f\xe1\xe2\xea\x0b\x40\xbb\x3b\x39\x9a"
+ "\xcc\xe8\x44\x8e\x7e\xe5\x64\xb5\x6b\x7f\x56\x64\xa7\x2b\x84\xe3"
+ "\xc5\xd7\x79\x03\x25\x90\xf7\xa4\x58\xcb\x97\xa8\x8b\xb1\xa4\x81";
+
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+ times = 100;
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha384Update(&sha, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-2508, exit);
+ }
+ ret = wc_Sha384Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2509, exit);
+ if (XMEMCMP(hash, large_digest, WC_SHA384_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2510, exit);
+ } /* END LARGE HASH TEST */
+
+exit:
+
+ wc_Sha384Free(&sha);
+ wc_Sha384Free(&shaCopy);
+
+ return ret;
}
#endif /* WOLFSSL_SHA384 */
+#ifdef WOLFSSL_SHA3
+#ifndef WOLFSSL_NOSHA3_224
+static int sha3_224_test(void)
+{
+ wc_Sha3 sha;
+ byte hash[WC_SHA3_224_DIGEST_SIZE];
+ byte hashcopy[WC_SHA3_224_DIGEST_SIZE];
+
+ testVector a, b, c;
+ testVector test_sha[3];
+ int ret = 0;
+ int times = sizeof(test_sha) / sizeof(struct testVector), i;
+
+ a.input = "";
+ a.output = "\x6b\x4e\x03\x42\x36\x67\xdb\xb7\x3b\x6e\x15\x45\x4f\x0e\xb1"
+ "\xab\xd4\x59\x7f\x9a\x1b\x07\x8e\x3f\x5b\x5a\x6b\xc7";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA3_224_DIGEST_SIZE;
+
+ b.input = "abc";
+ b.output = "\xe6\x42\x82\x4c\x3f\x8c\xf2\x4a\xd0\x92\x34\xee\x7d\x3c\x76"
+ "\x6f\xc9\xa3\xa5\x16\x8d\x0c\x94\xad\x73\xb4\x6f\xdf";
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA3_224_DIGEST_SIZE;
+
+ c.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
+ c.output = "\x8a\x24\x10\x8b\x15\x4a\xda\x21\xc9\xfd\x55\x74\x49\x44\x79"
+ "\xba\x5c\x7e\x7a\xb7\x6e\xf2\x64\xea\xd0\xfc\xce\x33";
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA3_224_DIGEST_SIZE;
+
+ test_sha[0] = a;
+ test_sha[1] = b;
+ test_sha[2] = c;
+
+ ret = wc_InitSha3_224(&sha, HEAP_HINT, devId);
+ if (ret != 0)
+ return -2600;
+
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha3_224_Update(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
+ if (ret != 0)
+ ERROR_OUT(-2601 - i, exit);
+ ret = wc_Sha3_224_GetHash(&sha, hashcopy);
+ if (ret != 0)
+ ERROR_OUT(-2602 - i, exit);
+ ret = wc_Sha3_224_Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2603 - i, exit);
+
+ if (XMEMCMP(hash, test_sha[i].output, WC_SHA3_224_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2604 - i, exit);
+ if (XMEMCMP(hash, hashcopy, WC_SHA3_224_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2605 - i, exit);
+ }
+
+ /* BEGIN LARGE HASH TEST */ {
+ byte large_input[1024];
+ const char* large_digest =
+ "\x13\xe5\xd3\x98\x7b\x94\xda\x41\x12\xc7\x1e\x92\x3a\x19"
+ "\x21\x20\x86\x6f\x24\xbf\x0a\x31\xbc\xfd\xd6\x70\x36\xf3";
+
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+ times = 100;
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha3_224_Update(&sha, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-2606, exit);
+ }
+ ret = wc_Sha3_224_Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2607, exit);
+ if (XMEMCMP(hash, large_digest, WC_SHA3_224_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2608, exit);
+ } /* END LARGE HASH TEST */
+
+exit:
+ wc_Sha3_224_Free(&sha);
+
+ return ret;
+}
+#endif /* WOLFSSL_NOSHA3_224 */
+
+#ifndef WOLFSSL_NOSHA3_256
+static int sha3_256_test(void)
+{
+ wc_Sha3 sha;
+ byte hash[WC_SHA3_256_DIGEST_SIZE];
+ byte hashcopy[WC_SHA3_256_DIGEST_SIZE];
+
+ testVector a, b, c;
+ testVector test_sha[3];
+ int ret = 0;
+ int times = sizeof(test_sha) / sizeof(struct testVector), i;
+
+ byte large_input[1024];
+ const char* large_digest =
+ "\xdc\x90\xc0\xb1\x25\xdb\x2c\x34\x81\xa3\xff\xbc\x1e\x2e\x87\xeb"
+ "\x6d\x70\x85\x61\xe0\xe9\x63\x61\xff\xe5\x84\x4b\x1f\x68\x05\x15";
+
+#if defined(WOLFSSL_HASH_FLAGS) && !defined(WOLFSSL_ASYNC_CRYPT)
+ /* test vector with hash of empty string */
+ const char* Keccak256EmptyOut =
+ "\xc5\xd2\x46\x01\x86\xf7\x23\x3c\x92\x7e\x7d\xb2\xdc\xc7\x03\xc0"
+ "\xe5\x00\xb6\x53\xca\x82\x27\x3b\x7b\xfa\xd8\x04\x5d\x85\xa4\x70";
+#endif
+
+ a.input = "";
+ a.output = "\xa7\xff\xc6\xf8\xbf\x1e\xd7\x66\x51\xc1\x47\x56\xa0\x61\xd6"
+ "\x62\xf5\x80\xff\x4d\xe4\x3b\x49\xfa\x82\xd8\x0a\x4b\x80\xf8"
+ "\x43\x4a";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA3_256_DIGEST_SIZE;
+
+ b.input = "abc";
+ b.output = "\x3a\x98\x5d\xa7\x4f\xe2\x25\xb2\x04\x5c\x17\x2d\x6b\xd3\x90"
+ "\xbd\x85\x5f\x08\x6e\x3e\x9d\x52\x5b\x46\xbf\xe2\x45\x11\x43"
+ "\x15\x32";
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA3_256_DIGEST_SIZE;
+
+ c.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
+ c.output = "\x41\xc0\xdb\xa2\xa9\xd6\x24\x08\x49\x10\x03\x76\xa8\x23\x5e"
+ "\x2c\x82\xe1\xb9\x99\x8a\x99\x9e\x21\xdb\x32\xdd\x97\x49\x6d"
+ "\x33\x76";
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA3_256_DIGEST_SIZE;
+
+ test_sha[0] = a;
+ test_sha[1] = b;
+ test_sha[2] = c;
+
+ ret = wc_InitSha3_256(&sha, HEAP_HINT, devId);
+ if (ret != 0)
+ return -2700;
+
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha3_256_Update(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
+ if (ret != 0)
+ ERROR_OUT(-2701 - i, exit);
+ ret = wc_Sha3_256_GetHash(&sha, hashcopy);
+ if (ret != 0)
+ ERROR_OUT(-2702 - i, exit);
+ ret = wc_Sha3_256_Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2703 - i, exit);
+
+ if (XMEMCMP(hash, test_sha[i].output, WC_SHA3_256_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2704 - i, exit);
+ if (XMEMCMP(hash, hashcopy, WC_SHA3_256_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2705 - i, exit);
+ }
+
+ /* BEGIN LARGE HASH TEST */ {
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+ times = 100;
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha3_256_Update(&sha, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-2706, exit);
+ }
+ ret = wc_Sha3_256_Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2707, exit);
+ if (XMEMCMP(hash, large_digest, WC_SHA3_256_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2708, exit);
+ } /* END LARGE HASH TEST */
+
+ /* this is a software only variant of SHA3 not supported by external hardware devices */
+#if defined(WOLFSSL_HASH_FLAGS) && !defined(WOLFSSL_ASYNC_CRYPT)
+ /* Test for Keccak256 */
+ ret = wc_Sha3_SetFlags(&sha, WC_HASH_SHA3_KECCAK256);
+ if (ret != 0) {
+ ERROR_OUT(-2709, exit);
+ }
+ ret = wc_Sha3_256_Update(&sha, (byte*)"", 0);
+ if (ret != 0) {
+ ERROR_OUT(-2710, exit);
+ }
+ ret = wc_Sha3_256_Final(&sha, hash);
+ if (ret != 0) {
+ ERROR_OUT(-2711, exit);
+ }
+ if (XMEMCMP(hash, Keccak256EmptyOut, WC_SHA3_256_DIGEST_SIZE) != 0) {
+ ERROR_OUT(-2712, exit);
+ }
+#endif /* WOLFSSL_HASH_FLAGS && !WOLFSSL_ASYNC_CRYPT */
+
+exit:
+ wc_Sha3_256_Free(&sha);
+
+ return ret;
+}
+#endif /* WOLFSSL_NOSHA3_256 */
+
+#ifndef WOLFSSL_NOSHA3_384
+static int sha3_384_test(void)
+{
+ wc_Sha3 sha;
+ byte hash[WC_SHA3_384_DIGEST_SIZE];
+#ifndef NO_INTM_HASH_TEST
+ byte hashcopy[WC_SHA3_384_DIGEST_SIZE];
+#endif
+
+ testVector a, b, c;
+ testVector test_sha[3];
+ int ret;
+ int times = sizeof(test_sha) / sizeof(struct testVector), i;
+
+ a.input = "";
+ a.output = "\x0c\x63\xa7\x5b\x84\x5e\x4f\x7d\x01\x10\x7d\x85\x2e\x4c\x24"
+ "\x85\xc5\x1a\x50\xaa\xaa\x94\xfc\x61\x99\x5e\x71\xbb\xee\x98"
+ "\x3a\x2a\xc3\x71\x38\x31\x26\x4a\xdb\x47\xfb\x6b\xd1\xe0\x58"
+ "\xd5\xf0\x04";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA3_384_DIGEST_SIZE;
+
+#if defined(WOLFSSL_AFALG_XILINX_SHA3) || defined(WOLFSSL_XILINX_CRYPT)
+ /* NIST test vector with a length that is a multiple of 4 */
+ b.input = "\x7d\x80\xb1\x60\xc4\xb5\x36\xa3\xbe\xb7\x99\x80\x59\x93\x44"
+ "\x04\x7c\x5f\x82\xa1\xdf\xc3\xee\xd4";
+ b.output = "\x04\x1c\xc5\x86\x1b\xa3\x34\x56\x3c\x61\xd4\xef\x97\x10\xd4"
+ "\x89\x6c\x31\x1c\x92\xed\xbe\x0d\x7c\xd5\x3e\x80\x3b\xf2\xf4"
+ "\xeb\x60\x57\x23\x55\x70\x77\x0c\xe8\x7c\x55\x20\xd7\xec\x14"
+ "\x19\x87\x22";
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA3_384_DIGEST_SIZE;
+#else
+ b.input = "abc";
+ b.output = "\xec\x01\x49\x82\x88\x51\x6f\xc9\x26\x45\x9f\x58\xe2\xc6\xad"
+ "\x8d\xf9\xb4\x73\xcb\x0f\xc0\x8c\x25\x96\xda\x7c\xf0\xe4\x9b"
+ "\xe4\xb2\x98\xd8\x8c\xea\x92\x7a\xc7\xf5\x39\xf1\xed\xf2\x28"
+ "\x37\x6d\x25";
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA3_384_DIGEST_SIZE;
+#endif
+ c.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
+ c.output = "\x99\x1c\x66\x57\x55\xeb\x3a\x4b\x6b\xbd\xfb\x75\xc7\x8a\x49"
+ "\x2e\x8c\x56\xa2\x2c\x5c\x4d\x7e\x42\x9b\xfd\xbc\x32\xb9\xd4"
+ "\xad\x5a\xa0\x4a\x1f\x07\x6e\x62\xfe\xa1\x9e\xef\x51\xac\xd0"
+ "\x65\x7c\x22";
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA3_384_DIGEST_SIZE;
+
+#ifdef WOLFSSL_XILINX_CRYPT
+ test_sha[0] = b; /* hardware acc. can not handle "" string */
+#else
+ test_sha[0] = a;
+#endif
+ test_sha[1] = b;
+ test_sha[2] = c;
+
+ ret = wc_InitSha3_384(&sha, HEAP_HINT, devId);
+ if (ret != 0)
+ return -2800;
+
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha3_384_Update(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
+ if (ret != 0)
+ ERROR_OUT(-2801 - i, exit);
+ #ifndef NO_INTM_HASH_TEST
+ ret = wc_Sha3_384_GetHash(&sha, hashcopy);
+ if (ret != 0)
+ ERROR_OUT(-2802 - i, exit);
+ #endif
+ ret = wc_Sha3_384_Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2803 - i, exit);
+
+ if (XMEMCMP(hash, test_sha[i].output, WC_SHA3_384_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2804 - i, exit);
+ #ifndef NO_INTM_HASH_TEST
+ if (XMEMCMP(hash, hashcopy, WC_SHA3_384_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2805 - i, exit);
+ #endif
+ }
+
+ /* BEGIN LARGE HASH TEST */ {
+ byte large_input[1024];
+ const char* large_digest =
+ "\x30\x44\xec\x17\xef\x47\x9f\x55\x36\x11\xd6\x3f\x8a\x31\x5a\x71"
+ "\x8a\x71\xa7\x1d\x8e\x84\xe8\x6c\x24\x02\x2f\x7a\x08\x4e\xea\xd7"
+ "\x42\x36\x5d\xa8\xc2\xb7\x42\xad\xec\x19\xfb\xca\xc6\x64\xb3\xa4";
+
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+ times = 100;
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha3_384_Update(&sha, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-2806, exit);
+ }
+ ret = wc_Sha3_384_Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2807, exit);
+ if (XMEMCMP(hash, large_digest, WC_SHA3_384_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2808, exit);
+ } /* END LARGE HASH TEST */
+
+exit:
+ wc_Sha3_384_Free(&sha);
+
+ return ret;
+}
+#endif /* WOLFSSL_NOSHA3_384 */
+
+#ifndef WOLFSSL_NOSHA3_512
+static int sha3_512_test(void)
+{
+ wc_Sha3 sha;
+ byte hash[WC_SHA3_512_DIGEST_SIZE];
+ byte hashcopy[WC_SHA3_512_DIGEST_SIZE];
+
+ testVector a, b, c;
+ testVector test_sha[3];
+ int ret;
+ int times = sizeof(test_sha) / sizeof(struct testVector), i;
+
+ a.input = "";
+ a.output = "\xa6\x9f\x73\xcc\xa2\x3a\x9a\xc5\xc8\xb5\x67\xdc\x18\x5a\x75"
+ "\x6e\x97\xc9\x82\x16\x4f\xe2\x58\x59\xe0\xd1\xdc\xc1\x47\x5c"
+ "\x80\xa6\x15\xb2\x12\x3a\xf1\xf5\xf9\x4c\x11\xe3\xe9\x40\x2c"
+ "\x3a\xc5\x58\xf5\x00\x19\x9d\x95\xb6\xd3\xe3\x01\x75\x85\x86"
+ "\x28\x1d\xcd\x26";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA3_512_DIGEST_SIZE;
+
+ b.input = "abc";
+ b.output = "\xb7\x51\x85\x0b\x1a\x57\x16\x8a\x56\x93\xcd\x92\x4b\x6b\x09"
+ "\x6e\x08\xf6\x21\x82\x74\x44\xf7\x0d\x88\x4f\x5d\x02\x40\xd2"
+ "\x71\x2e\x10\xe1\x16\xe9\x19\x2a\xf3\xc9\x1a\x7e\xc5\x76\x47"
+ "\xe3\x93\x40\x57\x34\x0b\x4c\xf4\x08\xd5\xa5\x65\x92\xf8\x27"
+ "\x4e\xec\x53\xf0";
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA3_512_DIGEST_SIZE;
+
+ c.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
+ c.output = "\x04\xa3\x71\xe8\x4e\xcf\xb5\xb8\xb7\x7c\xb4\x86\x10\xfc\xa8"
+ "\x18\x2d\xd4\x57\xce\x6f\x32\x6a\x0f\xd3\xd7\xec\x2f\x1e\x91"
+ "\x63\x6d\xee\x69\x1f\xbe\x0c\x98\x53\x02\xba\x1b\x0d\x8d\xc7"
+ "\x8c\x08\x63\x46\xb5\x33\xb4\x9c\x03\x0d\x99\xa2\x7d\xaf\x11"
+ "\x39\xd6\xe7\x5e";
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA3_512_DIGEST_SIZE;
+
+ test_sha[0] = a;
+ test_sha[1] = b;
+ test_sha[2] = c;
+
+ ret = wc_InitSha3_512(&sha, HEAP_HINT, devId);
+ if (ret != 0)
+ return -2900;
+
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha3_512_Update(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
+ if (ret != 0)
+ ERROR_OUT(-2901 - i, exit);
+ ret = wc_Sha3_512_GetHash(&sha, hashcopy);
+ if (ret != 0)
+ ERROR_OUT(-2902 - i, exit);
+ ret = wc_Sha3_512_Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2903 - i, exit);
+
+ if (XMEMCMP(hash, test_sha[i].output, WC_SHA3_512_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2904 - i, exit);
+ if (XMEMCMP(hash, hashcopy, WC_SHA3_512_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2905 - i, exit);
+ }
+
+ /* BEGIN LARGE HASH TEST */ {
+ byte large_input[1024];
+ const char* large_digest =
+ "\x9c\x13\x26\xb6\x26\xb2\x94\x31\xbc\xf4\x34\xe9\x6f\xf2\xd6\x29"
+ "\x9a\xd0\x9b\x32\x63\x2f\x18\xa7\x5f\x23\xc9\x60\xc2\x32\x0c\xbc"
+ "\x57\x77\x33\xf1\x83\x81\x8a\xd3\x15\x7c\x93\xdc\x80\x9f\xed\x61"
+ "\x41\xa7\x5b\xfd\x32\x0e\x38\x15\xb0\x46\x3b\x7a\x4f\xfd\x44\x88";
+
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+ times = 100;
+ for (i = 0; i < times; ++i) {
+ ret = wc_Sha3_512_Update(&sha, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-2906, exit);
+ }
+ ret = wc_Sha3_512_Final(&sha, hash);
+ if (ret != 0)
+ ERROR_OUT(-2907, exit);
+ if (XMEMCMP(hash, large_digest, WC_SHA3_512_DIGEST_SIZE) != 0)
+ ERROR_OUT(-2908, exit);
+ } /* END LARGE HASH TEST */
+
+exit:
+ wc_Sha3_512_Free(&sha);
+
+ return ret;
+}
+#endif /* WOLFSSL_NOSHA3_512 */
+
+int sha3_test(void)
+{
+ int ret;
+
+ (void)ret;
+
+#ifndef WOLFSSL_NOSHA3_224
+ if ((ret = sha3_224_test()) != 0)
+ return ret;
+#endif
+#ifndef WOLFSSL_NOSHA3_256
+ if ((ret = sha3_256_test()) != 0)
+ return ret;
+#endif
+#ifndef WOLFSSL_NOSHA3_384
+ if ((ret = sha3_384_test()) != 0)
+ return ret;
+#endif
+#ifndef WOLFSSL_NOSHA3_512
+ if ((ret = sha3_512_test()) != 0)
+ return ret;
+#endif
+
+ return 0;
+}
+#endif /* WOLFSSL_SHA3 */
+
+#ifdef WOLFSSL_SHAKE256
+int shake256_test(void)
+{
+#ifndef WOLFSSL_NO_SHAKE256
+ wc_Shake sha;
+ byte hash[114];
+
+ testVector a, b, c;
+ testVector test_sha[3];
+ int ret = 0;
+ int times = sizeof(test_sha) / sizeof(struct testVector), i;
+
+ byte large_input[1024];
+ const char* large_digest =
+ "\x90\x32\x4a\xcc\xd1\xdf\xb8\x0b\x79\x1f\xb8\xc8\x5b\x54\xc8\xe7"
+ "\x45\xf5\x60\x6b\x38\x26\xb2\x0a\xee\x38\x01\xf3\xd9\xfa\x96\x9f"
+ "\x6a\xd7\x15\xdf\xb6\xc2\xf4\x20\x33\x44\x55\xe8\x2a\x09\x2b\x68"
+ "\x2e\x18\x65\x5e\x65\x93\x28\xbc\xb1\x9e\xe2\xb1\x92\xea\x98\xac"
+ "\x21\xef\x4c\xe1\xb4\xb7\xbe\x81\x5c\x1d\xd3\xb7\x17\xe5\xbb\xc5"
+ "\x8c\x68\xb7\xfb\xac\x55\x8a\x9b\x4d\x91\xe4\x9f\x72\xbb\x6e\x38"
+ "\xaf\x21\x7d\x21\xaa\x98\x4e\x75\xc4\xb4\x1c\x7c\x50\x45\x54\xf9"
+ "\xea\x26";
+
+ a.input = "";
+ a.output = "\x46\xb9\xdd\x2b\x0b\xa8\x8d\x13\x23\x3b\x3f\xeb\x74\x3e\xeb"
+ "\x24\x3f\xcd\x52\xea\x62\xb8\x1b\x82\xb5\x0c\x27\x64\x6e\xd5"
+ "\x76\x2f\xd7\x5d\xc4\xdd\xd8\xc0\xf2\x00\xcb\x05\x01\x9d\x67"
+ "\xb5\x92\xf6\xfc\x82\x1c\x49\x47\x9a\xb4\x86\x40\x29\x2e\xac"
+ "\xb3\xb7\xc4\xbe\x14\x1e\x96\x61\x6f\xb1\x39\x57\x69\x2c\xc7"
+ "\xed\xd0\xb4\x5a\xe3\xdc\x07\x22\x3c\x8e\x92\x93\x7b\xef\x84"
+ "\xbc\x0e\xab\x86\x28\x53\x34\x9e\xc7\x55\x46\xf5\x8f\xb7\xc2"
+ "\x77\x5c\x38\x46\x2c\x50\x10\xd8\x46";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = sizeof(hash);
+
+ b.input = "abc";
+ b.output = "\x48\x33\x66\x60\x13\x60\xa8\x77\x1c\x68\x63\x08\x0c\xc4\x11"
+ "\x4d\x8d\xb4\x45\x30\xf8\xf1\xe1\xee\x4f\x94\xea\x37\xe7\x8b"
+ "\x57\x39\xd5\xa1\x5b\xef\x18\x6a\x53\x86\xc7\x57\x44\xc0\x52"
+ "\x7e\x1f\xaa\x9f\x87\x26\xe4\x62\xa1\x2a\x4f\xeb\x06\xbd\x88"
+ "\x01\xe7\x51\xe4\x13\x85\x14\x12\x04\xf3\x29\x97\x9f\xd3\x04"
+ "\x7a\x13\xc5\x65\x77\x24\xad\xa6\x4d\x24\x70\x15\x7b\x3c\xdc"
+ "\x28\x86\x20\x94\x4d\x78\xdb\xcd\xdb\xd9\x12\x99\x3f\x09\x13"
+ "\xf1\x64\xfb\x2c\xe9\x51\x31\xa2\xd0";
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = sizeof(hash);
+
+ c.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
+ c.output = "\x4d\x8c\x2d\xd2\x43\x5a\x01\x28\xee\xfb\xb8\xc3\x6f\x6f\x87"
+ "\x13\x3a\x79\x11\xe1\x8d\x97\x9e\xe1\xae\x6b\xe5\xd4\xfd\x2e"
+ "\x33\x29\x40\xd8\x68\x8a\x4e\x6a\x59\xaa\x80\x60\xf1\xf9\xbc"
+ "\x99\x6c\x05\xac\xa3\xc6\x96\xa8\xb6\x62\x79\xdc\x67\x2c\x74"
+ "\x0b\xb2\x24\xec\x37\xa9\x2b\x65\xdb\x05\x39\xc0\x20\x34\x55"
+ "\xf5\x1d\x97\xcc\xe4\xcf\xc4\x91\x27\xd7\x26\x0a\xfc\x67\x3a"
+ "\xf2\x08\xba\xf1\x9b\xe2\x12\x33\xf3\xde\xbe\x78\xd0\x67\x60"
+ "\xcf\xa5\x51\xee\x1e\x07\x91\x41\xd4";
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = sizeof(hash);
+
+ test_sha[0] = a;
+ test_sha[1] = b;
+ test_sha[2] = c;
+
+ ret = wc_InitShake256(&sha, HEAP_HINT, devId);
+ if (ret != 0)
+ return -3100;
+
+ for (i = 0; i < times; ++i) {
+ ret = wc_Shake256_Update(&sha, (byte*)test_sha[i].input,
+ (word32)test_sha[i].inLen);
+ if (ret != 0)
+ ERROR_OUT(-3101 - i, exit);
+ ret = wc_Shake256_Final(&sha, hash, (word32)test_sha[i].outLen);
+ if (ret != 0)
+ ERROR_OUT(-3102 - i, exit);
+
+ if (XMEMCMP(hash, test_sha[i].output, test_sha[i].outLen) != 0)
+ ERROR_OUT(-3103 - i, exit);
+ }
+
+ /* BEGIN LARGE HASH TEST */ {
+ for (i = 0; i < (int)sizeof(large_input); i++) {
+ large_input[i] = (byte)(i & 0xFF);
+ }
+ times = 100;
+ for (i = 0; i < times; ++i) {
+ ret = wc_Shake256_Update(&sha, (byte*)large_input,
+ (word32)sizeof(large_input));
+ if (ret != 0)
+ ERROR_OUT(-3104, exit);
+ }
+ ret = wc_Shake256_Final(&sha, hash, (word32)sizeof(hash));
+ if (ret != 0)
+ ERROR_OUT(-3105, exit);
+ if (XMEMCMP(hash, large_digest, sizeof(hash)) != 0)
+ ERROR_OUT(-3106, exit);
+ } /* END LARGE HASH TEST */
+
+exit:
+ wc_Shake256_Free(&sha);
+
+ return ret;
+#else
+ return 0;
+#endif
+}
+#endif
+
+
+int hash_test(void)
+{
+ wc_HashAlg hash;
+ int ret, exp_ret;
+ int i, j;
+ int digestSz;
+ byte data[] = "0123456789abcdef0123456789abcdef0123456";
+ byte out[WC_MAX_DIGEST_SIZE];
+ byte hashOut[WC_MAX_DIGEST_SIZE];
+#if !defined(NO_ASN) || !defined(NO_DH) || defined(HAVE_ECC)
+ enum wc_HashType hashType;
+#endif
+ enum wc_HashType typesGood[] = { WC_HASH_TYPE_MD5, WC_HASH_TYPE_SHA,
+ WC_HASH_TYPE_SHA224, WC_HASH_TYPE_SHA256,
+ WC_HASH_TYPE_SHA384, WC_HASH_TYPE_SHA512,
+ WC_HASH_TYPE_SHA3_224,
+ WC_HASH_TYPE_SHA3_256,
+ WC_HASH_TYPE_SHA3_384,
+ WC_HASH_TYPE_SHA3_512 };
+ enum wc_HashType typesNoImpl[] = {
+#ifdef NO_MD5
+ WC_HASH_TYPE_MD5,
+#endif
+#ifdef NO_SHA
+ WC_HASH_TYPE_SHA,
+#endif
+#ifndef WOLFSSL_SHA224
+ WC_HASH_TYPE_SHA224,
+#endif
+#ifdef NO_SHA256
+ WC_HASH_TYPE_SHA256,
+#endif
+#ifndef WOLFSSL_SHA384
+ WC_HASH_TYPE_SHA384,
+#endif
+#ifndef WOLFSSL_SHA512
+ WC_HASH_TYPE_SHA512,
+#endif
+#if !defined(WOLFSSL_SHA3) || defined(WOLFSSL_NOSHA3_224)
+ WC_HASH_TYPE_SHA3_224,
+#endif
+#if !defined(WOLFSSL_SHA3) || defined(WOLFSSL_NOSHA3_256)
+ WC_HASH_TYPE_SHA3_256,
+#endif
+#if !defined(WOLFSSL_SHA3) || defined(WOLFSSL_NOSHA3_384)
+ WC_HASH_TYPE_SHA3_384,
+#endif
+#if !defined(WOLFSSL_SHA3) || defined(WOLFSSL_NOSHA3_512)
+ WC_HASH_TYPE_SHA3_512,
+#endif
+ WC_HASH_TYPE_NONE
+ };
+ enum wc_HashType typesBad[] = { WC_HASH_TYPE_NONE, WC_HASH_TYPE_MD5_SHA,
+ WC_HASH_TYPE_MD2, WC_HASH_TYPE_MD4 };
+ enum wc_HashType typesHashBad[] = { WC_HASH_TYPE_MD2, WC_HASH_TYPE_MD4,
+ WC_HASH_TYPE_BLAKE2B,
+ WC_HASH_TYPE_NONE };
+
+ /* Parameter Validation testing. */
+ ret = wc_HashInit(NULL, WC_HASH_TYPE_SHA256);
+ if (ret != BAD_FUNC_ARG)
+ return -3200;
+ ret = wc_HashUpdate(NULL, WC_HASH_TYPE_SHA256, NULL, sizeof(data));
+ if (ret != BAD_FUNC_ARG)
+ return -3201;
+ ret = wc_HashUpdate(&hash, WC_HASH_TYPE_SHA256, NULL, sizeof(data));
+ if (ret != BAD_FUNC_ARG)
+ return -3202;
+ ret = wc_HashUpdate(NULL, WC_HASH_TYPE_SHA256, data, sizeof(data));
+ if (ret != BAD_FUNC_ARG)
+ return -3203;
+ ret = wc_HashFinal(NULL, WC_HASH_TYPE_SHA256, NULL);
+ if (ret != BAD_FUNC_ARG)
+ return -3204;
+ ret = wc_HashFinal(&hash, WC_HASH_TYPE_SHA256, NULL);
+ if (ret != BAD_FUNC_ARG)
+ return -3205;
+ ret = wc_HashFinal(NULL, WC_HASH_TYPE_SHA256, out);
+ if (ret != BAD_FUNC_ARG)
+ return -3206;
+
+ /* Try invalid hash algorithms. */
+ for (i = 0; i < (int)(sizeof(typesBad)/sizeof(*typesBad)); i++) {
+ ret = wc_HashInit(&hash, typesBad[i]);
+ if (ret != BAD_FUNC_ARG)
+ return -3207 - i;
+ ret = wc_HashUpdate(&hash, typesBad[i], data, sizeof(data));
+ if (ret != BAD_FUNC_ARG)
+ return -3217 - i;
+ ret = wc_HashFinal(&hash, typesBad[i], out);
+ if (ret != BAD_FUNC_ARG)
+ return -3227 - i;
+ wc_HashFree(&hash, typesBad[i]);
+ }
+
+ /* Try valid hash algorithms. */
+ for (i = 0, j = 0; i < (int)(sizeof(typesGood)/sizeof(*typesGood)); i++) {
+ exp_ret = 0;
+ if (typesGood[i] == typesNoImpl[j]) {
+ /* Recognized but no implementation compiled in. */
+ exp_ret = HASH_TYPE_E;
+ j++;
+ }
+ ret = wc_HashInit(&hash, typesGood[i]);
+ if (ret != exp_ret)
+ return -3237 - i;
+ ret = wc_HashUpdate(&hash, typesGood[i], data, sizeof(data));
+ if (ret != exp_ret)
+ return -3247 - i;
+ ret = wc_HashFinal(&hash, typesGood[i], out);
+ if (ret != exp_ret)
+ return -3257 - i;
+ wc_HashFree(&hash, typesGood[i]);
+
+ digestSz = wc_HashGetDigestSize(typesGood[i]);
+ if (exp_ret < 0 && digestSz != exp_ret)
+ return -3267 - i;
+ if (exp_ret == 0 && digestSz < 0)
+ return -3277 - i;
+ if (exp_ret == 0) {
+ ret = wc_Hash(typesGood[i], data, sizeof(data), hashOut,
+ digestSz - 1);
+ if (ret != BUFFER_E)
+ return -3287 - i;
+ }
+ ret = wc_Hash(typesGood[i], data, sizeof(data), hashOut, digestSz);
+ if (ret != exp_ret)
+ return -3297 - i;
+ if (exp_ret == 0 && XMEMCMP(out, hashOut, digestSz) != 0)
+ return -3307 -i;
+
+ ret = wc_HashGetBlockSize(typesGood[i]);
+ if (exp_ret < 0 && ret != exp_ret)
+ return -3308 - i;
+ if (exp_ret == 0 && ret < 0)
+ return -3318 - i;
+
+#if !defined(NO_ASN) || !defined(NO_DH) || defined(HAVE_ECC)
+ ret = wc_HashGetOID(typesGood[i]);
+ if (ret == BAD_FUNC_ARG ||
+ (exp_ret == 0 && ret == HASH_TYPE_E) ||
+ (exp_ret != 0 && ret != HASH_TYPE_E)) {
+ return -3328 - i;
+ }
+
+ hashType = wc_OidGetHash(ret);
+ if (exp_ret < 0 && ret != exp_ret)
+ return -3338 - i;
+ if (exp_ret == 0 && hashType != typesGood[i])
+ return -3348 - i;
+#endif /* !defined(NO_ASN) || !defined(NO_DH) || defined(HAVE_ECC) */
+ }
+
+ for (i = 0; i < (int)(sizeof(typesHashBad)/sizeof(*typesHashBad)); i++) {
+ ret = wc_Hash(typesHashBad[i], data, sizeof(data), out, sizeof(out));
+ if (ret != BAD_FUNC_ARG && ret != BUFFER_E)
+ return -3358 - i;
+ }
+
+#if !defined(NO_ASN) || !defined(NO_DH) || defined(HAVE_ECC)
+ ret = wc_HashGetOID(WC_HASH_TYPE_MD2);
+#ifdef WOLFSSL_MD2
+ if (ret == HASH_TYPE_E || ret == BAD_FUNC_ARG)
+ return -3368;
+#else
+ if (ret != HASH_TYPE_E)
+ return -3369;
+#endif
+ hashType = wc_OidGetHash(646); /* Md2h */
+#ifdef WOLFSSL_MD2
+ if (hashType != WC_HASH_TYPE_MD2)
+ return -3370;
+#else
+ if (hashType != WC_HASH_TYPE_NONE)
+ return -3371;
+#endif
+
+ ret = wc_HashGetOID(WC_HASH_TYPE_MD5_SHA);
+#ifndef NO_MD5
+ if (ret == HASH_TYPE_E || ret == BAD_FUNC_ARG)
+ return -3372;
+#else
+ if (ret != HASH_TYPE_E)
+ return -3373;
+#endif
+ ret = wc_HashGetOID(WC_HASH_TYPE_MD4);
+ if (ret != BAD_FUNC_ARG)
+ return -3374;
+ ret = wc_HashGetOID(WC_HASH_TYPE_NONE);
+ if (ret != BAD_FUNC_ARG)
+ return -3375;
+
+ hashType = wc_OidGetHash(0);
+ if (hashType != WC_HASH_TYPE_NONE)
+ return -3376;
+#endif /* !defined(NO_ASN) || !defined(NO_DH) || defined(HAVE_ECC) */
+
+ ret = wc_HashGetBlockSize(WC_HASH_TYPE_MD2);
+#ifdef WOLFSSL_MD2
+ if (ret == HASH_TYPE_E || ret == BAD_FUNC_ARG)
+ return -3377;
+#else
+ if (ret != HASH_TYPE_E)
+ return -3378;
+#endif
+ ret = wc_HashGetDigestSize(WC_HASH_TYPE_MD2);
+#ifdef WOLFSSL_MD2
+ if (ret == HASH_TYPE_E || ret == BAD_FUNC_ARG)
+ return -3379;
+#else
+ if (ret != HASH_TYPE_E)
+ return -3380;
+#endif
+
+ ret = wc_HashGetBlockSize(WC_HASH_TYPE_MD4);
+#ifndef NO_MD4
+ if (ret == HASH_TYPE_E || ret == BAD_FUNC_ARG)
+ return -3381;
+#else
+ if (ret != HASH_TYPE_E)
+ return -3382;
+#endif
+ ret = wc_HashGetDigestSize(WC_HASH_TYPE_MD4);
+#ifndef NO_MD4
+ if (ret == HASH_TYPE_E || ret == BAD_FUNC_ARG)
+ return -3383;
+#else
+ if (ret != HASH_TYPE_E)
+ return -3384;
+#endif
+ ret = wc_HashGetBlockSize(WC_HASH_TYPE_MD5_SHA);
+#if !defined(NO_MD5) && !defined(NO_SHA)
+ if (ret == HASH_TYPE_E || ret == BAD_FUNC_ARG)
+ return -3385;
+#else
+ if (ret != HASH_TYPE_E)
+ return -3386;
+#endif
+
+ ret = wc_HashGetBlockSize(WC_HASH_TYPE_BLAKE2B);
+#if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
+ if (ret == HASH_TYPE_E || ret == BAD_FUNC_ARG)
+ return -3387;
+#else
+ if (ret != HASH_TYPE_E)
+ return -3388;
+#endif
+ ret = wc_HashGetDigestSize(WC_HASH_TYPE_BLAKE2B);
+#if defined(HAVE_BLAKE2) || defined(HAVE_BLAKE2S)
+ if (ret == HASH_TYPE_E || ret == BAD_FUNC_ARG)
+ return -3389;
+#else
+ if (ret != HASH_TYPE_E)
+ return -3390;
+#endif
+
+ ret = wc_HashGetBlockSize(WC_HASH_TYPE_NONE);
+ if (ret != BAD_FUNC_ARG)
+ return -3391;
+ ret = wc_HashGetDigestSize(WC_HASH_TYPE_NONE);
+ if (ret != BAD_FUNC_ARG)
+ return -3392;
+
+#ifndef NO_CERTS
+#if defined(WOLFSSL_MD2) && !defined(HAVE_SELFTEST)
+ ret = wc_GetCTC_HashOID(MD2);
+ if (ret == 0)
+ return -3393;
+#endif
+#ifndef NO_MD5
+ ret = wc_GetCTC_HashOID(WC_MD5);
+ if (ret == 0)
+ return -3394;
+#endif
+#ifndef NO_SHA
+ ret = wc_GetCTC_HashOID(WC_SHA);
+ if (ret == 0)
+ return -3395;
+#endif
+#ifdef WOLFSSL_SHA224
+ ret = wc_GetCTC_HashOID(WC_SHA224);
+ if (ret == 0)
+ return -3396;
+#endif
+#ifndef NO_SHA256
+ ret = wc_GetCTC_HashOID(WC_SHA256);
+ if (ret == 0)
+ return -3397;
+#endif
+#ifdef WOLFSSL_SHA384
+ ret = wc_GetCTC_HashOID(WC_SHA384);
+ if (ret == 0)
+ return -3398;
+#endif
+#ifdef WOLFSSL_SHA512
+ ret = wc_GetCTC_HashOID(WC_SHA512);
+ if (ret == 0)
+ return -3399;
+#endif
+ ret = wc_GetCTC_HashOID(-1);
+ if (ret != 0)
+ return -3400;
+#endif
+
+ return 0;
+}
#if !defined(NO_HMAC) && !defined(NO_MD5)
int hmac_md5_test(void)
{
Hmac hmac;
- byte hash[MD5_DIGEST_SIZE];
+ byte hash[WC_MD5_DIGEST_SIZE];
const char* keys[]=
{
@@ -1209,14 +3486,14 @@ int hmac_md5_test(void)
a.input = "Hi There";
a.output = "\x92\x94\x72\x7a\x36\x38\xbb\x1c\x13\xf4\x8e\xf8\x15\x8b\xfc"
"\x9d";
- a.inLen = strlen(a.input);
- a.outLen = MD5_DIGEST_SIZE;
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_MD5_DIGEST_SIZE;
b.input = "what do ya want for nothing?";
b.output = "\x75\x0c\x78\x3e\x6a\xb0\xb5\x03\xea\xa8\x6e\x31\x0a\x5d\xb7"
"\x38";
- b.inLen = strlen(b.input);
- b.outLen = MD5_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_MD5_DIGEST_SIZE;
c.input = "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
@@ -1224,40 +3501,47 @@ int hmac_md5_test(void)
"\xDD\xDD\xDD\xDD\xDD\xDD";
c.output = "\x56\xbe\x34\x52\x1d\x14\x4c\x88\xdb\xb8\xc7\x33\xf0\xe8\xb3"
"\xf6";
- c.inLen = strlen(c.input);
- c.outLen = MD5_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_MD5_DIGEST_SIZE;
test_hmac[0] = a;
test_hmac[1] = b;
test_hmac[2] = c;
for (i = 0; i < times; ++i) {
-#if defined(HAVE_FIPS) || defined(HAVE_CAVIUM)
- if (i == 1)
+ #if defined(HAVE_FIPS) || defined(HAVE_CAVIUM)
+ if (i == 1) {
continue; /* cavium can't handle short keys, fips not allowed */
-#endif
-#ifdef HAVE_CAVIUM
- if (wc_HmacInitCavium(&hmac, CAVIUM_DEV_ID) != 0)
- return -20009;
-#endif
- ret = wc_HmacSetKey(&hmac, MD5, (byte*)keys[i], (word32)strlen(keys[i]));
+ }
+ #endif
+
+ if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0) {
+ return -3500;
+ }
+
+ ret = wc_HmacSetKey(&hmac, WC_MD5, (byte*)keys[i],
+ (word32)XSTRLEN(keys[i]));
if (ret != 0)
- return -4015;
+ return -3501;
ret = wc_HmacUpdate(&hmac, (byte*)test_hmac[i].input,
(word32)test_hmac[i].inLen);
if (ret != 0)
- return -4016;
+ return -3502;
ret = wc_HmacFinal(&hmac, hash);
if (ret != 0)
- return -4017;
+ return -3503;
- if (memcmp(hash, test_hmac[i].output, MD5_DIGEST_SIZE) != 0)
- return -20 - i;
-#ifdef HAVE_CAVIUM
- wc_HmacFreeCavium(&hmac);
-#endif
+ if (XMEMCMP(hash, test_hmac[i].output, WC_MD5_DIGEST_SIZE) != 0)
+ return -3504 - i;
+
+ wc_HmacFree(&hmac);
}
+#ifndef HAVE_FIPS
+ if (wc_HmacSizeByType(WC_MD5) != WC_MD5_DIGEST_SIZE)
+ return -3514;
+#endif
+
return 0;
}
#endif /* NO_HMAC && NO_MD5 */
@@ -1266,7 +3550,7 @@ int hmac_md5_test(void)
int hmac_sha_test(void)
{
Hmac hmac;
- byte hash[SHA_DIGEST_SIZE];
+ byte hash[WC_SHA_DIGEST_SIZE];
const char* keys[]=
{
@@ -1286,14 +3570,14 @@ int hmac_sha_test(void)
a.input = "Hi There";
a.output = "\xb6\x17\x31\x86\x55\x05\x72\x64\xe2\x8b\xc0\xb6\xfb\x37\x8c"
"\x8e\xf1\x46\xbe\x00";
- a.inLen = strlen(a.input);
- a.outLen = SHA_DIGEST_SIZE;
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA_DIGEST_SIZE;
b.input = "what do ya want for nothing?";
b.output = "\xef\xfc\xdf\x6a\xe5\xeb\x2f\xa2\xd2\x74\x16\xd5\xf1\x84\xdf"
"\x9c\x25\x9a\x7c\x79";
- b.inLen = strlen(b.input);
- b.outLen = SHA_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA_DIGEST_SIZE;
c.input = "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
@@ -1301,8 +3585,8 @@ int hmac_sha_test(void)
"\xDD\xDD\xDD\xDD\xDD\xDD";
c.output = "\x12\x5d\x73\x42\xb9\xac\x11\xcd\x91\xa3\x9a\xf4\x8a\xa1\x7b"
"\x4f\x63\xf1\x75\xd3";
- c.inLen = strlen(c.input);
- c.outLen = SHA_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA_DIGEST_SIZE;
test_hmac[0] = a;
test_hmac[1] = b;
@@ -1313,38 +3597,43 @@ int hmac_sha_test(void)
if (i == 1)
continue; /* cavium can't handle short keys, fips not allowed */
#endif
-#ifdef HAVE_CAVIUM
- if (wc_HmacInitCavium(&hmac, CAVIUM_DEV_ID) != 0)
- return -20010;
-#endif
- ret = wc_HmacSetKey(&hmac, SHA, (byte*)keys[i], (word32)strlen(keys[i]));
+
+ if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+ return -3600;
+
+ ret = wc_HmacSetKey(&hmac, WC_SHA, (byte*)keys[i],
+ (word32)XSTRLEN(keys[i]));
if (ret != 0)
- return -4018;
+ return -3601;
ret = wc_HmacUpdate(&hmac, (byte*)test_hmac[i].input,
(word32)test_hmac[i].inLen);
if (ret != 0)
- return -4019;
+ return -3602;
ret = wc_HmacFinal(&hmac, hash);
if (ret != 0)
- return -4020;
+ return -3603;
- if (memcmp(hash, test_hmac[i].output, SHA_DIGEST_SIZE) != 0)
- return -20 - i;
-#ifdef HAVE_CAVIUM
- wc_HmacFreeCavium(&hmac);
-#endif
+ if (XMEMCMP(hash, test_hmac[i].output, WC_SHA_DIGEST_SIZE) != 0)
+ return -3604 - i;
+
+ wc_HmacFree(&hmac);
}
+#ifndef HAVE_FIPS
+ if (wc_HmacSizeByType(WC_SHA) != WC_SHA_DIGEST_SIZE)
+ return -3614;
+#endif
+
return 0;
}
#endif
-#if !defined(NO_HMAC) && !defined(NO_SHA256)
-int hmac_sha256_test(void)
+#if !defined(NO_HMAC) && defined(WOLFSSL_SHA224)
+int hmac_sha224_test(void)
{
Hmac hmac;
- byte hash[SHA256_DIGEST_SIZE];
+ byte hash[WC_SHA224_DIGEST_SIZE];
const char* keys[]=
{
@@ -1352,80 +3641,94 @@ int hmac_sha256_test(void)
"\x0b\x0b\x0b",
"Jefe",
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
- "\xAA\xAA\xAA"
+ "\xAA\xAA\xAA",
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
};
- testVector a, b, c;
- testVector test_hmac[3];
+ testVector a, b, c, d;
+ testVector test_hmac[4];
int ret;
int times = sizeof(test_hmac) / sizeof(testVector), i;
a.input = "Hi There";
- a.output = "\xb0\x34\x4c\x61\xd8\xdb\x38\x53\x5c\xa8\xaf\xce\xaf\x0b\xf1"
- "\x2b\x88\x1d\xc2\x00\xc9\x83\x3d\xa7\x26\xe9\x37\x6c\x2e\x32"
- "\xcf\xf7";
- a.inLen = strlen(a.input);
- a.outLen = SHA256_DIGEST_SIZE;
+ a.output = "\x89\x6f\xb1\x12\x8a\xbb\xdf\x19\x68\x32\x10\x7c\xd4\x9d\xf3"
+ "\x3f\x47\xb4\xb1\x16\x99\x12\xba\x4f\x53\x68\x4b\x22";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA224_DIGEST_SIZE;
b.input = "what do ya want for nothing?";
- b.output = "\x5b\xdc\xc1\x46\xbf\x60\x75\x4e\x6a\x04\x24\x26\x08\x95\x75"
- "\xc7\x5a\x00\x3f\x08\x9d\x27\x39\x83\x9d\xec\x58\xb9\x64\xec"
- "\x38\x43";
- b.inLen = strlen(b.input);
- b.outLen = SHA256_DIGEST_SIZE;
+ b.output = "\xa3\x0e\x01\x09\x8b\xc6\xdb\xbf\x45\x69\x0f\x3a\x7e\x9e\x6d"
+ "\x0f\x8b\xbe\xa2\xa3\x9e\x61\x48\x00\x8f\xd0\x5e\x44";
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA224_DIGEST_SIZE;
c.input = "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD";
- c.output = "\x77\x3e\xa9\x1e\x36\x80\x0e\x46\x85\x4d\xb8\xeb\xd0\x91\x81"
- "\xa7\x29\x59\x09\x8b\x3e\xf8\xc1\x22\xd9\x63\x55\x14\xce\xd5"
- "\x65\xfe";
- c.inLen = strlen(c.input);
- c.outLen = SHA256_DIGEST_SIZE;
+ c.output = "\x7f\xb3\xcb\x35\x88\xc6\xc1\xf6\xff\xa9\x69\x4d\x7d\x6a\xd2"
+ "\x64\x93\x65\xb0\xc1\xf6\x5d\x69\xd1\xec\x83\x33\xea";
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA224_DIGEST_SIZE;
+
+ d.input = "Big Key Input";
+ d.output = "\xe7\x4e\x2b\x8a\xa9\xf0\x37\x2f\xed\xae\x70\x0c\x49\x47\xf1"
+ "\x46\x54\xa7\x32\x6b\x55\x01\x87\xd2\xc8\x02\x0e\x3a";
+ d.inLen = XSTRLEN(d.input);
+ d.outLen = WC_SHA224_DIGEST_SIZE;
test_hmac[0] = a;
test_hmac[1] = b;
test_hmac[2] = c;
+ test_hmac[3] = d;
for (i = 0; i < times; ++i) {
#if defined(HAVE_FIPS) || defined(HAVE_CAVIUM)
if (i == 1)
continue; /* cavium can't handle short keys, fips not allowed */
#endif
-#ifdef HAVE_CAVIUM
- if (wc_HmacInitCavium(&hmac, CAVIUM_DEV_ID) != 0)
- return -20011;
-#endif
- ret = wc_HmacSetKey(&hmac, SHA256, (byte*)keys[i],(word32)strlen(keys[i]));
+
+ if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+ return -3700;
+
+ ret = wc_HmacSetKey(&hmac, WC_SHA224, (byte*)keys[i],
+ (word32)XSTRLEN(keys[i]));
if (ret != 0)
- return -4021;
+ return -3701;
ret = wc_HmacUpdate(&hmac, (byte*)test_hmac[i].input,
(word32)test_hmac[i].inLen);
if (ret != 0)
- return -4022;
+ return -3702;
ret = wc_HmacFinal(&hmac, hash);
if (ret != 0)
- return -4023;
+ return -3703;
- if (memcmp(hash, test_hmac[i].output, SHA256_DIGEST_SIZE) != 0)
- return -20 - i;
-#ifdef HAVE_CAVIUM
- wc_HmacFreeCavium(&hmac);
-#endif
+ if (XMEMCMP(hash, test_hmac[i].output, WC_SHA224_DIGEST_SIZE) != 0)
+ return -3704 - i;
+
+ wc_HmacFree(&hmac);
}
+#ifndef HAVE_FIPS
+ if (wc_HmacSizeByType(WC_SHA224) != WC_SHA224_DIGEST_SIZE)
+ return -3714;
+#endif
+
return 0;
}
#endif
-#if !defined(NO_HMAC) && defined(HAVE_BLAKE2)
-int hmac_blake2b_test(void)
+#if !defined(NO_HMAC) && !defined(NO_SHA256)
+int hmac_sha256_test(void)
{
Hmac hmac;
- byte hash[BLAKE2B_256];
+ byte hash[WC_SHA256_DIGEST_SIZE];
const char* keys[]=
{
@@ -1433,71 +3736,95 @@ int hmac_blake2b_test(void)
"\x0b\x0b\x0b",
"Jefe",
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
- "\xAA\xAA\xAA"
+ "\xAA\xAA\xAA",
+ "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
+ "\xAA\xAA\xAA",
};
- testVector a, b, c;
- testVector test_hmac[3];
+ testVector a, b, c, d;
+ testVector test_hmac[4];
int ret;
int times = sizeof(test_hmac) / sizeof(testVector), i;
a.input = "Hi There";
- a.output = "\x72\x93\x0d\xdd\xf5\xf7\xe1\x78\x38\x07\x44\x18\x0b\x3f\x51"
- "\x37\x25\xb5\x82\xc2\x08\x83\x2f\x1c\x99\xfd\x03\xa0\x16\x75"
- "\xac\xfd";
- a.inLen = strlen(a.input);
- a.outLen = BLAKE2B_256;
+ a.output = "\xb0\x34\x4c\x61\xd8\xdb\x38\x53\x5c\xa8\xaf\xce\xaf\x0b\xf1"
+ "\x2b\x88\x1d\xc2\x00\xc9\x83\x3d\xa7\x26\xe9\x37\x6c\x2e\x32"
+ "\xcf\xf7";
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA256_DIGEST_SIZE;
b.input = "what do ya want for nothing?";
- b.output = "\x3d\x20\x50\x71\x05\xc0\x8c\x0c\x38\x44\x1e\xf7\xf9\xd1\x67"
- "\x21\xff\x64\xf5\x94\x00\xcf\xf9\x75\x41\xda\x88\x61\x9d\x7c"
- "\xda\x2b";
- b.inLen = strlen(b.input);
- b.outLen = BLAKE2B_256;
+ b.output = "\x5b\xdc\xc1\x46\xbf\x60\x75\x4e\x6a\x04\x24\x26\x08\x95\x75"
+ "\xc7\x5a\x00\x3f\x08\x9d\x27\x39\x83\x9d\xec\x58\xb9\x64\xec"
+ "\x38\x43";
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA256_DIGEST_SIZE;
c.input = "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD";
- c.output = "\xda\xfe\x2a\x24\xfc\xe7\xea\x36\x34\xbe\x41\x92\xc7\x11\xa7"
- "\x00\xae\x53\x9c\x11\x9c\x80\x74\x55\x22\x25\x4a\xb9\x55\xd3"
- "\x0f\x87";
- c.inLen = strlen(c.input);
- c.outLen = BLAKE2B_256;
+ c.output = "\x77\x3e\xa9\x1e\x36\x80\x0e\x46\x85\x4d\xb8\xeb\xd0\x91\x81"
+ "\xa7\x29\x59\x09\x8b\x3e\xf8\xc1\x22\xd9\x63\x55\x14\xce\xd5"
+ "\x65\xfe";
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA256_DIGEST_SIZE;
+
+ d.input = 0;
+ d.output = "\x86\xe5\x4f\xd4\x48\x72\x5d\x7e\x5d\xcf\xe2\x23\x53\xc8\x28"
+ "\xaf\x48\x78\x1e\xb4\x8c\xae\x81\x06\xa7\xe1\xd4\x98\x94\x9f"
+ "\x3e\x46";
+ d.inLen = 0;
+ d.outLen = WC_SHA256_DIGEST_SIZE;
test_hmac[0] = a;
test_hmac[1] = b;
test_hmac[2] = c;
+ test_hmac[3] = d;
for (i = 0; i < times; ++i) {
#if defined(HAVE_FIPS) || defined(HAVE_CAVIUM)
if (i == 1)
continue; /* cavium can't handle short keys, fips not allowed */
#endif
-#ifdef HAVE_CAVIUM
- if (wc_HmacInitCavium(&hmac, CAVIUM_DEV_ID) != 0)
- return -20011;
+#if defined(HAVE_INTEL_QA) || defined(HAVE_CAVIUM)
+ if (i == 3)
+ continue; /* QuickAssist can't handle empty HMAC */
#endif
- ret = wc_HmacSetKey(&hmac, BLAKE2B_ID, (byte*)keys[i],
- (word32)strlen(keys[i]));
- if (ret != 0)
- return -4024;
- ret = wc_HmacUpdate(&hmac, (byte*)test_hmac[i].input,
- (word32)test_hmac[i].inLen);
+
+ if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+ return -3800 - i;
+
+ ret = wc_HmacSetKey(&hmac, WC_SHA256, (byte*)keys[i],
+ (word32)XSTRLEN(keys[i]));
if (ret != 0)
- return -4025;
+ return -3810 - i;
+ if (test_hmac[i].input != NULL) {
+ ret = wc_HmacUpdate(&hmac, (byte*)test_hmac[i].input,
+ (word32)test_hmac[i].inLen);
+ if (ret != 0)
+ return -3820 - i;
+ }
ret = wc_HmacFinal(&hmac, hash);
if (ret != 0)
- return -4026;
+ return -3830 - i;
- if (memcmp(hash, test_hmac[i].output, BLAKE2B_256) != 0)
- return -20 - i;
-#ifdef HAVE_CAVIUM
- wc_HmacFreeCavium(&hmac);
-#endif
+ if (XMEMCMP(hash, test_hmac[i].output, WC_SHA256_DIGEST_SIZE) != 0)
+ return -3840 - i;
+
+ wc_HmacFree(&hmac);
}
+#ifndef HAVE_FIPS
+ if (wc_HmacSizeByType(WC_SHA256) != WC_SHA256_DIGEST_SIZE)
+ return -3850;
+ if (wc_HmacSizeByType(20) != BAD_FUNC_ARG)
+ return -3851;
+#endif
+ if (wolfSSL_GetHmacMaxSize() != WC_MAX_DIGEST_SIZE)
+ return -3852;
+
return 0;
}
#endif
@@ -1507,7 +3834,7 @@ int hmac_blake2b_test(void)
int hmac_sha384_test(void)
{
Hmac hmac;
- byte hash[SHA384_DIGEST_SIZE];
+ byte hash[WC_SHA384_DIGEST_SIZE];
const char* keys[]=
{
@@ -1515,11 +3842,20 @@ int hmac_sha384_test(void)
"\x0b\x0b\x0b",
"Jefe",
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
- "\xAA\xAA\xAA"
+ "\xAA\xAA\xAA",
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
};
- testVector a, b, c;
- testVector test_hmac[3];
+ testVector a, b, c, d;
+ testVector test_hmac[4];
int ret;
int times = sizeof(test_hmac) / sizeof(testVector), i;
@@ -1529,16 +3865,16 @@ int hmac_sha384_test(void)
"\x7f\x15\xf9\xda\xdb\xe4\x10\x1e\xc6\x82\xaa\x03\x4c\x7c\xeb"
"\xc5\x9c\xfa\xea\x9e\xa9\x07\x6e\xde\x7f\x4a\xf1\x52\xe8\xb2"
"\xfa\x9c\xb6";
- a.inLen = strlen(a.input);
- a.outLen = SHA384_DIGEST_SIZE;
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA384_DIGEST_SIZE;
b.input = "what do ya want for nothing?";
b.output = "\xaf\x45\xd2\xe3\x76\x48\x40\x31\x61\x7f\x78\xd2\xb5\x8a\x6b"
"\x1b\x9c\x7e\xf4\x64\xf5\xa0\x1b\x47\xe4\x2e\xc3\x73\x63\x22"
"\x44\x5e\x8e\x22\x40\xca\x5e\x69\xe2\xc7\x8b\x32\x39\xec\xfa"
"\xb2\x16\x49";
- b.inLen = strlen(b.input);
- b.outLen = SHA384_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA384_DIGEST_SIZE;
c.input = "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
@@ -1548,33 +3884,54 @@ int hmac_sha384_test(void)
"\x6f\x0a\xa6\x35\xd9\x47\xac\x9f\xeb\xe8\x3e\xf4\xe5\x59\x66"
"\x14\x4b\x2a\x5a\xb3\x9d\xc1\x38\x14\xb9\x4e\x3a\xb6\xe1\x01"
"\xa3\x4f\x27";
- c.inLen = strlen(c.input);
- c.outLen = SHA384_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA384_DIGEST_SIZE;
+
+ d.input = "Big Key Input";
+ d.output = "\xd2\x3d\x29\x6e\xf5\x1e\x23\x23\x49\x18\xb3\xbf\x4c\x38\x7b"
+ "\x31\x21\x17\xbb\x09\x73\x27\xf8\x12\x9d\xe9\xc6\x5d\xf9\x54"
+ "\xd6\x38\x5a\x68\x53\x14\xee\xe0\xa6\x4f\x36\x7e\xb2\xf3\x1a"
+ "\x57\x41\x69";
+ d.inLen = XSTRLEN(d.input);
+ d.outLen = WC_SHA384_DIGEST_SIZE;
test_hmac[0] = a;
test_hmac[1] = b;
test_hmac[2] = c;
+ test_hmac[3] = d;
for (i = 0; i < times; ++i) {
#if defined(HAVE_FIPS)
if (i == 1)
continue; /* fips not allowed */
#endif
- ret = wc_HmacSetKey(&hmac, SHA384, (byte*)keys[i],(word32)strlen(keys[i]));
+
+ if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+ return -3900;
+
+ ret = wc_HmacSetKey(&hmac, WC_SHA384, (byte*)keys[i],
+ (word32)XSTRLEN(keys[i]));
if (ret != 0)
- return -4027;
+ return -3901;
ret = wc_HmacUpdate(&hmac, (byte*)test_hmac[i].input,
(word32)test_hmac[i].inLen);
if (ret != 0)
- return -4028;
+ return -3902;
ret = wc_HmacFinal(&hmac, hash);
if (ret != 0)
- return -4029;
+ return -3903;
+
+ if (XMEMCMP(hash, test_hmac[i].output, WC_SHA384_DIGEST_SIZE) != 0)
+ return -3904 - i;
- if (memcmp(hash, test_hmac[i].output, SHA384_DIGEST_SIZE) != 0)
- return -20 - i;
+ wc_HmacFree(&hmac);
}
+#ifndef HAVE_FIPS
+ if (wc_HmacSizeByType(WC_SHA384) != WC_SHA384_DIGEST_SIZE)
+ return -3914;
+#endif
+
return 0;
}
#endif
@@ -1584,7 +3941,7 @@ int hmac_sha384_test(void)
int hmac_sha512_test(void)
{
Hmac hmac;
- byte hash[SHA512_DIGEST_SIZE];
+ byte hash[WC_SHA512_DIGEST_SIZE];
const char* keys[]=
{
@@ -1592,11 +3949,20 @@ int hmac_sha512_test(void)
"\x0b\x0b\x0b",
"Jefe",
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
- "\xAA\xAA\xAA"
+ "\xAA\xAA\xAA",
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
};
- testVector a, b, c;
- testVector test_hmac[3];
+ testVector a, b, c, d;
+ testVector test_hmac[4];
int ret;
int times = sizeof(test_hmac) / sizeof(testVector), i;
@@ -1607,8 +3973,8 @@ int hmac_sha512_test(void)
"\x7c\xde\xda\xa8\x33\xb7\xd6\xb8\xa7\x02\x03\x8b\x27\x4e\xae"
"\xa3\xf4\xe4\xbe\x9d\x91\x4e\xeb\x61\xf1\x70\x2e\x69\x6c\x20"
"\x3a\x12\x68\x54";
- a.inLen = strlen(a.input);
- a.outLen = SHA512_DIGEST_SIZE;
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_SHA512_DIGEST_SIZE;
b.input = "what do ya want for nothing?";
b.output = "\x16\x4b\x7a\x7b\xfc\xf8\x19\xe2\xe3\x95\xfb\xe7\x3b\x56\xe0"
@@ -1616,8 +3982,8 @@ int hmac_sha512_test(void)
"\x05\x54\x97\x58\xbf\x75\xc0\x5a\x99\x4a\x6d\x03\x4f\x65\xf8"
"\xf0\xe6\xfd\xca\xea\xb1\xa3\x4d\x4a\x6b\x4b\x63\x6e\x07\x0a"
"\x38\xbc\xe7\x37";
- b.inLen = strlen(b.input);
- b.outLen = SHA512_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA512_DIGEST_SIZE;
c.input = "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
@@ -1628,31 +3994,227 @@ int hmac_sha512_test(void)
"\x9d\x39\xbf\x3e\x84\x82\x79\xa7\x22\xc8\x06\xb4\x85\xa4\x7e"
"\x67\xc8\x07\xb9\x46\xa3\x37\xbe\xe8\x94\x26\x74\x27\x88\x59"
"\xe1\x32\x92\xfb";
- c.inLen = strlen(c.input);
- c.outLen = SHA512_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_SHA512_DIGEST_SIZE;
+
+ d.input = "Big Key Input";
+ d.output = "\x3f\xa9\xc9\xe1\xbd\xbb\x04\x55\x1f\xef\xcc\x92\x33\x08\xeb"
+ "\xcf\xc1\x9a\x5b\x5b\xc0\x7c\x86\x84\xae\x8c\x40\xaf\xb1\x27"
+ "\x87\x38\x92\x04\xa8\xed\xd7\xd7\x07\xa9\x85\xa0\xc2\xcd\x30"
+ "\xc0\x56\x14\x49\xbc\x2f\x69\x15\x6a\x97\xd8\x79\x2f\xb3\x3b"
+ "\x1e\x18\xfe\xfa";
+ d.inLen = XSTRLEN(d.input);
+ d.outLen = WC_SHA512_DIGEST_SIZE;
test_hmac[0] = a;
test_hmac[1] = b;
test_hmac[2] = c;
+ test_hmac[3] = d;
for (i = 0; i < times; ++i) {
#if defined(HAVE_FIPS)
if (i == 1)
continue; /* fips not allowed */
#endif
- ret = wc_HmacSetKey(&hmac, SHA512, (byte*)keys[i],(word32)strlen(keys[i]));
+
+ if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+ return -4000;
+
+ ret = wc_HmacSetKey(&hmac, WC_SHA512, (byte*)keys[i],
+ (word32)XSTRLEN(keys[i]));
if (ret != 0)
- return -4030;
+ return -4001;
ret = wc_HmacUpdate(&hmac, (byte*)test_hmac[i].input,
(word32)test_hmac[i].inLen);
if (ret != 0)
- return -4031;
+ return -4002;
ret = wc_HmacFinal(&hmac, hash);
if (ret != 0)
- return -4032;
+ return -4003;
+
+ if (XMEMCMP(hash, test_hmac[i].output, WC_SHA512_DIGEST_SIZE) != 0)
+ return -4004 - i;
+
+ wc_HmacFree(&hmac);
+ }
+
+#ifndef HAVE_FIPS
+ if (wc_HmacSizeByType(WC_SHA512) != WC_SHA512_DIGEST_SIZE)
+ return -4014;
+#endif
+
+ return 0;
+}
+#endif
+
+
+#if !defined(NO_HMAC) && defined(WOLFSSL_SHA3) && \
+ !defined(WOLFSSL_NOSHA3_224) && !defined(WOLFSSL_NOSHA3_256) && \
+ !defined(WOLFSSL_NOSHA3_384) && !defined(WOLFSSL_NOSHA3_512)
+int hmac_sha3_test(void)
+{
+ Hmac hmac;
+ byte hash[WC_SHA3_512_DIGEST_SIZE];
+
+ const char* key[4] =
+ {
+ "Jefe",
+
+ "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+ "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b",
+
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa",
+
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x01\x02\x03\x04\x05\x06\x07\x08"
+ };
- if (memcmp(hash, test_hmac[i].output, SHA512_DIGEST_SIZE) != 0)
- return -20 - i;
+ const char* input[4] =
+ {
+ "what do ya want for nothing?",
+
+ "Hi There",
+
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd",
+
+ "Big Key Input"
+ };
+
+ const int hashType[4] =
+ {
+ WC_SHA3_224, WC_SHA3_256, WC_SHA3_384, WC_SHA3_512
+ };
+
+ const int hashSz[4] =
+ {
+ WC_SHA3_224_DIGEST_SIZE, WC_SHA3_256_DIGEST_SIZE,
+ WC_SHA3_384_DIGEST_SIZE, WC_SHA3_512_DIGEST_SIZE
+ };
+
+ const char* output[16] =
+ {
+ /* key = jefe, input = what do ya want for nothing? */
+ /* HMAC-SHA3-224 */
+ "\x7f\xdb\x8d\xd8\x8b\xd2\xf6\x0d\x1b\x79\x86\x34\xad\x38\x68\x11"
+ "\xc2\xcf\xc8\x5b\xfa\xf5\xd5\x2b\xba\xce\x5e\x66",
+ /* HMAC-SHA3-256 */
+ "\xc7\xd4\x07\x2e\x78\x88\x77\xae\x35\x96\xbb\xb0\xda\x73\xb8\x87"
+ "\xc9\x17\x1f\x93\x09\x5b\x29\x4a\xe8\x57\xfb\xe2\x64\x5e\x1b\xa5",
+ /* HMAC-SHA3-384 */
+ "\xf1\x10\x1f\x8c\xbf\x97\x66\xfd\x67\x64\xd2\xed\x61\x90\x3f\x21"
+ "\xca\x9b\x18\xf5\x7c\xf3\xe1\xa2\x3c\xa1\x35\x08\xa9\x32\x43\xce"
+ "\x48\xc0\x45\xdc\x00\x7f\x26\xa2\x1b\x3f\x5e\x0e\x9d\xf4\xc2\x0a",
+ /* HMAC-SHA3-512 */
+ "\x5a\x4b\xfe\xab\x61\x66\x42\x7c\x7a\x36\x47\xb7\x47\x29\x2b\x83"
+ "\x84\x53\x7c\xdb\x89\xaf\xb3\xbf\x56\x65\xe4\xc5\xe7\x09\x35\x0b"
+ "\x28\x7b\xae\xc9\x21\xfd\x7c\xa0\xee\x7a\x0c\x31\xd0\x22\xa9\x5e"
+ "\x1f\xc9\x2b\xa9\xd7\x7d\xf8\x83\x96\x02\x75\xbe\xb4\xe6\x20\x24",
+
+ /* key = 0b..., input = Hi There */
+ /* HMAC-SHA3-224 */
+ "\x3b\x16\x54\x6b\xbc\x7b\xe2\x70\x6a\x03\x1d\xca\xfd\x56\x37\x3d"
+ "\x98\x84\x36\x76\x41\xd8\xc5\x9a\xf3\xc8\x60\xf7",
+ /* HMAC-SHA3-256 */
+ "\xba\x85\x19\x23\x10\xdf\xfa\x96\xe2\xa3\xa4\x0e\x69\x77\x43\x51"
+ "\x14\x0b\xb7\x18\x5e\x12\x02\xcd\xcc\x91\x75\x89\xf9\x5e\x16\xbb",
+ /* HMAC-SHA3-384 */
+ "\x68\xd2\xdc\xf7\xfd\x4d\xdd\x0a\x22\x40\xc8\xa4\x37\x30\x5f\x61"
+ "\xfb\x73\x34\xcf\xb5\xd0\x22\x6e\x1b\xc2\x7d\xc1\x0a\x2e\x72\x3a"
+ "\x20\xd3\x70\xb4\x77\x43\x13\x0e\x26\xac\x7e\x3d\x53\x28\x86\xbd",
+ /* HMAC-SHA3-512 */
+ "\xeb\x3f\xbd\x4b\x2e\xaa\xb8\xf5\xc5\x04\xbd\x3a\x41\x46\x5a\xac"
+ "\xec\x15\x77\x0a\x7c\xab\xac\x53\x1e\x48\x2f\x86\x0b\x5e\xc7\xba"
+ "\x47\xcc\xb2\xc6\xf2\xaf\xce\x8f\x88\xd2\x2b\x6d\xc6\x13\x80\xf2"
+ "\x3a\x66\x8f\xd3\x88\x8b\xb8\x05\x37\xc0\xa0\xb8\x64\x07\x68\x9e",
+
+ /* key = aa..., input = dd... */
+ /* HMAC-SHA3-224 */
+ "\x67\x6c\xfc\x7d\x16\x15\x36\x38\x78\x03\x90\x69\x2b\xe1\x42\xd2"
+ "\xdf\x7c\xe9\x24\xb9\x09\xc0\xc0\x8d\xbf\xdc\x1a",
+ /* HMAC-SHA3-256 */
+ "\x84\xec\x79\x12\x4a\x27\x10\x78\x65\xce\xdd\x8b\xd8\x2d\xa9\x96"
+ "\x5e\x5e\xd8\xc3\x7b\x0a\xc9\x80\x05\xa7\xf3\x9e\xd5\x8a\x42\x07",
+ /* HMAC-SHA3-384 */
+ "\x27\x5c\xd0\xe6\x61\xbb\x8b\x15\x1c\x64\xd2\x88\xf1\xf7\x82\xfb"
+ "\x91\xa8\xab\xd5\x68\x58\xd7\x2b\xab\xb2\xd4\x76\xf0\x45\x83\x73"
+ "\xb4\x1b\x6a\xb5\xbf\x17\x4b\xec\x42\x2e\x53\xfc\x31\x35\xac\x6e",
+ /* HMAC-SHA3-512 */
+ "\x30\x9e\x99\xf9\xec\x07\x5e\xc6\xc6\xd4\x75\xed\xa1\x18\x06\x87"
+ "\xfc\xf1\x53\x11\x95\x80\x2a\x99\xb5\x67\x74\x49\xa8\x62\x51\x82"
+ "\x85\x1c\xb3\x32\xaf\xb6\xa8\x9c\x41\x13\x25\xfb\xcb\xcd\x42\xaf"
+ "\xcb\x7b\x6e\x5a\xab\x7e\xa4\x2c\x66\x0f\x97\xfd\x85\x84\xbf\x03",
+
+ /* key = big key, input = Big Key Input */
+ /* HMAC-SHA3-224 */
+ "\x29\xe0\x5e\x46\xc4\xa4\x5e\x46\x74\xbf\xd7\x2d\x1a\xd8\x66\xdb"
+ "\x2d\x0d\x10\x4e\x2b\xfa\xad\x53\x7d\x15\x69\x8b",
+ /* HMAC-SHA3-256 */
+ "\xb5\x5b\x8d\x64\xb6\x9c\x21\xd0\xbf\x20\x5c\xa2\xf7\xb9\xb1\x4e"
+ "\x88\x21\x61\x2c\x66\xc3\x91\xae\x6c\x95\x16\x85\x83\xe6\xf4\x9b",
+ /* HMAC-SHA3-384 */
+ "\xaa\x91\xb3\xa6\x2f\x56\xa1\xbe\x8c\x3e\x74\x38\xdb\x58\xd9\xd3"
+ "\x34\xde\xa0\x60\x6d\x8d\x46\xe0\xec\xa9\xf6\x06\x35\x14\xe6\xed"
+ "\x83\xe6\x7c\x77\x24\x6c\x11\xb5\x90\x82\xb5\x75\xda\x7b\x83\x2d",
+ /* HMAC-SHA3-512 */
+ "\x1c\xc3\xa9\x24\x4a\x4a\x3f\xbd\xc7\x20\x00\x16\x9b\x79\x47\x03"
+ "\x78\x75\x2c\xb5\xf1\x2e\x62\x7c\xbe\xef\x4e\x8f\x0b\x11\x2b\x32"
+ "\xa0\xee\xc9\xd0\x4d\x64\x64\x0b\x37\xf4\xdd\x66\xf7\x8b\xb3\xad"
+ "\x52\x52\x6b\x65\x12\xde\x0d\x7c\xc0\x8b\x60\x01\x6c\x37\xd7\xa8"
+
+ };
+
+ int i = 0, iMax = sizeof(input) / sizeof(input[0]),
+ j, jMax = sizeof(hashType) / sizeof(hashType[0]),
+ ret;
+
+#ifdef HAVE_FIPS
+ /* FIPS requires a minimum length for HMAC keys, and "Jefe" is too
+ * short. Skip it in FIPS builds. */
+ i = 1;
+#endif
+ for (; i < iMax; i++) {
+ for (j = 0; j < jMax; j++) {
+ if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+ return -4100;
+
+ ret = wc_HmacSetKey(&hmac, hashType[j], (byte*)key[i],
+ (word32)XSTRLEN(key[i]));
+ if (ret != 0)
+ return -4101;
+ ret = wc_HmacUpdate(&hmac, (byte*)input[i],
+ (word32)XSTRLEN(input[i]));
+ if (ret != 0)
+ return -4102;
+ ret = wc_HmacFinal(&hmac, hash);
+ if (ret != 0)
+ return -4103;
+ if (XMEMCMP(hash, output[(i*jMax) + j], hashSz[j]) != 0)
+ return -4104;
+
+ wc_HmacFree(&hmac);
+
+ if (i > 0)
+ continue;
+
+ #ifndef HAVE_FIPS
+ ret = wc_HmacSizeByType(hashType[j]);
+ if (ret != hashSz[j])
+ return -4105;
+ #endif
+ }
}
return 0;
@@ -1707,16 +4269,14 @@ int arc4_test(void)
for (i = 0; i < times; ++i) {
Arc4 enc;
Arc4 dec;
- int keylen = 8; /* strlen with key 0x00 not good */
+ int keylen = 8; /* XSTRLEN with key 0x00 not good */
if (i == 3)
keylen = 4;
-#ifdef HAVE_CAVIUM
- if (wc_Arc4InitCavium(&enc, CAVIUM_DEV_ID) != 0)
- return -20001;
- if (wc_Arc4InitCavium(&dec, CAVIUM_DEV_ID) != 0)
- return -20002;
-#endif
+ if (wc_Arc4Init(&enc, HEAP_HINT, devId) != 0)
+ return -4200;
+ if (wc_Arc4Init(&dec, HEAP_HINT, devId) != 0)
+ return -4201;
wc_Arc4SetKey(&enc, (byte*)keys[i], keylen);
wc_Arc4SetKey(&dec, (byte*)keys[i], keylen);
@@ -1725,16 +4285,14 @@ int arc4_test(void)
(word32)test_arc4[i].outLen);
wc_Arc4Process(&dec, plain, cipher, (word32)test_arc4[i].outLen);
- if (memcmp(plain, test_arc4[i].input, test_arc4[i].outLen))
- return -20 - i;
+ if (XMEMCMP(plain, test_arc4[i].input, test_arc4[i].outLen))
+ return -4202 - i;
- if (memcmp(cipher, test_arc4[i].output, test_arc4[i].outLen))
- return -20 - 5 - i;
+ if (XMEMCMP(cipher, test_arc4[i].output, test_arc4[i].outLen))
+ return -4212 - i;
-#ifdef HAVE_CAVIUM
- wc_Arc4FreeCavium(&enc);
- wc_Arc4FreeCavium(&dec);
-#endif
+ wc_Arc4Free(&enc);
+ wc_Arc4Free(&dec);
}
return 0;
@@ -1800,22 +4358,28 @@ int hc128_test(void)
HC128 dec;
/* align keys/ivs in plain/cipher buffers */
- memcpy(plain, keys[i], 16);
- memcpy(cipher, ivs[i], 16);
+ XMEMCPY(plain, keys[i], 16);
+ XMEMCPY(cipher, ivs[i], 16);
wc_Hc128_SetKey(&enc, plain, cipher);
wc_Hc128_SetKey(&dec, plain, cipher);
/* align input */
- memcpy(plain, test_hc128[i].input, test_hc128[i].outLen);
- wc_Hc128_Process(&enc, cipher, plain, (word32)test_hc128[i].outLen);
- wc_Hc128_Process(&dec, plain, cipher, (word32)test_hc128[i].outLen);
+ XMEMCPY(plain, test_hc128[i].input, test_hc128[i].outLen);
+ if (wc_Hc128_Process(&enc, cipher, plain,
+ (word32)test_hc128[i].outLen) != 0) {
+ return -4300;
+ }
+ if (wc_Hc128_Process(&dec, plain, cipher,
+ (word32)test_hc128[i].outLen) != 0) {
+ return -4301;
+ }
- if (memcmp(plain, test_hc128[i].input, test_hc128[i].outLen))
- return -120 - i;
+ if (XMEMCMP(plain, test_hc128[i].input, test_hc128[i].outLen))
+ return -4302 - i;
- if (memcmp(cipher, test_hc128[i].output, test_hc128[i].outLen))
- return -120 - 5 - i;
+ if (XMEMCMP(cipher, test_hc128[i].output, test_hc128[i].outLen))
+ return -4312 - i;
}
#endif /* HAVE_HC128 */
@@ -1873,9 +4437,9 @@ int rabbit_test(void)
byte* iv;
/* align keys/ivs in plain/cipher buffers */
- memcpy(plain, keys[i], 16);
+ XMEMCPY(plain, keys[i], 16);
if (ivs[i]) {
- memcpy(cipher, ivs[i], 8);
+ XMEMCPY(cipher, ivs[i], 8);
iv = cipher;
} else
iv = NULL;
@@ -1883,15 +4447,15 @@ int rabbit_test(void)
wc_RabbitSetKey(&dec, plain, iv);
/* align input */
- memcpy(plain, test_rabbit[i].input, test_rabbit[i].outLen);
+ XMEMCPY(plain, test_rabbit[i].input, test_rabbit[i].outLen);
wc_RabbitProcess(&enc, cipher, plain, (word32)test_rabbit[i].outLen);
wc_RabbitProcess(&dec, plain, cipher, (word32)test_rabbit[i].outLen);
- if (memcmp(plain, test_rabbit[i].input, test_rabbit[i].outLen))
- return -130 - i;
+ if (XMEMCMP(plain, test_rabbit[i].input, test_rabbit[i].outLen))
+ return -4400 - i;
- if (memcmp(cipher, test_rabbit[i].output, test_rabbit[i].outLen))
- return -130 - 5 - i;
+ if (XMEMCMP(cipher, test_rabbit[i].output, test_rabbit[i].outLen))
+ return -4410 - i;
}
return 0;
@@ -1904,10 +4468,12 @@ int chacha_test(void)
{
ChaCha enc;
ChaCha dec;
- byte cipher[32];
- byte plain[32];
+ byte cipher[128];
+ byte plain[128];
+ byte sliver[64];
byte input[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- word32 keySz;
+ word32 keySz = 32;
+ int ret = 0;
int i;
int times = 4;
@@ -1945,14 +4511,138 @@ int chacha_test(void)
const byte* keys[] = {key1, key2, key3, key4};
- static const byte ivs1[] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
- static const byte ivs2[] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
- static const byte ivs3[] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01};
- static const byte ivs4[] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+ static const byte ivs1[] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+ static const byte ivs2[] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+ static const byte ivs3[] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00};
+ static const byte ivs4[] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
const byte* ivs[] = {ivs1, ivs2, ivs3, ivs4};
+#ifndef BENCH_EMBEDDED
+ static const byte cipher_big_result[] = {
+ 0x06, 0xa6, 0x5d, 0x31, 0x21, 0x6c, 0xdb, 0x37, 0x48, 0x7c, 0x01, 0x9d,
+ 0x72, 0xdf, 0x0a, 0x5b, 0x64, 0x74, 0x20, 0xba, 0x9e, 0xe0, 0x26, 0x7a,
+ 0xbf, 0xdf, 0x83, 0x34, 0x3b, 0x4f, 0x94, 0x3f, 0x37, 0x89, 0xaf, 0x00,
+ 0xdf, 0x0f, 0x2e, 0x75, 0x16, 0x41, 0xf6, 0x7a, 0x86, 0x94, 0x9d, 0x32,
+ 0x56, 0xf0, 0x79, 0x71, 0x68, 0x6f, 0xa6, 0x6b, 0xc6, 0x59, 0x49, 0xf6,
+ 0x10, 0x34, 0x03, 0x03, 0x16, 0x53, 0x9a, 0x98, 0x2a, 0x46, 0xde, 0x17,
+ 0x06, 0x65, 0x70, 0xca, 0x0a, 0x1f, 0xab, 0x80, 0x26, 0x96, 0x3f, 0x3e,
+ 0x7a, 0x3c, 0xa8, 0x87, 0xbb, 0x65, 0xdd, 0x5e, 0x07, 0x7b, 0x34, 0xe0,
+ 0x56, 0xda, 0x32, 0x13, 0x30, 0xc9, 0x0c, 0xd7, 0xba, 0xe4, 0x1f, 0xa6,
+ 0x91, 0x4f, 0x72, 0x9f, 0xd9, 0x5c, 0x62, 0x7d, 0xa6, 0xc2, 0xbc, 0x87,
+ 0xae, 0x64, 0x11, 0x94, 0x3b, 0xbc, 0x6c, 0x23, 0xbd, 0x7d, 0x00, 0xb4,
+ 0x99, 0xf2, 0x68, 0xb5, 0x59, 0x70, 0x93, 0xad, 0x69, 0xd0, 0xb1, 0x28,
+ 0x70, 0x92, 0xeb, 0xec, 0x39, 0x80, 0x82, 0xde, 0x44, 0xe2, 0x8a, 0x26,
+ 0xb3, 0xe9, 0x45, 0xcf, 0x83, 0x76, 0x9f, 0x6a, 0xa0, 0x46, 0x4a, 0x3d,
+ 0x26, 0x56, 0xaf, 0x49, 0x41, 0x26, 0x1b, 0x6a, 0x41, 0x37, 0x65, 0x91,
+ 0x72, 0xc4, 0xe7, 0x3c, 0x17, 0x31, 0xae, 0x2e, 0x2b, 0x31, 0x45, 0xe4,
+ 0x93, 0xd3, 0x10, 0xaa, 0xc5, 0x62, 0xd5, 0x11, 0x4b, 0x57, 0x1d, 0xad,
+ 0x48, 0x06, 0xd0, 0x0d, 0x98, 0xa5, 0xc6, 0x5b, 0xd0, 0x9e, 0x22, 0xc0,
+ 0x00, 0x32, 0x5a, 0xf5, 0x1c, 0x89, 0x6d, 0x54, 0x97, 0x55, 0x6b, 0x46,
+ 0xc5, 0xc7, 0xc4, 0x48, 0x9c, 0xbf, 0x47, 0xdc, 0x03, 0xc4, 0x1b, 0xcb,
+ 0x65, 0xa6, 0x91, 0x9d, 0x6d, 0xf1, 0xb0, 0x7a, 0x4d, 0x3b, 0x03, 0x95,
+ 0xf4, 0x8b, 0x0b, 0xae, 0x39, 0xff, 0x3f, 0xf6, 0xc0, 0x14, 0x18, 0x8a,
+ 0xe5, 0x19, 0xbd, 0xc1, 0xb4, 0x05, 0x4e, 0x29, 0x2f, 0x0b, 0x33, 0x76,
+ 0x28, 0x16, 0xa4, 0xa6, 0x93, 0x04, 0xb5, 0x55, 0x6b, 0x89, 0x3d, 0xa5,
+ 0x0f, 0xd3, 0xad, 0xfa, 0xd9, 0xfd, 0x05, 0x5d, 0x48, 0x94, 0x25, 0x5a,
+ 0x2c, 0x9a, 0x94, 0x80, 0xb0, 0xe7, 0xcb, 0x4d, 0x77, 0xbf, 0xca, 0xd8,
+ 0x55, 0x48, 0xbd, 0x66, 0xb1, 0x85, 0x81, 0xb1, 0x37, 0x79, 0xab, 0x52,
+ 0x08, 0x14, 0x12, 0xac, 0xcd, 0x45, 0x4d, 0x53, 0x6b, 0xca, 0x96, 0xc7,
+ 0x3b, 0x2f, 0x73, 0xb1, 0x5a, 0x23, 0xbd, 0x65, 0xd5, 0xea, 0x17, 0xb3,
+ 0xdc, 0xa1, 0x17, 0x1b, 0x2d, 0xb3, 0x9c, 0xd0, 0xdb, 0x41, 0x77, 0xef,
+ 0x93, 0x20, 0x52, 0x3e, 0x9d, 0xf5, 0xbf, 0x33, 0xf7, 0x52, 0xc1, 0x90,
+ 0xa0, 0x15, 0x17, 0xce, 0xf7, 0xf7, 0xd0, 0x3a, 0x3b, 0xd1, 0x72, 0x56,
+ 0x31, 0x81, 0xae, 0x60, 0xab, 0x40, 0xc1, 0xd1, 0x28, 0x77, 0x53, 0xac,
+ 0x9f, 0x11, 0x0a, 0x88, 0x36, 0x4b, 0xda, 0x57, 0xa7, 0x28, 0x5c, 0x85,
+ 0xd3, 0x85, 0x9b, 0x79, 0xad, 0x05, 0x1c, 0x37, 0x14, 0x5e, 0x0d, 0xd0,
+ 0x23, 0x03, 0x42, 0x1d, 0x48, 0x5d, 0xc5, 0x3c, 0x5a, 0x08, 0xa9, 0x0d,
+ 0x6e, 0x82, 0x7c, 0x2e, 0x3c, 0x41, 0xcc, 0x96, 0x8e, 0xad, 0xee, 0x2a,
+ 0x61, 0x0b, 0x16, 0x0f, 0xa9, 0x24, 0x40, 0x85, 0xbc, 0x9f, 0x28, 0x8d,
+ 0xe6, 0x68, 0x4d, 0x8f, 0x30, 0x48, 0xd9, 0x73, 0x73, 0x6c, 0x9a, 0x7f,
+ 0x67, 0xf7, 0xde, 0x4c, 0x0a, 0x8b, 0xe4, 0xb3, 0x08, 0x2a, 0x52, 0xda,
+ 0x54, 0xee, 0xcd, 0xb5, 0x62, 0x4a, 0x26, 0x20, 0xfb, 0x40, 0xbb, 0x39,
+ 0x3a, 0x0f, 0x09, 0xe8, 0x00, 0xd1, 0x24, 0x97, 0x60, 0xe9, 0x83, 0x83,
+ 0xfe, 0x9f, 0x9c, 0x15, 0xcf, 0x69, 0x03, 0x9f, 0x03, 0xe1, 0xe8, 0x6e,
+ 0xbd, 0x87, 0x58, 0x68, 0xee, 0xec, 0xd8, 0x29, 0x46, 0x23, 0x49, 0x92,
+ 0x72, 0x95, 0x5b, 0x49, 0xca, 0xe0, 0x45, 0x59, 0xb2, 0xca, 0xf4, 0xfc,
+ 0xb7, 0x59, 0x37, 0x49, 0x28, 0xbc, 0xf3, 0xd7, 0x61, 0xbc, 0x4b, 0xf3,
+ 0xa9, 0x4b, 0x2f, 0x05, 0xa8, 0x01, 0xa5, 0xdc, 0x00, 0x6e, 0x01, 0xb6,
+ 0x45, 0x3c, 0xd5, 0x49, 0x7d, 0x5c, 0x25, 0xe8, 0x31, 0x87, 0xb2, 0xb9,
+ 0xbf, 0xb3, 0x01, 0x62, 0x0c, 0xd0, 0x48, 0x77, 0xa2, 0x34, 0x0f, 0x16,
+ 0x22, 0x28, 0xee, 0x54, 0x08, 0x93, 0x3b, 0xe4, 0xde, 0x7e, 0x63, 0xf7,
+ 0x97, 0x16, 0x5d, 0x71, 0x58, 0xc2, 0x2e, 0xf2, 0x36, 0xa6, 0x12, 0x65,
+ 0x94, 0x17, 0xac, 0x66, 0x23, 0x7e, 0xc6, 0x72, 0x79, 0x24, 0xce, 0x8f,
+ 0x55, 0x19, 0x97, 0x44, 0xfc, 0x55, 0xec, 0x85, 0x26, 0x27, 0xdb, 0x38,
+ 0xb1, 0x42, 0x0a, 0xdd, 0x05, 0x99, 0x28, 0xeb, 0x03, 0x6c, 0x9a, 0xe9,
+ 0x17, 0xf6, 0x2c, 0xb0, 0xfe, 0xe7, 0xa4, 0xa7, 0x31, 0xda, 0x4d, 0xb0,
+ 0x29, 0xdb, 0xdd, 0x8d, 0x12, 0x13, 0x9c, 0xb4, 0xcc, 0x83, 0x97, 0xfb,
+ 0x1a, 0xdc, 0x08, 0xd6, 0x30, 0x62, 0xe8, 0xeb, 0x8b, 0x61, 0xcb, 0x1d,
+ 0x06, 0xe3, 0xa5, 0x4d, 0x35, 0xdb, 0x59, 0xa8, 0x2d, 0x87, 0x27, 0x44,
+ 0x6f, 0xc0, 0x38, 0x97, 0xe4, 0x85, 0x00, 0x02, 0x09, 0xf6, 0x69, 0x3a,
+ 0xcf, 0x08, 0x1b, 0x21, 0xbb, 0x79, 0xb1, 0xa1, 0x34, 0x09, 0xe0, 0x80,
+ 0xca, 0xb0, 0x78, 0x8a, 0x11, 0x97, 0xd4, 0x07, 0xbe, 0x1b, 0x6a, 0x5d,
+ 0xdb, 0xd6, 0x1f, 0x76, 0x6b, 0x16, 0xf0, 0x58, 0x84, 0x5f, 0x59, 0xce,
+ 0x62, 0x34, 0xc3, 0xdf, 0x94, 0xb8, 0x2f, 0x84, 0x68, 0xf0, 0xb8, 0x51,
+ 0xd9, 0x6d, 0x8e, 0x4a, 0x1d, 0xe6, 0x5c, 0xd8, 0x86, 0x25, 0xe3, 0x24,
+ 0xfd, 0x21, 0x61, 0x13, 0x48, 0x3e, 0xf6, 0x7d, 0xa6, 0x71, 0x9b, 0xd2,
+ 0x6e, 0xe6, 0xd2, 0x08, 0x94, 0x62, 0x6c, 0x98, 0xfe, 0x2f, 0x9c, 0x88,
+ 0x7e, 0x78, 0x15, 0x02, 0x00, 0xf0, 0xba, 0x24, 0x91, 0xf2, 0xdc, 0x47,
+ 0x51, 0x4d, 0x15, 0x5e, 0x91, 0x5f, 0x57, 0x5b, 0x1d, 0x35, 0x24, 0x45,
+ 0x75, 0x9b, 0x88, 0x75, 0xf1, 0x2f, 0x85, 0xe7, 0x89, 0xd1, 0x01, 0xb4,
+ 0xc8, 0x18, 0xb7, 0x97, 0xef, 0x4b, 0x90, 0xf4, 0xbf, 0x10, 0x27, 0x3c,
+ 0x60, 0xff, 0xc4, 0x94, 0x20, 0x2f, 0x93, 0x4b, 0x4d, 0xe3, 0x80, 0xf7,
+ 0x2c, 0x71, 0xd9, 0xe3, 0x68, 0xb4, 0x77, 0x2b, 0xc7, 0x0d, 0x39, 0x92,
+ 0xef, 0x91, 0x0d, 0xb2, 0x11, 0x50, 0x0e, 0xe8, 0xad, 0x3b, 0xf6, 0xb5,
+ 0xc6, 0x14, 0x4d, 0x33, 0x53, 0xa7, 0x60, 0x15, 0xc7, 0x27, 0x51, 0xdc,
+ 0x54, 0x29, 0xa7, 0x0d, 0x6a, 0x7b, 0x72, 0x13, 0xad, 0x7d, 0x41, 0x19,
+ 0x4e, 0x42, 0x49, 0xcc, 0x42, 0xe4, 0xbd, 0x99, 0x13, 0xd9, 0x7f, 0xf3,
+ 0x38, 0xa4, 0xb6, 0x33, 0xed, 0x07, 0x48, 0x7e, 0x8e, 0x82, 0xfe, 0x3a,
+ 0x9d, 0x75, 0x93, 0xba, 0x25, 0x4e, 0x37, 0x3c, 0x0c, 0xd5, 0x69, 0xa9,
+ 0x2d, 0x9e, 0xfd, 0xe8, 0xbb, 0xf5, 0x0c, 0xe2, 0x86, 0xb9, 0x5e, 0x6f,
+ 0x28, 0xe4, 0x19, 0xb3, 0x0b, 0xa4, 0x86, 0xd7, 0x24, 0xd0, 0xb8, 0x89,
+ 0x7b, 0x76, 0xec, 0x05, 0x10, 0x5b, 0x68, 0xe9, 0x58, 0x66, 0xa3, 0xc5,
+ 0xb6, 0x63, 0x20, 0x0e, 0x0e, 0xea, 0x3d, 0x61, 0x5e, 0xda, 0x3d, 0x3c,
+ 0xf9, 0xfd, 0xed, 0xa9, 0xdb, 0x52, 0x94, 0x8a, 0x00, 0xca, 0x3c, 0x8d,
+ 0x66, 0x8f, 0xb0, 0xf0, 0x5a, 0xca, 0x3f, 0x63, 0x71, 0xbf, 0xca, 0x99,
+ 0x37, 0x9b, 0x75, 0x97, 0x89, 0x10, 0x6e, 0xcf, 0xf2, 0xf5, 0xe3, 0xd5,
+ 0x45, 0x9b, 0xad, 0x10, 0x71, 0x6c, 0x5f, 0x6f, 0x7f, 0x22, 0x77, 0x18,
+ 0x2f, 0xf9, 0x99, 0xc5, 0x69, 0x58, 0x03, 0x12, 0x86, 0x82, 0x3e, 0xbf,
+ 0xc2, 0x12, 0x35, 0x43, 0xa3, 0xd9, 0x18, 0x4f, 0x41, 0x11, 0x6b, 0xf3,
+ 0x67, 0xaf, 0x3d, 0x78, 0xe4, 0x22, 0x2d, 0xb3, 0x48, 0x43, 0x31, 0x1d,
+ 0xef, 0xa8, 0xba, 0x49, 0x8e, 0xa9, 0xa7, 0xb6, 0x18, 0x77, 0x84, 0xca,
+ 0xbd, 0xa2, 0x02, 0x1b, 0x6a, 0xf8, 0x5f, 0xda, 0xff, 0xcf, 0x01, 0x6a,
+ 0x86, 0x69, 0xa9, 0xe9, 0xcb, 0x60, 0x1e, 0x15, 0xdc, 0x8f, 0x5d, 0x39,
+ 0xb5, 0xce, 0x55, 0x5f, 0x47, 0x97, 0xb1, 0x19, 0x6e, 0x21, 0xd6, 0x13,
+ 0x39, 0xb2, 0x24, 0xe0, 0x62, 0x82, 0x9f, 0xed, 0x12, 0x81, 0xed, 0xee,
+ 0xab, 0xd0, 0x2f, 0x19, 0x89, 0x3f, 0x57, 0x2e, 0xc2, 0xe2, 0x67, 0xe8,
+ 0xae, 0x03, 0x56, 0xba, 0xd4, 0xd0, 0xa4, 0x89, 0x03, 0x06, 0x5b, 0xcc,
+ 0xf2, 0x22, 0xb8, 0x0e, 0x76, 0x79, 0x4a, 0x42, 0x1d, 0x37, 0x51, 0x5a,
+ 0xaa, 0x46, 0x6c, 0x2a, 0xdd, 0x66, 0xfe, 0xc6, 0x68, 0xc3, 0x38, 0xa2,
+ 0xae, 0x5b, 0x98, 0x24, 0x5d, 0x43, 0x05, 0x82, 0x38, 0x12, 0xd3, 0xd1,
+ 0x75, 0x2d, 0x4f, 0x61, 0xbd, 0xb9, 0x10, 0x87, 0x44, 0x2a, 0x78, 0x07,
+ 0xff, 0xf4, 0x0f, 0xa1, 0xf3, 0x68, 0x9f, 0xbe, 0xae, 0xa2, 0x91, 0xf0,
+ 0xc7, 0x55, 0x7a, 0x52, 0xd5, 0xa3, 0x8d, 0x6f, 0xe4, 0x90, 0x5c, 0xf3,
+ 0x5f, 0xce, 0x3d, 0x23, 0xf9, 0x8e, 0xae, 0x14, 0xfb, 0x82, 0x9a, 0xa3,
+ 0x04, 0x5f, 0xbf, 0xad, 0x3e, 0xf2, 0x97, 0x0a, 0x60, 0x40, 0x70, 0x19,
+ 0x72, 0xad, 0x66, 0xfb, 0x78, 0x1b, 0x84, 0x6c, 0x98, 0xbc, 0x8c, 0xf8,
+ 0x4f, 0xcb, 0xb5, 0xf6, 0xaf, 0x7a, 0xb7, 0x93, 0xef, 0x67, 0x48, 0x02,
+ 0x2c, 0xcb, 0xe6, 0x77, 0x0f, 0x7b, 0xc1, 0xee, 0xc5, 0xb6, 0x2d, 0x7e,
+ 0x62, 0xa0, 0xc0, 0xa7, 0xa5, 0x80, 0x31, 0x92, 0x50, 0xa1, 0x28, 0x22,
+ 0x95, 0x03, 0x17, 0xd1, 0x0f, 0xf6, 0x08, 0xe5, 0xec
+ };
+#define CHACHA_BIG_TEST_SIZE 1305
+#ifndef WOLFSSL_SMALL_STACK
+ byte cipher_big[CHACHA_BIG_TEST_SIZE] = {0};
+ byte plain_big[CHACHA_BIG_TEST_SIZE] = {0};
+ byte input_big[CHACHA_BIG_TEST_SIZE] = {0};
+#else
+ byte* cipher_big;
+ byte* plain_big;
+ byte* input_big;
+#endif /* WOLFSSL_SMALL_STACK */
+ int block_size;
+#endif /* BENCH_EMBEDDED */
byte a[] = {0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90};
byte b[] = {0x45,0x40,0xf0,0x5a,0x9f,0x1f,0xb2,0x96};
@@ -1966,6 +4656,29 @@ int chacha_test(void)
test_chacha[2] = c;
test_chacha[3] = d;
+#ifndef BENCH_EMBEDDED
+#ifdef WOLFSSL_SMALL_STACK
+ cipher_big = (byte*)XMALLOC(CHACHA_BIG_TEST_SIZE, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (cipher_big == NULL) {
+ return MEMORY_E;
+ }
+ plain_big = (byte*)XMALLOC(CHACHA_BIG_TEST_SIZE, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (plain_big == NULL) {
+ return MEMORY_E;
+ }
+ input_big = (byte*)XMALLOC(CHACHA_BIG_TEST_SIZE, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (input_big == NULL) {
+ return MEMORY_E;
+ }
+ XMEMSET(cipher_big, 0, CHACHA_BIG_TEST_SIZE);
+ XMEMSET(plain_big, 0, CHACHA_BIG_TEST_SIZE);
+ XMEMSET(input_big, 0, CHACHA_BIG_TEST_SIZE);
+#endif /* WOLFSSL_SMALL_STACK */
+#endif /* BENCH_EMBEDDED */
+
for (i = 0; i < times; ++i) {
if (i < 3) {
keySz = 32;
@@ -1978,23 +4691,115 @@ int chacha_test(void)
XMEMSET(cipher, 0, 32);
XMEMCPY(cipher + 4, ivs[i], 8);
- wc_Chacha_SetKey(&enc, keys[i], keySz);
- wc_Chacha_SetKey(&dec, keys[i], keySz);
+ ret |= wc_Chacha_SetKey(&enc, keys[i], keySz);
+ ret |= wc_Chacha_SetKey(&dec, keys[i], keySz);
+ if (ret != 0)
+ return ret;
- wc_Chacha_SetIV(&enc, cipher, 0);
- wc_Chacha_SetIV(&dec, cipher, 0);
+ ret |= wc_Chacha_SetIV(&enc, cipher, 0);
+ ret |= wc_Chacha_SetIV(&dec, cipher, 0);
+ if (ret != 0)
+ return ret;
XMEMCPY(plain, input, 8);
- wc_Chacha_Process(&enc, cipher, plain, (word32)8);
- wc_Chacha_Process(&dec, plain, cipher, (word32)8);
+ ret |= wc_Chacha_Process(&enc, cipher, plain, (word32)8);
+ ret |= wc_Chacha_Process(&dec, plain, cipher, (word32)8);
+ if (ret != 0)
+ return ret;
+
+ if (XMEMCMP(test_chacha[i], cipher, 8))
+ return -4500 - i;
+
+ if (XMEMCMP(plain, input, 8))
+ return -4510 - i;
+ }
+
+ /* test of starting at a different counter
+ encrypts all of the information and decrypts starting at 2nd chunk */
+ XMEMSET(plain, 0, sizeof(plain));
+ XMEMSET(sliver, 1, sizeof(sliver)); /* set as 1's to not match plain */
+ XMEMSET(cipher, 0, sizeof(cipher));
+ XMEMCPY(cipher + 4, ivs[0], 8);
+
+ ret |= wc_Chacha_SetKey(&enc, keys[0], keySz);
+ ret |= wc_Chacha_SetKey(&dec, keys[0], keySz);
+ if (ret != 0)
+ return ret;
+
+ ret |= wc_Chacha_SetIV(&enc, cipher, 0);
+ ret |= wc_Chacha_SetIV(&dec, cipher, 1);
+ if (ret != 0)
+ return ret;
+
+ ret |= wc_Chacha_Process(&enc, cipher, plain, sizeof(plain));
+ ret |= wc_Chacha_Process(&dec, sliver, cipher + 64, sizeof(sliver));
+ if (ret != 0)
+ return ret;
+
+ if (XMEMCMP(plain + 64, sliver, 64))
+ return -4520;
+
+#ifndef BENCH_EMBEDDED
+ /* test of encrypting more data */
+ keySz = 32;
+
+ ret |= wc_Chacha_SetKey(&enc, keys[0], keySz);
+ ret |= wc_Chacha_SetKey(&dec, keys[0], keySz);
+ if (ret != 0)
+ return ret;
+
+ ret |= wc_Chacha_SetIV(&enc, ivs[2], 0);
+ ret |= wc_Chacha_SetIV(&dec, ivs[2], 0);
+ if (ret != 0)
+ return ret;
+
+ ret |= wc_Chacha_Process(&enc, cipher_big, plain_big, CHACHA_BIG_TEST_SIZE);
+ ret |= wc_Chacha_Process(&dec, plain_big, cipher_big,
+ CHACHA_BIG_TEST_SIZE);
+ if (ret != 0)
+ return ret;
- if (memcmp(test_chacha[i], cipher, 8))
- return -130 - 5 - i;
+ if (XMEMCMP(plain_big, input_big, CHACHA_BIG_TEST_SIZE))
+ return -4521;
- if (memcmp(plain, input, 8))
- return -130 - i;
+ if (XMEMCMP(cipher_big, cipher_big_result, CHACHA_BIG_TEST_SIZE))
+ return -4522;
+
+ for (i = 0; i < 18; ++i) {
+ /* this will test all paths */
+ /* block sizes: 1 2 3 4 7 8 15 16 31 32 63 64 127 128 255 256 511 512 */
+ block_size = (2 << (i%9)) - (i<9?1:0);
+ keySz = 32;
+
+ ret |= wc_Chacha_SetKey(&enc, keys[0], keySz);
+ ret |= wc_Chacha_SetKey(&dec, keys[0], keySz);
+ if (ret != 0)
+ return ret;
+
+ ret |= wc_Chacha_SetIV(&enc, ivs[2], 0);
+ ret |= wc_Chacha_SetIV(&dec, ivs[2], 0);
+ if (ret != 0)
+ return ret;
+
+ ret |= wc_Chacha_Process(&enc, cipher_big, plain_big, block_size);
+ ret |= wc_Chacha_Process(&dec, plain_big, cipher_big, block_size);
+ if (ret != 0)
+ return ret;
+
+ if (XMEMCMP(plain_big, input_big, block_size))
+ return -4523-i;
+
+ if (XMEMCMP(cipher_big, cipher_big_result, block_size))
+ return -4524-i;
}
+#ifdef WOLFSSL_SMALL_STACK
+ XFREE(cipher_big, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(plain_big, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(input_big, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif /* WOLFSSL_SMALL_STACK */
+#endif /* BENCH_EMBEDDED */
+
return 0;
}
#endif /* HAVE_CHACHA */
@@ -2008,7 +4813,7 @@ int poly1305_test(void)
byte tag[16];
Poly1305 enc;
- const byte msg[] =
+ static const byte msg1[] =
{
0x43,0x72,0x79,0x70,0x74,0x6f,0x67,0x72,
0x61,0x70,0x68,0x69,0x63,0x20,0x46,0x6f,
@@ -2017,13 +4822,13 @@ int poly1305_test(void)
0x75,0x70
};
- const byte msg2[] =
+ static const byte msg2[] =
{
0x48,0x65,0x6c,0x6c,0x6f,0x20,0x77,0x6f,0x72,
0x6c,0x64,0x21
};
- const byte msg3[] =
+ static const byte msg3[] =
{
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
@@ -2031,61 +4836,181 @@ int poly1305_test(void)
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
- const byte correct[] =
+ static const byte msg4[] =
+ {
+ 0xd3,0x1a,0x8d,0x34,0x64,0x8e,0x60,0xdb,
+ 0x7b,0x86,0xaf,0xbc,0x53,0xef,0x7e,0xc2,
+ 0xa4,0xad,0xed,0x51,0x29,0x6e,0x08,0xfe,
+ 0xa9,0xe2,0xb5,0xa7,0x36,0xee,0x62,0xd6,
+ 0x3d,0xbe,0xa4,0x5e,0x8c,0xa9,0x67,0x12,
+ 0x82,0xfa,0xfb,0x69,0xda,0x92,0x72,0x8b,
+ 0x1a,0x71,0xde,0x0a,0x9e,0x06,0x0b,0x29,
+ 0x05,0xd6,0xa5,0xb6,0x7e,0xcd,0x3b,0x36,
+ 0x92,0xdd,0xbd,0x7f,0x2d,0x77,0x8b,0x8c,
+ 0x98,0x03,0xae,0xe3,0x28,0x09,0x1b,0x58,
+ 0xfa,0xb3,0x24,0xe4,0xfa,0xd6,0x75,0x94,
+ 0x55,0x85,0x80,0x8b,0x48,0x31,0xd7,0xbc,
+ 0x3f,0xf4,0xde,0xf0,0x8e,0x4b,0x7a,0x9d,
+ 0xe5,0x76,0xd2,0x65,0x86,0xce,0xc6,0x4b,
+ 0x61,0x16
+ };
+
+ static const byte msg5[] =
+ {
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ };
+
+ static const byte msg6[] =
+ {
+ 0xd3,0x1a,0x8d,0x34,0x64,0x8e,0x60,0xdb,
+ 0x7b,0x86,0xaf,0xbc,0x53,0xef,0x7e,0xc2,
+ 0xa4,0xad,0xed,0x51,0x29,0x6e,0x08,0xfe,
+ 0xa9,0xe2,0xb5,0xa7,0x36,0xee,0x62,0xd6,
+ 0x3d,0xbe,0xa4,0x5e,0x8c,0xa9,0x67,0x12,
+ 0x82,0xfa,0xfb,0x69,0xda,0x92,0x72,0x8b,
+ 0xfa,0xb3,0x24,0xe4,0xfa,0xd6,0x75,0x94,
+ 0x1a,0x71,0xde,0x0a,0x9e,0x06,0x0b,0x29,
+ 0xa9,0xe2,0xb5,0xa7,0x36,0xee,0x62,0xd6,
+ 0x3d,0xbe,0xa4,0x5e,0x8c,0xa9,0x67,0x12,
+ 0xfa,0xb3,0x24,0xe4,0xfa,0xd6,0x75,0x94,
+ 0x05,0xd6,0xa5,0xb6,0x7e,0xcd,0x3b,0x36,
+ 0x92,0xdd,0xbd,0x7f,0x2d,0x77,0x8b,0x8c,
+ 0x7b,0x86,0xaf,0xbc,0x53,0xef,0x7e,0xc2,
+ 0x98,0x03,0xae,0xe3,0x28,0x09,0x1b,0x58,
+ 0xfa,0xb3,0x24,0xe4,0xfa,0xd6,0x75,0x94,
+ 0x55,0x85,0x80,0x8b,0x48,0x31,0xd7,0xbc,
+ 0x3f,0xf4,0xde,0xf0,0x8e,0x4b,0x7a,0x9d,
+ 0xe5,0x76,0xd2,0x65,0x86,0xce,0xc6,0x4b,
+ 0x61,0x16
+ };
+
+ byte additional[] =
+ {
+ 0x50,0x51,0x52,0x53,0xc0,0xc1,0xc2,0xc3,
+ 0xc4,0xc5,0xc6,0xc7
+ };
+
+ static const byte correct0[] =
+ {
+ 0x01,0x03,0x80,0x8a,0xfb,0x0d,0xb2,0xfd,
+ 0x4a,0xbf,0xf6,0xaf,0x41,0x49,0xf5,0x1b
+ };
+
+ static const byte correct1[] =
{
0xa8,0x06,0x1d,0xc1,0x30,0x51,0x36,0xc6,
0xc2,0x2b,0x8b,0xaf,0x0c,0x01,0x27,0xa9
-
};
- const byte correct2[] =
+ static const byte correct2[] =
{
0xa6,0xf7,0x45,0x00,0x8f,0x81,0xc9,0x16,
0xa2,0x0d,0xcc,0x74,0xee,0xf2,0xb2,0xf0
};
- const byte correct3[] =
+ static const byte correct3[] =
{
0x49,0xec,0x78,0x09,0x0e,0x48,0x1e,0xc6,
0xc2,0x6b,0x33,0xb9,0x1c,0xcc,0x03,0x07
};
- const byte key[] = {
+ static const byte correct4[] =
+ {
+ 0x1a,0xe1,0x0b,0x59,0x4f,0x09,0xe2,0x6a,
+ 0x7e,0x90,0x2e,0xcb,0xd0,0x60,0x06,0x91
+ };
+
+ static const byte correct5[] =
+ {
+ 0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ };
+
+ static const byte correct6[] =
+ {
+ 0xea,0x11,0x5c,0x4f,0xd0,0xc0,0x10,0xae,
+ 0xf7,0xdf,0xda,0x77,0xa2,0xe9,0xaf,0xca
+ };
+
+ static const byte key[] = {
0x85,0xd6,0xbe,0x78,0x57,0x55,0x6d,0x33,
0x7f,0x44,0x52,0xfe,0x42,0xd5,0x06,0xa8,
0x01,0x03,0x80,0x8a,0xfb,0x0d,0xb2,0xfd,
0x4a,0xbf,0xf6,0xaf,0x41,0x49,0xf5,0x1b
};
- const byte key2[] = {
+ static const byte key2[] = {
0x74,0x68,0x69,0x73,0x20,0x69,0x73,0x20,
0x33,0x32,0x2d,0x62,0x79,0x74,0x65,0x20,
0x6b,0x65,0x79,0x20,0x66,0x6f,0x72,0x20,
0x50,0x6f,0x6c,0x79,0x31,0x33,0x30,0x35
};
- const byte* msgs[] = {msg, msg2, msg3};
- word32 szm[] = {sizeof(msg),sizeof(msg2),sizeof(msg3)};
- const byte* keys[] = {key, key2, key2};
- const byte* tests[] = {correct, correct2, correct3};
+ static const byte key4[] = {
+ 0x7b,0xac,0x2b,0x25,0x2d,0xb4,0x47,0xaf,
+ 0x09,0xb6,0x7a,0x55,0xa4,0xe9,0x55,0x84,
+ 0x0a,0xe1,0xd6,0x73,0x10,0x75,0xd9,0xeb,
+ 0x2a,0x93,0x75,0x78,0x3e,0xd5,0x53,0xff
+ };
+
+ static const byte key5[] = {
+ 0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ };
+
+ const byte* msgs[] = {NULL, msg1, msg2, msg3, msg5, msg6};
+ word32 szm[] = {0, sizeof(msg1), sizeof(msg2),
+ sizeof(msg3), sizeof(msg5), sizeof(msg6)};
+ const byte* keys[] = {key, key, key2, key2, key5, key};
+ const byte* tests[] = {correct0, correct1, correct2, correct3, correct5,
+ correct6};
- for (i = 0; i < 3; i++) {
+ for (i = 0; i < 6; i++) {
ret = wc_Poly1305SetKey(&enc, keys[i], 32);
if (ret != 0)
- return -1001;
+ return -4600 - i;
ret = wc_Poly1305Update(&enc, msgs[i], szm[i]);
if (ret != 0)
- return -1005;
+ return -4610 - i;
ret = wc_Poly1305Final(&enc, tag);
if (ret != 0)
- return -60;
+ return -4620 - i;
- if (memcmp(tag, tests[i], sizeof(tag)))
- return -61;
+ if (XMEMCMP(tag, tests[i], sizeof(tag)))
+ return -4630 - i;
}
+ /* Check TLS MAC function from 2.8.2 https://tools.ietf.org/html/rfc7539 */
+ XMEMSET(tag, 0, sizeof(tag));
+ ret = wc_Poly1305SetKey(&enc, key4, sizeof(key4));
+ if (ret != 0)
+ return -4640;
+
+ ret = wc_Poly1305_MAC(&enc, additional, sizeof(additional),
+ (byte*)msg4, sizeof(msg4), tag, sizeof(tag));
+ if (ret != 0)
+ return -4641;
+
+ if (XMEMCMP(tag, correct4, sizeof(tag)))
+ return -4642;
+
+ /* Check fail of TLS MAC function if altering additional data */
+ XMEMSET(tag, 0, sizeof(tag));
+ additional[0]++;
+ ret = wc_Poly1305_MAC(&enc, additional, sizeof(additional),
+ (byte*)msg4, sizeof(msg4), tag, sizeof(tag));
+ if (ret != 0)
+ return -4643;
+
+ if (XMEMCMP(tag, correct4, sizeof(tag)) == 0)
+ return -4644;
+
+
return 0;
}
#endif /* HAVE_POLY1305 */
@@ -2254,95 +5179,350 @@ int chacha20_poly1305_aead_test(void)
0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f, 0x38
};
- byte generatedCiphertext[272];
- byte generatedPlaintext[272];
+ byte generatedCiphertext[265]; /* max plaintext2/cipher2 */
+ byte generatedPlaintext[265]; /* max plaintext2/cipher2 */
byte generatedAuthTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE];
int err;
+ ChaChaPoly_Aead aead;
+
+#if !defined(USE_INTEL_CHACHA_SPEEDUP) && !defined(WOLFSSL_ARMASM)
+ #define TEST_SMALL_CHACHA_CHUNKS 32
+#else
+ #define TEST_SMALL_CHACHA_CHUNKS 64
+#endif
+ #ifdef TEST_SMALL_CHACHA_CHUNKS
+ word32 testLen;
+ #endif
+
XMEMSET(generatedCiphertext, 0, sizeof(generatedCiphertext));
XMEMSET(generatedAuthTag, 0, sizeof(generatedAuthTag));
XMEMSET(generatedPlaintext, 0, sizeof(generatedPlaintext));
- /* Test #1 */
+ /* Parameter Validation testing */
+ /* Encrypt */
+ err = wc_ChaCha20Poly1305_Encrypt(NULL, iv1, aad1, sizeof(aad1), plaintext1,
+ sizeof(plaintext1), generatedCiphertext, generatedAuthTag);
+ if (err != BAD_FUNC_ARG)
+ return -4700;
+ err = wc_ChaCha20Poly1305_Encrypt(key1, NULL, aad1, sizeof(aad1),
+ plaintext1, sizeof(plaintext1), generatedCiphertext,
+ generatedAuthTag);
+ if (err != BAD_FUNC_ARG)
+ return -4701;
+ err = wc_ChaCha20Poly1305_Encrypt(key1, iv1, aad1, sizeof(aad1), NULL,
+ sizeof(plaintext1), generatedCiphertext, generatedAuthTag);
+ if (err != BAD_FUNC_ARG)
+ return -4702;
+ err = wc_ChaCha20Poly1305_Encrypt(key1, iv1, aad1, sizeof(aad1), plaintext1,
+ sizeof(plaintext1), NULL, generatedAuthTag);
+ if (err != BAD_FUNC_ARG)
+ return -4703;
+ err = wc_ChaCha20Poly1305_Encrypt(key1, iv1, aad1, sizeof(aad1), plaintext1,
+ sizeof(plaintext1), generatedCiphertext, NULL);
+ if (err != BAD_FUNC_ARG)
+ return -4704;
+ err = wc_ChaCha20Poly1305_Encrypt(key1, iv1, aad1, sizeof(aad1), plaintext1,
+ 0, generatedCiphertext, generatedAuthTag);
+ if (err != BAD_FUNC_ARG)
+ return -4705;
+ /* Decrypt */
+ err = wc_ChaCha20Poly1305_Decrypt(NULL, iv2, aad2, sizeof(aad2), cipher2,
+ sizeof(cipher2), authTag2, generatedPlaintext);
+ if (err != BAD_FUNC_ARG)
+ return -4706;
+ err = wc_ChaCha20Poly1305_Decrypt(key2, NULL, aad2, sizeof(aad2), cipher2,
+ sizeof(cipher2), authTag2, generatedPlaintext);
+ if (err != BAD_FUNC_ARG)
+ return -4707;
+ err = wc_ChaCha20Poly1305_Decrypt(key2, iv2, aad2, sizeof(aad2), NULL,
+ sizeof(cipher2), authTag2, generatedPlaintext);
+ if (err != BAD_FUNC_ARG)
+ return -4708;
+ err = wc_ChaCha20Poly1305_Decrypt(key2, iv2, aad2, sizeof(aad2), cipher2,
+ sizeof(cipher2), NULL, generatedPlaintext);
+ if (err != BAD_FUNC_ARG)
+ return -4709;
+ err = wc_ChaCha20Poly1305_Decrypt(key2, iv2, aad2, sizeof(aad2), cipher2,
+ sizeof(cipher2), authTag2, NULL);
+ if (err != BAD_FUNC_ARG)
+ return -4710;
+ err = wc_ChaCha20Poly1305_Decrypt(key2, iv2, aad2, sizeof(aad2), cipher2,
+ 0, authTag2, generatedPlaintext);
+ if (err != BAD_FUNC_ARG)
+ return -4711;
+
+ /* Test #1 */
err = wc_ChaCha20Poly1305_Encrypt(key1, iv1,
aad1, sizeof(aad1),
plaintext1, sizeof(plaintext1),
generatedCiphertext, generatedAuthTag);
- if (err)
- {
+ if (err) {
return err;
}
/* -- Check the ciphertext and authtag */
-
- if (XMEMCMP(generatedCiphertext, cipher1, sizeof(cipher1)))
- {
- return -1064;
+ if (XMEMCMP(generatedCiphertext, cipher1, sizeof(cipher1))) {
+ return -4712;
}
-
- if (XMEMCMP(generatedAuthTag, authTag1, sizeof(authTag1)))
- {
- return -1065;
+ if (XMEMCMP(generatedAuthTag, authTag1, sizeof(authTag1))) {
+ return -4713;
}
/* -- Verify decryption works */
-
err = wc_ChaCha20Poly1305_Decrypt(key1, iv1,
aad1, sizeof(aad1),
cipher1, sizeof(cipher1),
authTag1, generatedPlaintext);
- if (err)
- {
+ if (err) {
return err;
}
-
- if (XMEMCMP(generatedPlaintext, plaintext1, sizeof( plaintext1)))
- {
- return -1066;
+ if (XMEMCMP(generatedPlaintext, plaintext1, sizeof(plaintext1))) {
+ return -4714;
}
+
XMEMSET(generatedCiphertext, 0, sizeof(generatedCiphertext));
XMEMSET(generatedAuthTag, 0, sizeof(generatedAuthTag));
XMEMSET(generatedPlaintext, 0, sizeof(generatedPlaintext));
/* Test #2 */
-
err = wc_ChaCha20Poly1305_Encrypt(key2, iv2,
aad2, sizeof(aad2),
plaintext2, sizeof(plaintext2),
generatedCiphertext, generatedAuthTag);
- if (err)
- {
+ if (err) {
return err;
}
/* -- Check the ciphertext and authtag */
-
- if (XMEMCMP(generatedCiphertext, cipher2, sizeof(cipher2)))
- {
- return -1067;
+ if (XMEMCMP(generatedCiphertext, cipher2, sizeof(cipher2))) {
+ return -4715;
}
-
- if (XMEMCMP(generatedAuthTag, authTag2, sizeof(authTag2)))
- {
- return -1068;
+ if (XMEMCMP(generatedAuthTag, authTag2, sizeof(authTag2))) {
+ return -4716;
}
/* -- Verify decryption works */
-
err = wc_ChaCha20Poly1305_Decrypt(key2, iv2,
aad2, sizeof(aad2),
cipher2, sizeof(cipher2),
authTag2, generatedPlaintext);
- if (err)
- {
+ if (err) {
return err;
}
- if (XMEMCMP(generatedPlaintext, plaintext2, sizeof(plaintext2)))
- {
- return -1069;
+ if (XMEMCMP(generatedPlaintext, plaintext2, sizeof(plaintext2))) {
+ return -4717;
+ }
+
+
+ /* AEAD init/update/final */
+ err = wc_ChaCha20Poly1305_Init(NULL, key1, iv1,
+ CHACHA20_POLY1305_AEAD_DECRYPT);
+ if (err != BAD_FUNC_ARG)
+ return -4718;
+ err = wc_ChaCha20Poly1305_Init(&aead, NULL, iv1,
+ CHACHA20_POLY1305_AEAD_DECRYPT);
+ if (err != BAD_FUNC_ARG)
+ return -4719;
+ err = wc_ChaCha20Poly1305_Init(&aead, key1, NULL,
+ CHACHA20_POLY1305_AEAD_DECRYPT);
+ if (err != BAD_FUNC_ARG)
+ return -4720;
+ err = wc_ChaCha20Poly1305_UpdateAad(NULL, aad1, sizeof(aad1));
+ if (err != BAD_FUNC_ARG)
+ return -4721;
+ err = wc_ChaCha20Poly1305_UpdateAad(&aead, NULL, sizeof(aad1));
+ if (err != BAD_FUNC_ARG)
+ return -4722;
+ err = wc_ChaCha20Poly1305_UpdateData(NULL, generatedPlaintext,
+ generatedPlaintext, sizeof(plaintext1));
+ if (err != BAD_FUNC_ARG)
+ return -4723;
+ err = wc_ChaCha20Poly1305_UpdateData(&aead, generatedPlaintext, NULL,
+ sizeof(plaintext1));
+ if (err != BAD_FUNC_ARG)
+ return -4724;
+ err = wc_ChaCha20Poly1305_UpdateData(&aead, NULL, generatedPlaintext,
+ sizeof(plaintext1));
+ if (err != BAD_FUNC_ARG)
+ return -4725;
+ err = wc_ChaCha20Poly1305_Final(NULL, generatedAuthTag);
+ if (err != BAD_FUNC_ARG)
+ return -4726;
+ err = wc_ChaCha20Poly1305_Final(&aead, NULL);
+ if (err != BAD_FUNC_ARG)
+ return -4727;
+
+ /* AEAD init/update/final - state tests */
+ aead.state = CHACHA20_POLY1305_STATE_INIT;
+ err = wc_ChaCha20Poly1305_UpdateAad(&aead, aad1, sizeof(aad1));
+ if (err != BAD_STATE_E)
+ return -4728;
+ aead.state = CHACHA20_POLY1305_STATE_DATA;
+ err = wc_ChaCha20Poly1305_UpdateAad(&aead, aad1, sizeof(aad1));
+ if (err != BAD_STATE_E)
+ return -4729;
+ aead.state = CHACHA20_POLY1305_STATE_INIT;
+ err = wc_ChaCha20Poly1305_UpdateData(&aead, generatedPlaintext,
+ generatedPlaintext, sizeof(plaintext1));
+ if (err != BAD_STATE_E)
+ return -4730;
+ aead.state = CHACHA20_POLY1305_STATE_INIT;
+ err = wc_ChaCha20Poly1305_Final(&aead, generatedAuthTag);
+ if (err != BAD_STATE_E)
+ return -4731;
+ aead.state = CHACHA20_POLY1305_STATE_READY;
+ err = wc_ChaCha20Poly1305_Final(&aead, generatedAuthTag);
+ if (err != BAD_STATE_E)
+ return -4732;
+
+ XMEMSET(generatedCiphertext, 0, sizeof(generatedCiphertext));
+ XMEMSET(generatedAuthTag, 0, sizeof(generatedAuthTag));
+ XMEMSET(generatedPlaintext, 0, sizeof(generatedPlaintext));
+
+ /* Test 1 - Encrypt */
+ err = wc_ChaCha20Poly1305_Init(&aead, key1, iv1,
+ CHACHA20_POLY1305_AEAD_ENCRYPT);
+ if (err != 0)
+ return -4733;
+ err = wc_ChaCha20Poly1305_UpdateAad(&aead, aad1, sizeof(aad1));
+ if (err != 0)
+ return -4734;
+#ifdef TEST_SMALL_CHACHA_CHUNKS
+ /* test doing data in smaller chunks */
+ for (testLen=0; testLen<sizeof(plaintext1); ) {
+ word32 dataLen = sizeof(plaintext1) - testLen;
+ if (dataLen > TEST_SMALL_CHACHA_CHUNKS)
+ dataLen = TEST_SMALL_CHACHA_CHUNKS;
+ err = wc_ChaCha20Poly1305_UpdateData(&aead, &plaintext1[testLen],
+ &generatedCiphertext[testLen], dataLen);
+ if (err != 0)
+ return -4735;
+ testLen += dataLen;
+ }
+#else
+ err = wc_ChaCha20Poly1305_UpdateData(&aead, plaintext1,
+ generatedCiphertext, sizeof(plaintext1));
+#endif
+ err = wc_ChaCha20Poly1305_Final(&aead, generatedAuthTag);
+ if (err != 0)
+ return -4736;
+ err = wc_ChaCha20Poly1305_CheckTag(generatedAuthTag, authTag1);
+ if (err != 0)
+ return -4737;
+ if (XMEMCMP(generatedCiphertext, cipher1, sizeof(cipher1))) {
+ return -4738;
+ }
+
+ /* Test 1 - Decrypt */
+ err = wc_ChaCha20Poly1305_Init(&aead, key1, iv1,
+ CHACHA20_POLY1305_AEAD_DECRYPT);
+ if (err != 0)
+ return -4739;
+ err = wc_ChaCha20Poly1305_UpdateAad(&aead, aad1, sizeof(aad1));
+ if (err != 0)
+ return -4740;
+#ifdef TEST_SMALL_CHACHA_CHUNKS
+ /* test doing data in smaller chunks */
+ for (testLen=0; testLen<sizeof(plaintext1); ) {
+ word32 dataLen = sizeof(plaintext1) - testLen;
+ if (dataLen > TEST_SMALL_CHACHA_CHUNKS)
+ dataLen = TEST_SMALL_CHACHA_CHUNKS;
+ err = wc_ChaCha20Poly1305_UpdateData(&aead,
+ &generatedCiphertext[testLen], &generatedPlaintext[testLen],
+ dataLen);
+ if (err != 0)
+ return -4741;
+ testLen += dataLen;
+ }
+#else
+ err = wc_ChaCha20Poly1305_UpdateData(&aead, generatedCiphertext,
+ generatedPlaintext, sizeof(cipher1));
+#endif
+ err = wc_ChaCha20Poly1305_Final(&aead, generatedAuthTag);
+ if (err != 0)
+ return -4742;
+ err = wc_ChaCha20Poly1305_CheckTag(generatedAuthTag, authTag1);
+ if (err != 0)
+ return -4743;
+ if (XMEMCMP(generatedPlaintext, plaintext1, sizeof(plaintext1))) {
+ return -4744;
+ }
+
+ XMEMSET(generatedCiphertext, 0, sizeof(generatedCiphertext));
+ XMEMSET(generatedAuthTag, 0, sizeof(generatedAuthTag));
+ XMEMSET(generatedPlaintext, 0, sizeof(generatedPlaintext));
+
+ /* Test 2 - Encrypt */
+ err = wc_ChaCha20Poly1305_Init(&aead, key2, iv2,
+ CHACHA20_POLY1305_AEAD_ENCRYPT);
+ if (err != 0)
+ return -4745;
+ err = wc_ChaCha20Poly1305_UpdateAad(&aead, aad2, sizeof(aad2));
+ if (err != 0)
+ return -4746;
+#ifdef TEST_SMALL_CHACHA_CHUNKS
+ /* test doing data in smaller chunks */
+ for (testLen=0; testLen<sizeof(plaintext2); ) {
+ word32 dataLen = sizeof(plaintext2) - testLen;
+ if (dataLen > TEST_SMALL_CHACHA_CHUNKS)
+ dataLen = TEST_SMALL_CHACHA_CHUNKS;
+ err = wc_ChaCha20Poly1305_UpdateData(&aead, &plaintext2[testLen],
+ &generatedCiphertext[testLen], dataLen);
+ if (err != 0)
+ return -4747;
+ testLen += dataLen;
+ }
+#else
+ err = wc_ChaCha20Poly1305_UpdateData(&aead, plaintext2, generatedCiphertext,
+ sizeof(plaintext2));
+#endif
+ err = wc_ChaCha20Poly1305_Final(&aead, generatedAuthTag);
+ if (err != 0)
+ return -4748;
+ err = wc_ChaCha20Poly1305_CheckTag(generatedAuthTag, authTag2);
+ if (err != 0)
+ return -4749;
+ if (XMEMCMP(generatedCiphertext, cipher2, sizeof(cipher2))) {
+ return -4750;
+ }
+
+ /* Test 2 - Decrypt */
+ err = wc_ChaCha20Poly1305_Init(&aead, key2, iv2,
+ CHACHA20_POLY1305_AEAD_DECRYPT);
+ if (err != 0)
+ return -4751;
+ err = wc_ChaCha20Poly1305_UpdateAad(&aead, aad2, sizeof(aad2));
+ if (err != 0)
+ return -4752;
+#ifdef TEST_SMALL_CHACHA_CHUNKS
+ /* test doing data in smaller chunks */
+ for (testLen=0; testLen<sizeof(plaintext2); ) {
+ word32 dataLen = sizeof(plaintext2) - testLen;
+ if (dataLen > TEST_SMALL_CHACHA_CHUNKS)
+ dataLen = TEST_SMALL_CHACHA_CHUNKS;
+ err = wc_ChaCha20Poly1305_UpdateData(&aead,
+ &generatedCiphertext[testLen], &generatedPlaintext[testLen],
+ dataLen);
+ if (err != 0)
+ return -4753;
+ testLen += dataLen;
+ }
+#else
+ err = wc_ChaCha20Poly1305_UpdateData(&aead, generatedCiphertext,
+ generatedPlaintext, sizeof(cipher2));
+#endif
+ err = wc_ChaCha20Poly1305_Final(&aead, generatedAuthTag);
+ if (err != 0)
+ return -4754;
+ err = wc_ChaCha20Poly1305_CheckTag(generatedAuthTag, authTag2);
+ if (err != 0)
+ return -4755;
+ if (XMEMCMP(generatedPlaintext, plaintext2, sizeof(plaintext2))) {
+ return -4756;
}
return err;
@@ -2386,19 +5566,57 @@ int des_test(void)
ret = wc_Des_SetKey(&enc, key, iv, DES_ENCRYPTION);
if (ret != 0)
- return -31;
+ return -4800;
+
+ ret = wc_Des_CbcEncrypt(&enc, cipher, vector, sizeof(vector));
+ if (ret != 0)
+ return -4801;
- wc_Des_CbcEncrypt(&enc, cipher, vector, sizeof(vector));
ret = wc_Des_SetKey(&dec, key, iv, DES_DECRYPTION);
if (ret != 0)
- return -32;
- wc_Des_CbcDecrypt(&dec, plain, cipher, sizeof(cipher));
+ return -4802;
- if (memcmp(plain, vector, sizeof(plain)))
- return -33;
+ ret = wc_Des_CbcDecrypt(&dec, plain, cipher, sizeof(cipher));
+ if (ret != 0)
+ return -4803;
- if (memcmp(cipher, verify, sizeof(cipher)))
- return -34;
+ if (XMEMCMP(plain, vector, sizeof(plain)))
+ return -4804;
+
+ if (XMEMCMP(cipher, verify, sizeof(cipher)))
+ return -4805;
+
+ ret = wc_Des_CbcEncryptWithKey(cipher, vector, sizeof(vector), key, iv);
+ if (ret != 0)
+ return -4806;
+
+#if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_SHA)
+ {
+ EncryptedInfo info;
+ XMEMSET(&info, 0, sizeof(EncryptedInfo));
+ XMEMCPY(info.iv, iv, sizeof(iv));
+ info.ivSz = sizeof(iv);
+ info.keySz = sizeof(key);
+ info.cipherType = WC_CIPHER_DES;
+
+ ret = wc_BufferKeyEncrypt(&info, cipher, sizeof(cipher), key,
+ sizeof(key), WC_HASH_TYPE_SHA);
+ if (ret != 0)
+ return -4807;
+
+ /* Test invalid info ptr */
+ ret = wc_BufferKeyEncrypt(NULL, cipher, sizeof(cipher), key,
+ sizeof(key), WC_HASH_TYPE_SHA);
+ if (ret != BAD_FUNC_ARG)
+ return -4808;
+
+ /* Test invalid hash type */
+ ret = wc_BufferKeyEncrypt(&info, cipher, sizeof(cipher), key,
+ sizeof(key), WC_HASH_TYPE_NONE);
+ if (ret == 0)
+ return -4809;
+ }
+#endif
return 0;
}
@@ -2444,46 +5662,1852 @@ int des3_test(void)
int ret;
-#ifdef HAVE_CAVIUM
- if (wc_Des3_InitCavium(&enc, CAVIUM_DEV_ID) != 0)
- return -20005;
- if (wc_Des3_InitCavium(&dec, CAVIUM_DEV_ID) != 0)
- return -20006;
-#endif
+ if (wc_Des3Init(&enc, HEAP_HINT, devId) != 0)
+ return -4900;
+ if (wc_Des3Init(&dec, HEAP_HINT, devId) != 0)
+ return -4901;
+
ret = wc_Des3_SetKey(&enc, key3, iv3, DES_ENCRYPTION);
if (ret != 0)
- return -31;
+ return -4902;
ret = wc_Des3_SetKey(&dec, key3, iv3, DES_DECRYPTION);
if (ret != 0)
- return -32;
+ return -4903;
ret = wc_Des3_CbcEncrypt(&enc, cipher, vector, sizeof(vector));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
if (ret != 0)
- return -33;
+ return -4904;
ret = wc_Des3_CbcDecrypt(&dec, plain, cipher, sizeof(cipher));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
if (ret != 0)
- return -34;
+ return -4905;
+
+ if (XMEMCMP(plain, vector, sizeof(plain)))
+ return -4906;
+
+ if (XMEMCMP(cipher, verify3, sizeof(cipher)))
+ return -4907;
- if (memcmp(plain, vector, sizeof(plain)))
- return -35;
+#if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
+ /* test the same vectors with using compatibility layer */
+ {
+ DES_key_schedule ks1;
+ DES_key_schedule ks2;
+ DES_key_schedule ks3;
+ DES_cblock iv4;
+
+ XMEMCPY(ks1, key3, sizeof(DES_key_schedule));
+ XMEMCPY(ks2, key3 + 8, sizeof(DES_key_schedule));
+ XMEMCPY(ks3, key3 + 16, sizeof(DES_key_schedule));
+ XMEMCPY(iv4, iv3, sizeof(DES_cblock));
+
+ XMEMSET(plain, 0, sizeof(plain));
+ XMEMSET(cipher, 0, sizeof(cipher));
+
+ DES_ede3_cbc_encrypt(vector, cipher, sizeof(vector), &ks1, &ks2, &ks3,
+ &iv4, DES_ENCRYPT);
+ DES_ede3_cbc_encrypt(cipher, plain, sizeof(cipher), &ks1, &ks2, &ks3,
+ &iv4, DES_DECRYPT);
- if (memcmp(cipher, verify3, sizeof(cipher)))
- return -36;
+ if (XMEMCMP(plain, vector, sizeof(plain)))
+ return -4908;
+
+ if (XMEMCMP(cipher, verify3, sizeof(cipher)))
+ return -4909;
+ }
+#endif /* OPENSSL_EXTRA */
-#ifdef HAVE_CAVIUM
- wc_Des3_FreeCavium(&enc);
- wc_Des3_FreeCavium(&dec);
+ wc_Des3Free(&enc);
+ wc_Des3Free(&dec);
+
+#if defined(WOLFSSL_ENCRYPTED_KEYS) && !defined(NO_SHA)
+ {
+ EncryptedInfo info;
+ XMEMSET(&info, 0, sizeof(EncryptedInfo));
+ XMEMCPY(info.iv, iv3, sizeof(iv3));
+ info.ivSz = sizeof(iv3);
+ info.keySz = sizeof(key3);
+ info.cipherType = WC_CIPHER_DES3;
+
+ ret = wc_BufferKeyEncrypt(&info, cipher, sizeof(cipher), key3,
+ sizeof(key3), WC_HASH_TYPE_SHA);
+ if (ret != 0)
+ return -4910;
+ }
#endif
+
return 0;
}
#endif /* NO_DES */
#ifndef NO_AES
+
+#if defined(WOLFSSL_AES_OFB) || defined(WOLFSSL_AES_CFB)
+#if defined(OPENSSL_EXTRA) && !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+/* pass in the function, key, iv, plain text and expected and this function
+ * tests that the encryption and decryption is successful */
+static int EVP_test(const WOLFSSL_EVP_CIPHER* type, const byte* key,
+ const byte* iv, const byte* plain, int plainSz,
+ const byte* expected, int expectedSz)
+{
+ EVP_CIPHER_CTX ctx;
+ int idx, ret = 0, cipherSz;
+ byte* cipher;
+
+ cipher = (byte*)XMALLOC(plainSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (cipher == NULL) {
+ return -4911;
+ }
+
+ /* test encrypt */
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, type, key, iv, 1) == 0) {
+ ret = -4912;
+ goto EVP_TEST_END;
+ }
+
+ if (EVP_CipherUpdate(&ctx, cipher, &idx, plain, expectedSz) == 0) {
+ ret = -4913;
+ goto EVP_TEST_END;
+ }
+
+ cipherSz = idx;
+ if (EVP_CipherFinal(&ctx, cipher + cipherSz, &idx) == 0) {
+ ret = -4914;
+ goto EVP_TEST_END;
+ }
+ cipherSz += idx;
+
+ if (XMEMCMP(cipher, expected, plainSz)) {
+ ret = -4915;
+ goto EVP_TEST_END;
+ }
+
+ /* test decrypt */
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, type, key, iv, 0) == 0) {
+ ret = -4916;
+ goto EVP_TEST_END;
+ }
+
+ if (EVP_CipherUpdate(&ctx, cipher, &idx, cipher, expectedSz) == 0) {
+ ret = -4917;
+ goto EVP_TEST_END;
+ }
+
+ cipherSz = idx;
+ if (EVP_CipherFinal(&ctx, cipher + cipherSz, &idx) == 0) {
+ ret = -4918;
+ goto EVP_TEST_END;
+ }
+ cipherSz += idx;
+
+ if ((expectedSz != cipherSz) || XMEMCMP(plain, cipher, plainSz)) {
+ ret = -4919;
+ goto EVP_TEST_END;
+ }
+
+EVP_TEST_END:
+ XFREE(cipher, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ (void)cipherSz;
+ return ret;
+}
+#endif /* OPENSSL_EXTRA */
+#endif /* WOLFSSL_AES_OFB || WOLFSSL_AES_CFB */
+
+#ifdef WOLFSSL_AES_OFB
+ /* test vector from https://csrc.nist.gov/Projects/cryptographic-algorithm-validation-program/Block-Ciphers */
+ int aesofb_test(void)
+ {
+ #ifdef WOLFSSL_AES_256
+ const byte key1[] =
+ {
+ 0xc4,0xc7,0xfa,0xd6,0x53,0x5c,0xb8,0x71,
+ 0x4a,0x5c,0x40,0x77,0x9a,0x8b,0xa1,0xd2,
+ 0x53,0x3e,0x23,0xb4,0xb2,0x58,0x73,0x2a,
+ 0x5b,0x78,0x01,0xf4,0xe3,0x71,0xa7,0x94
+ };
+ const byte iv1[] =
+ {
+ 0x5e,0xb9,0x33,0x13,0xb8,0x71,0xff,0x16,
+ 0xb9,0x8a,0x9b,0xcb,0x43,0x33,0x0d,0x6f
+ };
+ const byte plain1[] =
+ {
+ 0x6d,0x0b,0xb0,0x79,0x63,0x84,0x71,0xe9,
+ 0x39,0xd4,0x53,0x14,0x86,0xc1,0x4c,0x25,
+ 0x9a,0xee,0xc6,0xf3,0xc0,0x0d,0xfd,0xd6,
+ 0xc0,0x50,0xa8,0xba,0xa8,0x20,0xdb,0x71,
+ 0xcc,0x12,0x2c,0x4e,0x0c,0x17,0x15,0xef,
+ 0x55,0xf3,0x99,0x5a,0x6b,0xf0,0x2a,0x4c
+ };
+ const byte cipher1[] =
+ {
+ 0x0f,0x54,0x61,0x71,0x59,0xd0,0x3f,0xfc,
+ 0x1b,0xfa,0xfb,0x60,0x29,0x30,0xd7,0x00,
+ 0xf4,0xa4,0xa8,0xe6,0xdd,0x93,0x94,0x46,
+ 0x64,0xd2,0x19,0xc4,0xc5,0x4d,0xde,0x1b,
+ 0x04,0x53,0xe1,0x73,0xf5,0x18,0x74,0xae,
+ 0xfd,0x64,0xa2,0xe1,0xe2,0x76,0x13,0xb0
+ };
+ #endif /* WOLFSSL_AES_256 */
+
+
+ #ifdef WOLFSSL_AES_128
+ const byte key2[] =
+ {
+ 0x10,0xa5,0x88,0x69,0xd7,0x4b,0xe5,0xa3,
+ 0x74,0xcf,0x86,0x7c,0xfb,0x47,0x38,0x59
+ };
+ const byte iv2[] =
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ };
+ const byte plain2[] =
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ };
+ const byte cipher2[] =
+ {
+ 0x6d,0x25,0x1e,0x69,0x44,0xb0,0x51,0xe0,
+ 0x4e,0xaa,0x6f,0xb4,0xdb,0xf7,0x84,0x65
+ };
+ #endif /* WOLFSSL_AES_128 */
+
+
+ #ifdef WOLFSSL_AES_192
+ const byte key3[] = {
+ 0xd0,0x77,0xa0,0x3b,0xd8,0xa3,0x89,0x73,
+ 0x92,0x8c,0xca,0xfe,0x4a,0x9d,0x2f,0x45,
+ 0x51,0x30,0xbd,0x0a,0xf5,0xae,0x46,0xa9
+ };
+ const byte iv3[] =
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ };
+ const byte cipher3[] =
+ {
+ 0xab,0xc7,0x86,0xfb,0x1e,0xdb,0x50,0x45,
+ 0x80,0xc4,0xd8,0x82,0xef,0x29,0xa0,0xc7
+ };
+ const byte plain3[] =
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ };
+ #endif /* WOLFSSL_AES_192 */
+
+ Aes enc;
+ byte cipher[AES_BLOCK_SIZE * 4];
+ #ifdef HAVE_AES_DECRYPT
+ Aes dec;
+ byte plain [AES_BLOCK_SIZE * 4];
+ #endif
+ int ret = 0;
+
+ (void)enc;
+ #ifdef HAVE_AES_DECRYPT
+ (void)dec;
+ #endif
+
+#ifdef WOLFSSL_AES_128
+ /* 128 key size test */
+ #ifdef OPENSSL_EXTRA
+ ret = EVP_test(EVP_aes_128_ofb(), key2, iv2, plain2, sizeof(plain2),
+ cipher2, sizeof(cipher2));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+
+ ret = wc_AesSetKey(&enc, key2, sizeof(key2), iv2, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4920;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key2, sizeof(key2), iv2, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4921;
+ #endif
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesOfbEncrypt(&enc, cipher, plain2, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4922;
+
+ if (XMEMCMP(cipher, cipher2, AES_BLOCK_SIZE))
+ return -4923;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesOfbDecrypt(&dec, plain, cipher2, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4924;
+
+ if (XMEMCMP(plain, plain2, AES_BLOCK_SIZE))
+ return -4925;
+ #endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_128 */
+
+#ifdef WOLFSSL_AES_192
+ /* 192 key size test */
+ #ifdef OPENSSL_EXTRA
+ ret = EVP_test(EVP_aes_192_ofb(), key3, iv3, plain3, sizeof(plain3),
+ cipher3, sizeof(cipher3));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+
+ ret = wc_AesSetKey(&enc, key3, sizeof(key3), iv3, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4926;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key3, sizeof(key3), iv3, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4927;
+ #endif
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesOfbEncrypt(&enc, cipher, plain3, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4928;
+
+ if (XMEMCMP(cipher, cipher3, AES_BLOCK_SIZE))
+ return -4929;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesOfbDecrypt(&dec, plain, cipher3, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4930;
+
+ if (XMEMCMP(plain, plain3, AES_BLOCK_SIZE))
+ return -4931;
+ #endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ /* 256 key size test */
+ #ifdef OPENSSL_EXTRA
+ ret = EVP_test(EVP_aes_256_ofb(), key1, iv1, plain1, sizeof(plain1),
+ cipher1, sizeof(cipher1));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+
+ ret = wc_AesSetKey(&enc, key1, sizeof(key1), iv1, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4932;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key1, sizeof(key1), iv1, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4933;
+ #endif
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesOfbEncrypt(&enc, cipher, plain1, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4934;
+
+ if (XMEMCMP(cipher, cipher1, AES_BLOCK_SIZE))
+ return -4935;
+
+ ret = wc_AesOfbEncrypt(&enc, cipher + AES_BLOCK_SIZE,
+ plain1 + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4936;
+
+ if (XMEMCMP(cipher + AES_BLOCK_SIZE, cipher1 + AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE))
+ return -4937;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesOfbDecrypt(&dec, plain, cipher1, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4938;
+
+ if (XMEMCMP(plain, plain1, AES_BLOCK_SIZE))
+ return -4939;
+
+ ret = wc_AesOfbDecrypt(&dec, plain + AES_BLOCK_SIZE,
+ cipher1 + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4940;
+
+ if (XMEMCMP(plain + AES_BLOCK_SIZE, plain1 + AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE))
+ return -4941;
+ #endif /* HAVE_AES_DECRYPT */
+
+ /* multiple blocks at once */
+ ret = wc_AesSetKey(&enc, key1, sizeof(key1), iv1, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4942;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key1, sizeof(key1), iv1, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4943;
+ #endif
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesOfbEncrypt(&enc, cipher, plain1, AES_BLOCK_SIZE * 3);
+ if (ret != 0)
+ return -4944;
+
+ if (XMEMCMP(cipher, cipher1, AES_BLOCK_SIZE * 3))
+ return -4945;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesOfbDecrypt(&dec, plain, cipher1, AES_BLOCK_SIZE * 3);
+ if (ret != 0)
+ return -4946;
+
+ if (XMEMCMP(plain, plain1, AES_BLOCK_SIZE * 3))
+ return -4947;
+ #endif /* HAVE_AES_DECRYPT */
+
+ /* inline decrypt/encrypt*/
+ ret = wc_AesSetKey(&enc, key1, sizeof(key1), iv1, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4948;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key1, sizeof(key1), iv1, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4949;
+ #endif
+
+ XMEMCPY(cipher, plain1, AES_BLOCK_SIZE * 2);
+ ret = wc_AesOfbEncrypt(&enc, cipher, cipher, AES_BLOCK_SIZE * 2);
+ if (ret != 0)
+ return -4950;
+
+ if (XMEMCMP(cipher, cipher1, AES_BLOCK_SIZE * 2))
+ return -4951;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesOfbDecrypt(&dec, cipher, cipher, AES_BLOCK_SIZE * 2);
+ if (ret != 0)
+ return -4952;
+
+ if (XMEMCMP(cipher, plain1, AES_BLOCK_SIZE * 2))
+ return -4953;
+ #endif /* HAVE_AES_DECRYPT */
+
+ /* 256 key size test leftover support */
+ ret = wc_AesSetKey(&enc, key1, sizeof(key1), iv1, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4954;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key1, sizeof(key1), iv1, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4955;
+ #endif
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesOfbEncrypt(&enc, cipher, plain1, 3);
+ if (ret != 0)
+ return -4956;
+
+ if (XMEMCMP(cipher, cipher1, 3))
+ return -4957;
+
+ ret = wc_AesOfbEncrypt(&enc, cipher + 3, plain1 + 3, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4958;
+
+ if (XMEMCMP(cipher + 3, cipher1 + 3, AES_BLOCK_SIZE))
+ return -4959;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesOfbDecrypt(&dec, plain, cipher1, 6);
+ if (ret != 0)
+ return -4960;
+
+ if (XMEMCMP(plain, plain1, 6))
+ return -4961;
+
+ ret = wc_AesOfbDecrypt(&dec, plain + 6, cipher1 + 6, AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4962;
+
+ if (XMEMCMP(plain + 6, plain1 + 6, AES_BLOCK_SIZE))
+ return -4963;
+ #endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_256 */
+
+ return 0;
+ }
+#endif /* WOLFSSL_AES_OFB */
+
+#if defined(WOLFSSL_AES_CFB)
+ /* Test cases from NIST SP 800-38A, Recommendation for Block Cipher Modes of Operation Methods an*/
+ static int aescfb_test(void)
+ {
+ Aes enc;
+ byte cipher[AES_BLOCK_SIZE * 4];
+ #ifdef HAVE_AES_DECRYPT
+ Aes dec;
+ byte plain [AES_BLOCK_SIZE * 4];
+ #endif
+ int ret = 0;
+
+ const byte iv[] = {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
+ 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
+ };
+
+#ifdef WOLFSSL_AES_128
+ const byte key1[] =
+ {
+ 0x2b,0x7e,0x15,0x16,0x28,0xae,0xd2,0xa6,
+ 0xab,0xf7,0x15,0x88,0x09,0xcf,0x4f,0x3c
+ };
+
+ const byte cipher1[] =
+ {
+ 0x3b,0x3f,0xd9,0x2e,0xb7,0x2d,0xad,0x20,
+ 0x33,0x34,0x49,0xf8,0xe8,0x3c,0xfb,0x4a,
+ 0xc8,0xa6,0x45,0x37,0xa0,0xb3,0xa9,0x3f,
+ 0xcd,0xe3,0xcd,0xad,0x9f,0x1c,0xe5,0x8b,
+ 0x26,0x75,0x1f,0x67,0xa3,0xcb,0xb1,0x40,
+ 0xb1,0x80,0x8c,0xf1,0x87,0xa4,0xf4,0xdf
+ };
+
+ const byte msg1[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
+ 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,
+ 0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef
+ };
+#endif /* WOLFSSL_AES_128 */
+
+#ifdef WOLFSSL_AES_192
+ /* 192 size key test */
+ const byte key2[] =
+ {
+ 0x8e,0x73,0xb0,0xf7,0xda,0x0e,0x64,0x52,
+ 0xc8,0x10,0xf3,0x2b,0x80,0x90,0x79,0xe5,
+ 0x62,0xf8,0xea,0xd2,0x52,0x2c,0x6b,0x7b
+ };
+
+ const byte cipher2[] =
+ {
+ 0xcd,0xc8,0x0d,0x6f,0xdd,0xf1,0x8c,0xab,
+ 0x34,0xc2,0x59,0x09,0xc9,0x9a,0x41,0x74,
+ 0x67,0xce,0x7f,0x7f,0x81,0x17,0x36,0x21,
+ 0x96,0x1a,0x2b,0x70,0x17,0x1d,0x3d,0x7a,
+ 0x2e,0x1e,0x8a,0x1d,0xd5,0x9b,0x88,0xb1,
+ 0xc8,0xe6,0x0f,0xed,0x1e,0xfa,0xc4,0xc9,
+ 0xc0,0x5f,0x9f,0x9c,0xa9,0x83,0x4f,0xa0,
+ 0x42,0xae,0x8f,0xba,0x58,0x4b,0x09,0xff
+ };
+
+ const byte msg2[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
+ 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,
+ 0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef,
+ 0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,
+ 0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10
+ };
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ /* 256 size key simple test */
+ const byte key3[] =
+ {
+ 0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
+ 0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
+ 0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
+ 0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
+ };
+
+ const byte cipher3[] =
+ {
+ 0xdc,0x7e,0x84,0xbf,0xda,0x79,0x16,0x4b,
+ 0x7e,0xcd,0x84,0x86,0x98,0x5d,0x38,0x60,
+ 0x39,0xff,0xed,0x14,0x3b,0x28,0xb1,0xc8,
+ 0x32,0x11,0x3c,0x63,0x31,0xe5,0x40,0x7b,
+ 0xdf,0x10,0x13,0x24,0x15,0xe5,0x4b,0x92,
+ 0xa1,0x3e,0xd0,0xa8,0x26,0x7a,0xe2,0xf9,
+ 0x75,0xa3,0x85,0x74,0x1a,0xb9,0xce,0xf8,
+ 0x20,0x31,0x62,0x3d,0x55,0xb1,0xe4,0x71
+ };
+
+ const byte msg3[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
+ 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,
+ 0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef,
+ 0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,
+ 0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10
+ };
+#endif /* WOLFSSL_AES_256 */
+
+
+ if (wc_AesInit(&enc, HEAP_HINT, devId) != 0)
+ return -4964;
+#ifdef HAVE_AES_DECRYPT
+ if (wc_AesInit(&dec, HEAP_HINT, devId) != 0)
+ return -4965;
+#endif
+
+#ifdef WOLFSSL_AES_128
+ /* 128 key tests */
+ #if defined(OPENSSL_EXTRA) && !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+ ret = EVP_test(EVP_aes_128_cfb128(), key1, iv, msg1, sizeof(msg1),
+ cipher1, sizeof(cipher1));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+
+ ret = wc_AesSetKey(&enc, key1, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4966;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key1, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4967;
+ #endif
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfbEncrypt(&enc, cipher, msg1, AES_BLOCK_SIZE * 2);
+ if (ret != 0)
+ return -4968;
+
+ if (XMEMCMP(cipher, cipher1, AES_BLOCK_SIZE * 2))
+ return -4969;
+
+ /* test restarting encryption process */
+ ret = wc_AesCfbEncrypt(&enc, cipher + (AES_BLOCK_SIZE * 2),
+ msg1 + (AES_BLOCK_SIZE * 2), AES_BLOCK_SIZE);
+ if (ret != 0)
+ return -4970;
+
+ if (XMEMCMP(cipher + (AES_BLOCK_SIZE * 2),
+ cipher1 + (AES_BLOCK_SIZE * 2), AES_BLOCK_SIZE))
+ return -4971;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesCfbDecrypt(&dec, plain, cipher, AES_BLOCK_SIZE * 3);
+ if (ret != 0)
+ return -4972;
+
+ if (XMEMCMP(plain, msg1, AES_BLOCK_SIZE * 3))
+ return -4973;
+ #endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_128 */
+
+#ifdef WOLFSSL_AES_192
+ /* 192 key size test */
+ #if defined(OPENSSL_EXTRA) && !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+ ret = EVP_test(EVP_aes_192_cfb128(), key2, iv, msg2, sizeof(msg2),
+ cipher2, sizeof(cipher2));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+
+ ret = wc_AesSetKey(&enc, key2, sizeof(key2), iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4974;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key2, sizeof(key2), iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4975;
+ #endif
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfbEncrypt(&enc, cipher, msg2, AES_BLOCK_SIZE * 4);
+ if (ret != 0)
+ return -4976;
+
+ if (XMEMCMP(cipher, cipher2, AES_BLOCK_SIZE * 4))
+ return -4977;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesCfbDecrypt(&dec, plain, cipher, AES_BLOCK_SIZE * 4);
+ if (ret != 0)
+ return -4978;
+
+ if (XMEMCMP(plain, msg2, AES_BLOCK_SIZE * 4))
+ return -4979;
+ #endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ /* 256 key size test */
+ #if defined(OPENSSL_EXTRA) && !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+ ret = EVP_test(EVP_aes_256_cfb128(), key3, iv, msg3, sizeof(msg3),
+ cipher3, sizeof(cipher3));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+ ret = wc_AesSetKey(&enc, key3, sizeof(key3), iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4980;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key3, sizeof(key3), iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4981;
+ #endif
+
+ /* test with data left overs, magic lengths are checking near edges */
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfbEncrypt(&enc, cipher, msg3, 4);
+ if (ret != 0)
+ return -4982;
+
+ if (XMEMCMP(cipher, cipher3, 4))
+ return -4983;
+
+ ret = wc_AesCfbEncrypt(&enc, cipher + 4, msg3 + 4, 27);
+ if (ret != 0)
+ return -4984;
+
+ if (XMEMCMP(cipher + 4, cipher3 + 4, 27))
+ return -4985;
+
+ ret = wc_AesCfbEncrypt(&enc, cipher + 31, msg3 + 31,
+ (AES_BLOCK_SIZE * 4) - 31);
+ if (ret != 0)
+ return -4986;
+
+ if (XMEMCMP(cipher, cipher3, AES_BLOCK_SIZE * 4))
+ return -4987;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesCfbDecrypt(&dec, plain, cipher, 4);
+ if (ret != 0)
+ return -4988;
+
+ if (XMEMCMP(plain, msg3, 4))
+ return -4989;
+
+ ret = wc_AesCfbDecrypt(&dec, plain + 4, cipher + 4, 4);
+ if (ret != 0)
+ return -4990;
+
+ ret = wc_AesCfbDecrypt(&dec, plain + 8, cipher + 8, 23);
+ if (ret != 0)
+ return -4991;
+
+ if (XMEMCMP(plain + 4, msg3 + 4, 27))
+ return -4992;
+
+ ret = wc_AesCfbDecrypt(&dec, plain + 31, cipher + 31,
+ (AES_BLOCK_SIZE * 4) - 31);
+ if (ret != 0)
+ return -4993;
+
+ if (XMEMCMP(plain, msg3, AES_BLOCK_SIZE * 4))
+ return -4994;
+ #endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_256 */
+
+ return ret;
+ }
+
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+ static int aescfb1_test(void)
+ {
+ /* Known-answer tests for AES-CFB1 (1-bit feedback) mode.
+  * Covers 128/192/256-bit keys with small bit counts (2, 4 and 10 bits),
+  * and, under OPENSSL_EXTRA, re-encrypts a whole message
+  * (sizeof(msg) * WOLFSSL_BIT_SIZE bits) and cross-checks it against the
+  * OpenSSL-compatible EVP layer via EVP_test().
+  * Returns 0 on success or a unique negative test code on failure. */
+ Aes enc;
+ byte cipher[AES_BLOCK_SIZE];
+ #ifdef HAVE_AES_DECRYPT
+ Aes dec;
+ byte plain [AES_BLOCK_SIZE];
+ #endif
+ int ret = 0;
+
+#ifdef WOLFSSL_AES_128
+ const byte iv[] = {
+ 0x4d,0xbb,0xdc,0xaa,0x59,0xf3,0x63,0xc9,
+ 0x2a,0x3b,0x98,0x43,0xad,0x20,0xe2,0xb7
+ };
+
+ const byte key1[] =
+ {
+ 0xcd,0xef,0x9d,0x06,0x61,0xba,0xe4,0x73,
+ 0x8d,0x1a,0x58,0xa2,0xa6,0x22,0x8b,0x66
+ };
+
+ /* CFB1 vectors are bit-granular: only the leading bits of these
+  * one/two-byte arrays are meaningful. */
+ const byte cipher1[] =
+ {
+ 0x00
+ };
+
+ const byte msg1[] =
+ {
+ 0xC0
+ };
+#endif /* WOLFSSL_AES_128 */
+#ifdef WOLFSSL_AES_192
+ const byte iv2[] = {
+ 0x57,0xc6,0x89,0x7c,0x99,0x52,0x28,0x13,
+ 0xbf,0x67,0x9c,0xe1,0x13,0x70,0xaf,0x5e
+ };
+
+ const byte key2[] =
+ {
+ 0xba,0xa1,0x58,0xa1,0x6b,0x50,0x4a,0x10,
+ 0x8e,0xd4,0x33,0x2e,0xe7,0xf2,0x9b,0xf6,
+ 0xd1,0xac,0x46,0xa8,0xde,0x5a,0xfe,0x7a
+ };
+
+ const byte cipher2[] =
+ {
+ 0x30
+ };
+
+ const byte msg2[] =
+ {
+ 0x80
+ };
+#endif /* WOLFSSL_AES_192 */
+#ifdef WOLFSSL_AES_256
+ const byte iv3[] = {
+ 0x63,0x2e,0x9f,0x83,0x1f,0xa3,0x80,0x5e,
+ 0x52,0x02,0xbc,0xe0,0x6d,0x04,0xf9,0xa0
+ };
+
+ const byte key3[] =
+ {
+ 0xf6,0xfa,0xe4,0xf1,0x5d,0x91,0xfc,0x50,
+ 0x88,0x78,0x4f,0x84,0xa5,0x37,0x12,0x7e,
+ 0x32,0x63,0x55,0x9c,0x62,0x73,0x88,0x20,
+ 0xc2,0xcf,0x3d,0xe1,0x1c,0x2a,0x30,0x40
+ };
+
+ const byte cipher3[] =
+ {
+ 0xF7, 0x00
+ };
+
+ const byte msg3[] =
+ {
+ 0x41, 0xC0
+ };
+#endif /* WOLFSSL_AES_256 */
+
+ if (wc_AesInit(&enc, HEAP_HINT, devId) != 0)
+ return -4995;
+#ifdef HAVE_AES_DECRYPT
+ if (wc_AesInit(&dec, HEAP_HINT, devId) != 0)
+ return -4996;
+#endif
+
+#ifdef WOLFSSL_AES_128
+ /* 128 key tests */
+ /* key1 is 16 bytes, so AES_BLOCK_SIZE doubles as its length here */
+ ret = wc_AesSetKey(&enc, key1, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4997;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key1, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -4998;
+ #endif
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ /* length argument to wc_AesCfb1Encrypt/Decrypt is a count of BITS
+  * (cf. the sizeof(msg) * WOLFSSL_BIT_SIZE calls below) */
+ ret = wc_AesCfb1Encrypt(&enc, cipher, msg1, 2);
+ if (ret != 0)
+ return -4999;
+
+ if (cipher[0] != cipher1[0])
+ return -5000;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesCfb1Decrypt(&dec, plain, cipher, 2);
+ if (ret != 0)
+ return -5001;
+
+ if (plain[0] != msg1[0])
+ return -5002;
+ #endif /* HAVE_AES_DECRYPT */
+
+ #ifdef OPENSSL_EXTRA
+ /* re-key to reset the CFB state before the full-byte EVP cross-check */
+ ret = wc_AesSetKey(&enc, key1, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5003;
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfb1Encrypt(&enc, cipher, msg1,
+ sizeof(msg1) * WOLFSSL_BIT_SIZE);
+ if (ret != 0)
+ return -5004;
+
+ ret = EVP_test(EVP_aes_128_cfb1(), key1, iv, msg1, sizeof(msg1),
+ cipher, sizeof(msg1));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+#endif /* WOLFSSL_AES_128 */
+#ifdef WOLFSSL_AES_192
+ /* 192 key tests */
+ ret = wc_AesSetKey(&enc, key2, sizeof(key2), iv2, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5005;
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfb1Encrypt(&enc, cipher, msg2, 4);
+ if (ret != 0)
+ return -5006;
+ if (XMEMCMP(cipher, cipher2, sizeof(cipher2)) != 0)
+ return -5007;
+
+ #ifdef OPENSSL_EXTRA
+ ret = wc_AesSetKey(&enc, key2, sizeof(key2), iv2, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5008;
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfb1Encrypt(&enc, cipher, msg2,
+ sizeof(msg2) * WOLFSSL_BIT_SIZE);
+ if (ret != 0)
+ return -5009;
+
+ ret = EVP_test(EVP_aes_192_cfb1(), key2, iv2, msg2, sizeof(msg2),
+ cipher, sizeof(msg2));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ /* 256 key tests */
+ ret = wc_AesSetKey(&enc, key3, sizeof(key3), iv3, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5010;
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfb1Encrypt(&enc, cipher, msg3, 10);
+ if (ret != 0)
+ return -5011;
+ if (XMEMCMP(cipher, cipher3, sizeof(cipher3)) != 0)
+ return -5012;
+
+ #ifdef OPENSSL_EXTRA
+ ret = wc_AesSetKey(&enc, key3, sizeof(key3), iv3, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5013;
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfb1Encrypt(&enc, cipher, msg3,
+ sizeof(msg3) * WOLFSSL_BIT_SIZE);
+ if (ret != 0)
+ return -5014;
+
+ ret = EVP_test(EVP_aes_256_cfb1(), key3, iv3, msg3, sizeof(msg3),
+ cipher, sizeof(msg3));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+#endif /* WOLFSSL_AES_256 */
+ return ret;
+ }
+
+ static int aescfb8_test(void)
+ {
+ /* Known-answer tests for AES-CFB8 (8-bit feedback) mode.
+  * Encrypts short (non-block-multiple) messages with 128/192/256-bit
+  * keys, decrypts the 128-bit case back, and cross-checks against the
+  * OpenSSL-compatible EVP layer when OPENSSL_EXTRA is defined.
+  * Returns 0 on success or a unique negative test code on failure. */
+ Aes enc;
+ byte cipher[AES_BLOCK_SIZE];
+ #ifdef HAVE_AES_DECRYPT
+ Aes dec;
+ byte plain [AES_BLOCK_SIZE];
+ #endif
+ int ret = 0;
+
+#ifdef WOLFSSL_AES_128
+ const byte iv[] = {
+ 0xf4,0x75,0xc6,0x49,0x91,0xb2,0x0e,0xae,
+ 0xe1,0x83,0xa2,0x26,0x29,0xe2,0x1e,0x22
+ };
+
+ const byte key1[] =
+ {
+ 0xc8,0xfe,0x9b,0xf7,0x7b,0x93,0x0f,0x46,
+ 0xd2,0x07,0x8b,0x8c,0x0e,0x65,0x7c,0xd4
+ };
+
+ const byte cipher1[] =
+ {
+ 0xd2,0x76,0x91
+ };
+
+ const byte msg1[] =
+ {
+ 0xc9,0x06,0x35
+ };
+#endif /* WOLFSSL_AES_128 */
+#ifdef WOLFSSL_AES_192
+ const byte iv2[] = {
+ 0x0a,0x02,0x84,0x6b,0x62,0xab,0xb6,0x93,
+ 0xef,0x31,0xd7,0x54,0x84,0x2e,0xed,0x29
+ };
+
+ const byte key2[] =
+ {
+ 0xba,0xf0,0x8b,0x76,0x31,0x7a,0x65,0xc5,
+ 0xf0,0x7a,0xe6,0xf5,0x7e,0xb0,0xe6,0x54,
+ 0x88,0x65,0x93,0x24,0xd2,0x97,0x09,0xe3
+ };
+
+ const byte cipher2[] =
+ {
+ 0x72,0x9c,0x0b,0x6d,0xeb,0x75,0xfa,0x6e,
+ 0xb5,0xe8
+ };
+
+ const byte msg2[] =
+ {
+ 0x98,0x95,0x93,0x24,0x02,0x39,0x3d,0xc3,
+ 0x3a,0x60
+ };
+#endif
+#ifdef WOLFSSL_AES_256
+ const byte iv3[] = {
+ 0x33,0x8c,0x55,0x2f,0xf1,0xec,0xa1,0x44,
+ 0x08,0xe0,0x5d,0x8c,0xf9,0xf3,0xb3,0x1b
+ };
+
+ const byte key3[] =
+ {
+ 0x06,0x48,0x74,0x09,0x2f,0x7a,0x13,0xcc,
+ 0x44,0x62,0x24,0x7a,0xd4,0x23,0xd0,0xe9,
+ 0x6e,0xdf,0x42,0xe8,0xb6,0x7a,0x5a,0x23,
+ 0xb7,0xa0,0xa6,0x47,0x7b,0x09,0x8e,0x66
+ };
+
+ const byte cipher3[] =
+ {
+ 0x1c,0xff,0x95
+ };
+
+ const byte msg3[] =
+ {
+ 0xb9,0x74,0xfa
+ };
+#endif
+
+ if (wc_AesInit(&enc, HEAP_HINT, devId) != 0)
+ return -5015;
+#ifdef HAVE_AES_DECRYPT
+ if (wc_AesInit(&dec, HEAP_HINT, devId) != 0)
+ return -5016;
+#endif
+
+#ifdef WOLFSSL_AES_128
+ /* 128 key tests */
+ #ifdef OPENSSL_EXTRA
+ ret = EVP_test(EVP_aes_128_cfb8(), key1, iv, msg1, sizeof(msg1),
+ cipher1, sizeof(cipher1));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+ /* key1 is 16 bytes, so AES_BLOCK_SIZE doubles as its length here */
+ ret = wc_AesSetKey(&enc, key1, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5017;
+ #ifdef HAVE_AES_DECRYPT
+ /* decrypt uses AES_ENCRYPTION */
+ ret = wc_AesSetKey(&dec, key1, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5018;
+ #endif
+
+ /* CFB8 is a stream-style mode: 3-byte messages are legal here */
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfb8Encrypt(&enc, cipher, msg1, sizeof(msg1));
+ if (ret != 0)
+ return -5019;
+
+ if (XMEMCMP(cipher, cipher1, sizeof(cipher1)) != 0)
+ return -5020;
+
+ #ifdef HAVE_AES_DECRYPT
+ ret = wc_AesCfb8Decrypt(&dec, plain, cipher, sizeof(msg1));
+ if (ret != 0)
+ return -5021;
+
+ if (XMEMCMP(plain, msg1, sizeof(msg1)) != 0)
+ return -5022;
+ #endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_128 */
+#ifdef WOLFSSL_AES_192
+ /* 192 key tests */
+ ret = wc_AesSetKey(&enc, key2, sizeof(key2), iv2, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5023;
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfb8Encrypt(&enc, cipher, msg2, sizeof(msg2));
+ if (ret != 0)
+ return -5024;
+ /* sizeof(msg2) == sizeof(cipher2); either works as the compare length */
+ if (XMEMCMP(cipher, cipher2, sizeof(msg2)) != 0)
+ return -5025;
+#ifdef OPENSSL_EXTRA
+ ret = EVP_test(EVP_aes_192_cfb8(), key2, iv2, msg2, sizeof(msg2),
+ cipher2, sizeof(msg2));
+ if (ret != 0) {
+ return ret;
+ }
+#endif
+
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ /* 256 key tests */
+ ret = wc_AesSetKey(&enc, key3, sizeof(key3), iv3, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5026;
+
+ XMEMSET(cipher, 0, sizeof(cipher));
+ ret = wc_AesCfb8Encrypt(&enc, cipher, msg3, sizeof(msg3));
+ if (ret != 0)
+ return -5027;
+ if (XMEMCMP(cipher, cipher3, sizeof(cipher3)) != 0)
+ return -5028;
+
+ #ifdef OPENSSL_EXTRA
+ ret = EVP_test(EVP_aes_256_cfb8(), key3, iv3, msg3, sizeof(msg3),
+ cipher3, sizeof(msg3));
+ if (ret != 0) {
+ return ret;
+ }
+ #endif
+#endif /* WOLFSSL_AES_256 */
+
+ return ret;
+ }
+#endif /* !HAVE_SELFTEST && !HAVE_FIPS */
+#endif /* WOLFSSL_AES_CFB */
+
+
+static int aes_key_size_test(void)
+{
+    /* Parameter-validation tests for wc_AesInit / wc_AesSetKey /
+     * wc_AesGetKeySize: NULL arguments, an invalid key length (31 bytes),
+     * forced-invalid rounds, and round-trips of each legal key size
+     * (16/24/32 bytes) with the size read back via wc_AesGetKeySize.
+     * Expected results flip between 0 and BAD_FUNC_ARG depending on which
+     * of WOLFSSL_AES_128/192/256 are compiled in.
+     * Returns 0 on success or a unique negative test code on failure. */
+    int ret;
+    Aes aes;
+    byte key16[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+                     0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 };
+    byte key24[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+                     0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
+                     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37 };
+    byte key32[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+                     0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
+                     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+                     0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 };
+    byte iv[]    = "1234567890abcdef";
+#ifndef HAVE_FIPS
+    word32 keySize;
+#endif
+
+#if !defined(HAVE_FIPS) || \
+    defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
+    /* w/ FIPS v1 (cert 2425) wc_AesInit just returns 0 always as it's not
+     * supported with that FIPS version */
+    ret = wc_AesInit(NULL, HEAP_HINT, devId);
+    if (ret != BAD_FUNC_ARG)
+        return -5100;
+#endif
+
+    ret = wc_AesInit(&aes, HEAP_HINT, devId);
+    /* 0 check OK for FIPSv1 */
+    if (ret != 0)
+        return -5101;
+
+#ifndef HAVE_FIPS
+    /* Parameter Validation testing. */
+    ret = wc_AesGetKeySize(NULL, NULL);
+    if (ret != BAD_FUNC_ARG)
+        return -5102;
+    ret = wc_AesGetKeySize(&aes, NULL);
+    if (ret != BAD_FUNC_ARG)
+        return -5103;
+    ret = wc_AesGetKeySize(NULL, &keySize);
+    if (ret != BAD_FUNC_ARG)
+        return -5104;
+    /* Crashes in FIPS */
+    ret = wc_AesSetKey(NULL, key16, sizeof(key16), iv, AES_ENCRYPTION);
+    if (ret != BAD_FUNC_ARG)
+        return -5105;
+#endif
+    /* NULL IV indicates to use all zeros IV. */
+    ret = wc_AesSetKey(&aes, key16, sizeof(key16), NULL, AES_ENCRYPTION);
+#ifdef WOLFSSL_AES_128
+    if (ret != 0)
+#else
+    if (ret != BAD_FUNC_ARG)
+#endif
+        return -5106;
+    /* 31-byte key: not a legal AES key length, must be rejected */
+    ret = wc_AesSetKey(&aes, key32, sizeof(key32) - 1, iv, AES_ENCRYPTION);
+    if (ret != BAD_FUNC_ARG)
+        return -5107;
+/* CryptoCell handles rounds internally */
+#if !defined(HAVE_FIPS) && !defined(WOLFSSL_CRYPTOCELL)
+    /* Force invalid rounds */
+    aes.rounds = 16;
+    ret = wc_AesGetKeySize(&aes, &keySize);
+    if (ret != BAD_FUNC_ARG)
+        return -5108;
+#endif
+
+    ret = wc_AesSetKey(&aes, key16, sizeof(key16), iv, AES_ENCRYPTION);
+#ifdef WOLFSSL_AES_128
+    if (ret != 0)
+#else
+    if (ret != BAD_FUNC_ARG)
+#endif
+        return -5109;
+#if !defined(HAVE_FIPS) && defined(WOLFSSL_AES_128)
+    ret = wc_AesGetKeySize(&aes, &keySize);
+    if (ret != 0 || keySize != sizeof(key16))
+        return -5110;
+#endif
+
+    ret = wc_AesSetKey(&aes, key24, sizeof(key24), iv, AES_ENCRYPTION);
+#ifdef WOLFSSL_AES_192
+    if (ret != 0)
+#else
+    if (ret != BAD_FUNC_ARG)
+#endif
+        return -5111;
+#if !defined(HAVE_FIPS) && defined(WOLFSSL_AES_192)
+    ret = wc_AesGetKeySize(&aes, &keySize);
+    if (ret != 0 || keySize != sizeof(key24))
+        return -5112;
+#endif
+
+    ret = wc_AesSetKey(&aes, key32, sizeof(key32), iv, AES_ENCRYPTION);
+#ifdef WOLFSSL_AES_256
+    if (ret != 0)
+#else
+    if (ret != BAD_FUNC_ARG)
+#endif
+        return -5113;
+#if !defined(HAVE_FIPS) && defined(WOLFSSL_AES_256)
+    ret = wc_AesGetKeySize(&aes, &keySize);
+    if (ret != 0 || keySize != sizeof(key32))
+        return -5114;
+#endif
+
+    return 0;
+}
+
+#if defined(WOLFSSL_AES_XTS)
+/* test vectors from http://csrc.nist.gov/groups/STM/cavp/block-cipher-modes.html */
+#ifdef WOLFSSL_AES_128
+static int aes_xts_128_test(void)
+{
+    /* AES-128-XTS known-answer tests (NIST CAVP vectors, per the comment
+     * above this function). Exercises: full-block encrypt for two key
+     * sets, partial-block (24-byte, i.e. ciphertext-stealing) encrypt and
+     * decrypt, a NIST decrypt vector, and a negative test decrypting with
+     * the wrong key. Returns 0 on success or a negative test code. */
+    XtsAes aes;
+    int ret = 0;
+    unsigned char buf[AES_BLOCK_SIZE * 2];
+    unsigned char cipher[AES_BLOCK_SIZE * 2];
+
+    /* 128 key tests */
+    /* NOTE: XTS keys are double length — these 32 bytes are the two
+     * concatenated 16-byte AES-128 keys (key1 || key2/tweak key). */
+    static unsigned char k1[] = {
+        0xa1, 0xb9, 0x0c, 0xba, 0x3f, 0x06, 0xac, 0x35,
+        0x3b, 0x2c, 0x34, 0x38, 0x76, 0x08, 0x17, 0x62,
+        0x09, 0x09, 0x23, 0x02, 0x6e, 0x91, 0x77, 0x18,
+        0x15, 0xf2, 0x9d, 0xab, 0x01, 0x93, 0x2f, 0x2f
+    };
+
+    static unsigned char i1[] = {
+        0x4f, 0xae, 0xf7, 0x11, 0x7c, 0xda, 0x59, 0xc6,
+        0x6e, 0x4b, 0x92, 0x01, 0x3e, 0x76, 0x8a, 0xd5
+    };
+
+    static unsigned char p1[] = {
+        0xeb, 0xab, 0xce, 0x95, 0xb1, 0x4d, 0x3c, 0x8d,
+        0x6f, 0xb3, 0x50, 0x39, 0x07, 0x90, 0x31, 0x1c
+    };
+
+    /* plain text test of partial block is not from NIST test vector list */
+    static unsigned char pp[] = {
+        0xeb, 0xab, 0xce, 0x95, 0xb1, 0x4d, 0x3c, 0x8d,
+        0x6f, 0xb3, 0x50, 0x39, 0x07, 0x90, 0x31, 0x1c,
+        0x6e, 0x4b, 0x92, 0x01, 0x3e, 0x76, 0x8a, 0xd5
+    };
+
+    static unsigned char c1[] = {
+        0x77, 0x8a, 0xe8, 0xb4, 0x3c, 0xb9, 0x8d, 0x5a,
+        0x82, 0x50, 0x81, 0xd5, 0xbe, 0x47, 0x1c, 0x63
+    };
+
+    static unsigned char k2[] = {
+        0x39, 0x25, 0x79, 0x05, 0xdf, 0xcc, 0x77, 0x76,
+        0x6c, 0x87, 0x0a, 0x80, 0x6a, 0x60, 0xe3, 0xc0,
+        0x93, 0xd1, 0x2a, 0xcf, 0xcb, 0x51, 0x42, 0xfa,
+        0x09, 0x69, 0x89, 0x62, 0x5b, 0x60, 0xdb, 0x16
+    };
+
+    static unsigned char i2[] = {
+        0x5c, 0xf7, 0x9d, 0xb6, 0xc5, 0xcd, 0x99, 0x1a,
+        0x1c, 0x78, 0x81, 0x42, 0x24, 0x95, 0x1e, 0x84
+    };
+
+    static unsigned char p2[] = {
+        0xbd, 0xc5, 0x46, 0x8f, 0xbc, 0x8d, 0x50, 0xa1,
+        0x0d, 0x1c, 0x85, 0x7f, 0x79, 0x1c, 0x5c, 0xba,
+        0xb3, 0x81, 0x0d, 0x0d, 0x73, 0xcf, 0x8f, 0x20,
+        0x46, 0xb1, 0xd1, 0x9e, 0x7d, 0x5d, 0x8a, 0x56
+    };
+
+    static unsigned char c2[] = {
+        0xd6, 0xbe, 0x04, 0x6d, 0x41, 0xf2, 0x3b, 0x5e,
+        0xd7, 0x0b, 0x6b, 0x3d, 0x5c, 0x8e, 0x66, 0x23,
+        0x2b, 0xe6, 0xb8, 0x07, 0xd4, 0xdc, 0xc6, 0x0e,
+        0xff, 0x8d, 0xbc, 0x1d, 0x9f, 0x7f, 0xc8, 0x22
+    };
+
+#ifdef OPENSSL_EXTRA
+    ret = EVP_test(EVP_aes_128_xts(), k2, i2, p2, sizeof(p2), c2, sizeof(c2));
+    if (ret != 0) {
+        printf("EVP_aes_128_xts failed!\n");
+        return ret;
+    }
+#endif
+
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k2, sizeof(k2), AES_ENCRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5200;
+
+    ret = wc_AesXtsEncrypt(&aes, buf, p2, sizeof(p2), i2, sizeof(i2));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5201;
+    if (XMEMCMP(c2, buf, sizeof(c2)))
+        return -5202;
+
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_ENCRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5203;
+    ret = wc_AesXtsEncrypt(&aes, buf, p1, sizeof(p1), i1, sizeof(i1));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5204;
+    if (XMEMCMP(c1, buf, AES_BLOCK_SIZE))
+        return -5205;
+
+    /* partial block encryption test */
+    XMEMSET(cipher, 0, sizeof(cipher));
+    ret = wc_AesXtsEncrypt(&aes, cipher, pp, sizeof(pp), i1, sizeof(i1));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5206;
+    wc_AesXtsFree(&aes);
+
+    /* partial block decrypt test */
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_DECRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5207;
+    ret = wc_AesXtsDecrypt(&aes, buf, cipher, sizeof(pp), i1, sizeof(i1));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5208;
+    if (XMEMCMP(pp, buf, sizeof(pp)))
+        return -5209;
+
+    /* NIST decrypt test vector */
+    XMEMSET(buf, 0, sizeof(buf));
+    ret = wc_AesXtsDecrypt(&aes, buf, c1, sizeof(c1), i1, sizeof(i1));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5210;
+    if (XMEMCMP(p1, buf, AES_BLOCK_SIZE))
+        return -5211;
+
+    /* fail case with decrypting using wrong key */
+    XMEMSET(buf, 0, sizeof(buf));
+    ret = wc_AesXtsDecrypt(&aes, buf, c2, sizeof(c2), i2, sizeof(i2));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5212;
+    if (XMEMCMP(p2, buf, sizeof(p2)) == 0) /* fail case with wrong key */
+        return -5213;
+
+    /* set correct key and retest */
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k2, sizeof(k2), AES_DECRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5214;
+    ret = wc_AesXtsDecrypt(&aes, buf, c2, sizeof(c2), i2, sizeof(i2));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5215;
+    if (XMEMCMP(p2, buf, sizeof(p2)))
+        return -5216;
+    wc_AesXtsFree(&aes);
+
+    return ret;
+}
+#endif /* WOLFSSL_AES_128 */
+
+
+#ifdef WOLFSSL_AES_256
+static int aes_xts_256_test(void)
+{
+    /* AES-256-XTS known-answer tests, mirroring aes_xts_128_test():
+     * full-length encrypt/decrypt for two 64-byte (double-length) keys,
+     * a partial-block (24-byte) encrypt/decrypt round trip, and a NIST
+     * decrypt vector. Returns 0 on success or a negative test code. */
+    XtsAes aes;
+    int ret = 0;
+    unsigned char buf[AES_BLOCK_SIZE * 3];
+    unsigned char cipher[AES_BLOCK_SIZE * 3];
+
+    /* 256 key tests */
+    /* 64 bytes = two concatenated 32-byte AES-256 keys (XTS key format) */
+    static unsigned char k1[] = {
+        0x1e, 0xa6, 0x61, 0xc5, 0x8d, 0x94, 0x3a, 0x0e,
+        0x48, 0x01, 0xe4, 0x2f, 0x4b, 0x09, 0x47, 0x14,
+        0x9e, 0x7f, 0x9f, 0x8e, 0x3e, 0x68, 0xd0, 0xc7,
+        0x50, 0x52, 0x10, 0xbd, 0x31, 0x1a, 0x0e, 0x7c,
+        0xd6, 0xe1, 0x3f, 0xfd, 0xf2, 0x41, 0x8d, 0x8d,
+        0x19, 0x11, 0xc0, 0x04, 0xcd, 0xa5, 0x8d, 0xa3,
+        0xd6, 0x19, 0xb7, 0xe2, 0xb9, 0x14, 0x1e, 0x58,
+        0x31, 0x8e, 0xea, 0x39, 0x2c, 0xf4, 0x1b, 0x08
+    };
+
+    static unsigned char i1[] = {
+        0xad, 0xf8, 0xd9, 0x26, 0x27, 0x46, 0x4a, 0xd2,
+        0xf0, 0x42, 0x8e, 0x84, 0xa9, 0xf8, 0x75, 0x64
+    };
+
+    static unsigned char p1[] = {
+        0x2e, 0xed, 0xea, 0x52, 0xcd, 0x82, 0x15, 0xe1,
+        0xac, 0xc6, 0x47, 0xe8, 0x10, 0xbb, 0xc3, 0x64,
+        0x2e, 0x87, 0x28, 0x7f, 0x8d, 0x2e, 0x57, 0xe3,
+        0x6c, 0x0a, 0x24, 0xfb, 0xc1, 0x2a, 0x20, 0x2e
+    };
+
+    /* plain text test of partial block is not from NIST test vector list */
+    static unsigned char pp[] = {
+        0xeb, 0xab, 0xce, 0x95, 0xb1, 0x4d, 0x3c, 0x8d,
+        0x6f, 0xb3, 0x50, 0x39, 0x07, 0x90, 0x31, 0x1c,
+        0x6e, 0x4b, 0x92, 0x01, 0x3e, 0x76, 0x8a, 0xd5
+    };
+
+    static unsigned char c1[] = {
+        0xcb, 0xaa, 0xd0, 0xe2, 0xf6, 0xce, 0xa3, 0xf5,
+        0x0b, 0x37, 0xf9, 0x34, 0xd4, 0x6a, 0x9b, 0x13,
+        0x0b, 0x9d, 0x54, 0xf0, 0x7e, 0x34, 0xf3, 0x6a,
+        0xf7, 0x93, 0xe8, 0x6f, 0x73, 0xc6, 0xd7, 0xdb
+    };
+
+    static unsigned char k2[] = {
+        0xad, 0x50, 0x4b, 0x85, 0xd7, 0x51, 0xbf, 0xba,
+        0x69, 0x13, 0xb4, 0xcc, 0x79, 0xb6, 0x5a, 0x62,
+        0xf7, 0xf3, 0x9d, 0x36, 0x0f, 0x35, 0xb5, 0xec,
+        0x4a, 0x7e, 0x95, 0xbd, 0x9b, 0xa5, 0xf2, 0xec,
+        0xc1, 0xd7, 0x7e, 0xa3, 0xc3, 0x74, 0xbd, 0x4b,
+        0x13, 0x1b, 0x07, 0x83, 0x87, 0xdd, 0x55, 0x5a,
+        0xb5, 0xb0, 0xc7, 0xe5, 0x2d, 0xb5, 0x06, 0x12,
+        0xd2, 0xb5, 0x3a, 0xcb, 0x47, 0x8a, 0x53, 0xb4
+    };
+
+    static unsigned char i2[] = {
+        0xe6, 0x42, 0x19, 0xed, 0xe0, 0xe1, 0xc2, 0xa0,
+        0x0e, 0xf5, 0x58, 0x6a, 0xc4, 0x9b, 0xeb, 0x6f
+    };
+
+    static unsigned char p2[] = {
+        0x24, 0xcb, 0x76, 0x22, 0x55, 0xb5, 0xa8, 0x00,
+        0xf4, 0x6e, 0x80, 0x60, 0x56, 0x9e, 0x05, 0x53,
+        0xbc, 0xfe, 0x86, 0x55, 0x3b, 0xca, 0xd5, 0x89,
+        0xc7, 0x54, 0x1a, 0x73, 0xac, 0xc3, 0x9a, 0xbd,
+        0x53, 0xc4, 0x07, 0x76, 0xd8, 0xe8, 0x22, 0x61,
+        0x9e, 0xa9, 0xad, 0x77, 0xa0, 0x13, 0x4c, 0xfc
+    };
+
+    static unsigned char c2[] = {
+        0xa3, 0xc6, 0xf3, 0xf3, 0x82, 0x79, 0x5b, 0x10,
+        0x87, 0xd7, 0x02, 0x50, 0xdb, 0x2c, 0xd3, 0xb1,
+        0xa1, 0x62, 0xa8, 0xb6, 0xdc, 0x12, 0x60, 0x61,
+        0xc1, 0x0a, 0x84, 0xa5, 0x85, 0x3f, 0x3a, 0x89,
+        0xe6, 0x6c, 0xdb, 0xb7, 0x9a, 0xb4, 0x28, 0x9b,
+        0xc3, 0xea, 0xd8, 0x10, 0xe9, 0xc0, 0xaf, 0x92
+    };
+
+#ifdef OPENSSL_EXTRA
+    ret = EVP_test(EVP_aes_256_xts(), k2, i2, p2, sizeof(p2), c2, sizeof(c2));
+    if (ret != 0) {
+        printf("EVP_aes_256_xts failed\n");
+        return ret;
+    }
+#endif
+
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k2, sizeof(k2), AES_ENCRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5300;
+    ret = wc_AesXtsEncrypt(&aes, buf, p2, sizeof(p2), i2, sizeof(i2));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5301;
+    if (XMEMCMP(c2, buf, sizeof(c2)))
+        return -5302;
+
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_ENCRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5303;
+    ret = wc_AesXtsEncrypt(&aes, buf, p1, sizeof(p1), i1, sizeof(i1));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5304;
+    if (XMEMCMP(c1, buf, AES_BLOCK_SIZE))
+        return -5305;
+
+    /* partial block encryption test */
+    XMEMSET(cipher, 0, sizeof(cipher));
+    ret = wc_AesXtsEncrypt(&aes, cipher, pp, sizeof(pp), i1, sizeof(i1));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5306;
+    wc_AesXtsFree(&aes);
+
+    /* partial block decrypt test */
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_DECRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5307;
+    ret = wc_AesXtsDecrypt(&aes, buf, cipher, sizeof(pp), i1, sizeof(i1));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5308;
+    if (XMEMCMP(pp, buf, sizeof(pp)))
+        return -5309;
+
+    /* NIST decrypt test vector */
+    XMEMSET(buf, 0, sizeof(buf));
+    ret = wc_AesXtsDecrypt(&aes, buf, c1, sizeof(c1), i1, sizeof(i1));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5310;
+    if (XMEMCMP(p1, buf, AES_BLOCK_SIZE))
+        return -5311;
+
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k2, sizeof(k2), AES_DECRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5312;
+    ret = wc_AesXtsDecrypt(&aes, buf, c2, sizeof(c2), i2, sizeof(i2));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5313;
+    if (XMEMCMP(p2, buf, sizeof(p2)))
+        return -5314;
+    wc_AesXtsFree(&aes);
+
+    return ret;
+}
+#endif /* WOLFSSL_AES_256 */
+
+
+#if defined(WOLFSSL_AES_128) && defined(WOLFSSL_AES_256)
+/* both 128 and 256 bit key test */
+static int aes_xts_sector_test(void)
+{
+    /* XTS sector-addressed API tests: wc_AesXtsEncryptSector /
+     * wc_AesXtsDecryptSector take a word64 sector number (here 141 and
+     * 187) instead of an explicit tweak buffer. Round-trips one vector
+     * with a 128-bit key and one with a 256-bit key.
+     * Returns 0 on success or a negative test code on failure. */
+    XtsAes aes;
+    int ret = 0;
+    unsigned char buf[AES_BLOCK_SIZE * 2];
+
+    /* 128 key tests */
+    static unsigned char k1[] = {
+        0xa3, 0xe4, 0x0d, 0x5b, 0xd4, 0xb6, 0xbb, 0xed,
+        0xb2, 0xd1, 0x8c, 0x70, 0x0a, 0xd2, 0xdb, 0x22,
+        0x10, 0xc8, 0x11, 0x90, 0x64, 0x6d, 0x67, 0x3c,
+        0xbc, 0xa5, 0x3f, 0x13, 0x3e, 0xab, 0x37, 0x3c
+    };
+
+    static unsigned char p1[] = {
+        0x20, 0xe0, 0x71, 0x94, 0x05, 0x99, 0x3f, 0x09,
+        0xa6, 0x6a, 0xe5, 0xbb, 0x50, 0x0e, 0x56, 0x2c
+    };
+
+    static unsigned char c1[] = {
+        0x74, 0x62, 0x35, 0x51, 0x21, 0x02, 0x16, 0xac,
+        0x92, 0x6b, 0x96, 0x50, 0xb6, 0xd3, 0xfa, 0x52
+    };
+    word64 s1 = 141;
+
+    /* 256 key tests */
+    static unsigned char k2[] = {
+        0xef, 0x01, 0x0c, 0xa1, 0xa3, 0x66, 0x3e, 0x32,
+        0x53, 0x43, 0x49, 0xbc, 0x0b, 0xae, 0x62, 0x23,
+        0x2a, 0x15, 0x73, 0x34, 0x85, 0x68, 0xfb, 0x9e,
+        0xf4, 0x17, 0x68, 0xa7, 0x67, 0x4f, 0x50, 0x7a,
+        0x72, 0x7f, 0x98, 0x75, 0x53, 0x97, 0xd0, 0xe0,
+        0xaa, 0x32, 0xf8, 0x30, 0x33, 0x8c, 0xc7, 0xa9,
+        0x26, 0xc7, 0x73, 0xf0, 0x9e, 0x57, 0xb3, 0x57,
+        0xcd, 0x15, 0x6a, 0xfb, 0xca, 0x46, 0xe1, 0xa0
+    };
+
+    static unsigned char p2[] = {
+        0xed, 0x98, 0xe0, 0x17, 0x70, 0xa8, 0x53, 0xb4,
+        0x9d, 0xb9, 0xe6, 0xaa, 0xf8, 0x8f, 0x0a, 0x41,
+        0xb9, 0xb5, 0x6e, 0x91, 0xa5, 0xa2, 0xb1, 0x1d,
+        0x40, 0x52, 0x92, 0x54, 0xf5, 0x52, 0x3e, 0x75
+    };
+
+    static unsigned char c2[] = {
+        0xca, 0x20, 0xc5, 0x5e, 0x8d, 0xc1, 0x49, 0x68,
+        0x7d, 0x25, 0x41, 0xde, 0x39, 0xc3, 0xdf, 0x63,
+        0x00, 0xbb, 0x5a, 0x16, 0x3c, 0x10, 0xce, 0xd3,
+        0x66, 0x6b, 0x13, 0x57, 0xdb, 0x8b, 0xd3, 0x9d
+    };
+    word64 s2 = 187;
+
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_ENCRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5400;
+    ret = wc_AesXtsEncryptSector(&aes, buf, p1, sizeof(p1), s1);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5401;
+    if (XMEMCMP(c1, buf, AES_BLOCK_SIZE))
+        return -5402;
+
+    /* decrypt test */
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_DECRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5403;
+    ret = wc_AesXtsDecryptSector(&aes, buf, c1, sizeof(c1), s1);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5404;
+    if (XMEMCMP(p1, buf, AES_BLOCK_SIZE))
+        return -5405;
+    wc_AesXtsFree(&aes);
+
+    /* 256 bit key tests */
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k2, sizeof(k2), AES_ENCRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5406;
+    ret = wc_AesXtsEncryptSector(&aes, buf, p2, sizeof(p2), s2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5407;
+    if (XMEMCMP(c2, buf, sizeof(c2)))
+        return -5408;
+
+    /* decrypt test */
+    XMEMSET(buf, 0, sizeof(buf));
+    if (wc_AesXtsSetKey(&aes, k2, sizeof(k2), AES_DECRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5409;
+    ret = wc_AesXtsDecryptSector(&aes, buf, c2, sizeof(c2), s2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0)
+        return -5410;
+    if (XMEMCMP(p2, buf, sizeof(p2)))
+        return -5411;
+    wc_AesXtsFree(&aes);
+
+    return ret;
+}
+#endif /* WOLFSSL_AES_128 && WOLFSSL_AES_256 */
+
+
+#ifdef WOLFSSL_AES_128
+/* testing of bad arguments */
+static int aes_xts_args_test(void)
+{
+    /* Negative-argument tests for the XTS API: NULL aes context, NULL key
+     * and NULL output buffers must all be rejected (non-zero return).
+     * Returns 0 on success or a negative test code on failure. */
+    XtsAes aes;
+    int ret = 0;
+    unsigned char buf[AES_BLOCK_SIZE * 2];
+
+    /* 128 key tests */
+    static unsigned char k1[] = {
+        0xa3, 0xe4, 0x0d, 0x5b, 0xd4, 0xb6, 0xbb, 0xed,
+        0xb2, 0xd1, 0x8c, 0x70, 0x0a, 0xd2, 0xdb, 0x22,
+        0x10, 0xc8, 0x11, 0x90, 0x64, 0x6d, 0x67, 0x3c,
+        0xbc, 0xa5, 0x3f, 0x13, 0x3e, 0xab, 0x37, 0x3c
+    };
+
+    static unsigned char p1[] = {
+        0x20, 0xe0, 0x71, 0x94, 0x05, 0x99, 0x3f, 0x09,
+        0xa6, 0x6a, 0xe5, 0xbb, 0x50, 0x0e, 0x56, 0x2c
+    };
+
+    static unsigned char c1[] = {
+        0x74, 0x62, 0x35, 0x51, 0x21, 0x02, 0x16, 0xac,
+        0x92, 0x6b, 0x96, 0x50, 0xb6, 0xd3, 0xfa, 0x52
+    };
+    word64 s1 = 141;
+
+    /* NULL context / NULL key must fail (== 0 means it wrongly accepted) */
+    if (wc_AesXtsSetKey(NULL, k1, sizeof(k1), AES_ENCRYPTION,
+            HEAP_HINT, devId) == 0)
+        return -5500;
+    if (wc_AesXtsSetKey(&aes, NULL, sizeof(k1), AES_ENCRYPTION,
+            HEAP_HINT, devId) == 0)
+        return -5501;
+
+    /* encryption operations */
+    if (wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_ENCRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5502;
+    ret = wc_AesXtsEncryptSector(NULL, buf, p1, sizeof(p1), s1);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret == 0)
+        return -5503;
+
+    ret = wc_AesXtsEncryptSector(&aes, NULL, p1, sizeof(p1), s1);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret == 0)
+        return -5504;
+    wc_AesXtsFree(&aes);
+
+    /* decryption operations */
+    if (wc_AesXtsSetKey(&aes, k1, sizeof(k1), AES_DECRYPTION,
+            HEAP_HINT, devId) != 0)
+        return -5505;
+    ret = wc_AesXtsDecryptSector(NULL, buf, c1, sizeof(c1), s1);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret == 0)
+        return -5506;
+
+    ret = wc_AesXtsDecryptSector(&aes, NULL, c1, sizeof(c1), s1);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &aes.aes.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret == 0)
+        return -5507;
+    wc_AesXtsFree(&aes);
+
+    return 0;
+}
+#endif /* WOLFSSL_AES_128 */
+#endif /* WOLFSSL_AES_XTS */
+
+#if defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_128)
+static int aes_cbc_test(void)
+{
+    /* Tests the one-shot AES-CBC "WithKey" helpers: an invalid key length
+     * (17) must return BAD_FUNC_ARG, and a 16-byte key must encrypt one
+     * block and decrypt it back to the original message.
+     * Returns 0 on success or a negative test code on failure. */
+    byte cipher[AES_BLOCK_SIZE];
+    byte plain[AES_BLOCK_SIZE];
+    int ret;
+    const byte msg[] = { /* "Now is the time for all " w/o trailing 0 */
+        0x6e,0x6f,0x77,0x20,0x69,0x73,0x20,0x74,
+        0x68,0x65,0x20,0x74,0x69,0x6d,0x65,0x20,
+        0x66,0x6f,0x72,0x20,0x61,0x6c,0x6c,0x20
+    };
+    byte key[] = "0123456789abcdef   ";  /* align */
+    byte iv[]  = "1234567890abcdef   ";  /* align */
+
+    XMEMSET(cipher, 0, AES_BLOCK_SIZE);
+    XMEMSET(plain, 0, AES_BLOCK_SIZE);
+
+    /* Parameter Validation testing. */
+    /* key length 17 is not a valid AES key size */
+    ret = wc_AesCbcEncryptWithKey(cipher, msg, AES_BLOCK_SIZE, key, 17, NULL);
+    if (ret != BAD_FUNC_ARG)
+        return -5600;
+#ifdef HAVE_AES_DECRYPT
+    ret = wc_AesCbcDecryptWithKey(plain, cipher, AES_BLOCK_SIZE, key, 17, NULL);
+    if (ret != BAD_FUNC_ARG)
+        return -5601;
+#endif
+
+    ret = wc_AesCbcEncryptWithKey(cipher, msg, AES_BLOCK_SIZE, key,
+                                  AES_BLOCK_SIZE, iv);
+    if (ret != 0)
+        return -5602;
+#ifdef HAVE_AES_DECRYPT
+    ret = wc_AesCbcDecryptWithKey(plain, cipher, AES_BLOCK_SIZE, key,
+                                  AES_BLOCK_SIZE, iv);
+    if (ret != 0)
+        return -5603;
+    if (XMEMCMP(plain, msg, AES_BLOCK_SIZE) != 0)
+        return -5604;
+#endif /* HAVE_AES_DECRYPT */
+
+    /* plain is unused when HAVE_AES_DECRYPT is not defined */
+    (void)plain;
+    return 0;
+}
+#endif
+
int aes_test(void)
{
+#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_COUNTER)
Aes enc;
+ byte cipher[AES_BLOCK_SIZE * 4];
+#if defined(HAVE_AES_DECRYPT) || defined(WOLFSSL_AES_COUNTER)
Aes dec;
+ byte plain [AES_BLOCK_SIZE * 4];
+#endif
+#endif /* HAVE_AES_CBC || WOLFSSL_AES_COUNTER */
+ int ret = 0;
+#ifdef HAVE_AES_CBC
+#ifdef WOLFSSL_AES_128
const byte msg[] = { /* "Now is the time for all " w/o trailing 0 */
0x6e,0x6f,0x77,0x20,0x69,0x73,0x20,0x74,
0x68,0x65,0x20,0x74,0x69,0x6d,0x65,0x20,
@@ -2499,55 +7523,235 @@ int aes_test(void)
byte key[] = "0123456789abcdef "; /* align */
byte iv[] = "1234567890abcdef "; /* align */
- byte cipher[AES_BLOCK_SIZE * 4];
- byte plain [AES_BLOCK_SIZE * 4];
- int ret;
-
-#ifdef HAVE_CAVIUM
- if (wc_AesInitCavium(&enc, CAVIUM_DEV_ID) != 0)
- return -20003;
- if (wc_AesInitCavium(&dec, CAVIUM_DEV_ID) != 0)
- return -20004;
+ if (wc_AesInit(&enc, HEAP_HINT, devId) != 0)
+ return -5700;
+#if defined(HAVE_AES_DECRYPT) || defined(WOLFSSL_AES_COUNTER)
+ if (wc_AesInit(&dec, HEAP_HINT, devId) != 0)
+ return -5701;
#endif
ret = wc_AesSetKey(&enc, key, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
if (ret != 0)
- return -1001;
+ return -5702;
+#if defined(HAVE_AES_DECRYPT) || defined(WOLFSSL_AES_COUNTER)
ret = wc_AesSetKey(&dec, key, AES_BLOCK_SIZE, iv, AES_DECRYPTION);
if (ret != 0)
- return -1002;
+ return -5703;
+#endif
- ret = wc_AesCbcEncrypt(&enc, cipher, msg, AES_BLOCK_SIZE);
+ XMEMSET(cipher, 0, AES_BLOCK_SIZE * 4);
+ ret = wc_AesCbcEncrypt(&enc, cipher, msg, AES_BLOCK_SIZE);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
if (ret != 0)
- return -1005;
+ return -5704;
+#ifdef HAVE_AES_DECRYPT
+ XMEMSET(plain, 0, AES_BLOCK_SIZE * 4);
ret = wc_AesCbcDecrypt(&dec, plain, cipher, AES_BLOCK_SIZE);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
if (ret != 0)
- return -1006;
+ return -5705;
- if (memcmp(plain, msg, AES_BLOCK_SIZE))
- return -60;
+ if (XMEMCMP(plain, msg, AES_BLOCK_SIZE))
+ return -5706;
+#endif /* HAVE_AES_DECRYPT */
+ if (XMEMCMP(cipher, verify, AES_BLOCK_SIZE))
+ return -5707;
+#endif /* WOLFSSL_AES_128 */
- if (memcmp(cipher, verify, AES_BLOCK_SIZE))
- return -61;
+#if defined(WOLFSSL_AESNI) && defined(HAVE_AES_DECRYPT)
+ {
+ const byte bigMsg[] = {
+ /* "All work and no play makes Jack a dull boy. " */
+ 0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
+ 0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
+ 0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
+ 0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20,
+ 0x61,0x20,0x64,0x75,0x6c,0x6c,0x20,0x62,
+ 0x6f,0x79,0x2e,0x20,0x41,0x6c,0x6c,0x20,
+ 0x77,0x6f,0x72,0x6b,0x20,0x61,0x6e,0x64,
+ 0x20,0x6e,0x6f,0x20,0x70,0x6c,0x61,0x79,
+ 0x20,0x6d,0x61,0x6b,0x65,0x73,0x20,0x4a,
+ 0x61,0x63,0x6b,0x20,0x61,0x20,0x64,0x75,
+ 0x6c,0x6c,0x20,0x62,0x6f,0x79,0x2e,0x20,
+ 0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
+ 0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
+ 0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
+ 0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20,
+ 0x61,0x20,0x64,0x75,0x6c,0x6c,0x20,0x62,
+ 0x6f,0x79,0x2e,0x20,0x41,0x6c,0x6c,0x20,
+ 0x77,0x6f,0x72,0x6b,0x20,0x61,0x6e,0x64,
+ 0x20,0x6e,0x6f,0x20,0x70,0x6c,0x61,0x79,
+ 0x20,0x6d,0x61,0x6b,0x65,0x73,0x20,0x4a,
+ 0x61,0x63,0x6b,0x20,0x61,0x20,0x64,0x75,
+ 0x6c,0x6c,0x20,0x62,0x6f,0x79,0x2e,0x20,
+ 0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
+ 0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
+ 0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
+ 0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20,
+ 0x61,0x20,0x64,0x75,0x6c,0x6c,0x20,0x62,
+ 0x6f,0x79,0x2e,0x20,0x41,0x6c,0x6c,0x20,
+ 0x77,0x6f,0x72,0x6b,0x20,0x61,0x6e,0x64,
+ 0x20,0x6e,0x6f,0x20,0x70,0x6c,0x61,0x79,
+ 0x20,0x6d,0x61,0x6b,0x65,0x73,0x20,0x4a,
+ 0x61,0x63,0x6b,0x20,0x61,0x20,0x64,0x75,
+ 0x6c,0x6c,0x20,0x62,0x6f,0x79,0x2e,0x20,
+ 0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
+ 0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
+ 0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
+ 0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20,
+ 0x61,0x20,0x64,0x75,0x6c,0x6c,0x20,0x62,
+ 0x6f,0x79,0x2e,0x20,0x41,0x6c,0x6c,0x20,
+ 0x77,0x6f,0x72,0x6b,0x20,0x61,0x6e,0x64,
+ 0x20,0x6e,0x6f,0x20,0x70,0x6c,0x61,0x79,
+ 0x20,0x6d,0x61,0x6b,0x65,0x73,0x20,0x4a,
+ 0x61,0x63,0x6b,0x20,0x61,0x20,0x64,0x75,
+ 0x6c,0x6c,0x20,0x62,0x6f,0x79,0x2e,0x20,
+ 0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
+ 0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
+ 0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
+ 0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20
+ };
+ const byte bigKey[] = "0123456789abcdeffedcba9876543210";
+ byte bigCipher[sizeof(bigMsg)];
+ byte bigPlain[sizeof(bigMsg)];
+ word32 keySz, msgSz;
+
+ /* Iterate from one AES_BLOCK_SIZE of bigMsg through the whole
+ * message by AES_BLOCK_SIZE for each size of AES key. */
+ for (keySz = 16; keySz <= 32; keySz += 8) {
+ for (msgSz = AES_BLOCK_SIZE;
+ msgSz <= sizeof(bigMsg);
+ msgSz += AES_BLOCK_SIZE) {
+
+ XMEMSET(bigCipher, 0, sizeof(bigCipher));
+ XMEMSET(bigPlain, 0, sizeof(bigPlain));
+ ret = wc_AesSetKey(&enc, bigKey, keySz, iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5708;
+ ret = wc_AesSetKey(&dec, bigKey, keySz, iv, AES_DECRYPTION);
+ if (ret != 0)
+ return -5709;
+
+ ret = wc_AesCbcEncrypt(&enc, bigCipher, bigMsg, msgSz);
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+ #endif
+ if (ret != 0)
+ return -5710;
-#ifdef HAVE_CAVIUM
- wc_AesFreeCavium(&enc);
- wc_AesFreeCavium(&dec);
-#endif
-#ifdef WOLFSSL_AES_COUNTER
+ ret = wc_AesCbcDecrypt(&dec, bigPlain, bigCipher, msgSz);
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+ #endif
+ if (ret != 0)
+ return -5711;
+
+ if (XMEMCMP(bigPlain, bigMsg, msgSz))
+ return -5712;
+ }
+ }
+ }
+#endif /* WOLFSSL_AESNI && HAVE_AES_DECRYPT */
+
+ /* Test of AES IV state with encrypt/decrypt */
+#ifdef WOLFSSL_AES_128
{
- const byte ctrKey[] =
+ /* Test Vector from "NIST Special Publication 800-38A, 2001 Edition"
+ * https://nvlpubs.nist.gov/nistpubs/legacy/sp/nistspecialpublication800-38a.pdf
+ */
+ const byte msg2[] =
{
- 0x2b,0x7e,0x15,0x16,0x28,0xae,0xd2,0xa6,
- 0xab,0xf7,0x15,0x88,0x09,0xcf,0x4f,0x3c
+ 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
+ 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a,
+ 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
+ 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51
+ };
+
+ const byte verify2[] =
+ {
+ 0x76, 0x49, 0xab, 0xac, 0x81, 0x19, 0xb2, 0x46,
+ 0xce, 0xe9, 0x8e, 0x9b, 0x12, 0xe9, 0x19, 0x7d,
+ 0x50, 0x86, 0xcb, 0x9b, 0x50, 0x72, 0x19, 0xee,
+ 0x95, 0xdb, 0x11, 0x3a, 0x91, 0x76, 0x78, 0xb2
+ };
+ byte key2[] = {
+ 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
+ 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c
+ };
+ byte iv2[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
};
+
+ ret = wc_AesSetKey(&enc, key2, sizeof(key2), iv2, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5713;
+ XMEMSET(cipher, 0, AES_BLOCK_SIZE * 2);
+ ret = wc_AesCbcEncrypt(&enc, cipher, msg2, AES_BLOCK_SIZE);
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+ #endif
+ if (ret != 0)
+ return -5714;
+ if (XMEMCMP(cipher, verify2, AES_BLOCK_SIZE))
+ return -5715;
+
+ ret = wc_AesCbcEncrypt(&enc, cipher + AES_BLOCK_SIZE,
+ msg2 + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+ #endif
+ if (ret != 0)
+ return -5716;
+ if (XMEMCMP(cipher + AES_BLOCK_SIZE, verify2 + AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE))
+ return -5717;
+
+ #if defined(HAVE_AES_DECRYPT)
+ ret = wc_AesSetKey(&dec, key2, sizeof(key2), iv2, AES_DECRYPTION);
+ if (ret != 0)
+ return -5718;
+ XMEMSET(plain, 0, AES_BLOCK_SIZE * 2);
+ ret = wc_AesCbcDecrypt(&dec, plain, verify2, AES_BLOCK_SIZE);
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+ #endif
+ if (ret != 0)
+ return -5719;
+ if (XMEMCMP(plain, msg2, AES_BLOCK_SIZE))
+ return -5720;
+
+ ret = wc_AesCbcDecrypt(&dec, plain + AES_BLOCK_SIZE,
+ verify2 + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+ #endif
+ if (ret != 0)
+ return -5721;
+ if (XMEMCMP(plain + AES_BLOCK_SIZE, msg2 + AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE))
+ return -5722;
+
+ #endif /* HAVE_AES_DECRYPT */
+ }
+#endif /* WOLFSSL_AES_128 */
+#endif /* HAVE_AES_CBC */
+
+#ifdef WOLFSSL_AES_COUNTER
+ {
+ /* test vectors from "Recommendation for Block Cipher Modes of
+ * Operation" NIST Special Publication 800-38A */
+
const byte ctrIv[] =
{
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};
-
const byte ctrPlain[] =
{
0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
@@ -2560,7 +7764,20 @@ int aes_test(void)
0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10
};
- const byte ctrCipher[] =
+#ifdef WOLFSSL_AES_128
+ const byte oddCipher[] =
+ {
+ 0xb9,0xd7,0xcb,0x08,0xb0,0xe1,0x7b,0xa0,
+ 0xc2
+ };
+
+ const byte ctr128Key[] =
+ {
+ 0x2b,0x7e,0x15,0x16,0x28,0xae,0xd2,0xa6,
+ 0xab,0xf7,0x15,0x88,0x09,0xcf,0x4f,0x3c
+ };
+
+ const byte ctr128Cipher[] =
{
0x87,0x4d,0x61,0x91,0xb6,0x20,0xe3,0x26,
0x1b,0xef,0x68,0x64,0x99,0x0d,0xb6,0xce,
@@ -2571,53 +7788,163 @@ int aes_test(void)
0x1e,0x03,0x1d,0xda,0x2f,0xbe,0x03,0xd1,
0x79,0x21,0x70,0xa0,0xf3,0x00,0x9c,0xee
};
+#endif /* WOLFSSL_AES_128 */
- const byte oddCipher[] =
+#ifdef WOLFSSL_AES_192
+ const byte ctr192Key[] =
{
- 0xb9,0xd7,0xcb,0x08,0xb0,0xe1,0x7b,0xa0,
- 0xc2
+ 0x8e,0x73,0xb0,0xf7,0xda,0x0e,0x64,0x52,
+ 0xc8,0x10,0xf3,0x2b,0x80,0x90,0x79,0xe5,
+ 0x62,0xf8,0xea,0xd2,0x52,0x2c,0x6b,0x7b
};
- wc_AesSetKeyDirect(&enc, ctrKey, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION);
- /* Ctr only uses encrypt, even on key setup */
- wc_AesSetKeyDirect(&dec, ctrKey, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION);
+ const byte ctr192Cipher[] =
+ {
+ 0x1a,0xbc,0x93,0x24,0x17,0x52,0x1c,0xa2,
+ 0x4f,0x2b,0x04,0x59,0xfe,0x7e,0x6e,0x0b,
+ 0x09,0x03,0x39,0xec,0x0a,0xa6,0xfa,0xef,
+ 0xd5,0xcc,0xc2,0xc6,0xf4,0xce,0x8e,0x94,
+ 0x1e,0x36,0xb2,0x6b,0xd1,0xeb,0xc6,0x70,
+ 0xd1,0xbd,0x1d,0x66,0x56,0x20,0xab,0xf7,
+ 0x4f,0x78,0xa7,0xf6,0xd2,0x98,0x09,0x58,
+ 0x5a,0x97,0xda,0xec,0x58,0xc6,0xb0,0x50
+ };
+#endif
+#ifdef WOLFSSL_AES_256
+ const byte ctr256Key[] =
+ {
+ 0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
+ 0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
+ 0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
+ 0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
+ };
- wc_AesCtrEncrypt(&enc, cipher, ctrPlain, AES_BLOCK_SIZE*4);
- wc_AesCtrEncrypt(&dec, plain, cipher, AES_BLOCK_SIZE*4);
+ const byte ctr256Cipher[] =
+ {
+ 0x60,0x1e,0xc3,0x13,0x77,0x57,0x89,0xa5,
+ 0xb7,0xa7,0xf5,0x04,0xbb,0xf3,0xd2,0x28,
+ 0xf4,0x43,0xe3,0xca,0x4d,0x62,0xb5,0x9a,
+ 0xca,0x84,0xe9,0x90,0xca,0xca,0xf5,0xc5,
+ 0x2b,0x09,0x30,0xda,0xa2,0x3d,0xe9,0x4c,
+ 0xe8,0x70,0x17,0xba,0x2d,0x84,0x98,0x8d,
+ 0xdf,0xc9,0xc5,0x8d,0xb6,0x7a,0xad,0xa6,
+ 0x13,0xc2,0xdd,0x08,0x45,0x79,0x41,0xa6
+ };
+#endif
- if (memcmp(plain, ctrPlain, AES_BLOCK_SIZE*4))
- return -66;
+#ifdef WOLFSSL_AES_128
+ wc_AesSetKeyDirect(&enc, ctr128Key, sizeof(ctr128Key),
+ ctrIv, AES_ENCRYPTION);
+ /* Ctr only uses encrypt, even on key setup */
+ wc_AesSetKeyDirect(&dec, ctr128Key, sizeof(ctr128Key),
+ ctrIv, AES_ENCRYPTION);
+
+ ret = wc_AesCtrEncrypt(&enc, cipher, ctrPlain, sizeof(ctrPlain));
+ if (ret != 0) {
+ return -5723;
+ }
+ ret = wc_AesCtrEncrypt(&dec, plain, cipher, sizeof(ctrPlain));
+ if (ret != 0) {
+ return -5724;
+ }
+ if (XMEMCMP(plain, ctrPlain, sizeof(ctrPlain)))
+ return -5725;
- if (memcmp(cipher, ctrCipher, AES_BLOCK_SIZE*4))
- return -67;
+ if (XMEMCMP(cipher, ctr128Cipher, sizeof(ctr128Cipher)))
+ return -5726;
/* let's try with just 9 bytes, non block size test */
- wc_AesSetKeyDirect(&enc, ctrKey, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION);
+ wc_AesSetKeyDirect(&enc, ctr128Key, AES_BLOCK_SIZE,
+ ctrIv, AES_ENCRYPTION);
/* Ctr only uses encrypt, even on key setup */
- wc_AesSetKeyDirect(&dec, ctrKey, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION);
+ wc_AesSetKeyDirect(&dec, ctr128Key, AES_BLOCK_SIZE,
+ ctrIv, AES_ENCRYPTION);
- wc_AesCtrEncrypt(&enc, cipher, ctrPlain, 9);
- wc_AesCtrEncrypt(&dec, plain, cipher, 9);
+ ret = wc_AesCtrEncrypt(&enc, cipher, ctrPlain, sizeof(oddCipher));
+ if (ret != 0) {
+ return -5727;
+ }
+ ret = wc_AesCtrEncrypt(&dec, plain, cipher, sizeof(oddCipher));
+ if (ret != 0) {
+ return -5728;
+ }
- if (memcmp(plain, ctrPlain, 9))
- return -68;
+ if (XMEMCMP(plain, ctrPlain, sizeof(oddCipher)))
+ return -5729;
- if (memcmp(cipher, ctrCipher, 9))
- return -69;
+ if (XMEMCMP(cipher, ctr128Cipher, sizeof(oddCipher)))
+ return -5730;
/* and an additional 9 bytes to reuse tmp left buffer */
- wc_AesCtrEncrypt(&enc, cipher, ctrPlain, 9);
- wc_AesCtrEncrypt(&dec, plain, cipher, 9);
+ ret = wc_AesCtrEncrypt(&enc, cipher, ctrPlain, sizeof(oddCipher));
+ if (ret != 0) {
+ return -5731;
+ }
+ ret = wc_AesCtrEncrypt(&dec, plain, cipher, sizeof(oddCipher));
+ if (ret != 0) {
+ return -5732;
+ }
- if (memcmp(plain, ctrPlain, 9))
- return -70;
+ if (XMEMCMP(plain, ctrPlain, sizeof(oddCipher)))
+ return -5733;
+
+ if (XMEMCMP(cipher, oddCipher, sizeof(oddCipher)))
+ return -5734;
+#endif /* WOLFSSL_AES_128 */
+
+#ifdef WOLFSSL_AES_192
+ /* 192 bit key */
+ wc_AesSetKeyDirect(&enc, ctr192Key, sizeof(ctr192Key),
+ ctrIv, AES_ENCRYPTION);
+ /* Ctr only uses encrypt, even on key setup */
+ wc_AesSetKeyDirect(&dec, ctr192Key, sizeof(ctr192Key),
+ ctrIv, AES_ENCRYPTION);
- if (memcmp(cipher, oddCipher, 9))
- return -71;
+ XMEMSET(plain, 0, sizeof(plain));
+ ret = wc_AesCtrEncrypt(&enc, plain, ctr192Cipher, sizeof(ctr192Cipher));
+ if (ret != 0) {
+ return -5735;
+ }
+
+ if (XMEMCMP(plain, ctrPlain, sizeof(ctr192Cipher)))
+ return -5736;
+
+ ret = wc_AesCtrEncrypt(&dec, cipher, ctrPlain, sizeof(ctrPlain));
+ if (ret != 0) {
+ return -5737;
+ }
+ if (XMEMCMP(ctr192Cipher, cipher, sizeof(ctr192Cipher)))
+ return -5738;
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ /* 256 bit key */
+ wc_AesSetKeyDirect(&enc, ctr256Key, sizeof(ctr256Key),
+ ctrIv, AES_ENCRYPTION);
+ /* Ctr only uses encrypt, even on key setup */
+ wc_AesSetKeyDirect(&dec, ctr256Key, sizeof(ctr256Key),
+ ctrIv, AES_ENCRYPTION);
+
+ XMEMSET(plain, 0, sizeof(plain));
+ ret = wc_AesCtrEncrypt(&enc, plain, ctr256Cipher, sizeof(ctr256Cipher));
+ if (ret != 0) {
+ return -5739;
+ }
+
+ if (XMEMCMP(plain, ctrPlain, sizeof(ctrPlain)))
+ return -5740;
+
+ ret = wc_AesCtrEncrypt(&dec, cipher, ctrPlain, sizeof(ctrPlain));
+ if (ret != 0) {
+ return -5741;
+ }
+ if (XMEMCMP(ctr256Cipher, cipher, sizeof(ctr256Cipher)))
+ return -5742;
+#endif /* WOLFSSL_AES_256 */
}
#endif /* WOLFSSL_AES_COUNTER */
-#if defined(WOLFSSL_AESNI) && defined(WOLFSSL_AES_DIRECT)
+#if defined(WOLFSSL_AES_DIRECT) && defined(WOLFSSL_AES_256)
{
const byte niPlain[] =
{
@@ -2642,48 +7969,444 @@ int aes_test(void)
XMEMSET(cipher, 0, AES_BLOCK_SIZE);
ret = wc_AesSetKey(&enc, niKey, sizeof(niKey), cipher, AES_ENCRYPTION);
if (ret != 0)
- return -1003;
+ return -5743;
wc_AesEncryptDirect(&enc, cipher, niPlain);
if (XMEMCMP(cipher, niCipher, AES_BLOCK_SIZE) != 0)
- return -20006;
+ return -5744;
XMEMSET(plain, 0, AES_BLOCK_SIZE);
ret = wc_AesSetKey(&dec, niKey, sizeof(niKey), plain, AES_DECRYPTION);
if (ret != 0)
- return -1004;
+ return -5745;
wc_AesDecryptDirect(&dec, plain, niCipher);
if (XMEMCMP(plain, niPlain, AES_BLOCK_SIZE) != 0)
- return -20007;
+ return -5746;
}
-#endif /* WOLFSSL_AESNI && WOLFSSL_AES_DIRECT */
+#endif /* WOLFSSL_AES_DIRECT && WOLFSSL_AES_256 */
- return 0;
+ ret = aes_key_size_test();
+ if (ret != 0)
+ return ret;
+
+#if defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_128)
+ ret = aes_cbc_test();
+ if (ret != 0)
+ return ret;
+#endif
+
+#if defined(WOLFSSL_AES_XTS)
+ #ifdef WOLFSSL_AES_128
+ ret = aes_xts_128_test();
+ if (ret != 0)
+ return ret;
+ #endif
+ #ifdef WOLFSSL_AES_256
+ ret = aes_xts_256_test();
+ if (ret != 0)
+ return ret;
+ #endif
+ #if defined(WOLFSSL_AES_128) && defined(WOLFSSL_AES_256)
+ ret = aes_xts_sector_test();
+ if (ret != 0)
+ return ret;
+ #endif
+ #ifdef WOLFSSL_AES_128
+ ret = aes_xts_args_test();
+ if (ret != 0)
+ return ret;
+ #endif
+#endif
+
+#if defined(WOLFSSL_AES_CFB)
+ ret = aescfb_test();
+ if (ret != 0)
+ return ret;
+#if !defined(HAVE_SELFTEST) && !defined(HAVE_FIPS)
+ ret = aescfb1_test();
+ if (ret != 0)
+ return ret;
+
+ ret = aescfb8_test();
+ if (ret != 0)
+ return ret;
+#endif
+#endif
+
+
+#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_COUNTER)
+ wc_AesFree(&enc);
+ (void)cipher;
+#if defined(HAVE_AES_DECRYPT) || defined(WOLFSSL_AES_COUNTER)
+ wc_AesFree(&dec);
+ (void)plain;
+#endif
+#endif
+
+ return ret;
+}
+
+#ifdef WOLFSSL_AES_192
+int aes192_test(void)
+{
+#ifdef HAVE_AES_CBC
+ Aes enc;
+ byte cipher[AES_BLOCK_SIZE];
+#ifdef HAVE_AES_DECRYPT
+ Aes dec;
+ byte plain[AES_BLOCK_SIZE];
+#endif
+#endif /* HAVE_AES_CBC */
+ int ret = 0;
+
+#ifdef HAVE_AES_CBC
+ /* Test vectors from NIST Special Publication 800-38A, 2001 Edition
+ * Appendix F.2.3 */
+
+ const byte msg[] = {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte verify[] =
+ {
+ 0x4f,0x02,0x1d,0xb2,0x43,0xbc,0x63,0x3d,
+ 0x71,0x78,0x18,0x3a,0x9f,0xa0,0x71,0xe8
+ };
+
+ byte key[] = {
+ 0x8e,0x73,0xb0,0xf7,0xda,0x0e,0x64,0x52,
+ 0xc8,0x10,0xf3,0x2b,0x80,0x90,0x79,0xe5,
+ 0x62,0xf8,0xea,0xd2,0x52,0x2c,0x6b,0x7b
+ };
+ byte iv[] = {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
+ 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F
+ };
+
+
+ if (wc_AesInit(&enc, HEAP_HINT, devId) != 0)
+ return -5800;
+#ifdef HAVE_AES_DECRYPT
+ if (wc_AesInit(&dec, HEAP_HINT, devId) != 0)
+ return -5801;
+#endif
+
+ ret = wc_AesSetKey(&enc, key, (int) sizeof(key), iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5802;
+#ifdef HAVE_AES_DECRYPT
+ ret = wc_AesSetKey(&dec, key, (int) sizeof(key), iv, AES_DECRYPTION);
+ if (ret != 0)
+ return -5803;
+#endif
+
+ XMEMSET(cipher, 0, AES_BLOCK_SIZE);
+ ret = wc_AesCbcEncrypt(&enc, cipher, msg, (int) sizeof(msg));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0)
+ return -5804;
+#ifdef HAVE_AES_DECRYPT
+ XMEMSET(plain, 0, AES_BLOCK_SIZE);
+ ret = wc_AesCbcDecrypt(&dec, plain, cipher, (int) sizeof(cipher));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0)
+ return -5805;
+ if (XMEMCMP(plain, msg, (int) sizeof(plain))) {
+ return -5806;
+ }
+#endif
+
+ if (XMEMCMP(cipher, verify, (int) sizeof(cipher)))
+ return -5807;
+
+ wc_AesFree(&enc);
+#ifdef HAVE_AES_DECRYPT
+ wc_AesFree(&dec);
+#endif
+
+#endif /* HAVE_AES_CBC */
+
+ return ret;
+}
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+int aes256_test(void)
+{
+#ifdef HAVE_AES_CBC
+ Aes enc;
+ byte cipher[AES_BLOCK_SIZE];
+#ifdef HAVE_AES_DECRYPT
+ Aes dec;
+ byte plain[AES_BLOCK_SIZE];
+#endif
+#endif /* HAVE_AES_CBC */
+ int ret = 0;
+
+#ifdef HAVE_AES_CBC
+ /* Test vectors from NIST Special Publication 800-38A, 2001 Edition,
+ * Appendix F.2.5 */
+ const byte msg[] = {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte verify[] =
+ {
+ 0xf5,0x8c,0x4c,0x04,0xd6,0xe5,0xf1,0xba,
+ 0x77,0x9e,0xab,0xfb,0x5f,0x7b,0xfb,0xd6
+ };
+
+ byte key[] = {
+ 0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
+ 0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
+ 0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
+ 0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
+ };
+ byte iv[] = {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
+ 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F
+ };
+
+
+ if (wc_AesInit(&enc, HEAP_HINT, devId) != 0)
+ return -5900;
+#ifdef HAVE_AES_DECRYPT
+ if (wc_AesInit(&dec, HEAP_HINT, devId) != 0)
+ return -5901;
+#endif
+
+ ret = wc_AesSetKey(&enc, key, (int) sizeof(key), iv, AES_ENCRYPTION);
+ if (ret != 0)
+ return -5902;
+#ifdef HAVE_AES_DECRYPT
+ ret = wc_AesSetKey(&dec, key, (int) sizeof(key), iv, AES_DECRYPTION);
+ if (ret != 0)
+ return -5903;
+#endif
+
+ XMEMSET(cipher, 0, AES_BLOCK_SIZE);
+ ret = wc_AesCbcEncrypt(&enc, cipher, msg, (int) sizeof(msg));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0)
+ return -5904;
+#ifdef HAVE_AES_DECRYPT
+ XMEMSET(plain, 0, AES_BLOCK_SIZE);
+ ret = wc_AesCbcDecrypt(&dec, plain, cipher, (int) sizeof(cipher));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0)
+ return -5905;
+ if (XMEMCMP(plain, msg, (int) sizeof(plain))) {
+ return -5906;
+ }
+#endif
+
+ if (XMEMCMP(cipher, verify, (int) sizeof(cipher)))
+ return -5907;
+
+ wc_AesFree(&enc);
+#ifdef HAVE_AES_DECRYPT
+ wc_AesFree(&dec);
+#endif
+
+#endif /* HAVE_AES_CBC */
+
+ return ret;
}
+#endif /* WOLFSSL_AES_256 */
+
#ifdef HAVE_AESGCM
+
+static int aesgcm_default_test_helper(byte* key, int keySz, byte* iv, int ivSz,
+ byte* plain, int plainSz, byte* cipher, int cipherSz,
+ byte* aad, int aadSz, byte* tag, int tagSz)
+{
+ Aes enc;
+ Aes dec;
+
+ byte resultT[AES_BLOCK_SIZE];
+ byte resultP[AES_BLOCK_SIZE * 3];
+ byte resultC[AES_BLOCK_SIZE * 3];
+ int result;
+
+ XMEMSET(resultT, 0, sizeof(resultT));
+ XMEMSET(resultC, 0, sizeof(resultC));
+ XMEMSET(resultP, 0, sizeof(resultP));
+
+ if (wc_AesInit(&enc, HEAP_HINT, devId) != 0) {
+ return -5908;
+ }
+ if (wc_AesInit(&dec, HEAP_HINT, devId) != 0) {
+ return -5909;
+ }
+
+ result = wc_AesGcmSetKey(&enc, key, keySz);
+ if (result != 0)
+ return -5910;
+
+ /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+ result = wc_AesGcmEncrypt(&enc, resultC, plain, plainSz, iv, ivSz,
+ resultT, tagSz, aad, aadSz);
+
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -5911;
+ if (cipher != NULL) {
+ if (XMEMCMP(cipher, resultC, cipherSz))
+ return -5912;
+ }
+ if (XMEMCMP(tag, resultT, tagSz))
+ return -5913;
+
+ wc_AesFree(&enc);
+
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmSetKey(&dec, key, keySz);
+ if (result != 0)
+ return -5914;
+
+ result = wc_AesGcmDecrypt(&dec, resultP, resultC, cipherSz,
+ iv, ivSz, resultT, tagSz, aad, aadSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -5915;
+ if (plain != NULL) {
+ if (XMEMCMP(plain, resultP, plainSz))
+ return -5916;
+ }
+
+ wc_AesFree(&dec);
+#endif /* HAVE_AES_DECRYPT */
+
+ return 0;
+}
+
+
+/* tests that only use 12 byte IV and 16 or less byte AAD
+ * test vectors are from NIST SP 800-38D
+ * https://csrc.nist.gov/Projects/Cryptographic-Algorithm-Validation-Program/CAVP-TESTING-BLOCK-CIPHER-MODES*/
+int aesgcm_default_test(void)
+{
+ byte key1[] = {
+ 0x29, 0x8e, 0xfa, 0x1c, 0xcf, 0x29, 0xcf, 0x62,
+ 0xae, 0x68, 0x24, 0xbf, 0xc1, 0x95, 0x57, 0xfc
+ };
+
+ byte iv1[] = {
+ 0x6f, 0x58, 0xa9, 0x3f, 0xe1, 0xd2, 0x07, 0xfa,
+ 0xe4, 0xed, 0x2f, 0x6d
+ };
+
+ ALIGN64 byte plain1[] = {
+ 0xcc, 0x38, 0xbc, 0xcd, 0x6b, 0xc5, 0x36, 0xad,
+ 0x91, 0x9b, 0x13, 0x95, 0xf5, 0xd6, 0x38, 0x01,
+ 0xf9, 0x9f, 0x80, 0x68, 0xd6, 0x5c, 0xa5, 0xac,
+ 0x63, 0x87, 0x2d, 0xaf, 0x16, 0xb9, 0x39, 0x01
+ };
+
+ byte aad1[] = {
+ 0x02, 0x1f, 0xaf, 0xd2, 0x38, 0x46, 0x39, 0x73,
+ 0xff, 0xe8, 0x02, 0x56, 0xe5, 0xb1, 0xc6, 0xb1
+ };
+
+ ALIGN64 byte cipher1[] = {
+ 0xdf, 0xce, 0x4e, 0x9c, 0xd2, 0x91, 0x10, 0x3d,
+ 0x7f, 0xe4, 0xe6, 0x33, 0x51, 0xd9, 0xe7, 0x9d,
+ 0x3d, 0xfd, 0x39, 0x1e, 0x32, 0x67, 0x10, 0x46,
+ 0x58, 0x21, 0x2d, 0xa9, 0x65, 0x21, 0xb7, 0xdb
+ };
+
+ byte tag1[] = {
+ 0x54, 0x24, 0x65, 0xef, 0x59, 0x93, 0x16, 0xf7,
+ 0x3a, 0x7a, 0x56, 0x05, 0x09, 0xa2, 0xd9, 0xf2
+ };
+
+ byte key2[] = {
+ 0x01, 0x6d, 0xbb, 0x38, 0xda, 0xa7, 0x6d, 0xfe,
+ 0x7d, 0xa3, 0x84, 0xeb, 0xf1, 0x24, 0x03, 0x64
+ };
+
+ byte iv2[] = {
+ 0x07, 0x93, 0xef, 0x3a, 0xda, 0x78, 0x2f, 0x78,
+ 0xc9, 0x8a, 0xff, 0xe3
+ };
+
+ ALIGN64 byte plain2[] = {
+ 0x4b, 0x34, 0xa9, 0xec, 0x57, 0x63, 0x52, 0x4b,
+ 0x19, 0x1d, 0x56, 0x16, 0xc5, 0x47, 0xf6, 0xb7
+ };
+
+ ALIGN64 byte cipher2[] = {
+ 0x60, 0x9a, 0xa3, 0xf4, 0x54, 0x1b, 0xc0, 0xfe,
+ 0x99, 0x31, 0xda, 0xad, 0x2e, 0xe1, 0x5d, 0x0c
+ };
+
+ byte tag2[] = {
+ 0x33, 0xaf, 0xec, 0x59, 0xc4, 0x5b, 0xaf, 0x68,
+ 0x9a, 0x5e, 0x1b, 0x13, 0xae, 0x42, 0x36, 0x19
+ };
+
+ byte key3[] = {
+ 0xb0, 0x1e, 0x45, 0xcc, 0x30, 0x88, 0xaa, 0xba,
+ 0x9f, 0xa4, 0x3d, 0x81, 0xd4, 0x81, 0x82, 0x3f
+ };
+
+ byte iv3[] = {
+ 0x5a, 0x2c, 0x4a, 0x66, 0x46, 0x87, 0x13, 0x45,
+ 0x6a, 0x4b, 0xd5, 0xe1
+ };
+
+ byte tag3[] = {
+ 0x01, 0x42, 0x80, 0xf9, 0x44, 0xf5, 0x3c, 0x68,
+ 0x11, 0x64, 0xb2, 0xff
+ };
+
+ int ret;
+ ret = aesgcm_default_test_helper(key1, sizeof(key1), iv1, sizeof(iv1),
+ plain1, sizeof(plain1), cipher1, sizeof(cipher1),
+ aad1, sizeof(aad1), tag1, sizeof(tag1));
+ if (ret != 0) {
+ return ret;
+ }
+ ret = aesgcm_default_test_helper(key2, sizeof(key2), iv2, sizeof(iv2),
+ plain2, sizeof(plain2), cipher2, sizeof(cipher2),
+ NULL, 0, tag2, sizeof(tag2));
+ if (ret != 0) {
+ return ret;
+ }
+ ret = aesgcm_default_test_helper(key3, sizeof(key3), iv3, sizeof(iv3),
+ NULL, 0, NULL, 0,
+ NULL, 0, tag3, sizeof(tag3));
+ if (ret != 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
int aesgcm_test(void)
{
Aes enc;
+ Aes dec;
/*
* This is Test Case 16 from the document Galois/
* Counter Mode of Operation (GCM) by McGrew and
* Viega.
*/
- const byte k[] =
- {
- 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
- 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
- 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
- 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
- };
-
- const byte iv[] =
- {
- 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
- 0xde, 0xca, 0xf8, 0x88
- };
-
const byte p[] =
{
0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
@@ -2696,14 +8419,31 @@ int aesgcm_test(void)
0xba, 0x63, 0x7b, 0x39
};
+#if defined(HAVE_AES_DECRYPT) || defined(WOLFSSL_AES_256)
const byte a[] =
{
0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
0xab, 0xad, 0xda, 0xd2
};
+#endif
- const byte c[] =
+#ifdef WOLFSSL_AES_256
+ const byte k1[] =
+ {
+ 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
+ 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
+ 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
+ 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
+ };
+
+ const byte iv1[] =
+ {
+ 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
+ 0xde, 0xca, 0xf8, 0x88
+ };
+
+ const byte c1[] =
{
0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
@@ -2714,42 +8454,478 @@ int aesgcm_test(void)
0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
0xbc, 0xc9, 0xf6, 0x62
};
+#endif /* WOLFSSL_AES_256 */
- const byte t[] =
+ const byte t1[] =
{
0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
};
- byte t2[sizeof(t)];
- byte p2[sizeof(c)];
- byte c2[sizeof(p)];
+ /* FIPS, QAT and PIC32MZ HW Crypto only support 12-byte IV */
+#if !defined(HAVE_FIPS) && \
+ !defined(WOLFSSL_PIC32MZ_CRYPT) && \
+ !defined(FREESCALE_LTC) && !defined(FREESCALE_MMCAU) && \
+ !defined(WOLFSSL_XILINX_CRYPT) && !defined(WOLFSSL_AFALG_XILINX_AES) && \
+ !(defined(WOLF_CRYPTO_CB) && \
+ (defined(HAVE_INTEL_QA_SYNC) || defined(HAVE_CAVIUM_OCTEON_SYNC)))
+
+ #define ENABLE_NON_12BYTE_IV_TEST
+#ifdef WOLFSSL_AES_192
+ /* Test Case 12, uses same plaintext and AAD data. */
+ const byte k2[] =
+ {
+ 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
+ 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
+ 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
+ };
- int result;
+ const byte iv2[] =
+ {
+ 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
+ 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
+ 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
+ 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
+ 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
+ 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
+ 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
+ 0xa6, 0x37, 0xb3, 0x9b
+ };
- memset(t2, 0, sizeof(t2));
- memset(c2, 0, sizeof(c2));
- memset(p2, 0, sizeof(p2));
+ const byte c2[] =
+ {
+ 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
+ 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
+ 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
+ 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
+ 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
+ 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
+ 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
+ 0xe9, 0xb7, 0x37, 0x3b
+ };
+
+ const byte t2[] =
+ {
+ 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
+ 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
+ };
+#endif /* WOLFSSL_AES_192 */
+#ifdef WOLFSSL_AES_128
+ /* The following is an interesting test case from the example
+ * FIPS test vectors for AES-GCM. IVlen = 1 byte */
+ const byte p3[] =
+ {
+ 0x57, 0xce, 0x45, 0x1f, 0xa5, 0xe2, 0x35, 0xa5,
+ 0x8e, 0x1a, 0xa2, 0x3b, 0x77, 0xcb, 0xaf, 0xe2
+ };
+
+ const byte k3[] =
+ {
+ 0xbb, 0x01, 0xd7, 0x03, 0x81, 0x1c, 0x10, 0x1a,
+ 0x35, 0xe0, 0xff, 0xd2, 0x91, 0xba, 0xf2, 0x4b
+ };
+
+ const byte iv3[] =
+ {
+ 0xca
+ };
+
+ const byte c3[] =
+ {
+ 0x6b, 0x5f, 0xb3, 0x9d, 0xc1, 0xc5, 0x7a, 0x4f,
+ 0xf3, 0x51, 0x4d, 0xc2, 0xd5, 0xf0, 0xd0, 0x07
+ };
+
+ const byte a3[] =
+ {
+ 0x40, 0xfc, 0xdc, 0xd7, 0x4a, 0xd7, 0x8b, 0xf1,
+ 0x3e, 0x7c, 0x60, 0x55, 0x50, 0x51, 0xdd, 0x54
+ };
+
+ const byte t3[] =
+ {
+ 0x06, 0x90, 0xed, 0x01, 0x34, 0xdd, 0xc6, 0x95,
+ 0x31, 0x2e, 0x2a, 0xf9, 0x57, 0x7a, 0x1e, 0xa6
+ };
+#endif /* WOLFSSL_AES_128 */
+#ifdef WOLFSSL_AES_256
+ int ivlen;
+#endif
+#endif
+
+ byte resultT[sizeof(t1)];
+ byte resultP[sizeof(p) + AES_BLOCK_SIZE];
+ byte resultC[sizeof(p) + AES_BLOCK_SIZE];
+ int result;
+#ifdef WOLFSSL_AES_256
+ int alen;
+ #if !defined(WOLFSSL_AFALG_XILINX_AES) && !defined(WOLFSSL_XILINX_CRYPT)
+ int plen;
+ #endif
+#endif
+
+#if !defined(BENCH_EMBEDDED)
+ #ifndef BENCH_AESGCM_LARGE
+ #define BENCH_AESGCM_LARGE 1024
+ #endif
+ byte large_input[BENCH_AESGCM_LARGE];
+ byte large_output[BENCH_AESGCM_LARGE + AES_BLOCK_SIZE];
+ byte large_outdec[BENCH_AESGCM_LARGE];
+
+ XMEMSET(large_input, 0, sizeof(large_input));
+ XMEMSET(large_output, 0, sizeof(large_output));
+ XMEMSET(large_outdec, 0, sizeof(large_outdec));
+#endif
+
+ (void)result;
+
+ XMEMSET(resultT, 0, sizeof(resultT));
+ XMEMSET(resultC, 0, sizeof(resultC));
+ XMEMSET(resultP, 0, sizeof(resultP));
+
+ if (wc_AesInit(&enc, HEAP_HINT, devId) != 0) {
+ return -6100;
+ }
+ if (wc_AesInit(&dec, HEAP_HINT, devId) != 0) {
+ return -6101;
+ }
+
+#ifdef WOLFSSL_AES_256
+ result = wc_AesGcmSetKey(&enc, k1, sizeof(k1));
+ if (result != 0)
+ return -6102;
- wc_AesGcmSetKey(&enc, k, sizeof(k));
/* AES-GCM encrypt and decrypt both use AES encrypt internally */
- wc_AesGcmEncrypt(&enc, c2, p, sizeof(c2), iv, sizeof(iv),
- t2, sizeof(t2), a, sizeof(a));
- if (memcmp(c, c2, sizeof(c2)))
- return -68;
- if (memcmp(t, t2, sizeof(t2)))
- return -69;
+ result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv1, sizeof(iv1),
+ resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6103;
+ if (XMEMCMP(c1, resultC, sizeof(c1)))
+ return -6104;
+ if (XMEMCMP(t1, resultT, sizeof(resultT)))
+ return -6105;
+
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmSetKey(&dec, k1, sizeof(k1));
+ if (result != 0)
+ return -6106;
- result = wc_AesGcmDecrypt(&enc, p2, c2, sizeof(p2), iv, sizeof(iv),
- t2, sizeof(t2), a, sizeof(a));
+ result = wc_AesGcmDecrypt(&dec, resultP, resultC, sizeof(c1),
+ iv1, sizeof(iv1), resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6107;
+ if (XMEMCMP(p, resultP, sizeof(p)))
+ return -6108;
+#endif /* HAVE_AES_DECRYPT */
+
+ /* Large buffer test */
+#ifdef BENCH_AESGCM_LARGE
+ /* setup test buffer */
+ for (alen=0; alen<BENCH_AESGCM_LARGE; alen++)
+ large_input[alen] = (byte)alen;
+
+ /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+ result = wc_AesGcmEncrypt(&enc, large_output, large_input,
+ BENCH_AESGCM_LARGE, iv1, sizeof(iv1),
+ resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6109;
+
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmDecrypt(&dec, large_outdec, large_output,
+ BENCH_AESGCM_LARGE, iv1, sizeof(iv1), resultT,
+ sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6110;
+ if (XMEMCMP(large_input, large_outdec, BENCH_AESGCM_LARGE))
+ return -6111;
+#endif /* HAVE_AES_DECRYPT */
+#endif /* BENCH_AESGCM_LARGE */
+#if defined(ENABLE_NON_12BYTE_IV_TEST) && defined(WOLFSSL_AES_256)
+ /* Variable IV length test */
+ for (ivlen=1; ivlen<(int)sizeof(k1); ivlen++) {
+ /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+ result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), k1,
+ (word32)ivlen, resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6112;
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmDecrypt(&dec, resultP, resultC, sizeof(c1), k1,
+ (word32)ivlen, resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6113;
+#endif /* HAVE_AES_DECRYPT */
+ }
+#endif
+
+#if !(defined(WOLF_CRYPTO_CB) && defined(HAVE_INTEL_QA_SYNC))
+ /* Variable authenticated data length test */
+ for (alen=0; alen<(int)sizeof(p); alen++) {
+ /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+ result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv1,
+ sizeof(iv1), resultT, sizeof(resultT), p, (word32)alen);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6114;
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmDecrypt(&dec, resultP, resultC, sizeof(c1), iv1,
+ sizeof(iv1), resultT, sizeof(resultT), p, (word32)alen);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6115;
+#endif /* HAVE_AES_DECRYPT */
+ }
+#endif
+
+#if !defined(WOLFSSL_AFALG_XILINX_AES) && !defined(WOLFSSL_XILINX_CRYPT)
+#ifdef BENCH_AESGCM_LARGE
+ /* Variable plain text length test */
+ for (plen=1; plen<BENCH_AESGCM_LARGE; plen++) {
+ /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+ result = wc_AesGcmEncrypt(&enc, large_output, large_input,
+ plen, iv1, sizeof(iv1), resultT,
+ sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6116;
+
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmDecrypt(&dec, large_outdec, large_output,
+ plen, iv1, sizeof(iv1), resultT,
+ sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6117;
+#endif /* HAVE_AES_DECRYPT */
+ }
+#else /* BENCH_AESGCM_LARGE */
+ /* Variable plain text length test */
+ for (plen=1; plen<(int)sizeof(p); plen++) {
+ /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+ result = wc_AesGcmEncrypt(&enc, resultC, p, (word32)plen, iv1,
+ sizeof(iv1), resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6118;
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmDecrypt(&dec, resultP, resultC, (word32)plen, iv1,
+ sizeof(iv1), resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6119;
+#endif /* HAVE_AES_DECRYPT */
+ }
+#endif /* BENCH_AESGCM_LARGE */
+#endif
+#endif /* WOLFSSL_AES_256 */
+
+ /* test with IV != 12 bytes */
+#ifdef ENABLE_NON_12BYTE_IV_TEST
+ XMEMSET(resultT, 0, sizeof(resultT));
+ XMEMSET(resultC, 0, sizeof(resultC));
+ XMEMSET(resultP, 0, sizeof(resultP));
+
+#ifdef WOLFSSL_AES_192
+ wc_AesGcmSetKey(&enc, k2, sizeof(k2));
+ /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+ result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv2, sizeof(iv2),
+ resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
if (result != 0)
- return -70;
- if (memcmp(p, p2, sizeof(p2)))
- return -71;
+ return -6120;
+ if (XMEMCMP(c2, resultC, sizeof(c2)))
+ return -6121;
+ if (XMEMCMP(t2, resultT, sizeof(resultT)))
+ return -6122;
+
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(c1),
+ iv2, sizeof(iv2), resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6123;
+ if (XMEMCMP(p, resultP, sizeof(p)))
+ return -6124;
+#endif /* HAVE_AES_DECRYPT */
+
+ XMEMSET(resultT, 0, sizeof(resultT));
+ XMEMSET(resultC, 0, sizeof(resultC));
+ XMEMSET(resultP, 0, sizeof(resultP));
+#endif /* WOLFSSL_AES_192 */
+#ifdef WOLFSSL_AES_128
+ wc_AesGcmSetKey(&enc, k3, sizeof(k3));
+ /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+ result = wc_AesGcmEncrypt(&enc, resultC, p3, sizeof(p3), iv3, sizeof(iv3),
+ resultT, sizeof(t3), a3, sizeof(a3));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6125;
+ if (XMEMCMP(c3, resultC, sizeof(c3)))
+ return -6126;
+ if (XMEMCMP(t3, resultT, sizeof(t3)))
+ return -6127;
+
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(c3),
+ iv3, sizeof(iv3), resultT, sizeof(t3), a3, sizeof(a3));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6128;
+ if (XMEMCMP(p3, resultP, sizeof(p3)))
+ return -6129;
+#endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_128 */
+#endif /* ENABLE_NON_12BYTE_IV_TEST */
+
+#if defined(WOLFSSL_AES_256) && !defined(WOLFSSL_AFALG_XILINX_AES) && \
+ !defined(WOLFSSL_XILINX_CRYPT) && \
+ !(defined(WOLF_CRYPTO_CB) && \
+ defined(HAVE_INTEL_QA_SYNC) || defined(HAVE_CAVIUM_OCTEON_SYNC))
+ XMEMSET(resultT, 0, sizeof(resultT));
+ XMEMSET(resultC, 0, sizeof(resultC));
+ XMEMSET(resultP, 0, sizeof(resultP));
+
+ wc_AesGcmSetKey(&enc, k1, sizeof(k1));
+ /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+ result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv1, sizeof(iv1),
+ resultT + 1, sizeof(resultT) - 1, a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6130;
+ if (XMEMCMP(c1, resultC, sizeof(c1)))
+ return -6131;
+ if (XMEMCMP(t1, resultT + 1, sizeof(resultT) - 1))
+ return -6132;
+
+#ifdef HAVE_AES_DECRYPT
+ result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(p),
+ iv1, sizeof(iv1), resultT + 1, sizeof(resultT) - 1, a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6133;
+ if (XMEMCMP(p, resultP, sizeof(p)))
+ return -6134;
+#endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_256 */
+
+#if !defined(HAVE_FIPS) || \
+ (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2))
+ /* Test encrypt with internally generated IV */
+#if defined(WOLFSSL_AES_256) && !(defined(WC_NO_RNG) || defined(HAVE_SELFTEST)) \
+ && !(defined(WOLF_CRYPTO_CB) && defined(HAVE_CAVIUM_OCTEON_SYNC))
+ {
+ WC_RNG rng;
+ byte randIV[12];
+
+ result = wc_InitRng(&rng);
+ if (result != 0)
+ return -6135;
+
+ XMEMSET(randIV, 0, sizeof(randIV));
+ XMEMSET(resultT, 0, sizeof(resultT));
+ XMEMSET(resultC, 0, sizeof(resultC));
+ XMEMSET(resultP, 0, sizeof(resultP));
+
+ wc_AesGcmSetKey(&enc, k1, sizeof(k1));
+ result = wc_AesGcmSetIV(&enc, sizeof(randIV), NULL, 0, &rng);
+ if (result != 0)
+ return -6136;
+
+ result = wc_AesGcmEncrypt_ex(&enc,
+ resultC, p, sizeof(p),
+ randIV, sizeof(randIV),
+ resultT, sizeof(resultT),
+ a, sizeof(a));
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+ #endif
+ if (result != 0)
+ return -6137;
+
+ /* Check the IV has been set. */
+ {
+ word32 i, ivSum = 0;
+
+ for (i = 0; i < sizeof(randIV); i++)
+ ivSum += randIV[i];
+ if (ivSum == 0)
+ return -6138;
+ }
+
+#ifdef HAVE_AES_DECRYPT
+ wc_AesGcmSetKey(&dec, k1, sizeof(k1));
+ result = wc_AesGcmSetIV(&dec, sizeof(randIV), NULL, 0, &rng);
+ if (result != 0)
+ return -6139;
+
+ result = wc_AesGcmDecrypt(&dec,
+ resultP, resultC, sizeof(c1),
+ randIV, sizeof(randIV),
+ resultT, sizeof(resultT),
+ a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ result = wc_AsyncWait(result, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (result != 0)
+ return -6140;
+ if (XMEMCMP(p, resultP, sizeof(p)))
+ return -6141;
+#endif /* HAVE_AES_DECRYPT */
+
+ wc_FreeRng(&rng);
+ }
+#endif /* WOLFSSL_AES_256 && !(WC_NO_RNG || HAVE_SELFTEST) */
+#endif /* HAVE_FIPS_VERSION >= 2 */
+
+ wc_AesFree(&enc);
+ wc_AesFree(&dec);
return 0;
}
+#ifdef WOLFSSL_AES_128
int gmac_test(void)
{
Gmac gmac;
@@ -2775,6 +8951,10 @@ int gmac_test(void)
0xaa, 0x10, 0xf1, 0x6d, 0x22, 0x7d, 0xc4, 0x1b
};
+#if !defined(HAVE_FIPS) || \
+ (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2))
+ /* FIPS builds only allow 16-byte auth tags. */
+ /* This sample uses a 15-byte auth tag. */
const byte k2[] =
{
0x40, 0xf7, 0xec, 0xb2, 0x52, 0x6d, 0xaa, 0xd4,
@@ -2795,52 +8975,74 @@ int gmac_test(void)
0xc6, 0x81, 0x79, 0x8e, 0x3d, 0xda, 0xb0, 0x9f,
0x8d, 0x83, 0xb0, 0xbb, 0x14, 0xb6, 0x91
};
-
- const byte k3[] =
- {
- 0xb8, 0xe4, 0x9a, 0x5e, 0x37, 0xf9, 0x98, 0x2b,
- 0xb9, 0x6d, 0xd0, 0xc9, 0xb6, 0xab, 0x26, 0xac
- };
- const byte iv3[] =
- {
- 0xe4, 0x4a, 0x42, 0x18, 0x8c, 0xae, 0x94, 0x92,
- 0x6a, 0x9c, 0x26, 0xb0
- };
- const byte a3[] =
- {
- 0x9d, 0xb9, 0x61, 0x68, 0xa6, 0x76, 0x7a, 0x31,
- 0xf8, 0x29, 0xe4, 0x72, 0x61, 0x68, 0x3f, 0x8a
- };
- const byte t3[] =
- {
- 0x23, 0xe2, 0x9f, 0x66, 0xe4, 0xc6, 0x52, 0x48
- };
+#endif
byte tag[16];
- memset(tag, 0, sizeof(tag));
+ XMEMSET(&gmac, 0, sizeof(Gmac)); /* clear context */
+ (void)wc_AesInit((Aes*)&gmac, HEAP_HINT, INVALID_DEVID); /* Make sure devId updated */
+ XMEMSET(tag, 0, sizeof(tag));
wc_GmacSetKey(&gmac, k1, sizeof(k1));
wc_GmacUpdate(&gmac, iv1, sizeof(iv1), a1, sizeof(a1), tag, sizeof(t1));
- if (memcmp(t1, tag, sizeof(t1)) != 0)
- return -126;
+ if (XMEMCMP(t1, tag, sizeof(t1)) != 0)
+ return -6200;
- memset(tag, 0, sizeof(tag));
+#if !defined(HAVE_FIPS) || \
+ (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2))
+ XMEMSET(tag, 0, sizeof(tag));
wc_GmacSetKey(&gmac, k2, sizeof(k2));
wc_GmacUpdate(&gmac, iv2, sizeof(iv2), a2, sizeof(a2), tag, sizeof(t2));
- if (memcmp(t2, tag, sizeof(t2)) != 0)
- return -127;
+ if (XMEMCMP(t2, tag, sizeof(t2)) != 0)
+ return -6201;
- memset(tag, 0, sizeof(tag));
- wc_GmacSetKey(&gmac, k3, sizeof(k3));
- wc_GmacUpdate(&gmac, iv3, sizeof(iv3), a3, sizeof(a3), tag, sizeof(t3));
- if (memcmp(t3, tag, sizeof(t3)) != 0)
- return -128;
+#if !(defined(WC_NO_RNG) || defined(HAVE_SELFTEST))
+ {
+ const byte badT[] =
+ {
+ 0xde, 0xad, 0xbe, 0xef, 0x17, 0x2e, 0xd0, 0x43,
+ 0xaa, 0x10, 0xf1, 0x6d, 0x22, 0x7d, 0xc4, 0x1b
+ };
+
+ WC_RNG rng;
+ byte iv[12];
+
+ #ifndef HAVE_FIPS
+ if (wc_InitRng_ex(&rng, HEAP_HINT, devId) != 0)
+ return -6202;
+ #else
+ if (wc_InitRng(&rng) != 0)
+ return -6203;
+ #endif
+
+ if (wc_GmacVerify(k1, sizeof(k1), iv1, sizeof(iv1), a1, sizeof(a1),
+ t1, sizeof(t1)) != 0)
+ return -6204;
+ if (wc_GmacVerify(k1, sizeof(k1), iv1, sizeof(iv1), a1, sizeof(a1),
+ badT, sizeof(badT)) != AES_GCM_AUTH_E)
+ return -6205;
+ if (wc_GmacVerify(k2, sizeof(k2), iv2, sizeof(iv2), a2, sizeof(a2),
+ t2, sizeof(t2)) != 0)
+ return -6206;
+
+ XMEMSET(tag, 0, sizeof(tag));
+ XMEMSET(iv, 0, sizeof(iv));
+ if (wc_Gmac(k1, sizeof(k1), iv, sizeof(iv), a1, sizeof(a1),
+ tag, sizeof(tag), &rng) != 0)
+ return -6207;
+ if (wc_GmacVerify(k1, sizeof(k1), iv, sizeof(iv), a1, sizeof(a1),
+ tag, sizeof(tag)) != 0)
+ return -6208;
+ wc_FreeRng(&rng);
+ }
+#endif /* WC_NO_RNG HAVE_SELFTEST */
+#endif /* HAVE_FIPS */
return 0;
}
+#endif /* WOLFSSL_AES_128 */
#endif /* HAVE_AESGCM */
-#ifdef HAVE_AESCCM
+#if defined(HAVE_AESCCM) && defined(WOLFSSL_AES_128)
int aesccm_test(void)
{
Aes enc;
@@ -2887,45 +9089,280 @@ int aesccm_test(void)
byte t2[sizeof(t)];
byte p2[sizeof(p)];
byte c2[sizeof(c)];
+ byte iv2[sizeof(iv)];
int result;
- memset(t2, 0, sizeof(t2));
- memset(c2, 0, sizeof(c2));
- memset(p2, 0, sizeof(p2));
+ XMEMSET(&enc, 0, sizeof(Aes)); /* clear context */
+ XMEMSET(t2, 0, sizeof(t2));
+ XMEMSET(c2, 0, sizeof(c2));
+ XMEMSET(p2, 0, sizeof(p2));
+
+ result = wc_AesCcmSetKey(&enc, k, sizeof(k));
+ if (result != 0)
+ return -6300;
- wc_AesCcmSetKey(&enc, k, sizeof(k));
/* AES-CCM encrypt and decrypt both use AES encrypt internally */
- wc_AesCcmEncrypt(&enc, c2, p, sizeof(c2), iv, sizeof(iv),
+ result = wc_AesCcmEncrypt(&enc, c2, p, sizeof(c2), iv, sizeof(iv),
t2, sizeof(t2), a, sizeof(a));
- if (memcmp(c, c2, sizeof(c2)))
- return -107;
- if (memcmp(t, t2, sizeof(t2)))
- return -108;
+ if (result != 0)
+ return -6301;
+ if (XMEMCMP(c, c2, sizeof(c2)))
+ return -6302;
+ if (XMEMCMP(t, t2, sizeof(t2)))
+ return -6303;
result = wc_AesCcmDecrypt(&enc, p2, c2, sizeof(p2), iv, sizeof(iv),
t2, sizeof(t2), a, sizeof(a));
if (result != 0)
- return -109;
- if (memcmp(p, p2, sizeof(p2)))
- return -110;
+ return -6304;
+ if (XMEMCMP(p, p2, sizeof(p2)))
+ return -6305;
/* Test the authentication failure */
t2[0]++; /* Corrupt the authentication tag. */
result = wc_AesCcmDecrypt(&enc, p2, c, sizeof(p2), iv, sizeof(iv),
t2, sizeof(t2), a, sizeof(a));
if (result == 0)
- return -111;
+ return -6306;
/* Clear c2 to compare against p2. p2 should be set to zero in case of
* authentication fail. */
- memset(c2, 0, sizeof(c2));
- if (memcmp(p2, c2, sizeof(p2)))
- return -112;
+ XMEMSET(c2, 0, sizeof(c2));
+ if (XMEMCMP(p2, c2, sizeof(p2)))
+ return -6307;
+
+ XMEMSET(&enc, 0, sizeof(Aes)); /* clear context */
+ XMEMSET(t2, 0, sizeof(t2));
+ XMEMSET(c2, 0, sizeof(c2));
+ XMEMSET(p2, 0, sizeof(p2));
+ XMEMSET(iv2, 0, sizeof(iv2));
+
+#ifndef HAVE_SELFTEST
+ /* selftest build does not have wc_AesCcmSetNonce() or
+ * wc_AesCcmEncrypt_ex() */
+ if (wc_AesCcmSetKey(&enc, k, sizeof(k)) != 0)
+ return -6308;
+
+ if (wc_AesCcmSetNonce(&enc, iv, sizeof(iv)) != 0)
+ return -6309;
+ if (wc_AesCcmEncrypt_ex(&enc, c2, p, sizeof(c2), iv2, sizeof(iv2),
+ t2, sizeof(t2), a, sizeof(a)) != 0)
+ return -6310;
+ if (XMEMCMP(iv, iv2, sizeof(iv2)))
+ return -6311;
+ if (XMEMCMP(c, c2, sizeof(c2)))
+ return -6312;
+ if (XMEMCMP(t, t2, sizeof(t2)))
+ return -6313;
+#endif
return 0;
}
-#endif /* HAVE_AESCCM */
+#endif /* HAVE_AESCCM WOLFSSL_AES_128 */
+
+
+#ifdef HAVE_AES_KEYWRAP
+
+#define MAX_KEYWRAP_TEST_OUTLEN 40
+#define MAX_KEYWRAP_TEST_PLAINLEN 32
+
+typedef struct keywrapVector {
+ const byte* kek;
+ const byte* data;
+ const byte* verify;
+ word32 kekLen;
+ word32 dataLen;
+ word32 verifyLen;
+} keywrapVector;
+
+int aeskeywrap_test(void)
+{
+ int wrapSz, plainSz, testSz, i;
+
+ /* test vectors from RFC 3394 (kek, data, verify) */
+
+#ifdef WOLFSSL_AES_128
+ /* Wrap 128 bits of Key Data with a 128-bit KEK */
+ const byte k1[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+ };
+
+ const byte d1[] = {
+ 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF
+ };
+
+ const byte v1[] = {
+ 0x1F, 0xA6, 0x8B, 0x0A, 0x81, 0x12, 0xB4, 0x47,
+ 0xAE, 0xF3, 0x4B, 0xD8, 0xFB, 0x5A, 0x7B, 0x82,
+ 0x9D, 0x3E, 0x86, 0x23, 0x71, 0xD2, 0xCF, 0xE5
+ };
+#endif /* WOLFSSL_AES_128 */
+
+#ifdef WOLFSSL_AES_192
+ /* Wrap 128 bits of Key Data with a 192-bit KEK */
+ const byte k2[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+ };
+
+ const byte d2[] = {
+ 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF
+ };
+
+ const byte v2[] = {
+ 0x96, 0x77, 0x8B, 0x25, 0xAE, 0x6C, 0xA4, 0x35,
+ 0xF9, 0x2B, 0x5B, 0x97, 0xC0, 0x50, 0xAE, 0xD2,
+ 0x46, 0x8A, 0xB8, 0xA1, 0x7A, 0xD8, 0x4E, 0x5D
+ };
+#endif
+
+#ifdef WOLFSSL_AES_256
+ /* Wrap 128 bits of Key Data with a 256-bit KEK */
+ const byte k3[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
+ };
+
+ const byte d3[] = {
+ 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF
+ };
+
+ const byte v3[] = {
+ 0x64, 0xE8, 0xC3, 0xF9, 0xCE, 0x0F, 0x5B, 0xA2,
+ 0x63, 0xE9, 0x77, 0x79, 0x05, 0x81, 0x8A, 0x2A,
+ 0x93, 0xC8, 0x19, 0x1E, 0x7D, 0x6E, 0x8A, 0xE7
+ };
+#endif
+
+#ifdef WOLFSSL_AES_192
+ /* Wrap 192 bits of Key Data with a 192-bit KEK */
+ const byte k4[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+ };
+
+ const byte d4[] = {
+ 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ };
+
+ const byte v4[] = {
+ 0x03, 0x1D, 0x33, 0x26, 0x4E, 0x15, 0xD3, 0x32,
+ 0x68, 0xF2, 0x4E, 0xC2, 0x60, 0x74, 0x3E, 0xDC,
+ 0xE1, 0xC6, 0xC7, 0xDD, 0xEE, 0x72, 0x5A, 0x93,
+ 0x6B, 0xA8, 0x14, 0x91, 0x5C, 0x67, 0x62, 0xD2
+ };
+#endif
+
+#ifdef WOLFSSL_AES_256
+ /* Wrap 192 bits of Key Data with a 256-bit KEK */
+ const byte k5[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
+ };
+
+ const byte d5[] = {
+ 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ };
+
+ const byte v5[] = {
+ 0xA8, 0xF9, 0xBC, 0x16, 0x12, 0xC6, 0x8B, 0x3F,
+ 0xF6, 0xE6, 0xF4, 0xFB, 0xE3, 0x0E, 0x71, 0xE4,
+ 0x76, 0x9C, 0x8B, 0x80, 0xA3, 0x2C, 0xB8, 0x95,
+ 0x8C, 0xD5, 0xD1, 0x7D, 0x6B, 0x25, 0x4D, 0xA1
+ };
+
+ /* Wrap 256 bits of Key Data with a 256-bit KEK */
+ const byte k6[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
+ };
+
+ const byte d6[] = {
+ 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+ };
+
+ const byte v6[] = {
+ 0x28, 0xC9, 0xF4, 0x04, 0xC4, 0xB8, 0x10, 0xF4,
+ 0xCB, 0xCC, 0xB3, 0x5C, 0xFB, 0x87, 0xF8, 0x26,
+ 0x3F, 0x57, 0x86, 0xE2, 0xD8, 0x0E, 0xD3, 0x26,
+ 0xCB, 0xC7, 0xF0, 0xE7, 0x1A, 0x99, 0xF4, 0x3B,
+ 0xFB, 0x98, 0x8B, 0x9B, 0x7A, 0x02, 0xDD, 0x21
+ };
+#endif /* WOLFSSL_AES_256 */
+
+ byte output[MAX_KEYWRAP_TEST_OUTLEN];
+ byte plain [MAX_KEYWRAP_TEST_PLAINLEN];
+
+ const keywrapVector test_wrap[] =
+ {
+ #ifdef WOLFSSL_AES_128
+ {k1, d1, v1, sizeof(k1), sizeof(d1), sizeof(v1)},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {k2, d2, v2, sizeof(k2), sizeof(d2), sizeof(v2)},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {k3, d3, v3, sizeof(k3), sizeof(d3), sizeof(v3)},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {k4, d4, v4, sizeof(k4), sizeof(d4), sizeof(v4)},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {k5, d5, v5, sizeof(k5), sizeof(d5), sizeof(v5)},
+ {k6, d6, v6, sizeof(k6), sizeof(d6), sizeof(v6)}
+ #endif
+ };
+ testSz = sizeof(test_wrap) / sizeof(keywrapVector);
+
+ XMEMSET(output, 0, sizeof(output));
+ XMEMSET(plain, 0, sizeof(plain));
+
+ for (i = 0; i < testSz; i++) {
+
+ wrapSz = wc_AesKeyWrap(test_wrap[i].kek, test_wrap[i].kekLen,
+ test_wrap[i].data, test_wrap[i].dataLen,
+ output, sizeof(output), NULL);
+
+ if ( (wrapSz < 0) || (wrapSz != (int)test_wrap[i].verifyLen) )
+ return -6400;
+
+ if (XMEMCMP(output, test_wrap[i].verify, test_wrap[i].verifyLen) != 0)
+ return -6401;
+
+ plainSz = wc_AesKeyUnWrap((byte*)test_wrap[i].kek, test_wrap[i].kekLen,
+ output, wrapSz,
+ plain, sizeof(plain), NULL);
+
+ if ( (plainSz < 0) || (plainSz != (int)test_wrap[i].dataLen) )
+ return -6402;
+
+ if (XMEMCMP(plain, test_wrap[i].data, test_wrap[i].dataLen) != 0)
+ return -6403 - i;
+ }
+
+ return 0;
+}
+#endif /* HAVE_AES_KEYWRAP */
#endif /* NO_AES */
@@ -3053,7 +9490,7 @@ int camellia_test(void)
byte out[CAMELLIA_BLOCK_SIZE];
Camellia cam;
- int i, testsSz;
+ int i, testsSz, ret;
const test_vector_t testVectors[] =
{
{CAM_ECB_ENC, pte, ive, c1, k1, sizeof(k1), -114},
@@ -3078,25 +9515,31 @@ int camellia_test(void)
switch (testVectors[i].type) {
case CAM_ECB_ENC:
- wc_CamelliaEncryptDirect(&cam, out, testVectors[i].plaintext);
- if (memcmp(out, testVectors[i].ciphertext, CAMELLIA_BLOCK_SIZE))
+ ret = wc_CamelliaEncryptDirect(&cam, out,
+ testVectors[i].plaintext);
+ if (ret != 0 || XMEMCMP(out, testVectors[i].ciphertext,
+ CAMELLIA_BLOCK_SIZE))
return testVectors[i].errorCode;
break;
case CAM_ECB_DEC:
- wc_CamelliaDecryptDirect(&cam, out, testVectors[i].ciphertext);
- if (memcmp(out, testVectors[i].plaintext, CAMELLIA_BLOCK_SIZE))
+ ret = wc_CamelliaDecryptDirect(&cam, out,
+ testVectors[i].ciphertext);
+ if (ret != 0 || XMEMCMP(out, testVectors[i].plaintext,
+ CAMELLIA_BLOCK_SIZE))
return testVectors[i].errorCode;
break;
case CAM_CBC_ENC:
- wc_CamelliaCbcEncrypt(&cam, out, testVectors[i].plaintext,
+ ret = wc_CamelliaCbcEncrypt(&cam, out, testVectors[i].plaintext,
CAMELLIA_BLOCK_SIZE);
- if (memcmp(out, testVectors[i].ciphertext, CAMELLIA_BLOCK_SIZE))
+ if (ret != 0 || XMEMCMP(out, testVectors[i].ciphertext,
+ CAMELLIA_BLOCK_SIZE))
return testVectors[i].errorCode;
break;
case CAM_CBC_DEC:
- wc_CamelliaCbcDecrypt(&cam, out, testVectors[i].ciphertext,
- CAMELLIA_BLOCK_SIZE);
- if (memcmp(out, testVectors[i].plaintext, CAMELLIA_BLOCK_SIZE))
+ ret = wc_CamelliaCbcDecrypt(&cam, out,
+ testVectors[i].ciphertext, CAMELLIA_BLOCK_SIZE);
+ if (ret != 0 || XMEMCMP(out, testVectors[i].plaintext,
+ CAMELLIA_BLOCK_SIZE))
return testVectors[i].errorCode;
break;
default:
@@ -3105,33 +9548,390 @@ int camellia_test(void)
}
/* Setting the IV and checking it was actually set. */
- wc_CamelliaSetIV(&cam, ivc);
- if (XMEMCMP(cam.reg, ivc, CAMELLIA_BLOCK_SIZE))
- return -1;
+ ret = wc_CamelliaSetIV(&cam, ivc);
+ if (ret != 0 || XMEMCMP(cam.reg, ivc, CAMELLIA_BLOCK_SIZE))
+ return -6500;
/* Setting the IV to NULL should be same as all zeros IV */
if (wc_CamelliaSetIV(&cam, NULL) != 0 ||
XMEMCMP(cam.reg, ive, CAMELLIA_BLOCK_SIZE))
- return -1;
+ return -6501;
/* First parameter should never be null */
if (wc_CamelliaSetIV(NULL, NULL) == 0)
- return -1;
+ return -6502;
/* First parameter should never be null, check it fails */
if (wc_CamelliaSetKey(NULL, k1, sizeof(k1), NULL) == 0)
- return -1;
+ return -6503;
/* Key should have a size of 16, 24, or 32 */
if (wc_CamelliaSetKey(&cam, k1, 0, NULL) == 0)
- return -1;
+ return -6504;
return 0;
}
#endif /* HAVE_CAMELLIA */
+#ifdef HAVE_IDEA
+int idea_test(void)
+{
+ int ret;
+ word16 i, j;
+
+ Idea idea;
+ byte data[IDEA_BLOCK_SIZE];
+
+ /* Project NESSIE test vectors */
+#define IDEA_NB_TESTS 6
+#define IDEA_NB_TESTS_EXTRA 4
+
+ const byte v_key[IDEA_NB_TESTS][IDEA_KEY_SIZE] = {
+ { 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37,
+ 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37 },
+ { 0x57, 0x57, 0x57, 0x57, 0x57, 0x57, 0x57, 0x57,
+ 0x57, 0x57, 0x57, 0x57, 0x57, 0x57, 0x57, 0x57 },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F },
+ { 0x2B, 0xD6, 0x45, 0x9F, 0x82, 0xC5, 0xB3, 0x00,
+ 0x95, 0x2C, 0x49, 0x10, 0x48, 0x81, 0xFF, 0x48 },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F },
+ { 0x2B, 0xD6, 0x45, 0x9F, 0x82, 0xC5, 0xB3, 0x00,
+ 0x95, 0x2C, 0x49, 0x10, 0x48, 0x81, 0xFF, 0x48 },
+ };
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+ const byte v1_plain[IDEA_NB_TESTS][IDEA_BLOCK_SIZE] = {
+ { 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37 },
+ { 0x57, 0x57, 0x57, 0x57, 0x57, 0x57, 0x57, 0x57 },
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77 },
+ { 0xEA, 0x02, 0x47, 0x14, 0xAD, 0x5C, 0x4D, 0x84 },
+ { 0xDB, 0x2D, 0x4A, 0x92, 0xAA, 0x68, 0x27, 0x3F },
+ { 0xF1, 0x29, 0xA6, 0x60, 0x1E, 0xF6, 0x2A, 0x47 },
+ };
+
+ byte v1_cipher[IDEA_NB_TESTS][IDEA_BLOCK_SIZE] = {
+ { 0x54, 0xCF, 0x21, 0xE3, 0x89, 0xD8, 0x73, 0xEC },
+ { 0x85, 0x52, 0x4D, 0x41, 0x0E, 0xB4, 0x28, 0xAE },
+ { 0xF5, 0x26, 0xAB, 0x9A, 0x62, 0xC0, 0xD2, 0x58 },
+ { 0xC8, 0xFB, 0x51, 0xD3, 0x51, 0x66, 0x27, 0xA8 },
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77 },
+ { 0xEA, 0x02, 0x47, 0x14, 0xAD, 0x5C, 0x4D, 0x84 },
+ };
+
+ byte v1_cipher_100[IDEA_NB_TESTS_EXTRA][IDEA_BLOCK_SIZE] = {
+ { 0x12, 0x46, 0x2F, 0xD0, 0xFB, 0x3A, 0x63, 0x39 },
+ { 0x15, 0x61, 0xE8, 0xC9, 0x04, 0x54, 0x8B, 0xE9 },
+ { 0x42, 0x12, 0x2A, 0x94, 0xB0, 0xF6, 0xD2, 0x43 },
+ { 0x53, 0x4D, 0xCD, 0x48, 0xDD, 0xD5, 0xF5, 0x9C },
+ };
+
+ byte v1_cipher_1000[IDEA_NB_TESTS_EXTRA][IDEA_BLOCK_SIZE] = {
+ { 0x44, 0x1B, 0x38, 0x5C, 0x77, 0x29, 0x75, 0x34 },
+ { 0xF0, 0x4E, 0x58, 0x88, 0x44, 0x99, 0x22, 0x2D },
+ { 0xB3, 0x5F, 0x93, 0x7F, 0x6A, 0xA0, 0xCD, 0x1F },
+ { 0x9A, 0xEA, 0x46, 0x8F, 0x42, 0x9B, 0xBA, 0x15 },
+ };
+
+ /* CBC test */
+ const char *message = "International Data Encryption Algorithm";
+ byte msg_enc[40], msg_dec[40];
+
+ for (i = 0; i < IDEA_NB_TESTS; i++) {
+ /* Set encryption key */
+ XMEMSET(&idea, 0, sizeof(Idea));
+ ret = wc_IdeaSetKey(&idea, v_key[i], IDEA_KEY_SIZE,
+ NULL, IDEA_ENCRYPTION);
+ if (ret != 0) {
+ printf("wc_IdeaSetKey (enc) failed\n");
+ return -6600;
+ }
+
+ /* Data encryption */
+ ret = wc_IdeaCipher(&idea, data, v1_plain[i]);
+ if (ret != 0 || XMEMCMP(&v1_cipher[i], data, IDEA_BLOCK_SIZE)) {
+ printf("Bad encryption\n");
+ return -6601;
+ }
+
+ /* Set decryption key */
+ XMEMSET(&idea, 0, sizeof(Idea));
+ ret = wc_IdeaSetKey(&idea, v_key[i], IDEA_KEY_SIZE,
+ NULL, IDEA_DECRYPTION);
+ if (ret != 0) {
+ printf("wc_IdeaSetKey (dec) failed\n");
+ return -6602;
+ }
+
+ /* Data decryption */
+ ret = wc_IdeaCipher(&idea, data, data);
+ if (ret != 0 || XMEMCMP(v1_plain[i], data, IDEA_BLOCK_SIZE)) {
+ printf("Bad decryption\n");
+ return -6603;
+ }
+
+ /* Set encryption key */
+ XMEMSET(&idea, 0, sizeof(Idea));
+ ret = wc_IdeaSetKey(&idea, v_key[i], IDEA_KEY_SIZE,
+ v_key[i], IDEA_ENCRYPTION);
+ if (ret != 0) {
+ printf("wc_IdeaSetKey (enc) failed\n");
+ return -6604;
+ }
+
+ XMEMSET(msg_enc, 0, sizeof(msg_enc));
+ ret = wc_IdeaCbcEncrypt(&idea, msg_enc, (byte *)message,
+ (word32)XSTRLEN(message)+1);
+ if (ret != 0) {
+ printf("wc_IdeaCbcEncrypt failed\n");
+ return -6605;
+ }
+
+ /* Set decryption key */
+ XMEMSET(&idea, 0, sizeof(Idea));
+ ret = wc_IdeaSetKey(&idea, v_key[i], IDEA_KEY_SIZE,
+ v_key[i], IDEA_DECRYPTION);
+ if (ret != 0) {
+ printf("wc_IdeaSetKey (dec) failed\n");
+ return -6606;
+ }
+
+ XMEMSET(msg_dec, 0, sizeof(msg_dec));
+ ret = wc_IdeaCbcDecrypt(&idea, msg_dec, msg_enc,
+ (word32)XSTRLEN(message)+1);
+ if (ret != 0) {
+ printf("wc_IdeaCbcDecrypt failed\n");
+ return -6607;
+ }
+
+ if (XMEMCMP(message, msg_dec, (word32)XSTRLEN(message))) {
+ printf("Bad CBC decryption\n");
+ return -6608;
+ }
+ }
+
+ for (i = 0; i < IDEA_NB_TESTS_EXTRA; i++) {
+ /* Set encryption key */
+ XMEMSET(&idea, 0, sizeof(Idea));
+ ret = wc_IdeaSetKey(&idea, v_key[i], IDEA_KEY_SIZE,
+ NULL, IDEA_ENCRYPTION);
+ if (ret != 0) {
+ printf("wc_IdeaSetKey (enc) failed\n");
+ return -6609;
+ }
+
+ /* 100 times data encryption */
+ XMEMCPY(data, v1_plain[i], IDEA_BLOCK_SIZE);
+ for (j = 0; j < 100; j++) {
+ ret = wc_IdeaCipher(&idea, data, data);
+ if (ret != 0) {
+ return -6610;
+ }
+ }
+
+ if (XMEMCMP(v1_cipher_100[i], data, IDEA_BLOCK_SIZE)) {
+ printf("Bad encryption (100 times)\n");
+ return -6611;
+ }
+
+ /* 1000 times data encryption */
+ XMEMCPY(data, v1_plain[i], IDEA_BLOCK_SIZE);
+ for (j = 0; j < 1000; j++) {
+ ret = wc_IdeaCipher(&idea, data, data);
+ if (ret != 0) {
+ return -6612;
+ }
+ }
+
+ if (XMEMCMP(v1_cipher_1000[i], data, IDEA_BLOCK_SIZE)) {
+ printf("Bad encryption (1000 times)\n");
+ return -6613;
+ }
+ }
+
+#ifndef WC_NO_RNG
+ /* random test for CBC */
+ {
+ WC_RNG rng;
+ byte key[IDEA_KEY_SIZE], iv[IDEA_BLOCK_SIZE],
+ rnd[1000], enc[1000], dec[1000];
+
+ /* random values */
+ #ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+ #else
+ ret = wc_InitRng(&rng);
+ #endif
+ if (ret != 0)
+ return -6614;
+
+ for (i = 0; i < 1000; i++) {
+ /* random key */
+ ret = wc_RNG_GenerateBlock(&rng, key, sizeof(key));
+ if (ret != 0)
+ return -6615;
+
+ /* random iv */
+ ret = wc_RNG_GenerateBlock(&rng, iv, sizeof(iv));
+ if (ret != 0)
+ return -6616;
+
+ /* random data */
+ ret = wc_RNG_GenerateBlock(&rng, rnd, sizeof(rnd));
+ if (ret != 0)
+ return -6617;
+
+ /* Set encryption key */
+ XMEMSET(&idea, 0, sizeof(Idea));
+ ret = wc_IdeaSetKey(&idea, key, IDEA_KEY_SIZE, iv, IDEA_ENCRYPTION);
+ if (ret != 0) {
+ printf("wc_IdeaSetKey (enc) failed\n");
+ return -6618;
+ }
+
+ /* Data encryption */
+ XMEMSET(enc, 0, sizeof(enc));
+ ret = wc_IdeaCbcEncrypt(&idea, enc, rnd, sizeof(rnd));
+ if (ret != 0) {
+ printf("wc_IdeaCbcEncrypt failed\n");
+ return -6619;
+ }
+
+ /* Set decryption key */
+ XMEMSET(&idea, 0, sizeof(Idea));
+ ret = wc_IdeaSetKey(&idea, key, IDEA_KEY_SIZE, iv, IDEA_DECRYPTION);
+ if (ret != 0) {
+ printf("wc_IdeaSetKey (dec) failed\n");
+ return -6620;
+ }
+
+ /* Data decryption */
+ XMEMSET(dec, 0, sizeof(dec));
+ ret = wc_IdeaCbcDecrypt(&idea, dec, enc, sizeof(enc));
+ if (ret != 0) {
+ printf("wc_IdeaCbcDecrypt failed\n");
+ return -6621;
+ }
+
+ if (XMEMCMP(rnd, dec, sizeof(rnd))) {
+ printf("Bad CBC decryption\n");
+ return -6622;
+ }
+ }
+
+ wc_FreeRng(&rng);
+ }
+#endif /* WC_NO_RNG */
+
+ return 0;
+}
+#endif /* HAVE_IDEA */
+
+
+#ifndef WC_NO_RNG
+static int _rng_test(WC_RNG* rng, int errorOffset)
+{
+ byte block[32];
+ int ret, i;
+
+ XMEMSET(block, 0, sizeof(block));
+
+ ret = wc_RNG_GenerateBlock(rng, block, sizeof(block));
+ if (ret != 0) {
+ ret = -6623;
+ goto exit;
+ }
+
+ /* Check for 0's */
+ for (i=0; i<(int)sizeof(block); i++) {
+ if (block[i] == 0) {
+ ret++;
+ }
+ }
+ /* All zeros count check */
+ if (ret >= (int)sizeof(block)) {
+ ret = -6624;
+ goto exit;
+ }
+
+ ret = wc_RNG_GenerateByte(rng, block);
+ if (ret != 0) {
+ ret = -6625;
+ goto exit;
+ }
+
+ /* Parameter validation testing. */
+ ret = wc_RNG_GenerateBlock(NULL, block, sizeof(block));
+ if (ret != BAD_FUNC_ARG) {
+ ret = -6626;
+ goto exit;
+ }
+ ret = wc_RNG_GenerateBlock(rng, NULL, sizeof(block));
+ if (ret != BAD_FUNC_ARG) {
+ ret = -6627;
+ goto exit;
+ }
+
+ ret = wc_RNG_GenerateByte(NULL, block);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -6628;
+ goto exit;
+ }
+ ret = wc_RNG_GenerateByte(rng, NULL);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -6629;
+ goto exit;
+ }
+
+ ret = 0;
+
+exit:
+ if (ret != 0)
+ ret += errorOffset;
+
+ return ret;
+}
+
+
+static int random_rng_test(void)
+{
+ WC_RNG localRng;
+ WC_RNG* rng;
+ int ret;
+
+ rng = &localRng;
+ /* Test stack based RNG. */
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(rng);
+#endif
+ if (ret != 0) return -6700;
+
+ ret = _rng_test(rng, -6300);
+
+ /* Make sure and free RNG */
+ wc_FreeRng(rng);
+
+ if (ret != 0) return ret;
+
+#if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+ {
+ byte nonce[8] = { 0 };
+ /* Test dynamic RNG. */
+ rng = wc_rng_new(nonce, (word32)sizeof(nonce), HEAP_HINT);
+ if (rng == NULL) return -6701;
+
+ ret = _rng_test(rng, -6310);
+
+ wc_rng_free(rng);
+ }
+#endif
+
+ return ret;
+}
+
+#if defined(HAVE_HASHDRBG) && !defined(CUSTOM_RAND_GENERATE_BLOCK)
int random_test(void)
{
@@ -3184,53 +9984,240 @@ int random_test(void)
0xa1, 0x80, 0x18, 0x3a, 0x07, 0xdf, 0xae, 0x17
};
- byte output[SHA256_DIGEST_SIZE * 4];
+ byte output[WC_SHA256_DIGEST_SIZE * 4];
int ret;
ret = wc_RNG_HealthTest(0, test1Entropy, sizeof(test1Entropy), NULL, 0,
output, sizeof(output));
if (ret != 0)
- return -39;
+ return -6800;
if (XMEMCMP(test1Output, output, sizeof(output)) != 0)
- return -40;
+ return -6801;
ret = wc_RNG_HealthTest(1, test2EntropyA, sizeof(test2EntropyA),
test2EntropyB, sizeof(test2EntropyB),
output, sizeof(output));
if (ret != 0)
- return -41;
+ return -6802;
if (XMEMCMP(test2Output, output, sizeof(output)) != 0)
- return -42;
+ return -6803;
+ /* Basic RNG generate block test */
+ if ((ret = random_rng_test()) != 0)
+ return ret;
+
+ /* Test the seed check function. */
+#if !(defined(HAVE_FIPS) || defined(HAVE_SELFTEST)) || \
+ (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2))
+ {
+ word32 i, outputSz;
+
+ /* Repeat the same byte over and over. Should fail. */
+ outputSz = sizeof(output);
+ XMEMSET(output, 1, outputSz);
+ ret = wc_RNG_TestSeed(output, outputSz);
+ if (ret == 0)
+ return -6804;
+
+ /* Every byte of the entropy scratch is different,
+ * entropy is a single byte that shouldn't match. */
+ outputSz = (sizeof(word32) * 2) + 1;
+ for (i = 0; i < outputSz; i++)
+ output[i] = (byte)i;
+ ret = wc_RNG_TestSeed(output, outputSz);
+ if (ret != 0)
+ return -6805;
+
+ outputSz = sizeof(output);
+ for (i = 0; i < outputSz; i++)
+ output[i] = (byte)i;
+ ret = wc_RNG_TestSeed(output, outputSz);
+ if (ret != 0)
+ return -6806;
+ }
+#endif
return 0;
}
-#else /* HAVE_HASHDRBG || NO_RC4 */
+#else
int random_test(void)
{
- RNG rng;
- byte block[32];
- int ret;
+ /* Basic RNG generate block test */
+ return random_rng_test();
+}
-#ifdef HAVE_CAVIUM
- ret = wc_InitRngCavium(&rng, CAVIUM_DEV_ID);
- if (ret != 0) return -2007;
-#endif
- ret = wc_InitRng(&rng);
- if (ret != 0) return -39;
+#endif /* HAVE_HASHDRBG && !CUSTOM_RAND_GENERATE_BLOCK */
+#endif /* WC_NO_RNG */
- ret = wc_RNG_GenerateBlock(&rng, block, sizeof(block));
- if (ret != 0) return -40;
+#ifndef MEM_TEST_SZ
+ #define MEM_TEST_SZ 1024
+#endif
- wc_FreeRng(&rng);
+#if defined(WOLFSSL_STATIC_MEMORY) || !defined(WOLFSSL_NO_MALLOC)
+static int simple_mem_test(int sz)
+{
+ int ret = 0;
+ byte* b;
+ int i;
- return 0;
+ b = (byte*)XMALLOC(sz, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL) {
+ return -6900;
+ }
+ /* utilize memory */
+ for (i = 0; i < sz; i++) {
+ b[i] = (byte)i;
+ }
+ /* read back and verify */
+ for (i = 0; i < sz; i++) {
+ if (b[i] != (byte)i) {
+ ret = -6901;
+ break;
+ }
+ }
+ XFREE(b, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
}
+#endif
+
+int memory_test(void)
+{
+ int ret = 0;
+#ifndef USE_FAST_MATH
+ byte* b = NULL;
+#endif
+#if defined(COMPLEX_MEM_TEST) || defined(WOLFSSL_STATIC_MEMORY)
+ int i;
+#endif
+#ifdef WOLFSSL_STATIC_MEMORY
+ word32 size[] = { WOLFMEM_BUCKETS };
+ word32 dist[] = { WOLFMEM_DIST };
+ byte buffer[30000]; /* make large enough to involve many bucket sizes */
+ int pad = -(int)((wolfssl_word)buffer) & (WOLFSSL_STATIC_ALIGN - 1);
+ /* pad to account for if head of buffer is not at set memory
+ * alignment when tests are ran */
+#endif
+
+#ifdef WOLFSSL_STATIC_MEMORY
+ /* check macro settings */
+ if (sizeof(size)/sizeof(word32) != WOLFMEM_MAX_BUCKETS) {
+ return -7000;
+ }
+
+ if (sizeof(dist)/sizeof(word32) != WOLFMEM_MAX_BUCKETS) {
+ return -7001;
+ }
+
+ for (i = 0; i < WOLFMEM_MAX_BUCKETS; i++) {
+ if ((size[i] % WOLFSSL_STATIC_ALIGN) != 0) {
+ /* each element in array should be divisible by alignment size */
+ return -7002;
+ }
+ }
+
+ for (i = 1; i < WOLFMEM_MAX_BUCKETS; i++) {
+ if (size[i - 1] >= size[i]) {
+ return -7003; /* sizes should be in increasing order */
+ }
+ }
+
+ /* check that padding size returned is possible */
+ if (wolfSSL_MemoryPaddingSz() < WOLFSSL_STATIC_ALIGN) {
+ return -7004; /* no room for wc_Memory struct */
+ }
+
+ if (wolfSSL_MemoryPaddingSz() < 0) {
+ return -7005;
+ }
+
+ if (wolfSSL_MemoryPaddingSz() % WOLFSSL_STATIC_ALIGN != 0) {
+ return -7006; /* not aligned! */
+ }
+
+ /* check function to return optimum buffer size (rounded down) */
+ ret = wolfSSL_StaticBufferSz(buffer, sizeof(buffer), WOLFMEM_GENERAL);
+ if ((ret - pad) % WOLFSSL_STATIC_ALIGN != 0) {
+ return -7007; /* not aligned! */
+ }
+
+ if (ret < 0) {
+ return -7008;
+ }
+
+ if ((unsigned int)ret > sizeof(buffer)) {
+ return -7009; /* did not round down as expected */
+ }
+
+ if (ret != wolfSSL_StaticBufferSz(buffer, ret, WOLFMEM_GENERAL)) {
+ return -7010; /* return value changed when using suggested value */
+ }
+
+ ret = wolfSSL_MemoryPaddingSz();
+ ret += pad; /* add space that is going to be needed if buffer not aligned */
+ if (wolfSSL_StaticBufferSz(buffer, size[0] + ret + 1, WOLFMEM_GENERAL) !=
+ (ret + (int)size[0])) {
+ return -7011; /* did not round down to nearest bucket value */
+ }
+
+ ret = wolfSSL_StaticBufferSz(buffer, sizeof(buffer), WOLFMEM_IO_POOL);
+ if ((ret - pad) < 0) {
+ return -7012;
+ }
+
+ if (((ret - pad) % (WOLFMEM_IO_SZ + wolfSSL_MemoryPaddingSz())) != 0) {
+ return -7013; /* not even chunks of memory for IO size */
+ }
+
+ if (((ret - pad) % WOLFSSL_STATIC_ALIGN) != 0) {
+ return -7014; /* memory not aligned */
+ }
+
+ /* check for passing bad or unknown arguments to functions */
+ if (wolfSSL_StaticBufferSz(NULL, 1, WOLFMEM_GENERAL) > 0) {
+ return -7015;
+ }
+
+ if (wolfSSL_StaticBufferSz(buffer, 1, WOLFMEM_GENERAL) != 0) {
+ return -7016; /* should round to 0 since struct + bucket will not fit */
+ }
+
+ (void)dist; /* avoid static analysis warning of variable not used */
+#endif
+
+#if defined(WOLFSSL_STATIC_MEMORY) || !defined(WOLFSSL_NO_MALLOC)
+ /* simple test */
+ ret = simple_mem_test(MEM_TEST_SZ);
+ if (ret != 0)
+ return ret;
+#endif
+
+#ifdef COMPLEX_MEM_TEST
+ /* test various size blocks */
+ for (i = 1; i < MEM_TEST_SZ; i*=2) {
+ ret = simple_mem_test(i);
+ if (ret != 0)
+ return ret;
+ }
+#endif
+
+#ifndef USE_FAST_MATH
+ /* realloc test */
+ b = (byte*)XMALLOC(MEM_TEST_SZ, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b) {
+ b = (byte*)XREALLOC(b, MEM_TEST_SZ+sizeof(word32), HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ if (b == NULL) {
+ return -7017;
+ }
+ XFREE(b, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
-#endif /* HAVE_HASHDRBG || NO_RC4 */
+ return ret;
+}
#ifdef HAVE_NTRU
@@ -3239,7 +10226,7 @@ byte GetEntropy(ENTROPY_CMD cmd, byte* out);
byte GetEntropy(ENTROPY_CMD cmd, byte* out)
{
- static RNG rng;
+ static WC_RNG rng;
if (cmd == INIT)
return (wc_InitRng(&rng) == 0) ? 1 : 0;
@@ -3260,708 +10247,3347 @@ byte GetEntropy(ENTROPY_CMD cmd, byte* out)
#endif /* HAVE_NTRU */
-#ifndef NO_RSA
+#ifndef NO_FILESYSTEM
+
+/* Cert Paths */
+#ifdef FREESCALE_MQX
+ #define CERT_PREFIX "a:\\"
+ #define CERT_PATH_SEP "\\"
+#elif defined(WOLFSSL_uTKERNEL2)
+ #define CERT_PREFIX "/uda/"
+ #define CERT_PATH_SEP "/"
+#else
+ #define CERT_PREFIX "./"
+ #define CERT_PATH_SEP "/"
+#endif
+#define CERT_ROOT CERT_PREFIX "certs" CERT_PATH_SEP
+
+/* Generated Test Certs */
+#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) && \
+ !defined(USE_CERT_BUFFERS_3072) && !defined(USE_CERT_BUFFERS_4096)
+ #if !defined(NO_RSA) && !defined(NO_ASN)
+ static const char* clientKey = CERT_ROOT "client-key.der";
+ static const char* clientCert = CERT_ROOT "client-cert.der";
+ #ifdef WOLFSSL_CERT_EXT
+ static const char* clientKeyPub = CERT_ROOT "client-keyPub.der";
+ #endif
+ #endif /* !NO_RSA && !NO_ASN */
+#endif
#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
- #ifdef FREESCALE_MQX
- static const char* clientKey = "a:\\certs\\client-key.der";
- static const char* clientCert = "a:\\certs\\client-cert.der";
- #ifdef WOLFSSL_CERT_GEN
- static const char* caKeyFile = "a:\\certs\\ca-key.der";
- static const char* caCertFile = "a:\\certs\\ca-cert.pem";
- #ifdef HAVE_ECC
- static const char* eccCaKeyFile = "a:\\certs\\ecc-key.der";
- static const char* eccCaCertFile = "a:\\certs\\server-ecc.pem";
+ #if !defined(NO_RSA) && !defined(NO_ASN)
+ #if defined(WOLFSSL_CERT_GEN) || defined(HAVE_PKCS7)
+ static const char* rsaCaKeyFile = CERT_ROOT "ca-key.der";
+ #ifdef WOLFSSL_CERT_GEN
+ static const char* rsaCaCertFile = CERT_ROOT "ca-cert.pem";
#endif
- #endif
- #elif defined(WOLFSSL_MKD_SHELL)
- static char* clientKey = "certs/client-key.der";
- static char* clientCert = "certs/client-cert.der";
- void set_clientKey(char *key) { clientKey = key ; }
- void set_clientCert(char *cert) { clientCert = cert ; }
- #ifdef WOLFSSL_CERT_GEN
- static char* caKeyFile = "certs/ca-key.der";
- static char* caCertFile = "certs/ca-cert.pem";
- void set_caKeyFile (char * key) { caKeyFile = key ; }
- void set_caCertFile(char * cert) { caCertFile = cert ; }
- #ifdef HAVE_ECC
- static const char* eccCaKeyFile = "certs/ecc-key.der";
- static const char* eccCaCertFile = "certs/server-ecc.pem";
- void set_eccCaKeyFile (char * key) { eccCaKeyFile = key ; }
- void set_eccCaCertFile(char * cert) { eccCaCertFile = cert ; }
+ #if defined(WOLFSSL_ALT_NAMES) || defined(HAVE_PKCS7)
+ static const char* rsaCaCertDerFile = CERT_ROOT "ca-cert.der";
+ #endif
+ #ifdef HAVE_PKCS7
+ static const char* rsaServerCertDerFile =
+ CERT_ROOT "server-cert.der";
+ static const char* rsaServerKeyDerFile =
+ CERT_ROOT "server-key.der";
#endif
#endif
- #else
- static const char* clientKey = "./certs/client-key.der";
- static const char* clientCert = "./certs/client-cert.der";
- #ifdef WOLFSSL_CERT_GEN
- static const char* caKeyFile = "./certs/ca-key.der";
- static const char* caCertFile = "./certs/ca-cert.pem";
- #ifdef HAVE_ECC
- static const char* eccCaKeyFile = "./certs/ecc-key.der";
- static const char* eccCaCertFile = "./certs/server-ecc.pem";
+ #endif /* !NO_RSA && !NO_ASN */
+#endif /* !USE_CERT_BUFFER_* */
+#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) && \
+ !defined(USE_CERT_BUFFERS_3072) && !defined(USE_CERT_BUFFERS_4096) && \
+ !defined(NO_ASN)
+ #ifndef NO_DH
+ static const char* dhKey = CERT_ROOT "dh2048.der";
+ #endif
+ #ifndef NO_DSA
+ static const char* dsaKey = CERT_ROOT "dsa2048.der";
+ #endif
+#endif /* !USE_CERT_BUFFER_* */
+#if !defined(USE_CERT_BUFFERS_256)
+ #ifdef HAVE_ECC
+ /* cert files to be used in rsa cert gen test, check if RSA enabled */
+ #ifdef HAVE_ECC_KEY_IMPORT
+ static const char* eccKeyDerFile = CERT_ROOT "ecc-key.der";
+ #endif
+#endif
+#if !defined(USE_CERT_BUFFERS_256) && !defined(NO_ASN)
+ #if defined(HAVE_ECC) && defined(WOLFSSL_CERT_GEN)
+ #ifndef NO_RSA
+ /* eccKeyPubFile is used in a test that requires RSA. */
+ static const char* eccKeyPubFile = CERT_ROOT "ecc-keyPub.der";
#endif
+ static const char* eccCaKeyFile = CERT_ROOT "ca-ecc-key.der";
+ static const char* eccCaCertFile = CERT_ROOT "ca-ecc-cert.pem";
+ #ifdef ENABLE_ECC384_CERT_GEN_TEST
+ static const char* eccCaKey384File =
+ CERT_ROOT "ca-ecc384-key.der";
+ static const char* eccCaCert384File =
+ CERT_ROOT "ca-ecc384-cert.pem";
+ #endif
+ #endif
+ #if defined(HAVE_PKCS7) && defined(HAVE_ECC)
+ static const char* eccClientKey = CERT_ROOT "ecc-client-key.der";
+ static const char* eccClientCert = CERT_ROOT "client-ecc-cert.der";
+ #endif
+ #endif /* HAVE_ECC */
+ #ifdef HAVE_ED25519
+ #ifdef WOLFSSL_TEST_CERT
+ static const char* serverEd25519Cert =
+ CERT_ROOT "ed25519/server-ed25519.der";
+ static const char* caEd25519Cert =
+ CERT_ROOT "ed25519/ca-ed25519.der";
#endif
#endif
+ #ifdef HAVE_ED448
+ #ifdef WOLFSSL_TEST_CERT
+ static const char* serverEd448Cert =
+ CERT_ROOT "ed448/server-ed448.der";
+ static const char* caEd448Cert = CERT_ROOT "ed448/ca-ed448.der";
+ #endif
+ #endif
+#endif /* !USE_CERT_BUFFER_* */
+
+#ifndef NO_WRITE_TEMP_FILES
+#ifdef HAVE_ECC
+ #ifdef WOLFSSL_CERT_GEN
+ static const char* certEccPemFile = CERT_PREFIX "certecc.pem";
+ #endif
+ #if defined(WOLFSSL_CERT_GEN) && !defined(NO_RSA)
+ static const char* certEccRsaPemFile = CERT_PREFIX "certeccrsa.pem";
+ static const char* certEccRsaDerFile = CERT_PREFIX "certeccrsa.der";
+ #endif
+ #ifdef WOLFSSL_KEY_GEN
+ static const char* eccCaKeyPemFile = CERT_PREFIX "ecc-key.pem";
+ static const char* eccPubKeyDerFile = CERT_PREFIX "ecc-public-key.der";
+ static const char* eccCaKeyTempFile = CERT_PREFIX "ecc-key.der";
+ static const char* eccPkcs8KeyDerFile = CERT_PREFIX "ecc-key-pkcs8.der";
+ #endif
+ #if defined(WOLFSSL_CERT_GEN) || \
+ (defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_TEST_CERT))
+ static const char* certEccDerFile = CERT_PREFIX "certecc.der";
+ #endif
+#endif /* HAVE_ECC */
+
+#ifndef NO_RSA
+ #if defined(WOLFSSL_CERT_GEN) || \
+ (defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_TEST_CERT))
+ static const char* otherCertDerFile = CERT_PREFIX "othercert.der";
+ static const char* certDerFile = CERT_PREFIX "cert.der";
+ #endif
+ #ifdef WOLFSSL_CERT_GEN
+ static const char* otherCertPemFile = CERT_PREFIX "othercert.pem";
+ static const char* certPemFile = CERT_PREFIX "cert.pem";
+ #endif
+ #ifdef WOLFSSL_CERT_REQ
+ static const char* certReqDerFile = CERT_PREFIX "certreq.der";
+ static const char* certReqPemFile = CERT_PREFIX "certreq.pem";
+ #endif
+#endif /* !NO_RSA */
+
+#if !defined(NO_RSA) || !defined(NO_DSA)
+ #ifdef WOLFSSL_KEY_GEN
+ static const char* keyDerFile = CERT_PREFIX "key.der";
+ static const char* keyPemFile = CERT_PREFIX "key.pem";
+ #endif
#endif
+#endif /* !NO_WRITE_TEMP_FILES */
+#endif /* !NO_FILESYSTEM */
-int rsa_test(void)
+
+#if defined(WOLFSSL_CERT_GEN) && (!defined(NO_RSA) || defined(HAVE_ECC)) || \
+ (defined(WOLFSSL_TEST_CERT) && (defined(HAVE_ED25519) || defined(HAVE_ED448)))
+#ifdef WOLFSSL_MULTI_ATTRIB
+static CertName certDefaultName;
+static void initDefaultName(void)
{
- byte* tmp;
- size_t bytes;
- RsaKey key;
- RNG rng;
- word32 idx = 0;
- int ret;
- byte in[] = "Everyone gets Friday off.";
- word32 inLen = (word32)strlen((char*)in);
- byte out[256];
- byte plain[256];
-#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
- FILE* file, * file2;
+ XMEMCPY(certDefaultName.country, "US", sizeof("US"));
+ certDefaultName.countryEnc = CTC_PRINTABLE;
+ XMEMCPY(certDefaultName.state, "Oregon", sizeof("Oregon"));
+ certDefaultName.stateEnc = CTC_UTF8;
+ XMEMCPY(certDefaultName.locality, "Portland", sizeof("Portland"));
+ certDefaultName.localityEnc = CTC_UTF8;
+ XMEMCPY(certDefaultName.sur, "Test", sizeof("Test"));
+ certDefaultName.surEnc = CTC_UTF8;
+ XMEMCPY(certDefaultName.org, "wolfSSL", sizeof("wolfSSL"));
+ certDefaultName.orgEnc = CTC_UTF8;
+ XMEMCPY(certDefaultName.unit, "Development", sizeof("Development"));
+ certDefaultName.unitEnc = CTC_UTF8;
+ XMEMCPY(certDefaultName.commonName, "www.wolfssl.com", sizeof("www.wolfssl.com"));
+ certDefaultName.commonNameEnc = CTC_UTF8;
+ XMEMCPY(certDefaultName.serialDev, "wolfSSL12345", sizeof("wolfSSL12345"));
+ certDefaultName.serialDevEnc = CTC_PRINTABLE;
+#ifdef WOLFSSL_CERT_EXT
+ XMEMCPY(certDefaultName.busCat, "Private Organization", sizeof("Private Organization"));
+ certDefaultName.busCatEnc = CTC_UTF8;
#endif
+ XMEMCPY(certDefaultName.email, "info@wolfssl.com", sizeof("info@wolfssl.com"));
+
#ifdef WOLFSSL_TEST_CERT
- DecodedCert cert;
+ {
+ NameAttrib* n;
+ /* test having additional OUs and setting DC */
+ n = &certDefaultName.name[0];
+ n->id = ASN_ORGUNIT_NAME;
+ n->type = CTC_UTF8;
+ n->sz = sizeof("Development-2");
+ XMEMCPY(n->value, "Development-2", sizeof("Development-2"));
+
+ #if CTC_MAX_ATTRIB > 3
+ n = &certDefaultName.name[1];
+ n->id = ASN_DOMAIN_COMPONENT;
+ n->type = CTC_UTF8;
+ n->sz = sizeof("com");
+ XMEMCPY(n->value, "com", sizeof("com"));
+
+ n = &certDefaultName.name[2];
+ n->id = ASN_DOMAIN_COMPONENT;
+ n->type = CTC_UTF8;
+ n->sz = sizeof("wolfssl");
+ XMEMCPY(n->value, "wolfssl", sizeof("wolfssl"));
+
+ #endif
+ }
+#endif /* WOLFSSL_TEST_CERT */
+}
+#else
+static const CertName certDefaultName = {
+ "US", CTC_PRINTABLE, /* country */
+ "Oregon", CTC_UTF8, /* state */
+ "Portland", CTC_UTF8, /* locality */
+ "Test", CTC_UTF8, /* sur */
+ "wolfSSL", CTC_UTF8, /* org */
+ "Development", CTC_UTF8, /* unit */
+ "www.wolfssl.com", CTC_UTF8, /* commonName */
+ "wolfSSL12345", CTC_PRINTABLE, /* serial number of device */
+#ifdef WOLFSSL_CERT_EXT
+ "Private Organization", CTC_UTF8, /* businessCategory */
+ "US", CTC_PRINTABLE, /* jurisdiction country */
+ "Oregon", CTC_PRINTABLE, /* jurisdiction state */
#endif
+ "info@wolfssl.com" /* email */
+};
+#endif /* WOLFSSL_MULTI_ATTRIB */
- tmp = (byte*)malloc(FOURK_BUF);
- if (tmp == NULL)
- return -40;
+#ifdef WOLFSSL_CERT_EXT
+ #if ((defined(HAVE_ED25519) || defined(HAVE_ED448)) && \
+ defined(WOLFSSL_TEST_CERT)) || defined(HAVE_ECC)
+ static const char certKeyUsage[] =
+ "digitalSignature,nonRepudiation";
+ #endif
+ #if (defined(WOLFSSL_CERT_REQ) || defined(HAVE_NTRU)) && !defined(NO_RSA)
+ static const char certKeyUsage2[] =
+ "digitalSignature,nonRepudiation,keyEncipherment,keyAgreement";
+ #endif
+#endif /* WOLFSSL_CERT_EXT */
+#endif /* WOLFSSL_CERT_GEN */
-#ifdef USE_CERT_BUFFERS_1024
- XMEMCPY(tmp, client_key_der_1024, sizeof_client_key_der_1024);
- bytes = sizeof_client_key_der_1024;
-#elif defined(USE_CERT_BUFFERS_2048)
- XMEMCPY(tmp, client_key_der_2048, sizeof_client_key_der_2048);
- bytes = sizeof_client_key_der_2048;
-#else
- file = fopen(clientKey, "rb");
+#ifndef NO_RSA
- if (!file) {
- err_sys("can't open ./certs/client-key.der, "
- "Please run from wolfSSL home dir", -40);
- free(tmp);
- return -40;
- }
+#if !defined(NO_ASN_TIME) && !defined(NO_RSA) && defined(WOLFSSL_TEST_CERT) && \
+ !defined(NO_FILESYSTEM)
+static byte minSerial[] = { 0x02, 0x01, 0x01 };
+static byte minName[] = { 0x30, 0x00 };
+static byte nameBad[] = {
+ 0x30, 0x08,
+ 0x31, 0x06,
+ 0x30, 0x04,
+ 0x06, 0x02,
+ 0x55, 0x04,
+};
+static byte minDates[] = {
+ 0x30, 0x1e,
+ 0x17, 0x0d,
+ 0x31, 0x38, 0x30, 0x34, 0x31, 0x33, 0x31, 0x35,
+ 0x32, 0x33, 0x31, 0x30, 0x5a,
+ 0x17, 0x0d,
+ 0x32, 0x31, 0x30, 0x31, 0x30, 0x37, 0x31, 0x35,
+ 0x32, 0x33, 0x31, 0x30, 0x5a
+};
+static byte minPubKey[] = {
+ 0x30, 0x1b,
+ 0x30, 0x0d,
+ 0x06, 0x09,
+ 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+ 0x01,
+ 0x05, 0x00,
+ 0x03, 0x0b,
+ 0x00, 0x30, 0x08,
+ 0x02, 0x01,
+ 0x03,
+ 0x02, 0x03,
+ 0x01, 0x00, 0x01
+};
+static byte minSigAlg[] = {
+ 0x30, 0x0d,
+ 0x06, 0x09,
+ 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+ 0x0b,
+ 0x05, 0x00
+};
+static byte minSig[] = {
+ 0x03, 0x01,
+ 0x00
+};
- bytes = fread(tmp, 1, FOURK_BUF, file);
- fclose(file);
-#endif /* USE_CERT_BUFFERS */
+static int add_seq(byte* certData, int offset, byte* data, byte length)
+{
+ XMEMMOVE(certData + offset + 2, data, length);
+ certData[offset++] = 0x30;
+ certData[offset++] = length;
+ return offset + length;
+}
+static int add_data(byte* certData, int offset, byte* data, byte length)
+{
+ XMEMCPY(certData + offset, data, length);
+ return offset + length;
+}
-#ifdef HAVE_CAVIUM
- wc_RsaInitCavium(&key, CAVIUM_DEV_ID);
-#endif
- ret = wc_InitRsaKey(&key, 0);
+static int cert_asn1_test(void)
+{
+ int ret;
+ int len[3];
+ DecodedCert cert;
+ byte certData[106];
+ byte* badCert = NULL;
+
+ len[2] = add_data(certData, 0, minSerial, (byte)sizeof(minSerial));
+ len[2] = add_data(certData, len[2], minSigAlg, (byte)sizeof(minSigAlg));
+ len[2] = add_data(certData, len[2], minName, (byte)sizeof(minName));
+ len[2] = add_data(certData, len[2], minDates, (byte)sizeof(minDates));
+ len[2] = add_data(certData, len[2], minName, (byte)sizeof(minName));
+ len[2] = add_data(certData, len[2], minPubKey, (byte)sizeof(minPubKey));
+ len[1] = add_seq(certData, 0, certData, len[2]);
+ len[1] = add_data(certData, len[1], minSigAlg, (byte)sizeof(minSigAlg));
+ len[1] = add_data(certData, len[1], minSig, (byte)sizeof(minSig));
+ len[0] = add_seq(certData, 0, certData, len[1]);
+
+ /* Minimal good certificate */
+ InitDecodedCert(&cert, certData, len[0], 0);
+ ret = ParseCert(&cert, CERT_TYPE, NO_VERIFY, NULL);
+ FreeDecodedCert(&cert);
if (ret != 0) {
- free(tmp);
- return -39;
+ ERROR_OUT(-7100, done);
}
- ret = wc_RsaPrivateKeyDecode(tmp, &idx, &key, (word32)bytes);
- if (ret != 0) {
- free(tmp);
- return -41;
+
+ /* Bad issuer name */
+ len[2] = add_data(certData, 0, minSerial, (byte)sizeof(minSerial));
+ len[2] = add_data(certData, len[2], minSigAlg, (byte)sizeof(minSigAlg));
+ len[2] = add_data(certData, len[2], nameBad, (byte)sizeof(nameBad));
+ len[1] = add_seq(certData, 0, certData, len[2]);
+ len[0] = add_seq(certData, 0, certData, len[1]);
+ /* Put data into allocated buffer to allow access error checking. */
+ badCert = (byte*)XMALLOC(len[0], HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XMEMCPY(badCert, certData, len[0]);
+ InitDecodedCert(&cert, badCert, len[0], 0);
+ ret = ParseCert(&cert, CERT_TYPE, NO_VERIFY, NULL);
+ FreeDecodedCert(&cert);
+ if (ret != ASN_PARSE_E) {
+ ERROR_OUT(-7101, done);
}
- ret = wc_InitRng(&rng);
- if (ret != 0) {
- free(tmp);
- return -42;
+ XFREE(badCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ badCert = NULL;
+ ret = 0;
+
+done:
+ if (badCert != NULL)
+ XFREE(badCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+}
+
+int cert_test(void)
+{
+#if !defined(NO_FILESYSTEM)
+ DecodedCert cert;
+ byte* tmp;
+ size_t bytes;
+ XFILE file;
+ int ret;
+
+ tmp = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL)
+ return -7200;
+
+ /* Certificate with Name Constraints extension. */
+#ifdef FREESCALE_MQX
+ file = XFOPEN(".\\certs\\test\\cert-ext-nc.der", "rb");
+#else
+ file = XFOPEN("./certs/test/cert-ext-nc.der", "rb");
+#endif
+ if (!file) {
+ ERROR_OUT(-7201, done);
}
- ret = wc_RsaPublicEncrypt(in, inLen, out, sizeof(out), &key, &rng);
- if (ret < 0) {
- free(tmp);
- return -43;
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+ InitDecodedCert(&cert, tmp, (word32)bytes, 0);
+ ret = ParseCert(&cert, CERT_TYPE, NO_VERIFY, NULL);
+ if (ret != 0) {
+ ERROR_OUT(-7202, done);
}
- ret = wc_RsaPrivateDecrypt(out, ret, plain, sizeof(plain), &key);
- if (ret < 0) {
- free(tmp);
- return -44;
+ FreeDecodedCert(&cert);
+
+ /* Certificate with Inhibit Any Policy extension. */
+#ifdef FREESCALE_MQX
+ file = XFOPEN(".\\certs\\test\\cert-ext-ia.der", "rb");
+#else
+ file = XFOPEN("./certs/test/cert-ext-ia.der", "rb");
+#endif
+ if (!file) {
+ ERROR_OUT(-7203, done);
}
- if (memcmp(plain, in, inLen)) {
- free(tmp);
- return -45;
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+ InitDecodedCert(&cert, tmp, (word32)bytes, 0);
+ ret = ParseCert(&cert, CERT_TYPE, NO_VERIFY, NULL);
+ if (ret != 0) {
+ ERROR_OUT(-7204, done);
}
- ret = wc_RsaSSL_Sign(in, inLen, out, sizeof(out), &key, &rng);
- if (ret < 0) {
- free(tmp);
- return -46;
+ FreeDecodedCert(&cert);
+
+ /* Certificate with Netscape Certificate Type extension. */
+#ifdef FREESCALE_MQX
+ file = XFOPEN(".\\certs\\test\\cert-ext-nct.der", "rb");
+#else
+ file = XFOPEN("./certs/test/cert-ext-nct.der", "rb");
+#endif
+ if (!file) {
+ ERROR_OUT(-7203, done);
}
- memset(plain, 0, sizeof(plain));
- ret = wc_RsaSSL_Verify(out, ret, plain, sizeof(plain), &key);
- if (ret < 0) {
- free(tmp);
- return -47;
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+ InitDecodedCert(&cert, tmp, (word32)bytes, 0);
+ ret = ParseCert(&cert, CERT_TYPE, NO_VERIFY, NULL);
+#ifndef IGNORE_NETSCAPE_CERT_TYPE
+ if (ret != 0) {
+ ERROR_OUT(-7204, done);
}
- if (memcmp(plain, in, ret)) {
- free(tmp);
- return -48;
+#else
+ if (ret != ASN_CRIT_EXT_E) {
+ ERROR_OUT(-7205, done);
}
-#if defined(WOLFSSL_MDK_ARM)
- #define sizeof(s) strlen((char *)(s))
+ ret = 0;
#endif
-#ifdef USE_CERT_BUFFERS_1024
- XMEMCPY(tmp, client_cert_der_1024, sizeof_client_cert_der_1024);
- bytes = sizeof_client_cert_der_1024;
-#elif defined(USE_CERT_BUFFERS_2048)
- XMEMCPY(tmp, client_cert_der_2048, sizeof_client_cert_der_2048);
- bytes = sizeof_client_cert_der_2048;
+done:
+ FreeDecodedCert(&cert);
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#endif /* !NO_FILESYSTEM */
+
+ if (ret == 0)
+ ret = cert_asn1_test();
+
+ return ret;
+}
+#endif /* WOLFSSL_TEST_CERT */
+
+#if defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_TEST_CERT) && \
+ !defined(NO_FILESYSTEM)
+int certext_test(void)
+{
+ DecodedCert cert;
+ byte* tmp;
+ size_t bytes;
+ XFILE file;
+ int ret;
+
+ /* created from rsa_test : othercert.der */
+ byte skid_rsa[] = "\x33\xD8\x45\x66\xD7\x68\x87\x18\x7E\x54"
+ "\x0D\x70\x27\x91\xC7\x26\xD7\x85\x65\xC0";
+
+ /* created from rsa_test : othercert.der */
+ byte akid_rsa[] = "\x27\x8E\x67\x11\x74\xC3\x26\x1D\x3F\xED"
+ "\x33\x63\xB3\xA4\xD8\x1D\x30\xE5\xE8\xD5";
+
+#ifdef HAVE_ECC
+ /* created from ecc_test_cert_gen : certecc.der */
+#ifdef ENABLE_ECC384_CERT_GEN_TEST
+ /* Authority key id from ./certs/ca-ecc384-cert.pem */
+ byte akid_ecc[] = "\xAB\xE0\xC3\x26\x4C\x18\xD4\x72\xBB\xD2"
+ "\x84\x8C\x9C\x0A\x05\x92\x80\x12\x53\x52";
#else
- file2 = fopen(clientCert, "rb");
- if (!file2) {
- free(tmp);
- return -49;
+ /* Authority key id from ./certs/ca-ecc-cert.pem */
+ byte akid_ecc[] = "\x56\x8E\x9A\xC3\xF0\x42\xDE\x18\xB9\x45"
+ "\x55\x6E\xF9\x93\xCF\xEA\xC3\xF3\xA5\x21";
+#endif
+#endif /* HAVE_ECC */
+
+ /* created from rsa_test : cert.der */
+ byte kid_ca[] = "\x33\xD8\x45\x66\xD7\x68\x87\x18\x7E\x54"
+ "\x0D\x70\x27\x91\xC7\x26\xD7\x85\x65\xC0";
+
+ tmp = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL)
+ return -7300;
+
+ /* load othercert.der (Cert signed by an authority) */
+ file = XFOPEN(otherCertDerFile, "rb");
+ if (!file) {
+ XFREE(tmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+ return -7301;
}
- bytes = fread(tmp, 1, FOURK_BUF, file2);
- fclose(file2);
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+
+ InitDecodedCert(&cert, tmp, (word32)bytes, 0);
+
+ ret = ParseCert(&cert, CERT_TYPE, NO_VERIFY, 0);
+ if (ret != 0)
+ return -7302;
+
+ /* check the SKID from a RSA certificate */
+ if (XMEMCMP(skid_rsa, cert.extSubjKeyId, sizeof(cert.extSubjKeyId)))
+ return -7303;
+
+ /* check the AKID from an RSA certificate */
+ if (XMEMCMP(akid_rsa, cert.extAuthKeyId, sizeof(cert.extAuthKeyId)))
+ return -7304;
+
+ /* check the Key Usage from an RSA certificate */
+ if (!cert.extKeyUsageSet)
+ return -7305;
+
+ if (cert.extKeyUsage != (KEYUSE_KEY_ENCIPHER|KEYUSE_KEY_AGREE))
+ return -7306;
+
+ /* check the CA Basic Constraints from an RSA certificate */
+ if (cert.isCA)
+ return -7307;
+
+#ifndef WOLFSSL_SEP /* test only if not using SEP policies */
+ /* check the Certificate Policies Id */
+ if (cert.extCertPoliciesNb != 1)
+ return -7308;
+
+ if (strncmp(cert.extCertPolicies[0], "2.16.840.1.101.3.4.1.42", 23))
+ return -7309;
#endif
-#ifdef sizeof
- #undef sizeof
+ FreeDecodedCert(&cert);
+
+#ifdef HAVE_ECC
+ /* load certecc.der (Cert signed by our ECC CA test in ecc_test_cert_gen) */
+ file = XFOPEN(certEccDerFile, "rb");
+ if (!file) {
+ XFREE(tmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+ return -7310;
+ }
+
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+
+ InitDecodedCert(&cert, tmp, (word32)bytes, 0);
+
+ ret = ParseCert(&cert, CERT_TYPE, NO_VERIFY, 0);
+ if (ret != 0)
+ return -7311;
+
+ /* check the SKID from a ECC certificate - generated dynamically */
+
+ /* check the AKID from an ECC certificate */
+ if (XMEMCMP(akid_ecc, cert.extAuthKeyId, sizeof(cert.extAuthKeyId)))
+ return -7312;
+
+ /* check the Key Usage from an ECC certificate */
+ if (!cert.extKeyUsageSet)
+ return -7313;
+
+ if (cert.extKeyUsage != (KEYUSE_DIGITAL_SIG|KEYUSE_CONTENT_COMMIT))
+ return -7314;
+
+ /* check the CA Basic Constraints from an ECC certificate */
+ if (cert.isCA)
+ return -7315;
+
+#ifndef WOLFSSL_SEP /* test only if not using SEP policies */
+ /* check the Certificate Policies Id */
+ if (cert.extCertPoliciesNb != 2)
+ return -7316;
+
+ if (strncmp(cert.extCertPolicies[0], "2.4.589440.587.101.2.1.9632587.1", 32))
+ return -7317;
+
+ if (strncmp(cert.extCertPolicies[1], "1.2.13025.489.1.113549", 22))
+ return -7318;
#endif
-#ifdef WOLFSSL_TEST_CERT
+ FreeDecodedCert(&cert);
+#endif /* HAVE_ECC */
+
+ /* load cert.der (self signed certificate) */
+ file = XFOPEN(certDerFile, "rb");
+ if (!file) {
+ XFREE(tmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+ return -7319;
+ }
+
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+
InitDecodedCert(&cert, tmp, (word32)bytes, 0);
ret = ParseCert(&cert, CERT_TYPE, NO_VERIFY, 0);
- if (ret != 0) return -491;
+ if (ret != 0)
+ return -7320;
+
+ /* check the SKID from a CA certificate */
+ if (XMEMCMP(kid_ca, cert.extSubjKeyId, sizeof(cert.extSubjKeyId)))
+ return -7321;
+
+ /* check the AKID from an CA certificate */
+ if (XMEMCMP(kid_ca, cert.extAuthKeyId, sizeof(cert.extAuthKeyId)))
+ return -7322;
+
+ /* check the Key Usage from CA certificate */
+ if (!cert.extKeyUsageSet)
+ return -7323;
+
+ if (cert.extKeyUsage != (KEYUSE_KEY_CERT_SIGN|KEYUSE_CRL_SIGN))
+ return -7324;
+
+ /* check the CA Basic Constraints CA certificate */
+ if (!cert.isCA)
+ return -7325;
+
+#ifndef WOLFSSL_SEP /* test only if not using SEP policies */
+ /* check the Certificate Policies Id */
+ if (cert.extCertPoliciesNb != 2)
+ return -7326;
+
+ if (strncmp(cert.extCertPolicies[0], "2.16.840.1.101.3.4.1.42", 23))
+ return -7327;
+
+ if (strncmp(cert.extCertPolicies[1], "1.2.840.113549.1.9.16.6.5", 25))
+ return -7328;
+#endif
FreeDecodedCert(&cert);
+ XFREE(tmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+
+ return 0;
+}
+#endif /* WOLFSSL_CERT_EXT && WOLFSSL_TEST_CERT */
+
+#if defined(WOLFSSL_CERT_GEN_CACHE) && defined(WOLFSSL_TEST_CERT) && \
+ defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_CERT_GEN)
+int decodedCertCache_test(void)
+{
+ int ret = 0;
+ Cert cert;
+ FILE* file;
+ byte* der;
+ word32 derSz;
+
+ derSz = FOURK_BUF;
+ der = XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (der == NULL)
+ ret = -7400;
+
+ if (ret == 0) {
+ /* load cert.der */
+ file = XFOPEN(certDerFile, "rb");
+ if (file != NULL) {
+ derSz = XFREAD(der, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+ }
+ else
+ ret = -7401;
+ }
+
+ if (ret == 0) {
+ if (wc_InitCert(&cert)) {
+ ret = -7402;
+ }
+ }
+
+ if (ret == 0) {
+ ret = wc_SetSubjectBuffer(&cert, der, derSz);
+ }
+
+ if (ret == 0) {
+ if(wc_SetSubjectBuffer(NULL, der, derSz) != BAD_FUNC_ARG)
+ ret = -7403;
+ }
+
+ if (ret == 0) {
+ if (wc_SetSubjectRaw(&cert, der, derSz) != 0)
+ ret = -7404;
+ }
+
+ if (ret == 0) {
+ if(wc_SetSubjectRaw(NULL, der, derSz) != BAD_FUNC_ARG)
+ ret = -7405;
+ }
+
+ if (ret == 0) {
+ if(wc_SetIssuerBuffer(&cert, der, derSz) != 0)
+ ret = -7406;
+ }
+
+ if (ret == 0) {
+ if(wc_SetIssuerBuffer(NULL, der, derSz) != BAD_FUNC_ARG)
+ ret = -7407;
+ }
+
+ if (ret == 0) {
+ if(wc_SetIssuerRaw(&cert, der, derSz) != 0)
+ ret = -7408;
+ }
+
+ if (ret == 0) {
+ if(wc_SetIssuerRaw(NULL, der, derSz) != BAD_FUNC_ARG)
+ ret = -7409;
+ }
+
+#ifdef WOLFSSL_ALT_NAMES
+ if (ret == 0) {
+ if(wc_SetAltNamesBuffer(&cert, der, derSz) != 0)
+ ret = -7410;
+ }
+
+ if (ret == 0) {
+ if(wc_SetAltNamesBuffer(NULL, der, derSz) != BAD_FUNC_ARG)
+ ret = -7411;
+ }
+
+ if (ret == 0) {
+ if(wc_SetDatesBuffer(&cert, der, derSz) != 0)
+ ret = -7412;
+ }
+
+ if (ret == 0) {
+ if(wc_SetDatesBuffer(NULL, der, derSz) != BAD_FUNC_ARG)
+ ret = -7413;
+ }
+#endif
+
+ if (ret == 0) {
+ if(wc_SetAuthKeyIdFromCert(&cert, der, derSz) != 0)
+ ret = -7414;
+ }
+
+ if (ret == 0) {
+ if(wc_SetAuthKeyIdFromCert(NULL, der, derSz) != BAD_FUNC_ARG)
+ ret = -7415;
+ }
+
+ wc_SetCert_Free(&cert);
+ if (ret == 0) {
+ if(cert.decodedCert != NULL)
+ ret = -7416;
+ }
+
+ XFREE(der, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+
+ return ret;
+}
+#endif /* defined(WOLFSSL_CERT_GEN_CACHE) && defined(WOLFSSL_TEST_CERT) &&
+ defined(WOLFSSL_CERT_EXT) && defined(WOLFSSL_CERT_GEN) */
+
+#define RSA_TEST_BYTES 512 /* up to 4096-bit key */
+
+#if !defined(NO_ASN) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+static int rsa_flatten_test(RsaKey* key)
+{
+ int ret;
+ byte e[RSA_TEST_BYTES];
+ byte n[RSA_TEST_BYTES];
+ word32 eSz = sizeof(e);
+ word32 nSz = sizeof(n);
+
+ /* Parameter Validation testing. */
+ ret = wc_RsaFlattenPublicKey(NULL, e, &eSz, n, &nSz);
+#ifdef HAVE_USER_RSA
+ /* Implementation using IPP Libraries returns:
+ * -101 = USER_CRYPTO_ERROR
+ */
+ if (ret == 0)
#else
- (void)bytes;
+ if (ret != BAD_FUNC_ARG)
+#endif
+ return -7417;
+ ret = wc_RsaFlattenPublicKey(key, NULL, &eSz, n, &nSz);
+#ifdef HAVE_USER_RSA
+ /* Implementation using IPP Libraries returns:
+ * -101 = USER_CRYPTO_ERROR
+ */
+ if (ret == 0)
+#else
+ if (ret != BAD_FUNC_ARG)
+#endif
+ return -7418;
+ ret = wc_RsaFlattenPublicKey(key, e, NULL, n, &nSz);
+#ifdef HAVE_USER_RSA
+ /* Implementation using IPP Libraries returns:
+ * -101 = USER_CRYPTO_ERROR
+ */
+ if (ret == 0)
+#else
+ if (ret != BAD_FUNC_ARG)
#endif
+ return -7419;
+ ret = wc_RsaFlattenPublicKey(key, e, &eSz, NULL, &nSz);
+#ifdef HAVE_USER_RSA
+ /* Implementation using IPP Libraries returns:
+ * -101 = USER_CRYPTO_ERROR
+ */
+ if (ret == 0)
+#else
+ if (ret != BAD_FUNC_ARG)
+#endif
+ return -7420;
+ ret = wc_RsaFlattenPublicKey(key, e, &eSz, n, NULL);
+#ifdef HAVE_USER_RSA
+ /* Implementation using IPP Libraries returns:
+ * -101 = USER_CRYPTO_ERROR
+ */
+ if (ret == 0)
+#else
+ if (ret != BAD_FUNC_ARG)
+#endif
+ return -7421;
+ ret = wc_RsaFlattenPublicKey(key, e, &eSz, n, &nSz);
+ if (ret != 0)
+ return -7422;
+ eSz = 0;
+ ret = wc_RsaFlattenPublicKey(key, e, &eSz, n, &nSz);
+#ifdef HAVE_USER_RSA
+ /* Implementation using IPP Libraries returns:
+ * -101 = USER_CRYPTO_ERROR
+ */
+ if (ret == 0)
+#elif defined(HAVE_FIPS) && \
+ (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
+ if (ret != 0)
+#else
+ if (ret != RSA_BUFFER_E)
+#endif
+ return -7423;
+ eSz = sizeof(e);
+ nSz = 0;
+ ret = wc_RsaFlattenPublicKey(key, e, &eSz, n, &nSz);
+#ifdef HAVE_USER_RSA
+ /* Implementation using IPP Libraries returns:
+ * -101 = USER_CRYPTO_ERROR
+ */
+ if (ret == 0)
+#else
+ if (ret != RSA_BUFFER_E)
+#endif
+ return -7424;
+ return 0;
+}
+#endif /* NO_ASN */
-#ifdef WOLFSSL_KEY_GEN
- {
- byte* der;
- byte* pem;
- int derSz = 0;
- int pemSz = 0;
- RsaKey derIn;
- RsaKey genKey;
- FILE* keyFile;
- FILE* pemFile;
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(NO_ASN) \
+ && !defined(WOLFSSL_RSA_VERIFY_ONLY)
+static int rsa_export_key_test(RsaKey* key)
+{
+ int ret;
+ byte e[3];
+ word32 eSz = sizeof(e);
+ byte n[RSA_TEST_BYTES];
+ word32 nSz = sizeof(n);
+ byte d[RSA_TEST_BYTES];
+ word32 dSz = sizeof(d);
+ byte p[RSA_TEST_BYTES/2];
+ word32 pSz = sizeof(p);
+ byte q[RSA_TEST_BYTES/2];
+ word32 qSz = sizeof(q);
+ word32 zero = 0;
+
+ ret = wc_RsaExportKey(NULL, e, &eSz, n, &nSz, d, &dSz, p, &pSz, q, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7425;
+ ret = wc_RsaExportKey(key, NULL, &eSz, n, &nSz, d, &dSz, p, &pSz, q, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7426;
+ ret = wc_RsaExportKey(key, e, NULL, n, &nSz, d, &dSz, p, &pSz, q, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7427;
+ ret = wc_RsaExportKey(key, e, &eSz, NULL, &nSz, d, &dSz, p, &pSz, q, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7428;
+ ret = wc_RsaExportKey(key, e, &eSz, n, NULL, d, &dSz, p, &pSz, q, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7429;
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, NULL, &dSz, p, &pSz, q, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7430;
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, NULL, p, &pSz, q, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7431;
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, NULL, &pSz, q, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7432;
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, p, NULL, q, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7433;
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, p, &pSz, NULL, &qSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7434;
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, p, &pSz, q, NULL);
+ if (ret != BAD_FUNC_ARG)
+ return -7435;
+
+ ret = wc_RsaExportKey(key, e, &zero, n, &nSz, d, &dSz, p, &pSz, q, &qSz);
+ if (ret != RSA_BUFFER_E)
+ return -7436;
+ ret = wc_RsaExportKey(key, e, &eSz, n, &zero, d, &dSz, p, &pSz, q, &qSz);
+ if (ret != RSA_BUFFER_E)
+ return -7437;
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &zero, p, &pSz, q, &qSz);
+ if (ret != RSA_BUFFER_E)
+ return -7438;
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, p, &zero, q, &qSz);
+ if (ret != RSA_BUFFER_E)
+ return -7439;
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, p, &pSz, q, &zero);
+ if (ret != RSA_BUFFER_E)
+ return -7440;
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+
+ ret = wc_RsaExportKey(key, e, &eSz, n, &nSz, d, &dSz, p, &pSz, q, &qSz);
+ if (ret != 0)
+ return -7441;
- ret = wc_InitRsaKey(&genKey, 0);
- if (ret != 0)
- return -300;
- ret = wc_MakeRsaKey(&genKey, 1024, 65537, &rng);
- if (ret != 0)
- return -301;
+ return 0;
+}
+#endif /* !HAVE_FIPS && !USER_RSA && !NO_ASN */
- der = (byte*)malloc(FOURK_BUF);
- if (der == NULL) {
- wc_FreeRsaKey(&genKey);
- return -307;
+#ifndef NO_SIG_WRAPPER
+static int rsa_sig_test(RsaKey* key, word32 keyLen, int modLen, WC_RNG* rng)
+{
+ int ret;
+ word32 sigSz;
+ const byte in[] = "Everyone gets Friday off.";
+ const byte hash[] = {
+ 0xf2, 0x02, 0x95, 0x65, 0xcb, 0xf6, 0x2a, 0x59,
+ 0x39, 0x2c, 0x05, 0xff, 0x0e, 0x29, 0xaf, 0xfe,
+ 0x47, 0x33, 0x8c, 0x99, 0x8d, 0x58, 0x64, 0x83,
+ 0xa6, 0x58, 0x0a, 0x33, 0x0b, 0x84, 0x5f, 0x5f
+ };
+ const byte hashEnc[] = {
+ 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
+ 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x05,
+ 0x00, 0x04, 0x20,
+
+ 0xf2, 0x02, 0x95, 0x65, 0xcb, 0xf6, 0x2a, 0x59,
+ 0x39, 0x2c, 0x05, 0xff, 0x0e, 0x29, 0xaf, 0xfe,
+ 0x47, 0x33, 0x8c, 0x99, 0x8d, 0x58, 0x64, 0x83,
+ 0xa6, 0x58, 0x0a, 0x33, 0x0b, 0x84, 0x5f, 0x5f
+ };
+ word32 inLen = (word32)XSTRLEN((char*)in);
+ byte out[RSA_TEST_BYTES];
+
+ /* Parameter Validation testing. */
+ ret = wc_SignatureGetSize(WC_SIGNATURE_TYPE_NONE, key, keyLen);
+ if (ret != BAD_FUNC_ARG)
+ return -7442;
+ ret = wc_SignatureGetSize(WC_SIGNATURE_TYPE_RSA, key, 0);
+ if (ret != BAD_FUNC_ARG)
+ return -7443;
+
+ sigSz = (word32)modLen;
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, NULL,
+ inLen, out, &sigSz, key, keyLen, rng);
+ if (ret != BAD_FUNC_ARG)
+ return -7444;
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ 0, out, &sigSz, key, keyLen, rng);
+ if (ret != BAD_FUNC_ARG)
+ return -7445;
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, NULL, &sigSz, key, keyLen, rng);
+ if (ret != BAD_FUNC_ARG)
+ return -7446;
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, NULL, key, keyLen, rng);
+ if (ret != BAD_FUNC_ARG)
+ return -7447;
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, &sigSz, NULL, keyLen, rng);
+ if (ret != BAD_FUNC_ARG)
+ return -7448;
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, &sigSz, key, 0, rng);
+ if (ret != BAD_FUNC_ARG)
+ return -7449;
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, &sigSz, key, keyLen, NULL);
+#ifdef HAVE_USER_RSA
+ /* Implementation using IPP Libraries returns:
+ * -101 = USER_CRYPTO_ERROR
+ */
+ if (ret == 0)
+#elif defined(WOLFSSL_AFALG_XILINX_RSA)
+ /* blinding / rng handled with hardware acceleration */
+ if (ret != 0)
+#elif defined(WOLFSSL_ASYNC_CRYPT) || defined(WOLF_CRYPTO_CB)
+ /* async may not require RNG */
+ if (ret != 0 && ret != MISSING_RNG_E)
+#elif defined(HAVE_FIPS) || defined(WOLFSSL_ASYNC_CRYPT) || \
+ !defined(WC_RSA_BLINDING)
+ /* FIPS140 implementation does not do blinding */
+ if (ret != 0)
+#elif defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ if (ret != SIG_TYPE_E)
+#elif defined(WOLFSSL_CRYPTOCELL)
+ /* RNG is handled with the cryptocell */
+ if (ret != 0)
+#else
+ if (ret != MISSING_RNG_E)
+#endif
+ return -7450;
+ sigSz = 0;
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, &sigSz, key, keyLen, rng);
+ if (ret != BAD_FUNC_ARG)
+ return -7451;
+
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, NULL,
+ inLen, out, (word32)modLen, key, keyLen);
+ if (ret != BAD_FUNC_ARG)
+ return -7452;
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ 0, out, (word32)modLen, key, keyLen);
+ if (ret != BAD_FUNC_ARG)
+ return -7453;
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, NULL, (word32)modLen, key, keyLen);
+ if (ret != BAD_FUNC_ARG)
+ return -7454;
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, 0, key, keyLen);
+ if (ret != BAD_FUNC_ARG)
+ return -7455;
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, (word32)modLen, NULL, keyLen);
+ if (ret != BAD_FUNC_ARG)
+ return -7456;
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, (word32)modLen, key, 0);
+ if (ret != BAD_FUNC_ARG)
+ return -7457;
+
+#ifndef HAVE_ECC
+ ret = wc_SignatureGetSize(WC_SIGNATURE_TYPE_ECC, key, keyLen);
+ if (ret != SIG_TYPE_E)
+ return -7458;
+#endif
+
+ /* Use APIs. */
+ ret = wc_SignatureGetSize(WC_SIGNATURE_TYPE_RSA, key, keyLen);
+ if (ret != modLen)
+ return -7459;
+ ret = wc_SignatureGetSize(WC_SIGNATURE_TYPE_RSA_W_ENC, key, keyLen);
+ if (ret != modLen)
+ return -7460;
+
+ sigSz = (word32)ret;
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ XMEMSET(out, 0, sizeof(out));
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, &sigSz, key, keyLen, rng);
+ if (ret != 0)
+ return -7461;
+
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, (word32)modLen, key, keyLen);
+ if (ret != 0)
+ return -7462;
+
+ sigSz = (word32)sizeof(out);
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA_W_ENC,
+ in, inLen, out, &sigSz, key, keyLen, rng);
+ if (ret != 0)
+ return -7463;
+
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA_W_ENC,
+ in, inLen, out, (word32)modLen, key, keyLen);
+ if (ret != 0)
+ return -7464;
+
+ /* Wrong signature type. */
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA, in,
+ inLen, out, (word32)modLen, key, keyLen);
+ if (ret == 0)
+ return -7465;
+
+ /* check hash functions */
+ sigSz = (word32)sizeof(out);
+ ret = wc_SignatureGenerateHash(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA,
+ hash, (int)sizeof(hash), out, &sigSz, key, keyLen, rng);
+ if (ret != 0)
+ return -7466;
+
+ ret = wc_SignatureVerifyHash(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA,
+ hash, (int)sizeof(hash), out, (word32)modLen, key, keyLen);
+ if (ret != 0)
+ return -7467;
+
+ sigSz = (word32)sizeof(out);
+ ret = wc_SignatureGenerateHash(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA_W_ENC,
+ hashEnc, (int)sizeof(hashEnc), out, &sigSz, key, keyLen, rng);
+ if (ret != 0)
+ return -7468;
+
+ ret = wc_SignatureVerifyHash(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_RSA_W_ENC,
+ hashEnc, (int)sizeof(hashEnc), out, (word32)modLen, key, keyLen);
+ if (ret != 0)
+ return -7469;
+#else
+ (void)hash;
+ (void)hashEnc;
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+
+ return 0;
+}
+#endif /* !NO_SIG_WRAPPER */
+
+#ifdef WC_RSA_NONBLOCK
+static int rsa_nb_test(RsaKey* key, const byte* in, word32 inLen, byte* out,
+ word32 outSz, byte* plain, word32 plainSz, WC_RNG* rng)
+{
+ int ret = 0, count;
+ int signSz = 0;
+ RsaNb nb;
+ byte* inlinePlain = NULL;
+
+ /* Enable non-blocking RSA mode - provide context */
+ ret = wc_RsaSetNonBlock(key, &nb);
+ if (ret != 0)
+ return ret;
+
+#ifdef WC_RSA_NONBLOCK_TIME
+ /* Enable time based RSA blocking. 8 microseconds max (3.1GHz) */
+ ret = wc_RsaSetNonBlockTime(key, 8, 3100);
+ if (ret != 0)
+ return ret;
+#endif
+
+ count = 0;
+ do {
+ ret = wc_RsaSSL_Sign(in, inLen, out, outSz, key, rng);
+ count++; /* track number of would blocks */
+ if (ret == FP_WOULDBLOCK) {
+ /* do "other" work here */
}
- pem = (byte*)malloc(FOURK_BUF);
- if (pem == NULL) {
- free(der);
- wc_FreeRsaKey(&genKey);
- return -308;
+ } while (ret == FP_WOULDBLOCK);
+ if (ret < 0) {
+ return ret;
+ }
+#ifdef DEBUG_WOLFSSL
+ printf("RSA non-block sign: %d times\n", count);
+#endif
+ signSz = ret;
+
+ /* Test non-blocking verify */
+ XMEMSET(plain, 0, plainSz);
+ count = 0;
+ do {
+ ret = wc_RsaSSL_Verify(out, (word32)signSz, plain, plainSz, key);
+ count++; /* track number of would blocks */
+ if (ret == FP_WOULDBLOCK) {
+ /* do "other" work here */
}
+ } while (ret == FP_WOULDBLOCK);
+ if (ret < 0) {
+ return ret;
+ }
+#ifdef DEBUG_WOLFSSL
+ printf("RSA non-block verify: %d times\n", count);
+#endif
- derSz = wc_RsaKeyToDer(&genKey, der, FOURK_BUF);
- if (derSz < 0) {
- free(der);
- free(pem);
- return -302;
+ if (signSz == ret && XMEMCMP(plain, in, (size_t)ret)) {
+ return SIG_VERIFY_E;
+ }
+
+ /* Test inline non-blocking verify */
+ count = 0;
+ do {
+ ret = wc_RsaSSL_VerifyInline(out, (word32)signSz, &inlinePlain, key);
+ count++; /* track number of would blocks */
+ if (ret == FP_WOULDBLOCK) {
+ /* do "other" work here */
}
+ } while (ret == FP_WOULDBLOCK);
+ if (ret < 0) {
+ return ret;
+ }
+#ifdef DEBUG_WOLFSSL
+ printf("RSA non-block inline verify: %d times\n", count);
+#endif
-#ifdef FREESCALE_MQX
- keyFile = fopen("a:\\certs\\key.der", "wb");
+ if (signSz == ret && XMEMCMP(inlinePlain, in, (size_t)ret)) {
+ return SIG_VERIFY_E;
+ }
+
+ /* Disabling non-block RSA mode */
+ ret = wc_RsaSetNonBlock(key, NULL);
+
+ (void)count;
+
+ return 0;
+}
+#endif
+
+#if !defined(HAVE_USER_RSA) && !defined(NO_ASN)
+static int rsa_decode_test(RsaKey* keyPub)
+{
+ int ret;
+ word32 inSz;
+ word32 inOutIdx;
+ static const byte n[2] = { 0x00, 0x23 };
+ static const byte e[2] = { 0x00, 0x03 };
+ static const byte good[] = { 0x30, 0x06, 0x02, 0x01, 0x23, 0x02, 0x1,
+ 0x03 };
+ static const byte goodAlgId[] = { 0x30, 0x0f, 0x30, 0x0d, 0x06, 0x00,
+ 0x03, 0x09, 0x00, 0x30, 0x06, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
+ static const byte goodAlgIdNull[] = { 0x30, 0x11, 0x30, 0x0f, 0x06, 0x00,
+ 0x05, 0x00, 0x03, 0x09, 0x00, 0x30, 0x06, 0x02, 0x01, 0x23,
+ 0x02, 0x1, 0x03 };
+ static const byte badAlgIdNull[] = { 0x30, 0x12, 0x30, 0x10, 0x06, 0x00,
+ 0x05, 0x01, 0x00, 0x03, 0x09, 0x00, 0x30, 0x06, 0x02, 0x01, 0x23,
+ 0x02, 0x1, 0x03 };
+ static const byte badNotBitString[] = { 0x30, 0x0f, 0x30, 0x0d, 0x06, 0x00,
+ 0x04, 0x09, 0x00, 0x30, 0x06, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
+ static const byte badBitStringLen[] = { 0x30, 0x0f, 0x30, 0x0d, 0x06, 0x00,
+ 0x03, 0x0a, 0x00, 0x30, 0x06, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
+ static const byte badNoSeq[] = { 0x30, 0x0d, 0x30, 0x0b, 0x06, 0x00, 0x03,
+ 0x07, 0x00, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
+ static const byte badNoObj[] = {
+ 0x30, 0x0f, 0x30, 0x0d, 0x05, 0x00, 0x03, 0x09, 0x00, 0x30, 0x06,
+ 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
+ static const byte badIntN[] = { 0x30, 0x06, 0x02, 0x05, 0x23, 0x02, 0x1,
+ 0x03 };
+ static const byte badNotIntE[] = { 0x30, 0x06, 0x02, 0x01, 0x23, 0x04, 0x1,
+ 0x03 };
+ static const byte badLength[] = { 0x30, 0x04, 0x02, 0x01, 0x23, 0x02, 0x1,
+ 0x03 };
+ static const byte badBitStrNoZero[] = { 0x30, 0x0e, 0x30, 0x0c, 0x06, 0x00,
+ 0x03, 0x08, 0x30, 0x06, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
+
+ ret = wc_InitRsaKey(keyPub, NULL);
+ if (ret != 0)
+ return -7470;
+
+ /* Parameter Validation testing. */
+ ret = wc_RsaPublicKeyDecodeRaw(NULL, sizeof(n), e, sizeof(e), keyPub);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -7471;
+ goto done;
+ }
+ ret = wc_RsaPublicKeyDecodeRaw(n, sizeof(n), NULL, sizeof(e), keyPub);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -7472;
+ goto done;
+ }
+ ret = wc_RsaPublicKeyDecodeRaw(n, sizeof(n), e, sizeof(e), NULL);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -7473;
+ goto done;
+ }
+ /* TODO: probably should fail when length is -1! */
+ ret = wc_RsaPublicKeyDecodeRaw(n, (word32)-1, e, sizeof(e), keyPub);
+ if (ret != 0) {
+ ret = -7474;
+ goto done;
+ }
+ wc_FreeRsaKey(keyPub);
+ ret = wc_InitRsaKey(keyPub, NULL);
+ if (ret != 0)
+ return -7475;
+ ret = wc_RsaPublicKeyDecodeRaw(n, sizeof(n), e, (word32)-1, keyPub);
+ if (ret != 0) {
+ ret = -7476;
+ goto done;
+ }
+ wc_FreeRsaKey(keyPub);
+ ret = wc_InitRsaKey(keyPub, NULL);
+ if (ret != 0)
+ return -7477;
+
+ /* Use API. */
+ ret = wc_RsaPublicKeyDecodeRaw(n, sizeof(n), e, sizeof(e), keyPub);
+ if (ret != 0) {
+ ret = -7478;
+ goto done;
+ }
+ wc_FreeRsaKey(keyPub);
+ ret = wc_InitRsaKey(keyPub, NULL);
+ if (ret != 0)
+ return -7479;
+
+ /* Parameter Validation testing. */
+ inSz = sizeof(good);
+ ret = wc_RsaPublicKeyDecode(NULL, &inOutIdx, keyPub, inSz);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -7480;
+ goto done;
+ }
+ ret = wc_RsaPublicKeyDecode(good, NULL, keyPub, inSz);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -7481;
+ goto done;
+ }
+ ret = wc_RsaPublicKeyDecode(good, &inOutIdx, NULL, inSz);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -7482;
+ goto done;
+ }
+
+ /* Use good data and offset to bad data. */
+ inOutIdx = 2;
+ inSz = sizeof(good) - inOutIdx;
+ ret = wc_RsaPublicKeyDecode(good, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -7483;
+ goto done;
+ }
+ inOutIdx = 2;
+ inSz = sizeof(goodAlgId) - inOutIdx;
+ ret = wc_RsaPublicKeyDecode(goodAlgId, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -7484;
+ goto done;
+ }
+ inOutIdx = 2;
+ inSz = sizeof(goodAlgId);
+ ret = wc_RsaPublicKeyDecode(goodAlgId, &inOutIdx, keyPub, inSz);
+#ifndef WOLFSSL_NO_DECODE_EXTRA
+ if (ret != ASN_PARSE_E)
+#else
+ if (ret != ASN_RSA_KEY_E)
+#endif
+ {
+ ret = -7485;
+ goto done;
+ }
+ /* Try different bad data. */
+ inSz = sizeof(badAlgIdNull);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(badAlgIdNull, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_EXPECT_0_E) {
+ ret = -7486;
+ goto done;
+ }
+ inSz = sizeof(badNotBitString);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(badNotBitString, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_BITSTR_E) {
+ ret = -7487;
+ goto done;
+ }
+ inSz = sizeof(badBitStringLen);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(badBitStringLen, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -7488;
+ goto done;
+ }
+ inSz = sizeof(badNoSeq);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(badNoSeq, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -7489;
+ goto done;
+ }
+ inSz = sizeof(badNoObj);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(badNoObj, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -7490;
+ goto done;
+ }
+ inSz = sizeof(badIntN);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(badIntN, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_RSA_KEY_E) {
+ ret = -7491;
+ goto done;
+ }
+ inSz = sizeof(badNotIntE);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(badNotIntE, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_RSA_KEY_E) {
+ ret = -7492;
+ goto done;
+ }
+ /* TODO: Shouldn't pass as the sequence length is too small. */
+ inSz = sizeof(badLength);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(badLength, &inOutIdx, keyPub, inSz);
+ if (ret != 0) {
+ ret = -7493;
+ goto done;
+ }
+ /* TODO: Shouldn't ignore object id's data. */
+ wc_FreeRsaKey(keyPub);
+ ret = wc_InitRsaKey(keyPub, NULL);
+ if (ret != 0)
+ return -7494;
+
+ inSz = sizeof(badBitStrNoZero);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(badBitStrNoZero, &inOutIdx, keyPub, inSz);
+ if (ret != ASN_EXPECT_0_E) {
+ ret = -7495;
+ goto done;
+ }
+ wc_FreeRsaKey(keyPub);
+ ret = wc_InitRsaKey(keyPub, NULL);
+ if (ret != 0)
+ return -7496;
+
+ /* Valid data cases. */
+ inSz = sizeof(good);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(good, &inOutIdx, keyPub, inSz);
+ if (ret != 0) {
+ ret = -7497;
+ goto done;
+ }
+ if (inOutIdx != inSz) {
+ ret = -7498;
+ goto done;
+ }
+ wc_FreeRsaKey(keyPub);
+ ret = wc_InitRsaKey(keyPub, NULL);
+ if (ret != 0)
+ return -7499;
+
+ inSz = sizeof(goodAlgId);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(goodAlgId, &inOutIdx, keyPub, inSz);
+ if (ret != 0) {
+ ret = -7500;
+ goto done;
+ }
+ if (inOutIdx != inSz) {
+ ret = -7501;
+ goto done;
+ }
+ wc_FreeRsaKey(keyPub);
+ ret = wc_InitRsaKey(keyPub, NULL);
+ if (ret != 0)
+ return -7502;
+
+ inSz = sizeof(goodAlgIdNull);
+ inOutIdx = 0;
+ ret = wc_RsaPublicKeyDecode(goodAlgIdNull, &inOutIdx, keyPub, inSz);
+ if (ret != 0) {
+ ret = -7503;
+ goto done;
+ }
+ if (inOutIdx != inSz) {
+ ret = -7504;
+ goto done;
+ }
+
+done:
+ wc_FreeRsaKey(keyPub);
+ return ret;
+}
+#endif
+
+#ifdef WC_RSA_PSS
+static int rsa_pss_test(WC_RNG* rng, RsaKey* key)
+{
+ byte digest[WC_MAX_DIGEST_SIZE];
+ int ret = 0;
+ const char* inStr = "Everyone gets Friday off.";
+ word32 inLen = (word32)XSTRLEN((char*)inStr);
+ word32 outSz;
+ word32 plainSz;
+ word32 digestSz;
+ int i, j;
+#ifdef RSA_PSS_TEST_WRONG_PARAMS
+ int k, l;
+#endif
+ int len;
+ byte* plain;
+ int mgf[] = {
+#ifndef NO_SHA
+ WC_MGF1SHA1,
+#endif
+#ifdef WOLFSSL_SHA224
+ WC_MGF1SHA224,
+#endif
+ WC_MGF1SHA256,
+#ifdef WOLFSSL_SHA384
+ WC_MGF1SHA384,
+#endif
+#ifdef WOLFSSL_SHA512
+ WC_MGF1SHA512
+#endif
+ };
+ enum wc_HashType hash[] = {
+#ifndef NO_SHA
+ WC_HASH_TYPE_SHA,
+#endif
+#ifdef WOLFSSL_SHA224
+ WC_HASH_TYPE_SHA224,
+#endif
+ WC_HASH_TYPE_SHA256,
+#ifdef WOLFSSL_SHA384
+ WC_HASH_TYPE_SHA384,
+#endif
+#ifdef WOLFSSL_SHA512
+ WC_HASH_TYPE_SHA512,
+#endif
+ };
+
+ DECLARE_VAR_INIT(in, byte, inLen, inStr, HEAP_HINT);
+ DECLARE_VAR(out, byte, RSA_TEST_BYTES, HEAP_HINT);
+ DECLARE_VAR(sig, byte, RSA_TEST_BYTES, HEAP_HINT);
+
+ /* Test all combinations of hash and MGF. */
+ for (j = 0; j < (int)(sizeof(hash)/sizeof(*hash)); j++) {
+ /* Calculate hash of message. */
+ ret = wc_Hash(hash[j], in, inLen, digest, sizeof(digest));
+ if (ret != 0)
+ ERROR_OUT(-7505, exit_rsa_pss);
+ digestSz = wc_HashGetDigestSize(hash[j]);
+
+ for (i = 0; i < (int)(sizeof(mgf)/sizeof(*mgf)); i++) {
+ outSz = RSA_TEST_BYTES;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_Sign_ex(digest, digestSz, out, outSz,
+ hash[j], mgf[i], -1, key, rng);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret <= 0)
+ ERROR_OUT(-7506, exit_rsa_pss);
+ outSz = ret;
+
+ XMEMCPY(sig, out, outSz);
+ plain = NULL;
+ TEST_SLEEP();
+
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_VerifyInline_ex(sig, outSz, &plain, hash[j],
+ mgf[i], -1, key);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret <= 0)
+ ERROR_OUT(-7507, exit_rsa_pss);
+ plainSz = ret;
+ TEST_SLEEP();
+
+#ifdef HAVE_SELFTEST
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, plain, plainSz,
+ hash[j], -1);
+#else
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, plain, plainSz,
+ hash[j], -1, wc_RsaEncryptSize(key)*8);
+#endif
+ if (ret != 0)
+ ERROR_OUT(-7508, exit_rsa_pss);
+
+#ifdef RSA_PSS_TEST_WRONG_PARAMS
+ for (k = 0; k < (int)(sizeof(mgf)/sizeof(*mgf)); k++) {
+ for (l = 0; l < (int)(sizeof(hash)/sizeof(*hash)); l++) {
+ if (i == k && j == l)
+ continue;
+
+ XMEMCPY(sig, out, outSz);
+
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_VerifyInline_ex(sig, outSz,
+ (byte**)&plain, hash[l], mgf[k], -1, key);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret >= 0)
+ ERROR_OUT(-7509, exit_rsa_pss);
+ }
+ }
+#endif
+ }
+ }
+
+ /* Test that a salt length of zero works. */
+ digestSz = wc_HashGetDigestSize(hash[0]);
+ outSz = RSA_TEST_BYTES;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_Sign_ex(digest, digestSz, out, outSz, hash[0],
+ mgf[0], 0, key, rng);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret <= 0)
+ ERROR_OUT(-7510, exit_rsa_pss);
+ outSz = ret;
+ TEST_SLEEP();
+
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_Verify_ex(out, outSz, sig, outSz, hash[0], mgf[0],
+ 0, key);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret <= 0)
+ ERROR_OUT(-7511, exit_rsa_pss);
+ plainSz = ret;
+ TEST_SLEEP();
+
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+#ifdef HAVE_SELFTEST
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, sig, plainSz,
+ hash[0], 0);
#else
- keyFile = fopen("./key.der", "wb");
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, sig, plainSz,
+ hash[0], 0, 0);
#endif
- if (!keyFile) {
- free(der);
- free(pem);
- wc_FreeRsaKey(&genKey);
- return -303;
}
- ret = (int)fwrite(der, 1, derSz, keyFile);
- fclose(keyFile);
- if (ret != derSz) {
- free(der);
- free(pem);
- wc_FreeRsaKey(&genKey);
- return -313;
+ } while (ret == WC_PENDING_E);
+ if (ret != 0)
+ ERROR_OUT(-7512, exit_rsa_pss);
+
+ XMEMCPY(sig, out, outSz);
+ plain = NULL;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_VerifyInline_ex(sig, outSz, &plain, hash[0], mgf[0],
+ 0, key);
}
+ } while (ret == WC_PENDING_E);
+ if (ret <= 0)
+ ERROR_OUT(-7513, exit_rsa_pss);
+ plainSz = ret;
+ TEST_SLEEP();
+
+#ifdef HAVE_SELFTEST
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, plain, plainSz, hash[0],
+ 0);
+#else
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, plain, plainSz, hash[0],
+ 0, 0);
+#endif
+ if (ret != 0)
+ ERROR_OUT(-7514, exit_rsa_pss);
- pemSz = wc_DerToPem(der, derSz, pem, FOURK_BUF, PRIVATEKEY_TYPE);
- if (pemSz < 0) {
- free(der);
- free(pem);
- wc_FreeRsaKey(&genKey);
- return -304;
+ /* Test bad salt lengths in various APIs. */
+ digestSz = wc_HashGetDigestSize(hash[0]);
+ outSz = RSA_TEST_BYTES;
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+ len = -2;
+#else
+ len = -3;
+#endif
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_Sign_ex(digest, digestSz, out, outSz, hash[0],
+ mgf[0], len, key, rng);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret != PSS_SALTLEN_E)
+ ERROR_OUT(-7515, exit_rsa_pss);
+
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_Sign_ex(digest, digestSz, out, outSz, hash[0],
+ mgf[0], digestSz + 1, key, rng);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret != PSS_SALTLEN_E)
+ ERROR_OUT(-7516, exit_rsa_pss);
+ TEST_SLEEP();
+
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_VerifyInline_ex(sig, outSz, &plain, hash[0],
+ mgf[0], -2, key);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret != PSS_SALTLEN_E)
+ ERROR_OUT(-7517, exit_rsa_pss);
+ TEST_SLEEP();
+
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev,
+ WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPSS_VerifyInline_ex(sig, outSz, &plain, hash[0], mgf[0],
+ digestSz + 1, key);
}
+ } while (ret == WC_PENDING_E);
+ if (ret != PSS_SALTLEN_E)
+ ERROR_OUT(-7518, exit_rsa_pss);
+ TEST_SLEEP();
-#ifdef FREESCALE_MQX
- pemFile = fopen("a:\\certs\\key.pem", "wb");
+#ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER
+ len = -2;
#else
- pemFile = fopen("./key.pem", "wb");
+ len = -3;
#endif
- if (!pemFile) {
- free(der);
- free(pem);
- wc_FreeRsaKey(&genKey);
- return -305;
+#ifdef HAVE_SELFTEST
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, plain, plainSz, hash[0],
+ len);
+#else
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, plain, plainSz, hash[0],
+ len, 0);
+#endif
+ if (ret != PSS_SALTLEN_E)
+ ERROR_OUT(-7519, exit_rsa_pss);
+#ifndef WOLFSSL_PSS_LONG_SALT
+ len = digestSz + 1;
+#else
+ len = plainSz - digestSz - 1;
+#endif
+#ifdef HAVE_SELFTEST
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, plain, plainSz, hash[0],
+ len);
+#else
+ ret = wc_RsaPSS_CheckPadding_ex(digest, digestSz, plain, plainSz, hash[0],
+ len, 0);
+#endif
+ if (ret != PSS_SALTLEN_E)
+ ERROR_OUT(-7520, exit_rsa_pss);
+
+ ret = 0;
+exit_rsa_pss:
+ FREE_VAR(sig, HEAP_HINT);
+ FREE_VAR(in, HEAP_HINT);
+ FREE_VAR(out, HEAP_HINT);
+
+ return ret;
+}
+#endif
+
+#ifdef WC_RSA_NO_PADDING
+int rsa_no_pad_test(void)
+{
+ WC_RNG rng;
+ RsaKey key;
+ byte* tmp;
+ size_t bytes;
+ int ret;
+ word32 inLen = 0;
+ word32 idx = 0;
+ word32 outSz = RSA_TEST_BYTES;
+ word32 plainSz = RSA_TEST_BYTES;
+#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) && \
+ !defined(USE_CERT_BUFFERS_3072) && !defined(USE_CERT_BUFFERS_4096) && \
+ !defined(NO_FILESYSTEM)
+ XFILE file;
+#endif
+ DECLARE_VAR(out, byte, RSA_TEST_BYTES, HEAP_HINT);
+ DECLARE_VAR(plain, byte, RSA_TEST_BYTES, HEAP_HINT);
+
+ /* initialize stack structures */
+ XMEMSET(&rng, 0, sizeof(rng));
+ XMEMSET(&key, 0, sizeof(key));
+#ifdef USE_CERT_BUFFERS_1024
+ bytes = (size_t)sizeof_client_key_der_1024;
+ if (bytes < (size_t)sizeof_client_cert_der_1024)
+ bytes = (size_t)sizeof_client_cert_der_1024;
+#elif defined(USE_CERT_BUFFERS_2048)
+ bytes = (size_t)sizeof_client_key_der_2048;
+ if (bytes < (size_t)sizeof_client_cert_der_2048)
+ bytes = (size_t)sizeof_client_cert_der_2048;
+#else
+ bytes = FOURK_BUF;
+#endif
+
+ tmp = (byte*)XMALLOC(bytes, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ || out == NULL || plain == NULL
+ #endif
+ ) {
+ ERROR_OUT(-7600, exit_rsa_nopadding);
+ }
+
+#ifdef USE_CERT_BUFFERS_1024
+ XMEMCPY(tmp, client_key_der_1024, (size_t)sizeof_client_key_der_1024);
+#elif defined(USE_CERT_BUFFERS_2048)
+ XMEMCPY(tmp, client_key_der_2048, (size_t)sizeof_client_key_der_2048);
+#elif defined(USE_CERT_BUFFERS_3072)
+ XMEMCPY(tmp, client_key_der_3072, (size_t)sizeof_client_key_der_3072);
+#elif defined(USE_CERT_BUFFERS_4096)
+ XMEMCPY(tmp, client_key_der_4096, (size_t)sizeof_client_key_der_4096);
+#elif !defined(NO_FILESYSTEM)
+ file = XFOPEN(clientKey, "rb");
+ if (!file) {
+ err_sys("can't open ./certs/client-key.der, "
+ "Please run from wolfSSL home dir", -40);
+ ERROR_OUT(-7601, exit_rsa_nopadding);
+ }
+
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+#else
+ /* No key to use. */
+ ERROR_OUT(-7602, exit_rsa_nopadding);
+#endif /* USE_CERT_BUFFERS */
+
+ ret = wc_InitRsaKey_ex(&key, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-7603, exit_rsa_nopadding);
+ }
+ ret = wc_RsaPrivateKeyDecode(tmp, &idx, &key, (word32)bytes);
+ if (ret != 0) {
+ ERROR_OUT(-7604, exit_rsa_nopadding);
+ }
+
+ /* after loading in key use tmp as the test buffer */
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7605, exit_rsa_nopadding);
+ }
+
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ inLen = wc_RsaEncryptSize(&key);
+ outSz = inLen;
+ plainSz = inLen;
+ XMEMSET(tmp, 7, inLen);
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaDirect(tmp, inLen, out, &outSz, &key,
+ RSA_PRIVATE_ENCRYPT, &rng);
}
- ret = (int)fwrite(pem, 1, pemSz, pemFile);
- fclose(pemFile);
- if (ret != pemSz) {
- free(der);
- free(pem);
- wc_FreeRsaKey(&genKey);
- return -314;
+ } while (ret == WC_PENDING_E);
+ if (ret <= 0) {
+ ERROR_OUT(-7606, exit_rsa_nopadding);
+ }
+
+ /* encrypted result should not be the same as input */
+ if (XMEMCMP(out, tmp, inLen) == 0) {
+ ERROR_OUT(-7607, exit_rsa_nopadding);
+ }
+ TEST_SLEEP();
+
+ /* decrypt with public key and compare result */
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaDirect(out, outSz, plain, &plainSz, &key,
+ RSA_PUBLIC_DECRYPT, &rng);
}
+ } while (ret == WC_PENDING_E);
+ if (ret <= 0) {
+ ERROR_OUT(-7608, exit_rsa_nopadding);
+ }
- ret = wc_InitRsaKey(&derIn, 0);
- if (ret != 0) {
- free(der);
- free(pem);
- wc_FreeRsaKey(&genKey);
- return -3060;
+ if (XMEMCMP(plain, tmp, inLen) != 0) {
+ ERROR_OUT(-7609, exit_rsa_nopadding);
+ }
+ TEST_SLEEP();
+#endif
+
+#ifdef WC_RSA_BLINDING
+ ret = wc_RsaSetRNG(NULL, &rng);
+ if (ret != BAD_FUNC_ARG) {
+ ERROR_OUT(-7610, exit_rsa_nopadding);
+ }
+
+ ret = wc_RsaSetRNG(&key, &rng);
+ if (ret < 0) {
+ ERROR_OUT(-7611, exit_rsa_nopadding);
+ }
+#endif
+
+ /* test encrypt and decrypt using WC_RSA_NO_PAD */
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPublicEncrypt_ex(tmp, inLen, out, (int)outSz, &key, &rng,
+ WC_RSA_NO_PAD, WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0);
}
- idx = 0;
- ret = wc_RsaPrivateKeyDecode(der, &idx, &derIn, derSz);
- if (ret != 0) {
- free(der);
- free(pem);
- wc_FreeRsaKey(&derIn);
- wc_FreeRsaKey(&genKey);
- return -306;
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7612, exit_rsa_nopadding);
+ }
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_VERIFY_ONLY */
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecrypt_ex(out, outSz, plain, (int)plainSz, &key,
+ WC_RSA_NO_PAD, WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0);
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7613, exit_rsa_nopadding);
+ }
- wc_FreeRsaKey(&derIn);
- wc_FreeRsaKey(&genKey);
- free(pem);
- free(der);
+ if (XMEMCMP(plain, tmp, inLen) != 0) {
+ ERROR_OUT(-7614, exit_rsa_nopadding);
+ }
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+
+ /* test some bad arguments */
+ ret = wc_RsaDirect(out, outSz, plain, &plainSz, &key, -1,
+ &rng);
+ if (ret != BAD_FUNC_ARG) {
+ ERROR_OUT(-7615, exit_rsa_nopadding);
+ }
+
+ ret = wc_RsaDirect(out, outSz, plain, &plainSz, NULL, RSA_PUBLIC_DECRYPT,
+ &rng);
+ if (ret != BAD_FUNC_ARG) {
+ ERROR_OUT(-7616, exit_rsa_nopadding);
+ }
+
+ ret = wc_RsaDirect(out, outSz, NULL, &plainSz, &key, RSA_PUBLIC_DECRYPT,
+ &rng);
+ if (ret != LENGTH_ONLY_E || plainSz != inLen) {
+ ERROR_OUT(-7617, exit_rsa_nopadding);
+ }
+
+ ret = wc_RsaDirect(out, outSz - 10, plain, &plainSz, &key,
+ RSA_PUBLIC_DECRYPT, &rng);
+ if (ret != BAD_FUNC_ARG) {
+ ERROR_OUT(-7618, exit_rsa_nopadding);
}
-#endif /* WOLFSSL_KEY_GEN */
+ /* if making it to this point of code without hitting an ERROR_OUT then
+ * all tests have passed */
+ ret = 0;
+
+exit_rsa_nopadding:
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ FREE_VAR(out, HEAP_HINT);
+ FREE_VAR(plain, HEAP_HINT);
+ wc_FreeRsaKey(&key);
+ wc_FreeRng(&rng);
+
+ return ret;
+}
+#endif /* WC_RSA_NO_PADDING */
#ifdef WOLFSSL_CERT_GEN
+static int rsa_certgen_test(RsaKey* key, RsaKey* keypub, WC_RNG* rng, byte* tmp)
+{
+ RsaKey caKey;
+ byte* der;
+ byte* pem = NULL;
+ int ret;
+ Cert* myCert = NULL;
+ int certSz;
+ size_t bytes3;
+ word32 idx3 = 0;
+#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
+ XFILE file3;
+#endif
+#ifdef WOLFSSL_TEST_CERT
+ DecodedCert decode;
+#endif
+#if defined(WOLFSSL_ALT_NAMES) && !defined(NO_ASN_TIME)
+ struct tm beforeTime;
+ struct tm afterTime;
+#endif
+ const byte mySerial[8] = {1,2,3,4,5,6,7,8};
+
+ (void)keypub;
+
+ XMEMSET(&caKey, 0, sizeof(caKey));
+
+ der = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (der == NULL) {
+ ERROR_OUT(-7619, exit_rsa);
+ }
+ pem = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,DYNAMIC_TYPE_TMP_BUFFER);
+ if (pem == NULL) {
+ ERROR_OUT(-7620, exit_rsa);
+ }
+ myCert = (Cert*)XMALLOC(sizeof(Cert), HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (myCert == NULL) {
+ ERROR_OUT(-7621, exit_rsa);
+ }
+
/* self signed */
+ if (wc_InitCert(myCert)) {
+ ERROR_OUT(-7622, exit_rsa);
+ }
+
+ XMEMCPY(&myCert->subject, &certDefaultName, sizeof(CertName));
+ XMEMCPY(myCert->serial, mySerial, sizeof(mySerial));
+ myCert->serialSz = (int)sizeof(mySerial);
+ myCert->isCA = 1;
+#ifndef NO_SHA256
+ myCert->sigType = CTC_SHA256wRSA;
+#else
+ myCert->sigType = CTC_SHAwRSA;
+#endif
+
+
+#ifdef WOLFSSL_CERT_EXT
+ /* add Policies */
+ XSTRNCPY(myCert->certPolicies[0], "2.16.840.1.101.3.4.1.42",
+ CTC_MAX_CERTPOL_SZ);
+ XSTRNCPY(myCert->certPolicies[1], "1.2.840.113549.1.9.16.6.5",
+ CTC_MAX_CERTPOL_SZ);
+ myCert->certPoliciesNb = 2;
+
+ /* add SKID from the Public Key */
+ if (wc_SetSubjectKeyIdFromPublicKey(myCert, keypub, NULL) != 0) {
+ ERROR_OUT(-7623, exit_rsa);
+ }
+
+ /* add AKID from the Public Key */
+ if (wc_SetAuthKeyIdFromPublicKey(myCert, keypub, NULL) != 0) {
+ ERROR_OUT(-7624, exit_rsa);
+ }
+
+ /* add Key Usage */
+ if (wc_SetKeyUsage(myCert,"cRLSign,keyCertSign") != 0) {
+ ERROR_OUT(-7625, exit_rsa);
+ }
+#ifdef WOLFSSL_EKU_OID
{
- Cert myCert;
- byte* derCert;
- byte* pem;
- FILE* derFile;
- FILE* pemFile;
- int certSz;
- int pemSz;
+ const char unique[] = "2.16.840.1.111111.100.1.10.1";
+ if (wc_SetExtKeyUsageOID(myCert, unique, sizeof(unique), 0,
+ HEAP_HINT) != 0) {
+ ERROR_OUT(-7626, exit_rsa);
+ }
+ }
+#endif /* WOLFSSL_EKU_OID */
+#endif /* WOLFSSL_CERT_EXT */
+
+ ret = 0;
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_MakeSelfCert(myCert, der, FOURK_BUF, key, rng);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7627, exit_rsa);
+ }
+ certSz = ret;
+
#ifdef WOLFSSL_TEST_CERT
- DecodedCert decode;
+ InitDecodedCert(&decode, der, certSz, HEAP_HINT);
+ ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
+ if (ret != 0) {
+ FreeDecodedCert(&decode);
+ ERROR_OUT(-7628, exit_rsa);
+ }
+ FreeDecodedCert(&decode);
#endif
- derCert = (byte*)malloc(FOURK_BUF);
- if (derCert == NULL)
- return -309;
- pem = (byte*)malloc(FOURK_BUF);
- if (pem == NULL) {
- free(derCert);
- return -310;
- }
+ ret = SaveDerAndPem(der, certSz, pem, FOURK_BUF, certDerFile,
+ certPemFile, CERT_TYPE, -5578);
+ if (ret != 0) {
+ goto exit_rsa;
+ }
- wc_InitCert(&myCert);
+ /* Setup Certificate */
+ if (wc_InitCert(myCert)) {
+ ERROR_OUT(-7629, exit_rsa);
+ }
- strncpy(myCert.subject.country, "US", CTC_NAME_SIZE);
- strncpy(myCert.subject.state, "OR", CTC_NAME_SIZE);
- strncpy(myCert.subject.locality, "Portland", CTC_NAME_SIZE);
- strncpy(myCert.subject.org, "yaSSL", CTC_NAME_SIZE);
- strncpy(myCert.subject.unit, "Development", CTC_NAME_SIZE);
- strncpy(myCert.subject.commonName, "www.yassl.com", CTC_NAME_SIZE);
- strncpy(myCert.subject.email, "info@yassl.com", CTC_NAME_SIZE);
- myCert.isCA = 1;
- myCert.sigType = CTC_SHA256wRSA;
+#ifdef WOLFSSL_ALT_NAMES
+ /* Get CA Cert for testing */
+ #ifdef USE_CERT_BUFFERS_1024
+ XMEMCPY(tmp, ca_cert_der_1024, sizeof_ca_cert_der_1024);
+ bytes3 = sizeof_ca_cert_der_1024;
+ #elif defined(USE_CERT_BUFFERS_2048)
+ XMEMCPY(tmp, ca_cert_der_2048, sizeof_ca_cert_der_2048);
+ bytes3 = sizeof_ca_cert_der_2048;
+ #else
+ file3 = XFOPEN(rsaCaCertDerFile, "rb");
+ if (!file3) {
+ ERROR_OUT(-7630, exit_rsa);
+ }
+ bytes3 = XFREAD(tmp, 1, FOURK_BUF, file3);
+ XFCLOSE(file3);
+ #endif /* USE_CERT_BUFFERS */
- certSz = wc_MakeSelfCert(&myCert, derCert, FOURK_BUF, &key, &rng);
- if (certSz < 0) {
- free(derCert);
- free(pem);
- return -401;
+ #if !defined(NO_FILESYSTEM) && !defined(USE_CERT_BUFFERS_1024) && \
+ !defined(USE_CERT_BUFFERS_2048) && !defined(NO_ASN)
+ ret = wc_SetAltNames(myCert, rsaCaCertFile);
+ if (ret != 0) {
+ ERROR_OUT(-7631, exit_rsa);
+ }
+ #endif
+ /* get alt names from der */
+ ret = wc_SetAltNamesBuffer(myCert, tmp, (int)bytes3);
+ if (ret != 0) {
+ ERROR_OUT(-7632, exit_rsa);
}
-#ifdef WOLFSSL_TEST_CERT
- InitDecodedCert(&decode, derCert, certSz, 0);
- ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
+ /* get dates from der */
+ ret = wc_SetDatesBuffer(myCert, tmp, (int)bytes3);
if (ret != 0) {
- free(derCert);
- free(pem);
- return -402;
+ ERROR_OUT(-7633, exit_rsa);
}
- FreeDecodedCert(&decode);
+
+ #ifndef NO_ASN_TIME
+ ret = wc_GetCertDates(myCert, &beforeTime, &afterTime);
+ if (ret < 0) {
+ ERROR_OUT(-7634, exit_rsa);
+ }
+ #endif
+#endif /* WOLFSSL_ALT_NAMES */
+
+ /* Get CA Key */
+#ifdef USE_CERT_BUFFERS_1024
+ XMEMCPY(tmp, ca_key_der_1024, sizeof_ca_key_der_1024);
+ bytes3 = sizeof_ca_key_der_1024;
+#elif defined(USE_CERT_BUFFERS_2048)
+ XMEMCPY(tmp, ca_key_der_2048, sizeof_ca_key_der_2048);
+ bytes3 = sizeof_ca_key_der_2048;
+#else
+ file3 = XFOPEN(rsaCaKeyFile, "rb");
+ if (!file3) {
+ ERROR_OUT(-7635, exit_rsa);
+ }
+
+ bytes3 = XFREAD(tmp, 1, FOURK_BUF, file3);
+ XFCLOSE(file3);
+#endif /* USE_CERT_BUFFERS */
+
+ ret = wc_InitRsaKey(&caKey, HEAP_HINT);
+ if (ret != 0) {
+ ERROR_OUT(-7636, exit_rsa);
+ }
+ ret = wc_RsaPrivateKeyDecode(tmp, &idx3, &caKey, (word32)bytes3);
+ if (ret != 0) {
+ ERROR_OUT(-7637, exit_rsa);
+ }
+
+#ifndef NO_SHA256
+ myCert->sigType = CTC_SHA256wRSA;
+#else
+ myCert->sigType = CTC_SHAwRSA;
#endif
-#ifdef FREESCALE_MQX
- derFile = fopen("a:\\certs\\cert.der", "wb");
+ XMEMCPY(&myCert->subject, &certDefaultName, sizeof(CertName));
+
+#ifdef WOLFSSL_CERT_EXT
+ /* add Policies */
+ XSTRNCPY(myCert->certPolicies[0], "2.16.840.1.101.3.4.1.42",
+ CTC_MAX_CERTPOL_SZ);
+ myCert->certPoliciesNb =1;
+
+ /* add SKID from the Public Key */
+ if (wc_SetSubjectKeyIdFromPublicKey(myCert, key, NULL) != 0) {
+ ERROR_OUT(-7638, exit_rsa);
+ }
+
+ /* add AKID from the CA certificate */
+#if defined(USE_CERT_BUFFERS_2048)
+ ret = wc_SetAuthKeyIdFromCert(myCert, ca_cert_der_2048,
+ sizeof_ca_cert_der_2048);
+#elif defined(USE_CERT_BUFFERS_1024)
+ ret = wc_SetAuthKeyIdFromCert(myCert, ca_cert_der_1024,
+ sizeof_ca_cert_der_1024);
#else
- derFile = fopen("./cert.der", "wb");
+ ret = wc_SetAuthKeyId(myCert, rsaCaCertFile);
#endif
- if (!derFile) {
- free(derCert);
- free(pem);
- return -403;
- }
- ret = (int)fwrite(derCert, 1, certSz, derFile);
- fclose(derFile);
- if (ret != certSz) {
- free(derCert);
- free(pem);
- return -414;
- }
+ if (ret != 0) {
+ ERROR_OUT(-7639, exit_rsa);
+ }
- pemSz = wc_DerToPem(derCert, certSz, pem, FOURK_BUF, CERT_TYPE);
- if (pemSz < 0) {
- free(derCert);
- free(pem);
- return -404;
+ /* add Key Usage */
+ if (wc_SetKeyUsage(myCert,"keyEncipherment,keyAgreement") != 0) {
+ ERROR_OUT(-7640, exit_rsa);
+ }
+#endif /* WOLFSSL_CERT_EXT */
+
+#if defined(USE_CERT_BUFFERS_2048)
+ ret = wc_SetIssuerBuffer(myCert, ca_cert_der_2048,
+ sizeof_ca_cert_der_2048);
+#elif defined(USE_CERT_BUFFERS_1024)
+ ret = wc_SetIssuerBuffer(myCert, ca_cert_der_1024,
+ sizeof_ca_cert_der_1024);
+#else
+ ret = wc_SetIssuer(myCert, rsaCaCertFile);
+#endif
+ if (ret < 0) {
+ ERROR_OUT(-7641, exit_rsa);
+ }
+
+ certSz = wc_MakeCert(myCert, der, FOURK_BUF, key, NULL, rng);
+ if (certSz < 0) {
+ ERROR_OUT(-7642, exit_rsa);
+ }
+
+ ret = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &caKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_SignCert(myCert->bodySz, myCert->sigType, der, FOURK_BUF,
+ &caKey, NULL, rng);
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7643, exit_rsa);
+ }
+ certSz = ret;
-#ifdef FREESCALE_MQX
- pemFile = fopen("a:\\certs\\cert.pem", "wb");
+#ifdef WOLFSSL_TEST_CERT
+ InitDecodedCert(&decode, der, certSz, HEAP_HINT);
+ ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
+ if (ret != 0) {
+ FreeDecodedCert(&decode);
+ ERROR_OUT(-7644, exit_rsa);
+ }
+ FreeDecodedCert(&decode);
+#endif
+
+ ret = SaveDerAndPem(der, certSz, pem, FOURK_BUF, otherCertDerFile,
+ otherCertPemFile, CERT_TYPE, -5598);
+ if (ret != 0) {
+ goto exit_rsa;
+ }
+
+exit_rsa:
+ wc_FreeRsaKey(&caKey);
+
+ XFREE(myCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+
+ return ret;
+}
+#endif
+
+#if !defined(NO_RSA) && defined(HAVE_ECC) && defined(WOLFSSL_CERT_GEN)
+/* Make Cert / Sign example for ECC cert and RSA CA */
+static int rsa_ecc_certgen_test(WC_RNG* rng, byte* tmp)
+{
+ RsaKey caKey;
+ ecc_key caEccKey;
+ ecc_key caEccKeyPub;
+ byte* der;
+ byte* pem = NULL;
+ Cert* myCert = NULL;
+ int certSz;
+ size_t bytes3;
+ word32 idx3 = 0;
+#if (!defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)) \
+ || !defined(USE_CERT_BUFFERS_256)
+ XFILE file3;
+#endif
+#ifdef WOLFSSL_TEST_CERT
+ DecodedCert decode;
+#endif
+ int ret;
+
+ XMEMSET(&caKey, 0, sizeof(caKey));
+ XMEMSET(&caEccKey, 0, sizeof(caEccKey));
+ XMEMSET(&caEccKeyPub, 0, sizeof(caEccKeyPub));
+
+ der = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (der == NULL) {
+ ERROR_OUT(-7645, exit_rsa);
+ }
+ pem = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pem == NULL) {
+ ERROR_OUT(-7646, exit_rsa);
+ }
+ myCert = (Cert*)XMALLOC(sizeof(Cert), HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (myCert == NULL) {
+ ERROR_OUT(-7647, exit_rsa);
+ }
+
+ /* Get CA Key */
+#ifdef USE_CERT_BUFFERS_1024
+ XMEMCPY(tmp, ca_key_der_1024, sizeof_ca_key_der_1024);
+ bytes3 = sizeof_ca_key_der_1024;
+#elif defined(USE_CERT_BUFFERS_2048)
+ XMEMCPY(tmp, ca_key_der_2048, sizeof_ca_key_der_2048);
+ bytes3 = sizeof_ca_key_der_2048;
#else
- pemFile = fopen("./cert.pem", "wb");
+ file3 = XFOPEN(rsaCaKeyFile, "rb");
+ if (!file3) {
+ ERROR_OUT(-7648, exit_rsa);
+ }
+
+ bytes3 = XFREAD(tmp, 1, FOURK_BUF, file3);
+ XFCLOSE(file3);
+#endif /* USE_CERT_BUFFERS */
+
+ ret = wc_InitRsaKey(&caKey, HEAP_HINT);
+ if (ret != 0) {
+ ERROR_OUT(-7649, exit_rsa);
+ }
+ ret = wc_RsaPrivateKeyDecode(tmp, &idx3, &caKey, (word32)bytes3);
+ if (ret != 0) {
+ ERROR_OUT(-7650, exit_rsa);
+ }
+
+ /* Get Cert Key */
+#ifdef USE_CERT_BUFFERS_256
+ XMEMCPY(tmp, ecc_key_pub_der_256, sizeof_ecc_key_pub_der_256);
+ bytes3 = sizeof_ecc_key_pub_der_256;
+#else
+ file3 = XFOPEN(eccKeyPubFile, "rb");
+ if (!file3) {
+ ERROR_OUT(-7651, exit_rsa);
+ }
+
+ bytes3 = XFREAD(tmp, 1, FOURK_BUF, file3);
+ XFCLOSE(file3);
#endif
- if (!pemFile) {
- free(derCert);
- free(pem);
- return -405;
- }
- ret = (int)fwrite(pem, 1, pemSz, pemFile);
- fclose(pemFile);
- if (ret != pemSz) {
- free(derCert);
- free(pem);
- return -406;
+
+ ret = wc_ecc_init_ex(&caEccKeyPub, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-7652, exit_rsa);
+ }
+
+ idx3 = 0;
+ ret = wc_EccPublicKeyDecode(tmp, &idx3, &caEccKeyPub, (word32)bytes3);
+ if (ret != 0) {
+ ERROR_OUT(-7653, exit_rsa);
+ }
+
+ /* Setup Certificate */
+ if (wc_InitCert(myCert)) {
+ ERROR_OUT(-7654, exit_rsa);
+ }
+
+#ifndef NO_SHA256
+ myCert->sigType = CTC_SHA256wRSA;
+#else
+ myCert->sigType = CTC_SHAwRSA;
+#endif
+
+ XMEMCPY(&myCert->subject, &certDefaultName, sizeof(CertName));
+
+#ifdef WOLFSSL_CERT_EXT
+ /* add Policies */
+ XSTRNCPY(myCert->certPolicies[0], "2.4.589440.587.101.2.1.9632587.1",
+ CTC_MAX_CERTPOL_SZ);
+ XSTRNCPY(myCert->certPolicies[1], "1.2.13025.489.1.113549",
+ CTC_MAX_CERTPOL_SZ);
+ myCert->certPoliciesNb = 2;
+
+ /* add SKID from the Public Key */
+ if (wc_SetSubjectKeyIdFromPublicKey(myCert, NULL, &caEccKeyPub) != 0) {
+ ERROR_OUT(-7655, exit_rsa);
+ }
+
+ /* add AKID from the CA certificate */
+#if defined(USE_CERT_BUFFERS_2048)
+ ret = wc_SetAuthKeyIdFromCert(myCert, ca_cert_der_2048,
+ sizeof_ca_cert_der_2048);
+#elif defined(USE_CERT_BUFFERS_1024)
+ ret = wc_SetAuthKeyIdFromCert(myCert, ca_cert_der_1024,
+ sizeof_ca_cert_der_1024);
+#else
+ ret = wc_SetAuthKeyId(myCert, rsaCaCertFile);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7656, exit_rsa);
+ }
+
+ /* add Key Usage */
+ if (wc_SetKeyUsage(myCert, certKeyUsage) != 0) {
+ ERROR_OUT(-7657, exit_rsa);
+ }
+#endif /* WOLFSSL_CERT_EXT */
+
+#if defined(USE_CERT_BUFFERS_2048)
+ ret = wc_SetIssuerBuffer(myCert, ca_cert_der_2048,
+ sizeof_ca_cert_der_2048);
+#elif defined(USE_CERT_BUFFERS_1024)
+ ret = wc_SetIssuerBuffer(myCert, ca_cert_der_1024,
+ sizeof_ca_cert_der_1024);
+#else
+ ret = wc_SetIssuer(myCert, rsaCaCertFile);
+#endif
+ if (ret < 0) {
+ ERROR_OUT(-7658, exit_rsa);
+ }
+
+ certSz = wc_MakeCert(myCert, der, FOURK_BUF, NULL, &caEccKeyPub, rng);
+ if (certSz < 0) {
+ ERROR_OUT(-7659, exit_rsa);
+ }
+
+ ret = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &caEccKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_SignCert(myCert->bodySz, myCert->sigType, der,
+ FOURK_BUF, &caKey, NULL, rng);
}
- free(pem);
- free(derCert);
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7660, exit_rsa);
}
- /* CA style */
- {
- RsaKey caKey;
- Cert myCert;
- byte* derCert;
- byte* pem;
- FILE* derFile;
- FILE* pemFile;
- int certSz;
- int pemSz;
- size_t bytes3;
- word32 idx3 = 0;
- FILE* file3 ;
+ certSz = ret;
+
#ifdef WOLFSSL_TEST_CERT
- DecodedCert decode;
+ InitDecodedCert(&decode, der, certSz, 0);
+ ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
+ if (ret != 0) {
+ FreeDecodedCert(&decode);
+ ERROR_OUT(-7661, exit_rsa);
+
+ }
+ FreeDecodedCert(&decode);
#endif
- derCert = (byte*)malloc(FOURK_BUF);
- if (derCert == NULL)
- return -311;
- pem = (byte*)malloc(FOURK_BUF);
- if (pem == NULL) {
- free(derCert);
- return -312;
- }
+ ret = SaveDerAndPem(der, certSz, pem, FOURK_BUF, certEccRsaDerFile,
+ certEccRsaPemFile, CERT_TYPE, -5616);
+ if (ret != 0) {
+ goto exit_rsa;
+ }
- file3 = fopen(caKeyFile, "rb");
+exit_rsa:
+ wc_FreeRsaKey(&caKey);
+ wc_ecc_free(&caEccKey);
+ wc_ecc_free(&caEccKeyPub);
- if (!file3) {
- free(derCert);
- free(pem);
- return -412;
- }
+ XFREE(myCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ myCert = NULL;
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ pem = NULL;
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ der = NULL;
+
+ if (ret >= 0)
+ ret = 0;
+ return ret;
+}
+#endif /* !NO_RSA && HAVE_ECC && WOLFSSL_CERT_GEN */
+
+#ifdef WOLFSSL_KEY_GEN
+static int rsa_keygen_test(WC_RNG* rng)
+{
+ RsaKey genKey;
+ int ret;
+ byte* der = NULL;
+ byte* pem = NULL;
+ word32 idx = 0;
+ int derSz = 0;
+#if !defined(WOLFSSL_SP_MATH) && !defined(HAVE_FIPS)
+ int keySz = 1024;
+#else
+ int keySz = 2048;
+#endif
+
+ XMEMSET(&genKey, 0, sizeof(genKey));
+
+ ret = wc_InitRsaKey_ex(&genKey, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-7662, exit_rsa);
+ }
- bytes3 = fread(tmp, 1, FOURK_BUF, file3);
- fclose(file3);
+ ret = wc_MakeRsaKey(&genKey, keySz, WC_RSA_EXPONENT, rng);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &genKey.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7663, exit_rsa);
+ }
+ TEST_SLEEP();
+
+ /* If not using old FIPS, or not using FAST or USER RSA... */
+ #if !defined(HAVE_FAST_RSA) && !defined(HAVE_USER_RSA) && \
+ (!defined(HAVE_FIPS) || \
+ (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2))) && \
+ !defined(HAVE_SELFTEST) && !defined(HAVE_INTEL_QA)
+ ret = wc_CheckRsaKey(&genKey);
+ if (ret != 0) {
+ ERROR_OUT(-7664, exit_rsa);
+ }
+ #endif
+ der = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (der == NULL) {
+ ERROR_OUT(-7665, exit_rsa);
+ }
+ pem = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pem == NULL) {
+ ERROR_OUT(-7666, exit_rsa);
+ }
+
+ derSz = wc_RsaKeyToDer(&genKey, der, FOURK_BUF);
+ if (derSz < 0) {
+ ERROR_OUT(-7667, exit_rsa);
+ }
+
+ ret = SaveDerAndPem(der, derSz, pem, FOURK_BUF, keyDerFile, keyPemFile,
+ PRIVATEKEY_TYPE, -5555);
+ if (ret != 0) {
+ goto exit_rsa;
+ }
+
+ wc_FreeRsaKey(&genKey);
+ ret = wc_InitRsaKey(&genKey, HEAP_HINT);
+ if (ret != 0) {
+ ERROR_OUT(-7668, exit_rsa);
+ }
+ idx = 0;
+#if !defined(WOLFSSL_CRYPTOCELL)
+ /* The private key part of the key gen pairs from cryptocell can't be exported */
+ ret = wc_RsaPrivateKeyDecode(der, &idx, &genKey, derSz);
+ if (ret != 0) {
+ ERROR_OUT(-7669, exit_rsa);
+ }
+#endif /* WOLFSSL_CRYPTOCELL */
+
+exit_rsa:
+ wc_FreeRsaKey(&genKey);
+ if (pem != NULL) {
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ pem = NULL;
+ }
+ if (der != NULL) {
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ der = NULL;
+ }
+
+ return ret;
+}
+#endif
+
+int rsa_test(void)
+{
+ int ret;
+ byte* tmp;
+ byte* der = NULL;
+ byte* pem = NULL;
+ size_t bytes;
+ WC_RNG rng;
+ RsaKey key;
+#if defined(WOLFSSL_CERT_EXT) || defined(WOLFSSL_CERT_GEN)
+ RsaKey keypub;
+#endif
+#if defined(HAVE_NTRU)
+ RsaKey caKey;
+#endif
+#if !defined(NO_ASN) || !defined(WOLFSSL_RSA_PUBLIC_ONLY) \
+ || defined(WOLFSSL_PUBLIC_MP)
+ word32 idx = 0;
+#endif
+#if (!defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_PUBLIC_MP)) && \
+ !defined(WC_NO_RSA_OAEP) && !defined(WC_NO_RNG)
+ const char* inStr = "Everyone gets Friday off.";
+ word32 inLen = (word32)XSTRLEN((char*)inStr);
+ const word32 outSz = RSA_TEST_BYTES;
+ const word32 plainSz = RSA_TEST_BYTES;
+#endif
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || defined(WOLFSSL_PUBLIC_MP)
+ byte* res;
+#endif
+#ifndef NO_SIG_WRAPPER
+ int modLen;
+#endif
+#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) && \
+ !defined(USE_CERT_BUFFERS_3072) && !defined(USE_CERT_BUFFERS_4096) && \
+ !defined(NO_FILESYSTEM)
+ XFILE file;
+ XFILE file2;
+#endif
+#ifdef WOLFSSL_TEST_CERT
+ DecodedCert cert;
+#endif
+
+#if (!defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_PUBLIC_MP)) && \
+ !defined(WC_NO_RSA_OAEP) && !defined(WC_NO_RNG)
+ DECLARE_VAR_INIT(in, byte, inLen, inStr, HEAP_HINT);
+ DECLARE_VAR(out, byte, RSA_TEST_BYTES, HEAP_HINT);
+ DECLARE_VAR(plain, byte, RSA_TEST_BYTES, HEAP_HINT);
+#endif
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+ if (in == NULL)
+ return MEMORY_E;
+#endif
+
+ /* initialize stack structures */
+ XMEMSET(&rng, 0, sizeof(rng));
+ XMEMSET(&key, 0, sizeof(key));
+#ifdef WOLFSSL_CERT_EXT
+ XMEMSET(&keypub, 0, sizeof(keypub));
+#endif
+#if defined(HAVE_NTRU)
+ XMEMSET(&caKey, 0, sizeof(caKey));
+#endif
+
+#if !defined(HAVE_USER_RSA) && !defined(NO_ASN)
+ ret = rsa_decode_test(&key);
+ if (ret != 0)
+ return ret;
+#endif
+
+#ifdef USE_CERT_BUFFERS_1024
+ bytes = (size_t)sizeof_client_key_der_1024;
+ if (bytes < (size_t)sizeof_client_cert_der_1024)
+ bytes = (size_t)sizeof_client_cert_der_1024;
+#elif defined(USE_CERT_BUFFERS_2048)
+ bytes = (size_t)sizeof_client_key_der_2048;
+ if (bytes < (size_t)sizeof_client_cert_der_2048)
+ bytes = (size_t)sizeof_client_cert_der_2048;
+#elif defined(USE_CERT_BUFFERS_3072)
+ bytes = (size_t)sizeof_client_key_der_3072;
+ if (bytes < (size_t)sizeof_client_cert_der_3072)
+ bytes = (size_t)sizeof_client_cert_der_3072;
+#elif defined(USE_CERT_BUFFERS_4096)
+ bytes = (size_t)sizeof_client_key_der_4096;
+ if (bytes < (size_t)sizeof_client_cert_der_4096)
+ bytes = (size_t)sizeof_client_cert_der_4096;
+#else
+ bytes = FOURK_BUF;
+#endif
+
+ tmp = (byte*)XMALLOC(bytes, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ || out == NULL || plain == NULL
+ #endif
+ ) {
+ return -7700;
+ }
- ret = wc_InitRsaKey(&caKey, 0);
+#ifdef USE_CERT_BUFFERS_1024
+ XMEMCPY(tmp, client_key_der_1024, (size_t)sizeof_client_key_der_1024);
+#elif defined(USE_CERT_BUFFERS_2048)
+ XMEMCPY(tmp, client_key_der_2048, (size_t)sizeof_client_key_der_2048);
+#elif defined(USE_CERT_BUFFERS_3072)
+ XMEMCPY(tmp, client_key_der_3072, (size_t)sizeof_client_key_der_3072);
+#elif defined(USE_CERT_BUFFERS_4096)
+ XMEMCPY(tmp, client_key_der_4096, (size_t)sizeof_client_key_der_4096);
+#elif !defined(NO_FILESYSTEM)
+ file = XFOPEN(clientKey, "rb");
+ if (!file) {
+ err_sys("can't open ./certs/client-key.der, "
+ "Please run from wolfSSL home dir", -40);
+ ERROR_OUT(-7701, exit_rsa);
+ }
+
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+#else
+ /* No key to use. */
+ ERROR_OUT(-7702, exit_rsa);
+#endif /* USE_CERT_BUFFERS */
+
+ ret = wc_InitRsaKey_ex(&key, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-7703, exit_rsa);
+ }
+#ifndef NO_ASN
+ ret = wc_RsaPrivateKeyDecode(tmp, &idx, &key, (word32)bytes);
+ if (ret != 0) {
+ ERROR_OUT(-7704, exit_rsa);
+ }
+#ifndef NO_SIG_WRAPPER
+ modLen = wc_RsaEncryptSize(&key);
+#endif
+#else
+ #ifdef USE_CERT_BUFFERS_2048
+ ret = mp_read_unsigned_bin(&key.n, &tmp[12], 256);
if (ret != 0) {
- free(derCert);
- free(pem);
- return -411;
+ ERROR_OUT(-7705, exit_rsa);
}
- ret = wc_RsaPrivateKeyDecode(tmp, &idx3, &caKey, (word32)bytes3);
+ ret = mp_set_int(&key.e, WC_RSA_EXPONENT);
if (ret != 0) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -413;
+ ERROR_OUT(-7706, exit_rsa);
}
+#ifndef NO_SIG_WRAPPER
+ modLen = 2048;
+#endif
+ #else
+ #error Not supported yet!
+ #endif
+#endif
+
+#ifndef WC_NO_RNG
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7707, exit_rsa);
+ }
+#endif
- wc_InitCert(&myCert);
+#ifndef NO_SIG_WRAPPER
+ ret = rsa_sig_test(&key, sizeof(RsaKey), modLen, &rng);
+ if (ret != 0)
+ goto exit_rsa;
+#endif
- strncpy(myCert.subject.country, "US", CTC_NAME_SIZE);
- strncpy(myCert.subject.state, "OR", CTC_NAME_SIZE);
- strncpy(myCert.subject.locality, "Portland", CTC_NAME_SIZE);
- strncpy(myCert.subject.org, "yaSSL", CTC_NAME_SIZE);
- strncpy(myCert.subject.unit, "Development", CTC_NAME_SIZE);
- strncpy(myCert.subject.commonName, "www.yassl.com", CTC_NAME_SIZE);
- strncpy(myCert.subject.email, "info@yassl.com", CTC_NAME_SIZE);
+#ifdef WC_RSA_NONBLOCK
+ ret = rsa_nb_test(&key, in, inLen, out, outSz, plain, plainSz, &rng);
+ if (ret != 0)
+ goto exit_rsa;
+#endif
- ret = wc_SetIssuer(&myCert, caCertFile);
- if (ret < 0) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -405;
+#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPublicEncrypt(in, inLen, out, outSz, &key, &rng);
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7708, exit_rsa);
+ }
+ TEST_SLEEP();
- certSz = wc_MakeCert(&myCert, derCert, FOURK_BUF, &key, NULL, &rng);
- if (certSz < 0) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -407;
+#ifdef WC_RSA_BLINDING
+ {
+ int tmpret = ret;
+ ret = wc_RsaSetRNG(&key, &rng);
+ if (ret < 0) {
+ ERROR_OUT(-7709, exit_rsa);
}
+ ret = tmpret;
+ }
+#endif
- certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
- &caKey, NULL, &rng);
- if (certSz < 0) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -408;
+ idx = (word32)ret; /* save off encrypted length */
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecrypt(out, idx, plain, plainSz, &key);
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7710, exit_rsa);
+ }
+ if (XMEMCMP(plain, in, inLen)) {
+ ERROR_OUT(-7711, exit_rsa);
+ }
+ TEST_SLEEP();
-#ifdef WOLFSSL_TEST_CERT
- InitDecodedCert(&decode, derCert, certSz, 0);
- ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
- if (ret != 0) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -409;
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecryptInline(out, idx, &res, &key);
}
- FreeDecodedCert(&decode);
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7712, exit_rsa);
+ }
+ if (ret != (int)inLen) {
+ ERROR_OUT(-7713, exit_rsa);
+ }
+ if (XMEMCMP(res, in, inLen)) {
+ ERROR_OUT(-7714, exit_rsa);
+ }
+ TEST_SLEEP();
+
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
#endif
+ if (ret >= 0) {
+ ret = wc_RsaSSL_Sign(in, inLen, out, outSz, &key, &rng);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7715, exit_rsa);
+ }
+ TEST_SLEEP();
-#ifdef FREESCALE_MQX
- derFile = fopen("a:\\certs\\othercert.der", "wb");
+#elif defined(WOLFSSL_PUBLIC_MP)
+ (void)outSz;
+ (void)inLen;
+ (void)res;
+ {
+ byte signature_2048[] = {
+ 0x07, 0x6f, 0xc9, 0x85, 0x73, 0x9e, 0x21, 0x79,
+ 0x47, 0xf1, 0xa3, 0xd7, 0xf4, 0x27, 0x29, 0xbe,
+ 0x99, 0x5d, 0xac, 0xb2, 0x10, 0x3f, 0x95, 0xda,
+ 0x89, 0x23, 0xb8, 0x96, 0x13, 0x57, 0x72, 0x30,
+ 0xa1, 0xfe, 0x5a, 0x68, 0x9c, 0x99, 0x9d, 0x1e,
+ 0x05, 0xa4, 0x80, 0xb0, 0xbb, 0xd9, 0xd9, 0xa1,
+ 0x69, 0x97, 0x74, 0xb3, 0x41, 0x21, 0x3b, 0x47,
+ 0xf5, 0x51, 0xb1, 0xfb, 0xc7, 0xaa, 0xcc, 0xdc,
+ 0xcd, 0x76, 0xa0, 0x28, 0x4d, 0x27, 0x14, 0xa4,
+ 0xb9, 0x41, 0x68, 0x7c, 0xb3, 0x66, 0xe6, 0x6f,
+ 0x40, 0x76, 0xe4, 0x12, 0xfd, 0xae, 0x29, 0xb5,
+ 0x63, 0x60, 0x87, 0xce, 0x49, 0x6b, 0xf3, 0x05,
+ 0x9a, 0x14, 0xb5, 0xcc, 0xcd, 0xf7, 0x30, 0x95,
+ 0xd2, 0x72, 0x52, 0x1d, 0x5b, 0x7e, 0xef, 0x4a,
+ 0x02, 0x96, 0x21, 0x6c, 0x55, 0xa5, 0x15, 0xb1,
+ 0x57, 0x63, 0x2c, 0xa3, 0x8e, 0x9d, 0x3d, 0x45,
+ 0xcc, 0xb8, 0xe6, 0xa1, 0xc8, 0x59, 0xcd, 0xf5,
+ 0xdc, 0x0a, 0x51, 0xb6, 0x9d, 0xfb, 0xf4, 0x6b,
+ 0xfd, 0x32, 0x71, 0x6e, 0xcf, 0xcb, 0xb3, 0xd9,
+ 0xe0, 0x4a, 0x77, 0x34, 0xd6, 0x61, 0xf5, 0x7c,
+ 0xf9, 0xa9, 0xa4, 0xb0, 0x8e, 0x3b, 0xd6, 0x04,
+ 0xe0, 0xde, 0x2b, 0x5b, 0x5a, 0xbf, 0xd9, 0xef,
+ 0x8d, 0xa3, 0xf5, 0xb1, 0x67, 0xf3, 0xb9, 0x72,
+ 0x0a, 0x37, 0x12, 0x35, 0x6c, 0x8e, 0x10, 0x8b,
+ 0x38, 0x06, 0x16, 0x4b, 0x20, 0x20, 0x13, 0x00,
+ 0x2e, 0x6d, 0xc2, 0x59, 0x23, 0x67, 0x4a, 0x6d,
+ 0xa1, 0x46, 0x8b, 0xee, 0xcf, 0x44, 0xb4, 0x3e,
+ 0x56, 0x75, 0x00, 0x68, 0xb5, 0x7d, 0x0f, 0x20,
+ 0x79, 0x5d, 0x7f, 0x12, 0x15, 0x32, 0x89, 0x61,
+ 0x6b, 0x29, 0xb7, 0x52, 0xf5, 0x25, 0xd8, 0x98,
+ 0xe8, 0x6f, 0xf9, 0x22, 0xb4, 0xbb, 0xe5, 0xff,
+ 0xd0, 0x92, 0x86, 0x9a, 0x88, 0xa2, 0xaf, 0x6b
+ };
+ ret = sizeof(signature_2048);
+ XMEMCPY(out, signature_2048, ret);
+ }
+#endif
+
+#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || defined(WOLFSSL_PUBLIC_MP)
+ idx = (word32)ret;
+ XMEMSET(plain, 0, plainSz);
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+#ifndef WOLFSSL_RSA_VERIFY_INLINE
+
+#if defined(WOLFSSL_CRYPTOCELL)
+ /*
+ Cryptocell requires the input data and signature byte array to verify.
+
+ first argument must be the input data
+ second argument must be the length of input data
+ third argument must be the signature byte array or the output from
+ wc_RsaSSL_Sign()
+ fourth argument must be the length of the signature byte array
+ */
+
+ ret = wc_RsaSSL_Verify(in, inLen, out, outSz, &key);
#else
- derFile = fopen("./othercert.der", "wb");
+ ret = wc_RsaSSL_Verify(out, idx, plain, plainSz, &key);
+#endif /* WOLFSSL_CRYPTOCELL */
+#else
+ byte* dec = NULL;
+ ret = wc_RsaSSL_VerifyInline(out, idx, &dec, &key);
+ if (ret > 0) {
+ XMEMCPY(plain, dec, ret);
+ }
#endif
- if (!derFile) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -410;
- }
- ret = (int)fwrite(derCert, 1, certSz, derFile);
- fclose(derFile);
- if (ret != certSz) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -416;
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7716, exit_rsa);
+ }
- pemSz = wc_DerToPem(derCert, certSz, pem, FOURK_BUF, CERT_TYPE);
- if (pemSz < 0) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -411;
+ if (XMEMCMP(plain, in, (size_t)ret)) {
+ ERROR_OUT(-7717, exit_rsa);
+ }
+ TEST_SLEEP();
+#endif
+
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ #if !defined(WC_NO_RSA_OAEP) && !defined(WC_NO_RNG)
+ /* OAEP padding testing */
+ #if !defined(HAVE_FAST_RSA) && !defined(HAVE_USER_RSA) && \
+ (!defined(HAVE_FIPS) || \
+ (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)))
+ #ifndef NO_SHA
+ XMEMSET(plain, 0, plainSz);
+
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, NULL, 0);
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7718, exit_rsa);
+ }
+ TEST_SLEEP();
-#ifdef FREESCALE_MQX
- pemFile = fopen("a:\\certs\\othercert.pem", "wb");
-#else
- pemFile = fopen("./othercert.pem", "wb");
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ idx = (word32)ret;
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
#endif
- if (!pemFile) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -412;
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, NULL, 0);
}
- ret = (int)fwrite(pem, 1, pemSz, pemFile);
- if (ret != pemSz) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -415;
- }
- fclose(pemFile);
- free(pem);
- free(derCert);
- wc_FreeRsaKey(&caKey);
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7719, exit_rsa);
}
-#ifdef HAVE_ECC
- /* ECC CA style */
- {
- ecc_key caKey;
- Cert myCert;
- byte* derCert;
- byte* pem;
- FILE* derFile;
- FILE* pemFile;
- int certSz;
- int pemSz;
- size_t bytes3;
- word32 idx3 = 0;
- FILE* file3;
-#ifdef WOLFSSL_TEST_CERT
- DecodedCert decode;
+
+ if (XMEMCMP(plain, in, inLen)) {
+ ERROR_OUT(-7720, exit_rsa);
+ }
+ TEST_SLEEP();
+ #endif /* NO_SHA */
#endif
- derCert = (byte*)malloc(FOURK_BUF);
- if (derCert == NULL)
- return -5311;
- pem = (byte*)malloc(FOURK_BUF);
- if (pem == NULL) {
- free(derCert);
- return -5312;
+ #ifndef NO_SHA256
+ XMEMSET(plain, 0, plainSz);
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7721, exit_rsa);
+ }
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_VERIFY_ONLY */
+
+ idx = (word32)ret;
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7722, exit_rsa);
+ }
- file3 = fopen(eccCaKeyFile, "rb");
+ if (XMEMCMP(plain, in, inLen)) {
+ ERROR_OUT(-7723, exit_rsa);
+ }
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
- if (!file3) {
- free(derCert);
- free(pem);
- return -5412;
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecryptInline_ex(out, idx, &res, &key,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7724, exit_rsa);
+ }
+ if (ret != (int)inLen) {
+ ERROR_OUT(-7725, exit_rsa);
+ }
+ if (XMEMCMP(res, in, inLen)) {
+ ERROR_OUT(-7726, exit_rsa);
+ }
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+
+ /* check fails if not using the same optional label */
+ XMEMSET(plain, 0, plainSz);
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7727, exit_rsa);
+ }
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_VERIFY_ONLY */
+
+/* TODO: investigate why Cavium Nitrox doesn't detect decrypt error here */
+#if !defined(HAVE_CAVIUM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) && \
+ !defined(WOLFSSL_CRYPTOCELL)
+/* label is unused in cryptocell so it won't detect decrypt error due to label */
+ idx = (word32)ret;
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret > 0) { /* in this case decrypt should fail */
+ ERROR_OUT(-7728, exit_rsa);
+ }
+ ret = 0;
+ TEST_SLEEP();
+#endif /* !HAVE_CAVIUM */
+
+ /* check using optional label with encrypt/decrypt */
+ XMEMSET(plain, 0, plainSz);
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7729, exit_rsa);
+ }
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_VERIFY_ONLY */
+
+ idx = (word32)ret;
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7730, exit_rsa);
+ }
- bytes3 = fread(tmp, 1, FOURK_BUF, file3);
- fclose(file3);
+ if (XMEMCMP(plain, in, inLen)) {
+ ERROR_OUT(-7731, exit_rsa);
+ }
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
- wc_ecc_init(&caKey);
- ret = wc_EccPrivateKeyDecode(tmp, &idx3, &caKey, (word32)bytes3);
- if (ret != 0) {
- free(derCert);
- free(pem);
- return -5413;
+#ifndef WOLFSSL_RSA_VERIFY_ONLY
+ #ifndef NO_SHA
+ /* check fail using mismatch hash algorithms */
+ XMEMSET(plain, 0, plainSz);
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, in, inLen);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7732, exit_rsa);
}
+ TEST_SLEEP();
+
+/* TODO: investigate why Cavium Nitrox doesn't detect decrypt error here */
+#if !defined(HAVE_CAVIUM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) && \
+ !defined(WOLFSSL_CRYPTOCELL)
+ idx = (word32)ret;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256,
+ in, inLen);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret > 0) { /* should fail */
+ ERROR_OUT(-7733, exit_rsa);
+ }
+ ret = 0;
+ TEST_SLEEP();
+ #endif /* !HAVE_CAVIUM */
+ #endif /* NO_SHA */
+#endif /* WOLFSSL_RSA_VERIFY_ONLY */
+#endif /* NO_SHA256 */
- wc_InitCert(&myCert);
- myCert.sigType = CTC_SHA256wECDSA;
-
- strncpy(myCert.subject.country, "US", CTC_NAME_SIZE);
- strncpy(myCert.subject.state, "OR", CTC_NAME_SIZE);
- strncpy(myCert.subject.locality, "Portland", CTC_NAME_SIZE);
- strncpy(myCert.subject.org, "wolfSSL", CTC_NAME_SIZE);
- strncpy(myCert.subject.unit, "Development", CTC_NAME_SIZE);
- strncpy(myCert.subject.commonName, "www.wolfssl.com", CTC_NAME_SIZE);
- strncpy(myCert.subject.email, "info@wolfssl.com", CTC_NAME_SIZE);
+#ifdef WOLFSSL_SHA512
+ /* Check valid RSA key size is used while using hash length of SHA512
+ If key size is less than (hash length * 2) + 2 then is invalid use
+ and test, since OAEP padding requires this.
+ BAD_FUNC_ARG is returned when this case is not met */
+ if (wc_RsaEncryptSize(&key) > ((int)WC_SHA512_DIGEST_SIZE * 2) + 2) {
+ XMEMSET(plain, 0, plainSz);
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA512, WC_MGF1SHA512, NULL, 0);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7734, exit_rsa);
+ }
+ TEST_SLEEP();
- ret = wc_SetIssuer(&myCert, eccCaCertFile);
+ idx = ret;
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+ WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA512, WC_MGF1SHA512, NULL, 0);
+ }
+ } while (ret == WC_PENDING_E);
if (ret < 0) {
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
- return -5405;
+ ERROR_OUT(-7735, exit_rsa);
}
- certSz = wc_MakeCert(&myCert, derCert, FOURK_BUF, NULL, &caKey, &rng);
- if (certSz < 0) {
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
- return -5407;
+ if (XMEMCMP(plain, in, inLen)) {
+ ERROR_OUT(-7736, exit_rsa);
}
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+ }
+#endif /* WOLFSSL_SHA512 */
- certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
- NULL, &caKey, &rng);
- if (certSz < 0) {
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
- return -5408;
+ /* check using pkcsv15 padding with _ex API */
+ XMEMSET(plain, 0, plainSz);
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+ WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, 0, NULL, 0);
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7737, exit_rsa);
+ }
+ TEST_SLEEP();
-#ifdef WOLFSSL_TEST_CERT
- InitDecodedCert(&decode, derCert, certSz, 0);
- ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
- if (ret != 0) {
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
- return -5409;
+ idx = (word32)ret;
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+ do {
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
+ if (ret >= 0) {
+ ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+ WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, 0, NULL, 0);
}
- FreeDecodedCert(&decode);
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7738, exit_rsa);
+ }
+
+ if (XMEMCMP(plain, in, inLen)) {
+ ERROR_OUT(-7739, exit_rsa);
+ }
+ TEST_SLEEP();
+#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
+ #endif /* !HAVE_FAST_RSA && !HAVE_FIPS */
+ #endif /* WC_NO_RSA_OAEP && !WC_NO_RNG */
+#endif /* WOLFSSL_RSA_VERIFY_ONLY */
+
+#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(NO_ASN) \
+ && !defined(WOLFSSL_RSA_VERIFY_ONLY)
+ ret = rsa_export_key_test(&key);
+ if (ret != 0)
+ return ret;
#endif
-#ifdef FREESCALE_MQX
- derFile = fopen("a:\\certs\\certecc.der", "wb");
+#if !defined(NO_ASN) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+ ret = rsa_flatten_test(&key);
+ if (ret != 0)
+ return ret;
+#endif
+
+#if defined(WOLFSSL_MDK_ARM)
+ #define sizeof(s) XSTRLEN((char *)(s))
+#endif
+
+#ifdef USE_CERT_BUFFERS_1024
+ XMEMCPY(tmp, client_cert_der_1024, (size_t)sizeof_client_cert_der_1024);
+ bytes = (size_t)sizeof_client_cert_der_1024;
+#elif defined(USE_CERT_BUFFERS_2048)
+ XMEMCPY(tmp, client_cert_der_2048, (size_t)sizeof_client_cert_der_2048);
+ bytes = (size_t)sizeof_client_cert_der_2048;
+#elif defined(USE_CERT_BUFFERS_3072)
+ XMEMCPY(tmp, client_cert_der_3072, (size_t)sizeof_client_cert_der_3072);
+ bytes = (size_t)sizeof_client_cert_der_3072;
+#elif defined(USE_CERT_BUFFERS_4096)
+ XMEMCPY(tmp, client_cert_der_4096, (size_t)sizeof_client_cert_der_4096);
+ bytes = (size_t)sizeof_client_cert_der_4096;
+#elif !defined(NO_FILESYSTEM)
+ file2 = XFOPEN(clientCert, "rb");
+ if (!file2) {
+ ERROR_OUT(-7740, exit_rsa);
+ }
+
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file2);
+ XFCLOSE(file2);
#else
- derFile = fopen("./certecc.der", "wb");
+ /* No certificate to use. */
+ ERROR_OUT(-7741, exit_rsa);
#endif
- if (!derFile) {
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
- return -5410;
- }
- ret = (int)fwrite(derCert, 1, certSz, derFile);
- fclose(derFile);
- if (ret != certSz) {
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
- return -5414;
- }
- pemSz = wc_DerToPem(derCert, certSz, pem, FOURK_BUF, CERT_TYPE);
- if (pemSz < 0) {
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
- return -5411;
- }
+#ifdef sizeof
+ #undef sizeof
+#endif
-#ifdef FREESCALE_MQX
- pemFile = fopen("a:\\certs\\certecc.pem", "wb");
+#ifdef WOLFSSL_TEST_CERT
+ InitDecodedCert(&cert, tmp, (word32)bytes, 0);
+
+ ret = ParseCert(&cert, CERT_TYPE, NO_VERIFY, 0);
+ if (ret != 0) {
+ FreeDecodedCert(&cert);
+ ERROR_OUT(-7742, exit_rsa);
+ }
+
+ FreeDecodedCert(&cert);
#else
- pemFile = fopen("./certecc.pem", "wb");
+ (void)bytes;
#endif
- if (!pemFile) {
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
- return -5412;
- }
- ret = (int)fwrite(pem, 1, pemSz, pemFile);
- if (ret != pemSz) {
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
- return -5415;
- }
- fclose(pemFile);
- free(pem);
- free(derCert);
- wc_ecc_free(&caKey);
+
+#ifdef WOLFSSL_CERT_EXT
+
+#ifdef USE_CERT_BUFFERS_1024
+ XMEMCPY(tmp, client_keypub_der_1024, sizeof_client_keypub_der_1024);
+ bytes = sizeof_client_keypub_der_1024;
+#elif defined(USE_CERT_BUFFERS_2048)
+ XMEMCPY(tmp, client_keypub_der_2048, sizeof_client_keypub_der_2048);
+ bytes = sizeof_client_keypub_der_2048;
+#elif defined(USE_CERT_BUFFERS_3072)
+ XMEMCPY(tmp, client_keypub_der_3072, sizeof_client_keypub_der_3072);
+ bytes = sizeof_client_keypub_der_3072;
+#elif defined(USE_CERT_BUFFERS_4096)
+ XMEMCPY(tmp, client_keypub_der_4096, sizeof_client_keypub_der_4096);
+ bytes = sizeof_client_keypub_der_4096;
+#else
+ file = XFOPEN(clientKeyPub, "rb");
+ if (!file) {
+ err_sys("can't open ./certs/client-keyPub.der, "
+ "Please run from wolfSSL home dir", -40);
+ ERROR_OUT(-7743, exit_rsa);
}
-#endif /* HAVE_ECC */
+
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+#endif /* USE_CERT_BUFFERS */
+
+ ret = wc_InitRsaKey(&keypub, HEAP_HINT);
+ if (ret != 0) {
+ ERROR_OUT(-7744, exit_rsa);
+ }
+ idx = 0;
+
+ ret = wc_RsaPublicKeyDecode(tmp, &idx, &keypub, (word32)bytes);
+ if (ret != 0) {
+ ERROR_OUT(-7745, exit_rsa);
+ }
+#endif /* WOLFSSL_CERT_EXT */
+
+#ifdef WOLFSSL_KEY_GEN
+ ret = rsa_keygen_test(&rng);
+ if (ret != 0)
+ goto exit_rsa;
+#endif
+
+#ifdef WOLFSSL_CERT_GEN
+ /* Make Cert / Sign example for RSA cert and RSA CA */
+ ret = rsa_certgen_test(&key, &keypub, &rng, tmp);
+ if (ret != 0)
+ goto exit_rsa;
+
+#if !defined(NO_RSA) && defined(HAVE_ECC)
+ ret = rsa_ecc_certgen_test(&rng, tmp);
+ if (ret != 0)
+ goto exit_rsa;
+#endif
+
#ifdef HAVE_NTRU
{
- RsaKey caKey;
Cert myCert;
- byte* derCert;
- byte* pem;
- FILE* derFile;
- FILE* pemFile;
- FILE* caFile;
- FILE* ntruPrivFile;
+ #if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
+ XFILE caFile;
+ #endif
+ #if !defined(NO_FILESYSTEM) && !defined(NO_WRITE_TEMP_FILES)
+ XFILE ntruPrivFile;
+ #endif
int certSz;
- int pemSz;
- word32 idx3;
-#ifdef WOLFSSL_TEST_CERT
+ word32 idx3 = 0;
+ #ifdef WOLFSSL_TEST_CERT
DecodedCert decode;
-#endif
- derCert = (byte*)malloc(FOURK_BUF);
- if (derCert == NULL)
- return -311;
- pem = (byte*)malloc(FOURK_BUF);
- if (pem == NULL) {
- free(derCert);
- return -312;
- }
-
+ #endif
byte public_key[557]; /* sized for EES401EP2 */
word16 public_key_len; /* no. of octets in public key */
byte private_key[607]; /* sized for EES401EP2 */
@@ -3973,363 +13599,946 @@ int rsa_test(void)
word32 rc = ntru_crypto_drbg_instantiate(112, pers_str,
sizeof(pers_str), GetEntropy, &drbg);
if (rc != DRBG_OK) {
- free(derCert);
- free(pem);
- return -448;
+ ERROR_OUT(-7746, exit_rsa);
}
rc = ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_EES401EP2,
&public_key_len, NULL,
&private_key_len, NULL);
if (rc != NTRU_OK) {
- free(derCert);
- free(pem);
- return -449;
+ ERROR_OUT(-7747, exit_rsa);
}
rc = ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_EES401EP2,
&public_key_len, public_key,
&private_key_len, private_key);
if (rc != NTRU_OK) {
- free(derCert);
- free(pem);
- return -450;
+ ERROR_OUT(-7748, exit_rsa);
}
rc = ntru_crypto_drbg_uninstantiate(drbg);
-
if (rc != NTRU_OK) {
- free(derCert);
- free(pem);
- return -451;
+ ERROR_OUT(-7749, exit_rsa);
}
- caFile = fopen(caKeyFile, "rb");
-
+ #ifdef USE_CERT_BUFFERS_1024
+ XMEMCPY(tmp, ca_key_der_1024, sizeof_ca_key_der_1024);
+ bytes = sizeof_ca_key_der_1024;
+ #elif defined(USE_CERT_BUFFERS_2048)
+ XMEMCPY(tmp, ca_key_der_2048, sizeof_ca_key_der_2048);
+ bytes = sizeof_ca_key_der_2048;
+ #else
+ caFile = XFOPEN(rsaCaKeyFile, "rb");
if (!caFile) {
- free(derCert);
- free(pem);
- return -452;
+ ERROR_OUT(-7750, exit_rsa);
}
- bytes = fread(tmp, 1, FOURK_BUF, caFile);
- fclose(caFile);
+ bytes = XFREAD(tmp, 1, FOURK_BUF, caFile);
+ XFCLOSE(caFile);
+ #endif /* USE_CERT_BUFFERS */
- ret = wc_InitRsaKey(&caKey, 0);
+ ret = wc_InitRsaKey(&caKey, HEAP_HINT);
if (ret != 0) {
- free(derCert);
- free(pem);
- return -453;
+ ERROR_OUT(-7751, exit_rsa);
}
ret = wc_RsaPrivateKeyDecode(tmp, &idx3, &caKey, (word32)bytes);
if (ret != 0) {
- free(derCert);
- free(pem);
- return -454;
+ ERROR_OUT(-7752, exit_rsa);
}
- wc_InitCert(&myCert);
+ if (wc_InitCert(&myCert)) {
+ ERROR_OUT(-7753, exit_rsa);
+ }
+
+ XMEMCPY(&myCert.subject, &certDefaultName, sizeof(CertName));
+ myCert.daysValid = 1000;
- strncpy(myCert.subject.country, "US", CTC_NAME_SIZE);
- strncpy(myCert.subject.state, "OR", CTC_NAME_SIZE);
- strncpy(myCert.subject.locality, "Portland", CTC_NAME_SIZE);
- strncpy(myCert.subject.org, "yaSSL", CTC_NAME_SIZE);
- strncpy(myCert.subject.unit, "Development", CTC_NAME_SIZE);
- strncpy(myCert.subject.commonName, "www.yassl.com", CTC_NAME_SIZE);
- strncpy(myCert.subject.email, "info@yassl.com", CTC_NAME_SIZE);
+ #ifdef WOLFSSL_CERT_EXT
+ /* add SKID from the Public Key */
+ if (wc_SetSubjectKeyIdFromNtruPublicKey(&myCert, public_key,
+ public_key_len) != 0) {
+ ERROR_OUT(-7754, exit_rsa);
+ }
- ret = wc_SetIssuer(&myCert, caCertFile);
+ /* add AKID from the CA certificate */
+ #if defined(USE_CERT_BUFFERS_2048)
+ ret = wc_SetAuthKeyIdFromCert(&myCert, ca_cert_der_2048,
+ sizeof_ca_cert_der_2048);
+ #elif defined(USE_CERT_BUFFERS_1024)
+ ret = wc_SetAuthKeyIdFromCert(&myCert, ca_cert_der_1024,
+ sizeof_ca_cert_der_1024);
+ #else
+ ret = wc_SetAuthKeyId(&myCert, rsaCaCertFile);
+ #endif
+ if (ret != 0) {
+ ERROR_OUT(-7755, exit_rsa);
+ }
+
+ /* add Key Usage */
+ if (wc_SetKeyUsage(&myCert, certKeyUsage2) != 0) {
+ ERROR_OUT(-7756, exit_rsa);
+ }
+ #endif /* WOLFSSL_CERT_EXT */
+
+ #if defined(USE_CERT_BUFFERS_2048)
+ ret = wc_SetIssuerBuffer(&myCert, ca_cert_der_2048,
+ sizeof_ca_cert_der_2048);
+ #elif defined(USE_CERT_BUFFERS_1024)
+ ret = wc_SetIssuerBuffer(&myCert, ca_cert_der_1024,
+ sizeof_ca_cert_der_1024);
+ #else
+ ret = wc_SetIssuer(&myCert, rsaCaCertFile);
+ #endif
if (ret < 0) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -455;
+ ERROR_OUT(-7757, exit_rsa);
+ }
+
+ der = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (der == NULL) {
+ ERROR_OUT(-7758, exit_rsa);
+ }
+ pem = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,DYNAMIC_TYPE_TMP_BUFFER);
+ if (pem == NULL) {
+ ERROR_OUT(-7759, exit_rsa);
}
- certSz = wc_MakeNtruCert(&myCert, derCert, FOURK_BUF, public_key,
+ certSz = wc_MakeNtruCert(&myCert, der, FOURK_BUF, public_key,
public_key_len, &rng);
if (certSz < 0) {
- free(derCert);
- free(pem);
- wc_FreeRsaKey(&caKey);
- return -456;
+ ERROR_OUT(-7760, exit_rsa);
}
- certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
+ ret = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &caKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_SignCert(myCert.bodySz, myCert.sigType, der, FOURK_BUF,
&caKey, NULL, &rng);
+ }
+ } while (ret == WC_PENDING_E);
wc_FreeRsaKey(&caKey);
- if (certSz < 0) {
- free(derCert);
- free(pem);
- return -457;
+ if (ret < 0) {
+ ERROR_OUT(-7761, exit_rsa);
}
+ certSz = ret;
-
-#ifdef WOLFSSL_TEST_CERT
- InitDecodedCert(&decode, derCert, certSz, 0);
+ #ifdef WOLFSSL_TEST_CERT
+ InitDecodedCert(&decode, der, certSz, HEAP_HINT);
ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
if (ret != 0) {
- free(derCert);
- free(pem);
- return -458;
+ FreeDecodedCert(&decode);
+ ERROR_OUT(-7762, exit_rsa);
}
FreeDecodedCert(&decode);
-#endif
- derFile = fopen("./ntru-cert.der", "wb");
- if (!derFile) {
- free(derCert);
- free(pem);
- return -459;
- }
- ret = (int)fwrite(derCert, 1, certSz, derFile);
- fclose(derFile);
- if (ret != certSz) {
- free(derCert);
- free(pem);
- return -473;
- }
-
- pemSz = wc_DerToPem(derCert, certSz, pem, FOURK_BUF, CERT_TYPE);
- if (pemSz < 0) {
- free(derCert);
- free(pem);
- return -460;
- }
+ #endif
- pemFile = fopen("./ntru-cert.pem", "wb");
- if (!pemFile) {
- free(derCert);
- free(pem);
- return -461;
- }
- ret = (int)fwrite(pem, 1, pemSz, pemFile);
- fclose(pemFile);
- if (ret != pemSz) {
- free(derCert);
- free(pem);
- return -474;
+ ret = SaveDerAndPem(der, certSz, pem, FOURK_BUF, "./ntru-cert.der",
+ "./ntru-cert.pem", CERT_TYPE, -5637);
+ if (ret != 0) {
+ goto exit_rsa;
}
- ntruPrivFile = fopen("./ntru-key.raw", "wb");
+ #if !defined(NO_FILESYSTEM) && !defined(NO_WRITE_TEMP_FILES)
+ ntruPrivFile = XFOPEN("./ntru-key.raw", "wb");
if (!ntruPrivFile) {
- free(derCert);
- free(pem);
- return -462;
+ ERROR_OUT(-7763, exit_rsa);
}
- ret = (int)fwrite(private_key, 1, private_key_len, ntruPrivFile);
- fclose(ntruPrivFile);
+ ret = (int)XFWRITE(private_key, 1, private_key_len, ntruPrivFile);
+ XFCLOSE(ntruPrivFile);
if (ret != private_key_len) {
- free(pem);
- free(derCert);
- return -475;
+ ERROR_OUT(-7764, exit_rsa);
}
- free(pem);
- free(derCert);
+ #endif
+
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ pem = NULL;
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ der = NULL;
}
#endif /* HAVE_NTRU */
#ifdef WOLFSSL_CERT_REQ
{
Cert req;
- byte* der;
- byte* pem;
int derSz;
- int pemSz;
- FILE* reqFile;
- der = (byte*)malloc(FOURK_BUF);
- if (der == NULL)
- return -463;
- pem = (byte*)malloc(FOURK_BUF);
+ der = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,DYNAMIC_TYPE_TMP_BUFFER);
+ if (der == NULL) {
+ ERROR_OUT(-7765, exit_rsa);
+ }
+ pem = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,DYNAMIC_TYPE_TMP_BUFFER);
if (pem == NULL) {
- free(der);
- return -464;
+ ERROR_OUT(-7766, exit_rsa);
}
- wc_InitCert(&req);
+ if (wc_InitCert(&req)) {
+ ERROR_OUT(-7767, exit_rsa);
+ }
req.version = 0;
req.isCA = 1;
- strncpy(req.challengePw, "yassl123", CTC_NAME_SIZE);
- strncpy(req.subject.country, "US", CTC_NAME_SIZE);
- strncpy(req.subject.state, "OR", CTC_NAME_SIZE);
- strncpy(req.subject.locality, "Portland", CTC_NAME_SIZE);
- strncpy(req.subject.org, "yaSSL", CTC_NAME_SIZE);
- strncpy(req.subject.unit, "Development", CTC_NAME_SIZE);
- strncpy(req.subject.commonName, "www.yassl.com", CTC_NAME_SIZE);
- strncpy(req.subject.email, "info@yassl.com", CTC_NAME_SIZE);
+ XSTRNCPY(req.challengePw, "wolf123", CTC_NAME_SIZE);
+ XMEMCPY(&req.subject, &certDefaultName, sizeof(CertName));
+
+ #ifndef NO_SHA256
req.sigType = CTC_SHA256wRSA;
+ #else
+ req.sigType = CTC_SHAwRSA;
+ #endif
- derSz = wc_MakeCertReq(&req, der, FOURK_BUF, &key, NULL);
- if (derSz < 0) {
- free(pem);
- free(der);
- return -465;
+ #ifdef WOLFSSL_CERT_EXT
+ /* add SKID from the Public Key */
+ if (wc_SetSubjectKeyIdFromPublicKey(&req, &keypub, NULL) != 0) {
+ ERROR_OUT(-7768, exit_rsa);
}
- derSz = wc_SignCert(req.bodySz, req.sigType, der, FOURK_BUF,
- &key, NULL, &rng);
- if (derSz < 0) {
- free(pem);
- free(der);
- return -466;
+ /* add Key Usage */
+ if (wc_SetKeyUsage(&req, certKeyUsage2) != 0) {
+ ERROR_OUT(-7769, exit_rsa);
}
- pemSz = wc_DerToPem(der, derSz, pem, FOURK_BUF, CERTREQ_TYPE);
- if (pemSz < 0) {
- free(pem);
- free(der);
- return -467;
+ /* add Extended Key Usage */
+ if (wc_SetExtKeyUsage(&req, "serverAuth,clientAuth,codeSigning,"
+ "emailProtection,timeStamping,OCSPSigning") != 0) {
+ ERROR_OUT(-7770, exit_rsa);
}
+ #ifdef WOLFSSL_EKU_OID
+ {
+ const char unique[] = "2.16.840.1.111111.100.1.10.1";
+ if (wc_SetExtKeyUsageOID(&req, unique, sizeof(unique), 0,
+ HEAP_HINT) != 0) {
+ ERROR_OUT(-7771, exit_rsa);
+ }
+ }
+ #endif /* WOLFSSL_EKU_OID */
+ #endif /* WOLFSSL_CERT_EXT */
-#ifdef FREESCALE_MQX
- reqFile = fopen("a:\\certs\\certreq.der", "wb");
-#else
- reqFile = fopen("./certreq.der", "wb");
-#endif
- if (!reqFile) {
- free(pem);
- free(der);
- return -468;
+ derSz = wc_MakeCertReq(&req, der, FOURK_BUF, &key, NULL);
+ if (derSz < 0) {
+ ERROR_OUT(-7772, exit_rsa);
}
- ret = (int)fwrite(der, 1, derSz, reqFile);
- fclose(reqFile);
- if (ret != derSz) {
- free(pem);
- free(der);
- return -471;
+ #ifdef WOLFSSL_CERT_EXT
+ /* Try again with "any" flag set, will override all others */
+ if (wc_SetExtKeyUsage(&req, "any") != 0) {
+ ERROR_OUT(-7773, exit_rsa);
}
+ derSz = wc_MakeCertReq(&req, der, FOURK_BUF, &key, NULL);
+ if (derSz < 0) {
+ ERROR_OUT(-7774, exit_rsa);
+ }
+ #endif /* WOLFSSL_CERT_EXT */
-#ifdef FREESCALE_MQX
- reqFile = fopen("a:\\certs\\certreq.pem", "wb");
-#else
- reqFile = fopen("./certreq.pem", "wb");
-#endif
- if (!reqFile) {
- free(pem);
- free(der);
- return -469;
+ ret = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_SignCert(req.bodySz, req.sigType, der, FOURK_BUF,
+ &key, NULL, &rng);
+ }
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-7775, exit_rsa);
}
- ret = (int)fwrite(pem, 1, pemSz, reqFile);
- fclose(reqFile);
- if (ret != pemSz) {
- free(pem);
- free(der);
- return -470;
+ derSz = ret;
+
+ ret = SaveDerAndPem(der, derSz, pem, FOURK_BUF, certReqDerFile,
+ certReqPemFile, CERTREQ_TYPE, -5650);
+ if (ret != 0) {
+ goto exit_rsa;
}
- free(pem);
- free(der);
+ derSz = wc_MakeCertReq_ex(&req, der, FOURK_BUF, RSA_TYPE, &key);
+ if (derSz < 0) {
+ ERROR_OUT(-7776, exit_rsa);
+ }
+
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ pem = NULL;
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ der = NULL;
}
#endif /* WOLFSSL_CERT_REQ */
#endif /* WOLFSSL_CERT_GEN */
+#ifdef WC_RSA_PSS
+ ret = rsa_pss_test(&rng, &key);
+#endif
+
+exit_rsa:
wc_FreeRsaKey(&key);
-#ifdef HAVE_CAVIUM
- wc_RsaFreeCavium(&key);
+#ifdef WOLFSSL_CERT_EXT
+ wc_FreeRsaKey(&keypub);
#endif
- free(tmp);
+#if defined(HAVE_NTRU)
+ wc_FreeRsaKey(&caKey);
+#endif
+
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
wc_FreeRng(&rng);
+ FREE_VAR(in, HEAP_HINT);
+ FREE_VAR(out, HEAP_HINT);
+ FREE_VAR(plain, HEAP_HINT);
+
+ /* ret can be greater then 0 with certgen but all negative values should
+ * be returned and treated as an error */
+ if (ret >= 0) {
+ return 0;
+ }
+ else {
+ return ret;
+ }
+}
+
+#endif /* !NO_RSA */
+
+
+#ifndef NO_DH
+
+static int dh_fips_generate_test(WC_RNG *rng)
+{
+ int ret = 0;
+ DhKey key;
+ static byte p[] = {
+ 0xc5, 0x7c, 0xa2, 0x4f, 0x4b, 0xd6, 0x8c, 0x3c,
+ 0xda, 0xc7, 0xba, 0xaa, 0xea, 0x2e, 0x5c, 0x1e,
+ 0x18, 0xb2, 0x7b, 0x8c, 0x55, 0x65, 0x9f, 0xea,
+ 0xe0, 0xa1, 0x36, 0x53, 0x2b, 0x36, 0xe0, 0x4e,
+ 0x3e, 0x64, 0xa9, 0xe4, 0xfc, 0x8f, 0x32, 0x62,
+ 0x97, 0xe4, 0xbe, 0xf7, 0xc1, 0xde, 0x07, 0x5a,
+ 0x89, 0x28, 0xf3, 0xfe, 0x4f, 0xfe, 0x68, 0xbc,
+ 0xfb, 0x0a, 0x7c, 0xa4, 0xb3, 0x14, 0x48, 0x89,
+ 0x9f, 0xaf, 0xb8, 0x43, 0xe2, 0xa0, 0x62, 0x5c,
+ 0xb4, 0x88, 0x3f, 0x06, 0x50, 0x11, 0xfe, 0x65,
+ 0x8d, 0x49, 0xd2, 0xf5, 0x4b, 0x74, 0x79, 0xdb,
+ 0x06, 0x62, 0x92, 0x89, 0xed, 0xda, 0xcb, 0x87,
+ 0x37, 0x16, 0xd2, 0xa1, 0x7a, 0xe8, 0xde, 0x92,
+ 0xee, 0x3e, 0x41, 0x4a, 0x91, 0x5e, 0xed, 0xf3,
+ 0x6c, 0x6b, 0x7e, 0xfd, 0x15, 0x92, 0x18, 0xfc,
+ 0xa7, 0xac, 0x42, 0x85, 0x57, 0xe9, 0xdc, 0xda,
+ 0x55, 0xc9, 0x8b, 0x28, 0x9e, 0xc1, 0xc4, 0x46,
+ 0x4d, 0x88, 0xed, 0x62, 0x8e, 0xdb, 0x3f, 0xb9,
+ 0xd7, 0xc8, 0xe3, 0xcf, 0xb8, 0x34, 0x2c, 0xd2,
+ 0x6f, 0x28, 0x06, 0x41, 0xe3, 0x66, 0x8c, 0xfc,
+ 0x72, 0xff, 0x26, 0x3b, 0x6b, 0x6c, 0x6f, 0x73,
+ 0xde, 0xf2, 0x90, 0x29, 0xe0, 0x61, 0x32, 0xc4,
+ 0x12, 0x74, 0x09, 0x52, 0xec, 0xf3, 0x1b, 0xa6,
+ 0x45, 0x98, 0xac, 0xf9, 0x1c, 0x65, 0x8e, 0x3a,
+ 0x91, 0x84, 0x4b, 0x23, 0x8a, 0xb2, 0x3c, 0xc9,
+ 0xfa, 0xea, 0xf1, 0x38, 0xce, 0xd8, 0x05, 0xe0,
+ 0xfa, 0x44, 0x68, 0x1f, 0xeb, 0xd9, 0x57, 0xb8,
+ 0x4a, 0x97, 0x5b, 0x88, 0xc5, 0xf1, 0xbb, 0xb0,
+ 0x49, 0xc3, 0x91, 0x7c, 0xd3, 0x13, 0xb9, 0x47,
+ 0xbb, 0x91, 0x8f, 0xe5, 0x26, 0x07, 0xab, 0xa9,
+ 0xc5, 0xd0, 0x3d, 0x95, 0x41, 0x26, 0x92, 0x9d,
+ 0x13, 0x67, 0xf2, 0x7e, 0x11, 0x88, 0xdc, 0x2d
+ };
+ static byte g[] = {
+ 0x4a, 0x1a, 0xf3, 0xa4, 0x92, 0xe9, 0xee, 0x74,
+ 0x6e, 0x57, 0xd5, 0x8c, 0x2c, 0x5b, 0x41, 0x41,
+ 0x5e, 0xd4, 0x55, 0x19, 0xdc, 0xd9, 0x32, 0x91,
+ 0xf7, 0xfd, 0xc2, 0x57, 0xff, 0x03, 0x14, 0xdb,
+ 0xf1, 0xb7, 0x60, 0x0c, 0x43, 0x59, 0x3f, 0xff,
+ 0xac, 0xf1, 0x80, 0x9a, 0x15, 0x6f, 0xd8, 0x6e,
+ 0xb7, 0x85, 0x18, 0xc8, 0xec, 0x4e, 0x59, 0x4a,
+ 0xe2, 0x91, 0x43, 0x4c, 0xeb, 0x95, 0xb6, 0x2e,
+ 0x9a, 0xea, 0x53, 0x68, 0x80, 0x64, 0x69, 0x40,
+ 0xf9, 0xec, 0xbd, 0x85, 0x89, 0x26, 0x97, 0x67,
+ 0xaf, 0xb0, 0xad, 0x00, 0x1b, 0xd4, 0xfd, 0x94,
+ 0xd3, 0xe9, 0x92, 0xb1, 0xb4, 0xbc, 0x5a, 0xaa,
+ 0x92, 0x80, 0x89, 0x3b, 0x39, 0x05, 0x6c, 0x22,
+ 0x26, 0xfe, 0x5a, 0x28, 0x6c, 0x37, 0x50, 0x5a,
+ 0x38, 0x99, 0xcf, 0xf3, 0xc1, 0x96, 0x45, 0xdc,
+ 0x01, 0xcb, 0x20, 0x87, 0xa5, 0x00, 0x8c, 0xf5,
+ 0x4d, 0xc2, 0xef, 0xb8, 0x9b, 0xd1, 0x87, 0xbe,
+ 0xed, 0xd5, 0x0a, 0x29, 0x15, 0x34, 0x59, 0x4c,
+ 0x3a, 0x05, 0x22, 0x05, 0x44, 0x4f, 0x9f, 0xc8,
+ 0x47, 0x12, 0x24, 0x8e, 0xa8, 0x79, 0xe4, 0x67,
+ 0xba, 0x4d, 0x5b, 0x75, 0x56, 0x95, 0xeb, 0xe8,
+ 0x8a, 0xfa, 0x8e, 0x01, 0x8c, 0x1b, 0x74, 0x63,
+ 0xd9, 0x2f, 0xf7, 0xd3, 0x44, 0x8f, 0xa8, 0xf5,
+ 0xaf, 0x6c, 0x4f, 0xdb, 0xe7, 0xc9, 0x6c, 0x71,
+ 0x22, 0xa3, 0x1d, 0xf1, 0x40, 0xb2, 0xe0, 0x9a,
+ 0xb6, 0x72, 0xc9, 0xc0, 0x13, 0x16, 0xa2, 0x4a,
+ 0xe1, 0x92, 0xc7, 0x54, 0x23, 0xab, 0x9d, 0xa1,
+ 0xa1, 0xe5, 0x0b, 0xed, 0xba, 0xe8, 0x84, 0x37,
+ 0xb2, 0xe7, 0xfe, 0x32, 0x8d, 0xfa, 0x1c, 0x53,
+ 0x77, 0x97, 0xc7, 0xf3, 0x48, 0xc9, 0xdb, 0x2d,
+ 0x75, 0x52, 0x9d, 0x42, 0x51, 0x78, 0x62, 0x68,
+ 0x05, 0x45, 0x15, 0xf8, 0xa2, 0x4e, 0xf3, 0x0b
+ };
+ static byte q[] = {
+ 0xe0, 0x35, 0x37, 0xaf, 0xb2, 0x50, 0x91, 0x8e,
+ 0xf2, 0x62, 0x2b, 0xd9, 0x9f, 0x6c, 0x11, 0x75,
+ 0xec, 0x24, 0x5d, 0x78, 0x59, 0xe7, 0x8d, 0xb5,
+ 0x40, 0x52, 0xed, 0x41
+ };
+ static byte q0[] = {
+ 0x00,
+ 0xe0, 0x35, 0x37, 0xaf, 0xb2, 0x50, 0x91, 0x8e,
+ 0xf2, 0x62, 0x2b, 0xd9, 0x9f, 0x6c, 0x11, 0x75,
+ 0xec, 0x24, 0x5d, 0x78, 0x59, 0xe7, 0x8d, 0xb5,
+ 0x40, 0x52, 0xed, 0x41
+ };
+ byte priv[256];
+ byte pub[256];
+ word32 privSz = sizeof(priv);
+ word32 pubSz = sizeof(pub);
+
+ /* Parameter Validation testing. */
+ ret = wc_DhGenerateKeyPair(NULL, rng, priv, &privSz, pub, &pubSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7777;
+ ret = wc_DhGenerateKeyPair(&key, NULL, priv, &privSz, pub, &pubSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7778;
+ ret = wc_DhGenerateKeyPair(&key, rng, NULL, &privSz, pub, &pubSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7779;
+ ret = wc_DhGenerateKeyPair(&key, rng, priv, NULL, pub, &pubSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7780;
+ ret = wc_DhGenerateKeyPair(&key, rng, priv, &privSz, NULL, &pubSz);
+ if (ret != BAD_FUNC_ARG)
+ return -7781;
+ ret = wc_DhGenerateKeyPair(&key, rng, priv, &privSz, pub, NULL);
+ if (ret != BAD_FUNC_ARG)
+ return -7782;
+
+ ret = wc_InitDhKey_ex(&key, HEAP_HINT, devId);
+ if (ret != 0)
+ return -7783;
+
+ ret = wc_DhSetKey_ex(&key, p, sizeof(p), g, sizeof(g), q0, sizeof(q0));
+ if (ret != 0) {
+ ERROR_OUT(-7784, exit_gen_test);
+ }
+
+ wc_FreeDhKey(&key);
+
+ ret = wc_InitDhKey_ex(&key, HEAP_HINT, devId);
+ if (ret != 0)
+ return -7785;
+
+ ret = wc_DhSetKey_ex(&key, p, sizeof(p), g, sizeof(g), q, sizeof(q));
+ if (ret != 0) {
+ ERROR_OUT(-7786, exit_gen_test);
+ }
+
+ /* Use API. */
+ ret = wc_DhGenerateKeyPair(&key, rng, priv, &privSz, pub, &pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7787, exit_gen_test);
+ }
+
+ ret = wc_DhCheckPubKey_ex(&key, pub, pubSz, q0, sizeof(q0));
+ if (ret != 0) {
+ ERROR_OUT(-7788, exit_gen_test);
+ }
+
+ wc_FreeDhKey(&key);
+ ret = wc_InitDhKey_ex(&key, HEAP_HINT, devId);
+ if (ret != 0)
+ return -7789;
+
+ ret = wc_DhSetKey(&key, p, sizeof(p), g, sizeof(g));
+ if (ret != 0) {
+ ERROR_OUT(-7790, exit_gen_test);
+ }
+
+ ret = wc_DhCheckPubKey_ex(&key, pub, pubSz, q, sizeof(q));
+ if (ret != 0) {
+ ERROR_OUT(-7791, exit_gen_test);
+ }
+
+#ifndef HAVE_SELFTEST
+ ret = wc_DhCheckKeyPair(&key, pub, pubSz, priv, privSz);
+ if (ret != 0) {
+ ERROR_OUT(-7792, exit_gen_test);
+ }
+
+ /* Taint the public key so the check fails. */
+ pub[0]++;
+ ret = wc_DhCheckKeyPair(&key, pub, pubSz, priv, privSz);
+ if (ret != MP_CMP_E) {
+ ERROR_OUT(-7793, exit_gen_test);
+ }
+
+#ifdef WOLFSSL_KEY_GEN
+ wc_FreeDhKey(&key);
+ ret = wc_InitDhKey_ex(&key, HEAP_HINT, devId);
+ if (ret != 0)
+ return -7794;
+
+ ret = wc_DhGenerateParams(rng, 2048, &key);
+ if (ret != 0) {
+ ERROR_OUT(-7795, exit_gen_test);
+ }
+
+ privSz = sizeof(priv);
+ pubSz = sizeof(pub);
+
+ ret = wc_DhGenerateKeyPair(&key, rng, priv, &privSz, pub, &pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7796, exit_gen_test);
+ }
+
+#endif /* WOLFSSL_KEY_GEN */
+#endif /* HAVE_SELFTEST */
+
+ ret = 0;
+
+exit_gen_test:
+ wc_FreeDhKey(&key);
+
+ return ret;
+}
+
+static int dh_generate_test(WC_RNG *rng)
+{
+ int ret = 0;
+ DhKey smallKey;
+ byte p[2] = { 0, 5 };
+ byte g[2] = { 0, 2 };
+#ifndef WOLFSSL_SP_MATH
+#ifdef WOLFSSL_DH_CONST
+ /* the table for constant DH lookup will round to the lowest byte size 21 */
+ byte priv[21];
+ byte pub[21];
+#else
+ byte priv[2];
+ byte pub[2];
+#endif
+ word32 privSz = sizeof(priv);
+ word32 pubSz = sizeof(pub);
+#endif
+
+ ret = wc_InitDhKey_ex(&smallKey, HEAP_HINT, devId);
+ if (ret != 0)
+ return -7797;
+
+ /* Parameter Validation testing. */
+ ret = wc_InitDhKey_ex(NULL, HEAP_HINT, devId);
+ if (ret != BAD_FUNC_ARG)
+ return -7798;
+ wc_FreeDhKey(NULL);
+
+ ret = wc_DhSetKey(NULL, p, sizeof(p), g, sizeof(g));
+ if (ret != BAD_FUNC_ARG) {
+ ERROR_OUT(-7799, exit_gen_test);
+ }
+ ret = wc_DhSetKey(&smallKey, NULL, sizeof(p), g, sizeof(g));
+ if (ret != BAD_FUNC_ARG) {
+ ERROR_OUT(-7800, exit_gen_test);
+ }
+ ret = wc_DhSetKey(&smallKey, p, 0, g, sizeof(g));
+ if (ret != BAD_FUNC_ARG) {
+ ERROR_OUT(-7801, exit_gen_test);
+ }
+ ret = wc_DhSetKey(&smallKey, p, sizeof(p), NULL, sizeof(g));
+ if (ret != BAD_FUNC_ARG) {
+ ERROR_OUT(-7802, exit_gen_test);
+ }
+ ret = wc_DhSetKey(&smallKey, p, sizeof(p), g, 0);
+ if (ret != BAD_FUNC_ARG) {
+ ERROR_OUT(-7803, exit_gen_test);
+ }
+ ret = wc_DhSetKey(&smallKey, p, sizeof(p), g, sizeof(g));
+ if (ret != 0) {
+ ERROR_OUT(-7804, exit_gen_test);
+ }
+
+#ifndef WOLFSSL_SP_MATH
+ /* Use API. */
+ ret = wc_DhGenerateKeyPair(&smallKey, rng, priv, &privSz, pub, &pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &smallKey.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ret = -7805;
+ }
+#else
+ (void)rng;
+ ret = 0;
+#endif
+
+exit_gen_test:
+ wc_FreeDhKey(&smallKey);
+
+ return ret;
+}
+
+#if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+typedef struct dh_pubvalue_test {
+ const byte* data;
+ word32 len;
+} dh_pubvalue_test;
+
+static int dh_test_check_pubvalue(void)
+{
+ int ret;
+ word32 i;
+ const byte prime[] = {0x01, 0x00, 0x01};
+ const byte pubValZero[] = { 0x00 };
+ const byte pubValZeroLong[] = { 0x00, 0x00, 0x00 };
+ const byte pubValOne[] = { 0x01 };
+ const byte pubValOneLong[] = { 0x00, 0x00, 0x01 };
+ const byte pubValPrimeMinusOne[] = { 0x01, 0x00, 0x00 };
+ const byte pubValPrimeLong[] = {0x00, 0x01, 0x00, 0x01};
+ const byte pubValPrimePlusOne[] = { 0x01, 0x00, 0x02 };
+ const byte pubValTooBig0[] = { 0x02, 0x00, 0x01 };
+ const byte pubValTooBig1[] = { 0x01, 0x01, 0x01 };
+ const byte pubValTooLong[] = { 0x01, 0x00, 0x00, 0x01 };
+ const dh_pubvalue_test dh_pubval_fail[] = {
+ { prime, sizeof(prime) },
+ { pubValZero, sizeof(pubValZero) },
+ { pubValZeroLong, sizeof(pubValZeroLong) },
+ { pubValOne, sizeof(pubValOne) },
+ { pubValOneLong, sizeof(pubValOneLong) },
+ { pubValPrimeMinusOne, sizeof(pubValPrimeMinusOne) },
+ { pubValPrimeLong, sizeof(pubValPrimeLong) },
+ { pubValPrimePlusOne, sizeof(pubValPrimePlusOne) },
+ { pubValTooBig0, sizeof(pubValTooBig0) },
+ { pubValTooBig1, sizeof(pubValTooBig1) },
+ { pubValTooLong, sizeof(pubValTooLong) },
+ };
+ const byte pubValTwo[] = { 0x02 };
+ const byte pubValTwoLong[] = { 0x00, 0x00, 0x02 };
+ const byte pubValGood[] = { 0x12, 0x34 };
+ const byte pubValGoodLen[] = { 0x00, 0x12, 0x34 };
+ const byte pubValGoodLong[] = { 0x00, 0x00, 0x12, 0x34 };
+ const dh_pubvalue_test dh_pubval_pass[] = {
+ { pubValTwo, sizeof(pubValTwo) },
+ { pubValTwoLong, sizeof(pubValTwoLong) },
+ { pubValGood, sizeof(pubValGood) },
+ { pubValGoodLen, sizeof(pubValGoodLen) },
+ { pubValGoodLong, sizeof(pubValGoodLong) },
+ };
+
+ for (i = 0; i < sizeof(dh_pubval_fail) / sizeof(*dh_pubval_fail); i++) {
+ ret = wc_DhCheckPubValue(prime, sizeof(prime), dh_pubval_fail[i].data,
+ dh_pubval_fail[i].len);
+ if (ret != MP_VAL)
+ return -7806 - (int)i;
+ }
+
+ for (i = 0; i < sizeof(dh_pubval_pass) / sizeof(*dh_pubval_pass); i++) {
+ ret = wc_DhCheckPubValue(prime, sizeof(prime), dh_pubval_pass[i].data,
+ dh_pubval_pass[i].len);
+ if (ret != 0)
+ return -7816 - (int)i;
+ }
+
return 0;
}
+#endif
+#if defined(HAVE_FFDHE)
+
+#ifdef HAVE_FFDHE_3072
+ #define FFDHE_KEY_SIZE (3072/8)
+#else
+ #define FFDHE_KEY_SIZE (2048/8)
#endif
+static int dh_test_ffdhe(WC_RNG *rng, const DhParams* params)
+{
+ int ret;
+ word32 privSz, pubSz, privSz2, pubSz2;
+ byte priv[FFDHE_KEY_SIZE];
+ byte pub[FFDHE_KEY_SIZE];
+ byte priv2[FFDHE_KEY_SIZE];
+ byte pub2[FFDHE_KEY_SIZE];
+ byte agree[FFDHE_KEY_SIZE];
+ byte agree2[FFDHE_KEY_SIZE];
+ word32 agreeSz = (word32)sizeof(agree);
+ word32 agreeSz2 = (word32)sizeof(agree2);
+ DhKey key;
+ DhKey key2;
-#ifndef NO_DH
+ XMEMSET(&key, 0, sizeof(DhKey));
+ XMEMSET(&key2, 0, sizeof(DhKey));
-#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
- #ifdef FREESCALE_MQX
- static const char* dhKey = "a:\\certs\\dh2048.der";
- #elif defined(NO_ASN)
- /* don't use file, no DER parsing */
- #else
- static const char* dhKey = "./certs/dh2048.der";
- #endif
+ ret = wc_InitDhKey_ex(&key, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-7826, done);
+ }
+ ret = wc_InitDhKey_ex(&key2, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-7827, done);
+ }
+
+ ret = wc_DhSetKey(&key, params->p, params->p_len, params->g, params->g_len);
+ if (ret != 0) {
+ ERROR_OUT(-7828, done);
+ }
+
+ ret = wc_DhSetKey(&key2, params->p, params->p_len, params->g,
+ params->g_len);
+ if (ret != 0) {
+ ERROR_OUT(-7829, done);
+ }
+
+ ret = wc_DhGenerateKeyPair(&key, rng, priv, &privSz, pub, &pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7830, done);
+ }
+
+ ret = wc_DhGenerateKeyPair(&key2, rng, priv2, &privSz2, pub2, &pubSz2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key2.asyncDev, WC_ASYNC_FLAG_NONE);
#endif
+ if (ret != 0) {
+ ERROR_OUT(-7831, done);
+ }
+
+ ret = wc_DhAgree(&key, agree, &agreeSz, priv, privSz, pub2, pubSz2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7832, done);
+ }
+
+ ret = wc_DhAgree(&key2, agree2, &agreeSz2, priv2, privSz2, pub, pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key2.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7833, done);
+ }
+
+ if (agreeSz != agreeSz2 || XMEMCMP(agree, agree2, agreeSz)) {
+ ERROR_OUT(-7834, done);
+ }
+
+done:
+ wc_FreeDhKey(&key);
+ wc_FreeDhKey(&key2);
+ return ret;
+}
+
+#endif /* HAVE_FFDHE */
int dh_test(void)
{
int ret;
word32 bytes;
- word32 idx = 0, privSz, pubSz, privSz2, pubSz2, agreeSz, agreeSz2;
+ word32 idx = 0, privSz, pubSz, privSz2, pubSz2;
byte tmp[1024];
+#if !defined(USE_CERT_BUFFERS_3072) && !defined(USE_CERT_BUFFERS_4096)
byte priv[256];
byte pub[256];
byte priv2[256];
byte pub2[256];
byte agree[256];
byte agree2[256];
+#else
+ byte priv[512];
+ byte pub[512];
+ byte priv2[512];
+ byte pub2[512];
+ byte agree[512];
+ byte agree2[512];
+#endif
+ word32 agreeSz = (word32)sizeof(agree);
+ word32 agreeSz2 = (word32)sizeof(agree2);
DhKey key;
DhKey key2;
- RNG rng;
+ WC_RNG rng;
+ int keyInit = 0;
#ifdef USE_CERT_BUFFERS_1024
- XMEMCPY(tmp, dh_key_der_1024, sizeof_dh_key_der_1024);
- bytes = sizeof_dh_key_der_1024;
+ XMEMCPY(tmp, dh_key_der_1024, (size_t)sizeof_dh_key_der_1024);
+ bytes = (size_t)sizeof_dh_key_der_1024;
#elif defined(USE_CERT_BUFFERS_2048)
- XMEMCPY(tmp, dh_key_der_2048, sizeof_dh_key_der_2048);
- bytes = sizeof_dh_key_der_2048;
+ XMEMCPY(tmp, dh_key_der_2048, (size_t)sizeof_dh_key_der_2048);
+ bytes = (size_t)sizeof_dh_key_der_2048;
+#elif defined(USE_CERT_BUFFERS_3072)
+ XMEMCPY(tmp, dh_key_der_3072, (size_t)sizeof_dh_key_der_3072);
+ bytes = (size_t)sizeof_dh_key_der_3072;
+#elif defined(USE_CERT_BUFFERS_4096)
+ XMEMCPY(tmp, dh_key_der_4096, (size_t)sizeof_dh_key_der_4096);
+ bytes = (size_t)sizeof_dh_key_der_4096;
#elif defined(NO_ASN)
/* don't use file, no DER parsing */
-#else
- FILE* file = fopen(dhKey, "rb");
-
+#elif !defined(NO_FILESYSTEM)
+ XFILE file = XFOPEN(dhKey, "rb");
if (!file)
- return -50;
+ return -7900;
- bytes = (word32) fread(tmp, 1, sizeof(tmp), file);
- fclose(file);
+ bytes = (word32) XFREAD(tmp, 1, sizeof(tmp), file);
+ XFCLOSE(file);
+#else
+ /* No DH key to use. */
+ return -7901;
#endif /* USE_CERT_BUFFERS */
(void)idx;
(void)tmp;
(void)bytes;
- wc_InitDhKey(&key);
- wc_InitDhKey(&key2);
+ /* Use API for coverage. */
+ ret = wc_InitDhKey(&key);
+ if (ret != 0) {
+ ERROR_OUT(-7902, done);
+ }
+ wc_FreeDhKey(&key);
+
+ ret = wc_InitDhKey_ex(&key, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-7903, done);
+ }
+ keyInit = 1;
+ ret = wc_InitDhKey_ex(&key2, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-7904, done);
+ }
+
#ifdef NO_ASN
ret = wc_DhSetKey(&key, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
- if (ret != 0)
- return -51;
+ if (ret != 0) {
+ ERROR_OUT(-7905, done);
+ }
ret = wc_DhSetKey(&key2, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
- if (ret != 0)
- return -51;
+ if (ret != 0) {
+ ERROR_OUT(-7906, done);
+ }
#else
ret = wc_DhKeyDecode(tmp, &idx, &key, bytes);
- if (ret != 0)
- return -51;
+ if (ret != 0) {
+ ERROR_OUT(-7907, done);
+ }
idx = 0;
ret = wc_DhKeyDecode(tmp, &idx, &key2, bytes);
- if (ret != 0)
- return -52;
+ if (ret != 0) {
+ ERROR_OUT(-7908, done);
+ }
#endif
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
ret = wc_InitRng(&rng);
- if (ret != 0)
- return -53;
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7909, done);
+ }
- ret = wc_DhGenerateKeyPair(&key, &rng, priv, &privSz, pub, &pubSz);
- ret += wc_DhGenerateKeyPair(&key2, &rng, priv2, &privSz2, pub2, &pubSz2);
- if (ret != 0)
- return -54;
+ ret = wc_DhGenerateKeyPair(&key, &rng, priv, &privSz, pub, &pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7910, done);
+ }
- ret = wc_DhAgree(&key, agree, &agreeSz, priv, privSz, pub2, pubSz2);
- ret += wc_DhAgree(&key2, agree2, &agreeSz2, priv2, privSz2, pub, pubSz);
- if (ret != 0)
- return -55;
+ ret = wc_DhGenerateKeyPair(&key2, &rng, priv2, &privSz2, pub2, &pubSz2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key2.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7911, done);
+ }
+
+ ret = wc_DhAgree(&key, agree, &agreeSz, priv, privSz, pub2, pubSz2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7912, done);
+ }
+
+ ret = wc_DhAgree(&key2, agree2, &agreeSz2, priv2, privSz2, pub, pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key2.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-7913, done);
+ }
+
+ if (agreeSz != agreeSz2 || XMEMCMP(agree, agree2, agreeSz)) {
+ ERROR_OUT(-7914, done);
+ }
+
+#if defined(WOLFSSL_KEY_GEN) && !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+ if (wc_DhCheckPrivKey(NULL, NULL, 0) != BAD_FUNC_ARG)
+ return -7915;
+
+ if (wc_DhCheckPrivKey(&key, priv, privSz) != 0)
+ return -7916;
+
+ if (wc_DhExportParamsRaw(NULL, NULL, NULL, NULL, NULL, NULL, NULL) != BAD_FUNC_ARG)
+ return -7917;
+ {
+ word32 pSz, qSz, gSz;
+ if (wc_DhExportParamsRaw(&key, NULL, &pSz, NULL, &qSz, NULL, &gSz) != LENGTH_ONLY_E)
+ return -7918;
+ }
+#endif
- if (memcmp(agree, agree2, agreeSz))
- return -56;
+ ret = dh_generate_test(&rng);
+ if (ret == 0)
+ ret = dh_fips_generate_test(&rng);
+#if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+ if (ret == 0)
+ ret = dh_test_check_pubvalue();
+#endif
+
+ /* Specialized code for key gen when using FFDHE-2048 and FFDHE-3072. */
+ #ifdef HAVE_FFDHE_2048
+ if (ret == 0) {
+ ret = dh_test_ffdhe(&rng, wc_Dh_ffdhe2048_Get());
+ if (ret != 0)
+ printf("error with FFDHE 2048\n");
+ }
+ #endif
+ #ifdef HAVE_FFDHE_3072
+ if (ret == 0) {
+ ret = dh_test_ffdhe(&rng, wc_Dh_ffdhe3072_Get());
+ if (ret != 0)
+ printf("error with FFDHE 3072\n");
+ }
+ #endif
wc_FreeDhKey(&key);
+ keyInit = 0;
+
+#if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST) && \
+ !defined(WOLFSSL_OLD_PRIME_CHECK)
+ if (ret == 0) {
+ /* Test Check Key */
+ ret = wc_DhSetCheckKey(&key, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g),
+ NULL, 0, 0, &rng);
+ keyInit = 1; /* DhSetCheckKey also initializes the key, free it */
+ }
+#endif
+
+done:
+
+ if (keyInit)
+ wc_FreeDhKey(&key);
wc_FreeDhKey(&key2);
wc_FreeRng(&rng);
- return 0;
+ return ret;
}
#endif /* NO_DH */
@@ -4337,14 +14546,6 @@ int dh_test(void)
#ifndef NO_DSA
-#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
- #ifdef FREESCALE_MQX
- static const char* dsaKey = "a:\\certs\\dsa2048.der";
- #else
- static const char* dsaKey = "./certs/dsa2048.der";
- #endif
-#endif
-
int dsa_test(void)
{
int ret, answer;
@@ -4352,12 +14553,11 @@ int dsa_test(void)
word32 idx = 0;
byte tmp[1024];
DsaKey key;
- RNG rng;
- Sha sha;
- byte hash[SHA_DIGEST_SIZE];
+ WC_RNG rng;
+ wc_Sha sha;
+ byte hash[WC_SHA_DIGEST_SIZE];
byte signature[40];
-
#ifdef USE_CERT_BUFFERS_1024
XMEMCPY(tmp, dsa_key_der_1024, sizeof_dsa_key_der_1024);
bytes = sizeof_dsa_key_der_1024;
@@ -4365,66 +14565,1075 @@ int dsa_test(void)
XMEMCPY(tmp, dsa_key_der_2048, sizeof_dsa_key_der_2048);
bytes = sizeof_dsa_key_der_2048;
#else
- FILE* file = fopen(dsaKey, "rb");
-
+ XFILE file = XFOPEN(dsaKey, "rb");
if (!file)
- return -60;
+ return -8000;
- bytes = (word32) fread(tmp, 1, sizeof(tmp), file);
- fclose(file);
+ bytes = (word32) XFREAD(tmp, 1, sizeof(tmp), file);
+ XFCLOSE(file);
#endif /* USE_CERT_BUFFERS */
- ret = wc_InitSha(&sha);
+ ret = wc_InitSha_ex(&sha, HEAP_HINT, devId);
if (ret != 0)
- return -4002;
+ return -8001;
wc_ShaUpdate(&sha, tmp, bytes);
wc_ShaFinal(&sha, hash);
+ wc_ShaFree(&sha);
+
+ ret = wc_InitDsaKey(&key);
+ if (ret != 0) return -8002;
- wc_InitDsaKey(&key);
ret = wc_DsaPrivateKeyDecode(tmp, &idx, &key, bytes);
- if (ret != 0) return -61;
+ if (ret != 0) return -8003;
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
ret = wc_InitRng(&rng);
- if (ret != 0) return -62;
+#endif
+ if (ret != 0) return -8004;
ret = wc_DsaSign(hash, signature, &key, &rng);
- if (ret != 0) return -63;
+ if (ret != 0) return -8005;
ret = wc_DsaVerify(hash, signature, &key, &answer);
- if (ret != 0) return -64;
- if (answer != 1) return -65;
+ if (ret != 0) return -8006;
+ if (answer != 1) return -8007;
wc_FreeDsaKey(&key);
- wc_FreeRng(&rng);
+#ifdef WOLFSSL_KEY_GEN
+ {
+ byte* der;
+ byte* pem;
+ int derSz = 0;
+ DsaKey derIn;
+ DsaKey genKey;
+
+ ret = wc_InitDsaKey(&genKey);
+ if (ret != 0) return -8008;
+
+ ret = wc_MakeDsaParameters(&rng, 1024, &genKey);
+ if (ret != 0) {
+ wc_FreeDsaKey(&genKey);
+ return -8009;
+ }
+
+ ret = wc_MakeDsaKey(&rng, &genKey);
+ if (ret != 0) {
+ wc_FreeDsaKey(&genKey);
+ return -8010;
+ }
+
+ der = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (der == NULL) {
+ wc_FreeDsaKey(&genKey);
+ return -8011;
+ }
+ pem = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pem == NULL) {
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_FreeDsaKey(&genKey);
+ return -8012;
+ }
+
+ derSz = wc_DsaKeyToDer(&genKey, der, FOURK_BUF);
+ if (derSz < 0) {
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -8013;
+ }
+
+ ret = SaveDerAndPem(der, derSz, pem, FOURK_BUF, keyDerFile,
+ keyPemFile, DSA_PRIVATEKEY_TYPE, -5814);
+ if (ret != 0) {
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_FreeDsaKey(&genKey);
+ return ret;
+ }
+
+ ret = wc_InitDsaKey(&derIn);
+ if (ret != 0) {
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_FreeDsaKey(&genKey);
+ return -8014;
+ }
+
+ idx = 0;
+ ret = wc_DsaPrivateKeyDecode(der, &idx, &derIn, derSz);
+ if (ret != 0) {
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_FreeDsaKey(&derIn);
+ wc_FreeDsaKey(&genKey);
+ return -8015;
+ }
+
+ wc_FreeDsaKey(&derIn);
+ wc_FreeDsaKey(&genKey);
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+#endif /* WOLFSSL_KEY_GEN */
+
+ if (wc_InitDsaKey_h(&key, NULL) != 0)
+ return -8016;
+
+ wc_FreeRng(&rng);
return 0;
}
#endif /* NO_DSA */
+#ifdef WOLFCRYPT_HAVE_SRP
-#ifdef OPENSSL_EXTRA
+static int generate_random_salt(byte *buf, word32 size)
+{
+ int ret = -8017;
+ WC_RNG rng;
+
+ if(NULL == buf || !size)
+ return -8018;
+
+ if (buf && size && wc_InitRng_ex(&rng, HEAP_HINT, devId) == 0) {
+ ret = wc_RNG_GenerateBlock(&rng, (byte *)buf, size);
+
+ wc_FreeRng(&rng);
+ }
+
+ return ret;
+}
+
+int srp_test(void)
+{
+ Srp cli, srv;
+ int r;
+
+ byte clientPubKey[80]; /* A */
+ byte serverPubKey[80]; /* B */
+ word32 clientPubKeySz = 80;
+ word32 serverPubKeySz = 80;
+ byte clientProof[SRP_MAX_DIGEST_SIZE]; /* M1 */
+ byte serverProof[SRP_MAX_DIGEST_SIZE]; /* M2 */
+ word32 clientProofSz = SRP_MAX_DIGEST_SIZE;
+ word32 serverProofSz = SRP_MAX_DIGEST_SIZE;
+
+ byte username[] = "user";
+ word32 usernameSz = 4;
+
+ byte password[] = "password";
+ word32 passwordSz = 8;
+
+ byte N[] = {
+ 0xC9, 0x4D, 0x67, 0xEB, 0x5B, 0x1A, 0x23, 0x46, 0xE8, 0xAB, 0x42, 0x2F,
+ 0xC6, 0xA0, 0xED, 0xAE, 0xDA, 0x8C, 0x7F, 0x89, 0x4C, 0x9E, 0xEE, 0xC4,
+ 0x2F, 0x9E, 0xD2, 0x50, 0xFD, 0x7F, 0x00, 0x46, 0xE5, 0xAF, 0x2C, 0xF7,
+ 0x3D, 0x6B, 0x2F, 0xA2, 0x6B, 0xB0, 0x80, 0x33, 0xDA, 0x4D, 0xE3, 0x22,
+ 0xE1, 0x44, 0xE7, 0xA8, 0xE9, 0xB1, 0x2A, 0x0E, 0x46, 0x37, 0xF6, 0x37,
+ 0x1F, 0x34, 0xA2, 0x07, 0x1C, 0x4B, 0x38, 0x36, 0xCB, 0xEE, 0xAB, 0x15,
+ 0x03, 0x44, 0x60, 0xFA, 0xA7, 0xAD, 0xF4, 0x83
+ };
+
+ byte g[] = {
+ 0x02
+ };
+
+ byte salt[10];
+
+ byte verifier[80];
+ word32 v_size = sizeof(verifier);
+
+ /* set as 0's so if second init on srv not called SrpTerm is not on
+ * garbage values */
+ XMEMSET(&srv, 0, sizeof(Srp));
+ XMEMSET(&cli, 0, sizeof(Srp));
+
+ /* generating random salt */
+
+ r = generate_random_salt(salt, sizeof(salt));
+
+ /* client knows username and password. */
+ /* server knows N, g, salt and verifier. */
+
+ if (!r) r = wc_SrpInit(&cli, SRP_TYPE_SHA, SRP_CLIENT_SIDE);
+ if (!r) r = wc_SrpSetUsername(&cli, username, usernameSz);
+
+ /* loading N, g and salt in advance to generate the verifier. */
+
+ if (!r) r = wc_SrpSetParams(&cli, N, sizeof(N),
+ g, sizeof(g),
+ salt, sizeof(salt));
+ if (!r) r = wc_SrpSetPassword(&cli, password, passwordSz);
+ if (!r) r = wc_SrpGetVerifier(&cli, verifier, &v_size);
+
+ /* client sends username to server */
+
+ if (!r) r = wc_SrpInit(&srv, SRP_TYPE_SHA, SRP_SERVER_SIDE);
+ if (!r) r = wc_SrpSetUsername(&srv, username, usernameSz);
+ if (!r) r = wc_SrpSetParams(&srv, N, sizeof(N),
+ g, sizeof(g),
+ salt, sizeof(salt));
+ if (!r) r = wc_SrpSetVerifier(&srv, verifier, v_size);
+ if (!r) r = wc_SrpGetPublic(&srv, serverPubKey, &serverPubKeySz);
+
+ /* server sends N, g, salt and B to client */
+
+ if (!r) r = wc_SrpGetPublic(&cli, clientPubKey, &clientPubKeySz);
+ if (!r) r = wc_SrpComputeKey(&cli, clientPubKey, clientPubKeySz,
+ serverPubKey, serverPubKeySz);
+ if (!r) r = wc_SrpGetProof(&cli, clientProof, &clientProofSz);
+
+ /* client sends A and M1 to server */
+
+ if (!r) r = wc_SrpComputeKey(&srv, clientPubKey, clientPubKeySz,
+ serverPubKey, serverPubKeySz);
+ if (!r) r = wc_SrpVerifyPeersProof(&srv, clientProof, clientProofSz);
+ if (!r) r = wc_SrpGetProof(&srv, serverProof, &serverProofSz);
+
+ /* server sends M2 to client */
+
+ if (!r) r = wc_SrpVerifyPeersProof(&cli, serverProof, serverProofSz);
+
+ wc_SrpTerm(&cli);
+ wc_SrpTerm(&srv);
+
+ return r;
+}
+
+#endif /* WOLFCRYPT_HAVE_SRP */
+
+#if defined(OPENSSL_EXTRA) && !defined(WOLFCRYPT_ONLY)
+
+#if !defined(NO_AES) && !defined(WOLFCRYPT_ONLY)
+static int openssl_aes_test(void)
+{
+#ifdef HAVE_AES_CBC
+#ifdef WOLFSSL_AES_128
+ {
+ /* EVP_CipherUpdate test */
+ const byte cbcPlain[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
+ 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,
+ 0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef,
+ 0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,
+ 0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10
+ };
+
+ byte key[] = "0123456789abcdef "; /* align */
+ byte iv[] = "1234567890abcdef "; /* align */
+
+ byte cipher[AES_BLOCK_SIZE * 4];
+ byte plain [AES_BLOCK_SIZE * 4];
+ EVP_CIPHER_CTX en;
+ EVP_CIPHER_CTX de;
+ int outlen ;
+ int total = 0;
+ int i;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 1) == 0)
+ return -8200;
+ if (EVP_CipherUpdate(&en, (byte*)cipher, &outlen,
+ (byte*)cbcPlain, 9) == 0)
+ return -8201;
+ if (outlen != 0)
+ return -8202;
+ total += outlen;
+
+ if (EVP_CipherUpdate(&en, (byte*)&cipher[total], &outlen,
+ (byte*)&cbcPlain[9] , 9) == 0)
+ return -8203;
+ if (outlen != 16)
+ return -8204;
+ total += outlen;
+
+ if (EVP_CipherFinal(&en, (byte*)&cipher[total], &outlen) == 0)
+ return -8205;
+ if (outlen != 16)
+ return -8206;
+ total += outlen;
+ if (total != 32)
+ return 3408;
+
+ total = 0;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 0) == 0)
+ return -8207;
+
+ if (EVP_CipherUpdate(&de, (byte*)plain, &outlen, (byte*)cipher, 6) == 0)
+ return -8208;
+ if (outlen != 0)
+ return -8209;
+ total += outlen;
+
+ if (EVP_CipherUpdate(&de, (byte*)&plain[total], &outlen,
+ (byte*)&cipher[6], 12) == 0)
+ return -8210;
+ if (outlen != 0)
+ total += outlen;
+
+ if (EVP_CipherUpdate(&de, (byte*)&plain[total], &outlen,
+ (byte*)&cipher[6+12], 14) == 0)
+ return -8211;
+ if (outlen != 16)
+ return -8212;
+ total += outlen;
+
+ if (EVP_CipherFinal(&de, (byte*)&plain[total], &outlen) == 0)
+ return -8213;
+ if (outlen != 2)
+ return -8214;
+ total += outlen;
+
+ if (total != 18)
+ return 3427;
+
+ if (XMEMCMP(plain, cbcPlain, 18))
+ return -8215;
+
+ /* test with encrypting/decrypting more than 16 bytes at once */
+ total = 0;
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 1) == 0)
+ return -8216;
+ if (EVP_CipherUpdate(&en, (byte*)cipher, &outlen,
+ (byte*)cbcPlain, 17) == 0)
+ return -8217;
+ if (outlen != 16)
+ return -8218;
+ total += outlen;
+
+ if (EVP_CipherUpdate(&en, (byte*)&cipher[total], &outlen,
+ (byte*)&cbcPlain[17] , 1) == 0)
+ return -8219;
+ if (outlen != 0)
+ return -8220;
+ total += outlen;
+
+ if (EVP_CipherFinal(&en, (byte*)&cipher[total], &outlen) == 0)
+ return -8221;
+ if (outlen != 16)
+ return -8222;
+ total += outlen;
+ if (total != 32)
+ return -8223;
+
+ total = 0;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 0) == 0)
+ return -8224;
+
+ if (EVP_CipherUpdate(&de, (byte*)plain, &outlen, (byte*)cipher, 17) == 0)
+ return -8225;
+ if (outlen != 16)
+ return -8226;
+ total += outlen;
+
+ /* final call on non block size should fail */
+ if (EVP_CipherFinal(&de, (byte*)&plain[total], &outlen) != 0)
+ return -8227;
+
+ if (EVP_CipherUpdate(&de, (byte*)&plain[total], &outlen,
+ (byte*)&cipher[17], 1) == 0)
+ return -8228;
+ if (outlen != 0)
+ total += outlen;
+
+ if (EVP_CipherUpdate(&de, (byte*)&plain[total], &outlen,
+ (byte*)&cipher[17+1], 14) == 0)
+ return -8229;
+ if (outlen != 0)
+ return -8230;
+ total += outlen;
+
+ if (EVP_CipherFinal(&de, (byte*)&plain[total], &outlen) == 0)
+ return -8231;
+ if (outlen != 2)
+ return -8232;
+ total += outlen;
+
+ if (total != 18)
+ return -8233;
+
+ if (XMEMCMP(plain, cbcPlain, 18))
+ return -8234;
+
+ /* test byte by byte decrypt */
+ for (i = 0; i < AES_BLOCK_SIZE * 3; i++) {
+ plain[i] = i;
+ }
+
+ total = 0;
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 1) == 0)
+ return -8235;
+ if (EVP_CipherUpdate(&en, (byte*)cipher, &outlen,
+ (byte*)plain, AES_BLOCK_SIZE * 3) == 0)
+ return -8236;
+ if (outlen != AES_BLOCK_SIZE * 3)
+ return -8237;
+ total += outlen;
+
+ if (EVP_CipherFinal(&en, (byte*)&cipher[total], &outlen) == 0)
+ return -8238;
+ if (outlen != AES_BLOCK_SIZE)
+ return -8239;
+ total += outlen;
+ if (total != sizeof(plain))
+ return -8240;
+
+ total = 0;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 0) == 0)
+ return -8241;
+
+ for (i = 0; i < AES_BLOCK_SIZE * 4; i++) {
+ if (EVP_CipherUpdate(&de, (byte*)plain + total, &outlen,
+ (byte*)cipher + i, 1) == 0)
+ return -8242;
+
+ if (outlen > 0) {
+ int j;
+
+ total += outlen;
+ for (j = 0; j < total; j++) {
+ if (plain[j] != j) {
+ return -8243;
+ }
+ }
+ }
+ }
+
+ if (EVP_CipherFinal(&de, (byte*)&plain[total], &outlen) == 0)
+ return -8244;
+ total += outlen;
+ if (total != AES_BLOCK_SIZE * 3) {
+ return -8245;
+ }
+ for (i = 0; i < AES_BLOCK_SIZE * 3; i++) {
+ if (plain[i] != i) {
+ return -8246;
+ }
+ }
+ }
+
+ /* set buffers to be exact size to catch potential over read/write */
+ {
+ /* EVP_CipherUpdate test */
+ const byte cbcPlain[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
+ 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,
+ 0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef,
+ 0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,
+ 0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10
+ };
+
+ byte key[] = "0123456789abcdef "; /* align */
+ byte iv[] = "1234567890abcdef "; /* align */
+
+ #define EVP_TEST_BUF_SZ 18
+ #define EVP_TEST_BUF_PAD 32
+ byte cipher[EVP_TEST_BUF_SZ];
+ byte plain [EVP_TEST_BUF_SZ];
+ byte padded[EVP_TEST_BUF_PAD];
+ EVP_CIPHER_CTX en;
+ EVP_CIPHER_CTX de;
+ int outlen ;
+ int total = 0;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 1) == 0)
+ return -8247;
+ if (EVP_CIPHER_CTX_set_padding(&en, 0) != 1)
+ return -8248;
+ if (EVP_CipherUpdate(&en, (byte*)cipher, &outlen,
+ (byte*)cbcPlain, EVP_TEST_BUF_SZ) == 0)
+ return -8249;
+ if (outlen != 16)
+ return -8250;
+ total += outlen;
+
+ /* should fail here */
+ if (EVP_CipherFinal(&en, (byte*)&cipher[total], &outlen) != 0)
+ return -8251;
+
+ /* turn padding back on and do successful encrypt */
+ total = 0;
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 1) == 0)
+ return -8252;
+ if (EVP_CIPHER_CTX_set_padding(&en, 1) != 1)
+ return -8253;
+ if (EVP_CipherUpdate(&en, (byte*)padded, &outlen,
+ (byte*)cbcPlain, EVP_TEST_BUF_SZ) == 0)
+ return -8254;
+ if (outlen != 16)
+ return -8255;
+ total += outlen;
+
+ if (EVP_CipherFinal(&en, (byte*)&padded[total], &outlen) == 0)
+ return -8256;
+ total += outlen;
+ if (total != 32)
+ return -8257;
+ XMEMCPY(cipher, padded, EVP_TEST_BUF_SZ);
+
+ /* test out of bounds read on buffers w/o padding during decryption */
+ total = 0;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 0) == 0)
+ return -8258;
+
+ if (EVP_CIPHER_CTX_set_padding(&de, 0) != 1)
+ return -8259;
+ if (EVP_CipherUpdate(&de, (byte*)plain, &outlen, (byte*)cipher,
+ EVP_TEST_BUF_SZ) == 0)
+ return -8260;
+ if (outlen != 16)
+ return -8261;
+ total += outlen;
+
+ /* should fail since not using padding */
+ if (EVP_CipherFinal(&de, (byte*)&plain[total], &outlen) != 0)
+ return -8262;
+
+ total = 0;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 0) == 0)
+ return -8263;
+ if (EVP_CIPHER_CTX_set_padding(&de, 1) != 1)
+ return -8264;
+ if (EVP_CipherUpdate(&de, (byte*)padded, &outlen, (byte*)padded,
+ EVP_TEST_BUF_PAD) == 0)
+ return -8265;
+ if (outlen != 16)
+ return -8266;
+ total += outlen;
+
+ if (EVP_CipherFinal(&de, (byte*)&padded[total], &outlen) == 0)
+ return -8267;
+ if (XMEMCMP(padded, cbcPlain, EVP_TEST_BUF_SZ))
+ return -8268;
+ }
+
+ { /* evp_cipher test: EVP_aes_128_cbc */
+ EVP_CIPHER_CTX ctx;
+
+ const byte msg[] = { /* "Now is the time for all " w/o trailing 0 */
+ 0x6e,0x6f,0x77,0x20,0x69,0x73,0x20,0x74,
+ 0x68,0x65,0x20,0x74,0x69,0x6d,0x65,0x20,
+ 0x66,0x6f,0x72,0x20,0x61,0x6c,0x6c,0x20
+ };
+
+ const byte verify[] =
+ {
+ 0x95,0x94,0x92,0x57,0x5f,0x42,0x81,0x53,
+ 0x2c,0xcc,0x9d,0x46,0x77,0xa2,0x33,0xcb
+ };
+
+ byte key[] = "0123456789abcdef "; /* align */
+ byte iv[] = "1234567890abcdef "; /* align */
+
+ byte cipher[AES_BLOCK_SIZE * 4];
+ byte plain [AES_BLOCK_SIZE * 4];
+
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, EVP_aes_128_cbc(), key, iv, 1) == 0)
+ return -8269;
+
+ if (EVP_Cipher(&ctx, cipher, (byte*)msg, 16) == 0)
+ return -8270;
+
+ if (XMEMCMP(cipher, verify, AES_BLOCK_SIZE))
+ return -8271;
+
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, EVP_aes_128_cbc(), key, iv, 0) == 0)
+ return -8272;
+
+ if (EVP_Cipher(&ctx, plain, cipher, 16) == 0)
+ return -8273;
+
+ if (XMEMCMP(plain, msg, AES_BLOCK_SIZE))
+ return -8274;
+
+
+ } /* end evp_cipher test: EVP_aes_128_cbc*/
+#endif /* WOLFSSL_AES_128 */
+#endif /* HAVE_AES_CBC */
+
+#if defined(HAVE_AES_ECB) && defined(WOLFSSL_AES_256)
+ { /* evp_cipher test: EVP_aes_256_ecb*/
+ EVP_CIPHER_CTX ctx;
+ const byte msg[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte verify[] =
+ {
+ 0xf3,0xee,0xd1,0xbd,0xb5,0xd2,0xa0,0x3c,
+ 0x06,0x4b,0x5a,0x7e,0x3d,0xb1,0x81,0xf8
+ };
+
+ const byte key[] =
+ {
+ 0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
+ 0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
+ 0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
+ 0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
+ };
+
+
+ byte cipher[AES_BLOCK_SIZE * 4];
+ byte plain [AES_BLOCK_SIZE * 4];
+
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, EVP_aes_256_ecb(), (unsigned char*)key, NULL, 1) == 0)
+ return -8275;
+
+ if (EVP_Cipher(&ctx, cipher, (byte*)msg, 16) == 0)
+ return -8276;
+
+ if (XMEMCMP(cipher, verify, AES_BLOCK_SIZE))
+ return -8277;
+
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, EVP_aes_256_ecb(), (unsigned char*)key, NULL, 0) == 0)
+ return -8278;
+
+ if (EVP_Cipher(&ctx, plain, cipher, 16) == 0)
+ return -8279;
+
+ if (XMEMCMP(plain, msg, AES_BLOCK_SIZE))
+ return -8280;
+
+ } /* end evp_cipher test */
+#endif /* HAVE_AES_ECB && WOLFSSL_AES_256 */
+
+#if defined(WOLFSSL_AES_DIRECT) && defined(WOLFSSL_AES_256)
+ /* enable HAVE_AES_DECRYPT for AES_encrypt/decrypt */
+ {
+ /* Test: AES_encrypt/decrypt/set Key */
+ AES_KEY enc;
+ #ifdef HAVE_AES_DECRYPT
+ AES_KEY dec;
+ #endif
+
+ const byte msg[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte verify[] =
+ {
+ 0xf3,0xee,0xd1,0xbd,0xb5,0xd2,0xa0,0x3c,
+ 0x06,0x4b,0x5a,0x7e,0x3d,0xb1,0x81,0xf8
+ };
+
+ const byte key[] =
+ {
+ 0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
+ 0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
+ 0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
+ 0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
+ };
+
+ byte plain[sizeof(msg)];
+ byte cipher[sizeof(msg)];
+
+ AES_set_encrypt_key(key, sizeof(key)*8, &enc);
+ AES_set_decrypt_key(key, sizeof(key)*8, &dec);
+
+ AES_encrypt(msg, cipher, &enc);
+
+ #ifdef HAVE_AES_DECRYPT
+ AES_decrypt(cipher, plain, &dec);
+ if (XMEMCMP(plain, msg, AES_BLOCK_SIZE))
+ return -8281;
+ #endif /* HAVE_AES_DECRYPT */
+
+ if (XMEMCMP(cipher, verify, AES_BLOCK_SIZE))
+ return -8282;
+ }
+#endif /* WOLFSSL_AES_DIRECT && WOLFSSL_AES_256 */
+
+/* EVP_Cipher with EVP_aes_xxx_ctr() */
+#ifdef WOLFSSL_AES_COUNTER
+ {
+ byte plainBuff [64];
+ byte cipherBuff[64];
+
+#ifdef WOLFSSL_AES_128
+ const byte ctrKey[] =
+ {
+ 0x2b,0x7e,0x15,0x16,0x28,0xae,0xd2,0xa6,
+ 0xab,0xf7,0x15,0x88,0x09,0xcf,0x4f,0x3c
+ };
+
+ const byte ctrIv[] =
+ {
+ 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
+ 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+ };
+
+
+ const byte ctrPlain[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
+ 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,
+ 0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef,
+ 0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,
+ 0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10
+ };
+
+ const byte ctrCipher[] =
+ {
+ 0x87,0x4d,0x61,0x91,0xb6,0x20,0xe3,0x26,
+ 0x1b,0xef,0x68,0x64,0x99,0x0d,0xb6,0xce,
+ 0x98,0x06,0xf6,0x6b,0x79,0x70,0xfd,0xff,
+ 0x86,0x17,0x18,0x7b,0xb9,0xff,0xfd,0xff,
+ 0x5a,0xe4,0xdf,0x3e,0xdb,0xd5,0xd3,0x5e,
+ 0x5b,0x4f,0x09,0x02,0x0d,0xb0,0x3e,0xab,
+ 0x1e,0x03,0x1d,0xda,0x2f,0xbe,0x03,0xd1,
+ 0x79,0x21,0x70,0xa0,0xf3,0x00,0x9c,0xee
+ };
+
+ const byte oddCipher[] =
+ {
+ 0xb9,0xd7,0xcb,0x08,0xb0,0xe1,0x7b,0xa0,
+ 0xc2
+ };
+#endif
+
+ /* test vector from "Recommendation for Block Cipher Modes of Operation"
+ * NIST Special Publication 800-38A */
+#ifdef WOLFSSL_AES_192
+ const byte ctr192Key[] =
+ {
+ 0x8e,0x73,0xb0,0xf7,0xda,0x0e,0x64,0x52,
+ 0xc8,0x10,0xf3,0x2b,0x80,0x90,0x79,0xe5,
+ 0x62,0xf8,0xea,0xd2,0x52,0x2c,0x6b,0x7b
+ };
+
+ const byte ctr192Iv[] =
+ {
+ 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
+ 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+ };
+
+
+ const byte ctr192Plain[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte ctr192Cipher[] =
+ {
+ 0x1a,0xbc,0x93,0x24,0x17,0x52,0x1c,0xa2,
+ 0x4f,0x2b,0x04,0x59,0xfe,0x7e,0x6e,0x0b
+ };
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ /* test vector from "Recommendation for Block Cipher Modes of Operation"
+ * NIST Special Publication 800-38A */
+ const byte ctr256Key[] =
+ {
+ 0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
+ 0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
+ 0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
+ 0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
+ };
+
+ const byte ctr256Iv[] =
+ {
+ 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
+ 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+ };
+
+
+ const byte ctr256Plain[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte ctr256Cipher[] =
+ {
+ 0x60,0x1e,0xc3,0x13,0x77,0x57,0x89,0xa5,
+ 0xb7,0xa7,0xf5,0x04,0xbb,0xf3,0xd2,0x28
+ };
+#endif /* WOLFSSL_AES_256 */
+
+ EVP_CIPHER_CTX en;
+ EVP_CIPHER_CTX de;
+#ifdef WOLFSSL_AES_128
+ EVP_CIPHER_CTX *p_en;
+ EVP_CIPHER_CTX *p_de;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8283;
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctrPlain,
+ AES_BLOCK_SIZE*4) == 0)
+ return -8284;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8285;
+
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff,
+ AES_BLOCK_SIZE*4) == 0)
+ return -8286;
+
+ if (XMEMCMP(cipherBuff, ctrCipher, AES_BLOCK_SIZE*4))
+ return -8287;
+ if (XMEMCMP(plainBuff, ctrPlain, AES_BLOCK_SIZE*4))
+ return -8288;
+
+ p_en = wolfSSL_EVP_CIPHER_CTX_new();
+ if (p_en == NULL)
+ return -8289;
+ p_de = wolfSSL_EVP_CIPHER_CTX_new();
+ if (p_de == NULL)
+ return -8290;
+
+ if (EVP_CipherInit(p_en, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8291;
+ if (EVP_Cipher(p_en, (byte*)cipherBuff, (byte*)ctrPlain,
+ AES_BLOCK_SIZE*4) == 0)
+ return -8292;
+ if (EVP_CipherInit(p_de, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8293;
+
+ if (EVP_Cipher(p_de, (byte*)plainBuff, (byte*)cipherBuff,
+ AES_BLOCK_SIZE*4) == 0)
+ return -8294;
+
+ wolfSSL_EVP_CIPHER_CTX_free(p_en);
+ wolfSSL_EVP_CIPHER_CTX_free(p_de);
+
+ if (XMEMCMP(cipherBuff, ctrCipher, AES_BLOCK_SIZE*4))
+ return -8295;
+ if (XMEMCMP(plainBuff, ctrPlain, AES_BLOCK_SIZE*4))
+ return -8296;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8297;
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctrPlain, 9) == 0)
+ return -8298;
+
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8299;
+
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff, 9) == 0)
+ return -8300;
+
+ if (XMEMCMP(plainBuff, ctrPlain, 9))
+ return -8301;
+ if (XMEMCMP(cipherBuff, ctrCipher, 9))
+ return -8302;
+
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctrPlain, 9) == 0)
+ return -8303;
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff, 9) == 0)
+ return -8304;
+
+ if (XMEMCMP(plainBuff, ctrPlain, 9))
+ return -8305;
+ if (XMEMCMP(cipherBuff, oddCipher, 9))
+ return -8306;
+#endif /* WOLFSSL_AES_128 */
+
+#ifdef WOLFSSL_AES_192
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_192_ctr(),
+ (unsigned char*)ctr192Key, (unsigned char*)ctr192Iv, 0) == 0)
+ return -8307;
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctr192Plain,
+ AES_BLOCK_SIZE) == 0)
+ return -8308;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_192_ctr(),
+ (unsigned char*)ctr192Key, (unsigned char*)ctr192Iv, 0) == 0)
+ return -8309;
+
+ XMEMSET(plainBuff, 0, sizeof(plainBuff));
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff,
+ AES_BLOCK_SIZE) == 0)
+ return -8310;
+
+ if (XMEMCMP(plainBuff, ctr192Plain, sizeof(ctr192Plain)))
+ return -8311;
+ if (XMEMCMP(ctr192Cipher, cipherBuff, sizeof(ctr192Cipher)))
+ return -8312;
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_256_ctr(),
+ (unsigned char*)ctr256Key, (unsigned char*)ctr256Iv, 0) == 0)
+ return -8313;
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctr256Plain,
+ AES_BLOCK_SIZE) == 0)
+ return -8314;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_256_ctr(),
+ (unsigned char*)ctr256Key, (unsigned char*)ctr256Iv, 0) == 0)
+ return -8315;
+
+ XMEMSET(plainBuff, 0, sizeof(plainBuff));
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff,
+ AES_BLOCK_SIZE) == 0)
+ return -8316;
+
+ if (XMEMCMP(plainBuff, ctr256Plain, sizeof(ctr256Plain)))
+ return -8317;
+ if (XMEMCMP(ctr256Cipher, cipherBuff, sizeof(ctr256Cipher)))
+ return -8318;
+#endif /* WOLFSSL_AES_256 */
+ }
+#endif /* WOLFSSL_AES_COUNTER */
+
+#if defined(WOLFSSL_AES_CFB) && defined(WOLFSSL_AES_128)
+ {
+ AES_KEY enc;
+ AES_KEY dec;
+
+ const byte setIv[] = {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
+ 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
+ };
+
+ const byte key[] =
+ {
+ 0x2b,0x7e,0x15,0x16,0x28,0xae,0xd2,0xa6,
+ 0xab,0xf7,0x15,0x88,0x09,0xcf,0x4f,0x3c
+ };
+
+ const byte cipher1[] =
+ {
+ 0x3b,0x3f,0xd9,0x2e,0xb7,0x2d,0xad,0x20,
+ 0x33,0x34,0x49,0xf8,0xe8,0x3c,0xfb,0x4a,
+ 0xc8,0xa6,0x45,0x37,0xa0,0xb3,0xa9,0x3f,
+ 0xcd,0xe3,0xcd,0xad,0x9f,0x1c,0xe5,0x8b
+ };
+
+ const byte msg[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51
+ };
+
+ byte cipher[AES_BLOCK_SIZE * 2];
+        byte iv[AES_BLOCK_SIZE]; /* iv buffer is updated by API */
+ int num = 0;
+
+ XMEMCPY(iv, setIv, sizeof(setIv));
+ wolfSSL_AES_set_encrypt_key(key, sizeof(key) * 8, &enc);
+ wolfSSL_AES_set_encrypt_key(key, sizeof(key) * 8, &dec);
+
+ wolfSSL_AES_cfb128_encrypt(msg, cipher, AES_BLOCK_SIZE - 1, &enc, iv,
+ &num, AES_ENCRYPT);
+
+ if (XMEMCMP(cipher, cipher1, AES_BLOCK_SIZE - 1))
+ return -8319;
+
+ if (num != 15) /* should have used 15 of the 16 bytes */
+ return -8320;
+
+ wolfSSL_AES_cfb128_encrypt(msg + AES_BLOCK_SIZE - 1,
+ cipher + AES_BLOCK_SIZE - 1, AES_BLOCK_SIZE + 1, &enc, iv,
+ &num, AES_ENCRYPT);
+
+ if (XMEMCMP(cipher, cipher1, AES_BLOCK_SIZE * 2))
+ return -8321;
+
+ if (num != 0)
+ return -8322;
+ }
+#endif /* WOLFSSL_AES_CFB && WOLFSSL_AES_128 */
+ return 0;
+}
+
+
+#endif /* !defined(NO_AES) && !defined(WOLFCRYPT_ONLY) */
int openssl_test(void)
{
EVP_MD_CTX md_ctx;
testVector a, b, c, d, e, f;
- byte hash[SHA256_DIGEST_SIZE*2]; /* max size */
+ byte hash[WC_SHA256_DIGEST_SIZE*2]; /* max size */
+
+ a.inLen = 0;
+ b.inLen = c.inLen = d.inLen = e.inLen = f.inLen = a.inLen;
(void)a;
(void)b;
(void)c;
+ (void)d;
(void)e;
(void)f;
+ /* test malloc / free , 10 is an arbitrary amount of memory chosen */
+ {
+ byte* p;
+ p = (byte*)CRYPTO_malloc(10, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (p == NULL) {
+ return -8400;
+ }
+ XMEMSET(p, 0, 10);
+ #ifdef WOLFSSL_QT
+ CRYPTO_free(p);
+ #else
+ CRYPTO_free(p, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ }
+
#ifndef NO_MD5
a.input = "1234567890123456789012345678901234567890123456789012345678"
"9012345678901234567890";
a.output = "\x57\xed\xf4\xa2\x2b\xe3\xc9\x55\xac\x49\xda\x2e\x21\x07\xb6"
"\x7a";
- a.inLen = strlen(a.input);
- a.outLen = MD5_DIGEST_SIZE;
+ a.inLen = XSTRLEN(a.input);
+ a.outLen = WC_MD5_DIGEST_SIZE;
EVP_MD_CTX_init(&md_ctx);
EVP_DigestInit(&md_ctx, EVP_md5());
@@ -4432,8 +15641,8 @@ int openssl_test(void)
EVP_DigestUpdate(&md_ctx, a.input, (unsigned long)a.inLen);
EVP_DigestFinal(&md_ctx, hash, 0);
- if (memcmp(hash, a.output, MD5_DIGEST_SIZE) != 0)
- return -71;
+ if (XMEMCMP(hash, a.output, WC_MD5_DIGEST_SIZE) != 0)
+ return -8401;
#endif /* NO_MD5 */
@@ -4444,8 +15653,8 @@ int openssl_test(void)
"aaaaaaaaaa";
b.output = "\xAD\x5B\x3F\xDB\xCB\x52\x67\x78\xC2\x83\x9D\x2F\x15\x1E\xA7"
"\x53\x99\x5E\x26\xA0";
- b.inLen = strlen(b.input);
- b.outLen = SHA_DIGEST_SIZE;
+ b.inLen = XSTRLEN(b.input);
+ b.outLen = WC_SHA_DIGEST_SIZE;
EVP_MD_CTX_init(&md_ctx);
EVP_DigestInit(&md_ctx, EVP_sha1());
@@ -4453,18 +15662,38 @@ int openssl_test(void)
EVP_DigestUpdate(&md_ctx, b.input, (unsigned long)b.inLen);
EVP_DigestFinal(&md_ctx, hash, 0);
- if (memcmp(hash, b.output, SHA_DIGEST_SIZE) != 0)
- return -72;
+ if (XMEMCMP(hash, b.output, WC_SHA_DIGEST_SIZE) != 0)
+ return -8402;
#endif /* NO_SHA */
+#ifdef WOLFSSL_SHA224
+
+ e.input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
+ "jklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
+ e.output = "\xc9\x7c\xa9\xa5\x59\x85\x0c\xe9\x7a\x04\xa9\x6d\xef\x6d\x99"
+ "\xa9\xe0\xe0\xe2\xab\x14\xe6\xb8\xdf\x26\x5f\xc0\xb3";
+ e.inLen = XSTRLEN(e.input);
+ e.outLen = WC_SHA224_DIGEST_SIZE;
+
+ EVP_MD_CTX_init(&md_ctx);
+ EVP_DigestInit(&md_ctx, EVP_sha224());
+
+ EVP_DigestUpdate(&md_ctx, e.input, (unsigned long)e.inLen);
+ EVP_DigestFinal(&md_ctx, hash, 0);
+
+ if (XMEMCMP(hash, e.output, WC_SHA224_DIGEST_SIZE) != 0)
+ return -8403;
+
+#endif /* WOLFSSL_SHA224 */
+
d.input = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
d.output = "\x24\x8D\x6A\x61\xD2\x06\x38\xB8\xE5\xC0\x26\x93\x0C\x3E\x60"
"\x39\xA3\x3C\xE4\x59\x64\xFF\x21\x67\xF6\xEC\xED\xD4\x19\xDB"
"\x06\xC1";
- d.inLen = strlen(d.input);
- d.outLen = SHA256_DIGEST_SIZE;
+ d.inLen = XSTRLEN(d.input);
+ d.outLen = WC_SHA256_DIGEST_SIZE;
EVP_MD_CTX_init(&md_ctx);
EVP_DigestInit(&md_ctx, EVP_sha256());
@@ -4472,8 +15701,8 @@ int openssl_test(void)
EVP_DigestUpdate(&md_ctx, d.input, (unsigned long)d.inLen);
EVP_DigestFinal(&md_ctx, hash, 0);
- if (memcmp(hash, d.output, SHA256_DIGEST_SIZE) != 0)
- return -78;
+ if (XMEMCMP(hash, d.output, WC_SHA256_DIGEST_SIZE) != 0)
+ return -8404;
#ifdef WOLFSSL_SHA384
@@ -4483,8 +15712,8 @@ int openssl_test(void)
"\x47\x53\x11\x1b\x17\x3b\x3b\x05\xd2\x2f\xa0\x80\x86\xe3\xb0"
"\xf7\x12\xfc\xc7\xc7\x1a\x55\x7e\x2d\xb9\x66\xc3\xe9\xfa\x91"
"\x74\x60\x39";
- e.inLen = strlen(e.input);
- e.outLen = SHA384_DIGEST_SIZE;
+ e.inLen = XSTRLEN(e.input);
+ e.outLen = WC_SHA384_DIGEST_SIZE;
EVP_MD_CTX_init(&md_ctx);
EVP_DigestInit(&md_ctx, EVP_sha384());
@@ -4492,8 +15721,8 @@ int openssl_test(void)
EVP_DigestUpdate(&md_ctx, e.input, (unsigned long)e.inLen);
EVP_DigestFinal(&md_ctx, hash, 0);
- if (memcmp(hash, e.output, SHA384_DIGEST_SIZE) != 0)
- return -79;
+ if (XMEMCMP(hash, e.output, WC_SHA384_DIGEST_SIZE) != 0)
+ return -8405;
#endif /* WOLFSSL_SHA384 */
@@ -4507,8 +15736,8 @@ int openssl_test(void)
"\x90\x18\x50\x1d\x28\x9e\x49\x00\xf7\xe4\x33\x1b\x99\xde\xc4"
"\xb5\x43\x3a\xc7\xd3\x29\xee\xb6\xdd\x26\x54\x5e\x96\xe5\x5b"
"\x87\x4b\xe9\x09";
- f.inLen = strlen(f.input);
- f.outLen = SHA512_DIGEST_SIZE;
+ f.inLen = XSTRLEN(f.input);
+ f.outLen = WC_SHA512_DIGEST_SIZE;
EVP_MD_CTX_init(&md_ctx);
EVP_DigestInit(&md_ctx, EVP_sha512());
@@ -4516,27 +15745,103 @@ int openssl_test(void)
EVP_DigestUpdate(&md_ctx, f.input, (unsigned long)f.inLen);
EVP_DigestFinal(&md_ctx, hash, 0);
- if (memcmp(hash, f.output, SHA512_DIGEST_SIZE) != 0)
- return -80;
+ if (XMEMCMP(hash, f.output, WC_SHA512_DIGEST_SIZE) != 0)
+ return -8406;
#endif /* WOLFSSL_SHA512 */
+#ifdef WOLFSSL_SHA3
+#ifndef WOLFSSL_NOSHA3_224
+
+ e.input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
+ "jklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
+ e.output = "\x54\x3e\x68\x68\xe1\x66\x6c\x1a\x64\x36\x30\xdf\x77\x36\x7a\xe5\xa6\x2a\x85\x07\x0a\x51\xc1\x4c\xbf\x66\x5c\xbc";
+ e.inLen = XSTRLEN(e.input);
+ e.outLen = WC_SHA3_224_DIGEST_SIZE;
+
+ EVP_MD_CTX_init(&md_ctx);
+ EVP_DigestInit(&md_ctx, EVP_sha3_224());
+
+ EVP_DigestUpdate(&md_ctx, e.input, (unsigned long)e.inLen);
+ EVP_DigestFinal(&md_ctx, hash, 0);
+
+ if (XMEMCMP(hash, e.output, WC_SHA3_224_DIGEST_SIZE) != 0)
+ return -8407;
+
+#endif /* WOLFSSL_NOSHA3_224 */
+
+
+#ifndef WOLFSSL_NOSHA3_256
+ d.input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
+ "jklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
+ d.output = "\x91\x6f\x60\x61\xfe\x87\x97\x41\xca\x64\x69\xb4\x39\x71\xdf"
+ "\xdb\x28\xb1\xa3\x2d\xc3\x6c\xb3\x25\x4e\x81\x2b\xe2\x7a\xad"
+ "\x1d\x18";
+ d.inLen = XSTRLEN(d.input);
+ d.outLen = WC_SHA3_256_DIGEST_SIZE;
+
+ EVP_MD_CTX_init(&md_ctx);
+ EVP_DigestInit(&md_ctx, EVP_sha3_256());
+
+ EVP_DigestUpdate(&md_ctx, d.input, (unsigned long)d.inLen);
+ EVP_DigestFinal(&md_ctx, hash, 0);
+
+ if (XMEMCMP(hash, d.output, WC_SHA3_256_DIGEST_SIZE) != 0)
+ return -8408;
+#endif /* WOLFSSL_NOSHA3_256 */
+
+
+ e.input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
+ "jklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
+ e.output = "\x79\x40\x7d\x3b\x59\x16\xb5\x9c\x3e\x30\xb0\x98\x22\x97\x47\x91\xc3\x13\xfb\x9e\xcc\x84\x9e\x40\x6f\x23\x59\x2d\x04\xf6\x25\xdc\x8c\x70\x9b\x98\xb4\x3b\x38\x52\xb3\x37\x21\x61\x79\xaa\x7f\xc7";
+ e.inLen = XSTRLEN(e.input);
+ e.outLen = WC_SHA3_384_DIGEST_SIZE;
+
+ EVP_MD_CTX_init(&md_ctx);
+ EVP_DigestInit(&md_ctx, EVP_sha3_384());
+
+ EVP_DigestUpdate(&md_ctx, e.input, (unsigned long)e.inLen);
+ EVP_DigestFinal(&md_ctx, hash, 0);
+
+ if (XMEMCMP(hash, e.output, WC_SHA3_384_DIGEST_SIZE) != 0)
+ return -8409;
+
+#ifndef WOLFSSL_NOSHA3_512
+
+ f.input = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
+ "jklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
+ f.output = "\xaf\xeb\xb2\xef\x54\x2e\x65\x79\xc5\x0c\xad\x06\xd2\xe5\x78\xf9\xf8\xdd\x68\x81\xd7\xdc\x82\x4d\x26\x36\x0f\xee\xbf\x18\xa4\xfa\x73\xe3\x26\x11\x22\x94\x8e\xfc\xfd\x49\x2e\x74\xe8\x2e\x21\x89\xed\x0f\xb4\x40\xd1\x87\xf3\x82\x27\x0c\xb4\x55\xf2\x1d\xd1\x85";
+ f.inLen = XSTRLEN(f.input);
+ f.outLen = WC_SHA3_512_DIGEST_SIZE;
+
+ EVP_MD_CTX_init(&md_ctx);
+ EVP_DigestInit(&md_ctx, EVP_sha3_512());
+
+ EVP_DigestUpdate(&md_ctx, f.input, (unsigned long)f.inLen);
+ EVP_DigestFinal(&md_ctx, hash, 0);
+
+ if (XMEMCMP(hash, f.output, WC_SHA3_512_DIGEST_SIZE) != 0)
+ return -8410;
+
+#endif /* WOLFSSL_NOSHA3_512 */
+#endif /* WOLFSSL_SHA3 */
+
#ifndef NO_MD5
if (RAND_bytes(hash, sizeof(hash)) != 1)
- return -73;
+ return -8411;
c.input = "what do ya want for nothing?";
c.output = "\x55\x78\xe8\x48\x4b\xcc\x93\x80\x93\xec\x53\xaf\x22\xd6\x14"
"\x76";
- c.inLen = strlen(c.input);
- c.outLen = MD5_DIGEST_SIZE;
+ c.inLen = XSTRLEN(c.input);
+ c.outLen = WC_MD5_DIGEST_SIZE;
HMAC(EVP_md5(),
"JefeJefeJefeJefe", 16, (byte*)c.input, (int)c.inLen, hash, 0);
- if (memcmp(hash, c.output, MD5_DIGEST_SIZE) != 0)
- return -74;
+ if (XMEMCMP(hash, c.output, WC_MD5_DIGEST_SIZE) != 0)
+ return -8412;
#endif /* NO_MD5 */
@@ -4575,28 +15880,32 @@ int openssl_test(void)
DES_cbc_encrypt(vector, cipher, sizeof(vector), &sched, &iv, DES_ENCRYPT);
DES_cbc_encrypt(cipher, plain, sizeof(vector), &sched, &iv, DES_DECRYPT);
- if (memcmp(plain, vector, sizeof(vector)) != 0)
- return -75;
+ if (XMEMCMP(plain, vector, sizeof(vector)) != 0)
+ return -8413;
- if (memcmp(cipher, verify, sizeof(verify)) != 0)
- return -76;
+ if (XMEMCMP(cipher, verify, sizeof(verify)) != 0)
+ return -8414;
/* test changing iv */
DES_ncbc_encrypt(vector, cipher, 8, &sched, &iv, DES_ENCRYPT);
DES_ncbc_encrypt(vector + 8, cipher + 8, 16, &sched, &iv, DES_ENCRYPT);
- if (memcmp(cipher, verify, sizeof(verify)) != 0)
- return -77;
+ if (XMEMCMP(cipher, verify, sizeof(verify)) != 0)
+ return -8415;
} /* end des test */
#endif /* NO_DES3 */
-#ifndef NO_AES
+#if !defined(NO_AES) && !defined(WOLFCRYPT_ONLY)
+ if (openssl_aes_test() != 0) {
+ return -8416;
+ }
- { /* evp_cipher test */
+#if defined(WOLFSSL_AES_128) && defined(HAVE_AES_CBC)
+ { /* evp_cipher test: EVP_aes_128_cbc */
EVP_CIPHER_CTX ctx;
-
+ int idx, cipherSz, plainSz;
const byte msg[] = { /* "Now is the time for all " w/o trailing 0 */
0x6e,0x6f,0x77,0x20,0x69,0x73,0x20,0x74,
@@ -4607,7 +15916,17 @@ int openssl_test(void)
const byte verify[] =
{
0x95,0x94,0x92,0x57,0x5f,0x42,0x81,0x53,
- 0x2c,0xcc,0x9d,0x46,0x77,0xa2,0x33,0xcb
+ 0x2c,0xcc,0x9d,0x46,0x77,0xa2,0x33,0xcb,
+ 0x3b,0x5d,0x41,0x97,0x94,0x25,0xa4,0xb4,
+ 0xae,0x7b,0x34,0xd0,0x3f,0x0c,0xbc,0x06
+ };
+
+ const byte verify2[] =
+ {
+ 0x95,0x94,0x92,0x57,0x5f,0x42,0x81,0x53,
+ 0x2c,0xcc,0x9d,0x46,0x77,0xa2,0x33,0xcb,
+ 0x7d,0x37,0x7b,0x0b,0x44,0xaa,0xb5,0xf0,
+ 0x5f,0x34,0xb4,0xde,0xb5,0xbd,0x2a,0xbb
};
byte key[] = "0123456789abcdef "; /* align */
@@ -4618,37 +15937,1412 @@ int openssl_test(void)
EVP_CIPHER_CTX_init(&ctx);
if (EVP_CipherInit(&ctx, EVP_aes_128_cbc(), key, iv, 1) == 0)
- return -81;
+ return -8417;
- if (EVP_Cipher(&ctx, cipher, (byte*)msg, 16) == 0)
- return -82;
+ if (EVP_CipherUpdate(&ctx, cipher, &idx, (byte*)msg, sizeof(msg)) == 0)
+ return -8418;
- if (memcmp(cipher, verify, AES_BLOCK_SIZE))
- return -83;
+ cipherSz = idx;
+ if (EVP_CipherFinal(&ctx, cipher + cipherSz, &idx) == 0)
+ return -8419;
+ cipherSz += idx;
+
+ if ((cipherSz != (int)sizeof(verify)) &&
+ XMEMCMP(cipher, verify, cipherSz))
+ return -8420;
EVP_CIPHER_CTX_init(&ctx);
if (EVP_CipherInit(&ctx, EVP_aes_128_cbc(), key, iv, 0) == 0)
- return -84;
+ return -8421;
- if (EVP_Cipher(&ctx, plain, cipher, 16) == 0)
- return -85;
+ /* check partial decrypt (not enough padding for full block) */
+ if (EVP_CipherUpdate(&ctx, plain, &idx, cipher, 1) == 0)
+ return -8422;
+
+ plainSz = idx;
+ if (EVP_CipherFinal(&ctx, plain + plainSz, &idx) != 0)
+ return -8423;
+
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, EVP_aes_128_cbc(), key, iv, 0) == 0)
+ return -8424;
+
+ if (EVP_CipherUpdate(&ctx, plain, &idx, cipher, cipherSz) == 0)
+ return -8425;
+
+ plainSz = idx;
+ if (EVP_CipherFinal(&ctx, plain + plainSz, &idx) == 0)
+ return -8426;
+ plainSz += idx;
+
+ if ((plainSz != sizeof(msg)) || XMEMCMP(plain, msg, sizeof(msg)))
+ return -8427;
+
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, EVP_aes_128_cbc(), key, iv, 1) == 0)
+ return -8428;
+
+ if (EVP_CipherUpdate(&ctx, cipher, &idx, msg, AES_BLOCK_SIZE) == 0)
+ return -8429;
+
+ cipherSz = idx;
+ if (EVP_CipherFinal(&ctx, cipher + cipherSz, &idx) == 0)
+ return -8430;
+ cipherSz += idx;
+
+ if ((cipherSz != (int)sizeof(verify2)) ||
+ XMEMCMP(cipher, verify2, cipherSz))
+ return -8431;
+
+ } /* end evp_cipher test: EVP_aes_128_cbc*/
+#endif /* WOLFSSL_AES_128 && HAVE_AES_CBC */
+
+#if defined(HAVE_AES_ECB) && defined(WOLFSSL_AES_256)
+ { /* evp_cipher test: EVP_aes_256_ecb*/
+ EVP_CIPHER_CTX ctx;
+ const byte msg[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte verify[] =
+ {
+ 0xf3,0xee,0xd1,0xbd,0xb5,0xd2,0xa0,0x3c,
+ 0x06,0x4b,0x5a,0x7e,0x3d,0xb1,0x81,0xf8
+ };
+
+ const byte key[] =
+ {
+ 0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
+ 0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
+ 0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
+ 0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
+ };
- if (memcmp(plain, msg, AES_BLOCK_SIZE))
- return -86;
+ byte cipher[AES_BLOCK_SIZE * 4];
+ byte plain [AES_BLOCK_SIZE * 4];
+
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, EVP_aes_256_ecb(), (unsigned char*)key,
+ NULL, 1) == 0)
+ return -8432;
+
+ if (EVP_Cipher(&ctx, cipher, (byte*)msg, 16) == 0)
+ return -8433;
+
+ if (XMEMCMP(cipher, verify, AES_BLOCK_SIZE))
+ return -8434;
+
+ EVP_CIPHER_CTX_init(&ctx);
+ if (EVP_CipherInit(&ctx, EVP_aes_256_ecb(), (unsigned char*)key,
+ NULL, 0) == 0)
+ return -8435;
+
+ if (EVP_Cipher(&ctx, plain, cipher, 16) == 0)
+ return -8436;
+
+ if (XMEMCMP(plain, msg, AES_BLOCK_SIZE))
+ return -8437;
} /* end evp_cipher test */
+#endif /* HAVE_AES_ECB && WOLFSSL_AES_256 */
+
+#define OPENSSL_TEST_ERROR (-10000)
+
+
+#if defined(WOLFSSL_AES_DIRECT) && defined(WOLFSSL_AES_256)
+ /* enable HAVE_AES_DECRYPT for AES_encrypt/decrypt */
+{
+
+ /* Test: AES_encrypt/decrypt/set Key */
+ AES_KEY enc;
+#ifdef HAVE_AES_DECRYPT
+ AES_KEY dec;
+#endif
+
+ const byte msg[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte verify[] =
+ {
+ 0xf3,0xee,0xd1,0xbd,0xb5,0xd2,0xa0,0x3c,
+ 0x06,0x4b,0x5a,0x7e,0x3d,0xb1,0x81,0xf8
+ };
+
+ const byte key[] =
+ {
+ 0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
+ 0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
+ 0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
+ 0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
+ };
+
+ byte plain[sizeof(msg)];
+ byte cipher[sizeof(msg)];
+
+ printf("openSSL extra test\n") ;
+
+
+ AES_set_encrypt_key(key, sizeof(key)*8, &enc);
+ AES_set_decrypt_key(key, sizeof(key)*8, &dec);
+
+ AES_encrypt(msg, cipher, &enc);
+
+#ifdef HAVE_AES_DECRYPT
+ AES_decrypt(cipher, plain, &dec);
+ if (XMEMCMP(plain, msg, AES_BLOCK_SIZE))
+ return OPENSSL_TEST_ERROR-60;
+#endif /* HAVE_AES_DECRYPT */
+
+ if (XMEMCMP(cipher, verify, AES_BLOCK_SIZE))
+ return OPENSSL_TEST_ERROR-61;
+}
+
+#endif /* WOLFSSL_AES_DIRECT && WOLFSSL_AES_256 */
+
+/* EVP_Cipher with EVP_aes_xxx_ctr() */
+#ifdef WOLFSSL_AES_COUNTER
+{
+ byte plainBuff [64];
+ byte cipherBuff[64];
+
+#ifdef WOLFSSL_AES_128
+ const byte ctrKey[] =
+ {
+ 0x2b,0x7e,0x15,0x16,0x28,0xae,0xd2,0xa6,
+ 0xab,0xf7,0x15,0x88,0x09,0xcf,0x4f,0x3c
+ };
+
+ const byte ctrIv[] =
+ {
+ 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
+ 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+ };
+
+ const byte ctrPlain[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
+ 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,
+ 0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef,
+ 0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,
+ 0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10
+ };
+
+ const byte ctrCipher[] =
+ {
+ 0x87,0x4d,0x61,0x91,0xb6,0x20,0xe3,0x26,
+ 0x1b,0xef,0x68,0x64,0x99,0x0d,0xb6,0xce,
+ 0x98,0x06,0xf6,0x6b,0x79,0x70,0xfd,0xff,
+ 0x86,0x17,0x18,0x7b,0xb9,0xff,0xfd,0xff,
+ 0x5a,0xe4,0xdf,0x3e,0xdb,0xd5,0xd3,0x5e,
+ 0x5b,0x4f,0x09,0x02,0x0d,0xb0,0x3e,0xab,
+ 0x1e,0x03,0x1d,0xda,0x2f,0xbe,0x03,0xd1,
+ 0x79,0x21,0x70,0xa0,0xf3,0x00,0x9c,0xee
+ };
+
+ const byte oddCipher[] =
+ {
+ 0xb9,0xd7,0xcb,0x08,0xb0,0xe1,0x7b,0xa0,
+ 0xc2
+ };
+#endif /* WOLFSSL_AES_128 */
+
+#ifdef WOLFSSL_AES_192
+ /* test vector from "Recommendation for Block Cipher Modes of Operation"
+ * NIST Special Publication 800-38A */
+ const byte ctr192Key[] =
+ {
+ 0x8e,0x73,0xb0,0xf7,0xda,0x0e,0x64,0x52,
+ 0xc8,0x10,0xf3,0x2b,0x80,0x90,0x79,0xe5,
+ 0x62,0xf8,0xea,0xd2,0x52,0x2c,0x6b,0x7b
+ };
+
+ const byte ctr192Iv[] =
+ {
+ 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
+ 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+ };
+
+
+ const byte ctr192Plain[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte ctr192Cipher[] =
+ {
+ 0x1a,0xbc,0x93,0x24,0x17,0x52,0x1c,0xa2,
+ 0x4f,0x2b,0x04,0x59,0xfe,0x7e,0x6e,0x0b
+ };
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ /* test vector from "Recommendation for Block Cipher Modes of Operation"
+ * NIST Special Publication 800-38A */
+ const byte ctr256Key[] =
+ {
+ 0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,
+ 0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,
+ 0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,
+ 0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4
+ };
+
+ const byte ctr256Iv[] =
+ {
+ 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
+ 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+ };
+
+
+ const byte ctr256Plain[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a
+ };
+
+ const byte ctr256Cipher[] =
+ {
+ 0x60,0x1e,0xc3,0x13,0x77,0x57,0x89,0xa5,
+ 0xb7,0xa7,0xf5,0x04,0xbb,0xf3,0xd2,0x28
+ };
+#endif /* WOLFSSL_AES_256 */
+
+ EVP_CIPHER_CTX en;
+ EVP_CIPHER_CTX de;
+#ifdef WOLFSSL_AES_128
+ EVP_CIPHER_CTX *p_en;
+ EVP_CIPHER_CTX *p_de;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8438;
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctrPlain,
+ AES_BLOCK_SIZE*4) == 0)
+ return -8439;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8440;
+
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff,
+ AES_BLOCK_SIZE*4) == 0)
+ return -8441;
+
+ if (XMEMCMP(cipherBuff, ctrCipher, AES_BLOCK_SIZE*4))
+ return -8442;
+ if (XMEMCMP(plainBuff, ctrPlain, AES_BLOCK_SIZE*4))
+ return -8443;
+
+ p_en = wolfSSL_EVP_CIPHER_CTX_new();
+ if(p_en == NULL)return -8444;
+ p_de = wolfSSL_EVP_CIPHER_CTX_new();
+ if(p_de == NULL)return -8445;
+
+ if (EVP_CipherInit(p_en, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8446;
+ if (EVP_Cipher(p_en, (byte*)cipherBuff, (byte*)ctrPlain,
+ AES_BLOCK_SIZE*4) == 0)
+ return -8447;
+ if (EVP_CipherInit(p_de, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8448;
+
+ if (EVP_Cipher(p_de, (byte*)plainBuff, (byte*)cipherBuff,
+ AES_BLOCK_SIZE*4) == 0)
+ return -8449;
+
+ wolfSSL_EVP_CIPHER_CTX_free(p_en);
+ wolfSSL_EVP_CIPHER_CTX_free(p_de);
+
+ if (XMEMCMP(cipherBuff, ctrCipher, AES_BLOCK_SIZE*4))
+ return -8450;
+ if (XMEMCMP(plainBuff, ctrPlain, AES_BLOCK_SIZE*4))
+ return -8451;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8452;
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctrPlain, 9) == 0)
+ return -8453;
+
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_ctr(),
+ (unsigned char*)ctrKey, (unsigned char*)ctrIv, 0) == 0)
+ return -8454;
+
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff, 9) == 0)
+ return -8455;
+
+ if (XMEMCMP(plainBuff, ctrPlain, 9))
+ return -8456;
+ if (XMEMCMP(cipherBuff, ctrCipher, 9))
+ return -8457;
+
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctrPlain, 9) == 0)
+ return -8458;
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff, 9) == 0)
+ return -8459;
+
+ if (XMEMCMP(plainBuff, ctrPlain, 9))
+ return -8460;
+ if (XMEMCMP(cipherBuff, oddCipher, 9))
+ return -8461;
+#endif /* WOLFSSL_AES_128 */
+
+#ifdef WOLFSSL_AES_192
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_192_ctr(),
+ (unsigned char*)ctr192Key, (unsigned char*)ctr192Iv, 0) == 0)
+ return -8462;
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctr192Plain,
+ AES_BLOCK_SIZE) == 0)
+ return -8463;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_192_ctr(),
+ (unsigned char*)ctr192Key, (unsigned char*)ctr192Iv, 0) == 0)
+ return -8464;
+
+ XMEMSET(plainBuff, 0, sizeof(plainBuff));
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff,
+ AES_BLOCK_SIZE) == 0)
+ return -8465;
+
+ if (XMEMCMP(plainBuff, ctr192Plain, sizeof(ctr192Plain)))
+ return -8466;
+ if (XMEMCMP(ctr192Cipher, cipherBuff, sizeof(ctr192Cipher)))
+ return -8467;
+#endif /* WOLFSSL_AES_192 */
+
+#ifdef WOLFSSL_AES_256
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_256_ctr(),
+ (unsigned char*)ctr256Key, (unsigned char*)ctr256Iv, 0) == 0)
+ return -8468;
+ if (EVP_Cipher(&en, (byte*)cipherBuff, (byte*)ctr256Plain,
+ AES_BLOCK_SIZE) == 0)
+ return -8469;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_256_ctr(),
+ (unsigned char*)ctr256Key, (unsigned char*)ctr256Iv, 0) == 0)
+ return -8470;
+
+ XMEMSET(plainBuff, 0, sizeof(plainBuff));
+ if (EVP_Cipher(&de, (byte*)plainBuff, (byte*)cipherBuff,
+ AES_BLOCK_SIZE) == 0)
+ return -8471;
+
+ if (XMEMCMP(plainBuff, ctr256Plain, sizeof(ctr256Plain)))
+ return -8472;
+ if (XMEMCMP(ctr256Cipher, cipherBuff, sizeof(ctr256Cipher)))
+ return -8473;
+#endif /* WOLFSSL_AES_256 */
+}
+#endif /* HAVE_AES_COUNTER */
+
+#if defined(HAVE_AES_CBC) && defined(WOLFSSL_AES_128)
+{
+ /* EVP_CipherUpdate test */
-#endif /* NO_AES */
+ const byte cbcPlain[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
+ 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,
+ 0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef,
+ 0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,
+ 0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10
+ };
+
+ byte key[] = "0123456789abcdef "; /* align */
+ byte iv[] = "1234567890abcdef "; /* align */
+
+ byte cipher[AES_BLOCK_SIZE * 4];
+ byte plain [AES_BLOCK_SIZE * 4];
+ EVP_CIPHER_CTX en;
+ EVP_CIPHER_CTX de;
+ int outlen ;
+ int total = 0;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 1) == 0)
+ return -8474;
+ /* openSSL compatibility, if(inlen == 0)return 1; */
+ if (EVP_CipherUpdate(&en, (byte*)cipher, &outlen,
+ (byte*)cbcPlain, 0) != 1)
+ return -8475;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit(&en, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 1) == 0)
+ return -8476;
+ if (EVP_CipherUpdate(&en, (byte*)cipher, &outlen,
+ (byte*)cbcPlain, 9) == 0)
+ return -8477;
+ if(outlen != 0)
+ return -8478;
+ total += outlen;
+
+ if (EVP_CipherUpdate(&en, (byte*)&cipher[total], &outlen,
+ (byte*)&cbcPlain[9] , 9) == 0)
+ return -8479;
+ if(outlen != 16)
+ return -8480;
+ total += outlen;
+
+ if (EVP_CipherFinal(&en, (byte*)&cipher[total], &outlen) == 0)
+ return -8481;
+ if(outlen != 16)
+ return -8482;
+ total += outlen;
+ if(total != 32)
+ return -8483;
+
+ total = 0;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_CipherInit(&de, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv, 0) == 0)
+ return -8484;
+
+ if (EVP_CipherUpdate(&de, (byte*)plain, &outlen, (byte*)cipher, 6) == 0)
+ return -8485;
+ if(outlen != 0)
+ return -8486;
+ total += outlen;
+
+ if (EVP_CipherUpdate(&de, (byte*)&plain[total], &outlen,
+ (byte*)&cipher[6], 12) == 0)
+ return -8487;
+ if(outlen != 0)
+ total += outlen;
+
+ if (EVP_CipherUpdate(&de, (byte*)&plain[total], &outlen,
+ (byte*)&cipher[6+12], 14) == 0)
+ return -8488;
+ if(outlen != 16)
+ return -8489;
+ total += outlen;
+
+ if (EVP_CipherFinal(&de, (byte*)&plain[total], &outlen) == 0)
+ return -8490;
+ if(outlen != 2)
+ return -8491;
+ total += outlen;
+
+ if(total != 18)
+ return -8492;
+
+ if (XMEMCMP(plain, cbcPlain, 18))
+ return -8493;
+
+ total = 0;
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_EncryptInit(&en, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv) == 0)
+ return -8494;
+ if (EVP_CipherUpdate(&en, (byte*)cipher, &outlen, (byte*)cbcPlain, 9) == 0)
+ return -8495;
+ if(outlen != 0)
+ return -8496;
+ total += outlen;
+
+ if (EVP_CipherUpdate(&en, (byte*)&cipher[total], &outlen, (byte*)&cbcPlain[9] , 9) == 0)
+ return -8497;
+ if(outlen != 16)
+ return -8498;
+ total += outlen;
+
+ if (EVP_EncryptFinal(&en, (byte*)&cipher[total], &outlen) == 0)
+ return -8499;
+ if(outlen != 16)
+ return -8500;
+ total += outlen;
+ if(total != 32)
+ return 3438;
+
+ total = 0;
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_DecryptInit(&de, EVP_aes_128_cbc(),
+ (unsigned char*)key, (unsigned char*)iv) == 0)
+ return -8501;
+
+ if (EVP_CipherUpdate(&de, (byte*)plain, &outlen, (byte*)cipher, 6) == 0)
+ return -8502;
+ if(outlen != 0)
+ return -8503;
+ total += outlen;
+
+ if (EVP_CipherUpdate(&de, (byte*)&plain[total], &outlen, (byte*)&cipher[6], 12) == 0)
+ return -8504;
+ if(outlen != 0)
+ total += outlen;
+
+ if (EVP_CipherUpdate(&de, (byte*)&plain[total], &outlen, (byte*)&cipher[6+12], 14) == 0)
+ return -8505;
+ if(outlen != 16)
+ return -8506;
+ total += outlen;
+
+ if (EVP_DecryptFinal(&de, (byte*)&plain[total], &outlen) == 0)
+ return -8507;
+ if(outlen != 2)
+ return -8508;
+ total += outlen;
+
+ if(total != 18)
+ return 3447;
+
+ if (XMEMCMP(plain, cbcPlain, 18))
+ return -8509;
+
+ if (EVP_CIPHER_key_length(NULL) != 0)
+ return -8510;
+
+ if (EVP_CIPHER_key_length(EVP_aes_128_cbc()) != 16)
+ return -8511;
+
+ if (EVP_CIPHER_CTX_mode(NULL) != 0)
+ return -8512;
+
+ if (EVP_CIPHER_CTX_mode(&en) != (en.flags & WOLFSSL_EVP_CIPH_MODE))
+ return -8513;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_CipherInit_ex(&en, EVP_aes_128_cbc(), NULL,
+ (unsigned char*)key, (unsigned char*)iv, 0) == 0)
+ return -8514;
+
+ EVP_CIPHER_CTX_init(&en);
+ if (EVP_EncryptInit_ex(&en, EVP_aes_128_cbc(), NULL,
+ (unsigned char*)key, (unsigned char*)iv) == 0)
+ return -8515;
+
+ if (wolfSSL_EVP_EncryptFinal_ex(NULL, NULL, NULL) != WOLFSSL_FAILURE)
+ return -8516;
+
+ if (wolfSSL_EVP_EncryptFinal(NULL, NULL, NULL) != WOLFSSL_FAILURE)
+ return -8517;
+
+ EVP_CIPHER_CTX_init(&de);
+ if (EVP_DecryptInit_ex(&de, EVP_aes_128_cbc(), NULL,
+ (unsigned char*)key, (unsigned char*)iv) == 0)
+ return -8518;
+
+ if (wolfSSL_EVP_DecryptFinal(NULL, NULL, NULL) != WOLFSSL_FAILURE)
+ return -8519;
+
+ if (wolfSSL_EVP_DecryptFinal_ex(NULL, NULL, NULL) != WOLFSSL_FAILURE)
+ return -8520;
+
+ if (EVP_CIPHER_CTX_block_size(NULL) != BAD_FUNC_ARG)
+ return -8521;
+
+ EVP_CIPHER_CTX_init(&en);
+ EVP_EncryptInit_ex(&en, EVP_aes_128_cbc(), NULL,
+ (unsigned char*)key, (unsigned char*)iv);
+ if (EVP_CIPHER_CTX_block_size(&en) != en.block_size)
+ return -8522;
+
+ if (EVP_CIPHER_block_size(NULL) != BAD_FUNC_ARG)
+ return -8523;
+
+ if (EVP_CIPHER_block_size(EVP_aes_128_cbc()) != AES_BLOCK_SIZE)
+ return -8524;
+
+ if (WOLFSSL_EVP_CIPHER_mode(NULL) != 0)
+ return -8525;
+
+ if (EVP_CIPHER_flags(EVP_aes_128_cbc()) != WOLFSSL_EVP_CIPH_CBC_MODE)
+ return -8526;
+
+ EVP_CIPHER_CTX_clear_flags(&en, 0xFFFFFFFF);
+ EVP_CIPHER_CTX_set_flags(&en, 42);
+ if (en.flags != 42)
+ return -8527;
+
+ if (EVP_CIPHER_CTX_set_padding(NULL, 0) != BAD_FUNC_ARG)
+ return -8528;
+ if (EVP_CIPHER_CTX_set_padding(&en, 0) != WOLFSSL_SUCCESS)
+ return -8529;
+ if (EVP_CIPHER_CTX_set_padding(&en, 1) != WOLFSSL_SUCCESS)
+ return -8530;
+
+ }
+#endif /* WOLFSSL_AES_128 && HAVE_AES_CBC */
+#endif /* ifndef NO_AES */
return 0;
}
+/* Exercises the wolfSSL OpenSSL-compatibility EVP message-digest API:
+ * context create/copy, digest-type queries, and NULL-argument handling.
+ * Returns 0 on success or a negative code in the -86xx range on the
+ * first failure.
+ * NOTE(review): the EVP_MD_CTX_create() results are not NULL-checked
+ * before use -- confirm the compat layer tolerates a NULL ctx. */
+int openSSL_evpMD_test(void)
+{
+    int ret = 0;
+#if !defined(NO_SHA256) && !defined(NO_SHA)
+    WOLFSSL_EVP_MD_CTX* ctx;
+    WOLFSSL_EVP_MD_CTX* ctx2;
+
+    ctx = EVP_MD_CTX_create();
+    ctx2 = EVP_MD_CTX_create();
+
+    ret = EVP_DigestInit(ctx, EVP_sha256());
+    if (ret != SSL_SUCCESS) {
+        ret = -8600;
+        goto openSSL_evpMD_test_done;
+    }
+
+    /* ctx2 becomes a SHA-256 context via copy */
+    ret = EVP_MD_CTX_copy(ctx2, ctx);
+    if (ret != SSL_SUCCESS) {
+        ret = -8601;
+        goto openSSL_evpMD_test_done;
+    }
+
+    if (EVP_MD_type(EVP_sha256()) != EVP_MD_CTX_type(ctx2)) {
+        ret = -8602;
+        goto openSSL_evpMD_test_done;
+    }
+
+    /* re-init ctx as SHA-1; the earlier copy in ctx2 must be unaffected */
+    ret = EVP_DigestInit(ctx, EVP_sha1());
+    if (ret != SSL_SUCCESS) {
+        ret = -8603;
+        goto openSSL_evpMD_test_done;
+    }
+
+    if (EVP_MD_type(EVP_sha256()) != EVP_MD_CTX_type(ctx2)) {
+        ret = -8604;
+        goto openSSL_evpMD_test_done;
+    }
+
+    /* _copy_ex overwrites ctx2, so it must now report SHA-1, not SHA-256 */
+    ret = EVP_MD_CTX_copy_ex(ctx2, ctx);
+    if (ret != SSL_SUCCESS) {
+        ret = -8605;
+        goto openSSL_evpMD_test_done;
+    }
+
+    if (EVP_MD_type(EVP_sha256()) == EVP_MD_CTX_type(ctx2)) {
+        ret = -8606;
+        goto openSSL_evpMD_test_done;
+    }
+
+    if (EVP_MD_type(EVP_sha1()) != EVP_MD_CTX_type(ctx2)) {
+        ret = -8607;
+        goto openSSL_evpMD_test_done;
+    }
+
+    if (EVP_DigestInit_ex(ctx, EVP_sha1(), NULL) != SSL_SUCCESS) {
+        ret = -8608;
+        goto openSSL_evpMD_test_done;
+    }
+
+    /* NULL-argument paths are expected to fail gracefully (return 0) */
+    if (EVP_add_digest(NULL) != 0) {
+        ret = -8609;
+        goto openSSL_evpMD_test_done;
+    }
+
+    if (wolfSSL_EVP_add_cipher(NULL) != 0) {
+        ret = -8610;
+        goto openSSL_evpMD_test_done;
+    }
+
+    ret = 0; /* got to success state without jumping to end with a fail */
+
+openSSL_evpMD_test_done:
+    EVP_MD_CTX_destroy(ctx);
+    EVP_MD_CTX_destroy(ctx2);
+#endif /* !NO_SHA256 && !NO_SHA */
+
+    return ret;
+}
+
+#ifdef DEBUG_SIGN
+/* Debug helper: print `s` bytes starting at `p`, prefixed by `title`.
+ * Compiled only under DEBUG_SIGN; otherwise `show` expands to nothing. */
+static void show(const char *title, const char *p, unsigned int s) {
+    const char* i;      /* fix: was `char* i` -- assigning from the
+                         * const char* parameter `p` discarded the const
+                         * qualifier (constraint violation in ISO C) */
+    printf("%s: ", title);
+    for (i = p;
+         i < p + s;
+         printf("%c", *i), i++);
+    printf("\n");
+}
+#else
+#define show(a,b,c)
+#endif
+
+#define FOURK_BUFF 4096
+
+#define ERR_BASE_PKEY -5000
+/* EVP_PKEY encrypt/decrypt round-trip test using the compat-layer
+ * EVP_PKEY_CTX API with an RSA key pair loaded from DER (cert buffers
+ * or ./certs files).  Tests both the default padding and, outside of
+ * FIPS builds, RSA_PKCS1_OAEP_PADDING.  Returns 0 on success or a
+ * negative code based at ERR_BASE_PKEY; all heap objects are released
+ * through the single openssl_pkey0_test_done cleanup label. */
+int openssl_pkey0_test(void)
+{
+    int ret = 0;
+#if !defined(NO_RSA) && !defined(HAVE_USER_RSA) && !defined(NO_SHA)
+    byte* prvTmp;
+    byte* pubTmp;
+    int prvBytes;
+    int pubBytes;
+    RSA *prvRsa = NULL;
+    RSA *pubRsa = NULL;
+    EVP_PKEY *prvPkey = NULL;
+    EVP_PKEY *pubPkey = NULL;
+    EVP_PKEY_CTX *enc = NULL;
+    EVP_PKEY_CTX *dec = NULL;
+
+    byte in[] = "Everyone gets Friday off.";
+    byte out[256];
+    size_t outlen;
+    size_t keySz;
+    byte plain[256];
+#if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
+    XFILE keyFile;
+    XFILE keypubFile;
+    char cliKey[] = "./certs/client-key.der";
+    char cliKeypub[] = "./certs/client-keyPub.der";
+
+#endif
+
+    /* scratch buffers for the DER-encoded private/public keys */
+    prvTmp = (byte*)XMALLOC(FOURK_BUFF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+    if (prvTmp == NULL)
+        return ERR_BASE_PKEY-1;
+    pubTmp = (byte*)XMALLOC(FOURK_BUFF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pubTmp == NULL) {
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        return ERR_BASE_PKEY-2;
+    }
+
+#ifdef USE_CERT_BUFFERS_1024
+    XMEMCPY(prvTmp, client_key_der_1024, sizeof_client_key_der_1024);
+    prvBytes = sizeof_client_key_der_1024;
+    XMEMCPY(pubTmp, client_keypub_der_1024, sizeof_client_keypub_der_1024);
+    pubBytes = sizeof_client_keypub_der_1024;
+#elif defined(USE_CERT_BUFFERS_2048)
+    XMEMCPY(prvTmp, client_key_der_2048, sizeof_client_key_der_2048);
+    prvBytes = sizeof_client_key_der_2048;
+    XMEMCPY(pubTmp, client_keypub_der_2048, sizeof_client_keypub_der_2048);
+    pubBytes = sizeof_client_keypub_der_2048;
+#else
+    /* no baked-in buffers: read the DER keys from the certs directory */
+    keyFile = XFOPEN(cliKey, "rb");
+    if (!keyFile) {
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        err_sys("can't open ./certs/client-key.der, "
+                "Please run from wolfSSL home dir", ERR_BASE_PKEY-3);
+        return ERR_BASE_PKEY-3;
+    }
+    prvBytes = (int)XFREAD(prvTmp, 1, (int)FOURK_BUFF, keyFile);
+    XFCLOSE(keyFile);
+    keypubFile = XFOPEN(cliKeypub, "rb");
+    if (!keypubFile) {
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        err_sys("can't open ./certs/client-cert.der, "
+                "Please run from wolfSSL home dir", -4);
+        return ERR_BASE_PKEY-4;
+    }
+    pubBytes = (int)XFREAD(pubTmp, 1, (int)FOURK_BUFF, keypubFile);
+    XFCLOSE(keypubFile);
+#endif /* USE_CERT_BUFFERS */
+
+    prvRsa = wolfSSL_RSA_new();
+    pubRsa = wolfSSL_RSA_new();
+    if((prvRsa == NULL) || (pubRsa == NULL)){
+        printf("error with RSA_new\n");
+        ret = ERR_BASE_PKEY-10;
+        goto openssl_pkey0_test_done;
+    }
+
+    ret = wolfSSL_RSA_LoadDer_ex(prvRsa, prvTmp, prvBytes, WOLFSSL_RSA_LOAD_PRIVATE);
+    if(ret != SSL_SUCCESS){
+        printf("error with RSA_LoadDer_ex\n");
+        ret = ERR_BASE_PKEY-11;
+        goto openssl_pkey0_test_done;
+    }
+
+    ret = wolfSSL_RSA_LoadDer_ex(pubRsa, pubTmp, pubBytes, WOLFSSL_RSA_LOAD_PUBLIC);
+    if(ret != SSL_SUCCESS){
+        printf("error with RSA_LoadDer_ex\n");
+        ret = ERR_BASE_PKEY-12;
+        goto openssl_pkey0_test_done;
+    }
+    keySz = (size_t)RSA_size(pubRsa);    /* PKCS#1 ciphertext length */
+
+    prvPkey = wolfSSL_EVP_PKEY_new();
+    pubPkey = wolfSSL_EVP_PKEY_new();
+    if((prvPkey == NULL) || (pubPkey == NULL)){
+        printf("error with PKEY_new\n");
+        ret = ERR_BASE_PKEY-13;
+        goto openssl_pkey0_test_done;
+    }
+    /* set1 bumps the RSA reference count; both set1 calls return 1 on ok */
+    ret = wolfSSL_EVP_PKEY_set1_RSA(prvPkey, prvRsa);
+    ret += wolfSSL_EVP_PKEY_set1_RSA(pubPkey, pubRsa);
+    if(ret != 2){
+        printf("error with PKEY_set1_RSA\n");
+        ret = ERR_BASE_PKEY-14;
+        goto openssl_pkey0_test_done;
+    }
+
+    dec = EVP_PKEY_CTX_new(prvPkey, NULL);
+    enc = EVP_PKEY_CTX_new(pubPkey, NULL);
+    if((dec == NULL)||(enc==NULL)){
+        printf("error with EVP_PKEY_CTX_new\n");
+        ret = ERR_BASE_PKEY-15;
+        goto openssl_pkey0_test_done;
+    }
+
+    ret = EVP_PKEY_decrypt_init(dec);
+    if (ret != 1) {
+        printf("error with decrypt init\n");
+        ret = ERR_BASE_PKEY-16;
+        goto openssl_pkey0_test_done;
+    }
+    ret = EVP_PKEY_encrypt_init(enc);
+    if (ret != 1) {
+        printf("error with encrypt init\n");
+        ret = ERR_BASE_PKEY-17;
+        goto openssl_pkey0_test_done;
+    }
+    XMEMSET(out, 0, sizeof(out));
+    ret = EVP_PKEY_encrypt(enc, out, &outlen, in, sizeof(in));
+    if (ret != 1) {
+        printf("error encrypting msg\n");
+        ret = ERR_BASE_PKEY-18;
+        goto openssl_pkey0_test_done;
+    }
+
+    show("encrypted msg", out, outlen);
+
+    XMEMSET(plain, 0, sizeof(plain));
+    /* ciphertext length is the key size, so keySz is used here */
+    ret = EVP_PKEY_decrypt(dec, plain, &outlen, out, keySz);
+    if (ret != 1) {
+        printf("error decrypting msg\n");
+        ret = ERR_BASE_PKEY-19;
+        goto openssl_pkey0_test_done;
+    }
+    show("decrypted msg", plain, outlen);
+
+    /* RSA_PKCS1_OAEP_PADDING test */
+    ret = EVP_PKEY_decrypt_init(dec);
+    if (ret != 1) {
+        printf("error with decrypt init\n");
+        ret = ERR_BASE_PKEY-30;
+        goto openssl_pkey0_test_done;
+    }
+    ret = EVP_PKEY_encrypt_init(enc);
+    if (ret != 1) {
+        printf("error with encrypt init\n");
+        ret = ERR_BASE_PKEY-31;
+        goto openssl_pkey0_test_done;
+    }
+
+    if (EVP_PKEY_CTX_set_rsa_padding(dec, RSA_PKCS1_PADDING) <= 0) {
+        printf("first set rsa padding error\n");
+        ret = ERR_BASE_PKEY-32;
+        goto openssl_pkey0_test_done;
+    }
+
+#ifndef HAVE_FIPS
+    if (EVP_PKEY_CTX_set_rsa_padding(dec, RSA_PKCS1_OAEP_PADDING) <= 0){
+        printf("second set rsa padding error\n");
+        ret = ERR_BASE_PKEY-33;
+        goto openssl_pkey0_test_done;
+    }
+
+    if (EVP_PKEY_CTX_set_rsa_padding(enc, RSA_PKCS1_OAEP_PADDING) <= 0) {
+        printf("third set rsa padding error\n");
+        ret = ERR_BASE_PKEY-34;
+        goto openssl_pkey0_test_done;
+    }
+#endif
+
+    XMEMSET(out, 0, sizeof(out));
+    ret = EVP_PKEY_encrypt(enc, out, &outlen, in, sizeof(in));
+    if (ret != 1) {
+        printf("error encrypting msg\n");
+        ret = ERR_BASE_PKEY-35;
+        goto openssl_pkey0_test_done;
+    }
+
+    show("encrypted msg", out, outlen);
+
+    XMEMSET(plain, 0, sizeof(plain));
+    ret = EVP_PKEY_decrypt(dec, plain, &outlen, out, keySz);
+    if (ret != 1) {
+        printf("error decrypting msg\n");
+        ret = ERR_BASE_PKEY-36;
+        goto openssl_pkey0_test_done;
+    }
+
+    show("decrypted msg", plain, outlen);
+
+    ret = 0; /* made it to this point without error then set success */
+openssl_pkey0_test_done:
+
+    /* the free/CTX_free routines accept NULL, so unconditional cleanup
+     * is safe on every error path that reaches this label */
+    wolfSSL_RSA_free(prvRsa);
+    wolfSSL_RSA_free(pubRsa);
+    EVP_PKEY_free(pubPkey);
+    EVP_PKEY_free(prvPkey);
+    EVP_PKEY_CTX_free(dec);
+    EVP_PKEY_CTX_free(enc);
+    XFREE(prvTmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(pubTmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#endif /* !NO_RSA && !HAVE_USER_RSA && !NO_SHA */
+
+    return ret;
+}
+
+
+/* "Phase 2" EVP_PKEY test: loads a private key via d2i_PrivateKey and a
+ * public key via X509_get_pubkey, checks EVP_PKEY_bits/EVP_PKEY_size
+ * against the configured key length, then performs an encrypt/decrypt
+ * round trip through EVP_PKEY_CTX.  Returns 0 on success or a negative
+ * code in the -88xx range. */
+int openssl_pkey1_test(void)
+{
+    int ret = 0;
+#if !defined(NO_FILESYSTEM) && !defined(NO_RSA) && !defined(HAVE_USER_RSA) && \
+    !defined(NO_SHA)
+    EVP_PKEY_CTX* dec = NULL;
+    EVP_PKEY_CTX* enc = NULL;
+    EVP_PKEY* pubKey  = NULL;
+    EVP_PKEY* prvKey  = NULL;
+    X509* x509;
+
+    const unsigned char msg[] = "sugar slapped";
+    const unsigned char* clikey;
+    unsigned char tmp[FOURK_BUF];
+    long cliKeySz;
+    unsigned char cipher[RSA_TEST_BYTES];
+    unsigned char plain[RSA_TEST_BYTES];
+    size_t outlen;
+    int keyLenBits = 2048;  /* default; overridden by cert-buffer size below */
+
+#if defined(USE_CERT_BUFFERS_1024)
+    XMEMCPY(tmp, client_key_der_1024, sizeof_client_key_der_1024);
+    cliKeySz = (long)sizeof_client_key_der_1024;
+
+    x509 = wolfSSL_X509_load_certificate_buffer(client_cert_der_1024,
+            sizeof_client_cert_der_1024, SSL_FILETYPE_ASN1);
+    keyLenBits = 1024;
+#elif defined(USE_CERT_BUFFERS_2048)
+    XMEMCPY(tmp, client_key_der_2048, sizeof_client_key_der_2048);
+    cliKeySz = (long)sizeof_client_key_der_2048;
+
+    x509 = wolfSSL_X509_load_certificate_buffer(client_cert_der_2048,
+            sizeof_client_cert_der_2048, SSL_FILETYPE_ASN1);
+#elif defined(USE_CERT_BUFFERS_3072)
+    XMEMCPY(tmp, client_key_der_3072, sizeof_client_key_der_3072);
+    cliKeySz = (long)sizeof_client_key_der_3072;
+
+    x509 = wolfSSL_X509_load_certificate_buffer(client_cert_der_3072,
+            sizeof_client_cert_der_3072, SSL_FILETYPE_ASN1);
+    keyLenBits = 3072;
+#elif defined(USE_CERT_BUFFERS_4096)
+    XMEMCPY(tmp, client_key_der_4096, sizeof_client_key_der_4096);
+    cliKeySz = (long)sizeof_client_key_der_4096;
+
+    x509 = wolfSSL_X509_load_certificate_buffer(client_cert_der_4096,
+            sizeof_client_cert_der_4096, SSL_FILETYPE_ASN1);
+    keyLenBits = 4096;
+#else
+    XFILE f;
+
+    f = XFOPEN(clientKey, "rb");
+
+    if (!f) {
+        err_sys("can't open ./certs/client-key.der, "
+                "Please run from wolfSSL home dir", -41);
+        return -8800;
+    }
+
+    cliKeySz = (long)XFREAD(tmp, 1, FOURK_BUF, f);
+    XFCLOSE(f);
+
+    /* using existing wolfSSL api to get public and private key */
+    x509 = wolfSSL_X509_load_certificate_file(clientCert, SSL_FILETYPE_ASN1);
+#endif /* USE_CERT_BUFFERS */
+    clikey = tmp;
+
+    /* sanity check of EVP_PKEY_new/free only */
+    /* NOTE(review): returning -8801 here leaks x509 (loaded above and not
+     * freed on this path) -- confirm and route through the cleanup label */
+    if ((prvKey = EVP_PKEY_new()) == NULL) {
+        return -8801;
+    }
+    EVP_PKEY_free(prvKey);
+    prvKey = NULL;
+
+    if (x509 == NULL) {
+        ret = -8802;
+        goto openssl_pkey1_test_done;
+    }
+
+    pubKey = X509_get_pubkey(x509);
+    if (pubKey == NULL) {
+        ret = -8803;
+        goto openssl_pkey1_test_done;
+    }
+
+    prvKey = d2i_PrivateKey(EVP_PKEY_RSA, NULL, &clikey, cliKeySz);
+    if (prvKey == NULL) {
+        ret = -8804;
+        goto openssl_pkey1_test_done;
+    }
+
+    /* phase 2 API to create EVP_PKEY_CTX and encrypt/decrypt */
+    if (EVP_PKEY_bits(prvKey) != keyLenBits) {
+        ret = -8805;
+        goto openssl_pkey1_test_done;
+    }
+
+    if (EVP_PKEY_size(prvKey) != keyLenBits/8) {
+        ret = -8806;
+        goto openssl_pkey1_test_done;
+    }
+
+    dec = EVP_PKEY_CTX_new(prvKey, NULL);
+    enc = EVP_PKEY_CTX_new(pubKey, NULL);
+    if (dec == NULL || enc == NULL) {
+        ret = -8807;
+        goto openssl_pkey1_test_done;
+    }
+
+    if (EVP_PKEY_decrypt_init(dec) != 1) {
+        ret = -8808;
+        goto openssl_pkey1_test_done;
+    }
+
+    if (EVP_PKEY_encrypt_init(enc) != 1) {
+        ret = -8809;
+        goto openssl_pkey1_test_done;
+    }
+
+    if (EVP_PKEY_CTX_set_rsa_padding(dec, RSA_PKCS1_PADDING) <= 0) {
+        ret = -8810;
+        goto openssl_pkey1_test_done;
+    }
+
+#ifndef HAVE_FIPS
+    if (EVP_PKEY_CTX_set_rsa_padding(dec, RSA_PKCS1_OAEP_PADDING) <= 0){
+        ret = -8811;
+        goto openssl_pkey1_test_done;
+    }
+
+    if (EVP_PKEY_CTX_set_rsa_padding(enc, RSA_PKCS1_OAEP_PADDING) <= 0) {
+        ret = -8812;
+        goto openssl_pkey1_test_done;
+    }
+#endif
+
+    XMEMSET(cipher, 0, sizeof(cipher));
+    outlen = keyLenBits/8;   /* RSA ciphertext length == key size in bytes */
+    if (EVP_PKEY_encrypt(enc, cipher, &outlen, msg, sizeof(msg)) < 0) {
+        ret = -8813;
+        goto openssl_pkey1_test_done;
+    }
+
+    XMEMSET(plain, 0, sizeof(plain));
+    if (EVP_PKEY_decrypt(dec, plain, &outlen, cipher, outlen) != 1) {
+        ret = -8814;
+        goto openssl_pkey1_test_done;
+    }
+
+openssl_pkey1_test_done:
+    if (pubKey != NULL) {
+        EVP_PKEY_free(pubKey);
+    }
+    if (prvKey != NULL) {
+        EVP_PKEY_free(prvKey);
+    }
+    if (dec != NULL) {
+        EVP_PKEY_CTX_free(dec);
+    }
+    if (enc != NULL) {
+        EVP_PKEY_CTX_free(enc);
+    }
+    if (x509 != NULL) {
+        X509_free(x509);
+    }
+
+#endif
+    return ret;
+}
+
+
+#define ERR_BASE_EVPSIG -5100
+
+/* Sign/verify round trip through the compat-layer EVP_Sign*/EVP_Verify*
+ * API using SHA-1 and an RSA key pair loaded from DER.  Also checks
+ * that EVP_VerifyFinal fails when no data was fed to the context.
+ * Returns 0 on success or a negative code based at ERR_BASE_EVPSIG.
+ * NOTE(review): several early-return paths below (e.g. -6, -10 .. -13)
+ * do not free prvTmp/pubTmp and/or the RSA / PKEY / MD_CTX objects --
+ * unlike openssl_pkey0_test this function has no single cleanup label;
+ * confirm and consider restructuring with a goto-cleanup. */
+int openssl_evpSig_test(void)
+{
+#if !defined(NO_RSA) && !defined(NO_SHA) && !defined(HAVE_USER_RSA)
+    byte*   prvTmp;
+    byte*   pubTmp;
+    int prvBytes;
+    int pubBytes;
+    RSA *prvRsa;
+    RSA *pubRsa;
+    EVP_PKEY *prvPkey;
+    EVP_PKEY *pubPkey;
+
+    EVP_MD_CTX* sign;
+    EVP_MD_CTX* verf;
+    char msg[] = "see spot run";
+    unsigned char sig[256];
+    unsigned int sigSz;
+    const void* pt;
+    unsigned int count;
+    int ret, ret1, ret2;
+
+    #if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
+    XFILE keyFile;
+    XFILE keypubFile;
+    char cliKey[]    = "./certs/client-key.der";
+    char cliKeypub[] = "./certs/client-keyPub.der";
+    #endif
+
+    /* scratch buffers for the DER-encoded private/public keys */
+    prvTmp = (byte*)XMALLOC(FOURK_BUFF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+    if (prvTmp == NULL)
+        return ERR_BASE_EVPSIG-1;
+    pubTmp = (byte*)XMALLOC(FOURK_BUFF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+    if (pubTmp == NULL) {
+        XFREE(prvTmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+        return ERR_BASE_EVPSIG-2;
+    }
+
+#ifdef USE_CERT_BUFFERS_1024
+    XMEMCPY(prvTmp, client_key_der_1024, sizeof_client_key_der_1024);
+    prvBytes = sizeof_client_key_der_1024;
+    XMEMCPY(pubTmp, client_keypub_der_1024, sizeof_client_keypub_der_1024);
+    pubBytes = sizeof_client_keypub_der_1024;
+#elif defined(USE_CERT_BUFFERS_2048)
+    XMEMCPY(prvTmp, client_key_der_2048, sizeof_client_key_der_2048);
+    prvBytes = sizeof_client_key_der_2048;
+    XMEMCPY(pubTmp, client_keypub_der_2048, sizeof_client_keypub_der_2048);
+    pubBytes = sizeof_client_keypub_der_2048;
+#else
+    keyFile = XFOPEN(cliKey, "rb");
+    if (!keyFile) {
+        XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        err_sys("can't open ./certs/client-key.der, "
+                "Please run from wolfSSL home dir", -40);
+        return ERR_BASE_EVPSIG-3;
+    }
+    prvBytes = (int)XFREAD(prvTmp, 1, (int)FOURK_BUFF, keyFile);
+    XFCLOSE(keyFile);
+    keypubFile = XFOPEN(cliKeypub, "rb");
+    if (!keypubFile) {
+        XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        err_sys("can't open ./certs/client-cert.der, "
+                "Please run from wolfSSL home dir", -41);
+        return ERR_BASE_EVPSIG-4;
+    }
+    pubBytes = (int)XFREAD(pubTmp, 1, (int)FOURK_BUFF, keypubFile);
+    XFCLOSE(keypubFile);
+    #endif /* USE_CERT_BUFFERS */
+
+    prvRsa = wolfSSL_RSA_new();
+    pubRsa = wolfSSL_RSA_new();
+    if((prvRsa == NULL) || (pubRsa == NULL)){
+        XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        err_sys("ERROR with RSA_new", -8900);
+        return ERR_BASE_EVPSIG-5;
+    }
+
+    ret1 = wolfSSL_RSA_LoadDer_ex(prvRsa, prvTmp, prvBytes, WOLFSSL_RSA_LOAD_PRIVATE);
+    ret2 = wolfSSL_RSA_LoadDer_ex(pubRsa, pubTmp, pubBytes, WOLFSSL_RSA_LOAD_PUBLIC);
+    if((ret1 != SSL_SUCCESS) || (ret2 != SSL_SUCCESS)){
+        printf("error with RSA_LoadDer_ex\n");
+        return ERR_BASE_EVPSIG-6;
+    }
+
+    prvPkey = wolfSSL_EVP_PKEY_new();
+    pubPkey = wolfSSL_EVP_PKEY_new();
+    if((prvPkey == NULL) || (pubPkey == NULL)){
+        XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        printf("error with KEY_new\n");
+        return ERR_BASE_EVPSIG-7;
+    }
+    ret1 = wolfSSL_EVP_PKEY_set1_RSA(prvPkey, prvRsa);
+    ret2 = wolfSSL_EVP_PKEY_set1_RSA(pubPkey, pubRsa);
+    if((ret1 != 1) || (ret2 != 1)){
+        XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        printf("error with EVP_PKEY_set1_RSA\n");
+        return ERR_BASE_EVPSIG-8;
+    }
+
+    /****************** sign and verify *******************/
+    sign = EVP_MD_CTX_create();
+    verf = EVP_MD_CTX_create();
+    if((sign == NULL)||(verf == NULL)){
+        printf("error with EVP_MD_CTX_create\n");
+        return ERR_BASE_EVPSIG-10;
+    }
+
+    ret = EVP_SignInit(sign, EVP_sha1());
+    if(ret != SSL_SUCCESS){
+        printf("error with EVP_SignInit\n");
+        return ERR_BASE_EVPSIG-11;
+    }
+
+    count = sizeof(msg);
+    show("message = ", (char *)msg, count);
+
+    /* sign */
+    XMEMSET(sig, 0, sizeof(sig));
+    pt = (const void*)msg;
+    ret1 = EVP_SignUpdate(sign, pt, count);
+    ret2 = EVP_SignFinal(sign, sig, &sigSz, prvPkey);
+    if((ret1 != SSL_SUCCESS) || (ret2 != SSL_SUCCESS)){
+        printf("error with EVP_MD_CTX_create\n");
+        return ERR_BASE_EVPSIG-12;
+    }
+    show("signature = ", (char *)sig, sigSz);
+
+    /* verify */
+    pt = (const void*)msg;
+    ret1 = EVP_VerifyInit(verf, EVP_sha1());
+    ret2 = EVP_VerifyUpdate(verf, pt, count);
+    if((ret1 != SSL_SUCCESS) || (ret2 != SSL_SUCCESS)){
+        printf("error with EVP_Verify\n");
+        return ERR_BASE_EVPSIG-13;
+    }
+    if (EVP_VerifyFinal(verf, sig, sigSz, pubPkey) != 1) {
+        XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        printf("error with EVP_VerifyFinal\n");
+        return ERR_BASE_EVPSIG-14;
+    }
+
+    /* expect fail without update */
+    EVP_VerifyInit(verf, EVP_sha1());
+    if (EVP_VerifyFinal(verf, sig, sigSz, pubPkey) == 1) {
+        XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        printf("EVP_VerifyInit without update not detected\n");
+        return ERR_BASE_EVPSIG-15;
+    }
+
+    /* success path: release everything */
+    XFREE(pubTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(prvTmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+    EVP_MD_CTX_destroy(sign);
+    EVP_MD_CTX_destroy(verf);
+
+    wolfSSL_RSA_free(prvRsa);
+    wolfSSL_RSA_free(pubRsa);
+    EVP_PKEY_free(pubPkey);
+    EVP_PKEY_free(prvPkey);
+
+#endif /* !NO_RSA && !NO_SHA && !HAVE_USER_RSA */
+    return 0;
+}
#endif /* OPENSSL_EXTRA */
#ifndef NO_PWDBASED
+#ifdef HAVE_SCRYPT
+/* Test vectors taken from RFC 7914: scrypt PBKDF - Section 12. */
+/* Verifies wc_scrypt()/wc_scrypt_ex() against the RFC 7914 section 12
+ * test vectors.  Note the cost argument of wc_scrypt is log2 of the CPU
+ * cost N (e.g. 10 here matches N = 1<<10 passed to wc_scrypt_ex below,
+ * with the identical expected output verify2).  Returns 0 on success or
+ * a negative code in the -90xx range. */
+int scrypt_test(void)
+{
+    int   ret;
+    byte  derived[64];
+
+    const byte verify1[] = {
+        0x77, 0xd6, 0x57, 0x62, 0x38, 0x65, 0x7b, 0x20,
+        0x3b, 0x19, 0xca, 0x42, 0xc1, 0x8a, 0x04, 0x97,
+        0xf1, 0x6b, 0x48, 0x44, 0xe3, 0x07, 0x4a, 0xe8,
+        0xdf, 0xdf, 0xfa, 0x3f, 0xed, 0xe2, 0x14, 0x42,
+        0xfc, 0xd0, 0x06, 0x9d, 0xed, 0x09, 0x48, 0xf8,
+        0x32, 0x6a, 0x75, 0x3a, 0x0f, 0xc8, 0x1f, 0x17,
+        0xe8, 0xd3, 0xe0, 0xfb, 0x2e, 0x0d, 0x36, 0x28,
+        0xcf, 0x35, 0xe2, 0x0c, 0x38, 0xd1, 0x89, 0x06
+    };
+    const byte verify2[] = {
+        0xfd, 0xba, 0xbe, 0x1c, 0x9d, 0x34, 0x72, 0x00,
+        0x78, 0x56, 0xe7, 0x19, 0x0d, 0x01, 0xe9, 0xfe,
+        0x7c, 0x6a, 0xd7, 0xcb, 0xc8, 0x23, 0x78, 0x30,
+        0xe7, 0x73, 0x76, 0x63, 0x4b, 0x37, 0x31, 0x62,
+        0x2e, 0xaf, 0x30, 0xd9, 0x2e, 0x22, 0xa3, 0x88,
+        0x6f, 0xf1, 0x09, 0x27, 0x9d, 0x98, 0x30, 0xda,
+        0xc7, 0x27, 0xaf, 0xb9, 0x4a, 0x83, 0xee, 0x6d,
+        0x83, 0x60, 0xcb, 0xdf, 0xa2, 0xcc, 0x06, 0x40
+    };
+#if !defined(BENCH_EMBEDDED) && !defined(HAVE_INTEL_QA)
+    const byte verify3[] = {
+        0x70, 0x23, 0xbd, 0xcb, 0x3a, 0xfd, 0x73, 0x48,
+        0x46, 0x1c, 0x06, 0xcd, 0x81, 0xfd, 0x38, 0xeb,
+        0xfd, 0xa8, 0xfb, 0xba, 0x90, 0x4f, 0x8e, 0x3e,
+        0xa9, 0xb5, 0x43, 0xf6, 0x54, 0x5d, 0xa1, 0xf2,
+        0xd5, 0x43, 0x29, 0x55, 0x61, 0x3f, 0x0f, 0xcf,
+        0x62, 0xd4, 0x97, 0x05, 0x24, 0x2a, 0x9a, 0xf9,
+        0xe6, 0x1e, 0x85, 0xdc, 0x0d, 0x65, 0x1e, 0x40,
+        0xdf, 0xcf, 0x01, 0x7b, 0x45, 0x57, 0x58, 0x87
+    };
+#endif
+#ifdef SCRYPT_TEST_ALL
+    /* Test case is very slow.
+     * Use for confirmation after code change or new platform.
+     */
+    const byte verify4[] = {
+        0x21, 0x01, 0xcb, 0x9b, 0x6a, 0x51, 0x1a, 0xae,
+        0xad, 0xdb, 0xbe, 0x09, 0xcf, 0x70, 0xf8, 0x81,
+        0xec, 0x56, 0x8d, 0x57, 0x4a, 0x2f, 0xfd, 0x4d,
+        0xab, 0xe5, 0xee, 0x98, 0x20, 0xad, 0xaa, 0x47,
+        0x8e, 0x56, 0xfd, 0x8f, 0x4b, 0xa5, 0xd0, 0x9f,
+        0xfa, 0x1c, 0x6d, 0x92, 0x7c, 0x40, 0xf4, 0xc3,
+        0x37, 0x30, 0x40, 0x49, 0xe8, 0xa9, 0x52, 0xfb,
+        0xcb, 0xf4, 0x5c, 0x6f, 0xa7, 0x7a, 0x41, 0xa4
+    };
+#endif
+
+    /* RFC 7914 vector 1: empty password/salt, N=2^4, r=1, p=1 */
+    ret = wc_scrypt(derived, NULL, 0, NULL, 0, 4, 1, 1, sizeof(verify1));
+    if (ret != 0)
+        return -9000;
+    if (XMEMCMP(derived, verify1, sizeof(verify1)) != 0)
+        return -9001;
+
+    /* RFC 7914 vector 2: "password"/"NaCl", N=2^10, r=8, p=16 */
+    ret = wc_scrypt(derived, (byte*)"password", 8, (byte*)"NaCl", 4, 10, 8, 16,
+                    sizeof(verify2));
+    if (ret != 0)
+        return -9002;
+    if (XMEMCMP(derived, verify2, sizeof(verify2)) != 0)
+        return -9003;
+
+    /* Don't run these test on embedded, since they use large mallocs */
+#if !defined(BENCH_EMBEDDED) && !defined(HAVE_INTEL_QA)
+    /* RFC 7914 vector 3: N=2^14 */
+    ret = wc_scrypt(derived, (byte*)"pleaseletmein", 13,
+                    (byte*)"SodiumChloride", 14, 14, 8, 1, sizeof(verify3));
+    if (ret != 0)
+        return -9004;
+    if (XMEMCMP(derived, verify3, sizeof(verify3)) != 0)
+        return -9005;
+
+#ifdef SCRYPT_TEST_ALL
+    /* RFC 7914 vector 4: N=2^20 -- very slow and memory hungry */
+    ret = wc_scrypt(derived, (byte*)"pleaseletmein", 13,
+                    (byte*)"SodiumChloride", 14, 20, 8, 1, sizeof(verify4));
+    if (ret != 0)
+        return -9006;
+    if (XMEMCMP(derived, verify4, sizeof(verify4)) != 0)
+        return -9007;
+#endif
+#endif /* !BENCH_EMBEDDED && !HAVE_INTEL_QA */
+
+    /* _ex form takes the absolute cost N (1<<10) rather than log2(N) */
+    ret = wc_scrypt_ex(derived, (byte*)"password", 8, (byte*)"NaCl", 4, 1<<10,
+                       8, 16, sizeof(verify2));
+    if (ret != 0)
+        return -9008;
+    if (XMEMCMP(derived, verify2, sizeof(verify2)) != 0)
+        return -9009;
+    return 0;
+}
+#endif
+
+#ifdef HAVE_PKCS12
int pkcs12_test(void)
{
const byte passwd[] = { 0x00, 0x73, 0x00, 0x6d, 0x00, 0x65, 0x00, 0x67,
@@ -4676,27 +17370,33 @@ int pkcs12_test(void)
int kLen = 24;
int iterations = 1;
int ret = wc_PKCS12_PBKDF(derived, passwd, sizeof(passwd), salt, 8,
- iterations, kLen, SHA256, id);
+ iterations, kLen, WC_SHA256, id);
if (ret < 0)
- return -103;
+ return -9100;
- if ( (ret = memcmp(derived, verify, kLen)) != 0)
- return -104;
+ if ( (ret = XMEMCMP(derived, verify, kLen)) != 0)
+ return -9101;
iterations = 1000;
ret = wc_PKCS12_PBKDF(derived, passwd2, sizeof(passwd2), salt2, 8,
- iterations, kLen, SHA256, id);
+ iterations, kLen, WC_SHA256, id);
if (ret < 0)
- return -105;
+ return -9102;
- if ( (ret = memcmp(derived, verify2, 24)) != 0)
- return -106;
+ ret = wc_PKCS12_PBKDF_ex(derived, passwd2, sizeof(passwd2), salt2, 8,
+ iterations, kLen, WC_SHA256, id, HEAP_HINT);
+ if (ret < 0)
+ return -9103;
+
+ if ( (ret = XMEMCMP(derived, verify2, 24)) != 0)
+ return -9104;
return 0;
}
+#endif /* HAVE_PKCS12 */
-
+#if defined(HAVE_PBKDF2) && !defined(NO_SHA256)
int pbkdf2_test(void)
{
char passwd[] = "passwordpassword";
@@ -4710,19 +17410,20 @@ int pbkdf2_test(void)
0x2d, 0xd4, 0xf9, 0x37, 0xd4, 0x95, 0x16, 0xa7, 0x2a, 0x9a, 0x21, 0xd1
};
- int ret = wc_PBKDF2(derived, (byte*)passwd, (int)strlen(passwd), salt, 8,
- iterations, kLen, SHA256);
+ int ret = wc_PBKDF2_ex(derived, (byte*)passwd, (int)XSTRLEN(passwd), salt,
+ (int)sizeof(salt), iterations, kLen, WC_SHA256, HEAP_HINT, devId);
if (ret != 0)
return ret;
- if (memcmp(derived, verify, sizeof(verify)) != 0)
- return -102;
+ if (XMEMCMP(derived, verify, sizeof(verify)) != 0)
+ return -9200;
return 0;
-}
+}
+#endif /* HAVE_PBKDF2 && !NO_SHA256 */
-#ifndef NO_SHA
+#if defined(HAVE_PBKDF1) && !defined(NO_SHA)
int pbkdf1_test(void)
{
char passwd[] = "password";
@@ -4732,30 +17433,48 @@ int pbkdf1_test(void)
byte derived[16];
const byte verify[] = {
- 0xDC, 0x19, 0x84, 0x7E, 0x05, 0xC6, 0x4D, 0x2F, 0xAF, 0x10, 0xEB, 0xFB,
- 0x4A, 0x3D, 0x2A, 0x20
+ 0xDC, 0x19, 0x84, 0x7E, 0x05, 0xC6, 0x4D, 0x2F,
+ 0xAF, 0x10, 0xEB, 0xFB, 0x4A, 0x3D, 0x2A, 0x20
};
- wc_PBKDF1(derived, (byte*)passwd, (int)strlen(passwd), salt, 8, iterations,
- kLen, SHA);
+ int ret = wc_PBKDF1_ex(derived, kLen, NULL, 0, (byte*)passwd,
+ (int)XSTRLEN(passwd), salt, (int)sizeof(salt), iterations, WC_SHA,
+ HEAP_HINT);
+ if (ret != 0)
+ return ret;
- if (memcmp(derived, verify, sizeof(verify)) != 0)
- return -101;
+ if (XMEMCMP(derived, verify, sizeof(verify)) != 0)
+ return -9300;
return 0;
}
-#endif
-
+#endif /* HAVE_PBKDF2 && !NO_SHA */
int pwdbased_test(void)
{
int ret = 0;
-#ifndef NO_SHA
- ret += pbkdf1_test();
-#endif
- ret += pbkdf2_test();
- return ret + pkcs12_test();
+#if defined(HAVE_PBKDF1) && !defined(NO_SHA)
+ ret = pbkdf1_test();
+ if (ret != 0)
+ return ret;
+#endif
+#if defined(HAVE_PBKDF2) && !defined(NO_SHA256)
+ ret = pbkdf2_test();
+ if (ret != 0)
+ return ret;
+#endif
+#ifdef HAVE_PKCS12
+ ret = pkcs12_test();
+ if (ret != 0)
+ return ret;
+#endif
+#ifdef HAVE_SCRYPT
+ ret = scrypt_test();
+ if (ret != 0)
+ return ret;
+#endif
+ return ret;
}
#endif /* NO_PWDBASED */
@@ -4807,40 +17526,40 @@ int hkdf_test(void)
(void)info1;
#ifndef NO_SHA
- ret = wc_HKDF(SHA, ikm1, 22, NULL, 0, NULL, 0, okm1, L);
+ ret = wc_HKDF(WC_SHA, ikm1, 22, NULL, 0, NULL, 0, okm1, L);
if (ret != 0)
- return -2001;
+ return -9500;
- if (memcmp(okm1, res1, L) != 0)
- return -2002;
+ if (XMEMCMP(okm1, res1, L) != 0)
+ return -9501;
#ifndef HAVE_FIPS
/* fips can't have key size under 14 bytes, salt is key too */
- ret = wc_HKDF(SHA, ikm1, 11, salt1, 13, info1, 10, okm1, L);
+ ret = wc_HKDF(WC_SHA, ikm1, 11, salt1, 13, info1, 10, okm1, L);
if (ret != 0)
- return -2003;
+ return -9502;
- if (memcmp(okm1, res2, L) != 0)
- return -2004;
+ if (XMEMCMP(okm1, res2, L) != 0)
+ return -9503;
#endif /* HAVE_FIPS */
#endif /* NO_SHA */
#ifndef NO_SHA256
- ret = wc_HKDF(SHA256, ikm1, 22, NULL, 0, NULL, 0, okm1, L);
+ ret = wc_HKDF(WC_SHA256, ikm1, 22, NULL, 0, NULL, 0, okm1, L);
if (ret != 0)
- return -2005;
+ return -9504;
- if (memcmp(okm1, res3, L) != 0)
- return -2006;
+ if (XMEMCMP(okm1, res3, L) != 0)
+ return -9505;
#ifndef HAVE_FIPS
/* fips can't have key size under 14 bytes, salt is key too */
- ret = wc_HKDF(SHA256, ikm1, 22, salt1, 13, info1, 10, okm1, L);
+ ret = wc_HKDF(WC_SHA256, ikm1, 22, salt1, 13, info1, 10, okm1, L);
if (ret != 0)
- return -2007;
+ return -9506;
- if (memcmp(okm1, res4, L) != 0)
- return -2007;
+ if (XMEMCMP(okm1, res4, L) != 0)
+ return -9507;
#endif /* HAVE_FIPS */
#endif /* NO_SHA256 */
@@ -4850,279 +17569,2553 @@ int hkdf_test(void)
#endif /* HAVE_HKDF */
+#if defined(HAVE_ECC) && defined(HAVE_X963_KDF)
+
+int x963kdf_test(void)
+{
+ int ret;
+ byte kek[128];
+
+#ifndef NO_SHA
+ /* SHA-1, COUNT = 0
+ * shared secret length: 192
+ * SharedInfo length: 0
+ * key data length: 128
+ */
+ const byte Z[] = {
+ 0x1c, 0x7d, 0x7b, 0x5f, 0x05, 0x97, 0xb0, 0x3d,
+ 0x06, 0xa0, 0x18, 0x46, 0x6e, 0xd1, 0xa9, 0x3e,
+ 0x30, 0xed, 0x4b, 0x04, 0xdc, 0x64, 0xcc, 0xdd
+ };
+
+ const byte verify[] = {
+ 0xbf, 0x71, 0xdf, 0xfd, 0x8f, 0x4d, 0x99, 0x22,
+ 0x39, 0x36, 0xbe, 0xb4, 0x6f, 0xee, 0x8c, 0xcc
+ };
+#endif
+
+#ifndef NO_SHA256
+ /* SHA-256, COUNT = 3
+ * shared secret length: 192
+ * SharedInfo length: 0
+ * key data length: 128
+ */
+ const byte Z2[] = {
+ 0xd3, 0x8b, 0xdb, 0xe5, 0xc4, 0xfc, 0x16, 0x4c,
+ 0xdd, 0x96, 0x7f, 0x63, 0xc0, 0x4f, 0xe0, 0x7b,
+ 0x60, 0xcd, 0xe8, 0x81, 0xc2, 0x46, 0x43, 0x8c
+ };
+
+ const byte verify2[] = {
+ 0x5e, 0x67, 0x4d, 0xb9, 0x71, 0xba, 0xc2, 0x0a,
+ 0x80, 0xba, 0xd0, 0xd4, 0x51, 0x4d, 0xc4, 0x84
+ };
+#endif
+
+#ifdef WOLFSSL_SHA512
+ /* SHA-512, COUNT = 0
+ * shared secret length: 192
+ * SharedInfo length: 0
+ * key data length: 128
+ */
+ const byte Z3[] = {
+ 0x87, 0xfc, 0x0d, 0x8c, 0x44, 0x77, 0x48, 0x5b,
+ 0xb5, 0x74, 0xf5, 0xfc, 0xea, 0x26, 0x4b, 0x30,
+ 0x88, 0x5d, 0xc8, 0xd9, 0x0a, 0xd8, 0x27, 0x82
+ };
+
+ const byte verify3[] = {
+ 0x94, 0x76, 0x65, 0xfb, 0xb9, 0x15, 0x21, 0x53,
+ 0xef, 0x46, 0x02, 0x38, 0x50, 0x6a, 0x02, 0x45
+ };
+
+ /* SHA-512, COUNT = 0
+ * shared secret length: 521
+ * SharedInfo length: 128
+ * key data length: 1024
+ */
+ const byte Z4[] = {
+ 0x00, 0xaa, 0x5b, 0xb7, 0x9b, 0x33, 0xe3, 0x89,
+ 0xfa, 0x58, 0xce, 0xad, 0xc0, 0x47, 0x19, 0x7f,
+ 0x14, 0xe7, 0x37, 0x12, 0xf4, 0x52, 0xca, 0xa9,
+ 0xfc, 0x4c, 0x9a, 0xdb, 0x36, 0x93, 0x48, 0xb8,
+ 0x15, 0x07, 0x39, 0x2f, 0x1a, 0x86, 0xdd, 0xfd,
+ 0xb7, 0xc4, 0xff, 0x82, 0x31, 0xc4, 0xbd, 0x0f,
+ 0x44, 0xe4, 0x4a, 0x1b, 0x55, 0xb1, 0x40, 0x47,
+ 0x47, 0xa9, 0xe2, 0xe7, 0x53, 0xf5, 0x5e, 0xf0,
+ 0x5a, 0x2d
+ };
+
+ const byte info4[] = {
+ 0xe3, 0xb5, 0xb4, 0xc1, 0xb0, 0xd5, 0xcf, 0x1d,
+ 0x2b, 0x3a, 0x2f, 0x99, 0x37, 0x89, 0x5d, 0x31
+ };
+
+ const byte verify4[] = {
+ 0x44, 0x63, 0xf8, 0x69, 0xf3, 0xcc, 0x18, 0x76,
+ 0x9b, 0x52, 0x26, 0x4b, 0x01, 0x12, 0xb5, 0x85,
+ 0x8f, 0x7a, 0xd3, 0x2a, 0x5a, 0x2d, 0x96, 0xd8,
+ 0xcf, 0xfa, 0xbf, 0x7f, 0xa7, 0x33, 0x63, 0x3d,
+ 0x6e, 0x4d, 0xd2, 0xa5, 0x99, 0xac, 0xce, 0xb3,
+ 0xea, 0x54, 0xa6, 0x21, 0x7c, 0xe0, 0xb5, 0x0e,
+ 0xef, 0x4f, 0x6b, 0x40, 0xa5, 0xc3, 0x02, 0x50,
+ 0xa5, 0xa8, 0xee, 0xee, 0x20, 0x80, 0x02, 0x26,
+ 0x70, 0x89, 0xdb, 0xf3, 0x51, 0xf3, 0xf5, 0x02,
+ 0x2a, 0xa9, 0x63, 0x8b, 0xf1, 0xee, 0x41, 0x9d,
+ 0xea, 0x9c, 0x4f, 0xf7, 0x45, 0xa2, 0x5a, 0xc2,
+ 0x7b, 0xda, 0x33, 0xca, 0x08, 0xbd, 0x56, 0xdd,
+ 0x1a, 0x59, 0xb4, 0x10, 0x6c, 0xf2, 0xdb, 0xbc,
+ 0x0a, 0xb2, 0xaa, 0x8e, 0x2e, 0xfa, 0x7b, 0x17,
+ 0x90, 0x2d, 0x34, 0x27, 0x69, 0x51, 0xce, 0xcc,
+ 0xab, 0x87, 0xf9, 0x66, 0x1c, 0x3e, 0x88, 0x16
+ };
+#endif
+
+#ifndef NO_SHA
+ ret = wc_X963_KDF(WC_HASH_TYPE_SHA, Z, sizeof(Z), NULL, 0,
+ kek, sizeof(verify));
+ if (ret != 0)
+ return -9600;
+
+ if (XMEMCMP(verify, kek, sizeof(verify)) != 0)
+ return -9601;
+#endif
+
+#ifndef NO_SHA256
+ ret = wc_X963_KDF(WC_HASH_TYPE_SHA256, Z2, sizeof(Z2), NULL, 0,
+ kek, sizeof(verify2));
+ if (ret != 0)
+ return -9602;
+
+ if (XMEMCMP(verify2, kek, sizeof(verify2)) != 0)
+ return -9603;
+#endif
+
+#ifdef WOLFSSL_SHA512
+ ret = wc_X963_KDF(WC_HASH_TYPE_SHA512, Z3, sizeof(Z3), NULL, 0,
+ kek, sizeof(verify3));
+ if (ret != 0)
+ return -9604;
+
+ if (XMEMCMP(verify3, kek, sizeof(verify3)) != 0)
+ return -9605;
+
+ ret = wc_X963_KDF(WC_HASH_TYPE_SHA512, Z4, sizeof(Z4), info4,
+ sizeof(info4), kek, sizeof(verify4));
+ if (ret != 0)
+ return -9606;
+
+ if (XMEMCMP(verify4, kek, sizeof(verify4)) != 0)
+ return -9607;
+#endif
+
+ return 0;
+}
+
+#endif /* HAVE_X963_KDF */
+
+
#ifdef HAVE_ECC
-typedef struct rawEccVector {
- const char* msg;
+#ifdef BENCH_EMBEDDED
+ #define ECC_SHARED_SIZE 128
+#else
+ #define ECC_SHARED_SIZE MAX_ECC_BYTES
+#endif
+#define ECC_DIGEST_SIZE MAX_ECC_BYTES
+#define ECC_SIG_SIZE ECC_MAX_SIG_SIZE
+
+#ifndef NO_ECC_VECTOR_TEST
+ #if (defined(HAVE_ECC192) || defined(HAVE_ECC224) ||\
+ !defined(NO_ECC256) || defined(HAVE_ECC384) ||\
+ defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES))
+ #define HAVE_ECC_VECTOR_TEST
+ #endif
+#endif
+
+#ifdef HAVE_ECC_VECTOR_TEST
+typedef struct eccVector {
+ const char* msg; /* SHA-1 Encoded Message */
const char* Qx;
const char* Qy;
- const char* d;
+ const char* d; /* Private Key */
const char* R;
const char* S;
const char* curveName;
- size_t msgLen;
-} rawEccVector;
+ word32 msgLen;
+ word32 keySize;
+#ifndef NO_ASN
+ const byte* r;
+ word32 rSz;
+ const byte* s;
+ word32 sSz;
+#endif
+} eccVector;
-int ecc_test(void)
+static int ecc_test_vector_item(const eccVector* vector)
{
- RNG rng;
- byte sharedA[1024];
- byte sharedB[1024];
- byte sig[1024];
- byte digest[20];
- byte exportBuf[1024];
- word32 x, y;
- int i, verify, ret;
- ecc_key userA, userB, pubKey;
+ int ret = 0, verify = 0;
+ word32 sigSz;
+ ecc_key userA;
+ DECLARE_VAR(sig, byte, ECC_SIG_SIZE, HEAP_HINT);
+#if !defined(NO_ASN) && !defined(HAVE_SELFTEST)
+ word32 sigRawSz;
+ DECLARE_VAR(sigRaw, byte, ECC_SIG_SIZE, HEAP_HINT);
+#endif
- ret = wc_InitRng(&rng);
+ ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+ if (ret != 0) {
+ FREE_VAR(sig, HEAP_HINT);
+ return ret;
+ }
+
+ ret = wc_ecc_import_raw(&userA, vector->Qx, vector->Qy,
+ vector->d, vector->curveName);
+ if (ret != 0)
+ goto done;
+
+ XMEMSET(sig, 0, ECC_SIG_SIZE);
+ sigSz = ECC_SIG_SIZE;
+ ret = wc_ecc_rs_to_sig(vector->R, vector->S, sig, &sigSz);
+ if (ret != 0)
+ goto done;
+
+#if !defined(NO_ASN) && !defined(HAVE_SELFTEST)
+ XMEMSET(sigRaw, 0, ECC_SIG_SIZE);
+ sigRawSz = ECC_SIG_SIZE;
+ ret = wc_ecc_rs_raw_to_sig(vector->r, vector->rSz, vector->s, vector->sSz,
+ sigRaw, &sigRawSz);
+ if (ret != 0)
+ goto done;
+
+ if (sigSz != sigRawSz || XMEMCMP(sig, sigRaw, sigSz) != 0) {
+ ret = -9608;
+ goto done;
+ }
+#endif
+
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_verify_hash(sig, sigSz, (byte*)vector->msg,
+ vector->msgLen, &verify, &userA);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0)
+ goto done;
+ TEST_SLEEP();
+
+ if (verify != 1)
+ ret = -9609;
+
+done:
+ wc_ecc_free(&userA);
+
+#if !defined(NO_ASN) && !defined(HAVE_SELFTEST)
+ FREE_VAR(sigRaw, HEAP_HINT);
+#endif
+ FREE_VAR(sig, HEAP_HINT);
+
+ return ret;
+}
+
+static int ecc_test_vector(int keySize)
+{
+ int ret;
+ eccVector vec;
+
+ XMEMSET(&vec, 0, sizeof(vec));
+ vec.keySize = (word32)keySize;
+
+ switch(keySize) {
+
+#if defined(HAVE_ECC112) || defined(HAVE_ALL_CURVES)
+ case 14:
+ return 0;
+#endif /* HAVE_ECC112 */
+#if defined(HAVE_ECC128) || defined(HAVE_ALL_CURVES)
+ case 16:
+ return 0;
+#endif /* HAVE_ECC128 */
+#if defined(HAVE_ECC160) || defined(HAVE_ALL_CURVES)
+ case 20:
+ return 0;
+#endif /* HAVE_ECC160 */
+
+#if defined(HAVE_ECC192) || defined(HAVE_ALL_CURVES)
+ case 24:
+ /* first [P-192,SHA-1] vector from FIPS 186-3 NIST vectors */
+ #if 1
+ vec.msg = "\x60\x80\x79\x42\x3f\x12\x42\x1d\xe6\x16\xb7\x49\x3e\xbe\x55\x1c\xf4\xd6\x5b\x92";
+ vec.msgLen = 20;
+ #else
+ /* This is the raw message prior to SHA-1 */
+ vec.msg =
+ "\xeb\xf7\x48\xd7\x48\xeb\xbc\xa7\xd2\x9f\xb4\x73\x69\x8a\x6e\x6b"
+ "\x4f\xb1\x0c\x86\x5d\x4a\xf0\x24\xcc\x39\xae\x3d\xf3\x46\x4b\xa4"
+ "\xf1\xd6\xd4\x0f\x32\xbf\x96\x18\xa9\x1b\xb5\x98\x6f\xa1\xa2\xaf"
+ "\x04\x8a\x0e\x14\xdc\x51\xe5\x26\x7e\xb0\x5e\x12\x7d\x68\x9d\x0a"
+ "\xc6\xf1\xa7\xf1\x56\xce\x06\x63\x16\xb9\x71\xcc\x7a\x11\xd0\xfd"
+ "\x7a\x20\x93\xe2\x7c\xf2\xd0\x87\x27\xa4\xe6\x74\x8c\xc3\x2f\xd5"
+ "\x9c\x78\x10\xc5\xb9\x01\x9d\xf2\x1c\xdc\xc0\xbc\xa4\x32\xc0\xa3"
+ "\xee\xd0\x78\x53\x87\x50\x88\x77\x11\x43\x59\xce\xe4\xa0\x71\xcf";
+ vec.msgLen = 128;
+ #endif
+ vec.Qx = "07008ea40b08dbe76432096e80a2494c94982d2d5bcf98e6";
+ vec.Qy = "76fab681d00b414ea636ba215de26d98c41bd7f2e4d65477";
+ vec.d = "e14f37b3d1374ff8b03f41b9b3fdd2f0ebccf275d660d7f3";
+ vec.R = "6994d962bdd0d793ffddf855ec5bf2f91a9698b46258a63e";
+ vec.S = "02ba6465a234903744ab02bc8521405b73cf5fc00e1a9f41";
+ vec.curveName = "SECP192R1";
+ #ifndef NO_ASN
+ vec.r = (byte*)"\x69\x94\xd9\x62\xbd\xd0\xd7\x93\xff\xdd\xf8\x55"
+ "\xec\x5b\xf2\xf9\x1a\x96\x98\xb4\x62\x58\xa6\x3e";
+ vec.rSz = 24;
+ vec.s = (byte*)"\x02\xba\x64\x65\xa2\x34\x90\x37\x44\xab\x02\xbc"
+ "\x85\x21\x40\x5b\x73\xcf\x5f\xc0\x0e\x1a\x9f\x41";
+ vec.sSz = 24;
+ #endif
+ break;
+#endif /* HAVE_ECC192 */
+
+#if defined(HAVE_ECC224) || defined(HAVE_ALL_CURVES)
+ case 28:
+ /* first [P-224,SHA-1] vector from FIPS 186-3 NIST vectors */
+ #if 1
+ vec.msg = "\xb9\xa3\xb8\x6d\xb0\xba\x99\xfd\xc6\xd2\x94\x6b\xfe\xbe\x9c\xe8\x3f\x10\x74\xfc";
+ vec.msgLen = 20;
+ #else
+ /* This is the raw message prior to SHA-1 */
+ vec.msg =
+ "\x36\xc8\xb2\x29\x86\x48\x7f\x67\x7c\x18\xd0\x97\x2a\x9e\x20\x47"
+ "\xb3\xaf\xa5\x9e\xc1\x62\x76\x4e\xc3\x0b\x5b\x69\xe0\x63\x0f\x99"
+ "\x0d\x4e\x05\xc2\x73\xb0\xe5\xa9\xd4\x28\x27\xb6\x95\xfc\x2d\x64"
+ "\xd9\x13\x8b\x1c\xf4\xc1\x21\x55\x89\x4c\x42\x13\x21\xa7\xbb\x97"
+ "\x0b\xdc\xe0\xfb\xf0\xd2\xae\x85\x61\xaa\xd8\x71\x7f\x2e\x46\xdf"
+ "\xe3\xff\x8d\xea\xb4\xd7\x93\x23\x56\x03\x2c\x15\x13\x0d\x59\x9e"
+ "\x26\xc1\x0f\x2f\xec\x96\x30\x31\xac\x69\x38\xa1\x8d\x66\x45\x38"
+ "\xb9\x4d\xac\x55\x34\xef\x7b\x59\x94\x24\xd6\x9b\xe1\xf7\x1c\x20";
+ vec.msgLen = 128;
+ #endif
+ vec.Qx = "8a4dca35136c4b70e588e23554637ae251077d1365a6ba5db9585de7";
+ vec.Qy = "ad3dee06de0be8279d4af435d7245f14f3b4f82eb578e519ee0057b1";
+ vec.d = "97c4b796e1639dd1035b708fc00dc7ba1682cec44a1002a1a820619f";
+ vec.R = "147b33758321e722a0360a4719738af848449e2c1d08defebc1671a7";
+ vec.S = "24fc7ed7f1352ca3872aa0916191289e2e04d454935d50fe6af3ad5b";
+ vec.curveName = "SECP224R1";
+ #ifndef NO_ASN
+ vec.r = (byte*)"\x14\x7b\x33\x75\x83\x21\xe7\x22\xa0\x36\x0a\x47"
+ "\x19\x73\x8a\xf8\x48\x44\x9e\x2c\x1d\x08\xde\xfe"
+ "\xbc\x16\x71\xa7";
+ vec.rSz = 28;
+ vec.s = (byte*)"\x24\xfc\x7e\xd7\xf1\x35\x2c\xa3\x87\x2a\xa0\x91"
+ "\x61\x91\x28\x9e\x2e\x04\xd4\x54\x93\x5d\x50\xfe"
+ "\x6a\xf3\xad\x5b";
+ vec.sSz = 28;
+ #endif
+ break;
+#endif /* HAVE_ECC224 */
+
+#if defined(HAVE_ECC239) || defined(HAVE_ALL_CURVES)
+ case 30:
+ return 0;
+#endif /* HAVE_ECC239 */
+
+#if !defined(NO_ECC256) || defined(HAVE_ALL_CURVES)
+ case 32:
+ /* first [P-256,SHA-1] vector from FIPS 186-3 NIST vectors */
+ #if 1
+ vec.msg = "\xa3\xf9\x1a\xe2\x1b\xa6\xb3\x03\x98\x64\x47\x2f\x18\x41\x44\xc6\xaf\x62\xcd\x0e";
+ vec.msgLen = 20;
+ #else
+ /* This is the raw message prior to SHA-1 */
+ vec.msg =
+ "\xa2\x4b\x21\x76\x2e\x6e\xdb\x15\x3c\xc1\x14\x38\xdb\x0e\x92\xcd"
+ "\xf5\x2b\x86\xb0\x6c\xa9\x70\x16\x06\x27\x59\xc7\x0d\x36\xd1\x56"
+ "\x2c\xc9\x63\x0d\x7f\xc7\xc7\x74\xb2\x8b\x54\xe3\x1e\xf5\x58\x72"
+ "\xb2\xa6\x5d\xf1\xd7\xec\x26\xde\xbb\x33\xe7\xd9\x27\xef\xcc\xf4"
+ "\x6b\x63\xde\x52\xa4\xf4\x31\xea\xca\x59\xb0\x5d\x2e\xde\xc4\x84"
+ "\x5f\xff\xc0\xee\x15\x03\x94\xd6\x1f\x3d\xfe\xcb\xcd\xbf\x6f\x5a"
+ "\x73\x38\xd0\xbe\x3f\x2a\x77\x34\x51\x98\x3e\xba\xeb\x48\xf6\x73"
+ "\x8f\xc8\x95\xdf\x35\x7e\x1a\x48\xa6\x53\xbb\x35\x5a\x31\xa1\xb4"
+ vec.msgLen = 128;
+ #endif
+ vec.Qx = "fa2737fb93488d19caef11ae7faf6b7f4bcd67b286e3fc54e8a65c2b74aeccb0";
+ vec.Qy = "d4ccd6dae698208aa8c3a6f39e45510d03be09b2f124bfc067856c324f9b4d09";
+ vec.d = "be34baa8d040a3b991f9075b56ba292f755b90e4b6dc10dad36715c33cfdac25";
+ vec.R = "2b826f5d44e2d0b6de531ad96b51e8f0c56fdfead3c236892e4d84eacfc3b75c";
+ vec.S = "a2248b62c03db35a7cd63e8a120a3521a89d3d2f61ff99035a2148ae32e3a248";
+ #ifndef NO_ASN
+ vec.r = (byte*)"\x2b\x82\x6f\x5d\x44\xe2\xd0\xb6\xde\x53\x1a\xd9"
+ "\x6b\x51\xe8\xf0\xc5\x6f\xdf\xea\xd3\xc2\x36\x89"
+ "\x2e\x4d\x84\xea\xcf\xc3\xb7\x5c";
+ vec.rSz = 32;
+ vec.s = (byte*)"\xa2\x24\x8b\x62\xc0\x3d\xb3\x5a\x7c\xd6\x3e\x8a"
+ "\x12\x0a\x35\x21\xa8\x9d\x3d\x2f\x61\xff\x99\x03"
+ "\x5a\x21\x48\xae\x32\xe3\xa2\x48";
+ vec.sSz = 32;
+ #endif
+ vec.curveName = "SECP256R1";
+ break;
+#endif /* !NO_ECC256 */
+
+#if defined(HAVE_ECC320) || defined(HAVE_ALL_CURVES)
+ case 40:
+ return 0;
+#endif /* HAVE_ECC320 */
+
+#if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
+ case 48:
+ /* first [P-384,SHA-1] vector from FIPS 186-3 NIST vectors */
+ #if 1
+ vec.msg = "\x9b\x9f\x8c\x95\x35\xa5\xca\x26\x60\x5d\xb7\xf2\xfa\x57\x3b\xdf\xc3\x2e\xab\x8b";
+ vec.msgLen = 20;
+ #else
+ /* This is the raw message prior to SHA-1 */
+ vec.msg =
+ "\xab\xe1\x0a\xce\x13\xe7\xe1\xd9\x18\x6c\x48\xf7\x88\x9d\x51\x47"
+ "\x3d\x3a\x09\x61\x98\x4b\xc8\x72\xdf\x70\x8e\xcc\x3e\xd3\xb8\x16"
+ "\x9d\x01\xe3\xd9\x6f\xc4\xf1\xd5\xea\x00\xa0\x36\x92\xbc\xc5\xcf"
+ "\xfd\x53\x78\x7c\x88\xb9\x34\xaf\x40\x4c\x03\x9d\x32\x89\xb5\xba"
+ "\xc5\xae\x7d\xb1\x49\x68\x75\xb5\xdc\x73\xc3\x09\xf9\x25\xc1\x3d"
+ "\x1c\x01\xab\xda\xaf\xeb\xcd\xac\x2c\xee\x43\x39\x39\xce\x8d\x4a"
+ "\x0a\x5d\x57\xbb\x70\x5f\x3b\xf6\xec\x08\x47\x95\x11\xd4\xb4\xa3"
+ "\x21\x1f\x61\x64\x9a\xd6\x27\x43\x14\xbf\x0d\x43\x8a\x81\xe0\x60"
+ vec.msgLen = 128;
+ #endif
+ vec.Qx = "e55fee6c49d8d523f5ce7bf9c0425ce4ff650708b7de5cfb095901523979a7f042602db30854735369813b5c3f5ef868";
+ vec.Qy = "28f59cc5dc509892a988d38a8e2519de3d0c4fd0fbdb0993e38f18506c17606c5e24249246f1ce94983a5361c5be983e";
+ vec.d = "a492ce8fa90084c227e1a32f7974d39e9ff67a7e8705ec3419b35fb607582bebd461e0b1520ac76ec2dd4e9b63ebae71";
+ vec.R = "6820b8585204648aed63bdff47f6d9acebdea62944774a7d14f0e14aa0b9a5b99545b2daee6b3c74ebf606667a3f39b7";
+ vec.S = "491af1d0cccd56ddd520b233775d0bc6b40a6255cc55207d8e9356741f23c96c14714221078dbd5c17f4fdd89b32a907";
+ vec.curveName = "SECP384R1";
+ #ifndef NO_ASN
+ vec.r = (byte*)"\x68\x20\xb8\x58\x52\x04\x64\x8a\xed\x63\xbd\xff"
+ "\x47\xf6\xd9\xac\xeb\xde\xa6\x29\x44\x77\x4a\x7d"
+ "\x14\xf0\xe1\x4a\xa0\xb9\xa5\xb9\x95\x45\xb2\xda"
+ "\xee\x6b\x3c\x74\xeb\xf6\x06\x66\x7a\x3f\x39\xb7";
+ vec.rSz = 48;
+ vec.s = (byte*)"\x49\x1a\xf1\xd0\xcc\xcd\x56\xdd\xd5\x20\xb2\x33"
+ "\x77\x5d\x0b\xc6\xb4\x0a\x62\x55\xcc\x55\x20\x7d"
+ "\x8e\x93\x56\x74\x1f\x23\xc9\x6c\x14\x71\x42\x21"
+ "\x07\x8d\xbd\x5c\x17\xf4\xfd\xd8\x9b\x32\xa9\x07";
+ vec.sSz = 48;
+ #endif
+ break;
+#endif /* HAVE_ECC384 */
+
+#if defined(HAVE_ECC512) || defined(HAVE_ALL_CURVES)
+ case 64:
+ return 0;
+#endif /* HAVE_ECC512 */
+
+#if defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES)
+ case 66:
+ /* first [P-521,SHA-1] vector from FIPS 186-3 NIST vectors */
+ #if 1
+ vec.msg = "\x1b\xf7\x03\x9c\xca\x23\x94\x27\x3f\x11\xa1\xd4\x8d\xcc\xb4\x46\x6f\x31\x61\xdf";
+ vec.msgLen = 20;
+ #else
+ /* This is the raw message prior to SHA-1 */
+ vec.msg =
+ "\x50\x3f\x79\x39\x34\x0a\xc7\x23\xcd\x4a\x2f\x4e\x6c\xcc\x27\x33"
+ "\x38\x3a\xca\x2f\xba\x90\x02\x19\x9d\x9e\x1f\x94\x8b\xe0\x41\x21"
+ "\x07\xa3\xfd\xd5\x14\xd9\x0c\xd4\xf3\x7c\xc3\xac\x62\xef\x00\x3a"
+ "\x2d\xb1\xd9\x65\x7a\xb7\x7f\xe7\x55\xbf\x71\xfa\x59\xe4\xd9\x6e"
+ "\xa7\x2a\xe7\xbf\x9d\xe8\x7d\x79\x34\x3b\xc1\xa4\xbb\x14\x4d\x16"
+ "\x28\xd1\xe9\xe9\xc8\xed\x80\x8b\x96\x2c\x54\xe5\xf9\x6d\x53\xda"
+ "\x14\x7a\x96\x38\xf9\x4a\x91\x75\xd8\xed\x61\x05\x5f\x0b\xa5\x73"
+ "\xa8\x2b\xb7\xe0\x18\xee\xda\xc4\xea\x7b\x36\x2e\xc8\x9c\x38\x2b"
+ vec.msgLen = 128;
+ #endif
+ vec.Qx = "12fbcaeffa6a51f3ee4d3d2b51c5dec6d7c726ca353fc014ea2bf7cfbb9b910d32cbfa6a00fe39b6cdb8946f22775398b2e233c0cf144d78c8a7742b5c7a3bb5d23";
+ vec.Qy = "09cdef823dd7bf9a79e8cceacd2e4527c231d0ae5967af0958e931d7ddccf2805a3e618dc3039fec9febbd33052fe4c0fee98f033106064982d88f4e03549d4a64d";
+ vec.d = "1bd56bd106118eda246155bd43b42b8e13f0a6e25dd3bb376026fab4dc92b6157bc6dfec2d15dd3d0cf2a39aa68494042af48ba9601118da82c6f2108a3a203ad74";
+ vec.R = "0bd117b4807710898f9dd7778056485777668f0e78e6ddf5b000356121eb7a220e9493c7f9a57c077947f89ac45d5acb6661bbcd17abb3faea149ba0aa3bb1521be";
+ vec.S = "019cd2c5c3f9870ecdeb9b323abdf3a98cd5e231d85c6ddc5b71ab190739f7f226e6b134ba1d5889ddeb2751dabd97911dff90c34684cdbe7bb669b6c3d22f2480c";
+ vec.curveName = "SECP521R1";
+ #ifndef NO_ASN
+ vec.r = (byte*)"\x00\xbd\x11\x7b\x48\x07\x71\x08\x98\xf9\xdd\x77"
+ "\x78\x05\x64\x85\x77\x76\x68\xf0\xe7\x8e\x6d\xdf"
+ "\x5b\x00\x03\x56\x12\x1e\xb7\xa2\x20\xe9\x49\x3c"
+ "\x7f\x9a\x57\xc0\x77\x94\x7f\x89\xac\x45\xd5\xac"
+ "\xb6\x66\x1b\xbc\xd1\x7a\xbb\x3f\xae\xa1\x49\xba"
+ "\x0a\xa3\xbb\x15\x21\xbe";
+ vec.rSz = 66;
+ vec.s = (byte*)"\x00\x19\xcd\x2c\x5c\x3f\x98\x70\xec\xde\xb9\xb3"
+ "\x23\xab\xdf\x3a\x98\xcd\x5e\x23\x1d\x85\xc6\xdd"
+ "\xc5\xb7\x1a\xb1\x90\x73\x9f\x7f\x22\x6e\x6b\x13"
+ "\x4b\xa1\xd5\x88\x9d\xde\xb2\x75\x1d\xab\xd9\x79"
+ "\x11\xdf\xf9\x0c\x34\x68\x4c\xdb\xe7\xbb\x66\x9b"
+ "\x6c\x3d\x22\xf2\x48\x0c";
+ vec.sSz = 66;
+ #endif
+ break;
+#endif /* HAVE_ECC521 */
+ default:
+ return NOT_COMPILED_IN; /* Invalid key size / Not supported */
+ }; /* Switch */
+
+ ret = ecc_test_vector_item(&vec);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+#if defined(HAVE_ECC_SIGN) && defined(WOLFSSL_ECDSA_SET_K)
+static int ecc_test_sign_vectors(WC_RNG* rng)
+{
+ int ret;
+ ecc_key key;
+ byte sig[72];
+ word32 sigSz;
+ unsigned char hash[32] = "test wolfSSL deterministic sign";
+ const char* dIUT = "7d7dc5f71eb29ddaf80d6214632eeae03d9058af1fb6d22ed80badb62bc1a534";
+ const char* QIUTx = "ead218590119e8876b29146ff89ca61770c4edbbf97d38ce385ed281d8a6b230";
+ const char* QIUTy = "28af61281fd35e2fa7002523acc85a429cb06ee6648325389f59edfce1405141";
+ const byte k[1] = { 0x02 };
+ const byte expSig[71] = {
+ 0x30, 0x45, 0x02, 0x20, 0x7c, 0xf2, 0x7b, 0x18,
+ 0x8d, 0x03, 0x4f, 0x7e, 0x8a, 0x52, 0x38, 0x03,
+ 0x04, 0xb5, 0x1a, 0xc3, 0xc0, 0x89, 0x69, 0xe2,
+ 0x77, 0xf2, 0x1b, 0x35, 0xa6, 0x0b, 0x48, 0xfc,
+ 0x47, 0x66, 0x99, 0x78, 0x02, 0x21, 0x00, 0xa8,
+ 0x43, 0xa0, 0xce, 0x6c, 0x5e, 0x17, 0x8a, 0x53,
+ 0x4d, 0xaf, 0xd2, 0x95, 0x78, 0x9f, 0x84, 0x4f,
+ 0x94, 0xb8, 0x75, 0xa3, 0x19, 0xa5, 0xd4, 0xdf,
+ 0xe1, 0xd4, 0x5e, 0x9d, 0x97, 0xfe, 0x81
+ };
+
+ ret = wc_ecc_init_ex(&key, HEAP_HINT, devId);
+ if (ret != 0) {
+ return ret;
+ }
+ ret = wc_ecc_import_raw(&key, QIUTx, QIUTy, dIUT, "SECP256R1");
+ if (ret != 0) {
+ goto done;
+ }
+
+ ret = wc_ecc_sign_set_k(k, sizeof(k), &key);
+ if (ret != 0) {
+ goto done;
+ }
+
+ sigSz = sizeof(sig);
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_sign_hash(hash, sizeof(hash), sig, &sigSz, rng, &key);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0) {
+ goto done;
+ }
+ TEST_SLEEP();
+
+ if (sigSz != sizeof(expSig)) {
+ ret = -9610;
+ goto done;
+ }
+ if (XMEMCMP(sig, expSig, sigSz) != 0) {
+ ret = -9611;
+ goto done;
+ }
+
+ sigSz = sizeof(sig);
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_sign_hash(hash, sizeof(hash), sig, &sigSz, rng, &key);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0) {
+ goto done;
+ }
+ TEST_SLEEP();
+
+done:
+ wc_ecc_free(&key);
+ return ret;
+}
+#endif
+
+#ifdef HAVE_ECC_CDH
+static int ecc_test_cdh_vectors(void)
+{
+ int ret;
+ ecc_key pub_key, priv_key;
+ byte sharedA[32] = {0}, sharedB[32] = {0};
+ word32 x, z;
+
+ const char* QCAVSx = "700c48f77f56584c5cc632ca65640db91b6bacce3a4df6b42ce7cc838833d287";
+ const char* QCAVSy = "db71e509e3fd9b060ddb20ba5c51dcc5948d46fbf640dfe0441782cab85fa4ac";
+ const char* dIUT = "7d7dc5f71eb29ddaf80d6214632eeae03d9058af1fb6d22ed80badb62bc1a534";
+ const char* QIUTx = "ead218590119e8876b29146ff89ca61770c4edbbf97d38ce385ed281d8a6b230";
+ const char* QIUTy = "28af61281fd35e2fa7002523acc85a429cb06ee6648325389f59edfce1405141";
+ const char* ZIUT = "46fc62106420ff012e54a434fbdd2d25ccc5852060561e68040dd7778997bd7b";
+
+ /* setup private and public keys */
+ ret = wc_ecc_init_ex(&pub_key, HEAP_HINT, devId);
if (ret != 0)
- return -1001;
+ return ret;
+ ret = wc_ecc_init_ex(&priv_key, HEAP_HINT, devId);
+ if (ret != 0) {
+ wc_ecc_free(&pub_key);
+ return ret;
+ }
+ wc_ecc_set_flags(&pub_key, WC_ECC_FLAG_COFACTOR);
+ wc_ecc_set_flags(&priv_key, WC_ECC_FLAG_COFACTOR);
+ ret = wc_ecc_import_raw(&pub_key, QCAVSx, QCAVSy, NULL, "SECP256R1");
+ if (ret != 0)
+ goto done;
+ ret = wc_ecc_import_raw(&priv_key, QIUTx, QIUTy, dIUT, "SECP256R1");
+ if (ret != 0)
+ goto done;
+
+ /* compute ECC Cofactor shared secret */
+ x = sizeof(sharedA);
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &priv_key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_shared_secret(&priv_key, &pub_key, sharedA, &x);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0) {
+ goto done;
+ }
+ TEST_SLEEP();
+
+ /* read in expected Z */
+ z = sizeof(sharedB);
+ ret = Base16_Decode((const byte*)ZIUT, (word32)XSTRLEN(ZIUT), sharedB, &z);
+ if (ret != 0)
+ goto done;
+
+ /* compare results */
+ if (x != z || XMEMCMP(sharedA, sharedB, x)) {
+ ERROR_OUT(-9612, done);
+ }
+
+done:
+ wc_ecc_free(&priv_key);
+ wc_ecc_free(&pub_key);
+ return ret;
+}
+#endif /* HAVE_ECC_CDH */
+#endif /* HAVE_ECC_VECTOR_TEST */
+
+#ifdef HAVE_ECC_KEY_IMPORT
+/* returns 0 on success */
+static int ecc_test_make_pub(WC_RNG* rng)
+{
+ ecc_key key;
+ unsigned char* exportBuf;
+ unsigned char* tmp;
+ unsigned char msg[] = "test wolfSSL ECC public gen";
+ word32 x, tmpSz;
+ int ret = 0;
+ ecc_point* pubPoint = NULL;
+#if defined(HAVE_ECC_DHE) && defined(HAVE_ECC_KEY_EXPORT)
+ ecc_key pub;
+#endif
+#ifdef HAVE_ECC_VERIFY
+ int verify = 0;
+#endif
+#ifndef USE_CERT_BUFFERS_256
+ XFILE file;
+#endif
+
+ wc_ecc_init_ex(&key, HEAP_HINT, devId);
+
+#ifdef USE_CERT_BUFFERS_256
+ tmp = (byte*)XMALLOC((size_t)sizeof_ecc_key_der_256, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL) {
+ return -9613;
+ }
+ exportBuf = (byte*)XMALLOC((size_t)sizeof_ecc_key_der_256, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (exportBuf == NULL) {
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -9614;
+ }
+ XMEMCPY(tmp, ecc_key_der_256, (size_t)sizeof_ecc_key_der_256);
+ tmpSz = (size_t)sizeof_ecc_key_der_256;
+#else
+ tmp = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL) {
+ return -9615;
+ }
+ exportBuf = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (exportBuf == NULL) {
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -9616;
+ }
+ file = XFOPEN(eccKeyDerFile, "rb");
+ if (!file) {
+ ERROR_OUT(-9617, done);
+ }
+
+ tmpSz = (word32)XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+#endif /* USE_CERT_BUFFERS_256 */
+
+ /* import private only then test with */
+ ret = wc_ecc_import_private_key(tmp, tmpSz, NULL, 0, NULL);
+ if (ret == 0) {
+ ERROR_OUT(-9618, done);
+ }
+
+ ret = wc_ecc_import_private_key(NULL, tmpSz, NULL, 0, &key);
+ if (ret == 0) {
+ ERROR_OUT(-9619, done);
+ }
+
+ x = 0;
+ ret = wc_EccPrivateKeyDecode(tmp, &x, &key, tmpSz);
+ if (ret != 0) {
+ ERROR_OUT(-9620, done);
+ }
+
+#ifdef HAVE_ECC_KEY_EXPORT
+ x = FOURK_BUF;
+ ret = wc_ecc_export_private_only(&key, exportBuf, &x);
+ if (ret != 0) {
+ ERROR_OUT(-9621, done);
+ }
+
+ /* make private only key */
+ wc_ecc_free(&key);
+ wc_ecc_init_ex(&key, HEAP_HINT, devId);
+ ret = wc_ecc_import_private_key(exportBuf, x, NULL, 0, &key);
+ if (ret != 0) {
+ ERROR_OUT(-9622, done);
+ }
+
+ x = FOURK_BUF;
+ ret = wc_ecc_export_x963_ex(&key, exportBuf, &x, 0);
+ if (ret == 0) {
+ ERROR_OUT(-9623, done);
+ }
+
+#endif /* HAVE_ECC_KEY_EXPORT */
+
+ ret = wc_ecc_make_pub(NULL, NULL);
+ if (ret == 0) {
+ ERROR_OUT(-9624, done);
+ }
+ TEST_SLEEP();
+
+ pubPoint = wc_ecc_new_point_h(HEAP_HINT);
+ if (pubPoint == NULL) {
+ ERROR_OUT(-9625, done);
+ }
+
+ ret = wc_ecc_make_pub(&key, pubPoint);
+ if (ret != 0) {
+ ERROR_OUT(-9626, done);
+ }
+
+ TEST_SLEEP();
+
+#ifdef HAVE_ECC_KEY_EXPORT
+ /* export should still fail, is private only key */
+ x = FOURK_BUF;
+ ret = wc_ecc_export_x963_ex(&key, exportBuf, &x, 0);
+ if (ret == 0) {
+ ERROR_OUT(-9627, done);
+ }
+#endif /* HAVE_ECC_KEY_EXPORT */
+#if defined(WOLFSSL_CRYPTOCELL)
+ /* create a new key since building private key from public key is unsupported */
+ ret = wc_ecc_make_key(rng, 32, &key);
+ if (ret == 0) {
+ ERROR_OUT(-9628, done);
+ }
+#endif
+#ifdef HAVE_ECC_SIGN
+ tmpSz = FOURK_BUF;
+ ret = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_sign_hash(msg, sizeof(msg), tmp, &tmpSz, rng, &key);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0) {
+ ERROR_OUT(-9629, done);
+ }
+ TEST_SLEEP();
+
+#ifdef HAVE_ECC_VERIFY
+ /* try verify with private only key */
+ ret = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_verify_hash(tmp, tmpSz, msg, sizeof(msg), &verify, &key);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0) {
+ ERROR_OUT(-9630, done);
+ }
+
+ if (verify != 1) {
+ ERROR_OUT(-9631, done);
+ }
+ TEST_SLEEP();
+#ifdef HAVE_ECC_KEY_EXPORT
+ /* exporting the public part should now work */
+ x = FOURK_BUF;
+ ret = wc_ecc_export_x963_ex(&key, exportBuf, &x, 0);
+ if (ret != 0) {
+ ERROR_OUT(-9632, done);
+ }
+#endif /* HAVE_ECC_KEY_EXPORT */
+#endif /* HAVE_ECC_VERIFY */
+
+#endif /* HAVE_ECC_SIGN */
+
+#if defined(HAVE_ECC_DHE) && defined(HAVE_ECC_KEY_EXPORT)
+ /* now test private only key with creating a shared secret */
+ x = FOURK_BUF;
+ ret = wc_ecc_export_private_only(&key, exportBuf, &x);
+ if (ret != 0) {
+ ERROR_OUT(-9633, done);
+ }
+
+ /* make private only key */
+ wc_ecc_free(&key);
+ wc_ecc_init_ex(&key, HEAP_HINT, devId);
+ ret = wc_ecc_import_private_key(exportBuf, x, NULL, 0, &key);
+ if (ret != 0) {
+ ERROR_OUT(-9634, done);
+ }
+
+ /* check that public export fails with private only key */
+ x = FOURK_BUF;
+ ret = wc_ecc_export_x963_ex(&key, exportBuf, &x, 0);
+ if (ret == 0) {
+ ERROR_OUT(-9635, done);
+ }
+
+ /* make public key for shared secret */
+ wc_ecc_init_ex(&pub, HEAP_HINT, devId);
+ ret = wc_ecc_make_key(rng, 32, &pub);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &pub.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-9636, done);
+ }
+ TEST_SLEEP();
+
+ x = FOURK_BUF;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0) {
+ ret = wc_ecc_shared_secret(&key, &pub, exportBuf, &x);
+ }
+ } while (ret == WC_PENDING_E);
+ wc_ecc_free(&pub);
+ if (ret != 0) {
+ ERROR_OUT(-9637, done);
+ }
+ TEST_SLEEP();
+#endif /* HAVE_ECC_DHE && HAVE_ECC_KEY_EXPORT */
- wc_ecc_init(&userA);
- wc_ecc_init(&userB);
- wc_ecc_init(&pubKey);
+ ret = 0;
- ret = wc_ecc_make_key(&rng, 32, &userA);
+done:
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(exportBuf, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+
+ wc_ecc_del_point_h(pubPoint, HEAP_HINT);
+ wc_ecc_free(&key);
+
+ return ret;
+}
+#endif /* HAVE_ECC_KEY_IMPORT */
+
+
+#ifdef WOLFSSL_KEY_GEN
+static int ecc_test_key_gen(WC_RNG* rng, int keySize)
+{
+ int ret = 0;
+ int derSz;
+#ifdef HAVE_PKCS8
+ word32 pkcs8Sz;
+#endif
+ byte* der;
+ byte* pem;
+ ecc_key userA;
+
+ der = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (der == NULL) {
+ return -9638;
+ }
+ pem = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pem == NULL) {
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -9639;
+ }
+
+ ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+ if (ret != 0)
+ goto done;
+
+ ret = wc_ecc_make_key(rng, keySize, &userA);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
if (ret != 0)
- return -1014;
+ goto done;
+ TEST_SLEEP();
ret = wc_ecc_check_key(&userA);
if (ret != 0)
- return -1024;
+ goto done;
+ TEST_SLEEP();
- ret = wc_ecc_make_key(&rng, 32, &userB);
+ derSz = wc_EccKeyToDer(&userA, der, FOURK_BUF);
+ if (derSz < 0) {
+ ERROR_OUT(derSz, done);
+ }
+
+ ret = SaveDerAndPem(der, derSz, pem, FOURK_BUF, eccCaKeyTempFile,
+ eccCaKeyPemFile, ECC_PRIVATEKEY_TYPE, -8347);
+ if (ret != 0) {
+ goto done;
+ }
+ /* test export of public key */
+ derSz = wc_EccPublicKeyToDer(&userA, der, FOURK_BUF, 1);
+ if (derSz < 0) {
+ ERROR_OUT(derSz, done);
+ }
+ if (derSz == 0) {
+ ERROR_OUT(-9640, done);
+ }
+
+ ret = SaveDerAndPem(der, derSz, NULL, 0, eccPubKeyDerFile,
+ NULL, 0, -8348);
+ if (ret != 0) {
+ goto done;
+ }
+
+#ifdef HAVE_PKCS8
+ /* test export of PKCS#8 unencrypted private key */
+ pkcs8Sz = FOURK_BUF;
+ derSz = wc_EccPrivateKeyToPKCS8(&userA, der, &pkcs8Sz);
+ if (derSz < 0) {
+ ERROR_OUT(derSz, done);
+ }
+
+ if (derSz == 0) {
+ ERROR_OUT(-9641, done);
+ }
+
+ ret = SaveDerAndPem(der, derSz, NULL, 0, eccPkcs8KeyDerFile,
+ NULL, 0, -8349);
+ if (ret != 0) {
+ goto done;
+ }
+#endif /* HAVE_PKCS8 */
+
+done:
+
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_ecc_free(&userA);
+
+ return ret;
+}
+#endif /* WOLFSSL_KEY_GEN */
+
+static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
+ int curve_id, const ecc_set_type* dp)
+{
+#if defined(HAVE_ECC_DHE) || defined(HAVE_ECC_CDH)
+ DECLARE_VAR(sharedA, byte, ECC_SHARED_SIZE, HEAP_HINT);
+ DECLARE_VAR(sharedB, byte, ECC_SHARED_SIZE, HEAP_HINT);
+#endif
+#ifdef HAVE_ECC_KEY_EXPORT
+ byte exportBuf[MAX_ECC_BYTES * 2 + 32];
+#endif
+ word32 x;
+#if defined(HAVE_ECC_DHE) || defined(HAVE_ECC_CDH)
+ word32 y;
+#endif
+#ifdef HAVE_ECC_SIGN
+ DECLARE_VAR(sig, byte, ECC_SIG_SIZE, HEAP_HINT);
+ DECLARE_VAR(digest, byte, ECC_DIGEST_SIZE, HEAP_HINT);
+ int i;
+#ifdef HAVE_ECC_VERIFY
+ int verify;
+#endif /* HAVE_ECC_VERIFY */
+#endif /* HAVE_ECC_SIGN */
+ int ret;
+ ecc_key userA, userB, pubKey;
+ int curveSize;
+
+ (void)testVerifyCount;
+ (void)dp;
+ (void)x;
+
+ XMEMSET(&userA, 0, sizeof(ecc_key));
+ XMEMSET(&userB, 0, sizeof(ecc_key));
+ XMEMSET(&pubKey, 0, sizeof(ecc_key));
+
+ ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+ if (ret != 0)
+ goto done;
+ ret = wc_ecc_init_ex(&userB, HEAP_HINT, devId);
if (ret != 0)
- return -1002;
+ goto done;
+ ret = wc_ecc_init_ex(&pubKey, HEAP_HINT, devId);
+ if (ret != 0)
+ goto done;
- x = sizeof(sharedA);
- ret = wc_ecc_shared_secret(&userA, &userB, sharedA, &x);
+#ifdef WOLFSSL_CUSTOM_CURVES
+ if (dp != NULL) {
+ ret = wc_ecc_set_custom_curve(&userA, dp);
+ if (ret != 0)
+ goto done;
+ ret = wc_ecc_set_custom_curve(&userB, dp);
+ if (ret != 0)
+ goto done;
+ }
+#endif
+ ret = wc_ecc_make_key_ex(rng, keySize, &userA, curve_id);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
if (ret != 0)
- return -1015;
+ goto done;
+ TEST_SLEEP();
+
+ if (wc_ecc_get_curve_idx(curve_id) != -1) {
+ curveSize = wc_ecc_get_curve_size_from_id(userA.dp->id);
+ if (curveSize != userA.dp->size) {
+ ret = -9642;
+ goto done;
+ }
+ }
- y = sizeof(sharedB);
- ret = wc_ecc_shared_secret(&userB, &userA, sharedB, &y);
+ ret = wc_ecc_check_key(&userA);
+ if (ret != 0)
+ goto done;
+ TEST_SLEEP();
+
+ ret = wc_ecc_make_key_ex(rng, keySize, &userB, curve_id);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0)
+ goto done;
+ TEST_SLEEP();
+
+ /* only perform the below tests if the key size matches */
+ if (dp == NULL && keySize > 0 && wc_ecc_size(&userA) != keySize) {
+ ret = ECC_CURVE_OID_E;
+ goto done;
+ }
+
+#ifdef HAVE_ECC_DHE
+ x = ECC_SHARED_SIZE;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_shared_secret(&userA, &userB, sharedA, &x);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0) {
+ goto done;
+ }
+ TEST_SLEEP();
+
+ y = ECC_SHARED_SIZE;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_shared_secret(&userB, &userA, sharedB, &y);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0)
+ goto done;
+
+ if (y != x)
+ ERROR_OUT(-9643, done);
+
+ if (XMEMCMP(sharedA, sharedB, x))
+ ERROR_OUT(-9644, done);
+ TEST_SLEEP();
+#endif /* HAVE_ECC_DHE */
+
+#ifdef HAVE_ECC_CDH
+ /* add cofactor flag */
+ wc_ecc_set_flags(&userA, WC_ECC_FLAG_COFACTOR);
+ wc_ecc_set_flags(&userB, WC_ECC_FLAG_COFACTOR);
+
+ x = ECC_SHARED_SIZE;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_shared_secret(&userA, &userB, sharedA, &x);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0) {
+ goto done;
+ }
+ TEST_SLEEP();
+ y = ECC_SHARED_SIZE;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_shared_secret(&userB, &userA, sharedB, &y);
+ } while (ret == WC_PENDING_E);
if (ret != 0)
- return -1003;
+ goto done;
if (y != x)
- return -1004;
+ ERROR_OUT(-9645, done);
+
+ if (XMEMCMP(sharedA, sharedB, x))
+ ERROR_OUT(-9646, done);
+ TEST_SLEEP();
- if (memcmp(sharedA, sharedB, x))
- return -1005;
+ /* remove cofactor flag */
+ wc_ecc_set_flags(&userA, 0);
+ wc_ecc_set_flags(&userB, 0);
+#endif /* HAVE_ECC_CDH */
+#ifdef HAVE_ECC_KEY_EXPORT
x = sizeof(exportBuf);
- ret = wc_ecc_export_x963(&userA, exportBuf, &x);
+ ret = wc_ecc_export_x963_ex(&userA, exportBuf, &x, 0);
if (ret != 0)
- return -1006;
+ goto done;
- ret = wc_ecc_import_x963(exportBuf, x, &pubKey);
+#ifdef HAVE_ECC_KEY_IMPORT
+ #ifdef WOLFSSL_CUSTOM_CURVES
+ if (dp != NULL) {
+ ret = wc_ecc_set_custom_curve(&pubKey, dp);
+ if (ret != 0) goto done;
+ }
+ #endif
+ ret = wc_ecc_import_x963_ex(exportBuf, x, &pubKey, curve_id);
+ if (ret != 0)
+ goto done;
+#ifdef HAVE_ECC_DHE
+ y = ECC_SHARED_SIZE;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+ } while (ret == WC_PENDING_E);
if (ret != 0)
- return -1007;
+ goto done;
- y = sizeof(sharedB);
- ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+ if (XMEMCMP(sharedA, sharedB, y))
+ ERROR_OUT(-9647, done);
+ TEST_SLEEP();
+#endif /* HAVE_ECC_DHE */
+
+ #ifdef HAVE_COMP_KEY
+ /* try compressed export / import too */
+ x = sizeof(exportBuf);
+ ret = wc_ecc_export_x963_ex(&userA, exportBuf, &x, 1);
+ if (ret != 0)
+ goto done;
+ wc_ecc_free(&pubKey);
+ ret = wc_ecc_init_ex(&pubKey, HEAP_HINT, devId);
+ if (ret != 0)
+ goto done;
+ #ifdef WOLFSSL_CUSTOM_CURVES
+ if (dp != NULL) {
+ ret = wc_ecc_set_custom_curve(&pubKey, dp);
+ if (ret != 0) goto done;
+ }
+ #endif
+ ret = wc_ecc_import_x963_ex(exportBuf, x, &pubKey, curve_id);
+ if (ret != 0)
+ goto done;
+
+ #ifdef HAVE_ECC_DHE
+ y = ECC_SHARED_SIZE;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0)
+ goto done;
+
+ if (XMEMCMP(sharedA, sharedB, y))
+ ERROR_OUT(-9648, done);
+ TEST_SLEEP();
+ #endif /* HAVE_ECC_DHE */
+ #endif /* HAVE_COMP_KEY */
+
+#endif /* HAVE_ECC_KEY_IMPORT */
+#endif /* HAVE_ECC_KEY_EXPORT */
+
+#ifdef HAVE_ECC_SIGN
+ /* ECC w/out Shamir has issue with all 0 digest */
+ /* WC_BIGINT doesn't have 0 len well on hardware */
+#if defined(ECC_SHAMIR) && !defined(WOLFSSL_ASYNC_CRYPT)
+ /* test DSA sign hash with zeros */
+ for (i = 0; i < (int)ECC_DIGEST_SIZE; i++) {
+ digest[i] = 0;
+ }
+
+ x = ECC_SIG_SIZE;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_sign_hash(digest, ECC_DIGEST_SIZE, sig, &x, rng,
+ &userA);
+ } while (ret == WC_PENDING_E);
if (ret != 0)
- return -1008;
+ goto done;
+ TEST_SLEEP();
+
+#ifdef HAVE_ECC_VERIFY
+ for (i=0; i<testVerifyCount; i++) {
+ verify = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_verify_hash(sig, x, digest, ECC_DIGEST_SIZE,
+ &verify, &userA);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0)
+ goto done;
+ if (verify != 1)
+ ERROR_OUT(-9649, done);
+ TEST_SLEEP();
+ }
+#endif /* HAVE_ECC_VERIFY */
+#endif /* ECC_SHAMIR && !WOLFSSL_ASYNC_CRYPT */
+
+ /* test DSA sign hash with sequence (0,1,2,3,4,...) */
+ for (i = 0; i < (int)ECC_DIGEST_SIZE; i++) {
+ digest[i] = (byte)i;
+ }
- if (memcmp(sharedA, sharedB, y))
- return -1009;
+ x = ECC_SIG_SIZE;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_sign_hash(digest, ECC_DIGEST_SIZE, sig, &x, rng,
+ &userA);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0)
+ ERROR_OUT(-9650, done);
+ TEST_SLEEP();
+
+#ifdef HAVE_ECC_VERIFY
+ for (i=0; i<testVerifyCount; i++) {
+ verify = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_verify_hash(sig, x, digest, ECC_DIGEST_SIZE,
+ &verify, &userA);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0)
+ goto done;
+ if (verify != 1)
+ ERROR_OUT(-9651, done);
+ TEST_SLEEP();
+ }
+#endif /* HAVE_ECC_VERIFY */
+#endif /* HAVE_ECC_SIGN */
-#ifdef HAVE_COMP_KEY
- /* try compressed export / import too */
+#ifdef HAVE_ECC_KEY_EXPORT
x = sizeof(exportBuf);
- ret = wc_ecc_export_x963_ex(&userA, exportBuf, &x, 1);
+ ret = wc_ecc_export_private_only(&userA, exportBuf, &x);
if (ret != 0)
- return -1010;
+ goto done;
+#endif /* HAVE_ECC_KEY_EXPORT */
+done:
wc_ecc_free(&pubKey);
- wc_ecc_init(&pubKey);
- ret = wc_ecc_import_x963(exportBuf, x, &pubKey);
+ wc_ecc_free(&userB);
+ wc_ecc_free(&userA);
- if (ret != 0)
- return -1011;
+#if defined(HAVE_ECC_DHE) || defined(HAVE_ECC_CDH)
+ FREE_VAR(sharedA, HEAP_HINT);
+ FREE_VAR(sharedB, HEAP_HINT);
+#endif
+#ifdef HAVE_ECC_SIGN
+ FREE_VAR(sig, HEAP_HINT);
+ FREE_VAR(digest, HEAP_HINT);
#endif
- y = sizeof(sharedB);
- ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+ return ret;
+}
- if (ret != 0)
- return -1012;
+#undef ECC_TEST_VERIFY_COUNT
+#define ECC_TEST_VERIFY_COUNT 2
+static int ecc_test_curve(WC_RNG* rng, int keySize)
+{
+ int ret;
- if (memcmp(sharedA, sharedB, y))
- return -1013;
+ ret = ecc_test_curve_size(rng, keySize, ECC_TEST_VERIFY_COUNT,
+ ECC_CURVE_DEF, NULL);
+ if (ret < 0) {
+ if (ret == ECC_CURVE_OID_E) {
+ /* ignore error for curves not found */
+ /* some curve sizes are only available with:
+ HAVE_ECC_SECPR2, HAVE_ECC_SECPR3, HAVE_ECC_BRAINPOOL
+ and HAVE_ECC_KOBLITZ */
+ }
+ else {
+ printf("ecc_test_curve_size %d failed!: %d\n", keySize, ret);
+ return ret;
+ }
+ }
- /* test DSA sign hash */
- for (i = 0; i < (int)sizeof(digest); i++)
- digest[i] = (byte)i;
+#ifdef HAVE_ECC_VECTOR_TEST
+ ret = ecc_test_vector(keySize);
+ if (ret < 0) {
+ printf("ecc_test_vector %d failed!: %d\n", keySize, ret);
+ return ret;
+ }
+#endif
- x = sizeof(sig);
- ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, &rng, &userA);
+#ifdef WOLFSSL_KEY_GEN
+ ret = ecc_test_key_gen(rng, keySize);
+ if (ret < 0) {
+ if (ret == ECC_CURVE_OID_E) {
+ /* ignore error for curves not found */
+ }
+ else {
+ printf("ecc_test_key_gen %d failed!: %d\n", keySize, ret);
+ return ret;
+ }
+ }
+#endif
+
+ return 0;
+}
+
+#if !defined(NO_ECC256) || defined(HAVE_ALL_CURVES)
+#if !defined(WOLFSSL_ATECC508A) && defined(HAVE_ECC_KEY_IMPORT) && \
+ defined(HAVE_ECC_KEY_EXPORT)
+static int ecc_point_test(void)
+{
+ int ret;
+ ecc_point* point;
+ ecc_point* point2;
+#ifdef HAVE_COMP_KEY
+ ecc_point* point3;
+ ecc_point* point4;
+#endif
+ word32 outLen;
+ byte out[65];
+ byte der[] = { 0x04, /* = Uncompressed */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
+#ifdef HAVE_COMP_KEY
+ byte derComp0[] = { 0x02, /* = Compressed, y even */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
+ byte derComp1[] = { 0x03, /* = Compressed, y odd */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
+#endif
+ byte altDer[] = { 0x04, /* = Uncompressed */
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
+ int curve_idx = wc_ecc_get_curve_idx(ECC_SECP256R1);
+
+ /* if curve P256 is not enabled then test should not fail */
+ if (curve_idx == ECC_CURVE_INVALID)
+ return 0;
+
+ outLen = sizeof(out);
+ point = wc_ecc_new_point();
+ if (point == NULL)
+ return -9700;
+ point2 = wc_ecc_new_point();
+ if (point2 == NULL) {
+ wc_ecc_del_point(point);
+ return -9701;
+ }
+#ifdef HAVE_COMP_KEY
+ point3 = wc_ecc_new_point();
+ if (point3 == NULL) {
+ wc_ecc_del_point(point2);
+ wc_ecc_del_point(point);
+ return -9702;
+ }
+ point4 = wc_ecc_new_point();
+ if (point4 == NULL) {
+ wc_ecc_del_point(point3);
+ wc_ecc_del_point(point2);
+ wc_ecc_del_point(point);
+ return -9703;
+ }
+#endif
+
+ /* Parameter Validation testing. */
+ wc_ecc_del_point(NULL);
+ ret = wc_ecc_import_point_der(NULL, sizeof(der), curve_idx, point);
+ if (ret != ECC_BAD_ARG_E) {
+ ret = -9704;
+ goto done;
+ }
+ ret = wc_ecc_import_point_der(der, sizeof(der), ECC_CURVE_INVALID, point);
+ if (ret != ECC_BAD_ARG_E) {
+ ret = -9705;
+ goto done;
+ }
+ ret = wc_ecc_import_point_der(der, sizeof(der), curve_idx, NULL);
+ if (ret != ECC_BAD_ARG_E) {
+ ret = -9706;
+ goto done;
+ }
+ ret = wc_ecc_export_point_der(-1, point, out, &outLen);
+ if (ret != ECC_BAD_ARG_E) {
+ ret = -9707;
+ goto done;
+ }
+ ret = wc_ecc_export_point_der(curve_idx, NULL, out, &outLen);
+ if (ret != ECC_BAD_ARG_E) {
+ ret = -9708;
+ goto done;
+ }
+ ret = wc_ecc_export_point_der(curve_idx, point, NULL, &outLen);
+ if (ret != LENGTH_ONLY_E || outLen != sizeof(out)) {
+ ret = -9709;
+ goto done;
+ }
+ ret = wc_ecc_export_point_der(curve_idx, point, out, NULL);
+ if (ret != ECC_BAD_ARG_E) {
+ ret = -9710;
+ goto done;
+ }
+ outLen = 0;
+ ret = wc_ecc_export_point_der(curve_idx, point, out, &outLen);
+ if (ret != BUFFER_E) {
+ ret = -9711;
+ goto done;
+ }
+ ret = wc_ecc_copy_point(NULL, NULL);
+ if (ret != ECC_BAD_ARG_E) {
+ ret = -9712;
+ goto done;
+ }
+ ret = wc_ecc_copy_point(NULL, point2);
+ if (ret != ECC_BAD_ARG_E) {
+ ret = -9713;
+ goto done;
+ }
+ ret = wc_ecc_copy_point(point, NULL);
+ if (ret != ECC_BAD_ARG_E) {
+ ret = -9714;
+ goto done;
+ }
+ ret = wc_ecc_cmp_point(NULL, NULL);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -9715;
+ goto done;
+ }
+ ret = wc_ecc_cmp_point(NULL, point2);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -9716;
+ goto done;
+ }
+ ret = wc_ecc_cmp_point(point, NULL);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -9717;
+ goto done;
+ }
+ /* Use API. */
+ ret = wc_ecc_import_point_der(der, sizeof(der), curve_idx, point);
+ if (ret != 0) {
+ ret = -9718;
+ goto done;
+ }
+
+ outLen = sizeof(out);
+ ret = wc_ecc_export_point_der(curve_idx, point, out, &outLen);
+ if (ret != 0) {
+ ret = -9719;
+ goto done;
+ }
+ if (outLen != sizeof(der)) {
+ ret = -9720;
+ goto done;
+ }
+ if (XMEMCMP(out, der, outLen) != 0) {
+ ret = -9721;
+ goto done;
+ }
+
+ ret = wc_ecc_copy_point(point2, point);
+ if (ret != MP_OKAY) {
+ ret = -9722;
+ goto done;
+ }
+ ret = wc_ecc_cmp_point(point2, point);
+ if (ret != MP_EQ) {
+ ret = -9723;
+ goto done;
+ }
+
+ ret = wc_ecc_import_point_der(altDer, sizeof(altDer), curve_idx, point2);
+ if (ret != 0) {
+ ret = -9724;
+ goto done;
+ }
+ ret = wc_ecc_cmp_point(point2, point);
+ if (ret != MP_GT) {
+ ret = -9725;
+ goto done;
+ }
+
+#ifdef HAVE_COMP_KEY
+ ret = wc_ecc_import_point_der(derComp0, sizeof(derComp0)*2-1, curve_idx, point3);
+ if (ret != 0) {
+ ret = -9726;
+ goto done;
+ }
+
+ ret = wc_ecc_import_point_der_ex(derComp0, sizeof(derComp0), curve_idx, point4, 0);
+ if (ret != 0) {
+ ret = -9727;
+ goto done;
+ }
+
+ ret = wc_ecc_cmp_point(point3, point4);
+ if (ret != MP_EQ) {
+ ret = -9728;
+ goto done;
+ }
+
+ ret = wc_ecc_import_point_der(derComp1, sizeof(derComp1)*2-1, curve_idx, point3);
+ if (ret != 0) {
+ ret = -9729;
+ goto done;
+ }
+
+ ret = wc_ecc_import_point_der_ex(derComp1, sizeof(derComp1), curve_idx, point4, 0);
+ if (ret != 0) {
+ ret = -9730;
+ goto done;
+ }
+
+ ret = wc_ecc_cmp_point(point3, point4);
+ if (ret != MP_EQ) {
+ ret = -9731;
+ goto done;
+ }
+#endif
+
+done:
+#ifdef HAVE_COMP_KEY
+ wc_ecc_del_point(point4);
+ wc_ecc_del_point(point3);
+#endif
+ wc_ecc_del_point(point2);
+ wc_ecc_del_point(point);
+
+ return ret;
+}
+#endif /* !WOLFSSL_ATECC508A && HAVE_ECC_KEY_IMPORT && HAVE_ECC_KEY_EXPORT */
+
+#ifndef NO_SIG_WRAPPER
+static int ecc_sig_test(WC_RNG* rng, ecc_key* key)
+{
+ int ret;
+ word32 sigSz;
+ int size;
+ byte out[ECC_MAX_SIG_SIZE];
+ byte in[] = "Everyone gets Friday off.";
+ const byte hash[] = {
+ 0xf2, 0x02, 0x95, 0x65, 0xcb, 0xf6, 0x2a, 0x59,
+ 0x39, 0x2c, 0x05, 0xff, 0x0e, 0x29, 0xaf, 0xfe,
+ 0x47, 0x33, 0x8c, 0x99, 0x8d, 0x58, 0x64, 0x83,
+ 0xa6, 0x58, 0x0a, 0x33, 0x0b, 0x84, 0x5f, 0x5f
+ };
+ word32 inLen = (word32)XSTRLEN((char*)in);
+
+ size = wc_ecc_sig_size(key);
+
+ ret = wc_SignatureGetSize(WC_SIGNATURE_TYPE_ECC, key, sizeof(*key));
+ if (ret != size)
+ return -9728;
+
+ sigSz = (word32)ret;
+ ret = wc_SignatureGenerate(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_ECC, in,
+ inLen, out, &sigSz, key, sizeof(*key), rng);
if (ret != 0)
- return -1014;
+ return -9729;
+ TEST_SLEEP();
- verify = 0;
- ret = wc_ecc_verify_hash(sig, x, digest, sizeof(digest), &verify, &userA);
+ ret = wc_SignatureVerify(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_ECC, in,
+ inLen, out, sigSz, key, sizeof(*key));
+ if (ret != 0)
+ return -9730;
+ TEST_SLEEP();
+ sigSz = (word32)sizeof(out);
+ ret = wc_SignatureGenerateHash(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_ECC,
+ hash, (int)sizeof(hash), out, &sigSz, key, sizeof(*key), rng);
if (ret != 0)
- return -1015;
+ return -9731;
+ TEST_SLEEP();
- if (verify != 1)
- return -1016;
+ ret = wc_SignatureVerifyHash(WC_HASH_TYPE_SHA256, WC_SIGNATURE_TYPE_ECC,
+ hash, (int)sizeof(hash), out, sigSz, key, sizeof(*key));
+ if (ret != 0)
+ return -9732;
+ TEST_SLEEP();
- x = sizeof(exportBuf);
- ret = wc_ecc_export_private_only(&userA, exportBuf, &x);
+ return 0;
+}
+#endif
+
+#if defined(HAVE_ECC_KEY_IMPORT) && defined(HAVE_ECC_KEY_EXPORT)
+static int ecc_exp_imp_test(ecc_key* key)
+{
+ int ret;
+ int curve_id;
+ ecc_key keyImp;
+ byte priv[32];
+ word32 privLen;
+ byte pub[65];
+ word32 pubLen, pubLenX, pubLenY;
+ const char qx[] = "7a4e287890a1a47ad3457e52f2f76a83"
+ "ce46cbc947616d0cbaa82323818a793d";
+ const char qy[] = "eec4084f5b29ebf29c44cce3b3059610"
+ "922f8b30ea6e8811742ac7238fe87308";
+ const char d[] = "8c14b793cb19137e323a6d2e2a870bca"
+ "2e7a493ec1153b3a95feb8a4873f8d08";
+
+ wc_ecc_init_ex(&keyImp, HEAP_HINT, devId);
+
+ privLen = sizeof(priv);
+ ret = wc_ecc_export_private_only(key, priv, &privLen);
+ if (ret != 0) {
+ ret = -9733;
+ goto done;
+ }
+ pubLen = sizeof(pub);
+ ret = wc_ecc_export_point_der(key->idx, &key->pubkey, pub, &pubLen);
+ if (ret != 0) {
+ ret = -9734;
+ goto done;
+ }
+
+ ret = wc_ecc_import_private_key(priv, privLen, pub, pubLen, &keyImp);
+ if (ret != 0) {
+ ret = -9735;
+ goto done;
+ }
+
+ wc_ecc_free(&keyImp);
+ wc_ecc_init_ex(&keyImp, HEAP_HINT, devId);
+
+ ret = wc_ecc_import_raw_ex(&keyImp, qx, qy, d, ECC_SECP256R1);
+ if (ret != 0) {
+ ret = -9736;
+ goto done;
+ }
+
+ wc_ecc_free(&keyImp);
+ wc_ecc_init_ex(&keyImp, HEAP_HINT, devId);
+
+ curve_id = wc_ecc_get_curve_id(key->idx);
+ if (curve_id < 0) {
+ ret = -9737;
+ goto done;
+ }
+
+ /* test import private only */
+ ret = wc_ecc_import_private_key_ex(priv, privLen, NULL, 0, &keyImp,
+ curve_id);
+ if (ret != 0) {
+ ret = -9738;
+ goto done;
+ }
+
+ wc_ecc_free(&keyImp);
+ wc_ecc_init_ex(&keyImp, HEAP_HINT, devId);
+
+ /* test export public raw */
+ pubLenX = pubLenY = 32;
+ ret = wc_ecc_export_public_raw(key, pub, &pubLenX, &pub[32], &pubLenY);
+ if (ret != 0) {
+ ret = -9739;
+ goto done;
+ }
+
+#ifndef HAVE_SELFTEST
+ /* test import of public */
+ ret = wc_ecc_import_unsigned(&keyImp, pub, &pub[32], NULL, ECC_SECP256R1);
+ if (ret != 0) {
+ ret = -9740;
+ goto done;
+ }
+#endif
+
+ wc_ecc_free(&keyImp);
+ wc_ecc_init_ex(&keyImp, HEAP_HINT, devId);
+
+ /* test export private and public raw */
+ pubLenX = pubLenY = privLen = 32;
+ ret = wc_ecc_export_private_raw(key, pub, &pubLenX, &pub[32], &pubLenY,
+ priv, &privLen);
+ if (ret != 0) {
+ ret = -9741;
+ goto done;
+ }
+
+#ifndef HAVE_SELFTEST
+ /* test import of private and public */
+ ret = wc_ecc_import_unsigned(&keyImp, pub, &pub[32], priv, ECC_SECP256R1);
+ if (ret != 0) {
+ ret = -9742;
+ goto done;
+ }
+#endif
+
+done:
+ wc_ecc_free(&keyImp);
+ return ret;
+}
+#endif /* HAVE_ECC_KEY_IMPORT && HAVE_ECC_KEY_EXPORT */
+
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
+#if defined(HAVE_ECC_KEY_IMPORT) && !defined(WOLFSSL_VALIDATE_ECC_IMPORT)
+static int ecc_mulmod_test(ecc_key* key1)
+{
+ int ret;
+ ecc_key key2;
+ ecc_key key3;
+
+ wc_ecc_init_ex(&key2, HEAP_HINT, devId);
+ wc_ecc_init_ex(&key3, HEAP_HINT, devId);
+
+ /* TODO: Use test data, test with WOLFSSL_VALIDATE_ECC_IMPORT. */
+ /* Need base point (Gx,Gy) and parameter A - load them as the public and
+ * private key in key2.
+ */
+ ret = wc_ecc_import_raw_ex(&key2, key1->dp->Gx, key1->dp->Gy, key1->dp->Af,
+ ECC_SECP256R1);
if (ret != 0)
- return -1017;
+ goto done;
-#if !defined(NO_SHA) && \
- ((defined(HAVE_ECC192) && defined(HAVE_ECC224)) || defined(HAVE_ALL_CURVES))
- {
- /* test raw ECC key import */
- Sha sha;
- byte hash[SHA_DIGEST_SIZE];
- rawEccVector a, b;
- rawEccVector test_ecc[2];
- int times = sizeof(test_ecc) / sizeof(rawEccVector);
+ /* Need a point (Gx,Gy) and prime - load them as the public and private key
+ * in key3.
+ */
+ ret = wc_ecc_import_raw_ex(&key3, key1->dp->Gx, key1->dp->Gy,
+ key1->dp->prime, ECC_SECP256R1);
+ if (ret != 0)
+ goto done;
- /* first [P-192,SHA-1] vector from FIPS 186-3 NIST vectors */
- a.msg = "\xeb\xf7\x48\xd7\x48\xeb\xbc\xa7\xd2\x9f\xb4\x73\x69\x8a"
- "\x6e\x6b\x4f\xb1\x0c\x86\x5d\x4a\xf0\x24\xcc\x39\xae\x3d"
- "\xf3\x46\x4b\xa4\xf1\xd6\xd4\x0f\x32\xbf\x96\x18\xa9\x1b"
- "\xb5\x98\x6f\xa1\xa2\xaf\x04\x8a\x0e\x14\xdc\x51\xe5\x26"
- "\x7e\xb0\x5e\x12\x7d\x68\x9d\x0a\xc6\xf1\xa7\xf1\x56\xce"
- "\x06\x63\x16\xb9\x71\xcc\x7a\x11\xd0\xfd\x7a\x20\x93\xe2"
- "\x7c\xf2\xd0\x87\x27\xa4\xe6\x74\x8c\xc3\x2f\xd5\x9c\x78"
- "\x10\xc5\xb9\x01\x9d\xf2\x1c\xdc\xc0\xbc\xa4\x32\xc0\xa3"
- "\xee\xd0\x78\x53\x87\x50\x88\x77\x11\x43\x59\xce\xe4\xa0"
- "\x71\xcf";
- a.msgLen = 128;
- a.Qx = "07008ea40b08dbe76432096e80a2494c94982d2d5bcf98e6";
- a.Qy = "76fab681d00b414ea636ba215de26d98c41bd7f2e4d65477";
- a.d = "e14f37b3d1374ff8b03f41b9b3fdd2f0ebccf275d660d7f3";
- a.R = "6994d962bdd0d793ffddf855ec5bf2f91a9698b46258a63e";
- a.S = "02ba6465a234903744ab02bc8521405b73cf5fc00e1a9f41";
- a.curveName = "ECC-192";
+ ret = wc_ecc_mulmod(&key1->k, &key2.pubkey, &key3.pubkey, &key2.k, &key3.k,
+ 1);
+ if (ret != 0) {
+ ret = -9743;
+ goto done;
+ }
- /* first [P-224,SHA-1] vector from FIPS 186-3 NIST vectors */
- b.msg = "\x36\xc8\xb2\x29\x86\x48\x7f\x67\x7c\x18\xd0\x97\x2a\x9e"
- "\x20\x47\xb3\xaf\xa5\x9e\xc1\x62\x76\x4e\xc3\x0b\x5b\x69"
- "\xe0\x63\x0f\x99\x0d\x4e\x05\xc2\x73\xb0\xe5\xa9\xd4\x28"
- "\x27\xb6\x95\xfc\x2d\x64\xd9\x13\x8b\x1c\xf4\xc1\x21\x55"
- "\x89\x4c\x42\x13\x21\xa7\xbb\x97\x0b\xdc\xe0\xfb\xf0\xd2"
- "\xae\x85\x61\xaa\xd8\x71\x7f\x2e\x46\xdf\xe3\xff\x8d\xea"
- "\xb4\xd7\x93\x23\x56\x03\x2c\x15\x13\x0d\x59\x9e\x26\xc1"
- "\x0f\x2f\xec\x96\x30\x31\xac\x69\x38\xa1\x8d\x66\x45\x38"
- "\xb9\x4d\xac\x55\x34\xef\x7b\x59\x94\x24\xd6\x9b\xe1\xf7"
- "\x1c\x20";
- b.msgLen = 128;
- b.Qx = "8a4dca35136c4b70e588e23554637ae251077d1365a6ba5db9585de7";
- b.Qy = "ad3dee06de0be8279d4af435d7245f14f3b4f82eb578e519ee0057b1";
- b.d = "97c4b796e1639dd1035b708fc00dc7ba1682cec44a1002a1a820619f";
- b.R = "147b33758321e722a0360a4719738af848449e2c1d08defebc1671a7";
- b.S = "24fc7ed7f1352ca3872aa0916191289e2e04d454935d50fe6af3ad5b";
- b.curveName = "ECC-224";
-
- test_ecc[0] = a;
- test_ecc[1] = b;
-
- for (i = 0; i < times; i++) {
-
- wc_ecc_free(&userA);
- wc_ecc_init(&userA);
-
- memset(sig, 0, sizeof(sig));
- x = sizeof(sig);
-
- /* calculate SHA-1 hash of message */
- ret = wc_InitSha(&sha);
- if (ret != 0)
- return -1015 - i;
+done:
+ wc_ecc_free(&key3);
+ wc_ecc_free(&key2);
+ return ret;
+}
+#endif
- wc_ShaUpdate(&sha, (byte*)test_ecc[i].msg, (word32)test_ecc[i].msgLen);
- wc_ShaFinal(&sha, hash);
+#ifdef HAVE_ECC_DHE
+static int ecc_ssh_test(ecc_key* key)
+{
+ int ret;
+ byte out[128];
+ word32 outLen = sizeof(out);
+
+ /* Parameter Validation testing. */
+ ret = wc_ecc_shared_secret_ssh(NULL, &key->pubkey, out, &outLen);
+ if (ret != BAD_FUNC_ARG)
+ return -9744;
+ ret = wc_ecc_shared_secret_ssh(key, NULL, out, &outLen);
+ if (ret != BAD_FUNC_ARG)
+ return -9745;
+ ret = wc_ecc_shared_secret_ssh(key, &key->pubkey, NULL, &outLen);
+ if (ret != BAD_FUNC_ARG)
+ return -9746;
+ ret = wc_ecc_shared_secret_ssh(key, &key->pubkey, out, NULL);
+ if (ret != BAD_FUNC_ARG)
+ return -9747;
+
+ /* Use API. */
+ ret = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_shared_secret_ssh(key, &key->pubkey, out, &outLen);
+ } while (ret == WC_PENDING_E);
+ if (ret != 0)
+ return -9748;
+ TEST_SLEEP();
+ return 0;
+}
+#endif /* HAVE_ECC_DHE */
+#endif
- ret = wc_ecc_import_raw(&userA, test_ecc[i].Qx, test_ecc[i].Qy,
- test_ecc[i].d, test_ecc[i].curveName);
- if (ret != 0)
- return -1017 - i;
+static int ecc_def_curve_test(WC_RNG *rng)
+{
+ int ret;
+ ecc_key key;
- ret = wc_ecc_rs_to_sig(test_ecc[i].R, test_ecc[i].S, sig, &x);
- if (ret != 0)
- return -1019 - i;
+ wc_ecc_init_ex(&key, HEAP_HINT, devId);
- ret = wc_ecc_verify_hash(sig, x, hash, sizeof(hash), &verify, &userA);
- if (ret != 0)
- return -1021 - i;
+ /* Use API */
+ ret = wc_ecc_set_flags(NULL, 0);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -9749;
+ goto done;
+ }
+ ret = wc_ecc_set_flags(&key, 0);
+ if (ret != 0) {
+ ret = -9750;
+ goto done;
+ }
- if (verify != 1)
- return -1023 - i;
- }
+ ret = wc_ecc_make_key(rng, 32, &key);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ret = -9751;
+ goto done;
+ }
+ TEST_SLEEP();
+
+#ifndef NO_SIG_WRAPPER
+ ret = ecc_sig_test(rng, &key);
+ if (ret < 0)
+ goto done;
+#endif
+#if defined(HAVE_ECC_KEY_IMPORT) && defined(HAVE_ECC_KEY_EXPORT)
+ ret = ecc_exp_imp_test(&key);
+ if (ret < 0)
+ goto done;
+#endif
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_CRYPTOCELL)
+#if defined(HAVE_ECC_KEY_IMPORT) && !defined(WOLFSSL_VALIDATE_ECC_IMPORT)
+ ret = ecc_mulmod_test(&key);
+ if (ret < 0)
+ goto done;
+#endif
+#ifdef HAVE_ECC_DHE
+ ret = ecc_ssh_test(&key);
+ if (ret < 0)
+ goto done;
+#endif
+#endif /* WOLFSSL_ATECC508A */
+done:
+ wc_ecc_free(&key);
+ return ret;
+}
+#endif /* !NO_ECC256 || HAVE_ALL_CURVES */
+
+#ifdef WOLFSSL_CERT_EXT
+static int ecc_decode_test(void)
+{
+ int ret;
+ word32 inSz;
+ word32 inOutIdx;
+ ecc_key key;
+
+ /* SECP256R1 OID: 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07 */
+
+ /* This is ecc_clikeypub_der_256. */
+ static const byte good[] = {
+ 0x30, 0x59, 0x30, 0x13, 0x06, 0x07, 0x2a, 0x86, 0x48, 0xce,
+ 0x3d, 0x02, 0x01, 0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d,
+ 0x03, 0x01, 0x07, 0x03, 0x42, 0x00, 0x04, 0x55, 0xbf, 0xf4,
+ 0x0f, 0x44, 0x50, 0x9a, 0x3d, 0xce, 0x9b, 0xb7, 0xf0, 0xc5,
+ 0x4d, 0xf5, 0x70, 0x7b, 0xd4, 0xec, 0x24, 0x8e, 0x19, 0x80,
+ 0xec, 0x5a, 0x4c, 0xa2, 0x24, 0x03, 0x62, 0x2c, 0x9b, 0xda,
+ 0xef, 0xa2, 0x35, 0x12, 0x43, 0x84, 0x76, 0x16, 0xc6, 0x56,
+ 0x95, 0x06, 0xcc, 0x01, 0xa9, 0xbd, 0xf6, 0x75, 0x1a, 0x42,
+ 0xf7, 0xbd, 0xa9, 0xb2, 0x36, 0x22, 0x5f, 0xc7, 0x5d, 0x7f,
+ 0xb4 };
+ static const byte badNoObjId[] = { 0x30, 0x08, 0x30, 0x06, 0x03, 0x04,
+ 0x00, 0x04, 0x01, 0x01 };
+ static const byte badOneObjId[] = { 0x30, 0x0a, 0x30, 0x08, 0x06, 0x00,
+ 0x03, 0x04, 0x00, 0x04, 0x01, 0x01 };
+ static const byte badObjId1Len[] = { 0x30, 0x0c, 0x30, 0x0a, 0x06, 0x09,
+ 0x06, 0x00, 0x03, 0x04, 0x00, 0x04, 0x01, 0x01 };
+ static const byte badObj2d1Len[] = { 0x30, 0x0c, 0x30, 0x0a, 0x06, 0x00,
+ 0x06, 0x07, 0x03, 0x04, 0x00, 0x04, 0x01, 0x01 };
+ static const byte badNotBitStr[] = { 0x30, 0x14, 0x30, 0x0b, 0x06, 0x00,
+ 0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07,
+ 0x04, 0x04, 0x00, 0x04, 0x01, 0x01 };
+ static const byte badBitStrLen[] = { 0x30, 0x14, 0x30, 0x0b, 0x06, 0x00,
+ 0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07,
+ 0x03, 0x05, 0x00, 0x04, 0x01, 0x01 };
+ static const byte badNoBitStrZero[] = { 0x30, 0x13, 0x30, 0x0a, 0x06, 0x00,
+ 0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07,
+ 0x03, 0x03, 0x04, 0x01, 0x01 };
+ static const byte badPoint[] = { 0x30, 0x12, 0x30, 0x09, 0x06, 0x00,
+ 0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07,
+ 0x03, 0x03, 0x00, 0x04, 0x01 };
+
+ XMEMSET(&key, 0, sizeof(key));
+ wc_ecc_init_ex(&key, HEAP_HINT, devId);
+
+ inSz = sizeof(good);
+ ret = wc_EccPublicKeyDecode(NULL, &inOutIdx, &key, inSz);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -9800;
+ goto done;
+ }
+ ret = wc_EccPublicKeyDecode(good, NULL, &key, inSz);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -9801;
+ goto done;
+ }
+ ret = wc_EccPublicKeyDecode(good, &inOutIdx, NULL, inSz);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -9802;
+ goto done;
+ }
+ ret = wc_EccPublicKeyDecode(good, &inOutIdx, &key, 0);
+ if (ret != BAD_FUNC_ARG) {
+ ret = -9803;
+ goto done;
}
-#endif /* defined(HAVE_ECC192) && defined(HAVE_ECC256) */
+ /* Change offset to produce bad input data. */
+ inOutIdx = 2;
+ inSz = sizeof(good) - inOutIdx;
+ ret = wc_EccPublicKeyDecode(good, &inOutIdx, &key, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -9804;
+ goto done;
+ }
+ inOutIdx = 4;
+ inSz = sizeof(good) - inOutIdx;
+ ret = wc_EccPublicKeyDecode(good, &inOutIdx, &key, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -9805;
+ goto done;
+ }
+ /* Bad data. */
+ inSz = sizeof(badNoObjId);
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(badNoObjId, &inOutIdx, &key, inSz);
+ if (ret != ASN_OBJECT_ID_E) {
+ ret = -9806;
+ goto done;
+ }
+ inSz = sizeof(badOneObjId);
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(badOneObjId, &inOutIdx, &key, inSz);
+ if (ret != ASN_OBJECT_ID_E) {
+ ret = -9807;
+ goto done;
+ }
+ inSz = sizeof(badObjId1Len);
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(badObjId1Len, &inOutIdx, &key, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -9808;
+ goto done;
+ }
+ inSz = sizeof(badObj2d1Len);
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(badObj2d1Len, &inOutIdx, &key, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -9809;
+ goto done;
+ }
+ inSz = sizeof(badNotBitStr);
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(badNotBitStr, &inOutIdx, &key, inSz);
+ if (ret != ASN_BITSTR_E) {
+ ret = -9810;
+ goto done;
+ }
+ inSz = sizeof(badBitStrLen);
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(badBitStrLen, &inOutIdx, &key, inSz);
+ if (ret != ASN_PARSE_E) {
+ ret = -9811;
+ goto done;
+ }
+ inSz = sizeof(badNoBitStrZero);
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(badNoBitStrZero, &inOutIdx, &key, inSz);
+ if (ret != ASN_EXPECT_0_E) {
+ ret = -9812;
+ goto done;
+ }
+ inSz = sizeof(badPoint);
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(badPoint, &inOutIdx, &key, inSz);
+ if (ret != ASN_ECC_KEY_E) {
+ ret = -9813;
+ goto done;
+ }
-#ifdef WOLFSSL_KEY_GEN
- {
- int derSz, pemSz;
- byte der[FOURK_BUF];
- byte pem[FOURK_BUF];
- FILE* keyFile;
- FILE* pemFile;
+ inSz = sizeof(good);
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(good, &inOutIdx, &key, inSz);
+ if (ret != 0) {
+ ret = -9814;
+ goto done;
+ }
- derSz = wc_EccKeyToDer(&userB, der, FOURK_BUF);
- if (derSz < 0) {
- return -1024;
- }
+done:
+ wc_ecc_free(&key);
+ return ret;
+}
+#endif /* WOLFSSL_CERT_EXT */
+
+#ifdef WOLFSSL_CUSTOM_CURVES
+static const byte eccKeyExplicitCurve[] = {
+ 0x30, 0x81, 0xf5, 0x30, 0x81, 0xae, 0x06, 0x07,
+ 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x02, 0x01, 0x30,
+ 0x81, 0xa2, 0x02, 0x01, 0x01, 0x30, 0x2c, 0x06,
+ 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x01, 0x01,
+ 0x02, 0x21, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff,
+ 0xff, 0xfc, 0x2f, 0x30, 0x06, 0x04, 0x01, 0x00,
+ 0x04, 0x01, 0x07, 0x04, 0x41, 0x04, 0x79, 0xbe,
+ 0x66, 0x7e, 0xf9, 0xdc, 0xbb, 0xac, 0x55, 0xa0,
+ 0x62, 0x95, 0xce, 0x87, 0x0b, 0x07, 0x02, 0x9b,
+ 0xfc, 0xdb, 0x2d, 0xce, 0x28, 0xd9, 0x59, 0xf2,
+ 0x81, 0x5b, 0x16, 0xf8, 0x17, 0x98, 0x48, 0x3a,
+ 0xda, 0x77, 0x26, 0xa3, 0xc4, 0x65, 0x5d, 0xa4,
+ 0xfb, 0xfc, 0x0e, 0x11, 0x08, 0xa8, 0xfd, 0x17,
+ 0xb4, 0x48, 0xa6, 0x85, 0x54, 0x19, 0x9c, 0x47,
+ 0xd0, 0x8f, 0xfb, 0x10, 0xd4, 0xb8, 0x02, 0x21,
+ 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xfe, 0xba, 0xae, 0xdc, 0xe6, 0xaf, 0x48, 0xa0,
+ 0x3b, 0xbf, 0xd2, 0x5e, 0x8c, 0xd0, 0x36, 0x41,
+ 0x41, 0x02, 0x01, 0x01, 0x03, 0x42, 0x00, 0x04,
+ 0x3c, 0x4c, 0xc9, 0x5e, 0x2e, 0xa2, 0x3d, 0x49,
+ 0xcc, 0x5b, 0xff, 0x4f, 0xc9, 0x2e, 0x1d, 0x4a,
+ 0xc6, 0x21, 0xf6, 0xf3, 0xe6, 0x0b, 0x4f, 0xa9,
+ 0x9d, 0x74, 0x99, 0xdd, 0x97, 0xc7, 0x6e, 0xbe,
+ 0x14, 0x2b, 0x39, 0x9d, 0x63, 0xc7, 0x97, 0x0d,
+ 0x45, 0x25, 0x40, 0x30, 0x77, 0x05, 0x76, 0x88,
+ 0x38, 0x96, 0x29, 0x7d, 0x9c, 0xe1, 0x50, 0xbe,
+ 0xac, 0xf0, 0x1d, 0x86, 0xf4, 0x2f, 0x65, 0x0b
+};
- keyFile = fopen("./ecc-key.der", "wb");
- if (!keyFile) {
- return -1025;
- }
- ret = (int)fwrite(der, 1, derSz, keyFile);
- fclose(keyFile);
- if (ret != derSz) {
- return -1026;
- }
+static int ecc_test_custom_curves(WC_RNG* rng)
+{
+ int ret;
+ word32 inOutIdx;
+ ecc_key key;
- pemSz = wc_DerToPem(der, derSz, pem, FOURK_BUF, ECC_PRIVATEKEY_TYPE);
- if (pemSz < 0) {
- return -1027;
+ /* test use of custom curve - using BRAINPOOLP256R1 for test */
+ #ifndef WOLFSSL_ECC_CURVE_STATIC
+ const ecc_oid_t ecc_oid_brainpoolp256r1[] = {
+ 0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x07
+ };
+ const word32 ecc_oid_brainpoolp256r1_sz =
+ sizeof(ecc_oid_brainpoolp256r1) / sizeof(ecc_oid_t);
+ #else
+ #define ecc_oid_brainpoolp256r1 { \
+ 0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x07 \
}
+ #define ecc_oid_brainpoolp256r1_sz 9
+ #endif
+ const word32 ecc_oid_brainpoolp256r1_sum = 104;
+
+ const ecc_set_type ecc_dp_brainpool256r1 = {
+ 32, /* size/bytes */
+ ECC_CURVE_CUSTOM, /* ID */
+ "BRAINPOOLP256R1", /* curve name */
+ "A9FB57DBA1EEA9BC3E660A909D838D726E3BF623D52620282013481D1F6E5377", /* prime */
+ "7D5A0975FC2C3057EEF67530417AFFE7FB8055C126DC5C6CE94A4B44F330B5D9", /* A */
+ "26DC5C6CE94A4B44F330B5D9BBD77CBF958416295CF7E1CE6BCCDC18FF8C07B6", /* B */
+ "A9FB57DBA1EEA9BC3E660A909D838D718C397AA3B561A6F7901E0E82974856A7", /* order */
+ "8BD2AEB9CB7E57CB2C4B482FFC81B7AFB9DE27E1E3BD23C23A4453BD9ACE3262", /* Gx */
+ "547EF835C3DAC4FD97F8461A14611DC9C27745132DED8E545C1D54C72F046997", /* Gy */
+ ecc_oid_brainpoolp256r1, /* oid/oidSz */
+ ecc_oid_brainpoolp256r1_sz,
+ ecc_oid_brainpoolp256r1_sum, /* oid sum */
+ 1, /* cofactor */
+ };
- pemFile = fopen("./ecc-key.pem", "wb");
- if (!pemFile) {
- return -1028;
+ ret = ecc_test_curve_size(rng, 0, ECC_TEST_VERIFY_COUNT, ECC_CURVE_DEF,
+ &ecc_dp_brainpool256r1);
+ if (ret != 0) {
+ printf("ECC test for custom curve failed! %d\n", ret);
+ return ret;
+ }
+
+ #if defined(HAVE_ECC_BRAINPOOL) || defined(HAVE_ECC_KOBLITZ)
+ {
+ int curve_id;
+ #ifdef HAVE_ECC_BRAINPOOL
+ curve_id = ECC_BRAINPOOLP256R1;
+ #else
+ curve_id = ECC_SECP256K1;
+ #endif
+ /* Test and demonstrate use of non-SECP curve */
+ ret = ecc_test_curve_size(rng, 0, ECC_TEST_VERIFY_COUNT, curve_id, NULL);
+ if (ret < 0) {
+ printf("ECC test for curve_id %d failed! %d\n", curve_id, ret);
+ return ret;
}
- ret = (int)fwrite(pem, 1, pemSz, pemFile);
- fclose(pemFile);
- if (ret != pemSz) {
- return -1029;
+ }
+ #endif
+
+ ret = wc_ecc_init_ex(&key, HEAP_HINT, devId);
+ if (ret != 0) {
+ return -9815;
+ }
+
+ inOutIdx = 0;
+ ret = wc_EccPublicKeyDecode(eccKeyExplicitCurve, &inOutIdx, &key,
+ sizeof(eccKeyExplicitCurve));
+ if (ret != 0)
+ return -9816;
+
+ wc_ecc_free(&key);
+
+ return ret;
+}
+#endif /* WOLFSSL_CUSTOM_CURVES */
+
+#ifdef WOLFSSL_CERT_GEN
+
+/* Make Cert / Sign example for ECC cert and ECC CA */
+static int ecc_test_cert_gen(WC_RNG* rng)
+{
+ int ret;
+ Cert myCert;
+ int certSz;
+ size_t bytes;
+ word32 idx = 0;
+#ifndef USE_CERT_BUFFERS_256
+ XFILE file;
+#endif
+#ifdef WOLFSSL_TEST_CERT
+ DecodedCert decode;
+#endif
+ byte* der;
+ byte* pem = NULL;
+ ecc_key caEccKey;
+ ecc_key certPubKey;
+
+ XMEMSET(&caEccKey, 0, sizeof(caEccKey));
+ XMEMSET(&certPubKey, 0, sizeof(certPubKey));
+
+ der = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (der == NULL) {
+ ERROR_OUT(-9817, exit);
+ }
+ pem = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (pem == NULL) {
+ ERROR_OUT(-9818, exit);
+ }
+
+ /* Get cert private key */
+#ifdef ENABLE_ECC384_CERT_GEN_TEST
+ /* Get Cert Key 384 */
+#ifdef USE_CERT_BUFFERS_256
+ XMEMCPY(der, ca_ecc_key_der_384, sizeof_ca_ecc_key_der_384);
+ bytes = sizeof_ca_ecc_key_der_384;
+#else
+ file = XFOPEN(eccCaKey384File, "rb");
+ if (!file) {
+ ERROR_OUT(-9819, exit);
+ }
+
+ bytes = XFREAD(der, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+ (void)eccCaKeyFile;
+#endif /* USE_CERT_BUFFERS_256 */
+#else
+#ifdef USE_CERT_BUFFERS_256
+ XMEMCPY(der, ca_ecc_key_der_256, sizeof_ca_ecc_key_der_256);
+ bytes = sizeof_ca_ecc_key_der_256;
+#else
+ file = XFOPEN(eccCaKeyFile, "rb");
+ if (!file) {
+ ERROR_OUT(-9820, exit);
+ }
+ bytes = XFREAD(der, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+#ifdef ENABLE_ECC384_CERT_GEN_TEST
+ (void)eccCaKey384File;
+#endif
+#endif /* USE_CERT_BUFFERS_256 */
+#endif /* ENABLE_ECC384_CERT_GEN_TEST */
+
+ /* Get CA Key */
+ ret = wc_ecc_init_ex(&caEccKey, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-9821, exit);
+ }
+ ret = wc_EccPrivateKeyDecode(der, &idx, &caEccKey, (word32)bytes);
+ if (ret != 0) {
+ ERROR_OUT(-9822, exit);
+ }
+
+ /* Make a public key */
+ ret = wc_ecc_init_ex(&certPubKey, HEAP_HINT, devId);
+ if (ret != 0) {
+ ERROR_OUT(-9823, exit);
+ }
+
+ ret = wc_ecc_make_key(rng, 32, &certPubKey);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &certPubKey.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0) {
+ ERROR_OUT(-9824, exit);
+ }
+ TEST_SLEEP();
+
+ /* Setup Certificate */
+ if (wc_InitCert(&myCert)) {
+ ERROR_OUT(-9825, exit);
+ }
+
+#ifndef NO_SHA256
+ myCert.sigType = CTC_SHA256wECDSA;
+#else
+ myCert.sigType = CTC_SHAwECDSA;
+#endif
+ XMEMCPY(&myCert.subject, &certDefaultName, sizeof(CertName));
+
+#ifdef WOLFSSL_CERT_EXT
+ /* add Policies */
+ XSTRNCPY(myCert.certPolicies[0], "2.4.589440.587.101.2.1.9632587.1",
+ CTC_MAX_CERTPOL_SZ);
+ XSTRNCPY(myCert.certPolicies[1], "1.2.13025.489.1.113549",
+ CTC_MAX_CERTPOL_SZ);
+ myCert.certPoliciesNb = 2;
+
+ /* add SKID from the Public Key */
+ if (wc_SetSubjectKeyIdFromPublicKey(&myCert, NULL, &certPubKey) != 0) {
+ ERROR_OUT(-9826, exit);
+ }
+
+ /* add AKID from the Public Key */
+ if (wc_SetAuthKeyIdFromPublicKey(&myCert, NULL, &caEccKey) != 0) {
+ ERROR_OUT(-9827, exit);
+ }
+
+ /* add Key Usage */
+ if (wc_SetKeyUsage(&myCert, certKeyUsage) != 0) {
+ ERROR_OUT(-9828, exit);
+ }
+#endif /* WOLFSSL_CERT_EXT */
+
+#ifdef ENABLE_ECC384_CERT_GEN_TEST
+ #if defined(USE_CERT_BUFFERS_256)
+ ret = wc_SetIssuerBuffer(&myCert, ca_ecc_cert_der_384,
+ sizeof_ca_ecc_cert_der_384);
+#else
+ ret = wc_SetIssuer(&myCert, eccCaCert384File);
+ (void)eccCaCertFile;
+#endif
+#else
+#if defined(USE_CERT_BUFFERS_256)
+ ret = wc_SetIssuerBuffer(&myCert, ca_ecc_cert_der_256,
+ sizeof_ca_ecc_cert_der_256);
+#else
+ ret = wc_SetIssuer(&myCert, eccCaCertFile);
+#ifdef ENABLE_ECC384_CERT_GEN_TEST
+ (void)eccCaCert384File;
+#endif
+#endif
+#endif /* ENABLE_ECC384_CERT_GEN_TEST */
+ if (ret < 0) {
+ ERROR_OUT(-9829, exit);
+ }
+
+ certSz = wc_MakeCert(&myCert, der, FOURK_BUF, NULL, &certPubKey, rng);
+ if (certSz < 0) {
+ ERROR_OUT(-9830, exit);
+ }
+
+ ret = 0;
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &caEccKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret >= 0) {
+ ret = wc_SignCert(myCert.bodySz, myCert.sigType, der,
+ FOURK_BUF, NULL, &caEccKey, rng);
}
+ } while (ret == WC_PENDING_E);
+ if (ret < 0) {
+ ERROR_OUT(-9831, exit);
}
-#endif /* WOLFSSL_KEY_GEN */
+ certSz = ret;
+ TEST_SLEEP();
- wc_ecc_free(&pubKey);
- wc_ecc_free(&userB);
- wc_ecc_free(&userA);
+#ifdef WOLFSSL_TEST_CERT
+ InitDecodedCert(&decode, der, certSz, 0);
+ ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
+ if (ret != 0) {
+ FreeDecodedCert(&decode);
+ ERROR_OUT(-9832, exit);
+
+ }
+ FreeDecodedCert(&decode);
+#endif
+
+ ret = SaveDerAndPem(der, certSz, pem, FOURK_BUF, certEccDerFile,
+ certEccPemFile, CERT_TYPE, -6735);
+ if (ret != 0) {
+ goto exit;
+ }
+
+exit:
+ wc_ecc_free(&certPubKey);
+ wc_ecc_free(&caEccKey);
+
+ XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+
+ return ret;
+}
+#endif /* WOLFSSL_CERT_GEN */
+
+#if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+/* Test for the wc_ecc_key_new() and wc_ecc_key_free() functions. */
+static int ecc_test_allocator(WC_RNG* rng)
+{
+ int ret = 0;
+ ecc_key* key;
+
+ key = wc_ecc_key_new(HEAP_HINT);
+ if (key == NULL) {
+ ERROR_OUT(-9833, exit);
+ }
+
+ ret = wc_ecc_make_key(rng, 32, key);
+ if (ret != 0) {
+ ERROR_OUT(-9834, exit);
+ }
+
+exit:
+ wc_ecc_key_free(key);
+ return ret;
+}
+#endif
+
+int ecc_test(void)
+{
+ int ret;
+ WC_RNG rng;
+
+#ifdef WOLFSSL_CERT_EXT
+ ret = ecc_decode_test();
+ if (ret < 0)
+ return ret;
+#endif
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0)
+ return -9900;
+
+#if defined(HAVE_ECC112) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 14);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC112 */
+#if defined(HAVE_ECC128) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 16);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC128 */
+#if defined(HAVE_ECC160) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 20);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC160 */
+#if defined(HAVE_ECC192) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 24);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC192 */
+#if defined(HAVE_ECC224) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 28);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC224 */
+#if defined(HAVE_ECC239) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 30);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC239 */
+#if !defined(NO_ECC256) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 32);
+ if (ret < 0) {
+ goto done;
+ }
+#if !defined(WOLFSSL_ATECC508A) && defined(HAVE_ECC_KEY_IMPORT) && \
+ defined(HAVE_ECC_KEY_EXPORT)
+ ret = ecc_point_test();
+ if (ret < 0) {
+ goto done;
+ }
+#endif
+ ret = ecc_def_curve_test(&rng);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* !NO_ECC256 */
+#if defined(HAVE_ECC320) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 40);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC320 */
+#if defined(HAVE_ECC384) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 48);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC384 */
+#if defined(HAVE_ECC512) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 64);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC512 */
+#if defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES)
+ ret = ecc_test_curve(&rng, 66);
+ if (ret < 0) {
+ goto done;
+ }
+#endif /* HAVE_ECC521 */
+
+#if defined(WOLFSSL_CUSTOM_CURVES)
+ ret = ecc_test_custom_curves(&rng);
+ if (ret != 0) {
+ goto done;
+ }
+#endif
+
+#if defined(HAVE_ECC_SIGN) && defined(WOLFSSL_ECDSA_SET_K)
+ ret = ecc_test_sign_vectors(&rng);
+ if (ret != 0) {
+ printf("ecc_test_sign_vectors failed! %d\n", ret);
+ goto done;
+ }
+#endif
+#ifdef HAVE_ECC_CDH
+ ret = ecc_test_cdh_vectors();
+ if (ret != 0) {
+ printf("ecc_test_cdh_vectors failed! %d\n", ret);
+ goto done;
+ }
+#endif
+#if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_STM32_PKA)
+ ret = ecc_test_make_pub(&rng);
+ if (ret != 0) {
+ printf("ecc_test_make_pub failed!: %d\n", ret);
+ goto done;
+ }
+#else
+ (void) ecc_test_make_pub;/* for compiler warning */
+#endif
+#ifdef WOLFSSL_CERT_GEN
+ ret = ecc_test_cert_gen(&rng);
+ if (ret != 0) {
+ printf("ecc_test_cert_gen failed!: %d\n", ret);
+ goto done;
+ }
+#endif
+#if !defined(HAVE_FIPS) && !defined(HAVE_SELFTEST)
+ ret = ecc_test_allocator(&rng);
+ if (ret != 0) {
+ printf("ecc_test_allocator failed!: %d\n", ret);
+ }
+#endif
+
+done:
wc_FreeRng(&rng);
- return 0;
+ return ret;
}
-#ifdef HAVE_ECC_ENCRYPT
+#if defined(HAVE_ECC_ENCRYPT) && defined(WOLFSSL_AES_128)
int ecc_encrypt_test(void)
{
- RNG rng;
- int ret;
+ WC_RNG rng;
+ int ret = 0;
ecc_key userA, userB;
byte msg[48];
byte plain[48];
@@ -5130,139 +20123,551 @@ int ecc_encrypt_test(void)
word32 outSz = sizeof(out);
word32 plainSz = sizeof(plain);
int i;
+ ecEncCtx* cliCtx = NULL;
+ ecEncCtx* srvCtx = NULL;
+ byte cliSalt[EXCHANGE_SALT_SZ];
+ byte srvSalt[EXCHANGE_SALT_SZ];
+ const byte* tmpSalt;
+ byte msg2[48];
+ byte plain2[48];
+ byte out2[80];
+ word32 outSz2 = sizeof(out2);
+ word32 plainSz2 = sizeof(plain2);
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
ret = wc_InitRng(&rng);
+#endif
if (ret != 0)
- return -3001;
+ return -10000;
- wc_ecc_init(&userA);
- wc_ecc_init(&userB);
+ XMEMSET(&userA, 0, sizeof(userA));
+ XMEMSET(&userB, 0, sizeof(userB));
+
+ ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+ if (ret != 0)
+ goto done;
+ ret = wc_ecc_init_ex(&userB, HEAP_HINT, devId);
+ if (ret != 0)
+ goto done;
ret = wc_ecc_make_key(&rng, 32, &userA);
- ret += wc_ecc_make_key(&rng, 32, &userB);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0){
+ ret = -10001; goto done;
+ }
- if (ret != 0)
- return -3002;
+ ret = wc_ecc_make_key(&rng, 32, &userB);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+ if (ret != 0){
+ ret = -10002; goto done;
+ }
- for (i = 0; i < 48; i++)
+ /* set message to incrementing 0,1,2,etc... */
+ for (i = 0; i < (int)sizeof(msg); i++)
msg[i] = i;
/* encrypt msg to B */
ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz, NULL);
- if (ret != 0)
- return -3003;
+ if (ret != 0) {
+ ret = -10003; goto done;
+ }
/* decrypt msg from A */
ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, NULL);
+ if (ret != 0) {
+ ret = -10004; goto done;
+ }
+
+ if (XMEMCMP(plain, msg, sizeof(msg)) != 0) {
+ ret = -10005; goto done;
+ }
+
+ /* let's verify message exchange works, A is client, B is server */
+ cliCtx = wc_ecc_ctx_new(REQ_RESP_CLIENT, &rng);
+ srvCtx = wc_ecc_ctx_new(REQ_RESP_SERVER, &rng);
+ if (cliCtx == NULL || srvCtx == NULL) {
+ ret = -10006; goto done;
+ }
+
+ /* get salt to send to peer */
+ tmpSalt = wc_ecc_ctx_get_own_salt(cliCtx);
+ if (tmpSalt == NULL) {
+ ret = -10007; goto done;
+ }
+ XMEMCPY(cliSalt, tmpSalt, EXCHANGE_SALT_SZ);
+
+ tmpSalt = wc_ecc_ctx_get_own_salt(srvCtx);
+ if (tmpSalt == NULL) {
+ ret = -10008; goto done;
+ }
+ XMEMCPY(srvSalt, tmpSalt, EXCHANGE_SALT_SZ);
+
+ /* in actual use, we'd get the peer's salt over the transport */
+ ret = wc_ecc_ctx_set_peer_salt(cliCtx, srvSalt);
+ if (ret != 0)
+ goto done;
+ ret = wc_ecc_ctx_set_peer_salt(srvCtx, cliSalt);
if (ret != 0)
- return -3004;
+ goto done;
- if (memcmp(plain, msg, sizeof(msg)) != 0)
- return -3005;
+ ret = wc_ecc_ctx_set_info(cliCtx, (byte*)"wolfSSL MSGE", 11);
+ if (ret != 0)
+ goto done;
+ ret = wc_ecc_ctx_set_info(srvCtx, (byte*)"wolfSSL MSGE", 11);
+ if (ret != 0)
+ goto done;
+ /* get encrypted msg (request) to send to B */
+ outSz = sizeof(out);
+ ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz,cliCtx);
+ if (ret != 0)
+ goto done;
- { /* let's verify message exchange works, A is client, B is server */
- ecEncCtx* cliCtx = wc_ecc_ctx_new(REQ_RESP_CLIENT, &rng);
- ecEncCtx* srvCtx = wc_ecc_ctx_new(REQ_RESP_SERVER, &rng);
+ /* B decrypts msg (request) from A */
+ plainSz = sizeof(plain);
+ ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, srvCtx);
+ if (ret != 0)
+ goto done;
- byte cliSalt[EXCHANGE_SALT_SZ];
- byte srvSalt[EXCHANGE_SALT_SZ];
- const byte* tmpSalt;
+ if (XMEMCMP(plain, msg, sizeof(msg)) != 0) {
+ ret = -10009; goto done;
+ }
- if (cliCtx == NULL || srvCtx == NULL)
- return -3006;
+ /* msg2 (response) from B to A */
+ for (i = 0; i < (int)sizeof(msg2); i++)
+ msg2[i] = i + sizeof(msg2);
- /* get salt to send to peer */
- tmpSalt = wc_ecc_ctx_get_own_salt(cliCtx);
- if (tmpSalt == NULL)
- return -3007;
- memcpy(cliSalt, tmpSalt, EXCHANGE_SALT_SZ);
+ /* get encrypted msg (response) to send to B */
+ ret = wc_ecc_encrypt(&userB, &userA, msg2, sizeof(msg2), out2,
+ &outSz2, srvCtx);
+ if (ret != 0)
+ goto done;
- tmpSalt = wc_ecc_ctx_get_own_salt(srvCtx);
- if (tmpSalt == NULL)
- return -3007;
- memcpy(srvSalt, tmpSalt, EXCHANGE_SALT_SZ);
+ /* A decrypts msg (response) from B */
+ ret = wc_ecc_decrypt(&userA, &userB, out2, outSz2, plain2, &plainSz2,
+ cliCtx);
+ if (ret != 0)
+ goto done;
- /* in actual use, we'd get the peer's salt over the transport */
- ret = wc_ecc_ctx_set_peer_salt(cliCtx, srvSalt);
- ret += wc_ecc_ctx_set_peer_salt(srvCtx, cliSalt);
+ if (XMEMCMP(plain2, msg2, sizeof(msg2)) != 0) {
+ ret = -10010; goto done;
+ }
- ret += wc_ecc_ctx_set_info(cliCtx, (byte*)"wolfSSL MSGE", 11);
- ret += wc_ecc_ctx_set_info(srvCtx, (byte*)"wolfSSL MSGE", 11);
+done:
- if (ret != 0)
- return -3008;
+ /* cleanup */
+ wc_ecc_ctx_free(srvCtx);
+ wc_ecc_ctx_free(cliCtx);
- /* get encrypted msg (request) to send to B */
- outSz = sizeof(out);
- ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz,cliCtx);
- if (ret != 0)
- return -3009;
+ wc_ecc_free(&userB);
+ wc_ecc_free(&userA);
+ wc_FreeRng(&rng);
- /* B decrypts msg (request) from A */
- plainSz = sizeof(plain);
- ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, srvCtx);
- if (ret != 0)
- return -3010;
+ return ret;
+}
- if (memcmp(plain, msg, sizeof(msg)) != 0)
- return -3011;
+#endif /* HAVE_ECC_ENCRYPT && WOLFSSL_AES_128 */
- {
- /* msg2 (response) from B to A */
- byte msg2[48];
- byte plain2[48];
- byte out2[80];
- word32 outSz2 = sizeof(out2);
- word32 plainSz2 = sizeof(plain2);
-
- for (i = 0; i < 48; i++)
- msg2[i] = i+48;
-
- /* get encrypted msg (response) to send to B */
- ret = wc_ecc_encrypt(&userB, &userA, msg2, sizeof(msg2), out2,
- &outSz2, srvCtx);
- if (ret != 0)
- return -3012;
+#ifdef USE_CERT_BUFFERS_256
+int ecc_test_buffers(void) {
+ size_t bytes;
+ ecc_key cliKey;
+ ecc_key servKey;
+ WC_RNG rng;
+ word32 idx = 0;
+ int ret;
+    /* pad our test message to 32 bytes so evenly divisible by AES_BLOCK_SIZE */
+ byte in[] = "Everyone gets Friday off. ecc p";
+ word32 inLen = (word32)XSTRLEN((char*)in);
+ byte out[256];
+ byte plain[256];
+ int verify = 0;
+ word32 x;
- /* A decrypts msg (response) from B */
- ret = wc_ecc_decrypt(&userA, &userB, out2, outSz2, plain2, &plainSz2,
- cliCtx);
- if (ret != 0)
- return -3013;
+ ret = wc_ecc_init_ex(&cliKey, HEAP_HINT, devId);
+ if (ret != 0)
+ return -10011;
+ ret = wc_ecc_init_ex(&servKey, HEAP_HINT, devId);
+ if (ret != 0)
+ return -10012;
- if (memcmp(plain2, msg2, sizeof(msg2)) != 0)
- return -3014;
- }
+ bytes = (size_t)sizeof_ecc_clikey_der_256;
+ /* place client key into ecc_key struct cliKey */
+ ret = wc_EccPrivateKeyDecode(ecc_clikey_der_256, &idx, &cliKey,
+ (word32)bytes);
+ if (ret != 0)
+ return -10013;
+
+ idx = 0;
+ bytes = (size_t)sizeof_ecc_key_der_256;
- /* cleanup */
- wc_ecc_ctx_free(srvCtx);
- wc_ecc_ctx_free(cliCtx);
+ /* place server key into ecc_key struct servKey */
+ ret = wc_EccPrivateKeyDecode(ecc_key_der_256, &idx, &servKey,
+ (word32)bytes);
+ if (ret != 0)
+ return -10014;
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0)
+ return -10015;
+
+#if defined(HAVE_ECC_ENCRYPT) && defined(HAVE_HKDF)
+ {
+ word32 y;
+ /* test encrypt and decrypt if they're available */
+ x = sizeof(out);
+ ret = wc_ecc_encrypt(&cliKey, &servKey, in, sizeof(in), out, &x, NULL);
+ if (ret < 0)
+ return -10016;
+
+ y = sizeof(plain);
+ ret = wc_ecc_decrypt(&cliKey, &servKey, out, x, plain, &y, NULL);
+ if (ret < 0)
+ return -10017;
+
+ if (XMEMCMP(plain, in, inLen))
+ return -10018;
}
+#endif
- /* cleanup */
- wc_ecc_free(&userB);
- wc_ecc_free(&userA);
+ x = sizeof(out);
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &cliKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_sign_hash(in, inLen, out, &x, &rng, &cliKey);
+ } while (ret == WC_PENDING_E);
+ if (ret < 0)
+ return -10019;
+ TEST_SLEEP();
+
+ XMEMSET(plain, 0, sizeof(plain));
+
+ do {
+ #if defined(WOLFSSL_ASYNC_CRYPT)
+ ret = wc_AsyncWait(ret, &cliKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+ #endif
+ if (ret == 0)
+ ret = wc_ecc_verify_hash(out, x, plain, sizeof(plain), &verify,
+ &cliKey);
+ } while (ret == WC_PENDING_E);
+ if (ret < 0)
+ return -10020;
+
+ if (XMEMCMP(plain, in, (word32)ret))
+ return -10021;
+ TEST_SLEEP();
+
+#ifdef WOLFSSL_CERT_EXT
+ idx = 0;
+
+ bytes = sizeof_ecc_clikeypub_der_256;
+
+ ret = wc_EccPublicKeyDecode(ecc_clikeypub_der_256, &idx, &cliKey,
+ (word32) bytes);
+ if (ret != 0)
+ return -10022;
+#endif
+
+ wc_ecc_free(&cliKey);
+ wc_ecc_free(&servKey);
wc_FreeRng(&rng);
return 0;
}
-
-#endif /* HAVE_ECC_ENCRYPT */
+#endif /* USE_CERT_BUFFERS_256 */
#endif /* HAVE_ECC */
#ifdef HAVE_CURVE25519
+#if defined(HAVE_CURVE25519_SHARED_SECRET) && \
+ defined(HAVE_CURVE25519_KEY_IMPORT)
+#ifdef CURVE25519_OVERFLOW_ALL_TESTS
+#define X25519_TEST_CNT 5
+#else
+#define X25519_TEST_CNT 1
+#endif
+static int curve25519_overflow_test(void)
+{
+ /* secret key for party a */
+ byte sa[X25519_TEST_CNT][32] = {
+ {
+ 0x8d,0xaf,0x6e,0x7a,0xc1,0xeb,0x8d,0x30,
+ 0x99,0x86,0xd3,0x90,0x47,0x96,0x21,0x3c,
+ 0x3a,0x75,0xc0,0x7b,0x75,0x01,0x75,0xa3,
+ 0x81,0x4b,0xff,0x5a,0xbc,0x96,0x87,0x28
+ },
+#ifdef CURVE25519_OVERFLOW_ALL_TESTS
+ {
+ 0x9d,0x63,0x5f,0xce,0xe2,0xe8,0xd7,0xfb,
+ 0x68,0x77,0x0e,0x44,0xd1,0xad,0x87,0x2b,
+ 0xf4,0x65,0x06,0xb7,0xbb,0xdb,0xbe,0x6e,
+ 0x02,0x43,0x24,0xc7,0x3d,0x7b,0x88,0x60
+ },
+ {
+ 0x63,0xbf,0x76,0xa9,0x73,0xa0,0x09,0xb9,
+ 0xcc,0xc9,0x4d,0x47,0x2d,0x14,0x0e,0x52,
+ 0xa3,0x84,0x55,0xb8,0x7c,0xdb,0xce,0xb1,
+ 0xe4,0x5b,0x8a,0xb9,0x30,0xf1,0xa4,0xa0
+ },
+ {
+ 0x63,0xbf,0x76,0xa9,0x73,0xa0,0x09,0xb9,
+ 0xcc,0xc9,0x4d,0x47,0x2d,0x14,0x0e,0x52,
+ 0xa3,0x84,0x55,0xb8,0x7c,0xdb,0xce,0xb1,
+ 0xe4,0x5b,0x8a,0xb9,0x30,0xf1,0xa4,0xa0
+ },
+ {
+ 0x63,0xbf,0x76,0xa9,0x73,0xa0,0x09,0xb9,
+ 0xcc,0xc9,0x4d,0x47,0x2d,0x14,0x0e,0x52,
+ 0xa3,0x84,0x55,0xb8,0x7c,0xdb,0xce,0xb1,
+ 0xe4,0x5b,0x8a,0xb9,0x30,0xf1,0xa4,0xa0
+ }
+#endif
+ };
+
+ /* public key for party b */
+ byte pb[X25519_TEST_CNT][32] = {
+ {
+ 0x7f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf0
+ },
+#ifdef CURVE25519_OVERFLOW_ALL_TESTS
+ {
+ /* 0xff first byte in original - invalid! */
+ 0x7f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xf0
+ },
+ {
+ 0x36,0x1a,0x74,0x87,0x28,0x59,0xe0,0xb6,
+ 0xe4,0x2b,0x17,0x9b,0x16,0xb0,0x3b,0xf8,
+ 0xb8,0x9f,0x2a,0x8f,0xc5,0x33,0x68,0x4f,
+ 0xde,0x4d,0xd8,0x80,0x63,0xe7,0xb4,0x0a
+ },
+ {
+ 0x00,0x80,0x38,0x59,0x19,0x3a,0x66,0x12,
+ 0xfd,0xa1,0xec,0x1c,0x40,0x84,0x40,0xbd,
+ 0x64,0x10,0x8b,0x53,0x81,0x21,0x03,0x2d,
+ 0x7d,0x33,0xb4,0x01,0x57,0x0d,0xe1,0x89
+ },
+ {
+ 0x1d,0xf8,0xf8,0x33,0x89,0x6c,0xb7,0xba,
+ 0x94,0x73,0xfa,0xc2,0x36,0xac,0xbe,0x49,
+ 0xaf,0x85,0x3e,0x93,0x5f,0xae,0xb2,0xc0,
+ 0xc8,0x80,0x8f,0x4a,0xaa,0xd3,0x55,0x2b
+ }
+#endif
+ };
+
+ /* expected shared key */
+ byte ss[X25519_TEST_CNT][32] = {
+ {
+ 0x5c,0x4c,0x85,0x5f,0xfb,0x20,0x38,0xcc,
+ 0x55,0x16,0x5b,0x8a,0xa7,0xed,0x57,0x6e,
+ 0x35,0xaa,0x71,0x67,0x85,0x1f,0xb6,0x28,
+ 0x17,0x07,0x7b,0xda,0x76,0xdd,0xe0,0xb4
+ },
+#ifdef CURVE25519_OVERFLOW_ALL_TESTS
+ {
+ 0x33,0xf6,0xc1,0x34,0x62,0x92,0x06,0x02,
+ 0x95,0xdb,0x91,0x4c,0x5d,0x52,0x54,0xc7,
+ 0xd2,0x5b,0x24,0xb5,0x4f,0x33,0x59,0x79,
+ 0x9f,0x6d,0x7e,0x4a,0x4c,0x30,0xd6,0x38
+ },
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02
+ },
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x09
+ },
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10
+ }
+#endif
+ };
+
+ int i;
+ word32 y;
+ byte shared[32];
+ curve25519_key userA;
+
+ wc_curve25519_init(&userA);
+
+ for (i = 0; i < X25519_TEST_CNT; i++) {
+ if (wc_curve25519_import_private_raw(sa[i], sizeof(sa[i]), pb[i],
+ sizeof(pb[i]), &userA) != 0)
+ return -10100 - i;
+
+ /* test against known test vector */
+ XMEMSET(shared, 0, sizeof(shared));
+ y = sizeof(shared);
+ if (wc_curve25519_shared_secret(&userA, &userA, shared, &y) != 0)
+ return -10110 - i;
+
+ if (XMEMCMP(ss[i], shared, y))
+ return -10120 - i;
+ }
+
+ return 0;
+}
+
+/* Test the wc_curve25519_check_public API.
+ *
+ * returns 0 on success and -ve on failure.
+ */
+static int curve25519_check_public_test(void)
+{
+ /* Little-endian values that will fail */
+ byte fail_le[][CURVE25519_KEYSIZE] = {
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ },
+ {
+ 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ },
+ {
+ 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x81
+ },
+ };
+ /* Big-endian values that will fail */
+ byte fail_be[][CURVE25519_KEYSIZE] = {
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ },
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01
+ },
+ {
+ 0x81,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01
+ },
+ };
+ /* Good or valid public value */
+ byte good[CURVE25519_KEYSIZE] = {
+ 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01
+ };
+ int i;
+
+ /* Parameter checks */
+ /* NULL pointer */
+ if (wc_curve25519_check_public(NULL, 0, EC25519_LITTLE_ENDIAN) !=
+ BAD_FUNC_ARG) {
+ return -10200;
+ }
+ if (wc_curve25519_check_public(NULL, 0, EC25519_BIG_ENDIAN) !=
+ BAD_FUNC_ARG) {
+ return -10201;
+ }
+ /* Length of 0 treated differently to other invalid lengths for TLS */
+ if (wc_curve25519_check_public(good, 0, EC25519_LITTLE_ENDIAN) != BUFFER_E)
+ return -10202;
+ if (wc_curve25519_check_public(good, 0, EC25519_BIG_ENDIAN) != BUFFER_E)
+ return -10203;
+
+ /* Length not CURVE25519_KEYSIZE */
+ for (i = 1; i < CURVE25519_KEYSIZE + 2; i++) {
+ if (i == CURVE25519_KEYSIZE)
+ continue;
+ if (wc_curve25519_check_public(good, i, EC25519_LITTLE_ENDIAN) !=
+ ECC_BAD_ARG_E) {
+ return -10204 - i;
+ }
+ if (wc_curve25519_check_public(good, i, EC25519_BIG_ENDIAN) !=
+ ECC_BAD_ARG_E) {
+ return -10214 - i;
+ }
+ }
+
+ /* Little-endian fail cases */
+ for (i = 0; i < (int)(sizeof(fail_le) / sizeof(*fail_le)); i++) {
+ if (wc_curve25519_check_public(fail_le[i], CURVE25519_KEYSIZE,
+ EC25519_LITTLE_ENDIAN) == 0) {
+ return -10224 - i;
+ }
+ }
+ /* Big-endian fail cases */
+ for (i = 0; i < (int)(sizeof(fail_be) / sizeof(*fail_be)); i++) {
+ if (wc_curve25519_check_public(fail_be[i], CURVE25519_KEYSIZE,
+ EC25519_BIG_ENDIAN) == 0) {
+ return -10234 - i;
+ }
+ }
+
+ /* Check a valid public value works! */
+ if (wc_curve25519_check_public(good, CURVE25519_KEYSIZE,
+ EC25519_LITTLE_ENDIAN) != 0) {
+ return -10244;
+ }
+ if (wc_curve25519_check_public(good, CURVE25519_KEYSIZE,
+ EC25519_BIG_ENDIAN) != 0) {
+ return -10245;
+ }
+
+ return 0;
+}
+
+#endif /* HAVE_CURVE25519_SHARED_SECRET && HAVE_CURVE25519_KEY_IMPORT */
int curve25519_test(void)
{
- RNG rng;
+ WC_RNG rng;
+ int ret;
+#ifdef HAVE_CURVE25519_SHARED_SECRET
byte sharedA[32];
byte sharedB[32];
+ word32 y;
+#endif
+#ifdef HAVE_CURVE25519_KEY_EXPORT
byte exportBuf[32];
- word32 x, y;
+#endif
+ word32 x;
curve25519_key userA, userB, pubKey;
+#if defined(HAVE_CURVE25519_SHARED_SECRET) && \
+ defined(HAVE_CURVE25519_KEY_IMPORT)
/* test vectors from
https://tools.ietf.org/html/draft-josefsson-tls-curve25519-03
*/
@@ -5306,9 +20711,17 @@ int curve25519_test(void)
0x73,0x8B,0x99,0xF0,0x94,0x68,0xB8,0xD6,
0xB8,0x51,0x11,0x84,0xD5,0x34,0x94,0xAB
};
+#endif /* HAVE_CURVE25519_SHARED_SECRET */
- if (wc_InitRng(&rng) != 0)
- return -1001;
+ (void)x;
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0)
+ return -10300;
wc_curve25519_init(&userA);
wc_curve25519_init(&userB);
@@ -5316,64 +20729,109 @@ int curve25519_test(void)
/* make curve25519 keys */
if (wc_curve25519_make_key(&rng, 32, &userA) != 0)
- return -1002;
+ return -10301;
if (wc_curve25519_make_key(&rng, 32, &userB) != 0)
- return -1003;
+ return -10302;
+#ifdef HAVE_CURVE25519_SHARED_SECRET
/* find shared secret key */
+ x = sizeof(sharedA);
if (wc_curve25519_shared_secret(&userA, &userB, sharedA, &x) != 0)
- return -1004;
+ return -10303;
+ y = sizeof(sharedB);
if (wc_curve25519_shared_secret(&userB, &userA, sharedB, &y) != 0)
- return -1005;
+ return -10304;
/* compare shared secret keys to test they are the same */
if (y != x)
- return -1006;
+ return -10305;
if (XMEMCMP(sharedA, sharedB, x))
- return -1007;
+ return -10306;
+#endif
+#ifdef HAVE_CURVE25519_KEY_EXPORT
/* export a public key and import it for another user */
+ x = sizeof(exportBuf);
if (wc_curve25519_export_public(&userA, exportBuf, &x) != 0)
- return -1008;
+ return -10307;
+#ifdef HAVE_CURVE25519_KEY_IMPORT
if (wc_curve25519_import_public(exportBuf, x, &pubKey) != 0)
- return -1009;
+ return -10308;
+#endif
+#endif
+#if defined(HAVE_CURVE25519_SHARED_SECRET) && \
+ defined(HAVE_CURVE25519_KEY_IMPORT)
/* test shared key after importing a public key */
XMEMSET(sharedB, 0, sizeof(sharedB));
+ y = sizeof(sharedB);
if (wc_curve25519_shared_secret(&userB, &pubKey, sharedB, &y) != 0)
- return -1010;
+ return -10309;
if (XMEMCMP(sharedA, sharedB, y))
- return -1011;
+ return -10310;
/* import RFC test vectors and compare shared key */
if (wc_curve25519_import_private_raw(sa, sizeof(sa), pa, sizeof(pa), &userA)
!= 0)
- return -1012;
+ return -10311;
if (wc_curve25519_import_private_raw(sb, sizeof(sb), pb, sizeof(pb), &userB)
!= 0)
- return -1013;
+ return -10312;
/* test against known test vector */
XMEMSET(sharedB, 0, sizeof(sharedB));
+ y = sizeof(sharedB);
if (wc_curve25519_shared_secret(&userA, &userB, sharedB, &y) != 0)
- return -1014;
+ return -10313;
if (XMEMCMP(ss, sharedB, y))
- return -1015;
+ return -10314;
- /* test swaping roles of keys and generating same shared key */
+ /* test swapping roles of keys and generating same shared key */
XMEMSET(sharedB, 0, sizeof(sharedB));
+ y = sizeof(sharedB);
if (wc_curve25519_shared_secret(&userB, &userA, sharedB, &y) != 0)
- return -1016;
+ return -10315;
if (XMEMCMP(ss, sharedB, y))
- return -1017;
+ return -10316;
+
+ /* test with 1 generated key and 1 from known test vector */
+ if (wc_curve25519_import_private_raw(sa, sizeof(sa), pa, sizeof(pa), &userA)
+ != 0)
+ return -10317;
+
+ if (wc_curve25519_make_key(&rng, 32, &userB) != 0)
+ return -10318;
+
+ x = sizeof(sharedA);
+ if (wc_curve25519_shared_secret(&userA, &userB, sharedA, &x) != 0)
+ return -10319;
+
+ y = sizeof(sharedB);
+ if (wc_curve25519_shared_secret(&userB, &userA, sharedB, &y) != 0)
+ return -10320;
+
+ /* compare shared secret keys to test they are the same */
+ if (y != x)
+ return -10321;
+
+ if (XMEMCMP(sharedA, sharedB, x))
+ return -10322;
+
+ ret = curve25519_overflow_test();
+ if (ret != 0)
+ return ret;
+ ret = curve25519_check_public_test();
+ if (ret != 0)
+ return ret;
+#endif /* HAVE_CURVE25519_SHARED_SECRET && HAVE_CURVE25519_KEY_IMPORT */
/* clean up keys when done */
wc_curve25519_free(&pubKey);
@@ -5388,337 +20846,812 @@ int curve25519_test(void)
#ifdef HAVE_ED25519
+#ifdef WOLFSSL_TEST_CERT
+static int ed25519_test_cert(void)
+{
+ DecodedCert cert[2];
+ DecodedCert* serverCert = NULL;
+ DecodedCert* caCert = NULL;
+#ifdef HAVE_ED25519_VERIFY
+ ed25519_key key;
+ ed25519_key* pubKey = NULL;
+ int verify;
+#endif /* HAVE_ED25519_VERIFY */
+ int ret;
+ byte* tmp;
+ size_t bytes;
+ XFILE file;
+
+ tmp = XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL) {
+ ERROR_OUT(-10323, done);
+ }
+
+#ifdef USE_CERT_BUFFERS_256
+ XMEMCPY(tmp, ca_ed25519_cert, sizeof_ca_ed25519_cert);
+ bytes = sizeof_ca_ed25519_cert;
+#elif !defined(NO_FILESYSTEM)
+ file = XFOPEN(caEd25519Cert, "rb");
+ if (file == NULL) {
+ ERROR_OUT(-10324, done);
+ }
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+#else
+ /* No certificate to use. */
+ ERROR_OUT(-10325, done);
+#endif
+
+ InitDecodedCert(&cert[0], tmp, (word32)bytes, 0);
+ caCert = &cert[0];
+ ret = ParseCert(caCert, CERT_TYPE, NO_VERIFY, NULL);
+ if (ret != 0) {
+ ERROR_OUT(-10326, done);
+ }
+
+#ifdef USE_CERT_BUFFERS_256
+ XMEMCPY(tmp, server_ed25519_cert, sizeof_server_ed25519_cert);
+ bytes = sizeof_server_ed25519_cert;
+#elif !defined(NO_FILESYSTEM)
+ file = XFOPEN(serverEd25519Cert, "rb");
+ if (file == NULL) {
+ ERROR_OUT(-10327, done);
+ }
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+#else
+ /* No certificate to use. */
+ ERROR_OUT(-10328, done);
+#endif
+
+ InitDecodedCert(&cert[1], tmp, (word32)bytes, 0);
+ serverCert = &cert[1];
+ ret = ParseCert(serverCert, CERT_TYPE, NO_VERIFY, NULL);
+ if (ret != 0) {
+ ERROR_OUT(-10329, done);
+ }
+
+#ifdef HAVE_ED25519_VERIFY
+ ret = wc_ed25519_init(&key);
+ if (ret < 0) {
+ ERROR_OUT(-10330, done);
+ }
+ pubKey = &key;
+ ret = wc_ed25519_import_public(caCert->publicKey, caCert->pubKeySize,
+ pubKey);
+ if (ret < 0) {
+ ERROR_OUT(-10331, done);
+ }
+
+ if (wc_ed25519_verify_msg(serverCert->signature, serverCert->sigLength,
+ serverCert->source + serverCert->certBegin,
+ serverCert->sigIndex - serverCert->certBegin,
+ &verify, pubKey) < 0 || verify != 1) {
+ ERROR_OUT(-10332, done);
+ }
+#endif /* HAVE_ED25519_VERIFY */
+
+done:
+ if (tmp != NULL)
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef HAVE_ED25519_VERIFY
+ wc_ed25519_free(pubKey);
+#endif /* HAVE_ED25519_VERIFY */
+ if (caCert != NULL)
+ FreeDecodedCert(caCert);
+ if (serverCert != NULL)
+ FreeDecodedCert(serverCert);
+
+ return ret;
+}
+
+static int ed25519_test_make_cert(void)
+{
+ WC_RNG rng;
+ Cert cert;
+ DecodedCert decode;
+ ed25519_key key;
+ ed25519_key* privKey = NULL;
+ int ret = 0;
+ byte* tmp = NULL;
+
+ wc_InitCert(&cert);
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0)
+ return -10333;
+
+ wc_ed25519_init(&key);
+ privKey = &key;
+ wc_ed25519_make_key(&rng, ED25519_KEY_SIZE, privKey);
+
+ cert.daysValid = 365 * 2;
+ cert.selfSigned = 1;
+ XMEMCPY(&cert.issuer, &certDefaultName, sizeof(CertName));
+ XMEMCPY(&cert.subject, &certDefaultName, sizeof(CertName));
+ cert.isCA = 0;
+#ifdef WOLFSSL_CERT_EXT
+ ret = wc_SetKeyUsage(&cert, certKeyUsage);
+ if (ret < 0) {
+ ERROR_OUT(-10334, done);
+ }
+ ret = wc_SetSubjectKeyIdFromPublicKey_ex(&cert, ED25519_TYPE, privKey);
+ if (ret < 0) {
+ ERROR_OUT(-10335, done);
+ }
+ ret = wc_SetAuthKeyIdFromPublicKey_ex(&cert, ED25519_TYPE, privKey);
+ if (ret < 0) {
+ ERROR_OUT(-10336, done);
+ }
+#endif
+ tmp = XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL) {
+ ERROR_OUT(-10337, done);
+ }
+
+ cert.sigType = CTC_ED25519;
+ ret = wc_MakeCert_ex(&cert, tmp, FOURK_BUF, ED25519_TYPE, privKey, &rng);
+ if (ret < 0) {
+ ERROR_OUT(-10338, done);
+ }
+ ret = wc_SignCert_ex(cert.bodySz, cert.sigType, tmp, FOURK_BUF,
+ ED25519_TYPE, privKey, &rng);
+ if (ret < 0) {
+ ERROR_OUT(-10339, done);
+ }
+
+ InitDecodedCert(&decode, tmp, ret, HEAP_HINT);
+ ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
+ FreeDecodedCert(&decode);
+ if (ret != 0) {
+ ERROR_OUT(-10340, done);
+ }
+
+done:
+ if (tmp != NULL)
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_ed25519_free(privKey);
+ wc_FreeRng(&rng);
+ return ret;
+}
+#endif /* WOLFSSL_TEST_CERT */
+
+#if defined(HAVE_ED25519_SIGN) && defined(HAVE_ED25519_KEY_EXPORT) && \
+ defined(HAVE_ED25519_KEY_IMPORT)
+static int ed25519ctx_test(void)
+{
+ byte out[ED25519_SIG_SIZE];
+ word32 outlen;
+#ifdef HAVE_ED25519_VERIFY
+ int verify;
+#endif /* HAVE_ED25519_VERIFY */
+ ed25519_key key;
+
+ static const byte sKeyCtx[] = {
+ 0x03,0x05,0x33,0x4e,0x38,0x1a,0xf7,0x8f,
+ 0x14,0x1c,0xb6,0x66,0xf6,0x19,0x9f,0x57,
+ 0xbc,0x34,0x95,0x33,0x5a,0x25,0x6a,0x95,
+ 0xbd,0x2a,0x55,0xbf,0x54,0x66,0x63,0xf6
+ };
+
+ static const byte pKeyCtx[] = {
+ 0xdf,0xc9,0x42,0x5e,0x4f,0x96,0x8f,0x7f,
+ 0x0c,0x29,0xf0,0x25,0x9c,0xf5,0xf9,0xae,
+ 0xd6,0x85,0x1c,0x2b,0xb4,0xad,0x8b,0xfb,
+ 0x86,0x0c,0xfe,0xe0,0xab,0x24,0x82,0x92
+ };
+
+ static const byte sigCtx1[] = {
+ 0x55,0xa4,0xcc,0x2f,0x70,0xa5,0x4e,0x04,
+ 0x28,0x8c,0x5f,0x4c,0xd1,0xe4,0x5a,0x7b,
+ 0xb5,0x20,0xb3,0x62,0x92,0x91,0x18,0x76,
+ 0xca,0xda,0x73,0x23,0x19,0x8d,0xd8,0x7a,
+ 0x8b,0x36,0x95,0x0b,0x95,0x13,0x00,0x22,
+ 0x90,0x7a,0x7f,0xb7,0xc4,0xe9,0xb2,0xd5,
+ 0xf6,0xcc,0xa6,0x85,0xa5,0x87,0xb4,0xb2,
+ 0x1f,0x4b,0x88,0x8e,0x4e,0x7e,0xdb,0x0d
+ };
+
+ static const byte sigCtx2[] = {
+ 0xcc,0x5e,0x63,0xa2,0x7e,0x94,0xaf,0xd3,
+ 0x41,0x83,0x38,0xd2,0x48,0x6f,0xa9,0x2a,
+ 0xf9,0x91,0x7c,0x2d,0x98,0x9e,0x06,0xe5,
+ 0x02,0x77,0x72,0x1c,0x34,0x38,0x18,0xb4,
+ 0x21,0x96,0xbc,0x29,0x2e,0x68,0xf3,0x4d,
+ 0x85,0x9b,0xbe,0xad,0x17,0x9f,0x54,0x54,
+ 0x2d,0x4b,0x04,0xdc,0xfb,0xfa,0x4a,0x68,
+ 0x4e,0x39,0x50,0xfb,0x1c,0xcd,0x8d,0x0d
+ };
+
+ static const byte msgCtx[] = {
+ 0xf7,0x26,0x93,0x6d,0x19,0xc8,0x00,0x49,
+ 0x4e,0x3f,0xda,0xff,0x20,0xb2,0x76,0xa8
+ };
+
+ static const byte contextCtx[] = {
+ 0x66,0x6f,0x6f
+ };
+
+ outlen = sizeof(out);
+ XMEMSET(out, 0, sizeof(out));
+
+ if (wc_ed25519_import_private_key(sKeyCtx, ED25519_KEY_SIZE, pKeyCtx,
+ sizeof(pKeyCtx), &key) != 0)
+ return -10400;
+
+ if (wc_ed25519ctx_sign_msg(msgCtx, sizeof(msgCtx), out, &outlen, &key,
+ contextCtx, sizeof(contextCtx)) != 0)
+ return -10401;
+
+ if (XMEMCMP(out, sigCtx1, 64))
+ return -10402;
+
+#if defined(HAVE_ED25519_VERIFY)
+ /* test verify on good msg */
+ if (wc_ed25519ctx_verify_msg(out, outlen, msgCtx, sizeof(msgCtx), &verify,
+ &key, contextCtx, sizeof(contextCtx)) != 0 ||
+ verify != 1)
+ return -10403;
+#endif
+
+ if (wc_ed25519ctx_sign_msg(msgCtx, sizeof(msgCtx), out, &outlen, &key, NULL,
+ 0) != 0)
+ return -10404;
+
+ if (XMEMCMP(out, sigCtx2, 64))
+ return -10405;
+
+#if defined(HAVE_ED25519_VERIFY)
+ /* test verify on good msg */
+ if (wc_ed25519ctx_verify_msg(out, outlen, msgCtx, sizeof(msgCtx), &verify,
+ &key, NULL, 0) != 0 || verify != 1)
+ return -10406;
+#endif
+
+ wc_ed25519_free(&key);
+
+ return 0;
+}
+
+static int ed25519ph_test(void)
+{
+ byte out[ED25519_SIG_SIZE];
+ word32 outlen;
+#ifdef HAVE_ED25519_VERIFY
+ int verify;
+#endif /* HAVE_ED25519_VERIFY */
+ ed25519_key key;
+
+ static const byte sKeyPh[] = {
+ 0x83,0x3f,0xe6,0x24,0x09,0x23,0x7b,0x9d,
+ 0x62,0xec,0x77,0x58,0x75,0x20,0x91,0x1e,
+ 0x9a,0x75,0x9c,0xec,0x1d,0x19,0x75,0x5b,
+ 0x7d,0xa9,0x01,0xb9,0x6d,0xca,0x3d,0x42
+ };
+
+ static const byte pKeyPh[] = {
+ 0xec,0x17,0x2b,0x93,0xad,0x5e,0x56,0x3b,
+ 0xf4,0x93,0x2c,0x70,0xe1,0x24,0x50,0x34,
+ 0xc3,0x54,0x67,0xef,0x2e,0xfd,0x4d,0x64,
+ 0xeb,0xf8,0x19,0x68,0x34,0x67,0xe2,0xbf
+ };
+
+ static const byte sigPh1[] = {
+ 0x98,0xa7,0x02,0x22,0xf0,0xb8,0x12,0x1a,
+ 0xa9,0xd3,0x0f,0x81,0x3d,0x68,0x3f,0x80,
+ 0x9e,0x46,0x2b,0x46,0x9c,0x7f,0xf8,0x76,
+ 0x39,0x49,0x9b,0xb9,0x4e,0x6d,0xae,0x41,
+ 0x31,0xf8,0x50,0x42,0x46,0x3c,0x2a,0x35,
+ 0x5a,0x20,0x03,0xd0,0x62,0xad,0xf5,0xaa,
+ 0xa1,0x0b,0x8c,0x61,0xe6,0x36,0x06,0x2a,
+ 0xaa,0xd1,0x1c,0x2a,0x26,0x08,0x34,0x06
+ };
+
+ static const byte sigPh2[] = {
+ 0xe0,0x39,0x70,0x2b,0x4c,0x25,0x95,0xa6,
+ 0xa5,0x41,0xac,0x85,0x09,0x23,0x6e,0x29,
+ 0x90,0x47,0x47,0x95,0x33,0x0c,0x9b,0x34,
+ 0xa7,0x5f,0x58,0xa6,0x60,0x12,0x9e,0x08,
+ 0xfd,0x73,0x69,0x43,0xfb,0x19,0x43,0xa5,
+ 0x57,0x20,0xb9,0xe0,0x95,0x7b,0x1e,0xd6,
+ 0x73,0x48,0x16,0x61,0x9f,0x13,0x88,0xf4,
+ 0x3f,0x73,0xe6,0xe3,0xba,0xa8,0x1c,0x0e
+ };
+
+ static const byte msgPh[] = {
+ 0x61,0x62,0x63
+ };
+
+ /* SHA-512 hash of msgPh */
+ static const byte hashPh[] = {
+ 0xdd,0xaf,0x35,0xa1,0x93,0x61,0x7a,0xba,
+ 0xcc,0x41,0x73,0x49,0xae,0x20,0x41,0x31,
+ 0x12,0xe6,0xfa,0x4e,0x89,0xa9,0x7e,0xa2,
+ 0x0a,0x9e,0xee,0xe6,0x4b,0x55,0xd3,0x9a,
+ 0x21,0x92,0x99,0x2a,0x27,0x4f,0xc1,0xa8,
+ 0x36,0xba,0x3c,0x23,0xa3,0xfe,0xeb,0xbd,
+ 0x45,0x4d,0x44,0x23,0x64,0x3c,0xe8,0x0e,
+ 0x2a,0x9a,0xc9,0x4f,0xa5,0x4c,0xa4,0x9f
+ };
+
+ static const byte contextPh2[] = {
+ 0x66,0x6f,0x6f
+ };
+
+ outlen = sizeof(out);
+ XMEMSET(out, 0, sizeof(out));
+
+ if (wc_ed25519_import_private_key(sKeyPh, ED25519_KEY_SIZE, pKeyPh,
+ sizeof(pKeyPh), &key) != 0) {
+ return -10500;
+ }
+
+ if (wc_ed25519ph_sign_msg(msgPh, sizeof(msgPh), out, &outlen, &key, NULL,
+ 0) != 0) {
+ return -10501;
+ }
+
+ if (XMEMCMP(out, sigPh1, 64))
+ return -10502;
+
+#if defined(HAVE_ED25519_VERIFY)
+ /* test verify on good msg */
+ if (wc_ed25519ph_verify_msg(out, outlen, msgPh, sizeof(msgPh), &verify,
+ &key, NULL, 0) != 0 ||
+ verify != 1) {
+ return -10503;
+ }
+#endif
+
+ if (wc_ed25519ph_sign_msg(msgPh, sizeof(msgPh), out, &outlen, &key,
+ contextPh2, sizeof(contextPh2)) != 0) {
+ return -10504;
+ }
+
+ if (XMEMCMP(out, sigPh2, 64))
+ return -10505;
+
+#if defined(HAVE_ED25519_VERIFY)
+ /* test verify on good msg */
+ if (wc_ed25519ph_verify_msg(out, outlen, msgPh, sizeof(msgPh), &verify,
+ &key, contextPh2, sizeof(contextPh2)) != 0 ||
+ verify != 1) {
+ return -10506;
+ }
+#endif
+
+ if (wc_ed25519ph_sign_hash(hashPh, sizeof(hashPh), out, &outlen, &key, NULL,
+ 0) != 0) {
+ return -10507;
+ }
+
+ if (XMEMCMP(out, sigPh1, 64))
+ return -10508;
+
+#if defined(HAVE_ED25519_VERIFY)
+ if (wc_ed25519ph_verify_hash(out, outlen, hashPh, sizeof(hashPh), &verify,
+ &key, NULL, 0) != 0 ||
+ verify != 1) {
+ return -10509;
+ }
+#endif
+
+ if (wc_ed25519ph_sign_hash(hashPh, sizeof(hashPh), out, &outlen, &key,
+ contextPh2, sizeof(contextPh2)) != 0) {
+ return -10510;
+ }
+
+ if (XMEMCMP(out, sigPh2, 64))
+ return -10511;
+
+#if defined(HAVE_ED25519_VERIFY)
+ if (wc_ed25519ph_verify_hash(out, outlen, hashPh, sizeof(hashPh), &verify,
+ &key, contextPh2, sizeof(contextPh2)) != 0 ||
+ verify != 1) {
+ return -10512;
+ }
+#endif
+
+ wc_ed25519_free(&key);
+
+ return 0;
+}
+#endif /* HAVE_ED25519_SIGN && HAVE_ED25519_KEY_EXPORT && HAVE_ED25519_KEY_IMPORT */
+
int ed25519_test(void)
{
- RNG rng;
+ int ret;
+ WC_RNG rng;
+#if defined(HAVE_ED25519_SIGN) && defined(HAVE_ED25519_KEY_EXPORT) &&\
+ defined(HAVE_ED25519_KEY_IMPORT)
byte out[ED25519_SIG_SIZE];
byte exportPKey[ED25519_KEY_SIZE];
byte exportSKey[ED25519_KEY_SIZE];
- word32 outlen;
word32 exportPSz;
word32 exportSSz;
+ int i;
+ word32 outlen;
+#ifdef HAVE_ED25519_VERIFY
+ int verify;
+#endif /* HAVE_ED25519_VERIFY */
+#endif /* HAVE_ED25519_SIGN && HAVE_ED25519_KEY_EXPORT && HAVE_ED25519_KEY_IMPORT */
word32 keySz, sigSz;
- int i, verify;
ed25519_key key;
ed25519_key key2;
+#if defined(HAVE_ED25519_SIGN) && defined(HAVE_ED25519_KEY_EXPORT) && \
+ defined(HAVE_ED25519_KEY_IMPORT)
/* test vectors from
https://tools.ietf.org/html/draft-josefsson-eddsa-ed25519-02
*/
- const byte sKey1[] = {
- 0x9d,0x61,0xb1,0x9d,0xef,0xfd,0x5a,0x60,
- 0xba,0x84,0x4a,0xf4,0x92,0xec,0x2c,0xc4,
- 0x44,0x49,0xc5,0x69,0x7b,0x32,0x69,0x19,
- 0x70,0x3b,0xac,0x03,0x1c,0xae,0x7f,0x60
+ static const byte sKey1[] = {
+ 0x9d,0x61,0xb1,0x9d,0xef,0xfd,0x5a,0x60,
+ 0xba,0x84,0x4a,0xf4,0x92,0xec,0x2c,0xc4,
+ 0x44,0x49,0xc5,0x69,0x7b,0x32,0x69,0x19,
+ 0x70,0x3b,0xac,0x03,0x1c,0xae,0x7f,0x60
};
- const byte sKey2[] = {
- 0x4c,0xcd,0x08,0x9b,0x28,0xff,0x96,0xda,
- 0x9d,0xb6,0xc3,0x46,0xec,0x11,0x4e,0x0f,
- 0x5b,0x8a,0x31,0x9f,0x35,0xab,0xa6,0x24,
- 0xda,0x8c,0xf6,0xed,0x4f,0xb8,0xa6,0xfb
+ static const byte sKey2[] = {
+ 0x4c,0xcd,0x08,0x9b,0x28,0xff,0x96,0xda,
+ 0x9d,0xb6,0xc3,0x46,0xec,0x11,0x4e,0x0f,
+ 0x5b,0x8a,0x31,0x9f,0x35,0xab,0xa6,0x24,
+ 0xda,0x8c,0xf6,0xed,0x4f,0xb8,0xa6,0xfb
};
- const byte sKey3[] = {
- 0xc5,0xaa,0x8d,0xf4,0x3f,0x9f,0x83,0x7b,
- 0xed,0xb7,0x44,0x2f,0x31,0xdc,0xb7,0xb1,
- 0x66,0xd3,0x85,0x35,0x07,0x6f,0x09,0x4b,
- 0x85,0xce,0x3a,0x2e,0x0b,0x44,0x58,0xf7
+ static const byte sKey3[] = {
+ 0xc5,0xaa,0x8d,0xf4,0x3f,0x9f,0x83,0x7b,
+ 0xed,0xb7,0x44,0x2f,0x31,0xdc,0xb7,0xb1,
+ 0x66,0xd3,0x85,0x35,0x07,0x6f,0x09,0x4b,
+ 0x85,0xce,0x3a,0x2e,0x0b,0x44,0x58,0xf7
};
/* uncompressed test */
- const byte sKey4[] = {
- 0x9d,0x61,0xb1,0x9d,0xef,0xfd,0x5a,0x60,
- 0xba,0x84,0x4a,0xf4,0x92,0xec,0x2c,0xc4,
- 0x44,0x49,0xc5,0x69,0x7b,0x32,0x69,0x19,
- 0x70,0x3b,0xac,0x03,0x1c,0xae,0x7f,0x60
+ static const byte sKey4[] = {
+ 0x9d,0x61,0xb1,0x9d,0xef,0xfd,0x5a,0x60,
+ 0xba,0x84,0x4a,0xf4,0x92,0xec,0x2c,0xc4,
+ 0x44,0x49,0xc5,0x69,0x7b,0x32,0x69,0x19,
+ 0x70,0x3b,0xac,0x03,0x1c,0xae,0x7f,0x60
};
/* compressed prefix test */
- const byte sKey5[] = {
- 0x9d,0x61,0xb1,0x9d,0xef,0xfd,0x5a,0x60,
- 0xba,0x84,0x4a,0xf4,0x92,0xec,0x2c,0xc4,
- 0x44,0x49,0xc5,0x69,0x7b,0x32,0x69,0x19,
- 0x70,0x3b,0xac,0x03,0x1c,0xae,0x7f,0x60
+ static const byte sKey5[] = {
+ 0x9d,0x61,0xb1,0x9d,0xef,0xfd,0x5a,0x60,
+ 0xba,0x84,0x4a,0xf4,0x92,0xec,0x2c,0xc4,
+ 0x44,0x49,0xc5,0x69,0x7b,0x32,0x69,0x19,
+ 0x70,0x3b,0xac,0x03,0x1c,0xae,0x7f,0x60
};
- const byte sKey6[] = {
- 0xf5,0xe5,0x76,0x7c,0xf1,0x53,0x31,0x95,
- 0x17,0x63,0x0f,0x22,0x68,0x76,0xb8,0x6c,
- 0x81,0x60,0xcc,0x58,0x3b,0xc0,0x13,0x74,
- 0x4c,0x6b,0xf2,0x55,0xf5,0xcc,0x0e,0xe5
+ static const byte sKey6[] = {
+ 0xf5,0xe5,0x76,0x7c,0xf1,0x53,0x31,0x95,
+ 0x17,0x63,0x0f,0x22,0x68,0x76,0xb8,0x6c,
+ 0x81,0x60,0xcc,0x58,0x3b,0xc0,0x13,0x74,
+ 0x4c,0x6b,0xf2,0x55,0xf5,0xcc,0x0e,0xe5
};
- const byte* sKeys[] = {sKey1, sKey2, sKey3, sKey4, sKey5, sKey6};
+ static const byte* sKeys[] = {sKey1, sKey2, sKey3, sKey4, sKey5, sKey6};
- const byte pKey1[] = {
- 0xd7,0x5a,0x98,0x01,0x82,0xb1,0x0a,0xb7,
- 0xd5,0x4b,0xfe,0xd3,0xc9,0x64,0x07,0x3a,
- 0x0e,0xe1,0x72,0xf3,0xda,0xa6,0x23,0x25,
- 0xaf,0x02,0x1a,0x68,0xf7,0x07,0x51,0x1a
+ static const byte pKey1[] = {
+ 0xd7,0x5a,0x98,0x01,0x82,0xb1,0x0a,0xb7,
+ 0xd5,0x4b,0xfe,0xd3,0xc9,0x64,0x07,0x3a,
+ 0x0e,0xe1,0x72,0xf3,0xda,0xa6,0x23,0x25,
+ 0xaf,0x02,0x1a,0x68,0xf7,0x07,0x51,0x1a
};
- const byte pKey2[] = {
- 0x3d,0x40,0x17,0xc3,0xe8,0x43,0x89,0x5a,
- 0x92,0xb7,0x0a,0xa7,0x4d,0x1b,0x7e,0xbc,
+ static const byte pKey2[] = {
+ 0x3d,0x40,0x17,0xc3,0xe8,0x43,0x89,0x5a,
+ 0x92,0xb7,0x0a,0xa7,0x4d,0x1b,0x7e,0xbc,
0x9c,0x98,0x2c,0xcf,0x2e,0xc4,0x96,0x8c,
- 0xc0,0xcd,0x55,0xf1,0x2a,0xf4,0x66,0x0c
+ 0xc0,0xcd,0x55,0xf1,0x2a,0xf4,0x66,0x0c
};
- const byte pKey3[] = {
- 0xfc,0x51,0xcd,0x8e,0x62,0x18,0xa1,0xa3,
- 0x8d,0xa4,0x7e,0xd0,0x02,0x30,0xf0,0x58,
- 0x08,0x16,0xed,0x13,0xba,0x33,0x03,0xac,
- 0x5d,0xeb,0x91,0x15,0x48,0x90,0x80,0x25
+ static const byte pKey3[] = {
+ 0xfc,0x51,0xcd,0x8e,0x62,0x18,0xa1,0xa3,
+ 0x8d,0xa4,0x7e,0xd0,0x02,0x30,0xf0,0x58,
+ 0x08,0x16,0xed,0x13,0xba,0x33,0x03,0xac,
+ 0x5d,0xeb,0x91,0x15,0x48,0x90,0x80,0x25
};
/* uncompressed test */
- const byte pKey4[] = {
- 0x04,0x55,0xd0,0xe0,0x9a,0x2b,0x9d,0x34,
- 0x29,0x22,0x97,0xe0,0x8d,0x60,0xd0,0xf6,
- 0x20,0xc5,0x13,0xd4,0x72,0x53,0x18,0x7c,
- 0x24,0xb1,0x27,0x86,0xbd,0x77,0x76,0x45,
- 0xce,0x1a,0x51,0x07,0xf7,0x68,0x1a,0x02,
- 0xaf,0x25,0x23,0xa6,0xda,0xf3,0x72,0xe1,
- 0x0e,0x3a,0x07,0x64,0xc9,0xd3,0xfe,0x4b,
- 0xd5,0xb7,0x0a,0xb1,0x82,0x01,0x98,0x5a,
- 0xd7
+ static const byte pKey4[] = {
+ 0x04,0x55,0xd0,0xe0,0x9a,0x2b,0x9d,0x34,
+ 0x29,0x22,0x97,0xe0,0x8d,0x60,0xd0,0xf6,
+ 0x20,0xc5,0x13,0xd4,0x72,0x53,0x18,0x7c,
+ 0x24,0xb1,0x27,0x86,0xbd,0x77,0x76,0x45,
+ 0xce,0x1a,0x51,0x07,0xf7,0x68,0x1a,0x02,
+ 0xaf,0x25,0x23,0xa6,0xda,0xf3,0x72,0xe1,
+ 0x0e,0x3a,0x07,0x64,0xc9,0xd3,0xfe,0x4b,
+ 0xd5,0xb7,0x0a,0xb1,0x82,0x01,0x98,0x5a,
+ 0xd7
};
/* compressed prefix */
- const byte pKey5[] = {
- 0x40,0xd7,0x5a,0x98,0x01,0x82,0xb1,0x0a,0xb7,
- 0xd5,0x4b,0xfe,0xd3,0xc9,0x64,0x07,0x3a,
- 0x0e,0xe1,0x72,0xf3,0xda,0xa6,0x23,0x25,
- 0xaf,0x02,0x1a,0x68,0xf7,0x07,0x51,0x1a
+ static const byte pKey5[] = {
+ 0x40,0xd7,0x5a,0x98,0x01,0x82,0xb1,0x0a,0xb7,
+ 0xd5,0x4b,0xfe,0xd3,0xc9,0x64,0x07,0x3a,
+ 0x0e,0xe1,0x72,0xf3,0xda,0xa6,0x23,0x25,
+ 0xaf,0x02,0x1a,0x68,0xf7,0x07,0x51,0x1a
};
- const byte pKey6[] = {
- 0x27,0x81,0x17,0xfc,0x14,0x4c,0x72,0x34,
- 0x0f,0x67,0xd0,0xf2,0x31,0x6e,0x83,0x86,
- 0xce,0xff,0xbf,0x2b,0x24,0x28,0xc9,0xc5,
- 0x1f,0xef,0x7c,0x59,0x7f,0x1d,0x42,0x6e
+ static const byte pKey6[] = {
+ 0x27,0x81,0x17,0xfc,0x14,0x4c,0x72,0x34,
+ 0x0f,0x67,0xd0,0xf2,0x31,0x6e,0x83,0x86,
+ 0xce,0xff,0xbf,0x2b,0x24,0x28,0xc9,0xc5,
+ 0x1f,0xef,0x7c,0x59,0x7f,0x1d,0x42,0x6e
};
- const byte* pKeys[] = {pKey1, pKey2, pKey3, pKey4, pKey5, pKey6};
- const byte pKeySz[] = {sizeof(pKey1), sizeof(pKey2), sizeof(pKey3),
+ static const byte* pKeys[] = {pKey1, pKey2, pKey3, pKey4, pKey5, pKey6};
+ static const byte pKeySz[] = {sizeof(pKey1), sizeof(pKey2), sizeof(pKey3),
sizeof(pKey4), sizeof(pKey5), sizeof(pKey6)};
- const byte sig1[] = {
- 0xe5,0x56,0x43,0x00,0xc3,0x60,0xac,0x72,
- 0x90,0x86,0xe2,0xcc,0x80,0x6e,0x82,0x8a,
- 0x84,0x87,0x7f,0x1e,0xb8,0xe5,0xd9,0x74,
- 0xd8,0x73,0xe0,0x65,0x22,0x49,0x01,0x55,
- 0x5f,0xb8,0x82,0x15,0x90,0xa3,0x3b,0xac,
- 0xc6,0x1e,0x39,0x70,0x1c,0xf9,0xb4,0x6b,
- 0xd2,0x5b,0xf5,0xf0,0x59,0x5b,0xbe,0x24,
- 0x65,0x51,0x41,0x43,0x8e,0x7a,0x10,0x0b
- };
-
- const byte sig2[] = {
- 0x92,0xa0,0x09,0xa9,0xf0,0xd4,0xca,0xb8,
- 0x72,0x0e,0x82,0x0b,0x5f,0x64,0x25,0x40,
- 0xa2,0xb2,0x7b,0x54,0x16,0x50,0x3f,0x8f,
- 0xb3,0x76,0x22,0x23,0xeb,0xdb,0x69,0xda,
- 0x08,0x5a,0xc1,0xe4,0x3e,0x15,0x99,0x6e,
- 0x45,0x8f,0x36,0x13,0xd0,0xf1,0x1d,0x8c,
- 0x38,0x7b,0x2e,0xae,0xb4,0x30,0x2a,0xee,
- 0xb0,0x0d,0x29,0x16,0x12,0xbb,0x0c,0x00
- };
-
- const byte sig3[] = {
- 0x62,0x91,0xd6,0x57,0xde,0xec,0x24,0x02,
- 0x48,0x27,0xe6,0x9c,0x3a,0xbe,0x01,0xa3,
- 0x0c,0xe5,0x48,0xa2,0x84,0x74,0x3a,0x44,
- 0x5e,0x36,0x80,0xd7,0xdb,0x5a,0xc3,0xac,
- 0x18,0xff,0x9b,0x53,0x8d,0x16,0xf2,0x90,
- 0xae,0x67,0xf7,0x60,0x98,0x4d,0xc6,0x59,
- 0x4a,0x7c,0x15,0xe9,0x71,0x6e,0xd2,0x8d,
- 0xc0,0x27,0xbe,0xce,0xea,0x1e,0xc4,0x0a
+ static const byte sig1[] = {
+ 0xe5,0x56,0x43,0x00,0xc3,0x60,0xac,0x72,
+ 0x90,0x86,0xe2,0xcc,0x80,0x6e,0x82,0x8a,
+ 0x84,0x87,0x7f,0x1e,0xb8,0xe5,0xd9,0x74,
+ 0xd8,0x73,0xe0,0x65,0x22,0x49,0x01,0x55,
+ 0x5f,0xb8,0x82,0x15,0x90,0xa3,0x3b,0xac,
+ 0xc6,0x1e,0x39,0x70,0x1c,0xf9,0xb4,0x6b,
+ 0xd2,0x5b,0xf5,0xf0,0x59,0x5b,0xbe,0x24,
+ 0x65,0x51,0x41,0x43,0x8e,0x7a,0x10,0x0b
+ };
+
+ static const byte sig2[] = {
+ 0x92,0xa0,0x09,0xa9,0xf0,0xd4,0xca,0xb8,
+ 0x72,0x0e,0x82,0x0b,0x5f,0x64,0x25,0x40,
+ 0xa2,0xb2,0x7b,0x54,0x16,0x50,0x3f,0x8f,
+ 0xb3,0x76,0x22,0x23,0xeb,0xdb,0x69,0xda,
+ 0x08,0x5a,0xc1,0xe4,0x3e,0x15,0x99,0x6e,
+ 0x45,0x8f,0x36,0x13,0xd0,0xf1,0x1d,0x8c,
+ 0x38,0x7b,0x2e,0xae,0xb4,0x30,0x2a,0xee,
+ 0xb0,0x0d,0x29,0x16,0x12,0xbb,0x0c,0x00
+ };
+
+ static const byte sig3[] = {
+ 0x62,0x91,0xd6,0x57,0xde,0xec,0x24,0x02,
+ 0x48,0x27,0xe6,0x9c,0x3a,0xbe,0x01,0xa3,
+ 0x0c,0xe5,0x48,0xa2,0x84,0x74,0x3a,0x44,
+ 0x5e,0x36,0x80,0xd7,0xdb,0x5a,0xc3,0xac,
+ 0x18,0xff,0x9b,0x53,0x8d,0x16,0xf2,0x90,
+ 0xae,0x67,0xf7,0x60,0x98,0x4d,0xc6,0x59,
+ 0x4a,0x7c,0x15,0xe9,0x71,0x6e,0xd2,0x8d,
+ 0xc0,0x27,0xbe,0xce,0xea,0x1e,0xc4,0x0a
};
/* uncompressed test */
- const byte sig4[] = {
- 0xe5,0x56,0x43,0x00,0xc3,0x60,0xac,0x72,
- 0x90,0x86,0xe2,0xcc,0x80,0x6e,0x82,0x8a,
- 0x84,0x87,0x7f,0x1e,0xb8,0xe5,0xd9,0x74,
- 0xd8,0x73,0xe0,0x65,0x22,0x49,0x01,0x55,
- 0x5f,0xb8,0x82,0x15,0x90,0xa3,0x3b,0xac,
- 0xc6,0x1e,0x39,0x70,0x1c,0xf9,0xb4,0x6b,
- 0xd2,0x5b,0xf5,0xf0,0x59,0x5b,0xbe,0x24,
- 0x65,0x51,0x41,0x43,0x8e,0x7a,0x10,0x0b
+ static const byte sig4[] = {
+ 0xe5,0x56,0x43,0x00,0xc3,0x60,0xac,0x72,
+ 0x90,0x86,0xe2,0xcc,0x80,0x6e,0x82,0x8a,
+ 0x84,0x87,0x7f,0x1e,0xb8,0xe5,0xd9,0x74,
+ 0xd8,0x73,0xe0,0x65,0x22,0x49,0x01,0x55,
+ 0x5f,0xb8,0x82,0x15,0x90,0xa3,0x3b,0xac,
+ 0xc6,0x1e,0x39,0x70,0x1c,0xf9,0xb4,0x6b,
+ 0xd2,0x5b,0xf5,0xf0,0x59,0x5b,0xbe,0x24,
+ 0x65,0x51,0x41,0x43,0x8e,0x7a,0x10,0x0b
};
/* compressed prefix */
- const byte sig5[] = {
- 0xe5,0x56,0x43,0x00,0xc3,0x60,0xac,0x72,
- 0x90,0x86,0xe2,0xcc,0x80,0x6e,0x82,0x8a,
- 0x84,0x87,0x7f,0x1e,0xb8,0xe5,0xd9,0x74,
- 0xd8,0x73,0xe0,0x65,0x22,0x49,0x01,0x55,
- 0x5f,0xb8,0x82,0x15,0x90,0xa3,0x3b,0xac,
- 0xc6,0x1e,0x39,0x70,0x1c,0xf9,0xb4,0x6b,
- 0xd2,0x5b,0xf5,0xf0,0x59,0x5b,0xbe,0x24,
- 0x65,0x51,0x41,0x43,0x8e,0x7a,0x10,0x0b
- };
-
- const byte sig6[] = {
- 0x0a,0xab,0x4c,0x90,0x05,0x01,0xb3,0xe2,
- 0x4d,0x7c,0xdf,0x46,0x63,0x32,0x6a,0x3a,
- 0x87,0xdf,0x5e,0x48,0x43,0xb2,0xcb,0xdb,
- 0x67,0xcb,0xf6,0xe4,0x60,0xfe,0xc3,0x50,
- 0xaa,0x53,0x71,0xb1,0x50,0x8f,0x9f,0x45,
- 0x28,0xec,0xea,0x23,0xc4,0x36,0xd9,0x4b,
- 0x5e,0x8f,0xcd,0x4f,0x68,0x1e,0x30,0xa6,
- 0xac,0x00,0xa9,0x70,0x4a,0x18,0x8a,0x03
- };
-
- const byte* sigs[] = {sig1, sig2, sig3, sig4, sig5, sig6};
-
- const byte msg1[] = {};
- const byte msg2[] = {0x72};
- const byte msg3[] = {0xAF,0x82};
+ static const byte sig5[] = {
+ 0xe5,0x56,0x43,0x00,0xc3,0x60,0xac,0x72,
+ 0x90,0x86,0xe2,0xcc,0x80,0x6e,0x82,0x8a,
+ 0x84,0x87,0x7f,0x1e,0xb8,0xe5,0xd9,0x74,
+ 0xd8,0x73,0xe0,0x65,0x22,0x49,0x01,0x55,
+ 0x5f,0xb8,0x82,0x15,0x90,0xa3,0x3b,0xac,
+ 0xc6,0x1e,0x39,0x70,0x1c,0xf9,0xb4,0x6b,
+ 0xd2,0x5b,0xf5,0xf0,0x59,0x5b,0xbe,0x24,
+ 0x65,0x51,0x41,0x43,0x8e,0x7a,0x10,0x0b
+ };
+
+ static const byte sig6[] = {
+ 0x0a,0xab,0x4c,0x90,0x05,0x01,0xb3,0xe2,
+ 0x4d,0x7c,0xdf,0x46,0x63,0x32,0x6a,0x3a,
+ 0x87,0xdf,0x5e,0x48,0x43,0xb2,0xcb,0xdb,
+ 0x67,0xcb,0xf6,0xe4,0x60,0xfe,0xc3,0x50,
+ 0xaa,0x53,0x71,0xb1,0x50,0x8f,0x9f,0x45,
+ 0x28,0xec,0xea,0x23,0xc4,0x36,0xd9,0x4b,
+ 0x5e,0x8f,0xcd,0x4f,0x68,0x1e,0x30,0xa6,
+ 0xac,0x00,0xa9,0x70,0x4a,0x18,0x8a,0x03
+ };
+
+ static const byte* sigs[] = {sig1, sig2, sig3, sig4, sig5, sig6};
+
+ static const byte msg1[] = {0x0 };
+ static const byte msg2[] = {0x72};
+ static const byte msg3[] = {0xAF,0x82};
/* test of a 1024 byte long message */
- const byte msg4[] = {
- 0x08,0xb8,0xb2,0xb7,0x33,0x42,0x42,0x43,
- 0x76,0x0f,0xe4,0x26,0xa4,0xb5,0x49,0x08,
- 0x63,0x21,0x10,0xa6,0x6c,0x2f,0x65,0x91,
- 0xea,0xbd,0x33,0x45,0xe3,0xe4,0xeb,0x98,
- 0xfa,0x6e,0x26,0x4b,0xf0,0x9e,0xfe,0x12,
- 0xee,0x50,0xf8,0xf5,0x4e,0x9f,0x77,0xb1,
- 0xe3,0x55,0xf6,0xc5,0x05,0x44,0xe2,0x3f,
- 0xb1,0x43,0x3d,0xdf,0x73,0xbe,0x84,0xd8,
- 0x79,0xde,0x7c,0x00,0x46,0xdc,0x49,0x96,
- 0xd9,0xe7,0x73,0xf4,0xbc,0x9e,0xfe,0x57,
- 0x38,0x82,0x9a,0xdb,0x26,0xc8,0x1b,0x37,
- 0xc9,0x3a,0x1b,0x27,0x0b,0x20,0x32,0x9d,
- 0x65,0x86,0x75,0xfc,0x6e,0xa5,0x34,0xe0,
- 0x81,0x0a,0x44,0x32,0x82,0x6b,0xf5,0x8c,
- 0x94,0x1e,0xfb,0x65,0xd5,0x7a,0x33,0x8b,
- 0xbd,0x2e,0x26,0x64,0x0f,0x89,0xff,0xbc,
- 0x1a,0x85,0x8e,0xfc,0xb8,0x55,0x0e,0xe3,
- 0xa5,0xe1,0x99,0x8b,0xd1,0x77,0xe9,0x3a,
- 0x73,0x63,0xc3,0x44,0xfe,0x6b,0x19,0x9e,
- 0xe5,0xd0,0x2e,0x82,0xd5,0x22,0xc4,0xfe,
- 0xba,0x15,0x45,0x2f,0x80,0x28,0x8a,0x82,
- 0x1a,0x57,0x91,0x16,0xec,0x6d,0xad,0x2b,
- 0x3b,0x31,0x0d,0xa9,0x03,0x40,0x1a,0xa6,
- 0x21,0x00,0xab,0x5d,0x1a,0x36,0x55,0x3e,
- 0x06,0x20,0x3b,0x33,0x89,0x0c,0xc9,0xb8,
- 0x32,0xf7,0x9e,0xf8,0x05,0x60,0xcc,0xb9,
- 0xa3,0x9c,0xe7,0x67,0x96,0x7e,0xd6,0x28,
- 0xc6,0xad,0x57,0x3c,0xb1,0x16,0xdb,0xef,
- 0xef,0xd7,0x54,0x99,0xda,0x96,0xbd,0x68,
- 0xa8,0xa9,0x7b,0x92,0x8a,0x8b,0xbc,0x10,
- 0x3b,0x66,0x21,0xfc,0xde,0x2b,0xec,0xa1,
- 0x23,0x1d,0x20,0x6b,0xe6,0xcd,0x9e,0xc7,
- 0xaf,0xf6,0xf6,0xc9,0x4f,0xcd,0x72,0x04,
- 0xed,0x34,0x55,0xc6,0x8c,0x83,0xf4,0xa4,
- 0x1d,0xa4,0xaf,0x2b,0x74,0xef,0x5c,0x53,
- 0xf1,0xd8,0xac,0x70,0xbd,0xcb,0x7e,0xd1,
- 0x85,0xce,0x81,0xbd,0x84,0x35,0x9d,0x44,
- 0x25,0x4d,0x95,0x62,0x9e,0x98,0x55,0xa9,
- 0x4a,0x7c,0x19,0x58,0xd1,0xf8,0xad,0xa5,
- 0xd0,0x53,0x2e,0xd8,0xa5,0xaa,0x3f,0xb2,
- 0xd1,0x7b,0xa7,0x0e,0xb6,0x24,0x8e,0x59,
- 0x4e,0x1a,0x22,0x97,0xac,0xbb,0xb3,0x9d,
- 0x50,0x2f,0x1a,0x8c,0x6e,0xb6,0xf1,0xce,
- 0x22,0xb3,0xde,0x1a,0x1f,0x40,0xcc,0x24,
- 0x55,0x41,0x19,0xa8,0x31,0xa9,0xaa,0xd6,
- 0x07,0x9c,0xad,0x88,0x42,0x5d,0xe6,0xbd,
- 0xe1,0xa9,0x18,0x7e,0xbb,0x60,0x92,0xcf,
- 0x67,0xbf,0x2b,0x13,0xfd,0x65,0xf2,0x70,
- 0x88,0xd7,0x8b,0x7e,0x88,0x3c,0x87,0x59,
- 0xd2,0xc4,0xf5,0xc6,0x5a,0xdb,0x75,0x53,
- 0x87,0x8a,0xd5,0x75,0xf9,0xfa,0xd8,0x78,
- 0xe8,0x0a,0x0c,0x9b,0xa6,0x3b,0xcb,0xcc,
- 0x27,0x32,0xe6,0x94,0x85,0xbb,0xc9,0xc9,
- 0x0b,0xfb,0xd6,0x24,0x81,0xd9,0x08,0x9b,
- 0xec,0xcf,0x80,0xcf,0xe2,0xdf,0x16,0xa2,
- 0xcf,0x65,0xbd,0x92,0xdd,0x59,0x7b,0x07,
- 0x07,0xe0,0x91,0x7a,0xf4,0x8b,0xbb,0x75,
- 0xfe,0xd4,0x13,0xd2,0x38,0xf5,0x55,0x5a,
- 0x7a,0x56,0x9d,0x80,0xc3,0x41,0x4a,0x8d,
- 0x08,0x59,0xdc,0x65,0xa4,0x61,0x28,0xba,
- 0xb2,0x7a,0xf8,0x7a,0x71,0x31,0x4f,0x31,
- 0x8c,0x78,0x2b,0x23,0xeb,0xfe,0x80,0x8b,
- 0x82,0xb0,0xce,0x26,0x40,0x1d,0x2e,0x22,
- 0xf0,0x4d,0x83,0xd1,0x25,0x5d,0xc5,0x1a,
- 0xdd,0xd3,0xb7,0x5a,0x2b,0x1a,0xe0,0x78,
- 0x45,0x04,0xdf,0x54,0x3a,0xf8,0x96,0x9b,
- 0xe3,0xea,0x70,0x82,0xff,0x7f,0xc9,0x88,
- 0x8c,0x14,0x4d,0xa2,0xaf,0x58,0x42,0x9e,
- 0xc9,0x60,0x31,0xdb,0xca,0xd3,0xda,0xd9,
- 0xaf,0x0d,0xcb,0xaa,0xaf,0x26,0x8c,0xb8,
- 0xfc,0xff,0xea,0xd9,0x4f,0x3c,0x7c,0xa4,
- 0x95,0xe0,0x56,0xa9,0xb4,0x7a,0xcd,0xb7,
- 0x51,0xfb,0x73,0xe6,0x66,0xc6,0xc6,0x55,
- 0xad,0xe8,0x29,0x72,0x97,0xd0,0x7a,0xd1,
- 0xba,0x5e,0x43,0xf1,0xbc,0xa3,0x23,0x01,
- 0x65,0x13,0x39,0xe2,0x29,0x04,0xcc,0x8c,
- 0x42,0xf5,0x8c,0x30,0xc0,0x4a,0xaf,0xdb,
- 0x03,0x8d,0xda,0x08,0x47,0xdd,0x98,0x8d,
- 0xcd,0xa6,0xf3,0xbf,0xd1,0x5c,0x4b,0x4c,
- 0x45,0x25,0x00,0x4a,0xa0,0x6e,0xef,0xf8,
- 0xca,0x61,0x78,0x3a,0xac,0xec,0x57,0xfb,
- 0x3d,0x1f,0x92,0xb0,0xfe,0x2f,0xd1,0xa8,
- 0x5f,0x67,0x24,0x51,0x7b,0x65,0xe6,0x14,
- 0xad,0x68,0x08,0xd6,0xf6,0xee,0x34,0xdf,
- 0xf7,0x31,0x0f,0xdc,0x82,0xae,0xbf,0xd9,
- 0x04,0xb0,0x1e,0x1d,0xc5,0x4b,0x29,0x27,
- 0x09,0x4b,0x2d,0xb6,0x8d,0x6f,0x90,0x3b,
- 0x68,0x40,0x1a,0xde,0xbf,0x5a,0x7e,0x08,
- 0xd7,0x8f,0xf4,0xef,0x5d,0x63,0x65,0x3a,
- 0x65,0x04,0x0c,0xf9,0xbf,0xd4,0xac,0xa7,
- 0x98,0x4a,0x74,0xd3,0x71,0x45,0x98,0x67,
- 0x80,0xfc,0x0b,0x16,0xac,0x45,0x16,0x49,
- 0xde,0x61,0x88,0xa7,0xdb,0xdf,0x19,0x1f,
- 0x64,0xb5,0xfc,0x5e,0x2a,0xb4,0x7b,0x57,
- 0xf7,0xf7,0x27,0x6c,0xd4,0x19,0xc1,0x7a,
- 0x3c,0xa8,0xe1,0xb9,0x39,0xae,0x49,0xe4,
- 0x88,0xac,0xba,0x6b,0x96,0x56,0x10,0xb5,
- 0x48,0x01,0x09,0xc8,0xb1,0x7b,0x80,0xe1,
- 0xb7,0xb7,0x50,0xdf,0xc7,0x59,0x8d,0x5d,
- 0x50,0x11,0xfd,0x2d,0xcc,0x56,0x00,0xa3,
- 0x2e,0xf5,0xb5,0x2a,0x1e,0xcc,0x82,0x0e,
- 0x30,0x8a,0xa3,0x42,0x72,0x1a,0xac,0x09,
- 0x43,0xbf,0x66,0x86,0xb6,0x4b,0x25,0x79,
- 0x37,0x65,0x04,0xcc,0xc4,0x93,0xd9,0x7e,
- 0x6a,0xed,0x3f,0xb0,0xf9,0xcd,0x71,0xa4,
- 0x3d,0xd4,0x97,0xf0,0x1f,0x17,0xc0,0xe2,
- 0xcb,0x37,0x97,0xaa,0x2a,0x2f,0x25,0x66,
- 0x56,0x16,0x8e,0x6c,0x49,0x6a,0xfc,0x5f,
- 0xb9,0x32,0x46,0xf6,0xb1,0x11,0x63,0x98,
- 0xa3,0x46,0xf1,0xa6,0x41,0xf3,0xb0,0x41,
- 0xe9,0x89,0xf7,0x91,0x4f,0x90,0xcc,0x2c,
- 0x7f,0xff,0x35,0x78,0x76,0xe5,0x06,0xb5,
- 0x0d,0x33,0x4b,0xa7,0x7c,0x22,0x5b,0xc3,
- 0x07,0xba,0x53,0x71,0x52,0xf3,0xf1,0x61,
- 0x0e,0x4e,0xaf,0xe5,0x95,0xf6,0xd9,0xd9,
- 0x0d,0x11,0xfa,0xa9,0x33,0xa1,0x5e,0xf1,
- 0x36,0x95,0x46,0x86,0x8a,0x7f,0x3a,0x45,
- 0xa9,0x67,0x68,0xd4,0x0f,0xd9,0xd0,0x34,
- 0x12,0xc0,0x91,0xc6,0x31,0x5c,0xf4,0xfd,
- 0xe7,0xcb,0x68,0x60,0x69,0x37,0x38,0x0d,
- 0xb2,0xea,0xaa,0x70,0x7b,0x4c,0x41,0x85,
- 0xc3,0x2e,0xdd,0xcd,0xd3,0x06,0x70,0x5e,
- 0x4d,0xc1,0xff,0xc8,0x72,0xee,0xee,0x47,
- 0x5a,0x64,0xdf,0xac,0x86,0xab,0xa4,0x1c,
- 0x06,0x18,0x98,0x3f,0x87,0x41,0xc5,0xef,
- 0x68,0xd3,0xa1,0x01,0xe8,0xa3,0xb8,0xca,
- 0xc6,0x0c,0x90,0x5c,0x15,0xfc,0x91,0x08,
- 0x40,0xb9,0x4c,0x00,0xa0,0xb9,0xd0
- };
-
- const byte* msgs[] = {msg1, msg2, msg3, msg1, msg1, msg4};
- const word16 msgSz[] = {sizeof(msg1), sizeof(msg2), sizeof(msg3),
- sizeof(msg1), sizeof(msg1), sizeof(msg4)};
+ static const byte msg4[] = {
+ 0x08,0xb8,0xb2,0xb7,0x33,0x42,0x42,0x43,
+ 0x76,0x0f,0xe4,0x26,0xa4,0xb5,0x49,0x08,
+ 0x63,0x21,0x10,0xa6,0x6c,0x2f,0x65,0x91,
+ 0xea,0xbd,0x33,0x45,0xe3,0xe4,0xeb,0x98,
+ 0xfa,0x6e,0x26,0x4b,0xf0,0x9e,0xfe,0x12,
+ 0xee,0x50,0xf8,0xf5,0x4e,0x9f,0x77,0xb1,
+ 0xe3,0x55,0xf6,0xc5,0x05,0x44,0xe2,0x3f,
+ 0xb1,0x43,0x3d,0xdf,0x73,0xbe,0x84,0xd8,
+ 0x79,0xde,0x7c,0x00,0x46,0xdc,0x49,0x96,
+ 0xd9,0xe7,0x73,0xf4,0xbc,0x9e,0xfe,0x57,
+ 0x38,0x82,0x9a,0xdb,0x26,0xc8,0x1b,0x37,
+ 0xc9,0x3a,0x1b,0x27,0x0b,0x20,0x32,0x9d,
+ 0x65,0x86,0x75,0xfc,0x6e,0xa5,0x34,0xe0,
+ 0x81,0x0a,0x44,0x32,0x82,0x6b,0xf5,0x8c,
+ 0x94,0x1e,0xfb,0x65,0xd5,0x7a,0x33,0x8b,
+ 0xbd,0x2e,0x26,0x64,0x0f,0x89,0xff,0xbc,
+ 0x1a,0x85,0x8e,0xfc,0xb8,0x55,0x0e,0xe3,
+ 0xa5,0xe1,0x99,0x8b,0xd1,0x77,0xe9,0x3a,
+ 0x73,0x63,0xc3,0x44,0xfe,0x6b,0x19,0x9e,
+ 0xe5,0xd0,0x2e,0x82,0xd5,0x22,0xc4,0xfe,
+ 0xba,0x15,0x45,0x2f,0x80,0x28,0x8a,0x82,
+ 0x1a,0x57,0x91,0x16,0xec,0x6d,0xad,0x2b,
+ 0x3b,0x31,0x0d,0xa9,0x03,0x40,0x1a,0xa6,
+ 0x21,0x00,0xab,0x5d,0x1a,0x36,0x55,0x3e,
+ 0x06,0x20,0x3b,0x33,0x89,0x0c,0xc9,0xb8,
+ 0x32,0xf7,0x9e,0xf8,0x05,0x60,0xcc,0xb9,
+ 0xa3,0x9c,0xe7,0x67,0x96,0x7e,0xd6,0x28,
+ 0xc6,0xad,0x57,0x3c,0xb1,0x16,0xdb,0xef,
+ 0xef,0xd7,0x54,0x99,0xda,0x96,0xbd,0x68,
+ 0xa8,0xa9,0x7b,0x92,0x8a,0x8b,0xbc,0x10,
+ 0x3b,0x66,0x21,0xfc,0xde,0x2b,0xec,0xa1,
+ 0x23,0x1d,0x20,0x6b,0xe6,0xcd,0x9e,0xc7,
+ 0xaf,0xf6,0xf6,0xc9,0x4f,0xcd,0x72,0x04,
+ 0xed,0x34,0x55,0xc6,0x8c,0x83,0xf4,0xa4,
+ 0x1d,0xa4,0xaf,0x2b,0x74,0xef,0x5c,0x53,
+ 0xf1,0xd8,0xac,0x70,0xbd,0xcb,0x7e,0xd1,
+ 0x85,0xce,0x81,0xbd,0x84,0x35,0x9d,0x44,
+ 0x25,0x4d,0x95,0x62,0x9e,0x98,0x55,0xa9,
+ 0x4a,0x7c,0x19,0x58,0xd1,0xf8,0xad,0xa5,
+ 0xd0,0x53,0x2e,0xd8,0xa5,0xaa,0x3f,0xb2,
+ 0xd1,0x7b,0xa7,0x0e,0xb6,0x24,0x8e,0x59,
+ 0x4e,0x1a,0x22,0x97,0xac,0xbb,0xb3,0x9d,
+ 0x50,0x2f,0x1a,0x8c,0x6e,0xb6,0xf1,0xce,
+ 0x22,0xb3,0xde,0x1a,0x1f,0x40,0xcc,0x24,
+ 0x55,0x41,0x19,0xa8,0x31,0xa9,0xaa,0xd6,
+ 0x07,0x9c,0xad,0x88,0x42,0x5d,0xe6,0xbd,
+ 0xe1,0xa9,0x18,0x7e,0xbb,0x60,0x92,0xcf,
+ 0x67,0xbf,0x2b,0x13,0xfd,0x65,0xf2,0x70,
+ 0x88,0xd7,0x8b,0x7e,0x88,0x3c,0x87,0x59,
+ 0xd2,0xc4,0xf5,0xc6,0x5a,0xdb,0x75,0x53,
+ 0x87,0x8a,0xd5,0x75,0xf9,0xfa,0xd8,0x78,
+ 0xe8,0x0a,0x0c,0x9b,0xa6,0x3b,0xcb,0xcc,
+ 0x27,0x32,0xe6,0x94,0x85,0xbb,0xc9,0xc9,
+ 0x0b,0xfb,0xd6,0x24,0x81,0xd9,0x08,0x9b,
+ 0xec,0xcf,0x80,0xcf,0xe2,0xdf,0x16,0xa2,
+ 0xcf,0x65,0xbd,0x92,0xdd,0x59,0x7b,0x07,
+ 0x07,0xe0,0x91,0x7a,0xf4,0x8b,0xbb,0x75,
+ 0xfe,0xd4,0x13,0xd2,0x38,0xf5,0x55,0x5a,
+ 0x7a,0x56,0x9d,0x80,0xc3,0x41,0x4a,0x8d,
+ 0x08,0x59,0xdc,0x65,0xa4,0x61,0x28,0xba,
+ 0xb2,0x7a,0xf8,0x7a,0x71,0x31,0x4f,0x31,
+ 0x8c,0x78,0x2b,0x23,0xeb,0xfe,0x80,0x8b,
+ 0x82,0xb0,0xce,0x26,0x40,0x1d,0x2e,0x22,
+ 0xf0,0x4d,0x83,0xd1,0x25,0x5d,0xc5,0x1a,
+ 0xdd,0xd3,0xb7,0x5a,0x2b,0x1a,0xe0,0x78,
+ 0x45,0x04,0xdf,0x54,0x3a,0xf8,0x96,0x9b,
+ 0xe3,0xea,0x70,0x82,0xff,0x7f,0xc9,0x88,
+ 0x8c,0x14,0x4d,0xa2,0xaf,0x58,0x42,0x9e,
+ 0xc9,0x60,0x31,0xdb,0xca,0xd3,0xda,0xd9,
+ 0xaf,0x0d,0xcb,0xaa,0xaf,0x26,0x8c,0xb8,
+ 0xfc,0xff,0xea,0xd9,0x4f,0x3c,0x7c,0xa4,
+ 0x95,0xe0,0x56,0xa9,0xb4,0x7a,0xcd,0xb7,
+ 0x51,0xfb,0x73,0xe6,0x66,0xc6,0xc6,0x55,
+ 0xad,0xe8,0x29,0x72,0x97,0xd0,0x7a,0xd1,
+ 0xba,0x5e,0x43,0xf1,0xbc,0xa3,0x23,0x01,
+ 0x65,0x13,0x39,0xe2,0x29,0x04,0xcc,0x8c,
+ 0x42,0xf5,0x8c,0x30,0xc0,0x4a,0xaf,0xdb,
+ 0x03,0x8d,0xda,0x08,0x47,0xdd,0x98,0x8d,
+ 0xcd,0xa6,0xf3,0xbf,0xd1,0x5c,0x4b,0x4c,
+ 0x45,0x25,0x00,0x4a,0xa0,0x6e,0xef,0xf8,
+ 0xca,0x61,0x78,0x3a,0xac,0xec,0x57,0xfb,
+ 0x3d,0x1f,0x92,0xb0,0xfe,0x2f,0xd1,0xa8,
+ 0x5f,0x67,0x24,0x51,0x7b,0x65,0xe6,0x14,
+ 0xad,0x68,0x08,0xd6,0xf6,0xee,0x34,0xdf,
+ 0xf7,0x31,0x0f,0xdc,0x82,0xae,0xbf,0xd9,
+ 0x04,0xb0,0x1e,0x1d,0xc5,0x4b,0x29,0x27,
+ 0x09,0x4b,0x2d,0xb6,0x8d,0x6f,0x90,0x3b,
+ 0x68,0x40,0x1a,0xde,0xbf,0x5a,0x7e,0x08,
+ 0xd7,0x8f,0xf4,0xef,0x5d,0x63,0x65,0x3a,
+ 0x65,0x04,0x0c,0xf9,0xbf,0xd4,0xac,0xa7,
+ 0x98,0x4a,0x74,0xd3,0x71,0x45,0x98,0x67,
+ 0x80,0xfc,0x0b,0x16,0xac,0x45,0x16,0x49,
+ 0xde,0x61,0x88,0xa7,0xdb,0xdf,0x19,0x1f,
+ 0x64,0xb5,0xfc,0x5e,0x2a,0xb4,0x7b,0x57,
+ 0xf7,0xf7,0x27,0x6c,0xd4,0x19,0xc1,0x7a,
+ 0x3c,0xa8,0xe1,0xb9,0x39,0xae,0x49,0xe4,
+ 0x88,0xac,0xba,0x6b,0x96,0x56,0x10,0xb5,
+ 0x48,0x01,0x09,0xc8,0xb1,0x7b,0x80,0xe1,
+ 0xb7,0xb7,0x50,0xdf,0xc7,0x59,0x8d,0x5d,
+ 0x50,0x11,0xfd,0x2d,0xcc,0x56,0x00,0xa3,
+ 0x2e,0xf5,0xb5,0x2a,0x1e,0xcc,0x82,0x0e,
+ 0x30,0x8a,0xa3,0x42,0x72,0x1a,0xac,0x09,
+ 0x43,0xbf,0x66,0x86,0xb6,0x4b,0x25,0x79,
+ 0x37,0x65,0x04,0xcc,0xc4,0x93,0xd9,0x7e,
+ 0x6a,0xed,0x3f,0xb0,0xf9,0xcd,0x71,0xa4,
+ 0x3d,0xd4,0x97,0xf0,0x1f,0x17,0xc0,0xe2,
+ 0xcb,0x37,0x97,0xaa,0x2a,0x2f,0x25,0x66,
+ 0x56,0x16,0x8e,0x6c,0x49,0x6a,0xfc,0x5f,
+ 0xb9,0x32,0x46,0xf6,0xb1,0x11,0x63,0x98,
+ 0xa3,0x46,0xf1,0xa6,0x41,0xf3,0xb0,0x41,
+ 0xe9,0x89,0xf7,0x91,0x4f,0x90,0xcc,0x2c,
+ 0x7f,0xff,0x35,0x78,0x76,0xe5,0x06,0xb5,
+ 0x0d,0x33,0x4b,0xa7,0x7c,0x22,0x5b,0xc3,
+ 0x07,0xba,0x53,0x71,0x52,0xf3,0xf1,0x61,
+ 0x0e,0x4e,0xaf,0xe5,0x95,0xf6,0xd9,0xd9,
+ 0x0d,0x11,0xfa,0xa9,0x33,0xa1,0x5e,0xf1,
+ 0x36,0x95,0x46,0x86,0x8a,0x7f,0x3a,0x45,
+ 0xa9,0x67,0x68,0xd4,0x0f,0xd9,0xd0,0x34,
+ 0x12,0xc0,0x91,0xc6,0x31,0x5c,0xf4,0xfd,
+ 0xe7,0xcb,0x68,0x60,0x69,0x37,0x38,0x0d,
+ 0xb2,0xea,0xaa,0x70,0x7b,0x4c,0x41,0x85,
+ 0xc3,0x2e,0xdd,0xcd,0xd3,0x06,0x70,0x5e,
+ 0x4d,0xc1,0xff,0xc8,0x72,0xee,0xee,0x47,
+ 0x5a,0x64,0xdf,0xac,0x86,0xab,0xa4,0x1c,
+ 0x06,0x18,0x98,0x3f,0x87,0x41,0xc5,0xef,
+ 0x68,0xd3,0xa1,0x01,0xe8,0xa3,0xb8,0xca,
+ 0xc6,0x0c,0x90,0x5c,0x15,0xfc,0x91,0x08,
+ 0x40,0xb9,0x4c,0x00,0xa0,0xb9,0xd0
+ };
+
+ static const byte* msgs[] = {msg1, msg2, msg3, msg1, msg1, msg4};
+ static const word16 msgSz[] = {0 /*sizeof(msg1)*/,
+ sizeof(msg2),
+ sizeof(msg3),
+ 0 /*sizeof(msg1)*/,
+ 0 /*sizeof(msg1)*/,
+ sizeof(msg4)
+ };
+#ifndef NO_ASN
+ static byte privateEd25519[] = {
+ 0x30,0x2e,0x02,0x01,0x00,0x30,0x05,0x06,
+ 0x03,0x2b,0x65,0x70,0x04,0x22,0x04,0x20,
+ 0x9d,0x61,0xb1,0x9d,0xef,0xfd,0x5a,0x60,
+ 0xba,0x84,0x4a,0xf4,0x92,0xec,0x2c,0xc4,
+ 0x44,0x49,0xc5,0x69,0x7b,0x32,0x69,0x19,
+ 0x70,0x3b,0xac,0x03,0x1c,0xae,0x7f,0x60
+ };
+ static byte publicEd25519[] = {
+ 0x30,0x2a,0x30,0x05,0x06,0x03,0x2b,0x65,
+ 0x70,0x03,0x21,0x00,0xd7,0x5a,0x98,0x01,
+ 0x82,0xb1,0x0a,0xb7,0xd5,0x4b,0xfe,0xd3,
+ 0xc9,0x64,0x07,0x3a,0x0e,0xe1,0x72,0xf3,
+ 0xda,0xa6,0x23,0x25,0xaf,0x02,0x1a,0x68,
+ 0xf7,0x07,0x51,0x1a
+ };
+ static byte privPubEd25519[] = {
+ 0x30,0x52,0x02,0x01,0x00,0x30,0x05,0x06,
+ 0x03,0x2b,0x65,0x70,0x04,0x22,0x04,0x20,
+ 0x9d,0x61,0xb1,0x9d,0xef,0xfd,0x5a,0x60,
+ 0xba,0x84,0x4a,0xf4,0x92,0xec,0x2c,0xc4,
+ 0x44,0x49,0xc5,0x69,0x7b,0x32,0x69,0x19,
+ 0x70,0x3b,0xac,0x03,0x1c,0xae,0x7f,0x60,
+ 0xa1,0x22,0x04,0x20,0xd7,0x5a,0x98,0x01,
+ 0x82,0xb1,0x0a,0xb7,0xd5,0x4b,0xfe,0xd3,
+ 0xc9,0x64,0x07,0x3a,0x0e,0xe1,0x72,0xf3,
+ 0xda,0xa6,0x23,0x25,0xaf,0x02,0x1a,0x68,
+ 0xf7,0x07,0x51,0x1a
+ };
+
+ word32 idx;
+ ed25519_key key3;
+#endif /* NO_ASN */
+#endif /* HAVE_ED25519_SIGN && HAVE_ED25519_KEY_EXPORT && HAVE_ED25519_KEY_IMPORT */
/* create ed25519 keys */
- wc_InitRng(&rng);
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0)
+ return -10600;
+
wc_ed25519_init(&key);
wc_ed25519_init(&key2);
+#ifndef NO_ASN
+ wc_ed25519_init(&key3);
+#endif
wc_ed25519_make_key(&rng, ED25519_KEY_SIZE, &key);
wc_ed25519_make_key(&rng, ED25519_KEY_SIZE, &key2);
@@ -5726,62 +21659,122 @@ int ed25519_test(void)
keySz = wc_ed25519_size(&key);
sigSz = wc_ed25519_sig_size(&key);
+#if defined(HAVE_ED25519_SIGN) && defined(HAVE_ED25519_KEY_EXPORT) &&\
+ defined(HAVE_ED25519_KEY_IMPORT)
for (i = 0; i < 6; i++) {
outlen = sizeof(out);
XMEMSET(out, 0, sizeof(out));
if (wc_ed25519_import_private_key(sKeys[i], ED25519_KEY_SIZE, pKeys[i],
pKeySz[i], &key) != 0)
- return -1021 - i;
+ return -10601 - i;
- if (wc_ed25519_sign_msg(msgs[i], msgSz[i], out, &outlen, &key)
- != 0)
- return -1027 - i;
+ if (wc_ed25519_sign_msg(msgs[i], msgSz[i], out, &outlen, &key) != 0)
+ return -10611 - i;
if (XMEMCMP(out, sigs[i], 64))
- return -1033 - i;
+ return -10621 - i;
+#if defined(HAVE_ED25519_VERIFY)
/* test verify on good msg */
if (wc_ed25519_verify_msg(out, outlen, msgs[i], msgSz[i], &verify,
&key) != 0 || verify != 1)
- return -1039 - i;
+ return -10631 - i;
/* test verify on bad msg */
out[outlen-1] = out[outlen-1] + 1;
if (wc_ed25519_verify_msg(out, outlen, msgs[i], msgSz[i], &verify,
&key) == 0 || verify == 1)
- return -1045 - i;
+ return -10641 - i;
+#endif /* HAVE_ED25519_VERIFY */
/* test api for import/exporting keys */
exportPSz = sizeof(exportPKey);
exportSSz = sizeof(exportSKey);
if (wc_ed25519_export_public(&key, exportPKey, &exportPSz) != 0)
- return -1051 - i;
+ return -10651 - i;
if (wc_ed25519_import_public(exportPKey, exportPSz, &key2) != 0)
- return -1057 - i;
+ return -10661 - i;
if (wc_ed25519_export_private_only(&key, exportSKey, &exportSSz) != 0)
- return -1063 - i;
+ return -10671 - i;
if (wc_ed25519_import_private_key(exportSKey, exportSSz,
exportPKey, exportPSz, &key2) != 0)
- return -1069 - i;
+ return -10681 - i;
/* clear "out" buffer and test sign with imported keys */
outlen = sizeof(out);
XMEMSET(out, 0, sizeof(out));
if (wc_ed25519_sign_msg(msgs[i], msgSz[i], out, &outlen, &key2) != 0)
- return -1075 - i;
+ return -10691 - i;
+#if defined(HAVE_ED25519_VERIFY)
if (wc_ed25519_verify_msg(out, outlen, msgs[i], msgSz[i], &verify,
&key2) != 0 || verify != 1)
- return -1081 - i;
+ return -10701 - i;
if (XMEMCMP(out, sigs[i], 64))
- return -1087 - i;
+ return -10711 - i;
+#endif /* HAVE_ED25519_VERIFY */
}
+ ret = ed25519ctx_test();
+ if (ret != 0)
+ return ret;
+
+ ret = ed25519ph_test();
+ if (ret != 0)
+ return ret;
+
+#ifndef NO_ASN
+ /* Try ASN.1 encoded private-only key and public key. */
+ idx = 0;
+ if (wc_Ed25519PrivateKeyDecode(privateEd25519, &idx, &key3,
+ sizeof(privateEd25519)) != 0)
+ return -10721 - i;
+
+ if (wc_ed25519_sign_msg(msgs[0], msgSz[0], out, &outlen, &key3)
+ != BAD_FUNC_ARG)
+ return -10731 - i;
+
+ idx = 0;
+ if (wc_Ed25519PublicKeyDecode(publicEd25519, &idx, &key3,
+ sizeof(publicEd25519)) != 0)
+ return -10741 - i;
+
+ if (wc_ed25519_sign_msg(msgs[0], msgSz[0], out, &outlen, &key3) != 0)
+ return -10751 - i;
+
+ if (XMEMCMP(out, sigs[0], 64))
+ return -10761 - i;
+
+#if defined(HAVE_ED25519_VERIFY)
+ /* test verify on good msg */
+ if (wc_ed25519_verify_msg(out, outlen, msgs[0], msgSz[0], &verify, &key3)
+ != 0 || verify != 1)
+ return -10771 - i;
+#endif /* HAVE_ED25519_VERIFY */
+
+ wc_ed25519_free(&key3);
+ wc_ed25519_init(&key3);
+
+ idx = 0;
+ if (wc_Ed25519PrivateKeyDecode(privPubEd25519, &idx, &key3,
+ sizeof(privPubEd25519)) != 0)
+ return -10781 - i;
+
+ if (wc_ed25519_sign_msg(msgs[0], msgSz[0], out, &outlen, &key3) != 0)
+ return -10791 - i;
+
+ if (XMEMCMP(out, sigs[0], 64))
+ return -10801 - i;
+
+ wc_ed25519_free(&key3);
+#endif /* NO_ASN */
+#endif /* HAVE_ED25519_SIGN && HAVE_ED25519_KEY_EXPORT && HAVE_ED25519_KEY_IMPORT */
+
/* clean up keys when done */
wc_ed25519_free(&key);
wc_ed25519_free(&key2);
@@ -5790,14 +21783,1624 @@ int ed25519_test(void)
wc_FreeRng(&rng);
#endif
- /* hush warrnings of unused keySz and sigSz */
+ /* hush warnings of unused keySz and sigSz */
(void)keySz;
(void)sigSz;
+#ifdef WOLFSSL_TEST_CERT
+ ret = ed25519_test_cert();
+ if (ret < 0)
+ return ret;
+#ifdef WOLFSSL_CERT_GEN
+ ret = ed25519_test_make_cert();
+ if (ret < 0)
+ return ret;
+#endif /* WOLFSSL_CERT_GEN */
+#endif /* WOLFSSL_TEST_CERT */
+
return 0;
}
#endif /* HAVE_ED25519 */
+#ifdef HAVE_CURVE448
+#if defined(HAVE_CURVE448_SHARED_SECRET) && \
+ defined(HAVE_CURVE448_KEY_IMPORT)
+/* Test the wc_curve448_check_public API.
+ *
+ * returns 0 on success and -ve on failure.
+ */
+static int curve448_check_public_test(void)
+{
+ /* Little-endian values that will fail */
+ byte fail_le[][CURVE448_KEY_SIZE] = {
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ },
+ {
+ 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ },
+ };
+ /* Big-endian values that will fail */
+ byte fail_be[][CURVE448_KEY_SIZE] = {
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ },
+ {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01
+ },
+ };
+ /* Good or valid public value */
+ byte good[CURVE448_KEY_SIZE] = {
+ 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01
+ };
+ int i;
+
+ /* Parameter checks */
+ /* NULL pointer */
+ if (wc_curve448_check_public(NULL, 0, EC448_LITTLE_ENDIAN) !=
+ BAD_FUNC_ARG) {
+ return -10900;
+ }
+ if (wc_curve448_check_public(NULL, 0, EC448_BIG_ENDIAN) != BAD_FUNC_ARG) {
+ return -10901;
+ }
+ /* Length of 0 treated differently to other invalid lengths for TLS */
+ if (wc_curve448_check_public(good, 0, EC448_LITTLE_ENDIAN) != BUFFER_E)
+ return -10902;
+ if (wc_curve448_check_public(good, 0, EC448_BIG_ENDIAN) != BUFFER_E)
+ return -10903;
+
+ /* Length not CURVE448_KEY_SIZE */
+ for (i = 1; i < CURVE448_KEY_SIZE + 2; i++) {
+ if (i == CURVE448_KEY_SIZE)
+ continue;
+ if (wc_curve448_check_public(good, i, EC448_LITTLE_ENDIAN) !=
+ ECC_BAD_ARG_E) {
+ return -10904 - i;
+ }
+ if (wc_curve448_check_public(good, i, EC448_BIG_ENDIAN) !=
+ ECC_BAD_ARG_E) {
+ return -10914 - i;
+ }
+ }
+
+ /* Little-endian fail cases */
+ for (i = 0; i < (int)(sizeof(fail_le) / sizeof(*fail_le)); i++) {
+ if (wc_curve448_check_public(fail_le[i], CURVE448_KEY_SIZE,
+ EC448_LITTLE_ENDIAN) == 0) {
+ return -10924 - i;
+ }
+ }
+ /* Big-endian fail cases */
+ for (i = 0; i < (int)(sizeof(fail_be) / sizeof(*fail_be)); i++) {
+ if (wc_curve448_check_public(fail_be[i], CURVE448_KEY_SIZE,
+ EC448_BIG_ENDIAN) == 0) {
+ return -10934 - i;
+ }
+ }
+
+ /* Check a valid public value works! */
+ if (wc_curve448_check_public(good, CURVE448_KEY_SIZE,
+ EC448_LITTLE_ENDIAN) != 0) {
+ return -10944;
+ }
+ if (wc_curve448_check_public(good, CURVE448_KEY_SIZE,
+ EC448_BIG_ENDIAN) != 0) {
+ return -10945;
+ }
+
+ return 0;
+}
+
+#endif /* HAVE_CURVE448_SHARED_SECRET && HAVE_CURVE448_KEY_IMPORT */
+
+int curve448_test(void)
+{
+ WC_RNG rng;
+ int ret;
+#ifdef HAVE_CURVE448_SHARED_SECRET
+ byte sharedA[CURVE448_KEY_SIZE];
+ byte sharedB[CURVE448_KEY_SIZE];
+ word32 y;
+#endif
+#ifdef HAVE_CURVE448_KEY_EXPORT
+ byte exportBuf[CURVE448_KEY_SIZE];
+#endif
+ word32 x;
+ curve448_key userA, userB, pubKey;
+
+ (void)x;
+
+#if defined(HAVE_CURVE448_SHARED_SECRET) && \
+ defined(HAVE_CURVE448_KEY_IMPORT)
+ /* test vectors from
+ https://www.rfc-editor.org/rfc/rfc7748.html
+ */
+
+ /* secret key for party a */
+ byte sa[] = {
+ 0x6b, 0x72, 0x98, 0xa5, 0xc0, 0xd8, 0xc2, 0x9a,
+ 0x1d, 0xab, 0x27, 0xf1, 0xa6, 0x82, 0x63, 0x00,
+ 0x91, 0x73, 0x89, 0x44, 0x97, 0x41, 0xa9, 0x74,
+ 0xf5, 0xba, 0xc9, 0xd9, 0x8d, 0xc2, 0x98, 0xd4,
+ 0x65, 0x55, 0xbc, 0xe8, 0xba, 0xe8, 0x9e, 0xee,
+ 0xd4, 0x00, 0x58, 0x4b, 0xb0, 0x46, 0xcf, 0x75,
+ 0x57, 0x9f, 0x51, 0xd1, 0x25, 0x49, 0x8f, 0x9a,
+ };
+
+ /* public key for party a */
+ byte pa[] = {
+ 0xa0, 0x1f, 0xc4, 0x32, 0xe5, 0x80, 0x7f, 0x17,
+ 0x53, 0x0d, 0x12, 0x88, 0xda, 0x12, 0x5b, 0x0c,
+ 0xd4, 0x53, 0xd9, 0x41, 0x72, 0x64, 0x36, 0xc8,
+ 0xbb, 0xd9, 0xc5, 0x22, 0x2c, 0x3d, 0xa7, 0xfa,
+ 0x63, 0x9c, 0xe0, 0x3d, 0xb8, 0xd2, 0x3b, 0x27,
+ 0x4a, 0x07, 0x21, 0xa1, 0xae, 0xd5, 0x22, 0x7d,
+ 0xe6, 0xe3, 0xb7, 0x31, 0xcc, 0xf7, 0x08, 0x9b,
+ };
+
+ /* secret key for party b */
+ byte sb[] = {
+ 0x2d, 0x99, 0x73, 0x51, 0xb6, 0x10, 0x6f, 0x36,
+ 0xb0, 0xd1, 0x09, 0x1b, 0x92, 0x9c, 0x4c, 0x37,
+ 0x21, 0x3e, 0x0d, 0x2b, 0x97, 0xe8, 0x5e, 0xbb,
+ 0x20, 0xc1, 0x27, 0x69, 0x1d, 0x0d, 0xad, 0x8f,
+ 0x1d, 0x81, 0x75, 0xb0, 0x72, 0x37, 0x45, 0xe6,
+ 0x39, 0xa3, 0xcb, 0x70, 0x44, 0x29, 0x0b, 0x99,
+ 0xe0, 0xe2, 0xa0, 0xc2, 0x7a, 0x6a, 0x30, 0x1c,
+ };
+
+ /* public key for party b */
+ byte pb[] = {
+ 0x09, 0x36, 0xf3, 0x7b, 0xc6, 0xc1, 0xbd, 0x07,
+ 0xae, 0x3d, 0xec, 0x7a, 0xb5, 0xdc, 0x06, 0xa7,
+ 0x3c, 0xa1, 0x32, 0x42, 0xfb, 0x34, 0x3e, 0xfc,
+ 0x72, 0xb9, 0xd8, 0x27, 0x30, 0xb4, 0x45, 0xf3,
+ 0xd4, 0xb0, 0xbd, 0x07, 0x71, 0x62, 0xa4, 0x6d,
+ 0xcf, 0xec, 0x6f, 0x9b, 0x59, 0x0b, 0xfc, 0xbc,
+ 0xf5, 0x20, 0xcd, 0xb0, 0x29, 0xa8, 0xb7, 0x3e,
+ };
+
+ /* expected shared key */
+ byte ss[] = {
+ 0x9d, 0x87, 0x4a, 0x51, 0x37, 0x50, 0x9a, 0x44,
+ 0x9a, 0xd5, 0x85, 0x30, 0x40, 0x24, 0x1c, 0x52,
+ 0x36, 0x39, 0x54, 0x35, 0xc3, 0x64, 0x24, 0xfd,
+ 0x56, 0x0b, 0x0c, 0xb6, 0x2b, 0x28, 0x1d, 0x28,
+ 0x52, 0x75, 0xa7, 0x40, 0xce, 0x32, 0xa2, 0x2d,
+ 0xd1, 0x74, 0x0f, 0x4a, 0xa9, 0x16, 0x1c, 0xec,
+ 0x95, 0xcc, 0xc6, 0x1a, 0x18, 0xf4, 0xff, 0x07,
+ };
+#endif /* HAVE_CURVE448_SHARED_SECRET */
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0)
+ return -11000;
+
+ wc_curve448_init(&userA);
+ wc_curve448_init(&userB);
+ wc_curve448_init(&pubKey);
+
+ /* make curve448 keys */
+ if (wc_curve448_make_key(&rng, CURVE448_KEY_SIZE, &userA) != 0)
+ return -11001;
+
+ if (wc_curve448_make_key(&rng, CURVE448_KEY_SIZE, &userB) != 0)
+ return -11002;
+
+#ifdef HAVE_CURVE448_SHARED_SECRET
+ /* find shared secret key */
+ x = sizeof(sharedA);
+ if (wc_curve448_shared_secret(&userA, &userB, sharedA, &x) != 0)
+ return -11003;
+
+ y = sizeof(sharedB);
+ if (wc_curve448_shared_secret(&userB, &userA, sharedB, &y) != 0)
+ return -11004;
+
+ /* compare shared secret keys to test they are the same */
+ if (y != x)
+ return -11005;
+
+ if (XMEMCMP(sharedA, sharedB, x))
+ return -11006;
+#endif
+
+#ifdef HAVE_CURVE448_KEY_EXPORT
+ /* export a public key and import it for another user */
+ x = sizeof(exportBuf);
+ if (wc_curve448_export_public(&userA, exportBuf, &x) != 0)
+ return -11007;
+
+#ifdef HAVE_CURVE448_KEY_IMPORT
+ if (wc_curve448_import_public(exportBuf, x, &pubKey) != 0)
+ return -11008;
+#endif
+#endif
+
+#if defined(HAVE_CURVE448_SHARED_SECRET) && \
+ defined(HAVE_CURVE448_KEY_IMPORT)
+ /* test shared key after importing a public key */
+ XMEMSET(sharedB, 0, sizeof(sharedB));
+ y = sizeof(sharedB);
+ if (wc_curve448_shared_secret(&userB, &pubKey, sharedB, &y) != 0)
+ return -11009;
+
+ if (XMEMCMP(sharedA, sharedB, y))
+ return -11010;
+
+ /* import RFC test vectors and compare shared key */
+ if (wc_curve448_import_private_raw(sa, sizeof(sa), pa, sizeof(pa), &userA)
+ != 0)
+ return -11011;
+
+ if (wc_curve448_import_private_raw(sb, sizeof(sb), pb, sizeof(pb), &userB)
+ != 0)
+ return -11012;
+
+ /* test against known test vector */
+ XMEMSET(sharedB, 0, sizeof(sharedB));
+ y = sizeof(sharedB);
+ if (wc_curve448_shared_secret(&userA, &userB, sharedB, &y) != 0)
+ return -11013;
+
+ if (XMEMCMP(ss, sharedB, y))
+ return -11014;
+
+ /* test swapping roles of keys and generating same shared key */
+ XMEMSET(sharedB, 0, sizeof(sharedB));
+ y = sizeof(sharedB);
+ if (wc_curve448_shared_secret(&userB, &userA, sharedB, &y) != 0)
+ return -11015;
+
+ if (XMEMCMP(ss, sharedB, y))
+ return -11016;
+
+ /* test with 1 generated key and 1 from known test vector */
+ if (wc_curve448_import_private_raw(sa, sizeof(sa), pa, sizeof(pa), &userA)
+ != 0)
+ return -11017;
+
+ if (wc_curve448_make_key(&rng, 56, &userB) != 0)
+ return -11018;
+
+ x = sizeof(sharedA);
+ if (wc_curve448_shared_secret(&userA, &userB, sharedA, &x) != 0)
+ return -11019;
+
+ y = sizeof(sharedB);
+ if (wc_curve448_shared_secret(&userB, &userA, sharedB, &y) != 0)
+ return -11020;
+
+ /* compare shared secret keys to test they are the same */
+ if (y != x)
+ return -11021;
+
+ if (XMEMCMP(sharedA, sharedB, x))
+ return -11022;
+
+ ret = curve448_check_public_test();
+ if (ret != 0)
+ return ret;
+#endif /* HAVE_CURVE448_SHARED_SECRET && HAVE_CURVE448_KEY_IMPORT */
+
+ /* clean up keys when done */
+ wc_curve448_free(&pubKey);
+ wc_curve448_free(&userB);
+ wc_curve448_free(&userA);
+
+ wc_FreeRng(&rng);
+
+ return 0;
+}
+#endif /* HAVE_CURVE448 */
+
+#ifdef HAVE_ED448
+#ifdef WOLFSSL_TEST_CERT
+static int ed448_test_cert(void)
+{
+ DecodedCert cert[2];
+ DecodedCert* serverCert = NULL;
+ DecodedCert* caCert = NULL;
+#ifdef HAVE_ED448_VERIFY
+ ed448_key key;
+ ed448_key* pubKey = NULL;
+ int verify;
+#endif /* HAVE_ED448_VERIFY */
+ int ret;
+ byte* tmp;
+ size_t bytes;
+ XFILE file;
+
+ tmp = XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL) {
+ ERROR_OUT(-11023, done);
+ }
+
+#ifdef USE_CERT_BUFFERS_256
+ XMEMCPY(tmp, ca_ed448_cert, sizeof_ca_ed448_cert);
+ bytes = sizeof_ca_ed448_cert;
+#elif !defined(NO_FILESYSTEM)
+ file = XFOPEN(caEd448Cert, "rb");
+ if (file == NULL) {
+ ERROR_OUT(-11024, done);
+ }
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+#else
+ /* No certificate to use. */
+ ERROR_OUT(-11025, done);
+#endif
+
+ InitDecodedCert(&cert[0], tmp, (word32)bytes, 0);
+ caCert = &cert[0];
+ ret = ParseCert(caCert, CERT_TYPE, NO_VERIFY, NULL);
+ if (ret != 0) {
+ ERROR_OUT(-11026, done);
+ }
+
+#ifdef USE_CERT_BUFFERS_256
+ XMEMCPY(tmp, server_ed448_cert, sizeof_server_ed448_cert);
+ bytes = sizeof_server_ed448_cert;
+#elif !defined(NO_FILESYSTEM)
+ file = XFOPEN(serverEd448Cert, "rb");
+ if (file == NULL) {
+ ERROR_OUT(-11027, done);
+ }
+ bytes = XFREAD(tmp, 1, FOURK_BUF, file);
+ XFCLOSE(file);
+#else
+ /* No certificate to use. */
+ ERROR_OUT(-11028, done);
+#endif
+
+ InitDecodedCert(&cert[1], tmp, (word32)bytes, 0);
+ serverCert = &cert[1];
+ ret = ParseCert(serverCert, CERT_TYPE, NO_VERIFY, NULL);
+ if (ret != 0) {
+ ERROR_OUT(-11029, done);
+ }
+
+#ifdef HAVE_ED448_VERIFY
+ ret = wc_ed448_init(&key);
+ if (ret < 0) {
+ ERROR_OUT(-11030, done);
+ }
+ pubKey = &key;
+ ret = wc_ed448_import_public(caCert->publicKey, caCert->pubKeySize, pubKey);
+ if (ret < 0) {
+ ERROR_OUT(-11031, done);
+ }
+
+ if (wc_ed448_verify_msg(serverCert->signature, serverCert->sigLength,
+ serverCert->source + serverCert->certBegin,
+ serverCert->sigIndex - serverCert->certBegin,
+ &verify, pubKey) < 0 || verify != 1) {
+ ERROR_OUT(-11032, done);
+ }
+#endif /* HAVE_ED448_VERIFY */
+
+done:
+ if (tmp != NULL)
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef HAVE_ED448_VERIFY
+ wc_ed448_free(pubKey);
+#endif /* HAVE_ED448_VERIFY */
+ if (caCert != NULL)
+ FreeDecodedCert(caCert);
+ if (serverCert != NULL)
+ FreeDecodedCert(serverCert);
+
+ return ret;
+}
+
+static int ed448_test_make_cert(void)
+{
+ WC_RNG rng;
+ Cert cert;
+ DecodedCert decode;
+ ed448_key key;
+ ed448_key* privKey = NULL;
+ int ret = 0;
+ byte* tmp = NULL;
+
+ wc_InitCert(&cert);
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0)
+ return -11033;
+
+ wc_ed448_init(&key);
+ privKey = &key;
+ wc_ed448_make_key(&rng, ED448_KEY_SIZE, privKey);
+
+ cert.daysValid = 365 * 2;
+ cert.selfSigned = 1;
+ XMEMCPY(&cert.issuer, &certDefaultName, sizeof(CertName));
+ XMEMCPY(&cert.subject, &certDefaultName, sizeof(CertName));
+ cert.isCA = 0;
+#ifdef WOLFSSL_CERT_EXT
+ ret = wc_SetKeyUsage(&cert, certKeyUsage);
+ if (ret < 0) {
+ ERROR_OUT(-11034, done);
+ }
+ ret = wc_SetSubjectKeyIdFromPublicKey_ex(&cert, ED448_TYPE, privKey);
+ if (ret < 0) {
+ ERROR_OUT(-11035, done);
+ }
+ ret = wc_SetAuthKeyIdFromPublicKey_ex(&cert, ED448_TYPE, privKey);
+ if (ret < 0) {
+ ERROR_OUT(-11036, done);
+ }
+#endif
+ tmp = XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (tmp == NULL) {
+ ERROR_OUT(-11037, done);
+ }
+
+ cert.sigType = CTC_ED448;
+ ret = wc_MakeCert_ex(&cert, tmp, FOURK_BUF, ED448_TYPE, privKey, &rng);
+ if (ret < 0) {
+ ERROR_OUT(-11038, done);
+ }
+ ret = wc_SignCert_ex(cert.bodySz, cert.sigType, tmp, FOURK_BUF, ED448_TYPE,
+ privKey, &rng);
+ if (ret < 0) {
+ ERROR_OUT(-11039, done);
+ }
+
+ InitDecodedCert(&decode, tmp, ret, HEAP_HINT);
+ ret = ParseCert(&decode, CERT_TYPE, NO_VERIFY, 0);
+ FreeDecodedCert(&decode);
+ if (ret != 0) {
+ ERROR_OUT(-11040, done);
+ }
+
+done:
+ if (tmp != NULL)
+ XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_ed448_free(privKey);
+ wc_FreeRng(&rng);
+ return ret;
+}
+#endif /* WOLFSSL_TEST_CERT */
+
+#if defined(HAVE_ED448_SIGN) && defined(HAVE_ED448_KEY_EXPORT) && \
+ defined(HAVE_ED448_KEY_IMPORT)
+static int ed448_ctx_test(void)
+{
+ byte out[ED448_SIG_SIZE];
+ word32 outlen;
+#ifdef HAVE_ED448_VERIFY
+ int verify;
+#endif /* HAVE_ED448_VERIFY */
+ ed448_key key;
+
+ static const byte sKeyCtx[] = {
+ 0xc4, 0xea, 0xb0, 0x5d, 0x35, 0x70, 0x07, 0xc6,
+ 0x32, 0xf3, 0xdb, 0xb4, 0x84, 0x89, 0x92, 0x4d,
+ 0x55, 0x2b, 0x08, 0xfe, 0x0c, 0x35, 0x3a, 0x0d,
+ 0x4a, 0x1f, 0x00, 0xac, 0xda, 0x2c, 0x46, 0x3a,
+ 0xfb, 0xea, 0x67, 0xc5, 0xe8, 0xd2, 0x87, 0x7c,
+ 0x5e, 0x3b, 0xc3, 0x97, 0xa6, 0x59, 0x94, 0x9e,
+ 0xf8, 0x02, 0x1e, 0x95, 0x4e, 0x0a, 0x12, 0x27,
+ 0x4e
+ };
+
+ static const byte pKeyCtx[] = {
+ 0x43, 0xba, 0x28, 0xf4, 0x30, 0xcd, 0xff, 0x45,
+ 0x6a, 0xe5, 0x31, 0x54, 0x5f, 0x7e, 0xcd, 0x0a,
+ 0xc8, 0x34, 0xa5, 0x5d, 0x93, 0x58, 0xc0, 0x37,
+ 0x2b, 0xfa, 0x0c, 0x6c, 0x67, 0x98, 0xc0, 0x86,
+ 0x6a, 0xea, 0x01, 0xeb, 0x00, 0x74, 0x28, 0x02,
+ 0xb8, 0x43, 0x8e, 0xa4, 0xcb, 0x82, 0x16, 0x9c,
+ 0x23, 0x51, 0x60, 0x62, 0x7b, 0x4c, 0x3a, 0x94,
+ 0x80
+ };
+
+ static const byte sigCtx[] = {
+ 0xd4, 0xf8, 0xf6, 0x13, 0x17, 0x70, 0xdd, 0x46,
+ 0xf4, 0x08, 0x67, 0xd6, 0xfd, 0x5d, 0x50, 0x55,
+ 0xde, 0x43, 0x54, 0x1f, 0x8c, 0x5e, 0x35, 0xab,
+ 0xbc, 0xd0, 0x01, 0xb3, 0x2a, 0x89, 0xf7, 0xd2,
+ 0x15, 0x1f, 0x76, 0x47, 0xf1, 0x1d, 0x8c, 0xa2,
+ 0xae, 0x27, 0x9f, 0xb8, 0x42, 0xd6, 0x07, 0x21,
+ 0x7f, 0xce, 0x6e, 0x04, 0x2f, 0x68, 0x15, 0xea,
+ 0x00, 0x0c, 0x85, 0x74, 0x1d, 0xe5, 0xc8, 0xda,
+ 0x11, 0x44, 0xa6, 0xa1, 0xab, 0xa7, 0xf9, 0x6d,
+ 0xe4, 0x25, 0x05, 0xd7, 0xa7, 0x29, 0x85, 0x24,
+ 0xfd, 0xa5, 0x38, 0xfc, 0xcb, 0xbb, 0x75, 0x4f,
+ 0x57, 0x8c, 0x1c, 0xad, 0x10, 0xd5, 0x4d, 0x0d,
+ 0x54, 0x28, 0x40, 0x7e, 0x85, 0xdc, 0xbc, 0x98,
+ 0xa4, 0x91, 0x55, 0xc1, 0x37, 0x64, 0xe6, 0x6c,
+ 0x3c, 0x00
+ };
+
+ static const byte msgCtx[] = {
+ 0x03
+ };
+
+ static const byte contextCtx[] = {
+ 0x66,0x6f,0x6f
+ };
+
+ outlen = sizeof(out);
+ XMEMSET(out, 0, sizeof(out));
+
+ if (wc_ed448_import_private_key(sKeyCtx, ED448_KEY_SIZE, pKeyCtx,
+ sizeof(pKeyCtx), &key) != 0)
+ return -11100;
+
+ if (wc_ed448_sign_msg(msgCtx, sizeof(msgCtx), out, &outlen, &key,
+ contextCtx, sizeof(contextCtx)) != 0)
+ return -11101;
+
+ if (XMEMCMP(out, sigCtx, sizeof(sigCtx)))
+ return -11102;
+
+#if defined(HAVE_ED448_VERIFY)
+ /* test verify on good msg */
+ if (wc_ed448_verify_msg(out, outlen, msgCtx, sizeof(msgCtx), &verify, &key,
+ contextCtx, sizeof(contextCtx)) != 0 || verify != 1)
+ return -11103;
+#endif
+
+ wc_ed448_free(&key);
+
+ return 0;
+}
+
+static int ed448ph_test(void)
+{
+ byte out[ED448_SIG_SIZE];
+ word32 outlen;
+#ifdef HAVE_ED448_VERIFY
+ int verify;
+#endif /* HAVE_ED448_VERIFY */
+ ed448_key key;
+
+ static const byte sKeyPh[] = {
+ 0x83, 0x3f, 0xe6, 0x24, 0x09, 0x23, 0x7b, 0x9d,
+ 0x62, 0xec, 0x77, 0x58, 0x75, 0x20, 0x91, 0x1e,
+ 0x9a, 0x75, 0x9c, 0xec, 0x1d, 0x19, 0x75, 0x5b,
+ 0x7d, 0xa9, 0x01, 0xb9, 0x6d, 0xca, 0x3d, 0x42,
+ 0xef, 0x78, 0x22, 0xe0, 0xd5, 0x10, 0x41, 0x27,
+ 0xdc, 0x05, 0xd6, 0xdb, 0xef, 0xde, 0x69, 0xe3,
+ 0xab, 0x2c, 0xec, 0x7c, 0x86, 0x7c, 0x6e, 0x2c,
+ 0x49
+ };
+
+ static const byte pKeyPh[] = {
+ 0x25, 0x9b, 0x71, 0xc1, 0x9f, 0x83, 0xef, 0x77,
+ 0xa7, 0xab, 0xd2, 0x65, 0x24, 0xcb, 0xdb, 0x31,
+ 0x61, 0xb5, 0x90, 0xa4, 0x8f, 0x7d, 0x17, 0xde,
+ 0x3e, 0xe0, 0xba, 0x9c, 0x52, 0xbe, 0xb7, 0x43,
+ 0xc0, 0x94, 0x28, 0xa1, 0x31, 0xd6, 0xb1, 0xb5,
+ 0x73, 0x03, 0xd9, 0x0d, 0x81, 0x32, 0xc2, 0x76,
+ 0xd5, 0xed, 0x3d, 0x5d, 0x01, 0xc0, 0xf5, 0x38,
+ 0x80
+ };
+
+ static const byte sigPh1[] = {
+ 0x82, 0x2f, 0x69, 0x01, 0xf7, 0x48, 0x0f, 0x3d,
+ 0x5f, 0x56, 0x2c, 0x59, 0x29, 0x94, 0xd9, 0x69,
+ 0x36, 0x02, 0x87, 0x56, 0x14, 0x48, 0x32, 0x56,
+ 0x50, 0x56, 0x00, 0xbb, 0xc2, 0x81, 0xae, 0x38,
+ 0x1f, 0x54, 0xd6, 0xbc, 0xe2, 0xea, 0x91, 0x15,
+ 0x74, 0x93, 0x2f, 0x52, 0xa4, 0xe6, 0xca, 0xdd,
+ 0x78, 0x76, 0x93, 0x75, 0xec, 0x3f, 0xfd, 0x1b,
+ 0x80, 0x1a, 0x0d, 0x9b, 0x3f, 0x40, 0x30, 0xcd,
+ 0x43, 0x39, 0x64, 0xb6, 0x45, 0x7e, 0xa3, 0x94,
+ 0x76, 0x51, 0x12, 0x14, 0xf9, 0x74, 0x69, 0xb5,
+ 0x7d, 0xd3, 0x2d, 0xbc, 0x56, 0x0a, 0x9a, 0x94,
+ 0xd0, 0x0b, 0xff, 0x07, 0x62, 0x04, 0x64, 0xa3,
+ 0xad, 0x20, 0x3d, 0xf7, 0xdc, 0x7c, 0xe3, 0x60,
+ 0xc3, 0xcd, 0x36, 0x96, 0xd9, 0xd9, 0xfa, 0xb9,
+ 0x0f, 0x00
+ };
+
+ static const byte sigPh2[] = {
+ 0xc3, 0x22, 0x99, 0xd4, 0x6e, 0xc8, 0xff, 0x02,
+ 0xb5, 0x45, 0x40, 0x98, 0x28, 0x14, 0xdc, 0xe9,
+ 0xa0, 0x58, 0x12, 0xf8, 0x19, 0x62, 0xb6, 0x49,
+ 0xd5, 0x28, 0x09, 0x59, 0x16, 0xa2, 0xaa, 0x48,
+ 0x10, 0x65, 0xb1, 0x58, 0x04, 0x23, 0xef, 0x92,
+ 0x7e, 0xcf, 0x0a, 0xf5, 0x88, 0x8f, 0x90, 0xda,
+ 0x0f, 0x6a, 0x9a, 0x85, 0xad, 0x5d, 0xc3, 0xf2,
+ 0x80, 0xd9, 0x12, 0x24, 0xba, 0x99, 0x11, 0xa3,
+ 0x65, 0x3d, 0x00, 0xe4, 0x84, 0xe2, 0xce, 0x23,
+ 0x25, 0x21, 0x48, 0x1c, 0x86, 0x58, 0xdf, 0x30,
+ 0x4b, 0xb7, 0x74, 0x5a, 0x73, 0x51, 0x4c, 0xdb,
+ 0x9b, 0xf3, 0xe1, 0x57, 0x84, 0xab, 0x71, 0x28,
+ 0x4f, 0x8d, 0x07, 0x04, 0xa6, 0x08, 0xc5, 0x4a,
+ 0x6b, 0x62, 0xd9, 0x7b, 0xeb, 0x51, 0x1d, 0x13,
+ 0x21, 0x00
+ };
+
+ static const byte msgPh[] = {
+ 0x61,0x62,0x63
+ };
+
+ /* SHA-512 hash of msgPh */
+ static const byte hashPh[] = {
+ 0x48, 0x33, 0x66, 0x60, 0x13, 0x60, 0xa8, 0x77,
+ 0x1c, 0x68, 0x63, 0x08, 0x0c, 0xc4, 0x11, 0x4d,
+ 0x8d, 0xb4, 0x45, 0x30, 0xf8, 0xf1, 0xe1, 0xee,
+ 0x4f, 0x94, 0xea, 0x37, 0xe7, 0x8b, 0x57, 0x39,
+ 0xd5, 0xa1, 0x5b, 0xef, 0x18, 0x6a, 0x53, 0x86,
+ 0xc7, 0x57, 0x44, 0xc0, 0x52, 0x7e, 0x1f, 0xaa,
+ 0x9f, 0x87, 0x26, 0xe4, 0x62, 0xa1, 0x2a, 0x4f,
+ 0xeb, 0x06, 0xbd, 0x88, 0x01, 0xe7, 0x51, 0xe4
+ };
+
+ static const byte contextPh2[] = {
+ 0x66,0x6f,0x6f
+ };
+
+ outlen = sizeof(out);
+ XMEMSET(out, 0, sizeof(out));
+
+ if (wc_ed448_import_private_key(sKeyPh, ED448_KEY_SIZE, pKeyPh,
+ sizeof(pKeyPh), &key) != 0) {
+ return -11200;
+ }
+
+ if (wc_ed448ph_sign_msg(msgPh, sizeof(msgPh), out, &outlen, &key, NULL,
+ 0) != 0) {
+ return -11201;
+ }
+
+ if (XMEMCMP(out, sigPh1, sizeof(sigPh1)))
+ return -11202;
+
+#if defined(HAVE_ED448_VERIFY)
+ /* test verify on good msg */
+ if (wc_ed448ph_verify_msg(out, outlen, msgPh, sizeof(msgPh), &verify, &key,
+ NULL, 0) != 0 || verify != 1) {
+ return -11203;
+ }
+#endif
+
+ if (wc_ed448ph_sign_msg(msgPh, sizeof(msgPh), out, &outlen, &key,
+ contextPh2, sizeof(contextPh2)) != 0) {
+ return -11204;
+ }
+
+ if (XMEMCMP(out, sigPh2, sizeof(sigPh2)))
+ return -11205;
+
+#if defined(HAVE_ED448_VERIFY)
+ /* test verify on good msg */
+ if (wc_ed448ph_verify_msg(out, outlen, msgPh, sizeof(msgPh), &verify, &key,
+ contextPh2, sizeof(contextPh2)) != 0 ||
+ verify != 1) {
+ return -11206;
+ }
+#endif
+
+ if (wc_ed448ph_sign_hash(hashPh, sizeof(hashPh), out, &outlen, &key, NULL,
+ 0) != 0) {
+ return -11207;
+ }
+
+ if (XMEMCMP(out, sigPh1, sizeof(sigPh1)))
+ return -11208;
+
+#if defined(HAVE_ED448_VERIFY)
+ if (wc_ed448ph_verify_hash(out, outlen, hashPh, sizeof(hashPh), &verify,
+ &key, NULL, 0) != 0 || verify != 1) {
+ return -11209;
+ }
+#endif
+
+ if (wc_ed448ph_sign_hash(hashPh, sizeof(hashPh), out, &outlen, &key,
+ contextPh2, sizeof(contextPh2)) != 0) {
+ return -11210;
+ }
+
+ if (XMEMCMP(out, sigPh2, sizeof(sigPh2)))
+ return -11211;
+
+#if defined(HAVE_ED448_VERIFY)
+ if (wc_ed448ph_verify_hash(out, outlen, hashPh, sizeof(hashPh), &verify,
+ &key, contextPh2, sizeof(contextPh2)) != 0 ||
+ verify != 1) {
+ return -11212;
+ }
+#endif
+
+ wc_ed448_free(&key);
+
+ return 0;
+}
+#endif /* HAVE_ED448_SIGN && HAVE_ED448_KEY_EXPORT && HAVE_ED448_KEY_IMPORT */
+
+/* Known-answer tests for Ed448 (RFC 8032, section 7.4): raw sign/verify,
+ * public/private key import/export round-trips, and ASN.1 (DER) encoded
+ * key decoding.  Returns 0 on success or a negative, test-specific error
+ * code on the first failure.
+ *
+ * Fix(review): the signature comparisons previously used sizeof(sigs[i]) /
+ * sizeof(sigs[0]); since sigs[] is an array of byte POINTERS, that compared
+ * only sizeof(byte*) (4 or 8) bytes of the 114-byte signature, silently
+ * weakening the known-answer check.  They now compare ED448_SIG_SIZE bytes.
+ */
+int ed448_test(void)
+{
+    int ret;
+    WC_RNG rng;
+#if defined(HAVE_ED448_SIGN) && defined(HAVE_ED448_KEY_EXPORT) &&\
+    defined(HAVE_ED448_KEY_IMPORT)
+    byte  out[ED448_SIG_SIZE];
+    byte  exportPKey[ED448_KEY_SIZE];
+    byte  exportSKey[ED448_KEY_SIZE];
+    word32 exportPSz;
+    word32 exportSSz;
+    int    i;
+    word32 outlen;
+#ifdef HAVE_ED448_VERIFY
+    int    verify;
+#endif /* HAVE_ED448_VERIFY */
+#endif /* HAVE_ED448_SIGN && HAVE_ED448_KEY_EXPORT && HAVE_ED448_KEY_IMPORT */
+    word32 keySz, sigSz;
+    ed448_key key;
+    ed448_key key2;
+
+#if defined(HAVE_ED448_SIGN) && defined(HAVE_ED448_KEY_EXPORT) && \
+    defined(HAVE_ED448_KEY_IMPORT)
+    /* test vectors from
+       https://tools.ietf.org/html/rfc8032
+     */
+
+    static const byte sKey1[] = {
+        0x6c, 0x82, 0xa5, 0x62, 0xcb, 0x80, 0x8d, 0x10,
+        0xd6, 0x32, 0xbe, 0x89, 0xc8, 0x51, 0x3e, 0xbf,
+        0x6c, 0x92, 0x9f, 0x34, 0xdd, 0xfa, 0x8c, 0x9f,
+        0x63, 0xc9, 0x96, 0x0e, 0xf6, 0xe3, 0x48, 0xa3,
+        0x52, 0x8c, 0x8a, 0x3f, 0xcc, 0x2f, 0x04, 0x4e,
+        0x39, 0xa3, 0xfc, 0x5b, 0x94, 0x49, 0x2f, 0x8f,
+        0x03, 0x2e, 0x75, 0x49, 0xa2, 0x00, 0x98, 0xf9,
+        0x5b
+    };
+
+    static const byte sKey2[] = {
+        0xc4, 0xea, 0xb0, 0x5d, 0x35, 0x70, 0x07, 0xc6,
+        0x32, 0xf3, 0xdb, 0xb4, 0x84, 0x89, 0x92, 0x4d,
+        0x55, 0x2b, 0x08, 0xfe, 0x0c, 0x35, 0x3a, 0x0d,
+        0x4a, 0x1f, 0x00, 0xac, 0xda, 0x2c, 0x46, 0x3a,
+        0xfb, 0xea, 0x67, 0xc5, 0xe8, 0xd2, 0x87, 0x7c,
+        0x5e, 0x3b, 0xc3, 0x97, 0xa6, 0x59, 0x94, 0x9e,
+        0xf8, 0x02, 0x1e, 0x95, 0x4e, 0x0a, 0x12, 0x27,
+        0x4e
+    };
+
+    static const byte sKey3[] = {
+        0x25, 0x8c, 0xdd, 0x4a, 0xda, 0x32, 0xed, 0x9c,
+        0x9f, 0xf5, 0x4e, 0x63, 0x75, 0x6a, 0xe5, 0x82,
+        0xfb, 0x8f, 0xab, 0x2a, 0xc7, 0x21, 0xf2, 0xc8,
+        0xe6, 0x76, 0xa7, 0x27, 0x68, 0x51, 0x3d, 0x93,
+        0x9f, 0x63, 0xdd, 0xdb, 0x55, 0x60, 0x91, 0x33,
+        0xf2, 0x9a, 0xdf, 0x86, 0xec, 0x99, 0x29, 0xdc,
+        0xcb, 0x52, 0xc1, 0xc5, 0xfd, 0x2f, 0xf7, 0xe2,
+        0x1b
+    };
+
+    /* uncompressed test */
+    static const byte sKey4[] = {
+        0x6c, 0x82, 0xa5, 0x62, 0xcb, 0x80, 0x8d, 0x10,
+        0xd6, 0x32, 0xbe, 0x89, 0xc8, 0x51, 0x3e, 0xbf,
+        0x6c, 0x92, 0x9f, 0x34, 0xdd, 0xfa, 0x8c, 0x9f,
+        0x63, 0xc9, 0x96, 0x0e, 0xf6, 0xe3, 0x48, 0xa3,
+        0x52, 0x8c, 0x8a, 0x3f, 0xcc, 0x2f, 0x04, 0x4e,
+        0x39, 0xa3, 0xfc, 0x5b, 0x94, 0x49, 0x2f, 0x8f,
+        0x03, 0x2e, 0x75, 0x49, 0xa2, 0x00, 0x98, 0xf9,
+        0x5b
+    };
+
+    /* compressed prefix test */
+    static const byte sKey5[] = {
+        0x6c, 0x82, 0xa5, 0x62, 0xcb, 0x80, 0x8d, 0x10,
+        0xd6, 0x32, 0xbe, 0x89, 0xc8, 0x51, 0x3e, 0xbf,
+        0x6c, 0x92, 0x9f, 0x34, 0xdd, 0xfa, 0x8c, 0x9f,
+        0x63, 0xc9, 0x96, 0x0e, 0xf6, 0xe3, 0x48, 0xa3,
+        0x52, 0x8c, 0x8a, 0x3f, 0xcc, 0x2f, 0x04, 0x4e,
+        0x39, 0xa3, 0xfc, 0x5b, 0x94, 0x49, 0x2f, 0x8f,
+        0x03, 0x2e, 0x75, 0x49, 0xa2, 0x00, 0x98, 0xf9,
+        0x5b
+    };
+
+    static const byte sKey6[] = {
+        0x87, 0x2d, 0x09, 0x37, 0x80, 0xf5, 0xd3, 0x73,
+        0x0d, 0xf7, 0xc2, 0x12, 0x66, 0x4b, 0x37, 0xb8,
+        0xa0, 0xf2, 0x4f, 0x56, 0x81, 0x0d, 0xaa, 0x83,
+        0x82, 0xcd, 0x4f, 0xa3, 0xf7, 0x76, 0x34, 0xec,
+        0x44, 0xdc, 0x54, 0xf1, 0xc2, 0xed, 0x9b, 0xea,
+        0x86, 0xfa, 0xfb, 0x76, 0x32, 0xd8, 0xbe, 0x19,
+        0x9e, 0xa1, 0x65, 0xf5, 0xad, 0x55, 0xdd, 0x9c,
+        0xe8
+    };
+
+    static const byte* sKeys[] = {sKey1, sKey2, sKey3, sKey4, sKey5, sKey6};
+
+    static const byte pKey1[] = {
+        0x5f, 0xd7, 0x44, 0x9b, 0x59, 0xb4, 0x61, 0xfd,
+        0x2c, 0xe7, 0x87, 0xec, 0x61, 0x6a, 0xd4, 0x6a,
+        0x1d, 0xa1, 0x34, 0x24, 0x85, 0xa7, 0x0e, 0x1f,
+        0x8a, 0x0e, 0xa7, 0x5d, 0x80, 0xe9, 0x67, 0x78,
+        0xed, 0xf1, 0x24, 0x76, 0x9b, 0x46, 0xc7, 0x06,
+        0x1b, 0xd6, 0x78, 0x3d, 0xf1, 0xe5, 0x0f, 0x6c,
+        0xd1, 0xfa, 0x1a, 0xbe, 0xaf, 0xe8, 0x25, 0x61,
+        0x80
+    };
+
+    static const byte pKey2[] = {
+        0x43, 0xba, 0x28, 0xf4, 0x30, 0xcd, 0xff, 0x45,
+        0x6a, 0xe5, 0x31, 0x54, 0x5f, 0x7e, 0xcd, 0x0a,
+        0xc8, 0x34, 0xa5, 0x5d, 0x93, 0x58, 0xc0, 0x37,
+        0x2b, 0xfa, 0x0c, 0x6c, 0x67, 0x98, 0xc0, 0x86,
+        0x6a, 0xea, 0x01, 0xeb, 0x00, 0x74, 0x28, 0x02,
+        0xb8, 0x43, 0x8e, 0xa4, 0xcb, 0x82, 0x16, 0x9c,
+        0x23, 0x51, 0x60, 0x62, 0x7b, 0x4c, 0x3a, 0x94,
+        0x80
+    };
+
+    static const byte pKey3[] = {
+        0x3b, 0xa1, 0x6d, 0xa0, 0xc6, 0xf2, 0xcc, 0x1f,
+        0x30, 0x18, 0x77, 0x40, 0x75, 0x6f, 0x5e, 0x79,
+        0x8d, 0x6b, 0xc5, 0xfc, 0x01, 0x5d, 0x7c, 0x63,
+        0xcc, 0x95, 0x10, 0xee, 0x3f, 0xd4, 0x4a, 0xdc,
+        0x24, 0xd8, 0xe9, 0x68, 0xb6, 0xe4, 0x6e, 0x6f,
+        0x94, 0xd1, 0x9b, 0x94, 0x53, 0x61, 0x72, 0x6b,
+        0xd7, 0x5e, 0x14, 0x9e, 0xf0, 0x98, 0x17, 0xf5,
+        0x80
+    };
+
+    /* uncompressed test */
+    static const byte pKey4[] = {
+        0x5f, 0xd7, 0x44, 0x9b, 0x59, 0xb4, 0x61, 0xfd,
+        0x2c, 0xe7, 0x87, 0xec, 0x61, 0x6a, 0xd4, 0x6a,
+        0x1d, 0xa1, 0x34, 0x24, 0x85, 0xa7, 0x0e, 0x1f,
+        0x8a, 0x0e, 0xa7, 0x5d, 0x80, 0xe9, 0x67, 0x78,
+        0xed, 0xf1, 0x24, 0x76, 0x9b, 0x46, 0xc7, 0x06,
+        0x1b, 0xd6, 0x78, 0x3d, 0xf1, 0xe5, 0x0f, 0x6c,
+        0xd1, 0xfa, 0x1a, 0xbe, 0xaf, 0xe8, 0x25, 0x61,
+        0x80
+    };
+
+    /* compressed prefix */
+    static const byte pKey5[] = {
+        0x5f, 0xd7, 0x44, 0x9b, 0x59, 0xb4, 0x61, 0xfd,
+        0x2c, 0xe7, 0x87, 0xec, 0x61, 0x6a, 0xd4, 0x6a,
+        0x1d, 0xa1, 0x34, 0x24, 0x85, 0xa7, 0x0e, 0x1f,
+        0x8a, 0x0e, 0xa7, 0x5d, 0x80, 0xe9, 0x67, 0x78,
+        0xed, 0xf1, 0x24, 0x76, 0x9b, 0x46, 0xc7, 0x06,
+        0x1b, 0xd6, 0x78, 0x3d, 0xf1, 0xe5, 0x0f, 0x6c,
+        0xd1, 0xfa, 0x1a, 0xbe, 0xaf, 0xe8, 0x25, 0x61,
+        0x80
+    };
+
+    static const byte pKey6[] = {
+        0xa8, 0x1b, 0x2e, 0x8a, 0x70, 0xa5, 0xac, 0x94,
+        0xff, 0xdb, 0xcc, 0x9b, 0xad, 0xfc, 0x3f, 0xeb,
+        0x08, 0x01, 0xf2, 0x58, 0x57, 0x8b, 0xb1, 0x14,
+        0xad, 0x44, 0xec, 0xe1, 0xec, 0x0e, 0x79, 0x9d,
+        0xa0, 0x8e, 0xff, 0xb8, 0x1c, 0x5d, 0x68, 0x5c,
+        0x0c, 0x56, 0xf6, 0x4e, 0xec, 0xae, 0xf8, 0xcd,
+        0xf1, 0x1c, 0xc3, 0x87, 0x37, 0x83, 0x8c, 0xf4,
+        0x00
+    };
+
+    static const byte* pKeys[] = {pKey1, pKey2, pKey3, pKey4, pKey5, pKey6};
+    static const byte  pKeySz[] = {sizeof(pKey1), sizeof(pKey2), sizeof(pKey3),
+                            sizeof(pKey4), sizeof(pKey5), sizeof(pKey6)};
+
+    static const byte sig1[] = {
+        0x53, 0x3a, 0x37, 0xf6, 0xbb, 0xe4, 0x57, 0x25,
+        0x1f, 0x02, 0x3c, 0x0d, 0x88, 0xf9, 0x76, 0xae,
+        0x2d, 0xfb, 0x50, 0x4a, 0x84, 0x3e, 0x34, 0xd2,
+        0x07, 0x4f, 0xd8, 0x23, 0xd4, 0x1a, 0x59, 0x1f,
+        0x2b, 0x23, 0x3f, 0x03, 0x4f, 0x62, 0x82, 0x81,
+        0xf2, 0xfd, 0x7a, 0x22, 0xdd, 0xd4, 0x7d, 0x78,
+        0x28, 0xc5, 0x9b, 0xd0, 0xa2, 0x1b, 0xfd, 0x39,
+        0x80, 0xff, 0x0d, 0x20, 0x28, 0xd4, 0xb1, 0x8a,
+        0x9d, 0xf6, 0x3e, 0x00, 0x6c, 0x5d, 0x1c, 0x2d,
+        0x34, 0x5b, 0x92, 0x5d, 0x8d, 0xc0, 0x0b, 0x41,
+        0x04, 0x85, 0x2d, 0xb9, 0x9a, 0xc5, 0xc7, 0xcd,
+        0xda, 0x85, 0x30, 0xa1, 0x13, 0xa0, 0xf4, 0xdb,
+        0xb6, 0x11, 0x49, 0xf0, 0x5a, 0x73, 0x63, 0x26,
+        0x8c, 0x71, 0xd9, 0x58, 0x08, 0xff, 0x2e, 0x65,
+        0x26, 0x00
+    };
+
+    static const byte sig2[] = {
+        0x26, 0xb8, 0xf9, 0x17, 0x27, 0xbd, 0x62, 0x89,
+        0x7a, 0xf1, 0x5e, 0x41, 0xeb, 0x43, 0xc3, 0x77,
+        0xef, 0xb9, 0xc6, 0x10, 0xd4, 0x8f, 0x23, 0x35,
+        0xcb, 0x0b, 0xd0, 0x08, 0x78, 0x10, 0xf4, 0x35,
+        0x25, 0x41, 0xb1, 0x43, 0xc4, 0xb9, 0x81, 0xb7,
+        0xe1, 0x8f, 0x62, 0xde, 0x8c, 0xcd, 0xf6, 0x33,
+        0xfc, 0x1b, 0xf0, 0x37, 0xab, 0x7c, 0xd7, 0x79,
+        0x80, 0x5e, 0x0d, 0xbc, 0xc0, 0xaa, 0xe1, 0xcb,
+        0xce, 0xe1, 0xaf, 0xb2, 0xe0, 0x27, 0xdf, 0x36,
+        0xbc, 0x04, 0xdc, 0xec, 0xbf, 0x15, 0x43, 0x36,
+        0xc1, 0x9f, 0x0a, 0xf7, 0xe0, 0xa6, 0x47, 0x29,
+        0x05, 0xe7, 0x99, 0xf1, 0x95, 0x3d, 0x2a, 0x0f,
+        0xf3, 0x34, 0x8a, 0xb2, 0x1a, 0xa4, 0xad, 0xaf,
+        0xd1, 0xd2, 0x34, 0x44, 0x1c, 0xf8, 0x07, 0xc0,
+        0x3a, 0x00
+    };
+
+    static const byte sig3[] = {
+        0x7e, 0xee, 0xab, 0x7c, 0x4e, 0x50, 0xfb, 0x79,
+        0x9b, 0x41, 0x8e, 0xe5, 0xe3, 0x19, 0x7f, 0xf6,
+        0xbf, 0x15, 0xd4, 0x3a, 0x14, 0xc3, 0x43, 0x89,
+        0xb5, 0x9d, 0xd1, 0xa7, 0xb1, 0xb8, 0x5b, 0x4a,
+        0xe9, 0x04, 0x38, 0xac, 0xa6, 0x34, 0xbe, 0xa4,
+        0x5e, 0x3a, 0x26, 0x95, 0xf1, 0x27, 0x0f, 0x07,
+        0xfd, 0xcd, 0xf7, 0xc6, 0x2b, 0x8e, 0xfe, 0xaf,
+        0x00, 0xb4, 0x5c, 0x2c, 0x96, 0xba, 0x45, 0x7e,
+        0xb1, 0xa8, 0xbf, 0x07, 0x5a, 0x3d, 0xb2, 0x8e,
+        0x5c, 0x24, 0xf6, 0xb9, 0x23, 0xed, 0x4a, 0xd7,
+        0x47, 0xc3, 0xc9, 0xe0, 0x3c, 0x70, 0x79, 0xef,
+        0xb8, 0x7c, 0xb1, 0x10, 0xd3, 0xa9, 0x98, 0x61,
+        0xe7, 0x20, 0x03, 0xcb, 0xae, 0x6d, 0x6b, 0x8b,
+        0x82, 0x7e, 0x4e, 0x6c, 0x14, 0x30, 0x64, 0xff,
+        0x3c, 0x00
+    };
+
+    /* uncompressed test */
+    static const byte sig4[] = {
+        0x53, 0x3a, 0x37, 0xf6, 0xbb, 0xe4, 0x57, 0x25,
+        0x1f, 0x02, 0x3c, 0x0d, 0x88, 0xf9, 0x76, 0xae,
+        0x2d, 0xfb, 0x50, 0x4a, 0x84, 0x3e, 0x34, 0xd2,
+        0x07, 0x4f, 0xd8, 0x23, 0xd4, 0x1a, 0x59, 0x1f,
+        0x2b, 0x23, 0x3f, 0x03, 0x4f, 0x62, 0x82, 0x81,
+        0xf2, 0xfd, 0x7a, 0x22, 0xdd, 0xd4, 0x7d, 0x78,
+        0x28, 0xc5, 0x9b, 0xd0, 0xa2, 0x1b, 0xfd, 0x39,
+        0x80, 0xff, 0x0d, 0x20, 0x28, 0xd4, 0xb1, 0x8a,
+        0x9d, 0xf6, 0x3e, 0x00, 0x6c, 0x5d, 0x1c, 0x2d,
+        0x34, 0x5b, 0x92, 0x5d, 0x8d, 0xc0, 0x0b, 0x41,
+        0x04, 0x85, 0x2d, 0xb9, 0x9a, 0xc5, 0xc7, 0xcd,
+        0xda, 0x85, 0x30, 0xa1, 0x13, 0xa0, 0xf4, 0xdb,
+        0xb6, 0x11, 0x49, 0xf0, 0x5a, 0x73, 0x63, 0x26,
+        0x8c, 0x71, 0xd9, 0x58, 0x08, 0xff, 0x2e, 0x65,
+        0x26, 0x00
+    };
+
+    /* compressed prefix */
+    static const byte sig5[] = {
+        0x53, 0x3a, 0x37, 0xf6, 0xbb, 0xe4, 0x57, 0x25,
+        0x1f, 0x02, 0x3c, 0x0d, 0x88, 0xf9, 0x76, 0xae,
+        0x2d, 0xfb, 0x50, 0x4a, 0x84, 0x3e, 0x34, 0xd2,
+        0x07, 0x4f, 0xd8, 0x23, 0xd4, 0x1a, 0x59, 0x1f,
+        0x2b, 0x23, 0x3f, 0x03, 0x4f, 0x62, 0x82, 0x81,
+        0xf2, 0xfd, 0x7a, 0x22, 0xdd, 0xd4, 0x7d, 0x78,
+        0x28, 0xc5, 0x9b, 0xd0, 0xa2, 0x1b, 0xfd, 0x39,
+        0x80, 0xff, 0x0d, 0x20, 0x28, 0xd4, 0xb1, 0x8a,
+        0x9d, 0xf6, 0x3e, 0x00, 0x6c, 0x5d, 0x1c, 0x2d,
+        0x34, 0x5b, 0x92, 0x5d, 0x8d, 0xc0, 0x0b, 0x41,
+        0x04, 0x85, 0x2d, 0xb9, 0x9a, 0xc5, 0xc7, 0xcd,
+        0xda, 0x85, 0x30, 0xa1, 0x13, 0xa0, 0xf4, 0xdb,
+        0xb6, 0x11, 0x49, 0xf0, 0x5a, 0x73, 0x63, 0x26,
+        0x8c, 0x71, 0xd9, 0x58, 0x08, 0xff, 0x2e, 0x65,
+        0x26, 0x00
+    };
+
+    static const byte sig6[] = {
+        0xe3, 0x01, 0x34, 0x5a, 0x41, 0xa3, 0x9a, 0x4d,
+        0x72, 0xff, 0xf8, 0xdf, 0x69, 0xc9, 0x80, 0x75,
+        0xa0, 0xcc, 0x08, 0x2b, 0x80, 0x2f, 0xc9, 0xb2,
+        0xb6, 0xbc, 0x50, 0x3f, 0x92, 0x6b, 0x65, 0xbd,
+        0xdf, 0x7f, 0x4c, 0x8f, 0x1c, 0xb4, 0x9f, 0x63,
+        0x96, 0xaf, 0xc8, 0xa7, 0x0a, 0xbe, 0x6d, 0x8a,
+        0xef, 0x0d, 0xb4, 0x78, 0xd4, 0xc6, 0xb2, 0x97,
+        0x00, 0x76, 0xc6, 0xa0, 0x48, 0x4f, 0xe7, 0x6d,
+        0x76, 0xb3, 0xa9, 0x76, 0x25, 0xd7, 0x9f, 0x1c,
+        0xe2, 0x40, 0xe7, 0xc5, 0x76, 0x75, 0x0d, 0x29,
+        0x55, 0x28, 0x28, 0x6f, 0x71, 0x9b, 0x41, 0x3d,
+        0xe9, 0xad, 0xa3, 0xe8, 0xeb, 0x78, 0xed, 0x57,
+        0x36, 0x03, 0xce, 0x30, 0xd8, 0xbb, 0x76, 0x17,
+        0x85, 0xdc, 0x30, 0xdb, 0xc3, 0x20, 0x86, 0x9e,
+        0x1a, 0x00
+    };
+
+    static const byte* sigs[] = {sig1, sig2, sig3, sig4, sig5, sig6};
+
+    static const byte msg1[]  = { };
+    static const byte msg2[]  = { 0x03 };
+    static const byte msg3[]  = { 0x64, 0xa6, 0x5f, 0x3c, 0xde, 0xdc, 0xdd,
+                                  0x66, 0x81, 0x1e, 0x29, 0x15 };
+
+    /* test of a 1023 byte long message */
+    static const byte msg4[]  = {
+        0x6d, 0xdf, 0x80, 0x2e, 0x1a, 0xae, 0x49, 0x86,
+        0x93, 0x5f, 0x7f, 0x98, 0x1b, 0xa3, 0xf0, 0x35,
+        0x1d, 0x62, 0x73, 0xc0, 0xa0, 0xc2, 0x2c, 0x9c,
+        0x0e, 0x83, 0x39, 0x16, 0x8e, 0x67, 0x54, 0x12,
+        0xa3, 0xde, 0xbf, 0xaf, 0x43, 0x5e, 0xd6, 0x51,
+        0x55, 0x80, 0x07, 0xdb, 0x43, 0x84, 0xb6, 0x50,
+        0xfc, 0xc0, 0x7e, 0x3b, 0x58, 0x6a, 0x27, 0xa4,
+        0xf7, 0xa0, 0x0a, 0xc8, 0xa6, 0xfe, 0xc2, 0xcd,
+        0x86, 0xae, 0x4b, 0xf1, 0x57, 0x0c, 0x41, 0xe6,
+        0xa4, 0x0c, 0x93, 0x1d, 0xb2, 0x7b, 0x2f, 0xaa,
+        0x15, 0xa8, 0xce, 0xdd, 0x52, 0xcf, 0xf7, 0x36,
+        0x2c, 0x4e, 0x6e, 0x23, 0xda, 0xec, 0x0f, 0xbc,
+        0x3a, 0x79, 0xb6, 0x80, 0x6e, 0x31, 0x6e, 0xfc,
+        0xc7, 0xb6, 0x81, 0x19, 0xbf, 0x46, 0xbc, 0x76,
+        0xa2, 0x60, 0x67, 0xa5, 0x3f, 0x29, 0x6d, 0xaf,
+        0xdb, 0xdc, 0x11, 0xc7, 0x7f, 0x77, 0x77, 0xe9,
+        0x72, 0x66, 0x0c, 0xf4, 0xb6, 0xa9, 0xb3, 0x69,
+        0xa6, 0x66, 0x5f, 0x02, 0xe0, 0xcc, 0x9b, 0x6e,
+        0xdf, 0xad, 0x13, 0x6b, 0x4f, 0xab, 0xe7, 0x23,
+        0xd2, 0x81, 0x3d, 0xb3, 0x13, 0x6c, 0xfd, 0xe9,
+        0xb6, 0xd0, 0x44, 0x32, 0x2f, 0xee, 0x29, 0x47,
+        0x95, 0x2e, 0x03, 0x1b, 0x73, 0xab, 0x5c, 0x60,
+        0x33, 0x49, 0xb3, 0x07, 0xbd, 0xc2, 0x7b, 0xc6,
+        0xcb, 0x8b, 0x8b, 0xbd, 0x7b, 0xd3, 0x23, 0x21,
+        0x9b, 0x80, 0x33, 0xa5, 0x81, 0xb5, 0x9e, 0xad,
+        0xeb, 0xb0, 0x9b, 0x3c, 0x4f, 0x3d, 0x22, 0x77,
+        0xd4, 0xf0, 0x34, 0x36, 0x24, 0xac, 0xc8, 0x17,
+        0x80, 0x47, 0x28, 0xb2, 0x5a, 0xb7, 0x97, 0x17,
+        0x2b, 0x4c, 0x5c, 0x21, 0xa2, 0x2f, 0x9c, 0x78,
+        0x39, 0xd6, 0x43, 0x00, 0x23, 0x2e, 0xb6, 0x6e,
+        0x53, 0xf3, 0x1c, 0x72, 0x3f, 0xa3, 0x7f, 0xe3,
+        0x87, 0xc7, 0xd3, 0xe5, 0x0b, 0xdf, 0x98, 0x13,
+        0xa3, 0x0e, 0x5b, 0xb1, 0x2c, 0xf4, 0xcd, 0x93,
+        0x0c, 0x40, 0xcf, 0xb4, 0xe1, 0xfc, 0x62, 0x25,
+        0x92, 0xa4, 0x95, 0x88, 0x79, 0x44, 0x94, 0xd5,
+        0x6d, 0x24, 0xea, 0x4b, 0x40, 0xc8, 0x9f, 0xc0,
+        0x59, 0x6c, 0xc9, 0xeb, 0xb9, 0x61, 0xc8, 0xcb,
+        0x10, 0xad, 0xde, 0x97, 0x6a, 0x5d, 0x60, 0x2b,
+        0x1c, 0x3f, 0x85, 0xb9, 0xb9, 0xa0, 0x01, 0xed,
+        0x3c, 0x6a, 0x4d, 0x3b, 0x14, 0x37, 0xf5, 0x20,
+        0x96, 0xcd, 0x19, 0x56, 0xd0, 0x42, 0xa5, 0x97,
+        0xd5, 0x61, 0xa5, 0x96, 0xec, 0xd3, 0xd1, 0x73,
+        0x5a, 0x8d, 0x57, 0x0e, 0xa0, 0xec, 0x27, 0x22,
+        0x5a, 0x2c, 0x4a, 0xaf, 0xf2, 0x63, 0x06, 0xd1,
+        0x52, 0x6c, 0x1a, 0xf3, 0xca, 0x6d, 0x9c, 0xf5,
+        0xa2, 0xc9, 0x8f, 0x47, 0xe1, 0xc4, 0x6d, 0xb9,
+        0xa3, 0x32, 0x34, 0xcf, 0xd4, 0xd8, 0x1f, 0x2c,
+        0x98, 0x53, 0x8a, 0x09, 0xeb, 0xe7, 0x69, 0x98,
+        0xd0, 0xd8, 0xfd, 0x25, 0x99, 0x7c, 0x7d, 0x25,
+        0x5c, 0x6d, 0x66, 0xec, 0xe6, 0xfa, 0x56, 0xf1,
+        0x11, 0x44, 0x95, 0x0f, 0x02, 0x77, 0x95, 0xe6,
+        0x53, 0x00, 0x8f, 0x4b, 0xd7, 0xca, 0x2d, 0xee,
+        0x85, 0xd8, 0xe9, 0x0f, 0x3d, 0xc3, 0x15, 0x13,
+        0x0c, 0xe2, 0xa0, 0x03, 0x75, 0xa3, 0x18, 0xc7,
+        0xc3, 0xd9, 0x7b, 0xe2, 0xc8, 0xce, 0x5b, 0x6d,
+        0xb4, 0x1a, 0x62, 0x54, 0xff, 0x26, 0x4f, 0xa6,
+        0x15, 0x5b, 0xae, 0xe3, 0xb0, 0x77, 0x3c, 0x0f,
+        0x49, 0x7c, 0x57, 0x3f, 0x19, 0xbb, 0x4f, 0x42,
+        0x40, 0x28, 0x1f, 0x0b, 0x1f, 0x4f, 0x7b, 0xe8,
+        0x57, 0xa4, 0xe5, 0x9d, 0x41, 0x6c, 0x06, 0xb4,
+        0xc5, 0x0f, 0xa0, 0x9e, 0x18, 0x10, 0xdd, 0xc6,
+        0xb1, 0x46, 0x7b, 0xae, 0xac, 0x5a, 0x36, 0x68,
+        0xd1, 0x1b, 0x6e, 0xca, 0xa9, 0x01, 0x44, 0x00,
+        0x16, 0xf3, 0x89, 0xf8, 0x0a, 0xcc, 0x4d, 0xb9,
+        0x77, 0x02, 0x5e, 0x7f, 0x59, 0x24, 0x38, 0x8c,
+        0x7e, 0x34, 0x0a, 0x73, 0x2e, 0x55, 0x44, 0x40,
+        0xe7, 0x65, 0x70, 0xf8, 0xdd, 0x71, 0xb7, 0xd6,
+        0x40, 0xb3, 0x45, 0x0d, 0x1f, 0xd5, 0xf0, 0x41,
+        0x0a, 0x18, 0xf9, 0xa3, 0x49, 0x4f, 0x70, 0x7c,
+        0x71, 0x7b, 0x79, 0xb4, 0xbf, 0x75, 0xc9, 0x84,
+        0x00, 0xb0, 0x96, 0xb2, 0x16, 0x53, 0xb5, 0xd2,
+        0x17, 0xcf, 0x35, 0x65, 0xc9, 0x59, 0x74, 0x56,
+        0xf7, 0x07, 0x03, 0x49, 0x7a, 0x07, 0x87, 0x63,
+        0x82, 0x9b, 0xc0, 0x1b, 0xb1, 0xcb, 0xc8, 0xfa,
+        0x04, 0xea, 0xdc, 0x9a, 0x6e, 0x3f, 0x66, 0x99,
+        0x58, 0x7a, 0x9e, 0x75, 0xc9, 0x4e, 0x5b, 0xab,
+        0x00, 0x36, 0xe0, 0xb2, 0xe7, 0x11, 0x39, 0x2c,
+        0xff, 0x00, 0x47, 0xd0, 0xd6, 0xb0, 0x5b, 0xd2,
+        0xa5, 0x88, 0xbc, 0x10, 0x97, 0x18, 0x95, 0x42,
+        0x59, 0xf1, 0xd8, 0x66, 0x78, 0xa5, 0x79, 0xa3,
+        0x12, 0x0f, 0x19, 0xcf, 0xb2, 0x96, 0x3f, 0x17,
+        0x7a, 0xeb, 0x70, 0xf2, 0xd4, 0x84, 0x48, 0x26,
+        0x26, 0x2e, 0x51, 0xb8, 0x02, 0x71, 0x27, 0x20,
+        0x68, 0xef, 0x5b, 0x38, 0x56, 0xfa, 0x85, 0x35,
+        0xaa, 0x2a, 0x88, 0xb2, 0xd4, 0x1f, 0x2a, 0x0e,
+        0x2f, 0xda, 0x76, 0x24, 0xc2, 0x85, 0x02, 0x72,
+        0xac, 0x4a, 0x2f, 0x56, 0x1f, 0x8f, 0x2f, 0x7a,
+        0x31, 0x8b, 0xfd, 0x5c, 0xaf, 0x96, 0x96, 0x14,
+        0x9e, 0x4a, 0xc8, 0x24, 0xad, 0x34, 0x60, 0x53,
+        0x8f, 0xdc, 0x25, 0x42, 0x1b, 0xee, 0xc2, 0xcc,
+        0x68, 0x18, 0x16, 0x2d, 0x06, 0xbb, 0xed, 0x0c,
+        0x40, 0xa3, 0x87, 0x19, 0x23, 0x49, 0xdb, 0x67,
+        0xa1, 0x18, 0xba, 0xda, 0x6c, 0xd5, 0xab, 0x01,
+        0x40, 0xee, 0x27, 0x32, 0x04, 0xf6, 0x28, 0xaa,
+        0xd1, 0xc1, 0x35, 0xf7, 0x70, 0x27, 0x9a, 0x65,
+        0x1e, 0x24, 0xd8, 0xc1, 0x4d, 0x75, 0xa6, 0x05,
+        0x9d, 0x76, 0xb9, 0x6a, 0x6f, 0xd8, 0x57, 0xde,
+        0xf5, 0xe0, 0xb3, 0x54, 0xb2, 0x7a, 0xb9, 0x37,
+        0xa5, 0x81, 0x5d, 0x16, 0xb5, 0xfa, 0xe4, 0x07,
+        0xff, 0x18, 0x22, 0x2c, 0x6d, 0x1e, 0xd2, 0x63,
+        0xbe, 0x68, 0xc9, 0x5f, 0x32, 0xd9, 0x08, 0xbd,
+        0x89, 0x5c, 0xd7, 0x62, 0x07, 0xae, 0x72, 0x64,
+        0x87, 0x56, 0x7f, 0x9a, 0x67, 0xda, 0xd7, 0x9a,
+        0xbe, 0xc3, 0x16, 0xf6, 0x83, 0xb1, 0x7f, 0x2d,
+        0x02, 0xbf, 0x07, 0xe0, 0xac, 0x8b, 0x5b, 0xc6,
+        0x16, 0x2c, 0xf9, 0x46, 0x97, 0xb3, 0xc2, 0x7c,
+        0xd1, 0xfe, 0xa4, 0x9b, 0x27, 0xf2, 0x3b, 0xa2,
+        0x90, 0x18, 0x71, 0x96, 0x25, 0x06, 0x52, 0x0c,
+        0x39, 0x2d, 0xa8, 0xb6, 0xad, 0x0d, 0x99, 0xf7,
+        0x01, 0x3f, 0xbc, 0x06, 0xc2, 0xc1, 0x7a, 0x56,
+        0x95, 0x00, 0xc8, 0xa7, 0x69, 0x64, 0x81, 0xc1,
+        0xcd, 0x33, 0xe9, 0xb1, 0x4e, 0x40, 0xb8, 0x2e,
+        0x79, 0xa5, 0xf5, 0xdb, 0x82, 0x57, 0x1b, 0xa9,
+        0x7b, 0xae, 0x3a, 0xd3, 0xe0, 0x47, 0x95, 0x15,
+        0xbb, 0x0e, 0x2b, 0x0f, 0x3b, 0xfc, 0xd1, 0xfd,
+        0x33, 0x03, 0x4e, 0xfc, 0x62, 0x45, 0xed, 0xdd,
+        0x7e, 0xe2, 0x08, 0x6d, 0xda, 0xe2, 0x60, 0x0d,
+        0x8c, 0xa7, 0x3e, 0x21, 0x4e, 0x8c, 0x2b, 0x0b,
+        0xdb, 0x2b, 0x04, 0x7c, 0x6a, 0x46, 0x4a, 0x56,
+        0x2e, 0xd7, 0x7b, 0x73, 0xd2, 0xd8, 0x41, 0xc4,
+        0xb3, 0x49, 0x73, 0x55, 0x12, 0x57, 0x71, 0x3b,
+        0x75, 0x36, 0x32, 0xef, 0xba, 0x34, 0x81, 0x69,
+        0xab, 0xc9, 0x0a, 0x68, 0xf4, 0x26, 0x11, 0xa4,
+        0x01, 0x26, 0xd7, 0xcb, 0x21, 0xb5, 0x86, 0x95,
+        0x56, 0x81, 0x86, 0xf7, 0xe5, 0x69, 0xd2, 0xff,
+        0x0f, 0x9e, 0x74, 0x5d, 0x04, 0x87, 0xdd, 0x2e,
+        0xb9, 0x97, 0xca, 0xfc, 0x5a, 0xbf, 0x9d, 0xd1,
+        0x02, 0xe6, 0x2f, 0xf6, 0x6c, 0xba, 0x87
+    };
+
+    static const byte* msgs[] =   {msg1, msg2, msg3, msg1, msg1, msg4};
+    static const word16 msgSz[] = {0 /*sizeof(msg1)*/,
+                                   sizeof(msg2),
+                                   sizeof(msg3),
+                                   0 /*sizeof(msg1)*/,
+                                   0 /*sizeof(msg1)*/,
+                                   sizeof(msg4)
+    };
+#ifndef NO_ASN
+    static byte privateEd448[] = {
+        0x30, 0x47, 0x02, 0x01, 0x00, 0x30, 0x05, 0x06,
+        0x03, 0x2b, 0x65, 0x71, 0x04, 0x3b, 0x04, 0x39,
+        0x6c, 0x82, 0xa5, 0x62, 0xcb, 0x80, 0x8d, 0x10,
+        0xd6, 0x32, 0xbe, 0x89, 0xc8, 0x51, 0x3e, 0xbf,
+        0x6c, 0x92, 0x9f, 0x34, 0xdd, 0xfa, 0x8c, 0x9f,
+        0x63, 0xc9, 0x96, 0x0e, 0xf6, 0xe3, 0x48, 0xa3,
+        0x52, 0x8c, 0x8a, 0x3f, 0xcc, 0x2f, 0x04, 0x4e,
+        0x39, 0xa3, 0xfc, 0x5b, 0x94, 0x49, 0x2f, 0x8f,
+        0x03, 0x2e, 0x75, 0x49, 0xa2, 0x00, 0x98, 0xf9,
+        0x5b
+    };
+    static byte publicEd448[] = {
+        0x30, 0x43, 0x30, 0x05, 0x06, 0x03, 0x2b, 0x65,
+        0x71, 0x03, 0x3a, 0x00, 0x5f, 0xd7, 0x44, 0x9b,
+        0x59, 0xb4, 0x61, 0xfd, 0x2c, 0xe7, 0x87, 0xec,
+        0x61, 0x6a, 0xd4, 0x6a, 0x1d, 0xa1, 0x34, 0x24,
+        0x85, 0xa7, 0x0e, 0x1f, 0x8a, 0x0e, 0xa7, 0x5d,
+        0x80, 0xe9, 0x67, 0x78, 0xed, 0xf1, 0x24, 0x76,
+        0x9b, 0x46, 0xc7, 0x06, 0x1b, 0xd6, 0x78, 0x3d,
+        0xf1, 0xe5, 0x0f, 0x6c, 0xd1, 0xfa, 0x1a, 0xbe,
+        0xaf, 0xe8, 0x25, 0x61, 0x80
+    };
+    static byte privPubEd448[] = {
+        0x30, 0x81, 0x84, 0x02, 0x01, 0x00, 0x30, 0x05,
+        0x06, 0x03, 0x2b, 0x65, 0x71, 0x04, 0x3b, 0x04,
+        0x39, 0x6c, 0x82, 0xa5, 0x62, 0xcb, 0x80, 0x8d,
+        0x10, 0xd6, 0x32, 0xbe, 0x89, 0xc8, 0x51, 0x3e,
+        0xbf, 0x6c, 0x92, 0x9f, 0x34, 0xdd, 0xfa, 0x8c,
+        0x9f, 0x63, 0xc9, 0x96, 0x0e, 0xf6, 0xe3, 0x48,
+        0xa3, 0x52, 0x8c, 0x8a, 0x3f, 0xcc, 0x2f, 0x04,
+        0x4e, 0x39, 0xa3, 0xfc, 0x5b, 0x94, 0x49, 0x2f,
+        0x8f, 0x03, 0x2e, 0x75, 0x49, 0xa2, 0x00, 0x98,
+        0xf9, 0x5b, 0xa1, 0x3b, 0x04, 0x39, 0x5f, 0xd7,
+        0x44, 0x9b, 0x59, 0xb4, 0x61, 0xfd, 0x2c, 0xe7,
+        0x87, 0xec, 0x61, 0x6a, 0xd4, 0x6a, 0x1d, 0xa1,
+        0x34, 0x24, 0x85, 0xa7, 0x0e, 0x1f, 0x8a, 0x0e,
+        0xa7, 0x5d, 0x80, 0xe9, 0x67, 0x78, 0xed, 0xf1,
+        0x24, 0x76, 0x9b, 0x46, 0xc7, 0x06, 0x1b, 0xd6,
+        0x78, 0x3d, 0xf1, 0xe5, 0x0f, 0x6c, 0xd1, 0xfa,
+        0x1a, 0xbe, 0xaf, 0xe8, 0x25, 0x61, 0x80
+    };
+
+    word32 idx;
+    ed448_key key3;
+#endif /* NO_ASN */
+#endif /* HAVE_ED448_SIGN && HAVE_ED448_KEY_EXPORT && HAVE_ED448_KEY_IMPORT */
+
+    /* create ed448 keys */
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+    ret = wc_InitRng(&rng);
+#endif
+    if (ret != 0)
+        return -11300;
+
+    wc_ed448_init(&key);
+    wc_ed448_init(&key2);
+#ifndef NO_ASN
+    wc_ed448_init(&key3);
+#endif
+    /* NOTE(review): return values of wc_ed448_make_key() are not checked;
+     * the keys are overwritten by imports below, but a failure here would
+     * surface confusingly later — consider checking. */
+    wc_ed448_make_key(&rng, ED448_KEY_SIZE, &key);
+    wc_ed448_make_key(&rng, ED448_KEY_SIZE, &key2);
+
+    /* helper functions for signature and key size */
+    keySz = wc_ed448_size(&key);
+    sigSz = wc_ed448_sig_size(&key);
+
+#if defined(HAVE_ED448_SIGN) && defined(HAVE_ED448_KEY_EXPORT) &&\
+    defined(HAVE_ED448_KEY_IMPORT)
+    for (i = 0; i < 6; i++) {
+        outlen = sizeof(out);
+        XMEMSET(out, 0, sizeof(out));
+
+        if (wc_ed448_import_private_key(sKeys[i], ED448_KEY_SIZE, pKeys[i],
+                pKeySz[i], &key) != 0)
+            return -11301 - i;
+
+        if (wc_ed448_sign_msg(msgs[i], msgSz[i], out, &outlen, &key, NULL,
+                0) != 0) {
+            return -11311 - i;
+        }
+
+        /* full 114-byte signature must match the RFC 8032 known answer */
+        if (XMEMCMP(out, sigs[i], ED448_SIG_SIZE))
+            return -11321 - i;
+
+#if defined(HAVE_ED448_VERIFY)
+        /* test verify on good msg */
+        if (wc_ed448_verify_msg(out, outlen, msgs[i], msgSz[i], &verify, &key,
+                NULL, 0) != 0 || verify != 1) {
+            return -11331 - i;
+        }
+
+        /* test verify on bad msg */
+        out[outlen-2] = out[outlen-2] + 1;
+        if (wc_ed448_verify_msg(out, outlen, msgs[i], msgSz[i], &verify, &key,
+                NULL, 0) == 0 || verify == 1) {
+            return -11341 - i;
+        }
+#endif /* HAVE_ED448_VERIFY */
+
+        /* test api for import/exporting keys */
+        exportPSz = sizeof(exportPKey);
+        exportSSz = sizeof(exportSKey);
+        if (wc_ed448_export_public(&key, exportPKey, &exportPSz) != 0)
+            return -11351 - i;
+
+        if (wc_ed448_import_public(exportPKey, exportPSz, &key2) != 0)
+            return -11361 - i;
+
+        if (wc_ed448_export_private_only(&key, exportSKey, &exportSSz) != 0)
+            return -11371 - i;
+
+        if (wc_ed448_import_private_key(exportSKey, exportSSz,
+                                        exportPKey, exportPSz, &key2) != 0)
+            return -11381 - i;
+
+        /* clear "out" buffer and test sign with imported keys */
+        outlen = sizeof(out);
+        XMEMSET(out, 0, sizeof(out));
+        if (wc_ed448_sign_msg(msgs[i], msgSz[i], out, &outlen, &key2, NULL,
+                0) != 0) {
+            return -11391 - i;
+        }
+
+#if defined(HAVE_ED448_VERIFY)
+        if (wc_ed448_verify_msg(out, outlen, msgs[i], msgSz[i], &verify, &key2,
+                NULL, 0) != 0 || verify != 1)
+            return -11401 - i;
+
+        /* fix: was sizeof(sigs[i]), i.e. sizeof(byte*) — compared only 4/8
+         * bytes of the signature instead of all ED448_SIG_SIZE (114) bytes */
+        if (XMEMCMP(out, sigs[i], ED448_SIG_SIZE))
+            return -11411 - i;
+#endif /* HAVE_ED448_VERIFY */
+    }
+
+    ret = ed448_ctx_test();
+    if (ret != 0)
+        return ret;
+
+    ret = ed448ph_test();
+    if (ret != 0)
+        return ret;
+
+#ifndef NO_ASN
+    /* Try ASN.1 encoded private-only key and public key.
+     * (i retains its loop-exit value 6 here, keeping error codes distinct
+     * from the in-loop ones.) */
+    idx = 0;
+    if (wc_Ed448PrivateKeyDecode(privateEd448, &idx, &key3,
+                                 sizeof(privateEd448)) != 0)
+        return -11421 - i;
+
+    /* private-only key cannot sign: expect BAD_FUNC_ARG */
+    if (wc_ed448_sign_msg(msgs[0], msgSz[0], out, &outlen, &key3, NULL, 0)
+                != BAD_FUNC_ARG)
+        return -11431 - i;
+
+    idx = 0;
+    if (wc_Ed448PublicKeyDecode(publicEd448, &idx, &key3,
+                                sizeof(publicEd448)) != 0)
+        return -11441 - i;
+
+    if (wc_ed448_sign_msg(msgs[0], msgSz[0], out, &outlen, &key3, NULL, 0) != 0)
+        return -11451 - i;
+
+    /* fix: was sizeof(sigs[0]) (pointer size) — compare the whole signature */
+    if (XMEMCMP(out, sigs[0], ED448_SIG_SIZE))
+        return -11461 - i;
+
+#if defined(HAVE_ED448_VERIFY)
+    /* test verify on good msg */
+    if (wc_ed448_verify_msg(out, outlen, msgs[0], msgSz[0], &verify, &key3,
+                NULL, 0) != 0 || verify != 1)
+        return -11471 - i;
+#endif /* HAVE_ED448_VERIFY */
+
+    wc_ed448_free(&key3);
+    wc_ed448_init(&key3);
+
+    idx = 0;
+    if (wc_Ed448PrivateKeyDecode(privPubEd448, &idx, &key3,
+                                 sizeof(privPubEd448)) != 0)
+        return -11481 - i;
+
+    if (wc_ed448_sign_msg(msgs[0], msgSz[0], out, &outlen, &key3, NULL, 0) != 0)
+        return -11491 - i;
+
+    /* fix: was sizeof(sigs[0]) (pointer size) — compare the whole signature */
+    if (XMEMCMP(out, sigs[0], ED448_SIG_SIZE))
+        return -11501 - i;
+
+    wc_ed448_free(&key3);
+#endif /* NO_ASN */
+#endif /* HAVE_ED448_SIGN && HAVE_ED448_KEY_EXPORT && HAVE_ED448_KEY_IMPORT */
+
+    /* clean up keys when done */
+    wc_ed448_free(&key);
+    wc_ed448_free(&key2);
+
+#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+    wc_FreeRng(&rng);
+#endif
+
+    /* hush warnings of unused keySz and sigSz */
+    (void)keySz;
+    (void)sigSz;
+
+#ifdef WOLFSSL_TEST_CERT
+    ret = ed448_test_cert();
+    if (ret < 0)
+        return ret;
+#ifdef WOLFSSL_CERT_GEN
+    ret = ed448_test_make_cert();
+    if (ret < 0)
+        return ret;
+#endif /* WOLFSSL_CERT_GEN */
+#endif /* WOLFSSL_TEST_CERT */
+
+    return 0;
+}
+#endif /* HAVE_ED448 */
+
+#if defined(WOLFSSL_CMAC) && !defined(NO_AES)
+
+/* One AES-CMAC known-answer test vector (NIST SP 800-38B style). */
+typedef struct CMAC_Test_Case {
+    int type;        /* CMAC algorithm id passed to wc_InitCmac (WC_CMAC_AES) */
+    int partial;     /* non-zero: feed message in two uneven update calls */
+    const byte* m;   /* message input */
+    word32 mSz;      /* message length in bytes */
+    const byte* k;   /* key */
+    word32 kSz;      /* key length in bytes */
+    const byte* t;   /* expected tag */
+    word32 tSz;      /* expected tag length in bytes */
+} CMAC_Test_Case;
+
+/* AES-CMAC known-answer tests for 128/192/256-bit keys using the
+ * NIST SP 800-38B example vectors, exercising both the incremental
+ * init/update/final API and the one-shot generate/verify API.
+ * Returns 0 on success or a negative, test-specific error code. */
+int cmac_test(void)
+{
+#ifdef WOLFSSL_AES_128
+    const byte k128[] =
+    {
+        0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
+        0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c
+    };
+    #define KLEN_128 (sizeof(k128))
+#endif
+#ifdef WOLFSSL_AES_192
+    const byte k192[] =
+    {
+        0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52,
+        0xc8, 0x10, 0xf3, 0x2b, 0x80, 0x90, 0x79, 0xe5,
+        0x62, 0xf8, 0xea, 0xd2, 0x52, 0x2c, 0x6b, 0x7b
+    };
+    #define KLEN_192 (sizeof(k192))
+#endif
+#ifdef WOLFSSL_AES_256
+    const byte k256[] =
+    {
+        0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe,
+        0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
+        0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7,
+        0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4
+    };
+    #define KLEN_256 (sizeof(k256))
+#endif
+
+    /* SP 800-38B example message; test cases use prefixes of it */
+    const byte m[] =
+    {
+        0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
+        0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a,
+        0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
+        0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51,
+        0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11,
+        0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef,
+        0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17,
+        0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10
+    };
+    /* message lengths in bytes (names are in bits) */
+    #define MLEN_0 (0)
+    #define MLEN_128 (128/8)
+    #define MLEN_320 (320/8)
+    #define MLEN_319 (MLEN_320 - 1)  /* non-block-aligned length */
+    #define MLEN_512 (512/8)
+
+#ifdef WOLFSSL_AES_128
+    const byte t128_0[] =
+    {
+        0xbb, 0x1d, 0x69, 0x29, 0xe9, 0x59, 0x37, 0x28,
+        0x7f, 0xa3, 0x7d, 0x12, 0x9b, 0x75, 0x67, 0x46
+    };
+    const byte t128_128[] =
+    {
+        0x07, 0x0a, 0x16, 0xb4, 0x6b, 0x4d, 0x41, 0x44,
+        0xf7, 0x9b, 0xdd, 0x9d, 0xd0, 0x4a, 0x28, 0x7c
+    };
+    const byte t128_319[] =
+    {
+        0x2c, 0x17, 0x84, 0x4c, 0x93, 0x1c, 0x07, 0x95,
+        0x15, 0x92, 0x73, 0x0a, 0x34, 0xd0, 0xd9, 0xd2
+    };
+    const byte t128_320[] =
+    {
+        0xdf, 0xa6, 0x67, 0x47, 0xde, 0x9a, 0xe6, 0x30,
+        0x30, 0xca, 0x32, 0x61, 0x14, 0x97, 0xc8, 0x27
+    };
+    const byte t128_512[] =
+    {
+        0x51, 0xf0, 0xbe, 0xbf, 0x7e, 0x3b, 0x9d, 0x92,
+        0xfc, 0x49, 0x74, 0x17, 0x79, 0x36, 0x3c, 0xfe
+    };
+#endif
+#ifdef WOLFSSL_AES_192
+    const byte t192_0[] =
+    {
+        0xd1, 0x7d, 0xdf, 0x46, 0xad, 0xaa, 0xcd, 0xe5,
+        0x31, 0xca, 0xc4, 0x83, 0xde, 0x7a, 0x93, 0x67
+    };
+    const byte t192_128[] =
+    {
+        0x9e, 0x99, 0xa7, 0xbf, 0x31, 0xe7, 0x10, 0x90,
+        0x06, 0x62, 0xf6, 0x5e, 0x61, 0x7c, 0x51, 0x84
+    };
+    const byte t192_320[] =
+    {
+        0x8a, 0x1d, 0xe5, 0xbe, 0x2e, 0xb3, 0x1a, 0xad,
+        0x08, 0x9a, 0x82, 0xe6, 0xee, 0x90, 0x8b, 0x0e
+    };
+    const byte t192_512[] =
+    {
+        0xa1, 0xd5, 0xdf, 0x0e, 0xed, 0x79, 0x0f, 0x79,
+        0x4d, 0x77, 0x58, 0x96, 0x59, 0xf3, 0x9a, 0x11
+    };
+#endif
+#ifdef WOLFSSL_AES_256
+    const byte t256_0[] =
+    {
+        0x02, 0x89, 0x62, 0xf6, 0x1b, 0x7b, 0xf8, 0x9e,
+        0xfc, 0x6b, 0x55, 0x1f, 0x46, 0x67, 0xd9, 0x83
+    };
+    const byte t256_128[] =
+    {
+        0x28, 0xa7, 0x02, 0x3f, 0x45, 0x2e, 0x8f, 0x82,
+        0xbd, 0x4b, 0xf2, 0x8d, 0x8c, 0x37, 0xc3, 0x5c
+    };
+    const byte t256_320[] =
+    {
+        0xaa, 0xf3, 0xd8, 0xf1, 0xde, 0x56, 0x40, 0xc2,
+        0x32, 0xf5, 0xb1, 0x69, 0xb9, 0xc9, 0x11, 0xe6
+    };
+    const byte t256_512[] =
+    {
+        0xe1, 0x99, 0x21, 0x90, 0x54, 0x9f, 0x6e, 0xd5,
+        0x69, 0x6a, 0x2c, 0x05, 0x6c, 0x31, 0x54, 0x10
+    };
+#endif
+    const CMAC_Test_Case testCases[] =
+    {
+#ifdef WOLFSSL_AES_128
+        {WC_CMAC_AES, 0, m, MLEN_0, k128, KLEN_128, t128_0, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 0, m, MLEN_128, k128, KLEN_128, t128_128, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 0, m, MLEN_320, k128, KLEN_128, t128_320, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 0, m, MLEN_512, k128, KLEN_128, t128_512, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 5, m, MLEN_512, k128, KLEN_128, t128_512, AES_BLOCK_SIZE},
+#endif
+#ifdef WOLFSSL_AES_192
+        {WC_CMAC_AES, 0, m, MLEN_0, k192, KLEN_192, t192_0, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 0, m, MLEN_128, k192, KLEN_192, t192_128, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 0, m, MLEN_320, k192, KLEN_192, t192_320, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 0, m, MLEN_512, k192, KLEN_192, t192_512, AES_BLOCK_SIZE},
+#endif
+#ifdef WOLFSSL_AES_256
+        {WC_CMAC_AES, 0, m, MLEN_0, k256, KLEN_256, t256_0, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 0, m, MLEN_128, k256, KLEN_256, t256_128, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 0, m, MLEN_320, k256, KLEN_256, t256_320, AES_BLOCK_SIZE},
+        {WC_CMAC_AES, 0, m, MLEN_512, k256, KLEN_256, t256_512, AES_BLOCK_SIZE},
+#endif
+#ifdef WOLFSSL_AES_128
+        {WC_CMAC_AES, 0, m, MLEN_319, k128, KLEN_128, t128_319, AES_BLOCK_SIZE}
+#endif
+    };
+
+    Cmac cmac;
+    byte tag[AES_BLOCK_SIZE];
+    const CMAC_Test_Case* tc;
+    word32 i, tagSz;
+
+    for (i = 0, tc = testCases;
+         i < sizeof(testCases)/sizeof(CMAC_Test_Case);
+         i++, tc++) {
+
+        /* incremental init/update/final API */
+        XMEMSET(tag, 0, sizeof(tag));
+        tagSz = AES_BLOCK_SIZE;
+        if (wc_InitCmac(&cmac, tc->k, tc->kSz, tc->type, NULL) != 0)
+            return -11600;
+        if (tc->partial) {
+            /* feed the message in two unequal chunks to exercise the
+             * internal block buffering */
+            if (wc_CmacUpdate(&cmac, tc->m,
+                                 tc->mSz/2 - tc->partial) != 0)
+                return -11601;
+            if (wc_CmacUpdate(&cmac, tc->m + tc->mSz/2 - tc->partial,
+                                 tc->mSz/2 + tc->partial) != 0)
+                return -11602;
+        }
+        else {
+            if (wc_CmacUpdate(&cmac, tc->m, tc->mSz) != 0)
+                return -11603;
+        }
+        if (wc_CmacFinal(&cmac, tag, &tagSz) != 0)
+            return -11604;
+        if (XMEMCMP(tag, tc->t, AES_BLOCK_SIZE) != 0)
+            return -11605;
+
+        /* one-shot generate/verify API must agree with the vector too */
+        XMEMSET(tag, 0, sizeof(tag));
+        tagSz = sizeof(tag);
+        if (wc_AesCmacGenerate(tag, &tagSz, tc->m, tc->mSz,
+                               tc->k, tc->kSz) != 0)
+            return -11606;
+        if (XMEMCMP(tag, tc->t, AES_BLOCK_SIZE) != 0)
+            return -11607;
+        if (wc_AesCmacVerify(tc->t, tc->tSz, tc->m, tc->mSz,
+                             tc->k, tc->kSz) != 0)
+            return -11608;
+    }
+
+    return 0;
+}
+
+#endif /* NO_AES && WOLFSSL_CMAC */
#ifdef HAVE_LIBZ
@@ -5878,37 +23481,228 @@ const byte sample_text[] =
"swag consectetur et. Irure skateboard banjo, nulla deserunt messenger\n"
"bag dolor terry richardson sapiente.\n";
+const byte sample_text_gz[] = {
+ 0x1F, 0x8B, 0x08, 0x08, 0xC5, 0x49, 0xB5, 0x5B, 0x00, 0x03, 0x63, 0x69, 0x70,
+ 0x68, 0x65, 0x72, 0x74, 0x65, 0x78, 0x74, 0x2E, 0x74, 0x78, 0x74, 0x00, 0x8D,
+ 0x58, 0xCB, 0x92, 0xE4, 0xB6, 0x11, 0xBC, 0xE3, 0x2B, 0xEA, 0xA6, 0x83, 0xD9,
+ 0x1D, 0x72, 0xF8, 0x22, 0x1F, 0xB5, 0x96, 0xA5, 0xDD, 0x90, 0xBC, 0xAB, 0xD0,
+ 0x28, 0x36, 0x42, 0x47, 0x90, 0x2C, 0x36, 0xA1, 0x06, 0x09, 0x0A, 0x8F, 0xEE,
+ 0xE1, 0xDF, 0x3B, 0x0B, 0xE0, 0x73, 0x2C, 0x4B, 0xBA, 0xCD, 0xCE, 0x80, 0x78,
+ 0x64, 0x65, 0x65, 0x66, 0xED, 0x3B, 0xE3, 0x5A, 0xC3, 0x81, 0x2D, 0x35, 0x69,
+ 0x32, 0xAD, 0x8E, 0x3A, 0xD2, 0xA0, 0x7D, 0xA7, 0x2B, 0x6A, 0xAC, 0x69, 0x7A,
+ 0x26, 0x9D, 0x22, 0xD3, 0x94, 0x22, 0x69, 0xAA, 0x8D, 0x6F, 0xC9, 0x8D, 0x64,
+ 0x22, 0x99, 0xB1, 0x31, 0xAD, 0x69, 0xD3, 0x18, 0x89, 0xAD, 0x89, 0x6A, 0x72,
+ 0x56, 0x7B, 0x67, 0xDA, 0x2B, 0xBD, 0xC8, 0xEF, 0xB0, 0x4D, 0x74, 0x8E, 0x5B,
+ 0xAA, 0x39, 0x4C, 0xEE, 0xCE, 0xE4, 0x79, 0xF2, 0xDC, 0xF3, 0xD8, 0xB2, 0x37,
+ 0x11, 0x8B, 0x8C, 0x2C, 0x7A, 0x32, 0x93, 0xF3, 0x37, 0x3D, 0x9A, 0x86, 0x4C,
+ 0xAB, 0xF2, 0xB9, 0x57, 0xFA, 0x97, 0x1B, 0x06, 0xD7, 0x3A, 0x7A, 0xF0, 0x68,
+ 0xF4, 0x40, 0xBA, 0x25, 0x0E, 0x81, 0xE9, 0xA6, 0x43, 0xF4, 0x6E, 0x4A, 0xF5,
+ 0x95, 0xFE, 0x41, 0x4F, 0x67, 0x3B, 0x1A, 0x1C, 0xEE, 0x12, 0xB4, 0x8F, 0xCE,
+ 0x1B, 0x6D, 0xB1, 0xDE, 0xBB, 0x4A, 0x4D, 0x56, 0x9B, 0x96, 0x5A, 0xB6, 0xDC,
+ 0xC4, 0x14, 0x70, 0xE5, 0xF5, 0x7D, 0xE1, 0xB7, 0x84, 0x3F, 0xFC, 0xED, 0xEF,
+ 0xF4, 0x30, 0x0D, 0x5F, 0xE9, 0x47, 0x17, 0xE2, 0xC5, 0x78, 0x27, 0x67, 0xDF,
+ 0xB9, 0xEB, 0xCC, 0xCC, 0x3D, 0x59, 0xBE, 0xDD, 0xCC, 0x78, 0x0B, 0x0A, 0x1F,
+ 0x74, 0xF8, 0x8C, 0x1A, 0xAF, 0x67, 0xEA, 0xF4, 0x44, 0xBD, 0x93, 0x7D, 0x2A,
+ 0xEA, 0x9C, 0xD7, 0x37, 0x80, 0x32, 0x9A, 0x01, 0x37, 0xD5, 0xDE, 0xCA, 0xA2,
+ 0x0D, 0xB9, 0xD0, 0x3B, 0xCF, 0xAD, 0x89, 0x4D, 0x5F, 0xD1, 0xE7, 0xF7, 0x2F,
+ 0x2A, 0x0C, 0xDA, 0x5A, 0xAA, 0x35, 0x7E, 0x41, 0xC3, 0xB2, 0x37, 0xDD, 0xDD,
+ 0xCD, 0x50, 0xEB, 0x2C, 0x96, 0x62, 0x3B, 0xD7, 0x52, 0xF4, 0xA9, 0xB9, 0x6F,
+ 0x48, 0xED, 0xEF, 0x54, 0xEA, 0x67, 0xF6, 0x7E, 0x26, 0x8F, 0x3A, 0x68, 0xDF,
+ 0x06, 0xBC, 0x56, 0xB7, 0x66, 0x32, 0xC1, 0x34, 0xD8, 0x88, 0x34, 0x1E, 0x88,
+ 0xED, 0x67, 0x8A, 0xF3, 0xC4, 0x4F, 0xC0, 0xCA, 0x9E, 0x62, 0x1A, 0x6A, 0xEB,
+ 0xAB, 0x02, 0xED, 0xB3, 0xD7, 0x91, 0x81, 0x8A, 0xEA, 0x5C, 0xF2, 0x64, 0xDD,
+ 0xDD, 0xD1, 0xEC, 0x12, 0x4D, 0xDE, 0xD5, 0xBA, 0xC6, 0x77, 0xBD, 0x06, 0xC4,
+ 0x5F, 0x44, 0xEA, 0x59, 0x4B, 0x5D, 0x3B, 0x8A, 0x3D, 0x0F, 0xD4, 0x9B, 0x1B,
+ 0x80, 0x30, 0x1D, 0x30, 0xFA, 0x8F, 0x00, 0x3F, 0xDE, 0xB0, 0x6F, 0xAD, 0x6F,
+ 0x6A, 0xDD, 0x6E, 0x2F, 0x6E, 0xCB, 0x3C, 0xD1, 0x83, 0x06, 0x7B, 0x0F, 0xFD,
+ 0xFD, 0x4A, 0xEF, 0xBC, 0x73, 0x77, 0x3B, 0x8F, 0x34, 0xA1, 0xBA, 0xEC, 0x39,
+ 0x80, 0x33, 0x21, 0xA4, 0x01, 0x55, 0xD7, 0xD4, 0xF4, 0xC6, 0xDA, 0x27, 0x4E,
+ 0x54, 0x1C, 0x2B, 0xEC, 0x37, 0xDE, 0xC3, 0x4C, 0xC9, 0x5A, 0x3D, 0x34, 0x0E,
+ 0xD8, 0x1C, 0x0E, 0xA2, 0x34, 0xE8, 0xC1, 0xD0, 0xA4, 0x51, 0xD5, 0x88, 0x8B,
+ 0xB7, 0xC6, 0xA3, 0x96, 0x40, 0x49, 0xB7, 0xBC, 0xE0, 0x7F, 0x55, 0x3F, 0xEF,
+ 0x6F, 0x6E, 0x92, 0x9D, 0x34, 0xFE, 0x3C, 0x5F, 0x04, 0xA5, 0x6A, 0xFF, 0x30,
+ 0x08, 0xC9, 0xEA, 0xF5, 0x52, 0x2B, 0xFE, 0x57, 0xFA, 0x8E, 0xC7, 0xE8, 0x4D,
+ 0x37, 0xAB, 0x03, 0xFA, 0x23, 0xBF, 0x46, 0x94, 0xFF, 0xC1, 0x16, 0xE0, 0xB9,
+ 0x14, 0x2C, 0x9E, 0x27, 0xEC, 0x98, 0x69, 0x14, 0x92, 0xF1, 0x60, 0x5C, 0x34,
+ 0x4D, 0xA0, 0x1F, 0xDF, 0xFD, 0x44, 0x1C, 0x7B, 0xD3, 0x80, 0x70, 0x42, 0x02,
+ 0x30, 0x84, 0x5B, 0xE5, 0x59, 0xB7, 0xF3, 0x80, 0xFB, 0x01, 0x33, 0xA9, 0x00,
+ 0x37, 0x52, 0xDC, 0xDA, 0xA7, 0x11, 0x85, 0xB7, 0x6E, 0x70, 0xE4, 0xDA, 0x96,
+ 0xBA, 0x84, 0x5B, 0x81, 0x43, 0x93, 0xF3, 0xD1, 0xEA, 0xB1, 0xDD, 0xB8, 0x1F,
+ 0xA5, 0xCC, 0xEA, 0x50, 0x66, 0x69, 0xA9, 0x8D, 0x8C, 0xA7, 0xA2, 0xF3, 0x38,
+ 0x26, 0x43, 0x5E, 0x3F, 0x01, 0xBE, 0x1C, 0x0F, 0x20, 0x7F, 0x75, 0xA8, 0x20,
+ 0x80, 0xC4, 0xC3, 0x5C, 0x8B, 0x0D, 0xD4, 0x60, 0x5E, 0xA3, 0x9E, 0xD0, 0xB4,
+ 0x4B, 0x4F, 0xE6, 0x13, 0x85, 0x60, 0x42, 0x96, 0xED, 0xAA, 0xDB, 0xE9, 0x99,
+ 0xE3, 0x07, 0x0E, 0x61, 0xB3, 0x07, 0xE3, 0xB1, 0xFA, 0xC0, 0x9B, 0xAD, 0xF6,
+ 0xE0, 0x26, 0x33, 0xEA, 0xEA, 0x23, 0xCD, 0x1E, 0x9D, 0xE1, 0x87, 0x4B, 0x74,
+ 0x97, 0x08, 0x3E, 0xA1, 0x28, 0xEA, 0xB3, 0x19, 0x67, 0x8B, 0x76, 0x9A, 0xA3,
+ 0xF6, 0xB9, 0xCF, 0x80, 0x65, 0x97, 0xAE, 0xF4, 0x83, 0x6B, 0xF4, 0x43, 0x20,
+ 0xF9, 0x0B, 0xFC, 0x9B, 0xD2, 0x4D, 0x4D, 0xA6, 0xB9, 0xA3, 0x02, 0x55, 0x79,
+ 0x18, 0x36, 0x19, 0x5F, 0xC9, 0xEA, 0x5A, 0x76, 0x40, 0xB9, 0xBA, 0x0E, 0x9A,
+ 0x44, 0xDF, 0x7C, 0xF8, 0x65, 0x61, 0x5E, 0x81, 0xAB, 0x71, 0xA1, 0x9E, 0x29,
+ 0x3C, 0x59, 0xCB, 0x23, 0xA4, 0xF6, 0x60, 0x1A, 0x0D, 0x5B, 0x39, 0xAE, 0xF4,
+ 0x6F, 0x59, 0x16, 0x9E, 0x60, 0xD8, 0x56, 0xCF, 0xEA, 0x2C, 0x4C, 0x79, 0xD3,
+ 0x5D, 0x51, 0x46, 0xA0, 0x4E, 0xE9, 0xD6, 0xAB, 0x91, 0x43, 0x63, 0x44, 0xD7,
+ 0x70, 0xB9, 0x23, 0x98, 0x4F, 0x3D, 0x03, 0x02, 0xF6, 0x81, 0x56, 0xC1, 0x58,
+ 0x85, 0x07, 0xA7, 0x2D, 0x2C, 0x29, 0xCA, 0x01, 0x45, 0x31, 0x51, 0x8F, 0xD4,
+ 0x19, 0xA1, 0x79, 0x88, 0x5A, 0xA4, 0xF5, 0xAE, 0x2D, 0x4B, 0x63, 0x4C, 0x58,
+ 0xFE, 0xBF, 0xAD, 0xEE, 0xA3, 0x09, 0xF8, 0xE2, 0x89, 0xBE, 0x81, 0x0E, 0x86,
+ 0x3A, 0xF9, 0x5B, 0xA5, 0xD8, 0xA4, 0x00, 0x75, 0x04, 0xF2, 0x23, 0xB8, 0x39,
+ 0x69, 0x50, 0xB7, 0xD0, 0x34, 0x63, 0x54, 0xD8, 0x61, 0xDD, 0xA5, 0x33, 0x47,
+ 0x85, 0x96, 0x22, 0xD0, 0x2F, 0x9F, 0x7E, 0xF8, 0x74, 0x24, 0xEA, 0x57, 0x97,
+ 0x5A, 0xE0, 0x00, 0xCF, 0xC1, 0x67, 0xE1, 0x41, 0xBD, 0x94, 0xA1, 0x03, 0xD3,
+ 0xB4, 0x08, 0x64, 0xF2, 0x17, 0x27, 0x35, 0x37, 0x53, 0xEF, 0x46, 0xCE, 0xD8,
+ 0xD4, 0x09, 0x52, 0xC6, 0x1E, 0xF7, 0x28, 0xDF, 0x08, 0x0F, 0xD0, 0x6F, 0x71,
+ 0xA6, 0xDF, 0xE4, 0x60, 0x8E, 0xC0, 0x1E, 0x78, 0x86, 0x50, 0xB0, 0x9B, 0x84,
+ 0x7E, 0xE8, 0x36, 0xFA, 0x95, 0xF1, 0x12, 0x51, 0xC7, 0x18, 0x96, 0xA2, 0x29,
+ 0xBB, 0x70, 0x02, 0xB4, 0xF9, 0xA8, 0x3D, 0x08, 0x66, 0xA9, 0xB3, 0xFC, 0x0A,
+ 0x94, 0x80, 0xFD, 0x78, 0xDC, 0xAB, 0x82, 0x5A, 0xD2, 0xCD, 0xC2, 0x87, 0xC6,
+ 0x4B, 0x07, 0xFA, 0xD1, 0xC3, 0xD9, 0x34, 0x41, 0x85, 0xF8, 0xD0, 0xB6, 0x0A,
+ 0x9D, 0x00, 0x91, 0x35, 0x05, 0x88, 0xC3, 0xE3, 0x9B, 0x22, 0xD2, 0xB8, 0xFD,
+ 0x95, 0x3E, 0x6D, 0x5D, 0x48, 0xA3, 0x68, 0xCF, 0x02, 0x42, 0x79, 0x79, 0x8A,
+ 0xAA, 0x01, 0xD6, 0x09, 0x14, 0x2C, 0xF4, 0x83, 0xA3, 0x80, 0x31, 0x55, 0x46,
+ 0x6E, 0xC5, 0xE5, 0x2F, 0x30, 0x58, 0x81, 0xA2, 0x90, 0xBE, 0x2E, 0xA1, 0xC3,
+ 0x0F, 0xA6, 0xF5, 0x51, 0x00, 0x39, 0xB6, 0xF2, 0x2A, 0xA3, 0x15, 0x7D, 0x8D,
+ 0xF5, 0x66, 0x5C, 0xD9, 0xFC, 0xCF, 0x2F, 0xBF, 0x08, 0x27, 0xE7, 0xD0, 0x03,
+ 0xB8, 0xD9, 0x00, 0x13, 0x3D, 0x01, 0x6B, 0xB6, 0xA8, 0xCD, 0x5B, 0x3B, 0x3E,
+ 0x93, 0xBF, 0xE6, 0x2E, 0xB7, 0x4A, 0xCF, 0xB3, 0x0A, 0xCE, 0x62, 0x11, 0xD6,
+ 0x1F, 0x68, 0x9B, 0x1D, 0x68, 0xD1, 0x8C, 0x97, 0xBD, 0xA1, 0x07, 0x67, 0x73,
+ 0x87, 0xE0, 0x36, 0xDA, 0x8C, 0xD2, 0xD2, 0xBB, 0x84, 0x28, 0xA9, 0xFE, 0x52,
+ 0x74, 0xD6, 0xB9, 0x0F, 0x0A, 0x6A, 0x2D, 0x28, 0x35, 0x34, 0x3A, 0xD3, 0xE2,
+ 0xCD, 0x35, 0x06, 0x7D, 0x1B, 0x35, 0x85, 0x86, 0xD1, 0x3E, 0xF2, 0x6F, 0xA1,
+ 0xC4, 0x55, 0xBD, 0x00, 0xD8, 0xC3, 0x5D, 0xC2, 0x1D, 0x6B, 0x6B, 0x27, 0x5B,
+ 0x95, 0xF3, 0xAB, 0xB5, 0xD3, 0x37, 0xF2, 0x2C, 0x9C, 0xC7, 0x5D, 0xBD, 0xF1,
+ 0x68, 0x1C, 0xAD, 0xF8, 0xB5, 0xE1, 0x29, 0x72, 0x7A, 0x73, 0x62, 0x55, 0x24,
+ 0xB9, 0x85, 0xDF, 0x7B, 0x29, 0x7D, 0xDE, 0x08, 0xF5, 0xE4, 0x44, 0xDA, 0x1A,
+ 0x30, 0x74, 0xDA, 0xB4, 0x9B, 0x23, 0x9A, 0x3A, 0xC1, 0x53, 0xB2, 0xA2, 0xA3,
+ 0x7B, 0x1F, 0xD9, 0x56, 0xD4, 0x4F, 0x9B, 0xB2, 0x1E, 0xEE, 0xB8, 0x6A, 0x4E,
+ 0xB5, 0xF4, 0x5A, 0xC9, 0x18, 0x27, 0x9C, 0xDE, 0x14, 0x44, 0xED, 0xC4, 0x3C,
+ 0x71, 0x9F, 0x5F, 0xD9, 0x37, 0xA0, 0x78, 0x34, 0x6E, 0xBC, 0xD2, 0x7B, 0x1D,
+ 0xFA, 0x08, 0x39, 0x5A, 0x04, 0x73, 0x15, 0xD9, 0x0A, 0x48, 0xC1, 0x2D, 0x15,
+ 0x4E, 0x84, 0x30, 0x45, 0x69, 0xB3, 0xE5, 0xF6, 0xAD, 0x09, 0x1E, 0xCC, 0x5F,
+ 0x1F, 0x06, 0xD5, 0x58, 0xAD, 0x78, 0xD7, 0x9F, 0xE5, 0xED, 0x3B, 0x09, 0xD5,
+ 0xA6, 0x52, 0x6F, 0x92, 0xD3, 0x3C, 0xC6, 0x1E, 0xF2, 0x93, 0x7C, 0xD3, 0x5F,
+ 0x70, 0x85, 0x5D, 0xF8, 0xAA, 0x9D, 0xB7, 0x7B, 0x24, 0x5A, 0xE9, 0x0A, 0x35,
+ 0x2F, 0xF5, 0xD9, 0x82, 0x02, 0x8A, 0x90, 0x13, 0x5B, 0xB5, 0x67, 0x9C, 0xDD,
+ 0xA0, 0x4E, 0x82, 0x27, 0xDA, 0x7E, 0xE8, 0x8E, 0xCD, 0xE1, 0x56, 0x71, 0x2C,
+ 0xE6, 0x4E, 0x1F, 0x91, 0xCD, 0x7C, 0x6A, 0xB7, 0x78, 0xD0, 0x26, 0xF3, 0x56,
+ 0xA9, 0xD5, 0xA1, 0xC3, 0x3B, 0x98, 0xE9, 0x28, 0x09, 0xEF, 0x50, 0x90, 0xCD,
+ 0xC4, 0x8E, 0x75, 0xCC, 0xAC, 0x2D, 0xC9, 0x03, 0x6D, 0xAC, 0xFE, 0xC4, 0x88,
+ 0x36, 0xD1, 0x3F, 0xBB, 0x1C, 0x7D, 0xB3, 0x14, 0x61, 0x2C, 0xB7, 0x54, 0x4B,
+ 0xDB, 0x64, 0xB6, 0x57, 0x14, 0x16, 0x8E, 0x1E, 0x6C, 0x64, 0xBB, 0x8B, 0x48,
+ 0x5D, 0x96, 0x9D, 0xDC, 0x80, 0xA7, 0xF7, 0x54, 0xC7, 0x46, 0x38, 0x3E, 0x44,
+ 0xDE, 0x7E, 0x92, 0x8D, 0x07, 0xF6, 0x07, 0x37, 0x4E, 0x16, 0x10, 0xB4, 0x7D,
+ 0x88, 0x66, 0x7F, 0xBB, 0xFF, 0xEA, 0x00, 0xF3, 0xFF, 0x97, 0x2C, 0xB5, 0xBE,
+ 0x35, 0x4B, 0x5C, 0x36, 0xEC, 0x4C, 0xBD, 0x2B, 0x7D, 0xBF, 0x46, 0xE2, 0x9C,
+ 0x0E, 0x8A, 0xA3, 0xEC, 0xB1, 0x0E, 0x9A, 0xDA, 0x9A, 0x9B, 0x28, 0x92, 0x10,
+ 0x53, 0x57, 0xEA, 0xEC, 0xA2, 0x32, 0x32, 0x20, 0x1D, 0x97, 0x5C, 0xB6, 0x84,
+ 0xA9, 0x93, 0x8D, 0x95, 0x11, 0xA3, 0x24, 0xA3, 0x2D, 0xC6, 0x4A, 0xEF, 0xAA,
+ 0x1D, 0x85, 0x2B, 0x7D, 0x28, 0xBE, 0x53, 0xCE, 0x10, 0x1F, 0xAE, 0x0E, 0x41,
+ 0x6C, 0x4B, 0x79, 0x12, 0xFB, 0xF7, 0x54, 0xA3, 0x96, 0x54, 0x83, 0x20, 0x96,
+ 0x8F, 0x28, 0xA9, 0x3F, 0x8B, 0x3D, 0xBA, 0x77, 0xDC, 0x24, 0xE1, 0xD4, 0x49,
+ 0x40, 0xD8, 0x78, 0x31, 0x85, 0x43, 0xF6, 0xFE, 0x5C, 0xA6, 0x8F, 0x90, 0x09,
+ 0xB0, 0xE7, 0xC4, 0x95, 0xB2, 0x55, 0x49, 0x97, 0x8F, 0x1C, 0x78, 0x30, 0x20,
+ 0xA0, 0xB4, 0xEF, 0x73, 0x56, 0x59, 0x82, 0xFD, 0xCE, 0xBA, 0x6A, 0x8F, 0x2C,
+ 0x8B, 0x15, 0xFD, 0xA1, 0x85, 0xA8, 0x5C, 0x0F, 0x11, 0xA5, 0x9D, 0xC2, 0x46,
+ 0xC6, 0x9C, 0xC9, 0x40, 0x0B, 0x58, 0x6A, 0x1C, 0x7A, 0x23, 0xF9, 0xE0, 0x95,
+ 0x05, 0x13, 0x58, 0x72, 0xE8, 0x9F, 0x30, 0xAC, 0xCD, 0x26, 0xD4, 0x66, 0x13,
+ 0xDF, 0x1E, 0x7B, 0x4F, 0x9C, 0xBE, 0x38, 0x79, 0x75, 0x92, 0xA4, 0xDA, 0x26,
+ 0x44, 0x55, 0x17, 0xA3, 0xE5, 0x62, 0xDA, 0xEB, 0x86, 0xEA, 0x68, 0xC7, 0xAB,
+ 0xFD, 0x2D, 0x43, 0x59, 0x51, 0xC0, 0x75, 0x64, 0x91, 0x01, 0x29, 0x33, 0x28,
+ 0xF3, 0x04, 0x83, 0x80, 0x75, 0x37, 0x75, 0x0C, 0x03, 0x7B, 0x0A, 0xAB, 0x8E,
+ 0x60, 0x62, 0x8B, 0x4C, 0xAF, 0x2D, 0xA3, 0x2F, 0xFE, 0xAB, 0x45, 0xCF, 0xDA,
+ 0xAB, 0xFA, 0xFA, 0x30, 0x3D, 0xE8, 0xA1, 0x96, 0xA5, 0x7B, 0xE2, 0x2A, 0xD0,
+ 0xAF, 0x59, 0xF7, 0xD0, 0x32, 0x57, 0x19, 0xBD, 0xCA, 0x9F, 0xD5, 0x1A, 0xC7,
+ 0xAA, 0x65, 0x4A, 0x38, 0xB2, 0x70, 0x33, 0xB7, 0x75, 0xD2, 0xCD, 0xD1, 0xF0,
+ 0xA8, 0x87, 0x59, 0x20, 0xA5, 0x57, 0x55, 0xB1, 0xB2, 0xC9, 0x4D, 0x97, 0x34,
+ 0x41, 0xF3, 0xF0, 0x30, 0xA1, 0x2C, 0x1C, 0x49, 0x3E, 0x89, 0x7D, 0x12, 0xE2,
+ 0xC3, 0x04, 0xC3, 0x92, 0xC0, 0xF6, 0x39, 0x10, 0x80, 0x81, 0x8F, 0x08, 0xB4,
+ 0xF8, 0xB9, 0x13, 0x4E, 0x2C, 0xAE, 0xB3, 0x71, 0x82, 0x63, 0x98, 0xAB, 0x5C,
+ 0x1C, 0x10, 0xEA, 0x66, 0xF9, 0x02, 0x3A, 0x82, 0x61, 0xD0, 0xD4, 0xAE, 0x43,
+ 0xD4, 0x01, 0x3E, 0x9D, 0x04, 0x14, 0xF6, 0x60, 0xD8, 0xA7, 0xD6, 0xB8, 0x53,
+ 0xC8, 0xDA, 0x80, 0x93, 0xA0, 0x02, 0xDD, 0xCC, 0xE2, 0xF2, 0xBB, 0xFB, 0xE0,
+ 0x27, 0xD7, 0x34, 0x9A, 0x71, 0x49, 0xB5, 0x4F, 0x42, 0x1F, 0xB2, 0x9D, 0x6D,
+ 0xAA, 0x9D, 0xD3, 0x50, 0xB5, 0x8F, 0x6A, 0x4B, 0xDF, 0x1F, 0xD5, 0x27, 0x8F,
+ 0x3B, 0x27, 0xCF, 0x2F, 0x8C, 0xF8, 0x9D, 0x4C, 0x52, 0xBC, 0x32, 0x0F, 0x73,
+ 0xD5, 0x51, 0x8E, 0x36, 0x7E, 0xAD, 0x09, 0xF0, 0x94, 0x83, 0x5F, 0x36, 0xFD,
+ 0x7C, 0x03, 0xED, 0xF1, 0x5E, 0x4B, 0xF7, 0xAA, 0x55, 0x5C, 0x4A, 0x14, 0x59,
+ 0x85, 0x38, 0x2D, 0x8C, 0xDF, 0xEC, 0x65, 0x1B, 0xB8, 0x76, 0x57, 0x96, 0x3C,
+ 0x86, 0xED, 0xF2, 0x7F, 0x2D, 0x28, 0x48, 0xDA, 0x49, 0x7F, 0xF7, 0x54, 0x2B,
+ 0xD5, 0x39, 0xD5, 0x57, 0x0A, 0x75, 0x7A, 0x3E, 0x5E, 0x5D, 0xBA, 0x4A, 0x15,
+ 0xFA, 0xB8, 0x31, 0x80, 0x71, 0x2C, 0xCA, 0xC4, 0x51, 0x10, 0x16, 0x5D, 0x39,
+ 0xEC, 0x9D, 0x07, 0xB6, 0x6A, 0x89, 0x9F, 0x9B, 0x5B, 0x6F, 0x03, 0xB0, 0x92,
+ 0x01, 0x38, 0x6B, 0x48, 0x99, 0x0A, 0x8F, 0x13, 0xC1, 0xA6, 0x01, 0xEA, 0xBF,
+ 0x6F, 0x86, 0x43, 0x51, 0xB6, 0x11, 0x00, 0x00
+};
int compress_test(void)
{
int ret = 0;
word32 dSz = sizeof(sample_text);
word32 cSz = (dSz + (word32)(dSz * 0.001) + 12);
- byte *c = NULL;
- byte *d = NULL;
+ byte *c;
+ byte *d;
- c = calloc(cSz, sizeof(byte));
- d = calloc(dSz, sizeof(byte));
+ c = XMALLOC(cSz * sizeof(byte), HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ d = XMALLOC(dSz * sizeof(byte), HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (c == NULL || d == NULL) {
+ ERROR_OUT(-11700, exit);
+ }
- if (c == NULL || d == NULL)
- ret = -300;
+ /* follow calloc and initialize to 0 */
+ XMEMSET(c, 0, cSz);
+ XMEMSET(d, 0, dSz);
- if (ret == 0 && (ret = wc_Compress(c, cSz, sample_text, dSz, 0)) < 0)
- ret = -301;
+ if ((ret = wc_Compress(c, cSz, sample_text, dSz, 0)) < 0) {
+ ERROR_OUT(-11701, exit);
+ }
+ cSz = (word32)ret;
- if (ret > 0) {
- cSz = (word32)ret;
- ret = 0;
+ if ((ret = wc_DeCompress(d, dSz, c, cSz)) != (int)dSz) {
+ ERROR_OUT(-11702, exit);
}
- if (ret == 0 && wc_DeCompress(d, dSz, c, cSz) != (int)dSz)
- ret = -302;
+ if (XMEMCMP(d, sample_text, dSz) != 0) {
+ ERROR_OUT(-11703, exit);
+ }
+
+ /* GZIP tests */
+ cSz = (dSz + (word32)(dSz * 0.001) + 12); /* reset cSz */
+ XMEMSET(c, 0, cSz);
+ XMEMSET(d, 0, dSz);
- if (ret == 0 && memcmp(d, sample_text, dSz))
- ret = -303;
+ ret = wc_Compress_ex(c, cSz, sample_text, dSz, 0, LIBZ_WINBITS_GZIP);
+ if (ret < 0) {
+ ERROR_OUT(-11704, exit);
+ }
+ cSz = (word32)ret;
- if (c) free(c);
- if (d) free(d);
+ ret = wc_DeCompress_ex(d, dSz, c, cSz, LIBZ_WINBITS_GZIP);
+ if (ret < 0) {
+ ERROR_OUT(-11705, exit);
+ }
+
+ if (XMEMCMP(d, sample_text, dSz) != 0) {
+ ERROR_OUT(-11706, exit);
+ }
+
+ /* Try with gzip generated output */
+ XMEMSET(d, 0, dSz);
+ ret = wc_DeCompress_ex(d, dSz, sample_text_gz, sizeof(sample_text_gz),
+ LIBZ_WINBITS_GZIP);
+ if (ret < 0) {
+ ERROR_OUT(-11707, exit);
+ }
+ dSz = (word32)ret;
+
+ if (XMEMCMP(d, sample_text, dSz) != 0) {
+ ERROR_OUT(-11708, exit);
+ }
+
+ ret = 0; /* success */
+
+exit:
+ if (c) XFREE(c, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (d) XFREE(d, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
return ret;
}
@@ -5917,323 +23711,4857 @@ int compress_test(void)
#ifdef HAVE_PKCS7
+/* External Debugging/Testing Note:
+ *
+ * PKCS#7 test functions can output generated PKCS#7/CMS bundles for
+ * additional testing. To dump bundles to files DER encoded files, please
+ * define:
+ *
+ * #define PKCS7_OUTPUT_TEST_BUNDLES
+ */
+
+
+/* Loads certs and keys for use with PKCS7 tests, from either files
+ * or buffers.
+ *
+ * rsaClientCertBuf - output buffer for RSA client cert
+ * rsaClientCertBufSz - IN/OUT size of output buffer, size of RSA client cert
+ * rsaClientPrivKeyBuf - output buffer for RSA client private key
+ * rsaClientPrivKeyBufSz - IN/OUT size of output buffer, size of RSA client key
+ *
+ * rsaServerCertBuf - output buffer for RSA server cert
+ * rsaServerCertBufSz - IN/OUT size of output buffer, size of RSA server cert
+ * rsaServerPrivKeyBuf - output buffer for RSA server private key
+ * rsaServerPrivKeyBufSz - IN/OUT size of output buffer, size of RSA server key
+ *
+ * rsaCaCertBuf - output buffer for RSA CA cert
+ * rsaCaCertBufSz - IN/OUT size of output buffer, size of RSA ca cert
+ * rsaCaPrivKeyBuf - output buffer for RSA CA private key
+ * rsaCaPrivKeyBufSz - IN/OUT size of output buffer, size of RSA CA key
+ *
+ * eccClientCertBuf - output buffer for ECC cert
+ * eccClientCertBufSz - IN/OUT size of output buffer, size of ECC cert
+ * eccClientPrivKeyBuf - output buffer for ECC private key
+ * eccClientPrivKeyBufSz - IN/OUT size of output buffer, size of ECC private key
+ *
+ * Returns 0 on success, negative on error
+ */
+static int pkcs7_load_certs_keys(
+ byte* rsaClientCertBuf, word32* rsaClientCertBufSz,
+ byte* rsaClientPrivKeyBuf, word32* rsaClientPrivKeyBufSz,
+ byte* rsaServerCertBuf, word32* rsaServerCertBufSz,
+ byte* rsaServerPrivKeyBuf, word32* rsaServerPrivKeyBufSz,
+ byte* rsaCaCertBuf, word32* rsaCaCertBufSz,
+ byte* rsaCaPrivKeyBuf, word32* rsaCaPrivKeyBufSz,
+ byte* eccClientCertBuf, word32* eccClientCertBufSz,
+ byte* eccClientPrivKeyBuf, word32* eccClientPrivKeyBufSz)
+{
+#ifndef NO_FILESYSTEM
+ XFILE certFile;
+ XFILE keyFile;
+
+ (void)certFile;
+ (void)keyFile;
+#endif
+
+#ifndef NO_RSA
+ if (rsaClientCertBuf == NULL || rsaClientCertBufSz == NULL ||
+ rsaClientPrivKeyBuf == NULL || rsaClientPrivKeyBufSz == NULL)
+ return BAD_FUNC_ARG;
+#endif
+
+#ifdef HAVE_ECC
+ if (eccClientCertBuf == NULL || eccClientCertBufSz == NULL ||
+ eccClientPrivKeyBuf == NULL || eccClientPrivKeyBufSz == NULL)
+ return BAD_FUNC_ARG;
+#endif
+
+/* RSA */
+#ifndef NO_RSA
+
+#ifdef USE_CERT_BUFFERS_1024
+ if (*rsaClientCertBufSz < (word32)sizeof_client_cert_der_1024)
+ return -11709;
+
+ XMEMCPY(rsaClientCertBuf, client_cert_der_1024,
+ sizeof_client_cert_der_1024);
+ *rsaClientCertBufSz = sizeof_client_cert_der_1024;
+
+ if (rsaServerCertBuf != NULL) {
+ if (*rsaServerCertBufSz < (word32)sizeof_server_cert_der_1024)
+ return -11710;
+
+ XMEMCPY(rsaServerCertBuf, server_cert_der_1024,
+ sizeof_server_cert_der_1024);
+ *rsaServerCertBufSz = sizeof_server_cert_der_1024;
+ }
+
+ if (rsaCaCertBuf != NULL) {
+ if (*rsaCaCertBufSz < (word32)sizeof_ca_cert_der_1024)
+ return -11711;
+
+ XMEMCPY(rsaCaCertBuf, ca_cert_der_1024, sizeof_ca_cert_der_1024);
+ *rsaCaCertBufSz = sizeof_ca_cert_der_1024;
+ }
+#elif defined(USE_CERT_BUFFERS_2048)
+ if (*rsaClientCertBufSz < (word32)sizeof_client_cert_der_2048)
+ return -11712;
+
+ XMEMCPY(rsaClientCertBuf, client_cert_der_2048,
+ sizeof_client_cert_der_2048);
+ *rsaClientCertBufSz = sizeof_client_cert_der_2048;
+
+ if (rsaServerCertBuf != NULL) {
+ if (*rsaServerCertBufSz < (word32)sizeof_server_cert_der_2048)
+ return -11713;
+
+ XMEMCPY(rsaServerCertBuf, server_cert_der_2048,
+ sizeof_server_cert_der_2048);
+ *rsaServerCertBufSz = sizeof_server_cert_der_2048;
+ }
+
+ if (rsaCaCertBuf != NULL) {
+ if (*rsaCaCertBufSz < (word32)sizeof_ca_cert_der_2048)
+ return -11714;
+
+ XMEMCPY(rsaCaCertBuf, ca_cert_der_2048, sizeof_ca_cert_der_2048);
+ *rsaCaCertBufSz = sizeof_ca_cert_der_2048;
+ }
+#else
+ certFile = XFOPEN(clientCert, "rb");
+ if (!certFile)
+ return -11715;
+
+ *rsaClientCertBufSz = (word32)XFREAD(rsaClientCertBuf, 1,
+ *rsaClientCertBufSz, certFile);
+ XFCLOSE(certFile);
+
+ if (rsaServerCertBuf != NULL) {
+ certFile = XFOPEN(rsaServerCertDerFile, "rb");
+ if (!certFile)
+ return -11716;
+
+ *rsaServerCertBufSz = (word32)XFREAD(rsaServerCertBuf, 1,
+ *rsaServerCertBufSz, certFile);
+ XFCLOSE(certFile);
+ }
+
+ if (rsaCaCertBuf != NULL) {
+ certFile = XFOPEN(rsaCaCertDerFile, "rb");
+ if (!certFile)
+ return -11717;
+
+ *rsaCaCertBufSz = (word32)XFREAD(rsaCaCertBuf, 1, *rsaCaCertBufSz,
+ certFile);
+ XFCLOSE(certFile);
+ }
+#endif
+
+#ifdef USE_CERT_BUFFERS_1024
+ if (*rsaClientPrivKeyBufSz < (word32)sizeof_client_key_der_1024)
+ return -11718;
+
+ XMEMCPY(rsaClientPrivKeyBuf, client_key_der_1024,
+ sizeof_client_key_der_1024);
+ *rsaClientPrivKeyBufSz = sizeof_client_key_der_1024;
+
+ if (rsaServerPrivKeyBuf != NULL) {
+ if (*rsaServerPrivKeyBufSz < (word32)sizeof_server_key_der_1024)
+ return -11719;
+
+ XMEMCPY(rsaServerPrivKeyBuf, server_key_der_1024,
+ sizeof_server_key_der_1024);
+ *rsaServerPrivKeyBufSz = sizeof_server_key_der_1024;
+ }
+
+ if (rsaCaPrivKeyBuf != NULL) {
+ if (*rsaCaPrivKeyBufSz < (word32)sizeof_ca_key_der_1024)
+ return -11720;
+
+ XMEMCPY(rsaCaPrivKeyBuf, ca_key_der_1024, sizeof_ca_key_der_1024);
+ *rsaCaPrivKeyBufSz = sizeof_ca_key_der_1024;
+ }
+#elif defined(USE_CERT_BUFFERS_2048)
+ if (*rsaClientPrivKeyBufSz < (word32)sizeof_client_key_der_2048)
+ return -11721;
+
+ XMEMCPY(rsaClientPrivKeyBuf, client_key_der_2048,
+ sizeof_client_key_der_2048);
+ *rsaClientPrivKeyBufSz = sizeof_client_key_der_2048;
+
+ if (rsaServerPrivKeyBuf != NULL) {
+ if (*rsaServerPrivKeyBufSz < (word32)sizeof_server_key_der_2048)
+ return -11722;
+
+ XMEMCPY(rsaServerPrivKeyBuf, server_key_der_2048,
+ sizeof_server_key_der_2048);
+ *rsaServerPrivKeyBufSz = sizeof_server_key_der_2048;
+ }
+
+ if (rsaCaPrivKeyBuf != NULL) {
+ if (*rsaCaPrivKeyBufSz < (word32)sizeof_ca_key_der_2048)
+ return -11723;
+
+ XMEMCPY(rsaCaPrivKeyBuf, ca_key_der_2048, sizeof_ca_key_der_2048);
+ *rsaCaPrivKeyBufSz = sizeof_ca_key_der_2048;
+ }
+#else
+ keyFile = XFOPEN(clientKey, "rb");
+ if (!keyFile)
+ return -11724;
+
+ *rsaClientPrivKeyBufSz = (word32)XFREAD(rsaClientPrivKeyBuf, 1,
+ *rsaClientPrivKeyBufSz, keyFile);
+ XFCLOSE(keyFile);
+
+ if (rsaServerPrivKeyBuf != NULL) {
+ keyFile = XFOPEN(rsaServerKeyDerFile, "rb");
+ if (!keyFile)
+ return -11725;
+
+ *rsaServerPrivKeyBufSz = (word32)XFREAD(rsaServerPrivKeyBuf, 1,
+ *rsaServerPrivKeyBufSz, keyFile);
+ XFCLOSE(keyFile);
+ }
+
+ if (rsaCaPrivKeyBuf != NULL) {
+ keyFile = XFOPEN(rsaCaKeyFile, "rb");
+ if (!keyFile)
+ return -11726;
+
+ *rsaCaPrivKeyBufSz = (word32)XFREAD(rsaCaPrivKeyBuf, 1,
+ *rsaCaPrivKeyBufSz, keyFile);
+ XFCLOSE(keyFile);
+ }
+#endif /* USE_CERT_BUFFERS */
+
+#endif /* NO_RSA */
+
+/* ECC */
+#ifdef HAVE_ECC
+
+#ifdef USE_CERT_BUFFERS_256
+ if (*eccClientCertBufSz < (word32)sizeof_cliecc_cert_der_256)
+ return -11727;
+
+ XMEMCPY(eccClientCertBuf, cliecc_cert_der_256, sizeof_cliecc_cert_der_256);
+ *eccClientCertBufSz = sizeof_cliecc_cert_der_256;
+#else
+ certFile = XFOPEN(eccClientCert, "rb");
+ if (!certFile)
+ return -11728;
+
+ *eccClientCertBufSz = (word32)XFREAD(eccClientCertBuf, 1,
+ *eccClientCertBufSz, certFile);
+ XFCLOSE(certFile);
+#endif /* USE_CERT_BUFFERS_256 */
+
+#ifdef USE_CERT_BUFFERS_256
+ if (*eccClientPrivKeyBufSz < (word32)sizeof_ecc_clikey_der_256)
+ return -11729;
+
+ XMEMCPY(eccClientPrivKeyBuf, ecc_clikey_der_256, sizeof_ecc_clikey_der_256);
+ *eccClientPrivKeyBufSz = sizeof_ecc_clikey_der_256;
+#else
+ keyFile = XFOPEN(eccClientKey, "rb");
+ if (!keyFile)
+ return -11730;
+
+ *eccClientPrivKeyBufSz = (word32)XFREAD(eccClientPrivKeyBuf, 1,
+ *eccClientPrivKeyBufSz, keyFile);
+ XFCLOSE(keyFile);
+#endif /* USE_CERT_BUFFERS_256 */
+#endif /* HAVE_ECC */
+
+#ifdef NO_RSA
+ (void)rsaClientCertBuf;
+ (void)rsaClientCertBufSz;
+ (void)rsaClientPrivKeyBuf;
+ (void)rsaClientPrivKeyBufSz;
+ (void)rsaServerCertBuf;
+ (void)rsaServerCertBufSz;
+ (void)rsaServerPrivKeyBuf;
+ (void)rsaServerPrivKeyBufSz;
+ (void)rsaCaCertBuf;
+ (void)rsaCaCertBufSz;
+ (void)rsaCaPrivKeyBuf;
+ (void)rsaCaPrivKeyBufSz;
+#endif
+#ifndef HAVE_ECC
+ (void)eccClientCertBuf;
+ (void)eccClientCertBufSz;
+ (void)eccClientPrivKeyBuf;
+ (void)eccClientPrivKeyBufSz;
+#endif
+#ifndef NO_FILESYSTEM
+ (void)certFile;
+ (void)keyFile;
+#endif
+ return 0;
+}
+
+
+typedef struct {
+ const byte* content;
+ word32 contentSz;
+ int contentOID;
+ int encryptOID;
+ int keyWrapOID;
+ int keyAgreeOID;
+ byte* cert;
+ size_t certSz;
+ byte* privateKey;
+ word32 privateKeySz;
+ byte* optionalUkm;
+ word32 optionalUkmSz;
+ int ktriOptions; /* KTRI options flags */
+ int kariOptions; /* KARI options flags */
+
+ /* KEKRI specific */
+ byte* secretKey; /* key, only for kekri RecipientInfo types */
+ word32 secretKeySz; /* size of secretKey, bytes */
+ byte* secretKeyId; /* key identifier */
+ word32 secretKeyIdSz; /* size of key identifier, bytes */
+ void* timePtr; /* time_t pointer */
+ byte* otherAttrOID; /* OPTIONAL, other attribute OID */
+ word32 otherAttrOIDSz; /* size of otherAttrOID, bytes */
+ byte* otherAttr; /* OPTIONAL, other attribute, ASN.1 encoded */
+ word32 otherAttrSz; /* size of otherAttr, bytes */
+ int kekriOptions; /* KEKRI options flags */
+
+ /* PWRI specific */
+ char* password;
+ word32 passwordSz;
+ byte* salt;
+ word32 saltSz;
+ int kdfOID;
+ int hashOID;
+ int kdfIterations;
+ int pwriOptions; /* PWRI options flags */
+
+ /* ORI specific */
+ int isOri;
+ int oriOptions; /* ORI options flags */
+
+ const char* outFileName;
+} pkcs7EnvelopedVector;
+
+
+static const byte asnDataOid[] = {
+ 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x01
+};
+
+/* ORI encrypt callback, responsible for encrypting content-encryption key (CEK)
+ * and giving wolfCrypt the value for oriOID and oriValue to place in
+ * OtherRecipientInfo.
+ *
+ * Returns 0 on success, negative upon error. */
+static int myOriEncryptCb(PKCS7* pkcs7, byte* cek, word32 cekSz, byte* oriType,
+ word32* oriTypeSz, byte* oriValue, word32* oriValueSz,
+ void* ctx)
+{
+ int i;
+
+ /* make sure buffers are large enough */
+ if ((*oriValueSz < (2 + cekSz)) || (*oriTypeSz < sizeof(oriType)))
+ return -11731;
+
+ /* our simple encryption algorithm will be take the bitwise complement */
+ oriValue[0] = 0x04; /*ASN OCTET STRING */
+ oriValue[1] = (byte)cekSz; /* length */
+ for (i = 0; i < (int)cekSz; i++) {
+ oriValue[2 + i] = ~cek[i];
+ }
+ *oriValueSz = 2 + cekSz;
+
+ /* set oriType to ASN.1 encoded data OID */
+ XMEMCPY(oriType, asnDataOid, sizeof(asnDataOid));
+ *oriTypeSz = sizeof(asnDataOid);
+
+ (void)pkcs7;
+ (void)ctx;
+
+ return 0;
+}
+
+
+/* ORI decrypt callback, responsible for providing a decrypted content
+ * encryption key (CEK) placed into decryptedKey and size placed into
+ * decryptedKeySz. oriOID and oriValue are given to the callback to help
+ * in decrypting the encrypted CEK.
+ *
+ * Returns 0 on success, negative upon error. */
+static int myOriDecryptCb(PKCS7* pkcs7, byte* oriType, word32 oriTypeSz,
+ byte* oriValue, word32 oriValueSz, byte* decryptedKey,
+ word32* decryptedKeySz, void* ctx)
+{
+ int i;
+
+ /* make sure oriType matches what we expect */
+ if (oriTypeSz != sizeof(asnDataOid))
+ return -11732;
+
+ if (XMEMCMP(oriType, asnDataOid, sizeof(asnDataOid)) != 0)
+ return -11733;
+
+ /* make sure decrypted buffer is large enough */
+ if (*decryptedKeySz < oriValueSz)
+ return -11734;
+
+ /* decrypt encrypted CEK using simple bitwise complement,
+ only for example */
+ for (i = 0; i < (int)oriValueSz - 2; i++) {
+ decryptedKey[i] = ~oriValue[2 + i];
+ }
+
+ *decryptedKeySz = oriValueSz - 2;
+
+ (void)pkcs7;
+ (void)ctx;
+
+ return 0;
+}
+
+
+#ifndef NO_AES
+/* returns 0 on success */
+static int myDecryptionFunc(PKCS7* pkcs7, int encryptOID, byte* iv, int ivSz,
+ byte* aad, word32 aadSz, byte* authTag, word32 authTagSz,
+ byte* in, int inSz, byte* out, void* usrCtx)
+{
+ int keyId = -1, ret, keySz;
+ word32 keyIdSz = 8;
+ const byte* key;
+ byte keyIdRaw[8];
+ Aes aes;
+
+ /* looking for KEY ID
+ * fwDecryptKeyID OID "1.2.840.113549.1.9.16.2.37
+ */
+ const unsigned char OID[] = {
+ /* 0x06, 0x0B do not pass in tag and length */
+ 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+ 0x01, 0x09, 0x10, 0x02, 0x25
+ };
+
+ const byte defKey[] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+ };
+
+ const byte altKey[] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+ };
+
+ /* test user context passed in */
+ if (usrCtx == NULL || *(int*)usrCtx != 1) {
+ return -11735;
+ }
+
+ /* if needing to find keyIdSz can call with NULL */
+ ret = wc_PKCS7_GetAttributeValue(pkcs7, OID, sizeof(OID), NULL,
+ &keyIdSz);
+ if (ret != LENGTH_ONLY_E) {
+ printf("Unexpected error %d when getting keyIdSz\n", ret);
+ printf("Possibly no KEY ID attribute set\n");
+ return -11736;
+ }
+ else {
+ XMEMSET(keyIdRaw, 0, sizeof(keyIdRaw));
+ ret = wc_PKCS7_GetAttributeValue(pkcs7, OID, sizeof(OID), keyIdRaw,
+ &keyIdSz);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (keyIdSz < 3) {
+ printf("keyIdSz is smaller than expected\n");
+ return -11737;
+ }
+ if (keyIdSz > 2 + sizeof(int)) {
+ printf("example case was only expecting a keyId of int size\n");
+ return -11738;
+ }
+
+ /* keyIdRaw[0] OCTET TAG */
+ /* keyIdRaw[1] Length */
+#ifdef BIG_ENDIAN_ORDER
+ if (keyIdRaw[1] == 0x01) {
+ keyId = 1;
+ }
+#else
+ keyId = *(int*)(keyIdRaw + 2);
+#endif
+ }
+
+
+ /* Use keyID here if found to select key and decrypt in HSM or in this
+ * example just select key and do software decryption */
+ if (keyId == 1) {
+ key = altKey;
+ keySz = sizeof(altKey);
+ }
+ else {
+ key = defKey;
+ keySz = sizeof(defKey);
+ }
+
+ switch (encryptOID) {
+ case AES256CBCb:
+ if ((keySz != 32 ) || (ivSz != AES_BLOCK_SIZE))
+ return BAD_FUNC_ARG;
+ break;
+
+ case AES128CBCb:
+ if ((keySz != 16 ) || (ivSz != AES_BLOCK_SIZE))
+ return BAD_FUNC_ARG;
+ break;
+
+ default:
+ printf("Unsupported content cipher type for example");
+ return ALGO_ID_E;
+ };
+
+ ret = wc_AesInit(&aes, HEAP_HINT, INVALID_DEVID);
+ if (ret == 0) {
+ ret = wc_AesSetKey(&aes, key, keySz, iv, AES_DECRYPTION);
+ if (ret == 0)
+ ret = wc_AesCbcDecrypt(&aes, out, in, inSz);
+ wc_AesFree(&aes);
+ }
+
+ (void)aad;
+ (void)aadSz;
+ (void)authTag;
+ (void)authTagSz;
+ return ret;
+}
+#endif /* NO_AES */
+
+
+static int pkcs7enveloped_run_vectors(byte* rsaCert, word32 rsaCertSz,
+ byte* rsaPrivKey, word32 rsaPrivKeySz,
+ byte* eccCert, word32 eccCertSz,
+ byte* eccPrivKey, word32 eccPrivKeySz)
+{
+ int ret, testSz, i;
+ int envelopedSz, decodedSz;
+
+ byte enveloped[2048];
+ byte decoded[2048];
+ PKCS7* pkcs7;
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ XFILE pkcs7File;
+#endif
+
+ const byte data[] = { /* Hello World */
+ 0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f,
+ 0x72,0x6c,0x64
+ };
+
+#if !defined(NO_AES) && defined(WOLFSSL_AES_256) && defined(HAVE_ECC) && \
+ defined(WOLFSSL_SHA512)
+ byte optionalUkm[] = {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
+ };
+#endif /* NO_AES */
+
+#if !defined(NO_AES) && !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+ /* encryption key for kekri recipient types */
+ byte secretKey[] = {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
+ };
+
+ /* encryption key identifier */
+ byte secretKeyId[] = {
+ 0x02,0x02,0x03,0x04
+ };
+#endif
+
+#if !defined(NO_PWDBASED) && !defined(NO_AES) && \
+ !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+
+ char password[] = "password";
+
+ byte salt[] = {
+ 0x12, 0x34, 0x56, 0x78, 0x78, 0x56, 0x34, 0x12
+ };
+#endif
+
+ const pkcs7EnvelopedVector testVectors[] =
+ {
+ /* key transport key encryption technique */
+#ifndef NO_RSA
+ #ifndef NO_DES3
+ {data, (word32)sizeof(data), DATA, DES3b, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL,
+ 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataDES3.der"},
+ #endif
+
+ #ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ {data, (word32)sizeof(data), DATA, AES128CBCb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL,
+ 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataAES128CBC.der"},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {data, (word32)sizeof(data), DATA, AES192CBCb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL,
+ 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataAES192CBC.der"},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {data, (word32)sizeof(data), DATA, AES256CBCb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL,
+ 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataAES256CBC.der"},
+
+ /* explicitly using SKID for SubjectKeyIdentifier */
+ {data, (word32)sizeof(data), DATA, AES256CBCb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, CMS_SKID, 0, NULL, 0, NULL, 0, NULL,
+ NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataAES256CBC_SKID.der"},
+
+ /* explicitly using IssuerAndSerialNumber for SubjectKeyIdentifier */
+ {data, (word32)sizeof(data), DATA, AES256CBCb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, CMS_ISSUER_AND_SERIAL_NUMBER, 0,
+ NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0,
+ 0, 0, 0, 0, "pkcs7envelopedDataAES256CBC_IANDS.der"},
+ #endif
+ #endif /* NO_AES */
+#endif
+
+ /* key agreement key encryption technique*/
+#ifdef HAVE_ECC
+ #ifndef NO_AES
+ #if !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+ {data, (word32)sizeof(data), DATA, AES128CBCb, AES128_WRAP,
+ dhSinglePass_stdDH_sha1kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0,
+ 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataAES128CBC_ECDH_SHA1KDF.der"},
+ #endif
+
+ #if !defined(NO_SHA256) && defined(WOLFSSL_AES_256)
+ {data, (word32)sizeof(data), DATA, AES256CBCb, AES256_WRAP,
+ dhSinglePass_stdDH_sha256kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0,
+ 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataAES256CBC_ECDH_SHA256KDF.der"},
+ #endif /* NO_SHA256 && WOLFSSL_AES_256 */
+
+ #if defined(WOLFSSL_SHA512) && defined(WOLFSSL_AES_256)
+ {data, (word32)sizeof(data), DATA, AES256CBCb, AES256_WRAP,
+ dhSinglePass_stdDH_sha512kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0,
+ 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataAES256CBC_ECDH_SHA512KDF.der"},
+
+ /* with optional user keying material (ukm) */
+ {data, (word32)sizeof(data), DATA, AES256CBCb, AES256_WRAP,
+ dhSinglePass_stdDH_sha512kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, optionalUkm, sizeof(optionalUkm), 0, 0, NULL, 0,
+ NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataAES256CBC_ECDH_SHA512KDF_ukm.der"},
+ #endif /* WOLFSSL_SHA512 && WOLFSSL_AES_256 */
+ #endif /* NO_AES */
+#endif
+
+ /* kekri (KEKRecipientInfo) recipient types */
+#ifndef NO_AES
+ #if !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+ {data, (word32)sizeof(data), DATA, AES128CBCb, AES128_WRAP, 0,
+ NULL, 0, NULL, 0, NULL, 0, 0, 0, secretKey, sizeof(secretKey),
+ secretKeyId, sizeof(secretKeyId), NULL, NULL, 0, NULL, 0,
+ 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7envelopedDataAES128CBC_KEKRI.der"},
+ #endif
+#endif
+
+ /* pwri (PasswordRecipientInfo) recipient types */
+#if !defined(NO_PWDBASED) && !defined(NO_AES)
+ #if !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+ {data, (word32)sizeof(data), DATA, AES128CBCb, 0, 0,
+ NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0,
+ NULL, 0, NULL, NULL, 0, NULL, 0, 0, password,
+ (word32)XSTRLEN(password), salt, sizeof(salt), PBKDF2_OID, WC_SHA, 5,
+ 0, 0, 0, "pkcs7envelopedDataAES128CBC_PWRI.der"},
+ #endif
+#endif
+
+#if !defined(NO_AES) && !defined(NO_AES_128)
+ /* ori (OtherRecipientInfo) recipient types */
+ {data, (word32)sizeof(data), DATA, AES128CBCb, 0, 0, NULL, 0, NULL, 0,
+ NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0,
+ NULL, 0, 0, 0, 0, 0, 1, 0, "pkcs7envelopedDataAES128CBC_ORI.der"},
+#endif
+ };
+
+ testSz = sizeof(testVectors) / sizeof(pkcs7EnvelopedVector);
+
+ for (i = 0; i < testSz; i++) {
+ pkcs7 = wc_PKCS7_New(HEAP_HINT,
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ INVALID_DEVID /* async PKCS7 is not supported */
+ #else
+ devId
+ #endif
+ );
+ if (pkcs7 == NULL)
+ return -11739;
+
+ if (testVectors[i].secretKey != NULL) {
+ /* KEKRI recipient type */
+
+ ret = wc_PKCS7_Init(pkcs7, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ return -11740;
+ }
+
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->encryptOID = testVectors[i].encryptOID;
+ pkcs7->ukm = testVectors[i].optionalUkm;
+ pkcs7->ukmSz = testVectors[i].optionalUkmSz;
+
+ ret = wc_PKCS7_AddRecipient_KEKRI(pkcs7, testVectors[i].keyWrapOID,
+ testVectors[i].secretKey, testVectors[i].secretKeySz,
+ testVectors[i].secretKeyId, testVectors[i].secretKeyIdSz,
+ testVectors[i].timePtr, testVectors[i].otherAttrOID,
+ testVectors[i].otherAttrOIDSz, testVectors[i].otherAttr,
+ testVectors[i].otherAttrSz, testVectors[i].kekriOptions);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11741;
+ }
+
+ /* set key, for decryption */
+ ret = wc_PKCS7_SetKey(pkcs7, testVectors[i].secretKey,
+ testVectors[i].secretKeySz);
+
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11742;
+ }
+
+ } else if (testVectors[i].password != NULL) {
+ #ifndef NO_PWDBASED
+ /* PWRI recipient type */
+
+ ret = wc_PKCS7_Init(pkcs7, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ return -11743;
+ }
+
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->encryptOID = testVectors[i].encryptOID;
+ pkcs7->ukm = testVectors[i].optionalUkm;
+ pkcs7->ukmSz = testVectors[i].optionalUkmSz;
+
+ ret = wc_PKCS7_AddRecipient_PWRI(pkcs7,
+ (byte*)testVectors[i].password,
+ testVectors[i].passwordSz, testVectors[i].salt,
+ testVectors[i].saltSz, testVectors[i].kdfOID,
+ testVectors[i].hashOID, testVectors[i].kdfIterations,
+ testVectors[i].encryptOID, testVectors[i].pwriOptions);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11744;
+ }
+
+ /* set password, for decryption */
+ ret = wc_PKCS7_SetPassword(pkcs7, (byte*)testVectors[i].password,
+ testVectors[i].passwordSz);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11745;
+ }
+ #endif /* NO_PWDBASED */
+
+ } else if (testVectors[i].isOri == 1) {
+ /* ORI recipient type */
+
+ ret = wc_PKCS7_Init(pkcs7, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ return -11746;
+ }
+
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->encryptOID = testVectors[i].encryptOID;
+
+ ret = wc_PKCS7_AddRecipient_ORI(pkcs7, myOriEncryptCb,
+ testVectors[i].oriOptions);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11747;
+ }
+
+ /* set decrypt callback for decryption */
+ ret = wc_PKCS7_SetOriDecryptCb(pkcs7, myOriDecryptCb);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11748;
+ }
+
+ } else {
+ /* KTRI or KARI recipient types */
+
+ ret = wc_PKCS7_Init(pkcs7, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ return -11749;
+ }
+
+ ret = wc_PKCS7_InitWithCert(pkcs7, testVectors[i].cert,
+ (word32)testVectors[i].certSz);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11750;
+ }
+
+ pkcs7->keyWrapOID = testVectors[i].keyWrapOID;
+ pkcs7->keyAgreeOID = testVectors[i].keyAgreeOID;
+ pkcs7->privateKey = testVectors[i].privateKey;
+ pkcs7->privateKeySz = testVectors[i].privateKeySz;
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->encryptOID = testVectors[i].encryptOID;
+ pkcs7->ukm = testVectors[i].optionalUkm;
+ pkcs7->ukmSz = testVectors[i].optionalUkmSz;
+
+ /* set SubjectIdentifier type for KTRI types */
+ if (testVectors[i].ktriOptions & CMS_SKID) {
+
+ ret = wc_PKCS7_SetSignerIdentifierType(pkcs7, CMS_SKID);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11751;
+ }
+ } else if (testVectors[i].ktriOptions &
+ CMS_ISSUER_AND_SERIAL_NUMBER) {
+
+ ret = wc_PKCS7_SetSignerIdentifierType(pkcs7,
+ CMS_ISSUER_AND_SERIAL_NUMBER);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11752;
+ }
+ }
+ }
+
+ /* encode envelopedData */
+ envelopedSz = wc_PKCS7_EncodeEnvelopedData(pkcs7, enveloped,
+ sizeof(enveloped));
+ if (envelopedSz <= 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11753;
+ }
+
+ /* decode envelopedData */
+ decodedSz = wc_PKCS7_DecodeEnvelopedData(pkcs7, enveloped, envelopedSz,
+ decoded, sizeof(decoded));
+ if (decodedSz <= 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11754;
+ }
+
+ /* test decode result */
+ if (XMEMCMP(decoded, data, sizeof(data)) != 0){
+ wc_PKCS7_Free(pkcs7);
+ return -11755;
+ }
+
+#ifndef NO_PKCS7_STREAM
+ { /* test reading byte by byte */
+ int z;
+ for (z = 0; z < envelopedSz; z++) {
+ decodedSz = wc_PKCS7_DecodeEnvelopedData(pkcs7, enveloped + z, 1,
+ decoded, sizeof(decoded));
+ if (decodedSz <= 0 && decodedSz != WC_PKCS7_WANT_READ_E) {
+ printf("unexpected error %d\n", decodedSz);
+ return -11756;
+ }
+ }
+ /* test decode result */
+ if (XMEMCMP(decoded, data, sizeof(data)) != 0) {
+ printf("stream read compare failed\n");
+ wc_PKCS7_Free(pkcs7);
+ return -11757;
+ }
+ }
+#endif
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ /* output pkcs7 envelopedData for external testing */
+ pkcs7File = XFOPEN(testVectors[i].outFileName, "wb");
+ if (!pkcs7File) {
+ wc_PKCS7_Free(pkcs7);
+ return -11758;
+ }
+
+ ret = (int)XFWRITE(enveloped, 1, envelopedSz, pkcs7File);
+ XFCLOSE(pkcs7File);
+ if (ret != envelopedSz) {
+ wc_PKCS7_Free(pkcs7);
+ return -11759;
+ }
+#endif /* PKCS7_OUTPUT_TEST_BUNDLES */
+
+ wc_PKCS7_Free(pkcs7);
+ pkcs7 = NULL;
+ }
+
+ (void)eccCert;
+ (void)eccCertSz;
+ (void)eccPrivKey;
+ (void)eccPrivKeySz;
+ (void)rsaCert;
+ (void)rsaCertSz;
+ (void)rsaPrivKey;
+ (void)rsaPrivKeySz;
+
+ return 0;
+}
+
+
int pkcs7enveloped_test(void)
{
int ret = 0;
- int cipher = DES3b;
+ byte* rsaCert = NULL;
+ byte* rsaPrivKey = NULL;
+ word32 rsaCertSz = 0;
+ word32 rsaPrivKeySz = 0;
+
+ byte* eccCert = NULL;
+ byte* eccPrivKey = NULL;
+ word32 eccCertSz = 0;
+ word32 eccPrivKeySz = 0;
+
+#ifndef NO_RSA
+ /* read client RSA cert and key in DER format */
+ rsaCert = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (rsaCert == NULL)
+ return -11800;
+
+ rsaPrivKey = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (rsaPrivKey == NULL) {
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -11801;
+ }
+
+ rsaCertSz = FOURK_BUF;
+ rsaPrivKeySz = FOURK_BUF;
+#endif /* NO_RSA */
+
+#ifdef HAVE_ECC
+ /* read client ECC cert and key in DER format */
+ eccCert = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (eccCert == NULL) {
+ #ifndef NO_RSA
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return -11802;
+ }
+
+ eccPrivKey =(byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (eccPrivKey == NULL) {
+ #ifndef NO_RSA
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ XFREE(eccCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -11803;
+ }
+
+ eccCertSz = FOURK_BUF;
+ eccPrivKeySz = FOURK_BUF;
+#endif /* HAVE_ECC */
+
+ ret = pkcs7_load_certs_keys(rsaCert, &rsaCertSz, rsaPrivKey,
+ &rsaPrivKeySz, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, eccCert, &eccCertSz,
+ eccPrivKey, &eccPrivKeySz);
+ if (ret < 0) {
+ #ifndef NO_RSA
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ #ifdef HAVE_ECC
+ XFREE(eccCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(eccPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return -11804;
+ }
+
+ ret = pkcs7enveloped_run_vectors(rsaCert, (word32)rsaCertSz,
+ rsaPrivKey, (word32)rsaPrivKeySz,
+ eccCert, (word32)eccCertSz,
+ eccPrivKey, (word32)eccPrivKeySz);
+
+#ifndef NO_RSA
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+#ifdef HAVE_ECC
+ XFREE(eccCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(eccPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+
+ return ret;
+}
+
+
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+
+typedef struct {
+ const byte* content;
+ word32 contentSz;
+ int contentOID;
+ int encryptOID;
+ int keyWrapOID;
+ int keyAgreeOID;
+ byte* cert;
+ size_t certSz;
+ byte* privateKey;
+ word32 privateKeySz;
+ PKCS7Attrib* authAttribs;
+ word32 authAttribsSz;
+ PKCS7Attrib* unauthAttribs;
+ word32 unauthAttribsSz;
+
+ /* KARI / KTRI specific */
+ byte* optionalUkm;
+ word32 optionalUkmSz;
+ int ktriOptions; /* KTRI options flags */
+ int kariOptions; /* KARI options flags */
+
+ /* KEKRI specific */
+ byte* secretKey; /* key, only for kekri RecipientInfo types */
+ word32 secretKeySz; /* size of secretKey, bytes */
+ byte* secretKeyId; /* key identifier */
+ word32 secretKeyIdSz; /* size of key identifier, bytes */
+ void* timePtr; /* time_t pointer */
+ byte* otherAttrOID; /* OPTIONAL, other attribute OID */
+ word32 otherAttrOIDSz; /* size of otherAttrOID, bytes */
+ byte* otherAttr; /* OPTIONAL, other attribute, ASN.1 encoded */
+ word32 otherAttrSz; /* size of otherAttr, bytes */
+ int kekriOptions; /* KEKRI options flags */
+
+ /* PWRI specific */
+ char* password; /* password */
+ word32 passwordSz; /* password size, bytes */
+ byte* salt; /* KDF salt */
+ word32 saltSz; /* KDF salt size, bytes */
+ int kdfOID; /* KDF OID */
+ int hashOID; /* KDF hash algorithm OID */
+ int kdfIterations; /* KDF iterations */
+ int kekEncryptOID; /* KEK encryption algorithm OID */
+ int pwriOptions; /* PWRI options flags */
+
+ /* ORI specific */
+ int isOri;
+ int oriOptions; /* ORI options flags */
+
+ const char* outFileName;
+} pkcs7AuthEnvelopedVector;
+
+
+static int pkcs7authenveloped_run_vectors(byte* rsaCert, word32 rsaCertSz,
+ byte* rsaPrivKey, word32 rsaPrivKeySz,
+ byte* eccCert, word32 eccCertSz,
+ byte* eccPrivKey, word32 eccPrivKeySz)
+{
+ int ret, testSz, i;
int envelopedSz, decodedSz;
- PKCS7 pkcs7;
- byte* cert;
- byte* privKey;
- byte enveloped[2048];
- byte decoded[2048];
- size_t certSz;
- size_t privKeySz;
- FILE* certFile;
- FILE* keyFile;
- FILE* pkcs7File;
- const char* pkcs7OutFile = "pkcs7envelopedData.der";
+ byte enveloped[2048];
+ byte decoded[2048];
+ WC_RNG rng;
+ PKCS7* pkcs7;
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ XFILE pkcs7File;
+#endif
const byte data[] = { /* Hello World */
0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f,
0x72,0x6c,0x64
};
+ byte senderNonce[PKCS7_NONCE_SZ + 2];
+
+#ifdef HAVE_ECC
+ byte senderNonceOid[] =
+ { 0x06, 0x0a, 0x60, 0x86, 0x48, 0x01, 0x86, 0xF8, 0x45, 0x01,
+ 0x09, 0x05 };
+
+ PKCS7Attrib attribs[] =
+ {
+ { senderNonceOid, sizeof(senderNonceOid), senderNonce,
+ sizeof(senderNonce) }
+ };
+#endif
+
+#if !defined(NO_AES) && defined(WOLFSSL_AES_256) && defined(HAVE_ECC) && \
+ defined(WOLFSSL_SHA512)
+ byte optionalUkm[] = {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
+ };
+#endif /* NO_AES */
+
+#if !defined(NO_AES) && !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+ /* encryption key for kekri recipient types */
+ byte secretKey[] = {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
+ };
+
+ /* encryption key identifier */
+ byte secretKeyId[] = {
+ 0x02,0x02,0x03,0x04
+ };
+#endif
+
+#if !defined(NO_PWDBASED) && !defined(NO_AES) && defined(HAVE_AESGCM) && \
+ !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+
+ char password[] = "password";
+
+ byte salt[] = {
+ 0x12, 0x34, 0x56, 0x78, 0x78, 0x56, 0x34, 0x12
+ };
+#endif
+
+ const pkcs7AuthEnvelopedVector testVectors[] =
+ {
+ /* key transport key encryption technique */
+#ifndef NO_RSA
+ #if !defined(NO_AES) && defined(HAVE_AESGCM)
+ #ifdef WOLFSSL_AES_128
+ {data, (word32)sizeof(data), DATA, AES128GCMb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0,
+ NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0,
+ 0, 0, "pkcs7authEnvelopedDataAES128GCM.der"},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {data, (word32)sizeof(data), DATA, AES192GCMb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0,
+ NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0,
+ 0, 0, "pkcs7authEnvelopedDataAES192GCM.der"},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {data, (word32)sizeof(data), DATA, AES256GCMb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0,
+ NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0,
+ 0, 0, "pkcs7authEnvelopedDataAES256GCM.der"},
+
+ /* test with contentType set to FirmwarePkgData */
+ {data, (word32)sizeof(data), FIRMWARE_PKG_DATA, AES256GCMb, 0, 0,
+ rsaCert, rsaCertSz, rsaPrivKey, rsaPrivKeySz, NULL, 0, NULL, 0, NULL,
+ 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7authEnvelopedDataAES256GCM_firmwarePkgData.der"},
+
+ /* explicitly using SKID for SubjectKeyIdentifier */
+ {data, (word32)sizeof(data), DATA, AES256GCMb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, NULL, 0, NULL, 0, CMS_SKID, 0,
+ NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0,
+ 0, 0, 0, 0, 0, "pkcs7authEnvelopedDataAES256GCM_SKID.der"},
+
+ /* explicitly using IssuerAndSerialNumber for SubjectKeyIdentifier */
+ {data, (word32)sizeof(data), DATA, AES256GCMb, 0, 0, rsaCert, rsaCertSz,
+ rsaPrivKey, rsaPrivKeySz, NULL, 0, NULL, 0, NULL, 0,
+ CMS_ISSUER_AND_SERIAL_NUMBER, 0, NULL, 0, NULL, 0, NULL, NULL, 0,
+ NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7authEnvelopedDataAES256GCM_IANDS.der"},
+ #endif
+ #endif /* NO_AES */
+#endif
+
+ /* key agreement key encryption technique */
+#ifdef HAVE_ECC
+ #if !defined(NO_AES) && defined(HAVE_AESGCM)
+ #if !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+ {data, (word32)sizeof(data), DATA, AES128GCMb, AES128_WRAP,
+ dhSinglePass_stdDH_sha1kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, NULL, 0,
+ NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7authEnvelopedDataAES128GCM_ECDH_SHA1KDF.der"},
+ #endif
+
+ #if !defined(NO_SHA256) && defined(WOLFSSL_AES_256)
+ {data, (word32)sizeof(data), DATA, AES256GCMb, AES256_WRAP,
+ dhSinglePass_stdDH_sha256kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, NULL, 0,
+ NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7authEnvelopedDataAES256GCM_ECDH_SHA256KDF.der"},
+
+ /* with authenticated attributes */
+ {data, (word32)sizeof(data), DATA, AES256GCMb, AES256_WRAP,
+ dhSinglePass_stdDH_sha256kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, attribs, (sizeof(attribs) / sizeof(PKCS7Attrib)),
+ NULL, 0, NULL, 0, 0, 0, NULL, 0,
+ NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0,
+ 0, 0, 0,
+ "pkcs7authEnvelopedDataAES256GCM_ECDH_SHA256KDF_authAttribs.der"},
+
+ /* with unauthenticated attributes */
+ {data, (word32)sizeof(data), DATA, AES256GCMb, AES256_WRAP,
+ dhSinglePass_stdDH_sha256kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, NULL, 0, attribs,
+ (sizeof(attribs) / sizeof(PKCS7Attrib)), NULL, 0, 0, 0, NULL, 0,
+ NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0,
+ 0, 0, 0,
+ "pkcs7authEnvelopedDataAES256GCM_ECDH_SHA256KDF_unauthAttribs.der"},
+
+ /* with authenticated AND unauthenticated attributes */
+ {data, (word32)sizeof(data), DATA, AES256GCMb, AES256_WRAP,
+ dhSinglePass_stdDH_sha256kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, attribs, (sizeof(attribs) / sizeof(PKCS7Attrib)),
+ attribs, (sizeof(attribs) / sizeof(PKCS7Attrib)), NULL, 0, 0, 0,
+ NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ "pkcs7authEnvelopedDataAES256GCM_ECDH_SHA256KDF_bothAttribs.der"},
+
+ /* with authenticated AND unauthenticated attributes AND
+ * contentType of FirmwarePkgData */
+ {data, (word32)sizeof(data), FIRMWARE_PKG_DATA, AES256GCMb, AES256_WRAP,
+ dhSinglePass_stdDH_sha256kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, attribs, (sizeof(attribs) / sizeof(PKCS7Attrib)),
+ attribs, (sizeof(attribs) / sizeof(PKCS7Attrib)), NULL, 0, 0, 0,
+ NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ "pkcs7authEnvelopedDataAES256GCM_ECDH_SHA256KDF_fw_bothAttribs.der"},
+ #endif /* NO_SHA256 && WOLFSSL_AES_256 */
+
+ #if defined(WOLFSSL_SHA512) && defined(WOLFSSL_AES_256)
+ {data, (word32)sizeof(data), DATA, AES256GCMb, AES256_WRAP,
+ dhSinglePass_stdDH_sha512kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL,
+ NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7authEnvelopedDataAES256GCM_ECDH_SHA512KDF.der"},
+
+ /* with optional user keying material (ukm) */
+ {data, (word32)sizeof(data), DATA, AES256GCMb, AES256_WRAP,
+ dhSinglePass_stdDH_sha512kdf_scheme, eccCert, eccCertSz, eccPrivKey,
+ eccPrivKeySz, NULL, 0, NULL, 0, optionalUkm, sizeof(optionalUkm), 0,
+ 0, NULL, 0, NULL, 0, NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ "pkcs7authEnvelopedDataAES256GCM_ECDH_SHA512KDF_ukm.der"},
+ #endif /* WOLFSSL_SHA512 && WOLFSSL_AES_256 */
+ #endif /* NO_AES */
+#endif
+
+ /* kekri (KEKRecipientInfo) recipient types */
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+ #if !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+ {data, (word32)sizeof(data), DATA, AES128GCMb, AES128_WRAP, 0,
+ NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, 0, 0,
+ secretKey, sizeof(secretKey), secretKeyId, sizeof(secretKeyId),
+ NULL, NULL, 0, NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 0, 0,
+ "pkcs7authEnvelopedDataAES128GCM_KEKRI.der"},
+ #endif
+#endif
+
+ /* pwri (PasswordRecipientInfo) recipient types */
+#if !defined(NO_PWDBASED) && !defined(NO_AES) && defined(HAVE_AESGCM)
+ #if !defined(NO_SHA) && defined(WOLFSSL_AES_128)
+ {data, (word32)sizeof(data), DATA, AES128GCMb, 0, 0,
+ NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0,
+ NULL, 0, NULL, NULL, 0, NULL, 0, 0, password,
+ (word32)XSTRLEN(password), salt, sizeof(salt), PBKDF2_OID, WC_SHA, 5,
+ AES128CBCb, 0, 0, 0, "pkcs7authEnvelopedDataAES128GCM_PWRI.der"},
+ #endif
+#endif
+
+#if !defined(NO_AES) && defined(HAVE_AESGCM)
+ #ifdef WOLFSSL_AES_128
+ /* ori (OtherRecipientInfo) recipient types */
+ {data, (word32)sizeof(data), DATA, AES128GCMb, 0, 0, NULL, 0, NULL, 0,
+ NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, NULL, 0, NULL, NULL, 0,
+ NULL, 0, 0, NULL, 0, NULL, 0, 0, 0, 0, 0, 0, 1, 0,
+ "pkcs7authEnvelopedDataAES128GCM_ORI.der"},
+ #endif
+#endif
+ };
+
+ testSz = sizeof(testVectors) / sizeof(pkcs7AuthEnvelopedVector);
+
+
+ /* generate senderNonce */
+ {
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0) {
+ return -11805;
+ }
- /* read client cert and key in DER format */
- cert = (byte*)malloc(FOURK_BUF);
- if (cert == NULL)
- return -201;
+ senderNonce[0] = 0x04;
+ senderNonce[1] = PKCS7_NONCE_SZ;
- privKey = (byte*)malloc(FOURK_BUF);
- if (privKey == NULL) {
- free(cert);
- return -202;
+ ret = wc_RNG_GenerateBlock(&rng, &senderNonce[2], PKCS7_NONCE_SZ);
+ if (ret != 0) {
+ wc_FreeRng(&rng);
+ return -11806;
+ }
+
+ wc_FreeRng(&rng);
}
- certFile = fopen(clientCert, "rb");
- if (!certFile) {
- free(cert);
- free(privKey);
- err_sys("can't open ./certs/client-cert.der, "
- "Please run from wolfSSL home dir", -42);
- return -42;
+ for (i = 0; i < testSz; i++) {
+ pkcs7 = wc_PKCS7_New(HEAP_HINT,
+ #ifdef WOLFSSL_ASYNC_CRYPT
+ INVALID_DEVID /* async PKCS7 is not supported */
+ #else
+ devId
+ #endif
+ );
+ if (pkcs7 == NULL)
+ return -11807;
+
+ if (testVectors[i].secretKey != NULL) {
+ /* KEKRI recipient type */
+
+ ret = wc_PKCS7_Init(pkcs7, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ return -11808;
+ }
+
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->encryptOID = testVectors[i].encryptOID;
+ pkcs7->ukm = testVectors[i].optionalUkm;
+ pkcs7->ukmSz = testVectors[i].optionalUkmSz;
+ pkcs7->authAttribs = testVectors[i].authAttribs;
+ pkcs7->authAttribsSz = testVectors[i].authAttribsSz;
+ pkcs7->unauthAttribs = testVectors[i].unauthAttribs;
+ pkcs7->unauthAttribsSz = testVectors[i].unauthAttribsSz;
+
+ ret = wc_PKCS7_AddRecipient_KEKRI(pkcs7, testVectors[i].keyWrapOID,
+ testVectors[i].secretKey, testVectors[i].secretKeySz,
+ testVectors[i].secretKeyId, testVectors[i].secretKeyIdSz,
+ testVectors[i].timePtr, testVectors[i].otherAttrOID,
+ testVectors[i].otherAttrOIDSz, testVectors[i].otherAttr,
+ testVectors[i].otherAttrSz, testVectors[i].kekriOptions);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11809;
+ }
+
+ /* set key, for decryption */
+ ret = wc_PKCS7_SetKey(pkcs7, testVectors[i].secretKey,
+ testVectors[i].secretKeySz);
+
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11810;
+ }
+
+ } else if (testVectors[i].password != NULL) {
+ #ifndef NO_PWDBASED
+ /* PWRI recipient type */
+
+ ret = wc_PKCS7_Init(pkcs7, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ return -11811;
+ }
+
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->encryptOID = testVectors[i].encryptOID;
+ pkcs7->ukm = testVectors[i].optionalUkm;
+ pkcs7->ukmSz = testVectors[i].optionalUkmSz;
+ pkcs7->authAttribs = testVectors[i].authAttribs;
+ pkcs7->authAttribsSz = testVectors[i].authAttribsSz;
+ pkcs7->unauthAttribs = testVectors[i].unauthAttribs;
+ pkcs7->unauthAttribsSz = testVectors[i].unauthAttribsSz;
+
+ ret = wc_PKCS7_AddRecipient_PWRI(pkcs7,
+ (byte*)testVectors[i].password,
+ testVectors[i].passwordSz, testVectors[i].salt,
+ testVectors[i].saltSz, testVectors[i].kdfOID,
+ testVectors[i].hashOID, testVectors[i].kdfIterations,
+ testVectors[i].kekEncryptOID, testVectors[i].pwriOptions);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11812;
+ }
+
+ /* set password, for decryption */
+ ret = wc_PKCS7_SetPassword(pkcs7, (byte*)testVectors[i].password,
+ testVectors[i].passwordSz);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11813;
+ }
+
+ #endif /* NO_PWDBASED */
+ } else if (testVectors[i].isOri == 1) {
+ /* ORI recipient type */
+
+ ret = wc_PKCS7_Init(pkcs7, pkcs7->heap, pkcs7->devId);
+ if (ret != 0) {
+ return -11814;
+ }
+
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->encryptOID = testVectors[i].encryptOID;
+ pkcs7->authAttribs = testVectors[i].authAttribs;
+ pkcs7->authAttribsSz = testVectors[i].authAttribsSz;
+ pkcs7->unauthAttribs = testVectors[i].unauthAttribs;
+ pkcs7->unauthAttribsSz = testVectors[i].unauthAttribsSz;
+
+ ret = wc_PKCS7_AddRecipient_ORI(pkcs7, myOriEncryptCb,
+ testVectors[i].oriOptions);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11815;
+ }
+
+ /* set decrypt callback for decryption */
+ ret = wc_PKCS7_SetOriDecryptCb(pkcs7, myOriDecryptCb);
+
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11816;
+ }
+
+ } else {
+ /* KTRI or KARI recipient types */
+
+ ret = wc_PKCS7_InitWithCert(pkcs7, testVectors[i].cert,
+ (word32)testVectors[i].certSz);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11817;
+ }
+
+ pkcs7->keyWrapOID = testVectors[i].keyWrapOID;
+ pkcs7->keyAgreeOID = testVectors[i].keyAgreeOID;
+ pkcs7->privateKey = testVectors[i].privateKey;
+ pkcs7->privateKeySz = testVectors[i].privateKeySz;
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->encryptOID = testVectors[i].encryptOID;
+ pkcs7->ukm = testVectors[i].optionalUkm;
+ pkcs7->ukmSz = testVectors[i].optionalUkmSz;
+ pkcs7->authAttribs = testVectors[i].authAttribs;
+ pkcs7->authAttribsSz = testVectors[i].authAttribsSz;
+ pkcs7->unauthAttribs = testVectors[i].unauthAttribs;
+ pkcs7->unauthAttribsSz = testVectors[i].unauthAttribsSz;
+
+ /* set SubjectIdentifier type for KTRI types */
+ if (testVectors[i].ktriOptions & CMS_SKID) {
+
+ ret = wc_PKCS7_SetSignerIdentifierType(pkcs7, CMS_SKID);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11818;
+ }
+ } else if (testVectors[i].ktriOptions &
+ CMS_ISSUER_AND_SERIAL_NUMBER) {
+
+ ret = wc_PKCS7_SetSignerIdentifierType(pkcs7,
+ CMS_ISSUER_AND_SERIAL_NUMBER);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11819;
+ }
+ }
+ }
+
+ /* encode envelopedData */
+ envelopedSz = wc_PKCS7_EncodeAuthEnvelopedData(pkcs7, enveloped,
+ sizeof(enveloped));
+ if (envelopedSz <= 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11820;
+ }
+#ifndef NO_PKCS7_STREAM
+ { /* test reading byte by byte */
+ int z;
+ for (z = 0; z < envelopedSz; z++) {
+ decodedSz = wc_PKCS7_DecodeAuthEnvelopedData(pkcs7,
+ enveloped + z, 1, decoded, sizeof(decoded));
+ if (decodedSz <= 0 && decodedSz != WC_PKCS7_WANT_READ_E) {
+ printf("unexpected error %d\n", decodedSz);
+ return -11821;
+ }
+ }
+ /* test decode result */
+ if (XMEMCMP(decoded, data, sizeof(data)) != 0) {
+ printf("stream read compare failed\n");
+ wc_PKCS7_Free(pkcs7);
+ return -11822;
+ }
+ }
+#endif
+ /* decode envelopedData */
+ decodedSz = wc_PKCS7_DecodeAuthEnvelopedData(pkcs7, enveloped,
+ envelopedSz, decoded,
+ sizeof(decoded));
+ if (decodedSz <= 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11823;
+ }
+
+ /* test decode result */
+ if (XMEMCMP(decoded, data, sizeof(data)) != 0){
+ wc_PKCS7_Free(pkcs7);
+ return -11824;
+ }
+
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ /* output pkcs7 envelopedData for external testing */
+ pkcs7File = XFOPEN(testVectors[i].outFileName, "wb");
+ if (!pkcs7File) {
+ wc_PKCS7_Free(pkcs7);
+ return -11825;
+ }
+
+ ret = (int)XFWRITE(enveloped, 1, envelopedSz, pkcs7File);
+ XFCLOSE(pkcs7File);
+ if (ret != envelopedSz) {
+ wc_PKCS7_Free(pkcs7);
+ return -11826;
+ }
+#endif /* PKCS7_OUTPUT_TEST_BUNDLES */
+
+ wc_PKCS7_Free(pkcs7);
+ pkcs7 = NULL;
}
- certSz = fread(cert, 1, FOURK_BUF, certFile);
- fclose(certFile);
+#if !defined(HAVE_ECC) || defined(NO_AES)
+ (void)eccCert;
+ (void)eccCertSz;
+ (void)eccPrivKey;
+ (void)eccPrivKeySz;
+ (void)secretKey;
+ (void)secretKeyId;
+#endif
+#ifdef NO_RSA
+ (void)rsaCert;
+ (void)rsaCertSz;
+ (void)rsaPrivKey;
+ (void)rsaPrivKeySz;
+#endif
+ return 0;
+}
- keyFile = fopen(clientKey, "rb");
- if (!keyFile) {
- free(cert);
- free(privKey);
- err_sys("can't open ./certs/client-key.der, "
- "Please run from wolfSSL home dir", -43);
- return -43;
+
+int pkcs7authenveloped_test(void)
+{
+ int ret = 0;
+
+ byte* rsaCert = NULL;
+ byte* rsaPrivKey = NULL;
+ word32 rsaCertSz = 0;
+ word32 rsaPrivKeySz = 0;
+
+ byte* eccCert = NULL;
+ byte* eccPrivKey = NULL;
+ word32 eccCertSz = 0;
+ word32 eccPrivKeySz = 0;
+
+#ifndef NO_RSA
+ /* read client RSA cert and key in DER format */
+ rsaCert = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (rsaCert == NULL)
+ return -11900;
+
+ rsaPrivKey = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (rsaPrivKey == NULL) {
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -11901;
+ }
+
+ rsaCertSz = FOURK_BUF;
+ rsaPrivKeySz = FOURK_BUF;
+#endif /* NO_RSA */
+
+#ifdef HAVE_ECC
+ /* read client ECC cert and key in DER format */
+ eccCert = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (eccCert == NULL) {
+ #ifndef NO_RSA
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return -11902;
+ }
+
+ eccPrivKey =(byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (eccPrivKey == NULL) {
+ #ifndef NO_RSA
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ XFREE(eccCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -11903;
+ }
+
+ eccCertSz = FOURK_BUF;
+ eccPrivKeySz = FOURK_BUF;
+#endif /* HAVE_ECC */
+
+ ret = pkcs7_load_certs_keys(rsaCert, &rsaCertSz, rsaPrivKey,
+ &rsaPrivKeySz, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, eccCert, &eccCertSz,
+ eccPrivKey, &eccPrivKeySz);
+ if (ret < 0) {
+ #ifndef NO_RSA
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ #ifdef HAVE_ECC
+ XFREE(eccCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(eccPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ #endif
+ return -11904;
}
- privKeySz = fread(privKey, 1, FOURK_BUF, keyFile);
- fclose(keyFile);
+ ret = pkcs7authenveloped_run_vectors(rsaCert, (word32)rsaCertSz,
+ rsaPrivKey, (word32)rsaPrivKeySz,
+ eccCert, (word32)eccCertSz,
+ eccPrivKey, (word32)eccPrivKeySz);
+
+#ifndef NO_RSA
+ XFREE(rsaCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+#ifdef HAVE_ECC
+ XFREE(eccCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(eccPrivKey, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
- wc_PKCS7_InitWithCert(&pkcs7, cert, (word32)certSz);
- pkcs7.content = (byte*)data;
- pkcs7.contentSz = (word32)sizeof(data);
- pkcs7.contentOID = DATA;
- pkcs7.encryptOID = cipher;
- pkcs7.privateKey = privKey;
- pkcs7.privateKeySz = (word32)privKeySz;
+ return ret;
+}
+
+#endif /* HAVE_AESGCM || HAVE_AESCCM */
+#ifndef NO_AES
+static const byte p7DefKey[] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+};
+
+static const byte p7AltKey[] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+};
- /* encode envelopedData */
- envelopedSz = wc_PKCS7_EncodeEnvelopedData(&pkcs7, enveloped,
- sizeof(enveloped));
- if (envelopedSz <= 0) {
- free(cert);
- free(privKey);
- return -203;
+static int myCEKwrapFunc(PKCS7* pkcs7, byte* cek, word32 cekSz, byte* keyId,
+ word32 keyIdSz, byte* orginKey, word32 orginKeySz,
+ byte* out, word32 outSz, int keyWrapAlgo, int type, int direction)
+{
+ int ret;
+
+ if (cek == NULL || out == NULL)
+ return BAD_FUNC_ARG;
+
+ /* test case sanity checks */
+ if (keyIdSz != 1) {
+ return -11905;
}
- /* decode envelopedData */
- decodedSz = wc_PKCS7_DecodeEnvelopedData(&pkcs7, enveloped, envelopedSz,
- decoded, sizeof(decoded));
- if (decodedSz <= 0) {
- free(cert);
- free(privKey);
- return -204;
+ if (keyId[0] != 0x00) {
+ return -11906;
}
- /* test decode result */
- if (memcmp(decoded, data, sizeof(data)) != 0) {
- free(cert);
- free(privKey);
- return -205;
+ if (type != (int)PKCS7_KEKRI) {
+ return -11907;
+ }
+
+ switch (keyWrapAlgo) {
+ case AES256_WRAP:
+ ret = wc_AesKeyUnWrap(p7DefKey, sizeof(p7DefKey), cek, cekSz,
+ out, outSz, NULL);
+ if (ret <= 0)
+ return ret;
+ break;
+
+ default:
+ WOLFSSL_MSG("Unsupported key wrap algorithm in example");
+ return BAD_KEYWRAP_ALG_E;
+ };
+
+ (void)pkcs7;
+ (void)direction;
+ (void)orginKey; /* used with KAKRI */
+ (void)orginKeySz;
+ return ret;
+}
+
+
+/* returns key size on success */
+static int getFirmwareKey(PKCS7* pkcs7, byte* key, word32 keySz)
+{
+ int ret;
+ word32 atrSz;
+ byte atr[256];
+
+ /* Additionally can look for fwWrappedFirmwareKey
+ * 1.2.840.113529.1.9.16.1.16 */
+ const unsigned char fwWrappedFirmwareKey[] = {
+ /* 0x06, 0x0B */
+ 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+ 0x01, 0x09, 0x10, 0x02, 0x27
+ };
+
+ /* find keyID in fwWrappedFirmwareKey */
+ ret = wc_PKCS7_GetAttributeValue(pkcs7, fwWrappedFirmwareKey,
+ sizeof(fwWrappedFirmwareKey), NULL, &atrSz);
+ if (ret == LENGTH_ONLY_E) {
+ XMEMSET(atr, 0, sizeof(atr));
+ ret = wc_PKCS7_GetAttributeValue(pkcs7, fwWrappedFirmwareKey,
+ sizeof(fwWrappedFirmwareKey), atr, &atrSz);
+
+ /* keyIdRaw[0] OCTET TAG */
+ /* keyIdRaw[1] Length */
+
+ if (ret > 0) {
+ PKCS7* envPkcs7;
+
+ envPkcs7 = wc_PKCS7_New(NULL, 0);
+ if (envPkcs7 == NULL) {
+ return MEMORY_E;
+ }
+
+ wc_PKCS7_Init(envPkcs7, NULL, 0);
+ ret = wc_PKCS7_SetWrapCEKCb(envPkcs7, myCEKwrapFunc);
+ if (ret == 0) {
+ /* expecting FIRMWARE_PKG_DATA content */
+ envPkcs7->contentOID = FIRMWARE_PKG_DATA;
+ ret = wc_PKCS7_DecodeEnvelopedData(envPkcs7, atr, atrSz,
+ key, keySz);
+ }
+ wc_PKCS7_Free(envPkcs7);
+ }
}
- /* output pkcs7 envelopedData for external testing */
- pkcs7File = fopen(pkcs7OutFile, "wb");
- if (!pkcs7File) {
- free(cert);
- free(privKey);
- return -206;
+ return ret;
+}
+
+/* create a KEKRI enveloped data
+ * return size on success */
+static int envelopedData_encrypt(byte* in, word32 inSz, byte* out,
+ word32 outSz)
+{
+ int ret;
+ PKCS7* pkcs7;
+ const byte keyId[] = { 0x00 };
+
+ pkcs7 = wc_PKCS7_New(NULL, INVALID_DEVID);
+ if (pkcs7 == NULL)
+ return -11908;
+
+ pkcs7->content = in;
+ pkcs7->contentSz = inSz;
+ pkcs7->contentOID = FIRMWARE_PKG_DATA;
+ pkcs7->encryptOID = AES256CBCb;
+ pkcs7->ukm = NULL;
+ pkcs7->ukmSz = 0;
+
+ /* add recipient (KEKRI type) */
+ ret = wc_PKCS7_AddRecipient_KEKRI(pkcs7, AES256_WRAP, (byte*)p7DefKey,
+ sizeof(p7DefKey), (byte*)keyId,
+ sizeof(keyId), NULL, NULL, 0, NULL, 0, 0);
+ if (ret < 0) {
+ printf("wc_PKCS7_AddRecipient_KEKRI() failed, ret = %d\n", ret);
+ wc_PKCS7_Free(pkcs7);
+ return -11909;
}
- ret = (int)fwrite(enveloped, envelopedSz, 1, pkcs7File);
- fclose(pkcs7File);
+ /* encode envelopedData, returns size */
+ ret = wc_PKCS7_EncodeEnvelopedData(pkcs7, out, outSz);
+ if (ret <= 0) {
+ printf("wc_PKCS7_EncodeEnvelopedData() failed, ret = %d\n", ret);
+ wc_PKCS7_Free(pkcs7);
+ return -11910;
- free(cert);
- free(privKey);
- wc_PKCS7_Free(&pkcs7);
+ }
- if (ret > 0)
- return 0;
+ wc_PKCS7_Free(pkcs7);
return ret;
}
-int pkcs7signed_test(void)
+
+/*
+ * keyHint is the KeyID to be set in the fwDecryptKeyID attribute
+ * returns size of buffer output on success
+ */
+static int generateBundle(byte* out, word32 *outSz, const byte* encryptKey,
+ word32 encryptKeySz, byte keyHint, byte* cert, word32 certSz,
+ byte* key, word32 keySz)
+{
+ int ret, attribNum = 1;
+ PKCS7* pkcs7;
+
+ /* KEY ID
+ * fwDecryptKeyID OID 1.2.840.113549.1.9.16.2.37
+ */
+ const unsigned char fwDecryptKeyID[] = {
+ 0x06, 0x0B,
+ 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+ 0x01, 0x09, 0x10, 0x02, 0x25
+ };
+
+ /* fwWrappedFirmwareKey 1.2.840.113529.1.9.16.1.16 */
+ const unsigned char fwWrappedFirmwareKey[] = {
+ 0x06, 0x0B, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+ 0x01, 0x09, 0x10, 0x02, 0x27
+ };
+
+ byte keyID[] = { 0x04, 0x01, 0x00 };
+ byte env[256];
+ char data[] = "Test of wolfSSL PKCS7 decrypt callback";
+
+ PKCS7Attrib attribs[] =
+ {
+ { fwDecryptKeyID, sizeof(fwDecryptKeyID), keyID, sizeof(keyID) },
+ { fwWrappedFirmwareKey, sizeof(fwWrappedFirmwareKey), env, 0 }
+ };
+
+ keyID[2] = keyHint;
+
+ /* If using keyHint 0 then create a bundle with fwWrappedFirmwareKey */
+ if (keyHint == 0) {
+ ret = envelopedData_encrypt((byte*)p7DefKey, sizeof(p7DefKey), env,
+ sizeof(env));
+ if (ret <= 0) {
+ return ret;
+ }
+ attribs[1].valueSz = ret;
+ attribNum++;
+ }
+
+ /* init PKCS7 */
+ pkcs7 = wc_PKCS7_New(NULL, INVALID_DEVID);
+ if (pkcs7 == NULL)
+ return -11911;
+
+ ret = wc_PKCS7_InitWithCert(pkcs7, cert, certSz);
+ if (ret != 0) {
+ printf("ERROR: wc_PKCS7_InitWithCert() failed, ret = %d\n", ret);
+ wc_PKCS7_Free(pkcs7);
+ return -11912;
+ }
+
+ ret = wc_PKCS7_SetSignerIdentifierType(pkcs7, CMS_SKID);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -11913;
+ }
+
+ /* encode Signed Encrypted FirmwarePkgData */
+ if (encryptKeySz == 16) {
+ ret = wc_PKCS7_EncodeSignedEncryptedFPD(pkcs7, (byte*)encryptKey,
+ encryptKeySz, key, keySz, AES128CBCb, RSAk, SHA256h,
+ (byte*)data, sizeof(data), NULL, 0,
+ attribs, attribNum, out, *outSz);
+ }
+ else {
+ ret = wc_PKCS7_EncodeSignedEncryptedFPD(pkcs7, (byte*)encryptKey,
+ encryptKeySz, key, keySz, AES256CBCb, RSAk, SHA256h,
+ (byte*)data, sizeof(data), NULL, 0,
+ attribs, attribNum, out, *outSz);
+ }
+ if (ret <= 0) {
+ printf("ERROR: wc_PKCS7_EncodeSignedEncryptedFPD() failed, "
+ "ret = %d\n", ret);
+ wc_PKCS7_Free(pkcs7);
+ return -11914;
+
+ } else {
+ *outSz = ret;
+ }
+
+ wc_PKCS7_Free(pkcs7);
+
+ return ret;
+}
+
+
+/* test verification and decryption of PKCS7 bundle
+ * return 0 on success
+ */
+static int verifyBundle(byte* derBuf, word32 derSz, int keyHint)
{
int ret = 0;
+ int usrCtx = 1; /* test value to pass as user context to callback */
+ PKCS7* pkcs7;
+ byte* sid;
+ word32 sidSz;
+ byte key[256];
+ word32 keySz = sizeof(key);
+
+ byte decoded[FOURK_BUF/2];
+ int decodedSz = FOURK_BUF/2;
+
+ const byte expectedSid[] = {
+ 0x33, 0xD8, 0x45, 0x66, 0xD7, 0x68, 0x87, 0x18,
+ 0x7E, 0x54, 0x0D, 0x70, 0x27, 0x91, 0xC7, 0x26,
+ 0xD7, 0x85, 0x65, 0xC0
+ };
- FILE* file;
- byte* certDer;
- byte* keyDer;
- byte* out;
- char data[] = "Hello World";
- word32 dataSz, outSz, certDerSz, keyDerSz;
- PKCS7 msg;
- RNG rng;
-
- byte transIdOid[] =
+ pkcs7 = wc_PKCS7_New(HEAP_HINT, INVALID_DEVID);
+ if (pkcs7 == NULL) {
+ return MEMORY_E;
+ }
+
+ /* Test verify */
+ ret = wc_PKCS7_Init(pkcs7, HEAP_HINT, INVALID_DEVID);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+ ret = wc_PKCS7_InitWithCert(pkcs7, NULL, 0);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+ ret = wc_PKCS7_VerifySignedData(pkcs7, derBuf, derSz);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+
+ /* Get size of SID and print it out */
+ ret = wc_PKCS7_GetSignerSID(pkcs7, NULL, &sidSz);
+ if (ret != LENGTH_ONLY_E) {
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+
+ sid = (byte*)XMALLOC(sidSz, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (sid == NULL) {
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+
+ ret = wc_PKCS7_GetSignerSID(pkcs7, sid, &sidSz);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ XFREE(sid, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return ret;
+ }
+ ret = XMEMCMP(sid, expectedSid, sidSz);
+ XFREE(sid, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+
+ /* get expected fwWrappedFirmwareKey */
+ if (keyHint == 0) {
+ ret = getFirmwareKey(pkcs7, key, keySz);
+ if (ret < 0) {
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+ pkcs7->encryptionKey = key;
+ pkcs7->encryptionKeySz = ret;
+ }
+ else {
+ decodedSz = sizeof(decoded);
+ ret = wc_PKCS7_SetDecodeEncryptedCb(pkcs7, myDecryptionFunc);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+
+ ret = wc_PKCS7_SetDecodeEncryptedCtx(pkcs7, (void*)&usrCtx);
+ if (ret != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+ }
+
+ decodedSz = wc_PKCS7_DecodeEncryptedData(pkcs7, pkcs7->content,
+ pkcs7->contentSz, decoded, decodedSz);
+ if (decodedSz < 0) {
+ ret = decodedSz;
+ wc_PKCS7_Free(pkcs7);
+ return ret;
+ }
+
+ wc_PKCS7_Free(pkcs7);
+ return 0;
+}
+
+
+int pkcs7callback_test(byte* cert, word32 certSz, byte* key, word32 keySz)
+{
+
+ int ret = 0;
+ byte derBuf[FOURK_BUF/2];
+ word32 derSz = FOURK_BUF/2;
+
+ /* Doing default generation and verify */
+ ret = generateBundle(derBuf, &derSz, p7DefKey, sizeof(p7DefKey), 0, cert,
+ certSz, key, keySz);
+ if (ret <= 0) {
+ return -11915;
+ }
+
+ ret = verifyBundle(derBuf, derSz, 0);
+ if (ret != 0) {
+ return -11916;
+ }
+
+ /* test choosing other key with keyID */
+ derSz = FOURK_BUF/2;
+ ret = generateBundle(derBuf, &derSz, p7AltKey, sizeof(p7AltKey), 1,
+ cert, certSz, key, keySz);
+ if (ret <= 0) {
+ return -11917;
+ }
+
+ ret = verifyBundle(derBuf, derSz, 1);
+ if (ret != 0) {
+ return -11918;
+ }
+
+ /* test fail case with wrong keyID */
+ derSz = FOURK_BUF/2;
+ ret = generateBundle(derBuf, &derSz, p7DefKey, sizeof(p7DefKey), 1,
+ cert, certSz, key, keySz);
+ if (ret <= 0) {
+ return -11919;
+ }
+
+ ret = verifyBundle(derBuf, derSz, 1);
+ if (ret == 0) {
+ return -11920;
+ }
+
+ return 0;
+}
+#endif /* NO_AES */
+
+#ifndef NO_PKCS7_ENCRYPTED_DATA
+
+typedef struct {
+ const byte* content;
+ word32 contentSz;
+ int contentOID;
+ int encryptOID;
+ byte* encryptionKey;
+ word32 encryptionKeySz;
+ PKCS7Attrib* attribs;
+ word32 attribsSz;
+ const char* outFileName;
+} pkcs7EncryptedVector;
+
+
+int pkcs7encrypted_test(void)
+{
+ int ret = 0;
+ int i, testSz;
+ int encryptedSz, decodedSz, attribIdx;
+ PKCS7* pkcs7;
+ byte encrypted[2048];
+ byte decoded[2048];
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ XFILE pkcs7File;
+#endif
+
+ PKCS7Attrib* expectedAttrib;
+ PKCS7DecodedAttrib* decodedAttrib;
+
+ const byte data[] = { /* Hello World */
+ 0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f,
+ 0x72,0x6c,0x64
+ };
+
+#ifndef NO_DES3
+ byte desKey[] = {
+ 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef
+ };
+ byte des3Key[] = {
+ 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
+ 0xfe,0xde,0xba,0x98,0x76,0x54,0x32,0x10,
+ 0x89,0xab,0xcd,0xef,0x01,0x23,0x45,0x67
+ };
+#endif
+
+#ifndef NO_AES
+#ifdef WOLFSSL_AES_128
+ byte aes128Key[] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+ };
+#endif
+#ifdef WOLFSSL_AES_192
+ byte aes192Key[] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+ };
+#endif
+#ifdef WOLFSSL_AES_256
+ byte aes256Key[] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+ };
+#endif
+
+#ifdef WOLFSSL_AES_256
+ /* Attribute example from RFC 4134, Section 7.2
+ * OID = 1.2.5555
+ * OCTET STRING = 'This is a test General ASN Attribute, number 1.' */
+ static byte genAttrOid[] = { 0x06, 0x03, 0x2a, 0xab, 0x33 };
+ static byte genAttr[] = { 0x04, 47,
+ 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20,
+ 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x47,
+ 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c, 0x20, 0x41,
+ 0x53, 0x4e, 0x20, 0x41, 0x74, 0x74, 0x72, 0x69,
+ 0x62, 0x75, 0x74, 0x65, 0x2c, 0x20, 0x6e, 0x75,
+ 0x6d, 0x62, 0x65, 0x72, 0x20, 0x31, 0x2e };
+
+ static byte genAttrOid2[] = { 0x06, 0x03, 0x2a, 0xab, 0x34 };
+ static byte genAttr2[] = { 0x04, 47,
+ 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20,
+ 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x47,
+ 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c, 0x20, 0x41,
+ 0x53, 0x4e, 0x20, 0x41, 0x74, 0x74, 0x72, 0x69,
+ 0x62, 0x75, 0x74, 0x65, 0x2c, 0x20, 0x6e, 0x75,
+ 0x6d, 0x62, 0x65, 0x72, 0x20, 0x32, 0x2e };
+
+ PKCS7Attrib attribs[] =
+ {
+ { genAttrOid, sizeof(genAttrOid), genAttr, sizeof(genAttr) }
+ };
+
+ PKCS7Attrib multiAttribs[] =
+ {
+ { genAttrOid, sizeof(genAttrOid), genAttr, sizeof(genAttr) },
+ { genAttrOid2, sizeof(genAttrOid2), genAttr2, sizeof(genAttr2) }
+ };
+#endif
+#endif /* NO_AES */
+
+ const pkcs7EncryptedVector testVectors[] =
+ {
+#ifndef NO_DES3
+ {data, (word32)sizeof(data), DATA, DES3b, des3Key, sizeof(des3Key),
+ NULL, 0, "pkcs7encryptedDataDES3.der"},
+
+ {data, (word32)sizeof(data), DATA, DESb, desKey, sizeof(desKey),
+ NULL, 0, "pkcs7encryptedDataDES.der"},
+#endif /* NO_DES3 */
+
+#ifndef NO_AES
+ #ifdef WOLFSSL_AES_128
+ {data, (word32)sizeof(data), DATA, AES128CBCb, aes128Key,
+ sizeof(aes128Key), NULL, 0, "pkcs7encryptedDataAES128CBC.der"},
+ #endif
+ #ifdef WOLFSSL_AES_192
+ {data, (word32)sizeof(data), DATA, AES192CBCb, aes192Key,
+ sizeof(aes192Key), NULL, 0, "pkcs7encryptedDataAES192CBC.der"},
+ #endif
+ #ifdef WOLFSSL_AES_256
+ {data, (word32)sizeof(data), DATA, AES256CBCb, aes256Key,
+ sizeof(aes256Key), NULL, 0, "pkcs7encryptedDataAES256CBC.der"},
+
+ /* test with optional unprotected attributes */
+ {data, (word32)sizeof(data), DATA, AES256CBCb, aes256Key,
+ sizeof(aes256Key), attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7encryptedDataAES256CBC_attribs.der"},
+
+ /* test with multiple optional unprotected attributes */
+ {data, (word32)sizeof(data), DATA, AES256CBCb, aes256Key,
+ sizeof(aes256Key), multiAttribs,
+ (sizeof(multiAttribs)/sizeof(PKCS7Attrib)),
+ "pkcs7encryptedDataAES256CBC_multi_attribs.der"},
+
+ /* test with contentType set to FirmwarePkgData */
+ {data, (word32)sizeof(data), FIRMWARE_PKG_DATA, AES256CBCb, aes256Key,
+ sizeof(aes256Key), NULL, 0,
+ "pkcs7encryptedDataAES256CBC_firmwarePkgData.der"},
+ #endif
+#endif /* NO_AES */
+ };
+
+ testSz = sizeof(testVectors) / sizeof(pkcs7EncryptedVector);
+
+ for (i = 0; i < testSz; i++) {
+ pkcs7 = wc_PKCS7_New(HEAP_HINT, devId);
+ if (pkcs7 == NULL)
+ return -12000;
+
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->encryptOID = testVectors[i].encryptOID;
+ pkcs7->encryptionKey = testVectors[i].encryptionKey;
+ pkcs7->encryptionKeySz = testVectors[i].encryptionKeySz;
+ pkcs7->unprotectedAttribs = testVectors[i].attribs;
+ pkcs7->unprotectedAttribsSz = testVectors[i].attribsSz;
+
+ /* encode encryptedData */
+ encryptedSz = wc_PKCS7_EncodeEncryptedData(pkcs7, encrypted,
+ sizeof(encrypted));
+ if (encryptedSz <= 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -12001;
+ }
+
+ /* decode encryptedData */
+#ifndef NO_PKCS7_STREAM
+ { /* test reading byte by byte */
+ int z;
+ for (z = 0; z < encryptedSz; z++) {
+ decodedSz = wc_PKCS7_DecodeEncryptedData(pkcs7, encrypted + z, 1,
+ decoded, sizeof(decoded));
+ if (decodedSz <= 0 && decodedSz != WC_PKCS7_WANT_READ_E) {
+ printf("unexpected error %d\n", decodedSz);
+ return -12002;
+ }
+ }
+ /* test decode result */
+ if (XMEMCMP(decoded, data, sizeof(data)) != 0) {
+ printf("stream read failed\n");
+ wc_PKCS7_Free(pkcs7);
+ return -12003;
+ }
+ }
+#endif
+ decodedSz = wc_PKCS7_DecodeEncryptedData(pkcs7, encrypted, encryptedSz,
+ decoded, sizeof(decoded));
+ if (decodedSz <= 0){
+ wc_PKCS7_Free(pkcs7);
+ return -12004;
+ }
+
+ /* test decode result */
+ if (XMEMCMP(decoded, data, sizeof(data)) != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -12005;
+ }
+
+ /* verify decoded unprotected attributes */
+ if (pkcs7->decodedAttrib != NULL) {
+ decodedAttrib = pkcs7->decodedAttrib;
+ attribIdx = 1;
+
+ while (decodedAttrib != NULL) {
+
+ /* expected attribute, stored list is reversed */
+ expectedAttrib = &(pkcs7->unprotectedAttribs
+ [pkcs7->unprotectedAttribsSz - attribIdx]);
+
+ /* verify oid */
+ if (XMEMCMP(decodedAttrib->oid, expectedAttrib->oid,
+ decodedAttrib->oidSz) != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -12006;
+ }
+
+ /* verify value */
+ if (XMEMCMP(decodedAttrib->value, expectedAttrib->value,
+ decodedAttrib->valueSz) != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -12007;
+ }
+
+ decodedAttrib = decodedAttrib->next;
+ attribIdx++;
+ }
+ }
+
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ /* output pkcs7 envelopedData for external testing */
+ pkcs7File = XFOPEN(testVectors[i].outFileName, "wb");
+ if (!pkcs7File) {
+ wc_PKCS7_Free(pkcs7);
+ return -12008;
+ }
+
+ ret = (int)XFWRITE(encrypted, encryptedSz, 1, pkcs7File);
+ XFCLOSE(pkcs7File);
+
+ if (ret > 0)
+ ret = 0;
+#endif
+
+ wc_PKCS7_Free(pkcs7);
+ }
+
+ return ret;
+}
+
+#endif /* NO_PKCS7_ENCRYPTED_DATA */
+
+
+#if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+
+typedef struct {
+ const byte* content;
+ word32 contentSz;
+ int contentOID;
+ const char* outFileName;
+} pkcs7CompressedVector;
+
+
+int pkcs7compressed_test(void)
+{
+ int ret = 0;
+ int i, testSz;
+ int compressedSz, decodedSz;
+ PKCS7* pkcs7;
+ byte compressed[2048];
+ byte decoded[2048];
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ XFILE pkcs7File;
+#endif
+
+ const byte data[] = { /* Hello World */
+ 0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f,
+ 0x72,0x6c,0x64
+ };
+
+ const pkcs7CompressedVector testVectors[] =
+ {
+ {data, (word32)sizeof(data), DATA,
+ "pkcs7compressedData_data_zlib.der"},
+ {data, (word32)sizeof(data), FIRMWARE_PKG_DATA,
+ "pkcs7compressedData_firmwarePkgData_zlib.der"},
+ };
+
+ testSz = sizeof(testVectors) / sizeof(pkcs7CompressedVector);
+
+ for (i = 0; i < testSz; i++) {
+ pkcs7 = wc_PKCS7_New(HEAP_HINT, devId);
+ if (pkcs7 == NULL)
+ return -12100;
+
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+
+ /* encode compressedData */
+ compressedSz = wc_PKCS7_EncodeCompressedData(pkcs7, compressed,
+ sizeof(compressed));
+ if (compressedSz <= 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -12101;
+ }
+
+ /* decode compressedData */
+ decodedSz = wc_PKCS7_DecodeCompressedData(pkcs7, compressed,
+ compressedSz, decoded,
+ sizeof(decoded));
+ if (decodedSz <= 0){
+ wc_PKCS7_Free(pkcs7);
+ return -12102;
+ }
+
+ /* test decode result */
+ if (XMEMCMP(decoded, testVectors[i].content,
+ testVectors[i].contentSz) != 0) {
+ wc_PKCS7_Free(pkcs7);
+ return -12103;
+ }
+
+ /* make sure content type is the same */
+ if (testVectors[i].contentOID != pkcs7->contentOID)
+ return -12104;
+
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ /* output pkcs7 compressedData for external testing */
+ pkcs7File = XFOPEN(testVectors[i].outFileName, "wb");
+ if (!pkcs7File) {
+ wc_PKCS7_Free(pkcs7);
+ return -12105;
+ }
+
+ ret = (int)XFWRITE(compressed, compressedSz, 1, pkcs7File);
+ XFCLOSE(pkcs7File);
+
+ if (ret > 0)
+ ret = 0;
+#endif
+
+ wc_PKCS7_Free(pkcs7);
+ }
+
+ return ret;
+} /* pkcs7compressed_test() */
+
+#endif /* HAVE_LIBZ */
+
+
+typedef struct {
+ const byte* content;
+ word32 contentSz;
+ int hashOID;
+ int signOID;
+ byte* privateKey;
+ word32 privateKeySz;
+ byte* cert;
+ size_t certSz;
+ byte* caCert;
+ size_t caCertSz;
+ PKCS7Attrib* signedAttribs;
+ word32 signedAttribsSz;
+ const char* outFileName;
+ int contentOID;
+ byte* contentType;
+ word32 contentTypeSz;
+ int sidType;
+ int encryptOID; /* for single-shot encrypt alg OID */
+ int encCompFlag; /* for single-shot. 1 = enc, 2 = comp, 3 = both*/
+ byte* encryptKey; /* for single-shot, encryptedData */
+ word32 encryptKeySz; /* for single-shot, encryptedData */
+ PKCS7Attrib* unprotectedAttribs; /* for single-shot, encryptedData */
+ word32 unprotectedAttribsSz; /* for single-shot, encryptedData */
+ word16 detachedSignature; /* generate detached signature (0:1) */
+} pkcs7SignedVector;
+
+
+static int pkcs7signed_run_vectors(
+ byte* rsaClientCertBuf, word32 rsaClientCertBufSz,
+ byte* rsaClientPrivKeyBuf, word32 rsaClientPrivKeyBufSz,
+ byte* rsaServerCertBuf, word32 rsaServerCertBufSz,
+ byte* rsaServerPrivKeyBuf, word32 rsaServerPrivKeyBufSz,
+ byte* rsaCaCertBuf, word32 rsaCaCertBufSz,
+ byte* rsaCaPrivKeyBuf, word32 rsaCaPrivKeyBufSz,
+ byte* eccClientCertBuf, word32 eccClientCertBufSz,
+ byte* eccClientPrivKeyBuf, word32 eccClientPrivKeyBufSz)
+{
+ int ret, testSz, i;
+ int encodedSz;
+ byte* out;
+ word32 outSz;
+ WC_RNG rng;
+ PKCS7* pkcs7;
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ XFILE file;
+#endif
+
+ const byte data[] = { /* Hello World */
+ 0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f,
+ 0x72,0x6c,0x64
+ };
+
+ static byte transIdOid[] =
{ 0x06, 0x0a, 0x60, 0x86, 0x48, 0x01, 0x86, 0xF8, 0x45, 0x01,
0x09, 0x07 };
- byte messageTypeOid[] =
+ static byte messageTypeOid[] =
{ 0x06, 0x0a, 0x60, 0x86, 0x48, 0x01, 0x86, 0xF8, 0x45, 0x01,
0x09, 0x02 };
- byte senderNonceOid[] =
+ static byte senderNonceOid[] =
{ 0x06, 0x0a, 0x60, 0x86, 0x48, 0x01, 0x86, 0xF8, 0x45, 0x01,
0x09, 0x05 };
- byte transId[(SHA_DIGEST_SIZE + 1) * 2 + 1];
- byte messageType[] = { 0x13, 2, '1', '9' };
- byte senderNonce[PKCS7_NONCE_SZ + 2];
+#ifndef NO_SHA
+ static byte transId[(WC_SHA_DIGEST_SIZE + 1) * 2 + 1];
+#else
+ static byte transId[(WC_SHA256_DIGEST_SIZE + 1) * 2 + 1];
+#endif
+ static byte messageType[] = { 0x13, 2, '1', '9' };
+ static byte senderNonce[PKCS7_NONCE_SZ + 2];
PKCS7Attrib attribs[] =
{
- { transIdOid, sizeof(transIdOid),
- transId, sizeof(transId) - 1 }, /* take off the null */
- { messageTypeOid, sizeof(messageTypeOid),
- messageType, sizeof(messageType) },
- { senderNonceOid, sizeof(senderNonceOid),
- senderNonce, sizeof(senderNonce) }
+ { transIdOid, sizeof(transIdOid), transId,
+ sizeof(transId) - 1 }, /* take off the null */
+ { messageTypeOid, sizeof(messageTypeOid), messageType,
+ sizeof(messageType) },
+ { senderNonceOid, sizeof(senderNonceOid), senderNonce,
+ sizeof(senderNonce) }
};
- dataSz = (word32) strlen(data);
+ /* for testing custom contentType, FirmwarePkgData */
+ byte customContentType[] = { 0x06, 0x0B, 0x2A, 0x86,
+ 0x48, 0x86, 0xF7, 0x0D,
+ 0x01, 0x09, 0x10, 0x01, 0x10 };
+
+ const pkcs7SignedVector testVectors[] =
+ {
+#ifndef NO_RSA
+ #ifndef NO_SHA
+ /* RSA with SHA */
+ {data, (word32)sizeof(data), SHAh, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA.der", 0, NULL, 0, 0, 0, 0, NULL, 0, NULL,
+ 0, 0},
+
+ /* RSA with SHA, no signed attributes */
+ {data, (word32)sizeof(data), SHAh, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz,
+ NULL, 0, NULL, 0,
+ "pkcs7signedData_RSA_SHA_noattr.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+ #endif
+ #ifdef WOLFSSL_SHA224
+ /* RSA with SHA224 */
+ {data, (word32)sizeof(data), SHA224h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA224.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+ #endif
+ #ifndef NO_SHA256
+ /* RSA with SHA256 */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA256.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+
+ /* RSA with SHA256, detached signature */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA256_detachedSig.der", 0, NULL, 0, 0, 0, 0,
+ NULL, 0, NULL, 0, 1},
+
+ /* RSA with SHA256 and SubjectKeyIdentifier in SignerIdentifier */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA256_SKID.der", 0, NULL, 0, CMS_SKID, 0, 0,
+ NULL, 0, NULL, 0, 0},
+
+ /* RSA with SHA256 and custom contentType */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA256_custom_contentType.der", 0,
+ customContentType, sizeof(customContentType), 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+
+ /* RSA with SHA256 and FirmwarePkgData contentType */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA256_firmwarePkgData.der",
+ FIRMWARE_PKG_DATA, NULL, 0, 0, 0, 0, NULL, 0, NULL, 0, 0},
+
+ /* RSA with SHA256 using server cert and ca cert */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaServerPrivKeyBuf,
+ rsaServerPrivKeyBufSz, rsaServerCertBuf, rsaServerCertBufSz,
+ rsaCaCertBuf, rsaCaCertBufSz,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA256_with_ca_cert.der", 0, NULL, 0, 0, 0, 0,
+ NULL, 0, NULL, 0, 0},
+ #endif
+ #if defined(WOLFSSL_SHA384)
+ /* RSA with SHA384 */
+ {data, (word32)sizeof(data), SHA384h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA384.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+ #endif
+ #if defined(WOLFSSL_SHA512)
+ /* RSA with SHA512 */
+ {data, (word32)sizeof(data), SHA512h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_RSA_SHA512.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+ #endif
+#endif /* NO_RSA */
+
+#ifdef HAVE_ECC
+ #ifndef NO_SHA
+ /* ECDSA with SHA */
+ {data, (word32)sizeof(data), SHAh, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_ECDSA_SHA.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+
+ /* ECDSA with SHA, no signed attributes */
+ {data, (word32)sizeof(data), SHAh, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz,
+ NULL, 0, NULL, 0,
+ "pkcs7signedData_ECDSA_SHA_noattr.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+ #endif
+ #ifdef WOLFSSL_SHA224
+ /* ECDSA with SHA224 */
+ {data, (word32)sizeof(data), SHA224h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_ECDSA_SHA224.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+ #endif
+ #ifndef NO_SHA256
+ /* ECDSA with SHA256 */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_ECDSA_SHA256.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+
+ /* ECDSA with SHA256 and SubjectKeyIdentifier in SigherIdentifier */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_ECDSA_SHA256_SKID.der", 0, NULL, 0, CMS_SKID, 0, 0,
+ NULL, 0, NULL, 0, 0},
+
+ /* ECDSA with SHA256 and custom contentType */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_ECDSA_SHA256_custom_contentType.der", 0,
+ customContentType, sizeof(customContentType), 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+
+ /* ECDSA with SHA256 and FirmwarePkgData contentType */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_ECDSA_SHA256_firmwarePkgData.der",
+ FIRMWARE_PKG_DATA, NULL, 0, 0, 0, 0, NULL, 0, NULL, 0, 0},
+ #endif
+ #ifdef WOLFSSL_SHA384
+ /* ECDSA with SHA384 */
+ {data, (word32)sizeof(data), SHA384h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_ECDSA_SHA384.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+ #endif
+ #ifdef WOLFSSL_SHA512
+ /* ECDSA with SHA512 */
+ {data, (word32)sizeof(data), SHA512h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedData_ECDSA_SHA512.der", 0, NULL, 0, 0, 0, 0, NULL, 0,
+ NULL, 0, 0},
+ #endif
+#endif /* HAVE_ECC */
+ };
+
+ testSz = sizeof(testVectors) / sizeof(pkcs7SignedVector);
+
outSz = FOURK_BUF;
+ out = (byte*)XMALLOC(outSz, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (out == NULL)
+ return -12106;
+
+ XMEMSET(out, 0, outSz);
- certDer = (byte*)malloc(FOURK_BUF);
- if (certDer == NULL)
- return -207;
- keyDer = (byte*)malloc(FOURK_BUF);
- if (keyDer == NULL) {
- free(certDer);
- return -208;
+ ret = wc_PKCS7_PadData((byte*)data, sizeof(data), out, outSz, 16);
+ if (ret < 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -12107;
}
- out = (byte*)malloc(FOURK_BUF);
- if (out == NULL) {
- free(certDer);
- free(keyDer);
- return -209;
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -12108;
}
- /* read in DER cert of recipient, into cert of size certSz */
- file = fopen(clientCert, "rb");
- if (!file) {
- free(certDer);
- free(keyDer);
- free(out);
- err_sys("can't open ./certs/client-cert.der, "
- "Please run from wolfSSL home dir", -44);
- return -44;
+ for (i = 0; i < testSz; i++) {
+ pkcs7 = wc_PKCS7_New(HEAP_HINT, devId);
+ if (pkcs7 == NULL)
+ return -12109;
+
+ ret = wc_PKCS7_InitWithCert(pkcs7, testVectors[i].cert,
+ (word32)testVectors[i].certSz);
+
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12110;
+ }
+
+ /* load CA certificate, if present */
+ if (testVectors[i].caCert != NULL) {
+ ret = wc_PKCS7_AddCertificate(pkcs7, testVectors[i].caCert,
+ (word32)testVectors[i].caCertSz);
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12111;
+ }
+ }
+
+ pkcs7->rng = &rng;
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ pkcs7->contentOID = testVectors[i].contentOID;
+ pkcs7->hashOID = testVectors[i].hashOID;
+ pkcs7->encryptOID = testVectors[i].signOID;
+ pkcs7->privateKey = testVectors[i].privateKey;
+ pkcs7->privateKeySz = testVectors[i].privateKeySz;
+ pkcs7->signedAttribs = testVectors[i].signedAttribs;
+ pkcs7->signedAttribsSz = testVectors[i].signedAttribsSz;
+
+ /* optional custom contentType, default is DATA,
+ overrides contentOID if set */
+ if (testVectors[i].contentType != NULL) {
+ ret = wc_PKCS7_SetContentType(pkcs7, testVectors[i].contentType,
+ testVectors[i].contentTypeSz);
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12112;
+ }
+ }
+
+ /* set SignerIdentifier to use SubjectKeyIdentifier if desired,
+ default is IssuerAndSerialNumber */
+ if (testVectors[i].sidType == CMS_SKID) {
+ ret = wc_PKCS7_SetSignerIdentifierType(pkcs7, CMS_SKID);
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12113;
+ }
+ }
+
+ /* generate senderNonce */
+ {
+ senderNonce[0] = 0x04;
+ senderNonce[1] = PKCS7_NONCE_SZ;
+
+ ret = wc_RNG_GenerateBlock(&rng, &senderNonce[2], PKCS7_NONCE_SZ);
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12114;
+ }
+ }
+
+ /* generate transactionID (used with SCEP) */
+ {
+ #ifndef NO_SHA
+ wc_Sha sha;
+ byte digest[WC_SHA_DIGEST_SIZE];
+ #else
+ wc_Sha256 sha;
+ byte digest[WC_SHA256_DIGEST_SIZE];
+ #endif
+ int j,k;
+
+ transId[0] = 0x13;
+ transId[1] = sizeof(digest) * 2;
+
+ #ifndef NO_SHA
+ ret = wc_InitSha_ex(&sha, HEAP_HINT, devId);
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12115;
+ }
+ wc_ShaUpdate(&sha, pkcs7->publicKey, pkcs7->publicKeySz);
+ wc_ShaFinal(&sha, digest);
+ wc_ShaFree(&sha);
+ #else
+ ret = wc_InitSha256_ex(&sha, HEAP_HINT, devId);
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12116;
+ }
+ wc_Sha256Update(&sha, pkcs7->publicKey, pkcs7->publicKeySz);
+ wc_Sha256Final(&sha, digest);
+ wc_Sha256Free(&sha);
+ #endif
+
+ for (j = 0, k = 2; j < (int)sizeof(digest); j++, k += 2) {
+ XSNPRINTF((char*)&transId[k], 3, "%02x", digest[j]);
+ }
+ }
+
+ /* enable detached signature generation, if set */
+ if (testVectors[i].detachedSignature == 1) {
+ ret = wc_PKCS7_SetDetached(pkcs7, 1);
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12117;
+ }
+ }
+
+ encodedSz = wc_PKCS7_EncodeSignedData(pkcs7, out, outSz);
+ if (encodedSz < 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12118;
+ }
+
+ #ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ /* write PKCS#7 to output file for more testing */
+ file = XFOPEN(testVectors[i].outFileName, "wb");
+ if (!file) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12119;
+ }
+ ret = (int)XFWRITE(out, 1, encodedSz, file);
+ XFCLOSE(file);
+ if (ret != (int)encodedSz) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12120;
+ }
+ #endif /* PKCS7_OUTPUT_TEST_BUNDLES */
+
+ wc_PKCS7_Free(pkcs7);
+
+ pkcs7 = wc_PKCS7_New(HEAP_HINT, devId);
+ if (pkcs7 == NULL)
+ return -12121;
+ wc_PKCS7_InitWithCert(pkcs7, NULL, 0);
+
+ if (testVectors[i].detachedSignature == 1) {
+ /* set content for verifying detached signatures */
+ pkcs7->content = (byte*)testVectors[i].content;
+ pkcs7->contentSz = testVectors[i].contentSz;
+ }
+
+ ret = wc_PKCS7_VerifySignedData(pkcs7, out, outSz);
+ if (ret < 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12122;
+ }
+
+ /* verify contentType extracted successfully for custom content types */
+ if (testVectors[i].contentTypeSz > 0) {
+ if (pkcs7->contentTypeSz != testVectors[i].contentTypeSz) {
+ return -12123;
+ } else if (XMEMCMP(pkcs7->contentType, testVectors[i].contentType,
+ pkcs7->contentTypeSz) != 0) {
+ return -12124;
+ }
+ }
+
+ if (pkcs7->singleCert == NULL || pkcs7->singleCertSz == 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12125;
+ }
+
+ {
+ /* check getting signed attributes */
+ #ifndef NO_SHA
+ byte buf[(WC_SHA_DIGEST_SIZE + 1) * 2 + 1];
+ #else
+ byte buf[(WC_SHA256_DIGEST_SIZE + 1) * 2 + 1];
+ #endif
+ byte* oidPt = transIdOid + 2; /* skip object id tag and size */
+ int oidSz = (int)sizeof(transIdOid) - 2;
+ int bufSz = 0;
+
+ if (testVectors[i].signedAttribs != NULL &&
+ wc_PKCS7_GetAttributeValue(pkcs7, oidPt, oidSz,
+ NULL, (word32*)&bufSz) != LENGTH_ONLY_E) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12126;
+ }
+
+ if (bufSz > (int)sizeof(buf)) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12127;
+ }
+
+ bufSz = wc_PKCS7_GetAttributeValue(pkcs7, oidPt, oidSz,
+ buf, (word32*)&bufSz);
+ if ((testVectors[i].signedAttribs != NULL && bufSz < 0) ||
+ (testVectors[i].signedAttribs == NULL && bufSz > 0)) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12128;
+ }
+ }
+
+ #ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ file = XFOPEN("./pkcs7cert.der", "wb");
+ if (!file) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12129;
+ }
+ ret = (int)XFWRITE(pkcs7->singleCert, 1, pkcs7->singleCertSz, file);
+ XFCLOSE(file);
+ #endif /* PKCS7_OUTPUT_TEST_BUNDLES */
+
+ wc_PKCS7_Free(pkcs7);
}
- certDerSz = (word32)fread(certDer, 1, FOURK_BUF, file);
- fclose(file);
- file = fopen(clientKey, "rb");
- if (!file) {
- free(certDer);
- free(keyDer);
- free(out);
- err_sys("can't open ./certs/client-key.der, "
- "Please run from wolfSSL home dir", -45);
- return -45;
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_FreeRng(&rng);
+
+ if (ret > 0)
+ return 0;
+
+ (void)rsaClientCertBuf;
+ (void)rsaClientCertBufSz;
+ (void)rsaClientPrivKeyBuf;
+ (void)rsaClientPrivKeyBufSz;
+ (void)rsaServerCertBuf;
+ (void)rsaServerCertBufSz;
+ (void)rsaServerPrivKeyBuf;
+ (void)rsaServerPrivKeyBufSz;
+ (void)rsaCaCertBuf;
+ (void)rsaCaCertBufSz;
+ (void)rsaCaPrivKeyBuf;
+ (void)rsaCaPrivKeyBufSz;
+ (void)eccClientCertBuf;
+ (void)eccClientCertBufSz;
+ (void)eccClientPrivKeyBuf;
+ (void)eccClientPrivKeyBufSz;
+
+ return ret;
+}
+
+
+static int pkcs7signed_run_SingleShotVectors(
+ byte* rsaClientCertBuf, word32 rsaClientCertBufSz,
+ byte* rsaClientPrivKeyBuf, word32 rsaClientPrivKeyBufSz,
+ byte* rsaServerCertBuf, word32 rsaServerCertBufSz,
+ byte* rsaServerPrivKeyBuf, word32 rsaServerPrivKeyBufSz,
+ byte* rsaCaCertBuf, word32 rsaCaCertBufSz,
+ byte* rsaCaPrivKeyBuf, word32 rsaCaPrivKeyBufSz,
+ byte* eccClientCertBuf, word32 eccClientCertBufSz,
+ byte* eccClientPrivKeyBuf, word32 eccClientPrivKeyBufSz)
+{
+ int ret, testSz, i;
+ int encodedSz;
+ byte* out;
+ word32 outSz;
+ WC_RNG rng;
+ PKCS7* pkcs7;
+#ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ XFILE file;
+#endif
+
+ const byte data[] = { /* Hello World */
+ 0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f,
+ 0x72,0x6c,0x64
+ };
+
+#if defined(WOLFSSL_AES_256) && !defined(NO_PKCS7_ENCRYPTED_DATA)
+ byte aes256Key[] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+ };
+#endif
+
+ static byte messageTypeOid[] =
+ { 0x06, 0x0a, 0x60, 0x86, 0x48, 0x01, 0x86, 0xF8, 0x45, 0x01,
+ 0x09, 0x02 };
+ static byte messageType[] = { 0x13, 2, '1', '9' };
+
+ PKCS7Attrib attribs[] =
+ {
+ { messageTypeOid, sizeof(messageTypeOid), messageType,
+ sizeof(messageType) },
+ };
+
+ const pkcs7SignedVector testVectors[] =
+ {
+#ifndef NO_RSA
+ #ifndef NO_SHA256
+ /* Signed FirmwarePkgData, RSA, SHA256, no attribs */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ NULL, 0,
+ "pkcs7signedFirmwarePkgData_RSA_SHA256_noattr.der", 0, NULL, 0, 0,
+ 0, 0, NULL, 0, NULL, 0, 0},
+
+ /* Signed FirmwarePkgData, RSA, SHA256, attrs */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedFirmwarePkgData_RSA_SHA256.der", 0, NULL, 0, 0, 0, 0,
+ NULL, 0, NULL, 0, 0},
+
+ /* Signed FirmwarePkgData, RSA, SHA256, SubjectKeyIdentifier, attrs */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedFirmwarePkgData_RSA_SHA256_SKID.der", 0, NULL,
+ 0, CMS_SKID, 0, 0, NULL, 0, NULL, 0, 0},
+
+ /* Signed FirmwraePkgData, RSA, SHA256, server cert and ca cert, attr */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaServerPrivKeyBuf,
+ rsaServerPrivKeyBufSz, rsaServerCertBuf, rsaServerCertBufSz,
+ rsaCaCertBuf, rsaCaCertBufSz,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedFirmwarePkgData_RSA_SHA256_with_ca_cert.der", 0, NULL,
+ 0, 0, 0, 0, NULL, 0, NULL, 0, 0},
+
+ #if defined(WOLFSSL_AES_256) && !defined(NO_PKCS7_ENCRYPTED_DATA)
+ /* Signed Encrypted FirmwarePkgData, RSA, SHA256, no attribs */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ NULL, 0,
+ "pkcs7signedEncryptedFirmwarePkgData_RSA_SHA256_noattr.der", 0,
+ NULL, 0, 0, AES256CBCb, 1, aes256Key, sizeof(aes256Key), NULL, 0, 0},
+
+ /* Signed Encrypted FirmwarePkgData, RSA, SHA256, attribs */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedEncryptedFirmwarePkgData_RSA_SHA256.der", 0,
+ NULL, 0, 0, AES256CBCb, 1, aes256Key, sizeof(aes256Key),
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)), 0},
+ #endif /* WOLFSSL_AES_256 && !NO_PKCS7_ENCRYPTED_DATA */
+
+ #if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+ /* Signed Compressed FirmwarePkgData, RSA, SHA256, no attribs */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ NULL, 0,
+ "pkcs7signedCompressedFirmwarePkgData_RSA_SHA256_noattr.der", 0,
+ NULL, 0, 0, 0, 2, NULL, 0, NULL, 0, 0},
+
+ /* Signed Compressed FirmwarePkgData, RSA, SHA256, attribs */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedCompressedFirmwarePkgData_RSA_SHA256.der", 0,
+ NULL, 0, 0, 0, 2, NULL, 0, NULL, 0, 0},
+
+ #ifndef NO_PKCS7_ENCRYPTED_DATA
+ /* Signed Encrypted Compressed FirmwarePkgData, RSA, SHA256,
+ no attribs */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ NULL, 0,
+ "pkcs7signedEncryptedCompressedFirmwarePkgData_RSA_SHA256_noattr.der",
+ 0, NULL, 0, 0, AES256CBCb, 3, aes256Key, sizeof(aes256Key), NULL,
+ 0, 0},
+
+ /* Signed Encrypted Compressed FirmwarePkgData, RSA, SHA256,
+ attribs */
+ {data, (word32)sizeof(data), SHA256h, RSAk, rsaClientPrivKeyBuf,
+ rsaClientPrivKeyBufSz, rsaClientCertBuf, rsaClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedEncryptedCompressedFirmwarePkgData_RSA_SHA256.der",
+ 0, NULL, 0, 0, AES256CBCb, 3, aes256Key, sizeof(aes256Key),
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)), 0},
+ #endif /* !NO_PKCS7_ENCRYPTED_DATA */
+
+ #endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
+
+ #endif /* NO_SHA256 */
+#endif /* NO_RSA */
+
+#ifdef HAVE_ECC
+ #ifndef NO_SHA256
+ /* Signed FirmwarePkgData, ECDSA, SHA256, no attribs */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ NULL, 0,
+ "pkcs7signedFirmwarePkgData_ECDSA_SHA256_noattr.der", 0, NULL,
+ 0, 0, 0, 0, NULL, 0, NULL, 0, 0},
+
+ /* Signed FirmwarePkgData, ECDSA, SHA256, attribs */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedFirmwarePkgData_ECDSA_SHA256.der", 0, NULL,
+ 0, 0, 0, 0, NULL, 0, NULL, 0, 0},
+
+ /* Signed FirmwarePkgData, ECDSA, SHA256, SubjectKeyIdentifier, attr */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedFirmwarePkgData_ECDSA_SHA256_SKID.der", 0, NULL,
+ 0, CMS_SKID, 0, 0, NULL, 0, NULL, 0, 0},
+
+ #if defined(WOLFSSL_AES_256) && !defined(NO_PKCS7_ENCRYPTED_DATA)
+ /* Signed Encrypted FirmwarePkgData, ECDSA, SHA256, no attribs */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ NULL, 0,
+ "pkcs7signedEncryptedFirmwarePkgData_ECDSA_SHA256_noattr.der", 0, NULL,
+ 0, 0, AES256CBCb, 1, aes256Key, sizeof(aes256Key), NULL, 0, 0},
+
+ /* Signed Encrypted FirmwarePkgData, ECDSA, SHA256, attribs */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedEncryptedFirmwarePkgData_ECDSA_SHA256.der", 0, NULL,
+ 0, 0, AES256CBCb, 1, aes256Key, sizeof(aes256Key),
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)), 0},
+ #endif /* WOLFSSL_AES_256 && !NO_PKCS7_ENCRYPTED_DATA */
+
+ #if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+ /* Signed Compressed FirmwarePkgData, ECDSA, SHA256, no attribs */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ NULL, 0,
+ "pkcs7signedCompressedFirmwarePkgData_ECDSA_SHA256_noattr.der", 0, NULL,
+ 0, 0, 0, 2, NULL, 0, NULL, 0, 0},
+
+ /* Signed Compressed FirmwarePkgData, ECDSA, SHA256, attrib */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedCompressedFirmwarePkgData_ECDSA_SHA256.der", 0, NULL,
+ 0, 0, 0, 2, NULL, 0, NULL, 0, 0},
+
+ #ifndef NO_PKCS7_ENCRYPTED_DATA
+ /* Signed Encrypted Compressed FirmwarePkgData, ECDSA, SHA256,
+ no attribs */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ NULL, 0,
+ "pkcs7signedEncryptedCompressedFirmwarePkgData_ECDSA_SHA256_noattr.der",
+ 0, NULL, 0, 0, AES256CBCb, 3, aes256Key, sizeof(aes256Key), NULL,
+ 0, 0},
+
+ /* Signed Encrypted Compressed FirmwarePkgData, ECDSA, SHA256,
+ attribs */
+ {data, (word32)sizeof(data), SHA256h, ECDSAk, eccClientPrivKeyBuf,
+ eccClientPrivKeyBufSz, eccClientCertBuf, eccClientCertBufSz, NULL, 0,
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)),
+ "pkcs7signedEncryptedCompressedFirmwarePkgData_ECDSA_SHA256.der",
+ 0, NULL, 0, 0, AES256CBCb, 3, aes256Key, sizeof(aes256Key),
+ attribs, (sizeof(attribs)/sizeof(PKCS7Attrib)), 0},
+ #endif /* !NO_PKCS7_ENCRYPTED_DATA */
+
+ #endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
+
+ #endif /* NO_SHA256 */
+#endif /* HAVE_ECC */
+ };
+
+ testSz = sizeof(testVectors) / sizeof(pkcs7SignedVector);
+
+ outSz = FOURK_BUF;
+ out = (byte*)XMALLOC(outSz, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (out == NULL)
+ return -12130;
+
+ XMEMSET(out, 0, outSz);
+
+ ret = wc_PKCS7_PadData((byte*)data, sizeof(data), out, outSz, 16);
+ if (ret < 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -12131;
}
- keyDerSz = (word32)fread(keyDer, 1, FOURK_BUF, file);
- fclose(file);
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
ret = wc_InitRng(&rng);
+#endif
if (ret != 0) {
- free(certDer);
- free(keyDer);
- free(out);
- return -210;
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ return -12132;
}
- senderNonce[0] = 0x04;
- senderNonce[1] = PKCS7_NONCE_SZ;
+ for (i = 0; i < testSz; i++) {
+ pkcs7 = wc_PKCS7_New(HEAP_HINT, devId);
+ if (pkcs7 == NULL)
+ return -12133;
- ret = wc_RNG_GenerateBlock(&rng, &senderNonce[2], PKCS7_NONCE_SZ);
- if (ret != 0) {
- free(certDer);
- free(keyDer);
- free(out);
- return -211;
- }
+ ret = wc_PKCS7_InitWithCert(pkcs7, testVectors[i].cert,
+ (word32)testVectors[i].certSz);
- wc_PKCS7_InitWithCert(&msg, certDer, certDerSz);
- msg.privateKey = keyDer;
- msg.privateKeySz = keyDerSz;
- msg.content = (byte*)data;
- msg.contentSz = dataSz;
- msg.hashOID = SHAh;
- msg.encryptOID = RSAk;
- msg.signedAttribs = attribs;
- msg.signedAttribsSz = sizeof(attribs)/sizeof(PKCS7Attrib);
- msg.rng = &rng;
- {
- Sha sha;
- byte digest[SHA_DIGEST_SIZE];
- int i,j;
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12134;
+ }
- transId[0] = 0x13;
- transId[1] = SHA_DIGEST_SIZE * 2;
+ /* load CA certificate, if present */
+ if (testVectors[i].caCert != NULL) {
+ ret = wc_PKCS7_AddCertificate(pkcs7, testVectors[i].caCert,
+ (word32)testVectors[i].caCertSz);
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12135;
+ }
+ }
- ret = wc_InitSha(&sha);
- if (ret != 0) {
- free(certDer);
- free(keyDer);
- free(out);
- return -4003;
+ /* set SignerIdentifier to use SubjectKeyIdentifier if desired,
+ default is IssuerAndSerialNumber */
+ if (testVectors[i].sidType == CMS_SKID) {
+ ret = wc_PKCS7_SetSignerIdentifierType(pkcs7, CMS_SKID);
+ if (ret != 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12136;
+ }
}
- wc_ShaUpdate(&sha, msg.publicKey, msg.publicKeySz);
- wc_ShaFinal(&sha, digest);
- for (i = 0, j = 2; i < SHA_DIGEST_SIZE; i++, j += 2) {
- snprintf((char*)&transId[j], 3, "%02x", digest[i]);
+ if (testVectors[i].encCompFlag == 0) {
+
+ /* encode Signed FirmwarePkgData */
+ encodedSz = wc_PKCS7_EncodeSignedFPD(pkcs7,
+ testVectors[i].privateKey, testVectors[i].privateKeySz,
+ testVectors[i].signOID, testVectors[i].hashOID,
+ (byte*)testVectors[i].content, testVectors[i].contentSz,
+ testVectors[i].signedAttribs,
+ testVectors[i].signedAttribsSz, out, outSz);
+
+ if (encodedSz < 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12137;
+ }
+
+ #ifndef NO_PKCS7_ENCRYPTED_DATA
+
+ } else if (testVectors[i].encCompFlag == 1) {
+
+ /* encode Signed Encrypted FirmwarePkgData */
+ encodedSz = wc_PKCS7_EncodeSignedEncryptedFPD(pkcs7,
+ testVectors[i].encryptKey, testVectors[i].encryptKeySz,
+ testVectors[i].privateKey, testVectors[i].privateKeySz,
+ testVectors[i].encryptOID, testVectors[i].signOID,
+ testVectors[i].hashOID, (byte*)testVectors[i].content,
+ testVectors[i].contentSz, testVectors[i].unprotectedAttribs,
+ testVectors[i].unprotectedAttribsSz,
+ testVectors[i].signedAttribs,
+ testVectors[i].signedAttribsSz, out, outSz);
+
+ if (encodedSz <= 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12138;
+ }
+ #endif
+
+ #if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+ } else if (testVectors[i].encCompFlag == 2) {
+
+ /* encode Signed Compressed FirmwarePkgData */
+ encodedSz = wc_PKCS7_EncodeSignedCompressedFPD(pkcs7,
+ testVectors[i].privateKey, testVectors[i].privateKeySz,
+ testVectors[i].signOID, testVectors[i].hashOID,
+ (byte*)testVectors[i].content, testVectors[i].contentSz,
+ testVectors[i].signedAttribs,
+ testVectors[i].signedAttribsSz, out, outSz);
+
+ if (encodedSz <= 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12139;
+ }
+
+ #ifndef NO_PKCS7_ENCRYPTED_DATA
+ } else if (testVectors[i].encCompFlag == 3) {
+
+ /* encode Signed Encrypted Compressed FirmwarePkgData */
+ encodedSz = wc_PKCS7_EncodeSignedEncryptedCompressedFPD(pkcs7,
+ testVectors[i].encryptKey, testVectors[i].encryptKeySz,
+ testVectors[i].privateKey, testVectors[i].privateKeySz,
+ testVectors[i].encryptOID, testVectors[i].signOID,
+ testVectors[i].hashOID, (byte*)testVectors[i].content,
+ testVectors[i].contentSz, testVectors[i].unprotectedAttribs,
+ testVectors[i].unprotectedAttribsSz,
+ testVectors[i].signedAttribs,
+ testVectors[i].signedAttribsSz, out, outSz);
+
+ if (encodedSz <= 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12140;
+ }
+
+ #endif /* NO_PKCS7_ENCRYPTED_DATA */
+ #endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
+
+ } else {
+ /* unsupported SignedData single-shot combination */
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12141;
+ }
+
+ #ifdef PKCS7_OUTPUT_TEST_BUNDLES
+ /* write PKCS#7 to output file for more testing */
+ file = XFOPEN(testVectors[i].outFileName, "wb");
+ if (!file) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12142;
+ }
+ ret = (int)XFWRITE(out, 1, encodedSz, file);
+ XFCLOSE(file);
+ if (ret != (int)encodedSz) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12143;
+ }
+ #endif /* PKCS7_OUTPUT_TEST_BUNDLES */
+
+ wc_PKCS7_Free(pkcs7);
+
+ pkcs7 = wc_PKCS7_New(HEAP_HINT, devId);
+ if (pkcs7 == NULL)
+ return -12144;
+ wc_PKCS7_InitWithCert(pkcs7, NULL, 0);
+
+ ret = wc_PKCS7_VerifySignedData(pkcs7, out, outSz);
+ if (ret < 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12145;
+ }
+#ifndef NO_PKCS7_STREAM
+ {
+ word32 z;
+ for (z = 0; z < outSz && ret != 0; z++) {
+ ret = wc_PKCS7_VerifySignedData(pkcs7, out + z, 1);
+ if (ret < 0 && ret != WC_PKCS7_WANT_READ_E) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ printf("unexpected error %d\n", ret);
+ return -12146;
+ }
+ }
+ }
+#endif
+
+ if (pkcs7->singleCert == NULL || pkcs7->singleCertSz == 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12147;
}
+
+ if (testVectors[i].encCompFlag == 0) {
+ /* verify decoded content matches expected */
+ if ((pkcs7->contentSz != testVectors[i].contentSz) ||
+ XMEMCMP(pkcs7->content, testVectors[i].content,
+ pkcs7->contentSz)) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12148;
+ }
+
+ }
+ #ifndef NO_PKCS7_ENCRYPTED_DATA
+ else if (testVectors[i].encCompFlag == 1) {
+
+ /* decrypt inner encryptedData */
+ pkcs7->encryptionKey = testVectors[i].encryptKey;
+ pkcs7->encryptionKeySz = testVectors[i].encryptKeySz;
+
+ ret = wc_PKCS7_DecodeEncryptedData(pkcs7, pkcs7->content,
+ pkcs7->contentSz, out, outSz);
+ if (ret < 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12149;
+ }
+
+ /* compare decrypted to expected */
+ if (((word32)ret != testVectors[i].contentSz) ||
+ XMEMCMP(out, testVectors[i].content, ret)) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12150;
+ }
+ }
+ #endif
+ #if defined(HAVE_LIBZ) && !defined(NO_PKCS7_COMPRESSED_DATA)
+ else if (testVectors[i].encCompFlag == 2) {
+
+ /* decompress inner compressedData */
+ ret = wc_PKCS7_DecodeCompressedData(pkcs7, pkcs7->content,
+ pkcs7->contentSz, out, outSz);
+ if (ret < 0) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12151;
+ }
+
+ /* compare decompressed to expected */
+ if (((word32)ret != testVectors[i].contentSz) ||
+ XMEMCMP(out, testVectors[i].content, ret)) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12152;
+ }
+ }
+ #ifndef NO_PKCS7_ENCRYPTED_DATA
+ else if (testVectors[i].encCompFlag == 3) {
+
+ byte* encryptedTmp;
+ int encryptedTmpSz;
+
+ encryptedTmpSz = FOURK_BUF;
+ encryptedTmp = (byte*)XMALLOC(encryptedTmpSz, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (encryptedTmp == NULL) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12153;
+ }
+
+ XMEMSET(encryptedTmp, 0, encryptedTmpSz);
+
+ /* decrypt inner encryptedData */
+ pkcs7->encryptionKey = testVectors[i].encryptKey;
+ pkcs7->encryptionKeySz = testVectors[i].encryptKeySz;
+
+ encryptedTmpSz = wc_PKCS7_DecodeEncryptedData(pkcs7, pkcs7->content,
+ pkcs7->contentSz, encryptedTmp,
+ encryptedTmpSz);
+
+ if (encryptedTmpSz < 0) {
+ XFREE(encryptedTmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12154;
+ }
+
+ /* decompress inner compressedData */
+ ret = wc_PKCS7_DecodeCompressedData(pkcs7, encryptedTmp,
+ encryptedTmpSz, out, outSz);
+ if (ret < 0) {
+ XFREE(encryptedTmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12155;
+ }
+
+ XFREE(encryptedTmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+
+ /* compare decompressed to expected */
+ if (((word32)ret != testVectors[i].contentSz) ||
+ XMEMCMP(out, testVectors[i].content, ret)) {
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_PKCS7_Free(pkcs7);
+ return -12156;
+ }
+ }
+ #endif /* NO_PKCS7_ENCRYPTED_DATA */
+ #endif /* HAVE_LIBZ && !NO_PKCS7_COMPRESSED_DATA */
+
+ wc_PKCS7_Free(pkcs7);
}
- ret = wc_PKCS7_EncodeSignedData(&msg, out, outSz);
- if (ret < 0) {
- free(certDer);
- free(keyDer);
- free(out);
- wc_PKCS7_Free(&msg);
- return -212;
+
+ XFREE(out, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ wc_FreeRng(&rng);
+
+ if (ret > 0)
+ return 0;
+
+ (void)eccClientCertBuf;
+ (void)eccClientCertBufSz;
+ (void)eccClientPrivKeyBuf;
+ (void)eccClientPrivKeyBufSz;
+
+ (void)rsaClientCertBuf;
+ (void)rsaClientCertBufSz;
+ (void)rsaClientPrivKeyBuf;
+ (void)rsaClientPrivKeyBufSz;
+ (void)rsaServerCertBuf;
+ (void)rsaServerCertBufSz;
+ (void)rsaServerPrivKeyBuf;
+ (void)rsaServerPrivKeyBufSz;
+ (void)rsaCaCertBuf;
+ (void)rsaCaCertBufSz;
+ (void)rsaCaPrivKeyBuf;
+ (void)rsaCaPrivKeyBufSz;
+
+ return ret;
+}
+
+
+int pkcs7signed_test(void)
+{
+ int ret = 0;
+
+ byte* rsaClientCertBuf = NULL;
+ byte* rsaServerCertBuf = NULL;
+ byte* rsaCaCertBuf = NULL;
+ byte* eccClientCertBuf = NULL;
+ byte* rsaClientPrivKeyBuf = NULL;
+ byte* rsaServerPrivKeyBuf = NULL;
+ byte* rsaCaPrivKeyBuf = NULL;
+ byte* eccClientPrivKeyBuf = NULL;
+
+ word32 rsaClientCertBufSz = 0;
+ word32 rsaServerCertBufSz = 0;
+ word32 rsaCaCertBufSz = 0;
+ word32 eccClientCertBufSz = 0;
+ word32 rsaClientPrivKeyBufSz = 0;
+ word32 rsaServerPrivKeyBufSz = 0;
+ word32 rsaCaPrivKeyBufSz = 0;
+ word32 eccClientPrivKeyBufSz = 0;
+
+#ifndef NO_RSA
+ /* read client RSA cert and key in DER format */
+ rsaClientCertBuf = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (rsaClientCertBuf == NULL)
+ ret = -12200;
+
+ rsaClientPrivKeyBuf = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (ret == 0 && rsaClientPrivKeyBuf == NULL) {
+ ret = -12201;
}
- else
- outSz = ret;
- /* write PKCS#7 to output file for more testing */
- file = fopen("./pkcs7signedData.der", "wb");
- if (!file) {
- free(certDer);
- free(keyDer);
- free(out);
- wc_PKCS7_Free(&msg);
- return -213;
- }
- ret = (int)fwrite(out, 1, outSz, file);
- fclose(file);
- if (ret != (int)outSz) {
- free(certDer);
- free(keyDer);
- free(out);
- wc_PKCS7_Free(&msg);
- return -218;
- }
-
- wc_PKCS7_Free(&msg);
- wc_PKCS7_InitWithCert(&msg, NULL, 0);
-
- ret = wc_PKCS7_VerifySignedData(&msg, out, outSz);
+ rsaClientCertBufSz = FOURK_BUF;
+ rsaClientPrivKeyBufSz = FOURK_BUF;
+
+ /* read server RSA cert and key in DER format */
+ rsaServerCertBuf = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (ret == 0 && rsaServerCertBuf == NULL)
+ ret = -12202;
+
+ rsaServerPrivKeyBuf = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (ret == 0 && rsaServerPrivKeyBuf == NULL) {
+ ret = -12203;
+ }
+
+ rsaServerCertBufSz = FOURK_BUF;
+ rsaServerPrivKeyBufSz = FOURK_BUF;
+
+ /* read CA RSA cert and key in DER format, for use with server cert */
+ rsaCaCertBuf = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (ret == 0 && rsaCaCertBuf == NULL)
+ ret = -12204;
+
+ rsaCaPrivKeyBuf = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (ret == 0 && rsaCaPrivKeyBuf == NULL) {
+ ret = -12205;
+ }
+
+ rsaCaCertBufSz = FOURK_BUF;
+ rsaCaPrivKeyBufSz = FOURK_BUF;
+#endif /* NO_RSA */
+
+#ifdef HAVE_ECC
+ /* read client ECC cert and key in DER format */
+ eccClientCertBuf = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (ret == 0 && eccClientCertBuf == NULL) {
+ ret = -12206;
+ }
+
+ eccClientPrivKeyBuf =(byte*)XMALLOC(FOURK_BUF, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (ret == 0 && eccClientPrivKeyBuf == NULL) {
+ ret = -12207;
+ }
+
+ eccClientCertBufSz = FOURK_BUF;
+ eccClientPrivKeyBufSz = FOURK_BUF;
+#endif /* HAVE_ECC */
+
+ if (ret >= 0)
+ ret = pkcs7_load_certs_keys(rsaClientCertBuf, &rsaClientCertBufSz,
+ rsaClientPrivKeyBuf, &rsaClientPrivKeyBufSz,
+ rsaServerCertBuf, &rsaServerCertBufSz,
+ rsaServerPrivKeyBuf, &rsaServerPrivKeyBufSz,
+ rsaCaCertBuf, &rsaCaCertBufSz,
+ rsaCaPrivKeyBuf, &rsaCaPrivKeyBufSz,
+ eccClientCertBuf, &eccClientCertBufSz,
+ eccClientPrivKeyBuf, &eccClientPrivKeyBufSz);
if (ret < 0) {
- free(certDer);
- free(keyDer);
- free(out);
- wc_PKCS7_Free(&msg);
- return -214;
+ ret = -12208;
+ }
+
+ if (ret >= 0)
+ ret = pkcs7signed_run_vectors(rsaClientCertBuf, (word32)rsaClientCertBufSz,
+ rsaClientPrivKeyBuf, (word32)rsaClientPrivKeyBufSz,
+ rsaServerCertBuf, (word32)rsaServerCertBufSz,
+ rsaServerPrivKeyBuf, (word32)rsaServerPrivKeyBufSz,
+ rsaCaCertBuf, (word32)rsaCaCertBufSz,
+ rsaCaPrivKeyBuf, (word32)rsaCaPrivKeyBufSz,
+ eccClientCertBuf, (word32)eccClientCertBufSz,
+ eccClientPrivKeyBuf, (word32)eccClientPrivKeyBufSz);
+
+ if (ret >= 0)
+ ret = pkcs7signed_run_SingleShotVectors(
+ rsaClientCertBuf, (word32)rsaClientCertBufSz,
+ rsaClientPrivKeyBuf, (word32)rsaClientPrivKeyBufSz,
+ rsaServerCertBuf, (word32)rsaServerCertBufSz,
+ rsaServerPrivKeyBuf, (word32)rsaServerPrivKeyBufSz,
+ rsaCaCertBuf, (word32)rsaCaCertBufSz,
+ rsaCaPrivKeyBuf, (word32)rsaCaPrivKeyBufSz,
+ eccClientCertBuf, (word32)eccClientCertBufSz,
+ eccClientPrivKeyBuf, (word32)eccClientPrivKeyBufSz);
+
+#ifndef NO_AES
+ if (ret >= 0)
+ ret = pkcs7callback_test(
+ rsaClientCertBuf, (word32)rsaClientCertBufSz,
+ rsaClientPrivKeyBuf, (word32)rsaClientPrivKeyBufSz);
+#endif
+
+ XFREE(rsaClientCertBuf, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaClientPrivKeyBuf, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaServerCertBuf, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaServerPrivKeyBuf, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaCaCertBuf, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(rsaCaPrivKeyBuf, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(eccClientCertBuf, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(eccClientPrivKeyBuf, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+
+ return ret;
+}
+
+#endif /* HAVE_PKCS7 */
+
+#ifdef HAVE_VALGRIND
+/* Need a static build to have access to symbols. */
+
+/* Maximum number of bytes in a number to test. */
+#define MP_MAX_TEST_BYTE_LEN 16
+
+#if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN)
+static int randNum(mp_int* n, int len, WC_RNG* rng, void* heap)
+{
+ byte d[MP_MAX_TEST_BYTE_LEN];
+ int ret;
+
+ (void)heap;
+
+ do {
+ ret = wc_RNG_GenerateBlock(rng, d, len);
+ if (ret != 0)
+ return ret;
+ ret = mp_read_unsigned_bin(n, d, len);
+ if (ret != 0)
+ return ret;
+ } while (mp_iszero(n));
+
+ return 0;
+}
+#endif
+
+int mp_test(void)
+{
+ WC_RNG rng;
+ int ret;
+#if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN)
+ int i, j, k;
+ mp_digit d;
+#endif
+ mp_int a, b, r1, r2, p;
+
+ ret = mp_init_multi(&a, &b, &r1, &r2, NULL, NULL);
+ if (ret != 0)
+ return -12300;
+
+ mp_init_copy(&p, &a);
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0)
+ goto done;
+
+#if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN)
+ mp_set_int(&a, 0);
+ if (a.used != 0 || a.dp[0] != 0)
+ return -12301;
+
+ for (j = 1; j <= MP_MAX_TEST_BYTE_LEN; j++) {
+ for (i = 0; i < 4 * j; i++) {
+ /* New values to use. */
+ ret = randNum(&p, j, &rng, NULL);
+ if (ret != 0)
+ return -12302;
+ ret = randNum(&a, j, &rng, NULL);
+ if (ret != 0)
+ return -12303;
+ ret = randNum(&b, j, &rng, NULL);
+ if (ret != 0)
+ return -12304;
+ ret = wc_RNG_GenerateBlock(&rng, (byte*)&d, sizeof(d));
+ if (ret != 0)
+ return -12305;
+ d &= MP_MASK;
+
+ /* Ensure sqrmod produce same result as mulmod. */
+ ret = mp_sqrmod(&a, &p, &r1);
+ if (ret != 0)
+ return -12306;
+ ret = mp_mulmod(&a, &a, &p, &r2);
+ if (ret != 0)
+ return -12307;
+ if (mp_cmp(&r1, &r2) != 0)
+ return -12308;
+
+ /* Ensure add with mod produce same result as sub with mod. */
+ ret = mp_addmod(&a, &b, &p, &r1);
+ if (ret != 0)
+ return -12309;
+ b.sign ^= 1;
+ ret = mp_submod(&a, &b, &p, &r2);
+ if (ret != 0)
+ return -12310;
+ if (mp_cmp(&r1, &r2) != 0)
+ return -12311;
+
+ /* Ensure add digit produce same result as sub digit. */
+ ret = mp_add_d(&a, d, &r1);
+ if (ret != 0)
+ return -12312;
+ ret = mp_sub_d(&r1, d, &r2);
+ if (ret != 0)
+ return -12313;
+ if (mp_cmp(&a, &r2) != 0)
+ return -12314;
+
+ /* Invert - if p is even it will use the slow impl.
+ * - if p and a are even it will fail.
+ */
+ ret = mp_invmod(&a, &p, &r1);
+ if (ret != 0 && ret != MP_VAL)
+ return -12315;
+ ret = 0;
+
+ /* Shift up and down number all bits in a digit. */
+ for (k = 0; k < DIGIT_BIT; k++) {
+ mp_mul_2d(&a, k, &r1);
+ mp_div_2d(&r1, k, &r2, &p);
+ if (mp_cmp(&a, &r2) != 0)
+ return -12316;
+ if (!mp_iszero(&p))
+ return -12317;
+ mp_rshb(&r1, k);
+ if (mp_cmp(&a, &r1) != 0)
+ return -12318;
+ }
+ }
}
- if (msg.singleCert == NULL || msg.singleCertSz == 0) {
- free(certDer);
- free(keyDer);
- free(out);
- wc_PKCS7_Free(&msg);
- return -215;
+ /* Check that setting a 32-bit digit works. */
+ d &= 0xffffffff;
+ mp_set_int(&a, d);
+ if (a.used != 1 || a.dp[0] != d)
+ return -12319;
+
+ /* Check setting a bit and testing a bit works. */
+ for (i = 0; i < MP_MAX_TEST_BYTE_LEN * 8; i++) {
+ mp_zero(&a);
+ mp_set_bit(&a, i);
+ if (!mp_is_bit_set(&a, i))
+ return -12320;
}
+#endif
- file = fopen("./pkcs7cert.der", "wb");
- if (!file) {
- free(certDer);
- free(keyDer);
- free(out);
- wc_PKCS7_Free(&msg);
- return -216;
+done:
+ mp_clear(&p);
+ mp_clear(&r2);
+ mp_clear(&r1);
+ mp_clear(&b);
+ mp_clear(&a);
+ wc_FreeRng(&rng);
+ return ret;
+}
+#endif
+
+
+#if defined(WOLFSSL_PUBLIC_MP) && defined(WOLFSSL_KEY_GEN)
+
+typedef struct pairs_t {
+ const unsigned char* coeff;
+ int coeffSz;
+ int exp;
+} pairs_t;
+
+
+/*
+n =p1p2p3, where pi = ki(p1−1)+1 with (k2,k3) = (173,293)
+p1 = 2^192 * 0x000000000000e24fd4f6d6363200bf2323ec46285cac1d3a
+ + 2^0 * 0x0b2488b0c29d96c5e67f8bec15b54b189ae5636efe89b45b
+*/
+
+static const unsigned char c192a[] =
+{
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0x4f,
+ 0xd4, 0xf6, 0xd6, 0x36, 0x32, 0x00, 0xbf, 0x23,
+ 0x23, 0xec, 0x46, 0x28, 0x5c, 0xac, 0x1d, 0x3a
+};
+static const unsigned char c0a[] =
+{
+ 0x0b, 0x24, 0x88, 0xb0, 0xc2, 0x9d, 0x96, 0xc5,
+ 0xe6, 0x7f, 0x8b, 0xec, 0x15, 0xb5, 0x4b, 0x18,
+ 0x9a, 0xe5, 0x63, 0x6e, 0xfe, 0x89, 0xb4, 0x5b
+};
+
+static const pairs_t ecPairsA[] =
+{
+ {c192a, sizeof(c192a), 192},
+ {c0a, sizeof(c0a), 0}
+};
+
+static const int kA[] = {173, 293};
+
+static const unsigned char controlPrime[] = {
+ 0xe1, 0x76, 0x45, 0x80, 0x59, 0xb6, 0xd3, 0x49,
+ 0xdf, 0x0a, 0xef, 0x12, 0xd6, 0x0f, 0xf0, 0xb7,
+ 0xcb, 0x2a, 0x37, 0xbf, 0xa7, 0xf8, 0xb5, 0x4d,
+ 0xf5, 0x31, 0x35, 0xad, 0xe4, 0xa3, 0x94, 0xa1,
+ 0xdb, 0xf1, 0x96, 0xad, 0xb5, 0x05, 0x64, 0x85,
+ 0x83, 0xfc, 0x1b, 0x5b, 0x29, 0xaa, 0xbe, 0xf8,
+ 0x26, 0x3f, 0x76, 0x7e, 0xad, 0x1c, 0xf0, 0xcb,
+ 0xd7, 0x26, 0xb4, 0x1b, 0x05, 0x8e, 0x56, 0x86,
+ 0x7e, 0x08, 0x62, 0x21, 0xc1, 0x86, 0xd6, 0x47,
+ 0x79, 0x3e, 0xb7, 0x5d, 0xa4, 0xc6, 0x3a, 0xd7,
+ 0xb1, 0x74, 0x20, 0xf6, 0x50, 0x97, 0x41, 0x04,
+ 0x53, 0xed, 0x3f, 0x26, 0xd6, 0x6f, 0x91, 0xfa,
+ 0x68, 0x26, 0xec, 0x2a, 0xdc, 0x9a, 0xf1, 0xe7,
+ 0xdc, 0xfb, 0x73, 0xf0, 0x79, 0x43, 0x1b, 0x21,
+ 0xa3, 0x59, 0x04, 0x63, 0x52, 0x07, 0xc9, 0xd7,
+ 0xe6, 0xd1, 0x1b, 0x5d, 0x5e, 0x96, 0xfa, 0x53
+};
+
+static const unsigned char testOne[] = { 1 };
+
+
+static int GenerateNextP(mp_int* p1, mp_int* p2, int k)
+{
+ int ret;
+ mp_int ki;
+
+ ret = mp_init(&ki);
+ if (ret == 0)
+ ret = mp_set(&ki, k);
+ if (ret == 0)
+ ret = mp_sub_d(p1, 1, p2);
+ if (ret == 0)
+ ret = mp_mul(p2, &ki, p2);
+ if (ret == 0)
+ ret = mp_add_d(p2, 1, p2);
+ mp_clear(&ki);
+
+ return ret;
+}
+
+
+static int GenerateP(mp_int* p1, mp_int* p2, mp_int* p3,
+ const pairs_t* ecPairs, int ecPairsSz,
+ const int* k)
+{
+ mp_int x,y;
+ int ret, i;
+
+ ret = mp_init(&x);
+ if (ret == 0) {
+ ret = mp_init(&y);
+ if (ret != 0) {
+ mp_clear(&x);
+ return MP_MEM;
+ }
}
- ret = (int)fwrite(msg.singleCert, 1, msg.singleCertSz, file);
- fclose(file);
+ for (i = 0; ret == 0 && i < ecPairsSz; i++) {
+ ret = mp_read_unsigned_bin(&x, ecPairs[i].coeff, ecPairs[i].coeffSz);
+ /* p1 = 2^exp */
+ if (ret == 0)
+ ret = mp_2expt(&y, ecPairs[i].exp);
+ /* p1 = p1 * m */
+ if (ret == 0)
+ ret = mp_mul(&x, &y, &x);
+ /* p1 += */
+ if (ret == 0)
+ ret = mp_add(p1, &x, p1);
+ mp_zero(&x);
+ mp_zero(&y);
+ }
+ mp_clear(&x);
+ mp_clear(&y);
+
+ if (ret == 0)
+ ret = GenerateNextP(p1, p2, k[0]);
+ if (ret == 0)
+ ret = GenerateNextP(p1, p3, k[1]);
- free(certDer);
- free(keyDer);
- free(out);
- wc_PKCS7_Free(&msg);
+ return ret;
+}
+
+int prime_test(void)
+{
+ mp_int n, p1, p2, p3;
+ int ret, isPrime = 0;
+ WC_RNG rng;
+
+ ret = wc_InitRng(&rng);
+ if (ret == 0)
+ ret = mp_init_multi(&n, &p1, &p2, &p3, NULL, NULL);
+ if (ret == 0)
+ ret = GenerateP(&p1, &p2, &p3,
+ ecPairsA, sizeof(ecPairsA) / sizeof(ecPairsA[0]), kA);
+ if (ret == 0)
+ ret = mp_mul(&p1, &p2, &n);
+ if (ret == 0)
+ ret = mp_mul(&n, &p3, &n);
+ if (ret != 0)
+ return -12400;
+
+ /* Check the old prime test using the number that false positives.
+ * This test result should indicate as not prime. */
+ ret = mp_prime_is_prime(&n, 40, &isPrime);
+ if (ret != 0)
+ return -12401;
+ if (isPrime)
+ return -12402;
+
+ /* This test result should fail. It should indicate the value as prime. */
+ ret = mp_prime_is_prime(&n, 8, &isPrime);
+ if (ret != 0)
+ return -12403;
+ if (!isPrime)
+ return -12404;
+ /* This test result should indicate the value as not prime. */
+ ret = mp_prime_is_prime_ex(&n, 8, &isPrime, &rng);
+ if (ret != 0)
+ return -12405;
+ if (isPrime)
+ return -12406;
+
+ ret = mp_read_unsigned_bin(&n, controlPrime, sizeof(controlPrime));
+ if (ret != 0)
+ return -12407;
+
+ /* This test result should indicate the value as prime. */
+ ret = mp_prime_is_prime_ex(&n, 8, &isPrime, &rng);
+ if (ret != 0)
+ return -12408;
+ if (!isPrime)
+ return -12409;
+
+ /* This test result should indicate the value as prime. */
+ isPrime = -1;
+ ret = mp_prime_is_prime(&n, 8, &isPrime);
+ if (ret != 0)
+ return -12410;
+ if (!isPrime)
+ return -12411;
+
+ ret = mp_read_unsigned_bin(&n, testOne, sizeof(testOne));
+ if (ret != 0)
+ return -12412;
+
+ /* This test result should indicate the value as not prime. */
+ ret = mp_prime_is_prime_ex(&n, 8, &isPrime, &rng);
+ if (ret != 0)
+ return -12413;
+ if (isPrime)
+ return -12414;
+
+ ret = mp_prime_is_prime(&n, 8, &isPrime);
+ if (ret != 0)
+ return -12415;
+ if (isPrime)
+ return -12416;
+
+ mp_clear(&p3);
+ mp_clear(&p2);
+ mp_clear(&p1);
+ mp_clear(&n);
wc_FreeRng(&rng);
- if (ret > 0)
- return 0;
+ return 0;
+}
+
+#endif /* WOLFSSL_PUBLIC_MP */
+
+
+#if defined(ASN_BER_TO_DER) && \
+ (defined(WOLFSSL_TEST_CERT) || defined(OPENSSL_EXTRA) || \
+ defined(OPENSSL_EXTRA_X509_SMALL))
+/* wc_BerToDer is only public facing in the case of test cert or opensslextra */
+typedef struct berDerTestData {
+ const byte *in;
+ word32 inSz;
+ const byte *out;
+ word32 outSz;
+} berDerTestData;
+
+int berder_test(void)
+{
+ int ret;
+ int i;
+ word32 len = 0, l;
+ byte out[32];
+ static const byte good1_in[] = { 0x30, 0x80, 0x00, 0x00 };
+ static const byte good1_out[] = { 0x30, 0x00 };
+ static const byte good2_in[] = { 0x30, 0x80, 0x02, 0x01, 0x01, 0x00, 0x00 };
+ static const byte good2_out[] = { 0x30, 0x03, 0x02, 0x01, 0x01 };
+ static const byte good3_in[] = {
+ 0x24, 0x80, 0x04, 0x01, 0x01, 0x00, 0x00
+ };
+ static const byte good3_out[] = { 0x04, 0x1, 0x01 };
+ static const byte good4_in[] = {
+ 0x30, 0x80,
+ 0x02, 0x01, 0x01,
+ 0x30, 0x80,
+ 0x24, 0x80,
+ 0x04, 0x01, 0x01,
+ 0x04, 0x02, 0x02, 0x03,
+ 0x00, 0x00,
+ 0x06, 0x01, 0x01,
+ 0x00, 0x00,
+ 0x31, 0x80,
+ 0x06, 0x01, 0x01,
+ 0x00, 0x00,
+ 0x00, 0x00,
+ };
+ static const byte good4_out[] = {
+ 0x30, 0x12,
+ 0x02, 0x01, 0x01,
+ 0x30, 0x08,
+ 0x04, 0x03, 0x01, 0x02, 0x03,
+ 0x06, 0x01, 0x01,
+ 0x31, 0x03,
+ 0x06, 0x01, 0x01
+ };
+ static const byte good5_in[] = { 0x30, 0x03, 0x02, 0x01, 0x01 };
+
+ berDerTestData testData[] = {
+ { good1_in, sizeof(good1_in), good1_out, sizeof(good1_out) },
+ { good2_in, sizeof(good2_in), good2_out, sizeof(good2_out) },
+ { good3_in, sizeof(good3_in), good3_out, sizeof(good3_out) },
+ { good4_in, sizeof(good4_in), good4_out, sizeof(good4_out) },
+ { good5_in, sizeof(good5_in), good5_in , sizeof(good5_in ) },
+ };
+
+ for (i = 0; i < (int)(sizeof(testData) / sizeof(*testData)); i++) {
+ ret = wc_BerToDer(testData[i].in, testData[i].inSz, NULL, &len);
+ if (ret != LENGTH_ONLY_E)
+ return -12500 - i;
+ if (len != testData[i].outSz)
+ return -12510 - i;
+ len = testData[i].outSz;
+ ret = wc_BerToDer(testData[i].in, testData[i].inSz, out, &len);
+ if (ret != 0)
+ return -12520 - i;
+ if (XMEMCMP(out, testData[i].out, len) != 0)
+ return -12530 - i;
+
+ for (l = 1; l < testData[i].inSz; l++) {
+ ret = wc_BerToDer(testData[i].in, l, NULL, &len);
+ if (ret != ASN_PARSE_E)
+ return -12540;
+ len = testData[i].outSz;
+ ret = wc_BerToDer(testData[i].in, l, out, &len);
+ if (ret != ASN_PARSE_E)
+ return -12541;
+ }
+
+ for (l = 0; l < testData[i].outSz-1; l++) {
+ ret = wc_BerToDer(testData[i].in, testData[i].inSz, out, &l);
+ if (ret != BUFFER_E)
+ return -12542;
+ }
+ }
+
+ ret = wc_BerToDer(NULL, 4, NULL, NULL);
+ if (ret != BAD_FUNC_ARG)
+ return -12543;
+ ret = wc_BerToDer(out, 4, NULL, NULL);
+ if (ret != BAD_FUNC_ARG)
+ return -12544;
+ ret = wc_BerToDer(NULL, 4, NULL, &len);
+ if (ret != BAD_FUNC_ARG)
+ return -12545;
+ ret = wc_BerToDer(NULL, 4, out, NULL);
+ if (ret != BAD_FUNC_ARG)
+ return -12546;
+ ret = wc_BerToDer(out, 4, out, NULL);
+ if (ret != BAD_FUNC_ARG)
+ return -12547;
+ ret = wc_BerToDer(NULL, 4, out, &len);
+ if (ret != BAD_FUNC_ARG)
+ return -12548;
+
+ for (l = 1; l < sizeof(good4_out); l++) {
+ len = l;
+ ret = wc_BerToDer(good4_in, sizeof(good4_in), out, &len);
+ if (ret != BUFFER_E)
+ return -12549;
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef DEBUG_WOLFSSL
+static THREAD_LS_T int log_cnt = 0;
+static void my_Logging_cb(const int logLevel, const char *const logMessage)
+{
+ (void)logLevel;
+ (void)logMessage;
+ log_cnt++;
+}
+#endif /* DEBUG_WOLFSSL */
+
+int logging_test(void)
+{
+#ifdef DEBUG_WOLFSSL
+ const char* msg = "Testing, testing. 1, 2, 3, 4 ...";
+ byte a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+ byte b[256];
+ int i;
+
+ for (i = 0; i < (int)sizeof(b); i++)
+ b[i] = i;
+
+ if (wolfSSL_Debugging_ON() != 0)
+ return -12600;
+
+ if (wolfSSL_SetLoggingCb(my_Logging_cb) != 0)
+ return -12601;
+
+ WOLFSSL_MSG(msg);
+ WOLFSSL_BUFFER(a, sizeof(a));
+ WOLFSSL_BUFFER(b, sizeof(b));
+ WOLFSSL_BUFFER(NULL, 0);
+ WOLFSSL_ERROR(MEMORY_E);
+ WOLFSSL_ERROR_MSG(msg);
+
+ /* turn off logs */
+ wolfSSL_Debugging_OFF();
+
+ /* capture log count */
+ i = log_cnt;
+
+ /* validate no logs are output when disabled */
+ WOLFSSL_MSG(msg);
+ WOLFSSL_BUFFER(a, sizeof(a));
+ WOLFSSL_BUFFER(b, sizeof(b));
+ WOLFSSL_BUFFER(NULL, 0);
+ WOLFSSL_ERROR(MEMORY_E);
+ WOLFSSL_ERROR_MSG(msg);
+
+ /* check the logs were disabled */
+ if (i != log_cnt)
+ return -12602;
+
+ /* restore callback and leave logging enabled */
+ wolfSSL_SetLoggingCb(NULL);
+ wolfSSL_Debugging_ON();
+
+ /* suppress unused args */
+ (void)a;
+ (void)b;
+
+#else
+ if (wolfSSL_Debugging_ON() != NOT_COMPILED_IN)
+ return -12603;
+ wolfSSL_Debugging_OFF();
+ if (wolfSSL_SetLoggingCb(NULL) != NOT_COMPILED_IN)
+ return -12604;
+#endif /* DEBUG_WOLFSSL */
+ return 0;
+}
+
+
+int mutex_test(void)
+{
+#ifdef WOLFSSL_PTHREADS
+ wolfSSL_Mutex m;
+#endif
+#ifndef WOLFSSL_NO_MALLOC
+ wolfSSL_Mutex *mm = wc_InitAndAllocMutex();
+ if (mm == NULL)
+ return -12700;
+ wc_FreeMutex(mm);
+ XFREE(mm, NULL, DYNAMIC_TYPE_MUTEX);
+#endif
+
+#ifdef WOLFSSL_PTHREADS
+ if (wc_InitMutex(&m) != 0)
+ return -12701;
+ if (wc_LockMutex(&m) != 0)
+ return -12702;
+ if (wc_FreeMutex(&m) != BAD_MUTEX_E)
+ return -12703;
+ if (wc_UnLockMutex(&m) != 0)
+ return -12704;
+ if (wc_FreeMutex(&m) != 0)
+ return -12705;
+#ifndef WOLFSSL_NO_MUTEXLOCK_AFTER_FREE
+ if (wc_LockMutex(&m) != BAD_MUTEX_E)
+ return -12706;
+ if (wc_UnLockMutex(&m) != BAD_MUTEX_E)
+ return -12707;
+#endif
+#endif
+
+ return 0;
+}
+
+#if defined(USE_WOLFSSL_MEMORY) && !defined(FREERTOS)
+
+#ifndef WOLFSSL_NO_MALLOC
+static int malloc_cnt = 0;
+static int realloc_cnt = 0;
+static int free_cnt = 0;
+
+static void *my_Malloc_cb(size_t size)
+{
+ malloc_cnt++;
+ #ifndef WOLFSSL_NO_MALLOC
+ return malloc(size);
+ #else
+ WOLFSSL_MSG("No malloc available");
+ (void)size;
+ return NULL;
+ #endif
+}
+static void my_Free_cb(void *ptr)
+{
+ free_cnt++;
+ #ifndef WOLFSSL_NO_MALLOC
+ free(ptr);
+ #else
+ WOLFSSL_MSG("No free available");
+ (void)ptr;
+ #endif
+}
+static void *my_Realloc_cb(void *ptr, size_t size)
+{
+ realloc_cnt++;
+ #ifndef WOLFSSL_NO_MALLOC
+ return realloc(ptr, size);
+ #else
+ WOLFSSL_MSG("No realloc available");
+ (void)ptr;
+ (void)size;
+ return NULL;
+ #endif
+}
+#endif /* !WOLFSSL_NO_MALLOC */
+
+int memcb_test(void)
+{
+ int ret = 0;
+#ifndef WOLFSSL_NO_MALLOC
+ byte* b = NULL;
+#endif
+ wolfSSL_Malloc_cb mc;
+ wolfSSL_Free_cb fc;
+ wolfSSL_Realloc_cb rc;
+
+ /* Save existing memory callbacks */
+ if (wolfSSL_GetAllocators(&mc, &fc, &rc) != 0)
+ return -12800;
+
+#ifndef WOLFSSL_NO_MALLOC
+ /* test realloc */
+ b = (byte*)XREALLOC(b, 1024, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ if (b == NULL) {
+ ERROR_OUT(-12801, exit_memcb);
+ }
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ b = NULL;
+
+ /* Use API. */
+ if (wolfSSL_SetAllocators((wolfSSL_Malloc_cb)(void*)&my_Malloc_cb,
+ (wolfSSL_Free_cb)(void*)&my_Free_cb,
+ (wolfSSL_Realloc_cb)(void*)&my_Realloc_cb) != 0) {
+ ERROR_OUT(-12802, exit_memcb);
+ }
+
+ b = (byte*)XMALLOC(1024, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ b = (byte*)XREALLOC(b, 1024, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+#ifndef WOLFSSL_STATIC_MEMORY
+ if (malloc_cnt != 1 || free_cnt != 1 || realloc_cnt != 1)
+#else
+ if (malloc_cnt != 0 || free_cnt != 0 || realloc_cnt != 0)
+#endif
+ ret = -12803;
+#endif /* !WOLFSSL_NO_MALLOC */
+
+#ifndef WOLFSSL_NO_MALLOC
+exit_memcb:
+#endif
+
+ /* restore memory callbacks */
+ wolfSSL_SetAllocators(mc, fc, rc);
return ret;
}
+#endif /* USE_WOLFSSL_MEMORY && !WOLFSSL_NO_MALLOC */
-#endif /* HAVE_PKCS7 */
+#ifdef WOLFSSL_IMX6_CAAM_BLOB
+int blob_test(void)
+{
+ int ret = 0;
+ byte out[112];
+ byte blob[112];
+ word32 outSz;
+
+ const byte iv[] =
+ {
+ 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
+ 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+ };
+
+ const byte text[] =
+ {
+ 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,
+ 0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
+ 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,
+ 0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
+ 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,
+ 0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef,
+ 0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,
+ 0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10
+ };
+
+
+ XMEMSET(blob, 0, sizeof(blob));
+ outSz = sizeof(blob);
+ ret = wc_caamCreateBlob((byte*)iv, sizeof(iv), blob, &outSz);
+ if (ret != 0) {
+ ERROR_OUT(-12900, exit_blob);
+ }
+
+ blob[outSz - 2] += 1;
+ ret = wc_caamOpenBlob(blob, outSz, out, &outSz);
+ if (ret == 0) { /* should fail with altered blob */
+ ERROR_OUT(-12901, exit_blob);
+ }
+
+ XMEMSET(blob, 0, sizeof(blob));
+ outSz = sizeof(blob);
+ ret = wc_caamCreateBlob((byte*)iv, sizeof(iv), blob, &outSz);
+ if (ret != 0) {
+ ERROR_OUT(-12902, exit_blob);
+ }
+
+ ret = wc_caamOpenBlob(blob, outSz, out, &outSz);
+ if (ret != 0) {
+ ERROR_OUT(-12903, exit_blob);
+ }
+
+ if (XMEMCMP(out, iv, sizeof(iv))) {
+ ERROR_OUT(-12904, exit_blob);
+ }
+
+ XMEMSET(blob, 0, sizeof(blob));
+ outSz = sizeof(blob);
+ ret = wc_caamCreateBlob((byte*)text, sizeof(text), blob, &outSz);
+ if (ret != 0) {
+ ERROR_OUT(-12905, exit_blob);
+ }
+
+ ret = wc_caamOpenBlob(blob, outSz, out, &outSz);
+ if (ret != 0) {
+ ERROR_OUT(-12906, exit_blob);
+ }
+
+ if (XMEMCMP(out, text, sizeof(text))) {
+ ERROR_OUT(-12907, exit_blob);
+ }
+
+ exit_blob:
+
+ return ret;
+}
+#endif /* WOLFSSL_IMX6_CAAM_BLOB */
+
+#ifdef WOLF_CRYPTO_CB
+
+/* Example custom context for crypto callback */
+typedef struct {
+ int exampleVar; /* example, not used */
+} myCryptoDevCtx;
+
+
+/* Example crypto dev callback function that calls software version */
+static int myCryptoDevCb(int devIdArg, wc_CryptoInfo* info, void* ctx)
+{
+ int ret = NOT_COMPILED_IN; /* return this to bypass HW and use SW */
+ myCryptoDevCtx* myCtx = (myCryptoDevCtx*)ctx;
+
+ if (info == NULL)
+ return BAD_FUNC_ARG;
+
+#ifdef DEBUG_WOLFSSL
+ printf("CryptoDevCb: Algo Type %d\n", info->algo_type);
+#endif
+
+ if (info->algo_type == WC_ALGO_TYPE_RNG) {
+ #ifndef WC_NO_RNG
+ /* set devId to invalid, so software is used */
+ info->rng.rng->devId = INVALID_DEVID;
+
+ ret = wc_RNG_GenerateBlock(info->rng.rng,
+ info->rng.out, info->rng.sz);
+
+ /* reset devId */
+ info->rng.rng->devId = devIdArg;
+ #endif
+ }
+ else if (info->algo_type == WC_ALGO_TYPE_SEED) {
+ #ifndef WC_NO_RNG
+ static byte seed[sizeof(word32)] = { 0x00, 0x00, 0x00, 0x01 };
+ word32* seedWord32 = (word32*)seed;
+ word32 len;
+
+ /* wc_GenerateSeed is a local symbol so we need to fake the entropy. */
+ while (info->seed.sz > 0) {
+ len = (word32)sizeof(seed);
+ if (info->seed.sz < len)
+ len = info->seed.sz;
+ XMEMCPY(info->seed.seed, seed, sizeof(seed));
+ info->seed.seed += len;
+ info->seed.sz -= len;
+ (*seedWord32)++;
+ }
+
+ ret = 0;
+ #endif
+ }
+ else if (info->algo_type == WC_ALGO_TYPE_PK) {
+ #ifdef DEBUG_WOLFSSL
+ printf("CryptoDevCb: Pk Type %d\n", info->pk.type);
+ #endif
+
+ #ifndef NO_RSA
+ if (info->pk.type == WC_PK_TYPE_RSA) {
+ /* set devId to invalid, so software is used */
+ info->pk.rsa.key->devId = INVALID_DEVID;
+
+ switch (info->pk.rsa.type) {
+ case RSA_PUBLIC_ENCRYPT:
+ case RSA_PUBLIC_DECRYPT:
+ /* perform software based RSA public op */
+ ret = wc_RsaFunction(
+ info->pk.rsa.in, info->pk.rsa.inLen,
+ info->pk.rsa.out, info->pk.rsa.outLen,
+ info->pk.rsa.type, info->pk.rsa.key, info->pk.rsa.rng);
+ break;
+ case RSA_PRIVATE_ENCRYPT:
+ case RSA_PRIVATE_DECRYPT:
+ /* perform software based RSA private op */
+ ret = wc_RsaFunction(
+ info->pk.rsa.in, info->pk.rsa.inLen,
+ info->pk.rsa.out, info->pk.rsa.outLen,
+ info->pk.rsa.type, info->pk.rsa.key, info->pk.rsa.rng);
+ break;
+ }
+
+ /* reset devId */
+ info->pk.rsa.key->devId = devIdArg;
+ }
+ #ifdef WOLFSSL_KEY_GEN
+ else if (info->pk.type == WC_PK_TYPE_RSA_KEYGEN) {
+ info->pk.rsakg.key->devId = INVALID_DEVID;
+
+ ret = wc_MakeRsaKey(info->pk.rsakg.key, info->pk.rsakg.size,
+ info->pk.rsakg.e, info->pk.rsakg.rng);
+
+ /* reset devId */
+ info->pk.rsakg.key->devId = devIdArg;
+ }
+ #endif
+ #endif /* !NO_RSA */
+ #ifdef HAVE_ECC
+ if (info->pk.type == WC_PK_TYPE_EC_KEYGEN) {
+ /* set devId to invalid, so software is used */
+ info->pk.eckg.key->devId = INVALID_DEVID;
+
+ ret = wc_ecc_make_key_ex(info->pk.eckg.rng, info->pk.eckg.size,
+ info->pk.eckg.key, info->pk.eckg.curveId);
+
+ /* reset devId */
+ info->pk.eckg.key->devId = devIdArg;
+ }
+ else if (info->pk.type == WC_PK_TYPE_ECDSA_SIGN) {
+ /* set devId to invalid, so software is used */
+ info->pk.eccsign.key->devId = INVALID_DEVID;
+
+ ret = wc_ecc_sign_hash(
+ info->pk.eccsign.in, info->pk.eccsign.inlen,
+ info->pk.eccsign.out, info->pk.eccsign.outlen,
+ info->pk.eccsign.rng, info->pk.eccsign.key);
+
+ /* reset devId */
+ info->pk.eccsign.key->devId = devIdArg;
+ }
+ else if (info->pk.type == WC_PK_TYPE_ECDSA_VERIFY) {
+ /* set devId to invalid, so software is used */
+ info->pk.eccverify.key->devId = INVALID_DEVID;
+
+ ret = wc_ecc_verify_hash(
+ info->pk.eccverify.sig, info->pk.eccverify.siglen,
+ info->pk.eccverify.hash, info->pk.eccverify.hashlen,
+ info->pk.eccverify.res, info->pk.eccverify.key);
+
+ /* reset devId */
+ info->pk.eccverify.key->devId = devIdArg;
+ }
+ else if (info->pk.type == WC_PK_TYPE_ECDH) {
+ /* set devId to invalid, so software is used */
+ info->pk.ecdh.private_key->devId = INVALID_DEVID;
+
+ ret = wc_ecc_shared_secret(
+ info->pk.ecdh.private_key, info->pk.ecdh.public_key,
+ info->pk.ecdh.out, info->pk.ecdh.outlen);
+
+ /* reset devId */
+ info->pk.ecdh.private_key->devId = devIdArg;
+ }
+ #endif /* HAVE_ECC */
+ }
+ else if (info->algo_type == WC_ALGO_TYPE_CIPHER) {
+#if !defined(NO_AES) || !defined(NO_DES3)
+ #ifdef HAVE_AESGCM
+ if (info->cipher.type == WC_CIPHER_AES_GCM) {
+ if (info->cipher.enc) {
+ /* set devId to invalid, so software is used */
+ info->cipher.aesgcm_enc.aes->devId = INVALID_DEVID;
+
+ ret = wc_AesGcmEncrypt(
+ info->cipher.aesgcm_enc.aes,
+ info->cipher.aesgcm_enc.out,
+ info->cipher.aesgcm_enc.in,
+ info->cipher.aesgcm_enc.sz,
+ info->cipher.aesgcm_enc.iv,
+ info->cipher.aesgcm_enc.ivSz,
+ info->cipher.aesgcm_enc.authTag,
+ info->cipher.aesgcm_enc.authTagSz,
+ info->cipher.aesgcm_enc.authIn,
+ info->cipher.aesgcm_enc.authInSz);
+
+ /* reset devId */
+ info->cipher.aesgcm_enc.aes->devId = devIdArg;
+ }
+ else {
+ /* set devId to invalid, so software is used */
+ info->cipher.aesgcm_dec.aes->devId = INVALID_DEVID;
+
+ ret = wc_AesGcmDecrypt(
+ info->cipher.aesgcm_dec.aes,
+ info->cipher.aesgcm_dec.out,
+ info->cipher.aesgcm_dec.in,
+ info->cipher.aesgcm_dec.sz,
+ info->cipher.aesgcm_dec.iv,
+ info->cipher.aesgcm_dec.ivSz,
+ info->cipher.aesgcm_dec.authTag,
+ info->cipher.aesgcm_dec.authTagSz,
+ info->cipher.aesgcm_dec.authIn,
+ info->cipher.aesgcm_dec.authInSz);
+
+ /* reset devId */
+ info->cipher.aesgcm_dec.aes->devId = devIdArg;
+ }
+ }
+ #endif /* HAVE_AESGCM */
+ #ifdef HAVE_AES_CBC
+ if (info->cipher.type == WC_CIPHER_AES_CBC) {
+ if (info->cipher.enc) {
+ /* set devId to invalid, so software is used */
+ info->cipher.aescbc.aes->devId = INVALID_DEVID;
+
+ ret = wc_AesCbcEncrypt(
+ info->cipher.aescbc.aes,
+ info->cipher.aescbc.out,
+ info->cipher.aescbc.in,
+ info->cipher.aescbc.sz);
+
+ /* reset devId */
+ info->cipher.aescbc.aes->devId = devIdArg;
+ }
+ else {
+ /* set devId to invalid, so software is used */
+ info->cipher.aescbc.aes->devId = INVALID_DEVID;
+
+ ret = wc_AesCbcDecrypt(
+ info->cipher.aescbc.aes,
+ info->cipher.aescbc.out,
+ info->cipher.aescbc.in,
+ info->cipher.aescbc.sz);
+
+ /* reset devId */
+ info->cipher.aescbc.aes->devId = devIdArg;
+ }
+ }
+ #endif /* HAVE_AES_CBC */
+ #ifndef NO_DES3
+ if (info->cipher.type == WC_CIPHER_DES3) {
+ if (info->cipher.enc) {
+ /* set devId to invalid, so software is used */
+ info->cipher.des3.des->devId = INVALID_DEVID;
+
+ ret = wc_Des3_CbcEncrypt(
+ info->cipher.des3.des,
+ info->cipher.des3.out,
+ info->cipher.des3.in,
+ info->cipher.des3.sz);
+
+ /* reset devId */
+ info->cipher.des3.des->devId = devIdArg;
+ }
+ else {
+ /* set devId to invalid, so software is used */
+ info->cipher.des3.des->devId = INVALID_DEVID;
+
+ ret = wc_Des3_CbcDecrypt(
+ info->cipher.des3.des,
+ info->cipher.des3.out,
+ info->cipher.des3.in,
+ info->cipher.des3.sz);
+
+ /* reset devId */
+ info->cipher.des3.des->devId = devIdArg;
+ }
+ }
+ #endif /* !NO_DES3 */
+#endif /* !NO_AES || !NO_DES3 */
+ }
+#if !defined(NO_SHA) || !defined(NO_SHA256)
+ else if (info->algo_type == WC_ALGO_TYPE_HASH) {
+ #if !defined(NO_SHA)
+ if (info->hash.type == WC_HASH_TYPE_SHA) {
+ if (info->hash.sha1 == NULL)
+ return NOT_COMPILED_IN;
+
+ /* set devId to invalid, so software is used */
+ info->hash.sha1->devId = INVALID_DEVID;
+
+ if (info->hash.in != NULL) {
+ ret = wc_ShaUpdate(
+ info->hash.sha1,
+ info->hash.in,
+ info->hash.inSz);
+ }
+ if (info->hash.digest != NULL) {
+ ret = wc_ShaFinal(
+ info->hash.sha1,
+ info->hash.digest);
+ }
+
+ /* reset devId */
+ info->hash.sha1->devId = devIdArg;
+ }
+ else
+ #endif
+ #if !defined(NO_SHA256)
+ if (info->hash.type == WC_HASH_TYPE_SHA256) {
+ if (info->hash.sha256 == NULL)
+ return NOT_COMPILED_IN;
+
+ /* set devId to invalid, so software is used */
+ info->hash.sha256->devId = INVALID_DEVID;
+
+ if (info->hash.in != NULL) {
+ ret = wc_Sha256Update(
+ info->hash.sha256,
+ info->hash.in,
+ info->hash.inSz);
+ }
+ if (info->hash.digest != NULL) {
+ ret = wc_Sha256Final(
+ info->hash.sha256,
+ info->hash.digest);
+ }
+
+ /* reset devId */
+ info->hash.sha256->devId = devIdArg;
+ }
+ else
+ #endif
+ {
+ }
+ }
+#endif /* !NO_SHA || !NO_SHA256 */
+#ifndef NO_HMAC
+ else if (info->algo_type == WC_ALGO_TYPE_HMAC) {
+ if (info->hmac.hmac == NULL)
+ return NOT_COMPILED_IN;
+
+ /* set devId to invalid, so software is used */
+ info->hmac.hmac->devId = INVALID_DEVID;
+
+ if (info->hash.in != NULL) {
+ ret = wc_HmacUpdate(
+ info->hmac.hmac,
+ info->hmac.in,
+ info->hmac.inSz);
+ }
+ else if (info->hash.digest != NULL) {
+ ret = wc_HmacFinal(
+ info->hmac.hmac,
+ info->hmac.digest);
+ }
+
+ /* reset devId */
+ info->hmac.hmac->devId = devIdArg;
+ }
+#endif
+
+ (void)devIdArg;
+ (void)myCtx;
+
+ return ret;
+}
+
+int cryptocb_test(void)
+{
+ int ret = 0;
+ myCryptoDevCtx myCtx;
+
+ /* example data for callback */
+ myCtx.exampleVar = 1;
+
+ /* set devId to something other than INVALID_DEVID */
+ devId = 1;
+ ret = wc_CryptoCb_RegisterDevice(devId, myCryptoDevCb, &myCtx);
+
+#ifndef WC_NO_RNG
+ if (ret == 0)
+ ret = random_test();
+#endif /* WC_NO_RNG */
+#ifndef NO_RSA
+ if (ret == 0)
+ ret = rsa_test();
+#endif
+#ifdef HAVE_ECC
+ if (ret == 0)
+ ret = ecc_test();
+#endif
+#ifndef NO_AES
+ #ifdef HAVE_AESGCM
+ if (ret == 0)
+ ret = aesgcm_test();
+ #endif
+ #ifdef HAVE_AES_CBC
+ if (ret == 0)
+ ret = aes_test();
+ #endif
+#endif /* !NO_AES */
+#ifndef NO_DES3
+ if (ret == 0)
+ ret = des3_test();
+#endif /* !NO_DES3 */
+#if !defined(NO_SHA) || !defined(NO_SHA256)
+ #ifndef NO_SHA
+ if (ret == 0)
+ ret = sha_test();
+ #endif
+ #ifndef NO_SHA256
+ if (ret == 0)
+ ret = sha256_test();
+ #endif
+#endif
+#ifndef NO_HMAC
+ #ifndef NO_SHA
+ if (ret == 0)
+ ret = hmac_sha_test();
+ #endif
+ #ifndef NO_SHA256
+ if (ret == 0)
+ ret = hmac_sha256_test();
+ #endif
+#endif
+#ifndef NO_PWDBASED
+ #if defined(HAVE_PBKDF2) && !defined(NO_SHA256)
+ if (ret == 0)
+ ret = pbkdf2_test();
+ #endif
+#endif
+
+ /* reset devId */
+ devId = INVALID_DEVID;
+
+ return ret;
+}
+#endif /* WOLF_CRYPTO_CB */
+
+#ifdef WOLFSSL_CERT_PIV
+int certpiv_test(void)
+{
+ int ret;
+ wc_CertPIV piv;
+
+ /* Template for Identiv PIV cert, nonce and signature */
+ const byte pivCertIdentiv[] = {
+ 0x0A, 0x0D,
+ 0x53, 0x04, /* NIST PIV Cert */
+ 0x70, 0x02, /* Certificate */
+ 0x30, 0x00,
+ 0x71, 0x01, 0x00, /* Cert Info */
+ 0xFE, 0x00, /* Error Detection */
+ 0x0B, 0x01, 0x00, /* Nonce */
+ 0x0C, 0x01, 0x00, /* Signed Nonce */
+ };
+
+ const byte pivCert[] = {
+ 0x53, 0x04, /* NIST PIV Cert */
+ 0x70, 0x02, /* Certificate */
+ 0x30, 0x00,
+ 0x71, 0x01, 0x00, /* Cert Info */
+ 0xFE, 0x00, /* Error Detection */
+ };
+
+ /* Test with identiv 0x0A, 0x0B and 0x0C markers */
+ ret = wc_ParseCertPIV(&piv, pivCertIdentiv, sizeof(pivCertIdentiv));
+ if (ret == 0) {
+ /* Test with NIST PIV format */
+ ret = wc_ParseCertPIV(&piv, pivCert, sizeof(pivCert));
+ }
+
+ return ret;
+}
+#endif /* WOLFSSL_CERT_PIV */
+
+
+#undef ERROR_OUT
+
+#else
+ #ifndef NO_MAIN_DRIVER
+ int main() { return 0; }
+ #endif
#endif /* NO_CRYPT_TEST */
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.h b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.h
index dbe6e25e0..7a6fc7c84 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.h
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.h
@@ -1,8 +1,8 @@
-/* ctaocrypt/test/test.h
+/* wolfcrypt/test/test.h
*
- * Copyright (C) 2006-2015 wolfSSL Inc.
+ * Copyright (C) 2006-2020 wolfSSL Inc.
*
- * This file is part of wolfSSL. (formerly known as CyaSSL)
+ * This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,18 +16,28 @@
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
-#pragma once
+
+#ifndef WOLFCRYPT_TEST_H
+#define WOLFCRYPT_TEST_H
+
#ifdef __cplusplus
extern "C" {
#endif
+#ifdef HAVE_STACK_SIZE
+THREAD_RETURN WOLFSSL_THREAD wolfcrypt_test(void* args);
+#else
int wolfcrypt_test(void* args);
+#endif
#ifdef __cplusplus
} /* extern "C" */
#endif
+
+#endif /* WOLFCRYPT_TEST_H */
+
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.sln b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.sln
index 97b2e8dfd..55b9872f0 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.sln
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.sln
@@ -2,17 +2,56 @@
Microsoft Visual Studio Solution File, Format Version 9.00
# Visual C++ Express 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test", "test.vcproj", "{D04BDF66-664A-4D59-BEAC-8AB2D5809C21}"
+ ProjectSection(ProjectDependencies) = postProject
+ {73973223-5EE8-41CA-8E88-1D60E89A237B} = {73973223-5EE8-41CA-8E88-1D60E89A237B}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "wolfssl", "..\..\wolfssl.vcxproj", "{73973223-5EE8-41CA-8E88-1D60E89A237B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
+ Debug|x64 = Debug|x64
+ DLL Debug|Win32 = DLL Debug|Win32
+ DLL Debug|x64 = DLL Debug|x64
+ DLL Release|Win32 = DLL Release|Win32
+ DLL Release|x64 = DLL Release|x64
Release|Win32 = Release|Win32
+ Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.Debug|Win32.ActiveCfg = Debug|Win32
{D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.Debug|Win32.Build.0 = Debug|Win32
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.Debug|x64.ActiveCfg = Debug|x64
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.Debug|x64.Build.0 = Debug|x64
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.DLL Debug|Win32.ActiveCfg = Debug|Win32
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.DLL Debug|Win32.Build.0 = Debug|Win32
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.DLL Debug|x64.ActiveCfg = Debug|x64
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.DLL Debug|x64.Build.0 = Debug|x64
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.DLL Release|Win32.ActiveCfg = Release|Win32
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.DLL Release|Win32.Build.0 = Release|Win32
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.DLL Release|x64.ActiveCfg = Release|x64
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.DLL Release|x64.Build.0 = Release|x64
{D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.Release|Win32.ActiveCfg = Release|Win32
{D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.Release|Win32.Build.0 = Release|Win32
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.Release|x64.ActiveCfg = Release|x64
+ {D04BDF66-664A-4D59-BEAC-8AB2D5809C21}.Release|x64.Build.0 = Release|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Debug|Win32.ActiveCfg = Debug|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Debug|Win32.Build.0 = Debug|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Debug|x64.ActiveCfg = Debug|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Debug|x64.Build.0 = Debug|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Debug|Win32.ActiveCfg = DLL Debug|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Debug|Win32.Build.0 = DLL Debug|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Debug|x64.ActiveCfg = DLL Debug|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Debug|x64.Build.0 = DLL Debug|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Release|Win32.ActiveCfg = DLL Release|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Release|Win32.Build.0 = DLL Release|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Release|x64.ActiveCfg = DLL Release|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.DLL Release|x64.Build.0 = DLL Release|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Release|Win32.ActiveCfg = Release|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Release|Win32.Build.0 = Release|Win32
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Release|x64.ActiveCfg = Release|x64
+ {73973223-5EE8-41CA-8E88-1D60E89A237B}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.vcproj b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.vcproj
index 38c5c6bed..9758d8b77 100644
--- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.vcproj
+++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/test/test.vcproj
@@ -38,8 +38,8 @@
<Tool
Name="VCCLCompilerTool"
Optimization="0"
- AdditionalIncludeDirectories="../include;../../include/openssl"
- PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;"
+ AdditionalIncludeDirectories="../..;../../IDE/WIN;"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;WOLFSSL_LIB;WOLFSSL_USER_SETTINGS;"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
@@ -59,6 +59,7 @@
/>
<Tool
Name="VCLinkerTool"
+ AdditionalDependencies="Ws2_32.lib"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
@@ -112,8 +113,8 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="../include;../../include/openssl"
- PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;"
+ AdditionalIncludeDirectories="../..;../../IDE/WIN;"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;WOLFSSL_LIB;WOLFSSL_USER_SETTINGS;"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
@@ -131,6 +132,7 @@
/>
<Tool
Name="VCLinkerTool"
+ AdditionalDependencies="Ws2_32.lib"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"